summaryrefslogtreecommitdiffstats
path: root/doc/admin-guide/en-US/admin_commandref.xml
blob: df4c78f4869f5f9bc8fe7f5d120416d664c81760 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
<?xml version='1.0' encoding='UTF-8'?>
<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
<chapter id="chap-Administration_Guide-Com_Ref">
  <title>Command Reference </title>
  <para>This section describes the available  commands and includes the
following section:
</para>
  <itemizedlist>
    <listitem>
      <para>gluster Command
</para>
      <para>Gluster Console Manager (command line interpreter)
</para>
    </listitem>
    <listitem>
      <para>glusterd Daemon
</para>
      <para>Gluster elastic volume management daemon
</para>
    </listitem>
  </itemizedlist>
  <section>
    <title>gluster Command </title>
    <para><emphasis role="bold">NAME</emphasis>
</para>
    <para>gluster - Gluster Console Manager (command line interpreter)
</para>
    <para><emphasis role="bold">SYNOPSIS</emphasis>
</para>
    <para>To run the program and display the gluster prompt:
</para>
    <para><emphasis role="bold">gluster</emphasis>
</para>
    <para>To specify a command directly:
gluster [COMMANDS] [OPTIONS]
</para>
    <para><emphasis role="bold">DESCRIPTION</emphasis>
</para>
    <para>The Gluster Console Manager is a command line utility for elastic volume management. You can run
the gluster command on any export server. The command enables administrators to perform cloud
operations such as creating, expanding, shrinking, rebalancing, and migrating volumes without
needing to schedule server downtime.
</para>
    <para><emphasis role="bold">COMMANDS</emphasis>
</para>
    <para><informaltable frame="none">
        <tgroup cols="3">
          <colspec colnum="1" colname="c0" colsep="0"/>
          <colspec colnum="2" colname="cgen1" colsep="0"/>
          <colspec colnum="3" colname="c1" colsep="0"/>
          <thead>
            <row>
              <entry>Command</entry>
              <entry namest="cgen1" nameend="c1">Description</entry>
            </row>
          </thead>
          <tbody>
            <row>
              <entry namest="c0" nameend="c1" align="left">
                <emphasis role="bold">Volume</emphasis>
              </entry>
            </row>
            <row>
              <entry>volume info [all | VOLNAME]</entry>
              <entry namest="cgen1" nameend="c1">Displays information about all volumes, or the specified volume.</entry>
            </row>
            <row>
              <entry>volume create NEW-VOLNAME [stripe COUNT] [replica COUNT] [transport tcp | rdma | tcp,rdma] NEW-BRICK ...</entry>
              <entry namest="cgen1" nameend="c1">Creates a new volume of the specified type using the specified bricks and transport type (the default transport type is tcp).</entry>
            </row>
            <row>
              <entry>volume delete VOLNAME</entry>
              <entry namest="cgen1" nameend="c1">Deletes the specified volume.</entry>
            </row>
            <row>
              <entry>volume start VOLNAME </entry>
              <entry namest="cgen1" nameend="c1">Starts the specified volume.</entry>
            </row>
            <row>
              <entry>volume stop VOLNAME [force] </entry>
              <entry namest="cgen1" nameend="c1">Stops the specified volume. </entry>
            </row>
            <row>
              <entry>volume rename VOLNAME NEW-VOLNAME </entry>
              <entry namest="cgen1" nameend="c1">Renames the specified volume.</entry>
            </row>
            <row>
              <entry>volume help </entry>
              <entry namest="cgen1" nameend="c1">Displays help for the volume command.</entry>
            </row>
            <row>
              <entry namest="c0" nameend="c1" align="left">
                <emphasis role="bold">Brick</emphasis>
              </entry>
            </row>
            <row>
              <entry>volume add-brick VOLNAME NEW-BRICK ... </entry>
              <entry namest="cgen1" nameend="c1">Adds the specified brick to the specified volume.</entry>
            </row>
            <row>
              <entry>volume replace-brick VOLNAME (BRICK NEW-BRICK) start | pause | abort | status </entry>
              <entry namest="cgen1" nameend="c1">Replaces the specified brick.</entry>
            </row>
            <row>
              <entry>volume remove-brick VOLNAME [(replica COUNT)|(stripe COUNT)] BRICK ... </entry>
              <entry namest="cgen1" nameend="c1">Removes the specified brick from the specified volume.</entry>
            </row>
            <row>
              <entry namest="c0" nameend="c1" align="left">
                <emphasis role="bold">Rebalance</emphasis>
              </entry>
            </row>
            <row>
              <entry>volume rebalance VOLNAME start</entry>
              <entry namest="cgen1" nameend="c1">Starts rebalancing the specified volume.</entry>
            </row>
            <row>
              <entry>volume rebalance VOLNAME stop </entry>
              <entry namest="cgen1" nameend="c1">Stops rebalancing the specified volume. </entry>
            </row>
            <row>
              <entry>volume rebalance VOLNAME status </entry>
              <entry namest="cgen1" nameend="c1">Displays the rebalance status of the specified volume.</entry>
            </row>
            <row>
              <entry namest="c0" nameend="c1" align="left">
                <emphasis role="bold">Log</emphasis>
              </entry>
            </row>
            <row>
              <entry>volume log filename VOLNAME [BRICK] DIRECTORY </entry>
              <entry namest="cgen1" nameend="c1">Sets the log directory for the corresponding volume/brick. </entry>
            </row>
            <row>
              <entry>volume log rotate VOLNAME [BRICK] </entry>
              <entry namest="cgen1" nameend="c1">Rotates the log file for corresponding volume/brick.</entry>
            </row>
            <row>
              <entry>volume log locate VOLNAME [BRICK] </entry>
              <entry namest="cgen1" nameend="c1">Locates the log file for corresponding volume/brick. </entry>
            </row>
            <row>
              <entry namest="c0" nameend="c1" align="left">
                <emphasis role="bold">Peer</emphasis>
              </entry>
            </row>
            <row>
              <entry>peer probe HOSTNAME </entry>
              <entry namest="cgen1" nameend="c1">Probes the specified peer. </entry>
            </row>
            <row>
              <entry>peer detach HOSTNAME </entry>
              <entry namest="cgen1" nameend="c1">Detaches the specified peer. </entry>
            </row>
            <row>
              <entry>peer status </entry>
              <entry namest="cgen1" nameend="c1">Displays the status of peers. </entry>
            </row>
            <row>
              <entry>peer help </entry>
              <entry namest="cgen1" nameend="c1">Displays help for the peer command.</entry>
            </row>
            <row>
              <entry namest="c0" nameend="c1" align="left">
                <emphasis role="bold">Geo-replication</emphasis>
              </entry>
            </row>
            <row>
              <entry>volume geo-replication MASTER SLAVE start</entry>
              <entry namest="cgen1" nameend="c1">
                <para>Start geo-replication between the hosts specified by MASTER and SLAVE. You can specify a local master volume as :VOLNAME.</para>
                <para>You can specify a local slave volume as :VOLUME and a local slave directory as /DIRECTORY/SUB-DIRECTORY. You can specify a remote slave volume as DOMAIN::VOLNAME and a remote slave directory as DOMAIN:/DIRECTORY/SUB-DIRECTORY.</para>
              </entry>
            </row>
            <row>
              <entry>volume geo-replication MASTER SLAVE stop</entry>
              <entry namest="cgen1" nameend="c1">
                <para>Stop geo-replication between the hosts specified by MASTER and SLAVE. You can specify a local master volume as :VOLNAME and a local master directory as /DIRECTORY/SUB-DIRECTORY.</para>
                <para>You can specify a local slave volume as :VOLNAME and a local slave directory as /DIRECTORY/SUB-DIRECTORY. You can specify a remote slave volume as DOMAIN::VOLNAME and a remote slave directory as DOMAIN:/DIRECTORY/SUB-DIRECTORY.
</para>
              </entry>
            </row>
            <row>
              <entry morerows="10">volume geo-replication MASTER SLAVE config [options]</entry>
              <entry/>
              <entry>Configure geo-replication options between the hosts specified by MASTER and SLAVE. </entry>
            </row>
            <row>
              <entry>gluster-command COMMAND</entry>
              <entry>The path where the gluster command is installed.</entry>
            </row>
            <row>
              <entry>gluster-log-level LOGFILELEVEL</entry>
              <entry>The log level for gluster processes.</entry>
            </row>
            <row>
              <entry>log-file LOGFILE</entry>
              <entry>The path to the geo-replication log file.</entry>
            </row>
            <row>
              <entry>log-level LOGFILELEVEL</entry>
              <entry>The log level for geo-replication.</entry>
            </row>
            <row>
              <entry>remote-gsyncd COMMAND</entry>
              <entry>The path where the gsyncd binary is installed on the remote machine.</entry>
            </row>
            <row>
              <entry>ssh-command COMMAND</entry>
              <entry>The ssh command to use to connect to the remote machine (the default is ssh).</entry>
            </row>
            <row>
              <entry>rsync-command COMMAND</entry>
              <entry>The rsync command to use for synchronizing the files (the default is rsync).</entry>
            </row>
            <row>
              <entry>volume_id= UID</entry>
              <entry>The command to delete the existing master UID for the intermediate/slave node.</entry>
            </row>
            <row>
              <entry>timeout SECONDS</entry>
              <entry>The timeout period.</entry>
            </row>
            <row>
              <entry>sync-jobs N</entry>
              <entry>The number of simultaneous files/directories that can be synchronized.</entry>
            </row>
            <row>
              <entry/>
              <entry>ignore-deletes</entry>
              <entry>If this option is set to 1, a file deleted on master will not trigger a delete operation on the slave. Hence, the slave will remain as a superset of the master and can be used to recover the master in case of crash and/or accidental delete.</entry>
            </row>
            <row>
              <entry namest="c0" nameend="c1" align="left">
                <emphasis role="bold">Other</emphasis>
              </entry>
            </row>
            <row>
              <entry>help</entry>
              <entry/>
              <entry>Display the command options.</entry>
            </row>
            <row>
              <entry>quit</entry>
              <entry/>
              <entry>Exit the gluster command line interface.</entry>
            </row>
          </tbody>
        </tgroup>
      </informaltable></para>
    <para><emphasis role="bold">FILES</emphasis>

</para>
    <para>/etc/glusterd/*
</para>
    <para><emphasis role="bold">SEE ALSO </emphasis></para>
    <para>fusermount(1), mount.glusterfs(8), glusterfs-volgen(8), glusterfs(8), glusterd(8)</para>
  </section>
  <section>
    <title>glusterd Daemon </title>
    <para><emphasis role="bold">NAME</emphasis>
</para>
    <para>glusterd - Gluster elastic volume management daemon</para>
    <para><emphasis role="bold">SYNOPSIS</emphasis>
</para>
    <para>glusterd [OPTION...]
</para>
    <para><emphasis role="bold">DESCRIPTION</emphasis>
</para>
    <para>The glusterd daemon is used for elastic volume management. The daemon must be run on all export servers.
</para>
    <para><emphasis role="bold">OPTIONS</emphasis>
</para>
    <para><informaltable frame="none">
        <tgroup cols="2">
          <colspec colnum="1" colname="c0" colsep="0"/>
          <colspec colnum="2" colname="c1" colsep="0"/>
          <thead>
            <row>
              <entry>Option</entry>
              <entry>Description</entry>
            </row>
          </thead>
          <tbody>
            <row>
              <entry namest="c0" nameend="c1" align="left">
                <emphasis role="bold">Basic</emphasis>
              </entry>
            </row>
            <row>
              <entry>-l=LOGFILE, --log-file=LOGFILE</entry>
              <entry>Files to use for logging (the default is /usr/local/var/log/glusterfs/glusterfs.log).</entry>
            </row>
            <row>
              <entry>-L=LOGLEVEL, --log-level=LOGLEVEL</entry>
              <entry>Logging severity. Valid options are TRACE, DEBUG, INFO, WARNING, ERROR and CRITICAL (the default is INFO). </entry>
            </row>
            <row>
              <entry>--debug</entry>
              <entry>Runs the program in debug mode. This option sets --no-daemon, --log-level to DEBUG, and --log-file to console.</entry>
            </row>
            <row>
              <entry>-N, --no-daemon</entry>
              <entry>Runs the program in the foreground.</entry>
            </row>
            <row>
              <entry namest="c0" nameend="c1" align="left">
                <emphasis role="bold">Miscellaneous</emphasis>
              </entry>
            </row>
            <row>
              <entry>-?, --help</entry>
              <entry>Displays this help.</entry>
            </row>
            <row>
              <entry>--usage</entry>
              <entry>Displays a short usage message.</entry>
            </row>
            <row>
              <entry>-V, --version</entry>
              <entry>Prints the program version.</entry>
            </row>
          </tbody>
        </tgroup>
      </informaltable></para>
    <para><emphasis role="bold">FILES</emphasis>

</para>
    <para>/etc/glusterd/*
</para>
    <para><emphasis role="bold">SEE ALSO </emphasis></para>
    <para>fusermount(1), mount.glusterfs(8), glusterfs-volgen(8), glusterfs(8), gluster(8)</para>
  </section>
</chapter>
'graph'>
-rw-r--r--contrib/fuse-include/fuse-mount.h6
-rw-r--r--contrib/fuse-include/fuse_kernel.h539
-rw-r--r--contrib/fuse-include/fuse_kernel_macfuse.h (renamed from glusterfs-guts/src/fuse_kernel.h)73
-rw-r--r--contrib/fuse-include/mount_util.h (renamed from contrib/fuse-util/mount_util.h)3
-rw-r--r--contrib/fuse-lib/misc.c5
-rw-r--r--contrib/fuse-lib/mount-common.c265
-rw-r--r--contrib/fuse-lib/mount-gluster-compat.h56
-rw-r--r--contrib/fuse-lib/mount.c645
-rw-r--r--contrib/fuse-util/Makefile.am10
-rw-r--r--contrib/fuse-util/fusermount.c539
-rw-r--r--contrib/fuse-util/mount_util.c64
-rw-r--r--contrib/ipaddr-py/COPYING202
-rw-r--r--contrib/ipaddr-py/MANIFEST.in3
-rw-r--r--contrib/ipaddr-py/OWNERS4
-rw-r--r--contrib/ipaddr-py/README8
-rw-r--r--contrib/ipaddr-py/ipaddr.py1907
-rwxr-xr-xcontrib/ipaddr-py/ipaddr_test.py1099
-rwxr-xr-xcontrib/ipaddr-py/setup.py36
-rwxr-xr-xcontrib/ipaddr-py/test-2to3.sh15
-rw-r--r--contrib/libgen/basename_r.c40
-rw-r--r--contrib/libgen/dirname_r.c243
-rw-r--r--contrib/macfuse/COPYING.txt128
-rw-r--r--contrib/macfuse/fuse_ioctl.h64
-rw-r--r--contrib/macfuse/fuse_param.h147
-rw-r--r--contrib/macfuse/mount_darwin.c357
-rw-r--r--contrib/qemu/block.c4604
-rw-r--r--contrib/qemu/block/qcow.c914
-rw-r--r--contrib/qemu/block/qcow2-cache.c323
-rw-r--r--contrib/qemu/block/qcow2-cluster.c1478
-rw-r--r--contrib/qemu/block/qcow2-refcount.c1374
-rw-r--r--contrib/qemu/block/qcow2-snapshot.c660
-rw-r--r--contrib/qemu/block/qcow2.c1825
-rw-r--r--contrib/qemu/block/qcow2.h437
-rw-r--r--contrib/qemu/block/qed-check.c248
-rw-r--r--contrib/qemu/block/qed-cluster.c165
-rw-r--r--contrib/qemu/block/qed-gencb.c32
-rw-r--r--contrib/qemu/block/qed-l2-cache.c187
-rw-r--r--contrib/qemu/block/qed-table.c296
-rw-r--r--contrib/qemu/block/qed.c1596
-rw-r--r--contrib/qemu/block/qed.h344
-rw-r--r--contrib/qemu/block/snapshot.c157
-rw-r--r--contrib/qemu/config-host.h73
-rw-r--r--contrib/qemu/coroutine-ucontext.c225
-rw-r--r--contrib/qemu/include/block/aio.h247
-rw-r--r--contrib/qemu/include/block/block.h443
-rw-r--r--contrib/qemu/include/block/block_int.h421
-rw-r--r--contrib/qemu/include/block/blockjob.h278
-rw-r--r--contrib/qemu/include/block/coroutine.h218
-rw-r--r--contrib/qemu/include/block/coroutine_int.h53
-rw-r--r--contrib/qemu/include/block/snapshot.h53
-rw-r--r--contrib/qemu/include/config.h2
-rw-r--r--contrib/qemu/include/exec/cpu-common.h124
-rw-r--r--contrib/qemu/include/exec/hwaddr.h20
-rw-r--r--contrib/qemu/include/exec/poison.h63
-rw-r--r--contrib/qemu/include/fpu/softfloat.h641
-rw-r--r--contrib/qemu/include/glib-compat.h27
-rw-r--r--contrib/qemu/include/migration/migration.h157
-rw-r--r--contrib/qemu/include/migration/qemu-file.h266
-rw-r--r--contrib/qemu/include/migration/vmstate.h740
-rw-r--r--contrib/qemu/include/monitor/monitor.h104
-rw-r--r--contrib/qemu/include/monitor/readline.h55
-rw-r--r--contrib/qemu/include/qapi/error.h85
-rw-r--r--contrib/qemu/include/qapi/qmp/json-lexer.h51
-rw-r--r--contrib/qemu/include/qapi/qmp/json-parser.h24
-rw-r--r--contrib/qemu/include/qapi/qmp/json-streamer.h40
-rw-r--r--contrib/qemu/include/qapi/qmp/qbool.h29
-rw-r--r--contrib/qemu/include/qapi/qmp/qdict.h69
-rw-r--r--contrib/qemu/include/qapi/qmp/qerror.h249
-rw-r--r--contrib/qemu/include/qapi/qmp/qfloat.h29
-rw-r--r--contrib/qemu/include/qapi/qmp/qint.h28
-rw-r--r--contrib/qemu/include/qapi/qmp/qjson.h29
-rw-r--r--contrib/qemu/include/qapi/qmp/qlist.h63
-rw-r--r--contrib/qemu/include/qapi/qmp/qobject.h112
-rw-r--r--contrib/qemu/include/qapi/qmp/qstring.h36
-rw-r--r--contrib/qemu/include/qapi/qmp/types.h25
-rw-r--r--contrib/qemu/include/qemu-common.h478
-rw-r--r--contrib/qemu/include/qemu/aes.h45
-rw-r--r--contrib/qemu/include/qemu/atomic.h202
-rw-r--r--contrib/qemu/include/qemu/bitmap.h222
-rw-r--r--contrib/qemu/include/qemu/bitops.h276
-rw-r--r--contrib/qemu/include/qemu/bswap.h478
-rw-r--r--contrib/qemu/include/qemu/compiler.h55
-rw-r--r--contrib/qemu/include/qemu/error-report.h46
-rw-r--r--contrib/qemu/include/qemu/event_notifier.h46
-rw-r--r--contrib/qemu/include/qemu/hbitmap.h209
-rw-r--r--contrib/qemu/include/qemu/host-utils.h322
-rw-r--r--contrib/qemu/include/qemu/iov.h115
-rw-r--r--contrib/qemu/include/qemu/main-loop.h311
-rw-r--r--contrib/qemu/include/qemu/module.h40
-rw-r--r--contrib/qemu/include/qemu/notify.h72
-rw-r--r--contrib/qemu/include/qemu/option.h157
-rw-r--r--contrib/qemu/include/qemu/option_int.h54
-rw-r--r--contrib/qemu/include/qemu/osdep.h218
-rw-r--r--contrib/qemu/include/qemu/queue.h414
-rw-r--r--contrib/qemu/include/qemu/sockets.h83
-rw-r--r--contrib/qemu/include/qemu/thread-posix.h28
-rw-r--r--contrib/qemu/include/qemu/thread.h56
-rw-r--r--contrib/qemu/include/qemu/timer.h305
-rw-r--r--contrib/qemu/include/qemu/typedefs.h69
-rw-r--r--contrib/qemu/include/sysemu/os-posix.h52
-rw-r--r--contrib/qemu/include/sysemu/sysemu.h200
-rw-r--r--contrib/qemu/include/trace.h6
-rw-r--r--contrib/qemu/nop-symbols.c12
-rw-r--r--contrib/qemu/qapi-types.h2746
-rw-r--r--contrib/qemu/qemu-coroutine-lock.c178
-rw-r--r--contrib/qemu/qemu-coroutine-sleep.c39
-rw-r--r--contrib/qemu/qemu-coroutine.c135
-rw-r--r--contrib/qemu/qmp-commands.h204
-rw-r--r--contrib/qemu/qobject/json-lexer.c373
-rw-r--r--contrib/qemu/qobject/json-parser.c724
-rw-r--r--contrib/qemu/qobject/json-streamer.c122
-rw-r--r--contrib/qemu/qobject/qbool.c68
-rw-r--r--contrib/qemu/qobject/qdict.c478
-rw-r--r--contrib/qemu/qobject/qerror.c156
-rw-r--r--contrib/qemu/qobject/qfloat.c68
-rw-r--r--contrib/qemu/qobject/qint.c67
-rw-r--r--contrib/qemu/qobject/qjson.c282
-rw-r--r--contrib/qemu/qobject/qlist.c170
-rw-r--r--contrib/qemu/qobject/qstring.c149
-rw-r--r--contrib/qemu/trace/generated-tracers.h3759
-rw-r--r--contrib/qemu/util/aes.c1314
-rw-r--r--contrib/qemu/util/bitmap.c256
-rw-r--r--contrib/qemu/util/bitops.c158
-rw-r--r--contrib/qemu/util/cutils.c532
-rw-r--r--contrib/qemu/util/error.c120
-rw-r--r--contrib/qemu/util/hbitmap.c402
-rw-r--r--contrib/qemu/util/hexdump.c37
-rw-r--r--contrib/qemu/util/iov.c426
-rw-r--r--contrib/qemu/util/module.c81
-rw-r--r--contrib/qemu/util/oslib-posix.c243
-rw-r--r--contrib/qemu/util/qemu-error.c225
-rw-r--r--contrib/qemu/util/qemu-option.c1126
-rw-r--r--contrib/qemu/util/qemu-thread-posix.c327
-rw-r--r--contrib/qemu/util/unicode.c100
-rw-r--r--contrib/rbtree/rb.c933
-rw-r--r--contrib/rbtree/rb.h126
-rw-r--r--contrib/stdlib/gf_mkostemp.c107
-rw-r--r--contrib/uuid/clear.c43
-rw-r--r--contrib/uuid/compare.c55
-rw-r--r--contrib/uuid/copy.c45
-rw-r--r--contrib/uuid/gen_uuid.c679
-rw-r--r--contrib/uuid/gen_uuid_nt.c92
-rw-r--r--contrib/uuid/isnull.c48
-rw-r--r--contrib/uuid/pack.c69
-rw-r--r--contrib/uuid/parse.c79
-rw-r--r--contrib/uuid/tst_uuid.c180
-rw-r--r--contrib/uuid/unpack.c63
-rw-r--r--contrib/uuid/unparse.c76
-rw-r--r--contrib/uuid/uuid.h104
-rw-r--r--contrib/uuid/uuidP.h63
-rw-r--r--contrib/uuid/uuid_time.c171
-rw-r--r--contrib/uuid/uuid_types.h.in50
-rw-r--r--contrib/uuid/uuidd.h54
-rw-r--r--doc/Makefile.am14
-rw-r--r--doc/admin-guide/en-US/images/640px-GlusterFS_Architecture.pngbin0 -> 97477 bytes-rw-r--r--doc/admin-guide/en-US/images/Distributed_Replicated_Volume.pngbin0 -> 51326 bytes-rw-r--r--doc/admin-guide/en-US/images/Distributed_Striped_Replicated_Volume.pngbin0 -> 57210 bytes-rw-r--r--doc/admin-guide/en-US/images/Distributed_Striped_Volume.pngbin0 -> 53781 bytes-rw-r--r--doc/admin-guide/en-US/images/Distributed_Volume.pngbin0 -> 47211 bytes-rw-r--r--doc/admin-guide/en-US/images/Geo-Rep03_Internet.pngbin0 -> 131824 bytes-rw-r--r--doc/admin-guide/en-US/images/Geo-Rep04_Cascading.pngbin0 -> 187341 bytes-rw-r--r--doc/admin-guide/en-US/images/Geo-Rep_LAN.pngbin0 -> 163417 bytes-rw-r--r--doc/admin-guide/en-US/images/Geo-Rep_WAN.pngbin0 -> 96291 bytes-rw-r--r--doc/admin-guide/en-US/images/GlusterFS_Architecture.pngbin0 -> 133597 bytes-rw-r--r--doc/admin-guide/en-US/images/Hadoop_Architecture.pngbin0 -> 43815 bytes-rw-r--r--doc/admin-guide/en-US/images/Replicated_Volume.pngbin0 -> 44077 bytes-rw-r--r--doc/admin-guide/en-US/images/Striped_Replicated_Volume.pngbin0 -> 50193 bytes-rw-r--r--doc/admin-guide/en-US/images/Striped_Volume.pngbin0 -> 43316 bytes-rw-r--r--doc/admin-guide/en-US/images/UFO_Architecture.pngbin0 -> 72139 bytes-rw-r--r--doc/admin-guide/en-US/images/VSA_Architecture.pngbin0 -> 38875 bytes-rw-r--r--doc/admin-guide/en-US/images/icon.svg19
-rw-r--r--doc/admin-guide/en-US/markdown/Administration_Guide.md1
-rw-r--r--doc/admin-guide/en-US/markdown/Author_Group.md5
-rw-r--r--doc/admin-guide/en-US/markdown/Book_Info.md1
-rw-r--r--doc/admin-guide/en-US/markdown/Chapter.md18
-rw-r--r--doc/admin-guide/en-US/markdown/Preface.md22
-rw-r--r--doc/admin-guide/en-US/markdown/Revision_History.md4
-rw-r--r--doc/admin-guide/en-US/markdown/admin_ACLs.md197
-rw-r--r--doc/admin-guide/en-US/markdown/admin_Hadoop.md170
-rw-r--r--doc/admin-guide/en-US/markdown/admin_UFO.md1219
-rw-r--r--doc/admin-guide/en-US/markdown/admin_commandref.md180
-rw-r--r--doc/admin-guide/en-US/markdown/admin_console.md51
-rw-r--r--doc/admin-guide/en-US/markdown/admin_directory_Quota.md172
-rw-r--r--doc/admin-guide/en-US/markdown/admin_geo-replication.md738
-rw-r--r--doc/admin-guide/en-US/markdown/admin_managing_snapshots.md66
-rw-r--r--doc/admin-guide/en-US/markdown/admin_managing_volumes.md710
-rw-r--r--doc/admin-guide/en-US/markdown/admin_monitoring_workload.md931
-rw-r--r--doc/admin-guide/en-US/markdown/admin_setting_volumes.md419
-rw-r--r--doc/admin-guide/en-US/markdown/admin_settingup_clients.md641
-rw-r--r--doc/admin-guide/en-US/markdown/admin_start_stop_daemon.md70
-rw-r--r--doc/admin-guide/en-US/markdown/admin_storage_pools.md73
-rw-r--r--doc/admin-guide/en-US/markdown/admin_troubleshooting.md543
-rw-r--r--doc/admin-guide/en-US/markdown/gfs_introduction.md50
-rw-r--r--doc/admin-guide/en-US/markdown/glossary.md134
-rw-r--r--doc/authentication.txt10
-rw-r--r--doc/coding-standard.tex35
-rw-r--r--doc/examples/Makefile.am8
-rw-r--r--doc/examples/README13
-rw-r--r--doc/examples/filter.vol23
-rw-r--r--doc/examples/io-cache.vol25
-rw-r--r--doc/examples/io-threads.vol21
-rw-r--r--doc/examples/posix-locks.vol20
-rw-r--r--doc/examples/protocol-client.vol17
-rw-r--r--doc/examples/protocol-server.vol25
-rw-r--r--doc/examples/read-ahead.vol22
-rw-r--r--doc/examples/replicate.vol119
-rw-r--r--doc/examples/stripe.vol121
-rw-r--r--doc/examples/trace.vol16
-rw-r--r--doc/examples/trash.vol20
-rw-r--r--doc/examples/unify.vol178
-rw-r--r--doc/examples/write-behind.vol26
-rw-r--r--doc/features/rdma-cm-in-3.4.0.txt9
-rw-r--r--doc/features/rebalance.md74
-rw-r--r--doc/gluster.8125
-rw-r--r--doc/glusterd.864
-rw-r--r--doc/glusterfs.8164
-rw-r--r--doc/glusterfs.vol.sample61
-rw-r--r--doc/glusterfsd.8136
-rw-r--r--doc/glusterfsd.vol.sample47
-rw-r--r--doc/hacker-guide/adding-fops.txt33
-rw-r--r--doc/hacker-guide/bdb.txt70
-rw-r--r--doc/hacker-guide/en-US/markdown/adding-fops.md18
-rw-r--r--doc/hacker-guide/en-US/markdown/afr.md191
-rw-r--r--doc/hacker-guide/en-US/markdown/coding-standard.md402
-rw-r--r--doc/hacker-guide/en-US/markdown/posix.md59
-rw-r--r--doc/hacker-guide/en-US/markdown/translator-development.md666
-rw-r--r--doc/hacker-guide/en-US/markdown/write-behind.md56
-rw-r--r--doc/hacker-guide/lock-ahead.txt80
-rw-r--r--doc/hacker-guide/posix.txt59
-rw-r--r--doc/hacker-guide/write-behind.txt45
-rw-r--r--doc/legacy/Makefile.am (renamed from doc/user-guide/Makefile.am)2
-rw-r--r--doc/legacy/advanced-stripe.odg (renamed from doc/user-guide/advanced-stripe.odg)bin12648 -> 12648 bytes-rw-r--r--doc/legacy/advanced-stripe.pdf (renamed from doc/user-guide/advanced-stripe.pdf)bin13382 -> 13382 bytes-rw-r--r--doc/legacy/booster.txt (renamed from doc/booster.txt)22
-rw-r--r--doc/legacy/colonO-icon.jpg (renamed from doc/user-guide/colonO-icon.jpg)bin779 -> 779 bytes-rw-r--r--doc/legacy/errno.list.bsd.txt (renamed from doc/errno.list.bsd.txt)0
-rw-r--r--doc/legacy/errno.list.linux.txt (renamed from doc/errno.list.linux.txt)170
-rw-r--r--doc/legacy/errno.list.macosx.txt (renamed from doc/errno.list.macosx.txt)186
-rw-r--r--doc/legacy/errno.list.solaris.txt (renamed from doc/errno.list.solaris.txt)0
-rw-r--r--doc/legacy/fdl.texi (renamed from doc/user-guide/fdl.texi)0
-rw-r--r--doc/legacy/fuse.odg (renamed from doc/user-guide/fuse.odg)bin13190 -> 13190 bytes-rw-r--r--doc/legacy/fuse.pdf (renamed from doc/user-guide/fuse.pdf)bin14948 -> 14948 bytes-rw-r--r--doc/legacy/get_put_api_using_xattr.txt (renamed from doc/get_put_api_using_xattr.txt)2
-rw-r--r--doc/legacy/ha.odg (renamed from doc/user-guide/ha.odg)bin37290 -> 37290 bytes-rw-r--r--doc/legacy/ha.pdf (renamed from doc/user-guide/ha.pdf)bin19403 -> 19403 bytes-rw-r--r--doc/legacy/hacker-guide/Makefile.am (renamed from doc/hacker-guide/Makefile.am)0
-rw-r--r--doc/legacy/hacker-guide/call-stub.txt (renamed from doc/hacker-guide/call-stub.txt)178
-rw-r--r--doc/legacy/hacker-guide/hacker-guide.tex (renamed from doc/hacker-guide/hacker-guide.tex)26
-rw-r--r--doc/legacy/hacker-guide/replicate.txt (renamed from doc/hacker-guide/replicate.txt)24
-rw-r--r--doc/legacy/handling-options.txt (renamed from doc/handling-options.txt)6
-rw-r--r--doc/legacy/mac-related-xattrs.txt (renamed from doc/mac-related-xattrs.txt)22
-rw-r--r--doc/legacy/porting_guide.txt (renamed from doc/porting_guide.txt)12
-rw-r--r--doc/legacy/replicate.lyx (renamed from doc/replicate.lyx)48
-rw-r--r--doc/legacy/replicate.pdf (renamed from doc/replicate.pdf)bin109057 -> 109057 bytes-rw-r--r--doc/legacy/solaris-related-xattrs.txt (renamed from doc/solaris-related-xattrs.txt)24
-rw-r--r--doc/legacy/stat-prefetch-design.txt (renamed from doc/stat-prefetch-design.txt)80
-rw-r--r--doc/legacy/stripe.odg (renamed from doc/user-guide/stripe.odg)bin10188 -> 10188 bytes-rw-r--r--doc/legacy/stripe.pdf (renamed from doc/user-guide/stripe.pdf)bin11941 -> 11941 bytes-rw-r--r--doc/legacy/translator-options.txt (renamed from doc/translator-options.txt)117
-rw-r--r--doc/legacy/unify.odg (renamed from doc/user-guide/unify.odg)bin12955 -> 12955 bytes-rw-r--r--doc/legacy/unify.pdf (renamed from doc/user-guide/unify.pdf)bin18969 -> 18969 bytes-rw-r--r--doc/legacy/user-guide.info (renamed from doc/user-guide/user-guide.info)50
-rw-r--r--doc/legacy/user-guide.pdf (renamed from doc/user-guide/user-guide.pdf)bin353986 -> 353986 bytes-rw-r--r--doc/legacy/user-guide.texi (renamed from doc/user-guide/user-guide.texi)335
-rw-r--r--doc/legacy/xlator.odg (renamed from doc/user-guide/xlator.odg)bin12169 -> 12169 bytes-rw-r--r--doc/legacy/xlator.pdf (renamed from doc/user-guide/xlator.pdf)bin14358 -> 14358 bytes-rw-r--r--doc/logging.txt66
-rw-r--r--doc/mount.glusterfs.896
-rw-r--r--doc/qa/qa-client.vol170
-rw-r--r--doc/qa/qa-high-avail-client.vol17
-rw-r--r--doc/qa/qa-high-avail-server.vol344
-rw-r--r--doc/qa/qa-server.vol284
-rw-r--r--doc/rpc-for-glusterfs.changes-done.txt18
-rw-r--r--doc/split-brain.md251
-rw-r--r--error-codes.json4
-rw-r--r--extras/FreeBSD/Makefile6
-rw-r--r--extras/LinuxRPM/Makefile.am57
-rw-r--r--extras/MacOSX/Portfile6
-rw-r--r--extras/MacOSX/README.MacOSX10
-rw-r--r--extras/Makefile.am19
-rw-r--r--extras/Solaris/Prototype3
-rw-r--r--extras/Solaris/README.solaris2
-rw-r--r--extras/Solaris/pkginfo4
-rw-r--r--extras/Ubuntu/README.Ubuntu24
-rw-r--r--extras/Ubuntu/glusterd.conf10
-rw-r--r--extras/Ubuntu/mounting-glusterfs.conf7
-rw-r--r--extras/backend-cleanup.sh28
-rw-r--r--extras/benchmarking/Makefile.am4
-rw-r--r--extras/benchmarking/glfs-bm.c246
-rw-r--r--extras/benchmarking/rdd.c882
-rwxr-xr-xextras/clear_xattrs.sh53
-rwxr-xr-xextras/contri-add.sh73
-rwxr-xr-xextras/disk_usage_sync.sh86
-rwxr-xr-xextras/file_size_contri.sh17
-rwxr-xr-xextras/generate-xdr-files.sh98
-rw-r--r--extras/geo-rep/Makefile.am2
-rw-r--r--extras/geo-rep/generate-gfid-file.sh53
-rwxr-xr-xextras/geo-rep/get-gfid.sh7
-rw-r--r--extras/geo-rep/gsync-sync-gfid.c106
-rw-r--r--extras/geo-rep/gsync-upgrade.sh123
-rw-r--r--extras/geo-rep/slave-upgrade.sh102
-rw-r--r--extras/gluster-rsyslog-5.8.conf51
-rw-r--r--extras/gluster-rsyslog-7.2.conf76
-rw-r--r--extras/glusterd-sysconfig6
-rw-r--r--extras/glusterd.vol9
-rw-r--r--extras/glusterfs-georep-logrotate18
-rw-r--r--extras/glusterfs-logrotate27
-rw-r--r--extras/glusterfs-mode.el2
-rw-r--r--extras/glusterfs.vim21
-rw-r--r--extras/gnfs-loganalyse.py260
-rw-r--r--extras/group-virt.example6
-rw-r--r--extras/hook-scripts/Makefile.am1
-rw-r--r--extras/hook-scripts/S29CTDBsetup.sh69
-rwxr-xr-xextras/hook-scripts/S30samba-set.sh109
-rwxr-xr-xextras/hook-scripts/S30samba-start.sh110
-rwxr-xr-xextras/hook-scripts/S30samba-stop.sh71
-rwxr-xr-xextras/hook-scripts/S40ufo-stop.py24
-rwxr-xr-xextras/hook-scripts/S56glusterd-geo-rep-create-post.sh42
-rw-r--r--extras/init.d/Makefile.am15
-rwxr-xr-xextras/init.d/glusterd-Debian.in (renamed from extras/init.d/glusterfsd-Debian.in)31
-rwxr-xr-xextras/init.d/glusterd-Redhat.in142
-rwxr-xr-xextras/init.d/glusterd-SuSE.in (renamed from extras/init.d/glusterfsd-SuSE.in)31
-rw-r--r--extras/init.d/glusterd.plist.in (renamed from extras/init.d/glusterfs-server.plist.in)6
-rwxr-xr-xextras/init.d/glusterfsd-Redhat.in54
-rwxr-xr-xextras/init.d/rhel5-load-fuse.modules7
-rw-r--r--extras/logger.conf.example13
-rw-r--r--extras/ocf/Makefile.am11
-rwxr-xr-xextras/ocf/glusterd.in212
-rwxr-xr-xextras/ocf/volume.in246
-rwxr-xr-xextras/profiler/glusterfs-profiler817
-rwxr-xr-xextras/prot_filter.py144
-rwxr-xr-xextras/quota-metadata-cleanup.sh24
-rwxr-xr-xextras/quota-remove-xattr.sh24
-rwxr-xr-xextras/rebalance.py299
-rwxr-xr-xextras/rpc-coverage.sh480
-rwxr-xr-xextras/specgen.scm2
-rw-r--r--extras/stripe-merge.c492
-rw-r--r--extras/systemd/Makefile.am11
-rw-r--r--extras/systemd/glusterd.service.in14
-rwxr-xr-xextras/test/bug-920583.t50
-rwxr-xr-xextras/test/gluster_commands.sh265
-rw-r--r--extras/test/ld-preload-test/ld-preload-lib.c20
-rw-r--r--extras/test/ld-preload-test/ld-preload-test.c20
-rw-r--r--extras/test/open-fd-tests.c64
-rwxr-xr-xextras/test/run.sh22
-rwxr-xr-xextras/test/stop_glusterd.sh19
-rw-r--r--extras/test/test-ffop.c870
-rw-r--r--extras/volfilter.py167
-rw-r--r--extras/who-wrote-glusterfs/gitdm.aliases48
-rw-r--r--extras/who-wrote-glusterfs/gitdm.config8
-rw-r--r--extras/who-wrote-glusterfs/gitdm.domain-map15
-rwxr-xr-xextras/who-wrote-glusterfs/who-wrote-glusterfs.sh50
-rwxr-xr-xformat-patch.sh60
-rwxr-xr-xgen-headers.py54
-rw-r--r--geo-replication/Makefile.am3
-rw-r--r--geo-replication/src/Makefile.am33
-rw-r--r--geo-replication/src/gsyncd.c408
-rwxr-xr-xgeo-replication/src/gverify.sh160
-rw-r--r--geo-replication/src/peer_add_secret_pub.in9
-rwxr-xr-xgeo-replication/src/peer_gsec_create.in12
-rw-r--r--geo-replication/src/procdiggy.c121
-rw-r--r--geo-replication/src/procdiggy.h20
-rw-r--r--geo-replication/syncdaemon/Makefile.am7
-rw-r--r--geo-replication/syncdaemon/README.md58
-rw-r--r--geo-replication/syncdaemon/__codecheck.py46
-rw-r--r--geo-replication/syncdaemon/__init__.py0
-rw-r--r--geo-replication/syncdaemon/configinterface.py224
-rw-r--r--geo-replication/syncdaemon/gconf.py15
-rw-r--r--geo-replication/syncdaemon/gsyncd.py534
-rw-r--r--geo-replication/syncdaemon/libcxattr.py87
-rw-r--r--geo-replication/syncdaemon/libgfchangelog.py64
-rw-r--r--geo-replication/syncdaemon/master.py1022
-rw-r--r--geo-replication/syncdaemon/monitor.py244
-rw-r--r--geo-replication/syncdaemon/repce.py225
-rw-r--r--geo-replication/syncdaemon/resource.py1172
-rw-r--r--geo-replication/syncdaemon/syncdutils.py438
-rw-r--r--gf-error-codes.h.template33
-rw-r--r--glusterfs-api.pc.in12
-rw-r--r--glusterfs-guts/src/Makefile.am17
-rw-r--r--glusterfs-guts/src/fuse-bridge.c2724
-rw-r--r--glusterfs-guts/src/fuse-extra.c137
-rw-r--r--glusterfs-guts/src/fuse-extra.h38
-rw-r--r--glusterfs-guts/src/glusterfs-fuse.h58
-rw-r--r--glusterfs-guts/src/glusterfs-guts.c400
-rw-r--r--glusterfs-guts/src/glusterfs-guts.h62
-rw-r--r--glusterfs-guts/src/guts-extra.c18
-rw-r--r--glusterfs-guts/src/guts-lowlevel.h86
-rw-r--r--glusterfs-guts/src/guts-parse.c217
-rw-r--r--glusterfs-guts/src/guts-parse.h140
-rw-r--r--glusterfs-guts/src/guts-replay.c834
-rw-r--r--glusterfs-guts/src/guts-replay.h33
-rw-r--r--glusterfs-guts/src/guts-tables.c248
-rw-r--r--glusterfs-guts/src/guts-tables.h80
-rw-r--r--glusterfs-guts/src/guts-trace.c650
-rw-r--r--glusterfs-guts/src/guts-trace.h54
-rw-r--r--glusterfs-hadoop/0.20.2/conf/core-site.xml38
-rw-r--r--glusterfs-hadoop/0.20.2/pom.xml36
-rw-r--r--glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSBrickClass.java109
-rw-r--r--glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSBrickRepl.java52
-rw-r--r--glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSXattr.java472
-rw-r--r--glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFUSEInputStream.java205
-rw-r--r--glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFUSEOutputStream.java86
-rw-r--r--glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFileSystem.java492
-rw-r--r--glusterfs-hadoop/0.20.2/src/test/java/org/apache/hadoop/fs/glusterfs/AppTest.java38
-rwxr-xr-xglusterfs-hadoop/0.20.2/tools/build-deploy-jar.py212
-rw-r--r--glusterfs-hadoop/COPYING202
-rw-r--r--glusterfs-hadoop/README182
-rw-r--r--glusterfs.spec.in1194
-rw-r--r--glusterfsd/src/Makefile.am23
-rw-r--r--glusterfsd/src/fetch-spec.c296
-rw-r--r--glusterfsd/src/glusterfsd-mem-types.h27
-rw-r--r--glusterfsd/src/glusterfsd-mgmt.c2105
-rw-r--r--glusterfsd/src/glusterfsd.c2867
-rw-r--r--glusterfsd/src/glusterfsd.h128
-rw-r--r--libgfchangelog.pc.in11
-rw-r--r--libglusterfs/src/Makefile.am65
-rw-r--r--libglusterfs/src/authenticate.c248
-rw-r--r--libglusterfs/src/authenticate.h61
-rw-r--r--libglusterfs/src/byte-order.h195
-rw-r--r--libglusterfs/src/call-stub.c5302
-rw-r--r--libglusterfs/src/call-stub.h1174
-rw-r--r--libglusterfs/src/checksum.c65
-rw-r--r--libglusterfs/src/checksum.h20
-rw-r--r--libglusterfs/src/circ-buff.c202
-rw-r--r--libglusterfs/src/circ-buff.h64
-rw-r--r--libglusterfs/src/client_t.c890
-rw-r--r--libglusterfs/src/client_t.h135
-rw-r--r--libglusterfs/src/common-utils.c3676
-rw-r--r--libglusterfs/src/common-utils.h401
-rw-r--r--libglusterfs/src/compat-errno.c1596
-rw-r--r--libglusterfs/src/compat-errno.h33
-rw-r--r--libglusterfs/src/compat.c835
-rw-r--r--libglusterfs/src/compat.h202
-rw-r--r--libglusterfs/src/ctx.c48
-rw-r--r--libglusterfs/src/daemon.c66
-rw-r--r--libglusterfs/src/daemon.h23
-rw-r--r--libglusterfs/src/defaults.c2130
-rw-r--r--libglusterfs/src/defaults.h808
-rw-r--r--libglusterfs/src/dict.c3357
-rw-r--r--libglusterfs/src/dict.h158
-rw-r--r--libglusterfs/src/event-epoll.c463
-rw-r--r--libglusterfs/src/event-history.c83
-rw-r--r--libglusterfs/src/event-history.h44
-rw-r--r--libglusterfs/src/event-poll.c451
-rw-r--r--libglusterfs/src/event.c967
-rw-r--r--libglusterfs/src/event.h78
-rw-r--r--libglusterfs/src/fd-lk.c490
-rw-r--r--libglusterfs/src/fd-lk.h70
-rw-r--r--libglusterfs/src/fd.c1103
-rw-r--r--libglusterfs/src/fd.h110
-rw-r--r--libglusterfs/src/gf-dirent.c174
-rw-r--r--libglusterfs/src/gf-dirent.h35
-rw-r--r--libglusterfs/src/gidcache.c192
-rw-r--r--libglusterfs/src/gidcache.h53
-rw-r--r--libglusterfs/src/globals.c306
-rw-r--r--libglusterfs/src/globals.h68
-rw-r--r--libglusterfs/src/glusterfs-acl.h81
-rw-r--r--libglusterfs/src/glusterfs.h493
-rw-r--r--libglusterfs/src/graph-print.c200
-rw-r--r--libglusterfs/src/graph-utils.h20
-rw-r--r--libglusterfs/src/graph.c751
-rw-r--r--libglusterfs/src/graph.l79
-rw-r--r--libglusterfs/src/graph.y620
-rw-r--r--libglusterfs/src/hashfn.c265
-rw-r--r--libglusterfs/src/hashfn.h24
-rw-r--r--libglusterfs/src/iatt.h331
-rw-r--r--libglusterfs/src/inode.c1776
-rw-r--r--libglusterfs/src/inode.h219
-rw-r--r--libglusterfs/src/iobuf.c765
-rw-r--r--libglusterfs/src/iobuf.h85
-rw-r--r--libglusterfs/src/latency.c189
-rw-r--r--libglusterfs/src/latency.h28
-rw-r--r--libglusterfs/src/list.h96
-rw-r--r--libglusterfs/src/lkowner.h83
-rw-r--r--libglusterfs/src/locking.h23
-rw-r--r--libglusterfs/src/logging.c1317
-rw-r--r--libglusterfs/src/logging.h220
-rw-r--r--libglusterfs/src/mem-pool.c574
-rw-r--r--libglusterfs/src/mem-pool.h205
-rw-r--r--libglusterfs/src/mem-types.h125
-rw-r--r--libglusterfs/src/options.c1127
-rw-r--r--libglusterfs/src/options.h259
-rw-r--r--libglusterfs/src/protocol.h945
-rw-r--r--libglusterfs/src/rbthash.c458
-rw-r--r--libglusterfs/src/rbthash.h77
-rw-r--r--libglusterfs/src/revision.h2
-rw-r--r--libglusterfs/src/run.c513
-rw-r--r--libglusterfs/src/run.h194
-rw-r--r--libglusterfs/src/scheduler.c85
-rw-r--r--libglusterfs/src/scheduler.h40
-rw-r--r--libglusterfs/src/spec.l94
-rw-r--r--libglusterfs/src/spec.y606
-rw-r--r--libglusterfs/src/stack.c405
-rw-r--r--libglusterfs/src/stack.h551
-rw-r--r--libglusterfs/src/statedump.c854
-rw-r--r--libglusterfs/src/statedump.h98
-rw-r--r--libglusterfs/src/store.c709
-rw-r--r--libglusterfs/src/store.h112
-rw-r--r--libglusterfs/src/syncop.c2237
-rw-r--r--libglusterfs/src/syncop.h412
-rw-r--r--libglusterfs/src/syscall.c191
-rw-r--r--libglusterfs/src/syscall.h84
-rw-r--r--libglusterfs/src/timer.c107
-rw-r--r--libglusterfs/src/timer.h49
-rw-r--r--libglusterfs/src/timespec.c68
-rw-r--r--libglusterfs/src/timespec.h24
-rw-r--r--libglusterfs/src/transport.c441
-rw-r--r--libglusterfs/src/transport.h106
-rw-r--r--libglusterfs/src/trie.c387
-rw-r--r--libglusterfs/src/trie.h51
-rw-r--r--libglusterfs/src/xlator.c1477
-rw-r--r--libglusterfs/src/xlator.h1389
-rw-r--r--libglusterfsclient/src/Makefile.am16
-rw-r--r--libglusterfsclient/src/libglusterfsclient-dentry.c389
-rwxr-xr-xlibglusterfsclient/src/libglusterfsclient-internals.h298
-rwxr-xr-xlibglusterfsclient/src/libglusterfsclient.c7677
-rwxr-xr-xlibglusterfsclient/src/libglusterfsclient.h1327
-rw-r--r--mod_glusterfs/Makefile.am3
-rw-r--r--mod_glusterfs/apache/1.3/src/Makefile.am30
-rw-r--r--mod_glusterfs/apache/1.3/src/README.txt107
-rw-r--r--mod_glusterfs/apache/1.3/src/mod_glusterfs.c507
-rw-r--r--mod_glusterfs/apache/2.2/src/Makefile.am31
-rw-r--r--mod_glusterfs/apache/2.2/src/README.txt105
-rw-r--r--mod_glusterfs/apache/2.2/src/mod_glusterfs.c3627
-rw-r--r--mod_glusterfs/apache/Makefile.am10
-rw-r--r--mod_glusterfs/lighttpd/1.4/Makefile.am3
-rw-r--r--mod_glusterfs/lighttpd/1.4/Makefile.am.diff29
-rw-r--r--mod_glusterfs/lighttpd/1.4/README.txt57
-rw-r--r--mod_glusterfs/lighttpd/1.4/mod_glusterfs.c1820
-rw-r--r--mod_glusterfs/lighttpd/1.4/mod_glusterfs.h32
-rw-r--r--mod_glusterfs/lighttpd/1.5/Makefile.am3
-rw-r--r--mod_glusterfs/lighttpd/1.5/Makefile.am.diff29
-rw-r--r--mod_glusterfs/lighttpd/1.5/README.txt57
-rw-r--r--mod_glusterfs/lighttpd/1.5/mod_glusterfs.c1476
-rw-r--r--mod_glusterfs/lighttpd/1.5/mod_glusterfs.h29
-rw-r--r--mod_glusterfs/lighttpd/Makefile.am3
-rwxr-xr-xrfc.sh114
-rw-r--r--rpc/Makefile.am1
-rw-r--r--rpc/rpc-lib/Makefile.am (renamed from xlators/performance/stat-prefetch/Makefile.am)0
-rw-r--r--rpc/rpc-lib/src/Makefile.am19
-rw-r--r--rpc/rpc-lib/src/auth-glusterfs.c276
-rw-r--r--rpc/rpc-lib/src/auth-null.c61
-rw-r--r--rpc/rpc-lib/src/auth-unix.c83
-rw-r--r--rpc/rpc-lib/src/protocol-common.h264
-rw-r--r--rpc/rpc-lib/src/rpc-clnt.c1700
-rw-r--r--rpc/rpc-lib/src/rpc-clnt.h246
-rw-r--r--rpc/rpc-lib/src/rpc-drc.c872
-rw-r--r--rpc/rpc-lib/src/rpc-drc.h104
-rw-r--r--rpc/rpc-lib/src/rpc-transport.c679
-rw-r--r--rpc/rpc-lib/src/rpc-transport.h314
-rw-r--r--rpc/rpc-lib/src/rpcsvc-auth.c486
-rw-r--r--rpc/rpc-lib/src/rpcsvc-common.h126
-rw-r--r--rpc/rpc-lib/src/rpcsvc.c2480
-rw-r--r--rpc/rpc-lib/src/rpcsvc.h606
-rw-r--r--rpc/rpc-lib/src/xdr-common.h79
-rw-r--r--rpc/rpc-lib/src/xdr-rpc.c212
-rw-r--r--rpc/rpc-lib/src/xdr-rpc.h81
-rw-r--r--rpc/rpc-lib/src/xdr-rpcclnt.c117
-rw-r--r--rpc/rpc-lib/src/xdr-rpcclnt.h42
-rw-r--r--rpc/rpc-transport/Makefile.am1
-rw-r--r--rpc/rpc-transport/rdma/Makefile.am (renamed from glusterfs-guts/Makefile.am)0
-rw-r--r--rpc/rpc-transport/rdma/src/Makefile.am22
-rw-r--r--rpc/rpc-transport/rdma/src/name.c (renamed from transport/ib-verbs/src/name.c)390
-rw-r--r--rpc/rpc-transport/rdma/src/name.h36
-rw-r--r--rpc/rpc-transport/rdma/src/rdma.c4575
-rw-r--r--rpc/rpc-transport/rdma/src/rdma.h382
-rw-r--r--rpc/rpc-transport/socket/Makefile.am (renamed from transport/ib-verbs/Makefile.am)0
-rw-r--r--rpc/rpc-transport/socket/src/Makefile.am17
-rw-r--r--rpc/rpc-transport/socket/src/name.c (renamed from transport/socket/src/name.c)452
-rw-r--r--rpc/rpc-transport/socket/src/name.h35
-rw-r--r--rpc/rpc-transport/socket/src/socket.c3744
-rw-r--r--rpc/rpc-transport/socket/src/socket.h239
-rw-r--r--rpc/xdr/Makefile.am1
-rw-r--r--rpc/xdr/src/Makefile.am25
-rw-r--r--rpc/xdr/src/acl.x48
-rw-r--r--rpc/xdr/src/acl3-xdr.c94
-rw-r--r--rpc/xdr/src/acl3-xdr.h107
-rw-r--r--rpc/xdr/src/cli1-xdr.c365
-rw-r--r--rpc/xdr/src/cli1-xdr.h369
-rw-r--r--rpc/xdr/src/cli1-xdr.x207
-rw-r--r--rpc/xdr/src/glusterd1-xdr.c923
-rw-r--r--rpc/xdr/src/glusterd1-xdr.h418
-rw-r--r--rpc/xdr/src/glusterd1-xdr.x218
-rw-r--r--rpc/xdr/src/glusterfs3-xdr.c2058
-rw-r--r--rpc/xdr/src/glusterfs3-xdr.h1394
-rw-r--r--rpc/xdr/src/glusterfs3-xdr.x756
-rw-r--r--rpc/xdr/src/glusterfs3.h270
-rw-r--r--rpc/xdr/src/mount3udp.x25
-rw-r--r--rpc/xdr/src/msg-nfs3.c572
-rw-r--r--rpc/xdr/src/msg-nfs3.h224
-rw-r--r--rpc/xdr/src/nlm4-xdr.c245
-rw-r--r--rpc/xdr/src/nlm4-xdr.h258
-rw-r--r--rpc/xdr/src/nlm4.x154
-rw-r--r--rpc/xdr/src/nlmcbk-xdr.c28
-rw-r--r--rpc/xdr/src/nlmcbk-xdr.h65
-rw-r--r--rpc/xdr/src/nlmcbk.x24
-rw-r--r--rpc/xdr/src/nsm-xdr.c96
-rw-r--r--rpc/xdr/src/nsm-xdr.h95
-rw-r--r--rpc/xdr/src/nsm.x47
-rw-r--r--rpc/xdr/src/portmap-xdr.c237
-rw-r--r--rpc/xdr/src/portmap-xdr.h130
-rw-r--r--rpc/xdr/src/portmap-xdr.x55
-rw-r--r--rpc/xdr/src/rpc-common-xdr.c223
-rw-r--r--rpc/xdr/src/rpc-common-xdr.h104
-rw-r--r--rpc/xdr/src/rpc-common-xdr.x39
-rw-r--r--rpc/xdr/src/xdr-generic.c125
-rw-r--r--rpc/xdr/src/xdr-generic.h48
-rw-r--r--rpc/xdr/src/xdr-nfs3.c1888
-rw-r--r--rpc/xdr/src/xdr-nfs3.h1199
-rwxr-xr-xrun-tests.sh30
-rw-r--r--scheduler/Makefile.am3
-rw-r--r--scheduler/alu/src/Makefile.am14
-rw-r--r--scheduler/alu/src/alu.c993
-rw-r--r--scheduler/alu/src/alu.h89
-rw-r--r--scheduler/nufa/src/Makefile.am12
-rw-r--r--scheduler/nufa/src/nufa.c405
-rw-r--r--scheduler/random/src/Makefile.am14
-rw-r--r--scheduler/random/src/random.c283
-rw-r--r--scheduler/random/src/random.h46
-rw-r--r--scheduler/rr/src/Makefile.am13
-rw-r--r--scheduler/rr/src/rr-options.c256
-rw-r--r--scheduler/rr/src/rr-options.h34
-rw-r--r--scheduler/rr/src/rr.c565
-rw-r--r--scheduler/rr/src/rr.h70
-rw-r--r--scheduler/switch/Makefile.am3
-rw-r--r--scheduler/switch/src/Makefile.am12
-rw-r--r--scheduler/switch/src/switch.c429
-rw-r--r--tests/README.md27
-rw-r--r--tests/afr.rc15
-rwxr-xr-xtests/basic/bd.t131
-rwxr-xr-xtests/basic/cdc.t135
-rwxr-xr-xtests/basic/file-snapshot.t56
-rwxr-xr-xtests/basic/mgmt_v3-locks.t121
-rwxr-xr-xtests/basic/mount.t78
-rw-r--r--tests/basic/nufa.t32
-rwxr-xr-xtests/basic/posixonly.t30
-rw-r--r--tests/basic/pump.t44
-rwxr-xr-xtests/basic/quota.t51
-rwxr-xr-xtests/basic/rpm.t109
-rw-r--r--tests/basic/self-heald.t48
-rwxr-xr-xtests/basic/volume-snapshot.t83
-rw-r--r--tests/basic/volume-status.t66
-rwxr-xr-xtests/basic/volume.t34
-rwxr-xr-xtests/bugs/859927/repl.t69
-rw-r--r--tests/bugs/886998/strict-readdir.t52
-rw-r--r--tests/bugs/949327.t23
-rwxr-xr-xtests/bugs/bug-000000.t9
-rw-r--r--tests/bugs/bug-1002207.t54
-rwxr-xr-xtests/bugs/bug-1002556.t25
-rw-r--r--tests/bugs/bug-1004218.t26
-rw-r--r--tests/bugs/bug-1004744.t48
-rwxr-xr-xtests/bugs/bug-1015990-rep.t81
-rwxr-xr-xtests/bugs/bug-1015990.t95
-rwxr-xr-xtests/bugs/bug-1022055.t26
-rw-r--r--tests/bugs/bug-1022905.t39
-rw-r--r--tests/bugs/bug-1030208.t35
-rw-r--r--tests/bugs/bug-1040934.t37
-rw-r--r--tests/bugs/bug-1045333.t48
-rwxr-xr-xtests/bugs/bug-1049834.t40
-rwxr-xr-xtests/bugs/bug-1064768.t20
-rwxr-xr-xtests/bugs/bug-762989.t32
-rw-r--r--tests/bugs/bug-764638.t13
-rwxr-xr-xtests/bugs/bug-765230.t60
-rw-r--r--tests/bugs/bug-765380.t39
-rwxr-xr-xtests/bugs/bug-765473.t33
-rw-r--r--tests/bugs/bug-765564.t83
-rwxr-xr-xtests/bugs/bug-767095.t51
-rwxr-xr-xtests/bugs/bug-767585-gfid.t43
-rwxr-xr-xtests/bugs/bug-770655.t168
-rwxr-xr-xtests/bugs/bug-782095.t48
-rwxr-xr-xtests/bugs/bug-797171.t43
-rwxr-xr-xtests/bugs/bug-802417.t108
-rwxr-xr-xtests/bugs/bug-808400-dist.t31
-rw-r--r--tests/bugs/bug-808400-fcntl.c113
-rw-r--r--tests/bugs/bug-808400-flock.c92
-rwxr-xr-xtests/bugs/bug-808400-repl.t30
-rwxr-xr-xtests/bugs/bug-808400-stripe.t31
-rwxr-xr-xtests/bugs/bug-808400.t34
-rwxr-xr-xtests/bugs/bug-811493.t18
-rw-r--r--tests/bugs/bug-821056.t52
-rwxr-xr-xtests/bugs/bug-822830.t44
-rwxr-xr-xtests/bugs/bug-823081.t40
-rw-r--r--tests/bugs/bug-824753-file-locker.c42
-rwxr-xr-xtests/bugs/bug-824753.t45
-rwxr-xr-xtests/bugs/bug-830665.t106
-rw-r--r--tests/bugs/bug-834465.c61
-rwxr-xr-xtests/bugs/bug-834465.t44
-rw-r--r--tests/bugs/bug-839595.t31
-rwxr-xr-xtests/bugs/bug-844688.t37
-rw-r--r--tests/bugs/bug-845213.t19
-rw-r--r--tests/bugs/bug-846240.t58
-rwxr-xr-xtests/bugs/bug-847622.t25
-rwxr-xr-xtests/bugs/bug-847624.t23
-rw-r--r--tests/bugs/bug-848251.t50
-rwxr-xr-xtests/bugs/bug-852147.t85
-rwxr-xr-xtests/bugs/bug-853258.t45
-rwxr-xr-xtests/bugs/bug-853680.t52
-rwxr-xr-xtests/bugs/bug-853690.t94
-rw-r--r--tests/bugs/bug-856455.t42
-rw-r--r--tests/bugs/bug-857330/common.rc55
-rwxr-xr-xtests/bugs/bug-857330/normal.t78
-rwxr-xr-xtests/bugs/bug-857330/xml.t101
-rwxr-xr-xtests/bugs/bug-858215.t81
-rw-r--r--tests/bugs/bug-858242.c77
-rwxr-xr-xtests/bugs/bug-858242.t28
-rw-r--r--tests/bugs/bug-858488-min-free-disk.t114
-rwxr-xr-xtests/bugs/bug-859927.t70
-rw-r--r--tests/bugs/bug-860297.t13
-rw-r--r--tests/bugs/bug-860663.t51
-rw-r--r--tests/bugs/bug-861015-index.t36
-rw-r--r--tests/bugs/bug-861015-log.t29
-rwxr-xr-xtests/bugs/bug-861542.t51
-rwxr-xr-xtests/bugs/bug-862834.t46
-rw-r--r--tests/bugs/bug-862967.t59
-rw-r--r--tests/bugs/bug-863068.t76
-rwxr-xr-xtests/bugs/bug-864222.t26
-rwxr-xr-xtests/bugs/bug-865825.t76
-rw-r--r--tests/bugs/bug-866459.t44
-rw-r--r--tests/bugs/bug-867252.t41
-rw-r--r--tests/bugs/bug-867253.t59
-rw-r--r--tests/bugs/bug-869724.t37
-rwxr-xr-xtests/bugs/bug-872923.t57
-rwxr-xr-xtests/bugs/bug-873367.t41
-rw-r--r--tests/bugs/bug-873549.t17
-rw-r--r--tests/bugs/bug-873962-spb.t39
-rwxr-xr-xtests/bugs/bug-873962.t108
-rw-r--r--tests/bugs/bug-874498.t61
-rwxr-xr-xtests/bugs/bug-877293.t41
-rwxr-xr-xtests/bugs/bug-877885.t35
-rwxr-xr-xtests/bugs/bug-877992.t61
-rw-r--r--tests/bugs/bug-878004.t29
-rwxr-xr-xtests/bugs/bug-879490.t37
-rwxr-xr-xtests/bugs/bug-879494.t37
-rw-r--r--tests/bugs/bug-880898.t23
-rwxr-xr-xtests/bugs/bug-882278.t72
-rw-r--r--tests/bugs/bug-884328.t12
-rw-r--r--tests/bugs/bug-884452.t46
-rwxr-xr-xtests/bugs/bug-884455.t84
-rwxr-xr-xtests/bugs/bug-884597.t152
-rw-r--r--tests/bugs/bug-886998.t52
-rw-r--r--tests/bugs/bug-887098-gmount-crash.t48
-rwxr-xr-xtests/bugs/bug-887145.t89
-rw-r--r--tests/bugs/bug-888174.t65
-rw-r--r--tests/bugs/bug-888752.t24
-rwxr-xr-xtests/bugs/bug-889630.t56
-rw-r--r--tests/bugs/bug-889996.t19
-rwxr-xr-xtests/bugs/bug-892730.t76
-rw-r--r--tests/bugs/bug-893338.t34
-rwxr-xr-xtests/bugs/bug-893378.t73
-rw-r--r--tests/bugs/bug-895235.t23
-rwxr-xr-xtests/bugs/bug-896431.t124
-rwxr-xr-xtests/bugs/bug-902610.t59
-rw-r--r--tests/bugs/bug-903336.t13
-rwxr-xr-xtests/bugs/bug-904065.t90
-rwxr-xr-xtests/bugs/bug-904300.t61
-rw-r--r--tests/bugs/bug-905307.t36
-rw-r--r--tests/bugs/bug-905864.c82
-rw-r--r--tests/bugs/bug-905864.t32
-rw-r--r--tests/bugs/bug-906646.t93
-rwxr-xr-xtests/bugs/bug-907072.t46
-rwxr-xr-xtests/bugs/bug-908146.t39
-rwxr-xr-xtests/bugs/bug-912297.t44
-rwxr-xr-xtests/bugs/bug-912564.t92
-rw-r--r--tests/bugs/bug-913051.t65
-rw-r--r--tests/bugs/bug-913487.t14
-rw-r--r--tests/bugs/bug-913544.t24
-rwxr-xr-xtests/bugs/bug-913555.t54
-rwxr-xr-xtests/bugs/bug-915280.t51
-rwxr-xr-xtests/bugs/bug-915554.t75
-rw-r--r--tests/bugs/bug-916226.t26
-rwxr-xr-xtests/bugs/bug-916549.t19
-rw-r--r--tests/bugs/bug-918437-sh-mtime.t52
-rwxr-xr-xtests/bugs/bug-921072.t118
-rw-r--r--tests/bugs/bug-921231.t31
-rwxr-xr-xtests/bugs/bug-921408.t89
-rwxr-xr-xtests/bugs/bug-924075.t23
-rwxr-xr-xtests/bugs/bug-924265.t35
-rwxr-xr-xtests/bugs/bug-927616.t61
-rw-r--r--tests/bugs/bug-948686.t46
-rw-r--r--tests/bugs/bug-948729/bug-948729-force.t84
-rw-r--r--tests/bugs/bug-948729/bug-948729-mode-script.t85
-rw-r--r--tests/bugs/bug-948729/bug-948729.t67
-rw-r--r--tests/bugs/bug-949242.t54
-rw-r--r--tests/bugs/bug-949298.t12
-rw-r--r--tests/bugs/bug-949930.t27
-rwxr-xr-xtests/bugs/bug-955588.t27
-rw-r--r--tests/bugs/bug-957877.t31
-rw-r--r--tests/bugs/bug-958691.t50
-rw-r--r--tests/bugs/bug-958790.t21
-rw-r--r--tests/bugs/bug-961307.t32
-rw-r--r--tests/bugs/bug-961615.t34
-rw-r--r--tests/bugs/bug-961669.t48
-rwxr-xr-xtests/bugs/bug-963541.t33
-rw-r--r--tests/bugs/bug-963678.t56
-rwxr-xr-xtests/bugs/bug-964059.t30
-rw-r--r--tests/bugs/bug-966018.t34
-rwxr-xr-xtests/bugs/bug-969193.t13
-rwxr-xr-xtests/bugs/bug-970070.t14
-rwxr-xr-xtests/bugs/bug-973073.t48
-rw-r--r--tests/bugs/bug-974007.t52
-rwxr-xr-xtests/bugs/bug-974972.t36
-rw-r--r--tests/bugs/bug-976800.t28
-rw-r--r--tests/bugs/bug-977246.t21
-rwxr-xr-xtests/bugs/bug-977797.t114
-rw-r--r--tests/bugs/bug-978794.t29
-rwxr-xr-xtests/bugs/bug-979365.t47
-rw-r--r--tests/bugs/bug-982174.t36
-rwxr-xr-xtests/bugs/bug-983477.t52
-rw-r--r--tests/bugs/bug-985074.t55
-rw-r--r--tests/bugs/bug-986429.t19
-rwxr-xr-xtests/bugs/bug-986905.t27
-rw-r--r--tests/bugs/bug-991622.t35
-rw-r--r--tests/bugs/getlk_owner.c120
-rwxr-xr-xtests/bugs/overlap.py59
-rwxr-xr-xtests/cluster.rc112
-rw-r--r--tests/dht.rc79
-rw-r--r--tests/fallocate.rc19
-rwxr-xr-xtests/features/glupy.t29
-rwxr-xr-xtests/features/readdir-ahead.t44
-rw-r--r--tests/fileio.rc61
-rw-r--r--tests/include.rc248
-rw-r--r--tests/nfs.rc21
-rwxr-xr-xtests/performance/open-behind.t63
-rw-r--r--tests/performance/quick-read.t55
-rwxr-xr-xtests/snapshot.rc251
-rwxr-xr-xtests/utils/create-files.py207
-rw-r--r--tests/volume.rc325
-rw-r--r--transport/Makefile.am3
-rw-r--r--transport/ib-verbs/src/Makefile.am15
-rw-r--r--transport/ib-verbs/src/ib-verbs.c2406
-rw-r--r--transport/ib-verbs/src/ib-verbs.h218
-rw-r--r--transport/ib-verbs/src/name.h47
-rw-r--r--transport/socket/Makefile.am1
-rw-r--r--transport/socket/src/Makefile.am14
-rw-r--r--transport/socket/src/name.h44
-rw-r--r--transport/socket/src/socket.c1447
-rw-r--r--transport/socket/src/socket.h122
-rw-r--r--xlators/Makefile.am5
-rw-r--r--xlators/bindings/python/src/Makefile.am2
-rw-r--r--xlators/bindings/python/src/gluster.py23
-rw-r--r--xlators/bindings/python/src/glusterstack.py23
-rw-r--r--xlators/bindings/python/src/glustertypes.py23
-rw-r--r--xlators/bindings/python/src/python.c33
-rw-r--r--xlators/bindings/python/src/testxlator.py25
-rw-r--r--xlators/cluster/Makefile.am2
-rw-r--r--xlators/cluster/afr/src/Makefile.am33
-rw-r--r--xlators/cluster/afr/src/afr-common.c4591
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.c669
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.h37
-rw-r--r--xlators/cluster/afr/src/afr-dir-write.c2896
-rw-r--r--xlators/cluster/afr/src/afr-dir-write.h46
-rw-r--r--xlators/cluster/afr/src/afr-inode-read.c2147
-rw-r--r--xlators/cluster/afr/src/afr-inode-read.h39
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.c3700
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.h72
-rw-r--r--xlators/cluster/afr/src/afr-lk-common.c2174
-rw-r--r--xlators/cluster/afr/src/afr-mem-types.h51
-rw-r--r--xlators/cluster/afr/src/afr-open.c382
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-algorithm.c837
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-algorithm.h32
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c3270
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.h142
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c2242
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c3634
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c1238
-rw-r--r--xlators/cluster/afr/src/afr-self-heal.h51
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.c1787
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.h65
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c2555
-rw-r--r--xlators/cluster/afr/src/afr-transaction.h53
-rw-r--r--xlators/cluster/afr/src/afr.c3176
-rw-r--r--xlators/cluster/afr/src/afr.h1450
-rw-r--r--xlators/cluster/afr/src/pump.c2641
-rw-r--r--xlators/cluster/afr/src/pump.h78
-rw-r--r--xlators/cluster/dht/src/Makefile.am32
-rw-r--r--xlators/cluster/dht/src/dht-common.c6421
-rw-r--r--xlators/cluster/dht/src/dht-common.h857
-rw-r--r--xlators/cluster/dht/src/dht-diskusage.c503
-rw-r--r--xlators/cluster/dht/src/dht-hashfn.c147
-rw-r--r--xlators/cluster/dht/src/dht-helper.c1150
-rw-r--r--xlators/cluster/dht/src/dht-inode-read.c1139
-rw-r--r--xlators/cluster/dht/src/dht-inode-write.c1013
-rw-r--r--xlators/cluster/dht/src/dht-layout.c1041
-rw-r--r--xlators/cluster/dht/src/dht-linkfile.c428
-rw-r--r--xlators/cluster/dht/src/dht-mem-types.h35
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c1815
-rw-r--r--xlators/cluster/dht/src/dht-rename.c1128
-rw-r--r--xlators/cluster/dht/src/dht-selfheal.c1239
-rw-r--r--xlators/cluster/dht/src/dht-shared.c758
-rw-r--r--xlators/cluster/dht/src/dht.c462
-rw-r--r--xlators/cluster/dht/src/nufa.c1000
-rw-r--r--xlators/cluster/dht/src/switch.c904
-rw-r--r--xlators/cluster/ha/src/Makefile.am7
-rw-r--r--xlators/cluster/ha/src/ha-helpers.c43
-rw-r--r--xlators/cluster/ha/src/ha-mem-types.h26
-rw-r--r--xlators/cluster/ha/src/ha.c1268
-rw-r--r--xlators/cluster/ha/src/ha.h30
-rw-r--r--xlators/cluster/map/src/Makefile.am7
-rw-r--r--xlators/cluster/map/src/map-helper.c29
-rw-r--r--xlators/cluster/map/src/map-mem-types.h24
-rw-r--r--xlators/cluster/map/src/map.c541
-rw-r--r--xlators/cluster/map/src/map.h25
-rw-r--r--xlators/cluster/stripe/src/Makefile.am14
-rw-r--r--xlators/cluster/stripe/src/stripe-helpers.c675
-rw-r--r--xlators/cluster/stripe/src/stripe-mem-types.h31
-rw-r--r--xlators/cluster/stripe/src/stripe.c6099
-rw-r--r--xlators/cluster/stripe/src/stripe.h212
-rw-r--r--xlators/cluster/unify/Makefile.am3
-rw-r--r--xlators/cluster/unify/src/Makefile.am16
-rw-r--r--xlators/cluster/unify/src/unify-self-heal.c1225
-rw-r--r--xlators/cluster/unify/src/unify.c4506
-rw-r--r--xlators/cluster/unify/src/unify.h132
-rw-r--r--xlators/debug/error-gen/src/Makefile.am9
-rw-r--r--xlators/debug/error-gen/src/error-gen-mem-types.h20
-rw-r--r--xlators/debug/error-gen/src/error-gen.c2954
-rw-r--r--xlators/debug/error-gen/src/error-gen.h48
-rw-r--r--xlators/debug/io-stats/src/Makefile.am9
-rw-r--r--xlators/debug/io-stats/src/io-stats-mem-types.h24
-rw-r--r--xlators/debug/io-stats/src/io-stats.c3359
-rw-r--r--xlators/debug/trace/src/Makefile.am8
-rw-r--r--xlators/debug/trace/src/trace-mem-types.h21
-rw-r--r--xlators/debug/trace/src/trace.c5398
-rw-r--r--xlators/debug/trace/src/trace.h98
-rw-r--r--xlators/encryption/Makefile.am2
-rw-r--r--xlators/encryption/crypt/Makefile.am (renamed from auth/addr/Makefile.am)0
-rw-r--r--xlators/encryption/crypt/src/Makefile.am24
-rw-r--r--xlators/encryption/crypt/src/atom.c962
-rw-r--r--xlators/encryption/crypt/src/crypt-common.h141
-rw-r--r--xlators/encryption/crypt/src/crypt-mem-types.h43
-rw-r--r--xlators/encryption/crypt/src/crypt.c4498
-rw-r--r--xlators/encryption/crypt/src/crypt.h899
-rw-r--r--xlators/encryption/crypt/src/data.c769
-rw-r--r--xlators/encryption/crypt/src/keys.c302
-rw-r--r--xlators/encryption/crypt/src/metadata.c605
-rw-r--r--xlators/encryption/crypt/src/metadata.h74
-rw-r--r--xlators/encryption/rot-13/src/Makefile.am7
-rw-r--r--xlators/encryption/rot-13/src/rot-13.c94
-rw-r--r--xlators/encryption/rot-13/src/rot-13.h20
-rw-r--r--xlators/features/Makefile.am5
-rw-r--r--xlators/features/changelog/Makefile.am3
-rw-r--r--xlators/features/changelog/lib/Makefile.am (renamed from scheduler/nufa/Makefile.am)2
-rw-r--r--xlators/features/changelog/lib/examples/c/get-changes.c87
-rw-r--r--xlators/features/changelog/lib/examples/python/changes.py32
-rw-r--r--xlators/features/changelog/lib/examples/python/libgfchangelog.py64
-rw-r--r--xlators/features/changelog/lib/src/Makefile.am37
-rw-r--r--xlators/features/changelog/lib/src/changelog.h31
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.c180
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.h97
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-process.c571
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog.c515
-rw-r--r--xlators/features/changelog/src/Makefile.am19
-rw-r--r--xlators/features/changelog/src/changelog-encoders.c176
-rw-r--r--xlators/features/changelog/src/changelog-encoders.h46
-rw-r--r--xlators/features/changelog/src/changelog-helpers.c693
-rw-r--r--xlators/features/changelog/src/changelog-helpers.h395
-rw-r--r--xlators/features/changelog/src/changelog-mem-types.h29
-rw-r--r--xlators/features/changelog/src/changelog-misc.h101
-rw-r--r--xlators/features/changelog/src/changelog-notifier.c314
-rw-r--r--xlators/features/changelog/src/changelog-notifier.h19
-rw-r--r--xlators/features/changelog/src/changelog-rt.c72
-rw-r--r--xlators/features/changelog/src/changelog-rt.h33
-rw-r--r--xlators/features/changelog/src/changelog.c1477
-rw-r--r--xlators/features/compress/Makefile.am (renamed from scheduler/rr/Makefile.am)2
-rw-r--r--xlators/features/compress/src/Makefile.am17
-rw-r--r--xlators/features/compress/src/cdc-helper.c547
-rw-r--r--xlators/features/compress/src/cdc-mem-types.h22
-rw-r--r--xlators/features/compress/src/cdc.c342
-rw-r--r--xlators/features/compress/src/cdc.h107
-rw-r--r--xlators/features/filter/src/Makefile.am9
-rw-r--r--xlators/features/filter/src/filter-mem-types.h20
-rw-r--r--xlators/features/filter/src/filter.c521
-rw-r--r--xlators/features/gfid-access/Makefile.am1
-rw-r--r--xlators/features/gfid-access/src/Makefile.am15
-rw-r--r--xlators/features/gfid-access/src/gfid-access-mem-types.h23
-rw-r--r--xlators/features/gfid-access/src/gfid-access.c1172
-rw-r--r--xlators/features/gfid-access/src/gfid-access.h128
-rw-r--r--xlators/features/glupy/Makefile.am (renamed from scheduler/alu/Makefile.am)2
-rw-r--r--xlators/features/glupy/doc/README.md44
-rw-r--r--xlators/features/glupy/doc/TESTING9
-rw-r--r--xlators/features/glupy/doc/test.vol10
-rw-r--r--xlators/features/glupy/src/Makefile.am20
-rw-r--r--xlators/features/glupy/src/debug-trace.py774
-rw-r--r--xlators/features/glupy/src/glupy.c2470
-rw-r--r--xlators/features/glupy/src/glupy.h69
-rw-r--r--xlators/features/glupy/src/gluster.py841
-rw-r--r--xlators/features/glupy/src/helloworld.py19
-rw-r--r--xlators/features/glupy/src/negative.py92
-rw-r--r--xlators/features/index/Makefile.am3
-rw-r--r--xlators/features/index/src/Makefile.am17
-rw-r--r--xlators/features/index/src/index-mem-types.h22
-rw-r--r--xlators/features/index/src/index.c1489
-rw-r--r--xlators/features/index/src/index.h73
-rw-r--r--xlators/features/locks/src/Makefile.am17
-rw-r--r--xlators/features/locks/src/clear.c424
-rw-r--r--xlators/features/locks/src/clear.h76
-rw-r--r--xlators/features/locks/src/common.c1414
-rw-r--r--xlators/features/locks/src/common.h179
-rw-r--r--xlators/features/locks/src/entrylk.c848
-rw-r--r--xlators/features/locks/src/inodelk.c825
-rw-r--r--xlators/features/locks/src/internal.c896
-rw-r--r--xlators/features/locks/src/locks-mem-types.h29
-rw-r--r--xlators/features/locks/src/locks.h195
-rw-r--r--xlators/features/locks/src/posix.c3040
-rw-r--r--xlators/features/locks/src/reservelk.c443
-rw-r--r--xlators/features/locks/tests/unit-test.c22
-rw-r--r--xlators/features/mac-compat/Makefile.am (renamed from auth/login/Makefile.am)0
-rw-r--r--xlators/features/mac-compat/src/Makefile.am14
-rw-r--r--xlators/features/mac-compat/src/mac-compat.c237
-rw-r--r--xlators/features/marker/Makefile.am3
-rw-r--r--xlators/features/marker/src/Makefile.am17
-rw-r--r--xlators/features/marker/src/marker-common.c69
-rw-r--r--xlators/features/marker/src/marker-common.h27
-rw-r--r--xlators/features/marker/src/marker-mem-types.h25
-rw-r--r--xlators/features/marker/src/marker-quota-helper.c414
-rw-r--r--xlators/features/marker/src/marker-quota-helper.h76
-rw-r--r--xlators/features/marker/src/marker-quota.c2520
-rw-r--r--xlators/features/marker/src/marker-quota.h130
-rw-r--r--xlators/features/marker/src/marker.c2862
-rw-r--r--xlators/features/marker/src/marker.h138
-rw-r--r--xlators/features/path-convertor/src/Makefile.am7
-rw-r--r--xlators/features/path-convertor/src/path-mem-types.h22
-rw-r--r--xlators/features/path-convertor/src/path.c301
-rw-r--r--xlators/features/protect/Makefile.am (renamed from libglusterfsclient/Makefile.am)0
-rw-r--r--xlators/features/protect/src/Makefile.am21
-rw-r--r--xlators/features/protect/src/prot_client.c215
-rw-r--r--xlators/features/protect/src/prot_dht.c168
-rw-r--r--xlators/features/protect/src/prot_server.c51
-rw-r--r--xlators/features/qemu-block/Makefile.am1
-rw-r--r--xlators/features/qemu-block/src/Makefile.am155
-rw-r--r--xlators/features/qemu-block/src/bdrv-xlator.c397
-rw-r--r--xlators/features/qemu-block/src/bh-syncop.c48
-rw-r--r--xlators/features/qemu-block/src/clock-timer.c60
-rw-r--r--xlators/features/qemu-block/src/coroutine-synctask.c116
-rw-r--r--xlators/features/qemu-block/src/monitor-logging.c50
-rw-r--r--xlators/features/qemu-block/src/qb-coroutines.c662
-rw-r--r--xlators/features/qemu-block/src/qb-coroutines.h30
-rw-r--r--xlators/features/qemu-block/src/qemu-block-memory-types.h25
-rw-r--r--xlators/features/qemu-block/src/qemu-block.c1140
-rw-r--r--xlators/features/qemu-block/src/qemu-block.h109
-rw-r--r--xlators/features/quiesce/Makefile.am3
-rw-r--r--xlators/features/quiesce/src/Makefile.am15
-rw-r--r--xlators/features/quiesce/src/quiesce-mem-types.h20
-rw-r--r--xlators/features/quiesce/src/quiesce.c2610
-rw-r--r--xlators/features/quiesce/src/quiesce.h51
-rw-r--r--xlators/features/quota/src/Makefile.am12
-rw-r--r--xlators/features/quota/src/quota-mem-types.h27
-rw-r--r--xlators/features/quota/src/quota.c3906
-rw-r--r--xlators/features/quota/src/quota.h151
-rw-r--r--xlators/features/read-only/Makefile.am (renamed from mod_glusterfs/apache/1.3/Makefile.am)0
-rw-r--r--xlators/features/read-only/src/Makefile.am22
-rw-r--r--xlators/features/read-only/src/read-only-common.c239
-rw-r--r--xlators/features/read-only/src/read-only-common.h115
-rw-r--r--xlators/features/read-only/src/read-only.c77
-rw-r--r--xlators/features/read-only/src/worm.c89
-rw-r--r--xlators/features/trash/src/Makefile.am9
-rw-r--r--xlators/features/trash/src/trash-mem-types.h22
-rw-r--r--xlators/features/trash/src/trash.c1971
-rw-r--r--xlators/features/trash/src/trash.h79
-rw-r--r--xlators/lib/src/libxlator.c470
-rw-r--r--xlators/lib/src/libxlator.h153
-rw-r--r--xlators/meta/src/Makefile.am5
-rw-r--r--xlators/meta/src/meta-mem-types.h25
-rw-r--r--xlators/meta/src/meta.c88
-rw-r--r--xlators/meta/src/meta.h20
-rw-r--r--xlators/meta/src/misc.c20
-rw-r--r--xlators/meta/src/misc.h20
-rw-r--r--xlators/meta/src/tree.c43
-rw-r--r--xlators/meta/src/tree.h20
-rw-r--r--xlators/meta/src/view.c20
-rw-r--r--xlators/meta/src/view.h20
-rw-r--r--xlators/mgmt/Makefile.am3
-rw-r--r--xlators/mgmt/glusterd/Makefile.am (renamed from mod_glusterfs/apache/2.2/Makefile.am)0
-rw-r--r--xlators/mgmt/glusterd/src/Makefile.am50
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c1953
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-geo-rep.c4236
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c4237
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handshake.c1366
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.c531
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.h89
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.c637
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.h51
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-log-ops.c271
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mem-types.h75
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c924
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.c1893
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.h45
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.c693
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.h42
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.c6243
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.h299
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.c492
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.h54
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quota.c839
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rebalance.c749
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-replace-brick.c2024
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rpc-ops.c1974
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.c1109
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.h217
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot.c5590
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.c3564
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.h164
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.c1639
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.h71
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c8894
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.h634
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c4065
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.h176
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c2225
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c1452
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.c1597
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h952
-rw-r--r--xlators/mount/fuse/src/Makefile.am39
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.c5210
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.h537
-rw-r--r--xlators/mount/fuse/src/fuse-helpers.c605
-rw-r--r--xlators/mount/fuse/src/fuse-mem-types.h28
-rw-r--r--xlators/mount/fuse/src/fuse-resolve.c724
-rwxr-xr-xxlators/mount/fuse/utils/mount.glusterfs.in498
-rwxr-xr-xxlators/mount/fuse/utils/mount_glusterfs.in28
-rw-r--r--xlators/nfs/Makefile.am3
-rw-r--r--xlators/nfs/server/Makefile.am3
-rw-r--r--xlators/nfs/server/src/Makefile.am24
-rw-r--r--xlators/nfs/server/src/acl3.c708
-rw-r--r--xlators/nfs/server/src/acl3.h31
-rw-r--r--xlators/nfs/server/src/mount3.c2759
-rw-r--r--xlators/nfs/server/src/mount3.h131
-rw-r--r--xlators/nfs/server/src/mount3udp_svc.c189
-rw-r--r--xlators/nfs/server/src/nfs-common.c465
-rw-r--r--xlators/nfs/server/src/nfs-common.h81
-rw-r--r--xlators/nfs/server/src/nfs-fops.c1661
-rw-r--r--xlators/nfs/server/src/nfs-fops.h248
-rw-r--r--xlators/nfs/server/src/nfs-generics.c345
-rw-r--r--xlators/nfs/server/src/nfs-generics.h168
-rw-r--r--xlators/nfs/server/src/nfs-inodes.c608
-rw-r--r--xlators/nfs/server/src/nfs-inodes.h76
-rw-r--r--xlators/nfs/server/src/nfs-mem-types.h53
-rw-r--r--xlators/nfs/server/src/nfs.c1825
-rw-r--r--xlators/nfs/server/src/nfs.h135
-rw-r--r--xlators/nfs/server/src/nfs3-fh.c188
-rw-r--r--xlators/nfs/server/src/nfs3-fh.h103
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.c3881
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.h337
-rw-r--r--xlators/nfs/server/src/nfs3.c5630
-rw-r--r--xlators/nfs/server/src/nfs3.h285
-rw-r--r--xlators/nfs/server/src/nlm4.c2525
-rw-r--r--xlators/nfs/server/src/nlm4.h77
-rw-r--r--xlators/nfs/server/src/nlmcbk_svc.c117
-rw-r--r--xlators/performance/Makefile.am2
-rw-r--r--xlators/performance/io-cache/src/Makefile.am10
-rw-r--r--xlators/performance/io-cache/src/io-cache.c2628
-rw-r--r--xlators/performance/io-cache/src/io-cache.h345
-rw-r--r--xlators/performance/io-cache/src/ioc-inode.c297
-rw-r--r--xlators/performance/io-cache/src/ioc-mem-types.h29
-rw-r--r--xlators/performance/io-cache/src/page.c1333
-rw-r--r--xlators/performance/io-threads/src/Makefile.am9
-rw-r--r--xlators/performance/io-threads/src/io-threads.c2713
-rw-r--r--xlators/performance/io-threads/src/io-threads.h178
-rw-r--r--xlators/performance/io-threads/src/iot-mem-types.h22
-rw-r--r--xlators/performance/md-cache/Makefile.am1
-rw-r--r--xlators/performance/md-cache/src/Makefile.am25
-rw-r--r--xlators/performance/md-cache/src/md-cache-mem-types.h24
-rw-r--r--xlators/performance/md-cache/src/md-cache.c2303
-rw-r--r--xlators/performance/open-behind/Makefile.am1
-rw-r--r--xlators/performance/open-behind/src/Makefile.am15
-rw-r--r--xlators/performance/open-behind/src/open-behind-mem-types.h21
-rw-r--r--xlators/performance/open-behind/src/open-behind.c1001
-rw-r--r--xlators/performance/quick-read/src/Makefile.am9
-rw-r--r--xlators/performance/quick-read/src/quick-read-mem-types.h27
-rw-r--r--xlators/performance/quick-read/src/quick-read.c2759
-rw-r--r--xlators/performance/quick-read/src/quick-read.h81
-rw-r--r--xlators/performance/read-ahead/src/Makefile.am9
-rw-r--r--xlators/performance/read-ahead/src/page.c696
-rw-r--r--xlators/performance/read-ahead/src/read-ahead-mem-types.h26
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.c1614
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.h148
-rw-r--r--xlators/performance/readdir-ahead/Makefile.am3
-rw-r--r--xlators/performance/readdir-ahead/src/Makefile.am15
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h24
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.c560
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.h46
-rw-r--r--xlators/performance/stat-prefetch/src/Makefile.am14
-rw-r--r--xlators/performance/stat-prefetch/src/stat-prefetch.c1838
-rw-r--r--xlators/performance/stat-prefetch/src/stat-prefetch.h77
-rw-r--r--xlators/performance/symlink-cache/src/Makefile.am7
-rw-r--r--xlators/performance/symlink-cache/src/symlink-cache.c78
-rw-r--r--xlators/performance/write-behind/src/Makefile.am9
-rw-r--r--xlators/performance/write-behind/src/write-behind-mem-types.h26
-rw-r--r--xlators/performance/write-behind/src/write-behind.c3487
-rw-r--r--xlators/playground/Makefile.am2
-rw-r--r--xlators/playground/template/Makefile.am2
-rw-r--r--xlators/playground/template/src/Makefile.am16
-rw-r--r--xlators/playground/template/src/template.c49
-rw-r--r--xlators/playground/template/src/template.h24
-rw-r--r--xlators/protocol/Makefile.am4
-rw-r--r--xlators/protocol/auth/Makefile.am (renamed from auth/Makefile.am)2
-rw-r--r--xlators/protocol/auth/addr/Makefile.am1
-rw-r--r--xlators/protocol/auth/addr/src/Makefile.am14
-rw-r--r--xlators/protocol/auth/addr/src/addr.c229
-rw-r--r--xlators/protocol/auth/login/Makefile.am1
-rw-r--r--xlators/protocol/auth/login/src/Makefile.am12
-rw-r--r--xlators/protocol/auth/login/src/login.c128
-rw-r--r--xlators/protocol/client/Makefile.am2
-rw-r--r--xlators/protocol/client/src/Makefile.am18
-rw-r--r--xlators/protocol/client/src/client-callback.c56
-rw-r--r--xlators/protocol/client/src/client-handshake.c1960
-rw-r--r--xlators/protocol/client/src/client-helpers.c349
-rw-r--r--xlators/protocol/client/src/client-lk.c573
-rw-r--r--xlators/protocol/client/src/client-mem-types.h25
-rw-r--r--xlators/protocol/client/src/client-protocol.c6589
-rw-r--r--xlators/protocol/client/src/client-protocol.h171
-rw-r--r--xlators/protocol/client/src/client-rpc-fops.c6203
-rw-r--r--xlators/protocol/client/src/client.c2867
-rw-r--r--xlators/protocol/client/src/client.h257
-rw-r--r--xlators/protocol/client/src/saved-frames.c191
-rw-r--r--xlators/protocol/client/src/saved-frames.h79
-rw-r--r--xlators/protocol/server/Makefile.am2
-rw-r--r--xlators/protocol/server/src/Makefile.am28
-rw-r--r--xlators/protocol/server/src/authenticate.c253
-rw-r--r--xlators/protocol/server/src/authenticate.h51
-rw-r--r--xlators/protocol/server/src/server-dentry.c413
-rw-r--r--xlators/protocol/server/src/server-handshake.c781
-rw-r--r--xlators/protocol/server/src/server-helpers.c1831
-rw-r--r--xlators/protocol/server/src/server-helpers.h101
-rw-r--r--xlators/protocol/server/src/server-mem-types.h30
-rw-r--r--xlators/protocol/server/src/server-protocol.c7989
-rw-r--r--xlators/protocol/server/src/server-protocol.h158
-rw-r--r--xlators/protocol/server/src/server-resolve.c598
-rw-r--r--xlators/protocol/server/src/server-rpc-fops.c6179
-rw-r--r--xlators/protocol/server/src/server.c1214
-rw-r--r--xlators/protocol/server/src/server.h196
-rw-r--r--xlators/storage/Makefile.am8
-rw-r--r--xlators/storage/bd/Makefile.am3
-rw-r--r--xlators/storage/bd/src/Makefile.am20
-rw-r--r--xlators/storage/bd/src/bd-aio.c527
-rw-r--r--xlators/storage/bd/src/bd-aio.h41
-rw-r--r--xlators/storage/bd/src/bd-helper.c783
-rw-r--r--xlators/storage/bd/src/bd.c2404
-rw-r--r--xlators/storage/bd/src/bd.h178
-rw-r--r--xlators/storage/bdb/Makefile.am3
-rw-r--r--xlators/storage/bdb/src/Makefile.am18
-rw-r--r--xlators/storage/bdb/src/bctx.c341
-rw-r--r--xlators/storage/bdb/src/bdb-ll.c1460
-rw-r--r--xlators/storage/bdb/src/bdb.c3624
-rw-r--r--xlators/storage/bdb/src/bdb.h530
-rw-r--r--xlators/storage/posix/src/Makefile.am18
-rw-r--r--xlators/storage/posix/src/posix-aio.c569
-rw-r--r--xlators/storage/posix/src/posix-aio.h39
-rw-r--r--xlators/storage/posix/src/posix-handle.c744
-rw-r--r--xlators/storage/posix/src/posix-handle.h143
-rw-r--r--xlators/storage/posix/src/posix-helpers.c1391
-rw-r--r--xlators/storage/posix/src/posix-mem-types.h27
-rw-r--r--xlators/storage/posix/src/posix.c5850
-rw-r--r--xlators/storage/posix/src/posix.h157
-rw-r--r--xlators/system/Makefile.am1
-rw-r--r--xlators/system/posix-acl/Makefile.am1
-rw-r--r--xlators/system/posix-acl/src/Makefile.am23
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.c180
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.h26
-rw-r--r--xlators/system/posix-acl/src/posix-acl.c2183
-rw-r--r--xlators/system/posix-acl/src/posix-acl.h30
1330 files changed, 427443 insertions, 131363 deletions
diff --git a/.gitignore b/.gitignore
index b09d20ac7..ff253c1da 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,5 +8,41 @@ install-sh
ltmain.sh
Makefile.in
missing
+py-compile
*.sw?
*~
+*.lo
+*.la
+*.o
+*.tar.gz
+*.rpm
+.libs
+.deps
+Makefile
+stamp-h1
+
+# Generated files
+api/examples/__init__.py*
+api/examples/setup.py
+argp-standalone/libargp.a
+contrib/uuid/uuid_types.h
+extras/init.d/glusterd-Debian
+extras/init.d/glusterd-Redhat
+extras/init.d/glusterd-SuSE
+extras/init.d/glusterd.plist
+extras/ocf/glusterd
+extras/ocf/volume
+extras/who-wrote-glusterfs/gitdm
+glusterfs-api.pc
+glusterfs.spec
+glusterfsd/src/glusterfsd
+libgfchangelog.pc
+libglusterfs/src/spec.lex.c
+libglusterfs/src/y.tab.c
+libglusterfs/src/y.tab.h
+libtool
+run-tests.sh
+ufo/.tox
+ufo/test/unit/.coverage
+xlators/mount/fuse/utils/mount.glusterfs
+xlators/mount/fuse/utils/mount_glusterfs
diff --git a/.mailmap b/.mailmap
new file mode 100644
index 000000000..6bcd95dea
--- /dev/null
+++ b/.mailmap
@@ -0,0 +1,31 @@
+# .mailmap, see 'git short-log --help' for details
+#
+# Listing of contributors that filed patches with different email addresses.
+# Format: <name> <main-email> <alias> [<alias> ...]
+#
+
+Amar Tumballi <amarts@redhat.com> <amar@gluster.com> <amar@del.gluster.com>
+Anand Avati <avati@redhat.com> <avati@gluster.com> <avati@dev.gluster.com> <avati@amp.gluster.com> <avati@blackhole.gluster.com>
+Anush Shetty <ashetty@redhat.com> <anush@gluster.com>
+Csaba Henk <csaba@redhat.com> <csaba@gluster.com> <csaba@lowlife.hu> <csaba@zresearch.com>
+Harshavardhana <fharshav@redhat.com> <harsha@gluster.com> <harsha@zresearch.com> <harsha@dev.gluster.com> <harsha@harshavardhana.net>
+Kaleb S. KEITHLEY <kkeithle@redhat.com> <kkeithle@f16node1.kkeithle.usersys.redhat.com>
+Kaushal M <kaushal@redhat.com> <kaushal@gluster.com>
+Kaushik BV <kbudiger@redhat.com> <kaushikbv@gluster.com>
+Krishna Srinivas <ksriniva@redhat.com> <krishna@gluster.com> <krishna@zresearch.com> <krishna@guest-laptop>
+Krishnan Parthasarathi <kparthas@redhat.com> <kp@gluster.com>
+Louis Zuckerman <louiszuckerman@gmail.com> <me@louiszuckerman.com>
+M S Vishwanath Bhat <vbhat@redhat.com> <msvbhat@gmail.com> <vishwanath@gluster.com>
+Pavan Sondur <pavan@gluster.com> <pavan@dev.gluster.com>
+Pete Zaitcev <zaitcev@kotori.zaitcev.us> <zaitcev@yahoo.com>
+Pranith Kumar K <pkarampu@redhat.com> <pranithk@gluster.com>
+Raghavendra Bhat <raghavendra@redhat.com> <raghavendrabhat@gluster.com>
+Raghavendra G <rgowdapp@redhat.com> <raghavendra@gluster.com> <raghavendra@zresearch.com>
+Rahul C S <rahulcs@redhat.com> <rahulcssjce@gmail.com>
+Rajesh Amaravathi <rajesh@redhat.com> <rajesh@gluster.com> <rajesh.amaravathi@gmail.com>
+Shehjar Tikoo <shehjart@gluster.com> <shehjart@zresearch.com>
+Venky Shankar <vshankar@redhat.com> <venky@gluster.com>
+Vijay Bellur <vbellur@redhat.com> <vijay@gluster.com> <vijay@dev.gluster.com>
+Vijaykumar Koppad <vkoppad@redhat.com> <vijaykumar.koppad@gmail.com>
+Vikas Gorur <vikas@gluster.com> <vikas@zresearch.com>
+shishir gowda <sgowda@redhat.com> <shishirng@gluster.com>
diff --git a/AUTHORS b/AUTHORS
index d9abdcd07..e69de29bb 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,3 +0,0 @@
-CORE TEAM:
-* Please visit http://www.gluster.org/core-team.php for complete list
- of contributors.
diff --git a/CONTRIBUTING b/CONTRIBUTING
new file mode 100644
index 000000000..7bccd88d7
--- /dev/null
+++ b/CONTRIBUTING
@@ -0,0 +1,25 @@
+ Developer's Certificate of Origin 1.1
+
+ By making a contribution to this project, I certify that:
+
+ (a) The contribution was created in whole or in part by me and I
+ have the right to submit it under the open source license
+ indicated in the file; or
+
+ (b) The contribution is based upon previous work that, to the best
+ of my knowledge, is covered under an appropriate open source
+ license and I have the right under that license to submit that
+ work with modifications, whether created in whole or in part
+ by me, under the same open source license (unless I am
+ permitted to submit under a different license), as indicated
+ in the file; or
+
+ (c) The contribution was provided directly to me by some other
+ person who certified (a), (b) or (c) and I have not modified
+ it.
+
+ (d) I understand and agree that this project and the contribution
+ are public and that a record of the contribution (including all
+ personal information I submit with it, including my sign-off) is
+ maintained indefinitely and may be redistributed consistent with
+ this project or the open source license(s) involved.
diff --git a/COPYING b/COPYING
deleted file mode 100644
index 94a9ed024..000000000
--- a/COPYING
+++ /dev/null
@@ -1,674 +0,0 @@
- GNU GENERAL PUBLIC LICENSE
- Version 3, 29 June 2007
-
- Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
- Preamble
-
- The GNU General Public License is a free, copyleft license for
-software and other kinds of works.
-
- The licenses for most software and other practical works are designed
-to take away your freedom to share and change the works. By contrast,
-the GNU General Public License is intended to guarantee your freedom to
-share and change all versions of a program--to make sure it remains free
-software for all its users. We, the Free Software Foundation, use the
-GNU General Public License for most of our software; it applies also to
-any other work released this way by its authors. You can apply it to
-your programs, too.
-
- When we speak of free software, we are referring to freedom, not
-price. Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-them if you wish), that you receive source code or can get it if you
-want it, that you can change the software or use pieces of it in new
-free programs, and that you know you can do these things.
-
- To protect your rights, we need to prevent others from denying you
-these rights or asking you to surrender the rights. Therefore, you have
-certain responsibilities if you distribute copies of the software, or if
-you modify it: responsibilities to respect the freedom of others.
-
- For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must pass on to the recipients the same
-freedoms that you received. You must make sure that they, too, receive
-or can get the source code. And you must show them these terms so they
-know their rights.
-
- Developers that use the GNU GPL protect your rights with two steps:
-(1) assert copyright on the software, and (2) offer you this License
-giving you legal permission to copy, distribute and/or modify it.
-
- For the developers' and authors' protection, the GPL clearly explains
-that there is no warranty for this free software. For both users' and
-authors' sake, the GPL requires that modified versions be marked as
-changed, so that their problems will not be attributed erroneously to
-authors of previous versions.
-
- Some devices are designed to deny users access to install or run
-modified versions of the software inside them, although the manufacturer
-can do so. This is fundamentally incompatible with the aim of
-protecting users' freedom to change the software. The systematic
-pattern of such abuse occurs in the area of products for individuals to
-use, which is precisely where it is most unacceptable. Therefore, we
-have designed this version of the GPL to prohibit the practice for those
-products. If such problems arise substantially in other domains, we
-stand ready to extend this provision to those domains in future versions
-of the GPL, as needed to protect the freedom of users.
-
- Finally, every program is threatened constantly by software patents.
-States should not allow patents to restrict development and use of
-software on general-purpose computers, but in those that do, we wish to
-avoid the special danger that patents applied to a free program could
-make it effectively proprietary. To prevent this, the GPL assures that
-patents cannot be used to render the program non-free.
-
- The precise terms and conditions for copying, distribution and
-modification follow.
-
- TERMS AND CONDITIONS
-
- 0. Definitions.
-
- "This License" refers to version 3 of the GNU General Public License.
-
- "Copyright" also means copyright-like laws that apply to other kinds of
-works, such as semiconductor masks.
-
- "The Program" refers to any copyrightable work licensed under this
-License. Each licensee is addressed as "you". "Licensees" and
-"recipients" may be individuals or organizations.
-
- To "modify" a work means to copy from or adapt all or part of the work
-in a fashion requiring copyright permission, other than the making of an
-exact copy. The resulting work is called a "modified version" of the
-earlier work or a work "based on" the earlier work.
-
- A "covered work" means either the unmodified Program or a work based
-on the Program.
-
- To "propagate" a work means to do anything with it that, without
-permission, would make you directly or secondarily liable for
-infringement under applicable copyright law, except executing it on a
-computer or modifying a private copy. Propagation includes copying,
-distribution (with or without modification), making available to the
-public, and in some countries other activities as well.
-
- To "convey" a work means any kind of propagation that enables other
-parties to make or receive copies. Mere interaction with a user through
-a computer network, with no transfer of a copy, is not conveying.
-
- An interactive user interface displays "Appropriate Legal Notices"
-to the extent that it includes a convenient and prominently visible
-feature that (1) displays an appropriate copyright notice, and (2)
-tells the user that there is no warranty for the work (except to the
-extent that warranties are provided), that licensees may convey the
-work under this License, and how to view a copy of this License. If
-the interface presents a list of user commands or options, such as a
-menu, a prominent item in the list meets this criterion.
-
- 1. Source Code.
-
- The "source code" for a work means the preferred form of the work
-for making modifications to it. "Object code" means any non-source
-form of a work.
-
- A "Standard Interface" means an interface that either is an official
-standard defined by a recognized standards body, or, in the case of
-interfaces specified for a particular programming language, one that
-is widely used among developers working in that language.
-
- The "System Libraries" of an executable work include anything, other
-than the work as a whole, that (a) is included in the normal form of
-packaging a Major Component, but which is not part of that Major
-Component, and (b) serves only to enable use of the work with that
-Major Component, or to implement a Standard Interface for which an
-implementation is available to the public in source code form. A
-"Major Component", in this context, means a major essential component
-(kernel, window system, and so on) of the specific operating system
-(if any) on which the executable work runs, or a compiler used to
-produce the work, or an object code interpreter used to run it.
-
- The "Corresponding Source" for a work in object code form means all
-the source code needed to generate, install, and (for an executable
-work) run the object code and to modify the work, including scripts to
-control those activities. However, it does not include the work's
-System Libraries, or general-purpose tools or generally available free
-programs which are used unmodified in performing those activities but
-which are not part of the work. For example, Corresponding Source
-includes interface definition files associated with source files for
-the work, and the source code for shared libraries and dynamically
-linked subprograms that the work is specifically designed to require,
-such as by intimate data communication or control flow between those
-subprograms and other parts of the work.
-
- The Corresponding Source need not include anything that users
-can regenerate automatically from other parts of the Corresponding
-Source.
-
- The Corresponding Source for a work in source code form is that
-same work.
-
- 2. Basic Permissions.
-
- All rights granted under this License are granted for the term of
-copyright on the Program, and are irrevocable provided the stated
-conditions are met. This License explicitly affirms your unlimited
-permission to run the unmodified Program. The output from running a
-covered work is covered by this License only if the output, given its
-content, constitutes a covered work. This License acknowledges your
-rights of fair use or other equivalent, as provided by copyright law.
-
- You may make, run and propagate covered works that you do not
-convey, without conditions so long as your license otherwise remains
-in force. You may convey covered works to others for the sole purpose
-of having them make modifications exclusively for you, or provide you
-with facilities for running those works, provided that you comply with
-the terms of this License in conveying all material for which you do
-not control copyright. Those thus making or running the covered works
-for you must do so exclusively on your behalf, under your direction
-and control, on terms that prohibit them from making any copies of
-your copyrighted material outside their relationship with you.
-
- Conveying under any other circumstances is permitted solely under
-the conditions stated below. Sublicensing is not allowed; section 10
-makes it unnecessary.
-
- 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
-
- No covered work shall be deemed part of an effective technological
-measure under any applicable law fulfilling obligations under article
-11 of the WIPO copyright treaty adopted on 20 December 1996, or
-similar laws prohibiting or restricting circumvention of such
-measures.
-
- When you convey a covered work, you waive any legal power to forbid
-circumvention of technological measures to the extent such circumvention
-is effected by exercising rights under this License with respect to
-the covered work, and you disclaim any intention to limit operation or
-modification of the work as a means of enforcing, against the work's
-users, your or third parties' legal rights to forbid circumvention of
-technological measures.
-
- 4. Conveying Verbatim Copies.
-
- You may convey verbatim copies of the Program's source code as you
-receive it, in any medium, provided that you conspicuously and
-appropriately publish on each copy an appropriate copyright notice;
-keep intact all notices stating that this License and any
-non-permissive terms added in accord with section 7 apply to the code;
-keep intact all notices of the absence of any warranty; and give all
-recipients a copy of this License along with the Program.
-
- You may charge any price or no price for each copy that you convey,
-and you may offer support or warranty protection for a fee.
-
- 5. Conveying Modified Source Versions.
-
- You may convey a work based on the Program, or the modifications to
-produce it from the Program, in the form of source code under the
-terms of section 4, provided that you also meet all of these conditions:
-
- a) The work must carry prominent notices stating that you modified
- it, and giving a relevant date.
-
- b) The work must carry prominent notices stating that it is
- released under this License and any conditions added under section
- 7. This requirement modifies the requirement in section 4 to
- "keep intact all notices".
-
- c) You must license the entire work, as a whole, under this
- License to anyone who comes into possession of a copy. This
- License will therefore apply, along with any applicable section 7
- additional terms, to the whole of the work, and all its parts,
- regardless of how they are packaged. This License gives no
- permission to license the work in any other way, but it does not
- invalidate such permission if you have separately received it.
-
- d) If the work has interactive user interfaces, each must display
- Appropriate Legal Notices; however, if the Program has interactive
- interfaces that do not display Appropriate Legal Notices, your
- work need not make them do so.
-
- A compilation of a covered work with other separate and independent
-works, which are not by their nature extensions of the covered work,
-and which are not combined with it such as to form a larger program,
-in or on a volume of a storage or distribution medium, is called an
-"aggregate" if the compilation and its resulting copyright are not
-used to limit the access or legal rights of the compilation's users
-beyond what the individual works permit. Inclusion of a covered work
-in an aggregate does not cause this License to apply to the other
-parts of the aggregate.
-
- 6. Conveying Non-Source Forms.
-
- You may convey a covered work in object code form under the terms
-of sections 4 and 5, provided that you also convey the
-machine-readable Corresponding Source under the terms of this License,
-in one of these ways:
-
- a) Convey the object code in, or embodied in, a physical product
- (including a physical distribution medium), accompanied by the
- Corresponding Source fixed on a durable physical medium
- customarily used for software interchange.
-
- b) Convey the object code in, or embodied in, a physical product
- (including a physical distribution medium), accompanied by a
- written offer, valid for at least three years and valid for as
- long as you offer spare parts or customer support for that product
- model, to give anyone who possesses the object code either (1) a
- copy of the Corresponding Source for all the software in the
- product that is covered by this License, on a durable physical
- medium customarily used for software interchange, for a price no
- more than your reasonable cost of physically performing this
- conveying of source, or (2) access to copy the
- Corresponding Source from a network server at no charge.
-
- c) Convey individual copies of the object code with a copy of the
- written offer to provide the Corresponding Source. This
- alternative is allowed only occasionally and noncommercially, and
- only if you received the object code with such an offer, in accord
- with subsection 6b.
-
- d) Convey the object code by offering access from a designated
- place (gratis or for a charge), and offer equivalent access to the
- Corresponding Source in the same way through the same place at no
- further charge. You need not require recipients to copy the
- Corresponding Source along with the object code. If the place to
- copy the object code is a network server, the Corresponding Source
- may be on a different server (operated by you or a third party)
- that supports equivalent copying facilities, provided you maintain
- clear directions next to the object code saying where to find the
- Corresponding Source. Regardless of what server hosts the
- Corresponding Source, you remain obligated to ensure that it is
- available for as long as needed to satisfy these requirements.
-
- e) Convey the object code using peer-to-peer transmission, provided
- you inform other peers where the object code and Corresponding
- Source of the work are being offered to the general public at no
- charge under subsection 6d.
-
- A separable portion of the object code, whose source code is excluded
-from the Corresponding Source as a System Library, need not be
-included in conveying the object code work.
-
- A "User Product" is either (1) a "consumer product", which means any
-tangible personal property which is normally used for personal, family,
-or household purposes, or (2) anything designed or sold for incorporation
-into a dwelling. In determining whether a product is a consumer product,
-doubtful cases shall be resolved in favor of coverage. For a particular
-product received by a particular user, "normally used" refers to a
-typical or common use of that class of product, regardless of the status
-of the particular user or of the way in which the particular user
-actually uses, or expects or is expected to use, the product. A product
-is a consumer product regardless of whether the product has substantial
-commercial, industrial or non-consumer uses, unless such uses represent
-the only significant mode of use of the product.
-
- "Installation Information" for a User Product means any methods,
-procedures, authorization keys, or other information required to install
-and execute modified versions of a covered work in that User Product from
-a modified version of its Corresponding Source. The information must
-suffice to ensure that the continued functioning of the modified object
-code is in no case prevented or interfered with solely because
-modification has been made.
-
- If you convey an object code work under this section in, or with, or
-specifically for use in, a User Product, and the conveying occurs as
-part of a transaction in which the right of possession and use of the
-User Product is transferred to the recipient in perpetuity or for a
-fixed term (regardless of how the transaction is characterized), the
-Corresponding Source conveyed under this section must be accompanied
-by the Installation Information. But this requirement does not apply
-if neither you nor any third party retains the ability to install
-modified object code on the User Product (for example, the work has
-been installed in ROM).
-
- The requirement to provide Installation Information does not include a
-requirement to continue to provide support service, warranty, or updates
-for a work that has been modified or installed by the recipient, or for
-the User Product in which it has been modified or installed. Access to a
-network may be denied when the modification itself materially and
-adversely affects the operation of the network or violates the rules and
-protocols for communication across the network.
-
- Corresponding Source conveyed, and Installation Information provided,
-in accord with this section must be in a format that is publicly
-documented (and with an implementation available to the public in
-source code form), and must require no special password or key for
-unpacking, reading or copying.
-
- 7. Additional Terms.
-
- "Additional permissions" are terms that supplement the terms of this
-License by making exceptions from one or more of its conditions.
-Additional permissions that are applicable to the entire Program shall
-be treated as though they were included in this License, to the extent
-that they are valid under applicable law. If additional permissions
-apply only to part of the Program, that part may be used separately
-under those permissions, but the entire Program remains governed by
-this License without regard to the additional permissions.
-
- When you convey a copy of a covered work, you may at your option
-remove any additional permissions from that copy, or from any part of
-it. (Additional permissions may be written to require their own
-removal in certain cases when you modify the work.) You may place
-additional permissions on material, added by you to a covered work,
-for which you have or can give appropriate copyright permission.
-
- Notwithstanding any other provision of this License, for material you
-add to a covered work, you may (if authorized by the copyright holders of
-that material) supplement the terms of this License with terms:
-
- a) Disclaiming warranty or limiting liability differently from the
- terms of sections 15 and 16 of this License; or
-
- b) Requiring preservation of specified reasonable legal notices or
- author attributions in that material or in the Appropriate Legal
- Notices displayed by works containing it; or
-
- c) Prohibiting misrepresentation of the origin of that material, or
- requiring that modified versions of such material be marked in
- reasonable ways as different from the original version; or
-
- d) Limiting the use for publicity purposes of names of licensors or
- authors of the material; or
-
- e) Declining to grant rights under trademark law for use of some
- trade names, trademarks, or service marks; or
-
- f) Requiring indemnification of licensors and authors of that
- material by anyone who conveys the material (or modified versions of
- it) with contractual assumptions of liability to the recipient, for
- any liability that these contractual assumptions directly impose on
- those licensors and authors.
-
- All other non-permissive additional terms are considered "further
-restrictions" within the meaning of section 10. If the Program as you
-received it, or any part of it, contains a notice stating that it is
-governed by this License along with a term that is a further
-restriction, you may remove that term. If a license document contains
-a further restriction but permits relicensing or conveying under this
-License, you may add to a covered work material governed by the terms
-of that license document, provided that the further restriction does
-not survive such relicensing or conveying.
-
- If you add terms to a covered work in accord with this section, you
-must place, in the relevant source files, a statement of the
-additional terms that apply to those files, or a notice indicating
-where to find the applicable terms.
-
- Additional terms, permissive or non-permissive, may be stated in the
-form of a separately written license, or stated as exceptions;
-the above requirements apply either way.
-
- 8. Termination.
-
- You may not propagate or modify a covered work except as expressly
-provided under this License. Any attempt otherwise to propagate or
-modify it is void, and will automatically terminate your rights under
-this License (including any patent licenses granted under the third
-paragraph of section 11).
-
- However, if you cease all violation of this License, then your
-license from a particular copyright holder is reinstated (a)
-provisionally, unless and until the copyright holder explicitly and
-finally terminates your license, and (b) permanently, if the copyright
-holder fails to notify you of the violation by some reasonable means
-prior to 60 days after the cessation.
-
- Moreover, your license from a particular copyright holder is
-reinstated permanently if the copyright holder notifies you of the
-violation by some reasonable means, this is the first time you have
-received notice of violation of this License (for any work) from that
-copyright holder, and you cure the violation prior to 30 days after
-your receipt of the notice.
-
- Termination of your rights under this section does not terminate the
-licenses of parties who have received copies or rights from you under
-this License. If your rights have been terminated and not permanently
-reinstated, you do not qualify to receive new licenses for the same
-material under section 10.
-
- 9. Acceptance Not Required for Having Copies.
-
- You are not required to accept this License in order to receive or
-run a copy of the Program. Ancillary propagation of a covered work
-occurring solely as a consequence of using peer-to-peer transmission
-to receive a copy likewise does not require acceptance. However,
-nothing other than this License grants you permission to propagate or
-modify any covered work. These actions infringe copyright if you do
-not accept this License. Therefore, by modifying or propagating a
-covered work, you indicate your acceptance of this License to do so.
-
- 10. Automatic Licensing of Downstream Recipients.
-
- Each time you convey a covered work, the recipient automatically
-receives a license from the original licensors, to run, modify and
-propagate that work, subject to this License. You are not responsible
-for enforcing compliance by third parties with this License.
-
- An "entity transaction" is a transaction transferring control of an
-organization, or substantially all assets of one, or subdividing an
-organization, or merging organizations. If propagation of a covered
-work results from an entity transaction, each party to that
-transaction who receives a copy of the work also receives whatever
-licenses to the work the party's predecessor in interest had or could
-give under the previous paragraph, plus a right to possession of the
-Corresponding Source of the work from the predecessor in interest, if
-the predecessor has it or can get it with reasonable efforts.
-
- You may not impose any further restrictions on the exercise of the
-rights granted or affirmed under this License. For example, you may
-not impose a license fee, royalty, or other charge for exercise of
-rights granted under this License, and you may not initiate litigation
-(including a cross-claim or counterclaim in a lawsuit) alleging that
-any patent claim is infringed by making, using, selling, offering for
-sale, or importing the Program or any portion of it.
-
- 11. Patents.
-
- A "contributor" is a copyright holder who authorizes use under this
-License of the Program or a work on which the Program is based. The
-work thus licensed is called the contributor's "contributor version".
-
- A contributor's "essential patent claims" are all patent claims
-owned or controlled by the contributor, whether already acquired or
-hereafter acquired, that would be infringed by some manner, permitted
-by this License, of making, using, or selling its contributor version,
-but do not include claims that would be infringed only as a
-consequence of further modification of the contributor version. For
-purposes of this definition, "control" includes the right to grant
-patent sublicenses in a manner consistent with the requirements of
-this License.
-
- Each contributor grants you a non-exclusive, worldwide, royalty-free
-patent license under the contributor's essential patent claims, to
-make, use, sell, offer for sale, import and otherwise run, modify and
-propagate the contents of its contributor version.
-
- In the following three paragraphs, a "patent license" is any express
-agreement or commitment, however denominated, not to enforce a patent
-(such as an express permission to practice a patent or covenant not to
-sue for patent infringement). To "grant" such a patent license to a
-party means to make such an agreement or commitment not to enforce a
-patent against the party.
-
- If you convey a covered work, knowingly relying on a patent license,
-and the Corresponding Source of the work is not available for anyone
-to copy, free of charge and under the terms of this License, through a
-publicly available network server or other readily accessible means,
-then you must either (1) cause the Corresponding Source to be so
-available, or (2) arrange to deprive yourself of the benefit of the
-patent license for this particular work, or (3) arrange, in a manner
-consistent with the requirements of this License, to extend the patent
-license to downstream recipients. "Knowingly relying" means you have
-actual knowledge that, but for the patent license, your conveying the
-covered work in a country, or your recipient's use of the covered work
-in a country, would infringe one or more identifiable patents in that
-country that you have reason to believe are valid.
-
- If, pursuant to or in connection with a single transaction or
-arrangement, you convey, or propagate by procuring conveyance of, a
-covered work, and grant a patent license to some of the parties
-receiving the covered work authorizing them to use, propagate, modify
-or convey a specific copy of the covered work, then the patent license
-you grant is automatically extended to all recipients of the covered
-work and works based on it.
-
- A patent license is "discriminatory" if it does not include within
-the scope of its coverage, prohibits the exercise of, or is
-conditioned on the non-exercise of one or more of the rights that are
-specifically granted under this License. You may not convey a covered
-work if you are a party to an arrangement with a third party that is
-in the business of distributing software, under which you make payment
-to the third party based on the extent of your activity of conveying
-the work, and under which the third party grants, to any of the
-parties who would receive the covered work from you, a discriminatory
-patent license (a) in connection with copies of the covered work
-conveyed by you (or copies made from those copies), or (b) primarily
-for and in connection with specific products or compilations that
-contain the covered work, unless you entered into that arrangement,
-or that patent license was granted, prior to 28 March 2007.
-
- Nothing in this License shall be construed as excluding or limiting
-any implied license or other defenses to infringement that may
-otherwise be available to you under applicable patent law.
-
- 12. No Surrender of Others' Freedom.
-
- If conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License. If you cannot convey a
-covered work so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you may
-not convey it at all. For example, if you agree to terms that obligate you
-to collect a royalty for further conveying from those to whom you convey
-the Program, the only way you could satisfy both those terms and this
-License would be to refrain entirely from conveying the Program.
-
- 13. Use with the GNU Affero General Public License.
-
- Notwithstanding any other provision of this License, you have
-permission to link or combine any covered work with a work licensed
-under version 3 of the GNU Affero General Public License into a single
-combined work, and to convey the resulting work. The terms of this
-License will continue to apply to the part which is the covered work,
-but the special requirements of the GNU Affero General Public License,
-section 13, concerning interaction through a network will apply to the
-combination as such.
-
- 14. Revised Versions of this License.
-
- The Free Software Foundation may publish revised and/or new versions of
-the GNU General Public License from time to time. Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
- Each version is given a distinguishing version number. If the
-Program specifies that a certain numbered version of the GNU General
-Public License "or any later version" applies to it, you have the
-option of following the terms and conditions either of that numbered
-version or of any later version published by the Free Software
-Foundation. If the Program does not specify a version number of the
-GNU General Public License, you may choose any version ever published
-by the Free Software Foundation.
-
- If the Program specifies that a proxy can decide which future
-versions of the GNU General Public License can be used, that proxy's
-public statement of acceptance of a version permanently authorizes you
-to choose that version for the Program.
-
- Later license versions may give you additional or different
-permissions. However, no additional obligations are imposed on any
-author or copyright holder as a result of your choosing to follow a
-later version.
-
- 15. Disclaimer of Warranty.
-
- THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
-APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
-HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
-OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
-IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
-ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
- 16. Limitation of Liability.
-
- IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
-THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
-GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
-USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
-DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
-PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
-EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGES.
-
- 17. Interpretation of Sections 15 and 16.
-
- If the disclaimer of warranty and limitation of liability provided
-above cannot be given local legal effect according to their terms,
-reviewing courts shall apply local law that most closely approximates
-an absolute waiver of all civil liability in connection with the
-Program, unless a warranty or assumption of liability accompanies a
-copy of the Program in return for a fee.
-
- END OF TERMS AND CONDITIONS
-
- How to Apply These Terms to Your New Programs
-
- If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
- To do so, attach the following notices to the program. It is safest
-to attach them to the start of each source file to most effectively
-state the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
- <one line to give the program's name and a brief idea of what it does.>
- Copyright (C) <year> <name of author>
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-Also add information on how to contact you by electronic and paper mail.
-
- If the program does terminal interaction, make it output a short
-notice like this when it starts in an interactive mode:
-
- <program> Copyright (C) <year> <name of author>
- This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
- This is free software, and you are welcome to redistribute it
- under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License. Of course, your program's commands
-might be different; for a GUI interface, you would use an "about box".
-
- You should also get your employer (if you work as a programmer) or school,
-if any, to sign a "copyright disclaimer" for the program, if necessary.
-For more information on this, and how to apply and follow the GNU GPL, see
-<http://www.gnu.org/licenses/>.
-
- The GNU General Public License does not permit incorporating your program
-into proprietary programs. If your program is a subroutine library, you
-may consider it more useful to permit linking proprietary applications with
-the library. If this is what you want to do, use the GNU Lesser General
-Public License instead of this License. But first, please read
-<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/COPYING-GPLV2 b/COPYING-GPLV2
new file mode 100644
index 000000000..d159169d1
--- /dev/null
+++ b/COPYING-GPLV2
@@ -0,0 +1,339 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/COPYING-LGPLV3 b/COPYING-LGPLV3
new file mode 100644
index 000000000..65c5ca88a
--- /dev/null
+++ b/COPYING-LGPLV3
@@ -0,0 +1,165 @@
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+
+ This version of the GNU Lesser General Public License incorporates
+the terms and conditions of version 3 of the GNU General Public
+License, supplemented by the additional permissions listed below.
+
+ 0. Additional Definitions.
+
+ As used herein, "this License" refers to version 3 of the GNU Lesser
+General Public License, and the "GNU GPL" refers to version 3 of the GNU
+General Public License.
+
+ "The Library" refers to a covered work governed by this License,
+other than an Application or a Combined Work as defined below.
+
+ An "Application" is any work that makes use of an interface provided
+by the Library, but which is not otherwise based on the Library.
+Defining a subclass of a class defined by the Library is deemed a mode
+of using an interface provided by the Library.
+
+ A "Combined Work" is a work produced by combining or linking an
+Application with the Library. The particular version of the Library
+with which the Combined Work was made is also called the "Linked
+Version".
+
+ The "Minimal Corresponding Source" for a Combined Work means the
+Corresponding Source for the Combined Work, excluding any source code
+for portions of the Combined Work that, considered in isolation, are
+based on the Application, and not on the Linked Version.
+
+ The "Corresponding Application Code" for a Combined Work means the
+object code and/or source code for the Application, including any data
+and utility programs needed for reproducing the Combined Work from the
+Application, but excluding the System Libraries of the Combined Work.
+
+ 1. Exception to Section 3 of the GNU GPL.
+
+ You may convey a covered work under sections 3 and 4 of this License
+without being bound by section 3 of the GNU GPL.
+
+ 2. Conveying Modified Versions.
+
+ If you modify a copy of the Library, and, in your modifications, a
+facility refers to a function or data to be supplied by an Application
+that uses the facility (other than as an argument passed when the
+facility is invoked), then you may convey a copy of the modified
+version:
+
+ a) under this License, provided that you make a good faith effort to
+ ensure that, in the event an Application does not supply the
+ function or data, the facility still operates, and performs
+ whatever part of its purpose remains meaningful, or
+
+ b) under the GNU GPL, with none of the additional permissions of
+ this License applicable to that copy.
+
+ 3. Object Code Incorporating Material from Library Header Files.
+
+ The object code form of an Application may incorporate material from
+a header file that is part of the Library. You may convey such object
+code under terms of your choice, provided that, if the incorporated
+material is not limited to numerical parameters, data structure
+layouts and accessors, or small macros, inline functions and templates
+(ten or fewer lines in length), you do both of the following:
+
+ a) Give prominent notice with each copy of the object code that the
+ Library is used in it and that the Library and its use are
+ covered by this License.
+
+ b) Accompany the object code with a copy of the GNU GPL and this license
+ document.
+
+ 4. Combined Works.
+
+ You may convey a Combined Work under terms of your choice that,
+taken together, effectively do not restrict modification of the
+portions of the Library contained in the Combined Work and reverse
+engineering for debugging such modifications, if you also do each of
+the following:
+
+ a) Give prominent notice with each copy of the Combined Work that
+ the Library is used in it and that the Library and its use are
+ covered by this License.
+
+ b) Accompany the Combined Work with a copy of the GNU GPL and this license
+ document.
+
+ c) For a Combined Work that displays copyright notices during
+ execution, include the copyright notice for the Library among
+ these notices, as well as a reference directing the user to the
+ copies of the GNU GPL and this license document.
+
+ d) Do one of the following:
+
+ 0) Convey the Minimal Corresponding Source under the terms of this
+ License, and the Corresponding Application Code in a form
+ suitable for, and under terms that permit, the user to
+ recombine or relink the Application with a modified version of
+ the Linked Version to produce a modified Combined Work, in the
+ manner specified by section 6 of the GNU GPL for conveying
+ Corresponding Source.
+
+ 1) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (a) uses at run time
+ a copy of the Library already present on the user's computer
+ system, and (b) will operate properly with a modified version
+ of the Library that is interface-compatible with the Linked
+ Version.
+
+ e) Provide Installation Information, but only if you would otherwise
+ be required to provide such information under section 6 of the
+ GNU GPL, and only to the extent that such information is
+ necessary to install and execute a modified version of the
+ Combined Work produced by recombining or relinking the
+ Application with a modified version of the Linked Version. (If
+ you use option 4d0, the Installation Information must accompany
+ the Minimal Corresponding Source and Corresponding Application
+ Code. If you use option 4d1, you must provide the Installation
+ Information in the manner specified by section 6 of the GNU GPL
+ for conveying Corresponding Source.)
+
+ 5. Combined Libraries.
+
+ You may place library facilities that are a work based on the
+Library side by side in a single library together with other library
+facilities that are not Applications and are not covered by this
+License, and convey such a combined library under terms of your
+choice, if you do both of the following:
+
+ a) Accompany the combined library with a copy of the same work based
+ on the Library, uncombined with any other library facilities,
+ conveyed under the terms of this License.
+
+ b) Give prominent notice with the combined library that part of it
+ is a work based on the Library, and explaining where to find the
+ accompanying uncombined form of the same work.
+
+ 6. Revised Versions of the GNU Lesser General Public License.
+
+ The Free Software Foundation may publish revised and/or new versions
+of the GNU Lesser General Public License from time to time. Such new
+versions will be similar in spirit to the present version, but may
+differ in detail to address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Library as you received it specifies that a certain numbered version
+of the GNU Lesser General Public License "or any later version"
+applies to it, you have the option of following the terms and
+conditions either of that published version or of any later version
+published by the Free Software Foundation. If the Library as you
+received it does not specify a version number of the GNU Lesser
+General Public License, you may choose any version of the GNU Lesser
+General Public License ever published by the Free Software Foundation.
+
+ If the Library as you received it specifies that a proxy can decide
+whether future versions of the GNU Lesser General Public License shall
+apply, that proxy's public statement of acceptance of any version is
+permanent authorization for you to choose that version for the
+Library.
diff --git a/Makefile.am b/Makefile.am
index 44506dfd0..598ebb410 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,8 +1,18 @@
-EXTRA_DIST = autogen.sh COPYING INSTALL README AUTHORS THANKS NEWS glusterfs.spec
+EXTRA_DIST = autogen.sh \
+ COPYING-GPLV2 COPYING-LGPLV3 \
+ INSTALL README AUTHORS THANKS NEWS \
+ glusterfs.spec glusterfs-api.pc.in libgfchangelog.pc.in \
+ error-codes.json gf-error-codes.h.template \
+ gen-headers.py run-tests.sh \
+ $(shell find $(top_srcdir)/tests -type f -print)
-SUBDIRS = argp-standalone libglusterfs $(LIBGLUSTERFSCLIENT_SUBDIR) xlators scheduler transport auth glusterfsd $(GF_BOOSTER_SUBDIR) $(FUSERMOUNT_SUBDIR) doc extras
+SUBDIRS = argp-standalone libglusterfs rpc api xlators glusterfsd \
+ $(FUSERMOUNT_SUBDIR) doc extras cli @SYNCDAEMON_SUBDIR@
-CLEANFILES =
+pkgconfigdir = @pkgconfigdir@
+pkgconfig_DATA = glusterfs-api.pc libgfchangelog.pc
+
+CLEANFILES =
gitclean: distclean
find . -name Makefile.in -exec rm -f {} \;
@@ -11,4 +21,9 @@ gitclean: distclean
rm -fr autom4te.cache
rm -f missing aclocal.m4 config.h.in config.guess config.sub ltmain.sh install-sh configure depcomp
rm -fr argp-standalone/autom4te.cache
- rm -f argp-standalone/aclocal.m4 argp-standalone/config.h.in argp-standalone/configure argp-standalone/depcomp argp-standalone/install-sh argp-standalone/missing
+ rm -f argp-standalone/aclocal.m4 argp-standalone/config.h.in
+ rm -f argp-standalone/configure argp-standalone/depcomp
+ rm -f argp-standalone/install-sh argp-standalone/missing
+
+dist-hook:
+ (cd $(srcdir) && git diff && echo ===== git log ==== && git log) > $(distdir)/ChangeLog
diff --git a/NEWS b/NEWS
index 39a37cd93..ad00208d3 100644
--- a/NEWS
+++ b/NEWS
@@ -1 +1 @@
-Please visit http://www.gluster.org/news.php for news updates.
+Gluster moves to Gerrit.
diff --git a/README b/README
index 15b1b4a88..ce648743e 100644
--- a/README
+++ b/README
@@ -1,11 +1 @@
-GlusterFS is a flexible, powerful, POSIX-compliant cluster filesystem
-that can scale to several petabytes.
-
-GlusterFS works over either TCP/IP or high-performance Infiniband RDMA
-interconnect.
-
-GlusterFS provides striping and on-the-fly synchronous replication
-of files. GlusterFS has a fully distributed design, with no
-single point of failure.
-
-Please visit http://www.gluster.org/glusterfs.php for more info.
+For more info, please visit http://www.gluster.org/.
diff --git a/THANKS b/THANKS
index 5e86b41a7..e1ce5105d 100644
--- a/THANKS
+++ b/THANKS
@@ -1,3 +1 @@
-
-* http://www.gluster.org/glusterfs-thanks.php
-
+For all of you, who use the product and help us making it more robust, useful, popular..
diff --git a/api/Makefile.am b/api/Makefile.am
new file mode 100644
index 000000000..f0ad1ee97
--- /dev/null
+++ b/api/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = src examples
diff --git a/api/examples/Makefile.am b/api/examples/Makefile.am
new file mode 100644
index 000000000..05f40ff53
--- /dev/null
+++ b/api/examples/Makefile.am
@@ -0,0 +1,6 @@
+EXTRA_PROGRAMS = glfsxmp
+glfsxmp_SOURCES = glfsxmp.c
+glfsxmp_CFLAGS = $(GLFS_CFLAGS) -Wall
+glfsxmp_LDADD = $(GLFS_LIBS) -lrt
+
+EXTRA_DIST = gfapi.py
diff --git a/api/examples/README b/api/examples/README
new file mode 100644
index 000000000..4d2b521f7
--- /dev/null
+++ b/api/examples/README
@@ -0,0 +1,36 @@
+This is an example application which uses libgfapi. It is
+a complete autotools based build system which demonstrates the
+required changes in configure.ac, Makefile.am etc to successfuly
+detect for and build an application against libgfapi.
+
+There are two approaches to building a libgfapi based application:
+
+1. In the presence of pkg-config in your build system.
+This is the recommended approach which is also used in this example.
+For this approach to work, you need to build glusterfs by passing
+--pkgconfigdir=/usr/lib64/pkgconfig (or the appropriate directory)
+in your distro. This already happens if you build RPMs with the
+glusterfs.spec provided in glusterfs.git. You will also need to
+install glusterfs-api RPM.
+
+2. In the absence of pkg-config in your build system.
+Make sure your LDFLAGS includes -L/path/to/lib where libgfapi.so is
+installed and -I/path/to/include/glusterfs where the 'api' directory
+containing the headers are available.
+
+glfsxmp.c
+=========
+
+glfsxmp.c is an example application which uses libgfapi
+
+Compilation Steps For glfsxmp.c
+===============================
+
+1. $./autogen.sh
+2. $./configure
+
+Note: Before running ./configure , as mentioned above, you need to
+ take care of #1 or #2 i.e. pkg-config path or LDFLAGS and
+ -I/<path> with correct values.
+
+3. $make glfsxmp
diff --git a/api/examples/autogen.sh b/api/examples/autogen.sh
new file mode 100755
index 000000000..1fee6be11
--- /dev/null
+++ b/api/examples/autogen.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+aclocal
+autoconf
+automake --foreign
diff --git a/api/examples/configure.ac b/api/examples/configure.ac
new file mode 100644
index 000000000..b80177a4e
--- /dev/null
+++ b/api/examples/configure.ac
@@ -0,0 +1,12 @@
+
+AC_INIT([glfs-test],[0.1],[gluster-devel@nongu.org])
+
+AM_INIT_AUTOMAKE
+
+AC_CONFIG_FILES([Makefile])
+
+AC_PROG_CC
+
+PKG_CHECK_MODULES([GLFS], [glusterfs-api >= 3])
+
+AC_OUTPUT
diff --git a/api/examples/gfapi.py b/api/examples/gfapi.py
new file mode 100755
index 000000000..3ac67f4d5
--- /dev/null
+++ b/api/examples/gfapi.py
@@ -0,0 +1,422 @@
+#!/usr/bin/python
+
+from ctypes import *
+from ctypes.util import find_library
+import os
+import sys
+import time
+import types
+
+# Looks like ctypes is having trouble with dependencies, so just force them to
+# load with RTLD_GLOBAL until I figure that out.
+glfs = CDLL(find_library("glusterfs"),RTLD_GLOBAL)
+xdr = CDLL(find_library("gfxdr"),RTLD_GLOBAL)
+api = CDLL(find_library("gfapi"),RTLD_GLOBAL)
+
+# Wow, the Linux kernel folks really play nasty games with this structure. If
+# you look at the man page for stat(2) and then at this definition you'll note
+# two discrepancies. First, we seem to have st_nlink and st_mode reversed. In
+# fact that's exactly how they're defined *for 64-bit systems*; for 32-bit
+# they're in the man-page order. Even uglier, the man page makes no mention of
+# the *nsec fields, but they are very much present and if they're not included
+# then we get memory corruption because libgfapi has a structure definition
+# that's longer than ours and they overwrite some random bit of memory after
+# the space we allocated. Yes, that's all very disgusting, and I'm still not
+# sure this will really work on 32-bit because all of the field types are so
+# obfuscated behind macros and feature checks.
+class Stat (Structure):
+ _fields_ = [
+ ("st_dev", c_ulong),
+ ("st_ino", c_ulong),
+ ("st_nlink", c_ulong),
+ ("st_mode", c_uint),
+ ("st_uid", c_uint),
+ ("st_gid", c_uint),
+ ("st_rdev", c_ulong),
+ ("st_size", c_ulong),
+ ("st_blksize", c_ulong),
+ ("st_blocks", c_ulong),
+ ("st_atime", c_ulong),
+ ("st_atimensec", c_ulong),
+ ("st_mtime", c_ulong),
+ ("st_mtimensec", c_ulong),
+ ("st_ctime", c_ulong),
+ ("st_ctimensec", c_ulong),
+ ]
+api.glfs_creat.restype = c_void_p
+api.glfs_open.restype = c_void_p
+api.glfs_lstat.restype = c_int
+api.glfs_lstat.argtypes = [c_void_p, c_char_p, POINTER(Stat)]
+
+class Dirent (Structure):
+ _fields_ = [
+ ("d_ino", c_ulong),
+ ("d_off", c_ulong),
+ ("d_reclen", c_ushort),
+ ("d_type", c_char),
+ ("d_name", c_char * 256),
+ ]
+api.glfs_opendir.restype = c_void_p
+api.glfs_readdir_r.restype = c_int
+api.glfs_readdir_r.argtypes = [c_void_p, POINTER(Dirent),
+ POINTER(POINTER(Dirent))]
+
+# There's a bit of ctypes glitchiness around __del__ functions and module-level
+# variables. If we unload the module while we still have references to File or
+# Volume objects, the module-level variables might have disappeared by the time
+# __del__ gets called. Therefore the objects hold references which they
+# release when __del__ is done. We only actually use the object-local values
+# in __del__; for clarity, we just use the simpler module-level form elsewhere.
+
+class File(object):
+
+ def __init__ (self, fd):
+ # Add a reference so the module-level variable "api" doesn't
+ # get yanked out from under us (see comment above File def'n).
+ self._api = api
+ self.fd = fd
+
+ def __del__ (self):
+ self._api.glfs_close(self.fd)
+ self._api = None
+
+ # File operations, in alphabetical order.
+
+ def fsync (self):
+ return api.glfs_fsync(self.fd)
+
+ def read (self, buflen, flags=0):
+ rbuf = create_string_buffer(buflen)
+ rc = api.glfs_read(self.fd,rbuf,buflen,flags)
+ if rc > 0:
+ return rbuf.value[:rc]
+ else:
+ return rc
+
+ def read_buffer (self, buf, flags=0):
+ return api.glfs_read(self.fd,buf,len(buf),flags)
+
+ def write (self, data, flags=0):
+ return api.glfs_write(self.fd,data,len(data),flags)
+
+ def fallocate (self, mode, offset, len):
+ return api.glfs_fallocate(self.fd, mode, offset, len)
+
+ def discard (self, offset, len):
+ return api.glfs_discard(self.fd, offset, len)
+
+
+class Dir(object):
+
+ def __init__ (self, fd):
+ # Add a reference so the module-level variable "api" doesn't
+ # get yanked out from under us (see comment above File def'n).
+ self._api = api
+ self.fd = fd
+ self.cursor = POINTER(Dirent)()
+
+ def __del__ (self):
+ self._api.glfs_closedir(self.fd)
+ self._api = None
+
+ def next (self):
+ entry = Dirent()
+ entry.d_reclen = 256
+ rc = api.glfs_readdir_r(self.fd,byref(entry),byref(self.cursor))
+ if (rc < 0) or (not self.cursor) or (not self.cursor.contents):
+ return rc
+ return entry
+
+class Volume(object):
+
+ # Housekeeping functions.
+
+ def __init__ (self, host, volid, proto="tcp", port=24007):
+ # Add a reference so the module-level variable "api" doesn't
+ # get yanked out from under us (see comment above File def'n).
+ self._api = api
+ self.fs = api.glfs_new(volid)
+ api.glfs_set_volfile_server(self.fs,proto,host,port)
+
+ def __del__ (self):
+ self._api.glfs_fini(self.fs)
+ self._api = None
+
+ def set_logging (self, path, level):
+ api.glfs_set_logging(self.fs,path,level)
+
+ def mount (self):
+ api.glfs_init(self.fs)
+
+ # File operations, in alphabetical order.
+
+ def creat (self, path, flags, mode):
+ fd = api.glfs_creat(self.fs,path,flags,mode)
+ if not fd:
+ return fd
+ return File(fd)
+
+ def getxattr (self, path, key, maxlen):
+ buf = create_string_buffer(maxlen)
+ rc = api.glfs_getxattr(self.fs,path,key,buf,maxlen)
+ if rc < 0:
+ return rc
+ return buf.value[:rc]
+
+ def listxattr (self, path):
+ buf = create_string_buffer(512)
+ rc = api.glfs_listxattr(self.fs,path,buf,512)
+ if rc < 0:
+ return rc
+ xattrs = []
+ # Parsing character by character is ugly, but it seems like the
+ # easiest way to deal with the "strings separated by NUL in one
+ # buffer" format.
+ i = 0
+ while i < rc:
+ new_xa = buf.raw[i]
+ i += 1
+ while i < rc:
+ next_char = buf.raw[i]
+ i += 1
+ if next_char == '\0':
+ xattrs.append(new_xa)
+ break
+ new_xa += next_char
+ xattrs.sort()
+ return xattrs
+
+ def lstat (self, path):
+ x = Stat()
+ rc = api.glfs_lstat(self.fs,path,byref(x))
+ if rc >= 0:
+ return x
+ else:
+ return rc
+
+ def mkdir (self, path):
+ return api.glfs_mkdir(self.fs,path)
+
+ def open (self, path, flags):
+ fd = api.glfs_open(self.fs,path,flags)
+ if not fd:
+ return fd
+ return File(fd)
+
+ def opendir (self, path):
+ fd = api.glfs_opendir(self.fs,path)
+ if not fd:
+ return fd
+ return Dir(fd)
+
+ def rename (self, opath, npath):
+ return api.glfs_rename(self.fs,opath,npath)
+
+ def rmdir (self, path):
+ return api.glfs_rmdir(self.fs,path)
+
+ def setxattr (self, path, key, value, vlen):
+ return api.glfs_setxattr(self.fs,path,key,value,vlen,0)
+
+ def unlink (self, path):
+ return api.glfs_unlink(self.fs,path)
+
+if __name__ == "__main__":
+ def test_create_write (vol, path, data):
+ mypath = path + ".io"
+ fd = vol.creat(mypath,os.O_WRONLY|os.O_EXCL,0644)
+ if not fd:
+ return False, "creat error"
+ rc = fd.write(data)
+ if rc != len(data):
+ return False, "wrote %d/%d bytes" % (rc, len(data))
+ return True, "wrote %d bytes" % rc
+
+ # TBD: this test fails if we do create, open, write, read
+ def test_open_read (vol, path, data):
+ mypath = path + ".io"
+ fd = vol.open(mypath,os.O_RDONLY)
+ if not fd:
+ return False, "open error"
+ dlen = len(data) * 2
+ buf = fd.read(dlen)
+ if type(buf) == types.IntType:
+ return False, "read error %d" % buf
+ if len(buf) != len(data):
+ return False, "read %d/%d bytes" % (len(buf), len(data))
+ return True, "read '%s'" % buf
+
+ def test_lstat (vol, path, data):
+ mypath = path + ".io"
+ sb = vol.lstat(mypath)
+ if type(sb) == types.IntType:
+ return False, "lstat error %d" % sb
+ if sb.st_size != len(data):
+ return False, "lstat size is %d, expected %d" % (
+ sb.st_size, len(data))
+ return True, "lstat got correct size %d" % sb.st_size
+
+ def test_rename (vol, path, data):
+ opath = path + ".io"
+ npath = path + ".tmp"
+ rc = vol.rename(opath,npath)
+ if rc < 0:
+ return False, "rename error %d" % rc
+ ofd = vol.open(opath,os.O_RDWR)
+ if isinstance(ofd,File):
+ return False, "old path working after rename"
+ nfd = vol.open(npath,os.O_RDWR)
+ if isinstance(nfd,File):
+ return False, "new path not working after rename"
+ return True, "rename worked"
+
+ def test_unlink (vol, path, data):
+ mypath = path + ".tmp"
+ rc = vol.unlink(mypath)
+ if rc < 0:
+ return False, "unlink error %d" % fd
+ fd = vol.open(mypath,os.O_RDWR)
+ if isinstance(fd,File):
+ return False, "path still usable after unlink"
+ return True, "unlink worked"
+
+ def test_mkdir (vol, path, data):
+ mypath = path + ".dir"
+ rc = vol.mkdir(mypath)
+ if rc < 0:
+ return False, "mkdir error %d" % rc
+ return True, "mkdir worked"
+
+ def test_create_in_dir (vol, path, data):
+ mypath = path + ".dir/probe"
+ fd = vol.creat(mypath,os.O_RDWR,0644)
+ if not isinstance(fd,File):
+ return False, "create (in dir) error"
+ return True, "create (in dir) worked"
+
+ def test_dir_listing (vol, path, data):
+ mypath = path + ".dir"
+ fd = vol.opendir(mypath)
+ if not isinstance(fd,Dir):
+ return False, "opendir error %d" % fd
+ files = []
+ while True:
+ ent = fd.next()
+ if not isinstance(ent,Dirent):
+ break
+ name = ent.d_name[:ent.d_reclen]
+ files.append(name)
+ if files != [".", "..", "probe"]:
+ return False, "wrong directory contents"
+ return True, "directory listing worked"
+
+ def test_unlink_in_dir (vol, path, data):
+ mypath = path + ".dir/probe"
+ rc = vol.unlink(mypath)
+ if rc < 0:
+ return False, "unlink (in dir) error %d" % rc
+ return True, "unlink (in dir) worked"
+
+ def test_rmdir (vol, path, data):
+ mypath = path + ".dir"
+ rc = vol.rmdir(mypath)
+ if rc < 0:
+ return False, "rmdir error %d" % rc
+ sb = vol.lstat(mypath)
+ if not isinstance(sb,Stat):
+ return False, "dir still there after rmdir"
+ return True, "rmdir worked"
+
+ def test_setxattr (vol, path, data):
+ mypath = path + ".xa"
+ fd = vol.creat(mypath,os.O_RDWR|os.O_EXCL,0644)
+ if not fd:
+ return False, "creat (xattr test) error"
+ key1, key2 = "hello", "goodbye"
+ if vol.setxattr(mypath,"trusted.key1",key1,len(key1)) < 0:
+ return False, "setxattr (key1) error"
+ if vol.setxattr(mypath,"trusted.key2",key2,len(key2)) < 0:
+ return False, "setxattr (key2) error"
+ return True, "setxattr worked"
+
+ def test_getxattr (vol, path, data):
+ mypath = path + ".xa"
+ buf = vol.getxattr(mypath,"trusted.key1",32)
+ if type(buf) == types.IntType:
+ return False, "getxattr error"
+ if buf != "hello":
+ return False, "wrong getxattr value %s" % buf
+ return True, "getxattr worked"
+
+ def test_listxattr (vol, path, data):
+ mypath = path + ".xa"
+ xattrs = vol.listxattr(mypath)
+ if type(xattrs) == types.IntType:
+ return False, "listxattr error"
+ if xattrs != ["trusted.key1","trusted.key2"]:
+ return False, "wrong listxattr value %s" % repr(xattrs)
+ return True, "listxattr worked"
+
+ def test_fallocate (vol, path, data):
+ mypath = path + ".io"
+ fd = vol.creat(mypath,os.O_WRONLY|os.O_EXCL,0644)
+ if not fd:
+ return False, "creat error"
+ rc = fd.fallocate(0, 0, 1024*1024)
+ if rc != 0:
+ return False, "fallocate error"
+ rc = fd.discard(4096, 4096)
+ if rc != 0:
+ return False, "discard error"
+ return True, "fallocate/discard worked"
+
+ test_list = (
+ test_create_write,
+ test_open_read,
+ test_lstat,
+ test_rename,
+ test_unlink,
+ test_mkdir,
+ test_create_in_dir,
+ test_dir_listing,
+ test_unlink_in_dir,
+ test_rmdir,
+ test_setxattr,
+ test_getxattr,
+ test_listxattr,
+ test_fallocate,
+ )
+
+ ok_to_fail = (
+ # TBD: this fails opening the new file, even though the file
+ # did get renamed. Looks like a gfapi bug, not ours.
+ (test_rename, "new path not working after rename"),
+ # TBD: similar, call returns error even though it worked
+ (test_rmdir, "dir still there after rmdir"),
+ )
+
+ volid, path = sys.argv[1:3]
+ data = "fubar"
+ vol = Volume("localhost",volid)
+ vol.set_logging("/dev/null",7)
+ #vol.set_logging("/dev/stderr",7)
+ vol.mount()
+
+ failures = 0
+ expected = 0
+ for t in test_list:
+ rc, msg = t(vol,path,data)
+ if rc:
+ print "PASS: %s" % msg
+ else:
+ print "FAIL: %s" % msg
+ failures += 1
+ for otf in ok_to_fail:
+ if (t == otf[0]) and (msg == otf[1]):
+ print " (skipping known failure)"
+ expected += 1
+ break # from the *inner* for loop
+ else:
+ break # from the *outer* for loop
+
+ print "%d failures (%d expected)" % (failures, expected)
diff --git a/api/examples/glfsxmp.c b/api/examples/glfsxmp.c
new file mode 100644
index 000000000..600d72fb5
--- /dev/null
+++ b/api/examples/glfsxmp.c
@@ -0,0 +1,1598 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include "api/glfs.h"
+#include "api/glfs-handles.h"
+#include <string.h>
+#include <time.h>
+
+
+int
+test_dirops (glfs_t *fs)
+{
+ glfs_fd_t *fd = NULL;
+ char buf[512];
+ struct dirent *entry = NULL;
+
+ fd = glfs_opendir (fs, "/");
+ if (!fd) {
+ fprintf (stderr, "/: %s\n", strerror (errno));
+ return -1;
+ }
+
+ fprintf (stderr, "Entries:\n");
+ while (glfs_readdir_r (fd, (struct dirent *)buf, &entry), entry) {
+ fprintf (stderr, "%s: %lu\n", entry->d_name, glfs_telldir (fd));
+ }
+
+ glfs_closedir (fd);
+ return 0;
+}
+
+
+int
+test_xattr (glfs_t *fs)
+{
+ char *filename = "/filename2";
+ char buf[512];
+ char *ptr;
+ int ret;
+
+ ret = glfs_setxattr (fs, filename, "user.testkey", "testval", 8, 0);
+ fprintf (stderr, "setxattr(%s): %d (%s)\n", filename, ret,
+ strerror (errno));
+
+ ret = glfs_setxattr (fs, filename, "user.testkey2", "testval", 8, 0);
+ fprintf (stderr, "setxattr(%s): %d (%s)\n", filename, ret,
+ strerror (errno));
+
+ ret = glfs_listxattr (fs, filename, buf, 512);
+ fprintf (stderr, "listxattr(%s): %d (%s)\n", filename, ret,
+ strerror (errno));
+ if (ret < 0)
+ return -1;
+
+ for (ptr = buf; ptr < buf + ret; ptr++) {
+ printf ("key=%s\n", ptr);
+ ptr += strlen (ptr);
+ }
+
+ return 0;
+}
+
+
+int
+test_chdir (glfs_t *fs)
+{
+ int ret = -1;
+ char *topdir = "/topdir";
+ char *linkdir = "/linkdir";
+ char *subdir = "./subdir";
+ char *respath = NULL;
+ char pathbuf[4096];
+
+ ret = glfs_mkdir (fs, topdir, 0755);
+ if (ret) {
+ fprintf (stderr, "mkdir(%s): %s\n", topdir, strerror (errno));
+ return -1;
+ }
+
+ respath = glfs_getcwd (fs, pathbuf, 4096);
+ fprintf (stdout, "getcwd() = %s\n", respath);
+
+ ret = glfs_symlink (fs, topdir, linkdir);
+ if (ret) {
+ fprintf (stderr, "symlink(%s, %s): %s\n", topdir, linkdir, strerror (errno));
+ return -1;
+ }
+
+ ret = glfs_chdir (fs, linkdir);
+ if (ret) {
+ fprintf (stderr, "chdir(%s): %s\n", linkdir, strerror (errno));
+ return -1;
+ }
+
+ respath = glfs_getcwd (fs, pathbuf, 4096);
+ fprintf (stdout, "getcwd() = %s\n", respath);
+
+ respath = glfs_realpath (fs, subdir, pathbuf);
+ if (respath) {
+ fprintf (stderr, "realpath(%s) worked unexpectedly: %s\n", subdir, respath);
+ return -1;
+ }
+
+ ret = glfs_mkdir (fs, subdir, 0755);
+ if (ret) {
+ fprintf (stderr, "mkdir(%s): %s\n", subdir, strerror (errno));
+ return -1;
+ }
+
+ respath = glfs_realpath (fs, subdir, pathbuf);
+ if (!respath) {
+ fprintf (stderr, "realpath(%s): %s\n", subdir, strerror (errno));
+ } else {
+ fprintf (stdout, "realpath(%s) = %s\n", subdir, respath);
+ }
+
+ ret = glfs_chdir (fs, subdir);
+ if (ret) {
+ fprintf (stderr, "chdir(%s): %s\n", subdir, strerror (errno));
+ return -1;
+ }
+
+ respath = glfs_getcwd (fs, pathbuf, 4096);
+ fprintf (stdout, "getcwd() = %s\n", respath);
+
+ respath = glfs_realpath (fs, "/linkdir/subdir", pathbuf);
+ if (!respath) {
+ fprintf (stderr, "realpath(/linkdir/subdir): %s\n", strerror (errno));
+ } else {
+ fprintf (stdout, "realpath(/linkdir/subdir) = %s\n", respath);
+ }
+
+ return 0;
+}
+
+#ifdef DEBUG
+static void
+peek_stat (struct stat *sb)
+{
+ printf ("Dumping stat information:\n");
+ printf ("File type: ");
+
+ switch (sb->st_mode & S_IFMT) {
+ case S_IFBLK: printf ("block device\n"); break;
+ case S_IFCHR: printf ("character device\n"); break;
+ case S_IFDIR: printf ("directory\n"); break;
+ case S_IFIFO: printf ("FIFO/pipe\n"); break;
+ case S_IFLNK: printf ("symlink\n"); break;
+ case S_IFREG: printf ("regular file\n"); break;
+ case S_IFSOCK: printf ("socket\n"); break;
+ default: printf ("unknown?\n"); break;
+ }
+
+ printf ("I-node number: %ld\n", (long) sb->st_ino);
+
+ printf ("Mode: %lo (octal)\n",
+ (unsigned long) sb->st_mode);
+
+ printf ("Link count: %ld\n", (long) sb->st_nlink);
+ printf ("Ownership: UID=%ld GID=%ld\n",
+ (long) sb->st_uid, (long) sb->st_gid);
+
+ printf ("Preferred I/O block size: %ld bytes\n",
+ (long) sb->st_blksize);
+ printf ("File size: %lld bytes\n",
+ (long long) sb->st_size);
+ printf ("Blocks allocated: %lld\n",
+ (long long) sb->st_blocks);
+
+ printf ("Last status change: %s", ctime(&sb->st_ctime));
+ printf ("Last file access: %s", ctime(&sb->st_atime));
+ printf ("Last file modification: %s", ctime(&sb->st_mtime));
+
+ return;
+}
+
+static void
+peek_handle (unsigned char *glid)
+{
+ int i;
+
+ for (i = 0; i < GFAPI_HANDLE_LENGTH; i++)
+ {
+ printf (":%02x:", glid[i]);
+ }
+ printf ("\n");
+}
+#else /* DEBUG */
+static void
+peek_stat (struct stat *sb)
+{
+ return;
+}
+
+static void
+peek_handle (unsigned char *id)
+{
+ return;
+}
+#endif /* DEBUG */
+
+glfs_t *fs = NULL;
+char *full_parent_name = "/testdir", *parent_name = "testdir";
+
+void
+test_h_unlink (void)
+{
+ char *my_dir = "unlinkdir";
+ char *my_file = "file.txt";
+ char *my_subdir = "dir1";
+ struct glfs_object *parent = NULL, *leaf = NULL, *dir = NULL,
+ *subdir = NULL, *subleaf = NULL;
+ struct stat sb;
+ int ret;
+
+ printf ("glfs_h_unlink tests: In Progress\n");
+
+ /* Prepare tests */
+ parent = glfs_h_lookupat (fs, NULL, full_parent_name, &sb);
+ if (parent == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror (errno));
+ printf ("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ dir = glfs_h_mkdir (fs, parent, my_dir, 0644, &sb);
+ if (dir == NULL) {
+ fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, parent, strerror (errno));
+ printf ("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ leaf = glfs_h_creat (fs, dir, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf (stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dir, strerror (errno));
+ printf ("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ subdir = glfs_h_mkdir (fs, dir, my_subdir, 0644, &sb);
+ if (subdir == NULL) {
+ fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_subdir, dir, strerror (errno));
+ printf ("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ subleaf = glfs_h_creat (fs, subdir, my_file, O_CREAT, 0644, &sb);
+ if (subleaf == NULL) {
+ fprintf (stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, subdir, strerror (errno));
+ printf ("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink non empty directory */
+ ret = glfs_h_unlink (fs, dir, my_subdir);
+ if ((ret && errno != ENOTEMPTY) || (ret == 0)) {
+ fprintf (stderr, "glfs_h_unlink: error unlinking %s: it is non empty: %s\n",
+ my_subdir, strerror (errno));
+ printf ("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink regular file */
+ ret = glfs_h_unlink (fs, subdir, my_file);
+ if (ret) {
+ fprintf (stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+ my_file, subdir, strerror (errno));
+ printf ("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink directory */
+ ret = glfs_h_unlink (fs, dir, my_subdir);
+ if (ret) {
+ fprintf (stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+ my_subdir, dir, strerror (errno));
+ printf ("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink regular file */
+ ret = glfs_h_unlink (fs, dir, my_file);
+ if (ret) {
+ fprintf (stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+ my_file, dir, strerror (errno));
+ printf ("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink non-existant regular file */
+ ret = glfs_h_unlink (fs, dir, my_file);
+ if ((ret && errno != ENOENT) || (ret == 0)) {
+ fprintf (stderr, "glfs_h_unlink: error unlinking non-existant %s: invalid errno ,%d, %s\n",
+ my_file, ret, strerror (errno));
+ printf ("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink non-existant directory */
+ ret = glfs_h_unlink (fs, dir, my_subdir);
+ if ((ret && errno != ENOENT) || (ret == 0)) {
+ fprintf (stderr, "glfs_h_unlink: error unlinking non-existant %s: invalid errno ,%d, %s\n",
+ my_subdir, ret, strerror (errno));
+ printf ("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink directory */
+ ret = glfs_h_unlink (fs, parent, my_dir);
+ if (ret) {
+ fprintf (stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+ my_dir, dir, strerror (errno));
+ printf ("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ printf ("glfs_h_unlink tests: PASSED\n");
+
+out:
+ if (dir)
+ glfs_h_close (dir);
+ if (leaf)
+ glfs_h_close (leaf);
+ if (subdir)
+ glfs_h_close (subdir);
+ if (subleaf)
+ glfs_h_close (subleaf);
+ if (parent)
+ glfs_h_close (parent);
+
+ return;
+}
+
+void
+test_h_getsetattrs (void)
+{
+ char *my_dir = "attrdir";
+ char *my_file = "attrfile.txt";
+ struct glfs_object *parent = NULL, *leaf = NULL, *dir = NULL;
+ struct stat sb, retsb;
+ int ret, valid;
+ struct timespec timestamp;
+
+ printf("glfs_h_getattrs and setattrs tests: In Progress\n");
+
+ /* Prepare tests */
+ parent = glfs_h_lookupat (fs, NULL, full_parent_name, &sb);
+ if (parent == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror (errno));
+ printf ("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ dir = glfs_h_mkdir (fs, parent, my_dir, 0644, &sb);
+ if (dir == NULL) {
+ fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, parent, strerror (errno));
+ printf ("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ leaf = glfs_h_creat (fs, dir, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf (stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dir, strerror (errno));
+ printf ("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ ret = glfs_h_getattrs (fs, dir, &retsb);
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_getattrs: error %s: from (%p),%s\n",
+ my_dir, dir, strerror (errno));
+ printf ("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&retsb);
+ /* TODO: Compare stat information */
+
+ retsb.st_mode = 00666;
+ retsb.st_uid = 1000;
+ retsb.st_gid = 1001;
+ ret = clock_gettime (CLOCK_REALTIME, &timestamp);
+ if(ret != 0) {
+ fprintf (stderr, "clock_gettime: error %s\n", strerror (errno));
+ printf ("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+ retsb.st_atim = timestamp;
+ retsb.st_mtim = timestamp;
+ valid = GFAPI_SET_ATTR_MODE | GFAPI_SET_ATTR_UID | GFAPI_SET_ATTR_GID |
+ GFAPI_SET_ATTR_ATIME | GFAPI_SET_ATTR_MTIME;
+ peek_stat (&retsb);
+
+ ret = glfs_h_setattrs (fs, dir, &retsb, valid);
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_setattrs: error %s: from (%p),%s\n",
+ my_dir, dir, strerror (errno));
+ printf ("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ memset(&retsb, 0, sizeof (struct stat));
+ ret = glfs_h_stat (fs, dir, &retsb);
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_stat: error %s: from (%p),%s\n",
+ my_dir, dir, strerror (errno));
+ printf ("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&retsb);
+
+ printf ("glfs_h_getattrs and setattrs tests: PASSED\n");
+out:
+ if (parent)
+ glfs_h_close (parent);
+ if (leaf)
+ glfs_h_close (leaf);
+ if (dir)
+ glfs_h_close (dir);
+
+ return;
+}
+
+void
+test_h_truncate (void)
+{
+ char *my_dir = "truncatedir";
+ char *my_file = "file.txt";
+ struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL;
+ struct stat sb;
+ glfs_fd_t *fd = NULL;
+ char buf[32];
+ off_t offset = 0;
+ int ret = 0;
+
+ printf("glfs_h_truncate tests: In Progress\n");
+
+ /* Prepare tests */
+ root = glfs_h_lookupat (fs, NULL, full_parent_name, &sb);
+ if (root == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror (errno));
+ printf ("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ parent = glfs_h_mkdir (fs, root, my_dir, 0644, &sb);
+ if (parent == NULL) {
+ fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, root, strerror (errno));
+ printf ("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ leaf = glfs_h_creat (fs, parent, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf (stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror (errno));
+ printf ("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ fd = glfs_h_open (fs, leaf, O_RDWR);
+ if (fd == NULL) {
+ fprintf (stderr, "glfs_h_open: error on open of %s: %s\n",
+ my_file, strerror (errno));
+ printf ("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+
+ memcpy (buf, "abcdefghijklmnopqrstuvwxyz012345", 32);
+ ret = glfs_write (fd, buf, 32, 0);
+
+ /* run tests */
+ /* truncate lower */
+ offset = 30;
+ ret = glfs_h_truncate (fs, leaf, offset);
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_truncate: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror (errno));
+ printf ("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ ret = glfs_h_getattrs (fs, leaf, &sb);
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_getattrs: error for %s (%p),%s\n",
+ my_file, leaf, strerror (errno));
+ printf ("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ if (sb.st_size != offset) {
+ fprintf (stderr, "glfs_h_truncate: post size mismatch\n");
+ printf ("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+
+ /* truncate higher */
+ offset = 32;
+ ret = glfs_h_truncate (fs, leaf, offset);
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_truncate: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror (errno));
+ printf ("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ ret = glfs_h_getattrs (fs, leaf, &sb);
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_getattrs: error for %s (%p),%s\n",
+ my_file, leaf, strerror (errno));
+ printf ("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ if (sb.st_size != offset) {
+ fprintf (stderr, "glfs_h_truncate: post size mismatch\n");
+ printf ("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+
+ /* truncate equal */
+ offset = 30;
+ ret = glfs_h_truncate (fs, leaf, offset);
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_truncate: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror (errno));
+ printf ("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ ret = glfs_h_getattrs (fs, leaf, &sb);
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_getattrs: error for %s (%p),%s\n",
+ my_file, leaf, strerror (errno));
+ printf ("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ if (sb.st_size != offset) {
+ fprintf (stderr, "glfs_h_truncate: post size mismatch\n");
+ printf ("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+
+ printf ("glfs_h_truncate tests: PASSED\n");
+out:
+ if (fd)
+ glfs_close (fd);
+ if (root)
+ glfs_h_close (root);
+ if (parent)
+ glfs_h_close (parent);
+ if (leaf)
+ glfs_h_close (leaf);
+
+ return;
+}
+
+void
+test_h_links (void)
+{
+ char *my_dir = "linkdir";
+ char *my_file = "file.txt";
+ char *my_symlnk = "slnk.txt";
+ char *my_lnk = "lnk.txt";
+ char *linksrc_dir = "dir1";
+ char *linktgt_dir = "dir2";
+ struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL,
+ *dirsrc = NULL, *dirtgt = NULL, *dleaf = NULL;
+ struct glfs_object *ln1 = NULL;
+ struct stat sb;
+ int ret;
+ char *buf = NULL;
+
+ printf("glfs_h_link(s) tests: In Progress\n");
+
+ /* Prepare tests */
+ root = glfs_h_lookupat (fs, NULL, full_parent_name, &sb);
+ if (root == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror (errno));
+ printf ("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ parent = glfs_h_mkdir (fs, root, my_dir, 0644, &sb);
+ if (parent == NULL) {
+ fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, root, strerror (errno));
+ printf ("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ leaf = glfs_h_creat (fs, parent, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf (stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror (errno));
+ printf ("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ dirsrc = glfs_h_mkdir (fs, parent, linksrc_dir, 0644, &sb);
+ if (dirsrc == NULL) {
+ fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ linksrc_dir, parent, strerror (errno));
+ printf ("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ dirtgt = glfs_h_mkdir (fs, parent, linktgt_dir, 0644, &sb);
+ if (dirtgt == NULL) {
+ fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ linktgt_dir, parent, strerror (errno));
+ printf ("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ dleaf = glfs_h_creat (fs, dirsrc, my_file, O_CREAT, 0644, &sb);
+ if (dleaf == NULL) {
+ fprintf (stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dirsrc, strerror (errno));
+ printf ("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ /* run tests */
+ /* sym link: /testdir/linkdir/file.txt to ./slnk.txt */
+ ln1 = glfs_h_symlink (fs, parent, my_symlnk, "./file.txt", &sb);
+ if (ln1 == NULL) {
+ fprintf (stderr, "glfs_h_symlink: error creating %s: from (%p),%s\n",
+ my_symlnk, parent, strerror (errno));
+ printf ("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ buf = calloc (1024, sizeof(char));
+ if (buf == NULL) {
+ fprintf (stderr, "Error allocating memory\n");
+ printf ("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+
+ ret = glfs_h_readlink (fs, ln1, buf, 1024);
+ if (ret <= 0) {
+ fprintf (stderr, "glfs_h_readlink: error reading %s: from (%p),%s\n",
+ my_symlnk, ln1, strerror (errno));
+ printf ("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ if (!(strncmp (buf, my_symlnk, strlen (my_symlnk)))) {
+ fprintf (stderr, "glfs_h_readlink: error mismatch in link name: actual %s: retrieved %s\n",
+ my_symlnk, buf);
+ printf ("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+
+ /* link: /testdir/linkdir/file.txt to ./lnk.txt */
+ ret = glfs_h_link (fs, leaf, parent, my_lnk);
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_link: error creating %s: from (%p),%s\n",
+ my_lnk, parent, strerror (errno));
+ printf ("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ /* TODO: Should write content to a file and read from the link */
+
+ /* link: /testdir/linkdir/dir1/file.txt to ../dir2/slnk.txt */
+ ret = glfs_h_link (fs, dleaf, dirtgt, my_lnk);
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_link: error creating %s: from (%p),%s\n",
+ my_lnk, dirtgt, strerror (errno));
+ printf ("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ /* TODO: Should write content to a file and read from the link */
+
+ printf ("glfs_h_link(s) tests: PASSED\n");
+
+out:
+ if (root)
+ glfs_h_close (root);
+ if (parent)
+ glfs_h_close (parent);
+ if (leaf)
+ glfs_h_close (leaf);
+ if (dirsrc)
+ glfs_h_close (dirsrc);
+ if (dirtgt)
+ glfs_h_close (dirtgt);
+ if (dleaf)
+ glfs_h_close (dleaf);
+ if (ln1)
+ glfs_h_close (ln1);
+ if (buf)
+ free (buf);
+
+ return;
+}
+
+void
+test_h_rename (void)
+{
+ char *my_dir = "renamedir";
+ char *my_file = "file.txt";
+ char *src_dir = "dir1";
+ char *tgt_dir = "dir2";
+ struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL,
+ *dirsrc = NULL, *dirtgt = NULL, *dleaf = NULL;
+ struct stat sb;
+ int ret;
+
+ printf("glfs_h_rename tests: In Progress\n");
+
+ /* Prepare tests */
+ root = glfs_h_lookupat (fs, NULL, full_parent_name, &sb);
+ if (root == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror (errno));
+ printf ("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ parent = glfs_h_mkdir (fs, root, my_dir, 0644, &sb);
+ if (parent == NULL) {
+ fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, root, strerror (errno));
+ printf ("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ leaf = glfs_h_creat (fs, parent, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf (stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror (errno));
+ printf ("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ dirsrc = glfs_h_mkdir (fs, parent, src_dir, 0644, &sb);
+ if (dirsrc == NULL) {
+ fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ src_dir, parent, strerror (errno));
+ printf ("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ dirtgt = glfs_h_mkdir (fs, parent, tgt_dir, 0644, &sb);
+ if (dirtgt == NULL) {
+ fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ tgt_dir, parent, strerror (errno));
+ printf ("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ dleaf = glfs_h_creat (fs, dirsrc, my_file, O_CREAT, 0644, &sb);
+ if (dleaf == NULL) {
+ fprintf (stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dirsrc, strerror (errno));
+ printf ("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ /* run tests */
+ /* Rename file.txt -> file1.txt */
+ ret = glfs_h_rename (fs, parent, "file.txt", parent, "file1.txt");
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_rename: error renaming %s to %s (%s)\n",
+ "file.txt", "file1.txt", strerror (errno));
+ printf ("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename dir1/file.txt -> file.txt */
+ ret = glfs_h_rename (fs, dirsrc, "file.txt", parent, "file.txt");
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_rename: error renaming %s/%s to %s (%s)\n",
+ src_dir, "file.txt", "file.txt", strerror (errno));
+ printf ("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename file1.txt -> file.txt (exists) */
+ ret = glfs_h_rename (fs, parent, "file1.txt", parent, "file.txt");
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_rename: error renaming %s to %s (%s)\n",
+ "file.txt", "file.txt", strerror (errno));
+ printf ("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename dir1 -> dir3 */
+ ret = glfs_h_rename (fs, parent, "dir1", parent, "dir3");
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_rename: error renaming %s to %s (%s)\n",
+ "dir1", "dir3", strerror (errno));
+ printf ("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename dir2 ->dir3 (exists) */
+ ret = glfs_h_rename (fs, parent, "dir2", parent, "dir3");
+ if (ret != 0) {
+ fprintf (stderr, "glfs_h_rename: error renaming %s to %s (%s)\n",
+ "dir2", "dir3", strerror (errno));
+ printf ("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename file.txt -> dir3 (fail) */
+ ret = glfs_h_rename (fs, parent, "file.txt", parent, "dir3");
+ if (ret == 0) {
+ fprintf (stderr, "glfs_h_rename: NO error renaming %s to %s (%s)\n",
+ "file.txt", "dir3", strerror (errno));
+ printf ("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename dir3 -> file.txt (fail) */
+ ret = glfs_h_rename (fs, parent, "dir3", parent, "file.txt");
+ if (ret == 0) {
+ fprintf (stderr, "glfs_h_rename: NO error renaming %s to %s (%s)\n",
+ "dir3", "file.txt", strerror (errno));
+ printf ("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ printf ("glfs_h_rename tests: PASSED\n");
+
+out:
+ if (root)
+ glfs_h_close (root);
+ if (parent)
+ glfs_h_close (parent);
+ if (leaf)
+ glfs_h_close (leaf);
+ if (dirsrc)
+ glfs_h_close (dirsrc);
+ if (dirtgt)
+ glfs_h_close (dirtgt);
+ if (dleaf)
+ glfs_h_close (dleaf);
+
+ return;
+}
+
+void
+assimilatetime (struct timespec *ts, struct timespec ts_st,
+ struct timespec ts_ed)
+{
+ if ((ts_ed.tv_nsec - ts_st.tv_nsec) < 0) {
+ ts->tv_sec += ts_ed.tv_sec - ts_st.tv_sec - 1;
+ ts->tv_nsec += 1000000000 + ts_ed.tv_nsec - ts_st.tv_nsec;
+ } else {
+ ts->tv_sec += ts_ed.tv_sec - ts_st.tv_sec;
+ ts->tv_nsec += ts_ed.tv_nsec - ts_st.tv_nsec;
+ }
+
+ if (ts->tv_nsec > 1000000000) {
+ ts->tv_nsec = ts->tv_nsec - 1000000000;
+ ts->tv_sec += 1;
+ }
+
+ return;
+}
+
+#define MAX_FILES_CREATE 10
+#define MAXPATHNAME 512
+void
+test_h_performance (void)
+{
+ char *my_dir = "perftest",
+ *full_dir_path="/testdir/perftest";
+ char *my_file = "file_", my_file_name[MAXPATHNAME];
+ struct glfs_object *parent = NULL, *leaf = NULL, *dir = NULL;
+ struct stat sb;
+ int ret, i;
+ struct glfs_fd *fd;
+ struct timespec c_ts = {0, 0}, c_ts_st, c_ts_ed;
+ struct timespec o_ts = {0, 0}, o_ts_st, o_ts_ed;
+
+ printf("glfs_h_performance tests: In Progress\n");
+
+ /* Prepare tests */
+ parent = glfs_h_lookupat (fs, NULL, full_parent_name, &sb);
+ if (parent == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror (errno));
+ printf ("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ dir = glfs_h_mkdir (fs, parent, my_dir, 0644, &sb);
+ if (dir == NULL) {
+ fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, parent, strerror (errno));
+ printf ("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ /* create performance */
+ ret = clock_gettime (CLOCK_REALTIME, &o_ts_st);
+ if(ret != 0) {
+ fprintf (stderr, "clock_gettime: error %s\n", strerror (errno));
+ printf ("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ for (i = 0; i < MAX_FILES_CREATE; i++) {
+ sprintf (my_file_name, "%s%d", my_file, i);
+
+ ret = clock_gettime (CLOCK_REALTIME, &c_ts_st);
+ if(ret != 0) {
+ fprintf (stderr, "clock_gettime: error %s\n",
+ strerror (errno));
+ printf ("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ leaf = glfs_h_lookupat (fs, dir, my_file_name, &sb);
+ if (leaf != NULL) {
+ fprintf (stderr, "glfs_h_lookup: exists %s\n",
+ my_file_name);
+ printf ("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ leaf = glfs_h_creat (fs, dir, my_file_name, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf (stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dir, strerror (errno));
+ printf ("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ ret = clock_gettime (CLOCK_REALTIME, &c_ts_ed);
+ if(ret != 0) {
+ fprintf (stderr, "clock_gettime: error %s\n",
+ strerror (errno));
+ printf ("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ assimilatetime (&c_ts, c_ts_st, c_ts_ed);
+ glfs_h_close (leaf); leaf = NULL;
+ }
+
+ ret = clock_gettime (CLOCK_REALTIME, &o_ts_ed);
+ if(ret != 0) {
+ fprintf (stderr, "clock_gettime: error %s\n", strerror (errno));
+ printf ("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ assimilatetime (&o_ts, o_ts_st, o_ts_ed);
+
+ printf ("Creation performance (handle based):\n\t# empty files:%d\n",
+ MAX_FILES_CREATE);
+ printf ("\tOverall time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n",
+ o_ts.tv_sec, o_ts.tv_nsec);
+ printf ("\tcreate call time time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n",
+ c_ts.tv_sec, c_ts.tv_nsec);
+
+ /* create using path */
+ c_ts.tv_sec = o_ts.tv_sec = 0;
+ c_ts.tv_nsec = o_ts.tv_nsec = 0;
+
+ sprintf (my_file_name, "%s1", full_dir_path);
+ ret = glfs_mkdir (fs, my_file_name, 0644);
+ if (ret != 0) {
+ fprintf (stderr, "glfs_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, parent, strerror (errno));
+ printf ("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ ret = clock_gettime (CLOCK_REALTIME, &o_ts_st);
+ if(ret != 0) {
+ fprintf (stderr, "clock_gettime: error %s\n", strerror (errno));
+ printf ("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ for (i = 0; i < MAX_FILES_CREATE; i++) {
+ sprintf (my_file_name, "%s1/%sn%d", full_dir_path, my_file, i);
+
+ ret = clock_gettime (CLOCK_REALTIME, &c_ts_st);
+ if(ret != 0) {
+ fprintf (stderr, "clock_gettime: error %s\n",
+ strerror (errno));
+ printf ("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ ret = glfs_stat (fs, my_file_name, &sb);
+ if (ret == 0) {
+ fprintf (stderr, "glfs_stat: exists %s\n",
+ my_file_name);
+ printf ("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ fd = glfs_creat (fs, my_file_name, O_CREAT, 0644);
+ if (fd == NULL) {
+ fprintf (stderr, "glfs_creat: error creating %s: from (%p),%s\n",
+ my_file, dir, strerror (errno));
+ printf ("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ ret = clock_gettime (CLOCK_REALTIME, &c_ts_ed);
+ if(ret != 0) {
+ fprintf (stderr, "clock_gettime: error %s\n",
+ strerror (errno));
+ printf ("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ assimilatetime (&c_ts, c_ts_st, c_ts_ed);
+ glfs_close (fd);
+ }
+
+ ret = clock_gettime (CLOCK_REALTIME, &o_ts_ed);
+ if(ret != 0) {
+ fprintf (stderr, "clock_gettime: error %s\n", strerror (errno));
+ printf ("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ assimilatetime (&o_ts, o_ts_st, o_ts_ed);
+
+ printf ("Creation performance (path based):\n\t# empty files:%d\n",
+ MAX_FILES_CREATE);
+ printf ("\tOverall time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n",
+ o_ts.tv_sec, o_ts.tv_nsec);
+ printf ("\tcreate call time time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n",
+ c_ts.tv_sec, c_ts.tv_nsec);
+out:
+ return;
+}
+
+int
+test_handleops (int argc, char *argv[])
+{
+ int ret = 0;
+ glfs_fd_t *fd = NULL;
+ struct stat sb = {0, };
+ struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL,
+ *tmp = NULL;
+ char readbuf[32], writebuf[32];
+ unsigned char leaf_handle[GFAPI_HANDLE_LENGTH];
+
+ char *full_leaf_name = "/testdir/testfile.txt",
+ *leaf_name = "testfile.txt",
+ *relative_leaf_name = "testdir/testfile.txt";
+ char *leaf_name1 = "testfile1.txt";
+ char *full_newparent_name = "/testdir/dir1",
+ *newparent_name = "dir1";
+ char *full_newnod_name = "/testdir/nod1",
+ *newnod_name = "nod1";
+
+ /* Initialize test area */
+ ret = glfs_mkdir (fs, full_parent_name, 0644);
+ if (ret != 0 && errno != EEXIST) {
+ fprintf (stderr, "%s: (%p) %s\n", full_parent_name, fd,
+ strerror (errno));
+ printf ("Test initialization failed on volume %s\n", argv[1]);
+ goto out;
+ }
+ else if (ret != 0) {
+ printf ("Found test directory %s to be existing\n",
+ full_parent_name);
+ printf ("Cleanup test directory and restart tests\n");
+ goto out;
+ }
+
+ fd = glfs_creat (fs, full_leaf_name, O_CREAT, 0644);
+ if (fd == NULL) {
+ fprintf (stderr, "%s: (%p) %s\n", full_leaf_name, fd,
+ strerror (errno));
+ printf ("Test initialization failed on volume %s\n", argv[1]);
+ goto out;
+ }
+ glfs_close (fd);
+
+ printf ("Initialized the test area, within volume %s\n", argv[1]);
+
+ /* Handle based APIs test area */
+
+ /* glfs_lookupat test */
+ printf ("glfs_h_lookupat tests: In Progress\n");
+ /* start at root of the volume */
+ root = glfs_h_lookupat (fs, NULL, "/", &sb);
+ if (root == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ "/", NULL, strerror (errno));
+ printf ("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ /* lookup a parent within root */
+ parent = glfs_h_lookupat (fs, root, parent_name, &sb);
+ if (parent == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ parent_name, root, strerror (errno));
+ printf ("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ /* lookup a leaf/child within the parent */
+ leaf = glfs_h_lookupat (fs, parent, leaf_name, &sb);
+ if (leaf == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ leaf_name, parent, strerror (errno));
+ printf ("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ /* reset */
+ glfs_h_close (root); root = NULL;
+ glfs_h_close (leaf); leaf = NULL;
+ glfs_h_close (parent); parent = NULL;
+
+ /* check absolute paths */
+ root = glfs_h_lookupat (fs, NULL, "/", &sb);
+ if (root == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ "/", NULL, strerror (errno));
+ printf ("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ parent = glfs_h_lookupat (fs, NULL, full_parent_name, &sb);
+ if (parent == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, root, strerror (errno));
+ printf ("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ leaf = glfs_h_lookupat (fs, NULL, full_leaf_name, &sb);
+ if (leaf == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_leaf_name, parent, strerror (errno));
+ printf ("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ /* reset */
+ glfs_h_close (leaf); leaf = NULL;
+
+ /* check multiple component paths */
+ leaf = glfs_h_lookupat (fs, root, relative_leaf_name, &sb);
+ if (leaf == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ relative_leaf_name, parent, strerror (errno));
+ goto out;
+ }
+ peek_stat (&sb);
+
+ /* reset */
+ glfs_h_close (root); root = NULL;
+ glfs_h_close (parent); parent = NULL;
+
+ /* check symlinks in path */
+
+ /* TODO: -ve test cases */
+ /* parent invalid
+ * path invalid
+ * path does not exist after some components
+ * no parent, but relative path
+ * parent and full path? -ve?
+ */
+
+ printf ("glfs_h_lookupat tests: PASSED\n");
+
+ /* glfs_openat test */
+ printf ("glfs_h_open tests: In Progress\n");
+ fd = glfs_h_open (fs, leaf, O_RDWR);
+ if (fd == NULL) {
+ fprintf (stderr, "glfs_h_open: error on open of %s: %s\n",
+ full_leaf_name, strerror (errno));
+ printf ("glfs_h_open tests: FAILED\n");
+ goto out;
+ }
+
+ /* test read/write based on fd */
+ memcpy (writebuf, "abcdefghijklmnopqrstuvwxyz012345", 32);
+ ret = glfs_write (fd, writebuf, 32, 0);
+
+ glfs_lseek (fd, 0, SEEK_SET);
+
+ ret = glfs_read (fd, readbuf, 32, 0);
+ if (memcmp (readbuf, writebuf, 32)) {
+ printf ("Failed to read what I wrote: %s %s\n", readbuf,
+ writebuf);
+ glfs_close (fd);
+ printf ("glfs_h_open tests: FAILED\n");
+ goto out;
+ }
+
+ glfs_h_close (leaf); leaf = NULL;
+ glfs_close (fd);
+
+ printf ("glfs_h_open tests: PASSED\n");
+
+ /* Create tests */
+ printf ("glfs_h_creat tests: In Progress\n");
+ parent = glfs_h_lookupat (fs, NULL, full_parent_name, &sb);
+ if (parent == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, root, strerror (errno));
+ printf ("glfs_h_creat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ leaf = glfs_h_creat (fs, parent, leaf_name1, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf (stderr, "glfs_h_creat: error on create of %s: from (%p),%s\n",
+ leaf_name1, parent, strerror (errno));
+ printf ("glfs_h_creat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ glfs_h_close (leaf); leaf = NULL;
+
+ leaf = glfs_h_creat (fs, parent, leaf_name1, O_CREAT | O_EXCL, 0644,
+ &sb);
+ if (leaf != NULL || errno != EEXIST) {
+ fprintf (stderr, "glfs_h_creat: existing file, leaf = (%p), errno = %s\n",
+ leaf, strerror (errno));
+ printf ("glfs_h_creat tests: FAILED\n");
+ if (leaf != NULL) {
+ glfs_h_close (leaf); leaf = NULL;
+ }
+ }
+
+ tmp = glfs_h_creat (fs, root, parent_name, O_CREAT, 0644, &sb);
+ if (tmp != NULL || !(errno == EISDIR || errno == EINVAL)) {
+ fprintf (stderr, "glfs_h_creat: dir create, tmp = (%p), errno = %s\n",
+ leaf, strerror (errno));
+ printf ("glfs_h_creat tests: FAILED\n");
+ if (tmp != NULL) {
+ glfs_h_close (tmp); tmp = NULL;
+ }
+ }
+
+ /* TODO: Other combinations and -ve cases as applicable */
+ printf ("glfs_h_creat tests: PASSED\n");
+
+ /* extract handle and create from handle test */
+ printf ("glfs_h_extract_handle and glfs_h_create_from_handle tests: In Progress\n");
+ /* TODO: Change the lookup to creat below for a GIFD recovery falure,
+ * that needs to be fixed */
+ leaf = glfs_h_lookupat (fs, parent, leaf_name1, &sb);
+ if (leaf == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ leaf_name1, parent, strerror (errno));
+ printf ("glfs_h_extract_handle tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ ret = glfs_h_extract_handle (leaf, leaf_handle,
+ GFAPI_HANDLE_LENGTH);
+ if (ret < 0) {
+ fprintf (stderr, "glfs_h_extract_handle: error extracting handle of %s: %s\n",
+ full_leaf_name, strerror (errno));
+ printf ("glfs_h_extract_handle tests: FAILED\n");
+ goto out;
+ }
+ peek_handle (leaf_handle);
+
+ glfs_h_close (leaf); leaf = NULL;
+
+ leaf = glfs_h_create_from_handle (fs, leaf_handle, GFAPI_HANDLE_LENGTH,
+ &sb);
+ if (leaf == NULL) {
+ fprintf (stderr, "glfs_h_create_from_handle: error on create of %s: from (%p),%s\n",
+ leaf_name1, leaf_handle, strerror (errno));
+ printf ("glfs_h_create_from_handle tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ fd = glfs_h_open (fs, leaf, O_RDWR);
+ if (fd == NULL) {
+ fprintf (stderr, "glfs_h_open: error on open of %s: %s\n",
+ full_leaf_name, strerror (errno));
+ printf ("glfs_h_create_from_handle tests: FAILED\n");
+ goto out;
+ }
+
+ /* test read/write based on fd */
+ memcpy (writebuf, "abcdefghijklmnopqrstuvwxyz012345", 32);
+ ret = glfs_write (fd, writebuf, 32, 0);
+
+ glfs_lseek (fd, 0, SEEK_SET);
+
+ ret = glfs_read (fd, readbuf, 32, 0);
+ if (memcmp (readbuf, writebuf, 32)) {
+ printf ("Failed to read what I wrote: %s %s\n", writebuf,
+ writebuf);
+ printf ("glfs_h_create_from_handle tests: FAILED\n");
+ glfs_close (fd);
+ goto out;
+ }
+
+ glfs_close (fd);
+ glfs_h_close (leaf); leaf = NULL;
+ glfs_h_close (parent); parent = NULL;
+
+ printf ("glfs_h_extract_handle and glfs_h_create_from_handle tests: PASSED\n");
+
+ /* Mkdir tests */
+ printf ("glfs_h_mkdir tests: In Progress\n");
+
+ ret = glfs_rmdir (fs, full_newparent_name);
+ if (ret && errno != ENOENT) {
+ fprintf (stderr, "glfs_rmdir: Failed for %s: %s\n",
+ full_newparent_name, strerror (errno));
+ printf ("glfs_h_mkdir tests: FAILED\n");
+ goto out;
+ }
+
+ parent = glfs_h_lookupat (fs, NULL, full_parent_name, &sb);
+ if (parent == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, root, strerror (errno));
+ printf ("glfs_h_mkdir tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ leaf = glfs_h_mkdir (fs, parent, newparent_name, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf (stderr, "glfs_h_mkdir: error on mkdir of %s: from (%p),%s\n",
+ newparent_name, parent, strerror (errno));
+ printf ("glfs_h_mkdir tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ glfs_h_close (leaf); leaf = NULL;
+
+ leaf = glfs_h_mkdir (fs, parent, newparent_name, 0644, &sb);
+ if (leaf != NULL || errno != EEXIST) {
+ fprintf (stderr, "glfs_h_mkdir: existing directory, leaf = (%p), errno = %s\n",
+ leaf, strerror (errno));
+ printf ("glfs_h_mkdir tests: FAILED\n");
+ if (leaf != NULL) {
+ glfs_h_close (leaf); leaf = NULL;
+ }
+ }
+
+ glfs_h_close (parent); parent = NULL;
+
+ printf ("glfs_h_mkdir tests: PASSED\n");
+
+ /* Mknod tests */
+ printf ("glfs_h_mknod tests: In Progress\n");
+ ret = glfs_unlink (fs, full_newnod_name);
+ if (ret && errno != ENOENT) {
+ fprintf (stderr, "glfs_unlink: Failed for %s: %s\n",
+ full_newnod_name, strerror (errno));
+ printf ("glfs_h_mknod tests: FAILED\n");
+ goto out;
+ }
+
+ parent = glfs_h_lookupat (fs, NULL, full_parent_name, &sb);
+ if (parent == NULL) {
+ fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, root, strerror (errno));
+ printf ("glfs_h_mknod tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ leaf = glfs_h_mknod (fs, parent, newnod_name, S_IFIFO, 0, &sb);
+ if (leaf == NULL) {
+ fprintf (stderr, "glfs_h_mkdir: error on mkdir of %s: from (%p),%s\n",
+ newnod_name, parent, strerror (errno));
+ printf ("glfs_h_mknod tests: FAILED\n");
+ goto out;
+ }
+ peek_stat (&sb);
+
+ /* TODO: creat op on a FIFO node hangs, need to check and fix
+ tmp = glfs_h_creat (fs, parent, newnod_name, O_CREAT, 0644, &sb);
+ if (tmp != NULL || errno != EINVAL) {
+ fprintf (stderr, "glfs_h_creat: node create, tmp = (%p), errno = %s\n",
+ tmp, strerror (errno));
+ printf ("glfs_h_creat/mknod tests: FAILED\n");
+ if (tmp != NULL) {
+ glfs_h_close(tmp); tmp = NULL;
+ }
+ } */
+
+ glfs_h_close (leaf); leaf = NULL;
+
+ leaf = glfs_h_mknod (fs, parent, newnod_name, 0644, 0, &sb);
+ if (leaf != NULL || errno != EEXIST) {
+ fprintf (stderr, "glfs_h_mknod: existing node, leaf = (%p), errno = %s\n",
+ leaf, strerror (errno));
+ printf ("glfs_h_mknod tests: FAILED\n");
+ if (leaf != NULL) {
+ glfs_h_close (leaf); leaf = NULL;
+ }
+ }
+
+ glfs_h_close (parent); parent = NULL;
+
+ printf ("glfs_h_mknod tests: PASSED\n");
+
+ /* unlink tests */
+ test_h_unlink ();
+
+ /* TODO: opendir tests */
+
+ /* getattr tests */
+ test_h_getsetattrs ();
+
+ /* TODO: setattr tests */
+
+ /* truncate tests */
+ test_h_truncate();
+
+ /* link tests */
+ test_h_links ();
+
+ /* rename tests */
+ test_h_rename ();
+
+ /* performance tests */
+ test_h_performance ();
+
+ /* END: New APIs test area */
+
+out:
+ /* Cleanup glfs handles */
+ if (root)
+ glfs_h_close (root);
+ if (parent)
+ glfs_h_close (parent);
+ if (leaf)
+ glfs_h_close (leaf);
+
+ return ret;
+}
+
+int
+main (int argc, char *argv[])
+{
+ glfs_t *fs2 = NULL;
+ int ret = 0;
+ glfs_fd_t *fd = NULL;
+ glfs_fd_t *fd2 = NULL;
+ struct stat sb = {0, };
+ char readbuf[32];
+ char writebuf[32];
+
+ char *filename = "/filename2";
+
+ if (argc != 3) {
+ printf ("Expect following args\n\t%s <volname> <hostname>\n", argv[0]);
+ return -1;
+ }
+
+ fs = glfs_new (argv[1]);
+ if (!fs) {
+ fprintf (stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+// ret = glfs_set_volfile (fs, "/tmp/posix.vol");
+
+ ret = glfs_set_volfile_server (fs, "tcp", argv[2], 24007);
+
+// ret = glfs_set_volfile_server (fs, "unix", "/tmp/gluster.sock", 0);
+
+ ret = glfs_set_logging (fs, "/dev/stderr", 7);
+
+ ret = glfs_init (fs);
+
+ fprintf (stderr, "glfs_init: returned %d\n", ret);
+
+ sleep (2);
+
+ fs2 = glfs_new (argv[1]);
+ if (!fs2) {
+ fprintf (stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+
+// ret = glfs_set_volfile (fs2, "/tmp/posix.vol");
+
+ ret = glfs_set_volfile_server (fs2, "tcp", argv[2], 24007);
+
+ ret = glfs_set_logging (fs2, "/dev/stderr", 7);
+
+ ret = glfs_init (fs2);
+
+ fprintf (stderr, "glfs_init: returned %d\n", ret);
+
+ ret = glfs_lstat (fs, filename, &sb);
+ fprintf (stderr, "%s: (%d) %s\n", filename, ret, strerror (errno));
+
+ fd = glfs_creat (fs, filename, O_RDWR, 0644);
+ fprintf (stderr, "%s: (%p) %s\n", filename, fd, strerror (errno));
+
+ fd2 = glfs_open (fs2, filename, O_RDWR);
+ fprintf (stderr, "%s: (%p) %s\n", filename, fd, strerror (errno));
+
+ sprintf (writebuf, "hi there\n");
+ ret = glfs_write (fd, writebuf, 32, 0);
+
+ glfs_lseek (fd2, 0, SEEK_SET);
+
+ ret = glfs_read (fd2, readbuf, 32, 0);
+
+ printf ("read %d, %s", ret, readbuf);
+
+ glfs_close (fd);
+ glfs_close (fd2);
+
+ filename = "/filename3";
+ ret = glfs_mknod (fs, filename, S_IFIFO, 0);
+ fprintf (stderr, "%s: (%d) %s\n", filename, ret, strerror (errno));
+
+ ret = glfs_lstat (fs, filename, &sb);
+ fprintf (stderr, "%s: (%d) %s\n", filename, ret, strerror (errno));
+
+
+ ret = glfs_rename (fs, filename, "/filename4");
+ fprintf (stderr, "rename(%s): (%d) %s\n", filename, ret,
+ strerror (errno));
+
+ ret = glfs_unlink (fs, "/filename4");
+ fprintf (stderr, "unlink(%s): (%d) %s\n", "/filename4", ret,
+ strerror (errno));
+
+ filename = "/dirname2";
+ ret = glfs_mkdir (fs, filename, 0);
+ fprintf (stderr, "%s: (%d) %s\n", filename, ret, strerror (errno));
+
+ ret = glfs_lstat (fs, filename, &sb);
+ fprintf (stderr, "lstat(%s): (%d) %s\n", filename, ret, strerror (errno));
+
+ ret = glfs_rmdir (fs, filename);
+ fprintf (stderr, "rmdir(%s): (%d) %s\n", filename, ret, strerror (errno));
+
+ test_dirops (fs);
+
+ test_xattr (fs);
+
+ test_chdir (fs);
+
+ test_handleops (argc, argv);
+ // done
+
+ glfs_fini (fs);
+ glfs_fini (fs2);
+
+ return ret;
+}
diff --git a/api/examples/setup.py.in b/api/examples/setup.py.in
new file mode 100644
index 000000000..44b738094
--- /dev/null
+++ b/api/examples/setup.py.in
@@ -0,0 +1,29 @@
+from distutils.core import setup
+
+# generate a __init__.py for the package namespace
+fo = open('__init__.py', 'w')
+fo.write('__version__ = "@PACKAGE_VERSION@"\n')
+fo.close()
+
+DESC = """GlusterFS is a clustered file-system capable of scaling to
+several petabytes. It aggregates various storage bricks over Infiniband
+RDMA or TCP/IP interconnect into one large parallel network file system.
+GlusterFS is one of the most sophisticated file systems in terms of
+features and extensibility. It borrows a powerful concept called
+Translators from GNU Hurd kernel. Much of the code in GlusterFS is in
+user space and easily manageable.
+
+This package contains the Python interface to the libgfapi library."""
+
+setup(
+ name='glusterfs-api',
+ version='@PACKAGE_VERSION@',
+ description='Python client library for the GlusterFS libgfapi',
+ long_description=DESC,
+ author='Gluster Community',
+ author_email='gluster-devel@nongnu.org',
+ license='LGPLv3',
+ url='http://gluster.org/',
+ package_dir={'gluster':''},
+ packages=['gluster']
+)
diff --git a/api/src/Makefile.am b/api/src/Makefile.am
new file mode 100644
index 000000000..7c5df3e20
--- /dev/null
+++ b/api/src/Makefile.am
@@ -0,0 +1,36 @@
+lib_LTLIBRARIES = libgfapi.la
+noinst_HEADERS = glfs-mem-types.h glfs-internal.h
+libgfapi_HEADERS = glfs.h glfs-handles.h
+libgfapidir = $(includedir)/glusterfs/api
+
+libgfapi_la_SOURCES = glfs.c glfs-mgmt.c glfs-fops.c glfs-resolve.c \
+ glfs-handleops.c
+libgfapi_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la \
+ $(GF_LDADD)
+
+libgfapi_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 \
+ -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(top_srcdir)/rpc/xdr/src
+
+
+xlator_LTLIBRARIES = api.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/mount
+# workaround for broken parallel install support in automake with LTLIBRARIES
+# http://debbugs.gnu.org/cgi/bugreport.cgi?bug=7328
+install_xlatorLTLIBRARIES = install-xlatorLTLIBRARIES
+$(install_xlatorLTLIBRARIES): install-libLTLIBRARIES
+
+api_la_SOURCES = glfs-master.c
+api_la_DEPENDENCIES = libgfapi.la
+api_la_LDFLAGS = -module -avoid-version
+api_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la \
+ $(top_builddir)/api/src/libgfapi.la
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c
new file mode 100644
index 000000000..10bb7d38b
--- /dev/null
+++ b/api/src/glfs-fops.c
@@ -0,0 +1,3252 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#include "glfs-internal.h"
+#include "glfs-mem-types.h"
+#include "syncop.h"
+#include "glfs.h"
+#include <limits.h>
+
+#ifdef NAME_MAX
+#define GF_NAME_MAX NAME_MAX
+#else
+#define GF_NAME_MAX 255
+#endif
+
+#define READDIRBUF_SIZE (sizeof(struct dirent) + GF_NAME_MAX + 1)
+
+int
+glfs_loc_link (loc_t *loc, struct iatt *iatt)
+{
+ int ret = -1;
+ inode_t *linked_inode = NULL;
+
+ if (!loc->inode) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ linked_inode = inode_link (loc->inode, loc->parent, loc->name, iatt);
+ if (linked_inode) {
+ inode_lookup (linked_inode);
+ inode_unref (linked_inode);
+ ret = 0;
+ } else {
+ ret = -1;
+ errno = ENOMEM;
+ }
+
+ return ret;
+}
+
+
+void
+glfs_iatt_to_stat (struct glfs *fs, struct iatt *iatt, struct stat *stat)
+{
+ iatt_to_stat (iatt, stat);
+ stat->st_dev = fs->dev_id;
+}
+
+
+int
+glfs_loc_unlink (loc_t *loc)
+{
+ inode_unlink (loc->inode, loc->parent, loc->name);
+
+ return 0;
+}
+
+
+struct glfs_fd *
+glfs_open (struct glfs *fs, const char *path, int flags)
+{
+ int ret = -1;
+ struct glfs_fd *glfd = NULL;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ glfd = glfs_fd_new (fs);
+ if (!glfd)
+ goto out;
+
+retry:
+ ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (IA_ISDIR (iatt.ia_type)) {
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+
+ if (!IA_ISREG (iatt.ia_type)) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ if (glfd->fd) {
+ /* Retry. Safe to touch glfd->fd as we
+ still have not glfs_fd_bind() yet.
+ */
+ fd_unref (glfd->fd);
+ glfd->fd = NULL;
+ }
+
+ glfd->fd = fd_create (loc.inode, getpid());
+ if (!glfd->fd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_open (subvol, &loc, flags, glfd->fd);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+out:
+ loc_wipe (&loc);
+
+ if (ret && glfd) {
+ glfs_fd_destroy (glfd);
+ glfd = NULL;
+ } else if (glfd) {
+ glfd->fd->flags = flags;
+ fd_bind (glfd->fd);
+ glfs_fd_bind (glfd);
+ }
+
+ glfs_subvol_done (fs, subvol);
+
+ return glfd;
+}
+
+
+int
+glfs_close (struct glfs_fd *glfd)
+{
+ xlator_t *subvol = NULL;
+ int ret = -1;
+ fd_t *fd = NULL;
+ struct glfs *fs = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_flush (subvol, fd);
+out:
+ fs = glfd->fs;
+ glfs_fd_destroy (glfd);
+
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_lstat (struct glfs *fs, const char *path, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret == 0 && stat)
+ glfs_iatt_to_stat (fs, &iatt, stat);
+out:
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_stat (struct glfs *fs, const char *path, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret == 0 && stat)
+ glfs_iatt_to_stat (fs, &iatt, stat);
+out:
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_fstat (struct glfs_fd *glfd, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ struct iatt iatt = {0, };
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_fstat (subvol, fd, &iatt);
+
+ if (ret == 0 && stat)
+ glfs_iatt_to_stat (glfd->fs, &iatt, stat);
+out:
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+struct glfs_fd *
+glfs_creat (struct glfs *fs, const char *path, int flags, mode_t mode)
+{
+ int ret = -1;
+ struct glfs_fd *glfd = NULL;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ uuid_generate (gfid);
+ ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ glfd = glfs_fd_new (fs);
+ if (!glfd)
+ goto out;
+
+ /* This must be glfs_resolve() and NOT glfs_lresolve().
+ That is because open("name", O_CREAT) where "name"
+ is a danging symlink must create the dangling
+ destinataion.
+ */
+retry:
+ ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret == -1 && errno != ENOENT)
+ /* Any other type of error is fatal */
+ goto out;
+
+ if (ret == -1 && errno == ENOENT && !loc.parent)
+ /* The parent directory or an ancestor even
+ higher does not exist
+ */
+ goto out;
+
+ if (loc.inode) {
+ if (flags & O_EXCL) {
+ ret = -1;
+ errno = EEXIST;
+ goto out;
+ }
+
+ if (IA_ISDIR (iatt.ia_type)) {
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+
+ if (!IA_ISREG (iatt.ia_type)) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+ }
+
+ if (ret == -1 && errno == ENOENT) {
+ loc.inode = inode_new (loc.parent->table);
+ if (!loc.inode) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+ }
+
+ if (glfd->fd) {
+ /* Retry. Safe to touch glfd->fd as we
+ still have not glfs_fd_bind() yet.
+ */
+ fd_unref (glfd->fd);
+ glfd->fd = NULL;
+ }
+
+ glfd->fd = fd_create (loc.inode, getpid());
+ if (!glfd->fd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ if (ret == 0) {
+ ret = syncop_open (subvol, &loc, flags, glfd->fd);
+ } else {
+ ret = syncop_create (subvol, &loc, flags, mode, glfd->fd,
+ xattr_req, &iatt);
+ }
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret == 0)
+ ret = glfs_loc_link (&loc, &iatt);
+out:
+ loc_wipe (&loc);
+
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ if (ret && glfd) {
+ glfs_fd_destroy (glfd);
+ glfd = NULL;
+ } else if (glfd) {
+ glfd->fd->flags = flags;
+ fd_bind (glfd->fd);
+ glfs_fd_bind (glfd);
+ }
+
+ glfs_subvol_done (fs, subvol);
+
+ return glfd;
+}
+
+
+off_t
+glfs_lseek (struct glfs_fd *glfd, off_t offset, int whence)
+{
+ struct stat sb = {0, };
+ int ret = -1;
+
+ __glfs_entry_fd (glfd);
+
+ switch (whence) {
+ case SEEK_SET:
+ glfd->offset = offset;
+ break;
+ case SEEK_CUR:
+ glfd->offset += offset;
+ break;
+ case SEEK_END:
+ ret = glfs_fstat (glfd, &sb);
+ if (ret) {
+ /* seek cannot fail :O */
+ break;
+ }
+ glfd->offset = sb.st_size + offset;
+ break;
+ }
+
+ return glfd->offset;
+}
+
+
+//////////////
+
+ssize_t
+glfs_preadv (struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
+ off_t offset, int flags)
+{
+ xlator_t *subvol = NULL;
+ ssize_t ret = -1;
+ ssize_t size = -1;
+ struct iovec *iov = NULL;
+ int cnt = 0;
+ struct iobref *iobref = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ size = iov_length (iovec, iovcnt);
+
+ ret = syncop_readv (subvol, fd, size, offset, 0, &iov, &cnt, &iobref);
+ if (ret <= 0)
+ goto out;
+
+ size = iov_copy (iovec, iovcnt, iov, cnt); /* FIXME!!! */
+
+ glfd->offset = (offset + size);
+
+ ret = size;
+out:
+ if (iov)
+ GF_FREE (iov);
+ if (iobref)
+ iobref_unref (iobref);
+
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+ssize_t
+glfs_read (struct glfs_fd *glfd, void *buf, size_t count, int flags)
+{
+ struct iovec iov = {0, };
+ ssize_t ret = 0;
+
+ iov.iov_base = buf;
+ iov.iov_len = count;
+
+ ret = glfs_preadv (glfd, &iov, 1, glfd->offset, flags);
+
+ return ret;
+}
+
+
+ssize_t
+glfs_pread (struct glfs_fd *glfd, void *buf, size_t count, off_t offset,
+ int flags)
+{
+ struct iovec iov = {0, };
+ ssize_t ret = 0;
+
+ iov.iov_base = buf;
+ iov.iov_len = count;
+
+ ret = glfs_preadv (glfd, &iov, 1, offset, flags);
+
+ return ret;
+}
+
+
+ssize_t
+glfs_readv (struct glfs_fd *glfd, const struct iovec *iov, int count,
+ int flags)
+{
+ ssize_t ret = 0;
+
+ ret = glfs_preadv (glfd, iov, count, glfd->offset, flags);
+
+ return ret;
+}
+
+
+struct glfs_io {
+ struct glfs_fd *glfd;
+ int op;
+ off_t offset;
+ struct iovec *iov;
+ int count;
+ int flags;
+ glfs_io_cbk fn;
+ void *data;
+};
+
+
+static int
+glfs_io_async_cbk (int ret, call_frame_t *frame, void *data)
+{
+ struct glfs_io *gio = data;
+
+ gio->fn (gio->glfd, ret, gio->data);
+
+ GF_FREE (gio->iov);
+ GF_FREE (gio);
+
+ return 0;
+}
+
+
+static int
+glfs_io_async_task (void *data)
+{
+ struct glfs_io *gio = data;
+ ssize_t ret = 0;
+
+ switch (gio->op) {
+ case GF_FOP_WRITE:
+ ret = glfs_pwritev (gio->glfd, gio->iov, gio->count,
+ gio->offset, gio->flags);
+ break;
+ case GF_FOP_FTRUNCATE:
+ ret = glfs_ftruncate (gio->glfd, gio->offset);
+ break;
+ case GF_FOP_FSYNC:
+ if (gio->flags)
+ ret = glfs_fdatasync (gio->glfd);
+ else
+ ret = glfs_fsync (gio->glfd);
+ break;
+ case GF_FOP_DISCARD:
+ ret = glfs_discard (gio->glfd, gio->offset, gio->count);
+ break;
+ case GF_FOP_ZEROFILL:
+ ret = glfs_zerofill(gio->glfd, gio->offset, gio->count);
+ break;
+ }
+
+ return (int) ret;
+}
+
+
+int
+glfs_preadv_async_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iovec *iovec,
+ int count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
+{
+ struct glfs_io *gio = NULL;
+ xlator_t *subvol = NULL;
+ struct glfs *fs = NULL;
+ struct glfs_fd *glfd = NULL;
+
+
+ gio = frame->local;
+ frame->local = NULL;
+ subvol = cookie;
+ glfd = gio->glfd;
+ fs = glfd->fs;
+
+ if (op_ret <= 0)
+ goto out;
+
+ op_ret = iov_copy (gio->iov, gio->count, iovec, count);
+
+ glfd->offset = gio->offset + op_ret;
+out:
+ errno = op_errno;
+ gio->fn (gio->glfd, op_ret, gio->data);
+
+ GF_FREE (gio->iov);
+ GF_FREE (gio);
+ STACK_DESTROY (frame->root);
+ glfs_subvol_done (fs, subvol);
+
+ return 0;
+}
+
+
+int
+glfs_preadv_async (struct glfs_fd *glfd, const struct iovec *iovec, int count,
+ off_t offset, int flags, glfs_io_cbk fn, void *data)
+{
+ struct glfs_io *gio = NULL;
+ int ret = 0;
+ call_frame_t *frame = NULL;
+ xlator_t *subvol = NULL;
+ glfs_t *fs = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ fs = glfd->fs;
+
+ frame = syncop_create_frame (THIS);
+ if (!frame) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gio = GF_CALLOC (1, sizeof (*gio), glfs_mt_glfs_io_t);
+ if (!gio) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gio->iov = iov_dup (iovec, count);
+ if (!gio->iov) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gio->op = GF_FOP_READ;
+ gio->glfd = glfd;
+ gio->count = count;
+ gio->offset = offset;
+ gio->flags = flags;
+ gio->fn = fn;
+ gio->data = data;
+
+ frame->local = gio;
+
+ STACK_WIND_COOKIE (frame, glfs_preadv_async_cbk, subvol, subvol,
+ subvol->fops->readv, fd, iov_length (iovec, count),
+ offset, flags, NULL);
+
+out:
+ if (ret) {
+ GF_FREE (gio->iov);
+ GF_FREE (gio);
+ STACK_DESTROY (frame->root);
+ glfs_subvol_done (fs, subvol);
+ }
+
+ if (fd)
+ fd_unref (fd);
+
+ return ret;
+}
+
+
+int
+glfs_read_async (struct glfs_fd *glfd, void *buf, size_t count, int flags,
+ glfs_io_cbk fn, void *data)
+{
+ struct iovec iov = {0, };
+ ssize_t ret = 0;
+
+ iov.iov_base = buf;
+ iov.iov_len = count;
+
+ ret = glfs_preadv_async (glfd, &iov, 1, glfd->offset, flags, fn, data);
+
+ return ret;
+}
+
+
+int
+glfs_pread_async (struct glfs_fd *glfd, void *buf, size_t count, off_t offset,
+ int flags, glfs_io_cbk fn, void *data)
+{
+ struct iovec iov = {0, };
+ ssize_t ret = 0;
+
+ iov.iov_base = buf;
+ iov.iov_len = count;
+
+ ret = glfs_preadv_async (glfd, &iov, 1, offset, flags, fn, data);
+
+ return ret;
+}
+
+
+int
+glfs_readv_async (struct glfs_fd *glfd, const struct iovec *iov, int count,
+ int flags, glfs_io_cbk fn, void *data)
+{
+ ssize_t ret = 0;
+
+ ret = glfs_preadv_async (glfd, iov, count, glfd->offset, flags,
+ fn, data);
+ return ret;
+}
+
+///// writev /////
+
+ssize_t
+glfs_pwritev (struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
+ off_t offset, int flags)
+{
+ xlator_t *subvol = NULL;
+ int ret = -1;
+ size_t size = -1;
+ struct iobref *iobref = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iovec iov = {0, };
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ size = iov_length (iovec, iovcnt);
+
+ iobuf = iobuf_get2 (subvol->ctx->iobuf_pool, size);
+ if (!iobuf) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ iobref = iobref_new ();
+ if (!iobref) {
+ iobuf_unref (iobuf);
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ ret = iobref_add (iobref, iobuf);
+ if (ret) {
+ iobuf_unref (iobuf);
+ iobref_unref (iobref);
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ iov_unload (iobuf_ptr (iobuf), iovec, iovcnt); /* FIXME!!! */
+
+ iov.iov_base = iobuf_ptr (iobuf);
+ iov.iov_len = size;
+
+ ret = syncop_writev (subvol, fd, &iov, 1, offset, iobref, flags);
+
+ iobuf_unref (iobuf);
+ iobref_unref (iobref);
+
+ if (ret <= 0)
+ goto out;
+
+ glfd->offset = (offset + size);
+
+out:
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+ssize_t
+glfs_write (struct glfs_fd *glfd, const void *buf, size_t count, int flags)
+{
+ struct iovec iov = {0, };
+ ssize_t ret = 0;
+
+ iov.iov_base = (void *) buf;
+ iov.iov_len = count;
+
+ ret = glfs_pwritev (glfd, &iov, 1, glfd->offset, flags);
+
+ return ret;
+}
+
+
+
+ssize_t
+glfs_writev (struct glfs_fd *glfd, const struct iovec *iov, int count,
+ int flags)
+{
+ ssize_t ret = 0;
+
+ ret = glfs_pwritev (glfd, iov, count, glfd->offset, flags);
+
+ return ret;
+}
+
+
+ssize_t
+glfs_pwrite (struct glfs_fd *glfd, const void *buf, size_t count, off_t offset,
+ int flags)
+{
+ struct iovec iov = {0, };
+ ssize_t ret = 0;
+
+ iov.iov_base = (void *) buf;
+ iov.iov_len = count;
+
+ ret = glfs_pwritev (glfd, &iov, 1, offset, flags);
+
+ return ret;
+}
+
+
+int
+glfs_pwritev_async (struct glfs_fd *glfd, const struct iovec *iovec, int count,
+ off_t offset, int flags, glfs_io_cbk fn, void *data)
+{
+ struct glfs_io *gio = NULL;
+ int ret = 0;
+
+ gio = GF_CALLOC (1, sizeof (*gio), glfs_mt_glfs_io_t);
+ if (!gio) {
+ errno = ENOMEM;
+ return -1;
+ }
+
+ gio->iov = iov_dup (iovec, count);
+ if (!gio->iov) {
+ GF_FREE (gio);
+ errno = ENOMEM;
+ return -1;
+ }
+
+ gio->op = GF_FOP_WRITE;
+ gio->glfd = glfd;
+ gio->count = count;
+ gio->offset = offset;
+ gio->flags = flags;
+ gio->fn = fn;
+ gio->data = data;
+
+ ret = synctask_new (glfs_from_glfd (glfd)->ctx->env,
+ glfs_io_async_task, glfs_io_async_cbk,
+ NULL, gio);
+
+ if (ret) {
+ GF_FREE (gio->iov);
+ GF_FREE (gio);
+ }
+
+ return ret;
+}
+
+
+int
+glfs_write_async (struct glfs_fd *glfd, const void *buf, size_t count, int flags,
+ glfs_io_cbk fn, void *data)
+{
+ struct iovec iov = {0, };
+ ssize_t ret = 0;
+
+ iov.iov_base = (void *) buf;
+ iov.iov_len = count;
+
+ ret = glfs_pwritev_async (glfd, &iov, 1, glfd->offset, flags, fn, data);
+
+ return ret;
+}
+
+
+int
+glfs_pwrite_async (struct glfs_fd *glfd, const void *buf, int count,
+ off_t offset, int flags, glfs_io_cbk fn, void *data)
+{
+ struct iovec iov = {0, };
+ ssize_t ret = 0;
+
+ iov.iov_base = (void *) buf;
+ iov.iov_len = count;
+
+ ret = glfs_pwritev_async (glfd, &iov, 1, offset, flags, fn, data);
+
+ return ret;
+}
+
+
+int
+glfs_writev_async (struct glfs_fd *glfd, const struct iovec *iov, int count,
+ int flags, glfs_io_cbk fn, void *data)
+{
+ ssize_t ret = 0;
+
+ ret = glfs_pwritev_async (glfd, iov, count, glfd->offset, flags,
+ fn, data);
+ return ret;
+}
+
+
+int
+glfs_fsync (struct glfs_fd *glfd)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_fsync (subvol, fd, 0);
+out:
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+static int
+glfs_fsync_async_common (struct glfs_fd *glfd, glfs_io_cbk fn, void *data,
+ int dataonly)
+{
+ struct glfs_io *gio = NULL;
+ int ret = 0;
+
+ gio = GF_CALLOC (1, sizeof (*gio), glfs_mt_glfs_io_t);
+ if (!gio) {
+ errno = ENOMEM;
+ return -1;
+ }
+
+ gio->op = GF_FOP_FSYNC;
+ gio->glfd = glfd;
+ gio->flags = dataonly;
+ gio->fn = fn;
+ gio->data = data;
+
+ ret = synctask_new (glfs_from_glfd (glfd)->ctx->env,
+ glfs_io_async_task, glfs_io_async_cbk,
+ NULL, gio);
+
+ if (ret) {
+ GF_FREE (gio->iov);
+ GF_FREE (gio);
+ }
+
+ return ret;
+
+}
+
+
+int
+glfs_fsync_async (struct glfs_fd *glfd, glfs_io_cbk fn, void *data)
+{
+ return glfs_fsync_async_common (glfd, fn, data, 0);
+}
+
+
+int
+glfs_fdatasync (struct glfs_fd *glfd)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_fsync (subvol, fd, 1);
+out:
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_fdatasync_async (struct glfs_fd *glfd, glfs_io_cbk fn, void *data)
+{
+ return glfs_fsync_async_common (glfd, fn, data, 1);
+}
+
+
+int
+glfs_ftruncate (struct glfs_fd *glfd, off_t offset)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_ftruncate (subvol, fd, offset);
+out:
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_ftruncate_async (struct glfs_fd *glfd, off_t offset,
+ glfs_io_cbk fn, void *data)
+{
+ struct glfs_io *gio = NULL;
+ int ret = 0;
+
+ gio = GF_CALLOC (1, sizeof (*gio), glfs_mt_glfs_io_t);
+ if (!gio) {
+ errno = ENOMEM;
+ return -1;
+ }
+
+ gio->op = GF_FOP_FTRUNCATE;
+ gio->glfd = glfd;
+ gio->offset = offset;
+ gio->fn = fn;
+ gio->data = data;
+
+ ret = synctask_new (glfs_from_glfd (glfd)->ctx->env,
+ glfs_io_async_task, glfs_io_async_cbk,
+ NULL, gio);
+
+ if (ret) {
+ GF_FREE (gio->iov);
+ GF_FREE (gio);
+ }
+
+ return ret;
+}
+
+
+int
+glfs_access (struct glfs *fs, const char *path, int mode)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = syncop_access (subvol, &loc, mode);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+out:
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_symlink (struct glfs *fs, const char *data, const char *path)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ uuid_generate (gfid);
+ ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (loc.inode) {
+ errno = EEXIST;
+ ret = -1;
+ goto out;
+ }
+
+ if (ret == -1 && errno != ENOENT)
+ /* Any other type of error is fatal */
+ goto out;
+
+ if (ret == -1 && errno == ENOENT && !loc.parent)
+ /* The parent directory or an ancestor even
+ higher does not exist
+ */
+ goto out;
+
+ /* ret == -1 && errno == ENOENT */
+ loc.inode = inode_new (loc.parent->table);
+ if (!loc.inode) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_symlink (subvol, &loc, data, xattr_req, &iatt);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret == 0)
+ ret = glfs_loc_link (&loc, &iatt);
+out:
+ loc_wipe (&loc);
+
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_readlink (struct glfs *fs, const char *path, char *buf, size_t bufsiz)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int reval = 0;
+ char *linkval = NULL;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (iatt.ia_type != IA_IFLNK) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ ret = syncop_readlink (subvol, &loc, &linkval, bufsiz);
+ if (ret > 0) {
+ memcpy (buf, linkval, ret);
+ GF_FREE (linkval);
+ }
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+out:
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_mknod (struct glfs *fs, const char *path, mode_t mode, dev_t dev)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ uuid_generate (gfid);
+ ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (loc.inode) {
+ errno = EEXIST;
+ ret = -1;
+ goto out;
+ }
+
+ if (ret == -1 && errno != ENOENT)
+ /* Any other type of error is fatal */
+ goto out;
+
+ if (ret == -1 && errno == ENOENT && !loc.parent)
+ /* The parent directory or an ancestor even
+ higher does not exist
+ */
+ goto out;
+
+ /* ret == -1 && errno == ENOENT */
+ loc.inode = inode_new (loc.parent->table);
+ if (!loc.inode) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_mknod (subvol, &loc, mode, dev, xattr_req, &iatt);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret == 0)
+ ret = glfs_loc_link (&loc, &iatt);
+out:
+ loc_wipe (&loc);
+
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_mkdir (struct glfs *fs, const char *path, mode_t mode)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ uuid_generate (gfid);
+ ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (loc.inode) {
+ errno = EEXIST;
+ ret = -1;
+ goto out;
+ }
+
+ if (ret == -1 && errno != ENOENT)
+ /* Any other type of error is fatal */
+ goto out;
+
+ if (ret == -1 && errno == ENOENT && !loc.parent)
+ /* The parent directory or an ancestor even
+ higher does not exist
+ */
+ goto out;
+
+ /* ret == -1 && errno == ENOENT */
+ loc.inode = inode_new (loc.parent->table);
+ if (!loc.inode) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_mkdir (subvol, &loc, mode, xattr_req, &iatt);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret == 0)
+ ret = glfs_loc_link (&loc, &iatt);
+out:
+ loc_wipe (&loc);
+
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_unlink (struct glfs *fs, const char *path)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (iatt.ia_type == IA_IFDIR) {
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+
+ ret = syncop_unlink (subvol, &loc);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret == 0)
+ ret = glfs_loc_unlink (&loc);
+out:
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_rmdir (struct glfs *fs, const char *path)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (iatt.ia_type != IA_IFDIR) {
+ ret = -1;
+ errno = ENOTDIR;
+ goto out;
+ }
+
+ ret = syncop_rmdir (subvol, &loc);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret == 0)
+ ret = glfs_loc_unlink (&loc);
+out:
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_rename (struct glfs *fs, const char *oldpath, const char *newpath)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t oldloc = {0, };
+ loc_t newloc = {0, };
+ struct iatt oldiatt = {0, };
+ struct iatt newiatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve (fs, subvol, oldpath, &oldloc, &oldiatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &oldloc, retry);
+
+ if (ret)
+ goto out;
+retrynew:
+ ret = glfs_lresolve (fs, subvol, newpath, &newloc, &newiatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &newloc, retrynew);
+
+ if (ret && errno != ENOENT && newloc.parent)
+ goto out;
+
+ if (newiatt.ia_type != IA_INVAL) {
+ if ((oldiatt.ia_type == IA_IFDIR) !=
+ (newiatt.ia_type == IA_IFDIR)) {
+ /* Either both old and new must be dirs,
+ * or both must be non-dirs. Else, fail.
+ */
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+ }
+
+ /* TODO: check if new or old is a prefix of the other, and fail EINVAL */
+
+ ret = syncop_rename (subvol, &oldloc, &newloc);
+
+ if (ret == -1 && errno == ESTALE) {
+ if (reval < DEFAULT_REVAL_COUNT) {
+ reval++;
+ loc_wipe (&oldloc);
+ loc_wipe (&newloc);
+ goto retry;
+ }
+ }
+
+ if (ret == 0)
+ inode_rename (oldloc.parent->table, oldloc.parent, oldloc.name,
+ newloc.parent, newloc.name, oldloc.inode,
+ &oldiatt);
+out:
+ loc_wipe (&oldloc);
+ loc_wipe (&newloc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_link (struct glfs *fs, const char *oldpath, const char *newpath)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t oldloc = {0, };
+ loc_t newloc = {0, };
+ struct iatt oldiatt = {0, };
+ struct iatt newiatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve (fs, subvol, oldpath, &oldloc, &oldiatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &oldloc, retry);
+
+ if (ret)
+ goto out;
+retrynew:
+ ret = glfs_lresolve (fs, subvol, newpath, &newloc, &newiatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &newloc, retrynew);
+
+ if (ret == 0) {
+ ret = -1;
+ errno = EEXIST;
+ goto out;
+ }
+
+ if (oldiatt.ia_type == IA_IFDIR) {
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+
+ /* Filling the inode of the hard link to be same as that of the
+ original file
+ */
+ if (newloc.inode) {
+ inode_unref (newloc.inode);
+ newloc.inode = NULL;
+ }
+ newloc.inode = inode_ref (oldloc.inode);
+
+ ret = syncop_link (subvol, &oldloc, &newloc);
+
+ if (ret == -1 && errno == ESTALE) {
+ loc_wipe (&oldloc);
+ loc_wipe (&newloc);
+ if (reval--)
+ goto retry;
+ }
+
+ if (ret == 0)
+ ret = glfs_loc_link (&newloc, &oldiatt);
+out:
+ loc_wipe (&oldloc);
+ loc_wipe (&newloc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+struct glfs_fd *
+glfs_opendir (struct glfs *fs, const char *path)
+{
+ int ret = -1;
+ struct glfs_fd *glfd = NULL;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ glfd = glfs_fd_new (fs);
+ if (!glfd)
+ goto out;
+
+ INIT_LIST_HEAD (&glfd->entries);
+retry:
+ ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (!IA_ISDIR (iatt.ia_type)) {
+ ret = -1;
+ errno = ENOTDIR;
+ goto out;
+ }
+
+ if (glfd->fd) {
+ /* Retry. Safe to touch glfd->fd as we
+ still have not glfs_fd_bind() yet.
+ */
+ fd_unref (glfd->fd);
+ glfd->fd = NULL;
+ }
+
+ glfd->fd = fd_create (loc.inode, getpid());
+ if (!glfd->fd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_opendir (subvol, &loc, glfd->fd);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+out:
+ loc_wipe (&loc);
+
+ if (ret && glfd) {
+ glfs_fd_destroy (glfd);
+ glfd = NULL;
+ } else {
+ fd_bind (glfd->fd);
+ glfs_fd_bind (glfd);
+ }
+
+ glfs_subvol_done (fs, subvol);
+
+ return glfd;
+}
+
+
+int
+glfs_closedir (struct glfs_fd *glfd)
+{
+ __glfs_entry_fd (glfd);
+
+ gf_dirent_free (list_entry (&glfd->entries, gf_dirent_t, list));
+
+ glfs_fd_destroy (glfd);
+
+ return 0;
+}
+
+
+long
+glfs_telldir (struct glfs_fd *fd)
+{
+ return fd->offset;
+}
+
+
+void
+glfs_seekdir (struct glfs_fd *fd, long offset)
+{
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+
+ if (fd->offset == offset)
+ return;
+
+ fd->offset = offset;
+ fd->next = NULL;
+
+ list_for_each_entry_safe (entry, tmp, &fd->entries, list) {
+ if (entry->d_off != offset)
+ continue;
+
+ if (&tmp->list != &fd->entries) {
+ /* found! */
+ fd->next = tmp;
+ return;
+ }
+ }
+ /* could not find entry at requested offset in the cache.
+ next readdir_r() will result in glfd_entry_refresh()
+ */
+}
+
+int
+glfs_discard_async (struct glfs_fd *glfd, off_t offset, size_t len,
+ glfs_io_cbk fn, void *data)
+{
+ struct glfs_io *gio = NULL;
+ int ret = 0;
+
+ gio = GF_CALLOC (1, sizeof (*gio), glfs_mt_glfs_io_t);
+ if (!gio) {
+ errno = ENOMEM;
+ return -1;
+ }
+
+ gio->op = GF_FOP_DISCARD;
+ gio->glfd = glfd;
+ gio->offset = offset;
+ gio->count = len;
+ gio->fn = fn;
+ gio->data = data;
+
+ ret = synctask_new (glfs_from_glfd (glfd)->ctx->env,
+ glfs_io_async_task, glfs_io_async_cbk,
+ NULL, gio);
+
+ if (ret) {
+ GF_FREE (gio->iov);
+ GF_FREE (gio);
+ }
+
+ return ret;
+}
+
+int
+glfs_zerofill_async (struct glfs_fd *glfd, off_t offset, size_t len,
+ glfs_io_cbk fn, void *data)
+{
+ struct glfs_io *gio = NULL;
+ int ret = 0;
+
+ gio = GF_CALLOC (1, sizeof (*gio), glfs_mt_glfs_io_t);
+ if (!gio) {
+ errno = ENOMEM;
+ return -1;
+ }
+
+ gio->op = GF_FOP_ZEROFILL;
+ gio->glfd = glfd;
+ gio->offset = offset;
+ gio->count = len;
+ gio->fn = fn;
+ gio->data = data;
+
+ ret = synctask_new (glfs_from_glfd (glfd)->ctx->env,
+ glfs_io_async_task, glfs_io_async_cbk,
+ NULL, gio);
+
+ if (ret) {
+ GF_FREE (gio->iov);
+ GF_FREE (gio);
+ }
+
+ return ret;
+}
+
+
+void
+gf_dirent_to_dirent (gf_dirent_t *gf_dirent, struct dirent *dirent)
+{
+ dirent->d_ino = gf_dirent->d_ino;
+
+#ifdef _DIRENT_HAVE_D_OFF
+ dirent->d_off = gf_dirent->d_off;
+#endif
+
+#ifdef _DIRENT_HAVE_D_TYPE
+ dirent->d_type = gf_dirent->d_type;
+#endif
+
+#ifdef _DIRENT_HAVE_D_NAMLEN
+ dirent->d_namlen = strlen (gf_dirent->d_name);
+#endif
+
+ strncpy (dirent->d_name, gf_dirent->d_name, GF_NAME_MAX + 1);
+}
+
+
+int
+glfd_entry_refresh (struct glfs_fd *glfd, int plus)
+{
+ xlator_t *subvol = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t old;
+ int ret = -1;
+ fd_t *fd = NULL;
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ if (fd->inode->ia_type != IA_IFDIR) {
+ ret = -1;
+ errno = EBADF;
+ goto out;
+ }
+
+ INIT_LIST_HEAD (&entries.list);
+ INIT_LIST_HEAD (&old.list);
+
+ if (plus)
+ ret = syncop_readdirp (subvol, fd, 131072, glfd->offset,
+ NULL, &entries);
+ else
+ ret = syncop_readdir (subvol, fd, 131072, glfd->offset,
+ &entries);
+ if (ret >= 0) {
+ if (plus)
+ gf_link_inodes_from_dirent (THIS, fd->inode, &entries);
+
+ list_splice_init (&glfd->entries, &old.list);
+ list_splice_init (&entries.list, &glfd->entries);
+
+ /* spurious errno is dangerous for glfd_entry_next() */
+ errno = 0;
+ }
+
+ if (ret > 0)
+ glfd->next = list_entry (glfd->entries.next, gf_dirent_t, list);
+
+ gf_dirent_free (&old);
+out:
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+gf_dirent_t *
+glfd_entry_next (struct glfs_fd *glfd, int plus)
+{
+ gf_dirent_t *entry = NULL;
+ int ret = -1;
+
+ if (!glfd->offset || !glfd->next) {
+ ret = glfd_entry_refresh (glfd, plus);
+ if (ret < 0)
+ return NULL;
+ }
+
+ entry = glfd->next;
+ if (!entry)
+ return NULL;
+
+ if (&entry->next->list == &glfd->entries)
+ glfd->next = NULL;
+ else
+ glfd->next = entry->next;
+
+ glfd->offset = entry->d_off;
+
+ return entry;
+}
+
+
+static struct dirent *
+glfs_readdirbuf_get (struct glfs_fd *glfd)
+{
+ struct dirent *buf = NULL;
+
+ LOCK (&glfd->fd->lock);
+ {
+ buf = glfd->readdirbuf;
+ if (buf) {
+ memset (buf, 0, READDIRBUF_SIZE);
+ goto unlock;
+ }
+
+ buf = GF_CALLOC (1, READDIRBUF_SIZE, glfs_mt_readdirbuf_t);
+ if (!buf) {
+ errno = ENOMEM;
+ goto unlock;
+ }
+
+ glfd->readdirbuf = buf;
+ }
+unlock:
+ UNLOCK (&glfd->fd->lock);
+
+ return buf;
+}
+
+
+int
+glfs_readdirplus_r (struct glfs_fd *glfd, struct stat *stat, struct dirent *ext,
+ struct dirent **res)
+{
+ int ret = 0;
+ gf_dirent_t *entry = NULL;
+ struct dirent *buf = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ errno = 0;
+
+ if (ext)
+ buf = ext;
+ else
+ buf = glfs_readdirbuf_get (glfd);
+
+ if (!buf) {
+ errno = ENOMEM;
+ return -1;
+ }
+
+ entry = glfd_entry_next (glfd, !!stat);
+ if (errno)
+ ret = -1;
+
+ if (res) {
+ if (entry)
+ *res = buf;
+ else
+ *res = NULL;
+ }
+
+ if (entry) {
+ gf_dirent_to_dirent (entry, buf);
+ if (stat)
+ glfs_iatt_to_stat (glfd->fs, &entry->d_stat, stat);
+ }
+
+ return ret;
+}
+
+
+int
+glfs_readdir_r (struct glfs_fd *glfd, struct dirent *buf, struct dirent **res)
+{
+ return glfs_readdirplus_r (glfd, 0, buf, res);
+}
+
+
+struct dirent *
+glfs_readdirplus (struct glfs_fd *glfd, struct stat *stat)
+{
+ struct dirent *res = NULL;
+ int ret = -1;
+
+ ret = glfs_readdirplus_r (glfd, stat, NULL, &res);
+ if (ret)
+ return NULL;
+
+ return res;
+}
+
+
+
+struct dirent *
+glfs_readdir (struct glfs_fd *glfd)
+{
+ return glfs_readdirplus (glfd, NULL);
+}
+
+
+int
+glfs_statvfs (struct glfs *fs, const char *path, struct statvfs *buf)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = syncop_statfs (subvol, &loc, buf);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+out:
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_setattr (struct glfs *fs, const char *path, struct iatt *iatt,
+ int valid, int follow)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt riatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ if (follow)
+ ret = glfs_resolve (fs, subvol, path, &loc, &riatt, reval);
+ else
+ ret = glfs_lresolve (fs, subvol, path, &loc, &riatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = syncop_setattr (subvol, &loc, iatt, valid, 0, 0);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+out:
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_fsetattr (struct glfs_fd *glfd, struct iatt *iatt, int valid)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_fsetattr (subvol, fd, iatt, valid, 0, 0);
+out:
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_chmod (struct glfs *fs, const char *path, mode_t mode)
+{
+ int ret = -1;
+ struct iatt iatt = {0, };
+ int valid = 0;
+
+ iatt.ia_prot = ia_prot_from_st_mode (mode);
+ valid = GF_SET_ATTR_MODE;
+
+ ret = glfs_setattr (fs, path, &iatt, valid, 1);
+
+ return ret;
+}
+
+
+int
+glfs_fchmod (struct glfs_fd *glfd, mode_t mode)
+{
+ int ret = -1;
+ struct iatt iatt = {0, };
+ int valid = 0;
+
+ iatt.ia_prot = ia_prot_from_st_mode (mode);
+ valid = GF_SET_ATTR_MODE;
+
+ ret = glfs_fsetattr (glfd, &iatt, valid);
+
+ return ret;
+}
+
+
+int
+glfs_chown (struct glfs *fs, const char *path, uid_t uid, gid_t gid)
+{
+ int ret = -1;
+ int valid = 0;
+ struct iatt iatt = {0, };
+
+ iatt.ia_uid = uid;
+ iatt.ia_gid = gid;
+ valid = GF_SET_ATTR_UID|GF_SET_ATTR_GID;
+
+ ret = glfs_setattr (fs, path, &iatt, valid, 1);
+
+ return ret;
+}
+
+
+int
+glfs_lchown (struct glfs *fs, const char *path, uid_t uid, gid_t gid)
+{
+ int ret = -1;
+ int valid = 0;
+ struct iatt iatt = {0, };
+
+ iatt.ia_uid = uid;
+ iatt.ia_gid = gid;
+ valid = GF_SET_ATTR_UID|GF_SET_ATTR_GID;
+
+ ret = glfs_setattr (fs, path, &iatt, valid, 0);
+
+ return ret;
+}
+
+
+int
+glfs_fchown (struct glfs_fd *glfd, uid_t uid, gid_t gid)
+{
+ int ret = -1;
+ int valid = 0;
+ struct iatt iatt = {0, };
+
+ iatt.ia_uid = uid;
+ iatt.ia_gid = gid;
+ valid = GF_SET_ATTR_UID|GF_SET_ATTR_GID;
+
+ ret = glfs_fsetattr (glfd, &iatt, valid);
+
+ return ret;
+}
+
+
+int
+glfs_utimens (struct glfs *fs, const char *path, struct timespec times[2])
+{
+ int ret = -1;
+ int valid = 0;
+ struct iatt iatt = {0, };
+
+ iatt.ia_atime = times[0].tv_sec;
+ iatt.ia_atime_nsec = times[0].tv_nsec;
+ iatt.ia_mtime = times[1].tv_sec;
+ iatt.ia_mtime_nsec = times[1].tv_nsec;
+
+ valid = GF_SET_ATTR_ATIME|GF_SET_ATTR_MTIME;
+
+ ret = glfs_setattr (fs, path, &iatt, valid, 1);
+
+ return ret;
+}
+
+
+int
+glfs_lutimens (struct glfs *fs, const char *path, struct timespec times[2])
+{
+ int ret = -1;
+ int valid = 0;
+ struct iatt iatt = {0, };
+
+ iatt.ia_atime = times[0].tv_sec;
+ iatt.ia_atime_nsec = times[0].tv_nsec;
+ iatt.ia_mtime = times[1].tv_sec;
+ iatt.ia_mtime_nsec = times[1].tv_nsec;
+
+ valid = GF_SET_ATTR_ATIME|GF_SET_ATTR_MTIME;
+
+ ret = glfs_setattr (fs, path, &iatt, valid, 0);
+
+ return ret;
+}
+
+
+int
+glfs_futimens (struct glfs_fd *glfd, struct timespec times[2])
+{
+ int ret = -1;
+ int valid = 0;
+ struct iatt iatt = {0, };
+
+ iatt.ia_atime = times[0].tv_sec;
+ iatt.ia_atime_nsec = times[0].tv_nsec;
+ iatt.ia_mtime = times[1].tv_sec;
+ iatt.ia_mtime_nsec = times[1].tv_nsec;
+
+ valid = GF_SET_ATTR_ATIME|GF_SET_ATTR_MTIME;
+
+ ret = glfs_fsetattr (glfd, &iatt, valid);
+
+ return ret;
+}
+
+
+int
+glfs_getxattr_process (void *value, size_t size, dict_t *xattr,
+ const char *name)
+{
+ data_t *data = NULL;
+ int ret = -1;
+
+ data = dict_get (xattr, (char *)name);
+ if (!data) {
+ errno = ENODATA;
+ ret = -1;
+ goto out;
+ }
+
+ ret = data->len;
+ if (!value || !size)
+ goto out;
+
+ if (size < ret) {
+ ret = -1;
+ errno = ERANGE;
+ goto out;
+ }
+
+ memcpy (value, data->data, ret);
+out:
+ if (xattr)
+ dict_unref (xattr);
+ return ret;
+}
+
+
+ssize_t
+glfs_getxattr_common (struct glfs *fs, const char *path, const char *name,
+ void *value, size_t size, int follow)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ dict_t *xattr = NULL;
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ if (follow)
+ ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+ else
+ ret = glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = syncop_getxattr (subvol, &loc, &xattr, name);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = glfs_getxattr_process (value, size, xattr, name);
+out:
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+ssize_t
+glfs_getxattr (struct glfs *fs, const char *path, const char *name,
+ void *value, size_t size)
+{
+ return glfs_getxattr_common (fs, path, name, value, size, 1);
+}
+
+
+ssize_t
+glfs_lgetxattr (struct glfs *fs, const char *path, const char *name,
+ void *value, size_t size)
+{
+ return glfs_getxattr_common (fs, path, name, value, size, 0);
+}
+
+
+ssize_t
+glfs_fgetxattr (struct glfs_fd *glfd, const char *name, void *value,
+ size_t size)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ dict_t *xattr = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_fgetxattr (subvol, fd, &xattr, name);
+ if (ret)
+ goto out;
+
+ ret = glfs_getxattr_process (value, size, xattr, name);
+out:
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_listxattr_process (void *value, size_t size, dict_t *xattr)
+{
+ int ret = -1;
+
+ ret = dict_keys_join (NULL, 0, xattr, NULL);
+
+ if (!value || !size)
+ goto out;
+
+ if (size < ret) {
+ ret = -1;
+ errno = ERANGE;
+ goto out;
+ }
+
+ dict_keys_join (value, size, xattr, NULL);
+out:
+ if (xattr)
+ dict_unref (xattr);
+ return ret;
+}
+
+
+ssize_t
+glfs_listxattr_common (struct glfs *fs, const char *path, void *value,
+ size_t size, int follow)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ dict_t *xattr = NULL;
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+retry:
+ if (follow)
+ ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+ else
+ ret = glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = syncop_getxattr (subvol, &loc, &xattr, NULL);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = glfs_listxattr_process (value, size, xattr);
+out:
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+ssize_t
+glfs_listxattr (struct glfs *fs, const char *path, void *value, size_t size)
+{
+ return glfs_listxattr_common (fs, path, value, size, 1);
+}
+
+
+ssize_t
+glfs_llistxattr (struct glfs *fs, const char *path, void *value, size_t size)
+{
+ return glfs_listxattr_common (fs, path, value, size, 0);
+}
+
+
+ssize_t
+glfs_flistxattr (struct glfs_fd *glfd, void *value, size_t size)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ dict_t *xattr = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_fgetxattr (subvol, fd, &xattr, NULL);
+ if (ret)
+ goto out;
+
+ ret = glfs_listxattr_process (value, size, xattr);
+out:
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+dict_t *
+dict_for_key_value (const char *name, const char *value, size_t size)
+{
+ dict_t *xattr = NULL;
+ int ret = 0;
+
+ xattr = dict_new ();
+ if (!xattr)
+ return NULL;
+
+ ret = dict_set_static_bin (xattr, (char *)name, (void *)value, size);
+ if (ret) {
+ dict_destroy (xattr);
+ xattr = NULL;
+ }
+
+ return xattr;
+}
+
+
+int
+glfs_setxattr_common (struct glfs *fs, const char *path, const char *name,
+ const void *value, size_t size, int flags, int follow)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ dict_t *xattr = NULL;
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ if (follow)
+ ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+ else
+ ret = glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ xattr = dict_for_key_value (name, value, size);
+ if (!xattr) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_setxattr (subvol, &loc, xattr, flags);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+out:
+ loc_wipe (&loc);
+ if (xattr)
+ dict_unref (xattr);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_setxattr (struct glfs *fs, const char *path, const char *name,
+ const void *value, size_t size, int flags)
+{
+ return glfs_setxattr_common (fs, path, name, value, size, flags, 1);
+}
+
+
+int
+glfs_lsetxattr (struct glfs *fs, const char *path, const char *name,
+ const void *value, size_t size, int flags)
+{
+ return glfs_setxattr_common (fs, path, name, value, size, flags, 0);
+}
+
+
+int
+glfs_fsetxattr (struct glfs_fd *glfd, const char *name, const void *value,
+ size_t size, int flags)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ dict_t *xattr = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ xattr = dict_for_key_value (name, value, size);
+ if (!xattr) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_fsetxattr (subvol, fd, xattr, flags);
+out:
+ if (xattr)
+ dict_unref (xattr);
+
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_removexattr_common (struct glfs *fs, const char *path, const char *name,
+ int follow)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ if (follow)
+ ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+ else
+ ret = glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = syncop_removexattr (subvol, &loc, name);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+out:
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_removexattr (struct glfs *fs, const char *path, const char *name)
+{
+ return glfs_removexattr_common (fs, path, name, 1);
+}
+
+
+int
+glfs_lremovexattr (struct glfs *fs, const char *path, const char *name)
+{
+ return glfs_removexattr_common (fs, path, name, 0);
+}
+
+
+int
+glfs_fremovexattr (struct glfs_fd *glfd, const char *name)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_fremovexattr (subvol, fd, name);
+out:
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_fallocate (struct glfs_fd *glfd, int keep_size, off_t offset, size_t len)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_fallocate (subvol, fd, keep_size, offset, len);
+out:
+ if (fd)
+ fd_unref(fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_discard (struct glfs_fd *glfd, off_t offset, size_t len)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_discard (subvol, fd, offset, len);
+out:
+ if (fd)
+ fd_unref(fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+int
+glfs_zerofill (struct glfs_fd *glfd, off_t offset, size_t len)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_zerofill (subvol, fd, offset, len);
+out:
+ if (fd)
+ fd_unref(fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+int
+glfs_chdir (struct glfs *fs, const char *path)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (!IA_ISDIR (iatt.ia_type)) {
+ ret = -1;
+ errno = ENOTDIR;
+ goto out;
+ }
+
+ glfs_cwd_set (fs, loc.inode);
+
+out:
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+int
+glfs_fchdir (struct glfs_fd *glfd)
+{
+ int ret = -1;
+ inode_t *inode = NULL;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ inode = fd->inode;
+
+ if (!IA_ISDIR (inode->ia_type)) {
+ ret = -1;
+ errno = ENOTDIR;
+ goto out;
+ }
+
+ glfs_cwd_set (glfd->fs, inode);
+ ret = 0;
+out:
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+char *
+glfs_realpath (struct glfs *fs, const char *path, char *resolved_path)
+{
+ int ret = -1;
+ char *retpath = NULL;
+ char *allocpath = NULL;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ if (resolved_path)
+ retpath = resolved_path;
+ else
+ retpath = allocpath = malloc (PATH_MAX + 1);
+
+ if (!retpath) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (loc.path) {
+ strncpy (retpath, loc.path, PATH_MAX);
+ retpath[PATH_MAX] = 0;
+ }
+
+out:
+ loc_wipe (&loc);
+
+ if (ret == -1) {
+ if (allocpath)
+ free (allocpath);
+ retpath = NULL;
+ }
+
+ glfs_subvol_done (fs, subvol);
+
+ return retpath;
+}
+
+
+char *
+glfs_getcwd (struct glfs *fs, char *buf, size_t n)
+{
+ int ret = -1;
+ inode_t *inode = NULL;
+ char *path = NULL;
+
+ __glfs_entry_fs (fs);
+
+ if (!buf || n < 2) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ inode = glfs_cwd_get (fs);
+
+ if (!inode) {
+ strncpy (buf, "/", n);
+ ret = 0;
+ goto out;
+ }
+
+ ret = inode_path (inode, 0, &path);
+ if (n <= ret) {
+ ret = -1;
+ errno = ERANGE;
+ goto out;
+ }
+
+ strncpy (buf, path, n);
+ ret = 0;
+out:
+ GF_FREE (path);
+
+ if (inode)
+ inode_unref (inode);
+
+ if (ret < 0)
+ return NULL;
+
+ return buf;
+}
+
+
+static void
+gf_flock_to_flock (struct gf_flock *gf_flock, struct flock *flock)
+{
+ flock->l_type = gf_flock->l_type;
+ flock->l_whence = gf_flock->l_whence;
+ flock->l_start = gf_flock->l_start;
+ flock->l_len = gf_flock->l_len;
+ flock->l_pid = gf_flock->l_pid;
+}
+
+
+static void
+gf_flock_from_flock (struct gf_flock *gf_flock, struct flock *flock)
+{
+ gf_flock->l_type = flock->l_type;
+ gf_flock->l_whence = flock->l_whence;
+ gf_flock->l_start = flock->l_start;
+ gf_flock->l_len = flock->l_len;
+ gf_flock->l_pid = flock->l_pid;
+}
+
+
+int
+glfs_posix_lock (struct glfs_fd *glfd, int cmd, struct flock *flock)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ struct gf_flock gf_flock = {0, };
+ struct gf_flock saved_flock = {0, };
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ gf_flock_from_flock (&gf_flock, flock);
+ gf_flock_from_flock (&saved_flock, flock);
+ ret = syncop_lk (subvol, fd, cmd, &gf_flock);
+ gf_flock_to_flock (&gf_flock, flock);
+
+ if (ret == 0 && (cmd == F_SETLK || cmd == F_SETLKW))
+ fd_lk_insert_and_merge (fd, cmd, &saved_flock);
+out:
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+struct glfs_fd *
+glfs_dup (struct glfs_fd *glfd)
+{
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+ glfs_fd_t *dupfd = NULL;
+ struct glfs *fs = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ fs = glfd->fs;
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (fs, subvol, glfd);
+ if (!fd) {
+ errno = EBADFD;
+ goto out;
+ }
+
+ dupfd = glfs_fd_new (fs);
+ if (!dupfd) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ dupfd->fd = fd_ref (fd);
+out:
+ if (fd)
+ fd_unref (fd);
+ if (dupfd)
+ glfs_fd_bind (dupfd);
+
+ glfs_subvol_done (fs, subvol);
+
+ return dupfd;
+}
diff --git a/api/src/glfs-handleops.c b/api/src/glfs-handleops.c
new file mode 100644
index 000000000..9c707a619
--- /dev/null
+++ b/api/src/glfs-handleops.c
@@ -0,0 +1,1278 @@
+/*
+ * Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+
+#include "glfs-internal.h"
+#include "glfs-mem-types.h"
+#include "syncop.h"
+#include "glfs.h"
+#include "glfs-handles.h"
+
+static void
+glfs_iatt_from_stat (struct stat *stat, int valid, struct iatt *iatt,
+ int *glvalid)
+{
+ /* validate in args */
+ if ((stat == NULL) || (iatt == NULL) || (glvalid == NULL)) {
+ errno = EINVAL;
+ return;
+ }
+
+ *glvalid = 0;
+
+ if (valid & GFAPI_SET_ATTR_MODE) {
+ iatt->ia_prot = ia_prot_from_st_mode (stat->st_mode);
+ *glvalid |= GF_SET_ATTR_MODE;
+ }
+
+ if (valid & GFAPI_SET_ATTR_UID) {
+ iatt->ia_uid = stat->st_uid;
+ *glvalid |= GF_SET_ATTR_UID;
+ }
+
+ if (valid & GFAPI_SET_ATTR_GID) {
+ iatt->ia_gid = stat->st_gid;
+ *glvalid |= GF_SET_ATTR_GID;
+ }
+
+ if (valid & GFAPI_SET_ATTR_ATIME) {
+ iatt->ia_atime = stat->st_atime;
+ iatt->ia_atime_nsec = ST_ATIM_NSEC (stat);
+ *glvalid |= GF_SET_ATTR_ATIME;
+ }
+
+ if (valid & GFAPI_SET_ATTR_MTIME) {
+ iatt->ia_mtime = stat->st_mtime;
+ iatt->ia_mtime_nsec = ST_MTIM_NSEC (stat);
+ *glvalid |= GF_SET_ATTR_MTIME;
+ }
+
+ return;
+}
+
+struct glfs_object *
+glfs_h_lookupat (struct glfs *fs, struct glfs_object *parent,
+ const char *path, struct stat *stat)
+{
+ int ret = 0;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ struct iatt iatt = {0, };
+ struct glfs_object *object = NULL;
+ loc_t loc = {0, };
+
+ /* validate in args */
+ if ((fs == NULL) || (path == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ if (parent) {
+ inode = glfs_resolve_inode (fs, subvol, parent);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+ }
+
+ /* fop/op */
+ ret = glfs_resolve_at (fs, subvol, inode, path, &loc, &iatt,
+ 0 /*TODO: links? */, 0);
+
+ /* populate out args */
+ if (!ret) {
+ if (stat)
+ glfs_iatt_to_stat (fs, &iatt, stat);
+
+ ret = glfs_create_object (&loc, &object);
+ }
+
+out:
+ loc_wipe (&loc);
+
+ if (inode)
+ inode_unref (inode);
+
+ glfs_subvol_done (fs, subvol);
+
+ return object;
+}
+
+int
+glfs_h_stat (struct glfs *fs, struct glfs_object *object, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* populate loc */
+ GLFS_LOC_FILL_INODE (inode, loc, out);
+
+ /* fop/op */
+ ret = syncop_stat (subvol, &loc, &iatt);
+
+ /* populate out args */
+ if (!ret && stat) {
+ glfs_iatt_to_stat (fs, &iatt, stat);
+ }
+out:
+ loc_wipe (&loc);
+
+ if (inode)
+ inode_unref (inode);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+int
+glfs_h_getattrs (struct glfs *fs, struct glfs_object *object, struct stat *stat)
+{
+ int ret = 0;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ struct iatt iatt = {0, };
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* fop/op */
+ ret = glfs_resolve_base (fs, subvol, inode, &iatt);
+
+ /* populate out args */
+ if (!ret && stat) {
+ glfs_iatt_to_stat (fs, &iatt, stat);
+ }
+
+out:
+ if (inode)
+ inode_unref (inode);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+int
+glfs_h_setattrs (struct glfs *fs, struct glfs_object *object, struct stat *stat,
+ int valid)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int glvalid = 0;
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL) || (stat == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* map valid masks from in args */
+ glfs_iatt_from_stat (stat, valid, &iatt, &glvalid);
+
+ /* populate loc */
+ GLFS_LOC_FILL_INODE (inode, loc, out);
+
+ /* fop/op */
+ ret = syncop_setattr (subvol, &loc, &iatt, glvalid, 0, 0);
+out:
+ loc_wipe (&loc);
+
+ if (inode)
+ inode_unref (inode);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+struct glfs_fd *
+glfs_h_open (struct glfs *fs, struct glfs_object *object, int flags)
+{
+ int ret = -1;
+ struct glfs_fd *glfd = NULL;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {0, };
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* check types to open */
+ if (IA_ISDIR (inode->ia_type)) {
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+
+ if (!IA_ISREG (inode->ia_type)) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ glfd = glfs_fd_new (fs);
+ if (!glfd) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ glfd->fd = fd_create (inode, getpid());
+ if (!glfd->fd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ /* populate loc */
+ GLFS_LOC_FILL_INODE (inode, loc, out);
+
+ /* fop/op */
+ ret = syncop_open (subvol, &loc, flags, glfd->fd);
+
+out:
+ loc_wipe (&loc);
+
+ if (inode)
+ inode_unref (inode);
+
+ if (ret && glfd) {
+ glfs_fd_destroy (glfd);
+ glfd = NULL;
+ } else {
+ glfd->fd->flags = flags;
+ fd_bind (glfd->fd);
+ glfs_fd_bind (glfd);
+ }
+
+ glfs_subvol_done (fs, subvol);
+
+ return glfd;
+}
+
+struct glfs_object *
+glfs_h_creat (struct glfs *fs, struct glfs_object *parent, const char *path,
+ int flags, mode_t mode, struct stat *stat)
+{
+ int ret = -1;
+ struct glfs_fd *glfd = NULL;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ struct glfs_object *object = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (parent == NULL) || (path == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, parent);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ uuid_generate (gfid);
+ ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_PINODE (inode, loc, ret, errno, out, path);
+
+ glfd = glfs_fd_new (fs);
+ if (!glfd)
+ goto out;
+
+ glfd->fd = fd_create (loc.inode, getpid());
+ if (!glfd->fd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ /* fop/op */
+ ret = syncop_create (subvol, &loc, flags, mode, glfd->fd,
+ xattr_req, &iatt);
+
+ /* populate out args */
+ if (ret == 0) {
+ /* TODO: If the inode existed in the cache (say file already
+ exists), then the glfs_loc_link will not update the
+ loc.inode, as a result we will have a 0000 GFID that we
+ would copy out to the object, this needs to be fixed.
+ */
+ ret = glfs_loc_link (&loc, &iatt);
+ if (ret != 0) {
+ goto out;
+ }
+
+ if (stat)
+ glfs_iatt_to_stat (fs, &iatt, stat);
+
+ ret = glfs_create_object (&loc, &object);
+ }
+
+out:
+ if (ret && object != NULL) {
+ glfs_h_close (object);
+ object = NULL;
+ }
+
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref (inode);
+
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ if (glfd) {
+ glfs_fd_destroy (glfd);
+ glfd = NULL;
+ }
+
+ glfs_subvol_done (fs, subvol);
+
+ return object;
+}
+
+struct glfs_object *
+glfs_h_mkdir (struct glfs *fs, struct glfs_object *parent, const char *path,
+ mode_t mode, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ struct glfs_object *object = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (parent == NULL) || (path == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, parent);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ uuid_generate (gfid);
+ ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_PINODE (inode, loc, ret, errno, out, path);
+
+ /* fop/op */
+ ret = syncop_mkdir (subvol, &loc, mode, xattr_req, &iatt);
+
+ /* populate out args */
+ if ( ret == 0 ) {
+ ret = glfs_loc_link (&loc, &iatt);
+ if (ret != 0) {
+ goto out;
+ }
+
+ if (stat)
+ glfs_iatt_to_stat (fs, &iatt, stat);
+
+ ret = glfs_create_object (&loc, &object);
+ }
+
+out:
+ if (ret && object != NULL) {
+ glfs_h_close (object);
+ object = NULL;
+ }
+
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref (inode);
+
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ glfs_subvol_done (fs, subvol);
+
+ return object;
+}
+
+struct glfs_object *
+glfs_h_mknod (struct glfs *fs, struct glfs_object *parent, const char *path,
+ mode_t mode, dev_t dev, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ struct glfs_object *object = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (parent == NULL) || (path == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, parent);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ uuid_generate (gfid);
+ ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_PINODE (inode, loc, ret, errno, out, path);
+
+ /* fop/op */
+ ret = syncop_mknod (subvol, &loc, mode, dev, xattr_req, &iatt);
+
+ /* populate out args */
+ if (ret == 0) {
+ ret = glfs_loc_link (&loc, &iatt);
+ if (ret != 0) {
+ goto out;
+ }
+
+ if (stat)
+ glfs_iatt_to_stat (fs, &iatt, stat);
+
+ ret = glfs_create_object (&loc, &object);
+ }
+out:
+ if (ret && object != NULL) {
+ glfs_h_close (object);
+ object = NULL;
+ }
+
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref (inode);
+
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ glfs_subvol_done (fs, subvol);
+
+ return object;
+}
+
+int
+glfs_h_unlink (struct glfs *fs, struct glfs_object *parent, const char *path)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {0, };
+
+ /* validate in args */
+ if ((fs == NULL) || (parent == NULL) || (path == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if ( !subvol ) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, parent);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ ret = glfs_resolve_at (fs, subvol, inode, path, &loc, NULL, 0 , 0);
+ if (ret != 0) {
+ goto out;
+ }
+
+ if (!IA_ISDIR(loc.inode->ia_type)) {
+ ret = syncop_unlink (subvol, &loc);
+ if (ret != 0) {
+ goto out;
+ }
+ } else {
+ ret = syncop_rmdir (subvol, &loc);
+ if (ret != 0) {
+ goto out;
+ }
+ }
+
+ if (ret == 0)
+ ret = glfs_loc_unlink (&loc);
+
+out:
+ loc_wipe (&loc);
+
+ if (inode)
+ inode_unref (inode);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+struct glfs_fd *
+glfs_h_opendir (struct glfs *fs, struct glfs_object *object)
+{
+ int ret = -1;
+ struct glfs_fd *glfd = NULL;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {0, };
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ if (!IA_ISDIR (inode->ia_type)) {
+ ret = -1;
+ errno = ENOTDIR;
+ goto out;
+ }
+
+ glfd = glfs_fd_new (fs);
+ if (!glfd)
+ goto out;
+
+ INIT_LIST_HEAD (&glfd->entries);
+
+ glfd->fd = fd_create (inode, getpid());
+ if (!glfd->fd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_INODE (inode, loc, out);
+
+ /* fop/op */
+ ret = syncop_opendir (subvol, &loc, glfd->fd);
+
+out:
+ loc_wipe (&loc);
+
+ if (inode)
+ inode_unref (inode);
+
+ if (ret && glfd) {
+ glfs_fd_destroy (glfd);
+ glfd = NULL;
+ } else {
+ fd_bind (glfd->fd);
+ glfs_fd_bind (glfd);
+ }
+
+ glfs_subvol_done (fs, subvol);
+
+ return glfd;
+}
+
+ssize_t
+glfs_h_extract_handle (struct glfs_object *object, unsigned char *handle,
+ int len)
+{
+ ssize_t ret = -1;
+
+ /* validate in args */
+ if (object == NULL) {
+ errno = EINVAL;
+ goto out;
+ }
+
+ if (!handle || !len) {
+ ret = GFAPI_HANDLE_LENGTH;
+ goto out;
+ }
+
+ if (len < GFAPI_HANDLE_LENGTH)
+ {
+ errno = ERANGE;
+ goto out;
+ }
+
+ memcpy (handle, object->gfid, GFAPI_HANDLE_LENGTH);
+
+ ret = GFAPI_HANDLE_LENGTH;
+
+out:
+ return ret;
+}
+
+struct glfs_object *
+glfs_h_create_from_handle (struct glfs *fs, unsigned char *handle, int len,
+ struct stat *stat)
+{
+ loc_t loc = {0, };
+ int ret = -1;
+ struct iatt iatt = {0, };
+ inode_t *newinode = NULL;
+ xlator_t *subvol = NULL;
+ struct glfs_object *object = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (handle == NULL) || (len != GFAPI_HANDLE_LENGTH)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ memcpy (loc.gfid, handle, GFAPI_HANDLE_LENGTH);
+
+ newinode = inode_find (subvol->itable, loc.gfid);
+ if (newinode)
+ loc.inode = newinode;
+ else {
+ loc.inode = inode_new (subvol->itable);
+ if (!loc.inode) {
+ errno = ENOMEM;
+ goto out;
+ }
+ }
+
+ ret = syncop_lookup (subvol, &loc, 0, &iatt, 0, 0);
+ if (ret) {
+ gf_log (subvol->name, GF_LOG_WARNING,
+ "inode refresh of %s failed: %s",
+ uuid_utoa (loc.gfid), strerror (errno));
+ goto out;
+ }
+
+ newinode = inode_link (loc.inode, 0, 0, &iatt);
+ if (newinode)
+ inode_lookup (newinode);
+ else {
+ gf_log (subvol->name, GF_LOG_WARNING,
+ "inode linking of %s failed: %s",
+ uuid_utoa (loc.gfid), strerror (errno));
+ errno = EINVAL;
+ goto out;
+ }
+
+ /* populate stat */
+ if (stat)
+ glfs_iatt_to_stat (fs, &iatt, stat);
+
+ object = GF_CALLOC (1, sizeof(struct glfs_object),
+ glfs_mt_glfs_object_t);
+ if (object == NULL) {
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ /* populate the return object */
+ object->inode = newinode;
+ uuid_copy (object->gfid, object->inode->gfid);
+
+out:
+ /* TODO: Check where the inode ref is being held? */
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return object;
+}
+
+int
+glfs_h_close (struct glfs_object *object)
+{
+ /* Release the held reference */
+ inode_unref (object->inode);
+ GF_FREE (object);
+
+ return 0;
+}
+
+int
+glfs_h_truncate (struct glfs *fs, struct glfs_object *object, off_t offset)
+{
+ loc_t loc = {0, };
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_INODE (inode, loc, out);
+
+ /* fop/op */
+ ret = syncop_truncate (subvol, &loc, (off_t)offset);
+
+ /* populate out args */
+ if (ret == 0)
+ ret = glfs_loc_unlink (&loc);
+
+out:
+ loc_wipe (&loc);
+
+ if (inode)
+ inode_unref (inode);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+struct glfs_object *
+glfs_h_symlink (struct glfs *fs, struct glfs_object *parent, const char *name,
+ const char *data, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ struct glfs_object *object = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (parent == NULL) || (name == NULL) ||
+ (data == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, parent);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ uuid_generate (gfid);
+ ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_PINODE (inode, loc, ret, errno, out, name);
+
+ /* fop/op */
+ ret = syncop_symlink (subvol, &loc, data, xattr_req, &iatt);
+
+ /* populate out args */
+ if (ret == 0) {
+ /* TODO: If the inode existed in the cache (say file already
+ * exists), then the glfs_loc_link will not update the
+ * loc.inode, as a result we will have a 0000 GFID that we
+ * would copy out to the object, this needs to be fixed.
+ */
+ ret = glfs_loc_link (&loc, &iatt);
+ if (ret != 0) {
+ goto out;
+ }
+
+ if (stat)
+ glfs_iatt_to_stat (fs, &iatt, stat);
+
+ ret = glfs_create_object (&loc, &object);
+ }
+
+out:
+ if (ret && object != NULL) {
+ glfs_h_close (object);
+ object = NULL;
+ }
+
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref (inode);
+
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ glfs_subvol_done (fs, subvol);
+
+ return object;
+}
+
+int
+glfs_h_readlink (struct glfs *fs, struct glfs_object *object, char *buf,
+ size_t bufsiz)
+{
+ loc_t loc = {0, };
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ char *linkval = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL) || (buf == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_INODE (inode, loc, out);
+
+ /* fop/op */
+ ret = syncop_readlink (subvol, &loc, &linkval, bufsiz);
+
+ /* populate out args */
+ if (ret > 0)
+ memcpy (buf, linkval, ret);
+
+out:
+ loc_wipe (&loc);
+
+ if (inode)
+ inode_unref (inode);
+
+ if (linkval)
+ GF_FREE (linkval);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+int
+glfs_h_link (struct glfs *fs, struct glfs_object *linksrc,
+ struct glfs_object *parent, const char *name)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ inode_t *pinode = NULL;
+ loc_t oldloc = {0, };
+ loc_t newloc = {0, };
+
+ /* validate in args */
+ if ((fs == NULL) || (linksrc == NULL) || (parent == NULL) ||
+ (name == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, linksrc);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ if (inode->ia_type == IA_IFDIR) {
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_INODE (inode, oldloc, out);
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ pinode = glfs_resolve_inode (fs, subvol, parent);
+ if (!pinode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* setup newloc based on parent */
+ newloc.parent = inode_ref (pinode);
+ newloc.name = name;
+ ret = glfs_loc_touchup (&newloc);
+ if (ret != 0) {
+ errno = EINVAL;
+ goto out;
+ }
+
+ /* Filling the inode of the hard link to be same as that of the
+ * original file
+ */
+ newloc.inode = inode_ref (inode);
+
+ /* fop/op */
+ ret = syncop_link (subvol, &oldloc, &newloc);
+
+ if (ret == 0)
+ /* TODO: No iatt to pass as there has been no lookup */
+ ret = glfs_loc_link (&newloc, NULL);
+out:
+ loc_wipe (&oldloc);
+ loc_wipe (&newloc);
+
+ if (inode)
+ inode_unref (inode);
+
+ if (pinode)
+ inode_unref (pinode);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+int
+glfs_h_rename (struct glfs *fs, struct glfs_object *olddir, const char *oldname,
+ struct glfs_object *newdir, const char *newname)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *oldpinode = NULL;
+ inode_t *newpinode = NULL;
+ loc_t oldloc = {0, };
+ loc_t newloc = {0, };
+ struct iatt oldiatt = {0, };
+ struct iatt newiatt = {0, };
+
+ /* validate in args */
+ if ((fs == NULL) || (olddir == NULL) || (oldname == NULL) ||
+ (newdir == NULL) || (newname == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if ( !subvol ) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ oldpinode = glfs_resolve_inode (fs, subvol, olddir);
+ if (!oldpinode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ ret = glfs_resolve_at (fs, subvol, oldpinode, oldname, &oldloc,
+ &oldiatt, 0 , 0);
+ if (ret != 0) {
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ newpinode = glfs_resolve_inode (fs, subvol, newdir);
+ if (!newpinode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ ret = glfs_resolve_at (fs, subvol, newpinode, newname, &newloc,
+ &newiatt, 0, 0);
+
+ if (ret && errno != ENOENT && newloc.parent)
+ goto out;
+
+ if (newiatt.ia_type != IA_INVAL) {
+ if ((oldiatt.ia_type == IA_IFDIR) !=
+ (newiatt.ia_type == IA_IFDIR)) {
+ /* Either both old and new must be dirs,
+ * or both must be non-dirs. Else, fail.
+ */
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+ }
+
+ /* TODO: check if new or old is a prefix of the other, and fail EINVAL */
+
+ ret = syncop_rename (subvol, &oldloc, &newloc);
+
+ if (ret == 0)
+ inode_rename (oldloc.parent->table, oldloc.parent, oldloc.name,
+ newloc.parent, newloc.name, oldloc.inode,
+ &oldiatt);
+
+out:
+ loc_wipe (&oldloc);
+ loc_wipe (&newloc);
+
+ if (oldpinode)
+ inode_unref (oldpinode);
+
+ if (newpinode)
+ inode_unref (newpinode);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
diff --git a/api/src/glfs-handles.h b/api/src/glfs-handles.h
new file mode 100644
index 000000000..437f2cbc8
--- /dev/null
+++ b/api/src/glfs-handles.h
@@ -0,0 +1,143 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLFS_HANDLES_H
+#define _GLFS_HANDLES_H
+
+#include "glfs.h"
+
+/* GLFS OBJECT BASED OPERATIONS
+ *
+ * The following APIs are introduced to provide an API framework that can work
+ * with gluster objects (files and directories), instead of absolute paths.
+ *
+ * The following API set can be related to the POSIX *at interfaces (like
+ * openat (2)). The intention of these APIs is to be able to operate based
+ * on parent object and looking up or creating child objects within, OR to be
+ * used on the actual object thus looked up or created, and retrieve information
+ * regarding the same.
+ *
+ * The APIs also provide for generating an opaque invariant handle to the
+ * object, that can later be used to lookup the object, instead of the regular
+ * glfs_h_* variants. The APIs that provide this behaviour are,
+ * glfs_h_extract_handle and glfs_h_create_from_handle.
+ *
+ * The object handles can be transitioned to fd based operations as supported
+ * by glfs.h calls, using the glfs_h_open call. This provides a way to move
+ * from objects to fd's akin to moving from path to fd for required operations.
+ *
+ * NOTE: The opaque invariant handle is the GFID of the object in reality, but
+ * maintained as an opaque data value, for potential internal changes to the
+ * same without impacting the caller.
+ *
+ * NOTE: Currently looking up an object can create multiple object handles to
+ * the same, i.e distinct glfs_object *. Hence each such looked up or received
+ * handle from other calls, would need to be closed. In the future, for a given
+ * object these pointers would be the same, and an ease of use API to forget all
+ * instances of this bject would be provided (instead of a per lookup close).
+ * This should not change the APIs in their current form.
+ *
+ */
+
+/* Values for valid falgs to be used when using XXXsetattr, to set multiple
+ attribute values passed via the related stat structure.
+ */
+#define GFAPI_SET_ATTR_MODE 0x1
+#define GFAPI_SET_ATTR_UID 0x2
+#define GFAPI_SET_ATTR_GID 0x4
+#define GFAPI_SET_ATTR_SIZE 0x8
+#define GFAPI_SET_ATTR_ATIME 0x10
+#define GFAPI_SET_ATTR_MTIME 0x20
+
+/* Handle length for object handles returned from glfs_h_extract_handle or
+ * glfs_h_create_from_handle */
+#define GFAPI_HANDLE_LENGTH 16
+
+__BEGIN_DECLS
+
+/*
+ * Notes:
+ *
+ * The file object handle. One per looked up, created file/directory
+ *
+ * This had been introduced to facilitate gfid/inode based gfapi
+ * - a requirement introduced by nfs-ganesha
+ */
+struct glfs_object;
+typedef struct glfs_object glfs_object_t;
+
+/* Handle based operations */
+/* Operations that generate handles */
+struct glfs_object *glfs_h_lookupat (struct glfs *fs,
+ struct glfs_object *parent,
+ const char *path, struct stat *stat);
+
+struct glfs_object *glfs_h_creat (struct glfs *fs, struct glfs_object *parent,
+ const char *path, int flags, mode_t mode,
+ struct stat *sb);
+
+struct glfs_object *glfs_h_mkdir (struct glfs *fs, struct glfs_object *parent,
+ const char *path, mode_t flags,
+ struct stat *sb);
+
+struct glfs_object *glfs_h_mknod (struct glfs *fs, struct glfs_object *parent,
+ const char *path, mode_t mode, dev_t dev,
+ struct stat *sb);
+
+struct glfs_object *glfs_h_symlink (struct glfs *fs, struct glfs_object *parent,
+ const char *name, const char *data,
+ struct stat *stat);
+
+/* Operations on the actual objects */
+int glfs_h_unlink (struct glfs *fs, struct glfs_object *parent,
+ const char *path);
+
+int glfs_h_close (struct glfs_object *object);
+
+int glfs_caller_specific_init (void *uid_caller_key, void *gid_caller_key,
+ void *future);
+
+int glfs_h_truncate (struct glfs *fs, struct glfs_object *object, off_t offset);
+
+int glfs_h_stat(struct glfs *fs, struct glfs_object *object, struct stat *stat);
+
+int glfs_h_getattrs (struct glfs *fs, struct glfs_object *object,
+ struct stat *stat);
+
+int glfs_h_setattrs (struct glfs *fs, struct glfs_object *object,
+ struct stat *sb, int valid);
+
+int glfs_h_readlink (struct glfs *fs, struct glfs_object *object, char *buf,
+ size_t bufsiz);
+
+int glfs_h_link (struct glfs *fs, struct glfs_object *linktgt,
+ struct glfs_object *parent, const char *name);
+
+int glfs_h_rename (struct glfs *fs, struct glfs_object *olddir,
+ const char *oldname, struct glfs_object *newdir,
+ const char *newname);
+
+/* Operations enabling opaque invariant handle to object transitions */
+ssize_t glfs_h_extract_handle (struct glfs_object *object,
+ unsigned char *handle, int len);
+
+struct glfs_object *glfs_h_create_from_handle (struct glfs *fs,
+ unsigned char *handle, int len,
+ struct stat *stat);
+
+/* Operations enabling object handles to fd transitions */
+struct glfs_fd *glfs_h_opendir (struct glfs *fs, struct glfs_object *object);
+
+struct glfs_fd *glfs_h_open (struct glfs *fs, struct glfs_object *object,
+ int flags);
+
+__END_DECLS
+
+#endif /* !_GLFS_HANDLES_H */ \ No newline at end of file
diff --git a/api/src/glfs-internal.h b/api/src/glfs-internal.h
new file mode 100644
index 000000000..ec1d5579d
--- /dev/null
+++ b/api/src/glfs-internal.h
@@ -0,0 +1,200 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _GLFS_INTERNAL_H
+#define _GLFS_INTERNAL_H
+
+#include "xlator.h"
+
+#define GLFS_SYMLINK_MAX_FOLLOW 2048
+
+#define DEFAULT_REVAL_COUNT 1
+
+#define ESTALE_RETRY(ret,errno,reval,loc,label) do { \
+ if (ret == -1 && errno == ESTALE) { \
+ if (reval < DEFAULT_REVAL_COUNT) { \
+ reval++; \
+ loc_wipe (loc); \
+ goto label; \
+ } \
+ } \
+ } while (0)
+
+#define GLFS_LOC_FILL_INODE(oinode, loc, label) do { \
+ loc.inode = inode_ref (oinode); \
+ uuid_copy (loc.gfid, oinode->gfid); \
+ ret = glfs_loc_touchup (&loc); \
+ if (ret != 0) { \
+ errno = EINVAL; \
+ goto label; \
+ } \
+ } while (0)
+
+#define GLFS_LOC_FILL_PINODE(pinode, loc, ret, errno, label, path) do { \
+ loc.inode = inode_new (pinode->table); \
+ if (!loc.inode) { \
+ ret = -1; \
+ errno = ENOMEM; \
+ goto label; \
+ } \
+ loc.parent = inode_ref (pinode); \
+ loc.name = path; \
+ ret = glfs_loc_touchup (&loc); \
+ if (ret != 0) { \
+ errno = EINVAL; \
+ goto label; \
+ } \
+ } while (0)
+
+struct glfs;
+
+typedef int (*glfs_init_cbk) (struct glfs *fs, int ret);
+
+struct glfs {
+ char *volname;
+
+ glusterfs_ctx_t *ctx;
+
+ pthread_t poller;
+
+ glfs_init_cbk init_cbk;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ int init;
+ int ret;
+ int err;
+
+ xlator_t *active_subvol;
+ xlator_t *next_subvol;
+ xlator_t *old_subvol;
+
+ char *oldvolfile;
+ ssize_t oldvollen;
+
+ inode_t *cwd;
+
+ uint32_t dev_id; /* Used to fill st_dev in struct stat */
+
+ struct list_head openfds;
+
+ gf_boolean_t migration_in_progress;
+};
+
+struct glfs_fd {
+ struct list_head openfds;
+ struct glfs *fs;
+ off_t offset;
+ fd_t *fd; /* Currently guared by @fs->mutex. TODO: per-glfd lock */
+ struct list_head entries;
+ gf_dirent_t *next;
+ struct dirent *readdirbuf;
+};
+
+/* glfs object handle introduced for the alternate gfapi implementation based
+ on glfs handles/gfid/inode
+*/
+struct glfs_object {
+ inode_t *inode;
+ uuid_t gfid;
+};
+
+#define DEFAULT_EVENT_POOL_SIZE 16384
+#define GF_MEMPOOL_COUNT_OF_DICT_T 4096
+#define GF_MEMPOOL_COUNT_OF_DATA_T (GF_MEMPOOL_COUNT_OF_DICT_T * 4)
+#define GF_MEMPOOL_COUNT_OF_DATA_PAIR_T (GF_MEMPOOL_COUNT_OF_DICT_T * 4)
+
+int glfs_mgmt_init (struct glfs *fs);
+void glfs_init_done (struct glfs *fs, int ret);
+int glfs_process_volfp (struct glfs *fs, FILE *fp);
+int glfs_resolve (struct glfs *fs, xlator_t *subvol, const char *path, loc_t *loc,
+ struct iatt *iatt, int reval);
+int glfs_lresolve (struct glfs *fs, xlator_t *subvol, const char *path, loc_t *loc,
+ struct iatt *iatt, int reval);
+fd_t *glfs_resolve_fd (struct glfs *fs, xlator_t *subvol, struct glfs_fd *glfd);
+
+fd_t *__glfs_migrate_fd (struct glfs *fs, xlator_t *subvol, struct glfs_fd *glfd);
+
+int glfs_first_lookup (xlator_t *subvol);
+
+static inline void
+__glfs_entry_fs (struct glfs *fs)
+{
+ THIS = fs->ctx->master;
+}
+
+
+static inline void
+__glfs_entry_fd (struct glfs_fd *fd)
+{
+ THIS = fd->fd->inode->table->xl->ctx->master;
+}
+
+
+/*
+ By default all lock attempts from user context must
+ use glfs_lock() and glfs_unlock(). This allows
+ for a safe implementation of graph migration where
+ we can give up the mutex during syncop calls so
+ that bottom up calls (particularly CHILD_UP notify)
+ can do a mutex_lock() on @glfs without deadlocking
+ the filesystem
+*/
+static inline int
+glfs_lock (struct glfs *fs)
+{
+ pthread_mutex_lock (&fs->mutex);
+
+ while (!fs->init)
+ pthread_cond_wait (&fs->cond, &fs->mutex);
+
+ while (fs->migration_in_progress)
+ pthread_cond_wait (&fs->cond, &fs->mutex);
+
+ return 0;
+}
+
+
+static inline void
+glfs_unlock (struct glfs *fs)
+{
+ pthread_mutex_unlock (&fs->mutex);
+}
+
+
+void glfs_fd_destroy (struct glfs_fd *glfd);
+
+struct glfs_fd *glfs_fd_new (struct glfs *fs);
+void glfs_fd_bind (struct glfs_fd *glfd);
+
+xlator_t * glfs_active_subvol (struct glfs *fs);
+xlator_t * __glfs_active_subvol (struct glfs *fs);
+void glfs_subvol_done (struct glfs *fs, xlator_t *subvol);
+
+inode_t * glfs_refresh_inode (xlator_t *subvol, inode_t *inode);
+
+inode_t *glfs_cwd_get (struct glfs *fs);
+int glfs_cwd_set (struct glfs *fs, inode_t *inode);
+inode_t *glfs_resolve_inode (struct glfs *fs, xlator_t *subvol,
+ struct glfs_object *object);
+int glfs_create_object (loc_t *loc, struct glfs_object **retobject);
+int __glfs_cwd_set (struct glfs *fs, inode_t *inode);
+
+int glfs_resolve_base (struct glfs *fs, xlator_t *subvol, inode_t *inode,
+ struct iatt *iatt);
+int glfs_resolve_at (struct glfs *fs, xlator_t *subvol, inode_t *at,
+ const char *origpath, loc_t *loc, struct iatt *iatt,
+ int follow, int reval);
+int glfs_loc_touchup (loc_t *loc);
+void glfs_iatt_to_stat (struct glfs *fs, struct iatt *iatt, struct stat *stat);
+int glfs_loc_link (loc_t *loc, struct iatt *iatt);
+int glfs_loc_unlink (loc_t *loc);
+
+#endif /* !_GLFS_INTERNAL_H */
diff --git a/api/src/glfs-master.c b/api/src/glfs-master.c
new file mode 100644
index 000000000..c02534c18
--- /dev/null
+++ b/api/src/glfs-master.c
@@ -0,0 +1,154 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <limits.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "glusterfs.h"
+
+#include "glfs-internal.h"
+#include "glfs-mem-types.h"
+
+
+int
+glfs_graph_setup (struct glfs *fs, glusterfs_graph_t *graph)
+{
+ xlator_t *new_subvol = NULL;
+ xlator_t *old_subvol = NULL;
+ inode_table_t *itable = NULL;
+ int ret = -1;
+
+ new_subvol = graph->top;
+
+ /* This is called in a bottom-up context, it should specifically
+ NOT be glfs_lock()
+ */
+ pthread_mutex_lock (&fs->mutex);
+ {
+ if (new_subvol->switched ||
+ new_subvol == fs->active_subvol ||
+ new_subvol == fs->next_subvol) {
+ /* Spurious CHILD_UP event on old graph */
+ ret = 0;
+ goto unlock;
+ }
+
+ if (!new_subvol->itable) {
+ itable = inode_table_new (131072, new_subvol);
+ if (!itable) {
+ errno = ENOMEM;
+ ret = -1;
+ goto unlock;
+ }
+
+ new_subvol->itable = itable;
+ }
+
+ old_subvol = fs->next_subvol;
+ fs->next_subvol = new_subvol;
+ fs->next_subvol->winds++; /* first ref */
+ ret = 0;
+ }
+unlock:
+ pthread_mutex_unlock (&fs->mutex);
+
+ if (old_subvol)
+ /* wasn't picked up so far, skip */
+ glfs_subvol_done (fs, old_subvol);
+
+ return ret;
+}
+
+
+int
+notify (xlator_t *this, int event, void *data, ...)
+{
+ glusterfs_graph_t *graph = NULL;
+ struct glfs *fs = NULL;
+
+ graph = data;
+ fs = this->private;
+
+ switch (event) {
+ case GF_EVENT_GRAPH_NEW:
+ gf_log (this->name, GF_LOG_INFO, "New graph %s (%d) coming up",
+ uuid_utoa ((unsigned char *)graph->graph_uuid),
+ graph->id);
+ break;
+ case GF_EVENT_CHILD_UP:
+ glfs_graph_setup (fs, graph);
+ glfs_init_done (fs, 0);
+ break;
+ case GF_EVENT_CHILD_DOWN:
+ glfs_graph_setup (fs, graph);
+ glfs_init_done (fs, 1);
+ break;
+ case GF_EVENT_CHILD_CONNECTING:
+ break;
+ default:
+ gf_log (this->name, GF_LOG_DEBUG,
+ "got notify event %d", event);
+ break;
+ }
+
+ return 0;
+}
+
+
+int
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, glfs_mt_end + 1);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to initialise "
+ "memory accounting");
+ return ret;
+ }
+
+ return 0;
+}
+
+
+int
+init (xlator_t *this)
+{
+ return 0;
+}
+
+
+void
+fini (xlator_t *this)
+{
+
+}
+
+
+struct xlator_dumpops dumpops;
+
+
+struct xlator_fops fops;
+
+
+struct xlator_cbks cbks;
diff --git a/api/src/glfs-mem-types.h b/api/src/glfs-mem-types.h
new file mode 100644
index 000000000..3301b3da5
--- /dev/null
+++ b/api/src/glfs-mem-types.h
@@ -0,0 +1,32 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLFS_MEM_TYPES_H
+#define _GLFS_MEM_TYPES_H
+
+#include "mem-types.h"
+
+#define GF_MEM_TYPE_START (gf_common_mt_end + 1)
+
+enum glfs_mem_types_ {
+ glfs_mt_glfs_t = GF_MEM_TYPE_START,
+ glfs_mt_call_pool_t,
+ glfs_mt_xlator_t,
+ glfs_mt_glfs_fd_t,
+ glfs_mt_glfs_io_t,
+ glfs_mt_volfile_t,
+ glfs_mt_xlator_cmdline_option_t,
+ glfs_mt_glfs_object_t,
+ glfs_mt_readdirbuf_t,
+ glfs_mt_end
+
+};
+#endif
+
diff --git a/api/src/glfs-mgmt.c b/api/src/glfs-mgmt.c
new file mode 100644
index 000000000..6843e9cb3
--- /dev/null
+++ b/api/src/glfs-mgmt.c
@@ -0,0 +1,543 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <pthread.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif /* _CONFIG_H */
+
+#include "glusterfs.h"
+#include "stack.h"
+#include "dict.h"
+#include "event.h"
+#include "defaults.h"
+
+#include "rpc-clnt.h"
+#include "protocol-common.h"
+#include "glusterfs3.h"
+#include "portmap-xdr.h"
+#include "xdr-generic.h"
+
+#include "syncop.h"
+#include "xlator.h"
+
+#include "glfs-internal.h"
+#include "glfs-mem-types.h"
+
+
+int glfs_volfile_fetch (struct glfs *fs);
+
+int
+glfs_process_volfp (struct glfs *fs, FILE *fp)
+{
+ glusterfs_graph_t *graph = NULL;
+ int ret = -1;
+ xlator_t *trav = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+
+ ctx = fs->ctx;
+ graph = glusterfs_graph_construct (fp);
+ if (!graph) {
+ gf_log ("glfs", GF_LOG_ERROR, "failed to construct the graph");
+ goto out;
+ }
+
+ for (trav = graph->first; trav; trav = trav->next) {
+ if (strcmp (trav->type, "mount/fuse") == 0) {
+ gf_log ("glfs", GF_LOG_ERROR,
+ "fuse xlator cannot be specified "
+ "in volume file");
+ goto out;
+ }
+ }
+
+ ret = glusterfs_graph_prepare (graph, ctx);
+ if (ret) {
+ glusterfs_graph_destroy (graph);
+ goto out;
+ }
+
+ ret = glusterfs_graph_activate (graph, ctx);
+
+ if (ret) {
+ glusterfs_graph_destroy (graph);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (fp)
+ fclose (fp);
+
+ if (!ctx->active) {
+ ret = -1;
+ }
+
+ return ret;
+}
+
+
+int
+mgmt_cbk_spec (struct rpc_clnt *rpc, void *mydata, void *data)
+{
+ struct glfs *fs = NULL;
+ xlator_t *this = NULL;
+
+ this = mydata;
+ fs = this->private;
+
+ glfs_volfile_fetch (fs);
+
+ return 0;
+}
+
+
+int
+mgmt_cbk_event (struct rpc_clnt *rpc, void *mydata, void *data)
+{
+ return 0;
+}
+
+
+rpcclnt_cb_actor_t mgmt_cbk_actors[] = {
+ [GF_CBK_FETCHSPEC] = {"FETCHSPEC", GF_CBK_FETCHSPEC, mgmt_cbk_spec },
+ [GF_CBK_EVENT_NOTIFY] = {"EVENTNOTIFY", GF_CBK_EVENT_NOTIFY,
+ mgmt_cbk_event},
+};
+
+
+struct rpcclnt_cb_program mgmt_cbk_prog = {
+ .progname = "GlusterFS Callback",
+ .prognum = GLUSTER_CBK_PROGRAM,
+ .progver = GLUSTER_CBK_VERSION,
+ .actors = mgmt_cbk_actors,
+ .numactors = GF_CBK_MAXVALUE,
+};
+
+char *clnt_handshake_procs[GF_HNDSK_MAXVALUE] = {
+ [GF_HNDSK_NULL] = "NULL",
+ [GF_HNDSK_SETVOLUME] = "SETVOLUME",
+ [GF_HNDSK_GETSPEC] = "GETSPEC",
+ [GF_HNDSK_PING] = "PING",
+ [GF_HNDSK_EVENT_NOTIFY] = "EVENTNOTIFY",
+};
+
+rpc_clnt_prog_t clnt_handshake_prog = {
+ .progname = "GlusterFS Handshake",
+ .prognum = GLUSTER_HNDSK_PROGRAM,
+ .progver = GLUSTER_HNDSK_VERSION,
+ .procnames = clnt_handshake_procs,
+};
+
+
+int
+mgmt_submit_request (void *req, call_frame_t *frame,
+ glusterfs_ctx_t *ctx,
+ rpc_clnt_prog_t *prog, int procnum,
+ fop_cbk_fn_t cbkfn, xdrproc_t xdrproc)
+{
+ int ret = -1;
+ int count = 0;
+ struct iovec iov = {0, };
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ ssize_t xdr_size = 0;
+
+ iobref = iobref_new ();
+ if (!iobref) {
+ goto out;
+ }
+
+ if (req) {
+ xdr_size = xdr_sizeof (xdrproc, req);
+
+ iobuf = iobuf_get2 (ctx->iobuf_pool, xdr_size);
+ if (!iobuf) {
+ goto out;
+ };
+
+ iobref_add (iobref, iobuf);
+
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_pagesize (iobuf);
+
+ /* Create the xdr payload */
+ ret = xdr_serialize_generic (iov, req, xdrproc);
+ if (ret == -1) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to create XDR payload");
+ goto out;
+ }
+ iov.iov_len = ret;
+ count = 1;
+ }
+
+ /* Send the msg */
+ ret = rpc_clnt_submit (ctx->mgmt, prog, procnum, cbkfn,
+ &iov, count,
+ NULL, 0, iobref, frame, NULL, 0, NULL, 0, NULL);
+
+out:
+ if (iobref)
+ iobref_unref (iobref);
+
+ if (iobuf)
+ iobuf_unref (iobuf);
+ return ret;
+}
+
+
+static int
+glusterfs_oldvolfile_update (struct glfs *fs, char *volfile, ssize_t size)
+{
+ int ret = -1;
+
+ fs->oldvollen = size;
+ if (!fs->oldvolfile) {
+ fs->oldvolfile = GF_CALLOC (1, size+1, glfs_mt_volfile_t);
+ } else {
+ fs->oldvolfile = GF_REALLOC (fs->oldvolfile, size+1);
+ }
+
+ if (!fs->oldvolfile) {
+ fs->oldvollen = 0;
+ } else {
+ memcpy (fs->oldvolfile, volfile, size);
+ fs->oldvollen = size;
+ ret = 0;
+ }
+
+ return ret;
+}
+
+
+int
+mgmt_getspec_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_getspec_rsp rsp = {0,};
+ call_frame_t *frame = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ int ret = 0;
+ ssize_t size = 0;
+ FILE *tmpfp = NULL;
+ int need_retry = 0;
+ struct glfs *fs = NULL;
+
+ frame = myframe;
+ ctx = frame->this->ctx;
+ fs = ((xlator_t *)ctx->master)->private;
+
+ if (-1 == req->rpc_status) {
+ ret = -1;
+ need_retry = 1;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR, "XDR decoding error");
+ ret = -1;
+ goto out;
+ }
+
+ if (-1 == rsp.op_ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "failed to get the 'volume file' from server");
+ ret = -1;
+ errno = rsp.op_errno;
+ goto out;
+ }
+
+ ret = 0;
+ size = rsp.op_ret;
+
+ if ((size == fs->oldvollen) &&
+ (memcmp (fs->oldvolfile, rsp.spec, size) == 0)) {
+ gf_log (frame->this->name, GF_LOG_INFO,
+ "No change in volfile, continuing");
+ goto out;
+ }
+
+ tmpfp = tmpfile ();
+ if (!tmpfp) {
+ ret = -1;
+ goto out;
+ }
+
+ fwrite (rsp.spec, size, 1, tmpfp);
+ fflush (tmpfp);
+ if (ferror (tmpfp)) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Check if only options have changed. No need to reload the
+ * volfile if topology hasn't changed.
+ * glusterfs_volfile_reconfigure returns 3 possible return states
+ * return 0 =======> reconfiguration of options has succeeded
+ * return 1 =======> the graph has to be reconstructed and all the xlators should be inited
+ * return -1(or -ve) =======> Some Internal Error occurred during the operation
+ */
+
+ ret = glusterfs_volfile_reconfigure (fs->oldvollen, tmpfp, fs->ctx,
+ fs->oldvolfile);
+ if (ret == 0) {
+ gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
+ "No need to re-load volfile, reconfigure done");
+ ret = glusterfs_oldvolfile_update (fs, rsp.spec, size);
+ goto out;
+ }
+
+ if (ret < 0) {
+ gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
+ "Reconfigure failed !!");
+ goto out;
+ }
+
+ ret = glfs_process_volfp (fs, tmpfp);
+ /* tmpfp closed */
+ tmpfp = NULL;
+ if (ret)
+ goto out;
+
+ ret = glusterfs_oldvolfile_update (fs, rsp.spec, size);
+out:
+ STACK_DESTROY (frame->root);
+
+ if (rsp.spec)
+ free (rsp.spec);
+
+ // Stop if server is running at an unsupported op-version
+ if (ENOTSUP == ret) {
+ gf_log ("mgmt", GF_LOG_ERROR, "Server is operating at an "
+ "op-version which is not supported");
+ errno = ENOTSUP;
+ glfs_init_done (fs, -1);
+ }
+
+ if (ret && ctx && !ctx->active) {
+ /* Do it only for the first time */
+ /* Failed to get the volume file, something wrong,
+ restart the process */
+ gf_log ("glfs-mgmt", GF_LOG_ERROR,
+ "failed to fetch volume file (key:%s)",
+ ctx->cmd_args.volfile_id);
+ if (!need_retry) {
+ if (!errno)
+ errno = EINVAL;
+ glfs_init_done (fs, -1);
+ }
+ }
+
+ if (tmpfp)
+ fclose (tmpfp);
+
+ return 0;
+}
+
+
+int
+glfs_volfile_fetch (struct glfs *fs)
+{
+ cmd_args_t *cmd_args = NULL;
+ gf_getspec_req req = {0, };
+ int ret = 0;
+ call_frame_t *frame = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ dict_t *dict = NULL;
+
+ ctx = fs->ctx;
+ cmd_args = &ctx->cmd_args;
+
+ frame = create_frame (THIS, ctx->pool);
+
+ req.key = cmd_args->volfile_id;
+ req.flags = 0;
+
+ dict = dict_new ();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ // Set the supported min and max op-versions, so glusterd can make a
+ // decision
+ ret = dict_set_int32 (dict, "min-op-version", GD_OP_VERSION_MIN);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to set min-op-version"
+ " in request dict");
+ goto out;
+ }
+
+ ret = dict_set_int32 (dict, "max-op-version", GD_OP_VERSION_MAX);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to set max-op-version"
+ " in request dict");
+ goto out;
+ }
+
+ ret = dict_allocate_and_serialize (dict, &req.xdata.xdata_val,
+ &req.xdata.xdata_len);
+ if (ret < 0) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Failed to serialize dictionary");
+ goto out;
+ }
+
+ ret = mgmt_submit_request (&req, frame, ctx, &clnt_handshake_prog,
+ GF_HNDSK_GETSPEC, mgmt_getspec_cbk,
+ (xdrproc_t)xdr_gf_getspec_req);
+out:
+ return ret;
+}
+
+
+static int
+mgmt_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
+ void *data)
+{
+ xlator_t *this = NULL;
+ cmd_args_t *cmd_args = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ struct glfs *fs = NULL;
+ int ret = 0;
+
+ this = mydata;
+ ctx = this->ctx;
+ fs = ((xlator_t *)ctx->master)->private;
+ cmd_args = &ctx->cmd_args;
+
+ switch (event) {
+ case RPC_CLNT_DISCONNECT:
+ if (!ctx->active) {
+ cmd_args->max_connect_attempts--;
+ gf_log ("glfs-mgmt", GF_LOG_ERROR,
+ "failed to connect with remote-host: %s",
+ strerror (errno));
+ gf_log ("glfs-mgmt", GF_LOG_INFO,
+ "%d connect attempts left",
+ cmd_args->max_connect_attempts);
+ if (0 >= cmd_args->max_connect_attempts) {
+ errno = ENOTCONN;
+ glfs_init_done (fs, -1);
+ }
+ }
+ break;
+ case RPC_CLNT_CONNECT:
+ rpc_clnt_set_connected (&((struct rpc_clnt*)ctx->mgmt)->conn);
+
+ ret = glfs_volfile_fetch (fs);
+ if (ret && ctx && (ctx->active == NULL)) {
+ /* Do it only for the first time */
+ /* Exit the process.. there are some wrong options */
+ gf_log ("glfs-mgmt", GF_LOG_ERROR,
+ "failed to fetch volume file (key:%s)",
+ ctx->cmd_args.volfile_id);
+ errno = EINVAL;
+ glfs_init_done (fs, -1);
+ }
+
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+
+int
+glusterfs_mgmt_notify (int32_t op, void *data, ...)
+{
+ int ret = 0;
+
+ switch (op)
+ {
+ case GF_EN_DEFRAG_STATUS:
+ break;
+
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+
+int
+glfs_mgmt_init (struct glfs *fs)
+{
+ cmd_args_t *cmd_args = NULL;
+ struct rpc_clnt *rpc = NULL;
+ dict_t *options = NULL;
+ int ret = -1;
+ int port = GF_DEFAULT_BASE_PORT;
+ char *host = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+
+ ctx = fs->ctx;
+ cmd_args = &ctx->cmd_args;
+
+ if (ctx->mgmt)
+ return 0;
+
+ if (cmd_args->volfile_server_port)
+ port = cmd_args->volfile_server_port;
+
+ host = "localhost";
+ if (cmd_args->volfile_server)
+ host = cmd_args->volfile_server;
+
+ ret = rpc_transport_inet_options_build (&options, host, port);
+ if (ret)
+ goto out;
+
+ rpc = rpc_clnt_new (options, THIS->ctx, THIS->name, 8);
+ if (!rpc) {
+ ret = -1;
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to create rpc clnt");
+ goto out;
+ }
+
+ ret = rpc_clnt_register_notify (rpc, mgmt_rpc_notify, THIS);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to register notify function");
+ goto out;
+ }
+
+ ret = rpcclnt_cbk_program_register (rpc, &mgmt_cbk_prog, THIS);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to register callback function");
+ goto out;
+ }
+
+ ctx->notify = glusterfs_mgmt_notify;
+
+ /* This value should be set before doing the 'rpc_clnt_start()' as
+ the notify function uses this variable */
+ ctx->mgmt = rpc;
+
+ ret = rpc_clnt_start (rpc);
+out:
+ return ret;
+}
+
diff --git a/api/src/glfs-resolve.c b/api/src/glfs-resolve.c
new file mode 100644
index 000000000..4ca2eb6fc
--- /dev/null
+++ b/api/src/glfs-resolve.c
@@ -0,0 +1,969 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <limits.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "stack.h"
+#include "event.h"
+#include "glfs-mem-types.h"
+#include "common-utils.h"
+#include "syncop.h"
+#include "call-stub.h"
+
+#include "glfs-internal.h"
+
+#define graphid_str(subvol) (uuid_utoa((unsigned char *)subvol->graph->graph_uuid))
+
+
+int
+glfs_first_lookup_safe (xlator_t *subvol)
+{
+ loc_t loc = {0, };
+ int ret = -1;
+
+ loc.inode = subvol->itable->root;
+ memset (loc.gfid, 0, 16);
+ loc.gfid[15] = 1;
+ loc.path = "/";
+ loc.name = "";
+
+ ret = syncop_lookup (subvol, &loc, 0, 0, 0, 0);
+
+ gf_log (subvol->name, GF_LOG_DEBUG, "first lookup complete %d", ret);
+
+ return ret;
+}
+
+
+int
+__glfs_first_lookup (struct glfs *fs, xlator_t *subvol)
+{
+ int ret = -1;
+
+ fs->migration_in_progress = 1;
+ pthread_mutex_unlock (&fs->mutex);
+ {
+ ret = glfs_first_lookup_safe (subvol);
+ }
+ pthread_mutex_lock (&fs->mutex);
+ fs->migration_in_progress = 0;
+ pthread_cond_broadcast (&fs->cond);
+
+ return ret;
+}
+
+
+inode_t *
+glfs_refresh_inode_safe (xlator_t *subvol, inode_t *oldinode)
+{
+ loc_t loc = {0, };
+ int ret = -1;
+ struct iatt iatt = {0, };
+ inode_t *newinode = NULL;
+
+
+ if (!oldinode)
+ return NULL;
+
+ if (oldinode->table->xl == subvol)
+ return inode_ref (oldinode);
+
+ newinode = inode_find (subvol->itable, oldinode->gfid);
+ if (newinode)
+ return newinode;
+
+ uuid_copy (loc.gfid, oldinode->gfid);
+ loc.inode = inode_new (subvol->itable);
+ if (!loc.inode)
+ return NULL;
+
+ ret = syncop_lookup (subvol, &loc, 0, &iatt, 0, 0);
+
+ if (ret) {
+ gf_log (subvol->name, GF_LOG_WARNING,
+ "inode refresh of %s failed: %s",
+ uuid_utoa (oldinode->gfid), strerror (errno));
+ loc_wipe (&loc);
+ return NULL;
+ }
+
+ newinode = inode_link (loc.inode, 0, 0, &iatt);
+ if (newinode)
+ inode_lookup (newinode);
+
+ loc_wipe (&loc);
+
+ return newinode;
+}
+
+
+inode_t *
+__glfs_refresh_inode (struct glfs *fs, xlator_t *subvol, inode_t *inode)
+{
+ inode_t *newinode = NULL;
+
+ fs->migration_in_progress = 1;
+ pthread_mutex_unlock (&fs->mutex);
+ {
+ newinode = glfs_refresh_inode_safe (subvol, inode);
+ }
+ pthread_mutex_lock (&fs->mutex);
+ fs->migration_in_progress = 0;
+ pthread_cond_broadcast (&fs->cond);
+
+ return newinode;
+}
+
+int
+glfs_loc_touchup (loc_t *loc)
+{
+ char *path = NULL;
+ int ret = -1;
+ char *bn = NULL;
+
+ if (loc->parent)
+ ret = inode_path (loc->parent, loc->name, &path);
+ else
+ ret = inode_path (loc->inode, 0, &path);
+
+ loc->path = path;
+
+ if (ret < 0 || !path) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ bn = strrchr (path, '/');
+ if (bn)
+ bn++;
+ loc->name = bn;
+ ret = 0;
+out:
+ return ret;
+}
+
+
+int
+glfs_resolve_symlink (struct glfs *fs, xlator_t *subvol, inode_t *inode,
+ char **lpath)
+{
+ loc_t loc = {0, };
+ char *path = NULL;
+ char *rpath = NULL;
+ int ret = -1;
+
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.gfid, inode->gfid);
+ ret = inode_path (inode, NULL, &rpath);
+ if (ret < 0)
+ goto out;
+ loc.path = rpath;
+
+ ret = syncop_readlink (subvol, &loc, &path, 4096);
+
+ if (ret < 0)
+ goto out;
+
+ if (lpath)
+ *lpath = path;
+out:
+ loc_wipe (&loc);
+ return ret;
+}
+
+
+int
+glfs_resolve_base (struct glfs *fs, xlator_t *subvol, inode_t *inode,
+ struct iatt *iatt)
+{
+ loc_t loc = {0, };
+ int ret = -1;
+ char *path = NULL;
+
+ loc.inode = inode_ref (inode);
+ uuid_copy (loc.gfid, inode->gfid);
+
+ ret = inode_path (loc.inode, NULL, &path);
+ loc.path = path;
+ if (ret < 0)
+ goto out;
+
+ ret = syncop_lookup (subvol, &loc, NULL, iatt, NULL, NULL);
+out:
+ loc_wipe (&loc);
+
+ return ret;
+}
+
+
+inode_t *
+glfs_resolve_component (struct glfs *fs, xlator_t *subvol, inode_t *parent,
+ const char *component, struct iatt *iatt,
+ int force_lookup)
+{
+ loc_t loc = {0, };
+ inode_t *inode = NULL;
+ int reval = 0;
+ int ret = -1;
+ int glret = -1;
+ struct iatt ciatt = {0, };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+
+ loc.name = component;
+
+ loc.parent = inode_ref (parent);
+ uuid_copy (loc.pargfid, parent->gfid);
+
+
+ if (strcmp (component, ".") == 0)
+ loc.inode = inode_ref (parent);
+ else if (strcmp (component, "..") == 0)
+ loc.inode = inode_parent (parent, 0, 0);
+ else
+ loc.inode = inode_grep (parent->table, parent, component);
+
+ if (loc.inode) {
+ uuid_copy (loc.gfid, loc.inode->gfid);
+ reval = 1;
+
+ if (!force_lookup) {
+ inode = inode_ref (loc.inode);
+ ciatt.ia_type = inode->ia_type;
+ goto found;
+ }
+ } else {
+ uuid_generate (gfid);
+ loc.inode = inode_new (parent->table);
+ }
+
+ if (!loc.inode)
+ goto out;
+
+ glret = glfs_loc_touchup (&loc);
+ if (glret < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_lookup (subvol, &loc, NULL, &ciatt, NULL, NULL);
+ if (ret && reval) {
+ inode_unref (loc.inode);
+ loc.inode = inode_new (parent->table);
+ if (!loc.inode) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ uuid_generate (gfid);
+
+ ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16);
+ if (ret) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_lookup (subvol, &loc, xattr_req, &ciatt,
+ NULL, NULL);
+ }
+ if (ret)
+ goto out;
+
+ inode = inode_link (loc.inode, loc.parent, component, &ciatt);
+found:
+ if (inode)
+ inode_lookup (inode);
+ if (iatt)
+ *iatt = ciatt;
+out:
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ loc_wipe (&loc);
+
+ return inode;
+}
+
+
+int
+glfs_resolve_at (struct glfs *fs, xlator_t *subvol, inode_t *at,
+ const char *origpath, loc_t *loc, struct iatt *iatt,
+ int follow, int reval)
+{
+ inode_t *inode = NULL;
+ inode_t *parent = NULL;
+ char *saveptr = NULL;
+ char *path = NULL;
+ char *component = NULL;
+ char *next_component = NULL;
+ int ret = -1;
+ struct iatt ciatt = {0, };
+
+ path = gf_strdup (origpath);
+ if (!path) {
+ errno = ENOMEM;
+ return -1;
+ }
+
+ parent = NULL;
+ if (at && path[0] != '/') {
+ /* A relative resolution of a path which starts with '/'
+ is equal to an absolute path resolution.
+ */
+ inode = inode_ref (at);
+ } else {
+ inode = inode_ref (subvol->itable->root);
+
+ if (strcmp (path, "/") == 0)
+ glfs_resolve_base (fs, subvol, inode, &ciatt);
+ }
+
+ for (component = strtok_r (path, "/", &saveptr);
+ component; component = next_component) {
+
+ next_component = strtok_r (NULL, "/", &saveptr);
+
+ if (parent)
+ inode_unref (parent);
+
+ parent = inode;
+
+ inode = glfs_resolve_component (fs, subvol, parent,
+ component, &ciatt,
+ /* force hard lookup on the last
+ component, as the caller
+ wants proper iatt filled
+ */
+ (reval || (!next_component &&
+ iatt)));
+ if (!inode)
+ break;
+
+ if (IA_ISLNK (ciatt.ia_type) && (next_component || follow)) {
+ /* If the component is not the last piece,
+ then following it is necessary even if
+ not requested by the caller
+ */
+ char *lpath = NULL;
+ loc_t sym_loc = {0,};
+
+ if (follow > GLFS_SYMLINK_MAX_FOLLOW) {
+ errno = ELOOP;
+ ret = -1;
+ if (inode) {
+ inode_unref (inode);
+ inode = NULL;
+ }
+ break;
+ }
+
+ ret = glfs_resolve_symlink (fs, subvol, inode, &lpath);
+ inode_unref (inode);
+ inode = NULL;
+ if (ret < 0)
+ break;
+
+ ret = glfs_resolve_at (fs, subvol, parent, lpath,
+ &sym_loc,
+ /* followed iatt becomes the
+ component iatt
+ */
+ &ciatt,
+ /* always recurisvely follow while
+ following symlink
+ */
+ follow + 1, reval);
+ if (ret == 0)
+ inode = inode_ref (sym_loc.inode);
+ loc_wipe (&sym_loc);
+ GF_FREE (lpath);
+ }
+
+ if (!next_component)
+ break;
+
+ if (!IA_ISDIR (ciatt.ia_type)) {
+ /* next_component exists and this component is
+ not a directory
+ */
+ inode_unref (inode);
+ inode = NULL;
+ ret = -1;
+ errno = ENOTDIR;
+ break;
+ }
+ }
+
+ if (parent && next_component)
+ /* resolution failed mid-way */
+ goto out;
+
+ /* At this point, all components up to the last parent directory
+ have been resolved successfully (@parent). Resolution of basename
+ might have failed (@inode) if at all.
+ */
+
+ loc->parent = parent;
+ if (parent) {
+ uuid_copy (loc->pargfid, parent->gfid);
+ loc->name = component;
+ }
+
+ loc->inode = inode;
+ if (inode) {
+ uuid_copy (loc->gfid, inode->gfid);
+ if (iatt)
+ *iatt = ciatt;
+ ret = 0;
+ }
+
+ glfs_loc_touchup (loc);
+out:
+ GF_FREE (path);
+
+ /* do NOT loc_wipe here as only last component might be missing */
+
+ return ret;
+}
+
+
+int
+glfs_resolve_path (struct glfs *fs, xlator_t *subvol, const char *origpath,
+ loc_t *loc, struct iatt *iatt, int follow, int reval)
+{
+ int ret = -1;
+ inode_t *cwd = NULL;
+
+ if (origpath[0] == '/')
+ return glfs_resolve_at (fs, subvol, NULL, origpath, loc, iatt,
+ follow, reval);
+
+ cwd = glfs_cwd_get (fs);
+
+ ret = glfs_resolve_at (fs, subvol, cwd, origpath, loc, iatt,
+ follow, reval);
+ if (cwd)
+ inode_unref (cwd);
+
+ return ret;
+}
+
+
+int
+glfs_resolve (struct glfs *fs, xlator_t *subvol, const char *origpath,
+ loc_t *loc, struct iatt *iatt, int reval)
+{
+ int ret = -1;
+
+ ret = glfs_resolve_path (fs, subvol, origpath, loc, iatt, 1, reval);
+
+ return ret;
+}
+
+
+int
+glfs_lresolve (struct glfs *fs, xlator_t *subvol, const char *origpath,
+ loc_t *loc, struct iatt *iatt, int reval)
+{
+ int ret = -1;
+
+ ret = glfs_resolve_path (fs, subvol, origpath, loc, iatt, 0, reval);
+
+ return ret;
+}
+
+
+int
+glfs_migrate_fd_locks_safe (struct glfs *fs, xlator_t *oldsubvol, fd_t *oldfd,
+ xlator_t *newsubvol, fd_t *newfd)
+{
+ dict_t *lockinfo = NULL;
+ int ret = 0;
+ char uuid1[64];
+
+ if (!oldfd->lk_ctx || fd_lk_ctx_empty (oldfd->lk_ctx))
+ return 0;
+
+ newfd->lk_ctx = fd_lk_ctx_ref (oldfd->lk_ctx);
+
+ ret = syncop_fgetxattr (oldsubvol, oldfd, &lockinfo,
+ GF_XATTR_LOCKINFO_KEY);
+ if (ret < 0) {
+ gf_log (fs->volname, GF_LOG_WARNING,
+ "fgetxattr (%s) failed (%s) on graph %s (%d)",
+ uuid_utoa_r (oldfd->inode->gfid, uuid1),
+ strerror (errno),
+ graphid_str (oldsubvol), oldsubvol->graph->id);
+ goto out;
+ }
+
+ if (!dict_get (lockinfo, GF_XATTR_LOCKINFO_KEY)) {
+ gf_log (fs->volname, GF_LOG_WARNING,
+ "missing lokinfo key (%s) on graph %s (%d)",
+ uuid_utoa_r (oldfd->inode->gfid, uuid1),
+ graphid_str (oldsubvol), oldsubvol->graph->id);
+ goto out;
+ }
+
+ ret = syncop_fsetxattr (newsubvol, newfd, lockinfo, 0);
+ if (ret < 0) {
+ gf_log (fs->volname, GF_LOG_WARNING,
+ "fsetxattr (%s) failed (%s) on graph %s (%d)",
+ uuid_utoa_r (newfd->inode->gfid, uuid1),
+ strerror (errno),
+ graphid_str (newsubvol), newsubvol->graph->id);
+ goto out;
+ }
+out:
+ if (lockinfo)
+ dict_unref (lockinfo);
+ return ret;
+}
+
+
+fd_t *
+glfs_migrate_fd_safe (struct glfs *fs, xlator_t *newsubvol, fd_t *oldfd)
+{
+ fd_t *newfd = NULL;
+ inode_t *oldinode = NULL;
+ inode_t *newinode = NULL;
+ xlator_t *oldsubvol = NULL;
+ int ret = -1;
+ loc_t loc = {0, };
+ char uuid1[64];
+
+
+ oldinode = oldfd->inode;
+ oldsubvol = oldinode->table->xl;
+
+ if (oldsubvol == newsubvol)
+ return fd_ref (oldfd);
+
+ if (!oldsubvol->switched) {
+ ret = syncop_fsync (oldsubvol, oldfd, 0);
+ if (ret) {
+ gf_log (fs->volname, GF_LOG_WARNING,
+ "fsync() failed (%s) on %s graph %s (%d)",
+ strerror (errno),
+ uuid_utoa_r (oldfd->inode->gfid, uuid1),
+ graphid_str (oldsubvol), oldsubvol->graph->id);
+ }
+ }
+
+ newinode = glfs_refresh_inode_safe (newsubvol, oldinode);
+ if (!newinode) {
+ gf_log (fs->volname, GF_LOG_WARNING,
+ "inode (%s) refresh failed (%s) on graph %s (%d)",
+ uuid_utoa_r (oldinode->gfid, uuid1),
+ strerror (errno),
+ graphid_str (newsubvol), newsubvol->graph->id);
+ goto out;
+ }
+
+ newfd = fd_create (newinode, getpid());
+ if (!newfd) {
+ gf_log (fs->volname, GF_LOG_WARNING,
+ "fd_create (%s) failed (%s) on graph %s (%d)",
+ uuid_utoa_r (newinode->gfid, uuid1),
+ strerror (errno),
+ graphid_str (newsubvol), newsubvol->graph->id);
+ goto out;
+ }
+
+ loc.inode = inode_ref (newinode);
+
+ ret = inode_path (oldfd->inode, NULL, (char **)&loc.path);
+ if (ret < 0) {
+ gf_log (fs->volname, GF_LOG_INFO, "inode_path failed");
+ goto out;
+ }
+
+ uuid_copy (loc.gfid, oldinode->gfid);
+
+
+ if (IA_ISDIR (oldinode->ia_type))
+ ret = syncop_opendir (newsubvol, &loc, newfd);
+ else
+ ret = syncop_open (newsubvol, &loc,
+ oldfd->flags & ~(O_TRUNC|O_EXCL|O_CREAT),
+ newfd);
+ loc_wipe (&loc);
+
+ if (ret) {
+ gf_log (fs->volname, GF_LOG_WARNING,
+ "syncop_open%s (%s) failed (%s) on graph %s (%d)",
+ IA_ISDIR (oldinode->ia_type) ? "dir" : "",
+ uuid_utoa_r (newinode->gfid, uuid1),
+ strerror (errno),
+ graphid_str (newsubvol), newsubvol->graph->id);
+ goto out;
+ }
+
+ ret = glfs_migrate_fd_locks_safe (fs, oldsubvol, oldfd, newsubvol,
+ newfd);
+
+ if (ret) {
+ gf_log (fs->volname, GF_LOG_WARNING,
+ "lock migration (%s) failed (%s) on graph %s (%d)",
+ uuid_utoa_r (newinode->gfid, uuid1),
+ strerror (errno),
+ graphid_str (newsubvol), newsubvol->graph->id);
+ goto out;
+ }
+
+ newfd->flags = oldfd->flags;
+ fd_bind (newfd);
+out:
+ if (newinode)
+ inode_unref (newinode);
+
+ if (ret) {
+ fd_unref (newfd);
+ newfd = NULL;
+ }
+
+ return newfd;
+}
+
+
+fd_t *
+__glfs_migrate_fd (struct glfs *fs, xlator_t *newsubvol, struct glfs_fd *glfd)
+{
+ fd_t *oldfd = NULL;
+ fd_t *newfd = NULL;
+
+ oldfd = glfd->fd;
+
+ fs->migration_in_progress = 1;
+ pthread_mutex_unlock (&fs->mutex);
+ {
+ newfd = glfs_migrate_fd_safe (fs, newsubvol, oldfd);
+ }
+ pthread_mutex_lock (&fs->mutex);
+ fs->migration_in_progress = 0;
+ pthread_cond_broadcast (&fs->cond);
+
+ return newfd;
+}
+
+
+fd_t *
+__glfs_resolve_fd (struct glfs *fs, xlator_t *subvol, struct glfs_fd *glfd)
+{
+ fd_t *fd = NULL;
+
+ if (glfd->fd->inode->table->xl == subvol)
+ return fd_ref (glfd->fd);
+
+ fd = __glfs_migrate_fd (fs, subvol, glfd);
+ if (!fd)
+ return NULL;
+
+ if (subvol == fs->active_subvol) {
+ fd_unref (glfd->fd);
+ glfd->fd = fd_ref (fd);
+ }
+
+ return fd;
+}
+
+
+fd_t *
+glfs_resolve_fd (struct glfs *fs, xlator_t *subvol, struct glfs_fd *glfd)
+{
+ fd_t *fd = NULL;
+
+ glfs_lock (fs);
+ {
+ fd = __glfs_resolve_fd (fs, subvol, glfd);
+ }
+ glfs_unlock (fs);
+
+ return fd;
+}
+
+
+void
+__glfs_migrate_openfds (struct glfs *fs, xlator_t *subvol)
+{
+ struct glfs_fd *glfd = NULL;
+ fd_t *fd = NULL;
+
+ list_for_each_entry (glfd, &fs->openfds, openfds) {
+ if (uuid_is_null (glfd->fd->inode->gfid)) {
+ gf_log (fs->volname, GF_LOG_INFO,
+ "skipping openfd %p/%p in graph %s (%d)",
+ glfd, glfd->fd, graphid_str(subvol),
+ subvol->graph->id);
+ /* create in progress, defer */
+ continue;
+ }
+
+ fd = __glfs_migrate_fd (fs, subvol, glfd);
+ if (fd) {
+ fd_unref (glfd->fd);
+ glfd->fd = fd;
+ }
+ }
+}
+
+
+xlator_t *
+__glfs_active_subvol (struct glfs *fs)
+{
+ xlator_t *new_subvol = NULL;
+ int ret = -1;
+ inode_t *new_cwd = NULL;
+
+ if (!fs->next_subvol)
+ return fs->active_subvol;
+
+ new_subvol = fs->next_subvol;
+
+ ret = __glfs_first_lookup (fs, new_subvol);
+ if (ret) {
+ gf_log (fs->volname, GF_LOG_INFO,
+ "first lookup on graph %s (%d) failed (%s)",
+ graphid_str (new_subvol), new_subvol->graph->id,
+ strerror (errno));
+ return NULL;
+ }
+
+ if (fs->cwd) {
+ new_cwd = __glfs_refresh_inode (fs, new_subvol, fs->cwd);
+
+ if (!new_cwd) {
+ char buf1[64];
+ gf_log (fs->volname, GF_LOG_INFO,
+ "cwd refresh of %s graph %s (%d) failed (%s)",
+ uuid_utoa_r (fs->cwd->gfid, buf1),
+ graphid_str (new_subvol),
+ new_subvol->graph->id, strerror (errno));
+ return NULL;
+ }
+ }
+
+ __glfs_migrate_openfds (fs, new_subvol);
+
+ /* switching @active_subvol and @cwd
+ should be atomic
+ */
+ fs->old_subvol = fs->active_subvol;
+ fs->active_subvol = fs->next_subvol;
+ fs->next_subvol = NULL;
+
+ if (new_cwd) {
+ __glfs_cwd_set (fs, new_cwd);
+ inode_unref (new_cwd);
+ }
+
+ gf_log (fs->volname, GF_LOG_INFO, "switched to graph %s (%d)",
+ graphid_str (new_subvol), new_subvol->graph->id);
+
+ return new_subvol;
+}
+
+xlator_t *
+glfs_active_subvol (struct glfs *fs)
+{
+ xlator_t *subvol = NULL;
+ xlator_t *old_subvol = NULL;
+
+ glfs_lock (fs);
+ {
+ subvol = __glfs_active_subvol (fs);
+
+ if (subvol)
+ subvol->winds++;
+
+ if (fs->old_subvol) {
+ old_subvol = fs->old_subvol;
+ fs->old_subvol = NULL;
+ old_subvol->switched = 1;
+ }
+ }
+ glfs_unlock (fs);
+
+ if (old_subvol)
+ glfs_subvol_done (fs, old_subvol);
+
+ return subvol;
+}
+
+
+void
+glfs_subvol_done (struct glfs *fs, xlator_t *subvol)
+{
+ int ref = 0;
+ xlator_t *active_subvol = NULL;
+
+ if (!subvol)
+ return;
+
+ glfs_lock (fs);
+ {
+ ref = (--subvol->winds);
+ active_subvol = fs->active_subvol;
+ }
+ glfs_unlock (fs);
+
+ if (ref == 0) {
+ assert (subvol != active_subvol);
+ xlator_notify (subvol, GF_EVENT_PARENT_DOWN, subvol, NULL);
+ }
+}
+
+
+int
+__glfs_cwd_set (struct glfs *fs, inode_t *inode)
+{
+ if (inode->table->xl != fs->active_subvol) {
+ inode = __glfs_refresh_inode (fs, fs->active_subvol, inode);
+ if (!inode)
+ return -1;
+ } else {
+ inode_ref (inode);
+ }
+
+ if (fs->cwd)
+ inode_unref (fs->cwd);
+
+ fs->cwd = inode;
+
+ return 0;
+}
+
+
+int
+glfs_cwd_set (struct glfs *fs, inode_t *inode)
+{
+ int ret = 0;
+
+ glfs_lock (fs);
+ {
+ ret = __glfs_cwd_set (fs, inode);
+ }
+ glfs_unlock (fs);
+
+ return ret;
+}
+
+
+inode_t *
+__glfs_cwd_get (struct glfs *fs)
+{
+ inode_t *cwd = NULL;
+
+ if (!fs->cwd)
+ return NULL;
+
+ if (fs->cwd->table->xl == fs->active_subvol) {
+ cwd = inode_ref (fs->cwd);
+ return cwd;
+ }
+
+ cwd = __glfs_refresh_inode (fs, fs->active_subvol, fs->cwd);
+
+ return cwd;
+}
+
+inode_t *
+glfs_cwd_get (struct glfs *fs)
+{
+ inode_t *cwd = NULL;
+
+ glfs_lock (fs);
+ {
+ cwd = __glfs_cwd_get (fs);
+ }
+ glfs_unlock (fs);
+
+ return cwd;
+}
+
+inode_t *
+__glfs_resolve_inode (struct glfs *fs, xlator_t *subvol,
+ struct glfs_object *object)
+{
+ inode_t *inode = NULL;
+
+ if (object->inode->table->xl == subvol)
+ return inode_ref (object->inode);
+
+ inode = __glfs_refresh_inode (fs, fs->active_subvol,
+ object->inode);
+ if (!inode)
+ return NULL;
+
+ if (subvol == fs->active_subvol) {
+ inode_unref (object->inode);
+ object->inode = inode_ref (inode);
+ }
+
+ return inode;
+}
+
+inode_t *
+glfs_resolve_inode (struct glfs *fs, xlator_t *subvol,
+ struct glfs_object *object)
+{
+ inode_t *inode = NULL;
+
+ glfs_lock (fs);
+ {
+ inode = __glfs_resolve_inode(fs, subvol, object);
+ }
+ glfs_unlock (fs);
+
+ return inode;
+}
+
+int
+glfs_create_object (loc_t *loc, struct glfs_object **retobject)
+{
+ struct glfs_object *object = NULL;
+
+ object = GF_CALLOC (1, sizeof(struct glfs_object),
+ glfs_mt_glfs_object_t);
+ if (object == NULL) {
+ errno = ENOMEM;
+ return -1;
+ }
+
+ object->inode = loc->inode;
+ uuid_copy (object->gfid, object->inode->gfid);
+
+ /* we hold the reference */
+ loc->inode = NULL;
+
+ *retobject = object;
+
+ return 0;
+}
diff --git a/api/src/glfs.c b/api/src/glfs.c
new file mode 100644
index 000000000..29ed47c0c
--- /dev/null
+++ b/api/src/glfs.c
@@ -0,0 +1,673 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+/*
+ TODO:
+ - merge locks in glfs_posix_lock for lock self-healing
+ - set proper pid/lk_owner to call frames (currently buried in syncop)
+ - fix logging.c/h to store logfp and loglevel in glusterfs_ctx_t and
+ reach it via THIS.
+ - update syncop functions to accept/return xdata. ???
+ - protocol/client to reconnect immediately after portmap disconnect.
+ - handle SEEK_END failure in _lseek()
+ - handle umask (per filesystem?)
+ - make itables LRU based
+ - 0-copy for readv/writev
+ - reconcile the open/creat mess
+*/
+
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <limits.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "stack.h"
+#include "event.h"
+#include "glfs-mem-types.h"
+#include "common-utils.h"
+#include "syncop.h"
+#include "call-stub.h"
+
+#include "glfs.h"
+#include "glfs-internal.h"
+#include "hashfn.h"
+#include "rpc-clnt.h"
+
+
+static gf_boolean_t
+vol_assigned (cmd_args_t *args)
+{
+ return args->volfile || args->volfile_server;
+}
+
+
+static int
+glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx)
+{
+ call_pool_t *pool = NULL;
+ int ret = -1;
+
+ xlator_mem_acct_init (THIS, glfs_mt_end + 1);
+
+ ctx->process_uuid = generate_glusterfs_ctx_id ();
+ if (!ctx->process_uuid) {
+ goto err;
+ }
+
+ ctx->page_size = 128 * GF_UNIT_KB;
+
+ ctx->iobuf_pool = iobuf_pool_new ();
+ if (!ctx->iobuf_pool) {
+ goto err;
+ }
+
+ ctx->event_pool = event_pool_new (DEFAULT_EVENT_POOL_SIZE);
+ if (!ctx->event_pool) {
+ goto err;
+ }
+
+ ctx->env = syncenv_new (0, 0, 0);
+ if (!ctx->env) {
+ goto err;
+ }
+
+ pool = GF_CALLOC (1, sizeof (call_pool_t),
+ glfs_mt_call_pool_t);
+ if (!pool) {
+ goto err;
+ }
+
+ /* frame_mem_pool size 112 * 4k */
+ pool->frame_mem_pool = mem_pool_new (call_frame_t, 4096);
+ if (!pool->frame_mem_pool) {
+ goto err;
+ }
+ /* stack_mem_pool size 256 * 1024 */
+ pool->stack_mem_pool = mem_pool_new (call_stack_t, 1024);
+ if (!pool->stack_mem_pool) {
+ goto err;
+ }
+
+ ctx->stub_mem_pool = mem_pool_new (call_stub_t, 1024);
+ if (!ctx->stub_mem_pool) {
+ goto err;
+ }
+
+ ctx->dict_pool = mem_pool_new (dict_t, GF_MEMPOOL_COUNT_OF_DICT_T);
+ if (!ctx->dict_pool)
+ goto err;
+
+ ctx->dict_pair_pool = mem_pool_new (data_pair_t,
+ GF_MEMPOOL_COUNT_OF_DATA_PAIR_T);
+ if (!ctx->dict_pair_pool)
+ goto err;
+
+ ctx->dict_data_pool = mem_pool_new (data_t, GF_MEMPOOL_COUNT_OF_DATA_T);
+ if (!ctx->dict_data_pool)
+ goto err;
+
+ INIT_LIST_HEAD (&pool->all_frames);
+ INIT_LIST_HEAD (&ctx->cmd_args.xlator_options);
+ LOCK_INIT (&pool->lock);
+ ctx->pool = pool;
+
+ pthread_mutex_init (&(ctx->lock), NULL);
+
+ ret = 0;
+err:
+ if (ret && pool) {
+ if (pool->frame_mem_pool)
+ mem_pool_destroy (pool->frame_mem_pool);
+ if (pool->stack_mem_pool)
+ mem_pool_destroy (pool->stack_mem_pool);
+ GF_FREE (pool);
+ }
+
+ if (ret && ctx) {
+ if (ctx->stub_mem_pool)
+ mem_pool_destroy (ctx->stub_mem_pool);
+ if (ctx->dict_pool)
+ mem_pool_destroy (ctx->dict_pool);
+ if (ctx->dict_data_pool)
+ mem_pool_destroy (ctx->dict_data_pool);
+ if (ctx->dict_pair_pool)
+ mem_pool_destroy (ctx->dict_pair_pool);
+ }
+
+ return ret;
+}
+
+
+static int
+create_master (struct glfs *fs)
+{
+ int ret = 0;
+ xlator_t *master = NULL;
+
+ master = GF_CALLOC (1, sizeof (*master),
+ glfs_mt_xlator_t);
+ if (!master)
+ goto err;
+
+ master->name = gf_strdup ("gfapi");
+ if (!master->name)
+ goto err;
+
+ if (xlator_set_type (master, "mount/api") == -1) {
+ gf_log ("glfs", GF_LOG_ERROR,
+ "master xlator for %s initialization failed",
+ fs->volname);
+ goto err;
+ }
+
+ master->ctx = fs->ctx;
+ master->private = fs;
+ master->options = get_new_dict ();
+ if (!master->options)
+ goto err;
+
+
+ ret = xlator_init (master);
+ if (ret) {
+ gf_log ("glfs", GF_LOG_ERROR,
+ "failed to initialize gfapi translator");
+ goto err;
+ }
+
+ fs->ctx->master = master;
+ THIS = master;
+
+ return 0;
+
+err:
+ if (master) {
+ xlator_destroy (master);
+ }
+
+ return -1;
+}
+
+
+static FILE *
+get_volfp (struct glfs *fs)
+{
+ int ret = 0;
+ cmd_args_t *cmd_args = NULL;
+ FILE *specfp = NULL;
+ struct stat statbuf;
+
+ cmd_args = &fs->ctx->cmd_args;
+
+ ret = lstat (cmd_args->volfile, &statbuf);
+ if (ret == -1) {
+ gf_log ("glfs", GF_LOG_ERROR,
+ "%s: %s", cmd_args->volfile, strerror (errno));
+ return NULL;
+ }
+
+ if ((specfp = fopen (cmd_args->volfile, "r")) == NULL) {
+ gf_log ("glfs", GF_LOG_ERROR,
+ "volume file %s: %s",
+ cmd_args->volfile,
+ strerror (errno));
+ return NULL;
+ }
+
+ gf_log ("glfs", GF_LOG_DEBUG,
+ "loading volume file %s", cmd_args->volfile);
+
+ return specfp;
+}
+
+
+int
+glfs_volumes_init (struct glfs *fs)
+{
+ FILE *fp = NULL;
+ cmd_args_t *cmd_args = NULL;
+ int ret = 0;
+
+ cmd_args = &fs->ctx->cmd_args;
+
+ if (!vol_assigned (cmd_args))
+ return -1;
+
+ if (cmd_args->volfile_server) {
+ ret = glfs_mgmt_init (fs);
+ goto out;
+ }
+
+ fp = get_volfp (fs);
+
+ if (!fp) {
+ gf_log ("glfs", GF_LOG_ERROR,
+ "Cannot reach volume specification file");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glfs_process_volfp (fs, fp);
+ if (ret)
+ goto out;
+
+out:
+ return ret;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+
+
+int
+glfs_set_xlator_option (struct glfs *fs, const char *xlator, const char *key,
+ const char *value)
+{
+ xlator_cmdline_option_t *option = NULL;
+
+ option = GF_CALLOC (1, sizeof (*option),
+ glfs_mt_xlator_cmdline_option_t);
+ if (!option)
+ goto enomem;
+
+ INIT_LIST_HEAD (&option->cmd_args);
+
+ option->volume = gf_strdup (xlator);
+ if (!option->volume)
+ goto enomem;
+ option->key = gf_strdup (key);
+ if (!option->key)
+ goto enomem;
+ option->value = gf_strdup (value);
+ if (!option->value)
+ goto enomem;
+
+ list_add (&option->cmd_args, &fs->ctx->cmd_args.xlator_options);
+
+ return 0;
+enomem:
+ errno = ENOMEM;
+
+ if (!option)
+ return -1;
+
+ GF_FREE (option->volume);
+ GF_FREE (option->key);
+ GF_FREE (option->value);
+ GF_FREE (option);
+
+ return -1;
+}
+
+int glfs_setfsuid (uid_t fsuid)
+{
+ return syncopctx_setfsuid (&fsuid);
+}
+
+int glfs_setfsgid (gid_t fsgid)
+{
+ return syncopctx_setfsgid (&fsgid);
+}
+
+int glfs_setfsgroups (size_t size, const gid_t *list)
+{
+ return syncopctx_setfsgroups(size, list);
+}
+
+struct glfs *
+glfs_from_glfd (struct glfs_fd *glfd)
+{
+ return glfd->fs;
+}
+
+
+struct glfs_fd *
+glfs_fd_new (struct glfs *fs)
+{
+ struct glfs_fd *glfd = NULL;
+
+ glfd = GF_CALLOC (1, sizeof (*glfd), glfs_mt_glfs_fd_t);
+ if (!glfd)
+ return NULL;
+
+ glfd->fs = fs;
+
+ INIT_LIST_HEAD (&glfd->openfds);
+
+ return glfd;
+}
+
+
+void
+glfs_fd_bind (struct glfs_fd *glfd)
+{
+ struct glfs *fs = NULL;
+
+ fs = glfd->fs;
+
+ glfs_lock (fs);
+ {
+ list_add_tail (&glfd->openfds, &fs->openfds);
+ }
+ glfs_unlock (fs);
+}
+
+void
+glfs_fd_destroy (struct glfs_fd *glfd)
+{
+ if (!glfd)
+ return;
+
+ glfs_lock (glfd->fs);
+ {
+ list_del_init (&glfd->openfds);
+ }
+ glfs_unlock (glfd->fs);
+
+ if (glfd->fd)
+ fd_unref (glfd->fd);
+
+ GF_FREE (glfd->readdirbuf);
+
+ GF_FREE (glfd);
+}
+
+
+static void *
+glfs_poller (void *data)
+{
+ struct glfs *fs = NULL;
+
+ fs = data;
+
+ event_dispatch (fs->ctx->event_pool);
+
+ return NULL;
+}
+
+
+struct glfs *
+glfs_new (const char *volname)
+{
+ struct glfs *fs = NULL;
+ int ret = -1;
+ glusterfs_ctx_t *ctx = NULL;
+
+ ctx = glusterfs_ctx_new ();
+ if (!ctx) {
+ return NULL;
+ }
+
+#ifdef DEBUG
+ gf_mem_acct_enable_set (ctx);
+#endif
+
+ /* first globals init, for gf_mem_acct_enable_set () */
+ ret = glusterfs_globals_init (ctx);
+ if (ret)
+ return NULL;
+
+ THIS->ctx = ctx;
+
+ /* then ctx_defaults_init, for xlator_mem_acct_init(THIS) */
+ ret = glusterfs_ctx_defaults_init (ctx);
+ if (ret)
+ return NULL;
+
+ fs = GF_CALLOC (1, sizeof (*fs), glfs_mt_glfs_t);
+ if (!fs)
+ return NULL;
+ fs->ctx = ctx;
+
+ glfs_set_logging (fs, "/dev/null", 0);
+
+ fs->ctx->cmd_args.volfile_id = gf_strdup (volname);
+
+ fs->volname = gf_strdup (volname);
+
+ pthread_mutex_init (&fs->mutex, NULL);
+ pthread_cond_init (&fs->cond, NULL);
+
+ INIT_LIST_HEAD (&fs->openfds);
+
+ return fs;
+}
+
+
+int
+glfs_set_volfile (struct glfs *fs, const char *volfile)
+{
+ cmd_args_t *cmd_args = NULL;
+
+ cmd_args = &fs->ctx->cmd_args;
+
+ if (vol_assigned (cmd_args))
+ return -1;
+
+ cmd_args->volfile = gf_strdup (volfile);
+
+ return 0;
+}
+
+
+int
+glfs_set_volfile_server (struct glfs *fs, const char *transport,
+ const char *host, int port)
+{
+ cmd_args_t *cmd_args = NULL;
+
+ cmd_args = &fs->ctx->cmd_args;
+
+ if (vol_assigned (cmd_args))
+ return -1;
+
+ cmd_args->volfile_server = gf_strdup (host);
+ cmd_args->volfile_server_transport = gf_strdup (transport);
+ cmd_args->volfile_server_port = port;
+ cmd_args->max_connect_attempts = 2;
+
+ return 0;
+}
+
+
+int
+glfs_set_logging (struct glfs *fs, const char *logfile, int loglevel)
+{
+ int ret = 0;
+ char *tmplog = NULL;
+
+ if (!logfile) {
+ ret = gf_set_log_file_path (&fs->ctx->cmd_args);
+ if (ret)
+ goto out;
+ tmplog = fs->ctx->cmd_args.log_file;
+ } else {
+ tmplog = (char *)logfile;
+ }
+
+ ret = gf_log_init (fs->ctx, tmplog, NULL);
+ if (ret)
+ goto out;
+
+ if (loglevel >= 0)
+ gf_log_set_loglevel (loglevel);
+
+out:
+ return ret;
+}
+
+
+int
+glfs_init_wait (struct glfs *fs)
+{
+ int ret = -1;
+
+ /* Always a top-down call, use glfs_lock() */
+ glfs_lock (fs);
+ {
+ while (!fs->init)
+ pthread_cond_wait (&fs->cond,
+ &fs->mutex);
+ ret = fs->ret;
+ errno = fs->err;
+ }
+ glfs_unlock (fs);
+
+ return ret;
+}
+
+
+void
+glfs_init_done (struct glfs *fs, int ret)
+{
+ glfs_init_cbk init_cbk;
+
+ if (!fs) {
+ gf_log ("glfs", GF_LOG_ERROR,
+ "fs is NULL");
+ goto out;
+ }
+
+ init_cbk = fs->init_cbk;
+
+ /* Always a bottom-up call, use mutex_lock() */
+ pthread_mutex_lock (&fs->mutex);
+ {
+ fs->init = 1;
+ fs->ret = ret;
+ fs->err = errno;
+
+ if (!init_cbk)
+ pthread_cond_broadcast (&fs->cond);
+ }
+ pthread_mutex_unlock (&fs->mutex);
+
+ if (init_cbk)
+ init_cbk (fs, ret);
+out:
+ return;
+}
+
+
+int
+glfs_init_common (struct glfs *fs)
+{
+ int ret = -1;
+
+ ret = create_master (fs);
+ if (ret)
+ return ret;
+
+ ret = gf_thread_create (&fs->poller, NULL, glfs_poller, fs);
+ if (ret)
+ return ret;
+
+ ret = glfs_volumes_init (fs);
+ if (ret)
+ return ret;
+
+ fs->dev_id = gf_dm_hashfn (fs->volname, strlen (fs->volname));
+ return ret;
+}
+
+
+int
+glfs_init_async (struct glfs *fs, glfs_init_cbk cbk)
+{
+ int ret = -1;
+
+ fs->init_cbk = cbk;
+
+ ret = glfs_init_common (fs);
+
+ return ret;
+}
+
+
+int
+glfs_init (struct glfs *fs)
+{
+ int ret = -1;
+
+ ret = glfs_init_common (fs);
+ if (ret)
+ return ret;
+
+ ret = glfs_init_wait (fs);
+
+ return ret;
+}
+
+
+int
+glfs_fini (struct glfs *fs)
+{
+ int ret = -1;
+ int countdown = 100;
+ xlator_t *subvol = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ call_pool_t *call_pool = NULL;
+
+ ctx = fs->ctx;
+
+ if (ctx->mgmt) {
+ rpc_clnt_disable (ctx->mgmt);
+ ctx->mgmt = NULL;
+ }
+
+ __glfs_entry_fs (fs);
+
+ call_pool = fs->ctx->pool;
+
+ while (countdown--) {
+ /* give some time for background frames to finish */
+ if (!call_pool->cnt)
+ break;
+ usleep (100000);
+ }
+ /* leaked frames may exist, we ignore */
+
+ /*We deem glfs_fini as successful if there are no pending frames in the call
+ *pool*/
+ ret = (call_pool->cnt == 0)? 0: -1;
+
+ subvol = glfs_active_subvol (fs);
+ if (subvol) {
+ /* PARENT_DOWN within glfs_subvol_done() is issued only
+ on graph switch (new graph should activiate and
+ decrement the extra @winds count taken in glfs_graph_setup()
+
+ Since we are explicitly destroying, PARENT_DOWN is necessary
+ */
+ xlator_notify (subvol, GF_EVENT_PARENT_DOWN, subvol, 0);
+ /* TBD: wait for CHILD_DOWN before exiting, in case of
+ asynchronous cleanup like graceful socket disconnection
+ in the future.
+ */
+ }
+
+ glfs_subvol_done (fs, subvol);
+
+ if (ctx->log.logfile)
+ fclose (ctx->log.logfile);
+
+ return ret;
+}
diff --git a/api/src/glfs.h b/api/src/glfs.h
new file mode 100644
index 000000000..18fda496e
--- /dev/null
+++ b/api/src/glfs.h
@@ -0,0 +1,581 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _GLFS_H
+#define _GLFS_H
+
+/*
+ Enforce the following flags as libgfapi is built
+ with them, and we want programs linking against them to also
+ be built with these flags. This is necessary as it affects
+ some of the structures defined in libc headers (like struct stat)
+ and those definitions need to be consistently compiled in
+ both the library and the application.
+*/
+
+#ifndef _FILE_OFFSET_BITS
+#define _FILE_OFFSET_BITS 64
+#endif
+
+#ifndef __USE_FILE_OFFSET64
+#define __USE_FILE_OFFSET64
+#endif
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#include <sys/cdefs.h>
+#include <dirent.h>
+#include <sys/statvfs.h>
+
+__BEGIN_DECLS
+
+/* The filesystem object. One object per 'virtual mount' */
+struct glfs;
+typedef struct glfs glfs_t;
+
+
+/*
+ SYNOPSIS
+
+ glfs_new: Create a new 'virtual mount' object.
+
+ DESCRIPTION
+
+ This is most likely the very first function you will use. This function
+ will create a new glfs_t (virtual mount) object in memory.
+
+ On this newly created glfs_t, you need to be either set a volfile path
+ (glfs_set_volfile) or a volfile server (glfs_set_volfile_server).
+
+ The glfs_t object needs to be initialized with glfs_init() before you
+ can start issuing file operations on it.
+
+ PARAMETERS
+
+ @volname: Name of the volume. This identifies the server-side volume and
+ the fetched volfile (equivalent of --volfile-id command line
+ parameter to glusterfsd). When used with glfs_set_volfile() the
+ @volname has no effect (except for appearing in log messages).
+
+ RETURN VALUES
+
+ NULL : Out of memory condition.
+ Others : Pointer to the newly created glfs_t virtual mount object.
+
+*/
+
+glfs_t *glfs_new (const char *volname);
+
+
+/*
+ SYNOPSIS
+
+ glfs_set_volfile: Specify the path to the volume specification file.
+
+ DESCRIPTION
+
+ If you are using a static volume specification file (without dynamic
+ volume management abilities from the CLI), then specify the path to
+ the volume specification file.
+
+ This is incompatible with glfs_set_volfile_server().
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be configured with the volume
+ specification file.
+
+ @volfile: Path to the locally available volume specification file.
+
+ RETURN VALUES
+
+ 0 : Success.
+ -1 : Failure. @errno will be set with the type of failure.
+
+*/
+
+int glfs_set_volfile (glfs_t *fs, const char *volfile);
+
+
+/*
+ SYNOPSIS
+
+ glfs_set_volfile_server: Specify the address of management server.
+
+ DESCRIPTION
+
+ This function specifies the address of the management server (glusterd)
+ to connect, and establish the volume configuration. The @volname
+ parameter passed to glfs_new() is the volume which will be virtually
+ mounted as the glfs_t object. All operations performed by the CLI at
+ the management server will automatically be reflected in the 'virtual
+ mount' object as it maintains a connection to glusterd and polls on
+ configuration change notifications.
+
+ This is incompatible with glfs_set_volfile().
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be configured with the volume
+ specification file.
+
+ @transport: String specifying the transport used to connect to the
+ management daemon. Specifying NULL will result in the usage
+ of the default (tcp) transport type. Permitted values
+ are those what you specify as transport-type in a volume
+ specification file (e.g "tcp", "rdma", "unix".)
+
+ @host: String specifying the address of where to find the management
+ daemon. Depending on the transport type this would either be
+ an FQDN (e.g: "storage01.company.com"), ASCII encoded IP
+ address "192.168.22.1", or a UNIX domain socket path (e.g
+ "/tmp/glusterd.socket".)
+
+ @port: The TCP port number where gluster management daemon is listening.
+ Specifying 0 uses the default port number GF_DEFAULT_BASE_PORT.
+ This parameter is unused if you are using a UNIX domain socket.
+
+ RETURN VALUES
+
+ 0 : Success.
+ -1 : Failure. @errno will be set with the type of failure.
+
+*/
+
+int glfs_set_volfile_server (glfs_t *fs, const char *transport,
+ const char *host, int port);
+
+
+/*
+ SYNOPSIS
+
+ glfs_set_logging: Specify logging parameters.
+
+ DESCRIPTION
+
+ This function specifies logging parameters for the virtual mount.
+ Default log file is /dev/null.
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be configured with the logging parameters.
+
+ @logfile: The logfile to be used for logging. Will be created if it does not
+ already exist (provided system permissions allow). If NULL, a new
+ logfile will be created in default log directory associated with
+ the glusterfs installation.
+
+ @loglevel: Numerical value specifying the degree of verbosity. Higher the
+ value, more verbose the logging.
+
+ RETURN VALUES
+
+ 0 : Success.
+ -1 : Failure. @errno will be set with the type of failure.
+
+*/
+
+int glfs_set_logging (glfs_t *fs, const char *logfile, int loglevel);
+
+
+/*
+ SYNOPSIS
+
+ glfs_init: Initialize the 'virtual mount'
+
+ DESCRIPTION
+
+ This function initializes the glfs_t object. This consists of many steps:
+ - Spawn a poll-loop thread.
+ - Establish connection to management daemon and receive volume specification.
+ - Construct translator graph and initialize graph.
+ - Wait for initialization (connecting to all bricks) to complete.
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be initialized.
+
+ RETURN VALUES
+
+ 0 : Success.
+ -1 : Failure. @errno will be set with the type of failure.
+
+*/
+
+int glfs_init (glfs_t *fs);
+
+
+/*
+ SYNOPSIS
+
+ glfs_fini: Cleanup and destroy the 'virtual mount'
+
+ DESCRIPTION
+
+ This function attempts to gracefully destroy glfs_t object. An attempt is
+ made to wait for all background processing to complete before returning.
+
+ glfs_fini() must be called after all operations on glfs_t is finished.
+
+ IMPORTANT
+
+ IT IS NECESSARY TO CALL glfs_fini() ON ALL THE INITIALIZED glfs_t
+ OBJECTS BEFORE TERMINATING THE PROGRAM. THERE MAY BE CACHED AND
+ UNWRITTEN / INCOMPLETE OPERATIONS STILL IN PROGRESS EVEN THOUGH THE
+ API CALLS HAVE RETURNED. glfs_fini() WILL WAIT FOR BACKGROUND OPERATIONS
+ TO COMPLETE BEFORE RETURNING, THEREBY MAKING IT SAFE FOR THE PROGRAM TO
+ EXIT.
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be destroyed.
+
+ RETURN VALUES
+
+ 0 : Success.
+*/
+
+int glfs_fini (glfs_t *fs);
+
+/*
+ * FILE OPERATION
+ *
+ * What follows are filesystem operations performed on the
+ * 'virtual mount'. The calls here are kept as close to
+ * the POSIX system calls as possible.
+ *
+ * Notes:
+ *
+ * - All paths specified, even if absolute, are relative to the
+ * root of the virtual mount and not the system root (/).
+ *
+ */
+
+/* The file descriptor object. One per open file/directory. */
+
+struct glfs_fd;
+typedef struct glfs_fd glfs_fd_t;
+
+/*
+ * PER THREAD IDENTITY MODIFIERS
+ *
+ * The following operations enable to set a per thread identity context
+ * for the glfs APIs to perform operations as. The calls here are kept as close
+ * to POSIX equivalents as possible.
+ *
+ * NOTES:
+ *
+ * - setgroups is a per thread setting, hence this is named as fsgroups to be
+ * close in naming to the fs(u/g)id APIs
+ * - Typical mode of operation is to set the IDs as required, with the
+ * supplementary groups being optionally set, make the glfs call and post the
+ * glfs operation set them back to eu/gid or uid/gid as appropriate to the
+ * caller
+ * - The groups once set, need to be unset by setting the size to 0 (in which
+ * case the list argument is a do not care)
+ * - Once a process for a thread of operation choses to set the IDs, all glfs
+ * calls made from that thread would default to the IDs set for the thread.
+ * As a result use these APIs with care and ensure that the set IDs are
+ * reverted to global process defaults as required.
+ *
+ */
+int glfs_setfsuid (uid_t fsuid);
+int glfs_setfsgid (gid_t fsgid);
+int glfs_setfsgroups (size_t size, const gid_t *list);
+
+/*
+ SYNOPSIS
+
+ glfs_open: Open a file.
+
+ DESCRIPTION
+
+ This function opens a file on a virtual mount.
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be initialized.
+
+ @path: Path of the file within the virtual mount.
+
+ @flags: Open flags. See open(2). O_CREAT is not supported.
+ Use glfs_creat() for creating files.
+
+ RETURN VALUES
+
+ NULL : Failure. @errno will be set with the type of failure.
+ Others : Pointer to the opened glfs_fd_t.
+
+ */
+
+glfs_fd_t *glfs_open (glfs_t *fs, const char *path, int flags);
+
+
+/*
+ SYNOPSIS
+
+ glfs_creat: Create a file.
+
+ DESCRIPTION
+
+ This function opens a file on a virtual mount.
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be initialized.
+
+ @path: Path of the file within the virtual mount.
+
+ @mode: Permission of the file to be created.
+
+ @flags: Create flags. See open(2). O_EXCL is supported.
+
+ RETURN VALUES
+
+ NULL : Failure. @errno will be set with the type of failure.
+ Others : Pointer to the opened glfs_fd_t.
+
+ */
+
+glfs_fd_t *glfs_creat (glfs_t *fs, const char *path, int flags,
+ mode_t mode);
+
+int glfs_close (glfs_fd_t *fd);
+
+glfs_t *glfs_from_glfd (glfs_fd_t *fd);
+
+int glfs_set_xlator_option (glfs_t *fs, const char *xlator, const char *key,
+ const char *value);
+
+/*
+
+ glfs_io_cbk
+
+ The following is the function type definition of the callback
+ function pointer which has to be provided by the caller to the
+ *_async() versions of the IO calls.
+
+ The callback function is called on completion of the requested
+ IO, and the appropriate return value is returned in @ret.
+
+ In case of an error in completing the IO, @ret will be -1 and
+ @errno will be set with the appropriate error.
+
+ @ret will be same as the return value of the non _async() variant
+ of the particular call
+
+ @data is the same context pointer provided by the caller at the
+ time of issuing the async IO call. This can be used by the
+ caller to differentiate different instances of the async requests
+ in a common callback function.
+*/
+
+typedef void (*glfs_io_cbk) (glfs_fd_t *fd, ssize_t ret, void *data);
+
+// glfs_{read,write}[_async]
+
+ssize_t glfs_read (glfs_fd_t *fd, void *buf, size_t count, int flags);
+ssize_t glfs_write (glfs_fd_t *fd, const void *buf, size_t count, int flags);
+int glfs_read_async (glfs_fd_t *fd, void *buf, size_t count, int flags,
+ glfs_io_cbk fn, void *data);
+int glfs_write_async (glfs_fd_t *fd, const void *buf, size_t count, int flags,
+ glfs_io_cbk fn, void *data);
+
+// glfs_{read,write}v[_async]
+
+ssize_t glfs_readv (glfs_fd_t *fd, const struct iovec *iov, int iovcnt,
+ int flags);
+ssize_t glfs_writev (glfs_fd_t *fd, const struct iovec *iov, int iovcnt,
+ int flags);
+int glfs_readv_async (glfs_fd_t *fd, const struct iovec *iov, int count,
+ int flags, glfs_io_cbk fn, void *data);
+int glfs_writev_async (glfs_fd_t *fd, const struct iovec *iov, int count,
+ int flags, glfs_io_cbk fn, void *data);
+
+// glfs_p{read,write}[_async]
+
+ssize_t glfs_pread (glfs_fd_t *fd, void *buf, size_t count, off_t offset,
+ int flags);
+ssize_t glfs_pwrite (glfs_fd_t *fd, const void *buf, size_t count,
+ off_t offset, int flags);
+int glfs_pread_async (glfs_fd_t *fd, void *buf, size_t count, off_t offset,
+ int flags, glfs_io_cbk fn, void *data);
+int glfs_pwrite_async (glfs_fd_t *fd, const void *buf, int count, off_t offset,
+ int flags, glfs_io_cbk fn, void *data);
+
+// glfs_p{read,write}v[_async]
+
+ssize_t glfs_preadv (glfs_fd_t *fd, const struct iovec *iov, int iovcnt,
+ off_t offset, int flags);
+ssize_t glfs_pwritev (glfs_fd_t *fd, const struct iovec *iov, int iovcnt,
+ off_t offset, int flags);
+int glfs_preadv_async (glfs_fd_t *fd, const struct iovec *iov, int count,
+ off_t offset, int flags, glfs_io_cbk fn, void *data);
+int glfs_pwritev_async (glfs_fd_t *fd, const struct iovec *iov, int count,
+ off_t offset, int flags, glfs_io_cbk fn, void *data);
+
+
+off_t glfs_lseek (glfs_fd_t *fd, off_t offset, int whence);
+
+int glfs_truncate (glfs_t *fs, const char *path, off_t length);
+
+int glfs_ftruncate (glfs_fd_t *fd, off_t length);
+int glfs_ftruncate_async (glfs_fd_t *fd, off_t length, glfs_io_cbk fn,
+ void *data);
+
+int glfs_lstat (glfs_t *fs, const char *path, struct stat *buf);
+int glfs_stat (glfs_t *fs, const char *path, struct stat *buf);
+int glfs_fstat (glfs_fd_t *fd, struct stat *buf);
+
+int glfs_fsync (glfs_fd_t *fd);
+int glfs_fsync_async (glfs_fd_t *fd, glfs_io_cbk fn, void *data);
+
+int glfs_fdatasync (glfs_fd_t *fd);
+int glfs_fdatasync_async (glfs_fd_t *fd, glfs_io_cbk fn, void *data);
+
+int glfs_access (glfs_t *fs, const char *path, int mode);
+
+int glfs_symlink (glfs_t *fs, const char *oldpath, const char *newpath);
+
+int glfs_readlink (glfs_t *fs, const char *path, char *buf, size_t bufsiz);
+
+int glfs_mknod (glfs_t *fs, const char *path, mode_t mode, dev_t dev);
+
+int glfs_mkdir (glfs_t *fs, const char *path, mode_t mode);
+
+int glfs_unlink (glfs_t *fs, const char *path);
+
+int glfs_rmdir (glfs_t *fs, const char *path);
+
+int glfs_rename (glfs_t *fs, const char *oldpath, const char *newpath);
+
+int glfs_link (glfs_t *fs, const char *oldpath, const char *newpath);
+
+glfs_fd_t *glfs_opendir (glfs_t *fs, const char *path);
+
+/*
+ * @glfs_readdir_r and @glfs_readdirplus_r ARE thread safe AND re-entrant,
+ * but the interface has ambiguity about the size of @dirent to be allocated
+ * before calling the APIs. 512 byte buffer (for @dirent) is sufficient for
+ * all known systems which are tested againt glusterfs/gfapi, but may be
+ * insufficient in the future.
+ */
+
+int glfs_readdir_r (glfs_fd_t *fd, struct dirent *dirent,
+ struct dirent **result);
+
+int glfs_readdirplus_r (glfs_fd_t *fd, struct stat *stat, struct dirent *dirent,
+ struct dirent **result);
+
+/*
+ * @glfs_readdir and @glfs_readdirplus are NEITHER thread safe NOR re-entrant
+ * when called on the same directory handle. However they ARE thread safe
+ * AND re-entrant when called on different directory handles (which may be
+ * referring to the same directory too.)
+ */
+
+struct dirent *glfs_readdir (glfs_fd_t *fd);
+
+struct dirent *glfs_readdirplus (glfs_fd_t *fd, struct stat *stat);
+
+long glfs_telldir (glfs_fd_t *fd);
+
+void glfs_seekdir (glfs_fd_t *fd, long offset);
+
+int glfs_closedir (glfs_fd_t *fd);
+
+int glfs_statvfs (glfs_t *fs, const char *path, struct statvfs *buf);
+
+int glfs_chmod (glfs_t *fs, const char *path, mode_t mode);
+
+int glfs_fchmod (glfs_fd_t *fd, mode_t mode);
+
+int glfs_chown (glfs_t *fs, const char *path, uid_t uid, gid_t gid);
+
+int glfs_lchown (glfs_t *fs, const char *path, uid_t uid, gid_t gid);
+
+int glfs_fchown (glfs_fd_t *fd, uid_t uid, gid_t gid);
+
+int glfs_utimens (glfs_t *fs, const char *path, struct timespec times[2]);
+
+int glfs_lutimens (glfs_t *fs, const char *path, struct timespec times[2]);
+
+int glfs_futimens (glfs_fd_t *fd, struct timespec times[2]);
+
+ssize_t glfs_getxattr (glfs_t *fs, const char *path, const char *name,
+ void *value, size_t size);
+
+ssize_t glfs_lgetxattr (glfs_t *fs, const char *path, const char *name,
+ void *value, size_t size);
+
+ssize_t glfs_fgetxattr (glfs_fd_t *fd, const char *name,
+ void *value, size_t size);
+
+ssize_t glfs_listxattr (glfs_t *fs, const char *path, void *value, size_t size);
+
+ssize_t glfs_llistxattr (glfs_t *fs, const char *path, void *value,
+ size_t size);
+
+ssize_t glfs_flistxattr (glfs_fd_t *fd, void *value, size_t size);
+
+int glfs_setxattr (glfs_t *fs, const char *path, const char *name,
+ const void *value, size_t size, int flags);
+
+int glfs_lsetxattr (glfs_t *fs, const char *path, const char *name,
+ const void *value, size_t size, int flags);
+
+int glfs_fsetxattr (glfs_fd_t *fd, const char *name,
+ const void *value, size_t size, int flags);
+
+int glfs_removexattr (glfs_t *fs, const char *path, const char *name);
+
+int glfs_lremovexattr (glfs_t *fs, const char *path, const char *name);
+
+int glfs_fremovexattr (glfs_fd_t *fd, const char *name);
+
+int glfs_fallocate(glfs_fd_t *fd, int keep_size, off_t offset, size_t len);
+
+int glfs_discard(glfs_fd_t *fd, off_t offset, size_t len);
+
+
+int glfs_discard_async (glfs_fd_t *fd, off_t length, size_t lent,
+ glfs_io_cbk fn, void *data);
+
+int glfs_zerofill(glfs_fd_t *fd, off_t offset, size_t len);
+
+int glfs_zerofill_async (glfs_fd_t *fd, off_t length, size_t len,
+ glfs_io_cbk fn, void *data);
+
+char *glfs_getcwd (glfs_t *fs, char *buf, size_t size);
+
+int glfs_chdir (glfs_t *fs, const char *path);
+
+int glfs_fchdir (glfs_fd_t *fd);
+
+char *glfs_realpath (glfs_t *fs, const char *path, char *resolved_path);
+
+/*
+ * @cmd and @flock are as specified in man fcntl(2).
+ */
+int glfs_posix_lock (glfs_fd_t *fd, int cmd, struct flock *flock);
+
+glfs_fd_t *glfs_dup (glfs_fd_t *fd);
+
+__END_DECLS
+
+#endif /* !_GLFS_H */
diff --git a/argp-standalone/configure.ac b/argp-standalone/configure.ac
index fe54d5ac9..2ecd2a801 100644
--- a/argp-standalone/configure.ac
+++ b/argp-standalone/configure.ac
@@ -8,7 +8,9 @@ AC_CONFIG_SRCDIR([argp-ba.c])
AC_CONFIG_AUX_DIR([.])
AM_INIT_AUTOMAKE
-AM_CONFIG_HEADER(config.h)
+AC_CONFIG_HEADERS(config.h)
+
+m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES(yes)])
# GNU libc defaults to supplying the ISO C library functions only. The
# _GNU_SOURCE define enables these extensions, in particular we want
@@ -20,7 +22,7 @@ AC_GNU_SOURCE
AC_PROG_CC
AC_PROG_MAKE_SET
AC_PROG_RANLIB
-AM_PROG_CC_STDC
+AC_PROG_CC
if test "x$am_cv_prog_cc_stdc" = xno ; then
AC_ERROR([the C compiler doesn't handle ANSI-C])
diff --git a/argp-standalone/strcasecmp.c b/argp-standalone/strcasecmp.c
index bcad7a226..9c1637232 100644
--- a/argp-standalone/strcasecmp.c
+++ b/argp-standalone/strcasecmp.c
@@ -8,6 +8,7 @@
*/
#include <ctype.h>
+int strcasecmp(const char *, const char *);
int strcasecmp(const char *s1, const char *s2)
{
diff --git a/argp-standalone/vsnprintf.c b/argp-standalone/vsnprintf.c
index e9b5f192b..33c9a5d00 100644
--- a/argp-standalone/vsnprintf.c
+++ b/argp-standalone/vsnprintf.c
@@ -551,7 +551,7 @@ static LDOUBLE abs_val (LDOUBLE value)
return result;
}
-static LDOUBLE pow10 (int exp)
+static LDOUBLE pow10_argp (int exp)
{
LDOUBLE result = 1;
@@ -564,7 +564,7 @@ static LDOUBLE pow10 (int exp)
return result;
}
-static long round (LDOUBLE value)
+static long round_argp (LDOUBLE value)
{
long intpart;
@@ -626,12 +626,12 @@ static int fmtfp (char *buffer, size_t *currlen, size_t maxlen,
/* We "cheat" by converting the fractional part to integer by
* multiplying by a factor of 10
*/
- fracpart = round ((pow10 (max)) * (ufvalue - intpart));
+ fracpart = round_argp ((pow10_argp (max)) * (ufvalue - intpart));
- if (fracpart >= pow10 (max))
+ if (fracpart >= pow10_argp (max))
{
intpart++;
- fracpart -= pow10 (max);
+ fracpart -= pow10_argp (max);
}
#ifdef DEBUG_SNPRINTF
diff --git a/auth/addr/src/Makefile.am b/auth/addr/src/Makefile.am
deleted file mode 100644
index cca406151..000000000
--- a/auth/addr/src/Makefile.am
+++ /dev/null
@@ -1,12 +0,0 @@
-auth_LTLIBRARIES = addr.la
-authdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/auth
-
-addr_la_LDFLAGS = -module -avoidversion
-
-addr_la_SOURCES = addr.c
-addr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
diff --git a/auth/addr/src/addr.c b/auth/addr/src/addr.c
deleted file mode 100644
index 08e79d40a..000000000
--- a/auth/addr/src/addr.c
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <fnmatch.h>
-#include <sys/socket.h>
-#include <netdb.h>
-#include "authenticate.h"
-#include "dict.h"
-
-#define ADDR_DELIMITER " ,"
-#define PRIVILEGED_PORT_CEILING 1024
-
-#ifndef AF_INET_SDP
-#define AF_INET_SDP 27
-#endif
-
-auth_result_t
-gf_auth (dict_t *input_params, dict_t *config_params)
-{
- int ret = 0;
- char *name = NULL;
- char *searchstr = NULL;
- char peer_addr[UNIX_PATH_MAX];
- data_t *peer_info_data = NULL;
- peer_info_t *peer_info = NULL;
- data_t *allow_addr = NULL, *reject_addr = NULL;
- char is_inet_sdp = 0;
-
- name = data_to_str (dict_get (input_params, "remote-subvolume"));
- if (!name) {
- gf_log ("authenticate/addr",
- GF_LOG_ERROR,
- "remote-subvolume not specified");
- return AUTH_DONT_CARE;
- }
-
- ret = asprintf (&searchstr, "auth.addr.%s.allow", name);
- if (-1 == ret) {
- gf_log ("auth/addr", GF_LOG_ERROR,
- "asprintf failed while setting search string");
- return AUTH_DONT_CARE;
- }
- allow_addr = dict_get (config_params,
- searchstr);
- free (searchstr);
-
- ret = asprintf (&searchstr, "auth.addr.%s.reject", name);
- if (-1 == ret) {
- gf_log ("auth/addr", GF_LOG_ERROR,
- "asprintf failed while setting search string");
- return AUTH_DONT_CARE;
- }
- reject_addr = dict_get (config_params,
- searchstr);
- free (searchstr);
-
- if (!allow_addr) {
- /* TODO: backword compatibility */
- ret = asprintf (&searchstr, "auth.ip.%s.allow", name);
- if (-1 == ret) {
- gf_log ("auth/addr", GF_LOG_ERROR,
- "asprintf failed while setting search string");
- return AUTH_DONT_CARE;
- }
- allow_addr = dict_get (config_params, searchstr);
- free (searchstr);
- }
-
- if (!(allow_addr || reject_addr)) {
- gf_log ("auth/addr", GF_LOG_DEBUG,
- "none of the options auth.addr.%s.allow or "
- "auth.addr.%s.reject specified, returning auth_dont_care",
- name, name);
- return AUTH_DONT_CARE;
- }
-
- peer_info_data = dict_get (input_params, "peer-info");
- if (!peer_info_data) {
- gf_log ("authenticate/addr",
- GF_LOG_ERROR,
- "peer-info not present");
- return AUTH_DONT_CARE;
- }
-
- peer_info = data_to_ptr (peer_info_data);
-
- switch (((struct sockaddr *) &peer_info->sockaddr)->sa_family)
- {
- case AF_INET_SDP:
- is_inet_sdp = 1;
- ((struct sockaddr *) &peer_info->sockaddr)->sa_family = AF_INET;
-
- case AF_INET:
- case AF_INET6:
- {
- char *service;
- uint16_t peer_port;
- strcpy (peer_addr, peer_info->identifier);
- service = strrchr (peer_addr, ':');
- *service = '\0';
- service ++;
-
- if (is_inet_sdp) {
- ((struct sockaddr *) &peer_info->sockaddr)->sa_family = AF_INET_SDP;
- }
-
- peer_port = atoi (service);
- if (peer_port >= PRIVILEGED_PORT_CEILING) {
- gf_log ("auth/addr", GF_LOG_ERROR,
- "client is bound to port %d which is not privileged",
- peer_port);
- return AUTH_DONT_CARE;
- }
- break;
-
- case AF_UNIX:
- strcpy (peer_addr, peer_info->identifier);
- break;
-
- default:
- gf_log ("authenticate/addr", GF_LOG_ERROR,
- "unknown address family %d",
- ((struct sockaddr *) &peer_info->sockaddr)->sa_family);
- return AUTH_DONT_CARE;
- }
- }
-
- if (reject_addr) {
- char *addr_str = NULL;
- char *tmp;
- char *addr_cpy = strdup (reject_addr->data);
-
- addr_str = strtok_r (addr_cpy, ADDR_DELIMITER, &tmp);
-
- while (addr_str) {
- char negate = 0, match =0;
- gf_log (name, GF_LOG_DEBUG,
- "rejected = \"%s\", received addr = \"%s\"",
- addr_str, peer_addr);
- if (addr_str[0] == '!') {
- negate = 1;
- addr_str++;
- }
-
- match = fnmatch (addr_str,
- peer_addr,
- 0);
- if (negate ? match : !match) {
- free (addr_cpy);
- return AUTH_REJECT;
- }
- addr_str = strtok_r (NULL, ADDR_DELIMITER, &tmp);
- }
- free (addr_cpy);
- }
-
- if (allow_addr) {
- char *addr_str = NULL;
- char *tmp;
- char *addr_cpy = strdup (allow_addr->data);
-
- addr_str = strtok_r (addr_cpy, ADDR_DELIMITER, &tmp);
-
- while (addr_str) {
- char negate = 0, match = 0;
- gf_log (name, GF_LOG_DEBUG,
- "allowed = \"%s\", received addr = \"%s\"",
- addr_str, peer_addr);
- if (addr_str[0] == '!') {
- negate = 1;
- addr_str++;
- }
-
- match = fnmatch (addr_str,
- peer_addr,
- 0);
-
- if (negate ? match : !match) {
- free (addr_cpy);
- return AUTH_ACCEPT;
- }
- addr_str = strtok_r (NULL, ADDR_DELIMITER, &tmp);
- }
- free (addr_cpy);
- }
-
- return AUTH_DONT_CARE;
-}
-
-struct volume_options options[] = {
- { .key = {"auth.addr.*.allow"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"auth.addr.*.reject"},
- .type = GF_OPTION_TYPE_ANY
- },
- /* Backword compatibility */
- { .key = {"auth.ip.*.allow"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {NULL} }
-};
diff --git a/auth/login/src/Makefile.am b/auth/login/src/Makefile.am
deleted file mode 100644
index eb7b990c2..000000000
--- a/auth/login/src/Makefile.am
+++ /dev/null
@@ -1,13 +0,0 @@
-auth_LTLIBRARIES = login.la
-authdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/auth
-
-login_la_LDFLAGS = -module -avoidversion
-
-login_la_SOURCES = login.c
-login_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
diff --git a/auth/login/src/login.c b/auth/login/src/login.c
deleted file mode 100644
index ec77fc172..000000000
--- a/auth/login/src/login.c
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <fnmatch.h>
-#include "authenticate.h"
-
-auth_result_t gf_auth (dict_t *input_params, dict_t *config_params)
-{
- int ret = 0;
- char *username = NULL, *password = NULL;
- data_t *allow_user = NULL, *username_data = NULL, *password_data = NULL;
- int32_t result = AUTH_DONT_CARE;
- char *brick_name = NULL, *searchstr = NULL;
-
- username_data = dict_get (input_params, "username");
- if (!username_data)
- return AUTH_DONT_CARE;
-
- username = data_to_str (username_data);
-
- password_data = dict_get (input_params, "password");
- if (!password_data)
- return AUTH_DONT_CARE;
-
- password = data_to_str (password_data);
-
- brick_name = data_to_str (dict_get (input_params, "remote-subvolume"));
- if (!brick_name) {
- gf_log ("auth/login",
- GF_LOG_ERROR,
- "remote-subvolume not specified");
- return AUTH_REJECT;
- }
-
- ret = asprintf (&searchstr, "auth.login.%s.allow", brick_name);
- if (-1 == ret) {
- gf_log ("auth/login", GF_LOG_ERROR,
- "asprintf failed while setting search string");
- return AUTH_DONT_CARE;
- }
-
- allow_user = dict_get (config_params,
- searchstr);
- free (searchstr);
-
- if (allow_user) {
- char *username_str = NULL;
- char *tmp;
- char *username_cpy = strdup (allow_user->data);
-
- username_str = strtok_r (username_cpy, " ,", &tmp);
-
- while (username_str) {
- data_t *passwd_data = NULL;
- if (!fnmatch (username_str,
- username,
- 0)) {
- ret = asprintf (&searchstr, "auth.login.%s.password", username);
- if (-1 == ret) {
- gf_log ("auth/login", GF_LOG_ERROR,
- "asprintf failed while setting search string");
- return AUTH_DONT_CARE;
- }
- passwd_data = dict_get (config_params, searchstr);
- FREE (searchstr);
-
- if (!passwd_data) {
- gf_log ("auth/login",
- GF_LOG_DEBUG,
- "wrong username/password combination");
- result = AUTH_REJECT;
- }
- else
- result = !strcmp (data_to_str (passwd_data), password) ? AUTH_ACCEPT : AUTH_REJECT;
- break;
- }
- username_str = strtok_r (NULL, " ,", &tmp);
- }
- free (username_cpy);
- }
-
- return result;
-}
-
-struct volume_options options[] = {
- { .key = {"auth.login.*.allow"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"auth.login.*.password"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {NULL} }
-};
diff --git a/autogen.sh b/autogen.sh
index e20408bf2..f937e6be0 100755
--- a/autogen.sh
+++ b/autogen.sh
@@ -1,8 +1,105 @@
#!/bin/sh
-aclocal
-autoheader
-(libtoolize --automake --copy --force || glibtoolize --automake --copy --force)
-autoconf
-automake --add-missing --copy --foreign
+echo
+echo ... GlusterFS autogen ...
+echo
+
+## Check all dependencies are present
+MISSING=""
+
+# Check for aclocal
+env aclocal --version > /dev/null 2>&1
+if [ $? -eq 0 ]; then
+ ACLOCAL=aclocal
+else
+ MISSING="$MISSING aclocal"
+fi
+
+# Check for autoconf
+env autoconf --version > /dev/null 2>&1
+if [ $? -eq 0 ]; then
+ AUTOCONF=autoconf
+else
+ MISSING="$MISSING autoconf"
+fi
+
+# Check for autoheader
+env autoheader --version > /dev/null 2>&1
+if [ $? -eq 0 ]; then
+ AUTOHEADER=autoheader
+else
+ MISSING="$MISSING autoheader"
+fi
+
+# Check for automake
+env automake --version > /dev/null 2>&1
+if [ $? -eq 0 ]; then
+ AUTOMAKE=automake
+else
+ MISSING="$MISSING automake"
+fi
+
+# Check for libtoolize or glibtoolize
+env libtoolize --version > /dev/null 2>&1
+if [ $? -eq 0 ]; then
+ # libtoolize was found, so use it
+ TOOL=libtoolize
+else
+ # libtoolize wasn't found, so check for glibtoolize
+ env glibtoolize --version > /dev/null 2>&1
+ if [ $? -eq 0 ]; then
+ TOOL=glibtoolize
+ else
+ MISSING="$MISSING libtoolize/glibtoolize"
+ fi
+fi
+
+# Check for tar
+env tar --version > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+ MISSING="$MISSING tar"
+fi
+
+## If dependencies are missing, warn the user and abort
+if [ "x$MISSING" != "x" ]; then
+ echo "Aborting."
+ echo
+ echo "The following build tools are missing:"
+ echo
+ for pkg in $MISSING; do
+ echo " * $pkg"
+ done
+ echo
+ echo "Please install them and try again."
+ echo
+ exit 1
+fi
+
+## generate gf-error-codes.h from error-codes.json
+echo "Generate gf-error-codes.h ..."
+if ./gen-headers.py; then
+ if ! mv -fv gf-error-codes.h libglusterfs/src/gf-error-codes.h; then
+ exit 1
+ fi
+else
+ exit 1
+fi
+
+## Do the autogeneration
+echo Running ${ACLOCAL}...
+$ACLOCAL -I ./contrib/aclocal
+echo Running ${AUTOHEADER}...
+$AUTOHEADER
+echo Running ${TOOL}...
+$TOOL --automake --copy --force
+echo Running ${AUTOCONF}...
+$AUTOCONF
+echo Running ${AUTOMAKE}...
+$AUTOMAKE --add-missing --copy --foreign
+
+# Run autogen in the argp-standalone sub-directory
cd argp-standalone;./autogen.sh
+
+# Instruct user on next steps
+echo
+echo "Please proceed with configuring, compiling, and installing."
diff --git a/booster/Makefile.am b/booster/Makefile.am
deleted file mode 100644
index e1c45f305..000000000
--- a/booster/Makefile.am
+++ /dev/null
@@ -1 +0,0 @@
-SUBDIRS=src \ No newline at end of file
diff --git a/booster/src/Makefile.am b/booster/src/Makefile.am
deleted file mode 100644
index 7ecbcd225..000000000
--- a/booster/src/Makefile.am
+++ /dev/null
@@ -1,21 +0,0 @@
-ldpreload_LTLIBRARIES = libglusterfs-booster.la
-ldpreloaddir = $(libdir)/glusterfs
-noinst_HEADERS = booster_fstab.h booster-fd.h
-libglusterfs_booster_la_SOURCES = booster.c booster_stat.c booster_fstab.c booster-fd.c
-libglusterfs_booster_la_CFLAGS = -I$(top_srcdir)/libglusterfsclient/src/ -D_GNU_SOURCE -D$(GF_HOST_OS) -fPIC -Wall \
- -pthread $(GF_BOOSTER_CFLAGS) -shared -nostartfiles
-libglusterfs_booster_la_CPPFLAGS = -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE \
- -I$(top_srcdir)/libglusterfsclient/src \
- -I$(top_srcdir)/libglusterfs/src -DDATADIR=\"$(localstatedir)\" \
- -DCONFDIR=\"$(sysconfdir)/glusterfs\"
-
-libglusterfs_booster_la_LDFLAGS = -module -avoidversion
-libglusterfs_booster_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(top_builddir)/libglusterfsclient/src/libglusterfsclient.la
-
-CLEANFILES =
-
-uninstall-local:
- rm -f $(DESTDIR)$(ldpreloaddir)/glusterfs-booster.so
-
-install-data-hook:
- ln -sf libglusterfs-booster.so $(DESTDIR)$(ldpreloaddir)/glusterfs-booster.so
diff --git a/booster/src/booster-fd.c b/booster/src/booster-fd.c
deleted file mode 100644
index 5e83384ff..000000000
--- a/booster/src/booster-fd.c
+++ /dev/null
@@ -1,299 +0,0 @@
-/*
- Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-
-#include "booster-fd.h"
-#include <logging.h>
-#include <mem-pool.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <common-utils.h>
-#include <string.h>
-
-#include <assert.h>
-
-extern fd_t *
-fd_ref (fd_t *fd);
-
-extern void
-fd_unref (fd_t *fd);
-/*
- Allocate in memory chunks of power of 2 starting from 1024B
- Assumes fdtable->lock is held
- */
-static inline uint
-gf_roundup_power_of_two (uint nr)
-{
- uint result = 1;
-
- if (nr < 0) {
- gf_log ("booster-fd", GF_LOG_ERROR, "Negative number passed");
- return -1;
- }
-
- while (result <= nr)
- result *= 2;
-
- return result;
-}
-
-int
-booster_fdtable_expand (booster_fdtable_t *fdtable, uint nr)
-{
- fd_t **oldfds = NULL;
- uint oldmax_fds = -1;
- uint cpy = 0;
- int32_t ret = -1;
-
- if (fdtable == NULL || nr < 0) {
- gf_log ("booster-fd", GF_LOG_ERROR, "Invalid argument");
- errno = EINVAL;
- ret = -1;
- goto out;
- }
-
- nr /= (1024 / sizeof (fd_t *));
- nr = gf_roundup_power_of_two (nr + 1);
- nr *= (1024 / sizeof (fd_t *));
-
- oldfds = fdtable->fds;
- oldmax_fds = fdtable->max_fds;
-
- fdtable->fds = CALLOC (nr, sizeof (fd_t *));
- if (fdtable->fds == NULL) {
- gf_log ("booster-fd", GF_LOG_ERROR, "Memory allocation failed");
- fdtable->fds = oldfds;
- oldfds = NULL;
- ret = -1;
- goto out;
- }
-
- fdtable->max_fds = nr;
-
- if (oldfds) {
- cpy = oldmax_fds * sizeof (fd_t *);
- memcpy (fdtable->fds, oldfds, cpy);
- }
-
- gf_log ("booster-fd", GF_LOG_DEBUG, "FD-table expanded: Old: %d,New: %d"
- , oldmax_fds, nr);
- ret = 0;
-out:
- FREE (oldfds);
-
- return ret;
-}
-
-booster_fdtable_t *
-booster_fdtable_alloc (void)
-{
- booster_fdtable_t *fdtable = NULL;
- int32_t ret = -1;
-
- fdtable = CALLOC (1, sizeof (*fdtable));
- GF_VALIDATE_OR_GOTO ("booster-fd", fdtable, out);
-
- LOCK_INIT (&fdtable->lock);
-
- LOCK (&fdtable->lock);
- {
- ret = booster_fdtable_expand (fdtable, 0);
- }
- UNLOCK (&fdtable->lock);
-
- if (ret == -1) {
- gf_log ("booster-fd", GF_LOG_ERROR, "FD-table allocation "
- "failed");
- FREE (fdtable);
- fdtable = NULL;
- }
-
-out:
- return fdtable;
-}
-
-fd_t **
-__booster_fdtable_get_all_fds (booster_fdtable_t *fdtable, uint *count)
-{
- fd_t **fds = NULL;
-
- if (count == NULL)
- goto out;
-
- fds = fdtable->fds;
- fdtable->fds = calloc (fdtable->max_fds, sizeof (fd_t *));
- *count = fdtable->max_fds;
-
-out:
- return fds;
-}
-
-fd_t **
-booster_fdtable_get_all_fds (booster_fdtable_t *fdtable, uint *count)
-{
- fd_t **fds = NULL;
- if (!fdtable)
- return NULL;
-
- LOCK (&fdtable->lock);
- {
- fds = __booster_fdtable_get_all_fds (fdtable, count);
- }
- UNLOCK (&fdtable->lock);
-
- return fds;
-}
-
-void
-booster_fdtable_destroy (booster_fdtable_t *fdtable)
-{
- fd_t *fd = NULL;
- fd_t **fds = NULL;
- uint fd_count = 0;
- int i = 0;
-
- if (!fdtable)
- return;
-
- LOCK (&fdtable->lock);
- {
- fds = __booster_fdtable_get_all_fds (fdtable, &fd_count);
- FREE (fdtable->fds);
- }
- UNLOCK (&fdtable->lock);
-
- if (!fds)
- goto free_table;
-
- for (i = 0; i < fd_count; i++) {
- fd = fds[i];
- if (fd != NULL)
- fd_unref (fd);
- }
- FREE (fds);
-free_table:
- LOCK_DESTROY (&fdtable->lock);
- FREE (fdtable);
-}
-
-int
-booster_fd_unused_get (booster_fdtable_t *fdtable, fd_t *fdptr, int fd)
-{
- int ret = -1;
- int error = 0;
-
- if (fdtable == NULL || fdptr == NULL || fd < 0) {
- gf_log ("booster-fd", GF_LOG_ERROR, "invalid argument");
- errno = EINVAL;
- return -1;
- }
-
- gf_log ("booster-fd", GF_LOG_DEBUG, "Requested fd: %d", fd);
- LOCK (&fdtable->lock);
- {
- while (fdtable->max_fds < fd) {
- error = 0;
- error = booster_fdtable_expand (fdtable,
- fdtable->max_fds + 1);
- if (error) {
- gf_log ("booster-fd", GF_LOG_ERROR,
- "Cannot expand fdtable:%s",
- strerror (error));
- goto err;
- }
- }
-
- if (!fdtable->fds[fd]) {
- fdtable->fds[fd] = fdptr;
- fd_ref (fdptr);
- ret = fd;
- } else
- gf_log ("booster-fd", GF_LOG_ERROR, "Cannot allocate fd"
- " %d (slot not empty in fdtable)", fd);
- }
-err:
- UNLOCK (&fdtable->lock);
-
- return ret;
-}
-
-void
-booster_fd_put (booster_fdtable_t *fdtable, int fd)
-{
- fd_t *fdptr = NULL;
- if (fdtable == NULL || fd < 0) {
- gf_log ("booster-fd", GF_LOG_ERROR, "invalid argument");
- return;
- }
-
- gf_log ("booster-fd", GF_LOG_DEBUG, "FD put: %d", fd);
- if (!(fd < fdtable->max_fds)) {
- gf_log ("booster-fd", GF_LOG_ERROR, "FD not in booster fd"
- " table");
- return;
- }
-
- LOCK (&fdtable->lock);
- {
- fdptr = fdtable->fds[fd];
- fdtable->fds[fd] = NULL;
- }
- UNLOCK (&fdtable->lock);
-
- if (fdptr)
- fd_unref (fdptr);
-}
-
-fd_t *
-booster_fdptr_get (booster_fdtable_t *fdtable, int fd)
-{
- fd_t *fdptr = NULL;
-
- if (fdtable == NULL || fd < 0) {
- gf_log ("booster-fd", GF_LOG_ERROR, "invalid argument");
- errno = EINVAL;
- return NULL;
- }
-
- gf_log ("booster-fd", GF_LOG_DEBUG, "FD ptr request: %d", fd);
- if (!(fd < fdtable->max_fds)) {
- gf_log ("booster-fd", GF_LOG_ERROR, "FD not in booster fd"
- " table");
- errno = EINVAL;
- return NULL;
- }
-
- LOCK (&fdtable->lock);
- {
- fdptr = fdtable->fds[fd];
- if (fdptr)
- fd_ref (fdptr);
- }
- UNLOCK (&fdtable->lock);
-
- return fdptr;
-}
-
-void
-booster_fdptr_put (fd_t *booster_fd)
-{
- if (booster_fd)
- fd_unref (booster_fd);
-}
diff --git a/booster/src/booster-fd.h b/booster/src/booster-fd.h
deleted file mode 100644
index 43592e825..000000000
--- a/booster/src/booster-fd.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _BOOSTER_FD_H
-#define _BOOSTER_FD_H
-
-#include <libglusterfsclient.h>
-#include <locking.h>
-#include <list.h>
-
-/* This struct must be updated if the fd_t in fd.h changes.
- * We cannot include those headers here because unistd.h, included
- * by glusterfs headers, conflicts with the syscall prototypes we
- * define for booster.
- */
-struct _fd {
- pid_t pid;
- int32_t flags;
- int32_t refcount;
- struct list_head inode_list;
- struct _inode *inode;
- struct _dict *ctx;
- gf_lock_t lock; /* used ONLY for manipulating
- 'struct _fd_ctx' array (_ctx).*/
- struct _fd_ctx *_ctx;
-};
-typedef struct _fd fd_t;
-
-
-struct _booster_fdtable {
- int refcount;
- unsigned int max_fds;
- gf_lock_t lock;
- fd_t **fds;
-};
-typedef struct _booster_fdtable booster_fdtable_t;
-
-extern int
-booster_fd_unused_get (booster_fdtable_t *fdtable, fd_t *fdptr, int fd);
-
-extern void
-booster_fd_put (booster_fdtable_t *fdtable, int fd);
-
-extern fd_t *
-booster_fdptr_get (booster_fdtable_t *fdtable, int fd);
-
-extern void
-booster_fdptr_put (fd_t *fdptr);
-
-extern void
-booster_fdtable_destroy (booster_fdtable_t *fdtable);
-
-booster_fdtable_t *
-booster_fdtable_alloc (void);
-
-#endif /* #ifndef _BOOSTER_FD_H */
diff --git a/booster/src/booster.c b/booster/src/booster.c
deleted file mode 100644
index ff4da3a1b..000000000
--- a/booster/src/booster.c
+++ /dev/null
@@ -1,2745 +0,0 @@
-/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <dlfcn.h>
-#include <sys/types.h>
-#include <sys/uio.h>
-#include <stdio.h>
-#include <stdarg.h>
-#include <stdlib.h>
-#include <inttypes.h>
-#include <libglusterfsclient.h>
-#include <list.h>
-#include <pthread.h>
-#include <sys/xattr.h>
-#include <string.h>
-#include <assert.h>
-#include <errno.h>
-#include <ctype.h>
-#include <logging.h>
-#include <utime.h>
-#include <dirent.h>
-#include <sys/statfs.h>
-#include <sys/statvfs.h>
-#include <fcntl.h>
-#include "booster-fd.h"
-
-#ifndef GF_UNIT_KB
-#define GF_UNIT_KB 1024
-#endif
-
-/* attr constructor registers this function with libc's
- * _init function as a function that must be called before
- * the main() of the program.
- */
-static void booster_lib_init (void) __attribute__((constructor));
-
-extern fd_t *
-fd_ref (fd_t *fd);
-
-extern void
-fd_unref (fd_t *fd);
-
-extern int pipe (int filedes[2]);
-/* We define these flags so that we can remove fcntl.h from the include path.
- * fcntl.h has certain defines and other lines of code that redirect the
- * application's open and open64 calls to the syscalls defined by
- * libc, for us, thats not a Good Thing (TM).
- */
-#ifndef GF_O_CREAT
-#define GF_O_CREAT 0x40
-#endif
-
-#ifndef GF_O_TRUNC
-#define GF_O_TRUNC 0x200
-#endif
-
-#ifndef GF_O_RDWR
-#define GF_O_RDWR 0x2
-#endif
-
-#ifndef GF_O_WRONLY
-#define GF_O_WRONLY 0x1
-#endif
-
-#ifndef UNIX_PATH_MAX
-#define UNIX_PATH_MAX 108
-#endif
-
-typedef enum {
- BOOSTER_OPEN,
- BOOSTER_CREAT
-} booster_op_t;
-
-struct _inode;
-struct _dict;
-
-ssize_t
-write (int fd, const void *buf, size_t count);
-
-/* open, open64, creat */
-static int (*real_open) (const char *pathname, int flags, ...);
-static int (*real_open64) (const char *pathname, int flags, ...);
-static int (*real_creat) (const char *pathname, mode_t mode);
-
-/* read, readv, pread, pread64 */
-static ssize_t (*real_read) (int fd, void *buf, size_t count);
-static ssize_t (*real_readv) (int fd, const struct iovec *vector, int count);
-static ssize_t (*real_pread) (int fd, void *buf, size_t count,
- unsigned long offset);
-static ssize_t (*real_pread64) (int fd, void *buf, size_t count,
- uint64_t offset);
-
-/* write, writev, pwrite, pwrite64 */
-static ssize_t (*real_write) (int fd, const void *buf, size_t count);
-static ssize_t (*real_writev) (int fd, const struct iovec *vector, int count);
-static ssize_t (*real_pwrite) (int fd, const void *buf, size_t count,
- unsigned long offset);
-static ssize_t (*real_pwrite64) (int fd, const void *buf, size_t count,
- uint64_t offset);
-
-/* lseek, llseek, lseek64 */
-static off_t (*real_lseek) (int fildes, unsigned long offset, int whence);
-static off_t (*real_lseek64) (int fildes, uint64_t offset, int whence);
-
-/* close */
-static int (*real_close) (int fd);
-
-/* dup dup2 */
-static int (*real_dup) (int fd);
-static int (*real_dup2) (int oldfd, int newfd);
-
-static pid_t (*real_fork) (void);
-static int (*real_mkdir) (const char *pathname, mode_t mode);
-static int (*real_rmdir) (const char *pathname);
-static int (*real_chmod) (const char *pathname, mode_t mode);
-static int (*real_chown) (const char *pathname, uid_t owner, gid_t group);
-static int (*real_fchmod) (int fd, mode_t mode);
-static int (*real_fchown) (int fd, uid_t, gid_t gid);
-static int (*real_fsync) (int fd);
-static int (*real_ftruncate) (int fd, off_t length);
-static int (*real_link) (const char *oldpath, const char *newname);
-static int (*real_rename) (const char *oldpath, const char *newpath);
-static int (*real_utimes) (const char *path, const struct timeval times[2]);
-static int (*real_utime) (const char *path, const struct utimbuf *buf);
-static int (*real_mknod) (const char *path, mode_t mode, dev_t dev);
-static int (*real_mkfifo) (const char *path, mode_t mode);
-static int (*real_unlink) (const char *path);
-static int (*real_symlink) (const char *oldpath, const char *newpath);
-static int (*real_readlink) (const char *path, char *buf, size_t bufsize);
-static char * (*real_realpath) (const char *path, char *resolved);
-static DIR * (*real_opendir) (const char *path);
-static struct dirent * (*real_readdir) (DIR *dir);
-static int (*real_readdir_r) (DIR *dir, struct dirent *entry,
- struct dirent **result);
-static int (*real_readdir64_r) (DIR *dir, struct dirent64 *entry,
- struct dirent64 **result);
-static int (*real_closedir) (DIR *dh);
-static int (*real___xstat) (int ver, const char *path, struct stat *buf);
-static int (*real___xstat64) (int ver, const char *path, struct stat64 *buf);
-static int (*real_stat) (const char *path, struct stat *buf);
-static int (*real_stat64) (const char *path, struct stat64 *buf);
-static int (*real___fxstat) (int ver, int fd, struct stat *buf);
-static int (*real___fxstat64) (int ver, int fd, struct stat64 *buf);
-static int (*real_fstat) (int fd, struct stat *buf);
-static int (*real_fstat64) (int fd , struct stat64 *buf);
-static int (*real___lxstat) (int ver, const char *path, struct stat *buf);
-static int (*real___lxstat64) (int ver, const char *path, struct stat64 *buf);
-static int (*real_lstat) (const char *path, struct stat *buf);
-static int (*real_lstat64) (const char *path, struct stat64 *buf);
-static int (*real_statfs) (const char *path, struct statfs *buf);
-static int (*real_statfs64) (const char *path, struct statfs64 *buf);
-static int (*real_statvfs) (const char *path, struct statvfs *buf);
-static int (*real_statvfs64) (const char *path, struct statvfs64 *buf);
-static ssize_t (*real_getxattr) (const char *path, const char *name,
- void *value, size_t size);
-static ssize_t (*real_lgetxattr) (const char *path, const char *name,
- void *value, size_t size);
-static int (*real_remove) (const char* path);
-static int (*real_lchown) (const char *path, uid_t owner, gid_t group);
-static void (*real_rewinddir) (DIR *dirp);
-static void (*real_seekdir) (DIR *dirp, off_t offset);
-static off_t (*real_telldir) (DIR *dirp);
-
-static ssize_t (*real_sendfile) (int out_fd, int in_fd, off_t *offset,
- size_t count);
-static ssize_t (*real_sendfile64) (int out_fd, int in_fd, off_t *offset,
- size_t count);
-static int (*real_fcntl) (int fd, int cmd, ...);
-
-#define RESOLVE(sym) do { \
- if (!real_##sym) \
- real_##sym = dlsym (RTLD_NEXT, #sym); \
- } while (0)
-
-/*TODO: set proper value */
-#define MOUNT_HASH_SIZE 256
-
-struct booster_mount {
- dev_t st_dev;
- glusterfs_handle_t handle;
- struct list_head device_list;
-};
-typedef struct booster_mount booster_mount_t;
-
-static booster_fdtable_t *booster_fdtable = NULL;
-
-extern int booster_configure (char *confpath);
-/* This is dup'ed every time VMP open/creat wants a new fd.
- * This is needed so we occupy an entry in the process' file
- * table.
- */
-int process_piped_fd = -1;
-
-static int
-booster_get_process_fd ()
-{
- return real_dup (process_piped_fd);
-}
-
-/* The following two define which file contains
- * the FSTAB configuration for VMP-based usage.
- */
-#define DEFAULT_BOOSTER_CONF CONFDIR"/booster.conf"
-#define BOOSTER_CONF_ENV_VAR "GLUSTERFS_BOOSTER_FSTAB"
-
-
-/* The following define which log file is used when
- * using the old mount point bypass approach.
- */
-#define BOOSTER_DEFAULT_LOG CONFDIR"/booster.log"
-#define BOOSTER_LOG_ENV_VAR "GLUSTERFS_BOOSTER_LOG"
-
-void
-do_open (int fd, const char *pathname, int flags, mode_t mode, booster_op_t op)
-{
- char *specfile = NULL;
- char *mount_point = NULL;
- int32_t size = 0;
- int32_t ret = -1;
- FILE *specfp = NULL;
- glusterfs_file_t fh = NULL;
- char *logfile = NULL;
- glusterfs_init_params_t iparams = {
- .loglevel = "error",
- .lookup_timeout = 600,
- .stat_timeout = 600,
- };
-
- gf_log ("booster", GF_LOG_DEBUG, "Opening using MPB: %s", pathname);
- size = fgetxattr (fd, "user.glusterfs-booster-volfile", NULL, 0);
- if (size == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Xattr "
- "user.glusterfs-booster-volfile not found: %s",
- strerror (errno));
- goto out;
- }
-
- specfile = calloc (1, size);
- if (!specfile) {
- gf_log ("booster", GF_LOG_ERROR, "Memory allocation failed");
- goto out;
- }
-
- ret = fgetxattr (fd, "user.glusterfs-booster-volfile", specfile,
- size);
- if (ret == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Xattr "
- "user.glusterfs-booster-volfile not found: %s",
- strerror (errno));
- goto out;
- }
-
- specfp = tmpfile ();
- if (!specfp) {
- gf_log ("booster", GF_LOG_ERROR, "Temp file creation failed"
- ": %s", strerror (errno));
- goto out;
- }
-
- ret = fwrite (specfile, size, 1, specfp);
- if (ret != 1) {
- gf_log ("booster", GF_LOG_ERROR, "Failed to write volfile: %s",
- strerror (errno));
- goto out;
- }
-
- fseek (specfp, 0L, SEEK_SET);
-
- size = fgetxattr (fd, "user.glusterfs-booster-mount", NULL, 0);
- if (size == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Xattr "
- "user.glusterfs-booster-mount not found: %s",
- strerror (errno));
- goto out;
- }
-
- mount_point = calloc (size, sizeof (char));
- if (!mount_point) {
- gf_log ("booster", GF_LOG_ERROR, "Memory allocation failed");
- goto out;
- }
-
- ret = fgetxattr (fd, "user.glusterfs-booster-mount", mount_point, size);
- if (ret == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Xattr "
- "user.glusterfs-booster-mount not found: %s",
- strerror (errno));
- goto out;
- }
-
- logfile = getenv (BOOSTER_LOG_ENV_VAR);
- if (logfile) {
- if (strlen (logfile) > 0)
- iparams.logfile = strdup (logfile);
- else
- iparams.logfile = strdup (BOOSTER_DEFAULT_LOG);
- } else {
- iparams.logfile = strdup (BOOSTER_DEFAULT_LOG);
- }
-
- gf_log ("booster", GF_LOG_DEBUG, "Using log-file: %s", iparams.logfile);
- iparams.specfp = specfp;
-
- ret = glusterfs_mount (mount_point, &iparams);
- if (ret == -1) {
- if (errno != EEXIST) {
- gf_log ("booster", GF_LOG_ERROR, "Mount failed over"
- " glusterfs");
- goto out;
- } else
- gf_log ("booster", GF_LOG_ERROR, "Already mounted");
- }
-
- switch (op) {
- case BOOSTER_OPEN:
- gf_log ("booster", GF_LOG_DEBUG, "Booster open call");
- fh = glusterfs_open (pathname, flags, mode);
- break;
-
- case BOOSTER_CREAT:
- gf_log ("booster", GF_LOG_DEBUG, "Booster create call");
- fh = glusterfs_creat (pathname, mode);
- break;
- }
-
- if (!fh) {
- gf_log ("booster", GF_LOG_ERROR, "Error performing operation");
- goto out;
- }
-
- if (booster_fd_unused_get (booster_fdtable, fh, fd) == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Failed to get unused FD");
- goto out;
- }
- fh = NULL;
-
-out:
- if (specfile) {
- free (specfile);
- }
-
- if (specfp) {
- fclose (specfp);
- }
-
- if (mount_point) {
- free (mount_point);
- }
-
- if (fh) {
- glusterfs_close (fh);
- }
-
- return;
-}
-
-int
-vmp_open (const char *pathname, int flags, ...)
-{
- mode_t mode = 0;
- int fd = -1;
- glusterfs_file_t fh = NULL;
- va_list ap;
-
- if (flags & GF_O_CREAT) {
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
-
- fh = glusterfs_open (pathname, flags, mode);
- }
- else
- fh = glusterfs_open (pathname, flags);
-
- if (!fh) {
- gf_log ("booster", GF_LOG_ERROR, "VMP open failed");
- goto out;
- }
-
- fd = booster_get_process_fd ();
- if (fd == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Failed to create open fd");
- goto fh_close_out;
- }
-
- if (booster_fd_unused_get (booster_fdtable, fh, fd) == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Failed to map fd into table");
- goto realfd_close_out;
- }
-
- return fd;
-
-realfd_close_out:
- real_close (fd);
- fd = -1;
-
-fh_close_out:
- glusterfs_close (fh);
-
-out:
- return fd;
-}
-
-#define BOOSTER_USE_OPEN64 1
-#define BOOSTER_DONT_USE_OPEN64 0
-
-int
-booster_open (const char *pathname, int use64, int flags, ...)
-{
- int ret = -1;
- mode_t mode = 0;
- va_list ap;
- int (*my_open) (const char *pathname, int flags, ...);
-
- if (!pathname) {
- errno = EINVAL;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_DEBUG, "Open: %s", pathname);
- /* First try opening through the virtual mount point.
- * The difference lies in the fact that:
- * 1. We depend on libglusterfsclient library to perform
- * the translation from the path to handle.
- * 2. We do not go to the file system for the fd, instead
- * we use booster_get_process_fd (), which returns a dup'ed
- * fd of a pipe created in booster_init.
- */
- if (flags & GF_O_CREAT) {
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
- ret = vmp_open (pathname, flags, mode);
- }
- else
- ret = vmp_open (pathname, flags);
-
- /* We receive an ENODEV if the VMP does not exist. If we
- * receive an error other than ENODEV, it means, there
- * actually was an error performing vmp_open. This must
- * be returned to the user.
- */
- if ((ret < 0) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "Error in opening file over "
- " VMP: %s", strerror (errno));
- goto out;
- }
-
- if (ret > 0) {
- gf_log ("booster", GF_LOG_DEBUG, "File opened");
- goto out;
- }
-
- if (use64) {
- gf_log ("booster", GF_LOG_DEBUG, "Using 64-bit open");
- my_open = real_open64;
- } else {
- gf_log ("booster", GF_LOG_DEBUG, "Using 32-bit open");
- my_open = real_open;
- }
-
- /* It is possible the RESOLVE macro is not able
- * to resolve the symbol of a function, in that case
- * we dont want to seg-fault on calling a NULL functor.
- */
- if (my_open == NULL) {
- gf_log ("booster", GF_LOG_ERROR, "open not resolved");
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- if (flags & GF_O_CREAT) {
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
-
- ret = my_open (pathname, flags, mode);
- } else
- ret = my_open (pathname, flags);
-
- if (ret != -1) {
- do_open (ret, pathname, flags, mode, BOOSTER_OPEN);
- }
-
-out:
- return ret;
-}
-
-/* This is done to over-write existing definitions of open and open64 inside
- * libc with our own copies. __REDIRECT is provided by libc.
- *
- * XXX: This will not work anywhere other than libc based systems.
- */
-int __REDIRECT (booster_false_open, (__const char *__file, int __oflag, ...),
- open) __nonnull ((1));
-int __REDIRECT (booster_false_open64, (__const char *__file, int __oflag, ...),
- open64) __nonnull ((1));
-int
-booster_false_open (const char *pathname, int flags, ...)
-{
- int ret;
- mode_t mode = 0;
- va_list ap;
-
- if (flags & GF_O_CREAT) {
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
-
- ret = booster_open (pathname, BOOSTER_DONT_USE_OPEN64, flags,
- mode);
- }
- else
- ret = booster_open (pathname, BOOSTER_DONT_USE_OPEN64, flags);
-
- return ret;
-}
-
-int
-booster_false_open64 (const char *pathname, int flags, ...)
-{
- int ret;
- mode_t mode = 0;
- va_list ap;
-
- if (flags & GF_O_CREAT) {
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
-
- ret = booster_open (pathname, BOOSTER_USE_OPEN64, flags, mode);
- }
- else
- ret = booster_open (pathname, BOOSTER_USE_OPEN64, flags);
-
- return ret;
-}
-
-int
-vmp_creat (const char *pathname, mode_t mode)
-{
- int fd = -1;
- glusterfs_file_t fh = NULL;
-
- fh = glusterfs_creat (pathname, mode);
- if (!fh) {
- gf_log ("booster", GF_LOG_ERROR, "Create failed: %s: %s",
- pathname, strerror (errno));
- goto out;
- }
-
- fd = booster_get_process_fd ();
- if (fd == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Failed to create fd");
- goto close_out;
- }
-
- if ((booster_fd_unused_get (booster_fdtable, fh, fd)) == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Failed to map unused fd");
- goto real_close_out;
- }
-
- return fd;
-
-real_close_out:
- real_close (fd);
- fd = -1;
-
-close_out:
- glusterfs_close (fh);
-
-out:
- return -1;
-}
-
-int __REDIRECT (booster_false_creat, (const char *pathname, mode_t mode),
- creat) __nonnull ((1));
-
-int
-booster_false_creat (const char *pathname, mode_t mode)
-{
- int ret = -1;
- if (!pathname) {
- errno = EINVAL;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_DEBUG, "Create: %s", pathname);
- ret = vmp_creat (pathname, mode);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "VMP create failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret > 0) {
- gf_log ("booster", GF_LOG_DEBUG, "File created");
- goto out;
- }
-
- if (real_creat == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real_creat (pathname, mode);
-
- if (ret != -1) {
- do_open (ret, pathname, GF_O_WRONLY | GF_O_TRUNC, mode,
- BOOSTER_CREAT);
- } else
- gf_log ("booster", GF_LOG_ERROR, "real create failed: %s",
- strerror (errno));
-
-out:
- return ret;
-}
-
-
-/* pread */
-
-ssize_t
-pread (int fd, void *buf, size_t count, unsigned long offset)
-{
- ssize_t ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_DEBUG, "pread: fd %d, count %lu, offset %lu"
- ,fd, (long unsigned)count, offset);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_DEBUG, "Not booster fd");
- if (real_pread == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_pread (fd, buf, count, offset);
- } else {
- gf_log ("booster", GF_LOG_DEBUG, "Is a booster fd");
- ret = glusterfs_pread (glfs_fd, buf, count, offset);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-ssize_t
-pread64 (int fd, void *buf, size_t count, uint64_t offset)
-{
- ssize_t ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_DEBUG, "pread64: fd %d, count %lu, offset %"
- PRIu64, fd, (long unsigned)count, offset);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_DEBUG, "Not booster fd");
- if (real_pread64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_pread64 (fd, buf, count, offset);
- } else {
- gf_log ("booster", GF_LOG_DEBUG, "Is a booster fd");
- ret = glusterfs_pread (glfs_fd, buf, count, offset);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-ssize_t
-read (int fd, void *buf, size_t count)
-{
- int ret;
- glusterfs_file_t glfs_fd;
-
- gf_log ("booster", GF_LOG_DEBUG, "read: fd %d, count %lu", fd,
- (long unsigned)count);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_DEBUG, "Not booster fd");
- if (real_read == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_read (fd, buf, count);
- } else {
- gf_log ("booster", GF_LOG_DEBUG, "Is a booster fd");
- ret = glusterfs_read (glfs_fd, buf, count);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-ssize_t
-readv (int fd, const struct iovec *vector, int count)
-{
- int ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_DEBUG, "readv: fd %d, iovecs %d", fd, count);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_DEBUG, "Not a booster fd");
- if (real_readv == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_readv (fd, vector, count);
- } else {
- gf_log ("booster", GF_LOG_DEBUG, "Is a booster fd");
- ret = glusterfs_readv (glfs_fd, vector, count);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-ssize_t
-write (int fd, const void *buf, size_t count)
-{
- int ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_DEBUG, "write: fd %d, count %d", fd, count);
-
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_DEBUG, "Not a booster fd");
- if (real_write == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_write (fd, buf, count);
- } else {
- gf_log ("booster", GF_LOG_DEBUG, "Is a booster fd");
- ret = glusterfs_write (glfs_fd, buf, count);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-ssize_t
-writev (int fd, const struct iovec *vector, int count)
-{
- int ret = 0;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_DEBUG, "writev: fd %d, iovecs %d", fd, count);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_DEBUG, "Not a booster fd");
- if (real_writev == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_writev (fd, vector, count);
- } else {
- gf_log ("booster", GF_LOG_DEBUG, "Is a booster fd");
- ret = glusterfs_writev (glfs_fd, vector, count);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-ssize_t
-pwrite (int fd, const void *buf, size_t count, unsigned long offset)
-{
- int ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_DEBUG, "pwrite: fd %d, count %d, offset %lu",
- fd, count, offset);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_DEBUG, "Not a booster fd");
- if (real_pwrite == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_pwrite (fd, buf, count, offset);
- } else {
- gf_log ("booster", GF_LOG_DEBUG, "Is a booster fd");
- ret = glusterfs_pwrite (glfs_fd, buf, count, offset);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-ssize_t
-pwrite64 (int fd, const void *buf, size_t count, uint64_t offset)
-{
- int ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_DEBUG, "pwrite64: fd %d, count %d, offset %"
- PRIu64, fd, count, offset);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_DEBUG, "Not a booster fd");
- if (real_pwrite64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_pwrite64 (fd, buf, count, offset);
- } else {
- gf_log ("booster", GF_LOG_DEBUG, "Is a booster fd");
- ret = glusterfs_pwrite (glfs_fd, buf, count, offset);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-int
-close (int fd)
-{
- int ret = -1;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_DEBUG, "close: fd %d", fd);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- if (glfs_fd) {
- gf_log ("booster", GF_LOG_DEBUG, "Is a booster fd");
- booster_fd_put (booster_fdtable, fd);
- ret = glusterfs_close (glfs_fd);
- booster_fdptr_put (glfs_fd);
- }
-
- ret = real_close (fd);
-
- return ret;
-}
-
-#ifndef _LSEEK_DECLARED
-#define _LSEEK_DECLARED
-off_t
-lseek (int filedes, unsigned long offset, int whence)
-{
- int ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_DEBUG, "lseek: fd %d, offset %ld",
- filedes, offset);
-
- glfs_fd = booster_fdptr_get (booster_fdtable, filedes);
- if (glfs_fd) {
- gf_log ("booster", GF_LOG_DEBUG, "Is a booster fd");
- ret = glusterfs_lseek (glfs_fd, offset, whence);
- booster_fdptr_put (glfs_fd);
- } else {
- gf_log ("booster", GF_LOG_DEBUG, "Not a booster fd");
- if (real_lseek == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_lseek (filedes, offset, whence);
- }
-
- return ret;
-}
-#endif
-
-off_t
-lseek64 (int filedes, uint64_t offset, int whence)
-{
- int ret;
- glusterfs_file_t glfs_fd = 0;
-
-
- gf_log ("booster", GF_LOG_DEBUG, "lseek: fd %d, offset %"PRIu64,
- filedes, offset);
- glfs_fd = booster_fdptr_get (booster_fdtable, filedes);
- if (glfs_fd) {
- gf_log ("booster", GF_LOG_DEBUG, "Is a booster fd");
- ret = glusterfs_lseek (glfs_fd, offset, whence);
- booster_fdptr_put (glfs_fd);
- } else {
- gf_log ("booster", GF_LOG_DEBUG, "Not a booster fd");
- if (real_lseek64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_lseek64 (filedes, offset, whence);
- }
-
- return ret;
-}
-
-int
-dup (int oldfd)
-{
- int ret = -1, new_fd = -1;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_DEBUG, "dup: fd %d", oldfd);
- glfs_fd = booster_fdptr_get (booster_fdtable, oldfd);
- new_fd = real_dup (oldfd);
-
- if (new_fd >=0 && glfs_fd) {
- gf_log ("booster", GF_LOG_DEBUG, "Is a booster fd");
- ret = booster_fd_unused_get (booster_fdtable, glfs_fd,
- new_fd);
- fd_ref ((fd_t *)glfs_fd);
- if (ret == -1) {
- gf_log ("booster", GF_LOG_ERROR,"Failed to map new fd");
- real_close (new_fd);
- }
- }
-
- if (glfs_fd) {
- booster_fdptr_put (glfs_fd);
- }
-
- return new_fd;
-}
-
-
-int
-dup2 (int oldfd, int newfd)
-{
- int ret = -1;
- glusterfs_file_t old_glfs_fd = NULL, new_glfs_fd = NULL;
-
- if (oldfd == newfd) {
- return newfd;
- }
-
- old_glfs_fd = booster_fdptr_get (booster_fdtable, oldfd);
- new_glfs_fd = booster_fdptr_get (booster_fdtable, newfd);
-
- ret = real_dup2 (oldfd, newfd);
- if (ret >= 0) {
- if (new_glfs_fd) {
- glusterfs_close (new_glfs_fd);
- booster_fdptr_put (new_glfs_fd);
- booster_fd_put (booster_fdtable, newfd);
- new_glfs_fd = 0;
- }
-
- if (old_glfs_fd) {
- ret = booster_fd_unused_get (booster_fdtable,
- old_glfs_fd, newfd);
- fd_ref ((fd_t *)old_glfs_fd);
- if (ret == -1) {
- real_close (newfd);
- }
- }
- }
-
- if (old_glfs_fd) {
- booster_fdptr_put (old_glfs_fd);
- }
-
- if (new_glfs_fd) {
- booster_fdptr_put (new_glfs_fd);
- }
-
- return ret;
-}
-
-int
-mkdir (const char *pathname, mode_t mode)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_DEBUG, "mkdir: path %s", pathname);
- ret = glusterfs_mkdir (pathname, mode);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "mkdir failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_DEBUG, "directory created");
- return ret;
- }
-
- if (real_mkdir == NULL) {
- ret = -1;
- errno = ENOSYS;
- } else
- ret = real_mkdir (pathname, mode);
-
- return ret;
-}
-
-int
-rmdir (const char *pathname)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_DEBUG, "rmdir: path %s", pathname);
- ret = glusterfs_rmdir (pathname);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "rmdir failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_DEBUG, "directory removed");
- return ret;
- }
-
- if (real_rmdir == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_rmdir (pathname);
-
- return ret;
-}
-
-int
-chmod (const char *pathname, mode_t mode)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_DEBUG, "chmod: path %s", pathname);
- ret = glusterfs_chmod (pathname, mode);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "chmod failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_DEBUG, "chmod succeeded");
- return ret;
- }
-
- if (real_chmod == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_chmod (pathname, mode);
-
- return ret;
-}
-
-int
-chown (const char *pathname, uid_t owner, gid_t group)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_DEBUG, "chown: path: %s", pathname);
- ret = glusterfs_chown (pathname, owner, group);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "chown failed: %s\n",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_DEBUG, "chown succeeded");
- return ret;
- }
-
- if (real_chown == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_chown (pathname, owner, group);
-
- return ret;
-}
-
-int
-fchown (int fd, uid_t owner, gid_t group)
-{
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_DEBUG, "fchown: fd %d, uid %d, gid %d", fd,
- owner, group);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_DEBUG, "Not a booster fd");
- if (real_fchown == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_fchown (fd, owner, group);
- } else {
- gf_log ("booster", GF_LOG_DEBUG, "Is a booster fd");
- ret = glusterfs_fchown (fh, owner, group);
- booster_fdptr_put (fh);
- }
-
- return ret;
-}
-
-
-#define MOUNT_TABLE_HASH_SIZE 256
-
-
-static int
-booster_init (void)
-{
- char *booster_conf_path = NULL;
- int ret = -1;
- int pipefd[2];
-
- booster_fdtable = booster_fdtable_alloc ();
- if (!booster_fdtable) {
- fprintf (stderr, "cannot allocate fdtable: %s\n",
- strerror (errno));
- goto err;
- }
-
- if (pipe (pipefd) == -1) {
- gf_log ("booster-fstab", GF_LOG_ERROR, "Pipe creation failed:%s"
- , strerror (errno));
- goto err;
- }
-
- process_piped_fd = pipefd[0];
- real_close (pipefd[1]);
- /* libglusterfsclient based VMPs should be inited only
- * after the file tables are inited so that if the socket
- * calls use the fd based syscalls, the fd tables are
- * correctly initialized to return a NULL handle, on which the
- * socket calls will fall-back to the real API.
- */
- booster_conf_path = getenv (BOOSTER_CONF_ENV_VAR);
- if (booster_conf_path != NULL) {
- if (strlen (booster_conf_path) > 0)
- ret = booster_configure (booster_conf_path);
- else {
- gf_log ("booster", GF_LOG_ERROR, "%s not defined, "
- "using default path: %s", BOOSTER_CONF_ENV_VAR,
- DEFAULT_BOOSTER_CONF);
- ret = booster_configure (DEFAULT_BOOSTER_CONF);
- }
- } else {
- gf_log ("booster", GF_LOG_ERROR, "%s not defined, using default"
- " path: %s", BOOSTER_CONF_ENV_VAR,DEFAULT_BOOSTER_CONF);
- ret = booster_configure (DEFAULT_BOOSTER_CONF);
- }
-
- if (ret == 0)
- gf_log ("booster", GF_LOG_DEBUG, "booster is inited");
- return 0;
-
-err:
- /* Sure we return an error value here
- * but who cares about booster.
- */
- return -1;
-}
-
-
-static void
-booster_cleanup (void)
-{
- /* gf_fd_fdtable_destroy (booster_fdtable);*/
- /*for (i=0; i < booster_fdtable->max_fds; i++) {
- if (booster_fdtable->fds[i]) {
- fd_t *fd = booster_fdtable->fds[i];
- free (fd);
- }
- }*/
-
- free (booster_fdtable);
- booster_fdtable = NULL;
-
- /*
- * FIXME: there may be issues during execution of fini of individual
- * xlators due to inconsistent lock states.
- */
-
- /*
- * as above FIXME says, glusterfs_umount_all indeed leads to memory
- * corruption, hence commenting out
- */
- /* glusterfs_umount_all (); */
- glusterfs_reset ();
-}
-
-int
-fchmod (int fd, mode_t mode)
-{
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_DEBUG, "fchmod: fd %d, mode: 0x%x", fd, mode);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_DEBUG, "Not a booster fd");
- if (real_fchmod == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_fchmod (fd, mode);
- } else {
- gf_log ("booster", GF_LOG_DEBUG, "Is a booster fd");
- ret = glusterfs_fchmod (fh, mode);
- booster_fdptr_put (fh);
- }
-
- return ret;
-}
-
-int
-fsync (int fd)
-{
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_DEBUG, "fsync: fd %d", fd);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_DEBUG, "Not a booster fd");
- if (real_fsync == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_fsync (fd);
- } else {
- gf_log ("booster", GF_LOG_DEBUG, "Is a booster fd");
- ret = glusterfs_fsync (fh);
- booster_fdptr_put (fh);
- }
-
- return ret;
-}
-
-int
-ftruncate (int fd, off_t length)
-{
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_DEBUG, "ftruncate: fd %d, length: %"PRIu64,fd
- , length);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_DEBUG, "Not a booster fd");
- if (real_ftruncate == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_ftruncate (fd, length);
- } else {
- gf_log ("booster", GF_LOG_DEBUG, "Is a booster fd");
- ret = glusterfs_ftruncate (fh, length);
- booster_fdptr_put (fh);
- }
-
- return ret;
-}
-
-int
-link (const char *old, const char *new)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_DEBUG, "link: old: %s, new: %s", old, new);
- ret = glusterfs_link (old, new);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "Link failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_DEBUG, "link call succeeded");
- return ret;
- }
-
- if (real_link == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_link (old, new);
-
- return ret;
-}
-
-int
-rename (const char *old, const char *new)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_DEBUG, "link: old: %s, new: %s", old, new);
- ret = glusterfs_rename (old, new);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "Rename failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_DEBUG, "Rename succeeded");
- return ret;
- }
-
- if (real_rename == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_rename (old, new);
-
- return ret;
-}
-
-int
-utimes (const char *path, const struct timeval times[2])
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_DEBUG, "utimes: path %s", path);
- ret = glusterfs_utimes (path, times);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "utimes failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_DEBUG, "utimes succeeded");
- return ret;
- }
-
- if (real_utimes == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_utimes (path, times);
-
- return ret;
-}
-
-int
-utime (const char *path, const struct utimbuf *buf)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_DEBUG, "utime: path %s", path);
- ret = glusterfs_utime (path, buf);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "utime failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_DEBUG, "utime succeeded");
- return ret;
- }
-
- if (real_utime == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_utime (path, buf);
-
- return ret;
-}
-
-int
-mknod (const char *path, mode_t mode, dev_t dev)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_DEBUG, "mknod: path %s", path);
- ret = glusterfs_mknod (path, mode, dev);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "mknod failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_DEBUG, "mknod succeeded");
- return ret;
- }
-
- if (real_mknod) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_mknod (path, mode, dev);
-
- return ret;
-}
-
-int
-mkfifo (const char *path, mode_t mode)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_DEBUG, "mkfifo: path %s", path);
- ret = glusterfs_mkfifo (path, mode);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "mkfifo failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_DEBUG, "mkfifo succeeded");
- return ret;
- }
-
- if (real_mkfifo == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_mkfifo (path, mode);
-
- return ret;
-}
-
-int
-unlink (const char *path)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_DEBUG, "unlink: path %s", path);
- ret = glusterfs_unlink (path);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "unlink failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_DEBUG, "unlink succeeded");
- return ret;
- }
-
- if (real_unlink == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_unlink (path);
-
- return ret;
-}
-
-int
-symlink (const char *oldpath, const char *newpath)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_DEBUG, "symlink: old: %s, new: %s",
- oldpath, newpath);
- ret = glusterfs_symlink (oldpath, newpath);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "symlink failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_DEBUG, "symlink succeeded");
- return ret;
- }
-
- if (real_symlink == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_symlink (oldpath, newpath);
-
- return ret;
-}
-
-int
-readlink (const char *path, char *buf, size_t bufsize)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_DEBUG, "readlink: path %s", path);
- ret = glusterfs_readlink (path, buf, bufsize);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "readlink failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret > 0) {
- gf_log ("booster", GF_LOG_DEBUG, "readlink succeeded");
- return ret;
- }
-
- if (real_readlink == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_readlink (path, buf, bufsize);
-
- return ret;
-}
-
-char *
-realpath (const char *path, char *resolved_path)
-{
- char *res = NULL;
-
- gf_log ("booster", GF_LOG_DEBUG, "realpath: path %s", path);
- res = glusterfs_realpath (path, resolved_path);
- if ((res == NULL) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "realpath failed: %s",
- strerror (errno));
- return res;
- }
-
- if (res != NULL) {
- gf_log ("booster", GF_LOG_DEBUG, "realpath succeeded");
- return res;
- }
-
- if (real_realpath == NULL) {
- errno = ENOSYS;
- res = NULL;
- } else
- res = real_realpath (path, resolved_path);
-
- return res;
-}
-
-#define BOOSTER_GL_DIR 1
-#define BOOSTER_POSIX_DIR 2
-
-struct booster_dir_handle {
- int type;
- void *dirh;
-};
-
-DIR *
-opendir (const char *path)
-{
- glusterfs_dir_t gdir = NULL;
- struct booster_dir_handle *bh = NULL;
- DIR *pdir = NULL;
-
- gf_log ("booster", GF_LOG_DEBUG, "opendir: path: %s", path);
- bh = calloc (1, sizeof (struct booster_dir_handle));
- if (!bh) {
- gf_log ("booster", GF_LOG_ERROR, "memory allocation failed");
- errno = ENOMEM;
- goto out;
- }
-
- gdir = glusterfs_opendir (path);
- if (gdir) {
- gf_log ("booster", GF_LOG_DEBUG, "Gluster dir opened");
- bh->type = BOOSTER_GL_DIR;
- bh->dirh = (void *)gdir;
- goto out;
- } else if ((!gdir) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "Opendir failed");
- goto free_out;
- }
-
- if (real_opendir == NULL) {
- errno = ENOSYS;
- goto free_out;
- }
-
- pdir = real_opendir (path);
-
- if (pdir) {
- bh->type = BOOSTER_POSIX_DIR;
- bh->dirh = (void *)pdir;
- goto out;
- }
-
-free_out:
- if (bh) {
- free (bh);
- bh = NULL;
- }
-out:
- return (DIR *)bh;
-}
-
-int __REDIRECT (booster_false_readdir_r, (DIR *dir, struct dirent *entry,
- struct dirent **result), readdir_r) __nonnull ((1));
-int __REDIRECT (booster_false_readdir64_r, (DIR *dir, struct dirent64 *entry,
- struct dirent64 **result), readdir64_r) __nonnull ((1));
-
-int
-booster_false_readdir_r (DIR *dir, struct dirent *entry, struct dirent **result)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
- int ret = 0;
-
- if (!bh) {
- ret = errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_DEBUG, "readdir_r on gluster");
- ret = glusterfs_readdir_r ((glusterfs_dir_t)bh->dirh, entry,
- result);
-
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- gf_log ("booster", GF_LOG_DEBUG, "readdir_r on posix");
- if (real_readdir_r == NULL) {
- ret = errno = ENOSYS;
- goto out;
- }
-
- ret = real_readdir_r ((DIR *)bh->dirh, entry, result);
- } else {
- ret = errno = EINVAL;
- }
-
-out:
- return ret;
-}
-
-int
-booster_false_readdir64_r (DIR *dir, struct dirent64 *entry,
- struct dirent64 **result)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
- int ret = 0;
-
- if (!bh) {
- ret = errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_DEBUG, "readdir_r on gluster");
- ret = glusterfs_readdir_r ((glusterfs_dir_t)bh->dirh,
- (struct dirent *)entry,
- (struct dirent **)result);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- gf_log ("booster", GF_LOG_DEBUG, "readdir_r on posix");
- if (real_readdir64_r == NULL) {
- ret = errno = ENOSYS;
- goto out;
- }
-
- ret = real_readdir64_r ((DIR *)bh->dirh, entry, result);
- } else {
- ret = errno = EINVAL;
- }
-
-out:
- return ret;
-}
-
-struct dirent *
-booster_readdir (DIR *dir)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
- struct dirent *dirp = NULL;
-
- if (!bh) {
- errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_DEBUG, "readdir on gluster");
- dirp = glusterfs_readdir ((glusterfs_dir_t)bh->dirh);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- gf_log ("booster", GF_LOG_DEBUG, "readdir on posix");
- if (real_readdir == NULL) {
- errno = ENOSYS;
- dirp = NULL;
- goto out;
- }
-
- dirp = real_readdir ((DIR *)bh->dirh);
- } else {
- dirp = NULL;
- errno = EINVAL;
- }
-
-out:
- return dirp;
-}
-
-int
-closedir (DIR *dh)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dh;
- int ret = -1;
-
- if (!bh) {
- errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_DEBUG, "closedir on gluster");
- ret = glusterfs_closedir ((glusterfs_dir_t)bh->dirh);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- gf_log ("booster", GF_LOG_DEBUG, "closedir on posix");
- if (real_closedir == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_closedir ((DIR *)bh->dirh);
- } else {
- errno = EBADF;
- }
-
- if (ret == 0) {
- free (bh);
- bh = NULL;
- }
-out:
- return ret;
-}
-
-/* The real stat functions reside in booster_stat.c to
- * prevent clash with the statX prototype and functions
- * declared from sys/stat.h
- */
-int
-booster_xstat (int ver, const char *path, void *buf)
-{
- struct stat *sbuf = (struct stat *)buf;
- int ret = -1;
-
- gf_log ("booster", GF_LOG_DEBUG, "xstat: path: %s", path);
- ret = glusterfs_stat (path, sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "xstat failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_DEBUG, "xstat succeeded");
- goto out;
- }
-
- if (real___xstat == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real___xstat (ver, path, sbuf);
-out:
- return ret;
-}
-
-int
-booster_xstat64 (int ver, const char *path, void *buf)
-{
- int ret = -1;
- struct stat64 *sbuf = (struct stat64 *)buf;
-
- gf_log ("booster", GF_LOG_DEBUG, "xstat64: path: %s", path);
- ret = glusterfs_stat (path, (struct stat *)sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "xstat64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_DEBUG, "xstat64 succeeded");
- goto out;
- }
-
- if (real___xstat64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real___xstat64 (ver, path, sbuf);
-out:
- return ret;
-}
-
-int
-booster_stat (const char *path, void *buf)
-{
- struct stat *sbuf = (struct stat *)buf;
- int ret = -1;
-
- gf_log ("booster", GF_LOG_DEBUG, "stat: path: %s", path);
- ret = glusterfs_stat (path, sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "stat failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_DEBUG, "stat succeeded");
- goto out;
- }
-
- if (real_stat != NULL)
- ret = real_stat (path, sbuf);
- else if (real___xstat != NULL)
- ret = real___xstat (0, path, sbuf);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-
-out:
- return ret;
-}
-
-int
-booster_stat64 (const char *path, void *buf)
-{
- int ret = -1;
- struct stat64 *sbuf = (struct stat64 *)buf;
-
- gf_log ("booster", GF_LOG_DEBUG, "stat64: %s", path);
- ret = glusterfs_stat (path, (struct stat *)sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "stat64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_DEBUG, "stat64 succeeded");
- goto out;
- }
-
- if (real_stat64 != NULL)
- ret = real_stat64 (path, sbuf);
- else if (real___xstat64 != NULL)
- ret = real___xstat64 (0, path, sbuf);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-out:
- return ret;
-}
-
-int
-booster_fxstat (int ver, int fd, void *buf)
-{
- struct stat *sbuf = (struct stat *)buf;
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_DEBUG, "fxstat: fd %d", fd);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_DEBUG, "Not a booster fd");
- if (real___fxstat == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real___fxstat (ver, fd, sbuf);
- } else {
- gf_log ("booster", GF_LOG_DEBUG, "Is a booster fd");
- ret = glusterfs_fstat (fh, sbuf);
- booster_fdptr_put (fh);
- }
-
-out:
- return ret;
-}
-
-int
-booster_fxstat64 (int ver, int fd, void *buf)
-{
- int ret = -1;
- struct stat64 *sbuf = (struct stat64 *)buf;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_DEBUG, "fxstat64: fd %d", fd);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_DEBUG, "Not a booster fd");
- if (real___fxstat64 == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
- ret = real___fxstat64 (ver, fd, sbuf);
- } else {
- gf_log ("booster", GF_LOG_DEBUG, "Is a booster fd");
- ret = glusterfs_fstat (fh, (struct stat *)sbuf);
- booster_fdptr_put (fh);
- }
-
-out:
- return ret;
-}
-
-int
-booster_fstat (int fd, void *buf)
-{
- struct stat *sbuf = (struct stat *)buf;
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_DEBUG, "fstat: fd %d", fd);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_DEBUG, "Not a booster fd");
- if (real_fstat != NULL)
- ret = real_fstat (fd, sbuf);
- else if (real___fxstat != NULL)
- ret = real___fxstat (0, fd, sbuf);
- else {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
- } else {
- gf_log ("booster", GF_LOG_DEBUG, "Is a booster fd");
- ret = glusterfs_fstat (fh, sbuf);
- booster_fdptr_put (fh);
- }
-
-out:
- return ret;
-}
-
-int
-booster_fstat64 (int fd, void *buf)
-{
- int ret = -1;
- struct stat64 *sbuf = (struct stat64 *)buf;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_DEBUG, "fstat64: fd %d", fd);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_DEBUG, "Not a booster fd");
- if (real_fstat64 != NULL)
- ret = real_fstat64 (fd, sbuf);
- else if (real___fxstat64 != NULL)
- /* Not sure how portable the use of 0 for
- * version number is but it works over glibc.
- * We need this because, I've
- * observed that all the above real* functors can be
- * NULL. In that case, this is our last and only option.
- */
- ret = real___fxstat64 (0, fd, sbuf);
- else {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
- } else {
- gf_log ("booster", GF_LOG_DEBUG, "Is a booster fd");
- ret = glusterfs_fstat (fh, (struct stat *)sbuf);
- booster_fdptr_put (fh);
- }
-
-out:
- return ret;
-}
-
-int
-booster_lxstat (int ver, const char *path, void *buf)
-{
- struct stat *sbuf = (struct stat *)buf;
- int ret = -1;
-
- gf_log ("booster", GF_LOG_DEBUG, "lxstat: path %s", path);
- ret = glusterfs_lstat (path, sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lxstat failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_DEBUG, "lxstat succeeded");
- goto out;
- }
-
- if (real___lxstat == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real___lxstat (ver, path, sbuf);
-out:
- return ret;
-}
-
-int
-booster_lxstat64 (int ver, const char *path, void *buf)
-{
- int ret = -1;
- struct stat64 *sbuf = (struct stat64 *)buf;
-
- gf_log ("booster", GF_LOG_DEBUG, "lxstat64: path %s", path);
- ret = glusterfs_lstat (path, (struct stat *)sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lxstat64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_DEBUG, "lxstat64 succeeded");
- goto out;
- }
-
- if (real___lxstat64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real___lxstat64 (ver, path, sbuf);
-out:
- return ret;
-}
-
-int
-booster_lstat (const char *path, void *buf)
-{
- struct stat *sbuf = (struct stat *)buf;
- int ret = -1;
-
- gf_log ("booster", GF_LOG_DEBUG, "lstat: path %s", path);
- ret = glusterfs_lstat (path, sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lstat failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_DEBUG, "lstat succeeded");
- goto out;
- }
-
- if (real_lstat != NULL)
- ret = real_lstat (path, sbuf);
- else if (real___lxstat != NULL)
- ret = real___lxstat (0, path, sbuf);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-
-out:
- return ret;
-}
-
-int
-booster_lstat64 (const char *path, void *buf)
-{
- int ret = -1;
- struct stat64 *sbuf = (struct stat64 *)buf;
-
- gf_log ("booster", GF_LOG_DEBUG, "lstat64: path %s", path);
- ret = glusterfs_lstat (path, (struct stat *)sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lstat64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_DEBUG, "lstat64 succeeded");
- goto out;
- }
-
- if (real_lstat64 != NULL)
- ret = real_lstat64 (path, sbuf);
- else if (real___lxstat64 != NULL)
- ret = real___lxstat64 (0, path, sbuf);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-out:
- return ret;
-}
-
-int
-booster_statfs (const char *pathname, struct statfs *buf)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_DEBUG, "statfs: path %s", pathname);
- ret = glusterfs_statfs (pathname, buf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "statfs failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_DEBUG, "statfs succeeded");
- goto out;
- }
-
- if (real_statfs == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real_statfs (pathname, buf);
-
-out:
- return ret;
-}
-
-int
-booster_statfs64 (const char *pathname, struct statfs64 *buf)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_DEBUG, "stat64: path %s", pathname);
- ret = glusterfs_statfs (pathname, (struct statfs *)buf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "statfs64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_DEBUG, "statfs64 succeeded");
- goto out;
- }
-
- if (real_statfs64 == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real_statfs64 (pathname, buf);
-
-out:
- return ret;
-}
-
-int
-booster_statvfs (const char *pathname, struct statvfs *buf)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_DEBUG, "statvfs: path %s", pathname);
- ret = glusterfs_statvfs (pathname, buf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "statvfs failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_DEBUG, "statvfs succeeded");
- goto out;
- }
-
- if (real_statvfs == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real_statvfs (pathname, buf);
-
-out:
- return ret;
-}
-
-int
-booster_statvfs64 (const char *pathname, struct statvfs64 *buf)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_DEBUG, "statvfs64: path %s", pathname);
- ret = glusterfs_statvfs (pathname, (struct statvfs *)buf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "statvfs64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_DEBUG, "statvfs64 succeeded");
- goto out;
- }
-
- if (real_statvfs64 == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real_statvfs64 (pathname, buf);
-
-out:
- return ret;
-}
-
-ssize_t
-getxattr (const char *path, const char *name, void *value, size_t size)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_DEBUG, "getxattr: path %s, name %s", path,
- name);
- ret = glusterfs_getxattr (path, name, value, size);
- if ((ret == -1) && (ret != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "getxattr failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret > 0) {
- gf_log ("booster", GF_LOG_DEBUG, "getxattr succeeded");
- return ret;
- }
-
- if (real_getxattr == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real_getxattr (path, name, value, size);
-out:
- return ret;
-}
-
-
-ssize_t
-lgetxattr (const char *path, const char *name, void *value, size_t size)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_DEBUG, "lgetxattr: path %s, name %s", path,
- name);
- ret = glusterfs_lgetxattr (path, name, value, size);
- if ((ret == -1) && (ret != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lgetxattr failed: %s",
- strerror (errno));
-
- return ret;
- }
-
- if (ret > 0) {
- gf_log ("booster", GF_LOG_DEBUG, "lgetxattr succeeded");
- return ret;
- }
-
- if (real_lgetxattr == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real_lgetxattr (path, name, value, size);
-out:
- return ret;
-}
-
-int
-remove (const char *path)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_DEBUG, "remove: %s", path);
- ret = glusterfs_remove (path);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "remove failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_DEBUG, "remove succeeded");
- goto out;
- }
-
- if (real_remove == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real_remove (path);
-
-out:
- return ret;
-}
-
-int
-lchown (const char *path, uid_t owner, gid_t group)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_DEBUG, "lchown: path %s", path);
- ret = glusterfs_lchown (path, owner, group);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lchown failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_ERROR, "lchown succeeded");
- goto out;
- }
-
- if (real_lchown == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real_lchown (path, owner, group);
-
-out:
- return ret;
-}
-
-void
-booster_rewinddir (DIR *dir)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
-
- if (!bh) {
- errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_DEBUG, "rewinddir on glusterfs");
- glusterfs_rewinddir ((glusterfs_dir_t)bh->dirh);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- if (real_rewinddir == NULL) {
- errno = ENOSYS;
- goto out;
- }
- gf_log ("booster", GF_LOG_DEBUG, "rewinddir on posix");
- real_rewinddir ((DIR *)bh->dirh);
- } else
- errno = EINVAL;
-out:
- return;
-}
-
-void
-booster_seekdir (DIR *dir, off_t offset)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
-
- if (!bh) {
- errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_DEBUG, "seekdir on glusterfs");
- glusterfs_seekdir ((glusterfs_dir_t)bh->dirh, offset);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- if (real_seekdir == NULL) {
- errno = ENOSYS;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_DEBUG, "seekdir on posix");
- real_seekdir ((DIR *)bh->dirh, offset);
- } else
- errno = EINVAL;
-out:
- return;
-}
-
-off_t
-booster_telldir (DIR *dir)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
- off_t offset = -1;
-
- if (!bh) {
- errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_DEBUG, "telldir on glusterfs");
- offset = glusterfs_telldir ((glusterfs_dir_t)bh->dirh);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- if (real_telldir == NULL) {
- errno = ENOSYS;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_DEBUG, "telldir on posix");
- offset = real_telldir ((DIR *)bh->dirh);
- } else
- errno = EINVAL;
-out:
- return offset;
-}
-
-
-pid_t
-fork (void)
-{
- pid_t pid = 0;
- char child = 0;
-
- glusterfs_log_lock ();
- {
- pid = real_fork ();
- }
- glusterfs_log_unlock ();
-
- child = (pid == 0);
- if (child) {
- booster_cleanup ();
- booster_init ();
- }
-
- return pid;
-}
-
-ssize_t
-sendfile (int out_fd, int in_fd, off_t *offset, size_t count)
-{
- glusterfs_file_t in_fh = NULL;
- ssize_t ret = -1;
-
- gf_log ("booster", GF_LOG_DEBUG, "sendfile: in fd %d, out fd %d, offset"
- " %"PRIu64", count %d", in_fd, out_fd, *offset, count);
- /*
- * handle sendfile in booster only if in_fd corresponds to a glusterfs
- * file handle
- */
- in_fh = booster_fdptr_get (booster_fdtable, in_fd);
- if (!in_fh) {
- gf_log ("booster", GF_LOG_DEBUG, "Not a booster fd");
- if (real_sendfile == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else {
- ret = real_sendfile (out_fd, in_fd, offset, count);
- }
- } else {
- gf_log ("booster", GF_LOG_DEBUG, "Is a booster fd");
- ret = glusterfs_sendfile (out_fd, in_fh, offset, count);
- booster_fdptr_put (in_fh);
- }
-
- return ret;
-}
-
-ssize_t
-sendfile64 (int out_fd, int in_fd, off_t *offset, size_t count)
-{
- glusterfs_file_t in_fh = NULL;
- ssize_t ret = -1;
-
- gf_log ("booster", GF_LOG_DEBUG, "sendfile64: in fd %d, out fd %d,"
- " offset %"PRIu64", count %d", in_fd, out_fd, *offset, count);
- /*
- * handle sendfile in booster only if in_fd corresponds to a glusterfs
- * file handle
- */
- in_fh = booster_fdptr_get (booster_fdtable, in_fd);
- if (!in_fh) {
- gf_log ("booster", GF_LOG_DEBUG, "Not a booster fd");
- if (real_sendfile64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else {
- ret = real_sendfile64 (out_fd, in_fd, offset, count);
- }
- } else {
- gf_log ("booster", GF_LOG_DEBUG, "Is a booster fd");
- ret = glusterfs_sendfile (out_fd, in_fh, offset, count);
- booster_fdptr_put (in_fh);
- }
-
- return ret;
-}
-
-
-int
-fcntl (int fd, int cmd, ...)
-{
- va_list ap;
- int ret = -1;
- long arg = 0;
- struct flock *lock = NULL;
- glusterfs_file_t glfs_fd = 0;
-
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- gf_log ("booster", GF_LOG_DEBUG, "fcntl: fd %d, cmd %d", fd, cmd);
- switch (cmd) {
- case F_DUPFD:
- /*
- * FIXME: Consider this case when implementing F_DUPFD, F_GETFD
- * etc flags in libglusterfsclient. Commenting it out for
- * timebeing since it is defined only in linux kernel
- * versions >= 2.6.24.
- */
- /* case F_DUPFD_CLOEXEC: */
- case F_GETFD:
- case F_GETFL:
- case F_GETOWN:
- case F_GETSIG:
- case F_GETLEASE:
- if (glfs_fd) {
- gf_log ("booster", GF_LOG_DEBUG, "Is a booster fd");
- ret = glusterfs_fcntl (glfs_fd, cmd);
- } else {
- if (!real_fcntl) {
- errno = ENOSYS;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_DEBUG, "Not a booster fd");
- ret = real_fcntl (fd, cmd);
- }
- break;
-
- case F_SETFD:
- case F_SETFL:
- case F_SETOWN:
- case F_SETSIG:
- case F_SETLEASE:
- case F_NOTIFY:
- va_start (ap, cmd);
- arg = va_arg (ap, long);
- va_end (ap);
-
- if (glfs_fd) {
- gf_log ("booster", GF_LOG_DEBUG, "Is a booster fd");
- ret = glusterfs_fcntl (glfs_fd, cmd, arg);
- } else {
- if (!real_fcntl) {
- errno = ENOSYS;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_DEBUG, "Not a booster fd");
- ret = real_fcntl (fd, cmd, arg);
- }
- break;
-
- case F_GETLK:
- case F_SETLK:
- case F_SETLKW:
-#if F_GETLK != F_GETLK64
- case F_GETLK64:
-#endif
-#if F_SETLK != F_SETLK64
- case F_SETLK64:
-#endif
-#if F_SETLKW != F_SETLKW64
- case F_SETLKW64:
-#endif
- va_start (ap, cmd);
- lock = va_arg (ap, struct flock *);
- va_end (ap);
-
- if (lock == NULL) {
- errno = EINVAL;
- goto out;
- }
-
- if (glfs_fd) {
- gf_log ("booster", GF_LOG_DEBUG, "Is a booster fd");
- ret = glusterfs_fcntl (glfs_fd, cmd, lock);
- } else {
- if (!real_fcntl) {
- errno = ENOSYS;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_DEBUG, "Not a booster fd");
- ret = real_fcntl (fd, cmd, lock);
- }
- break;
-
- default:
- errno = EINVAL;
- break;
- }
-
-out:
- if (glfs_fd) {
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-void
-booster_lib_init (void)
-{
-
- RESOLVE (open);
- RESOLVE (open64);
- RESOLVE (creat);
-
- RESOLVE (read);
- RESOLVE (readv);
- RESOLVE (pread);
- RESOLVE (pread64);
-
- RESOLVE (write);
- RESOLVE (writev);
- RESOLVE (pwrite);
- RESOLVE (pwrite64);
-
- RESOLVE (lseek);
- RESOLVE (lseek64);
-
- RESOLVE (close);
-
- RESOLVE (dup);
- RESOLVE (dup2);
-
- RESOLVE (fork);
- RESOLVE (mkdir);
- RESOLVE (rmdir);
- RESOLVE (chmod);
- RESOLVE (chown);
- RESOLVE (fchmod);
- RESOLVE (fchown);
- RESOLVE (fsync);
- RESOLVE (ftruncate);
- RESOLVE (link);
- RESOLVE (rename);
- RESOLVE (utimes);
- RESOLVE (utime);
- RESOLVE (mknod);
- RESOLVE (mkfifo);
- RESOLVE (unlink);
- RESOLVE (symlink);
- RESOLVE (readlink);
- RESOLVE (realpath);
- RESOLVE (opendir);
- RESOLVE (readdir);
- RESOLVE (closedir);
- RESOLVE (__xstat);
- RESOLVE (__xstat64);
- RESOLVE (stat);
- RESOLVE (stat64);
- RESOLVE (__fxstat);
- RESOLVE (__fxstat64);
- RESOLVE (fstat);
- RESOLVE (fstat64);
- RESOLVE (__lxstat);
- RESOLVE (__lxstat64);
- RESOLVE (lstat);
- RESOLVE (lstat64);
- RESOLVE (statfs);
- RESOLVE (statfs64);
- RESOLVE (statvfs);
- RESOLVE (statvfs64);
- RESOLVE (getxattr);
- RESOLVE (lgetxattr);
- RESOLVE (remove);
- RESOLVE (lchown);
- RESOLVE (rewinddir);
- RESOLVE (seekdir);
- RESOLVE (telldir);
- RESOLVE (sendfile);
- RESOLVE (sendfile64);
- RESOLVE (readdir_r);
- RESOLVE (readdir64_r);
- RESOLVE (fcntl);
-
- /* This must be called after resolving real functions
- * above so that the socket based IO calls in libglusterfsclient
- * can fall back to a non-NULL real_XXX function pointer.
- * Calling booster_init before resolving the names above
- * results in seg-faults because the function symbols above are NULL.
- */
- booster_init ();
-}
-
diff --git a/booster/src/booster_fstab.c b/booster/src/booster_fstab.c
deleted file mode 100644
index 202249cad..000000000
--- a/booster/src/booster_fstab.c
+++ /dev/null
@@ -1,452 +0,0 @@
-/* Utilities for reading/writing fstab, mtab, etc.
- Copyright (C) 1995-2000, 2001, 2002, 2003, 2006
- Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#include <alloca.h>
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include "booster_fstab.h"
-#include <stdlib.h>
-#include <libglusterfsclient.h>
-#include <errno.h>
-
-/* The default timeout for inode and stat cache. */
-#define BOOSTER_DEFAULT_ATTR_TIMEO 5 /* In Secs */
-
-/* Prepare to begin reading and/or writing mount table entries from the
- beginning of FILE. MODE is as for `fopen'. */
-glusterfs_fstab_t *
-glusterfs_fstab_init (const char *file, const char *mode)
-{
- glusterfs_fstab_t *handle = NULL;
- handle = calloc (1, sizeof (glusterfs_fstab_t));
- if (!handle) {
- gf_log ("booster-fstab", GF_LOG_ERROR, "Memory allocation"
- " failed");
- goto out;
- }
-
- gf_log ("booster-fstab", GF_LOG_DEBUG, "FSTAB file: %s", file);
- FILE *result = fopen (file,mode);
- if (result != NULL) {
- handle->fp = result;
- } else {
- gf_log ("booster-fstab", GF_LOG_ERROR, "FSTAB file open failed:"
- " %s", strerror (errno));
- free (handle);
- handle = NULL;
- }
-
-out:
-
- return handle;
-}
-
-int
-glusterfs_fstab_close (glusterfs_fstab_t *h)
-{
- if (!h)
- return -1;
-
- if (h->fp)
- fclose (h->fp);
-
- return 0;
-}
-
-/* Since the values in a line are separated by spaces, a name cannot
- contain a space. Therefore some programs encode spaces in names
- by the strings "\040". We undo the encoding when reading an entry.
- The decoding happens in place. */
-static char *
-decode_name (char *buf)
-{
- char *rp = buf;
- char *wp = buf;
-
- do
- if (rp[0] == '\\' && rp[1] == '0' && rp[2] == '4'
- && rp[3] == '0')
- {
- /* \040 is a SPACE. */
- *wp++ = ' ';
- rp += 3;
- }
- else if (rp[0] == '\\' && rp[1] == '0' && rp[2] == '1'
- && rp[3] == '1')
- {
- /* \011 is a TAB. */
- *wp++ = '\t';
- rp += 3;
- }
- else if (rp[0] == '\\' && rp[1] == '0' && rp[2] == '1'
- && rp[3] == '2')
- {
- /* \012 is a NEWLINE. */
- *wp++ = '\n';
- rp += 3;
- }
- else if (rp[0] == '\\' && rp[1] == '\\')
- {
- /* We have to escape \\ to be able to represent all
- * characters. */
- *wp++ = '\\';
- rp += 1;
- }
- else if (rp[0] == '\\' && rp[1] == '1' && rp[2] == '3'
- && rp[3] == '4')
- {
- /* \134 is also \\. */
- *wp++ = '\\';
- rp += 3;
- }
- else
- *wp++ = *rp;
- while (*rp++ != '\0');
-
- return buf;
-}
-
-
-/* Read one mount table entry from STREAM. Returns a pointer to storage
- reused on the next call, or null for EOF or error (use feof/ferror to
- check). */
-struct glusterfs_mntent *
-__glusterfs_fstab_getent (FILE *stream, struct glusterfs_mntent *mp,
- char *buffer, int bufsiz)
-{
- char *cp;
- char *head;
-
- do
- {
- char *end_ptr;
-
- if (fgets (buffer, bufsiz, stream) == NULL)
- {
- return NULL;
- }
-
- end_ptr = strchr (buffer, '\n');
- if (end_ptr != NULL) /* chop newline */
- *end_ptr = '\0';
- else
- {
- /* Not the whole line was read. Do it now but forget
- * it. */
- char tmp[1024];
- while (fgets (tmp, sizeof tmp, stream) != NULL)
- if (strchr (tmp, '\n') != NULL)
- break;
- }
-
- head = buffer + strspn (buffer, " \t");
- /* skip empty lines and comment lines: */
- }
- while (head[0] == '\0' || head[0] == '#');
-
- cp = strsep (&head, " \t");
- mp->mnt_fsname = cp != NULL ? decode_name (cp) : (char *) "";
- if (head)
- head += strspn (head, " \t");
- cp = strsep (&head, " \t");
- mp->mnt_dir = cp != NULL ? decode_name (cp) : (char *) "";
- if (head)
- head += strspn (head, " \t");
- cp = strsep (&head, " \t");
- mp->mnt_type = cp != NULL ? decode_name (cp) : (char *) "";
- if (head)
- head += strspn (head, " \t");
- cp = strsep (&head, " \t");
- mp->mnt_opts = cp != NULL ? decode_name (cp) : (char *) "";
- switch (head ? sscanf (head, " %d %d ", &mp->mnt_freq,
- &mp->mnt_passno) : 0)
- {
- case 0:
- mp->mnt_freq = 0;
- case 1:
- mp->mnt_passno = 0;
- case 2:
- break;
- }
-
- return mp;
-}
-
-struct glusterfs_mntent *
-glusterfs_fstab_getent (glusterfs_fstab_t *h)
-{
- if (!h)
- return NULL;
-
- if (!h->fp)
- return NULL;
-
- return __glusterfs_fstab_getent (h->fp, &h->tmpent, h->buf,
- GF_MNTENT_BUFSIZE);
-}
-
-/* We have to use an encoding for names if they contain spaces or tabs.
- To be able to represent all characters we also have to escape the
- backslash itself. This "function" must be a macro since we use
- `alloca'. */
-#define encode_name(name) \
- do { \
- const char *rp = name; \
- \
- while (*rp != '\0') \
- if (*rp == ' ' || *rp == '\t' || *rp == '\\') \
- break; \
- else \
- ++rp; \
- \
- if (*rp != '\0') \
- { \
- /* In the worst case the length of the string \
- * can increase to four times the current \
- * length. */ \
- char *wp; \
- \
- rp = name; \
- name = wp = (char *) alloca (strlen (name) * 4 + 1); \
- \
- do { \
- if (*rp == ' ') \
- { \
- *wp++ = '\\'; \
- *wp++ = '0'; \
- *wp++ = '4'; \
- *wp++ = '0'; \
- } \
- else if (*rp == '\t') \
- { \
- *wp++ = '\\'; \
- *wp++ = '0'; \
- *wp++ = '1'; \
- *wp++ = '1'; \
- } \
- else if (*rp == '\n') \
- { \
- *wp++ = '\\'; \
- *wp++ = '0'; \
- *wp++ = '1'; \
- *wp++ = '2'; \
- } \
- else if (*rp == '\\') \
- { \
- *wp++ = '\\'; \
- *wp++ = '\\'; \
- } \
- else \
- *wp++ = *rp; \
- } while (*rp++ != '\0'); \
- } \
- } while (0) \
-
-
-int
-glusterfs_fstab_addent (glusterfs_fstab_t *h,
- const struct glusterfs_mntent *mnt)
-{
- struct glusterfs_mntent mntcopy = *mnt;
- if (!h)
- return -1;
-
- if (!h->fp)
- return -1;
-
- if (fseek (h->fp, 0, SEEK_END))
- return -1;
-
- /* Encode spaces and tabs in the names. */
- encode_name (mntcopy.mnt_fsname);
- encode_name (mntcopy.mnt_dir);
- encode_name (mntcopy.mnt_type);
- encode_name (mntcopy.mnt_opts);
-
- return (fprintf (h->fp, "%s %s %s %s %d %d\n",
- mntcopy.mnt_fsname,
- mntcopy.mnt_dir,
- mntcopy.mnt_type,
- mntcopy.mnt_opts,
- mntcopy.mnt_freq,
- mntcopy.mnt_passno)
- < 0 ? 1 : 0);
-}
-
-
-/* Search MNT->mnt_opts for an option matching OPT.
- Returns the address of the substring, or null if none found. */
-char *
-glusterfs_fstab_hasoption (const struct glusterfs_mntent *mnt, const char *opt)
-{
- const size_t optlen = strlen (opt);
- char *rest = mnt->mnt_opts, *p;
-
- while ((p = strstr (rest, opt)) != NULL)
- {
- if ((p == rest || p[-1] == ',')
- && (p[optlen] == '\0' || p[optlen] == '=' || p[optlen] == ','))
- return p;
-
- rest = strchr (p, ',');
- if (rest == NULL)
- break;
- ++rest;
- }
-
- return NULL;
-}
-
-void
-clean_init_params (glusterfs_init_params_t *ipars)
-{
- if (!ipars)
- return;
-
- if (ipars->volume_name)
- free (ipars->volume_name);
-
- if (ipars->specfile)
- free (ipars->specfile);
-
- if (ipars->logfile)
- free (ipars->logfile);
-
- if (ipars->loglevel)
- free (ipars->loglevel);
-
- return;
-}
-
-char *
-get_option_value (char *opt)
-{
- char *val = NULL;
- char *saveptr = NULL;
- char *copy_opt = NULL;
- char *retval = NULL;
-
- copy_opt = strdup (opt);
-
- /* Get the = before the value of the option. */
- val = index (copy_opt, '=');
- if (val) {
- /* Move to start of option */
- ++val;
-
- /* Now, to create a '\0' delimited string out of the
- * options string, first get the position where the
- * next option starts, that would be the next ','.
- */
- saveptr = index (val, ',');
- if (saveptr)
- *saveptr = '\0';
- retval = strdup (val);
- }
-
- free (copy_opt);
-
- return retval;
-}
-
-void
-booster_mount (struct glusterfs_mntent *ent)
-{
- char *opt = NULL;
- glusterfs_init_params_t ipars;
- time_t timeout = BOOSTER_DEFAULT_ATTR_TIMEO;
- char *timeostr = NULL;
- char *endptr = NULL;
-
- if (!ent)
- return;
-
- gf_log ("booster-fstab", GF_LOG_DEBUG, "Mount entry: volfile: %s,"
- " VMP: %s, Type: %s, Options: %s", ent->mnt_fsname,
- ent->mnt_dir, ent->mnt_type, ent->mnt_opts);
- if ((strcmp (ent->mnt_type, "glusterfs") != 0)) {
- gf_log ("booster-fstab", GF_LOG_ERROR, "Type is not glusterfs");
- return;
- }
-
- memset (&ipars, 0, sizeof (glusterfs_init_params_t));
- if (ent->mnt_fsname)
- ipars.specfile = strdup (ent->mnt_fsname);
-
- opt = glusterfs_fstab_hasoption (ent, "subvolume");
- if (opt)
- ipars.volume_name = get_option_value (opt);
-
- opt = glusterfs_fstab_hasoption (ent, "log-file");
- if (!opt)
- opt = glusterfs_fstab_hasoption (ent, "logfile");
-
- if (opt)
- ipars.logfile = get_option_value (opt);
-
- opt = glusterfs_fstab_hasoption (ent, "log-level");
- if (!opt)
- opt = glusterfs_fstab_hasoption (ent, "loglevel");
-
- if (opt)
- ipars.loglevel = get_option_value (opt);
-
- /* Attribute cache timeout */
- opt = glusterfs_fstab_hasoption (ent, "attr_timeout");
- if (opt) {
- timeostr = get_option_value (opt);
- if (timeostr)
- timeout = strtol (timeostr, &endptr, 10);
- }
-
- ipars.lookup_timeout = timeout;
- ipars.stat_timeout = timeout;
-
- if ((glusterfs_mount (ent->mnt_dir, &ipars)) == -1)
- gf_log ("booster-fstab", GF_LOG_ERROR, "VMP mounting failed");
-
- clean_init_params (&ipars);
-}
-
-int
-booster_configure (char *confpath)
-{
- int ret = -1;
- glusterfs_fstab_t *handle = NULL;
- struct glusterfs_mntent *ent = NULL;
-
- if (!confpath)
- goto out;
-
- handle = glusterfs_fstab_init (confpath, "r");
- if (!handle)
- goto out;
-
- while ((ent = glusterfs_fstab_getent (handle)) != NULL)
- booster_mount (ent);
-
- glusterfs_fstab_close (handle);
- ret = 0;
-out:
- return ret;
-}
-
-
diff --git a/booster/src/booster_fstab.h b/booster/src/booster_fstab.h
deleted file mode 100644
index 9bab04c5a..000000000
--- a/booster/src/booster_fstab.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/* Utilities for reading/writing fstab, mtab, etc.
- Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#ifndef GLUSTERFS_FSTAB_MNTENT_H
-#define GLUSTERFS_FSTAB_MNTENT_H 1
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "compat.h"
-
-/* General filesystem types. */
-#define GF_MNTTYPE_IGNORE "ignore" /* Ignore this entry. */
-#define GF_MNTTYPE_NFS "nfs" /* Network file system. */
-#define GF_MNTTYPE_SWAP "swap" /* Swap device. */
-
-
-/* Generic mount options. */
-#define GF_MNTOPT_DEFAULTS "defaults" /* Use all default options. */
-#define GF_MNTOPT_RO "ro" /* Read only. */
-#define GF_MNTOPT_RW "rw" /* Read/write. */
-#define GF_MNTOPT_SUID "suid" /* Set uid allowed. */
-#define GF_MNTOPT_NOSUID "nosuid" /* No set uid allowed. */
-#define GF_MNTOPT_NOAUTO "noauto" /* Do not auto mount. */
-
-
-/* Structure describing a mount table entry. */
-struct glusterfs_mntent
-{
- char *mnt_fsname; /* Device or server for filesystem. */
- char *mnt_dir; /* Directory mounted on. */
- char *mnt_type; /* Type of filesystem: ufs, nfs, etc. */
- char *mnt_opts; /* Comma-separated options for fs. */
- int mnt_freq; /* Dump frequency (in days). */
- int mnt_passno; /* Pass number for `fsck'. */
-};
-
-#define GF_MNTENT_BUFSIZE 1024
-typedef struct glusterfs_fstab_handle {
- FILE *fp;
- char buf[GF_MNTENT_BUFSIZE];
- struct glusterfs_mntent tmpent;
-}glusterfs_fstab_t;
-
-
-/* Prepare to begin reading and/or writing mount table entries from the
- beginning of FILE. MODE is as for `fopen'. */
-extern glusterfs_fstab_t *glusterfs_fstab_init (const char *file,
- const char *mode);
-
-extern struct glusterfs_mntent *glusterfs_fstab_getent (glusterfs_fstab_t *h);
-
-/* Write the mount table entry described by MNT to STREAM.
- Return zero on success, nonzero on failure. */
-extern int glusterfs_fstab_addent (glusterfs_fstab_t *h,
- const struct glusterfs_mntent *mnt);
-
-/* Close a stream opened with `glusterfs_fstab_init'. */
-extern int glusterfs_fstab_close (glusterfs_fstab_t *h);
-
-/* Search MNT->mnt_opts for an option matching OPT.
- Returns the address of the substring, or null if none found. */
-extern char *glusterfs_fstab_hasoption (const struct glusterfs_mntent *mnt,
- const char *opt);
-
-#endif
diff --git a/booster/src/booster_stat.c b/booster/src/booster_stat.c
deleted file mode 100644
index 97a4f28a4..000000000
--- a/booster/src/booster_stat.c
+++ /dev/null
@@ -1,200 +0,0 @@
-/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <sys/types.h>
-
-extern int
-booster_stat (const char *path, void *buf);
-
-extern int
-booster_stat64 (const char *path, void *buf);
-
-extern int
-booster_xstat (int ver, const char *path, void *buf);
-
-extern int
-booster_xstat64 (int ver, const char *path, void *buf);
-
-extern int
-booster_fxstat (int ver, int fd, void *buf);
-extern int
-booster_fxstat64 (int ver, int fd, void *buf);
-extern int
-booster_fstat (int fd, void *buf);
-extern int
-booster_fstat64 (int fd, void *buf);
-
-extern int
-booster_lstat (const char *path, void *buf);
-extern int
-booster_lstat64 (const char *path, void *buf);
-extern int
-booster_lxstat (int ver, const char *path, void *buf);
-extern int
-booster_lxstat64 (int ver, const char *path, void *buf);
-
-
-extern int
-booster_statfs (const char *path, void *buf);
-extern int
-booster_statfs64 (const char *path, void *buf);
-
-extern int
-booster_statvfs (const char *path, void *buf);
-
-extern int
-booster_statvfs64 (const char *path, void *buf);
-
-extern void *
-booster_readdir (void *dir);
-
-extern void
-booster_rewinddir (void *dir);
-
-extern void
-booster_seekdir (void *dir, off_t offset);
-
-extern off_t
-booster_telldir (void *dir);
-
-int
-stat (const char *path, void *buf)
-{
- return booster_stat (path, buf);
-}
-
-int
-stat64 (const char *path, void *buf)
-{
- return booster_stat64 (path, buf);
-}
-
-int
-__xstat (int ver, const char *path, void *buf)
-{
- return booster_xstat (ver, path, buf);
-}
-
-int
-__xstat64 (int ver, const char *path, void *buf)
-{
- return booster_xstat64 (ver, path, buf);
-}
-
-int
-__fxstat (int ver, int fd, void *buf)
-{
- return booster_fxstat (ver, fd, buf);
-}
-
-int
-__fxstat64 (int ver, int fd, void *buf)
-{
- return booster_fxstat64 (ver, fd, buf);
-}
-
-int
-fstat (int fd, void *buf)
-{
- return booster_fstat (fd, buf);
-}
-
-int
-fstat64 (int fd, void *buf)
-{
- return booster_fstat64 (fd, buf);
-}
-
-int
-lstat (const char *path, void *buf)
-{
- return booster_lstat (path, buf);
-}
-
-int
-lstat64 (const char *path, void *buf)
-{
- return booster_lstat64 (path, buf);
-}
-
-int
-__lxstat (int ver, const char *path, void *buf)
-{
- return booster_lxstat (ver, path, buf);
-}
-
-int
-__lxstat64 (int ver, const char *path, void *buf)
-{
- return booster_lxstat64 (ver, path, buf);
-}
-
-int
-statfs (const char *pathname, void *buf)
-{
- return booster_statfs (pathname, buf);
-}
-
-int
-statfs64 (const char *pathname, void *buf)
-{
- return booster_statfs64 (pathname, buf);
-}
-
-int
-statvfs (const char *pathname, void *buf)
-{
- return booster_statvfs (pathname, buf);
-}
-
-int
-statvfs64 (const char *pathname, void *buf)
-{
- return booster_statvfs64 (pathname, buf);
-}
-
-void *
-readdir (void *dir)
-{
- return booster_readdir (dir);
-}
-
-void *
-readdir64 (void *dir)
-{
- return booster_readdir (dir);
-}
-
-void
-rewinddir (void *dir)
-{
- return booster_rewinddir (dir);
-}
-
-void
-seekdir (void *dir, off_t offset)
-{
- return booster_seekdir (dir, offset);
-}
-
-off_t
-telldir (void *dir)
-{
- return booster_telldir (dir);
-}
diff --git a/scheduler/random/Makefile.am b/cli/Makefile.am
index d471a3f92..a985f42a8 100644
--- a/scheduler/random/Makefile.am
+++ b/cli/Makefile.am
@@ -1,3 +1,3 @@
SUBDIRS = src
-CLEANFILES =
+CLEANFILES =
diff --git a/cli/src/Makefile.am b/cli/src/Makefile.am
new file mode 100644
index 000000000..216d1bb55
--- /dev/null
+++ b/cli/src/Makefile.am
@@ -0,0 +1,29 @@
+sbin_PROGRAMS = gluster
+
+gluster_SOURCES = cli.c registry.c input.c cli-cmd.c cli-rl.c \
+ cli-cmd-volume.c cli-cmd-peer.c cli-rpc-ops.c cli-cmd-parser.c\
+ cli-cmd-system.c cli-cmd-misc.c cli-xml-output.c cli-cmd-snapshot.c
+
+gluster_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(GF_LDADD)\
+ $(RLLIBS) $(top_builddir)/rpc/xdr/src/libgfxdr.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ $(GF_GLUSTERFS_LIBS) $(XML_LIBS)
+
+gluster_LDFLAGS = $(GF_LDFLAGS)
+noinst_HEADERS = cli.h cli-mem-types.h cli-cmd.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/rpc-lib/src\
+ -I$(top_srcdir)/rpc/xdr/src\
+ -DDATADIR=\"$(localstatedir)\" \
+ -DCONFDIR=\"$(sysconfdir)/glusterfs\" \
+ -DGSYNCD_PREFIX=\"$(libexecdir)/glusterfs\"\
+ -DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE) -DSBIN_DIR=\"$(sbindir)\"\
+ $(XML_CPPFLAGS)
+
+AM_CFLAGS = -Wall $(GF_GLUSTERFS_CFLAGS)
+
+CLEANFILES =
+
+$(top_builddir)/libglusterfs/src/libglusterfs.la:
+ $(MAKE) -C $(top_builddir)/libglusterfs/src/ all
diff --git a/cli/src/cli-cmd-misc.c b/cli/src/cli-cmd-misc.c
new file mode 100644
index 000000000..566d7c978
--- /dev/null
+++ b/cli/src/cli-cmd-misc.c
@@ -0,0 +1,98 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <pthread.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "cli.h"
+#include "cli-cmd.h"
+#include "cli-mem-types.h"
+#include "protocol-common.h"
+
+extern struct rpc_clnt *global_rpc;
+
+extern rpc_clnt_prog_t *cli_rpc_prog;
+
+extern struct cli_cmd volume_cmds[];
+extern struct cli_cmd cli_probe_cmds[];
+extern struct cli_cmd cli_log_cmds[];
+extern struct cli_cmd cli_system_cmds[];
+extern struct cli_cmd cli_bd_cmds[];
+extern struct cli_cmd snapshot_cmds[];
+struct cli_cmd cli_misc_cmds[];
+
+int
+cli_cmd_quit_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ exit (0);
+}
+
+int
+cli_cmd_display_help (struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount)
+{
+ struct cli_cmd *cmd[] = {volume_cmds, cli_probe_cmds,
+ cli_misc_cmds, snapshot_cmds,
+ NULL};
+ struct cli_cmd *cmd_ind = NULL;
+ int i = 0;
+
+ /* cli_system_cmds commands for internal usage
+ they are not exposed
+ */
+ for (i=0; cmd[i]!=NULL; i++)
+ for (cmd_ind = cmd[i]; cmd_ind->pattern; cmd_ind++)
+ if (_gf_false == cmd_ind->disable)
+ cli_out ("%s - %s", cmd_ind->pattern,
+ cmd_ind->desc);
+
+ return 0;
+}
+
+struct cli_cmd cli_misc_cmds[] = {
+ { "quit",
+ cli_cmd_quit_cbk,
+ "quit"},
+
+ { "help",
+ cli_cmd_display_help,
+ "display command options"},
+
+ { "exit",
+ cli_cmd_quit_cbk,
+ "exit"},
+
+ { NULL, NULL, NULL }
+};
+
+
+int
+cli_cmd_misc_register (struct cli_state *state)
+{
+ int ret = 0;
+ struct cli_cmd *cmd = NULL;
+
+ for (cmd = cli_misc_cmds; cmd->pattern; cmd++) {
+
+ ret = cli_cmd_register (&state->tree, cmd);
+ if (ret)
+ goto out;
+ }
+out:
+ return ret;
+}
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
new file mode 100644
index 000000000..5ab208b8f
--- /dev/null
+++ b/cli/src/cli-cmd-parser.c
@@ -0,0 +1,3682 @@
+/*
+ Copyright (c) 2010-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <pthread.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "cli.h"
+#include "cli-cmd.h"
+#include "cli-mem-types.h"
+#include "dict.h"
+
+#include "protocol-common.h"
+#include "cli1-xdr.h"
+
+#define MAX_SNAP_DESCRIPTION_LEN 1024
+
+struct snap_config_opt_vals_ snap_confopt_vals[] = {
+ {.op_name = "snap-max-hard-limit",
+ .question = "Changing snapshot-max-hard-limit "
+ "will lead to deletion of snapshots "
+ "if they exceed the new limit.\n"
+ "Do you want to continue?"
+ },
+ {.op_name = "snap-max-soft-limit",
+ .question = "Changing snapshot-max-soft-limit "
+ "will lead to deletion of snapshots "
+ "if they exceed the new limit.\n"
+ "Do you want to continue?"
+ },
+ {.op_name = "both",
+ .question = "Changing snapshot-max-hard-limit & "
+ "snapshot-max-soft-limit will lead to "
+ "deletion of snapshots if they exceed "
+ "the new limit.\nDo you want to continue?"
+ },
+ {.op_name = NULL,
+ }
+};
+
+enum cli_snap_config_set_types {
+ GF_SNAP_CONFIG_SET_HARD = 0,
+ GF_SNAP_CONFIG_SET_SOFT = 1,
+ GF_SNAP_CONFIG_SET_BOTH = 2,
+};
+typedef enum cli_snap_config_set_types cli_snap_config_set_types;
+
+static const char *
+id_sel (void *wcon)
+{
+ return (const char *)wcon;
+}
+
+static char *
+str_getunamb (const char *tok, char **opwords)
+{
+ return (char *)cli_getunamb (tok, (void **)opwords, id_sel);
+}
+
+int32_t
+cli_cmd_bricks_parse (const char **words, int wordcount, int brick_index,
+ char **bricks, int *brick_count)
+{
+ int ret = 0;
+ char *tmp_list = NULL;
+ char brick_list[120000] = {0,};
+ char *space = " ";
+ char *delimiter = NULL;
+ char *host_name = NULL;
+ char *free_list_ptr = NULL;
+ char *tmpptr = NULL;
+ int j = 0;
+ int brick_list_len = 0;
+ char *tmp_host = NULL;
+
+ GF_ASSERT (words);
+ GF_ASSERT (wordcount);
+ GF_ASSERT (bricks);
+ GF_ASSERT (brick_index > 0);
+ GF_ASSERT (brick_index < wordcount);
+
+ strncpy (brick_list, space, strlen (space));
+ brick_list_len++;
+ while (brick_index < wordcount) {
+ if (validate_brick_name ((char *)words[brick_index])) {
+ cli_err ("Wrong brick type: %s, use <HOSTNAME>:"
+ "<export-dir-abs-path>", words[brick_index]);
+ ret = -1;
+ goto out;
+ } else {
+ delimiter = strrchr (words[brick_index], ':');
+ ret = gf_canonicalize_path (delimiter + 1);
+ if (ret)
+ goto out;
+ }
+
+ if ((brick_list_len + strlen (words[brick_index]) + 1) > sizeof (brick_list)) {
+ cli_err ("Total brick list is larger than a request. "
+ "Can take (brick_count %d)", *brick_count);
+ ret = -1;
+ goto out;
+ }
+
+ tmp_host = gf_strdup ((char *)words[brick_index]);
+ if (!tmp_host) {
+ gf_log ("cli", GF_LOG_ERROR, "Out of memory");
+ ret = -1;
+ goto out;
+ }
+ get_host_name (tmp_host, &host_name);
+ if (!host_name) {
+ ret = -1;
+ gf_log("cli",GF_LOG_ERROR, "Unable to allocate "
+ "memory");
+ goto out;
+ }
+
+ if (!(strcmp (host_name, "localhost") &&
+ strcmp (host_name, "127.0.0.1") &&
+ strncmp (host_name, "0.", 2))) {
+ cli_err ("Please provide a valid hostname/ip other "
+ "than localhost, 127.0.0.1 or loopback "
+ "address (0.0.0.0 to 0.255.255.255).");
+ ret = -1;
+ GF_FREE (tmp_host);
+ goto out;
+ }
+ if (!valid_internet_address (host_name, _gf_false)) {
+ cli_err ("internet address '%s' does not conform to "
+ "standards", host_name);
+ }
+ GF_FREE (tmp_host);
+ tmp_list = gf_strdup (brick_list + 1);
+ if (free_list_ptr) {
+ GF_FREE (free_list_ptr);
+ free_list_ptr = NULL;
+ }
+ free_list_ptr = tmp_list;
+ j = 0;
+ while(j < *brick_count) {
+ strtok_r (tmp_list, " ", &tmpptr);
+ if (!(strcmp (tmp_list, words[brick_index]))) {
+ ret = -1;
+ cli_err ("Found duplicate"
+ " exports %s",words[brick_index]);
+ goto out;
+ }
+ tmp_list = tmpptr;
+ j++;
+ }
+ strcat (brick_list, words[brick_index]);
+ strcat (brick_list, " ");
+ brick_list_len += (strlen (words[brick_index]) + 1);
+ ++(*brick_count);
+ ++brick_index;
+ }
+
+ *bricks = gf_strdup (brick_list);
+ if (!*bricks)
+ ret = -1;
+out:
+ GF_FREE (free_list_ptr);
+ return ret;
+}
+
+int32_t
+cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options)
+{
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ int ret = -1;
+ gf1_cluster_type type = GF_CLUSTER_TYPE_NONE;
+ int count = 1;
+ int sub_count = 1;
+ int brick_index = 0;
+ int i = 0;
+ char *trans_type = NULL;
+ int32_t index = 0;
+ char *bricks = NULL;
+ int32_t brick_count = 0;
+ char *opwords[] = { "replica", "stripe", "transport", NULL };
+
+ char *invalid_volnames[] = {"volume", "type", "subvolumes", "option",
+ "end-volume", "all", "volume_not_in_ring",
+ "description", "force",
+ "snap-max-hard-limit",
+ "snap-max-soft-limit", NULL};
+ char *w = NULL;
+ int op_count = 0;
+ int32_t replica_count = 1;
+ int32_t stripe_count = 1;
+ gf_boolean_t is_force = _gf_false;
+ int wc = wordcount;
+
+ GF_ASSERT (words);
+ GF_ASSERT (options);
+
+ dict = dict_new ();
+
+ if (!dict)
+ goto out;
+
+ if (wordcount < 3)
+ goto out;
+
+ volname = (char *)words[2];
+
+ GF_ASSERT (volname);
+
+ /* Validate the volume name here itself */
+ {
+ if (volname[0] == '-')
+ goto out;
+
+ for (i = 0; invalid_volnames[i]; i++) {
+ if (!strcmp (volname, invalid_volnames[i])) {
+ cli_err ("\"%s\" cannot be the name of a volume.",
+ volname);
+ goto out;
+ }
+ }
+
+ if (strchr (volname, '/'))
+ goto out;
+
+ if (strlen (volname) > 512)
+ goto out;
+
+ for (i = 0; i < strlen (volname); i++)
+ if (!isalnum (volname[i]) && (volname[i] != '_') && (volname[i] != '-'))
+ goto out;
+ }
+
+ if (wordcount < 4) {
+ ret = -1;
+ goto out;
+ }
+
+ type = GF_CLUSTER_TYPE_NONE;
+ index = 3;
+
+ while (op_count < 3) {
+ ret = -1;
+ w = str_getunamb (words[index], opwords);
+ if (!w) {
+ break;
+ } else if ((strcmp (w, "replica")) == 0) {
+ switch (type) {
+ case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
+ case GF_CLUSTER_TYPE_REPLICATE:
+ cli_err ("replica option given twice");
+ goto out;
+ case GF_CLUSTER_TYPE_NONE:
+ type = GF_CLUSTER_TYPE_REPLICATE;
+ break;
+ case GF_CLUSTER_TYPE_STRIPE:
+ type = GF_CLUSTER_TYPE_STRIPE_REPLICATE;
+ break;
+ }
+
+ if (wordcount < (index+2)) {
+ ret = -1;
+ goto out;
+ }
+ replica_count = strtol (words[index+1], NULL, 0);
+ if (replica_count < 2) {
+ cli_err ("replica count should be greater"
+ " than 1");
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_int32 (dict, "replica-count", replica_count);
+ if (ret)
+ goto out;
+
+ index += 2;
+
+ } else if ((strcmp (w, "stripe")) == 0) {
+ switch (type) {
+ case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
+ case GF_CLUSTER_TYPE_STRIPE:
+ cli_err ("stripe option given twice");
+ goto out;
+ case GF_CLUSTER_TYPE_NONE:
+ type = GF_CLUSTER_TYPE_STRIPE;
+ break;
+ case GF_CLUSTER_TYPE_REPLICATE:
+ type = GF_CLUSTER_TYPE_STRIPE_REPLICATE;
+ break;
+ }
+ if (wordcount < (index + 2)) {
+ ret = -1;
+ goto out;
+ }
+ stripe_count = strtol (words[index+1], NULL, 0);
+ if (stripe_count < 2) {
+ cli_err ("stripe count should be greater"
+ " than 1");
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_int32 (dict, "stripe-count", stripe_count);
+ if (ret)
+ goto out;
+
+ index += 2;
+
+ } else if ((strcmp (w, "transport")) == 0) {
+ if (trans_type) {
+ cli_err ("'transport' option given more"
+ " than one time");
+ goto out;
+ }
+ if ((strcasecmp (words[index+1], "tcp") == 0)) {
+ trans_type = gf_strdup ("tcp");
+ } else if ((strcasecmp (words[index+1], "rdma") == 0)) {
+ trans_type = gf_strdup ("rdma");
+ } else if ((strcasecmp (words[index+1], "tcp,rdma") == 0) ||
+ (strcasecmp (words[index+1], "rdma,tcp") == 0)) {
+ trans_type = gf_strdup ("tcp,rdma");
+ } else {
+ gf_log ("", GF_LOG_ERROR, "incorrect transport"
+ " protocol specified");
+ ret = -1;
+ goto out;
+ }
+ index += 2;
+ } else {
+ GF_ASSERT (!"opword mismatch");
+ ret = -1;
+ goto out;
+ }
+ op_count++;
+ }
+
+ if (!trans_type)
+ trans_type = gf_strdup ("tcp");
+
+ sub_count = stripe_count * replica_count;
+
+ /* reset the count value now */
+ count = 1;
+
+ if (index >= wordcount) {
+ ret = -1;
+ goto out;
+ }
+
+ brick_index = index;
+
+ if (strcmp (words[wordcount - 1], "force") == 0) {
+ is_force = _gf_true;
+ wc = wordcount - 1;
+ }
+
+ ret = cli_cmd_bricks_parse (words, wc, brick_index, &bricks,
+ &brick_count);
+ if (ret)
+ goto out;
+
+ /* If brick-count is not valid when replica or stripe is
+ given, exit here */
+ if (!brick_count) {
+ cli_err ("No bricks specified");
+ ret = -1;
+ goto out;
+ }
+
+ if (brick_count % sub_count) {
+ if (type == GF_CLUSTER_TYPE_STRIPE)
+ cli_err ("number of bricks is not a multiple of "
+ "stripe count");
+ else if (type == GF_CLUSTER_TYPE_REPLICATE)
+ cli_err ("number of bricks is not a multiple of "
+ "replica count");
+ else
+ cli_err ("number of bricks given doesn't match "
+ "required count");
+
+ ret = -1;
+ goto out;
+ }
+
+ /* Everything if parsed fine. start setting info in dict */
+ ret = dict_set_str (dict, "volname", volname);
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32 (dict, "type", type);
+ if (ret)
+ goto out;
+
+ ret = dict_set_dynstr (dict, "transport", trans_type);
+ if (ret)
+ goto out;
+ trans_type = NULL;
+
+ ret = dict_set_dynstr (dict, "bricks", bricks);
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32 (dict, "count", brick_count);
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32 (dict, "force", is_force);
+ if (ret)
+ goto out;
+
+ *options = dict;
+
+out:
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to parse create volume CLI");
+ if (dict)
+ dict_destroy (dict);
+ }
+
+ GF_FREE (trans_type);
+
+ return ret;
+}
+
+int32_t
+cli_cmd_volume_reset_parse (const char **words, int wordcount, dict_t **options)
+{
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ int ret = -1;
+
+ GF_ASSERT (words);
+ GF_ASSERT (options);
+
+ dict = dict_new ();
+
+ if (!dict)
+ goto out;
+
+ if (wordcount < 3)
+ goto out;
+
+ if (wordcount > 5)
+ goto out;
+
+ volname = (char *)words[2];
+
+ if (!volname) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str (dict, "volname", volname);
+ if (ret)
+ goto out;
+
+ if (wordcount == 3) {
+ ret = dict_set_str (dict, "key", "all");
+ if (ret)
+ goto out;
+ }
+
+ if (wordcount >= 4) {
+ if (!strcmp ("force", (char*)words[3])) {
+ ret = dict_set_int32 (dict, "force", 1);
+ if (ret)
+ goto out;
+ ret = dict_set_str (dict, "key", "all");
+ if (ret)
+ goto out;
+ } else {
+ ret = dict_set_str (dict, "key", (char *)words[3]);
+ if (ret)
+ goto out;
+ }
+ }
+
+ if (wordcount == 5) {
+ if (strcmp ("force", (char*)words[4])) {
+ ret = -1;
+ goto out;
+ } else {
+ ret = dict_set_int32 (dict, "force", 1);
+ if (ret)
+ goto out;
+ }
+ }
+
+ *options = dict;
+
+out:
+ if (ret && dict) {
+ dict_destroy (dict);
+ }
+
+ return ret;
+}
+
+int32_t
+cli_cmd_quota_parse (const char **words, int wordcount, dict_t **options)
+{
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ int ret = -1;
+ int i = 0;
+ char key[20] = {0, };
+ uint64_t value = 0;
+ gf_quota_type type = GF_QUOTA_OPTION_TYPE_NONE;
+ char *opwords[] = { "enable", "disable", "limit-usage",
+ "remove", "list", "version", NULL };
+ char *w = NULL;
+
+ GF_ASSERT (words);
+ GF_ASSERT (options);
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ if (wordcount < 4)
+ goto out;
+
+ volname = (char *)words[2];
+ if (!volname) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Validate the volume name here itself */
+ {
+ if (volname[0] == '-')
+ goto out;
+
+ if (!strcmp (volname, "all")) {
+ cli_err ("\"all\" cannot be the name of a volume.");
+ goto out;
+ }
+
+ if (strchr (volname, '/'))
+ goto out;
+
+ if (strlen (volname) > 512)
+ goto out;
+
+ for (i = 0; i < strlen (volname); i++)
+ if (!isalnum (volname[i]) && (volname[i] != '_') && (volname[i] != '-'))
+ goto out;
+ }
+
+ ret = dict_set_str (dict, "volname", volname);
+ if (ret < 0)
+ goto out;
+
+ w = str_getunamb (words[3], opwords);
+ if (!w) {
+ ret = - 1;
+ goto out;
+ }
+
+ if (strcmp (w, "enable") == 0) {
+ if (wordcount == 4) {
+ type = GF_QUOTA_OPTION_TYPE_ENABLE;
+ ret = 0;
+ goto set_type;
+ } else {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (strcmp (w, "disable") == 0) {
+ if (wordcount == 4) {
+ type = GF_QUOTA_OPTION_TYPE_DISABLE;
+ ret = 0;
+ goto set_type;
+ } else {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (strcmp (w, "limit-usage") == 0) {
+ if (wordcount != 6) {
+ ret = -1;
+ goto out;
+ }
+
+ type = GF_QUOTA_OPTION_TYPE_LIMIT_USAGE;
+
+ if (words[4][0] != '/') {
+ cli_err ("Please enter absolute path");
+
+ return -2;
+ }
+ ret = dict_set_str (dict, "path", (char *) words[4]);
+ if (ret)
+ goto out;
+
+ if (!words[5]) {
+ cli_err ("Please enter the limit value to be set");
+
+ return -2;
+ }
+
+ ret = gf_string2bytesize (words[5], &value);
+ if (ret != 0) {
+ cli_err ("Please enter a correct value");
+ return -1;
+ }
+
+ ret = dict_set_str (dict, "limit", (char *) words[5]);
+ if (ret < 0)
+ goto out;
+
+ goto set_type;
+ }
+ if (strcmp (w, "remove") == 0) {
+ if (wordcount != 5) {
+ ret = -1;
+ goto out;
+ }
+
+ type = GF_QUOTA_OPTION_TYPE_REMOVE;
+
+ if (words[4][0] != '/') {
+ cli_err ("Please enter absolute path");
+
+ return -2;
+ }
+
+ ret = dict_set_str (dict, "path", (char *) words[4]);
+ if (ret < 0)
+ goto out;
+ goto set_type;
+ }
+
+ if (strcmp (w, "list") == 0) {
+ if (wordcount < 4) {
+ ret = -1;
+ goto out;
+ }
+
+ type = GF_QUOTA_OPTION_TYPE_LIST;
+
+ i = 4;
+ while (i < wordcount) {
+ snprintf (key, 20, "path%d", i-4);
+
+ ret = dict_set_str (dict, key, (char *) words [i++]);
+ if (ret < 0)
+ goto out;
+ }
+
+ ret = dict_set_int32 (dict, "count", i - 4);
+ if (ret < 0)
+ goto out;
+
+ goto set_type;
+ }
+
+ if (strcmp (w, "version") == 0) {
+ type = GF_QUOTA_OPTION_TYPE_VERSION;
+ } else {
+ GF_ASSERT (!"opword mismatch");
+ }
+
+set_type:
+ ret = dict_set_int32 (dict, "type", type);
+ if (ret < 0)
+ goto out;
+
+ *options = dict;
+out:
+ if (ret < 0) {
+ if (dict)
+ dict_destroy (dict);
+ }
+
+ return ret;
+}
+
+static inline gf_boolean_t
+cli_is_key_spl (char *key)
+{
+ return (strcmp (key, "group") == 0);
+}
+
+#define GLUSTERD_DEFAULT_WORKDIR "/var/lib/glusterd"
+static int
+cli_add_key_group (dict_t *dict, char *key, char *value, char **op_errstr)
+{
+ int ret = -1;
+ int opt_count = 0;
+ char iter_key[1024] = {0,};
+ char iter_val[1024] = {0,};
+ char *saveptr = NULL;
+ char *tok_key = NULL;
+ char *tok_val = NULL;
+ char *dkey = NULL;
+ char *dval = NULL;
+ char *tagpath = NULL;
+ char *buf = NULL;
+ char line[PATH_MAX + 256] = {0,};
+ char errstr[2048] = "";
+ FILE *fp = NULL;
+
+ ret = gf_asprintf (&tagpath, "%s/groups/%s",
+ GLUSTERD_DEFAULT_WORKDIR, value);
+ if (ret == -1) {
+ tagpath = NULL;
+ goto out;
+ }
+
+ fp = fopen (tagpath, "r");
+ if (!fp) {
+ ret = -1;
+ snprintf(errstr, sizeof(errstr), "Unable to open file '%s'."
+ " Error: %s", tagpath, strerror (errno));
+ if (op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ goto out;
+ }
+
+ opt_count = 0;
+ buf = line;
+ while (fscanf (fp, "%s", buf) != EOF) {
+
+ opt_count++;
+ tok_key = strtok_r (line, "=", &saveptr);
+ tok_val = strtok_r (NULL, "=", &saveptr);
+ if (!tok_key || !tok_val) {
+ ret = -1;
+ snprintf(errstr, sizeof(errstr), "'%s' file format "
+ "not valid.", tagpath);
+ if (op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ goto out;
+ }
+
+ snprintf (iter_key, sizeof (iter_key), "key%d", opt_count);
+ dkey = gf_strdup (tok_key);
+ ret = dict_set_dynstr (dict, iter_key, dkey);
+ if (ret)
+ goto out;
+ dkey = NULL;
+
+ snprintf (iter_val, sizeof (iter_val), "value%d", opt_count);
+ dval = gf_strdup (tok_val);
+ ret = dict_set_dynstr (dict, iter_val, dval);
+ if (ret)
+ goto out;
+ dval = NULL;
+
+ }
+
+ if (!opt_count) {
+ ret = -1;
+ snprintf(errstr, sizeof(errstr), "'%s' file format "
+ "not valid.", tagpath);
+ if (op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ goto out;
+ }
+ ret = dict_set_int32 (dict, "count", opt_count);
+out:
+
+ GF_FREE (tagpath);
+
+ if (ret) {
+ GF_FREE (dkey);
+ GF_FREE (dval);
+ }
+
+ if (fp)
+ fclose (fp);
+
+ return ret;
+}
+#undef GLUSTERD_DEFAULT_WORKDIR
+
+int32_t
+cli_cmd_volume_set_parse (const char **words, int wordcount, dict_t **options,
+ char **op_errstr)
+{
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ int ret = -1;
+ int count = 0;
+ char *key = NULL;
+ char *value = NULL;
+ int i = 0;
+ char str[50] = {0,};
+
+ GF_ASSERT (words);
+ GF_ASSERT (options);
+
+ dict = dict_new ();
+
+ if (!dict)
+ goto out;
+
+ if (wordcount < 3)
+ goto out;
+
+ volname = (char *)words[2];
+
+ GF_ASSERT (volname);
+
+ ret = dict_set_str (dict, "volname", volname);
+
+ if (ret)
+ goto out;
+
+ if ((!strcmp (volname, "help") || !strcmp (volname, "help-xml"))
+ && wordcount == 3 ) {
+ ret = dict_set_str (dict, volname, volname);
+ if (ret)
+ goto out;
+
+ } else if (wordcount < 5) {
+ ret = -1;
+ goto out;
+
+ } else if (wordcount == 5 && cli_is_key_spl ((char *)words[3])) {
+ key = (char *) words[3];
+ value = (char *) words[4];
+ if ( !key || !value) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = gf_strip_whitespace (value, strlen (value));
+ if (ret == -1)
+ goto out;
+
+ if (strlen (value) == 0) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = cli_add_key_group (dict, key, value, op_errstr);
+ if (ret == 0)
+ *options = dict;
+ goto out;
+ }
+
+ for (i = 3; i < wordcount; i+=2) {
+
+ key = (char *) words[i];
+ value = (char *) words[i+1];
+
+ if ( !key || !value) {
+ ret = -1;
+ goto out;
+ }
+
+ count++;
+
+ ret = gf_strip_whitespace (value, strlen (value));
+ if (ret == -1)
+ goto out;
+
+ if (strlen (value) == 0) {
+ ret = -1;
+ goto out;
+ }
+
+ if (cli_is_key_spl (key)) {
+ ret = -1;
+ goto out;
+ }
+
+ sprintf (str, "key%d", count);
+ ret = dict_set_str (dict, str, key);
+ if (ret)
+ goto out;
+
+ sprintf (str, "value%d", count);
+ ret = dict_set_str (dict, str, value);
+
+ if (ret)
+ goto out;
+ }
+
+ ret = dict_set_int32 (dict, "count", wordcount-3);
+
+ if (ret)
+ goto out;
+
+ *options = dict;
+
+out:
+ if (ret)
+ dict_destroy (dict);
+
+ return ret;
+}
+
+int32_t
+cli_cmd_volume_add_brick_parse (const char **words, int wordcount,
+ dict_t **options)
+{
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ int ret = -1;
+ int brick_count = 0, brick_index = 0;
+ char *bricks = NULL;
+ char *opwords_cl[] = { "replica", "stripe", NULL };
+ gf1_cluster_type type = GF_CLUSTER_TYPE_NONE;
+ int count = 1;
+ char *w = NULL;
+ int index;
+ gf_boolean_t is_force = _gf_false;
+ int wc = wordcount;
+
+ GF_ASSERT (words);
+ GF_ASSERT (options);
+
+ dict = dict_new ();
+
+ if (!dict)
+ goto out;
+
+ if (wordcount < 3)
+ goto out;
+
+ volname = (char *)words[2];
+
+ GF_ASSERT (volname);
+
+ ret = dict_set_str (dict, "volname", volname);
+
+ if (ret)
+ goto out;
+
+ if (wordcount < 4) {
+ ret = -1;
+ goto out;
+ }
+ if (wordcount < 6) {
+ /* seems no options are given, go directly to the parse_brick */
+ brick_index = 3;
+ type = GF_CLUSTER_TYPE_NONE;
+ goto parse_bricks;
+ }
+
+ w = str_getunamb (words[3], opwords_cl);
+ if (!w) {
+ type = GF_CLUSTER_TYPE_NONE;
+ index = 3;
+ } else if ((strcmp (w, "replica")) == 0) {
+ type = GF_CLUSTER_TYPE_REPLICATE;
+ if (wordcount < 5) {
+ ret = -1;
+ goto out;
+ }
+ count = strtol (words[4], NULL, 0);
+ if (!count || (count < 2)) {
+ cli_err ("replica count should be greater than 1");
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_int32 (dict, "replica-count", count);
+ if (ret)
+ goto out;
+ index = 5;
+ } else if ((strcmp (w, "stripe")) == 0) {
+ type = GF_CLUSTER_TYPE_STRIPE;
+ if (wordcount < 5) {
+ ret = -1;
+ goto out;
+ }
+ count = strtol (words[4], NULL, 0);
+ if (!count || (count < 2)) {
+ cli_err ("stripe count should be greater than 1");
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_int32 (dict, "stripe-count", count);
+ if (ret)
+ goto out;
+ index = 5;
+ } else {
+ GF_ASSERT (!"opword mismatch");
+ ret = -1;
+ goto out;
+ }
+
+ brick_index = index;
+
+parse_bricks:
+
+ if (strcmp (words[wordcount - 1], "force") == 0) {
+ is_force = _gf_true;
+ wc = wordcount - 1;
+ }
+
+ ret = cli_cmd_bricks_parse (words, wc, brick_index, &bricks,
+ &brick_count);
+ if (ret)
+ goto out;
+
+ ret = dict_set_dynstr (dict, "bricks", bricks);
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32 (dict, "count", brick_count);
+
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32 (dict, "force", is_force);
+ if (ret)
+ goto out;
+
+ *options = dict;
+
+out:
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to parse add-brick CLI");
+ if (dict)
+ dict_destroy (dict);
+ }
+
+ return ret;
+}
+
+
+int32_t
+cli_cmd_volume_remove_brick_parse (const char **words, int wordcount,
+ dict_t **options, int *question)
+{
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ char *delimiter = NULL;
+ int ret = -1;
+ char key[50];
+ int brick_count = 0, brick_index = 0;
+ int32_t tmp_index = 0;
+ int32_t j = 0;
+ char *tmp_brick = NULL;
+ char *tmp_brick1 = NULL;
+ char *type_opword[] = { "replica", NULL };
+ char *opwords[] = { "start", "commit", "stop", "status",
+ "force", NULL };
+ char *w = NULL;
+ int32_t command = GF_OP_CMD_NONE;
+ long count = 0;
+
+ GF_ASSERT (words);
+ GF_ASSERT (options);
+
+ if (wordcount < 4)
+ goto out;
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ volname = (char *)words[2];
+
+ GF_ASSERT (volname);
+
+ ret = dict_set_str (dict, "volname", volname);
+ if (ret)
+ goto out;
+
+ brick_index = 3;
+ w = str_getunamb (words[3], type_opword);
+ if (w && !strcmp ("replica", w)) {
+ if (wordcount < 5) {
+ ret = -1;
+ goto out;
+ }
+ count = strtol (words[4], NULL, 0);
+ if (count < 1) {
+ cli_err ("replica count should be greater than 0 in "
+ "case of remove-brick");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_int32 (dict, "replica-count", count);
+ if (ret)
+ goto out;
+ brick_index = 5;
+ } else if (w) {
+ GF_ASSERT (!"opword mismatch");
+ }
+
+ w = str_getunamb (words[wordcount - 1], opwords);
+ if (!w) {
+ /* Should be default 'force' */
+ command = GF_OP_CMD_COMMIT_FORCE;
+ if (question)
+ *question = 1;
+ } else {
+ /* handled this option */
+ wordcount--;
+ if (!strcmp ("start", w)) {
+ command = GF_OP_CMD_START;
+ } else if (!strcmp ("commit", w)) {
+ command = GF_OP_CMD_COMMIT;
+ if (question)
+ *question = 1;
+ } else if (!strcmp ("stop", w)) {
+ command = GF_OP_CMD_STOP;
+ } else if (!strcmp ("status", w)) {
+ command = GF_OP_CMD_STATUS;
+ } else if (!strcmp ("force", w)) {
+ command = GF_OP_CMD_COMMIT_FORCE;
+ if (question)
+ *question = 1;
+ } else {
+ GF_ASSERT (!"opword mismatch");
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (wordcount < 4) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_int32 (dict, "command", command);
+ if (ret)
+ gf_log ("cli", GF_LOG_INFO, "failed to set 'command' %d",
+ command);
+
+
+ tmp_index = brick_index;
+ tmp_brick = GF_MALLOC(2048 * sizeof(*tmp_brick), gf_common_mt_char);
+
+ if (!tmp_brick) {
+ gf_log ("",GF_LOG_ERROR,"cli_cmd_volume_remove_brick_parse: "
+ "Unable to get memory");
+ ret = -1;
+ goto out;
+ }
+
+ tmp_brick1 = GF_MALLOC(2048 * sizeof(*tmp_brick1), gf_common_mt_char);
+
+ if (!tmp_brick1) {
+ gf_log ("",GF_LOG_ERROR,"cli_cmd_volume_remove_brick_parse: "
+ "Unable to get memory");
+ ret = -1;
+ goto out;
+ }
+
+ while (brick_index < wordcount) {
+ if (validate_brick_name ((char *)words[brick_index])) {
+ cli_err ("wrong brick type: %s, use <HOSTNAME>:"
+ "<export-dir-abs-path>", words[brick_index]);
+ ret = -1;
+ goto out;
+ } else {
+ delimiter = strrchr(words[brick_index], ':');
+ ret = gf_canonicalize_path (delimiter + 1);
+ if (ret)
+ goto out;
+ }
+
+ j = tmp_index;
+ strcpy(tmp_brick, words[brick_index]);
+ while ( j < brick_index) {
+ strcpy(tmp_brick1, words[j]);
+ if (!(strcmp (tmp_brick, tmp_brick1))) {
+ gf_log("",GF_LOG_ERROR, "Duplicate bricks"
+ " found %s", words[brick_index]);
+ cli_err("Duplicate bricks found %s",
+ words[brick_index]);
+ ret = -1;
+ goto out;
+ }
+ j++;
+ }
+ snprintf (key, 50, "brick%d", ++brick_count);
+ ret = dict_set_str (dict, key, (char *)words[brick_index++]);
+
+ if (ret)
+ goto out;
+ }
+
+ ret = dict_set_int32 (dict, "count", brick_count);
+ if (ret)
+ goto out;
+
+ *options = dict;
+
+out:
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to parse remove-brick CLI");
+ if (dict)
+ dict_destroy (dict);
+ }
+
+ GF_FREE (tmp_brick);
+ GF_FREE (tmp_brick1);
+
+ return ret;
+}
+
+
+int32_t
+cli_cmd_volume_replace_brick_parse (const char **words, int wordcount,
+ dict_t **options)
+{
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ int ret = -1;
+ int op_index = 0;
+ char *delimiter = NULL;
+ gf1_cli_replace_op replace_op = GF_REPLACE_OP_NONE;
+ char *opwords[] = { "start", "commit", "pause", "abort", "status",
+ NULL };
+ char *w = NULL;
+ gf_boolean_t is_force = _gf_false;
+
+ GF_ASSERT (words);
+ GF_ASSERT (options);
+
+ dict = dict_new ();
+
+ if (!dict)
+ goto out;
+
+ if (wordcount < 3)
+ goto out;
+
+ volname = (char *)words[2];
+
+ GF_ASSERT (volname);
+
+ ret = dict_set_str (dict, "volname", volname);
+
+ if (ret)
+ goto out;
+
+ if (wordcount < 4) {
+ ret = -1;
+ goto out;
+ }
+
+ if (validate_brick_name ((char *)words[3])) {
+ cli_err ("wrong brick type: %s, use "
+ "<HOSTNAME>:<export-dir-abs-path>", words[3]);
+ ret = -1;
+ goto out;
+ } else {
+ delimiter = strrchr ((char *)words[3], ':');
+ ret = gf_canonicalize_path (delimiter + 1);
+ if (ret)
+ goto out;
+ }
+ ret = dict_set_str (dict, "src-brick", (char *)words[3]);
+
+ if (ret)
+ goto out;
+
+ if (wordcount < 5) {
+ ret = -1;
+ goto out;
+ }
+
+ if (validate_brick_name ((char *)words[4])) {
+ cli_err ("wrong brick type: %s, use "
+ "<HOSTNAME>:<export-dir-abs-path>", words[4]);
+ ret = -1;
+ goto out;
+ } else {
+ delimiter = strrchr ((char *)words[4], ':');
+ ret = gf_canonicalize_path (delimiter + 1);
+ if (ret)
+ goto out;
+ }
+
+
+ ret = dict_set_str (dict, "dst-brick", (char *)words[4]);
+
+ if (ret)
+ goto out;
+
+ op_index = 5;
+ if ((wordcount < (op_index + 1))) {
+ ret = -1;
+ goto out;
+ }
+
+ w = str_getunamb (words[op_index], opwords);
+
+ if (!w) {
+ } else if (!strcmp ("start", w)) {
+ replace_op = GF_REPLACE_OP_START;
+ } else if (!strcmp ("commit", w)) {
+ replace_op = GF_REPLACE_OP_COMMIT;
+ } else if (!strcmp ("pause", w)) {
+ replace_op = GF_REPLACE_OP_PAUSE;
+ } else if (!strcmp ("abort", w)) {
+ replace_op = GF_REPLACE_OP_ABORT;
+ } else if (!strcmp ("status", w)) {
+ replace_op = GF_REPLACE_OP_STATUS;
+ } else
+ GF_ASSERT (!"opword mismatch");
+
+ /* commit force option */
+
+ op_index = 6;
+
+ if (wordcount > (op_index + 1)) {
+ ret = -1;
+ goto out;
+ }
+
+ if (wordcount == (op_index + 1)) {
+ if ((replace_op != GF_REPLACE_OP_COMMIT) &&
+ (replace_op != GF_REPLACE_OP_START)) {
+ ret = -1;
+ goto out;
+ }
+ if (!strcmp ("force", words[op_index])) {
+ if (replace_op == GF_REPLACE_OP_COMMIT)
+ replace_op = GF_REPLACE_OP_COMMIT_FORCE;
+
+ else if (replace_op == GF_REPLACE_OP_START)
+ is_force = _gf_true;
+ }
+ }
+
+ if (replace_op == GF_REPLACE_OP_NONE) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_int32 (dict, "operation", (int32_t) replace_op);
+
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32 (dict, "force", is_force);
+ if (ret)
+ goto out;
+
+ *options = dict;
+
+out:
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to parse replace-brick CLI");
+ if (dict)
+ dict_destroy (dict);
+ }
+
+ return ret;
+}
+
+int32_t
+cli_cmd_log_filename_parse (const char **words, int wordcount, dict_t **options)
+{
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ char *str = NULL;
+ int ret = -1;
+ char *delimiter = NULL;
+
+ GF_ASSERT (words);
+ GF_ASSERT (options);
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ volname = (char *)words[3];
+ GF_ASSERT (volname);
+
+ ret = dict_set_str (dict, "volname", volname);
+ if (ret)
+ goto out;
+
+ str = (char *)words[4];
+ if (strchr (str, ':')) {
+ delimiter = strchr (words[4], ':');
+ if (!delimiter || delimiter == words[4]
+ || *(delimiter+1) != '/') {
+ cli_err ("wrong brick type: %s, use <HOSTNAME>:"
+ "<export-dir-abs-path>", words[4]);
+ ret = -1;
+ goto out;
+ } else {
+ ret = gf_canonicalize_path (delimiter + 1);
+ if (ret)
+ goto out;
+ }
+ ret = dict_set_str (dict, "brick", str);
+ if (ret)
+ goto out;
+ /* Path */
+ str = (char *)words[5];
+ ret = dict_set_str (dict, "path", str);
+ if (ret)
+ goto out;
+ } else {
+ ret = dict_set_str (dict, "path", str);
+ if (ret)
+ goto out;
+ }
+
+ *options = dict;
+
+out:
+ if (ret && dict)
+ dict_destroy (dict);
+
+ return ret;
+}
+
+int32_t
+cli_cmd_log_level_parse (const char **words, int worcount, dict_t **options)
+{
+ dict_t *dict = NULL;
+ int ret = -1;
+
+ GF_ASSERT (words);
+ GF_ASSERT (options);
+
+ /*
+ * loglevel command format:
+ * > volume log level <VOL> <XLATOR[*]> <LOGLEVEL>
+ * > volume log level colon-o posix WARNING
+ * > volume log level colon-o replicate* DEBUG
+ * > volume log level coon-o * TRACE
+ */
+
+ GF_ASSERT ((strncmp(words[0], "volume", 6) == 0));
+ GF_ASSERT ((strncmp(words[1], "log", 3) == 0));
+ GF_ASSERT ((strncmp(words[2], "level", 5) == 0));
+
+ ret = glusterd_check_log_level(words[5]);
+ if (ret == -1) {
+ cli_err("Invalid log level [%s] specified", words[5]);
+ cli_err("Valid values for loglevel: (DEBUG|WARNING|ERROR"
+ "|CRITICAL|NONE|TRACE)");
+ goto out;
+ }
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ GF_ASSERT(words[3]);
+ GF_ASSERT(words[4]);
+
+ ret = dict_set_str (dict, "volname", (char *)words[3]);
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (dict, "xlator", (char *)words[4]);
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (dict, "loglevel", (char *)words[5]);
+ if (ret)
+ goto out;
+
+ *options = dict;
+
+ out:
+ if (ret && dict)
+ dict_destroy (dict);
+
+ return ret;
+}
+
+int32_t
+cli_cmd_log_locate_parse (const char **words, int wordcount, dict_t **options)
+{
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ char *str = NULL;
+ int ret = -1;
+ char *delimiter = NULL;
+
+ GF_ASSERT (words);
+ GF_ASSERT (options);
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ volname = (char *)words[3];
+ GF_ASSERT (volname);
+
+ ret = dict_set_str (dict, "volname", volname);
+ if (ret)
+ goto out;
+
+ if (words[4]) {
+ delimiter = strchr (words[4], ':');
+ if (!delimiter || delimiter == words[4]
+ || *(delimiter+1) != '/') {
+ cli_err ("wrong brick type: %s, use <HOSTNAME>:"
+ "<export-dir-abs-path>", words[4]);
+ ret = -1;
+ goto out;
+ } else {
+ ret = gf_canonicalize_path (delimiter + 1);
+ if (ret)
+ goto out;
+ }
+ str = (char *)words[4];
+ ret = dict_set_str (dict, "brick", str);
+ if (ret)
+ goto out;
+ }
+
+ *options = dict;
+
+out:
+ if (ret && dict)
+ dict_destroy (dict);
+
+ return ret;
+}
+
+int32_t
+cli_cmd_log_rotate_parse (const char **words, int wordcount, dict_t **options)
+{
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ char *str = NULL;
+ int ret = -1;
+ char *delimiter = NULL;
+
+ GF_ASSERT (words);
+ GF_ASSERT (options);
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ volname = (char *)words[3];
+ GF_ASSERT (volname);
+
+ ret = dict_set_str (dict, "volname", volname);
+ if (ret)
+ goto out;
+
+ if (words[4]) {
+ delimiter = strchr (words[4], ':');
+ if (!delimiter || delimiter == words[4]
+ || *(delimiter+1) != '/') {
+ cli_err ("wrong brick type: %s, use <HOSTNAME>:"
+ "<export-dir-abs-path>", words[4]);
+ ret = -1;
+ goto out;
+ } else {
+ ret = gf_canonicalize_path (delimiter + 1);
+ if (ret)
+ goto out;
+ }
+ str = (char *)words[4];
+ ret = dict_set_str (dict, "brick", str);
+ if (ret)
+ goto out;
+ }
+
+ *options = dict;
+
+out:
+ if (ret && dict)
+ dict_destroy (dict);
+
+ return ret;
+}
+
+static gf_boolean_t
+gsyncd_url_check (const char *w)
+{
+ return !!strpbrk (w, ":/");
+}
+
+static gf_boolean_t
+gsyncd_glob_check (const char *w)
+{
+ return !!strpbrk (w, "*?[");
+}
+
+static int
+config_parse (const char **words, int wordcount, dict_t *dict,
+ unsigned cmdi, unsigned glob)
+{
+ int32_t ret = -1;
+ int32_t i = -1;
+ char *append_str = NULL;
+ size_t append_len = 0;
+ char *subop = NULL;
+
+ switch ((wordcount - 1) - cmdi) {
+ case 0:
+ subop = gf_strdup ("get-all");
+ break;
+ case 1:
+ if (words[cmdi + 1][0] == '!') {
+ (words[cmdi + 1])++;
+ if (gf_asprintf (&subop, "del%s",
+ glob ? "-glob" : "") == -1)
+ subop = NULL;
+ } else
+ subop = gf_strdup ("get");
+
+ ret = dict_set_str (dict, "op_name", ((char *)words[cmdi + 1]));
+ if (ret < 0)
+ goto out;
+ break;
+ default:
+ if (gf_asprintf (&subop, "set%s", glob ? "-glob" : "") == -1)
+ subop = NULL;
+
+ ret = dict_set_str (dict, "op_name", ((char *)words[cmdi + 1]));
+ if (ret < 0)
+ goto out;
+
+ /* join the varargs by spaces to get the op_value */
+
+ for (i = cmdi + 2; i < wordcount; i++)
+ append_len += (strlen (words[i]) + 1);
+ /* trailing strcat will add two bytes, make space for that */
+ append_len++;
+
+ append_str = GF_CALLOC (1, append_len, cli_mt_append_str);
+ if (!append_str) {
+ ret = -1;
+ goto out;
+ }
+
+ for (i = cmdi + 2; i < wordcount; i++) {
+ strcat (append_str, words[i]);
+ strcat (append_str, " ");
+ }
+ append_str[append_len - 2] = '\0';
+ /* "checkpoint now" is special: we resolve that "now" */
+ if (strcmp (words[cmdi + 1], "checkpoint") == 0 &&
+ strcmp (append_str, "now") == 0) {
+ struct timeval tv = {0,};
+
+ ret = gettimeofday (&tv, NULL);
+ if (ret == -1)
+ goto out; /* FIXME: free append_str? */
+
+ GF_FREE (append_str);
+ append_str = GF_CALLOC (1, 300, cli_mt_append_str);
+ if (!append_str) {
+ ret = -1;
+ goto out;
+ }
+ strcpy (append_str, "as of ");
+ gf_time_fmt (append_str + strlen ("as of "),
+ 300 - strlen ("as of "),
+ tv.tv_sec, gf_timefmt_FT);
+ }
+
+ ret = dict_set_dynstr (dict, "op_value", append_str);
+ }
+
+ ret = -1;
+ if (subop) {
+ ret = dict_set_dynstr (dict, "subop", subop);
+ if (!ret)
+ subop = NULL;
+ }
+
+out:
+ if (ret && append_str)
+ GF_FREE (append_str);
+
+ GF_FREE (subop);
+
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+static int32_t
+force_push_pem_parse (const char **words, int wordcount,
+ dict_t *dict, unsigned *cmdi)
+{
+ int32_t ret = 0;
+
+ if (!strcmp ((char *)words[wordcount-1], "force")) {
+ if ((strcmp ((char *)words[wordcount-2], "start")) &&
+ (strcmp ((char *)words[wordcount-2], "stop")) &&
+ (strcmp ((char *)words[wordcount-2], "create")) &&
+ (strcmp ((char *)words[wordcount-2], "push-pem"))) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_uint32 (dict, "force",
+ _gf_true);
+ if (ret)
+ goto out;
+ (*cmdi)++;
+
+ if (!strcmp ((char *)words[wordcount-2], "push-pem")) {
+ if (strcmp ((char *)words[wordcount-3], "create")) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_int32 (dict, "push_pem", 1);
+ if (ret)
+ goto out;
+ (*cmdi)++;
+ }
+ } else if (!strcmp ((char *)words[wordcount-1], "push-pem")) {
+ if (strcmp ((char *)words[wordcount-2], "create")) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_int32 (dict, "push_pem", 1);
+ if (ret)
+ goto out;
+ (*cmdi)++;
+ }
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+
+int32_t
+cli_cmd_gsync_set_parse (const char **words, int wordcount, dict_t **options)
+{
+ int32_t ret = -1;
+ dict_t *dict = NULL;
+ gf1_cli_gsync_set type = GF_GSYNC_OPTION_TYPE_NONE;
+ int i = 0;
+ unsigned masteri = 0;
+ unsigned slavei = 0;
+ unsigned glob = 0;
+ unsigned cmdi = 0;
+ char *opwords[] = { "create", "status", "start", "stop",
+ "config", "force", "delete",
+ "push-pem", "detail", NULL };
+ char *w = NULL;
+
+ GF_ASSERT (words);
+ GF_ASSERT (options);
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ /* new syntax:
+ *
+ * volume geo-replication $m $s create [push-pem] [force]
+ * volume geo-replication [$m [$s]] status [detail]
+ * volume geo-replication [$m] $s config [[!]$opt [$val]]
+ * volume geo-replication $m $s start|stop [force]
+ * volume geo-replication $m $s delete
+ */
+
+ if (wordcount < 3)
+ goto out;
+
+ for (i = 2; i <= 3 && i < wordcount - 1; i++) {
+ if (gsyncd_glob_check (words[i]))
+ glob = i;
+ if (gsyncd_url_check (words[i])) {
+ slavei = i;
+ break;
+ }
+ }
+
+ if (glob && !slavei)
+ /* glob is allowed only for config, thus it implies there is a
+ * slave argument; but that might have not been recognized on
+ * the first scan as it's url characteristics has been covered
+ * by the glob syntax.
+ *
+ * In this case, the slave is perforce the last glob-word -- the
+ * upcoming one is neither glob, nor url, so it's definitely not
+ * the slave.
+ */
+ slavei = glob;
+ if (slavei) {
+ cmdi = slavei + 1;
+ if (slavei == 3)
+ masteri = 2;
+ } else if (i <= 3) {
+ if (!strcmp ((char *)words[wordcount-1], "detail")) {
+ /* For status detail it is mandatory to provide
+ * both master and slave */
+ ret = -1;
+ goto out;
+ }
+
+ /* no $s, can only be status cmd
+ * (with either a single $m before it or nothing)
+ * -- these conditions imply that i <= 3 after
+ * the iteration and that i is the successor of
+ * the (0 or 1 length) sequence of $m-s.
+ */
+ cmdi = i;
+ if (i == 3)
+ masteri = 2;
+ } else
+ goto out;
+
+ /* now check if input really complies syntax
+ * (in a somewhat redundant way, in favor
+ * transparent soundness)
+ */
+
+ if (masteri && gsyncd_url_check (words[masteri]))
+ goto out;
+ if (slavei && !glob && !gsyncd_url_check (words[slavei]))
+ goto out;
+
+ w = str_getunamb (words[cmdi], opwords);
+ if (!w)
+ goto out;
+
+ if (strcmp (w, "create") == 0) {
+ type = GF_GSYNC_OPTION_TYPE_CREATE;
+
+ if (!masteri || !slavei)
+ goto out;
+ } else if (strcmp (w, "status") == 0) {
+ type = GF_GSYNC_OPTION_TYPE_STATUS;
+
+ if (slavei && !masteri)
+ goto out;
+ } else if (strcmp (w, "config") == 0) {
+ type = GF_GSYNC_OPTION_TYPE_CONFIG;
+
+ if (!slavei)
+ goto out;
+ } else if (strcmp (w, "start") == 0) {
+ type = GF_GSYNC_OPTION_TYPE_START;
+
+ if (!masteri || !slavei)
+ goto out;
+ } else if (strcmp (w, "stop") == 0) {
+ type = GF_GSYNC_OPTION_TYPE_STOP;
+
+ if (!masteri || !slavei)
+ goto out;
+ } else if (strcmp (w, "delete") == 0) {
+ type = GF_GSYNC_OPTION_TYPE_DELETE;
+
+ if (!masteri || !slavei)
+ goto out;
+ } else
+ GF_ASSERT (!"opword mismatch");
+
+ ret = force_push_pem_parse (words, wordcount, dict, &cmdi);
+ if (ret)
+ goto out;
+
+ if (!strcmp ((char *)words[wordcount-1], "detail")) {
+ if (strcmp ((char *)words[wordcount-2], "status")) {
+ ret = -1;
+ goto out;
+ }
+ if (!slavei || !masteri) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_uint32 (dict, "status-detail", _gf_true);
+ if (ret)
+ goto out;
+ cmdi++;
+ }
+
+ if (type != GF_GSYNC_OPTION_TYPE_CONFIG &&
+ (cmdi < wordcount - 1 || glob))
+ goto out;
+
+ /* If got so far, input is valid, assemble the message */
+
+ ret = 0;
+
+ if (masteri) {
+ ret = dict_set_str (dict, "master", (char *)words[masteri]);
+ if (!ret)
+ ret = dict_set_str (dict, "volname",
+ (char *)words[masteri]);
+ }
+ if (!ret && slavei)
+ ret = dict_set_str (dict, "slave", (char *)words[slavei]);
+ if (!ret)
+ ret = dict_set_int32 (dict, "type", type);
+ if (!ret && type == GF_GSYNC_OPTION_TYPE_CONFIG)
+ ret = config_parse (words, wordcount, dict, cmdi, glob);
+
+out:
+ if (ret) {
+ if (dict)
+ dict_destroy (dict);
+ } else
+ *options = dict;
+
+
+ return ret;
+}
+
+int32_t
+cli_cmd_volume_profile_parse (const char **words, int wordcount,
+ dict_t **options)
+{
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ int ret = -1;
+ gf1_cli_stats_op op = GF_CLI_STATS_NONE;
+ char *opwords[] = { "start", "stop", "info", NULL };
+ char *w = NULL;
+
+ GF_ASSERT (words);
+ GF_ASSERT (options);
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ if (wordcount < 4 || wordcount >5)
+ goto out;
+
+ volname = (char *)words[2];
+
+ ret = dict_set_str (dict, "volname", volname);
+ if (ret)
+ goto out;
+
+ w = str_getunamb (words[3], opwords);
+ if (!w) {
+ ret = -1;
+ goto out;
+ }
+ if (strcmp (w, "start") == 0) {
+ op = GF_CLI_STATS_START;
+ } else if (strcmp (w, "stop") == 0) {
+ op = GF_CLI_STATS_STOP;
+ } else if (strcmp (w, "info") == 0) {
+ op = GF_CLI_STATS_INFO;
+ } else
+ GF_ASSERT (!"opword mismatch");
+
+ ret = dict_set_int32 (dict, "op", (int32_t)op);
+ if (ret)
+ goto out;
+
+ if (wordcount == 5) {
+ if (!strcmp (words[4], "nfs")) {
+ ret = dict_set_int32 (dict, "nfs", _gf_true);
+ if (ret)
+ goto out;
+ }
+ }
+
+ *options = dict;
+out:
+ if (ret && dict)
+ dict_destroy (dict);
+ return ret;
+}
+
+int32_t
+cli_cmd_volume_top_parse (const char **words, int wordcount,
+ dict_t **options)
+{
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ char *value = NULL;
+ char *key = NULL;
+ int ret = -1;
+ gf1_cli_stats_op op = GF_CLI_STATS_NONE;
+ gf1_cli_top_op top_op = GF_CLI_TOP_NONE;
+ int32_t list_cnt = -1;
+ int index = 0;
+ int perf = 0;
+ uint32_t blk_size = 0;
+ uint32_t count = 0;
+ gf_boolean_t nfs = _gf_false;
+ char *delimiter = NULL;
+ char *opwords[] = { "open", "read", "write", "opendir",
+ "readdir", "read-perf", "write-perf",
+ "clear", NULL };
+ char *w = NULL;
+
+ GF_ASSERT (words);
+ GF_ASSERT (options);
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ if (wordcount < 4)
+ goto out;
+
+ volname = (char *)words[2];
+
+ ret = dict_set_str (dict, "volname", volname);
+ if (ret)
+ goto out;
+
+ op = GF_CLI_STATS_TOP;
+ ret = dict_set_int32 (dict, "op", (int32_t)op);
+ if (ret)
+ goto out;
+
+ w = str_getunamb (words[3], opwords);
+ if (!w) {
+ ret = -1;
+ goto out;
+ }
+ if (strcmp (w, "open") == 0) {
+ top_op = GF_CLI_TOP_OPEN;
+ } else if (strcmp (w, "read") == 0) {
+ top_op = GF_CLI_TOP_READ;
+ } else if (strcmp (w, "write") == 0) {
+ top_op = GF_CLI_TOP_WRITE;
+ } else if (strcmp (w, "opendir") == 0) {
+ top_op = GF_CLI_TOP_OPENDIR;
+ } else if (strcmp (w, "readdir") == 0) {
+ top_op = GF_CLI_TOP_READDIR;
+ } else if (strcmp (w, "read-perf") == 0) {
+ top_op = GF_CLI_TOP_READ_PERF;
+ perf = 1;
+ } else if (strcmp (w, "write-perf") == 0) {
+ top_op = GF_CLI_TOP_WRITE_PERF;
+ perf = 1;
+ } else if (strcmp (w, "clear") == 0) {
+ ret = dict_set_int32 (dict, "clear-stats", 1);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Could not set clear-stats in dict");
+ goto out;
+ }
+ } else
+ GF_ASSERT (!"opword mismatch");
+ ret = dict_set_int32 (dict, "top-op", (int32_t)top_op);
+ if (ret)
+ goto out;
+
+ if ((wordcount > 4) && !strcmp (words[4], "nfs")) {
+ nfs = _gf_true;
+ ret = dict_set_int32 (dict, "nfs", nfs);
+ if (ret)
+ goto out;
+ index = 5;
+ } else {
+ index = 4;
+ }
+
+ for (; index < wordcount; index+=2) {
+
+ key = (char *) words[index];
+ value = (char *) words[index+1];
+
+ if ( key && !value ) {
+ ret = -1;
+ goto out;
+ }
+ if (!strcmp (key, "brick")) {
+ delimiter = strchr (value, ':');
+ if (!delimiter || delimiter == value
+ || *(delimiter+1) != '/') {
+ cli_err ("wrong brick type: %s, use <HOSTNAME>:"
+ "<export-dir-abs-path>", value);
+ ret = -1;
+ goto out;
+ } else {
+ ret = gf_canonicalize_path (delimiter + 1);
+ if (ret)
+ goto out;
+ }
+ ret = dict_set_str (dict, "brick", value);
+
+ } else if (!strcmp (key, "list-cnt")) {
+ ret = gf_is_str_int (value);
+ if (!ret)
+ list_cnt = atoi (value);
+ if (ret || (list_cnt < 0) || (list_cnt > 100)) {
+ cli_err ("list-cnt should be between 0 to 100");
+ ret = -1;
+ goto out;
+ }
+ } else if (perf && !nfs && !strcmp (key, "bs")) {
+ ret = gf_is_str_int (value);
+ if (!ret)
+ blk_size = atoi (value);
+ if (ret || (blk_size <= 0)) {
+ if (blk_size < 0)
+ cli_err ("block size is an invalid"
+ " number");
+ else
+ cli_err ("block size should be an "
+ "integer greater than zero");
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_uint32 (dict, "blk-size", blk_size);
+ } else if (perf && !nfs && !strcmp (key, "count")) {
+ ret = gf_is_str_int (value);
+ if (!ret)
+ count = atoi(value);
+ if (ret || (count <= 0)) {
+ if (count < 0)
+ cli_err ("count is an invalid number");
+ else
+ cli_err ("count should be an integer "
+ "greater than zero");
+
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_uint32 (dict, "blk-cnt", count);
+ } else {
+ ret = -1;
+ goto out;
+ }
+ if (ret) {
+ gf_log ("", GF_LOG_WARNING, "Dict set failed for "
+ "key %s", key);
+ goto out;
+ }
+ }
+ if (list_cnt == -1)
+ list_cnt = 100;
+ ret = dict_set_int32 (dict, "list-cnt", list_cnt);
+ if (ret) {
+ gf_log ("", GF_LOG_WARNING, "Dict set failed for list_cnt");
+ goto out;
+ }
+
+ if ((blk_size > 0) ^ (count > 0)) {
+ cli_err ("Need to give both 'bs' and 'count'");
+ ret = -1;
+ goto out;
+ } else if (((uint64_t)blk_size * count) > (10 * GF_UNIT_GB)) {
+ cli_err ("'bs * count' value %"PRIu64" is greater than "
+ "maximum allowed value of 10GB",
+ ((uint64_t)blk_size * count));
+ ret = -1;
+ goto out;
+ }
+
+ *options = dict;
+out:
+ if (ret && dict)
+ dict_destroy (dict);
+ return ret;
+}
+
+uint32_t
+cli_cmd_get_statusop (const char *arg)
+{
+ int i = 0;
+ uint32_t ret = GF_CLI_STATUS_NONE;
+ char *w = NULL;
+ char *opwords[] = {"detail", "mem", "clients", "fd",
+ "inode", "callpool", "tasks", NULL};
+ struct {
+ char *opname;
+ uint32_t opcode;
+ } optable[] = {
+ { "detail", GF_CLI_STATUS_DETAIL },
+ { "mem", GF_CLI_STATUS_MEM },
+ { "clients", GF_CLI_STATUS_CLIENTS },
+ { "fd", GF_CLI_STATUS_FD },
+ { "inode", GF_CLI_STATUS_INODE },
+ { "callpool", GF_CLI_STATUS_CALLPOOL },
+ { "tasks", GF_CLI_STATUS_TASKS },
+ { NULL }
+ };
+
+ w = str_getunamb (arg, opwords);
+ if (!w) {
+ gf_log ("cli", GF_LOG_DEBUG,
+ "Not a status op %s", arg);
+ goto out;
+ }
+
+ for (i = 0; optable[i].opname; i++) {
+ if (!strcmp (w, optable[i].opname)) {
+ ret = optable[i].opcode;
+ break;
+ }
+ }
+
+ out:
+ return ret;
+}
+
+int
+cli_cmd_volume_status_parse (const char **words, int wordcount,
+ dict_t **options)
+{
+ dict_t *dict = NULL;
+ int ret = -1;
+ uint32_t cmd = 0;
+
+ GF_ASSERT (options);
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ switch (wordcount) {
+
+ case 2:
+ cmd = GF_CLI_STATUS_ALL;
+ ret = 0;
+ break;
+
+ case 3:
+ if (!strcmp (words[2], "all")) {
+ cmd = GF_CLI_STATUS_ALL;
+ ret = 0;
+
+ } else {
+ cmd = GF_CLI_STATUS_VOL;
+ ret = dict_set_str (dict, "volname", (char *)words[2]);
+ }
+
+ break;
+
+ case 4:
+ cmd = cli_cmd_get_statusop (words[3]);
+
+ if (!strcmp (words[2], "all")) {
+ if (cmd == GF_CLI_STATUS_NONE) {
+ cli_err ("%s is not a valid status option",
+ words[3]);
+ ret = -1;
+ goto out;
+ }
+ cmd |= GF_CLI_STATUS_ALL;
+ ret = 0;
+
+ } else {
+ ret = dict_set_str (dict, "volname",
+ (char *)words[2]);
+ if (ret)
+ goto out;
+
+ if (cmd == GF_CLI_STATUS_NONE) {
+ if (!strcmp (words[3], "nfs")) {
+ cmd |= GF_CLI_STATUS_NFS;
+ } else if (!strcmp (words[3], "shd")) {
+ cmd |= GF_CLI_STATUS_SHD;
+ } else {
+ cmd = GF_CLI_STATUS_BRICK;
+ ret = dict_set_str (dict, "brick",
+ (char *)words[3]);
+ }
+
+ } else {
+ cmd |= GF_CLI_STATUS_VOL;
+ ret = 0;
+ }
+ }
+
+ break;
+
+ case 5:
+ if (!strcmp (words[2], "all")) {
+ cli_err ("Cannot specify brick/nfs for \"all\"");
+ ret = -1;
+ goto out;
+ }
+
+ cmd = cli_cmd_get_statusop (words[4]);
+ if (cmd == GF_CLI_STATUS_NONE) {
+ cli_err ("%s is not a valid status option",
+ words[4]);
+ ret = -1;
+ goto out;
+ }
+
+
+ ret = dict_set_str (dict, "volname", (char *)words[2]);
+ if (ret)
+ goto out;
+
+ if (!strcmp (words[3], "nfs")) {
+ if (cmd == GF_CLI_STATUS_FD ||
+ cmd == GF_CLI_STATUS_DETAIL ||
+ cmd == GF_CLI_STATUS_TASKS) {
+ cli_err ("Detail/FD/Tasks status not available"
+ " for NFS Servers");
+ ret = -1;
+ goto out;
+ }
+ cmd |= GF_CLI_STATUS_NFS;
+ } else if (!strcmp (words[3], "shd")){
+ if (cmd == GF_CLI_STATUS_FD ||
+ cmd == GF_CLI_STATUS_CLIENTS ||
+ cmd == GF_CLI_STATUS_DETAIL ||
+ cmd == GF_CLI_STATUS_TASKS) {
+ cli_err ("Detail/FD/Clients/Tasks status not "
+ "available for Self-heal Daemons");
+ ret = -1;
+ goto out;
+ }
+ cmd |= GF_CLI_STATUS_SHD;
+ } else {
+ if (cmd == GF_CLI_STATUS_TASKS) {
+ cli_err ("Tasks status not available for "
+ "bricks");
+ ret = -1;
+ goto out;
+ }
+ cmd |= GF_CLI_STATUS_BRICK;
+ ret = dict_set_str (dict, "brick", (char *)words[3]);
+ }
+ break;
+
+ default:
+ goto out;
+ }
+
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32 (dict, "cmd", cmd);
+ if (ret)
+ goto out;
+
+ *options = dict;
+
+ out:
+ if (ret && dict)
+ dict_destroy (dict);
+
+ return ret;
+}
+
+gf_boolean_t
+cli_cmd_validate_dumpoption (const char *arg, char **option)
+{
+ char *opwords[] = {"all", "nfs", "mem", "iobuf", "callpool", "priv",
+ "fd", "inode", "history", "inodectx", "fdctx",
+ NULL};
+ char *w = NULL;
+
+ w = str_getunamb (arg, opwords);
+ if (!w) {
+ gf_log ("cli", GF_LOG_DEBUG, "Unknown statedump option %s",
+ arg);
+ return _gf_false;
+ }
+ *option = w;
+ return _gf_true;
+}
+
+int
+cli_cmd_volume_statedump_options_parse (const char **words, int wordcount,
+ dict_t **options)
+{
+ int ret = 0;
+ int i = 0;
+ dict_t *dict = NULL;
+ int option_cnt = 0;
+ char *option = NULL;
+ char option_str[100] = {0,};
+
+ for (i = 3; i < wordcount; i++, option_cnt++) {
+ if (!cli_cmd_validate_dumpoption (words[i], &option)) {
+ ret = -1;
+ goto out;
+ }
+ strncat (option_str, option, strlen (option));
+ strncat (option_str, " ", 1);
+ }
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ ret = dict_set_dynstr (dict, "options", gf_strdup (option_str));
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32 (dict, "option_cnt", option_cnt);
+ if (ret)
+ goto out;
+
+ *options = dict;
+out:
+ if (ret && dict)
+ dict_destroy (dict);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR, "Error parsing dumpoptions");
+ return ret;
+}
+
+int
+cli_cmd_volume_clrlks_opts_parse (const char **words, int wordcount,
+ dict_t **options)
+{
+ int ret = -1;
+ int i = 0;
+ dict_t *dict = NULL;
+ char *kind_opts[4] = {"blocked", "granted", "all", NULL};
+ char *types[4] = {"inode", "entry", "posix", NULL};
+ char *free_ptr = NULL;
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ if (strcmp (words[4], "kind"))
+ goto out;
+
+ for (i = 0; kind_opts[i]; i++) {
+ if (!strcmp (words[5], kind_opts[i])) {
+ free_ptr = gf_strdup (words[5]);
+ ret = dict_set_dynstr (dict, "kind", free_ptr);
+ if (ret)
+ goto out;
+ free_ptr = NULL;
+ break;
+ }
+ }
+ if (i == 3)
+ goto out;
+
+ ret = -1;
+ for (i = 0; types[i]; i++) {
+ if (!strcmp (words[6], types[i])) {
+ free_ptr = gf_strdup (words[6]);
+ ret = dict_set_dynstr (dict, "type", free_ptr);
+ if (ret)
+ goto out;
+ free_ptr = NULL;
+ break;
+ }
+ }
+ if (i == 3)
+ goto out;
+
+ if (wordcount == 8) {
+ free_ptr = gf_strdup (words[7]);
+ ret = dict_set_dynstr (dict, "opts", free_ptr);
+ if (ret)
+ goto out;
+ free_ptr = NULL;
+ }
+
+ ret = 0;
+ *options = dict;
+out:
+ if (ret) {
+ GF_FREE (free_ptr);
+ dict_unref (dict);
+ }
+
+ return ret;
+}
+
+static int
+extract_hostname_path_from_token (const char *tmp_words, char **hostname,
+ char **path)
+{
+ int ret = 0;
+ char *delimiter = NULL;
+ char *tmp_host = NULL;
+ char *host_name = NULL;
+ char *words = NULL;
+
+ *hostname = NULL;
+ *path = NULL;
+
+ words = GF_CALLOC (1, strlen (tmp_words) + 1, gf_common_mt_char);
+ if (!words){
+ ret = -1;
+ goto out;
+ }
+
+ strncpy (words, tmp_words, strlen (tmp_words) + 1);
+
+ if (validate_brick_name (words)) {
+ cli_err ("Wrong brick type: %s, use <HOSTNAME>:"
+ "<export-dir-abs-path>", words);
+ ret = -1;
+ goto out;
+ } else {
+ delimiter = strrchr (words, ':');
+ ret = gf_canonicalize_path (delimiter + 1);
+ if (ret) {
+ goto out;
+ } else {
+ *path = GF_CALLOC (1, strlen (delimiter+1) +1,
+ gf_common_mt_char);
+ if (!*path) {
+ ret = -1;
+ goto out;
+
+ }
+ strncpy (*path, delimiter +1,
+ strlen(delimiter + 1) + 1);
+ }
+ }
+
+ tmp_host = gf_strdup (words);
+ if (!tmp_host) {
+ gf_log ("cli", GF_LOG_ERROR, "Out of memory");
+ ret = -1;
+ goto out;
+ }
+ get_host_name (tmp_host, &host_name);
+ if (!host_name) {
+ ret = -1;
+ gf_log("cli",GF_LOG_ERROR, "Unable to allocate "
+ "memory");
+ goto out;
+ }
+ if (!(strcmp (host_name, "localhost") &&
+ strcmp (host_name, "127.0.0.1") &&
+ strncmp (host_name, "0.", 2))) {
+ cli_err ("Please provide a valid hostname/ip other "
+ "than localhost, 127.0.0.1 or loopback "
+ "address (0.0.0.0 to 0.255.255.255).");
+ ret = -1;
+ goto out;
+ }
+ if (!valid_internet_address (host_name, _gf_false)) {
+ cli_err ("internet address '%s' does not conform to "
+ "standards", host_name);
+ ret = -1;
+ goto out;
+ }
+
+ *hostname = GF_CALLOC (1, strlen (host_name) + 1,
+ gf_common_mt_char);
+ if (!*hostname) {
+ ret = -1;
+ goto out;
+ }
+ strncpy (*hostname, host_name, strlen (host_name) + 1);
+ ret = 0;
+
+out:
+ GF_FREE (words);
+ GF_FREE (tmp_host);
+ return ret;
+}
+
+
+int
+cli_cmd_volume_heal_options_parse (const char **words, int wordcount,
+ dict_t **options)
+{
+ int ret = 0;
+ dict_t *dict = NULL;
+ char *hostname = NULL;
+ char *path = NULL;
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ ret = dict_set_str (dict, "volname", (char *) words[2]);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "failed to set volname");
+ goto out;
+ }
+
+ if (wordcount == 3) {
+ ret = dict_set_int32 (dict, "heal-op", GF_AFR_OP_HEAL_INDEX);
+ goto done;
+ }
+
+ if (wordcount == 4) {
+ if (!strcmp (words[3], "full")) {
+ ret = dict_set_int32 (dict, "heal-op",
+ GF_AFR_OP_HEAL_FULL);
+ goto done;
+ } else if (!strcmp (words[3], "statistics")) {
+ ret = dict_set_int32 (dict, "heal-op",
+ GF_AFR_OP_STATISTICS);
+ goto done;
+
+ } else if (!strcmp (words[3], "info")) {
+ ret = dict_set_int32 (dict, "heal-op",
+ GF_AFR_OP_INDEX_SUMMARY);
+ goto done;
+ } else {
+ ret = -1;
+ goto out;
+ }
+ }
+ if (wordcount == 5) {
+ if (strcmp (words[3], "info") &&
+ strcmp (words[3], "statistics")) {
+ ret = -1;
+ goto out;
+ }
+
+ if (!strcmp (words[3], "info")) {
+ if (!strcmp (words[4], "healed")) {
+ ret = dict_set_int32 (dict, "heal-op",
+ GF_AFR_OP_HEALED_FILES);
+ goto done;
+ }
+ if (!strcmp (words[4], "heal-failed")) {
+ ret = dict_set_int32 (dict, "heal-op",
+ GF_AFR_OP_HEAL_FAILED_FILES);
+ goto done;
+ }
+ if (!strcmp (words[4], "split-brain")) {
+ ret = dict_set_int32 (dict, "heal-op",
+ GF_AFR_OP_SPLIT_BRAIN_FILES);
+ goto done;
+ }
+ }
+
+ if (!strcmp (words[3], "statistics")) {
+ if (!strcmp (words[4], "heal-count")) {
+ ret = dict_set_int32 (dict, "heal-op",
+ GF_AFR_OP_STATISTICS_HEAL_COUNT);
+ goto done;
+ }
+ }
+ ret = -1;
+ goto out;
+ }
+ if (wordcount == 7) {
+ if (!strcmp (words[3], "statistics")
+ && !strcmp (words[4], "heal-count")
+ && !strcmp (words[5], "replica")) {
+
+ ret = dict_set_int32 (dict, "heal-op",
+ GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA);
+ if (ret)
+ goto out;
+ ret = extract_hostname_path_from_token (words[6],
+ &hostname, &path);
+ if (ret)
+ goto out;
+ ret = dict_set_dynstr (dict, "per-replica-cmd-hostname",
+ hostname);
+ if (ret)
+ goto out;
+ ret = dict_set_dynstr (dict, "per-replica-cmd-path",
+ path);
+ if (ret)
+ goto out;
+ else
+ goto done;
+
+ }
+ }
+ ret = -1;
+ goto out;
+done:
+ *options = dict;
+out:
+ if (ret && dict) {
+ dict_unref (dict);
+ *options = NULL;
+ }
+
+ return ret;
+}
+
+int
+cli_cmd_volume_defrag_parse (const char **words, int wordcount,
+ dict_t **options)
+{
+ dict_t *dict = NULL;
+ int ret = -1;
+ char *option = NULL;
+ char *volname = NULL;
+ char *command = NULL;
+ gf_cli_defrag_type cmd = 0;
+
+ GF_ASSERT (words);
+ GF_ASSERT (options);
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ if (!((wordcount == 4) || (wordcount == 5)))
+ goto out;
+
+ if (wordcount == 4) {
+ if (strcmp (words[3], "start") && strcmp (words[3], "stop") &&
+ strcmp (words[3], "status"))
+ goto out;
+ } else {
+ if (strcmp (words[3], "fix-layout") &&
+ strcmp (words[3], "start"))
+ goto out;
+ }
+
+ volname = (char *) words[2];
+
+ if (wordcount == 4) {
+ command = (char *) words[3];
+ }
+ if (wordcount == 5) {
+ if ((strcmp (words[3], "fix-layout") ||
+ strcmp (words[4], "start")) &&
+ (strcmp (words[3], "start") ||
+ strcmp (words[4], "force"))) {
+ ret = -1;
+ goto out;
+ }
+ command = (char *) words[3];
+ option = (char *) words[4];
+ }
+
+ if (strcmp (command, "start") == 0) {
+ cmd = GF_DEFRAG_CMD_START;
+ if (option && strcmp (option, "force") == 0) {
+ cmd = GF_DEFRAG_CMD_START_FORCE;
+ }
+ goto done;
+ }
+
+ if (strcmp (command, "fix-layout") == 0) {
+ cmd = GF_DEFRAG_CMD_START_LAYOUT_FIX;
+ goto done;
+ }
+ if (strcmp (command, "stop") == 0) {
+ cmd = GF_DEFRAG_CMD_STOP;
+ goto done;
+ }
+ if (strcmp (command, "status") == 0) {
+ cmd = GF_DEFRAG_CMD_STATUS;
+ }
+
+done:
+ ret = dict_set_str (dict, "volname", volname);
+
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "failed to set dict");
+ goto out;
+ }
+
+ ret = dict_set_int32 (dict, "rebalance-command", (int32_t) cmd);
+
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "failed to set dict");
+ goto out;
+ }
+
+ *options = dict;
+
+out:
+ if (ret && dict)
+ dict_destroy (dict);
+
+ return ret;
+}
+
+int32_t
+cli_snap_create_desc_parse (dict_t *dict, const char **words,
+ size_t wordcount, int32_t desc_opt_loc)
+{
+ int32_t ret = -1;
+ char *desc = NULL;
+ int32_t desc_len = 0;
+
+ desc = GF_CALLOC (MAX_SNAP_DESCRIPTION_LEN + 1, sizeof(char),
+ gf_common_mt_char);
+ if (!desc) {
+ ret = -1;
+ goto out;
+ }
+
+
+ if (strlen (words[desc_opt_loc]) >= MAX_SNAP_DESCRIPTION_LEN) {
+ cli_out ("snapshot create: description truncated: "
+ "Description provided is longer than 1024 characters");
+ desc_len = MAX_SNAP_DESCRIPTION_LEN;
+ } else {
+ desc_len = strlen (words[desc_opt_loc]);
+ }
+
+ strncpy (desc, words[desc_opt_loc], desc_len);
+ desc[desc_len] = '\0';
+ /* Calculating the size of the description as given by the user */
+
+ ret = dict_set_dynstr (dict, "description", desc);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to save snap "
+ "description");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret && desc)
+ GF_FREE (desc);
+
+ return ret;
+}
+
+/* Function to check whether the Volume name is repeated */
+int
+cli_check_if_volname_repeated (const char **words, unsigned int start_index,
+ uint64_t cur_index) {
+ uint64_t i = -1;
+ int ret = 0;
+
+ GF_ASSERT (words);
+
+ for (i = start_index ; i < cur_index ; i++) {
+ if (strcmp (words[i], words[cur_index]) == 0) {
+ ret = -1;
+ goto out;
+ }
+ }
+out :
+ return ret;
+}
+
+/* snapshot create <snapname> <vol-name(s)> [description <description>]
+ * [force]
+ * @arg-0, dict : Request Dictionary to be sent to server side.
+ * @arg-1, words : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * return value : -1 on failure
+ * 0 on success
+ */
+int
+cli_snap_create_parse (dict_t *dict, const char **words, int wordcount) {
+ uint64_t i = 0;
+ int ret = -1;
+ uint64_t volcount = 0;
+ char key[PATH_MAX] = "";
+ char *snapname = NULL;
+ unsigned int cmdi = 2;
+ /* cmdi is command index, here cmdi is "2" (gluster snapshot create)*/
+
+ GF_ASSERT (words);
+ GF_ASSERT (dict);
+
+ if (wordcount <= cmdi + 1) {
+ cli_err ("Invalid Syntax.");
+ gf_log ("cli", GF_LOG_ERROR,
+ "Too less words for snap create command");
+ goto out;
+ }
+
+ if (strlen(words[cmdi]) >= GLUSTERD_MAX_SNAP_NAME) {
+ cli_err ("snapshot create: failed: snapname cannot exceed "
+ "255 characters.");
+ gf_log ("cli", GF_LOG_ERROR, "Snapname too long");
+
+ goto out;
+ }
+
+ snapname = (char *) words[cmdi];
+ for (i = 0 ; i < strlen (snapname); i++) {
+ /* Following volume name convention */
+ if (!isalnum (snapname[i]) && (snapname[i] != '_'
+ && (snapname[i] != '-'))) {
+ /* TODO : Is this message enough?? */
+ cli_err ("Snapname can contain only alphanumeric, "
+ "\"-\" and \"_\" characters");
+ goto out;
+ }
+ }
+
+ ret = dict_set_str (dict, "snapname", (char *)words[cmdi]);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not save snap "
+ "name");
+ goto out;
+ }
+
+ /* Filling volume name in the dictionary */
+ for (i = cmdi + 1 ; i < wordcount
+ && (strcmp (words[i], "description")) != 0
+ && (strcmp (words[i], "force") != 0); i++) {
+ volcount++;
+ /* volume index starts from 1 */
+ ret = snprintf (key, sizeof (key),"volname%ld", volcount);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_set_str (dict, key, (char *)words[i]);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not "
+ "save volume name");
+ goto out;
+ }
+
+ if (i >= cmdi + 2) {
+ ret = -1;
+ cli_err("Creating multiple volume snapshot is not "
+ "supported as of now");
+ goto out;
+ }
+ /* TODO : remove this above condition check once
+ * multiple volume snapshot is supported */
+ }
+
+ if (volcount == 0) {
+ ret = -1;
+ cli_err ("Please provide the volume name");
+ gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = dict_set_int32 (dict, "volcount", volcount);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not save volcount");
+ goto out;
+ }
+
+ /* Verify how we got out of "for" loop,
+ * if it is by reaching wordcount limit then goto "out",
+ * because we need not parse for "description" and "force"
+ * after this.
+ */
+ if (i == wordcount) {
+ goto out;
+ }
+
+ if ((strcmp (words[i], "description")) == 0) {
+ ++i;
+ if (i > (wordcount - 1)) {
+ ret = -1;
+ cli_err ("Please provide a description");
+ gf_log ("cli", GF_LOG_ERROR,
+ "Description not provided");
+ goto out;
+ }
+
+ ret = cli_snap_create_desc_parse(dict, words, wordcount, i);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not save snap "
+ "description");
+ goto out;
+ }
+
+ if ( i == (wordcount - 1))
+ goto out;
+ i++;
+ /* point the index to next word.
+ * As description might be follwed by force option.
+ * Before that, check if wordcount limit is reached
+ */
+ }
+
+ if ((strcmp (words[i], "force") != 0)) {
+ ret = -1;
+ cli_err ("Invalid Syntax.");
+ gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+ ret = dict_set_int8 (dict, "snap-force", 1);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not save "
+ "snap force option");
+ goto out;
+ }
+
+ /* Check if the command has anything after "force" keyword */
+ if (++i < wordcount) {
+ ret = -1;
+ gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = 0;
+
+out :
+ return ret;
+}
+
+/* snapshot list [volname]
+ * @arg-0, dict : Request Dictionary to be sent to server side.
+ * @arg-1, words : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * return value : -1 on failure
+ * 0 on success
+ */
+int
+cli_snap_list_parse (dict_t *dict, const char **words, int wordcount) {
+ int ret = -1;
+
+ GF_ASSERT (words);
+ GF_ASSERT (dict);
+
+ if (wordcount < 2 || wordcount > 3) {
+ gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ if (wordcount == 2) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = dict_set_str (dict, "volname", (char *)words[2]);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Failed to save volname in dictionary");
+ goto out;
+ }
+out :
+ return ret;
+}
+
+/* snapshot info [(snapname | volume <volname>)]
+ * @arg-0, dict : Request Dictionary to be sent to server side.
+ * @arg-1, words : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * return value : -1 on failure
+ * 0 on success
+ */
+int
+cli_snap_info_parse (dict_t *dict, const char **words, int wordcount)
+{
+
+ int ret = -1;
+ int32_t cmd = GF_SNAP_INFO_TYPE_ALL;
+ unsigned int cmdi = 2;
+ /* cmdi is command index, here cmdi is "2" (gluster snapshot info)*/
+
+ GF_ASSERT (words);
+ GF_ASSERT (dict);
+
+ if (wordcount > 4 || wordcount < cmdi) {
+ gf_log ("", GF_LOG_ERROR, "Invalid syntax");
+ goto out;
+ }
+
+ if (wordcount == cmdi) {
+ ret = 0;
+ goto out;
+ }
+
+ /* If 3rd word is not "volume", then it must
+ * be snapname.
+ */
+ if (strcmp (words[cmdi], "volume") != 0) {
+ ret = dict_set_str (dict, "snapname",
+ (char *)words[cmdi]);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to save "
+ "snapname %s", words[cmdi]);
+ goto out;
+ }
+
+ /* Once snap name is parsed, if we encounter any other
+ * word then fail it. Invalid Syntax.
+ * example : snapshot info <snapname> word
+ */
+ if ((cmdi + 1) != wordcount) {
+ ret = -1;
+ gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ cmd = GF_SNAP_INFO_TYPE_SNAP;
+ ret = 0;
+ goto out;
+ /* No need to continue the parsing once we
+ * get the snapname
+ */
+ }
+
+ /* If 3rd word is "volume", then check if next word
+ * is present. As, "snapshot info volume" is an
+ * invalid command.
+ */
+ if ((cmdi + 1) == wordcount) {
+ ret = -1;
+ gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = dict_set_str (dict, "volname", (char *)words[wordcount - 1]);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Count not save "
+ "volume name %s", words[wordcount - 1]);
+ goto out;
+ }
+ cmd = GF_SNAP_INFO_TYPE_VOL;
+out :
+ if (ret == 0) {
+ ret = dict_set_int32 (dict, "cmd", cmd);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not save "
+ "type of snapshot info");
+ }
+ }
+ return ret;
+}
+
+
+
+/* snapshot restore <snapname>
+ * @arg-0, dict : Request Dictionary to be sent to server side.
+ * @arg-1, words : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * return value : -1 on failure
+ * 0 on success
+ */
+int
+cli_snap_restore_parse (dict_t *dict, const char **words, int wordcount)
+{
+
+ int ret = -1;
+
+ GF_ASSERT (words);
+ GF_ASSERT (dict);
+
+ if (wordcount != 3) {
+ gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = dict_set_str (dict, "snapname", (char *)words[2]);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to save snap-name %s",
+ words[2]);
+ goto out;
+ }
+out :
+ return ret;
+}
+
+/* snapshot delete <snapname>
+ * @arg-0, dict : Request Dictionary to be sent to server side.
+ * @arg-1, words : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * return value : -1 on failure
+ * 0 on success
+ * 1 if user cancel the operation
+ */
+int
+cli_snap_delete_parse (dict_t *dict, const char **words, int wordcount,
+ struct cli_state *state) {
+
+ int ret = -1;
+ const char *question = NULL;
+ gf_answer_t answer = GF_ANSWER_NO;
+
+ question = "Deleting snap will erase all the information about "
+ "the snap. Do you still want to continue?";
+
+ GF_ASSERT (words);
+ GF_ASSERT (dict);
+
+ if (wordcount != 3) {
+ gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = dict_set_str (dict, "snapname", (char *)words[2]);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to save snapname %s",
+ words[2]);
+ goto out;
+ }
+
+ answer = cli_cmd_get_confirmation (state, question);
+ if (GF_ANSWER_NO == answer) {
+ ret = 1;
+ gf_log ("cli", GF_LOG_DEBUG, "User cancelled "
+ "snapshot delete operation");
+ goto out;
+ }
+out :
+ return ret;
+}
+
+/* snapshot status [(snapname | volume <volname>)]
+ * @arg-0, dict : Request Dictionary to be sent to server side.
+ * @arg-1, words : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * return value : -1 on failure
+ * 0 on success
+ */
+int
+cli_snap_status_parse (dict_t *dict, const char **words, int wordcount)
+{
+
+ int ret = -1;
+ int32_t cmd = GF_SNAP_STATUS_TYPE_ALL;
+ unsigned int cmdi = 2;
+ /* cmdi is command index, here cmdi is "2" (gluster snapshot status)*/
+
+ GF_ASSERT (words);
+ GF_ASSERT (dict);
+
+ if (wordcount > 4 || wordcount < cmdi) {
+ gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ if (wordcount == cmdi) {
+ ret = 0;
+ goto out;
+ }
+
+ /* if 3rd word is not "volume", then it must be "snapname"
+ */
+ if (strcmp (words[cmdi], "volume") != 0) {
+ ret = dict_set_str (dict, "snapname",
+ (char *)words[cmdi]);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Count not save "
+ "snap name %s", words[cmdi]);
+ goto out;
+ }
+
+ if ((cmdi + 1) != wordcount) {
+ ret = -1;
+ gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = 0;
+ cmd = GF_SNAP_STATUS_TYPE_SNAP;
+ goto out;
+ }
+
+ /* If 3rd word is "volume", then check if next word is present.
+ * As, "snapshot info volume" is an invalid command
+ */
+ if ((cmdi + 1) == wordcount) {
+ ret = -1;
+ gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = dict_set_str (dict, "volname", (char *)words [wordcount - 1]);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Count not save "
+ "volume name %s", words[wordcount - 1]);
+ goto out;
+ }
+ cmd = GF_SNAP_STATUS_TYPE_VOL;
+
+out :
+ if (ret == 0) {
+ ret = dict_set_int32 (dict, "cmd", cmd);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not save cmd "
+ "of snapshot status");
+ }
+ }
+ return ret;
+}
+
+
+int32_t
+cli_snap_config_limit_parse (const char **words, dict_t *dict,
+ unsigned int wordcount, unsigned int index,
+ char *key)
+{
+ int ret = -1;
+ int limit = 0;
+
+ GF_ASSERT (words);
+ GF_ASSERT (dict);
+ GF_ASSERT (key);
+
+ if (index >= wordcount) {
+ ret = -1;
+ cli_err ("Please provide a value for %s.",key);
+ gf_log ("cli", GF_LOG_ERROR, "Value not provided for %s", key);
+ goto out;
+ }
+
+ limit = strtol (words[index], NULL, 0);
+ if (limit <= 0) {
+ ret = -1;
+ cli_err ("%s should be greater than 0.", key);
+ goto out;
+ }
+
+ ret = dict_set_int32 (dict, key, limit);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not set "
+ "%s in dictionary", key);
+ goto out;
+ }
+
+out :
+ return ret;
+}
+
+/* function cli_snap_config_parse
+ * Config Syntax : gluster snapshot config [volname]
+ * [snap-max-hard-limit <count>]
+ * [snap-max-soft-limit <count>]
+ *
+ return value: <0 on failure
+ 1 if user cancels the operation
+ 0 on success
+
+ NOTE : snap-max-soft-limit can only be set for system.
+*/
+int32_t
+cli_snap_config_parse (const char **words, int wordcount, dict_t *dict,
+ struct cli_state *state)
+{
+ int ret = -1;
+ gf_answer_t answer = GF_ANSWER_NO;
+ gf_boolean_t vol_presence = _gf_false;
+ struct snap_config_opt_vals_ *conf_vals = NULL;
+ int8_t hard_limit = 0;
+ int8_t soft_limit = 0;
+ int8_t config_type = -1;
+ const char *question = NULL;
+ unsigned int cmdi = 2;
+ /* cmdi is command index, here cmdi is "2" (gluster snapshot config)*/
+
+ GF_ASSERT (words);
+ GF_ASSERT (dict);
+ GF_ASSERT (state);
+
+ if ((wordcount < 2) || (wordcount > 7)) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Invalid wordcount(%d)", wordcount);
+ goto out;
+ }
+
+ if (wordcount == 2) {
+ config_type = GF_SNAP_CONFIG_DISPLAY;
+ ret = 0;
+ goto set;
+ }
+
+ /* Check whether the 3rd word is volname */
+ if (strcmp (words[cmdi], "snap-max-hard-limit") != 0
+ && strcmp (words[cmdi], "snap-max-soft-limit") != 0) {
+ ret = dict_set_str (dict, "volname", (char *)words[cmdi]);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to set volname");
+ goto out;
+ }
+ cmdi++;
+ vol_presence = _gf_true;
+
+ if (cmdi == wordcount) {
+ config_type = GF_SNAP_CONFIG_DISPLAY;
+ ret = 0;
+ goto set;
+ }
+ }
+
+ config_type = GF_SNAP_CONFIG_TYPE_SET;
+
+ if (strcmp (words[cmdi], "snap-max-hard-limit") == 0) {
+ ret = cli_snap_config_limit_parse (words, dict, wordcount,
+ ++cmdi, "snap-max-hard-limit");
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to parse snap "
+ "config hard limit");
+ goto out;
+ }
+ hard_limit = 1;
+
+ if (++cmdi == wordcount) {
+ ret = 0;
+ goto set;
+ }
+ }
+
+ if (strcmp (words[cmdi], "snap-max-soft-limit") == 0) {
+ if (vol_presence == 1) {
+ ret = -1;
+ cli_err ("Soft limit cannot be set to individual "
+ "volumes.");
+ gf_log ("cli", GF_LOG_ERROR, "Soft limit cannot be "
+ "set to volumes");
+ goto out;
+ }
+
+ ret = cli_snap_config_limit_parse (words, dict, wordcount,
+ ++cmdi, "snap-max-soft-limit");
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to parse snap "
+ "config soft limit");
+ goto out;
+ }
+
+ if (++cmdi != wordcount) {
+ ret = -1;
+ gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+ soft_limit = 1;
+ } else {
+ ret = -1;
+ gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+ ret = 0; /* Success */
+
+set:
+ ret = dict_set_int32 (dict, "config-command", config_type);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to set "
+ "config-command");
+ goto out;
+ }
+
+ if (config_type == GF_SNAP_CONFIG_TYPE_SET) {
+ conf_vals = snap_confopt_vals;
+ if (hard_limit && soft_limit) {
+ question = conf_vals[GF_SNAP_CONFIG_SET_BOTH].question;
+ } else if (soft_limit) {
+ question = conf_vals[GF_SNAP_CONFIG_SET_SOFT].question;
+ } else if (hard_limit) {
+ question = conf_vals[GF_SNAP_CONFIG_SET_HARD].question;
+ }
+
+ answer = cli_cmd_get_confirmation (state, question);
+ if (GF_ANSWER_NO == answer) {
+ ret = 1;
+ gf_log ("cli", GF_LOG_DEBUG, "User cancelled "
+ "snapshot config operation");
+ }
+ }
+
+out:
+ return ret;
+}
+
+int
+validate_snapname (const char *snapname, char **opwords) {
+ int ret = -1;
+ int i = 0;
+
+ GF_ASSERT (snapname);
+ GF_ASSERT (opwords);
+
+ for (i = 0 ; opwords[i] != NULL; i++) {
+ if (strcmp (opwords[i], snapname) == 0) {
+ cli_out ("\"%s\" cannot be a snapname", snapname);
+ goto out;
+ }
+ }
+ ret = 0;
+out :
+ return ret;
+}
+
+int32_t
+cli_cmd_snapshot_parse (const char **words, int wordcount, dict_t **options,
+ struct cli_state *state)
+{
+ int32_t ret = -1;
+ dict_t *dict = NULL;
+ gf1_cli_snapshot type = GF_SNAP_OPTION_TYPE_NONE;
+ char *w = NULL;
+ char *opwords[] = {"create", "delete", "restore", "start",
+ "stop", "list", "status", "config",
+ "info", NULL};
+ char *invalid_snapnames[] = {"description", "force",
+ "volume", NULL};
+
+ GF_ASSERT (words);
+ GF_ASSERT (options);
+ GF_ASSERT (state);
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ /* Lowest wordcount possible */
+ if (wordcount < 2) {
+ gf_log ("", GF_LOG_ERROR,
+ "Invalid command: Not enough arguments");
+ goto out;
+ }
+
+ w = str_getunamb (words[1], opwords);
+ if (!w) {
+ /* Checks if the operation is a valid operation */
+ gf_log ("", GF_LOG_ERROR, "Opword Mismatch");
+ goto out;
+ }
+
+ if (!strcmp (w, "create")) {
+ type = GF_SNAP_OPTION_TYPE_CREATE;
+ } else if (!strcmp (w, "list")) {
+ type = GF_SNAP_OPTION_TYPE_LIST;
+ } else if (!strcmp (w, "info")) {
+ type = GF_SNAP_OPTION_TYPE_INFO;
+ } else if (!strcmp (w, "delete")) {
+ type = GF_SNAP_OPTION_TYPE_DELETE;
+ } else if (!strcmp (w, "config")) {
+ type = GF_SNAP_OPTION_TYPE_CONFIG;
+ } else if (!strcmp (w, "restore")) {
+ type = GF_SNAP_OPTION_TYPE_RESTORE;
+ } else if (!strcmp (w, "status")) {
+ type = GF_SNAP_OPTION_TYPE_STATUS;
+ }
+
+ if (type != GF_SNAP_OPTION_TYPE_CONFIG) {
+ ret = dict_set_int32 (dict, "hold_snap_locks", _gf_true);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Unable to set hold-snap-locks value "
+ "as _gf_true");
+ goto out;
+ }
+ }
+
+ /* Check which op is intended */
+ switch (type) {
+ case GF_SNAP_OPTION_TYPE_CREATE:
+ {
+ /* Syntax :
+ * gluster snapshot create <snapname> <vol-name(s)>
+ * [description <description>]
+ * [force]
+ */
+
+ /* In cases where the snapname is not given then
+ * parsing fails & snapname cannot be "description",
+ * "force" and "volume", that check is made here
+ */
+ if (wordcount == 2){
+ ret = -1;
+ gf_log ("cli", GF_LOG_ERROR,
+ "Invalid Syntax");
+ goto out;
+ }
+
+ ret = validate_snapname (words[2], invalid_snapnames);
+ if (ret) {
+ goto out;
+ }
+
+ ret = cli_snap_create_parse (dict, words, wordcount);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "create command parsing failed.");
+ goto out;
+ }
+ break;
+ }
+ case GF_SNAP_OPTION_TYPE_INFO:
+ {
+ /* Syntax :
+ * gluster snapshot info [(snapname] | [vol <volname>)]
+ */
+ ret = cli_snap_info_parse (dict, words, wordcount);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to parse "
+ "snapshot info command");
+ goto out;
+ }
+ break;
+ }
+
+ case GF_SNAP_OPTION_TYPE_LIST:
+ {
+ /* Syntax :
+ * gluster snaphsot list [volname]
+ */
+
+ ret = cli_snap_list_parse (dict, words, wordcount);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to parse "
+ "snapshot list command");
+ goto out;
+ }
+ break;
+ }
+
+ case GF_SNAP_OPTION_TYPE_DELETE:
+ {
+ /* Syntax :
+ * gluster snapshot delete <snapname>
+ */
+ ret = cli_snap_delete_parse (dict, words, wordcount,
+ state);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to parse "
+ "snapshot delete command");
+ goto out;
+ }
+ break;
+ }
+
+ case GF_SNAP_OPTION_TYPE_CONFIG:
+ {
+ /* snapshot config [volname] [snap-max-hard-limit <count>]
+ * [snap-max-soft-limit <percent>] */
+ ret = cli_snap_config_parse (words, wordcount, dict,
+ state);
+ if (ret) {
+ if (ret < 0)
+ gf_log ("cli", GF_LOG_ERROR,
+ "config command parsing failed.");
+ goto out;
+ }
+
+ ret = dict_set_int32 (dict, "type",
+ GF_SNAP_OPTION_TYPE_CONFIG);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to set "
+ "config type");
+ ret = -1;
+ goto out;
+ }
+ break;
+ }
+ case GF_SNAP_OPTION_TYPE_STATUS:
+ {
+ /* Syntax :
+ * gluster snapshot status [(snapname |
+ * volume <volname>)]
+ */
+ ret = cli_snap_status_parse (dict, words, wordcount);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to parse "
+ "snapshot status command");
+ goto out;
+ }
+ break;
+ }
+
+ case GF_SNAP_OPTION_TYPE_RESTORE:
+ {
+ /* Syntax:
+ * snapshot restore <snapname>
+ */
+ ret = cli_snap_restore_parse (dict, words, wordcount);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to parse "
+ "restore command");
+ goto out;
+ }
+ break;
+ }
+ default:
+ gf_log ("", GF_LOG_ERROR, "Opword Mismatch");
+ goto out;
+ break;
+ }
+
+ ret = dict_set_int32 (dict, "type", type);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Failed to set type.");
+ goto out;
+ }
+ /* If you got so far, input is valid */
+ ret = 0;
+out:
+ if (ret) {
+ if (dict)
+ dict_destroy (dict);
+ } else
+ *options = dict;
+
+ return ret;
+}
diff --git a/cli/src/cli-cmd-peer.c b/cli/src/cli-cmd-peer.c
new file mode 100644
index 000000000..551312411
--- /dev/null
+++ b/cli/src/cli-cmd-peer.c
@@ -0,0 +1,300 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <pthread.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "cli.h"
+#include "cli-cmd.h"
+#include "cli-mem-types.h"
+#include "cli1-xdr.h"
+#include "protocol-common.h"
+
+extern struct rpc_clnt *global_rpc;
+
+extern rpc_clnt_prog_t *cli_rpc_prog;
+
+int cli_cmd_peer_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount);
+
+int
+cli_cmd_peer_probe_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+
+ if (!(wordcount == 3)) {
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_PROBE];
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ ret = dict_set_str (dict, "hostname", (char *)words[2]);
+ if (ret)
+ goto out;
+
+ ret = valid_internet_address ((char *) words[2], _gf_false);
+ if (ret == 1) {
+ ret = 0;
+ } else {
+ cli_out ("%s is an invalid address", words[2]);
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ ret = -1;
+ goto out;
+ }
+/* if (words[3]) {
+ ret = dict_set_str (dict, "port", (char *)words[3]);
+ if (ret)
+ goto out;
+ }
+*/
+
+ CLI_LOCAL_INIT (local, words, frame, dict);
+
+ if (proc->fn) {
+ ret = proc->fn (frame, THIS, dict);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out ("Peer probe failed");
+ }
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+}
+
+
+int
+cli_cmd_peer_deprobe_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ int flags = 0;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+
+ if ((wordcount < 3) || (wordcount > 4)) {
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_DEPROBE];
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ dict = dict_new ();
+
+ ret = dict_set_str (dict, "hostname", (char *)words[2]);
+ if (ret)
+ goto out;
+
+/* if (words[3]) {
+ ret = dict_set_str (dict, "port", (char *)words[3]);
+ if (ret)
+ goto out;
+ }
+*/
+ if (wordcount == 4) {
+ if (!strcmp("force", words[3]))
+ flags |= GF_CLI_FLAG_OP_FORCE;
+ else {
+ ret = -1;
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+ }
+ ret = dict_set_int32 (dict, "flags", flags);
+ if (ret)
+ goto out;
+
+ CLI_LOCAL_INIT (local, words, frame, dict);
+
+ if (proc->fn) {
+ ret = proc->fn (frame, THIS, dict);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out ("Peer detach failed");
+ }
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+}
+
+int
+cli_cmd_peer_status_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ int sent = 0;
+ int parse_error = 0;
+
+ if (wordcount != 2) {
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_LIST_FRIENDS];
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ if (proc->fn) {
+ ret = proc->fn (frame, THIS, (void *)GF_CLI_LIST_PEERS);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out ("Peer status failed");
+ }
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+}
+
+int
+cli_cmd_pool_list_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ int sent = 0;
+ int parse_error = 0;
+
+ if (wordcount != 2) {
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_LIST_FRIENDS];
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ if (proc->fn) {
+ ret = proc->fn (frame, THIS,
+ (void *)GF_CLI_LIST_POOL_NODES);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_err ("pool list: command execution failed");
+ }
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+}
+
+struct cli_cmd cli_probe_cmds[] = {
+ { "peer probe <HOSTNAME>",
+ cli_cmd_peer_probe_cbk,
+ "probe peer specified by <HOSTNAME>"},
+
+ { "peer detach <HOSTNAME> [force]",
+ cli_cmd_peer_deprobe_cbk,
+ "detach peer specified by <HOSTNAME>"},
+
+ { "peer status",
+ cli_cmd_peer_status_cbk,
+ "list status of peers"},
+
+ { "peer help",
+ cli_cmd_peer_help_cbk,
+ "Help command for peer "},
+
+ { "pool list",
+ cli_cmd_pool_list_cbk,
+ "list all the nodes in the pool (including localhost)"},
+
+ { NULL, NULL, NULL }
+};
+
+int
+cli_cmd_peer_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount)
+{
+ struct cli_cmd *cmd = NULL;
+
+
+
+ for (cmd = cli_probe_cmds; cmd->pattern; cmd++)
+ cli_out ("%s - %s", cmd->pattern, cmd->desc);
+
+ return 0;
+}
+
+int
+cli_cmd_probe_register (struct cli_state *state)
+{
+ int ret = 0;
+ struct cli_cmd *cmd = NULL;
+
+ for (cmd = cli_probe_cmds; cmd->pattern; cmd++) {
+
+ ret = cli_cmd_register (&state->tree, cmd);
+ if (ret)
+ goto out;
+ }
+out:
+ return ret;
+}
diff --git a/cli/src/cli-cmd-snapshot.c b/cli/src/cli-cmd-snapshot.c
new file mode 100644
index 000000000..de492d683
--- /dev/null
+++ b/cli/src/cli-cmd-snapshot.c
@@ -0,0 +1,146 @@
+/*
+ Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <pthread.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "cli.h"
+#include "cli-cmd.h"
+
+extern rpc_clnt_prog_t *cli_rpc_prog;
+
+int
+cli_cmd_snapshot_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount);
+
+int
+cli_cmd_snapshot_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = 0;
+ int parse_err = 0;
+ dict_t *options = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ cli_local_t *local = NULL;
+
+ proc = &cli_rpc_prog->proctable [GLUSTER_CLI_SNAP];
+ if (proc == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (frame == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Parses the command entered by the user */
+ ret = cli_cmd_snapshot_parse (words, wordcount, &options, state);
+ if (ret) {
+ if (ret < 0) {
+ cli_usage_out (word->pattern);
+ parse_err = 1;
+ }
+ else {
+ /* User might have cancelled the snapshot operation */
+ ret = 0;
+ }
+ goto out;
+ }
+
+ CLI_LOCAL_INIT (local, words, frame, options);
+
+ if (proc->fn)
+ ret = proc->fn (frame, THIS, options);
+
+out:
+ if (ret && parse_err == 0)
+ cli_out ("Snapshot command failed");
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+}
+
+struct cli_cmd snapshot_cmds[] = {
+ { "snapshot help",
+ cli_cmd_snapshot_help_cbk,
+ "display help for snapshot commands"
+ },
+ { "snapshot create <snapname> <volname(s)> [description <description>] [force]",
+ cli_cmd_snapshot_cbk,
+ "Snapshot Create."
+ },
+ { "snapshot restore <snapname>",
+ cli_cmd_snapshot_cbk,
+ "Snapshot Restore."
+ },
+ { "snapshot status [(snapname | volume <volname>)]",
+ cli_cmd_snapshot_cbk,
+ "Snapshot Status."
+ },
+ { "snapshot info [(snapname | volume <volname>)]",
+ cli_cmd_snapshot_cbk,
+ "Snapshot Info."
+ },
+ { "snapshot list [volname]",
+ cli_cmd_snapshot_cbk,
+ "Snapshot List."
+ },
+ {"snapshot config [volname] [snap-max-hard-limit <count>] [snap-max-soft-limit <percent>]",
+ cli_cmd_snapshot_cbk,
+ "Snapshot Config."
+ },
+ {"snapshot delete <snapname>",
+ cli_cmd_snapshot_cbk,
+ "Snapshot Delete."
+ },
+ { NULL, NULL, NULL }
+};
+
+int
+cli_cmd_snapshot_help_cbk (struct cli_state *state,
+ struct cli_cmd_word *in_word,
+ const char **words,
+ int wordcount)
+{
+ struct cli_cmd *cmd = NULL;
+
+ for (cmd = snapshot_cmds; cmd->pattern; cmd++)
+ if (_gf_false == cmd->disable)
+ cli_out ("%s - %s", cmd->pattern, cmd->desc);
+
+ return 0;
+}
+
+int
+cli_cmd_snapshot_register (struct cli_state *state)
+{
+ int ret = 0;
+ struct cli_cmd *cmd = NULL;
+
+ for (cmd = snapshot_cmds; cmd->pattern; cmd++) {
+
+ ret = cli_cmd_register (&state->tree, cmd);
+ if (ret)
+ goto out;
+ }
+out:
+ return ret;
+}
diff --git a/cli/src/cli-cmd-system.c b/cli/src/cli-cmd-system.c
new file mode 100644
index 000000000..8cfa5e70c
--- /dev/null
+++ b/cli/src/cli-cmd-system.c
@@ -0,0 +1,602 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <pthread.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "cli.h"
+#include "cli-cmd.h"
+#include "cli-mem-types.h"
+#include "protocol-common.h"
+
+
+extern struct rpc_clnt *global_rpc;
+
+extern rpc_clnt_prog_t *cli_rpc_prog;
+
+int cli_cmd_system_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount);
+
+int cli_cmd_copy_file_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount);
+
+int cli_cmd_sys_exec_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount);
+
+int
+cli_cmd_getspec_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ if (wordcount != 3) {
+ cli_usage_out (word->pattern);
+ goto out;
+ }
+
+ ret = dict_set_str (dict, "volid", (char *)words[2]);
+ if (ret)
+ goto out;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GETSPEC];
+ if (proc->fn) {
+ ret = proc->fn (frame, THIS, dict);
+ }
+
+out:
+ if (!proc && ret) {
+ if (dict)
+ dict_destroy (dict);
+ if (wordcount > 1)
+ cli_out ("Fetching spec for volume %s failed",
+ (char *)words[2]);
+ }
+
+ return ret;
+}
+
+int
+cli_cmd_pmap_b2p_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ if (wordcount != 4) {
+ cli_usage_out (word->pattern);
+ goto out;
+ }
+
+ ret = dict_set_str (dict, "brick", (char *)words[3]);
+ if (ret)
+ goto out;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_PMAP_PORTBYBRICK];
+ if (proc->fn) {
+ ret = proc->fn (frame, THIS, dict);
+ }
+
+out:
+ if (!proc && ret) {
+ if (dict)
+ dict_destroy (dict);
+ if (wordcount > 1)
+ cli_out ("Fetching spec for volume %s failed",
+ (char *)words[3]);
+ }
+
+ return ret;
+}
+
+int
+cli_cmd_fsm_log_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ char *name = "";
+
+ if ((wordcount != 4) && (wordcount != 3)) {
+ cli_usage_out (word->pattern);
+ goto out;
+ }
+
+ if (wordcount == 4)
+ name = (char*)words[3];
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_FSM_LOG];
+ if (proc && proc->fn) {
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+ ret = proc->fn (frame, THIS, (void*)name);
+ }
+out:
+ return ret;
+}
+
+int
+cli_cmd_getwd_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+
+ if (wordcount != 2) {
+ cli_usage_out (word->pattern);
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GETWD];
+ if (proc && proc->fn) {
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+ ret = proc->fn (frame, THIS, NULL);
+ }
+out:
+ return ret;
+}
+
+static dict_t *
+make_seq_dict (int argc, char **argv)
+{
+ char index[] = "4294967296"; // 1<<32
+ int i = 0;
+ int ret = 0;
+ dict_t *dict = dict_new ();
+
+ if (!dict)
+ return NULL;
+
+ for (i = 0; i < argc; i++) {
+ snprintf(index, sizeof(index), "%d", i);
+ ret = dict_set_str (dict, index, argv[i]);
+ if (ret == -1)
+ break;
+ }
+
+ if (ret) {
+ dict_destroy (dict);
+ dict = NULL;
+ }
+
+ return dict;
+}
+
+int
+cli_cmd_mount_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ int ret = -1;
+ dict_t *dict = NULL;
+ void *dataa[] = {NULL, NULL};
+
+ if (wordcount < 4) {
+ cli_usage_out (word->pattern);
+ goto out;
+ }
+
+ dict = make_seq_dict (wordcount - 3, (char **)words + 3);
+ if (!dict)
+ goto out;
+
+ dataa[0] = (void *)words[2];
+ dataa[1] = dict;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_MOUNT];
+ if (proc && proc->fn) {
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+ ret = proc->fn (frame, THIS, dataa);
+ }
+
+ out:
+ if (dict)
+ dict_unref (dict);
+
+ if (!proc && ret)
+ cli_out ("Mount command failed");
+
+ return ret;
+}
+
+int
+cli_cmd_umount_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ int ret = -1;
+ dict_t *dict = NULL;
+
+ if (!(wordcount == 3 ||
+ (wordcount == 4 && strcmp (words[3], "lazy") == 0))) {
+ cli_usage_out (word->pattern);
+ goto out;
+ }
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ ret = dict_set_str (dict, "path", (char *)words[2]);
+ if (ret != 0)
+ goto out;
+ ret = dict_set_int32 (dict, "lazy", wordcount == 4);
+ if (ret != 0)
+ goto out;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_UMOUNT];
+ if (proc && proc->fn) {
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+ ret = proc->fn (frame, THIS, dict);
+ }
+
+ out:
+ if (dict)
+ dict_unref (dict);
+
+ if (!proc && ret)
+ cli_out ("Umount command failed");
+
+ return ret;
+}
+
+int
+cli_cmd_uuid_get_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ int sent = 0;
+ int parse_error = 0;
+ dict_t *dict = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ cli_local_t *local = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ if (wordcount != 3) {
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_UUID_GET];
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ goto out;
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ CLI_LOCAL_INIT (local, words, frame, dict);
+ if (proc->fn)
+ ret = proc->fn (frame, this, dict);
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out ("uuid get failed");
+ }
+
+ if (dict)
+ dict_unref (dict);
+
+ CLI_STACK_DESTROY (frame);
+ return ret;
+}
+
+int
+cli_cmd_uuid_reset_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ gf_answer_t answer = GF_ANSWER_NO;
+ char *question = NULL;
+ cli_local_t *local = NULL;
+ dict_t *dict = NULL;
+ xlator_t *this = NULL;
+
+ question = "Resetting uuid changes the uuid of local glusterd. "
+ "Do you want to continue?";
+
+ if (wordcount != 3) {
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_UUID_RESET];
+
+ this = THIS;
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ goto out;
+
+ dict = dict_new ();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+ CLI_LOCAL_INIT (local, words, frame, dict);
+ answer = cli_cmd_get_confirmation (state, question);
+
+ if (GF_ANSWER_NO == answer) {
+ ret = 0;
+ goto out;
+ }
+
+ //send NULL as argument since no dictionary is sent to glusterd
+ if (proc->fn) {
+ ret = proc->fn (frame, this, dict);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out ("uuid reset failed");
+ }
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+}
+
+struct cli_cmd cli_system_cmds[] = {
+ { "system:: getspec <VOLID>",
+ cli_cmd_getspec_cbk,
+ "fetch spec for volume <VOLID>"},
+
+ { "system:: portmap brick2port <BRICK>",
+ cli_cmd_pmap_b2p_cbk,
+ "query which port <BRICK> listens on"},
+
+ { "system:: fsm log [<peer-name>]",
+ cli_cmd_fsm_log_cbk,
+ "display fsm transitions"},
+
+ { "system:: getwd",
+ cli_cmd_getwd_cbk,
+ "query glusterd work directory"},
+
+ { "system:: mount <label> <args...>",
+ cli_cmd_mount_cbk,
+ "request a mount"},
+
+ { "system:: umount <path> [lazy]",
+ cli_cmd_umount_cbk,
+ "request an umount"},
+
+ { "system:: uuid get",
+ cli_cmd_uuid_get_cbk,
+ "get uuid of glusterd"},
+
+ { "system:: uuid reset",
+ cli_cmd_uuid_reset_cbk,
+ "reset the uuid of glusterd"},
+
+ { "system:: help",
+ cli_cmd_system_help_cbk,
+ "display help for system commands"},
+
+ { "system:: copy file [<filename>]",
+ cli_cmd_copy_file_cbk,
+ "Copy file from current node's $working_dir to "
+ "$working_dir of all cluster nodes"},
+
+ { "system:: execute <command> <args>",
+ cli_cmd_sys_exec_cbk,
+ "Execute the command on all the nodes "
+ "in the cluster and display their output."},
+
+ { NULL, NULL, NULL }
+};
+
+int
+cli_cmd_sys_exec_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ char cmd_arg_name[PATH_MAX] = "";
+ char *command = NULL;
+ char *saveptr = NULL;
+ char *tmp = NULL;
+ int ret = -1;
+ int i = -1;
+ int cmd_args_count = 0;
+ int in_cmd_args_count = 0;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ cli_local_t *local = NULL;
+
+ if (wordcount < 3) {
+ cli_usage_out (word->pattern);
+ goto out;
+ }
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ command = strtok_r ((char *)words[2], " ", &saveptr);
+ do {
+ tmp = strtok_r (NULL, " ", &saveptr);
+ if (tmp) {
+ in_cmd_args_count++;
+ memset (cmd_arg_name, '\0', sizeof(cmd_arg_name));
+ snprintf (cmd_arg_name, sizeof(cmd_arg_name),
+ "cmd_arg_%d", in_cmd_args_count);
+ ret = dict_set_str (dict, cmd_arg_name, tmp);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to set "
+ "%s in dict", cmd_arg_name);
+ goto out;
+ }
+ }
+ } while (tmp);
+
+ cmd_args_count = wordcount - 3;
+
+ ret = dict_set_str (dict, "command", command);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to set command in dict");
+ goto out;
+ }
+
+ for (i=1; i <= cmd_args_count; i++) {
+ in_cmd_args_count++;
+ memset (cmd_arg_name, '\0', sizeof(cmd_arg_name));
+ snprintf (cmd_arg_name, sizeof(cmd_arg_name),
+ "cmd_arg_%d", in_cmd_args_count);
+ ret = dict_set_str (dict, cmd_arg_name,
+ (char *)words[2+i]);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to set %s in dict",
+ cmd_arg_name);
+ goto out;
+ }
+ }
+
+ ret = dict_set_int32 (dict, "cmd_args_count", in_cmd_args_count);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to set cmd_args_count in dict");
+ goto out;
+ }
+
+ ret = dict_set_str (dict, "volname", "N/A");
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to set volname in dict");
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_SYS_EXEC];
+ if (proc && proc->fn) {
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+ CLI_LOCAL_INIT (local, words, frame, dict);
+ ret = proc->fn (frame, THIS, (void*)dict);
+ }
+out:
+ return ret;
+}
+
+int
+cli_cmd_copy_file_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ char *filename = "";
+ dict_t *dict = NULL;
+ cli_local_t *local = NULL;
+
+ if (wordcount != 4) {
+ cli_usage_out (word->pattern);
+ goto out;
+ }
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ filename = (char*)words[3];
+ ret = dict_set_str (dict, "source", filename);
+ if (ret)
+ gf_log ("", GF_LOG_ERROR, "Unable to set filename in dict");
+
+ ret = dict_set_str (dict, "volname", "N/A");
+ if (ret)
+ gf_log ("", GF_LOG_ERROR, "Unable to set volname in dict");
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_COPY_FILE];
+ if (proc && proc->fn) {
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+ CLI_LOCAL_INIT (local, words, frame, dict);
+ ret = proc->fn (frame, THIS, (void*)dict);
+ }
+out:
+ return ret;
+}
+
+int
+cli_cmd_system_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount)
+{
+ struct cli_cmd *cmd = NULL;
+
+ for (cmd = cli_system_cmds; cmd->pattern; cmd++)
+ cli_out ("%s - %s", cmd->pattern, cmd->desc);
+
+ return 0;
+}
+
+int
+cli_cmd_system_register (struct cli_state *state)
+{
+ int ret = 0;
+ struct cli_cmd *cmd = NULL;
+
+ for (cmd = cli_system_cmds; cmd->pattern; cmd++) {
+
+ ret = cli_cmd_register (&state->tree, cmd);
+ if (ret)
+ goto out;
+ }
+out:
+ return ret;
+}
diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
new file mode 100644
index 000000000..100be0b73
--- /dev/null
+++ b/cli/src/cli-cmd-volume.c
@@ -0,0 +1,1972 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <pthread.h>
+
+#include <sys/socket.h>
+#include <netdb.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "cli.h"
+#include "cli-cmd.h"
+#include "cli-mem-types.h"
+#include "cli1-xdr.h"
+#include "run.h"
+
+extern struct rpc_clnt *global_rpc;
+
+extern rpc_clnt_prog_t *cli_rpc_prog;
+
+int
+cli_cmd_volume_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount);
+
+int
+cli_cmd_volume_info_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ cli_cmd_volume_get_ctx_t ctx = {0,};
+ cli_local_t *local = NULL;
+ int sent = 0;
+ int parse_error = 0;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GET_VOLUME];
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ if ((wordcount == 2) || (wordcount == 3 &&
+ !strcmp (words[2], "all"))) {
+ ctx.flags = GF_CLI_GET_NEXT_VOLUME;
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GET_NEXT_VOLUME];
+ } else if (wordcount == 3) {
+ ctx.flags = GF_CLI_GET_VOLUME;
+ ctx.volname = (char *)words[2];
+ if (strlen (ctx.volname) > 1024) {
+ cli_out ("Invalid volume name");
+ goto out;
+ }
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GET_VOLUME];
+ } else {
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ return -1;
+ }
+
+ local = cli_local_get ();
+
+ if (!local)
+ goto out;
+
+ local->get_vol.flags = ctx.flags;
+ if (ctx.volname)
+ local->get_vol.volname = gf_strdup (ctx.volname);
+
+ frame->local = local;
+
+ if (proc->fn) {
+ ret = proc->fn (frame, THIS, &ctx);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out ("Getting Volume information failed!");
+ }
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+
+}
+
+int
+cli_cmd_sync_volume_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ dict_t *dict = NULL;
+ cli_local_t *local = NULL;
+ gf_answer_t answer = GF_ANSWER_NO;
+ const char *question = "Sync volume may make data "
+ "inaccessible while the sync "
+ "is in progress. Do you want "
+ "to continue?";
+
+ if ((wordcount < 3) || (wordcount > 4)) {
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ if ((wordcount == 3) || !strcmp(words[3], "all")) {
+ ret = dict_set_int32 (dict, "flags", (int32_t)
+ GF_CLI_SYNC_ALL);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "failed to set"
+ "flag");
+ goto out;
+ }
+ } else {
+ ret = dict_set_str (dict, "volname", (char *) words[3]);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "failed to set "
+ "volume");
+ goto out;
+ }
+ }
+
+ ret = dict_set_str (dict, "hostname", (char *) words[2]);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "failed to set hostname");
+ goto out;
+ }
+
+ if (!(state->mode & GLUSTER_MODE_SCRIPT)) {
+ answer = cli_cmd_get_confirmation (state, question);
+ if (GF_ANSWER_NO == answer) {
+ ret = 0;
+ goto out;
+ }
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_SYNC_VOLUME];
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ CLI_LOCAL_INIT (local, words, frame, dict);
+
+ if (proc->fn) {
+ ret = proc->fn (frame, THIS, dict);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out ("Volume sync failed");
+ }
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+}
+
+gf_ai_compare_t
+cli_cmd_compare_addrinfo (struct addrinfo *first, struct addrinfo *next)
+{
+ int ret = -1;
+ struct addrinfo *tmp1 = NULL;
+ struct addrinfo *tmp2 = NULL;
+ char firstip[NI_MAXHOST] = {0.};
+ char nextip[NI_MAXHOST] = {0,};
+
+ for (tmp1 = first; tmp1 != NULL; tmp1 = tmp1->ai_next) {
+ ret = getnameinfo (tmp1->ai_addr, tmp1->ai_addrlen, firstip,
+ NI_MAXHOST, NULL, 0, NI_NUMERICHOST);
+ if (ret)
+ return GF_AI_COMPARE_ERROR;
+ for (tmp2 = next; tmp2 != NULL; tmp2 = tmp2->ai_next) {
+ ret = getnameinfo (tmp2->ai_addr, tmp2->ai_addrlen, nextip,
+ NI_MAXHOST, NULL, 0, NI_NUMERICHOST);
+ if (ret)
+ return GF_AI_COMPARE_ERROR;
+ if (!strcmp (firstip, nextip)) {
+ return GF_AI_COMPARE_MATCH;
+ }
+ }
+ }
+ return GF_AI_COMPARE_NO_MATCH;
+}
+
+/* Check for non optimal brick order for replicate :
+ * Checks if bricks belonging to a replicate volume
+ * are present on the same server
+ */
+int32_t
+cli_cmd_check_brick_order (struct cli_state *state, const char *bricks,
+ int brick_count, int sub_count)
+{
+ int ret = -1;
+ int i = 0;
+ int j = 0;
+ int k = 0;
+ addrinfo_list_t *ai_list = NULL;
+ addrinfo_list_t *ai_list_tmp1 = NULL;
+ addrinfo_list_t *ai_list_tmp2 = NULL;
+ char *brick = NULL;
+ char *brick_list = NULL;
+ char *brick_list_dup = NULL;
+ char *tmpptr = NULL;
+ struct addrinfo *ai_info = NULL;
+ gf_answer_t answer = GF_ANSWER_NO;
+ const char *failed_question = NULL;
+ const char *found_question = NULL;
+ failed_question = "Failed to perform brick order check. "
+ "Do you want to continue creating the volume? ";
+ found_question = "Multiple bricks of a replicate volume are present"
+ " on the same server. This setup is not optimal.\n"
+ "Do you still want to continue creating the volume? ";
+
+ GF_ASSERT (bricks);
+ GF_ASSERT (brick_count > 0);
+ GF_ASSERT (sub_count > 0);
+
+ ai_list = malloc (sizeof (addrinfo_list_t));
+ ai_list->info = NULL;
+ INIT_LIST_HEAD (&ai_list->list);
+ brick_list = gf_strdup (bricks);
+ if (brick_list == NULL) {
+ gf_log ("cli", GF_LOG_DEBUG, "failed to allocate memory");
+ goto check_failed;
+ }
+ brick_list_dup = brick_list;
+ /* Resolve hostnames and get addrinfo */
+ while (i < brick_count) {
+ ++i;
+ brick = strtok_r (brick_list, " \n", &tmpptr);
+ brick_list = tmpptr;
+ if (brick == NULL)
+ goto check_failed;
+ brick = strtok_r (brick, ":", &tmpptr);
+ if (brick == NULL)
+ goto check_failed;
+ ret = getaddrinfo (brick, NULL, NULL, &ai_info);
+ if (ret)
+ goto check_failed;
+ ai_list_tmp1 = malloc (sizeof (addrinfo_list_t));
+ if (ai_list_tmp1 == NULL)
+ goto check_failed;
+ ai_list_tmp1->info = ai_info;
+ list_add_tail (&ai_list_tmp1->list, &ai_list->list);
+ ai_list_tmp1 = NULL;
+ }
+
+ i = 0;
+ ai_list_tmp1 = list_entry (ai_list->list.next, addrinfo_list_t, list);
+
+ /* Check for bad brick order */
+ while (i < brick_count) {
+ ++i;
+ ai_info = ai_list_tmp1->info;
+ ai_list_tmp1 = list_entry (ai_list_tmp1->list.next,
+ addrinfo_list_t, list);
+ if ( 0 == i % sub_count) {
+ j = 0;
+ continue;
+ }
+ ai_list_tmp2 = ai_list_tmp1;
+ k = j;
+ while (k < sub_count - 1) {
+ ++k;
+ ret = cli_cmd_compare_addrinfo (ai_info,
+ ai_list_tmp2->info);
+ if (GF_AI_COMPARE_ERROR == ret)
+ goto check_failed;
+ if (GF_AI_COMPARE_MATCH == ret)
+ goto found_bad_brick_order;
+ ai_list_tmp2 = list_entry (ai_list_tmp2->list.next,
+ addrinfo_list_t, list);
+ }
+ ++j;
+ }
+ gf_log ("cli", GF_LOG_INFO, "Brick order okay");
+ ret = 0;
+ goto out;
+
+check_failed:
+ gf_log ("cli", GF_LOG_INFO, "Failed bad brick order check");
+ answer = cli_cmd_get_confirmation(state, failed_question);
+ if (GF_ANSWER_YES == answer)
+ ret = 0;
+ goto out;
+
+found_bad_brick_order:
+ gf_log ("cli", GF_LOG_INFO, "Bad brick order found");
+ answer = cli_cmd_get_confirmation (state, found_question);
+ if (GF_ANSWER_YES == answer)
+ ret = 0;
+out:
+ ai_list_tmp2 = NULL;
+ i = 0;
+ GF_FREE (brick_list_dup);
+ list_for_each_entry (ai_list_tmp1, &ai_list->list, list) {
+ if (ai_list_tmp1->info)
+ freeaddrinfo (ai_list_tmp1->info);
+ free (ai_list_tmp2);
+ ai_list_tmp2 = ai_list_tmp1;
+ }
+ free (ai_list_tmp2);
+ return ret;
+}
+
+int
+cli_cmd_volume_create_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ char *brick_list = NULL;
+ int32_t brick_count = 0;
+ int32_t sub_count = 0;
+ int32_t type = GF_CLUSTER_TYPE_NONE;
+ cli_local_t *local = NULL;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_CREATE_VOLUME];
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ ret = cli_cmd_volume_create_parse (words, wordcount, &options);
+
+ if (ret) {
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+ /*Check brick order if type is replicate*/
+ ret = dict_get_int32 (options, "type", &type);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not get brick type");
+ goto out;
+ }
+ if ((type == GF_CLUSTER_TYPE_REPLICATE) ||
+ (type == GF_CLUSTER_TYPE_STRIPE_REPLICATE)) {
+ if ((ret = dict_get_str (options, "bricks", &brick_list)) != 0) {
+ gf_log ("cli", GF_LOG_ERROR, "Replica bricks check : "
+ "Could not retrieve bricks list");
+ goto out;
+ }
+ if ((ret = dict_get_int32 (options, "count", &brick_count)) != 0) {
+ gf_log ("cli", GF_LOG_ERROR, "Replica bricks check : "
+ "Could not retrieve brick count");
+ goto out;
+ }
+ if ((ret = dict_get_int32 (options, "replica-count", &sub_count)) != 0) {
+ gf_log ("cli", GF_LOG_ERROR, "Replica bricks check : "
+ "Could not retrieve replica count");
+ goto out;
+ }
+ gf_log ("cli", GF_LOG_INFO, "Replicate cluster type found."
+ " Checking brick order.");
+ ret = cli_cmd_check_brick_order (state, brick_list, brick_count, sub_count);
+ if (ret) {
+ gf_log("cli", GF_LOG_INFO, "Not creating volume because of bad brick order");
+ goto out;
+ }
+ }
+
+ if (state->mode & GLUSTER_MODE_SCRIPT) {
+ ret = dict_set_int32 (options, "force", _gf_true);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to set force "
+ "option");
+ goto out;
+ }
+ }
+
+ CLI_LOCAL_INIT (local, words, frame, options);
+
+ if (proc->fn) {
+ ret = proc->fn (frame, THIS, options);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out ("Volume create failed");
+ }
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+}
+
+
+int
+cli_cmd_volume_delete_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ char *volname = NULL;
+ gf_answer_t answer = GF_ANSWER_NO;
+ const char *question = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+ dict_t *dict = NULL;
+
+ question = "Deleting volume will erase all information about the volume. "
+ "Do you want to continue?";
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_DELETE_VOLUME];
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ if (wordcount != 3) {
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ answer = cli_cmd_get_confirmation (state, question);
+
+ if (GF_ANSWER_NO == answer) {
+ ret = 0;
+ goto out;
+ }
+
+ volname = (char *)words[2];
+
+ ret = dict_set_str (dict, "volname", volname);
+
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_WARNING, "dict set failed");
+ goto out;
+ }
+
+ CLI_LOCAL_INIT (local, words, frame, dict);
+
+ if (proc->fn) {
+ ret = proc->fn (frame, THIS, dict);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out ("Volume delete failed");
+ }
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+}
+
+int
+cli_cmd_volume_start_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ dict_t *dict = NULL;
+ int flags = 0;
+ cli_local_t *local = NULL;
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ if (wordcount < 3 || wordcount > 4) {
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ dict = dict_new ();
+ if (!dict) {
+ goto out;
+ }
+
+ if (!words[2])
+ goto out;
+
+ ret = dict_set_str (dict, "volname", (char *)words[2]);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "dict set failed");
+ goto out;
+ }
+
+ if (wordcount == 4) {
+ if (!strcmp("force", words[3])) {
+ flags |= GF_CLI_FLAG_OP_FORCE;
+ } else {
+ ret = -1;
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+ }
+ ret = dict_set_int32 (dict, "flags", flags);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "dict set failed");
+ goto out;
+ }
+
+ if (ret < 0) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "failed to serialize dict");
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_START_VOLUME];
+
+ CLI_LOCAL_INIT (local, words, frame, dict);
+
+ if (proc->fn) {
+ ret = proc->fn (frame, THIS, dict);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out ("Volume start failed");
+ }
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+}
+
+gf_answer_t
+cli_cmd_get_confirmation (struct cli_state *state, const char *question)
+{
+ char answer[5] = {'\0', };
+ char flush = '\0';
+ size_t len;
+
+ if (state->mode & GLUSTER_MODE_SCRIPT)
+ return GF_ANSWER_YES;
+
+ printf ("%s (y/n) ", question);
+
+ if (fgets (answer, 4, stdin) == NULL) {
+ cli_out("gluster cli read error");
+ goto out;
+ }
+
+ len = strlen (answer);
+
+ if (len && answer [len - 1] == '\n'){
+ answer [--len] = '\0';
+ } else {
+ do{
+ flush = getchar ();
+ }while (flush != '\n');
+ }
+
+ if (len > 3)
+ goto out;
+
+ if (!strcasecmp (answer, "y") || !strcasecmp (answer, "yes"))
+ return GF_ANSWER_YES;
+
+ else if (!strcasecmp (answer, "n") || !strcasecmp (answer, "no"))
+ return GF_ANSWER_NO;
+
+out:
+ cli_out ("Invalid input, please enter y/n");
+
+ return GF_ANSWER_NO;
+}
+
+int
+cli_cmd_volume_stop_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ int flags = 0;
+ gf_answer_t answer = GF_ANSWER_NO;
+ int sent = 0;
+ int parse_error = 0;
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ cli_local_t *local = NULL;
+
+ const char *question = "Stopping volume will make its data inaccessible. "
+ "Do you want to continue?";
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ if (wordcount < 3 || wordcount > 4) {
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ volname = (char*) words[2];
+
+ dict = dict_new ();
+ ret = dict_set_str (dict, "volname", volname);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "dict set failed");
+ goto out;
+ }
+
+ if (wordcount == 4) {
+ if (!strcmp("force", words[3])) {
+ flags |= GF_CLI_FLAG_OP_FORCE;
+ } else {
+ ret = -1;
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+ }
+ ret = dict_set_int32 (dict, "flags", flags);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "dict set failed");
+ goto out;
+ }
+
+ answer = cli_cmd_get_confirmation (state, question);
+
+ if (GF_ANSWER_NO == answer) {
+ ret = 0;
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_STOP_VOLUME];
+
+ CLI_LOCAL_INIT (local, words, frame, dict);
+
+ if (proc->fn) {
+ ret = proc->fn (frame, THIS, dict);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out ("Volume stop on '%s' failed", volname);
+ }
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+}
+
+
+int
+cli_cmd_volume_rename_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ int sent = 0;
+ int parse_error = 0;
+
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ if (wordcount != 4) {
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ ret = dict_set_str (dict, "old-volname", (char *)words[2]);
+
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (dict, "new-volname", (char *)words[3]);
+
+ if (ret)
+ goto out;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_RENAME_VOLUME];
+
+ if (proc->fn) {
+ ret = proc->fn (frame, THIS, dict);
+ }
+
+out:
+ if (dict)
+ dict_destroy (dict);
+
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out ("Volume rename on '%s' failed", (char *)words[2]);
+ }
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+}
+
+int
+cli_cmd_volume_defrag_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+#ifdef GF_SOLARIS_HOST_OS
+ cli_out ("Command not supported on Solaris");
+ goto out;
+#endif
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ ret = cli_cmd_volume_defrag_parse (words, wordcount, &dict);
+
+ if (ret) {
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_DEFRAG_VOLUME];
+
+ CLI_LOCAL_INIT (local, words, frame, dict);
+
+ if (proc->fn) {
+ ret = proc->fn (frame, THIS, dict);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out ("Volume rebalance failed");
+ }
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+}
+
+int
+cli_cmd_volume_reset_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int sent = 0;
+ int parse_error = 0;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ cli_local_t *local = NULL;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_RESET_VOLUME];
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ ret = cli_cmd_volume_reset_parse (words, wordcount, &options);
+ if (ret) {
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ CLI_LOCAL_INIT (local, words, frame, options);
+
+ if (proc->fn) {
+ ret = proc->fn (frame, THIS, options);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out ("Volume reset failed");
+ }
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+
+}
+
+int
+cli_cmd_volume_profile_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int sent = 0;
+ int parse_error = 0;
+
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ cli_local_t *local = NULL;
+
+ ret = cli_cmd_volume_profile_parse (words, wordcount, &options);
+
+ if (ret) {
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_PROFILE_VOLUME];
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ CLI_LOCAL_INIT (local, words, frame, options);
+
+ if (proc->fn) {
+ ret = proc->fn (frame, THIS, options);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out ("Volume profile failed");
+ }
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+
+}
+
+int
+cli_cmd_volume_set_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int sent = 0;
+ int parse_error = 0;
+
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ cli_local_t *local = NULL;
+ char *op_errstr = NULL;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_SET_VOLUME];
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ ret = cli_cmd_volume_set_parse (words, wordcount, &options, &op_errstr);
+ if (ret) {
+ if (op_errstr) {
+ cli_err ("%s", op_errstr);
+ GF_FREE (op_errstr);
+ } else
+ cli_usage_out (word->pattern);
+
+ parse_error = 1;
+ goto out;
+ }
+
+ CLI_LOCAL_INIT (local, words, frame, options);
+
+ if (proc->fn) {
+ ret = proc->fn (frame, THIS, options);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out ("Volume set failed");
+ }
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+
+}
+
+int
+cli_cmd_volume_add_brick_cbk (struct cli_state *state,
+ struct cli_cmd_word *word, const char **words,
+ int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ gf_answer_t answer = GF_ANSWER_NO;
+ cli_local_t *local = NULL;
+
+ const char *question = "Changing the 'stripe count' of the volume is "
+ "not a supported feature. In some cases it may result in data "
+ "loss on the volume. Also there may be issues with regular "
+ "filesystem operations on the volume after the change. Do you "
+ "really want to continue with 'stripe' count option ? ";
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ ret = cli_cmd_volume_add_brick_parse (words, wordcount, &options);
+ if (ret) {
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ /* TODO: there are challenges in supporting changing of
+ stripe-count, untill it is properly supported give warning to user */
+ if (dict_get (options, "stripe-count")) {
+ answer = cli_cmd_get_confirmation (state, question);
+
+ if (GF_ANSWER_NO == answer) {
+ ret = 0;
+ goto out;
+ }
+ }
+
+ if (state->mode & GLUSTER_MODE_SCRIPT) {
+ ret = dict_set_int32 (options, "force", _gf_true);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to set force "
+ "option");
+ goto out;
+ }
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_ADD_BRICK];
+
+ CLI_LOCAL_INIT (local, words, frame, options);
+
+ if (proc->fn) {
+ ret = proc->fn (frame, THIS, options);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out ("Volume add-brick failed");
+ }
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+}
+
+int
+cli_cmd_quota_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+
+ int ret = 0;
+ int parse_err = 0;
+ int32_t type = 0;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ gf_answer_t answer = GF_ANSWER_NO;
+ cli_local_t *local = NULL;
+ const char *question = "Disabling quota will delete all the quota "
+ "configuration. Do you want to continue?";
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_QUOTA];
+ if (proc == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = cli_cmd_quota_parse (words, wordcount, &options);
+
+ if (ret < 0) {
+ cli_usage_out (word->pattern);
+ parse_err = 1;
+ goto out;
+ } else if (dict_get_int32 (options, "type", &type) == 0 &&
+ type == GF_QUOTA_OPTION_TYPE_DISABLE) {
+ answer = cli_cmd_get_confirmation (state, question);
+ if (answer == GF_ANSWER_NO)
+ goto out;
+ }
+
+ CLI_LOCAL_INIT (local, words, frame, options);
+
+ if (proc->fn)
+ ret = proc->fn (frame, THIS, options);
+
+out:
+ if (ret && parse_err == 0)
+ cli_out ("Quota command failed");
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+
+}
+
+int
+cli_cmd_volume_remove_brick_cbk (struct cli_state *state,
+ struct cli_cmd_word *word, const char **words,
+ int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ gf_answer_t answer = GF_ANSWER_NO;
+ int sent = 0;
+ int parse_error = 0;
+ int need_question = 0;
+ cli_local_t *local = NULL;
+
+ const char *question = "Removing brick(s) can result in data loss. "
+ "Do you want to Continue?";
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ ret = cli_cmd_volume_remove_brick_parse (words, wordcount, &options,
+ &need_question);
+ if (ret) {
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ if (!(state->mode & GLUSTER_MODE_SCRIPT) && need_question) {
+ /* we need to ask question only in case of 'commit or force' */
+ answer = cli_cmd_get_confirmation (state, question);
+ if (GF_ANSWER_NO == answer) {
+ ret = 0;
+ goto out;
+ }
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_REMOVE_BRICK];
+
+ CLI_LOCAL_INIT (local, words, frame, options);
+
+ if (proc->fn) {
+ ret = proc->fn (frame, THIS, options);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out ("Volume remove-brick failed");
+ }
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+
+}
+
+int
+cli_cmd_volume_replace_brick_cbk (struct cli_state *state,
+ struct cli_cmd_word *word,
+ const char **words,
+ int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+
+#ifdef GF_SOLARIS_HOST_OS
+ cli_out ("Command not supported on Solaris");
+ goto out;
+#endif
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_REPLACE_BRICK];
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ ret = cli_cmd_volume_replace_brick_parse (words, wordcount, &options);
+
+ if (ret) {
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ if (state->mode & GLUSTER_MODE_SCRIPT) {
+ ret = dict_set_int32 (options, "force", _gf_true);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to set force"
+ "option");
+ goto out;
+ }
+ }
+
+ CLI_LOCAL_INIT (local, words, frame, options);
+
+ if (proc->fn) {
+ ret = proc->fn (frame, THIS, options);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out ("Volume replace-brick failed");
+ }
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+}
+
+
+int
+cli_cmd_volume_set_transport_cbk (struct cli_state *state,
+ struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ cli_cmd_broadcast_response (0);
+ return 0;
+}
+
+int
+cli_cmd_volume_top_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+
+ ret = cli_cmd_volume_top_parse (words, wordcount, &options);
+
+ if (ret) {
+ parse_error = 1;
+ cli_usage_out (word->pattern);
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_TOP_VOLUME];
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ CLI_LOCAL_INIT (local, words, frame, options);
+
+ if (proc->fn) {
+ ret = proc->fn (frame, THIS, options);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out ("Volume top failed");
+ }
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+
+}
+
+
+int
+cli_cmd_log_rotate_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+
+ if (!((wordcount == 4) || (wordcount == 5))) {
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_LOG_ROTATE];
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ ret = cli_cmd_log_rotate_parse (words, wordcount, &options);
+ if (ret)
+ goto out;
+
+ CLI_LOCAL_INIT (local, words, frame, options);
+
+ if (proc->fn) {
+ ret = proc->fn (frame, THIS, options);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out ("Volume log rotate failed");
+ }
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+}
+
+#if (SYNCDAEMON_COMPILE)
+static int
+cli_check_gsync_present ()
+{
+ char buff[PATH_MAX] = {0, };
+ runner_t runner = {0,};
+ char *ptr = NULL;
+ int ret = 0;
+
+ ret = setenv ("_GLUSTERD_CALLED_", "1", 1);
+ if (-1 == ret) {
+ gf_log ("", GF_LOG_WARNING, "setenv syscall failed, hence could"
+ "not assert if geo-replication is installed");
+ goto out;
+ }
+
+ runinit (&runner);
+ runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "--version", NULL);
+ runner_redir (&runner, STDOUT_FILENO, RUN_PIPE);
+ ret = runner_start (&runner);
+ if (ret == -1) {
+ gf_log ("", GF_LOG_INFO, "geo-replication not installed");
+ goto out;
+ }
+
+ ptr = fgets(buff, sizeof(buff), runner_chio (&runner, STDOUT_FILENO));
+ if (ptr) {
+ if (!strstr (buff, "gsyncd")) {
+ ret = -1;
+ goto out;
+ }
+ } else {
+ ret = -1;
+ goto out;
+ }
+
+ ret = runner_end (&runner);
+
+ if (ret)
+ gf_log ("", GF_LOG_ERROR, "geo-replication not installed");
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret ? -1 : 0;
+
+}
+
+void
+cli_cmd_check_gsync_exists_cbk (struct cli_cmd *this)
+{
+
+ int ret = 0;
+
+ ret = cli_check_gsync_present ();
+ if (ret)
+ this->disable = _gf_true;
+
+}
+#endif
+
+int
+cli_cmd_volume_gsync_set_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = 0;
+ int parse_err = 0;
+ dict_t *options = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ cli_local_t *local = NULL;
+
+ proc = &cli_rpc_prog->proctable [GLUSTER_CLI_GSYNC_SET];
+ if (proc == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (frame == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = cli_cmd_gsync_set_parse (words, wordcount, &options);
+ if (ret) {
+ cli_usage_out (word->pattern);
+ parse_err = 1;
+ goto out;
+ }
+
+ CLI_LOCAL_INIT (local, words, frame, options);
+
+ if (proc->fn)
+ ret = proc->fn (frame, THIS, options);
+
+out:
+ if (ret && parse_err == 0)
+ cli_out (GEOREP" command failed");
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+}
+
+int
+cli_cmd_volume_status_cbk (struct cli_state *state,
+ struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ uint32_t cmd = 0;
+ cli_local_t *local = NULL;
+
+ ret = cli_cmd_volume_status_parse (words, wordcount, &dict);
+
+ if (ret) {
+ cli_usage_out (word->pattern);
+ goto out;
+ }
+
+ ret = dict_get_uint32 (dict, "cmd", &cmd);
+ if (ret)
+ goto out;
+
+ if (!(cmd & GF_CLI_STATUS_ALL)) {
+ /* for one volume or brick */
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_STATUS_VOLUME];
+ } else {
+ /* volume status all or all detail */
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_STATUS_ALL];
+ }
+
+ if (!proc->fn)
+ goto out;
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ CLI_LOCAL_INIT (local, words, frame, dict);
+
+ ret = proc->fn (frame, THIS, dict);
+
+out:
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+}
+
+
+int
+cli_get_detail_status (dict_t *dict, int i, cli_volume_status_t *status)
+{
+ uint64_t free = 0;
+ uint64_t total = 0;
+ char key[1024] = {0};
+ int ret = 0;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.free", i);
+ ret = dict_get_uint64 (dict, key, &free);
+
+ status->free = gf_uint64_2human_readable (free);
+ if (!status->free)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.total", i);
+ ret = dict_get_uint64 (dict, key, &total);
+
+ status->total = gf_uint64_2human_readable (total);
+ if (!status->total)
+ goto out;
+
+#ifdef GF_LINUX_HOST_OS
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.device", i);
+ ret = dict_get_str (dict, key, &(status->device));
+ if (ret)
+ status->device = NULL;
+#endif
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.block_size", i);
+ ret = dict_get_uint64 (dict, key, &(status->block_size));
+ if (ret) {
+ ret = 0;
+ status->block_size = 0;
+ }
+
+#ifdef GF_LINUX_HOST_OS
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mnt_options", i);
+ ret = dict_get_str (dict, key, &(status->mount_options));
+ if (ret)
+ status->mount_options = NULL;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.fs_name", i);
+ ret = dict_get_str (dict, key, &(status->fs_name));
+ if (ret) {
+ ret = 0;
+ status->fs_name = NULL;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.inode_size", i);
+ ret = dict_get_str (dict, key, &(status->inode_size));
+ if (ret)
+ status->inode_size = NULL;
+#endif /* GF_LINUX_HOST_OS */
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.total_inodes", i);
+ ret = dict_get_uint64 (dict, key,
+ &(status->total_inodes));
+ if (ret)
+ status->total_inodes = 0;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.free_inodes", i);
+ ret = dict_get_uint64 (dict, key, &(status->free_inodes));
+ if (ret) {
+ ret = 0;
+ status->free_inodes = 0;
+ }
+
+
+ out:
+ return ret;
+}
+
+void
+cli_print_detailed_status (cli_volume_status_t *status)
+{
+ cli_out ("%-20s : %-20s", "Brick", status->brick);
+ if (status->online)
+ cli_out ("%-20s : %-20d", "Port", status->port);
+ else
+ cli_out ("%-20s : %-20s", "Port", "N/A");
+ cli_out ("%-20s : %-20c", "Online", (status->online) ? 'Y' : 'N');
+ cli_out ("%-20s : %-20s", "Pid", status->pid_str);
+
+#ifdef GF_LINUX_HOST_OS
+ if (status->fs_name)
+ cli_out ("%-20s : %-20s", "File System", status->fs_name);
+ else
+ cli_out ("%-20s : %-20s", "File System", "N/A");
+
+ if (status->device)
+ cli_out ("%-20s : %-20s", "Device", status->device);
+ else
+ cli_out ("%-20s : %-20s", "Device", "N/A");
+
+ if (status->mount_options) {
+ cli_out ("%-20s : %-20s", "Mount Options",
+ status->mount_options);
+ } else {
+ cli_out ("%-20s : %-20s", "Mount Options", "N/A");
+ }
+
+ if (status->inode_size) {
+ cli_out ("%-20s : %-20s", "Inode Size",
+ status->inode_size);
+ } else {
+ cli_out ("%-20s : %-20s", "Inode Size", "N/A");
+ }
+#endif
+ if (status->free)
+ cli_out ("%-20s : %-20s", "Disk Space Free", status->free);
+ else
+ cli_out ("%-20s : %-20s", "Disk Space Free", "N/A");
+
+ if (status->total)
+ cli_out ("%-20s : %-20s", "Total Disk Space", status->total);
+ else
+ cli_out ("%-20s : %-20s", "Total Disk Space", "N/A");
+
+
+ if (status->total_inodes) {
+ cli_out ("%-20s : %-20ld", "Inode Count",
+ status->total_inodes);
+ } else {
+ cli_out ("%-20s : %-20s", "Inode Count", "N/A");
+ }
+
+ if (status->free_inodes) {
+ cli_out ("%-20s : %-20ld", "Free Inodes",
+ status->free_inodes);
+ } else {
+ cli_out ("%-20s : %-20s", "Free Inodes", "N/A");
+ }
+}
+
+int
+cli_print_brick_status (cli_volume_status_t *status)
+{
+ int fieldlen = CLI_VOL_STATUS_BRICK_LEN;
+ int bricklen = 0;
+ char *p = NULL;
+ int num_tabs = 0;
+
+ p = status->brick;
+ bricklen = strlen (p);
+ while (bricklen > 0) {
+ if (bricklen > fieldlen) {
+ cli_out ("%.*s", fieldlen, p);
+ p += fieldlen;
+ bricklen -= fieldlen;
+ } else {
+ num_tabs = (fieldlen - bricklen) / CLI_TAB_LENGTH + 1;
+ printf ("%s", p);
+ while (num_tabs-- != 0)
+ printf ("\t");
+ if (status->port) {
+ if (status->online)
+ cli_out ("%d\t%c\t%s",
+ status->port,
+ status->online?'Y':'N',
+ status->pid_str);
+ else
+ cli_out ("%s\t%c\t%s",
+ "N/A",
+ status->online?'Y':'N',
+ status->pid_str);
+ }
+ else
+ cli_out ("%s\t%c\t%s",
+ "N/A", status->online?'Y':'N',
+ status->pid_str);
+ bricklen = 0;
+ }
+ }
+
+ return 0;
+}
+
+int
+cli_cmd_volume_heal_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ dict_t *options = NULL;
+ xlator_t *this = NULL;
+ cli_local_t *local = NULL;
+
+ this = THIS;
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ goto out;
+
+ if (wordcount < 3) {
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ ret = cli_cmd_volume_heal_options_parse (words, wordcount, &options);
+ if (ret) {
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_HEAL_VOLUME];
+
+ CLI_LOCAL_INIT (local, words, frame, options);
+
+ if (proc->fn) {
+ ret = proc->fn (frame, THIS, options);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out ("Volume heal failed");
+ }
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+}
+
+int
+cli_cmd_volume_statedump_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ if (wordcount < 3) {
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ if (wordcount >= 3) {
+ ret = cli_cmd_volume_statedump_options_parse (words, wordcount,
+ &options);
+ if (ret) {
+ parse_error = 1;
+ gf_log ("cli", GF_LOG_ERROR, "Error parsing "
+ "statedump options");
+ cli_out ("Error parsing options");
+ cli_usage_out (word->pattern);
+ }
+ }
+
+ ret = dict_set_str (options, "volname", (char *)words[2]);
+ if (ret)
+ goto out;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_STATEDUMP_VOLUME];
+
+ CLI_LOCAL_INIT (local, words, frame, options);
+
+ if (proc->fn) {
+ ret = proc->fn (frame, THIS, options);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if ((sent == 0) && (parse_error = 0))
+ cli_out ("Volume statedump failed");
+ }
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+}
+
+int
+cli_cmd_volume_list_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ call_frame_t *frame = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ int sent = 0;
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_LIST_VOLUME];
+ if (proc->fn) {
+ ret = proc->fn (frame, THIS, NULL);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if (sent == 0)
+ cli_out ("Volume list failed");
+ }
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+}
+
+int
+cli_cmd_volume_clearlocks_cbk (struct cli_state *state,
+ struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ if (wordcount < 7 || wordcount > 8) {
+ cli_usage_out (word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ ret = cli_cmd_volume_clrlks_opts_parse (words, wordcount, &options);
+ if (ret) {
+ parse_error = 1;
+ gf_log ("cli", GF_LOG_ERROR, "Error parsing "
+ "clear-locks options");
+ cli_out ("Error parsing options");
+ cli_usage_out (word->pattern);
+ }
+
+ ret = dict_set_str (options, "volname", (char *)words[2]);
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (options, "path", (char *)words[3]);
+ if (ret)
+ goto out;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_CLRLOCKS_VOLUME];
+
+ CLI_LOCAL_INIT (local, words, frame, options);
+
+ if (proc->fn) {
+ ret = proc->fn (frame, THIS, options);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get (&sent);
+ if ((sent == 0) && (parse_error = 0))
+ cli_out ("Volume clear-locks failed");
+ }
+
+ CLI_STACK_DESTROY (frame);
+
+ return ret;
+}
+
+struct cli_cmd volume_cmds[] = {
+ { "volume info [all|<VOLNAME>]",
+ cli_cmd_volume_info_cbk,
+ "list information of all volumes"},
+
+ { "volume create <NEW-VOLNAME> [stripe <COUNT>] [replica <COUNT>] "
+ "[transport <tcp|rdma|tcp,rdma>] <NEW-BRICK>"
+#ifdef HAVE_BD_XLATOR
+ "?<vg_name>"
+#endif
+ "... [force]",
+
+ cli_cmd_volume_create_cbk,
+ "create a new volume of specified type with mentioned bricks"},
+
+ { "volume delete <VOLNAME>",
+ cli_cmd_volume_delete_cbk,
+ "delete volume specified by <VOLNAME>"},
+
+ { "volume start <VOLNAME> [force]",
+ cli_cmd_volume_start_cbk,
+ "start volume specified by <VOLNAME>"},
+
+ { "volume stop <VOLNAME> [force]",
+ cli_cmd_volume_stop_cbk,
+ "stop volume specified by <VOLNAME>"},
+
+ /*{ "volume rename <VOLNAME> <NEW-VOLNAME>",
+ cli_cmd_volume_rename_cbk,
+ "rename volume <VOLNAME> to <NEW-VOLNAME>"},*/
+
+ { "volume add-brick <VOLNAME> [<stripe|replica> <COUNT>] <NEW-BRICK> ... [force]",
+ cli_cmd_volume_add_brick_cbk,
+ "add brick to volume <VOLNAME>"},
+
+ { "volume remove-brick <VOLNAME> [replica <COUNT>] <BRICK> ... [start|stop|status|commit|force]",
+ cli_cmd_volume_remove_brick_cbk,
+ "remove brick from volume <VOLNAME>"},
+
+ { "volume rebalance <VOLNAME> [fix-layout] {start|stop|status} [force]",
+ cli_cmd_volume_defrag_cbk,
+ "rebalance operations"},
+
+ { "volume replace-brick <VOLNAME> <BRICK> <NEW-BRICK> {start [force]|pause|abort|status|commit [force]}",
+ cli_cmd_volume_replace_brick_cbk,
+ "replace-brick operations"},
+
+ /*{ "volume set-transport <VOLNAME> <TRANSPORT-TYPE> [<TRANSPORT-TYPE>] ...",
+ cli_cmd_volume_set_transport_cbk,
+ "set transport type for volume <VOLNAME>"},*/
+
+ { "volume set <VOLNAME> <KEY> <VALUE>",
+ cli_cmd_volume_set_cbk,
+ "set options for volume <VOLNAME>"},
+
+ { "volume help",
+ cli_cmd_volume_help_cbk,
+ "display help for the volume command"},
+
+ { "volume log rotate <VOLNAME> [BRICK]",
+ cli_cmd_log_rotate_cbk,
+ "rotate the log file for corresponding volume/brick"},
+
+ { "volume sync <HOSTNAME> [all|<VOLNAME>]",
+ cli_cmd_sync_volume_cbk,
+ "sync the volume information from a peer"},
+
+ { "volume reset <VOLNAME> [option] [force]",
+ cli_cmd_volume_reset_cbk,
+ "reset all the reconfigured options"},
+
+#if (SYNCDAEMON_COMPILE)
+ {"volume "GEOREP" [<VOLNAME>] [<SLAVE-URL>] {create [push-pem] [force]"
+ "|start [force]|stop [force]|config|status [detail]|delete} [options...]",
+ cli_cmd_volume_gsync_set_cbk,
+ "Geo-sync operations",
+ cli_cmd_check_gsync_exists_cbk},
+#endif
+
+ { "volume profile <VOLNAME> {start|stop|info [nfs]}",
+ cli_cmd_volume_profile_cbk,
+ "volume profile operations"},
+
+ { "volume quota <VOLNAME> <enable|disable|limit-usage|list|remove> [path] [value]",
+ cli_cmd_quota_cbk,
+ "quota translator specific operations"},
+
+ { "volume top <VOLNAME> {open|read|write|opendir|readdir|clear} [nfs|brick <brick>] [list-cnt <value>] |\n"
+ "volume top <VOLNAME> {read-perf|write-perf} [bs <size> count <count>] [brick <brick>] [list-cnt <value>]",
+ cli_cmd_volume_top_cbk,
+ "volume top operations"},
+
+ { "volume status [all | <VOLNAME> [nfs|shd|<BRICK>]]"
+ " [detail|clients|mem|inode|fd|callpool|tasks]",
+ cli_cmd_volume_status_cbk,
+ "display status of all or specified volume(s)/brick"},
+
+ { "volume heal <VOLNAME> [{full | statistics {heal-count {replica <hostname:brickname>}} |info {healed | heal-failed | split-brain}}]",
+ cli_cmd_volume_heal_cbk,
+ "self-heal commands on volume specified by <VOLNAME>"},
+
+ {"volume statedump <VOLNAME> [nfs] [all|mem|iobuf|callpool|priv|fd|"
+ "inode|history]...",
+ cli_cmd_volume_statedump_cbk,
+ "perform statedump on bricks"},
+
+ {"volume list",
+ cli_cmd_volume_list_cbk,
+ "list all volumes in cluster"},
+
+ {"volume clear-locks <VOLNAME> <path> kind {blocked|granted|all}"
+ "{inode [range]|entry [basename]|posix [range]}",
+ cli_cmd_volume_clearlocks_cbk,
+ "Clear locks held on path"
+ },
+
+ { NULL, NULL, NULL }
+};
+
+int
+cli_cmd_volume_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount)
+{
+ struct cli_cmd *cmd = NULL;
+
+ for (cmd = volume_cmds; cmd->pattern; cmd++)
+ if (_gf_false == cmd->disable)
+ cli_out ("%s - %s", cmd->pattern, cmd->desc);
+
+ return 0;
+}
+
+int
+cli_cmd_volume_register (struct cli_state *state)
+{
+ int ret = 0;
+ struct cli_cmd *cmd = NULL;
+
+ for (cmd = volume_cmds; cmd->pattern; cmd++) {
+
+ ret = cli_cmd_register (&state->tree, cmd);
+ if (ret)
+ goto out;
+ }
+out:
+ return ret;
+}
diff --git a/cli/src/cli-cmd.c b/cli/src/cli-cmd.c
new file mode 100644
index 000000000..b81f75b5b
--- /dev/null
+++ b/cli/src/cli-cmd.c
@@ -0,0 +1,386 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <pthread.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "cli.h"
+#include "cli-cmd.h"
+#include "cli-mem-types.h"
+#include "protocol-common.h"
+
+#include <fnmatch.h>
+
+static int cmd_done;
+static int cmd_sent;
+static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+static pthread_mutex_t cond_mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t conn = PTHREAD_COND_INITIALIZER;
+static pthread_mutex_t conn_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+int cli_op_ret = 0;
+int connected = 0;
+
+int cli_cmd_log_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount);
+
+static unsigned
+cli_cmd_needs_connection (struct cli_cmd_word *word)
+{
+ if (!strcasecmp ("quit", word->word))
+ return 0;
+
+ if (!strcasecmp ("help", word->word))
+ return 0;
+
+ if (!strcasecmp ("getwd", word->word))
+ return 1;
+
+ if (!strcasecmp ("exit", word->word))
+ return 0;
+
+ return CLI_DEFAULT_CONN_TIMEOUT;
+}
+
+int
+cli_cmd_status_reset (void)
+{
+ int ret = 0;
+
+ ret = cli_cmd_lock ();
+ {
+ if (ret == 0) {
+ cmd_sent = 0;
+ cmd_done = 0;
+ }
+ }
+ ret = cli_cmd_unlock ();
+ return ret;
+
+}
+
+int
+cli_cmd_sent_status_get (int *status)
+{
+ int ret = 0;
+ GF_ASSERT (status);
+
+ ret = cli_cmd_lock ();
+ {
+ if (ret == 0)
+ *status = cmd_sent;
+ }
+ ret = cli_cmd_unlock ();
+ return ret;
+}
+
+int
+cli_cmd_process (struct cli_state *state, int argc, char **argv)
+{
+ int ret = 0;
+ struct cli_cmd_word *word = NULL;
+ struct cli_cmd_word *next = NULL;
+ int i = 0;
+
+ word = &state->tree.root;
+
+ if (!argc)
+ return 0;
+
+ for (i = 0; i < argc; i++) {
+ next = cli_cmd_nextword (word, argv[i]);
+
+ word = next;
+ if (!word)
+ break;
+
+ if (word->cbkfn)
+ break;
+ }
+
+ if (!word) {
+ cli_out ("unrecognized word: %s (position %d)",
+ argv[i], i);
+ return -1;
+ }
+
+ if (!word->cbkfn) {
+ cli_out ("unrecognized command");
+ return -1;
+ }
+
+ if ( strcmp (word->word,"help")==0 )
+ goto callback;
+
+ state->await_connected = cli_cmd_needs_connection (word);
+
+ ret = cli_cmd_await_connected (state->await_connected);
+ if (ret) {
+ cli_out ("Connection failed. Please check if gluster "
+ "daemon is operational.");
+ gf_log ("", GF_LOG_INFO, "Exiting with: %d", ret);
+ exit (ret);
+ }
+
+callback:
+ ret = word->cbkfn (state, word, (const char **)argv, argc);
+ (void) cli_cmd_status_reset ();
+ return ret;
+}
+
+int
+cli_cmd_input_token_count (const char *text)
+{
+ int count = 0;
+ const char *trav = NULL;
+ int is_spc = 1;
+
+ for (trav = text; *trav; trav++) {
+ if (*trav == ' ') {
+ is_spc = 1;
+ } else {
+ if (is_spc) {
+ count++;
+ is_spc = 0;
+ }
+ }
+ }
+
+ return count;
+}
+
+
+int
+cli_cmd_process_line (struct cli_state *state, const char *text)
+{
+ int count = 0;
+ char **tokens = NULL;
+ char **tokenp = NULL;
+ char *token = NULL;
+ char *copy = NULL;
+ char *saveptr = NULL;
+ int i = 0;
+ int ret = -1;
+
+ count = cli_cmd_input_token_count (text);
+
+ tokens = calloc (count + 1, sizeof (*tokens));
+ if (!tokens)
+ return -1;
+
+ copy = strdup (text);
+ if (!copy)
+ goto out;
+
+ tokenp = tokens;
+
+ for (token = strtok_r (copy, " \t\r\n", &saveptr); token;
+ token = strtok_r (NULL, " \t\r\n", &saveptr)) {
+ *tokenp = strdup (token);
+
+ if (!*tokenp)
+ goto out;
+ tokenp++;
+ i++;
+
+ }
+
+ ret = cli_cmd_process (state, count, tokens);
+out:
+ free (copy);
+
+ if (tokens)
+ cli_cmd_tokens_destroy (tokens);
+
+ return ret;
+}
+
+
+int
+cli_cmds_register (struct cli_state *state)
+{
+ int ret = 0;
+
+ ret = cli_cmd_volume_register (state);
+ if (ret)
+ goto out;
+
+ ret = cli_cmd_probe_register (state);
+ if (ret)
+ goto out;
+
+ ret = cli_cmd_system_register (state);
+ if (ret)
+ goto out;
+
+ ret = cli_cmd_misc_register (state);
+ if (ret)
+ goto out;
+
+ ret = cli_cmd_snapshot_register (state);
+ if (ret)
+ goto out;
+out:
+ return ret;
+}
+
+int
+cli_cmd_cond_init ()
+{
+
+ pthread_mutex_init (&cond_mutex, NULL);
+ pthread_cond_init (&cond, NULL);
+
+ pthread_mutex_init (&conn_mutex, NULL);
+ pthread_cond_init (&conn, NULL);
+
+ return 0;
+}
+
+int
+cli_cmd_lock ()
+{
+ pthread_mutex_lock (&cond_mutex);
+ return 0;
+}
+
+int
+cli_cmd_unlock ()
+{
+ pthread_mutex_unlock (&cond_mutex);
+ return 0;
+}
+
+static void
+seconds_from_now (unsigned secs, struct timespec *ts)
+{
+ struct timeval tv = {0,};
+
+ gettimeofday (&tv, NULL);
+
+ ts->tv_sec = tv.tv_sec + secs;
+ ts->tv_nsec = tv.tv_usec * 1000;
+}
+
+int
+cli_cmd_await_response (unsigned time)
+{
+ struct timespec ts = {0,};
+ int ret = 0;
+
+ cli_op_ret = -1;
+
+ seconds_from_now (time, &ts);
+ while (!cmd_done && !ret) {
+ ret = pthread_cond_timedwait (&cond, &cond_mutex,
+ &ts);
+ }
+
+ cmd_done = 0;
+
+ if (ret)
+ return ret;
+
+ return cli_op_ret;
+}
+
+int
+cli_cmd_broadcast_response (int32_t status)
+{
+
+ pthread_mutex_lock (&cond_mutex);
+ {
+ if (!cmd_sent)
+ goto out;
+ cmd_done = 1;
+ cli_op_ret = status;
+ pthread_cond_broadcast (&cond);
+ }
+
+
+out:
+ pthread_mutex_unlock (&cond_mutex);
+ return 0;
+}
+
+int32_t
+cli_cmd_await_connected (unsigned conn_timo)
+{
+ int32_t ret = 0;
+ struct timespec ts = {0,};
+
+ if (!conn_timo)
+ return 0;
+
+ pthread_mutex_lock (&conn_mutex);
+ {
+ seconds_from_now (conn_timo, &ts);
+ while (!connected && !ret) {
+ ret = pthread_cond_timedwait (&conn, &conn_mutex,
+ &ts);
+ }
+ }
+ pthread_mutex_unlock (&conn_mutex);
+
+
+ return ret;
+}
+
+int32_t
+cli_cmd_broadcast_connected ()
+{
+ pthread_mutex_lock (&conn_mutex);
+ {
+ connected = 1;
+ pthread_cond_broadcast (&conn);
+ }
+
+ pthread_mutex_unlock (&conn_mutex);
+
+ return 0;
+}
+
+int
+cli_cmd_submit (void *req, call_frame_t *frame,
+ rpc_clnt_prog_t *prog,
+ int procnum, struct iobref *iobref,
+ xlator_t *this, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc)
+{
+ int ret = -1;
+ unsigned timeout = 0;
+
+ if ((GLUSTER_CLI_PROFILE_VOLUME == procnum) ||
+ (GLUSTER_CLI_HEAL_VOLUME == procnum))
+ timeout = CLI_TEN_MINUTES_TIMEOUT;
+ else
+ timeout = CLI_DEFAULT_CMD_TIMEOUT;
+
+ cli_cmd_lock ();
+ cmd_sent = 0;
+ ret = cli_submit_request (req, frame, prog,
+ procnum, NULL, this, cbkfn, xdrproc);
+
+ if (!ret) {
+ cmd_sent = 1;
+ ret = cli_cmd_await_response (timeout);
+ }
+
+ cli_cmd_unlock ();
+
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
diff --git a/cli/src/cli-cmd.h b/cli/src/cli-cmd.h
new file mode 100644
index 000000000..041729276
--- /dev/null
+++ b/cli/src/cli-cmd.h
@@ -0,0 +1,124 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __CLI_CMD_H__
+#define __CLI_CMD_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <netdb.h>
+
+#include "cli.h"
+#include "list.h"
+
+#define CLI_LOCAL_INIT(local, words, frame, dictionary) \
+ do { \
+ local = cli_local_get (); \
+ \
+ if (local) { \
+ local->words = words; \
+ if (dictionary) \
+ local->dict = dictionary; \
+ if (frame) \
+ frame->local = local; \
+ } \
+ } while (0)
+
+#define CLI_STACK_DESTROY(_frame) \
+ do { \
+ if (_frame) { \
+ if (_frame->local) { \
+ gf_log ("cli", GF_LOG_DEBUG, "frame->local " \
+ "is not NULL (%p)", _frame->local); \
+ cli_local_wipe (_frame->local); \
+ _frame->local = NULL; \
+ } \
+ STACK_DESTROY (_frame->root); \
+ } \
+ } while (0);
+
+typedef enum {
+ GF_ANSWER_YES = 1,
+ GF_ANSWER_NO = 2
+} gf_answer_t;
+
+struct cli_cmd {
+ const char *pattern;
+ cli_cmd_cbk_t *cbk;
+ const char *desc;
+ cli_cmd_reg_cbk_t *reg_cbk; /* callback to check in runtime if the *
+ * command should be enabled or disabled */
+ gf_boolean_t disable;
+};
+
+struct cli_cmd_volume_get_ctx_ {
+ char *volname;
+ int flags;
+};
+
+typedef struct cli_profile_info_ {
+ uint64_t fop_hits;
+ double min_latency;
+ double max_latency;
+ double avg_latency;
+ char *fop_name;
+ double percentage_avg_latency;
+} cli_profile_info_t;
+
+typedef struct addrinfo_list {
+ struct list_head list;
+ struct addrinfo *info;
+} addrinfo_list_t;
+
+typedef enum {
+ GF_AI_COMPARE_NO_MATCH = 0,
+ GF_AI_COMPARE_MATCH = 1,
+ GF_AI_COMPARE_ERROR = 2
+} gf_ai_compare_t;
+
+typedef struct cli_cmd_volume_get_ctx_ cli_cmd_volume_get_ctx_t;
+
+int cli_cmd_volume_register (struct cli_state *state);
+
+int cli_cmd_probe_register (struct cli_state *state);
+
+int cli_cmd_system_register (struct cli_state *state);
+
+int cli_cmd_snapshot_register (struct cli_state *state);
+
+int cli_cmd_misc_register (struct cli_state *state);
+
+struct cli_cmd_word *cli_cmd_nextword (struct cli_cmd_word *word,
+ const char *text);
+void cli_cmd_tokens_destroy (char **tokens);
+
+int cli_cmd_await_response (unsigned time);
+
+int cli_cmd_broadcast_response (int32_t status);
+
+int cli_cmd_cond_init ();
+
+int cli_cmd_lock ();
+
+int cli_cmd_unlock ();
+
+int
+cli_cmd_submit (void *req, call_frame_t *frame,
+ rpc_clnt_prog_t *prog,
+ int procnum, struct iobref *iobref,
+ xlator_t *this, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc);
+
+gf_answer_t
+cli_cmd_get_confirmation (struct cli_state *state, const char *question);
+int cli_cmd_sent_status_get (int *status);
+
+#endif /* __CLI_CMD_H__ */
diff --git a/cli/src/cli-mem-types.h b/cli/src/cli-mem-types.h
new file mode 100644
index 000000000..09fcb639b
--- /dev/null
+++ b/cli/src/cli-mem-types.h
@@ -0,0 +1,30 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __CLI_MEM_TYPES_H__
+#define __CLI_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+#define CLI_MEM_TYPE_START (gf_common_mt_end + 1)
+
+enum cli_mem_types_ {
+ cli_mt_xlator_list_t = CLI_MEM_TYPE_START,
+ cli_mt_xlator_t,
+ cli_mt_xlator_cmdline_option_t,
+ cli_mt_char,
+ cli_mt_call_pool_t,
+ cli_mt_cli_local_t,
+ cli_mt_cli_get_vol_ctx_t,
+ cli_mt_append_str,
+ cli_mt_end
+
+};
+
+#endif
diff --git a/cli/src/cli-rl.c b/cli/src/cli-rl.c
new file mode 100644
index 000000000..ade1c8ebb
--- /dev/null
+++ b/cli/src/cli-rl.c
@@ -0,0 +1,415 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <pthread.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "cli.h"
+#include "cli-cmd.h"
+#include "cli-mem-types.h"
+
+#include "event.h"
+
+#include <fnmatch.h>
+
+#ifdef HAVE_READLINE
+
+#include <stdio.h>
+#include <readline/readline.h>
+#include <readline/history.h>
+
+
+int
+cli_rl_out (struct cli_state *state, const char *fmt, va_list ap)
+{
+ int tmp_rl_point = rl_point;
+ int n = rl_end;
+ int ret = 0;
+
+ if (rl_end >= 0 ) {
+ rl_kill_text (0, rl_end);
+ rl_redisplay ();
+ }
+
+ printf ("\r%*s\r", (int)strlen (state->prompt), "");
+
+ ret = vprintf (fmt, ap);
+
+ printf ("\n");
+ fflush(stdout);
+
+ if (n) {
+ rl_do_undo ();
+ rl_point = tmp_rl_point;
+ rl_reset_line_state ();
+ }
+
+ return ret;
+}
+
+int
+cli_rl_err (struct cli_state *state, const char *fmt, va_list ap)
+{
+ int tmp_rl_point = rl_point;
+ int n = rl_end;
+ int ret = 0;
+
+ if (rl_end >= 0 ) {
+ rl_kill_text (0, rl_end);
+ rl_redisplay ();
+ }
+
+ fprintf (stderr, "\r%*s\r", (int)strlen (state->prompt), "");
+
+ ret = vfprintf (stderr, fmt, ap);
+
+ fprintf (stderr, "\n");
+ fflush(stderr);
+
+ if (n) {
+ rl_do_undo ();
+ rl_point = tmp_rl_point;
+ rl_reset_line_state ();
+ }
+
+ return ret;
+}
+
+
+void
+cli_rl_process_line (char *line)
+{
+ struct cli_state *state = NULL;
+ int ret = 0;
+
+ state = global_state;
+
+ state->rl_processing = 1;
+ {
+ ret = cli_cmd_process_line (state, line);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to process line");
+
+ add_history (line);
+ }
+ state->rl_processing = 0;
+
+}
+
+
+int
+cli_rl_stdin (int fd, int idx, void *data,
+ int poll_out, int poll_in, int poll_err)
+{
+ rl_callback_read_char ();
+
+ return 0;
+}
+
+
+char *
+cli_rl_autocomplete_entry (const char *text, int times)
+{
+ struct cli_state *state = NULL;
+ char *retp = NULL;
+
+ state = global_state;
+
+ if (!state->matchesp)
+ return NULL;
+
+ retp = *state->matchesp;
+
+ state->matchesp++;
+
+ return retp ? strdup (retp) : NULL;
+}
+
+
+int
+cli_rl_token_count (const char *text)
+{
+ int count = 0;
+ const char *trav = NULL;
+ int is_spc = 1;
+
+ for (trav = text; *trav; trav++) {
+ if (*trav == ' ') {
+ is_spc = 1;
+ } else {
+ if (is_spc) {
+ count++;
+ is_spc = 0;
+ }
+ }
+ }
+
+ if (is_spc)
+ /* what needs to be autocompleted is a full
+ new word, and not extend the last word
+ */
+ count++;
+
+ return count;
+}
+
+
+char **
+cli_rl_tokenize (const char *text)
+{
+ int count = 0;
+ char **tokens = NULL;
+ char **tokenp = NULL;
+ char *token = NULL;
+ char *copy = NULL;
+ char *saveptr = NULL;
+ int i = 0;
+
+ count = cli_rl_token_count (text);
+
+ tokens = calloc (count + 1, sizeof (*tokens));
+ if (!tokens)
+ return NULL;
+
+ copy = strdup (text);
+ if (!copy)
+ goto out;
+
+ tokenp = tokens;
+
+ for (token = strtok_r (copy, " \t\r\n", &saveptr); token;
+ token = strtok_r (NULL, " \t\r\n", &saveptr)) {
+ *tokenp = strdup (token);
+
+ if (!*tokenp)
+ goto out;
+ tokenp++;
+ i++;
+
+ }
+
+ if (i < count) {
+ /* symbolize that what needs to be autocompleted is
+ the full set of possible nextwords, and not extend
+ the last word
+ */
+ *tokenp = strdup ("");
+ if (!*tokenp)
+ goto out;
+ tokenp++;
+ i++;
+ }
+
+out:
+ free (copy);
+
+ if (i < count) {
+ cli_cmd_tokens_destroy (tokens);
+ tokens = NULL;
+ }
+
+ return tokens;
+}
+
+
+char **
+cli_rl_get_matches (struct cli_state *state, struct cli_cmd_word *word,
+ const char *text)
+{
+ char **matches = NULL;
+ char **matchesp = NULL;
+ struct cli_cmd_word **next = NULL;
+ int count = 0;
+ int len = 0;
+
+ len = strlen (text);
+
+ if (!word->nextwords)
+ return NULL;
+
+ for (next = word->nextwords; *next; next++)
+ count++;
+
+ matches = calloc (count + 1, sizeof (*matches));
+ matchesp = matches;
+
+ for (next = word->nextwords; *next; next++) {
+ if ((*next)->match) {
+ continue;
+ }
+
+ if (strncmp ((*next)->word, text, len) == 0) {
+ *matchesp = strdup ((*next)->word);
+ matchesp++;
+ }
+ }
+
+ return matches;
+}
+
+
+int
+cli_rl_autocomplete_prepare (struct cli_state *state, const char *text)
+{
+ struct cli_cmd_word *word = NULL;
+ struct cli_cmd_word *next = NULL;
+ char **tokens = NULL;
+ char **tokenp = NULL;
+ char *token = NULL;
+ char **matches = NULL;
+
+ tokens = cli_rl_tokenize (text);
+ if (!tokens)
+ return 0;
+
+ word = &state->tree.root;
+
+ for (tokenp = tokens; (token = *tokenp); tokenp++) {
+ if (!*(tokenp+1)) {
+ /* last word */
+ break;
+ }
+
+ next = cli_cmd_nextword (word, token);
+ word = next;
+ if (!word)
+ break;
+ }
+
+ if (!word)
+ goto out;
+
+ matches = cli_rl_get_matches (state, word, token);
+
+ state->matches = matches;
+ state->matchesp = matches;
+
+out:
+ cli_cmd_tokens_destroy (tokens);
+ return 0;
+}
+
+
+int
+cli_rl_autocomplete_cleanup (struct cli_state *state)
+{
+ if (state->matches)
+ cli_cmd_tokens_destroy (state->matches);
+
+ state->matches = NULL;
+ state->matchesp = NULL;
+
+ return 0;
+}
+
+
+char **
+cli_rl_autocomplete (const char *text, int start, int end)
+{
+ struct cli_state *state = NULL;
+ char **matches = NULL;
+ char save = 0;
+
+ state = global_state;
+
+ /* hack to make the autocompletion code neater */
+ /* fake it as though the cursor is at the end of line */
+
+ save = rl_line_buffer[rl_point];
+ rl_line_buffer[rl_point] = 0;
+
+ cli_rl_autocomplete_prepare (state, rl_line_buffer);
+
+ matches = rl_completion_matches (text, cli_rl_autocomplete_entry);
+
+ cli_rl_autocomplete_cleanup (state);
+
+ rl_line_buffer[rl_point] = save;
+
+ return matches;
+}
+
+
+static char *
+complete_none (const char *txt, int times)
+{
+ return NULL;
+}
+
+
+void *
+cli_rl_input (void *_data)
+{
+ struct cli_state *state = NULL;
+ char *line = NULL;
+
+ state = _data;
+
+ for (;;) {
+ line = readline (state->prompt);
+ if (!line)
+ exit(0); //break;
+
+ if (*line)
+ cli_rl_process_line (line);
+
+ free (line);
+ }
+
+ return NULL;
+}
+
+
+int
+cli_rl_enable (struct cli_state *state)
+{
+ int ret = 0;
+
+ rl_pre_input_hook = NULL;
+ rl_attempted_completion_function = cli_rl_autocomplete;
+ rl_completion_entry_function = complete_none;
+
+ if (!state->rl_async) {
+ ret = pthread_create (&state->input, NULL,
+ cli_rl_input, state);
+ if (ret == 0)
+ state->rl_enabled = 1;
+ goto out;
+ }
+
+ ret = event_register (state->ctx->event_pool, 0, cli_rl_stdin, state,
+ 1, 0);
+ if (ret == -1)
+ goto out;
+
+ state->rl_enabled = 1;
+ rl_callback_handler_install (state->prompt, cli_rl_process_line);
+
+out:
+ return state->rl_enabled;
+}
+
+#else /* HAVE_READLINE */
+
+int
+cli_rl_enable (struct cli_state *state)
+{
+ return 0;
+}
+
+#endif /* HAVE_READLINE */
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
new file mode 100644
index 000000000..bfeb854ad
--- /dev/null
+++ b/cli/src/cli-rpc-ops.c
@@ -0,0 +1,8673 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+/* Widths of various columns in top read/write-perf output
+ * Total width of top read/write-perf should be 80 chars
+ * including one space between column
+ */
+#define VOL_TOP_PERF_FILENAME_DEF_WIDTH 47
+#define VOL_TOP_PERF_FILENAME_ALT_WIDTH 44
+#define VOL_TOP_PERF_SPEED_WIDTH 4
+#define VOL_TOP_PERF_TIME_WIDTH 26
+
+#define INDENT_MAIN_HEAD "%-25s %s "
+
+#include "cli.h"
+#include "compat-errno.h"
+#include "cli-cmd.h"
+#include <sys/uio.h>
+#include <stdlib.h>
+#include <sys/mount.h>
+#include "cli1-xdr.h"
+#include "xdr-generic.h"
+#include "protocol-common.h"
+#include "cli-mem-types.h"
+#include "compat.h"
+
+#include "syscall.h"
+#include "glusterfs3.h"
+#include "portmap-xdr.h"
+
+#include "run.h"
+
+extern rpc_clnt_prog_t *cli_rpc_prog;
+extern int cli_op_ret;
+extern int connected;
+
+char *cli_vol_type_str[] = {"Distribute",
+ "Stripe",
+ "Replicate",
+ "Striped-Replicate",
+ "Distributed-Stripe",
+ "Distributed-Replicate",
+ "Distributed-Striped-Replicate",
+ };
+
+char *cli_vol_status_str[] = {"Created",
+ "Started",
+ "Stopped",
+ };
+
+char *cli_vol_task_status_str[] = {"not started",
+ "in progress",
+ "stopped",
+ "completed",
+ "failed",
+ "fix-layout in progress",
+ "fix-layout stopped",
+ "fix-layout completed",
+ "fix-layout failed",
+};
+
+int32_t
+gf_cli_get_volume (call_frame_t *frame, xlator_t *this,
+ void *data);
+
+int
+cli_to_glusterd (gf_cli_req *req, call_frame_t *frame, fop_cbk_fn_t cbkfn,
+ xdrproc_t xdrproc, dict_t *dict, int procnum, xlator_t *this,
+ rpc_clnt_prog_t *prog, struct iobref *iobref);
+
+rpc_clnt_prog_t cli_handshake_prog = {
+ .progname = "cli handshake",
+ .prognum = GLUSTER_HNDSK_PROGRAM,
+ .progver = GLUSTER_HNDSK_VERSION,
+};
+
+rpc_clnt_prog_t cli_pmap_prog = {
+ .progname = "cli portmap",
+ .prognum = GLUSTER_PMAP_PROGRAM,
+ .progver = GLUSTER_PMAP_VERSION,
+};
+
+int
+gf_cli_probe_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ char msg[1024] = {0,};
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ //rsp.op_ret = -1;
+ //rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ gf_log ("cli", GF_LOG_INFO, "Received resp to probe");
+
+ if (rsp.op_errstr && (strlen (rsp.op_errstr) > 0)) {
+ snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
+ if (rsp.op_ret)
+ gf_log ("cli", GF_LOG_ERROR, "%s", msg);
+ }
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str (NULL,
+ (rsp.op_ret)? NULL : msg,
+ rsp.op_ret, rsp.op_errno,
+ (rsp.op_ret)? msg : NULL);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (!rsp.op_ret)
+ cli_out ("peer probe: success. %s", msg);
+ else
+ cli_err ("peer probe: failed: %s", msg);
+
+ ret = rsp.op_ret;
+
+out:
+ cli_cmd_broadcast_response (ret);
+ return ret;
+}
+
+int
+gf_cli_deprobe_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ char msg[1024] = {0,};
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ //rsp.op_ret = -1;
+ //rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ gf_log ("cli", GF_LOG_INFO, "Received resp to deprobe");
+
+ if (rsp.op_ret) {
+ if (strlen (rsp.op_errstr) > 0) {
+ snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
+ gf_log ("cli", GF_LOG_ERROR, "%s", rsp.op_errstr);
+ }
+ } else {
+ snprintf (msg, sizeof (msg), "success");
+ }
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str (NULL,
+ (rsp.op_ret)? NULL : msg,
+ rsp.op_ret, rsp.op_errno,
+ (rsp.op_ret)? msg : NULL);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (!rsp.op_ret)
+ cli_out ("peer detach: %s", msg);
+ else
+ cli_err ("peer detach: failed: %s", msg);
+
+ ret = rsp.op_ret;
+
+out:
+ cli_cmd_broadcast_response (ret);
+ return ret;
+}
+
+int
+gf_cli_output_peer_status (dict_t *dict, int count)
+{
+ int ret = -1;
+ char *uuid_buf = NULL;
+ char *hostname_buf = NULL;
+ int32_t i = 1;
+ char key[256] = {0,};
+ char *state = NULL;
+ int32_t connected = 0;
+ char *connected_str = NULL;
+
+ cli_out ("Number of Peers: %d", count);
+ i = 1;
+ while ( i <= count) {
+ snprintf (key, 256, "friend%d.uuid", i);
+ ret = dict_get_str (dict, key, &uuid_buf);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "friend%d.hostname", i);
+ ret = dict_get_str (dict, key, &hostname_buf);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "friend%d.connected", i);
+ ret = dict_get_int32 (dict, key, &connected);
+ if (ret)
+ goto out;
+ if (connected)
+ connected_str = "Connected";
+ else
+ connected_str = "Disconnected";
+
+
+ snprintf (key, 256, "friend%d.state", i);
+ ret = dict_get_str (dict, key, &state);
+ if (ret)
+ goto out;
+
+ cli_out ("\nHostname: %s\nUuid: %s\nState: %s (%s)",
+ hostname_buf, uuid_buf, state, connected_str);
+ i++;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+gf_cli_output_pool_list (dict_t *dict, int count)
+{
+ int ret = -1;
+ char *uuid_buf = NULL;
+ char *hostname_buf = NULL;
+ int32_t i = 1;
+ char key[256] = {0,};
+ int32_t connected = 0;
+ char *connected_str = NULL;
+
+ if (count >= 1)
+ cli_out ("UUID\t\t\t\t\tHostname\tState");
+
+ while ( i <= count) {
+ snprintf (key, 256, "friend%d.uuid", i);
+ ret = dict_get_str (dict, key, &uuid_buf);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "friend%d.hostname", i);
+ ret = dict_get_str (dict, key, &hostname_buf);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "friend%d.connected", i);
+ ret = dict_get_int32 (dict, key, &connected);
+ if (ret)
+ goto out;
+ if (connected)
+ connected_str = "Connected";
+ else
+ connected_str = "Disconnected";
+
+ cli_out ("%s\t%-9s\t%s ", uuid_buf, hostname_buf,
+ connected_str);
+ i++;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/* function pointer for gf_cli_output_{pool_list,peer_status} */
+typedef int (*cli_friend_output_fn) (dict_t*, int);
+
+int
+gf_cli_list_friends_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf1_cli_peer_list_rsp rsp = {0,};
+ int ret = -1;
+ dict_t *dict = NULL;
+ char msg[1024] = {0,};
+ char *cmd = NULL;
+ cli_friend_output_fn friend_output_fn;
+ call_frame_t *frame = NULL;
+ unsigned long flags = 0;
+
+ frame = myframe;
+ flags = (long)frame->local;
+
+ if (flags == GF_CLI_LIST_POOL_NODES) {
+ cmd = "pool list";
+ friend_output_fn = &gf_cli_output_pool_list;
+ } else {
+ cmd = "peer status";
+ friend_output_fn = &gf_cli_output_peer_status;
+ }
+
+ /* 'free' the flags set by gf_cli_list_friends */
+ frame->local = NULL;
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_peer_list_rsp);
+ if (ret < 0) {
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ //rsp.op_ret = -1;
+ //rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ gf_log ("cli", GF_LOG_DEBUG, "Received resp to list: %d",
+ rsp.op_ret);
+
+ ret = rsp.op_ret;
+
+ if (!rsp.op_ret) {
+
+ if (!rsp.friends.friends_len) {
+ snprintf (msg, sizeof (msg),
+ "%s: No peers present", cmd);
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_peer_status (dict,
+ rsp.op_ret,
+ rsp.op_errno,
+ msg);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+ cli_err ("%s", msg);
+ ret = 0;
+ goto out;
+ }
+
+ dict = dict_new ();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (rsp.friends.friends_val,
+ rsp.friends.friends_len,
+ &dict);
+
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to allocate memory");
+ goto out;
+ }
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_peer_status (dict, rsp.op_ret,
+ rsp.op_errno, msg);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "count", &count);
+ if (ret) {
+ goto out;
+ }
+
+ ret = friend_output_fn (dict, count);
+ if (ret) {
+ goto out;
+ }
+ } else {
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_peer_status (dict, rsp.op_ret,
+ rsp.op_errno, NULL);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ } else {
+ ret = -1;
+ }
+ goto out;
+ }
+
+
+ ret = 0;
+
+out:
+ cli_cmd_broadcast_response (ret);
+ if (ret)
+ cli_err ("%s: failed", cmd);
+
+ if (dict)
+ dict_destroy (dict);
+
+ return ret;
+}
+
+void
+cli_out_options ( char *substr, char *optstr, char *valstr)
+{
+ char *ptr1 = NULL;
+ char *ptr2 = NULL;
+
+ ptr1 = substr;
+ ptr2 = optstr;
+
+ while (ptr1)
+ {
+ if (*ptr1 != *ptr2)
+ break;
+ ptr1++;
+ ptr2++;
+ if (!ptr1)
+ return;
+ if (!ptr2)
+ return;
+ }
+
+ if (*ptr2 == '\0')
+ return;
+ cli_out ("%s: %s",ptr2 , valstr);
+}
+
+static int
+_gf_cli_output_volinfo_opts (dict_t *d, char *k,
+ data_t *v, void *tmp)
+{
+ int ret = 0;
+ char *key = NULL;
+ char *ptr = NULL;
+ data_t *value = NULL;
+
+ key = tmp;
+
+ ptr = strstr (k, "option.");
+ if (ptr) {
+ value = v;
+ if (!value) {
+ ret = -1;
+ goto out;
+ }
+ cli_out_options (key, k, v->data);
+ }
+out:
+ return ret;
+}
+
+
+int
+gf_cli_get_volume_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ int opt_count = 0;
+ int32_t i = 0;
+ int32_t j = 1;
+ int32_t status = 0;
+ int32_t type = 0;
+ int32_t brick_count = 0;
+ int32_t dist_count = 0;
+ int32_t stripe_count = 0;
+ int32_t replica_count = 0;
+ int32_t vol_type = 0;
+ int32_t transport = 0;
+ char *volume_id_str = NULL;
+ char *brick = NULL;
+ char *volname = NULL;
+ dict_t *dict = NULL;
+ cli_local_t *local = NULL;
+ char key[1024] = {0};
+ char err_str[2048] = {0};
+ gf_cli_rsp rsp = {0};
+ char *caps = NULL;
+ int k __attribute__((unused)) = 0;
+ // snap_volume variable helps in showing whether a volume is a normal
+ //volume or a volume for the snapshot
+ int32_t snap_volume = 0;
+
+ if (-1 == req->rpc_status)
+ goto out;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ gf_log ("cli", GF_LOG_INFO, "Received resp to get vol: %d",
+ rsp.op_ret);
+
+ if (rsp.op_ret) {
+ ret = -1;
+ goto out;
+ }
+
+ if (!rsp.dict.dict_len) {
+ if (global_state->mode & GLUSTER_MODE_XML)
+ goto xml_output;
+ cli_err ("No volumes present");
+ ret = 0;
+ goto out;
+ }
+
+ dict = dict_new ();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &dict);
+
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Unable to allocate memory");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "count", &count);
+ if (ret)
+ goto out;
+
+ local = ((call_frame_t *)myframe)->local;
+
+ if (!count) {
+ switch (local->get_vol.flags) {
+
+ case GF_CLI_GET_NEXT_VOLUME:
+ GF_FREE (local->get_vol.volname);
+ local->get_vol.volname = NULL;
+ ret = 0;
+ goto out;
+
+ case GF_CLI_GET_VOLUME:
+ memset (err_str, 0, sizeof (err_str));
+ snprintf (err_str, sizeof (err_str),
+ "Volume %s does not exist",
+ local->get_vol.volname);
+ ret = -1;
+ if (!(global_state->mode & GLUSTER_MODE_XML))
+ goto out;
+ }
+ }
+
+xml_output:
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ /* For GET_NEXT_VOLUME output is already begun in
+ * and will also end in gf_cli_get_next_volume()
+ */
+ if (local->get_vol.flags == GF_CLI_GET_VOLUME) {
+ ret = cli_xml_output_vol_info_begin
+ (local, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+ }
+
+ if (dict) {
+ ret = cli_xml_output_vol_info (local, dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+ }
+
+ if (local->get_vol.flags == GF_CLI_GET_VOLUME) {
+ ret = cli_xml_output_vol_info_end (local);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ }
+ goto out;
+ }
+
+ while ( i < count) {
+ cli_out (" ");
+ snprintf (key, 256, "volume%d.name", i);
+ ret = dict_get_str (dict, key, &volname);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "volume%d.type", i);
+ ret = dict_get_int32 (dict, key, &type);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "volume%d.status", i);
+ ret = dict_get_int32 (dict, key, &status);
+ if (ret)
+ goto out;
+
+ snprintf (key, sizeof (key), "volume%d.snap_volume", i);
+ ret = dict_get_int32 (dict, key, &snap_volume);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "volume%d.brick_count", i);
+ ret = dict_get_int32 (dict, key, &brick_count);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "volume%d.dist_count", i);
+ ret = dict_get_int32 (dict, key, &dist_count);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "volume%d.stripe_count", i);
+ ret = dict_get_int32 (dict, key, &stripe_count);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "volume%d.replica_count", i);
+ ret = dict_get_int32 (dict, key, &replica_count);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "volume%d.transport", i);
+ ret = dict_get_int32 (dict, key, &transport);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "volume%d.volume_id", i);
+ ret = dict_get_str (dict, key, &volume_id_str);
+ if (ret)
+ goto out;
+
+ vol_type = type;
+
+ // Distributed (stripe/replicate/stripe-replica) setups
+ if ((type > 0) && ( dist_count < brick_count))
+ vol_type = type + 3;
+
+ cli_out ("Volume Name: %s", volname);
+ cli_out ("Type: %s", cli_vol_type_str[vol_type]);
+ cli_out ("Volume ID: %s", volume_id_str);
+ cli_out ("Status: %s", cli_vol_status_str[status]);
+ if (snap_volume)
+ cli_out ("Snap Volume: %s", "yes");
+ else
+ cli_out ("Snap Volume: %s", "no");
+
+#ifdef HAVE_BD_XLATOR
+ k = 0;
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.xlator%d", i, k);
+ ret = dict_get_str (dict, key, &caps);
+ if (ret)
+ goto next;
+ do {
+ j = 0;
+ cli_out ("Xlator %d: %s", k + 1, caps);
+ do {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key),
+ "volume%d.xlator%d.caps%d",
+ i, k, j++);
+ ret = dict_get_str (dict, key, &caps);
+ if (ret)
+ break;
+ cli_out ("Capability %d: %s", j, caps);
+ } while (1);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key),
+ "volume%d.xlator%d", i, ++k);
+ ret = dict_get_str (dict, key, &caps);
+ if (ret)
+ break;
+ } while (1);
+
+next:
+#else
+ caps = 0; /* Avoid compiler warnings when BD not enabled */
+#endif
+
+ if (type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) {
+ cli_out ("Number of Bricks: %d x %d x %d = %d",
+ (brick_count / dist_count),
+ stripe_count,
+ replica_count,
+ brick_count);
+ } else if (type == GF_CLUSTER_TYPE_NONE) {
+ cli_out ("Number of Bricks: %d", brick_count);
+ } else {
+ /* For both replicate and stripe, dist_count is
+ good enough */
+ cli_out ("Number of Bricks: %d x %d = %d",
+ (brick_count / dist_count),
+ dist_count, brick_count);
+ }
+
+ cli_out ("Transport-type: %s",
+ ((transport == 0)?"tcp":
+ (transport == 1)?"rdma":
+ "tcp,rdma"));
+ j = 1;
+
+ GF_FREE (local->get_vol.volname);
+ local->get_vol.volname = gf_strdup (volname);
+
+ if (brick_count)
+ cli_out ("Bricks:");
+
+ while (j <= brick_count) {
+ snprintf (key, 1024, "volume%d.brick%d", i, j);
+ ret = dict_get_str (dict, key, &brick);
+ if (ret)
+ goto out;
+
+ cli_out ("Brick%d: %s", j, brick);
+#ifdef HAVE_BD_XLATOR
+ snprintf (key, 256, "volume%d.vg%d", i, j);
+ ret = dict_get_str (dict, key, &caps);
+ if (!ret)
+ cli_out ("Brick%d VG: %s", j, caps);
+#endif
+ j++;
+ }
+
+ snprintf (key, 256, "volume%d.opt_count",i);
+ ret = dict_get_int32 (dict, key, &opt_count);
+ if (ret)
+ goto out;
+
+ if (!opt_count)
+ goto out;
+
+ cli_out ("Options Reconfigured:");
+
+ snprintf (key, 256, "volume%d.option.",i);
+
+ ret = dict_foreach (dict, _gf_cli_output_volinfo_opts, key);
+ if (ret)
+ goto out;
+
+ i++;
+ }
+
+
+ ret = 0;
+out:
+ cli_cmd_broadcast_response (ret);
+ if (ret)
+ cli_err ("%s", err_str);
+
+ if (dict)
+ dict_destroy (dict);
+
+ free (rsp.dict.dict_val);
+
+ free (rsp.op_errstr);
+
+ gf_log ("cli", GF_LOG_INFO, "Returning: %d", ret);
+ return ret;
+}
+
+int
+gf_cli_create_volume_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ cli_local_t *local = NULL;
+ char *volname = NULL;
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ local = ((call_frame_t *) (myframe))->local;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ gf_log ("cli", GF_LOG_INFO, "Received resp to create volume");
+
+ dict = local->dict;
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ if (rsp.op_ret == 0) {
+ rsp_dict = dict_new ();
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &rsp_dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Failed rsp_dict unserialization");
+ goto out;
+ }
+ }
+
+ ret = cli_xml_output_vol_create (rsp_dict, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
+ cli_err ("volume create: %s: failed: %s", volname,
+ rsp.op_errstr);
+ else if (rsp.op_ret)
+ cli_err ("volume create: %s: failed", volname);
+ else
+ cli_out ("volume create: %s: success: "
+ "please start the volume to access data", volname);
+
+ ret = rsp.op_ret;
+
+out:
+ cli_cmd_broadcast_response (ret);
+ free (rsp.dict.dict_val);
+ free (rsp.op_errstr);
+ return ret;
+}
+
+int
+gf_cli_delete_volume_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ cli_local_t *local = NULL;
+ char *volname = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ frame = myframe;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ local = frame->local;
+
+ if (local)
+ dict = local->dict;
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "dict get failed");
+ goto out;
+ }
+
+ gf_log ("cli", GF_LOG_INFO, "Received resp to delete volume");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ if (rsp.op_ret == 0) {
+ rsp_dict = dict_new ();
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &rsp_dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Failed rsp_dict unserialization");
+ goto out;
+ }
+ }
+
+ ret = cli_xml_output_generic_volume ("volDelete", rsp_dict,
+ rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
+ cli_err ("volume delete: %s: failed: %s", volname,
+ rsp.op_errstr);
+ else if (rsp.op_ret)
+ cli_err ("volume delete: %s: failed", volname);
+ else
+ cli_out ("volume delete: %s: success", volname);
+
+ ret = rsp.op_ret;
+
+out:
+ cli_cmd_broadcast_response (ret);
+ free (rsp.dict.dict_val);
+
+ gf_log ("", GF_LOG_INFO, "Returning with %d", ret);
+ return ret;
+}
+
+int
+gf_cli3_1_uuid_get_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ char *uuid_str = NULL;
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ cli_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+
+ if (-1 == req->rpc_status)
+ goto out;
+
+ frame = myframe;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ local = frame->local;
+ frame->local = NULL;
+
+ gf_log ("cli", GF_LOG_INFO, "Received resp to uuid get");
+
+ dict = dict_new ();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (rsp.dict.dict_val, rsp.dict.dict_len,
+ &dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to unserialize "
+ "response for uuid get");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "uuid", &uuid_str);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to get uuid "
+ "from dictionary");
+ goto out;
+ }
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_dict ("uuidGenerate", dict, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (rsp.op_ret) {
+ if (strcmp (rsp.op_errstr, "") == 0)
+ cli_err ("Get uuid was unsuccessful");
+ else
+ cli_err ("%s", rsp.op_errstr);
+
+ } else {
+ cli_out ("UUID: %s", uuid_str);
+
+ }
+ ret = rsp.op_ret;
+
+out:
+ cli_cmd_broadcast_response (ret);
+ cli_local_wipe (local);
+ if (rsp.dict.dict_val)
+ free (rsp.dict.dict_val);
+ if (dict)
+ dict_unref (dict);
+
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ return ret;
+}
+
+int
+gf_cli3_1_uuid_reset_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ cli_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ frame = myframe;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ local = frame->local;
+ frame->local = NULL;
+
+ gf_log ("cli", GF_LOG_INFO, "Received resp to uuid reset");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_dict ("uuidReset", dict, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
+ cli_err ("%s", rsp.op_errstr);
+ else
+ cli_out ("resetting the peer uuid has been %s",
+ (rsp.op_ret) ? "unsuccessful": "successful");
+ ret = rsp.op_ret;
+
+out:
+ cli_cmd_broadcast_response (ret);
+ cli_local_wipe (local);
+ if (rsp.dict.dict_val)
+ free (rsp.dict.dict_val);
+ if (dict)
+ dict_unref (dict);
+
+ gf_log ("", GF_LOG_INFO, "Returning with %d", ret);
+ return ret;
+}
+
+int
+gf_cli_start_volume_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ cli_local_t *local = NULL;
+ char *volname = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ frame = myframe;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ if (frame)
+ local = frame->local;
+
+ if (local)
+ dict = local->dict;
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR, "dict get failed");
+ goto out;
+ }
+
+ gf_log ("cli", GF_LOG_INFO, "Received resp to start volume");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ if (rsp.op_ret == 0) {
+ rsp_dict = dict_new ();
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &rsp_dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Failed rsp_dict unserialization");
+ goto out;
+ }
+ }
+
+ ret = cli_xml_output_generic_volume ("volStart", rsp_dict,
+ rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
+ cli_err ("volume start: %s: failed: %s", volname,
+ rsp.op_errstr);
+ else if (rsp.op_ret)
+ cli_err ("volume start: %s: failed", volname);
+ else
+ cli_out ("volume start: %s: success", volname);
+
+ ret = rsp.op_ret;
+
+out:
+ cli_cmd_broadcast_response (ret);
+ free (rsp.dict.dict_val);
+ free (rsp.op_errstr);
+ return ret;
+}
+
+int
+gf_cli_stop_volume_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ cli_local_t *local = NULL;
+ char *volname = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ frame = myframe;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ if (frame)
+ local = frame->local;
+
+ if (local) {
+ dict = local->dict;
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Unable to get volname from dict");
+ goto out;
+ }
+ }
+
+ gf_log ("cli", GF_LOG_INFO, "Received resp to stop volume");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ if (rsp.op_ret == 0) {
+ rsp_dict = dict_new ();
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &rsp_dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Failed rsp_dict unserialization");
+ goto out;
+ }
+ }
+
+ ret = cli_xml_output_generic_volume ("volStop", rsp_dict,
+ rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
+ cli_err ("volume stop: %s: failed: %s", volname, rsp.op_errstr);
+ else if (rsp.op_ret)
+ cli_err ("volume stop: %s: failed", volname);
+ else
+ cli_out ("volume stop: %s: success", volname);
+
+ ret = rsp.op_ret;
+
+out:
+ cli_cmd_broadcast_response (ret);
+ free (rsp.op_errstr);
+ free (rsp.dict.dict_val);
+
+ return ret;
+}
+
+int
+gf_cli_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf_cli_rsp rsp = {0,};
+ cli_local_t *local = NULL;
+ char *volname = NULL;
+ call_frame_t *frame = NULL;
+ char *status = "unknown";
+ int cmd = 0;
+ int ret = -1;
+ dict_t *dict = NULL;
+ dict_t *local_dict = NULL;
+ uint64_t files = 0;
+ uint64_t size = 0;
+ uint64_t lookup = 0;
+ char msg[1024] = {0,};
+ gf_defrag_status_t status_rcd = GF_DEFRAG_STATUS_NOT_STARTED;
+ int32_t counter = 0;
+ char *node_name = NULL;
+ char key[256] = {0,};
+ int32_t i = 1;
+ uint64_t failures = 0;
+ uint64_t skipped = 0;
+ double elapsed = 0;
+ char *size_str = NULL;
+ char *task_id_str = NULL;
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ frame = myframe;
+
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ if (frame)
+ local = frame->local;
+
+ if (local)
+ local_dict = local->dict;
+
+ ret = dict_get_str (local_dict, "volname", &volname);
+ if (ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to get volname");
+ goto out;
+ }
+
+ ret = dict_get_int32 (local_dict, "rebalance-command", (int32_t*)&cmd);
+ if (ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to get command");
+ goto out;
+ }
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ goto out;
+ }
+ }
+
+ if (!((cmd == GF_DEFRAG_CMD_STOP) || (cmd == GF_DEFRAG_CMD_STATUS)) &&
+ !(global_state->mode & GLUSTER_MODE_XML)) {
+ /* All other possibilites are about starting a rebalance */
+ ret = dict_get_str (dict, GF_REBALANCE_TID_KEY, &task_id_str);
+ if (rsp.op_ret && strcmp (rsp.op_errstr, "")) {
+ snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
+ } else {
+ if (!rsp.op_ret) {
+ snprintf (msg, sizeof (msg),
+ "Starting rebalance on volume %s has "
+ "been successful.\nID: %s", volname,
+ task_id_str);
+ } else {
+ snprintf (msg, sizeof (msg),
+ "Starting rebalance on volume %s has "
+ "been unsuccessful.", volname);
+ }
+ }
+ goto done;
+ }
+
+ if (cmd == GF_DEFRAG_CMD_STOP) {
+ if (rsp.op_ret == -1) {
+ if (strcmp (rsp.op_errstr, ""))
+ snprintf (msg, sizeof (msg),
+ "%s", rsp.op_errstr);
+ else
+ snprintf (msg, sizeof (msg),
+ "rebalance volume %s stop failed",
+ volname);
+ goto done;
+ } else {
+ snprintf (msg, sizeof (msg),
+ "rebalance process may be in the middle of a "
+ "file migration.\nThe process will be fully "
+ "stopped once the migration of the file is "
+ "complete.\nPlease check rebalance process "
+ "for completion before doing any further "
+ "brick related tasks on the volume.");
+ }
+ }
+ if (cmd == GF_DEFRAG_CMD_STATUS) {
+ if (rsp.op_ret == -1) {
+ if (strcmp (rsp.op_errstr, ""))
+ snprintf (msg, sizeof (msg),
+ "%s", rsp.op_errstr);
+ else
+ snprintf (msg, sizeof (msg),
+ "Failed to get the status of "
+ "rebalance process");
+ goto done;
+ }
+ }
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_rebalance (cmd, dict, rsp.op_ret,
+ rsp.op_errno,
+ rsp.op_errstr);
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "count", &counter);
+ if (ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR, "count not set");
+ goto out;
+ }
+
+ cli_out ("%40s %16s %13s %13s %13s %13s %20s %18s", "Node",
+ "Rebalanced-files", "size", "scanned", "failures", "skipped",
+ "status", "run time in secs");
+ cli_out ("%40s %16s %13s %13s %13s %13s %20s %18s", "---------",
+ "-----------", "-----------", "-----------", "-----------",
+ "-----------", "------------", "--------------");
+ do {
+ snprintf (key, 256, "node-name-%d", i);
+ ret = dict_get_str (dict, key, &node_name);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get node-name");
+
+ memset (key, 0, 256);
+ snprintf (key, 256, "files-%d", i);
+ ret = dict_get_uint64 (dict, key, &files);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get file count");
+
+ memset (key, 0, 256);
+ snprintf (key, 256, "size-%d", i);
+ ret = dict_get_uint64 (dict, key, &size);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get size of xfer");
+
+ memset (key, 0, 256);
+ snprintf (key, 256, "lookups-%d", i);
+ ret = dict_get_uint64 (dict, key, &lookup);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get lookedup file count");
+
+ memset (key, 0, 256);
+ snprintf (key, 256, "status-%d", i);
+ ret = dict_get_int32 (dict, key, (int32_t *)&status_rcd);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get status");
+
+ memset (key, 0, 256);
+ snprintf (key, 256, "failures-%d", i);
+ ret = dict_get_uint64 (dict, key, &failures);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get failures count");
+
+ memset (key, 0, 256);
+ snprintf (key, 256, "skipped-%d", i);
+ ret = dict_get_uint64 (dict, key, &skipped);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get skipped count");
+ memset (key, 0, 256);
+ snprintf (key, 256, "run-time-%d", i);
+ ret = dict_get_double (dict, key, &elapsed);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get run-time");
+
+ status = cli_vol_task_status_str[status_rcd];
+ size_str = gf_uint64_2human_readable(size);
+ cli_out ("%40s %16"PRIu64 " %13s" " %13"PRIu64 " %13"PRIu64
+ " %13"PRIu64 " %20s %18.2f", node_name, files,
+ size_str, lookup, failures, skipped, status, elapsed);
+ GF_FREE(size_str);
+
+ i++;
+ } while (i <= counter);
+
+
+done:
+ if (global_state->mode & GLUSTER_MODE_XML)
+ cli_xml_output_str ("volRebalance", msg,
+ rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ else {
+ if (rsp.op_ret)
+ cli_err ("volume rebalance: %s: failed: %s", volname,
+ msg);
+ else
+ cli_out ("volume rebalance: %s: success: %s", volname,
+ msg);
+ }
+ ret = rsp.op_ret;
+
+out:
+ free (rsp.op_errstr); //malloced by xdr
+ free (rsp.dict.dict_val); //malloced by xdr
+ if (dict)
+ dict_unref (dict);
+ cli_cmd_broadcast_response (ret);
+ return ret;
+}
+
+int
+gf_cli_rename_volume_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ char msg[1024] = {0,};
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+
+ gf_log ("cli", GF_LOG_INFO, "Received resp to probe");
+ snprintf (msg, sizeof (msg), "Rename volume %s",
+ (rsp.op_ret) ? "unsuccessful": "successful");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str ("volRename", msg, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (rsp.op_ret)
+ cli_err ("volume rename: failed");
+ else
+ cli_out ("volume rename: success");
+
+ ret = rsp.op_ret;
+
+out:
+ cli_cmd_broadcast_response (ret);
+ return ret;
+}
+
+int
+gf_cli_reset_volume_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ char msg[1024] = {0,};
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ gf_log ("cli", GF_LOG_INFO, "Received resp to reset");
+
+ if (strcmp (rsp.op_errstr, ""))
+ snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
+ else
+ snprintf (msg, sizeof (msg), "reset volume %s",
+ (rsp.op_ret) ? "unsuccessful": "successful");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str ("volReset", msg, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (rsp.op_ret)
+ cli_err ("volume reset: failed: %s", msg);
+ else
+ cli_out ("volume reset: success: %s", msg);
+
+ ret = rsp.op_ret;
+
+out:
+ cli_cmd_broadcast_response (ret);
+ return ret;
+}
+
+char *
+is_server_debug_xlator (void *myframe)
+{
+ call_frame_t *frame = NULL;
+ cli_local_t *local = NULL;
+ char **words = NULL;
+ char *key = NULL;
+ char *value = NULL;
+ char *debug_xlator = NULL;
+
+ frame = myframe;
+ local = frame->local;
+ words = (char **)local->words;
+
+ while (*words != NULL) {
+ if (strstr (*words, "trace") == NULL &&
+ strstr (*words, "error-gen") == NULL) {
+ words++;
+ continue;
+ }
+
+ key = *words;
+ words++;
+ value = *words;
+ if (value == NULL)
+ break;
+ if (strstr (value, "client")) {
+ words++;
+ continue;
+ } else {
+ if (!(strstr (value, "posix") || strstr (value, "acl")
+ || strstr (value, "locks") ||
+ strstr (value, "io-threads") ||
+ strstr (value, "marker") ||
+ strstr (value, "index"))) {
+ words++;
+ continue;
+ } else {
+ debug_xlator = gf_strdup (key);
+ break;
+ }
+ }
+ }
+
+ return debug_xlator;
+}
+
+int
+gf_cli_set_volume_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ dict_t *dict = NULL;
+ char *help_str = NULL;
+ char msg[1024] = {0,};
+ char *debug_xlator = _gf_false;
+ char tmp_str[512] = {0,};
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ gf_log ("cli", GF_LOG_INFO, "Received resp to set");
+
+ dict = dict_new ();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+
+ /* For brick processes graph change does not happen on the fly.
+ * The proces has to be restarted. So this is a check from the
+ * volume set option such that if debug xlators such as trace/errorgen
+ * are provided in the set command, warn the user.
+ */
+ debug_xlator = is_server_debug_xlator (myframe);
+
+ if (dict_get_str (dict, "help-str", &help_str) && !msg[0])
+ snprintf (msg, sizeof (msg), "Set volume %s",
+ (rsp.op_ret) ? "unsuccessful": "successful");
+ if (rsp.op_ret == 0 && debug_xlator) {
+ snprintf (tmp_str, sizeof (tmp_str), "\n%s translator has been "
+ "added to the server volume file. Please restart the"
+ " volume for enabling the translator", debug_xlator);
+ }
+
+ if ((global_state->mode & GLUSTER_MODE_XML) && (help_str == NULL)) {
+ ret = cli_xml_output_str ("volSet", msg, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (rsp.op_ret) {
+ if (strcmp (rsp.op_errstr, ""))
+ cli_err ("volume set: failed: %s", rsp.op_errstr);
+ else
+ cli_err ("volume set: failed");
+ } else {
+ if (help_str == NULL) {
+ if (debug_xlator == NULL)
+ cli_out ("volume set: success");
+ else
+ cli_out ("volume set: success%s", tmp_str);
+ }else {
+ cli_out ("%s", help_str);
+ }
+ }
+
+ ret = rsp.op_ret;
+
+out:
+ if (dict)
+ dict_unref (dict);
+ GF_FREE (debug_xlator);
+ cli_cmd_broadcast_response (ret);
+ return ret;
+}
+
+int
+gf_cli_add_brick_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ char msg[1024] = {0,};
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+
+ gf_log ("cli", GF_LOG_INFO, "Received resp to add brick");
+
+ if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
+ snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
+ else
+ snprintf (msg, sizeof (msg), "Add Brick %s",
+ (rsp.op_ret) ? "unsuccessful": "successful");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str ("volAddBrick", msg, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (rsp.op_ret)
+ cli_err ("volume add-brick: failed: %s", rsp.op_errstr);
+ else
+ cli_out ("volume add-brick: success");
+ ret = rsp.op_ret;
+
+out:
+ cli_cmd_broadcast_response (ret);
+ free (rsp.dict.dict_val);
+ free (rsp.op_errstr);
+ return ret;
+}
+
+int
+gf_cli3_remove_brick_status_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf_cli_rsp rsp = {0,};
+ char *status = "unknown";
+ int ret = -1;
+ uint64_t files = 0;
+ uint64_t size = 0;
+ uint64_t lookup = 0;
+ dict_t *dict = NULL;
+ char msg[1024] = {0,};
+ char key[256] = {0,};
+ int32_t i = 1;
+ int32_t counter = 0;
+ char *node_name = 0;
+ gf_defrag_status_t status_rcd = GF_DEFRAG_STATUS_NOT_STARTED;
+ uint64_t failures = 0;
+ uint64_t skipped = 0;
+ double elapsed = 0;
+ char *size_str = NULL;
+ int32_t command = 0;
+ gf1_op_commands cmd = GF_OP_CMD_NONE;
+ cli_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ char *cmd_str = "unknown";
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ frame = myframe;
+
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ if (frame)
+ local = frame->local;
+ ret = dict_get_int32 (local->dict, "command", &command);
+ if (ret)
+ goto out;
+ cmd = command;
+
+ switch (cmd) {
+ case GF_OP_CMD_STOP:
+ cmd_str = "stop";
+ break;
+ case GF_OP_CMD_STATUS:
+ cmd_str = "status";
+ break;
+ default:
+ break;
+ }
+
+ ret = rsp.op_ret;
+ if (rsp.op_ret == -1) {
+ if (strcmp (rsp.op_errstr, ""))
+ snprintf (msg, sizeof (msg), "volume remove-brick %s: "
+ "failed: %s", cmd_str, rsp.op_errstr);
+ else
+ snprintf (msg, sizeof (msg), "volume remove-brick %s: "
+ "failed", cmd_str);
+
+ if (global_state->mode & GLUSTER_MODE_XML)
+ goto xml_output;
+
+ cli_err ("%s", msg);
+ goto out;
+ }
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ strncpy (msg, "failed to unserialize req-buffer to "
+ "dictionary", sizeof (msg));
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ rsp.op_ret = -1;
+ goto xml_output;
+ }
+
+ gf_log ("cli", GF_LOG_ERROR, "%s", msg);
+ goto out;
+ }
+ }
+
+xml_output:
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ if (strcmp (rsp.op_errstr, "")) {
+ ret = cli_xml_output_vol_remove_brick (_gf_true, dict,
+ rsp.op_ret,
+ rsp.op_errno,
+ rsp.op_errstr);
+ } else {
+ ret = cli_xml_output_vol_remove_brick (_gf_true, dict,
+ rsp.op_ret,
+ rsp.op_errno,
+ msg);
+ }
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "count", &counter);
+ if (ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR, "count not set");
+ goto out;
+ }
+
+
+ cli_out ("%40s %16s %13s %13s %13s %13s %14s %s", "Node",
+ "Rebalanced-files", "size", "scanned", "failures", "skipped",
+ "status", "run-time in secs");
+ cli_out ("%40s %16s %13s %13s %13s %13s %14s %16s", "---------",
+ "-----------", "-----------", "-----------", "-----------",
+ "-----------","------------", "--------------");
+
+ do {
+ snprintf (key, 256, "node-name-%d", i);
+ ret = dict_get_str (dict, key, &node_name);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get node-name");
+
+ memset (key, 0, 256);
+ snprintf (key, 256, "files-%d", i);
+ ret = dict_get_uint64 (dict, key, &files);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get file count");
+
+ memset (key, 0, 256);
+ snprintf (key, 256, "size-%d", i);
+ ret = dict_get_uint64 (dict, key, &size);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get size of xfer");
+
+ memset (key, 0, 256);
+ snprintf (key, 256, "lookups-%d", i);
+ ret = dict_get_uint64 (dict, key, &lookup);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get lookedup file count");
+
+ memset (key, 0, 256);
+ snprintf (key, 256, "status-%d", i);
+ ret = dict_get_int32 (dict, key, (int32_t *)&status_rcd);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get status");
+
+ snprintf (key, 256, "failures-%d", i);
+ ret = dict_get_uint64 (dict, key, &failures);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "Failed to get failure on files");
+
+ snprintf (key, 256, "failures-%d", i);
+ ret = dict_get_uint64 (dict, key, &skipped);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "Failed to get skipped files");
+ memset (key, 0, 256);
+ snprintf (key, 256, "run-time-%d", i);
+ ret = dict_get_double (dict, key, &elapsed);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "Failed to get run-time");
+
+ switch (status_rcd) {
+ case GF_DEFRAG_STATUS_NOT_STARTED:
+ status = "not started";
+ break;
+ case GF_DEFRAG_STATUS_STARTED:
+ status = "in progress";
+ break;
+ case GF_DEFRAG_STATUS_STOPPED:
+ status = "stopped";
+ break;
+ case GF_DEFRAG_STATUS_COMPLETE:
+ status = "completed";
+ break;
+ case GF_DEFRAG_STATUS_FAILED:
+ status = "failed";
+ break;
+ default:
+ break;
+ }
+
+ size_str = gf_uint64_2human_readable(size);
+
+ if (strcmp (status, "not started")) {
+ cli_out ("%40s %16"PRIu64 " %13s" " %13"PRIu64 " %13"
+ PRIu64 " %13"PRIu64 " %14s %16.2f", node_name,
+ files, size_str, lookup, failures, skipped,
+ status, elapsed);
+ }
+ GF_FREE(size_str);
+
+ i++;
+ } while (i <= counter);
+
+ if ((cmd == GF_OP_CMD_STOP) && (rsp.op_ret == 0)) {
+ cli_out ("'remove-brick' process may be in the middle of a "
+ "file migration.\nThe process will be fully stopped "
+ "once the migration of the file is complete.\nPlease "
+ "check remove-brick process for completion before "
+ "doing any further brick related tasks on the "
+ "volume.");
+ }
+
+out:
+ free (rsp.dict.dict_val); //malloced by xdr
+ if (dict)
+ dict_unref (dict);
+ cli_cmd_broadcast_response (ret);
+ return ret;
+}
+
+
+int
+gf_cli_remove_brick_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ char msg[1024] = {0,};
+ gf1_op_commands cmd = GF_OP_CMD_NONE;
+ char *cmd_str = "unknown";
+ cli_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ char *task_id_str = NULL;
+ dict_t *rsp_dict = NULL;
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ frame = myframe;
+ local = frame->local;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ ret = dict_get_int32 (local->dict, "command", (int32_t *)&cmd);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "failed to get command");
+ goto out;
+ }
+
+ if (rsp.dict.dict_len) {
+ rsp_dict = dict_new ();
+ if (!rsp_dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (rsp.dict.dict_val, rsp.dict.dict_len,
+ &rsp_dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Failed to unserialize rsp_dict");
+ goto out;
+ }
+ }
+
+ switch (cmd) {
+ case GF_OP_CMD_START:
+ cmd_str = "start";
+
+ ret = dict_get_str (rsp_dict, GF_REMOVE_BRICK_TID_KEY, &task_id_str);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "remove-brick-id is not present in dict");
+ }
+ break;
+ case GF_OP_CMD_COMMIT:
+ cmd_str = "commit";
+ break;
+ case GF_OP_CMD_COMMIT_FORCE:
+ cmd_str = "commit force";
+ break;
+ default:
+ cmd_str = "unknown";
+ break;
+ }
+
+ gf_log ("cli", GF_LOG_INFO, "Received resp to remove brick");
+
+ if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
+ snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
+ else
+ snprintf (msg, sizeof (msg), "Remove Brick %s %s", cmd_str,
+ (rsp.op_ret) ? "unsuccessful": "successful");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_remove_brick (_gf_false, rsp_dict,
+ rsp.op_ret, rsp.op_errno,
+ msg);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (rsp.op_ret) {
+ cli_err ("volume remove-brick %s: failed: %s", cmd_str,
+ msg);
+ } else {
+ cli_out ("volume remove-brick %s: success", cmd_str);
+ if (GF_OP_CMD_START == cmd && task_id_str != NULL)
+ cli_out ("ID: %s", task_id_str);
+ }
+
+ ret = rsp.op_ret;
+
+out:
+ cli_cmd_broadcast_response (ret);
+ free (rsp.dict.dict_val);
+ free (rsp.op_errstr);
+
+ return ret;
+}
+
+
+
+int
+gf_cli_replace_brick_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ cli_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ char *src_brick = NULL;
+ char *dst_brick = NULL;
+ char *status_reply = NULL;
+ gf1_cli_replace_op replace_op = 0;
+ char *rb_operation_str = NULL;
+ dict_t *rsp_dict = NULL;
+ char msg[1024] = {0,};
+ char *task_id_str = NULL;
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ frame = (call_frame_t *) myframe;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ local = frame->local;
+ GF_ASSERT (local);
+ dict = local->dict;
+
+ ret = dict_get_int32 (dict, "operation", (int32_t *)&replace_op);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG,
+ "dict_get on operation failed");
+ goto out;
+ }
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ rsp_dict = dict_new ();
+
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &rsp_dict);
+ if (ret < 0) {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "failed to "
+ "unserialize rsp buffer to dictionary");
+ goto out;
+ }
+ }
+
+ switch (replace_op) {
+ case GF_REPLACE_OP_START:
+ if (rsp.op_ret) {
+ rb_operation_str = gf_strdup ("replace-brick failed to"
+ " start");
+ } else {
+ ret = dict_get_str (rsp_dict, GF_REPLACE_BRICK_TID_KEY,
+ &task_id_str);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to get "
+ "\"replace-brick-id\" from dict");
+ goto out;
+ }
+ ret = gf_asprintf (&rb_operation_str,
+ "replace-brick started successfully"
+ "\nID: %s", task_id_str);
+ if (ret < 0)
+ goto out;
+ }
+ break;
+
+ case GF_REPLACE_OP_STATUS:
+
+ if (rsp.op_ret || ret) {
+ rb_operation_str = gf_strdup ("replace-brick status "
+ "unknown");
+ } else {
+ ret = dict_get_str (rsp_dict, "status-reply",
+ &status_reply);
+ if (ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR, "failed to"
+ "get status");
+ goto out;
+ }
+
+ rb_operation_str = gf_strdup (status_reply);
+ }
+
+ break;
+
+ case GF_REPLACE_OP_PAUSE:
+ if (rsp.op_ret)
+ rb_operation_str = gf_strdup ("replace-brick pause "
+ "failed");
+ else
+ rb_operation_str = gf_strdup ("replace-brick paused "
+ "successfully");
+ break;
+
+ case GF_REPLACE_OP_ABORT:
+ if (rsp.op_ret)
+ rb_operation_str = gf_strdup ("replace-brick abort "
+ "failed");
+ else
+ rb_operation_str = gf_strdup ("replace-brick aborted "
+ "successfully");
+ break;
+
+ case GF_REPLACE_OP_COMMIT:
+ case GF_REPLACE_OP_COMMIT_FORCE:
+ ret = dict_get_str (dict, "src-brick", &src_brick);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG,
+ "dict_get on src-brick failed");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "dst-brick", &dst_brick);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG,
+ "dict_get on dst-brick failed");
+ goto out;
+ }
+
+
+ if (rsp.op_ret || ret)
+ rb_operation_str = gf_strdup ("replace-brick commit "
+ "failed");
+ else
+ rb_operation_str = gf_strdup ("replace-brick commit "
+ "successful");
+
+ break;
+
+ default:
+ gf_log ("", GF_LOG_DEBUG,
+ "Unknown operation");
+ break;
+ }
+
+ if (rsp.op_ret && (strcmp (rsp.op_errstr, ""))) {
+ rb_operation_str = gf_strdup (rsp.op_errstr);
+ }
+
+ gf_log ("cli", GF_LOG_INFO, "Received resp to replace brick");
+ snprintf (msg, sizeof (msg), "%s",
+ rb_operation_str ? rb_operation_str : "Unknown operation");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_replace_brick (replace_op, rsp_dict,
+ rsp.op_ret,
+ rsp.op_errno, msg);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (rsp.op_ret)
+ cli_err ("volume replace-brick: failed: %s", msg);
+ else
+ cli_out ("volume replace-brick: success: %s", msg);
+ ret = rsp.op_ret;
+
+out:
+ if (frame)
+ frame->local = NULL;
+
+ if (local) {
+ dict_unref (local->dict);
+ cli_local_wipe (local);
+ }
+
+ if (rb_operation_str)
+ GF_FREE (rb_operation_str);
+
+ cli_cmd_broadcast_response (ret);
+ free (rsp.dict.dict_val);
+ if (rsp_dict)
+ dict_unref (rsp_dict);
+
+ return ret;
+}
+
+
+static int
+gf_cli_log_rotate_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ char msg[1024] = {0,};
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ gf_log ("cli", GF_LOG_DEBUG, "Received resp to log rotate");
+
+ if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
+ snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
+ else
+ snprintf (msg, sizeof (msg), "log rotate %s",
+ (rsp.op_ret) ? "unsuccessful": "successful");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str ("volLogRotate", msg, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (rsp.op_ret)
+ cli_err ("volume log-rotate: failed: %s", msg);
+ else
+ cli_out ("volume log-rotate: success");
+ ret = rsp.op_ret;
+
+out:
+ cli_cmd_broadcast_response (ret);
+ free (rsp.dict.dict_val);
+
+ return ret;
+}
+
+static int
+gf_cli_sync_volume_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ char msg[1024] = {0,};
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ gf_log ("cli", GF_LOG_DEBUG, "Received resp to sync");
+
+ if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
+ snprintf (msg, sizeof (msg), "volume sync: failed: %s",
+ rsp.op_errstr);
+ else
+ snprintf (msg, sizeof (msg), "volume sync: %s",
+ (rsp.op_ret) ? "failed": "success");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str ("volSync", msg, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (rsp.op_ret)
+ cli_err ("%s", msg);
+ else
+ cli_out ("%s", msg);
+ ret = rsp.op_ret;
+
+out:
+ cli_cmd_broadcast_response (ret);
+ return ret;
+}
+
+int32_t
+gf_cli_print_limit_list (char *volname, char *limit_list,
+ char *op_errstr)
+{
+ int64_t size = 0;
+ int64_t limit_value = 0;
+ int32_t i, j;
+ int32_t len = 0, ret = -1;
+ char *size_str = NULL;
+ char path [PATH_MAX] = {0, };
+ char ret_str [1024] = {0, };
+ char value [1024] = {0, };
+ char mountdir [] = "/tmp/mntXXXXXX";
+ char abspath [PATH_MAX] = {0, };
+ char *colon_ptr = NULL;
+ runner_t runner = {0,};
+
+ GF_VALIDATE_OR_GOTO ("cli", volname, out);
+ GF_VALIDATE_OR_GOTO ("cli", limit_list, out);
+
+ if (!connected)
+ goto out;
+
+ len = strlen (limit_list);
+ if (len == 0) {
+ cli_err ("%s", op_errstr?op_errstr:"quota limit not set ");
+ goto out;
+ }
+
+ if (mkdtemp (mountdir) == NULL) {
+ gf_log ("cli", GF_LOG_WARNING, "failed to create a temporary "
+ "mount directory");
+ ret = -1;
+ goto out;
+ }
+
+ /* Mount a temporary client to fetch the disk usage
+ * of the directory on which the limit is set.
+ */
+ ret = runcmd (SBIN_DIR"/glusterfs", "-s",
+ "localhost", "--volfile-id", volname, "-l",
+ DEFAULT_LOG_FILE_DIRECTORY"/quota-list.log",
+ mountdir, NULL);
+ if (ret) {
+ gf_log ("cli", GF_LOG_WARNING, "failed to mount glusterfs client");
+ ret = -1;
+ goto rm_dir;
+ }
+
+ len = strlen (limit_list);
+ if (len == 0) {
+ cli_err ("quota limit not set ");
+ goto unmount;
+ }
+
+ i = 0;
+
+ cli_out ("\tpath\t\t limit_set\t size");
+ cli_out ("-----------------------------------------------------------"
+ "-----------------------");
+ while (i < len) {
+ j = 0;
+
+ while (limit_list [i] != ',' && limit_list [i] != '\0') {
+ path [j++] = limit_list[i++];
+ }
+ path [j] = '\0';
+ //here path[] contains both path and limit value
+
+ colon_ptr = strrchr (path, ':');
+ *colon_ptr = '\0';
+ strcpy (value, ++colon_ptr);
+
+ snprintf (abspath, sizeof (abspath), "%s/%s", mountdir, path);
+
+ ret = sys_lgetxattr (abspath, "trusted.limit.list", (void *) ret_str, 4096);
+ if (ret < 0) {
+ cli_out ("%-20s %10s", path, value);
+ } else {
+ sscanf (ret_str, "%"PRId64",%"PRId64, &size,
+ &limit_value);
+ size_str = gf_uint64_2human_readable ((uint64_t) size);
+ if (size_str == NULL) {
+ cli_out ("%-20s %10s %20"PRId64, path,
+ value, size);
+ } else {
+ cli_out ("%-20s %10s %20s", path,
+ value, size_str);
+ GF_FREE (size_str);
+ }
+ }
+ i++;
+ }
+
+unmount:
+
+ runinit (&runner);
+ runner_add_args (&runner, "umount",
+#if GF_LINUX_HOST_OS
+ "-l",
+#endif
+ mountdir, NULL);
+ ret = runner_run_reuse (&runner);
+ if (ret)
+ runner_log (&runner, "cli", GF_LOG_WARNING, "error executing");
+ runner_end (&runner);
+
+rm_dir:
+ rmdir (mountdir);
+out:
+ return ret;
+}
+
+int
+gf_cli_quota_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ char *limit_list = NULL;
+ int32_t type = 0;
+ char msg[1024] = {0,};
+ call_frame_t *frame = NULL;
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ frame = myframe;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ if (rsp.op_ret &&
+ strcmp (rsp.op_errstr, "") == 0) {
+ snprintf (msg, sizeof (msg), "command unsuccessful %s",
+ rsp.op_errstr);
+
+ if (global_state->mode & GLUSTER_MODE_XML)
+ goto xml_output;
+ goto out;
+ }
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ goto out;
+ }
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get volname");
+
+ ret = dict_get_str (dict, "limit_list", &limit_list);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get limit_list");
+
+ ret = dict_get_int32 (dict, "type", &type);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get type");
+
+ if (type == GF_QUOTA_OPTION_TYPE_LIST) {
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_quota_limit_list
+ (volname, limit_list, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+
+ }
+
+ if (limit_list) {
+ gf_cli_print_limit_list (volname,
+ limit_list,
+ rsp.op_errstr);
+ } else {
+ gf_log ("cli", GF_LOG_INFO, "Received resp to quota "
+ "command ");
+ if (rsp.op_errstr)
+ snprintf (msg, sizeof (msg), "%s",
+ rsp.op_errstr);
+ }
+ } else {
+ gf_log ("cli", GF_LOG_INFO, "Received resp to quota command ");
+ if (rsp.op_errstr)
+ snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
+ else
+ snprintf (msg, sizeof (msg), "successful");
+ }
+
+xml_output:
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str ("volQuota", msg, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (strlen (msg) > 0) {
+ if (rsp.op_ret)
+ cli_err ("%s", msg);
+ else
+ cli_out ("%s", msg);
+ }
+
+ ret = rsp.op_ret;
+out:
+ cli_cmd_broadcast_response (ret);
+ if (dict)
+ dict_unref (dict);
+
+ free (rsp.dict.dict_val);
+
+ return ret;
+}
+
+int
+gf_cli_getspec_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf_getspec_rsp rsp = {0,};
+ int ret = -1;
+ char *spec = NULL;
+ call_frame_t *frame = NULL;
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ frame = myframe;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ if (rsp.op_ret == -1) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "getspec failed");
+ goto out;
+ }
+
+ gf_log ("cli", GF_LOG_INFO, "Received resp to getspec");
+
+ spec = GF_MALLOC (rsp.op_ret + 1, cli_mt_char);
+ if (!spec) {
+ gf_log("", GF_LOG_ERROR, "out of memory");
+ goto out;
+ }
+ memcpy (spec, rsp.spec, rsp.op_ret);
+ spec[rsp.op_ret] = '\0';
+ cli_out ("%s", spec);
+ GF_FREE (spec);
+
+ ret = 0;
+
+out:
+ cli_cmd_broadcast_response (ret);
+ return ret;
+}
+
+int
+gf_cli_pmap_b2p_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ pmap_port_by_brick_rsp rsp = {0,};
+ int ret = -1;
+ char *spec = NULL;
+ call_frame_t *frame = NULL;
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ frame = myframe;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_pmap_port_by_brick_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ if (rsp.op_ret == -1) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "pump_b2p failed");
+ goto out;
+ }
+
+ gf_log ("cli", GF_LOG_INFO, "Received resp to pmap b2p");
+
+ cli_out ("%d", rsp.port);
+ GF_FREE (spec);
+
+ ret = rsp.op_ret;
+
+out:
+ cli_cmd_broadcast_response (ret);
+ return ret;
+}
+
+
+int32_t
+gf_cli_probe (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf_cli_req req = {{0,},};
+ int ret = 0;
+ dict_t *dict = NULL;
+ int port = 0;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+
+ ret = dict_get_int32 (dict, "port", &port);
+ if (ret) {
+ ret = dict_set_int32 (dict, "port", CLI_GLUSTERD_PORT);
+ if (ret)
+ goto out;
+ }
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_probe_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_PROBE, this, cli_rpc_prog, NULL);
+
+out:
+ GF_FREE (req.dict.dict_val);
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+int32_t
+gf_cli_deprobe (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf_cli_req req = {{0,},};
+ int ret = 0;
+ dict_t *dict = NULL;
+ int port = 0;
+ int flags = 0;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+ ret = dict_get_int32 (dict, "port", &port);
+ if (ret) {
+ ret = dict_set_int32 (dict, "port", CLI_GLUSTERD_PORT);
+ if (ret)
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "flags", &flags);
+ if (ret) {
+ ret = dict_set_int32 (dict, "flags", 0);
+ if (ret)
+ goto out;
+ }
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_deprobe_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_DEPROBE, this, cli_rpc_prog, NULL);
+
+out:
+ GF_FREE (req.dict.dict_val);
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+int32_t
+gf_cli_list_friends (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf1_cli_peer_list_req req = {0,};
+ int ret = 0;
+ unsigned long flags = 0;
+
+ if (!frame || !this) {
+ ret = -1;
+ goto out;
+ }
+
+ GF_ASSERT (frame->local == NULL);
+
+ flags = (long)data;
+ req.flags = flags;
+ frame->local = (void*)flags;
+ ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
+ GLUSTER_CLI_LIST_FRIENDS, NULL,
+ this, gf_cli_list_friends_cbk,
+ (xdrproc_t) xdr_gf1_cli_peer_list_req);
+
+out:
+ if (ret) {
+ /*
+ * If everything goes fine, gf_cli_list_friends_cbk()
+ * [invoked through cli_cmd_submit()]resets the
+ * frame->local to NULL. In case cli_cmd_submit()
+ * fails in between, RESET frame->local here.
+ */
+ frame->local = NULL;
+ }
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+gf_cli_get_next_volume (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+
+ int ret = 0;
+ cli_cmd_volume_get_ctx_t *ctx = NULL;
+ cli_local_t *local = NULL;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ ctx = data;
+ local = frame->local;
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_info_begin (local, 0, 0, "");
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Error outputting to xml");
+ goto out;
+ }
+ }
+
+ ret = gf_cli_get_volume (frame, this, data);
+
+
+ if (!local || !local->get_vol.volname) {
+ if ((global_state->mode & GLUSTER_MODE_XML))
+ goto end_xml;
+
+ cli_err ("No volumes present");
+ goto out;
+ }
+
+
+ ctx->volname = local->get_vol.volname;
+
+ while (ctx->volname) {
+ ret = gf_cli_get_volume (frame, this, ctx);
+ if (ret)
+ goto out;
+ ctx->volname = local->get_vol.volname;
+ }
+
+end_xml:
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_info_end (local);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR, "Error outputting to xml");
+ }
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+gf_cli_get_volume (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf_cli_req req = {{0,}};
+ int ret = 0;
+ cli_cmd_volume_get_ctx_t *ctx = NULL;
+ dict_t *dict = NULL;
+ int32_t flags = 0;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ ctx = data;
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ if (ctx->volname) {
+ ret = dict_set_str (dict, "volname", ctx->volname);
+ if (ret)
+ goto out;
+ }
+
+ flags = ctx->flags;
+ ret = dict_set_int32 (dict, "flags", flags);
+ if (ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR, "failed to set flags");
+ goto out;
+ }
+
+ ret = dict_allocate_and_serialize (dict, &req.dict.dict_val,
+ &req.dict.dict_len);
+
+ ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
+ GLUSTER_CLI_GET_VOLUME, NULL,
+ this, gf_cli_get_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req);
+
+out:
+ if (dict)
+ dict_unref (dict);
+
+ GF_FREE (req.dict.dict_val);
+
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+gf_cli3_1_uuid_get (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf_cli_req req = {{0,}};
+ int ret = 0;
+ dict_t *dict = NULL;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+ ret = cli_to_glusterd (&req, frame, gf_cli3_1_uuid_get_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_UUID_GET, this, cli_rpc_prog,
+ NULL);
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+gf_cli3_1_uuid_reset (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf_cli_req req = {{0,}};
+ int ret = 0;
+ dict_t *dict = NULL;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+ ret = cli_to_glusterd (&req, frame, gf_cli3_1_uuid_reset_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_UUID_RESET, this, cli_rpc_prog,
+ NULL);
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+gf_cli_create_volume (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf_cli_req req = {{0,}};
+ int ret = 0;
+ dict_t *dict = NULL;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_create_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_CREATE_VOLUME, this, cli_rpc_prog,
+ NULL);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+ GF_FREE (req.dict.dict_val);
+
+ return ret;
+}
+
+int32_t
+gf_cli_delete_volume (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf_cli_req req = {{0,}};
+ int ret = 0;
+ dict_t *dict = NULL;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_delete_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_DELETE_VOLUME, this, cli_rpc_prog,
+ NULL);
+
+out:
+ GF_FREE (req.dict.dict_val);
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+int32_t
+gf_cli_start_volume (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf_cli_req req = {{0,}};
+ int ret = 0;
+ dict_t *dict = NULL;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_start_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_START_VOLUME, this, cli_rpc_prog,
+ NULL);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+int32_t
+gf_cli_stop_volume (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf_cli_req req = {{0,}};
+ int ret = 0;
+ dict_t *dict = data;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_stop_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_STOP_VOLUME, this, cli_rpc_prog,
+ NULL);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+int32_t
+gf_cli_defrag_volume (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf_cli_req req = {{0,}};
+ int ret = 0;
+ dict_t *dict = NULL;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_defrag_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_DEFRAG_VOLUME, this, cli_rpc_prog,
+ NULL);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+int32_t
+gf_cli_rename_volume (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf_cli_req req = {{0,}};
+ int ret = 0;
+ dict_t *dict = NULL;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+
+ ret = dict_allocate_and_serialize (dict, &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to serialize the data");
+
+ goto out;
+ }
+
+
+ ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
+ GLUSTER_CLI_RENAME_VOLUME, NULL,
+ this, gf_cli_rename_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+int32_t
+gf_cli_reset_volume (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf_cli_req req = {{0,}};
+ int ret = 0;
+ dict_t *dict = NULL;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_reset_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_RESET_VOLUME, this, cli_rpc_prog,
+ NULL);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+gf_cli_set_volume (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf_cli_req req = {{0,}};
+ int ret = 0;
+ dict_t *dict = NULL;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_set_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_SET_VOLUME, this, cli_rpc_prog,
+ NULL);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+int32_t
+gf_cli_add_brick (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf_cli_req req = {{0,}};
+ int ret = 0;
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ int32_t count = 0;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+
+ ret = dict_get_str (dict, "volname", &volname);
+
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32 (dict, "count", &count);
+ if (ret)
+ goto out;
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_add_brick_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_ADD_BRICK, this, cli_rpc_prog, NULL);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+ GF_FREE (req.dict.dict_val);
+
+ return ret;
+}
+
+int32_t
+gf_cli_remove_brick (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf_cli_req req = {{0,}};;
+ gf_cli_req status_req = {{0,}};;
+ int ret = 0;
+ dict_t *dict = NULL;
+ int32_t command = 0;
+ char *volname = NULL;
+ dict_t *req_dict = NULL;
+ int32_t cmd = 0;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32 (dict, "command", &command);
+ if (ret)
+ goto out;
+
+ if ((command != GF_OP_CMD_STATUS) &&
+ (command != GF_OP_CMD_STOP)) {
+
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_remove_brick_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_REMOVE_BRICK, this,
+ cli_rpc_prog, NULL);
+ } else {
+ /* Need rebalance status to be sent :-) */
+ req_dict = dict_new ();
+ if (!req_dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str (req_dict, "volname", volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set dict");
+ goto out;
+ }
+
+ if (command == GF_OP_CMD_STATUS)
+ cmd |= GF_DEFRAG_CMD_STATUS;
+ else
+ cmd |= GF_DEFRAG_CMD_STOP;
+
+ ret = dict_set_int32 (req_dict, "rebalance-command", (int32_t) cmd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set dict");
+ goto out;
+ }
+
+ ret = cli_to_glusterd (&status_req, frame,
+ gf_cli3_remove_brick_status_cbk,
+ (xdrproc_t) xdr_gf_cli_req, req_dict,
+ GLUSTER_CLI_DEFRAG_VOLUME, this,
+ cli_rpc_prog, NULL);
+
+ }
+
+out:
+ if (req_dict)
+ dict_unref (req_dict);
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+ GF_FREE (req.dict.dict_val);
+
+ GF_FREE (status_req.dict.dict_val);
+
+ return ret;
+}
+
+int32_t
+gf_cli_replace_brick (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf_cli_req req = {{0,}};
+ int ret = 0;
+ dict_t *dict = NULL;
+ char *src_brick = NULL;
+ char *dst_brick = NULL;
+ char *volname = NULL;
+ int32_t op = 0;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+
+ ret = dict_get_int32 (dict, "operation", &op);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "dict_get on operation failed");
+ goto out;
+ }
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "dict_get on volname failed");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "src-brick", &src_brick);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "dict_get on src-brick failed");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "dst-brick", &dst_brick);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "dict_get on dst-brick failed");
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Received command replace-brick %s with "
+ "%s with operation=%d", src_brick,
+ dst_brick, op);
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_replace_brick_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_REPLACE_BRICK, this, cli_rpc_prog,
+ NULL);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+ GF_FREE (req.dict.dict_val);
+
+ return ret;
+}
+
+
+int32_t
+gf_cli_log_rotate (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf_cli_req req = {{0,}};
+ int ret = 0;
+ dict_t *dict = NULL;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_log_rotate_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_LOG_ROTATE, this, cli_rpc_prog,
+ NULL);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+ GF_FREE (req.dict.dict_val);
+ return ret;
+}
+
+int32_t
+gf_cli_sync_volume (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ int ret = 0;
+ gf_cli_req req = {{0,}};
+ dict_t *dict = NULL;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_sync_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_SYNC_VOLUME, this, cli_rpc_prog,
+ NULL);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ GF_FREE (req.dict.dict_val);
+
+ return ret;
+}
+
+int32_t
+gf_cli_getspec (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf_getspec_req req = {0,};
+ int ret = 0;
+ dict_t *dict = NULL;
+ dict_t *op_dict = NULL;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+
+ ret = dict_get_str (dict, "volid", &req.key);
+ if (ret)
+ goto out;
+
+ op_dict = dict_new ();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ // Set the supported min and max op-versions, so glusterd can make a
+ // decision
+ ret = dict_set_int32 (op_dict, "min-op-version", GD_OP_VERSION_MIN);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to set min-op-version"
+ " in request dict");
+ goto out;
+ }
+
+ ret = dict_set_int32 (op_dict, "max-op-version", GD_OP_VERSION_MAX);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to set max-op-version"
+ " in request dict");
+ goto out;
+ }
+
+ ret = dict_allocate_and_serialize (op_dict, &req.xdata.xdata_val,
+ &req.xdata.xdata_len);
+ if (ret < 0) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Failed to serialize dictionary");
+ goto out;
+ }
+
+ ret = cli_cmd_submit (&req, frame, &cli_handshake_prog,
+ GF_HNDSK_GETSPEC, NULL,
+ this, gf_cli_getspec_cbk,
+ (xdrproc_t) xdr_gf_getspec_req);
+
+out:
+ if (op_dict) {
+ dict_unref(op_dict);
+ }
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+int32_t
+gf_cli_quota (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf_cli_req req = {{0,}};
+ int ret = 0;
+ dict_t *dict = NULL;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_quota_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_QUOTA, this, cli_rpc_prog, NULL);
+
+out:
+ GF_FREE (req.dict.dict_val);
+
+ return ret;
+}
+
+int32_t
+gf_cli_pmap_b2p (call_frame_t *frame, xlator_t *this, void *data)
+{
+ pmap_port_by_brick_req req = {0,};
+ int ret = 0;
+ dict_t *dict = NULL;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+
+ ret = dict_get_str (dict, "brick", &req.brick);
+ if (ret)
+ goto out;
+
+ ret = cli_cmd_submit (&req, frame, &cli_pmap_prog,
+ GF_PMAP_PORTBYBRICK, NULL,
+ this, gf_cli_pmap_b2p_cbk,
+ (xdrproc_t) xdr_pmap_port_by_brick_req);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+static int
+gf_cli_fsm_log_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf1_cli_fsm_log_rsp rsp = {0,};
+ int ret = -1;
+ dict_t *dict = NULL;
+ int tr_count = 0;
+ char key[256] = {0};
+ int i = 0;
+ char *old_state = NULL;
+ char *new_state = NULL;
+ char *event = NULL;
+ char *time = NULL;
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_fsm_log_rsp);
+ if (ret < 0) {
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ if (rsp.op_ret) {
+ if (strcmp (rsp.op_errstr, ""))
+ cli_err ("%s", rsp.op_errstr);
+ cli_err ("fsm log unsuccessful");
+ ret = rsp.op_ret;
+ goto out;
+ }
+
+ dict = dict_new ();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (rsp.fsm_log.fsm_log_val,
+ rsp.fsm_log.fsm_log_len,
+ &dict);
+
+ if (ret) {
+ cli_err ("bad response");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "count", &tr_count);
+ if (tr_count)
+ cli_out("number of transitions: %d", tr_count);
+ else
+ cli_err("No transitions");
+ for (i = 0; i < tr_count; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "log%d-old-state", i);
+ ret = dict_get_str (dict, key, &old_state);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "log%d-event", i);
+ ret = dict_get_str (dict, key, &event);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "log%d-new-state", i);
+ ret = dict_get_str (dict, key, &new_state);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "log%d-time", i);
+ ret = dict_get_str (dict, key, &time);
+ if (ret)
+ goto out;
+ cli_out ("Old State: [%s]\n"
+ "New State: [%s]\n"
+ "Event : [%s]\n"
+ "timestamp: [%s]\n", old_state, new_state, event, time);
+ }
+
+ ret = rsp.op_ret;
+
+out:
+ cli_cmd_broadcast_response (ret);
+ return ret;
+}
+
+int32_t
+gf_cli_fsm_log (call_frame_t *frame, xlator_t *this, void *data)
+{
+ int ret = -1;
+ gf1_cli_fsm_log_req req = {0,};
+
+ GF_ASSERT (frame);
+ GF_ASSERT (this);
+ GF_ASSERT (data);
+
+ if (!frame || !this || !data)
+ goto out;
+ req.name = data;
+ ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
+ GLUSTER_CLI_FSM_LOG, NULL,
+ this, gf_cli_fsm_log_cbk,
+ (xdrproc_t) xdr_gf1_cli_fsm_log_req);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+int
+gf_cli_gsync_config_command (dict_t *dict)
+{
+ runner_t runner = {0,};
+ char *subop = NULL;
+ char *gwd = NULL;
+ char *slave = NULL;
+ char *confpath = NULL;
+ char *master = NULL;
+ char *op_name = NULL;
+ int ret = -1;
+ char conf_path[PATH_MAX] = "";
+
+ if (dict_get_str (dict, "subop", &subop) != 0)
+ return -1;
+
+ if (strcmp (subop, "get") != 0 && strcmp (subop, "get-all") != 0) {
+ cli_out (GEOREP" config updated successfully");
+ return 0;
+ }
+
+ if (dict_get_str (dict, "glusterd_workdir", &gwd) != 0 ||
+ dict_get_str (dict, "slave", &slave) != 0)
+ return -1;
+
+ if (dict_get_str (dict, "master", &master) != 0)
+ master = NULL;
+ if (dict_get_str (dict, "op_name", &op_name) != 0)
+ op_name = NULL;
+
+ ret = dict_get_str (dict, "conf_path", &confpath);
+ if (!confpath) {
+ ret = snprintf (conf_path, sizeof(conf_path) - 1,
+ "%s/"GEOREP"/gsyncd_template.conf", gwd);
+ conf_path[ret] = '\0';
+ confpath = conf_path;
+ }
+
+ runinit (&runner);
+ runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL);
+ runner_argprintf (&runner, "%s", confpath);
+ if (master)
+ runner_argprintf (&runner, ":%s", master);
+ runner_add_arg (&runner, slave);
+ runner_argprintf (&runner, "--config-%s", subop);
+ if (op_name)
+ runner_add_arg (&runner, op_name);
+
+ return runner_run (&runner);
+}
+
+int
+gf_cli_fetch_gsyncd_status_values (char *status,
+ gf_cli_gsync_status_t *sts_val)
+{
+ int32_t ret = -1;
+ char *tmp = NULL;
+ char *save_ptr = NULL;
+ char *key = NULL;
+ char *value = NULL;
+
+ if (!status || !sts_val) {
+ gf_log ("", GF_LOG_ERROR, "status or sts_val is null");
+ goto out;
+ }
+
+ tmp = strtok_r (status, "\n", &save_ptr);
+
+ if (tmp)
+ sts_val->health = gf_strdup (tmp);
+
+ while (tmp) {
+ key = strtok_r (tmp, "=", &value);
+
+ if ((key) && (!strcmp(key, "Uptime")))
+ sts_val->uptime = gf_strdup (value);
+
+ if ((key) && (!strcmp(key, "FilesSyncd")))
+ sts_val->files_syncd = gf_strdup (value);
+
+ if ((key) && (!strcmp(key, "FilesPending")))
+ sts_val->files_pending = gf_strdup (value);
+
+ if ((key) && (!strcmp(key, "BytesPending"))) {
+ value = gf_uint64_2human_readable(atol(value));
+ sts_val->bytes_pending = gf_strdup (value);
+ }
+
+ if ((key) && (!strcmp(key, "DeletesPending")))
+ sts_val->deletes_pending = gf_strdup (value);
+
+ tmp = strtok_r (NULL, ";", &save_ptr);
+ }
+
+ if (sts_val->health)
+ ret = 0;
+
+ if (!sts_val->uptime)
+ sts_val->uptime = gf_strdup ("N/A");
+
+ if (!sts_val->files_syncd)
+ sts_val->files_syncd = gf_strdup ("N/A");
+
+ if (!sts_val->files_pending)
+ sts_val->files_pending = gf_strdup ("N/A");
+
+ if (!sts_val->bytes_pending)
+ sts_val->bytes_pending = gf_strdup ("N/A");
+
+ if (!sts_val->deletes_pending)
+ sts_val->deletes_pending = gf_strdup ("N/A");
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d.", ret);
+ return ret;
+}
+
+char*
+get_struct_variable (int mem_num, gf_cli_gsync_status_t *sts_val)
+{
+ switch (mem_num) {
+ case 0: return (sts_val->node);
+ case 1: return (sts_val->master);
+ case 2: return (sts_val->slave);
+ case 3: return (sts_val->health);
+ case 4: return (sts_val->uptime);
+ case 5: return (sts_val->files_syncd);
+ case 6: return (sts_val->files_pending);
+ case 7: return (sts_val->bytes_pending);
+ case 8: return (sts_val->deletes_pending);
+ default:
+ goto out;
+ }
+
+out:
+ return NULL;
+}
+
+int
+gf_cli_print_status (char **title_values,
+ gf_cli_gsync_status_t **sts_vals,
+ int *spacing, int gsync_count,
+ int number_of_fields, int is_detail)
+{
+ int indents = 0;
+ int i = 0;
+ int j = 0;
+ int ret = 0;
+ int total_spacing = 0;
+ char **output_values = NULL;
+ char *tmp = NULL;
+ char *hyphens = NULL;
+ char heading[PATH_MAX] = {0, };
+ char indent_spaces[PATH_MAX] = {0, };
+
+ /* calculating spacing for hyphens */
+ for (i = 0; i < number_of_fields; i++) {
+ /* Suppressing master and slave output for status detail */
+ if ((is_detail) && ((i == 1) || (i == 2))) {
+ total_spacing++;
+ continue;
+ } else if ((!is_detail) && (i > 4)) {
+ /* Suppressing detailed output for
+ * status */
+ continue;
+ }
+ spacing[i] += 3; /* Adding extra space to
+ distinguish between fields */
+ total_spacing += spacing[i];
+ }
+ total_spacing += 4; /* For the spacing between the fields */
+
+ /* char pointers for each field */
+ output_values = GF_CALLOC (number_of_fields, sizeof (char *),
+ gf_common_mt_char);
+ if (!output_values) {
+ ret = -1;
+ goto out;
+ }
+ for (i = 0; i < number_of_fields; i++) {
+ output_values[i] = GF_CALLOC (spacing[i] + 1, sizeof (char),
+ gf_common_mt_char);
+ if (!output_values[i]) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ hyphens = GF_CALLOC (total_spacing + 1, sizeof (char),
+ gf_common_mt_char);
+ if (!hyphens) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = snprintf(heading, sizeof(heading), "MASTER: %s SLAVE: %s",
+ sts_vals[0]->master, sts_vals[0]->slave);
+ if (ret) {
+ if (ret < sizeof(heading))
+ heading[ret] = '\0';
+ else
+ heading[sizeof(heading) - 1] = '\0';
+ ret = 0;
+ } else {
+ ret = -1;
+ goto out;
+ }
+
+ if (is_detail) {
+ cli_out (" ");
+ if (strlen(heading) > total_spacing)
+ cli_out ("%s", heading);
+ else {
+ /* Printing the heading with centre justification */
+ indents = (total_spacing - strlen(heading)) / 2;
+ memset (indent_spaces, ' ', indents);
+ indent_spaces[indents] = '\0';
+ ret = snprintf (hyphens, total_spacing, "%s%s",
+ indent_spaces, heading);
+ if (ret) {
+ hyphens[ret] = '\0';
+ cli_out ("%s", hyphens);
+ ret = 0;
+ } else {
+ ret = -1;
+ goto out;
+ }
+ }
+ cli_out (" ");
+ }
+
+ /* setting the title "NODE", "MASTER", etc. from title_values[]
+ and printing the same */
+ for (j = 0; j < number_of_fields; j++) {
+ /* Suppressing master and slave output for status detail */
+ if ((is_detail) && ((j == 1) || (j == 2))) {
+ output_values[j][0] = '\0';
+ continue;
+ } else if ((!is_detail) && (j > 4)) {
+ /* Suppressing detailed output for
+ * status */
+ output_values[j][0] = '\0';
+ continue;
+ }
+ memset (output_values[j], ' ', spacing[j]);
+ memcpy (output_values[j], title_values[j],
+ strlen(title_values[j]));
+ output_values[j][spacing[j]] = '\0';
+ }
+ cli_out ("%s %s %s %s %s %s %s %s %s", output_values[0],
+ output_values[1], output_values[2], output_values[3],
+ output_values[4], output_values[5], output_values[6],
+ output_values[7], output_values[8]);
+
+ /* setting and printing the hyphens */
+ memset (hyphens, '-', total_spacing);
+ hyphens[total_spacing] = '\0';
+ cli_out ("%s", hyphens);
+
+ for (i = 0; i < gsync_count; i++) {
+ for (j = 0; j < number_of_fields; j++) {
+ /* Suppressing master and slave output for
+ * status detail */
+ if ((is_detail) && ((j == 1) || (j == 2))) {
+ output_values[j][0] = '\0';
+ continue;
+ } else if ((!is_detail) && (j > 4)) {
+ /* Suppressing detailed output for
+ * status */
+ output_values[j][0] = '\0';
+ continue;
+ }
+ tmp = get_struct_variable(j, sts_vals[i]);
+ if (!tmp) {
+ gf_log ("", GF_LOG_ERROR,
+ "struct member empty.");
+ ret = -1;
+ goto out;
+ }
+ memset (output_values[j], ' ', spacing[j]);
+ memcpy (output_values[j], tmp, strlen (tmp));
+ output_values[j][spacing[j]] = '\0';
+ }
+
+ cli_out ("%s %s %s %s %s %s %s %s %s", output_values[0],
+ output_values[1], output_values[2], output_values[3],
+ output_values[4], output_values[5], output_values[6],
+ output_values[7], output_values[8]);
+ }
+
+out:
+ if (output_values) {
+ for (i = 0; i < number_of_fields; i++) {
+ if (output_values[i])
+ GF_FREE (output_values[i]);
+ }
+ GF_FREE (output_values);
+ }
+
+ if (hyphens)
+ GF_FREE (hyphens);
+
+ return ret;
+}
+
+int
+gf_cli_read_status_data (dict_t *dict,
+ gf_cli_gsync_status_t **sts_vals,
+ int *spacing, int gsync_count,
+ int number_of_fields)
+{
+ int ret = 0;
+ int i = 0;
+ int j = 0;
+ char mst[PATH_MAX] = {0, };
+ char slv[PATH_MAX] = {0, };
+ char sts[PATH_MAX] = {0, };
+ char nds[PATH_MAX] = {0, };
+ char *status = NULL;
+ char *tmp = NULL;
+
+ /* Storing per node status info in each object */
+ for (i = 0; i < gsync_count; i++) {
+ snprintf (nds, sizeof(nds), "node%d", i + 1);
+ snprintf (mst, sizeof(mst), "master%d", i + 1);
+ snprintf (slv, sizeof(slv), "slave%d", i + 1);
+ snprintf (sts, sizeof(sts), "status%d", i + 1);
+
+ /* Fetching the values from dict, and calculating
+ the max length for each field */
+ ret = dict_get_str (dict, nds, &(sts_vals[i]->node));
+ if (ret)
+ goto out;
+
+ ret = dict_get_str (dict, mst, &(sts_vals[i]->master));
+ if (ret)
+ goto out;
+
+ ret = dict_get_str (dict, slv, &(sts_vals[i]->slave));
+ if (ret)
+ goto out;
+
+ ret = dict_get_str (dict, sts, &status);
+ if (ret)
+ goto out;
+
+ /* Fetching health and uptime from sts_val */
+ ret = gf_cli_fetch_gsyncd_status_values (status, sts_vals[i]);
+ if (ret)
+ goto out;
+
+ for (j = 0; j < number_of_fields; j++) {
+ tmp = get_struct_variable(j, sts_vals[i]);
+ if (!tmp) {
+ gf_log ("", GF_LOG_ERROR,
+ "struct member empty.");
+ ret = -1;
+ goto out;
+ }
+ if (strlen (tmp) > spacing[j])
+ spacing[j] = strlen (tmp);
+ }
+ }
+
+out:
+ return ret;
+}
+
+int
+gf_cli_gsync_status_output (dict_t *dict, int status_detail)
+{
+ int gsync_count = 0;
+ int i = 0;
+ int j = 0;
+ int ret = 0;
+ int spacing[10] = {0};
+ int num_of_fields = 9;
+ char errmsg[1024] = "";
+ char *master = NULL;
+ char *slave = NULL;
+ char *tmp = NULL;
+ char *title_values[] = {"NODE", "MASTER", "SLAVE",
+ "HEALTH", "UPTIME",
+ "FILES SYNCD",
+ "FILES PENDING",
+ "BYTES PENDING",
+ "DELETES PENDING"};
+ gf_cli_gsync_status_t **sts_vals = NULL;
+
+ /* Checks if any session is active or not */
+ ret = dict_get_int32 (dict, "gsync-count", &gsync_count);
+ if (ret) {
+ ret = dict_get_str (dict, "master", &master);
+
+ ret = dict_get_str (dict, "slave", &slave);
+
+ if (master) {
+ if (slave)
+ snprintf (errmsg, sizeof(errmsg), "No active "
+ "geo-replication sessions between %s"
+ " and %s", master, slave);
+ else
+ snprintf (errmsg, sizeof(errmsg), "No active "
+ "geo-replication sessions for %s",
+ master);
+ } else
+ snprintf (errmsg, sizeof(errmsg), "No active "
+ "geo-replication sessions");
+
+ gf_log ("cli", GF_LOG_INFO, "%s", errmsg);
+ cli_out ("%s", errmsg);
+ ret = 0;
+ goto out;
+ }
+
+ for (i = 0; i < num_of_fields; i++)
+ spacing[i] = strlen(title_values[i]);
+
+ /* gsync_count = number of nodes reporting output.
+ each sts_val object will store output of each
+ node */
+ sts_vals = GF_CALLOC (gsync_count, sizeof (gf_cli_gsync_status_t *),
+ gf_common_mt_char);
+ if (!sts_vals) {
+ ret = -1;
+ goto out;
+ }
+ for (i = 0; i < gsync_count; i++) {
+ sts_vals[i] = GF_CALLOC (1, sizeof (gf_cli_gsync_status_t),
+ gf_common_mt_char);
+ if (!sts_vals[i]) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = gf_cli_read_status_data (dict, sts_vals, spacing,
+ gsync_count, num_of_fields);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to read status data");
+ goto out;
+ }
+
+ ret = gf_cli_print_status (title_values, sts_vals, spacing, gsync_count,
+ num_of_fields, status_detail);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to print status output");
+ goto out;
+ }
+
+out:
+ if (sts_vals) {
+ for (i = 0; i < gsync_count; i++) {
+ for (j = 3; j < num_of_fields; j++) {
+ tmp = get_struct_variable(j, sts_vals[i]);
+ if (tmp)
+ GF_FREE (tmp);
+ }
+ }
+ GF_FREE (sts_vals);
+ }
+
+ return ret;
+}
+
+static int32_t
+write_contents_to_common_pem_file (dict_t *dict, int output_count)
+{
+ char *workdir = NULL;
+ char common_pem_file[PATH_MAX] = "";
+ char *output = NULL;
+ char output_name[PATH_MAX] = "";
+ int bytes_writen = 0;
+ int fd = -1;
+ int ret = -1;
+ int i = -1;
+
+ ret = dict_get_str (dict, "glusterd_workdir", &workdir);
+ if (ret || !workdir) {
+ gf_log ("", GF_LOG_ERROR, "Unable to fetch workdir");
+ ret = -1;
+ goto out;
+ }
+
+ snprintf (common_pem_file, sizeof(common_pem_file),
+ "%s/geo-replication/common_secret.pem.pub",
+ workdir);
+
+ unlink (common_pem_file);
+
+ fd = open (common_pem_file, O_WRONLY | O_CREAT, 0600);
+ if (fd == -1) {
+ gf_log ("", GF_LOG_ERROR, "Failed to open %s"
+ " Error : %s", common_pem_file,
+ strerror (errno));
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 1; i <= output_count; i++) {
+ memset (output_name, '\0', sizeof (output_name));
+ snprintf (output_name, sizeof (output_name),
+ "output_%d", i);
+ ret = dict_get_str (dict, output_name, &output);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Failed to get %s.",
+ output_name);
+ cli_out ("Unable to fetch output.");
+ }
+ if (output) {
+ bytes_writen = write (fd, output, strlen(output));
+ if (bytes_writen != strlen(output)) {
+ gf_log ("", GF_LOG_ERROR, "Failed to write "
+ "to %s", common_pem_file);
+ ret = -1;
+ goto out;
+ }
+ /* Adding the new line character */
+ bytes_writen = write (fd, "\n", strlen("\n"));
+ if (bytes_writen != strlen("\n")) {
+ gf_log ("", GF_LOG_ERROR,
+ "Failed to add new line char");
+ ret = -1;
+ goto out;
+ }
+ output = NULL;
+ }
+ }
+
+ cli_out ("Common secret pub file present at %s", common_pem_file);
+ ret = 0;
+out:
+ if (fd)
+ close (fd);
+
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+gf_cli_sys_exec_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ int output_count = -1;
+ int i = -1;
+ char *output = NULL;
+ char *command = NULL;
+ char output_name[PATH_MAX] = "";
+ gf_cli_rsp rsp = {0, };
+ dict_t *dict = NULL;
+ call_frame_t *frame = NULL;
+
+ if (req->rpc_status == -1) {
+ ret = -1;
+ goto out;
+ }
+
+ frame = myframe;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ dict = dict_new ();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+
+ if (ret)
+ goto out;
+
+ if (rsp.op_ret) {
+ cli_err ("%s", rsp.op_errstr ? rsp.op_errstr :
+ "Command failed.");
+ ret = rsp.op_ret;
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "output_count", &output_count);
+ if (ret) {
+ cli_out ("Command executed successfully.");
+ ret = 0;
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "command", &command);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to get command from dict");
+ goto out;
+ }
+
+ if (!strcmp (command, "gsec_create")) {
+ ret = write_contents_to_common_pem_file (dict, output_count);
+ if (!ret)
+ goto out;
+ }
+
+ for (i = 1; i <= output_count; i++) {
+ memset (output_name, '\0', sizeof (output_name));
+ snprintf (output_name, sizeof (output_name),
+ "output_%d", i);
+ ret = dict_get_str (dict, output_name, &output);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Failed to get %s.",
+ output_name);
+ cli_out ("Unable to fetch output.");
+ }
+ if (output) {
+ cli_out ("%s", output);
+ output = NULL;
+ }
+ }
+
+ ret = 0;
+out:
+ if (dict)
+ dict_unref (dict);
+ cli_cmd_broadcast_response (ret);
+
+ free (rsp.dict.dict_val);
+
+ return ret;
+}
+
+int
+gf_cli_copy_file_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ gf_cli_rsp rsp = {0, };
+ dict_t *dict = NULL;
+ call_frame_t *frame = NULL;
+
+ if (req->rpc_status == -1) {
+ ret = -1;
+ goto out;
+ }
+
+ frame = myframe;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ dict = dict_new ();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+
+ if (ret)
+ goto out;
+
+ if (rsp.op_ret) {
+ cli_err ("%s", rsp.op_errstr ? rsp.op_errstr :
+ "Copy unsuccessful");
+ ret = rsp.op_ret;
+ goto out;
+ }
+
+ cli_out ("Successfully copied file.");
+
+out:
+ if (dict)
+ dict_unref (dict);
+ cli_cmd_broadcast_response (ret);
+
+ free (rsp.dict.dict_val);
+
+ return ret;
+}
+
+int
+gf_cli_gsync_set_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ gf_cli_rsp rsp = {0, };
+ dict_t *dict = NULL;
+ char *gsync_status = NULL;
+ char *master = NULL;
+ char *slave = NULL;
+ int32_t type = 0;
+ call_frame_t *frame = NULL;
+ gf_boolean_t status_detail = _gf_false;
+
+
+ if (req->rpc_status == -1) {
+ ret = -1;
+ goto out;
+ }
+
+ frame = myframe;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ dict = dict_new ();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+
+ if (ret)
+ goto out;
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_gsync (dict, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (rsp.op_ret) {
+ cli_err ("%s", rsp.op_errstr ? rsp.op_errstr :
+ GEOREP" command unsuccessful");
+ ret = rsp.op_ret;
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "gsync-status", &gsync_status);
+ if (!ret)
+ cli_out ("%s", gsync_status);
+ else
+ ret = 0;
+
+ ret = dict_get_int32 (dict, "type", &type);
+ if (ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR, "failed to get type");
+ goto out;
+ }
+
+ switch (type) {
+ case GF_GSYNC_OPTION_TYPE_START:
+ case GF_GSYNC_OPTION_TYPE_STOP:
+ if (dict_get_str (dict, "master", &master) != 0)
+ master = "???";
+ if (dict_get_str (dict, "slave", &slave) != 0)
+ slave = "???";
+
+ cli_out ("%s " GEOREP " session between %s & %s"
+ " has been successful",
+ type == GF_GSYNC_OPTION_TYPE_START ?
+ "Starting" : "Stopping",
+ master, slave);
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_CONFIG:
+ ret = gf_cli_gsync_config_command (dict);
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_STATUS:
+ status_detail = dict_get_str_boolean (dict,
+ "status-detail",
+ _gf_false);
+ ret = gf_cli_gsync_status_output (dict, status_detail);
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_DELETE:
+ if (dict_get_str (dict, "master", &master) != 0)
+ master = "???";
+ if (dict_get_str (dict, "slave", &slave) != 0)
+ slave = "???";
+ cli_out ("Deleting " GEOREP " session between %s & %s"
+ " has been successful", master, slave);
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_CREATE:
+ if (dict_get_str (dict, "master", &master) != 0)
+ master = "???";
+ if (dict_get_str (dict, "slave", &slave) != 0)
+ slave = "???";
+ cli_out ("Creating " GEOREP " session between %s & %s"
+ " has been successful", master, slave);
+ break;
+
+ default:
+ cli_out (GEOREP" command executed successfully");
+ }
+
+out:
+ if (dict)
+ dict_unref (dict);
+ cli_cmd_broadcast_response (ret);
+
+ free (rsp.dict.dict_val);
+
+ return ret;
+}
+
+int32_t
+gf_cli_sys_exec (call_frame_t *frame, xlator_t *this, void *data)
+{
+ int ret = 0;
+ dict_t *dict = NULL;
+ gf_cli_req req = {{0,}};
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ gf_log ("cli", GF_LOG_ERROR, "Invalid data");
+ goto out;
+ }
+
+ dict = data;
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_sys_exec_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_SYS_EXEC, this, cli_rpc_prog,
+ NULL);
+out:
+ GF_FREE (req.dict.dict_val);
+ return ret;
+}
+
+int32_t
+gf_cli_copy_file (call_frame_t *frame, xlator_t *this, void *data)
+{
+ int ret = 0;
+ dict_t *dict = NULL;
+ gf_cli_req req = {{0,}};
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ gf_log ("cli", GF_LOG_ERROR, "Invalid data");
+ goto out;
+ }
+
+ dict = data;
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_copy_file_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_COPY_FILE, this, cli_rpc_prog,
+ NULL);
+out:
+ GF_FREE (req.dict.dict_val);
+ return ret;
+}
+
+int32_t
+gf_cli_gsync_set (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ int ret = 0;
+ dict_t *dict = NULL;
+ gf_cli_req req = {{0,}};
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_gsync_set_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_GSYNC_SET, this, cli_rpc_prog,
+ NULL);
+
+out:
+ GF_FREE (req.dict.dict_val);
+
+ return ret;
+}
+
+
+int
+cli_profile_info_percentage_cmp (void *a, void *b)
+{
+ cli_profile_info_t *ia = NULL;
+ cli_profile_info_t *ib = NULL;
+ int ret = 0;
+
+ ia = a;
+ ib = b;
+ if (ia->percentage_avg_latency < ib->percentage_avg_latency)
+ ret = -1;
+ else if (ia->percentage_avg_latency > ib->percentage_avg_latency)
+ ret = 1;
+ else
+ ret = 0;
+ return ret;
+}
+
+
+void
+cmd_profile_volume_brick_out (dict_t *dict, int count, int interval)
+{
+ char key[256] = {0};
+ int i = 0;
+ uint64_t sec = 0;
+ uint64_t r_count = 0;
+ uint64_t w_count = 0;
+ uint64_t rb_counts[32] = {0};
+ uint64_t wb_counts[32] = {0};
+ cli_profile_info_t profile_info[GF_FOP_MAXVALUE] = {{0}};
+ char output[128] = {0};
+ int per_line = 0;
+ char read_blocks[128] = {0};
+ char write_blocks[128] = {0};
+ int index = 0;
+ int is_header_printed = 0;
+ int ret = 0;
+ double total_percentage_latency = 0;
+
+ for (i = 0; i < 32; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-%d-read-%d", count,
+ interval, (1 << i));
+ ret = dict_get_uint64 (dict, key, &rb_counts[i]);
+ }
+
+ for (i = 0; i < 32; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-%d-write-%d", count, interval,
+ (1<<i));
+ ret = dict_get_uint64 (dict, key, &wb_counts[i]);
+ }
+
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-%d-%d-hits", count,
+ interval, i);
+ ret = dict_get_uint64 (dict, key, &profile_info[i].fop_hits);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-%d-%d-avglatency", count,
+ interval, i);
+ ret = dict_get_double (dict, key, &profile_info[i].avg_latency);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-%d-%d-minlatency", count,
+ interval, i);
+ ret = dict_get_double (dict, key, &profile_info[i].min_latency);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-%d-%d-maxlatency", count,
+ interval, i);
+ ret = dict_get_double (dict, key, &profile_info[i].max_latency);
+ profile_info[i].fop_name = (char *)gf_fop_list[i];
+
+ total_percentage_latency +=
+ (profile_info[i].fop_hits * profile_info[i].avg_latency);
+ }
+ if (total_percentage_latency) {
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ profile_info[i].percentage_avg_latency = 100 * (
+ (profile_info[i].avg_latency* profile_info[i].fop_hits) /
+ total_percentage_latency);
+ }
+ gf_array_insertionsort (profile_info, 1, GF_FOP_MAXVALUE - 1,
+ sizeof (cli_profile_info_t),
+ cli_profile_info_percentage_cmp);
+ }
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-%d-duration", count, interval);
+ ret = dict_get_uint64 (dict, key, &sec);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-%d-total-read", count, interval);
+ ret = dict_get_uint64 (dict, key, &r_count);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-%d-total-write", count, interval);
+ ret = dict_get_uint64 (dict, key, &w_count);
+
+ if (ret == 0) {
+ }
+
+ if (interval == -1)
+ cli_out ("Cumulative Stats:");
+ else
+ cli_out ("Interval %d Stats:", interval);
+ snprintf (output, sizeof (output), "%14s", "Block Size:");
+ snprintf (read_blocks, sizeof (read_blocks), "%14s", "No. of Reads:");
+ snprintf (write_blocks, sizeof (write_blocks), "%14s", "No. of Writes:");
+ index = 14;
+ for (i = 0; i < 32; i++) {
+ if ((rb_counts[i] == 0) && (wb_counts[i] == 0))
+ continue;
+ per_line++;
+ snprintf (output+index, sizeof (output)-index, "%19db+ ", (1<<i));
+ if (rb_counts[i]) {
+ snprintf (read_blocks+index, sizeof (read_blocks)-index,
+ "%21"PRId64" ", rb_counts[i]);
+ } else {
+ snprintf (read_blocks+index, sizeof (read_blocks)-index,
+ "%21s ", "0");
+ }
+ if (wb_counts[i]) {
+ snprintf (write_blocks+index, sizeof (write_blocks)-index,
+ "%21"PRId64" ", wb_counts[i]);
+ } else {
+ snprintf (write_blocks+index, sizeof (write_blocks)-index,
+ "%21s ", "0");
+ }
+ index += 22;
+ if (per_line == 3) {
+ cli_out ("%s", output);
+ cli_out ("%s", read_blocks);
+ cli_out ("%s", write_blocks);
+ cli_out (" ");
+ per_line = 0;
+ memset (output, 0, sizeof (output));
+ memset (read_blocks, 0, sizeof (read_blocks));
+ memset (write_blocks, 0, sizeof (write_blocks));
+ snprintf (output, sizeof (output), "%14s", "Block Size:");
+ snprintf (read_blocks, sizeof (read_blocks), "%14s",
+ "No. of Reads:");
+ snprintf (write_blocks, sizeof (write_blocks), "%14s",
+ "No. of Writes:");
+ index = 14;
+ }
+ }
+
+ if (per_line != 0) {
+ cli_out ("%s", output);
+ cli_out ("%s", read_blocks);
+ cli_out ("%s", write_blocks);
+ }
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ if (profile_info[i].fop_hits == 0)
+ continue;
+ if (is_header_printed == 0) {
+ cli_out ("%10s %13s %13s %13s %14s %11s", "%-latency",
+ "Avg-latency", "Min-Latency", "Max-Latency",
+ "No. of calls", "Fop");
+ cli_out ("%10s %13s %13s %13s %14s %11s", "---------",
+ "-----------", "-----------", "-----------",
+ "------------", "----");
+ is_header_printed = 1;
+ }
+ if (profile_info[i].fop_hits) {
+ cli_out ("%10.2lf %10.2lf us %10.2lf us %10.2lf us"
+ " %14"PRId64" %11s",
+ profile_info[i].percentage_avg_latency,
+ profile_info[i].avg_latency,
+ profile_info[i].min_latency,
+ profile_info[i].max_latency,
+ profile_info[i].fop_hits,
+ profile_info[i].fop_name);
+ }
+ }
+ cli_out (" ");
+ cli_out ("%12s: %"PRId64" seconds", "Duration", sec);
+ cli_out ("%12s: %"PRId64" bytes", "Data Read", r_count);
+ cli_out ("%12s: %"PRId64" bytes", "Data Written", w_count);
+ cli_out (" ");
+}
+
+int32_t
+gf_cli_profile_volume_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ dict_t *dict = NULL;
+ gf1_cli_stats_op op = GF_CLI_STATS_NONE;
+ char key[256] = {0};
+ int interval = 0;
+ int i = 1;
+ int32_t brick_count = 0;
+ char *volname = NULL;
+ char *brick = NULL;
+ char str[1024] = {0,};
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ gf_log ("cli", GF_LOG_DEBUG, "Received resp to profile");
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ dict = dict_new ();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &dict);
+
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to allocate memory");
+ goto out;
+ } else {
+ dict->extra_stdfree = rsp.dict.dict_val;
+ }
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_profile (dict, rsp.op_ret,
+ rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32 (dict, "op", (int32_t*)&op);
+ if (ret)
+ goto out;
+
+ if (rsp.op_ret && strcmp (rsp.op_errstr, "")) {
+ cli_err ("%s", rsp.op_errstr);
+ } else {
+ switch (op) {
+ case GF_CLI_STATS_START:
+ cli_out ("Starting volume profile on %s has been %s ",
+ volname,
+ (rsp.op_ret) ? "unsuccessful": "successful");
+ break;
+ case GF_CLI_STATS_STOP:
+ cli_out ("Stopping volume profile on %s has been %s ",
+ volname,
+ (rsp.op_ret) ? "unsuccessful": "successful");
+ break;
+ case GF_CLI_STATS_INFO:
+ break;
+ default:
+ cli_out ("volume profile on %s has been %s ",
+ volname,
+ (rsp.op_ret) ? "unsuccessful": "successful");
+ break;
+ }
+ }
+
+ if (rsp.op_ret) {
+ ret = rsp.op_ret;
+ goto out;
+ }
+
+ if (op != GF_CLI_STATS_INFO) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "count", &brick_count);
+ if (ret)
+ goto out;
+
+ if (!brick_count) {
+ cli_err ("All bricks of volume %s are down.", volname);
+ goto out;
+ }
+
+ while (i <= brick_count) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-brick", i);
+ ret = dict_get_str (dict, key, &brick);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Couldn't get brick name");
+ goto out;
+ }
+
+ ret = dict_get_str_boolean (dict, "nfs", _gf_false);
+ if (ret)
+ snprintf (str, sizeof (str), "NFS Server : %s", brick);
+ else
+ snprintf (str, sizeof (str), "Brick: %s", brick);
+ cli_out ("%s", str);
+ memset (str, '-', strlen (str));
+ cli_out ("%s", str);
+
+ snprintf (key, sizeof (key), "%d-cumulative", i);
+ ret = dict_get_int32 (dict, key, &interval);
+ if (ret == 0) {
+ cmd_profile_volume_brick_out (dict, i, interval);
+ }
+ snprintf (key, sizeof (key), "%d-interval", i);
+ ret = dict_get_int32 (dict, key, &interval);
+ if (ret == 0) {
+ cmd_profile_volume_brick_out (dict, i, interval);
+ }
+ i++;
+ }
+ ret = rsp.op_ret;
+
+out:
+ if (dict)
+ dict_unref (dict);
+ free (rsp.op_errstr);
+ cli_cmd_broadcast_response (ret);
+ return ret;
+}
+
+int32_t
+gf_cli_profile_volume (call_frame_t *frame, xlator_t *this, void *data)
+{
+ int ret = -1;
+ gf_cli_req req = {{0,}};
+ dict_t *dict = NULL;
+
+ GF_ASSERT (frame);
+ GF_ASSERT (this);
+ GF_ASSERT (data);
+
+ if (!frame || !this || !data)
+ goto out;
+ dict = data;
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_profile_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_PROFILE_VOLUME, this, cli_rpc_prog,
+ NULL);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+ GF_FREE (req.dict.dict_val);
+ return ret;
+}
+
+int32_t
+gf_cli_top_volume_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ dict_t *dict = NULL;
+ gf1_cli_stats_op op = GF_CLI_STATS_NONE;
+ char key[256] = {0};
+ int i = 0;
+ int32_t brick_count = 0;
+ char brick[1024];
+ int32_t members = 0;
+ char *filename;
+ char *bricks;
+ uint64_t value = 0;
+ int32_t j = 0;
+ gf1_cli_top_op top_op = GF_CLI_TOP_NONE;
+ uint64_t nr_open = 0;
+ uint64_t max_nr_open = 0;
+ double throughput = 0;
+ double time = 0;
+ int32_t time_sec = 0;
+ long int time_usec = 0;
+ char timestr[256] = {0, };
+ char *openfd_str = NULL;
+ gf_boolean_t nfs = _gf_false;
+ gf_boolean_t clear_stats = _gf_false;
+ int stats_cleared = 0;
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ gf_log ("cli", GF_LOG_DEBUG, "Received resp to top");
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ if (rsp.op_ret) {
+ if (strcmp (rsp.op_errstr, ""))
+ cli_err ("%s", rsp.op_errstr);
+ cli_err ("volume top unsuccessful");
+ ret = rsp.op_ret;
+ goto out;
+ }
+
+ dict = dict_new ();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &dict);
+
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to allocate memory");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "op", (int32_t*)&op);
+
+ if (op != GF_CLI_STATS_TOP) {
+ ret = 0;
+ goto out;
+ }
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_top (dict, rsp.op_ret,
+ rsp.op_errno,
+ rsp.op_errstr);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ }
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "count", &brick_count);
+ if (ret)
+ goto out;
+ snprintf (key, sizeof (key), "%d-top-op", 1);
+ ret = dict_get_int32 (dict, key, (int32_t*)&top_op);
+ if (ret)
+ goto out;
+
+ clear_stats = dict_get_str_boolean (dict, "clear-stats", _gf_false);
+
+ while (i < brick_count) {
+ i++;
+ snprintf (brick, sizeof (brick), "%d-brick", i);
+ ret = dict_get_str (dict, brick, &bricks);
+ if (ret)
+ goto out;
+
+ nfs = dict_get_str_boolean (dict, "nfs", _gf_false);
+
+ if (clear_stats) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-stats-cleared", i);
+ ret = dict_get_int32 (dict, key, &stats_cleared);
+ if (ret)
+ goto out;
+ cli_out (stats_cleared ? "Cleared stats for %s %s" :
+ "Failed to clear stats for %s %s",
+ nfs ? "NFS server on" : "brick", bricks);
+ continue;
+ }
+
+ if (nfs)
+ cli_out ("NFS Server : %s", bricks);
+ else
+ cli_out ("Brick: %s", bricks);
+
+ snprintf(key, sizeof (key), "%d-members", i);
+ ret = dict_get_int32 (dict, key, &members);
+
+ switch (top_op) {
+ case GF_CLI_TOP_OPEN:
+ snprintf (key, sizeof (key), "%d-current-open", i);
+ ret = dict_get_uint64 (dict, key, &nr_open);
+ if (ret)
+ break;
+ snprintf (key, sizeof (key), "%d-max-open", i);
+ ret = dict_get_uint64 (dict, key, &max_nr_open);
+ if (ret)
+ goto out;
+ snprintf (key, sizeof (key), "%d-max-openfd-time", i);
+ ret = dict_get_str (dict, key, &openfd_str);
+ if (ret)
+ goto out;
+ cli_out ("Current open fds: %"PRIu64", Max open"
+ " fds: %"PRIu64", Max openfd time: %s", nr_open,
+ max_nr_open, openfd_str);
+ case GF_CLI_TOP_READ:
+ case GF_CLI_TOP_WRITE:
+ case GF_CLI_TOP_OPENDIR:
+ case GF_CLI_TOP_READDIR:
+ if (!members) {
+ continue;
+ }
+ cli_out ("Count\t\tfilename\n=======================");
+ break;
+ case GF_CLI_TOP_READ_PERF:
+ case GF_CLI_TOP_WRITE_PERF:
+ snprintf (key, sizeof (key), "%d-throughput", i);
+ ret = dict_get_double (dict, key, &throughput);
+ if (!ret) {
+ snprintf (key, sizeof (key), "%d-time", i);
+ ret = dict_get_double (dict, key, &time);
+ }
+ if (!ret)
+ cli_out ("Throughput %.2f MBps time %.4f secs", throughput,
+ time / 1e6);
+
+ if (!members) {
+ continue;
+ }
+ cli_out ("%*s %-*s %-*s",
+ VOL_TOP_PERF_SPEED_WIDTH, "MBps",
+ VOL_TOP_PERF_FILENAME_DEF_WIDTH, "Filename",
+ VOL_TOP_PERF_TIME_WIDTH, "Time");
+ cli_out ("%*s %-*s %-*s",
+ VOL_TOP_PERF_SPEED_WIDTH, "====",
+ VOL_TOP_PERF_FILENAME_DEF_WIDTH, "========",
+ VOL_TOP_PERF_TIME_WIDTH, "====");
+ break;
+ default:
+ goto out;
+ }
+
+ for (j = 1; j <= members; j++) {
+ snprintf (key, sizeof (key), "%d-filename-%d", i, j);
+ ret = dict_get_str (dict, key, &filename);
+ if (ret)
+ break;
+ snprintf (key, sizeof (key), "%d-value-%d", i, j);
+ ret = dict_get_uint64 (dict, key, &value);
+ if (ret)
+ goto out;
+ if ( top_op == GF_CLI_TOP_READ_PERF ||
+ top_op == GF_CLI_TOP_WRITE_PERF) {
+ snprintf (key, sizeof (key), "%d-time-sec-%d", i, j);
+ ret = dict_get_int32 (dict, key, (int32_t *)&time_sec);
+ if (ret)
+ goto out;
+ snprintf (key, sizeof (key), "%d-time-usec-%d", i, j);
+ ret = dict_get_int32 (dict, key, (int32_t *)&time_usec);
+ if (ret)
+ goto out;
+ gf_time_fmt (timestr, sizeof timestr,
+ time_sec, gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS, time_usec);
+ if (strlen (filename) < VOL_TOP_PERF_FILENAME_DEF_WIDTH)
+ cli_out ("%*"PRIu64" %-*s %-*s",
+ VOL_TOP_PERF_SPEED_WIDTH,
+ value,
+ VOL_TOP_PERF_FILENAME_DEF_WIDTH,
+ filename,
+ VOL_TOP_PERF_TIME_WIDTH,
+ timestr);
+ else
+ cli_out ("%*"PRIu64" ...%-*s %-*s",
+ VOL_TOP_PERF_SPEED_WIDTH,
+ value,
+ VOL_TOP_PERF_FILENAME_ALT_WIDTH ,
+ filename + strlen (filename) -
+ VOL_TOP_PERF_FILENAME_ALT_WIDTH,
+ VOL_TOP_PERF_TIME_WIDTH,
+ timestr);
+ } else {
+ cli_out ("%"PRIu64"\t\t%s", value, filename);
+ }
+ }
+ }
+ ret = rsp.op_ret;
+
+out:
+ cli_cmd_broadcast_response (ret);
+
+ if (dict)
+ dict_unref (dict);
+
+ free (rsp.dict.dict_val);
+ return ret;
+}
+
+int32_t
+gf_cli_top_volume (call_frame_t *frame, xlator_t *this, void *data)
+{
+ int ret = -1;
+ gf_cli_req req = {{0,}};
+ dict_t *dict = NULL;
+
+ GF_ASSERT (frame);
+ GF_ASSERT (this);
+ GF_ASSERT (data);
+
+ if (!frame || !this || !data)
+ goto out;
+ dict = data;
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_top_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_PROFILE_VOLUME, this, cli_rpc_prog,
+ NULL);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ GF_FREE (req.dict.dict_val);
+ return ret;
+}
+
+
+int
+gf_cli_getwd_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf1_cli_getwd_rsp rsp = {0,};
+ int ret = -1;
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_getwd_rsp);
+ if (ret < 0) {
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ if (rsp.op_ret == -1) {
+ cli_err ("getwd failed");
+ ret = rsp.op_ret;
+ goto out;
+ }
+
+ gf_log ("cli", GF_LOG_INFO, "Received resp to getwd");
+
+ cli_out ("%s", rsp.wd);
+
+ ret = 0;
+
+out:
+ cli_cmd_broadcast_response (ret);
+ return ret;
+}
+
+int32_t
+gf_cli_getwd (call_frame_t *frame, xlator_t *this, void *data)
+{
+ int ret = -1;
+ gf1_cli_getwd_req req = {0,};
+
+ GF_ASSERT (frame);
+ GF_ASSERT (this);
+
+ if (!frame || !this)
+ goto out;
+
+ ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
+ GLUSTER_CLI_GETWD, NULL,
+ this, gf_cli_getwd_cbk,
+ (xdrproc_t) xdr_gf1_cli_getwd_req);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+void
+cli_print_volume_status_mempool (dict_t *dict, char *prefix)
+{
+ int ret = -1;
+ int32_t mempool_count = 0;
+ char *name = NULL;
+ int32_t hotcount = 0;
+ int32_t coldcount = 0;
+ uint64_t paddedsizeof = 0;
+ uint64_t alloccount = 0;
+ int32_t maxalloc = 0;
+ uint64_t pool_misses = 0;
+ int32_t maxstdalloc = 0;
+ char key[1024] = {0,};
+ int i = 0;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (prefix);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.mempool-count",prefix);
+ ret = dict_get_int32 (dict, key, &mempool_count);
+ if (ret)
+ goto out;
+
+ cli_out ("Mempool Stats\n-------------");
+ cli_out ("%-30s %9s %9s %12s %10s %8s %8s %12s", "Name", "HotCount",
+ "ColdCount", "PaddedSizeof", "AllocCount", "MaxAlloc",
+ "Misses", "Max-StdAlloc");
+ cli_out ("%-30s %9s %9s %12s %10s %8s %8s %12s", "----", "--------",
+ "---------", "------------", "----------",
+ "--------", "--------", "------------");
+
+ for (i = 0; i < mempool_count; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.name", prefix, i);
+ ret = dict_get_str (dict, key, &name);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.hotcount", prefix, i);
+ ret = dict_get_int32 (dict, key, &hotcount);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.coldcount", prefix, i);
+ ret = dict_get_int32 (dict, key, &coldcount);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.paddedsizeof",
+ prefix, i);
+ ret = dict_get_uint64 (dict, key, &paddedsizeof);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.alloccount", prefix, i);
+ ret = dict_get_uint64 (dict, key, &alloccount);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.max_alloc", prefix, i);
+ ret = dict_get_int32 (dict, key, &maxalloc);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.max-stdalloc", prefix, i);
+ ret = dict_get_int32 (dict, key, &maxstdalloc);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.pool-misses", prefix, i);
+ ret = dict_get_uint64 (dict, key, &pool_misses);
+ if (ret)
+ goto out;
+
+ cli_out ("%-30s %9d %9d %12"PRIu64" %10"PRIu64" %8d %8"PRIu64
+ " %12d", name, hotcount, coldcount, paddedsizeof,
+ alloccount, maxalloc, pool_misses, maxstdalloc);
+ }
+
+out:
+ return;
+
+}
+
+void
+cli_print_volume_status_mem (dict_t *dict, gf_boolean_t notbrick)
+{
+ int ret = -1;
+ char *volname = NULL;
+ char *hostname = NULL;
+ char *path = NULL;
+ int online = -1;
+ char key[1024] = {0,};
+ int brick_index_max = -1;
+ int other_count = 0;
+ int index_max = 0;
+ int val = 0;
+ int i = 0;
+
+ GF_ASSERT (dict);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+ cli_out ("Memory status for volume : %s", volname);
+
+ ret = dict_get_int32 (dict, "brick-index-max", &brick_index_max);
+ if (ret)
+ goto out;
+ ret = dict_get_int32 (dict, "other-count", &other_count);
+ if (ret)
+ goto out;
+
+ index_max = brick_index_max + other_count;
+
+ for (i = 0; i <= index_max; i++) {
+ cli_out ("----------------------------------------------");
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.hostname", i);
+ ret = dict_get_str (dict, key, &hostname);
+ if (ret)
+ continue;
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.path", i);
+ ret = dict_get_str (dict, key, &path);
+ if (ret)
+ continue;
+ if (notbrick)
+ cli_out ("%s : %s", hostname, path);
+ else
+ cli_out ("Brick : %s:%s", hostname, path);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.status", i);
+ ret = dict_get_int32 (dict, key, &online);
+ if (ret)
+ goto out;
+ if (!online) {
+ if (notbrick)
+ cli_out ("%s is offline", hostname);
+ else
+ cli_out ("Brick is offline");
+ continue;
+ }
+
+ cli_out ("Mallinfo\n--------");
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.arena", i);
+ ret = dict_get_int32 (dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out ("%-8s : %d","Arena", val);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.ordblks", i);
+ ret = dict_get_int32 (dict, key, &val);
+ if(ret)
+ goto out;
+ cli_out ("%-8s : %d","Ordblks", val);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.smblks", i);
+ ret = dict_get_int32 (dict, key, &val);
+ if(ret)
+ goto out;
+ cli_out ("%-8s : %d","Smblks", val);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.hblks", i);
+ ret = dict_get_int32 (dict, key, &val);
+ if(ret)
+ goto out;
+ cli_out ("%-8s : %d", "Hblks", val);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.hblkhd", i);
+ ret = dict_get_int32 (dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out ("%-8s : %d", "Hblkhd", val);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.usmblks", i);
+ ret = dict_get_int32 (dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out ("%-8s : %d", "Usmblks", val);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.fsmblks", i);
+ ret = dict_get_int32 (dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out ("%-8s : %d", "Fsmblks", val);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.uordblks", i);
+ ret = dict_get_int32 (dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out ("%-8s : %d", "Uordblks", val);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.fordblks", i);
+ ret = dict_get_int32 (dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out ("%-8s : %d", "Fordblks", val);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.keepcost", i);
+ ret = dict_get_int32 (dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out ("%-8s : %d", "Keepcost", val);
+
+ cli_out (" ");
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d", i);
+ cli_print_volume_status_mempool (dict, key);
+ }
+out:
+ cli_out ("----------------------------------------------\n");
+ return;
+}
+
+void
+cli_print_volume_status_clients (dict_t *dict, gf_boolean_t notbrick)
+{
+ int ret = -1;
+ char *volname = NULL;
+ int brick_index_max = -1;
+ int other_count = 0;
+ int index_max = 0;
+ char *hostname = NULL;
+ char *path = NULL;
+ int online = -1;
+ int client_count = 0;
+ char *clientname = NULL;
+ uint64_t bytesread = 0;
+ uint64_t byteswrite = 0;
+ char key[1024] = {0,};
+ int i = 0;
+ int j = 0;
+
+ GF_ASSERT (dict);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+ cli_out ("Client connections for volume %s", volname);
+
+ ret = dict_get_int32 (dict, "brick-index-max", &brick_index_max);
+ if (ret)
+ goto out;
+ ret = dict_get_int32 (dict, "other-count", &other_count);
+ if (ret)
+ goto out;
+
+ index_max = brick_index_max + other_count;
+
+ for (i = 0; i <= index_max; i++) {
+ cli_out ("----------------------------------------------");
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.hostname", i);
+ ret = dict_get_str (dict, key, &hostname);
+ if (ret)
+ goto out;
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.path", i);
+ ret = dict_get_str (dict, key, &path);
+ if (ret)
+ goto out;
+
+ if (notbrick)
+ cli_out ("%s : %s", hostname, path);
+ else
+ cli_out ("Brick : %s:%s", hostname, path);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.status", i);
+ ret = dict_get_int32 (dict, key, &online);
+ if (ret)
+ goto out;
+ if (!online) {
+ if (notbrick)
+ cli_out ("%s is offline", hostname);
+ else
+ cli_out ("Brick is offline");
+ continue;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.clientcount", i);
+ ret = dict_get_int32 (dict, key, &client_count);
+ if (ret)
+ goto out;
+
+ cli_out ("Clients connected : %d", client_count);
+ if (client_count == 0)
+ continue;
+
+ cli_out ("%-48s %15s %15s", "Hostname", "BytesRead",
+ "BytesWritten");
+ cli_out ("%-48s %15s %15s", "--------", "---------",
+ "------------");
+ for (j =0; j < client_count; j++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key),
+ "brick%d.client%d.hostname", i, j);
+ ret = dict_get_str (dict, key, &clientname);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key),
+ "brick%d.client%d.bytesread", i, j);
+ ret = dict_get_uint64 (dict, key, &bytesread);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key),
+ "brick%d.client%d.byteswrite", i, j);
+ ret = dict_get_uint64 (dict, key, &byteswrite);
+ if (ret)
+ goto out;
+
+ cli_out ("%-48s %15"PRIu64" %15"PRIu64,
+ clientname, bytesread, byteswrite);
+ }
+ }
+out:
+ cli_out ("----------------------------------------------\n");
+ return;
+}
+
+void
+cli_print_volume_status_inode_entry (dict_t *dict, char *prefix)
+{
+ int ret = -1;
+ char key[1024] = {0,};
+ char *gfid = NULL;
+ uint64_t nlookup = 0;
+ uint32_t ref = 0;
+ int ia_type = 0;
+ char inode_type;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (prefix);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.gfid", prefix);
+ ret = dict_get_str (dict, key, &gfid);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.nlookup", prefix);
+ ret = dict_get_uint64 (dict, key, &nlookup);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.ref", prefix);
+ ret = dict_get_uint32 (dict, key, &ref);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.ia_type", prefix);
+ ret = dict_get_int32 (dict, key, &ia_type);
+ if (ret)
+ goto out;
+
+ switch (ia_type) {
+ case IA_IFREG:
+ inode_type = 'R';
+ break;
+ case IA_IFDIR:
+ inode_type = 'D';
+ break;
+ case IA_IFLNK:
+ inode_type = 'L';
+ break;
+ case IA_IFBLK:
+ inode_type = 'B';
+ break;
+ case IA_IFCHR:
+ inode_type = 'C';
+ break;
+ case IA_IFIFO:
+ inode_type = 'F';
+ break;
+ case IA_IFSOCK:
+ inode_type = 'S';
+ break;
+ default:
+ inode_type = 'I';
+ break;
+ }
+
+ cli_out ("%-40s %14"PRIu64" %14"PRIu32" %9c",
+ gfid, nlookup, ref, inode_type);
+
+out:
+ return;
+
+}
+
+void
+cli_print_volume_status_itables (dict_t *dict, char *prefix)
+{
+ int ret = -1;
+ char key[1024] = {0,};
+ uint32_t active_size = 0;
+ uint32_t lru_size = 0;
+ uint32_t purge_size = 0;
+ int i =0;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (prefix);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.active_size", prefix);
+ ret = dict_get_uint32 (dict, key, &active_size);
+ if (ret)
+ goto out;
+ if (active_size != 0) {
+ cli_out ("Active inodes:");
+ cli_out ("%-40s %14s %14s %9s", "GFID", "Lookups", "Ref",
+ "IA type");
+ cli_out ("%-40s %14s %14s %9s", "----", "-------", "---",
+ "-------");
+ }
+ for (i = 0; i < active_size; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.active%d", prefix, i);
+ cli_print_volume_status_inode_entry (dict, key);
+ }
+ cli_out (" ");
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.lru_size", prefix);
+ ret = dict_get_uint32 (dict, key, &lru_size);
+ if (ret)
+ goto out;
+ if (lru_size != 0) {
+ cli_out ("LRU inodes:");
+ cli_out ("%-40s %14s %14s %9s", "GFID", "Lookups", "Ref",
+ "IA type");
+ cli_out ("%-40s %14s %14s %9s", "----", "-------", "---",
+ "-------");
+ }
+ for (i = 0; i < lru_size; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.lru%d", prefix, i);
+ cli_print_volume_status_inode_entry (dict, key);
+ }
+ cli_out (" ");
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.purge_size", prefix);
+ ret = dict_get_uint32 (dict, key, &purge_size);
+ if (ret)
+ goto out;
+ if (purge_size != 0) {
+ cli_out ("Purged inodes:");
+ cli_out ("%-40s %14s %14s %9s", "GFID", "Lookups", "Ref",
+ "IA type");
+ cli_out ("%-40s %14s %14s %9s", "----", "-------", "---",
+ "-------");
+ }
+ for (i = 0; i < purge_size; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.purge%d", prefix, i);
+ cli_print_volume_status_inode_entry (dict, key);
+ }
+
+out:
+ return;
+}
+
+void
+cli_print_volume_status_inode (dict_t *dict, gf_boolean_t notbrick)
+{
+ int ret = -1;
+ char *volname = NULL;
+ int brick_index_max = -1;
+ int other_count = 0;
+ int index_max = 0;
+ char *hostname = NULL;
+ char *path = NULL;
+ int online = -1;
+ int conn_count = 0;
+ char key[1024] = {0,};
+ int i = 0;
+ int j = 0;
+
+ GF_ASSERT (dict);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+ cli_out ("Inode tables for volume %s", volname);
+
+ ret = dict_get_int32 (dict, "brick-index-max", &brick_index_max);
+ if (ret)
+ goto out;
+ ret = dict_get_int32 (dict, "other-count", &other_count);
+ if (ret)
+ goto out;
+
+ index_max = brick_index_max + other_count;
+
+ for ( i = 0; i <= index_max; i++) {
+ cli_out ("----------------------------------------------");
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.hostname", i);
+ ret = dict_get_str (dict, key, &hostname);
+ if (ret)
+ goto out;
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.path", i);
+ ret = dict_get_str (dict, key, &path);
+ if (ret)
+ goto out;
+ if (notbrick)
+ cli_out ("%s : %s", hostname, path);
+ else
+ cli_out ("Brick : %s:%s", hostname, path);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.status", i);
+ ret = dict_get_int32 (dict, key, &online);
+ if (ret)
+ goto out;
+ if (!online) {
+ if (notbrick)
+ cli_out ("%s is offline", hostname);
+ else
+ cli_out ("Brick is offline");
+ continue;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.conncount", i);
+ ret = dict_get_int32 (dict, key, &conn_count);
+ if (ret)
+ goto out;
+
+ for (j = 0; j < conn_count; j++) {
+ if (conn_count > 1)
+ cli_out ("Connection %d:", j+1);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.conn%d.itable",
+ i, j);
+ cli_print_volume_status_itables (dict, key);
+ cli_out (" ");
+ }
+ }
+out:
+ cli_out ("----------------------------------------------");
+ return;
+}
+
+void
+cli_print_volume_status_fdtable (dict_t *dict, char *prefix)
+{
+ int ret = -1;
+ char key[1024] = {0,};
+ int refcount = 0;
+ uint32_t maxfds = 0;
+ int firstfree = 0;
+ int openfds = 0;
+ int fd_pid = 0;
+ int fd_refcount = 0;
+ int fd_flags = 0;
+ int i = 0;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (prefix);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.refcount", prefix);
+ ret = dict_get_int32 (dict, key, &refcount);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.maxfds", prefix);
+ ret = dict_get_uint32 (dict, key, &maxfds);
+ if (ret)
+ goto out;
+
+ memset (key, 0 ,sizeof (key));
+ snprintf (key, sizeof (key), "%s.firstfree", prefix);
+ ret = dict_get_int32 (dict, key, &firstfree);
+ if (ret)
+ goto out;
+
+ cli_out ("RefCount = %d MaxFDs = %d FirstFree = %d",
+ refcount, maxfds, firstfree);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.openfds", prefix);
+ ret = dict_get_int32 (dict, key, &openfds);
+ if (ret)
+ goto out;
+ if (0 == openfds) {
+ cli_err ("No open fds");
+ goto out;
+ }
+
+ cli_out ("%-19s %-19s %-19s %-19s", "FD Entry", "PID",
+ "RefCount", "Flags");
+ cli_out ("%-19s %-19s %-19s %-19s", "--------", "---",
+ "--------", "-----");
+
+ for (i = 0; i < maxfds ; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.fdentry%d.pid", prefix, i);
+ ret = dict_get_int32 (dict, key, &fd_pid);
+ if (ret)
+ continue;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.fdentry%d.refcount",
+ prefix, i);
+ ret = dict_get_int32 (dict, key, &fd_refcount);
+ if (ret)
+ continue;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.fdentry%d.flags", prefix, i);
+ ret = dict_get_int32 (dict, key, &fd_flags);
+ if (ret)
+ continue;
+
+ cli_out ("%-19d %-19d %-19d %-19d", i, fd_pid, fd_refcount,
+ fd_flags);
+ }
+
+out:
+ return;
+}
+
+void
+cli_print_volume_status_fd (dict_t *dict, gf_boolean_t notbrick)
+{
+ int ret = -1;
+ char *volname = NULL;
+ int brick_index_max = -1;
+ int other_count = 0;
+ int index_max = 0;
+ char *hostname = NULL;
+ char *path = NULL;
+ int online = -1;
+ int conn_count = 0;
+ char key[1024] = {0,};
+ int i = 0;
+ int j = 0;
+
+ GF_ASSERT (dict);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+ cli_out ("FD tables for volume %s", volname);
+
+ ret = dict_get_int32 (dict, "brick-index-max", &brick_index_max);
+ if (ret)
+ goto out;
+ ret = dict_get_int32 (dict, "other-count", &other_count);
+ if (ret)
+ goto out;
+
+ index_max = brick_index_max + other_count;
+
+ for (i = 0; i <= index_max; i++) {
+ cli_out ("----------------------------------------------");
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.hostname", i);
+ ret = dict_get_str (dict, key, &hostname);
+ if (ret)
+ goto out;
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.path", i);
+ ret = dict_get_str (dict, key, &path);
+ if (ret)
+ goto out;
+
+ if (notbrick)
+ cli_out ("%s : %s", hostname, path);
+ else
+ cli_out ("Brick : %s:%s", hostname, path);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.status", i);
+ ret = dict_get_int32 (dict, key, &online);
+ if (ret)
+ goto out;
+ if (!online) {
+ if (notbrick)
+ cli_out ("%s is offline", hostname);
+ else
+ cli_out ("Brick is offline");
+ continue;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.conncount", i);
+ ret = dict_get_int32 (dict, key, &conn_count);
+ if (ret)
+ goto out;
+
+ for (j = 0; j < conn_count; j++) {
+ cli_out ("Connection %d:", j+1);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.conn%d.fdtable",
+ i, j);
+ cli_print_volume_status_fdtable (dict, key);
+ cli_out (" ");
+ }
+ }
+out:
+ cli_out ("----------------------------------------------");
+ return;
+}
+
+void
+cli_print_volume_status_call_frame (dict_t *dict, char *prefix)
+{
+ int ret = -1;
+ char key[1024] = {0,};
+ int ref_count = 0;
+ char *translator = 0;
+ int complete = 0;
+ char *parent = NULL;
+ char *wind_from = NULL;
+ char *wind_to = NULL;
+ char *unwind_from = NULL;
+ char *unwind_to = NULL;
+
+ if (!dict || !prefix)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.refcount", prefix);
+ ret = dict_get_int32 (dict, key, &ref_count);
+ if (ret)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.translator", prefix);
+ ret = dict_get_str (dict, key, &translator);
+ if (ret)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.complete", prefix);
+ ret = dict_get_int32 (dict, key, &complete);
+ if (ret)
+ return;
+
+ cli_out (" Ref Count = %d", ref_count);
+ cli_out (" Translator = %s", translator);
+ cli_out (" Completed = %s", (complete ? "Yes" : "No"));
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.parent", prefix);
+ ret = dict_get_str (dict, key, &parent);
+ if (!ret)
+ cli_out (" Parent = %s", parent);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.windfrom", prefix);
+ ret = dict_get_str (dict, key, &wind_from);
+ if (!ret)
+ cli_out (" Wind From = %s", wind_from);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.windto", prefix);
+ ret = dict_get_str (dict, key, &wind_to);
+ if (!ret)
+ cli_out (" Wind To = %s", wind_to);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.unwindfrom", prefix);
+ ret = dict_get_str (dict, key, &unwind_from);
+ if (!ret)
+ cli_out (" Unwind From = %s", unwind_from);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.unwindto", prefix);
+ ret = dict_get_str (dict, key, &unwind_to);
+ if (!ret)
+ cli_out (" Unwind To = %s", unwind_to);
+}
+
+void
+cli_print_volume_status_call_stack (dict_t *dict, char *prefix)
+{
+ int ret = -1;
+ char key[1024] = {0,};
+ int uid = 0;
+ int gid = 0;
+ int pid = 0;
+ uint64_t unique = 0;
+ //char *op = NULL;
+ int count = 0;
+ int i = 0;
+
+ if (!dict || !prefix)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.uid", prefix);
+ ret = dict_get_int32 (dict, key, &uid);
+ if (ret)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.gid", prefix);
+ ret = dict_get_int32 (dict, key, &gid);
+ if (ret)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pid", prefix);
+ ret = dict_get_int32 (dict, key, &pid);
+ if (ret)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.unique", prefix);
+ ret = dict_get_uint64 (dict, key, &unique);
+ if (ret)
+ return;
+
+ /*
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.op", prefix);
+ ret = dict_get_str (dict, key, &op);
+ if (ret)
+ return;
+ */
+
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.count", prefix);
+ ret = dict_get_int32 (dict, key, &count);
+ if (ret)
+ return;
+
+ cli_out (" UID : %d", uid);
+ cli_out (" GID : %d", gid);
+ cli_out (" PID : %d", pid);
+ cli_out (" Unique : %"PRIu64, unique);
+ //cli_out ("\tOp : %s", op);
+ cli_out (" Frames : %d", count);
+
+ for (i = 0; i < count; i++) {
+ cli_out (" Frame %d", i+1);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.frame%d", prefix, i);
+ cli_print_volume_status_call_frame (dict, key);
+ }
+
+ cli_out (" ");
+}
+
+void
+cli_print_volume_status_callpool (dict_t *dict, gf_boolean_t notbrick)
+{
+ int ret = -1;
+ char *volname = NULL;
+ int brick_index_max = -1;
+ int other_count = 0;
+ int index_max = 0;
+ char *hostname = NULL;
+ char *path = NULL;
+ int online = -1;
+ int call_count = 0;
+ char key[1024] = {0,};
+ int i = 0;
+ int j = 0;
+
+ GF_ASSERT (dict);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+ cli_out ("Pending calls for volume %s", volname);
+
+ ret = dict_get_int32 (dict, "brick-index-max", &brick_index_max);
+ if (ret)
+ goto out;
+ ret = dict_get_int32 (dict, "other-count", &other_count);
+ if (ret)
+ goto out;
+
+ index_max = brick_index_max + other_count;
+
+ for (i = 0; i <= index_max; i++) {
+ cli_out ("----------------------------------------------");
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.hostname", i);
+ ret = dict_get_str (dict, key, &hostname);
+ if (ret)
+ goto out;
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.path", i);
+ ret = dict_get_str (dict, key, &path);
+ if (ret)
+ goto out;
+
+ if (notbrick)
+ cli_out ("%s : %s", hostname, path);
+ else
+ cli_out ("Brick : %s:%s", hostname, path);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.status", i);
+ ret = dict_get_int32 (dict, key, &online);
+ if (ret)
+ goto out;
+ if (!online) {
+ if (notbrick)
+ cli_out ("%s is offline", hostname);
+ else
+ cli_out ("Brick is offline");
+ continue;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.callpool.count", i);
+ ret = dict_get_int32 (dict, key, &call_count);
+ if (ret)
+ goto out;
+ cli_out ("Pending calls: %d", call_count);
+
+ if (0 == call_count)
+ continue;
+
+ for (j = 0; j < call_count; j++) {
+ cli_out ("Call Stack%d", j+1);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key),
+ "brick%d.callpool.stack%d", i, j);
+ cli_print_volume_status_call_stack (dict, key);
+ }
+ }
+
+out:
+ cli_out ("----------------------------------------------");
+ return;
+}
+
+static void
+cli_print_volume_status_tasks (dict_t *dict)
+{
+ int ret = -1;
+ int i = 0;
+ int j = 0;
+ int count = 0;
+ int task_count = 0;
+ int status = 0;
+ char *op = NULL;
+ char *task_id_str = NULL;
+ char *volname = NULL;
+ char key[1024] = {0,};
+ char task[1024] = {0,};
+ char *brick = NULL;
+ char *src_brick = NULL;
+ char *dest_brick = NULL;
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32 (dict, "tasks", &task_count);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to get tasks count");
+ return;
+ }
+
+ cli_out ("Task Status of Volume %s", volname);
+ cli_print_line (CLI_BRICK_STATUS_LINE_LEN);
+
+ if (task_count == 0) {
+ cli_out ("There are no active volume tasks");
+ cli_out (" ");
+ return;
+ }
+
+ for (i = 0; i < task_count; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "task%d.type", i);
+ ret = dict_get_str(dict, key, &op);
+ if (ret)
+ return;
+ cli_out ("%-20s : %-20s", "Task", op);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "task%d.id", i);
+ ret = dict_get_str (dict, key, &task_id_str);
+ if (ret)
+ return;
+ cli_out ("%-20s : %-20s", "ID", task_id_str);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "task%d.status", i);
+ ret = dict_get_int32 (dict, key, &status);
+ if (ret)
+ return;
+
+ snprintf (task, sizeof (task), "task%d", i);
+
+ /*
+ Replace brick only has two states - In progress and Complete
+ Ref: xlators/mgmt/glusterd/src/glusterd-replace-brick.c
+ */
+
+ if (!strcmp (op, "Replace brick")) {
+ if (status)
+ status = GF_DEFRAG_STATUS_COMPLETE;
+ else
+ status = GF_DEFRAG_STATUS_STARTED;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.src-brick", task);
+ ret = dict_get_str (dict, key, &src_brick);
+ if (ret)
+ goto out;
+
+ cli_out ("%-20s : %-20s", "Source Brick", src_brick);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.dst-brick", task);
+ ret = dict_get_str (dict, key, &dest_brick);
+ if (ret)
+ goto out;
+
+ cli_out ("%-20s : %-20s", "Destination Brick",
+ dest_brick);
+
+ } else if (!strcmp (op, "Remove brick")) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.count", task);
+ ret = dict_get_int32 (dict, key, &count);
+ if (ret)
+ goto out;
+
+ cli_out ("%-20s", "Removed bricks:");
+
+ for (j = 1; j <= count; j++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key),"%s.brick%d",
+ task, j);
+ ret = dict_get_str (dict, key, &brick);
+ if (ret)
+ goto out;
+
+ cli_out ("%-20s", brick);
+ }
+ }
+ cli_out ("%-20s : %-20s", "Status",
+ cli_vol_task_status_str[status]);
+ cli_out (" ");
+ }
+
+out:
+ return;
+}
+
+static int
+gf_cli_status_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ int brick_index_max = -1;
+ int other_count = 0;
+ int index_max = 0;
+ int i = 0;
+ int pid = -1;
+ uint32_t cmd = 0;
+ gf_boolean_t notbrick = _gf_false;
+ char key[1024] = {0,};
+ char *hostname = NULL;
+ char *path = NULL;
+ char *volname = NULL;
+ dict_t *dict = NULL;
+ gf_cli_rsp rsp = {0,};
+ cli_volume_status_t status = {0};
+ cli_local_t *local = NULL;
+ gf_boolean_t wipe_local = _gf_false;
+ char msg[1024] = {0,};
+
+ if (req->rpc_status == -1)
+ goto out;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ gf_log ("cli", GF_LOG_DEBUG, "Received response to status cmd");
+
+ local = ((call_frame_t *)myframe)->local;
+ if (!local) {
+ local = cli_local_get ();
+ if (!local) {
+ ret = -1;
+ gf_log ("cli", GF_LOG_ERROR, "Failed to get local");
+ goto out;
+ }
+ wipe_local = _gf_true;
+ }
+
+ if (rsp.op_ret) {
+ if (strcmp (rsp.op_errstr, ""))
+ snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
+ else
+ snprintf (msg, sizeof (msg), "Unable to obtain volume "
+ "status information.");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ if (!local->all)
+ cli_xml_output_str ("volStatus", msg,
+ rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ ret = 0;
+ goto out;
+ }
+
+ cli_err ("%s", msg);
+ if (local && local->all) {
+ ret = 0;
+ cli_out (" ");
+ } else
+ ret = -1;
+
+ goto out;
+ }
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &dict);
+ if (ret)
+ goto out;
+
+ ret = dict_get_uint32 (dict, "cmd", &cmd);
+ if (ret)
+ goto out;
+
+ if ((cmd & GF_CLI_STATUS_ALL)) {
+ if (local && local->dict) {
+ dict_ref (dict);
+ ret = dict_set_static_ptr (local->dict, "rsp-dict", dict);
+ ret = 0;
+ } else {
+ gf_log ("cli", GF_LOG_ERROR, "local not found");
+ ret = -1;
+ }
+ goto out;
+ }
+
+ if ((cmd & GF_CLI_STATUS_NFS) || (cmd & GF_CLI_STATUS_SHD))
+ notbrick = _gf_true;
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ if (!local->all) {
+ ret = cli_xml_output_vol_status_begin (local,
+ rsp.op_ret,
+ rsp.op_errno,
+ rsp.op_errstr);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+ }
+ if (cmd & GF_CLI_STATUS_TASKS) {
+ ret = cli_xml_output_vol_status_tasks_detail (local,
+ dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,"Error outputting "
+ "to xml");
+ goto out;
+ }
+ } else {
+ ret = cli_xml_output_vol_status (local, dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+ }
+
+ if (!local->all) {
+ ret = cli_xml_output_vol_status_end (local);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ }
+ }
+ goto out;
+ }
+
+ status.brick = GF_CALLOC (1, PATH_MAX + 256, gf_common_mt_strdup);
+
+ switch (cmd & GF_CLI_STATUS_MASK) {
+ case GF_CLI_STATUS_MEM:
+ cli_print_volume_status_mem (dict, notbrick);
+ goto cont;
+ break;
+ case GF_CLI_STATUS_CLIENTS:
+ cli_print_volume_status_clients (dict, notbrick);
+ goto cont;
+ break;
+ case GF_CLI_STATUS_INODE:
+ cli_print_volume_status_inode (dict, notbrick);
+ goto cont;
+ break;
+ case GF_CLI_STATUS_FD:
+ cli_print_volume_status_fd (dict, notbrick);
+ goto cont;
+ break;
+ case GF_CLI_STATUS_CALLPOOL:
+ cli_print_volume_status_callpool (dict, notbrick);
+ goto cont;
+ break;
+ case GF_CLI_STATUS_TASKS:
+ cli_print_volume_status_tasks (dict);
+ goto cont;
+ break;
+ default:
+ break;
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32 (dict, "brick-index-max", &brick_index_max);
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32 (dict, "other-count", &other_count);
+ if (ret)
+ goto out;
+
+ index_max = brick_index_max + other_count;
+
+
+ cli_out ("Status of volume: %s", volname);
+
+ if ((cmd & GF_CLI_STATUS_DETAIL) == 0) {
+ cli_out ("Gluster process\t\t\t\t\t\tPort\tOnline\tPid");
+ cli_print_line (CLI_BRICK_STATUS_LINE_LEN);
+ }
+
+ for (i = 0; i <= index_max; i++) {
+
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.hostname", i);
+ ret = dict_get_str (dict, key, &hostname);
+ if (ret)
+ continue;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.path", i);
+ ret = dict_get_str (dict, key, &path);
+ if (ret)
+ continue;
+
+ /* Brick/not-brick is handled seperately here as all
+ * types of nodes are contained in the default output
+ */
+ memset (status.brick, 0, PATH_MAX + 255);
+ if (!strcmp (hostname, "NFS Server") ||
+ !strcmp (hostname, "Self-heal Daemon"))
+ snprintf (status.brick, PATH_MAX + 255, "%s on %s",
+ hostname, path);
+ else
+ snprintf (status.brick, PATH_MAX + 255, "Brick %s:%s",
+ hostname, path);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.port", i);
+ ret = dict_get_int32 (dict, key, &(status.port));
+ if (ret)
+ continue;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.status", i);
+ ret = dict_get_int32 (dict, key, &(status.online));
+ if (ret)
+ continue;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.pid", i);
+ ret = dict_get_int32 (dict, key, &pid);
+ if (ret)
+ continue;
+ if (pid == -1)
+ ret = gf_asprintf (&(status.pid_str), "%s", "N/A");
+ else
+ ret = gf_asprintf (&(status.pid_str), "%d", pid);
+
+ if (ret == -1)
+ goto out;
+
+ if ((cmd & GF_CLI_STATUS_DETAIL)) {
+ ret = cli_get_detail_status (dict, i, &status);
+ if (ret)
+ goto out;
+ cli_print_line (CLI_BRICK_STATUS_LINE_LEN);
+ cli_print_detailed_status (&status);
+
+ } else {
+ cli_print_brick_status (&status);
+ }
+ }
+ cli_out (" ");
+
+ if ((cmd & GF_CLI_STATUS_MASK) == GF_CLI_STATUS_NONE)
+ cli_print_volume_status_tasks (dict);
+cont:
+ ret = rsp.op_ret;
+
+out:
+ if (dict)
+ dict_unref (dict);
+ GF_FREE (status.brick);
+ if (local && wipe_local) {
+ cli_local_wipe (local);
+ }
+
+ cli_cmd_broadcast_response (ret);
+ return ret;
+}
+
+int32_t
+gf_cli_status_volume (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf_cli_req req = {{0,}};
+ int ret = -1;
+ dict_t *dict = NULL;
+
+ if (!frame || !this || !data)
+ goto out;
+
+ dict = data;
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_status_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_STATUS_VOLUME, this, cli_rpc_prog,
+ NULL);
+ out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning: %d", ret);
+ return ret;
+}
+
+int
+gf_cli_status_volume_all (call_frame_t *frame, xlator_t *this, void *data)
+{
+ int i = 0;
+ int ret = -1;
+ int vol_count = -1;
+ uint32_t cmd = 0;
+ char key[1024] = {0};
+ char *volname = NULL;
+ void *vol_dict = NULL;
+ dict_t *dict = NULL;
+ cli_local_t *local = NULL;
+
+ if (frame->local) {
+ local = frame->local;
+ local->all = _gf_true;
+ } else
+ goto out;
+
+ ret = dict_get_uint32 (local->dict, "cmd", &cmd);
+ if (ret)
+ goto out;
+
+
+ ret = gf_cli_status_volume (frame, this, data);
+ if (ret)
+ goto out;
+
+ ret = dict_get_ptr (local->dict, "rsp-dict", &vol_dict);
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32 ((dict_t *)vol_dict, "vol_count", &vol_count);
+ if (ret) {
+ cli_err ("Failed to get names of volumes");
+ goto out;
+ }
+
+ /* remove the "all" flag in cmd */
+ cmd &= ~GF_CLI_STATUS_ALL;
+ cmd |= GF_CLI_STATUS_VOL;
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ //TODO: Pass proper op_* values
+ ret = cli_xml_output_vol_status_begin (local, 0,0, NULL);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+ }
+
+ if (vol_count == 0 && !(global_state->mode & GLUSTER_MODE_XML)) {
+ cli_err ("No volumes present");
+ ret = 0;
+ goto out;
+ }
+
+ for (i = 0; i < vol_count; i++) {
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "vol%d", i);
+ ret = dict_get_str (vol_dict, key, &volname);
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (dict, "volname", volname);
+ if (ret)
+ goto out;
+
+ ret = dict_set_uint32 (dict, "cmd", cmd);
+ if (ret)
+ goto out;
+
+ ret = gf_cli_status_volume (frame, this, dict);
+ if (ret)
+ goto out;
+
+ dict_unref (dict);
+ }
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_status_end (local);
+ }
+
+ out:
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR, "status all failed");
+
+ if (vol_dict)
+ dict_unref (vol_dict);
+
+ if (ret && dict)
+ dict_unref (dict);
+
+ if (local)
+ cli_local_wipe (local);
+
+ if (frame)
+ frame->local = NULL;
+
+ return ret;
+}
+
+static int
+gf_cli_mount_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf1_cli_mount_rsp rsp = {0,};
+ int ret = -1;
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_mount_rsp);
+ if (ret < 0) {
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ gf_log ("cli", GF_LOG_INFO, "Received resp to mount");
+
+ if (rsp.op_ret == 0) {
+ ret = 0;
+ cli_out ("%s", rsp.path);
+ } else {
+ /* weird sounding but easy to parse... */
+ cli_err ("%d : failed with this errno (%s)",
+ rsp.op_errno, strerror (rsp.op_errno));
+ ret = -1;
+ }
+
+out:
+ cli_cmd_broadcast_response (ret);
+ return ret;
+}
+
+int32_t
+gf_cli_mount (call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf1_cli_mount_req req = {0,};
+ int ret = -1;
+ void **dataa = data;
+ char *label = NULL;
+ dict_t *dict = NULL;
+
+ if (!frame || !this || !data)
+ goto out;
+
+ label = dataa[0];
+ dict = dataa[1];
+
+ req.label = label;
+ ret = dict_allocate_and_serialize (dict, &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
+ GLUSTER_CLI_MOUNT, NULL,
+ this, gf_cli_mount_cbk,
+ (xdrproc_t)xdr_gf1_cli_mount_req);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+static int
+gf_cli_umount_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf1_cli_umount_rsp rsp = {0,};
+ int ret = -1;
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_umount_rsp);
+ if (ret < 0) {
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ gf_log ("cli", GF_LOG_INFO, "Received resp to mount");
+
+ if (rsp.op_ret == 0)
+ ret = 0;
+ else {
+ cli_err ("umount failed");
+ ret = -1;
+ }
+
+out:
+ cli_cmd_broadcast_response (ret);
+ return ret;
+}
+
+int32_t
+gf_cli_umount (call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf1_cli_umount_req req = {0,};
+ int ret = -1;
+ dict_t *dict = NULL;
+
+ if (!frame || !this || !data)
+ goto out;
+
+ dict = data;
+
+ ret = dict_get_str (dict, "path", &req.path);
+ if (ret == 0)
+ ret = dict_get_int32 (dict, "lazy", &req.lazy);
+
+ if (ret) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
+ GLUSTER_CLI_UMOUNT, NULL,
+ this, gf_cli_umount_cbk,
+ (xdrproc_t)xdr_gf1_cli_umount_req);
+
+ out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+void
+cmd_heal_volume_statistics_out (dict_t *dict, int brick)
+{
+
+ uint64_t num_entries = 0;
+ int ret = 0;
+ char key[256] = {0};
+ char *hostname = NULL;
+ uint64_t i = 0;
+ uint64_t healed_count = 0;
+ uint64_t split_brain_count = 0;
+ uint64_t heal_failed_count = 0;
+ char *start_time_str = NULL;
+ char *end_time_str = NULL;
+ char *crawl_type = NULL;
+ int progress = -1;
+
+ snprintf (key, sizeof key, "%d-hostname", brick);
+ ret = dict_get_str (dict, key, &hostname);
+ if (ret)
+ goto out;
+ cli_out ("------------------------------------------------");
+ cli_out ("\nCrawl statistics for brick no %d", brick);
+ cli_out ("Hostname of brick %s", hostname);
+
+ snprintf (key, sizeof key, "statistics-%d-count", brick);
+ ret = dict_get_uint64 (dict, key, &num_entries);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < num_entries; i++)
+ {
+ snprintf (key, sizeof key, "statistics_crawl_type-%d-%"PRIu64,
+ brick, i);
+ ret = dict_get_str (dict, key, &crawl_type);
+ if (ret)
+ goto out;
+
+ snprintf (key, sizeof key, "statistics_healed_cnt-%d-%"PRIu64,
+ brick,i);
+ ret = dict_get_uint64 (dict, key, &healed_count);
+ if (ret)
+ goto out;
+
+ snprintf (key, sizeof key, "statistics_sb_cnt-%d-%"PRIu64,
+ brick, i);
+ ret = dict_get_uint64 (dict, key, &split_brain_count);
+ if (ret)
+ goto out;
+ snprintf (key, sizeof key, "statistics_heal_failed_cnt-%d-%"PRIu64,
+ brick, i);
+ ret = dict_get_uint64 (dict, key, &heal_failed_count);
+ if (ret)
+ goto out;
+ snprintf (key, sizeof key, "statistics_strt_time-%d-%"PRIu64,
+ brick, i);
+ ret = dict_get_str (dict, key, &start_time_str);
+ if (ret)
+ goto out;
+ snprintf (key, sizeof key, "statistics_end_time-%d-%"PRIu64,
+ brick, i);
+ ret = dict_get_str (dict, key, &end_time_str);
+ if (ret)
+ goto out;
+ snprintf (key, sizeof key, "statistics_inprogress-%d-%"PRIu64,
+ brick, i);
+ ret = dict_get_int32 (dict, key, &progress);
+ if (ret)
+ goto out;
+
+ cli_out ("\nStarting time of crawl: %s", start_time_str);
+ if (progress == 1)
+ cli_out ("Crawl is in progress");
+ else
+ cli_out ("Ending time of crawl: %s", end_time_str);
+
+ cli_out ("Type of crawl: %s", crawl_type);
+ cli_out ("No. of entries healed: %"PRIu64,
+ healed_count);
+ cli_out ("No. of entries in split-brain: %"PRIu64,
+ split_brain_count);
+ cli_out ("No. of heal failed entries: %"PRIu64,
+ heal_failed_count);
+
+ }
+
+
+out:
+ return;
+}
+
+void
+cmd_heal_volume_brick_out (dict_t *dict, int brick)
+{
+ uint64_t num_entries = 0;
+ int ret = 0;
+ char key[256] = {0};
+ char *hostname = NULL;
+ char *path = NULL;
+ char *status = NULL;
+ uint64_t i = 0;
+ uint32_t time = 0;
+ char timestr[32] = {0};
+ char *shd_status = NULL;
+
+ snprintf (key, sizeof key, "%d-hostname", brick);
+ ret = dict_get_str (dict, key, &hostname);
+ if (ret)
+ goto out;
+ snprintf (key, sizeof key, "%d-path", brick);
+ ret = dict_get_str (dict, key, &path);
+ if (ret)
+ goto out;
+ cli_out ("\nBrick %s:%s", hostname, path);
+
+ snprintf (key, sizeof key, "%d-status", brick);
+ ret = dict_get_str (dict, key, &status);
+ if (status && strlen (status))
+ cli_out ("Status: %s", status);
+
+ snprintf (key, sizeof key, "%d-shd-status",brick);
+ ret = dict_get_str (dict, key, &shd_status);
+
+ if(!shd_status)
+ {
+ snprintf (key, sizeof key, "%d-count", brick);
+ ret = dict_get_uint64 (dict, key, &num_entries);
+ cli_out ("Number of entries: %"PRIu64, num_entries);
+
+
+ for (i = 0; i < num_entries; i++) {
+ snprintf (key, sizeof key, "%d-%"PRIu64, brick, i);
+ ret = dict_get_str (dict, key, &path);
+ if (ret)
+ continue;
+ time = 0;
+ snprintf (key, sizeof key, "%d-%"PRIu64"-time",
+ brick, i);
+ ret = dict_get_uint32 (dict, key, &time);
+ if (!time) {
+ cli_out ("%s", path);
+ } else {
+ gf_time_fmt (timestr, sizeof timestr,
+ time, gf_timefmt_FT);
+ if (i == 0) {
+ cli_out ("at path on brick");
+ cli_out ("-----------------------------------");
+ }
+ cli_out ("%s %s", timestr, path);
+ }
+ }
+ }
+
+out:
+ return;
+}
+
+
+void
+cmd_heal_volume_statistics_heal_count_out (dict_t *dict, int brick)
+{
+ uint64_t num_entries = 0;
+ int ret = 0;
+ char key[256] = {0};
+ char *hostname = NULL;
+ char *path = NULL;
+ char *status = NULL;
+ char *shd_status = NULL;
+
+ snprintf (key, sizeof key, "%d-hostname", brick);
+ ret = dict_get_str (dict, key, &hostname);
+ if (ret)
+ goto out;
+ snprintf (key, sizeof key, "%d-path", brick);
+ ret = dict_get_str (dict, key, &path);
+ if (ret)
+ goto out;
+ cli_out ("\nBrick %s:%s", hostname, path);
+
+ snprintf (key, sizeof key, "%d-status", brick);
+ ret = dict_get_str (dict, key, &status);
+ if (status && strlen (status))
+ cli_out ("Status: %s", status);
+
+ snprintf (key, sizeof key, "%d-shd-status",brick);
+ ret = dict_get_str (dict, key, &shd_status);
+
+ if(!shd_status)
+ {
+ snprintf (key, sizeof key, "%d-hardlinks", brick);
+ ret = dict_get_uint64 (dict, key, &num_entries);
+ if (ret)
+ cli_out ("No gathered input for this brick");
+ else
+ cli_out ("Number of entries: %"PRIu64, num_entries);
+
+
+ }
+
+out:
+ return;
+}
+
+
+int
+gf_cli_heal_volume_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ cli_local_t *local = NULL;
+ char *volname = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *input_dict = NULL;
+ dict_t *dict = NULL;
+ int brick_count = 0;
+ int i = 0;
+ gf_xl_afr_op_t heal_op = GF_AFR_OP_INVALID;
+ char *operation = NULL;
+ char *substr = NULL;
+ char *heal_op_str = NULL;
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ frame = myframe;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ if (frame)
+ local = frame->local;
+
+ if (local) {
+ input_dict = local->dict;
+ ret = dict_get_int32 (input_dict, "heal-op",
+ (int32_t*)&heal_op);
+ }
+//TODO: Proper XML output
+//#if (HAVE_LIB_XML)
+// if (global_state->mode & GLUSTER_MODE_XML) {
+// ret = cli_xml_output_dict ("volHeal", dict, rsp.op_ret,
+// rsp.op_errno, rsp.op_errstr);
+// if (ret)
+// gf_log ("cli", GF_LOG_ERROR,
+// "Error outputting to xml");
+// goto out;
+// }
+//#endif
+
+ ret = dict_get_str (input_dict, "volname", &volname);
+ if (ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR, "failed to get volname");
+ goto out;
+ }
+
+ gf_log ("cli", GF_LOG_INFO, "Received resp to heal volume");
+
+ switch (heal_op) {
+ case GF_AFR_OP_HEAL_INDEX:
+ heal_op_str = "to perform index self heal";
+ break;
+ case GF_AFR_OP_HEAL_FULL:
+ heal_op_str = "to perform full self heal";
+ break;
+ case GF_AFR_OP_INDEX_SUMMARY:
+ heal_op_str = "list of entries to be healed";
+ break;
+ case GF_AFR_OP_HEALED_FILES:
+ heal_op_str = "list of healed entries";
+ break;
+ case GF_AFR_OP_HEAL_FAILED_FILES:
+ heal_op_str = "list of heal failed entries";
+ break;
+ case GF_AFR_OP_SPLIT_BRAIN_FILES:
+ heal_op_str = "list of split brain entries";
+ break;
+ case GF_AFR_OP_STATISTICS:
+ heal_op_str = "crawl statistics";
+ break;
+ case GF_AFR_OP_STATISTICS_HEAL_COUNT:
+ heal_op_str = "count of entries to be healed";
+ break;
+ case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+ heal_op_str = "count of entries to be healed per replica";
+ break;
+ case GF_AFR_OP_INVALID:
+ heal_op_str = "invalid heal op";
+ break;
+ }
+
+ if ((heal_op == GF_AFR_OP_HEAL_FULL) ||
+ (heal_op == GF_AFR_OP_HEAL_INDEX)) {
+ operation = "Launching heal operation";
+ substr = "\nUse heal info commands to check status";
+ } else {
+ operation = "Gathering";
+ substr = "";
+ }
+
+ if (rsp.op_ret) {
+ if (strcmp (rsp.op_errstr, "")) {
+ cli_err ("%s", rsp.op_errstr);
+ } else {
+ cli_err ("%s %s on volume %s has been unsuccessful",
+ operation, heal_op_str, volname);
+ }
+
+ ret = rsp.op_ret;
+ goto out;
+ } else {
+ cli_out ("%s %s on volume %s has been successful %s", operation,
+ heal_op_str, volname, substr);
+ }
+
+ ret = rsp.op_ret;
+ if ((heal_op == GF_AFR_OP_HEAL_FULL) ||
+ (heal_op == GF_AFR_OP_HEAL_INDEX))
+ goto out;
+
+ dict = dict_new ();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &dict);
+
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to allocate memory");
+ goto out;
+ } else {
+ dict->extra_stdfree = rsp.dict.dict_val;
+ }
+ ret = dict_get_int32 (dict, "count", &brick_count);
+ if (ret)
+ goto out;
+
+ if (!brick_count) {
+ cli_err ("All bricks of volume %s are down.", volname);
+ goto out;
+ }
+
+ switch (heal_op) {
+ case GF_AFR_OP_STATISTICS:
+ for (i = 0; i < brick_count; i++)
+ cmd_heal_volume_statistics_out (dict, i);
+ break;
+ case GF_AFR_OP_STATISTICS_HEAL_COUNT:
+ case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+ for (i = 0; i < brick_count; i++)
+ cmd_heal_volume_statistics_heal_count_out (dict,
+ i);
+ break;
+ case GF_AFR_OP_INDEX_SUMMARY:
+ case GF_AFR_OP_HEALED_FILES:
+ case GF_AFR_OP_HEAL_FAILED_FILES:
+ case GF_AFR_OP_SPLIT_BRAIN_FILES:
+ for (i = 0; i < brick_count; i++)
+ cmd_heal_volume_brick_out (dict, i);
+ break;
+ default:
+ break;
+ }
+
+ ret = rsp.op_ret;
+
+out:
+ cli_cmd_broadcast_response (ret);
+ free (rsp.op_errstr);
+ if (dict)
+ dict_unref (dict);
+ return ret;
+}
+
+int32_t
+gf_cli_heal_volume (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf_cli_req req = {{0,}};
+ int ret = 0;
+ dict_t *dict = NULL;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_heal_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_HEAL_VOLUME, this, cli_rpc_prog,
+ NULL);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+ GF_FREE (req.dict.dict_val);
+
+ return ret;
+}
+
+int32_t
+gf_cli_statedump_volume_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ char msg[1024] = {0,};
+
+ if (-1 == req->rpc_status)
+ goto out;
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+ gf_log ("cli", GF_LOG_DEBUG, "Received response to statedump");
+ if (rsp.op_ret)
+ snprintf (msg, sizeof(msg), "%s", rsp.op_errstr);
+ else
+ snprintf (msg, sizeof (msg), "Volume statedump successful");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str ("volStatedump", msg, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (rsp.op_ret)
+ cli_err ("volume statedump: failed: %s", msg);
+ else
+ cli_out ("volume statedump: success");
+ ret = rsp.op_ret;
+
+out:
+ cli_cmd_broadcast_response (ret);
+ return ret;
+}
+
+int32_t
+gf_cli_statedump_volume (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf_cli_req req = {{0,}};
+ dict_t *options = NULL;
+ int ret = -1;
+
+ if (!frame || !this || !data)
+ goto out;
+
+ options = data;
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_statedump_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, options,
+ GLUSTER_CLI_STATEDUMP_VOLUME, this, cli_rpc_prog,
+ NULL);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+ GF_FREE (req.dict.dict_val);
+ return ret;
+}
+
+int32_t
+gf_cli_list_volume_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ gf_cli_rsp rsp = {0,};
+ dict_t *dict = NULL;
+ int vol_count = 0;;
+ char *volname = NULL;
+ char key[1024] = {0,};
+ int i = 0;
+
+ if (-1 == req->rpc_status)
+ goto out;
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ dict = dict_new ();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to allocate memory");
+ goto out;
+ }
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_list (dict, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (rsp.op_ret)
+ cli_err ("%s", rsp.op_errstr);
+ else {
+ ret = dict_get_int32 (dict, "count", &vol_count);
+ if (ret)
+ goto out;
+
+ if (vol_count == 0) {
+ cli_err ("No volumes present in cluster");
+ goto out;
+ }
+ for (i = 0; i < vol_count; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d", i);
+ ret = dict_get_str (dict, key, &volname);
+ if (ret)
+ goto out;
+ cli_out ("%s", volname);
+ }
+ }
+
+ ret = rsp.op_ret;
+
+out:
+ cli_cmd_broadcast_response (ret);
+ return ret;
+}
+
+int32_t
+gf_cli_list_volume (call_frame_t *frame, xlator_t *this, void *data)
+{
+ int ret = -1;
+ gf_cli_req req = {{0,}};
+
+ if (!frame || !this)
+ goto out;
+
+ ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
+ GLUSTER_CLI_LIST_VOLUME, NULL,
+ this, gf_cli_list_volume_cbk,
+ (xdrproc_t)xdr_gf_cli_req);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+gf_cli_clearlocks_volume_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ char *lk_summary = NULL;
+ char *volname = NULL;
+ dict_t *dict = NULL;
+
+ if (-1 == req->rpc_status)
+ goto out;
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+ gf_log ("cli", GF_LOG_DEBUG, "Received response to clear-locks");
+
+ if (rsp.op_ret) {
+ cli_err ("Volume clear-locks unsuccessful");
+ cli_err ("%s", rsp.op_errstr);
+
+ } else {
+ if (!rsp.dict.dict_len) {
+ cli_err ("Possibly no locks cleared");
+ ret = 0;
+ goto out;
+ }
+
+ dict = dict_new ();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &dict);
+
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Unable to serialize response dictionary");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to get volname "
+ "from dictionary");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "lk-summary", &lk_summary);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to get lock "
+ "summary from dictionary");
+ goto out;
+ }
+ cli_out ("Volume clear-locks successful");
+ cli_out ("%s", lk_summary);
+
+ }
+
+ ret = rsp.op_ret;
+
+out:
+ if (dict)
+ dict_unref (dict);
+ cli_cmd_broadcast_response (ret);
+ return ret;
+}
+
+int32_t
+gf_cli_clearlocks_volume (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf_cli_req req = {{0,}};
+ dict_t *options = NULL;
+ int ret = -1;
+
+ if (!frame || !this || !data)
+ goto out;
+
+ options = data;
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_clearlocks_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, options,
+ GLUSTER_CLI_CLRLOCKS_VOLUME, this, cli_rpc_prog,
+ NULL);
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+ GF_FREE (req.dict.dict_val);
+ return ret;
+}
+
+int32_t
+cli_snapshot_remove_reply (gf_cli_rsp *rsp, dict_t *dict, call_frame_t *frame)
+{
+ int32_t ret = -1;
+ char *snap_name = NULL;
+
+ GF_ASSERT (rsp);
+ GF_ASSERT (dict);
+ GF_ASSERT (frame);
+
+ if (rsp->op_ret) {
+ cli_err("snapshot delete: failed: %s",
+ rsp->op_errstr ? rsp->op_errstr :
+ "Please check log file for details");
+ ret = rsp->op_ret;
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "snapname", &snap_name);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to get snapname");
+ goto out;
+ }
+
+ cli_out ("snapshot delete: %s: snap removed successfully",
+ snap_name);
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int
+cli_snapshot_config_display (dict_t *dict, gf_cli_rsp *rsp)
+{
+ char buf[PATH_MAX] = "";
+ char *volname = NULL;
+ int ret = -1;
+ int config_command = 0;
+ uint64_t value = 0;
+ uint64_t hard_limit = 0;
+ uint64_t soft_limit = 0;
+ uint64_t i = 0;
+ uint64_t voldisplaycount = 0;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (rsp);
+
+ if (rsp->op_ret) {
+ cli_err ("Snapshot Config : failed: %s",
+ rsp->op_errstr ? rsp->op_errstr :
+ "Please check log file for details");
+ ret = rsp->op_ret;
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "config-command", &config_command);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch config type");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+ /* Ignore the error, as volname is optional */
+
+ if (!volname) {
+ volname = "System";
+ }
+
+ ret = dict_get_uint64 (dict, "snap-max-hard-limit", &hard_limit);
+ /* Ignore the error, as the key specified is optional */
+ ret = dict_get_uint64 (dict, "snap-max-soft-limit", &soft_limit);
+
+ if (!hard_limit && !soft_limit
+ && config_command != GF_SNAP_CONFIG_DISPLAY) {
+ ret = -1;
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Could not fetch config-key");
+ goto out;
+ }
+
+ switch (config_command) {
+ case GF_SNAP_CONFIG_TYPE_SET:
+ if (hard_limit && soft_limit) {
+ cli_out ("snapshot config: snap-max-hard-limit "
+ "& snap-max-soft-limit for system set "
+ "successfully");
+ } else if (hard_limit){
+ cli_out ("snapshot config: %s "
+ "for snap-max-hard-limit set successfully",
+ volname);
+ } else if (soft_limit) {
+ cli_out ("snapshot config: %s "
+ "for snap-max-soft-limit set successfully",
+ volname);
+ }
+ break;
+
+ case GF_SNAP_CONFIG_DISPLAY :
+ cli_out ("\nSnapshot System Configuration:");
+ ret = dict_get_uint64 (dict, "snap-max-hard-limit",
+ &value);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch "
+ "snap_max_hard_limit for %s", volname);
+ ret = -1;
+ goto out;
+ }
+ cli_out ("snap-max-hard-limit : %"PRIu64, value);
+
+ ret = dict_get_uint64 (dict, "snap-max-soft-limit",
+ &soft_limit);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch "
+ "snap-max-soft-limit for %s", volname);
+ ret = -1;
+ goto out;
+ }
+ cli_out ("snap-max-soft-limit : %"PRIu64"%%\n",
+ soft_limit);
+
+ cli_out ("Snapshot Volume Configuration:");
+
+ ret = dict_get_uint64 (dict, "voldisplaycount",
+ &voldisplaycount);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Could not fetch voldisplaycount");
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; i < voldisplaycount; i++) {
+ snprintf (buf, sizeof(buf), "volume%ld-volname", i);
+ ret = dict_get_str (dict, buf, &volname);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch "
+ " %s", buf);
+ ret = -1;
+ goto out;
+ }
+ cli_out ("\nVolume : %s", volname);
+
+ snprintf (buf, sizeof(buf),
+ "volume%ld-snap-max-hard-limit", i);
+ ret = dict_get_uint64 (dict, buf, &value);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch "
+ " %s", buf);
+ ret = -1;
+ goto out;
+ }
+ cli_out ("snap-max-hard-limit : %"PRIu64, value);
+
+ snprintf (buf, sizeof(buf),
+ "volume%ld-active-hard-limit", i);
+ ret = dict_get_uint64 (dict, buf, &value);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch"
+ " effective snap_max_hard_limit for "
+ "%s", volname);
+ ret = -1;
+ goto out;
+ }
+ cli_out ("Effective snap-max-hard-limit : %"PRIu64,
+ value);
+
+ snprintf (buf, sizeof(buf),
+ "volume%ld-snap-max-soft-limit", i);
+ ret = dict_get_uint64 (dict, buf, &value);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch "
+ " %s", buf);
+ ret = -1;
+ goto out;
+ }
+ cli_out ("Effective snap-max-soft-limit : %"PRIu64" "
+ "(%"PRIu64"%%)", value, soft_limit);
+ }
+ break;
+ default :
+ break;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/* This function is used to print the volume related information
+ * of a snap.
+ *
+ * arg - 0, dict : Response Dictionary.
+ * arg - 1, prefix str : snaplist.snap{0..}.vol{0..}.*
+ */
+int
+cli_get_each_volinfo_in_snap (dict_t *dict, char *keyprefix,
+ gf_boolean_t snap_driven) {
+ char key[PATH_MAX] = "";
+ char *get_buffer = NULL;
+ int value = 0;
+ int ret = -1;
+ char indent[5] = "\t";
+ char *volname = NULL;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (keyprefix);
+
+ if (snap_driven) {
+ ret = snprintf (key, sizeof (key), "%s.volname", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key, &get_buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to get %s", key);
+ goto out;
+ }
+ cli_out ("%s" INDENT_MAIN_HEAD "%s", indent,
+ "Snap Volume Name", ":", get_buffer);
+
+ ret = snprintf (key, sizeof (key),
+ "%s.origin-volname", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key, &volname);
+ if (ret) {
+ gf_log ("cli", GF_LOG_WARNING, "Failed to get %s", key);
+ cli_out ("%-12s", "Origin:");
+ }
+ cli_out ("%s" INDENT_MAIN_HEAD "%s", indent,
+ "Origin Volume name", ":", volname);
+
+
+ ret = snprintf (key, sizeof (key), "%s.snapcount",
+ keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, key, &value);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to get %s", key);
+ goto out;
+ }
+ cli_out ("%s%s %s %s %d", indent, "Snaps taken for",
+ volname, ":", value);
+
+ ret = snprintf (key, sizeof (key), "%s.snaps-available",
+ keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, key, &value);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to get %s", key);
+ goto out;
+ }
+ cli_out ("%s%s %s %s %d", indent, "Snaps available for",
+ volname, ":", value);
+ }
+
+
+ ret = snprintf (key, sizeof (key), "%s.vol-status", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key, &get_buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to get %s", key);
+ goto out;
+ }
+ cli_out ("%s" INDENT_MAIN_HEAD "%s", indent, "Status",
+ ":", get_buffer);
+out :
+ return ret;
+}
+
+/* This function is used to print snap related information
+ * arg - 0, dict : Response dictionary.
+ * arg - 1, prefix_str : snaplist.snap{0..}.*
+ */
+int
+cli_get_volinfo_in_snap (dict_t *dict, char *keyprefix) {
+
+ char key[PATH_MAX] = "";
+ int i = 0;
+ int volcount = 0;
+ int ret = -1;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (keyprefix);
+
+ ret = snprintf (key, sizeof (key), "%s.vol-count", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, key, &volcount);
+ for (i = 1 ; i <= volcount ; i++) {
+ ret = snprintf (key, sizeof (key),
+ "%s.vol%d", keyprefix, i);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = cli_get_each_volinfo_in_snap (dict, key, _gf_true);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not list "
+ "details of volume in a snap");
+ goto out;
+ }
+ cli_out (" ");
+ }
+
+out :
+ return ret;
+}
+
+int
+cli_get_each_snap_info (dict_t *dict, char *prefix_str,
+ gf_boolean_t snap_driven) {
+ char key_buffer[PATH_MAX] = "";
+ char *get_buffer = NULL;
+ int ret = -1;
+ char indent[5] = "";
+
+ GF_ASSERT (dict);
+ GF_ASSERT (prefix_str);
+
+ if (!snap_driven)
+ strcat (indent, "\t");
+
+ ret = snprintf (key_buffer, sizeof (key_buffer), "%s.snapname",
+ prefix_str);
+ if (ret < 0 ) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key_buffer, &get_buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to fetch snapname %s ",
+ key_buffer);
+ goto out;
+ }
+ cli_out ("%s" INDENT_MAIN_HEAD "%s", indent, "Snapshot",
+ ":", get_buffer);
+
+ ret = snprintf (key_buffer, sizeof (key_buffer), "%s.snap-id",
+ prefix_str);
+ if (ret < 0 ) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key_buffer, &get_buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to fetch snap-id %s ",
+ key_buffer);
+ goto out;
+ }
+ cli_out ("%s" INDENT_MAIN_HEAD "%s", indent, "Snap UUID",
+ ":", get_buffer);
+
+ ret = snprintf (key_buffer, sizeof (key_buffer), "%s.snap-desc",
+ prefix_str);
+ if (ret < 0 ) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key_buffer, &get_buffer);
+ if (!ret) {
+ /* Ignore error for description */
+ cli_out ("%s" INDENT_MAIN_HEAD "%s", indent,
+ "Description", ":", get_buffer);
+ }
+
+ ret = snprintf (key_buffer, sizeof (key_buffer), "%s.snap-time",
+ prefix_str);
+ if (ret < 0 ) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key_buffer, &get_buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to fetch snap-time %s ",
+ prefix_str);
+ goto out;
+ }
+ cli_out ("%s" INDENT_MAIN_HEAD "%s", indent, "Created",
+ ":", get_buffer);
+
+ if (snap_driven) {
+ cli_out ("%-12s", "Snap Volumes:\n");
+ ret = cli_get_volinfo_in_snap (dict, prefix_str);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to list details "
+ "of the snaps");
+ goto out;
+ }
+ }
+out :
+ return ret;
+}
+
+/* This is a generic function to print snap related information.
+ * arg - 0, dict : Response Dictionary
+ */
+int
+cli_call_snapshot_info (dict_t *dict, gf_boolean_t bool_snap_driven) {
+ int snap_count = 0;
+ char key[PATH_MAX] = "";
+ int ret = -1;
+ int i = 0;
+
+ GF_ASSERT (dict);
+
+ ret = dict_get_int32 (dict, "snap-count", &snap_count);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to get snap-count");
+ goto out;
+ }
+
+ if (snap_count == 0) {
+ cli_out ("No snapshots present");
+ }
+
+ for (i = 1 ; i <= snap_count ; i++) {
+ ret = snprintf (key, sizeof (key), "snap%d", i);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = cli_get_each_snap_info (dict, key, bool_snap_driven);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Unable to print snap details");
+ goto out;
+ }
+ }
+out :
+ return ret;
+}
+
+int
+cli_get_snaps_in_volume (dict_t *dict) {
+ int ret = -1;
+ int i = 0;
+ int count = 0;
+ int avail = 0;
+ char key[PATH_MAX] = "";
+ char *get_buffer = NULL;
+
+ GF_ASSERT (dict);
+
+ ret = dict_get_str (dict, "origin-volname", &get_buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch origin-volname");
+ goto out;
+ }
+ cli_out (INDENT_MAIN_HEAD "%s", "Volume Name", ":", get_buffer);
+
+ ret = dict_get_int32 (dict, "snap-count", &avail);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch snap-count");
+ goto out;
+ }
+ cli_out (INDENT_MAIN_HEAD "%d", "Snaps Taken", ":", avail);
+
+ ret = dict_get_int32 (dict, "snaps-available", &count);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch snaps-available");
+ goto out;
+ }
+ cli_out (INDENT_MAIN_HEAD "%d", "Snaps Available", ":", count);
+
+ for (i = 1 ; i <= avail ; i++) {
+ snprintf (key, sizeof (key), "snap%d", i);
+ ret = cli_get_each_snap_info (dict, key, _gf_false);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Unable to print snap details");
+ goto out;
+ }
+
+ ret = snprintf (key, sizeof (key), "snap%d.vol1", i);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = cli_get_each_volinfo_in_snap (dict, key, _gf_false);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not get volume "
+ "related information");
+ goto out;
+ }
+
+ cli_out (" ");
+ }
+out :
+ return ret;
+}
+
+int
+cli_snapshot_list (dict_t *dict) {
+ int snapcount = 0;
+ char key[PATH_MAX] = "";
+ int ret = -1;
+ int i = 0;
+ char *get_buffer = NULL;
+
+ GF_ASSERT (dict);
+
+ ret = dict_get_int32 (dict, "snap-count", &snapcount);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch snap count");
+ goto out;
+ }
+
+ if (snapcount == 0) {
+ cli_out ("No snapshots present");
+ }
+
+ for (i = 1 ; i <= snapcount ; i++) {
+ ret = snprintf (key, sizeof (key), "snapname%d",i);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key, &get_buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not get %s ", key);
+ goto out;
+ } else {
+ cli_out ("%s", get_buffer);
+ }
+ }
+out :
+ return ret;
+}
+
+int
+cli_get_snap_volume_status (dict_t *dict, char *key_prefix)
+{
+ int ret = -1;
+ char key[PATH_MAX] = "";
+ char *buffer = NULL;
+ int brickcount = 0;
+ int i = 0;
+ int pid = 0;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (key_prefix);
+
+ ret = snprintf (key, sizeof (key), "%s.brickcount", key_prefix);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = dict_get_int32 (dict, key, &brickcount);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to fetch brickcount");
+ goto out;
+ }
+
+ for ( i = 0 ; i < brickcount ; i++ ) {
+ ret = snprintf (key, sizeof (key), "%s.brick%d.path",
+ key_prefix, i);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key, &buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_INFO,
+ "Unable to get Brick Path");
+ continue;
+ }
+ cli_out ("\n\t%-17s %s %s", "Brick Path", ":", buffer);
+
+ ret = snprintf (key, sizeof (key), "%s.brick%d.vgname",
+ key_prefix, i);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key, &buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_INFO,
+ "Unable to get Volume Group");
+ cli_out ("\t%-17s %s %s", "Volume Group", ":", "N/A");
+ } else
+ cli_out ("\t%-17s %s %s", "Volume Group", ":", buffer);
+
+ ret = snprintf (key, sizeof (key), "%s.brick%d.status",
+ key_prefix, i);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key, &buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_INFO,
+ "Unable to get Brick Running");
+ cli_out ("\t%-17s %s %s", "Brick Running", ":", "N/A");
+ } else
+ cli_out ("\t%-17s %s %s", "Brick Running", ":", buffer);
+
+ ret = snprintf (key, sizeof (key), "%s.brick%d.pid",
+ key_prefix, i);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, key, &pid);
+ if (ret) {
+ gf_log ("cli", GF_LOG_INFO,
+ "Unable to get pid");
+ cli_out ("\t%-17s %s %s", "Brick PID", ":", "N/A");
+ } else
+ cli_out ("\t%-17s %s %d", "Brick PID", ":", pid);
+
+ ret = snprintf (key, sizeof (key), "%s.brick%d.data",
+ key_prefix, i);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key, &buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_INFO,
+ "Unable to get Data Percent");
+ cli_out ("\t%-17s %s %s", "Data Percentage", ":", "N/A");
+ } else
+ cli_out ("\t%-17s %s %s", "Data Percentage", ":", buffer);
+
+ ret = snprintf (key, sizeof (key), "%s.brick%d.lvsize",
+ key_prefix, i);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = dict_get_str (dict, key, &buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_INFO, "Unable to get LV Size");
+ cli_out ("\t%-17s %s %s", "LV Size", ":", "N/A");
+ } else
+ cli_out ("\t%-17s %s %s", "LV Size", ":", buffer);
+
+ }
+out :
+ return ret;
+}
+
+
+
+int
+cli_get_single_snap_status (dict_t *dict, char *keyprefix)
+{
+ int ret = -1;
+ char key[PATH_MAX] = "";
+ int i = 0;
+ int volcount = 0;
+ char *get_buffer = NULL;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (keyprefix);
+
+ ret = snprintf (key, sizeof (key), "%s.snapname", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key, &get_buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to get snapname");
+ goto out;
+ }
+ cli_out ("\nSnap Name : %s", get_buffer);
+
+ ret = snprintf (key, sizeof (key), "%s.uuid", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key, &get_buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to get snap UUID");
+ goto out;
+ }
+ cli_out ("Snap UUID : %s", get_buffer);
+
+ ret = snprintf (key, sizeof (key), "%s.volcount", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, key, &volcount);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to get volume count");
+ goto out;
+ }
+
+ for (i = 0 ; i < volcount ; i++) {
+ ret = snprintf (key, sizeof (key), "%s.vol%d", keyprefix, i);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = cli_get_snap_volume_status (dict, key);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Could not get snap volume status");
+ goto out;
+ }
+ }
+out :
+ return ret;
+}
+
+int
+cli_snap_status_all (dict_t *dict) {
+ int ret = -1;
+ char key[PATH_MAX] = "";
+ int snapcount = 0;
+ int i = 0;
+
+ GF_ASSERT (dict);
+
+ ret = dict_get_int32 (dict, "status.snapcount", &snapcount);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not get snapcount");
+ goto out;
+ }
+
+ if (snapcount == 0) {
+ cli_out ("No snapshots present");
+ }
+
+ for (i = 0 ; i < snapcount; i++) {
+ ret = snprintf (key, sizeof (key), "status.snap%d",i);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = cli_get_single_snap_status (dict, key);
+ }
+out:
+ return ret;
+}
+
+
+int
+cli_snapshot_status_display (dict_t *dict, gf_cli_rsp *rsp)
+{
+ char key[PATH_MAX] = "";
+ int ret = -1;
+ int status_cmd = -1;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (rsp);
+
+ if (rsp->op_ret) {
+ cli_err ("Snapshot Status : failed: %s",
+ rsp->op_errstr ? rsp->op_errstr :
+ "Please check log file for details");
+ ret = rsp->op_ret;
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "cmd", &status_cmd);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch status type");
+ goto out;
+ }
+ switch (status_cmd) {
+ case GF_SNAP_STATUS_TYPE_ALL :
+ {
+ ret = cli_snap_status_all (dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch "
+ "status of all snap");
+ goto out;
+ }
+ break;
+ }
+
+ case GF_SNAP_STATUS_TYPE_SNAP :
+ {
+ ret = snprintf (key, sizeof (key), "status.snap0");
+ if (ret < 0) {
+ goto out;
+ }
+ ret = cli_get_single_snap_status (dict, key);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch "
+ "status of snap");
+ goto out;
+ }
+ break;
+ }
+
+ case GF_SNAP_STATUS_TYPE_VOL :
+ {
+ ret = cli_snap_status_all (dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch "
+ "status of snap in a volume");
+ goto out;
+ }
+ break;
+ }
+ default :
+ break;
+ }
+out :
+ return ret;
+}
+
+int
+gf_cli_snapshot_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ gf_cli_rsp rsp = {0, };
+ dict_t *dict = NULL;
+ char *snap_name = NULL;
+ int32_t type = 0;
+ call_frame_t *frame = NULL;
+ gf_boolean_t snap_driven = _gf_false;
+
+ if (req->rpc_status == -1) {
+ ret = -1;
+ goto out;
+ }
+
+ frame = myframe;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ dict = dict_new ();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32 (dict, "type", &type);
+ if (ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR, "failed to get type");
+ goto out;
+ }
+
+ switch (type) {
+ case GF_SNAP_OPTION_TYPE_CREATE:
+ if (rsp.op_ret) {
+ cli_err("snapshot create: failed: %s",
+ rsp.op_errstr ? rsp.op_errstr :
+ "Please check log file for details");
+ ret = rsp.op_ret;
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "snapname", &snap_name);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Failed to get snap name");
+ goto out;
+ }
+ cli_out ("snapshot create: %s: snap created successfully",
+ snap_name);
+ break;
+
+ case GF_SNAP_OPTION_TYPE_RESTORE:
+ /* TODO: Check if rsp.op_ret needs to be checked here. Or is
+ * it ok to check this in the start of the function where we
+ * get rsp.*/
+ if (rsp.op_ret) {
+ cli_err("snapshot restore: failed: %s",
+ rsp.op_errstr ? rsp.op_errstr :
+ "Please check log file for details");
+ ret = rsp.op_ret;
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "snapname", &snap_name);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Failed to get snap name");
+ goto out;
+ }
+
+ cli_out ("Snapshot restore: %s: Snap restored "
+ "successfully", snap_name);
+
+ ret = 0;
+ break;
+
+ case GF_SNAP_OPTION_TYPE_INFO:
+ if (rsp.op_ret) {
+ cli_err ("Snapshot info : failed: %s",
+ rsp.op_errstr ? rsp.op_errstr :
+ "Please check log file for details");
+ ret = rsp.op_ret;
+ goto out;
+ }
+
+ snap_driven = dict_get_str_boolean (dict, "snap-driven",
+ _gf_false);
+ if (snap_driven == _gf_true) {
+ ret = cli_call_snapshot_info (dict, snap_driven);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Snapshot info failed");
+ goto out;
+ }
+ } else if (snap_driven == _gf_false) {
+ ret = cli_get_snaps_in_volume (dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Snapshot info failed");
+ goto out;
+ }
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_CONFIG:
+ ret = cli_snapshot_config_display (dict, &rsp);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to display "
+ "snapshot config output.");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_LIST:
+ if (rsp.op_ret) {
+ cli_err ("Snapshot list : failed: %s",
+ rsp.op_errstr ? rsp.op_errstr :
+ "Please check log file for details");
+ ret = rsp.op_ret;
+ goto out;
+ }
+
+ ret = cli_snapshot_list (dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to display "
+ "snapshot list");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_DELETE:
+ ret = cli_snapshot_remove_reply (&rsp, dict, frame);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Failed to delete snap");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_STATUS:
+ ret = cli_snapshot_status_display (dict, &rsp);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to display "
+ "snapshot status output.");
+ goto out;
+ }
+ break;
+
+ default:
+ cli_err ("Unknown command executed");
+ ret = -1;
+ goto out;
+ }
+out:
+ if (dict)
+ dict_unref (dict);
+ cli_cmd_broadcast_response (ret);
+
+ free (rsp.dict.dict_val);
+ free (rsp.op_errstr);
+
+ return ret;
+}
+
+int32_t
+gf_cli_snapshot (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf_cli_req req = {{0,}};
+ dict_t *options = NULL;
+ int ret = -1;
+
+ if (!frame || !this || !data)
+ goto out;
+
+ options = data;
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_snapshot_cbk,
+ (xdrproc_t) xdr_gf_cli_req, options,
+ GLUSTER_CLI_SNAP, this, cli_rpc_prog,
+ NULL);
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+ GF_FREE (req.dict.dict_val);
+ return ret;
+}
+
+int
+cli_to_glusterd (gf_cli_req *req, call_frame_t *frame,
+ fop_cbk_fn_t cbkfn, xdrproc_t xdrproc, dict_t *dict,
+ int procnum, xlator_t *this, rpc_clnt_prog_t *prog,
+ struct iobref *iobref)
+{
+ int ret = 0;
+ size_t len = 0;
+ char *cmd = NULL;
+ int i = 0;
+ const char **words = NULL;
+ cli_local_t *local = NULL;
+
+ if (!this || !frame || !dict) {
+ ret = -1;
+ goto out;
+ }
+
+ if (!frame->local) {
+ ret = -1;
+ goto out;
+ }
+
+ local = frame->local;
+
+ if (!local->words) {
+ ret = -1;
+ goto out;
+ }
+
+ words = local->words;
+
+ while (words[i])
+ len += strlen (words[i++]) + 1;
+
+ cmd = GF_CALLOC (1, len, gf_common_mt_char);
+
+ if (!cmd) {
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; words[i]; i++) {
+ strncat (cmd, words[i], strlen (words[i]));
+ if (words[i+1] != NULL)
+ strncat (cmd, " ", strlen (" "));
+ }
+
+ cmd [len - 1] = '\0';
+
+ ret = dict_set_dynstr (dict, "cmd-str", cmd);
+ if (ret)
+ goto out;
+
+ ret = dict_allocate_and_serialize (dict, &(req->dict).dict_val,
+ &(req->dict).dict_len);
+
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to get serialized length of dict");
+ goto out;
+ }
+
+ ret = cli_cmd_submit (req, frame, prog, procnum, iobref, this,
+ cbkfn, (xdrproc_t) xdrproc);
+
+out:
+ return ret;
+
+}
+
+struct rpc_clnt_procedure gluster_cli_actors[GLUSTER_CLI_MAXVALUE] = {
+ [GLUSTER_CLI_NULL] = {"NULL", NULL },
+ [GLUSTER_CLI_PROBE] = {"PROBE_QUERY", gf_cli_probe},
+ [GLUSTER_CLI_DEPROBE] = {"DEPROBE_QUERY", gf_cli_deprobe},
+ [GLUSTER_CLI_LIST_FRIENDS] = {"LIST_FRIENDS", gf_cli_list_friends},
+ [GLUSTER_CLI_UUID_RESET] = {"UUID_RESET", gf_cli3_1_uuid_reset},
+ [GLUSTER_CLI_UUID_GET] = {"UUID_GET", gf_cli3_1_uuid_get},
+ [GLUSTER_CLI_CREATE_VOLUME] = {"CREATE_VOLUME", gf_cli_create_volume},
+ [GLUSTER_CLI_DELETE_VOLUME] = {"DELETE_VOLUME", gf_cli_delete_volume},
+ [GLUSTER_CLI_START_VOLUME] = {"START_VOLUME", gf_cli_start_volume},
+ [GLUSTER_CLI_STOP_VOLUME] = {"STOP_VOLUME", gf_cli_stop_volume},
+ [GLUSTER_CLI_RENAME_VOLUME] = {"RENAME_VOLUME", gf_cli_rename_volume},
+ [GLUSTER_CLI_DEFRAG_VOLUME] = {"DEFRAG_VOLUME", gf_cli_defrag_volume},
+ [GLUSTER_CLI_GET_VOLUME] = {"GET_VOLUME", gf_cli_get_volume},
+ [GLUSTER_CLI_GET_NEXT_VOLUME] = {"GET_NEXT_VOLUME", gf_cli_get_next_volume},
+ [GLUSTER_CLI_SET_VOLUME] = {"SET_VOLUME", gf_cli_set_volume},
+ [GLUSTER_CLI_ADD_BRICK] = {"ADD_BRICK", gf_cli_add_brick},
+ [GLUSTER_CLI_REMOVE_BRICK] = {"REMOVE_BRICK", gf_cli_remove_brick},
+ [GLUSTER_CLI_REPLACE_BRICK] = {"REPLACE_BRICK", gf_cli_replace_brick},
+ [GLUSTER_CLI_LOG_ROTATE] = {"LOG ROTATE", gf_cli_log_rotate},
+ [GLUSTER_CLI_GETSPEC] = {"GETSPEC", gf_cli_getspec},
+ [GLUSTER_CLI_PMAP_PORTBYBRICK] = {"PMAP PORTBYBRICK", gf_cli_pmap_b2p},
+ [GLUSTER_CLI_SYNC_VOLUME] = {"SYNC_VOLUME", gf_cli_sync_volume},
+ [GLUSTER_CLI_RESET_VOLUME] = {"RESET_VOLUME", gf_cli_reset_volume},
+ [GLUSTER_CLI_FSM_LOG] = {"FSM_LOG", gf_cli_fsm_log},
+ [GLUSTER_CLI_GSYNC_SET] = {"GSYNC_SET", gf_cli_gsync_set},
+ [GLUSTER_CLI_PROFILE_VOLUME] = {"PROFILE_VOLUME", gf_cli_profile_volume},
+ [GLUSTER_CLI_QUOTA] = {"QUOTA", gf_cli_quota},
+ [GLUSTER_CLI_TOP_VOLUME] = {"TOP_VOLUME", gf_cli_top_volume},
+ [GLUSTER_CLI_GETWD] = {"GETWD", gf_cli_getwd},
+ [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME", gf_cli_status_volume},
+ [GLUSTER_CLI_STATUS_ALL] = {"STATUS_ALL", gf_cli_status_volume_all},
+ [GLUSTER_CLI_MOUNT] = {"MOUNT", gf_cli_mount},
+ [GLUSTER_CLI_UMOUNT] = {"UMOUNT", gf_cli_umount},
+ [GLUSTER_CLI_HEAL_VOLUME] = {"HEAL_VOLUME", gf_cli_heal_volume},
+ [GLUSTER_CLI_STATEDUMP_VOLUME] = {"STATEDUMP_VOLUME", gf_cli_statedump_volume},
+ [GLUSTER_CLI_LIST_VOLUME] = {"LIST_VOLUME", gf_cli_list_volume},
+ [GLUSTER_CLI_CLRLOCKS_VOLUME] = {"CLEARLOCKS_VOLUME", gf_cli_clearlocks_volume},
+ [GLUSTER_CLI_COPY_FILE] = {"COPY_FILE", gf_cli_copy_file},
+ [GLUSTER_CLI_SYS_EXEC] = {"SYS_EXEC", gf_cli_sys_exec},
+ [GLUSTER_CLI_SNAP] = {"SNAP", gf_cli_snapshot},
+};
+
+struct rpc_clnt_program cli_prog = {
+ .progname = "Gluster CLI",
+ .prognum = GLUSTER_CLI_PROGRAM,
+ .progver = GLUSTER_CLI_VERSION,
+ .numproc = GLUSTER_CLI_MAXVALUE,
+ .proctable = gluster_cli_actors,
+};
diff --git a/cli/src/cli-xml-output.c b/cli/src/cli-xml-output.c
new file mode 100644
index 000000000..d8884d44b
--- /dev/null
+++ b/cli/src/cli-xml-output.c
@@ -0,0 +1,3772 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <stdlib.h>
+#include "cli.h"
+#include "cli1-xdr.h"
+#include "run.h"
+#include "compat.h"
+#include "syscall.h"
+
+
+enum gf_task_types {
+ GF_TASK_TYPE_REBALANCE,
+ GF_TASK_TYPE_REMOVE_BRICK
+};
+
+/*
+ * IMPORTANT NOTE:
+ * All exported functions in this file which use libxml need use a
+ * #if (HAVE_LIB_XML), #else, #endif
+ * For eg,
+ * int exported_func () {
+ * #if (HAVE_LIB_XML)
+ * <Stuff using libxml>
+ * #else
+ * return 0;
+ * #endif
+ * }
+ *
+ * All other functions, which are called internally within this file need to be
+ * within #if (HAVE_LIB_XML), #endif statements
+ * For eg,
+ * #if (HAVE_LIB_XML)
+ * int internal_func ()
+ * {
+ * }
+ * #endif
+ *
+ * Following the above formate ensures that all xml related code is compliled
+ * only when libxml2 is present, and also keeps the rest of the codebase free
+ * of #if (HAVE_LIB_XML)
+ */
+
+
+#if (HAVE_LIB_XML)
+
+#include <libxml/encoding.h>
+#include <libxml/xmlwriter.h>
+
+#define XML_RET_CHECK_AND_GOTO(ret, label) do { \
+ if (ret < 0) { \
+ ret = -1; \
+ goto label; \
+ } \
+ else \
+ ret = 0; \
+ }while (0) \
+
+int
+cli_begin_xml_output (xmlTextWriterPtr *writer, xmlDocPtr *doc)
+{
+ int ret = -1;
+
+ *writer = xmlNewTextWriterDoc (doc, 0);
+ if (writer == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = xmlTextWriterStartDocument (*writer, "1.0", "UTF-8", "yes");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* <cliOutput> */
+ ret = xmlTextWriterStartElement (*writer, (xmlChar *)"cliOutput");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_end_xml_output (xmlTextWriterPtr writer, xmlDocPtr doc)
+{
+ int ret = -1;
+
+ /* </cliOutput> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterEndDocument (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+
+ /* Dump xml document to stdout and pretty format it */
+ xmlSaveFormatFileEnc ("-", doc, "UTF-8", 1);
+
+ xmlFreeTextWriter (writer);
+ xmlFreeDoc (doc);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_common (xmlTextWriterPtr writer, int op_ret, int op_errno,
+ char *op_errstr)
+{
+ int ret = -1;
+
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"opRet",
+ "%d", op_ret);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"opErrno",
+ "%d", op_errno);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"opErrstr",
+ "%s", op_errstr);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+#endif
+
+int
+cli_xml_output_str (char *op, char *str, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+
+ ret = cli_begin_xml_output (&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ if (op) {
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"cliOp",
+ "%s", op);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ if (str) {
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"output",
+ "%s", str);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ ret = cli_end_xml_output (writer, doc);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+#if (HAVE_LIB_XML)
+int
+cli_xml_output_data_pair (dict_t *this, char *key, data_t *value,
+ void *data)
+{
+ int ret = -1;
+ xmlTextWriterPtr *writer = NULL;
+
+ writer = (xmlTextWriterPtr *)data;
+
+ ret = xmlTextWriterWriteFormatElement (*writer, (xmlChar *)key,
+ "%s", value->data);
+
+ return ret;
+}
+#endif
+
+int
+cli_xml_output_dict ( char *op, dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+
+ ret = cli_begin_xml_output (&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <"op"> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)op);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ if (dict)
+ dict_foreach (dict, cli_xml_output_data_pair, &writer);
+
+ /* </"op"> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = cli_end_xml_output (writer, doc);
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+#if (HAVE_LIB_XML)
+int
+cli_xml_output_vol_status_common (xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index, int *online,
+ gf_boolean_t *node_present)
+{
+ int ret = -1;
+ char *hostname = NULL;
+ char *path = NULL;
+ int port = 0;
+ int status = 0;
+ int pid = 0;
+ char key[1024] = {0,};
+
+ snprintf (key, sizeof (key), "brick%d.hostname", brick_index);
+ ret = dict_get_str (dict, key, &hostname);
+ if (ret) {
+ *node_present = _gf_false;
+ goto out;
+ }
+ *node_present = _gf_true;
+
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"hostname",
+ "%s", hostname);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.path", brick_index);
+ ret = dict_get_str (dict, key, &path);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"path",
+ "%s", path);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.status", brick_index);
+ ret = dict_get_int32 (dict, key, &status);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"status",
+ "%d", status);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ *online = status;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.port", brick_index);
+ ret = dict_get_int32 (dict, key, &port);
+ if (ret)
+ goto out;
+
+ /* If the process is either offline or doesn't provide a port (shd)
+ * port = "N/A"
+ * else print the port number of the process.
+ */
+
+ if (*online == 1 && port != 0)
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"port",
+ "%d", port);
+ else
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"port",
+ "%s", "N/A");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.pid", brick_index);
+ ret = dict_get_int32 (dict, key, &pid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"pid",
+ "%d", pid);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_detail (xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index)
+{
+ int ret = -1;
+ uint64_t size_total = 0;
+ uint64_t size_free = 0;
+ char *device = NULL;
+ uint64_t block_size = 0;
+ char *mnt_options = NULL;
+ char *fs_name = NULL;
+ char *inode_size = NULL;
+ uint64_t inodes_total = 0;
+ uint64_t inodes_free = 0;
+ char key[1024] = {0,};
+
+ snprintf (key, sizeof (key), "brick%d.total", brick_index);
+ ret = dict_get_uint64 (dict, key, &size_total);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"sizeTotal",
+ "%"PRIu64, size_total);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.free", brick_index);
+ ret = dict_get_uint64 (dict, key, &size_free);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"sizeFree",
+ "%"PRIu64, size_free);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.device", brick_index);
+ ret = dict_get_str (dict, key, &device);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"device",
+ "%s", device);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.block_size", brick_index);
+ ret = dict_get_uint64 (dict, key, &block_size);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"blockSize",
+ "%"PRIu64, block_size);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mnt_options", brick_index);
+ ret = dict_get_str (dict, key, &mnt_options);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"mntOptions",
+ "%s", mnt_options);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.fs_name", brick_index);
+ ret = dict_get_str (dict, key, &fs_name);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"fsName",
+ "%s", fs_name);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* inode details are only available for ext 2/3/4 & xfs */
+ if (!IS_EXT_FS(fs_name) || strcmp (fs_name, "xfs")) {
+ ret = 0;
+ goto out;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.inode_size", brick_index);
+ ret = dict_get_str (dict, key, &inode_size);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"inodeSize",
+ "%s", fs_name);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.total_inodes", brick_index);
+ ret = dict_get_uint64 (dict, key, &inodes_total);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"inodesTotal",
+ "%"PRIu64, inodes_total);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.free_inodes", brick_index);
+ ret = dict_get_uint64 (dict, key, &inodes_free);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"inodesFree",
+ "%"PRIu64, inodes_free);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_mempool (xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
+{
+ int ret = -1;
+ int mempool_count = 0;
+ char *name = NULL;
+ int hotcount = 0;
+ int coldcount = 0;
+ uint64_t paddedsizeof = 0;
+ uint64_t alloccount = 0;
+ int maxalloc = 0;
+ char key[1024] = {0,};
+ int i = 0;
+
+ /* <mempool> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"mempool");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "%s.mempool-count", prefix);
+ ret = dict_get_int32 (dict, key, &mempool_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"count",
+ "%d", mempool_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ for (i = 0; i < mempool_count; i++) {
+ /* <pool> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"pool");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.name", prefix, i);
+ ret = dict_get_str (dict, key, &name);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"name",
+ "%s", name);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.hotcount", prefix, i);
+ ret = dict_get_int32 (dict, key, &hotcount);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"hotCount",
+ "%d", hotcount);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.coldcount", prefix, i);
+ ret = dict_get_int32 (dict, key, &coldcount);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"coldCount",
+ "%d", coldcount);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.paddedsizeof",
+ prefix, i);
+ ret = dict_get_uint64 (dict, key, &paddedsizeof);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"padddedSizeOf", "%"PRIu64,
+ paddedsizeof);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.alloccount", prefix, i);
+ ret = dict_get_uint64 (dict, key, &alloccount);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"allocCount",
+ "%"PRIu64, alloccount);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.max_alloc", prefix, i);
+ ret = dict_get_int32 (dict, key, &maxalloc);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"maxAlloc",
+ "%d", maxalloc);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.pool-misses", prefix, i);
+ ret = dict_get_uint64 (dict, key, &alloccount);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"poolMisses",
+ "%"PRIu64, alloccount);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pool%d.max-stdalloc", prefix, i);
+ ret = dict_get_int32 (dict, key, &maxalloc);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"maxStdAlloc",
+ "%d", maxalloc);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+
+ /* </pool> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ /* </mempool> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_mem (xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index)
+{
+ int ret = -1;
+ int arena = 0;
+ int ordblks = 0;
+ int smblks = 0;
+ int hblks = 0;
+ int hblkhd = 0;
+ int usmblks = 0;
+ int fsmblks = 0;
+ int uordblks = 0;
+ int fordblks = 0;
+ int keepcost = 0;
+ char key[1024] = {0,};
+
+ /* <memStatus> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"memStatus");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* <mallinfo> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"mallinfo");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "brick%d.mallinfo.arena", brick_index);
+ ret = dict_get_int32 (dict, key, &arena);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"arena",
+ "%d", arena);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.ordblks", brick_index);
+ ret = dict_get_int32 (dict, key, &ordblks);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"ordblks",
+ "%d", ordblks);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.smblks", brick_index);
+ ret = dict_get_int32 (dict, key, &smblks);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"smblks",
+ "%d", smblks);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.hblks", brick_index);
+ ret = dict_get_int32 (dict, key, &hblks);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"hblks",
+ "%d", hblks);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.hblkhd", brick_index);
+ ret = dict_get_int32 (dict, key, &hblkhd);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"hblkhd",
+ "%d", hblkhd);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.usmblks", brick_index);
+ ret = dict_get_int32 (dict, key, &usmblks);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"usmblks",
+ "%d", usmblks);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.fsmblks", brick_index);
+ ret = dict_get_int32 (dict, key, &fsmblks);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"fsmblks",
+ "%d", fsmblks);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.uordblks", brick_index);
+ ret = dict_get_int32 (dict, key, &uordblks);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"uordblks",
+ "%d", uordblks);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.fordblks", brick_index);
+ ret = dict_get_int32 (dict, key, &fordblks);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"fordblks",
+ "%d", fordblks);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.mallinfo.keepcost", brick_index);
+ ret = dict_get_int32 (dict, key, &keepcost);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"keepcost",
+ "%d", keepcost);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </mallinfo> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d", brick_index);
+ ret = cli_xml_output_vol_status_mempool (writer, dict, key);
+ if (ret)
+ goto out;
+
+ /* </memStatus> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_clients (xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index)
+{
+ int ret = -1;
+ int client_count = 0;
+ char *hostname = NULL;
+ uint64_t bytes_read = 0;
+ uint64_t bytes_write = 0;
+ char key[1024] = {0,};
+ int i = 0;
+
+ /* <clientsStatus> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"clientsStatus");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "brick%d.clientcount", brick_index);
+ ret = dict_get_int32 (dict, key, &client_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"clientCount",
+ "%d", client_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ for (i = 0; i < client_count; i++) {
+ /* <client> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"client");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.client%d.hostname",
+ brick_index, i);
+ ret = dict_get_str (dict, key, &hostname);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"hostname",
+ "%s", hostname);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.client%d.bytesread",
+ brick_index, i);
+ ret = dict_get_uint64 (dict, key, &bytes_read);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"bytesRead",
+ "%"PRIu64, bytes_read);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.client%d.byteswrite",
+ brick_index, i);
+ ret = dict_get_uint64 (dict, key, &bytes_write);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"bytesWrite",
+ "%"PRIu64, bytes_write);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </client> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ /* </clientsStatus> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_inode_entry (xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
+{
+ int ret = -1;
+ char *gfid = NULL;
+ uint64_t nlookup = 0;
+ uint32_t ref = 0;
+ int ia_type = 0;
+ char key[1024] = {0,};
+
+ /* <inode> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"inode");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "%s.gfid", prefix);
+ ret = dict_get_str (dict, key, &gfid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"gfid",
+ "%s", gfid);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key,0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.nlookup", prefix);
+ ret = dict_get_uint64 (dict, key, &nlookup);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"nLookup",
+ "%"PRIu64, nlookup);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key,0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.ref", prefix);
+ ret = dict_get_uint32 (dict, key, &ref);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"ref",
+ "%"PRIu32, ref);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key,0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.ia_type", prefix);
+ ret = dict_get_int32 (dict, key, &ia_type);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"iaType",
+ "%d", ia_type);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </inode> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_itable (xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
+{
+ int ret = -1;
+ uint32_t active_size = 0;
+ uint32_t lru_size = 0;
+ uint32_t purge_size = 0;
+ char key[1024] = {0,};
+ int i = 0;
+
+ snprintf (key, sizeof (key), "%s.active_size", prefix);
+ ret = dict_get_uint32 (dict, key, &active_size);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"activeSize",
+ "%"PRIu32, active_size);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ if (active_size != 0) {
+ /* <active> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"active");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ for (i = 0; i < active_size; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.active%d", prefix, i);
+ ret = cli_xml_output_vol_status_inode_entry
+ (writer, dict, key);
+ if (ret)
+ goto out;
+ }
+ /* </active> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.lru_size", prefix);
+ ret = dict_get_uint32 (dict, key, &lru_size);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"lruSize",
+ "%"PRIu32, lru_size);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ if (lru_size != 0) {
+ /* <lru> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"lru");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ for (i = 0; i < lru_size; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.lru%d", prefix, i);
+ ret = cli_xml_output_vol_status_inode_entry
+ (writer, dict, key);
+ if (ret)
+ goto out;
+ }
+ /* </lru> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.purge_size", prefix);
+ ret = dict_get_uint32 (dict, key, &purge_size);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"purgeSize",
+ "%"PRIu32, purge_size);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ if (purge_size != 0) {
+ /* <purge> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"purge");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ for (i = 0; i < purge_size; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.purge%d", prefix, i);
+ ret = cli_xml_output_vol_status_inode_entry
+ (writer, dict, key);
+ if (ret)
+ goto out;
+ }
+ /* </purge> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_inode (xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index)
+{
+ int ret = -1;
+ int conn_count = 0;
+ char key[1024] = {0,};
+ int i = 0;
+
+ /* <inodeStatus> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"inodeStatus");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "brick%d.conncount", brick_index);
+ ret = dict_get_int32 (dict, key, &conn_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"connections",
+ "%d", conn_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ for (i = 0; i < conn_count; i++) {
+ /* <connection> */
+ ret = xmlTextWriterStartElement (writer,
+ (xmlChar *)"connection");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.conn%d.itable",
+ brick_index, i);
+ ret = cli_xml_output_vol_status_itable (writer, dict, key);
+ if (ret)
+ goto out;
+
+ /* </connection> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ /* </inodeStatus> */
+ ret= xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_fdtable (xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
+{
+ int ret = -1;
+ int refcount = 0;
+ uint32_t maxfds = 0;
+ int firstfree = 0;
+ int openfds = 0;
+ int fd_pid = 0;
+ int fd_refcount = 0;
+ int fd_flags = 0;
+ char key[1024] = {0,};
+ int i = 0;
+
+ /* <fdTable> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"fdTable");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "%s.refcount", prefix);
+ ret = dict_get_int32 (dict, key, &refcount);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"refCount",
+ "%d", refcount);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.maxfds", prefix);
+ ret = dict_get_uint32 (dict, key, &maxfds);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"maxFds",
+ "%"PRIu32, maxfds);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.firstfree", prefix);
+ ret = dict_get_int32 (dict, key, &firstfree);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"firstFree",
+ "%d", firstfree);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.openfds", prefix);
+ ret = dict_get_int32 (dict, key, &openfds);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"openFds",
+ "%d", openfds);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ for (i = 0; i < maxfds; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.fdentry%d.pid", prefix, i);
+ ret = dict_get_int32 (dict, key, &fd_pid);
+ if (ret)
+ continue;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.fdentry%d.refcount",
+ prefix, i);
+ ret = dict_get_int32 (dict, key, &fd_refcount);
+ if (ret)
+ continue;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.fdentry%d.flags", prefix, i);
+ ret = dict_get_int32 (dict, key, &fd_flags);
+ if (ret)
+ continue;
+
+ /* <fd> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"fd");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"entry",
+ "%d", i+1);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"pid",
+ "%d", fd_pid);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"refCount",
+ "%d", fd_refcount);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"flags",
+ "%d", fd_flags);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </fd> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ /* </fdTable> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_fd (xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index)
+{
+ int ret = -1;
+ int conn_count = 0;
+ char key[1024] = {0,};
+ int i = 0;
+
+ /* <fdStatus> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"fdStatus");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "brick%d.conncount", brick_index);
+ ret = dict_get_int32 (dict, key, &conn_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"connections",
+ "%d", conn_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ for (i = 0; i < conn_count; i++) {
+ /* <connection> */
+ ret = xmlTextWriterStartElement (writer,
+ (xmlChar *)"connection");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.conn%d.fdtable",
+ brick_index, i);
+ ret = cli_xml_output_vol_status_fdtable (writer, dict, key);
+ if (ret)
+ goto out;
+
+ /* </connection> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ /* </fdStatus> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_callframe (xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
+{
+ int ret = -1;
+ int ref_count = 0;
+ char *translator = NULL;
+ int complete = 0;
+ char *parent = NULL;
+ char *wind_from = NULL;
+ char *wind_to = NULL;
+ char *unwind_from = NULL;
+ char *unwind_to = NULL;
+ char key[1024] = {0,};
+
+ /* <callFrame> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"callFrame");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "%s.refcount", prefix);
+ ret = dict_get_int32 (dict, key, &ref_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"refCount",
+ "%d", ref_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.translator", prefix);
+ ret = dict_get_str (dict, key, &translator);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"translator",
+ "%s", translator);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.complete", prefix);
+ ret = dict_get_int32 (dict, key, &complete);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"complete",
+ "%d", complete);
+ XML_RET_CHECK_AND_GOTO (ret ,out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.parent", prefix);
+ ret = dict_get_str (dict, key, &parent);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"parent",
+ "%s", parent);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.windfrom", prefix);
+ ret = dict_get_str (dict, key, &wind_from);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"windFrom",
+ "%s", wind_from);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.windto", prefix);
+ ret = dict_get_str (dict, key, &wind_to);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"windTo",
+ "%s", wind_to);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.unwindfrom", prefix);
+ ret = dict_get_str (dict, key, &unwind_from);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"unwindFrom",
+ "%s", unwind_from);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.unwindto", prefix);
+ ret = dict_get_str (dict, key, &unwind_to);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"unwindTo",
+ "%s", unwind_to);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ /* </callFrame> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_callstack (xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
+{
+ int ret = -1;
+ int uid = 0;
+ int gid = 0;
+ int pid = 0;
+ uint64_t unique = 0;
+ int frame_count = 0;
+ char key[1024] = {0,};
+ int i = 0;
+
+ /* <callStack> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"callStack");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "%s.uid", prefix);
+ ret = dict_get_int32 (dict, key, &uid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"uid",
+ "%d", uid);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.gid", prefix);
+ ret = dict_get_int32 (dict, key, &gid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"gid",
+ "%d", gid);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pid", prefix);
+ ret = dict_get_int32 (dict, key, &pid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"pid",
+ "%d", pid);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.unique", prefix);
+ ret = dict_get_uint64 (dict, key, &unique);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"unique",
+ "%"PRIu64, unique);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.count", prefix);
+ ret = dict_get_int32 (dict, key, &frame_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"frameCount",
+ "%d", frame_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ for (i = 0; i < frame_count; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.frame%d", prefix, i);
+ ret = cli_xml_output_vol_status_callframe (writer, dict,
+ key);
+ if (ret)
+ goto out;
+ }
+
+ /* </callStack> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_callpool (xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index)
+{
+ int ret = -1;
+ int call_count = 0;
+ char key[1024] = {0,};
+ int i = 0;
+
+ /* <callpoolStatus> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"callpoolStatus");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "brick%d.callpool.count", brick_index);
+ ret = dict_get_int32 (dict, key, &call_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"count",
+ "%d", call_count);
+
+ for (i = 0; i < call_count; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.callpool.stack%d",
+ brick_index, i);
+ ret = cli_xml_output_vol_status_callstack (writer, dict,
+ key);
+ if (ret)
+ goto out;
+ }
+
+ /* </callpoolStatus> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+#endif
+
+int
+cli_xml_output_vol_status_begin (cli_local_t *local, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+
+ ret = cli_begin_xml_output (&(local->writer), &(local->doc));
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = cli_xml_output_common (local->writer, op_ret, op_errno,
+ op_errstr);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* <volStatus> */
+ ret = xmlTextWriterStartElement (local->writer,
+ (xmlChar *) "volStatus");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* <volumes> */
+ ret = xmlTextWriterStartElement (local->writer, (xmlChar *)"volumes");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_vol_status_end (cli_local_t *local)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+
+ /* </volumes> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </volStatus> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = cli_end_xml_output (local->writer, local->doc);
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+#if (HAVE_LIB_XML)
+int
+cli_xml_output_remove_brick_task_params (xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
+{
+ int ret = -1;
+ char key[1024] = {0,};
+ int count = 0;
+ int i = 0;
+ char *brick = NULL;
+
+ /* <params> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"params");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "%s.count", prefix);
+ ret = dict_get_int32 (dict, key, &count);
+ if (ret)
+ goto out;
+
+ for (i = 1; i <= count; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.brick%d", prefix, i);
+ ret = dict_get_str (dict, key, &brick);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"brick",
+ "%s", brick);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ brick = NULL;
+ }
+
+ /* </param> */
+ ret = xmlTextWriterEndElement (writer);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_replace_brick_task_params (xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
+{
+
+ int ret = -1;
+ char key[1024] = {0,};
+ char *brick = NULL;
+
+ /* <params> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"params");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "%s.src-brick", prefix);
+ ret = dict_get_str (dict, key, &brick);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"srcBrick",
+ "%s", brick);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.dst-brick", prefix);
+ ret = dict_get_str (dict, key, &brick);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"dstBrick",
+ "%s", brick);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+
+ /* </param> */
+ ret = xmlTextWriterEndElement (writer);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_tasks (cli_local_t *local, dict_t *dict) {
+ int ret = -1;
+ char *task_type = NULL;
+ char *task_id_str = NULL;
+ int status = 0;
+ int tasks = 0;
+ char key[1024] = {0,};
+ int i = 0;
+
+ /* <tasks> */
+ ret = xmlTextWriterStartElement (local->writer, (xmlChar *)"tasks");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_int32 (dict, "tasks", &tasks);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < tasks; i++) {
+ /* <task> */
+ ret = xmlTextWriterStartElement (local->writer,
+ (xmlChar *)"task");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "task%d.type", i);
+ ret = dict_get_str (dict, key, &task_type);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"type",
+ "%s", task_type);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "task%d.id", i);
+ ret = dict_get_str (dict, key, &task_id_str);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"id",
+ "%s", task_id_str);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "task%d.status", i);
+ ret = dict_get_int32 (dict, key, &status);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"status",
+ "%d", status);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ if (!strcmp (task_type, "Replace brick")) {
+ if (status) {
+ status = GF_DEFRAG_STATUS_COMPLETE;
+ } else {
+ status = GF_DEFRAG_STATUS_STARTED;
+ }
+ }
+
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"statusStr",
+ "%s",
+ cli_vol_task_status_str[status]);
+
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "task%d", i);
+ if (!strcmp (task_type, "Replace brick")) {
+ ret = cli_xml_output_replace_brick_task_params
+ (local->writer, dict, key);
+ if (ret)
+ goto out;
+ } else if (!strcmp (task_type, "Remove brick")) {
+ ret = cli_xml_output_remove_brick_task_params
+ (local->writer, dict, key);
+ if (ret)
+ goto out;
+ }
+
+
+ /* </task> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ /* </tasks> */
+ ret = xmlTextWriterEndElement (local->writer);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_tasks_detail (cli_local_t *local, dict_t *dict)
+{
+ int ret = -1;
+ char *volname = NULL;
+
+ /*<volume>*/
+ ret = xmlTextWriterStartElement (local->writer, (xmlChar *)"volume");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"volName", "%s",
+ volname);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = cli_xml_output_vol_status_tasks (local, dict);
+ if (ret)
+ goto out;
+
+ /* </volume> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ return ret;
+}
+#endif
+
+int
+cli_xml_output_vol_status (cli_local_t *local, dict_t *dict)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ char *volname = NULL;
+ int brick_count = 0;
+ int brick_index_max = -1;
+ int other_count = 0;
+ int index_max = 0;
+ uint32_t cmd = GF_CLI_STATUS_NONE;
+ int online = 0;
+ gf_boolean_t node_present = _gf_true;
+ int i;
+
+
+ /* <volume> */
+ ret = xmlTextWriterStartElement (local->writer, (xmlChar *)"volume");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"volName", "%s",
+ volname);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_int32 (dict, "count", &brick_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"nodeCount", "%d",
+ brick_count);
+ if (ret)
+ goto out;
+
+ ret = dict_get_uint32 (dict, "cmd", &cmd);
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32 (dict, "brick-index-max", &brick_index_max);
+ if (ret)
+ goto out;
+ ret = dict_get_int32 (dict, "other-count", &other_count);
+ if (ret)
+ goto out;
+
+ index_max = brick_index_max + other_count;
+
+ for (i = 0; i <= index_max; i++) {
+ /* <node> */
+ ret = xmlTextWriterStartElement (local->writer,
+ (xmlChar *)"node");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = cli_xml_output_vol_status_common (local->writer, dict, i,
+ &online, &node_present);
+ if (ret) {
+ if (node_present)
+ goto out;
+ else
+ continue;
+ }
+
+ switch (cmd & GF_CLI_STATUS_MASK) {
+ case GF_CLI_STATUS_DETAIL:
+ ret = cli_xml_output_vol_status_detail (local->writer,
+ dict, i);
+ if (ret)
+ goto out;
+ break;
+
+ case GF_CLI_STATUS_MEM:
+ if (online) {
+ ret = cli_xml_output_vol_status_mem
+ (local->writer, dict, i);
+ if (ret)
+ goto out;
+ }
+ break;
+
+ case GF_CLI_STATUS_CLIENTS:
+ if (online) {
+ ret = cli_xml_output_vol_status_clients
+ (local->writer, dict, i);
+ if (ret)
+ goto out;
+ }
+ break;
+
+ case GF_CLI_STATUS_INODE:
+ if (online) {
+ ret = cli_xml_output_vol_status_inode
+ (local->writer, dict, i);
+ if (ret)
+ goto out;
+ }
+ break;
+
+ case GF_CLI_STATUS_FD:
+ if (online) {
+ ret = cli_xml_output_vol_status_fd
+ (local->writer, dict, i);
+ if (ret)
+ goto out;
+ }
+ break;
+
+ case GF_CLI_STATUS_CALLPOOL:
+ if (online) {
+ ret = cli_xml_output_vol_status_callpool
+ (local->writer, dict, i);
+ if (ret)
+ goto out;
+ }
+ break;
+ default:
+ break;
+
+ }
+ /* </node> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ /* Tasks are only present when a normal volume status call is done on a
+ * single volume or on all volumes
+ */
+ if (((cmd & GF_CLI_STATUS_MASK) == GF_CLI_STATUS_NONE) &&
+ (cmd & (GF_CLI_STATUS_VOL|GF_CLI_STATUS_ALL))) {
+ ret = cli_xml_output_vol_status_tasks (local, dict);
+ if (ret)
+ goto out;
+ }
+
+ /* </volume> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+#if (HAVE_LIB_XML)
+int
+cli_xml_output_vol_top_rw_perf (xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index, int member_index)
+{
+ int ret = -1;
+ char *filename = NULL;
+ uint64_t throughput = 0;
+ long int time_sec = 0;
+ long int time_usec = 0;
+ char timestr[256] = {0,};
+ char key[1024] = {0,};
+
+ /* <file> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"file");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "%d-filename-%d", brick_index,
+ member_index);
+ ret = dict_get_str (dict, key, &filename);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"filename",
+ "%s", filename);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-value-%d", brick_index, member_index);
+ ret = dict_get_uint64 (dict, key, &throughput);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"count",
+ "%"PRIu64, throughput);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-time-sec-%d", brick_index,
+ member_index);
+ ret = dict_get_int32 (dict, key, (int32_t *)&time_sec);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-time-usec-%d", brick_index,
+ member_index);
+ ret = dict_get_int32 (dict, key, (int32_t *)&time_usec);
+ if (ret)
+ goto out;
+
+ gf_time_fmt (timestr, sizeof timestr, time_sec, gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr),
+ sizeof timestr - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS, time_usec);
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"time",
+ "%s", timestr);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </file> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_top_other (xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index, int member_index)
+{
+ int ret = -1;
+ char *filename = NULL;
+ uint64_t count = 0;
+ char key[1024] = {0,};
+
+ /* <file> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"file");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "%d-filename-%d", brick_index,
+ member_index);
+ ret = dict_get_str (dict, key, &filename);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"filename",
+ "%s", filename);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-value-%d", brick_index, member_index);
+ ret = dict_get_uint64 (dict, key, &count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"count",
+ "%"PRIu64, count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </file> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+#endif
+
+int
+cli_xml_output_vol_top (dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ int brick_count = 0;
+ int top_op = GF_CLI_TOP_NONE;
+ char *brick_name = NULL;
+ int members = 0;
+ uint64_t current_open = 0;
+ uint64_t max_open = 0;
+ char *max_open_time = NULL;
+ double throughput = 0.0;
+ double time_taken = 0.0;
+ char key[1024] = {0,};
+ int i = 0;
+ int j = 0;
+
+ ret = cli_begin_xml_output (&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <volTop> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"volTop");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_int32 (dict, "count", &brick_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"brickCount",
+ "%d", brick_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_int32 (dict, "1-top-op", &top_op);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"topOp",
+ "%d", top_op);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ while (i < brick_count) {
+ i++;
+
+ /* <brick> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"brick");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-brick", i);
+ ret = dict_get_str (dict, key, &brick_name);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"name",
+ "%s", brick_name);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key , sizeof (key), "%d-members", i);
+ ret = dict_get_int32 (dict, key, &members);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"members",
+ "%d", members);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ switch (top_op) {
+ case GF_CLI_TOP_OPEN:
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-current-open", i);
+ ret = dict_get_uint64 (dict, key, &current_open);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"currentOpen", "%"PRIu64,
+ current_open);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-max-open", i);
+ ret = dict_get_uint64 (dict, key, &max_open);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"maxOpen", "%"PRIu64,
+ max_open);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-max-openfd-time", i);
+ ret = dict_get_str (dict, key, &max_open_time);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"maxOpenTime", "%s",
+ max_open_time);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ case GF_CLI_TOP_READ:
+ case GF_CLI_TOP_WRITE:
+ case GF_CLI_TOP_OPENDIR:
+ case GF_CLI_TOP_READDIR:
+
+ break;
+
+ case GF_CLI_TOP_READ_PERF:
+ case GF_CLI_TOP_WRITE_PERF:
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-throughput", i);
+ ret = dict_get_double (dict, key, &throughput);
+ if (!ret) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-time", i);
+ ret = dict_get_double (dict, key, &time_taken);
+ }
+
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"throughput",
+ "%f", throughput);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"timeTaken",
+ "%f", time_taken);
+ }
+
+ break;
+
+ default:
+ ret = -1;
+ goto out;
+ }
+
+ for (j = 1; j <= members; j++) {
+ if (top_op == GF_CLI_TOP_READ_PERF ||
+ top_op == GF_CLI_TOP_WRITE_PERF) {
+ ret = cli_xml_output_vol_top_rw_perf
+ (writer, dict, i, j);
+ } else {
+ ret = cli_xml_output_vol_top_other
+ (writer, dict, i, j);
+ }
+ if (ret)
+ goto out;
+ }
+
+
+ /* </brick> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ /* </volTop> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = cli_end_xml_output (writer, doc);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+#if (HAVE_LIB_XML)
+int
+cli_xml_output_vol_profile_stats (xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index, int interval)
+{
+ int ret = -1;
+ uint64_t read_count = 0;
+ uint64_t write_count = 0;
+ uint64_t hits = 0;
+ double avg_latency = 0.0;
+ double max_latency = 0.0;
+ double min_latency = 0.0;
+ uint64_t duration = 0;
+ uint64_t total_read = 0;
+ uint64_t total_write = 0;
+ char key[1024] = {0};
+ int i = 0;
+
+ /* <cumulativeStats> || <intervalStats> */
+ if (interval == -1)
+ ret = xmlTextWriterStartElement (writer,
+ (xmlChar *)"cumulativeStats");
+ else
+ ret = xmlTextWriterStartElement (writer,
+ (xmlChar *)"intervalStats");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* <blockStats> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"blockStats");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ for (i = 0; i < 32; i++) {
+ /* <block> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"block");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"size", "%"PRIu32, (1 << i));
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-%d-read-%d", brick_index,
+ interval, (1 << i));
+ ret = dict_get_uint64 (dict, key, &read_count);
+ if (ret)
+ read_count = 0;
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"reads", "%"PRIu64, read_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-%d-write-%d", brick_index,
+ interval, (1 << i));
+ ret = dict_get_uint64 (dict, key, &write_count);
+ if (ret)
+ write_count = 0;
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"writes", "%"PRIu64, write_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </block> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ /* </blockStats> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* <fopStats> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"fopStats");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-%d-%d-hits", brick_index,
+ interval, i);
+ ret = dict_get_uint64 (dict, key, &hits);
+ if (ret)
+ goto cont;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-%d-%d-avglatency", brick_index,
+ interval, i);
+ ret = dict_get_double (dict, key, &avg_latency);
+ if (ret)
+ goto cont;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-%d-%d-minlatency", brick_index,
+ interval, i);
+ ret = dict_get_double (dict, key, &min_latency);
+ if (ret)
+ goto cont;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-%d-%d-maxlatency", brick_index,
+ interval, i);
+ ret = dict_get_double (dict, key, &max_latency);
+ if (ret)
+ goto cont;
+
+ /* <fop> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"fop");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"name","%s", gf_fop_list[i]);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"hits", "%"PRIu64, hits);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"avgLatency", "%f", avg_latency);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"minLatency", "%f", min_latency);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"maxLatency", "%f", max_latency);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </fop> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+cont:
+ hits = 0;
+ avg_latency = 0.0;
+ min_latency = 0.0;
+ max_latency = 0.0;
+ }
+
+ /* </fopStats> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-%d-duration", brick_index, interval);
+ ret = dict_get_uint64 (dict, key, &duration);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"duration",
+ "%"PRIu64, duration);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-%d-total-read", brick_index, interval);
+ ret = dict_get_uint64 (dict, key, &total_read);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"totalRead",
+ "%"PRIu64, total_read);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-%d-total-write", brick_index, interval);
+ ret = dict_get_uint64 (dict, key, &total_write);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"totalWrite",
+ "%"PRIu64, total_write);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </cumulativeStats> || </intervalStats> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+#endif
+
+int
+cli_xml_output_vol_profile (dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ char *volname = NULL;
+ int op = GF_CLI_STATS_NONE;
+ int brick_count = 0;
+ char *brick_name = NULL;
+ int interval = 0;
+ char key[1024] = {0,};
+ int i = 0;
+
+ ret = cli_begin_xml_output (&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <volProfile> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"volProfile");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"volname",
+ "%s", volname);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_int32 (dict, "op", &op);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"profileOp",
+ "%d", op);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ if (op != GF_CLI_STATS_INFO)
+ goto cont;
+
+ ret = dict_get_int32 (dict, "count", &brick_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"brickCount",
+ "%d", brick_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ while (i < brick_count) {
+ i++;
+
+ /* <brick> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"brick");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "%d-brick", i);
+ ret = dict_get_str (dict, key, &brick_name);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"brickName", "%s", brick_name);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "%d-cumulative", i);
+ ret = dict_get_int32 (dict, key, &interval);
+ if (ret == 0) {
+ ret = cli_xml_output_vol_profile_stats
+ (writer, dict, i, interval);
+ if (ret)
+ goto out;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-interval", i);
+ ret = dict_get_int32 (dict, key, &interval);
+ if (ret == 0) {
+ ret = cli_xml_output_vol_profile_stats
+ (writer, dict, i, interval);
+ if (ret)
+ goto out;
+ }
+
+ /* </brick> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+cont:
+ /* </volProfile> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = cli_end_xml_output (writer, doc);
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_vol_list (dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ int count = 0;
+ char *volname = NULL;
+ char key[1024] = {0,};
+ int i = 0;
+
+ ret = cli_begin_xml_output (&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <volList> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"volList");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_int32 (dict, "count", &count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"count",
+ "%d", count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ for (i = 0; i < count; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d", i);
+ ret = dict_get_str (dict, key, &volname);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"volume",
+ "%s", volname);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ /* </volList> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = cli_end_xml_output (writer, doc);
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+#if (HAVE_LIB_XML)
+int
+cli_xml_output_vol_info_option (xmlTextWriterPtr writer, char *substr,
+ char *optstr, char *valstr)
+{
+ int ret = -1;
+ char *ptr1 = NULL;
+ char *ptr2 = NULL;
+
+ ptr1 = substr;
+ ptr2 = optstr;
+
+ while (ptr1) {
+ if (*ptr1 != *ptr2)
+ break;
+ ptr1++;
+ ptr2++;
+ if (!ptr1)
+ goto out;
+ if (!ptr2)
+ goto out;
+ }
+ if (*ptr2 == '\0')
+ goto out;
+
+ /* <option> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"option");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"name",
+ "%s", ptr2);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"value",
+ "%s", valstr);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </option> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+struct tmp_xml_option_logger {
+ char *key;
+ xmlTextWriterPtr writer;
+};
+
+static int
+_output_vol_info_option (dict_t *d, char *k, data_t *v,
+ void *data)
+{
+ int ret = 0;
+ char *ptr = NULL;
+ struct tmp_xml_option_logger *tmp = NULL;
+
+ tmp = data;
+
+ ptr = strstr (k, "option.");
+ if (!ptr)
+ goto out;
+
+ if (!v) {
+ ret = -1;
+ goto out;
+ }
+ ret = cli_xml_output_vol_info_option (tmp->writer, tmp->key, k,
+ v->data);
+
+out:
+ return ret;
+}
+
+int
+cli_xml_output_vol_info_options (xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
+{
+ int ret = -1;
+ int opt_count = 0;
+ char key[1024] = {0,};
+ struct tmp_xml_option_logger tmp = {0,};
+
+ snprintf (key, sizeof (key), "%s.opt_count", prefix);
+ ret = dict_get_int32 (dict, key, &opt_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"optCount",
+ "%d", opt_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* <options> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"options");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ snprintf (key, sizeof (key), "%s.option.", prefix);
+
+ tmp.key = key;
+ tmp.writer = writer;
+ ret = dict_foreach (dict, _output_vol_info_option, &tmp);
+ if (ret)
+ goto out;
+
+ /* </options> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+#endif
+
+int
+cli_xml_output_vol_info (cli_local_t *local, dict_t *dict)
+{
+#if (HAVE_LIB_XML)
+ int ret = 0;
+ int count = 0;
+ char *volname = NULL;
+ char *volume_id = NULL;
+ char *uuid = NULL;
+ int type = 0;
+ int status = 0;
+ int brick_count = 0;
+ int dist_count = 0;
+ int stripe_count = 0;
+ int replica_count = 0;
+ int transport = 0;
+ char *brick = NULL;
+ char key[1024] = {0,};
+ int i = 0;
+ int j = 1;
+ char *caps = NULL;
+ int k __attribute__((unused)) = 0;
+ char *snap_volume = NULL;
+
+ ret = dict_get_int32 (dict, "count", &count);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < count; i++) {
+ /* <volume> */
+ ret = xmlTextWriterStartElement (local->writer,
+ (xmlChar *)"volume");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.name", i);
+ ret = dict_get_str (dict, key, &volname);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"name",
+ "%s", volname);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.volume_id", i);
+ ret = dict_get_str (dict, key, &volume_id);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"id",
+ "%s", volume_id);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.status", i);
+ ret = dict_get_int32 (dict, key, &status);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"status",
+ "%d", status);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.snap_volume", i);
+ ret = dict_get_str (dict, key, &snap_volume);
+ if (ret)
+ goto out;
+ if (snap_volume) {
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"snapVol",
+ "%s", snap_volume);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ ret =xmlTextWriterWriteFormatElement
+ (local->writer, (xmlChar *)"statusStr", "%s",
+ cli_vol_status_str[status]);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.brick_count", i);
+ ret = dict_get_int32 (dict, key, &brick_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"brickCount",
+ "%d", brick_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.dist_count", i);
+ ret = dict_get_int32 (dict, key, &dist_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"distCount",
+ "%d", dist_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.stripe_count", i);
+ ret = dict_get_int32 (dict, key, &stripe_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"stripeCount",
+ "%d", stripe_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.replica_count", i);
+ ret = dict_get_int32 (dict, key, &replica_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"replicaCount",
+ "%d", replica_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.type", i);
+ ret = dict_get_int32 (dict, key, &type);
+ if (ret)
+ goto out;
+ /* For Distributed-(stripe,replicate,stipe-replicate) types */
+ if ((type > 0) && (dist_count < brick_count))
+ type += 3;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"type",
+ "%d", type);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"typeStr",
+ "%s",
+ cli_vol_type_str[type]);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.transport", i);
+ ret = dict_get_int32 (dict, key, &transport);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"transport",
+ "%d", transport);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+#ifdef HAVE_BD_XLATOR
+ /* <xlators> */
+ ret = xmlTextWriterStartElement (local->writer,
+ (xmlChar *)"xlators");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ for (k = 0; ; k++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key),"volume%d.xlator%d", i, k);
+ ret = dict_get_str (dict, key, &caps);
+ if (ret)
+ break;
+
+ /* <xlator> */
+ ret = xmlTextWriterStartElement (local->writer,
+ (xmlChar *)"xlator");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement
+ (local->writer, (xmlChar *)"name", "%s", caps);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* <capabilities> */
+ ret = xmlTextWriterStartElement (local->writer,
+ (xmlChar *)
+ "capabilities");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ j = 0;
+ for (j = 0; ;j++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key),
+ "volume%d.xlator%d.caps%d", i, k, j);
+ ret = dict_get_str (dict, key, &caps);
+ if (ret)
+ break;
+ ret = xmlTextWriterWriteFormatElement
+ (local->writer, (xmlChar *)"capability",
+ "%s", caps);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+ /* </capabilities> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </xlator> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+ ret = xmlTextWriterFullEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </xlators> */
+#else
+ caps = 0; /* Avoid compiler warnings when BD not enabled */
+#endif
+ j = 1;
+
+ /* <bricks> */
+ ret = xmlTextWriterStartElement (local->writer,
+ (xmlChar *)"bricks");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ while (j <= brick_count) {
+ ret = xmlTextWriterStartElement
+ (local->writer, (xmlChar *)"brick");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.brick%d.uuid",
+ i, j);
+ ret = dict_get_str (dict, key, &uuid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatAttribute
+ (local->writer, (xmlChar *)"uuid", "%s",
+ uuid);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.brick%d", i, j);
+ ret = dict_get_str (dict, key, &brick);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatString
+ (local->writer, "%s", brick);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </brick> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ j++;
+ }
+ /* </bricks> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d", i);
+ ret = cli_xml_output_vol_info_options (local->writer, dict,
+ key);
+ if (ret)
+ goto out;
+
+ /* </volume> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ if (volname) {
+ GF_FREE (local->get_vol.volname);
+ local->get_vol.volname = gf_strdup (volname);
+ local->vol_count += count;
+ }
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_vol_info_begin (cli_local_t *local, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+
+ GF_ASSERT (local);
+
+ ret = cli_begin_xml_output (&(local->writer), &(local->doc));
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common (local->writer, op_ret, op_errno,
+ op_errstr);
+ if (ret)
+ goto out;
+
+ /* <volInfo> */
+ ret = xmlTextWriterStartElement (local->writer, (xmlChar *)"volInfo");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* <volumes> */
+ ret = xmlTextWriterStartElement (local->writer, (xmlChar *)"volumes");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* Init vol count */
+ local->vol_count = 0;
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_vol_info_end (cli_local_t *local)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+
+ GF_ASSERT (local);
+
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"count",
+ "%d", local->vol_count);
+
+ /* </volumes> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </volInfo> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = cli_end_xml_output (local->writer, local->doc);
+
+out:
+ gf_log ("cli", GF_LOG_ERROR, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_vol_quota_limit_list (char *volname, char *limit_list,
+ int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ int64_t size = 0;
+ int64_t limit_value = 0;
+ int i = 0;
+ int j = 0;
+ int k = 0;
+ int len = 0;
+ char *size_str = NULL;
+ char path[PATH_MAX] = {0,};
+ char ret_str[1024] = {0,};
+ char value[1024] = {0,};
+ char mountdir[] = "/tmp/mountXXXXXX";
+ char abspath[PATH_MAX] = {0,};
+ runner_t runner = {0,};
+
+ GF_ASSERT (volname);
+ GF_ASSERT (limit_list);
+
+ ret = cli_begin_xml_output (&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <volQuota> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"volQuota");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ if (!limit_list)
+ goto cont;
+
+ len = strlen (limit_list);
+ if (len == 0)
+ goto cont;
+
+ if (mkdtemp (mountdir) == NULL) {
+ gf_log ("cli", GF_LOG_ERROR, "failed to create a temporary"
+ " mount directory");
+ ret = -1;
+ goto out;
+ }
+
+ ret = runcmd (SBIN_DIR"/glusterfs", "-s", "localhost",
+ "--volfile-id", volname, "-l",
+ DEFAULT_LOG_FILE_DIRECTORY"/quota-list-xml.log",
+ mountdir, NULL);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "failed to mount glusterfs client");
+ ret = -1;
+ goto rm_dir;
+ }
+
+ while (i < len) {
+ j = 0;
+ k = 0;
+ size = 0;
+
+ while (limit_list[i] != ':')
+ path[k++] = limit_list[i++];
+ path[k] = '\0';
+
+ i++;
+
+ while (limit_list[i] != ',' && limit_list[i] != '\0')
+ value[j++] = limit_list[i++];
+ i++;
+
+ snprintf (abspath, sizeof (abspath), "%s/%s", mountdir, path);
+ ret = sys_lgetxattr (abspath, "trusted.limit.list",
+ (void *)ret_str, 4096);
+ if (ret >= 0) {
+ sscanf (ret_str, "%"SCNd64",%"SCNd64, &size,
+ &limit_value);
+ size_str = gf_uint64_2human_readable ((uint64_t)size);
+ }
+
+ /* <quota> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"quota");
+ XML_RET_CHECK_AND_GOTO (ret, unmount);
+
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"path", "%s", path);
+ XML_RET_CHECK_AND_GOTO (ret, unmount);
+
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"limit", "%s", value);
+ XML_RET_CHECK_AND_GOTO (ret, unmount);
+
+ if (size_str) {
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"size", "%s", size_str);
+ XML_RET_CHECK_AND_GOTO (ret, unmount);
+ GF_FREE (size_str);
+ } else {
+ ret = xmlTextWriterWriteFormatElement
+ (writer, (xmlChar *)"size", "%"PRId64, size);
+ XML_RET_CHECK_AND_GOTO (ret, unmount);
+ }
+
+ /* </quota> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, unmount);
+
+ }
+
+unmount:
+ runinit (&runner);
+ runner_add_args (&runner, "umount",
+#if GF_LINUX_HOST_OS
+ "-l",
+#endif
+ mountdir, NULL);
+ ret = runner_run_reuse (&runner);
+ if (ret)
+ runner_log (&runner, "cli", GF_LOG_WARNING, "error executing");
+ runner_end (&runner);
+
+rm_dir:
+ rmdir (mountdir);
+
+cont:
+ /* </volQuota> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = cli_end_xml_output (writer, doc);
+
+out:
+ GF_FREE (size_str);
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_peer_status (dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ int count = 0;
+ char *uuid = NULL;
+ char *hostname = NULL;
+ int connected = 0;
+ int state_id = 0;
+ char *state_str = NULL;
+ int i = 1;
+ char key[1024] = {0,};
+
+ ret = cli_begin_xml_output (&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <peerStatus> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"peerStatus");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ if (!dict)
+ goto cont;
+
+ ret = dict_get_int32 (dict, "count", &count);
+ if (ret)
+ goto out;
+
+ while (i <= count) {
+ /* <peer> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"peer");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "friend%d.uuid", i);
+ ret = dict_get_str (dict, key, &uuid);
+ if (ret)
+ goto out;
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"uuid",
+ "%s", uuid);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "friend%d.hostname", i);
+ ret = dict_get_str (dict, key, &hostname);
+ if (ret)
+ goto out;
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"hostname",
+ "%s", hostname);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "friend%d.connected", i);
+ ret = dict_get_int32 (dict, key, &connected);
+ if (ret)
+ goto out;
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"connected",
+ "%d", connected);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "friend%d.stateId", i);
+ ret = dict_get_int32 (dict, key, &state_id);
+ if (!ret) {
+ /* ignore */
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"state", "%d", state_id);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "friend%d.state", i);
+ ret = dict_get_str (dict, key, &state_str);
+ if (!ret) {
+ /* ignore */
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"stateStr", "%s", state_str);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ /* </peer> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ i++;
+ }
+
+cont:
+ /* </peerStatus> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = cli_end_xml_output (writer, doc);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+#if (HAVE_LIB_XML)
+/* Used for rebalance stop/status, remove-brick status */
+int
+cli_xml_output_vol_rebalance_status (xmlTextWriterPtr writer, dict_t *dict,
+ enum gf_task_types task_type)
+{
+ int ret = -1;
+ int count = 0;
+ char *node_name = NULL;
+ char *node_uuid = NULL;
+ uint64_t files = 0;
+ uint64_t size = 0;
+ uint64_t lookups = 0;
+ int status_rcd = 0;
+ uint64_t failures = 0;
+ uint64_t skipped = 0;
+ uint64_t total_files = 0;
+ uint64_t total_size = 0;
+ uint64_t total_lookups = 0;
+ uint64_t total_failures = 0;
+ uint64_t total_skipped = 0;
+ char key[1024] = {0,};
+ int i = 0;
+ int overall_status = -1;
+ double elapsed = 0;
+ double overall_elapsed = 0;
+
+ if (!dict) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "count", &count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"nodeCount",
+ "%d", count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ while (i < count) {
+ i++;
+
+ /* <node> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"node");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "node-name-%d", i);
+ ret = dict_get_str (dict, key, &node_name);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"nodeName",
+ "%s", node_name);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "node-uuid-%d", i);
+ ret = dict_get_str (dict, key, &node_uuid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"id",
+ "%s", node_uuid);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "files-%d", i);
+ ret = dict_get_uint64 (dict, key, &files);
+ if (ret)
+ goto out;
+ total_files += files;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"files",
+ "%"PRIu64, files);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "size-%d", i);
+ ret = dict_get_uint64 (dict, key, &size);
+ if (ret)
+ goto out;
+ total_size += size;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"size",
+ "%"PRIu64,size);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "lookups-%d", i);
+ ret = dict_get_uint64 (dict, key, &lookups);
+ if (ret)
+ goto out;
+ total_lookups += lookups;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"lookups",
+ "%"PRIu64, lookups);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "failures-%d", i);
+ ret = dict_get_uint64 (dict, key, &failures);
+ if (ret)
+ goto out;
+ total_failures += failures;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"failures",
+ "%"PRIu64, failures);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* skipped-%d is not available for remove brick in dict,
+ so using failures as skipped count in case of remove-brick
+ similar to logic used in CLI(non xml output) */
+ if (task_type == GF_TASK_TYPE_REBALANCE) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "skipped-%d", i);
+ }
+ else {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "failures-%d", i);
+ }
+
+ ret = dict_get_uint64 (dict, key, &skipped);
+ if (ret)
+ goto out;
+ total_skipped += skipped;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"skipped",
+ "%"PRIu64, skipped);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "status-%d", i);
+ ret = dict_get_int32 (dict, key, &status_rcd);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"status",
+ "%d", status_rcd);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"statusStr",
+ "%s",
+ cli_vol_task_status_str[status_rcd]);
+
+ memset (key, 0, 256);
+ snprintf (key, 256, "run-time-%d", i);
+ ret = dict_get_double (dict, key, &elapsed);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"runtime",
+ "%.2f", elapsed);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ if (elapsed > overall_elapsed) {
+ overall_elapsed = elapsed;
+ }
+
+ if (-1 == overall_status)
+ overall_status = status_rcd;
+ else if ((GF_DEFRAG_STATUS_COMPLETE == overall_status ||
+ status_rcd > overall_status) &&
+ (status_rcd != GF_DEFRAG_STATUS_COMPLETE))
+ overall_status = status_rcd;
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </node> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ /* Aggregate status */
+ /* <aggregate> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"aggregate");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,(xmlChar *)"files",
+ "%"PRIu64, total_files);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,(xmlChar *)"size",
+ "%"PRIu64, total_size);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,(xmlChar *)"lookups",
+ "%"PRIu64, total_lookups);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,(xmlChar *)"failures",
+ "%"PRIu64, total_failures);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,(xmlChar *)"skipped",
+ "%"PRIu64, total_skipped);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,(xmlChar *)"status",
+ "%d", overall_status);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,(xmlChar *)"statusStr",
+ "%s",
+ cli_vol_task_status_str[overall_status]);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,(xmlChar *)"runtime",
+ "%.2f", overall_elapsed);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </aggregate> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+#endif
+
+int
+cli_xml_output_vol_rebalance (gf_cli_defrag_type op, dict_t *dict, int op_ret,
+ int op_errno, char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ char *task_id_str = NULL;
+
+ ret = cli_begin_xml_output (&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <volRebalance> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"volRebalance");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_str (dict, GF_REBALANCE_TID_KEY, &task_id_str);
+ if (ret == 0) {
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"task-id",
+ "%s", task_id_str);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"op",
+ "%d", op);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ if ((GF_DEFRAG_CMD_STOP == op) || (GF_DEFRAG_CMD_STATUS == op)) {
+ ret = cli_xml_output_vol_rebalance_status (writer, dict,
+ GF_TASK_TYPE_REBALANCE);
+ if (ret)
+ goto out;
+ }
+
+ /* </volRebalance> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+
+ ret = cli_end_xml_output (writer, doc);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_vol_remove_brick (gf_boolean_t status_op, dict_t *dict,
+ int op_ret, int op_errno, char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ char *task_id_str = NULL;
+
+ ret = cli_begin_xml_output (&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <volRemoveBrick> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"volRemoveBrick");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY, &task_id_str);
+ if (ret == 0) {
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"task-id",
+ "%s", task_id_str);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ if (status_op) {
+ ret = cli_xml_output_vol_rebalance_status (writer, dict,
+ GF_TASK_TYPE_REMOVE_BRICK);
+ if (ret)
+ goto out;
+ }
+
+ /* </volRemoveBrick> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+
+ ret = cli_end_xml_output (writer, doc);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_vol_replace_brick (gf1_cli_replace_op op, dict_t *dict,
+ int op_ret, int op_errno, char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ int status = 0;
+ uint64_t files = 0;
+ char *current_file = 0;
+ char *task_id_str = NULL;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+
+ ret = cli_begin_xml_output (&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <volReplaceBrick> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"volReplaceBrick");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_str (dict, GF_REPLACE_BRICK_TID_KEY, &task_id_str);
+ if (ret == 0) {
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"task-id",
+ "%s", task_id_str);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"op",
+ "%d", op);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ if (GF_REPLACE_OP_STATUS == op) {
+ ret = dict_get_int32 (dict, "status", &status);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"status",
+ "%d", status);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_uint64 (dict, "files", &files);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"files",
+ "%"PRIu64, files);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ if (status)
+ goto cont;
+
+ ret = dict_get_str (dict, "current_file", &current_file);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"currentFile",
+ "%s", current_file);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+cont:
+ /* </volReplaceBrick> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = cli_end_xml_output (writer, doc);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_vol_create (dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ char *volname = NULL;
+ char *volid = NULL;
+
+ ret = cli_begin_xml_output (&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ if (dict) {
+ /* <volCreate> */
+ ret = xmlTextWriterStartElement (writer,
+ (xmlChar *)"volCreate");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* <volume> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"volume");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *) "name",
+ "%s", volname);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_str (dict, "volume-id", &volid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"id",
+ "%s", volid);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </volume> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </volCreate> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ ret = cli_end_xml_output (writer, doc);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_generic_volume (char *op, dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ char *volname = NULL;
+ char *volid = NULL;
+
+ GF_ASSERT (op);
+
+ ret = cli_begin_xml_output (&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ if (dict) {
+ /* <"op"> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)op);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* <volume> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"volume");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *) "name",
+ "%s", volname);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_str (dict, "vol-id", &volid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"id",
+ "%s", volid);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </volume> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </"op"> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ ret = cli_end_xml_output (writer, doc);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+#if (HAVE_LIB_XML)
+int
+cli_xml_output_vol_gsync_status (dict_t *dict, xmlTextWriterPtr writer)
+{
+ char master_key[PATH_MAX] = "";
+ char slave_key[PATH_MAX] = "";
+ char status_key[PATH_MAX] = "";
+ char node_key[PATH_MAX] = "";
+ char *master = NULL;
+ char *slave = NULL;
+ char *status = NULL;
+ char *node = NULL;
+ int ret = -1;
+ int gsync_count = 0;
+ int i = 1;
+
+ ret = dict_get_int32 (dict, "gsync-count", &gsync_count);
+ if (ret)
+ goto out;
+
+ for (i=1; i <= gsync_count; i++) {
+ snprintf (node_key, sizeof(node_key), "node%d", i);
+ snprintf (master_key, sizeof(master_key), "master%d", i);
+ snprintf (slave_key, sizeof(slave_key), "slave%d", i);
+ snprintf (status_key, sizeof(status_key), "status%d", i);
+
+ ret = dict_get_str (dict, node_key, &node);
+ if (ret)
+ goto out;
+
+ ret = dict_get_str (dict, master_key, &master);
+ if (ret)
+ goto out;
+
+ ret = dict_get_str (dict, slave_key, &slave);
+ if (ret)
+ goto out;
+
+ ret = dict_get_str (dict, status_key, &status);
+ if (ret)
+ goto out;
+
+ /* <pair> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"pair");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"node",
+ "%s", node);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"master",
+ "%s", master);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"slave",
+ "%s", slave);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"status",
+ "%s", status);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </pair> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ }
+
+out:
+ gf_log ("cli",GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+#endif
+
+int
+cli_xml_output_vol_gsync (dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ char *master = NULL;
+ char *slave = NULL;
+ int type = 0;
+
+ GF_ASSERT (dict);
+
+ ret = cli_begin_xml_output (&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <geoRep> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"geoRep");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_int32 (dict, "type", &type);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to get type");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"type",
+ "%d", type);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ switch (type) {
+ case GF_GSYNC_OPTION_TYPE_START:
+ case GF_GSYNC_OPTION_TYPE_STOP:
+ if (dict_get_str (dict, "master", &master) != 0)
+ master = "???";
+ if (dict_get_str (dict, "slave", &slave) != 0)
+ slave = "???";
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"master",
+ "%s", master);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"slave",
+ "%s", slave);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_CONFIG:
+ break;
+ case GF_GSYNC_OPTION_TYPE_STATUS:
+ ret = cli_xml_output_vol_gsync_status(dict, writer);
+ break;
+ default:
+ ret = 0;
+ break;
+ }
+
+ /* </geoRep> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = cli_end_xml_output (writer, doc);
+out:
+ gf_log ("cli",GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
diff --git a/cli/src/cli.c b/cli/src/cli.c
new file mode 100644
index 000000000..91b315ff1
--- /dev/null
+++ b/cli/src/cli.c
@@ -0,0 +1,666 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/resource.h>
+#include <sys/file.h>
+#include <netdb.h>
+#include <signal.h>
+#include <libgen.h>
+
+#include <sys/utsname.h>
+
+#include <stdint.h>
+#include <pthread.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+#include <semaphore.h>
+#include <errno.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif
+
+#ifdef HAVE_MALLOC_STATS
+#ifdef DEBUG
+#include <mcheck.h>
+#endif
+#endif
+
+#include "cli.h"
+#include "cli-cmd.h"
+#include "cli-mem-types.h"
+
+#include "xlator.h"
+#include "glusterfs.h"
+#include "compat.h"
+#include "logging.h"
+#include "dict.h"
+#include "list.h"
+#include "timer.h"
+#include "stack.h"
+#include "revision.h"
+#include "common-utils.h"
+#include "event.h"
+#include "globals.h"
+#include "syscall.h"
+#include "call-stub.h"
+#include <fnmatch.h>
+
+#include "xdr-generic.h"
+
+extern int connected;
+/* using argp for command line parsing */
+
+const char *argp_program_version = "" \
+ PACKAGE_NAME" "PACKAGE_VERSION" built on "__DATE__" "__TIME__ \
+ "\nRepository revision: " GLUSTERFS_REPOSITORY_REVISION "\n" \
+ "Copyright (c) 2006-2011 Gluster Inc. " \
+ "<http://www.gluster.com>\n" \
+ "GlusterFS comes with ABSOLUTELY NO WARRANTY.\n" \
+ "You may redistribute copies of GlusterFS under the terms of "\
+ "the GNU General Public License.";
+
+const char *argp_program_bug_address = "<" PACKAGE_BUGREPORT ">";
+
+
+
+struct rpc_clnt *global_rpc;
+
+rpc_clnt_prog_t *cli_rpc_prog;
+
+
+extern struct rpc_clnt_program cli_prog;
+
+static int
+glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx)
+{
+ cmd_args_t *cmd_args = NULL;
+ struct rlimit lim = {0, };
+ call_pool_t *pool = NULL;
+
+ xlator_mem_acct_init (THIS, cli_mt_end);
+
+ ctx->process_uuid = generate_glusterfs_ctx_id ();
+ if (!ctx->process_uuid)
+ return -1;
+
+ ctx->page_size = 128 * GF_UNIT_KB;
+
+ ctx->iobuf_pool = iobuf_pool_new ();
+ if (!ctx->iobuf_pool)
+ return -1;
+
+ ctx->event_pool = event_pool_new (DEFAULT_EVENT_POOL_SIZE);
+ if (!ctx->event_pool)
+ return -1;
+
+ pool = GF_CALLOC (1, sizeof (call_pool_t),
+ cli_mt_call_pool_t);
+ if (!pool)
+ return -1;
+
+ /* frame_mem_pool size 112 * 64 */
+ pool->frame_mem_pool = mem_pool_new (call_frame_t, 32);
+ if (!pool->frame_mem_pool)
+ return -1;
+
+ /* stack_mem_pool size 256 * 128 */
+ pool->stack_mem_pool = mem_pool_new (call_stack_t, 16);
+
+ if (!pool->stack_mem_pool)
+ return -1;
+
+ ctx->stub_mem_pool = mem_pool_new (call_stub_t, 16);
+ if (!ctx->stub_mem_pool)
+ return -1;
+
+ ctx->dict_pool = mem_pool_new (dict_t, 32);
+ if (!ctx->dict_pool)
+ return -1;
+
+ ctx->dict_pair_pool = mem_pool_new (data_pair_t, 512);
+ if (!ctx->dict_pair_pool)
+ return -1;
+
+ ctx->dict_data_pool = mem_pool_new (data_t, 512);
+ if (!ctx->dict_data_pool)
+ return -1;
+
+ INIT_LIST_HEAD (&pool->all_frames);
+ LOCK_INIT (&pool->lock);
+ ctx->pool = pool;
+
+ pthread_mutex_init (&(ctx->lock), NULL);
+
+ cmd_args = &ctx->cmd_args;
+
+ INIT_LIST_HEAD (&cmd_args->xlator_options);
+
+ lim.rlim_cur = RLIM_INFINITY;
+ lim.rlim_max = RLIM_INFINITY;
+ setrlimit (RLIMIT_CORE, &lim);
+
+ return 0;
+}
+
+
+static int
+logging_init (glusterfs_ctx_t *ctx, struct cli_state *state)
+{
+ char *log_file = state->log_file ? state->log_file :
+ DEFAULT_CLI_LOG_FILE_DIRECTORY "/cli.log";
+
+ /* passing ident as NULL means to use default ident for syslog */
+ if (gf_log_init (ctx, log_file, NULL) == -1) {
+ fprintf (stderr, "ERROR: failed to open logfile %s\n",
+ log_file);
+ return -1;
+ }
+
+ /* CLI should not have something to DEBUG after the release,
+ hence defaulting to INFO loglevel */
+ gf_log_set_loglevel ((state->log_level == -1) ? GF_LOG_INFO :
+ state->log_level);
+
+ return 0;
+}
+
+int
+cli_submit_request (void *req, call_frame_t *frame,
+ rpc_clnt_prog_t *prog,
+ int procnum, struct iobref *iobref,
+ xlator_t *this, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc)
+{
+ int ret = -1;
+ int count = 0;
+ struct iovec iov = {0, };
+ struct iobuf *iobuf = NULL;
+ char new_iobref = 0;
+ ssize_t xdr_size = 0;
+
+ GF_ASSERT (this);
+
+ if (req) {
+ xdr_size = xdr_sizeof (xdrproc, req);
+ iobuf = iobuf_get2 (this->ctx->iobuf_pool, xdr_size);
+ if (!iobuf) {
+ goto out;
+ };
+
+ if (!iobref) {
+ iobref = iobref_new ();
+ if (!iobref) {
+ goto out;
+ }
+
+ new_iobref = 1;
+ }
+
+ iobref_add (iobref, iobuf);
+
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_size (iobuf);
+
+
+ /* Create the xdr payload */
+ ret = xdr_serialize_generic (iov, req, xdrproc);
+ if (ret == -1) {
+ goto out;
+ }
+ iov.iov_len = ret;
+ count = 1;
+ }
+
+ /* Send the msg */
+ ret = rpc_clnt_submit (global_rpc, prog, procnum, cbkfn,
+ &iov, count,
+ NULL, 0, iobref, frame, NULL, 0, NULL, 0, NULL);
+ ret = 0;
+
+out:
+ if (new_iobref)
+ iobref_unref (iobref);
+ if (iobuf)
+ iobuf_unref (iobuf);
+ return ret;
+}
+
+int
+cli_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
+ void *data)
+{
+ xlator_t *this = NULL;
+ int ret = 0;
+
+ this = mydata;
+
+ switch (event) {
+ case RPC_CLNT_CONNECT:
+ {
+
+ cli_cmd_broadcast_connected ();
+ gf_log (this->name, GF_LOG_TRACE, "got RPC_CLNT_CONNECT");
+ break;
+ }
+
+ case RPC_CLNT_DISCONNECT:
+ {
+ gf_log (this->name, GF_LOG_TRACE, "got RPC_CLNT_DISCONNECT");
+ connected = 0;
+ if (!global_state->prompt && global_state->await_connected) {
+ ret = 1;
+ cli_out ("Connection failed. Please check if gluster "
+ "daemon is operational.");
+ exit (ret);
+ }
+ break;
+ }
+
+ default:
+ gf_log (this->name, GF_LOG_TRACE,
+ "got some other RPC event %d", event);
+ ret = 0;
+ break;
+ }
+
+ return ret;
+}
+
+
+/*
+ * ret: 0: option successfully processed
+ * 1: signalling end of option list
+ * -1: unknown option or other issue
+ */
+int
+cli_opt_parse (char *opt, struct cli_state *state)
+{
+ char *oarg;
+
+ if (strcmp (opt, "") == 0)
+ return 1;
+
+ if (strcmp (opt, "version") == 0) {
+ cli_out ("%s", argp_program_version);
+ exit (0);
+ }
+
+ if (strcmp (opt, "print-logdir") == 0) {
+ cli_out ("%s", DEFAULT_LOG_FILE_DIRECTORY);
+ exit (0);
+ }
+
+ if (strcmp (opt, "print-statedumpdir") == 0) {
+ cli_out ("%s", DEFAULT_VAR_RUN_DIRECTORY);
+ exit (0);
+ }
+
+ if (strcmp (opt, "xml") == 0) {
+#if (HAVE_LIB_XML)
+ state->mode |= GLUSTER_MODE_XML;
+#else
+ cli_err ("XML output not supported. Ignoring '--xml' option");
+#endif
+ return 0;
+ }
+
+ oarg = strtail (opt, "mode=");
+ if (oarg) {
+ if (strcmp (oarg, "script") == 0) {
+ state->mode |= GLUSTER_MODE_SCRIPT;
+ return 0;
+ }
+ if (strcmp (oarg, "interactive") == 0)
+ return 0;
+ return -1;
+ }
+
+ oarg = strtail (opt, "remote-host=");
+ if (oarg) {
+ state->remote_host = oarg;
+ return 0;
+ }
+
+ oarg = strtail (opt, "log-file=");
+ if (oarg) {
+ state->log_file = oarg;
+ return 0;
+ }
+
+ oarg = strtail (opt, "log-level=");
+ if (oarg) {
+ state->log_level = glusterd_check_log_level(oarg);
+ if (state->log_level == -1)
+ return -1;
+ return 0;
+ }
+
+ oarg = strtail (opt, "glusterd-sock=");
+ if (oarg) {
+ state->glusterd_sock = oarg;
+ return 0;
+ }
+
+ return -1;
+}
+
+int
+parse_cmdline (int argc, char *argv[], struct cli_state *state)
+{
+ int ret = 0;
+ int i = 0;
+ int j = 0;
+ char *opt = NULL;
+
+ state->argc=argc-1;
+ state->argv=&argv[1];
+
+ for (i = 0; i < state->argc; i++) {
+ opt = strtail (state->argv[i], "--");
+ if (opt) {
+ ret = cli_opt_parse (opt, state);
+ if (ret == -1) {
+ cli_out ("unrecognized option --%s", opt);
+ return ret;
+ }
+ for (j = i; j < state->argc - 1; j++)
+ state->argv[j] = state->argv[j + 1];
+ state->argc--;
+ /* argv shifted, next check should be at i again */
+ i--;
+ if (ret == 1) {
+ /* end of cli options */
+ ret = 0;
+ break;
+ }
+ }
+ }
+
+ state->argv[state->argc] = NULL;
+
+ return ret;
+}
+
+
+int
+cli_cmd_tree_init (struct cli_cmd_tree *tree)
+{
+ struct cli_cmd_word *root = NULL;
+ int ret = 0;
+
+ root = &tree->root;
+ root->tree = tree;
+
+ return ret;
+}
+
+
+int
+cli_state_init (struct cli_state *state)
+{
+ struct cli_cmd_tree *tree = NULL;
+ int ret = 0;
+
+
+ state->log_level = -1;
+
+ tree = &state->tree;
+ tree->state = state;
+
+ ret = cli_cmd_tree_init (tree);
+
+ return ret;
+}
+
+int
+cli_usage_out (const char *usage)
+{
+ GF_ASSERT (usage);
+ GF_ASSERT (usage[0] != '\0');
+
+ if (!usage || usage[0] == '\0')
+ return -1;
+
+ cli_err ("Usage: %s", usage);
+ return 0;
+}
+
+int
+_cli_err (const char *fmt, ...)
+{
+ struct cli_state *state = NULL;
+ va_list ap;
+ int ret = 0;
+
+ state = global_state;
+
+ va_start (ap, fmt);
+
+#ifdef HAVE_READLINE
+ if (state->rl_enabled && !state->rl_processing)
+ return cli_rl_err(state, fmt, ap);
+#endif
+
+ ret = vfprintf (stderr, fmt, ap);
+ fprintf (stderr, "\n");
+ va_end (ap);
+
+ return ret;
+}
+
+
+int
+_cli_out (const char *fmt, ...)
+{
+ struct cli_state *state = NULL;
+ va_list ap;
+ int ret = 0;
+
+ state = global_state;
+
+ va_start (ap, fmt);
+
+#ifdef HAVE_READLINE
+ if (state->rl_enabled && !state->rl_processing)
+ return cli_rl_out(state, fmt, ap);
+#endif
+
+ ret = vprintf (fmt, ap);
+ printf ("\n");
+ va_end (ap);
+
+ return ret;
+}
+
+struct rpc_clnt *
+cli_rpc_init (struct cli_state *state)
+{
+ struct rpc_clnt *rpc = NULL;
+ dict_t *options = NULL;
+ int ret = -1;
+ int port = CLI_GLUSTERD_PORT;
+ xlator_t *this = NULL;
+
+
+ this = THIS;
+ cli_rpc_prog = &cli_prog;
+ options = dict_new ();
+ if (!options)
+ goto out;
+
+ /* Connect using to glusterd using the specified method, giving
+ * preference to unix socket connection. If nothing is specified connect
+ * to the default glusterd socket
+ */
+ if (state->glusterd_sock) {
+ gf_log ("cli", GF_LOG_INFO, "Connecting to glusterd using "
+ "sockfile %s", state->glusterd_sock);
+ ret = rpc_transport_unix_options_build (&options,
+ state->glusterd_sock,
+ 0);
+ if (ret)
+ goto out;
+ } else if (state->remote_host) {
+ gf_log ("cli", GF_LOG_INFO, "Connecting to remote glusterd at "
+ "%s", state->remote_host);
+ ret = dict_set_str (options, "remote-host", state->remote_host);
+ if (ret)
+ goto out;
+
+ if (state->remote_port)
+ port = state->remote_port;
+
+ ret = dict_set_int32 (options, "remote-port", port);
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (options, "transport.address-family",
+ "inet");
+ if (ret)
+ goto out;
+ } else {
+ gf_log ("cli", GF_LOG_DEBUG, "Connecting to glusterd using "
+ "default socket");
+ ret = rpc_transport_unix_options_build
+ (&options, DEFAULT_GLUSTERD_SOCKFILE, 0);
+ if (ret)
+ goto out;
+ }
+
+ rpc = rpc_clnt_new (options, this->ctx, this->name, 16);
+ if (!rpc)
+ goto out;
+
+ ret = rpc_clnt_register_notify (rpc, cli_rpc_notify, this);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "failed to register notify");
+ goto out;
+ }
+
+ ret = rpc_clnt_start (rpc);
+out:
+ if (ret) {
+ if (rpc)
+ rpc_clnt_unref (rpc);
+ rpc = NULL;
+ }
+ return rpc;
+}
+
+cli_local_t *
+cli_local_get ()
+{
+ cli_local_t *local = NULL;
+
+ local = GF_CALLOC (1, sizeof (*local), cli_mt_cli_local_t);
+
+ return local;
+}
+
+void
+cli_local_wipe (cli_local_t *local)
+{
+ if (local) {
+ GF_FREE (local->get_vol.volname);
+ if (local->dict)
+ dict_unref (local->dict);
+ GF_FREE (local);
+ }
+
+ return;
+}
+
+struct cli_state *global_state;
+
+int
+main (int argc, char *argv[])
+{
+ struct cli_state state = {0, };
+ int ret = -1;
+ glusterfs_ctx_t *ctx = NULL;
+
+ ctx = glusterfs_ctx_new ();
+ if (!ctx)
+ return ENOMEM;
+
+#ifdef DEBUG
+ gf_mem_acct_enable_set (ctx);
+#endif
+
+ ret = glusterfs_globals_init (ctx);
+ if (ret)
+ return ret;
+
+ THIS->ctx = ctx;
+
+ ret = glusterfs_ctx_defaults_init (ctx);
+ if (ret)
+ goto out;
+
+ ret = cli_state_init (&state);
+ if (ret)
+ goto out;
+
+ state.ctx = ctx;
+ global_state = &state;
+
+ ret = parse_cmdline (argc, argv, &state);
+ if (ret)
+ goto out;
+
+ ret = logging_init (ctx, &state);
+ if (ret)
+ goto out;
+
+ global_rpc = cli_rpc_init (&state);
+ if (!global_rpc)
+ goto out;
+
+ ret = cli_cmds_register (&state);
+ if (ret)
+ goto out;
+
+ ret = cli_cmd_cond_init ();
+ if (ret)
+ goto out;
+
+ ret = cli_input_init (&state);
+ if (ret)
+ goto out;
+
+ ret = event_dispatch (ctx->event_pool);
+
+out:
+// glusterfs_ctx_destroy (ctx);
+
+ return ret;
+}
+
+void
+cli_print_line (int len)
+{
+ GF_ASSERT (len > 0);
+
+ while (len--)
+ printf ("-");
+
+ printf ("\n");
+}
diff --git a/cli/src/cli.h b/cli/src/cli.h
new file mode 100644
index 000000000..8daa4b741
--- /dev/null
+++ b/cli/src/cli.h
@@ -0,0 +1,400 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __CLI_H__
+#define __CLI_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "rpc-clnt.h"
+#include "glusterfs.h"
+#include "protocol-common.h"
+#include "logging.h"
+
+#include "cli1-xdr.h"
+
+#if (HAVE_LIB_XML)
+#include <libxml/encoding.h>
+#include <libxml/xmlwriter.h>
+#endif
+
+#define DEFAULT_EVENT_POOL_SIZE 16384
+#define CLI_GLUSTERD_PORT 24007
+#define CLI_DEFAULT_CONN_TIMEOUT 120
+#define CLI_DEFAULT_CMD_TIMEOUT 120
+#define CLI_TEN_MINUTES_TIMEOUT 600 //Longer timeout for volume top
+#define DEFAULT_CLI_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs"
+#define CLI_VOL_STATUS_BRICK_LEN 55
+#define CLI_TAB_LENGTH 8
+#define CLI_BRICK_STATUS_LINE_LEN 78
+
+enum argp_option_keys {
+ ARGP_DEBUG_KEY = 133,
+ ARGP_PORT_KEY = 'p',
+};
+
+#define GLUSTER_MODE_SCRIPT (1 << 0)
+#define GLUSTER_MODE_ERR_FATAL (1 << 1)
+#define GLUSTER_MODE_XML (1 << 2)
+struct cli_state;
+struct cli_cmd_word;
+struct cli_cmd_tree;
+struct cli_cmd;
+
+extern char *cli_vol_type_str[];
+extern char *cli_vol_status_str[];
+extern char *cli_vol_task_status_str[];
+
+typedef int (cli_cmd_cbk_t)(struct cli_state *state,
+ struct cli_cmd_word *word,
+ const char **words,
+ int wordcount);
+typedef void (cli_cmd_reg_cbk_t)( struct cli_cmd *this);
+
+typedef int (cli_cmd_match_t)(struct cli_cmd_word *word);
+typedef int (cli_cmd_filler_t)(struct cli_cmd_word *word);
+
+struct cli_cmd_word {
+ struct cli_cmd_tree *tree;
+ const char *word;
+ cli_cmd_filler_t *filler;
+ cli_cmd_match_t *match;
+ cli_cmd_cbk_t *cbkfn;
+ const char *desc;
+ const char *pattern;
+ int nextwords_cnt;
+ struct cli_cmd_word **nextwords;
+};
+
+
+struct cli_cmd_tree {
+ struct cli_state *state;
+ struct cli_cmd_word root;
+};
+
+
+struct cli_state {
+ int argc;
+ char **argv;
+
+ char debug;
+
+ /* for events dispatching */
+ glusterfs_ctx_t *ctx;
+
+ /* registry of known commands */
+ struct cli_cmd_tree tree;
+
+ /* the thread which "executes" the command in non-interactive mode */
+ /* also the thread which reads from stdin in non-readline mode */
+ pthread_t input;
+
+ /* terminal I/O */
+ const char *prompt;
+ int rl_enabled;
+ int rl_async;
+ int rl_processing;
+
+ /* autocompletion state */
+ char **matches;
+ char **matchesp;
+
+ char *remote_host;
+ int remote_port;
+ int mode;
+ int await_connected;
+
+ char *log_file;
+ gf_loglevel_t log_level;
+
+ char *glusterd_sock;
+};
+
+struct cli_local {
+ struct {
+ char *volname;
+ int flags;
+ } get_vol;
+
+ dict_t *dict;
+ const char **words;
+ /* Marker for volume status all */
+ gf_boolean_t all;
+#if (HAVE_LIB_XML)
+ xmlTextWriterPtr writer;
+ xmlDocPtr doc;
+ int vol_count;
+#endif
+};
+
+struct gf_cli_gsync_detailed_status_ {
+ char *node;
+ char *master;
+ char *slave;
+ char *health;
+ char *uptime;
+ char *files_syncd;
+ char *files_pending;
+ char *bytes_pending;
+ char *deletes_pending;
+};
+
+struct cli_volume_status {
+ int port;
+ int online;
+ uint64_t block_size;
+ uint64_t total_inodes;
+ uint64_t free_inodes;
+ char *brick;
+ char *pid_str;
+ char *free;
+ char *total;
+#ifdef GF_LINUX_HOST_OS
+ char *fs_name;
+ char *mount_options;
+ char *device;
+ char *inode_size;
+#endif
+};
+
+struct snap_config_opt_vals_ {
+ char *op_name;
+ char *question;
+};
+
+typedef struct gf_cli_gsync_detailed_status_ gf_cli_gsync_status_t;
+
+typedef struct cli_volume_status cli_volume_status_t;
+
+typedef struct cli_local cli_local_t;
+
+typedef ssize_t (*cli_serialize_t) (struct iovec outmsg, void *args);
+
+extern struct cli_state *global_state; /* use only in readline callback */
+
+typedef const char *(*cli_selector_t) (void *wcon);
+
+void *cli_getunamb (const char *tok, void **choices, cli_selector_t sel);
+
+int cli_cmd_register (struct cli_cmd_tree *tree, struct cli_cmd *cmd);
+int cli_cmds_register (struct cli_state *state);
+
+int cli_input_init (struct cli_state *state);
+
+int cli_cmd_process (struct cli_state *state, int argc, char *argv[]);
+int cli_cmd_process_line (struct cli_state *state, const char *line);
+
+int cli_rl_enable (struct cli_state *state);
+int cli_rl_out (struct cli_state *state, const char *fmt, va_list ap);
+int cli_rl_err (struct cli_state *state, const char *fmt, va_list ap);
+
+int cli_usage_out (const char *usage);
+
+int _cli_out (const char *fmt, ...);
+int _cli_err (const char *fmt, ...);
+
+#define cli_out(fmt...) do { \
+ FMT_WARN (fmt); \
+ \
+ _cli_out(fmt); \
+ \
+ } while (0)
+
+#define cli_err(fmt...) do { \
+ FMT_WARN (fmt); \
+ \
+ _cli_err(fmt); \
+ \
+ } while (0)
+
+int
+cli_submit_request (void *req, call_frame_t *frame,
+ rpc_clnt_prog_t *prog,
+ int procnum, struct iobref *iobref,
+ xlator_t *this, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc);
+
+int32_t
+cli_cmd_volume_create_parse (const char **words, int wordcount,
+ dict_t **options);
+
+int32_t
+cli_cmd_volume_reset_parse (const char **words, int wordcount, dict_t **opt);
+
+int32_t
+cli_cmd_gsync_set_parse (const char **words, int wordcount, dict_t **opt);
+
+int32_t
+cli_cmd_quota_parse (const char **words, int wordcount, dict_t **opt);
+
+int32_t
+cli_cmd_volume_set_parse (const char **words, int wordcount,
+ dict_t **options, char **op_errstr);
+
+int32_t
+cli_cmd_volume_add_brick_parse (const char **words, int wordcount,
+ dict_t **options);
+
+int32_t
+cli_cmd_volume_remove_brick_parse (const char **words, int wordcount,
+ dict_t **options, int *question);
+
+int32_t
+cli_cmd_volume_replace_brick_parse (const char **words, int wordcount,
+ dict_t **options);
+
+int32_t
+cli_cmd_log_rotate_parse (const char **words, int wordcount, dict_t **options);
+int32_t
+cli_cmd_log_locate_parse (const char **words, int wordcount, dict_t **options);
+int32_t
+cli_cmd_log_filename_parse (const char **words, int wordcount, dict_t **options);
+
+int32_t
+cli_cmd_volume_statedump_options_parse (const char **words, int wordcount,
+ dict_t **options);
+int32_t
+cli_cmd_volume_clrlks_opts_parse (const char **words, int wordcount,
+ dict_t **options);
+
+cli_local_t * cli_local_get ();
+
+void
+cli_local_wipe (cli_local_t *local);
+
+int32_t
+cli_cmd_await_connected ();
+
+int32_t
+cli_cmd_broadcast_connected ();
+
+int
+cli_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
+ void *data);
+
+int32_t
+cli_cmd_volume_profile_parse (const char **words, int wordcount,
+ dict_t **options);
+int32_t
+cli_cmd_volume_top_parse (const char **words, int wordcount,
+ dict_t **options);
+
+int32_t
+cli_cmd_log_level_parse (const char **words, int wordcount,
+ dict_t **options);
+
+int32_t
+cli_cmd_volume_status_parse (const char **words, int wordcount,
+ dict_t **options);
+
+int
+cli_cmd_volume_heal_options_parse (const char **words, int wordcount,
+ dict_t **options);
+
+int
+cli_cmd_volume_defrag_parse (const char **words, int wordcount,
+ dict_t **options);
+
+int
+cli_print_brick_status (cli_volume_status_t *status);
+
+void
+cli_print_detailed_status (cli_volume_status_t *status);
+
+int
+cli_get_detail_status (dict_t *dict, int i, cli_volume_status_t *status);
+
+void
+cli_print_line (int len);
+
+int
+cli_xml_output_str (char *op, char *str, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_dict (char *op, dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_vol_top (dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_vol_profile (dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_vol_status_begin (cli_local_t *local, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_vol_status_end (cli_local_t *local);
+
+int
+cli_xml_output_vol_status (cli_local_t *local, dict_t *dict);
+
+int
+cli_xml_output_vol_list (dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_vol_info_begin (cli_local_t *local, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_vol_info_end (cli_local_t *local);
+
+int
+cli_xml_output_vol_info (cli_local_t *local, dict_t *dict);
+
+int
+cli_xml_output_vol_quota_limit_list (char *volname, char *limit_list,
+ int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_peer_status (dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_vol_rebalance (gf_cli_defrag_type op, dict_t *dict, int op_ret,
+ int op_errno, char *op_errstr);
+
+int
+cli_xml_output_vol_remove_brick (gf_boolean_t status_op, dict_t *dict,
+ int op_ret, int op_errno, char *op_errstr);
+
+int
+cli_xml_output_vol_replace_brick (gf1_cli_replace_op op, dict_t *dict,
+ int op_ret, int op_errno, char *op_errstr);
+
+int
+cli_xml_output_vol_create (dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_generic_volume (char *op, dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_vol_gsync (dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+int
+cli_xml_output_vol_status_tasks_detail (cli_local_t *local, dict_t *dict);
+
+char *
+is_server_debug_xlator (void *myframe);
+
+int32_t
+cli_cmd_snapshot_parse (const char **words, int wordcount, dict_t **options,
+ struct cli_state *state);
+
+#endif /* __CLI_H__ */
diff --git a/cli/src/input.c b/cli/src/input.c
new file mode 100644
index 000000000..a8ea46c6d
--- /dev/null
+++ b/cli/src/input.c
@@ -0,0 +1,97 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <pthread.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "cli.h"
+#include "cli-mem-types.h"
+
+#define CMDBUFSIZ 1024
+
+void *
+cli_batch (void *d)
+{
+ struct cli_state *state = NULL;
+ int ret = 0;
+
+ state = d;
+
+ ret = cli_cmd_process (state, state->argc, state->argv);
+
+ gf_log ("", GF_LOG_INFO, "Exiting with: %d", ret);
+ exit (-ret);
+
+ return NULL;
+}
+
+
+void *
+cli_input (void *d)
+{
+ struct cli_state *state = NULL;
+ int ret = 0;
+ char cmdbuf[CMDBUFSIZ];
+ char *cmd = NULL;
+ size_t len = 0;
+
+ state = d;
+
+ for (;;) {
+ printf ("%s", state->prompt);
+
+ cmd = fgets (cmdbuf, CMDBUFSIZ, stdin);
+ if (!cmd)
+ break;
+ len = strlen(cmd);
+ if (len > 0 && cmd[len - 1] == '\n') //strip trailing \n
+ cmd[len - 1] = '\0';
+ ret = cli_cmd_process_line (state, cmd);
+ if (ret != 0 && state->mode & GLUSTER_MODE_ERR_FATAL)
+ break;
+ }
+
+ exit (-ret);
+
+ return NULL;
+}
+
+
+int
+cli_input_init (struct cli_state *state)
+{
+ int ret = 0;
+
+ if (state->argc) {
+ ret = pthread_create (&state->input, NULL, cli_batch, state);
+ return ret;
+ }
+
+ if (isatty (STDIN_FILENO)) {
+ state->prompt = "gluster> ";
+
+ cli_rl_enable (state);
+ } else {
+ state->prompt = "";
+ state->mode = GLUSTER_MODE_SCRIPT | GLUSTER_MODE_ERR_FATAL;
+ }
+
+ if (!state->rl_enabled)
+ ret = pthread_create (&state->input, NULL, cli_input, state);
+
+ return ret;
+}
diff --git a/cli/src/registry.c b/cli/src/registry.c
new file mode 100644
index 000000000..c4abe3be0
--- /dev/null
+++ b/cli/src/registry.c
@@ -0,0 +1,411 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "cli.h"
+#include "cli-cmd.h"
+
+
+static int
+__is_spc (int ch)
+{
+ if (ch == ' ')
+ return 1;
+ return 0;
+}
+
+
+static int
+__is_div (int ch)
+{
+ switch (ch) {
+ case '(':
+ case ')':
+ case '<':
+ case '>':
+ case '[':
+ case ']':
+ case '{':
+ case '}':
+ case '|':
+ return 1;
+ }
+
+ return 0;
+}
+
+
+static int
+__is_word (const char *word)
+{
+ return (!__is_div (*word) && !__is_spc (*word));
+}
+
+
+int
+counter_char (int ch)
+{
+ switch (ch) {
+ case '(':
+ return ')';
+ case '<':
+ return '>';
+ case '[':
+ return ']';
+ case '{':
+ return '}';
+ }
+
+ return -1;
+}
+
+
+const char *
+__is_template_balanced (const char *template)
+{
+ const char *trav = NULL;
+ int ch = 0;
+
+ trav = template;
+
+ while (*trav) {
+ ch = *trav;
+
+ switch (ch) {
+ case '<':
+ case '(':
+ case '[':
+ trav = __is_template_balanced (trav+1);
+ if (!trav)
+ return NULL;
+ if (*trav != counter_char (ch))
+ return NULL;
+ break;
+ case '>':
+ case ')':
+ case ']':
+ return trav;
+ }
+
+ trav++;
+ }
+
+ return trav;
+}
+
+
+int
+is_template_balanced (const char *template)
+{
+ const char *trav = NULL;
+
+ trav = __is_template_balanced (template);
+ if (!trav || *trav)
+ return -1;
+
+ return 0;
+}
+
+
+int
+cli_cmd_token_count (const char *template)
+{
+ int count = 0;
+ const char *trav = NULL;
+ int is_alnum = 0;
+
+ for (trav = template; *trav; trav++) {
+ switch (*trav) {
+ case '<':
+ case '>':
+ case '(':
+ case ')':
+ case '[':
+ case ']':
+ case '{':
+ case '}':
+ case '|':
+ count++;
+ /* fall through */
+ case ' ':
+ is_alnum = 0;
+ break;
+ default:
+ if (!is_alnum) {
+ is_alnum = 1;
+ count++;
+ }
+ }
+ }
+
+ return count + 1;
+}
+
+
+void
+cli_cmd_tokens_destroy (char **tokens)
+{
+ char **tokenp = NULL;
+
+ if (!tokens)
+ return;
+
+ tokenp = tokens;
+ while (*tokenp) {
+ free (*tokenp);
+ tokenp++;
+ }
+
+ free (tokens);
+}
+
+
+int
+cli_cmd_tokens_fill (char **tokens, const char *template)
+{
+ const char *trav = NULL;
+ char **tokenp = NULL;
+ char *token = NULL;
+ int ret = 0;
+ int ch = 0;
+
+ tokenp = tokens;
+
+ for (trav = template; *trav; trav++) {
+ ch = *trav;
+
+ if (__is_spc (ch))
+ continue;
+
+ if (__is_div (ch)) {
+ token = calloc (2, 1);
+ if (!token)
+ return -1;
+ token[0] = ch;
+
+ *tokenp = token;
+ tokenp++;
+
+ continue;
+ }
+
+ token = strdup (trav);
+ *tokenp = token;
+ tokenp++;
+
+ for (token++; *token; token++) {
+ if (__is_spc (*token) || __is_div (*token)) {
+ *token = 0;
+ break;
+ }
+ trav++;
+ }
+ }
+
+ return ret;
+}
+
+
+char **
+cli_cmd_tokenize (const char *template)
+{
+ char **tokens = NULL;
+ int ret = 0;
+ int count = 0;
+
+ ret = is_template_balanced (template);
+ if (ret)
+ return NULL;
+
+ count = cli_cmd_token_count (template);
+ if (count <= 0)
+ return NULL;
+
+ tokens = calloc (count + 1, sizeof (char *));
+ if (!tokens)
+ return NULL;
+
+ ret = cli_cmd_tokens_fill (tokens, template);
+ if (ret)
+ goto err;
+
+ return tokens;
+err:
+ cli_cmd_tokens_destroy (tokens);
+ return NULL;
+}
+
+void *
+cli_getunamb (const char *tok, void **choices, cli_selector_t sel)
+{
+ void **wcon = NULL;
+ char *w = NULL;
+ unsigned mn = 0;
+ void *ret = NULL;
+
+ if (!choices || !tok || !*tok)
+ return NULL;
+
+ for (wcon = choices; *wcon; wcon++) {
+ w = strtail ((char *)sel (*wcon), tok);
+ if (!w)
+ /* no match */
+ continue;
+ if (!*w)
+ /* exact match */
+ return *wcon;
+
+ ret = *wcon;
+ mn++;
+ }
+
+#ifdef FORCE_MATCH_EXACT
+ return NULL;
+#else
+ return (mn == 1) ? ret : NULL;
+#endif
+}
+
+static const char *
+sel_cmd_word (void *wcon)
+{
+ return ((struct cli_cmd_word *)wcon)->word;
+}
+
+struct cli_cmd_word *
+cli_cmd_nextword (struct cli_cmd_word *word, const char *token)
+{
+ return (struct cli_cmd_word *)cli_getunamb (token,
+ (void **)word->nextwords,
+ sel_cmd_word);
+}
+
+
+struct cli_cmd_word *
+cli_cmd_newword (struct cli_cmd_word *word, const char *token)
+{
+ struct cli_cmd_word **nextwords = NULL;
+ struct cli_cmd_word *nextword = NULL;
+
+ nextwords = realloc (word->nextwords,
+ (word->nextwords_cnt + 2) * sizeof (*nextwords));
+ if (!nextwords)
+ return NULL;
+
+ word->nextwords = nextwords;
+
+ nextword = calloc (1, sizeof (*nextword));
+ if (!nextword)
+ return NULL;
+
+ nextword->word = strdup (token);
+ if (!nextword->word) {
+ free (nextword);
+ return NULL;
+ }
+
+ nextword->tree = word->tree;
+ nextwords[word->nextwords_cnt++] = nextword;
+ nextwords[word->nextwords_cnt] = NULL;
+
+ return nextword;
+}
+
+
+int
+cli_cmd_ingest (struct cli_cmd_tree *tree, char **tokens, cli_cmd_cbk_t *cbkfn,
+ const char *desc, const char *pattern)
+{
+ int ret = 0;
+ char **tokenp = NULL;
+ char *token = NULL;
+ struct cli_cmd_word *word = NULL;
+ struct cli_cmd_word *next = NULL;
+
+ word = &tree->root;
+
+ for (tokenp = tokens; (token = *tokenp); tokenp++) {
+ if (!__is_word (token))
+ break;
+
+ next = cli_cmd_nextword (word, token);
+ if (!next)
+ next = cli_cmd_newword (word, token);
+
+ word = next;
+ if (!word)
+ break;
+ }
+
+ if (!word)
+ return -1;
+
+ if (word->cbkfn) {
+ /* warning - command already registered */
+ }
+
+ word->cbkfn = cbkfn;
+ word->desc = desc;
+ word->pattern = pattern;
+
+ /* end of static strings in command template */
+
+ /* TODO: autocompletion beyond this point is just "nice to have" */
+
+ return ret;
+}
+
+
+int
+cli_cmd_register (struct cli_cmd_tree *tree, struct cli_cmd *cmd)
+{
+ char **tokens = NULL;
+ int ret = 0;
+
+ GF_ASSERT (cmd);
+
+ if (cmd->reg_cbk)
+ cmd->reg_cbk (cmd);
+
+ if (cmd->disable) {
+ ret = 0;
+ goto out;
+ }
+
+ tokens = cli_cmd_tokenize (cmd->pattern);
+ if (!tokens) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = cli_cmd_ingest (tree, tokens, cmd->cbk, cmd->desc, cmd->pattern);
+ if (ret) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (tokens)
+ cli_cmd_tokens_destroy (tokens);
+
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
diff --git a/commit.sh b/commit.sh
deleted file mode 100755
index 9cc55c52e..000000000
--- a/commit.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-
-export EDITOR="emacs"
-git commit -a -e "$@"
diff --git a/configure.ac b/configure.ac
index 6fbaab2c8..b3d1ed184 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,152 +1,245 @@
-dnl Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
-dnl This file is part of GlusterFS.
+dnl Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+dnl This file is part of GlusterFS.
dnl
-dnl GlusterFS is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU General Public License as published by
-dnl the Free Software Foundation; either version 3 of the License, or
-dnl (at your option) any later version.
-dnl
-dnl GlusterFS is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-dnl GNU General Public License for more details.
-dnl
-dnl You should have received a copy of the GNU General Public License
-dnl along with this program. If not, see <http://www.gnu.org/licenses/>.
+dnl This file is licensed to you under your choice of the GNU Lesser
+dnl General Public License, version 3 or any later version (LGPLv3 or
+dnl later), or the GNU General Public License, version 2 (GPLv2), in all
+dnl cases as published by the Free Software Foundation.
-AC_INIT([glusterfs],[2.1.0git],[gluster-users@gluster.org])
+AC_INIT([glusterfs],[3git],[gluster-users@gluster.org],,[https://github.com/gluster/glusterfs.git])
AM_INIT_AUTOMAKE
-AM_CONFIG_HEADER([config.h])
+m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES(yes)])
+
+if make --help 2>&1 | grep -q no-print-directory; then
+ AM_MAKEFLAGS="$AM_MAKEFLAGS --no-print-directory";
+fi
+
+if make --help 2>&1 | grep -q quiet; then
+ AM_MAKEFLAGS="$AM_MAKEFLAGS --quiet"
+fi
+
+if libtool --help 2>&1 | grep -q quiet; then
+ AM_LIBTOOLFLAGS="--quiet";
+fi
+
+AC_CONFIG_HEADERS([config.h])
AC_CONFIG_FILES([Makefile
- libglusterfs/Makefile
- libglusterfs/src/Makefile
- libglusterfsclient/Makefile
- libglusterfsclient/src/Makefile
- glusterfsd/Makefile
- glusterfsd/src/Makefile
- booster/Makefile
- booster/src/Makefile
- xlators/Makefile
- xlators/mount/Makefile
- xlators/mount/fuse/Makefile
- xlators/mount/fuse/src/Makefile
- xlators/mount/fuse/utils/mount.glusterfs
- xlators/mount/fuse/utils/mount_glusterfs
- xlators/mount/fuse/utils/Makefile
- xlators/storage/Makefile
- xlators/storage/posix/Makefile
- xlators/storage/posix/src/Makefile
- xlators/storage/bdb/Makefile
- xlators/storage/bdb/src/Makefile
- xlators/cluster/Makefile
- xlators/cluster/unify/Makefile
- xlators/cluster/unify/src/Makefile
- xlators/cluster/afr/Makefile
- xlators/cluster/afr/src/Makefile
- xlators/cluster/stripe/Makefile
- xlators/cluster/stripe/src/Makefile
- xlators/cluster/dht/Makefile
- xlators/cluster/dht/src/Makefile
- xlators/cluster/ha/Makefile
- xlators/cluster/ha/src/Makefile
- xlators/cluster/map/Makefile
- xlators/cluster/map/src/Makefile
- xlators/performance/Makefile
- xlators/performance/write-behind/Makefile
- xlators/performance/write-behind/src/Makefile
- xlators/performance/read-ahead/Makefile
- xlators/performance/read-ahead/src/Makefile
- xlators/performance/io-threads/Makefile
- xlators/performance/io-threads/src/Makefile
- xlators/performance/io-cache/Makefile
- xlators/performance/io-cache/src/Makefile
- xlators/performance/symlink-cache/Makefile
- xlators/performance/symlink-cache/src/Makefile
- xlators/performance/quick-read/Makefile
- xlators/performance/quick-read/src/Makefile
- xlators/performance/stat-prefetch/Makefile
- xlators/performance/stat-prefetch/src/Makefile
- xlators/debug/Makefile
- xlators/debug/trace/Makefile
- xlators/debug/trace/src/Makefile
- xlators/debug/error-gen/Makefile
- xlators/debug/error-gen/src/Makefile
- xlators/debug/io-stats/Makefile
- xlators/debug/io-stats/src/Makefile
- xlators/protocol/Makefile
- xlators/protocol/client/Makefile
- xlators/protocol/client/src/Makefile
- xlators/protocol/server/Makefile
- xlators/protocol/server/src/Makefile
- xlators/features/Makefile
- xlators/features/locks/Makefile
- xlators/features/locks/src/Makefile
- xlators/features/path-convertor/Makefile
- xlators/features/path-convertor/src/Makefile
- xlators/features/trash/Makefile
- xlators/features/trash/src/Makefile
- xlators/features/filter/Makefile
- xlators/features/filter/src/Makefile
- xlators/features/quota/Makefile
- xlators/features/quota/src/Makefile
- xlators/encryption/Makefile
- xlators/encryption/rot-13/Makefile
- xlators/encryption/rot-13/src/Makefile
- scheduler/Makefile
- scheduler/alu/Makefile
- scheduler/alu/src/Makefile
- scheduler/random/Makefile
- scheduler/random/src/Makefile
- scheduler/nufa/Makefile
- scheduler/nufa/src/Makefile
- scheduler/rr/Makefile
- scheduler/rr/src/Makefile
- scheduler/switch/Makefile
- scheduler/switch/src/Makefile
- transport/Makefile
- transport/socket/Makefile
- transport/socket/src/Makefile
- transport/ib-verbs/Makefile
- transport/ib-verbs/src/Makefile
- auth/Makefile
- auth/addr/Makefile
- auth/addr/src/Makefile
- auth/login/Makefile
- auth/login/src/Makefile
- doc/Makefile
- doc/examples/Makefile
- doc/hacker-guide/Makefile
- extras/Makefile
- extras/init.d/Makefile
- extras/init.d/glusterfs-server.plist
- extras/init.d/glusterfsd-Debian
- extras/init.d/glusterfsd-Redhat
- extras/init.d/glusterfsd-SuSE
- extras/benchmarking/Makefile
- contrib/Makefile
- contrib/fuse-util/Makefile
- glusterfs.spec])
+ libglusterfs/Makefile
+ libglusterfs/src/Makefile
+ geo-replication/src/peer_gsec_create
+ geo-replication/src/peer_add_secret_pub
+ glusterfsd/Makefile
+ glusterfsd/src/Makefile
+ rpc/Makefile
+ rpc/rpc-lib/Makefile
+ rpc/rpc-lib/src/Makefile
+ rpc/rpc-transport/Makefile
+ rpc/rpc-transport/socket/Makefile
+ rpc/rpc-transport/socket/src/Makefile
+ rpc/rpc-transport/rdma/Makefile
+ rpc/rpc-transport/rdma/src/Makefile
+ rpc/xdr/Makefile
+ rpc/xdr/src/Makefile
+ xlators/Makefile
+ xlators/mount/Makefile
+ xlators/mount/fuse/Makefile
+ xlators/mount/fuse/src/Makefile
+ xlators/mount/fuse/utils/mount.glusterfs
+ xlators/mount/fuse/utils/mount_glusterfs
+ xlators/mount/fuse/utils/Makefile
+ xlators/storage/Makefile
+ xlators/storage/posix/Makefile
+ xlators/storage/posix/src/Makefile
+ xlators/storage/bd/Makefile
+ xlators/storage/bd/src/Makefile
+ xlators/cluster/Makefile
+ xlators/cluster/afr/Makefile
+ xlators/cluster/afr/src/Makefile
+ xlators/cluster/stripe/Makefile
+ xlators/cluster/stripe/src/Makefile
+ xlators/cluster/dht/Makefile
+ xlators/cluster/dht/src/Makefile
+ xlators/performance/Makefile
+ xlators/performance/write-behind/Makefile
+ xlators/performance/write-behind/src/Makefile
+ xlators/performance/read-ahead/Makefile
+ xlators/performance/read-ahead/src/Makefile
+ xlators/performance/readdir-ahead/Makefile
+ xlators/performance/readdir-ahead/src/Makefile
+ xlators/performance/io-threads/Makefile
+ xlators/performance/io-threads/src/Makefile
+ xlators/performance/io-cache/Makefile
+ xlators/performance/io-cache/src/Makefile
+ xlators/performance/symlink-cache/Makefile
+ xlators/performance/symlink-cache/src/Makefile
+ xlators/performance/quick-read/Makefile
+ xlators/performance/quick-read/src/Makefile
+ xlators/performance/open-behind/Makefile
+ xlators/performance/open-behind/src/Makefile
+ xlators/performance/md-cache/Makefile
+ xlators/performance/md-cache/src/Makefile
+ xlators/debug/Makefile
+ xlators/debug/trace/Makefile
+ xlators/debug/trace/src/Makefile
+ xlators/debug/error-gen/Makefile
+ xlators/debug/error-gen/src/Makefile
+ xlators/debug/io-stats/Makefile
+ xlators/debug/io-stats/src/Makefile
+ xlators/protocol/Makefile
+ xlators/protocol/auth/Makefile
+ xlators/protocol/auth/addr/Makefile
+ xlators/protocol/auth/addr/src/Makefile
+ xlators/protocol/auth/login/Makefile
+ xlators/protocol/auth/login/src/Makefile
+ xlators/protocol/client/Makefile
+ xlators/protocol/client/src/Makefile
+ xlators/protocol/server/Makefile
+ xlators/protocol/server/src/Makefile
+ xlators/features/Makefile
+ xlators/features/changelog/Makefile
+ xlators/features/changelog/src/Makefile
+ xlators/features/changelog/lib/Makefile
+ xlators/features/changelog/lib/src/Makefile
+ xlators/features/glupy/Makefile
+ xlators/features/glupy/src/Makefile
+ xlators/features/locks/Makefile
+ xlators/features/locks/src/Makefile
+ xlators/features/quota/Makefile
+ xlators/features/quota/src/Makefile
+ xlators/features/marker/Makefile
+ xlators/features/marker/src/Makefile
+ xlators/features/read-only/Makefile
+ xlators/features/read-only/src/Makefile
+ xlators/features/compress/Makefile
+ xlators/features/compress/src/Makefile
+ xlators/features/mac-compat/Makefile
+ xlators/features/mac-compat/src/Makefile
+ xlators/features/quiesce/Makefile
+ xlators/features/quiesce/src/Makefile
+ xlators/features/index/Makefile
+ xlators/features/index/src/Makefile
+ xlators/features/protect/Makefile
+ xlators/features/protect/src/Makefile
+ xlators/features/gfid-access/Makefile
+ xlators/features/gfid-access/src/Makefile
+ xlators/playground/Makefile
+ xlators/playground/template/Makefile
+ xlators/playground/template/src/Makefile
+ xlators/encryption/Makefile
+ xlators/encryption/rot-13/Makefile
+ xlators/encryption/rot-13/src/Makefile
+ xlators/encryption/crypt/Makefile
+ xlators/encryption/crypt/src/Makefile
+ xlators/features/qemu-block/Makefile
+ xlators/features/qemu-block/src/Makefile
+ xlators/system/Makefile
+ xlators/system/posix-acl/Makefile
+ xlators/system/posix-acl/src/Makefile
+ xlators/nfs/Makefile
+ xlators/nfs/server/Makefile
+ xlators/nfs/server/src/Makefile
+ xlators/mgmt/Makefile
+ xlators/mgmt/glusterd/Makefile
+ xlators/mgmt/glusterd/src/Makefile
+ cli/Makefile
+ cli/src/Makefile
+ doc/Makefile
+ extras/Makefile
+ extras/init.d/Makefile
+ extras/init.d/glusterd.plist
+ extras/init.d/glusterd-Debian
+ extras/init.d/glusterd-Redhat
+ extras/init.d/glusterd-SuSE
+ extras/systemd/Makefile
+ extras/systemd/glusterd.service
+ extras/benchmarking/Makefile
+ extras/hook-scripts/Makefile
+ extras/ocf/Makefile
+ extras/ocf/glusterd
+ extras/ocf/volume
+ extras/LinuxRPM/Makefile
+ extras/geo-rep/Makefile
+ contrib/fuse-util/Makefile
+ contrib/uuid/uuid_types.h
+ glusterfs-api.pc
+ libgfchangelog.pc
+ api/Makefile
+ api/src/Makefile
+ api/examples/Makefile
+ api/examples/setup.py
+ geo-replication/Makefile
+ geo-replication/src/Makefile
+ geo-replication/syncdaemon/Makefile
+ glusterfs.spec])
AC_CANONICAL_HOST
AC_PROG_CC
+AC_DISABLE_STATIC
AC_PROG_LIBTOOL
+AC_ARG_WITH(pkgconfigdir,
+ [ --with-pkgconfigdir=DIR pkgconfig file in DIR @<:@LIBDIR/pkgconfig@:>@],
+ [pkgconfigdir=$withval],
+ [pkgconfigdir='${libdir}/pkgconfig'])
+AC_SUBST(pkgconfigdir)
+
AC_ARG_WITH(mountutildir,
[ --with-mountutildir=DIR mount helper utility in DIR @<:@/sbin@:>@],
[mountutildir=$withval],
[mountutildir='/sbin'])
AC_SUBST(mountutildir)
+AC_ARG_WITH(systemddir,
+ [ --with-systemddir=DIR systemd service files in DIR @<:@/usr/lib/systemd/system@:>@],
+ [systemddir=$withval],
+ [systemddir='/usr/lib/systemd/system'])
+AC_SUBST(systemddir)
+
+AC_ARG_WITH(initdir,
+ [ --with-initdir=DIR init.d scripts in DIR @<:@/etc/init.d@:>@],
+ [initdir=$withval],
+ [initdir='/etc/init.d'])
+AC_SUBST(initdir)
+
+AC_ARG_WITH(launchddir,
+ [ --with-launchddir=DIR launchd services in DIR @<:@/Library/LaunchDaemons@:>@],
+ [launchddir=$withval],
+ [launchddir='/Library/LaunchDaemons'])
+AC_SUBST(launchddir)
+
+AC_ARG_WITH([ocf],
+ [AS_HELP_STRING([--without-ocf], [build OCF-compliant cluster resource agents])],
+ ,
+ [OCF_SUBDIR='ocf'],
+ )
+AC_SUBST(OCF_SUBDIR)
+
# LEX needs a check
AC_PROG_LEX
if test "x${LEX}" != "xflex" -a "x${FLEX}" != "xlex"; then
AC_MSG_ERROR([Flex or lex required to build glusterfs.])
fi
+dnl
+dnl Word sizes...
+dnl
+AC_CHECK_SIZEOF(short)
+AC_CHECK_SIZEOF(int)
+AC_CHECK_SIZEOF(long)
+AC_CHECK_SIZEOF(long long)
+SIZEOF_SHORT=$ac_cv_sizeof_short
+SIZEOF_INT=$ac_cv_sizeof_int
+SIZEOF_LONG=$ac_cv_sizeof_long
+SIZEOF_LONG_LONG=$ac_cv_sizeof_long_long
+AC_SUBST(SIZEOF_SHORT)
+AC_SUBST(SIZEOF_INT)
+AC_SUBST(SIZEOF_LONG)
+AC_SUBST(SIZEOF_LONG_LONG)
+
# YACC needs a check
AC_PROG_YACC
if test "x${YACC}" = "xbyacc" -o "x${YACC}" = "xyacc" -o "x${YACC}" = "x"; then
@@ -155,8 +248,10 @@ fi
AC_CHECK_TOOL([LD],[ld])
+AC_CHECK_LIB([crypto], [MD5], , AC_MSG_ERROR([OpenSSL crypto library is required to build glusterfs]))
+
AC_CHECK_LIB([pthread], [pthread_mutex_init], , AC_MSG_ERROR([Posix threads library is required to build glusterfs]))
-
+
AC_CHECK_FUNC([dlopen], [has_dlopen=yes], AC_CHECK_LIB([dl], [dlopen], , AC_MSG_ERROR([Dynamic linking library required to build glusterfs])))
@@ -164,6 +259,18 @@ AC_CHECK_HEADERS([sys/xattr.h])
AC_CHECK_HEADERS([sys/extattr.h])
+AC_CHECK_HEADERS([openssl/md5.h])
+
+AC_CHECK_HEADERS([linux/falloc.h])
+
+case $host_os in
+ darwin*)
+ if ! test "`/usr/bin/sw_vers | grep ProductVersion: | cut -f 2 | cut -d. -f2`" -ge 5; then
+ AC_MSG_ERROR([You need at least OS X 10.5 (Leopard) to build Glusterfs])
+ fi
+ ;;
+esac
+
dnl Mac OS X does not have spinlocks
AC_CHECK_FUNC([pthread_spin_init], [have_spinlock=yes])
if test "x${have_spinlock}" = "xyes"; then
@@ -187,166 +294,280 @@ if test "x${have_setfsuid}" = "xyes" -a "x${have_setfsgid}" = "xyes"; then
fi
-# LIBGLUSTERFSCLIENT section
-AC_ARG_ENABLE([libglusterfsclient],
- AC_HELP_STRING([--disable-libglusterfsclient],
- [Do not build libglusterfsclient]))
+# FUSE section
+AC_ARG_ENABLE([fuse-client],
+ AC_HELP_STRING([--disable-fuse-client],
+ [Do not build the fuse client. NOTE: you cannot mount glusterfs without the client]))
-BUILD_LIBGLUSTERFSCLIENT="yes"
+BUILD_FUSE_CLIENT=no
+if test "x$enable_fuse_client" != "xno"; then
+ FUSE_CLIENT_SUBDIR=fuse
+ BUILD_FUSE_CLIENT="yes"
+fi
-if test "x$enable_libglusterfsclient" != "xno"; then
- LIBGLUSTERFSCLIENT_SUBDIR="libglusterfsclient"
- BUILD_LIBGLUSTERFSCLIENT="yes"
+AC_ARG_ENABLE([bd-xlator],
+ AC_HELP_STRING([--enable-bd-xlator], [Build BD xlator]))
+
+if test "x$enable_bd_xlator" != "xno"; then
+ AC_CHECK_LIB([lvm2app],
+ [lvm_init,lvm_lv_from_name],
+ [HAVE_BD_LIB="yes"],
+ [HAVE_BD_LIB="no"])
+
+if test "x$HAVE_BD_LIB" = "xyes"; then
+ # lvm_lv_from_name() has been made public with lvm2-2.02.79
+ AC_CHECK_DECLS(
+ [lvm_lv_from_name],
+ [NEED_LVM_LV_FROM_NAME_DECL="no"],
+ [NEED_LVM_LV_FROM_NAME_DECL="yes"],
+ [[#include <lvm2app.h>]])
+ fi
fi
-AC_SUBST(LIBGLUSTERFSCLIENT_SUBDIR)
-# end LIBGLUSTERFSCLIENT section
+if test "x$enable_bd_xlator" = "xyes" -a "x$HAVE_BD_LIB" = "xno"; then
+ echo "BD xlator requested but required lvm2 development library not found."
+ exit 1
+fi
+BUILD_BD_XLATOR=no
+if test "x${enable-bd-xlator}" != "xno" -a "x${HAVE_BD_LIB}" = "xyes"; then
+ BUILD_BD_XLATOR=yes
+ AC_DEFINE(HAVE_BD_XLATOR, 1, [define if lvm2app library found and bd xlator
+ enabled])
+ if test "x$NEED_LVM_LV_FROM_NAME_DECL" = "xyes"; then
+ AC_DEFINE(NEED_LVM_LV_FROM_NAME_DECL, 1, [defined if lvm_lv_from_name()
+ was not found in the lvm2app.h header, but can be linked])
+ fi
+fi
+AM_CONDITIONAL([ENABLE_BD_XLATOR], [test x$BUILD_BD_XLATOR = xyes])
+# start encryption/crypt section
-# FUSE section
-AC_ARG_ENABLE([fuse-client],
- AC_HELP_STRING([--disable-fuse-client],
- [Do not build the fuse client. NOTE: you cannot mount glusterfs without the client]))
+AC_CHECK_HEADERS([openssl/cmac.h], [have_cmac_h=yes], [have_cmac_h=no])
-BUILD_FUSE_CLIENT=no
-if test "x$enable_fuse_client" != "xno"; then
- FUSE_CLIENT_SUBDIR=fuse
- BUILD_FUSE_CLIENT="yes"
+AC_ARG_ENABLE([crypt-xlator],
+ AC_HELP_STRING([--enable-crypt-xlator], [Build crypt encryption xlator]))
+
+if test "x$enable_crypt_xlator" = "xyes" -a "x$have_cmac_h" = "xno"; then
+ echo "Encryption xlator requires OpenSSL with cmac.h"
+ exit 1
fi
+BUILD_CRYPT_XLATOR=no
+if test "x$enable_crypt_xlator" != "xno" -a "x$have_cmac_h" = "xyes"; then
+ BUILD_CRYPT_XLATOR=yes
+ AC_DEFINE(HAVE_CRYPT_XLATOR, 1, [enable building crypt encryption xlator])
+fi
+
+AM_CONDITIONAL([ENABLE_CRYPT_XLATOR], [test x$BUILD_CRYPT_XLATOR = xyes])
+
AC_SUBST(FUSE_CLIENT_SUBDIR)
# end FUSE section
# FUSERMOUNT section
AC_ARG_ENABLE([fusermount],
- AC_HELP_STRING([--enable-fusermount],
- [Build fusermount]),
- [], [enable_fusermount=no])
+ AC_HELP_STRING([--disable-fusermount],
+ [Use system's fusermount]))
-BUILD_FUSERMOUNT="no"
-
-if test "x$enable_fusermount" != "xno"; then
- FUSERMOUNT_SUBDIR="contrib"
- BUILD_FUSERMOUNT="yes"
+BUILD_FUSERMOUNT="yes"
+if test "x$enable_fusermount" = "xno"; then
+ BUILD_FUSERMOUNT="no"
+else
AC_DEFINE(GF_FUSERMOUNT, 1, [Use our own fusermount])
+ FUSERMOUNT_SUBDIR="contrib/fuse-util"
fi
AC_SUBST(FUSERMOUNT_SUBDIR)
#end FUSERMOUNT section
+# QEMU_BLOCK section
+
+AC_ARG_ENABLE([qemu-block],
+ AC_HELP_STRING([--enable-qemu-block],
+ [Build QEMU Block formats translator]))
+
+if test "x$enable_qemu_block" != "xno"; then
+ PKG_CHECK_MODULES([GLIB], [glib-2.0],
+ [HAVE_GLIB_2="yes"],
+ [HAVE_GLIB_2="no"])
+fi
+
+if test "x$enable_qemu_block" = "xyes" -a "x$HAVE_GLIB_2" = "xno"; then
+ echo "QEMU Block formats translator requires libglib-2.0, but missing."
+ exit 1
+fi
+
+BUILD_QEMU_BLOCK=no
+if test "x${enable_qemu_block}" != "xno" -a "x${HAVE_GLIB_2}" = "xyes"; then
+ BUILD_QEMU_BLOCK=yes
+ AC_DEFINE(HAVE_QEMU_BLOCK, 1, [define if libglib-2.0 library found and QEMU
+ Block translator enabled])
+fi
+
+AM_CONDITIONAL([ENABLE_QEMU_BLOCK], [test x$BUILD_QEMU_BLOCK = xyes])
+
+# end QEMU_BLOCK section
# EPOLL section
AC_ARG_ENABLE([epoll],
- AC_HELP_STRING([--disable-epoll],
- [Use poll instead of epoll.]))
+ AC_HELP_STRING([--disable-epoll],
+ [Use poll instead of epoll.]))
BUILD_EPOLL=no
if test "x$enable_epoll" != "xno"; then
AC_CHECK_HEADERS([sys/epoll.h],
[BUILD_EPOLL=yes],
- [BUILD_EPOLL=no])
+ [BUILD_EPOLL=no])
fi
# end EPOLL section
# IBVERBS section
AC_ARG_ENABLE([ibverbs],
- AC_HELP_STRING([--disable-ibverbs],
- [Do not build the ibverbs transport]))
+ AC_HELP_STRING([--disable-ibverbs],
+ [Do not build the ibverbs transport]))
if test "x$enable_ibverbs" != "xno"; then
AC_CHECK_LIB([ibverbs],
[ibv_get_device_list],
- [HAVE_LIBIBVERBS="yes"],
- [HAVE_LIBIBVERBS="no"])
+ [HAVE_LIBIBVERBS="yes"],
+ [HAVE_LIBIBVERBS="no"])
+ AC_CHECK_LIB([rdmacm], [rdma_create_id], [HAVE_RDMACM="yes"], [HAVE_RDMACM="no"])
fi
-if test "x$enable_ibverbs" = "xyes" -a "x$HAVE_LIBIBVERBS" = "xno"; then
- echo "ibverbs requested but not found."
- exit 1
-fi
+if test "x$enable_ibverbs" = "xyes"; then
+ if test "x$HAVE_LIBIBVERBS" = "xno"; then
+ echo "ibverbs-transport requested, but libibverbs is not present."
+ exit 1
+ fi
+ if test "x$HAVE_RDMACM" = "xno"; then
+ echo "ibverbs-transport requested, but librdmacm is not present."
+ exit 1
+ fi
+fi
+BUILD_RDMA=no
BUILD_IBVERBS=no
-if test "x$enable_ibverbs" != "xno" -a "x$HAVE_LIBIBVERBS" = "xyes"; then
+if test "x$enable_ibverbs" != "xno" -a "x$HAVE_LIBIBVERBS" = "xyes" -a "x$HAVE_RDMACM" = "xyes"; then
IBVERBS_SUBDIR=ib-verbs
BUILD_IBVERBS=yes
+ RDMA_SUBDIR=rdma
+ BUILD_RDMA=yes
fi
AC_SUBST(IBVERBS_SUBDIR)
+AC_SUBST(RDMA_SUBDIR)
# end IBVERBS section
-# Berkely-DB section
-# storage/bdb requires Berkeley-DB version 4.6.21 or higher
-_GLFS_DB_VERSION_MAJOR=4
-_GLFS_DB_VERSION_MINOR=6
-_GLFS_DB_VERSION_PATCH=21
-AC_ARG_ENABLE([db],
- AC_HELP_STRING([--disable-bdb],
- [Do not build the Berkeley-DB translator]))
-
-if test "x$enable_bdb" != "xno"; then
- AC_CHECK_HEADERS([db.h],
- [HAVE_BDB="yes"],
- [HAVE_BDB="no"])
- if test "x$HAVE_BDB" = "xyes"; then
- AC_CHECK_LIB([db],
- [db_create],
- [HAVE_BDB="yes"],
- [HAVE_BDB="no"])
- fi
-
- if test "x$HAVE_BDB" = "xyes"; then
- AC_TRY_COMPILE([#include <db.h>],
- #if (DB_VERSION_MAJOR < $_GLFS_DB_VERSION_MAJOR) ||\
- (DB_VERSION_MAJOR == $_GLFS_DB_VERSION_MAJOR && \
- DB_VERSION_MINOR < $_GLFS_DB_VERSION_MINOR) || \
- (DB_VERSION_MAJOR == $_GLFS_DB_VERSION_MAJOR && \
- DB_VERSION_MINOR == $_GLFS_DB_VERSION_MINOR && \
- DB_VERSION_PATCH < $_GLFS_DB_VERSION_PATCH)
- #error "bdb older than required"
- #endif
- ,
- [HAVE_BDB_VERSION="yes"],
- [HAVE_BDB_VERSION="no"])
-
- dnl check for DB->stat having 4 arguments.
- AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <db.h>]],
- [[DB *bdb; bdb->stat (NULL, NULL, NULL, 0);]])],
- [HAVE_BDB_VERSION=yes], [HAVE_BDB_VERSION=no])
+# SYNCDAEMON section
+AC_ARG_ENABLE([georeplication],
+ AC_HELP_STRING([--disable-georeplication],
+ [Do not install georeplication components]))
- dnl check for DBC->c_get presence.
- AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <db.h>]],
- [[DBC *cursor; cursor->get (NULL, NULL, NULL, 0);]])],
- [HAVE_BDB_CURSOR_GET=yes], [HAVE_BDB_CURSOR_GET=no])
-
- fi
+BUILD_SYNCDAEMON=no
+case $host_os in
+ linux*)
+#do nothing
+ ;;
+ netbsd*)
+#do nothing
+ ;;
+ *)
+#disabling geo replication for non-linux platforms
+ enable_georeplication=no
+ ;;
+esac
+SYNCDAEMON_COMPILE=0
+if test "x$enable_georeplication" != "xno"; then
+ SYNCDAEMON_SUBDIR=geo-replication
+ SYNCDAEMON_COMPILE=1
+
+ BUILD_SYNCDAEMON="yes"
+ AM_PATH_PYTHON([2.4])
+ echo -n "checking if python is python 2.x... "
+ if echo $PYTHON_VERSION | grep ^2; then
+ :
+ else
+ echo no
+ AC_MSG_ERROR([only python 2.x is supported])
+ fi
+ echo -n "checking if python has ctypes support... "
+ if "$PYTHON" -c 'import ctypes' 2>/dev/null; then
+ echo yes
+ else
+ echo no
+ AC_MSG_ERROR([python does not have ctypes support])
+ fi
fi
-
-if test "x$HAVE_BDB_CURSOR_GET" = "xyes" -a "x$HAVE_BDB_VERSION" = "xyes"; then
- AC_DEFINE(HAVE_BDB_CURSOR_GET, 1, [Berkeley-DB version has cursor->get()])
+AC_SUBST(SYNCDAEMON_COMPILE)
+AC_SUBST(SYNCDAEMON_SUBDIR)
+# end SYNCDAEMON section
+
+# CDC xlator - check if libz is present if so enable HAVE_LIB_Z
+echo -n "checking if libz is present... "
+
+PKG_CHECK_MODULES([ZLIB], [zlib >= 1.2.0],
+ [echo "yes (features requiring zlib enabled)" AC_DEFINE(HAVE_LIB_Z, 1, [define if zlib is present])],
+ [echo "no"] )
+
+AC_SUBST(LIBZ_CFLAGS)
+AC_SUBST(LIBZ_LIBS)
+# end CDC xlator secion
+
+# check for systemtap/dtrace
+BUILD_SYSTEMTAP=no
+AC_MSG_CHECKING([whether to include systemtap tracing support])
+AC_ARG_ENABLE([systemtap],
+ [AS_HELP_STRING([--enable-systemtap],
+ [Enable inclusion of systemtap trace support])],
+ [ENABLE_SYSTEMTAP="${enableval}"], [ENABLE_SYSTEMTAP="def"])
+
+AM_CONDITIONAL([ENABLE_SYSTEMTAP], [test "x${ENABLE_SYSTEMTAP}" = "xyes"])
+AC_MSG_RESULT(${ENABLE_SYSTEMTAP})
+
+if test "x${ENABLE_SYSTEMTAP}" != "xno"; then
+ AC_CHECK_PROG(DTRACE, dtrace, "yes", "no")
+ AC_CHECK_HEADER([sys/sdt.h], [SDT_H_FOUND="yes"],
+ [SDT_H_FOUND="no"])
fi
-if test "x$enable_bdb" = "xyes" -a "x$HAVE_BDB" = "xno" -a "x$HAVE_BDB_VERSION" = "xno" -a "x$HAVE_BDB_CURSOR_GET" = "xno"; then
- echo "Berkeley-DB requested but not found. glusterfs bdb feature requires db version 4.6.21 or higher"
- exit 1
+if test "x${ENABLE_SYSTEMTAP}" = "xyes"; then
+ if test "x${DTRACE}" = "xno"; then
+ AC_MSG_ERROR([dtrace not found])
+ elif test "$x{SDT_H_FOUND}" = "xno"; then
+ AC_MSG_ERROR([systemtap support needs sys/sdt.h header])
+ fi
fi
-
-BUILD_BDB=no
-if test "x$enable_bdb" != "xno" -a "x$HAVE_BDB" = "xyes"; then
- BDB_SUBDIR=bdb
- BUILD_BDB=yes
+if test "x${DTRACE}" = "xyes" -a "x${SDT_H_FOUND}" = "xyes"; then
+ AC_MSG_CHECKING([x"${DTRACE}"xy"${SDT_H_FOUND}"y])
+ AC_DEFINE([HAVE_SYSTEMTAP], [1], [Define to 1 if using probes.])
+ BUILD_SYSTEMTAP=yes
fi
-
-
-
-AC_SUBST(BDB_SUBDIR)
-# end BDB section
+# end of systemtap/dtrace
+
+# xml-output
+AC_ARG_ENABLE([xml-output],
+ AC_HELP_STRING([--disable-xml-output],
+ [Disable the xml output]))
+BUILD_XML_OUTPUT="yes"
+if test "x$enable_xml_output" != "xno"; then
+ #check if libxml is present if so enable HAVE_LIB_XML
+ m4_ifdef([AM_PATH_XML2],[AM_PATH_XML2([2.6.19])], [no_xml=yes])
+ if test "x${no_xml}" = "x"; then
+ AC_DEFINE([HAVE_LIB_XML], [1], [Define to 1 if using libxml2.])
+ else
+ AC_MSG_WARN([libxml2 devel libraries not found disabling XML support])
+ BUILD_XML_OUTPUT="no"
+ fi
+else
+ BUILD_XML_OUTPUT="no"
+fi
+# end of xml-output
dnl FreeBSD > 5 has execinfo as a Ported library for giving a workaround
dnl solution to GCC backtrace functionality
@@ -371,12 +592,26 @@ dnl Linux, Solaris, Cygwin
AC_CHECK_MEMBERS([struct stat.st_atim.tv_nsec])
dnl FreeBSD, NetBSD
AC_CHECK_MEMBERS([struct stat.st_atimespec.tv_nsec])
+case $host_os in
+ *netbsd*)
+ CFLAGS+=" -D_INCOMPLETE_XOPEN_C063"
+ ;;
+esac
+AC_CHECK_FUNC([linkat], [have_linkat=yes])
+if test "x${have_linkat}" = "xyes"; then
+ AC_DEFINE(HAVE_LINKAT, 1, [define if found linkat])
+fi
+AC_SUBST(HAVE_LINKAT)
+
+dnl check for Monotonic clock
+AC_CHECK_FUNC([clock_gettime], [has_monotonic_clock=yes], AC_CHECK_LIB([rt], [clock_gettime], , AC_MSG_WARN([System doesn't have monotonic clock using contrib])))
dnl Check for argp
AC_CHECK_HEADER([argp.h], AC_DEFINE(HAVE_ARGP, 1, [have argp]))
AC_CONFIG_SUBDIRS(argp-standalone)
+
BUILD_ARGP_STANDALONE=no
-if test "x${ac_cv_header_argp_h}" = "xno"; then
+if test "x${ac_cv_header_argp_h}" = "xno"; then
BUILD_ARGP_STANDALONE=yes
ARGP_STANDALONE_CPPFLAGS='-I${top_srcdir}/argp-standalone'
ARGP_STANDALONE_LDADD='${top_builddir}/argp-standalone/libargp.a'
@@ -397,7 +632,18 @@ if test "x${have_fdatasync}" = "xyes"; then
AC_DEFINE(HAVE_FDATASYNC, 1, [define if fdatasync exists])
fi
-# Check the distribution where you are compiling glusterfs on
+AC_CHECK_FUNC([fallocate], [have_fallocate=yes])
+if test "x${have_fallocate}" = "xyes"; then
+ AC_DEFINE(HAVE_FALLOCATE, 1, [define if fallocate exists])
+fi
+
+AC_CHECK_FUNC([posix_fallocate], [have_posix_fallocate=yes])
+if test "x${have_posix_fallocate}" = "xyes"; then
+ AC_DEFINE(HAVE_POSIX_FALLOCATE, 1, [define if posix_fallocate exists])
+fi
+
+
+# Check the distribution where you are compiling glusterfs on
GF_DISTRIBUTION=
AC_CHECK_FILE([/etc/debian_version])
@@ -419,78 +665,227 @@ AC_SUBST(GF_DISTRIBUTION)
GF_HOST_OS=""
GF_LDFLAGS="-rdynamic"
-if test "x$BUILD_LIBGLUSTERFSCLIENT" = "xyes"; then
- GF_BOOSTER_SUBDIR="booster"
+# check for gcc -Werror=format-security
+saved_CFLAGS=$CFLAGS
+CFLAGS="-Wformat -Werror=format-security"
+AC_MSG_CHECKING([whether $CC accepts -Werror=format-security])
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM()], [cc_werror_format_security=yes], [cc_werror_format_security=no])
+echo $cc_werror_format_security
+if test "x$cc_werror_format_security" = "xno"; then
+ CFLAGS="$saved_CFLAGS"
+else
+ CFLAGS="$saved_CFLAGS $CFLAGS"
+fi
+
+# check for gcc -Werror=implicit-function-declaration
+saved_CFLAGS=$CFLAGS
+CFLAGS="-Werror=implicit-function-declaration"
+AC_MSG_CHECKING([whether $CC accepts -Werror=implicit-function-declaration])
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM()], [cc_werror_implicit=yes], [cc_werror_implicit=no])
+echo $cc_werror_implicit
+if test "x$cc_werror_implicit" = "xno"; then
+ CFLAGS="$saved_CFLAGS"
+else
+ CFLAGS="$saved_CFLAGS $CFLAGS"
fi
-GF_FUSE_LDADD="-lfuse"
case $host_os in
linux*)
- dnl GF_LINUX_HOST_OS=1
GF_HOST_OS="GF_LINUX_HOST_OS"
- GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS}"
- GF_GLUSTERFS_CFLAGS="${GF_CFLAGS}"
- GF_LDADD="${ARGP_STANDALONE_LDADD}"
- ;;
+ GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -O0"
+ GF_GLUSTERFS_CFLAGS="${GF_CFLAGS}"
+ GF_LDADD="${ARGP_STANDALONE_LDADD}"
+ GF_FUSE_CFLAGS="-DFUSERMOUNT_DIR=\\\"\$(bindir)\\\""
+ ;;
solaris*)
GF_HOST_OS="GF_SOLARIS_HOST_OS"
- GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -D_REENTRANT"
- GF_LDFLAGS=""
- GF_GLUSTERFS_CFLAGS="${GF_CFLAGS}"
- GF_LDADD="${ARGP_STANDALONE_LDADD}"
- GF_GLUSTERFS_LDFLAGS="-lnsl -lresolv -lsocket"
- GF_BOOSTER_SUBDIR=""
- ;;
+ GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -D_REENTRANT -D_POSIX_PTHREAD_SEMANTICS -O0 -m64"
+ GF_LDFLAGS=""
+ GF_GLUSTERFS_CFLAGS="${GF_CFLAGS}"
+ GF_LDADD="${ARGP_STANDALONE_LDADD}"
+ GF_GLUSTERFS_LIBS="-lnsl -lresolv -lsocket"
+ BUILD_FUSE_CLIENT=no
+ FUSE_CLIENT_SUBDIR=""
+ ;;
+ *netbsd*)
+ GF_HOST_OS="GF_BSD_HOST_OS"
+ GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -D_INCOMPLETE_XOPEN_C063"
+ GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_BASENAME"
+ GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_DIRNAME"
+ GF_GLUSTERFS_CFLAGS="${GF_CFLAGS}"
+ GF_LDADD="${ARGP_STANDALONE_LDADD}"
+ if test "x$ac_cv_header_execinfo_h" = "xyes"; then
+ GF_GLUSTERFS_LIBS="-lexecinfo"
+ fi
+ GF_FUSE_LDADD="-lperfuse"
+ BUILD_FUSE_CLIENT=yes
+ LEXLIB=""
+ ;;
*bsd*)
GF_HOST_OS="GF_BSD_HOST_OS"
- GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS}"
- GF_GLUSTERFS_CFLAGS="${GF_CFLAGS}"
- GF_LDADD="${ARGP_STANDALONE_LDADD}"
- if test "x$ac_cv_header_execinfo_h" = "xyes"; then
- GF_GLUSTERFS_LDFLAGS="-lexecinfo"
- fi
- GF_FUSE_LDADD="-liconv -lfuse"
- BUILD_LIBGLUSTERFSCLIENT=no
- LIBGLUSTERFSCLIENT_SUBDIR=""
- GF_BOOSTER_SUBDIR=""
- ;;
+ GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -O0"
+ GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_BASENAME"
+ GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_DIRNAME"
+ GF_GLUSTERFS_CFLAGS="${GF_CFLAGS}"
+ GF_LDADD="${ARGP_STANDALONE_LDADD}"
+ if test "x$ac_cv_header_execinfo_h" = "xyes"; then
+ GF_GLUSTERFS_LIBS="-lexecinfo"
+ fi
+ BUILD_FUSE_CLIENT=no
+ ;;
darwin*)
GF_HOST_OS="GF_DARWIN_HOST_OS"
- LIBTOOL=glibtool
- GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -D__DARWIN_64_BIT_INO_T -bundle -undefined suppress -flat_namespace"
- GF_GLUSTERFS_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -D__DARWIN_64_BIT_INO_T -undefined suppress -flat_namespace"
- GF_LDADD="${ARGP_STANDALONE_LDADD}"
- GF_FUSE_LDADD="-liconv -lfuse_ino64"
- BUILD_LIBGLUSTERFSCLIENT=no
- LIBGLUSTERFSCLIENT_SUBDIR=""
- GF_BOOSTER_SUBDIR=""
- ;;
+ LIBTOOL=glibtool
+ GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -D__DARWIN_64_BIT_INO_T -bundle -undefined suppress -flat_namespace -D_XOPEN_SOURCE -O0"
+ GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_BASENAME"
+ GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_DIRNAME"
+ GF_GLUSTERFS_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -D__DARWIN_64_BIT_INO_T -undefined suppress -flat_namespace -O0"
+ GF_LDADD="${ARGP_STANDALONE_LDADD}"
+ GF_FUSE_CFLAGS="-I\$(CONTRIBDIR)/macfuse"
+ ;;
esac
+# enable debug section
+AC_ARG_ENABLE([debug],
+ AC_HELP_STRING([--enable-debug],
+ [Enable debug build options.]))
+
+BUILD_DEBUG=no
+if test "x$enable_debug" = "xyes"; then
+ BUILD_DEBUG=yes
+ CFLAGS=`echo $CFLAGS | sed -e s/O2/O0/`
+else
+ BUILD_DEBUG=no
+fi
+AC_SUBST(CFLAGS)
+# end enable debug section
+
+# syslog section
+AC_ARG_ENABLE([syslog],
+ AC_HELP_STRING([--disable-syslog],
+ [Disable syslog for logging]))
+
+USE_SYSLOG="yes"
+if test "x$enable_syslog" != "xno"; then
+ AC_DEFINE(GF_USE_SYSLOG, 1, [Use syslog for logging])
+else
+ USE_SYSLOG="no"
+fi
+AM_CONDITIONAL([ENABLE_SYSLOG], [test x$USE_SYSLOG = xyes])
+#end syslog section
+
+BUILD_READLINE=no
+AC_CHECK_LIB([readline -lcurses],[readline],[RLLIBS="-lreadline -lcurses"])
+AC_CHECK_LIB([readline -ltermcap],[readline],[RLLIBS="-lreadline -ltermcap"])
+AC_CHECK_LIB([readline -lncurses],[readline],[RLLIBS="-lreadline -lncurses"])
+
+if test "x$RLLIBS" != "x"; then
+ AC_DEFINE(HAVE_READLINE, 1, [readline enabled CLI])
+ BUILD_READLINE=yes
+fi
+
+BUILD_LIBAIO=no
+AC_CHECK_LIB([aio],[io_setup],[LIBAIO="-laio"])
+
+if test "x$LIBAIO" != "x"; then
+ AC_DEFINE(HAVE_LIBAIO, 1, [libaio based POSIX enabled])
+ BUILD_LIBAIO=yes
+fi
+
+# glupy section
+BUILD_GLUPY=no
+have_python2=no
+have_Python_h=no
+
+AM_PATH_PYTHON()
+if echo $PYTHON_VERSION | grep ^2; then
+ have_python2=yes
+fi
+AC_CHECK_HEADERS([python$PYTHON_VERSION/Python.h],[have_Python_h=yes],[])
+AC_ARG_ENABLE([glupy],
+ AS_HELP_STRING([--enable-glupy],
+ [build glupy]))
+case x$enable_glupy in
+ xyes)
+ if test "x$have_python2" = "xyes" -a "x$have_Python_h" = "xyes"; then
+ BUILD_GLUPY=yes
+ else
+ AC_MSG_ERROR([glupy requires python-devel/python-dev package and python2.x])
+ fi
+ ;;
+ xno)
+ ;;
+ *)
+ if test "x$have_python2" = "xyes" -a "x$have_Python_h" = "xyes"; then
+ BUILD_GLUPY=yes
+ else
+ AC_MSG_WARN([
+ ---------------------------------------------------------------------------------
+ cannot build glupy. python 2.x and python-devel/python-dev package are required.
+ ---------------------------------------------------------------------------------])
+ fi
+ ;;
+esac
+
+if test "x$BUILD_GLUPY" = "xyes"; then
+ BUILD_PYTHON_INC=`$PYTHON -c "from distutils import sysconfig; print sysconfig.get_python_inc()"`
+ BUILD_PYTHON_LIB=python$PYTHON_VERSION
+ GLUPY_SUBDIR=glupy
+ GLUPY_SUBDIR_MAKEFILE=xlators/features/glupy/Makefile
+ GLUPY_SUBDIR_SRC_MAKEFILE=xlators/features/glupy/src/Makefile
+ echo "building glupy with -isystem $BUILD_PYTHON_INC -l $BUILD_PYTHON_LIB"
+ AC_SUBST(BUILD_PYTHON_INC)
+ AC_SUBST(BUILD_PYTHON_LIB)
+ AC_SUBST(GLUPY_SUBDIR)
+ AC_SUBST(GLUPY_SUBDIR_MAKEFILE)
+ AC_SUBST(GLUPY_SUBDIR_SRC_MAKEFILE)
+fi
+# end glupy section
+
AC_SUBST(GF_HOST_OS)
-AC_SUBST(GF_GLUSTERFS_LDFLAGS)
+AC_SUBST([GF_GLUSTERFS_LIBS])
AC_SUBST(GF_GLUSTERFS_CFLAGS)
AC_SUBST(GF_CFLAGS)
AC_SUBST(GF_LDFLAGS)
AC_SUBST(GF_LDADD)
AC_SUBST(GF_FUSE_LDADD)
-AC_SUBST(GF_BOOSTER_SUBDIR)
+AC_SUBST(GF_FUSE_CFLAGS)
+AC_SUBST(RLLIBS)
+AC_SUBST(LIBAIO)
+AC_SUBST(AM_MAKEFLAGS)
+AC_SUBST(AM_LIBTOOLFLAGS)
CONTRIBDIR='$(top_srcdir)/contrib'
AC_SUBST(CONTRIBDIR)
-AM_CONDITIONAL([GF_DARWIN_HOST_OS], test "${GF_HOST_OS}" = "GF_DARWIN_HOST_OS")
+GF_CPPDEFINES='-D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS)'
+GF_CPPINCLUDES='-I$(top_srcdir)/libglusterfs/src -I$(CONTRIBDIR)/uuid'
+GF_CPPFLAGS="$GF_CPPDEFINES $GF_CPPINCLUDES"
+AC_SUBST([GF_CPPFLAGS])
+
+AM_CONDITIONAL([GF_DARWIN_HOST_OS], test "${GF_HOST_OS}" = "GF_DARWIN_HOST_OS")
+
+AM_CONDITIONAL([GF_INSTALL_VAR_LIB_GLUSTERD], test ! -d ${localstatedir}/lib/glusterd && test -d ${sysconfdir}/glusterd )
AC_OUTPUT
echo
echo "GlusterFS configure summary"
echo "==========================="
-echo "FUSE client : $BUILD_FUSE_CLIENT"
-echo "Infiniband verbs : $BUILD_IBVERBS"
-echo "epoll IO multiplex : $BUILD_EPOLL"
-echo "Berkeley-DB : $BUILD_BDB"
-echo "libglusterfsclient : $BUILD_LIBGLUSTERFSCLIENT"
-echo "argp-standalone : $BUILD_ARGP_STANDALONE"
-echo "fusermount : $BUILD_FUSERMOUNT"
+echo "FUSE client : $BUILD_FUSE_CLIENT"
+echo "Infiniband verbs : $BUILD_IBVERBS"
+echo "epoll IO multiplex : $BUILD_EPOLL"
+echo "argp-standalone : $BUILD_ARGP_STANDALONE"
+echo "fusermount : $BUILD_FUSERMOUNT"
+echo "readline : $BUILD_READLINE"
+echo "georeplication : $BUILD_SYNCDAEMON"
+echo "Linux-AIO : $BUILD_LIBAIO"
+echo "Enable Debug : $BUILD_DEBUG"
+echo "systemtap : $BUILD_SYSTEMTAP"
+echo "Block Device xlator : $BUILD_BD_XLATOR"
+echo "glupy : $BUILD_GLUPY"
+echo "Use syslog : $USE_SYSLOG"
+echo "XML output : $BUILD_XML_OUTPUT"
+echo "QEMU Block formats : $BUILD_QEMU_BLOCK"
+echo "Encryption xlator : $BUILD_CRYPT_XLATOR"
echo
diff --git a/contrib/Makefile.am b/contrib/Makefile.am
deleted file mode 100644
index a3d5cfbf8..000000000
--- a/contrib/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = fuse-util
-
-CLEANFILES =
diff --git a/contrib/aclocal/mkdirp.m4 b/contrib/aclocal/mkdirp.m4
new file mode 100644
index 000000000..d2f7edd5c
--- /dev/null
+++ b/contrib/aclocal/mkdirp.m4
@@ -0,0 +1,146 @@
+# Excerpt from autoconf/autoconf/programs.m4
+# This file is part of Autoconf. -*- Autoconf -*-
+# Checking for programs.
+
+# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
+# 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software
+# Foundation, Inc.
+
+# This file is part of Autoconf. This program is free
+# software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the Autoconf Configure Script Exception,
+# version 3.0, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License
+# and a copy of the Autoconf Configure Script Exception along with
+# this program; see the files COPYINGv3 and COPYING.EXCEPTION
+# respectively. If not, see <http://www.gnu.org/licenses/>.
+
+# Written by David MacKenzie, with help from
+# Franc,ois Pinard, Karl Berry, Richard Pixley, Ian Lance Taylor,
+# Roland McGrath, Noah Friedman, david d zuhn, and many others.
+
+# AC_PROG_MKDIR_P
+# ---------------
+# Check whether `mkdir -p' is known to be thread-safe, and fall back to
+# install-sh -d otherwise.
+#
+# Automake 1.8 used `mkdir -m 0755 -p --' to ensure that directories
+# created by `make install' are always world readable, even if the
+# installer happens to have an overly restrictive umask (e.g. 077).
+# This was a mistake. There are at least two reasons why we must not
+# use `-m 0755':
+# - it causes special bits like SGID to be ignored,
+# - it may be too restrictive (some setups expect 775 directories).
+#
+# Do not use -m 0755 and let people choose whatever they expect by
+# setting umask.
+#
+# We cannot accept any implementation of `mkdir' that recognizes `-p'.
+# Some implementations (such as Solaris 8's) are vulnerable to race conditions:
+# if a parallel make tries to run `mkdir -p a/b' and `mkdir -p a/c'
+# concurrently, both version can detect that a/ is missing, but only
+# one can create it and the other will error out. Consequently we
+# restrict ourselves to known race-free implementations.
+#
+# Automake used to define mkdir_p as `mkdir -p .', in order to
+# allow $(mkdir_p) to be used without argument. As in
+# $(mkdir_p) $(somedir)
+# where $(somedir) is conditionally defined. However we don't do
+# that for MKDIR_P.
+# 1. before we restricted the check to GNU mkdir, `mkdir -p .' was
+# reported to fail in read-only directories. The system where this
+# happened has been forgotten.
+# 2. in practice we call $(MKDIR_P) on directories such as
+# $(MKDIR_P) "$(DESTDIR)$(somedir)"
+# and we don't want to create $(DESTDIR) if $(somedir) is empty.
+# To support the latter case, we have to write
+# test -z "$(somedir)" || $(MKDIR_P) "$(DESTDIR)$(somedir)"
+# so $(MKDIR_P) always has an argument.
+# We will have better chances of detecting a missing test if
+# $(MKDIR_P) complains about missing arguments.
+# 3. $(MKDIR_P) is named after `mkdir -p' and we don't expect this
+# to accept no argument.
+# 4. having something like `mkdir .' in the output is unsightly.
+#
+# On NextStep and OpenStep, the `mkdir' command does not
+# recognize any option. It will interpret all options as
+# directories to create.
+AN_MAKEVAR([MKDIR_P], [AC_PROG_MKDIR_P])
+AC_DEFUN_ONCE([AC_PROG_MKDIR_P],
+[AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl
+
+AC_MSG_CHECKING([for a thread-safe mkdir -p])
+if test -z "$MKDIR_P"; then
+ AC_CACHE_VAL([ac_cv_path_mkdir],
+ [_AS_PATH_WALK([$PATH$PATH_SEPARATOR/opt/sfw/bin],
+ [for ac_prog in mkdir gmkdir; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ AS_EXECUTABLE_P(["$as_dir/$ac_prog$ac_exec_ext"]) || continue
+ case `"$as_dir/$ac_prog$ac_exec_ext" --version 2>&1` in #(
+ 'mkdir (GNU coreutils) '* | \
+ 'mkdir (coreutils) '* | \
+ 'mkdir (fileutils) '4.1*)
+ ac_cv_path_mkdir=$as_dir/$ac_prog$ac_exec_ext
+ break 3;;
+ esac
+ done
+ done])])
+ test -d ./--version && rmdir ./--version
+ if test "${ac_cv_path_mkdir+set}" = set; then
+ MKDIR_P="$ac_cv_path_mkdir -p"
+ else
+ # As a last resort, use the slow shell script. Don't cache a
+ # value for MKDIR_P within a source directory, because that will
+ # break other packages using the cache if that directory is
+ # removed, or if the value is a relative name.
+ MKDIR_P="$ac_install_sh -d"
+ fi
+fi
+dnl status.m4 does special magic for MKDIR_P instead of AC_SUBST,
+dnl to get relative names right. However, also AC_SUBST here so
+dnl that Automake versions before 1.10 will pick it up (they do not
+dnl trace AC_SUBST_TRACE).
+dnl FIXME: Remove this once we drop support for Automake < 1.10.
+AC_SUBST([MKDIR_P])dnl
+AC_MSG_RESULT([$MKDIR_P])
+])# AC_PROG_MKDIR_P
+
+
+# From automake/m4/mkdirp.m4
+## -*- Autoconf -*-
+# Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_PROG_MKDIR_P
+# ---------------
+# Check for `mkdir -p'.
+AC_DEFUN([AM_PROG_MKDIR_P],
+[
+AC_REQUIRE([AC_PROG_MKDIR_P])dnl
+dnl Automake 1.8 to 1.9.6 used to define mkdir_p. We now use MKDIR_P,
+dnl while keeping a definition of mkdir_p for backward compatibility.
+dnl @MKDIR_P@ is magic: AC_OUTPUT adjusts its value for each Makefile.
+dnl However we cannot define mkdir_p as $(MKDIR_P) for the sake of
+dnl Makefile.ins that do not define MKDIR_P, so we do our own
+dnl adjustment using top_builddir (which is defined more often than
+dnl MKDIR_P).
+AC_SUBST([mkdir_p], ["$MKDIR_P"])dnl
+case $mkdir_p in
+ [[\\/$]]* | ?:[[\\/]]*) ;;
+ */*) mkdir_p="\$(top_builddir)/$mkdir_p" ;;
+esac
+])
diff --git a/contrib/aclocal/python.m4 b/contrib/aclocal/python.m4
new file mode 100644
index 000000000..a39a90090
--- /dev/null
+++ b/contrib/aclocal/python.m4
@@ -0,0 +1,209 @@
+## ------------------------ -*- Autoconf -*-
+## Python file handling
+## From Andrew Dalke
+## Updated by James Henstridge
+## ------------------------
+# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009
+# Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_PATH_PYTHON([MINIMUM-VERSION], [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
+# ---------------------------------------------------------------------------
+# Adds support for distributing Python modules and packages. To
+# install modules, copy them to $(pythondir), using the python_PYTHON
+# automake variable. To install a package with the same name as the
+# automake package, install to $(pkgpythondir), or use the
+# pkgpython_PYTHON automake variable.
+#
+# The variables $(pyexecdir) and $(pkgpyexecdir) are provided as
+# locations to install python extension modules (shared libraries).
+# Another macro is required to find the appropriate flags to compile
+# extension modules.
+#
+# If your package is configured with a different prefix to python,
+# users will have to add the install directory to the PYTHONPATH
+# environment variable, or create a .pth file (see the python
+# documentation for details).
+#
+# If the MINIMUM-VERSION argument is passed, AM_PATH_PYTHON will
+# cause an error if the version of python installed on the system
+# doesn't meet the requirement. MINIMUM-VERSION should consist of
+# numbers and dots only.
+AC_DEFUN([AM_PATH_PYTHON],
+ [
+ dnl Find a Python interpreter. Python versions prior to 2.0 are not
+ dnl supported. (2.0 was released on October 16, 2000).
+ m4_define_default([_AM_PYTHON_INTERPRETER_LIST],
+ [python python2 python3 python3.2 python3.1 python3.0 python2.7 python2.6 python2.5 python2.4 python2.3 python2.2 dnl
+python2.1 python2.0])
+
+ m4_if([$1],[],[
+ dnl No version check is needed.
+ # Find any Python interpreter.
+ if test -z "$PYTHON"; then
+ AC_PATH_PROGS([PYTHON], _AM_PYTHON_INTERPRETER_LIST, :)
+ fi
+ am_display_PYTHON=python
+ ], [
+ dnl A version check is needed.
+ if test -n "$PYTHON"; then
+ # If the user set $PYTHON, use it and don't search something else.
+ AC_MSG_CHECKING([whether $PYTHON version >= $1])
+ AM_PYTHON_CHECK_VERSION([$PYTHON], [$1],
+ [AC_MSG_RESULT(yes)],
+ [AC_MSG_ERROR(too old)])
+ am_display_PYTHON=$PYTHON
+ else
+ # Otherwise, try each interpreter until we find one that satisfies
+ # VERSION.
+ AC_CACHE_CHECK([for a Python interpreter with version >= $1],
+ [am_cv_pathless_PYTHON],[
+ for am_cv_pathless_PYTHON in _AM_PYTHON_INTERPRETER_LIST none; do
+ test "$am_cv_pathless_PYTHON" = none && break
+ AM_PYTHON_CHECK_VERSION([$am_cv_pathless_PYTHON], [$1], [break])
+ done])
+ # Set $PYTHON to the absolute path of $am_cv_pathless_PYTHON.
+ if test "$am_cv_pathless_PYTHON" = none; then
+ PYTHON=:
+ else
+ AC_PATH_PROG([PYTHON], [$am_cv_pathless_PYTHON])
+ fi
+ am_display_PYTHON=$am_cv_pathless_PYTHON
+ fi
+ ])
+
+ if test "$PYTHON" = :; then
+ dnl Run any user-specified action, or abort.
+ m4_default([$3], [AC_MSG_ERROR([no suitable Python interpreter found])])
+ else
+
+ dnl Query Python for its version number. Getting [:3] seems to be
+ dnl the best way to do this; it's what "site.py" does in the standard
+ dnl library.
+
+ AC_CACHE_CHECK([for $am_display_PYTHON version], [am_cv_python_version],
+ [am_cv_python_version=`$PYTHON -c "import sys; sys.stdout.write(sys.version[[:3]])"`])
+ AC_SUBST([PYTHON_VERSION], [$am_cv_python_version])
+
+ dnl Use the values of $prefix and $exec_prefix for the corresponding
+ dnl values of PYTHON_PREFIX and PYTHON_EXEC_PREFIX. These are made
+ dnl distinct variables so they can be overridden if need be. However,
+ dnl general consensus is that you shouldn't need this ability.
+
+ AC_SUBST([PYTHON_PREFIX], ['${prefix}'])
+ AC_SUBST([PYTHON_EXEC_PREFIX], ['${exec_prefix}'])
+
+ dnl At times (like when building shared libraries) you may want
+ dnl to know which OS platform Python thinks this is.
+
+ AC_CACHE_CHECK([for $am_display_PYTHON platform], [am_cv_python_platform],
+ [am_cv_python_platform=`$PYTHON -c "import sys; sys.stdout.write(sys.platform)"`])
+ AC_SUBST([PYTHON_PLATFORM], [$am_cv_python_platform])
+
+
+ dnl Set up 4 directories:
+
+ dnl pythondir -- where to install python scripts. This is the
+ dnl site-packages directory, not the python standard library
+ dnl directory like in previous automake betas. This behavior
+ dnl is more consistent with lispdir.m4 for example.
+ dnl Query distutils for this directory. distutils does not exist in
+ dnl Python 1.5, so we fall back to the hardcoded directory if it
+ dnl doesn't work.
+ AC_CACHE_CHECK([for $am_display_PYTHON script directory],
+ [am_cv_python_pythondir],
+ [if test "x$prefix" = xNONE
+ then
+ am_py_prefix=$ac_default_prefix
+ else
+ am_py_prefix=$prefix
+ fi
+ am_cv_python_pythondir=`$PYTHON -c "import sys; from distutils import sysconfig; sys.stdout.write(sysconfig.get_python_lib(0,0,prefix='$am_py_prefix'))" 2>/dev/null ||
+ echo "$PYTHON_PREFIX/lib/python$PYTHON_VERSION/site-packages"`
+ case $am_cv_python_pythondir in
+ $am_py_prefix*)
+ am__strip_prefix=`echo "$am_py_prefix" | sed 's|.|.|g'`
+ am_cv_python_pythondir=`echo "$am_cv_python_pythondir" | sed "s,^$am__strip_prefix,$PYTHON_PREFIX,"`
+ ;;
+ *)
+ case $am_py_prefix in
+ /usr|/System*) ;;
+ *)
+ am_cv_python_pythondir=$PYTHON_PREFIX/lib/python$PYTHON_VERSION/site-packages
+ ;;
+ esac
+ ;;
+ esac
+ ])
+ AC_SUBST([pythondir], [$am_cv_python_pythondir])
+
+ dnl pkgpythondir -- $PACKAGE directory under pythondir. Was
+ dnl PYTHON_SITE_PACKAGE in previous betas, but this naming is
+ dnl more consistent with the rest of automake.
+
+ AC_SUBST([pkgpythondir], [\${pythondir}/$PACKAGE])
+
+ dnl pyexecdir -- directory for installing python extension modules
+ dnl (shared libraries)
+ dnl Query distutils for this directory. distutils does not exist in
+ dnl Python 1.5, so we fall back to the hardcoded directory if it
+ dnl doesn't work.
+ AC_CACHE_CHECK([for $am_display_PYTHON extension module directory],
+ [am_cv_python_pyexecdir],
+ [if test "x$exec_prefix" = xNONE
+ then
+ am_py_exec_prefix=$am_py_prefix
+ else
+ am_py_exec_prefix=$exec_prefix
+ fi
+ am_cv_python_pyexecdir=`$PYTHON -c "import sys; from distutils import sysconfig; sys.stdout.write(sysconfig.get_python_lib(1,0,prefix='$am_py_exec_prefix'))" 2>/dev/null ||
+ echo "$PYTHON_EXEC_PREFIX/lib/python$PYTHON_VERSION/site-packages"`
+ case $am_cv_python_pyexecdir in
+ $am_py_exec_prefix*)
+ am__strip_prefix=`echo "$am_py_exec_prefix" | sed 's|.|.|g'`
+ am_cv_python_pyexecdir=`echo "$am_cv_python_pyexecdir" | sed "s,^$am__strip_prefix,$PYTHON_EXEC_PREFIX,"`
+ ;;
+ *)
+ case $am_py_exec_prefix in
+ /usr|/System*) ;;
+ *)
+ am_cv_python_pyexecdir=$PYTHON_EXEC_PREFIX/lib/python$PYTHON_VERSION/site-packages
+ ;;
+ esac
+ ;;
+ esac
+ ])
+ AC_SUBST([pyexecdir], [$am_cv_python_pyexecdir])
+
+ dnl pkgpyexecdir -- $(pyexecdir)/$(PACKAGE)
+
+ AC_SUBST([pkgpyexecdir], [\${pyexecdir}/$PACKAGE])
+
+ dnl Run any user-specified action.
+ $2
+ fi
+
+])
+
+
+# AM_PYTHON_CHECK_VERSION(PROG, VERSION, [ACTION-IF-TRUE], [ACTION-IF-FALSE])
+# ---------------------------------------------------------------------------
+# Run ACTION-IF-TRUE if the Python interpreter PROG has version >= VERSION.
+# Run ACTION-IF-FALSE otherwise.
+# This test uses sys.hexversion instead of the string equivalent (first
+# word of sys.version), in order to cope with versions such as 2.2c1.
+# This supports Python 2.0 or higher. (2.0 was released on October 16, 2000).
+AC_DEFUN([AM_PYTHON_CHECK_VERSION],
+ [prog="import sys
+# split strings by '.' and convert to numeric. Append some zeros
+# because we need at least 4 digits for the hex conversion.
+# map returns an iterator in Python 3.0 and a list in 2.x
+minver = list(map(int, '$2'.split('.'))) + [[0, 0, 0]]
+minverhex = 0
+# xrange is not present in Python 3.0 and range returns an iterator
+for i in list(range(0, 4)): minverhex = (minverhex << 8) + minver[[i]]
+sys.exit(sys.hexversion < minverhex)"
+ AS_IF([AM_RUN_LOG([$1 -c "$prog"])], [$3], [$4])])
diff --git a/contrib/fuse-include/fuse-misc.h b/contrib/fuse-include/fuse-misc.h
index 16b473472..f905215b0 100644
--- a/contrib/fuse-include/fuse-misc.h
+++ b/contrib/fuse-include/fuse-misc.h
@@ -10,4 +10,5 @@
unsigned long calc_timeout_sec (double t);
unsigned int calc_timeout_nsec (double t);
-void convert_fuse_file_lock (struct fuse_file_lock *fl, struct flock *flock);
+void convert_fuse_file_lock (struct fuse_file_lock *fl, struct gf_flock *flock,
+ uint64_t lk_owner);
diff --git a/contrib/fuse-include/fuse-mount.h b/contrib/fuse-include/fuse-mount.h
index d263a8039..9358ac810 100644
--- a/contrib/fuse-include/fuse-mount.h
+++ b/contrib/fuse-include/fuse-mount.h
@@ -1,11 +1,13 @@
/*
FUSE: Filesystem in Userspace
Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
- Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
This program can be distributed under the terms of the GNU LGPLv2.
See the file COPYING.LIB.
*/
void gf_fuse_unmount (const char *mountpoint, int fd);
-int gf_fuse_mount (const char *mountpoint, char *fsname, char *mnt_param);
+int gf_fuse_mount (const char *mountpoint, char *fsname,
+ unsigned long mountflags, char *mnt_param,
+ pid_t *mtab_pid, int status_fd);
diff --git a/contrib/fuse-include/fuse_kernel.h b/contrib/fuse-include/fuse_kernel.h
index dac35d8b9..60bb2f9f7 100644
--- a/contrib/fuse-include/fuse_kernel.h
+++ b/contrib/fuse-include/fuse_kernel.h
@@ -56,22 +56,79 @@
* - add umask flag to input argument of open, mknod and mkdir
* - add notification messages for invalidation of inodes and
* directory entries
+ *
+ * 7.13
+ * - make max number of background requests and congestion threshold
+ * tunables
+ *
+ * 7.14
+ * - add splice support to fuse device
+ *
+ * 7.15
+ * - add store notify
+ * - add retrieve notify
+ *
+ * 7.16
+ * - add BATCH_FORGET request
+ * - FUSE_IOCTL_UNRESTRICTED shall now return with array of 'struct
+ * fuse_ioctl_iovec' instead of ambiguous 'struct iovec'
+ * - add FUSE_IOCTL_32BIT flag
+ *
+ * 7.17
+ * - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK
+ *
+ * 7.18
+ * - add FUSE_IOCTL_DIR flag
+ * - add FUSE_NOTIFY_DELETE
+ *
+ * 7.19
+ * - add FUSE_FALLOCATE
+ *
+ * 7.20
+ * - add FUSE_AUTO_INVAL_DATA
+ *
+ * 7.21
+ * - add FUSE_READDIRPLUS
+ * - send the requested events in POLL request
+ *
+ * 7.22
+ * - add FUSE_ASYNC_DIO
*/
#ifndef _LINUX_FUSE_H
#define _LINUX_FUSE_H
-#include <sys/types.h>
-#define __u64 uint64_t
-#define __s64 int64_t
-#define __u32 uint32_t
-#define __s32 int32_t
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <stdint.h>
+#endif
+
+/*
+ * Version negotiation:
+ *
+ * Both the kernel and userspace send the version they support in the
+ * INIT request and reply respectively.
+ *
+ * If the major versions match then both shall use the smallest
+ * of the two minor versions for communication.
+ *
+ * If the kernel supports a larger major version, then userspace shall
+ * reply with the major version it supports, ignore the rest of the
+ * INIT message and expect a new INIT message from the kernel with a
+ * matching major version.
+ *
+ * If the library supports a larger major version, then it shall fall
+ * back to the major protocol version sent by the kernel for
+ * communication and reply with that major version (and an arbitrary
+ * supported minor version).
+ */
/** Version number of this interface */
#define FUSE_KERNEL_VERSION 7
/** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 12
+#define FUSE_KERNEL_MINOR_VERSION 22
/** The node ID of the root inode */
#define FUSE_ROOT_ID 1
@@ -80,42 +137,42 @@
userspace works under 64bit kernels */
struct fuse_attr {
- __u64 ino;
- __u64 size;
- __u64 blocks;
- __u64 atime;
- __u64 mtime;
- __u64 ctime;
- __u32 atimensec;
- __u32 mtimensec;
- __u32 ctimensec;
- __u32 mode;
- __u32 nlink;
- __u32 uid;
- __u32 gid;
- __u32 rdev;
- __u32 blksize;
- __u32 padding;
+ uint64_t ino;
+ uint64_t size;
+ uint64_t blocks;
+ uint64_t atime;
+ uint64_t mtime;
+ uint64_t ctime;
+ uint32_t atimensec;
+ uint32_t mtimensec;
+ uint32_t ctimensec;
+ uint32_t mode;
+ uint32_t nlink;
+ uint32_t uid;
+ uint32_t gid;
+ uint32_t rdev;
+ uint32_t blksize;
+ uint32_t padding;
};
struct fuse_kstatfs {
- __u64 blocks;
- __u64 bfree;
- __u64 bavail;
- __u64 files;
- __u64 ffree;
- __u32 bsize;
- __u32 namelen;
- __u32 frsize;
- __u32 padding;
- __u32 spare[6];
+ uint64_t blocks;
+ uint64_t bfree;
+ uint64_t bavail;
+ uint64_t files;
+ uint64_t ffree;
+ uint32_t bsize;
+ uint32_t namelen;
+ uint32_t frsize;
+ uint32_t padding;
+ uint32_t spare[6];
};
struct fuse_file_lock {
- __u64 start;
- __u64 end;
- __u32 type;
- __u32 pid; /* tgid */
+ uint64_t start;
+ uint64_t end;
+ uint32_t type;
+ uint32_t pid; /* tgid */
};
/**
@@ -146,8 +203,22 @@ struct fuse_file_lock {
/**
* INIT request/reply flags
*
+ * FUSE_ASYNC_READ: asynchronous read requests
+ * FUSE_POSIX_LOCKS: remote locking for POSIX file locks
+ * FUSE_FILE_OPS: kernel sends file handle for fstat, etc... (not yet supported)
+ * FUSE_ATOMIC_O_TRUNC: handles the O_TRUNC open flag in the filesystem
* FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".."
+ * FUSE_BIG_WRITES: filesystem can handle write size larger than 4kB
* FUSE_DONT_MASK: don't apply umask to file mode on create operations
+ * FUSE_SPLICE_WRITE: kernel supports splice write on the device
+ * FUSE_SPLICE_MOVE: kernel supports splice move on the device
+ * FUSE_SPLICE_READ: kernel supports splice read on the device
+ * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks
+ * FUSE_HAS_IOCTL_DIR: kernel supports ioctl on directories
+ * FUSE_AUTO_INVAL_DATA: automatically invalidate cached pages
+ * FUSE_DO_READDIRPLUS: do READDIRPLUS (READDIR+LOOKUP in one)
+ * FUSE_READDIRPLUS_AUTO: adaptive readdirplus
+ * FUSE_ASYNC_DIO: asynchronous direct I/O submission
*/
#define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1)
@@ -156,6 +227,15 @@ struct fuse_file_lock {
#define FUSE_EXPORT_SUPPORT (1 << 4)
#define FUSE_BIG_WRITES (1 << 5)
#define FUSE_DONT_MASK (1 << 6)
+#define FUSE_SPLICE_WRITE (1 << 7)
+#define FUSE_SPLICE_MOVE (1 << 8)
+#define FUSE_SPLICE_READ (1 << 9)
+#define FUSE_FLOCK_LOCKS (1 << 10)
+#define FUSE_HAS_IOCTL_DIR (1 << 11)
+#define FUSE_AUTO_INVAL_DATA (1 << 12)
+#define FUSE_DO_READDIRPLUS (1 << 13)
+#define FUSE_READDIRPLUS_AUTO (1 << 14)
+#define FUSE_ASYNC_DIO (1 << 15)
/**
* CUSE INIT request/reply flags
@@ -168,6 +248,7 @@ struct fuse_file_lock {
* Release flags
*/
#define FUSE_RELEASE_FLUSH (1 << 0)
+#define FUSE_RELEASE_FLOCK_UNLOCK (1 << 1)
/**
* Getattr flags
@@ -199,12 +280,16 @@ struct fuse_file_lock {
* FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine
* FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed
* FUSE_IOCTL_RETRY: retry with new iovecs
+ * FUSE_IOCTL_32BIT: 32bit ioctl
+ * FUSE_IOCTL_DIR: is a directory
*
* FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs
*/
#define FUSE_IOCTL_COMPAT (1 << 0)
#define FUSE_IOCTL_UNRESTRICTED (1 << 1)
#define FUSE_IOCTL_RETRY (1 << 2)
+#define FUSE_IOCTL_32BIT (1 << 3)
+#define FUSE_IOCTL_DIR (1 << 4)
#define FUSE_IOCTL_MAX_IOV 256
@@ -254,6 +339,10 @@ enum fuse_opcode {
FUSE_DESTROY = 38,
FUSE_IOCTL = 39,
FUSE_POLL = 40,
+ FUSE_NOTIFY_REPLY = 41,
+ FUSE_BATCH_FORGET = 42,
+ FUSE_FALLOCATE = 43,
+ FUSE_READDIRPLUS = 44,
/* CUSE specific operations */
CUSE_INIT = 4096,
@@ -263,6 +352,9 @@ enum fuse_notify_code {
FUSE_NOTIFY_POLL = 1,
FUSE_NOTIFY_INVAL_INODE = 2,
FUSE_NOTIFY_INVAL_ENTRY = 3,
+ FUSE_NOTIFY_STORE = 4,
+ FUSE_NOTIFY_RETRIEVE = 5,
+ FUSE_NOTIFY_DELETE = 6,
FUSE_NOTIFY_CODE_MAX,
};
@@ -272,133 +364,143 @@ enum fuse_notify_code {
#define FUSE_COMPAT_ENTRY_OUT_SIZE 120
struct fuse_entry_out {
- __u64 nodeid; /* Inode ID */
- __u64 generation; /* Inode generation: nodeid:gen must
- be unique for the fs's lifetime */
- __u64 entry_valid; /* Cache timeout for the name */
- __u64 attr_valid; /* Cache timeout for the attributes */
- __u32 entry_valid_nsec;
- __u32 attr_valid_nsec;
+ uint64_t nodeid; /* Inode ID */
+ uint64_t generation; /* Inode generation: nodeid:gen must
+ be unique for the fs's lifetime */
+ uint64_t entry_valid; /* Cache timeout for the name */
+ uint64_t attr_valid; /* Cache timeout for the attributes */
+ uint32_t entry_valid_nsec;
+ uint32_t attr_valid_nsec;
struct fuse_attr attr;
};
struct fuse_forget_in {
- __u64 nlookup;
+ uint64_t nlookup;
+};
+
+struct fuse_forget_one {
+ uint64_t nodeid;
+ uint64_t nlookup;
+};
+
+struct fuse_batch_forget_in {
+ uint32_t count;
+ uint32_t dummy;
};
struct fuse_getattr_in {
- __u32 getattr_flags;
- __u32 dummy;
- __u64 fh;
+ uint32_t getattr_flags;
+ uint32_t dummy;
+ uint64_t fh;
};
#define FUSE_COMPAT_ATTR_OUT_SIZE 96
struct fuse_attr_out {
- __u64 attr_valid; /* Cache timeout for the attributes */
- __u32 attr_valid_nsec;
- __u32 dummy;
+ uint64_t attr_valid; /* Cache timeout for the attributes */
+ uint32_t attr_valid_nsec;
+ uint32_t dummy;
struct fuse_attr attr;
};
#define FUSE_COMPAT_MKNOD_IN_SIZE 8
struct fuse_mknod_in {
- __u32 mode;
- __u32 rdev;
- __u32 umask;
- __u32 padding;
+ uint32_t mode;
+ uint32_t rdev;
+ uint32_t umask;
+ uint32_t padding;
};
struct fuse_mkdir_in {
- __u32 mode;
- __u32 umask;
+ uint32_t mode;
+ uint32_t umask;
};
struct fuse_rename_in {
- __u64 newdir;
+ uint64_t newdir;
};
struct fuse_link_in {
- __u64 oldnodeid;
+ uint64_t oldnodeid;
};
struct fuse_setattr_in {
- __u32 valid;
- __u32 padding;
- __u64 fh;
- __u64 size;
- __u64 lock_owner;
- __u64 atime;
- __u64 mtime;
- __u64 unused2;
- __u32 atimensec;
- __u32 mtimensec;
- __u32 unused3;
- __u32 mode;
- __u32 unused4;
- __u32 uid;
- __u32 gid;
- __u32 unused5;
+ uint32_t valid;
+ uint32_t padding;
+ uint64_t fh;
+ uint64_t size;
+ uint64_t lock_owner;
+ uint64_t atime;
+ uint64_t mtime;
+ uint64_t unused2;
+ uint32_t atimensec;
+ uint32_t mtimensec;
+ uint32_t unused3;
+ uint32_t mode;
+ uint32_t unused4;
+ uint32_t uid;
+ uint32_t gid;
+ uint32_t unused5;
};
struct fuse_open_in {
- __u32 flags;
- __u32 unused;
+ uint32_t flags;
+ uint32_t unused;
};
struct fuse_create_in {
- __u32 flags;
- __u32 mode;
- __u32 umask;
- __u32 padding;
+ uint32_t flags;
+ uint32_t mode;
+ uint32_t umask;
+ uint32_t padding;
};
struct fuse_open_out {
- __u64 fh;
- __u32 open_flags;
- __u32 padding;
+ uint64_t fh;
+ uint32_t open_flags;
+ uint32_t padding;
};
struct fuse_release_in {
- __u64 fh;
- __u32 flags;
- __u32 release_flags;
- __u64 lock_owner;
+ uint64_t fh;
+ uint32_t flags;
+ uint32_t release_flags;
+ uint64_t lock_owner;
};
struct fuse_flush_in {
- __u64 fh;
- __u32 unused;
- __u32 padding;
- __u64 lock_owner;
+ uint64_t fh;
+ uint32_t unused;
+ uint32_t padding;
+ uint64_t lock_owner;
};
struct fuse_read_in {
- __u64 fh;
- __u64 offset;
- __u32 size;
- __u32 read_flags;
- __u64 lock_owner;
- __u32 flags;
- __u32 padding;
+ uint64_t fh;
+ uint64_t offset;
+ uint32_t size;
+ uint32_t read_flags;
+ uint64_t lock_owner;
+ uint32_t flags;
+ uint32_t padding;
};
#define FUSE_COMPAT_WRITE_IN_SIZE 24
struct fuse_write_in {
- __u64 fh;
- __u64 offset;
- __u32 size;
- __u32 write_flags;
- __u64 lock_owner;
- __u32 flags;
- __u32 padding;
+ uint64_t fh;
+ uint64_t offset;
+ uint32_t size;
+ uint32_t write_flags;
+ uint64_t lock_owner;
+ uint32_t flags;
+ uint32_t padding;
};
struct fuse_write_out {
- __u32 size;
- __u32 padding;
+ uint32_t size;
+ uint32_t padding;
};
#define FUSE_COMPAT_STATFS_SIZE 48
@@ -408,32 +510,32 @@ struct fuse_statfs_out {
};
struct fuse_fsync_in {
- __u64 fh;
- __u32 fsync_flags;
- __u32 padding;
+ uint64_t fh;
+ uint32_t fsync_flags;
+ uint32_t padding;
};
struct fuse_setxattr_in {
- __u32 size;
- __u32 flags;
+ uint32_t size;
+ uint32_t flags;
};
struct fuse_getxattr_in {
- __u32 size;
- __u32 padding;
+ uint32_t size;
+ uint32_t padding;
};
struct fuse_getxattr_out {
- __u32 size;
- __u32 padding;
+ uint32_t size;
+ uint32_t padding;
};
struct fuse_lk_in {
- __u64 fh;
- __u64 owner;
+ uint64_t fh;
+ uint64_t owner;
struct fuse_file_lock lk;
- __u32 lk_flags;
- __u32 padding;
+ uint32_t lk_flags;
+ uint32_t padding;
};
struct fuse_lk_out {
@@ -441,133 +543,190 @@ struct fuse_lk_out {
};
struct fuse_access_in {
- __u32 mask;
- __u32 padding;
+ uint32_t mask;
+ uint32_t padding;
};
struct fuse_init_in {
- __u32 major;
- __u32 minor;
- __u32 max_readahead;
- __u32 flags;
+ uint32_t major;
+ uint32_t minor;
+ uint32_t max_readahead;
+ uint32_t flags;
};
struct fuse_init_out {
- __u32 major;
- __u32 minor;
- __u32 max_readahead;
- __u32 flags;
- __u32 unused;
- __u32 max_write;
+ uint32_t major;
+ uint32_t minor;
+ uint32_t max_readahead;
+ uint32_t flags;
+ uint16_t max_background;
+ uint16_t congestion_threshold;
+ uint32_t max_write;
};
#define CUSE_INIT_INFO_MAX 4096
struct cuse_init_in {
- __u32 major;
- __u32 minor;
- __u32 unused;
- __u32 flags;
+ uint32_t major;
+ uint32_t minor;
+ uint32_t unused;
+ uint32_t flags;
};
struct cuse_init_out {
- __u32 major;
- __u32 minor;
- __u32 unused;
- __u32 flags;
- __u32 max_read;
- __u32 max_write;
- __u32 dev_major; /* chardev major */
- __u32 dev_minor; /* chardev minor */
- __u32 spare[10];
+ uint32_t major;
+ uint32_t minor;
+ uint32_t unused;
+ uint32_t flags;
+ uint32_t max_read;
+ uint32_t max_write;
+ uint32_t dev_major; /* chardev major */
+ uint32_t dev_minor; /* chardev minor */
+ uint32_t spare[10];
};
struct fuse_interrupt_in {
- __u64 unique;
+ uint64_t unique;
};
struct fuse_bmap_in {
- __u64 block;
- __u32 blocksize;
- __u32 padding;
+ uint64_t block;
+ uint32_t blocksize;
+ uint32_t padding;
};
struct fuse_bmap_out {
- __u64 block;
+ uint64_t block;
};
struct fuse_ioctl_in {
- __u64 fh;
- __u32 flags;
- __u32 cmd;
- __u64 arg;
- __u32 in_size;
- __u32 out_size;
+ uint64_t fh;
+ uint32_t flags;
+ uint32_t cmd;
+ uint64_t arg;
+ uint32_t in_size;
+ uint32_t out_size;
+};
+
+struct fuse_ioctl_iovec {
+ uint64_t base;
+ uint64_t len;
};
struct fuse_ioctl_out {
- __s32 result;
- __u32 flags;
- __u32 in_iovs;
- __u32 out_iovs;
+ int32_t result;
+ uint32_t flags;
+ uint32_t in_iovs;
+ uint32_t out_iovs;
};
struct fuse_poll_in {
- __u64 fh;
- __u64 kh;
- __u32 flags;
- __u32 padding;
+ uint64_t fh;
+ uint64_t kh;
+ uint32_t flags;
+ uint32_t events;
};
struct fuse_poll_out {
- __u32 revents;
- __u32 padding;
+ uint32_t revents;
+ uint32_t padding;
};
struct fuse_notify_poll_wakeup_out {
- __u64 kh;
+ uint64_t kh;
+};
+
+struct fuse_fallocate_in {
+ uint64_t fh;
+ uint64_t offset;
+ uint64_t length;
+ uint32_t mode;
+ uint32_t padding;
};
struct fuse_in_header {
- __u32 len;
- __u32 opcode;
- __u64 unique;
- __u64 nodeid;
- __u32 uid;
- __u32 gid;
- __u32 pid;
- __u32 padding;
+ uint32_t len;
+ uint32_t opcode;
+ uint64_t unique;
+ uint64_t nodeid;
+ uint32_t uid;
+ uint32_t gid;
+ uint32_t pid;
+ uint32_t padding;
};
struct fuse_out_header {
- __u32 len;
- __s32 error;
- __u64 unique;
+ uint32_t len;
+ int32_t error;
+ uint64_t unique;
};
struct fuse_dirent {
- __u64 ino;
- __u64 off;
- __u32 namelen;
- __u32 type;
- char name[0];
+ uint64_t ino;
+ uint64_t off;
+ uint32_t namelen;
+ uint32_t type;
+ char name[];
};
#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name)
-#define FUSE_DIRENT_ALIGN(x) (((x) + sizeof(__u64) - 1) & ~(sizeof(__u64) - 1))
+#define FUSE_DIRENT_ALIGN(x) \
+ (((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1))
#define FUSE_DIRENT_SIZE(d) \
FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen)
+struct fuse_direntplus {
+ struct fuse_entry_out entry_out;
+ struct fuse_dirent dirent;
+};
+
+#define FUSE_NAME_OFFSET_DIRENTPLUS \
+ offsetof(struct fuse_direntplus, dirent.name)
+#define FUSE_DIRENTPLUS_SIZE(d) \
+ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET_DIRENTPLUS + (d)->dirent.namelen)
+
struct fuse_notify_inval_inode_out {
- __u64 ino;
- __s64 off;
- __s64 len;
+ uint64_t ino;
+ int64_t off;
+ int64_t len;
};
struct fuse_notify_inval_entry_out {
- __u64 parent;
- __u32 namelen;
- __u32 padding;
+ uint64_t parent;
+ uint32_t namelen;
+ uint32_t padding;
+};
+
+struct fuse_notify_delete_out {
+ uint64_t parent;
+ uint64_t child;
+ uint32_t namelen;
+ uint32_t padding;
+};
+
+struct fuse_notify_store_out {
+ uint64_t nodeid;
+ uint64_t offset;
+ uint32_t size;
+ uint32_t padding;
+};
+
+struct fuse_notify_retrieve_out {
+ uint64_t notify_unique;
+ uint64_t nodeid;
+ uint64_t offset;
+ uint32_t size;
+ uint32_t padding;
+};
+
+/* Matches the size of fuse_write_in */
+struct fuse_notify_retrieve_in {
+ uint64_t dummy1;
+ uint64_t offset;
+ uint32_t size;
+ uint32_t dummy2;
+ uint64_t dummy3;
+ uint64_t dummy4;
};
#endif /* _LINUX_FUSE_H */
diff --git a/glusterfs-guts/src/fuse_kernel.h b/contrib/fuse-include/fuse_kernel_macfuse.h
index 7ebff8b22..3fbf24f70 100644
--- a/glusterfs-guts/src/fuse_kernel.h
+++ b/contrib/fuse-include/fuse_kernel_macfuse.h
@@ -1,19 +1,14 @@
/*
- FUSE: Filesystem in Userspace
+ This file defines the kernel interface of FUSE
Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
This program can be distributed under the terms of the GNU GPL.
See the file COPYING.
-*/
-
-/* This file defines the kernel interface of FUSE */
-#ifdef __FreeBSD__
-/*
This -- and only this -- header file may also be distributed under
the terms of the BSD Licence as follows:
- Copyright (C) 2001-2006 Miklos Szeredi. All rights reserved.
+ Copyright (C) 2001-2007 Miklos Szeredi. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -37,6 +32,7 @@
SUCH DAMAGE.
*/
+#ifndef linux
#include <sys/types.h>
#define __u64 uint64_t
#define __u32 uint32_t
@@ -71,14 +67,23 @@ struct fuse_attr {
__u64 atime;
__u64 mtime;
__u64 ctime;
+#if (__FreeBSD__ >= 10)
+ __u64 crtime;
+#endif /* __FreeBSD__ >= 10 */
__u32 atimensec;
__u32 mtimensec;
__u32 ctimensec;
+#if (__FreeBSD__ >= 10)
+ __u32 crtimensec;
+#endif /* __FreeBSD__ >= 10 */
__u32 mode;
__u32 nlink;
__u32 uid;
__u32 gid;
__u32 rdev;
+#if (__FreeBSD__ >= 10)
+ __u32 flags; /* file flags; see chflags(2) */
+#endif /* __FreeBSD__ >= 10 */
};
struct fuse_kstatfs {
@@ -111,6 +116,12 @@ struct fuse_file_lock {
#define FATTR_ATIME (1 << 4)
#define FATTR_MTIME (1 << 5)
#define FATTR_FH (1 << 6)
+#if (__FreeBSD__ >= 10)
+#define FATTR_CRTIME (1 << 28)
+#define FATTR_CHGTIME (1 << 29)
+#define FATTR_BKUPTIME (1 << 30)
+#define FATTR_FLAGS (1 << 31)
+#endif /* __FreeBSD__ >= 10 */
/**
* Flags returned by the OPEN request
@@ -120,12 +131,21 @@ struct fuse_file_lock {
*/
#define FOPEN_DIRECT_IO (1 << 0)
#define FOPEN_KEEP_CACHE (1 << 1)
+#if (__FreeBSD__ >= 10)
+#define FOPEN_PURGE_ATTR (1 << 30)
+#define FOPEN_PURGE_UBC (1 << 31)
+#endif
/**
* INIT request/reply flags
*/
#define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1)
+#if (__FreeBSD__ >= 10)
+#define FUSE_CASE_INSENSITIVE (1 << 29)
+#define FUSE_VOL_RENAME (1 << 30)
+#define FUSE_XTIMES (1 << 31)
+#endif /* __FreeBSD__ >= 10 */
/**
* Release flags
@@ -169,6 +189,11 @@ enum fuse_opcode {
FUSE_INTERRUPT = 36,
FUSE_BMAP = 37,
FUSE_DESTROY = 38,
+#if (__FreeBSD__ >= 10)
+ FUSE_SETVOLNAME = 61,
+ FUSE_GETXTIMES = 62,
+ FUSE_EXCHANGE = 63,
+#endif /* __FreeBSD__ >= 10 */
};
/* The read buffer is required to be at least 8k, but may be much larger */
@@ -196,6 +221,15 @@ struct fuse_attr_out {
struct fuse_attr attr;
};
+#if (__FreeBSD__ >= 10)
+struct fuse_getxtimes_out {
+ __u64 bkuptime;
+ __u64 crtime;
+ __u32 bkuptimensec;
+ __u32 crtimensec;
+};
+#endif /* __FreeBSD__ >= 10 */
+
struct fuse_mknod_in {
__u32 mode;
__u32 rdev;
@@ -210,6 +244,14 @@ struct fuse_rename_in {
__u64 newdir;
};
+#if (__FreeBSD__ >= 10)
+struct fuse_exchange_in {
+ __u64 olddir;
+ __u64 newdir;
+ __u64 options;
+};
+#endif /* __FreeBSD__ >= 10 */
+
struct fuse_link_in {
__u64 oldnodeid;
};
@@ -231,6 +273,15 @@ struct fuse_setattr_in {
__u32 uid;
__u32 gid;
__u32 unused5;
+#if (__FreeBSD__ >= 10)
+ __u64 bkuptime;
+ __u64 chgtime;
+ __u64 crtime;
+ __u32 bkuptimensec;
+ __u32 chgtimensec;
+ __u32 crtimensec;
+ __u32 flags; /* file flags; see chflags(2) */
+#endif /* __FreeBSD__ >= 10 */
};
struct fuse_open_in {
@@ -292,11 +343,19 @@ struct fuse_fsync_in {
struct fuse_setxattr_in {
__u32 size;
__u32 flags;
+#if (__FreeBSD__ >= 10)
+ __u32 position;
+ __u32 padding;
+#endif /* __FreeBSD__ >= 10 */
};
struct fuse_getxattr_in {
__u32 size;
__u32 padding;
+#if (__FreeBSD__ >= 10)
+ __u32 position;
+ __u32 padding2;
+#endif /* __FreeBSD__ >= 10 */
};
struct fuse_getxattr_out {
diff --git a/contrib/fuse-util/mount_util.h b/contrib/fuse-include/mount_util.h
index cf54d9d0d..f392f99f1 100644
--- a/contrib/fuse-util/mount_util.h
+++ b/contrib/fuse-include/mount_util.h
@@ -10,7 +10,8 @@
int fuse_mnt_add_mount(const char *progname, const char *fsname,
const char *mnt, const char *type, const char *opts);
-int fuse_mnt_umount(const char *progname, const char *mnt, int lazy);
+int fuse_mnt_umount(const char *progname, const char *abs_mnt,
+ const char *rel_mnt, int lazy);
char *fuse_mnt_resolve_path(const char *progname, const char *orig);
int fuse_mnt_check_empty(const char *progname, const char *mnt,
mode_t rootmode, off_t rootsize);
diff --git a/contrib/fuse-lib/misc.c b/contrib/fuse-lib/misc.c
index 877c3880d..0c41b1a19 100644
--- a/contrib/fuse-lib/misc.c
+++ b/contrib/fuse-lib/misc.c
@@ -10,6 +10,7 @@
#include <string.h>
#include <limits.h>
#include <fcntl.h>
+#include "glusterfs.h"
#include "fuse_kernel.h"
#include "fuse-misc.h"
@@ -37,7 +38,8 @@ calc_timeout_nsec (double t)
}
void
-convert_fuse_file_lock (struct fuse_file_lock *fl, struct flock *flock)
+convert_fuse_file_lock (struct fuse_file_lock *fl, struct gf_flock *flock,
+ uint64_t lk_owner)
{
memset (flock, 0, sizeof (struct flock));
flock->l_type = fl->type;
@@ -48,4 +50,5 @@ convert_fuse_file_lock (struct fuse_file_lock *fl, struct flock *flock)
else
flock->l_len = fl->end - fl->start + 1;
flock->l_pid = fl->pid;
+ set_lk_owner_from_uint64 (&flock->l_owner, lk_owner);
}
diff --git a/contrib/fuse-lib/mount-common.c b/contrib/fuse-lib/mount-common.c
new file mode 100644
index 000000000..fd6cce44e
--- /dev/null
+++ b/contrib/fuse-lib/mount-common.c
@@ -0,0 +1,265 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
+ Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+
+ This program can be distributed under the terms of the GNU LGPLv2.
+ See the file COPYING.LIB.
+*/
+
+#include "mount-gluster-compat.h"
+
+/*
+ * These functions (and gf_fuse_umount() in mount.c)
+ * were originally taken from libfuse as of commit 7960e99e
+ * (http://fuse.git.sourceforge.net/git/gitweb.cgi?p=fuse/fuse;a=commit;h=7960e99e)
+ * almost verbatim. What has been changed upon adoption:
+ * - style adopted to that of glusterfs
+ * - s/fprintf/gf_log/
+ * - s/free/FREE/, s/malloc/MALLOC/
+ * - there are some other minor things
+ *
+ * For changes that were made later and syncs with upstream,
+ * see the commit log and per-function comments.
+ */
+
+#ifndef __NetBSD__
+/* FUSE: cherry-picked bd99f9cf */
+static int
+mtab_needs_update (const char *mnt)
+{
+ int res;
+ struct stat stbuf;
+
+ /* If mtab is within new mount, don't touch it */
+ if (strncmp (mnt, _PATH_MOUNTED, strlen (mnt)) == 0 &&
+ _PATH_MOUNTED[strlen (mnt)] == '/')
+ return 0;
+
+ /*
+ * Skip mtab update if /etc/mtab:
+ *
+ * - doesn't exist,
+ * - is a symlink,
+ * - is on a read-only filesystem.
+ */
+ res = lstat (_PATH_MOUNTED, &stbuf);
+ if (res == -1) {
+ if (errno == ENOENT)
+ return 0;
+ } else {
+ uid_t ruid;
+ int err;
+
+ if (S_ISLNK (stbuf.st_mode))
+ return 0;
+
+ ruid = getuid ();
+ if (ruid != 0)
+ setreuid (0, -1);
+
+ res = access (_PATH_MOUNTED, W_OK);
+ err = (res == -1) ? errno : 0;
+ if (ruid != 0)
+ setreuid (ruid, -1);
+
+ if (err == EROFS)
+ return 0;
+ }
+
+ return 1;
+}
+#else /* __NetBSD__ */
+#define mtab_needs_update(x) 1
+#endif /* __NetBSD__ */
+
+/* FUSE: called add_mount_legacy(); R.I.P. as of cbd3a2a8 */
+int
+fuse_mnt_add_mount (const char *progname, const char *fsname,
+ const char *mnt, const char *type, const char *opts)
+{
+ int res;
+ int status;
+ sigset_t blockmask;
+ sigset_t oldmask;
+
+ if (!mtab_needs_update (mnt))
+ return 0;
+
+ sigemptyset (&blockmask);
+ sigaddset (&blockmask, SIGCHLD);
+ res = sigprocmask (SIG_BLOCK, &blockmask, &oldmask);
+ if (res == -1) {
+ GFFUSE_LOGERR ("%s: sigprocmask: %s",
+ progname, strerror (errno));
+ return -1;
+ }
+
+ res = fork ();
+ if (res == -1) {
+ GFFUSE_LOGERR ("%s: fork: %s", progname, strerror (errno));
+ goto out_restore;
+ }
+ if (res == 0) {
+ char templ[] = "/tmp/fusermountXXXXXX";
+ char *tmp;
+
+ sigprocmask (SIG_SETMASK, &oldmask, NULL);
+ setuid (geteuid ());
+
+ /*
+ * hide in a directory, where mount isn't able to resolve
+ * fsname as a valid path
+ */
+ tmp = mkdtemp (templ);
+ if (!tmp) {
+ GFFUSE_LOGERR ("%s: failed to create temporary directory",
+ progname);
+ exit (1);
+ }
+ if (chdir (tmp)) {
+ GFFUSE_LOGERR ("%s: failed to chdir to %s: %s",
+ progname, tmp, strerror (errno));
+ exit (1);
+ }
+ rmdir (tmp);
+ execl (_PATH_MOUNT, _PATH_MOUNT, "-i", "-f", "-t", type,
+ "-o", opts, fsname, mnt, NULL);
+ GFFUSE_LOGERR ("%s: failed to execute %s: %s",
+ progname, _PATH_MOUNT, strerror (errno));
+ exit (1);
+ }
+
+ res = waitpid (res, &status, 0);
+ if (res == -1)
+ GFFUSE_LOGERR ("%s: waitpid: %s", progname, strerror (errno));
+ res = (res != -1 && status == 0) ? 0 : -1;
+
+ out_restore:
+ sigprocmask (SIG_SETMASK, &oldmask, NULL);
+ return res;
+}
+
+char *
+fuse_mnt_resolve_path (const char *progname, const char *orig)
+{
+ char buf[PATH_MAX];
+ char *copy;
+ char *dst;
+ char *end;
+ char *lastcomp;
+ const char *toresolv;
+
+ if (!orig[0]) {
+ GFFUSE_LOGERR ("%s: invalid mountpoint '%s'", progname, orig);
+ return NULL;
+ }
+
+ copy = strdup (orig);
+ if (copy == NULL) {
+ GFFUSE_LOGERR ("%s: failed to allocate memory", progname);
+ return NULL;
+ }
+
+ toresolv = copy;
+ lastcomp = NULL;
+ for (end = copy + strlen (copy) - 1; end > copy && *end == '/'; end --);
+ if (end[0] != '/') {
+ char *tmp;
+ end[1] = '\0';
+ tmp = strrchr (copy, '/');
+ if (tmp == NULL) {
+ lastcomp = copy;
+ toresolv = ".";
+ } else {
+ lastcomp = tmp + 1;
+ if (tmp == copy)
+ toresolv = "/";
+ }
+ if (strcmp (lastcomp, ".") == 0 || strcmp (lastcomp, "..") == 0) {
+ lastcomp = NULL;
+ toresolv = copy;
+ }
+ else if (tmp)
+ tmp[0] = '\0';
+ }
+ if (realpath (toresolv, buf) == NULL) {
+ GFFUSE_LOGERR ("%s: bad mount point %s: %s", progname, orig,
+ strerror (errno));
+ FREE (copy);
+ return NULL;
+ }
+ if (lastcomp == NULL)
+ dst = strdup (buf);
+ else {
+ dst = (char *) MALLOC (strlen (buf) + 1 + strlen (lastcomp) + 1);
+ if (dst) {
+ unsigned buflen = strlen (buf);
+ if (buflen && buf[buflen-1] == '/')
+ sprintf (dst, "%s%s", buf, lastcomp);
+ else
+ sprintf (dst, "%s/%s", buf, lastcomp);
+ }
+ }
+ FREE (copy);
+ if (dst == NULL)
+ GFFUSE_LOGERR ("%s: failed to allocate memory", progname);
+ return dst;
+}
+
+/* FUSE: to support some changes that were reverted since
+ * then, it was split in two (fuse_mnt_umount() and
+ * exec_umount()); however the actual code is same as here
+ * since 0197ce40
+ */
+int
+fuse_mnt_umount (const char *progname, const char *abs_mnt,
+ const char *rel_mnt, int lazy)
+{
+ int res;
+ int status;
+ sigset_t blockmask;
+ sigset_t oldmask;
+
+ if (!mtab_needs_update (abs_mnt)) {
+ res = umount2 (rel_mnt, lazy ? 2 : 0);
+ if (res == -1)
+ GFFUSE_LOGERR ("%s: failed to unmount %s: %s",
+ progname, abs_mnt, strerror (errno));
+ return res;
+ }
+
+ sigemptyset (&blockmask);
+ sigaddset (&blockmask, SIGCHLD);
+ res = sigprocmask (SIG_BLOCK, &blockmask, &oldmask);
+ if (res == -1) {
+ GFFUSE_LOGERR ("%s: sigprocmask: %s", progname,
+ strerror (errno));
+ return -1;
+ }
+
+ res = fork ();
+ if (res == -1) {
+ GFFUSE_LOGERR ("%s: fork: %s", progname, strerror (errno));
+ goto out_restore;
+ }
+ if (res == 0) {
+ sigprocmask (SIG_SETMASK, &oldmask, NULL);
+ setuid (geteuid ());
+ execl ("/bin/umount", "/bin/umount", "-i", rel_mnt,
+ lazy ? "-l" : NULL, NULL);
+ GFFUSE_LOGERR ("%s: failed to execute /bin/umount: %s",
+ progname, strerror (errno));
+ exit (1);
+ }
+ res = waitpid (res, &status, 0);
+ if (res == -1)
+ GFFUSE_LOGERR ("%s: waitpid: %s", progname, strerror (errno));
+
+ if (status != 0)
+ res = -1;
+
+ out_restore:
+ sigprocmask (SIG_SETMASK, &oldmask, NULL);
+ return res;
+}
diff --git a/contrib/fuse-lib/mount-gluster-compat.h b/contrib/fuse-lib/mount-gluster-compat.h
new file mode 100644
index 000000000..4fc20623b
--- /dev/null
+++ b/contrib/fuse-lib/mount-gluster-compat.h
@@ -0,0 +1,56 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
+ Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+
+ This program can be distributed under the terms of the GNU LGPLv2.
+ See the file COPYING.LIB.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <dirent.h>
+#include <signal.h>
+#ifndef __NetBSD__
+#include <mntent.h>
+#endif /* __NetBSD__ */
+#include <sys/stat.h>
+#include <sys/poll.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+#include <sys/mount.h>
+
+#ifdef __NetBSD__
+#include <perfuse.h>
+#define umount2(dir, flags) unmount(dir, ((flags) != 0) ? MNT_FORCE : 0)
+#define MS_RDONLY MNT_RDONLY
+#endif
+
+#ifdef linux
+#define _PATH_MOUNT "/bin/mount"
+#else /* NetBSD, MacOS X */
+#define _PATH_MOUNT "/sbin/mount"
+#endif
+
+#ifdef FUSE_UTIL
+#define MALLOC(size) malloc (size)
+#define FREE(ptr) free (ptr)
+#define GFFUSE_LOGERR(...) fprintf (stderr, ## __VA_ARGS__)
+#else /* FUSE_UTIL */
+#include "glusterfs.h"
+#include "logging.h"
+#include "common-utils.h"
+
+#define GFFUSE_LOGERR(...) \
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR, ## __VA_ARGS__)
+#endif /* !FUSE_UTIL */
diff --git a/contrib/fuse-lib/mount.c b/contrib/fuse-lib/mount.c
index c6de43607..922d9e464 100644
--- a/contrib/fuse-lib/mount.c
+++ b/contrib/fuse-lib/mount.c
@@ -1,302 +1,146 @@
/*
FUSE: Filesystem in Userspace
Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
- Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
This program can be distributed under the terms of the GNU LGPLv2.
See the file COPYING.LIB.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <stddef.h>
-#include <limits.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <dirent.h>
-#include <mntent.h>
-#include <sys/stat.h>
-#include <sys/poll.h>
-#include <sys/socket.h>
-#include <sys/un.h>
-#include <sys/wait.h>
-#include <sys/mount.h>
-
-#ifdef FUSE_UTIL
-#define MALLOC(size) malloc (size)
-#define FREE(ptr) free (ptr)
-#define GFFUSE_LOGERR(...) fprintf (stderr, ## __VA_ARGS__)
-#else /* FUSE_UTIL */
-#include "glusterfs.h"
-#include "logging.h"
-#include "common-utils.h"
+#include "mount_util.h"
+#include "mount-gluster-compat.h"
#ifdef GF_FUSERMOUNT
#define FUSERMOUNT_PROG FUSERMOUNT_DIR "/fusermount-glusterfs"
#else
#define FUSERMOUNT_PROG "fusermount"
#endif
-#define FUSE_COMMFD_ENV "_FUSE_COMMFD"
-
-#define GFFUSE_LOGERR(...) \
- gf_log ("glusterfs-fuse", GF_LOG_ERROR, ## __VA_ARGS__)
-#endif /* !FUSE_UTIL */
+#define FUSE_DEVFD_ENV "_FUSE_DEVFD"
-/*
- * Functions below, until following note, were taken from libfuse
- * (http://git.gluster.com/?p=users/csaba/fuse.git;a=commit;h=b988bbf9)
- * almost verbatim. What has been changed:
- * - style adopted to that of glusterfs
- * - s/fprintf/gf_log/
- * - s/free/FREE/, s/malloc/MALLOC/
- * - there are some other minor things
- */
-static int
-mtab_needs_update (const char *mnt)
+/* FUSE: function is called fuse_kern_unmount() */
+void
+gf_fuse_unmount (const char *mountpoint, int fd)
{
int res;
- struct stat stbuf;
-
- /* If mtab is within new mount, don't touch it */
- if (strncmp (mnt, _PATH_MOUNTED, strlen (mnt)) == 0 &&
- _PATH_MOUNTED[strlen (mnt)] == '/')
- return 0;
-
- /*
- * Skip mtab update if /etc/mtab:
- *
- * - doesn't exist,
- * - is a symlink,
- * - is on a read-only filesystem.
- */
- res = lstat (_PATH_MOUNTED, &stbuf);
- if (res == -1) {
- if (errno == ENOENT)
- return 0;
- } else {
- if (S_ISLNK (stbuf.st_mode))
- return 0;
-
- res = access (_PATH_MOUNTED, W_OK);
- if (res == -1 && errno == EROFS)
- return 0;
- }
+ int pid;
- return 1;
-}
+ if (!mountpoint)
+ return;
-#ifndef FUSE_UTIL
-static
-#endif
-int
-fuse_mnt_add_mount (const char *progname, const char *fsname,
- const char *mnt, const char *type, const char *opts)
-{
- int res;
- int status;
- sigset_t blockmask;
- sigset_t oldmask;
-
- if (!mtab_needs_update (mnt))
- return 0;
-
- sigemptyset (&blockmask);
- sigaddset (&blockmask, SIGCHLD);
- res = sigprocmask (SIG_BLOCK, &blockmask, &oldmask);
- if (res == -1) {
- GFFUSE_LOGERR ("%s: sigprocmask: %s",
- progname, strerror (errno));
- return -1;
- }
+ if (fd != -1) {
+ struct pollfd pfd;
+
+ pfd.fd = fd;
+ pfd.events = 0;
+ res = poll (&pfd, 1, 0);
+ /* If file poll returns POLLERR on the device file descriptor,
+ then the filesystem is already unmounted */
+ if (res == 1 && (pfd.revents & POLLERR))
+ return;
- res = fork ();
- if (res == -1) {
- GFFUSE_LOGERR ("%s: fork: %s", progname, strerror (errno));
- goto out_restore;
+ /* Need to close file descriptor, otherwise synchronous umount
+ would recurse into filesystem, and deadlock */
+ close (fd);
}
- if (res == 0) {
- char templ[] = "/tmp/fusermountXXXXXX";
- char *tmp;
-
- sigprocmask (SIG_SETMASK, &oldmask, NULL);
- setuid (geteuid ());
-
- /*
- * hide in a directory, where mount isn't able to resolve
- * fsname as a valid path
- */
- tmp = mkdtemp (templ);
- if (!tmp) {
- GFFUSE_LOGERR ("%s: failed to create temporary directory",
- progname);
- exit (1);
- }
- if (chdir (tmp)) {
- GFFUSE_LOGERR ("%s: failed to chdir to %s: %s",
- progname, tmp, strerror (errno));
- exit (1);
- }
- rmdir (tmp);
- execl ("/bin/mount", "/bin/mount", "-i", "-f", "-t", type,
- "-o", opts, fsname, mnt, NULL);
- GFFUSE_LOGERR ("%s: failed to execute /bin/mount: %s",
- progname, strerror (errno));
- exit (1);
+
+ if (geteuid () == 0) {
+ fuse_mnt_umount ("fuse", mountpoint, mountpoint, 1);
+ return;
}
- res = waitpid (res, &status, 0);
- if (res == -1)
- GFFUSE_LOGERR ("%s: waitpid: %s", progname, strerror (errno));
- if (status != 0)
- res = -1;
+ res = umount2 (mountpoint, 2);
+ if (res == 0)
+ return;
+
+ pid = fork ();
+ if (pid == -1)
+ return;
+
+ if (pid == 0) {
+ const char *argv[] = { FUSERMOUNT_PROG, "-u", "-q", "-z",
+ "--", mountpoint, NULL };
- out_restore:
- sigprocmask (SIG_SETMASK, &oldmask, NULL);
- return res;
+ execvp (FUSERMOUNT_PROG, (char **)argv);
+ _exit (1);
+ }
+ waitpid (pid, NULL, 0);
}
-#ifndef FUSE_UTIL
-static
-#endif
-char
-*fuse_mnt_resolve_path (const char *progname, const char *orig)
+
+/* gluster-specific routines */
+
+static char *
+escape (char *s)
{
- char buf[PATH_MAX];
- char *copy;
- char *dst;
- char *end;
- char *lastcomp;
- const char *toresolv;
-
- if (!orig[0]) {
- GFFUSE_LOGERR ("%s: invalid mountpoint '%s'", progname, orig);
- return NULL;
+ size_t len = 0;
+ char *p = NULL;
+ char *q = NULL;
+ char *e = NULL;
+
+ for (p = s; *p; p++) {
+ if (*p == ',')
+ len++;
+ len++;
}
- copy = strdup (orig);
- if (copy == NULL) {
- GFFUSE_LOGERR ("%s: failed to allocate memory", progname);
+ e = CALLOC (1, len + 1);
+ if (!e)
return NULL;
- }
- toresolv = copy;
- lastcomp = NULL;
- for (end = copy + strlen (copy) - 1; end > copy && *end == '/'; end --);
- if (end[0] != '/') {
- char *tmp;
- end[1] = '\0';
- tmp = strrchr (copy, '/');
- if (tmp == NULL) {
- lastcomp = copy;
- toresolv = ".";
- } else {
- lastcomp = tmp + 1;
- if (tmp == copy)
- toresolv = "/";
- }
- if (strcmp (lastcomp, ".") == 0 || strcmp (lastcomp, "..") == 0) {
- lastcomp = NULL;
- toresolv = copy;
- }
- else if (tmp)
- tmp[0] = '\0';
- }
- if (realpath (toresolv, buf) == NULL) {
- GFFUSE_LOGERR ("%s: bad mount point %s: %s", progname, orig,
- strerror (errno));
- FREE (copy);
- return NULL;
- }
- if (lastcomp == NULL)
- dst = strdup (buf);
- else {
- dst = (char *) MALLOC (strlen (buf) + 1 + strlen (lastcomp) + 1);
- if (dst) {
- unsigned buflen = strlen (buf);
- if (buflen && buf[buflen-1] == '/')
- sprintf (dst, "%s%s", buf, lastcomp);
- else
- sprintf (dst, "%s/%s", buf, lastcomp);
+ for (p = s, q = e; *p; p++, q++) {
+ if (*p == ',') {
+ *q = '\\';
+ q++;
}
+ *q = *p;
}
- FREE (copy);
- if (dst == NULL)
- GFFUSE_LOGERR ("%s: failed to allocate memory", progname);
- return dst;
+
+ return e;
}
-#ifndef FUSE_UTIL
-/* return value:
- * >= 0 => fd
- * -1 => error
- */
static int
-receive_fd (int fd)
+fuse_mount_fusermount (const char *mountpoint, char *fsname,
+ unsigned long mountflags, char *mnt_param,
+ int fd)
{
- struct msghdr msg;
- struct iovec iov;
- char buf[1];
- int rv;
- size_t ccmsg[CMSG_SPACE (sizeof (int)) / sizeof (size_t)];
- struct cmsghdr *cmsg;
-
- iov.iov_base = buf;
- iov.iov_len = 1;
-
- msg.msg_name = 0;
- msg.msg_namelen = 0;
- msg.msg_iov = &iov;
- msg.msg_iovlen = 1;
- /* old BSD implementations should use msg_accrights instead of
- * msg_control; the interface is different. */
- msg.msg_control = ccmsg;
- msg.msg_controllen = sizeof (ccmsg);
-
- while (((rv = recvmsg (fd, &msg, 0)) == -1) && errno == EINTR);
- if (rv == -1) {
- GFFUSE_LOGERR ("recvmsg failed: %s", strerror (errno));
- return -1;
- }
- if (!rv) {
- /* EOF */
- return -1;
- }
+ int pid = -1;
+ int res = 0;
+ int ret = -1;
+ char *fm_mnt_params = NULL;
+ char *efsname = NULL;
+
+#ifndef GF_FUSERMOUNT
+ GFFUSE_LOGERR ("Mounting via helper utility "
+ "(unprivileged mounting) is supported "
+ "only if glusterfs is compiled with "
+ "--enable-fusermount");
+ return -1;
+#endif
+
+ efsname = escape (fsname);
+ if (!efsname) {
+ GFFUSE_LOGERR ("Out of memory");
- cmsg = CMSG_FIRSTHDR (&msg);
- if (!cmsg->cmsg_type == SCM_RIGHTS) {
- GFFUSE_LOGERR ("got control message of unknown type %d",
- cmsg->cmsg_type);
return -1;
}
- return *(int*)CMSG_DATA (cmsg);
-}
-
-static int
-fuse_mount_fusermount (const char *mountpoint, const char *opts)
-{
- int fds[2], pid;
- int res;
- int rv;
+ ret = asprintf (&fm_mnt_params,
+ "%s%s,fsname=%s,nonempty,subtype=glusterfs",
+ (mountflags & MS_RDONLY) ? "ro," : "",
+ mnt_param, efsname);
+ FREE (efsname);
+ if (ret == -1) {
+ GFFUSE_LOGERR ("Out of memory");
- res = socketpair (PF_UNIX, SOCK_STREAM, 0, fds);
- if (res == -1) {
- GFFUSE_LOGERR ("socketpair() failed: %s", strerror (errno));
- return -1;
+ goto out;
}
+ /* fork to exec fusermount */
pid = fork ();
if (pid == -1) {
GFFUSE_LOGERR ("fork() failed: %s", strerror (errno));
- close (fds[0]);
- close (fds[1]);
- return -1;
+ ret = -1;
+ goto out;
}
if (pid == 0) {
@@ -305,213 +149,37 @@ fuse_mount_fusermount (const char *mountpoint, const char *opts)
int a = 0;
argv[a++] = FUSERMOUNT_PROG;
- if (opts) {
- argv[a++] = "-o";
- argv[a++] = opts;
- }
+ argv[a++] = "-o";
+ argv[a++] = fm_mnt_params;
argv[a++] = "--";
argv[a++] = mountpoint;
argv[a++] = NULL;
- close (fds[1]);
- fcntl (fds[0], F_SETFD, 0);
- snprintf (env, sizeof (env), "%i", fds[0]);
- setenv (FUSE_COMMFD_ENV, env, 1);
+ snprintf (env, sizeof (env), "%i", fd);
+ setenv (FUSE_DEVFD_ENV, env, 1);
execvp (FUSERMOUNT_PROG, (char **)argv);
GFFUSE_LOGERR ("failed to exec fusermount: %s",
strerror (errno));
_exit (1);
}
- close (fds[0]);
- rv = receive_fd (fds[1]);
- close (fds[1]);
- waitpid (pid, NULL, 0); /* bury zombie */
-
- return rv;
-}
-#endif
-
-#ifndef FUSE_UTIL
-static
-#endif
-int
-fuse_mnt_umount (const char *progname, const char *mnt, int lazy)
-{
- int res;
- int status;
- sigset_t blockmask;
- sigset_t oldmask;
-
- if (!mtab_needs_update (mnt)) {
- res = umount2 (mnt, lazy ? 2 : 0);
- if (res == -1)
- GFFUSE_LOGERR ("%s: failed to unmount %s: %s",
- progname, mnt, strerror (errno));
- return res;
- }
-
- sigemptyset (&blockmask);
- sigaddset (&blockmask, SIGCHLD);
- res = sigprocmask (SIG_BLOCK, &blockmask, &oldmask);
- if (res == -1) {
- GFFUSE_LOGERR ("%s: sigprocmask: %s", progname,
- strerror (errno));
- return -1;
- }
-
- res = fork ();
- if (res == -1) {
- GFFUSE_LOGERR ("%s: fork: %s", progname, strerror (errno));
- goto out_restore;
- }
- if (res == 0) {
- sigprocmask (SIG_SETMASK, &oldmask, NULL);
- setuid (geteuid ());
- execl ("/bin/umount", "/bin/umount", "-i", mnt,
- lazy ? "-l" : NULL, NULL);
- GFFUSE_LOGERR ("%s: failed to execute /bin/umount: %s",
- progname, strerror (errno));
- exit (1);
- }
- res = waitpid (res, &status, 0);
- if (res == -1)
- GFFUSE_LOGERR ("%s: waitpid: %s", progname, strerror (errno));
-
- if (status != 0)
- res = -1;
-
- out_restore:
- sigprocmask (SIG_SETMASK, &oldmask, NULL);
- return res;
-}
-
-#ifdef FUSE_UTIL
-int
-fuse_mnt_check_empty (const char *progname, const char *mnt,
- mode_t rootmode, off_t rootsize)
-{
- int isempty = 1;
-
- if (S_ISDIR (rootmode)) {
- struct dirent *ent;
- DIR *dp = opendir (mnt);
- if (dp == NULL) {
- fprintf (stderr,
- "%s: failed to open mountpoint for reading: %s\n",
- progname, strerror (errno));
- return -1;
- }
- while ((ent = readdir (dp)) != NULL) {
- if (strcmp (ent->d_name, ".") != 0 &&
- strcmp (ent->d_name, "..") != 0) {
- isempty = 0;
- break;
- }
- }
- closedir (dp);
- } else if (rootsize)
- isempty = 0;
-
- if (!isempty) {
- fprintf (stderr, "%s: mountpoint is not empty\n", progname);
- fprintf (stderr, "%s: if you are sure this is safe, "
- "use the 'nonempty' mount option\n", progname);
- return -1;
- }
- return 0;
-}
-
-int
-fuse_mnt_check_fuseblk (void)
-{
- char buf[256];
- FILE *f = fopen ("/proc/filesystems", "r");
- if (!f)
- return 1;
-
- while (fgets (buf, sizeof (buf), f))
- if (strstr (buf, "fuseblk\n")) {
- fclose (f);
- return 1;
- }
-
- fclose (f);
- return 0;
-}
-#endif
-
-#ifndef FUSE_UTIL
-void
-gf_fuse_unmount (const char *mountpoint, int fd)
-{
- int res;
- int pid;
-
- if (!mountpoint)
- return;
-
- if (fd != -1) {
- struct pollfd pfd;
-
- pfd.fd = fd;
- pfd.events = 0;
- res = poll (&pfd, 1, 0);
- /* If file poll returns POLLERR on the device file descriptor,
- then the filesystem is already unmounted */
- if (res == 1 && (pfd.revents & POLLERR))
- return;
-
- /* Need to close file descriptor, otherwise synchronous umount
- would recurse into filesystem, and deadlock */
- close (fd);
- }
-
- if (geteuid () == 0) {
- fuse_mnt_umount ("fuse", mountpoint, 1);
- return;
- }
-
- res = umount2 (mountpoint, 2);
- if (res == 0)
- return;
-
- pid = fork ();
- if (pid == -1)
- return;
-
- if (pid == 0) {
- const char *argv[] = { FUSERMOUNT_PROG, "-u", "-q", "-z",
- "--", mountpoint, NULL };
-
- execvp (FUSERMOUNT_PROG, (char **)argv);
- _exit (1);
- }
- waitpid (pid, NULL, 0);
+ ret = waitpid (pid, &res, 0);
+ ret = (ret == pid && res == 0) ? 0 : -1;
+ out:
+ FREE (fm_mnt_params);
+ return ret;
}
-#endif
-
-/*
- * Functions below are loosely modelled after similar functions of libfuse
- */
-#ifndef FUSE_UTIL
static int
-fuse_mount_sys (const char *mountpoint, char *fsname, char *mnt_param)
+fuse_mount_sys (const char *mountpoint, char *fsname,
+ unsigned long mountflags, char *mnt_param, int fd)
{
- int fd = -1, ret = -1;
+ int ret = -1;
unsigned mounted = 0;
char *mnt_param_mnt = NULL;
char *fstype = "fuse.glusterfs";
char *source = fsname;
- fd = open ("/dev/fuse", O_RDWR);
- if (fd == -1) {
- GFFUSE_LOGERR ("cannot open /dev/fuse (%s)", strerror (errno));
-
- return -1;
- }
-
ret = asprintf (&mnt_param_mnt,
"%s,fd=%i,rootmode=%o,user_id=%i,group_id=%i",
mnt_param, fd, S_IFDIR, getuid (), getgid ());
@@ -520,7 +188,7 @@ fuse_mount_sys (const char *mountpoint, char *fsname, char *mnt_param)
goto out;
}
- ret = mount (source, mountpoint, fstype, 0,
+ ret = mount (source, mountpoint, fstype, mountflags,
mnt_param_mnt);
if (ret == -1 && errno == ENODEV) {
/* fs subtype support was added by 79c0b2df aka
@@ -533,7 +201,7 @@ fuse_mount_sys (const char *mountpoint, char *fsname, char *mnt_param)
goto out;
}
- ret = mount (source, mountpoint, fstype, 0,
+ ret = mount (source, mountpoint, fstype, mountflags,
mnt_param_mnt);
}
if (ret == -1)
@@ -541,8 +209,10 @@ fuse_mount_sys (const char *mountpoint, char *fsname, char *mnt_param)
else
mounted = 1;
+#ifndef __NetBSD__
if (geteuid () == 0) {
char *newmnt = fuse_mnt_resolve_path ("fuse", mountpoint);
+ char *mnt_param_mtab = NULL;
if (!newmnt) {
ret = -1;
@@ -550,8 +220,17 @@ fuse_mount_sys (const char *mountpoint, char *fsname, char *mnt_param)
goto out;
}
- ret = fuse_mnt_add_mount ("fuse", source, newmnt, fstype,
- mnt_param);
+ ret = asprintf (&mnt_param_mtab, "%s%s",
+ mountflags & MS_RDONLY ? "ro," : "",
+ mnt_param);
+ if (ret == -1)
+ GFFUSE_LOGERR ("Out of memory");
+ else {
+ ret = fuse_mnt_add_mount ("fuse", source, newmnt,
+ fstype, mnt_param_mtab);
+ FREE (mnt_param_mtab);
+ }
+
FREE (newmnt);
if (ret == -1) {
GFFUSE_LOGERR ("failed to add mtab entry");
@@ -559,58 +238,76 @@ fuse_mount_sys (const char *mountpoint, char *fsname, char *mnt_param)
goto out;
}
}
+#endif /* __NetBSD__ */
- out:
+out:
if (ret == -1) {
if (mounted)
umount2 (mountpoint, 2); /* lazy umount */
- close (fd);
- fd = -1;
}
FREE (mnt_param_mnt);
if (source != fsname)
FREE (source);
- return fd;
+
+ return ret;
}
int
-gf_fuse_mount (const char *mountpoint, char *fsname, char *mnt_param)
+gf_fuse_mount (const char *mountpoint, char *fsname,
+ unsigned long mountflags, char *mnt_param,
+ pid_t *mnt_pid, int status_fd)
{
- int fd = -1, rv = -1;
- char *fm_mnt_params = NULL, *p = NULL;
+ int fd = -1;
+ pid_t pid = -1;
+ int ret = -1;
- fd = fuse_mount_sys (mountpoint, fsname, mnt_param);
+ fd = open ("/dev/fuse", O_RDWR);
if (fd == -1) {
- gf_log ("glusterfs-fuse", GF_LOG_NORMAL,
- "direct mount failed (%s), "
- "retry to mount via fusermount",
- strerror (errno));
-
- rv = asprintf (&fm_mnt_params,
- "%s,fsname=%s,nonempty,subtype=glusterfs",
- mnt_param, fsname);
-
- if (rv == -1) {
- GFFUSE_LOGERR ("Out of memory");
+ GFFUSE_LOGERR ("cannot open /dev/fuse (%s)",
+ strerror (errno));
+ return -1;
+ }
- return -1;
+ /* start mount agent */
+ pid = fork();
+ switch (pid) {
+ case 0:
+ /* hello it's mount agent */
+ if (!mnt_pid) {
+ /* daemonize mount agent, caller is
+ * not interested in waiting for it
+ */
+ pid = fork ();
+ if (pid)
+ exit (pid == -1 ? 1 : 0);
}
- fd = fuse_mount_fusermount (mountpoint, fm_mnt_params);
- if (fd == -1) {
- p = fm_mnt_params + strlen (fm_mnt_params);
- while (*--p != ',');
- *p = '\0';
+ ret = fuse_mount_sys (mountpoint, fsname, mountflags, mnt_param, fd);
+ if (ret == -1) {
+ gf_log ("glusterfs-fuse", GF_LOG_INFO,
+ "direct mount failed (%s), "
+ "retry to mount via fusermount",
+ strerror (errno));
- fd = fuse_mount_fusermount (mountpoint, fm_mnt_params);
+ ret = fuse_mount_fusermount (mountpoint, fsname, mountflags,
+ mnt_param, fd);
}
- FREE (fm_mnt_params);
+ if (ret == -1)
+ GFFUSE_LOGERR ("mount of %s to %s (%s) failed",
+ fsname, mountpoint, mnt_param);
- if (fd == -1)
- GFFUSE_LOGERR ("mount failed");
+ if (status_fd >= 0)
+ (void)write (status_fd, &ret, sizeof (ret));
+ exit (!!ret);
+ /* bye mount agent */
+ case -1:
+ close (fd);
+ fd = -1;
}
+ if (mnt_pid)
+ *mnt_pid = pid;
+
return fd;
}
-#endif
diff --git a/contrib/fuse-util/Makefile.am b/contrib/fuse-util/Makefile.am
index 06b6cb6da..971d3d220 100644
--- a/contrib/fuse-util/Makefile.am
+++ b/contrib/fuse-util/Makefile.am
@@ -1,12 +1,14 @@
bin_PROGRAMS = fusermount-glusterfs
-fusermount_glusterfs_SOURCES = fusermount.c $(CONTRIBDIR)/fuse-lib/mount.c
-noinst_HEADERS = mount_util.h
+fusermount_glusterfs_SOURCES = fusermount.c mount_util.c $(CONTRIBDIR)/fuse-lib/mount-common.c
+noinst_HEADERS = $(CONTRIBDIR)/fuse-include/mount_util.h
-AM_CFLAGS = -Wall -D_FILE_OFFSET_BITS=64 -DFUSE_UTIL $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -DFUSE_UTIL -I$(CONTRIBDIR)/fuse-include
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
install-exec-hook:
- chown root $(DESTDIR)$(bindir)/fusermount-glusterfs
+ -chown root $(DESTDIR)$(bindir)/fusermount-glusterfs
chmod u+s $(DESTDIR)$(bindir)/fusermount-glusterfs
CLEANFILES =
diff --git a/contrib/fuse-util/fusermount.c b/contrib/fuse-util/fusermount.c
index c3ecc86cc..0ff8d9039 100644
--- a/contrib/fuse-util/fusermount.c
+++ b/contrib/fuse-util/fusermount.c
@@ -19,6 +19,7 @@
#include <errno.h>
#include <fcntl.h>
#include <pwd.h>
+#include <limits.h>
#include <mntent.h>
#include <sys/wait.h>
#include <sys/stat.h>
@@ -26,7 +27,9 @@
#include <sys/fsuid.h>
#include <sys/socket.h>
#include <sys/utsname.h>
+#include <sched.h>
+#define FUSE_DEVFD_ENV "_FUSE_DEVFD"
#define FUSE_COMMFD_ENV "_FUSE_COMMFD"
#define FUSE_DEV_OLD "/proc/fs/fuse/dev"
@@ -37,6 +40,12 @@
#ifndef MS_DIRSYNC
#define MS_DIRSYNC 128
#endif
+#ifndef MS_REC
+#define MS_REC 16384
+#endif
+#ifndef MS_PRIVATE
+#define MS_PRIVATE (1<<18)
+#endif
static const char *progname;
@@ -74,77 +83,348 @@ static void restore_privs(void)
}
#ifndef IGNORE_MTAB
+/*
+ * Make sure that /etc/mtab is checked and updated atomically
+ */
+static int lock_umount(void)
+{
+ const char *mtab_lock = _PATH_MOUNTED ".fuselock";
+ int mtablock;
+ int res;
+ struct stat mtab_stat;
+
+ /* /etc/mtab could be a symlink to /proc/mounts */
+ if (lstat(_PATH_MOUNTED, &mtab_stat) == 0 && S_ISLNK(mtab_stat.st_mode))
+ return -1;
+
+ mtablock = open(mtab_lock, O_RDWR | O_CREAT, 0600);
+ if (mtablock == -1) {
+ fprintf(stderr, "%s: unable to open fuse lock file: %s\n",
+ progname, strerror(errno));
+ return -1;
+ }
+ res = lockf(mtablock, F_LOCK, 0);
+ if (res < 0) {
+ fprintf(stderr, "%s: error getting lock: %s\n", progname,
+ strerror(errno));
+ close(mtablock);
+ return -1;
+ }
+
+ return mtablock;
+}
+
+static void unlock_umount(int mtablock)
+{
+ if (mtablock >= 0) {
+ int res;
+
+ res = lockf(mtablock, F_ULOCK, 0);
+ if (res < 0) {
+ fprintf(stderr, "%s: error releasing lock: %s\n",
+ progname, strerror(errno));
+ }
+ close(mtablock);
+ }
+}
+
static int add_mount(const char *source, const char *mnt, const char *type,
const char *opts)
{
return fuse_mnt_add_mount(progname, source, mnt, type, opts);
}
-static int unmount_fuse(const char *mnt, int quiet, int lazy)
+static int may_unmount(const char *mnt, int quiet)
{
- if (getuid() != 0) {
- struct mntent *entp;
- FILE *fp;
- const char *user = NULL;
- char uidstr[32];
- unsigned uidlen = 0;
- int found;
- const char *mtab = _PATH_MOUNTED;
-
- user = get_user_name();
- if (user == NULL)
- return -1;
+ struct mntent *entp;
+ FILE *fp;
+ const char *user = NULL;
+ char uidstr[32];
+ unsigned uidlen = 0;
+ int found;
+ const char *mtab = _PATH_MOUNTED;
- fp = setmntent(mtab, "r");
- if (fp == NULL) {
- fprintf(stderr,
- "%s: failed to open %s: %s\n", progname, mtab,
- strerror(errno));
- return -1;
- }
+ user = get_user_name();
+ if (user == NULL)
+ return -1;
- uidlen = sprintf(uidstr, "%u", getuid());
-
- found = 0;
- while ((entp = getmntent(fp)) != NULL) {
- if (!found && strcmp(entp->mnt_dir, mnt) == 0 &&
- (strcmp(entp->mnt_type, "fuse") == 0 ||
- strcmp(entp->mnt_type, "fuseblk") == 0 ||
- strncmp(entp->mnt_type, "fuse.", 5) == 0 ||
- strncmp(entp->mnt_type, "fuseblk.", 8) == 0)) {
- char *p = strstr(entp->mnt_opts, "user=");
- if (p &&
- (p == entp->mnt_opts || *(p-1) == ',') &&
- strcmp(p + 5, user) == 0) {
- found = 1;
- break;
- }
- /* /etc/mtab is a link pointing to
- /proc/mounts: */
- else if ((p =
- strstr(entp->mnt_opts, "user_id=")) &&
- (p == entp->mnt_opts ||
- *(p-1) == ',') &&
- strncmp(p + 8, uidstr, uidlen) == 0 &&
- (*(p+8+uidlen) == ',' ||
- *(p+8+uidlen) == '\0')) {
- found = 1;
- break;
- }
+ fp = setmntent(mtab, "r");
+ if (fp == NULL) {
+ fprintf(stderr, "%s: failed to open %s: %s\n", progname, mtab,
+ strerror(errno));
+ return -1;
+ }
+
+ uidlen = sprintf(uidstr, "%u", getuid());
+
+ found = 0;
+ while ((entp = getmntent(fp)) != NULL) {
+ if (!found && strcmp(entp->mnt_dir, mnt) == 0 &&
+ (strcmp(entp->mnt_type, "fuse") == 0 ||
+ strcmp(entp->mnt_type, "fuseblk") == 0 ||
+ strncmp(entp->mnt_type, "fuse.", 5) == 0 ||
+ strncmp(entp->mnt_type, "fuseblk.", 8) == 0)) {
+ char *p = strstr(entp->mnt_opts, "user=");
+ if (p &&
+ (p == entp->mnt_opts || *(p-1) == ',') &&
+ strcmp(p + 5, user) == 0) {
+ found = 1;
+ break;
+ }
+ /* /etc/mtab is a link pointing to
+ /proc/mounts: */
+ else if ((p =
+ strstr(entp->mnt_opts, "user_id=")) &&
+ (p == entp->mnt_opts ||
+ *(p-1) == ',') &&
+ strncmp(p + 8, uidstr, uidlen) == 0 &&
+ (*(p+8+uidlen) == ',' ||
+ *(p+8+uidlen) == '\0')) {
+ found = 1;
+ break;
}
}
- endmntent(fp);
+ }
+ endmntent(fp);
- if (!found) {
- if (!quiet)
- fprintf(stderr,
- "%s: entry for %s not found in %s\n",
- progname, mnt, mtab);
- return -1;
+ if (!found) {
+ if (!quiet)
+ fprintf(stderr,
+ "%s: entry for %s not found in %s\n",
+ progname, mnt, mtab);
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Check whether the file specified in "fusermount -u" is really a
+ * mountpoint and not a symlink. This is necessary otherwise the user
+ * could move the mountpoint away and replace it with a symlink
+ * pointing to an arbitrary mount, thereby tricking fusermount into
+ * unmounting that (umount(2) will follow symlinks).
+ *
+ * This is the child process running in a separate mount namespace, so
+ * we don't mess with the global namespace and if the process is
+ * killed for any reason, mounts are automatically cleaned up.
+ *
+ * First make sure nothing is propagated back into the parent
+ * namespace by marking all mounts "private".
+ *
+ * Then bind mount parent onto a stable base where the user can't move
+ * it around.
+ *
+ * Finally check /proc/mounts for an entry matching the requested
+ * mountpoint. If it's found then we are OK, and the user can't move
+ * it around within the parent directory as rename() will return
+ * EBUSY. Be careful to ignore any mounts that existed before the
+ * bind.
+ */
+static int check_is_mount_child(void *p)
+{
+ const char **a = p;
+ const char *last = a[0];
+ const char *mnt = a[1];
+ int res;
+ const char *procmounts = "/proc/mounts";
+ int found;
+ FILE *fp;
+ struct mntent *entp;
+ int count;
+
+ res = mount("", "/", "", MS_PRIVATE | MS_REC, NULL);
+ if (res == -1) {
+ fprintf(stderr, "%s: failed to mark mounts private: %s\n",
+ progname, strerror(errno));
+ return 1;
+ }
+
+ fp = setmntent(procmounts, "r");
+ if (fp == NULL) {
+ fprintf(stderr, "%s: failed to open %s: %s\n", progname,
+ procmounts, strerror(errno));
+ return 1;
+ }
+
+ count = 0;
+ while (getmntent(fp) != NULL)
+ count++;
+ endmntent(fp);
+
+ fp = setmntent(procmounts, "r");
+ if (fp == NULL) {
+ fprintf(stderr, "%s: failed to open %s: %s\n", progname,
+ procmounts, strerror(errno));
+ return 1;
+ }
+
+ res = mount(".", "/", "", MS_BIND | MS_REC, NULL);
+ if (res == -1) {
+ fprintf(stderr, "%s: failed to bind parent to /: %s\n",
+ progname, strerror(errno));
+ return 1;
+ }
+
+ found = 0;
+ while ((entp = getmntent(fp)) != NULL) {
+ if (count > 0) {
+ count--;
+ continue;
+ }
+ if (entp->mnt_dir[0] == '/' &&
+ strcmp(entp->mnt_dir + 1, last) == 0) {
+ found = 1;
+ break;
}
}
+ endmntent(fp);
+
+ if (!found) {
+ fprintf(stderr, "%s: %s not mounted\n", progname, mnt);
+ return 1;
+ }
- return fuse_mnt_umount(progname, mnt, lazy);
+ return 0;
+}
+
+static pid_t clone_newns(void *a)
+{
+ char buf[131072];
+ char *stack = buf + (sizeof(buf) / 2 - ((size_t) buf & 15));
+
+#ifdef __ia64__
+ extern int __clone2(int (*fn)(void *),
+ void *child_stack_base, size_t stack_size,
+ int flags, void *arg, pid_t *ptid,
+ void *tls, pid_t *ctid);
+
+ return __clone2(check_is_mount_child, stack, sizeof(buf) / 2,
+ CLONE_NEWNS, a, NULL, NULL, NULL);
+#else
+ return clone(check_is_mount_child, stack, CLONE_NEWNS, a);
+#endif
+}
+
+static int check_is_mount(const char *last, const char *mnt)
+{
+ pid_t pid, p;
+ int status;
+ const char *a[2] = { last, mnt };
+
+ pid = clone_newns((void *) a);
+ if (pid == (pid_t) -1) {
+ fprintf(stderr, "%s: failed to clone namespace: %s\n",
+ progname, strerror(errno));
+ return -1;
+ }
+ p = waitpid(pid, &status, __WCLONE);
+ if (p == (pid_t) -1) {
+ fprintf(stderr, "%s: waitpid failed: %s\n",
+ progname, strerror(errno));
+ return -1;
+ }
+ if (!WIFEXITED(status)) {
+ fprintf(stderr, "%s: child terminated abnormally (status %i)\n",
+ progname, status);
+ return -1;
+ }
+ if (WEXITSTATUS(status) != 0)
+ return -1;
+
+ return 0;
+}
+
+static int chdir_to_parent(char *copy, const char **lastp)
+{
+ char *tmp;
+ const char *parent;
+ char buf[65536];
+ int res;
+
+ tmp = strrchr(copy, '/');
+ if (tmp == NULL || tmp[1] == '\0') {
+ fprintf(stderr, "%s: internal error: invalid abs path: <%s>\n",
+ progname, copy);
+ return -1;
+ }
+ if (tmp != copy) {
+ *tmp = '\0';
+ parent = copy;
+ *lastp = tmp + 1;
+ } else if (tmp[1] != '\0') {
+ *lastp = tmp + 1;
+ parent = "/";
+ } else {
+ *lastp = ".";
+ parent = "/";
+ }
+
+ res = chdir(parent);
+ if (res == -1) {
+ fprintf(stderr, "%s: failed to chdir to %s: %s\n",
+ progname, parent, strerror(errno));
+ return -1;
+ }
+
+ if (getcwd(buf, sizeof(buf)) == NULL) {
+ fprintf(stderr, "%s: failed to obtain current directory: %s\n",
+ progname, strerror(errno));
+ return -1;
+ }
+ if (strcmp(buf, parent) != 0) {
+ fprintf(stderr, "%s: mountpoint moved (%s -> %s)\n", progname,
+ parent, buf);
+ return -1;
+
+ }
+
+ return 0;
+}
+
+static int unmount_fuse_locked(const char *mnt, int quiet, int lazy)
+{
+ char *copy;
+ const char *last;
+ int res;
+
+ if (getuid() != 0) {
+ res = may_unmount(mnt, quiet);
+ if (res == -1)
+ return -1;
+ }
+
+ copy = strdup(mnt);
+ if (copy == NULL) {
+ fprintf(stderr, "%s: failed to allocate memory\n", progname);
+ return -1;
+ }
+
+ res = chdir_to_parent(copy, &last);
+ if (res == -1)
+ goto out;
+
+ res = check_is_mount(last, mnt);
+ if (res == -1)
+ goto out;
+
+ res = fuse_mnt_umount(progname, mnt, last, lazy);
+
+out:
+ free(copy);
+
+ return res;
+}
+
+static int unmount_fuse(const char *mnt, int quiet, int lazy)
+{
+ int res;
+ int mtablock = lock_umount();
+
+ res = unmount_fuse_locked(mnt, quiet, lazy);
+ unlock_umount(mtablock);
+
+ return res;
}
static int count_fuse_fs(void)
@@ -186,7 +466,7 @@ static int add_mount(const char *source, const char *mnt, const char *type,
static int unmount_fuse(const char *mnt, int quiet, int lazy)
{
- return fuse_mnt_umount(progname, mnt, lazy);
+ return fuse_mnt_umount(progname, mnt, mnt, lazy);
}
#endif /* IGNORE_MTAB */
@@ -225,13 +505,13 @@ static void read_conf(void)
int isnewline = 1;
while (fgets(line, sizeof(line), fp) != NULL) {
if (isnewline) {
- if (line[strlen(line)-1] == '\n') {
+ if (strlen(line) && line[strlen(line)-1] == '\n') {
strip_line(line);
parse_line(line, linenum);
} else {
isnewline = 0;
}
- } else if(line[strlen(line)-1] == '\n') {
+ } else if(strlen(line) && line[strlen(line)-1] == '\n') {
fprintf(stderr, "%s: reading %s: line %i too long\n", progname, FUSE_CONF, linenum);
isnewline = 1;
@@ -326,7 +606,7 @@ static int add_option(char **optsp, const char *opt, unsigned expand)
static int get_mnt_opts(int flags, char *opts, char **mnt_optsp)
{
int i;
- int l;
+ size_t l;
if (!(flags & MS_RDONLY) && add_option(mnt_optsp, "rw", 0) == -1)
return -1;
@@ -341,7 +621,7 @@ static int get_mnt_opts(int flags, char *opts, char **mnt_optsp)
return -1;
/* remove comma from end of opts*/
l = strlen(*mnt_optsp);
- if ((*mnt_optsp)[l-1] == ',')
+ if (l && (*mnt_optsp)[l-1] == ',')
(*mnt_optsp)[l-1] = '\0';
if (getuid() != 0) {
const char *user = get_user_name();
@@ -366,18 +646,26 @@ static int opt_eq(const char *s, unsigned len, const char *opt)
static int get_string_opt(const char *s, unsigned len, const char *opt,
char **val)
{
+ int i;
unsigned opt_len = strlen(opt);
+ char *d;
- if (*val)
- free(*val);
+ free(*val);
*val = (char *) malloc(len - opt_len + 1);
if (!*val) {
fprintf(stderr, "%s: failed to allocate memory\n", progname);
return 0;
}
- memcpy(*val, s + opt_len, len - opt_len);
- (*val)[len - opt_len] = '\0';
+ d = *val;
+ s += opt_len;
+ len -= opt_len;
+ for (i = 0; i < len; i++) {
+ if (s[i] == '\\' && i + 1 < len)
+ i++;
+ *d++ = s[i];
+ }
+ *d = '\0';
return 1;
}
@@ -408,7 +696,12 @@ static int do_mount(const char *mnt, char **typep, mode_t rootmode,
unsigned len;
const char *fsname_str = "fsname=";
const char *subtype_str = "subtype=";
- for (len = 0; s[len] && s[len] != ','; len++);
+ for (len = 0; s[len]; len++) {
+ if (s[len] == '\\' && s[len + 1])
+ len++;
+ else if (s[len] == ',')
+ break;
+ }
if (begins_with(s, fsname_str)) {
if (!get_string_opt(s, len, fsname_str, &fsname))
goto err;
@@ -526,15 +819,14 @@ static int do_mount(const char *mnt, char **typep, mode_t rootmode,
fprintf(stderr, "%s: mount failed: %s\n", progname,
strerror(errno_save));
goto err;
- } else {
- *sourcep = source;
- *typep = type;
- *mnt_optsp = mnt_opts;
}
+ *sourcep = source;
+ *typep = type;
+ *mnt_optsp = mnt_opts;
free(fsname);
free(optbuf);
- return res;
+ return 0;
err:
free(fsname);
@@ -577,8 +869,7 @@ static int check_version(const char *dev)
return 0;
}
-static int check_perm(const char **mntp, struct stat *stbuf, int *currdir_fd,
- int *mountpoint_fd)
+static int check_perm(const char **mntp, struct stat *stbuf, int *mountpoint_fd)
{
int res;
const char *mnt = *mntp;
@@ -596,13 +887,6 @@ static int check_perm(const char **mntp, struct stat *stbuf, int *currdir_fd,
return 0;
if (S_ISDIR(stbuf->st_mode)) {
- *currdir_fd = open(".", O_RDONLY);
- if (*currdir_fd == -1) {
- fprintf(stderr,
- "%s: failed to open current directory: %s\n",
- progname, strerror(errno));
- return -1;
- }
res = chdir(mnt);
if (res == -1) {
fprintf(stderr,
@@ -719,8 +1003,36 @@ static int open_fuse_device(char **devp)
return -1;
}
+static int check_fuse_device(char *devfd, char **devp)
+{
+ int res;
+ char *devlink;
+
+ res = asprintf(&devlink, "/proc/self/fd/%s", devfd);
+ if (res == -1) {
+ fprintf(stderr, "%s: failed to allocate memory\n", progname);
+ return -1;
+ }
+
+ *devp = (char *) calloc(1, PATH_MAX + 1);
+ if (!*devp) {
+ fprintf(stderr, "%s: failed to allocate memory\n", progname);
+ free(devlink);
+ return -1;
+ }
+
+ res = readlink (devlink, *devp, PATH_MAX);
+ free (devlink);
+ if (res == -1) {
+ fprintf(stderr, "%s: specified fuse fd is invalid\n",
+ progname);
+ return -1;
+ }
-static int mount_fuse(const char *mnt, const char *opts)
+ return atoi(devfd);
+}
+
+static int mount_fuse(const char *mnt, const char *opts, char *devfd)
{
int res;
int fd;
@@ -730,10 +1042,9 @@ static int mount_fuse(const char *mnt, const char *opts)
char *source = NULL;
char *mnt_opts = NULL;
const char *real_mnt = mnt;
- int currdir_fd = -1;
int mountpoint_fd = -1;
- fd = open_fuse_device(&dev);
+ fd = devfd ? check_fuse_device(devfd, &dev) : open_fuse_device(&dev);
if (fd == -1)
return -1;
@@ -744,15 +1055,13 @@ static int mount_fuse(const char *mnt, const char *opts)
int mount_count = count_fuse_fs();
if (mount_count >= mount_max) {
fprintf(stderr, "%s: too many FUSE filesystems mounted; mount_max=N can be set in /etc/fuse.conf\n", progname);
- close(fd);
- return -1;
+ goto fail_close_fd;
}
}
res = check_version(dev);
if (res != -1) {
- res = check_perm(&real_mnt, &stbuf, &currdir_fd,
- &mountpoint_fd);
+ res = check_perm(&real_mnt, &stbuf, &mountpoint_fd);
restore_privs();
if (res != -1)
res = do_mount(real_mnt, &type, stbuf.st_mode & S_IFMT,
@@ -761,33 +1070,38 @@ static int mount_fuse(const char *mnt, const char *opts)
} else
restore_privs();
- if (currdir_fd != -1) {
- fchdir(currdir_fd);
- close(currdir_fd);
- }
if (mountpoint_fd != -1)
close(mountpoint_fd);
+ if (res == -1)
+ goto fail_close_fd;
+
+ res = chdir("/");
if (res == -1) {
- close(fd);
- return -1;
+ fprintf(stderr, "%s: failed to chdir to '/'\n", progname);
+ goto fail_close_fd;
}
if (geteuid() == 0) {
res = add_mount(source, mnt, type, mnt_opts);
if (res == -1) {
- umount2(mnt, 2); /* lazy umount */
- close(fd);
- return -1;
+ /* Can't clean up mount in a non-racy way */
+ goto fail_close_fd;
}
}
+out_free:
free(source);
free(type);
free(mnt_opts);
free(dev);
return fd;
+
+fail_close_fd:
+ close(fd);
+ fd = -1;
+ goto out_free;
}
static int send_fd(int sock_fd, int fd)
@@ -857,6 +1171,7 @@ int main(int argc, char *argv[])
static int unmount = 0;
static int lazy = 0;
static int quiet = 0;
+ char *devfd;
char *commfd;
int cfd;
const char *opts = "";
@@ -925,6 +1240,13 @@ int main(int argc, char *argv[])
drop_privs();
mnt = fuse_mnt_resolve_path(progname, origmnt);
+ if (mnt != NULL) {
+ res = chdir("/");
+ if (res == -1) {
+ fprintf(stderr, "%s: failed to chdir to '/'\n", progname);
+ exit(1);
+ }
+ }
restore_privs();
if (mnt == NULL)
exit(1);
@@ -945,21 +1267,26 @@ int main(int argc, char *argv[])
return 0;
}
- commfd = getenv(FUSE_COMMFD_ENV);
- if (commfd == NULL) {
- fprintf(stderr, "%s: old style mounting not supported\n",
- progname);
- exit(1);
+ devfd = getenv(FUSE_DEVFD_ENV);
+ if (devfd == NULL) {
+ commfd = getenv(FUSE_COMMFD_ENV);
+ if (commfd == NULL) {
+ fprintf(stderr, "%s: old style mounting not supported\n",
+ progname);
+ exit(1);
+ }
}
- fd = mount_fuse(mnt, opts);
+ fd = mount_fuse(mnt, opts, devfd);
if (fd == -1)
exit(1);
- cfd = atoi(commfd);
- res = send_fd(cfd, fd);
- if (res == -1)
- exit(1);
+ if (devfd == NULL) {
+ cfd = atoi(commfd);
+ res = send_fd(cfd, fd);
+ if (res == -1)
+ exit(1);
+ }
return 0;
}
diff --git a/contrib/fuse-util/mount_util.c b/contrib/fuse-util/mount_util.c
new file mode 100644
index 000000000..911b84445
--- /dev/null
+++ b/contrib/fuse-util/mount_util.c
@@ -0,0 +1,64 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
+
+ This program can be distributed under the terms of the GNU LGPLv2.
+ See the file COPYING.LIB.
+*/
+
+#include <dirent.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+int fuse_mnt_check_empty(const char *progname, const char *mnt,
+ mode_t rootmode, off_t rootsize)
+{
+ int isempty = 1;
+
+ if (S_ISDIR(rootmode)) {
+ struct dirent *ent;
+ DIR *dp = opendir(mnt);
+ if (dp == NULL) {
+ fprintf(stderr,
+ "%s: failed to open mountpoint for reading: %s\n",
+ progname, strerror(errno));
+ return -1;
+ }
+ while ((ent = readdir(dp)) != NULL) {
+ if (strcmp(ent->d_name, ".") != 0 &&
+ strcmp(ent->d_name, "..") != 0) {
+ isempty = 0;
+ break;
+ }
+ }
+ closedir(dp);
+ } else if (rootsize)
+ isempty = 0;
+
+ if (!isempty) {
+ fprintf(stderr, "%s: mountpoint is not empty\n", progname);
+ fprintf(stderr, "%s: if you are sure this is safe, use the 'nonempty' mount option\n", progname);
+ return -1;
+ }
+ return 0;
+}
+
+int fuse_mnt_check_fuseblk(void)
+{
+ char buf[256];
+ FILE *f = fopen("/proc/filesystems", "r");
+ if (!f)
+ return 1;
+
+ while (fgets(buf, sizeof(buf), f))
+ if (strstr(buf, "fuseblk\n")) {
+ fclose(f);
+ return 1;
+ }
+
+ fclose(f);
+ return 0;
+}
diff --git a/contrib/ipaddr-py/COPYING b/contrib/ipaddr-py/COPYING
new file mode 100644
index 000000000..d64569567
--- /dev/null
+++ b/contrib/ipaddr-py/COPYING
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/contrib/ipaddr-py/MANIFEST.in b/contrib/ipaddr-py/MANIFEST.in
new file mode 100644
index 000000000..f57280444
--- /dev/null
+++ b/contrib/ipaddr-py/MANIFEST.in
@@ -0,0 +1,3 @@
+include COPYING
+include ipaddr_test.py
+include RELEASENOTES
diff --git a/contrib/ipaddr-py/OWNERS b/contrib/ipaddr-py/OWNERS
new file mode 100644
index 000000000..501673e03
--- /dev/null
+++ b/contrib/ipaddr-py/OWNERS
@@ -0,0 +1,4 @@
+pmoody
+harro
+mshields
+smart
diff --git a/contrib/ipaddr-py/README b/contrib/ipaddr-py/README
new file mode 100644
index 000000000..1b54294bb
--- /dev/null
+++ b/contrib/ipaddr-py/README
@@ -0,0 +1,8 @@
+ipaddr.py is a library for working with IP addresses, both IPv4 and IPv6.
+It was developed by Google for internal use, and is now open source.
+
+Project home page: http://code.google.com/p/ipaddr-py/
+
+Please send contributions to ipaddr-py-dev@googlegroups.com. Code should
+include unit tests and follow the Google Python style guide:
+http://code.google.com/p/soc/wiki/PythonStyleGuide
diff --git a/contrib/ipaddr-py/ipaddr.py b/contrib/ipaddr-py/ipaddr.py
new file mode 100644
index 000000000..a89298a31
--- /dev/null
+++ b/contrib/ipaddr-py/ipaddr.py
@@ -0,0 +1,1907 @@
+#!/usr/bin/python
+#
+# Copyright 2007 Google Inc.
+# Licensed to PSF under a Contributor Agreement.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+
+"""A fast, lightweight IPv4/IPv6 manipulation library in Python.
+
+This library is used to create/poke/manipulate IPv4 and IPv6 addresses
+and networks.
+
+"""
+
+__version__ = 'trunk'
+
+import struct
+
+IPV4LENGTH = 32
+IPV6LENGTH = 128
+
+
+class AddressValueError(ValueError):
+ """A Value Error related to the address."""
+
+
+class NetmaskValueError(ValueError):
+ """A Value Error related to the netmask."""
+
+
+def IPAddress(address, version=None):
+ """Take an IP string/int and return an object of the correct type.
+
+ Args:
+ address: A string or integer, the IP address. Either IPv4 or
+ IPv6 addresses may be supplied; integers less than 2**32 will
+ be considered to be IPv4 by default.
+ version: An Integer, 4 or 6. If set, don't try to automatically
+ determine what the IP address type is. important for things
+ like IPAddress(1), which could be IPv4, '0.0.0.1', or IPv6,
+ '::1'.
+
+ Returns:
+ An IPv4Address or IPv6Address object.
+
+ Raises:
+ ValueError: if the string passed isn't either a v4 or a v6
+ address.
+
+ """
+ if version:
+ if version == 4:
+ return IPv4Address(address)
+ elif version == 6:
+ return IPv6Address(address)
+
+ try:
+ return IPv4Address(address)
+ except (AddressValueError, NetmaskValueError):
+ pass
+
+ try:
+ return IPv6Address(address)
+ except (AddressValueError, NetmaskValueError):
+ pass
+
+ raise ValueError('%r does not appear to be an IPv4 or IPv6 address' %
+ address)
+
+
+def IPNetwork(address, version=None, strict=False):
+ """Take an IP string/int and return an object of the correct type.
+
+ Args:
+ address: A string or integer, the IP address. Either IPv4 or
+ IPv6 addresses may be supplied; integers less than 2**32 will
+ be considered to be IPv4 by default.
+ version: An Integer, if set, don't try to automatically
+ determine what the IP address type is. important for things
+ like IPNetwork(1), which could be IPv4, '0.0.0.1/32', or IPv6,
+ '::1/128'.
+
+ Returns:
+ An IPv4Network or IPv6Network object.
+
+ Raises:
+ ValueError: if the string passed isn't either a v4 or a v6
+ address. Or if a strict network was requested and a strict
+ network wasn't given.
+
+ """
+ if version:
+ if version == 4:
+ return IPv4Network(address, strict)
+ elif version == 6:
+ return IPv6Network(address, strict)
+
+ try:
+ return IPv4Network(address, strict)
+ except (AddressValueError, NetmaskValueError):
+ pass
+
+ try:
+ return IPv6Network(address, strict)
+ except (AddressValueError, NetmaskValueError):
+ pass
+
+ raise ValueError('%r does not appear to be an IPv4 or IPv6 network' %
+ address)
+
+
+def v4_int_to_packed(address):
+ """The binary representation of this address.
+
+ Args:
+ address: An integer representation of an IPv4 IP address.
+
+ Returns:
+ The binary representation of this address.
+
+ Raises:
+ ValueError: If the integer is too large to be an IPv4 IP
+ address.
+ """
+ if address > _BaseV4._ALL_ONES:
+ raise ValueError('Address too large for IPv4')
+ return struct.pack('!I', address)
+
+
+def v6_int_to_packed(address):
+ """The binary representation of this address.
+
+ Args:
+ address: An integer representation of an IPv4 IP address.
+
+ Returns:
+ The binary representation of this address.
+ """
+ return struct.pack('!QQ', address >> 64, address & (2**64 - 1))
+
+
+def _find_address_range(addresses):
+ """Find a sequence of addresses.
+
+ Args:
+ addresses: a list of IPv4 or IPv6 addresses.
+
+ Returns:
+ A tuple containing the first and last IP addresses in the sequence.
+
+ """
+ first = last = addresses[0]
+ for ip in addresses[1:]:
+ if ip._ip == last._ip + 1:
+ last = ip
+ else:
+ break
+ return (first, last)
+
+def _get_prefix_length(number1, number2, bits):
+ """Get the number of leading bits that are same for two numbers.
+
+ Args:
+ number1: an integer.
+ number2: another integer.
+ bits: the maximum number of bits to compare.
+
+ Returns:
+ The number of leading bits that are the same for two numbers.
+
+ """
+ for i in range(bits):
+ if number1 >> i == number2 >> i:
+ return bits - i
+ return 0
+
+def _count_righthand_zero_bits(number, bits):
+ """Count the number of zero bits on the right hand side.
+
+ Args:
+ number: an integer.
+ bits: maximum number of bits to count.
+
+ Returns:
+ The number of zero bits on the right hand side of the number.
+
+ """
+ if number == 0:
+ return bits
+ for i in range(bits):
+ if (number >> i) % 2:
+ return i
+
+def summarize_address_range(first, last):
+ """Summarize a network range given the first and last IP addresses.
+
+ Example:
+ >>> summarize_address_range(IPv4Address('1.1.1.0'),
+ IPv4Address('1.1.1.130'))
+ [IPv4Network('1.1.1.0/25'), IPv4Network('1.1.1.128/31'),
+ IPv4Network('1.1.1.130/32')]
+
+ Args:
+ first: the first IPv4Address or IPv6Address in the range.
+ last: the last IPv4Address or IPv6Address in the range.
+
+ Returns:
+ The address range collapsed to a list of IPv4Network's or
+ IPv6Network's.
+
+ Raise:
+ TypeError:
+ If the first and last objects are not IP addresses.
+ If the first and last objects are not the same version.
+ ValueError:
+ If the last object is not greater than the first.
+ If the version is not 4 or 6.
+
+ """
+ if not (isinstance(first, _BaseIP) and isinstance(last, _BaseIP)):
+ raise TypeError('first and last must be IP addresses, not networks')
+ if first.version != last.version:
+ raise TypeError("%s and %s are not of the same version" % (
+ str(first), str(last)))
+ if first > last:
+ raise ValueError('last IP address must be greater than first')
+
+ networks = []
+
+ if first.version == 4:
+ ip = IPv4Network
+ elif first.version == 6:
+ ip = IPv6Network
+ else:
+ raise ValueError('unknown IP version')
+
+ ip_bits = first._max_prefixlen
+ first_int = first._ip
+ last_int = last._ip
+ while first_int <= last_int:
+ nbits = _count_righthand_zero_bits(first_int, ip_bits)
+ current = None
+ while nbits >= 0:
+ addend = 2**nbits - 1
+ current = first_int + addend
+ nbits -= 1
+ if current <= last_int:
+ break
+ prefix = _get_prefix_length(first_int, current, ip_bits)
+ net = ip('%s/%d' % (str(first), prefix))
+ networks.append(net)
+ if current == ip._ALL_ONES:
+ break
+ first_int = current + 1
+ first = IPAddress(first_int, version=first._version)
+ return networks
+
+def _collapse_address_list_recursive(addresses):
+ """Loops through the addresses, collapsing concurrent netblocks.
+
+ Example:
+
+ ip1 = IPv4Network('1.1.0.0/24')
+ ip2 = IPv4Network('1.1.1.0/24')
+ ip3 = IPv4Network('1.1.2.0/24')
+ ip4 = IPv4Network('1.1.3.0/24')
+ ip5 = IPv4Network('1.1.4.0/24')
+ ip6 = IPv4Network('1.1.0.1/22')
+
+ _collapse_address_list_recursive([ip1, ip2, ip3, ip4, ip5, ip6]) ->
+ [IPv4Network('1.1.0.0/22'), IPv4Network('1.1.4.0/24')]
+
+ This shouldn't be called directly; it is called via
+ collapse_address_list([]).
+
+ Args:
+ addresses: A list of IPv4Network's or IPv6Network's
+
+ Returns:
+ A list of IPv4Network's or IPv6Network's depending on what we were
+ passed.
+
+ """
+ ret_array = []
+ optimized = False
+
+ for cur_addr in addresses:
+ if not ret_array:
+ ret_array.append(cur_addr)
+ continue
+ if cur_addr in ret_array[-1]:
+ optimized = True
+ elif cur_addr == ret_array[-1].supernet().subnet()[1]:
+ ret_array.append(ret_array.pop().supernet())
+ optimized = True
+ else:
+ ret_array.append(cur_addr)
+
+ if optimized:
+ return _collapse_address_list_recursive(ret_array)
+
+ return ret_array
+
+
+def collapse_address_list(addresses):
+ """Collapse a list of IP objects.
+
+ Example:
+ collapse_address_list([IPv4('1.1.0.0/24'), IPv4('1.1.1.0/24')]) ->
+ [IPv4('1.1.0.0/23')]
+
+ Args:
+ addresses: A list of IPv4Network or IPv6Network objects.
+
+ Returns:
+ A list of IPv4Network or IPv6Network objects depending on what we
+ were passed.
+
+ Raises:
+ TypeError: If passed a list of mixed version objects.
+
+ """
+ i = 0
+ addrs = []
+ ips = []
+ nets = []
+
+ # split IP addresses and networks
+ for ip in addresses:
+ if isinstance(ip, _BaseIP):
+ if ips and ips[-1]._version != ip._version:
+ raise TypeError("%s and %s are not of the same version" % (
+ str(ip), str(ips[-1])))
+ ips.append(ip)
+ elif ip._prefixlen == ip._max_prefixlen:
+ if ips and ips[-1]._version != ip._version:
+ raise TypeError("%s and %s are not of the same version" % (
+ str(ip), str(ips[-1])))
+ ips.append(ip.ip)
+ else:
+ if nets and nets[-1]._version != ip._version:
+ raise TypeError("%s and %s are not of the same version" % (
+ str(ip), str(ips[-1])))
+ nets.append(ip)
+
+ # sort and dedup
+ ips = sorted(set(ips))
+ nets = sorted(set(nets))
+
+ while i < len(ips):
+ (first, last) = _find_address_range(ips[i:])
+ i = ips.index(last) + 1
+ addrs.extend(summarize_address_range(first, last))
+
+ return _collapse_address_list_recursive(sorted(
+ addrs + nets, key=_BaseNet._get_networks_key))
+
+# backwards compatibility
+CollapseAddrList = collapse_address_list
+
+# Test whether this Python implementation supports byte objects that
+# are not identical to str ones.
+# We need to exclude platforms where bytes == str so that we can
+# distinguish between packed representations and strings, for example
+# b'12::' (the IPv4 address 49.50.58.58) and '12::' (an IPv6 address).
+try:
+ _compat_has_real_bytes = bytes is not str
+except NameError: # <Python2.6
+ _compat_has_real_bytes = False
+
+def get_mixed_type_key(obj):
+ """Return a key suitable for sorting between networks and addresses.
+
+ Address and Network objects are not sortable by default; they're
+ fundamentally different so the expression
+
+ IPv4Address('1.1.1.1') <= IPv4Network('1.1.1.1/24')
+
+ doesn't make any sense. There are some times however, where you may wish
+ to have ipaddr sort these for you anyway. If you need to do this, you
+ can use this function as the key= argument to sorted().
+
+ Args:
+ obj: either a Network or Address object.
+ Returns:
+ appropriate key.
+
+ """
+ if isinstance(obj, _BaseNet):
+ return obj._get_networks_key()
+ elif isinstance(obj, _BaseIP):
+ return obj._get_address_key()
+ return NotImplemented
+
+class _IPAddrBase(object):
+
+ """The mother class."""
+
+ def __index__(self):
+ return self._ip
+
+ def __int__(self):
+ return self._ip
+
+ def __hex__(self):
+ return hex(self._ip)
+
+ @property
+ def exploded(self):
+ """Return the longhand version of the IP address as a string."""
+ return self._explode_shorthand_ip_string()
+
+ @property
+ def compressed(self):
+ """Return the shorthand version of the IP address as a string."""
+ return str(self)
+
+
+class _BaseIP(_IPAddrBase):
+
+ """A generic IP object.
+
+ This IP class contains the version independent methods which are
+ used by single IP addresses.
+
+ """
+
+ def __init__(self, address):
+ if (not (_compat_has_real_bytes and isinstance(address, bytes))
+ and '/' in str(address)):
+ raise AddressValueError(address)
+
+ def __eq__(self, other):
+ try:
+ return (self._ip == other._ip
+ and self._version == other._version)
+ except AttributeError:
+ return NotImplemented
+
+ def __ne__(self, other):
+ eq = self.__eq__(other)
+ if eq is NotImplemented:
+ return NotImplemented
+ return not eq
+
+ def __le__(self, other):
+ gt = self.__gt__(other)
+ if gt is NotImplemented:
+ return NotImplemented
+ return not gt
+
+ def __ge__(self, other):
+ lt = self.__lt__(other)
+ if lt is NotImplemented:
+ return NotImplemented
+ return not lt
+
+ def __lt__(self, other):
+ if self._version != other._version:
+ raise TypeError('%s and %s are not of the same version' % (
+ str(self), str(other)))
+ if not isinstance(other, _BaseIP):
+ raise TypeError('%s and %s are not of the same type' % (
+ str(self), str(other)))
+ if self._ip != other._ip:
+ return self._ip < other._ip
+ return False
+
+ def __gt__(self, other):
+ if self._version != other._version:
+ raise TypeError('%s and %s are not of the same version' % (
+ str(self), str(other)))
+ if not isinstance(other, _BaseIP):
+ raise TypeError('%s and %s are not of the same type' % (
+ str(self), str(other)))
+ if self._ip != other._ip:
+ return self._ip > other._ip
+ return False
+
+ # Shorthand for Integer addition and subtraction. This is not
+ # meant to ever support addition/subtraction of addresses.
+ def __add__(self, other):
+ if not isinstance(other, int):
+ return NotImplemented
+ return IPAddress(int(self) + other, version=self._version)
+
+ def __sub__(self, other):
+ if not isinstance(other, int):
+ return NotImplemented
+ return IPAddress(int(self) - other, version=self._version)
+
+ def __repr__(self):
+ return '%s(%r)' % (self.__class__.__name__, str(self))
+
+ def __str__(self):
+ return '%s' % self._string_from_ip_int(self._ip)
+
+ def __hash__(self):
+ return hash(hex(long(self._ip)))
+
+ def _get_address_key(self):
+ return (self._version, self)
+
+ @property
+ def version(self):
+ raise NotImplementedError('BaseIP has no version')
+
+
+class _BaseNet(_IPAddrBase):
+
+ """A generic IP object.
+
+ This IP class contains the version independent methods which are
+ used by networks.
+
+ """
+
+ def __init__(self, address):
+ self._cache = {}
+
+ def __repr__(self):
+ return '%s(%r)' % (self.__class__.__name__, str(self))
+
+ def iterhosts(self):
+ """Generate Iterator over usable hosts in a network.
+
+ This is like __iter__ except it doesn't return the network
+ or broadcast addresses.
+
+ """
+ cur = int(self.network) + 1
+ bcast = int(self.broadcast) - 1
+ while cur <= bcast:
+ cur += 1
+ yield IPAddress(cur - 1, version=self._version)
+
+ def __iter__(self):
+ cur = int(self.network)
+ bcast = int(self.broadcast)
+ while cur <= bcast:
+ cur += 1
+ yield IPAddress(cur - 1, version=self._version)
+
+ def __getitem__(self, n):
+ network = int(self.network)
+ broadcast = int(self.broadcast)
+ if n >= 0:
+ if network + n > broadcast:
+ raise IndexError
+ return IPAddress(network + n, version=self._version)
+ else:
+ n += 1
+ if broadcast + n < network:
+ raise IndexError
+ return IPAddress(broadcast + n, version=self._version)
+
+ def __lt__(self, other):
+ if self._version != other._version:
+ raise TypeError('%s and %s are not of the same version' % (
+ str(self), str(other)))
+ if not isinstance(other, _BaseNet):
+ raise TypeError('%s and %s are not of the same type' % (
+ str(self), str(other)))
+ if self.network != other.network:
+ return self.network < other.network
+ if self.netmask != other.netmask:
+ return self.netmask < other.netmask
+ return False
+
+ def __gt__(self, other):
+ if self._version != other._version:
+ raise TypeError('%s and %s are not of the same version' % (
+ str(self), str(other)))
+ if not isinstance(other, _BaseNet):
+ raise TypeError('%s and %s are not of the same type' % (
+ str(self), str(other)))
+ if self.network != other.network:
+ return self.network > other.network
+ if self.netmask != other.netmask:
+ return self.netmask > other.netmask
+ return False
+
+ def __le__(self, other):
+ gt = self.__gt__(other)
+ if gt is NotImplemented:
+ return NotImplemented
+ return not gt
+
+ def __ge__(self, other):
+ lt = self.__lt__(other)
+ if lt is NotImplemented:
+ return NotImplemented
+ return not lt
+
+ def __eq__(self, other):
+ try:
+ return (self._version == other._version
+ and self.network == other.network
+ and int(self.netmask) == int(other.netmask))
+ except AttributeError:
+ if isinstance(other, _BaseIP):
+ return (self._version == other._version
+ and self._ip == other._ip)
+
+ def __ne__(self, other):
+ eq = self.__eq__(other)
+ if eq is NotImplemented:
+ return NotImplemented
+ return not eq
+
+ def __str__(self):
+ return '%s/%s' % (str(self.ip),
+ str(self._prefixlen))
+
+ def __hash__(self):
+ return hash(int(self.network) ^ int(self.netmask))
+
+ def __contains__(self, other):
+ # always false if one is v4 and the other is v6.
+ if self._version != other._version:
+ return False
+ # dealing with another network.
+ if isinstance(other, _BaseNet):
+ return (self.network <= other.network and
+ self.broadcast >= other.broadcast)
+ # dealing with another address
+ else:
+ return (int(self.network) <= int(other._ip) <=
+ int(self.broadcast))
+
+ def overlaps(self, other):
+ """Tell if self is partly contained in other."""
+ return self.network in other or self.broadcast in other or (
+ other.network in self or other.broadcast in self)
+
+ @property
+ def network(self):
+ x = self._cache.get('network')
+ if x is None:
+ x = IPAddress(self._ip & int(self.netmask), version=self._version)
+ self._cache['network'] = x
+ return x
+
+ @property
+ def broadcast(self):
+ x = self._cache.get('broadcast')
+ if x is None:
+ x = IPAddress(self._ip | int(self.hostmask), version=self._version)
+ self._cache['broadcast'] = x
+ return x
+
+ @property
+ def hostmask(self):
+ x = self._cache.get('hostmask')
+ if x is None:
+ x = IPAddress(int(self.netmask) ^ self._ALL_ONES,
+ version=self._version)
+ self._cache['hostmask'] = x
+ return x
+
+ @property
+ def with_prefixlen(self):
+ return '%s/%d' % (str(self.ip), self._prefixlen)
+
+ @property
+ def with_netmask(self):
+ return '%s/%s' % (str(self.ip), str(self.netmask))
+
+ @property
+ def with_hostmask(self):
+ return '%s/%s' % (str(self.ip), str(self.hostmask))
+
+ @property
+ def numhosts(self):
+ """Number of hosts in the current subnet."""
+ return int(self.broadcast) - int(self.network) + 1
+
+ @property
+ def version(self):
+ raise NotImplementedError('BaseNet has no version')
+
+ @property
+ def prefixlen(self):
+ return self._prefixlen
+
+ def address_exclude(self, other):
+ """Remove an address from a larger block.
+
+ For example:
+
+ addr1 = IPNetwork('10.1.1.0/24')
+ addr2 = IPNetwork('10.1.1.0/26')
+ addr1.address_exclude(addr2) =
+ [IPNetwork('10.1.1.64/26'), IPNetwork('10.1.1.128/25')]
+
+ or IPv6:
+
+ addr1 = IPNetwork('::1/32')
+ addr2 = IPNetwork('::1/128')
+ addr1.address_exclude(addr2) = [IPNetwork('::0/128'),
+ IPNetwork('::2/127'),
+ IPNetwork('::4/126'),
+ IPNetwork('::8/125'),
+ ...
+ IPNetwork('0:0:8000::/33')]
+
+ Args:
+ other: An IPvXNetwork object of the same type.
+
+ Returns:
+ A sorted list of IPvXNetwork objects addresses which is self
+ minus other.
+
+ Raises:
+ TypeError: If self and other are of difffering address
+ versions, or if other is not a network object.
+ ValueError: If other is not completely contained by self.
+
+ """
+ if not self._version == other._version:
+ raise TypeError("%s and %s are not of the same version" % (
+ str(self), str(other)))
+
+ if not isinstance(other, _BaseNet):
+ raise TypeError("%s is not a network object" % str(other))
+
+ if other not in self:
+ raise ValueError('%s not contained in %s' % (str(other),
+ str(self)))
+ if other == self:
+ return []
+
+ ret_addrs = []
+
+ # Make sure we're comparing the network of other.
+ other = IPNetwork('%s/%s' % (str(other.network), str(other.prefixlen)),
+ version=other._version)
+
+ s1, s2 = self.subnet()
+ while s1 != other and s2 != other:
+ if other in s1:
+ ret_addrs.append(s2)
+ s1, s2 = s1.subnet()
+ elif other in s2:
+ ret_addrs.append(s1)
+ s1, s2 = s2.subnet()
+ else:
+ # If we got here, there's a bug somewhere.
+ assert True == False, ('Error performing exclusion: '
+ 's1: %s s2: %s other: %s' %
+ (str(s1), str(s2), str(other)))
+ if s1 == other:
+ ret_addrs.append(s2)
+ elif s2 == other:
+ ret_addrs.append(s1)
+ else:
+ # If we got here, there's a bug somewhere.
+ assert True == False, ('Error performing exclusion: '
+ 's1: %s s2: %s other: %s' %
+ (str(s1), str(s2), str(other)))
+
+ return sorted(ret_addrs, key=_BaseNet._get_networks_key)
+
+ def compare_networks(self, other):
+ """Compare two IP objects.
+
+ This is only concerned about the comparison of the integer
+ representation of the network addresses. This means that the
+ host bits aren't considered at all in this method. If you want
+ to compare host bits, you can easily enough do a
+ 'HostA._ip < HostB._ip'
+
+ Args:
+ other: An IP object.
+
+ Returns:
+ If the IP versions of self and other are the same, returns:
+
+ -1 if self < other:
+ eg: IPv4('1.1.1.0/24') < IPv4('1.1.2.0/24')
+ IPv6('1080::200C:417A') < IPv6('1080::200B:417B')
+ 0 if self == other
+ eg: IPv4('1.1.1.1/24') == IPv4('1.1.1.2/24')
+ IPv6('1080::200C:417A/96') == IPv6('1080::200C:417B/96')
+ 1 if self > other
+ eg: IPv4('1.1.1.0/24') > IPv4('1.1.0.0/24')
+ IPv6('1080::1:200C:417A/112') >
+ IPv6('1080::0:200C:417A/112')
+
+ If the IP versions of self and other are different, returns:
+
+ -1 if self._version < other._version
+ eg: IPv4('10.0.0.1/24') < IPv6('::1/128')
+ 1 if self._version > other._version
+ eg: IPv6('::1/128') > IPv4('255.255.255.0/24')
+
+ """
+ if self._version < other._version:
+ return -1
+ if self._version > other._version:
+ return 1
+ # self._version == other._version below here:
+ if self.network < other.network:
+ return -1
+ if self.network > other.network:
+ return 1
+ # self.network == other.network below here:
+ if self.netmask < other.netmask:
+ return -1
+ if self.netmask > other.netmask:
+ return 1
+ # self.network == other.network and self.netmask == other.netmask
+ return 0
+
+ def _get_networks_key(self):
+ """Network-only key function.
+
+ Returns an object that identifies this address' network and
+ netmask. This function is a suitable "key" argument for sorted()
+ and list.sort().
+
+ """
+ return (self._version, self.network, self.netmask)
+
+ def _ip_int_from_prefix(self, prefixlen=None):
+ """Turn the prefix length netmask into a int for comparison.
+
+ Args:
+ prefixlen: An integer, the prefix length.
+
+ Returns:
+ An integer.
+
+ """
+ if not prefixlen and prefixlen != 0:
+ prefixlen = self._prefixlen
+ return self._ALL_ONES ^ (self._ALL_ONES >> prefixlen)
+
+ def _prefix_from_ip_int(self, ip_int, mask=32):
+ """Return prefix length from the decimal netmask.
+
+ Args:
+ ip_int: An integer, the IP address.
+ mask: The netmask. Defaults to 32.
+
+ Returns:
+ An integer, the prefix length.
+
+ """
+ while mask:
+ if ip_int & 1 == 1:
+ break
+ ip_int >>= 1
+ mask -= 1
+
+ return mask
+
+ def _ip_string_from_prefix(self, prefixlen=None):
+ """Turn a prefix length into a dotted decimal string.
+
+ Args:
+ prefixlen: An integer, the netmask prefix length.
+
+ Returns:
+ A string, the dotted decimal netmask string.
+
+ """
+ if not prefixlen:
+ prefixlen = self._prefixlen
+ return self._string_from_ip_int(self._ip_int_from_prefix(prefixlen))
+
+ def iter_subnets(self, prefixlen_diff=1, new_prefix=None):
+ """The subnets which join to make the current subnet.
+
+ In the case that self contains only one IP
+ (self._prefixlen == 32 for IPv4 or self._prefixlen == 128
+ for IPv6), return a list with just ourself.
+
+ Args:
+ prefixlen_diff: An integer, the amount the prefix length
+ should be increased by. This should not be set if
+ new_prefix is also set.
+ new_prefix: The desired new prefix length. This must be a
+ larger number (smaller prefix) than the existing prefix.
+ This should not be set if prefixlen_diff is also set.
+
+ Returns:
+ An iterator of IPv(4|6) objects.
+
+ Raises:
+ ValueError: The prefixlen_diff is too small or too large.
+ OR
+ prefixlen_diff and new_prefix are both set or new_prefix
+ is a smaller number than the current prefix (smaller
+ number means a larger network)
+
+ """
+ if self._prefixlen == self._max_prefixlen:
+ yield self
+ return
+
+ if new_prefix is not None:
+ if new_prefix < self._prefixlen:
+ raise ValueError('new prefix must be longer')
+ if prefixlen_diff != 1:
+ raise ValueError('cannot set prefixlen_diff and new_prefix')
+ prefixlen_diff = new_prefix - self._prefixlen
+
+ if prefixlen_diff < 0:
+ raise ValueError('prefix length diff must be > 0')
+ new_prefixlen = self._prefixlen + prefixlen_diff
+
+ if not self._is_valid_netmask(str(new_prefixlen)):
+ raise ValueError(
+ 'prefix length diff %d is invalid for netblock %s' % (
+ new_prefixlen, str(self)))
+
+ first = IPNetwork('%s/%s' % (str(self.network),
+ str(self._prefixlen + prefixlen_diff)),
+ version=self._version)
+
+ yield first
+ current = first
+ while True:
+ broadcast = current.broadcast
+ if broadcast == self.broadcast:
+ return
+ new_addr = IPAddress(int(broadcast) + 1, version=self._version)
+ current = IPNetwork('%s/%s' % (str(new_addr), str(new_prefixlen)),
+ version=self._version)
+
+ yield current
+
+ def masked(self):
+ """Return the network object with the host bits masked out."""
+ return IPNetwork('%s/%d' % (self.network, self._prefixlen),
+ version=self._version)
+
+ def subnet(self, prefixlen_diff=1, new_prefix=None):
+ """Return a list of subnets, rather than an iterator."""
+ return list(self.iter_subnets(prefixlen_diff, new_prefix))
+
+ def supernet(self, prefixlen_diff=1, new_prefix=None):
+ """The supernet containing the current network.
+
+ Args:
+ prefixlen_diff: An integer, the amount the prefix length of
+ the network should be decreased by. For example, given a
+ /24 network and a prefixlen_diff of 3, a supernet with a
+ /21 netmask is returned.
+
+ Returns:
+ An IPv4 network object.
+
+ Raises:
+ ValueError: If self.prefixlen - prefixlen_diff < 0. I.e., you have a
+ negative prefix length.
+ OR
+ If prefixlen_diff and new_prefix are both set or new_prefix is a
+ larger number than the current prefix (larger number means a
+ smaller network)
+
+ """
+ if self._prefixlen == 0:
+ return self
+
+ if new_prefix is not None:
+ if new_prefix > self._prefixlen:
+ raise ValueError('new prefix must be shorter')
+ if prefixlen_diff != 1:
+ raise ValueError('cannot set prefixlen_diff and new_prefix')
+ prefixlen_diff = self._prefixlen - new_prefix
+
+
+ if self.prefixlen - prefixlen_diff < 0:
+ raise ValueError(
+ 'current prefixlen is %d, cannot have a prefixlen_diff of %d' %
+ (self.prefixlen, prefixlen_diff))
+ return IPNetwork('%s/%s' % (str(self.network),
+ str(self.prefixlen - prefixlen_diff)),
+ version=self._version)
+
+ # backwards compatibility
+ Subnet = subnet
+ Supernet = supernet
+ AddressExclude = address_exclude
+ CompareNetworks = compare_networks
+ Contains = __contains__
+
+
+class _BaseV4(object):
+
+ """Base IPv4 object.
+
+ The following methods are used by IPv4 objects in both single IP
+ addresses and networks.
+
+ """
+
+ # Equivalent to 255.255.255.255 or 32 bits of 1's.
+ _ALL_ONES = (2**IPV4LENGTH) - 1
+ _DECIMAL_DIGITS = frozenset('0123456789')
+
+ def __init__(self, address):
+ self._version = 4
+ self._max_prefixlen = IPV4LENGTH
+
+ def _explode_shorthand_ip_string(self, ip_str=None):
+ if not ip_str:
+ ip_str = str(self)
+ return ip_str
+
+ def _ip_int_from_string(self, ip_str):
+ """Turn the given IP string into an integer for comparison.
+
+ Args:
+ ip_str: A string, the IP ip_str.
+
+ Returns:
+ The IP ip_str as an integer.
+
+ Raises:
+ AddressValueError: if ip_str isn't a valid IPv4 Address.
+
+ """
+ octets = ip_str.split('.')
+ if len(octets) != 4:
+ raise AddressValueError(ip_str)
+
+ packed_ip = 0
+ for oc in octets:
+ try:
+ packed_ip = (packed_ip << 8) | self._parse_octet(oc)
+ except ValueError:
+ raise AddressValueError(ip_str)
+ return packed_ip
+
+ def _parse_octet(self, octet_str):
+ """Convert a decimal octet into an integer.
+
+ Args:
+ octet_str: A string, the number to parse.
+
+ Returns:
+ The octet as an integer.
+
+ Raises:
+ ValueError: if the octet isn't strictly a decimal from [0..255].
+
+ """
+ # Whitelist the characters, since int() allows a lot of bizarre stuff.
+ if not self._DECIMAL_DIGITS.issuperset(octet_str):
+ raise ValueError
+ octet_int = int(octet_str, 10)
+ # Disallow leading zeroes, because no clear standard exists on
+ # whether these should be interpreted as decimal or octal.
+ if octet_int > 255 or (octet_str[0] == '0' and len(octet_str) > 1):
+ raise ValueError
+ return octet_int
+
+ def _string_from_ip_int(self, ip_int):
+ """Turns a 32-bit integer into dotted decimal notation.
+
+ Args:
+ ip_int: An integer, the IP address.
+
+ Returns:
+ The IP address as a string in dotted decimal notation.
+
+ """
+ octets = []
+ for _ in xrange(4):
+ octets.insert(0, str(ip_int & 0xFF))
+ ip_int >>= 8
+ return '.'.join(octets)
+
+ @property
+ def max_prefixlen(self):
+ return self._max_prefixlen
+
+ @property
+ def packed(self):
+ """The binary representation of this address."""
+ return v4_int_to_packed(self._ip)
+
+ @property
+ def version(self):
+ return self._version
+
+ @property
+ def is_reserved(self):
+ """Test if the address is otherwise IETF reserved.
+
+ Returns:
+ A boolean, True if the address is within the
+ reserved IPv4 Network range.
+
+ """
+ return self in IPv4Network('240.0.0.0/4')
+
+ @property
+ def is_private(self):
+ """Test if this address is allocated for private networks.
+
+ Returns:
+ A boolean, True if the address is reserved per RFC 1918.
+
+ """
+ return (self in IPv4Network('10.0.0.0/8') or
+ self in IPv4Network('172.16.0.0/12') or
+ self in IPv4Network('192.168.0.0/16'))
+
+ @property
+ def is_multicast(self):
+ """Test if the address is reserved for multicast use.
+
+ Returns:
+ A boolean, True if the address is multicast.
+ See RFC 3171 for details.
+
+ """
+ return self in IPv4Network('224.0.0.0/4')
+
+ @property
+ def is_unspecified(self):
+ """Test if the address is unspecified.
+
+ Returns:
+ A boolean, True if this is the unspecified address as defined in
+ RFC 5735 3.
+
+ """
+ return self in IPv4Network('0.0.0.0')
+
+ @property
+ def is_loopback(self):
+ """Test if the address is a loopback address.
+
+ Returns:
+ A boolean, True if the address is a loopback per RFC 3330.
+
+ """
+ return self in IPv4Network('127.0.0.0/8')
+
+ @property
+ def is_link_local(self):
+ """Test if the address is reserved for link-local.
+
+ Returns:
+ A boolean, True if the address is link-local per RFC 3927.
+
+ """
+ return self in IPv4Network('169.254.0.0/16')
+
+
+class IPv4Address(_BaseV4, _BaseIP):
+
+ """Represent and manipulate single IPv4 Addresses."""
+
+ def __init__(self, address):
+
+ """
+ Args:
+ address: A string or integer representing the IP
+ '192.168.1.1'
+
+ Additionally, an integer can be passed, so
+ IPv4Address('192.168.1.1') == IPv4Address(3232235777).
+ or, more generally
+ IPv4Address(int(IPv4Address('192.168.1.1'))) ==
+ IPv4Address('192.168.1.1')
+
+ Raises:
+ AddressValueError: If ipaddr isn't a valid IPv4 address.
+
+ """
+ _BaseIP.__init__(self, address)
+ _BaseV4.__init__(self, address)
+
+ # Efficient constructor from integer.
+ if isinstance(address, (int, long)):
+ self._ip = address
+ if address < 0 or address > self._ALL_ONES:
+ raise AddressValueError(address)
+ return
+
+ # Constructing from a packed address
+ if _compat_has_real_bytes:
+ if isinstance(address, bytes) and len(address) == 4:
+ self._ip = struct.unpack('!I', address)[0]
+ return
+
+ # Assume input argument to be string or any object representation
+ # which converts into a formatted IP string.
+ addr_str = str(address)
+ self._ip = self._ip_int_from_string(addr_str)
+
+
+class IPv4Network(_BaseV4, _BaseNet):
+
+ """This class represents and manipulates 32-bit IPv4 networks.
+
+ Attributes: [examples for IPv4Network('1.2.3.4/27')]
+ ._ip: 16909060
+ .ip: IPv4Address('1.2.3.4')
+ .network: IPv4Address('1.2.3.0')
+ .hostmask: IPv4Address('0.0.0.31')
+ .broadcast: IPv4Address('1.2.3.31')
+ .netmask: IPv4Address('255.255.255.224')
+ .prefixlen: 27
+
+ """
+
+ # the valid octets for host and netmasks. only useful for IPv4.
+ _valid_mask_octets = set((255, 254, 252, 248, 240, 224, 192, 128, 0))
+
+ def __init__(self, address, strict=False):
+ """Instantiate a new IPv4 network object.
+
+ Args:
+ address: A string or integer representing the IP [& network].
+ '192.168.1.1/24'
+ '192.168.1.1/255.255.255.0'
+ '192.168.1.1/0.0.0.255'
+ are all functionally the same in IPv4. Similarly,
+ '192.168.1.1'
+ '192.168.1.1/255.255.255.255'
+ '192.168.1.1/32'
+ are also functionaly equivalent. That is to say, failing to
+ provide a subnetmask will create an object with a mask of /32.
+
+ If the mask (portion after the / in the argument) is given in
+ dotted quad form, it is treated as a netmask if it starts with a
+ non-zero field (e.g. /255.0.0.0 == /8) and as a hostmask if it
+ starts with a zero field (e.g. 0.255.255.255 == /8), with the
+ single exception of an all-zero mask which is treated as a
+ netmask == /0. If no mask is given, a default of /32 is used.
+
+ Additionally, an integer can be passed, so
+ IPv4Network('192.168.1.1') == IPv4Network(3232235777).
+ or, more generally
+ IPv4Network(int(IPv4Network('192.168.1.1'))) ==
+ IPv4Network('192.168.1.1')
+
+ strict: A boolean. If true, ensure that we have been passed
+ A true network address, eg, 192.168.1.0/24 and not an
+ IP address on a network, eg, 192.168.1.1/24.
+
+ Raises:
+ AddressValueError: If ipaddr isn't a valid IPv4 address.
+ NetmaskValueError: If the netmask isn't valid for
+ an IPv4 address.
+ ValueError: If strict was True and a network address was not
+ supplied.
+
+ """
+ _BaseNet.__init__(self, address)
+ _BaseV4.__init__(self, address)
+
+ # Efficient constructor from integer.
+ if isinstance(address, (int, long)):
+ self._ip = address
+ self.ip = IPv4Address(self._ip)
+ self._prefixlen = self._max_prefixlen
+ self.netmask = IPv4Address(self._ALL_ONES)
+ if address < 0 or address > self._ALL_ONES:
+ raise AddressValueError(address)
+ return
+
+ # Constructing from a packed address
+ if _compat_has_real_bytes:
+ if isinstance(address, bytes) and len(address) == 4:
+ self._ip = struct.unpack('!I', address)[0]
+ self.ip = IPv4Address(self._ip)
+ self._prefixlen = self._max_prefixlen
+ self.netmask = IPv4Address(self._ALL_ONES)
+ return
+
+ # Assume input argument to be string or any object representation
+ # which converts into a formatted IP prefix string.
+ addr = str(address).split('/')
+
+ if len(addr) > 2:
+ raise AddressValueError(address)
+
+ self._ip = self._ip_int_from_string(addr[0])
+ self.ip = IPv4Address(self._ip)
+
+ if len(addr) == 2:
+ mask = addr[1].split('.')
+ if len(mask) == 4:
+ # We have dotted decimal netmask.
+ if self._is_valid_netmask(addr[1]):
+ self.netmask = IPv4Address(self._ip_int_from_string(
+ addr[1]))
+ elif self._is_hostmask(addr[1]):
+ self.netmask = IPv4Address(
+ self._ip_int_from_string(addr[1]) ^ self._ALL_ONES)
+ else:
+ raise NetmaskValueError('%s is not a valid netmask'
+ % addr[1])
+
+ self._prefixlen = self._prefix_from_ip_int(int(self.netmask))
+ else:
+ # We have a netmask in prefix length form.
+ if not self._is_valid_netmask(addr[1]):
+ raise NetmaskValueError(addr[1])
+ self._prefixlen = int(addr[1])
+ self.netmask = IPv4Address(self._ip_int_from_prefix(
+ self._prefixlen))
+ else:
+ self._prefixlen = self._max_prefixlen
+ self.netmask = IPv4Address(self._ip_int_from_prefix(
+ self._prefixlen))
+ if strict:
+ if self.ip != self.network:
+ raise ValueError('%s has host bits set' %
+ self.ip)
+
+ def _is_hostmask(self, ip_str):
+ """Test if the IP string is a hostmask (rather than a netmask).
+
+ Args:
+ ip_str: A string, the potential hostmask.
+
+ Returns:
+ A boolean, True if the IP string is a hostmask.
+
+ """
+ bits = ip_str.split('.')
+ try:
+ parts = [int(x) for x in bits if int(x) in self._valid_mask_octets]
+ except ValueError:
+ return False
+ if len(parts) != len(bits):
+ return False
+ if parts[0] < parts[-1]:
+ return True
+ return False
+
+ def _is_valid_netmask(self, netmask):
+ """Verify that the netmask is valid.
+
+ Args:
+ netmask: A string, either a prefix or dotted decimal
+ netmask.
+
+ Returns:
+ A boolean, True if the prefix represents a valid IPv4
+ netmask.
+
+ """
+ mask = netmask.split('.')
+ if len(mask) == 4:
+ if [x for x in mask if int(x) not in self._valid_mask_octets]:
+ return False
+ if [y for idx, y in enumerate(mask) if idx > 0 and
+ y > mask[idx - 1]]:
+ return False
+ return True
+ try:
+ netmask = int(netmask)
+ except ValueError:
+ return False
+ return 0 <= netmask <= self._max_prefixlen
+
+ # backwards compatibility
+ IsRFC1918 = lambda self: self.is_private
+ IsMulticast = lambda self: self.is_multicast
+ IsLoopback = lambda self: self.is_loopback
+ IsLinkLocal = lambda self: self.is_link_local
+
+
+class _BaseV6(object):
+
+ """Base IPv6 object.
+
+ The following methods are used by IPv6 objects in both single IP
+ addresses and networks.
+
+ """
+
+ _ALL_ONES = (2**IPV6LENGTH) - 1
+ _HEXTET_COUNT = 8
+ _HEX_DIGITS = frozenset('0123456789ABCDEFabcdef')
+
+ def __init__(self, address):
+ self._version = 6
+ self._max_prefixlen = IPV6LENGTH
+
+ def _ip_int_from_string(self, ip_str):
+ """Turn an IPv6 ip_str into an integer.
+
+ Args:
+ ip_str: A string, the IPv6 ip_str.
+
+ Returns:
+ A long, the IPv6 ip_str.
+
+ Raises:
+ AddressValueError: if ip_str isn't a valid IPv6 Address.
+
+ """
+ parts = ip_str.split(':')
+
+ # An IPv6 address needs at least 2 colons (3 parts).
+ if len(parts) < 3:
+ raise AddressValueError(ip_str)
+
+ # If the address has an IPv4-style suffix, convert it to hexadecimal.
+ if '.' in parts[-1]:
+ ipv4_int = IPv4Address(parts.pop())._ip
+ parts.append('%x' % ((ipv4_int >> 16) & 0xFFFF))
+ parts.append('%x' % (ipv4_int & 0xFFFF))
+
+ # An IPv6 address can't have more than 8 colons (9 parts).
+ if len(parts) > self._HEXTET_COUNT + 1:
+ raise AddressValueError(ip_str)
+
+ # Disregarding the endpoints, find '::' with nothing in between.
+ # This indicates that a run of zeroes has been skipped.
+ try:
+ skip_index, = (
+ [i for i in xrange(1, len(parts) - 1) if not parts[i]] or
+ [None])
+ except ValueError:
+ # Can't have more than one '::'
+ raise AddressValueError(ip_str)
+
+ # parts_hi is the number of parts to copy from above/before the '::'
+ # parts_lo is the number of parts to copy from below/after the '::'
+ if skip_index is not None:
+ # If we found a '::', then check if it also covers the endpoints.
+ parts_hi = skip_index
+ parts_lo = len(parts) - skip_index - 1
+ if not parts[0]:
+ parts_hi -= 1
+ if parts_hi:
+ raise AddressValueError(ip_str) # ^: requires ^::
+ if not parts[-1]:
+ parts_lo -= 1
+ if parts_lo:
+ raise AddressValueError(ip_str) # :$ requires ::$
+ parts_skipped = self._HEXTET_COUNT - (parts_hi + parts_lo)
+ if parts_skipped < 1:
+ raise AddressValueError(ip_str)
+ else:
+ # Otherwise, allocate the entire address to parts_hi. The endpoints
+ # could still be empty, but _parse_hextet() will check for that.
+ if len(parts) != self._HEXTET_COUNT:
+ raise AddressValueError(ip_str)
+ parts_hi = len(parts)
+ parts_lo = 0
+ parts_skipped = 0
+
+ try:
+ # Now, parse the hextets into a 128-bit integer.
+ ip_int = 0L
+ for i in xrange(parts_hi):
+ ip_int <<= 16
+ ip_int |= self._parse_hextet(parts[i])
+ ip_int <<= 16 * parts_skipped
+ for i in xrange(-parts_lo, 0):
+ ip_int <<= 16
+ ip_int |= self._parse_hextet(parts[i])
+ return ip_int
+ except ValueError:
+ raise AddressValueError(ip_str)
+
+ def _parse_hextet(self, hextet_str):
+ """Convert an IPv6 hextet string into an integer.
+
+ Args:
+ hextet_str: A string, the number to parse.
+
+ Returns:
+ The hextet as an integer.
+
+ Raises:
+ ValueError: if the input isn't strictly a hex number from [0..FFFF].
+
+ """
+ # Whitelist the characters, since int() allows a lot of bizarre stuff.
+ if not self._HEX_DIGITS.issuperset(hextet_str):
+ raise ValueError
+ hextet_int = int(hextet_str, 16)
+ if hextet_int > 0xFFFF:
+ raise ValueError
+ return hextet_int
+
+ def _compress_hextets(self, hextets):
+ """Compresses a list of hextets.
+
+ Compresses a list of strings, replacing the longest continuous
+ sequence of "0" in the list with "" and adding empty strings at
+ the beginning or at the end of the string such that subsequently
+ calling ":".join(hextets) will produce the compressed version of
+ the IPv6 address.
+
+ Args:
+ hextets: A list of strings, the hextets to compress.
+
+ Returns:
+ A list of strings.
+
+ """
+ best_doublecolon_start = -1
+ best_doublecolon_len = 0
+ doublecolon_start = -1
+ doublecolon_len = 0
+ for index in range(len(hextets)):
+ if hextets[index] == '0':
+ doublecolon_len += 1
+ if doublecolon_start == -1:
+ # Start of a sequence of zeros.
+ doublecolon_start = index
+ if doublecolon_len > best_doublecolon_len:
+ # This is the longest sequence of zeros so far.
+ best_doublecolon_len = doublecolon_len
+ best_doublecolon_start = doublecolon_start
+ else:
+ doublecolon_len = 0
+ doublecolon_start = -1
+
+ if best_doublecolon_len > 1:
+ best_doublecolon_end = (best_doublecolon_start +
+ best_doublecolon_len)
+ # For zeros at the end of the address.
+ if best_doublecolon_end == len(hextets):
+ hextets += ['']
+ hextets[best_doublecolon_start:best_doublecolon_end] = ['']
+ # For zeros at the beginning of the address.
+ if best_doublecolon_start == 0:
+ hextets = [''] + hextets
+
+ return hextets
+
+ def _string_from_ip_int(self, ip_int=None):
+ """Turns a 128-bit integer into hexadecimal notation.
+
+ Args:
+ ip_int: An integer, the IP address.
+
+ Returns:
+ A string, the hexadecimal representation of the address.
+
+ Raises:
+ ValueError: The address is bigger than 128 bits of all ones.
+
+ """
+ if not ip_int and ip_int != 0:
+ ip_int = int(self._ip)
+
+ if ip_int > self._ALL_ONES:
+ raise ValueError('IPv6 address is too large')
+
+ hex_str = '%032x' % ip_int
+ hextets = []
+ for x in range(0, 32, 4):
+ hextets.append('%x' % int(hex_str[x:x+4], 16))
+
+ hextets = self._compress_hextets(hextets)
+ return ':'.join(hextets)
+
+ def _explode_shorthand_ip_string(self, ip_str=None):
+ """Expand a shortened IPv6 address.
+
+ Args:
+ ip_str: A string, the IPv6 address.
+
+ Returns:
+ A string, the expanded IPv6 address.
+
+ """
+ if not ip_str:
+ ip_str = str(self)
+ if isinstance(self, _BaseNet):
+ ip_str = str(self.ip)
+
+ ip_int = self._ip_int_from_string(ip_str)
+ parts = []
+ for i in xrange(self._HEXTET_COUNT):
+ parts.append('%04x' % (ip_int & 0xFFFF))
+ ip_int >>= 16
+ parts.reverse()
+ return ':'.join(parts)
+
+ @property
+ def max_prefixlen(self):
+ return self._max_prefixlen
+
+ @property
+ def packed(self):
+ """The binary representation of this address."""
+ return v6_int_to_packed(self._ip)
+
+ @property
+ def version(self):
+ return self._version
+
+ @property
+ def is_multicast(self):
+ """Test if the address is reserved for multicast use.
+
+ Returns:
+ A boolean, True if the address is a multicast address.
+ See RFC 2373 2.7 for details.
+
+ """
+ return self in IPv6Network('ff00::/8')
+
+ @property
+ def is_reserved(self):
+ """Test if the address is otherwise IETF reserved.
+
+ Returns:
+ A boolean, True if the address is within one of the
+ reserved IPv6 Network ranges.
+
+ """
+ return (self in IPv6Network('::/8') or
+ self in IPv6Network('100::/8') or
+ self in IPv6Network('200::/7') or
+ self in IPv6Network('400::/6') or
+ self in IPv6Network('800::/5') or
+ self in IPv6Network('1000::/4') or
+ self in IPv6Network('4000::/3') or
+ self in IPv6Network('6000::/3') or
+ self in IPv6Network('8000::/3') or
+ self in IPv6Network('A000::/3') or
+ self in IPv6Network('C000::/3') or
+ self in IPv6Network('E000::/4') or
+ self in IPv6Network('F000::/5') or
+ self in IPv6Network('F800::/6') or
+ self in IPv6Network('FE00::/9'))
+
+ @property
+ def is_unspecified(self):
+ """Test if the address is unspecified.
+
+ Returns:
+ A boolean, True if this is the unspecified address as defined in
+ RFC 2373 2.5.2.
+
+ """
+ return self._ip == 0 and getattr(self, '_prefixlen', 128) == 128
+
+ @property
+ def is_loopback(self):
+ """Test if the address is a loopback address.
+
+ Returns:
+ A boolean, True if the address is a loopback address as defined in
+ RFC 2373 2.5.3.
+
+ """
+ return self._ip == 1 and getattr(self, '_prefixlen', 128) == 128
+
+ @property
+ def is_link_local(self):
+ """Test if the address is reserved for link-local.
+
+ Returns:
+ A boolean, True if the address is reserved per RFC 4291.
+
+ """
+ return self in IPv6Network('fe80::/10')
+
+ @property
+ def is_site_local(self):
+ """Test if the address is reserved for site-local.
+
+ Note that the site-local address space has been deprecated by RFC 3879.
+ Use is_private to test if this address is in the space of unique local
+ addresses as defined by RFC 4193.
+
+ Returns:
+ A boolean, True if the address is reserved per RFC 3513 2.5.6.
+
+ """
+ return self in IPv6Network('fec0::/10')
+
+ @property
+ def is_private(self):
+ """Test if this address is allocated for private networks.
+
+ Returns:
+ A boolean, True if the address is reserved per RFC 4193.
+
+ """
+ return self in IPv6Network('fc00::/7')
+
+ @property
+ def ipv4_mapped(self):
+ """Return the IPv4 mapped address.
+
+ Returns:
+ If the IPv6 address is a v4 mapped address, return the
+ IPv4 mapped address. Return None otherwise.
+
+ """
+ if (self._ip >> 32) != 0xFFFF:
+ return None
+ return IPv4Address(self._ip & 0xFFFFFFFF)
+
+ @property
+ def teredo(self):
+ """Tuple of embedded teredo IPs.
+
+ Returns:
+ Tuple of the (server, client) IPs or None if the address
+ doesn't appear to be a teredo address (doesn't start with
+ 2001::/32)
+
+ """
+ if (self._ip >> 96) != 0x20010000:
+ return None
+ return (IPv4Address((self._ip >> 64) & 0xFFFFFFFF),
+ IPv4Address(~self._ip & 0xFFFFFFFF))
+
+ @property
+ def sixtofour(self):
+ """Return the IPv4 6to4 embedded address.
+
+ Returns:
+ The IPv4 6to4-embedded address if present or None if the
+ address doesn't appear to contain a 6to4 embedded address.
+
+ """
+ if (self._ip >> 112) != 0x2002:
+ return None
+ return IPv4Address((self._ip >> 80) & 0xFFFFFFFF)
+
+
+class IPv6Address(_BaseV6, _BaseIP):
+
+ """Represent and manipulate single IPv6 Addresses.
+ """
+
+ def __init__(self, address):
+ """Instantiate a new IPv6 address object.
+
+ Args:
+ address: A string or integer representing the IP
+
+ Additionally, an integer can be passed, so
+ IPv6Address('2001:4860::') ==
+ IPv6Address(42541956101370907050197289607612071936L).
+ or, more generally
+ IPv6Address(IPv6Address('2001:4860::')._ip) ==
+ IPv6Address('2001:4860::')
+
+ Raises:
+ AddressValueError: If address isn't a valid IPv6 address.
+
+ """
+ _BaseIP.__init__(self, address)
+ _BaseV6.__init__(self, address)
+
+ # Efficient constructor from integer.
+ if isinstance(address, (int, long)):
+ self._ip = address
+ if address < 0 or address > self._ALL_ONES:
+ raise AddressValueError(address)
+ return
+
+ # Constructing from a packed address
+ if _compat_has_real_bytes:
+ if isinstance(address, bytes) and len(address) == 16:
+ tmp = struct.unpack('!QQ', address)
+ self._ip = (tmp[0] << 64) | tmp[1]
+ return
+
+ # Assume input argument to be string or any object representation
+ # which converts into a formatted IP string.
+ addr_str = str(address)
+ if not addr_str:
+ raise AddressValueError('')
+
+ self._ip = self._ip_int_from_string(addr_str)
+
+
+class IPv6Network(_BaseV6, _BaseNet):
+
+ """This class represents and manipulates 128-bit IPv6 networks.
+
+ Attributes: [examples for IPv6('2001:658:22A:CAFE:200::1/64')]
+ .ip: IPv6Address('2001:658:22a:cafe:200::1')
+ .network: IPv6Address('2001:658:22a:cafe::')
+ .hostmask: IPv6Address('::ffff:ffff:ffff:ffff')
+ .broadcast: IPv6Address('2001:658:22a:cafe:ffff:ffff:ffff:ffff')
+ .netmask: IPv6Address('ffff:ffff:ffff:ffff::')
+ .prefixlen: 64
+
+ """
+
+
+ def __init__(self, address, strict=False):
+ """Instantiate a new IPv6 Network object.
+
+ Args:
+ address: A string or integer representing the IPv6 network or the IP
+ and prefix/netmask.
+ '2001:4860::/128'
+ '2001:4860:0000:0000:0000:0000:0000:0000/128'
+ '2001:4860::'
+ are all functionally the same in IPv6. That is to say,
+ failing to provide a subnetmask will create an object with
+ a mask of /128.
+
+ Additionally, an integer can be passed, so
+ IPv6Network('2001:4860::') ==
+ IPv6Network(42541956101370907050197289607612071936L).
+ or, more generally
+ IPv6Network(IPv6Network('2001:4860::')._ip) ==
+ IPv6Network('2001:4860::')
+
+ strict: A boolean. If true, ensure that we have been passed
+ A true network address, eg, 192.168.1.0/24 and not an
+ IP address on a network, eg, 192.168.1.1/24.
+
+ Raises:
+ AddressValueError: If address isn't a valid IPv6 address.
+ NetmaskValueError: If the netmask isn't valid for
+ an IPv6 address.
+ ValueError: If strict was True and a network address was not
+ supplied.
+
+ """
+ _BaseNet.__init__(self, address)
+ _BaseV6.__init__(self, address)
+
+ # Efficient constructor from integer.
+ if isinstance(address, (int, long)):
+ self._ip = address
+ self.ip = IPv6Address(self._ip)
+ self._prefixlen = self._max_prefixlen
+ self.netmask = IPv6Address(self._ALL_ONES)
+ if address < 0 or address > self._ALL_ONES:
+ raise AddressValueError(address)
+ return
+
+ # Constructing from a packed address
+ if _compat_has_real_bytes:
+ if isinstance(address, bytes) and len(address) == 16:
+ tmp = struct.unpack('!QQ', address)
+ self._ip = (tmp[0] << 64) | tmp[1]
+ self.ip = IPv6Address(self._ip)
+ self._prefixlen = self._max_prefixlen
+ self.netmask = IPv6Address(self._ALL_ONES)
+ return
+
+ # Assume input argument to be string or any object representation
+ # which converts into a formatted IP prefix string.
+ addr = str(address).split('/')
+
+ if len(addr) > 2:
+ raise AddressValueError(address)
+
+ self._ip = self._ip_int_from_string(addr[0])
+ self.ip = IPv6Address(self._ip)
+
+ if len(addr) == 2:
+ if self._is_valid_netmask(addr[1]):
+ self._prefixlen = int(addr[1])
+ else:
+ raise NetmaskValueError(addr[1])
+ else:
+ self._prefixlen = self._max_prefixlen
+
+ self.netmask = IPv6Address(self._ip_int_from_prefix(self._prefixlen))
+
+ if strict:
+ if self.ip != self.network:
+ raise ValueError('%s has host bits set' %
+ self.ip)
+
+ def _is_valid_netmask(self, prefixlen):
+ """Verify that the netmask/prefixlen is valid.
+
+ Args:
+ prefixlen: A string, the netmask in prefix length format.
+
+ Returns:
+ A boolean, True if the prefix represents a valid IPv6
+ netmask.
+
+ """
+ try:
+ prefixlen = int(prefixlen)
+ except ValueError:
+ return False
+ return 0 <= prefixlen <= self._max_prefixlen
+
+ @property
+ def with_netmask(self):
+ return self.with_prefixlen
diff --git a/contrib/ipaddr-py/ipaddr_test.py b/contrib/ipaddr-py/ipaddr_test.py
new file mode 100755
index 000000000..09bece0e7
--- /dev/null
+++ b/contrib/ipaddr-py/ipaddr_test.py
@@ -0,0 +1,1099 @@
+#!/usr/bin/python
+#
+# Copyright 2007 Google Inc.
+# Licensed to PSF under a Contributor Agreement.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Unittest for ipaddr module."""
+
+
+import unittest
+import time
+import ipaddr
+
+# Compatibility function to cast str to bytes objects
+if ipaddr._compat_has_real_bytes:
+ _cb = lambda bytestr: bytes(bytestr, 'charmap')
+else:
+ _cb = str
+
+class IpaddrUnitTest(unittest.TestCase):
+
+ def setUp(self):
+ self.ipv4 = ipaddr.IPv4Network('1.2.3.4/24')
+ self.ipv4_hostmask = ipaddr.IPv4Network('10.0.0.1/0.255.255.255')
+ self.ipv6 = ipaddr.IPv6Network('2001:658:22a:cafe:200:0:0:1/64')
+
+ def tearDown(self):
+ del(self.ipv4)
+ del(self.ipv4_hostmask)
+ del(self.ipv6)
+ del(self)
+
+ def testRepr(self):
+ self.assertEqual("IPv4Network('1.2.3.4/32')",
+ repr(ipaddr.IPv4Network('1.2.3.4')))
+ self.assertEqual("IPv6Network('::1/128')",
+ repr(ipaddr.IPv6Network('::1')))
+
+ def testAutoMasking(self):
+ addr1 = ipaddr.IPv4Network('1.1.1.255/24')
+ addr1_masked = ipaddr.IPv4Network('1.1.1.0/24')
+ self.assertEqual(addr1_masked, addr1.masked())
+
+ addr2 = ipaddr.IPv6Network('2000:cafe::efac:100/96')
+ addr2_masked = ipaddr.IPv6Network('2000:cafe::/96')
+ self.assertEqual(addr2_masked, addr2.masked())
+
+ # issue57
+ def testAddressIntMath(self):
+ self.assertEqual(ipaddr.IPv4Address('1.1.1.1') + 255,
+ ipaddr.IPv4Address('1.1.2.0'))
+ self.assertEqual(ipaddr.IPv4Address('1.1.1.1') - 256,
+ ipaddr.IPv4Address('1.1.0.1'))
+ self.assertEqual(ipaddr.IPv6Address('::1') + (2**16 - 2),
+ ipaddr.IPv6Address('::ffff'))
+ self.assertEqual(ipaddr.IPv6Address('::ffff') - (2**16 - 2),
+ ipaddr.IPv6Address('::1'))
+
+ def testInvalidStrings(self):
+ def AssertInvalidIP(ip_str):
+ self.assertRaises(ValueError, ipaddr.IPAddress, ip_str)
+ AssertInvalidIP("")
+ AssertInvalidIP("016.016.016.016")
+ AssertInvalidIP("016.016.016")
+ AssertInvalidIP("016.016")
+ AssertInvalidIP("016")
+ AssertInvalidIP("000.000.000.000")
+ AssertInvalidIP("000")
+ AssertInvalidIP("0x0a.0x0a.0x0a.0x0a")
+ AssertInvalidIP("0x0a.0x0a.0x0a")
+ AssertInvalidIP("0x0a.0x0a")
+ AssertInvalidIP("0x0a")
+ AssertInvalidIP("42.42.42.42.42")
+ AssertInvalidIP("42.42.42")
+ AssertInvalidIP("42.42")
+ AssertInvalidIP("42")
+ AssertInvalidIP("42..42.42")
+ AssertInvalidIP("42..42.42.42")
+ AssertInvalidIP("42.42.42.42.")
+ AssertInvalidIP("42.42.42.42...")
+ AssertInvalidIP(".42.42.42.42")
+ AssertInvalidIP("...42.42.42.42")
+ AssertInvalidIP("42.42.42.-0")
+ AssertInvalidIP("42.42.42.+0")
+ AssertInvalidIP(".")
+ AssertInvalidIP("...")
+ AssertInvalidIP("bogus")
+ AssertInvalidIP("bogus.com")
+ AssertInvalidIP("192.168.0.1.com")
+ AssertInvalidIP("12345.67899.-54321.-98765")
+ AssertInvalidIP("257.0.0.0")
+ AssertInvalidIP("42.42.42.-42")
+ AssertInvalidIP("3ffe::1.net")
+ AssertInvalidIP("3ffe::1::1")
+ AssertInvalidIP("1::2::3::4:5")
+ AssertInvalidIP("::7:6:5:4:3:2:")
+ AssertInvalidIP(":6:5:4:3:2:1::")
+ AssertInvalidIP("2001::db:::1")
+ AssertInvalidIP("FEDC:9878")
+ AssertInvalidIP("+1.+2.+3.4")
+ AssertInvalidIP("1.2.3.4e0")
+ AssertInvalidIP("::7:6:5:4:3:2:1:0")
+ AssertInvalidIP("7:6:5:4:3:2:1:0::")
+ AssertInvalidIP("9:8:7:6:5:4:3::2:1")
+ AssertInvalidIP("0:1:2:3::4:5:6:7")
+ AssertInvalidIP("3ffe:0:0:0:0:0:0:0:1")
+ AssertInvalidIP("3ffe::10000")
+ AssertInvalidIP("3ffe::goog")
+ AssertInvalidIP("3ffe::-0")
+ AssertInvalidIP("3ffe::+0")
+ AssertInvalidIP("3ffe::-1")
+ AssertInvalidIP(":")
+ AssertInvalidIP(":::")
+ AssertInvalidIP("::1.2.3")
+ AssertInvalidIP("::1.2.3.4.5")
+ AssertInvalidIP("::1.2.3.4:")
+ AssertInvalidIP("1.2.3.4::")
+ AssertInvalidIP("2001:db8::1:")
+ AssertInvalidIP(":2001:db8::1")
+ AssertInvalidIP(":1:2:3:4:5:6:7")
+ AssertInvalidIP("1:2:3:4:5:6:7:")
+ AssertInvalidIP(":1:2:3:4:5:6:")
+
+ self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv4Network, '')
+ self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv4Network,
+ 'google.com')
+ self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv4Network,
+ '::1.2.3.4')
+ self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv6Network, '')
+ self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv6Network,
+ 'google.com')
+ self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv6Network,
+ '1.2.3.4')
+ self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv6Network,
+ 'cafe:cafe::/128/190')
+ self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv6Network,
+ '1234:axy::b')
+ self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv6Address,
+ '1234:axy::b')
+ self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv6Address,
+ '2001:db8:::1')
+ self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv6Address,
+ '2001:888888::1')
+ self.assertRaises(ipaddr.AddressValueError,
+ ipaddr.IPv4Address(1)._ip_int_from_string,
+ '1.a.2.3')
+ self.assertEqual(False, ipaddr.IPv4Network(1)._is_hostmask('1.a.2.3'))
+
+ def testGetNetwork(self):
+ self.assertEqual(int(self.ipv4.network), 16909056)
+ self.assertEqual(str(self.ipv4.network), '1.2.3.0')
+ self.assertEqual(str(self.ipv4_hostmask.network), '10.0.0.0')
+
+ self.assertEqual(int(self.ipv6.network),
+ 42540616829182469433403647294022090752)
+ self.assertEqual(str(self.ipv6.network),
+ '2001:658:22a:cafe::')
+ self.assertEqual(str(self.ipv6.hostmask),
+ '::ffff:ffff:ffff:ffff')
+
+ def testBadVersionComparison(self):
+ # These should always raise TypeError
+ v4addr = ipaddr.IPAddress('1.1.1.1')
+ v4net = ipaddr.IPNetwork('1.1.1.1')
+ v6addr = ipaddr.IPAddress('::1')
+ v6net = ipaddr.IPAddress('::1')
+
+ self.assertRaises(TypeError, v4addr.__lt__, v6addr)
+ self.assertRaises(TypeError, v4addr.__gt__, v6addr)
+ self.assertRaises(TypeError, v4net.__lt__, v6net)
+ self.assertRaises(TypeError, v4net.__gt__, v6net)
+
+ self.assertRaises(TypeError, v6addr.__lt__, v4addr)
+ self.assertRaises(TypeError, v6addr.__gt__, v4addr)
+ self.assertRaises(TypeError, v6net.__lt__, v4net)
+ self.assertRaises(TypeError, v6net.__gt__, v4net)
+
+ def testMixedTypeComparison(self):
+ v4addr = ipaddr.IPAddress('1.1.1.1')
+ v4net = ipaddr.IPNetwork('1.1.1.1/32')
+ v6addr = ipaddr.IPAddress('::1')
+ v6net = ipaddr.IPNetwork('::1/128')
+
+ self.assertFalse(v4net.__contains__(v6net))
+ self.assertFalse(v6net.__contains__(v4net))
+
+ self.assertRaises(TypeError, lambda: v4addr < v4net)
+ self.assertRaises(TypeError, lambda: v4addr > v4net)
+ self.assertRaises(TypeError, lambda: v4net < v4addr)
+ self.assertRaises(TypeError, lambda: v4net > v4addr)
+
+ self.assertRaises(TypeError, lambda: v6addr < v6net)
+ self.assertRaises(TypeError, lambda: v6addr > v6net)
+ self.assertRaises(TypeError, lambda: v6net < v6addr)
+ self.assertRaises(TypeError, lambda: v6net > v6addr)
+
+ # with get_mixed_type_key, you can sort addresses and network.
+ self.assertEqual([v4addr, v4net], sorted([v4net, v4addr],
+ key=ipaddr.get_mixed_type_key))
+ self.assertEqual([v6addr, v6net], sorted([v6net, v6addr],
+ key=ipaddr.get_mixed_type_key))
+
+ def testIpFromInt(self):
+ self.assertEqual(self.ipv4.ip, ipaddr.IPv4Network(16909060).ip)
+ self.assertRaises(ipaddr.AddressValueError,
+ ipaddr.IPv4Network, 2**32)
+ self.assertRaises(ipaddr.AddressValueError,
+ ipaddr.IPv4Network, -1)
+
+ ipv4 = ipaddr.IPNetwork('1.2.3.4')
+ ipv6 = ipaddr.IPNetwork('2001:658:22a:cafe:200:0:0:1')
+ self.assertEqual(ipv4, ipaddr.IPNetwork(int(ipv4)))
+ self.assertEqual(ipv6, ipaddr.IPNetwork(int(ipv6)))
+
+ v6_int = 42540616829182469433547762482097946625
+ self.assertEqual(self.ipv6.ip, ipaddr.IPv6Network(v6_int).ip)
+ self.assertRaises(ipaddr.AddressValueError,
+ ipaddr.IPv6Network, 2**128)
+ self.assertRaises(ipaddr.AddressValueError,
+ ipaddr.IPv6Network, -1)
+
+ self.assertEqual(ipaddr.IPNetwork(self.ipv4.ip).version, 4)
+ self.assertEqual(ipaddr.IPNetwork(self.ipv6.ip).version, 6)
+
+ if ipaddr._compat_has_real_bytes: # on python3+
+ def testIpFromPacked(self):
+ ip = ipaddr.IPNetwork
+
+ self.assertEqual(self.ipv4.ip,
+ ip(_cb('\x01\x02\x03\x04')).ip)
+ self.assertEqual(ip('255.254.253.252'),
+ ip(_cb('\xff\xfe\xfd\xfc')))
+ self.assertRaises(ValueError, ipaddr.IPNetwork, _cb('\x00' * 3))
+ self.assertRaises(ValueError, ipaddr.IPNetwork, _cb('\x00' * 5))
+ self.assertEqual(self.ipv6.ip,
+ ip(_cb('\x20\x01\x06\x58\x02\x2a\xca\xfe'
+ '\x02\x00\x00\x00\x00\x00\x00\x01')).ip)
+ self.assertEqual(ip('ffff:2:3:4:ffff::'),
+ ip(_cb('\xff\xff\x00\x02\x00\x03\x00\x04' +
+ '\xff\xff' + '\x00' * 6)))
+ self.assertEqual(ip('::'),
+ ip(_cb('\x00' * 16)))
+ self.assertRaises(ValueError, ip, _cb('\x00' * 15))
+ self.assertRaises(ValueError, ip, _cb('\x00' * 17))
+
+ def testGetIp(self):
+ self.assertEqual(int(self.ipv4.ip), 16909060)
+ self.assertEqual(str(self.ipv4.ip), '1.2.3.4')
+ self.assertEqual(str(self.ipv4_hostmask.ip), '10.0.0.1')
+
+ self.assertEqual(int(self.ipv6.ip),
+ 42540616829182469433547762482097946625)
+ self.assertEqual(str(self.ipv6.ip),
+ '2001:658:22a:cafe:200::1')
+
+ def testGetNetmask(self):
+ self.assertEqual(int(self.ipv4.netmask), 4294967040L)
+ self.assertEqual(str(self.ipv4.netmask), '255.255.255.0')
+ self.assertEqual(str(self.ipv4_hostmask.netmask), '255.0.0.0')
+ self.assertEqual(int(self.ipv6.netmask),
+ 340282366920938463444927863358058659840)
+ self.assertEqual(self.ipv6.prefixlen, 64)
+
+ def testZeroNetmask(self):
+ ipv4_zero_netmask = ipaddr.IPv4Network('1.2.3.4/0')
+ self.assertEqual(int(ipv4_zero_netmask.netmask), 0)
+ self.assertTrue(ipv4_zero_netmask._is_valid_netmask(str(0)))
+
+ ipv6_zero_netmask = ipaddr.IPv6Network('::1/0')
+ self.assertEqual(int(ipv6_zero_netmask.netmask), 0)
+ self.assertTrue(ipv6_zero_netmask._is_valid_netmask(str(0)))
+
+ def testGetBroadcast(self):
+ self.assertEqual(int(self.ipv4.broadcast), 16909311L)
+ self.assertEqual(str(self.ipv4.broadcast), '1.2.3.255')
+
+ self.assertEqual(int(self.ipv6.broadcast),
+ 42540616829182469451850391367731642367)
+ self.assertEqual(str(self.ipv6.broadcast),
+ '2001:658:22a:cafe:ffff:ffff:ffff:ffff')
+
+ def testGetPrefixlen(self):
+ self.assertEqual(self.ipv4.prefixlen, 24)
+
+ self.assertEqual(self.ipv6.prefixlen, 64)
+
+ def testGetSupernet(self):
+ self.assertEqual(self.ipv4.supernet().prefixlen, 23)
+ self.assertEqual(str(self.ipv4.supernet().network), '1.2.2.0')
+ self.assertEqual(ipaddr.IPv4Network('0.0.0.0/0').supernet(),
+ ipaddr.IPv4Network('0.0.0.0/0'))
+
+ self.assertEqual(self.ipv6.supernet().prefixlen, 63)
+ self.assertEqual(str(self.ipv6.supernet().network),
+ '2001:658:22a:cafe::')
+ self.assertEqual(ipaddr.IPv6Network('::0/0').supernet(),
+ ipaddr.IPv6Network('::0/0'))
+
+ def testGetSupernet3(self):
+ self.assertEqual(self.ipv4.supernet(3).prefixlen, 21)
+ self.assertEqual(str(self.ipv4.supernet(3).network), '1.2.0.0')
+
+ self.assertEqual(self.ipv6.supernet(3).prefixlen, 61)
+ self.assertEqual(str(self.ipv6.supernet(3).network),
+ '2001:658:22a:caf8::')
+
+ def testGetSupernet4(self):
+ self.assertRaises(ValueError, self.ipv4.supernet, prefixlen_diff=2,
+ new_prefix=1)
+ self.assertRaises(ValueError, self.ipv4.supernet, new_prefix=25)
+ self.assertEqual(self.ipv4.supernet(prefixlen_diff=2),
+ self.ipv4.supernet(new_prefix=22))
+
+ self.assertRaises(ValueError, self.ipv6.supernet, prefixlen_diff=2,
+ new_prefix=1)
+ self.assertRaises(ValueError, self.ipv6.supernet, new_prefix=65)
+ self.assertEqual(self.ipv6.supernet(prefixlen_diff=2),
+ self.ipv6.supernet(new_prefix=62))
+
+ def testIterSubnets(self):
+ self.assertEqual(self.ipv4.subnet(), list(self.ipv4.iter_subnets()))
+ self.assertEqual(self.ipv6.subnet(), list(self.ipv6.iter_subnets()))
+
+ def testFancySubnetting(self):
+ self.assertEqual(sorted(self.ipv4.subnet(prefixlen_diff=3)),
+ sorted(self.ipv4.subnet(new_prefix=27)))
+ self.assertRaises(ValueError, self.ipv4.subnet, new_prefix=23)
+ self.assertRaises(ValueError, self.ipv4.subnet,
+ prefixlen_diff=3, new_prefix=27)
+ self.assertEqual(sorted(self.ipv6.subnet(prefixlen_diff=4)),
+ sorted(self.ipv6.subnet(new_prefix=68)))
+ self.assertRaises(ValueError, self.ipv6.subnet, new_prefix=63)
+ self.assertRaises(ValueError, self.ipv6.subnet,
+ prefixlen_diff=4, new_prefix=68)
+
+ def testGetSubnet(self):
+ self.assertEqual(self.ipv4.subnet()[0].prefixlen, 25)
+ self.assertEqual(str(self.ipv4.subnet()[0].network), '1.2.3.0')
+ self.assertEqual(str(self.ipv4.subnet()[1].network), '1.2.3.128')
+
+ self.assertEqual(self.ipv6.subnet()[0].prefixlen, 65)
+
+ def testGetSubnetForSingle32(self):
+ ip = ipaddr.IPv4Network('1.2.3.4/32')
+ subnets1 = [str(x) for x in ip.subnet()]
+ subnets2 = [str(x) for x in ip.subnet(2)]
+ self.assertEqual(subnets1, ['1.2.3.4/32'])
+ self.assertEqual(subnets1, subnets2)
+
+ def testGetSubnetForSingle128(self):
+ ip = ipaddr.IPv6Network('::1/128')
+ subnets1 = [str(x) for x in ip.subnet()]
+ subnets2 = [str(x) for x in ip.subnet(2)]
+ self.assertEqual(subnets1, ['::1/128'])
+ self.assertEqual(subnets1, subnets2)
+
+ def testSubnet2(self):
+ ips = [str(x) for x in self.ipv4.subnet(2)]
+ self.assertEqual(
+ ips,
+ ['1.2.3.0/26', '1.2.3.64/26', '1.2.3.128/26', '1.2.3.192/26'])
+
+ ipsv6 = [str(x) for x in self.ipv6.subnet(2)]
+ self.assertEqual(
+ ipsv6,
+ ['2001:658:22a:cafe::/66',
+ '2001:658:22a:cafe:4000::/66',
+ '2001:658:22a:cafe:8000::/66',
+ '2001:658:22a:cafe:c000::/66'])
+
+ def testSubnetFailsForLargeCidrDiff(self):
+ self.assertRaises(ValueError, self.ipv4.subnet, 9)
+ self.assertRaises(ValueError, self.ipv6.subnet, 65)
+
+ def testSupernetFailsForLargeCidrDiff(self):
+ self.assertRaises(ValueError, self.ipv4.supernet, 25)
+ self.assertRaises(ValueError, self.ipv6.supernet, 65)
+
+ def testSubnetFailsForNegativeCidrDiff(self):
+ self.assertRaises(ValueError, self.ipv4.subnet, -1)
+ self.assertRaises(ValueError, self.ipv6.subnet, -1)
+
+ def testGetNumHosts(self):
+ self.assertEqual(self.ipv4.numhosts, 256)
+ self.assertEqual(self.ipv4.subnet()[0].numhosts, 128)
+ self.assertEqual(self.ipv4.supernet().numhosts, 512)
+
+ self.assertEqual(self.ipv6.numhosts, 18446744073709551616)
+ self.assertEqual(self.ipv6.subnet()[0].numhosts, 9223372036854775808)
+ self.assertEqual(self.ipv6.supernet().numhosts, 36893488147419103232)
+
+ def testContains(self):
+ self.assertTrue(ipaddr.IPv4Network('1.2.3.128/25') in self.ipv4)
+ self.assertFalse(ipaddr.IPv4Network('1.2.4.1/24') in self.ipv4)
+ self.assertTrue(self.ipv4 in self.ipv4)
+ self.assertTrue(self.ipv6 in self.ipv6)
+ # We can test addresses and string as well.
+ addr1 = ipaddr.IPv4Address('1.2.3.37')
+ self.assertTrue(addr1 in self.ipv4)
+ # issue 61, bad network comparison on like-ip'd network objects
+ # with identical broadcast addresses.
+ self.assertFalse(ipaddr.IPv4Network('1.1.0.0/16').__contains__(
+ ipaddr.IPv4Network('1.0.0.0/15')))
+
+ def testBadAddress(self):
+ self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv4Network,
+ 'poop')
+ self.assertRaises(ipaddr.AddressValueError,
+ ipaddr.IPv4Network, '1.2.3.256')
+
+ self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv6Network,
+ 'poopv6')
+ self.assertRaises(ipaddr.AddressValueError,
+ ipaddr.IPv4Network, '1.2.3.4/32/24')
+ self.assertRaises(ipaddr.AddressValueError,
+ ipaddr.IPv4Network, '10/8')
+ self.assertRaises(ipaddr.AddressValueError,
+ ipaddr.IPv6Network, '10/8')
+
+
+ def testBadNetMask(self):
+ self.assertRaises(ipaddr.NetmaskValueError,
+ ipaddr.IPv4Network, '1.2.3.4/')
+ self.assertRaises(ipaddr.NetmaskValueError,
+ ipaddr.IPv4Network, '1.2.3.4/33')
+ self.assertRaises(ipaddr.NetmaskValueError,
+ ipaddr.IPv4Network, '1.2.3.4/254.254.255.256')
+ self.assertRaises(ipaddr.NetmaskValueError,
+ ipaddr.IPv4Network, '1.1.1.1/240.255.0.0')
+ self.assertRaises(ipaddr.NetmaskValueError,
+ ipaddr.IPv6Network, '::1/')
+ self.assertRaises(ipaddr.NetmaskValueError,
+ ipaddr.IPv6Network, '::1/129')
+
+ def testNth(self):
+ self.assertEqual(str(self.ipv4[5]), '1.2.3.5')
+ self.assertRaises(IndexError, self.ipv4.__getitem__, 256)
+
+ self.assertEqual(str(self.ipv6[5]),
+ '2001:658:22a:cafe::5')
+
+ def testGetitem(self):
+ # http://code.google.com/p/ipaddr-py/issues/detail?id=15
+ addr = ipaddr.IPv4Network('172.31.255.128/255.255.255.240')
+ self.assertEqual(28, addr.prefixlen)
+ addr_list = list(addr)
+ self.assertEqual('172.31.255.128', str(addr_list[0]))
+ self.assertEqual('172.31.255.128', str(addr[0]))
+ self.assertEqual('172.31.255.143', str(addr_list[-1]))
+ self.assertEqual('172.31.255.143', str(addr[-1]))
+ self.assertEqual(addr_list[-1], addr[-1])
+
+ def testEqual(self):
+ self.assertTrue(self.ipv4 == ipaddr.IPv4Network('1.2.3.4/24'))
+ self.assertFalse(self.ipv4 == ipaddr.IPv4Network('1.2.3.4/23'))
+ self.assertFalse(self.ipv4 == ipaddr.IPv6Network('::1.2.3.4/24'))
+ self.assertFalse(self.ipv4 == '')
+ self.assertFalse(self.ipv4 == [])
+ self.assertFalse(self.ipv4 == 2)
+ self.assertTrue(ipaddr.IPNetwork('1.1.1.1/32') ==
+ ipaddr.IPAddress('1.1.1.1'))
+ self.assertTrue(ipaddr.IPNetwork('1.1.1.1/24') ==
+ ipaddr.IPAddress('1.1.1.1'))
+ self.assertFalse(ipaddr.IPNetwork('1.1.1.0/24') ==
+ ipaddr.IPAddress('1.1.1.1'))
+
+ self.assertTrue(self.ipv6 ==
+ ipaddr.IPv6Network('2001:658:22a:cafe:200::1/64'))
+ self.assertTrue(ipaddr.IPNetwork('::1/128') ==
+ ipaddr.IPAddress('::1'))
+ self.assertTrue(ipaddr.IPNetwork('::1/127') ==
+ ipaddr.IPAddress('::1'))
+ self.assertFalse(ipaddr.IPNetwork('::0/127') ==
+ ipaddr.IPAddress('::1'))
+ self.assertFalse(self.ipv6 ==
+ ipaddr.IPv6Network('2001:658:22a:cafe:200::1/63'))
+ self.assertFalse(self.ipv6 == ipaddr.IPv4Network('1.2.3.4/23'))
+ self.assertFalse(self.ipv6 == '')
+ self.assertFalse(self.ipv6 == [])
+ self.assertFalse(self.ipv6 == 2)
+
+ def testNotEqual(self):
+ self.assertFalse(self.ipv4 != ipaddr.IPv4Network('1.2.3.4/24'))
+ self.assertTrue(self.ipv4 != ipaddr.IPv4Network('1.2.3.4/23'))
+ self.assertTrue(self.ipv4 != ipaddr.IPv6Network('::1.2.3.4/24'))
+ self.assertTrue(self.ipv4 != '')
+ self.assertTrue(self.ipv4 != [])
+ self.assertTrue(self.ipv4 != 2)
+
+ addr2 = ipaddr.IPAddress('2001:658:22a:cafe:200::1')
+ self.assertFalse(self.ipv6 !=
+ ipaddr.IPv6Network('2001:658:22a:cafe:200::1/64'))
+ self.assertTrue(self.ipv6 !=
+ ipaddr.IPv6Network('2001:658:22a:cafe:200::1/63'))
+ self.assertTrue(self.ipv6 != ipaddr.IPv4Network('1.2.3.4/23'))
+ self.assertTrue(self.ipv6 != '')
+ self.assertTrue(self.ipv6 != [])
+ self.assertTrue(self.ipv6 != 2)
+
+ def testSlash32Constructor(self):
+ self.assertEqual(str(ipaddr.IPv4Network('1.2.3.4/255.255.255.255')),
+ '1.2.3.4/32')
+
+ def testSlash128Constructor(self):
+ self.assertEqual(str(ipaddr.IPv6Network('::1/128')),
+ '::1/128')
+
+ def testSlash0Constructor(self):
+ self.assertEqual(str(ipaddr.IPv4Network('1.2.3.4/0.0.0.0')),
+ '1.2.3.4/0')
+
+ def testCollapsing(self):
+ # test only IP addresses including some duplicates
+ ip1 = ipaddr.IPv4Address('1.1.1.0')
+ ip2 = ipaddr.IPv4Address('1.1.1.1')
+ ip3 = ipaddr.IPv4Address('1.1.1.2')
+ ip4 = ipaddr.IPv4Address('1.1.1.3')
+ ip5 = ipaddr.IPv4Address('1.1.1.4')
+ ip6 = ipaddr.IPv4Address('1.1.1.0')
+ # check that addreses are subsumed properly.
+ collapsed = ipaddr.collapse_address_list([ip1, ip2, ip3, ip4, ip5, ip6])
+ self.assertEqual(collapsed, [ipaddr.IPv4Network('1.1.1.0/30'),
+ ipaddr.IPv4Network('1.1.1.4/32')])
+
+ # test a mix of IP addresses and networks including some duplicates
+ ip1 = ipaddr.IPv4Address('1.1.1.0')
+ ip2 = ipaddr.IPv4Address('1.1.1.1')
+ ip3 = ipaddr.IPv4Address('1.1.1.2')
+ ip4 = ipaddr.IPv4Address('1.1.1.3')
+ ip5 = ipaddr.IPv4Network('1.1.1.4/30')
+ ip6 = ipaddr.IPv4Network('1.1.1.4/30')
+ # check that addreses are subsumed properly.
+ collapsed = ipaddr.collapse_address_list([ip5, ip1, ip2, ip3, ip4, ip6])
+ self.assertEqual(collapsed, [ipaddr.IPv4Network('1.1.1.0/29')])
+
+ # test only IP networks
+ ip1 = ipaddr.IPv4Network('1.1.0.0/24')
+ ip2 = ipaddr.IPv4Network('1.1.1.0/24')
+ ip3 = ipaddr.IPv4Network('1.1.2.0/24')
+ ip4 = ipaddr.IPv4Network('1.1.3.0/24')
+ ip5 = ipaddr.IPv4Network('1.1.4.0/24')
+ # stored in no particular order b/c we want CollapseAddr to call [].sort
+ ip6 = ipaddr.IPv4Network('1.1.0.0/22')
+ # check that addreses are subsumed properly.
+ collapsed = ipaddr.collapse_address_list([ip1, ip2, ip3, ip4, ip5, ip6])
+ self.assertEqual(collapsed, [ipaddr.IPv4Network('1.1.0.0/22'),
+ ipaddr.IPv4Network('1.1.4.0/24')])
+
+ # test that two addresses are supernet'ed properly
+ collapsed = ipaddr.collapse_address_list([ip1, ip2])
+ self.assertEqual(collapsed, [ipaddr.IPv4Network('1.1.0.0/23')])
+
+ # test same IP networks
+ ip_same1 = ip_same2 = ipaddr.IPv4Network('1.1.1.1/32')
+ self.assertEqual(ipaddr.collapse_address_list([ip_same1, ip_same2]),
+ [ip_same1])
+
+ # test same IP addresses
+ ip_same1 = ip_same2 = ipaddr.IPv4Address('1.1.1.1')
+ self.assertEqual(ipaddr.collapse_address_list([ip_same1, ip_same2]),
+ [ipaddr.IPNetwork('1.1.1.1/32')])
+ ip1 = ipaddr.IPv6Network('::2001:1/100')
+ ip2 = ipaddr.IPv6Network('::2002:1/120')
+ ip3 = ipaddr.IPv6Network('::2001:1/96')
+ # test that ipv6 addresses are subsumed properly.
+ collapsed = ipaddr.collapse_address_list([ip1, ip2, ip3])
+ self.assertEqual(collapsed, [ip3])
+
+ # the toejam test
+ ip1 = ipaddr.IPAddress('1.1.1.1')
+ ip2 = ipaddr.IPAddress('::1')
+ self.assertRaises(TypeError, ipaddr.collapse_address_list,
+ [ip1, ip2])
+
+ def testSummarizing(self):
+ #ip = ipaddr.IPAddress
+ #ipnet = ipaddr.IPNetwork
+ summarize = ipaddr.summarize_address_range
+ ip1 = ipaddr.IPAddress('1.1.1.0')
+ ip2 = ipaddr.IPAddress('1.1.1.255')
+ # test a /24 is sumamrized properly
+ self.assertEqual(summarize(ip1, ip2)[0], ipaddr.IPNetwork('1.1.1.0/24'))
+ # test an IPv4 range that isn't on a network byte boundary
+ ip2 = ipaddr.IPAddress('1.1.1.8')
+ self.assertEqual(summarize(ip1, ip2), [ipaddr.IPNetwork('1.1.1.0/29'),
+ ipaddr.IPNetwork('1.1.1.8')])
+
+ ip1 = ipaddr.IPAddress('1::')
+ ip2 = ipaddr.IPAddress('1:ffff:ffff:ffff:ffff:ffff:ffff:ffff')
+ # test a IPv6 is sumamrized properly
+ self.assertEqual(summarize(ip1, ip2)[0], ipaddr.IPNetwork('1::/16'))
+ # test an IPv6 range that isn't on a network byte boundary
+ ip2 = ipaddr.IPAddress('2::')
+ self.assertEqual(summarize(ip1, ip2), [ipaddr.IPNetwork('1::/16'),
+ ipaddr.IPNetwork('2::/128')])
+
+ # test exception raised when first is greater than last
+ self.assertRaises(ValueError, summarize, ipaddr.IPAddress('1.1.1.0'),
+ ipaddr.IPAddress('1.1.0.0'))
+ # test exception raised when first and last aren't IP addresses
+ self.assertRaises(TypeError, summarize,
+ ipaddr.IPNetwork('1.1.1.0'),
+ ipaddr.IPNetwork('1.1.0.0'))
+ self.assertRaises(TypeError, summarize,
+ ipaddr.IPNetwork('1.1.1.0'), ipaddr.IPNetwork('1.1.0.0'))
+ # test exception raised when first and last are not same version
+ self.assertRaises(TypeError, summarize, ipaddr.IPAddress('::'),
+ ipaddr.IPNetwork('1.1.0.0'))
+
+ def testAddressComparison(self):
+ self.assertTrue(ipaddr.IPAddress('1.1.1.1') <=
+ ipaddr.IPAddress('1.1.1.1'))
+ self.assertTrue(ipaddr.IPAddress('1.1.1.1') <=
+ ipaddr.IPAddress('1.1.1.2'))
+ self.assertTrue(ipaddr.IPAddress('::1') <= ipaddr.IPAddress('::1'))
+ self.assertTrue(ipaddr.IPAddress('::1') <= ipaddr.IPAddress('::2'))
+
+ def testNetworkComparison(self):
+ # ip1 and ip2 have the same network address
+ ip1 = ipaddr.IPv4Network('1.1.1.0/24')
+ ip2 = ipaddr.IPv4Network('1.1.1.1/24')
+ ip3 = ipaddr.IPv4Network('1.1.2.0/24')
+
+ self.assertTrue(ip1 < ip3)
+ self.assertTrue(ip3 > ip2)
+
+ self.assertEqual(ip1.compare_networks(ip2), 0)
+ self.assertTrue(ip1._get_networks_key() == ip2._get_networks_key())
+ self.assertEqual(ip1.compare_networks(ip3), -1)
+ self.assertTrue(ip1._get_networks_key() < ip3._get_networks_key())
+
+ ip1 = ipaddr.IPv6Network('2001::2000/96')
+ ip2 = ipaddr.IPv6Network('2001::2001/96')
+ ip3 = ipaddr.IPv6Network('2001:ffff::2000/96')
+
+ self.assertTrue(ip1 < ip3)
+ self.assertTrue(ip3 > ip2)
+ self.assertEqual(ip1.compare_networks(ip2), 0)
+ self.assertTrue(ip1._get_networks_key() == ip2._get_networks_key())
+ self.assertEqual(ip1.compare_networks(ip3), -1)
+ self.assertTrue(ip1._get_networks_key() < ip3._get_networks_key())
+
+ # Test comparing different protocols.
+ # Should always raise a TypeError.
+ ipv6 = ipaddr.IPv6Network('::/0')
+ ipv4 = ipaddr.IPv4Network('0.0.0.0/0')
+ self.assertRaises(TypeError, ipv4.__lt__, ipv6)
+ self.assertRaises(TypeError, ipv4.__gt__, ipv6)
+ self.assertRaises(TypeError, ipv6.__lt__, ipv4)
+ self.assertRaises(TypeError, ipv6.__gt__, ipv4)
+
+ # Regression test for issue 19.
+ ip1 = ipaddr.IPNetwork('10.1.2.128/25')
+ self.assertFalse(ip1 < ip1)
+ self.assertFalse(ip1 > ip1)
+ ip2 = ipaddr.IPNetwork('10.1.3.0/24')
+ self.assertTrue(ip1 < ip2)
+ self.assertFalse(ip2 < ip1)
+ self.assertFalse(ip1 > ip2)
+ self.assertTrue(ip2 > ip1)
+ ip3 = ipaddr.IPNetwork('10.1.3.0/25')
+ self.assertTrue(ip2 < ip3)
+ self.assertFalse(ip3 < ip2)
+ self.assertFalse(ip2 > ip3)
+ self.assertTrue(ip3 > ip2)
+
+ # Regression test for issue 28.
+ ip1 = ipaddr.IPNetwork('10.10.10.0/31')
+ ip2 = ipaddr.IPNetwork('10.10.10.0')
+ ip3 = ipaddr.IPNetwork('10.10.10.2/31')
+ ip4 = ipaddr.IPNetwork('10.10.10.2')
+ sorted = [ip1, ip2, ip3, ip4]
+ unsorted = [ip2, ip4, ip1, ip3]
+ unsorted.sort()
+ self.assertEqual(sorted, unsorted)
+ unsorted = [ip4, ip1, ip3, ip2]
+ unsorted.sort()
+ self.assertEqual(sorted, unsorted)
+ self.assertRaises(TypeError, ip1.__lt__, ipaddr.IPAddress('10.10.10.0'))
+ self.assertRaises(TypeError, ip2.__lt__, ipaddr.IPAddress('10.10.10.0'))
+
+ # <=, >=
+ self.assertTrue(ipaddr.IPNetwork('1.1.1.1') <=
+ ipaddr.IPNetwork('1.1.1.1'))
+ self.assertTrue(ipaddr.IPNetwork('1.1.1.1') <=
+ ipaddr.IPNetwork('1.1.1.2'))
+ self.assertFalse(ipaddr.IPNetwork('1.1.1.2') <=
+ ipaddr.IPNetwork('1.1.1.1'))
+ self.assertTrue(ipaddr.IPNetwork('::1') <= ipaddr.IPNetwork('::1'))
+ self.assertTrue(ipaddr.IPNetwork('::1') <= ipaddr.IPNetwork('::2'))
+ self.assertFalse(ipaddr.IPNetwork('::2') <= ipaddr.IPNetwork('::1'))
+
+ def testStrictNetworks(self):
+ self.assertRaises(ValueError, ipaddr.IPNetwork, '192.168.1.1/24',
+ strict=True)
+ self.assertRaises(ValueError, ipaddr.IPNetwork, '::1/120', strict=True)
+
+ def testOverlaps(self):
+ other = ipaddr.IPv4Network('1.2.3.0/30')
+ other2 = ipaddr.IPv4Network('1.2.2.0/24')
+ other3 = ipaddr.IPv4Network('1.2.2.64/26')
+ self.assertTrue(self.ipv4.overlaps(other))
+ self.assertFalse(self.ipv4.overlaps(other2))
+ self.assertTrue(other2.overlaps(other3))
+
+ def testEmbeddedIpv4(self):
+ ipv4_string = '192.168.0.1'
+ ipv4 = ipaddr.IPv4Network(ipv4_string)
+ v4compat_ipv6 = ipaddr.IPv6Network('::%s' % ipv4_string)
+ self.assertEqual(int(v4compat_ipv6.ip), int(ipv4.ip))
+ v4mapped_ipv6 = ipaddr.IPv6Network('::ffff:%s' % ipv4_string)
+ self.assertNotEqual(v4mapped_ipv6.ip, ipv4.ip)
+ self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv6Network,
+ '2001:1.1.1.1:1.1.1.1')
+
+ # Issue 67: IPv6 with embedded IPv4 address not recognized.
+ def testIPv6AddressTooLarge(self):
+ # RFC4291 2.5.5.2
+ self.assertEqual(ipaddr.IPAddress('::FFFF:192.0.2.1'),
+ ipaddr.IPAddress('::FFFF:c000:201'))
+ # RFC4291 2.2 (part 3) x::d.d.d.d
+ self.assertEqual(ipaddr.IPAddress('FFFF::192.0.2.1'),
+ ipaddr.IPAddress('FFFF::c000:201'))
+
+ def testIPVersion(self):
+ self.assertEqual(self.ipv4.version, 4)
+ self.assertEqual(self.ipv6.version, 6)
+
+ def testMaxPrefixLength(self):
+ self.assertEqual(self.ipv4.max_prefixlen, 32)
+ self.assertEqual(self.ipv6.max_prefixlen, 128)
+
+ def testPacked(self):
+ self.assertEqual(self.ipv4.packed,
+ _cb('\x01\x02\x03\x04'))
+ self.assertEqual(ipaddr.IPv4Network('255.254.253.252').packed,
+ _cb('\xff\xfe\xfd\xfc'))
+ self.assertEqual(self.ipv6.packed,
+ _cb('\x20\x01\x06\x58\x02\x2a\xca\xfe'
+ '\x02\x00\x00\x00\x00\x00\x00\x01'))
+ self.assertEqual(ipaddr.IPv6Network('ffff:2:3:4:ffff::').packed,
+ _cb('\xff\xff\x00\x02\x00\x03\x00\x04\xff\xff'
+ + '\x00' * 6))
+ self.assertEqual(ipaddr.IPv6Network('::1:0:0:0:0').packed,
+ _cb('\x00' * 6 + '\x00\x01' + '\x00' * 8))
+
+ def testIpStrFromPrefixlen(self):
+ ipv4 = ipaddr.IPv4Network('1.2.3.4/24')
+ self.assertEqual(ipv4._ip_string_from_prefix(), '255.255.255.0')
+ self.assertEqual(ipv4._ip_string_from_prefix(28), '255.255.255.240')
+
+ def testIpType(self):
+ ipv4net = ipaddr.IPNetwork('1.2.3.4')
+ ipv4addr = ipaddr.IPAddress('1.2.3.4')
+ ipv6net = ipaddr.IPNetwork('::1.2.3.4')
+ ipv6addr = ipaddr.IPAddress('::1.2.3.4')
+ self.assertEqual(ipaddr.IPv4Network, type(ipv4net))
+ self.assertEqual(ipaddr.IPv4Address, type(ipv4addr))
+ self.assertEqual(ipaddr.IPv6Network, type(ipv6net))
+ self.assertEqual(ipaddr.IPv6Address, type(ipv6addr))
+
+ def testReservedIpv4(self):
+ # test networks
+ self.assertEqual(True, ipaddr.IPNetwork('224.1.1.1/31').is_multicast)
+ self.assertEqual(False, ipaddr.IPNetwork('240.0.0.0').is_multicast)
+
+ self.assertEqual(True, ipaddr.IPNetwork('192.168.1.1/17').is_private)
+ self.assertEqual(False, ipaddr.IPNetwork('192.169.0.0').is_private)
+ self.assertEqual(True, ipaddr.IPNetwork('10.255.255.255').is_private)
+ self.assertEqual(False, ipaddr.IPNetwork('11.0.0.0').is_private)
+ self.assertEqual(True, ipaddr.IPNetwork('172.31.255.255').is_private)
+ self.assertEqual(False, ipaddr.IPNetwork('172.32.0.0').is_private)
+
+ self.assertEqual(True,
+ ipaddr.IPNetwork('169.254.100.200/24').is_link_local)
+ self.assertEqual(False,
+ ipaddr.IPNetwork('169.255.100.200/24').is_link_local)
+
+ self.assertEqual(True,
+ ipaddr.IPNetwork('127.100.200.254/32').is_loopback)
+ self.assertEqual(True, ipaddr.IPNetwork('127.42.0.0/16').is_loopback)
+ self.assertEqual(False, ipaddr.IPNetwork('128.0.0.0').is_loopback)
+
+ # test addresses
+ self.assertEqual(True, ipaddr.IPAddress('224.1.1.1').is_multicast)
+ self.assertEqual(False, ipaddr.IPAddress('240.0.0.0').is_multicast)
+
+ self.assertEqual(True, ipaddr.IPAddress('192.168.1.1').is_private)
+ self.assertEqual(False, ipaddr.IPAddress('192.169.0.0').is_private)
+ self.assertEqual(True, ipaddr.IPAddress('10.255.255.255').is_private)
+ self.assertEqual(False, ipaddr.IPAddress('11.0.0.0').is_private)
+ self.assertEqual(True, ipaddr.IPAddress('172.31.255.255').is_private)
+ self.assertEqual(False, ipaddr.IPAddress('172.32.0.0').is_private)
+
+ self.assertEqual(True,
+ ipaddr.IPAddress('169.254.100.200').is_link_local)
+ self.assertEqual(False,
+ ipaddr.IPAddress('169.255.100.200').is_link_local)
+
+ self.assertEqual(True,
+ ipaddr.IPAddress('127.100.200.254').is_loopback)
+ self.assertEqual(True, ipaddr.IPAddress('127.42.0.0').is_loopback)
+ self.assertEqual(False, ipaddr.IPAddress('128.0.0.0').is_loopback)
+ self.assertEqual(True, ipaddr.IPNetwork('0.0.0.0').is_unspecified)
+
+ def testReservedIpv6(self):
+
+ self.assertEqual(True, ipaddr.IPNetwork('ffff::').is_multicast)
+ self.assertEqual(True, ipaddr.IPNetwork(2**128-1).is_multicast)
+ self.assertEqual(True, ipaddr.IPNetwork('ff00::').is_multicast)
+ self.assertEqual(False, ipaddr.IPNetwork('fdff::').is_multicast)
+
+ self.assertEqual(True, ipaddr.IPNetwork('fecf::').is_site_local)
+ self.assertEqual(True, ipaddr.IPNetwork(
+ 'feff:ffff:ffff:ffff::').is_site_local)
+ self.assertEqual(False, ipaddr.IPNetwork('fbf:ffff::').is_site_local)
+ self.assertEqual(False, ipaddr.IPNetwork('ff00::').is_site_local)
+
+ self.assertEqual(True, ipaddr.IPNetwork('fc00::').is_private)
+ self.assertEqual(True, ipaddr.IPNetwork(
+ 'fc00:ffff:ffff:ffff::').is_private)
+ self.assertEqual(False, ipaddr.IPNetwork('fbff:ffff::').is_private)
+ self.assertEqual(False, ipaddr.IPNetwork('fe00::').is_private)
+
+ self.assertEqual(True, ipaddr.IPNetwork('fea0::').is_link_local)
+ self.assertEqual(True, ipaddr.IPNetwork('febf:ffff::').is_link_local)
+ self.assertEqual(False, ipaddr.IPNetwork('fe7f:ffff::').is_link_local)
+ self.assertEqual(False, ipaddr.IPNetwork('fec0::').is_link_local)
+
+ self.assertEqual(True, ipaddr.IPNetwork('0:0::0:01').is_loopback)
+ self.assertEqual(False, ipaddr.IPNetwork('::1/127').is_loopback)
+ self.assertEqual(False, ipaddr.IPNetwork('::').is_loopback)
+ self.assertEqual(False, ipaddr.IPNetwork('::2').is_loopback)
+
+ self.assertEqual(True, ipaddr.IPNetwork('0::0').is_unspecified)
+ self.assertEqual(False, ipaddr.IPNetwork('::1').is_unspecified)
+ self.assertEqual(False, ipaddr.IPNetwork('::/127').is_unspecified)
+
+ # test addresses
+ self.assertEqual(True, ipaddr.IPAddress('ffff::').is_multicast)
+ self.assertEqual(True, ipaddr.IPAddress(2**128-1).is_multicast)
+ self.assertEqual(True, ipaddr.IPAddress('ff00::').is_multicast)
+ self.assertEqual(False, ipaddr.IPAddress('fdff::').is_multicast)
+
+ self.assertEqual(True, ipaddr.IPAddress('fecf::').is_site_local)
+ self.assertEqual(True, ipaddr.IPAddress(
+ 'feff:ffff:ffff:ffff::').is_site_local)
+ self.assertEqual(False, ipaddr.IPAddress('fbf:ffff::').is_site_local)
+ self.assertEqual(False, ipaddr.IPAddress('ff00::').is_site_local)
+
+ self.assertEqual(True, ipaddr.IPAddress('fc00::').is_private)
+ self.assertEqual(True, ipaddr.IPAddress(
+ 'fc00:ffff:ffff:ffff::').is_private)
+ self.assertEqual(False, ipaddr.IPAddress('fbff:ffff::').is_private)
+ self.assertEqual(False, ipaddr.IPAddress('fe00::').is_private)
+
+ self.assertEqual(True, ipaddr.IPAddress('fea0::').is_link_local)
+ self.assertEqual(True, ipaddr.IPAddress('febf:ffff::').is_link_local)
+ self.assertEqual(False, ipaddr.IPAddress('fe7f:ffff::').is_link_local)
+ self.assertEqual(False, ipaddr.IPAddress('fec0::').is_link_local)
+
+ self.assertEqual(True, ipaddr.IPAddress('0:0::0:01').is_loopback)
+ self.assertEqual(True, ipaddr.IPAddress('::1').is_loopback)
+ self.assertEqual(False, ipaddr.IPAddress('::2').is_loopback)
+
+ self.assertEqual(True, ipaddr.IPAddress('0::0').is_unspecified)
+ self.assertEqual(False, ipaddr.IPAddress('::1').is_unspecified)
+
+ # some generic IETF reserved addresses
+ self.assertEqual(True, ipaddr.IPAddress('100::').is_reserved)
+ self.assertEqual(True, ipaddr.IPNetwork('4000::1/128').is_reserved)
+
+ def testIpv4Mapped(self):
+ self.assertEqual(ipaddr.IPAddress('::ffff:192.168.1.1').ipv4_mapped,
+ ipaddr.IPAddress('192.168.1.1'))
+ self.assertEqual(ipaddr.IPAddress('::c0a8:101').ipv4_mapped, None)
+ self.assertEqual(ipaddr.IPAddress('::ffff:c0a8:101').ipv4_mapped,
+ ipaddr.IPAddress('192.168.1.1'))
+
+ def testAddrExclude(self):
+ addr1 = ipaddr.IPNetwork('10.1.1.0/24')
+ addr2 = ipaddr.IPNetwork('10.1.1.0/26')
+ addr3 = ipaddr.IPNetwork('10.2.1.0/24')
+ addr4 = ipaddr.IPAddress('10.1.1.0')
+ self.assertEqual(addr1.address_exclude(addr2),
+ [ipaddr.IPNetwork('10.1.1.64/26'),
+ ipaddr.IPNetwork('10.1.1.128/25')])
+ self.assertRaises(ValueError, addr1.address_exclude, addr3)
+ self.assertRaises(TypeError, addr1.address_exclude, addr4)
+ self.assertEqual(addr1.address_exclude(addr1), [])
+
+ def testHash(self):
+ self.assertEqual(hash(ipaddr.IPNetwork('10.1.1.0/24')),
+ hash(ipaddr.IPNetwork('10.1.1.0/24')))
+ self.assertEqual(hash(ipaddr.IPAddress('10.1.1.0')),
+ hash(ipaddr.IPAddress('10.1.1.0')))
+ # i70
+ self.assertEqual(hash(ipaddr.IPAddress('1.2.3.4')),
+ hash(ipaddr.IPAddress(
+ long(ipaddr.IPAddress('1.2.3.4')._ip))))
+ ip1 = ipaddr.IPAddress('10.1.1.0')
+ ip2 = ipaddr.IPAddress('1::')
+ dummy = {}
+ dummy[self.ipv4] = None
+ dummy[self.ipv6] = None
+ dummy[ip1] = None
+ dummy[ip2] = None
+ self.assertTrue(self.ipv4 in dummy)
+ self.assertTrue(ip2 in dummy)
+
+ def testCopyConstructor(self):
+ addr1 = ipaddr.IPNetwork('10.1.1.0/24')
+ addr2 = ipaddr.IPNetwork(addr1)
+ addr3 = ipaddr.IPNetwork('2001:658:22a:cafe:200::1/64')
+ addr4 = ipaddr.IPNetwork(addr3)
+ addr5 = ipaddr.IPv4Address('1.1.1.1')
+ addr6 = ipaddr.IPv6Address('2001:658:22a:cafe:200::1')
+
+ self.assertEqual(addr1, addr2)
+ self.assertEqual(addr3, addr4)
+ self.assertEqual(addr5, ipaddr.IPv4Address(addr5))
+ self.assertEqual(addr6, ipaddr.IPv6Address(addr6))
+
+ def testCompressIPv6Address(self):
+ test_addresses = {
+ '1:2:3:4:5:6:7:8': '1:2:3:4:5:6:7:8/128',
+ '2001:0:0:4:0:0:0:8': '2001:0:0:4::8/128',
+ '2001:0:0:4:5:6:7:8': '2001::4:5:6:7:8/128',
+ '2001:0:3:4:5:6:7:8': '2001:0:3:4:5:6:7:8/128',
+ '2001:0:3:4:5:6:7:8': '2001:0:3:4:5:6:7:8/128',
+ '0:0:3:0:0:0:0:ffff': '0:0:3::ffff/128',
+ '0:0:0:4:0:0:0:ffff': '::4:0:0:0:ffff/128',
+ '0:0:0:0:5:0:0:ffff': '::5:0:0:ffff/128',
+ '1:0:0:4:0:0:7:8': '1::4:0:0:7:8/128',
+ '0:0:0:0:0:0:0:0': '::/128',
+ '0:0:0:0:0:0:0:0/0': '::/0',
+ '0:0:0:0:0:0:0:1': '::1/128',
+ '2001:0658:022a:cafe:0000:0000:0000:0000/66':
+ '2001:658:22a:cafe::/66',
+ '::1.2.3.4': '::102:304/128',
+ '1:2:3:4:5:ffff:1.2.3.4': '1:2:3:4:5:ffff:102:304/128',
+ '::7:6:5:4:3:2:1': '0:7:6:5:4:3:2:1/128',
+ '::7:6:5:4:3:2:0': '0:7:6:5:4:3:2:0/128',
+ '7:6:5:4:3:2:1::': '7:6:5:4:3:2:1:0/128',
+ '0:6:5:4:3:2:1::': '0:6:5:4:3:2:1:0/128',
+ }
+ for uncompressed, compressed in test_addresses.items():
+ self.assertEqual(compressed, str(ipaddr.IPv6Network(uncompressed)))
+
+ def testExplodeShortHandIpStr(self):
+ addr1 = ipaddr.IPv6Network('2001::1')
+ addr2 = ipaddr.IPv6Address('2001:0:5ef5:79fd:0:59d:a0e5:ba1')
+ self.assertEqual('2001:0000:0000:0000:0000:0000:0000:0001',
+ addr1._explode_shorthand_ip_string(str(addr1.ip)))
+ self.assertEqual('0000:0000:0000:0000:0000:0000:0000:0001',
+ ipaddr.IPv6Network('::1/128').exploded)
+ # issue 77
+ self.assertEqual('2001:0000:5ef5:79fd:0000:059d:a0e5:0ba1',
+ addr2.exploded)
+
+ def testIntRepresentation(self):
+ self.assertEqual(16909060, int(self.ipv4))
+ self.assertEqual(42540616829182469433547762482097946625, int(self.ipv6))
+
+ def testHexRepresentation(self):
+ self.assertEqual(hex(0x1020304),
+ hex(self.ipv4))
+
+ self.assertEqual(hex(0x20010658022ACAFE0200000000000001),
+ hex(self.ipv6))
+
+ # backwards compatibility
+ def testBackwardsCompability(self):
+ self.assertEqual(ipaddr.CollapseAddrList(
+ [ipaddr.IPNetwork('1.1.0.0/24'), ipaddr.IPNetwork('1.1.1.0/24')]),
+ [ipaddr.IPNetwork('1.1.0.0/23')])
+
+ self.assertEqual(ipaddr.IPNetwork('::42:0/112').AddressExclude(
+ ipaddr.IPNetwork('::42:8000/113')),
+ [ipaddr.IPNetwork('::42:0/113')])
+
+ self.assertTrue(ipaddr.IPNetwork('1::/8').CompareNetworks(
+ ipaddr.IPNetwork('2::/9')) < 0)
+
+ self.assertEqual(ipaddr.IPNetwork('1::/16').Contains(
+ ipaddr.IPNetwork('2::/16')), False)
+
+ self.assertEqual(ipaddr.IPNetwork('0.0.0.0/0').Subnet(),
+ [ipaddr.IPNetwork('0.0.0.0/1'),
+ ipaddr.IPNetwork('128.0.0.0/1')])
+ self.assertEqual(ipaddr.IPNetwork('::/127').Subnet(),
+ [ipaddr.IPNetwork('::/128'),
+ ipaddr.IPNetwork('::1/128')])
+
+ self.assertEqual(ipaddr.IPNetwork('1.0.0.0/32').Supernet(),
+ ipaddr.IPNetwork('1.0.0.0/31'))
+ self.assertEqual(ipaddr.IPNetwork('::/121').Supernet(),
+ ipaddr.IPNetwork('::/120'))
+
+ self.assertEqual(ipaddr.IPNetwork('10.0.0.2').IsRFC1918(), True)
+ self.assertEqual(ipaddr.IPNetwork('10.0.0.0').IsMulticast(), False)
+ self.assertEqual(ipaddr.IPNetwork('127.255.255.255').IsLoopback(), True)
+ self.assertEqual(ipaddr.IPNetwork('169.255.255.255').IsLinkLocal(),
+ False)
+
+ def testForceVersion(self):
+ self.assertEqual(ipaddr.IPNetwork(1).version, 4)
+ self.assertEqual(ipaddr.IPNetwork(1, version=6).version, 6)
+
+ def testWithStar(self):
+ self.assertEqual(str(self.ipv4.with_prefixlen), "1.2.3.4/24")
+ self.assertEqual(str(self.ipv4.with_netmask), "1.2.3.4/255.255.255.0")
+ self.assertEqual(str(self.ipv4.with_hostmask), "1.2.3.4/0.0.0.255")
+
+ self.assertEqual(str(self.ipv6.with_prefixlen),
+ '2001:658:22a:cafe:200::1/64')
+ # rfc3513 sec 2.3 says that ipv6 only uses cidr notation for
+ # subnets
+ self.assertEqual(str(self.ipv6.with_netmask),
+ '2001:658:22a:cafe:200::1/64')
+ # this probably don't make much sense, but it's included for
+ # compatibility with ipv4
+ self.assertEqual(str(self.ipv6.with_hostmask),
+ '2001:658:22a:cafe:200::1/::ffff:ffff:ffff:ffff')
+
+ def testNetworkElementCaching(self):
+ # V4 - make sure we're empty
+ self.assertFalse(self.ipv4._cache.has_key('network'))
+ self.assertFalse(self.ipv4._cache.has_key('broadcast'))
+ self.assertFalse(self.ipv4._cache.has_key('hostmask'))
+
+ # V4 - populate and test
+ self.assertEqual(self.ipv4.network, ipaddr.IPv4Address('1.2.3.0'))
+ self.assertEqual(self.ipv4.broadcast, ipaddr.IPv4Address('1.2.3.255'))
+ self.assertEqual(self.ipv4.hostmask, ipaddr.IPv4Address('0.0.0.255'))
+
+ # V4 - check we're cached
+ self.assertTrue(self.ipv4._cache.has_key('network'))
+ self.assertTrue(self.ipv4._cache.has_key('broadcast'))
+ self.assertTrue(self.ipv4._cache.has_key('hostmask'))
+
+ # V6 - make sure we're empty
+ self.assertFalse(self.ipv6._cache.has_key('network'))
+ self.assertFalse(self.ipv6._cache.has_key('broadcast'))
+ self.assertFalse(self.ipv6._cache.has_key('hostmask'))
+
+ # V6 - populate and test
+ self.assertEqual(self.ipv6.network,
+ ipaddr.IPv6Address('2001:658:22a:cafe::'))
+ self.assertEqual(self.ipv6.broadcast, ipaddr.IPv6Address(
+ '2001:658:22a:cafe:ffff:ffff:ffff:ffff'))
+ self.assertEqual(self.ipv6.hostmask,
+ ipaddr.IPv6Address('::ffff:ffff:ffff:ffff'))
+
+ # V6 - check we're cached
+ self.assertTrue(self.ipv6._cache.has_key('network'))
+ self.assertTrue(self.ipv6._cache.has_key('broadcast'))
+ self.assertTrue(self.ipv6._cache.has_key('hostmask'))
+
+ def testTeredo(self):
+ # stolen from wikipedia
+ server = ipaddr.IPv4Address('65.54.227.120')
+ client = ipaddr.IPv4Address('192.0.2.45')
+ teredo_addr = '2001:0000:4136:e378:8000:63bf:3fff:fdd2'
+ self.assertEqual((server, client),
+ ipaddr.IPAddress(teredo_addr).teredo)
+ bad_addr = '2000::4136:e378:8000:63bf:3fff:fdd2'
+ self.assertFalse(ipaddr.IPAddress(bad_addr).teredo)
+ bad_addr = '2001:0001:4136:e378:8000:63bf:3fff:fdd2'
+ self.assertFalse(ipaddr.IPAddress(bad_addr).teredo)
+
+ # i77
+ teredo_addr = ipaddr.IPv6Address('2001:0:5ef5:79fd:0:59d:a0e5:ba1')
+ self.assertEqual((ipaddr.IPv4Address('94.245.121.253'),
+ ipaddr.IPv4Address('95.26.244.94')),
+ teredo_addr.teredo)
+
+
+ def testsixtofour(self):
+ sixtofouraddr = ipaddr.IPAddress('2002:ac1d:2d64::1')
+ bad_addr = ipaddr.IPAddress('2000:ac1d:2d64::1')
+ self.assertEqual(ipaddr.IPv4Address('172.29.45.100'),
+ sixtofouraddr.sixtofour)
+ self.assertFalse(bad_addr.sixtofour)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/contrib/ipaddr-py/setup.py b/contrib/ipaddr-py/setup.py
new file mode 100755
index 000000000..33564320e
--- /dev/null
+++ b/contrib/ipaddr-py/setup.py
@@ -0,0 +1,36 @@
+#!/usr/bin/python
+#
+# Copyright 2008 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from distutils.core import setup
+
+import ipaddr
+
+
+setup(name='ipaddr',
+ maintainer='Google',
+ maintainer_email='ipaddr-py-dev@googlegroups.com',
+ version=ipaddr.__version__,
+ url='http://code.google.com/p/ipaddr-py/',
+ license='Apache License, Version 2.0',
+ classifiers=[
+ 'Development Status :: 5 - Production/Stable',
+ 'Intended Audience :: Developers',
+ 'License :: OSI Approved :: Apache Software License',
+ 'Operating System :: OS Independent',
+ 'Topic :: Internet',
+ 'Topic :: Software Development :: Libraries',
+ 'Topic :: System :: Networking'],
+ py_modules=['ipaddr'])
diff --git a/contrib/ipaddr-py/test-2to3.sh b/contrib/ipaddr-py/test-2to3.sh
new file mode 100755
index 000000000..408d665bc
--- /dev/null
+++ b/contrib/ipaddr-py/test-2to3.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+# Converts the python2 ipaddr files to python3 and runs the unit tests
+# with both python versions.
+
+mkdir -p 2to3output && \
+cp -f *.py 2to3output && \
+( cd 2to3output && 2to3 . | patch -p0 ) && \
+py3version=$(python3 --version 2>&1) && \
+echo -e "\nTesting with ${py3version}" && \
+python3 2to3output/ipaddr_test.py && \
+rm -r 2to3output && \
+pyversion=$(python --version 2>&1) && \
+echo -e "\nTesting with ${pyversion}" && \
+./ipaddr_test.py
diff --git a/contrib/libgen/basename_r.c b/contrib/libgen/basename_r.c
new file mode 100644
index 000000000..2c3a87afe
--- /dev/null
+++ b/contrib/libgen/basename_r.c
@@ -0,0 +1,40 @@
+/*
+ * borrowed from glibc-2.12.1/string/basename.c
+ * Modified to return "." for NULL or "", as required for SUSv2.
+ */
+#include <string.h>
+#include <stdlib.h>
+#ifdef THREAD_UNSAFE_BASENAME
+
+/* Return the name-within-directory of a file name.
+ Copyright (C) 1996,97,98,2002 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+char *
+basename_r (filename)
+ const char *filename;
+{
+ char *p;
+
+ if ((filename == NULL) || (*filename == '\0'))
+ return ".";
+
+ p = strrchr (filename, '/');
+ return p ? p + 1 : (char *) filename;
+}
+#endif /* THREAD_UNSAFE_BASENAME */
diff --git a/contrib/libgen/dirname_r.c b/contrib/libgen/dirname_r.c
new file mode 100644
index 000000000..131cbcf2a
--- /dev/null
+++ b/contrib/libgen/dirname_r.c
@@ -0,0 +1,243 @@
+/*
+ * Borrowed from glibc-2.12.1/string/memrchr.c
+ * Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
+ * Removed code for long bigger than 32 bytes, renamed __ptr_t as void *
+ * changed reg_char type to char.
+ */
+#include <string.h>
+#include <stdlib.h>
+#ifdef THREAD_UNSAFE_DIRNAME
+
+/* memrchr -- find the last occurrence of a byte in a memory block
+ Copyright (C) 1991, 93, 96, 97, 99, 2000 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
+ with help from Dan Sahlin (dan@sics.se) and
+ commentary by Jim Blandy (jimb@ai.mit.edu);
+ adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
+ and implemented by Roland McGrath (roland@ai.mit.edu).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+void *
+__memrchr (s, c_in, n)
+ const void * s;
+ int c_in;
+ size_t n;
+{
+ const unsigned char *char_ptr;
+ const unsigned long int *longword_ptr;
+ unsigned long int longword, magic_bits, charmask;
+ unsigned char c;
+
+ c = (unsigned char) c_in;
+
+ /* Handle the last few characters by reading one character at a time.
+ Do this until CHAR_PTR is aligned on a longword boundary. */
+ for (char_ptr = (const unsigned char *) s + n;
+ n > 0 && ((unsigned long int) char_ptr
+ & (sizeof (longword) - 1)) != 0;
+ --n)
+ if (*--char_ptr == c)
+ return (void *) char_ptr;
+
+ /* All these elucidatory comments refer to 4-byte longwords,
+ but the theory applies equally well to 8-byte longwords. */
+
+ longword_ptr = (const unsigned long int *) char_ptr;
+
+ /* Bits 31, 24, 16, and 8 of this number are zero. Call these bits
+ the "holes." Note that there is a hole just to the left of
+ each byte, with an extra at the end:
+
+ bits: 01111110 11111110 11111110 11111111
+ bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
+
+ The 1-bits make sure that carries propagate to the next 0-bit.
+ The 0-bits provide holes for carries to fall into. */
+
+ if (sizeof (longword) != 4 && sizeof (longword) != 8)
+ abort ();
+
+ magic_bits = 0x7efefeff;
+
+ /* Set up a longword, each of whose bytes is C. */
+ charmask = c | (c << 8);
+ charmask |= charmask << 16;
+
+ /* Instead of the traditional loop which tests each character,
+ we will test a longword at a time. The tricky part is testing
+ if *any of the four* bytes in the longword in question are zero. */
+ while (n >= sizeof (longword))
+ {
+ /* We tentatively exit the loop if adding MAGIC_BITS to
+ LONGWORD fails to change any of the hole bits of LONGWORD.
+
+ 1) Is this safe? Will it catch all the zero bytes?
+ Suppose there is a byte with all zeros. Any carry bits
+ propagating from its left will fall into the hole at its
+ least significant bit and stop. Since there will be no
+ carry from its most significant bit, the LSB of the
+ byte to the left will be unchanged, and the zero will be
+ detected.
+
+ 2) Is this worthwhile? Will it ignore everything except
+ zero bytes? Suppose every byte of LONGWORD has a bit set
+ somewhere. There will be a carry into bit 8. If bit 8
+ is set, this will carry into bit 16. If bit 8 is clear,
+ one of bits 9-15 must be set, so there will be a carry
+ into bit 16. Similarly, there will be a carry into bit
+ 24. If one of bits 24-30 is set, there will be a carry
+ into bit 31, so all of the hole bits will be changed.
+
+ The one misfire occurs when bits 24-30 are clear and bit
+ 31 is set; in this case, the hole at bit 31 is not
+ changed. If we had access to the processor carry flag,
+ we could close this loophole by putting the fourth hole
+ at bit 32!
+
+ So it ignores everything except 128's, when they're aligned
+ properly.
+
+ 3) But wait! Aren't we looking for C, not zero?
+ Good point. So what we do is XOR LONGWORD with a longword,
+ each of whose bytes is C. This turns each byte that is C
+ into a zero. */
+
+ longword = *--longword_ptr ^ charmask;
+
+ /* Add MAGIC_BITS to LONGWORD. */
+ if ((((longword + magic_bits)
+
+ /* Set those bits that were unchanged by the addition. */
+ ^ ~longword)
+
+ /* Look at only the hole bits. If any of the hole bits
+ are unchanged, most likely one of the bytes was a
+ zero. */
+ & ~magic_bits) != 0)
+ {
+ /* Which of the bytes was C? If none of them were, it was
+ a misfire; continue the search. */
+
+ const unsigned char *cp = (const unsigned char *) longword_ptr;
+
+ if (cp[3] == c)
+ return (void *) &cp[3];
+ if (cp[2] == c)
+ return (void *) &cp[2];
+ if (cp[1] == c)
+ return (void *) &cp[1];
+ if (cp[0] == c)
+ return (void *) cp;
+ }
+
+ n -= sizeof (longword);
+ }
+
+ char_ptr = (const unsigned char *) longword_ptr;
+
+ while (n-- > 0)
+ {
+ if (*--char_ptr == c)
+ return (void *) char_ptr;
+ }
+
+ return 0;
+}
+
+/*
+ * Borrowed from glibc-2.12.1/misc/dirname.c
+ */
+
+/* dirname - return directory part of PATH.
+ Copyright (C) 1996, 2000, 2001, 2002 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+char *
+dirname_r (char *path)
+{
+ static const char dot[] = ".";
+ char *last_slash;
+
+ /* Find last '/'. */
+ last_slash = path != NULL ? strrchr (path, '/') : NULL;
+
+ if (last_slash != NULL && last_slash != path && last_slash[1] == '\0')
+ {
+ /* Determine whether all remaining characters are slashes. */
+ char *runp;
+
+ for (runp = last_slash; runp != path; --runp)
+ if (runp[-1] != '/')
+ break;
+
+ /* The '/' is the last character, we have to look further. */
+ if (runp != path)
+ last_slash = __memrchr (path, '/', runp - path);
+ }
+
+ if (last_slash != NULL)
+ {
+ /* Determine whether all remaining characters are slashes. */
+ char *runp;
+
+ for (runp = last_slash; runp != path; --runp)
+ if (runp[-1] != '/')
+ break;
+
+ /* Terminate the path. */
+ if (runp == path)
+ {
+ /* The last slash is the first character in the string. We have to
+ return "/". As a special case we have to return "//" if there
+ are exactly two slashes at the beginning of the string. See
+ XBD 4.10 Path Name Resolution for more information. */
+ if (last_slash == path + 1)
+ ++last_slash;
+ else
+ last_slash = path + 1;
+ }
+ else
+ last_slash = runp;
+
+ last_slash[0] = '\0';
+ }
+ else
+ /* This assignment is ill-designed but the XPG specs require to
+ return a string containing "." in any case no directory part is
+ found and so a static and constant string is required. */
+ path = (char *) dot;
+
+ return path;
+}
+#endif /* THREAD_UNSAFE_DIRNAME */
diff --git a/contrib/macfuse/COPYING.txt b/contrib/macfuse/COPYING.txt
new file mode 100644
index 000000000..3f89bb08d
--- /dev/null
+++ b/contrib/macfuse/COPYING.txt
@@ -0,0 +1,128 @@
+MacFUSE is a package developed by Google and is covered under the following
+BSD-style license:
+
+ ================================================================
+ Copyright (c) 2007-2009 Google Inc.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Google Inc. nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ================================================================
+
+Note that Google's patches to the FUSE library (libfuse/*.patch) (and to
+the SSHFS user-space program (filesystems/sshfs/*.patch) are also released
+under the BSD license.
+
+Portions of this package were derived from code developed by other authors.
+Please read further for specific details.
+
+* fusefs/fuse_kernel.h is an unmodified copy of the interface header from
+ the Linux FUSE distribution (http://fuse.sourceforge.net). fuse_kernel.h
+ can be redistributed either under the GPL or under the BSD license. It
+ is being redistributed here under the BSD license.
+
+* Unless otherwise noted, parts of MacFUSE (multiple files in fusefs/) contain
+ code derived from the FreeBSD version of FUSE (http://fuse4bsd.creo.hu),
+ which is covered by the following BSD-style license:
+
+ ================================================================
+ Copyright (C) 2005 Csaba Henk. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ SUCH DAMAGE.
+ ================================================================
+
+* fusefs/fuse_nodehash.c is a modified version of HashNode.c from an
+ Apple Developer Technical Support (DTS) sample code example. The original
+ source, which is available on http://developer.apple.com/samplecode/, has
+ the following disclaimer:
+
+ ================================================================
+ Disclaimer: IMPORTANT: This Apple software is supplied to you by
+ Apple Computer, Inc. Apple") in consideration of your agreement
+ to the following terms, and your use, installation, modification
+ or redistribution of this Apple software constitutes acceptance
+ of these terms. If you do not agree with these terms, please do
+ not use, install, modify or redistribute this Apple software.
+
+ In consideration of your agreement to abide by the following terms,
+ and subject to these terms, Apple grants you a personal, non-exclusive
+ license, under Apple's copyrights in this original Apple software
+ (the "Apple Software"), to use, reproduce, modify and redistribute
+ the Apple Software, with or without modifications, in source and/or
+ binary forms; provided that if you redistribute the Apple Software
+ in its entirety and without modifications, you must retain this
+ notice and the following text and disclaimers in all such
+ redistributions of the Apple Software. Neither the name,
+ trademarks, service marks or logos of Apple Computer, Inc. may be
+ used to endorse or promote products derived from the Apple Software
+ without specific prior written permission from Apple. Except as
+ expressly stated in this notice, no other rights or licenses,
+ express or implied, are granted by Apple herein, including but
+ not limited to any patent rights that may be infringed by your
+ derivative works or by other works in which the Apple Software
+ may be incorporated.
+
+ The Apple Software is provided by Apple on an "AS IS" basis. APPLE
+ MAKES NO WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION
+ THE IMPLIED WARRANTIES OF NON-INFRINGEMENT, MERCHANTABILITY AND
+ FITNESS FOR A PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR
+ ITS USE AND OPERATION ALONE OR IN COMBINATION WITH YOUR PRODUCTS.
+
+ IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT,
+ INCIDENTAL OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) ARISING IN ANY WAY OUT OF THE USE,
+ REPRODUCTION, MODIFICATION AND/OR DISTRIBUTION OF THE APPLE SOFTWARE,
+ HOWEVER CAUSED AND WHETHER UNDER THEORY OF CONTRACT, TORT (INCLUDING
+ NEGLIGENCE), STRICT LIABILITY OR OTHERWISE, EVEN IF APPLE HAS BEEN
+ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ================================================================
+
+* Parts of the mount_fusefs and the load_fusefs command-line programs
+ (implemented in fusefs/mount_fusefs/ and fusefs/load_fusefs/, respectively)
+ come from Apple's Darwin sources and are covered under the Apple Public
+ Source License (APSL). You can read the APSL at:
+
+ http://www.publicsource.apple.com/apsl/
diff --git a/contrib/macfuse/fuse_ioctl.h b/contrib/macfuse/fuse_ioctl.h
new file mode 100644
index 000000000..054968cb1
--- /dev/null
+++ b/contrib/macfuse/fuse_ioctl.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2006-2008 Google. All Rights Reserved.
+ * Amit Singh <singh@>
+ */
+
+#ifndef _FUSE_IOCTL_H_
+#define _FUSE_IOCTL_H_
+
+#include <stdint.h>
+#include <sys/ioctl.h>
+
+/* FUSEDEVIOCxxx */
+
+/* Get mounter's pid. */
+#define FUSEDEVGETMOUNTERPID _IOR('F', 1, u_int32_t)
+
+/* Check if FUSE_INIT kernel-user handshake is complete. */
+#define FUSEDEVIOCGETHANDSHAKECOMPLETE _IOR('F', 2, u_int32_t)
+
+/* Mark the daemon as dead. */
+#define FUSEDEVIOCSETDAEMONDEAD _IOW('F', 3, u_int32_t)
+
+/* Tell the kernel which operations the daemon implements. */
+#define FUSEDEVIOCSETIMPLEMENTEDBITS _IOW('F', 4, u_int64_t)
+
+/* Get device's random "secret". */
+#define FUSEDEVIOCGETRANDOM _IOR('F', 5, u_int32_t)
+
+/*
+ * The 'AVFI' (alter-vnode-for-inode) ioctls all require an inode number
+ * as an argument. In the user-space library, you can get the inode number
+ * from a path by using fuse_lookup_inode_by_path_np() [lib/fuse.c].
+ *
+ * To see an example of using this, see the implementation of
+ * fuse_purge_path_np() in lib/fuse_darwin.c.
+ */
+
+struct fuse_avfi_ioctl {
+ uint64_t inode;
+ uint64_t cmd;
+ uint32_t ubc_flags;
+ uint32_t note;
+ off_t size;
+};
+
+/* Alter the vnode (if any) specified by the given inode. */
+#define FUSEDEVIOCALTERVNODEFORINODE _IOW('F', 6, struct fuse_avfi_ioctl)
+#define FSCTLALTERVNODEFORINODE IOCBASECMD(FUSEDEVIOCALTERVNODEFORINODE)
+
+/*
+ * Possible cmd values for AVFI.
+ */
+
+#define FUSE_AVFI_MARKGONE 0x00000001 /* no ubc_flags */
+#define FUSE_AVFI_PURGEATTRCACHE 0x00000002 /* no ubc_flags */
+#define FUSE_AVFI_PURGEVNCACHE 0x00000004 /* no ubc_flags */
+#define FUSE_AVFI_UBC 0x00000008 /* uses ubc_flags */
+#define FUSE_AVFI_UBC_SETSIZE 0x00000010 /* uses ubc_flags, size */
+#define FUSE_AVFI_KNOTE 0x00000020 /* uses note */
+
+#define FUSE_SETACLSTATE _IOW('h', 10, int32_t)
+#define FSCTLSETACLSTATE IOCBASECMD(FUSE_SETACLSTATE)
+
+#endif /* _FUSE_IOCTL_H_ */
diff --git a/contrib/macfuse/fuse_param.h b/contrib/macfuse/fuse_param.h
new file mode 100644
index 000000000..81d753c6c
--- /dev/null
+++ b/contrib/macfuse/fuse_param.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright (C) 2006-2008 Google. All Rights Reserved.
+ * Amit Singh <singh@>
+ */
+
+#ifndef _FUSE_PARAM_H_
+#define _FUSE_PARAM_H_
+
+/* Compile-time tunables (M_MACFUSE*) */
+
+#define M_MACFUSE_ENABLE_FIFOFS 0
+#define M_MACFUSE_ENABLE_INTERRUPT 1
+#define M_MACFUSE_ENABLE_SPECFS 0
+#define M_MACFUSE_ENABLE_TSLOCKING 0
+#define M_MACFUSE_ENABLE_UNSUPPORTED 1
+#define M_MACFUSE_ENABLE_XATTR 1
+
+#if M_MACFUSE_ENABLE_UNSUPPORTED
+ #define M_MACFUSE_ENABLE_DSELECT 0
+ #define M_MACFUSE_ENABLE_EXCHANGE 1
+ #define M_MACFUSE_ENABLE_KQUEUE 1
+ #define M_MACFUSE_ENABLE_KUNC 0
+#if __LP64__
+ #define M_MACFUSE_ENABLE_INTERIM_FSNODE_LOCK 1
+#endif /* __LP64__ */
+#endif /* M_MACFUSE_ENABLE_UNSUPPORTED */
+
+#if M_MACFUSE_ENABLE_INTERIM_FSNODE_LOCK
+#define FUSE_VNOP_EXPORT __private_extern__
+#else
+#define FUSE_VNOP_EXPORT static
+#endif /* M_MACFUSE_ENABLE_INTERIM_FSNODE_LOCK */
+
+/* User Control */
+
+#define MACFUSE_POSTUNMOUNT_SIGNAL SIGKILL
+
+#define MACOSX_ADMIN_GROUP_NAME "admin"
+
+#define SYSCTL_MACFUSE_TUNABLES_ADMIN "macfuse.tunables.admin_group"
+#define SYSCTL_MACFUSE_VERSION_NUMBER "macfuse.version.number"
+
+/* Paths */
+
+#define MACFUSE_BUNDLE_PATH "/Library/Filesystems/fusefs.fs"
+#define MACFUSE_KEXT MACFUSE_BUNDLE_PATH "/Support/fusefs.kext"
+#define MACFUSE_LOAD_PROG MACFUSE_BUNDLE_PATH "/Support/load_fusefs"
+#define MACFUSE_MOUNT_PROG MACFUSE_BUNDLE_PATH "/Support/mount_fusefs"
+#define SYSTEM_KEXTLOAD "/sbin/kextload"
+#define SYSTEM_KEXTUNLOAD "/sbin/kextunload"
+
+/* Compatible API version */
+
+#define MACFUSE_MIN_USER_VERSION_MAJOR 7
+#define MACFUSE_MIN_USER_VERSION_MINOR 5
+
+/* Device Interface */
+
+/*
+ * This is the prefix ("fuse" by default) of the name of a FUSE device node
+ * in devfs. The suffix is the device number. "/dev/fuse0" is the first FUSE
+ * device by default. If you change the prefix from the default to something
+ * else, the user-space FUSE library will need to know about it too.
+ */
+#define MACFUSE_DEVICE_BASENAME "fuse"
+
+/*
+ * This is the number of /dev/fuse<n> nodes we will create. <n> goes from
+ * 0 to (FUSE_NDEVICES - 1).
+ */
+#define MACFUSE_NDEVICES 24
+
+/*
+ * This is the default block size of the virtual storage devices that are
+ * implicitly implemented by the FUSE kernel extension. This can be changed
+ * on a per-mount basis (there's one such virtual device for each mount).
+ */
+#define FUSE_DEFAULT_BLOCKSIZE 4096
+
+#define FUSE_MIN_BLOCKSIZE 512
+#define FUSE_MAX_BLOCKSIZE MAXPHYS
+
+#ifndef MAX_UPL_TRANSFER
+#define MAX_UPL_TRANSFER 256
+#endif
+
+/*
+ * This is default I/O size used while accessing the virtual storage devices.
+ * This can be changed on a per-mount basis.
+ *
+ * Nevertheless, the I/O size must be at least as big as the block size.
+ */
+#define FUSE_DEFAULT_IOSIZE (16 * PAGE_SIZE)
+
+#define FUSE_MIN_IOSIZE 512
+#define FUSE_MAX_IOSIZE (MAX_UPL_TRANSFER * PAGE_SIZE)
+
+#define FUSE_DEFAULT_INIT_TIMEOUT 10 /* s */
+#define FUSE_MIN_INIT_TIMEOUT 1 /* s */
+#define FUSE_MAX_INIT_TIMEOUT 300 /* s */
+#define FUSE_INIT_WAIT_INTERVAL 100000 /* us */
+
+#define FUSE_INIT_TIMEOUT_DEFAULT_BUTTON_TITLE "OK"
+#define FUSE_INIT_TIMEOUT_NOTICE_MESSAGE \
+ "Timed out waiting for the file system to initialize. The volume has " \
+ "been ejected. You can use the init_timeout mount option to wait longer."
+
+#define FUSE_DEFAULT_DAEMON_TIMEOUT 60 /* s */
+#define FUSE_MIN_DAEMON_TIMEOUT 0 /* s */
+#define FUSE_MAX_DAEMON_TIMEOUT 600 /* s */
+
+#define FUSE_DAEMON_TIMEOUT_DEFAULT_BUTTON_TITLE "Keep Trying"
+#define FUSE_DAEMON_TIMEOUT_OTHER_BUTTON_TITLE "Force Eject"
+#define FUSE_DAEMON_TIMEOUT_ALTERNATE_BUTTON_TITLE "Don't Warn Again"
+#define FUSE_DAEMON_TIMEOUT_ALERT_MESSAGE \
+ "There was a timeout waiting for the file system to respond. You can " \
+ "eject this volume immediately, but unsaved changes may be lost."
+#define FUSE_DAEMON_TIMEOUT_ALERT_TIMEOUT 120 /* s */
+
+#ifdef KERNEL
+
+/*
+ * This is the soft upper limit on the number of "request tickets" FUSE's
+ * user-kernel IPC layer can have for a given mount. This can be modified
+ * through the fuse.* sysctl interface.
+ */
+#define FUSE_DEFAULT_MAX_FREE_TICKETS 1024
+#define FUSE_DEFAULT_IOV_PERMANENT_BUFSIZE (1 << 19)
+#define FUSE_DEFAULT_IOV_CREDIT 16
+
+/* User-Kernel IPC Buffer */
+
+#define FUSE_MIN_USERKERNEL_BUFSIZE (128 * 1024)
+#define FUSE_MAX_USERKERNEL_BUFSIZE (4096 * 1024)
+
+#define FUSE_REASONABLE_XATTRSIZE FUSE_MIN_USERKERNEL_BUFSIZE
+
+#endif /* KERNEL */
+
+#define FUSE_DEFAULT_USERKERNEL_BUFSIZE (4096 * 1024)
+
+#define FUSE_LINK_MAX LINK_MAX
+#define FUSE_UIO_BACKUP_MAX 8
+
+#define FUSE_MAXNAMLEN 255
+
+#endif /* _FUSE_PARAM_H_ */
diff --git a/contrib/macfuse/mount_darwin.c b/contrib/macfuse/mount_darwin.c
new file mode 100644
index 000000000..c485583e9
--- /dev/null
+++ b/contrib/macfuse/mount_darwin.c
@@ -0,0 +1,357 @@
+/*
+ * Derived from mount_bsd.c from the fuse distribution.
+ *
+ * FUSE: Filesystem in Userspace
+ * Copyright (C) 2005-2006 Csaba Henk <csaba.henk@creo.hu>
+ * Copyright (C) 2007-2009 Amit Singh <asingh@gmail.com>
+ * Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
+ *
+ * This program can be distributed under the terms of the GNU LGPLv2.
+ * See the file COPYING.LIB.
+ */
+
+#undef _POSIX_C_SOURCE
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <sys/sysctl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <string.h>
+#include <paths.h>
+
+#include <libproc.h>
+#include <sys/utsname.h>
+
+#include <sys/param.h>
+#include <sys/mount.h>
+#include <AssertMacros.h>
+
+#include "fuse_param.h"
+#include "fuse_ioctl.h"
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "common-utils.h"
+
+#define GFFUSE_LOGERR(...) \
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR, ## __VA_ARGS__)
+
+static long
+fuse_os_version_major_np(void)
+{
+ int ret = 0;
+ long major = 0;
+ char *c = NULL;
+ struct utsname u;
+ size_t oldlen;
+
+ oldlen = sizeof(u.release);
+
+ ret = sysctlbyname("kern.osrelease", u.release, &oldlen, NULL, 0);
+ if (ret != 0) {
+ return -1;
+ }
+
+ c = strchr(u.release, '.');
+ if (c == NULL) {
+ return -1;
+ }
+
+ *c = '\0';
+
+ errno = 0;
+ major = strtol(u.release, NULL, 10);
+ if ((errno == EINVAL) || (errno == ERANGE)) {
+ return -1;
+ }
+
+ return major;
+}
+
+static int
+fuse_running_under_rosetta(void)
+{
+ int result = 0;
+ int is_native = 1;
+ size_t sz = sizeof(result);
+
+ int ret = sysctlbyname("sysctl.proc_native", &result, &sz, NULL, (size_t)0);
+ if ((ret == 0) && !result) {
+ is_native = 0;
+ }
+
+ return !is_native;
+}
+
+static int
+loadkmod(void)
+{
+ int result = -1;
+ int pid, terminated_pid;
+ union wait status;
+ long major;
+
+ major = fuse_os_version_major_np();
+
+ if (major < 9) { /* not Mac OS X 10.5+ */
+ return EINVAL;
+ }
+
+ pid = fork();
+
+ if (pid == 0) {
+ execl(MACFUSE_LOAD_PROG, MACFUSE_LOAD_PROG, NULL);
+
+ /* exec failed */
+ exit(ENOENT);
+ }
+
+ require_action(pid != -1, Return, result = errno);
+
+ while ((terminated_pid = wait4(pid, (int *)&status, 0, NULL)) < 0) {
+ /* retry if EINTR, else break out with error */
+ if (errno != EINTR) {
+ break;
+ }
+ }
+
+ if ((terminated_pid == pid) && (WIFEXITED(status))) {
+ result = WEXITSTATUS(status);
+ } else {
+ result = -1;
+ }
+
+Return:
+ check_noerr_string(result, strerror(errno));
+
+ return result;
+}
+
+int
+gf_fuse_mount (const char *mountpoint, char *fsname, char *mnt_param,
+ pid_t *mtab_pid /* not used on OS X */)
+{
+ int fd, pid;
+ int result;
+ char *fdnam, *dev;
+ const char *mountprog = MACFUSE_MOUNT_PROG;
+ sig_t chldf;
+
+ /* mount_fusefs should not try to spawn the daemon */
+ setenv("MOUNT_FUSEFS_SAFE", "1", 1);
+
+ /* to notify mount_fusefs it's called from lib */
+ setenv("MOUNT_FUSEFS_CALL_BY_LIB", "1", 1);
+
+ if (!mountpoint) {
+ fprintf(stderr, "missing or invalid mount point\n");
+ return -1;
+ }
+
+ if (fuse_running_under_rosetta()) {
+ fprintf(stderr, "MacFUSE does not work under Rosetta\n");
+ return -1;
+ }
+
+ chldf = signal(SIGCHLD, SIG_DFL); /* So that we can wait4() below. */
+
+ result = loadkmod();
+ if (result == EINVAL)
+ GFFUSE_LOGERR("OS X >= 10.5 (at least Leopard) required");
+ else if (result == 0 || result == ENOENT || result == EBUSY) {
+ /* Module loaded, but now need to check for user<->kernel match. */
+
+ char version[MAXHOSTNAMELEN + 1] = { 0 };
+ size_t version_len = MAXHOSTNAMELEN;
+ size_t version_len_desired = 0;
+
+ result = sysctlbyname(SYSCTL_MACFUSE_VERSION_NUMBER, version,
+ &version_len, NULL, (size_t)0);
+ if (result == 0) {
+ /* sysctlbyname() includes the trailing '\0' in version_len */
+ version_len_desired = strlen("2.x.y") + 1;
+
+ if (version_len != version_len_desired)
+ result = -1;
+ } else
+ strcpy(version, "?.?.?");
+ if (result == 0) {
+ char *ep;
+ char vstr[4];
+ unsigned vval;
+ int i;
+
+ for (i = 0; i < 3; i++)
+ vstr[i] = version[2*i];
+ vstr[3] = '\0';
+
+ vval = strtoul(vstr, &ep, 10);
+ if (*ep || vval < 203 || vval > 217)
+ result = -1;
+ else
+ gf_log("glusterfs-fuse", GF_LOG_INFO,
+ "MacFUSE kext version %s", version);
+ }
+ if (result != 0)
+ GFFUSE_LOGERR("MacFUSE version %s is not supported", version);
+ } else
+ GFFUSE_LOGERR("cannot load MacFUSE kext");
+ if (result != 0)
+ return -1;
+
+ fdnam = getenv("FUSE_DEV_FD");
+
+ if (fdnam) {
+ char *ep;
+
+ fd = strtol(fdnam, &ep, 10);
+ if (*ep != '\0' || fd < 0) {
+ GFFUSE_LOGERR("invalid value given in FUSE_DEV_FD");
+ return -1;
+ }
+
+ goto mount;
+ }
+
+ dev = getenv("FUSE_DEV_NAME");
+ if (dev) {
+ if ((fd = open(dev, O_RDWR)) < 0) {
+ GFFUSE_LOGERR("failed to open device (%s)", strerror(errno));
+ return -1;
+ }
+ } else {
+ int r, devidx = -1;
+ char devpath[MAXPATHLEN];
+
+ for (r = 0; r < MACFUSE_NDEVICES; r++) {
+ snprintf(devpath, MAXPATHLEN - 1,
+ _PATH_DEV MACFUSE_DEVICE_BASENAME "%d", r);
+ fd = open(devpath, O_RDWR);
+ if (fd >= 0) {
+ dev = devpath;
+ devidx = r;
+ break;
+ }
+ }
+ if (devidx == -1) {
+ GFFUSE_LOGERR("failed to open device (%s)", strerror(errno));
+ return -1;
+ }
+ }
+
+mount:
+ if (getenv("FUSE_NO_MOUNT") || ! mountpoint)
+ goto out;
+
+ signal(SIGCHLD, chldf);
+
+ pid = fork();
+
+ if (pid == -1) {
+ GFFUSE_LOGERR("fork() failed (%s)", strerror(errno));
+ close(fd);
+ return -1;
+ }
+
+ if (pid == 0) {
+
+ pid = fork();
+ if (pid == -1) {
+ GFFUSE_LOGERR("fork() failed (%s)", strerror(errno));
+ close(fd);
+ exit(1);
+ }
+
+ if (pid == 0) {
+ const char *argv[32];
+ int a = 0;
+ char *opts = NULL;
+
+ if (asprintf(&opts, "%s,fssubtype=glusterfs", mnt_param) == -1) {
+ GFFUSE_LOGERR("Out of memory");
+ exit(1);
+ }
+
+ if (! fdnam)
+ asprintf(&fdnam, "%d", fd);
+
+ argv[a++] = mountprog;
+ if (opts) {
+ argv[a++] = "-o";
+ argv[a++] = opts;
+ }
+ argv[a++] = fdnam;
+ argv[a++] = mountpoint;
+ argv[a++] = NULL;
+
+ {
+ char title[MAXPATHLEN + 1] = { 0 };
+ u_int32_t len = MAXPATHLEN;
+ int ret = proc_pidpath(getpid(), title, len);
+ if (ret) {
+ setenv("MOUNT_FUSEFS_DAEMON_PATH", title, 1);
+ }
+ }
+ execvp(mountprog, (char **) argv);
+ GFFUSE_LOGERR("MacFUSE: failed to exec mount program (%s)", strerror(errno));
+ exit(1);
+ }
+
+ _exit(0);
+ }
+
+out:
+ return fd;
+}
+
+void
+gf_fuse_unmount(const char *mountpoint, int fd)
+{
+ int ret;
+ struct stat sbuf;
+ char dev[128];
+ char resolved_path[PATH_MAX];
+ char *ep, *rp = NULL;
+
+ unsigned int hs_complete = 0;
+
+ ret = ioctl(fd, FUSEDEVIOCGETHANDSHAKECOMPLETE, &hs_complete);
+ if (ret || !hs_complete) {
+ return;
+ }
+ /* XXX does this have any use here? */
+ ret = ioctl(fd, FUSEDEVIOCSETDAEMONDEAD, &fd);
+ if (ret) {
+ return;
+ }
+
+ if (fstat(fd, &sbuf) == -1) {
+ return;
+ }
+
+ devname_r(sbuf.st_rdev, S_IFCHR, dev, 128);
+
+ if (strncmp(dev, MACFUSE_DEVICE_BASENAME,
+ sizeof(MACFUSE_DEVICE_BASENAME) - 1)) {
+ return;
+ }
+
+ strtol(dev + 4, &ep, 10);
+ if (*ep != '\0') {
+ return;
+ }
+
+ rp = realpath(mountpoint, resolved_path);
+ if (rp) {
+ ret = unmount(resolved_path, 0);
+ }
+
+ close(fd);
+
+ return;
+}
diff --git a/contrib/qemu/block.c b/contrib/qemu/block.c
new file mode 100644
index 000000000..b56024113
--- /dev/null
+++ b/contrib/qemu/block.c
@@ -0,0 +1,4604 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "config-host.h"
+#include "qemu-common.h"
+#include "trace.h"
+#include "monitor/monitor.h"
+#include "block/block_int.h"
+#include "block/blockjob.h"
+#include "qemu/module.h"
+#include "qapi/qmp/qjson.h"
+#include "sysemu/sysemu.h"
+#include "qemu/notify.h"
+#include "block/coroutine.h"
+#include "qmp-commands.h"
+#include "qemu/timer.h"
+
+#ifdef CONFIG_BSD
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/queue.h>
+#ifndef __DragonFly__
+#include <sys/disk.h>
+#endif
+#endif
+
+#ifdef _WIN32
+#include <windows.h>
+#endif
+
+#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
+
+typedef enum {
+ BDRV_REQ_COPY_ON_READ = 0x1,
+ BDRV_REQ_ZERO_WRITE = 0x2,
+} BdrvRequestFlags;
+
+static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
+static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ QEMUIOVector *iov);
+static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ QEMUIOVector *iov);
+static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
+static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
+static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque,
+ bool is_write);
+static void coroutine_fn bdrv_co_do_rw(void *opaque);
+static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors);
+
+static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
+ bool is_write, double elapsed_time, uint64_t *wait);
+static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
+ double elapsed_time, uint64_t *wait);
+static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
+ bool is_write, int64_t *wait);
+
+static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
+ QTAILQ_HEAD_INITIALIZER(bdrv_states);
+
+static QLIST_HEAD(, BlockDriver) bdrv_drivers =
+ QLIST_HEAD_INITIALIZER(bdrv_drivers);
+
+/* If non-zero, use only whitelisted block drivers */
+static int use_bdrv_whitelist;
+
+#ifdef _WIN32
+static int is_windows_drive_prefix(const char *filename)
+{
+ return (((filename[0] >= 'a' && filename[0] <= 'z') ||
+ (filename[0] >= 'A' && filename[0] <= 'Z')) &&
+ filename[1] == ':');
+}
+
+int is_windows_drive(const char *filename)
+{
+ if (is_windows_drive_prefix(filename) &&
+ filename[2] == '\0')
+ return 1;
+ if (strstart(filename, "\\\\.\\", NULL) ||
+ strstart(filename, "//./", NULL))
+ return 1;
+ return 0;
+}
+#endif
+
+/* throttling disk I/O limits */
+void bdrv_io_limits_disable(BlockDriverState *bs)
+{
+ bs->io_limits_enabled = false;
+
+ while (qemu_co_queue_next(&bs->throttled_reqs));
+
+ if (bs->block_timer) {
+ qemu_del_timer(bs->block_timer);
+ qemu_free_timer(bs->block_timer);
+ bs->block_timer = NULL;
+ }
+
+ bs->slice_start = 0;
+ bs->slice_end = 0;
+}
+
+static void bdrv_block_timer(void *opaque)
+{
+ BlockDriverState *bs = opaque;
+
+ qemu_co_queue_next(&bs->throttled_reqs);
+}
+
+void bdrv_io_limits_enable(BlockDriverState *bs)
+{
+ qemu_co_queue_init(&bs->throttled_reqs);
+ bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
+ bs->io_limits_enabled = true;
+}
+
+bool bdrv_io_limits_enabled(BlockDriverState *bs)
+{
+ BlockIOLimit *io_limits = &bs->io_limits;
+ return io_limits->bps[BLOCK_IO_LIMIT_READ]
+ || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
+ || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
+ || io_limits->iops[BLOCK_IO_LIMIT_READ]
+ || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
+ || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
+}
+
+static void bdrv_io_limits_intercept(BlockDriverState *bs,
+ bool is_write, int nb_sectors)
+{
+ int64_t wait_time = -1;
+
+ if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
+ qemu_co_queue_wait(&bs->throttled_reqs);
+ }
+
+ /* In fact, we hope to keep each request's timing, in FIFO mode. The next
+ * throttled requests will not be dequeued until the current request is
+ * allowed to be serviced. So if the current request still exceeds the
+ * limits, it will be inserted to the head. All requests followed it will
+ * be still in throttled_reqs queue.
+ */
+
+ while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
+ qemu_mod_timer(bs->block_timer,
+ wait_time + qemu_get_clock_ns(vm_clock));
+ qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
+ }
+
+ qemu_co_queue_next(&bs->throttled_reqs);
+}
+
+/* check if the path starts with "<protocol>:" */
+static int path_has_protocol(const char *path)
+{
+ const char *p;
+
+#ifdef _WIN32
+ if (is_windows_drive(path) ||
+ is_windows_drive_prefix(path)) {
+ return 0;
+ }
+ p = path + strcspn(path, ":/\\");
+#else
+ p = path + strcspn(path, ":/");
+#endif
+
+ return *p == ':';
+}
+
+int path_is_absolute(const char *path)
+{
+#ifdef _WIN32
+ /* specific case for names like: "\\.\d:" */
+ if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
+ return 1;
+ }
+ return (*path == '/' || *path == '\\');
+#else
+ return (*path == '/');
+#endif
+}
+
+/* if filename is absolute, just copy it to dest. Otherwise, build a
+ path to it by considering it is relative to base_path. URL are
+ supported. */
+void path_combine(char *dest, int dest_size,
+ const char *base_path,
+ const char *filename)
+{
+ const char *p, *p1;
+ int len;
+
+ if (dest_size <= 0)
+ return;
+ if (path_is_absolute(filename)) {
+ pstrcpy(dest, dest_size, filename);
+ } else {
+ p = strchr(base_path, ':');
+ if (p)
+ p++;
+ else
+ p = base_path;
+ p1 = strrchr(base_path, '/');
+#ifdef _WIN32
+ {
+ const char *p2;
+ p2 = strrchr(base_path, '\\');
+ if (!p1 || p2 > p1)
+ p1 = p2;
+ }
+#endif
+ if (p1)
+ p1++;
+ else
+ p1 = base_path;
+ if (p1 > p)
+ p = p1;
+ len = p - base_path;
+ if (len > dest_size - 1)
+ len = dest_size - 1;
+ memcpy(dest, base_path, len);
+ dest[len] = '\0';
+ pstrcat(dest, dest_size, filename);
+ }
+}
+
+void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
+{
+ if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
+ pstrcpy(dest, sz, bs->backing_file);
+ } else {
+ path_combine(dest, sz, bs->filename, bs->backing_file);
+ }
+}
+
+void bdrv_register(BlockDriver *bdrv)
+{
+ /* Block drivers without coroutine functions need emulation */
+ if (!bdrv->bdrv_co_readv) {
+ bdrv->bdrv_co_readv = bdrv_co_readv_em;
+ bdrv->bdrv_co_writev = bdrv_co_writev_em;
+
+ /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
+ * the block driver lacks aio we need to emulate that too.
+ */
+ if (!bdrv->bdrv_aio_readv) {
+ /* add AIO emulation layer */
+ bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
+ bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
+ }
+ }
+
+ QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
+}
+
+/* create a new block device (by default it is empty) */
+BlockDriverState *bdrv_new(const char *device_name)
+{
+ BlockDriverState *bs;
+
+ bs = g_malloc0(sizeof(BlockDriverState));
+ pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
+ if (device_name[0] != '\0') {
+ QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
+ }
+ bdrv_iostatus_disable(bs);
+ notifier_list_init(&bs->close_notifiers);
+ notifier_with_return_list_init(&bs->before_write_notifiers);
+
+ return bs;
+}
+
+void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
+{
+ notifier_list_add(&bs->close_notifiers, notify);
+}
+
+BlockDriver *bdrv_find_format(const char *format_name)
+{
+ BlockDriver *drv1;
+ QLIST_FOREACH(drv1, &bdrv_drivers, list) {
+ if (!strcmp(drv1->format_name, format_name)) {
+ return drv1;
+ }
+ }
+ return NULL;
+}
+
+static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
+{
+ static const char *whitelist_rw[] = {
+ CONFIG_BDRV_RW_WHITELIST
+ };
+ static const char *whitelist_ro[] = {
+ CONFIG_BDRV_RO_WHITELIST
+ };
+ const char **p;
+
+ if (!whitelist_rw[0] && !whitelist_ro[0]) {
+ return 1; /* no whitelist, anything goes */
+ }
+
+ for (p = whitelist_rw; *p; p++) {
+ if (!strcmp(drv->format_name, *p)) {
+ return 1;
+ }
+ }
+ if (read_only) {
+ for (p = whitelist_ro; *p; p++) {
+ if (!strcmp(drv->format_name, *p)) {
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
+
+BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
+ bool read_only)
+{
+ BlockDriver *drv = bdrv_find_format(format_name);
+ return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
+}
+
+typedef struct CreateCo {
+ BlockDriver *drv;
+ char *filename;
+ QEMUOptionParameter *options;
+ int ret;
+} CreateCo;
+
+static void coroutine_fn bdrv_create_co_entry(void *opaque)
+{
+ CreateCo *cco = opaque;
+ assert(cco->drv);
+
+ cco->ret = cco->drv->bdrv_create(cco->filename, cco->options);
+}
+
+int bdrv_create(BlockDriver *drv, const char* filename,
+ QEMUOptionParameter *options)
+{
+ int ret;
+
+ Coroutine *co;
+ CreateCo cco = {
+ .drv = drv,
+ .filename = g_strdup(filename),
+ .options = options,
+ .ret = NOT_DONE,
+ };
+
+ if (!drv->bdrv_create) {
+ ret = -ENOTSUP;
+ goto out;
+ }
+
+ if (qemu_in_coroutine()) {
+ /* Fast-path if already in coroutine context */
+ bdrv_create_co_entry(&cco);
+ } else {
+ co = qemu_coroutine_create(bdrv_create_co_entry);
+ qemu_coroutine_enter(co, &cco);
+ while (cco.ret == NOT_DONE) {
+ qemu_aio_wait();
+ }
+ }
+
+ ret = cco.ret;
+
+out:
+ g_free(cco.filename);
+ return ret;
+}
+
+int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
+{
+ BlockDriver *drv;
+
+ drv = bdrv_find_protocol(filename, true);
+ if (drv == NULL) {
+ return -ENOENT;
+ }
+
+ return bdrv_create(drv, filename, options);
+}
+
+/*
+ * Create a uniquely-named empty temporary file.
+ * Return 0 upon success, otherwise a negative errno value.
+ */
+int get_tmp_filename(char *filename, int size)
+{
+#ifdef _WIN32
+ char temp_dir[MAX_PATH];
+ /* GetTempFileName requires that its output buffer (4th param)
+ have length MAX_PATH or greater. */
+ assert(size >= MAX_PATH);
+ return (GetTempPath(MAX_PATH, temp_dir)
+ && GetTempFileName(temp_dir, "qem", 0, filename)
+ ? 0 : -GetLastError());
+#else
+ int fd;
+ const char *tmpdir;
+ tmpdir = getenv("TMPDIR");
+ if (!tmpdir)
+ tmpdir = "/tmp";
+ if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
+ return -EOVERFLOW;
+ }
+ fd = mkstemp(filename);
+ if (fd < 0) {
+ return -errno;
+ }
+ if (close(fd) != 0) {
+ unlink(filename);
+ return -errno;
+ }
+ return 0;
+#endif
+}
+
+/*
+ * Detect host devices. By convention, /dev/cdrom[N] is always
+ * recognized as a host CDROM.
+ */
+static BlockDriver *find_hdev_driver(const char *filename)
+{
+ int score_max = 0, score;
+ BlockDriver *drv = NULL, *d;
+
+ QLIST_FOREACH(d, &bdrv_drivers, list) {
+ if (d->bdrv_probe_device) {
+ score = d->bdrv_probe_device(filename);
+ if (score > score_max) {
+ score_max = score;
+ drv = d;
+ }
+ }
+ }
+
+ return drv;
+}
+
+BlockDriver *bdrv_find_protocol(const char *filename,
+ bool allow_protocol_prefix)
+{
+ BlockDriver *drv1;
+ char protocol[128];
+ int len;
+ const char *p;
+
+ /* TODO Drivers without bdrv_file_open must be specified explicitly */
+
+ /*
+ * XXX(hch): we really should not let host device detection
+ * override an explicit protocol specification, but moving this
+ * later breaks access to device names with colons in them.
+ * Thanks to the brain-dead persistent naming schemes on udev-
+ * based Linux systems those actually are quite common.
+ */
+ drv1 = find_hdev_driver(filename);
+ if (drv1) {
+ return drv1;
+ }
+
+ if (!path_has_protocol(filename) || !allow_protocol_prefix) {
+ return bdrv_find_format("file");
+ }
+
+ p = strchr(filename, ':');
+ assert(p != NULL);
+ len = p - filename;
+ if (len > sizeof(protocol) - 1)
+ len = sizeof(protocol) - 1;
+ memcpy(protocol, filename, len);
+ protocol[len] = '\0';
+ QLIST_FOREACH(drv1, &bdrv_drivers, list) {
+ if (drv1->protocol_name &&
+ !strcmp(drv1->protocol_name, protocol)) {
+ return drv1;
+ }
+ }
+ return NULL;
+}
+
+static int find_image_format(BlockDriverState *bs, const char *filename,
+ BlockDriver **pdrv)
+{
+ int score, score_max;
+ BlockDriver *drv1, *drv;
+ uint8_t buf[2048];
+ int ret = 0;
+
+ /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
+ if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
+ drv = bdrv_find_format("raw");
+ if (!drv) {
+ ret = -ENOENT;
+ }
+ *pdrv = drv;
+ return ret;
+ }
+
+ ret = bdrv_pread(bs, 0, buf, sizeof(buf));
+ if (ret < 0) {
+ *pdrv = NULL;
+ return ret;
+ }
+
+ score_max = 0;
+ drv = NULL;
+ QLIST_FOREACH(drv1, &bdrv_drivers, list) {
+ if (drv1->bdrv_probe) {
+ score = drv1->bdrv_probe(buf, ret, filename);
+ if (score > score_max) {
+ score_max = score;
+ drv = drv1;
+ }
+ }
+ }
+ if (!drv) {
+ ret = -ENOENT;
+ }
+ *pdrv = drv;
+ return ret;
+}
+
+/**
+ * Set the current 'total_sectors' value
+ */
+static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
+{
+ BlockDriver *drv = bs->drv;
+
+ /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
+ if (bs->sg)
+ return 0;
+
+ /* query actual device if possible, otherwise just trust the hint */
+ if (drv->bdrv_getlength) {
+ int64_t length = drv->bdrv_getlength(bs);
+ if (length < 0) {
+ return length;
+ }
+ hint = length >> BDRV_SECTOR_BITS;
+ }
+
+ bs->total_sectors = hint;
+ return 0;
+}
+
+/**
+ * Set open flags for a given discard mode
+ *
+ * Return 0 on success, -1 if the discard mode was invalid.
+ */
+int bdrv_parse_discard_flags(const char *mode, int *flags)
+{
+ *flags &= ~BDRV_O_UNMAP;
+
+ if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
+ /* do nothing */
+ } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
+ *flags |= BDRV_O_UNMAP;
+ } else {
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * Set open flags for a given cache mode
+ *
+ * Return 0 on success, -1 if the cache mode was invalid.
+ */
+int bdrv_parse_cache_flags(const char *mode, int *flags)
+{
+ *flags &= ~BDRV_O_CACHE_MASK;
+
+ if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
+ *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
+ } else if (!strcmp(mode, "directsync")) {
+ *flags |= BDRV_O_NOCACHE;
+ } else if (!strcmp(mode, "writeback")) {
+ *flags |= BDRV_O_CACHE_WB;
+ } else if (!strcmp(mode, "unsafe")) {
+ *flags |= BDRV_O_CACHE_WB;
+ *flags |= BDRV_O_NO_FLUSH;
+ } else if (!strcmp(mode, "writethrough")) {
+ /* this is the default */
+ } else {
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * The copy-on-read flag is actually a reference count so multiple users may
+ * use the feature without worrying about clobbering its previous state.
+ * Copy-on-read stays enabled until all users have called to disable it.
+ */
+void bdrv_enable_copy_on_read(BlockDriverState *bs)
+{
+ bs->copy_on_read++;
+}
+
+void bdrv_disable_copy_on_read(BlockDriverState *bs)
+{
+ assert(bs->copy_on_read > 0);
+ bs->copy_on_read--;
+}
+
+static int bdrv_open_flags(BlockDriverState *bs, int flags)
+{
+ int open_flags = flags | BDRV_O_CACHE_WB;
+
+ /*
+ * Clear flags that are internal to the block layer before opening the
+ * image.
+ */
+ open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
+
+ /*
+ * Snapshots should be writable.
+ */
+ if (bs->is_temporary) {
+ open_flags |= BDRV_O_RDWR;
+ }
+
+ return open_flags;
+}
+
+/*
+ * Common part for opening disk images and files
+ *
+ * Removes all processed options from *options.
+ */
+static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
+ QDict *options, int flags, BlockDriver *drv)
+{
+ int ret, open_flags;
+ const char *filename;
+
+ assert(drv != NULL);
+ assert(bs->file == NULL);
+ assert(options != NULL && bs->options != options);
+
+ if (file != NULL) {
+ filename = file->filename;
+ } else {
+ filename = qdict_get_try_str(options, "filename");
+ }
+
+ trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
+
+ /* bdrv_open() with directly using a protocol as drv. This layer is already
+ * opened, so assign it to bs (while file becomes a closed BlockDriverState)
+ * and return immediately. */
+ if (file != NULL && drv->bdrv_file_open) {
+ bdrv_swap(file, bs);
+ return 0;
+ }
+
+ bs->open_flags = flags;
+ bs->buffer_alignment = 512;
+ open_flags = bdrv_open_flags(bs, flags);
+ bs->read_only = !(open_flags & BDRV_O_RDWR);
+
+ if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
+ return -ENOTSUP;
+ }
+
+ assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
+ if (!bs->read_only && (flags & BDRV_O_COPY_ON_READ)) {
+ bdrv_enable_copy_on_read(bs);
+ }
+
+ if (filename != NULL) {
+ pstrcpy(bs->filename, sizeof(bs->filename), filename);
+ } else {
+ bs->filename[0] = '\0';
+ }
+
+ bs->drv = drv;
+ bs->opaque = g_malloc0(drv->instance_size);
+
+ bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
+
+ /* Open the image, either directly or using a protocol */
+ if (drv->bdrv_file_open) {
+ assert(file == NULL);
+ assert(drv->bdrv_parse_filename || filename != NULL);
+ ret = drv->bdrv_file_open(bs, options, open_flags);
+ } else {
+ if (file == NULL) {
+ qerror_report(ERROR_CLASS_GENERIC_ERROR, "Can't use '%s' as a "
+ "block driver for the protocol level",
+ drv->format_name);
+ ret = -EINVAL;
+ goto free_and_fail;
+ }
+ assert(file != NULL);
+ bs->file = file;
+ ret = drv->bdrv_open(bs, options, open_flags);
+ }
+
+ if (ret < 0) {
+ goto free_and_fail;
+ }
+
+ ret = refresh_total_sectors(bs, bs->total_sectors);
+ if (ret < 0) {
+ goto free_and_fail;
+ }
+
+#ifndef _WIN32
+ if (bs->is_temporary) {
+ assert(filename != NULL);
+ unlink(filename);
+ }
+#endif
+ return 0;
+
+free_and_fail:
+ bs->file = NULL;
+ g_free(bs->opaque);
+ bs->opaque = NULL;
+ bs->drv = NULL;
+ return ret;
+}
+
+/*
+ * Opens a file using a protocol (file, host_device, nbd, ...)
+ *
+ * options is a QDict of options to pass to the block drivers, or NULL for an
+ * empty set of options. The reference to the QDict belongs to the block layer
+ * after the call (even on failure), so if the caller intends to reuse the
+ * dictionary, it needs to use QINCREF() before calling bdrv_file_open.
+ */
+int bdrv_file_open(BlockDriverState **pbs, const char *filename,
+ QDict *options, int flags)
+{
+ BlockDriverState *bs;
+ BlockDriver *drv;
+ const char *drvname;
+ bool allow_protocol_prefix = false;
+ int ret;
+
+ /* NULL means an empty set of options */
+ if (options == NULL) {
+ options = qdict_new();
+ }
+
+ bs = bdrv_new("");
+ bs->options = options;
+ options = qdict_clone_shallow(options);
+
+ /* Fetch the file name from the options QDict if necessary */
+ if (!filename) {
+ filename = qdict_get_try_str(options, "filename");
+ } else if (filename && !qdict_haskey(options, "filename")) {
+ qdict_put(options, "filename", qstring_from_str(filename));
+ allow_protocol_prefix = true;
+ } else {
+ qerror_report(ERROR_CLASS_GENERIC_ERROR, "Can't specify 'file' and "
+ "'filename' options at the same time");
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ /* Find the right block driver */
+ drvname = qdict_get_try_str(options, "driver");
+ if (drvname) {
+ drv = bdrv_find_whitelisted_format(drvname, !(flags & BDRV_O_RDWR));
+ qdict_del(options, "driver");
+ } else if (filename) {
+ drv = bdrv_find_protocol(filename, allow_protocol_prefix);
+ if (!drv) {
+ qerror_report(ERROR_CLASS_GENERIC_ERROR, "Unknown protocol");
+ }
+ } else {
+ qerror_report(ERROR_CLASS_GENERIC_ERROR,
+ "Must specify either driver or file");
+ drv = NULL;
+ }
+
+ if (!drv) {
+ ret = -ENOENT;
+ goto fail;
+ }
+
+ /* Parse the filename and open it */
+ if (drv->bdrv_parse_filename && filename) {
+ Error *local_err = NULL;
+ drv->bdrv_parse_filename(filename, options, &local_err);
+ if (error_is_set(&local_err)) {
+ qerror_report_err(local_err);
+ error_free(local_err);
+ ret = -EINVAL;
+ goto fail;
+ }
+ qdict_del(options, "filename");
+ } else if (!drv->bdrv_parse_filename && !filename) {
+ qerror_report(ERROR_CLASS_GENERIC_ERROR,
+ "The '%s' block driver requires a file name",
+ drv->format_name);
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ ret = bdrv_open_common(bs, NULL, options, flags, drv);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* Check if any unknown options were used */
+ if (qdict_size(options) != 0) {
+ const QDictEntry *entry = qdict_first(options);
+ qerror_report(ERROR_CLASS_GENERIC_ERROR, "Block protocol '%s' doesn't "
+ "support the option '%s'",
+ drv->format_name, entry->key);
+ ret = -EINVAL;
+ goto fail;
+ }
+ QDECREF(options);
+
+ bs->growable = 1;
+ *pbs = bs;
+ return 0;
+
+fail:
+ QDECREF(options);
+ if (!bs->drv) {
+ QDECREF(bs->options);
+ }
+ bdrv_delete(bs);
+ return ret;
+}
+
+/*
+ * Opens the backing file for a BlockDriverState if not yet open
+ *
+ * options is a QDict of options to pass to the block drivers, or NULL for an
+ * empty set of options. The reference to the QDict is transferred to this
+ * function (even on failure), so if the caller intends to reuse the dictionary,
+ * it needs to use QINCREF() before calling bdrv_file_open.
+ */
+int bdrv_open_backing_file(BlockDriverState *bs, QDict *options)
+{
+ char backing_filename[PATH_MAX];
+ int back_flags, ret;
+ BlockDriver *back_drv = NULL;
+
+ if (bs->backing_hd != NULL) {
+ QDECREF(options);
+ return 0;
+ }
+
+ /* NULL means an empty set of options */
+ if (options == NULL) {
+ options = qdict_new();
+ }
+
+ bs->open_flags &= ~BDRV_O_NO_BACKING;
+ if (qdict_haskey(options, "file.filename")) {
+ backing_filename[0] = '\0';
+ } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
+ QDECREF(options);
+ return 0;
+ }
+
+ bs->backing_hd = bdrv_new("");
+ bdrv_get_full_backing_filename(bs, backing_filename,
+ sizeof(backing_filename));
+
+ if (bs->backing_format[0] != '\0') {
+ back_drv = bdrv_find_format(bs->backing_format);
+ }
+
+ /* backing files always opened read-only */
+ back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT);
+
+ ret = bdrv_open(bs->backing_hd,
+ *backing_filename ? backing_filename : NULL, options,
+ back_flags, back_drv);
+ if (ret < 0) {
+ bdrv_delete(bs->backing_hd);
+ bs->backing_hd = NULL;
+ bs->open_flags |= BDRV_O_NO_BACKING;
+ return ret;
+ }
+ return 0;
+}
+
+static void extract_subqdict(QDict *src, QDict **dst, const char *start)
+{
+ const QDictEntry *entry, *next;
+ const char *p;
+
+ *dst = qdict_new();
+ entry = qdict_first(src);
+
+ while (entry != NULL) {
+ next = qdict_next(src, entry);
+ if (strstart(entry->key, start, &p)) {
+ qobject_incref(entry->value);
+ qdict_put_obj(*dst, p, entry->value);
+ qdict_del(src, entry->key);
+ }
+ entry = next;
+ }
+}
+
+/*
+ * Opens a disk image (raw, qcow2, vmdk, ...)
+ *
+ * options is a QDict of options to pass to the block drivers, or NULL for an
+ * empty set of options. The reference to the QDict belongs to the block layer
+ * after the call (even on failure), so if the caller intends to reuse the
+ * dictionary, it needs to use QINCREF() before calling bdrv_open.
+ */
+int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
+ int flags, BlockDriver *drv)
+{
+ int ret;
+ /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
+ char tmp_filename[PATH_MAX + 1];
+ BlockDriverState *file = NULL;
+ QDict *file_options = NULL;
+
+ /* NULL means an empty set of options */
+ if (options == NULL) {
+ options = qdict_new();
+ }
+
+ bs->options = options;
+ options = qdict_clone_shallow(options);
+
+ /* For snapshot=on, create a temporary qcow2 overlay */
+ if (flags & BDRV_O_SNAPSHOT) {
+ BlockDriverState *bs1;
+ int64_t total_size;
+ BlockDriver *bdrv_qcow2;
+ QEMUOptionParameter *create_options;
+ char backing_filename[PATH_MAX];
+
+ if (qdict_size(options) != 0) {
+ error_report("Can't use snapshot=on with driver-specific options");
+ ret = -EINVAL;
+ goto fail;
+ }
+ assert(filename != NULL);
+
+ /* if snapshot, we create a temporary backing file and open it
+ instead of opening 'filename' directly */
+
+ /* if there is a backing file, use it */
+ bs1 = bdrv_new("");
+ ret = bdrv_open(bs1, filename, NULL, 0, drv);
+ if (ret < 0) {
+ bdrv_delete(bs1);
+ goto fail;
+ }
+ total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
+
+ bdrv_delete(bs1);
+
+ ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename));
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* Real path is meaningless for protocols */
+ if (path_has_protocol(filename)) {
+ snprintf(backing_filename, sizeof(backing_filename),
+ "%s", filename);
+ } else if (!realpath(filename, backing_filename)) {
+ ret = -errno;
+ goto fail;
+ }
+
+ bdrv_qcow2 = bdrv_find_format("qcow2");
+ create_options = parse_option_parameters("", bdrv_qcow2->create_options,
+ NULL);
+
+ set_option_parameter_int(create_options, BLOCK_OPT_SIZE, total_size);
+ set_option_parameter(create_options, BLOCK_OPT_BACKING_FILE,
+ backing_filename);
+ if (drv) {
+ set_option_parameter(create_options, BLOCK_OPT_BACKING_FMT,
+ drv->format_name);
+ }
+
+ ret = bdrv_create(bdrv_qcow2, tmp_filename, create_options);
+ free_option_parameters(create_options);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ filename = tmp_filename;
+ drv = bdrv_qcow2;
+ bs->is_temporary = 1;
+ }
+
+ /* Open image file without format layer */
+ if (flags & BDRV_O_RDWR) {
+ flags |= BDRV_O_ALLOW_RDWR;
+ }
+
+ extract_subqdict(options, &file_options, "file.");
+
+ ret = bdrv_file_open(&file, filename, file_options,
+ bdrv_open_flags(bs, flags | BDRV_O_UNMAP));
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* Find the right image format driver */
+ if (!drv) {
+ ret = find_image_format(file, filename, &drv);
+ }
+
+ if (!drv) {
+ goto unlink_and_fail;
+ }
+
+ /* Open the image */
+ ret = bdrv_open_common(bs, file, options, flags, drv);
+ if (ret < 0) {
+ goto unlink_and_fail;
+ }
+
+ if (bs->file != file) {
+ bdrv_delete(file);
+ file = NULL;
+ }
+
+ /* If there is a backing file, use it */
+ if ((flags & BDRV_O_NO_BACKING) == 0) {
+ QDict *backing_options;
+
+ extract_subqdict(options, &backing_options, "backing.");
+ ret = bdrv_open_backing_file(bs, backing_options);
+ if (ret < 0) {
+ goto close_and_fail;
+ }
+ }
+
+ /* Check if any unknown options were used */
+ if (qdict_size(options) != 0) {
+ const QDictEntry *entry = qdict_first(options);
+ qerror_report(ERROR_CLASS_GENERIC_ERROR, "Block format '%s' used by "
+ "device '%s' doesn't support the option '%s'",
+ drv->format_name, bs->device_name, entry->key);
+
+ ret = -EINVAL;
+ goto close_and_fail;
+ }
+ QDECREF(options);
+
+ if (!bdrv_key_required(bs)) {
+ bdrv_dev_change_media_cb(bs, true);
+ }
+
+ /* throttling disk I/O limits */
+ if (bs->io_limits_enabled) {
+ bdrv_io_limits_enable(bs);
+ }
+
+ return 0;
+
+unlink_and_fail:
+ if (file != NULL) {
+ bdrv_delete(file);
+ }
+ if (bs->is_temporary) {
+ unlink(filename);
+ }
+fail:
+ QDECREF(bs->options);
+ QDECREF(options);
+ bs->options = NULL;
+ return ret;
+
+close_and_fail:
+ bdrv_close(bs);
+ QDECREF(options);
+ return ret;
+}
+
+typedef struct BlockReopenQueueEntry {
+ bool prepared;
+ BDRVReopenState state;
+ QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
+} BlockReopenQueueEntry;
+
+/*
+ * Adds a BlockDriverState to a simple queue for an atomic, transactional
+ * reopen of multiple devices.
+ *
+ * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
+ * already performed, or alternatively may be NULL a new BlockReopenQueue will
+ * be created and initialized. This newly created BlockReopenQueue should be
+ * passed back in for subsequent calls that are intended to be of the same
+ * atomic 'set'.
+ *
+ * bs is the BlockDriverState to add to the reopen queue.
+ *
+ * flags contains the open flags for the associated bs
+ *
+ * returns a pointer to bs_queue, which is either the newly allocated
+ * bs_queue, or the existing bs_queue being used.
+ *
+ */
+BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
+ BlockDriverState *bs, int flags)
+{
+ assert(bs != NULL);
+
+ BlockReopenQueueEntry *bs_entry;
+ if (bs_queue == NULL) {
+ bs_queue = g_new0(BlockReopenQueue, 1);
+ QSIMPLEQ_INIT(bs_queue);
+ }
+
+ if (bs->file) {
+ bdrv_reopen_queue(bs_queue, bs->file, flags);
+ }
+
+ bs_entry = g_new0(BlockReopenQueueEntry, 1);
+ QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
+
+ bs_entry->state.bs = bs;
+ bs_entry->state.flags = flags;
+
+ return bs_queue;
+}
+
+/*
+ * Reopen multiple BlockDriverStates atomically & transactionally.
+ *
+ * The queue passed in (bs_queue) must have been built up previous
+ * via bdrv_reopen_queue().
+ *
+ * Reopens all BDS specified in the queue, with the appropriate
+ * flags. All devices are prepared for reopen, and failure of any
+ * device will cause all device changes to be abandonded, and intermediate
+ * data cleaned up.
+ *
+ * If all devices prepare successfully, then the changes are committed
+ * to all devices.
+ *
+ */
+int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
+{
+ int ret = -1;
+ BlockReopenQueueEntry *bs_entry, *next;
+ Error *local_err = NULL;
+
+ assert(bs_queue != NULL);
+
+ bdrv_drain_all();
+
+ QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
+ if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
+ error_propagate(errp, local_err);
+ goto cleanup;
+ }
+ bs_entry->prepared = true;
+ }
+
+ /* If we reach this point, we have success and just need to apply the
+ * changes
+ */
+ QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
+ bdrv_reopen_commit(&bs_entry->state);
+ }
+
+ ret = 0;
+
+cleanup:
+ QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
+ if (ret && bs_entry->prepared) {
+ bdrv_reopen_abort(&bs_entry->state);
+ }
+ g_free(bs_entry);
+ }
+ g_free(bs_queue);
+ return ret;
+}
+
+
+/* Reopen a single BlockDriverState with the specified flags. */
+int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
+{
+ int ret = -1;
+ Error *local_err = NULL;
+ BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
+
+ ret = bdrv_reopen_multiple(queue, &local_err);
+ if (local_err != NULL) {
+ error_propagate(errp, local_err);
+ }
+ return ret;
+}
+
+
+/*
+ * Prepares a BlockDriverState for reopen. All changes are staged in the
+ * 'opaque' field of the BDRVReopenState, which is used and allocated by
+ * the block driver layer .bdrv_reopen_prepare()
+ *
+ * bs is the BlockDriverState to reopen
+ * flags are the new open flags
+ * queue is the reopen queue
+ *
+ * Returns 0 on success, non-zero on error. On error errp will be set
+ * as well.
+ *
+ * On failure, bdrv_reopen_abort() will be called to clean up any data.
+ * It is the responsibility of the caller to then call the abort() or
+ * commit() for any other BDS that have been left in a prepare() state
+ *
+ */
+int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
+ Error **errp)
+{
+ int ret = -1;
+ Error *local_err = NULL;
+ BlockDriver *drv;
+
+ assert(reopen_state != NULL);
+ assert(reopen_state->bs->drv != NULL);
+ drv = reopen_state->bs->drv;
+
+ /* if we are to stay read-only, do not allow permission change
+ * to r/w */
+ if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
+ reopen_state->flags & BDRV_O_RDWR) {
+ error_set(errp, QERR_DEVICE_IS_READ_ONLY,
+ reopen_state->bs->device_name);
+ goto error;
+ }
+
+
+ ret = bdrv_flush(reopen_state->bs);
+ if (ret) {
+ error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
+ strerror(-ret));
+ goto error;
+ }
+
+ if (drv->bdrv_reopen_prepare) {
+ ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
+ if (ret) {
+ if (local_err != NULL) {
+ error_propagate(errp, local_err);
+ } else {
+ error_setg(errp, "failed while preparing to reopen image '%s'",
+ reopen_state->bs->filename);
+ }
+ goto error;
+ }
+ } else {
+ /* It is currently mandatory to have a bdrv_reopen_prepare()
+ * handler for each supported drv. */
+ error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
+ drv->format_name, reopen_state->bs->device_name,
+ "reopening of file");
+ ret = -1;
+ goto error;
+ }
+
+ ret = 0;
+
+error:
+ return ret;
+}
+
+/*
+ * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
+ * makes them final by swapping the staging BlockDriverState contents into
+ * the active BlockDriverState contents.
+ */
+void bdrv_reopen_commit(BDRVReopenState *reopen_state)
+{
+ BlockDriver *drv;
+
+ assert(reopen_state != NULL);
+ drv = reopen_state->bs->drv;
+ assert(drv != NULL);
+
+ /* If there are any driver level actions to take */
+ if (drv->bdrv_reopen_commit) {
+ drv->bdrv_reopen_commit(reopen_state);
+ }
+
+ /* set BDS specific flags now */
+ reopen_state->bs->open_flags = reopen_state->flags;
+ reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
+ BDRV_O_CACHE_WB);
+ reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
+}
+
+/*
+ * Abort the reopen, and delete and free the staged changes in
+ * reopen_state
+ */
+void bdrv_reopen_abort(BDRVReopenState *reopen_state)
+{
+ BlockDriver *drv;
+
+ assert(reopen_state != NULL);
+ drv = reopen_state->bs->drv;
+ assert(drv != NULL);
+
+ if (drv->bdrv_reopen_abort) {
+ drv->bdrv_reopen_abort(reopen_state);
+ }
+}
+
+
+void bdrv_close(BlockDriverState *bs)
+{
+ if (bs->job) {
+ block_job_cancel_sync(bs->job);
+ }
+ bdrv_drain_all(); /* complete I/O */
+ bdrv_flush(bs);
+ bdrv_drain_all(); /* in case flush left pending I/O */
+ notifier_list_notify(&bs->close_notifiers, bs);
+
+ if (bs->drv) {
+ if (bs->backing_hd) {
+ bdrv_delete(bs->backing_hd);
+ bs->backing_hd = NULL;
+ }
+ bs->drv->bdrv_close(bs);
+ g_free(bs->opaque);
+#ifdef _WIN32
+ if (bs->is_temporary) {
+ unlink(bs->filename);
+ }
+#endif
+ bs->opaque = NULL;
+ bs->drv = NULL;
+ bs->copy_on_read = 0;
+ bs->backing_file[0] = '\0';
+ bs->backing_format[0] = '\0';
+ bs->total_sectors = 0;
+ bs->encrypted = 0;
+ bs->valid_key = 0;
+ bs->sg = 0;
+ bs->growable = 0;
+ QDECREF(bs->options);
+ bs->options = NULL;
+
+ if (bs->file != NULL) {
+ bdrv_delete(bs->file);
+ bs->file = NULL;
+ }
+ }
+
+ bdrv_dev_change_media_cb(bs, false);
+
+ /*throttling disk I/O limits*/
+ if (bs->io_limits_enabled) {
+ bdrv_io_limits_disable(bs);
+ }
+}
+
+void bdrv_close_all(void)
+{
+ BlockDriverState *bs;
+
+ QTAILQ_FOREACH(bs, &bdrv_states, list) {
+ bdrv_close(bs);
+ }
+}
+
+/*
+ * Wait for pending requests to complete across all BlockDriverStates
+ *
+ * This function does not flush data to disk, use bdrv_flush_all() for that
+ * after calling this function.
+ *
+ * Note that completion of an asynchronous I/O operation can trigger any
+ * number of other I/O operations on other devices---for example a coroutine
+ * can be arbitrarily complex and a constant flow of I/O can come until the
+ * coroutine is complete. Because of this, it is not possible to have a
+ * function to drain a single device's I/O queue.
+ */
+void bdrv_drain_all(void)
+{
+ BlockDriverState *bs;
+ bool busy;
+
+ do {
+ busy = qemu_aio_wait();
+
+ /* FIXME: We do not have timer support here, so this is effectively
+ * a busy wait.
+ */
+ QTAILQ_FOREACH(bs, &bdrv_states, list) {
+ if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
+ qemu_co_queue_restart_all(&bs->throttled_reqs);
+ busy = true;
+ }
+ }
+ } while (busy);
+
+ /* If requests are still pending there is a bug somewhere */
+ QTAILQ_FOREACH(bs, &bdrv_states, list) {
+ assert(QLIST_EMPTY(&bs->tracked_requests));
+ assert(qemu_co_queue_empty(&bs->throttled_reqs));
+ }
+}
+
+/* make a BlockDriverState anonymous by removing from bdrv_state list.
+ Also, NULL terminate the device_name to prevent double remove */
+void bdrv_make_anon(BlockDriverState *bs)
+{
+ if (bs->device_name[0] != '\0') {
+ QTAILQ_REMOVE(&bdrv_states, bs, list);
+ }
+ bs->device_name[0] = '\0';
+}
+
+static void bdrv_rebind(BlockDriverState *bs)
+{
+ if (bs->drv && bs->drv->bdrv_rebind) {
+ bs->drv->bdrv_rebind(bs);
+ }
+}
+
+static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
+ BlockDriverState *bs_src)
+{
+ /* move some fields that need to stay attached to the device */
+ bs_dest->open_flags = bs_src->open_flags;
+
+ /* dev info */
+ bs_dest->dev_ops = bs_src->dev_ops;
+ bs_dest->dev_opaque = bs_src->dev_opaque;
+ bs_dest->dev = bs_src->dev;
+ bs_dest->buffer_alignment = bs_src->buffer_alignment;
+ bs_dest->copy_on_read = bs_src->copy_on_read;
+
+ bs_dest->enable_write_cache = bs_src->enable_write_cache;
+
+ /* i/o timing parameters */
+ bs_dest->slice_start = bs_src->slice_start;
+ bs_dest->slice_end = bs_src->slice_end;
+ bs_dest->slice_submitted = bs_src->slice_submitted;
+ bs_dest->io_limits = bs_src->io_limits;
+ bs_dest->throttled_reqs = bs_src->throttled_reqs;
+ bs_dest->block_timer = bs_src->block_timer;
+ bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
+
+ /* r/w error */
+ bs_dest->on_read_error = bs_src->on_read_error;
+ bs_dest->on_write_error = bs_src->on_write_error;
+
+ /* i/o status */
+ bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
+ bs_dest->iostatus = bs_src->iostatus;
+
+ /* dirty bitmap */
+ bs_dest->dirty_bitmap = bs_src->dirty_bitmap;
+
+ /* job */
+ bs_dest->in_use = bs_src->in_use;
+ bs_dest->job = bs_src->job;
+
+ /* keep the same entry in bdrv_states */
+ pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name),
+ bs_src->device_name);
+ bs_dest->list = bs_src->list;
+}
+
+/*
+ * Swap bs contents for two image chains while they are live,
+ * while keeping required fields on the BlockDriverState that is
+ * actually attached to a device.
+ *
+ * This will modify the BlockDriverState fields, and swap contents
+ * between bs_new and bs_old. Both bs_new and bs_old are modified.
+ *
+ * bs_new is required to be anonymous.
+ *
+ * This function does not create any image files.
+ */
+void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
+{
+ BlockDriverState tmp;
+
+ /* bs_new must be anonymous and shouldn't have anything fancy enabled */
+ assert(bs_new->device_name[0] == '\0');
+ assert(bs_new->dirty_bitmap == NULL);
+ assert(bs_new->job == NULL);
+ assert(bs_new->dev == NULL);
+ assert(bs_new->in_use == 0);
+ assert(bs_new->io_limits_enabled == false);
+ assert(bs_new->block_timer == NULL);
+
+ tmp = *bs_new;
+ *bs_new = *bs_old;
+ *bs_old = tmp;
+
+ /* there are some fields that should not be swapped, move them back */
+ bdrv_move_feature_fields(&tmp, bs_old);
+ bdrv_move_feature_fields(bs_old, bs_new);
+ bdrv_move_feature_fields(bs_new, &tmp);
+
+ /* bs_new shouldn't be in bdrv_states even after the swap! */
+ assert(bs_new->device_name[0] == '\0');
+
+ /* Check a few fields that should remain attached to the device */
+ assert(bs_new->dev == NULL);
+ assert(bs_new->job == NULL);
+ assert(bs_new->in_use == 0);
+ assert(bs_new->io_limits_enabled == false);
+ assert(bs_new->block_timer == NULL);
+
+ bdrv_rebind(bs_new);
+ bdrv_rebind(bs_old);
+}
+
+/*
+ * Add new bs contents at the top of an image chain while the chain is
+ * live, while keeping required fields on the top layer.
+ *
+ * This will modify the BlockDriverState fields, and swap contents
+ * between bs_new and bs_top. Both bs_new and bs_top are modified.
+ *
+ * bs_new is required to be anonymous.
+ *
+ * This function does not create any image files.
+ */
+void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
+{
+ bdrv_swap(bs_new, bs_top);
+
+ /* The contents of 'tmp' will become bs_top, as we are
+ * swapping bs_new and bs_top contents. */
+ bs_top->backing_hd = bs_new;
+ bs_top->open_flags &= ~BDRV_O_NO_BACKING;
+ pstrcpy(bs_top->backing_file, sizeof(bs_top->backing_file),
+ bs_new->filename);
+ pstrcpy(bs_top->backing_format, sizeof(bs_top->backing_format),
+ bs_new->drv ? bs_new->drv->format_name : "");
+}
+
+void bdrv_delete(BlockDriverState *bs)
+{
+ assert(!bs->dev);
+ assert(!bs->job);
+ assert(!bs->in_use);
+
+ /* remove from list, if necessary */
+ bdrv_make_anon(bs);
+
+ bdrv_close(bs);
+
+ g_free(bs);
+}
+
+int bdrv_attach_dev(BlockDriverState *bs, void *dev)
+/* TODO change to DeviceState *dev when all users are qdevified */
+{
+ if (bs->dev) {
+ return -EBUSY;
+ }
+ bs->dev = dev;
+ bdrv_iostatus_reset(bs);
+ return 0;
+}
+
+/* TODO qdevified devices don't use this, remove when devices are qdevified */
+void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
+{
+ if (bdrv_attach_dev(bs, dev) < 0) {
+ abort();
+ }
+}
+
+void bdrv_detach_dev(BlockDriverState *bs, void *dev)
+/* TODO change to DeviceState *dev when all users are qdevified */
+{
+ assert(bs->dev == dev);
+ bs->dev = NULL;
+ bs->dev_ops = NULL;
+ bs->dev_opaque = NULL;
+ bs->buffer_alignment = 512;
+}
+
+/* TODO change to return DeviceState * when all users are qdevified */
+void *bdrv_get_attached_dev(BlockDriverState *bs)
+{
+ return bs->dev;
+}
+
+void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
+ void *opaque)
+{
+ bs->dev_ops = ops;
+ bs->dev_opaque = opaque;
+}
+
+void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
+ enum MonitorEvent ev,
+ BlockErrorAction action, bool is_read)
+{
+ QObject *data;
+ const char *action_str;
+
+ switch (action) {
+ case BDRV_ACTION_REPORT:
+ action_str = "report";
+ break;
+ case BDRV_ACTION_IGNORE:
+ action_str = "ignore";
+ break;
+ case BDRV_ACTION_STOP:
+ action_str = "stop";
+ break;
+ default:
+ abort();
+ }
+
+ data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
+ bdrv->device_name,
+ action_str,
+ is_read ? "read" : "write");
+ monitor_protocol_event(ev, data);
+
+ qobject_decref(data);
+}
+
+static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
+{
+ QObject *data;
+
+ data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
+ bdrv_get_device_name(bs), ejected);
+ monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data);
+
+ qobject_decref(data);
+}
+
+static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
+{
+ if (bs->dev_ops && bs->dev_ops->change_media_cb) {
+ bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
+ bs->dev_ops->change_media_cb(bs->dev_opaque, load);
+ if (tray_was_closed) {
+ /* tray open */
+ bdrv_emit_qmp_eject_event(bs, true);
+ }
+ if (load) {
+ /* tray close */
+ bdrv_emit_qmp_eject_event(bs, false);
+ }
+ }
+}
+
+bool bdrv_dev_has_removable_media(BlockDriverState *bs)
+{
+ return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
+}
+
+void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
+{
+ if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
+ bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
+ }
+}
+
+bool bdrv_dev_is_tray_open(BlockDriverState *bs)
+{
+ if (bs->dev_ops && bs->dev_ops->is_tray_open) {
+ return bs->dev_ops->is_tray_open(bs->dev_opaque);
+ }
+ return false;
+}
+
+static void bdrv_dev_resize_cb(BlockDriverState *bs)
+{
+ if (bs->dev_ops && bs->dev_ops->resize_cb) {
+ bs->dev_ops->resize_cb(bs->dev_opaque);
+ }
+}
+
+bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
+{
+ if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
+ return bs->dev_ops->is_medium_locked(bs->dev_opaque);
+ }
+ return false;
+}
+
+/*
+ * Run consistency checks on an image
+ *
+ * Returns 0 if the check could be completed (it doesn't mean that the image is
+ * free of errors) or -errno when an internal error occurred. The results of the
+ * check are stored in res.
+ */
+int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
+{
+ if (bs->drv->bdrv_check == NULL) {
+ return -ENOTSUP;
+ }
+
+ memset(res, 0, sizeof(*res));
+ return bs->drv->bdrv_check(bs, res, fix);
+}
+
+#define COMMIT_BUF_SECTORS 2048
+
+/* commit COW file into the raw image */
+int bdrv_commit(BlockDriverState *bs)
+{
+ BlockDriver *drv = bs->drv;
+ int64_t sector, total_sectors;
+ int n, ro, open_flags;
+ int ret = 0;
+ uint8_t *buf;
+ char filename[PATH_MAX];
+
+ if (!drv)
+ return -ENOMEDIUM;
+
+ if (!bs->backing_hd) {
+ return -ENOTSUP;
+ }
+
+ if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
+ return -EBUSY;
+ }
+
+ ro = bs->backing_hd->read_only;
+ /* Use pstrcpy (not strncpy): filename must be NUL-terminated. */
+ pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
+ open_flags = bs->backing_hd->open_flags;
+
+ if (ro) {
+ if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
+ return -EACCES;
+ }
+ }
+
+ total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
+ buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
+
+ for (sector = 0; sector < total_sectors; sector += n) {
+ if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
+
+ if (bdrv_read(bs, sector, buf, n) != 0) {
+ ret = -EIO;
+ goto ro_cleanup;
+ }
+
+ if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
+ ret = -EIO;
+ goto ro_cleanup;
+ }
+ }
+ }
+
+ if (drv->bdrv_make_empty) {
+ ret = drv->bdrv_make_empty(bs);
+ bdrv_flush(bs);
+ }
+
+ /*
+ * Make sure all data we wrote to the backing device is actually
+ * stable on disk.
+ */
+ if (bs->backing_hd)
+ bdrv_flush(bs->backing_hd);
+
+ro_cleanup:
+ g_free(buf);
+
+ if (ro) {
+ /* ignoring error return here */
+ bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
+ }
+
+ return ret;
+}
+
+int bdrv_commit_all(void)
+{
+ BlockDriverState *bs;
+
+ QTAILQ_FOREACH(bs, &bdrv_states, list) {
+ if (bs->drv && bs->backing_hd) {
+ int ret = bdrv_commit(bs);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+ }
+ return 0;
+}
+
+/**
+ * Remove an active request from the tracked requests list
+ *
+ * This function should be called when a tracked request is completing.
+ */
+static void tracked_request_end(BdrvTrackedRequest *req)
+{
+ QLIST_REMOVE(req, list);
+ qemu_co_queue_restart_all(&req->wait_queue);
+}
+
+/**
+ * Add an active request to the tracked requests list
+ */
+static void tracked_request_begin(BdrvTrackedRequest *req,
+ BlockDriverState *bs,
+ int64_t sector_num,
+ int nb_sectors, bool is_write)
+{
+ *req = (BdrvTrackedRequest){
+ .bs = bs,
+ .sector_num = sector_num,
+ .nb_sectors = nb_sectors,
+ .is_write = is_write,
+ .co = qemu_coroutine_self(),
+ };
+
+ qemu_co_queue_init(&req->wait_queue);
+
+ QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
+}
+
+/**
+ * Round a region to cluster boundaries
+ */
+void bdrv_round_to_clusters(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ int64_t *cluster_sector_num,
+ int *cluster_nb_sectors)
+{
+ BlockDriverInfo bdi;
+
+ if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
+ *cluster_sector_num = sector_num;
+ *cluster_nb_sectors = nb_sectors;
+ } else {
+ int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
+ *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
+ *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
+ nb_sectors, c);
+ }
+}
+
+static bool tracked_request_overlaps(BdrvTrackedRequest *req,
+ int64_t sector_num, int nb_sectors) {
+ /* aaaa bbbb */
+ if (sector_num >= req->sector_num + req->nb_sectors) {
+ return false;
+ }
+ /* bbbb aaaa */
+ if (req->sector_num >= sector_num + nb_sectors) {
+ return false;
+ }
+ return true;
+}
+
+static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors)
+{
+ BdrvTrackedRequest *req;
+ int64_t cluster_sector_num;
+ int cluster_nb_sectors;
+ bool retry;
+
+ /* If we touch the same cluster it counts as an overlap. This guarantees
+ * that allocating writes will be serialized and not race with each other
+ * for the same cluster. For example, in copy-on-read it ensures that the
+ * CoR read and write operations are atomic and guest writes cannot
+ * interleave between them.
+ */
+ bdrv_round_to_clusters(bs, sector_num, nb_sectors,
+ &cluster_sector_num, &cluster_nb_sectors);
+
+ do {
+ retry = false;
+ QLIST_FOREACH(req, &bs->tracked_requests, list) {
+ if (tracked_request_overlaps(req, cluster_sector_num,
+ cluster_nb_sectors)) {
+ /* Hitting this means there was a reentrant request, for
+ * example, a block driver issuing nested requests. This must
+ * never happen since it means deadlock.
+ */
+ assert(qemu_coroutine_self() != req->co);
+
+ qemu_co_queue_wait(&req->wait_queue);
+ retry = true;
+ break;
+ }
+ }
+ } while (retry);
+}
+
+/*
+ * Return values:
+ * 0 - success
+ * -EINVAL - backing format specified, but no file
+ * -ENOSPC - can't update the backing file because no space is left in the
+ * image file header
+ * -ENOTSUP - format driver doesn't support changing the backing file
+ */
+int bdrv_change_backing_file(BlockDriverState *bs,
+ const char *backing_file, const char *backing_fmt)
+{
+ BlockDriver *drv = bs->drv;
+ int ret;
+
+ /* Backing file format doesn't make sense without a backing file */
+ if (backing_fmt && !backing_file) {
+ return -EINVAL;
+ }
+
+ if (drv->bdrv_change_backing_file != NULL) {
+ ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
+ } else {
+ ret = -ENOTSUP;
+ }
+
+ if (ret == 0) {
+ pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
+ pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
+ }
+ return ret;
+}
+
+/*
+ * Finds the image layer in the chain that has 'bs' as its backing file.
+ *
+ * active is the current topmost image.
+ *
+ * Returns NULL if bs is not found in active's image chain,
+ * or if active == bs.
+ */
+BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
+ BlockDriverState *bs)
+{
+ BlockDriverState *overlay = NULL;
+ BlockDriverState *intermediate;
+
+ assert(active != NULL);
+ assert(bs != NULL);
+
+ /* if bs is the same as active, then by definition it has no overlay
+ */
+ if (active == bs) {
+ return NULL;
+ }
+
+ intermediate = active;
+ while (intermediate->backing_hd) {
+ if (intermediate->backing_hd == bs) {
+ overlay = intermediate;
+ break;
+ }
+ intermediate = intermediate->backing_hd;
+ }
+
+ return overlay;
+}
+
+typedef struct BlkIntermediateStates {
+ BlockDriverState *bs;
+ QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
+} BlkIntermediateStates;
+
+
+/*
+ * Drops images above 'base' up to and including 'top', and sets the image
+ * above 'top' to have base as its backing file.
+ *
+ * Requires that the overlay to 'top' is opened r/w, so that the backing file
+ * information in 'bs' can be properly updated.
+ *
+ * E.g., this will convert the following chain:
+ * bottom <- base <- intermediate <- top <- active
+ *
+ * to
+ *
+ * bottom <- base <- active
+ *
+ * It is allowed for bottom==base, in which case it converts:
+ *
+ * base <- intermediate <- top <- active
+ *
+ * to
+ *
+ * base <- active
+ *
+ * Error conditions:
+ * if active == top, that is considered an error
+ *
+ */
+int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
+ BlockDriverState *base)
+{
+ BlockDriverState *intermediate;
+ BlockDriverState *base_bs = NULL;
+ BlockDriverState *new_top_bs = NULL;
+ BlkIntermediateStates *intermediate_state, *next;
+ int ret = -EIO;
+
+ QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
+ QSIMPLEQ_INIT(&states_to_delete);
+
+ if (!top->drv || !base->drv) {
+ goto exit;
+ }
+
+ new_top_bs = bdrv_find_overlay(active, top);
+
+ if (new_top_bs == NULL) {
+ /* we could not find the image above 'top', this is an error */
+ goto exit;
+ }
+
+ /* special case of new_top_bs->backing_hd already pointing to base - nothing
+ * to do, no intermediate images */
+ if (new_top_bs->backing_hd == base) {
+ ret = 0;
+ goto exit;
+ }
+
+ intermediate = top;
+
+ /* now we will go down through the list, and add each BDS we find
+ * into our deletion queue, until we hit the 'base'
+ */
+ while (intermediate) {
+ intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
+ intermediate_state->bs = intermediate;
+ QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
+
+ if (intermediate->backing_hd == base) {
+ base_bs = intermediate->backing_hd;
+ break;
+ }
+ intermediate = intermediate->backing_hd;
+ }
+ if (base_bs == NULL) {
+ /* something went wrong, we did not end at the base. safely
+ * unravel everything, and exit with error */
+ goto exit;
+ }
+
+ /* success - we can delete the intermediate states, and link top->base */
+ ret = bdrv_change_backing_file(new_top_bs, base_bs->filename,
+ base_bs->drv ? base_bs->drv->format_name : "");
+ if (ret) {
+ goto exit;
+ }
+ new_top_bs->backing_hd = base_bs;
+
+
+ QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
+ /* so that bdrv_close() does not recursively close the chain */
+ intermediate_state->bs->backing_hd = NULL;
+ bdrv_delete(intermediate_state->bs);
+ }
+ ret = 0;
+
+exit:
+ QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
+ g_free(intermediate_state);
+ }
+ return ret;
+}
+
+
+static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
+ size_t size)
+{
+ int64_t len;
+
+ if (!bdrv_is_inserted(bs))
+ return -ENOMEDIUM;
+
+ if (bs->growable)
+ return 0;
+
+ len = bdrv_getlength(bs);
+
+ if (offset < 0)
+ return -EIO;
+
+ if ((offset > len) || (len - offset < size))
+ return -EIO;
+
+ return 0;
+}
+
+static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors)
+{
+ return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
+ nb_sectors * BDRV_SECTOR_SIZE);
+}
+
+typedef struct RwCo {
+ BlockDriverState *bs;
+ int64_t sector_num;
+ int nb_sectors;
+ QEMUIOVector *qiov;
+ bool is_write;
+ int ret;
+} RwCo;
+
+static void coroutine_fn bdrv_rw_co_entry(void *opaque)
+{
+ RwCo *rwco = opaque;
+
+ if (!rwco->is_write) {
+ rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
+ rwco->nb_sectors, rwco->qiov, 0);
+ } else {
+ rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
+ rwco->nb_sectors, rwco->qiov, 0);
+ }
+}
+
+/*
+ * Process a vectored synchronous request using coroutines
+ */
+static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num,
+ QEMUIOVector *qiov, bool is_write)
+{
+ Coroutine *co;
+ RwCo rwco = {
+ .bs = bs,
+ .sector_num = sector_num,
+ .nb_sectors = qiov->size >> BDRV_SECTOR_BITS,
+ .qiov = qiov,
+ .is_write = is_write,
+ .ret = NOT_DONE,
+ };
+ assert((qiov->size & (BDRV_SECTOR_SIZE - 1)) == 0);
+
+ /**
+ * In sync call context, when the vcpu is blocked, this throttling timer
+ * will not fire; so the I/O throttling function has to be disabled here
+ * if it has been enabled.
+ */
+ if (bs->io_limits_enabled) {
+ fprintf(stderr, "Disabling I/O throttling on '%s' due "
+ "to synchronous I/O.\n", bdrv_get_device_name(bs));
+ bdrv_io_limits_disable(bs);
+ }
+
+ if (qemu_in_coroutine()) {
+ /* Fast-path if already in coroutine context */
+ bdrv_rw_co_entry(&rwco);
+ } else {
+ co = qemu_coroutine_create(bdrv_rw_co_entry);
+ qemu_coroutine_enter(co, &rwco);
+ while (rwco.ret == NOT_DONE) {
+ qemu_aio_wait();
+ }
+ }
+ return rwco.ret;
+}
+
+/*
+ * Process a synchronous request using coroutines
+ */
+static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
+ int nb_sectors, bool is_write)
+{
+ QEMUIOVector qiov;
+ struct iovec iov = {
+ .iov_base = (void *)buf,
+ .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
+ };
+
+ qemu_iovec_init_external(&qiov, &iov, 1);
+ return bdrv_rwv_co(bs, sector_num, &qiov, is_write);
+}
+
+/* return < 0 if error. See bdrv_write() for the return codes */
+int bdrv_read(BlockDriverState *bs, int64_t sector_num,
+ uint8_t *buf, int nb_sectors)
+{
+ return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
+}
+
+/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
+int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
+ uint8_t *buf, int nb_sectors)
+{
+ bool enabled;
+ int ret;
+
+ enabled = bs->io_limits_enabled;
+ bs->io_limits_enabled = false;
+ ret = bdrv_read(bs, 0, buf, 1);
+ bs->io_limits_enabled = enabled;
+ return ret;
+}
+
+/* Return < 0 if error. Important errors are:
+ -EIO generic I/O error (may happen for all errors)
+ -ENOMEDIUM No media inserted.
+ -EINVAL Invalid sector number or nb_sectors
+ -EACCES Trying to write a read-only device
+*/
+int bdrv_write(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors)
+{
+ return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
+}
+
+int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov)
+{
+ return bdrv_rwv_co(bs, sector_num, qiov, true);
+}
+
+int bdrv_pread(BlockDriverState *bs, int64_t offset,
+ void *buf, int count1)
+{
+ uint8_t tmp_buf[BDRV_SECTOR_SIZE];
+ int len, nb_sectors, count;
+ int64_t sector_num;
+ int ret;
+
+ count = count1;
+ /* first read to align to sector start */
+ len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
+ if (len > count)
+ len = count;
+ sector_num = offset >> BDRV_SECTOR_BITS;
+ if (len > 0) {
+ if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
+ return ret;
+ memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
+ count -= len;
+ if (count == 0)
+ return count1;
+ sector_num++;
+ buf += len;
+ }
+
+ /* read the sectors "in place" */
+ nb_sectors = count >> BDRV_SECTOR_BITS;
+ if (nb_sectors > 0) {
+ if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
+ return ret;
+ sector_num += nb_sectors;
+ len = nb_sectors << BDRV_SECTOR_BITS;
+ buf += len;
+ count -= len;
+ }
+
+ /* add data from the last sector */
+ if (count > 0) {
+ if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
+ return ret;
+ memcpy(buf, tmp_buf, count);
+ }
+ return count1;
+}
+
+int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
+{
+ uint8_t tmp_buf[BDRV_SECTOR_SIZE];
+ int len, nb_sectors, count;
+ int64_t sector_num;
+ int ret;
+
+ count = qiov->size;
+
+ /* first write to align to sector start */
+ len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
+ if (len > count)
+ len = count;
+ sector_num = offset >> BDRV_SECTOR_BITS;
+ if (len > 0) {
+ if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
+ return ret;
+ qemu_iovec_to_buf(qiov, 0, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)),
+ len);
+ if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
+ return ret;
+ count -= len;
+ if (count == 0)
+ return qiov->size;
+ sector_num++;
+ }
+
+ /* write the sectors "in place" */
+ nb_sectors = count >> BDRV_SECTOR_BITS;
+ if (nb_sectors > 0) {
+ QEMUIOVector qiov_inplace;
+
+ qemu_iovec_init(&qiov_inplace, qiov->niov);
+ qemu_iovec_concat(&qiov_inplace, qiov, len,
+ nb_sectors << BDRV_SECTOR_BITS);
+ ret = bdrv_writev(bs, sector_num, &qiov_inplace);
+ qemu_iovec_destroy(&qiov_inplace);
+ if (ret < 0) {
+ return ret;
+ }
+
+ sector_num += nb_sectors;
+ len = nb_sectors << BDRV_SECTOR_BITS;
+ count -= len;
+ }
+
+ /* add data from the last sector */
+ if (count > 0) {
+ if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
+ return ret;
+ qemu_iovec_to_buf(qiov, qiov->size - count, tmp_buf, count);
+ if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
+ return ret;
+ }
+ return qiov->size;
+}
+
+int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
+ const void *buf, int count1)
+{
+ QEMUIOVector qiov;
+ struct iovec iov = {
+ .iov_base = (void *) buf,
+ .iov_len = count1,
+ };
+
+ qemu_iovec_init_external(&qiov, &iov, 1);
+ return bdrv_pwritev(bs, offset, &qiov);
+}
+
+/*
+ * Writes to the file and ensures that no writes are reordered across this
+ * request (acts as a barrier)
+ *
+ * Returns 0 on success, -errno in error cases.
+ */
+int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
+ const void *buf, int count)
+{
+ int ret;
+
+ ret = bdrv_pwrite(bs, offset, buf, count);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* No flush needed for cache modes that already do it */
+ if (bs->enable_write_cache) {
+ bdrv_flush(bs);
+ }
+
+ return 0;
+}
+
+static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+{
+ /* Perform I/O through a temporary buffer so that users who scribble over
+ * their read buffer while the operation is in progress do not end up
+ * modifying the image file. This is critical for zero-copy guest I/O
+ * where anything might happen inside guest memory.
+ */
+ void *bounce_buffer;
+
+ BlockDriver *drv = bs->drv;
+ struct iovec iov;
+ QEMUIOVector bounce_qiov;
+ int64_t cluster_sector_num;
+ int cluster_nb_sectors;
+ size_t skip_bytes;
+ int ret;
+
+ /* Cover entire cluster so no additional backing file I/O is required when
+ * allocating cluster in the image file.
+ */
+ bdrv_round_to_clusters(bs, sector_num, nb_sectors,
+ &cluster_sector_num, &cluster_nb_sectors);
+
+ trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
+ cluster_sector_num, cluster_nb_sectors);
+
+ iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
+ iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
+ qemu_iovec_init_external(&bounce_qiov, &iov, 1);
+
+ ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
+ &bounce_qiov);
+ if (ret < 0) {
+ goto err;
+ }
+
+ if (drv->bdrv_co_write_zeroes &&
+ buffer_is_zero(bounce_buffer, iov.iov_len)) {
+ ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
+ cluster_nb_sectors);
+ } else {
+ /* This does not change the data on the disk, it is not necessary
+ * to flush even in cache=writethrough mode.
+ */
+ ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
+ &bounce_qiov);
+ }
+
+ if (ret < 0) {
+ /* It might be okay to ignore write errors for guest requests. If this
+ * is a deliberate copy-on-read then we don't want to ignore the error.
+ * Simply report it in all cases.
+ */
+ goto err;
+ }
+
+ skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
+ qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
+ nb_sectors * BDRV_SECTOR_SIZE);
+
+err:
+ qemu_vfree(bounce_buffer);
+ return ret;
+}
+
+/*
+ * Handle a read request in coroutine context
+ */
+static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+ BdrvRequestFlags flags)
+{
+ BlockDriver *drv = bs->drv;
+ BdrvTrackedRequest req;
+ int ret;
+
+ if (!drv) {
+ return -ENOMEDIUM;
+ }
+ if (bdrv_check_request(bs, sector_num, nb_sectors)) {
+ return -EIO;
+ }
+
+ /* throttling disk read I/O */
+ if (bs->io_limits_enabled) {
+ bdrv_io_limits_intercept(bs, false, nb_sectors);
+ }
+
+ if (bs->copy_on_read) {
+ flags |= BDRV_REQ_COPY_ON_READ;
+ }
+ if (flags & BDRV_REQ_COPY_ON_READ) {
+ bs->copy_on_read_in_flight++;
+ }
+
+ if (bs->copy_on_read_in_flight) {
+ wait_for_overlapping_requests(bs, sector_num, nb_sectors);
+ }
+
+ tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
+
+ if (flags & BDRV_REQ_COPY_ON_READ) {
+ int pnum;
+
+ ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
+ if (ret < 0) {
+ goto out;
+ }
+
+ if (!ret || pnum != nb_sectors) {
+ ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
+ goto out;
+ }
+ }
+
+ ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
+
+out:
+ tracked_request_end(&req);
+
+ if (flags & BDRV_REQ_COPY_ON_READ) {
+ bs->copy_on_read_in_flight--;
+ }
+
+ return ret;
+}
+
+int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov)
+{
+ trace_bdrv_co_readv(bs, sector_num, nb_sectors);
+
+ return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
+}
+
+int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+{
+ trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
+
+ return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
+ BDRV_REQ_COPY_ON_READ);
+}
+
+static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors)
+{
+ BlockDriver *drv = bs->drv;
+ QEMUIOVector qiov;
+ struct iovec iov;
+ int ret;
+
+ /* TODO Emulate only part of misaligned requests instead of letting block
+ * drivers return -ENOTSUP and emulate everything */
+
+ /* First try the efficient write zeroes operation */
+ if (drv->bdrv_co_write_zeroes) {
+ ret = drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
+ if (ret != -ENOTSUP) {
+ return ret;
+ }
+ }
+
+ /* Fall back to bounce buffer if write zeroes is unsupported */
+ iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
+ iov.iov_base = qemu_blockalign(bs, iov.iov_len);
+ memset(iov.iov_base, 0, iov.iov_len);
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
+
+ qemu_vfree(iov.iov_base);
+ return ret;
+}
+
+/*
+ * Handle a write request in coroutine context
+ */
+static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+ BdrvRequestFlags flags)
+{
+ BlockDriver *drv = bs->drv;
+ BdrvTrackedRequest req;
+ int ret;
+
+ if (!bs->drv) {
+ return -ENOMEDIUM;
+ }
+ if (bs->read_only) {
+ return -EACCES;
+ }
+ if (bdrv_check_request(bs, sector_num, nb_sectors)) {
+ return -EIO;
+ }
+
+ /* throttling disk write I/O */
+ if (bs->io_limits_enabled) {
+ bdrv_io_limits_intercept(bs, true, nb_sectors);
+ }
+
+ if (bs->copy_on_read_in_flight) {
+ wait_for_overlapping_requests(bs, sector_num, nb_sectors);
+ }
+
+ tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
+
+ ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req);
+
+ if (ret < 0) {
+ /* Do nothing, write notifier decided to fail this request */
+ } else if (flags & BDRV_REQ_ZERO_WRITE) {
+ ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
+ } else {
+ ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
+ }
+
+ if (ret == 0 && !bs->enable_write_cache) {
+ ret = bdrv_co_flush(bs);
+ }
+
+ if (bs->dirty_bitmap) {
+ bdrv_set_dirty(bs, sector_num, nb_sectors);
+ }
+
+ if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
+ bs->wr_highest_sector = sector_num + nb_sectors - 1;
+ }
+
+ tracked_request_end(&req);
+
+ return ret;
+}
+
+int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov)
+{
+ trace_bdrv_co_writev(bs, sector_num, nb_sectors);
+
+ return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
+}
+
+int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors)
+{
+ trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
+
+ return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
+ BDRV_REQ_ZERO_WRITE);
+}
+
+/**
+ * Truncate file to 'offset' bytes (needed only for file protocols)
+ */
+int bdrv_truncate(BlockDriverState *bs, int64_t offset)
+{
+ BlockDriver *drv = bs->drv;
+ int ret;
+ if (!drv)
+ return -ENOMEDIUM;
+ if (!drv->bdrv_truncate)
+ return -ENOTSUP;
+ if (bs->read_only)
+ return -EACCES;
+ if (bdrv_in_use(bs))
+ return -EBUSY;
+ ret = drv->bdrv_truncate(bs, offset);
+ if (ret == 0) {
+ ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
+ bdrv_dev_resize_cb(bs);
+ }
+ return ret;
+}
+
+/**
+ * Length of a allocated file in bytes. Sparse files are counted by actual
+ * allocated space. Return < 0 if error or unknown.
+ */
+int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
+{
+ BlockDriver *drv = bs->drv;
+ if (!drv) {
+ return -ENOMEDIUM;
+ }
+ if (drv->bdrv_get_allocated_file_size) {
+ return drv->bdrv_get_allocated_file_size(bs);
+ }
+ if (bs->file) {
+ return bdrv_get_allocated_file_size(bs->file);
+ }
+ return -ENOTSUP;
+}
+
+/**
+ * Length of a file in bytes. Return < 0 if error or unknown.
+ */
+int64_t bdrv_getlength(BlockDriverState *bs)
+{
+ BlockDriver *drv = bs->drv;
+ if (!drv)
+ return -ENOMEDIUM;
+
+ if (bs->growable || bdrv_dev_has_removable_media(bs)) {
+ if (drv->bdrv_getlength) {
+ return drv->bdrv_getlength(bs);
+ }
+ }
+ return bs->total_sectors * BDRV_SECTOR_SIZE;
+}
+
+/* return 0 as number of sectors if no device present or error */
+void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
+{
+ int64_t length;
+ length = bdrv_getlength(bs);
+ if (length < 0)
+ length = 0;
+ else
+ length = length >> BDRV_SECTOR_BITS;
+ *nb_sectors_ptr = length;
+}
+
+/* throttling disk io limits */
+void bdrv_set_io_limits(BlockDriverState *bs,
+ BlockIOLimit *io_limits)
+{
+ bs->io_limits = *io_limits;
+ bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
+}
+
+void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
+ BlockdevOnError on_write_error)
+{
+ bs->on_read_error = on_read_error;
+ bs->on_write_error = on_write_error;
+}
+
+BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
+{
+ return is_read ? bs->on_read_error : bs->on_write_error;
+}
+
+BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
+{
+ BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
+
+ switch (on_err) {
+ case BLOCKDEV_ON_ERROR_ENOSPC:
+ return (error == ENOSPC) ? BDRV_ACTION_STOP : BDRV_ACTION_REPORT;
+ case BLOCKDEV_ON_ERROR_STOP:
+ return BDRV_ACTION_STOP;
+ case BLOCKDEV_ON_ERROR_REPORT:
+ return BDRV_ACTION_REPORT;
+ case BLOCKDEV_ON_ERROR_IGNORE:
+ return BDRV_ACTION_IGNORE;
+ default:
+ abort();
+ }
+}
+
+/* This is done by device models because, while the block layer knows
+ * about the error, it does not know whether an operation comes from
+ * the device or the block layer (from a job, for example).
+ */
+void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
+ bool is_read, int error)
+{
+ assert(error >= 0);
+ bdrv_emit_qmp_error_event(bs, QEVENT_BLOCK_IO_ERROR, action, is_read);
+ if (action == BDRV_ACTION_STOP) {
+ vm_stop(RUN_STATE_IO_ERROR);
+ bdrv_iostatus_set_err(bs, error);
+ }
+}
+
+int bdrv_is_read_only(BlockDriverState *bs)
+{
+ return bs->read_only;
+}
+
+int bdrv_is_sg(BlockDriverState *bs)
+{
+ return bs->sg;
+}
+
+int bdrv_enable_write_cache(BlockDriverState *bs)
+{
+ return bs->enable_write_cache;
+}
+
+void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
+{
+ bs->enable_write_cache = wce;
+
+ /* so a reopen() will preserve wce */
+ if (wce) {
+ bs->open_flags |= BDRV_O_CACHE_WB;
+ } else {
+ bs->open_flags &= ~BDRV_O_CACHE_WB;
+ }
+}
+
+int bdrv_is_encrypted(BlockDriverState *bs)
+{
+ if (bs->backing_hd && bs->backing_hd->encrypted)
+ return 1;
+ return bs->encrypted;
+}
+
+int bdrv_key_required(BlockDriverState *bs)
+{
+ BlockDriverState *backing_hd = bs->backing_hd;
+
+ if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
+ return 1;
+ return (bs->encrypted && !bs->valid_key);
+}
+
+int bdrv_set_key(BlockDriverState *bs, const char *key)
+{
+ int ret;
+ if (bs->backing_hd && bs->backing_hd->encrypted) {
+ ret = bdrv_set_key(bs->backing_hd, key);
+ if (ret < 0)
+ return ret;
+ if (!bs->encrypted)
+ return 0;
+ }
+ if (!bs->encrypted) {
+ return -EINVAL;
+ } else if (!bs->drv || !bs->drv->bdrv_set_key) {
+ return -ENOMEDIUM;
+ }
+ ret = bs->drv->bdrv_set_key(bs, key);
+ if (ret < 0) {
+ bs->valid_key = 0;
+ } else if (!bs->valid_key) {
+ bs->valid_key = 1;
+ /* call the change callback now, we skipped it on open */
+ bdrv_dev_change_media_cb(bs, true);
+ }
+ return ret;
+}
+
+const char *bdrv_get_format_name(BlockDriverState *bs)
+{
+ return bs->drv ? bs->drv->format_name : NULL;
+}
+
+void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
+ void *opaque)
+{
+ BlockDriver *drv;
+
+ QLIST_FOREACH(drv, &bdrv_drivers, list) {
+ it(opaque, drv->format_name);
+ }
+}
+
+BlockDriverState *bdrv_find(const char *name)
+{
+ BlockDriverState *bs;
+
+ QTAILQ_FOREACH(bs, &bdrv_states, list) {
+ if (!strcmp(name, bs->device_name)) {
+ return bs;
+ }
+ }
+ return NULL;
+}
+
+BlockDriverState *bdrv_next(BlockDriverState *bs)
+{
+ if (!bs) {
+ return QTAILQ_FIRST(&bdrv_states);
+ }
+ return QTAILQ_NEXT(bs, list);
+}
+
+void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
+{
+ BlockDriverState *bs;
+
+ QTAILQ_FOREACH(bs, &bdrv_states, list) {
+ it(opaque, bs);
+ }
+}
+
+const char *bdrv_get_device_name(BlockDriverState *bs)
+{
+ return bs->device_name;
+}
+
+int bdrv_get_flags(BlockDriverState *bs)
+{
+ return bs->open_flags;
+}
+
+int bdrv_flush_all(void)
+{
+ BlockDriverState *bs;
+ int result = 0;
+
+ QTAILQ_FOREACH(bs, &bdrv_states, list) {
+ int ret = bdrv_flush(bs);
+ if (ret < 0 && !result) {
+ result = ret;
+ }
+ }
+
+ return result;
+}
+
+int bdrv_has_zero_init_1(BlockDriverState *bs)
+{
+ return 1;
+}
+
+int bdrv_has_zero_init(BlockDriverState *bs)
+{
+ assert(bs->drv);
+
+ if (bs->drv->bdrv_has_zero_init) {
+ return bs->drv->bdrv_has_zero_init(bs);
+ }
+
+ /* safe default */
+ return 0;
+}
+
+typedef struct BdrvCoIsAllocatedData {
+ BlockDriverState *bs;
+ BlockDriverState *base;
+ int64_t sector_num;
+ int nb_sectors;
+ int *pnum;
+ int ret;
+ bool done;
+} BdrvCoIsAllocatedData;
+
+/*
+ * Returns true iff the specified sector is present in the disk image. Drivers
+ * not implementing the functionality are assumed to not support backing files,
+ * hence all their sectors are reported as allocated.
+ *
+ * If 'sector_num' is beyond the end of the disk image the return value is 0
+ * and 'pnum' is set to 0.
+ *
+ * 'pnum' is set to the number of sectors (including and immediately following
+ * the specified sector) that are known to be in the same
+ * allocated/unallocated state.
+ *
+ * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
+ * beyond the end of the disk image it will be clamped.
+ */
+int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, int *pnum)
+{
+ int64_t n;
+
+ if (sector_num >= bs->total_sectors) {
+ *pnum = 0;
+ return 0;
+ }
+
+ n = bs->total_sectors - sector_num;
+ if (n < nb_sectors) {
+ nb_sectors = n;
+ }
+
+ if (!bs->drv->bdrv_co_is_allocated) {
+ *pnum = nb_sectors;
+ return 1;
+ }
+
+ return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
+}
+
+/* Coroutine wrapper for bdrv_is_allocated() */
+static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
+{
+ BdrvCoIsAllocatedData *data = opaque;
+ BlockDriverState *bs = data->bs;
+
+ data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
+ data->pnum);
+ data->done = true;
+}
+
+/*
+ * Synchronous wrapper around bdrv_co_is_allocated().
+ *
+ * See bdrv_co_is_allocated() for details.
+ */
+int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+ int *pnum)
+{
+ Coroutine *co;
+ BdrvCoIsAllocatedData data = {
+ .bs = bs,
+ .sector_num = sector_num,
+ .nb_sectors = nb_sectors,
+ .pnum = pnum,
+ .done = false,
+ };
+
+ co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
+ qemu_coroutine_enter(co, &data);
+ while (!data.done) {
+ qemu_aio_wait();
+ }
+ return data.ret;
+}
+
+/*
+ * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
+ *
+ * Return true if the given sector is allocated in any image between
+ * BASE and TOP (inclusive). BASE can be NULL to check if the given
+ * sector is allocated in any image of the chain. Return false otherwise.
+ *
+ * 'pnum' is set to the number of sectors (including and immediately following
+ * the specified sector) that are known to be in the same
+ * allocated/unallocated state.
+ *
+ */
+int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
+ BlockDriverState *base,
+ int64_t sector_num,
+ int nb_sectors, int *pnum)
+{
+ BlockDriverState *intermediate;
+ int ret, n = nb_sectors;
+
+ intermediate = top;
+ while (intermediate && intermediate != base) {
+ int pnum_inter;
+ ret = bdrv_co_is_allocated(intermediate, sector_num, nb_sectors,
+ &pnum_inter);
+ if (ret < 0) {
+ return ret;
+ } else if (ret) {
+ *pnum = pnum_inter;
+ return 1;
+ }
+
+ /*
+ * [sector_num, nb_sectors] is unallocated on top but intermediate
+ * might have
+ *
+ * [sector_num+x, nr_sectors] allocated.
+ */
+ if (n > pnum_inter &&
+ (intermediate == top ||
+ sector_num + pnum_inter < intermediate->total_sectors)) {
+ n = pnum_inter;
+ }
+
+ intermediate = intermediate->backing_hd;
+ }
+
+ *pnum = n;
+ return 0;
+}
+
+/* Coroutine wrapper for bdrv_is_allocated_above() */
+static void coroutine_fn bdrv_is_allocated_above_co_entry(void *opaque)
+{
+ BdrvCoIsAllocatedData *data = opaque;
+ BlockDriverState *top = data->bs;
+ BlockDriverState *base = data->base;
+
+ data->ret = bdrv_co_is_allocated_above(top, base, data->sector_num,
+ data->nb_sectors, data->pnum);
+ data->done = true;
+}
+
+/*
+ * Synchronous wrapper around bdrv_co_is_allocated_above().
+ *
+ * See bdrv_co_is_allocated_above() for details.
+ */
+int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
+ int64_t sector_num, int nb_sectors, int *pnum)
+{
+ Coroutine *co;
+ BdrvCoIsAllocatedData data = {
+ .bs = top,
+ .base = base,
+ .sector_num = sector_num,
+ .nb_sectors = nb_sectors,
+ .pnum = pnum,
+ .done = false,
+ };
+
+ co = qemu_coroutine_create(bdrv_is_allocated_above_co_entry);
+ qemu_coroutine_enter(co, &data);
+ while (!data.done) {
+ qemu_aio_wait();
+ }
+ return data.ret;
+}
+
+const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
+{
+ if (bs->backing_hd && bs->backing_hd->encrypted)
+ return bs->backing_file;
+ else if (bs->encrypted)
+ return bs->filename;
+ else
+ return NULL;
+}
+
+void bdrv_get_backing_filename(BlockDriverState *bs,
+ char *filename, int filename_size)
+{
+ pstrcpy(filename, filename_size, bs->backing_file);
+}
+
+int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors)
+{
+ BlockDriver *drv = bs->drv;
+ if (!drv)
+ return -ENOMEDIUM;
+ if (!drv->bdrv_write_compressed)
+ return -ENOTSUP;
+ if (bdrv_check_request(bs, sector_num, nb_sectors))
+ return -EIO;
+
+ assert(!bs->dirty_bitmap);
+
+ return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
+}
+
+int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+ BlockDriver *drv = bs->drv;
+ if (!drv)
+ return -ENOMEDIUM;
+ if (!drv->bdrv_get_info)
+ return -ENOTSUP;
+ memset(bdi, 0, sizeof(*bdi));
+ return drv->bdrv_get_info(bs, bdi);
+}
+
+int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
+ int64_t pos, int size)
+{
+ QEMUIOVector qiov;
+ struct iovec iov = {
+ .iov_base = (void *) buf,
+ .iov_len = size,
+ };
+
+ qemu_iovec_init_external(&qiov, &iov, 1);
+ return bdrv_writev_vmstate(bs, &qiov, pos);
+}
+
+int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
+{
+ BlockDriver *drv = bs->drv;
+
+ if (!drv) {
+ return -ENOMEDIUM;
+ } else if (drv->bdrv_save_vmstate) {
+ return drv->bdrv_save_vmstate(bs, qiov, pos);
+ } else if (bs->file) {
+ return bdrv_writev_vmstate(bs->file, qiov, pos);
+ }
+
+ return -ENOTSUP;
+}
+
+int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
+ int64_t pos, int size)
+{
+ BlockDriver *drv = bs->drv;
+ if (!drv)
+ return -ENOMEDIUM;
+ if (drv->bdrv_load_vmstate)
+ return drv->bdrv_load_vmstate(bs, buf, pos, size);
+ if (bs->file)
+ return bdrv_load_vmstate(bs->file, buf, pos, size);
+ return -ENOTSUP;
+}
+
+void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
+{
+ if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
+ return;
+ }
+
+ bs->drv->bdrv_debug_event(bs, event);
+}
+
+int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
+ const char *tag)
+{
+ while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
+ bs = bs->file;
+ }
+
+ if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
+ return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
+ }
+
+ return -ENOTSUP;
+}
+
+int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
+{
+ while (bs && bs->drv && !bs->drv->bdrv_debug_resume) {
+ bs = bs->file;
+ }
+
+ if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
+ return bs->drv->bdrv_debug_resume(bs, tag);
+ }
+
+ return -ENOTSUP;
+}
+
+bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
+{
+ while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
+ bs = bs->file;
+ }
+
+ if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
+ return bs->drv->bdrv_debug_is_suspended(bs, tag);
+ }
+
+ return false;
+}
+
+int bdrv_is_snapshot(BlockDriverState *bs)
+{
+ return !!(bs->open_flags & BDRV_O_SNAPSHOT);
+}
+
+/* backing_file can either be relative, or absolute, or a protocol. If it is
+ * relative, it must be relative to the chain. So, passing in bs->filename
+ * from a BDS as backing_file should not be done, as that may be relative to
+ * the CWD rather than the chain. */
+BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
+ const char *backing_file)
+{
+ char *filename_full = NULL;
+ char *backing_file_full = NULL;
+ char *filename_tmp = NULL;
+ int is_protocol = 0;
+ BlockDriverState *curr_bs = NULL;
+ BlockDriverState *retval = NULL;
+
+ if (!bs || !bs->drv || !backing_file) {
+ return NULL;
+ }
+
+ filename_full = g_malloc(PATH_MAX);
+ backing_file_full = g_malloc(PATH_MAX);
+ filename_tmp = g_malloc(PATH_MAX);
+
+ is_protocol = path_has_protocol(backing_file);
+
+ for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
+
+ /* If either of the filename paths is actually a protocol, then
+ * compare unmodified paths; otherwise make paths relative */
+ if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
+ if (strcmp(backing_file, curr_bs->backing_file) == 0) {
+ retval = curr_bs->backing_hd;
+ break;
+ }
+ } else {
+ /* If not an absolute filename path, make it relative to the current
+ * image's filename path */
+ path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
+ backing_file);
+
+ /* We are going to compare absolute pathnames */
+ if (!realpath(filename_tmp, filename_full)) {
+ continue;
+ }
+
+ /* We need to make sure the backing filename we are comparing against
+ * is relative to the current image filename (or absolute) */
+ path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
+ curr_bs->backing_file);
+
+ if (!realpath(filename_tmp, backing_file_full)) {
+ continue;
+ }
+
+ if (strcmp(backing_file_full, filename_full) == 0) {
+ retval = curr_bs->backing_hd;
+ break;
+ }
+ }
+ }
+
+ g_free(filename_full);
+ g_free(backing_file_full);
+ g_free(filename_tmp);
+ return retval;
+}
+
+int bdrv_get_backing_file_depth(BlockDriverState *bs)
+{
+ if (!bs->drv) {
+ return 0;
+ }
+
+ if (!bs->backing_hd) {
+ return 0;
+ }
+
+ return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
+}
+
+BlockDriverState *bdrv_find_base(BlockDriverState *bs)
+{
+ BlockDriverState *curr_bs = NULL;
+
+ if (!bs) {
+ return NULL;
+ }
+
+ curr_bs = bs;
+
+ while (curr_bs->backing_hd) {
+ curr_bs = curr_bs->backing_hd;
+ }
+ return curr_bs;
+}
+
+/**************************************************************/
+/* async I/Os */
+
+BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
+ QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
+
+ return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
+ cb, opaque, false);
+}
+
+BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
+ QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
+
+ return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
+ cb, opaque, true);
+}
+
+
+typedef struct MultiwriteCB {
+ int error;
+ int num_requests;
+ int num_callbacks;
+ struct {
+ BlockDriverCompletionFunc *cb;
+ void *opaque;
+ QEMUIOVector *free_qiov;
+ } callbacks[];
+} MultiwriteCB;
+
+static void multiwrite_user_cb(MultiwriteCB *mcb)
+{
+ int i;
+
+ for (i = 0; i < mcb->num_callbacks; i++) {
+ mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
+ if (mcb->callbacks[i].free_qiov) {
+ qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
+ }
+ g_free(mcb->callbacks[i].free_qiov);
+ }
+}
+
+static void multiwrite_cb(void *opaque, int ret)
+{
+ MultiwriteCB *mcb = opaque;
+
+ trace_multiwrite_cb(mcb, ret);
+
+ if (ret < 0 && !mcb->error) {
+ mcb->error = ret;
+ }
+
+ mcb->num_requests--;
+ if (mcb->num_requests == 0) {
+ multiwrite_user_cb(mcb);
+ g_free(mcb);
+ }
+}
+
+static int multiwrite_req_compare(const void *a, const void *b)
+{
+ const BlockRequest *req1 = a, *req2 = b;
+
+ /*
+ * Note that we can't simply subtract req2->sector from req1->sector
+ * here as that could overflow the return value.
+ */
+ if (req1->sector > req2->sector) {
+ return 1;
+ } else if (req1->sector < req2->sector) {
+ return -1;
+ } else {
+ return 0;
+ }
+}
+
+/*
+ * Takes a bunch of requests and tries to merge them. Returns the number of
+ * requests that remain after merging.
+ */
+static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
+ int num_reqs, MultiwriteCB *mcb)
+{
+ int i, outidx;
+
+ // Sort requests by start sector
+ qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
+
+ // Check if adjacent requests touch the same clusters. If so, combine them,
+ // filling up gaps with zero sectors.
+ outidx = 0;
+ for (i = 1; i < num_reqs; i++) {
+ int merge = 0;
+ int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
+
+ // Handle exactly sequential writes and overlapping writes.
+ if (reqs[i].sector <= oldreq_last) {
+ merge = 1;
+ }
+
+ if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
+ merge = 0;
+ }
+
+ if (merge) {
+ size_t size;
+ QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
+ qemu_iovec_init(qiov,
+ reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
+
+ // Add the first request to the merged one. If the requests are
+ // overlapping, drop the last sectors of the first request.
+ size = (reqs[i].sector - reqs[outidx].sector) << 9;
+ qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
+
+ // We should need to add any zeros between the two requests
+ assert (reqs[i].sector <= oldreq_last);
+
+ // Add the second request
+ qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
+
+ reqs[outidx].nb_sectors = qiov->size >> 9;
+ reqs[outidx].qiov = qiov;
+
+ mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
+ } else {
+ outidx++;
+ reqs[outidx].sector = reqs[i].sector;
+ reqs[outidx].nb_sectors = reqs[i].nb_sectors;
+ reqs[outidx].qiov = reqs[i].qiov;
+ }
+ }
+
+ return outidx + 1;
+}
+
+/*
+ * Submit multiple AIO write requests at once.
+ *
+ * On success, the function returns 0 and all requests in the reqs array have
+ * been submitted. In error case this function returns -1, and any of the
+ * requests may or may not be submitted yet. In particular, this means that the
+ * callback will be called for some of the requests, for others it won't. The
+ * caller must check the error field of the BlockRequest to wait for the right
+ * callbacks (if error != 0, no callback will be called).
+ *
+ * The implementation may modify the contents of the reqs array, e.g. to merge
+ * requests. However, the fields opaque and error are left unmodified as they
+ * are used to signal failure for a single request to the caller.
+ */
+int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
+{
+ MultiwriteCB *mcb;
+ int i;
+
+ /* don't submit writes if we don't have a medium */
+ if (bs->drv == NULL) {
+ for (i = 0; i < num_reqs; i++) {
+ reqs[i].error = -ENOMEDIUM;
+ }
+ return -1;
+ }
+
+ if (num_reqs == 0) {
+ return 0;
+ }
+
+ // Create MultiwriteCB structure
+ mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
+ mcb->num_requests = 0;
+ mcb->num_callbacks = num_reqs;
+
+ for (i = 0; i < num_reqs; i++) {
+ mcb->callbacks[i].cb = reqs[i].cb;
+ mcb->callbacks[i].opaque = reqs[i].opaque;
+ }
+
+ // Check for mergable requests
+ num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
+
+ trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
+
+ /* Run the aio requests. */
+ mcb->num_requests = num_reqs;
+ for (i = 0; i < num_reqs; i++) {
+ bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
+ reqs[i].nb_sectors, multiwrite_cb, mcb);
+ }
+
+ return 0;
+}
+
+void bdrv_aio_cancel(BlockDriverAIOCB *acb)
+{
+ acb->aiocb_info->cancel(acb);
+}
+
+/* block I/O throttling */
+static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
+ bool is_write, double elapsed_time, uint64_t *wait)
+{
+ uint64_t bps_limit = 0;
+ uint64_t extension;
+ double bytes_limit, bytes_base, bytes_res;
+ double slice_time, wait_time;
+
+ if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
+ bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
+ } else if (bs->io_limits.bps[is_write]) {
+ bps_limit = bs->io_limits.bps[is_write];
+ } else {
+ if (wait) {
+ *wait = 0;
+ }
+
+ return false;
+ }
+
+ slice_time = bs->slice_end - bs->slice_start;
+ slice_time /= (NANOSECONDS_PER_SECOND);
+ bytes_limit = bps_limit * slice_time;
+ bytes_base = bs->slice_submitted.bytes[is_write];
+ if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
+ bytes_base += bs->slice_submitted.bytes[!is_write];
+ }
+
+ /* bytes_base: the bytes of data which have been read/written; and
+ * it is obtained from the history statistic info.
+ * bytes_res: the remaining bytes of data which need to be read/written.
+ * (bytes_base + bytes_res) / bps_limit: used to calcuate
+ * the total time for completing reading/writting all data.
+ */
+ bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
+
+ if (bytes_base + bytes_res <= bytes_limit) {
+ if (wait) {
+ *wait = 0;
+ }
+
+ return false;
+ }
+
+ /* Calc approx time to dispatch */
+ wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
+
+ /* When the I/O rate at runtime exceeds the limits,
+ * bs->slice_end need to be extended in order that the current statistic
+ * info can be kept until the timer fire, so it is increased and tuned
+ * based on the result of experiment.
+ */
+ extension = wait_time * NANOSECONDS_PER_SECOND;
+ extension = DIV_ROUND_UP(extension, BLOCK_IO_SLICE_TIME) *
+ BLOCK_IO_SLICE_TIME;
+ bs->slice_end += extension;
+ if (wait) {
+ *wait = wait_time * NANOSECONDS_PER_SECOND;
+ }
+
+ return true;
+}
+
+static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
+ double elapsed_time, uint64_t *wait)
+{
+ uint64_t iops_limit = 0;
+ double ios_limit, ios_base;
+ double slice_time, wait_time;
+
+ if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
+ iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
+ } else if (bs->io_limits.iops[is_write]) {
+ iops_limit = bs->io_limits.iops[is_write];
+ } else {
+ if (wait) {
+ *wait = 0;
+ }
+
+ return false;
+ }
+
+ slice_time = bs->slice_end - bs->slice_start;
+ slice_time /= (NANOSECONDS_PER_SECOND);
+ ios_limit = iops_limit * slice_time;
+ ios_base = bs->slice_submitted.ios[is_write];
+ if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
+ ios_base += bs->slice_submitted.ios[!is_write];
+ }
+
+ if (ios_base + 1 <= ios_limit) {
+ if (wait) {
+ *wait = 0;
+ }
+
+ return false;
+ }
+
+ /* Calc approx time to dispatch, in seconds */
+ wait_time = (ios_base + 1) / iops_limit;
+ if (wait_time > elapsed_time) {
+ wait_time = wait_time - elapsed_time;
+ } else {
+ wait_time = 0;
+ }
+
+ /* Exceeded current slice, extend it by another slice time */
+ bs->slice_end += BLOCK_IO_SLICE_TIME;
+ if (wait) {
+ *wait = wait_time * NANOSECONDS_PER_SECOND;
+ }
+
+ return true;
+}
+
+static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
+ bool is_write, int64_t *wait)
+{
+ int64_t now, max_wait;
+ uint64_t bps_wait = 0, iops_wait = 0;
+ double elapsed_time;
+ int bps_ret, iops_ret;
+
+ now = qemu_get_clock_ns(vm_clock);
+ if (now > bs->slice_end) {
+ bs->slice_start = now;
+ bs->slice_end = now + BLOCK_IO_SLICE_TIME;
+ memset(&bs->slice_submitted, 0, sizeof(bs->slice_submitted));
+ }
+
+ elapsed_time = now - bs->slice_start;
+ elapsed_time /= (NANOSECONDS_PER_SECOND);
+
+ bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors,
+ is_write, elapsed_time, &bps_wait);
+ iops_ret = bdrv_exceed_iops_limits(bs, is_write,
+ elapsed_time, &iops_wait);
+ if (bps_ret || iops_ret) {
+ max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
+ if (wait) {
+ *wait = max_wait;
+ }
+
+ now = qemu_get_clock_ns(vm_clock);
+ if (bs->slice_end < now + max_wait) {
+ bs->slice_end = now + max_wait;
+ }
+
+ return true;
+ }
+
+ if (wait) {
+ *wait = 0;
+ }
+
+ bs->slice_submitted.bytes[is_write] += (int64_t)nb_sectors *
+ BDRV_SECTOR_SIZE;
+ bs->slice_submitted.ios[is_write]++;
+
+ return false;
+}
+
+/**************************************************************/
+/* async block device emulation */
+
+typedef struct BlockDriverAIOCBSync {
+ BlockDriverAIOCB common;
+ QEMUBH *bh;
+ int ret;
+ /* vector translation state */
+ QEMUIOVector *qiov;
+ uint8_t *bounce;
+ int is_write;
+} BlockDriverAIOCBSync;
+
+static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
+{
+ BlockDriverAIOCBSync *acb =
+ container_of(blockacb, BlockDriverAIOCBSync, common);
+ qemu_bh_delete(acb->bh);
+ acb->bh = NULL;
+ qemu_aio_release(acb);
+}
+
+static const AIOCBInfo bdrv_em_aiocb_info = {
+ .aiocb_size = sizeof(BlockDriverAIOCBSync),
+ .cancel = bdrv_aio_cancel_em,
+};
+
+static void bdrv_aio_bh_cb(void *opaque)
+{
+ BlockDriverAIOCBSync *acb = opaque;
+
+ if (!acb->is_write)
+ qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
+ qemu_vfree(acb->bounce);
+ acb->common.cb(acb->common.opaque, acb->ret);
+ qemu_bh_delete(acb->bh);
+ acb->bh = NULL;
+ qemu_aio_release(acb);
+}
+
+static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque,
+ int is_write)
+
+{
+ BlockDriverAIOCBSync *acb;
+
+ acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
+ acb->is_write = is_write;
+ acb->qiov = qiov;
+ acb->bounce = qemu_blockalign(bs, qiov->size);
+ acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
+
+ if (is_write) {
+ qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
+ acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
+ } else {
+ acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
+ }
+
+ qemu_bh_schedule(acb->bh);
+
+ return &acb->common;
+}
+
+static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
+}
+
+static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
+}
+
+
+typedef struct BlockDriverAIOCBCoroutine {
+ BlockDriverAIOCB common;
+ BlockRequest req;
+ bool is_write;
+ bool *done;
+ QEMUBH* bh;
+} BlockDriverAIOCBCoroutine;
+
+static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
+{
+ BlockDriverAIOCBCoroutine *acb =
+ container_of(blockacb, BlockDriverAIOCBCoroutine, common);
+ bool done = false;
+
+ acb->done = &done;
+ while (!done) {
+ qemu_aio_wait();
+ }
+}
+
+static const AIOCBInfo bdrv_em_co_aiocb_info = {
+ .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
+ .cancel = bdrv_aio_co_cancel_em,
+};
+
+static void bdrv_co_em_bh(void *opaque)
+{
+ BlockDriverAIOCBCoroutine *acb = opaque;
+
+ acb->common.cb(acb->common.opaque, acb->req.error);
+
+ if (acb->done) {
+ *acb->done = true;
+ }
+
+ qemu_bh_delete(acb->bh);
+ qemu_aio_release(acb);
+}
+
+/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
+static void coroutine_fn bdrv_co_do_rw(void *opaque)
+{
+ BlockDriverAIOCBCoroutine *acb = opaque;
+ BlockDriverState *bs = acb->common.bs;
+
+ if (!acb->is_write) {
+ acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
+ acb->req.nb_sectors, acb->req.qiov, 0);
+ } else {
+ acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
+ acb->req.nb_sectors, acb->req.qiov, 0);
+ }
+
+ acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
+ qemu_bh_schedule(acb->bh);
+}
+
+static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque,
+ bool is_write)
+{
+ Coroutine *co;
+ BlockDriverAIOCBCoroutine *acb;
+
+ acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
+ acb->req.sector = sector_num;
+ acb->req.nb_sectors = nb_sectors;
+ acb->req.qiov = qiov;
+ acb->is_write = is_write;
+ acb->done = NULL;
+
+ co = qemu_coroutine_create(bdrv_co_do_rw);
+ qemu_coroutine_enter(co, acb);
+
+ return &acb->common;
+}
+
+static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
+{
+ BlockDriverAIOCBCoroutine *acb = opaque;
+ BlockDriverState *bs = acb->common.bs;
+
+ acb->req.error = bdrv_co_flush(bs);
+ acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
+ qemu_bh_schedule(acb->bh);
+}
+
+BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ trace_bdrv_aio_flush(bs, opaque);
+
+ Coroutine *co;
+ BlockDriverAIOCBCoroutine *acb;
+
+ acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
+ acb->done = NULL;
+
+ co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
+ qemu_coroutine_enter(co, acb);
+
+ return &acb->common;
+}
+
+static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
+{
+ BlockDriverAIOCBCoroutine *acb = opaque;
+ BlockDriverState *bs = acb->common.bs;
+
+ acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
+ acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
+ qemu_bh_schedule(acb->bh);
+}
+
+BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ Coroutine *co;
+ BlockDriverAIOCBCoroutine *acb;
+
+ trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
+
+ acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
+ acb->req.sector = sector_num;
+ acb->req.nb_sectors = nb_sectors;
+ acb->done = NULL;
+ co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
+ qemu_coroutine_enter(co, acb);
+
+ return &acb->common;
+}
+
+void bdrv_init(void)
+{
+ module_call_init(MODULE_INIT_BLOCK);
+}
+
+void bdrv_init_with_whitelist(void)
+{
+ use_bdrv_whitelist = 1;
+ bdrv_init();
+}
+
+void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ BlockDriverAIOCB *acb;
+
+ acb = g_slice_alloc(aiocb_info->aiocb_size);
+ acb->aiocb_info = aiocb_info;
+ acb->bs = bs;
+ acb->cb = cb;
+ acb->opaque = opaque;
+ return acb;
+}
+
+void qemu_aio_release(void *p)
+{
+ BlockDriverAIOCB *acb = p;
+ g_slice_free1(acb->aiocb_info->aiocb_size, acb);
+}
+
+/**************************************************************/
+/* Coroutine block device emulation */
+
+typedef struct CoroutineIOCompletion {
+ Coroutine *coroutine;
+ int ret;
+} CoroutineIOCompletion;
+
+static void bdrv_co_io_em_complete(void *opaque, int ret)
+{
+ CoroutineIOCompletion *co = opaque;
+
+ co->ret = ret;
+ qemu_coroutine_enter(co->coroutine, NULL);
+}
+
+static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *iov,
+ bool is_write)
+{
+ CoroutineIOCompletion co = {
+ .coroutine = qemu_coroutine_self(),
+ };
+ BlockDriverAIOCB *acb;
+
+ if (is_write) {
+ acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
+ bdrv_co_io_em_complete, &co);
+ } else {
+ acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
+ bdrv_co_io_em_complete, &co);
+ }
+
+ trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
+ if (!acb) {
+ return -EIO;
+ }
+ qemu_coroutine_yield();
+
+ return co.ret;
+}
+
+static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ QEMUIOVector *iov)
+{
+ return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
+}
+
+static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ QEMUIOVector *iov)
+{
+ return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
+}
+
+static void coroutine_fn bdrv_flush_co_entry(void *opaque)
+{
+ RwCo *rwco = opaque;
+
+ rwco->ret = bdrv_co_flush(rwco->bs);
+}
+
+int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
+{
+ int ret;
+
+ if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
+ return 0;
+ }
+
+ /* Write back cached data to the OS even with cache=unsafe */
+ BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
+ if (bs->drv->bdrv_co_flush_to_os) {
+ ret = bs->drv->bdrv_co_flush_to_os(bs);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ /* But don't actually force it to the disk with cache=unsafe */
+ if (bs->open_flags & BDRV_O_NO_FLUSH) {
+ goto flush_parent;
+ }
+
+ BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
+ if (bs->drv->bdrv_co_flush_to_disk) {
+ ret = bs->drv->bdrv_co_flush_to_disk(bs);
+ } else if (bs->drv->bdrv_aio_flush) {
+ BlockDriverAIOCB *acb;
+ CoroutineIOCompletion co = {
+ .coroutine = qemu_coroutine_self(),
+ };
+
+ acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
+ if (acb == NULL) {
+ ret = -EIO;
+ } else {
+ qemu_coroutine_yield();
+ ret = co.ret;
+ }
+ } else {
+ /*
+ * Some block drivers always operate in either writethrough or unsafe
+ * mode and don't support bdrv_flush therefore. Usually qemu doesn't
+ * know how the server works (because the behaviour is hardcoded or
+ * depends on server-side configuration), so we can't ensure that
+ * everything is safe on disk. Returning an error doesn't work because
+ * that would break guests even if the server operates in writethrough
+ * mode.
+ *
+ * Let's hope the user knows what he's doing.
+ */
+ ret = 0;
+ }
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
+ * in the case of cache=unsafe, so there are no useless flushes.
+ */
+flush_parent:
+ return bdrv_co_flush(bs->file);
+}
+
+void bdrv_invalidate_cache(BlockDriverState *bs)
+{
+ if (bs->drv && bs->drv->bdrv_invalidate_cache) {
+ bs->drv->bdrv_invalidate_cache(bs);
+ }
+}
+
+void bdrv_invalidate_cache_all(void)
+{
+ BlockDriverState *bs;
+
+ QTAILQ_FOREACH(bs, &bdrv_states, list) {
+ bdrv_invalidate_cache(bs);
+ }
+}
+
+void bdrv_clear_incoming_migration_all(void)
+{
+ BlockDriverState *bs;
+
+ QTAILQ_FOREACH(bs, &bdrv_states, list) {
+ bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
+ }
+}
+
+int bdrv_flush(BlockDriverState *bs)
+{
+ Coroutine *co;
+ RwCo rwco = {
+ .bs = bs,
+ .ret = NOT_DONE,
+ };
+
+ if (qemu_in_coroutine()) {
+ /* Fast-path if already in coroutine context */
+ bdrv_flush_co_entry(&rwco);
+ } else {
+ co = qemu_coroutine_create(bdrv_flush_co_entry);
+ qemu_coroutine_enter(co, &rwco);
+ while (rwco.ret == NOT_DONE) {
+ qemu_aio_wait();
+ }
+ }
+
+ return rwco.ret;
+}
+
+static void coroutine_fn bdrv_discard_co_entry(void *opaque)
+{
+ RwCo *rwco = opaque;
+
+ rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
+}
+
+int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors)
+{
+ if (!bs->drv) {
+ return -ENOMEDIUM;
+ } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
+ return -EIO;
+ } else if (bs->read_only) {
+ return -EROFS;
+ }
+
+ if (bs->dirty_bitmap) {
+ bdrv_reset_dirty(bs, sector_num, nb_sectors);
+ }
+
+ /* Do nothing if disabled. */
+ if (!(bs->open_flags & BDRV_O_UNMAP)) {
+ return 0;
+ }
+
+ if (bs->drv->bdrv_co_discard) {
+ return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
+ } else if (bs->drv->bdrv_aio_discard) {
+ BlockDriverAIOCB *acb;
+ CoroutineIOCompletion co = {
+ .coroutine = qemu_coroutine_self(),
+ };
+
+ acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
+ bdrv_co_io_em_complete, &co);
+ if (acb == NULL) {
+ return -EIO;
+ } else {
+ qemu_coroutine_yield();
+ return co.ret;
+ }
+ } else {
+ return 0;
+ }
+}
+
+int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
+{
+ Coroutine *co;
+ RwCo rwco = {
+ .bs = bs,
+ .sector_num = sector_num,
+ .nb_sectors = nb_sectors,
+ .ret = NOT_DONE,
+ };
+
+ if (qemu_in_coroutine()) {
+ /* Fast-path if already in coroutine context */
+ bdrv_discard_co_entry(&rwco);
+ } else {
+ co = qemu_coroutine_create(bdrv_discard_co_entry);
+ qemu_coroutine_enter(co, &rwco);
+ while (rwco.ret == NOT_DONE) {
+ qemu_aio_wait();
+ }
+ }
+
+ return rwco.ret;
+}
+
+/**************************************************************/
+/* removable device support */
+
+/**
+ * Return TRUE if the media is present
+ */
+int bdrv_is_inserted(BlockDriverState *bs)
+{
+ BlockDriver *drv = bs->drv;
+
+ if (!drv)
+ return 0;
+ if (!drv->bdrv_is_inserted)
+ return 1;
+ return drv->bdrv_is_inserted(bs);
+}
+
+/**
+ * Return whether the media changed since the last call to this
+ * function, or -ENOTSUP if we don't know. Most drivers don't know.
+ */
+int bdrv_media_changed(BlockDriverState *bs)
+{
+ BlockDriver *drv = bs->drv;
+
+ if (drv && drv->bdrv_media_changed) {
+ return drv->bdrv_media_changed(bs);
+ }
+ return -ENOTSUP;
+}
+
+/**
+ * If eject_flag is TRUE, eject the media. Otherwise, close the tray
+ */
+void bdrv_eject(BlockDriverState *bs, bool eject_flag)
+{
+ BlockDriver *drv = bs->drv;
+
+ if (drv && drv->bdrv_eject) {
+ drv->bdrv_eject(bs, eject_flag);
+ }
+
+ if (bs->device_name[0] != '\0') {
+ bdrv_emit_qmp_eject_event(bs, eject_flag);
+ }
+}
+
+/**
+ * Lock or unlock the media (if it is locked, the user won't be able
+ * to eject it manually).
+ */
+void bdrv_lock_medium(BlockDriverState *bs, bool locked)
+{
+ BlockDriver *drv = bs->drv;
+
+ trace_bdrv_lock_medium(bs, locked);
+
+ if (drv && drv->bdrv_lock_medium) {
+ drv->bdrv_lock_medium(bs, locked);
+ }
+}
+
+/* needed for generic scsi interface */
+
+int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
+{
+ BlockDriver *drv = bs->drv;
+
+ if (drv && drv->bdrv_ioctl)
+ return drv->bdrv_ioctl(bs, req, buf);
+ return -ENOTSUP;
+}
+
+BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
+ unsigned long int req, void *buf,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ BlockDriver *drv = bs->drv;
+
+ if (drv && drv->bdrv_aio_ioctl)
+ return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
+ return NULL;
+}
+
+void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
+{
+ bs->buffer_alignment = align;
+}
+
+void *qemu_blockalign(BlockDriverState *bs, size_t size)
+{
+ return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
+}
+
+/*
+ * Check if all memory in this vector is sector aligned.
+ */
+bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
+{
+ int i;
+
+ for (i = 0; i < qiov->niov; i++) {
+ if ((uintptr_t) qiov->iov[i].iov_base % bs->buffer_alignment) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity)
+{
+ int64_t bitmap_size;
+
+ assert((granularity & (granularity - 1)) == 0);
+
+ if (granularity) {
+ granularity >>= BDRV_SECTOR_BITS;
+ assert(!bs->dirty_bitmap);
+ bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
+ bs->dirty_bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
+ } else {
+ if (bs->dirty_bitmap) {
+ hbitmap_free(bs->dirty_bitmap);
+ bs->dirty_bitmap = NULL;
+ }
+ }
+}
+
+int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
+{
+ if (bs->dirty_bitmap) {
+ return hbitmap_get(bs->dirty_bitmap, sector);
+ } else {
+ return 0;
+ }
+}
+
+void bdrv_dirty_iter_init(BlockDriverState *bs, HBitmapIter *hbi)
+{
+ hbitmap_iter_init(hbi, bs->dirty_bitmap, 0);
+}
+
+void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
+ int nr_sectors)
+{
+ hbitmap_set(bs->dirty_bitmap, cur_sector, nr_sectors);
+}
+
+void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
+ int nr_sectors)
+{
+ hbitmap_reset(bs->dirty_bitmap, cur_sector, nr_sectors);
+}
+
+int64_t bdrv_get_dirty_count(BlockDriverState *bs)
+{
+ if (bs->dirty_bitmap) {
+ return hbitmap_count(bs->dirty_bitmap);
+ } else {
+ return 0;
+ }
+}
+
+void bdrv_set_in_use(BlockDriverState *bs, int in_use)
+{
+ assert(bs->in_use != in_use);
+ bs->in_use = in_use;
+}
+
+int bdrv_in_use(BlockDriverState *bs)
+{
+ return bs->in_use;
+}
+
+void bdrv_iostatus_enable(BlockDriverState *bs)
+{
+ bs->iostatus_enabled = true;
+ bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
+}
+
+/* The I/O status is only enabled if the drive explicitly
+ * enables it _and_ the VM is configured to stop on errors */
+bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
+{
+ return (bs->iostatus_enabled &&
+ (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
+ bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
+ bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
+}
+
+void bdrv_iostatus_disable(BlockDriverState *bs)
+{
+ bs->iostatus_enabled = false;
+}
+
+void bdrv_iostatus_reset(BlockDriverState *bs)
+{
+ if (bdrv_iostatus_is_enabled(bs)) {
+ bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
+ if (bs->job) {
+ block_job_iostatus_reset(bs->job);
+ }
+ }
+}
+
+void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
+{
+ assert(bdrv_iostatus_is_enabled(bs));
+ if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
+ bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
+ BLOCK_DEVICE_IO_STATUS_FAILED;
+ }
+}
+
+void
+bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
+ enum BlockAcctType type)
+{
+ assert(type < BDRV_MAX_IOTYPE);
+
+ cookie->bytes = bytes;
+ cookie->start_time_ns = get_clock();
+ cookie->type = type;
+}
+
+void
+bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
+{
+ assert(cookie->type < BDRV_MAX_IOTYPE);
+
+ bs->nr_bytes[cookie->type] += cookie->bytes;
+ bs->nr_ops[cookie->type]++;
+ bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
+}
+
+void bdrv_img_create(const char *filename, const char *fmt,
+ const char *base_filename, const char *base_fmt,
+ char *options, uint64_t img_size, int flags,
+ Error **errp, bool quiet)
+{
+ QEMUOptionParameter *param = NULL, *create_options = NULL;
+ QEMUOptionParameter *backing_fmt, *backing_file, *size;
+ BlockDriverState *bs = NULL;
+ BlockDriver *drv, *proto_drv;
+ BlockDriver *backing_drv = NULL;
+ int ret = 0;
+
+ /* Find driver and parse its options */
+ drv = bdrv_find_format(fmt);
+ if (!drv) {
+ error_setg(errp, "Unknown file format '%s'", fmt);
+ return;
+ }
+
+ proto_drv = bdrv_find_protocol(filename, true);
+ if (!proto_drv) {
+ error_setg(errp, "Unknown protocol '%s'", filename);
+ return;
+ }
+
+ create_options = append_option_parameters(create_options,
+ drv->create_options);
+ create_options = append_option_parameters(create_options,
+ proto_drv->create_options);
+
+ /* Create parameter list with default values */
+ param = parse_option_parameters("", create_options, param);
+
+ set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
+
+ /* Parse -o options */
+ if (options) {
+ param = parse_option_parameters(options, create_options, param);
+ if (param == NULL) {
+ error_setg(errp, "Invalid options for file format '%s'.", fmt);
+ goto out;
+ }
+ }
+
+ if (base_filename) {
+ if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
+ base_filename)) {
+ error_setg(errp, "Backing file not supported for file format '%s'",
+ fmt);
+ goto out;
+ }
+ }
+
+ if (base_fmt) {
+ if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
+ error_setg(errp, "Backing file format not supported for file "
+ "format '%s'", fmt);
+ goto out;
+ }
+ }
+
+ backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
+ if (backing_file && backing_file->value.s) {
+ if (!strcmp(filename, backing_file->value.s)) {
+ error_setg(errp, "Error: Trying to create an image with the "
+ "same filename as the backing file");
+ goto out;
+ }
+ }
+
+ backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
+ if (backing_fmt && backing_fmt->value.s) {
+ backing_drv = bdrv_find_format(backing_fmt->value.s);
+ if (!backing_drv) {
+ error_setg(errp, "Unknown backing file format '%s'",
+ backing_fmt->value.s);
+ goto out;
+ }
+ }
+
+ // The size for the image must always be specified, with one exception:
+ // If we are using a backing file, we can obtain the size from there
+ size = get_option_parameter(param, BLOCK_OPT_SIZE);
+ if (size && size->value.n == -1) {
+ if (backing_file && backing_file->value.s) {
+ uint64_t size;
+ char buf[32];
+ int back_flags;
+
+ /* backing files always opened read-only */
+ back_flags =
+ flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
+
+ bs = bdrv_new("");
+
+ ret = bdrv_open(bs, backing_file->value.s, NULL, back_flags,
+ backing_drv);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Could not open '%s'",
+ backing_file->value.s);
+ goto out;
+ }
+ bdrv_get_geometry(bs, &size);
+ size *= 512;
+
+ snprintf(buf, sizeof(buf), "%" PRId64, size);
+ set_option_parameter(param, BLOCK_OPT_SIZE, buf);
+ } else {
+ error_setg(errp, "Image creation needs a size parameter");
+ goto out;
+ }
+ }
+
+ if (!quiet) {
+ printf("Formatting '%s', fmt=%s ", filename, fmt);
+ print_option_parameters(param);
+ puts("");
+ }
+ ret = bdrv_create(drv, filename, param);
+ if (ret < 0) {
+ if (ret == -ENOTSUP) {
+ error_setg(errp,"Formatting or formatting option not supported for "
+ "file format '%s'", fmt);
+ } else if (ret == -EFBIG) {
+ const char *cluster_size_hint = "";
+ if (get_option_parameter(create_options, BLOCK_OPT_CLUSTER_SIZE)) {
+ cluster_size_hint = " (try using a larger cluster size)";
+ }
+ error_setg(errp, "The image size is too large for file format '%s'%s",
+ fmt, cluster_size_hint);
+ } else {
+ error_setg(errp, "%s: error while creating %s: %s", filename, fmt,
+ strerror(-ret));
+ }
+ }
+
+out:
+ free_option_parameters(create_options);
+ free_option_parameters(param);
+
+ if (bs) {
+ bdrv_delete(bs);
+ }
+}
+
+AioContext *bdrv_get_aio_context(BlockDriverState *bs)
+{
+ /* Currently BlockDriverState always uses the main loop AioContext */
+ return qemu_get_aio_context();
+}
+
+void bdrv_add_before_write_notifier(BlockDriverState *bs,
+ NotifierWithReturn *notifier)
+{
+ notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
+}
diff --git a/contrib/qemu/block/qcow.c b/contrib/qemu/block/qcow.c
new file mode 100644
index 000000000..5239bd68f
--- /dev/null
+++ b/contrib/qemu/block/qcow.c
@@ -0,0 +1,914 @@
+/*
+ * Block driver for the QCOW format
+ *
+ * Copyright (c) 2004-2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "qemu/module.h"
+#include <zlib.h>
+#include "qemu/aes.h"
+#include "migration/migration.h"
+
+/**************************************************************/
+/* QEMU COW block driver with compression and encryption support */
+
+#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
+#define QCOW_VERSION 1
+
+#define QCOW_CRYPT_NONE 0
+#define QCOW_CRYPT_AES 1
+
+#define QCOW_OFLAG_COMPRESSED (1LL << 63)
+
+typedef struct QCowHeader {
+ uint32_t magic;
+ uint32_t version;
+ uint64_t backing_file_offset;
+ uint32_t backing_file_size;
+ uint32_t mtime;
+ uint64_t size; /* in bytes */
+ uint8_t cluster_bits;
+ uint8_t l2_bits;
+ uint32_t crypt_method;
+ uint64_t l1_table_offset;
+} QCowHeader;
+
+#define L2_CACHE_SIZE 16
+
+typedef struct BDRVQcowState {
+ int cluster_bits;
+ int cluster_size;
+ int cluster_sectors;
+ int l2_bits;
+ int l2_size;
+ int l1_size;
+ uint64_t cluster_offset_mask;
+ uint64_t l1_table_offset;
+ uint64_t *l1_table;
+ uint64_t *l2_cache;
+ uint64_t l2_cache_offsets[L2_CACHE_SIZE];
+ uint32_t l2_cache_counts[L2_CACHE_SIZE];
+ uint8_t *cluster_cache;
+ uint8_t *cluster_data;
+ uint64_t cluster_cache_offset;
+ uint32_t crypt_method; /* current crypt method, 0 if no key yet */
+ uint32_t crypt_method_header;
+ AES_KEY aes_encrypt_key;
+ AES_KEY aes_decrypt_key;
+ CoMutex lock;
+ Error *migration_blocker;
+} BDRVQcowState;
+
+static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
+
+static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+ const QCowHeader *cow_header = (const void *)buf;
+
+ if (buf_size >= sizeof(QCowHeader) &&
+ be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
+ be32_to_cpu(cow_header->version) == QCOW_VERSION)
+ return 100;
+ else
+ return 0;
+}
+
+static int qcow_open(BlockDriverState *bs, QDict *options, int flags)
+{
+ BDRVQcowState *s = bs->opaque;
+ int len, i, shift, ret;
+ QCowHeader header;
+
+ ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
+ if (ret < 0) {
+ goto fail;
+ }
+ be32_to_cpus(&header.magic);
+ be32_to_cpus(&header.version);
+ be64_to_cpus(&header.backing_file_offset);
+ be32_to_cpus(&header.backing_file_size);
+ be32_to_cpus(&header.mtime);
+ be64_to_cpus(&header.size);
+ be32_to_cpus(&header.crypt_method);
+ be64_to_cpus(&header.l1_table_offset);
+
+ if (header.magic != QCOW_MAGIC) {
+ ret = -EMEDIUMTYPE;
+ goto fail;
+ }
+ if (header.version != QCOW_VERSION) {
+ char version[64];
+ snprintf(version, sizeof(version), "QCOW version %d", header.version);
+ qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
+ bs->device_name, "qcow", version);
+ ret = -ENOTSUP;
+ goto fail;
+ }
+
+ if (header.size <= 1 || header.cluster_bits < 9) {
+ ret = -EINVAL;
+ goto fail;
+ }
+ if (header.crypt_method > QCOW_CRYPT_AES) {
+ ret = -EINVAL;
+ goto fail;
+ }
+ s->crypt_method_header = header.crypt_method;
+ if (s->crypt_method_header) {
+ bs->encrypted = 1;
+ }
+ s->cluster_bits = header.cluster_bits;
+ s->cluster_size = 1 << s->cluster_bits;
+ s->cluster_sectors = 1 << (s->cluster_bits - 9);
+ s->l2_bits = header.l2_bits;
+ s->l2_size = 1 << s->l2_bits;
+ bs->total_sectors = header.size / 512;
+ s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1;
+
+ /* read the level 1 table */
+ shift = s->cluster_bits + s->l2_bits;
+ s->l1_size = (header.size + (1LL << shift) - 1) >> shift;
+
+ s->l1_table_offset = header.l1_table_offset;
+ s->l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
+
+ ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
+ s->l1_size * sizeof(uint64_t));
+ if (ret < 0) {
+ goto fail;
+ }
+
+ for(i = 0;i < s->l1_size; i++) {
+ be64_to_cpus(&s->l1_table[i]);
+ }
+ /* alloc L2 cache */
+ s->l2_cache = g_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
+ s->cluster_cache = g_malloc(s->cluster_size);
+ s->cluster_data = g_malloc(s->cluster_size);
+ s->cluster_cache_offset = -1;
+
+ /* read the backing file name */
+ if (header.backing_file_offset != 0) {
+ len = header.backing_file_size;
+ if (len > 1023) {
+ len = 1023;
+ }
+ ret = bdrv_pread(bs->file, header.backing_file_offset,
+ bs->backing_file, len);
+ if (ret < 0) {
+ goto fail;
+ }
+ bs->backing_file[len] = '\0';
+ }
+
+ /* Disable migration when qcow images are used */
+ error_set(&s->migration_blocker,
+ QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
+ "qcow", bs->device_name, "live migration");
+ migrate_add_blocker(s->migration_blocker);
+
+ qemu_co_mutex_init(&s->lock);
+ return 0;
+
+ fail:
+ g_free(s->l1_table);
+ g_free(s->l2_cache);
+ g_free(s->cluster_cache);
+ g_free(s->cluster_data);
+ return ret;
+}
+
+
+/* We have nothing to do for QCOW reopen, stubs just return
+ * success */
+static int qcow_reopen_prepare(BDRVReopenState *state,
+ BlockReopenQueue *queue, Error **errp)
+{
+ return 0;
+}
+
+static int qcow_set_key(BlockDriverState *bs, const char *key)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint8_t keybuf[16];
+ int len, i;
+
+ memset(keybuf, 0, 16);
+ len = strlen(key);
+ if (len > 16)
+ len = 16;
+ /* XXX: we could compress the chars to 7 bits to increase
+ entropy */
+ for(i = 0;i < len;i++) {
+ keybuf[i] = key[i];
+ }
+ s->crypt_method = s->crypt_method_header;
+
+ if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
+ return -1;
+ if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
+ return -1;
+ return 0;
+}
+
+/* The crypt function is compatible with the linux cryptoloop
+ algorithm for < 4 GB images. NOTE: out_buf == in_buf is
+ supported */
+static void encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
+ uint8_t *out_buf, const uint8_t *in_buf,
+ int nb_sectors, int enc,
+ const AES_KEY *key)
+{
+ union {
+ uint64_t ll[2];
+ uint8_t b[16];
+ } ivec;
+ int i;
+
+ for(i = 0; i < nb_sectors; i++) {
+ ivec.ll[0] = cpu_to_le64(sector_num);
+ ivec.ll[1] = 0;
+ AES_cbc_encrypt(in_buf, out_buf, 512, key,
+ ivec.b, enc);
+ sector_num++;
+ in_buf += 512;
+ out_buf += 512;
+ }
+}
+
+/* 'allocate' is:
+ *
+ * 0 to not allocate.
+ *
+ * 1 to allocate a normal cluster (for sector indexes 'n_start' to
+ * 'n_end')
+ *
+ * 2 to allocate a compressed cluster of size
+ * 'compressed_size'. 'compressed_size' must be > 0 and <
+ * cluster_size
+ *
+ * return 0 if not allocated.
+ */
+static uint64_t get_cluster_offset(BlockDriverState *bs,
+ uint64_t offset, int allocate,
+ int compressed_size,
+ int n_start, int n_end)
+{
+ BDRVQcowState *s = bs->opaque;
+ int min_index, i, j, l1_index, l2_index;
+ uint64_t l2_offset, *l2_table, cluster_offset, tmp;
+ uint32_t min_count;
+ int new_l2_table;
+
+ l1_index = offset >> (s->l2_bits + s->cluster_bits);
+ l2_offset = s->l1_table[l1_index];
+ new_l2_table = 0;
+ if (!l2_offset) {
+ if (!allocate)
+ return 0;
+ /* allocate a new l2 entry */
+ l2_offset = bdrv_getlength(bs->file);
+ /* round to cluster size */
+ l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1);
+ /* update the L1 entry */
+ s->l1_table[l1_index] = l2_offset;
+ tmp = cpu_to_be64(l2_offset);
+ if (bdrv_pwrite_sync(bs->file,
+ s->l1_table_offset + l1_index * sizeof(tmp),
+ &tmp, sizeof(tmp)) < 0)
+ return 0;
+ new_l2_table = 1;
+ }
+ for(i = 0; i < L2_CACHE_SIZE; i++) {
+ if (l2_offset == s->l2_cache_offsets[i]) {
+ /* increment the hit count */
+ if (++s->l2_cache_counts[i] == 0xffffffff) {
+ for(j = 0; j < L2_CACHE_SIZE; j++) {
+ s->l2_cache_counts[j] >>= 1;
+ }
+ }
+ l2_table = s->l2_cache + (i << s->l2_bits);
+ goto found;
+ }
+ }
+ /* not found: load a new entry in the least used one */
+ min_index = 0;
+ min_count = 0xffffffff;
+ for(i = 0; i < L2_CACHE_SIZE; i++) {
+ if (s->l2_cache_counts[i] < min_count) {
+ min_count = s->l2_cache_counts[i];
+ min_index = i;
+ }
+ }
+ l2_table = s->l2_cache + (min_index << s->l2_bits);
+ if (new_l2_table) {
+ memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
+ if (bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
+ s->l2_size * sizeof(uint64_t)) < 0)
+ return 0;
+ } else {
+ if (bdrv_pread(bs->file, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) !=
+ s->l2_size * sizeof(uint64_t))
+ return 0;
+ }
+ s->l2_cache_offsets[min_index] = l2_offset;
+ s->l2_cache_counts[min_index] = 1;
+ found:
+ l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
+ cluster_offset = be64_to_cpu(l2_table[l2_index]);
+ if (!cluster_offset ||
+ ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1)) {
+ if (!allocate)
+ return 0;
+ /* allocate a new cluster */
+ if ((cluster_offset & QCOW_OFLAG_COMPRESSED) &&
+ (n_end - n_start) < s->cluster_sectors) {
+ /* if the cluster is already compressed, we must
+ decompress it in the case it is not completely
+ overwritten */
+ if (decompress_cluster(bs, cluster_offset) < 0)
+ return 0;
+ cluster_offset = bdrv_getlength(bs->file);
+ cluster_offset = (cluster_offset + s->cluster_size - 1) &
+ ~(s->cluster_size - 1);
+ /* write the cluster content */
+ if (bdrv_pwrite(bs->file, cluster_offset, s->cluster_cache, s->cluster_size) !=
+ s->cluster_size)
+ return -1;
+ } else {
+ cluster_offset = bdrv_getlength(bs->file);
+ if (allocate == 1) {
+ /* round to cluster size */
+ cluster_offset = (cluster_offset + s->cluster_size - 1) &
+ ~(s->cluster_size - 1);
+ bdrv_truncate(bs->file, cluster_offset + s->cluster_size);
+ /* if encrypted, we must initialize the cluster
+ content which won't be written */
+ if (s->crypt_method &&
+ (n_end - n_start) < s->cluster_sectors) {
+ uint64_t start_sect;
+ start_sect = (offset & ~(s->cluster_size - 1)) >> 9;
+ memset(s->cluster_data + 512, 0x00, 512);
+ for(i = 0; i < s->cluster_sectors; i++) {
+ if (i < n_start || i >= n_end) {
+ encrypt_sectors(s, start_sect + i,
+ s->cluster_data,
+ s->cluster_data + 512, 1, 1,
+ &s->aes_encrypt_key);
+ if (bdrv_pwrite(bs->file, cluster_offset + i * 512,
+ s->cluster_data, 512) != 512)
+ return -1;
+ }
+ }
+ }
+ } else if (allocate == 2) {
+ cluster_offset |= QCOW_OFLAG_COMPRESSED |
+ (uint64_t)compressed_size << (63 - s->cluster_bits);
+ }
+ }
+ /* update L2 table */
+ tmp = cpu_to_be64(cluster_offset);
+ l2_table[l2_index] = tmp;
+ if (bdrv_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp),
+ &tmp, sizeof(tmp)) < 0)
+ return 0;
+ }
+ return cluster_offset;
+}
+
+static int coroutine_fn qcow_co_is_allocated(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, int *pnum)
+{
+ BDRVQcowState *s = bs->opaque;
+ int index_in_cluster, n;
+ uint64_t cluster_offset;
+
+ qemu_co_mutex_lock(&s->lock);
+ cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
+ qemu_co_mutex_unlock(&s->lock);
+ index_in_cluster = sector_num & (s->cluster_sectors - 1);
+ n = s->cluster_sectors - index_in_cluster;
+ if (n > nb_sectors)
+ n = nb_sectors;
+ *pnum = n;
+ return (cluster_offset != 0);
+}
+
+static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
+ const uint8_t *buf, int buf_size)
+{
+ z_stream strm1, *strm = &strm1;
+ int ret, out_len;
+
+ memset(strm, 0, sizeof(*strm));
+
+ strm->next_in = (uint8_t *)buf;
+ strm->avail_in = buf_size;
+ strm->next_out = out_buf;
+ strm->avail_out = out_buf_size;
+
+ ret = inflateInit2(strm, -12);
+ if (ret != Z_OK)
+ return -1;
+ ret = inflate(strm, Z_FINISH);
+ out_len = strm->next_out - out_buf;
+ if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
+ out_len != out_buf_size) {
+ inflateEnd(strm);
+ return -1;
+ }
+ inflateEnd(strm);
+ return 0;
+}
+
+static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
+{
+ BDRVQcowState *s = bs->opaque;
+ int ret, csize;
+ uint64_t coffset;
+
+ coffset = cluster_offset & s->cluster_offset_mask;
+ if (s->cluster_cache_offset != coffset) {
+ csize = cluster_offset >> (63 - s->cluster_bits);
+ csize &= (s->cluster_size - 1);
+ ret = bdrv_pread(bs->file, coffset, s->cluster_data, csize);
+ if (ret != csize)
+ return -1;
+ if (decompress_buffer(s->cluster_cache, s->cluster_size,
+ s->cluster_data, csize) < 0) {
+ return -1;
+ }
+ s->cluster_cache_offset = coffset;
+ }
+ return 0;
+}
+
+static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov)
+{
+ BDRVQcowState *s = bs->opaque;
+ int index_in_cluster;
+ int ret = 0, n;
+ uint64_t cluster_offset;
+ struct iovec hd_iov;
+ QEMUIOVector hd_qiov;
+ uint8_t *buf;
+ void *orig_buf;
+
+ if (qiov->niov > 1) {
+ buf = orig_buf = qemu_blockalign(bs, qiov->size);
+ } else {
+ orig_buf = NULL;
+ buf = (uint8_t *)qiov->iov->iov_base;
+ }
+
+ qemu_co_mutex_lock(&s->lock);
+
+ while (nb_sectors != 0) {
+ /* prepare next request */
+ cluster_offset = get_cluster_offset(bs, sector_num << 9,
+ 0, 0, 0, 0);
+ index_in_cluster = sector_num & (s->cluster_sectors - 1);
+ n = s->cluster_sectors - index_in_cluster;
+ if (n > nb_sectors) {
+ n = nb_sectors;
+ }
+
+ if (!cluster_offset) {
+ if (bs->backing_hd) {
+ /* read from the base image */
+ hd_iov.iov_base = (void *)buf;
+ hd_iov.iov_len = n * 512;
+ qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
+ qemu_co_mutex_unlock(&s->lock);
+ ret = bdrv_co_readv(bs->backing_hd, sector_num,
+ n, &hd_qiov);
+ qemu_co_mutex_lock(&s->lock);
+ if (ret < 0) {
+ goto fail;
+ }
+ } else {
+ /* Note: in this case, no need to wait */
+ memset(buf, 0, 512 * n);
+ }
+ } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
+ /* add AIO support for compressed blocks ? */
+ if (decompress_cluster(bs, cluster_offset) < 0) {
+ goto fail;
+ }
+ memcpy(buf,
+ s->cluster_cache + index_in_cluster * 512, 512 * n);
+ } else {
+ if ((cluster_offset & 511) != 0) {
+ goto fail;
+ }
+ hd_iov.iov_base = (void *)buf;
+ hd_iov.iov_len = n * 512;
+ qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
+ qemu_co_mutex_unlock(&s->lock);
+ ret = bdrv_co_readv(bs->file,
+ (cluster_offset >> 9) + index_in_cluster,
+ n, &hd_qiov);
+ qemu_co_mutex_lock(&s->lock);
+ if (ret < 0) {
+ break;
+ }
+ if (s->crypt_method) {
+ encrypt_sectors(s, sector_num, buf, buf,
+ n, 0,
+ &s->aes_decrypt_key);
+ }
+ }
+ ret = 0;
+
+ nb_sectors -= n;
+ sector_num += n;
+ buf += n * 512;
+ }
+
+done:
+ qemu_co_mutex_unlock(&s->lock);
+
+ if (qiov->niov > 1) {
+ qemu_iovec_from_buf(qiov, 0, orig_buf, qiov->size);
+ qemu_vfree(orig_buf);
+ }
+
+ return ret;
+
+fail:
+ ret = -EIO;
+ goto done;
+}
+
+static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov)
+{
+ BDRVQcowState *s = bs->opaque;
+ int index_in_cluster;
+ uint64_t cluster_offset;
+ const uint8_t *src_buf;
+ int ret = 0, n;
+ uint8_t *cluster_data = NULL;
+ struct iovec hd_iov;
+ QEMUIOVector hd_qiov;
+ uint8_t *buf;
+ void *orig_buf;
+
+ s->cluster_cache_offset = -1; /* disable compressed cache */
+
+ if (qiov->niov > 1) {
+ buf = orig_buf = qemu_blockalign(bs, qiov->size);
+ qemu_iovec_to_buf(qiov, 0, buf, qiov->size);
+ } else {
+ orig_buf = NULL;
+ buf = (uint8_t *)qiov->iov->iov_base;
+ }
+
+ qemu_co_mutex_lock(&s->lock);
+
+ while (nb_sectors != 0) {
+
+ index_in_cluster = sector_num & (s->cluster_sectors - 1);
+ n = s->cluster_sectors - index_in_cluster;
+ if (n > nb_sectors) {
+ n = nb_sectors;
+ }
+ cluster_offset = get_cluster_offset(bs, sector_num << 9, 1, 0,
+ index_in_cluster,
+ index_in_cluster + n);
+ if (!cluster_offset || (cluster_offset & 511) != 0) {
+ ret = -EIO;
+ break;
+ }
+ if (s->crypt_method) {
+ if (!cluster_data) {
+ cluster_data = g_malloc0(s->cluster_size);
+ }
+ encrypt_sectors(s, sector_num, cluster_data, buf,
+ n, 1, &s->aes_encrypt_key);
+ src_buf = cluster_data;
+ } else {
+ src_buf = buf;
+ }
+
+ hd_iov.iov_base = (void *)src_buf;
+ hd_iov.iov_len = n * 512;
+ qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
+ qemu_co_mutex_unlock(&s->lock);
+ ret = bdrv_co_writev(bs->file,
+ (cluster_offset >> 9) + index_in_cluster,
+ n, &hd_qiov);
+ qemu_co_mutex_lock(&s->lock);
+ if (ret < 0) {
+ break;
+ }
+ ret = 0;
+
+ nb_sectors -= n;
+ sector_num += n;
+ buf += n * 512;
+ }
+ qemu_co_mutex_unlock(&s->lock);
+
+ if (qiov->niov > 1) {
+ qemu_vfree(orig_buf);
+ }
+ g_free(cluster_data);
+
+ return ret;
+}
+
+static void qcow_close(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+
+ g_free(s->l1_table);
+ g_free(s->l2_cache);
+ g_free(s->cluster_cache);
+ g_free(s->cluster_data);
+
+ migrate_del_blocker(s->migration_blocker);
+ error_free(s->migration_blocker);
+}
+
+static int qcow_create(const char *filename, QEMUOptionParameter *options)
+{
+ int header_size, backing_filename_len, l1_size, shift, i;
+ QCowHeader header;
+ uint8_t *tmp;
+ int64_t total_size = 0;
+ const char *backing_file = NULL;
+ int flags = 0;
+ int ret;
+ BlockDriverState *qcow_bs;
+
+ /* Read out options */
+ while (options && options->name) {
+ if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
+ total_size = options->value.n / 512;
+ } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
+ backing_file = options->value.s;
+ } else if (!strcmp(options->name, BLOCK_OPT_ENCRYPT)) {
+ flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0;
+ }
+ options++;
+ }
+
+ ret = bdrv_create_file(filename, options);
+ if (ret < 0) {
+ return ret;
+ }
+
+ ret = bdrv_file_open(&qcow_bs, filename, NULL, BDRV_O_RDWR);
+ if (ret < 0) {
+ return ret;
+ }
+
+ ret = bdrv_truncate(qcow_bs, 0);
+ if (ret < 0) {
+ goto exit;
+ }
+
+ memset(&header, 0, sizeof(header));
+ header.magic = cpu_to_be32(QCOW_MAGIC);
+ header.version = cpu_to_be32(QCOW_VERSION);
+ header.size = cpu_to_be64(total_size * 512);
+ header_size = sizeof(header);
+ backing_filename_len = 0;
+ if (backing_file) {
+ if (strcmp(backing_file, "fat:")) {
+ header.backing_file_offset = cpu_to_be64(header_size);
+ backing_filename_len = strlen(backing_file);
+ header.backing_file_size = cpu_to_be32(backing_filename_len);
+ header_size += backing_filename_len;
+ } else {
+ /* special backing file for vvfat */
+ backing_file = NULL;
+ }
+ header.cluster_bits = 9; /* 512 byte cluster to avoid copying
+ unmodifyed sectors */
+ header.l2_bits = 12; /* 32 KB L2 tables */
+ } else {
+ header.cluster_bits = 12; /* 4 KB clusters */
+ header.l2_bits = 9; /* 4 KB L2 tables */
+ }
+ header_size = (header_size + 7) & ~7;
+ shift = header.cluster_bits + header.l2_bits;
+ l1_size = ((total_size * 512) + (1LL << shift) - 1) >> shift;
+
+ header.l1_table_offset = cpu_to_be64(header_size);
+ if (flags & BLOCK_FLAG_ENCRYPT) {
+ header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
+ } else {
+ header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
+ }
+
+ /* write all the data */
+ ret = bdrv_pwrite(qcow_bs, 0, &header, sizeof(header));
+ if (ret != sizeof(header)) {
+ goto exit;
+ }
+
+ if (backing_file) {
+ ret = bdrv_pwrite(qcow_bs, sizeof(header),
+ backing_file, backing_filename_len);
+ if (ret != backing_filename_len) {
+ goto exit;
+ }
+ }
+
+ tmp = g_malloc0(BDRV_SECTOR_SIZE);
+ for (i = 0; i < ((sizeof(uint64_t)*l1_size + BDRV_SECTOR_SIZE - 1)/
+ BDRV_SECTOR_SIZE); i++) {
+ ret = bdrv_pwrite(qcow_bs, header_size +
+ BDRV_SECTOR_SIZE*i, tmp, BDRV_SECTOR_SIZE);
+ if (ret != BDRV_SECTOR_SIZE) {
+ g_free(tmp);
+ goto exit;
+ }
+ }
+
+ g_free(tmp);
+ ret = 0;
+exit:
+ bdrv_delete(qcow_bs);
+ return ret;
+}
+
+static int qcow_make_empty(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint32_t l1_length = s->l1_size * sizeof(uint64_t);
+ int ret;
+
+ memset(s->l1_table, 0, l1_length);
+ if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
+ l1_length) < 0)
+ return -1;
+ ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
+ if (ret < 0)
+ return ret;
+
+ memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
+ memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t));
+ memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t));
+
+ return 0;
+}
+
+/* XXX: put compressed sectors first, then all the cluster aligned
+ tables to avoid losing bytes in alignment */
+static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors)
+{
+ BDRVQcowState *s = bs->opaque;
+ z_stream strm;
+ int ret, out_len;
+ uint8_t *out_buf;
+ uint64_t cluster_offset;
+
+ if (nb_sectors != s->cluster_sectors) {
+ ret = -EINVAL;
+
+ /* Zero-pad last write if image size is not cluster aligned */
+ if (sector_num + nb_sectors == bs->total_sectors &&
+ nb_sectors < s->cluster_sectors) {
+ uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
+ memset(pad_buf, 0, s->cluster_size);
+ memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
+ ret = qcow_write_compressed(bs, sector_num,
+ pad_buf, s->cluster_sectors);
+ qemu_vfree(pad_buf);
+ }
+ return ret;
+ }
+
+ out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
+
+ /* best compression, small window, no zlib header */
+ memset(&strm, 0, sizeof(strm));
+ ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
+ Z_DEFLATED, -12,
+ 9, Z_DEFAULT_STRATEGY);
+ if (ret != 0) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ strm.avail_in = s->cluster_size;
+ strm.next_in = (uint8_t *)buf;
+ strm.avail_out = s->cluster_size;
+ strm.next_out = out_buf;
+
+ ret = deflate(&strm, Z_FINISH);
+ if (ret != Z_STREAM_END && ret != Z_OK) {
+ deflateEnd(&strm);
+ ret = -EINVAL;
+ goto fail;
+ }
+ out_len = strm.next_out - out_buf;
+
+ deflateEnd(&strm);
+
+ if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
+ /* could not compress: write normal cluster */
+ ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
+ if (ret < 0) {
+ goto fail;
+ }
+ } else {
+ cluster_offset = get_cluster_offset(bs, sector_num << 9, 2,
+ out_len, 0, 0);
+ if (cluster_offset == 0) {
+ ret = -EIO;
+ goto fail;
+ }
+
+ cluster_offset &= s->cluster_offset_mask;
+ ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+
+ ret = 0;
+fail:
+ g_free(out_buf);
+ return ret;
+}
+
+static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+ BDRVQcowState *s = bs->opaque;
+ bdi->cluster_size = s->cluster_size;
+ return 0;
+}
+
+
+static QEMUOptionParameter qcow_create_options[] = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ {
+ .name = BLOCK_OPT_BACKING_FILE,
+ .type = OPT_STRING,
+ .help = "File name of a base image"
+ },
+ {
+ .name = BLOCK_OPT_ENCRYPT,
+ .type = OPT_FLAG,
+ .help = "Encrypt the image"
+ },
+ { NULL }
+};
+
+static BlockDriver bdrv_qcow = {
+ .format_name = "qcow",
+ .instance_size = sizeof(BDRVQcowState),
+ .bdrv_probe = qcow_probe,
+ .bdrv_open = qcow_open,
+ .bdrv_close = qcow_close,
+ .bdrv_reopen_prepare = qcow_reopen_prepare,
+ .bdrv_create = qcow_create,
+ .bdrv_has_zero_init = bdrv_has_zero_init_1,
+
+ .bdrv_co_readv = qcow_co_readv,
+ .bdrv_co_writev = qcow_co_writev,
+ .bdrv_co_is_allocated = qcow_co_is_allocated,
+
+ .bdrv_set_key = qcow_set_key,
+ .bdrv_make_empty = qcow_make_empty,
+ .bdrv_write_compressed = qcow_write_compressed,
+ .bdrv_get_info = qcow_get_info,
+
+ .create_options = qcow_create_options,
+};
+
+static void bdrv_qcow_init(void)
+{
+ bdrv_register(&bdrv_qcow);
+}
+
+block_init(bdrv_qcow_init);
diff --git a/contrib/qemu/block/qcow2-cache.c b/contrib/qemu/block/qcow2-cache.c
new file mode 100644
index 000000000..2f3114ecc
--- /dev/null
+++ b/contrib/qemu/block/qcow2-cache.c
@@ -0,0 +1,323 @@
+/*
+ * L2/refcount table cache for the QCOW2 format
+ *
+ * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "block/block_int.h"
+#include "qemu-common.h"
+#include "qcow2.h"
+#include "trace.h"
+
+typedef struct Qcow2CachedTable {
+ void* table;
+ int64_t offset;
+ bool dirty;
+ int cache_hits;
+ int ref;
+} Qcow2CachedTable;
+
+struct Qcow2Cache {
+ Qcow2CachedTable* entries;
+ struct Qcow2Cache* depends;
+ int size;
+ bool depends_on_flush;
+};
+
+Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
+{
+ BDRVQcowState *s = bs->opaque;
+ Qcow2Cache *c;
+ int i;
+
+ c = g_malloc0(sizeof(*c));
+ c->size = num_tables;
+ c->entries = g_malloc0(sizeof(*c->entries) * num_tables);
+
+ for (i = 0; i < c->size; i++) {
+ c->entries[i].table = qemu_blockalign(bs, s->cluster_size);
+ }
+
+ return c;
+}
+
+int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c)
+{
+ int i;
+
+ for (i = 0; i < c->size; i++) {
+ assert(c->entries[i].ref == 0);
+ qemu_vfree(c->entries[i].table);
+ }
+
+ g_free(c->entries);
+ g_free(c);
+
+ return 0;
+}
+
+static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
+{
+ int ret;
+
+ ret = qcow2_cache_flush(bs, c->depends);
+ if (ret < 0) {
+ return ret;
+ }
+
+ c->depends = NULL;
+ c->depends_on_flush = false;
+
+ return 0;
+}
+
+static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
+{
+ BDRVQcowState *s = bs->opaque;
+ int ret = 0;
+
+ if (!c->entries[i].dirty || !c->entries[i].offset) {
+ return 0;
+ }
+
+ trace_qcow2_cache_entry_flush(qemu_coroutine_self(),
+ c == s->l2_table_cache, i);
+
+ if (c->depends) {
+ ret = qcow2_cache_flush_dependency(bs, c);
+ } else if (c->depends_on_flush) {
+ ret = bdrv_flush(bs->file);
+ if (ret >= 0) {
+ c->depends_on_flush = false;
+ }
+ }
+
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (c == s->refcount_block_cache) {
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
+ } else if (c == s->l2_table_cache) {
+ BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
+ }
+
+ ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->entries[i].table,
+ s->cluster_size);
+ if (ret < 0) {
+ return ret;
+ }
+
+ c->entries[i].dirty = false;
+
+ return 0;
+}
+
+int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
+{
+ BDRVQcowState *s = bs->opaque;
+ int result = 0;
+ int ret;
+ int i;
+
+ trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache);
+
+ for (i = 0; i < c->size; i++) {
+ ret = qcow2_cache_entry_flush(bs, c, i);
+ if (ret < 0 && result != -ENOSPC) {
+ result = ret;
+ }
+ }
+
+ if (result == 0) {
+ ret = bdrv_flush(bs->file);
+ if (ret < 0) {
+ result = ret;
+ }
+ }
+
+ return result;
+}
+
+int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
+ Qcow2Cache *dependency)
+{
+ int ret;
+
+ if (dependency->depends) {
+ ret = qcow2_cache_flush_dependency(bs, dependency);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ if (c->depends && (c->depends != dependency)) {
+ ret = qcow2_cache_flush_dependency(bs, c);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ c->depends = dependency;
+ return 0;
+}
+
+void qcow2_cache_depends_on_flush(Qcow2Cache *c)
+{
+ c->depends_on_flush = true;
+}
+
+static int qcow2_cache_find_entry_to_replace(Qcow2Cache *c)
+{
+ int i;
+ int min_count = INT_MAX;
+ int min_index = -1;
+
+
+ for (i = 0; i < c->size; i++) {
+ if (c->entries[i].ref) {
+ continue;
+ }
+
+ if (c->entries[i].cache_hits < min_count) {
+ min_index = i;
+ min_count = c->entries[i].cache_hits;
+ }
+
+ /* Give newer hits priority */
+ /* TODO Check how to optimize the replacement strategy */
+ c->entries[i].cache_hits /= 2;
+ }
+
+ if (min_index == -1) {
+ /* This can't happen in current synchronous code, but leave the check
+ * here as a reminder for whoever starts using AIO with the cache */
+ abort();
+ }
+ return min_index;
+}
+
+static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
+ uint64_t offset, void **table, bool read_from_disk)
+{
+ BDRVQcowState *s = bs->opaque;
+ int i;
+ int ret;
+
+ trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
+ offset, read_from_disk);
+
+ /* Check if the table is already cached */
+ for (i = 0; i < c->size; i++) {
+ if (c->entries[i].offset == offset) {
+ goto found;
+ }
+ }
+
+ /* If not, write a table back and replace it */
+ i = qcow2_cache_find_entry_to_replace(c);
+ trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
+ c == s->l2_table_cache, i);
+ if (i < 0) {
+ return i;
+ }
+
+ ret = qcow2_cache_entry_flush(bs, c, i);
+ if (ret < 0) {
+ return ret;
+ }
+
+ trace_qcow2_cache_get_read(qemu_coroutine_self(),
+ c == s->l2_table_cache, i);
+ c->entries[i].offset = 0;
+ if (read_from_disk) {
+ if (c == s->l2_table_cache) {
+ BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
+ }
+
+ ret = bdrv_pread(bs->file, offset, c->entries[i].table, s->cluster_size);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ /* Give the table some hits for the start so that it won't be replaced
+ * immediately. The number 32 is completely arbitrary. */
+ c->entries[i].cache_hits = 32;
+ c->entries[i].offset = offset;
+
+ /* And return the right table */
+found:
+ c->entries[i].cache_hits++;
+ c->entries[i].ref++;
+ *table = c->entries[i].table;
+
+ trace_qcow2_cache_get_done(qemu_coroutine_self(),
+ c == s->l2_table_cache, i);
+
+ return 0;
+}
+
+int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
+ void **table)
+{
+ return qcow2_cache_do_get(bs, c, offset, table, true);
+}
+
+int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
+ void **table)
+{
+ return qcow2_cache_do_get(bs, c, offset, table, false);
+}
+
+int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
+{
+ int i;
+
+ for (i = 0; i < c->size; i++) {
+ if (c->entries[i].table == *table) {
+ goto found;
+ }
+ }
+ return -ENOENT;
+
+found:
+ c->entries[i].ref--;
+ *table = NULL;
+
+ assert(c->entries[i].ref >= 0);
+ return 0;
+}
+
+void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table)
+{
+ int i;
+
+ for (i = 0; i < c->size; i++) {
+ if (c->entries[i].table == table) {
+ goto found;
+ }
+ }
+ abort();
+
+found:
+ c->entries[i].dirty = true;
+}
diff --git a/contrib/qemu/block/qcow2-cluster.c b/contrib/qemu/block/qcow2-cluster.c
new file mode 100644
index 000000000..cca76d4fc
--- /dev/null
+++ b/contrib/qemu/block/qcow2-cluster.c
@@ -0,0 +1,1478 @@
+/*
+ * Block driver for the QCOW version 2 format
+ *
+ * Copyright (c) 2004-2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <zlib.h>
+
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "block/qcow2.h"
+#include "trace.h"
+
+int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
+ bool exact_size)
+{
+ BDRVQcowState *s = bs->opaque;
+ int new_l1_size2, ret, i;
+ uint64_t *new_l1_table;
+ int64_t new_l1_table_offset, new_l1_size;
+ uint8_t data[12];
+
+ if (min_size <= s->l1_size)
+ return 0;
+
+ if (exact_size) {
+ new_l1_size = min_size;
+ } else {
+ /* Bump size up to reduce the number of times we have to grow */
+ new_l1_size = s->l1_size;
+ if (new_l1_size == 0) {
+ new_l1_size = 1;
+ }
+ while (min_size > new_l1_size) {
+ new_l1_size = (new_l1_size * 3 + 1) / 2;
+ }
+ }
+
+ if (new_l1_size > INT_MAX) {
+ return -EFBIG;
+ }
+
+#ifdef DEBUG_ALLOC2
+ fprintf(stderr, "grow l1_table from %d to %" PRId64 "\n",
+ s->l1_size, new_l1_size);
+#endif
+
+ new_l1_size2 = sizeof(uint64_t) * new_l1_size;
+ new_l1_table = g_malloc0(align_offset(new_l1_size2, 512));
+ memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));
+
+ /* write new table (align to cluster) */
+ BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE);
+ new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2);
+ if (new_l1_table_offset < 0) {
+ g_free(new_l1_table);
+ return new_l1_table_offset;
+ }
+
+ ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
+ for(i = 0; i < s->l1_size; i++)
+ new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
+ ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset, new_l1_table, new_l1_size2);
+ if (ret < 0)
+ goto fail;
+ for(i = 0; i < s->l1_size; i++)
+ new_l1_table[i] = be64_to_cpu(new_l1_table[i]);
+
+ /* set new table */
+ BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
+ cpu_to_be32w((uint32_t*)data, new_l1_size);
+ cpu_to_be64wu((uint64_t*)(data + 4), new_l1_table_offset);
+ ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size), data,sizeof(data));
+ if (ret < 0) {
+ goto fail;
+ }
+ g_free(s->l1_table);
+ qcow2_free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t),
+ QCOW2_DISCARD_OTHER);
+ s->l1_table_offset = new_l1_table_offset;
+ s->l1_table = new_l1_table;
+ s->l1_size = new_l1_size;
+ return 0;
+ fail:
+ g_free(new_l1_table);
+ qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2,
+ QCOW2_DISCARD_OTHER);
+ return ret;
+}
+
+/*
+ * l2_load
+ *
+ * Loads a L2 table into memory. If the table is in the cache, the cache
+ * is used; otherwise the L2 table is loaded from the image file.
+ *
+ * Returns a pointer to the L2 table on success, or NULL if the read from
+ * the image file failed.
+ */
+
+static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
+ uint64_t **l2_table)
+{
+ BDRVQcowState *s = bs->opaque;
+ int ret;
+
+ ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, (void**) l2_table);
+
+ return ret;
+}
+
+/*
+ * Writes one sector of the L1 table to the disk (can't update single entries
+ * and we really don't want bdrv_pread to perform a read-modify-write)
+ */
+#define L1_ENTRIES_PER_SECTOR (512 / 8)
+static int write_l1_entry(BlockDriverState *bs, int l1_index)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t buf[L1_ENTRIES_PER_SECTOR];
+ int l1_start_index;
+ int i, ret;
+
+ l1_start_index = l1_index & ~(L1_ENTRIES_PER_SECTOR - 1);
+ for (i = 0; i < L1_ENTRIES_PER_SECTOR; i++) {
+ buf[i] = cpu_to_be64(s->l1_table[l1_start_index + i]);
+ }
+
+ BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
+ ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset + 8 * l1_start_index,
+ buf, sizeof(buf));
+ if (ret < 0) {
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * l2_allocate
+ *
+ * Allocate a new l2 entry in the file. If l1_index points to an already
+ * used entry in the L2 table (i.e. we are doing a copy on write for the L2
+ * table) copy the contents of the old L2 table into the newly allocated one.
+ * Otherwise the new table is initialized with zeros.
+ *
+ */
+
+static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t old_l2_offset;
+ uint64_t *l2_table;
+ int64_t l2_offset;
+ int ret;
+
+ old_l2_offset = s->l1_table[l1_index];
+
+ trace_qcow2_l2_allocate(bs, l1_index);
+
+ /* allocate a new l2 entry */
+
+ l2_offset = qcow2_alloc_clusters(bs, s->l2_size * sizeof(uint64_t));
+ if (l2_offset < 0) {
+ return l2_offset;
+ }
+
+ ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* allocate a new entry in the l2 cache */
+
+ trace_qcow2_l2_allocate_get_empty(bs, l1_index);
+ ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) table);
+ if (ret < 0) {
+ return ret;
+ }
+
+ l2_table = *table;
+
+ if ((old_l2_offset & L1E_OFFSET_MASK) == 0) {
+ /* if there was no old l2 table, clear the new table */
+ memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
+ } else {
+ uint64_t* old_table;
+
+ /* if there was an old l2 table, read it from the disk */
+ BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ);
+ ret = qcow2_cache_get(bs, s->l2_table_cache,
+ old_l2_offset & L1E_OFFSET_MASK,
+ (void**) &old_table);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ memcpy(l2_table, old_table, s->cluster_size);
+
+ ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &old_table);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+
+ /* write the l2 table to the file */
+ BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
+
+ trace_qcow2_l2_allocate_write_l2(bs, l1_index);
+ qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+ ret = qcow2_cache_flush(bs, s->l2_table_cache);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* update the L1 entry */
+ trace_qcow2_l2_allocate_write_l1(bs, l1_index);
+ s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED;
+ ret = write_l1_entry(bs, l1_index);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ *table = l2_table;
+ trace_qcow2_l2_allocate_done(bs, l1_index, 0);
+ return 0;
+
+fail:
+ trace_qcow2_l2_allocate_done(bs, l1_index, ret);
+ qcow2_cache_put(bs, s->l2_table_cache, (void**) table);
+ s->l1_table[l1_index] = old_l2_offset;
+ return ret;
+}
+
+/*
+ * Checks how many clusters in a given L2 table are contiguous in the image
+ * file. As soon as one of the flags in the bitmask stop_flags changes compared
+ * to the first cluster, the search is stopped and the cluster is not counted
+ * as contiguous. (This allows it, for example, to stop at the first compressed
+ * cluster which may require a different handling)
+ */
+static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size,
+ uint64_t *l2_table, uint64_t start, uint64_t stop_flags)
+{
+ int i;
+ uint64_t mask = stop_flags | L2E_OFFSET_MASK;
+ uint64_t offset = be64_to_cpu(l2_table[0]) & mask;
+
+ if (!offset)
+ return 0;
+
+ for (i = start; i < start + nb_clusters; i++) {
+ uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask;
+ if (offset + (uint64_t) i * cluster_size != l2_entry) {
+ break;
+ }
+ }
+
+ return (i - start);
+}
+
+static int count_contiguous_free_clusters(uint64_t nb_clusters, uint64_t *l2_table)
+{
+ int i;
+
+ for (i = 0; i < nb_clusters; i++) {
+ int type = qcow2_get_cluster_type(be64_to_cpu(l2_table[i]));
+
+ if (type != QCOW2_CLUSTER_UNALLOCATED) {
+ break;
+ }
+ }
+
+ return i;
+}
+
+/* The crypt function is compatible with the linux cryptoloop
+ algorithm for < 4 GB images. NOTE: out_buf == in_buf is
+ supported */
+void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
+ uint8_t *out_buf, const uint8_t *in_buf,
+ int nb_sectors, int enc,
+ const AES_KEY *key)
+{
+ union {
+ uint64_t ll[2];
+ uint8_t b[16];
+ } ivec;
+ int i;
+
+ for(i = 0; i < nb_sectors; i++) {
+ ivec.ll[0] = cpu_to_le64(sector_num);
+ ivec.ll[1] = 0;
+ AES_cbc_encrypt(in_buf, out_buf, 512, key,
+ ivec.b, enc);
+ sector_num++;
+ in_buf += 512;
+ out_buf += 512;
+ }
+}
+
+static int coroutine_fn copy_sectors(BlockDriverState *bs,
+ uint64_t start_sect,
+ uint64_t cluster_offset,
+ int n_start, int n_end)
+{
+ BDRVQcowState *s = bs->opaque;
+ QEMUIOVector qiov;
+ struct iovec iov;
+ int n, ret;
+
+ /*
+ * If this is the last cluster and it is only partially used, we must only
+ * copy until the end of the image, or bdrv_check_request will fail for the
+ * bdrv_read/write calls below.
+ */
+ if (start_sect + n_end > bs->total_sectors) {
+ n_end = bs->total_sectors - start_sect;
+ }
+
+ n = n_end - n_start;
+ if (n <= 0) {
+ return 0;
+ }
+
+ iov.iov_len = n * BDRV_SECTOR_SIZE;
+ iov.iov_base = qemu_blockalign(bs, iov.iov_len);
+
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);
+
+ /* Call .bdrv_co_readv() directly instead of using the public block-layer
+ * interface. This avoids double I/O throttling and request tracking,
+ * which can lead to deadlock when block layer copy-on-read is enabled.
+ */
+ ret = bs->drv->bdrv_co_readv(bs, start_sect + n_start, n, &qiov);
+ if (ret < 0) {
+ goto out;
+ }
+
+ if (s->crypt_method) {
+ qcow2_encrypt_sectors(s, start_sect + n_start,
+ iov.iov_base, iov.iov_base, n, 1,
+ &s->aes_encrypt_key);
+ }
+
+ BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
+ ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + n_start, n, &qiov);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = 0;
+out:
+ qemu_vfree(iov.iov_base);
+ return ret;
+}
+
+
+/*
+ * get_cluster_offset
+ *
+ * For a given offset of the disk image, find the cluster offset in
+ * qcow2 file. The offset is stored in *cluster_offset.
+ *
+ * on entry, *num is the number of contiguous sectors we'd like to
+ * access following offset.
+ *
+ * on exit, *num is the number of contiguous sectors we can read.
+ *
+ * Returns the cluster type (QCOW2_CLUSTER_*) on success, -errno in error
+ * cases.
+ */
+int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
+ int *num, uint64_t *cluster_offset)
+{
+ BDRVQcowState *s = bs->opaque;
+ unsigned int l2_index;
+ uint64_t l1_index, l2_offset, *l2_table;
+ int l1_bits, c;
+ unsigned int index_in_cluster, nb_clusters;
+ uint64_t nb_available, nb_needed;
+ int ret;
+
+ index_in_cluster = (offset >> 9) & (s->cluster_sectors - 1);
+ nb_needed = *num + index_in_cluster;
+
+ l1_bits = s->l2_bits + s->cluster_bits;
+
+ /* compute how many bytes there are between the offset and
+ * the end of the l1 entry
+ */
+
+ nb_available = (1ULL << l1_bits) - (offset & ((1ULL << l1_bits) - 1));
+
+ /* compute the number of available sectors */
+
+ nb_available = (nb_available >> 9) + index_in_cluster;
+
+ if (nb_needed > nb_available) {
+ nb_needed = nb_available;
+ }
+
+ *cluster_offset = 0;
+
+ /* seek the the l2 offset in the l1 table */
+
+ l1_index = offset >> l1_bits;
+ if (l1_index >= s->l1_size) {
+ ret = QCOW2_CLUSTER_UNALLOCATED;
+ goto out;
+ }
+
+ l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
+ if (!l2_offset) {
+ ret = QCOW2_CLUSTER_UNALLOCATED;
+ goto out;
+ }
+
+ /* load the l2 table in memory */
+
+ ret = l2_load(bs, l2_offset, &l2_table);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* find the cluster offset for the given disk offset */
+
+ l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
+ *cluster_offset = be64_to_cpu(l2_table[l2_index]);
+ nb_clusters = size_to_clusters(s, nb_needed << 9);
+
+ ret = qcow2_get_cluster_type(*cluster_offset);
+ switch (ret) {
+ case QCOW2_CLUSTER_COMPRESSED:
+ /* Compressed clusters can only be processed one by one */
+ c = 1;
+ *cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK;
+ break;
+ case QCOW2_CLUSTER_ZERO:
+ if (s->qcow_version < 3) {
+ return -EIO;
+ }
+ c = count_contiguous_clusters(nb_clusters, s->cluster_size,
+ &l2_table[l2_index], 0,
+ QCOW_OFLAG_COMPRESSED | QCOW_OFLAG_ZERO);
+ *cluster_offset = 0;
+ break;
+ case QCOW2_CLUSTER_UNALLOCATED:
+ /* how many empty clusters ? */
+ c = count_contiguous_free_clusters(nb_clusters, &l2_table[l2_index]);
+ *cluster_offset = 0;
+ break;
+ case QCOW2_CLUSTER_NORMAL:
+ /* how many allocated clusters ? */
+ c = count_contiguous_clusters(nb_clusters, s->cluster_size,
+ &l2_table[l2_index], 0,
+ QCOW_OFLAG_COMPRESSED | QCOW_OFLAG_ZERO);
+ *cluster_offset &= L2E_OFFSET_MASK;
+ break;
+ default:
+ abort();
+ }
+
+ qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+
+ nb_available = (c * s->cluster_sectors);
+
+out:
+ if (nb_available > nb_needed)
+ nb_available = nb_needed;
+
+ *num = nb_available - index_in_cluster;
+
+ return ret;
+}
+
+/*
+ * get_cluster_table
+ *
+ * for a given disk offset, load (and allocate if needed)
+ * the l2 table.
+ *
+ * the l2 table offset in the qcow2 file and the cluster index
+ * in the l2 table are given to the caller.
+ *
+ * Returns 0 on success, -errno in failure case
+ */
+static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
+ uint64_t **new_l2_table,
+ int *new_l2_index)
+{
+ BDRVQcowState *s = bs->opaque;
+ unsigned int l2_index;
+ uint64_t l1_index, l2_offset;
+ uint64_t *l2_table = NULL;
+ int ret;
+
+ /* seek the the l2 offset in the l1 table */
+
+ l1_index = offset >> (s->l2_bits + s->cluster_bits);
+ if (l1_index >= s->l1_size) {
+ ret = qcow2_grow_l1_table(bs, l1_index + 1, false);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ assert(l1_index < s->l1_size);
+ l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
+
+ /* seek the l2 table of the given l2 offset */
+
+ if (s->l1_table[l1_index] & QCOW_OFLAG_COPIED) {
+ /* load the l2 table in memory */
+ ret = l2_load(bs, l2_offset, &l2_table);
+ if (ret < 0) {
+ return ret;
+ }
+ } else {
+ /* First allocate a new L2 table (and do COW if needed) */
+ ret = l2_allocate(bs, l1_index, &l2_table);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Then decrease the refcount of the old table */
+ if (l2_offset) {
+ qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t),
+ QCOW2_DISCARD_OTHER);
+ }
+ }
+
+ /* find the cluster offset for the given disk offset */
+
+ l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
+
+ *new_l2_table = l2_table;
+ *new_l2_index = l2_index;
+
+ return 0;
+}
+
+/*
+ * alloc_compressed_cluster_offset
+ *
+ * For a given offset of the disk image, return cluster offset in
+ * qcow2 file.
+ *
+ * If the offset is not found, allocate a new compressed cluster.
+ *
+ * Return the cluster offset if successful,
+ * Return 0, otherwise.
+ *
+ */
+
+uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
+ uint64_t offset,
+ int compressed_size)
+{
+ BDRVQcowState *s = bs->opaque;
+ int l2_index, ret;
+ uint64_t *l2_table;
+ int64_t cluster_offset;
+ int nb_csectors;
+
+ ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
+ if (ret < 0) {
+ return 0;
+ }
+
+ /* Compression can't overwrite anything. Fail if the cluster was already
+ * allocated. */
+ cluster_offset = be64_to_cpu(l2_table[l2_index]);
+ if (cluster_offset & L2E_OFFSET_MASK) {
+ qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+ return 0;
+ }
+
+ cluster_offset = qcow2_alloc_bytes(bs, compressed_size);
+ if (cluster_offset < 0) {
+ qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+ return 0;
+ }
+
+ nb_csectors = ((cluster_offset + compressed_size - 1) >> 9) -
+ (cluster_offset >> 9);
+
+ cluster_offset |= QCOW_OFLAG_COMPRESSED |
+ ((uint64_t)nb_csectors << s->csize_shift);
+
+ /* update L2 table */
+
+ /* compressed clusters never have the copied flag */
+
+ BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
+ qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+ l2_table[l2_index] = cpu_to_be64(cluster_offset);
+ ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+ if (ret < 0) {
+ return 0;
+ }
+
+ return cluster_offset;
+}
+
+static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r)
+{
+ BDRVQcowState *s = bs->opaque;
+ int ret;
+
+ if (r->nb_sectors == 0) {
+ return 0;
+ }
+
+ qemu_co_mutex_unlock(&s->lock);
+ ret = copy_sectors(bs, m->offset / BDRV_SECTOR_SIZE, m->alloc_offset,
+ r->offset / BDRV_SECTOR_SIZE,
+ r->offset / BDRV_SECTOR_SIZE + r->nb_sectors);
+ qemu_co_mutex_lock(&s->lock);
+
+ if (ret < 0) {
+ return ret;
+ }
+
+ /*
+ * Before we update the L2 table to actually point to the new cluster, we
+ * need to be sure that the refcounts have been increased and COW was
+ * handled.
+ */
+ qcow2_cache_depends_on_flush(s->l2_table_cache);
+
+ return 0;
+}
+
+int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
+{
+ BDRVQcowState *s = bs->opaque;
+ int i, j = 0, l2_index, ret;
+ uint64_t *old_cluster, *l2_table;
+ uint64_t cluster_offset = m->alloc_offset;
+
+ trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
+ assert(m->nb_clusters > 0);
+
+ old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t));
+
+ /* copy content of unmodified sectors */
+ ret = perform_cow(bs, m, &m->cow_start);
+ if (ret < 0) {
+ goto err;
+ }
+
+ ret = perform_cow(bs, m, &m->cow_end);
+ if (ret < 0) {
+ goto err;
+ }
+
+ /* Update L2 table. */
+ if (s->use_lazy_refcounts) {
+ qcow2_mark_dirty(bs);
+ }
+ if (qcow2_need_accurate_refcounts(s)) {
+ qcow2_cache_set_dependency(bs, s->l2_table_cache,
+ s->refcount_block_cache);
+ }
+
+ ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index);
+ if (ret < 0) {
+ goto err;
+ }
+ qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+
+ for (i = 0; i < m->nb_clusters; i++) {
+ /* if two concurrent writes happen to the same unallocated cluster
+ * each write allocates separate cluster and writes data concurrently.
+ * The first one to complete updates l2 table with pointer to its
+ * cluster the second one has to do RMW (which is done above by
+ * copy_sectors()), update l2 table with its cluster pointer and free
+ * old cluster. This is what this loop does */
+ if(l2_table[l2_index + i] != 0)
+ old_cluster[j++] = l2_table[l2_index + i];
+
+ l2_table[l2_index + i] = cpu_to_be64((cluster_offset +
+ (i << s->cluster_bits)) | QCOW_OFLAG_COPIED);
+ }
+
+
+ ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+ if (ret < 0) {
+ goto err;
+ }
+
+ /*
+ * If this was a COW, we need to decrease the refcount of the old cluster.
+ * Also flush bs->file to get the right order for L2 and refcount update.
+ *
+ * Don't discard clusters that reach a refcount of 0 (e.g. compressed
+ * clusters), the next write will reuse them anyway.
+ */
+ if (j != 0) {
+ for (i = 0; i < j; i++) {
+ qcow2_free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1,
+ QCOW2_DISCARD_NEVER);
+ }
+ }
+
+ ret = 0;
+err:
+ g_free(old_cluster);
+ return ret;
+ }
+
+/*
+ * Returns the number of contiguous clusters that can be used for an allocating
+ * write, but require COW to be performed (this includes yet unallocated space,
+ * which must copy from the backing file)
+ */
+static int count_cow_clusters(BDRVQcowState *s, int nb_clusters,
+ uint64_t *l2_table, int l2_index)
+{
+ int i;
+
+ for (i = 0; i < nb_clusters; i++) {
+ uint64_t l2_entry = be64_to_cpu(l2_table[l2_index + i]);
+ int cluster_type = qcow2_get_cluster_type(l2_entry);
+
+ switch(cluster_type) {
+ case QCOW2_CLUSTER_NORMAL:
+ if (l2_entry & QCOW_OFLAG_COPIED) {
+ goto out;
+ }
+ break;
+ case QCOW2_CLUSTER_UNALLOCATED:
+ case QCOW2_CLUSTER_COMPRESSED:
+ case QCOW2_CLUSTER_ZERO:
+ break;
+ default:
+ abort();
+ }
+ }
+
+out:
+ assert(i <= nb_clusters);
+ return i;
+}
+
+/*
+ * Check if there already is an AIO write request in flight which allocates
+ * the same cluster. In this case we need to wait until the previous
+ * request has completed and updated the L2 table accordingly.
+ *
+ * Returns:
+ * 0 if there was no dependency. *cur_bytes indicates the number of
+ * bytes from guest_offset that can be read before the next
+ * dependency must be processed (or the request is complete)
+ *
+ * -EAGAIN if we had to wait for another request, previously gathered
+ * information on cluster allocation may be invalid now. The caller
+ * must start over anyway, so consider *cur_bytes undefined.
+ */
+static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
+ uint64_t *cur_bytes, QCowL2Meta **m)
+{
+ BDRVQcowState *s = bs->opaque;
+ QCowL2Meta *old_alloc;
+ uint64_t bytes = *cur_bytes;
+
+ QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) {
+
+ uint64_t start = guest_offset;
+ uint64_t end = start + bytes;
+ uint64_t old_start = l2meta_cow_start(old_alloc);
+ uint64_t old_end = l2meta_cow_end(old_alloc);
+
+ if (end <= old_start || start >= old_end) {
+ /* No intersection */
+ } else {
+ if (start < old_start) {
+ /* Stop at the start of a running allocation */
+ bytes = old_start - start;
+ } else {
+ bytes = 0;
+ }
+
+ /* Stop if already an l2meta exists. After yielding, it wouldn't
+ * be valid any more, so we'd have to clean up the old L2Metas
+ * and deal with requests depending on them before starting to
+ * gather new ones. Not worth the trouble. */
+ if (bytes == 0 && *m) {
+ *cur_bytes = 0;
+ return 0;
+ }
+
+ if (bytes == 0) {
+ /* Wait for the dependency to complete. We need to recheck
+ * the free/allocated clusters when we continue. */
+ qemu_co_mutex_unlock(&s->lock);
+ qemu_co_queue_wait(&old_alloc->dependent_requests);
+ qemu_co_mutex_lock(&s->lock);
+ return -EAGAIN;
+ }
+ }
+ }
+
+ /* Make sure that existing clusters and new allocations are only used up to
+ * the next dependency if we shortened the request above */
+ *cur_bytes = bytes;
+
+ return 0;
+}
+
+/*
+ * Checks how many already allocated clusters that don't require a copy on
+ * write there are at the given guest_offset (up to *bytes). If
+ * *host_offset is not zero, only physically contiguous clusters beginning at
+ * this host offset are counted.
+ *
+ * Note that guest_offset may not be cluster aligned. In this case, the
+ * returned *host_offset points to exact byte referenced by guest_offset and
+ * therefore isn't cluster aligned as well.
+ *
+ * Returns:
+ * 0: if no allocated clusters are available at the given offset.
+ * *bytes is normally unchanged. It is set to 0 if the cluster
+ * is allocated and doesn't need COW, but doesn't have the right
+ * physical offset.
+ *
+ * 1: if allocated clusters that don't require a COW are available at
+ * the requested offset. *bytes may have decreased and describes
+ * the length of the area that can be written to.
+ *
+ * -errno: in error cases
+ */
+static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
+ uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
+{
+ BDRVQcowState *s = bs->opaque;
+ int l2_index;
+ uint64_t cluster_offset;
+ uint64_t *l2_table;
+ unsigned int nb_clusters;
+ unsigned int keep_clusters;
+ int ret, pret;
+
+ trace_qcow2_handle_copied(qemu_coroutine_self(), guest_offset, *host_offset,
+ *bytes);
+
+ assert(*host_offset == 0 || offset_into_cluster(s, guest_offset)
+ == offset_into_cluster(s, *host_offset));
+
+ /*
+ * Calculate the number of clusters to look for. We stop at L2 table
+ * boundaries to keep things simple.
+ */
+ nb_clusters =
+ size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);
+
+ l2_index = offset_to_l2_index(s, guest_offset);
+ nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+
+ /* Find L2 entry for the first involved cluster */
+ ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
+ if (ret < 0) {
+ return ret;
+ }
+
+ cluster_offset = be64_to_cpu(l2_table[l2_index]);
+
+ /* Check how many clusters are already allocated and don't need COW */
+ if (qcow2_get_cluster_type(cluster_offset) == QCOW2_CLUSTER_NORMAL
+ && (cluster_offset & QCOW_OFLAG_COPIED))
+ {
+ /* If a specific host_offset is required, check it */
+ bool offset_matches =
+ (cluster_offset & L2E_OFFSET_MASK) == *host_offset;
+
+ if (*host_offset != 0 && !offset_matches) {
+ *bytes = 0;
+ ret = 0;
+ goto out;
+ }
+
+ /* We keep all QCOW_OFLAG_COPIED clusters */
+ keep_clusters =
+ count_contiguous_clusters(nb_clusters, s->cluster_size,
+ &l2_table[l2_index], 0,
+ QCOW_OFLAG_COPIED | QCOW_OFLAG_ZERO);
+ assert(keep_clusters <= nb_clusters);
+
+ *bytes = MIN(*bytes,
+ keep_clusters * s->cluster_size
+ - offset_into_cluster(s, guest_offset));
+
+ ret = 1;
+ } else {
+ ret = 0;
+ }
+
+ /* Cleanup */
+out:
+ pret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+ if (pret < 0) {
+ return pret;
+ }
+
+ /* Only return a host offset if we actually made progress. Otherwise we
+ * would make requirements for handle_alloc() that it can't fulfill */
+ if (ret) {
+ *host_offset = (cluster_offset & L2E_OFFSET_MASK)
+ + offset_into_cluster(s, guest_offset);
+ }
+
+ return ret;
+}
+
+/*
+ * Allocates new clusters for the given guest_offset.
+ *
+ * At most *nb_clusters are allocated, and on return *nb_clusters is updated to
+ * contain the number of clusters that have been allocated and are contiguous
+ * in the image file.
+ *
+ * If *host_offset is non-zero, it specifies the offset in the image file at
+ * which the new clusters must start. *nb_clusters can be 0 on return in this
+ * case if the cluster at host_offset is already in use. If *host_offset is
+ * zero, the clusters can be allocated anywhere in the image file.
+ *
+ * *host_offset is updated to contain the offset into the image file at which
+ * the first allocated cluster starts.
+ *
+ * Return 0 on success and -errno in error cases. -EAGAIN means that the
+ * function has been waiting for another request and the allocation must be
+ * restarted, but the whole request should not be failed.
+ */
+static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
+ uint64_t *host_offset, unsigned int *nb_clusters)
+{
+ BDRVQcowState *s = bs->opaque;
+
+ trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset,
+ *host_offset, *nb_clusters);
+
+ /* Allocate new clusters */
+ trace_qcow2_cluster_alloc_phys(qemu_coroutine_self());
+ if (*host_offset == 0) {
+ int64_t cluster_offset =
+ qcow2_alloc_clusters(bs, *nb_clusters * s->cluster_size);
+ if (cluster_offset < 0) {
+ return cluster_offset;
+ }
+ *host_offset = cluster_offset;
+ return 0;
+ } else {
+ int ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters);
+ if (ret < 0) {
+ return ret;
+ }
+ *nb_clusters = ret;
+ return 0;
+ }
+}
+
+/*
+ * Allocates new clusters for an area that either is yet unallocated or needs a
+ * copy on write. If *host_offset is non-zero, clusters are only allocated if
+ * the new allocation can match the specified host offset.
+ *
+ * Note that guest_offset may not be cluster aligned. In this case, the
+ * returned *host_offset points to exact byte referenced by guest_offset and
+ * therefore isn't cluster aligned as well.
+ *
+ * Returns:
+ * 0: if no clusters could be allocated. *bytes is set to 0,
+ * *host_offset is left unchanged.
+ *
+ * 1: if new clusters were allocated. *bytes may be decreased if the
+ * new allocation doesn't cover all of the requested area.
+ * *host_offset is updated to contain the host offset of the first
+ * newly allocated cluster.
+ *
+ * -errno: in error cases
+ */
+static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
+ uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
+{
+ BDRVQcowState *s = bs->opaque;
+ int l2_index;
+ uint64_t *l2_table;
+ uint64_t entry;
+ unsigned int nb_clusters;
+ int ret;
+
+ uint64_t alloc_cluster_offset;
+
+ trace_qcow2_handle_alloc(qemu_coroutine_self(), guest_offset, *host_offset,
+ *bytes);
+ assert(*bytes > 0);
+
+ /*
+ * Calculate the number of clusters to look for. We stop at L2 table
+ * boundaries to keep things simple.
+ */
+ nb_clusters =
+ size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);
+
+ l2_index = offset_to_l2_index(s, guest_offset);
+ nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+
+ /* Find L2 entry for the first involved cluster */
+ ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
+ if (ret < 0) {
+ return ret;
+ }
+
+ entry = be64_to_cpu(l2_table[l2_index]);
+
+ /* For the moment, overwrite compressed clusters one by one */
+ if (entry & QCOW_OFLAG_COMPRESSED) {
+ nb_clusters = 1;
+ } else {
+ nb_clusters = count_cow_clusters(s, nb_clusters, l2_table, l2_index);
+ }
+
+ /* This function is only called when there were no non-COW clusters, so if
+ * we can't find any unallocated or COW clusters either, something is
+ * wrong with our code. */
+ assert(nb_clusters > 0);
+
+ ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Allocate, if necessary at a given offset in the image file */
+ alloc_cluster_offset = start_of_cluster(s, *host_offset);
+ ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset,
+ &nb_clusters);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* Can't extend contiguous allocation */
+ if (nb_clusters == 0) {
+ *bytes = 0;
+ return 0;
+ }
+
+ /*
+ * Save info needed for meta data update.
+ *
+ * requested_sectors: Number of sectors from the start of the first
+ * newly allocated cluster to the end of the (possibly shortened
+ * before) write request.
+ *
+ * avail_sectors: Number of sectors from the start of the first
+ * newly allocated to the end of the last newly allocated cluster.
+ *
+ * nb_sectors: The number of sectors from the start of the first
+ * newly allocated cluster to the end of the area that the write
+ * request actually writes to (excluding COW at the end)
+ */
+ int requested_sectors =
+ (*bytes + offset_into_cluster(s, guest_offset))
+ >> BDRV_SECTOR_BITS;
+ int avail_sectors = nb_clusters
+ << (s->cluster_bits - BDRV_SECTOR_BITS);
+ int alloc_n_start = offset_into_cluster(s, guest_offset)
+ >> BDRV_SECTOR_BITS;
+ int nb_sectors = MIN(requested_sectors, avail_sectors);
+ QCowL2Meta *old_m = *m;
+
+ *m = g_malloc0(sizeof(**m));
+
+ **m = (QCowL2Meta) {
+ .next = old_m,
+
+ .alloc_offset = alloc_cluster_offset,
+ .offset = start_of_cluster(s, guest_offset),
+ .nb_clusters = nb_clusters,
+ .nb_available = nb_sectors,
+
+ .cow_start = {
+ .offset = 0,
+ .nb_sectors = alloc_n_start,
+ },
+ .cow_end = {
+ .offset = nb_sectors * BDRV_SECTOR_SIZE,
+ .nb_sectors = avail_sectors - nb_sectors,
+ },
+ };
+ qemu_co_queue_init(&(*m)->dependent_requests);
+ QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight);
+
+ *host_offset = alloc_cluster_offset + offset_into_cluster(s, guest_offset);
+ *bytes = MIN(*bytes, (nb_sectors * BDRV_SECTOR_SIZE)
+ - offset_into_cluster(s, guest_offset));
+ assert(*bytes != 0);
+
+ return 1;
+
+fail:
+ if (*m && (*m)->nb_clusters > 0) {
+ QLIST_REMOVE(*m, next_in_flight);
+ }
+ return ret;
+}
+
+/*
+ * alloc_cluster_offset
+ *
+ * For a given offset on the virtual disk, find the cluster offset in qcow2
+ * file. If the offset is not found, allocate a new cluster.
+ *
+ * If the cluster was already allocated, m->nb_clusters is set to 0 and
+ * other fields in m are meaningless.
+ *
+ * If the cluster is newly allocated, m->nb_clusters is set to the number of
+ * contiguous clusters that have been allocated. In this case, the other
+ * fields of m are valid and contain information about the first allocated
+ * cluster.
+ *
+ * If the request conflicts with another write request in flight, the coroutine
+ * is queued and will be reentered when the dependency has completed.
+ *
+ * Return 0 on success and -errno in error cases
+ */
+int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
+ int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t start, remaining;
+ uint64_t cluster_offset;
+ uint64_t cur_bytes;
+ int ret;
+
+ trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset,
+ n_start, n_end);
+
+ assert(n_start * BDRV_SECTOR_SIZE == offset_into_cluster(s, offset));
+ offset = start_of_cluster(s, offset);
+
+again:
+ start = offset + (n_start << BDRV_SECTOR_BITS);
+ remaining = (n_end - n_start) << BDRV_SECTOR_BITS;
+ cluster_offset = 0;
+ *host_offset = 0;
+ cur_bytes = 0;
+ *m = NULL;
+
+ while (true) {
+
+ if (!*host_offset) {
+ *host_offset = start_of_cluster(s, cluster_offset);
+ }
+
+ assert(remaining >= cur_bytes);
+
+ start += cur_bytes;
+ remaining -= cur_bytes;
+ cluster_offset += cur_bytes;
+
+ if (remaining == 0) {
+ break;
+ }
+
+ cur_bytes = remaining;
+
+ /*
+ * Now start gathering as many contiguous clusters as possible:
+ *
+ * 1. Check for overlaps with in-flight allocations
+ *
+ * a) Overlap not in the first cluster -> shorten this request and
+ * let the caller handle the rest in its next loop iteration.
+ *
+ * b) Real overlaps of two requests. Yield and restart the search
+ * for contiguous clusters (the situation could have changed
+ * while we were sleeping)
+ *
+ * c) TODO: Request starts in the same cluster as the in-flight
+ * allocation ends. Shorten the COW of the in-fight allocation,
+ * set cluster_offset to write to the same cluster and set up
+ * the right synchronisation between the in-flight request and
+ * the new one.
+ */
+ ret = handle_dependencies(bs, start, &cur_bytes, m);
+ if (ret == -EAGAIN) {
+ /* Currently handle_dependencies() doesn't yield if we already had
+ * an allocation. If it did, we would have to clean up the L2Meta
+ * structs before starting over. */
+ assert(*m == NULL);
+ goto again;
+ } else if (ret < 0) {
+ return ret;
+ } else if (cur_bytes == 0) {
+ break;
+ } else {
+ /* handle_dependencies() may have decreased cur_bytes (shortened
+ * the allocations below) so that the next dependency is processed
+ * correctly during the next loop iteration. */
+ }
+
+ /*
+ * 2. Count contiguous COPIED clusters.
+ */
+ ret = handle_copied(bs, start, &cluster_offset, &cur_bytes, m);
+ if (ret < 0) {
+ return ret;
+ } else if (ret) {
+ continue;
+ } else if (cur_bytes == 0) {
+ break;
+ }
+
+ /*
+ * 3. If the request still hasn't completed, allocate new clusters,
+ * considering any cluster_offset of steps 1c or 2.
+ */
+ ret = handle_alloc(bs, start, &cluster_offset, &cur_bytes, m);
+ if (ret < 0) {
+ return ret;
+ } else if (ret) {
+ continue;
+ } else {
+ assert(cur_bytes == 0);
+ break;
+ }
+ }
+
+ *num = (n_end - n_start) - (remaining >> BDRV_SECTOR_BITS);
+ assert(*num > 0);
+ assert(*host_offset != 0);
+
+ return 0;
+}
+
+static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
+ const uint8_t *buf, int buf_size)
+{
+ z_stream strm1, *strm = &strm1;
+ int ret, out_len;
+
+ memset(strm, 0, sizeof(*strm));
+
+ strm->next_in = (uint8_t *)buf;
+ strm->avail_in = buf_size;
+ strm->next_out = out_buf;
+ strm->avail_out = out_buf_size;
+
+ ret = inflateInit2(strm, -12);
+ if (ret != Z_OK)
+ return -1;
+ ret = inflate(strm, Z_FINISH);
+ out_len = strm->next_out - out_buf;
+ if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
+ out_len != out_buf_size) {
+ inflateEnd(strm);
+ return -1;
+ }
+ inflateEnd(strm);
+ return 0;
+}
+
+int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
+{
+ BDRVQcowState *s = bs->opaque;
+ int ret, csize, nb_csectors, sector_offset;
+ uint64_t coffset;
+
+ coffset = cluster_offset & s->cluster_offset_mask;
+ if (s->cluster_cache_offset != coffset) {
+ nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1;
+ sector_offset = coffset & 511;
+ csize = nb_csectors * 512 - sector_offset;
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
+ ret = bdrv_read(bs->file, coffset >> 9, s->cluster_data, nb_csectors);
+ if (ret < 0) {
+ return ret;
+ }
+ if (decompress_buffer(s->cluster_cache, s->cluster_size,
+ s->cluster_data + sector_offset, csize) < 0) {
+ return -EIO;
+ }
+ s->cluster_cache_offset = coffset;
+ }
+ return 0;
+}
+
+/*
+ * This discards as many clusters of nb_clusters as possible at once (i.e.
+ * all clusters in the same L2 table) and returns the number of discarded
+ * clusters.
+ */
+static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
+ unsigned int nb_clusters)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t *l2_table;
+ int l2_index;
+ int ret;
+ int i;
+
+ ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Limit nb_clusters to one L2 table */
+ nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+
+ for (i = 0; i < nb_clusters; i++) {
+ uint64_t old_offset;
+
+ old_offset = be64_to_cpu(l2_table[l2_index + i]);
+ if ((old_offset & L2E_OFFSET_MASK) == 0) {
+ continue;
+ }
+
+ /* First remove L2 entries */
+ qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+ l2_table[l2_index + i] = cpu_to_be64(0);
+
+ /* Then decrease the refcount */
+ qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
+ }
+
+ ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return nb_clusters;
+}
+
+int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
+ int nb_sectors)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t end_offset;
+ unsigned int nb_clusters;
+ int ret;
+
+ end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS);
+
+ /* Round start up and end down */
+ offset = align_offset(offset, s->cluster_size);
+ end_offset &= ~(s->cluster_size - 1);
+
+ if (offset > end_offset) {
+ return 0;
+ }
+
+ nb_clusters = size_to_clusters(s, end_offset - offset);
+
+ s->cache_discards = true;
+
+ /* Each L2 table is handled by its own loop iteration */
+ while (nb_clusters > 0) {
+ ret = discard_single_l2(bs, offset, nb_clusters);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ nb_clusters -= ret;
+ offset += (ret * s->cluster_size);
+ }
+
+ ret = 0;
+fail:
+ s->cache_discards = false;
+ qcow2_process_discards(bs, ret);
+
+ return ret;
+}
+
+/*
+ * This zeroes as many clusters of nb_clusters as possible at once (i.e.
+ * all clusters in the same L2 table) and returns the number of zeroed
+ * clusters.
+ */
+static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
+ unsigned int nb_clusters)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t *l2_table;
+ int l2_index;
+ int ret;
+ int i;
+
+ ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Limit nb_clusters to one L2 table */
+ nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+
+ for (i = 0; i < nb_clusters; i++) {
+ uint64_t old_offset;
+
+ old_offset = be64_to_cpu(l2_table[l2_index + i]);
+
+ /* Update L2 entries */
+ qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+ if (old_offset & QCOW_OFLAG_COMPRESSED) {
+ l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
+ qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
+ } else {
+ l2_table[l2_index + i] |= cpu_to_be64(QCOW_OFLAG_ZERO);
+ }
+ }
+
+ ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return nb_clusters;
+}
+
+int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors)
+{
+ BDRVQcowState *s = bs->opaque;
+ unsigned int nb_clusters;
+ int ret;
+
+ /* The zero flag is only supported by version 3 and newer */
+ if (s->qcow_version < 3) {
+ return -ENOTSUP;
+ }
+
+ /* Each L2 table is handled by its own loop iteration */
+ nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS);
+
+ s->cache_discards = true;
+
+ while (nb_clusters > 0) {
+ ret = zero_single_l2(bs, offset, nb_clusters);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ nb_clusters -= ret;
+ offset += (ret * s->cluster_size);
+ }
+
+ ret = 0;
+fail:
+ s->cache_discards = false;
+ qcow2_process_discards(bs, ret);
+
+ return ret;
+}
diff --git a/contrib/qemu/block/qcow2-refcount.c b/contrib/qemu/block/qcow2-refcount.c
new file mode 100644
index 000000000..1244693f3
--- /dev/null
+++ b/contrib/qemu/block/qcow2-refcount.c
@@ -0,0 +1,1374 @@
+/*
+ * Block driver for the QCOW version 2 format
+ *
+ * Copyright (c) 2004-2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "block/qcow2.h"
+
+static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size);
+static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
+ int64_t offset, int64_t length,
+ int addend, enum qcow2_discard_type type);
+
+
+/*********************************************************/
+/* refcount handling */
+
+int qcow2_refcount_init(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ int ret, refcount_table_size2, i;
+
+ refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
+ s->refcount_table = g_malloc(refcount_table_size2);
+ if (s->refcount_table_size > 0) {
+ BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
+ ret = bdrv_pread(bs->file, s->refcount_table_offset,
+ s->refcount_table, refcount_table_size2);
+ if (ret != refcount_table_size2)
+ goto fail;
+ for(i = 0; i < s->refcount_table_size; i++)
+ be64_to_cpus(&s->refcount_table[i]);
+ }
+ return 0;
+ fail:
+ return -ENOMEM;
+}
+
+void qcow2_refcount_close(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ g_free(s->refcount_table);
+}
+
+
+static int load_refcount_block(BlockDriverState *bs,
+ int64_t refcount_block_offset,
+ void **refcount_block)
+{
+ BDRVQcowState *s = bs->opaque;
+ int ret;
+
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_LOAD);
+ ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
+ refcount_block);
+
+ return ret;
+}
+
+/*
+ * Returns the refcount of the cluster given by its index. Any non-negative
+ * return value is the refcount of the cluster, negative values are -errno
+ * and indicate an error.
+ */
+static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
+{
+ BDRVQcowState *s = bs->opaque;
+ int refcount_table_index, block_index;
+ int64_t refcount_block_offset;
+ int ret;
+ uint16_t *refcount_block;
+ uint16_t refcount;
+
+ refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
+ if (refcount_table_index >= s->refcount_table_size)
+ return 0;
+ refcount_block_offset = s->refcount_table[refcount_table_index];
+ if (!refcount_block_offset)
+ return 0;
+
+ ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
+ (void**) &refcount_block);
+ if (ret < 0) {
+ return ret;
+ }
+
+ block_index = cluster_index &
+ ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
+ refcount = be16_to_cpu(refcount_block[block_index]);
+
+ ret = qcow2_cache_put(bs, s->refcount_block_cache,
+ (void**) &refcount_block);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return refcount;
+}
+
+/*
+ * Rounds the refcount table size up to avoid growing the table for each single
+ * refcount block that is allocated.
+ */
+static unsigned int next_refcount_table_size(BDRVQcowState *s,
+ unsigned int min_size)
+{
+ unsigned int min_clusters = (min_size >> (s->cluster_bits - 3)) + 1;
+ unsigned int refcount_table_clusters =
+ MAX(1, s->refcount_table_size >> (s->cluster_bits - 3));
+
+ while (min_clusters > refcount_table_clusters) {
+ refcount_table_clusters = (refcount_table_clusters * 3 + 1) / 2;
+ }
+
+ return refcount_table_clusters << (s->cluster_bits - 3);
+}
+
+
+/* Checks if two offsets are described by the same refcount block */
+static int in_same_refcount_block(BDRVQcowState *s, uint64_t offset_a,
+ uint64_t offset_b)
+{
+ uint64_t block_a = offset_a >> (2 * s->cluster_bits - REFCOUNT_SHIFT);
+ uint64_t block_b = offset_b >> (2 * s->cluster_bits - REFCOUNT_SHIFT);
+
+ return (block_a == block_b);
+}
+
+/*
+ * Loads a refcount block. If it doesn't exist yet, it is allocated first
+ * (including growing the refcount table if needed).
+ *
+ * Returns 0 on success or -errno in error case
+ */
+static int alloc_refcount_block(BlockDriverState *bs,
+ int64_t cluster_index, uint16_t **refcount_block)
+{
+ BDRVQcowState *s = bs->opaque;
+ unsigned int refcount_table_index;
+ int ret;
+
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);
+
+ /* Find the refcount block for the given cluster */
+ refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
+
+ if (refcount_table_index < s->refcount_table_size) {
+
+ uint64_t refcount_block_offset =
+ s->refcount_table[refcount_table_index] & REFT_OFFSET_MASK;
+
+ /* If it's already there, we're done */
+ if (refcount_block_offset) {
+ return load_refcount_block(bs, refcount_block_offset,
+ (void**) refcount_block);
+ }
+ }
+
+ /*
+ * If we came here, we need to allocate something. Something is at least
+ * a cluster for the new refcount block. It may also include a new refcount
+ * table if the old refcount table is too small.
+ *
+ * Note that allocating clusters here needs some special care:
+ *
+ * - We can't use the normal qcow2_alloc_clusters(), it would try to
+ * increase the refcount and very likely we would end up with an endless
+ * recursion. Instead we must place the refcount blocks in a way that
+ * they can describe them themselves.
+ *
+ * - We need to consider that at this point we are inside update_refcounts
+ * and doing the initial refcount increase. This means that some clusters
+ * have already been allocated by the caller, but their refcount isn't
+ * accurate yet. free_cluster_index tells us where this allocation ends
+ * as long as we don't overwrite it by freeing clusters.
+ *
+ * - alloc_clusters_noref and qcow2_free_clusters may load a different
+ * refcount block into the cache
+ */
+
+ *refcount_block = NULL;
+
+ /* We write to the refcount table, so we might depend on L2 tables */
+ ret = qcow2_cache_flush(bs, s->l2_table_cache);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Allocate the refcount block itself and mark it as used */
+ int64_t new_block = alloc_clusters_noref(bs, s->cluster_size);
+ if (new_block < 0) {
+ return new_block;
+ }
+
+#ifdef DEBUG_ALLOC2
+ fprintf(stderr, "qcow2: Allocate refcount block %d for %" PRIx64
+ " at %" PRIx64 "\n",
+ refcount_table_index, cluster_index << s->cluster_bits, new_block);
+#endif
+
+ if (in_same_refcount_block(s, new_block, cluster_index << s->cluster_bits)) {
+ /* Zero the new refcount block before updating it */
+ ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
+ (void**) refcount_block);
+ if (ret < 0) {
+ goto fail_block;
+ }
+
+ memset(*refcount_block, 0, s->cluster_size);
+
+ /* The block describes itself, need to update the cache */
+ int block_index = (new_block >> s->cluster_bits) &
+ ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
+ (*refcount_block)[block_index] = cpu_to_be16(1);
+ } else {
+ /* Described somewhere else. This can recurse at most twice before we
+ * arrive at a block that describes itself. */
+ ret = update_refcount(bs, new_block, s->cluster_size, 1,
+ QCOW2_DISCARD_NEVER);
+ if (ret < 0) {
+ goto fail_block;
+ }
+
+ ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+ if (ret < 0) {
+ goto fail_block;
+ }
+
+ /* Initialize the new refcount block only after updating its refcount,
+ * update_refcount uses the refcount cache itself */
+ ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
+ (void**) refcount_block);
+ if (ret < 0) {
+ goto fail_block;
+ }
+
+ memset(*refcount_block, 0, s->cluster_size);
+ }
+
+ /* Now the new refcount block needs to be written to disk */
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE);
+ qcow2_cache_entry_mark_dirty(s->refcount_block_cache, *refcount_block);
+ ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+ if (ret < 0) {
+ goto fail_block;
+ }
+
+ /* If the refcount table is big enough, just hook the block up there */
+ if (refcount_table_index < s->refcount_table_size) {
+ uint64_t data64 = cpu_to_be64(new_block);
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_HOOKUP);
+ ret = bdrv_pwrite_sync(bs->file,
+ s->refcount_table_offset + refcount_table_index * sizeof(uint64_t),
+ &data64, sizeof(data64));
+ if (ret < 0) {
+ goto fail_block;
+ }
+
+ s->refcount_table[refcount_table_index] = new_block;
+ return 0;
+ }
+
+ ret = qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block);
+ if (ret < 0) {
+ goto fail_block;
+ }
+
+ /*
+ * If we come here, we need to grow the refcount table. Again, a new
+ * refcount table needs some space and we can't simply allocate to avoid
+ * endless recursion.
+ *
+ * Therefore let's grab new refcount blocks at the end of the image, which
+ * will describe themselves and the new refcount table. This way we can
+ * reference them only in the new table and do the switch to the new
+ * refcount table at once without producing an inconsistent state in
+ * between.
+ */
+ BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_GROW);
+
+ /* Calculate the number of refcount blocks needed so far */
+ uint64_t refcount_block_clusters = 1 << (s->cluster_bits - REFCOUNT_SHIFT);
+ uint64_t blocks_used = (s->free_cluster_index +
+ refcount_block_clusters - 1) / refcount_block_clusters;
+
+ /* And now we need at least one block more for the new metadata */
+ uint64_t table_size = next_refcount_table_size(s, blocks_used + 1);
+ uint64_t last_table_size;
+ uint64_t blocks_clusters;
+ do {
+ uint64_t table_clusters =
+ size_to_clusters(s, table_size * sizeof(uint64_t));
+ blocks_clusters = 1 +
+ ((table_clusters + refcount_block_clusters - 1)
+ / refcount_block_clusters);
+ uint64_t meta_clusters = table_clusters + blocks_clusters;
+
+ last_table_size = table_size;
+ table_size = next_refcount_table_size(s, blocks_used +
+ ((meta_clusters + refcount_block_clusters - 1)
+ / refcount_block_clusters));
+
+ } while (last_table_size != table_size);
+
+#ifdef DEBUG_ALLOC2
+ fprintf(stderr, "qcow2: Grow refcount table %" PRId32 " => %" PRId64 "\n",
+ s->refcount_table_size, table_size);
+#endif
+
+ /* Create the new refcount table and blocks */
+ uint64_t meta_offset = (blocks_used * refcount_block_clusters) *
+ s->cluster_size;
+ uint64_t table_offset = meta_offset + blocks_clusters * s->cluster_size;
+ uint16_t *new_blocks = g_malloc0(blocks_clusters * s->cluster_size);
+ uint64_t *new_table = g_malloc0(table_size * sizeof(uint64_t));
+
+ assert(meta_offset >= (s->free_cluster_index * s->cluster_size));
+
+ /* Fill the new refcount table */
+ memcpy(new_table, s->refcount_table,
+ s->refcount_table_size * sizeof(uint64_t));
+ new_table[refcount_table_index] = new_block;
+
+ int i;
+ for (i = 0; i < blocks_clusters; i++) {
+ new_table[blocks_used + i] = meta_offset + (i * s->cluster_size);
+ }
+
+ /* Fill the refcount blocks */
+ uint64_t table_clusters = size_to_clusters(s, table_size * sizeof(uint64_t));
+ int block = 0;
+ for (i = 0; i < table_clusters + blocks_clusters; i++) {
+ new_blocks[block++] = cpu_to_be16(1);
+ }
+
+ /* Write refcount blocks to disk */
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS);
+ ret = bdrv_pwrite_sync(bs->file, meta_offset, new_blocks,
+ blocks_clusters * s->cluster_size);
+ g_free(new_blocks);
+ if (ret < 0) {
+ goto fail_table;
+ }
+
+ /* Write refcount table to disk */
+ for(i = 0; i < table_size; i++) {
+ cpu_to_be64s(&new_table[i]);
+ }
+
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE);
+ ret = bdrv_pwrite_sync(bs->file, table_offset, new_table,
+ table_size * sizeof(uint64_t));
+ if (ret < 0) {
+ goto fail_table;
+ }
+
+ for(i = 0; i < table_size; i++) {
+ be64_to_cpus(&new_table[i]);
+ }
+
+ /* Hook up the new refcount table in the qcow2 header */
+ uint8_t data[12];
+ cpu_to_be64w((uint64_t*)data, table_offset);
+ cpu_to_be32w((uint32_t*)(data + 8), table_clusters);
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE);
+ ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, refcount_table_offset),
+ data, sizeof(data));
+ if (ret < 0) {
+ goto fail_table;
+ }
+
+ /* And switch it in memory */
+ uint64_t old_table_offset = s->refcount_table_offset;
+ uint64_t old_table_size = s->refcount_table_size;
+
+ g_free(s->refcount_table);
+ s->refcount_table = new_table;
+ s->refcount_table_size = table_size;
+ s->refcount_table_offset = table_offset;
+
+ /* Free old table. Remember, we must not change free_cluster_index */
+ uint64_t old_free_cluster_index = s->free_cluster_index;
+ qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t),
+ QCOW2_DISCARD_OTHER);
+ s->free_cluster_index = old_free_cluster_index;
+
+ ret = load_refcount_block(bs, new_block, (void**) refcount_block);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return 0;
+
+fail_table:
+ g_free(new_table);
+fail_block:
+ if (*refcount_block != NULL) {
+ qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block);
+ }
+ return ret;
+}
+
+void qcow2_process_discards(BlockDriverState *bs, int ret)
+{
+ BDRVQcowState *s = bs->opaque;
+ Qcow2DiscardRegion *d, *next;
+
+ QTAILQ_FOREACH_SAFE(d, &s->discards, next, next) {
+ QTAILQ_REMOVE(&s->discards, d, next);
+
+ /* Discard is optional, ignore the return value */
+ if (ret >= 0) {
+ bdrv_discard(bs->file,
+ d->offset >> BDRV_SECTOR_BITS,
+ d->bytes >> BDRV_SECTOR_BITS);
+ }
+
+ g_free(d);
+ }
+}
+
+static void update_refcount_discard(BlockDriverState *bs,
+ uint64_t offset, uint64_t length)
+{
+ BDRVQcowState *s = bs->opaque;
+ Qcow2DiscardRegion *d, *p, *next;
+
+ QTAILQ_FOREACH(d, &s->discards, next) {
+ uint64_t new_start = MIN(offset, d->offset);
+ uint64_t new_end = MAX(offset + length, d->offset + d->bytes);
+
+ if (new_end - new_start <= length + d->bytes) {
+ /* There can't be any overlap, areas ending up here have no
+ * references any more and therefore shouldn't get freed another
+ * time. */
+ assert(d->bytes + length == new_end - new_start);
+ d->offset = new_start;
+ d->bytes = new_end - new_start;
+ goto found;
+ }
+ }
+
+ d = g_malloc(sizeof(*d));
+ *d = (Qcow2DiscardRegion) {
+ .bs = bs,
+ .offset = offset,
+ .bytes = length,
+ };
+ QTAILQ_INSERT_TAIL(&s->discards, d, next);
+
+found:
+ /* Merge discard requests if they are adjacent now */
+ QTAILQ_FOREACH_SAFE(p, &s->discards, next, next) {
+ if (p == d
+ || p->offset > d->offset + d->bytes
+ || d->offset > p->offset + p->bytes)
+ {
+ continue;
+ }
+
+ /* Still no overlap possible */
+ assert(p->offset == d->offset + d->bytes
+ || d->offset == p->offset + p->bytes);
+
+ QTAILQ_REMOVE(&s->discards, p, next);
+ d->offset = MIN(d->offset, p->offset);
+ d->bytes += p->bytes;
+ }
+}
+
+/* XXX: cache several refcount block clusters ? */
+static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
+ int64_t offset, int64_t length, int addend, enum qcow2_discard_type type)
+{
+ BDRVQcowState *s = bs->opaque;
+ int64_t start, last, cluster_offset;
+ uint16_t *refcount_block = NULL;
+ int64_t old_table_index = -1;
+ int ret;
+
+#ifdef DEBUG_ALLOC2
+ fprintf(stderr, "update_refcount: offset=%" PRId64 " size=%" PRId64 " addend=%d\n",
+ offset, length, addend);
+#endif
+ if (length < 0) {
+ return -EINVAL;
+ } else if (length == 0) {
+ return 0;
+ }
+
+ if (addend < 0) {
+ qcow2_cache_set_dependency(bs, s->refcount_block_cache,
+ s->l2_table_cache);
+ }
+
+ start = offset & ~(s->cluster_size - 1);
+ last = (offset + length - 1) & ~(s->cluster_size - 1);
+ for(cluster_offset = start; cluster_offset <= last;
+ cluster_offset += s->cluster_size)
+ {
+ int block_index, refcount;
+ int64_t cluster_index = cluster_offset >> s->cluster_bits;
+ int64_t table_index =
+ cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
+
+ /* Load the refcount block and allocate it if needed */
+ if (table_index != old_table_index) {
+ if (refcount_block) {
+ ret = qcow2_cache_put(bs, s->refcount_block_cache,
+ (void**) &refcount_block);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+
+ ret = alloc_refcount_block(bs, cluster_index, &refcount_block);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+ old_table_index = table_index;
+
+ qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refcount_block);
+
+ /* we can update the count and save it */
+ block_index = cluster_index &
+ ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
+
+ refcount = be16_to_cpu(refcount_block[block_index]);
+ refcount += addend;
+ if (refcount < 0 || refcount > 0xffff) {
+ ret = -EINVAL;
+ goto fail;
+ }
+ if (refcount == 0 && cluster_index < s->free_cluster_index) {
+ s->free_cluster_index = cluster_index;
+ }
+ refcount_block[block_index] = cpu_to_be16(refcount);
+
+ if (refcount == 0 && s->discard_passthrough[type]) {
+ update_refcount_discard(bs, cluster_offset, s->cluster_size);
+ }
+ }
+
+ ret = 0;
+fail:
+ if (!s->cache_discards) {
+ qcow2_process_discards(bs, ret);
+ }
+
+ /* Write last changed block to disk */
+ if (refcount_block) {
+ int wret;
+ wret = qcow2_cache_put(bs, s->refcount_block_cache,
+ (void**) &refcount_block);
+ if (wret < 0) {
+ return ret < 0 ? ret : wret;
+ }
+ }
+
+ /*
+ * Try do undo any updates if an error is returned (This may succeed in
+ * some cases like ENOSPC for allocating a new refcount block)
+ */
+ if (ret < 0) {
+ int dummy;
+ dummy = update_refcount(bs, offset, cluster_offset - offset, -addend,
+ QCOW2_DISCARD_NEVER);
+ (void)dummy;
+ }
+
+ return ret;
+}
+
+/*
+ * Increases or decreases the refcount of a given cluster by one.
+ * addend must be 1 or -1.
+ *
+ * If the return value is non-negative, it is the new refcount of the cluster.
+ * If it is negative, it is -errno and indicates an error.
+ */
+static int update_cluster_refcount(BlockDriverState *bs,
+ int64_t cluster_index,
+ int addend,
+ enum qcow2_discard_type type)
+{
+ BDRVQcowState *s = bs->opaque;
+ int ret;
+
+ ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend,
+ type);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return get_refcount(bs, cluster_index);
+}
+
+
+
+/*********************************************************/
+/* cluster allocation functions */
+
+
+
+/* return < 0 if error */
+static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size)
+{
+ BDRVQcowState *s = bs->opaque;
+ int i, nb_clusters, refcount;
+
+ nb_clusters = size_to_clusters(s, size);
+retry:
+ for(i = 0; i < nb_clusters; i++) {
+ int64_t next_cluster_index = s->free_cluster_index++;
+ refcount = get_refcount(bs, next_cluster_index);
+
+ if (refcount < 0) {
+ return refcount;
+ } else if (refcount != 0) {
+ goto retry;
+ }
+ }
+#ifdef DEBUG_ALLOC2
+ fprintf(stderr, "alloc_clusters: size=%" PRId64 " -> %" PRId64 "\n",
+ size,
+ (s->free_cluster_index - nb_clusters) << s->cluster_bits);
+#endif
+ return (s->free_cluster_index - nb_clusters) << s->cluster_bits;
+}
+
+int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
+{
+ int64_t offset;
+ int ret;
+
+ BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC);
+ offset = alloc_clusters_noref(bs, size);
+ if (offset < 0) {
+ return offset;
+ }
+
+ ret = update_refcount(bs, offset, size, 1, QCOW2_DISCARD_NEVER);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return offset;
+}
+
+int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
+ int nb_clusters)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t cluster_index;
+ uint64_t old_free_cluster_index;
+ int i, refcount, ret;
+
+ /* Check how many clusters there are free */
+ cluster_index = offset >> s->cluster_bits;
+ for(i = 0; i < nb_clusters; i++) {
+ refcount = get_refcount(bs, cluster_index++);
+
+ if (refcount < 0) {
+ return refcount;
+ } else if (refcount != 0) {
+ break;
+ }
+ }
+
+ /* And then allocate them */
+ old_free_cluster_index = s->free_cluster_index;
+ s->free_cluster_index = cluster_index + i;
+
+ ret = update_refcount(bs, offset, i << s->cluster_bits, 1,
+ QCOW2_DISCARD_NEVER);
+ if (ret < 0) {
+ return ret;
+ }
+
+ s->free_cluster_index = old_free_cluster_index;
+
+ return i;
+}
+
+/* only used to allocate compressed sectors. We try to allocate
+ contiguous sectors. size must be <= cluster_size */
+int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
+{
+ BDRVQcowState *s = bs->opaque;
+ int64_t offset, cluster_offset;
+ int free_in_cluster;
+
+ BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_BYTES);
+ assert(size > 0 && size <= s->cluster_size);
+ if (s->free_byte_offset == 0) {
+ offset = qcow2_alloc_clusters(bs, s->cluster_size);
+ if (offset < 0) {
+ return offset;
+ }
+ s->free_byte_offset = offset;
+ }
+ redo:
+ free_in_cluster = s->cluster_size -
+ (s->free_byte_offset & (s->cluster_size - 1));
+ if (size <= free_in_cluster) {
+ /* enough space in current cluster */
+ offset = s->free_byte_offset;
+ s->free_byte_offset += size;
+ free_in_cluster -= size;
+ if (free_in_cluster == 0)
+ s->free_byte_offset = 0;
+ if ((offset & (s->cluster_size - 1)) != 0)
+ update_cluster_refcount(bs, offset >> s->cluster_bits, 1,
+ QCOW2_DISCARD_NEVER);
+ } else {
+ offset = qcow2_alloc_clusters(bs, s->cluster_size);
+ if (offset < 0) {
+ return offset;
+ }
+ cluster_offset = s->free_byte_offset & ~(s->cluster_size - 1);
+ if ((cluster_offset + s->cluster_size) == offset) {
+ /* we are lucky: contiguous data */
+ offset = s->free_byte_offset;
+ update_cluster_refcount(bs, offset >> s->cluster_bits, 1,
+ QCOW2_DISCARD_NEVER);
+ s->free_byte_offset += size;
+ } else {
+ s->free_byte_offset = offset;
+ goto redo;
+ }
+ }
+
+ /* The cluster refcount was incremented, either by qcow2_alloc_clusters()
+ * or explicitly by update_cluster_refcount(). Refcount blocks must be
+ * flushed before the caller's L2 table updates.
+ */
+ qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache);
+ return offset;
+}
+
+void qcow2_free_clusters(BlockDriverState *bs,
+ int64_t offset, int64_t size,
+ enum qcow2_discard_type type)
+{
+ int ret;
+
+ BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_FREE);
+ ret = update_refcount(bs, offset, size, -1, type);
+ if (ret < 0) {
+ fprintf(stderr, "qcow2_free_clusters failed: %s\n", strerror(-ret));
+ /* TODO Remember the clusters to free them later and avoid leaking */
+ }
+}
+
+/*
+ * Free a cluster using its L2 entry (handles clusters of all types, e.g.
+ * normal cluster, compressed cluster, etc.)
+ */
+void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
+ int nb_clusters, enum qcow2_discard_type type)
+{
+ BDRVQcowState *s = bs->opaque;
+
+ switch (qcow2_get_cluster_type(l2_entry)) {
+ case QCOW2_CLUSTER_COMPRESSED:
+ {
+ int nb_csectors;
+ nb_csectors = ((l2_entry >> s->csize_shift) &
+ s->csize_mask) + 1;
+ qcow2_free_clusters(bs,
+ (l2_entry & s->cluster_offset_mask) & ~511,
+ nb_csectors * 512, type);
+ }
+ break;
+ case QCOW2_CLUSTER_NORMAL:
+ qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK,
+ nb_clusters << s->cluster_bits, type);
+ break;
+ case QCOW2_CLUSTER_UNALLOCATED:
+ case QCOW2_CLUSTER_ZERO:
+ break;
+ default:
+ abort();
+ }
+}
+
+
+
+/*********************************************************/
+/* snapshots and image creation */
+
+
+
+/* update the refcounts of snapshots and the copied flag */
+int qcow2_update_snapshot_refcount(BlockDriverState *bs,
+ int64_t l1_table_offset, int l1_size, int addend)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated;
+ int64_t old_offset, old_l2_offset;
+ int i, j, l1_modified = 0, nb_csectors, refcount;
+ int ret;
+
+ l2_table = NULL;
+ l1_table = NULL;
+ l1_size2 = l1_size * sizeof(uint64_t);
+
+ s->cache_discards = true;
+
+ /* WARNING: qcow2_snapshot_goto relies on this function not using the
+ * l1_table_offset when it is the current s->l1_table_offset! Be careful
+ * when changing this! */
+ if (l1_table_offset != s->l1_table_offset) {
+ l1_table = g_malloc0(align_offset(l1_size2, 512));
+ l1_allocated = 1;
+
+ ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ for(i = 0;i < l1_size; i++)
+ be64_to_cpus(&l1_table[i]);
+ } else {
+ assert(l1_size == s->l1_size);
+ l1_table = s->l1_table;
+ l1_allocated = 0;
+ }
+
+ for(i = 0; i < l1_size; i++) {
+ l2_offset = l1_table[i];
+ if (l2_offset) {
+ old_l2_offset = l2_offset;
+ l2_offset &= L1E_OFFSET_MASK;
+
+ ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
+ (void**) &l2_table);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ for(j = 0; j < s->l2_size; j++) {
+ offset = be64_to_cpu(l2_table[j]);
+ if (offset != 0) {
+ old_offset = offset;
+ offset &= ~QCOW_OFLAG_COPIED;
+ if (offset & QCOW_OFLAG_COMPRESSED) {
+ nb_csectors = ((offset >> s->csize_shift) &
+ s->csize_mask) + 1;
+ if (addend != 0) {
+ int ret;
+ ret = update_refcount(bs,
+ (offset & s->cluster_offset_mask) & ~511,
+ nb_csectors * 512, addend,
+ QCOW2_DISCARD_SNAPSHOT);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+ /* compressed clusters are never modified */
+ refcount = 2;
+ } else {
+ uint64_t cluster_index = (offset & L2E_OFFSET_MASK) >> s->cluster_bits;
+ if (addend != 0) {
+ refcount = update_cluster_refcount(bs, cluster_index, addend,
+ QCOW2_DISCARD_SNAPSHOT);
+ } else {
+ refcount = get_refcount(bs, cluster_index);
+ }
+
+ if (refcount < 0) {
+ ret = refcount;
+ goto fail;
+ }
+ }
+
+ if (refcount == 1) {
+ offset |= QCOW_OFLAG_COPIED;
+ }
+ if (offset != old_offset) {
+ if (addend > 0) {
+ qcow2_cache_set_dependency(bs, s->l2_table_cache,
+ s->refcount_block_cache);
+ }
+ l2_table[j] = cpu_to_be64(offset);
+ qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+ }
+ }
+ }
+
+ ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+ if (ret < 0) {
+ goto fail;
+ }
+
+
+ if (addend != 0) {
+ refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend,
+ QCOW2_DISCARD_SNAPSHOT);
+ } else {
+ refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
+ }
+ if (refcount < 0) {
+ ret = refcount;
+ goto fail;
+ } else if (refcount == 1) {
+ l2_offset |= QCOW_OFLAG_COPIED;
+ }
+ if (l2_offset != old_l2_offset) {
+ l1_table[i] = l2_offset;
+ l1_modified = 1;
+ }
+ }
+ }
+
+ ret = bdrv_flush(bs);
+fail:
+ if (l2_table) {
+ qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+ }
+
+ s->cache_discards = false;
+ qcow2_process_discards(bs, ret);
+
+ /* Update L1 only if it isn't deleted anyway (addend = -1) */
+ if (ret == 0 && addend >= 0 && l1_modified) {
+ for (i = 0; i < l1_size; i++) {
+ cpu_to_be64s(&l1_table[i]);
+ }
+
+ ret = bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table, l1_size2);
+
+ for (i = 0; i < l1_size; i++) {
+ be64_to_cpus(&l1_table[i]);
+ }
+ }
+ if (l1_allocated)
+ g_free(l1_table);
+ return ret;
+}
+
+
+
+
+/*********************************************************/
+/* refcount checking functions */
+
+
+
+/*
+ * Increases the refcount for a range of clusters in a given refcount table.
+ * This is used to construct a temporary refcount table out of L1 and L2 tables
+ * which can be compared the the refcount table saved in the image.
+ *
+ * Modifies the number of errors in res.
+ */
+static void inc_refcounts(BlockDriverState *bs,
+ BdrvCheckResult *res,
+ uint16_t *refcount_table,
+ int refcount_table_size,
+ int64_t offset, int64_t size)
+{
+ BDRVQcowState *s = bs->opaque;
+ int64_t start, last, cluster_offset;
+ int k;
+
+ if (size <= 0)
+ return;
+
+ start = offset & ~(s->cluster_size - 1);
+ last = (offset + size - 1) & ~(s->cluster_size - 1);
+ for(cluster_offset = start; cluster_offset <= last;
+ cluster_offset += s->cluster_size) {
+ k = cluster_offset >> s->cluster_bits;
+ if (k < 0) {
+ fprintf(stderr, "ERROR: invalid cluster offset=0x%" PRIx64 "\n",
+ cluster_offset);
+ res->corruptions++;
+ } else if (k >= refcount_table_size) {
+ fprintf(stderr, "Warning: cluster offset=0x%" PRIx64 " is after "
+ "the end of the image file, can't properly check refcounts.\n",
+ cluster_offset);
+ res->check_errors++;
+ } else {
+ if (++refcount_table[k] == 0) {
+ fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
+ "\n", cluster_offset);
+ res->corruptions++;
+ }
+ }
+ }
+}
+
+/* Flags for check_refcounts_l1() and check_refcounts_l2() */
+enum {
+ CHECK_OFLAG_COPIED = 0x1, /* check QCOW_OFLAG_COPIED matches refcount */
+ CHECK_FRAG_INFO = 0x2, /* update BlockFragInfo counters */
+};
+
+/*
+ * Increases the refcount in the given refcount table for the all clusters
+ * referenced in the L2 table. While doing so, performs some checks on L2
+ * entries.
+ *
+ * Returns the number of errors found by the checks or -errno if an internal
+ * error occurred.
+ */
+static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
+ uint16_t *refcount_table, int refcount_table_size, int64_t l2_offset,
+ int flags)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t *l2_table, l2_entry;
+ uint64_t next_contiguous_offset = 0;
+ int i, l2_size, nb_csectors, refcount;
+
+ /* Read L2 table from disk */
+ l2_size = s->l2_size * sizeof(uint64_t);
+ l2_table = g_malloc(l2_size);
+
+ if (bdrv_pread(bs->file, l2_offset, l2_table, l2_size) != l2_size)
+ goto fail;
+
+ /* Do the actual checks */
+ for(i = 0; i < s->l2_size; i++) {
+ l2_entry = be64_to_cpu(l2_table[i]);
+
+ switch (qcow2_get_cluster_type(l2_entry)) {
+ case QCOW2_CLUSTER_COMPRESSED:
+ /* Compressed clusters don't have QCOW_OFLAG_COPIED */
+ if (l2_entry & QCOW_OFLAG_COPIED) {
+ fprintf(stderr, "ERROR: cluster %" PRId64 ": "
+ "copied flag must never be set for compressed "
+ "clusters\n", l2_entry >> s->cluster_bits);
+ l2_entry &= ~QCOW_OFLAG_COPIED;
+ res->corruptions++;
+ }
+
+ /* Mark cluster as used */
+ nb_csectors = ((l2_entry >> s->csize_shift) &
+ s->csize_mask) + 1;
+ l2_entry &= s->cluster_offset_mask;
+ inc_refcounts(bs, res, refcount_table, refcount_table_size,
+ l2_entry & ~511, nb_csectors * 512);
+
+ if (flags & CHECK_FRAG_INFO) {
+ res->bfi.allocated_clusters++;
+ res->bfi.compressed_clusters++;
+
+ /* Compressed clusters are fragmented by nature. Since they
+ * take up sub-sector space but we only have sector granularity
+ * I/O we need to re-read the same sectors even for adjacent
+ * compressed clusters.
+ */
+ res->bfi.fragmented_clusters++;
+ }
+ break;
+
+ case QCOW2_CLUSTER_ZERO:
+ if ((l2_entry & L2E_OFFSET_MASK) == 0) {
+ break;
+ }
+ /* fall through */
+
+ case QCOW2_CLUSTER_NORMAL:
+ {
+ /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
+ uint64_t offset = l2_entry & L2E_OFFSET_MASK;
+
+ if (flags & CHECK_OFLAG_COPIED) {
+ refcount = get_refcount(bs, offset >> s->cluster_bits);
+ if (refcount < 0) {
+ fprintf(stderr, "Can't get refcount for offset %"
+ PRIx64 ": %s\n", l2_entry, strerror(-refcount));
+ goto fail;
+ }
+ if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) {
+ fprintf(stderr, "ERROR OFLAG_COPIED: offset=%"
+ PRIx64 " refcount=%d\n", l2_entry, refcount);
+ res->corruptions++;
+ }
+ }
+
+ if (flags & CHECK_FRAG_INFO) {
+ res->bfi.allocated_clusters++;
+ if (next_contiguous_offset &&
+ offset != next_contiguous_offset) {
+ res->bfi.fragmented_clusters++;
+ }
+ next_contiguous_offset = offset + s->cluster_size;
+ }
+
+ /* Mark cluster as used */
+ inc_refcounts(bs, res, refcount_table,refcount_table_size,
+ offset, s->cluster_size);
+
+ /* Correct offsets are cluster aligned */
+ if (offset & (s->cluster_size - 1)) {
+ fprintf(stderr, "ERROR offset=%" PRIx64 ": Cluster is not "
+ "properly aligned; L2 entry corrupted.\n", offset);
+ res->corruptions++;
+ }
+ break;
+ }
+
+ case QCOW2_CLUSTER_UNALLOCATED:
+ break;
+
+ default:
+ abort();
+ }
+ }
+
+ g_free(l2_table);
+ return 0;
+
+fail:
+ fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n");
+ g_free(l2_table);
+ return -EIO;
+}
+
+/*
+ * Increases the refcount for the L1 table, its L2 tables and all referenced
+ * clusters in the given refcount table. While doing so, performs some checks
+ * on L1 and L2 entries.
+ *
+ * Returns the number of errors found by the checks or -errno if an internal
+ * error occurred.
+ */
+static int check_refcounts_l1(BlockDriverState *bs,
+ BdrvCheckResult *res,
+ uint16_t *refcount_table,
+ int refcount_table_size,
+ int64_t l1_table_offset, int l1_size,
+ int flags)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t *l1_table, l2_offset, l1_size2;
+ int i, refcount, ret;
+
+ l1_size2 = l1_size * sizeof(uint64_t);
+
+ /* Mark L1 table as used */
+ inc_refcounts(bs, res, refcount_table, refcount_table_size,
+ l1_table_offset, l1_size2);
+
+ /* Read L1 table entries from disk */
+ if (l1_size2 == 0) {
+ l1_table = NULL;
+ } else {
+ l1_table = g_malloc(l1_size2);
+ if (bdrv_pread(bs->file, l1_table_offset,
+ l1_table, l1_size2) != l1_size2)
+ goto fail;
+ for(i = 0;i < l1_size; i++)
+ be64_to_cpus(&l1_table[i]);
+ }
+
+ /* Do the actual checks */
+ for(i = 0; i < l1_size; i++) {
+ l2_offset = l1_table[i];
+ if (l2_offset) {
+ /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
+ if (flags & CHECK_OFLAG_COPIED) {
+ refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED)
+ >> s->cluster_bits);
+ if (refcount < 0) {
+ fprintf(stderr, "Can't get refcount for l2_offset %"
+ PRIx64 ": %s\n", l2_offset, strerror(-refcount));
+ goto fail;
+ }
+ if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED) != 0)) {
+ fprintf(stderr, "ERROR OFLAG_COPIED: l2_offset=%" PRIx64
+ " refcount=%d\n", l2_offset, refcount);
+ res->corruptions++;
+ }
+ }
+
+ /* Mark L2 table as used */
+ l2_offset &= L1E_OFFSET_MASK;
+ inc_refcounts(bs, res, refcount_table, refcount_table_size,
+ l2_offset, s->cluster_size);
+
+ /* L2 tables are cluster aligned */
+ if (l2_offset & (s->cluster_size - 1)) {
+ fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
+ "cluster aligned; L1 entry corrupted\n", l2_offset);
+ res->corruptions++;
+ }
+
+ /* Process and check L2 entries */
+ ret = check_refcounts_l2(bs, res, refcount_table,
+ refcount_table_size, l2_offset, flags);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+ }
+ g_free(l1_table);
+ return 0;
+
+fail:
+ fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
+ res->check_errors++;
+ g_free(l1_table);
+ return -EIO;
+}
+
+/*
+ * Checks an image for refcount consistency.
+ *
+ * Returns 0 if no errors are found, the number of errors in case the image is
+ * detected as corrupted, and -errno when an internal error occurred.
+ */
+int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
+ BdrvCheckMode fix)
+{
+ BDRVQcowState *s = bs->opaque;
+ int64_t size, i, highest_cluster;
+ int nb_clusters, refcount1, refcount2;
+ QCowSnapshot *sn;
+ uint16_t *refcount_table;
+ int ret;
+
+ size = bdrv_getlength(bs->file);
+ nb_clusters = size_to_clusters(s, size);
+ refcount_table = g_malloc0(nb_clusters * sizeof(uint16_t));
+
+ res->bfi.total_clusters =
+ size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);
+
+ /* header */
+ inc_refcounts(bs, res, refcount_table, nb_clusters,
+ 0, s->cluster_size);
+
+ /* current L1 table */
+ ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
+ s->l1_table_offset, s->l1_size,
+ CHECK_OFLAG_COPIED | CHECK_FRAG_INFO);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* snapshots */
+ for(i = 0; i < s->nb_snapshots; i++) {
+ sn = s->snapshots + i;
+ ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
+ sn->l1_table_offset, sn->l1_size, 0);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+ inc_refcounts(bs, res, refcount_table, nb_clusters,
+ s->snapshots_offset, s->snapshots_size);
+
+ /* refcount data */
+ inc_refcounts(bs, res, refcount_table, nb_clusters,
+ s->refcount_table_offset,
+ s->refcount_table_size * sizeof(uint64_t));
+
+ for(i = 0; i < s->refcount_table_size; i++) {
+ uint64_t offset, cluster;
+ offset = s->refcount_table[i];
+ cluster = offset >> s->cluster_bits;
+
+ /* Refcount blocks are cluster aligned */
+ if (offset & (s->cluster_size - 1)) {
+ fprintf(stderr, "ERROR refcount block %" PRId64 " is not "
+ "cluster aligned; refcount table entry corrupted\n", i);
+ res->corruptions++;
+ continue;
+ }
+
+ if (cluster >= nb_clusters) {
+ fprintf(stderr, "ERROR refcount block %" PRId64
+ " is outside image\n", i);
+ res->corruptions++;
+ continue;
+ }
+
+ if (offset != 0) {
+ inc_refcounts(bs, res, refcount_table, nb_clusters,
+ offset, s->cluster_size);
+ if (refcount_table[cluster] != 1) {
+ fprintf(stderr, "ERROR refcount block %" PRId64
+ " refcount=%d\n",
+ i, refcount_table[cluster]);
+ res->corruptions++;
+ }
+ }
+ }
+
+ /* compare ref counts */
+ for (i = 0, highest_cluster = 0; i < nb_clusters; i++) {
+ refcount1 = get_refcount(bs, i);
+ if (refcount1 < 0) {
+ fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
+ i, strerror(-refcount1));
+ res->check_errors++;
+ continue;
+ }
+
+ refcount2 = refcount_table[i];
+
+ if (refcount1 > 0 || refcount2 > 0) {
+ highest_cluster = i;
+ }
+
+ if (refcount1 != refcount2) {
+
+ /* Check if we're allowed to fix the mismatch */
+ int *num_fixed = NULL;
+ if (refcount1 > refcount2 && (fix & BDRV_FIX_LEAKS)) {
+ num_fixed = &res->leaks_fixed;
+ } else if (refcount1 < refcount2 && (fix & BDRV_FIX_ERRORS)) {
+ num_fixed = &res->corruptions_fixed;
+ }
+
+ fprintf(stderr, "%s cluster %" PRId64 " refcount=%d reference=%d\n",
+ num_fixed != NULL ? "Repairing" :
+ refcount1 < refcount2 ? "ERROR" :
+ "Leaked",
+ i, refcount1, refcount2);
+
+ if (num_fixed) {
+ ret = update_refcount(bs, i << s->cluster_bits, 1,
+ refcount2 - refcount1,
+ QCOW2_DISCARD_ALWAYS);
+ if (ret >= 0) {
+ (*num_fixed)++;
+ continue;
+ }
+ }
+
+ /* And if we couldn't, print an error */
+ if (refcount1 < refcount2) {
+ res->corruptions++;
+ } else {
+ res->leaks++;
+ }
+ }
+ }
+
+ res->image_end_offset = (highest_cluster + 1) * s->cluster_size;
+ ret = 0;
+
+fail:
+ g_free(refcount_table);
+
+ return ret;
+}
+
diff --git a/contrib/qemu/block/qcow2-snapshot.c b/contrib/qemu/block/qcow2-snapshot.c
new file mode 100644
index 000000000..0caac9055
--- /dev/null
+++ b/contrib/qemu/block/qcow2-snapshot.c
@@ -0,0 +1,660 @@
+/*
+ * Block driver for the QCOW version 2 format
+ *
+ * Copyright (c) 2004-2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "block/qcow2.h"
+
+typedef struct QEMU_PACKED QCowSnapshotHeader {
+ /* header is 8 byte aligned */
+ uint64_t l1_table_offset;
+
+ uint32_t l1_size;
+ uint16_t id_str_size;
+ uint16_t name_size;
+
+ uint32_t date_sec;
+ uint32_t date_nsec;
+
+ uint64_t vm_clock_nsec;
+
+ uint32_t vm_state_size;
+ uint32_t extra_data_size; /* for extension */
+ /* extra data follows */
+ /* id_str follows */
+ /* name follows */
+} QCowSnapshotHeader;
+
+typedef struct QEMU_PACKED QCowSnapshotExtraData {
+ uint64_t vm_state_size_large;
+ uint64_t disk_size;
+} QCowSnapshotExtraData;
+
+void qcow2_free_snapshots(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ int i;
+
+ for(i = 0; i < s->nb_snapshots; i++) {
+ g_free(s->snapshots[i].name);
+ g_free(s->snapshots[i].id_str);
+ }
+ g_free(s->snapshots);
+ s->snapshots = NULL;
+ s->nb_snapshots = 0;
+}
+
+int qcow2_read_snapshots(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ QCowSnapshotHeader h;
+ QCowSnapshotExtraData extra;
+ QCowSnapshot *sn;
+ int i, id_str_size, name_size;
+ int64_t offset;
+ uint32_t extra_data_size;
+ int ret;
+
+ if (!s->nb_snapshots) {
+ s->snapshots = NULL;
+ s->snapshots_size = 0;
+ return 0;
+ }
+
+ offset = s->snapshots_offset;
+ s->snapshots = g_malloc0(s->nb_snapshots * sizeof(QCowSnapshot));
+
+ for(i = 0; i < s->nb_snapshots; i++) {
+ /* Read statically sized part of the snapshot header */
+ offset = align_offset(offset, 8);
+ ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
+ if (ret < 0) {
+ goto fail;
+ }
+
+ offset += sizeof(h);
+ sn = s->snapshots + i;
+ sn->l1_table_offset = be64_to_cpu(h.l1_table_offset);
+ sn->l1_size = be32_to_cpu(h.l1_size);
+ sn->vm_state_size = be32_to_cpu(h.vm_state_size);
+ sn->date_sec = be32_to_cpu(h.date_sec);
+ sn->date_nsec = be32_to_cpu(h.date_nsec);
+ sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec);
+ extra_data_size = be32_to_cpu(h.extra_data_size);
+
+ id_str_size = be16_to_cpu(h.id_str_size);
+ name_size = be16_to_cpu(h.name_size);
+
+ /* Read extra data */
+ ret = bdrv_pread(bs->file, offset, &extra,
+ MIN(sizeof(extra), extra_data_size));
+ if (ret < 0) {
+ goto fail;
+ }
+ offset += extra_data_size;
+
+ if (extra_data_size >= 8) {
+ sn->vm_state_size = be64_to_cpu(extra.vm_state_size_large);
+ }
+
+ if (extra_data_size >= 16) {
+ sn->disk_size = be64_to_cpu(extra.disk_size);
+ } else {
+ sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
+ }
+
+ /* Read snapshot ID */
+ sn->id_str = g_malloc(id_str_size + 1);
+ ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
+ if (ret < 0) {
+ goto fail;
+ }
+ offset += id_str_size;
+ sn->id_str[id_str_size] = '\0';
+
+ /* Read snapshot name */
+ sn->name = g_malloc(name_size + 1);
+ ret = bdrv_pread(bs->file, offset, sn->name, name_size);
+ if (ret < 0) {
+ goto fail;
+ }
+ offset += name_size;
+ sn->name[name_size] = '\0';
+ }
+
+ s->snapshots_size = offset - s->snapshots_offset;
+ return 0;
+
+fail:
+ qcow2_free_snapshots(bs);
+ return ret;
+}
+
+/* add at the end of the file a new list of snapshots */
+static int qcow2_write_snapshots(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ QCowSnapshot *sn;
+ QCowSnapshotHeader h;
+ QCowSnapshotExtraData extra;
+ int i, name_size, id_str_size, snapshots_size;
+ struct {
+ uint32_t nb_snapshots;
+ uint64_t snapshots_offset;
+ } QEMU_PACKED header_data;
+ int64_t offset, snapshots_offset;
+ int ret;
+
+ /* compute the size of the snapshots */
+ offset = 0;
+ for(i = 0; i < s->nb_snapshots; i++) {
+ sn = s->snapshots + i;
+ offset = align_offset(offset, 8);
+ offset += sizeof(h);
+ offset += sizeof(extra);
+ offset += strlen(sn->id_str);
+ offset += strlen(sn->name);
+ }
+ snapshots_size = offset;
+
+ /* Allocate space for the new snapshot list */
+ snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
+ offset = snapshots_offset;
+ if (offset < 0) {
+ return offset;
+ }
+ ret = bdrv_flush(bs);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Write all snapshots to the new list */
+ for(i = 0; i < s->nb_snapshots; i++) {
+ sn = s->snapshots + i;
+ memset(&h, 0, sizeof(h));
+ h.l1_table_offset = cpu_to_be64(sn->l1_table_offset);
+ h.l1_size = cpu_to_be32(sn->l1_size);
+ /* If it doesn't fit in 32 bit, older implementations should treat it
+ * as a disk-only snapshot rather than truncate the VM state */
+ if (sn->vm_state_size <= 0xffffffff) {
+ h.vm_state_size = cpu_to_be32(sn->vm_state_size);
+ }
+ h.date_sec = cpu_to_be32(sn->date_sec);
+ h.date_nsec = cpu_to_be32(sn->date_nsec);
+ h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec);
+ h.extra_data_size = cpu_to_be32(sizeof(extra));
+
+ memset(&extra, 0, sizeof(extra));
+ extra.vm_state_size_large = cpu_to_be64(sn->vm_state_size);
+ extra.disk_size = cpu_to_be64(sn->disk_size);
+
+ id_str_size = strlen(sn->id_str);
+ name_size = strlen(sn->name);
+ h.id_str_size = cpu_to_be16(id_str_size);
+ h.name_size = cpu_to_be16(name_size);
+ offset = align_offset(offset, 8);
+
+ ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
+ if (ret < 0) {
+ goto fail;
+ }
+ offset += sizeof(h);
+
+ ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra));
+ if (ret < 0) {
+ goto fail;
+ }
+ offset += sizeof(extra);
+
+ ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size);
+ if (ret < 0) {
+ goto fail;
+ }
+ offset += id_str_size;
+
+ ret = bdrv_pwrite(bs->file, offset, sn->name, name_size);
+ if (ret < 0) {
+ goto fail;
+ }
+ offset += name_size;
+ }
+
+ /*
+ * Update the header to point to the new snapshot table. This requires the
+ * new table and its refcounts to be stable on disk.
+ */
+ ret = bdrv_flush(bs);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) !=
+ offsetof(QCowHeader, nb_snapshots) + sizeof(header_data.nb_snapshots));
+
+ header_data.nb_snapshots = cpu_to_be32(s->nb_snapshots);
+ header_data.snapshots_offset = cpu_to_be64(snapshots_offset);
+
+ ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
+ &header_data, sizeof(header_data));
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* free the old snapshot table */
+ qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size,
+ QCOW2_DISCARD_SNAPSHOT);
+ s->snapshots_offset = snapshots_offset;
+ s->snapshots_size = snapshots_size;
+ return 0;
+
+fail:
+ return ret;
+}
+
+static void find_new_snapshot_id(BlockDriverState *bs,
+ char *id_str, int id_str_size)
+{
+ BDRVQcowState *s = bs->opaque;
+ QCowSnapshot *sn;
+ int i, id, id_max = 0;
+
+ for(i = 0; i < s->nb_snapshots; i++) {
+ sn = s->snapshots + i;
+ id = strtoul(sn->id_str, NULL, 10);
+ if (id > id_max)
+ id_max = id;
+ }
+ snprintf(id_str, id_str_size, "%d", id_max + 1);
+}
+
+static int find_snapshot_by_id(BlockDriverState *bs, const char *id_str)
+{
+ BDRVQcowState *s = bs->opaque;
+ int i;
+
+ for(i = 0; i < s->nb_snapshots; i++) {
+ if (!strcmp(s->snapshots[i].id_str, id_str))
+ return i;
+ }
+ return -1;
+}
+
+static int find_snapshot_by_id_or_name(BlockDriverState *bs, const char *name)
+{
+ BDRVQcowState *s = bs->opaque;
+ int i, ret;
+
+ ret = find_snapshot_by_id(bs, name);
+ if (ret >= 0)
+ return ret;
+ for(i = 0; i < s->nb_snapshots; i++) {
+ if (!strcmp(s->snapshots[i].name, name))
+ return i;
+ }
+ return -1;
+}
+
+/* if no id is provided, a new one is constructed */
+int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
+{
+ BDRVQcowState *s = bs->opaque;
+ QCowSnapshot *new_snapshot_list = NULL;
+ QCowSnapshot *old_snapshot_list = NULL;
+ QCowSnapshot sn1, *sn = &sn1;
+ int i, ret;
+ uint64_t *l1_table = NULL;
+ int64_t l1_table_offset;
+
+ memset(sn, 0, sizeof(*sn));
+
+ /* Generate an ID if it wasn't passed */
+ if (sn_info->id_str[0] == '\0') {
+ find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
+ }
+
+ /* Check that the ID is unique */
+ if (find_snapshot_by_id(bs, sn_info->id_str) >= 0) {
+ return -EEXIST;
+ }
+
+ /* Populate sn with passed data */
+ sn->id_str = g_strdup(sn_info->id_str);
+ sn->name = g_strdup(sn_info->name);
+
+ sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
+ sn->vm_state_size = sn_info->vm_state_size;
+ sn->date_sec = sn_info->date_sec;
+ sn->date_nsec = sn_info->date_nsec;
+ sn->vm_clock_nsec = sn_info->vm_clock_nsec;
+
+ /* Allocate the L1 table of the snapshot and copy the current one there. */
+ l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
+ if (l1_table_offset < 0) {
+ ret = l1_table_offset;
+ goto fail;
+ }
+
+ sn->l1_table_offset = l1_table_offset;
+ sn->l1_size = s->l1_size;
+
+ l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
+ for(i = 0; i < s->l1_size; i++) {
+ l1_table[i] = cpu_to_be64(s->l1_table[i]);
+ }
+
+ ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
+ s->l1_size * sizeof(uint64_t));
+ if (ret < 0) {
+ goto fail;
+ }
+
+ g_free(l1_table);
+ l1_table = NULL;
+
+ /*
+ * Increase the refcounts of all clusters and make sure everything is
+ * stable on disk before updating the snapshot table to contain a pointer
+ * to the new L1 table.
+ */
+ ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* Append the new snapshot to the snapshot list */
+ new_snapshot_list = g_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot));
+ if (s->snapshots) {
+ memcpy(new_snapshot_list, s->snapshots,
+ s->nb_snapshots * sizeof(QCowSnapshot));
+ old_snapshot_list = s->snapshots;
+ }
+ s->snapshots = new_snapshot_list;
+ s->snapshots[s->nb_snapshots++] = *sn;
+
+ ret = qcow2_write_snapshots(bs);
+ if (ret < 0) {
+ g_free(s->snapshots);
+ s->snapshots = old_snapshot_list;
+ goto fail;
+ }
+
+ g_free(old_snapshot_list);
+
+#ifdef DEBUG_ALLOC
+ {
+ BdrvCheckResult result = {0};
+ qcow2_check_refcounts(bs, &result, 0);
+ }
+#endif
+ return 0;
+
+fail:
+ g_free(sn->id_str);
+ g_free(sn->name);
+ g_free(l1_table);
+
+ return ret;
+}
+
+/* copy the snapshot 'snapshot_name' into the current disk image */
+int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
+{
+ BDRVQcowState *s = bs->opaque;
+ QCowSnapshot *sn;
+ int i, snapshot_index;
+ int cur_l1_bytes, sn_l1_bytes;
+ int ret;
+ uint64_t *sn_l1_table = NULL;
+
+ /* Search the snapshot */
+ snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
+ if (snapshot_index < 0) {
+ return -ENOENT;
+ }
+ sn = &s->snapshots[snapshot_index];
+
+ if (sn->disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) {
+ error_report("qcow2: Loading snapshots with different disk "
+ "size is not implemented");
+ ret = -ENOTSUP;
+ goto fail;
+ }
+
+ /*
+ * Make sure that the current L1 table is big enough to contain the whole
+ * L1 table of the snapshot. If the snapshot L1 table is smaller, the
+ * current one must be padded with zeros.
+ */
+ ret = qcow2_grow_l1_table(bs, sn->l1_size, true);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ cur_l1_bytes = s->l1_size * sizeof(uint64_t);
+ sn_l1_bytes = sn->l1_size * sizeof(uint64_t);
+
+ /*
+ * Copy the snapshot L1 table to the current L1 table.
+ *
+ * Before overwriting the old current L1 table on disk, make sure to
+ * increase all refcounts for the clusters referenced by the new one.
+ * Decrease the refcount referenced by the old one only when the L1
+ * table is overwritten.
+ */
+ sn_l1_table = g_malloc0(cur_l1_bytes);
+
+ ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_table, sn_l1_bytes);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset,
+ sn->l1_size, 1);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
+ cur_l1_bytes);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /*
+ * Decrease refcount of clusters of current L1 table.
+ *
+ * At this point, the in-memory s->l1_table points to the old L1 table,
+ * whereas on disk we already have the new one.
+ *
+ * qcow2_update_snapshot_refcount special cases the current L1 table to use
+ * the in-memory data instead of really using the offset to load a new one,
+ * which is why this works.
+ */
+ ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset,
+ s->l1_size, -1);
+
+ /*
+ * Now update the in-memory L1 table to be in sync with the on-disk one. We
+ * need to do this even if updating refcounts failed.
+ */
+ for(i = 0;i < s->l1_size; i++) {
+ s->l1_table[i] = be64_to_cpu(sn_l1_table[i]);
+ }
+
+ if (ret < 0) {
+ goto fail;
+ }
+
+ g_free(sn_l1_table);
+ sn_l1_table = NULL;
+
+ /*
+ * Update QCOW_OFLAG_COPIED in the active L1 table (it may have changed
+ * when we decreased the refcount of the old snapshot.
+ */
+ ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
+ if (ret < 0) {
+ goto fail;
+ }
+
+#ifdef DEBUG_ALLOC
+ {
+ BdrvCheckResult result = {0};
+ qcow2_check_refcounts(bs, &result, 0);
+ }
+#endif
+ return 0;
+
+fail:
+ g_free(sn_l1_table);
+ return ret;
+}
+
+int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
+{
+ BDRVQcowState *s = bs->opaque;
+ QCowSnapshot sn;
+ int snapshot_index, ret;
+
+ /* Search the snapshot */
+ snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
+ if (snapshot_index < 0) {
+ return -ENOENT;
+ }
+ sn = s->snapshots[snapshot_index];
+
+ /* Remove it from the snapshot list */
+ memmove(s->snapshots + snapshot_index,
+ s->snapshots + snapshot_index + 1,
+ (s->nb_snapshots - snapshot_index - 1) * sizeof(sn));
+ s->nb_snapshots--;
+ ret = qcow2_write_snapshots(bs);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /*
+ * The snapshot is now unused, clean up. If we fail after this point, we
+ * won't recover but just leak clusters.
+ */
+ g_free(sn.id_str);
+ g_free(sn.name);
+
+ /*
+ * Now decrease the refcounts of clusters referenced by the snapshot and
+ * free the L1 table.
+ */
+ ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset,
+ sn.l1_size, -1);
+ if (ret < 0) {
+ return ret;
+ }
+ qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * sizeof(uint64_t),
+ QCOW2_DISCARD_SNAPSHOT);
+
+ /* must update the copied flag on the current cluster offsets */
+ ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
+ if (ret < 0) {
+ return ret;
+ }
+
+#ifdef DEBUG_ALLOC
+ {
+ BdrvCheckResult result = {0};
+ qcow2_check_refcounts(bs, &result, 0);
+ }
+#endif
+ return 0;
+}
+
+int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
+{
+ BDRVQcowState *s = bs->opaque;
+ QEMUSnapshotInfo *sn_tab, *sn_info;
+ QCowSnapshot *sn;
+ int i;
+
+ if (!s->nb_snapshots) {
+ *psn_tab = NULL;
+ return s->nb_snapshots;
+ }
+
+ sn_tab = g_malloc0(s->nb_snapshots * sizeof(QEMUSnapshotInfo));
+ for(i = 0; i < s->nb_snapshots; i++) {
+ sn_info = sn_tab + i;
+ sn = s->snapshots + i;
+ pstrcpy(sn_info->id_str, sizeof(sn_info->id_str),
+ sn->id_str);
+ pstrcpy(sn_info->name, sizeof(sn_info->name),
+ sn->name);
+ sn_info->vm_state_size = sn->vm_state_size;
+ sn_info->date_sec = sn->date_sec;
+ sn_info->date_nsec = sn->date_nsec;
+ sn_info->vm_clock_nsec = sn->vm_clock_nsec;
+ }
+ *psn_tab = sn_tab;
+ return s->nb_snapshots;
+}
+
+int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name)
+{
+ int i, snapshot_index;
+ BDRVQcowState *s = bs->opaque;
+ QCowSnapshot *sn;
+ uint64_t *new_l1_table;
+ int new_l1_bytes;
+ int ret;
+
+ assert(bs->read_only);
+
+ /* Search the snapshot */
+ snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_name);
+ if (snapshot_index < 0) {
+ return -ENOENT;
+ }
+ sn = &s->snapshots[snapshot_index];
+
+ /* Allocate and read in the snapshot's L1 table */
+ new_l1_bytes = s->l1_size * sizeof(uint64_t);
+ new_l1_table = g_malloc0(align_offset(new_l1_bytes, 512));
+
+ ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes);
+ if (ret < 0) {
+ g_free(new_l1_table);
+ return ret;
+ }
+
+ /* Switch the L1 table */
+ g_free(s->l1_table);
+
+ s->l1_size = sn->l1_size;
+ s->l1_table_offset = sn->l1_table_offset;
+ s->l1_table = new_l1_table;
+
+ for(i = 0;i < s->l1_size; i++) {
+ be64_to_cpus(&s->l1_table[i]);
+ }
+
+ return 0;
+}
diff --git a/contrib/qemu/block/qcow2.c b/contrib/qemu/block/qcow2.c
new file mode 100644
index 000000000..0eceefe2c
--- /dev/null
+++ b/contrib/qemu/block/qcow2.c
@@ -0,0 +1,1825 @@
+/*
+ * Block driver for the QCOW version 2 format
+ *
+ * Copyright (c) 2004-2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "qemu/module.h"
+#include <zlib.h>
+#include "qemu/aes.h"
+#include "block/qcow2.h"
+#include "qemu/error-report.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qbool.h"
+#include "trace.h"
+
+/*
+ Differences with QCOW:
+
+ - Support for multiple incremental snapshots.
+ - Memory management by reference counts.
+ - Clusters which have a reference count of one have the bit
+ QCOW_OFLAG_COPIED to optimize write performance.
+ - Size of compressed clusters is stored in sectors to reduce bit usage
+ in the cluster offsets.
+ - Support for storing additional data (such as the VM state) in the
+ snapshots.
+ - If a backing store is used, the cluster size is not constrained
+ (could be backported to QCOW).
+ - L2 tables have always a size of one cluster.
+*/
+
+
+typedef struct {
+ uint32_t magic;
+ uint32_t len;
+} QCowExtension;
+
+#define QCOW2_EXT_MAGIC_END 0
+#define QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA
+#define QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857
+
+static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+ const QCowHeader *cow_header = (const void *)buf;
+
+ if (buf_size >= sizeof(QCowHeader) &&
+ be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
+ be32_to_cpu(cow_header->version) >= 2)
+ return 100;
+ else
+ return 0;
+}
+
+
+/*
+ * read qcow2 extension and fill bs
+ * start reading from start_offset
+ * finish reading upon magic of value 0 or when end_offset reached
+ * unknown magic is skipped (future extension this version knows nothing about)
+ * return 0 upon success, non-0 otherwise
+ */
+static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
+ uint64_t end_offset, void **p_feature_table)
+{
+ BDRVQcowState *s = bs->opaque;
+ QCowExtension ext;
+ uint64_t offset;
+ int ret;
+
+#ifdef DEBUG_EXT
+ printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
+#endif
+ offset = start_offset;
+ while (offset < end_offset) {
+
+#ifdef DEBUG_EXT
+ /* Sanity check */
+ if (offset > s->cluster_size)
+ printf("qcow2_read_extension: suspicious offset %lu\n", offset);
+
+ printf("attempting to read extended header in offset %lu\n", offset);
+#endif
+
+ if (bdrv_pread(bs->file, offset, &ext, sizeof(ext)) != sizeof(ext)) {
+ fprintf(stderr, "qcow2_read_extension: ERROR: "
+ "pread fail from offset %" PRIu64 "\n",
+ offset);
+ return 1;
+ }
+ be32_to_cpus(&ext.magic);
+ be32_to_cpus(&ext.len);
+ offset += sizeof(ext);
+#ifdef DEBUG_EXT
+ printf("ext.magic = 0x%x\n", ext.magic);
+#endif
+ if (ext.len > end_offset - offset) {
+ error_report("Header extension too large");
+ return -EINVAL;
+ }
+
+ switch (ext.magic) {
+ case QCOW2_EXT_MAGIC_END:
+ return 0;
+
+ case QCOW2_EXT_MAGIC_BACKING_FORMAT:
+ if (ext.len >= sizeof(bs->backing_format)) {
+ fprintf(stderr, "ERROR: ext_backing_format: len=%u too large"
+ " (>=%zu)\n",
+ ext.len, sizeof(bs->backing_format));
+ return 2;
+ }
+ if (bdrv_pread(bs->file, offset , bs->backing_format,
+ ext.len) != ext.len)
+ return 3;
+ bs->backing_format[ext.len] = '\0';
+#ifdef DEBUG_EXT
+ printf("Qcow2: Got format extension %s\n", bs->backing_format);
+#endif
+ break;
+
+ case QCOW2_EXT_MAGIC_FEATURE_TABLE:
+ if (p_feature_table != NULL) {
+ void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
+ ret = bdrv_pread(bs->file, offset , feature_table, ext.len);
+ if (ret < 0) {
+ return ret;
+ }
+
+ *p_feature_table = feature_table;
+ }
+ break;
+
+ default:
+ /* unknown magic - save it in case we need to rewrite the header */
+ {
+ Qcow2UnknownHeaderExtension *uext;
+
+ uext = g_malloc0(sizeof(*uext) + ext.len);
+ uext->magic = ext.magic;
+ uext->len = ext.len;
+ QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);
+
+ ret = bdrv_pread(bs->file, offset , uext->data, uext->len);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+ break;
+ }
+
+ offset += ((ext.len + 7) & ~7);
+ }
+
+ return 0;
+}
+
+static void cleanup_unknown_header_ext(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ Qcow2UnknownHeaderExtension *uext, *next;
+
+ QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) {
+ QLIST_REMOVE(uext, next);
+ g_free(uext);
+ }
+}
+
+static void GCC_FMT_ATTR(2, 3) report_unsupported(BlockDriverState *bs,
+ const char *fmt, ...)
+{
+ char msg[64];
+ va_list ap;
+
+ va_start(ap, fmt);
+ vsnprintf(msg, sizeof(msg), fmt, ap);
+ va_end(ap);
+
+ qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
+ bs->device_name, "qcow2", msg);
+}
+
+static void report_unsupported_feature(BlockDriverState *bs,
+ Qcow2Feature *table, uint64_t mask)
+{
+ while (table && table->name[0] != '\0') {
+ if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) {
+ if (mask & (1 << table->bit)) {
+ report_unsupported(bs, "%.46s",table->name);
+ mask &= ~(1 << table->bit);
+ }
+ }
+ table++;
+ }
+
+ if (mask) {
+ report_unsupported(bs, "Unknown incompatible feature: %" PRIx64, mask);
+ }
+}
+
+/*
+ * Sets the dirty bit and flushes afterwards if necessary.
+ *
+ * The incompatible_features bit is only set if the image file header was
+ * updated successfully. Therefore it is not required to check the return
+ * value of this function.
+ */
+int qcow2_mark_dirty(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t val;
+ int ret;
+
+ assert(s->qcow_version >= 3);
+
+ if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
+ return 0; /* already dirty */
+ }
+
+ val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
+ ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
+ &val, sizeof(val));
+ if (ret < 0) {
+ return ret;
+ }
+ ret = bdrv_flush(bs->file);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Only treat image as dirty if the header was updated successfully */
+ s->incompatible_features |= QCOW2_INCOMPAT_DIRTY;
+ return 0;
+}
+
+/*
+ * Clears the dirty bit and flushes before if necessary. Only call this
+ * function when there are no pending requests, it does not guard against
+ * concurrent requests dirtying the image.
+ */
+static int qcow2_mark_clean(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+
+ if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
+ int ret = bdrv_flush(bs);
+ if (ret < 0) {
+ return ret;
+ }
+
+ s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY;
+ return qcow2_update_header(bs);
+ }
+ return 0;
+}
+
+static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result,
+ BdrvCheckMode fix)
+{
+ int ret = qcow2_check_refcounts(bs, result, fix);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (fix && result->check_errors == 0 && result->corruptions == 0) {
+ return qcow2_mark_clean(bs);
+ }
+ return ret;
+}
+
+static QemuOptsList qcow2_runtime_opts = {
+ .name = "qcow2",
+ .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head),
+ .desc = {
+ {
+ .name = "lazy_refcounts",
+ .type = QEMU_OPT_BOOL,
+ .help = "Postpone refcount updates",
+ },
+ {
+ .name = QCOW2_OPT_DISCARD_REQUEST,
+ .type = QEMU_OPT_BOOL,
+ .help = "Pass guest discard requests to the layer below",
+ },
+ {
+ .name = QCOW2_OPT_DISCARD_SNAPSHOT,
+ .type = QEMU_OPT_BOOL,
+ .help = "Generate discard requests when snapshot related space "
+ "is freed",
+ },
+ {
+ .name = QCOW2_OPT_DISCARD_OTHER,
+ .type = QEMU_OPT_BOOL,
+ .help = "Generate discard requests when other clusters are freed",
+ },
+ { /* end of list */ }
+ },
+};
+
+static int qcow2_open(BlockDriverState *bs, QDict *options, int flags)
+{
+ BDRVQcowState *s = bs->opaque;
+ int len, i, ret = 0;
+ QCowHeader header;
+ QemuOpts *opts;
+ Error *local_err = NULL;
+ uint64_t ext_end;
+ uint64_t l1_vm_state_index;
+
+ ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
+ if (ret < 0) {
+ goto fail;
+ }
+ be32_to_cpus(&header.magic);
+ be32_to_cpus(&header.version);
+ be64_to_cpus(&header.backing_file_offset);
+ be32_to_cpus(&header.backing_file_size);
+ be64_to_cpus(&header.size);
+ be32_to_cpus(&header.cluster_bits);
+ be32_to_cpus(&header.crypt_method);
+ be64_to_cpus(&header.l1_table_offset);
+ be32_to_cpus(&header.l1_size);
+ be64_to_cpus(&header.refcount_table_offset);
+ be32_to_cpus(&header.refcount_table_clusters);
+ be64_to_cpus(&header.snapshots_offset);
+ be32_to_cpus(&header.nb_snapshots);
+
+ if (header.magic != QCOW_MAGIC) {
+ ret = -EMEDIUMTYPE;
+ goto fail;
+ }
+ if (header.version < 2 || header.version > 3) {
+ report_unsupported(bs, "QCOW version %d", header.version);
+ ret = -ENOTSUP;
+ goto fail;
+ }
+
+ s->qcow_version = header.version;
+
+ /* Initialise version 3 header fields */
+ if (header.version == 2) {
+ header.incompatible_features = 0;
+ header.compatible_features = 0;
+ header.autoclear_features = 0;
+ header.refcount_order = 4;
+ header.header_length = 72;
+ } else {
+ be64_to_cpus(&header.incompatible_features);
+ be64_to_cpus(&header.compatible_features);
+ be64_to_cpus(&header.autoclear_features);
+ be32_to_cpus(&header.refcount_order);
+ be32_to_cpus(&header.header_length);
+ }
+
+ if (header.header_length > sizeof(header)) {
+ s->unknown_header_fields_size = header.header_length - sizeof(header);
+ s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
+ ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields,
+ s->unknown_header_fields_size);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+
+ if (header.backing_file_offset) {
+ ext_end = header.backing_file_offset;
+ } else {
+ ext_end = 1 << header.cluster_bits;
+ }
+
+ /* Handle feature bits */
+ s->incompatible_features = header.incompatible_features;
+ s->compatible_features = header.compatible_features;
+ s->autoclear_features = header.autoclear_features;
+
+ if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) {
+ void *feature_table = NULL;
+ qcow2_read_extensions(bs, header.header_length, ext_end,
+ &feature_table);
+ report_unsupported_feature(bs, feature_table,
+ s->incompatible_features &
+ ~QCOW2_INCOMPAT_MASK);
+ ret = -ENOTSUP;
+ goto fail;
+ }
+
+ /* Check support for various header values */
+ if (header.refcount_order != 4) {
+ report_unsupported(bs, "%d bit reference counts",
+ 1 << header.refcount_order);
+ ret = -ENOTSUP;
+ goto fail;
+ }
+
+ if (header.cluster_bits < MIN_CLUSTER_BITS ||
+ header.cluster_bits > MAX_CLUSTER_BITS) {
+ ret = -EINVAL;
+ goto fail;
+ }
+ if (header.crypt_method > QCOW_CRYPT_AES) {
+ ret = -EINVAL;
+ goto fail;
+ }
+ s->crypt_method_header = header.crypt_method;
+ if (s->crypt_method_header) {
+ bs->encrypted = 1;
+ }
+ s->cluster_bits = header.cluster_bits;
+ s->cluster_size = 1 << s->cluster_bits;
+ s->cluster_sectors = 1 << (s->cluster_bits - 9);
+ s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
+ s->l2_size = 1 << s->l2_bits;
+ bs->total_sectors = header.size / 512;
+ s->csize_shift = (62 - (s->cluster_bits - 8));
+ s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
+ s->cluster_offset_mask = (1LL << s->csize_shift) - 1;
+ s->refcount_table_offset = header.refcount_table_offset;
+ s->refcount_table_size =
+ header.refcount_table_clusters << (s->cluster_bits - 3);
+
+ s->snapshots_offset = header.snapshots_offset;
+ s->nb_snapshots = header.nb_snapshots;
+
+ /* read the level 1 table */
+ s->l1_size = header.l1_size;
+
+ l1_vm_state_index = size_to_l1(s, header.size);
+ if (l1_vm_state_index > INT_MAX) {
+ ret = -EFBIG;
+ goto fail;
+ }
+ s->l1_vm_state_index = l1_vm_state_index;
+
+ /* the L1 table must contain at least enough entries to put
+ header.size bytes */
+ if (s->l1_size < s->l1_vm_state_index) {
+ ret = -EINVAL;
+ goto fail;
+ }
+ s->l1_table_offset = header.l1_table_offset;
+ if (s->l1_size > 0) {
+ s->l1_table = g_malloc0(
+ align_offset(s->l1_size * sizeof(uint64_t), 512));
+ ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
+ s->l1_size * sizeof(uint64_t));
+ if (ret < 0) {
+ goto fail;
+ }
+ for(i = 0;i < s->l1_size; i++) {
+ be64_to_cpus(&s->l1_table[i]);
+ }
+ }
+
+ /* alloc L2 table/refcount block cache */
+ s->l2_table_cache = qcow2_cache_create(bs, L2_CACHE_SIZE);
+ s->refcount_block_cache = qcow2_cache_create(bs, REFCOUNT_CACHE_SIZE);
+
+ s->cluster_cache = g_malloc(s->cluster_size);
+ /* one more sector for decompressed data alignment */
+ s->cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
+ + 512);
+ s->cluster_cache_offset = -1;
+ s->flags = flags;
+
+ ret = qcow2_refcount_init(bs);
+ if (ret != 0) {
+ goto fail;
+ }
+
+ QLIST_INIT(&s->cluster_allocs);
+ QTAILQ_INIT(&s->discards);
+
+ /* read qcow2 extensions */
+ if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL)) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ /* read the backing file name */
+ if (header.backing_file_offset != 0) {
+ len = header.backing_file_size;
+ if (len > 1023) {
+ len = 1023;
+ }
+ ret = bdrv_pread(bs->file, header.backing_file_offset,
+ bs->backing_file, len);
+ if (ret < 0) {
+ goto fail;
+ }
+ bs->backing_file[len] = '\0';
+ }
+
+ ret = qcow2_read_snapshots(bs);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* Clear unknown autoclear feature bits */
+ if (!bs->read_only && s->autoclear_features != 0) {
+ s->autoclear_features = 0;
+ ret = qcow2_update_header(bs);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+
+ /* Initialise locks */
+ qemu_co_mutex_init(&s->lock);
+
+ /* Repair image if dirty */
+ if (!(flags & BDRV_O_CHECK) && !bs->read_only &&
+ (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
+ BdrvCheckResult result = {0};
+
+ ret = qcow2_check(bs, &result, BDRV_FIX_ERRORS);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+
+ /* Enable lazy_refcounts according to image and command line options */
+ opts = qemu_opts_create_nofail(&qcow2_runtime_opts);
+ qemu_opts_absorb_qdict(opts, options, &local_err);
+ if (error_is_set(&local_err)) {
+ qerror_report_err(local_err);
+ error_free(local_err);
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ s->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS,
+ (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS));
+
+ s->discard_passthrough[QCOW2_DISCARD_NEVER] = false;
+ s->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true;
+ s->discard_passthrough[QCOW2_DISCARD_REQUEST] =
+ qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST,
+ flags & BDRV_O_UNMAP);
+ s->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] =
+ qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true);
+ s->discard_passthrough[QCOW2_DISCARD_OTHER] =
+ qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);
+
+ qemu_opts_del(opts);
+
+ if (s->use_lazy_refcounts && s->qcow_version < 3) {
+ qerror_report(ERROR_CLASS_GENERIC_ERROR, "Lazy refcounts require "
+ "a qcow2 image with at least qemu 1.1 compatibility level");
+ ret = -EINVAL;
+ goto fail;
+ }
+
+#ifdef DEBUG_ALLOC
+ {
+ BdrvCheckResult result = {0};
+ qcow2_check_refcounts(bs, &result, 0);
+ }
+#endif
+ return ret;
+
+ fail:
+ g_free(s->unknown_header_fields);
+ cleanup_unknown_header_ext(bs);
+ qcow2_free_snapshots(bs);
+ qcow2_refcount_close(bs);
+ g_free(s->l1_table);
+ if (s->l2_table_cache) {
+ qcow2_cache_destroy(bs, s->l2_table_cache);
+ }
+ g_free(s->cluster_cache);
+ qemu_vfree(s->cluster_data);
+ return ret;
+}
+
+static int qcow2_set_key(BlockDriverState *bs, const char *key)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint8_t keybuf[16];
+ int len, i;
+
+ memset(keybuf, 0, 16);
+ len = strlen(key);
+ if (len > 16)
+ len = 16;
+ /* XXX: we could compress the chars to 7 bits to increase
+ entropy */
+ for(i = 0;i < len;i++) {
+ keybuf[i] = key[i];
+ }
+ s->crypt_method = s->crypt_method_header;
+
+ if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
+ return -1;
+ if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
+ return -1;
+#if 0
+ /* test */
+ {
+ uint8_t in[16];
+ uint8_t out[16];
+ uint8_t tmp[16];
+ for(i=0;i<16;i++)
+ in[i] = i;
+ AES_encrypt(in, tmp, &s->aes_encrypt_key);
+ AES_decrypt(tmp, out, &s->aes_decrypt_key);
+ for(i = 0; i < 16; i++)
+ printf(" %02x", tmp[i]);
+ printf("\n");
+ for(i = 0; i < 16; i++)
+ printf(" %02x", out[i]);
+ printf("\n");
+ }
+#endif
+ return 0;
+}
+
+/* We have nothing to do for QCOW2 reopen, stubs just return
+ * success */
+static int qcow2_reopen_prepare(BDRVReopenState *state,
+ BlockReopenQueue *queue, Error **errp)
+{
+ return 0;
+}
+
+static int coroutine_fn qcow2_co_is_allocated(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, int *pnum)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t cluster_offset;
+ int ret;
+
+ *pnum = nb_sectors;
+ /* FIXME We can get errors here, but the bdrv_co_is_allocated interface
+ * can't pass them on today */
+ qemu_co_mutex_lock(&s->lock);
+ ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset);
+ qemu_co_mutex_unlock(&s->lock);
+ if (ret < 0) {
+ *pnum = 0;
+ }
+
+ return (cluster_offset != 0) || (ret == QCOW2_CLUSTER_ZERO);
+}
+
+/* handle reading after the end of the backing file */
+int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
+ int64_t sector_num, int nb_sectors)
+{
+ int n1;
+ if ((sector_num + nb_sectors) <= bs->total_sectors)
+ return nb_sectors;
+ if (sector_num >= bs->total_sectors)
+ n1 = 0;
+ else
+ n1 = bs->total_sectors - sector_num;
+
+ qemu_iovec_memset(qiov, 512 * n1, 0, 512 * (nb_sectors - n1));
+
+ return n1;
+}
+
+static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
+ int remaining_sectors, QEMUIOVector *qiov)
+{
+ BDRVQcowState *s = bs->opaque;
+ int index_in_cluster, n1;
+ int ret;
+ int cur_nr_sectors; /* number of sectors in current iteration */
+ uint64_t cluster_offset = 0;
+ uint64_t bytes_done = 0;
+ QEMUIOVector hd_qiov;
+ uint8_t *cluster_data = NULL;
+
+ qemu_iovec_init(&hd_qiov, qiov->niov);
+
+ qemu_co_mutex_lock(&s->lock);
+
+ while (remaining_sectors != 0) {
+
+ /* prepare next request */
+ cur_nr_sectors = remaining_sectors;
+ if (s->crypt_method) {
+ cur_nr_sectors = MIN(cur_nr_sectors,
+ QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
+ }
+
+ ret = qcow2_get_cluster_offset(bs, sector_num << 9,
+ &cur_nr_sectors, &cluster_offset);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ index_in_cluster = sector_num & (s->cluster_sectors - 1);
+
+ qemu_iovec_reset(&hd_qiov);
+ qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
+ cur_nr_sectors * 512);
+
+ switch (ret) {
+ case QCOW2_CLUSTER_UNALLOCATED:
+
+ if (bs->backing_hd) {
+ /* read from the base image */
+ n1 = qcow2_backing_read1(bs->backing_hd, &hd_qiov,
+ sector_num, cur_nr_sectors);
+ if (n1 > 0) {
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
+ qemu_co_mutex_unlock(&s->lock);
+ ret = bdrv_co_readv(bs->backing_hd, sector_num,
+ n1, &hd_qiov);
+ qemu_co_mutex_lock(&s->lock);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+ } else {
+ /* Note: in this case, no need to wait */
+ qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
+ }
+ break;
+
+ case QCOW2_CLUSTER_ZERO:
+ qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
+ break;
+
+ case QCOW2_CLUSTER_COMPRESSED:
+ /* add AIO support for compressed blocks ? */
+ ret = qcow2_decompress_cluster(bs, cluster_offset);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ qemu_iovec_from_buf(&hd_qiov, 0,
+ s->cluster_cache + index_in_cluster * 512,
+ 512 * cur_nr_sectors);
+ break;
+
+ case QCOW2_CLUSTER_NORMAL:
+ if ((cluster_offset & 511) != 0) {
+ ret = -EIO;
+ goto fail;
+ }
+
+ if (s->crypt_method) {
+ /*
+ * For encrypted images, read everything into a temporary
+ * contiguous buffer on which the AES functions can work.
+ */
+ if (!cluster_data) {
+ cluster_data =
+ qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
+ }
+
+ assert(cur_nr_sectors <=
+ QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
+ qemu_iovec_reset(&hd_qiov);
+ qemu_iovec_add(&hd_qiov, cluster_data,
+ 512 * cur_nr_sectors);
+ }
+
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
+ qemu_co_mutex_unlock(&s->lock);
+ ret = bdrv_co_readv(bs->file,
+ (cluster_offset >> 9) + index_in_cluster,
+ cur_nr_sectors, &hd_qiov);
+ qemu_co_mutex_lock(&s->lock);
+ if (ret < 0) {
+ goto fail;
+ }
+ if (s->crypt_method) {
+ qcow2_encrypt_sectors(s, sector_num, cluster_data,
+ cluster_data, cur_nr_sectors, 0, &s->aes_decrypt_key);
+ qemu_iovec_from_buf(qiov, bytes_done,
+ cluster_data, 512 * cur_nr_sectors);
+ }
+ break;
+
+ default:
+ g_assert_not_reached();
+ ret = -EIO;
+ goto fail;
+ }
+
+ remaining_sectors -= cur_nr_sectors;
+ sector_num += cur_nr_sectors;
+ bytes_done += cur_nr_sectors * 512;
+ }
+ ret = 0;
+
+fail:
+ qemu_co_mutex_unlock(&s->lock);
+
+ qemu_iovec_destroy(&hd_qiov);
+ qemu_vfree(cluster_data);
+
+ return ret;
+}
+
+static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
+ int64_t sector_num,
+ int remaining_sectors,
+ QEMUIOVector *qiov)
+{
+ BDRVQcowState *s = bs->opaque;
+ int index_in_cluster;
+ int n_end;
+ int ret;
+ int cur_nr_sectors; /* number of sectors in current iteration */
+ uint64_t cluster_offset;
+ QEMUIOVector hd_qiov;
+ uint64_t bytes_done = 0;
+ uint8_t *cluster_data = NULL;
+ QCowL2Meta *l2meta = NULL;
+
+ trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num,
+ remaining_sectors);
+
+ qemu_iovec_init(&hd_qiov, qiov->niov);
+
+ s->cluster_cache_offset = -1; /* disable compressed cache */
+
+ qemu_co_mutex_lock(&s->lock);
+
+ while (remaining_sectors != 0) {
+
+ l2meta = NULL;
+
+ trace_qcow2_writev_start_part(qemu_coroutine_self());
+ index_in_cluster = sector_num & (s->cluster_sectors - 1);
+ n_end = index_in_cluster + remaining_sectors;
+ if (s->crypt_method &&
+ n_end > QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors) {
+ n_end = QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors;
+ }
+
+ ret = qcow2_alloc_cluster_offset(bs, sector_num << 9,
+ index_in_cluster, n_end, &cur_nr_sectors, &cluster_offset, &l2meta);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ assert((cluster_offset & 511) == 0);
+
+ qemu_iovec_reset(&hd_qiov);
+ qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
+ cur_nr_sectors * 512);
+
+ if (s->crypt_method) {
+ if (!cluster_data) {
+ cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS *
+ s->cluster_size);
+ }
+
+ assert(hd_qiov.size <=
+ QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
+ qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size);
+
+ qcow2_encrypt_sectors(s, sector_num, cluster_data,
+ cluster_data, cur_nr_sectors, 1, &s->aes_encrypt_key);
+
+ qemu_iovec_reset(&hd_qiov);
+ qemu_iovec_add(&hd_qiov, cluster_data,
+ cur_nr_sectors * 512);
+ }
+
+ qemu_co_mutex_unlock(&s->lock);
+ BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
+ trace_qcow2_writev_data(qemu_coroutine_self(),
+ (cluster_offset >> 9) + index_in_cluster);
+ ret = bdrv_co_writev(bs->file,
+ (cluster_offset >> 9) + index_in_cluster,
+ cur_nr_sectors, &hd_qiov);
+ qemu_co_mutex_lock(&s->lock);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ while (l2meta != NULL) {
+ QCowL2Meta *next;
+
+ ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* Take the request off the list of running requests */
+ if (l2meta->nb_clusters != 0) {
+ QLIST_REMOVE(l2meta, next_in_flight);
+ }
+
+ qemu_co_queue_restart_all(&l2meta->dependent_requests);
+
+ next = l2meta->next;
+ g_free(l2meta);
+ l2meta = next;
+ }
+
+ remaining_sectors -= cur_nr_sectors;
+ sector_num += cur_nr_sectors;
+ bytes_done += cur_nr_sectors * 512;
+ trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_nr_sectors);
+ }
+ ret = 0;
+
+fail:
+ qemu_co_mutex_unlock(&s->lock);
+
+ while (l2meta != NULL) {
+ QCowL2Meta *next;
+
+ if (l2meta->nb_clusters != 0) {
+ QLIST_REMOVE(l2meta, next_in_flight);
+ }
+ qemu_co_queue_restart_all(&l2meta->dependent_requests);
+
+ next = l2meta->next;
+ g_free(l2meta);
+ l2meta = next;
+ }
+
+ qemu_iovec_destroy(&hd_qiov);
+ qemu_vfree(cluster_data);
+ trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
+
+ return ret;
+}
+
+static void qcow2_close(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ g_free(s->l1_table);
+
+ qcow2_cache_flush(bs, s->l2_table_cache);
+ qcow2_cache_flush(bs, s->refcount_block_cache);
+
+ qcow2_mark_clean(bs);
+
+ qcow2_cache_destroy(bs, s->l2_table_cache);
+ qcow2_cache_destroy(bs, s->refcount_block_cache);
+
+ g_free(s->unknown_header_fields);
+ cleanup_unknown_header_ext(bs);
+
+ g_free(s->cluster_cache);
+ qemu_vfree(s->cluster_data);
+ qcow2_refcount_close(bs);
+ qcow2_free_snapshots(bs);
+}
+
+static void qcow2_invalidate_cache(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ int flags = s->flags;
+ AES_KEY aes_encrypt_key;
+ AES_KEY aes_decrypt_key;
+ uint32_t crypt_method = 0;
+ QDict *options;
+
+ /*
+ * Backing files are read-only which makes all of their metadata immutable,
+ * that means we don't have to worry about reopening them here.
+ */
+
+ if (s->crypt_method) {
+ crypt_method = s->crypt_method;
+ memcpy(&aes_encrypt_key, &s->aes_encrypt_key, sizeof(aes_encrypt_key));
+ memcpy(&aes_decrypt_key, &s->aes_decrypt_key, sizeof(aes_decrypt_key));
+ }
+
+ qcow2_close(bs);
+
+ options = qdict_new();
+ qdict_put(options, QCOW2_OPT_LAZY_REFCOUNTS,
+ qbool_from_int(s->use_lazy_refcounts));
+
+ memset(s, 0, sizeof(BDRVQcowState));
+ qcow2_open(bs, options, flags);
+
+ QDECREF(options);
+
+ if (crypt_method) {
+ s->crypt_method = crypt_method;
+ memcpy(&s->aes_encrypt_key, &aes_encrypt_key, sizeof(aes_encrypt_key));
+ memcpy(&s->aes_decrypt_key, &aes_decrypt_key, sizeof(aes_decrypt_key));
+ }
+}
+
+static size_t header_ext_add(char *buf, uint32_t magic, const void *s,
+ size_t len, size_t buflen)
+{
+ QCowExtension *ext_backing_fmt = (QCowExtension*) buf;
+ size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7);
+
+ if (buflen < ext_len) {
+ return -ENOSPC;
+ }
+
+ *ext_backing_fmt = (QCowExtension) {
+ .magic = cpu_to_be32(magic),
+ .len = cpu_to_be32(len),
+ };
+ memcpy(buf + sizeof(QCowExtension), s, len);
+
+ return ext_len;
+}
+
+/*
+ * Updates the qcow2 header, including the variable length parts of it, i.e.
+ * the backing file name and all extensions. qcow2 was not designed to allow
+ * such changes, so if we run out of space (we can only use the first cluster)
+ * this function may fail.
+ *
+ * Returns 0 on success, -errno in error cases.
+ */
+int qcow2_update_header(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ QCowHeader *header;
+ char *buf;
+ size_t buflen = s->cluster_size;
+ int ret;
+ uint64_t total_size;
+ uint32_t refcount_table_clusters;
+ size_t header_length;
+ Qcow2UnknownHeaderExtension *uext;
+
+ buf = qemu_blockalign(bs, buflen);
+
+ /* Header structure */
+ header = (QCowHeader*) buf;
+
+ if (buflen < sizeof(*header)) {
+ ret = -ENOSPC;
+ goto fail;
+ }
+
+ header_length = sizeof(*header) + s->unknown_header_fields_size;
+ total_size = bs->total_sectors * BDRV_SECTOR_SIZE;
+ refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
+
+ *header = (QCowHeader) {
+ /* Version 2 fields */
+ .magic = cpu_to_be32(QCOW_MAGIC),
+ .version = cpu_to_be32(s->qcow_version),
+ .backing_file_offset = 0,
+ .backing_file_size = 0,
+ .cluster_bits = cpu_to_be32(s->cluster_bits),
+ .size = cpu_to_be64(total_size),
+ .crypt_method = cpu_to_be32(s->crypt_method_header),
+ .l1_size = cpu_to_be32(s->l1_size),
+ .l1_table_offset = cpu_to_be64(s->l1_table_offset),
+ .refcount_table_offset = cpu_to_be64(s->refcount_table_offset),
+ .refcount_table_clusters = cpu_to_be32(refcount_table_clusters),
+ .nb_snapshots = cpu_to_be32(s->nb_snapshots),
+ .snapshots_offset = cpu_to_be64(s->snapshots_offset),
+
+ /* Version 3 fields */
+ .incompatible_features = cpu_to_be64(s->incompatible_features),
+ .compatible_features = cpu_to_be64(s->compatible_features),
+ .autoclear_features = cpu_to_be64(s->autoclear_features),
+ .refcount_order = cpu_to_be32(3 + REFCOUNT_SHIFT),
+ .header_length = cpu_to_be32(header_length),
+ };
+
+ /* For older versions, write a shorter header */
+ switch (s->qcow_version) {
+ case 2:
+ ret = offsetof(QCowHeader, incompatible_features);
+ break;
+ case 3:
+ ret = sizeof(*header);
+ break;
+ default:
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ buf += ret;
+ buflen -= ret;
+ memset(buf, 0, buflen);
+
+ /* Preserve any unknown field in the header */
+ if (s->unknown_header_fields_size) {
+ if (buflen < s->unknown_header_fields_size) {
+ ret = -ENOSPC;
+ goto fail;
+ }
+
+ memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size);
+ buf += s->unknown_header_fields_size;
+ buflen -= s->unknown_header_fields_size;
+ }
+
+ /* Backing file format header extension */
+ if (*bs->backing_format) {
+ ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT,
+ bs->backing_format, strlen(bs->backing_format),
+ buflen);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ buf += ret;
+ buflen -= ret;
+ }
+
+ /* Feature table */
+ Qcow2Feature features[] = {
+ {
+ .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
+ .bit = QCOW2_INCOMPAT_DIRTY_BITNR,
+ .name = "dirty bit",
+ },
+ {
+ .type = QCOW2_FEAT_TYPE_COMPATIBLE,
+ .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
+ .name = "lazy refcounts",
+ },
+ };
+
+ ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
+ features, sizeof(features), buflen);
+ if (ret < 0) {
+ goto fail;
+ }
+ buf += ret;
+ buflen -= ret;
+
+ /* Keep unknown header extensions */
+ QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
+ ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ buf += ret;
+ buflen -= ret;
+ }
+
+ /* End of header extensions */
+ ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ buf += ret;
+ buflen -= ret;
+
+ /* Backing file name */
+ if (*bs->backing_file) {
+ size_t backing_file_len = strlen(bs->backing_file);
+
+ if (buflen < backing_file_len) {
+ ret = -ENOSPC;
+ goto fail;
+ }
+
+ /* Using strncpy is ok here, since buf is not NUL-terminated. */
+ strncpy(buf, bs->backing_file, buflen);
+
+ header->backing_file_offset = cpu_to_be64(buf - ((char*) header));
+ header->backing_file_size = cpu_to_be32(backing_file_len);
+ }
+
+ /* Write the new header */
+ ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ ret = 0;
+fail:
+ qemu_vfree(header);
+ return ret;
+}
+
+static int qcow2_change_backing_file(BlockDriverState *bs,
+ const char *backing_file, const char *backing_fmt)
+{
+ pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
+ pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
+
+ return qcow2_update_header(bs);
+}
+
+static int preallocate(BlockDriverState *bs)
+{
+ uint64_t nb_sectors;
+ uint64_t offset;
+ uint64_t host_offset = 0;
+ int num;
+ int ret;
+ QCowL2Meta *meta;
+
+ nb_sectors = bdrv_getlength(bs) >> 9;
+ offset = 0;
+
+ while (nb_sectors) {
+ num = MIN(nb_sectors, INT_MAX >> 9);
+ ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, &num,
+ &host_offset, &meta);
+ if (ret < 0) {
+ return ret;
+ }
+
+ ret = qcow2_alloc_cluster_link_l2(bs, meta);
+ if (ret < 0) {
+ qcow2_free_any_clusters(bs, meta->alloc_offset, meta->nb_clusters,
+ QCOW2_DISCARD_NEVER);
+ return ret;
+ }
+
+ /* There are no dependent requests, but we need to remove our request
+ * from the list of in-flight requests */
+ if (meta != NULL) {
+ QLIST_REMOVE(meta, next_in_flight);
+ }
+
+ /* TODO Preallocate data if requested */
+
+ nb_sectors -= num;
+ offset += num << 9;
+ }
+
+ /*
+ * It is expected that the image file is large enough to actually contain
+ * all of the allocated clusters (otherwise we get failing reads after
+ * EOF). Extend the image to the last allocated sector.
+ */
+ if (host_offset != 0) {
+ uint8_t buf[512];
+ memset(buf, 0, 512);
+ ret = bdrv_write(bs->file, (host_offset >> 9) + num - 1, buf, 1);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int qcow2_create2(const char *filename, int64_t total_size,
+ const char *backing_file, const char *backing_format,
+ int flags, size_t cluster_size, int prealloc,
+ QEMUOptionParameter *options, int version)
+{
+ /* Calculate cluster_bits */
+ int cluster_bits;
+ cluster_bits = ffs(cluster_size) - 1;
+ if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS ||
+ (1 << cluster_bits) != cluster_size)
+ {
+ error_report(
+ "Cluster size must be a power of two between %d and %dk",
+ 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10));
+ return -EINVAL;
+ }
+
+ /*
+ * Open the image file and write a minimal qcow2 header.
+ *
+ * We keep things simple and start with a zero-sized image. We also
+ * do without refcount blocks or a L1 table for now. We'll fix the
+ * inconsistency later.
+ *
+ * We do need a refcount table because growing the refcount table means
+ * allocating two new refcount blocks - the seconds of which would be at
+ * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file
+ * size for any qcow2 image.
+ */
+ BlockDriverState* bs;
+ QCowHeader header;
+ uint8_t* refcount_table;
+ int ret;
+
+ ret = bdrv_create_file(filename, options);
+ if (ret < 0) {
+ return ret;
+ }
+
+ ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Write the header */
+ memset(&header, 0, sizeof(header));
+ header.magic = cpu_to_be32(QCOW_MAGIC);
+ header.version = cpu_to_be32(version);
+ header.cluster_bits = cpu_to_be32(cluster_bits);
+ header.size = cpu_to_be64(0);
+ header.l1_table_offset = cpu_to_be64(0);
+ header.l1_size = cpu_to_be32(0);
+ header.refcount_table_offset = cpu_to_be64(cluster_size);
+ header.refcount_table_clusters = cpu_to_be32(1);
+ header.refcount_order = cpu_to_be32(3 + REFCOUNT_SHIFT);
+ header.header_length = cpu_to_be32(sizeof(header));
+
+ if (flags & BLOCK_FLAG_ENCRYPT) {
+ header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
+ } else {
+ header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
+ }
+
+ if (flags & BLOCK_FLAG_LAZY_REFCOUNTS) {
+ header.compatible_features |=
+ cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
+ }
+
+ ret = bdrv_pwrite(bs, 0, &header, sizeof(header));
+ if (ret < 0) {
+ goto out;
+ }
+
+ /* Write an empty refcount table */
+ refcount_table = g_malloc0(cluster_size);
+ ret = bdrv_pwrite(bs, cluster_size, refcount_table, cluster_size);
+ g_free(refcount_table);
+
+ if (ret < 0) {
+ goto out;
+ }
+
+ bdrv_close(bs);
+
+ /*
+ * And now open the image and make it consistent first (i.e. increase the
+ * refcount of the cluster that is occupied by the header and the refcount
+ * table)
+ */
+ BlockDriver* drv = bdrv_find_format("qcow2");
+ assert(drv != NULL);
+ ret = bdrv_open(bs, filename, NULL,
+ BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, drv);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = qcow2_alloc_clusters(bs, 2 * cluster_size);
+ if (ret < 0) {
+ goto out;
+
+ } else if (ret != 0) {
+ error_report("Huh, first cluster in empty image is already in use?");
+ abort();
+ }
+
+ /* Okay, now that we have a valid image, let's give it the right size */
+ ret = bdrv_truncate(bs, total_size * BDRV_SECTOR_SIZE);
+ if (ret < 0) {
+ goto out;
+ }
+
+ /* Want a backing file? There you go.*/
+ if (backing_file) {
+ ret = bdrv_change_backing_file(bs, backing_file, backing_format);
+ if (ret < 0) {
+ goto out;
+ }
+ }
+
+ /* And if we're supposed to preallocate metadata, do that now */
+ if (prealloc) {
+ BDRVQcowState *s = bs->opaque;
+ qemu_co_mutex_lock(&s->lock);
+ ret = preallocate(bs);
+ qemu_co_mutex_unlock(&s->lock);
+ if (ret < 0) {
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ bdrv_delete(bs);
+ return ret;
+}
+
+static int qcow2_create(const char *filename, QEMUOptionParameter *options)
+{
+ const char *backing_file = NULL;
+ const char *backing_fmt = NULL;
+ uint64_t sectors = 0;
+ int flags = 0;
+ size_t cluster_size = DEFAULT_CLUSTER_SIZE;
+ int prealloc = 0;
+ int version = 2;
+
+ /* Read out options */
+ while (options && options->name) {
+ if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
+ sectors = options->value.n / 512;
+ } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
+ backing_file = options->value.s;
+ } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) {
+ backing_fmt = options->value.s;
+ } else if (!strcmp(options->name, BLOCK_OPT_ENCRYPT)) {
+ flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0;
+ } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) {
+ if (options->value.n) {
+ cluster_size = options->value.n;
+ }
+ } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) {
+ if (!options->value.s || !strcmp(options->value.s, "off")) {
+ prealloc = 0;
+ } else if (!strcmp(options->value.s, "metadata")) {
+ prealloc = 1;
+ } else {
+ fprintf(stderr, "Invalid preallocation mode: '%s'\n",
+ options->value.s);
+ return -EINVAL;
+ }
+ } else if (!strcmp(options->name, BLOCK_OPT_COMPAT_LEVEL)) {
+ if (!options->value.s || !strcmp(options->value.s, "0.10")) {
+ version = 2;
+ } else if (!strcmp(options->value.s, "1.1")) {
+ version = 3;
+ } else {
+ fprintf(stderr, "Invalid compatibility level: '%s'\n",
+ options->value.s);
+ return -EINVAL;
+ }
+ } else if (!strcmp(options->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
+ flags |= options->value.n ? BLOCK_FLAG_LAZY_REFCOUNTS : 0;
+ }
+ options++;
+ }
+
+ if (backing_file && prealloc) {
+ fprintf(stderr, "Backing file and preallocation cannot be used at "
+ "the same time\n");
+ return -EINVAL;
+ }
+
+ if (version < 3 && (flags & BLOCK_FLAG_LAZY_REFCOUNTS)) {
+ fprintf(stderr, "Lazy refcounts only supported with compatibility "
+ "level 1.1 and above (use compat=1.1 or greater)\n");
+ return -EINVAL;
+ }
+
+ return qcow2_create2(filename, sectors, backing_file, backing_fmt, flags,
+ cluster_size, prealloc, options, version);
+}
+
+static int qcow2_make_empty(BlockDriverState *bs)
+{
+#if 0
+ /* XXX: not correct */
+ BDRVQcowState *s = bs->opaque;
+ uint32_t l1_length = s->l1_size * sizeof(uint64_t);
+ int ret;
+
+ memset(s->l1_table, 0, l1_length);
+ if (bdrv_pwrite(bs->file, s->l1_table_offset, s->l1_table, l1_length) < 0)
+ return -1;
+ ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
+ if (ret < 0)
+ return ret;
+
+ l2_cache_reset(bs);
+#endif
+ return 0;
+}
+
+static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors)
+{
+ int ret;
+ BDRVQcowState *s = bs->opaque;
+
+ /* Emulate misaligned zero writes */
+ if (sector_num % s->cluster_sectors || nb_sectors % s->cluster_sectors) {
+ return -ENOTSUP;
+ }
+
+ /* Whatever is left can use real zero clusters */
+ qemu_co_mutex_lock(&s->lock);
+ ret = qcow2_zero_clusters(bs, sector_num << BDRV_SECTOR_BITS,
+ nb_sectors);
+ qemu_co_mutex_unlock(&s->lock);
+
+ return ret;
+}
+
+static coroutine_fn int qcow2_co_discard(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors)
+{
+ int ret;
+ BDRVQcowState *s = bs->opaque;
+
+ qemu_co_mutex_lock(&s->lock);
+ ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS,
+ nb_sectors);
+ qemu_co_mutex_unlock(&s->lock);
+ return ret;
+}
+
+static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
+{
+ BDRVQcowState *s = bs->opaque;
+ int64_t new_l1_size;
+ int ret;
+
+ if (offset & 511) {
+ error_report("The new size must be a multiple of 512");
+ return -EINVAL;
+ }
+
+ /* cannot proceed if image has snapshots */
+ if (s->nb_snapshots) {
+ error_report("Can't resize an image which has snapshots");
+ return -ENOTSUP;
+ }
+
+ /* shrinking is currently not supported */
+ if (offset < bs->total_sectors * 512) {
+ error_report("qcow2 doesn't support shrinking images yet");
+ return -ENOTSUP;
+ }
+
+ new_l1_size = size_to_l1(s, offset);
+ ret = qcow2_grow_l1_table(bs, new_l1_size, true);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* write updated header.size */
+ offset = cpu_to_be64(offset);
+ ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
+ &offset, sizeof(uint64_t));
+ if (ret < 0) {
+ return ret;
+ }
+
+ s->l1_vm_state_index = new_l1_size;
+ return 0;
+}
+
+/* XXX: put compressed sectors first, then all the cluster aligned
+ tables to avoid losing bytes in alignment */
+static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors)
+{
+ BDRVQcowState *s = bs->opaque;
+ z_stream strm;
+ int ret, out_len;
+ uint8_t *out_buf;
+ uint64_t cluster_offset;
+
+ if (nb_sectors == 0) {
+ /* align end of file to a sector boundary to ease reading with
+ sector based I/Os */
+ cluster_offset = bdrv_getlength(bs->file);
+ cluster_offset = (cluster_offset + 511) & ~511;
+ bdrv_truncate(bs->file, cluster_offset);
+ return 0;
+ }
+
+ if (nb_sectors != s->cluster_sectors) {
+ ret = -EINVAL;
+
+ /* Zero-pad last write if image size is not cluster aligned */
+ if (sector_num + nb_sectors == bs->total_sectors &&
+ nb_sectors < s->cluster_sectors) {
+ uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
+ memset(pad_buf, 0, s->cluster_size);
+ memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
+ ret = qcow2_write_compressed(bs, sector_num,
+ pad_buf, s->cluster_sectors);
+ qemu_vfree(pad_buf);
+ }
+ return ret;
+ }
+
+ out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
+
+ /* best compression, small window, no zlib header */
+ memset(&strm, 0, sizeof(strm));
+ ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
+ Z_DEFLATED, -12,
+ 9, Z_DEFAULT_STRATEGY);
+ if (ret != 0) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ strm.avail_in = s->cluster_size;
+ strm.next_in = (uint8_t *)buf;
+ strm.avail_out = s->cluster_size;
+ strm.next_out = out_buf;
+
+ ret = deflate(&strm, Z_FINISH);
+ if (ret != Z_STREAM_END && ret != Z_OK) {
+ deflateEnd(&strm);
+ ret = -EINVAL;
+ goto fail;
+ }
+ out_len = strm.next_out - out_buf;
+
+ deflateEnd(&strm);
+
+ if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
+ /* could not compress: write normal cluster */
+ ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
+ if (ret < 0) {
+ goto fail;
+ }
+ } else {
+ cluster_offset = qcow2_alloc_compressed_cluster_offset(bs,
+ sector_num << 9, out_len);
+ if (!cluster_offset) {
+ ret = -EIO;
+ goto fail;
+ }
+ cluster_offset &= s->cluster_offset_mask;
+ BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
+ ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+
+ ret = 0;
+fail:
+ g_free(out_buf);
+ return ret;
+}
+
+static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ int ret;
+
+ qemu_co_mutex_lock(&s->lock);
+ ret = qcow2_cache_flush(bs, s->l2_table_cache);
+ if (ret < 0) {
+ qemu_co_mutex_unlock(&s->lock);
+ return ret;
+ }
+
+ if (qcow2_need_accurate_refcounts(s)) {
+ ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+ if (ret < 0) {
+ qemu_co_mutex_unlock(&s->lock);
+ return ret;
+ }
+ }
+ qemu_co_mutex_unlock(&s->lock);
+
+ return 0;
+}
+
+static int64_t qcow2_vm_state_offset(BDRVQcowState *s)
+{
+ return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits);
+}
+
+static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+ BDRVQcowState *s = bs->opaque;
+ bdi->cluster_size = s->cluster_size;
+ bdi->vm_state_offset = qcow2_vm_state_offset(s);
+ return 0;
+}
+
+#if 0
+static void dump_refcounts(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ int64_t nb_clusters, k, k1, size;
+ int refcount;
+
+ size = bdrv_getlength(bs->file);
+ nb_clusters = size_to_clusters(s, size);
+ for(k = 0; k < nb_clusters;) {
+ k1 = k;
+ refcount = get_refcount(bs, k);
+ k++;
+ while (k < nb_clusters && get_refcount(bs, k) == refcount)
+ k++;
+ printf("%" PRId64 ": refcount=%d nb=%" PRId64 "\n", k, refcount,
+ k - k1);
+ }
+}
+#endif
+
+static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
+ int64_t pos)
+{
+ BDRVQcowState *s = bs->opaque;
+ int growable = bs->growable;
+ int ret;
+
+ BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
+ bs->growable = 1;
+ ret = bdrv_pwritev(bs, qcow2_vm_state_offset(s) + pos, qiov);
+ bs->growable = growable;
+
+ return ret;
+}
+
+static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf,
+ int64_t pos, int size)
+{
+ BDRVQcowState *s = bs->opaque;
+ int growable = bs->growable;
+ int ret;
+
+ BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
+ bs->growable = 1;
+ ret = bdrv_pread(bs, qcow2_vm_state_offset(s) + pos, buf, size);
+ bs->growable = growable;
+
+ return ret;
+}
+
+static QEMUOptionParameter qcow2_create_options[] = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ {
+ .name = BLOCK_OPT_COMPAT_LEVEL,
+ .type = OPT_STRING,
+ .help = "Compatibility level (0.10 or 1.1)"
+ },
+ {
+ .name = BLOCK_OPT_BACKING_FILE,
+ .type = OPT_STRING,
+ .help = "File name of a base image"
+ },
+ {
+ .name = BLOCK_OPT_BACKING_FMT,
+ .type = OPT_STRING,
+ .help = "Image format of the base image"
+ },
+ {
+ .name = BLOCK_OPT_ENCRYPT,
+ .type = OPT_FLAG,
+ .help = "Encrypt the image"
+ },
+ {
+ .name = BLOCK_OPT_CLUSTER_SIZE,
+ .type = OPT_SIZE,
+ .help = "qcow2 cluster size",
+ .value = { .n = DEFAULT_CLUSTER_SIZE },
+ },
+ {
+ .name = BLOCK_OPT_PREALLOC,
+ .type = OPT_STRING,
+ .help = "Preallocation mode (allowed values: off, metadata)"
+ },
+ {
+ .name = BLOCK_OPT_LAZY_REFCOUNTS,
+ .type = OPT_FLAG,
+ .help = "Postpone refcount updates",
+ },
+ { NULL }
+};
+
+static BlockDriver bdrv_qcow2 = {
+ .format_name = "qcow2",
+ .instance_size = sizeof(BDRVQcowState),
+ .bdrv_probe = qcow2_probe,
+ .bdrv_open = qcow2_open,
+ .bdrv_close = qcow2_close,
+ .bdrv_reopen_prepare = qcow2_reopen_prepare,
+ .bdrv_create = qcow2_create,
+ .bdrv_has_zero_init = bdrv_has_zero_init_1,
+ .bdrv_co_is_allocated = qcow2_co_is_allocated,
+ .bdrv_set_key = qcow2_set_key,
+ .bdrv_make_empty = qcow2_make_empty,
+
+ .bdrv_co_readv = qcow2_co_readv,
+ .bdrv_co_writev = qcow2_co_writev,
+ .bdrv_co_flush_to_os = qcow2_co_flush_to_os,
+
+ .bdrv_co_write_zeroes = qcow2_co_write_zeroes,
+ .bdrv_co_discard = qcow2_co_discard,
+ .bdrv_truncate = qcow2_truncate,
+ .bdrv_write_compressed = qcow2_write_compressed,
+
+ .bdrv_snapshot_create = qcow2_snapshot_create,
+ .bdrv_snapshot_goto = qcow2_snapshot_goto,
+ .bdrv_snapshot_delete = qcow2_snapshot_delete,
+ .bdrv_snapshot_list = qcow2_snapshot_list,
+ .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp,
+ .bdrv_get_info = qcow2_get_info,
+
+ .bdrv_save_vmstate = qcow2_save_vmstate,
+ .bdrv_load_vmstate = qcow2_load_vmstate,
+
+ .bdrv_change_backing_file = qcow2_change_backing_file,
+
+ .bdrv_invalidate_cache = qcow2_invalidate_cache,
+
+ .create_options = qcow2_create_options,
+ .bdrv_check = qcow2_check,
+};
+
+static void bdrv_qcow2_init(void)
+{
+ bdrv_register(&bdrv_qcow2);
+}
+
+block_init(bdrv_qcow2_init);
diff --git a/contrib/qemu/block/qcow2.h b/contrib/qemu/block/qcow2.h
new file mode 100644
index 000000000..3b2d5cda7
--- /dev/null
+++ b/contrib/qemu/block/qcow2.h
@@ -0,0 +1,437 @@
+/*
+ * Block driver for the QCOW version 2 format
+ *
+ * Copyright (c) 2004-2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef BLOCK_QCOW2_H
+#define BLOCK_QCOW2_H
+
+#include "qemu/aes.h"
+#include "block/coroutine.h"
+
+//#define DEBUG_ALLOC
+//#define DEBUG_ALLOC2
+//#define DEBUG_EXT
+
+#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
+
+#define QCOW_CRYPT_NONE 0
+#define QCOW_CRYPT_AES 1
+
+#define QCOW_MAX_CRYPT_CLUSTERS 32
+
+/* indicate that the refcount of the referenced cluster is exactly one. */
+#define QCOW_OFLAG_COPIED (1LL << 63)
+/* indicate that the cluster is compressed (they never have the copied flag) */
+#define QCOW_OFLAG_COMPRESSED (1LL << 62)
+/* The cluster reads as all zeros */
+#define QCOW_OFLAG_ZERO (1LL << 0)
+
+#define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes */
+
+#define MIN_CLUSTER_BITS 9
+#define MAX_CLUSTER_BITS 21
+
+#define L2_CACHE_SIZE 16
+
+/* Must be at least 4 to cover all cases of refcount table growth */
+#define REFCOUNT_CACHE_SIZE 4
+
+#define DEFAULT_CLUSTER_SIZE 65536
+
+
+#define QCOW2_OPT_LAZY_REFCOUNTS "lazy_refcounts"
+#define QCOW2_OPT_DISCARD_REQUEST "pass_discard_request"
+#define QCOW2_OPT_DISCARD_SNAPSHOT "pass_discard_snapshot"
+#define QCOW2_OPT_DISCARD_OTHER "pass_discard_other"
+
+typedef struct QCowHeader {
+ uint32_t magic;
+ uint32_t version;
+ uint64_t backing_file_offset;
+ uint32_t backing_file_size;
+ uint32_t cluster_bits;
+ uint64_t size; /* in bytes */
+ uint32_t crypt_method;
+ uint32_t l1_size; /* XXX: save number of clusters instead ? */
+ uint64_t l1_table_offset;
+ uint64_t refcount_table_offset;
+ uint32_t refcount_table_clusters;
+ uint32_t nb_snapshots;
+ uint64_t snapshots_offset;
+
+ /* The following fields are only valid for version >= 3 */
+ uint64_t incompatible_features;
+ uint64_t compatible_features;
+ uint64_t autoclear_features;
+
+ uint32_t refcount_order;
+ uint32_t header_length;
+} QCowHeader;
+
+typedef struct QCowSnapshot {
+ uint64_t l1_table_offset;
+ uint32_t l1_size;
+ char *id_str;
+ char *name;
+ uint64_t disk_size;
+ uint64_t vm_state_size;
+ uint32_t date_sec;
+ uint32_t date_nsec;
+ uint64_t vm_clock_nsec;
+} QCowSnapshot;
+
+struct Qcow2Cache;
+typedef struct Qcow2Cache Qcow2Cache;
+
+typedef struct Qcow2UnknownHeaderExtension {
+ uint32_t magic;
+ uint32_t len;
+ QLIST_ENTRY(Qcow2UnknownHeaderExtension) next;
+ uint8_t data[];
+} Qcow2UnknownHeaderExtension;
+
+enum {
+ QCOW2_FEAT_TYPE_INCOMPATIBLE = 0,
+ QCOW2_FEAT_TYPE_COMPATIBLE = 1,
+ QCOW2_FEAT_TYPE_AUTOCLEAR = 2,
+};
+
+/* Incompatible feature bits */
+enum {
+ QCOW2_INCOMPAT_DIRTY_BITNR = 0,
+ QCOW2_INCOMPAT_DIRTY = 1 << QCOW2_INCOMPAT_DIRTY_BITNR,
+
+ QCOW2_INCOMPAT_MASK = QCOW2_INCOMPAT_DIRTY,
+};
+
+/* Compatible feature bits */
+enum {
+ QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR = 0,
+ QCOW2_COMPAT_LAZY_REFCOUNTS = 1 << QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
+
+ QCOW2_COMPAT_FEAT_MASK = QCOW2_COMPAT_LAZY_REFCOUNTS,
+};
+
+enum qcow2_discard_type {
+ QCOW2_DISCARD_NEVER = 0,
+ QCOW2_DISCARD_ALWAYS,
+ QCOW2_DISCARD_REQUEST,
+ QCOW2_DISCARD_SNAPSHOT,
+ QCOW2_DISCARD_OTHER,
+ QCOW2_DISCARD_MAX
+};
+
+typedef struct Qcow2Feature {
+ uint8_t type;
+ uint8_t bit;
+ char name[46];
+} QEMU_PACKED Qcow2Feature;
+
+typedef struct Qcow2DiscardRegion {
+ BlockDriverState *bs;
+ uint64_t offset;
+ uint64_t bytes;
+ QTAILQ_ENTRY(Qcow2DiscardRegion) next;
+} Qcow2DiscardRegion;
+
+typedef struct BDRVQcowState {
+ int cluster_bits;
+ int cluster_size;
+ int cluster_sectors;
+ int l2_bits;
+ int l2_size;
+ int l1_size;
+ int l1_vm_state_index;
+ int csize_shift;
+ int csize_mask;
+ uint64_t cluster_offset_mask;
+ uint64_t l1_table_offset;
+ uint64_t *l1_table;
+
+ Qcow2Cache* l2_table_cache;
+ Qcow2Cache* refcount_block_cache;
+
+ uint8_t *cluster_cache;
+ uint8_t *cluster_data;
+ uint64_t cluster_cache_offset;
+ QLIST_HEAD(QCowClusterAlloc, QCowL2Meta) cluster_allocs;
+
+ uint64_t *refcount_table;
+ uint64_t refcount_table_offset;
+ uint32_t refcount_table_size;
+ int64_t free_cluster_index;
+ int64_t free_byte_offset;
+
+ CoMutex lock;
+
+ uint32_t crypt_method; /* current crypt method, 0 if no key yet */
+ uint32_t crypt_method_header;
+ AES_KEY aes_encrypt_key;
+ AES_KEY aes_decrypt_key;
+ uint64_t snapshots_offset;
+ int snapshots_size;
+ int nb_snapshots;
+ QCowSnapshot *snapshots;
+
+ int flags;
+ int qcow_version;
+ bool use_lazy_refcounts;
+
+ bool discard_passthrough[QCOW2_DISCARD_MAX];
+
+ uint64_t incompatible_features;
+ uint64_t compatible_features;
+ uint64_t autoclear_features;
+
+ size_t unknown_header_fields_size;
+ void* unknown_header_fields;
+ QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext;
+ QTAILQ_HEAD (, Qcow2DiscardRegion) discards;
+ bool cache_discards;
+} BDRVQcowState;
+
+/* XXX: use std qcow open function ? */
+typedef struct QCowCreateState {
+ int cluster_size;
+ int cluster_bits;
+ uint16_t *refcount_block;
+ uint64_t *refcount_table;
+ int64_t l1_table_offset;
+ int64_t refcount_table_offset;
+ int64_t refcount_block_offset;
+} QCowCreateState;
+
+struct QCowAIOCB;
+
+typedef struct Qcow2COWRegion {
+ /**
+ * Offset of the COW region in bytes from the start of the first cluster
+ * touched by the request.
+ */
+ uint64_t offset;
+
+ /** Number of sectors to copy */
+ int nb_sectors;
+} Qcow2COWRegion;
+
+/**
+ * Describes an in-flight (part of a) write request that writes to clusters
+ * that are not referenced in their L2 table yet.
+ */
+typedef struct QCowL2Meta
+{
+ /** Guest offset of the first newly allocated cluster */
+ uint64_t offset;
+
+ /** Host offset of the first newly allocated cluster */
+ uint64_t alloc_offset;
+
+ /**
+ * Number of sectors from the start of the first allocated cluster to
+ * the end of the (possibly shortened) request
+ */
+ int nb_available;
+
+ /** Number of newly allocated clusters */
+ int nb_clusters;
+
+ /**
+ * Requests that overlap with this allocation and wait to be restarted
+ * when the allocating request has completed.
+ */
+ CoQueue dependent_requests;
+
+ /**
+ * The COW Region between the start of the first allocated cluster and the
+ * area the guest actually writes to.
+ */
+ Qcow2COWRegion cow_start;
+
+ /**
+ * The COW Region between the area the guest actually writes to and the
+ * end of the last allocated cluster.
+ */
+ Qcow2COWRegion cow_end;
+
+ /** Pointer to next L2Meta of the same write request */
+ struct QCowL2Meta *next;
+
+ QLIST_ENTRY(QCowL2Meta) next_in_flight;
+} QCowL2Meta;
+
+enum {
+ QCOW2_CLUSTER_UNALLOCATED,
+ QCOW2_CLUSTER_NORMAL,
+ QCOW2_CLUSTER_COMPRESSED,
+ QCOW2_CLUSTER_ZERO
+};
+
+#define L1E_OFFSET_MASK 0x00ffffffffffff00ULL
+#define L2E_OFFSET_MASK 0x00ffffffffffff00ULL
+#define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL
+
+#define REFT_OFFSET_MASK 0xffffffffffffff00ULL
+
+static inline int64_t start_of_cluster(BDRVQcowState *s, int64_t offset)
+{
+ return offset & ~(s->cluster_size - 1);
+}
+
+static inline int64_t offset_into_cluster(BDRVQcowState *s, int64_t offset)
+{
+ return offset & (s->cluster_size - 1);
+}
+
+static inline int size_to_clusters(BDRVQcowState *s, int64_t size)
+{
+ return (size + (s->cluster_size - 1)) >> s->cluster_bits;
+}
+
+static inline int64_t size_to_l1(BDRVQcowState *s, int64_t size)
+{
+ int shift = s->cluster_bits + s->l2_bits;
+ return (size + (1ULL << shift) - 1) >> shift;
+}
+
+static inline int offset_to_l2_index(BDRVQcowState *s, int64_t offset)
+{
+ return (offset >> s->cluster_bits) & (s->l2_size - 1);
+}
+
+static inline int64_t align_offset(int64_t offset, int n)
+{
+ offset = (offset + n - 1) & ~(n - 1);
+ return offset;
+}
+
+static inline int qcow2_get_cluster_type(uint64_t l2_entry)
+{
+ if (l2_entry & QCOW_OFLAG_COMPRESSED) {
+ return QCOW2_CLUSTER_COMPRESSED;
+ } else if (l2_entry & QCOW_OFLAG_ZERO) {
+ return QCOW2_CLUSTER_ZERO;
+ } else if (!(l2_entry & L2E_OFFSET_MASK)) {
+ return QCOW2_CLUSTER_UNALLOCATED;
+ } else {
+ return QCOW2_CLUSTER_NORMAL;
+ }
+}
+
+/* Check whether refcounts are eager or lazy */
+static inline bool qcow2_need_accurate_refcounts(BDRVQcowState *s)
+{
+ return !(s->incompatible_features & QCOW2_INCOMPAT_DIRTY);
+}
+
+static inline uint64_t l2meta_cow_start(QCowL2Meta *m)
+{
+ return m->offset + m->cow_start.offset;
+}
+
+static inline uint64_t l2meta_cow_end(QCowL2Meta *m)
+{
+ return m->offset + m->cow_end.offset
+ + (m->cow_end.nb_sectors << BDRV_SECTOR_BITS);
+}
+
+// FIXME Need qcow2_ prefix to global functions
+
+/* qcow2.c functions */
+int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
+ int64_t sector_num, int nb_sectors);
+
+int qcow2_mark_dirty(BlockDriverState *bs);
+int qcow2_update_header(BlockDriverState *bs);
+
+/* qcow2-refcount.c functions */
+int qcow2_refcount_init(BlockDriverState *bs);
+void qcow2_refcount_close(BlockDriverState *bs);
+
+int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size);
+int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
+ int nb_clusters);
+int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size);
+void qcow2_free_clusters(BlockDriverState *bs,
+ int64_t offset, int64_t size,
+ enum qcow2_discard_type type);
+void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
+ int nb_clusters, enum qcow2_discard_type type);
+
+int qcow2_update_snapshot_refcount(BlockDriverState *bs,
+ int64_t l1_table_offset, int l1_size, int addend);
+
+int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
+ BdrvCheckMode fix);
+
+void qcow2_process_discards(BlockDriverState *bs, int ret);
+
+/* qcow2-cluster.c functions */
+int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
+ bool exact_size);
+void qcow2_l2_cache_reset(BlockDriverState *bs);
+int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
+void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
+ uint8_t *out_buf, const uint8_t *in_buf,
+ int nb_sectors, int enc,
+ const AES_KEY *key);
+
+int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
+ int *num, uint64_t *cluster_offset);
+int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
+ int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m);
+uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
+ uint64_t offset,
+ int compressed_size);
+
+int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
+int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
+ int nb_sectors);
+int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors);
+
+/* qcow2-snapshot.c functions */
+int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
+int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id);
+int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id);
+int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab);
+int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name);
+
+void qcow2_free_snapshots(BlockDriverState *bs);
+int qcow2_read_snapshots(BlockDriverState *bs);
+
+/* qcow2-cache.c functions */
+Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables);
+int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c);
+
+void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table);
+int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c);
+int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
+ Qcow2Cache *dependency);
+void qcow2_cache_depends_on_flush(Qcow2Cache *c);
+
+int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
+ void **table);
+int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
+ void **table);
+int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table);
+
+#endif
diff --git a/contrib/qemu/block/qed-check.c b/contrib/qemu/block/qed-check.c
new file mode 100644
index 000000000..b473dcd61
--- /dev/null
+++ b/contrib/qemu/block/qed-check.c
@@ -0,0 +1,248 @@
+/*
+ * QEMU Enhanced Disk Format Consistency Check
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qed.h"
+
+typedef struct {
+ BDRVQEDState *s;
+ BdrvCheckResult *result;
+ bool fix; /* whether to fix invalid offsets */
+
+ uint64_t nclusters;
+ uint32_t *used_clusters; /* referenced cluster bitmap */
+
+ QEDRequest request;
+} QEDCheck;
+
+static bool qed_test_bit(uint32_t *bitmap, uint64_t n) {
+ return !!(bitmap[n / 32] & (1 << (n % 32)));
+}
+
+static void qed_set_bit(uint32_t *bitmap, uint64_t n) {
+ bitmap[n / 32] |= 1 << (n % 32);
+}
+
+/**
+ * Set bitmap bits for clusters
+ *
+ * @check: Check structure
+ * @offset: Starting offset in bytes
+ * @n: Number of clusters
+ */
+static bool qed_set_used_clusters(QEDCheck *check, uint64_t offset,
+ unsigned int n)
+{
+ uint64_t cluster = qed_bytes_to_clusters(check->s, offset);
+ unsigned int corruptions = 0;
+
+ while (n-- != 0) {
+ /* Clusters should only be referenced once */
+ if (qed_test_bit(check->used_clusters, cluster)) {
+ corruptions++;
+ }
+
+ qed_set_bit(check->used_clusters, cluster);
+ cluster++;
+ }
+
+ check->result->corruptions += corruptions;
+ return corruptions == 0;
+}
+
+/**
+ * Check an L2 table
+ *
+ * @ret: Number of invalid cluster offsets
+ */
+static unsigned int qed_check_l2_table(QEDCheck *check, QEDTable *table)
+{
+ BDRVQEDState *s = check->s;
+ unsigned int i, num_invalid = 0;
+ uint64_t last_offset = 0;
+
+ for (i = 0; i < s->table_nelems; i++) {
+ uint64_t offset = table->offsets[i];
+
+ if (qed_offset_is_unalloc_cluster(offset) ||
+ qed_offset_is_zero_cluster(offset)) {
+ continue;
+ }
+ check->result->bfi.allocated_clusters++;
+ if (last_offset && (last_offset + s->header.cluster_size != offset)) {
+ check->result->bfi.fragmented_clusters++;
+ }
+ last_offset = offset;
+
+ /* Detect invalid cluster offset */
+ if (!qed_check_cluster_offset(s, offset)) {
+ if (check->fix) {
+ table->offsets[i] = 0;
+ check->result->corruptions_fixed++;
+ } else {
+ check->result->corruptions++;
+ }
+
+ num_invalid++;
+ continue;
+ }
+
+ qed_set_used_clusters(check, offset, 1);
+ }
+
+ return num_invalid;
+}
+
+/**
+ * Descend tables and check each cluster is referenced once only
+ */
+static int qed_check_l1_table(QEDCheck *check, QEDTable *table)
+{
+ BDRVQEDState *s = check->s;
+ unsigned int i, num_invalid_l1 = 0;
+ int ret, last_error = 0;
+
+ /* Mark L1 table clusters used */
+ qed_set_used_clusters(check, s->header.l1_table_offset,
+ s->header.table_size);
+
+ for (i = 0; i < s->table_nelems; i++) {
+ unsigned int num_invalid_l2;
+ uint64_t offset = table->offsets[i];
+
+ if (qed_offset_is_unalloc_cluster(offset)) {
+ continue;
+ }
+
+ /* Detect invalid L2 offset */
+ if (!qed_check_table_offset(s, offset)) {
+ /* Clear invalid offset */
+ if (check->fix) {
+ table->offsets[i] = 0;
+ check->result->corruptions_fixed++;
+ } else {
+ check->result->corruptions++;
+ }
+
+ num_invalid_l1++;
+ continue;
+ }
+
+ if (!qed_set_used_clusters(check, offset, s->header.table_size)) {
+ continue; /* skip an invalid table */
+ }
+
+ ret = qed_read_l2_table_sync(s, &check->request, offset);
+ if (ret) {
+ check->result->check_errors++;
+ last_error = ret;
+ continue;
+ }
+
+ num_invalid_l2 = qed_check_l2_table(check,
+ check->request.l2_table->table);
+
+ /* Write out fixed L2 table */
+ if (num_invalid_l2 > 0 && check->fix) {
+ ret = qed_write_l2_table_sync(s, &check->request, 0,
+ s->table_nelems, false);
+ if (ret) {
+ check->result->check_errors++;
+ last_error = ret;
+ continue;
+ }
+ }
+ }
+
+ /* Drop reference to final table */
+ qed_unref_l2_cache_entry(check->request.l2_table);
+ check->request.l2_table = NULL;
+
+ /* Write out fixed L1 table */
+ if (num_invalid_l1 > 0 && check->fix) {
+ ret = qed_write_l1_table_sync(s, 0, s->table_nelems);
+ if (ret) {
+ check->result->check_errors++;
+ last_error = ret;
+ }
+ }
+
+ return last_error;
+}
+
+/**
+ * Check for unreferenced (leaked) clusters
+ */
+static void qed_check_for_leaks(QEDCheck *check)
+{
+ BDRVQEDState *s = check->s;
+ uint64_t i;
+
+ for (i = s->header.header_size; i < check->nclusters; i++) {
+ if (!qed_test_bit(check->used_clusters, i)) {
+ check->result->leaks++;
+ }
+ }
+}
+
+/**
+ * Mark an image clean once it passes check or has been repaired
+ */
+static void qed_check_mark_clean(BDRVQEDState *s, BdrvCheckResult *result)
+{
+ /* Skip if there were unfixable corruptions or I/O errors */
+ if (result->corruptions > 0 || result->check_errors > 0) {
+ return;
+ }
+
+ /* Skip if image is already marked clean */
+ if (!(s->header.features & QED_F_NEED_CHECK)) {
+ return;
+ }
+
+ /* Ensure fixes reach storage before clearing check bit */
+ bdrv_flush(s->bs);
+
+ s->header.features &= ~QED_F_NEED_CHECK;
+ qed_write_header_sync(s);
+}
+
+int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix)
+{
+ QEDCheck check = {
+ .s = s,
+ .result = result,
+ .nclusters = qed_bytes_to_clusters(s, s->file_size),
+ .request = { .l2_table = NULL },
+ .fix = fix,
+ };
+ int ret;
+
+ check.used_clusters = g_malloc0(((check.nclusters + 31) / 32) *
+ sizeof(check.used_clusters[0]));
+
+ check.result->bfi.total_clusters =
+ (s->header.image_size + s->header.cluster_size - 1) /
+ s->header.cluster_size;
+ ret = qed_check_l1_table(&check, s->l1_table);
+ if (ret == 0) {
+ /* Only check for leaks if entire image was scanned successfully */
+ qed_check_for_leaks(&check);
+
+ if (fix) {
+ qed_check_mark_clean(s, result);
+ }
+ }
+
+ g_free(check.used_clusters);
+ return ret;
+}
diff --git a/contrib/qemu/block/qed-cluster.c b/contrib/qemu/block/qed-cluster.c
new file mode 100644
index 000000000..f64b2af8f
--- /dev/null
+++ b/contrib/qemu/block/qed-cluster.c
@@ -0,0 +1,165 @@
+/*
+ * QEMU Enhanced Disk Format Cluster functions
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qed.h"
+
+/**
+ * Count the number of contiguous data clusters
+ *
+ * @s: QED state
+ * @table: L2 table
+ * @index: First cluster index
+ * @n: Maximum number of clusters
+ * @offset: Set to first cluster offset
+ *
+ * This function scans tables for contiguous clusters. A contiguous run of
+ * clusters may be allocated, unallocated, or zero.
+ */
+static unsigned int qed_count_contiguous_clusters(BDRVQEDState *s,
+ QEDTable *table,
+ unsigned int index,
+ unsigned int n,
+ uint64_t *offset)
+{
+ unsigned int end = MIN(index + n, s->table_nelems);
+ uint64_t last = table->offsets[index];
+ unsigned int i;
+
+ *offset = last;
+
+ for (i = index + 1; i < end; i++) {
+ if (qed_offset_is_unalloc_cluster(last)) {
+ /* Counting unallocated clusters */
+ if (!qed_offset_is_unalloc_cluster(table->offsets[i])) {
+ break;
+ }
+ } else if (qed_offset_is_zero_cluster(last)) {
+ /* Counting zero clusters */
+ if (!qed_offset_is_zero_cluster(table->offsets[i])) {
+ break;
+ }
+ } else {
+ /* Counting allocated clusters */
+ if (table->offsets[i] != last + s->header.cluster_size) {
+ break;
+ }
+ last = table->offsets[i];
+ }
+ }
+ return i - index;
+}
+
+typedef struct {
+ BDRVQEDState *s;
+ uint64_t pos;
+ size_t len;
+
+ QEDRequest *request;
+
+ /* User callback */
+ QEDFindClusterFunc *cb;
+ void *opaque;
+} QEDFindClusterCB;
+
+static void qed_find_cluster_cb(void *opaque, int ret)
+{
+ QEDFindClusterCB *find_cluster_cb = opaque;
+ BDRVQEDState *s = find_cluster_cb->s;
+ QEDRequest *request = find_cluster_cb->request;
+ uint64_t offset = 0;
+ size_t len = 0;
+ unsigned int index;
+ unsigned int n;
+
+ if (ret) {
+ goto out;
+ }
+
+ index = qed_l2_index(s, find_cluster_cb->pos);
+ n = qed_bytes_to_clusters(s,
+ qed_offset_into_cluster(s, find_cluster_cb->pos) +
+ find_cluster_cb->len);
+ n = qed_count_contiguous_clusters(s, request->l2_table->table,
+ index, n, &offset);
+
+ if (qed_offset_is_unalloc_cluster(offset)) {
+ ret = QED_CLUSTER_L2;
+ } else if (qed_offset_is_zero_cluster(offset)) {
+ ret = QED_CLUSTER_ZERO;
+ } else if (qed_check_cluster_offset(s, offset)) {
+ ret = QED_CLUSTER_FOUND;
+ } else {
+ ret = -EINVAL;
+ }
+
+ len = MIN(find_cluster_cb->len, n * s->header.cluster_size -
+ qed_offset_into_cluster(s, find_cluster_cb->pos));
+
+out:
+ find_cluster_cb->cb(find_cluster_cb->opaque, ret, offset, len);
+ g_free(find_cluster_cb);
+}
+
+/**
+ * Find the offset of a data cluster
+ *
+ * @s: QED state
+ * @request: L2 cache entry
+ * @pos: Byte position in device
+ * @len: Number of bytes
+ * @cb: Completion function
+ * @opaque: User data for completion function
+ *
+ * This function translates a position in the block device to an offset in the
+ * image file. It invokes the cb completion callback to report back the
+ * translated offset or unallocated range in the image file.
+ *
+ * If the L2 table exists, request->l2_table points to the L2 table cache entry
+ * and the caller must free the reference when they are finished. The cache
+ * entry is exposed in this way to avoid callers having to read the L2 table
+ * again later during request processing. If request->l2_table is non-NULL it
+ * will be unreferenced before taking on the new cache entry.
+ */
+void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
+ size_t len, QEDFindClusterFunc *cb, void *opaque)
+{
+ QEDFindClusterCB *find_cluster_cb;
+ uint64_t l2_offset;
+
+ /* Limit length to L2 boundary. Requests are broken up at the L2 boundary
+ * so that a request acts on one L2 table at a time.
+ */
+ len = MIN(len, (((pos >> s->l1_shift) + 1) << s->l1_shift) - pos);
+
+ l2_offset = s->l1_table->offsets[qed_l1_index(s, pos)];
+ if (qed_offset_is_unalloc_cluster(l2_offset)) {
+ cb(opaque, QED_CLUSTER_L1, 0, len);
+ return;
+ }
+ if (!qed_check_table_offset(s, l2_offset)) {
+ cb(opaque, -EINVAL, 0, 0);
+ return;
+ }
+
+ find_cluster_cb = g_malloc(sizeof(*find_cluster_cb));
+ find_cluster_cb->s = s;
+ find_cluster_cb->pos = pos;
+ find_cluster_cb->len = len;
+ find_cluster_cb->cb = cb;
+ find_cluster_cb->opaque = opaque;
+ find_cluster_cb->request = request;
+
+ qed_read_l2_table(s, request, l2_offset,
+ qed_find_cluster_cb, find_cluster_cb);
+}
diff --git a/contrib/qemu/block/qed-gencb.c b/contrib/qemu/block/qed-gencb.c
new file mode 100644
index 000000000..7d7ac1ffc
--- /dev/null
+++ b/contrib/qemu/block/qed-gencb.c
@@ -0,0 +1,32 @@
+/*
+ * QEMU Enhanced Disk Format
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qed.h"
+
+void *gencb_alloc(size_t len, BlockDriverCompletionFunc *cb, void *opaque)
+{
+ GenericCB *gencb = g_malloc(len);
+ gencb->cb = cb;
+ gencb->opaque = opaque;
+ return gencb;
+}
+
+void gencb_complete(void *opaque, int ret)
+{
+ GenericCB *gencb = opaque;
+ BlockDriverCompletionFunc *cb = gencb->cb;
+ void *user_opaque = gencb->opaque;
+
+ g_free(gencb);
+ cb(user_opaque, ret);
+}
diff --git a/contrib/qemu/block/qed-l2-cache.c b/contrib/qemu/block/qed-l2-cache.c
new file mode 100644
index 000000000..e9b2aae44
--- /dev/null
+++ b/contrib/qemu/block/qed-l2-cache.c
@@ -0,0 +1,187 @@
+/*
+ * QEMU Enhanced Disk Format L2 Cache
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+/*
+ * L2 table cache usage is as follows:
+ *
+ * An open image has one L2 table cache that is used to avoid accessing the
+ * image file for recently referenced L2 tables.
+ *
+ * Cluster offset lookup translates the logical offset within the block device
+ * to a cluster offset within the image file. This is done by indexing into
+ * the L1 and L2 tables which store cluster offsets. It is here where the L2
+ * table cache serves up recently referenced L2 tables.
+ *
+ * If there is a cache miss, that L2 table is read from the image file and
+ * committed to the cache. Subsequent accesses to that L2 table will be served
+ * from the cache until the table is evicted from the cache.
+ *
+ * L2 tables are also committed to the cache when new L2 tables are allocated
+ * in the image file. Since the L2 table cache is write-through, the new L2
+ * table is first written out to the image file and then committed to the
+ * cache.
+ *
+ * Multiple I/O requests may be using an L2 table cache entry at any given
+ * time. That means an entry may be in use across several requests and
+ * reference counting is needed to free the entry at the correct time. In
+ * particular, an entry evicted from the cache will only be freed once all
+ * references are dropped.
+ *
+ * An in-flight I/O request will hold a reference to a L2 table cache entry for
+ * the period during which it needs to access the L2 table. This includes
+ * cluster offset lookup, L2 table allocation, and L2 table update when a new
+ * data cluster has been allocated.
+ *
+ * An interesting case occurs when two requests need to access an L2 table that
+ * is not in the cache. Since the operation to read the table from the image
+ * file takes some time to complete, both requests may see a cache miss and
+ * start reading the L2 table from the image file. The first to finish will
+ * commit its L2 table into the cache. When the second tries to commit its
+ * table will be deleted in favor of the existing cache entry.
+ */
+
+#include "trace.h"
+#include "qed.h"
+
+/* Each L2 holds 2GB so this let's us fully cache a 100GB disk */
+#define MAX_L2_CACHE_SIZE 50
+
+/**
+ * Initialize the L2 cache
+ */
+void qed_init_l2_cache(L2TableCache *l2_cache)
+{
+ QTAILQ_INIT(&l2_cache->entries);
+ l2_cache->n_entries = 0;
+}
+
+/**
+ * Free the L2 cache
+ */
+void qed_free_l2_cache(L2TableCache *l2_cache)
+{
+ CachedL2Table *entry, *next_entry;
+
+ QTAILQ_FOREACH_SAFE(entry, &l2_cache->entries, node, next_entry) {
+ qemu_vfree(entry->table);
+ g_free(entry);
+ }
+}
+
+/**
+ * Allocate an uninitialized entry from the cache
+ *
+ * The returned entry has a reference count of 1 and is owned by the caller.
+ * The caller must allocate the actual table field for this entry and it must
+ * be freeable using qemu_vfree().
+ */
+CachedL2Table *qed_alloc_l2_cache_entry(L2TableCache *l2_cache)
+{
+ CachedL2Table *entry;
+
+ entry = g_malloc0(sizeof(*entry));
+ entry->ref++;
+
+ trace_qed_alloc_l2_cache_entry(l2_cache, entry);
+
+ return entry;
+}
+
+/**
+ * Decrease an entry's reference count and free if necessary when the reference
+ * count drops to zero.
+ */
+void qed_unref_l2_cache_entry(CachedL2Table *entry)
+{
+ if (!entry) {
+ return;
+ }
+
+ entry->ref--;
+ trace_qed_unref_l2_cache_entry(entry, entry->ref);
+ if (entry->ref == 0) {
+ qemu_vfree(entry->table);
+ g_free(entry);
+ }
+}
+
+/**
+ * Find an entry in the L2 cache. This may return NULL and it's up to the
+ * caller to satisfy the cache miss.
+ *
+ * For a cached entry, this function increases the reference count and returns
+ * the entry.
+ */
+CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset)
+{
+ CachedL2Table *entry;
+
+ QTAILQ_FOREACH(entry, &l2_cache->entries, node) {
+ if (entry->offset == offset) {
+ trace_qed_find_l2_cache_entry(l2_cache, entry, offset, entry->ref);
+ entry->ref++;
+ return entry;
+ }
+ }
+ return NULL;
+}
+
+/**
+ * Commit an L2 cache entry into the cache. This is meant to be used as part of
+ * the process to satisfy a cache miss. A caller would allocate an entry which
+ * is not actually in the L2 cache and then once the entry was valid and
+ * present on disk, the entry can be committed into the cache.
+ *
+ * Since the cache is write-through, it's important that this function is not
+ * called until the entry is present on disk and the L1 has been updated to
+ * point to the entry.
+ *
+ * N.B. This function steals a reference to the l2_table from the caller so the
+ * caller must obtain a new reference by issuing a call to
+ * qed_find_l2_cache_entry().
+ */
+void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table)
+{
+ CachedL2Table *entry;
+
+ entry = qed_find_l2_cache_entry(l2_cache, l2_table->offset);
+ if (entry) {
+ qed_unref_l2_cache_entry(entry);
+ qed_unref_l2_cache_entry(l2_table);
+ return;
+ }
+
+ /* Evict an unused cache entry so we have space. If all entries are in use
+ * we can grow the cache temporarily and we try to shrink back down later.
+ */
+ if (l2_cache->n_entries >= MAX_L2_CACHE_SIZE) {
+ CachedL2Table *next;
+ QTAILQ_FOREACH_SAFE(entry, &l2_cache->entries, node, next) {
+ if (entry->ref > 1) {
+ continue;
+ }
+
+ QTAILQ_REMOVE(&l2_cache->entries, entry, node);
+ l2_cache->n_entries--;
+ qed_unref_l2_cache_entry(entry);
+
+ /* Stop evicting when we've shrunk back to max size */
+ if (l2_cache->n_entries < MAX_L2_CACHE_SIZE) {
+ break;
+ }
+ }
+ }
+
+ l2_cache->n_entries++;
+ QTAILQ_INSERT_TAIL(&l2_cache->entries, l2_table, node);
+}
diff --git a/contrib/qemu/block/qed-table.c b/contrib/qemu/block/qed-table.c
new file mode 100644
index 000000000..76d2dcccf
--- /dev/null
+++ b/contrib/qemu/block/qed-table.c
@@ -0,0 +1,296 @@
+/*
+ * QEMU Enhanced Disk Format Table I/O
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "trace.h"
+#include "qemu/sockets.h" /* for EINPROGRESS on Windows */
+#include "qed.h"
+
+typedef struct {
+ GenericCB gencb;
+ BDRVQEDState *s;
+ QEDTable *table;
+
+ struct iovec iov;
+ QEMUIOVector qiov;
+} QEDReadTableCB;
+
+static void qed_read_table_cb(void *opaque, int ret)
+{
+ QEDReadTableCB *read_table_cb = opaque;
+ QEDTable *table = read_table_cb->table;
+ int noffsets = read_table_cb->qiov.size / sizeof(uint64_t);
+ int i;
+
+ /* Handle I/O error */
+ if (ret) {
+ goto out;
+ }
+
+ /* Byteswap offsets */
+ for (i = 0; i < noffsets; i++) {
+ table->offsets[i] = le64_to_cpu(table->offsets[i]);
+ }
+
+out:
+ /* Completion */
+ trace_qed_read_table_cb(read_table_cb->s, read_table_cb->table, ret);
+ gencb_complete(&read_table_cb->gencb, ret);
+}
+
+static void qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ QEDReadTableCB *read_table_cb = gencb_alloc(sizeof(*read_table_cb),
+ cb, opaque);
+ QEMUIOVector *qiov = &read_table_cb->qiov;
+
+ trace_qed_read_table(s, offset, table);
+
+ read_table_cb->s = s;
+ read_table_cb->table = table;
+ read_table_cb->iov.iov_base = table->offsets,
+ read_table_cb->iov.iov_len = s->header.cluster_size * s->header.table_size,
+
+ qemu_iovec_init_external(qiov, &read_table_cb->iov, 1);
+ bdrv_aio_readv(s->bs->file, offset / BDRV_SECTOR_SIZE, qiov,
+ qiov->size / BDRV_SECTOR_SIZE,
+ qed_read_table_cb, read_table_cb);
+}
+
+typedef struct {
+ GenericCB gencb;
+ BDRVQEDState *s;
+ QEDTable *orig_table;
+ QEDTable *table;
+ bool flush; /* flush after write? */
+
+ struct iovec iov;
+ QEMUIOVector qiov;
+} QEDWriteTableCB;
+
+static void qed_write_table_cb(void *opaque, int ret)
+{
+ QEDWriteTableCB *write_table_cb = opaque;
+
+ trace_qed_write_table_cb(write_table_cb->s,
+ write_table_cb->orig_table,
+ write_table_cb->flush,
+ ret);
+
+ if (ret) {
+ goto out;
+ }
+
+ if (write_table_cb->flush) {
+ /* We still need to flush first */
+ write_table_cb->flush = false;
+ bdrv_aio_flush(write_table_cb->s->bs, qed_write_table_cb,
+ write_table_cb);
+ return;
+ }
+
+out:
+ qemu_vfree(write_table_cb->table);
+ gencb_complete(&write_table_cb->gencb, ret);
+}
+
+/**
+ * Write out an updated part or all of a table
+ *
+ * @s: QED state
+ * @offset: Offset of table in image file, in bytes
+ * @table: Table
+ * @index: Index of first element
+ * @n: Number of elements
+ * @flush: Whether or not to sync to disk
+ * @cb: Completion function
+ * @opaque: Argument for completion function
+ */
+static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
+ unsigned int index, unsigned int n, bool flush,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ QEDWriteTableCB *write_table_cb;
+ unsigned int sector_mask = BDRV_SECTOR_SIZE / sizeof(uint64_t) - 1;
+ unsigned int start, end, i;
+ size_t len_bytes;
+
+ trace_qed_write_table(s, offset, table, index, n);
+
+ /* Calculate indices of the first and one after last elements */
+ start = index & ~sector_mask;
+ end = (index + n + sector_mask) & ~sector_mask;
+
+ len_bytes = (end - start) * sizeof(uint64_t);
+
+ write_table_cb = gencb_alloc(sizeof(*write_table_cb), cb, opaque);
+ write_table_cb->s = s;
+ write_table_cb->orig_table = table;
+ write_table_cb->flush = flush;
+ write_table_cb->table = qemu_blockalign(s->bs, len_bytes);
+ write_table_cb->iov.iov_base = write_table_cb->table->offsets;
+ write_table_cb->iov.iov_len = len_bytes;
+ qemu_iovec_init_external(&write_table_cb->qiov, &write_table_cb->iov, 1);
+
+ /* Byteswap table */
+ for (i = start; i < end; i++) {
+ uint64_t le_offset = cpu_to_le64(table->offsets[i]);
+ write_table_cb->table->offsets[i - start] = le_offset;
+ }
+
+ /* Adjust for offset into table */
+ offset += start * sizeof(uint64_t);
+
+ bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
+ &write_table_cb->qiov,
+ write_table_cb->qiov.size / BDRV_SECTOR_SIZE,
+ qed_write_table_cb, write_table_cb);
+}
+
+/**
+ * Propagate return value from async callback
+ */
+static void qed_sync_cb(void *opaque, int ret)
+{
+ *(int *)opaque = ret;
+}
+
+int qed_read_l1_table_sync(BDRVQEDState *s)
+{
+ int ret = -EINPROGRESS;
+
+ qed_read_table(s, s->header.l1_table_offset,
+ s->l1_table, qed_sync_cb, &ret);
+ while (ret == -EINPROGRESS) {
+ qemu_aio_wait();
+ }
+
+ return ret;
+}
+
+void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ BLKDBG_EVENT(s->bs->file, BLKDBG_L1_UPDATE);
+ qed_write_table(s, s->header.l1_table_offset,
+ s->l1_table, index, n, false, cb, opaque);
+}
+
+int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
+ unsigned int n)
+{
+ int ret = -EINPROGRESS;
+
+ qed_write_l1_table(s, index, n, qed_sync_cb, &ret);
+ while (ret == -EINPROGRESS) {
+ qemu_aio_wait();
+ }
+
+ return ret;
+}
+
+typedef struct {
+ GenericCB gencb;
+ BDRVQEDState *s;
+ uint64_t l2_offset;
+ QEDRequest *request;
+} QEDReadL2TableCB;
+
+static void qed_read_l2_table_cb(void *opaque, int ret)
+{
+ QEDReadL2TableCB *read_l2_table_cb = opaque;
+ QEDRequest *request = read_l2_table_cb->request;
+ BDRVQEDState *s = read_l2_table_cb->s;
+ CachedL2Table *l2_table = request->l2_table;
+ uint64_t l2_offset = read_l2_table_cb->l2_offset;
+
+ if (ret) {
+ /* can't trust loaded L2 table anymore */
+ qed_unref_l2_cache_entry(l2_table);
+ request->l2_table = NULL;
+ } else {
+ l2_table->offset = l2_offset;
+
+ qed_commit_l2_cache_entry(&s->l2_cache, l2_table);
+
+ /* This is guaranteed to succeed because we just committed the entry
+ * to the cache.
+ */
+ request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
+ assert(request->l2_table != NULL);
+ }
+
+ gencb_complete(&read_l2_table_cb->gencb, ret);
+}
+
+void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ QEDReadL2TableCB *read_l2_table_cb;
+
+ qed_unref_l2_cache_entry(request->l2_table);
+
+ /* Check for cached L2 entry */
+ request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, offset);
+ if (request->l2_table) {
+ cb(opaque, 0);
+ return;
+ }
+
+ request->l2_table = qed_alloc_l2_cache_entry(&s->l2_cache);
+ request->l2_table->table = qed_alloc_table(s);
+
+ read_l2_table_cb = gencb_alloc(sizeof(*read_l2_table_cb), cb, opaque);
+ read_l2_table_cb->s = s;
+ read_l2_table_cb->l2_offset = offset;
+ read_l2_table_cb->request = request;
+
+ BLKDBG_EVENT(s->bs->file, BLKDBG_L2_LOAD);
+ qed_read_table(s, offset, request->l2_table->table,
+ qed_read_l2_table_cb, read_l2_table_cb);
+}
+
+int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
+{
+ int ret = -EINPROGRESS;
+
+ qed_read_l2_table(s, request, offset, qed_sync_cb, &ret);
+ while (ret == -EINPROGRESS) {
+ qemu_aio_wait();
+ }
+
+ return ret;
+}
+
+void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
+ unsigned int index, unsigned int n, bool flush,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ BLKDBG_EVENT(s->bs->file, BLKDBG_L2_UPDATE);
+ qed_write_table(s, request->l2_table->offset,
+ request->l2_table->table, index, n, flush, cb, opaque);
+}
+
+int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
+ unsigned int index, unsigned int n, bool flush)
+{
+ int ret = -EINPROGRESS;
+
+ qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &ret);
+ while (ret == -EINPROGRESS) {
+ qemu_aio_wait();
+ }
+
+ return ret;
+}
diff --git a/contrib/qemu/block/qed.c b/contrib/qemu/block/qed.c
new file mode 100644
index 000000000..f767b0528
--- /dev/null
+++ b/contrib/qemu/block/qed.c
@@ -0,0 +1,1596 @@
+/*
+ * QEMU Enhanced Disk Format
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/timer.h"
+#include "trace.h"
+#include "qed.h"
+#include "qapi/qmp/qerror.h"
+#include "migration/migration.h"
+
+static void qed_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+ QEDAIOCB *acb = (QEDAIOCB *)blockacb;
+ bool finished = false;
+
+ /* Wait for the request to finish */
+ acb->finished = &finished;
+ while (!finished) {
+ qemu_aio_wait();
+ }
+}
+
+static const AIOCBInfo qed_aiocb_info = {
+ .aiocb_size = sizeof(QEDAIOCB),
+ .cancel = qed_aio_cancel,
+};
+
+static int bdrv_qed_probe(const uint8_t *buf, int buf_size,
+ const char *filename)
+{
+ const QEDHeader *header = (const QEDHeader *)buf;
+
+ if (buf_size < sizeof(*header)) {
+ return 0;
+ }
+ if (le32_to_cpu(header->magic) != QED_MAGIC) {
+ return 0;
+ }
+ return 100;
+}
+
+/**
+ * Check whether an image format is raw
+ *
+ * @fmt: Backing file format, may be NULL
+ */
+static bool qed_fmt_is_raw(const char *fmt)
+{
+ return fmt && strcmp(fmt, "raw") == 0;
+}
+
+static void qed_header_le_to_cpu(const QEDHeader *le, QEDHeader *cpu)
+{
+ cpu->magic = le32_to_cpu(le->magic);
+ cpu->cluster_size = le32_to_cpu(le->cluster_size);
+ cpu->table_size = le32_to_cpu(le->table_size);
+ cpu->header_size = le32_to_cpu(le->header_size);
+ cpu->features = le64_to_cpu(le->features);
+ cpu->compat_features = le64_to_cpu(le->compat_features);
+ cpu->autoclear_features = le64_to_cpu(le->autoclear_features);
+ cpu->l1_table_offset = le64_to_cpu(le->l1_table_offset);
+ cpu->image_size = le64_to_cpu(le->image_size);
+ cpu->backing_filename_offset = le32_to_cpu(le->backing_filename_offset);
+ cpu->backing_filename_size = le32_to_cpu(le->backing_filename_size);
+}
+
+static void qed_header_cpu_to_le(const QEDHeader *cpu, QEDHeader *le)
+{
+ le->magic = cpu_to_le32(cpu->magic);
+ le->cluster_size = cpu_to_le32(cpu->cluster_size);
+ le->table_size = cpu_to_le32(cpu->table_size);
+ le->header_size = cpu_to_le32(cpu->header_size);
+ le->features = cpu_to_le64(cpu->features);
+ le->compat_features = cpu_to_le64(cpu->compat_features);
+ le->autoclear_features = cpu_to_le64(cpu->autoclear_features);
+ le->l1_table_offset = cpu_to_le64(cpu->l1_table_offset);
+ le->image_size = cpu_to_le64(cpu->image_size);
+ le->backing_filename_offset = cpu_to_le32(cpu->backing_filename_offset);
+ le->backing_filename_size = cpu_to_le32(cpu->backing_filename_size);
+}
+
+int qed_write_header_sync(BDRVQEDState *s)
+{
+ QEDHeader le;
+ int ret;
+
+ qed_header_cpu_to_le(&s->header, &le);
+ ret = bdrv_pwrite(s->bs->file, 0, &le, sizeof(le));
+ if (ret != sizeof(le)) {
+ return ret;
+ }
+ return 0;
+}
+
+typedef struct {
+ GenericCB gencb;
+ BDRVQEDState *s;
+ struct iovec iov;
+ QEMUIOVector qiov;
+ int nsectors;
+ uint8_t *buf;
+} QEDWriteHeaderCB;
+
+static void qed_write_header_cb(void *opaque, int ret)
+{
+ QEDWriteHeaderCB *write_header_cb = opaque;
+
+ qemu_vfree(write_header_cb->buf);
+ gencb_complete(write_header_cb, ret);
+}
+
+static void qed_write_header_read_cb(void *opaque, int ret)
+{
+ QEDWriteHeaderCB *write_header_cb = opaque;
+ BDRVQEDState *s = write_header_cb->s;
+
+ if (ret) {
+ qed_write_header_cb(write_header_cb, ret);
+ return;
+ }
+
+ /* Update header */
+ qed_header_cpu_to_le(&s->header, (QEDHeader *)write_header_cb->buf);
+
+ bdrv_aio_writev(s->bs->file, 0, &write_header_cb->qiov,
+ write_header_cb->nsectors, qed_write_header_cb,
+ write_header_cb);
+}
+
+/**
+ * Update header in-place (does not rewrite backing filename or other strings)
+ *
+ * This function only updates known header fields in-place and does not affect
+ * extra data after the QED header.
+ */
+static void qed_write_header(BDRVQEDState *s, BlockDriverCompletionFunc cb,
+ void *opaque)
+{
+ /* We must write full sectors for O_DIRECT but cannot necessarily generate
+ * the data following the header if an unrecognized compat feature is
+ * active. Therefore, first read the sectors containing the header, update
+ * them, and write back.
+ */
+
+ int nsectors = (sizeof(QEDHeader) + BDRV_SECTOR_SIZE - 1) /
+ BDRV_SECTOR_SIZE;
+ size_t len = nsectors * BDRV_SECTOR_SIZE;
+ QEDWriteHeaderCB *write_header_cb = gencb_alloc(sizeof(*write_header_cb),
+ cb, opaque);
+
+ write_header_cb->s = s;
+ write_header_cb->nsectors = nsectors;
+ write_header_cb->buf = qemu_blockalign(s->bs, len);
+ write_header_cb->iov.iov_base = write_header_cb->buf;
+ write_header_cb->iov.iov_len = len;
+ qemu_iovec_init_external(&write_header_cb->qiov, &write_header_cb->iov, 1);
+
+ bdrv_aio_readv(s->bs->file, 0, &write_header_cb->qiov, nsectors,
+ qed_write_header_read_cb, write_header_cb);
+}
+
+static uint64_t qed_max_image_size(uint32_t cluster_size, uint32_t table_size)
+{
+ uint64_t table_entries;
+ uint64_t l2_size;
+
+ table_entries = (table_size * cluster_size) / sizeof(uint64_t);
+ l2_size = table_entries * cluster_size;
+
+ return l2_size * table_entries;
+}
+
+static bool qed_is_cluster_size_valid(uint32_t cluster_size)
+{
+ if (cluster_size < QED_MIN_CLUSTER_SIZE ||
+ cluster_size > QED_MAX_CLUSTER_SIZE) {
+ return false;
+ }
+ if (cluster_size & (cluster_size - 1)) {
+ return false; /* not power of 2 */
+ }
+ return true;
+}
+
+static bool qed_is_table_size_valid(uint32_t table_size)
+{
+ if (table_size < QED_MIN_TABLE_SIZE ||
+ table_size > QED_MAX_TABLE_SIZE) {
+ return false;
+ }
+ if (table_size & (table_size - 1)) {
+ return false; /* not power of 2 */
+ }
+ return true;
+}
+
+static bool qed_is_image_size_valid(uint64_t image_size, uint32_t cluster_size,
+ uint32_t table_size)
+{
+ if (image_size % BDRV_SECTOR_SIZE != 0) {
+ return false; /* not multiple of sector size */
+ }
+ if (image_size > qed_max_image_size(cluster_size, table_size)) {
+ return false; /* image is too large */
+ }
+ return true;
+}
+
+/**
+ * Read a string of known length from the image file
+ *
+ * @file: Image file
+ * @offset: File offset to start of string, in bytes
+ * @n: String length in bytes
+ * @buf: Destination buffer
+ * @buflen: Destination buffer length in bytes
+ * @ret: 0 on success, -errno on failure
+ *
+ * The string is NUL-terminated.
+ */
+static int qed_read_string(BlockDriverState *file, uint64_t offset, size_t n,
+ char *buf, size_t buflen)
+{
+ int ret;
+ if (n >= buflen) {
+ return -EINVAL;
+ }
+ ret = bdrv_pread(file, offset, buf, n);
+ if (ret < 0) {
+ return ret;
+ }
+ buf[n] = '\0';
+ return 0;
+}
+
+/**
+ * Allocate new clusters
+ *
+ * @s: QED state
+ * @n: Number of contiguous clusters to allocate
+ * @ret: Offset of first allocated cluster
+ *
+ * This function only produces the offset where the new clusters should be
+ * written. It updates BDRVQEDState but does not make any changes to the image
+ * file.
+ */
+static uint64_t qed_alloc_clusters(BDRVQEDState *s, unsigned int n)
+{
+ uint64_t offset = s->file_size;
+ s->file_size += n * s->header.cluster_size;
+ return offset;
+}
+
+QEDTable *qed_alloc_table(BDRVQEDState *s)
+{
+ /* Honor O_DIRECT memory alignment requirements */
+ return qemu_blockalign(s->bs,
+ s->header.cluster_size * s->header.table_size);
+}
+
+/**
+ * Allocate a new zeroed L2 table
+ */
+static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
+{
+ CachedL2Table *l2_table = qed_alloc_l2_cache_entry(&s->l2_cache);
+
+ l2_table->table = qed_alloc_table(s);
+ l2_table->offset = qed_alloc_clusters(s, s->header.table_size);
+
+ memset(l2_table->table->offsets, 0,
+ s->header.cluster_size * s->header.table_size);
+ return l2_table;
+}
+
+static void qed_aio_next_io(void *opaque, int ret);
+
+static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
+{
+ assert(!s->allocating_write_reqs_plugged);
+
+ s->allocating_write_reqs_plugged = true;
+}
+
+static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
+{
+ QEDAIOCB *acb;
+
+ assert(s->allocating_write_reqs_plugged);
+
+ s->allocating_write_reqs_plugged = false;
+
+ acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
+ if (acb) {
+ qed_aio_next_io(acb, 0);
+ }
+}
+
+static void qed_finish_clear_need_check(void *opaque, int ret)
+{
+ /* Do nothing */
+}
+
+static void qed_flush_after_clear_need_check(void *opaque, int ret)
+{
+ BDRVQEDState *s = opaque;
+
+ bdrv_aio_flush(s->bs, qed_finish_clear_need_check, s);
+
+ /* No need to wait until flush completes */
+ qed_unplug_allocating_write_reqs(s);
+}
+
+static void qed_clear_need_check(void *opaque, int ret)
+{
+ BDRVQEDState *s = opaque;
+
+ if (ret) {
+ qed_unplug_allocating_write_reqs(s);
+ return;
+ }
+
+ s->header.features &= ~QED_F_NEED_CHECK;
+ qed_write_header(s, qed_flush_after_clear_need_check, s);
+}
+
+static void qed_need_check_timer_cb(void *opaque)
+{
+ BDRVQEDState *s = opaque;
+
+ /* The timer should only fire when allocating writes have drained */
+ assert(!QSIMPLEQ_FIRST(&s->allocating_write_reqs));
+
+ trace_qed_need_check_timer_cb(s);
+
+ qed_plug_allocating_write_reqs(s);
+
+ /* Ensure writes are on disk before clearing flag */
+ bdrv_aio_flush(s->bs, qed_clear_need_check, s);
+}
+
+static void qed_start_need_check_timer(BDRVQEDState *s)
+{
+ trace_qed_start_need_check_timer(s);
+
+ /* Use vm_clock so we don't alter the image file while suspended for
+ * migration.
+ */
+ qemu_mod_timer(s->need_check_timer, qemu_get_clock_ns(vm_clock) +
+ get_ticks_per_sec() * QED_NEED_CHECK_TIMEOUT);
+}
+
+/* It's okay to call this multiple times or when no timer is started */
+static void qed_cancel_need_check_timer(BDRVQEDState *s)
+{
+ trace_qed_cancel_need_check_timer(s);
+ qemu_del_timer(s->need_check_timer);
+}
+
+static void bdrv_qed_rebind(BlockDriverState *bs)
+{
+ BDRVQEDState *s = bs->opaque;
+ s->bs = bs;
+}
+
+static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags)
+{
+ BDRVQEDState *s = bs->opaque;
+ QEDHeader le_header;
+ int64_t file_size;
+ int ret;
+
+ s->bs = bs;
+ QSIMPLEQ_INIT(&s->allocating_write_reqs);
+
+ ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header));
+ if (ret < 0) {
+ return ret;
+ }
+ qed_header_le_to_cpu(&le_header, &s->header);
+
+ if (s->header.magic != QED_MAGIC) {
+ return -EMEDIUMTYPE;
+ }
+ if (s->header.features & ~QED_FEATURE_MASK) {
+ /* image uses unsupported feature bits */
+ char buf[64];
+ snprintf(buf, sizeof(buf), "%" PRIx64,
+ s->header.features & ~QED_FEATURE_MASK);
+ qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
+ bs->device_name, "QED", buf);
+ return -ENOTSUP;
+ }
+ if (!qed_is_cluster_size_valid(s->header.cluster_size)) {
+ return -EINVAL;
+ }
+
+ /* Round down file size to the last cluster */
+ file_size = bdrv_getlength(bs->file);
+ if (file_size < 0) {
+ return file_size;
+ }
+ s->file_size = qed_start_of_cluster(s, file_size);
+
+ if (!qed_is_table_size_valid(s->header.table_size)) {
+ return -EINVAL;
+ }
+ if (!qed_is_image_size_valid(s->header.image_size,
+ s->header.cluster_size,
+ s->header.table_size)) {
+ return -EINVAL;
+ }
+ if (!qed_check_table_offset(s, s->header.l1_table_offset)) {
+ return -EINVAL;
+ }
+
+ s->table_nelems = (s->header.cluster_size * s->header.table_size) /
+ sizeof(uint64_t);
+ s->l2_shift = ffs(s->header.cluster_size) - 1;
+ s->l2_mask = s->table_nelems - 1;
+ s->l1_shift = s->l2_shift + ffs(s->table_nelems) - 1;
+
+ if ((s->header.features & QED_F_BACKING_FILE)) {
+ if ((uint64_t)s->header.backing_filename_offset +
+ s->header.backing_filename_size >
+ s->header.cluster_size * s->header.header_size) {
+ return -EINVAL;
+ }
+
+ ret = qed_read_string(bs->file, s->header.backing_filename_offset,
+ s->header.backing_filename_size, bs->backing_file,
+ sizeof(bs->backing_file));
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (s->header.features & QED_F_BACKING_FORMAT_NO_PROBE) {
+ pstrcpy(bs->backing_format, sizeof(bs->backing_format), "raw");
+ }
+ }
+
+ /* Reset unknown autoclear feature bits. This is a backwards
+ * compatibility mechanism that allows images to be opened by older
+ * programs, which "knock out" unknown feature bits. When an image is
+ * opened by a newer program again it can detect that the autoclear
+ * feature is no longer valid.
+ */
+ if ((s->header.autoclear_features & ~QED_AUTOCLEAR_FEATURE_MASK) != 0 &&
+ !bdrv_is_read_only(bs->file) && !(flags & BDRV_O_INCOMING)) {
+ s->header.autoclear_features &= QED_AUTOCLEAR_FEATURE_MASK;
+
+ ret = qed_write_header_sync(s);
+ if (ret) {
+ return ret;
+ }
+
+ /* From here on only known autoclear feature bits are valid */
+ bdrv_flush(bs->file);
+ }
+
+ s->l1_table = qed_alloc_table(s);
+ qed_init_l2_cache(&s->l2_cache);
+
+ ret = qed_read_l1_table_sync(s);
+ if (ret) {
+ goto out;
+ }
+
+ /* If image was not closed cleanly, check consistency */
+ if (!(flags & BDRV_O_CHECK) && (s->header.features & QED_F_NEED_CHECK)) {
+ /* Read-only images cannot be fixed. There is no risk of corruption
+ * since write operations are not possible. Therefore, allow
+ * potentially inconsistent images to be opened read-only. This can
+ * aid data recovery from an otherwise inconsistent image.
+ */
+ if (!bdrv_is_read_only(bs->file) &&
+ !(flags & BDRV_O_INCOMING)) {
+ BdrvCheckResult result = {0};
+
+ ret = qed_check(s, &result, true);
+ if (ret) {
+ goto out;
+ }
+ }
+ }
+
+ s->need_check_timer = qemu_new_timer_ns(vm_clock,
+ qed_need_check_timer_cb, s);
+
+out:
+ if (ret) {
+ qed_free_l2_cache(&s->l2_cache);
+ qemu_vfree(s->l1_table);
+ }
+ return ret;
+}
+
+/* We have nothing to do for QED reopen, stubs just return
+ * success */
+static int bdrv_qed_reopen_prepare(BDRVReopenState *state,
+ BlockReopenQueue *queue, Error **errp)
+{
+ return 0;
+}
+
+static void bdrv_qed_close(BlockDriverState *bs)
+{
+ BDRVQEDState *s = bs->opaque;
+
+ qed_cancel_need_check_timer(s);
+ qemu_free_timer(s->need_check_timer);
+
+ /* Ensure writes reach stable storage */
+ bdrv_flush(bs->file);
+
+ /* Clean shutdown, no check required on next open */
+ if (s->header.features & QED_F_NEED_CHECK) {
+ s->header.features &= ~QED_F_NEED_CHECK;
+ qed_write_header_sync(s);
+ }
+
+ qed_free_l2_cache(&s->l2_cache);
+ qemu_vfree(s->l1_table);
+}
+
+static int qed_create(const char *filename, uint32_t cluster_size,
+ uint64_t image_size, uint32_t table_size,
+ const char *backing_file, const char *backing_fmt)
+{
+ QEDHeader header = {
+ .magic = QED_MAGIC,
+ .cluster_size = cluster_size,
+ .table_size = table_size,
+ .header_size = 1,
+ .features = 0,
+ .compat_features = 0,
+ .l1_table_offset = cluster_size,
+ .image_size = image_size,
+ };
+ QEDHeader le_header;
+ uint8_t *l1_table = NULL;
+ size_t l1_size = header.cluster_size * header.table_size;
+ int ret = 0;
+ BlockDriverState *bs = NULL;
+
+ ret = bdrv_create_file(filename, NULL);
+ if (ret < 0) {
+ return ret;
+ }
+
+ ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR | BDRV_O_CACHE_WB);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* File must start empty and grow, check truncate is supported */
+ ret = bdrv_truncate(bs, 0);
+ if (ret < 0) {
+ goto out;
+ }
+
+ if (backing_file) {
+ header.features |= QED_F_BACKING_FILE;
+ header.backing_filename_offset = sizeof(le_header);
+ header.backing_filename_size = strlen(backing_file);
+
+ if (qed_fmt_is_raw(backing_fmt)) {
+ header.features |= QED_F_BACKING_FORMAT_NO_PROBE;
+ }
+ }
+
+ qed_header_cpu_to_le(&header, &le_header);
+ ret = bdrv_pwrite(bs, 0, &le_header, sizeof(le_header));
+ if (ret < 0) {
+ goto out;
+ }
+ ret = bdrv_pwrite(bs, sizeof(le_header), backing_file,
+ header.backing_filename_size);
+ if (ret < 0) {
+ goto out;
+ }
+
+ l1_table = g_malloc0(l1_size);
+ ret = bdrv_pwrite(bs, header.l1_table_offset, l1_table, l1_size);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = 0; /* success */
+out:
+ g_free(l1_table);
+ bdrv_delete(bs);
+ return ret;
+}
+
+static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options)
+{
+ uint64_t image_size = 0;
+ uint32_t cluster_size = QED_DEFAULT_CLUSTER_SIZE;
+ uint32_t table_size = QED_DEFAULT_TABLE_SIZE;
+ const char *backing_file = NULL;
+ const char *backing_fmt = NULL;
+
+ while (options && options->name) {
+ if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
+ image_size = options->value.n;
+ } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
+ backing_file = options->value.s;
+ } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) {
+ backing_fmt = options->value.s;
+ } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) {
+ if (options->value.n) {
+ cluster_size = options->value.n;
+ }
+ } else if (!strcmp(options->name, BLOCK_OPT_TABLE_SIZE)) {
+ if (options->value.n) {
+ table_size = options->value.n;
+ }
+ }
+ options++;
+ }
+
+ if (!qed_is_cluster_size_valid(cluster_size)) {
+ fprintf(stderr, "QED cluster size must be within range [%u, %u] and power of 2\n",
+ QED_MIN_CLUSTER_SIZE, QED_MAX_CLUSTER_SIZE);
+ return -EINVAL;
+ }
+ if (!qed_is_table_size_valid(table_size)) {
+ fprintf(stderr, "QED table size must be within range [%u, %u] and power of 2\n",
+ QED_MIN_TABLE_SIZE, QED_MAX_TABLE_SIZE);
+ return -EINVAL;
+ }
+ if (!qed_is_image_size_valid(image_size, cluster_size, table_size)) {
+ fprintf(stderr, "QED image size must be a non-zero multiple of "
+ "cluster size and less than %" PRIu64 " bytes\n",
+ qed_max_image_size(cluster_size, table_size));
+ return -EINVAL;
+ }
+
+ return qed_create(filename, cluster_size, image_size, table_size,
+ backing_file, backing_fmt);
+}
+
+typedef struct {
+ Coroutine *co;
+ int is_allocated;
+ int *pnum;
+} QEDIsAllocatedCB;
+
+static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len)
+{
+ QEDIsAllocatedCB *cb = opaque;
+ *cb->pnum = len / BDRV_SECTOR_SIZE;
+ cb->is_allocated = (ret == QED_CLUSTER_FOUND || ret == QED_CLUSTER_ZERO);
+ if (cb->co) {
+ qemu_coroutine_enter(cb->co, NULL);
+ }
+}
+
+static int coroutine_fn bdrv_qed_co_is_allocated(BlockDriverState *bs,
+ int64_t sector_num,
+ int nb_sectors, int *pnum)
+{
+ BDRVQEDState *s = bs->opaque;
+ uint64_t pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
+ size_t len = (size_t)nb_sectors * BDRV_SECTOR_SIZE;
+ QEDIsAllocatedCB cb = {
+ .is_allocated = -1,
+ .pnum = pnum,
+ };
+ QEDRequest request = { .l2_table = NULL };
+
+ qed_find_cluster(s, &request, pos, len, qed_is_allocated_cb, &cb);
+
+ /* Now sleep if the callback wasn't invoked immediately */
+ while (cb.is_allocated == -1) {
+ cb.co = qemu_coroutine_self();
+ qemu_coroutine_yield();
+ }
+
+ qed_unref_l2_cache_entry(request.l2_table);
+
+ return cb.is_allocated;
+}
+
+static int bdrv_qed_make_empty(BlockDriverState *bs)
+{
+ return -ENOTSUP;
+}
+
+static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
+{
+ return acb->common.bs->opaque;
+}
+
+/**
+ * Read from the backing file or zero-fill if no backing file
+ *
+ * @s: QED state
+ * @pos: Byte position in device
+ * @qiov: Destination I/O vector
+ * @cb: Completion function
+ * @opaque: User data for completion function
+ *
+ * This function reads qiov->size bytes starting at pos from the backing file.
+ * If there is no backing file then zeroes are read.
+ */
+static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
+ QEMUIOVector *qiov,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ uint64_t backing_length = 0;
+ size_t size;
+
+ /* If there is a backing file, get its length. Treat the absence of a
+ * backing file like a zero length backing file.
+ */
+ if (s->bs->backing_hd) {
+ int64_t l = bdrv_getlength(s->bs->backing_hd);
+ if (l < 0) {
+ cb(opaque, l);
+ return;
+ }
+ backing_length = l;
+ }
+
+ /* Zero all sectors if reading beyond the end of the backing file */
+ if (pos >= backing_length ||
+ pos + qiov->size > backing_length) {
+ qemu_iovec_memset(qiov, 0, 0, qiov->size);
+ }
+
+ /* Complete now if there are no backing file sectors to read */
+ if (pos >= backing_length) {
+ cb(opaque, 0);
+ return;
+ }
+
+ /* If the read straddles the end of the backing file, shorten it */
+ size = MIN((uint64_t)backing_length - pos, qiov->size);
+
+ BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
+ bdrv_aio_readv(s->bs->backing_hd, pos / BDRV_SECTOR_SIZE,
+ qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
+}
+
+typedef struct {
+ GenericCB gencb;
+ BDRVQEDState *s;
+ QEMUIOVector qiov;
+ struct iovec iov;
+ uint64_t offset;
+} CopyFromBackingFileCB;
+
+static void qed_copy_from_backing_file_cb(void *opaque, int ret)
+{
+ CopyFromBackingFileCB *copy_cb = opaque;
+ qemu_vfree(copy_cb->iov.iov_base);
+ gencb_complete(&copy_cb->gencb, ret);
+}
+
+static void qed_copy_from_backing_file_write(void *opaque, int ret)
+{
+ CopyFromBackingFileCB *copy_cb = opaque;
+ BDRVQEDState *s = copy_cb->s;
+
+ if (ret) {
+ qed_copy_from_backing_file_cb(copy_cb, ret);
+ return;
+ }
+
+ BLKDBG_EVENT(s->bs->file, BLKDBG_COW_WRITE);
+ bdrv_aio_writev(s->bs->file, copy_cb->offset / BDRV_SECTOR_SIZE,
+ &copy_cb->qiov, copy_cb->qiov.size / BDRV_SECTOR_SIZE,
+ qed_copy_from_backing_file_cb, copy_cb);
+}
+
+/**
+ * Copy data from backing file into the image
+ *
+ * @s: QED state
+ * @pos: Byte position in device
+ * @len: Number of bytes
+ * @offset: Byte offset in image file
+ * @cb: Completion function
+ * @opaque: User data for completion function
+ */
+static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos,
+ uint64_t len, uint64_t offset,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+ CopyFromBackingFileCB *copy_cb;
+
+ /* Skip copy entirely if there is no work to do */
+ if (len == 0) {
+ cb(opaque, 0);
+ return;
+ }
+
+ copy_cb = gencb_alloc(sizeof(*copy_cb), cb, opaque);
+ copy_cb->s = s;
+ copy_cb->offset = offset;
+ copy_cb->iov.iov_base = qemu_blockalign(s->bs, len);
+ copy_cb->iov.iov_len = len;
+ qemu_iovec_init_external(&copy_cb->qiov, &copy_cb->iov, 1);
+
+ qed_read_backing_file(s, pos, &copy_cb->qiov,
+ qed_copy_from_backing_file_write, copy_cb);
+}
+
+/**
+ * Link one or more contiguous clusters into a table
+ *
+ * @s: QED state
+ * @table: L2 table
+ * @index: First cluster index
+ * @n: Number of contiguous clusters
+ * @cluster: First cluster offset
+ *
+ * The cluster offset may be an allocated byte offset in the image file, the
+ * zero cluster marker, or the unallocated cluster marker.
+ */
+static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
+ unsigned int n, uint64_t cluster)
+{
+ int i;
+ for (i = index; i < index + n; i++) {
+ table->offsets[i] = cluster;
+ if (!qed_offset_is_unalloc_cluster(cluster) &&
+ !qed_offset_is_zero_cluster(cluster)) {
+ cluster += s->header.cluster_size;
+ }
+ }
+}
+
+static void qed_aio_complete_bh(void *opaque)
+{
+ QEDAIOCB *acb = opaque;
+ BlockDriverCompletionFunc *cb = acb->common.cb;
+ void *user_opaque = acb->common.opaque;
+ int ret = acb->bh_ret;
+ bool *finished = acb->finished;
+
+ qemu_bh_delete(acb->bh);
+ qemu_aio_release(acb);
+
+ /* Invoke callback */
+ cb(user_opaque, ret);
+
+ /* Signal cancel completion */
+ if (finished) {
+ *finished = true;
+ }
+}
+
+static void qed_aio_complete(QEDAIOCB *acb, int ret)
+{
+ BDRVQEDState *s = acb_to_s(acb);
+
+ trace_qed_aio_complete(s, acb, ret);
+
+ /* Free resources */
+ qemu_iovec_destroy(&acb->cur_qiov);
+ qed_unref_l2_cache_entry(acb->request.l2_table);
+
+ /* Free the buffer we may have allocated for zero writes */
+ if (acb->flags & QED_AIOCB_ZERO) {
+ qemu_vfree(acb->qiov->iov[0].iov_base);
+ acb->qiov->iov[0].iov_base = NULL;
+ }
+
+ /* Arrange for a bh to invoke the completion function */
+ acb->bh_ret = ret;
+ acb->bh = qemu_bh_new(qed_aio_complete_bh, acb);
+ qemu_bh_schedule(acb->bh);
+
+ /* Start next allocating write request waiting behind this one. Note that
+ * requests enqueue themselves when they first hit an unallocated cluster
+ * but they wait until the entire request is finished before waking up the
+ * next request in the queue. This ensures that we don't cycle through
+ * requests multiple times but rather finish one at a time completely.
+ */
+ if (acb == QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
+ QSIMPLEQ_REMOVE_HEAD(&s->allocating_write_reqs, next);
+ acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
+ if (acb) {
+ qed_aio_next_io(acb, 0);
+ } else if (s->header.features & QED_F_NEED_CHECK) {
+ qed_start_need_check_timer(s);
+ }
+ }
+}
+
+/**
+ * Commit the current L2 table to the cache
+ */
+static void qed_commit_l2_update(void *opaque, int ret)
+{
+ QEDAIOCB *acb = opaque;
+ BDRVQEDState *s = acb_to_s(acb);
+ CachedL2Table *l2_table = acb->request.l2_table;
+ uint64_t l2_offset = l2_table->offset;
+
+ qed_commit_l2_cache_entry(&s->l2_cache, l2_table);
+
+ /* This is guaranteed to succeed because we just committed the entry to the
+ * cache.
+ */
+ acb->request.l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
+ assert(acb->request.l2_table != NULL);
+
+ qed_aio_next_io(opaque, ret);
+}
+
+/**
+ * Update L1 table with new L2 table offset and write it out
+ */
+static void qed_aio_write_l1_update(void *opaque, int ret)
+{
+ QEDAIOCB *acb = opaque;
+ BDRVQEDState *s = acb_to_s(acb);
+ int index;
+
+ if (ret) {
+ qed_aio_complete(acb, ret);
+ return;
+ }
+
+ index = qed_l1_index(s, acb->cur_pos);
+ s->l1_table->offsets[index] = acb->request.l2_table->offset;
+
+ qed_write_l1_table(s, index, 1, qed_commit_l2_update, acb);
+}
+
+/**
+ * Update L2 table with new cluster offsets and write them out
+ */
+static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset)
+{
+ BDRVQEDState *s = acb_to_s(acb);
+ bool need_alloc = acb->find_cluster_ret == QED_CLUSTER_L1;
+ int index;
+
+ if (ret) {
+ goto err;
+ }
+
+ if (need_alloc) {
+ qed_unref_l2_cache_entry(acb->request.l2_table);
+ acb->request.l2_table = qed_new_l2_table(s);
+ }
+
+ index = qed_l2_index(s, acb->cur_pos);
+ qed_update_l2_table(s, acb->request.l2_table->table, index, acb->cur_nclusters,
+ offset);
+
+ if (need_alloc) {
+ /* Write out the whole new L2 table */
+ qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true,
+ qed_aio_write_l1_update, acb);
+ } else {
+ /* Write out only the updated part of the L2 table */
+ qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters, false,
+ qed_aio_next_io, acb);
+ }
+ return;
+
+err:
+ qed_aio_complete(acb, ret);
+}
+
+static void qed_aio_write_l2_update_cb(void *opaque, int ret)
+{
+ QEDAIOCB *acb = opaque;
+ qed_aio_write_l2_update(acb, ret, acb->cur_cluster);
+}
+
+/**
+ * Flush new data clusters before updating the L2 table
+ *
+ * This flush is necessary when a backing file is in use. A crash during an
+ * allocating write could result in empty clusters in the image. If the write
+ * only touched a subregion of the cluster, then backing image sectors have
+ * been lost in the untouched region. The solution is to flush after writing a
+ * new data cluster and before updating the L2 table.
+ */
+static void qed_aio_write_flush_before_l2_update(void *opaque, int ret)
+{
+ QEDAIOCB *acb = opaque;
+ BDRVQEDState *s = acb_to_s(acb);
+
+ if (!bdrv_aio_flush(s->bs->file, qed_aio_write_l2_update_cb, opaque)) {
+ qed_aio_complete(acb, -EIO);
+ }
+}
+
+/**
+ * Write data to the image file
+ */
+static void qed_aio_write_main(void *opaque, int ret)
+{
+ QEDAIOCB *acb = opaque;
+ BDRVQEDState *s = acb_to_s(acb);
+ uint64_t offset = acb->cur_cluster +
+ qed_offset_into_cluster(s, acb->cur_pos);
+ BlockDriverCompletionFunc *next_fn;
+
+ trace_qed_aio_write_main(s, acb, ret, offset, acb->cur_qiov.size);
+
+ if (ret) {
+ qed_aio_complete(acb, ret);
+ return;
+ }
+
+ if (acb->find_cluster_ret == QED_CLUSTER_FOUND) {
+ next_fn = qed_aio_next_io;
+ } else {
+ if (s->bs->backing_hd) {
+ next_fn = qed_aio_write_flush_before_l2_update;
+ } else {
+ next_fn = qed_aio_write_l2_update_cb;
+ }
+ }
+
+ BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO);
+ bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
+ &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
+ next_fn, acb);
+}
+
+/**
+ * Populate back untouched region of new data cluster
+ */
+static void qed_aio_write_postfill(void *opaque, int ret)
+{
+ QEDAIOCB *acb = opaque;
+ BDRVQEDState *s = acb_to_s(acb);
+ uint64_t start = acb->cur_pos + acb->cur_qiov.size;
+ uint64_t len =
+ qed_start_of_cluster(s, start + s->header.cluster_size - 1) - start;
+ uint64_t offset = acb->cur_cluster +
+ qed_offset_into_cluster(s, acb->cur_pos) +
+ acb->cur_qiov.size;
+
+ if (ret) {
+ qed_aio_complete(acb, ret);
+ return;
+ }
+
+ trace_qed_aio_write_postfill(s, acb, start, len, offset);
+ qed_copy_from_backing_file(s, start, len, offset,
+ qed_aio_write_main, acb);
+}
+
+/**
+ * Populate front untouched region of new data cluster
+ */
+static void qed_aio_write_prefill(void *opaque, int ret)
+{
+ QEDAIOCB *acb = opaque;
+ BDRVQEDState *s = acb_to_s(acb);
+ uint64_t start = qed_start_of_cluster(s, acb->cur_pos);
+ uint64_t len = qed_offset_into_cluster(s, acb->cur_pos);
+
+ trace_qed_aio_write_prefill(s, acb, start, len, acb->cur_cluster);
+ qed_copy_from_backing_file(s, start, len, acb->cur_cluster,
+ qed_aio_write_postfill, acb);
+}
+
+/**
+ * Check if the QED_F_NEED_CHECK bit should be set during allocating write
+ */
+static bool qed_should_set_need_check(BDRVQEDState *s)
+{
+ /* The flush before L2 update path ensures consistency */
+ if (s->bs->backing_hd) {
+ return false;
+ }
+
+ return !(s->header.features & QED_F_NEED_CHECK);
+}
+
+static void qed_aio_write_zero_cluster(void *opaque, int ret)
+{
+ QEDAIOCB *acb = opaque;
+
+ if (ret) {
+ qed_aio_complete(acb, ret);
+ return;
+ }
+
+ qed_aio_write_l2_update(acb, 0, 1);
+}
+
+/**
+ * Write new data cluster
+ *
+ * @acb: Write request
+ * @len: Length in bytes
+ *
+ * This path is taken when writing to previously unallocated clusters.
+ */
+static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
+{
+ BDRVQEDState *s = acb_to_s(acb);
+ BlockDriverCompletionFunc *cb;
+
+ /* Cancel timer when the first allocating request comes in */
+ if (QSIMPLEQ_EMPTY(&s->allocating_write_reqs)) {
+ qed_cancel_need_check_timer(s);
+ }
+
+ /* Freeze this request if another allocating write is in progress */
+ if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
+ QSIMPLEQ_INSERT_TAIL(&s->allocating_write_reqs, acb, next);
+ }
+ if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs) ||
+ s->allocating_write_reqs_plugged) {
+ return; /* wait for existing request to finish */
+ }
+
+ acb->cur_nclusters = qed_bytes_to_clusters(s,
+ qed_offset_into_cluster(s, acb->cur_pos) + len);
+ qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
+
+ if (acb->flags & QED_AIOCB_ZERO) {
+ /* Skip ahead if the clusters are already zero */
+ if (acb->find_cluster_ret == QED_CLUSTER_ZERO) {
+ qed_aio_next_io(acb, 0);
+ return;
+ }
+
+ cb = qed_aio_write_zero_cluster;
+ } else {
+ cb = qed_aio_write_prefill;
+ acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters);
+ }
+
+ if (qed_should_set_need_check(s)) {
+ s->header.features |= QED_F_NEED_CHECK;
+ qed_write_header(s, cb, acb);
+ } else {
+ cb(acb, 0);
+ }
+}
+
+/**
+ * Write data cluster in place
+ *
+ * @acb: Write request
+ * @offset: Cluster offset in bytes
+ * @len: Length in bytes
+ *
+ * This path is taken when writing to already allocated clusters.
+ */
+static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len)
+{
+ /* Allocate buffer for zero writes */
+ if (acb->flags & QED_AIOCB_ZERO) {
+ struct iovec *iov = acb->qiov->iov;
+
+ if (!iov->iov_base) {
+ iov->iov_base = qemu_blockalign(acb->common.bs, iov->iov_len);
+ memset(iov->iov_base, 0, iov->iov_len);
+ }
+ }
+
+ /* Calculate the I/O vector */
+ acb->cur_cluster = offset;
+ qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
+
+ /* Do the actual write */
+ qed_aio_write_main(acb, 0);
+}
+
+/**
+ * Write data cluster
+ *
+ * @opaque: Write request
+ * @ret: QED_CLUSTER_FOUND, QED_CLUSTER_L2, QED_CLUSTER_L1,
+ * or -errno
+ * @offset: Cluster offset in bytes
+ * @len: Length in bytes
+ *
+ * Callback from qed_find_cluster().
+ */
+static void qed_aio_write_data(void *opaque, int ret,
+ uint64_t offset, size_t len)
+{
+ QEDAIOCB *acb = opaque;
+
+ trace_qed_aio_write_data(acb_to_s(acb), acb, ret, offset, len);
+
+ acb->find_cluster_ret = ret;
+
+ switch (ret) {
+ case QED_CLUSTER_FOUND:
+ qed_aio_write_inplace(acb, offset, len);
+ break;
+
+ case QED_CLUSTER_L2:
+ case QED_CLUSTER_L1:
+ case QED_CLUSTER_ZERO:
+ qed_aio_write_alloc(acb, len);
+ break;
+
+ default:
+ qed_aio_complete(acb, ret);
+ break;
+ }
+}
+
+/**
+ * Read data cluster
+ *
+ * @opaque: Read request
+ * @ret: QED_CLUSTER_FOUND, QED_CLUSTER_L2, QED_CLUSTER_L1,
+ * or -errno
+ * @offset: Cluster offset in bytes
+ * @len: Length in bytes
+ *
+ * Callback from qed_find_cluster().
+ */
+static void qed_aio_read_data(void *opaque, int ret,
+ uint64_t offset, size_t len)
+{
+ QEDAIOCB *acb = opaque;
+ BDRVQEDState *s = acb_to_s(acb);
+ BlockDriverState *bs = acb->common.bs;
+
+ /* Adjust offset into cluster */
+ offset += qed_offset_into_cluster(s, acb->cur_pos);
+
+ trace_qed_aio_read_data(s, acb, ret, offset, len);
+
+ if (ret < 0) {
+ goto err;
+ }
+
+ qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
+
+ /* Handle zero cluster and backing file reads */
+ if (ret == QED_CLUSTER_ZERO) {
+ qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size);
+ qed_aio_next_io(acb, 0);
+ return;
+ } else if (ret != QED_CLUSTER_FOUND) {
+ qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
+ qed_aio_next_io, acb);
+ return;
+ }
+
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
+ bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE,
+ &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
+ qed_aio_next_io, acb);
+ return;
+
+err:
+ qed_aio_complete(acb, ret);
+}
+
+/**
+ * Begin next I/O or complete the request
+ */
+static void qed_aio_next_io(void *opaque, int ret)
+{
+ QEDAIOCB *acb = opaque;
+ BDRVQEDState *s = acb_to_s(acb);
+ QEDFindClusterFunc *io_fn = (acb->flags & QED_AIOCB_WRITE) ?
+ qed_aio_write_data : qed_aio_read_data;
+
+ trace_qed_aio_next_io(s, acb, ret, acb->cur_pos + acb->cur_qiov.size);
+
+ /* Handle I/O error */
+ if (ret) {
+ qed_aio_complete(acb, ret);
+ return;
+ }
+
+ acb->qiov_offset += acb->cur_qiov.size;
+ acb->cur_pos += acb->cur_qiov.size;
+ qemu_iovec_reset(&acb->cur_qiov);
+
+ /* Complete request */
+ if (acb->cur_pos >= acb->end_pos) {
+ qed_aio_complete(acb, 0);
+ return;
+ }
+
+ /* Find next cluster and start I/O */
+ qed_find_cluster(s, &acb->request,
+ acb->cur_pos, acb->end_pos - acb->cur_pos,
+ io_fn, acb);
+}
+
+static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque, int flags)
+{
+ QEDAIOCB *acb = qemu_aio_get(&qed_aiocb_info, bs, cb, opaque);
+
+ trace_qed_aio_setup(bs->opaque, acb, sector_num, nb_sectors,
+ opaque, flags);
+
+ acb->flags = flags;
+ acb->finished = NULL;
+ acb->qiov = qiov;
+ acb->qiov_offset = 0;
+ acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
+ acb->end_pos = acb->cur_pos + nb_sectors * BDRV_SECTOR_SIZE;
+ acb->request.l2_table = NULL;
+ qemu_iovec_init(&acb->cur_qiov, qiov->niov);
+
+ /* Start request */
+ qed_aio_next_io(acb, 0);
+ return &acb->common;
+}
+
+static BlockDriverAIOCB *bdrv_qed_aio_readv(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+ return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
+}
+
+static BlockDriverAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+ return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb,
+ opaque, QED_AIOCB_WRITE);
+}
+
+typedef struct {
+ Coroutine *co;
+ int ret;
+ bool done;
+} QEDWriteZeroesCB;
+
+static void coroutine_fn qed_co_write_zeroes_cb(void *opaque, int ret)
+{
+ QEDWriteZeroesCB *cb = opaque;
+
+ cb->done = true;
+ cb->ret = ret;
+ if (cb->co) {
+ qemu_coroutine_enter(cb->co, NULL);
+ }
+}
+
+static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs,
+ int64_t sector_num,
+ int nb_sectors)
+{
+ BlockDriverAIOCB *blockacb;
+ BDRVQEDState *s = bs->opaque;
+ QEDWriteZeroesCB cb = { .done = false };
+ QEMUIOVector qiov;
+ struct iovec iov;
+
+ /* Refuse if there are untouched backing file sectors */
+ if (bs->backing_hd) {
+ if (qed_offset_into_cluster(s, sector_num * BDRV_SECTOR_SIZE) != 0) {
+ return -ENOTSUP;
+ }
+ if (qed_offset_into_cluster(s, nb_sectors * BDRV_SECTOR_SIZE) != 0) {
+ return -ENOTSUP;
+ }
+ }
+
+ /* Zero writes start without an I/O buffer. If a buffer becomes necessary
+ * then it will be allocated during request processing.
+ */
+ iov.iov_base = NULL,
+ iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE,
+
+ qemu_iovec_init_external(&qiov, &iov, 1);
+ blockacb = qed_aio_setup(bs, sector_num, &qiov, nb_sectors,
+ qed_co_write_zeroes_cb, &cb,
+ QED_AIOCB_WRITE | QED_AIOCB_ZERO);
+ if (!blockacb) {
+ return -EIO;
+ }
+ if (!cb.done) {
+ cb.co = qemu_coroutine_self();
+ qemu_coroutine_yield();
+ }
+ assert(cb.done);
+ return cb.ret;
+}
+
+static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset)
+{
+ BDRVQEDState *s = bs->opaque;
+ uint64_t old_image_size;
+ int ret;
+
+ if (!qed_is_image_size_valid(offset, s->header.cluster_size,
+ s->header.table_size)) {
+ return -EINVAL;
+ }
+
+ /* Shrinking is currently not supported */
+ if ((uint64_t)offset < s->header.image_size) {
+ return -ENOTSUP;
+ }
+
+ old_image_size = s->header.image_size;
+ s->header.image_size = offset;
+ ret = qed_write_header_sync(s);
+ if (ret < 0) {
+ s->header.image_size = old_image_size;
+ }
+ return ret;
+}
+
+static int64_t bdrv_qed_getlength(BlockDriverState *bs)
+{
+ BDRVQEDState *s = bs->opaque;
+ return s->header.image_size;
+}
+
+static int bdrv_qed_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+ BDRVQEDState *s = bs->opaque;
+
+ memset(bdi, 0, sizeof(*bdi));
+ bdi->cluster_size = s->header.cluster_size;
+ bdi->is_dirty = s->header.features & QED_F_NEED_CHECK;
+ return 0;
+}
+
+static int bdrv_qed_change_backing_file(BlockDriverState *bs,
+ const char *backing_file,
+ const char *backing_fmt)
+{
+ BDRVQEDState *s = bs->opaque;
+ QEDHeader new_header, le_header;
+ void *buffer;
+ size_t buffer_len, backing_file_len;
+ int ret;
+
+ /* Refuse to set backing filename if unknown compat feature bits are
+ * active. If the image uses an unknown compat feature then we may not
+ * know the layout of data following the header structure and cannot safely
+ * add a new string.
+ */
+ if (backing_file && (s->header.compat_features &
+ ~QED_COMPAT_FEATURE_MASK)) {
+ return -ENOTSUP;
+ }
+
+ memcpy(&new_header, &s->header, sizeof(new_header));
+
+ new_header.features &= ~(QED_F_BACKING_FILE |
+ QED_F_BACKING_FORMAT_NO_PROBE);
+
+ /* Adjust feature flags */
+ if (backing_file) {
+ new_header.features |= QED_F_BACKING_FILE;
+
+ if (qed_fmt_is_raw(backing_fmt)) {
+ new_header.features |= QED_F_BACKING_FORMAT_NO_PROBE;
+ }
+ }
+
+ /* Calculate new header size */
+ backing_file_len = 0;
+
+ if (backing_file) {
+ backing_file_len = strlen(backing_file);
+ }
+
+ buffer_len = sizeof(new_header);
+ new_header.backing_filename_offset = buffer_len;
+ new_header.backing_filename_size = backing_file_len;
+ buffer_len += backing_file_len;
+
+ /* Make sure we can rewrite header without failing */
+ if (buffer_len > new_header.header_size * new_header.cluster_size) {
+ return -ENOSPC;
+ }
+
+ /* Prepare new header */
+ buffer = g_malloc(buffer_len);
+
+ qed_header_cpu_to_le(&new_header, &le_header);
+ memcpy(buffer, &le_header, sizeof(le_header));
+ buffer_len = sizeof(le_header);
+
+ if (backing_file) {
+ memcpy(buffer + buffer_len, backing_file, backing_file_len);
+ buffer_len += backing_file_len;
+ }
+
+ /* Write new header */
+ ret = bdrv_pwrite_sync(bs->file, 0, buffer, buffer_len);
+ g_free(buffer);
+ if (ret == 0) {
+ memcpy(&s->header, &new_header, sizeof(new_header));
+ }
+ return ret;
+}
+
+static void bdrv_qed_invalidate_cache(BlockDriverState *bs)
+{
+ BDRVQEDState *s = bs->opaque;
+
+ bdrv_qed_close(bs);
+ memset(s, 0, sizeof(BDRVQEDState));
+ bdrv_qed_open(bs, NULL, bs->open_flags);
+}
+
+static int bdrv_qed_check(BlockDriverState *bs, BdrvCheckResult *result,
+ BdrvCheckMode fix)
+{
+ BDRVQEDState *s = bs->opaque;
+
+ return qed_check(s, result, !!fix);
+}
+
+static QEMUOptionParameter qed_create_options[] = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = OPT_SIZE,
+ .help = "Virtual disk size (in bytes)"
+ }, {
+ .name = BLOCK_OPT_BACKING_FILE,
+ .type = OPT_STRING,
+ .help = "File name of a base image"
+ }, {
+ .name = BLOCK_OPT_BACKING_FMT,
+ .type = OPT_STRING,
+ .help = "Image format of the base image"
+ }, {
+ .name = BLOCK_OPT_CLUSTER_SIZE,
+ .type = OPT_SIZE,
+ .help = "Cluster size (in bytes)",
+ .value = { .n = QED_DEFAULT_CLUSTER_SIZE },
+ }, {
+ .name = BLOCK_OPT_TABLE_SIZE,
+ .type = OPT_SIZE,
+ .help = "L1/L2 table size (in clusters)"
+ },
+ { /* end of list */ }
+};
+
+static BlockDriver bdrv_qed = {
+ .format_name = "qed",
+ .instance_size = sizeof(BDRVQEDState),
+ .create_options = qed_create_options,
+
+ .bdrv_probe = bdrv_qed_probe,
+ .bdrv_rebind = bdrv_qed_rebind,
+ .bdrv_open = bdrv_qed_open,
+ .bdrv_close = bdrv_qed_close,
+ .bdrv_reopen_prepare = bdrv_qed_reopen_prepare,
+ .bdrv_create = bdrv_qed_create,
+ .bdrv_has_zero_init = bdrv_has_zero_init_1,
+ .bdrv_co_is_allocated = bdrv_qed_co_is_allocated,
+ .bdrv_make_empty = bdrv_qed_make_empty,
+ .bdrv_aio_readv = bdrv_qed_aio_readv,
+ .bdrv_aio_writev = bdrv_qed_aio_writev,
+ .bdrv_co_write_zeroes = bdrv_qed_co_write_zeroes,
+ .bdrv_truncate = bdrv_qed_truncate,
+ .bdrv_getlength = bdrv_qed_getlength,
+ .bdrv_get_info = bdrv_qed_get_info,
+ .bdrv_change_backing_file = bdrv_qed_change_backing_file,
+ .bdrv_invalidate_cache = bdrv_qed_invalidate_cache,
+ .bdrv_check = bdrv_qed_check,
+};
+
+static void bdrv_qed_init(void)
+{
+ bdrv_register(&bdrv_qed);
+}
+
+block_init(bdrv_qed_init);
diff --git a/contrib/qemu/block/qed.h b/contrib/qemu/block/qed.h
new file mode 100644
index 000000000..2b4ddedf3
--- /dev/null
+++ b/contrib/qemu/block/qed.h
@@ -0,0 +1,344 @@
+/*
+ * QEMU Enhanced Disk Format
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef BLOCK_QED_H
+#define BLOCK_QED_H
+
+#include "block/block_int.h"
+
+/* The layout of a QED file is as follows:
+ *
+ * +--------+----------+----------+----------+-----+
+ * | header | L1 table | cluster0 | cluster1 | ... |
+ * +--------+----------+----------+----------+-----+
+ *
+ * There is a 2-level pagetable for cluster allocation:
+ *
+ * +----------+
+ * | L1 table |
+ * +----------+
+ * ,------' | '------.
+ * +----------+ | +----------+
+ * | L2 table | ... | L2 table |
+ * +----------+ +----------+
+ * ,------' | '------.
+ * +----------+ | +----------+
+ * | Data | ... | Data |
+ * +----------+ +----------+
+ *
+ * The L1 table is fixed size and always present. L2 tables are allocated on
+ * demand. The L1 table size determines the maximum possible image size; it
+ * can be influenced using the cluster_size and table_size values.
+ *
+ * All fields are little-endian on disk.
+ */
+
+enum {
+ QED_MAGIC = 'Q' | 'E' << 8 | 'D' << 16 | '\0' << 24,
+
+ /* The image supports a backing file */
+ QED_F_BACKING_FILE = 0x01,
+
+ /* The image needs a consistency check before use */
+ QED_F_NEED_CHECK = 0x02,
+
+ /* The backing file format must not be probed, treat as raw image */
+ QED_F_BACKING_FORMAT_NO_PROBE = 0x04,
+
+ /* Feature bits must be used when the on-disk format changes */
+ QED_FEATURE_MASK = QED_F_BACKING_FILE | /* supported feature bits */
+ QED_F_NEED_CHECK |
+ QED_F_BACKING_FORMAT_NO_PROBE,
+ QED_COMPAT_FEATURE_MASK = 0, /* supported compat feature bits */
+ QED_AUTOCLEAR_FEATURE_MASK = 0, /* supported autoclear feature bits */
+
+ /* Data is stored in groups of sectors called clusters. Cluster size must
+ * be large to avoid keeping too much metadata. I/O requests that have
+ * sub-cluster size will require read-modify-write.
+ */
+ QED_MIN_CLUSTER_SIZE = 4 * 1024, /* in bytes */
+ QED_MAX_CLUSTER_SIZE = 64 * 1024 * 1024,
+ QED_DEFAULT_CLUSTER_SIZE = 64 * 1024,
+
+ /* Allocated clusters are tracked using a 2-level pagetable. Table size is
+ * a multiple of clusters so large maximum image sizes can be supported
+ * without jacking up the cluster size too much.
+ */
+ QED_MIN_TABLE_SIZE = 1, /* in clusters */
+ QED_MAX_TABLE_SIZE = 16,
+ QED_DEFAULT_TABLE_SIZE = 4,
+
+ /* Delay to flush and clean image after last allocating write completes */
+ QED_NEED_CHECK_TIMEOUT = 5, /* in seconds */
+};
+
+typedef struct {
+ uint32_t magic; /* QED\0 */
+
+ uint32_t cluster_size; /* in bytes */
+ uint32_t table_size; /* for L1 and L2 tables, in clusters */
+ uint32_t header_size; /* in clusters */
+
+ uint64_t features; /* format feature bits */
+ uint64_t compat_features; /* compatible feature bits */
+ uint64_t autoclear_features; /* self-resetting feature bits */
+
+ uint64_t l1_table_offset; /* in bytes */
+ uint64_t image_size; /* total logical image size, in bytes */
+
+ /* if (features & QED_F_BACKING_FILE) */
+ uint32_t backing_filename_offset; /* in bytes from start of header */
+ uint32_t backing_filename_size; /* in bytes */
+} QEDHeader;
+
+typedef struct {
+ uint64_t offsets[0]; /* in bytes */
+} QEDTable;
+
+/* The L2 cache is a simple write-through cache for L2 structures */
+typedef struct CachedL2Table {
+ QEDTable *table;
+ uint64_t offset; /* offset=0 indicates an invalidate entry */
+ QTAILQ_ENTRY(CachedL2Table) node;
+ int ref;
+} CachedL2Table;
+
+typedef struct {
+ QTAILQ_HEAD(, CachedL2Table) entries;
+ unsigned int n_entries;
+} L2TableCache;
+
+typedef struct QEDRequest {
+ CachedL2Table *l2_table;
+} QEDRequest;
+
+enum {
+ QED_AIOCB_WRITE = 0x0001, /* read or write? */
+ QED_AIOCB_ZERO = 0x0002, /* zero write, used with QED_AIOCB_WRITE */
+};
+
+typedef struct QEDAIOCB {
+ BlockDriverAIOCB common;
+ QEMUBH *bh;
+ int bh_ret; /* final return status for completion bh */
+ QSIMPLEQ_ENTRY(QEDAIOCB) next; /* next request */
+ int flags; /* QED_AIOCB_* bits ORed together */
+ bool *finished; /* signal for cancel completion */
+ uint64_t end_pos; /* request end on block device, in bytes */
+
+ /* User scatter-gather list */
+ QEMUIOVector *qiov;
+ size_t qiov_offset; /* byte count already processed */
+
+ /* Current cluster scatter-gather list */
+ QEMUIOVector cur_qiov;
+ uint64_t cur_pos; /* position on block device, in bytes */
+ uint64_t cur_cluster; /* cluster offset in image file */
+ unsigned int cur_nclusters; /* number of clusters being accessed */
+ int find_cluster_ret; /* used for L1/L2 update */
+
+ QEDRequest request;
+} QEDAIOCB;
+
+typedef struct {
+ BlockDriverState *bs; /* device */
+ uint64_t file_size; /* length of image file, in bytes */
+
+ QEDHeader header; /* always cpu-endian */
+ QEDTable *l1_table;
+ L2TableCache l2_cache; /* l2 table cache */
+ uint32_t table_nelems;
+ uint32_t l1_shift;
+ uint32_t l2_shift;
+ uint32_t l2_mask;
+
+ /* Allocating write request queue */
+ QSIMPLEQ_HEAD(, QEDAIOCB) allocating_write_reqs;
+ bool allocating_write_reqs_plugged;
+
+ /* Periodic flush and clear need check flag */
+ QEMUTimer *need_check_timer;
+} BDRVQEDState;
+
+enum {
+ QED_CLUSTER_FOUND, /* cluster found */
+ QED_CLUSTER_ZERO, /* zero cluster found */
+ QED_CLUSTER_L2, /* cluster missing in L2 */
+ QED_CLUSTER_L1, /* cluster missing in L1 */
+};
+
+/**
+ * qed_find_cluster() completion callback
+ *
+ * @opaque: User data for completion callback
+ * @ret: QED_CLUSTER_FOUND Success
+ * QED_CLUSTER_L2 Data cluster unallocated in L2
+ * QED_CLUSTER_L1 L2 unallocated in L1
+ * -errno POSIX error occurred
+ * @offset: Data cluster offset
+ * @len: Contiguous bytes starting from cluster offset
+ *
+ * This function is invoked when qed_find_cluster() completes.
+ *
+ * On success ret is QED_CLUSTER_FOUND and offset/len are a contiguous range
+ * in the image file.
+ *
+ * On failure ret is QED_CLUSTER_L2 or QED_CLUSTER_L1 for missing L2 or L1
+ * table offset, respectively. len is number of contiguous unallocated bytes.
+ */
+typedef void QEDFindClusterFunc(void *opaque, int ret, uint64_t offset, size_t len);
+
+/**
+ * Generic callback for chaining async callbacks
+ */
+typedef struct {
+ BlockDriverCompletionFunc *cb;
+ void *opaque;
+} GenericCB;
+
+void *gencb_alloc(size_t len, BlockDriverCompletionFunc *cb, void *opaque);
+void gencb_complete(void *opaque, int ret);
+
+/**
+ * Header functions
+ */
+int qed_write_header_sync(BDRVQEDState *s);
+
+/**
+ * L2 cache functions
+ */
+void qed_init_l2_cache(L2TableCache *l2_cache);
+void qed_free_l2_cache(L2TableCache *l2_cache);
+CachedL2Table *qed_alloc_l2_cache_entry(L2TableCache *l2_cache);
+void qed_unref_l2_cache_entry(CachedL2Table *entry);
+CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset);
+void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table);
+
+/**
+ * Table I/O functions
+ */
+int qed_read_l1_table_sync(BDRVQEDState *s);
+void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
+ BlockDriverCompletionFunc *cb, void *opaque);
+int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
+ unsigned int n);
+int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
+ uint64_t offset);
+void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
+ BlockDriverCompletionFunc *cb, void *opaque);
+void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
+ unsigned int index, unsigned int n, bool flush,
+ BlockDriverCompletionFunc *cb, void *opaque);
+int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
+ unsigned int index, unsigned int n, bool flush);
+
+/**
+ * Cluster functions
+ */
+void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
+ size_t len, QEDFindClusterFunc *cb, void *opaque);
+
+/**
+ * Consistency check
+ */
+int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix);
+
+QEDTable *qed_alloc_table(BDRVQEDState *s);
+
+/**
+ * Round down to the start of a cluster
+ */
+static inline uint64_t qed_start_of_cluster(BDRVQEDState *s, uint64_t offset)
+{
+ return offset & ~(uint64_t)(s->header.cluster_size - 1);
+}
+
+static inline uint64_t qed_offset_into_cluster(BDRVQEDState *s, uint64_t offset)
+{
+ return offset & (s->header.cluster_size - 1);
+}
+
+static inline uint64_t qed_bytes_to_clusters(BDRVQEDState *s, uint64_t bytes)
+{
+ return qed_start_of_cluster(s, bytes + (s->header.cluster_size - 1)) /
+ (s->header.cluster_size - 1);
+}
+
+static inline unsigned int qed_l1_index(BDRVQEDState *s, uint64_t pos)
+{
+ return pos >> s->l1_shift;
+}
+
+static inline unsigned int qed_l2_index(BDRVQEDState *s, uint64_t pos)
+{
+ return (pos >> s->l2_shift) & s->l2_mask;
+}
+
+/**
+ * Test if a cluster offset is valid
+ */
+static inline bool qed_check_cluster_offset(BDRVQEDState *s, uint64_t offset)
+{
+ uint64_t header_size = (uint64_t)s->header.header_size *
+ s->header.cluster_size;
+
+ if (offset & (s->header.cluster_size - 1)) {
+ return false;
+ }
+ return offset >= header_size && offset < s->file_size;
+}
+
+/**
+ * Test if a table offset is valid
+ */
+static inline bool qed_check_table_offset(BDRVQEDState *s, uint64_t offset)
+{
+ uint64_t end_offset = offset + (s->header.table_size - 1) *
+ s->header.cluster_size;
+
+ /* Overflow check */
+ if (end_offset <= offset) {
+ return false;
+ }
+
+ return qed_check_cluster_offset(s, offset) &&
+ qed_check_cluster_offset(s, end_offset);
+}
+
+static inline bool qed_offset_is_cluster_aligned(BDRVQEDState *s,
+ uint64_t offset)
+{
+ if (qed_offset_into_cluster(s, offset)) {
+ return false;
+ }
+ return true;
+}
+
+static inline bool qed_offset_is_unalloc_cluster(uint64_t offset)
+{
+ if (offset == 0) {
+ return true;
+ }
+ return false;
+}
+
+static inline bool qed_offset_is_zero_cluster(uint64_t offset)
+{
+ if (offset == 1) {
+ return true;
+ }
+ return false;
+}
+
+#endif /* BLOCK_QED_H */
diff --git a/contrib/qemu/block/snapshot.c b/contrib/qemu/block/snapshot.c
new file mode 100644
index 000000000..6c6d9deea
--- /dev/null
+++ b/contrib/qemu/block/snapshot.c
@@ -0,0 +1,157 @@
+/*
+ * Block layer snapshot related functions
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "block/snapshot.h"
+#include "block/block_int.h"
+
+int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info,
+ const char *name)
+{
+ QEMUSnapshotInfo *sn_tab, *sn;
+ int nb_sns, i, ret;
+
+ ret = -ENOENT;
+ nb_sns = bdrv_snapshot_list(bs, &sn_tab);
+ if (nb_sns < 0) {
+ return ret;
+ }
+ for (i = 0; i < nb_sns; i++) {
+ sn = &sn_tab[i];
+ if (!strcmp(sn->id_str, name) || !strcmp(sn->name, name)) {
+ *sn_info = *sn;
+ ret = 0;
+ break;
+ }
+ }
+ g_free(sn_tab);
+ return ret;
+}
+
+int bdrv_can_snapshot(BlockDriverState *bs)
+{
+ BlockDriver *drv = bs->drv;
+ if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
+ return 0;
+ }
+
+ if (!drv->bdrv_snapshot_create) {
+ if (bs->file != NULL) {
+ return bdrv_can_snapshot(bs->file);
+ }
+ return 0;
+ }
+
+ return 1;
+}
+
+int bdrv_snapshot_create(BlockDriverState *bs,
+ QEMUSnapshotInfo *sn_info)
+{
+ BlockDriver *drv = bs->drv;
+ if (!drv) {
+ return -ENOMEDIUM;
+ }
+ if (drv->bdrv_snapshot_create) {
+ return drv->bdrv_snapshot_create(bs, sn_info);
+ }
+ if (bs->file) {
+ return bdrv_snapshot_create(bs->file, sn_info);
+ }
+ return -ENOTSUP;
+}
+
+int bdrv_snapshot_goto(BlockDriverState *bs,
+ const char *snapshot_id)
+{
+ BlockDriver *drv = bs->drv;
+ int ret, open_ret;
+
+ if (!drv) {
+ return -ENOMEDIUM;
+ }
+ if (drv->bdrv_snapshot_goto) {
+ return drv->bdrv_snapshot_goto(bs, snapshot_id);
+ }
+
+ if (bs->file) {
+ drv->bdrv_close(bs);
+ ret = bdrv_snapshot_goto(bs->file, snapshot_id);
+ open_ret = drv->bdrv_open(bs, NULL, bs->open_flags);
+ if (open_ret < 0) {
+ bdrv_delete(bs->file);
+ bs->drv = NULL;
+ return open_ret;
+ }
+ return ret;
+ }
+
+ return -ENOTSUP;
+}
+
+int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
+{
+ BlockDriver *drv = bs->drv;
+ if (!drv) {
+ return -ENOMEDIUM;
+ }
+ if (drv->bdrv_snapshot_delete) {
+ return drv->bdrv_snapshot_delete(bs, snapshot_id);
+ }
+ if (bs->file) {
+ return bdrv_snapshot_delete(bs->file, snapshot_id);
+ }
+ return -ENOTSUP;
+}
+
+int bdrv_snapshot_list(BlockDriverState *bs,
+ QEMUSnapshotInfo **psn_info)
+{
+ BlockDriver *drv = bs->drv;
+ if (!drv) {
+ return -ENOMEDIUM;
+ }
+ if (drv->bdrv_snapshot_list) {
+ return drv->bdrv_snapshot_list(bs, psn_info);
+ }
+ if (bs->file) {
+ return bdrv_snapshot_list(bs->file, psn_info);
+ }
+ return -ENOTSUP;
+}
+
+int bdrv_snapshot_load_tmp(BlockDriverState *bs,
+ const char *snapshot_name)
+{
+ BlockDriver *drv = bs->drv;
+ if (!drv) {
+ return -ENOMEDIUM;
+ }
+ if (!bs->read_only) {
+ return -EINVAL;
+ }
+ if (drv->bdrv_snapshot_load_tmp) {
+ return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
+ }
+ return -ENOTSUP;
+}
diff --git a/contrib/qemu/config-host.h b/contrib/qemu/config-host.h
new file mode 100644
index 000000000..46b1595a8
--- /dev/null
+++ b/contrib/qemu/config-host.h
@@ -0,0 +1,73 @@
+/* Automatically generated by create_config - do not modify */
+#define CONFIG_QEMU_CONFDIR "/usr/local/etc/qemu"
+#define CONFIG_QEMU_DATADIR "/usr/local/share/qemu"
+#define CONFIG_QEMU_DOCDIR "/usr/local/share/doc/qemu"
+#define CONFIG_QEMU_LOCALSTATEDIR "/usr/local/var"
+#define CONFIG_QEMU_HELPERDIR "/usr/local/libexec"
+#define CONFIG_QEMU_LOCALEDIR "/usr/local/share/locale"
+#define HOST_X86_64 1
+#define CONFIG_QEMU_LDST_OPTIMIZATION 1
+#define CONFIG_POSIX 1
+#define CONFIG_LINUX 1
+#define CONFIG_SLIRP 1
+#define CONFIG_SMBD_COMMAND "/usr/sbin/smbd"
+#define CONFIG_AUDIO_DRIVERS \
+ &oss_audio_driver,\
+
+#define CONFIG_OSS 1
+#define CONFIG_BDRV_RW_WHITELIST\
+ NULL
+#define CONFIG_BDRV_RO_WHITELIST\
+ NULL
+#define CONFIG_VNC 1
+#define CONFIG_VNC_TLS 1
+#define CONFIG_VNC_SASL 1
+#define CONFIG_VNC_WS 1
+#define CONFIG_FNMATCH 1
+#define CONFIG_UUID 1
+#define CONFIG_XFS 1
+#define QEMU_VERSION "1.5.50"
+#define QEMU_PKGVERSION ""
+#define CONFIG_CURSES 1
+#define CONFIG_UTIMENSAT 1
+#define CONFIG_PIPE2 1
+#define CONFIG_ACCEPT4 1
+#define CONFIG_SPLICE 1
+#define CONFIG_EVENTFD 1
+#define CONFIG_FALLOCATE 1
+#define CONFIG_FALLOCATE_PUNCH_HOLE 1
+#define CONFIG_SYNC_FILE_RANGE 1
+#define CONFIG_FIEMAP 1
+#define CONFIG_DUP3 1
+#define CONFIG_EPOLL 1
+#define CONFIG_EPOLL_CREATE1 1
+#define CONFIG_EPOLL_PWAIT 1
+#define CONFIG_SENDFILE 1
+#define CONFIG_INOTIFY 1
+#define CONFIG_INOTIFY1 1
+#define CONFIG_BYTESWAP_H 1
+#define CONFIG_CURL 1
+#define CONFIG_LINUX_AIO 1
+#define CONFIG_ATTR 1
+#define CONFIG_VHOST_SCSI 1
+#define CONFIG_IOVEC 1
+#define CONFIG_PREADV 1
+#define CONFIG_FDT 1
+#define CONFIG_SIGNALFD 1
+#define CONFIG_FDATASYNC 1
+#define CONFIG_MADVISE 1
+#define CONFIG_POSIX_MADVISE 1
+#define CONFIG_SIGEV_THREAD_ID 1
+#define CONFIG_UNAME_RELEASE ""
+#define CONFIG_QOM_CAST_DEBUG 1
+#define CONFIG_COROUTINE_BACKEND ucontext
+#define CONFIG_OPEN_BY_HANDLE 1
+#define CONFIG_LINUX_MAGIC_H 1
+#define CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE 1
+#define CONFIG_HAS_ENVIRON 1
+#define CONFIG_CPUID_H 1
+#define CONFIG_INT128 1
+#define CONFIG_VIRTIO_BLK_DATA_PLANE $(CONFIG_VIRTIO)
+#define CONFIG_TRACE_NOP 1
+#define CONFIG_TRACE_FILE trace
+#define CONFIG_TRACE_DEFAULT 1
diff --git a/contrib/qemu/coroutine-ucontext.c b/contrib/qemu/coroutine-ucontext.c
new file mode 100644
index 000000000..4bf2cde27
--- /dev/null
+++ b/contrib/qemu/coroutine-ucontext.c
@@ -0,0 +1,225 @@
+/*
+ * ucontext coroutine initialization code
+ *
+ * Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws>
+ * Copyright (C) 2011 Kevin Wolf <kwolf@redhat.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.0 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */
+#ifdef _FORTIFY_SOURCE
+#undef _FORTIFY_SOURCE
+#endif
+#include <stdlib.h>
+#include <setjmp.h>
+#include <stdint.h>
+#include <pthread.h>
+#include <ucontext.h>
+#include "qemu-common.h"
+#include "block/coroutine_int.h"
+
+#ifdef CONFIG_VALGRIND_H
+#include <valgrind/valgrind.h>
+#endif
+
+typedef struct {
+ Coroutine base;
+ void *stack;
+ sigjmp_buf env;
+
+#ifdef CONFIG_VALGRIND_H
+ unsigned int valgrind_stack_id;
+#endif
+
+} CoroutineUContext;
+
+/**
+ * Per-thread coroutine bookkeeping
+ */
+typedef struct {
+ /** Currently executing coroutine */
+ Coroutine *current;
+
+ /** The default coroutine */
+ CoroutineUContext leader;
+} CoroutineThreadState;
+
+static pthread_key_t thread_state_key;
+
+/*
+ * va_args to makecontext() must be type 'int', so passing
+ * the pointer we need may require several int args. This
+ * union is a quick hack to let us do that
+ */
+union cc_arg {
+ void *p;
+ int i[2];
+};
+
+static CoroutineThreadState *coroutine_get_thread_state(void)
+{
+ CoroutineThreadState *s = pthread_getspecific(thread_state_key);
+
+ if (!s) {
+ s = g_malloc0(sizeof(*s));
+ s->current = &s->leader.base;
+ pthread_setspecific(thread_state_key, s);
+ }
+ return s;
+}
+
+static void qemu_coroutine_thread_cleanup(void *opaque)
+{
+ CoroutineThreadState *s = opaque;
+
+ g_free(s);
+}
+
+static void __attribute__((constructor)) coroutine_init(void)
+{
+ int ret;
+
+ ret = pthread_key_create(&thread_state_key, qemu_coroutine_thread_cleanup);
+ if (ret != 0) {
+ fprintf(stderr, "unable to create leader key: %s\n", strerror(errno));
+ abort();
+ }
+}
+
+static void coroutine_trampoline(int i0, int i1)
+{
+ union cc_arg arg;
+ CoroutineUContext *self;
+ Coroutine *co;
+
+ arg.i[0] = i0;
+ arg.i[1] = i1;
+ self = arg.p;
+ co = &self->base;
+
+ /* Initialize longjmp environment and switch back the caller */
+ if (!sigsetjmp(self->env, 0)) {
+ siglongjmp(*(sigjmp_buf *)co->entry_arg, 1);
+ }
+
+ while (true) {
+ co->entry(co->entry_arg);
+ qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
+ }
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+ const size_t stack_size = 1 << 20;
+ CoroutineUContext *co;
+ ucontext_t old_uc, uc;
+ sigjmp_buf old_env;
+ union cc_arg arg = {0};
+
+ /* The ucontext functions preserve signal masks which incurs a
+ * system call overhead. sigsetjmp(buf, 0)/siglongjmp() does not
+ * preserve signal masks but only works on the current stack.
+ * Since we need a way to create and switch to a new stack, use
+ * the ucontext functions for that but sigsetjmp()/siglongjmp() for
+ * everything else.
+ */
+
+ if (getcontext(&uc) == -1) {
+ abort();
+ }
+
+ co = g_malloc0(sizeof(*co));
+ co->stack = g_malloc(stack_size);
+ co->base.entry_arg = &old_env; /* stash away our jmp_buf */
+
+ uc.uc_link = &old_uc;
+ uc.uc_stack.ss_sp = co->stack;
+ uc.uc_stack.ss_size = stack_size;
+ uc.uc_stack.ss_flags = 0;
+
+#ifdef CONFIG_VALGRIND_H
+ co->valgrind_stack_id =
+ VALGRIND_STACK_REGISTER(co->stack, co->stack + stack_size);
+#endif
+
+ arg.p = co;
+
+ makecontext(&uc, (void (*)(void))coroutine_trampoline,
+ 2, arg.i[0], arg.i[1]);
+
+ /* swapcontext() in, siglongjmp() back out */
+ if (!sigsetjmp(old_env, 0)) {
+ swapcontext(&old_uc, &uc);
+ }
+ return &co->base;
+}
+
+#ifdef CONFIG_VALGRIND_H
+#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
+/* Work around an unused variable in the valgrind.h macro... */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#endif
+static inline void valgrind_stack_deregister(CoroutineUContext *co)
+{
+ VALGRIND_STACK_DEREGISTER(co->valgrind_stack_id);
+}
+#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
+#pragma GCC diagnostic pop
+#endif
+#endif
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+ CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_);
+
+#ifdef CONFIG_VALGRIND_H
+ valgrind_stack_deregister(co);
+#endif
+
+ g_free(co->stack);
+ g_free(co);
+}
+
+CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+ CoroutineAction action)
+{
+ CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_);
+ CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_);
+ CoroutineThreadState *s = coroutine_get_thread_state();
+ int ret;
+
+ s->current = to_;
+
+ ret = sigsetjmp(from->env, 0);
+ if (ret == 0) {
+ siglongjmp(to->env, action);
+ }
+ return ret;
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+ CoroutineThreadState *s = coroutine_get_thread_state();
+
+ return s->current;
+}
+
+bool qemu_in_coroutine(void)
+{
+ CoroutineThreadState *s = pthread_getspecific(thread_state_key);
+
+ return s && s->current->caller;
+}
diff --git a/contrib/qemu/include/block/aio.h b/contrib/qemu/include/block/aio.h
new file mode 100644
index 000000000..183679374
--- /dev/null
+++ b/contrib/qemu/include/block/aio.h
@@ -0,0 +1,247 @@
+/*
+ * QEMU aio implementation
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_AIO_H
+#define QEMU_AIO_H
+
+#include "qemu-common.h"
+#include "qemu/queue.h"
+#include "qemu/event_notifier.h"
+
+typedef struct BlockDriverAIOCB BlockDriverAIOCB;
+typedef void BlockDriverCompletionFunc(void *opaque, int ret);
+
+typedef struct AIOCBInfo {
+ void (*cancel)(BlockDriverAIOCB *acb);
+ size_t aiocb_size;
+} AIOCBInfo;
+
+struct BlockDriverAIOCB {
+ const AIOCBInfo *aiocb_info;
+ BlockDriverState *bs;
+ BlockDriverCompletionFunc *cb;
+ void *opaque;
+};
+
+void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque);
+void qemu_aio_release(void *p);
+
+typedef struct AioHandler AioHandler;
+typedef void QEMUBHFunc(void *opaque);
+typedef void IOHandler(void *opaque);
+
+typedef struct AioContext {
+ GSource source;
+
+ /* The list of registered AIO handlers */
+ QLIST_HEAD(, AioHandler) aio_handlers;
+
+ /* This is a simple lock used to protect the aio_handlers list.
+ * Specifically, it's used to ensure that no callbacks are removed while
+ * we're walking and dispatching callbacks.
+ */
+ int walking_handlers;
+
+ /* Anchor of the list of Bottom Halves belonging to the context */
+ struct QEMUBH *first_bh;
+
+ /* A simple lock used to protect the first_bh list, and ensure that
+ * no callbacks are removed while we're walking and dispatching callbacks.
+ */
+ int walking_bh;
+
+ /* Used for aio_notify. */
+ EventNotifier notifier;
+
+ /* GPollFDs for aio_poll() */
+ GArray *pollfds;
+
+ /* Thread pool for performing work and receiving completion callbacks */
+ struct ThreadPool *thread_pool;
+} AioContext;
+
+/* Returns 1 if there are still outstanding AIO requests; 0 otherwise */
+typedef int (AioFlushEventNotifierHandler)(EventNotifier *e);
+
+/**
+ * aio_context_new: Allocate a new AioContext.
+ *
+ * AioContext provide a mini event-loop that can be waited on synchronously.
+ * They also provide bottom halves, a service to execute a piece of code
+ * as soon as possible.
+ */
+AioContext *aio_context_new(void);
+
+/**
+ * aio_context_ref:
+ * @ctx: The AioContext to operate on.
+ *
+ * Add a reference to an AioContext.
+ */
+void aio_context_ref(AioContext *ctx);
+
+/**
+ * aio_context_unref:
+ * @ctx: The AioContext to operate on.
+ *
+ * Drop a reference to an AioContext.
+ */
+void aio_context_unref(AioContext *ctx);
+
+/**
+ * aio_bh_new: Allocate a new bottom half structure.
+ *
+ * Bottom halves are lightweight callbacks whose invocation is guaranteed
+ * to be wait-free, thread-safe and signal-safe. The #QEMUBH structure
+ * is opaque and must be allocated prior to its use.
+ */
+QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
+
+/**
+ * aio_notify: Force processing of pending events.
+ *
+ * Similar to signaling a condition variable, aio_notify forces
+ * aio_wait to exit, so that the next call will re-examine pending events.
+ * The caller of aio_notify will usually call aio_wait again very soon,
+ * or go through another iteration of the GLib main loop. Hence, aio_notify
+ * also has the side effect of recalculating the sets of file descriptors
+ * that the main loop waits for.
+ *
+ * Calling aio_notify is rarely necessary, because for example scheduling
+ * a bottom half calls it already.
+ */
+void aio_notify(AioContext *ctx);
+
+/**
+ * aio_bh_poll: Poll bottom halves for an AioContext.
+ *
+ * These are internal functions used by the QEMU main loop.
+ */
+int aio_bh_poll(AioContext *ctx);
+
+/**
+ * qemu_bh_schedule: Schedule a bottom half.
+ *
+ * Scheduling a bottom half interrupts the main loop and causes the
+ * execution of the callback that was passed to qemu_bh_new.
+ *
+ * Bottom halves that are scheduled from a bottom half handler are instantly
+ * invoked. This can create an infinite loop if a bottom half handler
+ * schedules itself.
+ *
+ * @bh: The bottom half to be scheduled.
+ */
+void qemu_bh_schedule(QEMUBH *bh);
+
+/**
+ * qemu_bh_cancel: Cancel execution of a bottom half.
+ *
+ * Canceling execution of a bottom half undoes the effect of calls to
+ * qemu_bh_schedule without freeing its resources yet. While cancellation
+ * itself is also wait-free and thread-safe, it can of course race with the
+ * loop that executes bottom halves unless you are holding the iothread
+ * mutex. This makes it mostly useless if you are not holding the mutex.
+ *
+ * @bh: The bottom half to be canceled.
+ */
+void qemu_bh_cancel(QEMUBH *bh);
+
+/**
+ *qemu_bh_delete: Cancel execution of a bottom half and free its resources.
+ *
+ * Deleting a bottom half frees the memory that was allocated for it by
+ * qemu_bh_new. It also implies canceling the bottom half if it was
+ * scheduled.
+ *
+ * @bh: The bottom half to be deleted.
+ */
+void qemu_bh_delete(QEMUBH *bh);
+
+/* Return whether there are any pending callbacks from the GSource
+ * attached to the AioContext.
+ *
+ * This is used internally in the implementation of the GSource.
+ */
+bool aio_pending(AioContext *ctx);
+
+/* Progress in completing AIO work to occur. This can issue new pending
+ * aio as a result of executing I/O completion or bh callbacks.
+ *
+ * If there is no pending AIO operation or completion (bottom half),
+ * return false. If there are pending AIO operations of bottom halves,
+ * return true.
+ *
+ * If there are no pending bottom halves, but there are pending AIO
+ * operations, it may not be possible to make any progress without
+ * blocking. If @blocking is true, this function will wait until one
+ * or more AIO events have completed, to ensure something has moved
+ * before returning.
+ */
+bool aio_poll(AioContext *ctx, bool blocking);
+
+#ifdef CONFIG_POSIX
+/* Returns 1 if there are still outstanding AIO requests; 0 otherwise */
+typedef int (AioFlushHandler)(void *opaque);
+
+/* Register a file descriptor and associated callbacks. Behaves very similarly
+ * to qemu_set_fd_handler2. Unlike qemu_set_fd_handler2, these callbacks will
+ * be invoked when using qemu_aio_wait().
+ *
+ * Code that invokes AIO completion functions should rely on this function
+ * instead of qemu_set_fd_handler[2].
+ */
+void aio_set_fd_handler(AioContext *ctx,
+ int fd,
+ IOHandler *io_read,
+ IOHandler *io_write,
+ AioFlushHandler *io_flush,
+ void *opaque);
+#endif
+
+/* Register an event notifier and associated callbacks. Behaves very similarly
+ * to event_notifier_set_handler. Unlike event_notifier_set_handler, these callbacks
+ * will be invoked when using qemu_aio_wait().
+ *
+ * Code that invokes AIO completion functions should rely on this function
+ * instead of event_notifier_set_handler.
+ */
+void aio_set_event_notifier(AioContext *ctx,
+ EventNotifier *notifier,
+ EventNotifierHandler *io_read,
+ AioFlushEventNotifierHandler *io_flush);
+
+/* Return a GSource that lets the main loop poll the file descriptors attached
+ * to this AioContext.
+ */
+GSource *aio_get_g_source(AioContext *ctx);
+
+/* Return the ThreadPool bound to this AioContext */
+struct ThreadPool *aio_get_thread_pool(AioContext *ctx);
+
+/* Functions to operate on the main QEMU AioContext. */
+
+bool qemu_aio_wait(void);
+void qemu_aio_set_event_notifier(EventNotifier *notifier,
+ EventNotifierHandler *io_read,
+ AioFlushEventNotifierHandler *io_flush);
+
+#ifdef CONFIG_POSIX
+void qemu_aio_set_fd_handler(int fd,
+ IOHandler *io_read,
+ IOHandler *io_write,
+ AioFlushHandler *io_flush,
+ void *opaque);
+#endif
+
+#endif
diff --git a/contrib/qemu/include/block/block.h b/contrib/qemu/include/block/block.h
new file mode 100644
index 000000000..b6b9014a9
--- /dev/null
+++ b/contrib/qemu/include/block/block.h
@@ -0,0 +1,443 @@
+#ifndef BLOCK_H
+#define BLOCK_H
+
+#include "block/aio.h"
+#include "qemu-common.h"
+#include "qemu/option.h"
+#include "block/coroutine.h"
+#include "qapi/qmp/qobject.h"
+#include "qapi-types.h"
+
+/* block.c */
+typedef struct BlockDriver BlockDriver;
+typedef struct BlockJob BlockJob;
+
+typedef struct BlockDriverInfo {
+ /* in bytes, 0 if irrelevant */
+ int cluster_size;
+ /* offset at which the VM state can be saved (0 if not possible) */
+ int64_t vm_state_offset;
+ bool is_dirty;
+} BlockDriverInfo;
+
+typedef struct BlockFragInfo {
+ uint64_t allocated_clusters;
+ uint64_t total_clusters;
+ uint64_t fragmented_clusters;
+ uint64_t compressed_clusters;
+} BlockFragInfo;
+
+/* Callbacks for block device models */
+typedef struct BlockDevOps {
+ /*
+ * Runs when virtual media changed (monitor commands eject, change)
+ * Argument load is true on load and false on eject.
+ * Beware: doesn't run when a host device's physical media
+ * changes. Sure would be useful if it did.
+ * Device models with removable media must implement this callback.
+ */
+ void (*change_media_cb)(void *opaque, bool load);
+ /*
+ * Runs when an eject request is issued from the monitor, the tray
+ * is closed, and the medium is locked.
+ * Device models that do not implement is_medium_locked will not need
+ * this callback. Device models that can lock the medium or tray might
+ * want to implement the callback and unlock the tray when "force" is
+ * true, even if they do not support eject requests.
+ */
+ void (*eject_request_cb)(void *opaque, bool force);
+ /*
+ * Is the virtual tray open?
+ * Device models implement this only when the device has a tray.
+ */
+ bool (*is_tray_open)(void *opaque);
+ /*
+ * Is the virtual medium locked into the device?
+ * Device models implement this only when device has such a lock.
+ */
+ bool (*is_medium_locked)(void *opaque);
+ /*
+ * Runs when the size changed (e.g. monitor command block_resize)
+ */
+ void (*resize_cb)(void *opaque);
+} BlockDevOps;
+
+#define BDRV_O_RDWR 0x0002
+#define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */
+#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */
+#define BDRV_O_CACHE_WB 0x0040 /* use write-back caching */
+#define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the thread pool */
+#define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */
+#define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */
+#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */
+#define BDRV_O_INCOMING 0x0800 /* consistency hint for incoming migration */
+#define BDRV_O_CHECK 0x1000 /* open solely for consistency check */
+#define BDRV_O_ALLOW_RDWR 0x2000 /* allow reopen to change from r/o to r/w */
+#define BDRV_O_UNMAP 0x4000 /* execute guest UNMAP/TRIM operations */
+
+#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH)
+
+#define BDRV_SECTOR_BITS 9
+#define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS)
+#define BDRV_SECTOR_MASK ~(BDRV_SECTOR_SIZE - 1)
+
+typedef enum {
+ BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP
+} BlockErrorAction;
+
+typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
+
+typedef struct BDRVReopenState {
+ BlockDriverState *bs;
+ int flags;
+ void *opaque;
+} BDRVReopenState;
+
+
+void bdrv_iostatus_enable(BlockDriverState *bs);
+void bdrv_iostatus_reset(BlockDriverState *bs);
+void bdrv_iostatus_disable(BlockDriverState *bs);
+bool bdrv_iostatus_is_enabled(const BlockDriverState *bs);
+void bdrv_iostatus_set_err(BlockDriverState *bs, int error);
+void bdrv_info_print(Monitor *mon, const QObject *data);
+void bdrv_info(Monitor *mon, QObject **ret_data);
+void bdrv_stats_print(Monitor *mon, const QObject *data);
+void bdrv_info_stats(Monitor *mon, QObject **ret_data);
+
+/* disk I/O throttling */
+void bdrv_io_limits_enable(BlockDriverState *bs);
+void bdrv_io_limits_disable(BlockDriverState *bs);
+bool bdrv_io_limits_enabled(BlockDriverState *bs);
+
+void bdrv_init(void);
+void bdrv_init_with_whitelist(void);
+BlockDriver *bdrv_find_protocol(const char *filename,
+ bool allow_protocol_prefix);
+BlockDriver *bdrv_find_format(const char *format_name);
+BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
+ bool readonly);
+int bdrv_create(BlockDriver *drv, const char* filename,
+ QEMUOptionParameter *options);
+int bdrv_create_file(const char* filename, QEMUOptionParameter *options);
+BlockDriverState *bdrv_new(const char *device_name);
+void bdrv_make_anon(BlockDriverState *bs);
+void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old);
+void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top);
+void bdrv_delete(BlockDriverState *bs);
+int bdrv_parse_cache_flags(const char *mode, int *flags);
+int bdrv_parse_discard_flags(const char *mode, int *flags);
+int bdrv_file_open(BlockDriverState **pbs, const char *filename,
+ QDict *options, int flags);
+int bdrv_open_backing_file(BlockDriverState *bs, QDict *options);
+int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
+ int flags, BlockDriver *drv);
+BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
+ BlockDriverState *bs, int flags);
+int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp);
+int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp);
+int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
+ BlockReopenQueue *queue, Error **errp);
+void bdrv_reopen_commit(BDRVReopenState *reopen_state);
+void bdrv_reopen_abort(BDRVReopenState *reopen_state);
+void bdrv_close(BlockDriverState *bs);
+void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify);
+int bdrv_attach_dev(BlockDriverState *bs, void *dev);
+void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev);
+void bdrv_detach_dev(BlockDriverState *bs, void *dev);
+void *bdrv_get_attached_dev(BlockDriverState *bs);
+void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
+ void *opaque);
+void bdrv_dev_eject_request(BlockDriverState *bs, bool force);
+bool bdrv_dev_has_removable_media(BlockDriverState *bs);
+bool bdrv_dev_is_tray_open(BlockDriverState *bs);
+bool bdrv_dev_is_medium_locked(BlockDriverState *bs);
+int bdrv_read(BlockDriverState *bs, int64_t sector_num,
+ uint8_t *buf, int nb_sectors);
+int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
+ uint8_t *buf, int nb_sectors);
+int bdrv_write(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors);
+int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov);
+int bdrv_pread(BlockDriverState *bs, int64_t offset,
+ void *buf, int count);
+int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
+ const void *buf, int count);
+int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov);
+int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
+ const void *buf, int count);
+int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov);
+int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov);
+/*
+ * Efficiently zero a region of the disk image. Note that this is a regular
+ * I/O request like read or write and should have a reasonable size. This
+ * function is not suitable for zeroing the entire image in a single request
+ * because it may allocate memory for the entire region.
+ */
+int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors);
+int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, int *pnum);
+int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
+ BlockDriverState *base,
+ int64_t sector_num,
+ int nb_sectors, int *pnum);
+BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
+ const char *backing_file);
+int bdrv_get_backing_file_depth(BlockDriverState *bs);
+int bdrv_truncate(BlockDriverState *bs, int64_t offset);
+int64_t bdrv_getlength(BlockDriverState *bs);
+int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
+void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
+int bdrv_commit(BlockDriverState *bs);
+int bdrv_commit_all(void);
+int bdrv_change_backing_file(BlockDriverState *bs,
+ const char *backing_file, const char *backing_fmt);
+void bdrv_register(BlockDriver *bdrv);
+int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
+ BlockDriverState *base);
+BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
+ BlockDriverState *bs);
+BlockDriverState *bdrv_find_base(BlockDriverState *bs);
+
+
+typedef struct BdrvCheckResult {
+ int corruptions;
+ int leaks;
+ int check_errors;
+ int corruptions_fixed;
+ int leaks_fixed;
+ int64_t image_end_offset;
+ BlockFragInfo bfi;
+} BdrvCheckResult;
+
+typedef enum {
+ BDRV_FIX_LEAKS = 1,
+ BDRV_FIX_ERRORS = 2,
+} BdrvCheckMode;
+
+int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix);
+
+/* async block I/O */
+typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector,
+ int sector_num);
+BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
+ QEMUIOVector *iov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
+ QEMUIOVector *iov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque);
+BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+void bdrv_aio_cancel(BlockDriverAIOCB *acb);
+
+typedef struct BlockRequest {
+ /* Fields to be filled by multiwrite caller */
+ int64_t sector;
+ int nb_sectors;
+ QEMUIOVector *qiov;
+ BlockDriverCompletionFunc *cb;
+ void *opaque;
+
+ /* Filled by multiwrite implementation */
+ int error;
+} BlockRequest;
+
+int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs,
+ int num_reqs);
+
+/* sg packet commands */
+int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf);
+BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
+ unsigned long int req, void *buf,
+ BlockDriverCompletionFunc *cb, void *opaque);
+
+/* Invalidate any cached metadata used by image formats */
+void bdrv_invalidate_cache(BlockDriverState *bs);
+void bdrv_invalidate_cache_all(void);
+
+void bdrv_clear_incoming_migration_all(void);
+
+/* Ensure contents are flushed to disk. */
+int bdrv_flush(BlockDriverState *bs);
+int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
+int bdrv_flush_all(void);
+void bdrv_close_all(void);
+void bdrv_drain_all(void);
+
+int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
+int bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
+int bdrv_has_zero_init_1(BlockDriverState *bs);
+int bdrv_has_zero_init(BlockDriverState *bs);
+int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+ int *pnum);
+int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
+ int64_t sector_num, int nb_sectors, int *pnum);
+
+void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
+ BlockdevOnError on_write_error);
+BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read);
+BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error);
+void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
+ bool is_read, int error);
+int bdrv_is_read_only(BlockDriverState *bs);
+int bdrv_is_sg(BlockDriverState *bs);
+int bdrv_enable_write_cache(BlockDriverState *bs);
+void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce);
+int bdrv_is_inserted(BlockDriverState *bs);
+int bdrv_media_changed(BlockDriverState *bs);
+void bdrv_lock_medium(BlockDriverState *bs, bool locked);
+void bdrv_eject(BlockDriverState *bs, bool eject_flag);
+const char *bdrv_get_format_name(BlockDriverState *bs);
+BlockDriverState *bdrv_find(const char *name);
+BlockDriverState *bdrv_next(BlockDriverState *bs);
+void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs),
+ void *opaque);
+int bdrv_is_encrypted(BlockDriverState *bs);
+int bdrv_key_required(BlockDriverState *bs);
+int bdrv_set_key(BlockDriverState *bs, const char *key);
+int bdrv_query_missing_keys(void);
+void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
+ void *opaque);
+const char *bdrv_get_device_name(BlockDriverState *bs);
+int bdrv_get_flags(BlockDriverState *bs);
+int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors);
+int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
+void bdrv_round_to_clusters(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ int64_t *cluster_sector_num,
+ int *cluster_nb_sectors);
+
+const char *bdrv_get_encrypted_filename(BlockDriverState *bs);
+void bdrv_get_backing_filename(BlockDriverState *bs,
+ char *filename, int filename_size);
+void bdrv_get_full_backing_filename(BlockDriverState *bs,
+ char *dest, size_t sz);
+int bdrv_is_snapshot(BlockDriverState *bs);
+
+int path_is_absolute(const char *path);
+void path_combine(char *dest, int dest_size,
+ const char *base_path,
+ const char *filename);
+
+int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
+int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
+ int64_t pos, int size);
+
+int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
+ int64_t pos, int size);
+
+void bdrv_img_create(const char *filename, const char *fmt,
+ const char *base_filename, const char *base_fmt,
+ char *options, uint64_t img_size, int flags,
+ Error **errp, bool quiet);
+
+void bdrv_set_buffer_alignment(BlockDriverState *bs, int align);
+void *qemu_blockalign(BlockDriverState *bs, size_t size);
+bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
+
+struct HBitmapIter;
+void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity);
+int bdrv_get_dirty(BlockDriverState *bs, int64_t sector);
+void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
+void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
+void bdrv_dirty_iter_init(BlockDriverState *bs, struct HBitmapIter *hbi);
+int64_t bdrv_get_dirty_count(BlockDriverState *bs);
+
+void bdrv_enable_copy_on_read(BlockDriverState *bs);
+void bdrv_disable_copy_on_read(BlockDriverState *bs);
+
+void bdrv_set_in_use(BlockDriverState *bs, int in_use);
+int bdrv_in_use(BlockDriverState *bs);
+
+#ifdef CONFIG_LINUX_AIO
+int raw_get_aio_fd(BlockDriverState *bs);
+#else
+static inline int raw_get_aio_fd(BlockDriverState *bs)
+{
+ return -ENOTSUP;
+}
+#endif
+
+enum BlockAcctType {
+ BDRV_ACCT_READ,
+ BDRV_ACCT_WRITE,
+ BDRV_ACCT_FLUSH,
+ BDRV_MAX_IOTYPE,
+};
+
+typedef struct BlockAcctCookie {
+ int64_t bytes;
+ int64_t start_time_ns;
+ enum BlockAcctType type;
+} BlockAcctCookie;
+
+void bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie,
+ int64_t bytes, enum BlockAcctType type);
+void bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie);
+
+typedef enum {
+ BLKDBG_L1_UPDATE,
+
+ BLKDBG_L1_GROW_ALLOC_TABLE,
+ BLKDBG_L1_GROW_WRITE_TABLE,
+ BLKDBG_L1_GROW_ACTIVATE_TABLE,
+
+ BLKDBG_L2_LOAD,
+ BLKDBG_L2_UPDATE,
+ BLKDBG_L2_UPDATE_COMPRESSED,
+ BLKDBG_L2_ALLOC_COW_READ,
+ BLKDBG_L2_ALLOC_WRITE,
+
+ BLKDBG_READ_AIO,
+ BLKDBG_READ_BACKING_AIO,
+ BLKDBG_READ_COMPRESSED,
+
+ BLKDBG_WRITE_AIO,
+ BLKDBG_WRITE_COMPRESSED,
+
+ BLKDBG_VMSTATE_LOAD,
+ BLKDBG_VMSTATE_SAVE,
+
+ BLKDBG_COW_READ,
+ BLKDBG_COW_WRITE,
+
+ BLKDBG_REFTABLE_LOAD,
+ BLKDBG_REFTABLE_GROW,
+
+ BLKDBG_REFBLOCK_LOAD,
+ BLKDBG_REFBLOCK_UPDATE,
+ BLKDBG_REFBLOCK_UPDATE_PART,
+ BLKDBG_REFBLOCK_ALLOC,
+ BLKDBG_REFBLOCK_ALLOC_HOOKUP,
+ BLKDBG_REFBLOCK_ALLOC_WRITE,
+ BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS,
+ BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE,
+ BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE,
+
+ BLKDBG_CLUSTER_ALLOC,
+ BLKDBG_CLUSTER_ALLOC_BYTES,
+ BLKDBG_CLUSTER_FREE,
+
+ BLKDBG_FLUSH_TO_OS,
+ BLKDBG_FLUSH_TO_DISK,
+
+ BLKDBG_EVENT_MAX,
+} BlkDebugEvent;
+
+#define BLKDBG_EVENT(bs, evt) bdrv_debug_event(bs, evt)
+void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event);
+
+int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
+ const char *tag);
+int bdrv_debug_resume(BlockDriverState *bs, const char *tag);
+bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
+
+#endif
diff --git a/contrib/qemu/include/block/block_int.h b/contrib/qemu/include/block/block_int.h
new file mode 100644
index 000000000..c6ac871e2
--- /dev/null
+++ b/contrib/qemu/include/block/block_int.h
@@ -0,0 +1,421 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_INT_H
+#define BLOCK_INT_H
+
+#include "block/block.h"
+#include "qemu/option.h"
+#include "qemu/queue.h"
+#include "block/coroutine.h"
+#include "qemu/timer.h"
+#include "qapi-types.h"
+#include "qapi/qmp/qerror.h"
+#include "monitor/monitor.h"
+#include "qemu/hbitmap.h"
+#include "block/snapshot.h"
+
+#define BLOCK_FLAG_ENCRYPT 1
+#define BLOCK_FLAG_COMPAT6 4
+#define BLOCK_FLAG_LAZY_REFCOUNTS 8
+
+#define BLOCK_IO_LIMIT_READ 0
+#define BLOCK_IO_LIMIT_WRITE 1
+#define BLOCK_IO_LIMIT_TOTAL 2
+
+#define BLOCK_IO_SLICE_TIME 100000000
+#define NANOSECONDS_PER_SECOND 1000000000.0
+
+#define BLOCK_OPT_SIZE "size"
+#define BLOCK_OPT_ENCRYPT "encryption"
+#define BLOCK_OPT_COMPAT6 "compat6"
+#define BLOCK_OPT_BACKING_FILE "backing_file"
+#define BLOCK_OPT_BACKING_FMT "backing_fmt"
+#define BLOCK_OPT_CLUSTER_SIZE "cluster_size"
+#define BLOCK_OPT_TABLE_SIZE "table_size"
+#define BLOCK_OPT_PREALLOC "preallocation"
+#define BLOCK_OPT_SUBFMT "subformat"
+#define BLOCK_OPT_COMPAT_LEVEL "compat"
+#define BLOCK_OPT_LAZY_REFCOUNTS "lazy_refcounts"
+#define BLOCK_OPT_ADAPTER_TYPE "adapter_type"
+
+typedef struct BdrvTrackedRequest {
+ BlockDriverState *bs;
+ int64_t sector_num;
+ int nb_sectors;
+ bool is_write;
+ QLIST_ENTRY(BdrvTrackedRequest) list;
+ Coroutine *co; /* owner, used for deadlock detection */
+ CoQueue wait_queue; /* coroutines blocked on this request */
+} BdrvTrackedRequest;
+
+
+typedef struct BlockIOLimit {
+ int64_t bps[3];
+ int64_t iops[3];
+} BlockIOLimit;
+
+typedef struct BlockIOBaseValue {
+ uint64_t bytes[2];
+ uint64_t ios[2];
+} BlockIOBaseValue;
+
+struct BlockDriver {
+ const char *format_name;
+ int instance_size;
+ int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
+ int (*bdrv_probe_device)(const char *filename);
+
+ /* Any driver implementing this callback is expected to be able to handle
+ * NULL file names in its .bdrv_open() implementation */
+ void (*bdrv_parse_filename)(const char *filename, QDict *options, Error **errp);
+
+ /* For handling image reopen for split or non-split files */
+ int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state,
+ BlockReopenQueue *queue, Error **errp);
+ void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state);
+ void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state);
+
+ int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags);
+ int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags);
+ int (*bdrv_read)(BlockDriverState *bs, int64_t sector_num,
+ uint8_t *buf, int nb_sectors);
+ int (*bdrv_write)(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors);
+ void (*bdrv_close)(BlockDriverState *bs);
+ void (*bdrv_rebind)(BlockDriverState *bs);
+ int (*bdrv_create)(const char *filename, QEMUOptionParameter *options);
+ int (*bdrv_set_key)(BlockDriverState *bs, const char *key);
+ int (*bdrv_make_empty)(BlockDriverState *bs);
+ /* aio */
+ BlockDriverAIOCB *(*bdrv_aio_readv)(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+ BlockDriverAIOCB *(*bdrv_aio_writev)(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+ BlockDriverAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque);
+ BlockDriverAIOCB *(*bdrv_aio_discard)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+
+ int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+ int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+ /*
+ * Efficiently zero a region of the disk image. Typically an image format
+ * would use a compact metadata representation to implement this. This
+ * function pointer may be NULL and .bdrv_co_writev() will be called
+ * instead.
+ */
+ int coroutine_fn (*bdrv_co_write_zeroes)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors);
+ int coroutine_fn (*bdrv_co_discard)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors);
+ int coroutine_fn (*bdrv_co_is_allocated)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, int *pnum);
+
+ /*
+ * Invalidate any cached meta-data.
+ */
+ void (*bdrv_invalidate_cache)(BlockDriverState *bs);
+
+ /*
+ * Flushes all data that was already written to the OS all the way down to
+ * the disk (for example raw-posix calls fsync()).
+ */
+ int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs);
+
+ /*
+ * Flushes all internal caches to the OS. The data may still sit in a
+ * writeback cache of the host OS, but it will survive a crash of the qemu
+ * process.
+ */
+ int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs);
+
+ const char *protocol_name;
+ int (*bdrv_truncate)(BlockDriverState *bs, int64_t offset);
+ int64_t (*bdrv_getlength)(BlockDriverState *bs);
+ int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs);
+ int (*bdrv_write_compressed)(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors);
+
+ int (*bdrv_snapshot_create)(BlockDriverState *bs,
+ QEMUSnapshotInfo *sn_info);
+ int (*bdrv_snapshot_goto)(BlockDriverState *bs,
+ const char *snapshot_id);
+ int (*bdrv_snapshot_delete)(BlockDriverState *bs, const char *snapshot_id);
+ int (*bdrv_snapshot_list)(BlockDriverState *bs,
+ QEMUSnapshotInfo **psn_info);
+ int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs,
+ const char *snapshot_name);
+ int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi);
+
+ int (*bdrv_save_vmstate)(BlockDriverState *bs, QEMUIOVector *qiov,
+ int64_t pos);
+ int (*bdrv_load_vmstate)(BlockDriverState *bs, uint8_t *buf,
+ int64_t pos, int size);
+
+ int (*bdrv_change_backing_file)(BlockDriverState *bs,
+ const char *backing_file, const char *backing_fmt);
+
+ /* removable device specific */
+ int (*bdrv_is_inserted)(BlockDriverState *bs);
+ int (*bdrv_media_changed)(BlockDriverState *bs);
+ void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag);
+ void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked);
+
+ /* to control generic scsi devices */
+ int (*bdrv_ioctl)(BlockDriverState *bs, unsigned long int req, void *buf);
+ BlockDriverAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs,
+ unsigned long int req, void *buf,
+ BlockDriverCompletionFunc *cb, void *opaque);
+
+ /* List of options for creating images, terminated by name == NULL */
+ QEMUOptionParameter *create_options;
+
+
+ /*
+ * Returns 0 for completed check, -errno for internal errors.
+ * The check results are stored in result.
+ */
+ int (*bdrv_check)(BlockDriverState* bs, BdrvCheckResult *result,
+ BdrvCheckMode fix);
+
+ void (*bdrv_debug_event)(BlockDriverState *bs, BlkDebugEvent event);
+
+ /* TODO Better pass a option string/QDict/QemuOpts to add any rule? */
+ int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event,
+ const char *tag);
+ int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag);
+ bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag);
+
+ /*
+ * Returns 1 if newly created images are guaranteed to contain only
+ * zeros, 0 otherwise.
+ */
+ int (*bdrv_has_zero_init)(BlockDriverState *bs);
+
+ QLIST_ENTRY(BlockDriver) list;
+};
+
+/*
+ * Note: the function bdrv_append() copies and swaps contents of
+ * BlockDriverStates, so if you add new fields to this struct, please
+ * inspect bdrv_append() to determine if the new fields need to be
+ * copied as well.
+ */
+struct BlockDriverState {
+ int64_t total_sectors; /* if we are reading a disk image, give its
+ size in sectors */
+ int read_only; /* if true, the media is read only */
+ int open_flags; /* flags used to open the file, re-used for re-open */
+ int encrypted; /* if true, the media is encrypted */
+ int valid_key; /* if true, a valid encryption key has been set */
+ int sg; /* if true, the device is a /dev/sg* */
+ int copy_on_read; /* if true, copy read backing sectors into image
+ note this is a reference count */
+
+ BlockDriver *drv; /* NULL means no media */
+ void *opaque;
+
+ void *dev; /* attached device model, if any */
+ /* TODO change to DeviceState when all users are qdevified */
+ const BlockDevOps *dev_ops;
+ void *dev_opaque;
+
+ char filename[1024];
+ char backing_file[1024]; /* if non zero, the image is a diff of
+ this file image */
+ char backing_format[16]; /* if non-zero and backing_file exists */
+ int is_temporary;
+
+ BlockDriverState *backing_hd;
+ BlockDriverState *file;
+
+ NotifierList close_notifiers;
+
+ /* Callback before write request is processed */
+ NotifierWithReturnList before_write_notifiers;
+
+ /* number of in-flight copy-on-read requests */
+ unsigned int copy_on_read_in_flight;
+
+ /* the time for latest disk I/O */
+ int64_t slice_start;
+ int64_t slice_end;
+ BlockIOLimit io_limits;
+ BlockIOBaseValue slice_submitted;
+ CoQueue throttled_reqs;
+ QEMUTimer *block_timer;
+ bool io_limits_enabled;
+
+ /* I/O stats (display with "info blockstats"). */
+ uint64_t nr_bytes[BDRV_MAX_IOTYPE];
+ uint64_t nr_ops[BDRV_MAX_IOTYPE];
+ uint64_t total_time_ns[BDRV_MAX_IOTYPE];
+ uint64_t wr_highest_sector;
+
+ /* Whether the disk can expand beyond total_sectors */
+ int growable;
+
+ /* the memory alignment required for the buffers handled by this driver */
+ int buffer_alignment;
+
+ /* do we need to tell the quest if we have a volatile write cache? */
+ int enable_write_cache;
+
+ /* NOTE: the following infos are only hints for real hardware
+ drivers. They are not used by the block driver */
+ BlockdevOnError on_read_error, on_write_error;
+ bool iostatus_enabled;
+ BlockDeviceIoStatus iostatus;
+ char device_name[32];
+ HBitmap *dirty_bitmap;
+ int in_use; /* users other than guest access, eg. block migration */
+ QTAILQ_ENTRY(BlockDriverState) list;
+
+ QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
+
+ /* long-running background operation */
+ BlockJob *job;
+
+ QDict *options;
+};
+
+int get_tmp_filename(char *filename, int size);
+
+void bdrv_set_io_limits(BlockDriverState *bs,
+ BlockIOLimit *io_limits);
+
+/**
+ * bdrv_add_before_write_notifier:
+ *
+ * Register a callback that is invoked before write requests are processed but
+ * after any throttling or waiting for overlapping requests.
+ */
+void bdrv_add_before_write_notifier(BlockDriverState *bs,
+ NotifierWithReturn *notifier);
+
+/**
+ * bdrv_get_aio_context:
+ *
+ * Returns: the currently bound #AioContext
+ */
+AioContext *bdrv_get_aio_context(BlockDriverState *bs);
+
+#ifdef _WIN32
+int is_windows_drive(const char *filename);
+#endif
+void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
+ enum MonitorEvent ev,
+ BlockErrorAction action, bool is_read);
+
+/**
+ * stream_start:
+ * @bs: Block device to operate on.
+ * @base: Block device that will become the new base, or %NULL to
+ * flatten the whole backing file chain onto @bs.
+ * @base_id: The file name that will be written to @bs as the new
+ * backing file if the job completes. Ignored if @base is %NULL.
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ * Start a streaming operation on @bs. Clusters that are unallocated
+ * in @bs, but allocated in any image between @base and @bs (both
+ * exclusive) will be written to @bs. At the end of a successful
+ * streaming job, the backing file of @bs will be changed to
+ * @base_id in the written image and to @base in the live BlockDriverState.
+ */
+void stream_start(BlockDriverState *bs, BlockDriverState *base,
+ const char *base_id, int64_t speed, BlockdevOnError on_error,
+ BlockDriverCompletionFunc *cb,
+ void *opaque, Error **errp);
+
+/**
+ * commit_start:
+ * @bs: Top Block device
+ * @base: Block device that will be written into, and become the new top
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ */
+void commit_start(BlockDriverState *bs, BlockDriverState *base,
+ BlockDriverState *top, int64_t speed,
+ BlockdevOnError on_error, BlockDriverCompletionFunc *cb,
+ void *opaque, Error **errp);
+
+/*
+ * mirror_start:
+ * @bs: Block device to operate on.
+ * @target: Block device to write to.
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @granularity: The chosen granularity for the dirty bitmap.
+ * @buf_size: The amount of data that can be in flight at one time.
+ * @mode: Whether to collapse all images in the chain to the target.
+ * @on_source_error: The action to take upon error reading from the source.
+ * @on_target_error: The action to take upon error writing to the target.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ * Start a mirroring operation on @bs. Clusters that are allocated
+ * in @bs will be written to @bs until the job is cancelled or
+ * manually completed. At the end of a successful mirroring job,
+ * @bs will be switched to read from @target.
+ */
+void mirror_start(BlockDriverState *bs, BlockDriverState *target,
+ int64_t speed, int64_t granularity, int64_t buf_size,
+ MirrorSyncMode mode, BlockdevOnError on_source_error,
+ BlockdevOnError on_target_error,
+ BlockDriverCompletionFunc *cb,
+ void *opaque, Error **errp);
+
+/*
+ * backup_start:
+ * @bs: Block device to operate on.
+ * @target: Block device to write to.
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_source_error: The action to take upon error reading from the source.
+ * @on_target_error: The action to take upon error writing to the target.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ *
+ * Start a backup operation on @bs. Clusters in @bs are written to @target
+ * until the job is cancelled or manually completed.
+ */
+void backup_start(BlockDriverState *bs, BlockDriverState *target,
+ int64_t speed, BlockdevOnError on_source_error,
+ BlockdevOnError on_target_error,
+ BlockDriverCompletionFunc *cb, void *opaque,
+ Error **errp);
+
+#endif /* BLOCK_INT_H */
diff --git a/contrib/qemu/include/block/blockjob.h b/contrib/qemu/include/block/blockjob.h
new file mode 100644
index 000000000..c290d07bb
--- /dev/null
+++ b/contrib/qemu/include/block/blockjob.h
@@ -0,0 +1,278 @@
+/*
+ * Declarations for long-running block device operations
+ *
+ * Copyright (c) 2011 IBM Corp.
+ * Copyright (c) 2012 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCKJOB_H
+#define BLOCKJOB_H 1
+
+#include "block/block.h"
+
+/**
+ * BlockJobType:
+ *
+ * A class type for block job objects.
+ */
+typedef struct BlockJobType {
+ /** Derived BlockJob struct size */
+ size_t instance_size;
+
+ /** String describing the operation, part of query-block-jobs QMP API */
+ const char *job_type;
+
+ /** Optional callback for job types that support setting a speed limit */
+ void (*set_speed)(BlockJob *job, int64_t speed, Error **errp);
+
+ /** Optional callback for job types that need to forward I/O status reset */
+ void (*iostatus_reset)(BlockJob *job);
+
+ /**
+ * Optional callback for job types whose completion must be triggered
+ * manually.
+ */
+ void (*complete)(BlockJob *job, Error **errp);
+} BlockJobType;
+
+/**
+ * BlockJob:
+ *
+ * Long-running operation on a BlockDriverState.
+ */
+struct BlockJob {
+ /** The job type, including the job vtable. */
+ const BlockJobType *job_type;
+
+ /** The block device on which the job is operating. */
+ BlockDriverState *bs;
+
+ /**
+ * The coroutine that executes the job. If not NULL, it is
+ * reentered when busy is false and the job is cancelled.
+ */
+ Coroutine *co;
+
+ /**
+ * Set to true if the job should cancel itself. The flag must
+ * always be tested just before toggling the busy flag from false
+ * to true. After a job has been cancelled, it should only yield
+ * if #qemu_aio_wait will ("sooner or later") reenter the coroutine.
+ */
+ bool cancelled;
+
+ /**
+ * Set to true if the job is either paused, or will pause itself
+ * as soon as possible (if busy == true).
+ */
+ bool paused;
+
+ /**
+ * Set to false by the job while it is in a quiescent state, where
+ * no I/O is pending and the job has yielded on any condition
+ * that is not detected by #qemu_aio_wait, such as a timer.
+ */
+ bool busy;
+
+ /** Status that is published by the query-block-jobs QMP API */
+ BlockDeviceIoStatus iostatus;
+
+ /** Offset that is published by the query-block-jobs QMP API */
+ int64_t offset;
+
+ /** Length that is published by the query-block-jobs QMP API */
+ int64_t len;
+
+ /** Speed that was set with @block_job_set_speed. */
+ int64_t speed;
+
+ /** The completion function that will be called when the job completes. */
+ BlockDriverCompletionFunc *cb;
+
+ /** The opaque value that is passed to the completion function. */
+ void *opaque;
+};
+
+/**
+ * block_job_create:
+ * @job_type: The class object for the newly-created job.
+ * @bs: The block
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ * Create a new long-running block device job and return it. The job
+ * will call @cb asynchronously when the job completes. Note that
+ * @bs may have been closed at the time the @cb it is called. If
+ * this is the case, the job may be reported as either cancelled or
+ * completed.
+ *
+ * This function is not part of the public job interface; it should be
+ * called from a wrapper that is specific to the job type.
+ */
+void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
+ int64_t speed, BlockDriverCompletionFunc *cb,
+ void *opaque, Error **errp);
+
+/**
+ * block_job_sleep_ns:
+ * @job: The job that calls the function.
+ * @clock: The clock to sleep on.
+ * @ns: How many nanoseconds to stop for.
+ *
+ * Put the job to sleep (assuming that it wasn't canceled) for @ns
+ * nanoseconds. Canceling the job will interrupt the wait immediately.
+ */
+void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns);
+
+/**
+ * block_job_completed:
+ * @job: The job being completed.
+ * @ret: The status code.
+ *
+ * Call the completion function that was registered at creation time, and
+ * free @job.
+ */
+void block_job_completed(BlockJob *job, int ret);
+
+/**
+ * block_job_set_speed:
+ * @job: The job to set the speed for.
+ * @speed: The new value
+ * @errp: Error object.
+ *
+ * Set a rate-limiting parameter for the job; the actual meaning may
+ * vary depending on the job type.
+ */
+void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp);
+
+/**
+ * block_job_cancel:
+ * @job: The job to be canceled.
+ *
+ * Asynchronously cancel the specified job.
+ */
+void block_job_cancel(BlockJob *job);
+
+/**
+ * block_job_complete:
+ * @job: The job to be completed.
+ * @errp: Error object.
+ *
+ * Asynchronously complete the specified job.
+ */
+void block_job_complete(BlockJob *job, Error **errp);
+
+/**
+ * block_job_is_cancelled:
+ * @job: The job being queried.
+ *
+ * Returns whether the job is scheduled for cancellation.
+ */
+bool block_job_is_cancelled(BlockJob *job);
+
+/**
+ * block_job_query:
+ * @job: The job to get information about.
+ *
+ * Return information about a job.
+ */
+BlockJobInfo *block_job_query(BlockJob *job);
+
+/**
+ * block_job_pause:
+ * @job: The job to be paused.
+ *
+ * Asynchronously pause the specified job.
+ */
+void block_job_pause(BlockJob *job);
+
+/**
+ * block_job_resume:
+ * @job: The job to be resumed.
+ *
+ * Resume the specified job.
+ */
+void block_job_resume(BlockJob *job);
+
+/**
+ * qobject_from_block_job:
+ * @job: The job whose information is requested.
+ *
+ * Return a QDict corresponding to @job's query-block-jobs entry.
+ */
+QObject *qobject_from_block_job(BlockJob *job);
+
+/**
+ * block_job_ready:
+ * @job: The job which is now ready to complete.
+ *
+ * Send a BLOCK_JOB_READY event for the specified job.
+ */
+void block_job_ready(BlockJob *job);
+
+/**
+ * block_job_is_paused:
+ * @job: The job being queried.
+ *
+ * Returns whether the job is currently paused, or will pause
+ * as soon as it reaches a sleeping point.
+ */
+bool block_job_is_paused(BlockJob *job);
+
+/**
+ * block_job_cancel_sync:
+ * @job: The job to be canceled.
+ *
+ * Synchronously cancel the job. The completion callback is called
+ * before the function returns. The job may actually complete
+ * instead of canceling itself; the circumstances under which this
+ * happens depend on the kind of job that is active.
+ *
+ * Returns the return value from the job if the job actually completed
+ * during the call, or -ECANCELED if it was canceled.
+ */
+int block_job_cancel_sync(BlockJob *job);
+
+/**
+ * block_job_iostatus_reset:
+ * @job: The job whose I/O status should be reset.
+ *
+ * Reset I/O status on @job and on BlockDriverState objects it uses,
+ * other than job->bs.
+ */
+void block_job_iostatus_reset(BlockJob *job);
+
+/**
+ * block_job_error_action:
+ * @job: The job to signal an error for.
+ * @bs: The block device on which to set an I/O error.
+ * @on_err: The error action setting.
+ * @is_read: Whether the operation was a read.
+ * @error: The error that was reported.
+ *
+ * Report an I/O error for a block job and possibly stop the VM. Return the
+ * action that was selected based on @on_err and @error.
+ */
+BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
+ BlockdevOnError on_err,
+ int is_read, int error);
+#endif
diff --git a/contrib/qemu/include/block/coroutine.h b/contrib/qemu/include/block/coroutine.h
new file mode 100644
index 000000000..377805a3b
--- /dev/null
+++ b/contrib/qemu/include/block/coroutine.h
@@ -0,0 +1,218 @@
+/*
+ * QEMU coroutine implementation
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ * Kevin Wolf <kwolf@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_COROUTINE_H
+#define QEMU_COROUTINE_H
+
+#include <stdbool.h>
+#include "qemu/queue.h"
+#include "qemu/timer.h"
+
+/**
+ * Coroutines are a mechanism for stack switching and can be used for
+ * cooperative userspace threading. These functions provide a simple but
+ * useful flavor of coroutines that is suitable for writing sequential code,
+ * rather than callbacks, for operations that need to give up control while
+ * waiting for events to complete.
+ *
+ * These functions are re-entrant and may be used outside the global mutex.
+ */
+
+/**
+ * Mark a function that executes in coroutine context
+ *
+ * Functions that execute in coroutine context cannot be called directly from
+ * normal functions. In the future it would be nice to enable compiler or
+ * static checker support for catching such errors. This annotation might make
+ * it possible and in the meantime it serves as documentation.
+ *
+ * For example:
+ *
+ * static void coroutine_fn foo(void) {
+ * ....
+ * }
+ */
+#define coroutine_fn
+
+typedef struct Coroutine Coroutine;
+
+/**
+ * Coroutine entry point
+ *
+ * When the coroutine is entered for the first time, opaque is passed in as an
+ * argument.
+ *
+ * When this function returns, the coroutine is destroyed automatically and
+ * execution continues in the caller who last entered the coroutine.
+ */
+typedef void coroutine_fn CoroutineEntry(void *opaque);
+
+/**
+ * Create a new coroutine
+ *
+ * Use qemu_coroutine_enter() to actually transfer control to the coroutine.
+ */
+Coroutine *qemu_coroutine_create(CoroutineEntry *entry);
+
+/**
+ * Transfer control to a coroutine
+ *
+ * The opaque argument is passed as the argument to the entry point when
+ * entering the coroutine for the first time. It is subsequently ignored.
+ */
+void qemu_coroutine_enter(Coroutine *coroutine, void *opaque);
+
+/**
+ * Transfer control back to a coroutine's caller
+ *
+ * This function does not return until the coroutine is re-entered using
+ * qemu_coroutine_enter().
+ */
+void coroutine_fn qemu_coroutine_yield(void);
+
+/**
+ * Get the currently executing coroutine
+ */
+Coroutine *coroutine_fn qemu_coroutine_self(void);
+
+/**
+ * Return whether or not currently inside a coroutine
+ *
+ * This can be used to write functions that work both when in coroutine context
+ * and when not in coroutine context. Note that such functions cannot use the
+ * coroutine_fn annotation since they work outside coroutine context.
+ */
+bool qemu_in_coroutine(void);
+
+
+
+/**
+ * CoQueues are a mechanism to queue coroutines in order to continue executing
+ * them later. They provide the fundamental primitives on which coroutine locks
+ * are built.
+ */
+typedef struct CoQueue {
+ QTAILQ_HEAD(, Coroutine) entries;
+ AioContext *ctx;
+} CoQueue;
+
+/**
+ * Initialise a CoQueue. This must be called before any other operation is used
+ * on the CoQueue.
+ */
+void qemu_co_queue_init(CoQueue *queue);
+
+/**
+ * Adds the current coroutine to the CoQueue and transfers control to the
+ * caller of the coroutine.
+ */
+void coroutine_fn qemu_co_queue_wait(CoQueue *queue);
+
+/**
+ * Adds the current coroutine to the head of the CoQueue and transfers control to the
+ * caller of the coroutine.
+ */
+void coroutine_fn qemu_co_queue_wait_insert_head(CoQueue *queue);
+
+/**
+ * Restarts the next coroutine in the CoQueue and removes it from the queue.
+ *
+ * Returns true if a coroutine was restarted, false if the queue is empty.
+ */
+bool qemu_co_queue_next(CoQueue *queue);
+
+/**
+ * Restarts all coroutines in the CoQueue and leaves the queue empty.
+ */
+void qemu_co_queue_restart_all(CoQueue *queue);
+
+/**
+ * Checks if the CoQueue is empty.
+ */
+bool qemu_co_queue_empty(CoQueue *queue);
+
+
+/**
+ * Provides a mutex that can be used to synchronise coroutines
+ */
+typedef struct CoMutex {
+ bool locked;
+ CoQueue queue;
+} CoMutex;
+
+/**
+ * Initialises a CoMutex. This must be called before any other operation is used
+ * on the CoMutex.
+ */
+void qemu_co_mutex_init(CoMutex *mutex);
+
+/**
+ * Locks the mutex. If the lock cannot be taken immediately, control is
+ * transferred to the caller of the current coroutine.
+ */
+void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex);
+
+/**
+ * Unlocks the mutex and schedules the next coroutine that was waiting for this
+ * lock to be run.
+ */
+void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex);
+
+typedef struct CoRwlock {
+ bool writer;
+ int reader;
+ CoQueue queue;
+} CoRwlock;
+
+/**
+ * Initialises a CoRwlock. This must be called before any other operation
+ * is used on the CoRwlock
+ */
+void qemu_co_rwlock_init(CoRwlock *lock);
+
+/**
+ * Read locks the CoRwlock. If the lock cannot be taken immediately because
+ * of a parallel writer, control is transferred to the caller of the current
+ * coroutine.
+ */
+void qemu_co_rwlock_rdlock(CoRwlock *lock);
+
+/**
+ * Write Locks the mutex. If the lock cannot be taken immediately because
+ * of a parallel reader, control is transferred to the caller of the current
+ * coroutine.
+ */
+void qemu_co_rwlock_wrlock(CoRwlock *lock);
+
+/**
+ * Unlocks the read/write lock and schedules the next coroutine that was
+ * waiting for this lock to be run.
+ */
+void qemu_co_rwlock_unlock(CoRwlock *lock);
+
+/**
+ * Yield the coroutine for a given duration
+ *
+ * Note this function uses timers and hence only works when a main loop is in
+ * use. See main-loop.h and do not use from qemu-tool programs.
+ */
+void coroutine_fn co_sleep_ns(QEMUClock *clock, int64_t ns);
+
+/**
+ * Yield until a file descriptor becomes readable
+ *
+ * Note that this function clobbers the handlers for the file descriptor.
+ */
+void coroutine_fn yield_until_fd_readable(int fd);
+#endif /* QEMU_COROUTINE_H */
diff --git a/contrib/qemu/include/block/coroutine_int.h b/contrib/qemu/include/block/coroutine_int.h
new file mode 100644
index 000000000..f133d65af
--- /dev/null
+++ b/contrib/qemu/include/block/coroutine_int.h
@@ -0,0 +1,53 @@
+/*
+ * Coroutine internals
+ *
+ * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef QEMU_COROUTINE_INT_H
+#define QEMU_COROUTINE_INT_H
+
+#include "qemu/queue.h"
+#include "block/coroutine.h"
+
+typedef enum {
+ COROUTINE_YIELD = 1,
+ COROUTINE_TERMINATE = 2,
+} CoroutineAction;
+
+struct Coroutine {
+ CoroutineEntry *entry;
+ void *entry_arg;
+ Coroutine *caller;
+ QSLIST_ENTRY(Coroutine) pool_next;
+
+ /* Coroutines that should be woken up when we yield or terminate */
+ QTAILQ_HEAD(, Coroutine) co_queue_wakeup;
+ QTAILQ_ENTRY(Coroutine) co_queue_next;
+};
+
+Coroutine *qemu_coroutine_new(void);
+void qemu_coroutine_delete(Coroutine *co);
+CoroutineAction qemu_coroutine_switch(Coroutine *from, Coroutine *to,
+ CoroutineAction action);
+void coroutine_fn qemu_co_queue_run_restart(Coroutine *co);
+
+#endif
diff --git a/contrib/qemu/include/block/snapshot.h b/contrib/qemu/include/block/snapshot.h
new file mode 100644
index 000000000..eaf61f032
--- /dev/null
+++ b/contrib/qemu/include/block/snapshot.h
@@ -0,0 +1,53 @@
+/*
+ * Block layer snapshot related functions
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef SNAPSHOT_H
+#define SNAPSHOT_H
+
+#include "qemu-common.h"
+
+typedef struct QEMUSnapshotInfo {
+ char id_str[128]; /* unique snapshot id */
+ /* the following fields are informative. They are not needed for
+ the consistency of the snapshot */
+ char name[256]; /* user chosen name */
+ uint64_t vm_state_size; /* VM state info size */
+ uint32_t date_sec; /* UTC date of the snapshot */
+ uint32_t date_nsec;
+ uint64_t vm_clock_nsec; /* VM clock relative to boot */
+} QEMUSnapshotInfo;
+
+int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info,
+ const char *name);
+int bdrv_can_snapshot(BlockDriverState *bs);
+int bdrv_snapshot_create(BlockDriverState *bs,
+ QEMUSnapshotInfo *sn_info);
+int bdrv_snapshot_goto(BlockDriverState *bs,
+ const char *snapshot_id);
+int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id);
+int bdrv_snapshot_list(BlockDriverState *bs,
+ QEMUSnapshotInfo **psn_info);
+int bdrv_snapshot_load_tmp(BlockDriverState *bs,
+ const char *snapshot_name);
+#endif
diff --git a/contrib/qemu/include/config.h b/contrib/qemu/include/config.h
new file mode 100644
index 000000000..e20f78696
--- /dev/null
+++ b/contrib/qemu/include/config.h
@@ -0,0 +1,2 @@
+#include "config-host.h"
+#include "config-target.h"
diff --git a/contrib/qemu/include/exec/cpu-common.h b/contrib/qemu/include/exec/cpu-common.h
new file mode 100644
index 000000000..e4996e19c
--- /dev/null
+++ b/contrib/qemu/include/exec/cpu-common.h
@@ -0,0 +1,124 @@
+#ifndef CPU_COMMON_H
+#define CPU_COMMON_H 1
+
+/* CPU interfaces that are target independent. */
+
+#ifndef CONFIG_USER_ONLY
+#include "exec/hwaddr.h"
+#endif
+
+#ifndef NEED_CPU_H
+#include "exec/poison.h"
+#endif
+
+#include "qemu/bswap.h"
+#include "qemu/queue.h"
+
+/**
+ * CPUListState:
+ * @cpu_fprintf: Print function.
+ * @file: File to print to using @cpu_fprint.
+ *
+ * State commonly used for iterating over CPU models.
+ */
+typedef struct CPUListState {
+ fprintf_function cpu_fprintf;
+ FILE *file;
+} CPUListState;
+
+#if !defined(CONFIG_USER_ONLY)
+
+enum device_endian {
+ DEVICE_NATIVE_ENDIAN,
+ DEVICE_BIG_ENDIAN,
+ DEVICE_LITTLE_ENDIAN,
+};
+
+/* address in the RAM (different from a physical address) */
+#if defined(CONFIG_XEN_BACKEND)
+typedef uint64_t ram_addr_t;
+# define RAM_ADDR_MAX UINT64_MAX
+# define RAM_ADDR_FMT "%" PRIx64
+#else
+typedef uintptr_t ram_addr_t;
+# define RAM_ADDR_MAX UINTPTR_MAX
+# define RAM_ADDR_FMT "%" PRIxPTR
+#endif
+
+/* memory API */
+
+typedef void CPUWriteMemoryFunc(void *opaque, hwaddr addr, uint32_t value);
+typedef uint32_t CPUReadMemoryFunc(void *opaque, hwaddr addr);
+
+void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
+/* This should not be used by devices. */
+MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr);
+void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev);
+
+void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
+ int len, int is_write);
+static inline void cpu_physical_memory_read(hwaddr addr,
+ void *buf, int len)
+{
+ cpu_physical_memory_rw(addr, buf, len, 0);
+}
+static inline void cpu_physical_memory_write(hwaddr addr,
+ const void *buf, int len)
+{
+ cpu_physical_memory_rw(addr, (void *)buf, len, 1);
+}
+void *cpu_physical_memory_map(hwaddr addr,
+ hwaddr *plen,
+ int is_write);
+void cpu_physical_memory_unmap(void *buffer, hwaddr len,
+ int is_write, hwaddr access_len);
+void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque));
+
+bool cpu_physical_memory_is_io(hwaddr phys_addr);
+
+/* Coalesced MMIO regions are areas where write operations can be reordered.
+ * This usually implies that write operations are side-effect free. This allows
+ * batching which can make a major impact on performance when using
+ * virtualization.
+ */
+void qemu_flush_coalesced_mmio_buffer(void);
+
+uint32_t ldub_phys(hwaddr addr);
+uint32_t lduw_le_phys(hwaddr addr);
+uint32_t lduw_be_phys(hwaddr addr);
+uint32_t ldl_le_phys(hwaddr addr);
+uint32_t ldl_be_phys(hwaddr addr);
+uint64_t ldq_le_phys(hwaddr addr);
+uint64_t ldq_be_phys(hwaddr addr);
+void stb_phys(hwaddr addr, uint32_t val);
+void stw_le_phys(hwaddr addr, uint32_t val);
+void stw_be_phys(hwaddr addr, uint32_t val);
+void stl_le_phys(hwaddr addr, uint32_t val);
+void stl_be_phys(hwaddr addr, uint32_t val);
+void stq_le_phys(hwaddr addr, uint64_t val);
+void stq_be_phys(hwaddr addr, uint64_t val);
+
+#ifdef NEED_CPU_H
+uint32_t lduw_phys(hwaddr addr);
+uint32_t ldl_phys(hwaddr addr);
+uint64_t ldq_phys(hwaddr addr);
+void stl_phys_notdirty(hwaddr addr, uint32_t val);
+void stw_phys(hwaddr addr, uint32_t val);
+void stl_phys(hwaddr addr, uint32_t val);
+void stq_phys(hwaddr addr, uint64_t val);
+#endif
+
+void cpu_physical_memory_write_rom(hwaddr addr,
+ const uint8_t *buf, int len);
+
+extern struct MemoryRegion io_mem_rom;
+extern struct MemoryRegion io_mem_notdirty;
+
+typedef void (RAMBlockIterFunc)(void *host_addr,
+ ram_addr_t offset, ram_addr_t length, void *opaque);
+
+void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
+
+#endif
+
+#endif /* !CPU_COMMON_H */
diff --git a/contrib/qemu/include/exec/hwaddr.h b/contrib/qemu/include/exec/hwaddr.h
new file mode 100644
index 000000000..c9eb78fba
--- /dev/null
+++ b/contrib/qemu/include/exec/hwaddr.h
@@ -0,0 +1,20 @@
+/* Define hwaddr if it exists. */
+
+#ifndef HWADDR_H
+#define HWADDR_H
+
+#define HWADDR_BITS 64
+/* hwaddr is the type of a physical address (its size can
+ be different from 'target_ulong'). */
+
+typedef uint64_t hwaddr;
+#define HWADDR_MAX UINT64_MAX
+#define TARGET_FMT_plx "%016" PRIx64
+#define HWADDR_PRId PRId64
+#define HWADDR_PRIi PRIi64
+#define HWADDR_PRIo PRIo64
+#define HWADDR_PRIu PRIu64
+#define HWADDR_PRIx PRIx64
+#define HWADDR_PRIX PRIX64
+
+#endif
diff --git a/contrib/qemu/include/exec/poison.h b/contrib/qemu/include/exec/poison.h
new file mode 100644
index 000000000..2341a7504
--- /dev/null
+++ b/contrib/qemu/include/exec/poison.h
@@ -0,0 +1,63 @@
+/* Poison identifiers that should not be used when building
+ target independent device code. */
+
+#ifndef HW_POISON_H
+#define HW_POISON_H
+#ifdef __GNUC__
+
+#pragma GCC poison TARGET_I386
+#pragma GCC poison TARGET_X86_64
+#pragma GCC poison TARGET_ALPHA
+#pragma GCC poison TARGET_ARM
+#pragma GCC poison TARGET_CRIS
+#pragma GCC poison TARGET_LM32
+#pragma GCC poison TARGET_M68K
+#pragma GCC poison TARGET_MIPS
+#pragma GCC poison TARGET_MIPS64
+#pragma GCC poison TARGET_OPENRISC
+#pragma GCC poison TARGET_PPC
+#pragma GCC poison TARGET_PPCEMB
+#pragma GCC poison TARGET_PPC64
+#pragma GCC poison TARGET_ABI32
+#pragma GCC poison TARGET_SH4
+#pragma GCC poison TARGET_SPARC
+#pragma GCC poison TARGET_SPARC64
+
+#pragma GCC poison TARGET_WORDS_BIGENDIAN
+#pragma GCC poison BSWAP_NEEDED
+
+#pragma GCC poison TARGET_LONG_BITS
+#pragma GCC poison TARGET_FMT_lx
+#pragma GCC poison TARGET_FMT_ld
+
+#pragma GCC poison TARGET_PAGE_SIZE
+#pragma GCC poison TARGET_PAGE_MASK
+#pragma GCC poison TARGET_PAGE_BITS
+#pragma GCC poison TARGET_PAGE_ALIGN
+
+#pragma GCC poison CPUArchState
+#pragma GCC poison env
+
+#pragma GCC poison lduw_phys
+#pragma GCC poison ldl_phys
+#pragma GCC poison ldq_phys
+#pragma GCC poison stl_phys_notdirty
+#pragma GCC poison stw_phys
+#pragma GCC poison stl_phys
+#pragma GCC poison stq_phys
+
+#pragma GCC poison CPU_INTERRUPT_HARD
+#pragma GCC poison CPU_INTERRUPT_EXITTB
+#pragma GCC poison CPU_INTERRUPT_HALT
+#pragma GCC poison CPU_INTERRUPT_DEBUG
+#pragma GCC poison CPU_INTERRUPT_TGT_EXT_0
+#pragma GCC poison CPU_INTERRUPT_TGT_EXT_1
+#pragma GCC poison CPU_INTERRUPT_TGT_EXT_2
+#pragma GCC poison CPU_INTERRUPT_TGT_EXT_3
+#pragma GCC poison CPU_INTERRUPT_TGT_EXT_4
+#pragma GCC poison CPU_INTERRUPT_TGT_INT_0
+#pragma GCC poison CPU_INTERRUPT_TGT_INT_1
+#pragma GCC poison CPU_INTERRUPT_TGT_INT_2
+
+#endif
+#endif
diff --git a/contrib/qemu/include/fpu/softfloat.h b/contrib/qemu/include/fpu/softfloat.h
new file mode 100644
index 000000000..f3927e241
--- /dev/null
+++ b/contrib/qemu/include/fpu/softfloat.h
@@ -0,0 +1,641 @@
+/*
+ * QEMU float support
+ *
+ * Derived from SoftFloat.
+ */
+
+/*============================================================================
+
+This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser. This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704. Funding was partially provided by the
+National Science Foundation under grant MIP-9311980. The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek. More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+#ifndef SOFTFLOAT_H
+#define SOFTFLOAT_H
+
+#if defined(CONFIG_SOLARIS) && defined(CONFIG_NEEDS_LIBSUNMATH)
+#include <sunmath.h>
+#endif
+
+#include <inttypes.h>
+#include "config-host.h"
+#include "qemu/osdep.h"
+
+/*----------------------------------------------------------------------------
+| Each of the following `typedef's defines the most convenient type that holds
+| integers of at least as many bits as specified. For example, `uint8' should
+| be the most convenient type that can hold unsigned integers of as many as
+| 8 bits. The `flag' type must be able to hold either a 0 or 1. For most
+| implementations of C, `flag', `uint8', and `int8' should all be `typedef'ed
+| to the same as `int'.
+*----------------------------------------------------------------------------*/
+typedef uint8_t flag;
+typedef uint8_t uint8;
+typedef int8_t int8;
+typedef unsigned int uint32;
+typedef signed int int32;
+typedef uint64_t uint64;
+typedef int64_t int64;
+
+#define LIT64( a ) a##LL
+#define INLINE static inline
+
+#define STATUS_PARAM , float_status *status
+#define STATUS(field) status->field
+#define STATUS_VAR , status
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point ordering relations
+*----------------------------------------------------------------------------*/
+enum {
+ float_relation_less = -1,
+ float_relation_equal = 0,
+ float_relation_greater = 1,
+ float_relation_unordered = 2
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point types.
+*----------------------------------------------------------------------------*/
+/* Use structures for soft-float types. This prevents accidentally mixing
+ them with native int/float types. A sufficiently clever compiler and
+ sane ABI should be able to see though these structs. However
+ x86/gcc 3.x seems to struggle a bit, so leave them disabled by default. */
+//#define USE_SOFTFLOAT_STRUCT_TYPES
+#ifdef USE_SOFTFLOAT_STRUCT_TYPES
+typedef struct {
+ uint16_t v;
+} float16;
+#define float16_val(x) (((float16)(x)).v)
+#define make_float16(x) __extension__ ({ float16 f16_val = {x}; f16_val; })
+#define const_float16(x) { x }
+typedef struct {
+ uint32_t v;
+} float32;
+/* The cast ensures an error if the wrong type is passed. */
+#define float32_val(x) (((float32)(x)).v)
+#define make_float32(x) __extension__ ({ float32 f32_val = {x}; f32_val; })
+#define const_float32(x) { x }
+typedef struct {
+ uint64_t v;
+} float64;
+#define float64_val(x) (((float64)(x)).v)
+#define make_float64(x) __extension__ ({ float64 f64_val = {x}; f64_val; })
+#define const_float64(x) { x }
+#else
+typedef uint16_t float16;
+typedef uint32_t float32;
+typedef uint64_t float64;
+#define float16_val(x) (x)
+#define float32_val(x) (x)
+#define float64_val(x) (x)
+#define make_float16(x) (x)
+#define make_float32(x) (x)
+#define make_float64(x) (x)
+#define const_float16(x) (x)
+#define const_float32(x) (x)
+#define const_float64(x) (x)
+#endif
+typedef struct {
+ uint64_t low;
+ uint16_t high;
+} floatx80;
+#define make_floatx80(exp, mant) ((floatx80) { mant, exp })
+#define make_floatx80_init(exp, mant) { .low = mant, .high = exp }
+typedef struct {
+#ifdef HOST_WORDS_BIGENDIAN
+ uint64_t high, low;
+#else
+ uint64_t low, high;
+#endif
+} float128;
+#define make_float128(high_, low_) ((float128) { .high = high_, .low = low_ })
+#define make_float128_init(high_, low_) { .high = high_, .low = low_ }
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point underflow tininess-detection mode.
+*----------------------------------------------------------------------------*/
+enum {
+ float_tininess_after_rounding = 0,
+ float_tininess_before_rounding = 1
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point rounding mode.
+*----------------------------------------------------------------------------*/
+enum {
+ float_round_nearest_even = 0,
+ float_round_down = 1,
+ float_round_up = 2,
+ float_round_to_zero = 3
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point exception flags.
+*----------------------------------------------------------------------------*/
+enum {
+ float_flag_invalid = 1,
+ float_flag_divbyzero = 4,
+ float_flag_overflow = 8,
+ float_flag_underflow = 16,
+ float_flag_inexact = 32,
+ float_flag_input_denormal = 64,
+ float_flag_output_denormal = 128
+};
+
+typedef struct float_status {
+ signed char float_detect_tininess;
+ signed char float_rounding_mode;
+ signed char float_exception_flags;
+ signed char floatx80_rounding_precision;
+ /* should denormalised results go to zero and set the inexact flag? */
+ flag flush_to_zero;
+ /* should denormalised inputs go to zero and set the input_denormal flag? */
+ flag flush_inputs_to_zero;
+ flag default_nan_mode;
+} float_status;
+
+void set_float_rounding_mode(int val STATUS_PARAM);
+void set_float_exception_flags(int val STATUS_PARAM);
+INLINE void set_float_detect_tininess(int val STATUS_PARAM)
+{
+ STATUS(float_detect_tininess) = val;
+}
+INLINE void set_flush_to_zero(flag val STATUS_PARAM)
+{
+ STATUS(flush_to_zero) = val;
+}
+INLINE void set_flush_inputs_to_zero(flag val STATUS_PARAM)
+{
+ STATUS(flush_inputs_to_zero) = val;
+}
+INLINE void set_default_nan_mode(flag val STATUS_PARAM)
+{
+ STATUS(default_nan_mode) = val;
+}
+INLINE int get_float_exception_flags(float_status *status)
+{
+ return STATUS(float_exception_flags);
+}
+void set_floatx80_rounding_precision(int val STATUS_PARAM);
+
+/*----------------------------------------------------------------------------
+| Routine to raise any or all of the software IEC/IEEE floating-point
+| exception flags.
+*----------------------------------------------------------------------------*/
+void float_raise( int8 flags STATUS_PARAM);
+
+/*----------------------------------------------------------------------------
+| Options to indicate which negations to perform in float*_muladd()
+| Using these differs from negating an input or output before calling
+| the muladd function in that this means that a NaN doesn't have its
+| sign bit inverted before it is propagated.
+*----------------------------------------------------------------------------*/
+enum {
+ float_muladd_negate_c = 1,
+ float_muladd_negate_product = 2,
+ float_muladd_negate_result = 4,
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE integer-to-floating-point conversion routines.
+*----------------------------------------------------------------------------*/
+float32 int32_to_float32( int32 STATUS_PARAM );
+float64 int32_to_float64( int32 STATUS_PARAM );
+float32 uint32_to_float32( uint32 STATUS_PARAM );
+float64 uint32_to_float64( uint32 STATUS_PARAM );
+floatx80 int32_to_floatx80( int32 STATUS_PARAM );
+float128 int32_to_float128( int32 STATUS_PARAM );
+float32 int64_to_float32( int64 STATUS_PARAM );
+float32 uint64_to_float32( uint64 STATUS_PARAM );
+float64 int64_to_float64( int64 STATUS_PARAM );
+float64 uint64_to_float64( uint64 STATUS_PARAM );
+floatx80 int64_to_floatx80( int64 STATUS_PARAM );
+float128 int64_to_float128( int64 STATUS_PARAM );
+float128 uint64_to_float128( uint64 STATUS_PARAM );
+
+/*----------------------------------------------------------------------------
+| Software half-precision conversion routines.
+*----------------------------------------------------------------------------*/
+float16 float32_to_float16( float32, flag STATUS_PARAM );
+float32 float16_to_float32( float16, flag STATUS_PARAM );
+
+/*----------------------------------------------------------------------------
+| Software half-precision operations.
+*----------------------------------------------------------------------------*/
+int float16_is_quiet_nan( float16 );
+int float16_is_signaling_nan( float16 );
+float16 float16_maybe_silence_nan( float16 );
+
+INLINE int float16_is_any_nan(float16 a)
+{
+ return ((float16_val(a) & ~0x8000) > 0x7c00);
+}
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated half-precision NaN.
+*----------------------------------------------------------------------------*/
+extern const float16 float16_default_nan;
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int_fast16_t float32_to_int16_round_to_zero(float32 STATUS_PARAM);
+uint_fast16_t float32_to_uint16_round_to_zero(float32 STATUS_PARAM);
+int32 float32_to_int32( float32 STATUS_PARAM );
+int32 float32_to_int32_round_to_zero( float32 STATUS_PARAM );
+uint32 float32_to_uint32( float32 STATUS_PARAM );
+uint32 float32_to_uint32_round_to_zero( float32 STATUS_PARAM );
+int64 float32_to_int64( float32 STATUS_PARAM );
+int64 float32_to_int64_round_to_zero( float32 STATUS_PARAM );
+float64 float32_to_float64( float32 STATUS_PARAM );
+floatx80 float32_to_floatx80( float32 STATUS_PARAM );
+float128 float32_to_float128( float32 STATUS_PARAM );
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision operations.
+*----------------------------------------------------------------------------*/
+float32 float32_round_to_int( float32 STATUS_PARAM );
+float32 float32_add( float32, float32 STATUS_PARAM );
+float32 float32_sub( float32, float32 STATUS_PARAM );
+float32 float32_mul( float32, float32 STATUS_PARAM );
+float32 float32_div( float32, float32 STATUS_PARAM );
+float32 float32_rem( float32, float32 STATUS_PARAM );
+float32 float32_muladd(float32, float32, float32, int STATUS_PARAM);
+float32 float32_sqrt( float32 STATUS_PARAM );
+float32 float32_exp2( float32 STATUS_PARAM );
+float32 float32_log2( float32 STATUS_PARAM );
+int float32_eq( float32, float32 STATUS_PARAM );
+int float32_le( float32, float32 STATUS_PARAM );
+int float32_lt( float32, float32 STATUS_PARAM );
+int float32_unordered( float32, float32 STATUS_PARAM );
+int float32_eq_quiet( float32, float32 STATUS_PARAM );
+int float32_le_quiet( float32, float32 STATUS_PARAM );
+int float32_lt_quiet( float32, float32 STATUS_PARAM );
+int float32_unordered_quiet( float32, float32 STATUS_PARAM );
+int float32_compare( float32, float32 STATUS_PARAM );
+int float32_compare_quiet( float32, float32 STATUS_PARAM );
+float32 float32_min(float32, float32 STATUS_PARAM);
+float32 float32_max(float32, float32 STATUS_PARAM);
+int float32_is_quiet_nan( float32 );
+int float32_is_signaling_nan( float32 );
+float32 float32_maybe_silence_nan( float32 );
+float32 float32_scalbn( float32, int STATUS_PARAM );
+
+INLINE float32 float32_abs(float32 a)
+{
+ /* Note that abs does *not* handle NaN specially, nor does
+ * it flush denormal inputs to zero.
+ */
+ return make_float32(float32_val(a) & 0x7fffffff);
+}
+
+INLINE float32 float32_chs(float32 a)
+{
+ /* Note that chs does *not* handle NaN specially, nor does
+ * it flush denormal inputs to zero.
+ */
+ return make_float32(float32_val(a) ^ 0x80000000);
+}
+
+INLINE int float32_is_infinity(float32 a)
+{
+ return (float32_val(a) & 0x7fffffff) == 0x7f800000;
+}
+
+INLINE int float32_is_neg(float32 a)
+{
+ return float32_val(a) >> 31;
+}
+
+INLINE int float32_is_zero(float32 a)
+{
+ return (float32_val(a) & 0x7fffffff) == 0;
+}
+
+INLINE int float32_is_any_nan(float32 a)
+{
+ return ((float32_val(a) & ~(1 << 31)) > 0x7f800000UL);
+}
+
+INLINE int float32_is_zero_or_denormal(float32 a)
+{
+ return (float32_val(a) & 0x7f800000) == 0;
+}
+
+INLINE float32 float32_set_sign(float32 a, int sign)
+{
+ return make_float32((float32_val(a) & 0x7fffffff) | (sign << 31));
+}
+
+#define float32_zero make_float32(0)
+#define float32_one make_float32(0x3f800000)
+#define float32_ln2 make_float32(0x3f317218)
+#define float32_pi make_float32(0x40490fdb)
+#define float32_half make_float32(0x3f000000)
+#define float32_infinity make_float32(0x7f800000)
+
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated single-precision NaN.
+*----------------------------------------------------------------------------*/
+extern const float32 float32_default_nan;
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int_fast16_t float64_to_int16_round_to_zero(float64 STATUS_PARAM);
+uint_fast16_t float64_to_uint16_round_to_zero(float64 STATUS_PARAM);
+int32 float64_to_int32( float64 STATUS_PARAM );
+int32 float64_to_int32_round_to_zero( float64 STATUS_PARAM );
+uint32 float64_to_uint32( float64 STATUS_PARAM );
+uint32 float64_to_uint32_round_to_zero( float64 STATUS_PARAM );
+int64 float64_to_int64( float64 STATUS_PARAM );
+int64 float64_to_int64_round_to_zero( float64 STATUS_PARAM );
+uint64 float64_to_uint64 (float64 a STATUS_PARAM);
+uint64 float64_to_uint64_round_to_zero (float64 a STATUS_PARAM);
+float32 float64_to_float32( float64 STATUS_PARAM );
+floatx80 float64_to_floatx80( float64 STATUS_PARAM );
+float128 float64_to_float128( float64 STATUS_PARAM );
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision operations.
+*----------------------------------------------------------------------------*/
+float64 float64_round_to_int( float64 STATUS_PARAM );
+float64 float64_trunc_to_int( float64 STATUS_PARAM );
+float64 float64_add( float64, float64 STATUS_PARAM );
+float64 float64_sub( float64, float64 STATUS_PARAM );
+float64 float64_mul( float64, float64 STATUS_PARAM );
+float64 float64_div( float64, float64 STATUS_PARAM );
+float64 float64_rem( float64, float64 STATUS_PARAM );
+float64 float64_muladd(float64, float64, float64, int STATUS_PARAM);
+float64 float64_sqrt( float64 STATUS_PARAM );
+float64 float64_log2( float64 STATUS_PARAM );
+int float64_eq( float64, float64 STATUS_PARAM );
+int float64_le( float64, float64 STATUS_PARAM );
+int float64_lt( float64, float64 STATUS_PARAM );
+int float64_unordered( float64, float64 STATUS_PARAM );
+int float64_eq_quiet( float64, float64 STATUS_PARAM );
+int float64_le_quiet( float64, float64 STATUS_PARAM );
+int float64_lt_quiet( float64, float64 STATUS_PARAM );
+int float64_unordered_quiet( float64, float64 STATUS_PARAM );
+int float64_compare( float64, float64 STATUS_PARAM );
+int float64_compare_quiet( float64, float64 STATUS_PARAM );
+float64 float64_min(float64, float64 STATUS_PARAM);
+float64 float64_max(float64, float64 STATUS_PARAM);
+int float64_is_quiet_nan( float64 a );
+int float64_is_signaling_nan( float64 );
+float64 float64_maybe_silence_nan( float64 );
+float64 float64_scalbn( float64, int STATUS_PARAM );
+
+INLINE float64 float64_abs(float64 a)
+{
+ /* Note that abs does *not* handle NaN specially, nor does
+ * it flush denormal inputs to zero.
+ */
+ return make_float64(float64_val(a) & 0x7fffffffffffffffLL);
+}
+
+INLINE float64 float64_chs(float64 a)
+{
+ /* Note that chs does *not* handle NaN specially, nor does
+ * it flush denormal inputs to zero.
+ */
+ return make_float64(float64_val(a) ^ 0x8000000000000000LL);
+}
+
+INLINE int float64_is_infinity(float64 a)
+{
+ return (float64_val(a) & 0x7fffffffffffffffLL ) == 0x7ff0000000000000LL;
+}
+
+INLINE int float64_is_neg(float64 a)
+{
+ return float64_val(a) >> 63;
+}
+
+INLINE int float64_is_zero(float64 a)
+{
+ return (float64_val(a) & 0x7fffffffffffffffLL) == 0;
+}
+
+INLINE int float64_is_any_nan(float64 a)
+{
+ return ((float64_val(a) & ~(1ULL << 63)) > 0x7ff0000000000000ULL);
+}
+
+INLINE int float64_is_zero_or_denormal(float64 a)
+{
+ return (float64_val(a) & 0x7ff0000000000000LL) == 0;
+}
+
+INLINE float64 float64_set_sign(float64 a, int sign)
+{
+ return make_float64((float64_val(a) & 0x7fffffffffffffffULL)
+ | ((int64_t)sign << 63));
+}
+
+#define float64_zero make_float64(0)
+#define float64_one make_float64(0x3ff0000000000000LL)
+#define float64_ln2 make_float64(0x3fe62e42fefa39efLL)
+#define float64_pi make_float64(0x400921fb54442d18LL)
+#define float64_half make_float64(0x3fe0000000000000LL)
+#define float64_infinity make_float64(0x7ff0000000000000LL)
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated double-precision NaN.
+*----------------------------------------------------------------------------*/
+extern const float64 float64_default_nan;
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int32 floatx80_to_int32( floatx80 STATUS_PARAM );
+int32 floatx80_to_int32_round_to_zero( floatx80 STATUS_PARAM );
+int64 floatx80_to_int64( floatx80 STATUS_PARAM );
+int64 floatx80_to_int64_round_to_zero( floatx80 STATUS_PARAM );
+float32 floatx80_to_float32( floatx80 STATUS_PARAM );
+float64 floatx80_to_float64( floatx80 STATUS_PARAM );
+float128 floatx80_to_float128( floatx80 STATUS_PARAM );
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision operations.
+*----------------------------------------------------------------------------*/
+floatx80 floatx80_round_to_int( floatx80 STATUS_PARAM );
+floatx80 floatx80_add( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_sub( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_mul( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_div( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_rem( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_sqrt( floatx80 STATUS_PARAM );
+int floatx80_eq( floatx80, floatx80 STATUS_PARAM );
+int floatx80_le( floatx80, floatx80 STATUS_PARAM );
+int floatx80_lt( floatx80, floatx80 STATUS_PARAM );
+int floatx80_unordered( floatx80, floatx80 STATUS_PARAM );
+int floatx80_eq_quiet( floatx80, floatx80 STATUS_PARAM );
+int floatx80_le_quiet( floatx80, floatx80 STATUS_PARAM );
+int floatx80_lt_quiet( floatx80, floatx80 STATUS_PARAM );
+int floatx80_unordered_quiet( floatx80, floatx80 STATUS_PARAM );
+int floatx80_compare( floatx80, floatx80 STATUS_PARAM );
+int floatx80_compare_quiet( floatx80, floatx80 STATUS_PARAM );
+int floatx80_is_quiet_nan( floatx80 );
+int floatx80_is_signaling_nan( floatx80 );
+floatx80 floatx80_maybe_silence_nan( floatx80 );
+floatx80 floatx80_scalbn( floatx80, int STATUS_PARAM );
+
+INLINE floatx80 floatx80_abs(floatx80 a)
+{
+ a.high &= 0x7fff;
+ return a;
+}
+
+INLINE floatx80 floatx80_chs(floatx80 a)
+{
+ a.high ^= 0x8000;
+ return a;
+}
+
+INLINE int floatx80_is_infinity(floatx80 a)
+{
+ return (a.high & 0x7fff) == 0x7fff && a.low == 0x8000000000000000LL;
+}
+
+INLINE int floatx80_is_neg(floatx80 a)
+{
+ return a.high >> 15;
+}
+
+INLINE int floatx80_is_zero(floatx80 a)
+{
+ return (a.high & 0x7fff) == 0 && a.low == 0;
+}
+
+INLINE int floatx80_is_zero_or_denormal(floatx80 a)
+{
+ return (a.high & 0x7fff) == 0;
+}
+
+INLINE int floatx80_is_any_nan(floatx80 a)
+{
+ return ((a.high & 0x7fff) == 0x7fff) && (a.low<<1);
+}
+
+#define floatx80_zero make_floatx80(0x0000, 0x0000000000000000LL)
+#define floatx80_one make_floatx80(0x3fff, 0x8000000000000000LL)
+#define floatx80_ln2 make_floatx80(0x3ffe, 0xb17217f7d1cf79acLL)
+#define floatx80_pi make_floatx80(0x4000, 0xc90fdaa22168c235LL)
+#define floatx80_half make_floatx80(0x3ffe, 0x8000000000000000LL)
+#define floatx80_infinity make_floatx80(0x7fff, 0x8000000000000000LL)
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.
+*----------------------------------------------------------------------------*/
+extern const floatx80 floatx80_default_nan;
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE quadruple-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int32 float128_to_int32( float128 STATUS_PARAM );
+int32 float128_to_int32_round_to_zero( float128 STATUS_PARAM );
+int64 float128_to_int64( float128 STATUS_PARAM );
+int64 float128_to_int64_round_to_zero( float128 STATUS_PARAM );
+float32 float128_to_float32( float128 STATUS_PARAM );
+float64 float128_to_float64( float128 STATUS_PARAM );
+floatx80 float128_to_floatx80( float128 STATUS_PARAM );
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE quadruple-precision operations.
+*----------------------------------------------------------------------------*/
+float128 float128_round_to_int( float128 STATUS_PARAM );
+float128 float128_add( float128, float128 STATUS_PARAM );
+float128 float128_sub( float128, float128 STATUS_PARAM );
+float128 float128_mul( float128, float128 STATUS_PARAM );
+float128 float128_div( float128, float128 STATUS_PARAM );
+float128 float128_rem( float128, float128 STATUS_PARAM );
+float128 float128_sqrt( float128 STATUS_PARAM );
+int float128_eq( float128, float128 STATUS_PARAM );
+int float128_le( float128, float128 STATUS_PARAM );
+int float128_lt( float128, float128 STATUS_PARAM );
+int float128_unordered( float128, float128 STATUS_PARAM );
+int float128_eq_quiet( float128, float128 STATUS_PARAM );
+int float128_le_quiet( float128, float128 STATUS_PARAM );
+int float128_lt_quiet( float128, float128 STATUS_PARAM );
+int float128_unordered_quiet( float128, float128 STATUS_PARAM );
+int float128_compare( float128, float128 STATUS_PARAM );
+int float128_compare_quiet( float128, float128 STATUS_PARAM );
+int float128_is_quiet_nan( float128 );
+int float128_is_signaling_nan( float128 );
+float128 float128_maybe_silence_nan( float128 );
+float128 float128_scalbn( float128, int STATUS_PARAM );
+
+INLINE float128 float128_abs(float128 a)
+{
+ a.high &= 0x7fffffffffffffffLL;
+ return a;
+}
+
+INLINE float128 float128_chs(float128 a)
+{
+ a.high ^= 0x8000000000000000LL;
+ return a;
+}
+
+INLINE int float128_is_infinity(float128 a)
+{
+ return (a.high & 0x7fffffffffffffffLL) == 0x7fff000000000000LL && a.low == 0;
+}
+
+INLINE int float128_is_neg(float128 a)
+{
+ return a.high >> 63;
+}
+
+INLINE int float128_is_zero(float128 a)
+{
+ return (a.high & 0x7fffffffffffffffLL) == 0 && a.low == 0;
+}
+
+INLINE int float128_is_zero_or_denormal(float128 a)
+{
+ return (a.high & 0x7fff000000000000LL) == 0;
+}
+
+INLINE int float128_is_any_nan(float128 a)
+{
+ return ((a.high >> 48) & 0x7fff) == 0x7fff &&
+ ((a.low != 0) || ((a.high & 0xffffffffffffLL) != 0));
+}
+
+#define float128_zero make_float128(0, 0)
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated quadruple-precision NaN.
+*----------------------------------------------------------------------------*/
+extern const float128 float128_default_nan;
+
+#endif /* !SOFTFLOAT_H */
diff --git a/contrib/qemu/include/glib-compat.h b/contrib/qemu/include/glib-compat.h
new file mode 100644
index 000000000..8aa77afd6
--- /dev/null
+++ b/contrib/qemu/include/glib-compat.h
@@ -0,0 +1,27 @@
+/*
+ * GLIB Compatibility Functions
+ *
+ * Copyright IBM, Corp. 2013
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_GLIB_COMPAT_H
+#define QEMU_GLIB_COMPAT_H
+
+#include <glib.h>
+
+#if !GLIB_CHECK_VERSION(2, 14, 0)
+static inline guint g_timeout_add_seconds(guint interval, GSourceFunc function,
+ gpointer data)
+{
+ return g_timeout_add(interval * 1000, function, data);
+}
+#endif
+
+#endif
diff --git a/contrib/qemu/include/migration/migration.h b/contrib/qemu/include/migration/migration.h
new file mode 100644
index 000000000..bc9fde0b2
--- /dev/null
+++ b/contrib/qemu/include/migration/migration.h
@@ -0,0 +1,157 @@
+/*
+ * QEMU live migration
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_MIGRATION_H
+#define QEMU_MIGRATION_H
+
+#include "qapi/qmp/qdict.h"
+#include "qemu-common.h"
+#include "qemu/thread.h"
+#include "qemu/notify.h"
+#include "qapi/error.h"
+#include "migration/vmstate.h"
+#include "qapi-types.h"
+#include "exec/cpu-common.h"
+
+struct MigrationParams {
+ bool blk;
+ bool shared;
+};
+
+typedef struct MigrationState MigrationState;
+
+struct MigrationState
+{
+ int64_t bandwidth_limit;
+ size_t bytes_xfer;
+ size_t xfer_limit;
+ QemuThread thread;
+ QEMUBH *cleanup_bh;
+ QEMUFile *file;
+
+ int state;
+ MigrationParams params;
+ double mbps;
+ int64_t total_time;
+ int64_t downtime;
+ int64_t expected_downtime;
+ int64_t dirty_pages_rate;
+ int64_t dirty_bytes_rate;
+ bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
+ int64_t xbzrle_cache_size;
+};
+
+void process_incoming_migration(QEMUFile *f);
+
+void qemu_start_incoming_migration(const char *uri, Error **errp);
+
+uint64_t migrate_max_downtime(void);
+
+void do_info_migrate_print(Monitor *mon, const QObject *data);
+
+void do_info_migrate(Monitor *mon, QObject **ret_data);
+
+void exec_start_incoming_migration(const char *host_port, Error **errp);
+
+void exec_start_outgoing_migration(MigrationState *s, const char *host_port, Error **errp);
+
+void tcp_start_incoming_migration(const char *host_port, Error **errp);
+
+void tcp_start_outgoing_migration(MigrationState *s, const char *host_port, Error **errp);
+
+void unix_start_incoming_migration(const char *path, Error **errp);
+
+void unix_start_outgoing_migration(MigrationState *s, const char *path, Error **errp);
+
+void fd_start_incoming_migration(const char *path, Error **errp);
+
+void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error **errp);
+
+void migrate_fd_error(MigrationState *s);
+
+void migrate_fd_connect(MigrationState *s);
+
+int migrate_fd_close(MigrationState *s);
+
+void add_migration_state_change_notifier(Notifier *notify);
+void remove_migration_state_change_notifier(Notifier *notify);
+bool migration_is_active(MigrationState *);
+bool migration_has_finished(MigrationState *);
+bool migration_has_failed(MigrationState *);
+MigrationState *migrate_get_current(void);
+
+uint64_t ram_bytes_remaining(void);
+uint64_t ram_bytes_transferred(void);
+uint64_t ram_bytes_total(void);
+
+void acct_update_position(QEMUFile *f, size_t size, bool zero);
+
+extern SaveVMHandlers savevm_ram_handlers;
+
+uint64_t dup_mig_bytes_transferred(void);
+uint64_t dup_mig_pages_transferred(void);
+uint64_t skipped_mig_bytes_transferred(void);
+uint64_t skipped_mig_pages_transferred(void);
+uint64_t norm_mig_bytes_transferred(void);
+uint64_t norm_mig_pages_transferred(void);
+uint64_t xbzrle_mig_bytes_transferred(void);
+uint64_t xbzrle_mig_pages_transferred(void);
+uint64_t xbzrle_mig_pages_overflow(void);
+uint64_t xbzrle_mig_pages_cache_miss(void);
+
+/**
+ * @migrate_add_blocker - prevent migration from proceeding
+ *
+ * @reason - an error to be returned whenever migration is attempted
+ */
+void migrate_add_blocker(Error *reason);
+
+/**
+ * @migrate_del_blocker - remove a blocking error from migration
+ *
+ * @reason - the error blocking migration
+ */
+void migrate_del_blocker(Error *reason);
+
+bool migrate_rdma_pin_all(void);
+
+bool migrate_auto_converge(void);
+
+int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
+ uint8_t *dst, int dlen);
+int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen);
+
+int migrate_use_xbzrle(void);
+int64_t migrate_xbzrle_cache_size(void);
+
+int64_t xbzrle_cache_resize(int64_t new_size);
+
+void ram_control_before_iterate(QEMUFile *f, uint64_t flags);
+void ram_control_after_iterate(QEMUFile *f, uint64_t flags);
+void ram_control_load_hook(QEMUFile *f, uint64_t flags);
+
+/* Whenever this is found in the data stream, the flags
+ * will be passed to ram_control_load_hook in the incoming-migration
+ * side. This lets before_ram_iterate/after_ram_iterate add
+ * transport-specific sections to the RAM migration data.
+ */
+#define RAM_SAVE_FLAG_HOOK 0x80
+
+#define RAM_SAVE_CONTROL_NOT_SUPP -1000
+#define RAM_SAVE_CONTROL_DELAYED -2000
+
+size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
+ ram_addr_t offset, size_t size,
+ int *bytes_sent);
+
+#endif
diff --git a/contrib/qemu/include/migration/qemu-file.h b/contrib/qemu/include/migration/qemu-file.h
new file mode 100644
index 000000000..0f757fbeb
--- /dev/null
+++ b/contrib/qemu/include/migration/qemu-file.h
@@ -0,0 +1,266 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef QEMU_FILE_H
+#define QEMU_FILE_H 1
+#include "exec/cpu-common.h"
+
+/* This function writes a chunk of data to a file at the given position.
+ * The pos argument can be ignored if the file is only being used for
+ * streaming. The handler should try to write all of the data it can.
+ */
+typedef int (QEMUFilePutBufferFunc)(void *opaque, const uint8_t *buf,
+ int64_t pos, int size);
+
+/* Read a chunk of data from a file at the given position. The pos argument
+ * can be ignored if the file is only be used for streaming. The number of
+ * bytes actually read should be returned.
+ */
+typedef int (QEMUFileGetBufferFunc)(void *opaque, uint8_t *buf,
+ int64_t pos, int size);
+
+/* Close a file
+ *
+ * Return negative error number on error, 0 or positive value on success.
+ *
+ * The meaning of return value on success depends on the specific back-end being
+ * used.
+ */
+typedef int (QEMUFileCloseFunc)(void *opaque);
+
+/* Called to return the OS file descriptor associated to the QEMUFile.
+ */
+typedef int (QEMUFileGetFD)(void *opaque);
+
+/*
+ * This function writes an iovec to file.
+ */
+typedef ssize_t (QEMUFileWritevBufferFunc)(void *opaque, struct iovec *iov,
+ int iovcnt, int64_t pos);
+
+/*
+ * This function provides hooks around different
+ * stages of RAM migration.
+ */
+typedef int (QEMURamHookFunc)(QEMUFile *f, void *opaque, uint64_t flags);
+
+/*
+ * Constants used by ram_control_* hooks
+ */
+#define RAM_CONTROL_SETUP 0
+#define RAM_CONTROL_ROUND 1
+#define RAM_CONTROL_HOOK 2
+#define RAM_CONTROL_FINISH 3
+
+/*
+ * This function allows override of where the RAM page
+ * is saved (such as RDMA, for example.)
+ */
+typedef size_t (QEMURamSaveFunc)(QEMUFile *f, void *opaque,
+ ram_addr_t block_offset,
+ ram_addr_t offset,
+ size_t size,
+ int *bytes_sent);
+
+typedef struct QEMUFileOps {
+ QEMUFilePutBufferFunc *put_buffer;
+ QEMUFileGetBufferFunc *get_buffer;
+ QEMUFileCloseFunc *close;
+ QEMUFileGetFD *get_fd;
+ QEMUFileWritevBufferFunc *writev_buffer;
+ QEMURamHookFunc *before_ram_iterate;
+ QEMURamHookFunc *after_ram_iterate;
+ QEMURamHookFunc *hook_ram_load;
+ QEMURamSaveFunc *save_page;
+} QEMUFileOps;
+
+QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops);
+QEMUFile *qemu_fopen(const char *filename, const char *mode);
+QEMUFile *qemu_fdopen(int fd, const char *mode);
+QEMUFile *qemu_fopen_socket(int fd, const char *mode);
+QEMUFile *qemu_popen_cmd(const char *command, const char *mode);
+int qemu_get_fd(QEMUFile *f);
+int qemu_fclose(QEMUFile *f);
+int64_t qemu_ftell(QEMUFile *f);
+void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size);
+void qemu_put_byte(QEMUFile *f, int v);
+/*
+ * put_buffer without copying the buffer.
+ * The buffer should be available till it is sent asynchronously.
+ */
+void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, int size);
+bool qemu_file_mode_is_not_valid(const char *mode);
+
+static inline void qemu_put_ubyte(QEMUFile *f, unsigned int v)
+{
+ qemu_put_byte(f, (int)v);
+}
+
+#define qemu_put_sbyte qemu_put_byte
+
+void qemu_put_be16(QEMUFile *f, unsigned int v);
+void qemu_put_be32(QEMUFile *f, unsigned int v);
+void qemu_put_be64(QEMUFile *f, uint64_t v);
+int qemu_get_buffer(QEMUFile *f, uint8_t *buf, int size);
+int qemu_get_byte(QEMUFile *f);
+void qemu_update_position(QEMUFile *f, size_t size);
+
+static inline unsigned int qemu_get_ubyte(QEMUFile *f)
+{
+ return (unsigned int)qemu_get_byte(f);
+}
+
+#define qemu_get_sbyte qemu_get_byte
+
+unsigned int qemu_get_be16(QEMUFile *f);
+unsigned int qemu_get_be32(QEMUFile *f);
+uint64_t qemu_get_be64(QEMUFile *f);
+
+int qemu_file_rate_limit(QEMUFile *f);
+void qemu_file_reset_rate_limit(QEMUFile *f);
+void qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate);
+int64_t qemu_file_get_rate_limit(QEMUFile *f);
+int qemu_file_get_error(QEMUFile *f);
+void qemu_fflush(QEMUFile *f);
+
+static inline void qemu_put_be64s(QEMUFile *f, const uint64_t *pv)
+{
+ qemu_put_be64(f, *pv);
+}
+
+static inline void qemu_put_be32s(QEMUFile *f, const uint32_t *pv)
+{
+ qemu_put_be32(f, *pv);
+}
+
+static inline void qemu_put_be16s(QEMUFile *f, const uint16_t *pv)
+{
+ qemu_put_be16(f, *pv);
+}
+
+static inline void qemu_put_8s(QEMUFile *f, const uint8_t *pv)
+{
+ qemu_put_byte(f, *pv);
+}
+
+static inline void qemu_get_be64s(QEMUFile *f, uint64_t *pv)
+{
+ *pv = qemu_get_be64(f);
+}
+
+static inline void qemu_get_be32s(QEMUFile *f, uint32_t *pv)
+{
+ *pv = qemu_get_be32(f);
+}
+
+static inline void qemu_get_be16s(QEMUFile *f, uint16_t *pv)
+{
+ *pv = qemu_get_be16(f);
+}
+
+static inline void qemu_get_8s(QEMUFile *f, uint8_t *pv)
+{
+ *pv = qemu_get_byte(f);
+}
+
+// Signed versions for type safety
+static inline void qemu_put_sbuffer(QEMUFile *f, const int8_t *buf, int size)
+{
+ qemu_put_buffer(f, (const uint8_t *)buf, size);
+}
+
+static inline void qemu_put_sbe16(QEMUFile *f, int v)
+{
+ qemu_put_be16(f, (unsigned int)v);
+}
+
+static inline void qemu_put_sbe32(QEMUFile *f, int v)
+{
+ qemu_put_be32(f, (unsigned int)v);
+}
+
+static inline void qemu_put_sbe64(QEMUFile *f, int64_t v)
+{
+ qemu_put_be64(f, (uint64_t)v);
+}
+
+static inline size_t qemu_get_sbuffer(QEMUFile *f, int8_t *buf, int size)
+{
+ return qemu_get_buffer(f, (uint8_t *)buf, size);
+}
+
+static inline int qemu_get_sbe16(QEMUFile *f)
+{
+ return (int)qemu_get_be16(f);
+}
+
+static inline int qemu_get_sbe32(QEMUFile *f)
+{
+ return (int)qemu_get_be32(f);
+}
+
+static inline int64_t qemu_get_sbe64(QEMUFile *f)
+{
+ return (int64_t)qemu_get_be64(f);
+}
+
+static inline void qemu_put_s8s(QEMUFile *f, const int8_t *pv)
+{
+ qemu_put_8s(f, (const uint8_t *)pv);
+}
+
+static inline void qemu_put_sbe16s(QEMUFile *f, const int16_t *pv)
+{
+ qemu_put_be16s(f, (const uint16_t *)pv);
+}
+
+static inline void qemu_put_sbe32s(QEMUFile *f, const int32_t *pv)
+{
+ qemu_put_be32s(f, (const uint32_t *)pv);
+}
+
+static inline void qemu_put_sbe64s(QEMUFile *f, const int64_t *pv)
+{
+ qemu_put_be64s(f, (const uint64_t *)pv);
+}
+
+static inline void qemu_get_s8s(QEMUFile *f, int8_t *pv)
+{
+ qemu_get_8s(f, (uint8_t *)pv);
+}
+
+static inline void qemu_get_sbe16s(QEMUFile *f, int16_t *pv)
+{
+ qemu_get_be16s(f, (uint16_t *)pv);
+}
+
+static inline void qemu_get_sbe32s(QEMUFile *f, int32_t *pv)
+{
+ qemu_get_be32s(f, (uint32_t *)pv);
+}
+
+static inline void qemu_get_sbe64s(QEMUFile *f, int64_t *pv)
+{
+ qemu_get_be64s(f, (uint64_t *)pv);
+}
+#endif
diff --git a/contrib/qemu/include/migration/vmstate.h b/contrib/qemu/include/migration/vmstate.h
new file mode 100644
index 000000000..1c31b5d6f
--- /dev/null
+++ b/contrib/qemu/include/migration/vmstate.h
@@ -0,0 +1,740 @@
+/*
+ * QEMU migration/snapshot declarations
+ *
+ * Copyright (c) 2009-2011 Red Hat, Inc.
+ *
+ * Original author: Juan Quintela <quintela@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef QEMU_VMSTATE_H
+#define QEMU_VMSTATE_H 1
+
+#ifndef CONFIG_USER_ONLY
+#include <migration/qemu-file.h>
+#endif
+
+typedef void SaveStateHandler(QEMUFile *f, void *opaque);
+typedef int LoadStateHandler(QEMUFile *f, void *opaque, int version_id);
+
+typedef struct SaveVMHandlers {
+ /* This runs inside the iothread lock. */
+ void (*set_params)(const MigrationParams *params, void * opaque);
+ SaveStateHandler *save_state;
+
+ void (*cancel)(void *opaque);
+ int (*save_live_complete)(QEMUFile *f, void *opaque);
+
+ /* This runs both outside and inside the iothread lock. */
+ bool (*is_active)(void *opaque);
+
+ /* This runs outside the iothread lock in the migration case, and
+ * within the lock in the savevm case. The callback had better only
+ * use data that is local to the migration thread or protected
+ * by other locks.
+ */
+ int (*save_live_iterate)(QEMUFile *f, void *opaque);
+
+ /* This runs outside the iothread lock! */
+ int (*save_live_setup)(QEMUFile *f, void *opaque);
+ uint64_t (*save_live_pending)(QEMUFile *f, void *opaque, uint64_t max_size);
+
+ LoadStateHandler *load_state;
+} SaveVMHandlers;
+
+int register_savevm(DeviceState *dev,
+ const char *idstr,
+ int instance_id,
+ int version_id,
+ SaveStateHandler *save_state,
+ LoadStateHandler *load_state,
+ void *opaque);
+
+int register_savevm_live(DeviceState *dev,
+ const char *idstr,
+ int instance_id,
+ int version_id,
+ SaveVMHandlers *ops,
+ void *opaque);
+
+void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque);
+void register_device_unmigratable(DeviceState *dev, const char *idstr,
+ void *opaque);
+
+
+typedef struct VMStateInfo VMStateInfo;
+typedef struct VMStateDescription VMStateDescription;
+
+struct VMStateInfo {
+ const char *name;
+ int (*get)(QEMUFile *f, void *pv, size_t size);
+ void (*put)(QEMUFile *f, void *pv, size_t size);
+};
+
+enum VMStateFlags {
+ VMS_SINGLE = 0x001,
+ VMS_POINTER = 0x002,
+ VMS_ARRAY = 0x004,
+ VMS_STRUCT = 0x008,
+ VMS_VARRAY_INT32 = 0x010, /* Array with size in int32_t field*/
+ VMS_BUFFER = 0x020, /* static sized buffer */
+ VMS_ARRAY_OF_POINTER = 0x040,
+ VMS_VARRAY_UINT16 = 0x080, /* Array with size in uint16_t field */
+ VMS_VBUFFER = 0x100, /* Buffer with size in int32_t field */
+ VMS_MULTIPLY = 0x200, /* multiply "size" field by field_size */
+ VMS_VARRAY_UINT8 = 0x400, /* Array with size in uint8_t field*/
+ VMS_VARRAY_UINT32 = 0x800, /* Array with size in uint32_t field*/
+};
+
+typedef struct {
+ const char *name;
+ size_t offset;
+ size_t size;
+ size_t start;
+ int num;
+ size_t num_offset;
+ size_t size_offset;
+ const VMStateInfo *info;
+ enum VMStateFlags flags;
+ const VMStateDescription *vmsd;
+ int version_id;
+ bool (*field_exists)(void *opaque, int version_id);
+} VMStateField;
+
+typedef struct VMStateSubsection {
+ const VMStateDescription *vmsd;
+ bool (*needed)(void *opaque);
+} VMStateSubsection;
+
+struct VMStateDescription {
+ const char *name;
+ int unmigratable;
+ int version_id;
+ int minimum_version_id;
+ int minimum_version_id_old;
+ LoadStateHandler *load_state_old;
+ int (*pre_load)(void *opaque);
+ int (*post_load)(void *opaque, int version_id);
+ void (*pre_save)(void *opaque);
+ VMStateField *fields;
+ const VMStateSubsection *subsections;
+};
+
+#ifdef CONFIG_USER_ONLY
+extern const VMStateDescription vmstate_dummy;
+#endif
+
+extern const VMStateInfo vmstate_info_bool;
+
+extern const VMStateInfo vmstate_info_int8;
+extern const VMStateInfo vmstate_info_int16;
+extern const VMStateInfo vmstate_info_int32;
+extern const VMStateInfo vmstate_info_int64;
+
+extern const VMStateInfo vmstate_info_uint8_equal;
+extern const VMStateInfo vmstate_info_uint16_equal;
+extern const VMStateInfo vmstate_info_int32_equal;
+extern const VMStateInfo vmstate_info_uint32_equal;
+extern const VMStateInfo vmstate_info_uint64_equal;
+extern const VMStateInfo vmstate_info_int32_le;
+
+extern const VMStateInfo vmstate_info_uint8;
+extern const VMStateInfo vmstate_info_uint16;
+extern const VMStateInfo vmstate_info_uint32;
+extern const VMStateInfo vmstate_info_uint64;
+
+extern const VMStateInfo vmstate_info_float64;
+
+extern const VMStateInfo vmstate_info_timer;
+extern const VMStateInfo vmstate_info_buffer;
+extern const VMStateInfo vmstate_info_unused_buffer;
+extern const VMStateInfo vmstate_info_bitmap;
+
+#define type_check_2darray(t1,t2,n,m) ((t1(*)[n][m])0 - (t2*)0)
+#define type_check_array(t1,t2,n) ((t1(*)[n])0 - (t2*)0)
+#define type_check_pointer(t1,t2) ((t1**)0 - (t2*)0)
+
+#define vmstate_offset_value(_state, _field, _type) \
+ (offsetof(_state, _field) + \
+ type_check(_type, typeof_field(_state, _field)))
+
+#define vmstate_offset_pointer(_state, _field, _type) \
+ (offsetof(_state, _field) + \
+ type_check_pointer(_type, typeof_field(_state, _field)))
+
+#define vmstate_offset_array(_state, _field, _type, _num) \
+ (offsetof(_state, _field) + \
+ type_check_array(_type, typeof_field(_state, _field), _num))
+
+#define vmstate_offset_2darray(_state, _field, _type, _n1, _n2) \
+ (offsetof(_state, _field) + \
+ type_check_2darray(_type, typeof_field(_state, _field), _n1, _n2))
+
+#define vmstate_offset_sub_array(_state, _field, _type, _start) \
+ (offsetof(_state, _field[_start]))
+
+#define vmstate_offset_buffer(_state, _field) \
+ vmstate_offset_array(_state, _field, uint8_t, \
+ sizeof(typeof_field(_state, _field)))
+
+#define VMSTATE_SINGLE_TEST(_field, _state, _test, _version, _info, _type) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .field_exists = (_test), \
+ .size = sizeof(_type), \
+ .info = &(_info), \
+ .flags = VMS_SINGLE, \
+ .offset = vmstate_offset_value(_state, _field, _type), \
+}
+
+#define VMSTATE_POINTER(_field, _state, _version, _info, _type) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .info = &(_info), \
+ .size = sizeof(_type), \
+ .flags = VMS_SINGLE|VMS_POINTER, \
+ .offset = vmstate_offset_value(_state, _field, _type), \
+}
+
+#define VMSTATE_POINTER_TEST(_field, _state, _test, _info, _type) { \
+ .name = (stringify(_field)), \
+ .info = &(_info), \
+ .field_exists = (_test), \
+ .size = sizeof(_type), \
+ .flags = VMS_SINGLE|VMS_POINTER, \
+ .offset = vmstate_offset_value(_state, _field, _type), \
+}
+
+#define VMSTATE_ARRAY(_field, _state, _num, _version, _info, _type) {\
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .num = (_num), \
+ .info = &(_info), \
+ .size = sizeof(_type), \
+ .flags = VMS_ARRAY, \
+ .offset = vmstate_offset_array(_state, _field, _type, _num), \
+}
+
+#define VMSTATE_2DARRAY(_field, _state, _n1, _n2, _version, _info, _type) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .num = (_n1) * (_n2), \
+ .info = &(_info), \
+ .size = sizeof(_type), \
+ .flags = VMS_ARRAY, \
+ .offset = vmstate_offset_2darray(_state, _field, _type, _n1, _n2), \
+}
+
+#define VMSTATE_ARRAY_TEST(_field, _state, _num, _test, _info, _type) {\
+ .name = (stringify(_field)), \
+ .field_exists = (_test), \
+ .num = (_num), \
+ .info = &(_info), \
+ .size = sizeof(_type), \
+ .flags = VMS_ARRAY, \
+ .offset = vmstate_offset_array(_state, _field, _type, _num),\
+}
+
+#define VMSTATE_SUB_ARRAY(_field, _state, _start, _num, _version, _info, _type) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .num = (_num), \
+ .info = &(_info), \
+ .size = sizeof(_type), \
+ .flags = VMS_ARRAY, \
+ .offset = vmstate_offset_sub_array(_state, _field, _type, _start), \
+}
+
+#define VMSTATE_ARRAY_INT32_UNSAFE(_field, _state, _field_num, _info, _type) {\
+ .name = (stringify(_field)), \
+ .num_offset = vmstate_offset_value(_state, _field_num, int32_t), \
+ .info = &(_info), \
+ .size = sizeof(_type), \
+ .flags = VMS_VARRAY_INT32, \
+ .offset = offsetof(_state, _field), \
+}
+
+#define VMSTATE_VARRAY_INT32(_field, _state, _field_num, _version, _info, _type) {\
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .num_offset = vmstate_offset_value(_state, _field_num, int32_t), \
+ .info = &(_info), \
+ .size = sizeof(_type), \
+ .flags = VMS_VARRAY_INT32|VMS_POINTER, \
+ .offset = vmstate_offset_pointer(_state, _field, _type), \
+}
+
+#define VMSTATE_VARRAY_UINT32(_field, _state, _field_num, _version, _info, _type) {\
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .num_offset = vmstate_offset_value(_state, _field_num, uint32_t),\
+ .info = &(_info), \
+ .size = sizeof(_type), \
+ .flags = VMS_VARRAY_UINT32|VMS_POINTER, \
+ .offset = vmstate_offset_pointer(_state, _field, _type), \
+}
+
+#define VMSTATE_VARRAY_UINT16_UNSAFE(_field, _state, _field_num, _version, _info, _type) {\
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .num_offset = vmstate_offset_value(_state, _field_num, uint16_t),\
+ .info = &(_info), \
+ .size = sizeof(_type), \
+ .flags = VMS_VARRAY_UINT16, \
+ .offset = offsetof(_state, _field), \
+}
+
+#define VMSTATE_STRUCT_TEST(_field, _state, _test, _version, _vmsd, _type) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .field_exists = (_test), \
+ .vmsd = &(_vmsd), \
+ .size = sizeof(_type), \
+ .flags = VMS_STRUCT, \
+ .offset = vmstate_offset_value(_state, _field, _type), \
+}
+
+#define VMSTATE_STRUCT_POINTER_TEST(_field, _state, _test, _vmsd, _type) { \
+ .name = (stringify(_field)), \
+ .field_exists = (_test), \
+ .vmsd = &(_vmsd), \
+ .size = sizeof(_type), \
+ .flags = VMS_STRUCT|VMS_POINTER, \
+ .offset = vmstate_offset_value(_state, _field, _type), \
+}
+
+#define VMSTATE_ARRAY_OF_POINTER(_field, _state, _num, _version, _info, _type) {\
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .num = (_num), \
+ .info = &(_info), \
+ .size = sizeof(_type), \
+ .flags = VMS_ARRAY|VMS_ARRAY_OF_POINTER, \
+ .offset = vmstate_offset_array(_state, _field, _type, _num), \
+}
+
+#define VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, _test, _version, _vmsd, _type) { \
+ .name = (stringify(_field)), \
+ .num = (_num), \
+ .field_exists = (_test), \
+ .version_id = (_version), \
+ .vmsd = &(_vmsd), \
+ .size = sizeof(_type), \
+ .flags = VMS_STRUCT|VMS_ARRAY, \
+ .offset = vmstate_offset_array(_state, _field, _type, _num),\
+}
+
+#define VMSTATE_STRUCT_VARRAY_UINT8(_field, _state, _field_num, _version, _vmsd, _type) { \
+ .name = (stringify(_field)), \
+ .num_offset = vmstate_offset_value(_state, _field_num, uint8_t), \
+ .version_id = (_version), \
+ .vmsd = &(_vmsd), \
+ .size = sizeof(_type), \
+ .flags = VMS_STRUCT|VMS_VARRAY_UINT8, \
+ .offset = offsetof(_state, _field), \
+}
+
+#define VMSTATE_STRUCT_VARRAY_POINTER_INT32(_field, _state, _field_num, _vmsd, _type) { \
+ .name = (stringify(_field)), \
+ .version_id = 0, \
+ .num_offset = vmstate_offset_value(_state, _field_num, int32_t), \
+ .size = sizeof(_type), \
+ .vmsd = &(_vmsd), \
+ .flags = VMS_POINTER | VMS_VARRAY_INT32 | VMS_STRUCT, \
+ .offset = vmstate_offset_pointer(_state, _field, _type), \
+}
+
+#define VMSTATE_STRUCT_VARRAY_POINTER_UINT32(_field, _state, _field_num, _vmsd, _type) { \
+ .name = (stringify(_field)), \
+ .version_id = 0, \
+ .num_offset = vmstate_offset_value(_state, _field_num, uint32_t),\
+ .size = sizeof(_type), \
+ .vmsd = &(_vmsd), \
+ .flags = VMS_POINTER | VMS_VARRAY_INT32 | VMS_STRUCT, \
+ .offset = vmstate_offset_pointer(_state, _field, _type), \
+}
+
+#define VMSTATE_STRUCT_VARRAY_POINTER_UINT16(_field, _state, _field_num, _vmsd, _type) { \
+ .name = (stringify(_field)), \
+ .version_id = 0, \
+ .num_offset = vmstate_offset_value(_state, _field_num, uint16_t),\
+ .size = sizeof(_type), \
+ .vmsd = &(_vmsd), \
+ .flags = VMS_POINTER | VMS_VARRAY_UINT16 | VMS_STRUCT, \
+ .offset = vmstate_offset_pointer(_state, _field, _type), \
+}
+
+#define VMSTATE_STRUCT_VARRAY_INT32(_field, _state, _field_num, _version, _vmsd, _type) { \
+ .name = (stringify(_field)), \
+ .num_offset = vmstate_offset_value(_state, _field_num, int32_t), \
+ .version_id = (_version), \
+ .vmsd = &(_vmsd), \
+ .size = sizeof(_type), \
+ .flags = VMS_STRUCT|VMS_VARRAY_INT32, \
+ .offset = offsetof(_state, _field), \
+}
+
+#define VMSTATE_STRUCT_VARRAY_UINT32(_field, _state, _field_num, _version, _vmsd, _type) { \
+ .name = (stringify(_field)), \
+ .num_offset = vmstate_offset_value(_state, _field_num, uint32_t), \
+ .version_id = (_version), \
+ .vmsd = &(_vmsd), \
+ .size = sizeof(_type), \
+ .flags = VMS_STRUCT|VMS_VARRAY_UINT32, \
+ .offset = offsetof(_state, _field), \
+}
+
+#define VMSTATE_STATIC_BUFFER(_field, _state, _version, _test, _start, _size) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .field_exists = (_test), \
+ .size = (_size - _start), \
+ .info = &vmstate_info_buffer, \
+ .flags = VMS_BUFFER, \
+ .offset = vmstate_offset_buffer(_state, _field) + _start, \
+}
+
+#define VMSTATE_VBUFFER_MULTIPLY(_field, _state, _version, _test, _start, _field_size, _multiply) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .field_exists = (_test), \
+ .size_offset = vmstate_offset_value(_state, _field_size, uint32_t),\
+ .size = (_multiply), \
+ .info = &vmstate_info_buffer, \
+ .flags = VMS_VBUFFER|VMS_POINTER|VMS_MULTIPLY, \
+ .offset = offsetof(_state, _field), \
+ .start = (_start), \
+}
+
+#define VMSTATE_VBUFFER(_field, _state, _version, _test, _start, _field_size) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .field_exists = (_test), \
+ .size_offset = vmstate_offset_value(_state, _field_size, int32_t),\
+ .info = &vmstate_info_buffer, \
+ .flags = VMS_VBUFFER|VMS_POINTER, \
+ .offset = offsetof(_state, _field), \
+ .start = (_start), \
+}
+
+#define VMSTATE_VBUFFER_UINT32(_field, _state, _version, _test, _start, _field_size) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .field_exists = (_test), \
+ .size_offset = vmstate_offset_value(_state, _field_size, uint32_t),\
+ .info = &vmstate_info_buffer, \
+ .flags = VMS_VBUFFER|VMS_POINTER, \
+ .offset = offsetof(_state, _field), \
+ .start = (_start), \
+}
+
+#define VMSTATE_BUFFER_UNSAFE_INFO(_field, _state, _version, _info, _size) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .size = (_size), \
+ .info = &(_info), \
+ .flags = VMS_BUFFER, \
+ .offset = offsetof(_state, _field), \
+}
+
+#define VMSTATE_BUFFER_POINTER_UNSAFE(_field, _state, _version, _size) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .size = (_size), \
+ .info = &vmstate_info_buffer, \
+ .flags = VMS_BUFFER|VMS_POINTER, \
+ .offset = offsetof(_state, _field), \
+}
+
+#define VMSTATE_UNUSED_BUFFER(_test, _version, _size) { \
+ .name = "unused", \
+ .field_exists = (_test), \
+ .version_id = (_version), \
+ .size = (_size), \
+ .info = &vmstate_info_unused_buffer, \
+ .flags = VMS_BUFFER, \
+}
+
+/* _field_size should be a int32_t field in the _state struct giving the
+ * size of the bitmap _field in bits.
+ */
+#define VMSTATE_BITMAP(_field, _state, _version, _field_size) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .size_offset = vmstate_offset_value(_state, _field_size, int32_t),\
+ .info = &vmstate_info_bitmap, \
+ .flags = VMS_VBUFFER|VMS_POINTER, \
+ .offset = offsetof(_state, _field), \
+}
+
+/* _f : field name
+ _f_n : num of elements field_name
+ _n : num of elements
+ _s : struct state name
+ _v : version
+*/
+
+#define VMSTATE_SINGLE(_field, _state, _version, _info, _type) \
+ VMSTATE_SINGLE_TEST(_field, _state, NULL, _version, _info, _type)
+
+#define VMSTATE_STRUCT(_field, _state, _version, _vmsd, _type) \
+ VMSTATE_STRUCT_TEST(_field, _state, NULL, _version, _vmsd, _type)
+
+#define VMSTATE_STRUCT_POINTER(_field, _state, _vmsd, _type) \
+ VMSTATE_STRUCT_POINTER_TEST(_field, _state, NULL, _vmsd, _type)
+
+#define VMSTATE_STRUCT_ARRAY(_field, _state, _num, _version, _vmsd, _type) \
+ VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, NULL, _version, \
+ _vmsd, _type)
+
+#define VMSTATE_BOOL_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_bool, bool)
+
+#define VMSTATE_INT8_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_int8, int8_t)
+#define VMSTATE_INT16_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_int16, int16_t)
+#define VMSTATE_INT32_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_int32, int32_t)
+#define VMSTATE_INT64_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_int64, int64_t)
+
+#define VMSTATE_UINT8_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint8, uint8_t)
+#define VMSTATE_UINT16_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint16, uint16_t)
+#define VMSTATE_UINT32_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint32, uint32_t)
+#define VMSTATE_UINT64_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint64, uint64_t)
+
+#define VMSTATE_BOOL(_f, _s) \
+ VMSTATE_BOOL_V(_f, _s, 0)
+
+#define VMSTATE_INT8(_f, _s) \
+ VMSTATE_INT8_V(_f, _s, 0)
+#define VMSTATE_INT16(_f, _s) \
+ VMSTATE_INT16_V(_f, _s, 0)
+#define VMSTATE_INT32(_f, _s) \
+ VMSTATE_INT32_V(_f, _s, 0)
+#define VMSTATE_INT64(_f, _s) \
+ VMSTATE_INT64_V(_f, _s, 0)
+
+#define VMSTATE_UINT8(_f, _s) \
+ VMSTATE_UINT8_V(_f, _s, 0)
+#define VMSTATE_UINT16(_f, _s) \
+ VMSTATE_UINT16_V(_f, _s, 0)
+#define VMSTATE_UINT32(_f, _s) \
+ VMSTATE_UINT32_V(_f, _s, 0)
+#define VMSTATE_UINT64(_f, _s) \
+ VMSTATE_UINT64_V(_f, _s, 0)
+
+#define VMSTATE_UINT8_EQUAL(_f, _s) \
+ VMSTATE_SINGLE(_f, _s, 0, vmstate_info_uint8_equal, uint8_t)
+
+#define VMSTATE_UINT16_EQUAL(_f, _s) \
+ VMSTATE_SINGLE(_f, _s, 0, vmstate_info_uint16_equal, uint16_t)
+
+#define VMSTATE_UINT16_EQUAL_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint16_equal, uint16_t)
+
+#define VMSTATE_INT32_EQUAL(_f, _s) \
+ VMSTATE_SINGLE(_f, _s, 0, vmstate_info_int32_equal, int32_t)
+
+#define VMSTATE_UINT32_EQUAL_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint32_equal, uint32_t)
+
+#define VMSTATE_UINT32_EQUAL(_f, _s) \
+ VMSTATE_UINT32_EQUAL_V(_f, _s, 0)
+
+#define VMSTATE_UINT64_EQUAL_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint64_equal, uint64_t)
+
+#define VMSTATE_UINT64_EQUAL(_f, _s) \
+ VMSTATE_UINT64_EQUAL_V(_f, _s, 0)
+
+#define VMSTATE_INT32_LE(_f, _s) \
+ VMSTATE_SINGLE(_f, _s, 0, vmstate_info_int32_le, int32_t)
+
+#define VMSTATE_UINT8_TEST(_f, _s, _t) \
+ VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_uint8, uint8_t)
+
+#define VMSTATE_UINT16_TEST(_f, _s, _t) \
+ VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_uint16, uint16_t)
+
+#define VMSTATE_UINT32_TEST(_f, _s, _t) \
+ VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_uint32, uint32_t)
+
+
+#define VMSTATE_FLOAT64_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_float64, float64)
+
+#define VMSTATE_FLOAT64(_f, _s) \
+ VMSTATE_FLOAT64_V(_f, _s, 0)
+
+#define VMSTATE_TIMER_TEST(_f, _s, _test) \
+ VMSTATE_POINTER_TEST(_f, _s, _test, vmstate_info_timer, QEMUTimer *)
+
+#define VMSTATE_TIMER_V(_f, _s, _v) \
+ VMSTATE_POINTER(_f, _s, _v, vmstate_info_timer, QEMUTimer *)
+
+#define VMSTATE_TIMER(_f, _s) \
+ VMSTATE_TIMER_V(_f, _s, 0)
+
+#define VMSTATE_TIMER_ARRAY(_f, _s, _n) \
+ VMSTATE_ARRAY_OF_POINTER(_f, _s, _n, 0, vmstate_info_timer, QEMUTimer *)
+
+#define VMSTATE_BOOL_ARRAY_V(_f, _s, _n, _v) \
+ VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_bool, bool)
+
+#define VMSTATE_BOOL_ARRAY(_f, _s, _n) \
+ VMSTATE_BOOL_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_UINT16_ARRAY_V(_f, _s, _n, _v) \
+ VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_uint16, uint16_t)
+
+#define VMSTATE_UINT16_2DARRAY_V(_f, _s, _n1, _n2, _v) \
+ VMSTATE_2DARRAY(_f, _s, _n1, _n2, _v, vmstate_info_uint16, uint16_t)
+
+#define VMSTATE_UINT16_ARRAY(_f, _s, _n) \
+ VMSTATE_UINT16_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_UINT16_2DARRAY(_f, _s, _n1, _n2) \
+ VMSTATE_UINT16_2DARRAY_V(_f, _s, _n1, _n2, 0)
+
+#define VMSTATE_UINT8_2DARRAY_V(_f, _s, _n1, _n2, _v) \
+ VMSTATE_2DARRAY(_f, _s, _n1, _n2, _v, vmstate_info_uint8, uint8_t)
+
+#define VMSTATE_UINT8_ARRAY_V(_f, _s, _n, _v) \
+ VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_uint8, uint8_t)
+
+#define VMSTATE_UINT8_ARRAY(_f, _s, _n) \
+ VMSTATE_UINT8_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_UINT8_2DARRAY(_f, _s, _n1, _n2) \
+ VMSTATE_UINT8_2DARRAY_V(_f, _s, _n1, _n2, 0)
+
+#define VMSTATE_UINT32_ARRAY_V(_f, _s, _n, _v) \
+ VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_uint32, uint32_t)
+
+#define VMSTATE_UINT32_ARRAY(_f, _s, _n) \
+ VMSTATE_UINT32_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_UINT64_ARRAY_V(_f, _s, _n, _v) \
+ VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_uint64, uint64_t)
+
+#define VMSTATE_UINT64_ARRAY(_f, _s, _n) \
+ VMSTATE_UINT64_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_INT16_ARRAY_V(_f, _s, _n, _v) \
+ VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_int16, int16_t)
+
+#define VMSTATE_INT16_ARRAY(_f, _s, _n) \
+ VMSTATE_INT16_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_INT32_ARRAY_V(_f, _s, _n, _v) \
+ VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_int32, int32_t)
+
+#define VMSTATE_INT32_ARRAY(_f, _s, _n) \
+ VMSTATE_INT32_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_UINT32_SUB_ARRAY(_f, _s, _start, _num) \
+ VMSTATE_SUB_ARRAY(_f, _s, _start, _num, 0, vmstate_info_uint32, uint32_t)
+
+#define VMSTATE_UINT32_ARRAY(_f, _s, _n) \
+ VMSTATE_UINT32_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_INT64_ARRAY_V(_f, _s, _n, _v) \
+ VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_int64, int64_t)
+
+#define VMSTATE_INT64_ARRAY(_f, _s, _n) \
+ VMSTATE_INT64_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_FLOAT64_ARRAY_V(_f, _s, _n, _v) \
+ VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_float64, float64)
+
+#define VMSTATE_FLOAT64_ARRAY(_f, _s, _n) \
+ VMSTATE_FLOAT64_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_BUFFER_V(_f, _s, _v) \
+ VMSTATE_STATIC_BUFFER(_f, _s, _v, NULL, 0, sizeof(typeof_field(_s, _f)))
+
+#define VMSTATE_BUFFER(_f, _s) \
+ VMSTATE_BUFFER_V(_f, _s, 0)
+
+#define VMSTATE_PARTIAL_BUFFER(_f, _s, _size) \
+ VMSTATE_STATIC_BUFFER(_f, _s, 0, NULL, 0, _size)
+
+#define VMSTATE_BUFFER_START_MIDDLE(_f, _s, _start) \
+ VMSTATE_STATIC_BUFFER(_f, _s, 0, NULL, _start, sizeof(typeof_field(_s, _f)))
+
+#define VMSTATE_PARTIAL_VBUFFER(_f, _s, _size) \
+ VMSTATE_VBUFFER(_f, _s, 0, NULL, 0, _size)
+
+#define VMSTATE_PARTIAL_VBUFFER_UINT32(_f, _s, _size) \
+ VMSTATE_VBUFFER_UINT32(_f, _s, 0, NULL, 0, _size)
+
+#define VMSTATE_SUB_VBUFFER(_f, _s, _start, _size) \
+ VMSTATE_VBUFFER(_f, _s, 0, NULL, _start, _size)
+
+#define VMSTATE_BUFFER_TEST(_f, _s, _test) \
+ VMSTATE_STATIC_BUFFER(_f, _s, 0, _test, 0, sizeof(typeof_field(_s, _f)))
+
+#define VMSTATE_BUFFER_UNSAFE(_field, _state, _version, _size) \
+ VMSTATE_BUFFER_UNSAFE_INFO(_field, _state, _version, vmstate_info_buffer, _size)
+
+#define VMSTATE_UNUSED_V(_v, _size) \
+ VMSTATE_UNUSED_BUFFER(NULL, _v, _size)
+
+#define VMSTATE_UNUSED(_size) \
+ VMSTATE_UNUSED_V(0, _size)
+
+#define VMSTATE_UNUSED_TEST(_test, _size) \
+ VMSTATE_UNUSED_BUFFER(_test, 0, _size)
+
+#define VMSTATE_END_OF_LIST() \
+ {}
+
+int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
+ void *opaque, int version_id);
+void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
+ void *opaque);
+
+int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
+ const VMStateDescription *vmsd,
+ void *base, int alias_id,
+ int required_for_version);
+
+static inline int vmstate_register(DeviceState *dev, int instance_id,
+ const VMStateDescription *vmsd,
+ void *opaque)
+{
+ return vmstate_register_with_alias_id(dev, instance_id, vmsd,
+ opaque, -1, 0);
+}
+
+void vmstate_unregister(DeviceState *dev, const VMStateDescription *vmsd,
+ void *opaque);
+
+struct MemoryRegion;
+void vmstate_register_ram(struct MemoryRegion *memory, DeviceState *dev);
+void vmstate_unregister_ram(struct MemoryRegion *memory, DeviceState *dev);
+void vmstate_register_ram_global(struct MemoryRegion *memory);
+
+#endif
diff --git a/contrib/qemu/include/monitor/monitor.h b/contrib/qemu/include/monitor/monitor.h
new file mode 100644
index 000000000..1942cc42f
--- /dev/null
+++ b/contrib/qemu/include/monitor/monitor.h
@@ -0,0 +1,104 @@
+#ifndef MONITOR_H
+#define MONITOR_H
+
+#include "qemu-common.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qdict.h"
+#include "block/block.h"
+#include "monitor/readline.h"
+
+extern Monitor *cur_mon;
+extern Monitor *default_mon;
+
+/* flags for monitor_init */
+#define MONITOR_IS_DEFAULT 0x01
+#define MONITOR_USE_READLINE 0x02
+#define MONITOR_USE_CONTROL 0x04
+#define MONITOR_USE_PRETTY 0x08
+
+/* flags for monitor commands */
+#define MONITOR_CMD_ASYNC 0x0001
+
+/* QMP events */
+typedef enum MonitorEvent {
+ QEVENT_SHUTDOWN,
+ QEVENT_RESET,
+ QEVENT_POWERDOWN,
+ QEVENT_STOP,
+ QEVENT_RESUME,
+ QEVENT_VNC_CONNECTED,
+ QEVENT_VNC_INITIALIZED,
+ QEVENT_VNC_DISCONNECTED,
+ QEVENT_BLOCK_IO_ERROR,
+ QEVENT_RTC_CHANGE,
+ QEVENT_WATCHDOG,
+ QEVENT_SPICE_CONNECTED,
+ QEVENT_SPICE_INITIALIZED,
+ QEVENT_SPICE_DISCONNECTED,
+ QEVENT_BLOCK_JOB_COMPLETED,
+ QEVENT_BLOCK_JOB_CANCELLED,
+ QEVENT_BLOCK_JOB_ERROR,
+ QEVENT_BLOCK_JOB_READY,
+ QEVENT_DEVICE_DELETED,
+ QEVENT_DEVICE_TRAY_MOVED,
+ QEVENT_NIC_RX_FILTER_CHANGED,
+ QEVENT_SUSPEND,
+ QEVENT_SUSPEND_DISK,
+ QEVENT_WAKEUP,
+ QEVENT_BALLOON_CHANGE,
+ QEVENT_SPICE_MIGRATE_COMPLETED,
+ QEVENT_GUEST_PANICKED,
+
+ /* Add to 'monitor_event_names' array in monitor.c when
+ * defining new events here */
+
+ QEVENT_MAX,
+} MonitorEvent;
+
+int monitor_cur_is_qmp(void);
+
+void monitor_protocol_event(MonitorEvent event, QObject *data);
+void monitor_init(CharDriverState *chr, int flags);
+
+int monitor_suspend(Monitor *mon);
+void monitor_resume(Monitor *mon);
+
+int monitor_read_bdrv_key_start(Monitor *mon, BlockDriverState *bs,
+ BlockDriverCompletionFunc *completion_cb,
+ void *opaque);
+int monitor_read_block_device_key(Monitor *mon, const char *device,
+ BlockDriverCompletionFunc *completion_cb,
+ void *opaque);
+
+int monitor_get_fd(Monitor *mon, const char *fdname, Error **errp);
+int monitor_handle_fd_param(Monitor *mon, const char *fdname);
+
+void monitor_vprintf(Monitor *mon, const char *fmt, va_list ap)
+ GCC_FMT_ATTR(2, 0);
+void monitor_printf(Monitor *mon, const char *fmt, ...) GCC_FMT_ATTR(2, 3);
+void monitor_print_filename(Monitor *mon, const char *filename);
+void monitor_flush(Monitor *mon);
+int monitor_set_cpu(int cpu_index);
+int monitor_get_cpu_index(void);
+
+typedef void (MonitorCompletion)(void *opaque, QObject *ret_data);
+
+void monitor_set_error(Monitor *mon, QError *qerror);
+void monitor_read_command(Monitor *mon, int show_prompt);
+ReadLineState *monitor_get_rs(Monitor *mon);
+int monitor_read_password(Monitor *mon, ReadLineFunc *readline_func,
+ void *opaque);
+
+int qmp_qom_set(Monitor *mon, const QDict *qdict, QObject **ret);
+
+int qmp_qom_get(Monitor *mon, const QDict *qdict, QObject **ret);
+
+AddfdInfo *monitor_fdset_add_fd(int fd, bool has_fdset_id, int64_t fdset_id,
+ bool has_opaque, const char *opaque,
+ Error **errp);
+int monitor_fdset_get_fd(int64_t fdset_id, int flags);
+int monitor_fdset_dup_fd_add(int64_t fdset_id, int dup_fd);
+int monitor_fdset_dup_fd_remove(int dup_fd);
+int monitor_fdset_dup_fd_find(int dup_fd);
+
+#endif /* !MONITOR_H */
diff --git a/contrib/qemu/include/monitor/readline.h b/contrib/qemu/include/monitor/readline.h
new file mode 100644
index 000000000..fc9806ecf
--- /dev/null
+++ b/contrib/qemu/include/monitor/readline.h
@@ -0,0 +1,55 @@
+#ifndef READLINE_H
+#define READLINE_H
+
+#include "qemu-common.h"
+
+#define READLINE_CMD_BUF_SIZE 4095
+#define READLINE_MAX_CMDS 64
+#define READLINE_MAX_COMPLETIONS 256
+
+typedef void ReadLineFunc(Monitor *mon, const char *str, void *opaque);
+typedef void ReadLineCompletionFunc(const char *cmdline);
+
+typedef struct ReadLineState {
+ char cmd_buf[READLINE_CMD_BUF_SIZE + 1];
+ int cmd_buf_index;
+ int cmd_buf_size;
+
+ char last_cmd_buf[READLINE_CMD_BUF_SIZE + 1];
+ int last_cmd_buf_index;
+ int last_cmd_buf_size;
+
+ int esc_state;
+ int esc_param;
+
+ char *history[READLINE_MAX_CMDS];
+ int hist_entry;
+
+ ReadLineCompletionFunc *completion_finder;
+ char *completions[READLINE_MAX_COMPLETIONS];
+ int nb_completions;
+ int completion_index;
+
+ ReadLineFunc *readline_func;
+ void *readline_opaque;
+ int read_password;
+ char prompt[256];
+ Monitor *mon;
+} ReadLineState;
+
+void readline_add_completion(ReadLineState *rs, const char *str);
+void readline_set_completion_index(ReadLineState *rs, int completion_index);
+
+const char *readline_get_history(ReadLineState *rs, unsigned int index);
+
+void readline_handle_byte(ReadLineState *rs, int ch);
+
+void readline_start(ReadLineState *rs, const char *prompt, int read_password,
+ ReadLineFunc *readline_func, void *opaque);
+void readline_restart(ReadLineState *rs);
+void readline_show_prompt(ReadLineState *rs);
+
+ReadLineState *readline_init(Monitor *mon,
+ ReadLineCompletionFunc *completion_finder);
+
+#endif /* !READLINE_H */
diff --git a/contrib/qemu/include/qapi/error.h b/contrib/qemu/include/qapi/error.h
new file mode 100644
index 000000000..ffd1cea47
--- /dev/null
+++ b/contrib/qemu/include/qapi/error.h
@@ -0,0 +1,85 @@
+/*
+ * QEMU Error Objects
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2. See
+ * the COPYING.LIB file in the top-level directory.
+ */
+#ifndef ERROR_H
+#define ERROR_H
+
+#include "qemu/compiler.h"
+#include "qapi-types.h"
+#include <stdbool.h>
+
+/**
+ * A class representing internal errors within QEMU. An error has a ErrorClass
+ * code and a human message.
+ */
+typedef struct Error Error;
+
+/**
+ * Set an indirect pointer to an error given a ErrorClass value and a
+ * printf-style human message. This function is not meant to be used outside
+ * of QEMU.
+ */
+void error_set(Error **err, ErrorClass err_class, const char *fmt, ...) GCC_FMT_ATTR(3, 4);
+
+/**
+ * Set an indirect pointer to an error given a ErrorClass value and a
+ * printf-style human message, followed by a strerror() string if
+ * @os_error is not zero.
+ */
+void error_set_errno(Error **err, int os_error, ErrorClass err_class, const char *fmt, ...) GCC_FMT_ATTR(4, 5);
+
+/**
+ * Same as error_set(), but sets a generic error
+ */
+#define error_setg(err, fmt, ...) \
+ error_set(err, ERROR_CLASS_GENERIC_ERROR, fmt, ## __VA_ARGS__)
+#define error_setg_errno(err, os_error, fmt, ...) \
+ error_set_errno(err, os_error, ERROR_CLASS_GENERIC_ERROR, fmt, ## __VA_ARGS__)
+
+/**
+ * Helper for open() errors
+ */
+void error_setg_file_open(Error **errp, int os_errno, const char *filename);
+
+/**
+ * Returns true if an indirect pointer to an error is pointing to a valid
+ * error object.
+ */
+bool error_is_set(Error **err);
+
+/*
+ * Get the error class of an error object.
+ */
+ErrorClass error_get_class(const Error *err);
+
+/**
+ * Returns an exact copy of the error passed as an argument.
+ */
+Error *error_copy(const Error *err);
+
+/**
+ * Get a human readable representation of an error object.
+ */
+const char *error_get_pretty(Error *err);
+
+/**
+ * Propagate an error to an indirect pointer to an error. This function will
+ * always transfer ownership of the error reference and handles the case where
+ * dst_err is NULL correctly. Errors after the first are discarded.
+ */
+void error_propagate(Error **dst_err, Error *local_err);
+
+/**
+ * Free an error object.
+ */
+void error_free(Error *err);
+
+#endif
diff --git a/contrib/qemu/include/qapi/qmp/json-lexer.h b/contrib/qemu/include/qapi/qmp/json-lexer.h
new file mode 100644
index 000000000..cdff0460a
--- /dev/null
+++ b/contrib/qemu/include/qapi/qmp/json-lexer.h
@@ -0,0 +1,51 @@
+/*
+ * JSON lexer
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_JSON_LEXER_H
+#define QEMU_JSON_LEXER_H
+
+#include "qapi/qmp/qstring.h"
+#include "qapi/qmp/qlist.h"
+
+typedef enum json_token_type {
+ JSON_OPERATOR = 100,
+ JSON_INTEGER,
+ JSON_FLOAT,
+ JSON_KEYWORD,
+ JSON_STRING,
+ JSON_ESCAPE,
+ JSON_SKIP,
+ JSON_ERROR,
+} JSONTokenType;
+
+typedef struct JSONLexer JSONLexer;
+
+typedef void (JSONLexerEmitter)(JSONLexer *, QString *, JSONTokenType, int x, int y);
+
+struct JSONLexer
+{
+ JSONLexerEmitter *emit;
+ int state;
+ QString *token;
+ int x, y;
+};
+
+void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func);
+
+int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size);
+
+int json_lexer_flush(JSONLexer *lexer);
+
+void json_lexer_destroy(JSONLexer *lexer);
+
+#endif
diff --git a/contrib/qemu/include/qapi/qmp/json-parser.h b/contrib/qemu/include/qapi/qmp/json-parser.h
new file mode 100644
index 000000000..44d88f346
--- /dev/null
+++ b/contrib/qemu/include/qapi/qmp/json-parser.h
@@ -0,0 +1,24 @@
+/*
+ * JSON Parser
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_JSON_PARSER_H
+#define QEMU_JSON_PARSER_H
+
+#include "qemu-common.h"
+#include "qapi/qmp/qlist.h"
+#include "qapi/error.h"
+
+QObject *json_parser_parse(QList *tokens, va_list *ap);
+QObject *json_parser_parse_err(QList *tokens, va_list *ap, Error **errp);
+
+#endif
diff --git a/contrib/qemu/include/qapi/qmp/json-streamer.h b/contrib/qemu/include/qapi/qmp/json-streamer.h
new file mode 100644
index 000000000..823f7d7fa
--- /dev/null
+++ b/contrib/qemu/include/qapi/qmp/json-streamer.h
@@ -0,0 +1,40 @@
+/*
+ * JSON streaming support
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_JSON_STREAMER_H
+#define QEMU_JSON_STREAMER_H
+
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/json-lexer.h"
+
+typedef struct JSONMessageParser
+{
+ void (*emit)(struct JSONMessageParser *parser, QList *tokens);
+ JSONLexer lexer;
+ int brace_count;
+ int bracket_count;
+ QList *tokens;
+ uint64_t token_size;
+} JSONMessageParser;
+
+void json_message_parser_init(JSONMessageParser *parser,
+ void (*func)(JSONMessageParser *, QList *));
+
+int json_message_parser_feed(JSONMessageParser *parser,
+ const char *buffer, size_t size);
+
+int json_message_parser_flush(JSONMessageParser *parser);
+
+void json_message_parser_destroy(JSONMessageParser *parser);
+
+#endif
diff --git a/contrib/qemu/include/qapi/qmp/qbool.h b/contrib/qemu/include/qapi/qmp/qbool.h
new file mode 100644
index 000000000..c4eaab9bb
--- /dev/null
+++ b/contrib/qemu/include/qapi/qmp/qbool.h
@@ -0,0 +1,29 @@
+/*
+ * QBool Module
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef QBOOL_H
+#define QBOOL_H
+
+#include <stdint.h>
+#include "qapi/qmp/qobject.h"
+
+typedef struct QBool {
+ QObject_HEAD;
+ int value;
+} QBool;
+
+QBool *qbool_from_int(int value);
+int qbool_get_int(const QBool *qb);
+QBool *qobject_to_qbool(const QObject *obj);
+
+#endif /* QBOOL_H */
diff --git a/contrib/qemu/include/qapi/qmp/qdict.h b/contrib/qemu/include/qapi/qmp/qdict.h
new file mode 100644
index 000000000..685b2e3fc
--- /dev/null
+++ b/contrib/qemu/include/qapi/qmp/qdict.h
@@ -0,0 +1,69 @@
+/*
+ * QDict Module
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * Authors:
+ * Luiz Capitulino <lcapitulino@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef QDICT_H
+#define QDICT_H
+
+#include "qapi/qmp/qobject.h"
+#include "qapi/qmp/qlist.h"
+#include "qemu/queue.h"
+#include <stdint.h>
+
+#define QDICT_BUCKET_MAX 512
+
+typedef struct QDictEntry {
+ char *key;
+ QObject *value;
+ QLIST_ENTRY(QDictEntry) next;
+} QDictEntry;
+
+typedef struct QDict {
+ QObject_HEAD;
+ size_t size;
+ QLIST_HEAD(,QDictEntry) table[QDICT_BUCKET_MAX];
+} QDict;
+
+/* Object API */
+QDict *qdict_new(void);
+const char *qdict_entry_key(const QDictEntry *entry);
+QObject *qdict_entry_value(const QDictEntry *entry);
+size_t qdict_size(const QDict *qdict);
+void qdict_put_obj(QDict *qdict, const char *key, QObject *value);
+void qdict_del(QDict *qdict, const char *key);
+int qdict_haskey(const QDict *qdict, const char *key);
+QObject *qdict_get(const QDict *qdict, const char *key);
+QDict *qobject_to_qdict(const QObject *obj);
+void qdict_iter(const QDict *qdict,
+ void (*iter)(const char *key, QObject *obj, void *opaque),
+ void *opaque);
+const QDictEntry *qdict_first(const QDict *qdict);
+const QDictEntry *qdict_next(const QDict *qdict, const QDictEntry *entry);
+
+/* Helper to qdict_put_obj(), accepts any object */
+#define qdict_put(qdict, key, obj) \
+ qdict_put_obj(qdict, key, QOBJECT(obj))
+
+/* High level helpers */
+double qdict_get_double(const QDict *qdict, const char *key);
+int64_t qdict_get_int(const QDict *qdict, const char *key);
+int qdict_get_bool(const QDict *qdict, const char *key);
+QList *qdict_get_qlist(const QDict *qdict, const char *key);
+QDict *qdict_get_qdict(const QDict *qdict, const char *key);
+const char *qdict_get_str(const QDict *qdict, const char *key);
+int64_t qdict_get_try_int(const QDict *qdict, const char *key,
+ int64_t def_value);
+int qdict_get_try_bool(const QDict *qdict, const char *key, int def_value);
+const char *qdict_get_try_str(const QDict *qdict, const char *key);
+
+QDict *qdict_clone_shallow(const QDict *src);
+
+#endif /* QDICT_H */
diff --git a/contrib/qemu/include/qapi/qmp/qerror.h b/contrib/qemu/include/qapi/qmp/qerror.h
new file mode 100644
index 000000000..c30c2f6d7
--- /dev/null
+++ b/contrib/qemu/include/qapi/qmp/qerror.h
@@ -0,0 +1,249 @@
+/*
+ * QError Module
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * Authors:
+ * Luiz Capitulino <lcapitulino@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+#ifndef QERROR_H
+#define QERROR_H
+
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "qapi-types.h"
+#include <stdarg.h>
+
+typedef struct QError {
+ QObject_HEAD;
+ Location loc;
+ char *err_msg;
+ ErrorClass err_class;
+} QError;
+
+QString *qerror_human(const QError *qerror);
+void qerror_report(ErrorClass err_class, const char *fmt, ...) GCC_FMT_ATTR(2, 3);
+void qerror_report_err(Error *err);
+void assert_no_error(Error *err);
+
+/*
+ * QError class list
+ * Please keep the definitions in alphabetical order.
+ * Use scripts/check-qerror.sh to check.
+ */
+#define QERR_ADD_CLIENT_FAILED \
+ ERROR_CLASS_GENERIC_ERROR, "Could not add client"
+
+#define QERR_AMBIGUOUS_PATH \
+ ERROR_CLASS_GENERIC_ERROR, "Path '%s' does not uniquely identify an object"
+
+#define QERR_BAD_BUS_FOR_DEVICE \
+ ERROR_CLASS_GENERIC_ERROR, "Device '%s' can't go on a %s bus"
+
+#define QERR_BASE_NOT_FOUND \
+ ERROR_CLASS_GENERIC_ERROR, "Base '%s' not found"
+
+#define QERR_BLOCK_JOB_NOT_ACTIVE \
+ ERROR_CLASS_DEVICE_NOT_ACTIVE, "No active block job on device '%s'"
+
+#define QERR_BLOCK_JOB_PAUSED \
+ ERROR_CLASS_GENERIC_ERROR, "The block job for device '%s' is currently paused"
+
+#define QERR_BLOCK_JOB_NOT_READY \
+ ERROR_CLASS_GENERIC_ERROR, "The active block job for device '%s' cannot be completed"
+
+#define QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED \
+ ERROR_CLASS_GENERIC_ERROR, "Block format '%s' used by device '%s' does not support feature '%s'"
+
+#define QERR_BUFFER_OVERRUN \
+ ERROR_CLASS_GENERIC_ERROR, "An internal buffer overran"
+
+#define QERR_BUS_NO_HOTPLUG \
+ ERROR_CLASS_GENERIC_ERROR, "Bus '%s' does not support hotplugging"
+
+#define QERR_BUS_NOT_FOUND \
+ ERROR_CLASS_GENERIC_ERROR, "Bus '%s' not found"
+
+#define QERR_COMMAND_DISABLED \
+ ERROR_CLASS_GENERIC_ERROR, "The command %s has been disabled for this instance"
+
+#define QERR_COMMAND_NOT_FOUND \
+ ERROR_CLASS_COMMAND_NOT_FOUND, "The command %s has not been found"
+
+#define QERR_DEVICE_ENCRYPTED \
+ ERROR_CLASS_DEVICE_ENCRYPTED, "'%s' (%s) is encrypted"
+
+#define QERR_DEVICE_FEATURE_BLOCKS_MIGRATION \
+ ERROR_CLASS_GENERIC_ERROR, "Migration is disabled when using feature '%s' in device '%s'"
+
+#define QERR_DEVICE_HAS_NO_MEDIUM \
+ ERROR_CLASS_GENERIC_ERROR, "Device '%s' has no medium"
+
+#define QERR_DEVICE_INIT_FAILED \
+ ERROR_CLASS_GENERIC_ERROR, "Device '%s' could not be initialized"
+
+#define QERR_DEVICE_IN_USE \
+ ERROR_CLASS_GENERIC_ERROR, "Device '%s' is in use"
+
+#define QERR_DEVICE_IS_READ_ONLY \
+ ERROR_CLASS_GENERIC_ERROR, "Device '%s' is read only"
+
+#define QERR_DEVICE_LOCKED \
+ ERROR_CLASS_GENERIC_ERROR, "Device '%s' is locked"
+
+#define QERR_DEVICE_MULTIPLE_BUSSES \
+ ERROR_CLASS_GENERIC_ERROR, "Device '%s' has multiple child busses"
+
+#define QERR_DEVICE_NO_BUS \
+ ERROR_CLASS_GENERIC_ERROR, "Device '%s' has no child bus"
+
+#define QERR_DEVICE_NO_HOTPLUG \
+ ERROR_CLASS_GENERIC_ERROR, "Device '%s' does not support hotplugging"
+
+#define QERR_DEVICE_NOT_ACTIVE \
+ ERROR_CLASS_DEVICE_NOT_ACTIVE, "Device '%s' has not been activated"
+
+#define QERR_DEVICE_NOT_ENCRYPTED \
+ ERROR_CLASS_GENERIC_ERROR, "Device '%s' is not encrypted"
+
+#define QERR_DEVICE_NOT_FOUND \
+ ERROR_CLASS_DEVICE_NOT_FOUND, "Device '%s' not found"
+
+#define QERR_DEVICE_NOT_REMOVABLE \
+ ERROR_CLASS_GENERIC_ERROR, "Device '%s' is not removable"
+
+#define QERR_DUPLICATE_ID \
+ ERROR_CLASS_GENERIC_ERROR, "Duplicate ID '%s' for %s"
+
+#define QERR_FD_NOT_FOUND \
+ ERROR_CLASS_GENERIC_ERROR, "File descriptor named '%s' not found"
+
+#define QERR_FD_NOT_SUPPLIED \
+ ERROR_CLASS_GENERIC_ERROR, "No file descriptor supplied via SCM_RIGHTS"
+
+#define QERR_FEATURE_DISABLED \
+ ERROR_CLASS_GENERIC_ERROR, "The feature '%s' is not enabled"
+
+#define QERR_INVALID_BLOCK_FORMAT \
+ ERROR_CLASS_GENERIC_ERROR, "Invalid block format '%s'"
+
+#define QERR_INVALID_OPTION_GROUP \
+ ERROR_CLASS_GENERIC_ERROR, "There is no option group '%s'"
+
+#define QERR_INVALID_PARAMETER \
+ ERROR_CLASS_GENERIC_ERROR, "Invalid parameter '%s'"
+
+#define QERR_INVALID_PARAMETER_COMBINATION \
+ ERROR_CLASS_GENERIC_ERROR, "Invalid parameter combination"
+
+#define QERR_INVALID_PARAMETER_TYPE \
+ ERROR_CLASS_GENERIC_ERROR, "Invalid parameter type for '%s', expected: %s"
+
+#define QERR_INVALID_PARAMETER_VALUE \
+ ERROR_CLASS_GENERIC_ERROR, "Parameter '%s' expects %s"
+
+#define QERR_INVALID_PASSWORD \
+ ERROR_CLASS_GENERIC_ERROR, "Password incorrect"
+
+#define QERR_IO_ERROR \
+ ERROR_CLASS_GENERIC_ERROR, "An IO error has occurred"
+
+#define QERR_JSON_PARSE_ERROR \
+ ERROR_CLASS_GENERIC_ERROR, "JSON parse error, %s"
+
+#define QERR_JSON_PARSING \
+ ERROR_CLASS_GENERIC_ERROR, "Invalid JSON syntax"
+
+#define QERR_KVM_MISSING_CAP \
+ ERROR_CLASS_K_V_M_MISSING_CAP, "Using KVM without %s, %s unavailable"
+
+#define QERR_MIGRATION_ACTIVE \
+ ERROR_CLASS_GENERIC_ERROR, "There's a migration process in progress"
+
+#define QERR_MIGRATION_NOT_SUPPORTED \
+ ERROR_CLASS_GENERIC_ERROR, "State blocked by non-migratable device '%s'"
+
+#define QERR_MISSING_PARAMETER \
+ ERROR_CLASS_GENERIC_ERROR, "Parameter '%s' is missing"
+
+#define QERR_NO_BUS_FOR_DEVICE \
+ ERROR_CLASS_GENERIC_ERROR, "No '%s' bus found for device '%s'"
+
+#define QERR_NOT_SUPPORTED \
+ ERROR_CLASS_GENERIC_ERROR, "Not supported"
+
+#define QERR_PERMISSION_DENIED \
+ ERROR_CLASS_GENERIC_ERROR, "Insufficient permission to perform this operation"
+
+#define QERR_PROPERTY_NOT_FOUND \
+ ERROR_CLASS_GENERIC_ERROR, "Property '%s.%s' not found"
+
+#define QERR_PROPERTY_VALUE_BAD \
+ ERROR_CLASS_GENERIC_ERROR, "Property '%s.%s' doesn't take value '%s'"
+
+#define QERR_PROPERTY_VALUE_IN_USE \
+ ERROR_CLASS_GENERIC_ERROR, "Property '%s.%s' can't take value '%s', it's in use"
+
+#define QERR_PROPERTY_VALUE_NOT_FOUND \
+ ERROR_CLASS_GENERIC_ERROR, "Property '%s.%s' can't find value '%s'"
+
+#define QERR_PROPERTY_VALUE_NOT_POWER_OF_2 \
+ ERROR_CLASS_GENERIC_ERROR, "Property %s.%s doesn't take value '%" PRId64 "', it's not a power of 2"
+
+#define QERR_PROPERTY_VALUE_OUT_OF_RANGE \
+ ERROR_CLASS_GENERIC_ERROR, "Property %s.%s doesn't take value %" PRId64 " (minimum: %" PRId64 ", maximum: %" PRId64 ")"
+
+#define QERR_QGA_COMMAND_FAILED \
+ ERROR_CLASS_GENERIC_ERROR, "Guest agent command failed, error was '%s'"
+
+#define QERR_QGA_LOGGING_FAILED \
+ ERROR_CLASS_GENERIC_ERROR, "Guest agent failed to log non-optional log statement"
+
+#define QERR_QMP_BAD_INPUT_OBJECT \
+ ERROR_CLASS_GENERIC_ERROR, "Expected '%s' in QMP input"
+
+#define QERR_QMP_BAD_INPUT_OBJECT_MEMBER \
+ ERROR_CLASS_GENERIC_ERROR, "QMP input object member '%s' expects '%s'"
+
+#define QERR_QMP_EXTRA_MEMBER \
+ ERROR_CLASS_GENERIC_ERROR, "QMP input object member '%s' is unexpected"
+
+#define QERR_RESET_REQUIRED \
+ ERROR_CLASS_GENERIC_ERROR, "Resetting the Virtual Machine is required"
+
+#define QERR_SET_PASSWD_FAILED \
+ ERROR_CLASS_GENERIC_ERROR, "Could not set password"
+
+#define QERR_TOO_MANY_FILES \
+ ERROR_CLASS_GENERIC_ERROR, "Too many open files"
+
+#define QERR_UNDEFINED_ERROR \
+ ERROR_CLASS_GENERIC_ERROR, "An undefined error has occurred"
+
+#define QERR_UNKNOWN_BLOCK_FORMAT_FEATURE \
+ ERROR_CLASS_GENERIC_ERROR, "'%s' uses a %s feature which is not supported by this qemu version: %s"
+
+#define QERR_UNSUPPORTED \
+ ERROR_CLASS_GENERIC_ERROR, "this feature or command is not currently supported"
+
+#define QERR_VIRTFS_FEATURE_BLOCKS_MIGRATION \
+ ERROR_CLASS_GENERIC_ERROR, "Migration is disabled when VirtFS export path '%s' is mounted in the guest using mount_tag '%s'"
+
+#define QERR_SOCKET_CONNECT_FAILED \
+ ERROR_CLASS_GENERIC_ERROR, "Failed to connect to socket"
+
+#define QERR_SOCKET_LISTEN_FAILED \
+ ERROR_CLASS_GENERIC_ERROR, "Failed to set socket to listening mode"
+
+#define QERR_SOCKET_BIND_FAILED \
+ ERROR_CLASS_GENERIC_ERROR, "Failed to bind socket"
+
+#define QERR_SOCKET_CREATE_FAILED \
+ ERROR_CLASS_GENERIC_ERROR, "Failed to create socket"
+
+#endif /* QERROR_H */
diff --git a/contrib/qemu/include/qapi/qmp/qfloat.h b/contrib/qemu/include/qapi/qmp/qfloat.h
new file mode 100644
index 000000000..a8658443d
--- /dev/null
+++ b/contrib/qemu/include/qapi/qmp/qfloat.h
@@ -0,0 +1,29 @@
+/*
+ * QFloat Module
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef QFLOAT_H
+#define QFLOAT_H
+
+#include <stdint.h>
+#include "qapi/qmp/qobject.h"
+
+typedef struct QFloat {
+ QObject_HEAD;
+ double value;
+} QFloat;
+
+QFloat *qfloat_from_double(double value);
+double qfloat_get_double(const QFloat *qi);
+QFloat *qobject_to_qfloat(const QObject *obj);
+
+#endif /* QFLOAT_H */
diff --git a/contrib/qemu/include/qapi/qmp/qint.h b/contrib/qemu/include/qapi/qmp/qint.h
new file mode 100644
index 000000000..48a41b0f2
--- /dev/null
+++ b/contrib/qemu/include/qapi/qmp/qint.h
@@ -0,0 +1,28 @@
+/*
+ * QInt Module
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * Authors:
+ * Luiz Capitulino <lcapitulino@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef QINT_H
+#define QINT_H
+
+#include <stdint.h>
+#include "qapi/qmp/qobject.h"
+
+typedef struct QInt {
+ QObject_HEAD;
+ int64_t value;
+} QInt;
+
+QInt *qint_from_int(int64_t value);
+int64_t qint_get_int(const QInt *qi);
+QInt *qobject_to_qint(const QObject *obj);
+
+#endif /* QINT_H */
diff --git a/contrib/qemu/include/qapi/qmp/qjson.h b/contrib/qemu/include/qapi/qmp/qjson.h
new file mode 100644
index 000000000..73351ed6d
--- /dev/null
+++ b/contrib/qemu/include/qapi/qmp/qjson.h
@@ -0,0 +1,29 @@
+/*
+ * QObject JSON integration
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef QJSON_H
+#define QJSON_H
+
+#include <stdarg.h>
+#include "qemu/compiler.h"
+#include "qapi/qmp/qobject.h"
+#include "qapi/qmp/qstring.h"
+
+QObject *qobject_from_json(const char *string) GCC_FMT_ATTR(1, 0);
+QObject *qobject_from_jsonf(const char *string, ...) GCC_FMT_ATTR(1, 2);
+QObject *qobject_from_jsonv(const char *string, va_list *ap) GCC_FMT_ATTR(1, 0);
+
+QString *qobject_to_json(const QObject *obj);
+QString *qobject_to_json_pretty(const QObject *obj);
+
+#endif /* QJSON_H */
diff --git a/contrib/qemu/include/qapi/qmp/qlist.h b/contrib/qemu/include/qapi/qmp/qlist.h
new file mode 100644
index 000000000..6cc4831df
--- /dev/null
+++ b/contrib/qemu/include/qapi/qmp/qlist.h
@@ -0,0 +1,63 @@
+/*
+ * QList Module
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * Authors:
+ * Luiz Capitulino <lcapitulino@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef QLIST_H
+#define QLIST_H
+
+#include "qapi/qmp/qobject.h"
+#include "qemu/queue.h"
+
+typedef struct QListEntry {
+ QObject *value;
+ QTAILQ_ENTRY(QListEntry) next;
+} QListEntry;
+
+typedef struct QList {
+ QObject_HEAD;
+ QTAILQ_HEAD(,QListEntry) head;
+} QList;
+
+#define qlist_append(qlist, obj) \
+ qlist_append_obj(qlist, QOBJECT(obj))
+
+#define QLIST_FOREACH_ENTRY(qlist, var) \
+ for ((var) = ((qlist)->head.tqh_first); \
+ (var); \
+ (var) = ((var)->next.tqe_next))
+
+static inline QObject *qlist_entry_obj(const QListEntry *entry)
+{
+ return entry->value;
+}
+
+QList *qlist_new(void);
+QList *qlist_copy(QList *src);
+void qlist_append_obj(QList *qlist, QObject *obj);
+void qlist_iter(const QList *qlist,
+ void (*iter)(QObject *obj, void *opaque), void *opaque);
+QObject *qlist_pop(QList *qlist);
+QObject *qlist_peek(QList *qlist);
+int qlist_empty(const QList *qlist);
+size_t qlist_size(const QList *qlist);
+QList *qobject_to_qlist(const QObject *obj);
+
+static inline const QListEntry *qlist_first(const QList *qlist)
+{
+ return QTAILQ_FIRST(&qlist->head);
+}
+
+static inline const QListEntry *qlist_next(const QListEntry *entry)
+{
+ return QTAILQ_NEXT(entry, next);
+}
+
+#endif /* QLIST_H */
diff --git a/contrib/qemu/include/qapi/qmp/qobject.h b/contrib/qemu/include/qapi/qmp/qobject.h
new file mode 100644
index 000000000..9124649ed
--- /dev/null
+++ b/contrib/qemu/include/qapi/qmp/qobject.h
@@ -0,0 +1,112 @@
+/*
+ * QEMU Object Model.
+ *
+ * Based on ideas by Avi Kivity <avi@redhat.com>
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * Authors:
+ * Luiz Capitulino <lcapitulino@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ * QObject Reference Counts Terminology
+ * ------------------------------------
+ *
+ * - Returning references: A function that returns an object may
+ * return it as either a weak or a strong reference. If the reference
+ * is strong, you are responsible for calling QDECREF() on the reference
+ * when you are done.
+ *
+ * If the reference is weak, the owner of the reference may free it at
+ * any time in the future. Before storing the reference anywhere, you
+ * should call QINCREF() to make the reference strong.
+ *
+ * - Transferring ownership: when you transfer ownership of a reference
+ * by calling a function, you are no longer responsible for calling
+ * QDECREF() when the reference is no longer needed. In other words,
+ * when the function returns you must behave as if the reference to the
+ * passed object was weak.
+ */
+#ifndef QOBJECT_H
+#define QOBJECT_H
+
+#include <stddef.h>
+#include <assert.h>
+
+typedef enum {
+ QTYPE_NONE,
+ QTYPE_QINT,
+ QTYPE_QSTRING,
+ QTYPE_QDICT,
+ QTYPE_QLIST,
+ QTYPE_QFLOAT,
+ QTYPE_QBOOL,
+ QTYPE_QERROR,
+} qtype_code;
+
+struct QObject;
+
+typedef struct QType {
+ qtype_code code;
+ void (*destroy)(struct QObject *);
+} QType;
+
+typedef struct QObject {
+ const QType *type;
+ size_t refcnt;
+} QObject;
+
+/* Objects definitions must include this */
+#define QObject_HEAD \
+ QObject base
+
+/* Get the 'base' part of an object */
+#define QOBJECT(obj) (&(obj)->base)
+
+/* High-level interface for qobject_incref() */
+#define QINCREF(obj) \
+ qobject_incref(QOBJECT(obj))
+
+/* High-level interface for qobject_decref() */
+#define QDECREF(obj) \
+ qobject_decref(obj ? QOBJECT(obj) : NULL)
+
+/* Initialize an object to default values */
+#define QOBJECT_INIT(obj, qtype_type) \
+ obj->base.refcnt = 1; \
+ obj->base.type = qtype_type
+
+/**
+ * qobject_incref(): Increment QObject's reference count
+ */
+static inline void qobject_incref(QObject *obj)
+{
+ if (obj)
+ obj->refcnt++;
+}
+
+/**
+ * qobject_decref(): Decrement QObject's reference count, deallocate
+ * when it reaches zero
+ */
+static inline void qobject_decref(QObject *obj)
+{
+ if (obj && --obj->refcnt == 0) {
+ assert(obj->type != NULL);
+ assert(obj->type->destroy != NULL);
+ obj->type->destroy(obj);
+ }
+}
+
+/**
+ * qobject_type(): Return the QObject's type
+ */
+static inline qtype_code qobject_type(const QObject *obj)
+{
+ assert(obj->type != NULL);
+ return obj->type->code;
+}
+
+#endif /* QOBJECT_H */
diff --git a/contrib/qemu/include/qapi/qmp/qstring.h b/contrib/qemu/include/qapi/qmp/qstring.h
new file mode 100644
index 000000000..1bc366610
--- /dev/null
+++ b/contrib/qemu/include/qapi/qmp/qstring.h
@@ -0,0 +1,36 @@
+/*
+ * QString Module
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * Authors:
+ * Luiz Capitulino <lcapitulino@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef QSTRING_H
+#define QSTRING_H
+
+#include <stdint.h>
+#include "qapi/qmp/qobject.h"
+
+typedef struct QString {
+ QObject_HEAD;
+ char *string;
+ size_t length;
+ size_t capacity;
+} QString;
+
+QString *qstring_new(void);
+QString *qstring_from_str(const char *str);
+QString *qstring_from_substr(const char *str, int start, int end);
+size_t qstring_get_length(const QString *qstring);
+const char *qstring_get_str(const QString *qstring);
+void qstring_append_int(QString *qstring, int64_t value);
+void qstring_append(QString *qstring, const char *str);
+void qstring_append_chr(QString *qstring, int c);
+QString *qobject_to_qstring(const QObject *obj);
+
+#endif /* QSTRING_H */
diff --git a/contrib/qemu/include/qapi/qmp/types.h b/contrib/qemu/include/qapi/qmp/types.h
new file mode 100644
index 000000000..7782ec5a6
--- /dev/null
+++ b/contrib/qemu/include/qapi/qmp/types.h
@@ -0,0 +1,25 @@
+/*
+ * Include all QEMU objects.
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * Authors:
+ * Luiz Capitulino <lcapitulino@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef QEMU_OBJECTS_H
+#define QEMU_OBJECTS_H
+
+#include "qapi/qmp/qobject.h"
+#include "qapi/qmp/qint.h"
+#include "qapi/qmp/qfloat.h"
+#include "qapi/qmp/qbool.h"
+#include "qapi/qmp/qstring.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qjson.h"
+
+#endif /* QEMU_OBJECTS_H */
diff --git a/contrib/qemu/include/qemu-common.h b/contrib/qemu/include/qemu-common.h
new file mode 100644
index 000000000..6948bb917
--- /dev/null
+++ b/contrib/qemu/include/qemu-common.h
@@ -0,0 +1,478 @@
+
+/* Common header file that is included by all of QEMU.
+ *
+ * This file is supposed to be included only by .c files. No header file should
+ * depend on qemu-common.h, as this would easily lead to circular header
+ * dependencies.
+ *
+ * If a header file uses a definition from qemu-common.h, that definition
+ * must be moved to a separate header file, and the header that uses it
+ * must include that header.
+ */
+#ifndef QEMU_COMMON_H
+#define QEMU_COMMON_H
+
+#include "qemu/compiler.h"
+#include "config-host.h"
+#include "qemu/typedefs.h"
+
+#if defined(__arm__) || defined(__sparc__) || defined(__mips__) || defined(__hppa__) || defined(__ia64__)
+#define WORDS_ALIGNED
+#endif
+
+#define TFR(expr) do { if ((expr) != -1) break; } while (errno == EINTR)
+
+/* we put basic includes here to avoid repeating them in device drivers */
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <string.h>
+#include <strings.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <time.h>
+#include <ctype.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <assert.h>
+#include <signal.h>
+#include "glib-compat.h"
+
+#ifdef _WIN32
+#include "sysemu/os-win32.h"
+#endif
+
+#ifdef CONFIG_POSIX
+#include "sysemu/os-posix.h"
+#endif
+
+#ifndef O_LARGEFILE
+#define O_LARGEFILE 0
+#endif
+#ifndef O_BINARY
+#define O_BINARY 0
+#endif
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+#ifndef ENOMEDIUM
+#define ENOMEDIUM ENODEV
+#endif
+#if !defined(ENOTSUP)
+#define ENOTSUP 4096
+#endif
+#if !defined(ECANCELED)
+#define ECANCELED 4097
+#endif
+#if !defined(EMEDIUMTYPE)
+#define EMEDIUMTYPE 4098
+#endif
+#ifndef TIME_MAX
+#define TIME_MAX LONG_MAX
+#endif
+
+/* HOST_LONG_BITS is the size of a native pointer in bits. */
+#if UINTPTR_MAX == UINT32_MAX
+# define HOST_LONG_BITS 32
+#elif UINTPTR_MAX == UINT64_MAX
+# define HOST_LONG_BITS 64
+#else
+# error Unknown pointer size
+#endif
+
+typedef int (*fprintf_function)(FILE *f, const char *fmt, ...)
+ GCC_FMT_ATTR(2, 3);
+
+#ifdef _WIN32
+#define fsync _commit
+#if !defined(lseek)
+# define lseek _lseeki64
+#endif
+int qemu_ftruncate64(int, int64_t);
+#if !defined(ftruncate)
+# define ftruncate qemu_ftruncate64
+#endif
+
+static inline char *realpath(const char *path, char *resolved_path)
+{
+ _fullpath(resolved_path, path, _MAX_PATH);
+ return resolved_path;
+}
+#endif
+
+/* icount */
+void configure_icount(const char *option);
+extern int use_icount;
+
+#include "qemu/osdep.h"
+#include "qemu/bswap.h"
+
+/* FIXME: Remove NEED_CPU_H. */
+#ifdef NEED_CPU_H
+#include "cpu.h"
+#endif /* !defined(NEED_CPU_H) */
+
+/* main function, renamed */
+#if defined(CONFIG_COCOA)
+int qemu_main(int argc, char **argv, char **envp);
+#endif
+
+void qemu_get_timedate(struct tm *tm, int offset);
+int qemu_timedate_diff(struct tm *tm);
+
+#if !GLIB_CHECK_VERSION(2, 20, 0)
+/*
+ * Glib before 2.20.0 doesn't implement g_poll, so wrap it to compile properly
+ * on older systems.
+ */
+static inline gint g_poll(GPollFD *fds, guint nfds, gint timeout)
+{
+ GMainContext *ctx = g_main_context_default();
+ return g_main_context_get_poll_func(ctx)(fds, nfds, timeout);
+}
+#endif
+
+/**
+ * is_help_option:
+ * @s: string to test
+ *
+ * Check whether @s is one of the standard strings which indicate
+ * that the user is asking for a list of the valid values for a
+ * command option like -cpu or -M. The current accepted strings
+ * are 'help' and '?'. '?' is deprecated (it is a shell wildcard
+ * which makes it annoying to use in a reliable way) but provided
+ * for backwards compatibility.
+ *
+ * Returns: true if @s is a request for a list.
+ */
+static inline bool is_help_option(const char *s)
+{
+ return !strcmp(s, "?") || !strcmp(s, "help");
+}
+
+/* cutils.c */
+void pstrcpy(char *buf, int buf_size, const char *str);
+void strpadcpy(char *buf, int buf_size, const char *str, char pad);
+char *pstrcat(char *buf, int buf_size, const char *s);
+int strstart(const char *str, const char *val, const char **ptr);
+int stristart(const char *str, const char *val, const char **ptr);
+int qemu_strnlen(const char *s, int max_len);
+char *qemu_strsep(char **input, const char *delim);
+time_t mktimegm(struct tm *tm);
+int qemu_fls(int i);
+int qemu_fdatasync(int fd);
+int fcntl_setfl(int fd, int flag);
+int qemu_parse_fd(const char *param);
+
+int parse_uint(const char *s, unsigned long long *value, char **endptr,
+ int base);
+int parse_uint_full(const char *s, unsigned long long *value, int base);
+
+/*
+ * strtosz() suffixes used to specify the default treatment of an
+ * argument passed to strtosz() without an explicit suffix.
+ * These should be defined using upper case characters in the range
+ * A-Z, as strtosz() will use qemu_toupper() on the given argument
+ * prior to comparison.
+ */
+#define STRTOSZ_DEFSUFFIX_EB 'E'
+#define STRTOSZ_DEFSUFFIX_PB 'P'
+#define STRTOSZ_DEFSUFFIX_TB 'T'
+#define STRTOSZ_DEFSUFFIX_GB 'G'
+#define STRTOSZ_DEFSUFFIX_MB 'M'
+#define STRTOSZ_DEFSUFFIX_KB 'K'
+#define STRTOSZ_DEFSUFFIX_B 'B'
+int64_t strtosz(const char *nptr, char **end);
+int64_t strtosz_suffix(const char *nptr, char **end, const char default_suffix);
+int64_t strtosz_suffix_unit(const char *nptr, char **end,
+ const char default_suffix, int64_t unit);
+
+/* path.c */
+void init_paths(const char *prefix);
+const char *path(const char *pathname);
+
+#define qemu_isalnum(c) isalnum((unsigned char)(c))
+#define qemu_isalpha(c) isalpha((unsigned char)(c))
+#define qemu_iscntrl(c) iscntrl((unsigned char)(c))
+#define qemu_isdigit(c) isdigit((unsigned char)(c))
+#define qemu_isgraph(c) isgraph((unsigned char)(c))
+#define qemu_islower(c) islower((unsigned char)(c))
+#define qemu_isprint(c) isprint((unsigned char)(c))
+#define qemu_ispunct(c) ispunct((unsigned char)(c))
+#define qemu_isspace(c) isspace((unsigned char)(c))
+#define qemu_isupper(c) isupper((unsigned char)(c))
+#define qemu_isxdigit(c) isxdigit((unsigned char)(c))
+#define qemu_tolower(c) tolower((unsigned char)(c))
+#define qemu_toupper(c) toupper((unsigned char)(c))
+#define qemu_isascii(c) isascii((unsigned char)(c))
+#define qemu_toascii(c) toascii((unsigned char)(c))
+
+void *qemu_oom_check(void *ptr);
+
+ssize_t qemu_write_full(int fd, const void *buf, size_t count)
+ QEMU_WARN_UNUSED_RESULT;
+ssize_t qemu_send_full(int fd, const void *buf, size_t count, int flags)
+ QEMU_WARN_UNUSED_RESULT;
+ssize_t qemu_recv_full(int fd, void *buf, size_t count, int flags)
+ QEMU_WARN_UNUSED_RESULT;
+
+#ifndef _WIN32
+int qemu_pipe(int pipefd[2]);
+/* like openpty() but also makes it raw; return master fd */
+int qemu_openpty_raw(int *aslave, char *pty_name);
+#endif
+
+#ifdef _WIN32
+/* MinGW needs type casts for the 'buf' and 'optval' arguments. */
+#define qemu_getsockopt(sockfd, level, optname, optval, optlen) \
+ getsockopt(sockfd, level, optname, (void *)optval, optlen)
+#define qemu_setsockopt(sockfd, level, optname, optval, optlen) \
+ setsockopt(sockfd, level, optname, (const void *)optval, optlen)
+#define qemu_recv(sockfd, buf, len, flags) recv(sockfd, (void *)buf, len, flags)
+#define qemu_sendto(sockfd, buf, len, flags, destaddr, addrlen) \
+ sendto(sockfd, (const void *)buf, len, flags, destaddr, addrlen)
+#else
+#define qemu_getsockopt(sockfd, level, optname, optval, optlen) \
+ getsockopt(sockfd, level, optname, optval, optlen)
+#define qemu_setsockopt(sockfd, level, optname, optval, optlen) \
+ setsockopt(sockfd, level, optname, optval, optlen)
+#define qemu_recv(sockfd, buf, len, flags) recv(sockfd, buf, len, flags)
+#define qemu_sendto(sockfd, buf, len, flags, destaddr, addrlen) \
+ sendto(sockfd, buf, len, flags, destaddr, addrlen)
+#endif
+
+/* Error handling. */
+
+void QEMU_NORETURN hw_error(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
+
+struct ParallelIOArg {
+ void *buffer;
+ int count;
+};
+
+typedef int (*DMA_transfer_handler) (void *opaque, int nchan, int pos, int size);
+
+typedef uint64_t pcibus_t;
+
+typedef enum LostTickPolicy {
+ LOST_TICK_DISCARD,
+ LOST_TICK_DELAY,
+ LOST_TICK_MERGE,
+ LOST_TICK_SLEW,
+ LOST_TICK_MAX
+} LostTickPolicy;
+
+typedef struct PCIHostDeviceAddress {
+ unsigned int domain;
+ unsigned int bus;
+ unsigned int slot;
+ unsigned int function;
+} PCIHostDeviceAddress;
+
+void tcg_exec_init(unsigned long tb_size);
+bool tcg_enabled(void);
+
+void cpu_exec_init_all(void);
+
+/* CPU save/load. */
+#ifdef CPU_SAVE_VERSION
+void cpu_save(QEMUFile *f, void *opaque);
+int cpu_load(QEMUFile *f, void *opaque, int version_id);
+#endif
+
+/* Unblock cpu */
+void qemu_cpu_kick_self(void);
+
+/* work queue */
+struct qemu_work_item {
+ struct qemu_work_item *next;
+ void (*func)(void *data);
+ void *data;
+ int done;
+ bool free;
+};
+
+
+/**
+ * Sends a (part of) iovec down a socket, yielding when the socket is full, or
+ * Receives data into a (part of) iovec from a socket,
+ * yielding when there is no data in the socket.
+ * The same interface as qemu_sendv_recvv(), with added yielding.
+ * XXX should mark these as coroutine_fn
+ */
+ssize_t qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt,
+ size_t offset, size_t bytes, bool do_send);
+#define qemu_co_recvv(sockfd, iov, iov_cnt, offset, bytes) \
+ qemu_co_sendv_recvv(sockfd, iov, iov_cnt, offset, bytes, false)
+#define qemu_co_sendv(sockfd, iov, iov_cnt, offset, bytes) \
+ qemu_co_sendv_recvv(sockfd, iov, iov_cnt, offset, bytes, true)
+
+/**
+ * The same as above, but with just a single buffer
+ */
+ssize_t qemu_co_send_recv(int sockfd, void *buf, size_t bytes, bool do_send);
+#define qemu_co_recv(sockfd, buf, bytes) \
+ qemu_co_send_recv(sockfd, buf, bytes, false)
+#define qemu_co_send(sockfd, buf, bytes) \
+ qemu_co_send_recv(sockfd, buf, bytes, true)
+
+typedef struct QEMUIOVector {
+ struct iovec *iov;
+ int niov;
+ int nalloc;
+ size_t size;
+} QEMUIOVector;
+
+void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint);
+void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov);
+void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len);
+void qemu_iovec_concat(QEMUIOVector *dst,
+ QEMUIOVector *src, size_t soffset, size_t sbytes);
+void qemu_iovec_concat_iov(QEMUIOVector *dst,
+ struct iovec *src_iov, unsigned int src_cnt,
+ size_t soffset, size_t sbytes);
+void qemu_iovec_destroy(QEMUIOVector *qiov);
+void qemu_iovec_reset(QEMUIOVector *qiov);
+size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset,
+ void *buf, size_t bytes);
+size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset,
+ const void *buf, size_t bytes);
+size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset,
+ int fillc, size_t bytes);
+
+bool buffer_is_zero(const void *buf, size_t len);
+
+void qemu_progress_init(int enabled, float min_skip);
+void qemu_progress_end(void);
+void qemu_progress_print(float delta, int max);
+const char *qemu_get_vm_name(void);
+
+#define QEMU_FILE_TYPE_BIOS 0
+#define QEMU_FILE_TYPE_KEYMAP 1
+char *qemu_find_file(int type, const char *name);
+
+/* OS specific functions */
+void os_setup_early_signal_handling(void);
+char *os_find_datadir(const char *argv0);
+void os_parse_cmd_args(int index, const char *optarg);
+void os_pidfile_error(void);
+
+/* Convert a byte between binary and BCD. */
+static inline uint8_t to_bcd(uint8_t val)
+{
+ return ((val / 10) << 4) | (val % 10);
+}
+
+static inline uint8_t from_bcd(uint8_t val)
+{
+ return ((val >> 4) * 10) + (val & 0x0f);
+}
+
+/* compute with 96 bit intermediate result: (a*b)/c */
+static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
+{
+ union {
+ uint64_t ll;
+ struct {
+#ifdef HOST_WORDS_BIGENDIAN
+ uint32_t high, low;
+#else
+ uint32_t low, high;
+#endif
+ } l;
+ } u, res;
+ uint64_t rl, rh;
+
+ u.ll = a;
+ rl = (uint64_t)u.l.low * (uint64_t)b;
+ rh = (uint64_t)u.l.high * (uint64_t)b;
+ rh += (rl >> 32);
+ res.l.high = rh / c;
+ res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
+ return res.ll;
+}
+
+/* Round number down to multiple */
+#define QEMU_ALIGN_DOWN(n, m) ((n) / (m) * (m))
+
+/* Round number up to multiple */
+#define QEMU_ALIGN_UP(n, m) QEMU_ALIGN_DOWN((n) + (m) - 1, (m))
+
+static inline bool is_power_of_2(uint64_t value)
+{
+ if (!value) {
+ return 0;
+ }
+
+ return !(value & (value - 1));
+}
+
+/* round down to the nearest power of 2*/
+int64_t pow2floor(int64_t value);
+
+#include "qemu/module.h"
+
+/*
+ * Implementation of ULEB128 (http://en.wikipedia.org/wiki/LEB128)
+ * Input is limited to 14-bit numbers
+ */
+
+int uleb128_encode_small(uint8_t *out, uint32_t n);
+int uleb128_decode_small(const uint8_t *in, uint32_t *n);
+
+/* unicode.c */
+int mod_utf8_codepoint(const char *s, size_t n, char **end);
+
+/*
+ * Hexdump a buffer to a file. An optional string prefix is added to every line
+ */
+
+void qemu_hexdump(const char *buf, FILE *fp, const char *prefix, size_t size);
+
+/* vector definitions */
+#ifdef __ALTIVEC__
+#include <altivec.h>
+/* The altivec.h header says we're allowed to undef these for
+ * C++ compatibility. Here we don't care about C++, but we
+ * undef them anyway to avoid namespace pollution.
+ */
+#undef vector
+#undef pixel
+#undef bool
+#define VECTYPE __vector unsigned char
+#define SPLAT(p) vec_splat(vec_ld(0, p), 0)
+#define ALL_EQ(v1, v2) vec_all_eq(v1, v2)
+/* altivec.h may redefine the bool macro as vector type.
+ * Reset it to POSIX semantics. */
+#define bool _Bool
+#elif defined __SSE2__
+#include <emmintrin.h>
+#define VECTYPE __m128i
+#define SPLAT(p) _mm_set1_epi8(*(p))
+#define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 0xFFFF)
+#else
+#define VECTYPE unsigned long
+#define SPLAT(p) (*(p) * (~0UL / 255))
+#define ALL_EQ(v1, v2) ((v1) == (v2))
+#endif
+
+#define BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR 8
+static inline bool
+can_use_buffer_find_nonzero_offset(const void *buf, size_t len)
+{
+ return (len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR
+ * sizeof(VECTYPE)) == 0
+ && ((uintptr_t) buf) % sizeof(VECTYPE) == 0);
+}
+size_t buffer_find_nonzero_offset(const void *buf, size_t len);
+
+/*
+ * helper to parse debug environment variables
+ */
+int parse_debug_env(const char *name, int max, int initial);
+
+#endif
diff --git a/contrib/qemu/include/qemu/aes.h b/contrib/qemu/include/qemu/aes.h
new file mode 100644
index 000000000..e79c70743
--- /dev/null
+++ b/contrib/qemu/include/qemu/aes.h
@@ -0,0 +1,45 @@
+#ifndef QEMU_AES_H
+#define QEMU_AES_H
+
+#define AES_MAXNR 14
+#define AES_BLOCK_SIZE 16
+
+struct aes_key_st {
+ uint32_t rd_key[4 *(AES_MAXNR + 1)];
+ int rounds;
+};
+typedef struct aes_key_st AES_KEY;
+
+int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
+ AES_KEY *key);
+int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
+ AES_KEY *key);
+
+void AES_encrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key);
+void AES_decrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key);
+void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
+ const unsigned long length, const AES_KEY *key,
+ unsigned char *ivec, const int enc);
+
+/*
+AES_Te0[x] = S [x].[02, 01, 01, 03];
+AES_Te1[x] = S [x].[03, 02, 01, 01];
+AES_Te2[x] = S [x].[01, 03, 02, 01];
+AES_Te3[x] = S [x].[01, 01, 03, 02];
+AES_Te4[x] = S [x].[01, 01, 01, 01];
+
+AES_Td0[x] = Si[x].[0e, 09, 0d, 0b];
+AES_Td1[x] = Si[x].[0b, 0e, 09, 0d];
+AES_Td2[x] = Si[x].[0d, 0b, 0e, 09];
+AES_Td3[x] = Si[x].[09, 0d, 0b, 0e];
+AES_Td4[x] = Si[x].[01, 01, 01, 01];
+*/
+
+extern const uint32_t AES_Te0[256], AES_Te1[256], AES_Te2[256],
+ AES_Te3[256], AES_Te4[256];
+extern const uint32_t AES_Td0[256], AES_Td1[256], AES_Td2[256],
+ AES_Td3[256], AES_Td4[256];
+
+#endif
diff --git a/contrib/qemu/include/qemu/atomic.h b/contrib/qemu/include/qemu/atomic.h
new file mode 100644
index 000000000..0aa891330
--- /dev/null
+++ b/contrib/qemu/include/qemu/atomic.h
@@ -0,0 +1,202 @@
+/*
+ * Simple interface for atomic operations.
+ *
+ * Copyright (C) 2013 Red Hat, Inc.
+ *
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef __QEMU_ATOMIC_H
+#define __QEMU_ATOMIC_H 1
+
+#include "qemu/compiler.h"
+
+/* For C11 atomic ops */
+
+/* Compiler barrier */
+#define barrier() ({ asm volatile("" ::: "memory"); (void)0; })
+
+#ifndef __ATOMIC_RELAXED
+
+/*
+ * We use GCC builtin if it's available, as that can use mfence on
+ * 32-bit as well, e.g. if built with -march=pentium-m. However, on
+ * i386 the spec is buggy, and the implementation followed it until
+ * 4.3 (http://gcc.gnu.org/bugzilla/show_bug.cgi?id=36793).
+ */
+#if defined(__i386__) || defined(__x86_64__)
+#if !QEMU_GNUC_PREREQ(4, 4)
+#if defined __x86_64__
+#define smp_mb() ({ asm volatile("mfence" ::: "memory"); (void)0; })
+#else
+#define smp_mb() ({ asm volatile("lock; addl $0,0(%%esp) " ::: "memory"); (void)0; })
+#endif
+#endif
+#endif
+
+
+#ifdef __alpha__
+#define smp_read_barrier_depends() asm volatile("mb":::"memory")
+#endif
+
+#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__)
+
+/*
+ * Because of the strongly ordered storage model, wmb() and rmb() are nops
+ * here (a compiler barrier only). QEMU doesn't do accesses to write-combining
+ * qemu memory or non-temporal load/stores from C code.
+ */
+#define smp_wmb() barrier()
+#define smp_rmb() barrier()
+
+/*
+ * __sync_lock_test_and_set() is documented to be an acquire barrier only,
+ * but it is a full barrier at the hardware level. Add a compiler barrier
+ * to make it a full barrier also at the compiler level.
+ */
+#define atomic_xchg(ptr, i) (barrier(), __sync_lock_test_and_set(ptr, i))
+
+/*
+ * Load/store with Java volatile semantics.
+ */
+#define atomic_mb_set(ptr, i) ((void)atomic_xchg(ptr, i))
+
+#elif defined(_ARCH_PPC)
+
+/*
+ * We use an eieio() for wmb() on powerpc. This assumes we don't
+ * need to order cacheable and non-cacheable stores with respect to
+ * each other.
+ *
+ * smp_mb has the same problem as on x86 for not-very-new GCC
+ * (http://patchwork.ozlabs.org/patch/126184/, Nov 2011).
+ */
+#define smp_wmb() ({ asm volatile("eieio" ::: "memory"); (void)0; })
+#if defined(__powerpc64__)
+#define smp_rmb() ({ asm volatile("lwsync" ::: "memory"); (void)0; })
+#else
+#define smp_rmb() ({ asm volatile("sync" ::: "memory"); (void)0; })
+#endif
+#define smp_mb() ({ asm volatile("sync" ::: "memory"); (void)0; })
+
+#endif /* _ARCH_PPC */
+
+#endif /* C11 atomics */
+
+/*
+ * For (host) platforms we don't have explicit barrier definitions
+ * for, we use the gcc __sync_synchronize() primitive to generate a
+ * full barrier. This should be safe on all platforms, though it may
+ * be overkill for smp_wmb() and smp_rmb().
+ */
+#ifndef smp_mb
+#define smp_mb() __sync_synchronize()
+#endif
+
+#ifndef smp_wmb
+#ifdef __ATOMIC_RELEASE
+#define smp_wmb() __atomic_thread_fence(__ATOMIC_RELEASE)
+#else
+#define smp_wmb() __sync_synchronize()
+#endif
+#endif
+
+#ifndef smp_rmb
+#ifdef __ATOMIC_ACQUIRE
+#define smp_rmb() __atomic_thread_fence(__ATOMIC_ACQUIRE)
+#else
+#define smp_rmb() __sync_synchronize()
+#endif
+#endif
+
+#ifndef smp_read_barrier_depends
+#ifdef __ATOMIC_CONSUME
+#define smp_read_barrier_depends() __atomic_thread_fence(__ATOMIC_CONSUME)
+#else
+#define smp_read_barrier_depends() barrier()
+#endif
+#endif
+
+#ifndef atomic_read
+#define atomic_read(ptr) (*(__typeof__(*ptr) *volatile) (ptr))
+#endif
+
+#ifndef atomic_set
+#define atomic_set(ptr, i) ((*(__typeof__(*ptr) *volatile) (ptr)) = (i))
+#endif
+
+/* These have the same semantics as Java volatile variables.
+ * See http://gee.cs.oswego.edu/dl/jmm/cookbook.html:
+ * "1. Issue a StoreStore barrier (wmb) before each volatile store."
+ * 2. Issue a StoreLoad barrier after each volatile store.
+ * Note that you could instead issue one before each volatile load, but
+ * this would be slower for typical programs using volatiles in which
+ * reads greatly outnumber writes. Alternatively, if available, you
+ * can implement volatile store as an atomic instruction (for example
+ * XCHG on x86) and omit the barrier. This may be more efficient if
+ * atomic instructions are cheaper than StoreLoad barriers.
+ * 3. Issue LoadLoad and LoadStore barriers after each volatile load."
+ *
+ * If you prefer to think in terms of "pairing" of memory barriers,
+ * an atomic_mb_read pairs with an atomic_mb_set.
+ *
+ * And for the few ia64 lovers that exist, an atomic_mb_read is a ld.acq,
+ * while an atomic_mb_set is a st.rel followed by a memory barrier.
+ *
+ * These are a bit weaker than __atomic_load/store with __ATOMIC_SEQ_CST
+ * (see docs/atomics.txt), and I'm not sure that __ATOMIC_ACQ_REL is enough.
+ * Just always use the barriers manually by the rules above.
+ */
+#ifndef atomic_mb_read
+#define atomic_mb_read(ptr) ({ \
+ typeof(*ptr) _val = atomic_read(ptr); \
+ smp_rmb(); \
+ _val; \
+})
+#endif
+
+#ifndef atomic_mb_set
+#define atomic_mb_set(ptr, i) do { \
+ smp_wmb(); \
+ atomic_set(ptr, i); \
+ smp_mb(); \
+} while (0)
+#endif
+
+#ifndef atomic_xchg
+#ifdef __ATOMIC_SEQ_CST
+#define atomic_xchg(ptr, i) ({ \
+ typeof(*ptr) _new = (i), _old; \
+ __atomic_exchange(ptr, &_new, &_old, __ATOMIC_SEQ_CST); \
+ _old; \
+})
+#elif defined __clang__
+#define atomic_xchg(ptr, i) __sync_exchange(ptr, i)
+#else
+/* __sync_lock_test_and_set() is documented to be an acquire barrier only. */
+#define atomic_xchg(ptr, i) (smp_mb(), __sync_lock_test_and_set(ptr, i))
+#endif
+#endif
+
+/* Provide shorter names for GCC atomic builtins. */
+#define atomic_fetch_inc(ptr) __sync_fetch_and_add(ptr, 1)
+#define atomic_fetch_dec(ptr) __sync_fetch_and_add(ptr, -1)
+#define atomic_fetch_add __sync_fetch_and_add
+#define atomic_fetch_sub __sync_fetch_and_sub
+#define atomic_fetch_and __sync_fetch_and_and
+#define atomic_fetch_or __sync_fetch_and_or
+#define atomic_cmpxchg __sync_val_compare_and_swap
+
+/* And even shorter names that return void. */
+#define atomic_inc(ptr) ((void) __sync_fetch_and_add(ptr, 1))
+#define atomic_dec(ptr) ((void) __sync_fetch_and_add(ptr, -1))
+#define atomic_add(ptr, n) ((void) __sync_fetch_and_add(ptr, n))
+#define atomic_sub(ptr, n) ((void) __sync_fetch_and_sub(ptr, n))
+#define atomic_and(ptr, n) ((void) __sync_fetch_and_and(ptr, n))
+#define atomic_or(ptr, n) ((void) __sync_fetch_and_or(ptr, n))
+
+#endif
diff --git a/contrib/qemu/include/qemu/bitmap.h b/contrib/qemu/include/qemu/bitmap.h
new file mode 100644
index 000000000..308bbb71e
--- /dev/null
+++ b/contrib/qemu/include/qemu/bitmap.h
@@ -0,0 +1,222 @@
+/*
+ * Bitmap Module
+ *
+ * Copyright (C) 2010 Corentin Chary <corentin.chary@gmail.com>
+ *
+ * Mostly inspired by (stolen from) linux/bitmap.h and linux/bitops.h
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef BITMAP_H
+#define BITMAP_H
+
+#include "qemu-common.h"
+#include "qemu/bitops.h"
+
+/*
+ * The available bitmap operations and their rough meaning in the
+ * case that the bitmap is a single unsigned long are thus:
+ *
+ * Note that nbits should be always a compile time evaluable constant.
+ * Otherwise many inlines will generate horrible code.
+ *
+ * bitmap_zero(dst, nbits) *dst = 0UL
+ * bitmap_fill(dst, nbits) *dst = ~0UL
+ * bitmap_copy(dst, src, nbits) *dst = *src
+ * bitmap_and(dst, src1, src2, nbits) *dst = *src1 & *src2
+ * bitmap_or(dst, src1, src2, nbits) *dst = *src1 | *src2
+ * bitmap_xor(dst, src1, src2, nbits) *dst = *src1 ^ *src2
+ * bitmap_andnot(dst, src1, src2, nbits) *dst = *src1 & ~(*src2)
+ * bitmap_complement(dst, src, nbits) *dst = ~(*src)
+ * bitmap_equal(src1, src2, nbits) Are *src1 and *src2 equal?
+ * bitmap_intersects(src1, src2, nbits) Do *src1 and *src2 overlap?
+ * bitmap_empty(src, nbits) Are all bits zero in *src?
+ * bitmap_full(src, nbits) Are all bits set in *src?
+ * bitmap_set(dst, pos, nbits) Set specified bit area
+ * bitmap_clear(dst, pos, nbits) Clear specified bit area
+ * bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area
+ */
+
+/*
+ * Also the following operations apply to bitmaps.
+ *
+ * set_bit(bit, addr) *addr |= bit
+ * clear_bit(bit, addr) *addr &= ~bit
+ * change_bit(bit, addr) *addr ^= bit
+ * test_bit(bit, addr) Is bit set in *addr?
+ * test_and_set_bit(bit, addr) Set bit and return old value
+ * test_and_clear_bit(bit, addr) Clear bit and return old value
+ * test_and_change_bit(bit, addr) Change bit and return old value
+ * find_first_zero_bit(addr, nbits) Position first zero bit in *addr
+ * find_first_bit(addr, nbits) Position first set bit in *addr
+ * find_next_zero_bit(addr, nbits, bit) Position next zero bit in *addr >= bit
+ * find_next_bit(addr, nbits, bit) Position next set bit in *addr >= bit
+ */
+
+#define BITMAP_LAST_WORD_MASK(nbits) \
+ ( \
+ ((nbits) % BITS_PER_LONG) ? \
+ (1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL \
+ )
+
+#define DECLARE_BITMAP(name,bits) \
+ unsigned long name[BITS_TO_LONGS(bits)]
+
+#define small_nbits(nbits) \
+ ((nbits) <= BITS_PER_LONG)
+
+int slow_bitmap_empty(const unsigned long *bitmap, int bits);
+int slow_bitmap_full(const unsigned long *bitmap, int bits);
+int slow_bitmap_equal(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits);
+void slow_bitmap_complement(unsigned long *dst, const unsigned long *src,
+ int bits);
+void slow_bitmap_shift_right(unsigned long *dst,
+ const unsigned long *src, int shift, int bits);
+void slow_bitmap_shift_left(unsigned long *dst,
+ const unsigned long *src, int shift, int bits);
+int slow_bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits);
+void slow_bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits);
+void slow_bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits);
+int slow_bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits);
+int slow_bitmap_intersects(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits);
+
+static inline unsigned long *bitmap_new(int nbits)
+{
+ int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+ return g_malloc0(len);
+}
+
+static inline void bitmap_zero(unsigned long *dst, int nbits)
+{
+ if (small_nbits(nbits)) {
+ *dst = 0UL;
+ } else {
+ int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+ memset(dst, 0, len);
+ }
+}
+
+static inline void bitmap_fill(unsigned long *dst, int nbits)
+{
+ size_t nlongs = BITS_TO_LONGS(nbits);
+ if (!small_nbits(nbits)) {
+ int len = (nlongs - 1) * sizeof(unsigned long);
+ memset(dst, 0xff, len);
+ }
+ dst[nlongs - 1] = BITMAP_LAST_WORD_MASK(nbits);
+}
+
+static inline void bitmap_copy(unsigned long *dst, const unsigned long *src,
+ int nbits)
+{
+ if (small_nbits(nbits)) {
+ *dst = *src;
+ } else {
+ int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+ memcpy(dst, src, len);
+ }
+}
+
+static inline int bitmap_and(unsigned long *dst, const unsigned long *src1,
+ const unsigned long *src2, int nbits)
+{
+ if (small_nbits(nbits)) {
+ return (*dst = *src1 & *src2) != 0;
+ }
+ return slow_bitmap_and(dst, src1, src2, nbits);
+}
+
+static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
+ const unsigned long *src2, int nbits)
+{
+ if (small_nbits(nbits)) {
+ *dst = *src1 | *src2;
+ } else {
+ slow_bitmap_or(dst, src1, src2, nbits);
+ }
+}
+
+static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
+ const unsigned long *src2, int nbits)
+{
+ if (small_nbits(nbits)) {
+ *dst = *src1 ^ *src2;
+ } else {
+ slow_bitmap_xor(dst, src1, src2, nbits);
+ }
+}
+
+static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1,
+ const unsigned long *src2, int nbits)
+{
+ if (small_nbits(nbits)) {
+ return (*dst = *src1 & ~(*src2)) != 0;
+ }
+ return slow_bitmap_andnot(dst, src1, src2, nbits);
+}
+
+static inline void bitmap_complement(unsigned long *dst, const unsigned long *src,
+ int nbits)
+{
+ if (small_nbits(nbits)) {
+ *dst = ~(*src) & BITMAP_LAST_WORD_MASK(nbits);
+ } else {
+ slow_bitmap_complement(dst, src, nbits);
+ }
+}
+
+static inline int bitmap_equal(const unsigned long *src1,
+ const unsigned long *src2, int nbits)
+{
+ if (small_nbits(nbits)) {
+ return ! ((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits));
+ } else {
+ return slow_bitmap_equal(src1, src2, nbits);
+ }
+}
+
+static inline int bitmap_empty(const unsigned long *src, int nbits)
+{
+ if (small_nbits(nbits)) {
+ return ! (*src & BITMAP_LAST_WORD_MASK(nbits));
+ } else {
+ return slow_bitmap_empty(src, nbits);
+ }
+}
+
+static inline int bitmap_full(const unsigned long *src, int nbits)
+{
+ if (small_nbits(nbits)) {
+ return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits));
+ } else {
+ return slow_bitmap_full(src, nbits);
+ }
+}
+
+static inline int bitmap_intersects(const unsigned long *src1,
+ const unsigned long *src2, int nbits)
+{
+ if (small_nbits(nbits)) {
+ return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
+ } else {
+ return slow_bitmap_intersects(src1, src2, nbits);
+ }
+}
+
+void bitmap_set(unsigned long *map, int i, int len);
+void bitmap_clear(unsigned long *map, int start, int nr);
+unsigned long bitmap_find_next_zero_area(unsigned long *map,
+ unsigned long size,
+ unsigned long start,
+ unsigned int nr,
+ unsigned long align_mask);
+
+#endif /* BITMAP_H */
diff --git a/contrib/qemu/include/qemu/bitops.h b/contrib/qemu/include/qemu/bitops.h
new file mode 100644
index 000000000..affcc969d
--- /dev/null
+++ b/contrib/qemu/include/qemu/bitops.h
@@ -0,0 +1,276 @@
+/*
+ * Bitops Module
+ *
+ * Copyright (C) 2010 Corentin Chary <corentin.chary@gmail.com>
+ *
+ * Mostly inspired by (stolen from) linux/bitmap.h and linux/bitops.h
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef BITOPS_H
+#define BITOPS_H
+
+#include "qemu-common.h"
+#include "host-utils.h"
+
+#define BITS_PER_BYTE CHAR_BIT
+#define BITS_PER_LONG (sizeof (unsigned long) * BITS_PER_BYTE)
+
+#define BIT(nr) (1UL << (nr))
+#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))
+#define BIT_WORD(nr) ((nr) / BITS_PER_LONG)
+#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
+
+/**
+ * set_bit - Set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ */
+static inline void set_bit(int nr, unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = addr + BIT_WORD(nr);
+
+ *p |= mask;
+}
+
+/**
+ * clear_bit - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ */
+static inline void clear_bit(int nr, unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = addr + BIT_WORD(nr);
+
+ *p &= ~mask;
+}
+
+/**
+ * change_bit - Toggle a bit in memory
+ * @nr: Bit to change
+ * @addr: Address to start counting from
+ */
+static inline void change_bit(int nr, unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = addr + BIT_WORD(nr);
+
+ *p ^= mask;
+}
+
+/**
+ * test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ */
+static inline int test_and_set_bit(int nr, unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = addr + BIT_WORD(nr);
+ unsigned long old = *p;
+
+ *p = old | mask;
+ return (old & mask) != 0;
+}
+
+/**
+ * test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ */
+static inline int test_and_clear_bit(int nr, unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = addr + BIT_WORD(nr);
+ unsigned long old = *p;
+
+ *p = old & ~mask;
+ return (old & mask) != 0;
+}
+
+/**
+ * test_and_change_bit - Change a bit and return its old value
+ * @nr: Bit to change
+ * @addr: Address to count from
+ */
+static inline int test_and_change_bit(int nr, unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = addr + BIT_WORD(nr);
+ unsigned long old = *p;
+
+ *p = old ^ mask;
+ return (old & mask) != 0;
+}
+
+/**
+ * test_bit - Determine whether a bit is set
+ * @nr: bit number to test
+ * @addr: Address to start counting from
+ */
+static inline int test_bit(int nr, const unsigned long *addr)
+{
+ return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
+}
+
+/**
+ * find_last_bit - find the last set bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
+ *
+ * Returns the bit number of the first set bit, or size.
+ */
+unsigned long find_last_bit(const unsigned long *addr,
+ unsigned long size);
+
+/**
+ * find_next_bit - find the next set bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The bitmap size in bits
+ */
+unsigned long find_next_bit(const unsigned long *addr,
+ unsigned long size, unsigned long offset);
+
+/**
+ * find_next_zero_bit - find the next cleared bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The bitmap size in bits
+ */
+
+unsigned long find_next_zero_bit(const unsigned long *addr,
+ unsigned long size,
+ unsigned long offset);
+
+/**
+ * find_first_bit - find the first set bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
+ *
+ * Returns the bit number of the first set bit.
+ */
+static inline unsigned long find_first_bit(const unsigned long *addr,
+ unsigned long size)
+{
+ return find_next_bit(addr, size, 0);
+}
+
+/**
+ * find_first_zero_bit - find the first cleared bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
+ *
+ * Returns the bit number of the first cleared bit.
+ */
+static inline unsigned long find_first_zero_bit(const unsigned long *addr,
+ unsigned long size)
+{
+ return find_next_zero_bit(addr, size, 0);
+}
+
+static inline unsigned long hweight_long(unsigned long w)
+{
+ unsigned long count;
+
+ for (count = 0; w; w >>= 1) {
+ count += w & 1;
+ }
+ return count;
+}
+
+/**
+ * extract32:
+ * @value: the value to extract the bit field from
+ * @start: the lowest bit in the bit field (numbered from 0)
+ * @length: the length of the bit field
+ *
+ * Extract from the 32 bit input @value the bit field specified by the
+ * @start and @length parameters, and return it. The bit field must
+ * lie entirely within the 32 bit word. It is valid to request that
+ * all 32 bits are returned (ie @length 32 and @start 0).
+ *
+ * Returns: the value of the bit field extracted from the input value.
+ */
+static inline uint32_t extract32(uint32_t value, int start, int length)
+{
+ assert(start >= 0 && length > 0 && length <= 32 - start);
+ return (value >> start) & (~0U >> (32 - length));
+}
+
+/**
+ * extract64:
+ * @value: the value to extract the bit field from
+ * @start: the lowest bit in the bit field (numbered from 0)
+ * @length: the length of the bit field
+ *
+ * Extract from the 64 bit input @value the bit field specified by the
+ * @start and @length parameters, and return it. The bit field must
+ * lie entirely within the 64 bit word. It is valid to request that
+ * all 64 bits are returned (ie @length 64 and @start 0).
+ *
+ * Returns: the value of the bit field extracted from the input value.
+ */
+static inline uint64_t extract64(uint64_t value, int start, int length)
+{
+ assert(start >= 0 && length > 0 && length <= 64 - start);
+ return (value >> start) & (~0ULL >> (64 - length));
+}
+
+/**
+ * deposit32:
+ * @value: initial value to insert bit field into
+ * @start: the lowest bit in the bit field (numbered from 0)
+ * @length: the length of the bit field
+ * @fieldval: the value to insert into the bit field
+ *
+ * Deposit @fieldval into the 32 bit @value at the bit field specified
+ * by the @start and @length parameters, and return the modified
+ * @value. Bits of @value outside the bit field are not modified.
+ * Bits of @fieldval above the least significant @length bits are
+ * ignored. The bit field must lie entirely within the 32 bit word.
+ * It is valid to request that all 32 bits are modified (ie @length
+ * 32 and @start 0).
+ *
+ * Returns: the modified @value.
+ */
+static inline uint32_t deposit32(uint32_t value, int start, int length,
+ uint32_t fieldval)
+{
+ uint32_t mask;
+ assert(start >= 0 && length > 0 && length <= 32 - start);
+ mask = (~0U >> (32 - length)) << start;
+ return (value & ~mask) | ((fieldval << start) & mask);
+}
+
+/**
+ * deposit64:
+ * @value: initial value to insert bit field into
+ * @start: the lowest bit in the bit field (numbered from 0)
+ * @length: the length of the bit field
+ * @fieldval: the value to insert into the bit field
+ *
+ * Deposit @fieldval into the 64 bit @value at the bit field specified
+ * by the @start and @length parameters, and return the modified
+ * @value. Bits of @value outside the bit field are not modified.
+ * Bits of @fieldval above the least significant @length bits are
+ * ignored. The bit field must lie entirely within the 64 bit word.
+ * It is valid to request that all 64 bits are modified (ie @length
+ * 64 and @start 0).
+ *
+ * Returns: the modified @value.
+ */
+static inline uint64_t deposit64(uint64_t value, int start, int length,
+ uint64_t fieldval)
+{
+ uint64_t mask;
+ assert(start >= 0 && length > 0 && length <= 64 - start);
+ mask = (~0ULL >> (64 - length)) << start;
+ return (value & ~mask) | ((fieldval << start) & mask);
+}
+
+#endif
diff --git a/contrib/qemu/include/qemu/bswap.h b/contrib/qemu/include/qemu/bswap.h
new file mode 100644
index 000000000..14a5f657c
--- /dev/null
+++ b/contrib/qemu/include/qemu/bswap.h
@@ -0,0 +1,478 @@
+#ifndef BSWAP_H
+#define BSWAP_H
+
+#include "config-host.h"
+#include <inttypes.h>
+#include <limits.h>
+#include <string.h>
+#include "fpu/softfloat.h"
+
+#ifdef CONFIG_MACHINE_BSWAP_H
+# include <sys/endian.h>
+# include <sys/types.h>
+# include <machine/bswap.h>
+#elif defined(CONFIG_BYTESWAP_H)
+# include <byteswap.h>
+
+static inline uint16_t bswap16(uint16_t x)
+{
+ return bswap_16(x);
+}
+
+static inline uint32_t bswap32(uint32_t x)
+{
+ return bswap_32(x);
+}
+
+static inline uint64_t bswap64(uint64_t x)
+{
+ return bswap_64(x);
+}
+# else
+static inline uint16_t bswap16(uint16_t x)
+{
+ return (((x & 0x00ff) << 8) |
+ ((x & 0xff00) >> 8));
+}
+
+static inline uint32_t bswap32(uint32_t x)
+{
+ return (((x & 0x000000ffU) << 24) |
+ ((x & 0x0000ff00U) << 8) |
+ ((x & 0x00ff0000U) >> 8) |
+ ((x & 0xff000000U) >> 24));
+}
+
+static inline uint64_t bswap64(uint64_t x)
+{
+ return (((x & 0x00000000000000ffULL) << 56) |
+ ((x & 0x000000000000ff00ULL) << 40) |
+ ((x & 0x0000000000ff0000ULL) << 24) |
+ ((x & 0x00000000ff000000ULL) << 8) |
+ ((x & 0x000000ff00000000ULL) >> 8) |
+ ((x & 0x0000ff0000000000ULL) >> 24) |
+ ((x & 0x00ff000000000000ULL) >> 40) |
+ ((x & 0xff00000000000000ULL) >> 56));
+}
+#endif /* ! CONFIG_MACHINE_BSWAP_H */
+
+static inline void bswap16s(uint16_t *s)
+{
+ *s = bswap16(*s);
+}
+
+static inline void bswap32s(uint32_t *s)
+{
+ *s = bswap32(*s);
+}
+
+static inline void bswap64s(uint64_t *s)
+{
+ *s = bswap64(*s);
+}
+
+#if defined(HOST_WORDS_BIGENDIAN)
+#define be_bswap(v, size) (v)
+#define le_bswap(v, size) glue(bswap, size)(v)
+#define be_bswaps(v, size)
+#define le_bswaps(p, size) do { *p = glue(bswap, size)(*p); } while(0)
+#else
+#define le_bswap(v, size) (v)
+#define be_bswap(v, size) glue(bswap, size)(v)
+#define le_bswaps(v, size)
+#define be_bswaps(p, size) do { *p = glue(bswap, size)(*p); } while(0)
+#endif
+
+#define CPU_CONVERT(endian, size, type)\
+static inline type endian ## size ## _to_cpu(type v)\
+{\
+ return glue(endian, _bswap)(v, size);\
+}\
+\
+static inline type cpu_to_ ## endian ## size(type v)\
+{\
+ return glue(endian, _bswap)(v, size);\
+}\
+\
+static inline void endian ## size ## _to_cpus(type *p)\
+{\
+ glue(endian, _bswaps)(p, size);\
+}\
+\
+static inline void cpu_to_ ## endian ## size ## s(type *p)\
+{\
+ glue(endian, _bswaps)(p, size);\
+}\
+\
+static inline type endian ## size ## _to_cpup(const type *p)\
+{\
+ return glue(glue(endian, size), _to_cpu)(*p);\
+}\
+\
+static inline void cpu_to_ ## endian ## size ## w(type *p, type v)\
+{\
+ *p = glue(glue(cpu_to_, endian), size)(v);\
+}
+
+CPU_CONVERT(be, 16, uint16_t)
+CPU_CONVERT(be, 32, uint32_t)
+CPU_CONVERT(be, 64, uint64_t)
+
+CPU_CONVERT(le, 16, uint16_t)
+CPU_CONVERT(le, 32, uint32_t)
+CPU_CONVERT(le, 64, uint64_t)
+
+/* len must be one of 1, 2, 4 */
+static inline uint32_t qemu_bswap_len(uint32_t value, int len)
+{
+ return bswap32(value) >> (32 - 8 * len);
+}
+
+/* Unions for reinterpreting between floats and integers. */
+
+typedef union {
+ float32 f;
+ uint32_t l;
+} CPU_FloatU;
+
+typedef union {
+ float64 d;
+#if defined(HOST_WORDS_BIGENDIAN)
+ struct {
+ uint32_t upper;
+ uint32_t lower;
+ } l;
+#else
+ struct {
+ uint32_t lower;
+ uint32_t upper;
+ } l;
+#endif
+ uint64_t ll;
+} CPU_DoubleU;
+
+typedef union {
+ floatx80 d;
+ struct {
+ uint64_t lower;
+ uint16_t upper;
+ } l;
+} CPU_LDoubleU;
+
+typedef union {
+ float128 q;
+#if defined(HOST_WORDS_BIGENDIAN)
+ struct {
+ uint32_t upmost;
+ uint32_t upper;
+ uint32_t lower;
+ uint32_t lowest;
+ } l;
+ struct {
+ uint64_t upper;
+ uint64_t lower;
+ } ll;
+#else
+ struct {
+ uint32_t lowest;
+ uint32_t lower;
+ uint32_t upper;
+ uint32_t upmost;
+ } l;
+ struct {
+ uint64_t lower;
+ uint64_t upper;
+ } ll;
+#endif
+} CPU_QuadU;
+
+/* unaligned/endian-independent pointer access */
+
+/*
+ * the generic syntax is:
+ *
+ * load: ld{type}{sign}{size}{endian}_p(ptr)
+ *
+ * store: st{type}{size}{endian}_p(ptr, val)
+ *
+ * Note there are small differences with the softmmu access API!
+ *
+ * type is:
+ * (empty): integer access
+ * f : float access
+ *
+ * sign is:
+ * (empty): for floats or 32 bit size
+ * u : unsigned
+ * s : signed
+ *
+ * size is:
+ * b: 8 bits
+ * w: 16 bits
+ * l: 32 bits
+ * q: 64 bits
+ *
+ * endian is:
+ * (empty): host endian
+ * be : big endian
+ * le : little endian
+ */
+
+static inline int ldub_p(const void *ptr)
+{
+ return *(uint8_t *)ptr;
+}
+
+static inline int ldsb_p(const void *ptr)
+{
+ return *(int8_t *)ptr;
+}
+
+static inline void stb_p(void *ptr, int v)
+{
+ *(uint8_t *)ptr = v;
+}
+
+/* Any compiler worth its salt will turn these memcpy into native unaligned
+ operations. Thus we don't need to play games with packed attributes, or
+ inline byte-by-byte stores. */
+
+static inline int lduw_p(const void *ptr)
+{
+ uint16_t r;
+ memcpy(&r, ptr, sizeof(r));
+ return r;
+}
+
+static inline int ldsw_p(const void *ptr)
+{
+ int16_t r;
+ memcpy(&r, ptr, sizeof(r));
+ return r;
+}
+
+static inline void stw_p(void *ptr, uint16_t v)
+{
+ memcpy(ptr, &v, sizeof(v));
+}
+
+static inline int ldl_p(const void *ptr)
+{
+ int32_t r;
+ memcpy(&r, ptr, sizeof(r));
+ return r;
+}
+
+static inline void stl_p(void *ptr, uint32_t v)
+{
+ memcpy(ptr, &v, sizeof(v));
+}
+
+static inline uint64_t ldq_p(const void *ptr)
+{
+ uint64_t r;
+ memcpy(&r, ptr, sizeof(r));
+ return r;
+}
+
+static inline void stq_p(void *ptr, uint64_t v)
+{
+ memcpy(ptr, &v, sizeof(v));
+}
+
+static inline int lduw_le_p(const void *ptr)
+{
+ return (uint16_t)le_bswap(lduw_p(ptr), 16);
+}
+
+static inline int ldsw_le_p(const void *ptr)
+{
+ return (int16_t)le_bswap(lduw_p(ptr), 16);
+}
+
+static inline int ldl_le_p(const void *ptr)
+{
+ return le_bswap(ldl_p(ptr), 32);
+}
+
+static inline uint64_t ldq_le_p(const void *ptr)
+{
+ return le_bswap(ldq_p(ptr), 64);
+}
+
+static inline void stw_le_p(void *ptr, int v)
+{
+ stw_p(ptr, le_bswap(v, 16));
+}
+
+static inline void stl_le_p(void *ptr, int v)
+{
+ stl_p(ptr, le_bswap(v, 32));
+}
+
+static inline void stq_le_p(void *ptr, uint64_t v)
+{
+ stq_p(ptr, le_bswap(v, 64));
+}
+
+/* float access */
+
+static inline float32 ldfl_le_p(const void *ptr)
+{
+ CPU_FloatU u;
+ u.l = ldl_le_p(ptr);
+ return u.f;
+}
+
+static inline void stfl_le_p(void *ptr, float32 v)
+{
+ CPU_FloatU u;
+ u.f = v;
+ stl_le_p(ptr, u.l);
+}
+
+static inline float64 ldfq_le_p(const void *ptr)
+{
+ CPU_DoubleU u;
+ u.ll = ldq_le_p(ptr);
+ return u.d;
+}
+
+static inline void stfq_le_p(void *ptr, float64 v)
+{
+ CPU_DoubleU u;
+ u.d = v;
+ stq_le_p(ptr, u.ll);
+}
+
+static inline int lduw_be_p(const void *ptr)
+{
+ return (uint16_t)be_bswap(lduw_p(ptr), 16);
+}
+
+static inline int ldsw_be_p(const void *ptr)
+{
+ return (int16_t)be_bswap(lduw_p(ptr), 16);
+}
+
+static inline int ldl_be_p(const void *ptr)
+{
+ return be_bswap(ldl_p(ptr), 32);
+}
+
+static inline uint64_t ldq_be_p(const void *ptr)
+{
+ return be_bswap(ldq_p(ptr), 64);
+}
+
+static inline void stw_be_p(void *ptr, int v)
+{
+ stw_p(ptr, be_bswap(v, 16));
+}
+
+static inline void stl_be_p(void *ptr, int v)
+{
+ stl_p(ptr, be_bswap(v, 32));
+}
+
+static inline void stq_be_p(void *ptr, uint64_t v)
+{
+ stq_p(ptr, be_bswap(v, 64));
+}
+
+/* float access */
+
+static inline float32 ldfl_be_p(const void *ptr)
+{
+ CPU_FloatU u;
+ u.l = ldl_be_p(ptr);
+ return u.f;
+}
+
+static inline void stfl_be_p(void *ptr, float32 v)
+{
+ CPU_FloatU u;
+ u.f = v;
+ stl_be_p(ptr, u.l);
+}
+
+static inline float64 ldfq_be_p(const void *ptr)
+{
+ CPU_DoubleU u;
+ u.ll = ldq_be_p(ptr);
+ return u.d;
+}
+
+static inline void stfq_be_p(void *ptr, float64 v)
+{
+ CPU_DoubleU u;
+ u.d = v;
+ stq_be_p(ptr, u.ll);
+}
+
+/* Legacy unaligned versions. Note that we never had a complete set. */
+
+static inline void cpu_to_le16wu(uint16_t *p, uint16_t v)
+{
+ stw_le_p(p, v);
+}
+
+static inline void cpu_to_le32wu(uint32_t *p, uint32_t v)
+{
+ stl_le_p(p, v);
+}
+
+static inline uint16_t le16_to_cpupu(const uint16_t *p)
+{
+ return lduw_le_p(p);
+}
+
+static inline uint32_t le32_to_cpupu(const uint32_t *p)
+{
+ return ldl_le_p(p);
+}
+
+static inline uint32_t be32_to_cpupu(const uint32_t *p)
+{
+ return ldl_be_p(p);
+}
+
+static inline void cpu_to_be16wu(uint16_t *p, uint16_t v)
+{
+ stw_be_p(p, v);
+}
+
+static inline void cpu_to_be32wu(uint32_t *p, uint32_t v)
+{
+ stl_be_p(p, v);
+}
+
+static inline void cpu_to_be64wu(uint64_t *p, uint64_t v)
+{
+ stq_be_p(p, v);
+}
+
+static inline void cpu_to_32wu(uint32_t *p, uint32_t v)
+{
+ stl_p(p, v);
+}
+
+static inline unsigned long leul_to_cpu(unsigned long v)
+{
+ /* In order to break an include loop between here and
+ qemu-common.h, don't rely on HOST_LONG_BITS. */
+#if ULONG_MAX == UINT32_MAX
+ return le_bswap(v, 32);
+#elif ULONG_MAX == UINT64_MAX
+ return le_bswap(v, 64);
+#else
+# error Unknown sizeof long
+#endif
+}
+
+#undef le_bswap
+#undef be_bswap
+#undef le_bswaps
+#undef be_bswaps
+
+#endif /* BSWAP_H */
diff --git a/contrib/qemu/include/qemu/compiler.h b/contrib/qemu/include/qemu/compiler.h
new file mode 100644
index 000000000..155b35896
--- /dev/null
+++ b/contrib/qemu/include/qemu/compiler.h
@@ -0,0 +1,55 @@
+/* public domain */
+
+#ifndef COMPILER_H
+#define COMPILER_H
+
+#include "config-host.h"
+
+/*----------------------------------------------------------------------------
+| The macro QEMU_GNUC_PREREQ tests for minimum version of the GNU C compiler.
+| The code is a copy of SOFTFLOAT_GNUC_PREREQ, see softfloat-macros.h.
+*----------------------------------------------------------------------------*/
+#if defined(__GNUC__) && defined(__GNUC_MINOR__)
+# define QEMU_GNUC_PREREQ(maj, min) \
+ ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
+#else
+# define QEMU_GNUC_PREREQ(maj, min) 0
+#endif
+
+#define QEMU_NORETURN __attribute__ ((__noreturn__))
+
+#if QEMU_GNUC_PREREQ(3, 4)
+#define QEMU_WARN_UNUSED_RESULT __attribute__((warn_unused_result))
+#else
+#define QEMU_WARN_UNUSED_RESULT
+#endif
+
+#if defined(_WIN32)
+# define QEMU_PACKED __attribute__((gcc_struct, packed))
+#else
+# define QEMU_PACKED __attribute__((packed))
+#endif
+
+#define cat(x,y) x ## y
+#define cat2(x,y) cat(x,y)
+#define QEMU_BUILD_BUG_ON(x) \
+ typedef char cat2(qemu_build_bug_on__,__LINE__)[(x)?-1:1] __attribute__((unused));
+
+#if defined __GNUC__
+# if !QEMU_GNUC_PREREQ(4, 4)
+ /* gcc versions before 4.4.x don't support gnu_printf, so use printf. */
+# define GCC_FMT_ATTR(n, m) __attribute__((format(printf, n, m)))
+# else
+ /* Use gnu_printf when supported (qemu uses standard format strings). */
+# define GCC_FMT_ATTR(n, m) __attribute__((format(gnu_printf, n, m)))
+# if defined(_WIN32)
+ /* Map __printf__ to __gnu_printf__ because we want standard format strings
+ * even when MinGW or GLib include files use __printf__. */
+# define __printf__ __gnu_printf__
+# endif
+# endif
+#else
+#define GCC_FMT_ATTR(n, m)
+#endif
+
+#endif /* COMPILER_H */
diff --git a/contrib/qemu/include/qemu/error-report.h b/contrib/qemu/include/qemu/error-report.h
new file mode 100644
index 000000000..3b098a917
--- /dev/null
+++ b/contrib/qemu/include/qemu/error-report.h
@@ -0,0 +1,46 @@
+/*
+ * Error reporting
+ *
+ * Copyright (C) 2010 Red Hat Inc.
+ *
+ * Authors:
+ * Markus Armbruster <armbru@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_ERROR_H
+#define QEMU_ERROR_H
+
+#include <stdarg.h>
+#include <stdbool.h>
+#include "qemu/compiler.h"
+
+typedef struct Location {
+ /* all members are private to qemu-error.c */
+ enum { LOC_NONE, LOC_CMDLINE, LOC_FILE } kind;
+ int num;
+ const void *ptr;
+ struct Location *prev;
+} Location;
+
+Location *loc_push_restore(Location *loc);
+Location *loc_push_none(Location *loc);
+Location *loc_pop(Location *loc);
+Location *loc_save(Location *loc);
+void loc_restore(Location *loc);
+void loc_set_none(void);
+void loc_set_cmdline(char **argv, int idx, int cnt);
+void loc_set_file(const char *fname, int lno);
+
+void error_vprintf(const char *fmt, va_list ap) GCC_FMT_ATTR(1, 0);
+void error_printf(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
+void error_printf_unless_qmp(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
+void error_print_loc(void);
+void error_set_progname(const char *argv0);
+void error_report(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
+const char *error_get_progname(void);
+extern bool enable_timestamp_msg;
+
+#endif
diff --git a/contrib/qemu/include/qemu/event_notifier.h b/contrib/qemu/include/qemu/event_notifier.h
new file mode 100644
index 000000000..88b57af7c
--- /dev/null
+++ b/contrib/qemu/include/qemu/event_notifier.h
@@ -0,0 +1,46 @@
+/*
+ * event notifier support
+ *
+ * Copyright Red Hat, Inc. 2010
+ *
+ * Authors:
+ * Michael S. Tsirkin <mst@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_EVENT_NOTIFIER_H
+#define QEMU_EVENT_NOTIFIER_H
+
+#include "qemu-common.h"
+
+#ifdef _WIN32
+#include <windows.h>
+#endif
+
+struct EventNotifier {
+#ifdef _WIN32
+ HANDLE event;
+#else
+ int rfd;
+ int wfd;
+#endif
+};
+
+typedef void EventNotifierHandler(EventNotifier *);
+
+int event_notifier_init(EventNotifier *, int active);
+void event_notifier_cleanup(EventNotifier *);
+int event_notifier_set(EventNotifier *);
+int event_notifier_test_and_clear(EventNotifier *);
+int event_notifier_set_handler(EventNotifier *, EventNotifierHandler *);
+
+#ifdef CONFIG_POSIX
+void event_notifier_init_fd(EventNotifier *, int fd);
+int event_notifier_get_fd(EventNotifier *);
+#else
+HANDLE event_notifier_get_handle(EventNotifier *);
+#endif
+
+#endif
diff --git a/contrib/qemu/include/qemu/hbitmap.h b/contrib/qemu/include/qemu/hbitmap.h
new file mode 100644
index 000000000..550d7ce2c
--- /dev/null
+++ b/contrib/qemu/include/qemu/hbitmap.h
@@ -0,0 +1,209 @@
+/*
+ * Hierarchical Bitmap Data Type
+ *
+ * Copyright Red Hat, Inc., 2012
+ *
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#ifndef HBITMAP_H
+#define HBITMAP_H 1
+
+#include <limits.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include "bitops.h"
+#include "host-utils.h"
+
+typedef struct HBitmap HBitmap;
+typedef struct HBitmapIter HBitmapIter;
+
+#define BITS_PER_LEVEL (BITS_PER_LONG == 32 ? 5 : 6)
+
+/* For 32-bit, the largest that fits in a 4 GiB address space.
+ * For 64-bit, the number of sectors in 1 PiB. Good luck, in
+ * either case... :)
+ */
+#define HBITMAP_LOG_MAX_SIZE (BITS_PER_LONG == 32 ? 34 : 41)
+
+/* We need to place a sentinel in level 0 to speed up iteration. Thus,
+ * we do this instead of HBITMAP_LOG_MAX_SIZE / BITS_PER_LEVEL. The
+ * difference is that it allocates an extra level when HBITMAP_LOG_MAX_SIZE
+ * is an exact multiple of BITS_PER_LEVEL.
+ */
+#define HBITMAP_LEVELS ((HBITMAP_LOG_MAX_SIZE / BITS_PER_LEVEL) + 1)
+
+struct HBitmapIter {
+ const HBitmap *hb;
+
+ /* Copied from hb for access in the inline functions (hb is opaque). */
+ int granularity;
+
+ /* Entry offset into the last-level array of longs. */
+ size_t pos;
+
+ /* The currently-active path in the tree. Each item of cur[i] stores
+ * the bits (i.e. the subtrees) yet to be processed under that node.
+ */
+ unsigned long cur[HBITMAP_LEVELS];
+};
+
+/**
+ * hbitmap_alloc:
+ * @size: Number of bits in the bitmap.
+ * @granularity: Granularity of the bitmap. Aligned groups of 2^@granularity
+ * bits will be represented by a single bit. Each operation on a
+ * range of bits first rounds the bits to determine which group they land
+ * in, and then affect the entire set; iteration will only visit the first
+ * bit of each group.
+ *
+ * Allocate a new HBitmap.
+ */
+HBitmap *hbitmap_alloc(uint64_t size, int granularity);
+
+/**
+ * hbitmap_empty:
+ * @hb: HBitmap to operate on.
+ *
+ * Return whether the bitmap is empty.
+ */
+bool hbitmap_empty(const HBitmap *hb);
+
+/**
+ * hbitmap_granularity:
+ * @hb: HBitmap to operate on.
+ *
+ * Return the granularity of the HBitmap.
+ */
+int hbitmap_granularity(const HBitmap *hb);
+
+/**
+ * hbitmap_count:
+ * @hb: HBitmap to operate on.
+ *
+ * Return the number of bits set in the HBitmap.
+ */
+uint64_t hbitmap_count(const HBitmap *hb);
+
+/**
+ * hbitmap_set:
+ * @hb: HBitmap to operate on.
+ * @start: First bit to set (0-based).
+ * @count: Number of bits to set.
+ *
+ * Set a consecutive range of bits in an HBitmap.
+ */
+void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count);
+
+/**
+ * hbitmap_reset:
+ * @hb: HBitmap to operate on.
+ * @start: First bit to reset (0-based).
+ * @count: Number of bits to reset.
+ *
+ * Reset a consecutive range of bits in an HBitmap.
+ */
+void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count);
+
+/**
+ * hbitmap_get:
+ * @hb: HBitmap to operate on.
+ * @item: Bit to query (0-based).
+ *
+ * Return whether the @item-th bit in an HBitmap is set.
+ */
+bool hbitmap_get(const HBitmap *hb, uint64_t item);
+
+/**
+ * hbitmap_free:
+ * @hb: HBitmap to operate on.
+ *
+ * Free an HBitmap and all of its associated memory.
+ */
+void hbitmap_free(HBitmap *hb);
+
+/**
+ * hbitmap_iter_init:
+ * @hbi: HBitmapIter to initialize.
+ * @hb: HBitmap to iterate on.
+ * @first: First bit to visit (0-based, must be strictly less than the
+ * size of the bitmap).
+ *
+ * Set up @hbi to iterate on the HBitmap @hb. hbitmap_iter_next will return
+ * the lowest-numbered bit that is set in @hb, starting at @first.
+ *
+ * Concurrent setting of bits is acceptable, and will at worst cause the
+ * iteration to miss some of those bits. Resetting bits before the current
+ * position of the iterator is also okay. However, concurrent resetting of
+ * bits can lead to unexpected behavior if the iterator has not yet reached
+ * those bits.
+ */
+void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first);
+
+/* hbitmap_iter_skip_words:
+ * @hbi: HBitmapIter to operate on.
+ *
+ * Internal function used by hbitmap_iter_next and hbitmap_iter_next_word.
+ */
+unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi);
+
+/**
+ * hbitmap_iter_next:
+ * @hbi: HBitmapIter to operate on.
+ *
+ * Return the next bit that is set in @hbi's associated HBitmap,
+ * or -1 if all remaining bits are zero.
+ */
+static inline int64_t hbitmap_iter_next(HBitmapIter *hbi)
+{
+ unsigned long cur = hbi->cur[HBITMAP_LEVELS - 1];
+ int64_t item;
+
+ if (cur == 0) {
+ cur = hbitmap_iter_skip_words(hbi);
+ if (cur == 0) {
+ return -1;
+ }
+ }
+
+ /* The next call will resume work from the next bit. */
+ hbi->cur[HBITMAP_LEVELS - 1] = cur & (cur - 1);
+ item = ((uint64_t)hbi->pos << BITS_PER_LEVEL) + ctzl(cur);
+
+ return item << hbi->granularity;
+}
+
+/**
+ * hbitmap_iter_next_word:
+ * @hbi: HBitmapIter to operate on.
+ * @p_cur: Location where to store the next non-zero word.
+ *
+ * Return the index of the next nonzero word that is set in @hbi's
+ * associated HBitmap, and set *p_cur to the content of that word
+ * (bits before the index that was passed to hbitmap_iter_init are
+ * trimmed on the first call). Return -1, and set *p_cur to zero,
+ * if all remaining words are zero.
+ */
+static inline size_t hbitmap_iter_next_word(HBitmapIter *hbi, unsigned long *p_cur)
+{
+ unsigned long cur = hbi->cur[HBITMAP_LEVELS - 1];
+
+ if (cur == 0) {
+ cur = hbitmap_iter_skip_words(hbi);
+ if (cur == 0) {
+ *p_cur = 0;
+ return -1;
+ }
+ }
+
+ /* The next call will resume work from the next word. */
+ hbi->cur[HBITMAP_LEVELS - 1] = 0;
+ *p_cur = cur;
+ return hbi->pos;
+}
+
+
+#endif
diff --git a/contrib/qemu/include/qemu/host-utils.h b/contrib/qemu/include/qemu/host-utils.h
new file mode 100644
index 000000000..0f688c1c0
--- /dev/null
+++ b/contrib/qemu/include/qemu/host-utils.h
@@ -0,0 +1,322 @@
+/*
+ * Utility compute operations used by translated code.
+ *
+ * Copyright (c) 2007 Thiemo Seufer
+ * Copyright (c) 2007 Jocelyn Mayer
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef HOST_UTILS_H
+#define HOST_UTILS_H 1
+
+#include "qemu/compiler.h" /* QEMU_GNUC_PREREQ */
+#include <limits.h>
+
+#ifdef CONFIG_INT128
+static inline void mulu64(uint64_t *plow, uint64_t *phigh,
+ uint64_t a, uint64_t b)
+{
+ __uint128_t r = (__uint128_t)a * b;
+ *plow = r;
+ *phigh = r >> 64;
+}
+
+static inline void muls64(uint64_t *plow, uint64_t *phigh,
+ int64_t a, int64_t b)
+{
+ __int128_t r = (__int128_t)a * b;
+ *plow = r;
+ *phigh = r >> 64;
+}
+#else
+void muls64(uint64_t *phigh, uint64_t *plow, int64_t a, int64_t b);
+void mulu64(uint64_t *phigh, uint64_t *plow, uint64_t a, uint64_t b);
+#endif
+
+/**
+ * clz32 - count leading zeros in a 32-bit value.
+ * @val: The value to search
+ *
+ * Returns 32 if the value is zero. Note that the GCC builtin is
+ * undefined if the value is zero.
+ */
+static inline int clz32(uint32_t val)
+{
+#if QEMU_GNUC_PREREQ(3, 4)
+ return val ? __builtin_clz(val) : 32;
+#else
+ /* Binary search for the leading one bit. */
+ int cnt = 0;
+
+ if (!(val & 0xFFFF0000U)) {
+ cnt += 16;
+ val <<= 16;
+ }
+ if (!(val & 0xFF000000U)) {
+ cnt += 8;
+ val <<= 8;
+ }
+ if (!(val & 0xF0000000U)) {
+ cnt += 4;
+ val <<= 4;
+ }
+ if (!(val & 0xC0000000U)) {
+ cnt += 2;
+ val <<= 2;
+ }
+ if (!(val & 0x80000000U)) {
+ cnt++;
+ val <<= 1;
+ }
+ if (!(val & 0x80000000U)) {
+ cnt++;
+ }
+ return cnt;
+#endif
+}
+
+/**
+ * clo32 - count leading ones in a 32-bit value.
+ * @val: The value to search
+ *
+ * Returns 32 if the value is -1.
+ */
+static inline int clo32(uint32_t val)
+{
+ return clz32(~val);
+}
+
+/**
+ * clz64 - count leading zeros in a 64-bit value.
+ * @val: The value to search
+ *
+ * Returns 64 if the value is zero. Note that the GCC builtin is
+ * undefined if the value is zero.
+ */
+static inline int clz64(uint64_t val)
+{
+#if QEMU_GNUC_PREREQ(3, 4)
+ return val ? __builtin_clzll(val) : 64;
+#else
+ int cnt = 0;
+
+ if (!(val >> 32)) {
+ cnt += 32;
+ } else {
+ val >>= 32;
+ }
+
+ return cnt + clz32(val);
+#endif
+}
+
+/**
+ * clo64 - count leading ones in a 64-bit value.
+ * @val: The value to search
+ *
+ * Returns 64 if the value is -1.
+ */
+static inline int clo64(uint64_t val)
+{
+ return clz64(~val);
+}
+
+/**
+ * ctz32 - count trailing zeros in a 32-bit value.
+ * @val: The value to search
+ *
+ * Returns 32 if the value is zero. Note that the GCC builtin is
+ * undefined if the value is zero.
+ */
+static inline int ctz32(uint32_t val)
+{
+#if QEMU_GNUC_PREREQ(3, 4)
+ return val ? __builtin_ctz(val) : 32;
+#else
+ /* Binary search for the trailing one bit. */
+ int cnt;
+
+ cnt = 0;
+ if (!(val & 0x0000FFFFUL)) {
+ cnt += 16;
+ val >>= 16;
+ }
+ if (!(val & 0x000000FFUL)) {
+ cnt += 8;
+ val >>= 8;
+ }
+ if (!(val & 0x0000000FUL)) {
+ cnt += 4;
+ val >>= 4;
+ }
+ if (!(val & 0x00000003UL)) {
+ cnt += 2;
+ val >>= 2;
+ }
+ if (!(val & 0x00000001UL)) {
+ cnt++;
+ val >>= 1;
+ }
+ if (!(val & 0x00000001UL)) {
+ cnt++;
+ }
+
+ return cnt;
+#endif
+}
+
+/**
+ * cto32 - count trailing ones in a 32-bit value.
+ * @val: The value to search
+ *
+ * Returns 32 if the value is -1.
+ */
+static inline int cto32(uint32_t val)
+{
+ return ctz32(~val);
+}
+
+/**
+ * ctz64 - count trailing zeros in a 64-bit value.
+ * @val: The value to search
+ *
+ * Returns 64 if the value is zero. Note that the GCC builtin is
+ * undefined if the value is zero.
+ */
+static inline int ctz64(uint64_t val)
+{
+#if QEMU_GNUC_PREREQ(3, 4)
+ return val ? __builtin_ctzll(val) : 64;
+#else
+ int cnt;
+
+ cnt = 0;
+ if (!((uint32_t)val)) {
+ cnt += 32;
+ val >>= 32;
+ }
+
+ return cnt + ctz32(val);
+#endif
+}
+
+/**
+ * ctz64 - count trailing ones in a 64-bit value.
+ * @val: The value to search
+ *
+ * Returns 64 if the value is -1.
+ */
+static inline int cto64(uint64_t val)
+{
+ return ctz64(~val);
+}
+
+/**
+ * ctpop8 - count the population of one bits in an 8-bit value.
+ * @val: The value to search
+ */
+static inline int ctpop8(uint8_t val)
+{
+#if QEMU_GNUC_PREREQ(3, 4)
+ return __builtin_popcount(val);
+#else
+ val = (val & 0x55) + ((val >> 1) & 0x55);
+ val = (val & 0x33) + ((val >> 2) & 0x33);
+ val = (val & 0x0f) + ((val >> 4) & 0x0f);
+
+ return val;
+#endif
+}
+
+/**
+ * ctpop16 - count the population of one bits in a 16-bit value.
+ * @val: The value to search
+ */
+static inline int ctpop16(uint16_t val)
+{
+#if QEMU_GNUC_PREREQ(3, 4)
+ return __builtin_popcount(val);
+#else
+ val = (val & 0x5555) + ((val >> 1) & 0x5555);
+ val = (val & 0x3333) + ((val >> 2) & 0x3333);
+ val = (val & 0x0f0f) + ((val >> 4) & 0x0f0f);
+ val = (val & 0x00ff) + ((val >> 8) & 0x00ff);
+
+ return val;
+#endif
+}
+
+/**
+ * ctpop32 - count the population of one bits in a 32-bit value.
+ * @val: The value to search
+ */
+static inline int ctpop32(uint32_t val)
+{
+#if QEMU_GNUC_PREREQ(3, 4)
+ return __builtin_popcount(val);
+#else
+ val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
+ val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
+ val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
+ val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff);
+ val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
+
+ return val;
+#endif
+}
+
+/**
+ * ctpop64 - count the population of one bits in a 64-bit value.
+ * @val: The value to search
+ */
+static inline int ctpop64(uint64_t val)
+{
+#if QEMU_GNUC_PREREQ(3, 4)
+ return __builtin_popcountll(val);
+#else
+ val = (val & 0x5555555555555555ULL) + ((val >> 1) & 0x5555555555555555ULL);
+ val = (val & 0x3333333333333333ULL) + ((val >> 2) & 0x3333333333333333ULL);
+ val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) & 0x0f0f0f0f0f0f0f0fULL);
+ val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) & 0x00ff00ff00ff00ffULL);
+ val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) & 0x0000ffff0000ffffULL);
+ val = (val & 0x00000000ffffffffULL) + ((val >> 32) & 0x00000000ffffffffULL);
+
+ return val;
+#endif
+}
+
+/* Host type specific sizes of these routines. */
+
+#if ULONG_MAX == UINT32_MAX
+# define clzl clz32
+# define ctzl ctz32
+# define clol clo32
+# define ctol cto32
+# define ctpopl ctpop32
+#elif ULONG_MAX == UINT64_MAX
+# define clzl clz64
+# define ctzl ctz64
+# define clol clo64
+# define ctol cto64
+# define ctpopl ctpop64
+#else
+# error Unknown sizeof long
+#endif
+
+#endif
diff --git a/contrib/qemu/include/qemu/iov.h b/contrib/qemu/include/qemu/iov.h
new file mode 100644
index 000000000..68d25f29b
--- /dev/null
+++ b/contrib/qemu/include/qemu/iov.h
@@ -0,0 +1,115 @@
+/*
+ * Helpers for using (partial) iovecs.
+ *
+ * Copyright (C) 2010 Red Hat, Inc.
+ *
+ * Author(s):
+ * Amit Shah <amit.shah@redhat.com>
+ * Michael Tokarev <mjt@tls.msk.ru>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef IOV_H
+#define IOV_H
+
+#include "qemu-common.h"
+
+/**
+ * count and return data size, in bytes, of an iovec
+ * starting at `iov' of `iov_cnt' number of elements.
+ */
+size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt);
+
+/**
+ * Copy from single continuous buffer to scatter-gather vector of buffers
+ * (iovec) and back like memcpy() between two continuous memory regions.
+ * Data in single continuous buffer starting at address `buf' and
+ * `bytes' bytes long will be copied to/from an iovec `iov' with
+ * `iov_cnt' number of elements, starting at byte position `offset'
+ * within the iovec. If the iovec does not contain enough space,
+ * only part of data will be copied, up to the end of the iovec.
+ * Number of bytes actually copied will be returned, which is
+ * min(bytes, iov_size(iov)-offset)
+ * `Offset' must point to the inside of iovec.
+ * It is okay to use very large value for `bytes' since we're
+ * limited by the size of the iovec anyway, provided that the
+ * buffer pointed to by buf has enough space. One possible
+ * such "large" value is -1 (sinice size_t is unsigned),
+ * so specifying `-1' as `bytes' means 'up to the end of iovec'.
+ */
+size_t iov_from_buf(const struct iovec *iov, unsigned int iov_cnt,
+ size_t offset, const void *buf, size_t bytes);
+size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt,
+ size_t offset, void *buf, size_t bytes);
+
+/**
+ * Set data bytes pointed out by iovec `iov' of size `iov_cnt' elements,
+ * starting at byte offset `start', to value `fillc', repeating it
+ * `bytes' number of times. `Offset' must point to the inside of iovec.
+ * If `bytes' is large enough, only last bytes portion of iovec,
+ * up to the end of it, will be filled with the specified value.
+ * Function return actual number of bytes processed, which is
+ * min(size, iov_size(iov) - offset).
+ * Again, it is okay to use large value for `bytes' to mean "up to the end".
+ */
+size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt,
+ size_t offset, int fillc, size_t bytes);
+
+/*
+ * Send/recv data from/to iovec buffers directly
+ *
+ * `offset' bytes in the beginning of iovec buffer are skipped and
+ * next `bytes' bytes are used, which must be within data of iovec.
+ *
+ * r = iov_send_recv(sockfd, iov, iovcnt, offset, bytes, true);
+ *
+ * is logically equivalent to
+ *
+ * char *buf = malloc(bytes);
+ * iov_to_buf(iov, iovcnt, offset, buf, bytes);
+ * r = send(sockfd, buf, bytes, 0);
+ * free(buf);
+ *
+ * For iov_send_recv() _whole_ area being sent or received
+ * should be within the iovec, not only beginning of it.
+ */
+ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt,
+ size_t offset, size_t bytes, bool do_send);
+#define iov_recv(sockfd, iov, iov_cnt, offset, bytes) \
+ iov_send_recv(sockfd, iov, iov_cnt, offset, bytes, false)
+#define iov_send(sockfd, iov, iov_cnt, offset, bytes) \
+ iov_send_recv(sockfd, iov, iov_cnt, offset, bytes, true)
+
+/**
+ * Produce a text hexdump of iovec `iov' with `iov_cnt' number of elements
+ * in file `fp', prefixing each line with `prefix' and processing not more
+ * than `limit' data bytes.
+ */
+void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt,
+ FILE *fp, const char *prefix, size_t limit);
+
+/*
+ * Partial copy of vector from iov to dst_iov (data is not copied).
+ * dst_iov overlaps iov at a specified offset.
+ * size of dst_iov is at most bytes. dst vector count is returned.
+ */
+unsigned iov_copy(struct iovec *dst_iov, unsigned int dst_iov_cnt,
+ const struct iovec *iov, unsigned int iov_cnt,
+ size_t offset, size_t bytes);
+
+/*
+ * Remove a given number of bytes from the front or back of a vector.
+ * This may update iov and/or iov_cnt to exclude iovec elements that are
+ * no longer required.
+ *
+ * The number of bytes actually discarded is returned. This number may be
+ * smaller than requested if the vector is too small.
+ */
+size_t iov_discard_front(struct iovec **iov, unsigned int *iov_cnt,
+ size_t bytes);
+size_t iov_discard_back(struct iovec *iov, unsigned int *iov_cnt,
+ size_t bytes);
+
+#endif
diff --git a/contrib/qemu/include/qemu/main-loop.h b/contrib/qemu/include/qemu/main-loop.h
new file mode 100644
index 000000000..6f0200a7a
--- /dev/null
+++ b/contrib/qemu/include/qemu/main-loop.h
@@ -0,0 +1,311 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef QEMU_MAIN_LOOP_H
+#define QEMU_MAIN_LOOP_H 1
+
+#include "block/aio.h"
+
+#define SIG_IPI SIGUSR1
+
+/**
+ * qemu_init_main_loop: Set up the process so that it can run the main loop.
+ *
+ * This includes setting up signal handlers. It should be called before
+ * any other threads are created. In addition, threads other than the
+ * main one should block signals that are trapped by the main loop.
+ * For simplicity, you can consider these signals to be safe: SIGUSR1,
+ * SIGUSR2, thread signals (SIGFPE, SIGILL, SIGSEGV, SIGBUS) and real-time
+ * signals if available. Remember that Windows in practice does not have
+ * signals, though.
+ *
+ * In the case of QEMU tools, this will also start/initialize timers.
+ */
+int qemu_init_main_loop(void);
+
+/**
+ * main_loop_wait: Run one iteration of the main loop.
+ *
+ * If @nonblocking is true, poll for events, otherwise suspend until
+ * one actually occurs. The main loop usually consists of a loop that
+ * repeatedly calls main_loop_wait(false).
+ *
+ * Main loop services include file descriptor callbacks, bottom halves
+ * and timers (defined in qemu-timer.h). Bottom halves are similar to timers
+ * that execute immediately, but have a lower overhead and scheduling them
+ * is wait-free, thread-safe and signal-safe.
+ *
+ * It is sometimes useful to put a whole program in a coroutine. In this
+ * case, the coroutine actually should be started from within the main loop,
+ * so that the main loop can run whenever the coroutine yields. To do this,
+ * you can use a bottom half to enter the coroutine as soon as the main loop
+ * starts:
+ *
+ * void enter_co_bh(void *opaque) {
+ * QEMUCoroutine *co = opaque;
+ * qemu_coroutine_enter(co, NULL);
+ * }
+ *
+ * ...
+ * QEMUCoroutine *co = qemu_coroutine_create(coroutine_entry);
+ * QEMUBH *start_bh = qemu_bh_new(enter_co_bh, co);
+ * qemu_bh_schedule(start_bh);
+ * while (...) {
+ * main_loop_wait(false);
+ * }
+ *
+ * (In the future we may provide a wrapper for this).
+ *
+ * @nonblocking: Whether the caller should block until an event occurs.
+ */
+int main_loop_wait(int nonblocking);
+
+/**
+ * qemu_get_aio_context: Return the main loop's AioContext
+ */
+AioContext *qemu_get_aio_context(void);
+
+/**
+ * qemu_notify_event: Force processing of pending events.
+ *
+ * Similar to signaling a condition variable, qemu_notify_event forces
+ * main_loop_wait to look at pending events and exit. The caller of
+ * main_loop_wait will usually call it again very soon, so qemu_notify_event
+ * also has the side effect of recalculating the sets of file descriptors
+ * that the main loop waits for.
+ *
+ * Calling qemu_notify_event is rarely necessary, because main loop
+ * services (bottom halves and timers) call it themselves. One notable
+ * exception occurs when using qemu_set_fd_handler2 (see below).
+ */
+void qemu_notify_event(void);
+
+#ifdef _WIN32
+/* return TRUE if no sleep should be done afterwards */
+typedef int PollingFunc(void *opaque);
+
+/**
+ * qemu_add_polling_cb: Register a Windows-specific polling callback
+ *
+ * Currently, under Windows some events are polled rather than waited for.
+ * Polling callbacks do not ensure that @func is called timely, because
+ * the main loop might wait for an arbitrarily long time. If possible,
+ * you should instead create a separate thread that does a blocking poll
+ * and set a Win32 event object. The event can then be passed to
+ * qemu_add_wait_object.
+ *
+ * Polling callbacks really have nothing Windows specific in them, but
+ * as they are a hack and are currently not necessary under POSIX systems,
+ * they are only available when QEMU is running under Windows.
+ *
+ * @func: The function that does the polling, and returns 1 to force
+ * immediate completion of main_loop_wait.
+ * @opaque: A pointer-size value that is passed to @func.
+ */
+int qemu_add_polling_cb(PollingFunc *func, void *opaque);
+
+/**
+ * qemu_del_polling_cb: Unregister a Windows-specific polling callback
+ *
+ * This function removes a callback that was registered with
+ * qemu_add_polling_cb.
+ *
+ * @func: The function that was passed to qemu_add_polling_cb.
+ * @opaque: A pointer-size value that was passed to qemu_add_polling_cb.
+ */
+void qemu_del_polling_cb(PollingFunc *func, void *opaque);
+
+/* Wait objects handling */
+typedef void WaitObjectFunc(void *opaque);
+
+/**
+ * qemu_add_wait_object: Register a callback for a Windows handle
+ *
+ * Under Windows, the iohandler mechanism can only be used with sockets.
+ * QEMU must use the WaitForMultipleObjects API to wait on other handles.
+ * This function registers a #HANDLE with QEMU, so that it will be included
+ * in the main loop's calls to WaitForMultipleObjects. When the handle
+ * is in a signaled state, QEMU will call @func.
+ *
+ * @handle: The Windows handle to be observed.
+ * @func: A function to be called when @handle is in a signaled state.
+ * @opaque: A pointer-size value that is passed to @func.
+ */
+int qemu_add_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque);
+
+/**
+ * qemu_del_wait_object: Unregister a callback for a Windows handle
+ *
+ * This function removes a callback that was registered with
+ * qemu_add_wait_object.
+ *
+ * @func: The function that was passed to qemu_add_wait_object.
+ * @opaque: A pointer-size value that was passed to qemu_add_wait_object.
+ */
+void qemu_del_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque);
+#endif
+
+/* async I/O support */
+
+typedef void IOReadHandler(void *opaque, const uint8_t *buf, int size);
+typedef int IOCanReadHandler(void *opaque);
+
+/**
+ * qemu_set_fd_handler2: Register a file descriptor with the main loop
+ *
+ * This function tells the main loop to wake up whenever one of the
+ * following conditions is true:
+ *
+ * 1) if @fd_write is not %NULL, when the file descriptor is writable;
+ *
+ * 2) if @fd_read is not %NULL, when the file descriptor is readable.
+ *
+ * @fd_read_poll can be used to disable the @fd_read callback temporarily.
+ * This is useful to avoid calling qemu_set_fd_handler2 every time the
+ * client becomes interested in reading (or dually, stops being interested).
+ * A typical example is when @fd is a listening socket and you want to bound
+ * the number of active clients. Remember to call qemu_notify_event whenever
+ * the condition may change from %false to %true.
+ *
+ * The callbacks that are set up by qemu_set_fd_handler2 are level-triggered.
+ * If @fd_read does not read from @fd, or @fd_write does not write to @fd
+ * until its buffers are full, they will be called again on the next
+ * iteration.
+ *
+ * @fd: The file descriptor to be observed. Under Windows it must be
+ * a #SOCKET.
+ *
+ * @fd_read_poll: A function that returns 1 if the @fd_read callback
+ * should be fired. If the function returns 0, the main loop will not
+ * end its iteration even if @fd becomes readable.
+ *
+ * @fd_read: A level-triggered callback that is fired if @fd is readable
+ * at the beginning of a main loop iteration, or if it becomes readable
+ * during one.
+ *
+ * @fd_write: A level-triggered callback that is fired when @fd is writable
+ * at the beginning of a main loop iteration, or if it becomes writable
+ * during one.
+ *
+ * @opaque: A pointer-sized value that is passed to @fd_read_poll,
+ * @fd_read and @fd_write.
+ */
+int qemu_set_fd_handler2(int fd,
+ IOCanReadHandler *fd_read_poll,
+ IOHandler *fd_read,
+ IOHandler *fd_write,
+ void *opaque);
+
+/**
+ * qemu_set_fd_handler: Register a file descriptor with the main loop
+ *
+ * This function tells the main loop to wake up whenever one of the
+ * following conditions is true:
+ *
+ * 1) if @fd_write is not %NULL, when the file descriptor is writable;
+ *
+ * 2) if @fd_read is not %NULL, when the file descriptor is readable.
+ *
+ * The callbacks that are set up by qemu_set_fd_handler are level-triggered.
+ * If @fd_read does not read from @fd, or @fd_write does not write to @fd
+ * until its buffers are full, they will be called again on the next
+ * iteration.
+ *
+ * @fd: The file descriptor to be observed. Under Windows it must be
+ * a #SOCKET.
+ *
+ * @fd_read: A level-triggered callback that is fired if @fd is readable
+ * at the beginning of a main loop iteration, or if it becomes readable
+ * during one.
+ *
+ * @fd_write: A level-triggered callback that is fired when @fd is writable
+ * at the beginning of a main loop iteration, or if it becomes writable
+ * during one.
+ *
+ * @opaque: A pointer-sized value that is passed to @fd_read and @fd_write.
+ */
+int qemu_set_fd_handler(int fd,
+ IOHandler *fd_read,
+ IOHandler *fd_write,
+ void *opaque);
+
+#ifdef CONFIG_POSIX
+/**
+ * qemu_add_child_watch: Register a child process for reaping.
+ *
+ * Under POSIX systems, a parent process must read the exit status of
+ * its child processes using waitpid, or the operating system will not
+ * free some of the resources attached to that process.
+ *
+ * This function directs the QEMU main loop to observe a child process
+ * and call waitpid as soon as it exits; the watch is then removed
+ * automatically. It is useful whenever QEMU forks a child process
+ * but will find out about its termination by other means such as a
+ * "broken pipe".
+ *
+ * @pid: The pid that QEMU should observe.
+ */
+int qemu_add_child_watch(pid_t pid);
+#endif
+
+/**
+ * qemu_mutex_lock_iothread: Lock the main loop mutex.
+ *
+ * This function locks the main loop mutex. The mutex is taken by
+ * qemu_init_main_loop and always taken except while waiting on
+ * external events (such as with select). The mutex should be taken
+ * by threads other than the main loop thread when calling
+ * qemu_bh_new(), qemu_set_fd_handler() and basically all other
+ * functions documented in this file.
+ *
+ * NOTE: tools currently are single-threaded and qemu_mutex_lock_iothread
+ * is a no-op there.
+ */
+void qemu_mutex_lock_iothread(void);
+
+/**
+ * qemu_mutex_unlock_iothread: Unlock the main loop mutex.
+ *
+ * This function unlocks the main loop mutex. The mutex is taken by
+ * qemu_init_main_loop and always taken except while waiting on
+ * external events (such as with select). The mutex should be unlocked
+ * as soon as possible by threads other than the main loop thread,
+ * because it prevents the main loop from processing callbacks,
+ * including timers and bottom halves.
+ *
+ * NOTE: tools currently are single-threaded and qemu_mutex_unlock_iothread
+ * is a no-op there.
+ */
+void qemu_mutex_unlock_iothread(void);
+
+/* internal interfaces */
+
+void qemu_fd_register(int fd);
+void qemu_iohandler_fill(GArray *pollfds);
+void qemu_iohandler_poll(GArray *pollfds, int rc);
+
+QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque);
+void qemu_bh_schedule_idle(QEMUBH *bh);
+
+#endif
diff --git a/contrib/qemu/include/qemu/module.h b/contrib/qemu/include/qemu/module.h
new file mode 100644
index 000000000..c4ccd5716
--- /dev/null
+++ b/contrib/qemu/include/qemu/module.h
@@ -0,0 +1,40 @@
+/*
+ * QEMU Module Infrastructure
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_MODULE_H
+#define QEMU_MODULE_H
+
+/* This should not be used directly. Use block_init etc. instead. */
+#define module_init(function, type) \
+static void __attribute__((constructor)) do_qemu_init_ ## function(void) { \
+ register_module_init(function, type); \
+}
+
+typedef enum {
+ MODULE_INIT_BLOCK,
+ MODULE_INIT_MACHINE,
+ MODULE_INIT_QAPI,
+ MODULE_INIT_QOM,
+ MODULE_INIT_MAX
+} module_init_type;
+
+#define block_init(function) module_init(function, MODULE_INIT_BLOCK)
+#define machine_init(function) module_init(function, MODULE_INIT_MACHINE)
+#define qapi_init(function) module_init(function, MODULE_INIT_QAPI)
+#define type_init(function) module_init(function, MODULE_INIT_QOM)
+
+void register_module_init(void (*fn)(void), module_init_type type);
+
+void module_call_init(module_init_type type);
+
+#endif
diff --git a/contrib/qemu/include/qemu/notify.h b/contrib/qemu/include/qemu/notify.h
new file mode 100644
index 000000000..a3d73e4bc
--- /dev/null
+++ b/contrib/qemu/include/qemu/notify.h
@@ -0,0 +1,72 @@
+/*
+ * Notifier lists
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_NOTIFY_H
+#define QEMU_NOTIFY_H
+
+#include "qemu/queue.h"
+
+typedef struct Notifier Notifier;
+
+struct Notifier
+{
+ void (*notify)(Notifier *notifier, void *data);
+ QLIST_ENTRY(Notifier) node;
+};
+
+typedef struct NotifierList
+{
+ QLIST_HEAD(, Notifier) notifiers;
+} NotifierList;
+
+#define NOTIFIER_LIST_INITIALIZER(head) \
+ { QLIST_HEAD_INITIALIZER((head).notifiers) }
+
+void notifier_list_init(NotifierList *list);
+
+void notifier_list_add(NotifierList *list, Notifier *notifier);
+
+void notifier_remove(Notifier *notifier);
+
+void notifier_list_notify(NotifierList *list, void *data);
+
+/* Same as Notifier but allows .notify() to return errors */
+typedef struct NotifierWithReturn NotifierWithReturn;
+
+struct NotifierWithReturn {
+ /**
+ * Return 0 on success (next notifier will be invoked), otherwise
+ * notifier_with_return_list_notify() will stop and return the value.
+ */
+ int (*notify)(NotifierWithReturn *notifier, void *data);
+ QLIST_ENTRY(NotifierWithReturn) node;
+};
+
+typedef struct NotifierWithReturnList {
+ QLIST_HEAD(, NotifierWithReturn) notifiers;
+} NotifierWithReturnList;
+
+#define NOTIFIER_WITH_RETURN_LIST_INITIALIZER(head) \
+ { QLIST_HEAD_INITIALIZER((head).notifiers) }
+
+void notifier_with_return_list_init(NotifierWithReturnList *list);
+
+void notifier_with_return_list_add(NotifierWithReturnList *list,
+ NotifierWithReturn *notifier);
+
+void notifier_with_return_remove(NotifierWithReturn *notifier);
+
+int notifier_with_return_list_notify(NotifierWithReturnList *list,
+ void *data);
+
+#endif
diff --git a/contrib/qemu/include/qemu/option.h b/contrib/qemu/include/qemu/option.h
new file mode 100644
index 000000000..a83c70032
--- /dev/null
+++ b/contrib/qemu/include/qemu/option.h
@@ -0,0 +1,157 @@
+/*
+ * Commandline option parsing functions
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2009 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef QEMU_OPTIONS_H
+#define QEMU_OPTIONS_H
+
+#include <stdint.h>
+#include "qemu/queue.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+
+enum QEMUOptionParType {
+ OPT_FLAG,
+ OPT_NUMBER,
+ OPT_SIZE,
+ OPT_STRING,
+};
+
+typedef struct QEMUOptionParameter {
+ const char *name;
+ enum QEMUOptionParType type;
+ union {
+ uint64_t n;
+ char* s;
+ } value;
+ const char *help;
+} QEMUOptionParameter;
+
+
+const char *get_opt_name(char *buf, int buf_size, const char *p, char delim);
+const char *get_opt_value(char *buf, int buf_size, const char *p);
+int get_next_param_value(char *buf, int buf_size,
+ const char *tag, const char **pstr);
+int get_param_value(char *buf, int buf_size,
+ const char *tag, const char *str);
+
+
+/*
+ * The following functions take a parameter list as input. This is a pointer to
+ * the first element of a QEMUOptionParameter array which is terminated by an
+ * entry with entry->name == NULL.
+ */
+
+QEMUOptionParameter *get_option_parameter(QEMUOptionParameter *list,
+ const char *name);
+int set_option_parameter(QEMUOptionParameter *list, const char *name,
+ const char *value);
+int set_option_parameter_int(QEMUOptionParameter *list, const char *name,
+ uint64_t value);
+QEMUOptionParameter *append_option_parameters(QEMUOptionParameter *dest,
+ QEMUOptionParameter *list);
+QEMUOptionParameter *parse_option_parameters(const char *param,
+ QEMUOptionParameter *list, QEMUOptionParameter *dest);
+void free_option_parameters(QEMUOptionParameter *list);
+void print_option_parameters(QEMUOptionParameter *list);
+void print_option_help(QEMUOptionParameter *list);
+
+/* ------------------------------------------------------------------ */
+
+typedef struct QemuOpt QemuOpt;
+typedef struct QemuOpts QemuOpts;
+typedef struct QemuOptsList QemuOptsList;
+
+enum QemuOptType {
+ QEMU_OPT_STRING = 0, /* no parsing (use string as-is) */
+ QEMU_OPT_BOOL, /* on/off */
+ QEMU_OPT_NUMBER, /* simple number */
+ QEMU_OPT_SIZE, /* size, accepts (K)ilo, (M)ega, (G)iga, (T)era postfix */
+};
+
+typedef struct QemuOptDesc {
+ const char *name;
+ enum QemuOptType type;
+ const char *help;
+} QemuOptDesc;
+
+struct QemuOptsList {
+ const char *name;
+ const char *implied_opt_name;
+ bool merge_lists; /* Merge multiple uses of option into a single list? */
+ QTAILQ_HEAD(, QemuOpts) head;
+ QemuOptDesc desc[];
+};
+
+const char *qemu_opt_get(QemuOpts *opts, const char *name);
+/**
+ * qemu_opt_has_help_opt:
+ * @opts: options to search for a help request
+ *
+ * Check whether the options specified by @opts include one of the
+ * standard strings which indicate that the user is asking for a
+ * list of the valid values for a command line option (as defined
+ * by is_help_option()).
+ *
+ * Returns: true if @opts includes 'help' or equivalent.
+ */
+bool qemu_opt_has_help_opt(QemuOpts *opts);
+bool qemu_opt_get_bool(QemuOpts *opts, const char *name, bool defval);
+uint64_t qemu_opt_get_number(QemuOpts *opts, const char *name, uint64_t defval);
+uint64_t qemu_opt_get_size(QemuOpts *opts, const char *name, uint64_t defval);
+int qemu_opt_set(QemuOpts *opts, const char *name, const char *value);
+void qemu_opt_set_err(QemuOpts *opts, const char *name, const char *value,
+ Error **errp);
+int qemu_opt_set_bool(QemuOpts *opts, const char *name, bool val);
+int qemu_opt_set_number(QemuOpts *opts, const char *name, int64_t val);
+typedef int (*qemu_opt_loopfunc)(const char *name, const char *value, void *opaque);
+int qemu_opt_foreach(QemuOpts *opts, qemu_opt_loopfunc func, void *opaque,
+ int abort_on_failure);
+
+QemuOpts *qemu_opts_find(QemuOptsList *list, const char *id);
+QemuOpts *qemu_opts_create(QemuOptsList *list, const char *id,
+ int fail_if_exists, Error **errp);
+QemuOpts *qemu_opts_create_nofail(QemuOptsList *list);
+void qemu_opts_reset(QemuOptsList *list);
+void qemu_opts_loc_restore(QemuOpts *opts);
+int qemu_opts_set(QemuOptsList *list, const char *id,
+ const char *name, const char *value);
+const char *qemu_opts_id(QemuOpts *opts);
+void qemu_opts_del(QemuOpts *opts);
+void qemu_opts_validate(QemuOpts *opts, const QemuOptDesc *desc, Error **errp);
+int qemu_opts_do_parse(QemuOpts *opts, const char *params, const char *firstname);
+QemuOpts *qemu_opts_parse(QemuOptsList *list, const char *params, int permit_abbrev);
+void qemu_opts_set_defaults(QemuOptsList *list, const char *params,
+ int permit_abbrev);
+QemuOpts *qemu_opts_from_qdict(QemuOptsList *list, const QDict *qdict,
+ Error **errp);
+QDict *qemu_opts_to_qdict(QemuOpts *opts, QDict *qdict);
+void qemu_opts_absorb_qdict(QemuOpts *opts, QDict *qdict, Error **errp);
+
+typedef int (*qemu_opts_loopfunc)(QemuOpts *opts, void *opaque);
+int qemu_opts_print(QemuOpts *opts, void *dummy);
+int qemu_opts_foreach(QemuOptsList *list, qemu_opts_loopfunc func, void *opaque,
+ int abort_on_failure);
+
+#endif
diff --git a/contrib/qemu/include/qemu/option_int.h b/contrib/qemu/include/qemu/option_int.h
new file mode 100644
index 000000000..8212fa4a4
--- /dev/null
+++ b/contrib/qemu/include/qemu/option_int.h
@@ -0,0 +1,54 @@
+/*
+ * Commandline option parsing functions
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2009 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef QEMU_OPTIONS_INTERNAL_H
+#define QEMU_OPTIONS_INTERNAL_H
+
+#include "qemu/option.h"
+#include "qemu/error-report.h"
+
+struct QemuOpt {
+ const char *name;
+ const char *str;
+
+ const QemuOptDesc *desc;
+ union {
+ bool boolean;
+ uint64_t uint;
+ } value;
+
+ QemuOpts *opts;
+ QTAILQ_ENTRY(QemuOpt) next;
+};
+
+struct QemuOpts {
+ char *id;
+ QemuOptsList *list;
+ Location loc;
+ QTAILQ_HEAD(QemuOptHead, QemuOpt) head;
+ QTAILQ_ENTRY(QemuOpts) next;
+};
+
+#endif
diff --git a/contrib/qemu/include/qemu/osdep.h b/contrib/qemu/include/qemu/osdep.h
new file mode 100644
index 000000000..26136f16e
--- /dev/null
+++ b/contrib/qemu/include/qemu/osdep.h
@@ -0,0 +1,218 @@
+#ifndef QEMU_OSDEP_H
+#define QEMU_OSDEP_H
+
+#include "config-host.h"
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#ifdef __OpenBSD__
+#include <sys/signal.h>
+#endif
+
+#ifndef _WIN32
+#include <sys/wait.h>
+#else
+#define WIFEXITED(x) 1
+#define WEXITSTATUS(x) (x)
+#endif
+
+#include <sys/time.h>
+
+#if defined(CONFIG_SOLARIS) && CONFIG_SOLARIS_VERSION < 10
+/* [u]int_fast*_t not in <sys/int_types.h> */
+typedef unsigned char uint_fast8_t;
+typedef unsigned int uint_fast16_t;
+typedef signed int int_fast16_t;
+#endif
+
+#ifndef glue
+#define xglue(x, y) x ## y
+#define glue(x, y) xglue(x, y)
+#define stringify(s) tostring(s)
+#define tostring(s) #s
+#endif
+
+#ifndef likely
+#if __GNUC__ < 3
+#define __builtin_expect(x, n) (x)
+#endif
+
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#endif
+
+#ifndef container_of
+#define container_of(ptr, type, member) ({ \
+ const typeof(((type *) 0)->member) *__mptr = (ptr); \
+ (type *) ((char *) __mptr - offsetof(type, member));})
+#endif
+
+/* Convert from a base type to a parent type, with compile time checking. */
+#ifdef __GNUC__
+#define DO_UPCAST(type, field, dev) ( __extension__ ( { \
+ char __attribute__((unused)) offset_must_be_zero[ \
+ -offsetof(type, field)]; \
+ container_of(dev, type, field);}))
+#else
+#define DO_UPCAST(type, field, dev) container_of(dev, type, field)
+#endif
+
+#define typeof_field(type, field) typeof(((type *)0)->field)
+#define type_check(t1,t2) ((t1*)0 - (t2*)0)
+
+#ifndef MIN
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+#endif
+#ifndef MAX
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#endif
+
+#ifndef ROUND_UP
+#define ROUND_UP(n,d) (((n) + (d) - 1) & -(d))
+#endif
+
+#ifndef DIV_ROUND_UP
+#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
+#endif
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+#ifndef always_inline
+#if !((__GNUC__ < 3) || defined(__APPLE__))
+#ifdef __OPTIMIZE__
+#undef inline
+#define inline __attribute__ (( always_inline )) __inline__
+#endif
+#endif
+#else
+#undef inline
+#define inline always_inline
+#endif
+
+#define qemu_printf printf
+
+int qemu_daemon(int nochdir, int noclose);
+void *qemu_memalign(size_t alignment, size_t size);
+void *qemu_anon_ram_alloc(size_t size);
+void qemu_vfree(void *ptr);
+void qemu_anon_ram_free(void *ptr, size_t size);
+
+#define QEMU_MADV_INVALID -1
+
+#if defined(CONFIG_MADVISE)
+
+#define QEMU_MADV_WILLNEED MADV_WILLNEED
+#define QEMU_MADV_DONTNEED MADV_DONTNEED
+#ifdef MADV_DONTFORK
+#define QEMU_MADV_DONTFORK MADV_DONTFORK
+#else
+#define QEMU_MADV_DONTFORK QEMU_MADV_INVALID
+#endif
+#ifdef MADV_MERGEABLE
+#define QEMU_MADV_MERGEABLE MADV_MERGEABLE
+#else
+#define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID
+#endif
+#ifdef MADV_DONTDUMP
+#define QEMU_MADV_DONTDUMP MADV_DONTDUMP
+#else
+#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
+#endif
+#ifdef MADV_HUGEPAGE
+#define QEMU_MADV_HUGEPAGE MADV_HUGEPAGE
+#else
+#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
+#endif
+
+#elif defined(CONFIG_POSIX_MADVISE)
+
+#define QEMU_MADV_WILLNEED POSIX_MADV_WILLNEED
+#define QEMU_MADV_DONTNEED POSIX_MADV_DONTNEED
+#define QEMU_MADV_DONTFORK QEMU_MADV_INVALID
+#define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID
+#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
+#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
+
+#else /* no-op */
+
+#define QEMU_MADV_WILLNEED QEMU_MADV_INVALID
+#define QEMU_MADV_DONTNEED QEMU_MADV_INVALID
+#define QEMU_MADV_DONTFORK QEMU_MADV_INVALID
+#define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID
+#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
+#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
+
+#endif
+
+int qemu_madvise(void *addr, size_t len, int advice);
+
+int qemu_open(const char *name, int flags, ...);
+int qemu_close(int fd);
+
+#if defined(__HAIKU__) && defined(__i386__)
+#define FMT_pid "%ld"
+#elif defined(WIN64)
+#define FMT_pid "%" PRId64
+#else
+#define FMT_pid "%d"
+#endif
+
+int qemu_create_pidfile(const char *filename);
+int qemu_get_thread_id(void);
+
+#ifndef CONFIG_IOVEC
+struct iovec {
+ void *iov_base;
+ size_t iov_len;
+};
+/*
+ * Use the same value as Linux for now.
+ */
+#define IOV_MAX 1024
+
+ssize_t readv(int fd, const struct iovec *iov, int iov_cnt);
+ssize_t writev(int fd, const struct iovec *iov, int iov_cnt);
+#else
+#include <sys/uio.h>
+#endif
+
+#ifdef _WIN32
+static inline void qemu_timersub(const struct timeval *val1,
+ const struct timeval *val2,
+ struct timeval *res)
+{
+ res->tv_sec = val1->tv_sec - val2->tv_sec;
+ if (val1->tv_usec < val2->tv_usec) {
+ res->tv_sec--;
+ res->tv_usec = val1->tv_usec - val2->tv_usec + 1000 * 1000;
+ } else {
+ res->tv_usec = val1->tv_usec - val2->tv_usec;
+ }
+}
+#else
+#define qemu_timersub timersub
+#endif
+
+void qemu_set_cloexec(int fd);
+
+void qemu_set_version(const char *);
+const char *qemu_get_version(void);
+
+void fips_set_state(bool requested);
+bool fips_get_state(void);
+
+/* Return a dynamically allocated pathname denoting a file or directory that is
+ * appropriate for storing local state.
+ *
+ * @relative_pathname need not start with a directory separator; one will be
+ * added automatically.
+ *
+ * The caller is responsible for releasing the value returned with g_free()
+ * after use.
+ */
+char *qemu_get_local_state_pathname(const char *relative_pathname);
+
+#endif
diff --git a/contrib/qemu/include/qemu/queue.h b/contrib/qemu/include/qemu/queue.h
new file mode 100644
index 000000000..d433b9017
--- /dev/null
+++ b/contrib/qemu/include/qemu/queue.h
@@ -0,0 +1,414 @@
+/* $NetBSD: queue.h,v 1.52 2009/04/20 09:56:08 mschuett Exp $ */
+
+/*
+ * QEMU version: Copy from netbsd, removed debug code, removed some of
+ * the implementations. Left in singly-linked lists, lists, simple
+ * queues, and tail queues.
+ */
+
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)queue.h 8.5 (Berkeley) 8/20/94
+ */
+
+#ifndef QEMU_SYS_QUEUE_H_
+#define QEMU_SYS_QUEUE_H_
+
+/*
+ * This file defines four types of data structures: singly-linked lists,
+ * lists, simple queues, and tail queues.
+ *
+ * A singly-linked list is headed by a single forward pointer. The
+ * elements are singly linked for minimum space and pointer manipulation
+ * overhead at the expense of O(n) removal for arbitrary elements. New
+ * elements can be added to the list after an existing element or at the
+ * head of the list. Elements being removed from the head of the list
+ * should use the explicit macro for this purpose for optimum
+ * efficiency. A singly-linked list may only be traversed in the forward
+ * direction. Singly-linked lists are ideal for applications with large
+ * datasets and few or no removals or for implementing a LIFO queue.
+ *
+ * A list is headed by a single forward pointer (or an array of forward
+ * pointers for a hash table header). The elements are doubly linked
+ * so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before
+ * or after an existing element or at the head of the list. A list
+ * may only be traversed in the forward direction.
+ *
+ * A simple queue is headed by a pair of pointers, one the head of the
+ * list and the other to the tail of the list. The elements are singly
+ * linked to save space, so elements can only be removed from the
+ * head of the list. New elements can be added to the list after
+ * an existing element, at the head of the list, or at the end of the
+ * list. A simple queue may only be traversed in the forward direction.
+ *
+ * A tail queue is headed by a pair of pointers, one to the head of the
+ * list and the other to the tail of the list. The elements are doubly
+ * linked so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before or
+ * after an existing element, at the head of the list, or at the end of
+ * the list. A tail queue may be traversed in either direction.
+ *
+ * For details on the use of these macros, see the queue(3) manual page.
+ */
+
+#include "qemu/atomic.h" /* for smp_wmb() */
+
+/*
+ * List definitions.
+ */
+#define QLIST_HEAD(name, type) \
+struct name { \
+ struct type *lh_first; /* first element */ \
+}
+
+#define QLIST_HEAD_INITIALIZER(head) \
+ { NULL }
+
+#define QLIST_ENTRY(type) \
+struct { \
+ struct type *le_next; /* next element */ \
+ struct type **le_prev; /* address of previous next element */ \
+}
+
+/*
+ * List functions.
+ */
+#define QLIST_INIT(head) do { \
+ (head)->lh_first = NULL; \
+} while (/*CONSTCOND*/0)
+
+#define QLIST_INSERT_AFTER(listelm, elm, field) do { \
+ if (((elm)->field.le_next = (listelm)->field.le_next) != NULL) \
+ (listelm)->field.le_next->field.le_prev = \
+ &(elm)->field.le_next; \
+ (listelm)->field.le_next = (elm); \
+ (elm)->field.le_prev = &(listelm)->field.le_next; \
+} while (/*CONSTCOND*/0)
+
+#define QLIST_INSERT_BEFORE(listelm, elm, field) do { \
+ (elm)->field.le_prev = (listelm)->field.le_prev; \
+ (elm)->field.le_next = (listelm); \
+ *(listelm)->field.le_prev = (elm); \
+ (listelm)->field.le_prev = &(elm)->field.le_next; \
+} while (/*CONSTCOND*/0)
+
+#define QLIST_INSERT_HEAD(head, elm, field) do { \
+ if (((elm)->field.le_next = (head)->lh_first) != NULL) \
+ (head)->lh_first->field.le_prev = &(elm)->field.le_next;\
+ (head)->lh_first = (elm); \
+ (elm)->field.le_prev = &(head)->lh_first; \
+} while (/*CONSTCOND*/0)
+
+#define QLIST_INSERT_HEAD_RCU(head, elm, field) do { \
+ (elm)->field.le_prev = &(head)->lh_first; \
+ (elm)->field.le_next = (head)->lh_first; \
+ smp_wmb(); /* fill elm before linking it */ \
+ if ((head)->lh_first != NULL) { \
+ (head)->lh_first->field.le_prev = &(elm)->field.le_next; \
+ } \
+ (head)->lh_first = (elm); \
+ smp_wmb(); \
+} while (/* CONSTCOND*/0)
+
+#define QLIST_REMOVE(elm, field) do { \
+ if ((elm)->field.le_next != NULL) \
+ (elm)->field.le_next->field.le_prev = \
+ (elm)->field.le_prev; \
+ *(elm)->field.le_prev = (elm)->field.le_next; \
+} while (/*CONSTCOND*/0)
+
+#define QLIST_FOREACH(var, head, field) \
+ for ((var) = ((head)->lh_first); \
+ (var); \
+ (var) = ((var)->field.le_next))
+
+#define QLIST_FOREACH_SAFE(var, head, field, next_var) \
+ for ((var) = ((head)->lh_first); \
+ (var) && ((next_var) = ((var)->field.le_next), 1); \
+ (var) = (next_var))
+
+/*
+ * List access methods.
+ */
+#define QLIST_EMPTY(head) ((head)->lh_first == NULL)
+#define QLIST_FIRST(head) ((head)->lh_first)
+#define QLIST_NEXT(elm, field) ((elm)->field.le_next)
+
+
+/*
+ * Singly-linked List definitions.
+ */
+#define QSLIST_HEAD(name, type) \
+struct name { \
+ struct type *slh_first; /* first element */ \
+}
+
+#define QSLIST_HEAD_INITIALIZER(head) \
+ { NULL }
+
+#define QSLIST_ENTRY(type) \
+struct { \
+ struct type *sle_next; /* next element */ \
+}
+
+/*
+ * Singly-linked List functions.
+ */
+#define QSLIST_INIT(head) do { \
+ (head)->slh_first = NULL; \
+} while (/*CONSTCOND*/0)
+
+#define QSLIST_INSERT_AFTER(slistelm, elm, field) do { \
+ (elm)->field.sle_next = (slistelm)->field.sle_next; \
+ (slistelm)->field.sle_next = (elm); \
+} while (/*CONSTCOND*/0)
+
+#define QSLIST_INSERT_HEAD(head, elm, field) do { \
+ (elm)->field.sle_next = (head)->slh_first; \
+ (head)->slh_first = (elm); \
+} while (/*CONSTCOND*/0)
+
+#define QSLIST_REMOVE_HEAD(head, field) do { \
+ (head)->slh_first = (head)->slh_first->field.sle_next; \
+} while (/*CONSTCOND*/0)
+
+#define QSLIST_REMOVE_AFTER(slistelm, field) do { \
+ (slistelm)->field.sle_next = \
+ QSLIST_NEXT(QSLIST_NEXT((slistelm), field), field); \
+} while (/*CONSTCOND*/0)
+
+#define QSLIST_FOREACH(var, head, field) \
+ for((var) = (head)->slh_first; (var); (var) = (var)->field.sle_next)
+
+#define QSLIST_FOREACH_SAFE(var, head, field, tvar) \
+ for ((var) = QSLIST_FIRST((head)); \
+ (var) && ((tvar) = QSLIST_NEXT((var), field), 1); \
+ (var) = (tvar))
+
+/*
+ * Singly-linked List access methods.
+ */
+#define QSLIST_EMPTY(head) ((head)->slh_first == NULL)
+#define QSLIST_FIRST(head) ((head)->slh_first)
+#define QSLIST_NEXT(elm, field) ((elm)->field.sle_next)
+
+
+/*
+ * Simple queue definitions.
+ */
+#define QSIMPLEQ_HEAD(name, type) \
+struct name { \
+ struct type *sqh_first; /* first element */ \
+ struct type **sqh_last; /* addr of last next element */ \
+}
+
+#define QSIMPLEQ_HEAD_INITIALIZER(head) \
+ { NULL, &(head).sqh_first }
+
+#define QSIMPLEQ_ENTRY(type) \
+struct { \
+ struct type *sqe_next; /* next element */ \
+}
+
+/*
+ * Simple queue functions.
+ */
+#define QSIMPLEQ_INIT(head) do { \
+ (head)->sqh_first = NULL; \
+ (head)->sqh_last = &(head)->sqh_first; \
+} while (/*CONSTCOND*/0)
+
+#define QSIMPLEQ_INSERT_HEAD(head, elm, field) do { \
+ if (((elm)->field.sqe_next = (head)->sqh_first) == NULL) \
+ (head)->sqh_last = &(elm)->field.sqe_next; \
+ (head)->sqh_first = (elm); \
+} while (/*CONSTCOND*/0)
+
+#define QSIMPLEQ_INSERT_TAIL(head, elm, field) do { \
+ (elm)->field.sqe_next = NULL; \
+ *(head)->sqh_last = (elm); \
+ (head)->sqh_last = &(elm)->field.sqe_next; \
+} while (/*CONSTCOND*/0)
+
+#define QSIMPLEQ_INSERT_AFTER(head, listelm, elm, field) do { \
+ if (((elm)->field.sqe_next = (listelm)->field.sqe_next) == NULL) \
+ (head)->sqh_last = &(elm)->field.sqe_next; \
+ (listelm)->field.sqe_next = (elm); \
+} while (/*CONSTCOND*/0)
+
+#define QSIMPLEQ_REMOVE_HEAD(head, field) do { \
+ if (((head)->sqh_first = (head)->sqh_first->field.sqe_next) == NULL)\
+ (head)->sqh_last = &(head)->sqh_first; \
+} while (/*CONSTCOND*/0)
+
+#define QSIMPLEQ_REMOVE(head, elm, type, field) do { \
+ if ((head)->sqh_first == (elm)) { \
+ QSIMPLEQ_REMOVE_HEAD((head), field); \
+ } else { \
+ struct type *curelm = (head)->sqh_first; \
+ while (curelm->field.sqe_next != (elm)) \
+ curelm = curelm->field.sqe_next; \
+ if ((curelm->field.sqe_next = \
+ curelm->field.sqe_next->field.sqe_next) == NULL) \
+ (head)->sqh_last = &(curelm)->field.sqe_next; \
+ } \
+} while (/*CONSTCOND*/0)
+
+#define QSIMPLEQ_FOREACH(var, head, field) \
+ for ((var) = ((head)->sqh_first); \
+ (var); \
+ (var) = ((var)->field.sqe_next))
+
+#define QSIMPLEQ_FOREACH_SAFE(var, head, field, next) \
+ for ((var) = ((head)->sqh_first); \
+ (var) && ((next = ((var)->field.sqe_next)), 1); \
+ (var) = (next))
+
+#define QSIMPLEQ_CONCAT(head1, head2) do { \
+ if (!QSIMPLEQ_EMPTY((head2))) { \
+ *(head1)->sqh_last = (head2)->sqh_first; \
+ (head1)->sqh_last = (head2)->sqh_last; \
+ QSIMPLEQ_INIT((head2)); \
+ } \
+} while (/*CONSTCOND*/0)
+
+#define QSIMPLEQ_LAST(head, type, field) \
+ (QSIMPLEQ_EMPTY((head)) ? \
+ NULL : \
+ ((struct type *)(void *) \
+ ((char *)((head)->sqh_last) - offsetof(struct type, field))))
+
+/*
+ * Simple queue access methods.
+ */
+#define QSIMPLEQ_EMPTY(head) ((head)->sqh_first == NULL)
+#define QSIMPLEQ_FIRST(head) ((head)->sqh_first)
+#define QSIMPLEQ_NEXT(elm, field) ((elm)->field.sqe_next)
+
+
+/*
+ * Tail queue definitions.
+ */
+#define Q_TAILQ_HEAD(name, type, qual) \
+struct name { \
+ qual type *tqh_first; /* first element */ \
+ qual type *qual *tqh_last; /* addr of last next element */ \
+}
+#define QTAILQ_HEAD(name, type) Q_TAILQ_HEAD(name, struct type,)
+
+#define QTAILQ_HEAD_INITIALIZER(head) \
+ { NULL, &(head).tqh_first }
+
+#define Q_TAILQ_ENTRY(type, qual) \
+struct { \
+ qual type *tqe_next; /* next element */ \
+ qual type *qual *tqe_prev; /* address of previous next element */\
+}
+#define QTAILQ_ENTRY(type) Q_TAILQ_ENTRY(struct type,)
+
+/*
+ * Tail queue functions.
+ */
+#define QTAILQ_INIT(head) do { \
+ (head)->tqh_first = NULL; \
+ (head)->tqh_last = &(head)->tqh_first; \
+} while (/*CONSTCOND*/0)
+
+#define QTAILQ_INSERT_HEAD(head, elm, field) do { \
+ if (((elm)->field.tqe_next = (head)->tqh_first) != NULL) \
+ (head)->tqh_first->field.tqe_prev = \
+ &(elm)->field.tqe_next; \
+ else \
+ (head)->tqh_last = &(elm)->field.tqe_next; \
+ (head)->tqh_first = (elm); \
+ (elm)->field.tqe_prev = &(head)->tqh_first; \
+} while (/*CONSTCOND*/0)
+
+#define QTAILQ_INSERT_TAIL(head, elm, field) do { \
+ (elm)->field.tqe_next = NULL; \
+ (elm)->field.tqe_prev = (head)->tqh_last; \
+ *(head)->tqh_last = (elm); \
+ (head)->tqh_last = &(elm)->field.tqe_next; \
+} while (/*CONSTCOND*/0)
+
+#define QTAILQ_INSERT_AFTER(head, listelm, elm, field) do { \
+ if (((elm)->field.tqe_next = (listelm)->field.tqe_next) != NULL)\
+ (elm)->field.tqe_next->field.tqe_prev = \
+ &(elm)->field.tqe_next; \
+ else \
+ (head)->tqh_last = &(elm)->field.tqe_next; \
+ (listelm)->field.tqe_next = (elm); \
+ (elm)->field.tqe_prev = &(listelm)->field.tqe_next; \
+} while (/*CONSTCOND*/0)
+
+#define QTAILQ_INSERT_BEFORE(listelm, elm, field) do { \
+ (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \
+ (elm)->field.tqe_next = (listelm); \
+ *(listelm)->field.tqe_prev = (elm); \
+ (listelm)->field.tqe_prev = &(elm)->field.tqe_next; \
+} while (/*CONSTCOND*/0)
+
+#define QTAILQ_REMOVE(head, elm, field) do { \
+ if (((elm)->field.tqe_next) != NULL) \
+ (elm)->field.tqe_next->field.tqe_prev = \
+ (elm)->field.tqe_prev; \
+ else \
+ (head)->tqh_last = (elm)->field.tqe_prev; \
+ *(elm)->field.tqe_prev = (elm)->field.tqe_next; \
+} while (/*CONSTCOND*/0)
+
+#define QTAILQ_FOREACH(var, head, field) \
+ for ((var) = ((head)->tqh_first); \
+ (var); \
+ (var) = ((var)->field.tqe_next))
+
+#define QTAILQ_FOREACH_SAFE(var, head, field, next_var) \
+ for ((var) = ((head)->tqh_first); \
+ (var) && ((next_var) = ((var)->field.tqe_next), 1); \
+ (var) = (next_var))
+
+#define QTAILQ_FOREACH_REVERSE(var, head, headname, field) \
+ for ((var) = (*(((struct headname *)((head)->tqh_last))->tqh_last)); \
+ (var); \
+ (var) = (*(((struct headname *)((var)->field.tqe_prev))->tqh_last)))
+
+/*
+ * Tail queue access methods.
+ */
+#define QTAILQ_EMPTY(head) ((head)->tqh_first == NULL)
+#define QTAILQ_FIRST(head) ((head)->tqh_first)
+#define QTAILQ_NEXT(elm, field) ((elm)->field.tqe_next)
+
+#define QTAILQ_LAST(head, headname) \
+ (*(((struct headname *)((head)->tqh_last))->tqh_last))
+#define QTAILQ_PREV(elm, headname, field) \
+ (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last))
+
+#endif /* !QEMU_SYS_QUEUE_H_ */
diff --git a/contrib/qemu/include/qemu/sockets.h b/contrib/qemu/include/qemu/sockets.h
new file mode 100644
index 000000000..c5174d76a
--- /dev/null
+++ b/contrib/qemu/include/qemu/sockets.h
@@ -0,0 +1,83 @@
+/* headers to use the BSD sockets */
+#ifndef QEMU_SOCKET_H
+#define QEMU_SOCKET_H
+
+#ifdef _WIN32
+#include <windows.h>
+#include <winsock2.h>
+#include <ws2tcpip.h>
+
+#define socket_error() WSAGetLastError()
+
+int inet_aton(const char *cp, struct in_addr *ia);
+
+#else
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <sys/un.h>
+
+#define socket_error() errno
+#define closesocket(s) close(s)
+
+#endif /* !_WIN32 */
+
+#include "qemu/option.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
+
+extern QemuOptsList socket_optslist;
+
+/* misc helpers */
+int qemu_socket(int domain, int type, int protocol);
+int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen);
+int socket_set_cork(int fd, int v);
+int socket_set_nodelay(int fd);
+void qemu_set_block(int fd);
+void qemu_set_nonblock(int fd);
+int send_all(int fd, const void *buf, int len1);
+int recv_all(int fd, void *buf, int len1, bool single_read);
+
+/* callback function for nonblocking connect
+ * valid fd on success, negative error code on failure
+ */
+typedef void NonBlockingConnectHandler(int fd, void *opaque);
+
+InetSocketAddress *inet_parse(const char *str, Error **errp);
+int inet_listen_opts(QemuOpts *opts, int port_offset, Error **errp);
+int inet_listen(const char *str, char *ostr, int olen,
+ int socktype, int port_offset, Error **errp);
+int inet_connect_opts(QemuOpts *opts, Error **errp,
+ NonBlockingConnectHandler *callback, void *opaque);
+int inet_connect(const char *str, Error **errp);
+int inet_nonblocking_connect(const char *str,
+ NonBlockingConnectHandler *callback,
+ void *opaque, Error **errp);
+
+int inet_dgram_opts(QemuOpts *opts, Error **errp);
+const char *inet_strfamily(int family);
+
+int unix_listen_opts(QemuOpts *opts, Error **errp);
+int unix_listen(const char *path, char *ostr, int olen, Error **errp);
+int unix_connect_opts(QemuOpts *opts, Error **errp,
+ NonBlockingConnectHandler *callback, void *opaque);
+int unix_connect(const char *path, Error **errp);
+int unix_nonblocking_connect(const char *str,
+ NonBlockingConnectHandler *callback,
+ void *opaque, Error **errp);
+
+SocketAddress *socket_parse(const char *str, Error **errp);
+int socket_connect(SocketAddress *addr, Error **errp,
+ NonBlockingConnectHandler *callback, void *opaque);
+int socket_listen(SocketAddress *addr, Error **errp);
+int socket_dgram(SocketAddress *remote, SocketAddress *local, Error **errp);
+
+/* Old, ipv4 only bits. Don't use for new code. */
+int parse_host_port(struct sockaddr_in *saddr, const char *str);
+int socket_init(void);
+
+#endif /* QEMU_SOCKET_H */
diff --git a/contrib/qemu/include/qemu/thread-posix.h b/contrib/qemu/include/qemu/thread-posix.h
new file mode 100644
index 000000000..0f30dccb5
--- /dev/null
+++ b/contrib/qemu/include/qemu/thread-posix.h
@@ -0,0 +1,28 @@
+#ifndef __QEMU_THREAD_POSIX_H
+#define __QEMU_THREAD_POSIX_H 1
+#include "pthread.h"
+#include <semaphore.h>
+
+struct QemuMutex {
+ pthread_mutex_t lock;
+};
+
+struct QemuCond {
+ pthread_cond_t cond;
+};
+
+struct QemuSemaphore {
+#if defined(__APPLE__) || defined(__NetBSD__)
+ pthread_mutex_t lock;
+ pthread_cond_t cond;
+ int count;
+#else
+ sem_t sem;
+#endif
+};
+
+struct QemuThread {
+ pthread_t thread;
+};
+
+#endif
diff --git a/contrib/qemu/include/qemu/thread.h b/contrib/qemu/include/qemu/thread.h
new file mode 100644
index 000000000..c02404b9f
--- /dev/null
+++ b/contrib/qemu/include/qemu/thread.h
@@ -0,0 +1,56 @@
+#ifndef __QEMU_THREAD_H
+#define __QEMU_THREAD_H 1
+
+#include <inttypes.h>
+#include <stdbool.h>
+
+typedef struct QemuMutex QemuMutex;
+typedef struct QemuCond QemuCond;
+typedef struct QemuSemaphore QemuSemaphore;
+typedef struct QemuThread QemuThread;
+
+#ifdef _WIN32
+#include "qemu/thread-win32.h"
+#else
+#include "qemu/thread-posix.h"
+#endif
+
+#define QEMU_THREAD_JOINABLE 0
+#define QEMU_THREAD_DETACHED 1
+
+void qemu_mutex_init(QemuMutex *mutex);
+void qemu_mutex_destroy(QemuMutex *mutex);
+void qemu_mutex_lock(QemuMutex *mutex);
+int qemu_mutex_trylock(QemuMutex *mutex);
+void qemu_mutex_unlock(QemuMutex *mutex);
+
+#define rcu_read_lock() do { } while (0)
+#define rcu_read_unlock() do { } while (0)
+
+void qemu_cond_init(QemuCond *cond);
+void qemu_cond_destroy(QemuCond *cond);
+
+/*
+ * IMPORTANT: The implementation does not guarantee that pthread_cond_signal
+ * and pthread_cond_broadcast can be called except while the same mutex is
+ * held as in the corresponding pthread_cond_wait calls!
+ */
+void qemu_cond_signal(QemuCond *cond);
+void qemu_cond_broadcast(QemuCond *cond);
+void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex);
+
+void qemu_sem_init(QemuSemaphore *sem, int init);
+void qemu_sem_post(QemuSemaphore *sem);
+void qemu_sem_wait(QemuSemaphore *sem);
+int qemu_sem_timedwait(QemuSemaphore *sem, int ms);
+void qemu_sem_destroy(QemuSemaphore *sem);
+
+void qemu_thread_create(QemuThread *thread,
+ void *(*start_routine)(void *),
+ void *arg, int mode);
+void *qemu_thread_join(QemuThread *thread);
+void qemu_thread_get_self(QemuThread *thread);
+bool qemu_thread_is_self(QemuThread *thread);
+void qemu_thread_exit(void *retval);
+
+#endif
diff --git a/contrib/qemu/include/qemu/timer.h b/contrib/qemu/include/qemu/timer.h
new file mode 100644
index 000000000..9dd206ce7
--- /dev/null
+++ b/contrib/qemu/include/qemu/timer.h
@@ -0,0 +1,305 @@
+#ifndef QEMU_TIMER_H
+#define QEMU_TIMER_H
+
+#include "qemu-common.h"
+#include "qemu/main-loop.h"
+#include "qemu/notify.h"
+
+/* timers */
+
+#define SCALE_MS 1000000
+#define SCALE_US 1000
+#define SCALE_NS 1
+
+typedef struct QEMUClock QEMUClock;
+typedef void QEMUTimerCB(void *opaque);
+
+/* The real time clock should be used only for stuff which does not
+ change the virtual machine state, as it is run even if the virtual
+ machine is stopped. The real time clock has a frequency of 1000
+ Hz. */
+extern QEMUClock *rt_clock;
+
+/* The virtual clock is only run during the emulation. It is stopped
+ when the virtual machine is stopped. Virtual timers use a high
+ precision clock, usually cpu cycles (use ticks_per_sec). */
+extern QEMUClock *vm_clock;
+
+/* The host clock should be use for device models that emulate accurate
+ real time sources. It will continue to run when the virtual machine
+ is suspended, and it will reflect system time changes the host may
+ undergo (e.g. due to NTP). The host clock has the same precision as
+ the virtual clock. */
+extern QEMUClock *host_clock;
+
+int64_t qemu_get_clock_ns(QEMUClock *clock);
+int64_t qemu_clock_has_timers(QEMUClock *clock);
+int64_t qemu_clock_expired(QEMUClock *clock);
+int64_t qemu_clock_deadline(QEMUClock *clock);
+void qemu_clock_enable(QEMUClock *clock, bool enabled);
+void qemu_clock_warp(QEMUClock *clock);
+
+void qemu_register_clock_reset_notifier(QEMUClock *clock, Notifier *notifier);
+void qemu_unregister_clock_reset_notifier(QEMUClock *clock,
+ Notifier *notifier);
+
+QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale,
+ QEMUTimerCB *cb, void *opaque);
+void qemu_free_timer(QEMUTimer *ts);
+void qemu_del_timer(QEMUTimer *ts);
+void qemu_mod_timer_ns(QEMUTimer *ts, int64_t expire_time);
+void qemu_mod_timer(QEMUTimer *ts, int64_t expire_time);
+bool qemu_timer_pending(QEMUTimer *ts);
+bool qemu_timer_expired(QEMUTimer *timer_head, int64_t current_time);
+uint64_t qemu_timer_expire_time_ns(QEMUTimer *ts);
+
+void qemu_run_timers(QEMUClock *clock);
+void qemu_run_all_timers(void);
+void configure_alarms(char const *opt);
+void init_clocks(void);
+int init_timer_alarm(void);
+
+int64_t cpu_get_ticks(void);
+void cpu_enable_ticks(void);
+void cpu_disable_ticks(void);
+
+static inline QEMUTimer *qemu_new_timer_ns(QEMUClock *clock, QEMUTimerCB *cb,
+ void *opaque)
+{
+ return qemu_new_timer(clock, SCALE_NS, cb, opaque);
+}
+
+static inline QEMUTimer *qemu_new_timer_ms(QEMUClock *clock, QEMUTimerCB *cb,
+ void *opaque)
+{
+ return qemu_new_timer(clock, SCALE_MS, cb, opaque);
+}
+
+static inline int64_t qemu_get_clock_ms(QEMUClock *clock)
+{
+ return qemu_get_clock_ns(clock) / SCALE_MS;
+}
+
+static inline int64_t get_ticks_per_sec(void)
+{
+ return 1000000000LL;
+}
+
+/* real time host monotonic timer */
+static inline int64_t get_clock_realtime(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return tv.tv_sec * 1000000000LL + (tv.tv_usec * 1000);
+}
+
+/* Warning: don't insert tracepoints into these functions, they are
+ also used by simpletrace backend and tracepoints would cause
+ an infinite recursion! */
+#ifdef _WIN32
+extern int64_t clock_freq;
+
+static inline int64_t get_clock(void)
+{
+ LARGE_INTEGER ti;
+ QueryPerformanceCounter(&ti);
+ return muldiv64(ti.QuadPart, get_ticks_per_sec(), clock_freq);
+}
+
+#else
+
+extern int use_rt_clock;
+
+static inline int64_t get_clock(void)
+{
+#ifdef CLOCK_MONOTONIC
+ if (use_rt_clock) {
+ struct timespec ts;
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ return ts.tv_sec * 1000000000LL + ts.tv_nsec;
+ } else
+#endif
+ {
+ /* XXX: using gettimeofday leads to problems if the date
+ changes, so it should be avoided. */
+ return get_clock_realtime();
+ }
+}
+#endif
+
+void qemu_get_timer(QEMUFile *f, QEMUTimer *ts);
+void qemu_put_timer(QEMUFile *f, QEMUTimer *ts);
+
+/* icount */
+int64_t cpu_get_icount(void);
+int64_t cpu_get_clock(void);
+
+/*******************************************/
+/* host CPU ticks (if available) */
+
+#if defined(_ARCH_PPC)
+
+static inline int64_t cpu_get_real_ticks(void)
+{
+ int64_t retval;
+#ifdef _ARCH_PPC64
+ /* This reads timebase in one 64bit go and includes Cell workaround from:
+ http://ozlabs.org/pipermail/linuxppc-dev/2006-October/027052.html
+ */
+ __asm__ __volatile__ ("mftb %0\n\t"
+ "cmpwi %0,0\n\t"
+ "beq- $-8"
+ : "=r" (retval));
+#else
+ /* http://ozlabs.org/pipermail/linuxppc-dev/1999-October/003889.html */
+ unsigned long junk;
+ __asm__ __volatile__ ("mfspr %1,269\n\t" /* mftbu */
+ "mfspr %L0,268\n\t" /* mftb */
+ "mfspr %0,269\n\t" /* mftbu */
+ "cmpw %0,%1\n\t"
+ "bne $-16"
+ : "=r" (retval), "=r" (junk));
+#endif
+ return retval;
+}
+
+#elif defined(__i386__)
+
+static inline int64_t cpu_get_real_ticks(void)
+{
+ int64_t val;
+ asm volatile ("rdtsc" : "=A" (val));
+ return val;
+}
+
+#elif defined(__x86_64__)
+
+static inline int64_t cpu_get_real_ticks(void)
+{
+ uint32_t low,high;
+ int64_t val;
+ asm volatile("rdtsc" : "=a" (low), "=d" (high));
+ val = high;
+ val <<= 32;
+ val |= low;
+ return val;
+}
+
+#elif defined(__hppa__)
+
+static inline int64_t cpu_get_real_ticks(void)
+{
+ int val;
+ asm volatile ("mfctl %%cr16, %0" : "=r"(val));
+ return val;
+}
+
+#elif defined(__ia64)
+
+static inline int64_t cpu_get_real_ticks(void)
+{
+ int64_t val;
+ asm volatile ("mov %0 = ar.itc" : "=r"(val) :: "memory");
+ return val;
+}
+
+#elif defined(__s390__)
+
+static inline int64_t cpu_get_real_ticks(void)
+{
+ int64_t val;
+ asm volatile("stck 0(%1)" : "=m" (val) : "a" (&val) : "cc");
+ return val;
+}
+
+#elif defined(__sparc__)
+
+static inline int64_t cpu_get_real_ticks (void)
+{
+#if defined(_LP64)
+ uint64_t rval;
+ asm volatile("rd %%tick,%0" : "=r"(rval));
+ return rval;
+#else
+ /* We need an %o or %g register for this. For recent enough gcc
+ there is an "h" constraint for that. Don't bother with that. */
+ union {
+ uint64_t i64;
+ struct {
+ uint32_t high;
+ uint32_t low;
+ } i32;
+ } rval;
+ asm volatile("rd %%tick,%%g1; srlx %%g1,32,%0; mov %%g1,%1"
+ : "=r"(rval.i32.high), "=r"(rval.i32.low) : : "g1");
+ return rval.i64;
+#endif
+}
+
+#elif defined(__mips__) && \
+ ((defined(__mips_isa_rev) && __mips_isa_rev >= 2) || defined(__linux__))
+/*
+ * binutils wants to use rdhwr only on mips32r2
+ * but as linux kernel emulate it, it's fine
+ * to use it.
+ *
+ */
+#define MIPS_RDHWR(rd, value) { \
+ __asm__ __volatile__ (".set push\n\t" \
+ ".set mips32r2\n\t" \
+ "rdhwr %0, "rd"\n\t" \
+ ".set pop" \
+ : "=r" (value)); \
+ }
+
+static inline int64_t cpu_get_real_ticks(void)
+{
+ /* On kernels >= 2.6.25 rdhwr <reg>, $2 and $3 are emulated */
+ uint32_t count;
+ static uint32_t cyc_per_count = 0;
+
+ if (!cyc_per_count) {
+ MIPS_RDHWR("$3", cyc_per_count);
+ }
+
+ MIPS_RDHWR("$2", count);
+ return (int64_t)(count * cyc_per_count);
+}
+
+#elif defined(__alpha__)
+
+static inline int64_t cpu_get_real_ticks(void)
+{
+ uint64_t cc;
+ uint32_t cur, ofs;
+
+ asm volatile("rpcc %0" : "=r"(cc));
+ cur = cc;
+ ofs = cc >> 32;
+ return cur - ofs;
+}
+
+#else
+/* The host CPU doesn't have an easily accessible cycle counter.
+ Just return a monotonically increasing value. This will be
+ totally wrong, but hopefully better than nothing. */
+static inline int64_t cpu_get_real_ticks (void)
+{
+ static int64_t ticks = 0;
+ return ticks++;
+}
+#endif
+
+#ifdef CONFIG_PROFILER
+static inline int64_t profile_getclock(void)
+{
+ return cpu_get_real_ticks();
+}
+
+extern int64_t qemu_time, qemu_time_start;
+extern int64_t tlb_flush_time;
+extern int64_t dev_time;
+#endif
+
+#endif
diff --git a/contrib/qemu/include/qemu/typedefs.h b/contrib/qemu/include/qemu/typedefs.h
new file mode 100644
index 000000000..ac9f8d41a
--- /dev/null
+++ b/contrib/qemu/include/qemu/typedefs.h
@@ -0,0 +1,69 @@
+#ifndef QEMU_TYPEDEFS_H
+#define QEMU_TYPEDEFS_H
+
+/* A load of opaque types so that device init declarations don't have to
+ pull in all the real definitions. */
+typedef struct QEMUTimer QEMUTimer;
+typedef struct QEMUFile QEMUFile;
+typedef struct QEMUBH QEMUBH;
+
+struct Monitor;
+typedef struct Monitor Monitor;
+typedef struct MigrationParams MigrationParams;
+
+typedef struct Property Property;
+typedef struct PropertyInfo PropertyInfo;
+typedef struct CompatProperty CompatProperty;
+typedef struct DeviceState DeviceState;
+typedef struct BusState BusState;
+typedef struct BusClass BusClass;
+
+typedef struct AddressSpace AddressSpace;
+typedef struct MemoryRegion MemoryRegion;
+typedef struct MemoryRegionSection MemoryRegionSection;
+
+typedef struct MemoryMappingList MemoryMappingList;
+
+typedef struct NICInfo NICInfo;
+typedef struct HCIInfo HCIInfo;
+typedef struct AudioState AudioState;
+typedef struct BlockDriverState BlockDriverState;
+typedef struct DriveInfo DriveInfo;
+typedef struct DisplayState DisplayState;
+typedef struct DisplayChangeListener DisplayChangeListener;
+typedef struct DisplaySurface DisplaySurface;
+typedef struct PixelFormat PixelFormat;
+typedef struct QemuConsole QemuConsole;
+typedef struct CharDriverState CharDriverState;
+typedef struct MACAddr MACAddr;
+typedef struct NetClientState NetClientState;
+typedef struct i2c_bus i2c_bus;
+typedef struct ISABus ISABus;
+typedef struct ISADevice ISADevice;
+typedef struct SMBusDevice SMBusDevice;
+typedef struct PCIHostState PCIHostState;
+typedef struct PCIExpressHost PCIExpressHost;
+typedef struct PCIBus PCIBus;
+typedef struct PCIDevice PCIDevice;
+typedef struct PCIExpressDevice PCIExpressDevice;
+typedef struct PCIBridge PCIBridge;
+typedef struct PCIEAERMsg PCIEAERMsg;
+typedef struct PCIEAERLog PCIEAERLog;
+typedef struct PCIEAERErr PCIEAERErr;
+typedef struct PCIEPort PCIEPort;
+typedef struct PCIESlot PCIESlot;
+typedef struct MSIMessage MSIMessage;
+typedef struct SerialState SerialState;
+typedef struct PCMCIACardState PCMCIACardState;
+typedef struct MouseTransformInfo MouseTransformInfo;
+typedef struct uWireSlave uWireSlave;
+typedef struct I2SCodec I2SCodec;
+typedef struct SSIBus SSIBus;
+typedef struct EventNotifier EventNotifier;
+typedef struct VirtIODevice VirtIODevice;
+typedef struct QEMUSGList QEMUSGList;
+typedef struct SHPCDevice SHPCDevice;
+typedef struct FWCfgState FWCfgState;
+typedef struct PcGuestInfo PcGuestInfo;
+
+#endif /* QEMU_TYPEDEFS_H */
diff --git a/contrib/qemu/include/sysemu/os-posix.h b/contrib/qemu/include/sysemu/os-posix.h
new file mode 100644
index 000000000..25d0b2a73
--- /dev/null
+++ b/contrib/qemu/include/sysemu/os-posix.h
@@ -0,0 +1,52 @@
+/*
+ * posix specific declarations
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2010 Jes Sorensen <Jes.Sorensen@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef QEMU_OS_POSIX_H
+#define QEMU_OS_POSIX_H
+
+void os_set_line_buffering(void);
+void os_set_proc_name(const char *s);
+void os_setup_signal_handling(void);
+void os_daemonize(void);
+void os_setup_post(void);
+int os_mlock(void);
+
+typedef struct timeval qemu_timeval;
+#define qemu_gettimeofday(tp) gettimeofday(tp, NULL)
+
+#ifndef CONFIG_UTIMENSAT
+#ifndef UTIME_NOW
+# define UTIME_NOW ((1l << 30) - 1l)
+#endif
+#ifndef UTIME_OMIT
+# define UTIME_OMIT ((1l << 30) - 2l)
+#endif
+#endif
+typedef struct timespec qemu_timespec;
+int qemu_utimens(const char *path, const qemu_timespec *times);
+
+bool is_daemonized(void);
+
+#endif
diff --git a/contrib/qemu/include/sysemu/sysemu.h b/contrib/qemu/include/sysemu/sysemu.h
new file mode 100644
index 000000000..3caeb66eb
--- /dev/null
+++ b/contrib/qemu/include/sysemu/sysemu.h
@@ -0,0 +1,200 @@
+#ifndef SYSEMU_H
+#define SYSEMU_H
+/* Misc. things related to the system emulator. */
+
+#include "qemu/typedefs.h"
+#include "qemu/option.h"
+#include "qemu/queue.h"
+#include "qemu/timer.h"
+#include "qapi-types.h"
+#include "qemu/notify.h"
+#include "qemu/main-loop.h"
+
+/* vl.c */
+
+extern const char *bios_name;
+
+extern const char *qemu_name;
+extern uint8_t qemu_uuid[];
+int qemu_uuid_parse(const char *str, uint8_t *uuid);
+#define UUID_FMT "%02hhx%02hhx%02hhx%02hhx-%02hhx%02hhx-%02hhx%02hhx-%02hhx%02hhx-%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx"
+
+bool runstate_check(RunState state);
+void runstate_set(RunState new_state);
+int runstate_is_running(void);
+bool runstate_needs_reset(void);
+typedef struct vm_change_state_entry VMChangeStateEntry;
+typedef void VMChangeStateHandler(void *opaque, int running, RunState state);
+
+VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb,
+ void *opaque);
+void qemu_del_vm_change_state_handler(VMChangeStateEntry *e);
+void vm_state_notify(int running, RunState state);
+
+#define VMRESET_SILENT false
+#define VMRESET_REPORT true
+
+void vm_start(void);
+int vm_stop(RunState state);
+int vm_stop_force_state(RunState state);
+
+typedef enum WakeupReason {
+ QEMU_WAKEUP_REASON_OTHER = 0,
+ QEMU_WAKEUP_REASON_RTC,
+ QEMU_WAKEUP_REASON_PMTIMER,
+} WakeupReason;
+
+void qemu_system_reset_request(void);
+void qemu_system_suspend_request(void);
+void qemu_register_suspend_notifier(Notifier *notifier);
+void qemu_system_wakeup_request(WakeupReason reason);
+void qemu_system_wakeup_enable(WakeupReason reason, bool enabled);
+void qemu_register_wakeup_notifier(Notifier *notifier);
+void qemu_system_shutdown_request(void);
+void qemu_system_powerdown_request(void);
+void qemu_register_powerdown_notifier(Notifier *notifier);
+void qemu_system_debug_request(void);
+void qemu_system_vmstop_request(RunState reason);
+int qemu_shutdown_requested_get(void);
+int qemu_reset_requested_get(void);
+void qemu_system_killed(int signal, pid_t pid);
+void qemu_devices_reset(void);
+void qemu_system_reset(bool report);
+
+void qemu_add_exit_notifier(Notifier *notify);
+void qemu_remove_exit_notifier(Notifier *notify);
+
+void qemu_add_machine_init_done_notifier(Notifier *notify);
+
+void do_savevm(Monitor *mon, const QDict *qdict);
+int load_vmstate(const char *name);
+void do_delvm(Monitor *mon, const QDict *qdict);
+void do_info_snapshots(Monitor *mon, const QDict *qdict);
+
+void qemu_announce_self(void);
+
+bool qemu_savevm_state_blocked(Error **errp);
+void qemu_savevm_state_begin(QEMUFile *f,
+ const MigrationParams *params);
+int qemu_savevm_state_iterate(QEMUFile *f);
+void qemu_savevm_state_complete(QEMUFile *f);
+void qemu_savevm_state_cancel(void);
+uint64_t qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size);
+int qemu_loadvm_state(QEMUFile *f);
+
+/* SLIRP */
+void do_info_slirp(Monitor *mon);
+
+typedef enum DisplayType
+{
+ DT_DEFAULT,
+ DT_CURSES,
+ DT_SDL,
+ DT_GTK,
+ DT_NOGRAPHIC,
+ DT_NONE,
+} DisplayType;
+
+extern int autostart;
+
+typedef enum {
+ VGA_NONE, VGA_STD, VGA_CIRRUS, VGA_VMWARE, VGA_XENFB, VGA_QXL,
+} VGAInterfaceType;
+
+extern int vga_interface_type;
+#define xenfb_enabled (vga_interface_type == VGA_XENFB)
+#define qxl_enabled (vga_interface_type == VGA_QXL)
+
+extern int graphic_width;
+extern int graphic_height;
+extern int graphic_depth;
+extern DisplayType display_type;
+extern const char *keyboard_layout;
+extern int win2k_install_hack;
+extern int alt_grab;
+extern int ctrl_grab;
+extern int smp_cpus;
+extern int max_cpus;
+extern int cursor_hide;
+extern int graphic_rotate;
+extern int no_quit;
+extern int no_shutdown;
+extern int semihosting_enabled;
+extern int old_param;
+extern int boot_menu;
+extern uint8_t *boot_splash_filedata;
+extern size_t boot_splash_filedata_size;
+extern uint8_t qemu_extra_params_fw[2];
+extern QEMUClock *rtc_clock;
+
+#define MAX_NODES 64
+#define MAX_CPUMASK_BITS 255
+extern int nb_numa_nodes;
+extern uint64_t node_mem[MAX_NODES];
+extern unsigned long *node_cpumask[MAX_NODES];
+
+#define MAX_OPTION_ROMS 16
+typedef struct QEMUOptionRom {
+ const char *name;
+ int32_t bootindex;
+} QEMUOptionRom;
+extern QEMUOptionRom option_rom[MAX_OPTION_ROMS];
+extern int nb_option_roms;
+
+#define MAX_PROM_ENVS 128
+extern const char *prom_envs[MAX_PROM_ENVS];
+extern unsigned int nb_prom_envs;
+
+/* pci-hotplug */
+void pci_device_hot_add(Monitor *mon, const QDict *qdict);
+int pci_drive_hot_add(Monitor *mon, const QDict *qdict, DriveInfo *dinfo);
+void do_pci_device_hot_remove(Monitor *mon, const QDict *qdict);
+
+/* generic hotplug */
+void drive_hot_add(Monitor *mon, const QDict *qdict);
+
+/* CPU hotplug */
+void qemu_register_cpu_added_notifier(Notifier *notifier);
+
+/* pcie aer error injection */
+void pcie_aer_inject_error_print(Monitor *mon, const QObject *data);
+int do_pcie_aer_inject_error(Monitor *mon,
+ const QDict *qdict, QObject **ret_data);
+
+/* serial ports */
+
+#define MAX_SERIAL_PORTS 4
+
+extern CharDriverState *serial_hds[MAX_SERIAL_PORTS];
+
+/* parallel ports */
+
+#define MAX_PARALLEL_PORTS 3
+
+extern CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];
+
+void do_usb_add(Monitor *mon, const QDict *qdict);
+void do_usb_del(Monitor *mon, const QDict *qdict);
+void usb_info(Monitor *mon, const QDict *qdict);
+
+void rtc_change_mon_event(struct tm *tm);
+
+void add_boot_device_path(int32_t bootindex, DeviceState *dev,
+ const char *suffix);
+char *get_boot_devices_list(size_t *size);
+
+DeviceState *get_boot_device(uint32_t position);
+
+QemuOpts *qemu_get_machine_opts(void);
+
+bool usb_enabled(bool default_usb);
+
+extern QemuOptsList qemu_drive_opts;
+extern QemuOptsList qemu_chardev_opts;
+extern QemuOptsList qemu_device_opts;
+extern QemuOptsList qemu_netdev_opts;
+extern QemuOptsList qemu_net_opts;
+extern QemuOptsList qemu_global_opts;
+extern QemuOptsList qemu_mon_opts;
+
+#endif
diff --git a/contrib/qemu/include/trace.h b/contrib/qemu/include/trace.h
new file mode 100644
index 000000000..c15f49812
--- /dev/null
+++ b/contrib/qemu/include/trace.h
@@ -0,0 +1,6 @@
+#ifndef TRACE_H
+#define TRACE_H
+
+#include "trace/generated-tracers.h"
+
+#endif /* TRACE_H */
diff --git a/contrib/qemu/nop-symbols.c b/contrib/qemu/nop-symbols.c
new file mode 100644
index 000000000..ae93a3d3b
--- /dev/null
+++ b/contrib/qemu/nop-symbols.c
@@ -0,0 +1,12 @@
+int notifier_with_return_list_init () { return 0; }
+int notifier_with_return_list_notify () { return 0; }
+int notifier_with_return_list_add () { return 0; }
+int notifier_list_init () { return 0; }
+int notifier_list_notify () { return 0; }
+int notifier_list_add () { return 0; }
+int monitor_protocol_event () { return 0; }
+int block_job_cancel_sync () { return 0; }
+int block_job_iostatus_reset () { return 0; }
+int vm_stop () { return 0; }
+int qemu_get_aio_context () { return 0; }
+
diff --git a/contrib/qemu/qapi-types.h b/contrib/qemu/qapi-types.h
new file mode 100644
index 000000000..082b06d1c
--- /dev/null
+++ b/contrib/qemu/qapi-types.h
@@ -0,0 +1,2746 @@
+/* AUTOMATICALLY GENERATED, DO NOT MODIFY */
+
+/*
+ * schema-defined QAPI types
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef QAPI_TYPES_H
+#define QAPI_TYPES_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+
+#ifndef QAPI_TYPES_BUILTIN_STRUCT_DECL_H
+#define QAPI_TYPES_BUILTIN_STRUCT_DECL_H
+
+
+typedef struct strList
+{
+ union {
+ char * value;
+ uint64_t padding;
+ };
+ struct strList *next;
+} strList;
+
+typedef struct intList
+{
+ union {
+ int64_t value;
+ uint64_t padding;
+ };
+ struct intList *next;
+} intList;
+
+typedef struct numberList
+{
+ union {
+ double value;
+ uint64_t padding;
+ };
+ struct numberList *next;
+} numberList;
+
+typedef struct boolList
+{
+ union {
+ bool value;
+ uint64_t padding;
+ };
+ struct boolList *next;
+} boolList;
+
+typedef struct int8List
+{
+ union {
+ int8_t value;
+ uint64_t padding;
+ };
+ struct int8List *next;
+} int8List;
+
+typedef struct int16List
+{
+ union {
+ int16_t value;
+ uint64_t padding;
+ };
+ struct int16List *next;
+} int16List;
+
+typedef struct int32List
+{
+ union {
+ int32_t value;
+ uint64_t padding;
+ };
+ struct int32List *next;
+} int32List;
+
+typedef struct int64List
+{
+ union {
+ int64_t value;
+ uint64_t padding;
+ };
+ struct int64List *next;
+} int64List;
+
+typedef struct uint8List
+{
+ union {
+ uint8_t value;
+ uint64_t padding;
+ };
+ struct uint8List *next;
+} uint8List;
+
+typedef struct uint16List
+{
+ union {
+ uint16_t value;
+ uint64_t padding;
+ };
+ struct uint16List *next;
+} uint16List;
+
+typedef struct uint32List
+{
+ union {
+ uint32_t value;
+ uint64_t padding;
+ };
+ struct uint32List *next;
+} uint32List;
+
+typedef struct uint64List
+{
+ union {
+ uint64_t value;
+ uint64_t padding;
+ };
+ struct uint64List *next;
+} uint64List;
+
+#endif /* QAPI_TYPES_BUILTIN_STRUCT_DECL_H */
+
+
+extern const char *ErrorClass_lookup[];
+typedef enum ErrorClass
+{
+ ERROR_CLASS_GENERIC_ERROR = 0,
+ ERROR_CLASS_COMMAND_NOT_FOUND = 1,
+ ERROR_CLASS_DEVICE_ENCRYPTED = 2,
+ ERROR_CLASS_DEVICE_NOT_ACTIVE = 3,
+ ERROR_CLASS_DEVICE_NOT_FOUND = 4,
+ ERROR_CLASS_K_V_M_MISSING_CAP = 5,
+ ERROR_CLASS_MAX = 6,
+} ErrorClass;
+
+typedef struct ErrorClassList
+{
+ ErrorClass value;
+ struct ErrorClassList *next;
+} ErrorClassList;
+
+
+typedef struct NameInfo NameInfo;
+
+typedef struct NameInfoList
+{
+ union {
+ NameInfo *value;
+ uint64_t padding;
+ };
+ struct NameInfoList *next;
+} NameInfoList;
+
+
+typedef struct VersionInfo VersionInfo;
+
+typedef struct VersionInfoList
+{
+ union {
+ VersionInfo *value;
+ uint64_t padding;
+ };
+ struct VersionInfoList *next;
+} VersionInfoList;
+
+
+typedef struct KvmInfo KvmInfo;
+
+typedef struct KvmInfoList
+{
+ union {
+ KvmInfo *value;
+ uint64_t padding;
+ };
+ struct KvmInfoList *next;
+} KvmInfoList;
+
+extern const char *RunState_lookup[];
+typedef enum RunState
+{
+ RUN_STATE_DEBUG = 0,
+ RUN_STATE_INMIGRATE = 1,
+ RUN_STATE_INTERNAL_ERROR = 2,
+ RUN_STATE_IO_ERROR = 3,
+ RUN_STATE_PAUSED = 4,
+ RUN_STATE_POSTMIGRATE = 5,
+ RUN_STATE_PRELAUNCH = 6,
+ RUN_STATE_FINISH_MIGRATE = 7,
+ RUN_STATE_RESTORE_VM = 8,
+ RUN_STATE_RUNNING = 9,
+ RUN_STATE_SAVE_VM = 10,
+ RUN_STATE_SHUTDOWN = 11,
+ RUN_STATE_SUSPENDED = 12,
+ RUN_STATE_WATCHDOG = 13,
+ RUN_STATE_GUEST_PANICKED = 14,
+ RUN_STATE_MAX = 15,
+} RunState;
+
+typedef struct RunStateList
+{
+ RunState value;
+ struct RunStateList *next;
+} RunStateList;
+
+
+typedef struct SnapshotInfo SnapshotInfo;
+
+typedef struct SnapshotInfoList
+{
+ union {
+ SnapshotInfo *value;
+ uint64_t padding;
+ };
+ struct SnapshotInfoList *next;
+} SnapshotInfoList;
+
+
+typedef struct ImageInfo ImageInfo;
+
+typedef struct ImageInfoList
+{
+ union {
+ ImageInfo *value;
+ uint64_t padding;
+ };
+ struct ImageInfoList *next;
+} ImageInfoList;
+
+
+typedef struct ImageCheck ImageCheck;
+
+typedef struct ImageCheckList
+{
+ union {
+ ImageCheck *value;
+ uint64_t padding;
+ };
+ struct ImageCheckList *next;
+} ImageCheckList;
+
+
+typedef struct StatusInfo StatusInfo;
+
+typedef struct StatusInfoList
+{
+ union {
+ StatusInfo *value;
+ uint64_t padding;
+ };
+ struct StatusInfoList *next;
+} StatusInfoList;
+
+
+typedef struct UuidInfo UuidInfo;
+
+typedef struct UuidInfoList
+{
+ union {
+ UuidInfo *value;
+ uint64_t padding;
+ };
+ struct UuidInfoList *next;
+} UuidInfoList;
+
+
+typedef struct ChardevInfo ChardevInfo;
+
+typedef struct ChardevInfoList
+{
+ union {
+ ChardevInfo *value;
+ uint64_t padding;
+ };
+ struct ChardevInfoList *next;
+} ChardevInfoList;
+
+extern const char *DataFormat_lookup[];
+typedef enum DataFormat
+{
+ DATA_FORMAT_UTF8 = 0,
+ DATA_FORMAT_BASE64 = 1,
+ DATA_FORMAT_MAX = 2,
+} DataFormat;
+
+typedef struct DataFormatList
+{
+ DataFormat value;
+ struct DataFormatList *next;
+} DataFormatList;
+
+
+typedef struct CommandInfo CommandInfo;
+
+typedef struct CommandInfoList
+{
+ union {
+ CommandInfo *value;
+ uint64_t padding;
+ };
+ struct CommandInfoList *next;
+} CommandInfoList;
+
+
+typedef struct EventInfo EventInfo;
+
+typedef struct EventInfoList
+{
+ union {
+ EventInfo *value;
+ uint64_t padding;
+ };
+ struct EventInfoList *next;
+} EventInfoList;
+
+
+typedef struct MigrationStats MigrationStats;
+
+typedef struct MigrationStatsList
+{
+ union {
+ MigrationStats *value;
+ uint64_t padding;
+ };
+ struct MigrationStatsList *next;
+} MigrationStatsList;
+
+
+typedef struct XBZRLECacheStats XBZRLECacheStats;
+
+typedef struct XBZRLECacheStatsList
+{
+ union {
+ XBZRLECacheStats *value;
+ uint64_t padding;
+ };
+ struct XBZRLECacheStatsList *next;
+} XBZRLECacheStatsList;
+
+
+typedef struct MigrationInfo MigrationInfo;
+
+typedef struct MigrationInfoList
+{
+ union {
+ MigrationInfo *value;
+ uint64_t padding;
+ };
+ struct MigrationInfoList *next;
+} MigrationInfoList;
+
+extern const char *MigrationCapability_lookup[];
+typedef enum MigrationCapability
+{
+ MIGRATION_CAPABILITY_XBZRLE = 0,
+ MIGRATION_CAPABILITY_X_RDMA_PIN_ALL = 1,
+ MIGRATION_CAPABILITY_AUTO_CONVERGE = 2,
+ MIGRATION_CAPABILITY_MAX = 3,
+} MigrationCapability;
+
+typedef struct MigrationCapabilityList
+{
+ MigrationCapability value;
+ struct MigrationCapabilityList *next;
+} MigrationCapabilityList;
+
+
+typedef struct MigrationCapabilityStatus MigrationCapabilityStatus;
+
+typedef struct MigrationCapabilityStatusList
+{
+ union {
+ MigrationCapabilityStatus *value;
+ uint64_t padding;
+ };
+ struct MigrationCapabilityStatusList *next;
+} MigrationCapabilityStatusList;
+
+
+typedef struct MouseInfo MouseInfo;
+
+typedef struct MouseInfoList
+{
+ union {
+ MouseInfo *value;
+ uint64_t padding;
+ };
+ struct MouseInfoList *next;
+} MouseInfoList;
+
+
+typedef struct CpuInfo CpuInfo;
+
+typedef struct CpuInfoList
+{
+ union {
+ CpuInfo *value;
+ uint64_t padding;
+ };
+ struct CpuInfoList *next;
+} CpuInfoList;
+
+
+typedef struct BlockDeviceInfo BlockDeviceInfo;
+
+typedef struct BlockDeviceInfoList
+{
+ union {
+ BlockDeviceInfo *value;
+ uint64_t padding;
+ };
+ struct BlockDeviceInfoList *next;
+} BlockDeviceInfoList;
+
+extern const char *BlockDeviceIoStatus_lookup[];
+typedef enum BlockDeviceIoStatus
+{
+ BLOCK_DEVICE_IO_STATUS_OK = 0,
+ BLOCK_DEVICE_IO_STATUS_FAILED = 1,
+ BLOCK_DEVICE_IO_STATUS_NOSPACE = 2,
+ BLOCK_DEVICE_IO_STATUS_MAX = 3,
+} BlockDeviceIoStatus;
+
+typedef struct BlockDeviceIoStatusList
+{
+ BlockDeviceIoStatus value;
+ struct BlockDeviceIoStatusList *next;
+} BlockDeviceIoStatusList;
+
+
+typedef struct BlockDirtyInfo BlockDirtyInfo;
+
+typedef struct BlockDirtyInfoList
+{
+ union {
+ BlockDirtyInfo *value;
+ uint64_t padding;
+ };
+ struct BlockDirtyInfoList *next;
+} BlockDirtyInfoList;
+
+
+typedef struct BlockInfo BlockInfo;
+
+typedef struct BlockInfoList
+{
+ union {
+ BlockInfo *value;
+ uint64_t padding;
+ };
+ struct BlockInfoList *next;
+} BlockInfoList;
+
+
+typedef struct BlockDeviceStats BlockDeviceStats;
+
+typedef struct BlockDeviceStatsList
+{
+ union {
+ BlockDeviceStats *value;
+ uint64_t padding;
+ };
+ struct BlockDeviceStatsList *next;
+} BlockDeviceStatsList;
+
+
+typedef struct BlockStats BlockStats;
+
+typedef struct BlockStatsList
+{
+ union {
+ BlockStats *value;
+ uint64_t padding;
+ };
+ struct BlockStatsList *next;
+} BlockStatsList;
+
+
+typedef struct VncClientInfo VncClientInfo;
+
+typedef struct VncClientInfoList
+{
+ union {
+ VncClientInfo *value;
+ uint64_t padding;
+ };
+ struct VncClientInfoList *next;
+} VncClientInfoList;
+
+
+typedef struct VncInfo VncInfo;
+
+typedef struct VncInfoList
+{
+ union {
+ VncInfo *value;
+ uint64_t padding;
+ };
+ struct VncInfoList *next;
+} VncInfoList;
+
+
+typedef struct SpiceChannel SpiceChannel;
+
+typedef struct SpiceChannelList
+{
+ union {
+ SpiceChannel *value;
+ uint64_t padding;
+ };
+ struct SpiceChannelList *next;
+} SpiceChannelList;
+
+extern const char *SpiceQueryMouseMode_lookup[];
+typedef enum SpiceQueryMouseMode
+{
+ SPICE_QUERY_MOUSE_MODE_CLIENT = 0,
+ SPICE_QUERY_MOUSE_MODE_SERVER = 1,
+ SPICE_QUERY_MOUSE_MODE_UNKNOWN = 2,
+ SPICE_QUERY_MOUSE_MODE_MAX = 3,
+} SpiceQueryMouseMode;
+
+typedef struct SpiceQueryMouseModeList
+{
+ SpiceQueryMouseMode value;
+ struct SpiceQueryMouseModeList *next;
+} SpiceQueryMouseModeList;
+
+
+typedef struct SpiceInfo SpiceInfo;
+
+typedef struct SpiceInfoList
+{
+ union {
+ SpiceInfo *value;
+ uint64_t padding;
+ };
+ struct SpiceInfoList *next;
+} SpiceInfoList;
+
+
+typedef struct BalloonInfo BalloonInfo;
+
+typedef struct BalloonInfoList
+{
+ union {
+ BalloonInfo *value;
+ uint64_t padding;
+ };
+ struct BalloonInfoList *next;
+} BalloonInfoList;
+
+
+typedef struct PciMemoryRange PciMemoryRange;
+
+typedef struct PciMemoryRangeList
+{
+ union {
+ PciMemoryRange *value;
+ uint64_t padding;
+ };
+ struct PciMemoryRangeList *next;
+} PciMemoryRangeList;
+
+
+typedef struct PciMemoryRegion PciMemoryRegion;
+
+typedef struct PciMemoryRegionList
+{
+ union {
+ PciMemoryRegion *value;
+ uint64_t padding;
+ };
+ struct PciMemoryRegionList *next;
+} PciMemoryRegionList;
+
+
+typedef struct PciBridgeInfo PciBridgeInfo;
+
+typedef struct PciBridgeInfoList
+{
+ union {
+ PciBridgeInfo *value;
+ uint64_t padding;
+ };
+ struct PciBridgeInfoList *next;
+} PciBridgeInfoList;
+
+
+typedef struct PciDeviceInfo PciDeviceInfo;
+
+typedef struct PciDeviceInfoList
+{
+ union {
+ PciDeviceInfo *value;
+ uint64_t padding;
+ };
+ struct PciDeviceInfoList *next;
+} PciDeviceInfoList;
+
+
+typedef struct PciInfo PciInfo;
+
+typedef struct PciInfoList
+{
+ union {
+ PciInfo *value;
+ uint64_t padding;
+ };
+ struct PciInfoList *next;
+} PciInfoList;
+
+extern const char *BlockdevOnError_lookup[];
+typedef enum BlockdevOnError
+{
+ BLOCKDEV_ON_ERROR_REPORT = 0,
+ BLOCKDEV_ON_ERROR_IGNORE = 1,
+ BLOCKDEV_ON_ERROR_ENOSPC = 2,
+ BLOCKDEV_ON_ERROR_STOP = 3,
+ BLOCKDEV_ON_ERROR_MAX = 4,
+} BlockdevOnError;
+
+typedef struct BlockdevOnErrorList
+{
+ BlockdevOnError value;
+ struct BlockdevOnErrorList *next;
+} BlockdevOnErrorList;
+
+extern const char *MirrorSyncMode_lookup[];
+typedef enum MirrorSyncMode
+{
+ MIRROR_SYNC_MODE_TOP = 0,
+ MIRROR_SYNC_MODE_FULL = 1,
+ MIRROR_SYNC_MODE_NONE = 2,
+ MIRROR_SYNC_MODE_MAX = 3,
+} MirrorSyncMode;
+
+typedef struct MirrorSyncModeList
+{
+ MirrorSyncMode value;
+ struct MirrorSyncModeList *next;
+} MirrorSyncModeList;
+
+
+typedef struct BlockJobInfo BlockJobInfo;
+
+typedef struct BlockJobInfoList
+{
+ union {
+ BlockJobInfo *value;
+ uint64_t padding;
+ };
+ struct BlockJobInfoList *next;
+} BlockJobInfoList;
+
+extern const char *NewImageMode_lookup[];
+typedef enum NewImageMode
+{
+ NEW_IMAGE_MODE_EXISTING = 0,
+ NEW_IMAGE_MODE_ABSOLUTE_PATHS = 1,
+ NEW_IMAGE_MODE_MAX = 2,
+} NewImageMode;
+
+typedef struct NewImageModeList
+{
+ NewImageMode value;
+ struct NewImageModeList *next;
+} NewImageModeList;
+
+
+typedef struct BlockdevSnapshot BlockdevSnapshot;
+
+typedef struct BlockdevSnapshotList
+{
+ union {
+ BlockdevSnapshot *value;
+ uint64_t padding;
+ };
+ struct BlockdevSnapshotList *next;
+} BlockdevSnapshotList;
+
+
+typedef struct DriveBackup DriveBackup;
+
+typedef struct DriveBackupList
+{
+ union {
+ DriveBackup *value;
+ uint64_t padding;
+ };
+ struct DriveBackupList *next;
+} DriveBackupList;
+
+
+typedef struct Abort Abort;
+
+typedef struct AbortList
+{
+ union {
+ Abort *value;
+ uint64_t padding;
+ };
+ struct AbortList *next;
+} AbortList;
+
+
+typedef struct TransactionAction TransactionAction;
+
+typedef struct TransactionActionList
+{
+ union {
+ TransactionAction *value;
+ uint64_t padding;
+ };
+ struct TransactionActionList *next;
+} TransactionActionList;
+
+extern const char *TransactionActionKind_lookup[];
+typedef enum TransactionActionKind
+{
+ TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC = 0,
+ TRANSACTION_ACTION_KIND_DRIVE_BACKUP = 1,
+ TRANSACTION_ACTION_KIND_ABORT = 2,
+ TRANSACTION_ACTION_KIND_MAX = 3,
+} TransactionActionKind;
+
+
+typedef struct ObjectPropertyInfo ObjectPropertyInfo;
+
+typedef struct ObjectPropertyInfoList
+{
+ union {
+ ObjectPropertyInfo *value;
+ uint64_t padding;
+ };
+ struct ObjectPropertyInfoList *next;
+} ObjectPropertyInfoList;
+
+
+typedef struct ObjectTypeInfo ObjectTypeInfo;
+
+typedef struct ObjectTypeInfoList
+{
+ union {
+ ObjectTypeInfo *value;
+ uint64_t padding;
+ };
+ struct ObjectTypeInfoList *next;
+} ObjectTypeInfoList;
+
+
+typedef struct DevicePropertyInfo DevicePropertyInfo;
+
+typedef struct DevicePropertyInfoList
+{
+ union {
+ DevicePropertyInfo *value;
+ uint64_t padding;
+ };
+ struct DevicePropertyInfoList *next;
+} DevicePropertyInfoList;
+
+
+typedef struct NetdevNoneOptions NetdevNoneOptions;
+
+typedef struct NetdevNoneOptionsList
+{
+ union {
+ NetdevNoneOptions *value;
+ uint64_t padding;
+ };
+ struct NetdevNoneOptionsList *next;
+} NetdevNoneOptionsList;
+
+
+typedef struct NetLegacyNicOptions NetLegacyNicOptions;
+
+typedef struct NetLegacyNicOptionsList
+{
+ union {
+ NetLegacyNicOptions *value;
+ uint64_t padding;
+ };
+ struct NetLegacyNicOptionsList *next;
+} NetLegacyNicOptionsList;
+
+
+typedef struct String String;
+
+typedef struct StringList
+{
+ union {
+ String *value;
+ uint64_t padding;
+ };
+ struct StringList *next;
+} StringList;
+
+
+typedef struct NetdevUserOptions NetdevUserOptions;
+
+typedef struct NetdevUserOptionsList
+{
+ union {
+ NetdevUserOptions *value;
+ uint64_t padding;
+ };
+ struct NetdevUserOptionsList *next;
+} NetdevUserOptionsList;
+
+
+typedef struct NetdevTapOptions NetdevTapOptions;
+
+typedef struct NetdevTapOptionsList
+{
+ union {
+ NetdevTapOptions *value;
+ uint64_t padding;
+ };
+ struct NetdevTapOptionsList *next;
+} NetdevTapOptionsList;
+
+
+typedef struct NetdevSocketOptions NetdevSocketOptions;
+
+typedef struct NetdevSocketOptionsList
+{
+ union {
+ NetdevSocketOptions *value;
+ uint64_t padding;
+ };
+ struct NetdevSocketOptionsList *next;
+} NetdevSocketOptionsList;
+
+
+typedef struct NetdevVdeOptions NetdevVdeOptions;
+
+typedef struct NetdevVdeOptionsList
+{
+ union {
+ NetdevVdeOptions *value;
+ uint64_t padding;
+ };
+ struct NetdevVdeOptionsList *next;
+} NetdevVdeOptionsList;
+
+
+typedef struct NetdevDumpOptions NetdevDumpOptions;
+
+typedef struct NetdevDumpOptionsList
+{
+ union {
+ NetdevDumpOptions *value;
+ uint64_t padding;
+ };
+ struct NetdevDumpOptionsList *next;
+} NetdevDumpOptionsList;
+
+
+typedef struct NetdevBridgeOptions NetdevBridgeOptions;
+
+typedef struct NetdevBridgeOptionsList
+{
+ union {
+ NetdevBridgeOptions *value;
+ uint64_t padding;
+ };
+ struct NetdevBridgeOptionsList *next;
+} NetdevBridgeOptionsList;
+
+
+typedef struct NetdevHubPortOptions NetdevHubPortOptions;
+
+typedef struct NetdevHubPortOptionsList
+{
+ union {
+ NetdevHubPortOptions *value;
+ uint64_t padding;
+ };
+ struct NetdevHubPortOptionsList *next;
+} NetdevHubPortOptionsList;
+
+
+typedef struct NetClientOptions NetClientOptions;
+
+typedef struct NetClientOptionsList
+{
+ union {
+ NetClientOptions *value;
+ uint64_t padding;
+ };
+ struct NetClientOptionsList *next;
+} NetClientOptionsList;
+
+extern const char *NetClientOptionsKind_lookup[];
+typedef enum NetClientOptionsKind
+{
+ NET_CLIENT_OPTIONS_KIND_NONE = 0,
+ NET_CLIENT_OPTIONS_KIND_NIC = 1,
+ NET_CLIENT_OPTIONS_KIND_USER = 2,
+ NET_CLIENT_OPTIONS_KIND_TAP = 3,
+ NET_CLIENT_OPTIONS_KIND_SOCKET = 4,
+ NET_CLIENT_OPTIONS_KIND_VDE = 5,
+ NET_CLIENT_OPTIONS_KIND_DUMP = 6,
+ NET_CLIENT_OPTIONS_KIND_BRIDGE = 7,
+ NET_CLIENT_OPTIONS_KIND_HUBPORT = 8,
+ NET_CLIENT_OPTIONS_KIND_MAX = 9,
+} NetClientOptionsKind;
+
+
+typedef struct NetLegacy NetLegacy;
+
+typedef struct NetLegacyList
+{
+ union {
+ NetLegacy *value;
+ uint64_t padding;
+ };
+ struct NetLegacyList *next;
+} NetLegacyList;
+
+
+typedef struct Netdev Netdev;
+
+typedef struct NetdevList
+{
+ union {
+ Netdev *value;
+ uint64_t padding;
+ };
+ struct NetdevList *next;
+} NetdevList;
+
+
+typedef struct InetSocketAddress InetSocketAddress;
+
+typedef struct InetSocketAddressList
+{
+ union {
+ InetSocketAddress *value;
+ uint64_t padding;
+ };
+ struct InetSocketAddressList *next;
+} InetSocketAddressList;
+
+
+typedef struct UnixSocketAddress UnixSocketAddress;
+
+typedef struct UnixSocketAddressList
+{
+ union {
+ UnixSocketAddress *value;
+ uint64_t padding;
+ };
+ struct UnixSocketAddressList *next;
+} UnixSocketAddressList;
+
+
+typedef struct SocketAddress SocketAddress;
+
+typedef struct SocketAddressList
+{
+ union {
+ SocketAddress *value;
+ uint64_t padding;
+ };
+ struct SocketAddressList *next;
+} SocketAddressList;
+
+extern const char *SocketAddressKind_lookup[];
+typedef enum SocketAddressKind
+{
+ SOCKET_ADDRESS_KIND_INET = 0,
+ SOCKET_ADDRESS_KIND_UNIX = 1,
+ SOCKET_ADDRESS_KIND_FD = 2,
+ SOCKET_ADDRESS_KIND_MAX = 3,
+} SocketAddressKind;
+
+
+typedef struct MachineInfo MachineInfo;
+
+typedef struct MachineInfoList
+{
+ union {
+ MachineInfo *value;
+ uint64_t padding;
+ };
+ struct MachineInfoList *next;
+} MachineInfoList;
+
+
+typedef struct CpuDefinitionInfo CpuDefinitionInfo;
+
+typedef struct CpuDefinitionInfoList
+{
+ union {
+ CpuDefinitionInfo *value;
+ uint64_t padding;
+ };
+ struct CpuDefinitionInfoList *next;
+} CpuDefinitionInfoList;
+
+
+typedef struct AddfdInfo AddfdInfo;
+
+typedef struct AddfdInfoList
+{
+ union {
+ AddfdInfo *value;
+ uint64_t padding;
+ };
+ struct AddfdInfoList *next;
+} AddfdInfoList;
+
+
+typedef struct FdsetFdInfo FdsetFdInfo;
+
+typedef struct FdsetFdInfoList
+{
+ union {
+ FdsetFdInfo *value;
+ uint64_t padding;
+ };
+ struct FdsetFdInfoList *next;
+} FdsetFdInfoList;
+
+
+typedef struct FdsetInfo FdsetInfo;
+
+typedef struct FdsetInfoList
+{
+ union {
+ FdsetInfo *value;
+ uint64_t padding;
+ };
+ struct FdsetInfoList *next;
+} FdsetInfoList;
+
+
+typedef struct TargetInfo TargetInfo;
+
+typedef struct TargetInfoList
+{
+ union {
+ TargetInfo *value;
+ uint64_t padding;
+ };
+ struct TargetInfoList *next;
+} TargetInfoList;
+
+extern const char *QKeyCode_lookup[];
+typedef enum QKeyCode
+{
+ Q_KEY_CODE_SHIFT = 0,
+ Q_KEY_CODE_SHIFT_R = 1,
+ Q_KEY_CODE_ALT = 2,
+ Q_KEY_CODE_ALT_R = 3,
+ Q_KEY_CODE_ALTGR = 4,
+ Q_KEY_CODE_ALTGR_R = 5,
+ Q_KEY_CODE_CTRL = 6,
+ Q_KEY_CODE_CTRL_R = 7,
+ Q_KEY_CODE_MENU = 8,
+ Q_KEY_CODE_ESC = 9,
+ Q_KEY_CODE_1 = 10,
+ Q_KEY_CODE_2 = 11,
+ Q_KEY_CODE_3 = 12,
+ Q_KEY_CODE_4 = 13,
+ Q_KEY_CODE_5 = 14,
+ Q_KEY_CODE_6 = 15,
+ Q_KEY_CODE_7 = 16,
+ Q_KEY_CODE_8 = 17,
+ Q_KEY_CODE_9 = 18,
+ Q_KEY_CODE_0 = 19,
+ Q_KEY_CODE_MINUS = 20,
+ Q_KEY_CODE_EQUAL = 21,
+ Q_KEY_CODE_BACKSPACE = 22,
+ Q_KEY_CODE_TAB = 23,
+ Q_KEY_CODE_Q = 24,
+ Q_KEY_CODE_W = 25,
+ Q_KEY_CODE_E = 26,
+ Q_KEY_CODE_R = 27,
+ Q_KEY_CODE_T = 28,
+ Q_KEY_CODE_Y = 29,
+ Q_KEY_CODE_U = 30,
+ Q_KEY_CODE_I = 31,
+ Q_KEY_CODE_O = 32,
+ Q_KEY_CODE_P = 33,
+ Q_KEY_CODE_BRACKET_LEFT = 34,
+ Q_KEY_CODE_BRACKET_RIGHT = 35,
+ Q_KEY_CODE_RET = 36,
+ Q_KEY_CODE_A = 37,
+ Q_KEY_CODE_S = 38,
+ Q_KEY_CODE_D = 39,
+ Q_KEY_CODE_F = 40,
+ Q_KEY_CODE_G = 41,
+ Q_KEY_CODE_H = 42,
+ Q_KEY_CODE_J = 43,
+ Q_KEY_CODE_K = 44,
+ Q_KEY_CODE_L = 45,
+ Q_KEY_CODE_SEMICOLON = 46,
+ Q_KEY_CODE_APOSTROPHE = 47,
+ Q_KEY_CODE_GRAVE_ACCENT = 48,
+ Q_KEY_CODE_BACKSLASH = 49,
+ Q_KEY_CODE_Z = 50,
+ Q_KEY_CODE_X = 51,
+ Q_KEY_CODE_C = 52,
+ Q_KEY_CODE_V = 53,
+ Q_KEY_CODE_B = 54,
+ Q_KEY_CODE_N = 55,
+ Q_KEY_CODE_M = 56,
+ Q_KEY_CODE_COMMA = 57,
+ Q_KEY_CODE_DOT = 58,
+ Q_KEY_CODE_SLASH = 59,
+ Q_KEY_CODE_ASTERISK = 60,
+ Q_KEY_CODE_SPC = 61,
+ Q_KEY_CODE_CAPS_LOCK = 62,
+ Q_KEY_CODE_F1 = 63,
+ Q_KEY_CODE_F2 = 64,
+ Q_KEY_CODE_F3 = 65,
+ Q_KEY_CODE_F4 = 66,
+ Q_KEY_CODE_F5 = 67,
+ Q_KEY_CODE_F6 = 68,
+ Q_KEY_CODE_F7 = 69,
+ Q_KEY_CODE_F8 = 70,
+ Q_KEY_CODE_F9 = 71,
+ Q_KEY_CODE_F10 = 72,
+ Q_KEY_CODE_NUM_LOCK = 73,
+ Q_KEY_CODE_SCROLL_LOCK = 74,
+ Q_KEY_CODE_KP_DIVIDE = 75,
+ Q_KEY_CODE_KP_MULTIPLY = 76,
+ Q_KEY_CODE_KP_SUBTRACT = 77,
+ Q_KEY_CODE_KP_ADD = 78,
+ Q_KEY_CODE_KP_ENTER = 79,
+ Q_KEY_CODE_KP_DECIMAL = 80,
+ Q_KEY_CODE_SYSRQ = 81,
+ Q_KEY_CODE_KP_0 = 82,
+ Q_KEY_CODE_KP_1 = 83,
+ Q_KEY_CODE_KP_2 = 84,
+ Q_KEY_CODE_KP_3 = 85,
+ Q_KEY_CODE_KP_4 = 86,
+ Q_KEY_CODE_KP_5 = 87,
+ Q_KEY_CODE_KP_6 = 88,
+ Q_KEY_CODE_KP_7 = 89,
+ Q_KEY_CODE_KP_8 = 90,
+ Q_KEY_CODE_KP_9 = 91,
+ Q_KEY_CODE_LESS = 92,
+ Q_KEY_CODE_F11 = 93,
+ Q_KEY_CODE_F12 = 94,
+ Q_KEY_CODE_PRINT = 95,
+ Q_KEY_CODE_HOME = 96,
+ Q_KEY_CODE_PGUP = 97,
+ Q_KEY_CODE_PGDN = 98,
+ Q_KEY_CODE_END = 99,
+ Q_KEY_CODE_LEFT = 100,
+ Q_KEY_CODE_UP = 101,
+ Q_KEY_CODE_DOWN = 102,
+ Q_KEY_CODE_RIGHT = 103,
+ Q_KEY_CODE_INSERT = 104,
+ Q_KEY_CODE_DELETE = 105,
+ Q_KEY_CODE_STOP = 106,
+ Q_KEY_CODE_AGAIN = 107,
+ Q_KEY_CODE_PROPS = 108,
+ Q_KEY_CODE_UNDO = 109,
+ Q_KEY_CODE_FRONT = 110,
+ Q_KEY_CODE_COPY = 111,
+ Q_KEY_CODE_OPEN = 112,
+ Q_KEY_CODE_PASTE = 113,
+ Q_KEY_CODE_FIND = 114,
+ Q_KEY_CODE_CUT = 115,
+ Q_KEY_CODE_LF = 116,
+ Q_KEY_CODE_HELP = 117,
+ Q_KEY_CODE_META_L = 118,
+ Q_KEY_CODE_META_R = 119,
+ Q_KEY_CODE_COMPOSE = 120,
+ Q_KEY_CODE_MAX = 121,
+} QKeyCode;
+
+typedef struct QKeyCodeList
+{
+ QKeyCode value;
+ struct QKeyCodeList *next;
+} QKeyCodeList;
+
+
+typedef struct KeyValue KeyValue;
+
+typedef struct KeyValueList
+{
+ union {
+ KeyValue *value;
+ uint64_t padding;
+ };
+ struct KeyValueList *next;
+} KeyValueList;
+
+extern const char *KeyValueKind_lookup[];
+typedef enum KeyValueKind
+{
+ KEY_VALUE_KIND_NUMBER = 0,
+ KEY_VALUE_KIND_QCODE = 1,
+ KEY_VALUE_KIND_MAX = 2,
+} KeyValueKind;
+
+
+typedef struct ChardevFile ChardevFile;
+
+typedef struct ChardevFileList
+{
+ union {
+ ChardevFile *value;
+ uint64_t padding;
+ };
+ struct ChardevFileList *next;
+} ChardevFileList;
+
+
+typedef struct ChardevHostdev ChardevHostdev;
+
+typedef struct ChardevHostdevList
+{
+ union {
+ ChardevHostdev *value;
+ uint64_t padding;
+ };
+ struct ChardevHostdevList *next;
+} ChardevHostdevList;
+
+
+typedef struct ChardevSocket ChardevSocket;
+
+typedef struct ChardevSocketList
+{
+ union {
+ ChardevSocket *value;
+ uint64_t padding;
+ };
+ struct ChardevSocketList *next;
+} ChardevSocketList;
+
+
+typedef struct ChardevUdp ChardevUdp;
+
+typedef struct ChardevUdpList
+{
+ union {
+ ChardevUdp *value;
+ uint64_t padding;
+ };
+ struct ChardevUdpList *next;
+} ChardevUdpList;
+
+
+typedef struct ChardevMux ChardevMux;
+
+typedef struct ChardevMuxList
+{
+ union {
+ ChardevMux *value;
+ uint64_t padding;
+ };
+ struct ChardevMuxList *next;
+} ChardevMuxList;
+
+
+typedef struct ChardevStdio ChardevStdio;
+
+typedef struct ChardevStdioList
+{
+ union {
+ ChardevStdio *value;
+ uint64_t padding;
+ };
+ struct ChardevStdioList *next;
+} ChardevStdioList;
+
+
+typedef struct ChardevSpiceChannel ChardevSpiceChannel;
+
+typedef struct ChardevSpiceChannelList
+{
+ union {
+ ChardevSpiceChannel *value;
+ uint64_t padding;
+ };
+ struct ChardevSpiceChannelList *next;
+} ChardevSpiceChannelList;
+
+
+typedef struct ChardevSpicePort ChardevSpicePort;
+
+typedef struct ChardevSpicePortList
+{
+ union {
+ ChardevSpicePort *value;
+ uint64_t padding;
+ };
+ struct ChardevSpicePortList *next;
+} ChardevSpicePortList;
+
+
+typedef struct ChardevVC ChardevVC;
+
+typedef struct ChardevVCList
+{
+ union {
+ ChardevVC *value;
+ uint64_t padding;
+ };
+ struct ChardevVCList *next;
+} ChardevVCList;
+
+
+typedef struct ChardevMemory ChardevMemory;
+
+typedef struct ChardevMemoryList
+{
+ union {
+ ChardevMemory *value;
+ uint64_t padding;
+ };
+ struct ChardevMemoryList *next;
+} ChardevMemoryList;
+
+
+typedef struct ChardevDummy ChardevDummy;
+
+typedef struct ChardevDummyList
+{
+ union {
+ ChardevDummy *value;
+ uint64_t padding;
+ };
+ struct ChardevDummyList *next;
+} ChardevDummyList;
+
+
+typedef struct ChardevBackend ChardevBackend;
+
+typedef struct ChardevBackendList
+{
+ union {
+ ChardevBackend *value;
+ uint64_t padding;
+ };
+ struct ChardevBackendList *next;
+} ChardevBackendList;
+
+extern const char *ChardevBackendKind_lookup[];
+typedef enum ChardevBackendKind
+{
+ CHARDEV_BACKEND_KIND_FILE = 0,
+ CHARDEV_BACKEND_KIND_SERIAL = 1,
+ CHARDEV_BACKEND_KIND_PARALLEL = 2,
+ CHARDEV_BACKEND_KIND_PIPE = 3,
+ CHARDEV_BACKEND_KIND_SOCKET = 4,
+ CHARDEV_BACKEND_KIND_UDP = 5,
+ CHARDEV_BACKEND_KIND_PTY = 6,
+ CHARDEV_BACKEND_KIND_NULL = 7,
+ CHARDEV_BACKEND_KIND_MUX = 8,
+ CHARDEV_BACKEND_KIND_MSMOUSE = 9,
+ CHARDEV_BACKEND_KIND_BRAILLE = 10,
+ CHARDEV_BACKEND_KIND_STDIO = 11,
+ CHARDEV_BACKEND_KIND_CONSOLE = 12,
+ CHARDEV_BACKEND_KIND_SPICEVMC = 13,
+ CHARDEV_BACKEND_KIND_SPICEPORT = 14,
+ CHARDEV_BACKEND_KIND_VC = 15,
+ CHARDEV_BACKEND_KIND_MEMORY = 16,
+ CHARDEV_BACKEND_KIND_MAX = 17,
+} ChardevBackendKind;
+
+
+typedef struct ChardevReturn ChardevReturn;
+
+typedef struct ChardevReturnList
+{
+ union {
+ ChardevReturn *value;
+ uint64_t padding;
+ };
+ struct ChardevReturnList *next;
+} ChardevReturnList;
+
+extern const char *TpmModel_lookup[];
+typedef enum TpmModel
+{
+ TPM_MODEL_TPM_TIS = 0,
+ TPM_MODEL_MAX = 1,
+} TpmModel;
+
+typedef struct TpmModelList
+{
+ TpmModel value;
+ struct TpmModelList *next;
+} TpmModelList;
+
+extern const char *TpmType_lookup[];
+typedef enum TpmType
+{
+ TPM_TYPE_PASSTHROUGH = 0,
+ TPM_TYPE_MAX = 1,
+} TpmType;
+
+typedef struct TpmTypeList
+{
+ TpmType value;
+ struct TpmTypeList *next;
+} TpmTypeList;
+
+
+typedef struct TPMPassthroughOptions TPMPassthroughOptions;
+
+typedef struct TPMPassthroughOptionsList
+{
+ union {
+ TPMPassthroughOptions *value;
+ uint64_t padding;
+ };
+ struct TPMPassthroughOptionsList *next;
+} TPMPassthroughOptionsList;
+
+
+typedef struct TpmTypeOptions TpmTypeOptions;
+
+typedef struct TpmTypeOptionsList
+{
+ union {
+ TpmTypeOptions *value;
+ uint64_t padding;
+ };
+ struct TpmTypeOptionsList *next;
+} TpmTypeOptionsList;
+
+extern const char *TpmTypeOptionsKind_lookup[];
+typedef enum TpmTypeOptionsKind
+{
+ TPM_TYPE_OPTIONS_KIND_PASSTHROUGH = 0,
+ TPM_TYPE_OPTIONS_KIND_MAX = 1,
+} TpmTypeOptionsKind;
+
+
+typedef struct TPMInfo TPMInfo;
+
+typedef struct TPMInfoList
+{
+ union {
+ TPMInfo *value;
+ uint64_t padding;
+ };
+ struct TPMInfoList *next;
+} TPMInfoList;
+
+
+typedef struct AcpiTableOptions AcpiTableOptions;
+
+typedef struct AcpiTableOptionsList
+{
+ union {
+ AcpiTableOptions *value;
+ uint64_t padding;
+ };
+ struct AcpiTableOptionsList *next;
+} AcpiTableOptionsList;
+
+extern const char *CommandLineParameterType_lookup[];
+typedef enum CommandLineParameterType
+{
+ COMMAND_LINE_PARAMETER_TYPE_STRING = 0,
+ COMMAND_LINE_PARAMETER_TYPE_BOOLEAN = 1,
+ COMMAND_LINE_PARAMETER_TYPE_NUMBER = 2,
+ COMMAND_LINE_PARAMETER_TYPE_SIZE = 3,
+ COMMAND_LINE_PARAMETER_TYPE_MAX = 4,
+} CommandLineParameterType;
+
+typedef struct CommandLineParameterTypeList
+{
+ CommandLineParameterType value;
+ struct CommandLineParameterTypeList *next;
+} CommandLineParameterTypeList;
+
+
+typedef struct CommandLineParameterInfo CommandLineParameterInfo;
+
+typedef struct CommandLineParameterInfoList
+{
+ union {
+ CommandLineParameterInfo *value;
+ uint64_t padding;
+ };
+ struct CommandLineParameterInfoList *next;
+} CommandLineParameterInfoList;
+
+
+typedef struct CommandLineOptionInfo CommandLineOptionInfo;
+
+typedef struct CommandLineOptionInfoList
+{
+ union {
+ CommandLineOptionInfo *value;
+ uint64_t padding;
+ };
+ struct CommandLineOptionInfoList *next;
+} CommandLineOptionInfoList;
+
+extern const char *X86CPURegister32_lookup[];
+typedef enum X86CPURegister32
+{
+ X86_C_P_U_REGISTER32_EAX = 0,
+ X86_C_P_U_REGISTER32_EBX = 1,
+ X86_C_P_U_REGISTER32_ECX = 2,
+ X86_C_P_U_REGISTER32_EDX = 3,
+ X86_C_P_U_REGISTER32_ESP = 4,
+ X86_C_P_U_REGISTER32_EBP = 5,
+ X86_C_P_U_REGISTER32_ESI = 6,
+ X86_C_P_U_REGISTER32_EDI = 7,
+ X86_C_P_U_REGISTER32_MAX = 8,
+} X86CPURegister32;
+
+typedef struct X86CPURegister32List
+{
+ X86CPURegister32 value;
+ struct X86CPURegister32List *next;
+} X86CPURegister32List;
+
+
+typedef struct X86CPUFeatureWordInfo X86CPUFeatureWordInfo;
+
+typedef struct X86CPUFeatureWordInfoList
+{
+ union {
+ X86CPUFeatureWordInfo *value;
+ uint64_t padding;
+ };
+ struct X86CPUFeatureWordInfoList *next;
+} X86CPUFeatureWordInfoList;
+
+extern const char *RxState_lookup[];
+typedef enum RxState
+{
+ RX_STATE_NORMAL = 0,
+ RX_STATE_NONE = 1,
+ RX_STATE_ALL = 2,
+ RX_STATE_MAX = 3,
+} RxState;
+
+typedef struct RxStateList
+{
+ RxState value;
+ struct RxStateList *next;
+} RxStateList;
+
+
+typedef struct RxFilterInfo RxFilterInfo;
+
+typedef struct RxFilterInfoList
+{
+ union {
+ RxFilterInfo *value;
+ uint64_t padding;
+ };
+ struct RxFilterInfoList *next;
+} RxFilterInfoList;
+
+#ifndef QAPI_TYPES_BUILTIN_CLEANUP_DECL_H
+#define QAPI_TYPES_BUILTIN_CLEANUP_DECL_H
+
+void qapi_free_strList(strList * obj);
+void qapi_free_intList(intList * obj);
+void qapi_free_numberList(numberList * obj);
+void qapi_free_boolList(boolList * obj);
+void qapi_free_int8List(int8List * obj);
+void qapi_free_int16List(int16List * obj);
+void qapi_free_int32List(int32List * obj);
+void qapi_free_int64List(int64List * obj);
+void qapi_free_uint8List(uint8List * obj);
+void qapi_free_uint16List(uint16List * obj);
+void qapi_free_uint32List(uint32List * obj);
+void qapi_free_uint64List(uint64List * obj);
+
+#endif /* QAPI_TYPES_BUILTIN_CLEANUP_DECL_H */
+
+
+void qapi_free_ErrorClassList(ErrorClassList * obj);
+
+struct NameInfo
+{
+ bool has_name;
+ char * name;
+};
+
+void qapi_free_NameInfoList(NameInfoList * obj);
+void qapi_free_NameInfo(NameInfo * obj);
+
+struct VersionInfo
+{
+ struct
+ {
+ int64_t major;
+ int64_t minor;
+ int64_t micro;
+ } qemu;
+ char * package;
+};
+
+void qapi_free_VersionInfoList(VersionInfoList * obj);
+void qapi_free_VersionInfo(VersionInfo * obj);
+
+struct KvmInfo
+{
+ bool enabled;
+ bool present;
+};
+
+void qapi_free_KvmInfoList(KvmInfoList * obj);
+void qapi_free_KvmInfo(KvmInfo * obj);
+
+void qapi_free_RunStateList(RunStateList * obj);
+
+struct SnapshotInfo
+{
+ char * id;
+ char * name;
+ int64_t vm_state_size;
+ int64_t date_sec;
+ int64_t date_nsec;
+ int64_t vm_clock_sec;
+ int64_t vm_clock_nsec;
+};
+
+void qapi_free_SnapshotInfoList(SnapshotInfoList * obj);
+void qapi_free_SnapshotInfo(SnapshotInfo * obj);
+
+struct ImageInfo
+{
+ char * filename;
+ char * format;
+ bool has_dirty_flag;
+ bool dirty_flag;
+ bool has_actual_size;
+ int64_t actual_size;
+ int64_t virtual_size;
+ bool has_cluster_size;
+ int64_t cluster_size;
+ bool has_encrypted;
+ bool encrypted;
+ bool has_backing_filename;
+ char * backing_filename;
+ bool has_full_backing_filename;
+ char * full_backing_filename;
+ bool has_backing_filename_format;
+ char * backing_filename_format;
+ bool has_snapshots;
+ SnapshotInfoList * snapshots;
+ bool has_backing_image;
+ ImageInfo * backing_image;
+};
+
+void qapi_free_ImageInfoList(ImageInfoList * obj);
+void qapi_free_ImageInfo(ImageInfo * obj);
+
+struct ImageCheck
+{
+ char * filename;
+ char * format;
+ int64_t check_errors;
+ bool has_image_end_offset;
+ int64_t image_end_offset;
+ bool has_corruptions;
+ int64_t corruptions;
+ bool has_leaks;
+ int64_t leaks;
+ bool has_corruptions_fixed;
+ int64_t corruptions_fixed;
+ bool has_leaks_fixed;
+ int64_t leaks_fixed;
+ bool has_total_clusters;
+ int64_t total_clusters;
+ bool has_allocated_clusters;
+ int64_t allocated_clusters;
+ bool has_fragmented_clusters;
+ int64_t fragmented_clusters;
+ bool has_compressed_clusters;
+ int64_t compressed_clusters;
+};
+
+void qapi_free_ImageCheckList(ImageCheckList * obj);
+void qapi_free_ImageCheck(ImageCheck * obj);
+
+struct StatusInfo
+{
+ bool running;
+ bool singlestep;
+ RunState status;
+};
+
+void qapi_free_StatusInfoList(StatusInfoList * obj);
+void qapi_free_StatusInfo(StatusInfo * obj);
+
+struct UuidInfo
+{
+ char * UUID;
+};
+
+void qapi_free_UuidInfoList(UuidInfoList * obj);
+void qapi_free_UuidInfo(UuidInfo * obj);
+
+struct ChardevInfo
+{
+ char * label;
+ char * filename;
+};
+
+void qapi_free_ChardevInfoList(ChardevInfoList * obj);
+void qapi_free_ChardevInfo(ChardevInfo * obj);
+
+void qapi_free_DataFormatList(DataFormatList * obj);
+
+struct CommandInfo
+{
+ char * name;
+};
+
+void qapi_free_CommandInfoList(CommandInfoList * obj);
+void qapi_free_CommandInfo(CommandInfo * obj);
+
+struct EventInfo
+{
+ char * name;
+};
+
+void qapi_free_EventInfoList(EventInfoList * obj);
+void qapi_free_EventInfo(EventInfo * obj);
+
+struct MigrationStats
+{
+ int64_t transferred;
+ int64_t remaining;
+ int64_t total;
+ int64_t duplicate;
+ int64_t skipped;
+ int64_t normal;
+ int64_t normal_bytes;
+ int64_t dirty_pages_rate;
+ double mbps;
+};
+
+void qapi_free_MigrationStatsList(MigrationStatsList * obj);
+void qapi_free_MigrationStats(MigrationStats * obj);
+
+struct XBZRLECacheStats
+{
+ int64_t cache_size;
+ int64_t bytes;
+ int64_t pages;
+ int64_t cache_miss;
+ int64_t overflow;
+};
+
+void qapi_free_XBZRLECacheStatsList(XBZRLECacheStatsList * obj);
+void qapi_free_XBZRLECacheStats(XBZRLECacheStats * obj);
+
+struct MigrationInfo
+{
+ bool has_status;
+ char * status;
+ bool has_ram;
+ MigrationStats * ram;
+ bool has_disk;
+ MigrationStats * disk;
+ bool has_xbzrle_cache;
+ XBZRLECacheStats * xbzrle_cache;
+ bool has_total_time;
+ int64_t total_time;
+ bool has_expected_downtime;
+ int64_t expected_downtime;
+ bool has_downtime;
+ int64_t downtime;
+};
+
+void qapi_free_MigrationInfoList(MigrationInfoList * obj);
+void qapi_free_MigrationInfo(MigrationInfo * obj);
+
+void qapi_free_MigrationCapabilityList(MigrationCapabilityList * obj);
+
+struct MigrationCapabilityStatus
+{
+ MigrationCapability capability;
+ bool state;
+};
+
+void qapi_free_MigrationCapabilityStatusList(MigrationCapabilityStatusList * obj);
+void qapi_free_MigrationCapabilityStatus(MigrationCapabilityStatus * obj);
+
+struct MouseInfo
+{
+ char * name;
+ int64_t index;
+ bool current;
+ bool absolute;
+};
+
+void qapi_free_MouseInfoList(MouseInfoList * obj);
+void qapi_free_MouseInfo(MouseInfo * obj);
+
+struct CpuInfo
+{
+ int64_t CPU;
+ bool current;
+ bool halted;
+ bool has_pc;
+ int64_t pc;
+ bool has_nip;
+ int64_t nip;
+ bool has_npc;
+ int64_t npc;
+ bool has_PC;
+ int64_t PC;
+ int64_t thread_id;
+};
+
+void qapi_free_CpuInfoList(CpuInfoList * obj);
+void qapi_free_CpuInfo(CpuInfo * obj);
+
+struct BlockDeviceInfo
+{
+ char * file;
+ bool ro;
+ char * drv;
+ bool has_backing_file;
+ char * backing_file;
+ int64_t backing_file_depth;
+ bool encrypted;
+ bool encryption_key_missing;
+ int64_t bps;
+ int64_t bps_rd;
+ int64_t bps_wr;
+ int64_t iops;
+ int64_t iops_rd;
+ int64_t iops_wr;
+ ImageInfo * image;
+};
+
+void qapi_free_BlockDeviceInfoList(BlockDeviceInfoList * obj);
+void qapi_free_BlockDeviceInfo(BlockDeviceInfo * obj);
+
+void qapi_free_BlockDeviceIoStatusList(BlockDeviceIoStatusList * obj);
+
+struct BlockDirtyInfo
+{
+ int64_t count;
+ int64_t granularity;
+};
+
+void qapi_free_BlockDirtyInfoList(BlockDirtyInfoList * obj);
+void qapi_free_BlockDirtyInfo(BlockDirtyInfo * obj);
+
+struct BlockInfo
+{
+ char * device;
+ char * type;
+ bool removable;
+ bool locked;
+ bool has_inserted;
+ BlockDeviceInfo * inserted;
+ bool has_tray_open;
+ bool tray_open;
+ bool has_io_status;
+ BlockDeviceIoStatus io_status;
+ bool has_dirty;
+ BlockDirtyInfo * dirty;
+};
+
+void qapi_free_BlockInfoList(BlockInfoList * obj);
+void qapi_free_BlockInfo(BlockInfo * obj);
+
+struct BlockDeviceStats
+{
+ int64_t rd_bytes;
+ int64_t wr_bytes;
+ int64_t rd_operations;
+ int64_t wr_operations;
+ int64_t flush_operations;
+ int64_t flush_total_time_ns;
+ int64_t wr_total_time_ns;
+ int64_t rd_total_time_ns;
+ int64_t wr_highest_offset;
+};
+
+void qapi_free_BlockDeviceStatsList(BlockDeviceStatsList * obj);
+void qapi_free_BlockDeviceStats(BlockDeviceStats * obj);
+
+struct BlockStats
+{
+ bool has_device;
+ char * device;
+ BlockDeviceStats * stats;
+ bool has_parent;
+ BlockStats * parent;
+};
+
+void qapi_free_BlockStatsList(BlockStatsList * obj);
+void qapi_free_BlockStats(BlockStats * obj);
+
+struct VncClientInfo
+{
+ char * host;
+ char * family;
+ char * service;
+ bool has_x509_dname;
+ char * x509_dname;
+ bool has_sasl_username;
+ char * sasl_username;
+};
+
+void qapi_free_VncClientInfoList(VncClientInfoList * obj);
+void qapi_free_VncClientInfo(VncClientInfo * obj);
+
+struct VncInfo
+{
+ bool enabled;
+ bool has_host;
+ char * host;
+ bool has_family;
+ char * family;
+ bool has_service;
+ char * service;
+ bool has_auth;
+ char * auth;
+ bool has_clients;
+ VncClientInfoList * clients;
+};
+
+void qapi_free_VncInfoList(VncInfoList * obj);
+void qapi_free_VncInfo(VncInfo * obj);
+
+struct SpiceChannel
+{
+ char * host;
+ char * family;
+ char * port;
+ int64_t connection_id;
+ int64_t channel_type;
+ int64_t channel_id;
+ bool tls;
+};
+
+void qapi_free_SpiceChannelList(SpiceChannelList * obj);
+void qapi_free_SpiceChannel(SpiceChannel * obj);
+
+void qapi_free_SpiceQueryMouseModeList(SpiceQueryMouseModeList * obj);
+
+struct SpiceInfo
+{
+ bool enabled;
+ bool migrated;
+ bool has_host;
+ char * host;
+ bool has_port;
+ int64_t port;
+ bool has_tls_port;
+ int64_t tls_port;
+ bool has_auth;
+ char * auth;
+ bool has_compiled_version;
+ char * compiled_version;
+ SpiceQueryMouseMode mouse_mode;
+ bool has_channels;
+ SpiceChannelList * channels;
+};
+
+void qapi_free_SpiceInfoList(SpiceInfoList * obj);
+void qapi_free_SpiceInfo(SpiceInfo * obj);
+
+struct BalloonInfo
+{
+ int64_t actual;
+};
+
+void qapi_free_BalloonInfoList(BalloonInfoList * obj);
+void qapi_free_BalloonInfo(BalloonInfo * obj);
+
+struct PciMemoryRange
+{
+ int64_t base;
+ int64_t limit;
+};
+
+void qapi_free_PciMemoryRangeList(PciMemoryRangeList * obj);
+void qapi_free_PciMemoryRange(PciMemoryRange * obj);
+
+struct PciMemoryRegion
+{
+ int64_t bar;
+ char * type;
+ int64_t address;
+ int64_t size;
+ bool has_prefetch;
+ bool prefetch;
+ bool has_mem_type_64;
+ bool mem_type_64;
+};
+
+void qapi_free_PciMemoryRegionList(PciMemoryRegionList * obj);
+void qapi_free_PciMemoryRegion(PciMemoryRegion * obj);
+
+struct PciBridgeInfo
+{
+ struct
+ {
+ int64_t number;
+ int64_t secondary;
+ int64_t subordinate;
+ PciMemoryRange * io_range;
+ PciMemoryRange * memory_range;
+ PciMemoryRange * prefetchable_range;
+ } bus;
+ bool has_devices;
+ PciDeviceInfoList * devices;
+};
+
+void qapi_free_PciBridgeInfoList(PciBridgeInfoList * obj);
+void qapi_free_PciBridgeInfo(PciBridgeInfo * obj);
+
+struct PciDeviceInfo
+{
+ int64_t bus;
+ int64_t slot;
+ int64_t function;
+ struct
+ {
+ bool has_desc;
+ char * desc;
+ int64_t class;
+ } class_info;
+ struct
+ {
+ int64_t device;
+ int64_t vendor;
+ } id;
+ bool has_irq;
+ int64_t irq;
+ char * qdev_id;
+ bool has_pci_bridge;
+ PciBridgeInfo * pci_bridge;
+ PciMemoryRegionList * regions;
+};
+
+void qapi_free_PciDeviceInfoList(PciDeviceInfoList * obj);
+void qapi_free_PciDeviceInfo(PciDeviceInfo * obj);
+
+struct PciInfo
+{
+ int64_t bus;
+ PciDeviceInfoList * devices;
+};
+
+void qapi_free_PciInfoList(PciInfoList * obj);
+void qapi_free_PciInfo(PciInfo * obj);
+
+void qapi_free_BlockdevOnErrorList(BlockdevOnErrorList * obj);
+
+void qapi_free_MirrorSyncModeList(MirrorSyncModeList * obj);
+
+struct BlockJobInfo
+{
+ char * type;
+ char * device;
+ int64_t len;
+ int64_t offset;
+ bool busy;
+ bool paused;
+ int64_t speed;
+ BlockDeviceIoStatus io_status;
+};
+
+void qapi_free_BlockJobInfoList(BlockJobInfoList * obj);
+void qapi_free_BlockJobInfo(BlockJobInfo * obj);
+
+void qapi_free_NewImageModeList(NewImageModeList * obj);
+
+struct BlockdevSnapshot
+{
+ char * device;
+ char * snapshot_file;
+ bool has_format;
+ char * format;
+ bool has_mode;
+ NewImageMode mode;
+};
+
+void qapi_free_BlockdevSnapshotList(BlockdevSnapshotList * obj);
+void qapi_free_BlockdevSnapshot(BlockdevSnapshot * obj);
+
+struct DriveBackup
+{
+ char * device;
+ char * target;
+ bool has_format;
+ char * format;
+ MirrorSyncMode sync;
+ bool has_mode;
+ NewImageMode mode;
+ bool has_speed;
+ int64_t speed;
+ bool has_on_source_error;
+ BlockdevOnError on_source_error;
+ bool has_on_target_error;
+ BlockdevOnError on_target_error;
+};
+
+void qapi_free_DriveBackupList(DriveBackupList * obj);
+void qapi_free_DriveBackup(DriveBackup * obj);
+
+struct Abort
+{
+};
+
+void qapi_free_AbortList(AbortList * obj);
+void qapi_free_Abort(Abort * obj);
+
+struct TransactionAction
+{
+ TransactionActionKind kind;
+ union {
+ void *data;
+ BlockdevSnapshot * blockdev_snapshot_sync;
+ DriveBackup * drive_backup;
+ Abort * abort;
+ };
+};
+void qapi_free_TransactionActionList(TransactionActionList * obj);
+void qapi_free_TransactionAction(TransactionAction * obj);
+
+struct ObjectPropertyInfo
+{
+ char * name;
+ char * type;
+};
+
+void qapi_free_ObjectPropertyInfoList(ObjectPropertyInfoList * obj);
+void qapi_free_ObjectPropertyInfo(ObjectPropertyInfo * obj);
+
+struct ObjectTypeInfo
+{
+ char * name;
+};
+
+void qapi_free_ObjectTypeInfoList(ObjectTypeInfoList * obj);
+void qapi_free_ObjectTypeInfo(ObjectTypeInfo * obj);
+
+struct DevicePropertyInfo
+{
+ char * name;
+ char * type;
+};
+
+void qapi_free_DevicePropertyInfoList(DevicePropertyInfoList * obj);
+void qapi_free_DevicePropertyInfo(DevicePropertyInfo * obj);
+
+struct NetdevNoneOptions
+{
+};
+
+void qapi_free_NetdevNoneOptionsList(NetdevNoneOptionsList * obj);
+void qapi_free_NetdevNoneOptions(NetdevNoneOptions * obj);
+
+struct NetLegacyNicOptions
+{
+ bool has_netdev;
+ char * netdev;
+ bool has_macaddr;
+ char * macaddr;
+ bool has_model;
+ char * model;
+ bool has_addr;
+ char * addr;
+ bool has_vectors;
+ uint32_t vectors;
+};
+
+void qapi_free_NetLegacyNicOptionsList(NetLegacyNicOptionsList * obj);
+void qapi_free_NetLegacyNicOptions(NetLegacyNicOptions * obj);
+
+struct String
+{
+ char * str;
+};
+
+void qapi_free_StringList(StringList * obj);
+void qapi_free_String(String * obj);
+
+struct NetdevUserOptions
+{
+ bool has_hostname;
+ char * hostname;
+ bool has_q_restrict;
+ bool q_restrict;
+ bool has_ip;
+ char * ip;
+ bool has_net;
+ char * net;
+ bool has_host;
+ char * host;
+ bool has_tftp;
+ char * tftp;
+ bool has_bootfile;
+ char * bootfile;
+ bool has_dhcpstart;
+ char * dhcpstart;
+ bool has_dns;
+ char * dns;
+ bool has_dnssearch;
+ StringList * dnssearch;
+ bool has_smb;
+ char * smb;
+ bool has_smbserver;
+ char * smbserver;
+ bool has_hostfwd;
+ StringList * hostfwd;
+ bool has_guestfwd;
+ StringList * guestfwd;
+};
+
+void qapi_free_NetdevUserOptionsList(NetdevUserOptionsList * obj);
+void qapi_free_NetdevUserOptions(NetdevUserOptions * obj);
+
+struct NetdevTapOptions
+{
+ bool has_ifname;
+ char * ifname;
+ bool has_fd;
+ char * fd;
+ bool has_fds;
+ char * fds;
+ bool has_script;
+ char * script;
+ bool has_downscript;
+ char * downscript;
+ bool has_helper;
+ char * helper;
+ bool has_sndbuf;
+ uint64_t sndbuf;
+ bool has_vnet_hdr;
+ bool vnet_hdr;
+ bool has_vhost;
+ bool vhost;
+ bool has_vhostfd;
+ char * vhostfd;
+ bool has_vhostfds;
+ char * vhostfds;
+ bool has_vhostforce;
+ bool vhostforce;
+ bool has_queues;
+ uint32_t queues;
+};
+
+void qapi_free_NetdevTapOptionsList(NetdevTapOptionsList * obj);
+void qapi_free_NetdevTapOptions(NetdevTapOptions * obj);
+
+struct NetdevSocketOptions
+{
+ bool has_fd;
+ char * fd;
+ bool has_listen;
+ char * listen;
+ bool has_connect;
+ char * connect;
+ bool has_mcast;
+ char * mcast;
+ bool has_localaddr;
+ char * localaddr;
+ bool has_udp;
+ char * udp;
+};
+
+void qapi_free_NetdevSocketOptionsList(NetdevSocketOptionsList * obj);
+void qapi_free_NetdevSocketOptions(NetdevSocketOptions * obj);
+
+struct NetdevVdeOptions
+{
+ bool has_sock;
+ char * sock;
+ bool has_port;
+ uint16_t port;
+ bool has_group;
+ char * group;
+ bool has_mode;
+ uint16_t mode;
+};
+
+void qapi_free_NetdevVdeOptionsList(NetdevVdeOptionsList * obj);
+void qapi_free_NetdevVdeOptions(NetdevVdeOptions * obj);
+
+struct NetdevDumpOptions
+{
+ bool has_len;
+ uint64_t len;
+ bool has_file;
+ char * file;
+};
+
+void qapi_free_NetdevDumpOptionsList(NetdevDumpOptionsList * obj);
+void qapi_free_NetdevDumpOptions(NetdevDumpOptions * obj);
+
+struct NetdevBridgeOptions
+{
+ bool has_br;
+ char * br;
+ bool has_helper;
+ char * helper;
+};
+
+void qapi_free_NetdevBridgeOptionsList(NetdevBridgeOptionsList * obj);
+void qapi_free_NetdevBridgeOptions(NetdevBridgeOptions * obj);
+
+struct NetdevHubPortOptions
+{
+ int32_t hubid;
+};
+
+void qapi_free_NetdevHubPortOptionsList(NetdevHubPortOptionsList * obj);
+void qapi_free_NetdevHubPortOptions(NetdevHubPortOptions * obj);
+
+struct NetClientOptions
+{
+ NetClientOptionsKind kind;
+ union {
+ void *data;
+ NetdevNoneOptions * none;
+ NetLegacyNicOptions * nic;
+ NetdevUserOptions * user;
+ NetdevTapOptions * tap;
+ NetdevSocketOptions * socket;
+ NetdevVdeOptions * vde;
+ NetdevDumpOptions * dump;
+ NetdevBridgeOptions * bridge;
+ NetdevHubPortOptions * hubport;
+ };
+};
+void qapi_free_NetClientOptionsList(NetClientOptionsList * obj);
+void qapi_free_NetClientOptions(NetClientOptions * obj);
+
+struct NetLegacy
+{
+ bool has_vlan;
+ int32_t vlan;
+ bool has_id;
+ char * id;
+ bool has_name;
+ char * name;
+ NetClientOptions * opts;
+};
+
+void qapi_free_NetLegacyList(NetLegacyList * obj);
+void qapi_free_NetLegacy(NetLegacy * obj);
+
+struct Netdev
+{
+ char * id;
+ NetClientOptions * opts;
+};
+
+void qapi_free_NetdevList(NetdevList * obj);
+void qapi_free_Netdev(Netdev * obj);
+
+struct InetSocketAddress
+{
+ char * host;
+ char * port;
+ bool has_to;
+ uint16_t to;
+ bool has_ipv4;
+ bool ipv4;
+ bool has_ipv6;
+ bool ipv6;
+};
+
+void qapi_free_InetSocketAddressList(InetSocketAddressList * obj);
+void qapi_free_InetSocketAddress(InetSocketAddress * obj);
+
+struct UnixSocketAddress
+{
+ char * path;
+};
+
+void qapi_free_UnixSocketAddressList(UnixSocketAddressList * obj);
+void qapi_free_UnixSocketAddress(UnixSocketAddress * obj);
+
+struct SocketAddress
+{
+ SocketAddressKind kind;
+ union {
+ void *data;
+ InetSocketAddress * inet;
+ UnixSocketAddress * q_unix;
+ String * fd;
+ };
+};
+void qapi_free_SocketAddressList(SocketAddressList * obj);
+void qapi_free_SocketAddress(SocketAddress * obj);
+
+struct MachineInfo
+{
+ char * name;
+ bool has_alias;
+ char * alias;
+ bool has_is_default;
+ bool is_default;
+ int64_t cpu_max;
+};
+
+void qapi_free_MachineInfoList(MachineInfoList * obj);
+void qapi_free_MachineInfo(MachineInfo * obj);
+
+struct CpuDefinitionInfo
+{
+ char * name;
+};
+
+void qapi_free_CpuDefinitionInfoList(CpuDefinitionInfoList * obj);
+void qapi_free_CpuDefinitionInfo(CpuDefinitionInfo * obj);
+
+struct AddfdInfo
+{
+ int64_t fdset_id;
+ int64_t fd;
+};
+
+void qapi_free_AddfdInfoList(AddfdInfoList * obj);
+void qapi_free_AddfdInfo(AddfdInfo * obj);
+
+struct FdsetFdInfo
+{
+ int64_t fd;
+ bool has_opaque;
+ char * opaque;
+};
+
+void qapi_free_FdsetFdInfoList(FdsetFdInfoList * obj);
+void qapi_free_FdsetFdInfo(FdsetFdInfo * obj);
+
+struct FdsetInfo
+{
+ int64_t fdset_id;
+ FdsetFdInfoList * fds;
+};
+
+void qapi_free_FdsetInfoList(FdsetInfoList * obj);
+void qapi_free_FdsetInfo(FdsetInfo * obj);
+
+struct TargetInfo
+{
+ char * arch;
+};
+
+void qapi_free_TargetInfoList(TargetInfoList * obj);
+void qapi_free_TargetInfo(TargetInfo * obj);
+
+void qapi_free_QKeyCodeList(QKeyCodeList * obj);
+
+struct KeyValue
+{
+ KeyValueKind kind;
+ union {
+ void *data;
+ int64_t number;
+ QKeyCode qcode;
+ };
+};
+void qapi_free_KeyValueList(KeyValueList * obj);
+void qapi_free_KeyValue(KeyValue * obj);
+
+struct ChardevFile
+{
+ bool has_in;
+ char * in;
+ char * out;
+};
+
+void qapi_free_ChardevFileList(ChardevFileList * obj);
+void qapi_free_ChardevFile(ChardevFile * obj);
+
+struct ChardevHostdev
+{
+ char * device;
+};
+
+void qapi_free_ChardevHostdevList(ChardevHostdevList * obj);
+void qapi_free_ChardevHostdev(ChardevHostdev * obj);
+
+struct ChardevSocket
+{
+ SocketAddress * addr;
+ bool has_server;
+ bool server;
+ bool has_wait;
+ bool wait;
+ bool has_nodelay;
+ bool nodelay;
+ bool has_telnet;
+ bool telnet;
+};
+
+void qapi_free_ChardevSocketList(ChardevSocketList * obj);
+void qapi_free_ChardevSocket(ChardevSocket * obj);
+
+struct ChardevUdp
+{
+ SocketAddress * remote;
+ bool has_local;
+ SocketAddress * local;
+};
+
+void qapi_free_ChardevUdpList(ChardevUdpList * obj);
+void qapi_free_ChardevUdp(ChardevUdp * obj);
+
+struct ChardevMux
+{
+ char * chardev;
+};
+
+void qapi_free_ChardevMuxList(ChardevMuxList * obj);
+void qapi_free_ChardevMux(ChardevMux * obj);
+
+struct ChardevStdio
+{
+ bool has_signal;
+ bool signal;
+};
+
+void qapi_free_ChardevStdioList(ChardevStdioList * obj);
+void qapi_free_ChardevStdio(ChardevStdio * obj);
+
+struct ChardevSpiceChannel
+{
+ char * type;
+};
+
+void qapi_free_ChardevSpiceChannelList(ChardevSpiceChannelList * obj);
+void qapi_free_ChardevSpiceChannel(ChardevSpiceChannel * obj);
+
+struct ChardevSpicePort
+{
+ char * fqdn;
+};
+
+void qapi_free_ChardevSpicePortList(ChardevSpicePortList * obj);
+void qapi_free_ChardevSpicePort(ChardevSpicePort * obj);
+
+struct ChardevVC
+{
+ bool has_width;
+ int64_t width;
+ bool has_height;
+ int64_t height;
+ bool has_cols;
+ int64_t cols;
+ bool has_rows;
+ int64_t rows;
+};
+
+void qapi_free_ChardevVCList(ChardevVCList * obj);
+void qapi_free_ChardevVC(ChardevVC * obj);
+
+struct ChardevMemory
+{
+ bool has_size;
+ int64_t size;
+};
+
+void qapi_free_ChardevMemoryList(ChardevMemoryList * obj);
+void qapi_free_ChardevMemory(ChardevMemory * obj);
+
+struct ChardevDummy
+{
+};
+
+void qapi_free_ChardevDummyList(ChardevDummyList * obj);
+void qapi_free_ChardevDummy(ChardevDummy * obj);
+
+struct ChardevBackend
+{
+ ChardevBackendKind kind;
+ union {
+ void *data;
+ ChardevFile * file;
+ ChardevHostdev * serial;
+ ChardevHostdev * parallel;
+ ChardevHostdev * pipe;
+ ChardevSocket * socket;
+ ChardevUdp * udp;
+ ChardevDummy * pty;
+ ChardevDummy * null;
+ ChardevMux * mux;
+ ChardevDummy * msmouse;
+ ChardevDummy * braille;
+ ChardevStdio * stdio;
+ ChardevDummy * console;
+ ChardevSpiceChannel * spicevmc;
+ ChardevSpicePort * spiceport;
+ ChardevVC * vc;
+ ChardevMemory * memory;
+ };
+};
+void qapi_free_ChardevBackendList(ChardevBackendList * obj);
+void qapi_free_ChardevBackend(ChardevBackend * obj);
+
+struct ChardevReturn
+{
+ bool has_pty;
+ char * pty;
+};
+
+void qapi_free_ChardevReturnList(ChardevReturnList * obj);
+void qapi_free_ChardevReturn(ChardevReturn * obj);
+
+void qapi_free_TpmModelList(TpmModelList * obj);
+
+void qapi_free_TpmTypeList(TpmTypeList * obj);
+
+struct TPMPassthroughOptions
+{
+ bool has_path;
+ char * path;
+ bool has_cancel_path;
+ char * cancel_path;
+};
+
+void qapi_free_TPMPassthroughOptionsList(TPMPassthroughOptionsList * obj);
+void qapi_free_TPMPassthroughOptions(TPMPassthroughOptions * obj);
+
+struct TpmTypeOptions
+{
+ TpmTypeOptionsKind kind;
+ union {
+ void *data;
+ TPMPassthroughOptions * passthrough;
+ };
+};
+void qapi_free_TpmTypeOptionsList(TpmTypeOptionsList * obj);
+void qapi_free_TpmTypeOptions(TpmTypeOptions * obj);
+
+struct TPMInfo
+{
+ char * id;
+ TpmModel model;
+ TpmTypeOptions * options;
+};
+
+void qapi_free_TPMInfoList(TPMInfoList * obj);
+void qapi_free_TPMInfo(TPMInfo * obj);
+
+struct AcpiTableOptions
+{
+ bool has_sig;
+ char * sig;
+ bool has_rev;
+ uint8_t rev;
+ bool has_oem_id;
+ char * oem_id;
+ bool has_oem_table_id;
+ char * oem_table_id;
+ bool has_oem_rev;
+ uint32_t oem_rev;
+ bool has_asl_compiler_id;
+ char * asl_compiler_id;
+ bool has_asl_compiler_rev;
+ uint32_t asl_compiler_rev;
+ bool has_file;
+ char * file;
+ bool has_data;
+ char * data;
+};
+
+void qapi_free_AcpiTableOptionsList(AcpiTableOptionsList * obj);
+void qapi_free_AcpiTableOptions(AcpiTableOptions * obj);
+
+void qapi_free_CommandLineParameterTypeList(CommandLineParameterTypeList * obj);
+
+struct CommandLineParameterInfo
+{
+ char * name;
+ CommandLineParameterType type;
+ bool has_help;
+ char * help;
+};
+
+void qapi_free_CommandLineParameterInfoList(CommandLineParameterInfoList * obj);
+void qapi_free_CommandLineParameterInfo(CommandLineParameterInfo * obj);
+
+struct CommandLineOptionInfo
+{
+ char * option;
+ CommandLineParameterInfoList * parameters;
+};
+
+void qapi_free_CommandLineOptionInfoList(CommandLineOptionInfoList * obj);
+void qapi_free_CommandLineOptionInfo(CommandLineOptionInfo * obj);
+
+void qapi_free_X86CPURegister32List(X86CPURegister32List * obj);
+
+struct X86CPUFeatureWordInfo
+{
+ int64_t cpuid_input_eax;
+ bool has_cpuid_input_ecx;
+ int64_t cpuid_input_ecx;
+ X86CPURegister32 cpuid_register;
+ int64_t features;
+};
+
+void qapi_free_X86CPUFeatureWordInfoList(X86CPUFeatureWordInfoList * obj);
+void qapi_free_X86CPUFeatureWordInfo(X86CPUFeatureWordInfo * obj);
+
+void qapi_free_RxStateList(RxStateList * obj);
+
+struct RxFilterInfo
+{
+ char * name;
+ bool promiscuous;
+ RxState multicast;
+ RxState unicast;
+ bool broadcast_allowed;
+ bool multicast_overflow;
+ bool unicast_overflow;
+ char * main_mac;
+ intList * vlan_table;
+ strList * unicast_table;
+ strList * multicast_table;
+};
+
+void qapi_free_RxFilterInfoList(RxFilterInfoList * obj);
+void qapi_free_RxFilterInfo(RxFilterInfo * obj);
+
+#endif
diff --git a/contrib/qemu/qemu-coroutine-lock.c b/contrib/qemu/qemu-coroutine-lock.c
new file mode 100644
index 000000000..d9fea4989
--- /dev/null
+++ b/contrib/qemu/qemu-coroutine-lock.c
@@ -0,0 +1,178 @@
+/*
+ * coroutine queues and locks
+ *
+ * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "block/coroutine.h"
+#include "block/coroutine_int.h"
+#include "qemu/queue.h"
+#include "trace.h"
+
+void qemu_co_queue_init(CoQueue *queue)
+{
+ QTAILQ_INIT(&queue->entries);
+}
+
+void coroutine_fn qemu_co_queue_wait(CoQueue *queue)
+{
+ Coroutine *self = qemu_coroutine_self();
+ QTAILQ_INSERT_TAIL(&queue->entries, self, co_queue_next);
+ qemu_coroutine_yield();
+ assert(qemu_in_coroutine());
+}
+
+void coroutine_fn qemu_co_queue_wait_insert_head(CoQueue *queue)
+{
+ Coroutine *self = qemu_coroutine_self();
+ QTAILQ_INSERT_HEAD(&queue->entries, self, co_queue_next);
+ qemu_coroutine_yield();
+ assert(qemu_in_coroutine());
+}
+
+/**
+ * qemu_co_queue_run_restart:
+ *
+ * Enter each coroutine that was previously marked for restart by
+ * qemu_co_queue_next() or qemu_co_queue_restart_all(). This function is
+ * invoked by the core coroutine code when the current coroutine yields or
+ * terminates.
+ */
+void qemu_co_queue_run_restart(Coroutine *co)
+{
+ Coroutine *next;
+
+ trace_qemu_co_queue_run_restart(co);
+ while ((next = QTAILQ_FIRST(&co->co_queue_wakeup))) {
+ QTAILQ_REMOVE(&co->co_queue_wakeup, next, co_queue_next);
+ qemu_coroutine_enter(next, NULL);
+ }
+}
+
+static bool qemu_co_queue_do_restart(CoQueue *queue, bool single)
+{
+ Coroutine *self = qemu_coroutine_self();
+ Coroutine *next;
+
+ if (QTAILQ_EMPTY(&queue->entries)) {
+ return false;
+ }
+
+ while ((next = QTAILQ_FIRST(&queue->entries)) != NULL) {
+ QTAILQ_REMOVE(&queue->entries, next, co_queue_next);
+ QTAILQ_INSERT_TAIL(&self->co_queue_wakeup, next, co_queue_next);
+ trace_qemu_co_queue_next(next);
+ if (single) {
+ break;
+ }
+ }
+ return true;
+}
+
+bool qemu_co_queue_next(CoQueue *queue)
+{
+ return qemu_co_queue_do_restart(queue, true);
+}
+
+void qemu_co_queue_restart_all(CoQueue *queue)
+{
+ qemu_co_queue_do_restart(queue, false);
+}
+
+bool qemu_co_queue_empty(CoQueue *queue)
+{
+ return (QTAILQ_FIRST(&queue->entries) == NULL);
+}
+
+void qemu_co_mutex_init(CoMutex *mutex)
+{
+ memset(mutex, 0, sizeof(*mutex));
+ qemu_co_queue_init(&mutex->queue);
+}
+
+void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex)
+{
+ Coroutine *self = qemu_coroutine_self();
+
+ trace_qemu_co_mutex_lock_entry(mutex, self);
+
+ while (mutex->locked) {
+ qemu_co_queue_wait(&mutex->queue);
+ }
+
+ mutex->locked = true;
+
+ trace_qemu_co_mutex_lock_return(mutex, self);
+}
+
+void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex)
+{
+ Coroutine *self = qemu_coroutine_self();
+
+ trace_qemu_co_mutex_unlock_entry(mutex, self);
+
+ assert(mutex->locked == true);
+ assert(qemu_in_coroutine());
+
+ mutex->locked = false;
+ qemu_co_queue_next(&mutex->queue);
+
+ trace_qemu_co_mutex_unlock_return(mutex, self);
+}
+
+void qemu_co_rwlock_init(CoRwlock *lock)
+{
+ memset(lock, 0, sizeof(*lock));
+ qemu_co_queue_init(&lock->queue);
+}
+
+void qemu_co_rwlock_rdlock(CoRwlock *lock)
+{
+ while (lock->writer) {
+ qemu_co_queue_wait(&lock->queue);
+ }
+ lock->reader++;
+}
+
+void qemu_co_rwlock_unlock(CoRwlock *lock)
+{
+ assert(qemu_in_coroutine());
+ if (lock->writer) {
+ lock->writer = false;
+ qemu_co_queue_restart_all(&lock->queue);
+ } else {
+ lock->reader--;
+ assert(lock->reader >= 0);
+ /* Wakeup only one waiting writer */
+ if (!lock->reader) {
+ qemu_co_queue_next(&lock->queue);
+ }
+ }
+}
+
+void qemu_co_rwlock_wrlock(CoRwlock *lock)
+{
+ while (lock->writer || lock->reader) {
+ qemu_co_queue_wait(&lock->queue);
+ }
+ lock->writer = true;
+}
diff --git a/contrib/qemu/qemu-coroutine-sleep.c b/contrib/qemu/qemu-coroutine-sleep.c
new file mode 100644
index 000000000..169ce5ccc
--- /dev/null
+++ b/contrib/qemu/qemu-coroutine-sleep.c
@@ -0,0 +1,39 @@
+/*
+ * QEMU coroutine sleep
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "block/coroutine.h"
+#include "qemu/timer.h"
+
+typedef struct CoSleepCB {
+ QEMUTimer *ts;
+ Coroutine *co;
+} CoSleepCB;
+
+static void co_sleep_cb(void *opaque)
+{
+ CoSleepCB *sleep_cb = opaque;
+
+ qemu_coroutine_enter(sleep_cb->co, NULL);
+}
+
+void coroutine_fn co_sleep_ns(QEMUClock *clock, int64_t ns)
+{
+ CoSleepCB sleep_cb = {
+ .co = qemu_coroutine_self(),
+ };
+ sleep_cb.ts = qemu_new_timer(clock, SCALE_NS, co_sleep_cb, &sleep_cb);
+ qemu_mod_timer(sleep_cb.ts, qemu_get_clock_ns(clock) + ns);
+ qemu_coroutine_yield();
+ qemu_del_timer(sleep_cb.ts);
+ qemu_free_timer(sleep_cb.ts);
+}
diff --git a/contrib/qemu/qemu-coroutine.c b/contrib/qemu/qemu-coroutine.c
new file mode 100644
index 000000000..5e19307ee
--- /dev/null
+++ b/contrib/qemu/qemu-coroutine.c
@@ -0,0 +1,135 @@
+/*
+ * QEMU coroutines
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ * Kevin Wolf <kwolf@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "trace.h"
+#include "qemu-common.h"
+#include "qemu/thread.h"
+#include "block/coroutine.h"
+#include "block/coroutine_int.h"
+
+enum {
+ /* Maximum free pool size prevents holding too many freed coroutines */
+ POOL_MAX_SIZE = 0,
+};
+
+/** Free list to speed up creation */
+static QemuMutex pool_lock;
+static QSLIST_HEAD(, Coroutine) pool = QSLIST_HEAD_INITIALIZER(pool);
+static unsigned int pool_size;
+
+Coroutine *qemu_coroutine_create(CoroutineEntry *entry)
+{
+ Coroutine *co;
+
+ qemu_mutex_lock(&pool_lock);
+ co = QSLIST_FIRST(&pool);
+ if (co) {
+ QSLIST_REMOVE_HEAD(&pool, pool_next);
+ pool_size--;
+ }
+ qemu_mutex_unlock(&pool_lock);
+
+ if (!co) {
+ co = qemu_coroutine_new();
+ }
+
+ co->entry = entry;
+ QTAILQ_INIT(&co->co_queue_wakeup);
+ return co;
+}
+
+static void coroutine_delete(Coroutine *co)
+{
+ qemu_mutex_lock(&pool_lock);
+ if (pool_size < POOL_MAX_SIZE) {
+ QSLIST_INSERT_HEAD(&pool, co, pool_next);
+ co->caller = NULL;
+ pool_size++;
+ qemu_mutex_unlock(&pool_lock);
+ return;
+ }
+ qemu_mutex_unlock(&pool_lock);
+
+ qemu_coroutine_delete(co);
+}
+
+static void __attribute__((constructor)) coroutine_pool_init(void)
+{
+ qemu_mutex_init(&pool_lock);
+}
+
+static void __attribute__((destructor)) coroutine_pool_cleanup(void)
+{
+ Coroutine *co;
+ Coroutine *tmp;
+
+ QSLIST_FOREACH_SAFE(co, &pool, pool_next, tmp) {
+ QSLIST_REMOVE_HEAD(&pool, pool_next);
+ qemu_coroutine_delete(co);
+ }
+
+ qemu_mutex_destroy(&pool_lock);
+}
+
+static void coroutine_swap(Coroutine *from, Coroutine *to)
+{
+ CoroutineAction ret;
+
+ ret = qemu_coroutine_switch(from, to, COROUTINE_YIELD);
+
+ qemu_co_queue_run_restart(to);
+
+ switch (ret) {
+ case COROUTINE_YIELD:
+ return;
+ case COROUTINE_TERMINATE:
+ trace_qemu_coroutine_terminate(to);
+ coroutine_delete(to);
+ return;
+ default:
+ abort();
+ }
+}
+
+void qemu_coroutine_enter(Coroutine *co, void *opaque)
+{
+ Coroutine *self = qemu_coroutine_self();
+
+ trace_qemu_coroutine_enter(self, co, opaque);
+
+ if (co->caller) {
+ fprintf(stderr, "Co-routine re-entered recursively\n");
+ abort();
+ }
+
+ co->caller = self;
+ co->entry_arg = opaque;
+ coroutine_swap(self, co);
+}
+
+void coroutine_fn qemu_coroutine_yield(void)
+{
+ Coroutine *self = qemu_coroutine_self();
+ Coroutine *to = self->caller;
+
+ trace_qemu_coroutine_yield(self, to);
+
+ if (!to) {
+ fprintf(stderr, "Co-routine is yielding to no one\n");
+ abort();
+ }
+
+ self->caller = NULL;
+ coroutine_swap(self, to);
+}
diff --git a/contrib/qemu/qmp-commands.h b/contrib/qemu/qmp-commands.h
new file mode 100644
index 000000000..fcc0ff0f7
--- /dev/null
+++ b/contrib/qemu/qmp-commands.h
@@ -0,0 +1,204 @@
+/* THIS FILE IS AUTOMATICALLY GENERATED, DO NOT MODIFY */
+
+/*
+ * schema-defined QAPI function prototypes
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef QMP_COMMANDS_H
+#define QMP_COMMANDS_H
+
+#include "qapi-types.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/error.h"
+
+void qmp_add_client(const char * protocol, const char * fdname, bool has_skipauth, bool skipauth, bool has_tls, bool tls, Error **errp);
+int qmp_marshal_input_add_client(Monitor *mon, const QDict *qdict, QObject **ret);
+NameInfo * qmp_query_name(Error **errp);
+int qmp_marshal_input_query_name(Monitor *mon, const QDict *qdict, QObject **ret);
+VersionInfo * qmp_query_version(Error **errp);
+int qmp_marshal_input_query_version(Monitor *mon, const QDict *qdict, QObject **ret);
+KvmInfo * qmp_query_kvm(Error **errp);
+int qmp_marshal_input_query_kvm(Monitor *mon, const QDict *qdict, QObject **ret);
+StatusInfo * qmp_query_status(Error **errp);
+int qmp_marshal_input_query_status(Monitor *mon, const QDict *qdict, QObject **ret);
+UuidInfo * qmp_query_uuid(Error **errp);
+int qmp_marshal_input_query_uuid(Monitor *mon, const QDict *qdict, QObject **ret);
+ChardevInfoList * qmp_query_chardev(Error **errp);
+int qmp_marshal_input_query_chardev(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_ringbuf_write(const char * device, const char * data, bool has_format, DataFormat format, Error **errp);
+int qmp_marshal_input_ringbuf_write(Monitor *mon, const QDict *qdict, QObject **ret);
+char * qmp_ringbuf_read(const char * device, int64_t size, bool has_format, DataFormat format, Error **errp);
+int qmp_marshal_input_ringbuf_read(Monitor *mon, const QDict *qdict, QObject **ret);
+CommandInfoList * qmp_query_commands(Error **errp);
+int qmp_marshal_input_query_commands(Monitor *mon, const QDict *qdict, QObject **ret);
+EventInfoList * qmp_query_events(Error **errp);
+int qmp_marshal_input_query_events(Monitor *mon, const QDict *qdict, QObject **ret);
+MigrationInfo * qmp_query_migrate(Error **errp);
+int qmp_marshal_input_query_migrate(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_migrate_set_capabilities(MigrationCapabilityStatusList * capabilities, Error **errp);
+int qmp_marshal_input_migrate_set_capabilities(Monitor *mon, const QDict *qdict, QObject **ret);
+MigrationCapabilityStatusList * qmp_query_migrate_capabilities(Error **errp);
+int qmp_marshal_input_query_migrate_capabilities(Monitor *mon, const QDict *qdict, QObject **ret);
+MouseInfoList * qmp_query_mice(Error **errp);
+int qmp_marshal_input_query_mice(Monitor *mon, const QDict *qdict, QObject **ret);
+CpuInfoList * qmp_query_cpus(Error **errp);
+int qmp_marshal_input_query_cpus(Monitor *mon, const QDict *qdict, QObject **ret);
+BlockInfoList * qmp_query_block(Error **errp);
+int qmp_marshal_input_query_block(Monitor *mon, const QDict *qdict, QObject **ret);
+BlockStatsList * qmp_query_blockstats(Error **errp);
+int qmp_marshal_input_query_blockstats(Monitor *mon, const QDict *qdict, QObject **ret);
+VncInfo * qmp_query_vnc(Error **errp);
+int qmp_marshal_input_query_vnc(Monitor *mon, const QDict *qdict, QObject **ret);
+SpiceInfo * qmp_query_spice(Error **errp);
+int qmp_marshal_input_query_spice(Monitor *mon, const QDict *qdict, QObject **ret);
+BalloonInfo * qmp_query_balloon(Error **errp);
+int qmp_marshal_input_query_balloon(Monitor *mon, const QDict *qdict, QObject **ret);
+PciInfoList * qmp_query_pci(Error **errp);
+int qmp_marshal_input_query_pci(Monitor *mon, const QDict *qdict, QObject **ret);
+BlockJobInfoList * qmp_query_block_jobs(Error **errp);
+int qmp_marshal_input_query_block_jobs(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_quit(Error **errp);
+int qmp_marshal_input_quit(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_stop(Error **errp);
+int qmp_marshal_input_stop(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_system_reset(Error **errp);
+int qmp_marshal_input_system_reset(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_system_powerdown(Error **errp);
+int qmp_marshal_input_system_powerdown(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_cpu(int64_t index, Error **errp);
+int qmp_marshal_input_cpu(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_cpu_add(int64_t id, Error **errp);
+int qmp_marshal_input_cpu_add(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_memsave(int64_t val, int64_t size, const char * filename, bool has_cpu_index, int64_t cpu_index, Error **errp);
+int qmp_marshal_input_memsave(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_pmemsave(int64_t val, int64_t size, const char * filename, Error **errp);
+int qmp_marshal_input_pmemsave(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_cont(Error **errp);
+int qmp_marshal_input_cont(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_system_wakeup(Error **errp);
+int qmp_marshal_input_system_wakeup(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_inject_nmi(Error **errp);
+int qmp_marshal_input_inject_nmi(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_set_link(const char * name, bool up, Error **errp);
+int qmp_marshal_input_set_link(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_block_passwd(const char * device, const char * password, Error **errp);
+int qmp_marshal_input_block_passwd(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_balloon(int64_t value, Error **errp);
+int qmp_marshal_input_balloon(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_block_resize(const char * device, int64_t size, Error **errp);
+int qmp_marshal_input_block_resize(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_transaction(TransactionActionList * actions, Error **errp);
+int qmp_marshal_input_transaction(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_blockdev_snapshot_sync(const char * device, const char * snapshot_file, bool has_format, const char * format, bool has_mode, NewImageMode mode, Error **errp);
+int qmp_marshal_input_blockdev_snapshot_sync(Monitor *mon, const QDict *qdict, QObject **ret);
+char * qmp_human_monitor_command(const char * command_line, bool has_cpu_index, int64_t cpu_index, Error **errp);
+int qmp_marshal_input_human_monitor_command(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_block_commit(const char * device, bool has_base, const char * base, const char * top, bool has_speed, int64_t speed, Error **errp);
+int qmp_marshal_input_block_commit(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_drive_backup(const char * device, const char * target, bool has_format, const char * format, MirrorSyncMode sync, bool has_mode, NewImageMode mode, bool has_speed, int64_t speed, bool has_on_source_error, BlockdevOnError on_source_error, bool has_on_target_error, BlockdevOnError on_target_error, Error **errp);
+int qmp_marshal_input_drive_backup(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_drive_mirror(const char * device, const char * target, bool has_format, const char * format, MirrorSyncMode sync, bool has_mode, NewImageMode mode, bool has_speed, int64_t speed, bool has_granularity, uint32_t granularity, bool has_buf_size, int64_t buf_size, bool has_on_source_error, BlockdevOnError on_source_error, bool has_on_target_error, BlockdevOnError on_target_error, Error **errp);
+int qmp_marshal_input_drive_mirror(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_migrate_cancel(Error **errp);
+int qmp_marshal_input_migrate_cancel(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_migrate_set_downtime(double value, Error **errp);
+int qmp_marshal_input_migrate_set_downtime(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_migrate_set_speed(int64_t value, Error **errp);
+int qmp_marshal_input_migrate_set_speed(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_migrate_set_cache_size(int64_t value, Error **errp);
+int qmp_marshal_input_migrate_set_cache_size(Monitor *mon, const QDict *qdict, QObject **ret);
+int64_t qmp_query_migrate_cache_size(Error **errp);
+int qmp_marshal_input_query_migrate_cache_size(Monitor *mon, const QDict *qdict, QObject **ret);
+ObjectPropertyInfoList * qmp_qom_list(const char * path, Error **errp);
+int qmp_marshal_input_qom_list(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_set_password(const char * protocol, const char * password, bool has_connected, const char * connected, Error **errp);
+int qmp_marshal_input_set_password(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_expire_password(const char * protocol, const char * time, Error **errp);
+int qmp_marshal_input_expire_password(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_eject(const char * device, bool has_force, bool force, Error **errp);
+int qmp_marshal_input_eject(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_change_vnc_password(const char * password, Error **errp);
+int qmp_marshal_input_change_vnc_password(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_change(const char * device, const char * target, bool has_arg, const char * arg, Error **errp);
+int qmp_marshal_input_change(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_block_set_io_throttle(const char * device, int64_t bps, int64_t bps_rd, int64_t bps_wr, int64_t iops, int64_t iops_rd, int64_t iops_wr, Error **errp);
+int qmp_marshal_input_block_set_io_throttle(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_block_stream(const char * device, bool has_base, const char * base, bool has_speed, int64_t speed, bool has_on_error, BlockdevOnError on_error, Error **errp);
+int qmp_marshal_input_block_stream(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_block_job_set_speed(const char * device, int64_t speed, Error **errp);
+int qmp_marshal_input_block_job_set_speed(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_block_job_cancel(const char * device, bool has_force, bool force, Error **errp);
+int qmp_marshal_input_block_job_cancel(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_block_job_pause(const char * device, Error **errp);
+int qmp_marshal_input_block_job_pause(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_block_job_resume(const char * device, Error **errp);
+int qmp_marshal_input_block_job_resume(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_block_job_complete(const char * device, Error **errp);
+int qmp_marshal_input_block_job_complete(Monitor *mon, const QDict *qdict, QObject **ret);
+ObjectTypeInfoList * qmp_qom_list_types(bool has_implements, const char * implements, bool has_abstract, bool abstract, Error **errp);
+int qmp_marshal_input_qom_list_types(Monitor *mon, const QDict *qdict, QObject **ret);
+DevicePropertyInfoList * qmp_device_list_properties(const char * typename, Error **errp);
+int qmp_marshal_input_device_list_properties(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_migrate(const char * uri, bool has_blk, bool blk, bool has_inc, bool inc, bool has_detach, bool detach, Error **errp);
+int qmp_marshal_input_migrate(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_xen_save_devices_state(const char * filename, Error **errp);
+int qmp_marshal_input_xen_save_devices_state(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_xen_set_global_dirty_log(bool enable, Error **errp);
+int qmp_marshal_input_xen_set_global_dirty_log(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_device_del(const char * id, Error **errp);
+int qmp_marshal_input_device_del(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_dump_guest_memory(bool paging, const char * protocol, bool has_begin, int64_t begin, bool has_length, int64_t length, Error **errp);
+int qmp_marshal_input_dump_guest_memory(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_netdev_del(const char * id, Error **errp);
+int qmp_marshal_input_netdev_del(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_getfd(const char * fdname, Error **errp);
+int qmp_marshal_input_getfd(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_closefd(const char * fdname, Error **errp);
+int qmp_marshal_input_closefd(Monitor *mon, const QDict *qdict, QObject **ret);
+MachineInfoList * qmp_query_machines(Error **errp);
+int qmp_marshal_input_query_machines(Monitor *mon, const QDict *qdict, QObject **ret);
+CpuDefinitionInfoList * qmp_query_cpu_definitions(Error **errp);
+int qmp_marshal_input_query_cpu_definitions(Monitor *mon, const QDict *qdict, QObject **ret);
+AddfdInfo * qmp_add_fd(bool has_fdset_id, int64_t fdset_id, bool has_opaque, const char * opaque, Error **errp);
+int qmp_marshal_input_add_fd(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_remove_fd(int64_t fdset_id, bool has_fd, int64_t fd, Error **errp);
+int qmp_marshal_input_remove_fd(Monitor *mon, const QDict *qdict, QObject **ret);
+FdsetInfoList * qmp_query_fdsets(Error **errp);
+int qmp_marshal_input_query_fdsets(Monitor *mon, const QDict *qdict, QObject **ret);
+TargetInfo * qmp_query_target(Error **errp);
+int qmp_marshal_input_query_target(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_send_key(KeyValueList * keys, bool has_hold_time, int64_t hold_time, Error **errp);
+int qmp_marshal_input_send_key(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_screendump(const char * filename, Error **errp);
+int qmp_marshal_input_screendump(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_nbd_server_start(SocketAddress * addr, Error **errp);
+int qmp_marshal_input_nbd_server_start(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_nbd_server_add(const char * device, bool has_writable, bool writable, Error **errp);
+int qmp_marshal_input_nbd_server_add(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_nbd_server_stop(Error **errp);
+int qmp_marshal_input_nbd_server_stop(Monitor *mon, const QDict *qdict, QObject **ret);
+ChardevReturn * qmp_chardev_add(const char * id, ChardevBackend * backend, Error **errp);
+int qmp_marshal_input_chardev_add(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_chardev_remove(const char * id, Error **errp);
+int qmp_marshal_input_chardev_remove(Monitor *mon, const QDict *qdict, QObject **ret);
+TpmModelList * qmp_query_tpm_models(Error **errp);
+int qmp_marshal_input_query_tpm_models(Monitor *mon, const QDict *qdict, QObject **ret);
+TpmTypeList * qmp_query_tpm_types(Error **errp);
+int qmp_marshal_input_query_tpm_types(Monitor *mon, const QDict *qdict, QObject **ret);
+TPMInfoList * qmp_query_tpm(Error **errp);
+int qmp_marshal_input_query_tpm(Monitor *mon, const QDict *qdict, QObject **ret);
+CommandLineOptionInfoList * qmp_query_command_line_options(bool has_option, const char * option, Error **errp);
+int qmp_marshal_input_query_command_line_options(Monitor *mon, const QDict *qdict, QObject **ret);
+RxFilterInfoList * qmp_query_rx_filter(bool has_name, const char * name, Error **errp);
+int qmp_marshal_input_query_rx_filter(Monitor *mon, const QDict *qdict, QObject **ret);
+
+#endif
diff --git a/contrib/qemu/qobject/json-lexer.c b/contrib/qemu/qobject/json-lexer.c
new file mode 100644
index 000000000..440df6039
--- /dev/null
+++ b/contrib/qemu/qobject/json-lexer.c
@@ -0,0 +1,373 @@
+/*
+ * JSON lexer
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qapi/qmp/qstring.h"
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qint.h"
+#include "qemu-common.h"
+#include "qapi/qmp/json-lexer.h"
+
+#define MAX_TOKEN_SIZE (64ULL << 20)
+
+/*
+ * \"([^\\\"]|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*\"
+ * '([^\\']|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*'
+ * 0|([1-9][0-9]*(.[0-9]+)?([eE]([-+])?[0-9]+))
+ * [{}\[\],:]
+ * [a-z]+
+ *
+ */
+
+enum json_lexer_state {
+ IN_ERROR = 0,
+ IN_DQ_UCODE3,
+ IN_DQ_UCODE2,
+ IN_DQ_UCODE1,
+ IN_DQ_UCODE0,
+ IN_DQ_STRING_ESCAPE,
+ IN_DQ_STRING,
+ IN_SQ_UCODE3,
+ IN_SQ_UCODE2,
+ IN_SQ_UCODE1,
+ IN_SQ_UCODE0,
+ IN_SQ_STRING_ESCAPE,
+ IN_SQ_STRING,
+ IN_ZERO,
+ IN_DIGITS,
+ IN_DIGIT,
+ IN_EXP_E,
+ IN_MANTISSA,
+ IN_MANTISSA_DIGITS,
+ IN_NONZERO_NUMBER,
+ IN_NEG_NONZERO_NUMBER,
+ IN_KEYWORD,
+ IN_ESCAPE,
+ IN_ESCAPE_L,
+ IN_ESCAPE_LL,
+ IN_ESCAPE_I,
+ IN_ESCAPE_I6,
+ IN_ESCAPE_I64,
+ IN_WHITESPACE,
+ IN_START,
+};
+
+#define TERMINAL(state) [0 ... 0x7F] = (state)
+
+/* Return whether TERMINAL is a terminal state and the transition to it
+ from OLD_STATE required lookahead. This happens whenever the table
+ below uses the TERMINAL macro. */
+#define TERMINAL_NEEDED_LOOKAHEAD(old_state, terminal) \
+ (json_lexer[(old_state)][0] == (terminal))
+
+static const uint8_t json_lexer[][256] = {
+ /* double quote string */
+ [IN_DQ_UCODE3] = {
+ ['0' ... '9'] = IN_DQ_STRING,
+ ['a' ... 'f'] = IN_DQ_STRING,
+ ['A' ... 'F'] = IN_DQ_STRING,
+ },
+ [IN_DQ_UCODE2] = {
+ ['0' ... '9'] = IN_DQ_UCODE3,
+ ['a' ... 'f'] = IN_DQ_UCODE3,
+ ['A' ... 'F'] = IN_DQ_UCODE3,
+ },
+ [IN_DQ_UCODE1] = {
+ ['0' ... '9'] = IN_DQ_UCODE2,
+ ['a' ... 'f'] = IN_DQ_UCODE2,
+ ['A' ... 'F'] = IN_DQ_UCODE2,
+ },
+ [IN_DQ_UCODE0] = {
+ ['0' ... '9'] = IN_DQ_UCODE1,
+ ['a' ... 'f'] = IN_DQ_UCODE1,
+ ['A' ... 'F'] = IN_DQ_UCODE1,
+ },
+ [IN_DQ_STRING_ESCAPE] = {
+ ['b'] = IN_DQ_STRING,
+ ['f'] = IN_DQ_STRING,
+ ['n'] = IN_DQ_STRING,
+ ['r'] = IN_DQ_STRING,
+ ['t'] = IN_DQ_STRING,
+ ['/'] = IN_DQ_STRING,
+ ['\\'] = IN_DQ_STRING,
+ ['\''] = IN_DQ_STRING,
+ ['\"'] = IN_DQ_STRING,
+ ['u'] = IN_DQ_UCODE0,
+ },
+ [IN_DQ_STRING] = {
+ [1 ... 0xBF] = IN_DQ_STRING,
+ [0xC2 ... 0xF4] = IN_DQ_STRING,
+ ['\\'] = IN_DQ_STRING_ESCAPE,
+ ['"'] = JSON_STRING,
+ },
+
+ /* single quote string */
+ [IN_SQ_UCODE3] = {
+ ['0' ... '9'] = IN_SQ_STRING,
+ ['a' ... 'f'] = IN_SQ_STRING,
+ ['A' ... 'F'] = IN_SQ_STRING,
+ },
+ [IN_SQ_UCODE2] = {
+ ['0' ... '9'] = IN_SQ_UCODE3,
+ ['a' ... 'f'] = IN_SQ_UCODE3,
+ ['A' ... 'F'] = IN_SQ_UCODE3,
+ },
+ [IN_SQ_UCODE1] = {
+ ['0' ... '9'] = IN_SQ_UCODE2,
+ ['a' ... 'f'] = IN_SQ_UCODE2,
+ ['A' ... 'F'] = IN_SQ_UCODE2,
+ },
+ [IN_SQ_UCODE0] = {
+ ['0' ... '9'] = IN_SQ_UCODE1,
+ ['a' ... 'f'] = IN_SQ_UCODE1,
+ ['A' ... 'F'] = IN_SQ_UCODE1,
+ },
+ [IN_SQ_STRING_ESCAPE] = {
+ ['b'] = IN_SQ_STRING,
+ ['f'] = IN_SQ_STRING,
+ ['n'] = IN_SQ_STRING,
+ ['r'] = IN_SQ_STRING,
+ ['t'] = IN_SQ_STRING,
+ ['/'] = IN_DQ_STRING,
+ ['\\'] = IN_DQ_STRING,
+ ['\''] = IN_SQ_STRING,
+ ['\"'] = IN_SQ_STRING,
+ ['u'] = IN_SQ_UCODE0,
+ },
+ [IN_SQ_STRING] = {
+ [1 ... 0xBF] = IN_SQ_STRING,
+ [0xC2 ... 0xF4] = IN_SQ_STRING,
+ ['\\'] = IN_SQ_STRING_ESCAPE,
+ ['\''] = JSON_STRING,
+ },
+
+ /* Zero */
+ [IN_ZERO] = {
+ TERMINAL(JSON_INTEGER),
+ ['0' ... '9'] = IN_ERROR,
+ ['.'] = IN_MANTISSA,
+ },
+
+ /* Float */
+ [IN_DIGITS] = {
+ TERMINAL(JSON_FLOAT),
+ ['0' ... '9'] = IN_DIGITS,
+ },
+
+ [IN_DIGIT] = {
+ ['0' ... '9'] = IN_DIGITS,
+ },
+
+ [IN_EXP_E] = {
+ ['-'] = IN_DIGIT,
+ ['+'] = IN_DIGIT,
+ ['0' ... '9'] = IN_DIGITS,
+ },
+
+ [IN_MANTISSA_DIGITS] = {
+ TERMINAL(JSON_FLOAT),
+ ['0' ... '9'] = IN_MANTISSA_DIGITS,
+ ['e'] = IN_EXP_E,
+ ['E'] = IN_EXP_E,
+ },
+
+ [IN_MANTISSA] = {
+ ['0' ... '9'] = IN_MANTISSA_DIGITS,
+ },
+
+ /* Number */
+ [IN_NONZERO_NUMBER] = {
+ TERMINAL(JSON_INTEGER),
+ ['0' ... '9'] = IN_NONZERO_NUMBER,
+ ['e'] = IN_EXP_E,
+ ['E'] = IN_EXP_E,
+ ['.'] = IN_MANTISSA,
+ },
+
+ [IN_NEG_NONZERO_NUMBER] = {
+ ['0'] = IN_ZERO,
+ ['1' ... '9'] = IN_NONZERO_NUMBER,
+ },
+
+ /* keywords */
+ [IN_KEYWORD] = {
+ TERMINAL(JSON_KEYWORD),
+ ['a' ... 'z'] = IN_KEYWORD,
+ },
+
+ /* whitespace */
+ [IN_WHITESPACE] = {
+ TERMINAL(JSON_SKIP),
+ [' '] = IN_WHITESPACE,
+ ['\t'] = IN_WHITESPACE,
+ ['\r'] = IN_WHITESPACE,
+ ['\n'] = IN_WHITESPACE,
+ },
+
+ /* escape */
+ [IN_ESCAPE_LL] = {
+ ['d'] = JSON_ESCAPE,
+ },
+
+ [IN_ESCAPE_L] = {
+ ['d'] = JSON_ESCAPE,
+ ['l'] = IN_ESCAPE_LL,
+ },
+
+ [IN_ESCAPE_I64] = {
+ ['d'] = JSON_ESCAPE,
+ },
+
+ [IN_ESCAPE_I6] = {
+ ['4'] = IN_ESCAPE_I64,
+ },
+
+ [IN_ESCAPE_I] = {
+ ['6'] = IN_ESCAPE_I6,
+ },
+
+ [IN_ESCAPE] = {
+ ['d'] = JSON_ESCAPE,
+ ['i'] = JSON_ESCAPE,
+ ['p'] = JSON_ESCAPE,
+ ['s'] = JSON_ESCAPE,
+ ['f'] = JSON_ESCAPE,
+ ['l'] = IN_ESCAPE_L,
+ ['I'] = IN_ESCAPE_I,
+ },
+
+ /* top level rule */
+ [IN_START] = {
+ ['"'] = IN_DQ_STRING,
+ ['\''] = IN_SQ_STRING,
+ ['0'] = IN_ZERO,
+ ['1' ... '9'] = IN_NONZERO_NUMBER,
+ ['-'] = IN_NEG_NONZERO_NUMBER,
+ ['{'] = JSON_OPERATOR,
+ ['}'] = JSON_OPERATOR,
+ ['['] = JSON_OPERATOR,
+ [']'] = JSON_OPERATOR,
+ [','] = JSON_OPERATOR,
+ [':'] = JSON_OPERATOR,
+ ['a' ... 'z'] = IN_KEYWORD,
+ ['%'] = IN_ESCAPE,
+ [' '] = IN_WHITESPACE,
+ ['\t'] = IN_WHITESPACE,
+ ['\r'] = IN_WHITESPACE,
+ ['\n'] = IN_WHITESPACE,
+ },
+};
+
+void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func)
+{
+ lexer->emit = func;
+ lexer->state = IN_START;
+ lexer->token = qstring_new();
+ lexer->x = lexer->y = 0;
+}
+
+static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
+{
+ int char_consumed, new_state;
+
+ lexer->x++;
+ if (ch == '\n') {
+ lexer->x = 0;
+ lexer->y++;
+ }
+
+ do {
+ new_state = json_lexer[lexer->state][(uint8_t)ch];
+ char_consumed = !TERMINAL_NEEDED_LOOKAHEAD(lexer->state, new_state);
+ if (char_consumed) {
+ qstring_append_chr(lexer->token, ch);
+ }
+
+ switch (new_state) {
+ case JSON_OPERATOR:
+ case JSON_ESCAPE:
+ case JSON_INTEGER:
+ case JSON_FLOAT:
+ case JSON_KEYWORD:
+ case JSON_STRING:
+ lexer->emit(lexer, lexer->token, new_state, lexer->x, lexer->y);
+ /* fall through */
+ case JSON_SKIP:
+ QDECREF(lexer->token);
+ lexer->token = qstring_new();
+ new_state = IN_START;
+ break;
+ case IN_ERROR:
+ /* XXX: To avoid having previous bad input leaving the parser in an
+ * unresponsive state where we consume unpredictable amounts of
+ * subsequent "good" input, percolate this error state up to the
+ * tokenizer/parser by forcing a NULL object to be emitted, then
+ * reset state.
+ *
+ * Also note that this handling is required for reliable channel
+ * negotiation between QMP and the guest agent, since chr(0xFF)
+ * is placed at the beginning of certain events to ensure proper
+ * delivery when the channel is in an unknown state. chr(0xFF) is
+ * never a valid ASCII/UTF-8 sequence, so this should reliably
+ * induce an error/flush state.
+ */
+ lexer->emit(lexer, lexer->token, JSON_ERROR, lexer->x, lexer->y);
+ QDECREF(lexer->token);
+ lexer->token = qstring_new();
+ new_state = IN_START;
+ lexer->state = new_state;
+ return 0;
+ default:
+ break;
+ }
+ lexer->state = new_state;
+ } while (!char_consumed && !flush);
+
+ /* Do not let a single token grow to an arbitrarily large size,
+ * this is a security consideration.
+ */
+ if (lexer->token->length > MAX_TOKEN_SIZE) {
+ lexer->emit(lexer, lexer->token, lexer->state, lexer->x, lexer->y);
+ QDECREF(lexer->token);
+ lexer->token = qstring_new();
+ lexer->state = IN_START;
+ }
+
+ return 0;
+}
+
+int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size)
+{
+ size_t i;
+
+ for (i = 0; i < size; i++) {
+ int err;
+
+ err = json_lexer_feed_char(lexer, buffer[i], false);
+ if (err < 0) {
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+int json_lexer_flush(JSONLexer *lexer)
+{
+ return lexer->state == IN_START ? 0 : json_lexer_feed_char(lexer, 0, true);
+}
+
+void json_lexer_destroy(JSONLexer *lexer)
+{
+ QDECREF(lexer->token);
+}
diff --git a/contrib/qemu/qobject/json-parser.c b/contrib/qemu/qobject/json-parser.c
new file mode 100644
index 000000000..e7947b340
--- /dev/null
+++ b/contrib/qemu/qobject/json-parser.c
@@ -0,0 +1,724 @@
+/*
+ * JSON Parser
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include <stdarg.h>
+
+#include "qemu-common.h"
+#include "qapi/qmp/qstring.h"
+#include "qapi/qmp/qint.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qfloat.h"
+#include "qapi/qmp/qbool.h"
+#include "qapi/qmp/json-parser.h"
+#include "qapi/qmp/json-lexer.h"
+#include "qapi/qmp/qerror.h"
+
+typedef struct JSONParserContext
+{
+ Error *err;
+ struct {
+ QObject **buf;
+ size_t pos;
+ size_t count;
+ } tokens;
+} JSONParserContext;
+
+#define BUG_ON(cond) assert(!(cond))
+
+/**
+ * TODO
+ *
+ * 0) make errors meaningful again
+ * 1) add geometry information to tokens
+ * 3) should we return a parsed size?
+ * 4) deal with premature EOI
+ */
+
+static QObject *parse_value(JSONParserContext *ctxt, va_list *ap);
+
+/**
+ * Token manipulators
+ *
+ * tokens are dictionaries that contain a type, a string value, and geometry information
+ * about a token identified by the lexer. These are routines that make working with
+ * these objects a bit easier.
+ */
+static const char *token_get_value(QObject *obj)
+{
+ return qdict_get_str(qobject_to_qdict(obj), "token");
+}
+
+static JSONTokenType token_get_type(QObject *obj)
+{
+ return qdict_get_int(qobject_to_qdict(obj), "type");
+}
+
+static int token_is_operator(QObject *obj, char op)
+{
+ const char *val;
+
+ if (token_get_type(obj) != JSON_OPERATOR) {
+ return 0;
+ }
+
+ val = token_get_value(obj);
+
+ return (val[0] == op) && (val[1] == 0);
+}
+
+static int token_is_keyword(QObject *obj, const char *value)
+{
+ if (token_get_type(obj) != JSON_KEYWORD) {
+ return 0;
+ }
+
+ return strcmp(token_get_value(obj), value) == 0;
+}
+
+static int token_is_escape(QObject *obj, const char *value)
+{
+ if (token_get_type(obj) != JSON_ESCAPE) {
+ return 0;
+ }
+
+ return (strcmp(token_get_value(obj), value) == 0);
+}
+
+/**
+ * Error handler
+ */
+static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
+ QObject *token, const char *msg, ...)
+{
+ va_list ap;
+ char message[1024];
+ va_start(ap, msg);
+ vsnprintf(message, sizeof(message), msg, ap);
+ va_end(ap);
+ if (ctxt->err) {
+ error_free(ctxt->err);
+ ctxt->err = NULL;
+ }
+ error_set(&ctxt->err, QERR_JSON_PARSE_ERROR, message);
+}
+
+/**
+ * String helpers
+ *
+ * These helpers are used to unescape strings.
+ */
+static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
+{
+ if (wchar <= 0x007F) {
+ BUG_ON(buffer_length < 2);
+
+ buffer[0] = wchar & 0x7F;
+ buffer[1] = 0;
+ } else if (wchar <= 0x07FF) {
+ BUG_ON(buffer_length < 3);
+
+ buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F);
+ buffer[1] = 0x80 | (wchar & 0x3F);
+ buffer[2] = 0;
+ } else {
+ BUG_ON(buffer_length < 4);
+
+ buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F);
+ buffer[1] = 0x80 | ((wchar >> 6) & 0x3F);
+ buffer[2] = 0x80 | (wchar & 0x3F);
+ buffer[3] = 0;
+ }
+}
+
+static int hex2decimal(char ch)
+{
+ if (ch >= '0' && ch <= '9') {
+ return (ch - '0');
+ } else if (ch >= 'a' && ch <= 'f') {
+ return 10 + (ch - 'a');
+ } else if (ch >= 'A' && ch <= 'F') {
+ return 10 + (ch - 'A');
+ }
+
+ return -1;
+}
+
+/**
+ * parse_string(): Parse a json string and return a QObject
+ *
+ * string
+ * ""
+ * " chars "
+ * chars
+ * char
+ * char chars
+ * char
+ * any-Unicode-character-
+ * except-"-or-\-or-
+ * control-character
+ * \"
+ * \\
+ * \/
+ * \b
+ * \f
+ * \n
+ * \r
+ * \t
+ * \u four-hex-digits
+ */
+static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token)
+{
+ const char *ptr = token_get_value(token);
+ QString *str;
+ int double_quote = 1;
+
+ if (*ptr == '"') {
+ double_quote = 1;
+ } else {
+ double_quote = 0;
+ }
+ ptr++;
+
+ str = qstring_new();
+ while (*ptr &&
+ ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) {
+ if (*ptr == '\\') {
+ ptr++;
+
+ switch (*ptr) {
+ case '"':
+ qstring_append(str, "\"");
+ ptr++;
+ break;
+ case '\'':
+ qstring_append(str, "'");
+ ptr++;
+ break;
+ case '\\':
+ qstring_append(str, "\\");
+ ptr++;
+ break;
+ case '/':
+ qstring_append(str, "/");
+ ptr++;
+ break;
+ case 'b':
+ qstring_append(str, "\b");
+ ptr++;
+ break;
+ case 'f':
+ qstring_append(str, "\f");
+ ptr++;
+ break;
+ case 'n':
+ qstring_append(str, "\n");
+ ptr++;
+ break;
+ case 'r':
+ qstring_append(str, "\r");
+ ptr++;
+ break;
+ case 't':
+ qstring_append(str, "\t");
+ ptr++;
+ break;
+ case 'u': {
+ uint16_t unicode_char = 0;
+ char utf8_char[4];
+ int i = 0;
+
+ ptr++;
+
+ for (i = 0; i < 4; i++) {
+ if (qemu_isxdigit(*ptr)) {
+ unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
+ } else {
+ parse_error(ctxt, token,
+ "invalid hex escape sequence in string");
+ goto out;
+ }
+ ptr++;
+ }
+
+ wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
+ qstring_append(str, utf8_char);
+ } break;
+ default:
+ parse_error(ctxt, token, "invalid escape sequence in string");
+ goto out;
+ }
+ } else {
+ char dummy[2];
+
+ dummy[0] = *ptr++;
+ dummy[1] = 0;
+
+ qstring_append(str, dummy);
+ }
+ }
+
+ return str;
+
+out:
+ QDECREF(str);
+ return NULL;
+}
+
+static QObject *parser_context_pop_token(JSONParserContext *ctxt)
+{
+ QObject *token;
+ g_assert(ctxt->tokens.pos < ctxt->tokens.count);
+ token = ctxt->tokens.buf[ctxt->tokens.pos];
+ ctxt->tokens.pos++;
+ return token;
+}
+
+/* Note: parser_context_{peek|pop}_token do not increment the
+ * token object's refcount. In both cases the references will continue
+ * to be tracked and cleaned up in parser_context_free(), so do not
+ * attempt to free the token object.
+ */
+static QObject *parser_context_peek_token(JSONParserContext *ctxt)
+{
+ QObject *token;
+ g_assert(ctxt->tokens.pos < ctxt->tokens.count);
+ token = ctxt->tokens.buf[ctxt->tokens.pos];
+ return token;
+}
+
+static JSONParserContext parser_context_save(JSONParserContext *ctxt)
+{
+ JSONParserContext saved_ctxt = {0};
+ saved_ctxt.tokens.pos = ctxt->tokens.pos;
+ saved_ctxt.tokens.count = ctxt->tokens.count;
+ saved_ctxt.tokens.buf = ctxt->tokens.buf;
+ return saved_ctxt;
+}
+
+static void parser_context_restore(JSONParserContext *ctxt,
+ JSONParserContext saved_ctxt)
+{
+ ctxt->tokens.pos = saved_ctxt.tokens.pos;
+ ctxt->tokens.count = saved_ctxt.tokens.count;
+ ctxt->tokens.buf = saved_ctxt.tokens.buf;
+}
+
+static void tokens_append_from_iter(QObject *obj, void *opaque)
+{
+ JSONParserContext *ctxt = opaque;
+ g_assert(ctxt->tokens.pos < ctxt->tokens.count);
+ ctxt->tokens.buf[ctxt->tokens.pos++] = obj;
+ qobject_incref(obj);
+}
+
+static JSONParserContext *parser_context_new(QList *tokens)
+{
+ JSONParserContext *ctxt;
+ size_t count;
+
+ if (!tokens) {
+ return NULL;
+ }
+
+ count = qlist_size(tokens);
+ if (count == 0) {
+ return NULL;
+ }
+
+ ctxt = g_malloc0(sizeof(JSONParserContext));
+ ctxt->tokens.pos = 0;
+ ctxt->tokens.count = count;
+ ctxt->tokens.buf = g_malloc(count * sizeof(QObject *));
+ qlist_iter(tokens, tokens_append_from_iter, ctxt);
+ ctxt->tokens.pos = 0;
+
+ return ctxt;
+}
+
+/* to support error propagation, ctxt->err must be freed separately */
+static void parser_context_free(JSONParserContext *ctxt)
+{
+ int i;
+ if (ctxt) {
+ for (i = 0; i < ctxt->tokens.count; i++) {
+ qobject_decref(ctxt->tokens.buf[i]);
+ }
+ g_free(ctxt->tokens.buf);
+ g_free(ctxt);
+ }
+}
+
+/**
+ * Parsing rules
+ */
+static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap)
+{
+ QObject *key = NULL, *token = NULL, *value, *peek;
+ JSONParserContext saved_ctxt = parser_context_save(ctxt);
+
+ peek = parser_context_peek_token(ctxt);
+ if (peek == NULL) {
+ parse_error(ctxt, NULL, "premature EOI");
+ goto out;
+ }
+
+ key = parse_value(ctxt, ap);
+ if (!key || qobject_type(key) != QTYPE_QSTRING) {
+ parse_error(ctxt, peek, "key is not a string in object");
+ goto out;
+ }
+
+ token = parser_context_pop_token(ctxt);
+ if (token == NULL) {
+ parse_error(ctxt, NULL, "premature EOI");
+ goto out;
+ }
+
+ if (!token_is_operator(token, ':')) {
+ parse_error(ctxt, token, "missing : in object pair");
+ goto out;
+ }
+
+ value = parse_value(ctxt, ap);
+ if (value == NULL) {
+ parse_error(ctxt, token, "Missing value in dict");
+ goto out;
+ }
+
+ qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);
+
+ qobject_decref(key);
+
+ return 0;
+
+out:
+ parser_context_restore(ctxt, saved_ctxt);
+ qobject_decref(key);
+
+ return -1;
+}
+
+static QObject *parse_object(JSONParserContext *ctxt, va_list *ap)
+{
+ QDict *dict = NULL;
+ QObject *token, *peek;
+ JSONParserContext saved_ctxt = parser_context_save(ctxt);
+
+ token = parser_context_pop_token(ctxt);
+ if (token == NULL) {
+ goto out;
+ }
+
+ if (!token_is_operator(token, '{')) {
+ goto out;
+ }
+ token = NULL;
+
+ dict = qdict_new();
+
+ peek = parser_context_peek_token(ctxt);
+ if (peek == NULL) {
+ parse_error(ctxt, NULL, "premature EOI");
+ goto out;
+ }
+
+ if (!token_is_operator(peek, '}')) {
+ if (parse_pair(ctxt, dict, ap) == -1) {
+ goto out;
+ }
+
+ token = parser_context_pop_token(ctxt);
+ if (token == NULL) {
+ parse_error(ctxt, NULL, "premature EOI");
+ goto out;
+ }
+
+ while (!token_is_operator(token, '}')) {
+ if (!token_is_operator(token, ',')) {
+ parse_error(ctxt, token, "expected separator in dict");
+ goto out;
+ }
+ token = NULL;
+
+ if (parse_pair(ctxt, dict, ap) == -1) {
+ goto out;
+ }
+
+ token = parser_context_pop_token(ctxt);
+ if (token == NULL) {
+ parse_error(ctxt, NULL, "premature EOI");
+ goto out;
+ }
+ }
+ token = NULL;
+ } else {
+ token = parser_context_pop_token(ctxt);
+ token = NULL;
+ }
+
+ return QOBJECT(dict);
+
+out:
+ parser_context_restore(ctxt, saved_ctxt);
+ QDECREF(dict);
+ return NULL;
+}
+
+static QObject *parse_array(JSONParserContext *ctxt, va_list *ap)
+{
+ QList *list = NULL;
+ QObject *token, *peek;
+ JSONParserContext saved_ctxt = parser_context_save(ctxt);
+
+ token = parser_context_pop_token(ctxt);
+ if (token == NULL) {
+ goto out;
+ }
+
+ if (!token_is_operator(token, '[')) {
+ token = NULL;
+ goto out;
+ }
+ token = NULL;
+
+ list = qlist_new();
+
+ peek = parser_context_peek_token(ctxt);
+ if (peek == NULL) {
+ parse_error(ctxt, NULL, "premature EOI");
+ goto out;
+ }
+
+ if (!token_is_operator(peek, ']')) {
+ QObject *obj;
+
+ obj = parse_value(ctxt, ap);
+ if (obj == NULL) {
+ parse_error(ctxt, token, "expecting value");
+ goto out;
+ }
+
+ qlist_append_obj(list, obj);
+
+ token = parser_context_pop_token(ctxt);
+ if (token == NULL) {
+ parse_error(ctxt, NULL, "premature EOI");
+ goto out;
+ }
+
+ while (!token_is_operator(token, ']')) {
+ if (!token_is_operator(token, ',')) {
+ parse_error(ctxt, token, "expected separator in list");
+ goto out;
+ }
+
+ token = NULL;
+
+ obj = parse_value(ctxt, ap);
+ if (obj == NULL) {
+ parse_error(ctxt, token, "expecting value");
+ goto out;
+ }
+
+ qlist_append_obj(list, obj);
+
+ token = parser_context_pop_token(ctxt);
+ if (token == NULL) {
+ parse_error(ctxt, NULL, "premature EOI");
+ goto out;
+ }
+ }
+
+ token = NULL;
+ } else {
+ token = parser_context_pop_token(ctxt);
+ token = NULL;
+ }
+
+ return QOBJECT(list);
+
+out:
+ parser_context_restore(ctxt, saved_ctxt);
+ QDECREF(list);
+ return NULL;
+}
+
+static QObject *parse_keyword(JSONParserContext *ctxt)
+{
+ QObject *token, *ret;
+ JSONParserContext saved_ctxt = parser_context_save(ctxt);
+
+ token = parser_context_pop_token(ctxt);
+ if (token == NULL) {
+ goto out;
+ }
+
+ if (token_get_type(token) != JSON_KEYWORD) {
+ goto out;
+ }
+
+ if (token_is_keyword(token, "true")) {
+ ret = QOBJECT(qbool_from_int(true));
+ } else if (token_is_keyword(token, "false")) {
+ ret = QOBJECT(qbool_from_int(false));
+ } else {
+ parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token));
+ goto out;
+ }
+
+ return ret;
+
+out:
+ parser_context_restore(ctxt, saved_ctxt);
+
+ return NULL;
+}
+
+static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap)
+{
+ QObject *token = NULL, *obj;
+ JSONParserContext saved_ctxt = parser_context_save(ctxt);
+
+ if (ap == NULL) {
+ goto out;
+ }
+
+ token = parser_context_pop_token(ctxt);
+ if (token == NULL) {
+ goto out;
+ }
+
+ if (token_is_escape(token, "%p")) {
+ obj = va_arg(*ap, QObject *);
+ } else if (token_is_escape(token, "%i")) {
+ obj = QOBJECT(qbool_from_int(va_arg(*ap, int)));
+ } else if (token_is_escape(token, "%d")) {
+ obj = QOBJECT(qint_from_int(va_arg(*ap, int)));
+ } else if (token_is_escape(token, "%ld")) {
+ obj = QOBJECT(qint_from_int(va_arg(*ap, long)));
+ } else if (token_is_escape(token, "%lld") ||
+ token_is_escape(token, "%I64d")) {
+ obj = QOBJECT(qint_from_int(va_arg(*ap, long long)));
+ } else if (token_is_escape(token, "%s")) {
+ obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
+ } else if (token_is_escape(token, "%f")) {
+ obj = QOBJECT(qfloat_from_double(va_arg(*ap, double)));
+ } else {
+ goto out;
+ }
+
+ return obj;
+
+out:
+ parser_context_restore(ctxt, saved_ctxt);
+
+ return NULL;
+}
+
+static QObject *parse_literal(JSONParserContext *ctxt)
+{
+ QObject *token, *obj;
+ JSONParserContext saved_ctxt = parser_context_save(ctxt);
+
+ token = parser_context_pop_token(ctxt);
+ if (token == NULL) {
+ goto out;
+ }
+
+ switch (token_get_type(token)) {
+ case JSON_STRING:
+ obj = QOBJECT(qstring_from_escaped_str(ctxt, token));
+ break;
+ case JSON_INTEGER: {
+ /* A possibility exists that this is a whole-valued float where the
+ * fractional part was left out due to being 0 (.0). It's not a big
+ * deal to treat these as ints in the parser, so long as users of the
+ * resulting QObject know to expect a QInt in place of a QFloat in
+ * cases like these.
+ *
+ * However, in some cases these values will overflow/underflow a
+ * QInt/int64 container, thus we should assume these are to be handled
+ * as QFloats/doubles rather than silently changing their values.
+ *
+ * strtoll() indicates these instances by setting errno to ERANGE
+ */
+ int64_t value;
+
+ errno = 0; /* strtoll doesn't set errno on success */
+ value = strtoll(token_get_value(token), NULL, 10);
+ if (errno != ERANGE) {
+ obj = QOBJECT(qint_from_int(value));
+ break;
+ }
+ /* fall through to JSON_FLOAT */
+ }
+ case JSON_FLOAT:
+ /* FIXME dependent on locale */
+ obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL)));
+ break;
+ default:
+ goto out;
+ }
+
+ return obj;
+
+out:
+ parser_context_restore(ctxt, saved_ctxt);
+
+ return NULL;
+}
+
+static QObject *parse_value(JSONParserContext *ctxt, va_list *ap)
+{
+ QObject *obj;
+
+ obj = parse_object(ctxt, ap);
+ if (obj == NULL) {
+ obj = parse_array(ctxt, ap);
+ }
+ if (obj == NULL) {
+ obj = parse_escape(ctxt, ap);
+ }
+ if (obj == NULL) {
+ obj = parse_keyword(ctxt);
+ }
+ if (obj == NULL) {
+ obj = parse_literal(ctxt);
+ }
+
+ return obj;
+}
+
+QObject *json_parser_parse(QList *tokens, va_list *ap)
+{
+ return json_parser_parse_err(tokens, ap, NULL);
+}
+
+QObject *json_parser_parse_err(QList *tokens, va_list *ap, Error **errp)
+{
+ JSONParserContext *ctxt = parser_context_new(tokens);
+ QObject *result;
+
+ if (!ctxt) {
+ return NULL;
+ }
+
+ result = parse_value(ctxt, ap);
+
+ error_propagate(errp, ctxt->err);
+
+ parser_context_free(ctxt);
+
+ return result;
+}
diff --git a/contrib/qemu/qobject/json-streamer.c b/contrib/qemu/qobject/json-streamer.c
new file mode 100644
index 000000000..1b2f9b1d1
--- /dev/null
+++ b/contrib/qemu/qobject/json-streamer.c
@@ -0,0 +1,122 @@
+/*
+ * JSON streaming support
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qint.h"
+#include "qapi/qmp/qdict.h"
+#include "qemu-common.h"
+#include "qapi/qmp/json-lexer.h"
+#include "qapi/qmp/json-streamer.h"
+
+#define MAX_TOKEN_SIZE (64ULL << 20)
+#define MAX_NESTING (1ULL << 10)
+
+static void json_message_process_token(JSONLexer *lexer, QString *token, JSONTokenType type, int x, int y)
+{
+ JSONMessageParser *parser = container_of(lexer, JSONMessageParser, lexer);
+ QDict *dict;
+
+ if (type == JSON_OPERATOR) {
+ switch (qstring_get_str(token)[0]) {
+ case '{':
+ parser->brace_count++;
+ break;
+ case '}':
+ parser->brace_count--;
+ break;
+ case '[':
+ parser->bracket_count++;
+ break;
+ case ']':
+ parser->bracket_count--;
+ break;
+ default:
+ break;
+ }
+ }
+
+ dict = qdict_new();
+ qdict_put(dict, "type", qint_from_int(type));
+ QINCREF(token);
+ qdict_put(dict, "token", token);
+ qdict_put(dict, "x", qint_from_int(x));
+ qdict_put(dict, "y", qint_from_int(y));
+
+ parser->token_size += token->length;
+
+ qlist_append(parser->tokens, dict);
+
+ if (type == JSON_ERROR) {
+ goto out_emit_bad;
+ } else if (parser->brace_count < 0 ||
+ parser->bracket_count < 0 ||
+ (parser->brace_count == 0 &&
+ parser->bracket_count == 0)) {
+ goto out_emit;
+ } else if (parser->token_size > MAX_TOKEN_SIZE ||
+ parser->bracket_count > MAX_NESTING ||
+ parser->brace_count > MAX_NESTING) {
+ /* Security consideration, we limit total memory allocated per object
+ * and the maximum recursion depth that a message can force.
+ */
+ goto out_emit;
+ }
+
+ return;
+
+out_emit_bad:
+ /* clear out token list and tell the parser to emit and error
+ * indication by passing it a NULL list
+ */
+ QDECREF(parser->tokens);
+ parser->tokens = NULL;
+out_emit:
+ /* send current list of tokens to parser and reset tokenizer */
+ parser->brace_count = 0;
+ parser->bracket_count = 0;
+ parser->emit(parser, parser->tokens);
+ if (parser->tokens) {
+ QDECREF(parser->tokens);
+ }
+ parser->tokens = qlist_new();
+ parser->token_size = 0;
+}
+
+void json_message_parser_init(JSONMessageParser *parser,
+ void (*func)(JSONMessageParser *, QList *))
+{
+ parser->emit = func;
+ parser->brace_count = 0;
+ parser->bracket_count = 0;
+ parser->tokens = qlist_new();
+ parser->token_size = 0;
+
+ json_lexer_init(&parser->lexer, json_message_process_token);
+}
+
+int json_message_parser_feed(JSONMessageParser *parser,
+ const char *buffer, size_t size)
+{
+ return json_lexer_feed(&parser->lexer, buffer, size);
+}
+
+int json_message_parser_flush(JSONMessageParser *parser)
+{
+ return json_lexer_flush(&parser->lexer);
+}
+
+void json_message_parser_destroy(JSONMessageParser *parser)
+{
+ json_lexer_destroy(&parser->lexer);
+ QDECREF(parser->tokens);
+}
diff --git a/contrib/qemu/qobject/qbool.c b/contrib/qemu/qobject/qbool.c
new file mode 100644
index 000000000..a3d2afa82
--- /dev/null
+++ b/contrib/qemu/qobject/qbool.c
@@ -0,0 +1,68 @@
+/*
+ * QBool Module
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qapi/qmp/qbool.h"
+#include "qapi/qmp/qobject.h"
+#include "qemu-common.h"
+
+static void qbool_destroy_obj(QObject *obj);
+
+static const QType qbool_type = {
+ .code = QTYPE_QBOOL,
+ .destroy = qbool_destroy_obj,
+};
+
+/**
+ * qbool_from_int(): Create a new QBool from an int
+ *
+ * Return strong reference.
+ */
+QBool *qbool_from_int(int value)
+{
+ QBool *qb;
+
+ qb = g_malloc(sizeof(*qb));
+ qb->value = value;
+ QOBJECT_INIT(qb, &qbool_type);
+
+ return qb;
+}
+
+/**
+ * qbool_get_int(): Get the stored int
+ */
+int qbool_get_int(const QBool *qb)
+{
+ return qb->value;
+}
+
+/**
+ * qobject_to_qbool(): Convert a QObject into a QBool
+ */
+QBool *qobject_to_qbool(const QObject *obj)
+{
+ if (qobject_type(obj) != QTYPE_QBOOL)
+ return NULL;
+
+ return container_of(obj, QBool, base);
+}
+
+/**
+ * qbool_destroy_obj(): Free all memory allocated by a
+ * QBool object
+ */
+static void qbool_destroy_obj(QObject *obj)
+{
+ assert(obj != NULL);
+ g_free(qobject_to_qbool(obj));
+}
diff --git a/contrib/qemu/qobject/qdict.c b/contrib/qemu/qobject/qdict.c
new file mode 100644
index 000000000..ed381f9a5
--- /dev/null
+++ b/contrib/qemu/qobject/qdict.c
@@ -0,0 +1,478 @@
+/*
+ * QDict Module
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * Authors:
+ * Luiz Capitulino <lcapitulino@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qapi/qmp/qint.h"
+#include "qapi/qmp/qfloat.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qbool.h"
+#include "qapi/qmp/qstring.h"
+#include "qapi/qmp/qobject.h"
+#include "qemu/queue.h"
+#include "qemu-common.h"
+
+static void qdict_destroy_obj(QObject *obj);
+
+static const QType qdict_type = {
+ .code = QTYPE_QDICT,
+ .destroy = qdict_destroy_obj,
+};
+
+/**
+ * qdict_new(): Create a new QDict
+ *
+ * Return strong reference.
+ */
+QDict *qdict_new(void)
+{
+ QDict *qdict;
+
+ qdict = g_malloc0(sizeof(*qdict));
+ QOBJECT_INIT(qdict, &qdict_type);
+
+ return qdict;
+}
+
+/**
+ * qobject_to_qdict(): Convert a QObject into a QDict
+ */
+QDict *qobject_to_qdict(const QObject *obj)
+{
+ if (qobject_type(obj) != QTYPE_QDICT)
+ return NULL;
+
+ return container_of(obj, QDict, base);
+}
+
+/**
+ * tdb_hash(): based on the hash agorithm from gdbm, via tdb
+ * (from module-init-tools)
+ */
+static unsigned int tdb_hash(const char *name)
+{
+ unsigned value; /* Used to compute the hash value. */
+ unsigned i; /* Used to cycle through random values. */
+
+ /* Set the initial value from the key size. */
+ for (value = 0x238F13AF * strlen(name), i=0; name[i]; i++)
+ value = (value + (((const unsigned char *)name)[i] << (i*5 % 24)));
+
+ return (1103515243 * value + 12345);
+}
+
+/**
+ * alloc_entry(): allocate a new QDictEntry
+ */
+static QDictEntry *alloc_entry(const char *key, QObject *value)
+{
+ QDictEntry *entry;
+
+ entry = g_malloc0(sizeof(*entry));
+ entry->key = g_strdup(key);
+ entry->value = value;
+
+ return entry;
+}
+
+/**
+ * qdict_entry_value(): Return qdict entry value
+ *
+ * Return weak reference.
+ */
+QObject *qdict_entry_value(const QDictEntry *entry)
+{
+ return entry->value;
+}
+
+/**
+ * qdict_entry_key(): Return qdict entry key
+ *
+ * Return a *pointer* to the string, it has to be duplicated before being
+ * stored.
+ */
+const char *qdict_entry_key(const QDictEntry *entry)
+{
+ return entry->key;
+}
+
+/**
+ * qdict_find(): List lookup function
+ */
+static QDictEntry *qdict_find(const QDict *qdict,
+ const char *key, unsigned int bucket)
+{
+ QDictEntry *entry;
+
+ QLIST_FOREACH(entry, &qdict->table[bucket], next)
+ if (!strcmp(entry->key, key))
+ return entry;
+
+ return NULL;
+}
+
+/**
+ * qdict_put_obj(): Put a new QObject into the dictionary
+ *
+ * Insert the pair 'key:value' into 'qdict', if 'key' already exists
+ * its 'value' will be replaced.
+ *
+ * This is done by freeing the reference to the stored QObject and
+ * storing the new one in the same entry.
+ *
+ * NOTE: ownership of 'value' is transferred to the QDict
+ */
+void qdict_put_obj(QDict *qdict, const char *key, QObject *value)
+{
+ unsigned int bucket;
+ QDictEntry *entry;
+
+ bucket = tdb_hash(key) % QDICT_BUCKET_MAX;
+ entry = qdict_find(qdict, key, bucket);
+ if (entry) {
+ /* replace key's value */
+ qobject_decref(entry->value);
+ entry->value = value;
+ } else {
+ /* allocate a new entry */
+ entry = alloc_entry(key, value);
+ QLIST_INSERT_HEAD(&qdict->table[bucket], entry, next);
+ qdict->size++;
+ }
+}
+
+/**
+ * qdict_get(): Lookup for a given 'key'
+ *
+ * Return a weak reference to the QObject associated with 'key' if
+ * 'key' is present in the dictionary, NULL otherwise.
+ */
+QObject *qdict_get(const QDict *qdict, const char *key)
+{
+ QDictEntry *entry;
+
+ entry = qdict_find(qdict, key, tdb_hash(key) % QDICT_BUCKET_MAX);
+ return (entry == NULL ? NULL : entry->value);
+}
+
+/**
+ * qdict_haskey(): Check if 'key' exists
+ *
+ * Return 1 if 'key' exists in the dict, 0 otherwise
+ */
+int qdict_haskey(const QDict *qdict, const char *key)
+{
+ unsigned int bucket = tdb_hash(key) % QDICT_BUCKET_MAX;
+ return (qdict_find(qdict, key, bucket) == NULL ? 0 : 1);
+}
+
+/**
+ * qdict_size(): Return the size of the dictionary
+ */
+size_t qdict_size(const QDict *qdict)
+{
+ return qdict->size;
+}
+
+/**
+ * qdict_get_obj(): Get a QObject of a specific type
+ */
+static QObject *qdict_get_obj(const QDict *qdict, const char *key,
+ qtype_code type)
+{
+ QObject *obj;
+
+ obj = qdict_get(qdict, key);
+ assert(obj != NULL);
+ assert(qobject_type(obj) == type);
+
+ return obj;
+}
+
+/**
+ * qdict_get_double(): Get an number mapped by 'key'
+ *
+ * This function assumes that 'key' exists and it stores a
+ * QFloat or QInt object.
+ *
+ * Return number mapped by 'key'.
+ */
+double qdict_get_double(const QDict *qdict, const char *key)
+{
+ QObject *obj = qdict_get(qdict, key);
+
+ assert(obj);
+ switch (qobject_type(obj)) {
+ case QTYPE_QFLOAT:
+ return qfloat_get_double(qobject_to_qfloat(obj));
+ case QTYPE_QINT:
+ return qint_get_int(qobject_to_qint(obj));
+ default:
+ abort();
+ }
+}
+
+/**
+ * qdict_get_int(): Get an integer mapped by 'key'
+ *
+ * This function assumes that 'key' exists and it stores a
+ * QInt object.
+ *
+ * Return integer mapped by 'key'.
+ */
+int64_t qdict_get_int(const QDict *qdict, const char *key)
+{
+ QObject *obj = qdict_get_obj(qdict, key, QTYPE_QINT);
+ return qint_get_int(qobject_to_qint(obj));
+}
+
+/**
+ * qdict_get_bool(): Get a bool mapped by 'key'
+ *
+ * This function assumes that 'key' exists and it stores a
+ * QBool object.
+ *
+ * Return bool mapped by 'key'.
+ */
+int qdict_get_bool(const QDict *qdict, const char *key)
+{
+ QObject *obj = qdict_get_obj(qdict, key, QTYPE_QBOOL);
+ return qbool_get_int(qobject_to_qbool(obj));
+}
+
+/**
+ * qdict_get_qlist(): Get the QList mapped by 'key'
+ *
+ * This function assumes that 'key' exists and it stores a
+ * QList object.
+ *
+ * Return QList mapped by 'key'.
+ */
+QList *qdict_get_qlist(const QDict *qdict, const char *key)
+{
+ return qobject_to_qlist(qdict_get_obj(qdict, key, QTYPE_QLIST));
+}
+
+/**
+ * qdict_get_qdict(): Get the QDict mapped by 'key'
+ *
+ * This function assumes that 'key' exists and it stores a
+ * QDict object.
+ *
+ * Return QDict mapped by 'key'.
+ */
+QDict *qdict_get_qdict(const QDict *qdict, const char *key)
+{
+ return qobject_to_qdict(qdict_get_obj(qdict, key, QTYPE_QDICT));
+}
+
+/**
+ * qdict_get_str(): Get a pointer to the stored string mapped
+ * by 'key'
+ *
+ * This function assumes that 'key' exists and it stores a
+ * QString object.
+ *
+ * Return pointer to the string mapped by 'key'.
+ */
+const char *qdict_get_str(const QDict *qdict, const char *key)
+{
+ QObject *obj = qdict_get_obj(qdict, key, QTYPE_QSTRING);
+ return qstring_get_str(qobject_to_qstring(obj));
+}
+
+/**
+ * qdict_get_try_int(): Try to get integer mapped by 'key'
+ *
+ * Return integer mapped by 'key', if it is not present in
+ * the dictionary or if the stored object is not of QInt type
+ * 'def_value' will be returned.
+ */
+int64_t qdict_get_try_int(const QDict *qdict, const char *key,
+ int64_t def_value)
+{
+ QObject *obj;
+
+ obj = qdict_get(qdict, key);
+ if (!obj || qobject_type(obj) != QTYPE_QINT)
+ return def_value;
+
+ return qint_get_int(qobject_to_qint(obj));
+}
+
+/**
+ * qdict_get_try_bool(): Try to get a bool mapped by 'key'
+ *
+ * Return bool mapped by 'key', if it is not present in the
+ * dictionary or if the stored object is not of QBool type
+ * 'def_value' will be returned.
+ */
+int qdict_get_try_bool(const QDict *qdict, const char *key, int def_value)
+{
+ QObject *obj;
+
+ obj = qdict_get(qdict, key);
+ if (!obj || qobject_type(obj) != QTYPE_QBOOL)
+ return def_value;
+
+ return qbool_get_int(qobject_to_qbool(obj));
+}
+
+/**
+ * qdict_get_try_str(): Try to get a pointer to the stored string
+ * mapped by 'key'
+ *
+ * Return a pointer to the string mapped by 'key', if it is not present
+ * in the dictionary or if the stored object is not of QString type
+ * NULL will be returned.
+ */
+const char *qdict_get_try_str(const QDict *qdict, const char *key)
+{
+ QObject *obj;
+
+ obj = qdict_get(qdict, key);
+ if (!obj || qobject_type(obj) != QTYPE_QSTRING)
+ return NULL;
+
+ return qstring_get_str(qobject_to_qstring(obj));
+}
+
+/**
+ * qdict_iter(): Iterate over all the dictionary's stored values.
+ *
+ * This function allows the user to provide an iterator, which will be
+ * called for each stored value in the dictionary.
+ */
+void qdict_iter(const QDict *qdict,
+ void (*iter)(const char *key, QObject *obj, void *opaque),
+ void *opaque)
+{
+ int i;
+ QDictEntry *entry;
+
+ for (i = 0; i < QDICT_BUCKET_MAX; i++) {
+ QLIST_FOREACH(entry, &qdict->table[i], next)
+ iter(entry->key, entry->value, opaque);
+ }
+}
+
+static QDictEntry *qdict_next_entry(const QDict *qdict, int first_bucket)
+{
+ int i;
+
+ for (i = first_bucket; i < QDICT_BUCKET_MAX; i++) {
+ if (!QLIST_EMPTY(&qdict->table[i])) {
+ return QLIST_FIRST(&qdict->table[i]);
+ }
+ }
+
+ return NULL;
+}
+
+/**
+ * qdict_first(): Return first qdict entry for iteration.
+ */
+const QDictEntry *qdict_first(const QDict *qdict)
+{
+ return qdict_next_entry(qdict, 0);
+}
+
+/**
+ * qdict_next(): Return next qdict entry in an iteration.
+ */
+const QDictEntry *qdict_next(const QDict *qdict, const QDictEntry *entry)
+{
+ QDictEntry *ret;
+
+ ret = QLIST_NEXT(entry, next);
+ if (!ret) {
+ unsigned int bucket = tdb_hash(entry->key) % QDICT_BUCKET_MAX;
+ ret = qdict_next_entry(qdict, bucket + 1);
+ }
+
+ return ret;
+}
+
+/**
+ * qdict_clone_shallow(): Clones a given QDict. Its entries are not copied, but
+ * another reference is added.
+ */
+QDict *qdict_clone_shallow(const QDict *src)
+{
+ QDict *dest;
+ QDictEntry *entry;
+ int i;
+
+ dest = qdict_new();
+
+ for (i = 0; i < QDICT_BUCKET_MAX; i++) {
+ QLIST_FOREACH(entry, &src->table[i], next) {
+ qobject_incref(entry->value);
+ qdict_put_obj(dest, entry->key, entry->value);
+ }
+ }
+
+ return dest;
+}
+
+/**
+ * qentry_destroy(): Free all the memory allocated by a QDictEntry
+ */
+static void qentry_destroy(QDictEntry *e)
+{
+ assert(e != NULL);
+ assert(e->key != NULL);
+ assert(e->value != NULL);
+
+ qobject_decref(e->value);
+ g_free(e->key);
+ g_free(e);
+}
+
+/**
+ * qdict_del(): Delete a 'key:value' pair from the dictionary
+ *
+ * This will destroy all data allocated by this entry.
+ */
+void qdict_del(QDict *qdict, const char *key)
+{
+ QDictEntry *entry;
+
+ entry = qdict_find(qdict, key, tdb_hash(key) % QDICT_BUCKET_MAX);
+ if (entry) {
+ QLIST_REMOVE(entry, next);
+ qentry_destroy(entry);
+ qdict->size--;
+ }
+}
+
+/**
+ * qdict_destroy_obj(): Free all the memory allocated by a QDict
+ */
+static void qdict_destroy_obj(QObject *obj)
+{
+ int i;
+ QDict *qdict;
+
+ assert(obj != NULL);
+ qdict = qobject_to_qdict(obj);
+
+ for (i = 0; i < QDICT_BUCKET_MAX; i++) {
+ QDictEntry *entry = QLIST_FIRST(&qdict->table[i]);
+ while (entry) {
+ QDictEntry *tmp = QLIST_NEXT(entry, next);
+ QLIST_REMOVE(entry, next);
+ qentry_destroy(entry);
+ entry = tmp;
+ }
+ }
+
+ g_free(qdict);
+}
diff --git a/contrib/qemu/qobject/qerror.c b/contrib/qemu/qobject/qerror.c
new file mode 100644
index 000000000..3aee1cf6a
--- /dev/null
+++ b/contrib/qemu/qobject/qerror.c
@@ -0,0 +1,156 @@
+/*
+ * QError Module
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * Authors:
+ * Luiz Capitulino <lcapitulino@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "monitor/monitor.h"
+#include "qapi/qmp/qjson.h"
+#include "qapi/qmp/qerror.h"
+#include "qemu-common.h"
+
+static void qerror_destroy_obj(QObject *obj);
+
+static const QType qerror_type = {
+ .code = QTYPE_QERROR,
+ .destroy = qerror_destroy_obj,
+};
+
+/**
+ * qerror_new(): Create a new QError
+ *
+ * Return strong reference.
+ */
+static QError *qerror_new(void)
+{
+ QError *qerr;
+
+ qerr = g_malloc0(sizeof(*qerr));
+ QOBJECT_INIT(qerr, &qerror_type);
+
+ return qerr;
+}
+
+/**
+ * qerror_from_info(): Create a new QError from error information
+ *
+ * Return strong reference.
+ */
+static QError *qerror_from_info(ErrorClass err_class, const char *fmt,
+ va_list *va)
+{
+ QError *qerr;
+
+ qerr = qerror_new();
+ loc_save(&qerr->loc);
+
+ qerr->err_msg = g_strdup_vprintf(fmt, *va);
+ qerr->err_class = err_class;
+
+ return qerr;
+}
+
+/**
+ * qerror_human(): Format QError data into human-readable string.
+ */
+QString *qerror_human(const QError *qerror)
+{
+ return qstring_from_str(qerror->err_msg);
+}
+
+/**
+ * qerror_print(): Print QError data
+ *
+ * This function will print the member 'desc' of the specified QError object,
+ * it uses error_report() for this, so that the output is routed to the right
+ * place (ie. stderr or Monitor's device).
+ */
+static void qerror_print(QError *qerror)
+{
+ QString *qstring = qerror_human(qerror);
+ loc_push_restore(&qerror->loc);
+ error_report("%s", qstring_get_str(qstring));
+ loc_pop(&qerror->loc);
+ QDECREF(qstring);
+}
+
+void qerror_report(ErrorClass eclass, const char *fmt, ...)
+{
+ va_list va;
+ QError *qerror;
+
+ va_start(va, fmt);
+ qerror = qerror_from_info(eclass, fmt, &va);
+ va_end(va);
+
+ if (monitor_cur_is_qmp()) {
+ monitor_set_error(cur_mon, qerror);
+ } else {
+ qerror_print(qerror);
+ QDECREF(qerror);
+ }
+}
+
+/* Evil... */
+struct Error
+{
+ char *msg;
+ ErrorClass err_class;
+};
+
+void qerror_report_err(Error *err)
+{
+ QError *qerr;
+
+ qerr = qerror_new();
+ loc_save(&qerr->loc);
+ qerr->err_msg = g_strdup(err->msg);
+ qerr->err_class = err->err_class;
+
+ if (monitor_cur_is_qmp()) {
+ monitor_set_error(cur_mon, qerr);
+ } else {
+ qerror_print(qerr);
+ QDECREF(qerr);
+ }
+}
+
+void assert_no_error(Error *err)
+{
+ if (err) {
+ qerror_report_err(err);
+ abort();
+ }
+}
+
+/**
+ * qobject_to_qerror(): Convert a QObject into a QError
+ */
+static QError *qobject_to_qerror(const QObject *obj)
+{
+ if (qobject_type(obj) != QTYPE_QERROR) {
+ return NULL;
+ }
+
+ return container_of(obj, QError, base);
+}
+
+/**
+ * qerror_destroy_obj(): Free all memory allocated by a QError
+ */
+static void qerror_destroy_obj(QObject *obj)
+{
+ QError *qerr;
+
+ assert(obj != NULL);
+ qerr = qobject_to_qerror(obj);
+
+ g_free(qerr->err_msg);
+ g_free(qerr);
+}
diff --git a/contrib/qemu/qobject/qfloat.c b/contrib/qemu/qobject/qfloat.c
new file mode 100644
index 000000000..7de0992db
--- /dev/null
+++ b/contrib/qemu/qobject/qfloat.c
@@ -0,0 +1,68 @@
+/*
+ * QFloat Module
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qapi/qmp/qfloat.h"
+#include "qapi/qmp/qobject.h"
+#include "qemu-common.h"
+
+static void qfloat_destroy_obj(QObject *obj);
+
+static const QType qfloat_type = {
+ .code = QTYPE_QFLOAT,
+ .destroy = qfloat_destroy_obj,
+};
+
+/**
+ * qfloat_from_int(): Create a new QFloat from a float
+ *
+ * Return strong reference.
+ */
+QFloat *qfloat_from_double(double value)
+{
+ QFloat *qf;
+
+ qf = g_malloc(sizeof(*qf));
+ qf->value = value;
+ QOBJECT_INIT(qf, &qfloat_type);
+
+ return qf;
+}
+
+/**
+ * qfloat_get_double(): Get the stored float
+ */
+double qfloat_get_double(const QFloat *qf)
+{
+ return qf->value;
+}
+
+/**
+ * qobject_to_qfloat(): Convert a QObject into a QFloat
+ */
+QFloat *qobject_to_qfloat(const QObject *obj)
+{
+ if (qobject_type(obj) != QTYPE_QFLOAT)
+ return NULL;
+
+ return container_of(obj, QFloat, base);
+}
+
+/**
+ * qfloat_destroy_obj(): Free all memory allocated by a
+ * QFloat object
+ */
+static void qfloat_destroy_obj(QObject *obj)
+{
+ assert(obj != NULL);
+ g_free(qobject_to_qfloat(obj));
+}
diff --git a/contrib/qemu/qobject/qint.c b/contrib/qemu/qobject/qint.c
new file mode 100644
index 000000000..86b9b04f0
--- /dev/null
+++ b/contrib/qemu/qobject/qint.c
@@ -0,0 +1,67 @@
+/*
+ * QInt Module
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * Authors:
+ * Luiz Capitulino <lcapitulino@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qapi/qmp/qint.h"
+#include "qapi/qmp/qobject.h"
+#include "qemu-common.h"
+
+static void qint_destroy_obj(QObject *obj);
+
+static const QType qint_type = {
+ .code = QTYPE_QINT,
+ .destroy = qint_destroy_obj,
+};
+
+/**
+ * qint_from_int(): Create a new QInt from an int64_t
+ *
+ * Return strong reference.
+ */
+QInt *qint_from_int(int64_t value)
+{
+ QInt *qi;
+
+ qi = g_malloc(sizeof(*qi));
+ qi->value = value;
+ QOBJECT_INIT(qi, &qint_type);
+
+ return qi;
+}
+
+/**
+ * qint_get_int(): Get the stored integer
+ */
+int64_t qint_get_int(const QInt *qi)
+{
+ return qi->value;
+}
+
+/**
+ * qobject_to_qint(): Convert a QObject into a QInt
+ */
+QInt *qobject_to_qint(const QObject *obj)
+{
+ if (qobject_type(obj) != QTYPE_QINT)
+ return NULL;
+
+ return container_of(obj, QInt, base);
+}
+
+/**
+ * qint_destroy_obj(): Free all memory allocated by a
+ * QInt object
+ */
+static void qint_destroy_obj(QObject *obj)
+{
+ assert(obj != NULL);
+ g_free(qobject_to_qint(obj));
+}
diff --git a/contrib/qemu/qobject/qjson.c b/contrib/qemu/qobject/qjson.c
new file mode 100644
index 000000000..19085a1bb
--- /dev/null
+++ b/contrib/qemu/qobject/qjson.c
@@ -0,0 +1,282 @@
+/*
+ * QObject JSON integration
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qapi/qmp/json-lexer.h"
+#include "qapi/qmp/json-parser.h"
+#include "qapi/qmp/json-streamer.h"
+#include "qapi/qmp/qjson.h"
+#include "qapi/qmp/qint.h"
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qbool.h"
+#include "qapi/qmp/qfloat.h"
+#include "qapi/qmp/qdict.h"
+
+typedef struct JSONParsingState
+{
+ JSONMessageParser parser;
+ va_list *ap;
+ QObject *result;
+} JSONParsingState;
+
+static void parse_json(JSONMessageParser *parser, QList *tokens)
+{
+ JSONParsingState *s = container_of(parser, JSONParsingState, parser);
+ s->result = json_parser_parse(tokens, s->ap);
+}
+
+QObject *qobject_from_jsonv(const char *string, va_list *ap)
+{
+ JSONParsingState state = {};
+
+ state.ap = ap;
+
+ json_message_parser_init(&state.parser, parse_json);
+ json_message_parser_feed(&state.parser, string, strlen(string));
+ json_message_parser_flush(&state.parser);
+ json_message_parser_destroy(&state.parser);
+
+ return state.result;
+}
+
+QObject *qobject_from_json(const char *string)
+{
+ return qobject_from_jsonv(string, NULL);
+}
+
+/*
+ * IMPORTANT: This function aborts on error, thus it must not
+ * be used with untrusted arguments.
+ */
+QObject *qobject_from_jsonf(const char *string, ...)
+{
+ QObject *obj;
+ va_list ap;
+
+ va_start(ap, string);
+ obj = qobject_from_jsonv(string, &ap);
+ va_end(ap);
+
+ assert(obj != NULL);
+ return obj;
+}
+
+typedef struct ToJsonIterState
+{
+ int indent;
+ int pretty;
+ int count;
+ QString *str;
+} ToJsonIterState;
+
+static void to_json(const QObject *obj, QString *str, int pretty, int indent);
+
+static void to_json_dict_iter(const char *key, QObject *obj, void *opaque)
+{
+ ToJsonIterState *s = opaque;
+ QString *qkey;
+ int j;
+
+ if (s->count)
+ qstring_append(s->str, ", ");
+
+ if (s->pretty) {
+ qstring_append(s->str, "\n");
+ for (j = 0 ; j < s->indent ; j++)
+ qstring_append(s->str, " ");
+ }
+
+ qkey = qstring_from_str(key);
+ to_json(QOBJECT(qkey), s->str, s->pretty, s->indent);
+ QDECREF(qkey);
+
+ qstring_append(s->str, ": ");
+ to_json(obj, s->str, s->pretty, s->indent);
+ s->count++;
+}
+
+static void to_json_list_iter(QObject *obj, void *opaque)
+{
+ ToJsonIterState *s = opaque;
+ int j;
+
+ if (s->count)
+ qstring_append(s->str, ", ");
+
+ if (s->pretty) {
+ qstring_append(s->str, "\n");
+ for (j = 0 ; j < s->indent ; j++)
+ qstring_append(s->str, " ");
+ }
+
+ to_json(obj, s->str, s->pretty, s->indent);
+ s->count++;
+}
+
+static void to_json(const QObject *obj, QString *str, int pretty, int indent)
+{
+ switch (qobject_type(obj)) {
+ case QTYPE_QINT: {
+ QInt *val = qobject_to_qint(obj);
+ char buffer[1024];
+
+ snprintf(buffer, sizeof(buffer), "%" PRId64, qint_get_int(val));
+ qstring_append(str, buffer);
+ break;
+ }
+ case QTYPE_QSTRING: {
+ QString *val = qobject_to_qstring(obj);
+ const char *ptr;
+ int cp;
+ char buf[16];
+ char *end;
+
+ ptr = qstring_get_str(val);
+ qstring_append(str, "\"");
+
+ for (; *ptr; ptr = end) {
+ cp = mod_utf8_codepoint(ptr, 6, &end);
+ switch (cp) {
+ case '\"':
+ qstring_append(str, "\\\"");
+ break;
+ case '\\':
+ qstring_append(str, "\\\\");
+ break;
+ case '\b':
+ qstring_append(str, "\\b");
+ break;
+ case '\f':
+ qstring_append(str, "\\f");
+ break;
+ case '\n':
+ qstring_append(str, "\\n");
+ break;
+ case '\r':
+ qstring_append(str, "\\r");
+ break;
+ case '\t':
+ qstring_append(str, "\\t");
+ break;
+ default:
+ if (cp < 0) {
+ cp = 0xFFFD; /* replacement character */
+ }
+ if (cp > 0xFFFF) {
+ /* beyond BMP; need a surrogate pair */
+ snprintf(buf, sizeof(buf), "\\u%04X\\u%04X",
+ 0xD800 + ((cp - 0x10000) >> 10),
+ 0xDC00 + ((cp - 0x10000) & 0x3FF));
+ } else if (cp < 0x20 || cp >= 0x7F) {
+ snprintf(buf, sizeof(buf), "\\u%04X", cp);
+ } else {
+ buf[0] = cp;
+ buf[1] = 0;
+ }
+ qstring_append(str, buf);
+ }
+ };
+
+ qstring_append(str, "\"");
+ break;
+ }
+ case QTYPE_QDICT: {
+ ToJsonIterState s;
+ QDict *val = qobject_to_qdict(obj);
+
+ s.count = 0;
+ s.str = str;
+ s.indent = indent + 1;
+ s.pretty = pretty;
+ qstring_append(str, "{");
+ qdict_iter(val, to_json_dict_iter, &s);
+ if (pretty) {
+ int j;
+ qstring_append(str, "\n");
+ for (j = 0 ; j < indent ; j++)
+ qstring_append(str, " ");
+ }
+ qstring_append(str, "}");
+ break;
+ }
+ case QTYPE_QLIST: {
+ ToJsonIterState s;
+ QList *val = qobject_to_qlist(obj);
+
+ s.count = 0;
+ s.str = str;
+ s.indent = indent + 1;
+ s.pretty = pretty;
+ qstring_append(str, "[");
+ qlist_iter(val, (void *)to_json_list_iter, &s);
+ if (pretty) {
+ int j;
+ qstring_append(str, "\n");
+ for (j = 0 ; j < indent ; j++)
+ qstring_append(str, " ");
+ }
+ qstring_append(str, "]");
+ break;
+ }
+ case QTYPE_QFLOAT: {
+ QFloat *val = qobject_to_qfloat(obj);
+ char buffer[1024];
+ int len;
+
+ len = snprintf(buffer, sizeof(buffer), "%f", qfloat_get_double(val));
+ while (len > 0 && buffer[len - 1] == '0') {
+ len--;
+ }
+
+ if (len && buffer[len - 1] == '.') {
+ buffer[len - 1] = 0;
+ } else {
+ buffer[len] = 0;
+ }
+
+ qstring_append(str, buffer);
+ break;
+ }
+ case QTYPE_QBOOL: {
+ QBool *val = qobject_to_qbool(obj);
+
+ if (qbool_get_int(val)) {
+ qstring_append(str, "true");
+ } else {
+ qstring_append(str, "false");
+ }
+ break;
+ }
+ case QTYPE_QERROR:
+ /* XXX: should QError be emitted? */
+ case QTYPE_NONE:
+ break;
+ }
+}
+
+QString *qobject_to_json(const QObject *obj)
+{
+ QString *str = qstring_new();
+
+ to_json(obj, str, 0, 0);
+
+ return str;
+}
+
+QString *qobject_to_json_pretty(const QObject *obj)
+{
+ QString *str = qstring_new();
+
+ to_json(obj, str, 1, 0);
+
+ return str;
+}
diff --git a/contrib/qemu/qobject/qlist.c b/contrib/qemu/qobject/qlist.c
new file mode 100644
index 000000000..1ced0de58
--- /dev/null
+++ b/contrib/qemu/qobject/qlist.c
@@ -0,0 +1,170 @@
+/*
+ * QList Module
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * Authors:
+ * Luiz Capitulino <lcapitulino@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qobject.h"
+#include "qemu/queue.h"
+#include "qemu-common.h"
+
+static void qlist_destroy_obj(QObject *obj);
+
+static const QType qlist_type = {
+ .code = QTYPE_QLIST,
+ .destroy = qlist_destroy_obj,
+};
+
+/**
+ * qlist_new(): Create a new QList
+ *
+ * Return strong reference.
+ */
+QList *qlist_new(void)
+{
+ QList *qlist;
+
+ qlist = g_malloc(sizeof(*qlist));
+ QTAILQ_INIT(&qlist->head);
+ QOBJECT_INIT(qlist, &qlist_type);
+
+ return qlist;
+}
+
+static void qlist_copy_elem(QObject *obj, void *opaque)
+{
+ QList *dst = opaque;
+
+ qobject_incref(obj);
+ qlist_append_obj(dst, obj);
+}
+
+QList *qlist_copy(QList *src)
+{
+ QList *dst = qlist_new();
+
+ qlist_iter(src, qlist_copy_elem, dst);
+
+ return dst;
+}
+
+/**
+ * qlist_append_obj(): Append an QObject into QList
+ *
+ * NOTE: ownership of 'value' is transferred to the QList
+ */
+void qlist_append_obj(QList *qlist, QObject *value)
+{
+ QListEntry *entry;
+
+ entry = g_malloc(sizeof(*entry));
+ entry->value = value;
+
+ QTAILQ_INSERT_TAIL(&qlist->head, entry, next);
+}
+
+/**
+ * qlist_iter(): Iterate over all the list's stored values.
+ *
+ * This function allows the user to provide an iterator, which will be
+ * called for each stored value in the list.
+ */
+void qlist_iter(const QList *qlist,
+ void (*iter)(QObject *obj, void *opaque), void *opaque)
+{
+ QListEntry *entry;
+
+ QTAILQ_FOREACH(entry, &qlist->head, next)
+ iter(entry->value, opaque);
+}
+
+QObject *qlist_pop(QList *qlist)
+{
+ QListEntry *entry;
+ QObject *ret;
+
+ if (qlist == NULL || QTAILQ_EMPTY(&qlist->head)) {
+ return NULL;
+ }
+
+ entry = QTAILQ_FIRST(&qlist->head);
+ QTAILQ_REMOVE(&qlist->head, entry, next);
+
+ ret = entry->value;
+ g_free(entry);
+
+ return ret;
+}
+
+QObject *qlist_peek(QList *qlist)
+{
+ QListEntry *entry;
+ QObject *ret;
+
+ if (qlist == NULL || QTAILQ_EMPTY(&qlist->head)) {
+ return NULL;
+ }
+
+ entry = QTAILQ_FIRST(&qlist->head);
+
+ ret = entry->value;
+
+ return ret;
+}
+
+int qlist_empty(const QList *qlist)
+{
+ return QTAILQ_EMPTY(&qlist->head);
+}
+
+static void qlist_size_iter(QObject *obj, void *opaque)
+{
+ size_t *count = opaque;
+ (*count)++;
+}
+
+size_t qlist_size(const QList *qlist)
+{
+ size_t count = 0;
+ qlist_iter(qlist, qlist_size_iter, &count);
+ return count;
+}
+
+/**
+ * qobject_to_qlist(): Convert a QObject into a QList
+ */
+QList *qobject_to_qlist(const QObject *obj)
+{
+ if (qobject_type(obj) != QTYPE_QLIST) {
+ return NULL;
+ }
+
+ return container_of(obj, QList, base);
+}
+
+/**
+ * qlist_destroy_obj(): Free all the memory allocated by a QList
+ */
+static void qlist_destroy_obj(QObject *obj)
+{
+ QList *qlist;
+ QListEntry *entry, *next_entry;
+
+ assert(obj != NULL);
+ qlist = qobject_to_qlist(obj);
+
+ QTAILQ_FOREACH_SAFE(entry, &qlist->head, next, next_entry) {
+ QTAILQ_REMOVE(&qlist->head, entry, next);
+ qobject_decref(entry->value);
+ g_free(entry);
+ }
+
+ g_free(qlist);
+}
diff --git a/contrib/qemu/qobject/qstring.c b/contrib/qemu/qobject/qstring.c
new file mode 100644
index 000000000..607b7a142
--- /dev/null
+++ b/contrib/qemu/qobject/qstring.c
@@ -0,0 +1,149 @@
+/*
+ * QString Module
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * Authors:
+ * Luiz Capitulino <lcapitulino@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qapi/qmp/qobject.h"
+#include "qapi/qmp/qstring.h"
+#include "qemu-common.h"
+
+static void qstring_destroy_obj(QObject *obj);
+
+static const QType qstring_type = {
+ .code = QTYPE_QSTRING,
+ .destroy = qstring_destroy_obj,
+};
+
+/**
+ * qstring_new(): Create a new empty QString
+ *
+ * Return strong reference.
+ */
+QString *qstring_new(void)
+{
+ return qstring_from_str("");
+}
+
+/**
+ * qstring_get_length(): Get the length of a QString
+ */
+size_t qstring_get_length(const QString *qstring)
+{
+ return qstring->length;
+}
+
+/**
+ * qstring_from_substr(): Create a new QString from a C string substring
+ *
+ * Return string reference
+ */
+QString *qstring_from_substr(const char *str, int start, int end)
+{
+ QString *qstring;
+
+ qstring = g_malloc(sizeof(*qstring));
+
+ qstring->length = end - start + 1;
+ qstring->capacity = qstring->length;
+
+ qstring->string = g_malloc(qstring->capacity + 1);
+ memcpy(qstring->string, str + start, qstring->length);
+ qstring->string[qstring->length] = 0;
+
+ QOBJECT_INIT(qstring, &qstring_type);
+
+ return qstring;
+}
+
+/**
+ * qstring_from_str(): Create a new QString from a regular C string
+ *
+ * Return strong reference.
+ */
+QString *qstring_from_str(const char *str)
+{
+ return qstring_from_substr(str, 0, strlen(str) - 1);
+}
+
+static void capacity_increase(QString *qstring, size_t len)
+{
+ if (qstring->capacity < (qstring->length + len)) {
+ qstring->capacity += len;
+ qstring->capacity *= 2; /* use exponential growth */
+
+ qstring->string = g_realloc(qstring->string, qstring->capacity + 1);
+ }
+}
+
+/* qstring_append(): Append a C string to a QString
+ */
+void qstring_append(QString *qstring, const char *str)
+{
+ size_t len = strlen(str);
+
+ capacity_increase(qstring, len);
+ memcpy(qstring->string + qstring->length, str, len);
+ qstring->length += len;
+ qstring->string[qstring->length] = 0;
+}
+
+void qstring_append_int(QString *qstring, int64_t value)
+{
+ char num[32];
+
+ snprintf(num, sizeof(num), "%" PRId64, value);
+ qstring_append(qstring, num);
+}
+
+/**
+ * qstring_append_chr(): Append a C char to a QString
+ */
+void qstring_append_chr(QString *qstring, int c)
+{
+ capacity_increase(qstring, 1);
+ qstring->string[qstring->length++] = c;
+ qstring->string[qstring->length] = 0;
+}
+
+/**
+ * qobject_to_qstring(): Convert a QObject to a QString
+ */
+QString *qobject_to_qstring(const QObject *obj)
+{
+ if (qobject_type(obj) != QTYPE_QSTRING)
+ return NULL;
+
+ return container_of(obj, QString, base);
+}
+
+/**
+ * qstring_get_str(): Return a pointer to the stored string
+ *
+ * NOTE: Should be used with caution, if the object is deallocated
+ * this pointer becomes invalid.
+ */
+const char *qstring_get_str(const QString *qstring)
+{
+ return qstring->string;
+}
+
+/**
+ * qstring_destroy_obj(): Free all memory allocated by a QString
+ * object
+ */
+static void qstring_destroy_obj(QObject *obj)
+{
+ QString *qs;
+
+ assert(obj != NULL);
+ qs = qobject_to_qstring(obj);
+ g_free(qs->string);
+ g_free(qs);
+}
diff --git a/contrib/qemu/trace/generated-tracers.h b/contrib/qemu/trace/generated-tracers.h
new file mode 100644
index 000000000..b512660f3
--- /dev/null
+++ b/contrib/qemu/trace/generated-tracers.h
@@ -0,0 +1,3759 @@
+/* This file is autogenerated by tracetool, do not edit. */
+
+#ifndef TRACE__GENERATED_TRACERS_H
+#define TRACE__GENERATED_TRACERS_H
+
+#include "qemu-common.h"
+
+static inline void trace_qxl_interface_set_mm_time(int qid, uint32_t mm_time)
+{
+}
+
+static inline void trace_qxl_io_write_vga(int qid, const char * mode, uint32_t addr, uint32_t val)
+{
+}
+
+static inline void trace_g_malloc(size_t size, void * ptr)
+{
+}
+
+static inline void trace_g_realloc(void * ptr, size_t size, void * newptr)
+{
+}
+
+static inline void trace_g_free(void * ptr)
+{
+}
+
+static inline void trace_qemu_memalign(size_t alignment, size_t size, void * ptr)
+{
+}
+
+static inline void trace_qemu_anon_ram_alloc(size_t size, void * ptr)
+{
+}
+
+static inline void trace_qemu_vfree(void * ptr)
+{
+}
+
+static inline void trace_qemu_anon_ram_free(void * ptr, size_t size)
+{
+}
+
+static inline void trace_virtqueue_fill(void * vq, const void * elem, unsigned int len, unsigned int idx)
+{
+}
+
+static inline void trace_virtqueue_flush(void * vq, unsigned int count)
+{
+}
+
+static inline void trace_virtqueue_pop(void * vq, void * elem, unsigned int in_num, unsigned int out_num)
+{
+}
+
+static inline void trace_virtio_queue_notify(void * vdev, int n, void * vq)
+{
+}
+
+static inline void trace_virtio_irq(void * vq)
+{
+}
+
+static inline void trace_virtio_notify(void * vdev, void * vq)
+{
+}
+
+static inline void trace_virtio_set_status(void * vdev, uint8_t val)
+{
+}
+
+static inline void trace_virtio_serial_send_control_event(unsigned int port, uint16_t event, uint16_t value)
+{
+}
+
+static inline void trace_virtio_serial_throttle_port(unsigned int port, bool throttle)
+{
+}
+
+static inline void trace_virtio_serial_handle_control_message(uint16_t event, uint16_t value)
+{
+}
+
+static inline void trace_virtio_serial_handle_control_message_port(unsigned int port)
+{
+}
+
+static inline void trace_virtio_console_flush_buf(unsigned int port, size_t len, ssize_t ret)
+{
+}
+
+static inline void trace_virtio_console_chr_read(unsigned int port, int size)
+{
+}
+
+static inline void trace_virtio_console_chr_event(unsigned int port, int event)
+{
+}
+
+static inline void trace_bdrv_open_common(void * bs, const char * filename, int flags, const char * format_name)
+{
+}
+
+static inline void trace_multiwrite_cb(void * mcb, int ret)
+{
+}
+
+static inline void trace_bdrv_aio_multiwrite(void * mcb, int num_callbacks, int num_reqs)
+{
+}
+
+static inline void trace_bdrv_aio_discard(void * bs, int64_t sector_num, int nb_sectors, void * opaque)
+{
+}
+
+static inline void trace_bdrv_aio_flush(void * bs, void * opaque)
+{
+}
+
+static inline void trace_bdrv_aio_readv(void * bs, int64_t sector_num, int nb_sectors, void * opaque)
+{
+}
+
+static inline void trace_bdrv_aio_writev(void * bs, int64_t sector_num, int nb_sectors, void * opaque)
+{
+}
+
+static inline void trace_bdrv_lock_medium(void * bs, bool locked)
+{
+}
+
+static inline void trace_bdrv_co_readv(void * bs, int64_t sector_num, int nb_sector)
+{
+}
+
+static inline void trace_bdrv_co_copy_on_readv(void * bs, int64_t sector_num, int nb_sector)
+{
+}
+
+static inline void trace_bdrv_co_writev(void * bs, int64_t sector_num, int nb_sector)
+{
+}
+
+static inline void trace_bdrv_co_write_zeroes(void * bs, int64_t sector_num, int nb_sector)
+{
+}
+
+static inline void trace_bdrv_co_io_em(void * bs, int64_t sector_num, int nb_sectors, int is_write, void * acb)
+{
+}
+
+static inline void trace_bdrv_co_do_copy_on_readv(void * bs, int64_t sector_num, int nb_sectors, int64_t cluster_sector_num, int cluster_nb_sectors)
+{
+}
+
+static inline void trace_stream_one_iteration(void * s, int64_t sector_num, int nb_sectors, int is_allocated)
+{
+}
+
+static inline void trace_stream_start(void * bs, void * base, void * s, void * co, void * opaque)
+{
+}
+
+static inline void trace_commit_one_iteration(void * s, int64_t sector_num, int nb_sectors, int is_allocated)
+{
+}
+
+static inline void trace_commit_start(void * bs, void * base, void * top, void * s, void * co, void * opaque)
+{
+}
+
+static inline void trace_mirror_start(void * bs, void * s, void * co, void * opaque)
+{
+}
+
+static inline void trace_mirror_restart_iter(void * s, int64_t cnt)
+{
+}
+
+static inline void trace_mirror_before_flush(void * s)
+{
+}
+
+static inline void trace_mirror_before_drain(void * s, int64_t cnt)
+{
+}
+
+static inline void trace_mirror_before_sleep(void * s, int64_t cnt, int synced)
+{
+}
+
+static inline void trace_mirror_one_iteration(void * s, int64_t sector_num, int nb_sectors)
+{
+}
+
+static inline void trace_mirror_cow(void * s, int64_t sector_num)
+{
+}
+
+static inline void trace_mirror_iteration_done(void * s, int64_t sector_num, int nb_sectors, int ret)
+{
+}
+
+static inline void trace_mirror_yield(void * s, int64_t cnt, int buf_free_count, int in_flight)
+{
+}
+
+static inline void trace_mirror_yield_in_flight(void * s, int64_t sector_num, int in_flight)
+{
+}
+
+static inline void trace_mirror_yield_buf_busy(void * s, int nb_chunks, int in_flight)
+{
+}
+
+static inline void trace_mirror_break_buf_busy(void * s, int nb_chunks, int in_flight)
+{
+}
+
+static inline void trace_backup_do_cow_enter(void * job, int64_t start, int64_t sector_num, int nb_sectors)
+{
+}
+
+static inline void trace_backup_do_cow_return(void * job, int64_t sector_num, int nb_sectors, int ret)
+{
+}
+
+static inline void trace_backup_do_cow_skip(void * job, int64_t start)
+{
+}
+
+static inline void trace_backup_do_cow_process(void * job, int64_t start)
+{
+}
+
+static inline void trace_backup_do_cow_read_fail(void * job, int64_t start, int ret)
+{
+}
+
+static inline void trace_backup_do_cow_write_fail(void * job, int64_t start, int ret)
+{
+}
+
+static inline void trace_qmp_block_job_cancel(void * job)
+{
+}
+
+static inline void trace_qmp_block_job_pause(void * job)
+{
+}
+
+static inline void trace_qmp_block_job_resume(void * job)
+{
+}
+
+static inline void trace_qmp_block_job_complete(void * job)
+{
+}
+
+static inline void trace_block_job_cb(void * bs, void * job, int ret)
+{
+}
+
+static inline void trace_qmp_block_stream(void * bs, void * job)
+{
+}
+
+static inline void trace_virtio_blk_req_complete(void * req, int status)
+{
+}
+
+static inline void trace_virtio_blk_rw_complete(void * req, int ret)
+{
+}
+
+static inline void trace_virtio_blk_handle_write(void * req, uint64_t sector, size_t nsectors)
+{
+}
+
+static inline void trace_virtio_blk_handle_read(void * req, uint64_t sector, size_t nsectors)
+{
+}
+
+static inline void trace_virtio_blk_data_plane_start(void * s)
+{
+}
+
+static inline void trace_virtio_blk_data_plane_stop(void * s)
+{
+}
+
+static inline void trace_virtio_blk_data_plane_process_request(void * s, unsigned int out_num, unsigned int in_num, unsigned int head)
+{
+}
+
+static inline void trace_virtio_blk_data_plane_complete_request(void * s, unsigned int head, int ret)
+{
+}
+
+static inline void trace_vring_setup(uint64_t physical, void * desc, void * avail, void * used)
+{
+}
+
+static inline void trace_thread_pool_submit(void * pool, void * req, void * opaque)
+{
+}
+
+static inline void trace_thread_pool_complete(void * pool, void * req, void * opaque, int ret)
+{
+}
+
+static inline void trace_thread_pool_cancel(void * req, void * opaque)
+{
+}
+
+static inline void trace_paio_submit(void * acb, void * opaque, int64_t sector_num, int nb_sectors, int type)
+{
+}
+
+static inline void trace_paio_complete(void * acb, void * opaque, int ret)
+{
+}
+
+static inline void trace_paio_cancel(void * acb, void * opaque)
+{
+}
+
+static inline void trace_cpu_in(unsigned int addr, unsigned int val)
+{
+}
+
+static inline void trace_cpu_out(unsigned int addr, unsigned int val)
+{
+}
+
+static inline void trace_balloon_event(void * opaque, unsigned long addr)
+{
+}
+
+static inline void trace_apic_local_deliver(int vector, uint32_t lvt)
+{
+}
+
+static inline void trace_apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode, uint8_t vector_num, uint8_t trigger_mode)
+{
+}
+
+static inline void trace_cpu_set_apic_base(uint64_t val)
+{
+}
+
+static inline void trace_cpu_get_apic_base(uint64_t val)
+{
+}
+
+static inline void trace_apic_mem_readl(uint64_t addr, uint32_t val)
+{
+}
+
+static inline void trace_apic_mem_writel(uint64_t addr, uint32_t val)
+{
+}
+
+static inline void trace_apic_report_irq_delivered(int apic_irq_delivered)
+{
+}
+
+static inline void trace_apic_reset_irq_delivered(int apic_irq_delivered)
+{
+}
+
+static inline void trace_apic_get_irq_delivered(int apic_irq_delivered)
+{
+}
+
+static inline void trace_cs4231_mem_readl_dreg(uint32_t reg, uint32_t ret)
+{
+}
+
+static inline void trace_cs4231_mem_readl_reg(uint32_t reg, uint32_t ret)
+{
+}
+
+static inline void trace_cs4231_mem_writel_reg(uint32_t reg, uint32_t old, uint32_t val)
+{
+}
+
+static inline void trace_cs4231_mem_writel_dreg(uint32_t reg, uint32_t old, uint32_t val)
+{
+}
+
+static inline void trace_nvram_read(uint32_t addr, uint32_t ret)
+{
+}
+
+static inline void trace_nvram_write(uint32_t addr, uint32_t old, uint32_t val)
+{
+}
+
+static inline void trace_ecc_mem_writel_mer(uint32_t val)
+{
+}
+
+static inline void trace_ecc_mem_writel_mdr(uint32_t val)
+{
+}
+
+static inline void trace_ecc_mem_writel_mfsr(uint32_t val)
+{
+}
+
+static inline void trace_ecc_mem_writel_vcr(uint32_t val)
+{
+}
+
+static inline void trace_ecc_mem_writel_dr(uint32_t val)
+{
+}
+
+static inline void trace_ecc_mem_writel_ecr0(uint32_t val)
+{
+}
+
+static inline void trace_ecc_mem_writel_ecr1(uint32_t val)
+{
+}
+
+static inline void trace_ecc_mem_readl_mer(uint32_t ret)
+{
+}
+
+static inline void trace_ecc_mem_readl_mdr(uint32_t ret)
+{
+}
+
+static inline void trace_ecc_mem_readl_mfsr(uint32_t ret)
+{
+}
+
+static inline void trace_ecc_mem_readl_vcr(uint32_t ret)
+{
+}
+
+static inline void trace_ecc_mem_readl_mfar0(uint32_t ret)
+{
+}
+
+static inline void trace_ecc_mem_readl_mfar1(uint32_t ret)
+{
+}
+
+static inline void trace_ecc_mem_readl_dr(uint32_t ret)
+{
+}
+
+static inline void trace_ecc_mem_readl_ecr0(uint32_t ret)
+{
+}
+
+static inline void trace_ecc_mem_readl_ecr1(uint32_t ret)
+{
+}
+
+static inline void trace_ecc_diag_mem_writeb(uint64_t addr, uint32_t val)
+{
+}
+
+static inline void trace_ecc_diag_mem_readb(uint64_t addr, uint32_t ret)
+{
+}
+
+static inline void trace_fw_cfg_write(void * s, uint8_t value)
+{
+}
+
+static inline void trace_fw_cfg_select(void * s, uint16_t key, int ret)
+{
+}
+
+static inline void trace_fw_cfg_read(void * s, uint8_t ret)
+{
+}
+
+static inline void trace_fw_cfg_add_file_dupe(void * s, char * name)
+{
+}
+
+static inline void trace_fw_cfg_add_file(void * s, int index, char * name, size_t len)
+{
+}
+
+static inline void trace_hd_geometry_lchs_guess(void * bs, int cyls, int heads, int secs)
+{
+}
+
+static inline void trace_hd_geometry_guess(void * bs, uint32_t cyls, uint32_t heads, uint32_t secs, int trans)
+{
+}
+
+static inline void trace_jazz_led_read(uint64_t addr, uint8_t val)
+{
+}
+
+static inline void trace_jazz_led_write(uint64_t addr, uint8_t new)
+{
+}
+
+static inline void trace_lance_mem_readw(uint64_t addr, uint32_t ret)
+{
+}
+
+static inline void trace_lance_mem_writew(uint64_t addr, uint32_t val)
+{
+}
+
+static inline void trace_slavio_intctl_mem_readl(uint32_t cpu, uint64_t addr, uint32_t ret)
+{
+}
+
+static inline void trace_slavio_intctl_mem_writel(uint32_t cpu, uint64_t addr, uint32_t val)
+{
+}
+
+static inline void trace_slavio_intctl_mem_writel_clear(uint32_t cpu, uint32_t val, uint32_t intreg_pending)
+{
+}
+
+static inline void trace_slavio_intctl_mem_writel_set(uint32_t cpu, uint32_t val, uint32_t intreg_pending)
+{
+}
+
+static inline void trace_slavio_intctlm_mem_readl(uint64_t addr, uint32_t ret)
+{
+}
+
+static inline void trace_slavio_intctlm_mem_writel(uint64_t addr, uint32_t val)
+{
+}
+
+static inline void trace_slavio_intctlm_mem_writel_enable(uint32_t val, uint32_t intregm_disabled)
+{
+}
+
+static inline void trace_slavio_intctlm_mem_writel_disable(uint32_t val, uint32_t intregm_disabled)
+{
+}
+
+static inline void trace_slavio_intctlm_mem_writel_target(uint32_t cpu)
+{
+}
+
+static inline void trace_slavio_check_interrupts(uint32_t pending, uint32_t intregm_disabled)
+{
+}
+
+static inline void trace_slavio_set_irq(uint32_t target_cpu, int irq, uint32_t pil, int level)
+{
+}
+
+static inline void trace_slavio_set_timer_irq_cpu(int cpu, int level)
+{
+}
+
+static inline void trace_slavio_misc_update_irq_raise(void)
+{
+}
+
+static inline void trace_slavio_misc_update_irq_lower(void)
+{
+}
+
+static inline void trace_slavio_set_power_fail(int power_failing, uint8_t config)
+{
+}
+
+static inline void trace_slavio_cfg_mem_writeb(uint32_t val)
+{
+}
+
+static inline void trace_slavio_cfg_mem_readb(uint32_t ret)
+{
+}
+
+static inline void trace_slavio_diag_mem_writeb(uint32_t val)
+{
+}
+
+static inline void trace_slavio_diag_mem_readb(uint32_t ret)
+{
+}
+
+static inline void trace_slavio_mdm_mem_writeb(uint32_t val)
+{
+}
+
+static inline void trace_slavio_mdm_mem_readb(uint32_t ret)
+{
+}
+
+static inline void trace_slavio_aux1_mem_writeb(uint32_t val)
+{
+}
+
+static inline void trace_slavio_aux1_mem_readb(uint32_t ret)
+{
+}
+
+static inline void trace_slavio_aux2_mem_writeb(uint32_t val)
+{
+}
+
+static inline void trace_slavio_aux2_mem_readb(uint32_t ret)
+{
+}
+
+static inline void trace_apc_mem_writeb(uint32_t val)
+{
+}
+
+static inline void trace_apc_mem_readb(uint32_t ret)
+{
+}
+
+static inline void trace_slavio_sysctrl_mem_writel(uint32_t val)
+{
+}
+
+static inline void trace_slavio_sysctrl_mem_readl(uint32_t ret)
+{
+}
+
+static inline void trace_slavio_led_mem_writew(uint32_t val)
+{
+}
+
+static inline void trace_slavio_led_mem_readw(uint32_t ret)
+{
+}
+
+static inline void trace_slavio_timer_get_out(uint64_t limit, uint32_t counthigh, uint32_t count)
+{
+}
+
+static inline void trace_slavio_timer_irq(uint32_t counthigh, uint32_t count)
+{
+}
+
+static inline void trace_slavio_timer_mem_readl_invalid(uint64_t addr)
+{
+}
+
+static inline void trace_slavio_timer_mem_readl(uint64_t addr, uint32_t ret)
+{
+}
+
+static inline void trace_slavio_timer_mem_writel(uint64_t addr, uint32_t val)
+{
+}
+
+static inline void trace_slavio_timer_mem_writel_limit(unsigned int timer_index, uint64_t count)
+{
+}
+
+static inline void trace_slavio_timer_mem_writel_counter_invalid(void)
+{
+}
+
+static inline void trace_slavio_timer_mem_writel_status_start(unsigned int timer_index)
+{
+}
+
+static inline void trace_slavio_timer_mem_writel_status_stop(unsigned int timer_index)
+{
+}
+
+static inline void trace_slavio_timer_mem_writel_mode_user(unsigned int timer_index)
+{
+}
+
+static inline void trace_slavio_timer_mem_writel_mode_counter(unsigned int timer_index)
+{
+}
+
+static inline void trace_slavio_timer_mem_writel_mode_invalid(void)
+{
+}
+
+static inline void trace_slavio_timer_mem_writel_invalid(uint64_t addr)
+{
+}
+
+static inline void trace_ledma_memory_read(uint64_t addr)
+{
+}
+
+static inline void trace_ledma_memory_write(uint64_t addr)
+{
+}
+
+static inline void trace_sparc32_dma_set_irq_raise(void)
+{
+}
+
+static inline void trace_sparc32_dma_set_irq_lower(void)
+{
+}
+
+static inline void trace_espdma_memory_read(uint32_t addr)
+{
+}
+
+static inline void trace_espdma_memory_write(uint32_t addr)
+{
+}
+
+static inline void trace_sparc32_dma_mem_readl(uint64_t addr, uint32_t ret)
+{
+}
+
+static inline void trace_sparc32_dma_mem_writel(uint64_t addr, uint32_t old, uint32_t val)
+{
+}
+
+static inline void trace_sparc32_dma_enable_raise(void)
+{
+}
+
+static inline void trace_sparc32_dma_enable_lower(void)
+{
+}
+
+static inline void trace_sun4m_cpu_interrupt(unsigned int level)
+{
+}
+
+static inline void trace_sun4m_cpu_reset_interrupt(unsigned int level)
+{
+}
+
+static inline void trace_sun4m_cpu_set_irq_raise(int level)
+{
+}
+
+static inline void trace_sun4m_cpu_set_irq_lower(int level)
+{
+}
+
+static inline void trace_sun4m_iommu_mem_readl(uint64_t addr, uint32_t ret)
+{
+}
+
+static inline void trace_sun4m_iommu_mem_writel(uint64_t addr, uint32_t val)
+{
+}
+
+static inline void trace_sun4m_iommu_mem_writel_ctrl(uint64_t iostart)
+{
+}
+
+static inline void trace_sun4m_iommu_mem_writel_tlbflush(uint32_t val)
+{
+}
+
+static inline void trace_sun4m_iommu_mem_writel_pgflush(uint32_t val)
+{
+}
+
+static inline void trace_sun4m_iommu_page_get_flags(uint64_t pa, uint64_t iopte, uint32_t ret)
+{
+}
+
+static inline void trace_sun4m_iommu_translate_pa(uint64_t addr, uint64_t pa, uint32_t iopte)
+{
+}
+
+static inline void trace_sun4m_iommu_bad_addr(uint64_t addr)
+{
+}
+
+static inline void trace_usb_packet_state_change(int bus, const char * port, int ep, void * p, const char * o, const char * n)
+{
+}
+
+static inline void trace_usb_packet_state_fault(int bus, const char * port, int ep, void * p, const char * o, const char * n)
+{
+}
+
+static inline void trace_usb_port_claim(int bus, const char * port)
+{
+}
+
+static inline void trace_usb_port_attach(int bus, const char * port, const char * devspeed, const char * portspeed)
+{
+}
+
+static inline void trace_usb_port_detach(int bus, const char * port)
+{
+}
+
+static inline void trace_usb_port_release(int bus, const char * port)
+{
+}
+
+static inline void trace_usb_ehci_reset(void)
+{
+}
+
+static inline void trace_usb_ehci_opreg_read(uint32_t addr, const char * str, uint32_t val)
+{
+}
+
+static inline void trace_usb_ehci_opreg_write(uint32_t addr, const char * str, uint32_t val)
+{
+}
+
+static inline void trace_usb_ehci_opreg_change(uint32_t addr, const char * str, uint32_t new, uint32_t old)
+{
+}
+
+static inline void trace_usb_ehci_portsc_read(uint32_t addr, uint32_t port, uint32_t val)
+{
+}
+
+static inline void trace_usb_ehci_portsc_write(uint32_t addr, uint32_t port, uint32_t val)
+{
+}
+
+static inline void trace_usb_ehci_portsc_change(uint32_t addr, uint32_t port, uint32_t new, uint32_t old)
+{
+}
+
+static inline void trace_usb_ehci_usbsts(const char * sts, int state)
+{
+}
+
+static inline void trace_usb_ehci_state(const char * schedule, const char * state)
+{
+}
+
+static inline void trace_usb_ehci_qh_ptrs(void * q, uint32_t addr, uint32_t nxt, uint32_t c_qtd, uint32_t n_qtd, uint32_t a_qtd)
+{
+}
+
+static inline void trace_usb_ehci_qh_fields(uint32_t addr, int rl, int mplen, int eps, int ep, int devaddr)
+{
+}
+
+static inline void trace_usb_ehci_qh_bits(uint32_t addr, int c, int h, int dtc, int i)
+{
+}
+
+static inline void trace_usb_ehci_qtd_ptrs(void * q, uint32_t addr, uint32_t nxt, uint32_t altnext)
+{
+}
+
+static inline void trace_usb_ehci_qtd_fields(uint32_t addr, int tbytes, int cpage, int cerr, int pid)
+{
+}
+
+static inline void trace_usb_ehci_qtd_bits(uint32_t addr, int ioc, int active, int halt, int babble, int xacterr)
+{
+}
+
+static inline void trace_usb_ehci_itd(uint32_t addr, uint32_t nxt, uint32_t mplen, uint32_t mult, uint32_t ep, uint32_t devaddr)
+{
+}
+
+static inline void trace_usb_ehci_sitd(uint32_t addr, uint32_t nxt, uint32_t active)
+{
+}
+
+static inline void trace_usb_ehci_port_attach(uint32_t port, const char * owner, const char * device)
+{
+}
+
+static inline void trace_usb_ehci_port_detach(uint32_t port, const char * owner)
+{
+}
+
+static inline void trace_usb_ehci_port_reset(uint32_t port, int enable)
+{
+}
+
+static inline void trace_usb_ehci_data(int rw, uint32_t cpage, uint32_t offset, uint32_t addr, uint32_t len, uint32_t bufpos)
+{
+}
+
+static inline void trace_usb_ehci_queue_action(void * q, const char * action)
+{
+}
+
+static inline void trace_usb_ehci_packet_action(void * q, void * p, const char * action)
+{
+}
+
+static inline void trace_usb_ehci_irq(uint32_t level, uint32_t frindex, uint32_t sts, uint32_t mask)
+{
+}
+
+static inline void trace_usb_ehci_guest_bug(const char * reason)
+{
+}
+
+static inline void trace_usb_ehci_doorbell_ring(void)
+{
+}
+
+static inline void trace_usb_ehci_doorbell_ack(void)
+{
+}
+
+static inline void trace_usb_ehci_dma_error(void)
+{
+}
+
+static inline void trace_usb_uhci_reset(void)
+{
+}
+
+static inline void trace_usb_uhci_schedule_start(void)
+{
+}
+
+static inline void trace_usb_uhci_schedule_stop(void)
+{
+}
+
+static inline void trace_usb_uhci_frame_start(uint32_t num)
+{
+}
+
+static inline void trace_usb_uhci_frame_stop_bandwidth(void)
+{
+}
+
+static inline void trace_usb_uhci_frame_loop_stop_idle(void)
+{
+}
+
+static inline void trace_usb_uhci_frame_loop_continue(void)
+{
+}
+
+static inline void trace_usb_uhci_mmio_readw(uint32_t addr, uint32_t val)
+{
+}
+
+static inline void trace_usb_uhci_mmio_writew(uint32_t addr, uint32_t val)
+{
+}
+
+static inline void trace_usb_uhci_queue_add(uint32_t token)
+{
+}
+
+static inline void trace_usb_uhci_queue_del(uint32_t token, const char * reason)
+{
+}
+
+static inline void trace_usb_uhci_packet_add(uint32_t token, uint32_t addr)
+{
+}
+
+static inline void trace_usb_uhci_packet_link_async(uint32_t token, uint32_t addr)
+{
+}
+
+static inline void trace_usb_uhci_packet_unlink_async(uint32_t token, uint32_t addr)
+{
+}
+
+static inline void trace_usb_uhci_packet_cancel(uint32_t token, uint32_t addr, int done)
+{
+}
+
+static inline void trace_usb_uhci_packet_complete_success(uint32_t token, uint32_t addr)
+{
+}
+
+static inline void trace_usb_uhci_packet_complete_shortxfer(uint32_t token, uint32_t addr)
+{
+}
+
+static inline void trace_usb_uhci_packet_complete_stall(uint32_t token, uint32_t addr)
+{
+}
+
+static inline void trace_usb_uhci_packet_complete_babble(uint32_t token, uint32_t addr)
+{
+}
+
+static inline void trace_usb_uhci_packet_complete_error(uint32_t token, uint32_t addr)
+{
+}
+
+static inline void trace_usb_uhci_packet_del(uint32_t token, uint32_t addr)
+{
+}
+
+static inline void trace_usb_uhci_qh_load(uint32_t qh)
+{
+}
+
+static inline void trace_usb_uhci_td_load(uint32_t qh, uint32_t td, uint32_t ctrl, uint32_t token)
+{
+}
+
+static inline void trace_usb_uhci_td_queue(uint32_t td, uint32_t ctrl, uint32_t token)
+{
+}
+
+static inline void trace_usb_uhci_td_nextqh(uint32_t qh, uint32_t td)
+{
+}
+
+static inline void trace_usb_uhci_td_async(uint32_t qh, uint32_t td)
+{
+}
+
+static inline void trace_usb_uhci_td_complete(uint32_t qh, uint32_t td)
+{
+}
+
+static inline void trace_usb_xhci_reset(void)
+{
+}
+
+static inline void trace_usb_xhci_run(void)
+{
+}
+
+static inline void trace_usb_xhci_stop(void)
+{
+}
+
+static inline void trace_usb_xhci_cap_read(uint32_t off, uint32_t val)
+{
+}
+
+static inline void trace_usb_xhci_oper_read(uint32_t off, uint32_t val)
+{
+}
+
+static inline void trace_usb_xhci_port_read(uint32_t port, uint32_t off, uint32_t val)
+{
+}
+
+static inline void trace_usb_xhci_runtime_read(uint32_t off, uint32_t val)
+{
+}
+
+static inline void trace_usb_xhci_doorbell_read(uint32_t off, uint32_t val)
+{
+}
+
+static inline void trace_usb_xhci_oper_write(uint32_t off, uint32_t val)
+{
+}
+
+static inline void trace_usb_xhci_port_write(uint32_t port, uint32_t off, uint32_t val)
+{
+}
+
+static inline void trace_usb_xhci_runtime_write(uint32_t off, uint32_t val)
+{
+}
+
+static inline void trace_usb_xhci_doorbell_write(uint32_t off, uint32_t val)
+{
+}
+
+static inline void trace_usb_xhci_irq_intx(uint32_t level)
+{
+}
+
+static inline void trace_usb_xhci_irq_msi(uint32_t nr)
+{
+}
+
+static inline void trace_usb_xhci_irq_msix(uint32_t nr)
+{
+}
+
+static inline void trace_usb_xhci_irq_msix_use(uint32_t nr)
+{
+}
+
+static inline void trace_usb_xhci_irq_msix_unuse(uint32_t nr)
+{
+}
+
+static inline void trace_usb_xhci_queue_event(uint32_t vector, uint32_t idx, const char * trb, const char * evt, uint64_t param, uint32_t status, uint32_t control)
+{
+}
+
+static inline void trace_usb_xhci_fetch_trb(uint64_t addr, const char * name, uint64_t param, uint32_t status, uint32_t control)
+{
+}
+
+static inline void trace_usb_xhci_port_reset(uint32_t port)
+{
+}
+
+static inline void trace_usb_xhci_port_link(uint32_t port, uint32_t pls)
+{
+}
+
+static inline void trace_usb_xhci_port_notify(uint32_t port, uint32_t pls)
+{
+}
+
+static inline void trace_usb_xhci_slot_enable(uint32_t slotid)
+{
+}
+
+static inline void trace_usb_xhci_slot_disable(uint32_t slotid)
+{
+}
+
+static inline void trace_usb_xhci_slot_address(uint32_t slotid)
+{
+}
+
+static inline void trace_usb_xhci_slot_configure(uint32_t slotid)
+{
+}
+
+static inline void trace_usb_xhci_slot_evaluate(uint32_t slotid)
+{
+}
+
+static inline void trace_usb_xhci_slot_reset(uint32_t slotid)
+{
+}
+
+static inline void trace_usb_xhci_ep_enable(uint32_t slotid, uint32_t epid)
+{
+}
+
+static inline void trace_usb_xhci_ep_disable(uint32_t slotid, uint32_t epid)
+{
+}
+
+static inline void trace_usb_xhci_ep_set_dequeue(uint32_t slotid, uint32_t epid, uint32_t streamid, uint64_t param)
+{
+}
+
+static inline void trace_usb_xhci_ep_kick(uint32_t slotid, uint32_t epid, uint32_t streamid)
+{
+}
+
+static inline void trace_usb_xhci_ep_stop(uint32_t slotid, uint32_t epid)
+{
+}
+
+static inline void trace_usb_xhci_ep_reset(uint32_t slotid, uint32_t epid)
+{
+}
+
+static inline void trace_usb_xhci_xfer_start(void * xfer, uint32_t slotid, uint32_t epid, uint32_t streamid)
+{
+}
+
+static inline void trace_usb_xhci_xfer_async(void * xfer)
+{
+}
+
+static inline void trace_usb_xhci_xfer_nak(void * xfer)
+{
+}
+
+static inline void trace_usb_xhci_xfer_retry(void * xfer)
+{
+}
+
+static inline void trace_usb_xhci_xfer_success(void * xfer, uint32_t bytes)
+{
+}
+
+static inline void trace_usb_xhci_xfer_error(void * xfer, uint32_t ret)
+{
+}
+
+static inline void trace_usb_xhci_unimplemented(const char * item, int nr)
+{
+}
+
+static inline void trace_usb_desc_device(int addr, int len, int ret)
+{
+}
+
+static inline void trace_usb_desc_device_qualifier(int addr, int len, int ret)
+{
+}
+
+static inline void trace_usb_desc_config(int addr, int index, int len, int ret)
+{
+}
+
+static inline void trace_usb_desc_other_speed_config(int addr, int index, int len, int ret)
+{
+}
+
+static inline void trace_usb_desc_string(int addr, int index, int len, int ret)
+{
+}
+
+static inline void trace_usb_desc_bos(int addr, int len, int ret)
+{
+}
+
+static inline void trace_usb_set_addr(int addr)
+{
+}
+
+static inline void trace_usb_set_config(int addr, int config, int ret)
+{
+}
+
+static inline void trace_usb_set_interface(int addr, int iface, int alt, int ret)
+{
+}
+
+static inline void trace_usb_clear_device_feature(int addr, int feature, int ret)
+{
+}
+
+static inline void trace_usb_set_device_feature(int addr, int feature, int ret)
+{
+}
+
+static inline void trace_usb_hub_reset(int addr)
+{
+}
+
+static inline void trace_usb_hub_control(int addr, int request, int value, int index, int length)
+{
+}
+
+static inline void trace_usb_hub_get_port_status(int addr, int nr, int status, int changed)
+{
+}
+
+static inline void trace_usb_hub_set_port_feature(int addr, int nr, const char * f)
+{
+}
+
+static inline void trace_usb_hub_clear_port_feature(int addr, int nr, const char * f)
+{
+}
+
+static inline void trace_usb_hub_attach(int addr, int nr)
+{
+}
+
+static inline void trace_usb_hub_detach(int addr, int nr)
+{
+}
+
+static inline void trace_usb_uas_reset(int addr)
+{
+}
+
+static inline void trace_usb_uas_command(int addr, uint16_t tag, int lun, uint32_t lun64_1, uint32_t lun64_2)
+{
+}
+
+static inline void trace_usb_uas_response(int addr, uint16_t tag, uint8_t code)
+{
+}
+
+static inline void trace_usb_uas_sense(int addr, uint16_t tag, uint8_t status)
+{
+}
+
+static inline void trace_usb_uas_read_ready(int addr, uint16_t tag)
+{
+}
+
+static inline void trace_usb_uas_write_ready(int addr, uint16_t tag)
+{
+}
+
+static inline void trace_usb_uas_xfer_data(int addr, uint16_t tag, uint32_t copy, uint32_t uoff, uint32_t usize, uint32_t soff, uint32_t ssize)
+{
+}
+
+static inline void trace_usb_uas_scsi_data(int addr, uint16_t tag, uint32_t bytes)
+{
+}
+
+static inline void trace_usb_uas_scsi_complete(int addr, uint16_t tag, uint32_t status, uint32_t resid)
+{
+}
+
+static inline void trace_usb_uas_tmf_abort_task(int addr, uint16_t tag, uint16_t task_tag)
+{
+}
+
+static inline void trace_usb_uas_tmf_logical_unit_reset(int addr, uint16_t tag, int lun)
+{
+}
+
+static inline void trace_usb_uas_tmf_unsupported(int addr, uint16_t tag, uint32_t function)
+{
+}
+
+static inline void trace_usb_host_open_started(int bus, int addr)
+{
+}
+
+static inline void trace_usb_host_open_success(int bus, int addr)
+{
+}
+
+static inline void trace_usb_host_open_failure(int bus, int addr)
+{
+}
+
+static inline void trace_usb_host_disconnect(int bus, int addr)
+{
+}
+
+static inline void trace_usb_host_close(int bus, int addr)
+{
+}
+
+static inline void trace_usb_host_attach_kernel(int bus, int addr, int interface)
+{
+}
+
+static inline void trace_usb_host_detach_kernel(int bus, int addr, int interface)
+{
+}
+
+static inline void trace_usb_host_set_address(int bus, int addr, int config)
+{
+}
+
+static inline void trace_usb_host_set_config(int bus, int addr, int config)
+{
+}
+
+static inline void trace_usb_host_set_interface(int bus, int addr, int interface, int alt)
+{
+}
+
+static inline void trace_usb_host_claim_interfaces(int bus, int addr, int config, int nif)
+{
+}
+
+static inline void trace_usb_host_claim_interface(int bus, int addr, int config, int interface)
+{
+}
+
+static inline void trace_usb_host_release_interfaces(int bus, int addr)
+{
+}
+
+static inline void trace_usb_host_release_interface(int bus, int addr, int interface)
+{
+}
+
+static inline void trace_usb_host_req_control(int bus, int addr, void * p, int req, int value, int index)
+{
+}
+
+static inline void trace_usb_host_req_data(int bus, int addr, void * p, int in, int ep, int size)
+{
+}
+
+static inline void trace_usb_host_req_complete(int bus, int addr, void * p, int status, int length)
+{
+}
+
+static inline void trace_usb_host_req_emulated(int bus, int addr, void * p, int status)
+{
+}
+
+static inline void trace_usb_host_req_canceled(int bus, int addr, void * p)
+{
+}
+
+static inline void trace_usb_host_urb_submit(int bus, int addr, void * aurb, int length, int more)
+{
+}
+
+static inline void trace_usb_host_urb_complete(int bus, int addr, void * aurb, int status, int length, int more)
+{
+}
+
+static inline void trace_usb_host_urb_canceled(int bus, int addr, void * aurb)
+{
+}
+
+static inline void trace_usb_host_ep_set_halt(int bus, int addr, int ep)
+{
+}
+
+static inline void trace_usb_host_ep_clear_halt(int bus, int addr, int ep)
+{
+}
+
+static inline void trace_usb_host_iso_start(int bus, int addr, int ep)
+{
+}
+
+static inline void trace_usb_host_iso_stop(int bus, int addr, int ep)
+{
+}
+
+static inline void trace_usb_host_iso_out_of_bufs(int bus, int addr, int ep)
+{
+}
+
+static inline void trace_usb_host_iso_many_urbs(int bus, int addr, int count)
+{
+}
+
+static inline void trace_usb_host_reset(int bus, int addr)
+{
+}
+
+static inline void trace_usb_host_auto_scan_enabled(void)
+{
+}
+
+static inline void trace_usb_host_auto_scan_disabled(void)
+{
+}
+
+static inline void trace_usb_host_claim_port(int bus, int hub, int port)
+{
+}
+
+static inline void trace_usb_host_parse_device(int bus, int addr, int vendor, int product)
+{
+}
+
+static inline void trace_usb_host_parse_config(int bus, int addr, int value, int active)
+{
+}
+
+static inline void trace_usb_host_parse_interface(int bus, int addr, int num, int alt, int active)
+{
+}
+
+static inline void trace_usb_host_parse_endpoint(int bus, int addr, int ep, const char * dir, const char * type, int active)
+{
+}
+
+static inline void trace_usb_host_parse_unknown(int bus, int addr, int len, int type)
+{
+}
+
+static inline void trace_usb_host_parse_error(int bus, int addr, const char * errmsg)
+{
+}
+
+static inline void trace_scsi_req_alloc(int target, int lun, int tag)
+{
+}
+
+static inline void trace_scsi_req_cancel(int target, int lun, int tag)
+{
+}
+
+static inline void trace_scsi_req_data(int target, int lun, int tag, int len)
+{
+}
+
+static inline void trace_scsi_req_data_canceled(int target, int lun, int tag, int len)
+{
+}
+
+static inline void trace_scsi_req_dequeue(int target, int lun, int tag)
+{
+}
+
+static inline void trace_scsi_req_continue(int target, int lun, int tag)
+{
+}
+
+static inline void trace_scsi_req_continue_canceled(int target, int lun, int tag)
+{
+}
+
+static inline void trace_scsi_req_parsed(int target, int lun, int tag, int cmd, int mode, int xfer)
+{
+}
+
+static inline void trace_scsi_req_parsed_lba(int target, int lun, int tag, int cmd, uint64_t lba)
+{
+}
+
+static inline void trace_scsi_req_parse_bad(int target, int lun, int tag, int cmd)
+{
+}
+
+static inline void trace_scsi_req_build_sense(int target, int lun, int tag, int key, int asc, int ascq)
+{
+}
+
+static inline void trace_scsi_device_set_ua(int target, int lun, int key, int asc, int ascq)
+{
+}
+
+static inline void trace_scsi_report_luns(int target, int lun, int tag)
+{
+}
+
+static inline void trace_scsi_inquiry(int target, int lun, int tag, int cdb1, int cdb2)
+{
+}
+
+static inline void trace_scsi_test_unit_ready(int target, int lun, int tag)
+{
+}
+
+static inline void trace_scsi_request_sense(int target, int lun, int tag)
+{
+}
+
+static inline void trace_vm_state_notify(int running, int reason)
+{
+}
+
+static inline void trace_load_file(const char * name, const char * path)
+{
+}
+
+static inline void trace_runstate_set(int new_state)
+{
+}
+
+static inline void trace_qcow2_writev_start_req(void * co, int64_t sector, int nb_sectors)
+{
+}
+
+static inline void trace_qcow2_writev_done_req(void * co, int ret)
+{
+}
+
+static inline void trace_qcow2_writev_start_part(void * co)
+{
+}
+
+static inline void trace_qcow2_writev_done_part(void * co, int cur_nr_sectors)
+{
+}
+
+static inline void trace_qcow2_writev_data(void * co, uint64_t offset)
+{
+}
+
+static inline void trace_qcow2_alloc_clusters_offset(void * co, uint64_t offset, int n_start, int n_end)
+{
+}
+
+static inline void trace_qcow2_handle_copied(void * co, uint64_t guest_offset, uint64_t host_offset, uint64_t bytes)
+{
+}
+
+static inline void trace_qcow2_handle_alloc(void * co, uint64_t guest_offset, uint64_t host_offset, uint64_t bytes)
+{
+}
+
+static inline void trace_qcow2_do_alloc_clusters_offset(void * co, uint64_t guest_offset, uint64_t host_offset, int nb_clusters)
+{
+}
+
+static inline void trace_qcow2_cluster_alloc_phys(void * co)
+{
+}
+
+static inline void trace_qcow2_cluster_link_l2(void * co, int nb_clusters)
+{
+}
+
+static inline void trace_qcow2_l2_allocate(void * bs, int l1_index)
+{
+}
+
+static inline void trace_qcow2_l2_allocate_get_empty(void * bs, int l1_index)
+{
+}
+
+static inline void trace_qcow2_l2_allocate_write_l2(void * bs, int l1_index)
+{
+}
+
+static inline void trace_qcow2_l2_allocate_write_l1(void * bs, int l1_index)
+{
+}
+
+static inline void trace_qcow2_l2_allocate_done(void * bs, int l1_index, int ret)
+{
+}
+
+static inline void trace_qcow2_cache_get(void * co, int c, uint64_t offset, bool read_from_disk)
+{
+}
+
+static inline void trace_qcow2_cache_get_replace_entry(void * co, int c, int i)
+{
+}
+
+static inline void trace_qcow2_cache_get_read(void * co, int c, int i)
+{
+}
+
+static inline void trace_qcow2_cache_get_done(void * co, int c, int i)
+{
+}
+
+static inline void trace_qcow2_cache_flush(void * co, int c)
+{
+}
+
+static inline void trace_qcow2_cache_entry_flush(void * co, int c, int i)
+{
+}
+
+static inline void trace_qed_alloc_l2_cache_entry(void * l2_cache, void * entry)
+{
+}
+
+static inline void trace_qed_unref_l2_cache_entry(void * entry, int ref)
+{
+}
+
+static inline void trace_qed_find_l2_cache_entry(void * l2_cache, void * entry, uint64_t offset, int ref)
+{
+}
+
+static inline void trace_qed_read_table(void * s, uint64_t offset, void * table)
+{
+}
+
+static inline void trace_qed_read_table_cb(void * s, void * table, int ret)
+{
+}
+
+static inline void trace_qed_write_table(void * s, uint64_t offset, void * table, unsigned int index, unsigned int n)
+{
+}
+
+static inline void trace_qed_write_table_cb(void * s, void * table, int flush, int ret)
+{
+}
+
+static inline void trace_qed_need_check_timer_cb(void * s)
+{
+}
+
+static inline void trace_qed_start_need_check_timer(void * s)
+{
+}
+
+static inline void trace_qed_cancel_need_check_timer(void * s)
+{
+}
+
+static inline void trace_qed_aio_complete(void * s, void * acb, int ret)
+{
+}
+
+static inline void trace_qed_aio_setup(void * s, void * acb, int64_t sector_num, int nb_sectors, void * opaque, int flags)
+{
+}
+
+static inline void trace_qed_aio_next_io(void * s, void * acb, int ret, uint64_t cur_pos)
+{
+}
+
+static inline void trace_qed_aio_read_data(void * s, void * acb, int ret, uint64_t offset, size_t len)
+{
+}
+
+static inline void trace_qed_aio_write_data(void * s, void * acb, int ret, uint64_t offset, size_t len)
+{
+}
+
+static inline void trace_qed_aio_write_prefill(void * s, void * acb, uint64_t start, size_t len, uint64_t offset)
+{
+}
+
+static inline void trace_qed_aio_write_postfill(void * s, void * acb, uint64_t start, size_t len, uint64_t offset)
+{
+}
+
+static inline void trace_qed_aio_write_main(void * s, void * acb, int ret, uint64_t offset, size_t len)
+{
+}
+
+static inline void trace_g364fb_read(uint64_t addr, uint32_t val)
+{
+}
+
+static inline void trace_g364fb_write(uint64_t addr, uint32_t new)
+{
+}
+
+static inline void trace_grlib_gptimer_enable(int id, uint32_t count)
+{
+}
+
+static inline void trace_grlib_gptimer_disabled(int id, uint32_t config)
+{
+}
+
+static inline void trace_grlib_gptimer_restart(int id, uint32_t reload)
+{
+}
+
+static inline void trace_grlib_gptimer_set_scaler(uint32_t scaler, uint32_t freq)
+{
+}
+
+static inline void trace_grlib_gptimer_hit(int id)
+{
+}
+
+static inline void trace_grlib_gptimer_readl(int id, uint64_t addr, uint32_t val)
+{
+}
+
+static inline void trace_grlib_gptimer_writel(int id, uint64_t addr, uint32_t val)
+{
+}
+
+static inline void trace_grlib_irqmp_check_irqs(uint32_t pend, uint32_t force, uint32_t mask, uint32_t lvl1, uint32_t lvl2)
+{
+}
+
+static inline void trace_grlib_irqmp_ack(int intno)
+{
+}
+
+static inline void trace_grlib_irqmp_set_irq(int irq)
+{
+}
+
+static inline void trace_grlib_irqmp_readl_unknown(uint64_t addr)
+{
+}
+
+static inline void trace_grlib_irqmp_writel_unknown(uint64_t addr, uint32_t value)
+{
+}
+
+static inline void trace_grlib_apbuart_event(int event)
+{
+}
+
+static inline void trace_grlib_apbuart_writel_unknown(uint64_t addr, uint32_t value)
+{
+}
+
+static inline void trace_grlib_apbuart_readl_unknown(uint64_t addr)
+{
+}
+
+static inline void trace_leon3_set_irq(int intno)
+{
+}
+
+static inline void trace_leon3_reset_irq(int intno)
+{
+}
+
+static inline void trace_spice_vmc_write(ssize_t out, int len)
+{
+}
+
+static inline void trace_spice_vmc_read(int bytes, int len)
+{
+}
+
+static inline void trace_spice_vmc_register_interface(void * scd)
+{
+}
+
+static inline void trace_spice_vmc_unregister_interface(void * scd)
+{
+}
+
+static inline void trace_spice_vmc_event(int event)
+{
+}
+
+static inline void trace_lm32_pic_raise_irq(void)
+{
+}
+
+static inline void trace_lm32_pic_lower_irq(void)
+{
+}
+
+static inline void trace_lm32_pic_interrupt(int irq, int level)
+{
+}
+
+static inline void trace_lm32_pic_set_im(uint32_t im)
+{
+}
+
+static inline void trace_lm32_pic_set_ip(uint32_t ip)
+{
+}
+
+static inline void trace_lm32_pic_get_im(uint32_t im)
+{
+}
+
+static inline void trace_lm32_pic_get_ip(uint32_t ip)
+{
+}
+
+static inline void trace_lm32_juart_get_jtx(uint32_t value)
+{
+}
+
+static inline void trace_lm32_juart_set_jtx(uint32_t value)
+{
+}
+
+static inline void trace_lm32_juart_get_jrx(uint32_t value)
+{
+}
+
+static inline void trace_lm32_juart_set_jrx(uint32_t value)
+{
+}
+
+static inline void trace_lm32_timer_memory_write(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_lm32_timer_memory_read(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_lm32_timer_hit(void)
+{
+}
+
+static inline void trace_lm32_timer_irq_state(int level)
+{
+}
+
+static inline void trace_lm32_uart_memory_write(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_lm32_uart_memory_read(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_lm32_uart_irq_state(int level)
+{
+}
+
+static inline void trace_lm32_sys_memory_write(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_megasas_init_firmware(uint64_t pa)
+{
+}
+
+static inline void trace_megasas_init_queue(uint64_t queue_pa, int queue_len, uint64_t head, uint64_t tail, uint32_t flags)
+{
+}
+
+static inline void trace_megasas_initq_map_failed(int frame)
+{
+}
+
+static inline void trace_megasas_initq_mismatch(int queue_len, int fw_cmds)
+{
+}
+
+static inline void trace_megasas_qf_found(unsigned int index, uint64_t pa)
+{
+}
+
+static inline void trace_megasas_qf_new(unsigned int index, void * cmd)
+{
+}
+
+static inline void trace_megasas_qf_failed(unsigned long pa)
+{
+}
+
+static inline void trace_megasas_qf_enqueue(unsigned int index, unsigned int count, uint64_t context, unsigned int tail, int busy)
+{
+}
+
+static inline void trace_megasas_qf_update(unsigned int head, unsigned int busy)
+{
+}
+
+static inline void trace_megasas_qf_dequeue(unsigned int index)
+{
+}
+
+static inline void trace_megasas_qf_map_failed(int cmd, unsigned long frame)
+{
+}
+
+static inline void trace_megasas_qf_complete_noirq(uint64_t context)
+{
+}
+
+static inline void trace_megasas_qf_complete(uint64_t context, unsigned int tail, unsigned int offset, int busy, unsigned int doorbell)
+{
+}
+
+static inline void trace_megasas_handle_frame(const char * cmd, uint64_t addr, uint64_t context, uint32_t count)
+{
+}
+
+static inline void trace_megasas_frame_busy(uint64_t addr)
+{
+}
+
+static inline void trace_megasas_unhandled_frame_cmd(int cmd, uint8_t frame_cmd)
+{
+}
+
+static inline void trace_megasas_handle_scsi(const char * frame, int bus, int dev, int lun, void * sdev, unsigned long size)
+{
+}
+
+static inline void trace_megasas_scsi_target_not_present(const char * frame, int bus, int dev, int lun)
+{
+}
+
+static inline void trace_megasas_scsi_invalid_cdb_len(const char * frame, int bus, int dev, int lun, int len)
+{
+}
+
+static inline void trace_megasas_iov_read_overflow(int cmd, int bytes, int len)
+{
+}
+
+static inline void trace_megasas_iov_write_overflow(int cmd, int bytes, int len)
+{
+}
+
+static inline void trace_megasas_iov_read_underflow(int cmd, int bytes, int len)
+{
+}
+
+static inline void trace_megasas_iov_write_underflow(int cmd, int bytes, int len)
+{
+}
+
+static inline void trace_megasas_scsi_req_alloc_failed(const char * frame, int dev, int lun)
+{
+}
+
+static inline void trace_megasas_scsi_read_start(int cmd, int len)
+{
+}
+
+static inline void trace_megasas_scsi_write_start(int cmd, int len)
+{
+}
+
+static inline void trace_megasas_scsi_nodata(int cmd)
+{
+}
+
+static inline void trace_megasas_scsi_complete(int cmd, uint32_t status, int len, int xfer)
+{
+}
+
+static inline void trace_megasas_command_complete(int cmd, uint32_t status, uint32_t resid)
+{
+}
+
+static inline void trace_megasas_handle_io(int cmd, const char * frame, int dev, int lun, unsigned long lba, unsigned long count)
+{
+}
+
+static inline void trace_megasas_io_target_not_present(int cmd, const char * frame, int dev, int lun)
+{
+}
+
+static inline void trace_megasas_io_read_start(int cmd, unsigned long lba, unsigned long count, unsigned long len)
+{
+}
+
+static inline void trace_megasas_io_write_start(int cmd, unsigned long lba, unsigned long count, unsigned long len)
+{
+}
+
+static inline void trace_megasas_io_complete(int cmd, uint32_t len)
+{
+}
+
+static inline void trace_megasas_io_read(int cmd, int bytes, int len, unsigned long offset)
+{
+}
+
+static inline void trace_megasas_io_write(int cmd, int bytes, int len, unsigned long offset)
+{
+}
+
+static inline void trace_megasas_io_continue(int cmd, int bytes)
+{
+}
+
+static inline void trace_megasas_iovec_map_failed(int cmd, int index, unsigned long iov_size)
+{
+}
+
+static inline void trace_megasas_iovec_sgl_overflow(int cmd, int index, int limit)
+{
+}
+
+static inline void trace_megasas_iovec_sgl_underflow(int cmd, int index)
+{
+}
+
+static inline void trace_megasas_iovec_sgl_invalid(int cmd, int index, uint64_t pa, uint32_t len)
+{
+}
+
+static inline void trace_megasas_iovec_overflow(int cmd, int len, int limit)
+{
+}
+
+static inline void trace_megasas_iovec_underflow(int cmd, int len, int limit)
+{
+}
+
+static inline void trace_megasas_handle_dcmd(int cmd, int opcode)
+{
+}
+
+static inline void trace_megasas_finish_dcmd(int cmd, int size)
+{
+}
+
+static inline void trace_megasas_dcmd_req_alloc_failed(int cmd, const char * desc)
+{
+}
+
+static inline void trace_megasas_dcmd_internal_submit(int cmd, const char * desc, int dev)
+{
+}
+
+static inline void trace_megasas_dcmd_internal_finish(int cmd, int opcode, int lun)
+{
+}
+
+static inline void trace_megasas_dcmd_internal_invalid(int cmd, int opcode)
+{
+}
+
+static inline void trace_megasas_dcmd_unhandled(int cmd, int opcode, int len)
+{
+}
+
+static inline void trace_megasas_dcmd_zero_sge(int cmd)
+{
+}
+
+static inline void trace_megasas_dcmd_invalid_sge(int cmd, int count)
+{
+}
+
+static inline void trace_megasas_dcmd_map_failed(int cmd)
+{
+}
+
+static inline void trace_megasas_dcmd_invalid_xfer_len(int cmd, unsigned long size, unsigned long max)
+{
+}
+
+static inline void trace_megasas_dcmd_enter(int cmd, const char * dcmd, int len)
+{
+}
+
+static inline void trace_megasas_dcmd_dummy(int cmd, unsigned long size)
+{
+}
+
+static inline void trace_megasas_dcmd_set_fw_time(int cmd, unsigned long time)
+{
+}
+
+static inline void trace_megasas_dcmd_pd_get_list(int cmd, int num, int max, int offset)
+{
+}
+
+static inline void trace_megasas_dcmd_ld_get_list(int cmd, int num, int max)
+{
+}
+
+static inline void trace_megasas_dcmd_ld_get_info(int cmd, int ld_id)
+{
+}
+
+static inline void trace_megasas_dcmd_pd_get_info(int cmd, int pd_id)
+{
+}
+
+static inline void trace_megasas_dcmd_pd_list_query(int cmd, int flags)
+{
+}
+
+static inline void trace_megasas_dcmd_unsupported(int cmd, unsigned long size)
+{
+}
+
+static inline void trace_megasas_abort_frame(int cmd, int abort_cmd)
+{
+}
+
+static inline void trace_megasas_abort_no_cmd(int cmd, uint64_t context)
+{
+}
+
+static inline void trace_megasas_abort_invalid_context(int cmd, uint64_t context, int abort_cmd)
+{
+}
+
+static inline void trace_megasas_reset(void)
+{
+}
+
+static inline void trace_megasas_init(int sges, int cmds, const char * intr, const char * mode)
+{
+}
+
+static inline void trace_megasas_msix_raise(int vector)
+{
+}
+
+static inline void trace_megasas_irq_lower(void)
+{
+}
+
+static inline void trace_megasas_irq_raise(void)
+{
+}
+
+static inline void trace_megasas_intr_enabled(void)
+{
+}
+
+static inline void trace_megasas_intr_disabled(void)
+{
+}
+
+static inline void trace_megasas_mmio_readl(unsigned long addr, uint32_t val)
+{
+}
+
+static inline void trace_megasas_mmio_invalid_readl(unsigned long addr)
+{
+}
+
+static inline void trace_megasas_mmio_writel(uint32_t addr, uint32_t val)
+{
+}
+
+static inline void trace_megasas_mmio_invalid_writel(uint32_t addr, uint32_t val)
+{
+}
+
+static inline void trace_milkymist_ac97_memory_read(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_ac97_memory_write(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_ac97_pulse_irq_crrequest(void)
+{
+}
+
+static inline void trace_milkymist_ac97_pulse_irq_crreply(void)
+{
+}
+
+static inline void trace_milkymist_ac97_pulse_irq_dmaw(void)
+{
+}
+
+static inline void trace_milkymist_ac97_pulse_irq_dmar(void)
+{
+}
+
+static inline void trace_milkymist_ac97_in_cb(int avail, uint32_t remaining)
+{
+}
+
+static inline void trace_milkymist_ac97_in_cb_transferred(int transferred)
+{
+}
+
+static inline void trace_milkymist_ac97_out_cb(int free, uint32_t remaining)
+{
+}
+
+static inline void trace_milkymist_ac97_out_cb_transferred(int transferred)
+{
+}
+
+static inline void trace_milkymist_hpdmc_memory_read(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_hpdmc_memory_write(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_memcard_memory_read(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_memcard_memory_write(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_minimac2_memory_read(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_minimac2_memory_write(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_minimac2_mdio_write(uint8_t phy_addr, uint8_t addr, uint16_t value)
+{
+}
+
+static inline void trace_milkymist_minimac2_mdio_read(uint8_t phy_addr, uint8_t addr, uint16_t value)
+{
+}
+
+static inline void trace_milkymist_minimac2_tx_frame(uint32_t length)
+{
+}
+
+static inline void trace_milkymist_minimac2_rx_frame(const void * buf, uint32_t length)
+{
+}
+
+static inline void trace_milkymist_minimac2_drop_rx_frame(const void * buf)
+{
+}
+
+static inline void trace_milkymist_minimac2_rx_transfer(const void * buf, uint32_t length)
+{
+}
+
+static inline void trace_milkymist_minimac2_raise_irq_rx(void)
+{
+}
+
+static inline void trace_milkymist_minimac2_lower_irq_rx(void)
+{
+}
+
+static inline void trace_milkymist_minimac2_pulse_irq_tx(void)
+{
+}
+
+static inline void trace_milkymist_pfpu_memory_read(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_pfpu_memory_write(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_pfpu_vectout(uint32_t a, uint32_t b, uint32_t dma_ptr)
+{
+}
+
+static inline void trace_milkymist_pfpu_pulse_irq(void)
+{
+}
+
+static inline void trace_milkymist_softusb_memory_read(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_softusb_memory_write(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_softusb_mevt(uint8_t m)
+{
+}
+
+static inline void trace_milkymist_softusb_kevt(uint8_t m)
+{
+}
+
+static inline void trace_milkymist_softusb_mouse_event(int dx, int dy, int dz, int bs)
+{
+}
+
+static inline void trace_milkymist_softusb_pulse_irq(void)
+{
+}
+
+static inline void trace_milkymist_sysctl_memory_read(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_sysctl_memory_write(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_sysctl_icap_write(uint32_t value)
+{
+}
+
+static inline void trace_milkymist_sysctl_start_timer0(void)
+{
+}
+
+static inline void trace_milkymist_sysctl_stop_timer0(void)
+{
+}
+
+static inline void trace_milkymist_sysctl_start_timer1(void)
+{
+}
+
+static inline void trace_milkymist_sysctl_stop_timer1(void)
+{
+}
+
+static inline void trace_milkymist_sysctl_pulse_irq_timer0(void)
+{
+}
+
+static inline void trace_milkymist_sysctl_pulse_irq_timer1(void)
+{
+}
+
+static inline void trace_milkymist_tmu2_memory_read(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_tmu2_memory_write(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_tmu2_start(void)
+{
+}
+
+static inline void trace_milkymist_tmu2_pulse_irq(void)
+{
+}
+
+static inline void trace_milkymist_uart_memory_read(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_uart_memory_write(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_uart_raise_irq(void)
+{
+}
+
+static inline void trace_milkymist_uart_lower_irq(void)
+{
+}
+
+static inline void trace_milkymist_vgafb_memory_read(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_vgafb_memory_write(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_mipsnet_send(uint32_t size)
+{
+}
+
+static inline void trace_mipsnet_receive(uint32_t size)
+{
+}
+
+static inline void trace_mipsnet_read(uint64_t addr, uint32_t val)
+{
+}
+
+static inline void trace_mipsnet_write(uint64_t addr, uint64_t val)
+{
+}
+
+static inline void trace_mipsnet_irq(uint32_t isr, uint32_t intctl)
+{
+}
+
+static inline void trace_pc87312_io_read(uint32_t addr, uint32_t val)
+{
+}
+
+static inline void trace_pc87312_io_write(uint32_t addr, uint32_t val)
+{
+}
+
+static inline void trace_pc87312_info_floppy(uint32_t base)
+{
+}
+
+static inline void trace_pc87312_info_ide(uint32_t base)
+{
+}
+
+static inline void trace_pc87312_info_parallel(uint32_t base, uint32_t irq)
+{
+}
+
+static inline void trace_pc87312_info_serial(int n, uint32_t base, uint32_t irq)
+{
+}
+
+static inline void trace_pvscsi_ring_init_data(uint32_t txr_len_log2, uint32_t rxr_len_log2)
+{
+}
+
+static inline void trace_pvscsi_ring_init_msg(uint32_t len_log2)
+{
+}
+
+static inline void trace_pvscsi_ring_flush_cmp(uint64_t filled_cmp_ptr)
+{
+}
+
+static inline void trace_pvscsi_ring_flush_msg(uint64_t filled_cmp_ptr)
+{
+}
+
+static inline void trace_pvscsi_update_irq_level(bool raise, uint64_t mask, uint64_t status)
+{
+}
+
+static inline void trace_pvscsi_update_irq_msi(void)
+{
+}
+
+static inline void trace_pvscsi_cmp_ring_put(unsigned long addr)
+{
+}
+
+static inline void trace_pvscsi_msg_ring_put(unsigned long addr)
+{
+}
+
+static inline void trace_pvscsi_complete_request(uint64_t context, uint64_t len, uint8_t sense_key)
+{
+}
+
+static inline void trace_pvscsi_get_sg_list(int nsg, size_t size)
+{
+}
+
+static inline void trace_pvscsi_get_next_sg_elem(uint32_t flags)
+{
+}
+
+static inline void trace_pvscsi_command_complete_not_found(uint32_t tag)
+{
+}
+
+static inline void trace_pvscsi_command_complete_data_run(void)
+{
+}
+
+static inline void trace_pvscsi_command_complete_sense_len(int len)
+{
+}
+
+static inline void trace_pvscsi_convert_sglist(uint64_t context, unsigned long addr, uint32_t resid)
+{
+}
+
+static inline void trace_pvscsi_process_req_descr(uint8_t cmd, uint64_t ctx)
+{
+}
+
+static inline void trace_pvscsi_process_req_descr_unknown_device(void)
+{
+}
+
+static inline void trace_pvscsi_process_req_descr_invalid_dir(void)
+{
+}
+
+static inline void trace_pvscsi_process_io(unsigned long addr)
+{
+}
+
+static inline void trace_pvscsi_on_cmd_noimpl(const char* cmd)
+{
+}
+
+static inline void trace_pvscsi_on_cmd_reset_dev(uint32_t tgt, int lun, void* dev)
+{
+}
+
+static inline void trace_pvscsi_on_cmd_arrived(const char* cmd)
+{
+}
+
+static inline void trace_pvscsi_on_cmd_abort(uint64_t ctx, uint32_t tgt)
+{
+}
+
+static inline void trace_pvscsi_on_cmd_unknown(uint64_t cmd_id)
+{
+}
+
+static inline void trace_pvscsi_on_cmd_unknown_data(uint32_t data)
+{
+}
+
+static inline void trace_pvscsi_io_write(const char* cmd, uint64_t val)
+{
+}
+
+static inline void trace_pvscsi_io_write_unknown(unsigned long addr, unsigned sz, uint64_t val)
+{
+}
+
+static inline void trace_pvscsi_io_read(const char* cmd, uint64_t status)
+{
+}
+
+static inline void trace_pvscsi_io_read_unknown(unsigned long addr, unsigned sz)
+{
+}
+
+static inline void trace_pvscsi_init_msi_fail(int res)
+{
+}
+
+static inline void trace_pvscsi_state(const char* state)
+{
+}
+
+static inline void trace_pvscsi_tx_rings_ppn(const char* label, uint64_t ppn)
+{
+}
+
+static inline void trace_pvscsi_tx_rings_num_pages(const char* label, uint32_t num)
+{
+}
+
+static inline void trace_xen_ram_alloc(unsigned long ram_addr, unsigned long size)
+{
+}
+
+static inline void trace_xen_client_set_memory(uint64_t start_addr, unsigned long size, bool log_dirty)
+{
+}
+
+static inline void trace_xen_map_cache(uint64_t phys_addr)
+{
+}
+
+static inline void trace_xen_remap_bucket(uint64_t index)
+{
+}
+
+static inline void trace_xen_map_cache_return(void* ptr)
+{
+}
+
+static inline void trace_xen_map_block(uint64_t phys_addr, uint64_t size)
+{
+}
+
+static inline void trace_xen_unmap_block(void* addr, unsigned long size)
+{
+}
+
+static inline void trace_xen_platform_log(char * s)
+{
+}
+
+static inline void trace_qemu_coroutine_enter(void * from, void * to, void * opaque)
+{
+}
+
+static inline void trace_qemu_coroutine_yield(void * from, void * to)
+{
+}
+
+static inline void trace_qemu_coroutine_terminate(void * co)
+{
+}
+
+static inline void trace_qemu_co_queue_run_restart(void * co)
+{
+}
+
+static inline void trace_qemu_co_queue_next(void * nxt)
+{
+}
+
+static inline void trace_qemu_co_mutex_lock_entry(void * mutex, void * self)
+{
+}
+
+static inline void trace_qemu_co_mutex_lock_return(void * mutex, void * self)
+{
+}
+
+static inline void trace_qemu_co_mutex_unlock_entry(void * mutex, void * self)
+{
+}
+
+static inline void trace_qemu_co_mutex_unlock_return(void * mutex, void * self)
+{
+}
+
+static inline void trace_escc_put_queue(char channel, int b)
+{
+}
+
+static inline void trace_escc_get_queue(char channel, int val)
+{
+}
+
+static inline void trace_escc_update_irq(int irq)
+{
+}
+
+static inline void trace_escc_update_parameters(char channel, int speed, int parity, int data_bits, int stop_bits)
+{
+}
+
+static inline void trace_escc_mem_writeb_ctrl(char channel, uint32_t reg, uint32_t val)
+{
+}
+
+static inline void trace_escc_mem_writeb_data(char channel, uint32_t val)
+{
+}
+
+static inline void trace_escc_mem_readb_ctrl(char channel, uint32_t reg, uint8_t val)
+{
+}
+
+static inline void trace_escc_mem_readb_data(char channel, uint32_t ret)
+{
+}
+
+static inline void trace_escc_serial_receive_byte(char channel, int ch)
+{
+}
+
+static inline void trace_escc_sunkbd_event_in(int ch)
+{
+}
+
+static inline void trace_escc_sunkbd_event_out(int ch)
+{
+}
+
+static inline void trace_escc_kbd_command(int val)
+{
+}
+
+static inline void trace_escc_sunmouse_event(int dx, int dy, int buttons_state)
+{
+}
+
+static inline void trace_iscsi_aio_write16_cb(void * iscsi, int status, void * acb, int canceled)
+{
+}
+
+static inline void trace_iscsi_aio_writev(void * iscsi, int64_t sector_num, int nb_sectors, void * opaque, void * acb)
+{
+}
+
+static inline void trace_iscsi_aio_read16_cb(void * iscsi, int status, void * acb, int canceled)
+{
+}
+
+static inline void trace_iscsi_aio_readv(void * iscsi, int64_t sector_num, int nb_sectors, void * opaque, void * acb)
+{
+}
+
+static inline void trace_esp_error_fifo_overrun(void)
+{
+}
+
+static inline void trace_esp_error_unhandled_command(uint32_t val)
+{
+}
+
+static inline void trace_esp_error_invalid_write(uint32_t val, uint32_t addr)
+{
+}
+
+static inline void trace_esp_raise_irq(void)
+{
+}
+
+static inline void trace_esp_lower_irq(void)
+{
+}
+
+static inline void trace_esp_dma_enable(void)
+{
+}
+
+static inline void trace_esp_dma_disable(void)
+{
+}
+
+static inline void trace_esp_get_cmd(uint32_t dmalen, int target)
+{
+}
+
+static inline void trace_esp_do_busid_cmd(uint8_t busid)
+{
+}
+
+static inline void trace_esp_handle_satn_stop(uint32_t cmdlen)
+{
+}
+
+static inline void trace_esp_write_response(uint32_t status)
+{
+}
+
+static inline void trace_esp_do_dma(uint32_t cmdlen, uint32_t len)
+{
+}
+
+static inline void trace_esp_command_complete(void)
+{
+}
+
+static inline void trace_esp_command_complete_unexpected(void)
+{
+}
+
+static inline void trace_esp_command_complete_fail(void)
+{
+}
+
+static inline void trace_esp_transfer_data(uint32_t dma_left, int32_t ti_size)
+{
+}
+
+static inline void trace_esp_handle_ti(uint32_t minlen)
+{
+}
+
+static inline void trace_esp_handle_ti_cmd(uint32_t cmdlen)
+{
+}
+
+static inline void trace_esp_mem_readb(uint32_t saddr, uint8_t reg)
+{
+}
+
+static inline void trace_esp_mem_writeb(uint32_t saddr, uint8_t reg, uint32_t val)
+{
+}
+
+static inline void trace_esp_mem_writeb_cmd_nop(uint32_t val)
+{
+}
+
+static inline void trace_esp_mem_writeb_cmd_flush(uint32_t val)
+{
+}
+
+static inline void trace_esp_mem_writeb_cmd_reset(uint32_t val)
+{
+}
+
+static inline void trace_esp_mem_writeb_cmd_bus_reset(uint32_t val)
+{
+}
+
+static inline void trace_esp_mem_writeb_cmd_iccs(uint32_t val)
+{
+}
+
+static inline void trace_esp_mem_writeb_cmd_msgacc(uint32_t val)
+{
+}
+
+static inline void trace_esp_mem_writeb_cmd_pad(uint32_t val)
+{
+}
+
+static inline void trace_esp_mem_writeb_cmd_satn(uint32_t val)
+{
+}
+
+static inline void trace_esp_mem_writeb_cmd_rstatn(uint32_t val)
+{
+}
+
+static inline void trace_esp_mem_writeb_cmd_sel(uint32_t val)
+{
+}
+
+static inline void trace_esp_mem_writeb_cmd_selatn(uint32_t val)
+{
+}
+
+static inline void trace_esp_mem_writeb_cmd_selatns(uint32_t val)
+{
+}
+
+static inline void trace_esp_mem_writeb_cmd_ensel(uint32_t val)
+{
+}
+
+static inline void trace_esp_mem_writeb_cmd_dissel(uint32_t val)
+{
+}
+
+static inline void trace_esp_pci_error_invalid_dma_direction(void)
+{
+}
+
+static inline void trace_esp_pci_error_invalid_read(uint32_t reg)
+{
+}
+
+static inline void trace_esp_pci_error_invalid_write(uint32_t reg)
+{
+}
+
+static inline void trace_esp_pci_error_invalid_write_dma(uint32_t val, uint32_t addr)
+{
+}
+
+static inline void trace_esp_pci_dma_read(uint32_t saddr, uint32_t reg)
+{
+}
+
+static inline void trace_esp_pci_dma_write(uint32_t saddr, uint32_t reg, uint32_t val)
+{
+}
+
+static inline void trace_esp_pci_dma_idle(uint32_t val)
+{
+}
+
+static inline void trace_esp_pci_dma_blast(uint32_t val)
+{
+}
+
+static inline void trace_esp_pci_dma_abort(uint32_t val)
+{
+}
+
+static inline void trace_esp_pci_dma_start(uint32_t val)
+{
+}
+
+static inline void trace_esp_pci_sbac_read(uint32_t reg)
+{
+}
+
+static inline void trace_esp_pci_sbac_write(uint32_t reg, uint32_t val)
+{
+}
+
+static inline void trace_handle_qmp_command(void * mon, const char * cmd_name)
+{
+}
+
+static inline void trace_monitor_protocol_emitter(void * mon)
+{
+}
+
+static inline void trace_monitor_protocol_event(uint32_t event, const char * evname, void * data)
+{
+}
+
+static inline void trace_monitor_protocol_event_handler(uint32_t event, void * data, uint64_t last, uint64_t now)
+{
+}
+
+static inline void trace_monitor_protocol_event_emit(uint32_t event, void * data)
+{
+}
+
+static inline void trace_monitor_protocol_event_queue(uint32_t event, void * data, uint64_t rate, uint64_t last, uint64_t now)
+{
+}
+
+static inline void trace_monitor_protocol_event_throttle(uint32_t event, uint64_t rate)
+{
+}
+
+static inline void trace_open_eth_mii_write(unsigned idx, uint16_t v)
+{
+}
+
+static inline void trace_open_eth_mii_read(unsigned idx, uint16_t v)
+{
+}
+
+static inline void trace_open_eth_update_irq(uint32_t v)
+{
+}
+
+static inline void trace_open_eth_receive(unsigned len)
+{
+}
+
+static inline void trace_open_eth_receive_mcast(unsigned idx, uint32_t h0, uint32_t h1)
+{
+}
+
+static inline void trace_open_eth_receive_reject(void)
+{
+}
+
+static inline void trace_open_eth_receive_desc(uint32_t addr, uint32_t len_flags)
+{
+}
+
+static inline void trace_open_eth_start_xmit(uint32_t addr, unsigned len, unsigned tx_len)
+{
+}
+
+static inline void trace_open_eth_reg_read(uint32_t addr, uint32_t v)
+{
+}
+
+static inline void trace_open_eth_reg_write(uint32_t addr, uint32_t v)
+{
+}
+
+static inline void trace_open_eth_desc_read(uint32_t addr, uint32_t v)
+{
+}
+
+static inline void trace_open_eth_desc_write(uint32_t addr, uint32_t v)
+{
+}
+
+static inline void trace_v9fs_rerror(uint16_t tag, uint8_t id, int err)
+{
+}
+
+static inline void trace_v9fs_version(uint16_t tag, uint8_t id, int32_t msize, char* version)
+{
+}
+
+static inline void trace_v9fs_version_return(uint16_t tag, uint8_t id, int32_t msize, char* version)
+{
+}
+
+static inline void trace_v9fs_attach(uint16_t tag, uint8_t id, int32_t fid, int32_t afid, char* uname, char* aname)
+{
+}
+
+static inline void trace_v9fs_attach_return(uint16_t tag, uint8_t id, int8_t type, int32_t version, int64_t path)
+{
+}
+
+static inline void trace_v9fs_stat(uint16_t tag, uint8_t id, int32_t fid)
+{
+}
+
+static inline void trace_v9fs_stat_return(uint16_t tag, uint8_t id, int32_t mode, int32_t atime, int32_t mtime, int64_t length)
+{
+}
+
+static inline void trace_v9fs_getattr(uint16_t tag, uint8_t id, int32_t fid, uint64_t request_mask)
+{
+}
+
+static inline void trace_v9fs_getattr_return(uint16_t tag, uint8_t id, uint64_t result_mask, uint32_t mode, uint32_t uid, uint32_t gid)
+{
+}
+
+static inline void trace_v9fs_walk(uint16_t tag, uint8_t id, int32_t fid, int32_t newfid, uint16_t nwnames)
+{
+}
+
+static inline void trace_v9fs_walk_return(uint16_t tag, uint8_t id, uint16_t nwnames, void* qids)
+{
+}
+
+static inline void trace_v9fs_open(uint16_t tag, uint8_t id, int32_t fid, int32_t mode)
+{
+}
+
+static inline void trace_v9fs_open_return(uint16_t tag, uint8_t id, int8_t type, int32_t version, int64_t path, int iounit)
+{
+}
+
+static inline void trace_v9fs_lcreate(uint16_t tag, uint8_t id, int32_t dfid, int32_t flags, int32_t mode, uint32_t gid)
+{
+}
+
+static inline void trace_v9fs_lcreate_return(uint16_t tag, uint8_t id, int8_t type, int32_t version, int64_t path, int32_t iounit)
+{
+}
+
+static inline void trace_v9fs_fsync(uint16_t tag, uint8_t id, int32_t fid, int datasync)
+{
+}
+
+static inline void trace_v9fs_clunk(uint16_t tag, uint8_t id, int32_t fid)
+{
+}
+
+static inline void trace_v9fs_read(uint16_t tag, uint8_t id, int32_t fid, uint64_t off, uint32_t max_count)
+{
+}
+
+static inline void trace_v9fs_read_return(uint16_t tag, uint8_t id, int32_t count, ssize_t err)
+{
+}
+
+static inline void trace_v9fs_readdir(uint16_t tag, uint8_t id, int32_t fid, uint64_t offset, uint32_t max_count)
+{
+}
+
+static inline void trace_v9fs_readdir_return(uint16_t tag, uint8_t id, uint32_t count, ssize_t retval)
+{
+}
+
+static inline void trace_v9fs_write(uint16_t tag, uint8_t id, int32_t fid, uint64_t off, uint32_t count, int cnt)
+{
+}
+
+static inline void trace_v9fs_write_return(uint16_t tag, uint8_t id, int32_t total, ssize_t err)
+{
+}
+
+static inline void trace_v9fs_create(uint16_t tag, uint8_t id, int32_t fid, char* name, int32_t perm, int8_t mode)
+{
+}
+
+static inline void trace_v9fs_create_return(uint16_t tag, uint8_t id, int8_t type, int32_t version, int64_t path, int iounit)
+{
+}
+
+static inline void trace_v9fs_symlink(uint16_t tag, uint8_t id, int32_t fid, char* name, char* symname, uint32_t gid)
+{
+}
+
+static inline void trace_v9fs_symlink_return(uint16_t tag, uint8_t id, int8_t type, int32_t version, int64_t path)
+{
+}
+
+static inline void trace_v9fs_flush(uint16_t tag, uint8_t id, int16_t flush_tag)
+{
+}
+
+static inline void trace_v9fs_link(uint16_t tag, uint8_t id, int32_t dfid, int32_t oldfid, char* name)
+{
+}
+
+static inline void trace_v9fs_remove(uint16_t tag, uint8_t id, int32_t fid)
+{
+}
+
+static inline void trace_v9fs_wstat(uint16_t tag, uint8_t id, int32_t fid, int32_t mode, int32_t atime, int32_t mtime)
+{
+}
+
+static inline void trace_v9fs_mknod(uint16_t tag, uint8_t id, int32_t fid, int mode, int major, int minor)
+{
+}
+
+static inline void trace_v9fs_mknod_return(uint16_t tag, uint8_t id, int8_t type, int32_t version, int64_t path)
+{
+}
+
+static inline void trace_v9fs_lock(uint16_t tag, uint8_t id, int32_t fid, uint8_t type, uint64_t start, uint64_t length)
+{
+}
+
+static inline void trace_v9fs_lock_return(uint16_t tag, uint8_t id, int8_t status)
+{
+}
+
+static inline void trace_v9fs_getlock(uint16_t tag, uint8_t id, int32_t fid, uint8_t type, uint64_t start, uint64_t length)
+{
+}
+
+static inline void trace_v9fs_getlock_return(uint16_t tag, uint8_t id, uint8_t type, uint64_t start, uint64_t length, uint32_t proc_id)
+{
+}
+
+static inline void trace_v9fs_mkdir(uint16_t tag, uint8_t id, int32_t fid, char* name, int mode, uint32_t gid)
+{
+}
+
+static inline void trace_v9fs_mkdir_return(uint16_t tag, uint8_t id, int8_t type, int32_t version, int64_t path, int err)
+{
+}
+
+static inline void trace_v9fs_xattrwalk(uint16_t tag, uint8_t id, int32_t fid, int32_t newfid, char* name)
+{
+}
+
+static inline void trace_v9fs_xattrwalk_return(uint16_t tag, uint8_t id, int64_t size)
+{
+}
+
+static inline void trace_v9fs_xattrcreate(uint16_t tag, uint8_t id, int32_t fid, char* name, int64_t size, int flags)
+{
+}
+
+static inline void trace_v9fs_readlink(uint16_t tag, uint8_t id, int32_t fid)
+{
+}
+
+static inline void trace_v9fs_readlink_return(uint16_t tag, uint8_t id, char* target)
+{
+}
+
+static inline void trace_mmu_helper_dfault(uint64_t address, uint64_t context, int mmu_idx, uint32_t tl)
+{
+}
+
+static inline void trace_mmu_helper_dprot(uint64_t address, uint64_t context, int mmu_idx, uint32_t tl)
+{
+}
+
+static inline void trace_mmu_helper_dmiss(uint64_t address, uint64_t context)
+{
+}
+
+static inline void trace_mmu_helper_tfault(uint64_t address, uint64_t context)
+{
+}
+
+static inline void trace_mmu_helper_tmiss(uint64_t address, uint64_t context)
+{
+}
+
+static inline void trace_mmu_helper_get_phys_addr_code(uint32_t tl, int mmu_idx, uint64_t prim_context, uint64_t sec_context, uint64_t address)
+{
+}
+
+static inline void trace_mmu_helper_get_phys_addr_data(uint32_t tl, int mmu_idx, uint64_t prim_context, uint64_t sec_context, uint64_t address)
+{
+}
+
+static inline void trace_mmu_helper_mmu_fault(uint64_t address, uint64_t paddr, int mmu_idx, uint32_t tl, uint64_t prim_context, uint64_t sec_context)
+{
+}
+
+static inline void trace_int_helper_set_softint(uint32_t softint)
+{
+}
+
+static inline void trace_int_helper_clear_softint(uint32_t softint)
+{
+}
+
+static inline void trace_int_helper_write_softint(uint32_t softint)
+{
+}
+
+static inline void trace_int_helper_icache_freeze(void)
+{
+}
+
+static inline void trace_int_helper_dcache_freeze(void)
+{
+}
+
+static inline void trace_win_helper_gregset_error(uint32_t pstate)
+{
+}
+
+static inline void trace_win_helper_switch_pstate(uint32_t pstate_regs, uint32_t new_pstate_regs)
+{
+}
+
+static inline void trace_win_helper_no_switch_pstate(uint32_t new_pstate_regs)
+{
+}
+
+static inline void trace_win_helper_wrpil(uint32_t psrpil, uint32_t new_pil)
+{
+}
+
+static inline void trace_win_helper_done(uint32_t tl)
+{
+}
+
+static inline void trace_win_helper_retry(uint32_t tl)
+{
+}
+
+static inline void trace_dma_bdrv_io(void * dbs, void * bs, int64_t sector_num, bool to_dev)
+{
+}
+
+static inline void trace_dma_aio_cancel(void * dbs)
+{
+}
+
+static inline void trace_dma_complete(void * dbs, int ret, void * cb)
+{
+}
+
+static inline void trace_dma_bdrv_cb(void * dbs, int ret)
+{
+}
+
+static inline void trace_dma_map_wait(void * dbs)
+{
+}
+
+static inline void trace_console_gfx_new(void)
+{
+}
+
+static inline void trace_console_txt_new(int w, int h)
+{
+}
+
+static inline void trace_console_select(int nr)
+{
+}
+
+static inline void trace_console_refresh(int interval)
+{
+}
+
+static inline void trace_displaysurface_create(void * display_surface, int w, int h)
+{
+}
+
+static inline void trace_displaysurface_create_from(void * display_surface, int w, int h, int bpp, int swap)
+{
+}
+
+static inline void trace_displaysurface_free(void * display_surface)
+{
+}
+
+static inline void trace_displaychangelistener_register(void * dcl, const char * name)
+{
+}
+
+static inline void trace_displaychangelistener_unregister(void * dcl, const char * name)
+{
+}
+
+static inline void trace_ppm_save(const char * filename, void * display_surface)
+{
+}
+
+static inline void trace_vmware_value_read(uint32_t index, uint32_t value)
+{
+}
+
+static inline void trace_vmware_value_write(uint32_t index, uint32_t value)
+{
+}
+
+static inline void trace_vmware_palette_read(uint32_t index, uint32_t value)
+{
+}
+
+static inline void trace_vmware_palette_write(uint32_t index, uint32_t value)
+{
+}
+
+static inline void trace_vmware_scratch_read(uint32_t index, uint32_t value)
+{
+}
+
+static inline void trace_vmware_scratch_write(uint32_t index, uint32_t value)
+{
+}
+
+static inline void trace_vmware_setmode(uint32_t w, uint32_t h, uint32_t bpp)
+{
+}
+
+static inline void trace_savevm_section_start(void)
+{
+}
+
+static inline void trace_savevm_section_end(unsigned int section_id)
+{
+}
+
+static inline void trace_migration_bitmap_sync_start(void)
+{
+}
+
+static inline void trace_migration_bitmap_sync_end(uint64_t dirty_pages)
+{
+}
+
+static inline void trace_migration_throttle(void)
+{
+}
+
+static inline void trace_qxl_create_guest_primary(int qid, uint32_t width, uint32_t height, uint64_t mem, uint32_t format, uint32_t position)
+{
+}
+
+static inline void trace_qxl_create_guest_primary_rest(int qid, int32_t stride, uint32_t type, uint32_t flags)
+{
+}
+
+static inline void trace_qxl_destroy_primary(int qid)
+{
+}
+
+static inline void trace_qxl_enter_vga_mode(int qid)
+{
+}
+
+static inline void trace_qxl_exit_vga_mode(int qid)
+{
+}
+
+static inline void trace_qxl_hard_reset(int qid, int64_t loadvm)
+{
+}
+
+static inline void trace_qxl_interface_async_complete_io(int qid, uint32_t current_async, void * cookie)
+{
+}
+
+static inline void trace_qxl_interface_attach_worker(int qid)
+{
+}
+
+static inline void trace_qxl_interface_get_init_info(int qid)
+{
+}
+
+static inline void trace_qxl_interface_set_compression_level(int qid, int64_t level)
+{
+}
+
+static inline void trace_qxl_interface_update_area_complete(int qid, uint32_t surface_id, uint32_t dirty_left, uint32_t dirty_right, uint32_t dirty_top, uint32_t dirty_bottom)
+{
+}
+
+static inline void trace_qxl_interface_update_area_complete_rest(int qid, uint32_t num_updated_rects)
+{
+}
+
+static inline void trace_qxl_interface_update_area_complete_overflow(int qid, int max)
+{
+}
+
+static inline void trace_qxl_interface_update_area_complete_schedule_bh(int qid, uint32_t num_dirty)
+{
+}
+
+static inline void trace_qxl_io_destroy_primary_ignored(int qid, const char * mode)
+{
+}
+
+static inline void trace_qxl_io_log(int qid, const uint8_t * log_buf)
+{
+}
+
+static inline void trace_qxl_io_read_unexpected(int qid)
+{
+}
+
+static inline void trace_qxl_io_unexpected_vga_mode(int qid, uint64_t addr, uint64_t val, const char * desc)
+{
+}
+
+static inline void trace_qxl_io_write(int qid, const char * mode, uint64_t addr, uint64_t val, unsigned size, int async)
+{
+}
+
+static inline void trace_qxl_memslot_add_guest(int qid, uint32_t slot_id, uint64_t guest_start, uint64_t guest_end)
+{
+}
+
+static inline void trace_qxl_post_load(int qid, const char * mode)
+{
+}
+
+static inline void trace_qxl_pre_load(int qid)
+{
+}
+
+static inline void trace_qxl_pre_save(int qid)
+{
+}
+
+static inline void trace_qxl_reset_surfaces(int qid)
+{
+}
+
+static inline void trace_qxl_ring_command_check(int qid, const char * mode)
+{
+}
+
+static inline void trace_qxl_ring_command_get(int qid, const char * mode)
+{
+}
+
+static inline void trace_qxl_ring_command_req_notification(int qid)
+{
+}
+
+static inline void trace_qxl_ring_cursor_check(int qid, const char * mode)
+{
+}
+
+static inline void trace_qxl_ring_cursor_get(int qid, const char * mode)
+{
+}
+
+static inline void trace_qxl_ring_cursor_req_notification(int qid)
+{
+}
+
+static inline void trace_qxl_ring_res_push(int qid, const char * mode, uint32_t surface_count, uint32_t free_res, void * last_release, const char * notify)
+{
+}
+
+static inline void trace_qxl_ring_res_push_rest(int qid, uint32_t ring_has, uint32_t ring_size, uint32_t prod, uint32_t cons)
+{
+}
+
+static inline void trace_qxl_ring_res_put(int qid, uint32_t free_res)
+{
+}
+
+static inline void trace_qxl_set_mode(int qid, int modenr, uint32_t x_res, uint32_t y_res, uint32_t bits, uint64_t devmem)
+{
+}
+
+static inline void trace_qxl_soft_reset(int qid)
+{
+}
+
+static inline void trace_qemu_spice_add_memslot(int qid, uint32_t slot_id, unsigned long virt_start, unsigned long virt_end, int async)
+{
+}
+
+static inline void trace_qemu_spice_del_memslot(int qid, uint32_t gid, uint32_t slot_id)
+{
+}
+
+static inline void trace_qemu_spice_create_primary_surface(int qid, uint32_t sid, void * surface, int async)
+{
+}
+
+static inline void trace_qemu_spice_destroy_primary_surface(int qid, uint32_t sid, int async)
+{
+}
+
+static inline void trace_qemu_spice_wakeup(uint32_t qid)
+{
+}
+
+static inline void trace_qemu_spice_start(uint32_t qid)
+{
+}
+
+static inline void trace_qemu_spice_stop(uint32_t qid)
+{
+}
+
+static inline void trace_qemu_spice_create_update(uint32_t left, uint32_t right, uint32_t top, uint32_t bottom)
+{
+}
+
+static inline void trace_qxl_spice_destroy_surfaces_complete(int qid)
+{
+}
+
+static inline void trace_qxl_spice_destroy_surfaces(int qid, int async)
+{
+}
+
+static inline void trace_qxl_spice_destroy_surface_wait_complete(int qid, uint32_t id)
+{
+}
+
+static inline void trace_qxl_spice_destroy_surface_wait(int qid, uint32_t id, int async)
+{
+}
+
+static inline void trace_qxl_spice_flush_surfaces_async(int qid, uint32_t surface_count, uint32_t num_free_res)
+{
+}
+
+static inline void trace_qxl_spice_monitors_config(int qid)
+{
+}
+
+static inline void trace_qxl_spice_loadvm_commands(int qid, void * ext, uint32_t count)
+{
+}
+
+static inline void trace_qxl_spice_oom(int qid)
+{
+}
+
+static inline void trace_qxl_spice_reset_cursor(int qid)
+{
+}
+
+static inline void trace_qxl_spice_reset_image_cache(int qid)
+{
+}
+
+static inline void trace_qxl_spice_reset_memslots(int qid)
+{
+}
+
+static inline void trace_qxl_spice_update_area(int qid, uint32_t surface_id, uint32_t left, uint32_t right, uint32_t top, uint32_t bottom)
+{
+}
+
+static inline void trace_qxl_spice_update_area_rest(int qid, uint32_t num_dirty_rects, uint32_t clear_dirty_region)
+{
+}
+
+static inline void trace_qxl_surfaces_dirty(int qid, int surface, int offset, int size)
+{
+}
+
+static inline void trace_qxl_send_events(int qid, uint32_t events)
+{
+}
+
+static inline void trace_qxl_send_events_vm_stopped(int qid, uint32_t events)
+{
+}
+
+static inline void trace_qxl_set_guest_bug(int qid)
+{
+}
+
+static inline void trace_qxl_interrupt_client_monitors_config(int qid, int num_heads, void * heads)
+{
+}
+
+static inline void trace_qxl_client_monitors_config_unsupported_by_guest(int qid, uint32_t int_mask, void * client_monitors_config)
+{
+}
+
+static inline void trace_qxl_client_monitors_config_unsupported_by_device(int qid, int revision)
+{
+}
+
+static inline void trace_qxl_client_monitors_config_capped(int qid, int requested, int limit)
+{
+}
+
+static inline void trace_qxl_client_monitors_config_crc(int qid, unsigned size, uint32_t crc32)
+{
+}
+
+static inline void trace_qxl_set_client_capabilities_unsupported_by_revision(int qid, int revision)
+{
+}
+
+static inline void trace_qxl_render_blit_guest_primary_initialized(void)
+{
+}
+
+static inline void trace_qxl_render_blit(int32_t stride, int32_t left, int32_t right, int32_t top, int32_t bottom)
+{
+}
+
+static inline void trace_qxl_render_guest_primary_resized(int32_t width, int32_t height, int32_t stride, int32_t bytes_pp, int32_t bits_pp)
+{
+}
+
+static inline void trace_qxl_render_update_area_done(void * cookie)
+{
+}
+
+static inline void trace_spapr_pci_msi(const char * msg, uint32_t n, uint32_t ca)
+{
+}
+
+static inline void trace_spapr_pci_msi_setup(const char * name, unsigned vector, uint64_t addr)
+{
+}
+
+static inline void trace_spapr_pci_rtas_ibm_change_msi(unsigned func, unsigned req)
+{
+}
+
+static inline void trace_spapr_pci_rtas_ibm_query_interrupt_source_number(unsigned ioa, unsigned intr)
+{
+}
+
+static inline void trace_spapr_pci_msi_write(uint64_t addr, uint64_t data, uint32_t dt_irq)
+{
+}
+
+static inline void trace_spapr_pci_lsi_set(const char * busname, int pin, uint32_t irq)
+{
+}
+
+static inline void trace_xics_icp_check_ipi(int server, uint8_t mfrr)
+{
+}
+
+static inline void trace_xics_icp_accept(uint32_t old_xirr, uint32_t new_xirr)
+{
+}
+
+static inline void trace_xics_icp_eoi(int server, uint32_t xirr, uint32_t new_xirr)
+{
+}
+
+static inline void trace_xics_icp_irq(int server, int nr, uint8_t priority)
+{
+}
+
+static inline void trace_xics_icp_raise(uint32_t xirr, uint8_t pending_priority)
+{
+}
+
+static inline void trace_xics_set_irq_msi(int srcno, int nr)
+{
+}
+
+static inline void trace_xics_masked_pending(void)
+{
+}
+
+static inline void trace_xics_set_irq_lsi(int srcno, int nr)
+{
+}
+
+static inline void trace_xics_ics_write_xive(int nr, int srcno, int server, uint8_t priority)
+{
+}
+
+static inline void trace_xics_ics_reject(int nr, int srcno)
+{
+}
+
+static inline void trace_xics_ics_eoi(int nr)
+{
+}
+
+static inline void trace_hbitmap_iter_skip_words(const void * hb, void * hbi, uint64_t pos, unsigned long cur)
+{
+}
+
+static inline void trace_hbitmap_reset(void * hb, uint64_t start, uint64_t count, uint64_t sbit, uint64_t ebit)
+{
+}
+
+static inline void trace_hbitmap_set(void * hb, uint64_t start, uint64_t count, uint64_t sbit, uint64_t ebit)
+{
+}
+
+static inline void trace_ioinst(const char * insn)
+{
+}
+
+static inline void trace_ioinst_sch_id(const char * insn, int cssid, int ssid, int schid)
+{
+}
+
+static inline void trace_ioinst_chp_id(const char * insn, int cssid, int chpid)
+{
+}
+
+static inline void trace_ioinst_chsc_cmd(uint16_t cmd, uint16_t len)
+{
+}
+
+static inline void trace_css_enable_facility(const char * facility)
+{
+}
+
+static inline void trace_css_crw(uint8_t rsc, uint8_t erc, uint16_t rsid, const char * chained)
+{
+}
+
+static inline void trace_css_chpid_add(uint8_t cssid, uint8_t chpid, uint8_t type)
+{
+}
+
+static inline void trace_css_new_image(uint8_t cssid, const char * default_cssid)
+{
+}
+
+static inline void trace_css_assign_subch(const char * do_assign, uint8_t cssid, uint8_t ssid, uint16_t schid, uint16_t devno)
+{
+}
+
+static inline void trace_css_io_interrupt(int cssid, int ssid, int schid, uint32_t intparm, uint8_t isc, const char * conditional)
+{
+}
+
+static inline void trace_virtio_ccw_interpret_ccw(int cssid, int ssid, int schid, int cmd_code)
+{
+}
+
+static inline void trace_virtio_ccw_new_device(int cssid, int ssid, int schid, int devno, const char * devno_mode)
+{
+}
+
+static inline void trace_migrate_set_state(int new_state)
+{
+}
+
+static inline void trace_kvm_ioctl(int type, void * arg)
+{
+}
+
+static inline void trace_kvm_vm_ioctl(int type, void * arg)
+{
+}
+
+static inline void trace_kvm_vcpu_ioctl(int cpu_index, int type, void * arg)
+{
+}
+
+static inline void trace_kvm_run_exit(int cpu_index, uint32_t reason)
+{
+}
+
+static inline void trace_object_dynamic_cast_assert(const char * type, const char * target, const char * file, int line, const char * func)
+{
+}
+
+static inline void trace_object_class_dynamic_cast_assert(const char * type, const char * target, const char * file, int line, const char * func)
+{
+}
+#endif /* TRACE__GENERATED_TRACERS_H */
diff --git a/contrib/qemu/util/aes.c b/contrib/qemu/util/aes.c
new file mode 100644
index 000000000..91e97fa6e
--- /dev/null
+++ b/contrib/qemu/util/aes.c
@@ -0,0 +1,1314 @@
+/**
+ *
+ * aes.c - integrated in QEMU by Fabrice Bellard from the OpenSSL project.
+ */
+/*
+ * rijndael-alg-fst.c
+ *
+ * @version 3.0 (December 2000)
+ *
+ * Optimised ANSI C code for the Rijndael cipher (now AES)
+ *
+ * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
+ * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
+ * @author Paulo Barreto <paulo.barreto@terra.com.br>
+ *
+ * This code is hereby placed in the public domain.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "qemu-common.h"
+#include "qemu/aes.h"
+
+#ifndef NDEBUG
+#define NDEBUG
+#endif
+
+typedef uint32_t u32;
+typedef uint16_t u16;
+typedef uint8_t u8;
+
+/* This controls loop-unrolling in aes_core.c */
+#undef FULL_UNROLL
+# define GETU32(pt) (((u32)(pt)[0] << 24) ^ ((u32)(pt)[1] << 16) ^ ((u32)(pt)[2] << 8) ^ ((u32)(pt)[3]))
+# define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >> 8); (ct)[3] = (u8)(st); }
+
+/*
+AES_Te0[x] = S [x].[02, 01, 01, 03];
+AES_Te1[x] = S [x].[03, 02, 01, 01];
+AES_Te2[x] = S [x].[01, 03, 02, 01];
+AES_Te3[x] = S [x].[01, 01, 03, 02];
+AES_Te4[x] = S [x].[01, 01, 01, 01];
+
+AES_Td0[x] = Si[x].[0e, 09, 0d, 0b];
+AES_Td1[x] = Si[x].[0b, 0e, 09, 0d];
+AES_Td2[x] = Si[x].[0d, 0b, 0e, 09];
+AES_Td3[x] = Si[x].[09, 0d, 0b, 0e];
+AES_Td4[x] = Si[x].[01, 01, 01, 01];
+*/
+
+const uint32_t AES_Te0[256] = {
+ 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
+ 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
+ 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
+ 0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
+ 0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
+ 0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
+ 0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
+ 0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
+ 0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
+ 0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
+ 0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
+ 0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
+ 0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
+ 0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
+ 0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
+ 0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
+ 0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
+ 0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
+ 0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
+ 0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
+ 0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
+ 0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
+ 0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
+ 0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
+ 0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
+ 0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
+ 0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
+ 0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
+ 0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
+ 0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
+ 0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
+ 0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
+ 0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
+ 0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
+ 0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
+ 0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
+ 0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
+ 0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
+ 0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
+ 0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
+ 0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
+ 0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
+ 0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
+ 0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
+ 0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
+ 0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
+ 0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
+ 0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
+ 0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
+ 0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
+ 0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
+ 0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
+ 0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
+ 0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
+ 0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
+ 0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
+ 0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
+ 0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
+ 0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
+ 0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
+ 0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
+ 0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
+ 0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
+ 0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
+};
+const uint32_t AES_Te1[256] = {
+ 0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
+ 0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
+ 0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
+ 0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
+ 0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
+ 0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
+ 0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
+ 0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
+ 0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
+ 0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
+ 0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
+ 0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
+ 0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
+ 0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
+ 0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
+ 0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
+ 0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
+ 0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
+ 0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
+ 0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
+ 0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
+ 0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
+ 0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
+ 0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
+ 0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
+ 0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
+ 0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
+ 0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
+ 0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
+ 0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
+ 0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
+ 0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
+ 0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
+ 0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
+ 0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
+ 0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
+ 0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
+ 0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
+ 0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
+ 0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
+ 0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
+ 0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
+ 0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
+ 0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
+ 0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
+ 0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
+ 0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
+ 0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
+ 0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
+ 0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
+ 0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
+ 0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
+ 0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
+ 0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
+ 0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
+ 0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
+ 0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
+ 0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
+ 0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
+ 0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
+ 0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
+ 0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
+ 0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
+ 0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
+};
+const uint32_t AES_Te2[256] = {
+ 0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
+ 0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
+ 0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
+ 0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
+ 0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
+ 0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
+ 0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
+ 0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
+ 0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
+ 0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
+ 0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
+ 0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
+ 0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
+ 0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
+ 0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
+ 0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
+ 0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
+ 0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
+ 0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
+ 0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
+ 0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
+ 0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
+ 0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
+ 0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
+ 0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
+ 0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
+ 0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
+ 0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
+ 0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
+ 0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
+ 0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
+ 0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
+ 0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
+ 0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
+ 0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
+ 0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
+ 0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
+ 0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
+ 0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
+ 0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
+ 0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
+ 0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
+ 0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
+ 0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
+ 0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
+ 0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
+ 0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
+ 0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
+ 0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
+ 0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
+ 0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
+ 0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
+ 0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
+ 0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
+ 0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
+ 0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
+ 0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
+ 0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
+ 0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
+ 0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
+ 0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
+ 0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
+ 0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
+ 0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
+};
+const uint32_t AES_Te3[256] = {
+
+ 0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
+ 0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
+ 0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
+ 0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
+ 0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
+ 0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
+ 0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
+ 0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
+ 0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
+ 0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
+ 0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
+ 0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
+ 0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
+ 0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
+ 0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
+ 0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
+ 0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
+ 0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
+ 0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
+ 0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
+ 0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
+ 0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
+ 0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
+ 0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
+ 0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
+ 0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
+ 0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
+ 0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
+ 0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
+ 0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
+ 0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
+ 0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
+ 0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
+ 0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
+ 0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
+ 0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
+ 0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
+ 0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
+ 0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
+ 0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
+ 0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
+ 0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
+ 0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
+ 0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
+ 0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
+ 0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
+ 0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
+ 0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
+ 0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
+ 0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
+ 0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
+ 0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
+ 0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
+ 0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
+ 0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
+ 0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
+ 0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
+ 0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
+ 0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
+ 0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
+ 0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
+ 0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
+ 0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
+ 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
+};
+const uint32_t AES_Te4[256] = {
+ 0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU,
+ 0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U,
+ 0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU,
+ 0xfefefefeU, 0xd7d7d7d7U, 0xababababU, 0x76767676U,
+ 0xcacacacaU, 0x82828282U, 0xc9c9c9c9U, 0x7d7d7d7dU,
+ 0xfafafafaU, 0x59595959U, 0x47474747U, 0xf0f0f0f0U,
+ 0xadadadadU, 0xd4d4d4d4U, 0xa2a2a2a2U, 0xafafafafU,
+ 0x9c9c9c9cU, 0xa4a4a4a4U, 0x72727272U, 0xc0c0c0c0U,
+ 0xb7b7b7b7U, 0xfdfdfdfdU, 0x93939393U, 0x26262626U,
+ 0x36363636U, 0x3f3f3f3fU, 0xf7f7f7f7U, 0xccccccccU,
+ 0x34343434U, 0xa5a5a5a5U, 0xe5e5e5e5U, 0xf1f1f1f1U,
+ 0x71717171U, 0xd8d8d8d8U, 0x31313131U, 0x15151515U,
+ 0x04040404U, 0xc7c7c7c7U, 0x23232323U, 0xc3c3c3c3U,
+ 0x18181818U, 0x96969696U, 0x05050505U, 0x9a9a9a9aU,
+ 0x07070707U, 0x12121212U, 0x80808080U, 0xe2e2e2e2U,
+ 0xebebebebU, 0x27272727U, 0xb2b2b2b2U, 0x75757575U,
+ 0x09090909U, 0x83838383U, 0x2c2c2c2cU, 0x1a1a1a1aU,
+ 0x1b1b1b1bU, 0x6e6e6e6eU, 0x5a5a5a5aU, 0xa0a0a0a0U,
+ 0x52525252U, 0x3b3b3b3bU, 0xd6d6d6d6U, 0xb3b3b3b3U,
+ 0x29292929U, 0xe3e3e3e3U, 0x2f2f2f2fU, 0x84848484U,
+ 0x53535353U, 0xd1d1d1d1U, 0x00000000U, 0xededededU,
+ 0x20202020U, 0xfcfcfcfcU, 0xb1b1b1b1U, 0x5b5b5b5bU,
+ 0x6a6a6a6aU, 0xcbcbcbcbU, 0xbebebebeU, 0x39393939U,
+ 0x4a4a4a4aU, 0x4c4c4c4cU, 0x58585858U, 0xcfcfcfcfU,
+ 0xd0d0d0d0U, 0xefefefefU, 0xaaaaaaaaU, 0xfbfbfbfbU,
+ 0x43434343U, 0x4d4d4d4dU, 0x33333333U, 0x85858585U,
+ 0x45454545U, 0xf9f9f9f9U, 0x02020202U, 0x7f7f7f7fU,
+ 0x50505050U, 0x3c3c3c3cU, 0x9f9f9f9fU, 0xa8a8a8a8U,
+ 0x51515151U, 0xa3a3a3a3U, 0x40404040U, 0x8f8f8f8fU,
+ 0x92929292U, 0x9d9d9d9dU, 0x38383838U, 0xf5f5f5f5U,
+ 0xbcbcbcbcU, 0xb6b6b6b6U, 0xdadadadaU, 0x21212121U,
+ 0x10101010U, 0xffffffffU, 0xf3f3f3f3U, 0xd2d2d2d2U,
+ 0xcdcdcdcdU, 0x0c0c0c0cU, 0x13131313U, 0xececececU,
+ 0x5f5f5f5fU, 0x97979797U, 0x44444444U, 0x17171717U,
+ 0xc4c4c4c4U, 0xa7a7a7a7U, 0x7e7e7e7eU, 0x3d3d3d3dU,
+ 0x64646464U, 0x5d5d5d5dU, 0x19191919U, 0x73737373U,
+ 0x60606060U, 0x81818181U, 0x4f4f4f4fU, 0xdcdcdcdcU,
+ 0x22222222U, 0x2a2a2a2aU, 0x90909090U, 0x88888888U,
+ 0x46464646U, 0xeeeeeeeeU, 0xb8b8b8b8U, 0x14141414U,
+ 0xdedededeU, 0x5e5e5e5eU, 0x0b0b0b0bU, 0xdbdbdbdbU,
+ 0xe0e0e0e0U, 0x32323232U, 0x3a3a3a3aU, 0x0a0a0a0aU,
+ 0x49494949U, 0x06060606U, 0x24242424U, 0x5c5c5c5cU,
+ 0xc2c2c2c2U, 0xd3d3d3d3U, 0xacacacacU, 0x62626262U,
+ 0x91919191U, 0x95959595U, 0xe4e4e4e4U, 0x79797979U,
+ 0xe7e7e7e7U, 0xc8c8c8c8U, 0x37373737U, 0x6d6d6d6dU,
+ 0x8d8d8d8dU, 0xd5d5d5d5U, 0x4e4e4e4eU, 0xa9a9a9a9U,
+ 0x6c6c6c6cU, 0x56565656U, 0xf4f4f4f4U, 0xeaeaeaeaU,
+ 0x65656565U, 0x7a7a7a7aU, 0xaeaeaeaeU, 0x08080808U,
+ 0xbabababaU, 0x78787878U, 0x25252525U, 0x2e2e2e2eU,
+ 0x1c1c1c1cU, 0xa6a6a6a6U, 0xb4b4b4b4U, 0xc6c6c6c6U,
+ 0xe8e8e8e8U, 0xddddddddU, 0x74747474U, 0x1f1f1f1fU,
+ 0x4b4b4b4bU, 0xbdbdbdbdU, 0x8b8b8b8bU, 0x8a8a8a8aU,
+ 0x70707070U, 0x3e3e3e3eU, 0xb5b5b5b5U, 0x66666666U,
+ 0x48484848U, 0x03030303U, 0xf6f6f6f6U, 0x0e0e0e0eU,
+ 0x61616161U, 0x35353535U, 0x57575757U, 0xb9b9b9b9U,
+ 0x86868686U, 0xc1c1c1c1U, 0x1d1d1d1dU, 0x9e9e9e9eU,
+ 0xe1e1e1e1U, 0xf8f8f8f8U, 0x98989898U, 0x11111111U,
+ 0x69696969U, 0xd9d9d9d9U, 0x8e8e8e8eU, 0x94949494U,
+ 0x9b9b9b9bU, 0x1e1e1e1eU, 0x87878787U, 0xe9e9e9e9U,
+ 0xcecececeU, 0x55555555U, 0x28282828U, 0xdfdfdfdfU,
+ 0x8c8c8c8cU, 0xa1a1a1a1U, 0x89898989U, 0x0d0d0d0dU,
+ 0xbfbfbfbfU, 0xe6e6e6e6U, 0x42424242U, 0x68686868U,
+ 0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU,
+ 0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U,
+};
+const uint32_t AES_Td0[256] = {
+ 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
+ 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
+ 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
+ 0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
+ 0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
+ 0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
+ 0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
+ 0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
+ 0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
+ 0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
+ 0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
+ 0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
+ 0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
+ 0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
+ 0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
+ 0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
+ 0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
+ 0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
+ 0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
+ 0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
+ 0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
+ 0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
+ 0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
+ 0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
+ 0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
+ 0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
+ 0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
+ 0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
+ 0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
+ 0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
+ 0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
+ 0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
+ 0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
+ 0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
+ 0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
+ 0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
+ 0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
+ 0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
+ 0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
+ 0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
+ 0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
+ 0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
+ 0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
+ 0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
+ 0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
+ 0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
+ 0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
+ 0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
+ 0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
+ 0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
+ 0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
+ 0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
+ 0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
+ 0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
+ 0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
+ 0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
+ 0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
+ 0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
+ 0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
+ 0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
+ 0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
+ 0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
+ 0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
+ 0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
+};
+const uint32_t AES_Td1[256] = {
+ 0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
+ 0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
+ 0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
+ 0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
+ 0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
+ 0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
+ 0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
+ 0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
+ 0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
+ 0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
+ 0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
+ 0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
+ 0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
+ 0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
+ 0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
+ 0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
+ 0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
+ 0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
+ 0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
+ 0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
+ 0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
+ 0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
+ 0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
+ 0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
+ 0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
+ 0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
+ 0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
+ 0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
+ 0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
+ 0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
+ 0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
+ 0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
+ 0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
+ 0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
+ 0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
+ 0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
+ 0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
+ 0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
+ 0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
+ 0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
+ 0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
+ 0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
+ 0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
+ 0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
+ 0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
+ 0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
+ 0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
+ 0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
+ 0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
+ 0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
+ 0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
+ 0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
+ 0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
+ 0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
+ 0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
+ 0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
+ 0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
+ 0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
+ 0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
+ 0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
+ 0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
+ 0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
+ 0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
+ 0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
+};
+const uint32_t AES_Td2[256] = {
+ 0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
+ 0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
+ 0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
+ 0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
+ 0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
+ 0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
+ 0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
+ 0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
+ 0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
+ 0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
+ 0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
+ 0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
+ 0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
+ 0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
+ 0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
+ 0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
+ 0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
+ 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
+ 0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
+ 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
+
+ 0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
+ 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
+ 0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
+ 0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
+ 0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
+ 0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
+ 0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
+ 0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
+ 0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
+ 0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
+ 0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
+ 0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
+ 0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
+ 0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
+ 0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
+ 0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
+ 0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
+ 0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
+ 0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
+ 0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
+ 0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
+ 0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
+ 0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
+ 0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
+ 0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
+ 0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
+ 0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
+ 0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
+ 0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
+ 0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
+ 0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
+ 0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
+ 0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
+ 0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
+ 0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
+ 0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
+ 0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
+ 0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
+ 0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
+ 0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
+ 0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
+ 0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
+ 0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
+ 0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
+};
+const uint32_t AES_Td3[256] = {
+ 0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
+ 0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
+ 0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
+ 0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
+ 0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
+ 0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
+ 0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
+ 0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
+ 0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
+ 0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
+ 0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
+ 0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
+ 0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
+ 0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
+ 0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
+ 0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
+ 0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
+ 0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
+ 0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
+ 0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
+ 0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
+ 0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
+ 0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
+ 0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
+ 0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
+ 0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
+ 0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
+ 0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
+ 0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
+ 0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
+ 0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
+ 0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
+ 0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
+ 0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
+ 0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
+ 0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
+ 0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
+ 0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
+ 0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
+ 0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
+ 0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
+ 0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
+ 0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
+ 0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
+ 0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
+ 0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
+ 0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
+ 0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
+ 0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
+ 0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
+ 0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
+ 0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
+ 0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
+ 0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
+ 0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
+ 0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
+ 0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
+ 0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
+ 0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
+ 0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
+ 0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
+ 0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
+ 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
+ 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
+};
+const uint32_t AES_Td4[256] = {
+ 0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U,
+ 0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U,
+ 0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU,
+ 0x81818181U, 0xf3f3f3f3U, 0xd7d7d7d7U, 0xfbfbfbfbU,
+ 0x7c7c7c7cU, 0xe3e3e3e3U, 0x39393939U, 0x82828282U,
+ 0x9b9b9b9bU, 0x2f2f2f2fU, 0xffffffffU, 0x87878787U,
+ 0x34343434U, 0x8e8e8e8eU, 0x43434343U, 0x44444444U,
+ 0xc4c4c4c4U, 0xdedededeU, 0xe9e9e9e9U, 0xcbcbcbcbU,
+ 0x54545454U, 0x7b7b7b7bU, 0x94949494U, 0x32323232U,
+ 0xa6a6a6a6U, 0xc2c2c2c2U, 0x23232323U, 0x3d3d3d3dU,
+ 0xeeeeeeeeU, 0x4c4c4c4cU, 0x95959595U, 0x0b0b0b0bU,
+ 0x42424242U, 0xfafafafaU, 0xc3c3c3c3U, 0x4e4e4e4eU,
+ 0x08080808U, 0x2e2e2e2eU, 0xa1a1a1a1U, 0x66666666U,
+ 0x28282828U, 0xd9d9d9d9U, 0x24242424U, 0xb2b2b2b2U,
+ 0x76767676U, 0x5b5b5b5bU, 0xa2a2a2a2U, 0x49494949U,
+ 0x6d6d6d6dU, 0x8b8b8b8bU, 0xd1d1d1d1U, 0x25252525U,
+ 0x72727272U, 0xf8f8f8f8U, 0xf6f6f6f6U, 0x64646464U,
+ 0x86868686U, 0x68686868U, 0x98989898U, 0x16161616U,
+ 0xd4d4d4d4U, 0xa4a4a4a4U, 0x5c5c5c5cU, 0xccccccccU,
+ 0x5d5d5d5dU, 0x65656565U, 0xb6b6b6b6U, 0x92929292U,
+ 0x6c6c6c6cU, 0x70707070U, 0x48484848U, 0x50505050U,
+ 0xfdfdfdfdU, 0xededededU, 0xb9b9b9b9U, 0xdadadadaU,
+ 0x5e5e5e5eU, 0x15151515U, 0x46464646U, 0x57575757U,
+ 0xa7a7a7a7U, 0x8d8d8d8dU, 0x9d9d9d9dU, 0x84848484U,
+ 0x90909090U, 0xd8d8d8d8U, 0xababababU, 0x00000000U,
+ 0x8c8c8c8cU, 0xbcbcbcbcU, 0xd3d3d3d3U, 0x0a0a0a0aU,
+ 0xf7f7f7f7U, 0xe4e4e4e4U, 0x58585858U, 0x05050505U,
+ 0xb8b8b8b8U, 0xb3b3b3b3U, 0x45454545U, 0x06060606U,
+ 0xd0d0d0d0U, 0x2c2c2c2cU, 0x1e1e1e1eU, 0x8f8f8f8fU,
+ 0xcacacacaU, 0x3f3f3f3fU, 0x0f0f0f0fU, 0x02020202U,
+ 0xc1c1c1c1U, 0xafafafafU, 0xbdbdbdbdU, 0x03030303U,
+ 0x01010101U, 0x13131313U, 0x8a8a8a8aU, 0x6b6b6b6bU,
+ 0x3a3a3a3aU, 0x91919191U, 0x11111111U, 0x41414141U,
+ 0x4f4f4f4fU, 0x67676767U, 0xdcdcdcdcU, 0xeaeaeaeaU,
+ 0x97979797U, 0xf2f2f2f2U, 0xcfcfcfcfU, 0xcecececeU,
+ 0xf0f0f0f0U, 0xb4b4b4b4U, 0xe6e6e6e6U, 0x73737373U,
+ 0x96969696U, 0xacacacacU, 0x74747474U, 0x22222222U,
+ 0xe7e7e7e7U, 0xadadadadU, 0x35353535U, 0x85858585U,
+ 0xe2e2e2e2U, 0xf9f9f9f9U, 0x37373737U, 0xe8e8e8e8U,
+ 0x1c1c1c1cU, 0x75757575U, 0xdfdfdfdfU, 0x6e6e6e6eU,
+ 0x47474747U, 0xf1f1f1f1U, 0x1a1a1a1aU, 0x71717171U,
+ 0x1d1d1d1dU, 0x29292929U, 0xc5c5c5c5U, 0x89898989U,
+ 0x6f6f6f6fU, 0xb7b7b7b7U, 0x62626262U, 0x0e0e0e0eU,
+ 0xaaaaaaaaU, 0x18181818U, 0xbebebebeU, 0x1b1b1b1bU,
+ 0xfcfcfcfcU, 0x56565656U, 0x3e3e3e3eU, 0x4b4b4b4bU,
+ 0xc6c6c6c6U, 0xd2d2d2d2U, 0x79797979U, 0x20202020U,
+ 0x9a9a9a9aU, 0xdbdbdbdbU, 0xc0c0c0c0U, 0xfefefefeU,
+ 0x78787878U, 0xcdcdcdcdU, 0x5a5a5a5aU, 0xf4f4f4f4U,
+ 0x1f1f1f1fU, 0xddddddddU, 0xa8a8a8a8U, 0x33333333U,
+ 0x88888888U, 0x07070707U, 0xc7c7c7c7U, 0x31313131U,
+ 0xb1b1b1b1U, 0x12121212U, 0x10101010U, 0x59595959U,
+ 0x27272727U, 0x80808080U, 0xececececU, 0x5f5f5f5fU,
+ 0x60606060U, 0x51515151U, 0x7f7f7f7fU, 0xa9a9a9a9U,
+ 0x19191919U, 0xb5b5b5b5U, 0x4a4a4a4aU, 0x0d0d0d0dU,
+ 0x2d2d2d2dU, 0xe5e5e5e5U, 0x7a7a7a7aU, 0x9f9f9f9fU,
+ 0x93939393U, 0xc9c9c9c9U, 0x9c9c9c9cU, 0xefefefefU,
+ 0xa0a0a0a0U, 0xe0e0e0e0U, 0x3b3b3b3bU, 0x4d4d4d4dU,
+ 0xaeaeaeaeU, 0x2a2a2a2aU, 0xf5f5f5f5U, 0xb0b0b0b0U,
+ 0xc8c8c8c8U, 0xebebebebU, 0xbbbbbbbbU, 0x3c3c3c3cU,
+ 0x83838383U, 0x53535353U, 0x99999999U, 0x61616161U,
+ 0x17171717U, 0x2b2b2b2bU, 0x04040404U, 0x7e7e7e7eU,
+ 0xbabababaU, 0x77777777U, 0xd6d6d6d6U, 0x26262626U,
+ 0xe1e1e1e1U, 0x69696969U, 0x14141414U, 0x63636363U,
+ 0x55555555U, 0x21212121U, 0x0c0c0c0cU, 0x7d7d7d7dU,
+};
+static const u32 rcon[] = {
+ 0x01000000, 0x02000000, 0x04000000, 0x08000000,
+ 0x10000000, 0x20000000, 0x40000000, 0x80000000,
+ 0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
+};
+
+/**
+ * Expand the cipher key into the encryption key schedule.
+ */
+int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
+ AES_KEY *key) {
+
+ u32 *rk;
+ int i = 0;
+ u32 temp;
+
+ if (!userKey || !key)
+ return -1;
+ if (bits != 128 && bits != 192 && bits != 256)
+ return -2;
+
+ rk = key->rd_key;
+
+ if (bits==128)
+ key->rounds = 10;
+ else if (bits==192)
+ key->rounds = 12;
+ else
+ key->rounds = 14;
+
+ rk[0] = GETU32(userKey );
+ rk[1] = GETU32(userKey + 4);
+ rk[2] = GETU32(userKey + 8);
+ rk[3] = GETU32(userKey + 12);
+ if (bits == 128) {
+ while (1) {
+ temp = rk[3];
+ rk[4] = rk[0] ^
+ (AES_Te4[(temp >> 16) & 0xff] & 0xff000000) ^
+ (AES_Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(temp ) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(temp >> 24) ] & 0x000000ff) ^
+ rcon[i];
+ rk[5] = rk[1] ^ rk[4];
+ rk[6] = rk[2] ^ rk[5];
+ rk[7] = rk[3] ^ rk[6];
+ if (++i == 10) {
+ return 0;
+ }
+ rk += 4;
+ }
+ }
+ rk[4] = GETU32(userKey + 16);
+ rk[5] = GETU32(userKey + 20);
+ if (bits == 192) {
+ while (1) {
+ temp = rk[ 5];
+ rk[ 6] = rk[ 0] ^
+ (AES_Te4[(temp >> 16) & 0xff] & 0xff000000) ^
+ (AES_Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(temp ) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(temp >> 24) ] & 0x000000ff) ^
+ rcon[i];
+ rk[ 7] = rk[ 1] ^ rk[ 6];
+ rk[ 8] = rk[ 2] ^ rk[ 7];
+ rk[ 9] = rk[ 3] ^ rk[ 8];
+ if (++i == 8) {
+ return 0;
+ }
+ rk[10] = rk[ 4] ^ rk[ 9];
+ rk[11] = rk[ 5] ^ rk[10];
+ rk += 6;
+ }
+ }
+ rk[6] = GETU32(userKey + 24);
+ rk[7] = GETU32(userKey + 28);
+ if (bits == 256) {
+ while (1) {
+ temp = rk[ 7];
+ rk[ 8] = rk[ 0] ^
+ (AES_Te4[(temp >> 16) & 0xff] & 0xff000000) ^
+ (AES_Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(temp ) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(temp >> 24) ] & 0x000000ff) ^
+ rcon[i];
+ rk[ 9] = rk[ 1] ^ rk[ 8];
+ rk[10] = rk[ 2] ^ rk[ 9];
+ rk[11] = rk[ 3] ^ rk[10];
+ if (++i == 7) {
+ return 0;
+ }
+ temp = rk[11];
+ rk[12] = rk[ 4] ^
+ (AES_Te4[(temp >> 24) ] & 0xff000000) ^
+ (AES_Te4[(temp >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(temp >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(temp ) & 0xff] & 0x000000ff);
+ rk[13] = rk[ 5] ^ rk[12];
+ rk[14] = rk[ 6] ^ rk[13];
+ rk[15] = rk[ 7] ^ rk[14];
+
+ rk += 8;
+ }
+ }
+ return 0;
+}
+
+/**
+ * Expand the cipher key into the decryption key schedule.
+ */
+int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
+ AES_KEY *key) {
+
+ u32 *rk;
+ int i, j, status;
+ u32 temp;
+
+ /* first, start with an encryption schedule */
+ status = AES_set_encrypt_key(userKey, bits, key);
+ if (status < 0)
+ return status;
+
+ rk = key->rd_key;
+
+ /* invert the order of the round keys: */
+ for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
+ temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp;
+ temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
+ temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
+ temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
+ }
+ /* apply the inverse MixColumn transform to all round keys but the first and the last: */
+ for (i = 1; i < (key->rounds); i++) {
+ rk += 4;
+ rk[0] =
+ AES_Td0[AES_Te4[(rk[0] >> 24) ] & 0xff] ^
+ AES_Td1[AES_Te4[(rk[0] >> 16) & 0xff] & 0xff] ^
+ AES_Td2[AES_Te4[(rk[0] >> 8) & 0xff] & 0xff] ^
+ AES_Td3[AES_Te4[(rk[0] ) & 0xff] & 0xff];
+ rk[1] =
+ AES_Td0[AES_Te4[(rk[1] >> 24) ] & 0xff] ^
+ AES_Td1[AES_Te4[(rk[1] >> 16) & 0xff] & 0xff] ^
+ AES_Td2[AES_Te4[(rk[1] >> 8) & 0xff] & 0xff] ^
+ AES_Td3[AES_Te4[(rk[1] ) & 0xff] & 0xff];
+ rk[2] =
+ AES_Td0[AES_Te4[(rk[2] >> 24) ] & 0xff] ^
+ AES_Td1[AES_Te4[(rk[2] >> 16) & 0xff] & 0xff] ^
+ AES_Td2[AES_Te4[(rk[2] >> 8) & 0xff] & 0xff] ^
+ AES_Td3[AES_Te4[(rk[2] ) & 0xff] & 0xff];
+ rk[3] =
+ AES_Td0[AES_Te4[(rk[3] >> 24) ] & 0xff] ^
+ AES_Td1[AES_Te4[(rk[3] >> 16) & 0xff] & 0xff] ^
+ AES_Td2[AES_Te4[(rk[3] >> 8) & 0xff] & 0xff] ^
+ AES_Td3[AES_Te4[(rk[3] ) & 0xff] & 0xff];
+ }
+ return 0;
+}
+
+#ifndef AES_ASM
+/*
+ * Encrypt a single block
+ * in and out can overlap
+ */
+void AES_encrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key) {
+
+ const u32 *rk;
+ u32 s0, s1, s2, s3, t0, t1, t2, t3;
+#ifndef FULL_UNROLL
+ int r;
+#endif /* ?FULL_UNROLL */
+
+ assert(in && out && key);
+ rk = key->rd_key;
+
+ /*
+ * map byte array block to cipher state
+ * and add initial round key:
+ */
+ s0 = GETU32(in ) ^ rk[0];
+ s1 = GETU32(in + 4) ^ rk[1];
+ s2 = GETU32(in + 8) ^ rk[2];
+ s3 = GETU32(in + 12) ^ rk[3];
+#ifdef FULL_UNROLL
+ /* round 1: */
+ t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[ 4];
+ t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[ 5];
+ t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[ 6];
+ t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[ 7];
+ /* round 2: */
+ s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >> 8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[ 8];
+ s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >> 8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[ 9];
+ s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >> 8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[10];
+ s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >> 8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[11];
+ /* round 3: */
+ t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[12];
+ t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[13];
+ t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[14];
+ t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[15];
+ /* round 4: */
+ s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >> 8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[16];
+ s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >> 8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[17];
+ s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >> 8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[18];
+ s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >> 8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[19];
+ /* round 5: */
+ t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[20];
+ t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[21];
+ t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[22];
+ t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[23];
+ /* round 6: */
+ s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >> 8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[24];
+ s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >> 8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[25];
+ s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >> 8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[26];
+ s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >> 8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[27];
+ /* round 7: */
+ t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[28];
+ t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[29];
+ t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[30];
+ t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[31];
+ /* round 8: */
+ s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >> 8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[32];
+ s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >> 8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[33];
+ s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >> 8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[34];
+ s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >> 8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[35];
+ /* round 9: */
+ t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[36];
+ t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[37];
+ t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[38];
+ t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[39];
+ if (key->rounds > 10) {
+ /* round 10: */
+ s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >> 8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[40];
+ s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >> 8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[41];
+ s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >> 8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[42];
+ s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >> 8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[43];
+ /* round 11: */
+ t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[44];
+ t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[45];
+ t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[46];
+ t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[47];
+ if (key->rounds > 12) {
+ /* round 12: */
+ s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >> 8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[48];
+ s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >> 8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[49];
+ s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >> 8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[50];
+ s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >> 8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[51];
+ /* round 13: */
+ t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[52];
+ t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[53];
+ t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[54];
+ t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[55];
+ }
+ }
+ rk += key->rounds << 2;
+#else /* !FULL_UNROLL */
+ /*
+ * Nr - 1 full rounds:
+ */
+ r = key->rounds >> 1;
+ for (;;) {
+ t0 =
+ AES_Te0[(s0 >> 24) ] ^
+ AES_Te1[(s1 >> 16) & 0xff] ^
+ AES_Te2[(s2 >> 8) & 0xff] ^
+ AES_Te3[(s3 ) & 0xff] ^
+ rk[4];
+ t1 =
+ AES_Te0[(s1 >> 24) ] ^
+ AES_Te1[(s2 >> 16) & 0xff] ^
+ AES_Te2[(s3 >> 8) & 0xff] ^
+ AES_Te3[(s0 ) & 0xff] ^
+ rk[5];
+ t2 =
+ AES_Te0[(s2 >> 24) ] ^
+ AES_Te1[(s3 >> 16) & 0xff] ^
+ AES_Te2[(s0 >> 8) & 0xff] ^
+ AES_Te3[(s1 ) & 0xff] ^
+ rk[6];
+ t3 =
+ AES_Te0[(s3 >> 24) ] ^
+ AES_Te1[(s0 >> 16) & 0xff] ^
+ AES_Te2[(s1 >> 8) & 0xff] ^
+ AES_Te3[(s2 ) & 0xff] ^
+ rk[7];
+
+ rk += 8;
+ if (--r == 0) {
+ break;
+ }
+
+ s0 =
+ AES_Te0[(t0 >> 24) ] ^
+ AES_Te1[(t1 >> 16) & 0xff] ^
+ AES_Te2[(t2 >> 8) & 0xff] ^
+ AES_Te3[(t3 ) & 0xff] ^
+ rk[0];
+ s1 =
+ AES_Te0[(t1 >> 24) ] ^
+ AES_Te1[(t2 >> 16) & 0xff] ^
+ AES_Te2[(t3 >> 8) & 0xff] ^
+ AES_Te3[(t0 ) & 0xff] ^
+ rk[1];
+ s2 =
+ AES_Te0[(t2 >> 24) ] ^
+ AES_Te1[(t3 >> 16) & 0xff] ^
+ AES_Te2[(t0 >> 8) & 0xff] ^
+ AES_Te3[(t1 ) & 0xff] ^
+ rk[2];
+ s3 =
+ AES_Te0[(t3 >> 24) ] ^
+ AES_Te1[(t0 >> 16) & 0xff] ^
+ AES_Te2[(t1 >> 8) & 0xff] ^
+ AES_Te3[(t2 ) & 0xff] ^
+ rk[3];
+ }
+#endif /* ?FULL_UNROLL */
+ /*
+ * apply last round and
+ * map cipher state to byte array block:
+ */
+ s0 =
+ (AES_Te4[(t0 >> 24) ] & 0xff000000) ^
+ (AES_Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(t2 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(t3 ) & 0xff] & 0x000000ff) ^
+ rk[0];
+ PUTU32(out , s0);
+ s1 =
+ (AES_Te4[(t1 >> 24) ] & 0xff000000) ^
+ (AES_Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(t3 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(t0 ) & 0xff] & 0x000000ff) ^
+ rk[1];
+ PUTU32(out + 4, s1);
+ s2 =
+ (AES_Te4[(t2 >> 24) ] & 0xff000000) ^
+ (AES_Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(t0 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(t1 ) & 0xff] & 0x000000ff) ^
+ rk[2];
+ PUTU32(out + 8, s2);
+ s3 =
+ (AES_Te4[(t3 >> 24) ] & 0xff000000) ^
+ (AES_Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(t1 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(t2 ) & 0xff] & 0x000000ff) ^
+ rk[3];
+ PUTU32(out + 12, s3);
+}
+
+/*
+ * Decrypt a single block
+ * in and out can overlap
+ */
+void AES_decrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key) {
+
+ const u32 *rk;
+ u32 s0, s1, s2, s3, t0, t1, t2, t3;
+#ifndef FULL_UNROLL
+ int r;
+#endif /* ?FULL_UNROLL */
+
+ assert(in && out && key);
+ rk = key->rd_key;
+
+ /*
+ * map byte array block to cipher state
+ * and add initial round key:
+ */
+ s0 = GETU32(in ) ^ rk[0];
+ s1 = GETU32(in + 4) ^ rk[1];
+ s2 = GETU32(in + 8) ^ rk[2];
+ s3 = GETU32(in + 12) ^ rk[3];
+#ifdef FULL_UNROLL
+ /* round 1: */
+ t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[ 4];
+ t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[ 5];
+ t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[ 6];
+ t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[ 7];
+ /* round 2: */
+ s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >> 8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[ 8];
+ s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >> 8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[ 9];
+ s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >> 8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[10];
+ s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >> 8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[11];
+ /* round 3: */
+ t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[12];
+ t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[13];
+ t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[14];
+ t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[15];
+ /* round 4: */
+ s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >> 8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[16];
+ s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >> 8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[17];
+ s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >> 8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[18];
+ s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >> 8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[19];
+ /* round 5: */
+ t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[20];
+ t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[21];
+ t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[22];
+ t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[23];
+ /* round 6: */
+ s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >> 8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[24];
+ s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >> 8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[25];
+ s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >> 8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[26];
+ s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >> 8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[27];
+ /* round 7: */
+ t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[28];
+ t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[29];
+ t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[30];
+ t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[31];
+ /* round 8: */
+ s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >> 8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[32];
+ s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >> 8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[33];
+ s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >> 8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[34];
+ s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >> 8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[35];
+ /* round 9: */
+ t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[36];
+ t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[37];
+ t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[38];
+ t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[39];
+ if (key->rounds > 10) {
+ /* round 10: */
+ s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >> 8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[40];
+ s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >> 8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[41];
+ s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >> 8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[42];
+ s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >> 8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[43];
+ /* round 11: */
+ t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[44];
+ t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[45];
+ t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[46];
+ t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[47];
+ if (key->rounds > 12) {
+ /* round 12: */
+ s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >> 8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[48];
+ s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >> 8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[49];
+ s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >> 8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[50];
+ s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >> 8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[51];
+ /* round 13: */
+ t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[52];
+ t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[53];
+ t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[54];
+ t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[55];
+ }
+ }
+ rk += key->rounds << 2;
+#else /* !FULL_UNROLL */
+ /*
+ * Nr - 1 full rounds:
+ */
+ r = key->rounds >> 1;
+ for (;;) {
+ t0 =
+ AES_Td0[(s0 >> 24) ] ^
+ AES_Td1[(s3 >> 16) & 0xff] ^
+ AES_Td2[(s2 >> 8) & 0xff] ^
+ AES_Td3[(s1 ) & 0xff] ^
+ rk[4];
+ t1 =
+ AES_Td0[(s1 >> 24) ] ^
+ AES_Td1[(s0 >> 16) & 0xff] ^
+ AES_Td2[(s3 >> 8) & 0xff] ^
+ AES_Td3[(s2 ) & 0xff] ^
+ rk[5];
+ t2 =
+ AES_Td0[(s2 >> 24) ] ^
+ AES_Td1[(s1 >> 16) & 0xff] ^
+ AES_Td2[(s0 >> 8) & 0xff] ^
+ AES_Td3[(s3 ) & 0xff] ^
+ rk[6];
+ t3 =
+ AES_Td0[(s3 >> 24) ] ^
+ AES_Td1[(s2 >> 16) & 0xff] ^
+ AES_Td2[(s1 >> 8) & 0xff] ^
+ AES_Td3[(s0 ) & 0xff] ^
+ rk[7];
+
+ rk += 8;
+ if (--r == 0) {
+ break;
+ }
+
+ s0 =
+ AES_Td0[(t0 >> 24) ] ^
+ AES_Td1[(t3 >> 16) & 0xff] ^
+ AES_Td2[(t2 >> 8) & 0xff] ^
+ AES_Td3[(t1 ) & 0xff] ^
+ rk[0];
+ s1 =
+ AES_Td0[(t1 >> 24) ] ^
+ AES_Td1[(t0 >> 16) & 0xff] ^
+ AES_Td2[(t3 >> 8) & 0xff] ^
+ AES_Td3[(t2 ) & 0xff] ^
+ rk[1];
+ s2 =
+ AES_Td0[(t2 >> 24) ] ^
+ AES_Td1[(t1 >> 16) & 0xff] ^
+ AES_Td2[(t0 >> 8) & 0xff] ^
+ AES_Td3[(t3 ) & 0xff] ^
+ rk[2];
+ s3 =
+ AES_Td0[(t3 >> 24) ] ^
+ AES_Td1[(t2 >> 16) & 0xff] ^
+ AES_Td2[(t1 >> 8) & 0xff] ^
+ AES_Td3[(t0 ) & 0xff] ^
+ rk[3];
+ }
+#endif /* ?FULL_UNROLL */
+ /*
+ * apply last round and
+ * map cipher state to byte array block:
+ */
+ s0 =
+ (AES_Td4[(t0 >> 24) ] & 0xff000000) ^
+ (AES_Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Td4[(t2 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Td4[(t1 ) & 0xff] & 0x000000ff) ^
+ rk[0];
+ PUTU32(out , s0);
+ s1 =
+ (AES_Td4[(t1 >> 24) ] & 0xff000000) ^
+ (AES_Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Td4[(t3 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Td4[(t2 ) & 0xff] & 0x000000ff) ^
+ rk[1];
+ PUTU32(out + 4, s1);
+ s2 =
+ (AES_Td4[(t2 >> 24) ] & 0xff000000) ^
+ (AES_Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Td4[(t0 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Td4[(t3 ) & 0xff] & 0x000000ff) ^
+ rk[2];
+ PUTU32(out + 8, s2);
+ s3 =
+ (AES_Td4[(t3 >> 24) ] & 0xff000000) ^
+ (AES_Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Td4[(t1 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Td4[(t0 ) & 0xff] & 0x000000ff) ^
+ rk[3];
+ PUTU32(out + 12, s3);
+}
+
+#endif /* AES_ASM */
+
+void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
+ const unsigned long length, const AES_KEY *key,
+ unsigned char *ivec, const int enc)
+{
+
+ unsigned long n;
+ unsigned long len = length;
+ unsigned char tmp[AES_BLOCK_SIZE];
+
+ assert(in && out && key && ivec);
+
+ if (enc) {
+ while (len >= AES_BLOCK_SIZE) {
+ for(n=0; n < AES_BLOCK_SIZE; ++n)
+ tmp[n] = in[n] ^ ivec[n];
+ AES_encrypt(tmp, out, key);
+ memcpy(ivec, out, AES_BLOCK_SIZE);
+ len -= AES_BLOCK_SIZE;
+ in += AES_BLOCK_SIZE;
+ out += AES_BLOCK_SIZE;
+ }
+ if (len) {
+ for(n=0; n < len; ++n)
+ tmp[n] = in[n] ^ ivec[n];
+ for(n=len; n < AES_BLOCK_SIZE; ++n)
+ tmp[n] = ivec[n];
+ AES_encrypt(tmp, tmp, key);
+ memcpy(out, tmp, AES_BLOCK_SIZE);
+ memcpy(ivec, tmp, AES_BLOCK_SIZE);
+ }
+ } else {
+ while (len >= AES_BLOCK_SIZE) {
+ memcpy(tmp, in, AES_BLOCK_SIZE);
+ AES_decrypt(in, out, key);
+ for(n=0; n < AES_BLOCK_SIZE; ++n)
+ out[n] ^= ivec[n];
+ memcpy(ivec, tmp, AES_BLOCK_SIZE);
+ len -= AES_BLOCK_SIZE;
+ in += AES_BLOCK_SIZE;
+ out += AES_BLOCK_SIZE;
+ }
+ if (len) {
+ memcpy(tmp, in, AES_BLOCK_SIZE);
+ AES_decrypt(tmp, tmp, key);
+ for(n=0; n < len; ++n)
+ out[n] = tmp[n] ^ ivec[n];
+ memcpy(ivec, tmp, AES_BLOCK_SIZE);
+ }
+ }
+}
diff --git a/contrib/qemu/util/bitmap.c b/contrib/qemu/util/bitmap.c
new file mode 100644
index 000000000..687841dce
--- /dev/null
+++ b/contrib/qemu/util/bitmap.c
@@ -0,0 +1,256 @@
+/*
+ * Bitmap Module
+ *
+ * Stolen from linux/src/lib/bitmap.c
+ *
+ * Copyright (C) 2010 Corentin Chary
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.
+ */
+
+#include "qemu/bitops.h"
+#include "qemu/bitmap.h"
+
+/*
+ * bitmaps provide an array of bits, implemented using an an
+ * array of unsigned longs. The number of valid bits in a
+ * given bitmap does _not_ need to be an exact multiple of
+ * BITS_PER_LONG.
+ *
+ * The possible unused bits in the last, partially used word
+ * of a bitmap are 'don't care'. The implementation makes
+ * no particular effort to keep them zero. It ensures that
+ * their value will not affect the results of any operation.
+ * The bitmap operations that return Boolean (bitmap_empty,
+ * for example) or scalar (bitmap_weight, for example) results
+ * carefully filter out these unused bits from impacting their
+ * results.
+ *
+ * These operations actually hold to a slightly stronger rule:
+ * if you don't input any bitmaps to these ops that have some
+ * unused bits set, then they won't output any set unused bits
+ * in output bitmaps.
+ *
+ * The byte ordering of bitmaps is more natural on little
+ * endian architectures.
+ */
+
+int slow_bitmap_empty(const unsigned long *bitmap, int bits)
+{
+ int k, lim = bits/BITS_PER_LONG;
+
+ for (k = 0; k < lim; ++k) {
+ if (bitmap[k]) {
+ return 0;
+ }
+ }
+ if (bits % BITS_PER_LONG) {
+ if (bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) {
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+int slow_bitmap_full(const unsigned long *bitmap, int bits)
+{
+ int k, lim = bits/BITS_PER_LONG;
+
+ for (k = 0; k < lim; ++k) {
+ if (~bitmap[k]) {
+ return 0;
+ }
+ }
+
+ if (bits % BITS_PER_LONG) {
+ if (~bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) {
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+int slow_bitmap_equal(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits)
+{
+ int k, lim = bits/BITS_PER_LONG;
+
+ for (k = 0; k < lim; ++k) {
+ if (bitmap1[k] != bitmap2[k]) {
+ return 0;
+ }
+ }
+
+ if (bits % BITS_PER_LONG) {
+ if ((bitmap1[k] ^ bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) {
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+void slow_bitmap_complement(unsigned long *dst, const unsigned long *src,
+ int bits)
+{
+ int k, lim = bits/BITS_PER_LONG;
+
+ for (k = 0; k < lim; ++k) {
+ dst[k] = ~src[k];
+ }
+
+ if (bits % BITS_PER_LONG) {
+ dst[k] = ~src[k] & BITMAP_LAST_WORD_MASK(bits);
+ }
+}
+
+int slow_bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits)
+{
+ int k;
+ int nr = BITS_TO_LONGS(bits);
+ unsigned long result = 0;
+
+ for (k = 0; k < nr; k++) {
+ result |= (dst[k] = bitmap1[k] & bitmap2[k]);
+ }
+ return result != 0;
+}
+
+void slow_bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits)
+{
+ int k;
+ int nr = BITS_TO_LONGS(bits);
+
+ for (k = 0; k < nr; k++) {
+ dst[k] = bitmap1[k] | bitmap2[k];
+ }
+}
+
+void slow_bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits)
+{
+ int k;
+ int nr = BITS_TO_LONGS(bits);
+
+ for (k = 0; k < nr; k++) {
+ dst[k] = bitmap1[k] ^ bitmap2[k];
+ }
+}
+
+int slow_bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits)
+{
+ int k;
+ int nr = BITS_TO_LONGS(bits);
+ unsigned long result = 0;
+
+ for (k = 0; k < nr; k++) {
+ result |= (dst[k] = bitmap1[k] & ~bitmap2[k]);
+ }
+ return result != 0;
+}
+
+#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG))
+
+void bitmap_set(unsigned long *map, int start, int nr)
+{
+ unsigned long *p = map + BIT_WORD(start);
+ const int size = start + nr;
+ int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
+ unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
+
+ while (nr - bits_to_set >= 0) {
+ *p |= mask_to_set;
+ nr -= bits_to_set;
+ bits_to_set = BITS_PER_LONG;
+ mask_to_set = ~0UL;
+ p++;
+ }
+ if (nr) {
+ mask_to_set &= BITMAP_LAST_WORD_MASK(size);
+ *p |= mask_to_set;
+ }
+}
+
+void bitmap_clear(unsigned long *map, int start, int nr)
+{
+ unsigned long *p = map + BIT_WORD(start);
+ const int size = start + nr;
+ int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
+ unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+
+ while (nr - bits_to_clear >= 0) {
+ *p &= ~mask_to_clear;
+ nr -= bits_to_clear;
+ bits_to_clear = BITS_PER_LONG;
+ mask_to_clear = ~0UL;
+ p++;
+ }
+ if (nr) {
+ mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+ *p &= ~mask_to_clear;
+ }
+}
+
+#define ALIGN_MASK(x,mask) (((x)+(mask))&~(mask))
+
+/**
+ * bitmap_find_next_zero_area - find a contiguous aligned zero area
+ * @map: The address to base the search on
+ * @size: The bitmap size in bits
+ * @start: The bitnumber to start searching at
+ * @nr: The number of zeroed bits we're looking for
+ * @align_mask: Alignment mask for zero area
+ *
+ * The @align_mask should be one less than a power of 2; the effect is that
+ * the bit offset of all zero areas this function finds is multiples of that
+ * power of 2. A @align_mask of 0 means no alignment is required.
+ */
+unsigned long bitmap_find_next_zero_area(unsigned long *map,
+ unsigned long size,
+ unsigned long start,
+ unsigned int nr,
+ unsigned long align_mask)
+{
+ unsigned long index, end, i;
+again:
+ index = find_next_zero_bit(map, size, start);
+
+ /* Align allocation */
+ index = ALIGN_MASK(index, align_mask);
+
+ end = index + nr;
+ if (end > size) {
+ return end;
+ }
+ i = find_next_bit(map, end, index);
+ if (i < end) {
+ start = i + 1;
+ goto again;
+ }
+ return index;
+}
+
+int slow_bitmap_intersects(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits)
+{
+ int k, lim = bits/BITS_PER_LONG;
+
+ for (k = 0; k < lim; ++k) {
+ if (bitmap1[k] & bitmap2[k]) {
+ return 1;
+ }
+ }
+
+ if (bits % BITS_PER_LONG) {
+ if ((bitmap1[k] & bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) {
+ return 1;
+ }
+ }
+ return 0;
+}
diff --git a/contrib/qemu/util/bitops.c b/contrib/qemu/util/bitops.c
new file mode 100644
index 000000000..227c38b88
--- /dev/null
+++ b/contrib/qemu/util/bitops.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ * Copyright (C) 2008 IBM Corporation
+ * Written by Rusty Russell <rusty@rustcorp.com.au>
+ * (Inspired by David Howell's find_next_bit implementation)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "qemu/bitops.h"
+
+#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
+
+/*
+ * Find the next set bit in a memory region.
+ */
+unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ const unsigned long *p = addr + BITOP_WORD(offset);
+ unsigned long result = offset & ~(BITS_PER_LONG-1);
+ unsigned long tmp;
+
+ if (offset >= size) {
+ return size;
+ }
+ size -= result;
+ offset %= BITS_PER_LONG;
+ if (offset) {
+ tmp = *(p++);
+ tmp &= (~0UL << offset);
+ if (size < BITS_PER_LONG) {
+ goto found_first;
+ }
+ if (tmp) {
+ goto found_middle;
+ }
+ size -= BITS_PER_LONG;
+ result += BITS_PER_LONG;
+ }
+ while (size >= 4*BITS_PER_LONG) {
+ unsigned long d1, d2, d3;
+ tmp = *p;
+ d1 = *(p+1);
+ d2 = *(p+2);
+ d3 = *(p+3);
+ if (tmp) {
+ goto found_middle;
+ }
+ if (d1 | d2 | d3) {
+ break;
+ }
+ p += 4;
+ result += 4*BITS_PER_LONG;
+ size -= 4*BITS_PER_LONG;
+ }
+ while (size >= BITS_PER_LONG) {
+ if ((tmp = *(p++))) {
+ goto found_middle;
+ }
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size) {
+ return result;
+ }
+ tmp = *p;
+
+found_first:
+ tmp &= (~0UL >> (BITS_PER_LONG - size));
+ if (tmp == 0UL) { /* Are any bits set? */
+ return result + size; /* Nope. */
+ }
+found_middle:
+ return result + ctzl(tmp);
+}
+
+/*
+ * This implementation of find_{first,next}_zero_bit was stolen from
+ * Linus' asm-alpha/bitops.h.
+ */
+unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ const unsigned long *p = addr + BITOP_WORD(offset);
+ unsigned long result = offset & ~(BITS_PER_LONG-1);
+ unsigned long tmp;
+
+ if (offset >= size) {
+ return size;
+ }
+ size -= result;
+ offset %= BITS_PER_LONG;
+ if (offset) {
+ tmp = *(p++);
+ tmp |= ~0UL >> (BITS_PER_LONG - offset);
+ if (size < BITS_PER_LONG) {
+ goto found_first;
+ }
+ if (~tmp) {
+ goto found_middle;
+ }
+ size -= BITS_PER_LONG;
+ result += BITS_PER_LONG;
+ }
+ while (size & ~(BITS_PER_LONG-1)) {
+ if (~(tmp = *(p++))) {
+ goto found_middle;
+ }
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size) {
+ return result;
+ }
+ tmp = *p;
+
+found_first:
+ tmp |= ~0UL << size;
+ if (tmp == ~0UL) { /* Are any bits zero? */
+ return result + size; /* Nope. */
+ }
+found_middle:
+ return result + ctzl(~tmp);
+}
+
+unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
+{
+ unsigned long words;
+ unsigned long tmp;
+
+ /* Start at final word. */
+ words = size / BITS_PER_LONG;
+
+ /* Partial final word? */
+ if (size & (BITS_PER_LONG-1)) {
+ tmp = (addr[words] & (~0UL >> (BITS_PER_LONG
+ - (size & (BITS_PER_LONG-1)))));
+ if (tmp) {
+ goto found;
+ }
+ }
+
+ while (words) {
+ tmp = addr[--words];
+ if (tmp) {
+ found:
+ return words * BITS_PER_LONG + BITS_PER_LONG - 1 - clzl(tmp);
+ }
+ }
+
+ /* Not found */
+ return size;
+}
diff --git a/contrib/qemu/util/cutils.c b/contrib/qemu/util/cutils.c
new file mode 100644
index 000000000..0116fcde7
--- /dev/null
+++ b/contrib/qemu/util/cutils.c
@@ -0,0 +1,532 @@
+/*
+ * Simple C functions to supplement the C library
+ *
+ * Copyright (c) 2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "qemu/host-utils.h"
+#include <math.h>
+
+#include "qemu/sockets.h"
+#include "qemu/iov.h"
+
+void strpadcpy(char *buf, int buf_size, const char *str, char pad)
+{
+ int len = qemu_strnlen(str, buf_size);
+ memcpy(buf, str, len);
+ memset(buf + len, pad, buf_size - len);
+}
+
+void pstrcpy(char *buf, int buf_size, const char *str)
+{
+ int c;
+ char *q = buf;
+
+ if (buf_size <= 0)
+ return;
+
+ for(;;) {
+ c = *str++;
+ if (c == 0 || q >= buf + buf_size - 1)
+ break;
+ *q++ = c;
+ }
+ *q = '\0';
+}
+
+/* strcat and truncate. */
+char *pstrcat(char *buf, int buf_size, const char *s)
+{
+ int len;
+ len = strlen(buf);
+ if (len < buf_size)
+ pstrcpy(buf + len, buf_size - len, s);
+ return buf;
+}
+
+int strstart(const char *str, const char *val, const char **ptr)
+{
+ const char *p, *q;
+ p = str;
+ q = val;
+ while (*q != '\0') {
+ if (*p != *q)
+ return 0;
+ p++;
+ q++;
+ }
+ if (ptr)
+ *ptr = p;
+ return 1;
+}
+
+int stristart(const char *str, const char *val, const char **ptr)
+{
+ const char *p, *q;
+ p = str;
+ q = val;
+ while (*q != '\0') {
+ if (qemu_toupper(*p) != qemu_toupper(*q))
+ return 0;
+ p++;
+ q++;
+ }
+ if (ptr)
+ *ptr = p;
+ return 1;
+}
+
+/* XXX: use host strnlen if available ? */
+int qemu_strnlen(const char *s, int max_len)
+{
+ int i;
+
+ for(i = 0; i < max_len; i++) {
+ if (s[i] == '\0') {
+ break;
+ }
+ }
+ return i;
+}
+
+char *qemu_strsep(char **input, const char *delim)
+{
+ char *result = *input;
+ if (result != NULL) {
+ char *p;
+
+ for (p = result; *p != '\0'; p++) {
+ if (strchr(delim, *p)) {
+ break;
+ }
+ }
+ if (*p == '\0') {
+ *input = NULL;
+ } else {
+ *p = '\0';
+ *input = p + 1;
+ }
+ }
+ return result;
+}
+
+time_t mktimegm(struct tm *tm)
+{
+ time_t t;
+ int y = tm->tm_year + 1900, m = tm->tm_mon + 1, d = tm->tm_mday;
+ if (m < 3) {
+ m += 12;
+ y--;
+ }
+ t = 86400ULL * (d + (153 * m - 457) / 5 + 365 * y + y / 4 - y / 100 +
+ y / 400 - 719469);
+ t += 3600 * tm->tm_hour + 60 * tm->tm_min + tm->tm_sec;
+ return t;
+}
+
+int qemu_fls(int i)
+{
+ return 32 - clz32(i);
+}
+
+/*
+ * Make sure data goes on disk, but if possible do not bother to
+ * write out the inode just for timestamp updates.
+ *
+ * Unfortunately even in 2009 many operating systems do not support
+ * fdatasync and have to fall back to fsync.
+ */
+int qemu_fdatasync(int fd)
+{
+#ifdef CONFIG_FDATASYNC
+ return fdatasync(fd);
+#else
+ return fsync(fd);
+#endif
+}
+
+/*
+ * Searches for an area with non-zero content in a buffer
+ *
+ * Attention! The len must be a multiple of
+ * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR * sizeof(VECTYPE)
+ * and addr must be a multiple of sizeof(VECTYPE) due to
+ * restriction of optimizations in this function.
+ *
+ * can_use_buffer_find_nonzero_offset() can be used to check
+ * these requirements.
+ *
+ * The return value is the offset of the non-zero area rounded
+ * down to a multiple of sizeof(VECTYPE) for the first
+ * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR chunks and down to
+ * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR * sizeof(VECTYPE)
+ * afterwards.
+ *
+ * If the buffer is all zero the return value is equal to len.
+ */
+
+size_t buffer_find_nonzero_offset(const void *buf, size_t len)
+{
+ const VECTYPE *p = buf;
+ const VECTYPE zero = (VECTYPE){0};
+ size_t i;
+
+ assert(can_use_buffer_find_nonzero_offset(buf, len));
+
+ if (!len) {
+ return 0;
+ }
+
+ for (i = 0; i < BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; i++) {
+ if (!ALL_EQ(p[i], zero)) {
+ return i * sizeof(VECTYPE);
+ }
+ }
+
+ for (i = BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR;
+ i < len / sizeof(VECTYPE);
+ i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR) {
+ VECTYPE tmp0 = p[i + 0] | p[i + 1];
+ VECTYPE tmp1 = p[i + 2] | p[i + 3];
+ VECTYPE tmp2 = p[i + 4] | p[i + 5];
+ VECTYPE tmp3 = p[i + 6] | p[i + 7];
+ VECTYPE tmp01 = tmp0 | tmp1;
+ VECTYPE tmp23 = tmp2 | tmp3;
+ if (!ALL_EQ(tmp01 | tmp23, zero)) {
+ break;
+ }
+ }
+
+ return i * sizeof(VECTYPE);
+}
+
+/*
+ * Checks if a buffer is all zeroes
+ *
+ * Attention! The len must be a multiple of 4 * sizeof(long) due to
+ * restriction of optimizations in this function.
+ */
+bool buffer_is_zero(const void *buf, size_t len)
+{
+ /*
+ * Use long as the biggest available internal data type that fits into the
+ * CPU register and unroll the loop to smooth out the effect of memory
+ * latency.
+ */
+
+ size_t i;
+ long d0, d1, d2, d3;
+ const long * const data = buf;
+
+ /* use vector optimized zero check if possible */
+ if (can_use_buffer_find_nonzero_offset(buf, len)) {
+ return buffer_find_nonzero_offset(buf, len) == len;
+ }
+
+ assert(len % (4 * sizeof(long)) == 0);
+ len /= sizeof(long);
+
+ for (i = 0; i < len; i += 4) {
+ d0 = data[i + 0];
+ d1 = data[i + 1];
+ d2 = data[i + 2];
+ d3 = data[i + 3];
+
+ if (d0 || d1 || d2 || d3) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+#ifndef _WIN32
+/* Sets a specific flag */
+int fcntl_setfl(int fd, int flag)
+{
+ int flags;
+
+ flags = fcntl(fd, F_GETFL);
+ if (flags == -1)
+ return -errno;
+
+ if (fcntl(fd, F_SETFL, flags | flag) == -1)
+ return -errno;
+
+ return 0;
+}
+#endif
+
+static int64_t suffix_mul(char suffix, int64_t unit)
+{
+ switch (qemu_toupper(suffix)) {
+ case STRTOSZ_DEFSUFFIX_B:
+ return 1;
+ case STRTOSZ_DEFSUFFIX_KB:
+ return unit;
+ case STRTOSZ_DEFSUFFIX_MB:
+ return unit * unit;
+ case STRTOSZ_DEFSUFFIX_GB:
+ return unit * unit * unit;
+ case STRTOSZ_DEFSUFFIX_TB:
+ return unit * unit * unit * unit;
+ case STRTOSZ_DEFSUFFIX_PB:
+ return unit * unit * unit * unit * unit;
+ case STRTOSZ_DEFSUFFIX_EB:
+ return unit * unit * unit * unit * unit * unit;
+ }
+ return -1;
+}
+
+/*
+ * Convert string to bytes, allowing either B/b for bytes, K/k for KB,
+ * M/m for MB, G/g for GB or T/t for TB. End pointer will be returned
+ * in *end, if not NULL. Return -ERANGE on overflow, Return -EINVAL on
+ * other error.
+ */
+int64_t strtosz_suffix_unit(const char *nptr, char **end,
+ const char default_suffix, int64_t unit)
+{
+ int64_t retval = -EINVAL;
+ char *endptr;
+ unsigned char c;
+ int mul_required = 0;
+ double val, mul, integral, fraction;
+
+ errno = 0;
+ val = strtod(nptr, &endptr);
+ if (isnan(val) || endptr == nptr || errno != 0) {
+ goto fail;
+ }
+ fraction = modf(val, &integral);
+ if (fraction != 0) {
+ mul_required = 1;
+ }
+ c = *endptr;
+ mul = suffix_mul(c, unit);
+ if (mul >= 0) {
+ endptr++;
+ } else {
+ mul = suffix_mul(default_suffix, unit);
+ assert(mul >= 0);
+ }
+ if (mul == 1 && mul_required) {
+ goto fail;
+ }
+ if ((val * mul >= INT64_MAX) || val < 0) {
+ retval = -ERANGE;
+ goto fail;
+ }
+ retval = val * mul;
+
+fail:
+ if (end) {
+ *end = endptr;
+ }
+
+ return retval;
+}
+
+int64_t strtosz_suffix(const char *nptr, char **end, const char default_suffix)
+{
+ return strtosz_suffix_unit(nptr, end, default_suffix, 1024);
+}
+
+int64_t strtosz(const char *nptr, char **end)
+{
+ return strtosz_suffix(nptr, end, STRTOSZ_DEFSUFFIX_MB);
+}
+
+/**
+ * parse_uint:
+ *
+ * @s: String to parse
+ * @value: Destination for parsed integer value
+ * @endptr: Destination for pointer to first character not consumed
+ * @base: integer base, between 2 and 36 inclusive, or 0
+ *
+ * Parse unsigned integer
+ *
+ * Parsed syntax is like strtoull()'s: arbitrary whitespace, a single optional
+ * '+' or '-', an optional "0x" if @base is 0 or 16, one or more digits.
+ *
+ * If @s is null, or @base is invalid, or @s doesn't start with an
+ * integer in the syntax above, set *@value to 0, *@endptr to @s, and
+ * return -EINVAL.
+ *
+ * Set *@endptr to point right beyond the parsed integer (even if the integer
+ * overflows or is negative, all digits will be parsed and *@endptr will
+ * point right beyond them).
+ *
+ * If the integer is negative, set *@value to 0, and return -ERANGE.
+ *
+ * If the integer overflows unsigned long long, set *@value to
+ * ULLONG_MAX, and return -ERANGE.
+ *
+ * Else, set *@value to the parsed integer, and return 0.
+ */
+int parse_uint(const char *s, unsigned long long *value, char **endptr,
+ int base)
+{
+ int r = 0;
+ char *endp = (char *)s;
+ unsigned long long val = 0;
+
+ if (!s) {
+ r = -EINVAL;
+ goto out;
+ }
+
+ errno = 0;
+ val = strtoull(s, &endp, base);
+ if (errno) {
+ r = -errno;
+ goto out;
+ }
+
+ if (endp == s) {
+ r = -EINVAL;
+ goto out;
+ }
+
+ /* make sure we reject negative numbers: */
+ while (isspace((unsigned char)*s)) {
+ s++;
+ }
+ if (*s == '-') {
+ val = 0;
+ r = -ERANGE;
+ goto out;
+ }
+
+out:
+ *value = val;
+ *endptr = endp;
+ return r;
+}
+
+/**
+ * parse_uint_full:
+ *
+ * @s: String to parse
+ * @value: Destination for parsed integer value
+ * @base: integer base, between 2 and 36 inclusive, or 0
+ *
+ * Parse unsigned integer from entire string
+ *
+ * Have the same behavior of parse_uint(), but with an additional check
+ * for additional data after the parsed number. If extra characters are present
+ * after the parsed number, the function will return -EINVAL, and *@v will
+ * be set to 0.
+ */
+int parse_uint_full(const char *s, unsigned long long *value, int base)
+{
+ char *endp;
+ int r;
+
+ r = parse_uint(s, value, &endp, base);
+ if (r < 0) {
+ return r;
+ }
+ if (*endp) {
+ *value = 0;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int qemu_parse_fd(const char *param)
+{
+ int fd;
+ char *endptr = NULL;
+
+ fd = strtol(param, &endptr, 10);
+ if (*endptr || (fd == 0 && param == endptr)) {
+ return -1;
+ }
+ return fd;
+}
+
+/* round down to the nearest power of 2*/
+int64_t pow2floor(int64_t value)
+{
+ if (!is_power_of_2(value)) {
+ value = 0x8000000000000000ULL >> clz64(value);
+ }
+ return value;
+}
+
+/*
+ * Implementation of ULEB128 (http://en.wikipedia.org/wiki/LEB128)
+ * Input is limited to 14-bit numbers
+ */
+int uleb128_encode_small(uint8_t *out, uint32_t n)
+{
+ g_assert(n <= 0x3fff);
+ if (n < 0x80) {
+ *out++ = n;
+ return 1;
+ } else {
+ *out++ = (n & 0x7f) | 0x80;
+ *out++ = n >> 7;
+ return 2;
+ }
+}
+
+int uleb128_decode_small(const uint8_t *in, uint32_t *n)
+{
+ if (!(*in & 0x80)) {
+ *n = *in++;
+ return 1;
+ } else {
+ *n = *in++ & 0x7f;
+ /* we exceed 14 bit number */
+ if (*in & 0x80) {
+ return -1;
+ }
+ *n |= *in++ << 7;
+ return 2;
+ }
+}
+
+/*
+ * helper to parse debug environment variables
+ */
+int parse_debug_env(const char *name, int max, int initial)
+{
+ char *debug_env = getenv(name);
+ char *inv = NULL;
+ int debug;
+
+ if (!debug_env) {
+ return initial;
+ }
+ debug = strtol(debug_env, &inv, 10);
+ if (inv == debug_env) {
+ return initial;
+ }
+ if (debug < 0 || debug > max) {
+ fprintf(stderr, "warning: %s not in [0, %d]", name, max);
+ return initial;
+ }
+ return debug;
+}
diff --git a/contrib/qemu/util/error.c b/contrib/qemu/util/error.c
new file mode 100644
index 000000000..53b04354a
--- /dev/null
+++ b/contrib/qemu/util/error.c
@@ -0,0 +1,120 @@
+/*
+ * QEMU Error Objects
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2. See
+ * the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qemu-common.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qjson.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi-types.h"
+#include "qapi/qmp/qerror.h"
+
+struct Error
+{
+ char *msg;
+ ErrorClass err_class;
+};
+
+void error_set(Error **errp, ErrorClass err_class, const char *fmt, ...)
+{
+ Error *err;
+ va_list ap;
+
+ if (errp == NULL) {
+ return;
+ }
+ assert(*errp == NULL);
+
+ err = g_malloc0(sizeof(*err));
+
+ va_start(ap, fmt);
+ err->msg = g_strdup_vprintf(fmt, ap);
+ va_end(ap);
+ err->err_class = err_class;
+
+ *errp = err;
+}
+
+void error_set_errno(Error **errp, int os_errno, ErrorClass err_class,
+ const char *fmt, ...)
+{
+ Error *err;
+ char *msg1;
+ va_list ap;
+
+ if (errp == NULL) {
+ return;
+ }
+ assert(*errp == NULL);
+
+ err = g_malloc0(sizeof(*err));
+
+ va_start(ap, fmt);
+ msg1 = g_strdup_vprintf(fmt, ap);
+ if (os_errno != 0) {
+ err->msg = g_strdup_printf("%s: %s", msg1, strerror(os_errno));
+ g_free(msg1);
+ } else {
+ err->msg = msg1;
+ }
+ va_end(ap);
+ err->err_class = err_class;
+
+ *errp = err;
+}
+
+void error_setg_file_open(Error **errp, int os_errno, const char *filename)
+{
+ error_setg_errno(errp, os_errno, "Could not open '%s'", filename);
+}
+
+Error *error_copy(const Error *err)
+{
+ Error *err_new;
+
+ err_new = g_malloc0(sizeof(*err));
+ err_new->msg = g_strdup(err->msg);
+ err_new->err_class = err->err_class;
+
+ return err_new;
+}
+
+bool error_is_set(Error **errp)
+{
+ return (errp && *errp);
+}
+
+ErrorClass error_get_class(const Error *err)
+{
+ return err->err_class;
+}
+
+const char *error_get_pretty(Error *err)
+{
+ return err->msg;
+}
+
+void error_free(Error *err)
+{
+ if (err) {
+ g_free(err->msg);
+ g_free(err);
+ }
+}
+
+void error_propagate(Error **dst_err, Error *local_err)
+{
+ if (dst_err && !*dst_err) {
+ *dst_err = local_err;
+ } else if (local_err) {
+ error_free(local_err);
+ }
+}
diff --git a/contrib/qemu/util/hbitmap.c b/contrib/qemu/util/hbitmap.c
new file mode 100644
index 000000000..d93683128
--- /dev/null
+++ b/contrib/qemu/util/hbitmap.c
@@ -0,0 +1,402 @@
+/*
+ * Hierarchical Bitmap Data Type
+ *
+ * Copyright Red Hat, Inc., 2012
+ *
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#include <string.h>
+#include <glib.h>
+#include <assert.h>
+#include "qemu/osdep.h"
+#include "qemu/hbitmap.h"
+#include "qemu/host-utils.h"
+#include "trace.h"
+
+/* HBitmaps provides an array of bits. The bits are stored as usual in an
+ * array of unsigned longs, but HBitmap is also optimized to provide fast
+ * iteration over set bits; going from one bit to the next is O(logB n)
+ * worst case, with B = sizeof(long) * CHAR_BIT: the result is low enough
+ * that the number of levels is in fact fixed.
+ *
+ * In order to do this, it stacks multiple bitmaps with progressively coarser
+ * granularity; in all levels except the last, bit N is set iff the N-th
+ * unsigned long is nonzero in the immediately next level. When iteration
+ * completes on the last level it can examine the 2nd-last level to quickly
+ * skip entire words, and even do so recursively to skip blocks of 64 words or
+ * powers thereof (32 on 32-bit machines).
+ *
+ * Given an index in the bitmap, it can be split in group of bits like
+ * this (for the 64-bit case):
+ *
+ * bits 0-57 => word in the last bitmap | bits 58-63 => bit in the word
+ * bits 0-51 => word in the 2nd-last bitmap | bits 52-57 => bit in the word
+ * bits 0-45 => word in the 3rd-last bitmap | bits 46-51 => bit in the word
+ *
+ * So it is easy to move up simply by shifting the index right by
+ * log2(BITS_PER_LONG) bits. To move down, you shift the index left
+ * similarly, and add the word index within the group. Iteration uses
+ * ffs (find first set bit) to find the next word to examine; this
+ * operation can be done in constant time in most current architectures.
+ *
+ * Setting or clearing a range of m bits on all levels, the work to perform
+ * is O(m + m/W + m/W^2 + ...), which is O(m) like on a regular bitmap.
+ *
+ * When iterating on a bitmap, each bit (on any level) is only visited
+ * once. Hence, The total cost of visiting a bitmap with m bits in it is
+ * the number of bits that are set in all bitmaps. Unless the bitmap is
+ * extremely sparse, this is also O(m + m/W + m/W^2 + ...), so the amortized
+ * cost of advancing from one bit to the next is usually constant (worst case
+ * O(logB n) as in the non-amortized complexity).
+ */
+
+struct HBitmap {
+ /* Number of total bits in the bottom level. */
+ uint64_t size;
+
+ /* Number of set bits in the bottom level. */
+ uint64_t count;
+
+ /* A scaling factor. Given a granularity of G, each bit in the bitmap will
+ * will actually represent a group of 2^G elements. Each operation on a
+ * range of bits first rounds the bits to determine which group they land
+ * in, and then affect the entire page; iteration will only visit the first
+ * bit of each group. Here is an example of operations in a size-16,
+ * granularity-1 HBitmap:
+ *
+ * initial state 00000000
+ * set(start=0, count=9) 11111000 (iter: 0, 2, 4, 6, 8)
+ * reset(start=1, count=3) 00111000 (iter: 4, 6, 8)
+ * set(start=9, count=2) 00111100 (iter: 4, 6, 8, 10)
+ * reset(start=5, count=5) 00000000
+ *
+ * From an implementation point of view, when setting or resetting bits,
+ * the bitmap will scale bit numbers right by this amount of bits. When
+ * iterating, the bitmap will scale bit numbers left by this amount of
+ * bits.
+ */
+ int granularity;
+
+ /* A number of progressively less coarse bitmaps (i.e. level 0 is the
+ * coarsest). Each bit in level N represents a word in level N+1 that
+ * has a set bit, except the last level where each bit represents the
+ * actual bitmap.
+ *
+ * Note that all bitmaps have the same number of levels. Even a 1-bit
+ * bitmap will still allocate HBITMAP_LEVELS arrays.
+ */
+ unsigned long *levels[HBITMAP_LEVELS];
+};
+
+static inline int popcountl(unsigned long l)
+{
+ return BITS_PER_LONG == 32 ? ctpop32(l) : ctpop64(l);
+}
+
+/* Advance hbi to the next nonzero word and return it. hbi->pos
+ * is updated. Returns zero if we reach the end of the bitmap.
+ */
+unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi)
+{
+ size_t pos = hbi->pos;
+ const HBitmap *hb = hbi->hb;
+ unsigned i = HBITMAP_LEVELS - 1;
+
+ unsigned long cur;
+ do {
+ cur = hbi->cur[--i];
+ pos >>= BITS_PER_LEVEL;
+ } while (cur == 0);
+
+ /* Check for end of iteration. We always use fewer than BITS_PER_LONG
+ * bits in the level 0 bitmap; thus we can repurpose the most significant
+ * bit as a sentinel. The sentinel is set in hbitmap_alloc and ensures
+ * that the above loop ends even without an explicit check on i.
+ */
+
+ if (i == 0 && cur == (1UL << (BITS_PER_LONG - 1))) {
+ return 0;
+ }
+ for (; i < HBITMAP_LEVELS - 1; i++) {
+ /* Shift back pos to the left, matching the right shifts above.
+ * The index of this word's least significant set bit provides
+ * the low-order bits.
+ */
+ assert(cur);
+ pos = (pos << BITS_PER_LEVEL) + ctzl(cur);
+ hbi->cur[i] = cur & (cur - 1);
+
+ /* Set up next level for iteration. */
+ cur = hb->levels[i + 1][pos];
+ }
+
+ hbi->pos = pos;
+ trace_hbitmap_iter_skip_words(hbi->hb, hbi, pos, cur);
+
+ assert(cur);
+ return cur;
+}
+
+void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first)
+{
+ unsigned i, bit;
+ uint64_t pos;
+
+ hbi->hb = hb;
+ pos = first >> hb->granularity;
+ assert(pos < hb->size);
+ hbi->pos = pos >> BITS_PER_LEVEL;
+ hbi->granularity = hb->granularity;
+
+ for (i = HBITMAP_LEVELS; i-- > 0; ) {
+ bit = pos & (BITS_PER_LONG - 1);
+ pos >>= BITS_PER_LEVEL;
+
+ /* Drop bits representing items before first. */
+ hbi->cur[i] = hb->levels[i][pos] & ~((1UL << bit) - 1);
+
+ /* We have already added level i+1, so the lowest set bit has
+ * been processed. Clear it.
+ */
+ if (i != HBITMAP_LEVELS - 1) {
+ hbi->cur[i] &= ~(1UL << bit);
+ }
+ }
+}
+
+bool hbitmap_empty(const HBitmap *hb)
+{
+ return hb->count == 0;
+}
+
+int hbitmap_granularity(const HBitmap *hb)
+{
+ return hb->granularity;
+}
+
+uint64_t hbitmap_count(const HBitmap *hb)
+{
+ return hb->count << hb->granularity;
+}
+
+/* Count the number of set bits between start and end, not accounting for
+ * the granularity. Also an example of how to use hbitmap_iter_next_word.
+ */
+static uint64_t hb_count_between(HBitmap *hb, uint64_t start, uint64_t last)
+{
+ HBitmapIter hbi;
+ uint64_t count = 0;
+ uint64_t end = last + 1;
+ unsigned long cur;
+ size_t pos;
+
+ hbitmap_iter_init(&hbi, hb, start << hb->granularity);
+ for (;;) {
+ pos = hbitmap_iter_next_word(&hbi, &cur);
+ if (pos >= (end >> BITS_PER_LEVEL)) {
+ break;
+ }
+ count += popcountl(cur);
+ }
+
+ if (pos == (end >> BITS_PER_LEVEL)) {
+ /* Drop bits representing the END-th and subsequent items. */
+ int bit = end & (BITS_PER_LONG - 1);
+ cur &= (1UL << bit) - 1;
+ count += popcountl(cur);
+ }
+
+ return count;
+}
+
+/* Setting starts at the last layer and propagates up if an element
+ * changes from zero to non-zero.
+ */
+static inline bool hb_set_elem(unsigned long *elem, uint64_t start, uint64_t last)
+{
+ unsigned long mask;
+ bool changed;
+
+ assert((last >> BITS_PER_LEVEL) == (start >> BITS_PER_LEVEL));
+ assert(start <= last);
+
+ mask = 2UL << (last & (BITS_PER_LONG - 1));
+ mask -= 1UL << (start & (BITS_PER_LONG - 1));
+ changed = (*elem == 0);
+ *elem |= mask;
+ return changed;
+}
+
+/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)... */
+static void hb_set_between(HBitmap *hb, int level, uint64_t start, uint64_t last)
+{
+ size_t pos = start >> BITS_PER_LEVEL;
+ size_t lastpos = last >> BITS_PER_LEVEL;
+ bool changed = false;
+ size_t i;
+
+ i = pos;
+ if (i < lastpos) {
+ uint64_t next = (start | (BITS_PER_LONG - 1)) + 1;
+ changed |= hb_set_elem(&hb->levels[level][i], start, next - 1);
+ for (;;) {
+ start = next;
+ next += BITS_PER_LONG;
+ if (++i == lastpos) {
+ break;
+ }
+ changed |= (hb->levels[level][i] == 0);
+ hb->levels[level][i] = ~0UL;
+ }
+ }
+ changed |= hb_set_elem(&hb->levels[level][i], start, last);
+
+ /* If there was any change in this layer, we may have to update
+ * the one above.
+ */
+ if (level > 0 && changed) {
+ hb_set_between(hb, level - 1, pos, lastpos);
+ }
+}
+
+void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count)
+{
+ /* Compute range in the last layer. */
+ uint64_t last = start + count - 1;
+
+ trace_hbitmap_set(hb, start, count,
+ start >> hb->granularity, last >> hb->granularity);
+
+ start >>= hb->granularity;
+ last >>= hb->granularity;
+ count = last - start + 1;
+
+ hb->count += count - hb_count_between(hb, start, last);
+ hb_set_between(hb, HBITMAP_LEVELS - 1, start, last);
+}
+
+/* Resetting works the other way round: propagate up if the new
+ * value is zero.
+ */
+static inline bool hb_reset_elem(unsigned long *elem, uint64_t start, uint64_t last)
+{
+ unsigned long mask;
+ bool blanked;
+
+ assert((last >> BITS_PER_LEVEL) == (start >> BITS_PER_LEVEL));
+ assert(start <= last);
+
+ mask = 2UL << (last & (BITS_PER_LONG - 1));
+ mask -= 1UL << (start & (BITS_PER_LONG - 1));
+ blanked = *elem != 0 && ((*elem & ~mask) == 0);
+ *elem &= ~mask;
+ return blanked;
+}
+
+/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)... */
+static void hb_reset_between(HBitmap *hb, int level, uint64_t start, uint64_t last)
+{
+ size_t pos = start >> BITS_PER_LEVEL;
+ size_t lastpos = last >> BITS_PER_LEVEL;
+ bool changed = false;
+ size_t i;
+
+ i = pos;
+ if (i < lastpos) {
+ uint64_t next = (start | (BITS_PER_LONG - 1)) + 1;
+
+ /* Here we need a more complex test than when setting bits. Even if
+ * something was changed, we must not blank bits in the upper level
+ * unless the lower-level word became entirely zero. So, remove pos
+ * from the upper-level range if bits remain set.
+ */
+ if (hb_reset_elem(&hb->levels[level][i], start, next - 1)) {
+ changed = true;
+ } else {
+ pos++;
+ }
+
+ for (;;) {
+ start = next;
+ next += BITS_PER_LONG;
+ if (++i == lastpos) {
+ break;
+ }
+ changed |= (hb->levels[level][i] != 0);
+ hb->levels[level][i] = 0UL;
+ }
+ }
+
+ /* Same as above, this time for lastpos. */
+ if (hb_reset_elem(&hb->levels[level][i], start, last)) {
+ changed = true;
+ } else {
+ lastpos--;
+ }
+
+ if (level > 0 && changed) {
+ hb_reset_between(hb, level - 1, pos, lastpos);
+ }
+}
+
+void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count)
+{
+ /* Compute range in the last layer. */
+ uint64_t last = start + count - 1;
+
+ trace_hbitmap_reset(hb, start, count,
+ start >> hb->granularity, last >> hb->granularity);
+
+ start >>= hb->granularity;
+ last >>= hb->granularity;
+
+ hb->count -= hb_count_between(hb, start, last);
+ hb_reset_between(hb, HBITMAP_LEVELS - 1, start, last);
+}
+
+bool hbitmap_get(const HBitmap *hb, uint64_t item)
+{
+ /* Compute position and bit in the last layer. */
+ uint64_t pos = item >> hb->granularity;
+ unsigned long bit = 1UL << (pos & (BITS_PER_LONG - 1));
+
+ return (hb->levels[HBITMAP_LEVELS - 1][pos >> BITS_PER_LEVEL] & bit) != 0;
+}
+
+void hbitmap_free(HBitmap *hb)
+{
+ unsigned i;
+ for (i = HBITMAP_LEVELS; i-- > 0; ) {
+ g_free(hb->levels[i]);
+ }
+ g_free(hb);
+}
+
+HBitmap *hbitmap_alloc(uint64_t size, int granularity)
+{
+ HBitmap *hb = g_malloc0(sizeof (struct HBitmap));
+ unsigned i;
+
+ assert(granularity >= 0 && granularity < 64);
+ size = (size + (1ULL << granularity) - 1) >> granularity;
+ assert(size <= ((uint64_t)1 << HBITMAP_LOG_MAX_SIZE));
+
+ hb->size = size;
+ hb->granularity = granularity;
+ for (i = HBITMAP_LEVELS; i-- > 0; ) {
+ size = MAX((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1);
+ hb->levels[i] = g_malloc0(size * sizeof(unsigned long));
+ }
+
+ /* We necessarily have free bits in level 0 due to the definition
+ * of HBITMAP_LEVELS, so use one for a sentinel. This speeds up
+ * hbitmap_iter_skip_words.
+ */
+ assert(size == 1);
+ hb->levels[0][0] |= 1UL << (BITS_PER_LONG - 1);
+ return hb;
+}
diff --git a/contrib/qemu/util/hexdump.c b/contrib/qemu/util/hexdump.c
new file mode 100644
index 000000000..969b3406c
--- /dev/null
+++ b/contrib/qemu/util/hexdump.c
@@ -0,0 +1,37 @@
+/*
+ * Helper to hexdump a buffer
+ *
+ * Copyright (c) 2013 Red Hat, Inc.
+ * Copyright (c) 2013 Gerd Hoffmann <kraxel@redhat.com>
+ * Copyright (c) 2013 Peter Crosthwaite <peter.crosthwaite@xilinx.com>
+ * Copyright (c) 2013 Xilinx, Inc
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu-common.h"
+
+void qemu_hexdump(const char *buf, FILE *fp, const char *prefix, size_t size)
+{
+ unsigned int b;
+
+ for (b = 0; b < size; b++) {
+ if ((b % 16) == 0) {
+ fprintf(fp, "%s: %04x:", prefix, b);
+ }
+ if ((b % 4) == 0) {
+ fprintf(fp, " ");
+ }
+ fprintf(fp, " %02x", (unsigned char)buf[b]);
+ if ((b % 16) == 15) {
+ fprintf(fp, "\n");
+ }
+ }
+ if ((b % 16) != 0) {
+ fprintf(fp, "\n");
+ }
+}
diff --git a/contrib/qemu/util/iov.c b/contrib/qemu/util/iov.c
new file mode 100644
index 000000000..cc6e837c8
--- /dev/null
+++ b/contrib/qemu/util/iov.c
@@ -0,0 +1,426 @@
+/*
+ * Helpers for getting linearized buffers from iov / filling buffers into iovs
+ *
+ * Copyright IBM, Corp. 2007, 2008
+ * Copyright (C) 2010 Red Hat, Inc.
+ *
+ * Author(s):
+ * Anthony Liguori <aliguori@us.ibm.com>
+ * Amit Shah <amit.shah@redhat.com>
+ * Michael Tokarev <mjt@tls.msk.ru>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu/iov.h"
+
+#ifdef _WIN32
+# include <windows.h>
+# include <winsock2.h>
+#else
+# include <sys/types.h>
+# include <sys/socket.h>
+#endif
+
+size_t iov_from_buf(const struct iovec *iov, unsigned int iov_cnt,
+ size_t offset, const void *buf, size_t bytes)
+{
+ size_t done;
+ unsigned int i;
+ for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) {
+ if (offset < iov[i].iov_len) {
+ size_t len = MIN(iov[i].iov_len - offset, bytes - done);
+ memcpy(iov[i].iov_base + offset, buf + done, len);
+ done += len;
+ offset = 0;
+ } else {
+ offset -= iov[i].iov_len;
+ }
+ }
+ assert(offset == 0);
+ return done;
+}
+
+size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt,
+ size_t offset, void *buf, size_t bytes)
+{
+ size_t done;
+ unsigned int i;
+ for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) {
+ if (offset < iov[i].iov_len) {
+ size_t len = MIN(iov[i].iov_len - offset, bytes - done);
+ memcpy(buf + done, iov[i].iov_base + offset, len);
+ done += len;
+ offset = 0;
+ } else {
+ offset -= iov[i].iov_len;
+ }
+ }
+ assert(offset == 0);
+ return done;
+}
+
+size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt,
+ size_t offset, int fillc, size_t bytes)
+{
+ size_t done;
+ unsigned int i;
+ for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) {
+ if (offset < iov[i].iov_len) {
+ size_t len = MIN(iov[i].iov_len - offset, bytes - done);
+ memset(iov[i].iov_base + offset, fillc, len);
+ done += len;
+ offset = 0;
+ } else {
+ offset -= iov[i].iov_len;
+ }
+ }
+ assert(offset == 0);
+ return done;
+}
+
+size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt)
+{
+ size_t len;
+ unsigned int i;
+
+ len = 0;
+ for (i = 0; i < iov_cnt; i++) {
+ len += iov[i].iov_len;
+ }
+ return len;
+}
+
+/* helper function for iov_send_recv() */
+static ssize_t
+do_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, bool do_send)
+{
+#ifdef CONFIG_POSIX
+ ssize_t ret;
+ struct msghdr msg;
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_iov = iov;
+ msg.msg_iovlen = iov_cnt;
+ do {
+ ret = do_send
+ ? sendmsg(sockfd, &msg, 0)
+ : recvmsg(sockfd, &msg, 0);
+ } while (ret < 0 && errno == EINTR);
+ return ret;
+#else
+ /* else send piece-by-piece */
+ /*XXX Note: windows has WSASend() and WSARecv() */
+ unsigned i = 0;
+ ssize_t ret = 0;
+ while (i < iov_cnt) {
+ ssize_t r = do_send
+ ? send(sockfd, iov[i].iov_base, iov[i].iov_len, 0)
+ : recv(sockfd, iov[i].iov_base, iov[i].iov_len, 0);
+ if (r > 0) {
+ ret += r;
+ } else if (!r) {
+ break;
+ } else if (errno == EINTR) {
+ continue;
+ } else {
+ /* else it is some "other" error,
+ * only return if there was no data processed. */
+ if (ret == 0) {
+ ret = -1;
+ }
+ break;
+ }
+ i++;
+ }
+ return ret;
+#endif
+}
+
+ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt,
+ size_t offset, size_t bytes,
+ bool do_send)
+{
+ ssize_t total = 0;
+ ssize_t ret;
+ size_t orig_len, tail;
+ unsigned niov;
+
+ while (bytes > 0) {
+ /* Find the start position, skipping `offset' bytes:
+ * first, skip all full-sized vector elements, */
+ for (niov = 0; niov < iov_cnt && offset >= iov[niov].iov_len; ++niov) {
+ offset -= iov[niov].iov_len;
+ }
+
+ /* niov == iov_cnt would only be valid if bytes == 0, which
+ * we already ruled out in the loop condition. */
+ assert(niov < iov_cnt);
+ iov += niov;
+ iov_cnt -= niov;
+
+ if (offset) {
+ /* second, skip `offset' bytes from the (now) first element,
+ * undo it on exit */
+ iov[0].iov_base += offset;
+ iov[0].iov_len -= offset;
+ }
+ /* Find the end position skipping `bytes' bytes: */
+ /* first, skip all full-sized elements */
+ tail = bytes;
+ for (niov = 0; niov < iov_cnt && iov[niov].iov_len <= tail; ++niov) {
+ tail -= iov[niov].iov_len;
+ }
+ if (tail) {
+ /* second, fixup the last element, and remember the original
+ * length */
+ assert(niov < iov_cnt);
+ assert(iov[niov].iov_len > tail);
+ orig_len = iov[niov].iov_len;
+ iov[niov++].iov_len = tail;
+ }
+
+ ret = do_send_recv(sockfd, iov, niov, do_send);
+
+ /* Undo the changes above before checking for errors */
+ if (tail) {
+ iov[niov-1].iov_len = orig_len;
+ }
+ if (offset) {
+ iov[0].iov_base -= offset;
+ iov[0].iov_len += offset;
+ }
+
+ if (ret < 0) {
+ assert(errno != EINTR);
+ if (errno == EAGAIN && total > 0) {
+ return total;
+ }
+ return -1;
+ }
+
+ /* Prepare for the next iteration */
+ offset += ret;
+ total += ret;
+ bytes -= ret;
+ }
+
+ return total;
+}
+
+
+void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt,
+ FILE *fp, const char *prefix, size_t limit)
+{
+ int v;
+ size_t size = 0;
+ char *buf;
+
+ for (v = 0; v < iov_cnt; v++) {
+ size += iov[v].iov_len;
+ }
+ size = size > limit ? limit : size;
+ buf = g_malloc(size);
+ iov_to_buf(iov, iov_cnt, 0, buf, size);
+ qemu_hexdump(buf, fp, prefix, size);
+ g_free(buf);
+}
+
+unsigned iov_copy(struct iovec *dst_iov, unsigned int dst_iov_cnt,
+ const struct iovec *iov, unsigned int iov_cnt,
+ size_t offset, size_t bytes)
+{
+ size_t len;
+ unsigned int i, j;
+ for (i = 0, j = 0; i < iov_cnt && j < dst_iov_cnt && bytes; i++) {
+ if (offset >= iov[i].iov_len) {
+ offset -= iov[i].iov_len;
+ continue;
+ }
+ len = MIN(bytes, iov[i].iov_len - offset);
+
+ dst_iov[j].iov_base = iov[i].iov_base + offset;
+ dst_iov[j].iov_len = len;
+ j++;
+ bytes -= len;
+ offset = 0;
+ }
+ assert(offset == 0);
+ return j;
+}
+
+/* io vectors */
+
+void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint)
+{
+ qiov->iov = g_malloc(alloc_hint * sizeof(struct iovec));
+ qiov->niov = 0;
+ qiov->nalloc = alloc_hint;
+ qiov->size = 0;
+}
+
+void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov)
+{
+ int i;
+
+ qiov->iov = iov;
+ qiov->niov = niov;
+ qiov->nalloc = -1;
+ qiov->size = 0;
+ for (i = 0; i < niov; i++)
+ qiov->size += iov[i].iov_len;
+}
+
+void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len)
+{
+ assert(qiov->nalloc != -1);
+
+ if (qiov->niov == qiov->nalloc) {
+ qiov->nalloc = 2 * qiov->nalloc + 1;
+ qiov->iov = g_realloc(qiov->iov, qiov->nalloc * sizeof(struct iovec));
+ }
+ qiov->iov[qiov->niov].iov_base = base;
+ qiov->iov[qiov->niov].iov_len = len;
+ qiov->size += len;
+ ++qiov->niov;
+}
+
+/*
+ * Concatenates (partial) iovecs from src_iov to the end of dst.
+ * It starts copying after skipping `soffset' bytes at the
+ * beginning of src and adds individual vectors from src to
+ * dst copies up to `sbytes' bytes total, or up to the end
+ * of src_iov if it comes first. This way, it is okay to specify
+ * very large value for `sbytes' to indicate "up to the end
+ * of src".
+ * Only vector pointers are processed, not the actual data buffers.
+ */
+void qemu_iovec_concat_iov(QEMUIOVector *dst,
+ struct iovec *src_iov, unsigned int src_cnt,
+ size_t soffset, size_t sbytes)
+{
+ int i;
+ size_t done;
+
+ if (!sbytes) {
+ return;
+ }
+ assert(dst->nalloc != -1);
+ for (i = 0, done = 0; done < sbytes && i < src_cnt; i++) {
+ if (soffset < src_iov[i].iov_len) {
+ size_t len = MIN(src_iov[i].iov_len - soffset, sbytes - done);
+ qemu_iovec_add(dst, src_iov[i].iov_base + soffset, len);
+ done += len;
+ soffset = 0;
+ } else {
+ soffset -= src_iov[i].iov_len;
+ }
+ }
+ assert(soffset == 0); /* offset beyond end of src */
+}
+
+/*
+ * Concatenates (partial) iovecs from src to the end of dst.
+ * It starts copying after skipping `soffset' bytes at the
+ * beginning of src and adds individual vectors from src to
+ * dst copies up to `sbytes' bytes total, or up to the end
+ * of src if it comes first. This way, it is okay to specify
+ * very large value for `sbytes' to indicate "up to the end
+ * of src".
+ * Only vector pointers are processed, not the actual data buffers.
+ */
+void qemu_iovec_concat(QEMUIOVector *dst,
+ QEMUIOVector *src, size_t soffset, size_t sbytes)
+{
+ qemu_iovec_concat_iov(dst, src->iov, src->niov, soffset, sbytes);
+}
+
+void qemu_iovec_destroy(QEMUIOVector *qiov)
+{
+ assert(qiov->nalloc != -1);
+
+ qemu_iovec_reset(qiov);
+ g_free(qiov->iov);
+ qiov->nalloc = 0;
+ qiov->iov = NULL;
+}
+
+void qemu_iovec_reset(QEMUIOVector *qiov)
+{
+ assert(qiov->nalloc != -1);
+
+ qiov->niov = 0;
+ qiov->size = 0;
+}
+
+size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset,
+ void *buf, size_t bytes)
+{
+ return iov_to_buf(qiov->iov, qiov->niov, offset, buf, bytes);
+}
+
+size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset,
+ const void *buf, size_t bytes)
+{
+ return iov_from_buf(qiov->iov, qiov->niov, offset, buf, bytes);
+}
+
+size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset,
+ int fillc, size_t bytes)
+{
+ return iov_memset(qiov->iov, qiov->niov, offset, fillc, bytes);
+}
+
+size_t iov_discard_front(struct iovec **iov, unsigned int *iov_cnt,
+ size_t bytes)
+{
+ size_t total = 0;
+ struct iovec *cur;
+
+ for (cur = *iov; *iov_cnt > 0; cur++) {
+ if (cur->iov_len > bytes) {
+ cur->iov_base += bytes;
+ cur->iov_len -= bytes;
+ total += bytes;
+ break;
+ }
+
+ bytes -= cur->iov_len;
+ total += cur->iov_len;
+ *iov_cnt -= 1;
+ }
+
+ *iov = cur;
+ return total;
+}
+
+size_t iov_discard_back(struct iovec *iov, unsigned int *iov_cnt,
+ size_t bytes)
+{
+ size_t total = 0;
+ struct iovec *cur;
+
+ if (*iov_cnt == 0) {
+ return 0;
+ }
+
+ cur = iov + (*iov_cnt - 1);
+
+ while (*iov_cnt > 0) {
+ if (cur->iov_len > bytes) {
+ cur->iov_len -= bytes;
+ total += bytes;
+ break;
+ }
+
+ bytes -= cur->iov_len;
+ total += cur->iov_len;
+ cur--;
+ *iov_cnt -= 1;
+ }
+
+ return total;
+}
diff --git a/contrib/qemu/util/module.c b/contrib/qemu/util/module.c
new file mode 100644
index 000000000..7acc33d07
--- /dev/null
+++ b/contrib/qemu/util/module.c
@@ -0,0 +1,81 @@
+/*
+ * QEMU Module Infrastructure
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu-common.h"
+#include "qemu/queue.h"
+#include "qemu/module.h"
+
+typedef struct ModuleEntry
+{
+ void (*init)(void);
+ QTAILQ_ENTRY(ModuleEntry) node;
+} ModuleEntry;
+
+typedef QTAILQ_HEAD(, ModuleEntry) ModuleTypeList;
+
+static ModuleTypeList init_type_list[MODULE_INIT_MAX];
+
+static void init_types(void)
+{
+ static int inited;
+ int i;
+
+ if (inited) {
+ return;
+ }
+
+ for (i = 0; i < MODULE_INIT_MAX; i++) {
+ QTAILQ_INIT(&init_type_list[i]);
+ }
+
+ inited = 1;
+}
+
+
+static ModuleTypeList *find_type(module_init_type type)
+{
+ ModuleTypeList *l;
+
+ init_types();
+
+ l = &init_type_list[type];
+
+ return l;
+}
+
+void register_module_init(void (*fn)(void), module_init_type type)
+{
+ ModuleEntry *e;
+ ModuleTypeList *l;
+
+ e = g_malloc0(sizeof(*e));
+ e->init = fn;
+
+ l = find_type(type);
+
+ QTAILQ_INSERT_TAIL(l, e, node);
+}
+
+void module_call_init(module_init_type type)
+{
+ ModuleTypeList *l;
+ ModuleEntry *e;
+
+ l = find_type(type);
+
+ QTAILQ_FOREACH(e, l, node) {
+ e->init();
+ }
+}
diff --git a/contrib/qemu/util/oslib-posix.c b/contrib/qemu/util/oslib-posix.c
new file mode 100644
index 000000000..3dc8b1b07
--- /dev/null
+++ b/contrib/qemu/util/oslib-posix.c
@@ -0,0 +1,243 @@
+/*
+ * os-posix-lib.c
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2010 Red Hat, Inc.
+ *
+ * QEMU library functions on POSIX which are shared between QEMU and
+ * the QEMU tools.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/* The following block of code temporarily renames the daemon() function so the
+ compiler does not see the warning associated with it in stdlib.h on OSX */
+#ifdef __APPLE__
+#define daemon qemu_fake_daemon_function
+#include <stdlib.h>
+#undef daemon
+extern int daemon(int, int);
+#endif
+
+#if defined(__linux__) && (defined(__x86_64__) || defined(__arm__))
+ /* Use 2 MiB alignment so transparent hugepages can be used by KVM.
+ Valgrind does not support alignments larger than 1 MiB,
+ therefore we need special code which handles running on Valgrind. */
+# define QEMU_VMALLOC_ALIGN (512 * 4096)
+#elif defined(__linux__) && defined(__s390x__)
+ /* Use 1 MiB (segment size) alignment so gmap can be used by KVM. */
+# define QEMU_VMALLOC_ALIGN (256 * 4096)
+#else
+# define QEMU_VMALLOC_ALIGN getpagesize()
+#endif
+
+#include <glib/gprintf.h>
+
+#include "config-host.h"
+#include "sysemu/sysemu.h"
+#include "trace.h"
+#include "qemu/sockets.h"
+#include <sys/mman.h>
+
+#ifdef CONFIG_LINUX
+#include <sys/syscall.h>
+#endif
+
+int qemu_get_thread_id(void)
+{
+#if defined(__linux__)
+ return syscall(SYS_gettid);
+#else
+ return getpid();
+#endif
+}
+
+int qemu_daemon(int nochdir, int noclose)
+{
+ return daemon(nochdir, noclose);
+}
+
+void *qemu_oom_check(void *ptr)
+{
+ if (ptr == NULL) {
+ fprintf(stderr, "Failed to allocate memory: %s\n", strerror(errno));
+ abort();
+ }
+ return ptr;
+}
+
+void *qemu_memalign(size_t alignment, size_t size)
+{
+ void *ptr;
+#if defined(_POSIX_C_SOURCE) && !defined(__sun__)
+ int ret;
+ ret = posix_memalign(&ptr, alignment, size);
+ if (ret != 0) {
+ fprintf(stderr, "Failed to allocate %zu B: %s\n",
+ size, strerror(ret));
+ abort();
+ }
+#elif defined(CONFIG_BSD)
+ ptr = qemu_oom_check(valloc(size));
+#else
+ ptr = qemu_oom_check(memalign(alignment, size));
+#endif
+ trace_qemu_memalign(alignment, size, ptr);
+ return ptr;
+}
+
+/* alloc shared memory pages */
+void *qemu_anon_ram_alloc(size_t size)
+{
+ size_t align = QEMU_VMALLOC_ALIGN;
+ size_t total = size + align - getpagesize();
+ void *ptr = mmap(0, total, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
+
+ if (ptr == MAP_FAILED) {
+ fprintf(stderr, "Failed to allocate %zu B: %s\n",
+ size, strerror(errno));
+ abort();
+ }
+
+ ptr += offset;
+ total -= offset;
+
+ if (offset > 0) {
+ munmap(ptr - offset, offset);
+ }
+ if (total > size) {
+ munmap(ptr + size, total - size);
+ }
+
+ trace_qemu_anon_ram_alloc(size, ptr);
+ return ptr;
+}
+
+void qemu_vfree(void *ptr)
+{
+ trace_qemu_vfree(ptr);
+ free(ptr);
+}
+
+void qemu_anon_ram_free(void *ptr, size_t size)
+{
+ trace_qemu_anon_ram_free(ptr, size);
+ if (ptr) {
+ munmap(ptr, size);
+ }
+}
+
+void qemu_set_block(int fd)
+{
+ int f;
+ f = fcntl(fd, F_GETFL);
+ fcntl(fd, F_SETFL, f & ~O_NONBLOCK);
+}
+
+void qemu_set_nonblock(int fd)
+{
+ int f;
+ f = fcntl(fd, F_GETFL);
+ fcntl(fd, F_SETFL, f | O_NONBLOCK);
+}
+
+void qemu_set_cloexec(int fd)
+{
+ int f;
+ f = fcntl(fd, F_GETFD);
+ fcntl(fd, F_SETFD, f | FD_CLOEXEC);
+}
+
+/*
+ * Creates a pipe with FD_CLOEXEC set on both file descriptors
+ */
+int qemu_pipe(int pipefd[2])
+{
+ int ret;
+
+#ifdef CONFIG_PIPE2
+ ret = pipe2(pipefd, O_CLOEXEC);
+ if (ret != -1 || errno != ENOSYS) {
+ return ret;
+ }
+#endif
+ ret = pipe(pipefd);
+ if (ret == 0) {
+ qemu_set_cloexec(pipefd[0]);
+ qemu_set_cloexec(pipefd[1]);
+ }
+
+ return ret;
+}
+
+int qemu_utimens(const char *path, const struct timespec *times)
+{
+ struct timeval tv[2], tv_now;
+ struct stat st;
+ int i;
+#ifdef CONFIG_UTIMENSAT
+ int ret;
+
+ ret = utimensat(AT_FDCWD, path, times, AT_SYMLINK_NOFOLLOW);
+ if (ret != -1 || errno != ENOSYS) {
+ return ret;
+ }
+#endif
+ /* Fallback: use utimes() instead of utimensat() */
+
+ /* happy if special cases */
+ if (times[0].tv_nsec == UTIME_OMIT && times[1].tv_nsec == UTIME_OMIT) {
+ return 0;
+ }
+ if (times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW) {
+ return utimes(path, NULL);
+ }
+
+ /* prepare for hard cases */
+ if (times[0].tv_nsec == UTIME_NOW || times[1].tv_nsec == UTIME_NOW) {
+ gettimeofday(&tv_now, NULL);
+ }
+ if (times[0].tv_nsec == UTIME_OMIT || times[1].tv_nsec == UTIME_OMIT) {
+ stat(path, &st);
+ }
+
+ for (i = 0; i < 2; i++) {
+ if (times[i].tv_nsec == UTIME_NOW) {
+ tv[i].tv_sec = tv_now.tv_sec;
+ tv[i].tv_usec = tv_now.tv_usec;
+ } else if (times[i].tv_nsec == UTIME_OMIT) {
+ tv[i].tv_sec = (i == 0) ? st.st_atime : st.st_mtime;
+ tv[i].tv_usec = 0;
+ } else {
+ tv[i].tv_sec = times[i].tv_sec;
+ tv[i].tv_usec = times[i].tv_nsec / 1000;
+ }
+ }
+
+ return utimes(path, &tv[0]);
+}
+
+char *
+qemu_get_local_state_pathname(const char *relative_pathname)
+{
+ return g_strdup_printf("%s/%s", CONFIG_QEMU_LOCALSTATEDIR,
+ relative_pathname);
+}
diff --git a/contrib/qemu/util/qemu-error.c b/contrib/qemu/util/qemu-error.c
new file mode 100644
index 000000000..fec02c607
--- /dev/null
+++ b/contrib/qemu/util/qemu-error.c
@@ -0,0 +1,225 @@
+/*
+ * Error reporting
+ *
+ * Copyright (C) 2010 Red Hat Inc.
+ *
+ * Authors:
+ * Markus Armbruster <armbru@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <stdio.h>
+#include "monitor/monitor.h"
+
+/*
+ * Print to current monitor if we have one, else to stderr.
+ * TODO should return int, so callers can calculate width, but that
+ * requires surgery to monitor_vprintf(). Left for another day.
+ */
+void error_vprintf(const char *fmt, va_list ap)
+{
+ if (cur_mon) {
+ monitor_vprintf(cur_mon, fmt, ap);
+ } else {
+ vfprintf(stderr, fmt, ap);
+ }
+}
+
+/*
+ * Print to current monitor if we have one, else to stderr.
+ * TODO just like error_vprintf()
+ */
+void error_printf(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ error_vprintf(fmt, ap);
+ va_end(ap);
+}
+
+void error_printf_unless_qmp(const char *fmt, ...)
+{
+ va_list ap;
+
+ if (!monitor_cur_is_qmp()) {
+ va_start(ap, fmt);
+ error_vprintf(fmt, ap);
+ va_end(ap);
+ }
+}
+
+static Location std_loc = {
+ .kind = LOC_NONE
+};
+static Location *cur_loc = &std_loc;
+
+/*
+ * Push location saved in LOC onto the location stack, return it.
+ * The top of that stack is the current location.
+ * Needs a matching loc_pop().
+ */
+Location *loc_push_restore(Location *loc)
+{
+ assert(!loc->prev);
+ loc->prev = cur_loc;
+ cur_loc = loc;
+ return loc;
+}
+
+/*
+ * Initialize *LOC to "nowhere", push it onto the location stack.
+ * The top of that stack is the current location.
+ * Needs a matching loc_pop().
+ * Return LOC.
+ */
+Location *loc_push_none(Location *loc)
+{
+ loc->kind = LOC_NONE;
+ loc->prev = NULL;
+ return loc_push_restore(loc);
+}
+
+/*
+ * Pop the location stack.
+ * LOC must be the current location, i.e. the top of the stack.
+ */
+Location *loc_pop(Location *loc)
+{
+ assert(cur_loc == loc && loc->prev);
+ cur_loc = loc->prev;
+ loc->prev = NULL;
+ return loc;
+}
+
+/*
+ * Save the current location in LOC, return LOC.
+ */
+Location *loc_save(Location *loc)
+{
+ *loc = *cur_loc;
+ loc->prev = NULL;
+ return loc;
+}
+
+/*
+ * Change the current location to the one saved in LOC.
+ */
+void loc_restore(Location *loc)
+{
+ Location *prev = cur_loc->prev;
+ assert(!loc->prev);
+ *cur_loc = *loc;
+ cur_loc->prev = prev;
+}
+
+/*
+ * Change the current location to "nowhere in particular".
+ */
+void loc_set_none(void)
+{
+ cur_loc->kind = LOC_NONE;
+}
+
+/*
+ * Change the current location to argument ARGV[IDX..IDX+CNT-1].
+ */
+void loc_set_cmdline(char **argv, int idx, int cnt)
+{
+ cur_loc->kind = LOC_CMDLINE;
+ cur_loc->num = cnt;
+ cur_loc->ptr = argv + idx;
+}
+
+/*
+ * Change the current location to file FNAME, line LNO.
+ */
+void loc_set_file(const char *fname, int lno)
+{
+ assert (fname || cur_loc->kind == LOC_FILE);
+ cur_loc->kind = LOC_FILE;
+ cur_loc->num = lno;
+ if (fname) {
+ cur_loc->ptr = fname;
+ }
+}
+
+static const char *progname;
+
+/*
+ * Set the program name for error_print_loc().
+ */
+void error_set_progname(const char *argv0)
+{
+ const char *p = strrchr(argv0, '/');
+ progname = p ? p + 1 : argv0;
+}
+
+const char *error_get_progname(void)
+{
+ return progname;
+}
+
+/*
+ * Print current location to current monitor if we have one, else to stderr.
+ */
+void error_print_loc(void)
+{
+ const char *sep = "";
+ int i;
+ const char *const *argp;
+
+ if (!cur_mon && progname) {
+ fprintf(stderr, "%s:", progname);
+ sep = " ";
+ }
+ switch (cur_loc->kind) {
+ case LOC_CMDLINE:
+ argp = cur_loc->ptr;
+ for (i = 0; i < cur_loc->num; i++) {
+ error_printf("%s%s", sep, argp[i]);
+ sep = " ";
+ }
+ error_printf(": ");
+ break;
+ case LOC_FILE:
+ error_printf("%s:", (const char *)cur_loc->ptr);
+ if (cur_loc->num) {
+ error_printf("%d:", cur_loc->num);
+ }
+ error_printf(" ");
+ break;
+ default:
+ error_printf("%s", sep);
+ }
+}
+
+bool enable_timestamp_msg;
+/*
+ * Print an error message to current monitor if we have one, else to stderr.
+ * Format arguments like sprintf(). The result should not contain
+ * newlines.
+ * Prepend the current location and append a newline.
+ * It's wrong to call this in a QMP monitor. Use qerror_report() there.
+ */
+void error_report(const char *fmt, ...)
+{
+ va_list ap;
+ GTimeVal tv;
+ gchar *timestr;
+
+ if (enable_timestamp_msg) {
+ g_get_current_time(&tv);
+ timestr = g_time_val_to_iso8601(&tv);
+ error_printf("%s ", timestr);
+ g_free(timestr);
+ }
+
+ error_print_loc();
+ va_start(ap, fmt);
+ error_vprintf(fmt, ap);
+ va_end(ap);
+ error_printf("\n");
+}
diff --git a/contrib/qemu/util/qemu-option.c b/contrib/qemu/util/qemu-option.c
new file mode 100644
index 000000000..e0ef426da
--- /dev/null
+++ b/contrib/qemu/util/qemu-option.c
@@ -0,0 +1,1126 @@
+/*
+ * Commandline option parsing functions
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2009 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "qemu-common.h"
+#include "qemu/error-report.h"
+#include "qapi/qmp/types.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
+#include "qemu/option_int.h"
+
+/*
+ * Extracts the name of an option from the parameter string (p points at the
+ * first byte of the option name)
+ *
+ * The option name is delimited by delim (usually , or =) or the string end
+ * and is copied into buf. If the option name is longer than buf_size, it is
+ * truncated. buf is always zero terminated.
+ *
+ * The return value is the position of the delimiter/zero byte after the option
+ * name in p.
+ */
+const char *get_opt_name(char *buf, int buf_size, const char *p, char delim)
+{
+ char *q;
+
+ q = buf;
+ while (*p != '\0' && *p != delim) {
+ if (q && (q - buf) < buf_size - 1)
+ *q++ = *p;
+ p++;
+ }
+ if (q)
+ *q = '\0';
+
+ return p;
+}
+
+/*
+ * Extracts the value of an option from the parameter string p (p points at the
+ * first byte of the option value)
+ *
+ * This function is comparable to get_opt_name with the difference that the
+ * delimiter is fixed to be comma which starts a new option. To specify an
+ * option value that contains commas, double each comma.
+ */
+const char *get_opt_value(char *buf, int buf_size, const char *p)
+{
+ char *q;
+
+ q = buf;
+ while (*p != '\0') {
+ if (*p == ',') {
+ if (*(p + 1) != ',')
+ break;
+ p++;
+ }
+ if (q && (q - buf) < buf_size - 1)
+ *q++ = *p;
+ p++;
+ }
+ if (q)
+ *q = '\0';
+
+ return p;
+}
+
+int get_next_param_value(char *buf, int buf_size,
+ const char *tag, const char **pstr)
+{
+ const char *p;
+ char option[128];
+
+ p = *pstr;
+ for(;;) {
+ p = get_opt_name(option, sizeof(option), p, '=');
+ if (*p != '=')
+ break;
+ p++;
+ if (!strcmp(tag, option)) {
+ *pstr = get_opt_value(buf, buf_size, p);
+ if (**pstr == ',') {
+ (*pstr)++;
+ }
+ return strlen(buf);
+ } else {
+ p = get_opt_value(NULL, 0, p);
+ }
+ if (*p != ',')
+ break;
+ p++;
+ }
+ return 0;
+}
+
+int get_param_value(char *buf, int buf_size,
+ const char *tag, const char *str)
+{
+ return get_next_param_value(buf, buf_size, tag, &str);
+}
+
+/*
+ * Searches an option list for an option with the given name
+ */
+QEMUOptionParameter *get_option_parameter(QEMUOptionParameter *list,
+ const char *name)
+{
+ while (list && list->name) {
+ if (!strcmp(list->name, name)) {
+ return list;
+ }
+ list++;
+ }
+
+ return NULL;
+}
+
+static void parse_option_bool(const char *name, const char *value, bool *ret,
+ Error **errp)
+{
+ if (value != NULL) {
+ if (!strcmp(value, "on")) {
+ *ret = 1;
+ } else if (!strcmp(value, "off")) {
+ *ret = 0;
+ } else {
+ error_set(errp,QERR_INVALID_PARAMETER_VALUE, name, "'on' or 'off'");
+ }
+ } else {
+ *ret = 1;
+ }
+}
+
+static void parse_option_number(const char *name, const char *value,
+ uint64_t *ret, Error **errp)
+{
+ char *postfix;
+ uint64_t number;
+
+ if (value != NULL) {
+ number = strtoull(value, &postfix, 0);
+ if (*postfix != '\0') {
+ error_set(errp, QERR_INVALID_PARAMETER_VALUE, name, "a number");
+ return;
+ }
+ *ret = number;
+ } else {
+ error_set(errp, QERR_INVALID_PARAMETER_VALUE, name, "a number");
+ }
+}
+
+static void parse_option_size(const char *name, const char *value,
+ uint64_t *ret, Error **errp)
+{
+ char *postfix;
+ double sizef;
+
+ if (value != NULL) {
+ sizef = strtod(value, &postfix);
+ switch (*postfix) {
+ case 'T':
+ sizef *= 1024;
+ /* fall through */
+ case 'G':
+ sizef *= 1024;
+ /* fall through */
+ case 'M':
+ sizef *= 1024;
+ /* fall through */
+ case 'K':
+ case 'k':
+ sizef *= 1024;
+ /* fall through */
+ case 'b':
+ case '\0':
+ *ret = (uint64_t) sizef;
+ break;
+ default:
+ error_set(errp, QERR_INVALID_PARAMETER_VALUE, name, "a size");
+#if 0 /* conversion from qerror_report() to error_set() broke this: */
+ error_printf_unless_qmp("You may use k, M, G or T suffixes for "
+ "kilobytes, megabytes, gigabytes and terabytes.\n");
+#endif
+ return;
+ }
+ } else {
+ error_set(errp, QERR_INVALID_PARAMETER_VALUE, name, "a size");
+ }
+}
+
+/*
+ * Sets the value of a parameter in a given option list. The parsing of the
+ * value depends on the type of option:
+ *
+ * OPT_FLAG (uses value.n):
+ * If no value is given, the flag is set to 1.
+ * Otherwise the value must be "on" (set to 1) or "off" (set to 0)
+ *
+ * OPT_STRING (uses value.s):
+ * value is strdup()ed and assigned as option value
+ *
+ * OPT_SIZE (uses value.n):
+ * The value is converted to an integer. Suffixes for kilobytes etc. are
+ * allowed (powers of 1024).
+ *
+ * Returns 0 on succes, -1 in error cases
+ */
+int set_option_parameter(QEMUOptionParameter *list, const char *name,
+ const char *value)
+{
+ bool flag;
+ Error *local_err = NULL;
+
+ // Find a matching parameter
+ list = get_option_parameter(list, name);
+ if (list == NULL) {
+ fprintf(stderr, "Unknown option '%s'\n", name);
+ return -1;
+ }
+
+ // Process parameter
+ switch (list->type) {
+ case OPT_FLAG:
+ parse_option_bool(name, value, &flag, &local_err);
+ if (!error_is_set(&local_err)) {
+ list->value.n = flag;
+ }
+ break;
+
+ case OPT_STRING:
+ if (value != NULL) {
+ list->value.s = g_strdup(value);
+ } else {
+ fprintf(stderr, "Option '%s' needs a parameter\n", name);
+ return -1;
+ }
+ break;
+
+ case OPT_SIZE:
+ parse_option_size(name, value, &list->value.n, &local_err);
+ break;
+
+ default:
+ fprintf(stderr, "Bug: Option '%s' has an unknown type\n", name);
+ return -1;
+ }
+
+ if (error_is_set(&local_err)) {
+ qerror_report_err(local_err);
+ error_free(local_err);
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Sets the given parameter to an integer instead of a string.
+ * This function cannot be used to set string options.
+ *
+ * Returns 0 on success, -1 in error cases
+ */
+int set_option_parameter_int(QEMUOptionParameter *list, const char *name,
+ uint64_t value)
+{
+ // Find a matching parameter
+ list = get_option_parameter(list, name);
+ if (list == NULL) {
+ fprintf(stderr, "Unknown option '%s'\n", name);
+ return -1;
+ }
+
+ // Process parameter
+ switch (list->type) {
+ case OPT_FLAG:
+ case OPT_NUMBER:
+ case OPT_SIZE:
+ list->value.n = value;
+ break;
+
+ default:
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Frees a option list. If it contains strings, the strings are freed as well.
+ */
+void free_option_parameters(QEMUOptionParameter *list)
+{
+ QEMUOptionParameter *cur = list;
+
+ while (cur && cur->name) {
+ if (cur->type == OPT_STRING) {
+ g_free(cur->value.s);
+ }
+ cur++;
+ }
+
+ g_free(list);
+}
+
+/*
+ * Count valid options in list
+ */
+static size_t count_option_parameters(QEMUOptionParameter *list)
+{
+ size_t num_options = 0;
+
+ while (list && list->name) {
+ num_options++;
+ list++;
+ }
+
+ return num_options;
+}
+
+/*
+ * Append an option list (list) to an option list (dest).
+ *
+ * If dest is NULL, a new copy of list is created.
+ *
+ * Returns a pointer to the first element of dest (or the newly allocated copy)
+ */
+QEMUOptionParameter *append_option_parameters(QEMUOptionParameter *dest,
+ QEMUOptionParameter *list)
+{
+ size_t num_options, num_dest_options;
+
+ num_options = count_option_parameters(dest);
+ num_dest_options = num_options;
+
+ num_options += count_option_parameters(list);
+
+ dest = g_realloc(dest, (num_options + 1) * sizeof(QEMUOptionParameter));
+ dest[num_dest_options].name = NULL;
+
+ while (list && list->name) {
+ if (get_option_parameter(dest, list->name) == NULL) {
+ dest[num_dest_options++] = *list;
+ dest[num_dest_options].name = NULL;
+ }
+ list++;
+ }
+
+ return dest;
+}
+
+/*
+ * Parses a parameter string (param) into an option list (dest).
+ *
+ * list is the template option list. If dest is NULL, a new copy of list is
+ * created. If list is NULL, this function fails.
+ *
+ * A parameter string consists of one or more parameters, separated by commas.
+ * Each parameter consists of its name and possibly of a value. In the latter
+ * case, the value is delimited by an = character. To specify a value which
+ * contains commas, double each comma so it won't be recognized as the end of
+ * the parameter.
+ *
+ * For more details of the parsing see above.
+ *
+ * Returns a pointer to the first element of dest (or the newly allocated copy)
+ * or NULL in error cases
+ */
+QEMUOptionParameter *parse_option_parameters(const char *param,
+ QEMUOptionParameter *list, QEMUOptionParameter *dest)
+{
+ QEMUOptionParameter *allocated = NULL;
+ char name[256];
+ char value[256];
+ char *param_delim, *value_delim;
+ char next_delim;
+
+ if (list == NULL) {
+ return NULL;
+ }
+
+ if (dest == NULL) {
+ dest = allocated = append_option_parameters(NULL, list);
+ }
+
+ while (*param) {
+
+ // Find parameter name and value in the string
+ param_delim = strchr(param, ',');
+ value_delim = strchr(param, '=');
+
+ if (value_delim && (value_delim < param_delim || !param_delim)) {
+ next_delim = '=';
+ } else {
+ next_delim = ',';
+ value_delim = NULL;
+ }
+
+ param = get_opt_name(name, sizeof(name), param, next_delim);
+ if (value_delim) {
+ param = get_opt_value(value, sizeof(value), param + 1);
+ }
+ if (*param != '\0') {
+ param++;
+ }
+
+ // Set the parameter
+ if (set_option_parameter(dest, name, value_delim ? value : NULL)) {
+ goto fail;
+ }
+ }
+
+ return dest;
+
+fail:
+ // Only free the list if it was newly allocated
+ free_option_parameters(allocated);
+ return NULL;
+}
+
+/*
+ * Prints all options of a list that have a value to stdout
+ */
+void print_option_parameters(QEMUOptionParameter *list)
+{
+ while (list && list->name) {
+ switch (list->type) {
+ case OPT_STRING:
+ if (list->value.s != NULL) {
+ printf("%s='%s' ", list->name, list->value.s);
+ }
+ break;
+ case OPT_FLAG:
+ printf("%s=%s ", list->name, list->value.n ? "on" : "off");
+ break;
+ case OPT_SIZE:
+ case OPT_NUMBER:
+ printf("%s=%" PRId64 " ", list->name, list->value.n);
+ break;
+ default:
+ printf("%s=(unknown type) ", list->name);
+ break;
+ }
+ list++;
+ }
+}
+
+/*
+ * Prints an overview of all available options
+ */
+void print_option_help(QEMUOptionParameter *list)
+{
+ printf("Supported options:\n");
+ while (list && list->name) {
+ printf("%-16s %s\n", list->name,
+ list->help ? list->help : "No description available");
+ list++;
+ }
+}
+
+/* ------------------------------------------------------------------ */
+
+static QemuOpt *qemu_opt_find(QemuOpts *opts, const char *name)
+{
+ QemuOpt *opt;
+
+ QTAILQ_FOREACH_REVERSE(opt, &opts->head, QemuOptHead, next) {
+ if (strcmp(opt->name, name) != 0)
+ continue;
+ return opt;
+ }
+ return NULL;
+}
+
+const char *qemu_opt_get(QemuOpts *opts, const char *name)
+{
+ QemuOpt *opt = qemu_opt_find(opts, name);
+ return opt ? opt->str : NULL;
+}
+
+bool qemu_opt_has_help_opt(QemuOpts *opts)
+{
+ QemuOpt *opt;
+
+ QTAILQ_FOREACH_REVERSE(opt, &opts->head, QemuOptHead, next) {
+ if (is_help_option(opt->name)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+bool qemu_opt_get_bool(QemuOpts *opts, const char *name, bool defval)
+{
+ QemuOpt *opt = qemu_opt_find(opts, name);
+
+ if (opt == NULL)
+ return defval;
+ assert(opt->desc && opt->desc->type == QEMU_OPT_BOOL);
+ return opt->value.boolean;
+}
+
+uint64_t qemu_opt_get_number(QemuOpts *opts, const char *name, uint64_t defval)
+{
+ QemuOpt *opt = qemu_opt_find(opts, name);
+
+ if (opt == NULL)
+ return defval;
+ assert(opt->desc && opt->desc->type == QEMU_OPT_NUMBER);
+ return opt->value.uint;
+}
+
+uint64_t qemu_opt_get_size(QemuOpts *opts, const char *name, uint64_t defval)
+{
+ QemuOpt *opt = qemu_opt_find(opts, name);
+
+ if (opt == NULL)
+ return defval;
+ assert(opt->desc && opt->desc->type == QEMU_OPT_SIZE);
+ return opt->value.uint;
+}
+
+static void qemu_opt_parse(QemuOpt *opt, Error **errp)
+{
+ if (opt->desc == NULL)
+ return;
+
+ switch (opt->desc->type) {
+ case QEMU_OPT_STRING:
+ /* nothing */
+ return;
+ case QEMU_OPT_BOOL:
+ parse_option_bool(opt->name, opt->str, &opt->value.boolean, errp);
+ break;
+ case QEMU_OPT_NUMBER:
+ parse_option_number(opt->name, opt->str, &opt->value.uint, errp);
+ break;
+ case QEMU_OPT_SIZE:
+ parse_option_size(opt->name, opt->str, &opt->value.uint, errp);
+ break;
+ default:
+ abort();
+ }
+}
+
+static void qemu_opt_del(QemuOpt *opt)
+{
+ QTAILQ_REMOVE(&opt->opts->head, opt, next);
+ g_free((/* !const */ char*)opt->name);
+ g_free((/* !const */ char*)opt->str);
+ g_free(opt);
+}
+
+static bool opts_accepts_any(const QemuOpts *opts)
+{
+ return opts->list->desc[0].name == NULL;
+}
+
+static const QemuOptDesc *find_desc_by_name(const QemuOptDesc *desc,
+ const char *name)
+{
+ int i;
+
+ for (i = 0; desc[i].name != NULL; i++) {
+ if (strcmp(desc[i].name, name) == 0) {
+ return &desc[i];
+ }
+ }
+
+ return NULL;
+}
+
+static void opt_set(QemuOpts *opts, const char *name, const char *value,
+ bool prepend, Error **errp)
+{
+ QemuOpt *opt;
+ const QemuOptDesc *desc;
+ Error *local_err = NULL;
+
+ desc = find_desc_by_name(opts->list->desc, name);
+ if (!desc && !opts_accepts_any(opts)) {
+ error_set(errp, QERR_INVALID_PARAMETER, name);
+ return;
+ }
+
+ opt = g_malloc0(sizeof(*opt));
+ opt->name = g_strdup(name);
+ opt->opts = opts;
+ if (prepend) {
+ QTAILQ_INSERT_HEAD(&opts->head, opt, next);
+ } else {
+ QTAILQ_INSERT_TAIL(&opts->head, opt, next);
+ }
+ opt->desc = desc;
+ opt->str = g_strdup(value);
+ qemu_opt_parse(opt, &local_err);
+ if (error_is_set(&local_err)) {
+ error_propagate(errp, local_err);
+ qemu_opt_del(opt);
+ }
+}
+
+int qemu_opt_set(QemuOpts *opts, const char *name, const char *value)
+{
+ Error *local_err = NULL;
+
+ opt_set(opts, name, value, false, &local_err);
+ if (error_is_set(&local_err)) {
+ qerror_report_err(local_err);
+ error_free(local_err);
+ return -1;
+ }
+
+ return 0;
+}
+
+void qemu_opt_set_err(QemuOpts *opts, const char *name, const char *value,
+ Error **errp)
+{
+ opt_set(opts, name, value, false, errp);
+}
+
+int qemu_opt_set_bool(QemuOpts *opts, const char *name, bool val)
+{
+ QemuOpt *opt;
+ const QemuOptDesc *desc = opts->list->desc;
+
+ opt = g_malloc0(sizeof(*opt));
+ opt->desc = find_desc_by_name(desc, name);
+ if (!opt->desc && !opts_accepts_any(opts)) {
+ qerror_report(QERR_INVALID_PARAMETER, name);
+ g_free(opt);
+ return -1;
+ }
+
+ opt->name = g_strdup(name);
+ opt->opts = opts;
+ opt->value.boolean = !!val;
+ opt->str = g_strdup(val ? "on" : "off");
+ QTAILQ_INSERT_TAIL(&opts->head, opt, next);
+
+ return 0;
+}
+
+int qemu_opt_set_number(QemuOpts *opts, const char *name, int64_t val)
+{
+ QemuOpt *opt;
+ const QemuOptDesc *desc = opts->list->desc;
+
+ opt = g_malloc0(sizeof(*opt));
+ opt->desc = find_desc_by_name(desc, name);
+ if (!opt->desc && !opts_accepts_any(opts)) {
+ qerror_report(QERR_INVALID_PARAMETER, name);
+ g_free(opt);
+ return -1;
+ }
+
+ opt->name = g_strdup(name);
+ opt->opts = opts;
+ opt->value.uint = val;
+ opt->str = g_strdup_printf("%" PRId64, val);
+ QTAILQ_INSERT_TAIL(&opts->head, opt, next);
+
+ return 0;
+}
+
+int qemu_opt_foreach(QemuOpts *opts, qemu_opt_loopfunc func, void *opaque,
+ int abort_on_failure)
+{
+ QemuOpt *opt;
+ int rc = 0;
+
+ QTAILQ_FOREACH(opt, &opts->head, next) {
+ rc = func(opt->name, opt->str, opaque);
+ if (abort_on_failure && rc != 0)
+ break;
+ }
+ return rc;
+}
+
+QemuOpts *qemu_opts_find(QemuOptsList *list, const char *id)
+{
+ QemuOpts *opts;
+
+ QTAILQ_FOREACH(opts, &list->head, next) {
+ if (!opts->id && !id) {
+ return opts;
+ }
+ if (opts->id && id && !strcmp(opts->id, id)) {
+ return opts;
+ }
+ }
+ return NULL;
+}
+
+static int id_wellformed(const char *id)
+{
+ int i;
+
+ if (!qemu_isalpha(id[0])) {
+ return 0;
+ }
+ for (i = 1; id[i]; i++) {
+ if (!qemu_isalnum(id[i]) && !strchr("-._", id[i])) {
+ return 0;
+ }
+ }
+ return 1;
+}
+
+QemuOpts *qemu_opts_create(QemuOptsList *list, const char *id,
+ int fail_if_exists, Error **errp)
+{
+ QemuOpts *opts = NULL;
+
+ if (id) {
+ if (!id_wellformed(id)) {
+ error_set(errp,QERR_INVALID_PARAMETER_VALUE, "id", "an identifier");
+#if 0 /* conversion from qerror_report() to error_set() broke this: */
+ error_printf_unless_qmp("Identifiers consist of letters, digits, '-', '.', '_', starting with a letter.\n");
+#endif
+ return NULL;
+ }
+ opts = qemu_opts_find(list, id);
+ if (opts != NULL) {
+ if (fail_if_exists && !list->merge_lists) {
+ error_set(errp, QERR_DUPLICATE_ID, id, list->name);
+ return NULL;
+ } else {
+ return opts;
+ }
+ }
+ } else if (list->merge_lists) {
+ opts = qemu_opts_find(list, NULL);
+ if (opts) {
+ return opts;
+ }
+ }
+ opts = g_malloc0(sizeof(*opts));
+ opts->id = g_strdup(id);
+ opts->list = list;
+ loc_save(&opts->loc);
+ QTAILQ_INIT(&opts->head);
+ QTAILQ_INSERT_TAIL(&list->head, opts, next);
+ return opts;
+}
+
+QemuOpts *qemu_opts_create_nofail(QemuOptsList *list)
+{
+ QemuOpts *opts;
+ Error *errp = NULL;
+ opts = qemu_opts_create(list, NULL, 0, &errp);
+ assert_no_error(errp);
+ return opts;
+}
+
+void qemu_opts_reset(QemuOptsList *list)
+{
+ QemuOpts *opts, *next_opts;
+
+ QTAILQ_FOREACH_SAFE(opts, &list->head, next, next_opts) {
+ qemu_opts_del(opts);
+ }
+}
+
+void qemu_opts_loc_restore(QemuOpts *opts)
+{
+ loc_restore(&opts->loc);
+}
+
+int qemu_opts_set(QemuOptsList *list, const char *id,
+ const char *name, const char *value)
+{
+ QemuOpts *opts;
+ Error *local_err = NULL;
+
+ opts = qemu_opts_create(list, id, 1, &local_err);
+ if (error_is_set(&local_err)) {
+ qerror_report_err(local_err);
+ error_free(local_err);
+ return -1;
+ }
+ return qemu_opt_set(opts, name, value);
+}
+
+const char *qemu_opts_id(QemuOpts *opts)
+{
+ return opts->id;
+}
+
+void qemu_opts_del(QemuOpts *opts)
+{
+ QemuOpt *opt;
+
+ for (;;) {
+ opt = QTAILQ_FIRST(&opts->head);
+ if (opt == NULL)
+ break;
+ qemu_opt_del(opt);
+ }
+ QTAILQ_REMOVE(&opts->list->head, opts, next);
+ g_free(opts->id);
+ g_free(opts);
+}
+
+int qemu_opts_print(QemuOpts *opts, void *dummy)
+{
+ QemuOpt *opt;
+
+ fprintf(stderr, "%s: %s:", opts->list->name,
+ opts->id ? opts->id : "<noid>");
+ QTAILQ_FOREACH(opt, &opts->head, next) {
+ fprintf(stderr, " %s=\"%s\"", opt->name, opt->str);
+ }
+ fprintf(stderr, "\n");
+ return 0;
+}
+
+static int opts_do_parse(QemuOpts *opts, const char *params,
+ const char *firstname, bool prepend)
+{
+ char option[128], value[1024];
+ const char *p,*pe,*pc;
+ Error *local_err = NULL;
+
+ for (p = params; *p != '\0'; p++) {
+ pe = strchr(p, '=');
+ pc = strchr(p, ',');
+ if (!pe || (pc && pc < pe)) {
+ /* found "foo,more" */
+ if (p == params && firstname) {
+ /* implicitly named first option */
+ pstrcpy(option, sizeof(option), firstname);
+ p = get_opt_value(value, sizeof(value), p);
+ } else {
+ /* option without value, probably a flag */
+ p = get_opt_name(option, sizeof(option), p, ',');
+ if (strncmp(option, "no", 2) == 0) {
+ memmove(option, option+2, strlen(option+2)+1);
+ pstrcpy(value, sizeof(value), "off");
+ } else {
+ pstrcpy(value, sizeof(value), "on");
+ }
+ }
+ } else {
+ /* found "foo=bar,more" */
+ p = get_opt_name(option, sizeof(option), p, '=');
+ if (*p != '=') {
+ break;
+ }
+ p++;
+ p = get_opt_value(value, sizeof(value), p);
+ }
+ if (strcmp(option, "id") != 0) {
+ /* store and parse */
+ opt_set(opts, option, value, prepend, &local_err);
+ if (error_is_set(&local_err)) {
+ qerror_report_err(local_err);
+ error_free(local_err);
+ return -1;
+ }
+ }
+ if (*p != ',') {
+ break;
+ }
+ }
+ return 0;
+}
+
+int qemu_opts_do_parse(QemuOpts *opts, const char *params, const char *firstname)
+{
+ return opts_do_parse(opts, params, firstname, false);
+}
+
+static QemuOpts *opts_parse(QemuOptsList *list, const char *params,
+ int permit_abbrev, bool defaults)
+{
+ const char *firstname;
+ char value[1024], *id = NULL;
+ const char *p;
+ QemuOpts *opts;
+ Error *local_err = NULL;
+
+ assert(!permit_abbrev || list->implied_opt_name);
+ firstname = permit_abbrev ? list->implied_opt_name : NULL;
+
+ if (strncmp(params, "id=", 3) == 0) {
+ get_opt_value(value, sizeof(value), params+3);
+ id = value;
+ } else if ((p = strstr(params, ",id=")) != NULL) {
+ get_opt_value(value, sizeof(value), p+4);
+ id = value;
+ }
+ opts = qemu_opts_create(list, id, !defaults, &local_err);
+ if (opts == NULL) {
+ if (error_is_set(&local_err)) {
+ qerror_report_err(local_err);
+ error_free(local_err);
+ }
+ return NULL;
+ }
+
+ if (opts_do_parse(opts, params, firstname, defaults) != 0) {
+ qemu_opts_del(opts);
+ return NULL;
+ }
+
+ return opts;
+}
+
+QemuOpts *qemu_opts_parse(QemuOptsList *list, const char *params,
+ int permit_abbrev)
+{
+ return opts_parse(list, params, permit_abbrev, false);
+}
+
+void qemu_opts_set_defaults(QemuOptsList *list, const char *params,
+ int permit_abbrev)
+{
+ QemuOpts *opts;
+
+ opts = opts_parse(list, params, permit_abbrev, true);
+ assert(opts);
+}
+
+typedef struct OptsFromQDictState {
+ QemuOpts *opts;
+ Error **errp;
+} OptsFromQDictState;
+
+static void qemu_opts_from_qdict_1(const char *key, QObject *obj, void *opaque)
+{
+ OptsFromQDictState *state = opaque;
+ char buf[32];
+ const char *value;
+ int n;
+
+ if (!strcmp(key, "id") || error_is_set(state->errp)) {
+ return;
+ }
+
+ switch (qobject_type(obj)) {
+ case QTYPE_QSTRING:
+ value = qstring_get_str(qobject_to_qstring(obj));
+ break;
+ case QTYPE_QINT:
+ n = snprintf(buf, sizeof(buf), "%" PRId64,
+ qint_get_int(qobject_to_qint(obj)));
+ assert(n < sizeof(buf));
+ value = buf;
+ break;
+ case QTYPE_QFLOAT:
+ n = snprintf(buf, sizeof(buf), "%.17g",
+ qfloat_get_double(qobject_to_qfloat(obj)));
+ assert(n < sizeof(buf));
+ value = buf;
+ break;
+ case QTYPE_QBOOL:
+ pstrcpy(buf, sizeof(buf),
+ qbool_get_int(qobject_to_qbool(obj)) ? "on" : "off");
+ value = buf;
+ break;
+ default:
+ return;
+ }
+
+ qemu_opt_set_err(state->opts, key, value, state->errp);
+}
+
+/*
+ * Create QemuOpts from a QDict.
+ * Use value of key "id" as ID if it exists and is a QString.
+ * Only QStrings, QInts, QFloats and QBools are copied. Entries with
+ * other types are silently ignored.
+ */
+QemuOpts *qemu_opts_from_qdict(QemuOptsList *list, const QDict *qdict,
+ Error **errp)
+{
+ OptsFromQDictState state;
+ Error *local_err = NULL;
+ QemuOpts *opts;
+
+ opts = qemu_opts_create(list, qdict_get_try_str(qdict, "id"), 1,
+ &local_err);
+ if (error_is_set(&local_err)) {
+ error_propagate(errp, local_err);
+ return NULL;
+ }
+
+ assert(opts != NULL);
+
+ state.errp = &local_err;
+ state.opts = opts;
+ qdict_iter(qdict, qemu_opts_from_qdict_1, &state);
+ if (error_is_set(&local_err)) {
+ error_propagate(errp, local_err);
+ qemu_opts_del(opts);
+ return NULL;
+ }
+
+ return opts;
+}
+
+/*
+ * Adds all QDict entries to the QemuOpts that can be added and removes them
+ * from the QDict. When this function returns, the QDict contains only those
+ * entries that couldn't be added to the QemuOpts.
+ */
+void qemu_opts_absorb_qdict(QemuOpts *opts, QDict *qdict, Error **errp)
+{
+ const QDictEntry *entry, *next;
+
+ entry = qdict_first(qdict);
+
+ while (entry != NULL) {
+ Error *local_err = NULL;
+ OptsFromQDictState state = {
+ .errp = &local_err,
+ .opts = opts,
+ };
+
+ next = qdict_next(qdict, entry);
+
+ if (find_desc_by_name(opts->list->desc, entry->key)) {
+ qemu_opts_from_qdict_1(entry->key, entry->value, &state);
+ if (error_is_set(&local_err)) {
+ error_propagate(errp, local_err);
+ return;
+ } else {
+ qdict_del(qdict, entry->key);
+ }
+ }
+
+ entry = next;
+ }
+}
+
+/*
+ * Convert from QemuOpts to QDict.
+ * The QDict values are of type QString.
+ * TODO We'll want to use types appropriate for opt->desc->type, but
+ * this is enough for now.
+ */
+QDict *qemu_opts_to_qdict(QemuOpts *opts, QDict *qdict)
+{
+ QemuOpt *opt;
+ QObject *val;
+
+ if (!qdict) {
+ qdict = qdict_new();
+ }
+ if (opts->id) {
+ qdict_put(qdict, "id", qstring_from_str(opts->id));
+ }
+ QTAILQ_FOREACH(opt, &opts->head, next) {
+ val = QOBJECT(qstring_from_str(opt->str));
+ qdict_put_obj(qdict, opt->name, val);
+ }
+ return qdict;
+}
+
+/* Validate parsed opts against descriptions where no
+ * descriptions were provided in the QemuOptsList.
+ */
+void qemu_opts_validate(QemuOpts *opts, const QemuOptDesc *desc, Error **errp)
+{
+ QemuOpt *opt;
+ Error *local_err = NULL;
+
+ assert(opts_accepts_any(opts));
+
+ QTAILQ_FOREACH(opt, &opts->head, next) {
+ opt->desc = find_desc_by_name(desc, opt->name);
+ if (!opt->desc) {
+ error_set(errp, QERR_INVALID_PARAMETER, opt->name);
+ return;
+ }
+
+ qemu_opt_parse(opt, &local_err);
+ if (error_is_set(&local_err)) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ }
+}
+
+int qemu_opts_foreach(QemuOptsList *list, qemu_opts_loopfunc func, void *opaque,
+ int abort_on_failure)
+{
+ Location loc;
+ QemuOpts *opts;
+ int rc = 0;
+
+ loc_push_none(&loc);
+ QTAILQ_FOREACH(opts, &list->head, next) {
+ loc_restore(&opts->loc);
+ rc |= func(opts, opaque);
+ if (abort_on_failure && rc != 0)
+ break;
+ }
+ loc_pop(&loc);
+ return rc;
+}
diff --git a/contrib/qemu/util/qemu-thread-posix.c b/contrib/qemu/util/qemu-thread-posix.c
new file mode 100644
index 000000000..4489abf1d
--- /dev/null
+++ b/contrib/qemu/util/qemu-thread-posix.c
@@ -0,0 +1,327 @@
+/*
+ * Wrappers around mutex/cond/thread functions
+ *
+ * Copyright Red Hat, Inc. 2009
+ *
+ * Author:
+ * Marcelo Tosatti <mtosatti@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <time.h>
+#include <signal.h>
+#include <stdint.h>
+#include <string.h>
+#include <limits.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include "qemu/thread.h"
+
+static void error_exit(int err, const char *msg)
+{
+ fprintf(stderr, "qemu: %s: %s\n", msg, strerror(err));
+ abort();
+}
+
+void qemu_mutex_init(QemuMutex *mutex)
+{
+ int err;
+ pthread_mutexattr_t mutexattr;
+
+ pthread_mutexattr_init(&mutexattr);
+ pthread_mutexattr_settype(&mutexattr, PTHREAD_MUTEX_ERRORCHECK);
+ err = pthread_mutex_init(&mutex->lock, &mutexattr);
+ pthread_mutexattr_destroy(&mutexattr);
+ if (err)
+ error_exit(err, __func__);
+}
+
+void qemu_mutex_destroy(QemuMutex *mutex)
+{
+ int err;
+
+ err = pthread_mutex_destroy(&mutex->lock);
+ if (err)
+ error_exit(err, __func__);
+}
+
+void qemu_mutex_lock(QemuMutex *mutex)
+{
+ int err;
+
+ err = pthread_mutex_lock(&mutex->lock);
+ if (err)
+ error_exit(err, __func__);
+}
+
+int qemu_mutex_trylock(QemuMutex *mutex)
+{
+ return pthread_mutex_trylock(&mutex->lock);
+}
+
+void qemu_mutex_unlock(QemuMutex *mutex)
+{
+ int err;
+
+ err = pthread_mutex_unlock(&mutex->lock);
+ if (err)
+ error_exit(err, __func__);
+}
+
+void qemu_cond_init(QemuCond *cond)
+{
+ int err;
+
+ err = pthread_cond_init(&cond->cond, NULL);
+ if (err)
+ error_exit(err, __func__);
+}
+
+void qemu_cond_destroy(QemuCond *cond)
+{
+ int err;
+
+ err = pthread_cond_destroy(&cond->cond);
+ if (err)
+ error_exit(err, __func__);
+}
+
+void qemu_cond_signal(QemuCond *cond)
+{
+ int err;
+
+ err = pthread_cond_signal(&cond->cond);
+ if (err)
+ error_exit(err, __func__);
+}
+
+void qemu_cond_broadcast(QemuCond *cond)
+{
+ int err;
+
+ err = pthread_cond_broadcast(&cond->cond);
+ if (err)
+ error_exit(err, __func__);
+}
+
+void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex)
+{
+ int err;
+
+ err = pthread_cond_wait(&cond->cond, &mutex->lock);
+ if (err)
+ error_exit(err, __func__);
+}
+
+void qemu_sem_init(QemuSemaphore *sem, int init)
+{
+ int rc;
+
+#if defined(__APPLE__) || defined(__NetBSD__)
+ rc = pthread_mutex_init(&sem->lock, NULL);
+ if (rc != 0) {
+ error_exit(rc, __func__);
+ }
+ rc = pthread_cond_init(&sem->cond, NULL);
+ if (rc != 0) {
+ error_exit(rc, __func__);
+ }
+ if (init < 0) {
+ error_exit(EINVAL, __func__);
+ }
+ sem->count = init;
+#else
+ rc = sem_init(&sem->sem, 0, init);
+ if (rc < 0) {
+ error_exit(errno, __func__);
+ }
+#endif
+}
+
+void qemu_sem_destroy(QemuSemaphore *sem)
+{
+ int rc;
+
+#if defined(__APPLE__) || defined(__NetBSD__)
+ rc = pthread_cond_destroy(&sem->cond);
+ if (rc < 0) {
+ error_exit(rc, __func__);
+ }
+ rc = pthread_mutex_destroy(&sem->lock);
+ if (rc < 0) {
+ error_exit(rc, __func__);
+ }
+#else
+ rc = sem_destroy(&sem->sem);
+ if (rc < 0) {
+ error_exit(errno, __func__);
+ }
+#endif
+}
+
+void qemu_sem_post(QemuSemaphore *sem)
+{
+ int rc;
+
+#if defined(__APPLE__) || defined(__NetBSD__)
+ pthread_mutex_lock(&sem->lock);
+ if (sem->count == INT_MAX) {
+ rc = EINVAL;
+ } else if (sem->count++ < 0) {
+ rc = pthread_cond_signal(&sem->cond);
+ } else {
+ rc = 0;
+ }
+ pthread_mutex_unlock(&sem->lock);
+ if (rc != 0) {
+ error_exit(rc, __func__);
+ }
+#else
+ rc = sem_post(&sem->sem);
+ if (rc < 0) {
+ error_exit(errno, __func__);
+ }
+#endif
+}
+
+static void compute_abs_deadline(struct timespec *ts, int ms)
+{
+ struct timeval tv;
+ gettimeofday(&tv, NULL);
+ ts->tv_nsec = tv.tv_usec * 1000 + (ms % 1000) * 1000000;
+ ts->tv_sec = tv.tv_sec + ms / 1000;
+ if (ts->tv_nsec >= 1000000000) {
+ ts->tv_sec++;
+ ts->tv_nsec -= 1000000000;
+ }
+}
+
+int qemu_sem_timedwait(QemuSemaphore *sem, int ms)
+{
+ int rc;
+ struct timespec ts;
+
+#if defined(__APPLE__) || defined(__NetBSD__)
+ compute_abs_deadline(&ts, ms);
+ pthread_mutex_lock(&sem->lock);
+ --sem->count;
+ while (sem->count < 0) {
+ rc = pthread_cond_timedwait(&sem->cond, &sem->lock, &ts);
+ if (rc == ETIMEDOUT) {
+ ++sem->count;
+ break;
+ }
+ if (rc != 0) {
+ error_exit(rc, __func__);
+ }
+ }
+ pthread_mutex_unlock(&sem->lock);
+ return (rc == ETIMEDOUT ? -1 : 0);
+#else
+ if (ms <= 0) {
+ /* This is cheaper than sem_timedwait. */
+ do {
+ rc = sem_trywait(&sem->sem);
+ } while (rc == -1 && errno == EINTR);
+ if (rc == -1 && errno == EAGAIN) {
+ return -1;
+ }
+ } else {
+ compute_abs_deadline(&ts, ms);
+ do {
+ rc = sem_timedwait(&sem->sem, &ts);
+ } while (rc == -1 && errno == EINTR);
+ if (rc == -1 && errno == ETIMEDOUT) {
+ return -1;
+ }
+ }
+ if (rc < 0) {
+ error_exit(errno, __func__);
+ }
+ return 0;
+#endif
+}
+
+void qemu_sem_wait(QemuSemaphore *sem)
+{
+#if defined(__APPLE__) || defined(__NetBSD__)
+ pthread_mutex_lock(&sem->lock);
+ --sem->count;
+ while (sem->count < 0) {
+ pthread_cond_wait(&sem->cond, &sem->lock);
+ }
+ pthread_mutex_unlock(&sem->lock);
+#else
+ int rc;
+
+ do {
+ rc = sem_wait(&sem->sem);
+ } while (rc == -1 && errno == EINTR);
+ if (rc < 0) {
+ error_exit(errno, __func__);
+ }
+#endif
+}
+
+void qemu_thread_create(QemuThread *thread,
+ void *(*start_routine)(void*),
+ void *arg, int mode)
+{
+ sigset_t set, oldset;
+ int err;
+ pthread_attr_t attr;
+
+ err = pthread_attr_init(&attr);
+ if (err) {
+ error_exit(err, __func__);
+ }
+ if (mode == QEMU_THREAD_DETACHED) {
+ err = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+ if (err) {
+ error_exit(err, __func__);
+ }
+ }
+
+ /* Leave signal handling to the iothread. */
+ sigfillset(&set);
+ pthread_sigmask(SIG_SETMASK, &set, &oldset);
+ err = pthread_create(&thread->thread, &attr, start_routine, arg);
+ if (err)
+ error_exit(err, __func__);
+
+ pthread_sigmask(SIG_SETMASK, &oldset, NULL);
+
+ pthread_attr_destroy(&attr);
+}
+
+void qemu_thread_get_self(QemuThread *thread)
+{
+ thread->thread = pthread_self();
+}
+
+bool qemu_thread_is_self(QemuThread *thread)
+{
+ return pthread_equal(pthread_self(), thread->thread);
+}
+
+void qemu_thread_exit(void *retval)
+{
+ pthread_exit(retval);
+}
+
+void *qemu_thread_join(QemuThread *thread)
+{
+ int err;
+ void *ret;
+
+ err = pthread_join(thread->thread, &ret);
+ if (err) {
+ error_exit(err, __func__);
+ }
+ return ret;
+}
diff --git a/contrib/qemu/util/unicode.c b/contrib/qemu/util/unicode.c
new file mode 100644
index 000000000..d1c865885
--- /dev/null
+++ b/contrib/qemu/util/unicode.c
@@ -0,0 +1,100 @@
+/*
+ * Dealing with Unicode
+ *
+ * Copyright (C) 2013 Red Hat, Inc.
+ *
+ * Authors:
+ * Markus Armbruster <armbru@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#include "qemu-common.h"
+
+/**
+ * mod_utf8_codepoint:
+ * @s: string encoded in modified UTF-8
+ * @n: maximum number of bytes to read from @s, if less than 6
+ * @end: set to end of sequence on return
+ *
+ * Convert the modified UTF-8 sequence at the start of @s. Modified
+ * UTF-8 is exactly like UTF-8, except U+0000 is encoded as
+ * "\xC0\x80".
+ *
+ * If @n is zero or @s points to a zero byte, the sequence is invalid,
+ * and @end is set to @s.
+ *
+ * If @s points to an impossible byte (0xFE or 0xFF) or a continuation
+ * byte, the sequence is invalid, and @end is set to @s + 1
+ *
+ * Else, the first byte determines how many continuation bytes are
+ * expected. If there are fewer, the sequence is invalid, and @end is
+ * set to @s + 1 + actual number of continuation bytes. Else, the
+ * sequence is well-formed, and @end is set to @s + 1 + expected
+ * number of continuation bytes.
+ *
+ * A well-formed sequence is valid unless it encodes a codepoint
+ * outside the Unicode range U+0000..U+10FFFF, one of Unicode's 66
+ * noncharacters, a surrogate codepoint, or is overlong. Except the
+ * overlong sequence "\xC0\x80" is valid.
+ *
+ * Conversion succeeds if and only if the sequence is valid.
+ *
+ * Returns: the Unicode codepoint on success, -1 on failure.
+ */
+int mod_utf8_codepoint(const char *s, size_t n, char **end)
+{
+ static int min_cp[5] = { 0x80, 0x800, 0x10000, 0x200000, 0x4000000 };
+ const unsigned char *p;
+ unsigned byte, mask, len, i;
+ int cp;
+
+ if (n == 0 || *s == 0) {
+ /* empty sequence */
+ *end = (char *)s;
+ return -1;
+ }
+
+ p = (const unsigned char *)s;
+ byte = *p++;
+ if (byte < 0x80) {
+ cp = byte; /* one byte sequence */
+ } else if (byte >= 0xFE) {
+ cp = -1; /* impossible bytes 0xFE, 0xFF */
+ } else if ((byte & 0x40) == 0) {
+ cp = -1; /* unexpected continuation byte */
+ } else {
+ /* multi-byte sequence */
+ len = 0;
+ for (mask = 0x80; byte & mask; mask >>= 1) {
+ len++;
+ }
+ assert(len > 1 && len < 7);
+ cp = byte & (mask - 1);
+ for (i = 1; i < len; i++) {
+ byte = i < n ? *p : 0;
+ if ((byte & 0xC0) != 0x80) {
+ cp = -1; /* continuation byte missing */
+ goto out;
+ }
+ p++;
+ cp <<= 6;
+ cp |= byte & 0x3F;
+ }
+ if (cp > 0x10FFFF) {
+ cp = -1; /* beyond Unicode range */
+ } else if ((cp >= 0xFDD0 && cp <= 0xFDEF)
+ || (cp & 0xFFFE) == 0xFFFE) {
+ cp = -1; /* noncharacter */
+ } else if (cp >= 0xD800 && cp <= 0xDFFF) {
+ cp = -1; /* surrogate code point */
+ } else if (cp < min_cp[len - 2] && !(cp == 0 && len == 2)) {
+ cp = -1; /* overlong, not \xC0\x80 */
+ }
+ }
+
+out:
+ *end = (char *)p;
+ return cp;
+}
diff --git a/contrib/rbtree/rb.c b/contrib/rbtree/rb.c
new file mode 100644
index 000000000..6184c507e
--- /dev/null
+++ b/contrib/rbtree/rb.c
@@ -0,0 +1,933 @@
+/* Produced by texiweb from libavl.w. */
+
+/* libavl - library for manipulation of binary trees.
+ Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004 Free Software
+ Foundation, Inc.
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 3 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301 USA.
+
+ This code is also covered by the following earlier license notice:
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+*/
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "rb.h"
+
+/* Creates and returns a new table
+ with comparison function |compare| using parameter |param|
+ and memory allocator |allocator|.
+ Returns |NULL| if memory allocation failed. */
+struct rb_table *
+rb_create (rb_comparison_func *compare, void *param,
+ struct libavl_allocator *allocator)
+{
+ struct rb_table *tree;
+
+ assert (compare != NULL);
+
+ if (allocator == NULL)
+ allocator = &rb_allocator_default;
+
+ tree = allocator->libavl_malloc (allocator, sizeof *tree);
+ if (tree == NULL)
+ return NULL;
+
+ tree->rb_root = NULL;
+ tree->rb_compare = compare;
+ tree->rb_param = param;
+ tree->rb_alloc = allocator;
+ tree->rb_count = 0;
+ tree->rb_generation = 0;
+
+ return tree;
+}
+
+/* Search |tree| for an item matching |item|, and return it if found.
+ Otherwise return |NULL|. */
+void *
+rb_find (const struct rb_table *tree, const void *item)
+{
+ const struct rb_node *p;
+
+ assert (tree != NULL && item != NULL);
+ for (p = tree->rb_root; p != NULL; )
+ {
+ int cmp = tree->rb_compare (item, p->rb_data, tree->rb_param);
+
+ if (cmp < 0)
+ p = p->rb_link[0];
+ else if (cmp > 0)
+ p = p->rb_link[1];
+ else /* |cmp == 0| */
+ return p->rb_data;
+ }
+
+ return NULL;
+}
+
+/* Inserts |item| into |tree| and returns a pointer to |item|'s address.
+ If a duplicate item is found in the tree,
+ returns a pointer to the duplicate without inserting |item|.
+ Returns |NULL| in case of memory allocation failure. */
+void **
+rb_probe (struct rb_table *tree, void *item)
+{
+ struct rb_node *pa[RB_MAX_HEIGHT]; /* Nodes on stack. */
+ unsigned char da[RB_MAX_HEIGHT]; /* Directions moved from stack nodes. */
+ int k; /* Stack height. */
+
+ struct rb_node *p; /* Traverses tree looking for insertion point. */
+ struct rb_node *n; /* Newly inserted node. */
+
+ assert (tree != NULL && item != NULL);
+
+ pa[0] = (struct rb_node *) &tree->rb_root;
+ da[0] = 0;
+ k = 1;
+ for (p = tree->rb_root; p != NULL; p = p->rb_link[da[k - 1]])
+ {
+ int cmp = tree->rb_compare (item, p->rb_data, tree->rb_param);
+ if (cmp == 0)
+ return &p->rb_data;
+
+ pa[k] = p;
+ da[k++] = cmp > 0;
+ }
+
+ n = pa[k - 1]->rb_link[da[k - 1]] =
+ tree->rb_alloc->libavl_malloc (tree->rb_alloc, sizeof *n);
+ if (n == NULL)
+ return NULL;
+
+ n->rb_data = item;
+ n->rb_link[0] = n->rb_link[1] = NULL;
+ n->rb_color = RB_RED;
+ tree->rb_count++;
+ tree->rb_generation++;
+
+ while (k >= 3 && pa[k - 1]->rb_color == RB_RED)
+ {
+ if (da[k - 2] == 0)
+ {
+ struct rb_node *y = pa[k - 2]->rb_link[1];
+ if (y != NULL && y->rb_color == RB_RED)
+ {
+ pa[k - 1]->rb_color = y->rb_color = RB_BLACK;
+ pa[k - 2]->rb_color = RB_RED;
+ k -= 2;
+ }
+ else
+ {
+ struct rb_node *x;
+
+ if (da[k - 1] == 0)
+ y = pa[k - 1];
+ else
+ {
+ x = pa[k - 1];
+ y = x->rb_link[1];
+ x->rb_link[1] = y->rb_link[0];
+ y->rb_link[0] = x;
+ pa[k - 2]->rb_link[0] = y;
+ }
+
+ x = pa[k - 2];
+ x->rb_color = RB_RED;
+ y->rb_color = RB_BLACK;
+
+ x->rb_link[0] = y->rb_link[1];
+ y->rb_link[1] = x;
+ pa[k - 3]->rb_link[da[k - 3]] = y;
+ break;
+ }
+ }
+ else
+ {
+ struct rb_node *y = pa[k - 2]->rb_link[0];
+ if (y != NULL && y->rb_color == RB_RED)
+ {
+ pa[k - 1]->rb_color = y->rb_color = RB_BLACK;
+ pa[k - 2]->rb_color = RB_RED;
+ k -= 2;
+ }
+ else
+ {
+ struct rb_node *x;
+
+ if (da[k - 1] == 1)
+ y = pa[k - 1];
+ else
+ {
+ x = pa[k - 1];
+ y = x->rb_link[0];
+ x->rb_link[0] = y->rb_link[1];
+ y->rb_link[1] = x;
+ pa[k - 2]->rb_link[1] = y;
+ }
+
+ x = pa[k - 2];
+ x->rb_color = RB_RED;
+ y->rb_color = RB_BLACK;
+
+ x->rb_link[1] = y->rb_link[0];
+ y->rb_link[0] = x;
+ pa[k - 3]->rb_link[da[k - 3]] = y;
+ break;
+ }
+ }
+ }
+ tree->rb_root->rb_color = RB_BLACK;
+
+
+ return &n->rb_data;
+}
+
+/* Inserts |item| into |table|.
+ Returns |NULL| if |item| was successfully inserted
+ or if a memory allocation error occurred.
+ Otherwise, returns the duplicate item. */
+void *
+rb_insert (struct rb_table *table, void *item)
+{
+ void **p = rb_probe (table, item);
+ return p == NULL || *p == item ? NULL : *p;
+}
+
+/* Inserts |item| into |table|, replacing any duplicate item.
+ Returns |NULL| if |item| was inserted without replacing a duplicate,
+ or if a memory allocation error occurred.
+ Otherwise, returns the item that was replaced. */
+void *
+rb_replace (struct rb_table *table, void *item)
+{
+ void **p = rb_probe (table, item);
+ if (p == NULL || *p == item)
+ return NULL;
+ else
+ {
+ void *r = *p;
+ *p = item;
+ return r;
+ }
+}
+
+/* Deletes from |tree| and returns an item matching |item|.
+ Returns a null pointer if no matching item found. */
+void *
+rb_delete (struct rb_table *tree, const void *item)
+{
+ struct rb_node *pa[RB_MAX_HEIGHT]; /* Nodes on stack. */
+ unsigned char da[RB_MAX_HEIGHT]; /* Directions moved from stack nodes. */
+ int k; /* Stack height. */
+
+ struct rb_node *p; /* The node to delete, or a node part way to it. */
+ int cmp; /* Result of comparison between |item| and |p|. */
+
+ assert (tree != NULL && item != NULL);
+
+ k = 0;
+ p = (struct rb_node *) &tree->rb_root;
+ for (cmp = -1; cmp != 0;
+ cmp = tree->rb_compare (item, p->rb_data, tree->rb_param))
+ {
+ int dir = cmp > 0;
+
+ pa[k] = p;
+ da[k++] = dir;
+
+ p = p->rb_link[dir];
+ if (p == NULL)
+ return NULL;
+ }
+ item = p->rb_data;
+
+ if (p->rb_link[1] == NULL)
+ pa[k - 1]->rb_link[da[k - 1]] = p->rb_link[0];
+ else
+ {
+ enum rb_color t;
+ struct rb_node *r = p->rb_link[1];
+
+ if (r->rb_link[0] == NULL)
+ {
+ r->rb_link[0] = p->rb_link[0];
+ t = r->rb_color;
+ r->rb_color = p->rb_color;
+ p->rb_color = t;
+ pa[k - 1]->rb_link[da[k - 1]] = r;
+ da[k] = 1;
+ pa[k++] = r;
+ }
+ else
+ {
+ struct rb_node *s;
+ int j = k++;
+
+ for (;;)
+ {
+ da[k] = 0;
+ pa[k++] = r;
+ s = r->rb_link[0];
+ if (s->rb_link[0] == NULL)
+ break;
+
+ r = s;
+ }
+
+ da[j] = 1;
+ pa[j] = s;
+ pa[j - 1]->rb_link[da[j - 1]] = s;
+
+ s->rb_link[0] = p->rb_link[0];
+ r->rb_link[0] = s->rb_link[1];
+ s->rb_link[1] = p->rb_link[1];
+
+ t = s->rb_color;
+ s->rb_color = p->rb_color;
+ p->rb_color = t;
+ }
+ }
+
+ if (p->rb_color == RB_BLACK)
+ {
+ for (;;)
+ {
+ struct rb_node *x = pa[k - 1]->rb_link[da[k - 1]];
+ if (x != NULL && x->rb_color == RB_RED)
+ {
+ x->rb_color = RB_BLACK;
+ break;
+ }
+ if (k < 2)
+ break;
+
+ if (da[k - 1] == 0)
+ {
+ struct rb_node *w = pa[k - 1]->rb_link[1];
+
+ if (w->rb_color == RB_RED)
+ {
+ w->rb_color = RB_BLACK;
+ pa[k - 1]->rb_color = RB_RED;
+
+ pa[k - 1]->rb_link[1] = w->rb_link[0];
+ w->rb_link[0] = pa[k - 1];
+ pa[k - 2]->rb_link[da[k - 2]] = w;
+
+ pa[k] = pa[k - 1];
+ da[k] = 0;
+ pa[k - 1] = w;
+ k++;
+
+ w = pa[k - 1]->rb_link[1];
+ }
+
+ if ((w->rb_link[0] == NULL
+ || w->rb_link[0]->rb_color == RB_BLACK)
+ && (w->rb_link[1] == NULL
+ || w->rb_link[1]->rb_color == RB_BLACK))
+ w->rb_color = RB_RED;
+ else
+ {
+ if (w->rb_link[1] == NULL
+ || w->rb_link[1]->rb_color == RB_BLACK)
+ {
+ struct rb_node *y = w->rb_link[0];
+ y->rb_color = RB_BLACK;
+ w->rb_color = RB_RED;
+ w->rb_link[0] = y->rb_link[1];
+ y->rb_link[1] = w;
+ w = pa[k - 1]->rb_link[1] = y;
+ }
+
+ w->rb_color = pa[k - 1]->rb_color;
+ pa[k - 1]->rb_color = RB_BLACK;
+ w->rb_link[1]->rb_color = RB_BLACK;
+
+ pa[k - 1]->rb_link[1] = w->rb_link[0];
+ w->rb_link[0] = pa[k - 1];
+ pa[k - 2]->rb_link[da[k - 2]] = w;
+ break;
+ }
+ }
+ else
+ {
+ struct rb_node *w = pa[k - 1]->rb_link[0];
+
+ if (w->rb_color == RB_RED)
+ {
+ w->rb_color = RB_BLACK;
+ pa[k - 1]->rb_color = RB_RED;
+
+ pa[k - 1]->rb_link[0] = w->rb_link[1];
+ w->rb_link[1] = pa[k - 1];
+ pa[k - 2]->rb_link[da[k - 2]] = w;
+
+ pa[k] = pa[k - 1];
+ da[k] = 1;
+ pa[k - 1] = w;
+ k++;
+
+ w = pa[k - 1]->rb_link[0];
+ }
+
+ if ((w->rb_link[0] == NULL
+ || w->rb_link[0]->rb_color == RB_BLACK)
+ && (w->rb_link[1] == NULL
+ || w->rb_link[1]->rb_color == RB_BLACK))
+ w->rb_color = RB_RED;
+ else
+ {
+ if (w->rb_link[0] == NULL
+ || w->rb_link[0]->rb_color == RB_BLACK)
+ {
+ struct rb_node *y = w->rb_link[1];
+ y->rb_color = RB_BLACK;
+ w->rb_color = RB_RED;
+ w->rb_link[1] = y->rb_link[0];
+ y->rb_link[0] = w;
+ w = pa[k - 1]->rb_link[0] = y;
+ }
+
+ w->rb_color = pa[k - 1]->rb_color;
+ pa[k - 1]->rb_color = RB_BLACK;
+ w->rb_link[0]->rb_color = RB_BLACK;
+
+ pa[k - 1]->rb_link[0] = w->rb_link[1];
+ w->rb_link[1] = pa[k - 1];
+ pa[k - 2]->rb_link[da[k - 2]] = w;
+ break;
+ }
+ }
+
+ k--;
+ }
+
+ }
+
+ tree->rb_alloc->libavl_free (tree->rb_alloc, p);
+ tree->rb_count--;
+ tree->rb_generation++;
+ return (void *) item;
+}
+
+/* Refreshes the stack of parent pointers in |trav|
+ and updates its generation number. */
+static void
+trav_refresh (struct rb_traverser *trav)
+{
+ assert (trav != NULL);
+
+ trav->rb_generation = trav->rb_table->rb_generation;
+
+ if (trav->rb_node != NULL)
+ {
+ rb_comparison_func *cmp = trav->rb_table->rb_compare;
+ void *param = trav->rb_table->rb_param;
+ struct rb_node *node = trav->rb_node;
+ struct rb_node *i;
+
+ trav->rb_height = 0;
+ for (i = trav->rb_table->rb_root; i != node; )
+ {
+ assert (trav->rb_height < RB_MAX_HEIGHT);
+ assert (i != NULL);
+
+ trav->rb_stack[trav->rb_height++] = i;
+ i = i->rb_link[cmp (node->rb_data, i->rb_data, param) > 0];
+ }
+ }
+}
+
+/* Initializes |trav| for use with |tree|
+ and selects the null node. */
+void
+rb_t_init (struct rb_traverser *trav, struct rb_table *tree)
+{
+ trav->rb_table = tree;
+ trav->rb_node = NULL;
+ trav->rb_height = 0;
+ trav->rb_generation = tree->rb_generation;
+}
+
+/* Initializes |trav| for |tree|
+ and selects and returns a pointer to its least-valued item.
+ Returns |NULL| if |tree| contains no nodes. */
+void *
+rb_t_first (struct rb_traverser *trav, struct rb_table *tree)
+{
+ struct rb_node *x;
+
+ assert (tree != NULL && trav != NULL);
+
+ trav->rb_table = tree;
+ trav->rb_height = 0;
+ trav->rb_generation = tree->rb_generation;
+
+ x = tree->rb_root;
+ if (x != NULL)
+ while (x->rb_link[0] != NULL)
+ {
+ assert (trav->rb_height < RB_MAX_HEIGHT);
+ trav->rb_stack[trav->rb_height++] = x;
+ x = x->rb_link[0];
+ }
+ trav->rb_node = x;
+
+ return x != NULL ? x->rb_data : NULL;
+}
+
+/* Initializes |trav| for |tree|
+ and selects and returns a pointer to its greatest-valued item.
+ Returns |NULL| if |tree| contains no nodes. */
+void *
+rb_t_last (struct rb_traverser *trav, struct rb_table *tree)
+{
+ struct rb_node *x;
+
+ assert (tree != NULL && trav != NULL);
+
+ trav->rb_table = tree;
+ trav->rb_height = 0;
+ trav->rb_generation = tree->rb_generation;
+
+ x = tree->rb_root;
+ if (x != NULL)
+ while (x->rb_link[1] != NULL)
+ {
+ assert (trav->rb_height < RB_MAX_HEIGHT);
+ trav->rb_stack[trav->rb_height++] = x;
+ x = x->rb_link[1];
+ }
+ trav->rb_node = x;
+
+ return x != NULL ? x->rb_data : NULL;
+}
+
+/* Searches for |item| in |tree|.
+ If found, initializes |trav| to the item found and returns the item
+ as well.
+ If there is no matching item, initializes |trav| to the null item
+ and returns |NULL|. */
+void *
+rb_t_find (struct rb_traverser *trav, struct rb_table *tree, void *item)
+{
+ struct rb_node *p, *q;
+
+ assert (trav != NULL && tree != NULL && item != NULL);
+ trav->rb_table = tree;
+ trav->rb_height = 0;
+ trav->rb_generation = tree->rb_generation;
+ for (p = tree->rb_root; p != NULL; p = q)
+ {
+ int cmp = tree->rb_compare (item, p->rb_data, tree->rb_param);
+
+ if (cmp < 0)
+ q = p->rb_link[0];
+ else if (cmp > 0)
+ q = p->rb_link[1];
+ else /* |cmp == 0| */
+ {
+ trav->rb_node = p;
+ return p->rb_data;
+ }
+
+ assert (trav->rb_height < RB_MAX_HEIGHT);
+ trav->rb_stack[trav->rb_height++] = p;
+ }
+
+ trav->rb_height = 0;
+ trav->rb_node = NULL;
+ return NULL;
+}
+
+/* Attempts to insert |item| into |tree|.
+ If |item| is inserted successfully, it is returned and |trav| is
+ initialized to its location.
+ If a duplicate is found, it is returned and |trav| is initialized to
+ its location. No replacement of the item occurs.
+ If a memory allocation failure occurs, |NULL| is returned and |trav|
+ is initialized to the null item. */
+void *
+rb_t_insert (struct rb_traverser *trav, struct rb_table *tree, void *item)
+{
+ void **p;
+
+ assert (trav != NULL && tree != NULL && item != NULL);
+
+ p = rb_probe (tree, item);
+ if (p != NULL)
+ {
+ trav->rb_table = tree;
+ trav->rb_node =
+ ((struct rb_node *)
+ ((char *) p - offsetof (struct rb_node, rb_data)));
+ trav->rb_generation = tree->rb_generation - 1;
+ return *p;
+ }
+ else
+ {
+ rb_t_init (trav, tree);
+ return NULL;
+ }
+}
+
+/* Initializes |trav| to have the same current node as |src|. */
+void *
+rb_t_copy (struct rb_traverser *trav, const struct rb_traverser *src)
+{
+ assert (trav != NULL && src != NULL);
+
+ if (trav != src)
+ {
+ trav->rb_table = src->rb_table;
+ trav->rb_node = src->rb_node;
+ trav->rb_generation = src->rb_generation;
+ if (trav->rb_generation == trav->rb_table->rb_generation)
+ {
+ trav->rb_height = src->rb_height;
+ memcpy (trav->rb_stack, (const void *) src->rb_stack,
+ sizeof *trav->rb_stack * trav->rb_height);
+ }
+ }
+
+ return trav->rb_node != NULL ? trav->rb_node->rb_data : NULL;
+}
+
+/* Returns the next data item in inorder
+ within the tree being traversed with |trav|,
+ or if there are no more data items returns |NULL|. */
+void *
+rb_t_next (struct rb_traverser *trav)
+{
+ struct rb_node *x;
+
+ assert (trav != NULL);
+
+ if (trav->rb_generation != trav->rb_table->rb_generation)
+ trav_refresh (trav);
+
+ x = trav->rb_node;
+ if (x == NULL)
+ {
+ return rb_t_first (trav, trav->rb_table);
+ }
+ else if (x->rb_link[1] != NULL)
+ {
+ assert (trav->rb_height < RB_MAX_HEIGHT);
+ trav->rb_stack[trav->rb_height++] = x;
+ x = x->rb_link[1];
+
+ while (x->rb_link[0] != NULL)
+ {
+ assert (trav->rb_height < RB_MAX_HEIGHT);
+ trav->rb_stack[trav->rb_height++] = x;
+ x = x->rb_link[0];
+ }
+ }
+ else
+ {
+ struct rb_node *y;
+
+ do
+ {
+ if (trav->rb_height == 0)
+ {
+ trav->rb_node = NULL;
+ return NULL;
+ }
+
+ y = x;
+ x = trav->rb_stack[--trav->rb_height];
+ }
+ while (y == x->rb_link[1]);
+ }
+ trav->rb_node = x;
+
+ return x->rb_data;
+}
+
+/* Returns the previous data item in inorder
+ within the tree being traversed with |trav|,
+ or if there are no more data items returns |NULL|. */
+void *
+rb_t_prev (struct rb_traverser *trav)
+{
+ struct rb_node *x;
+
+ assert (trav != NULL);
+
+ if (trav->rb_generation != trav->rb_table->rb_generation)
+ trav_refresh (trav);
+
+ x = trav->rb_node;
+ if (x == NULL)
+ {
+ return rb_t_last (trav, trav->rb_table);
+ }
+ else if (x->rb_link[0] != NULL)
+ {
+ assert (trav->rb_height < RB_MAX_HEIGHT);
+ trav->rb_stack[trav->rb_height++] = x;
+ x = x->rb_link[0];
+
+ while (x->rb_link[1] != NULL)
+ {
+ assert (trav->rb_height < RB_MAX_HEIGHT);
+ trav->rb_stack[trav->rb_height++] = x;
+ x = x->rb_link[1];
+ }
+ }
+ else
+ {
+ struct rb_node *y;
+
+ do
+ {
+ if (trav->rb_height == 0)
+ {
+ trav->rb_node = NULL;
+ return NULL;
+ }
+
+ y = x;
+ x = trav->rb_stack[--trav->rb_height];
+ }
+ while (y == x->rb_link[0]);
+ }
+ trav->rb_node = x;
+
+ return x->rb_data;
+}
+
+/* Returns |trav|'s current item. */
+void *
+rb_t_cur (struct rb_traverser *trav)
+{
+ assert (trav != NULL);
+
+ return trav->rb_node != NULL ? trav->rb_node->rb_data : NULL;
+}
+
+/* Replaces the current item in |trav| by |new| and returns the item replaced.
+ |trav| must not have the null item selected.
+ The new item must not upset the ordering of the tree. */
+void *
+rb_t_replace (struct rb_traverser *trav, void *new)
+{
+ void *old;
+
+ assert (trav != NULL && trav->rb_node != NULL && new != NULL);
+ old = trav->rb_node->rb_data;
+ trav->rb_node->rb_data = new;
+ return old;
+}
+
+/* Destroys |new| with |rb_destroy (new, destroy)|,
+ first setting right links of nodes in |stack| within |new|
+ to null pointers to avoid touching uninitialized data. */
+static void
+copy_error_recovery (struct rb_node **stack, int height,
+ struct rb_table *new, rb_item_func *destroy)
+{
+ assert (stack != NULL && height >= 0 && new != NULL);
+
+ for (; height > 2; height -= 2)
+ stack[height - 1]->rb_link[1] = NULL;
+ rb_destroy (new, destroy);
+}
+
+/* Copies |org| to a newly created tree, which is returned.
+ If |copy != NULL|, each data item in |org| is first passed to |copy|,
+ and the return values are inserted into the tree,
+ with |NULL| return values taken as indications of failure.
+ On failure, destroys the partially created new tree,
+ applying |destroy|, if non-null, to each item in the new tree so far,
+ and returns |NULL|.
+ If |allocator != NULL|, it is used for allocation in the new tree.
+ Otherwise, the same allocator used for |org| is used. */
+struct rb_table *
+rb_copy (const struct rb_table *org, rb_copy_func *copy,
+ rb_item_func *destroy, struct libavl_allocator *allocator)
+{
+ struct rb_node *stack[2 * (RB_MAX_HEIGHT + 1)];
+ int height = 0;
+
+ struct rb_table *new;
+ const struct rb_node *x;
+ struct rb_node *y;
+
+ assert (org != NULL);
+ new = rb_create (org->rb_compare, org->rb_param,
+ allocator != NULL ? allocator : org->rb_alloc);
+ if (new == NULL)
+ return NULL;
+ new->rb_count = org->rb_count;
+ if (new->rb_count == 0)
+ return new;
+
+ x = (const struct rb_node *) &org->rb_root;
+ y = (struct rb_node *) &new->rb_root;
+ for (;;)
+ {
+ while (x->rb_link[0] != NULL)
+ {
+ assert (height < 2 * (RB_MAX_HEIGHT + 1));
+
+ y->rb_link[0] =
+ new->rb_alloc->libavl_malloc (new->rb_alloc,
+ sizeof *y->rb_link[0]);
+ if (y->rb_link[0] == NULL)
+ {
+ if (y != (struct rb_node *) &new->rb_root)
+ {
+ y->rb_data = NULL;
+ y->rb_link[1] = NULL;
+ }
+
+ copy_error_recovery (stack, height, new, destroy);
+ return NULL;
+ }
+
+ stack[height++] = (struct rb_node *) x;
+ stack[height++] = y;
+ x = x->rb_link[0];
+ y = y->rb_link[0];
+ }
+ y->rb_link[0] = NULL;
+
+ for (;;)
+ {
+ y->rb_color = x->rb_color;
+ if (copy == NULL)
+ y->rb_data = x->rb_data;
+ else
+ {
+ y->rb_data = copy (x->rb_data, org->rb_param);
+ if (y->rb_data == NULL)
+ {
+ y->rb_link[1] = NULL;
+ copy_error_recovery (stack, height, new, destroy);
+ return NULL;
+ }
+ }
+
+ if (x->rb_link[1] != NULL)
+ {
+ y->rb_link[1] =
+ new->rb_alloc->libavl_malloc (new->rb_alloc,
+ sizeof *y->rb_link[1]);
+ if (y->rb_link[1] == NULL)
+ {
+ copy_error_recovery (stack, height, new, destroy);
+ return NULL;
+ }
+
+ x = x->rb_link[1];
+ y = y->rb_link[1];
+ break;
+ }
+ else
+ y->rb_link[1] = NULL;
+
+ if (height <= 2)
+ return new;
+
+ y = stack[--height];
+ x = stack[--height];
+ }
+ }
+}
+
+/* Frees storage allocated for |tree|.
+ If |destroy != NULL|, applies it to each data item in inorder. */
+void
+rb_destroy (struct rb_table *tree, rb_item_func *destroy)
+{
+ struct rb_node *p, *q;
+
+ assert (tree != NULL);
+
+ for (p = tree->rb_root; p != NULL; p = q)
+ if (p->rb_link[0] == NULL)
+ {
+ q = p->rb_link[1];
+ if (destroy != NULL && p->rb_data != NULL)
+ destroy (p->rb_data, tree->rb_param);
+ tree->rb_alloc->libavl_free (tree->rb_alloc, p);
+ }
+ else
+ {
+ q = p->rb_link[0];
+ p->rb_link[0] = q->rb_link[1];
+ q->rb_link[1] = p;
+ }
+
+ tree->rb_alloc->libavl_free (tree->rb_alloc, tree);
+}
+
+/* Allocates |size| bytes of space using |malloc()|.
+ Returns a null pointer if allocation fails. */
+void *
+rb_malloc (struct libavl_allocator *allocator, size_t size)
+{
+ assert (allocator != NULL && size > 0);
+ return malloc (size);
+}
+
+/* Frees |block|. */
+void
+rb_free (struct libavl_allocator *allocator, void *block)
+{
+ assert (allocator != NULL && block != NULL);
+ free (block);
+}
+
+/* Default memory allocator that uses |malloc()| and |free()|. */
+struct libavl_allocator rb_allocator_default =
+ {
+ rb_malloc,
+ rb_free
+ };
+
+#undef NDEBUG
+#include <assert.h>
+
+/* Asserts that |rb_insert()| succeeds at inserting |item| into |table|. */
+void
+(rb_assert_insert) (struct rb_table *table, void *item)
+{
+ void **p = rb_probe (table, item);
+ assert (p != NULL && *p == item);
+}
+
+/* Asserts that |rb_delete()| really removes |item| from |table|,
+ and returns the removed item. */
+void *
+(rb_assert_delete) (struct rb_table *table, void *item)
+{
+ void *p = rb_delete (table, item);
+ assert (p != NULL);
+ return p;
+}
+
diff --git a/contrib/rbtree/rb.h b/contrib/rbtree/rb.h
new file mode 100644
index 000000000..97b44cfd4
--- /dev/null
+++ b/contrib/rbtree/rb.h
@@ -0,0 +1,126 @@
+/* Produced by texiweb from libavl.w. */
+
+/* libavl - library for manipulation of binary trees.
+ Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004 Free Software
+ Foundation, Inc.
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 3 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301 USA.
+
+ This code is also covered by the following earlier license notice:
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+*/
+
+#ifndef RB_H
+#define RB_H 1
+
+#include <stddef.h>
+
+/* Function types. */
+typedef int rb_comparison_func (const void *rb_a, const void *rb_b,
+ void *rb_param);
+typedef void rb_item_func (void *rb_item, void *rb_param);
+typedef void *rb_copy_func (void *rb_item, void *rb_param);
+
+#ifndef LIBAVL_ALLOCATOR
+#define LIBAVL_ALLOCATOR
+/* Memory allocator. */
+struct libavl_allocator
+ {
+ void *(*libavl_malloc) (struct libavl_allocator *, size_t libavl_size);
+ void (*libavl_free) (struct libavl_allocator *, void *libavl_block);
+ };
+#endif
+
+/* Default memory allocator. */
+extern struct libavl_allocator rb_allocator_default;
+void *rb_malloc (struct libavl_allocator *, size_t);
+void rb_free (struct libavl_allocator *, void *);
+
+/* Maximum RB height. */
+#ifndef RB_MAX_HEIGHT
+#define RB_MAX_HEIGHT 128
+#endif
+
+/* Tree data structure. */
+struct rb_table
+ {
+ struct rb_node *rb_root; /* Tree's root. */
+ rb_comparison_func *rb_compare; /* Comparison function. */
+ void *rb_param; /* Extra argument to |rb_compare|. */
+ struct libavl_allocator *rb_alloc; /* Memory allocator. */
+ size_t rb_count; /* Number of items in tree. */
+ unsigned long rb_generation; /* Generation number. */
+ };
+
+/* Color of a red-black node. */
+enum rb_color
+ {
+ RB_BLACK, /* Black. */
+ RB_RED /* Red. */
+ };
+
+/* A red-black tree node. */
+struct rb_node
+ {
+ struct rb_node *rb_link[2]; /* Subtrees. */
+ void *rb_data; /* Pointer to data. */
+ unsigned char rb_color; /* Color. */
+ };
+
+/* RB traverser structure. */
+struct rb_traverser
+ {
+ struct rb_table *rb_table; /* Tree being traversed. */
+ struct rb_node *rb_node; /* Current node in tree. */
+ struct rb_node *rb_stack[RB_MAX_HEIGHT];
+ /* All the nodes above |rb_node|. */
+ size_t rb_height; /* Number of nodes in |rb_parent|. */
+ unsigned long rb_generation; /* Generation number. */
+ };
+
+/* Table functions. */
+struct rb_table *rb_create (rb_comparison_func *, void *,
+ struct libavl_allocator *);
+struct rb_table *rb_copy (const struct rb_table *, rb_copy_func *,
+ rb_item_func *, struct libavl_allocator *);
+void rb_destroy (struct rb_table *, rb_item_func *);
+void **rb_probe (struct rb_table *, void *);
+void *rb_insert (struct rb_table *, void *);
+void *rb_replace (struct rb_table *, void *);
+void *rb_delete (struct rb_table *, const void *);
+void *rb_find (const struct rb_table *, const void *);
+void rb_assert_insert (struct rb_table *, void *);
+void *rb_assert_delete (struct rb_table *, void *);
+
+#define rb_count(table) ((size_t) (table)->rb_count)
+
+/* Table traverser functions. */
+void rb_t_init (struct rb_traverser *, struct rb_table *);
+void *rb_t_first (struct rb_traverser *, struct rb_table *);
+void *rb_t_last (struct rb_traverser *, struct rb_table *);
+void *rb_t_find (struct rb_traverser *, struct rb_table *, void *);
+void *rb_t_insert (struct rb_traverser *, struct rb_table *, void *);
+void *rb_t_copy (struct rb_traverser *, const struct rb_traverser *);
+void *rb_t_next (struct rb_traverser *);
+void *rb_t_prev (struct rb_traverser *);
+void *rb_t_cur (struct rb_traverser *);
+void *rb_t_replace (struct rb_traverser *, void *);
+
+#endif /* rb.h */
diff --git a/contrib/stdlib/gf_mkostemp.c b/contrib/stdlib/gf_mkostemp.c
new file mode 100644
index 000000000..931249a45
--- /dev/null
+++ b/contrib/stdlib/gf_mkostemp.c
@@ -0,0 +1,107 @@
+/* Borrowed from glibc-2.16/sysdeps/posix/tempname.c */
+
+/* Copyright (C) 1991-2001, 2006, 2007, 2009 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/time.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <time.h>
+#include <inttypes.h>
+
+static const char letters[] =
+"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
+
+/* Generate a temporary file name based on TMPL. TMPL must match the
+ rules for mk[s]temp (i.e. end in "XXXXXX", possibly with a suffix).
+*/
+
+#if !defined(TMP_MAX)
+#define TMP_MAX 238328
+#endif
+
+int
+gf_mkostemp (char *tmpl, int suffixlen, int flags)
+{
+ int len;
+ char *XXXXXX;
+ static uint64_t value;
+ uint64_t random_time_bits;
+ unsigned int count;
+ int fd = -1;
+
+ /* A lower bound on the number of temporary files to attempt to
+ generate. The maximum total number of temporary file names that
+ can exist for a given template is 62**6. It should never be
+ necessary to try all these combinations. Instead if a reasonable
+ number of names is tried (we define reasonable as 62**3) fail to
+ give the system administrator the chance to remove the problems. */
+
+ unsigned int attempts = TMP_MAX; /* TMP_MAX == 62³ */
+
+ len = strlen (tmpl);
+ if (len < 6 + suffixlen || memcmp (&tmpl[len - 6 - suffixlen],
+ "XXXXXX", 6))
+ return -1;
+
+ /* This is where the Xs start. */
+ XXXXXX = &tmpl[len - 6 - suffixlen];
+
+ /* Get some more or less random data. */
+# if HAVE_GETTIMEOFDAY
+ struct timeval tv;
+ gettimeofday (&tv, NULL);
+ random_time_bits = ((uint64_t) tv.tv_usec << 16) ^ tv.tv_sec;
+# else
+ random_time_bits = time (NULL);
+# endif
+
+ value += random_time_bits ^ getpid ();
+
+ for (count = 0; count < attempts; value += 7777, ++count) {
+ uint64_t v = value;
+
+ /* Fill in the random bits. */
+ XXXXXX[0] = letters[v % 62];
+ v /= 62;
+ XXXXXX[1] = letters[v % 62];
+ v /= 62;
+ XXXXXX[2] = letters[v % 62];
+ v /= 62;
+ XXXXXX[3] = letters[v % 62];
+ v /= 62;
+ XXXXXX[4] = letters[v % 62];
+ v /= 62;
+ XXXXXX[5] = letters[v % 62];
+
+ fd = open (tmpl, (flags & ~O_ACCMODE)
+ | O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
+
+ if (fd >= 0)
+ return fd;
+ else if (errno != EEXIST)
+ return -1;
+ }
+
+ /* We got out of the loop because we ran out of combinations to try. */
+ return -1;
+}
diff --git a/contrib/uuid/clear.c b/contrib/uuid/clear.c
new file mode 100644
index 000000000..2d91fee93
--- /dev/null
+++ b/contrib/uuid/clear.c
@@ -0,0 +1,43 @@
+/*
+ * clear.c -- Clear a UUID
+ *
+ * Copyright (C) 1996, 1997 Theodore Ts'o.
+ *
+ * %Begin-Header%
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
+ * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ * %End-Header%
+ */
+
+#include "string.h"
+
+#include "uuidP.h"
+
+void uuid_clear(uuid_t uu)
+{
+ memset(uu, 0, 16);
+}
+
diff --git a/contrib/uuid/compare.c b/contrib/uuid/compare.c
new file mode 100644
index 000000000..f28a72678
--- /dev/null
+++ b/contrib/uuid/compare.c
@@ -0,0 +1,55 @@
+/*
+ * compare.c --- compare whether or not two UUID's are the same
+ *
+ * Returns 0 if the two UUID's are different, and 1 if they are the same.
+ *
+ * Copyright (C) 1996, 1997 Theodore Ts'o.
+ *
+ * %Begin-Header%
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
+ * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ * %End-Header%
+ */
+
+#include "uuidP.h"
+#include <string.h>
+
+#define UUCMP(u1,u2) if (u1 != u2) return((u1 < u2) ? -1 : 1);
+
+int uuid_compare(const uuid_t uu1, const uuid_t uu2)
+{
+ struct uuid uuid1, uuid2;
+
+ uuid_unpack(uu1, &uuid1);
+ uuid_unpack(uu2, &uuid2);
+
+ UUCMP(uuid1.time_low, uuid2.time_low);
+ UUCMP(uuid1.time_mid, uuid2.time_mid);
+ UUCMP(uuid1.time_hi_and_version, uuid2.time_hi_and_version);
+ UUCMP(uuid1.clock_seq, uuid2.clock_seq);
+ return memcmp(uuid1.node, uuid2.node, 6);
+}
+
diff --git a/contrib/uuid/copy.c b/contrib/uuid/copy.c
new file mode 100644
index 000000000..ead33aa26
--- /dev/null
+++ b/contrib/uuid/copy.c
@@ -0,0 +1,45 @@
+/*
+ * copy.c --- copy UUIDs
+ *
+ * Copyright (C) 1996, 1997 Theodore Ts'o.
+ *
+ * %Begin-Header%
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
+ * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ * %End-Header%
+ */
+
+#include "uuidP.h"
+
+void uuid_copy(uuid_t dst, const uuid_t src)
+{
+ unsigned char *cp1;
+ const unsigned char *cp2;
+ int i;
+
+ for (i=0, cp1 = dst, cp2 = src; i < 16; i++)
+ *cp1++ = *cp2++;
+}
diff --git a/contrib/uuid/gen_uuid.c b/contrib/uuid/gen_uuid.c
new file mode 100644
index 000000000..b3eda9de3
--- /dev/null
+++ b/contrib/uuid/gen_uuid.c
@@ -0,0 +1,679 @@
+/*
+ * gen_uuid.c --- generate a DCE-compatible uuid
+ *
+ * Copyright (C) 1996, 1997, 1998, 1999 Theodore Ts'o.
+ *
+ * %Begin-Header%
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
+ * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ * %End-Header%
+ */
+
+/*
+ * Force inclusion of SVID stuff since we need it if we're compiling in
+ * gcc-wall wall mode
+ */
+#define _SVID_SOURCE
+
+#include "config.h"
+#ifdef _WIN32
+#define _WIN32_WINNT 0x0500
+#include <windows.h>
+#define UUID MYUUID
+#endif
+#include <stdio.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+#include <string.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/types.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#include <sys/wait.h>
+#include <sys/stat.h>
+#ifdef HAVE_SYS_FILE_H
+#include <sys/file.h>
+#endif
+#ifdef HAVE_SYS_IOCTL_H
+#include <sys/ioctl.h>
+#endif
+#ifdef HAVE_SYS_SOCKET_H
+#include <sys/socket.h>
+#endif
+#ifdef HAVE_SYS_UN_H
+#include <sys/un.h>
+#endif
+#ifdef HAVE_SYS_SOCKIO_H
+#include <sys/sockio.h>
+#endif
+#ifdef HAVE_NET_IF_H
+#include <net/if.h>
+#endif
+#ifdef HAVE_NETINET_IN_H
+#include <netinet/in.h>
+#endif
+#ifdef HAVE_NET_IF_DL_H
+#include <net/if_dl.h>
+#endif
+#if defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)
+#include <sys/syscall.h>
+#endif
+#ifdef HAVE_SYS_RESOURCE_H
+#include <sys/resource.h>
+#endif
+#include <limits.h>
+
+#include "uuidP.h"
+#include "uuidd.h"
+
+#ifdef HAVE_SRANDOM
+#define srand(x) srandom(x)
+#define rand() random()
+#endif
+
+#ifdef TLS
+#define THREAD_LOCAL static TLS
+#else
+#define THREAD_LOCAL static
+#endif
+
+#if defined(__linux__) && defined(__NR_gettid) && defined(HAVE_JRAND48)
+#define DO_JRAND_MIX
+THREAD_LOCAL unsigned short jrand_seed[3];
+#endif
+
+#ifndef OPEN_MAX
+#define OPEN_MAX 1024
+#endif
+
+#ifdef _WIN32
+static void gettimeofday (struct timeval *tv, void *dummy)
+{
+ FILETIME ftime;
+ uint64_t n;
+
+ GetSystemTimeAsFileTime (&ftime);
+ n = (((uint64_t) ftime.dwHighDateTime << 32)
+ + (uint64_t) ftime.dwLowDateTime);
+ if (n) {
+ n /= 10;
+ n -= ((369 * 365 + 89) * (uint64_t) 86400) * 1000000;
+ }
+
+ tv->tv_sec = n / 1000000;
+ tv->tv_usec = n % 1000000;
+}
+
+static int getuid (void)
+{
+ return 1;
+}
+#endif
+
+static int get_random_fd(void)
+{
+ struct timeval tv;
+ static int fd = -2;
+ int i;
+
+ if (fd == -2) {
+ gettimeofday(&tv, 0);
+#ifndef _WIN32
+ fd = open("/dev/urandom", O_RDONLY);
+ if (fd == -1)
+ fd = open("/dev/random", O_RDONLY | O_NONBLOCK);
+ if (fd >= 0) {
+ i = fcntl(fd, F_GETFD);
+ if (i >= 0)
+ fcntl(fd, F_SETFD, i | FD_CLOEXEC);
+ }
+#endif
+ srand((getpid() << 16) ^ getuid() ^ tv.tv_sec ^ tv.tv_usec);
+#ifdef DO_JRAND_MIX
+ jrand_seed[0] = getpid() ^ (tv.tv_sec & 0xFFFF);
+ jrand_seed[1] = getppid() ^ (tv.tv_usec & 0xFFFF);
+ jrand_seed[2] = (tv.tv_sec ^ tv.tv_usec) >> 16;
+#endif
+ }
+ /* Crank the random number generator a few times */
+ gettimeofday(&tv, 0);
+ for (i = (tv.tv_sec ^ tv.tv_usec) & 0x1F; i > 0; i--)
+ rand();
+ return fd;
+}
+
+
+/*
+ * Generate a series of random bytes. Use /dev/urandom if possible,
+ * and if not, use srandom/random.
+ */
+static void get_random_bytes(void *buf, int nbytes)
+{
+ int i, n = nbytes, fd = get_random_fd();
+ int lose_counter = 0;
+ unsigned char *cp = (unsigned char *) buf;
+#ifdef DO_JRAND_MIX
+ unsigned short tmp_seed[3];
+#endif
+ if (fd >= 0) {
+ while (n > 0) {
+ i = read(fd, cp, n);
+ if (i <= 0) {
+ if (lose_counter++ > 16)
+ break;
+ continue;
+ }
+ n -= i;
+ cp += i;
+ lose_counter = 0;
+ }
+ }
+
+ /*
+ * We do this all the time, but this is the only source of
+ * randomness if /dev/random/urandom is out to lunch.
+ */
+ for (cp = buf, i = 0; i < nbytes; i++)
+ *cp++ ^= (rand() >> 7) & 0xFF;
+#ifdef DO_JRAND_MIX
+ memcpy(tmp_seed, jrand_seed, sizeof(tmp_seed));
+ jrand_seed[2] = jrand_seed[2] ^ syscall(__NR_gettid);
+ for (cp = buf, i = 0; i < nbytes; i++)
+ *cp++ ^= (jrand48(tmp_seed) >> 7) & 0xFF;
+ memcpy(jrand_seed, tmp_seed,
+ sizeof(jrand_seed)-sizeof(unsigned short));
+#endif
+
+ return;
+}
+
+/*
+ * Get the ethernet hardware address, if we can find it...
+ *
+ * XXX for a windows version, probably should use GetAdaptersInfo:
+ * http://www.codeguru.com/cpp/i-n/network/networkinformation/article.php/c5451
+ * commenting out get_node_id just to get gen_uuid to compile under windows
+ * is not the right way to go!
+ */
+static int get_node_id(unsigned char *node_id)
+{
+#ifdef HAVE_NET_IF_H
+ int sd;
+ struct ifreq ifr, *ifrp;
+ struct ifconf ifc;
+ char buf[1024];
+ int n, i;
+ unsigned char *a;
+#ifdef HAVE_NET_IF_DL_H
+ struct sockaddr_dl *sdlp;
+#endif
+
+/*
+ * BSD 4.4 defines the size of an ifreq to be
+ * max(sizeof(ifreq), sizeof(ifreq.ifr_name)+ifreq.ifr_addr.sa_len
+ * However, under earlier systems, sa_len isn't present, so the size is
+ * just sizeof(struct ifreq)
+ */
+#ifdef HAVE_SA_LEN
+#ifndef max
+#define max(a,b) ((a) > (b) ? (a) : (b))
+#endif
+#define ifreq_size(i) max(sizeof(struct ifreq),\
+ sizeof((i).ifr_name)+(i).ifr_addr.sa_len)
+#else
+#define ifreq_size(i) sizeof(struct ifreq)
+#endif /* HAVE_SA_LEN*/
+
+ sd = socket(AF_INET, SOCK_DGRAM, IPPROTO_IP);
+ if (sd < 0) {
+ return -1;
+ }
+ memset(buf, 0, sizeof(buf));
+ ifc.ifc_len = sizeof(buf);
+ ifc.ifc_buf = buf;
+ if (ioctl (sd, SIOCGIFCONF, (char *)&ifc) < 0) {
+ close(sd);
+ return -1;
+ }
+ n = ifc.ifc_len;
+ for (i = 0; i < n; i+= ifreq_size(*ifrp) ) {
+ ifrp = (struct ifreq *)((char *) ifc.ifc_buf+i);
+ strncpy(ifr.ifr_name, ifrp->ifr_name, IFNAMSIZ);
+#ifdef SIOCGIFHWADDR
+ if (ioctl(sd, SIOCGIFHWADDR, &ifr) < 0)
+ continue;
+ a = (unsigned char *) &ifr.ifr_hwaddr.sa_data;
+#else
+#ifdef SIOCGENADDR
+ if (ioctl(sd, SIOCGENADDR, &ifr) < 0)
+ continue;
+ a = (unsigned char *) ifr.ifr_enaddr;
+#else
+#ifdef HAVE_NET_IF_DL_H
+ sdlp = (struct sockaddr_dl *) &ifrp->ifr_addr;
+ if ((sdlp->sdl_family != AF_LINK) || (sdlp->sdl_alen != 6))
+ continue;
+ a = (unsigned char *) &sdlp->sdl_data[sdlp->sdl_nlen];
+#else
+ /*
+ * XXX we don't have a way of getting the hardware
+ * address
+ */
+ close(sd);
+ return 0;
+#endif /* HAVE_NET_IF_DL_H */
+#endif /* SIOCGENADDR */
+#endif /* SIOCGIFHWADDR */
+ if (!a[0] && !a[1] && !a[2] && !a[3] && !a[4] && !a[5])
+ continue;
+ if (node_id) {
+ memcpy(node_id, a, 6);
+ close(sd);
+ return 1;
+ }
+ }
+ close(sd);
+#endif
+ return 0;
+}
+
+/* Assume that the gettimeofday() has microsecond granularity */
+#define MAX_ADJUSTMENT 10
+
+static int get_clock(uint32_t *clock_high, uint32_t *clock_low,
+ uint16_t *ret_clock_seq, int *num)
+{
+ THREAD_LOCAL int adjustment = 0;
+ THREAD_LOCAL struct timeval last = {0, 0};
+ THREAD_LOCAL int state_fd = -2;
+ THREAD_LOCAL FILE *state_f;
+ THREAD_LOCAL uint16_t clock_seq;
+ struct timeval tv;
+ struct flock fl;
+ uint64_t clock_reg;
+ mode_t save_umask;
+ int len;
+
+ if (state_fd == -2) {
+ save_umask = umask(0);
+ state_fd = open("/var/lib/libuuid/clock.txt",
+ O_RDWR|O_CREAT, 0660);
+ (void) umask(save_umask);
+ state_f = fdopen(state_fd, "r+");
+ if (!state_f) {
+ close(state_fd);
+ state_fd = -1;
+ }
+ }
+ fl.l_type = F_WRLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 0;
+ fl.l_len = 0;
+ fl.l_pid = 0;
+ if (state_fd >= 0) {
+ rewind(state_f);
+ while (fcntl(state_fd, F_SETLKW, &fl) < 0) {
+ if ((errno == EAGAIN) || (errno == EINTR))
+ continue;
+ fclose(state_f);
+ close(state_fd);
+ state_fd = -1;
+ break;
+ }
+ }
+ if (state_fd >= 0) {
+ unsigned int cl;
+ unsigned long tv1, tv2;
+ int a;
+
+ if (fscanf(state_f, "clock: %04x tv: %lu %lu adj: %d\n",
+ &cl, &tv1, &tv2, &a) == 4) {
+ clock_seq = cl & 0x3FFF;
+ last.tv_sec = tv1;
+ last.tv_usec = tv2;
+ adjustment = a;
+ }
+ }
+
+ if ((last.tv_sec == 0) && (last.tv_usec == 0)) {
+ get_random_bytes(&clock_seq, sizeof(clock_seq));
+ clock_seq &= 0x3FFF;
+ gettimeofday(&last, 0);
+ last.tv_sec--;
+ }
+
+try_again:
+ gettimeofday(&tv, 0);
+ if ((tv.tv_sec < last.tv_sec) ||
+ ((tv.tv_sec == last.tv_sec) &&
+ (tv.tv_usec < last.tv_usec))) {
+ clock_seq = (clock_seq+1) & 0x3FFF;
+ adjustment = 0;
+ last = tv;
+ } else if ((tv.tv_sec == last.tv_sec) &&
+ (tv.tv_usec == last.tv_usec)) {
+ if (adjustment >= MAX_ADJUSTMENT)
+ goto try_again;
+ adjustment++;
+ } else {
+ adjustment = 0;
+ last = tv;
+ }
+
+ clock_reg = tv.tv_usec*10 + adjustment;
+ clock_reg += ((uint64_t) tv.tv_sec)*10000000;
+ clock_reg += (((uint64_t) 0x01B21DD2) << 32) + 0x13814000;
+
+ if (num && (*num > 1)) {
+ adjustment += *num - 1;
+ last.tv_usec += adjustment / 10;
+ adjustment = adjustment % 10;
+ last.tv_sec += last.tv_usec / 1000000;
+ last.tv_usec = last.tv_usec % 1000000;
+ }
+
+ if (state_fd > 0) {
+ rewind(state_f);
+ len = fprintf(state_f,
+ "clock: %04x tv: %016lu %08lu adj: %08d\n",
+ clock_seq, last.tv_sec, last.tv_usec, adjustment);
+ fflush(state_f);
+ if (ftruncate(state_fd, len) < 0) {
+ fprintf(state_f, " \n");
+ fflush(state_f);
+ }
+ rewind(state_f);
+ fl.l_type = F_UNLCK;
+ fcntl(state_fd, F_SETLK, &fl);
+ }
+
+ *clock_high = clock_reg >> 32;
+ *clock_low = clock_reg;
+ *ret_clock_seq = clock_seq;
+ return 0;
+}
+
+#if defined(USE_UUIDD) && defined(HAVE_SYS_UN_H)
+static ssize_t read_all(int fd, char *buf, size_t count)
+{
+ ssize_t ret;
+ ssize_t c = 0;
+ int tries = 0;
+
+ memset(buf, 0, count);
+ while (count > 0) {
+ ret = read(fd, buf, count);
+ if (ret <= 0) {
+ if ((errno == EAGAIN || errno == EINTR || ret == 0) &&
+ (tries++ < 5))
+ continue;
+ return c ? c : -1;
+ }
+ if (ret > 0)
+ tries = 0;
+ count -= ret;
+ buf += ret;
+ c += ret;
+ }
+ return c;
+}
+#endif
+
+/*
+ * Close all file descriptors
+ */
+#if defined(USE_UUIDD) && defined(HAVE_SYS_UN_H)
+static void close_all_fds(void)
+{
+ int i, max;
+
+#if defined(HAVE_SYSCONF) && defined(_SC_OPEN_MAX)
+ max = sysconf(_SC_OPEN_MAX);
+#elif defined(HAVE_GETDTABLESIZE)
+ max = getdtablesize();
+#elif defined(HAVE_GETRLIMIT) && defined(RLIMIT_NOFILE)
+ struct rlimit rl;
+
+ getrlimit(RLIMIT_NOFILE, &rl);
+ max = rl.rlim_cur;
+#else
+ max = OPEN_MAX;
+#endif
+
+ for (i=0; i < max; i++) {
+ close(i);
+ if (i <= 2)
+ open("/dev/null", O_RDWR);
+ }
+}
+#endif
+
+
+/*
+ * Try using the uuidd daemon to generate the UUID
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+static int get_uuid_via_daemon(int op, uuid_t out, int *num)
+{
+#if defined(USE_UUIDD) && defined(HAVE_SYS_UN_H)
+ char op_buf[64];
+ int op_len;
+ int s;
+ ssize_t ret;
+ int32_t reply_len = 0, expected = 16;
+ struct sockaddr_un srv_addr;
+ struct stat st;
+ pid_t pid;
+ static const char *uuidd_path = UUIDD_PATH;
+ static int access_ret = -2;
+ static int start_attempts = 0;
+
+ if ((s = socket(AF_UNIX, SOCK_STREAM, 0)) < 0)
+ return -1;
+
+ srv_addr.sun_family = AF_UNIX;
+ strcpy(srv_addr.sun_path, UUIDD_SOCKET_PATH);
+
+ if (connect(s, (const struct sockaddr *) &srv_addr,
+ sizeof(struct sockaddr_un)) < 0) {
+ if (access_ret == -2)
+ access_ret = access(uuidd_path, X_OK);
+ if (access_ret == 0)
+ access_ret = stat(uuidd_path, &st);
+ if (access_ret == 0 && (st.st_mode & (S_ISUID | S_ISGID)) == 0)
+ access_ret = access(UUIDD_DIR, W_OK);
+ if (access_ret == 0 && start_attempts++ < 5) {
+ if ((pid = fork()) == 0) {
+ close_all_fds();
+ execl(uuidd_path, "uuidd", "-qT", "300",
+ (char *) NULL);
+ exit(1);
+ }
+ (void) waitpid(pid, 0, 0);
+ if (connect(s, (const struct sockaddr *) &srv_addr,
+ sizeof(struct sockaddr_un)) < 0)
+ goto fail;
+ } else
+ goto fail;
+ }
+ op_buf[0] = op;
+ op_len = 1;
+ if (op == UUIDD_OP_BULK_TIME_UUID) {
+ memcpy(op_buf+1, num, sizeof(*num));
+ op_len += sizeof(*num);
+ expected += sizeof(*num);
+ }
+
+ ret = write(s, op_buf, op_len);
+ if (ret < 1)
+ goto fail;
+
+ ret = read_all(s, (char *) &reply_len, sizeof(reply_len));
+ if (ret < 0)
+ goto fail;
+
+ if (reply_len != expected)
+ goto fail;
+
+ ret = read_all(s, op_buf, reply_len);
+
+ if (op == UUIDD_OP_BULK_TIME_UUID)
+ memcpy(op_buf+16, num, sizeof(int));
+
+ memcpy(out, op_buf, 16);
+
+ close(s);
+ return ((ret == expected) ? 0 : -1);
+
+fail:
+ close(s);
+#endif
+ return -1;
+}
+
+void uuid__generate_time(uuid_t out, int *num)
+{
+ static unsigned char node_id[6];
+ static int has_init = 0;
+ struct uuid uu;
+ uint32_t clock_mid;
+
+ if (!has_init) {
+ if (get_node_id(node_id) <= 0) {
+ get_random_bytes(node_id, 6);
+ /*
+ * Set multicast bit, to prevent conflicts
+ * with IEEE 802 addresses obtained from
+ * network cards
+ */
+ node_id[0] |= 0x01;
+ }
+ has_init = 1;
+ }
+ get_clock(&clock_mid, &uu.time_low, &uu.clock_seq, num);
+ uu.clock_seq |= 0x8000;
+ uu.time_mid = (uint16_t) clock_mid;
+ uu.time_hi_and_version = ((clock_mid >> 16) & 0x0FFF) | 0x1000;
+ memcpy(uu.node, node_id, 6);
+ uuid_pack(&uu, out);
+}
+
+void uuid_generate_time(uuid_t out)
+{
+#ifdef TLS
+ THREAD_LOCAL int num = 0;
+ THREAD_LOCAL struct uuid uu;
+ THREAD_LOCAL time_t last_time = 0;
+ time_t now;
+
+ if (num > 0) {
+ now = time(0);
+ if (now > last_time+1)
+ num = 0;
+ }
+ if (num <= 0) {
+ num = 1000;
+ if (get_uuid_via_daemon(UUIDD_OP_BULK_TIME_UUID,
+ out, &num) == 0) {
+ last_time = time(0);
+ uuid_unpack(out, &uu);
+ num--;
+ return;
+ }
+ num = 0;
+ }
+ if (num > 0) {
+ uu.time_low++;
+ if (uu.time_low == 0) {
+ uu.time_mid++;
+ if (uu.time_mid == 0)
+ uu.time_hi_and_version++;
+ }
+ num--;
+ uuid_pack(&uu, out);
+ return;
+ }
+#else
+ if (get_uuid_via_daemon(UUIDD_OP_TIME_UUID, out, 0) == 0)
+ return;
+#endif
+
+ uuid__generate_time(out, 0);
+}
+
+
+void uuid__generate_random(uuid_t out, int *num)
+{
+ uuid_t buf;
+ struct uuid uu;
+ int i, n;
+
+ if (!num || !*num)
+ n = 1;
+ else
+ n = *num;
+
+ for (i = 0; i < n; i++) {
+ get_random_bytes(buf, sizeof(buf));
+ uuid_unpack(buf, &uu);
+
+ uu.clock_seq = (uu.clock_seq & 0x3FFF) | 0x8000;
+ uu.time_hi_and_version = (uu.time_hi_and_version & 0x0FFF)
+ | 0x4000;
+ uuid_pack(&uu, out);
+ out += sizeof(uuid_t);
+ }
+}
+
+void uuid_generate_random(uuid_t out)
+{
+ int num = 1;
+ /* No real reason to use the daemon for random uuid's -- yet */
+
+ uuid__generate_random(out, &num);
+}
+
+
+/*
+ * This is the generic front-end to uuid_generate_random and
+ * uuid_generate_time. It uses uuid_generate_random only if
+ * /dev/urandom is available, since otherwise we won't have
+ * high-quality randomness.
+ */
+void uuid_generate(uuid_t out)
+{
+ if (get_random_fd() >= 0)
+ uuid_generate_random(out);
+ else
+ uuid_generate_time(out);
+}
diff --git a/contrib/uuid/gen_uuid_nt.c b/contrib/uuid/gen_uuid_nt.c
new file mode 100644
index 000000000..aa44bfd3d
--- /dev/null
+++ b/contrib/uuid/gen_uuid_nt.c
@@ -0,0 +1,92 @@
+/*
+ * gen_uuid_nt.c -- Use NT api to generate uuid
+ *
+ * Written by Andrey Shedel (andreys@ns.cr.cyco.com)
+ */
+
+
+#include "uuidP.h"
+
+#pragma warning(push,4)
+
+#pragma comment(lib, "ntdll.lib")
+
+//
+// Here is a nice example why it's not a good idea
+// to use native API in ordinary applications.
+// Number of parameters in function below was changed from 3 to 4
+// for NT5.
+//
+//
+// NTSYSAPI
+// NTSTATUS
+// NTAPI
+// NtAllocateUuids(
+// OUT PULONG p1,
+// OUT PULONG p2,
+// OUT PULONG p3,
+// OUT PUCHAR Seed // 6 bytes
+// );
+//
+//
+
+unsigned long
+__stdcall
+NtAllocateUuids(
+ void* p1, // 8 bytes
+ void* p2, // 4 bytes
+ void* p3 // 4 bytes
+ );
+
+typedef
+unsigned long
+(__stdcall*
+NtAllocateUuids_2000)(
+ void* p1, // 8 bytes
+ void* p2, // 4 bytes
+ void* p3, // 4 bytes
+ void* seed // 6 bytes
+ );
+
+
+
+//
+// Nice, but instead of including ntddk.h ot winnt.h
+// I should define it here because they MISSED __stdcall in those headers.
+//
+
+__declspec(dllimport)
+struct _TEB*
+__stdcall
+NtCurrentTeb(void);
+
+
+//
+// The only way to get version information from the system is to examine
+// one stored in PEB. But it's pretty dangerouse because this value could
+// be altered in image header.
+//
+
+static
+int
+Nt5(void)
+{
+ //return NtCuttentTeb()->Peb->OSMajorVersion >= 5;
+ return (int)*(int*)((char*)(int)(*(int*)((char*)NtCurrentTeb() + 0x30)) + 0xA4) >= 5;
+}
+
+
+
+
+void uuid_generate(uuid_t out)
+{
+ if(Nt5())
+ {
+ unsigned char seed[6];
+ ((NtAllocateUuids_2000)NtAllocateUuids)(out, ((char*)out)+8, ((char*)out)+12, &seed[0] );
+ }
+ else
+ {
+ NtAllocateUuids(out, ((char*)out)+8, ((char*)out)+12);
+ }
+}
diff --git a/contrib/uuid/isnull.c b/contrib/uuid/isnull.c
new file mode 100644
index 000000000..931e7e7db
--- /dev/null
+++ b/contrib/uuid/isnull.c
@@ -0,0 +1,48 @@
+/*
+ * isnull.c --- Check whether or not the UUID is null
+ *
+ * Copyright (C) 1996, 1997 Theodore Ts'o.
+ *
+ * %Begin-Header%
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
+ * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ * %End-Header%
+ */
+
+#include "uuidP.h"
+
+/* Returns 1 if the uuid is the NULL uuid */
+int uuid_is_null(const uuid_t uu)
+{
+ const unsigned char *cp;
+ int i;
+
+ for (i=0, cp = uu; i < 16; i++)
+ if (*cp++)
+ return 0;
+ return 1;
+}
+
diff --git a/contrib/uuid/pack.c b/contrib/uuid/pack.c
new file mode 100644
index 000000000..097516d2e
--- /dev/null
+++ b/contrib/uuid/pack.c
@@ -0,0 +1,69 @@
+/*
+ * Internal routine for packing UUID's
+ *
+ * Copyright (C) 1996, 1997 Theodore Ts'o.
+ *
+ * %Begin-Header%
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
+ * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ * %End-Header%
+ */
+
+#include <string.h>
+#include "uuidP.h"
+
+void uuid_pack(const struct uuid *uu, uuid_t ptr)
+{
+ uint32_t tmp;
+ unsigned char *out = ptr;
+
+ tmp = uu->time_low;
+ out[3] = (unsigned char) tmp;
+ tmp >>= 8;
+ out[2] = (unsigned char) tmp;
+ tmp >>= 8;
+ out[1] = (unsigned char) tmp;
+ tmp >>= 8;
+ out[0] = (unsigned char) tmp;
+
+ tmp = uu->time_mid;
+ out[5] = (unsigned char) tmp;
+ tmp >>= 8;
+ out[4] = (unsigned char) tmp;
+
+ tmp = uu->time_hi_and_version;
+ out[7] = (unsigned char) tmp;
+ tmp >>= 8;
+ out[6] = (unsigned char) tmp;
+
+ tmp = uu->clock_seq;
+ out[9] = (unsigned char) tmp;
+ tmp >>= 8;
+ out[8] = (unsigned char) tmp;
+
+ memcpy(out+10, uu->node, 6);
+}
+
diff --git a/contrib/uuid/parse.c b/contrib/uuid/parse.c
new file mode 100644
index 000000000..074383efa
--- /dev/null
+++ b/contrib/uuid/parse.c
@@ -0,0 +1,79 @@
+/*
+ * parse.c --- UUID parsing
+ *
+ * Copyright (C) 1996, 1997 Theodore Ts'o.
+ *
+ * %Begin-Header%
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
+ * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ * %End-Header%
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <string.h>
+
+#include "uuidP.h"
+
+int uuid_parse(const char *in, uuid_t uu)
+{
+ struct uuid uuid;
+ int i;
+ const char *cp;
+ char buf[3];
+
+ if (strlen(in) != 36)
+ return -1;
+ for (i=0, cp = in; i <= 36; i++,cp++) {
+ if ((i == 8) || (i == 13) || (i == 18) ||
+ (i == 23)) {
+ if (*cp == '-')
+ continue;
+ else
+ return -1;
+ }
+ if (i== 36)
+ if (*cp == 0)
+ continue;
+ if (!isxdigit(*cp))
+ return -1;
+ }
+ uuid.time_low = strtoul(in, NULL, 16);
+ uuid.time_mid = strtoul(in+9, NULL, 16);
+ uuid.time_hi_and_version = strtoul(in+14, NULL, 16);
+ uuid.clock_seq = strtoul(in+19, NULL, 16);
+ cp = in+24;
+ buf[2] = 0;
+ for (i=0; i < 6; i++) {
+ buf[0] = *cp++;
+ buf[1] = *cp++;
+ uuid.node[i] = strtoul(buf, NULL, 16);
+ }
+
+ uuid_pack(&uuid, uu);
+ return 0;
+}
diff --git a/contrib/uuid/tst_uuid.c b/contrib/uuid/tst_uuid.c
new file mode 100644
index 000000000..e03138f7d
--- /dev/null
+++ b/contrib/uuid/tst_uuid.c
@@ -0,0 +1,180 @@
+/*
+ * tst_uuid.c --- test program from the UUID library
+ *
+ * Copyright (C) 1996, 1997, 1998 Theodore Ts'o.
+ *
+ * %Begin-Header%
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
+ * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ * %End-Header%
+ */
+
+#ifdef _WIN32
+#define _WIN32_WINNT 0x0500
+#include <windows.h>
+#define UUID MYUUID
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "uuid.h"
+
+static int test_uuid(const char * uuid, int isValid)
+{
+ static const char * validStr[2] = {"invalid", "valid"};
+ uuid_t uuidBits;
+ int parsedOk;
+
+ parsedOk = uuid_parse(uuid, uuidBits) == 0;
+
+ printf("%s is %s", uuid, validStr[isValid]);
+ if (parsedOk != isValid) {
+ printf(" but uuid_parse says %s\n", validStr[parsedOk]);
+ return 1;
+ }
+ printf(", OK\n");
+ return 0;
+}
+
+#ifdef __GNUC__
+#define ATTR(x) __attribute__(x)
+#else
+#define ATTR(x)
+#endif
+
+int
+main(int argc ATTR((unused)) , char **argv ATTR((unused)))
+{
+ uuid_t buf, tst;
+ char str[100];
+ struct timeval tv;
+ time_t time_reg;
+ unsigned char *cp;
+ int i;
+ int failed = 0;
+ int type, variant;
+
+ uuid_generate(buf);
+ uuid_unparse(buf, str);
+ printf("UUID generate = %s\n", str);
+ printf("UUID: ");
+ for (i=0, cp = (unsigned char *) &buf; i < 16; i++) {
+ printf("%02x", *cp++);
+ }
+ printf("\n");
+ type = uuid_type(buf); variant = uuid_variant(buf);
+ printf("UUID type = %d, UUID variant = %d\n", type, variant);
+ if (variant != UUID_VARIANT_DCE) {
+ printf("Incorrect UUID Variant; was expecting DCE!\n");
+ failed++;
+ }
+ printf("\n");
+
+ uuid_generate_random(buf);
+ uuid_unparse(buf, str);
+ printf("UUID random string = %s\n", str);
+ printf("UUID: ");
+ for (i=0, cp = (unsigned char *) &buf; i < 16; i++) {
+ printf("%02x", *cp++);
+ }
+ printf("\n");
+ type = uuid_type(buf); variant = uuid_variant(buf);
+ printf("UUID type = %d, UUID variant = %d\n", type, variant);
+ if (variant != UUID_VARIANT_DCE) {
+ printf("Incorrect UUID Variant; was expecting DCE!\n");
+ failed++;
+ }
+ if (type != 4) {
+ printf("Incorrect UUID type; was expecting "
+ "4 (random type)!\n");
+ failed++;
+ }
+ printf("\n");
+
+ uuid_generate_time(buf);
+ uuid_unparse(buf, str);
+ printf("UUID string = %s\n", str);
+ printf("UUID time: ");
+ for (i=0, cp = (unsigned char *) &buf; i < 16; i++) {
+ printf("%02x", *cp++);
+ }
+ printf("\n");
+ type = uuid_type(buf); variant = uuid_variant(buf);
+ printf("UUID type = %d, UUID variant = %d\n", type, variant);
+ if (variant != UUID_VARIANT_DCE) {
+ printf("Incorrect UUID Variant; was expecting DCE!\n");
+ failed++;
+ }
+ if (type != 1) {
+ printf("Incorrect UUID type; was expecting "
+ "1 (time-based type)!\\n");
+ failed++;
+ }
+ tv.tv_sec = 0;
+ tv.tv_usec = 0;
+ time_reg = uuid_time(buf, &tv);
+ printf("UUID time is: (%ld, %ld): %s\n", tv.tv_sec, tv.tv_usec,
+ ctime(&time_reg));
+ uuid_parse(str, tst);
+ if (!uuid_compare(buf, tst))
+ printf("UUID parse and compare succeeded.\n");
+ else {
+ printf("UUID parse and compare failed!\n");
+ failed++;
+ }
+ uuid_clear(tst);
+ if (uuid_is_null(tst))
+ printf("UUID clear and is null succeeded.\n");
+ else {
+ printf("UUID clear and is null failed!\n");
+ failed++;
+ }
+ uuid_copy(buf, tst);
+ if (!uuid_compare(buf, tst))
+ printf("UUID copy and compare succeeded.\n");
+ else {
+ printf("UUID copy and compare failed!\n");
+ failed++;
+ }
+ failed += test_uuid("84949cc5-4701-4a84-895b-354c584a981b", 1);
+ failed += test_uuid("84949CC5-4701-4A84-895B-354C584A981B", 1);
+ failed += test_uuid("84949cc5-4701-4a84-895b-354c584a981bc", 0);
+ failed += test_uuid("84949cc5-4701-4a84-895b-354c584a981", 0);
+ failed += test_uuid("84949cc5x4701-4a84-895b-354c584a981b", 0);
+ failed += test_uuid("84949cc504701-4a84-895b-354c584a981b", 0);
+ failed += test_uuid("84949cc5-470104a84-895b-354c584a981b", 0);
+ failed += test_uuid("84949cc5-4701-4a840895b-354c584a981b", 0);
+ failed += test_uuid("84949cc5-4701-4a84-895b0354c584a981b", 0);
+ failed += test_uuid("g4949cc5-4701-4a84-895b-354c584a981b", 0);
+ failed += test_uuid("84949cc5-4701-4a84-895b-354c584a981g", 0);
+
+ if (failed) {
+ printf("%d failures.\n", failed);
+ exit(1);
+ }
+ return 0;
+}
diff --git a/contrib/uuid/unpack.c b/contrib/uuid/unpack.c
new file mode 100644
index 000000000..beaaff3ca
--- /dev/null
+++ b/contrib/uuid/unpack.c
@@ -0,0 +1,63 @@
+/*
+ * Internal routine for unpacking UUID
+ *
+ * Copyright (C) 1996, 1997 Theodore Ts'o.
+ *
+ * %Begin-Header%
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
+ * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ * %End-Header%
+ */
+
+#include <string.h>
+#include "uuidP.h"
+
+void uuid_unpack(const uuid_t in, struct uuid *uu)
+{
+ const uint8_t *ptr = in;
+ uint32_t tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->time_low = tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->time_mid = tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->time_hi_and_version = tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->clock_seq = tmp;
+
+ memcpy(uu->node, ptr, 6);
+}
+
diff --git a/contrib/uuid/unparse.c b/contrib/uuid/unparse.c
new file mode 100644
index 000000000..a95bbb042
--- /dev/null
+++ b/contrib/uuid/unparse.c
@@ -0,0 +1,76 @@
+/*
+ * unparse.c -- convert a UUID to string
+ *
+ * Copyright (C) 1996, 1997 Theodore Ts'o.
+ *
+ * %Begin-Header%
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
+ * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ * %End-Header%
+ */
+
+#include <stdio.h>
+
+#include "uuidP.h"
+
+static const char *fmt_lower =
+ "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x";
+
+static const char *fmt_upper =
+ "%08X-%04X-%04X-%02X%02X-%02X%02X%02X%02X%02X%02X";
+
+#ifdef UUID_UNPARSE_DEFAULT_UPPER
+#define FMT_DEFAULT fmt_upper
+#else
+#define FMT_DEFAULT fmt_lower
+#endif
+
+static void uuid_unparse_x(const uuid_t uu, char *out, const char *fmt)
+{
+ struct uuid uuid;
+
+ uuid_unpack(uu, &uuid);
+ sprintf(out, fmt,
+ uuid.time_low, uuid.time_mid, uuid.time_hi_and_version,
+ uuid.clock_seq >> 8, uuid.clock_seq & 0xFF,
+ uuid.node[0], uuid.node[1], uuid.node[2],
+ uuid.node[3], uuid.node[4], uuid.node[5]);
+}
+
+void uuid_unparse_lower(const uuid_t uu, char *out)
+{
+ uuid_unparse_x(uu, out, fmt_lower);
+}
+
+void uuid_unparse_upper(const uuid_t uu, char *out)
+{
+ uuid_unparse_x(uu, out, fmt_upper);
+}
+
+void uuid_unparse(const uuid_t uu, char *out)
+{
+ uuid_unparse_x(uu, out, FMT_DEFAULT);
+}
diff --git a/contrib/uuid/uuid.h b/contrib/uuid/uuid.h
new file mode 100644
index 000000000..ab006652f
--- /dev/null
+++ b/contrib/uuid/uuid.h
@@ -0,0 +1,104 @@
+/*
+ * Public include file for the UUID library
+ *
+ * Copyright (C) 1996, 1997, 1998 Theodore Ts'o.
+ *
+ * %Begin-Header%
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
+ * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ * %End-Header%
+ */
+
+#ifndef _UUID_UUID_H
+#define _UUID_UUID_H
+
+#include "config.h"
+#include <sys/types.h>
+#ifndef _WIN32
+#include <sys/time.h>
+#endif
+#include <time.h>
+
+typedef unsigned char uuid_t[16];
+
+/* UUID Variant definitions */
+#define UUID_VARIANT_NCS 0
+#define UUID_VARIANT_DCE 1
+#define UUID_VARIANT_MICROSOFT 2
+#define UUID_VARIANT_OTHER 3
+
+/* UUID Type definitions */
+#define UUID_TYPE_DCE_TIME 1
+#define UUID_TYPE_DCE_RANDOM 4
+
+/* Allow UUID constants to be defined */
+#ifdef __GNUC__
+#define UUID_DEFINE(name,u0,u1,u2,u3,u4,u5,u6,u7,u8,u9,u10,u11,u12,u13,u14,u15) \
+ static const uuid_t name __attribute__ ((unused)) = {u0,u1,u2,u3,u4,u5,u6,u7,u8,u9,u10,u11,u12,u13,u14,u15}
+#else
+#define UUID_DEFINE(name,u0,u1,u2,u3,u4,u5,u6,u7,u8,u9,u10,u11,u12,u13,u14,u15) \
+ static const uuid_t name = {u0,u1,u2,u3,u4,u5,u6,u7,u8,u9,u10,u11,u12,u13,u14,u15}
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* clear.c */
+void uuid_clear(uuid_t uu);
+
+/* compare.c */
+int uuid_compare(const uuid_t uu1, const uuid_t uu2);
+
+/* copy.c */
+void uuid_copy(uuid_t dst, const uuid_t src);
+
+/* gen_uuid.c */
+void uuid_generate(uuid_t out);
+void uuid_generate_random(uuid_t out);
+void uuid_generate_time(uuid_t out);
+
+/* isnull.c */
+int uuid_is_null(const uuid_t uu);
+
+/* parse.c */
+int uuid_parse(const char *in, uuid_t uu);
+
+/* unparse.c */
+void uuid_unparse(const uuid_t uu, char *out);
+void uuid_unparse_lower(const uuid_t uu, char *out);
+void uuid_unparse_upper(const uuid_t uu, char *out);
+
+/* uuid_time.c */
+time_t uuid_time(const uuid_t uu, struct timeval *ret_tv);
+int uuid_type(const uuid_t uu);
+int uuid_variant(const uuid_t uu);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _UUID_UUID_H */
diff --git a/contrib/uuid/uuidP.h b/contrib/uuid/uuidP.h
new file mode 100644
index 000000000..9a2de6132
--- /dev/null
+++ b/contrib/uuid/uuidP.h
@@ -0,0 +1,63 @@
+/*
+ * uuid.h -- private header file for uuids
+ *
+ * Copyright (C) 1996, 1997 Theodore Ts'o.
+ *
+ * %Begin-Header%
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
+ * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ * %End-Header%
+ */
+
+#include "uuid.h"
+#ifdef HAVE_INTTYPES_H
+#include <inttypes.h>
+#else
+#include "uuid_types.h"
+#endif
+#include <sys/types.h>
+
+
+/*
+ * Offset between 15-Oct-1582 and 1-Jan-70
+ */
+#define TIME_OFFSET_HIGH 0x01B21DD2
+#define TIME_OFFSET_LOW 0x13814000
+
+struct uuid {
+ uint32_t time_low;
+ uint16_t time_mid;
+ uint16_t time_hi_and_version;
+ uint16_t clock_seq;
+ uint8_t node[6];
+};
+
+
+/*
+ * prototypes
+ */
+void uuid_pack(const struct uuid *uu, uuid_t ptr);
+void uuid_unpack(const uuid_t in, struct uuid *uu);
diff --git a/contrib/uuid/uuid_time.c b/contrib/uuid/uuid_time.c
new file mode 100644
index 000000000..f25f5c90f
--- /dev/null
+++ b/contrib/uuid/uuid_time.c
@@ -0,0 +1,171 @@
+/*
+ * uuid_time.c --- Interpret the time field from a uuid. This program
+ * violates the UUID abstraction barrier by reaching into the guts
+ * of a UUID and interpreting it.
+ *
+ * Copyright (C) 1998, 1999 Theodore Ts'o.
+ *
+ * %Begin-Header%
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
+ * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ * %End-Header%
+ */
+
+#ifdef _WIN32
+#define _WIN32_WINNT 0x0500
+#include <windows.h>
+#define UUID MYUUID
+#endif
+
+#include <stdio.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <stdlib.h>
+#include <sys/types.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#include <time.h>
+
+#include "uuidP.h"
+
+time_t uuid_time(const uuid_t uu, struct timeval *ret_tv)
+{
+ struct timeval tv;
+ struct uuid uuid;
+ uint32_t high;
+ uint64_t clock_reg;
+
+ uuid_unpack(uu, &uuid);
+
+ high = uuid.time_mid | ((uuid.time_hi_and_version & 0xFFF) << 16);
+ clock_reg = uuid.time_low | ((uint64_t) high << 32);
+
+ clock_reg -= (((uint64_t) 0x01B21DD2) << 32) + 0x13814000;
+ tv.tv_sec = clock_reg / 10000000;
+ tv.tv_usec = (clock_reg % 10000000) / 10;
+
+ if (ret_tv)
+ *ret_tv = tv;
+
+ return tv.tv_sec;
+}
+
+int uuid_type(const uuid_t uu)
+{
+ struct uuid uuid;
+
+ uuid_unpack(uu, &uuid);
+ return ((uuid.time_hi_and_version >> 12) & 0xF);
+}
+
+int uuid_variant(const uuid_t uu)
+{
+ struct uuid uuid;
+ int var;
+
+ uuid_unpack(uu, &uuid);
+ var = uuid.clock_seq;
+
+ if ((var & 0x8000) == 0)
+ return UUID_VARIANT_NCS;
+ if ((var & 0x4000) == 0)
+ return UUID_VARIANT_DCE;
+ if ((var & 0x2000) == 0)
+ return UUID_VARIANT_MICROSOFT;
+ return UUID_VARIANT_OTHER;
+}
+
+#ifdef DEBUG
+static const char *variant_string(int variant)
+{
+ switch (variant) {
+ case UUID_VARIANT_NCS:
+ return "NCS";
+ case UUID_VARIANT_DCE:
+ return "DCE";
+ case UUID_VARIANT_MICROSOFT:
+ return "Microsoft";
+ default:
+ return "Other";
+ }
+}
+
+
+int
+main(int argc, char **argv)
+{
+ uuid_t buf;
+ time_t time_reg;
+ struct timeval tv;
+ int type, variant;
+
+ if (argc != 2) {
+ fprintf(stderr, "Usage: %s uuid\n", argv[0]);
+ exit(1);
+ }
+ if (uuid_parse(argv[1], buf)) {
+ fprintf(stderr, "Invalid UUID: %s\n", argv[1]);
+ exit(1);
+ }
+ variant = uuid_variant(buf);
+ type = uuid_type(buf);
+ time_reg = uuid_time(buf, &tv);
+
+ printf("UUID variant is %d (%s)\n", variant, variant_string(variant));
+ if (variant != UUID_VARIANT_DCE) {
+ printf("Warning: This program only knows how to interpret "
+ "DCE UUIDs.\n\tThe rest of the output is likely "
+ "to be incorrect!!\n");
+ }
+ printf("UUID type is %d", type);
+ switch (type) {
+ case 1:
+ printf(" (time based)\n");
+ break;
+ case 2:
+ printf(" (DCE)\n");
+ break;
+ case 3:
+ printf(" (name-based)\n");
+ break;
+ case 4:
+ printf(" (random)\n");
+ break;
+ default:
+ printf("\n");
+ }
+ if (type != 1) {
+ printf("Warning: not a time-based UUID, so UUID time "
+ "decoding will likely not work!\n");
+ }
+ printf("UUID time is: (%ld, %ld): %s\n", tv.tv_sec, tv.tv_usec,
+ ctime(&time_reg));
+
+ return 0;
+}
+#endif
diff --git a/contrib/uuid/uuid_types.h.in b/contrib/uuid/uuid_types.h.in
new file mode 100644
index 000000000..f21ff4ee1
--- /dev/null
+++ b/contrib/uuid/uuid_types.h.in
@@ -0,0 +1,50 @@
+/*
+ * If linux/types.h is already been included, assume it has defined
+ * everything we need. (cross fingers) Other header files may have
+ * also defined the types that we need.
+ */
+#if (!defined(_STDINT_H) && !defined(_UUID_STDINT_H))
+#define _UUID_STDINT_H
+
+typedef unsigned char uint8_t;
+typedef signed char int8_t;
+
+#if (@SIZEOF_INT@ == 8)
+typedef int int64_t;
+typedef unsigned int uint64_t;
+#elif (@SIZEOF_LONG@ == 8)
+typedef long int64_t;
+typedef unsigned long uint64_t;
+#elif (@SIZEOF_LONG_LONG@ == 8)
+#if defined(__GNUC__)
+typedef __signed__ long long int64_t;
+#else
+typedef signed long long int64_t;
+#endif
+typedef unsigned long long uint64_t;
+#endif
+
+#if (@SIZEOF_INT@ == 2)
+typedef int int16_t;
+typedef unsigned int uint16_t;
+#elif (@SIZEOF_SHORT@ == 2)
+typedef short int16_t;
+typedef unsigned short uint16_t;
+#else
+ ?==error: undefined 16 bit type
+#endif
+
+#if (@SIZEOF_INT@ == 4)
+typedef int int32_t;
+typedef unsigned int uint32_t;
+#elif (@SIZEOF_LONG@ == 4)
+typedef long int32_t;
+typedef unsigned long uint32_t;
+#elif (@SIZEOF_SHORT@ == 4)
+typedef short int32_t;
+typedef unsigned short uint32_t;
+#else
+ ?== error: undefined 32 bit type
+#endif
+
+#endif
diff --git a/contrib/uuid/uuidd.h b/contrib/uuid/uuidd.h
new file mode 100644
index 000000000..c71f4b788
--- /dev/null
+++ b/contrib/uuid/uuidd.h
@@ -0,0 +1,54 @@
+/*
+ * Definitions used by the uuidd daemon
+ *
+ * Copyright (C) 2007 Theodore Ts'o.
+ *
+ * %Begin-Header%
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
+ * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ * %End-Header%
+ */
+
+#ifndef _UUID_UUIDD_H
+#define _UUID_UUIDD_H
+
+#define UUIDD_DIR "/var/lib/libuuid"
+#define UUIDD_SOCKET_PATH UUIDD_DIR "/request"
+#define UUIDD_PIDFILE_PATH UUIDD_DIR "/uuidd.pid"
+#define UUIDD_PATH "/usr/sbin/uuidd"
+
+#define UUIDD_OP_GETPID 0
+#define UUIDD_OP_GET_MAXOP 1
+#define UUIDD_OP_TIME_UUID 2
+#define UUIDD_OP_RANDOM_UUID 3
+#define UUIDD_OP_BULK_TIME_UUID 4
+#define UUIDD_OP_BULK_RANDOM_UUID 5
+#define UUIDD_MAX_OP UUIDD_OP_BULK_RANDOM_UUID
+
+extern void uuid__generate_time(uuid_t out, int *num);
+extern void uuid__generate_random(uuid_t out, int *num);
+
+#endif /* _UUID_UUID_H */
diff --git a/doc/Makefile.am b/doc/Makefile.am
index 58b5e95b7..1103b607d 100644
--- a/doc/Makefile.am
+++ b/doc/Makefile.am
@@ -1,12 +1,6 @@
-EXTRA_DIST = glusterfs.vol.sample glusterfsd.vol.sample glusterfs.8 mount.glusterfs.8\
- porting_guide.txt authentication.txt coding-standard.pdf get_put_api_using_xattr.txt \
- translator-options.txt mac-related-xattrs.txt replicate.pdf
+EXTRA_DIST = glusterfs.8 mount.glusterfs.8 gluster.8 \
+ glusterd.8 glusterfsd.8
-SUBDIRS = examples hacker-guide
+man8_MANS = glusterfs.8 mount.glusterfs.8 gluster.8 glusterd.8 glusterfsd.8
-voldir = $(sysconfdir)/glusterfs
-vol_DATA = glusterfs.vol.sample glusterfsd.vol.sample
-
-man8_MANS = glusterfs.8 mount.glusterfs.8
-
-CLEANFILES =
+CLEANFILES =
diff --git a/doc/admin-guide/en-US/images/640px-GlusterFS_Architecture.png b/doc/admin-guide/en-US/images/640px-GlusterFS_Architecture.png
new file mode 100644
index 000000000..95f89ec82
--- /dev/null
+++ b/doc/admin-guide/en-US/images/640px-GlusterFS_Architecture.png
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Distributed_Replicated_Volume.png b/doc/admin-guide/en-US/images/Distributed_Replicated_Volume.png
new file mode 100644
index 000000000..dfc0a2c56
--- /dev/null
+++ b/doc/admin-guide/en-US/images/Distributed_Replicated_Volume.png
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Distributed_Striped_Replicated_Volume.png b/doc/admin-guide/en-US/images/Distributed_Striped_Replicated_Volume.png
new file mode 100644
index 000000000..d286fa99e
--- /dev/null
+++ b/doc/admin-guide/en-US/images/Distributed_Striped_Replicated_Volume.png
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Distributed_Striped_Volume.png b/doc/admin-guide/en-US/images/Distributed_Striped_Volume.png
new file mode 100644
index 000000000..752fa982f
--- /dev/null
+++ b/doc/admin-guide/en-US/images/Distributed_Striped_Volume.png
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Distributed_Volume.png b/doc/admin-guide/en-US/images/Distributed_Volume.png
new file mode 100644
index 000000000..4386ca935
--- /dev/null
+++ b/doc/admin-guide/en-US/images/Distributed_Volume.png
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Geo-Rep03_Internet.png b/doc/admin-guide/en-US/images/Geo-Rep03_Internet.png
new file mode 100644
index 000000000..3cd0eaded
--- /dev/null
+++ b/doc/admin-guide/en-US/images/Geo-Rep03_Internet.png
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Geo-Rep04_Cascading.png b/doc/admin-guide/en-US/images/Geo-Rep04_Cascading.png
new file mode 100644
index 000000000..54bf9f05c
--- /dev/null
+++ b/doc/admin-guide/en-US/images/Geo-Rep04_Cascading.png
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Geo-Rep_LAN.png b/doc/admin-guide/en-US/images/Geo-Rep_LAN.png
new file mode 100644
index 000000000..a74f6dbb5
--- /dev/null
+++ b/doc/admin-guide/en-US/images/Geo-Rep_LAN.png
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Geo-Rep_WAN.png b/doc/admin-guide/en-US/images/Geo-Rep_WAN.png
new file mode 100644
index 000000000..d72d72768
--- /dev/null
+++ b/doc/admin-guide/en-US/images/Geo-Rep_WAN.png
Binary files differ
diff --git a/doc/admin-guide/en-US/images/GlusterFS_Architecture.png b/doc/admin-guide/en-US/images/GlusterFS_Architecture.png
new file mode 100644
index 000000000..b506db1f4
--- /dev/null
+++ b/doc/admin-guide/en-US/images/GlusterFS_Architecture.png
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Hadoop_Architecture.png b/doc/admin-guide/en-US/images/Hadoop_Architecture.png
new file mode 100644
index 000000000..8725bd330
--- /dev/null
+++ b/doc/admin-guide/en-US/images/Hadoop_Architecture.png
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Replicated_Volume.png b/doc/admin-guide/en-US/images/Replicated_Volume.png
new file mode 100644
index 000000000..135a63f34
--- /dev/null
+++ b/doc/admin-guide/en-US/images/Replicated_Volume.png
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Striped_Replicated_Volume.png b/doc/admin-guide/en-US/images/Striped_Replicated_Volume.png
new file mode 100644
index 000000000..ee88af731
--- /dev/null
+++ b/doc/admin-guide/en-US/images/Striped_Replicated_Volume.png
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Striped_Volume.png b/doc/admin-guide/en-US/images/Striped_Volume.png
new file mode 100644
index 000000000..63a84b242
--- /dev/null
+++ b/doc/admin-guide/en-US/images/Striped_Volume.png
Binary files differ
diff --git a/doc/admin-guide/en-US/images/UFO_Architecture.png b/doc/admin-guide/en-US/images/UFO_Architecture.png
new file mode 100644
index 000000000..be85d7b28
--- /dev/null
+++ b/doc/admin-guide/en-US/images/UFO_Architecture.png
Binary files differ
diff --git a/doc/admin-guide/en-US/images/VSA_Architecture.png b/doc/admin-guide/en-US/images/VSA_Architecture.png
new file mode 100644
index 000000000..c3ab80cf3
--- /dev/null
+++ b/doc/admin-guide/en-US/images/VSA_Architecture.png
Binary files differ
diff --git a/doc/admin-guide/en-US/images/icon.svg b/doc/admin-guide/en-US/images/icon.svg
new file mode 100644
index 000000000..b2f16d0f6
--- /dev/null
+++ b/doc/admin-guide/en-US/images/icon.svg
@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<svg xmlns:svg="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.0" width="32" height="32" id="svg3017">
+ <defs id="defs3019">
+ <linearGradient id="linearGradient2381">
+ <stop id="stop2383" style="stop-color:#ffffff;stop-opacity:1" offset="0"/>
+ <stop id="stop2385" style="stop-color:#ffffff;stop-opacity:0" offset="1"/>
+ </linearGradient>
+ <linearGradient x1="296.4996" y1="188.81061" x2="317.32471" y2="209.69398" id="linearGradient2371" xlink:href="#linearGradient2381" gradientUnits="userSpaceOnUse" gradientTransform="matrix(0.90776,0,0,0.90776,24.35648,49.24131)"/>
+ </defs>
+ <g transform="matrix(0.437808,-0.437808,0.437808,0.437808,-220.8237,43.55311)" id="g5089">
+ <path d="m 8.4382985,-6.28125 c -0.6073916,0 -4.3132985,5.94886271 -4.3132985,8.25 l 0,26.71875 c 0,0.846384 0.5818159,1.125 1.15625,1.125 l 25.5625,0 c 0.632342,0 1.125001,-0.492658 1.125,-1.125 l 0,-5.21875 0.28125,0 c 0.49684,0 0.906249,-0.409411 0.90625,-0.90625 l 0,-27.9375 c 0,-0.4968398 -0.40941,-0.90625 -0.90625,-0.90625 l -23.8117015,0 z" transform="translate(282.8327,227.1903)" id="path5091" style="fill:#5c5c4f;stroke:#000000;stroke-width:3.23021388;stroke-miterlimit:4;stroke-dasharray:none"/>
+ <rect width="27.85074" height="29.369793" rx="1.1414107" ry="1.1414107" x="286.96509" y="227.63805" id="rect5093" style="fill:#032c87"/>
+ <path d="m 288.43262,225.43675 25.2418,0 0,29.3698 -26.37615,0.0241 1.13435,-29.39394 z" id="rect5095" style="fill:#ffffff"/>
+ <path d="m 302.44536,251.73726 c 1.38691,7.85917 -0.69311,11.28365 -0.69311,11.28365 2.24384,-1.60762 3.96426,-3.47694 4.90522,-5.736 0.96708,2.19264 1.83294,4.42866 4.27443,5.98941 0,0 -1.59504,-7.2004 -1.71143,-11.53706 l -6.77511,0 z" id="path5097" style="fill:#a70000;fill-opacity:1;stroke-width:2"/>
+ <rect width="25.241802" height="29.736675" rx="0.89682275" ry="0.89682275" x="290.73544" y="220.92249" id="rect5099" style="fill:#809cc9"/>
+ <path d="m 576.47347,725.93939 6.37084,0.41502 0.4069,29.51809 c -1.89202,-1.31785 -6.85427,-3.7608 -8.26232,-1.68101 l 0,-26.76752 c 0,-0.82246 0.66212,-1.48458 1.48458,-1.48458 z" transform="matrix(0.499065,-0.866565,0,1,0,0)" id="rect5101" style="fill:#4573b3;fill-opacity:1"/>
+ <path d="m 293.2599,221.89363 20.73918,0 c 0.45101,0 0.8141,0.3631 0.8141,0.81411 0.21547,6.32836 -19.36824,21.7635 -22.36739,17.59717 l 0,-17.59717 c 0,-0.45101 0.3631,-0.81411 0.81411,-0.81411 z" id="path5103" style="opacity:0.65536726;fill:url(#linearGradient2371);fill-opacity:1"/>
+ </g>
+</svg>
diff --git a/doc/admin-guide/en-US/markdown/Administration_Guide.md b/doc/admin-guide/en-US/markdown/Administration_Guide.md
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/Administration_Guide.md
@@ -0,0 +1 @@
+
diff --git a/doc/admin-guide/en-US/markdown/Author_Group.md b/doc/admin-guide/en-US/markdown/Author_Group.md
new file mode 100644
index 000000000..ef2a5e677
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/Author_Group.md
@@ -0,0 +1,5 @@
+Divya
+Muntimadugu
+Red Hat
+Engineering Content Services
+divya@redhat.com
diff --git a/doc/admin-guide/en-US/markdown/Book_Info.md b/doc/admin-guide/en-US/markdown/Book_Info.md
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/Book_Info.md
@@ -0,0 +1 @@
+
diff --git a/doc/admin-guide/en-US/markdown/Chapter.md b/doc/admin-guide/en-US/markdown/Chapter.md
new file mode 100644
index 000000000..8420259c4
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/Chapter.md
@@ -0,0 +1,18 @@
+Test Chapter
+============
+
+This is a test paragraph
+
+Test Section 1
+==============
+
+This is a test paragraph in a section
+
+Test Section 2
+==============
+
+This is a test paragraph in Section 2
+
+1. listitem text
+
+
diff --git a/doc/admin-guide/en-US/markdown/Preface.md b/doc/admin-guide/en-US/markdown/Preface.md
new file mode 100644
index 000000000..f7e934ae8
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/Preface.md
@@ -0,0 +1,22 @@
+Preface
+=======
+
+This guide describes how to configure, operate, and manage Gluster File
+System (GlusterFS).
+
+Audience
+========
+
+This guide is intended for Systems Administrators interested in
+configuring and managing GlusterFS.
+
+This guide assumes that you are familiar with the Linux operating
+system, concepts of File System, GlusterFS concepts, and GlusterFS
+Installation
+
+License
+=======
+
+The License information is available at [][].
+
+ []: http://www.redhat.com/licenses/rhel_rha_eula.html
diff --git a/doc/admin-guide/en-US/markdown/Revision_History.md b/doc/admin-guide/en-US/markdown/Revision_History.md
new file mode 100644
index 000000000..2084309d1
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/Revision_History.md
@@ -0,0 +1,4 @@
+Revision History
+================
+
+1-0 Thu Apr 5 2012 Divya Muntimadugu <divya@redhat.com> Draft
diff --git a/doc/admin-guide/en-US/markdown/admin_ACLs.md b/doc/admin-guide/en-US/markdown/admin_ACLs.md
new file mode 100644
index 000000000..308e069ca
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/admin_ACLs.md
@@ -0,0 +1,197 @@
+POSIX Access Control Lists
+==========================
+
+POSIX Access Control Lists (ACLs) allows you to assign different
+permissions for different users or groups even though they do not
+correspond to the original owner or the owning group.
+
+For example: User john creates a file but does not want to allow anyone
+to do anything with this file, except another user, antony (even though
+there are other users that belong to the group john).
+
+This means, in addition to the file owner, the file group, and others,
+additional users and groups can be granted or denied access by using
+POSIX ACLs.
+
+Activating POSIX ACLs Support
+=============================
+
+To use POSIX ACLs for a file or directory, the partition of the file or
+directory must be mounted with POSIX ACLs support.
+
+Activating POSIX ACLs Support on Sever
+--------------------------------------
+
+To mount the backend export directories for POSIX ACLs support, use the
+following command:
+
+`# mount -o acl `
+
+For example:
+
+`# mount -o acl /dev/sda1 /export1 `
+
+Alternatively, if the partition is listed in the /etc/fstab file, add
+the following entry for the partition to include the POSIX ACLs option:
+
+`LABEL=/work /export1 ext3 rw, acl 14 `
+
+Activating POSIX ACLs Support on Client
+---------------------------------------
+
+To mount the glusterfs volumes for POSIX ACLs support, use the following
+command:
+
+`# mount –t glusterfs -o acl `
+
+For example:
+
+`# mount -t glusterfs -o acl 198.192.198.234:glustervolume /mnt/gluster`
+
+Setting POSIX ACLs
+==================
+
+You can set two types of POSIX ACLs, that is, access ACLs and default
+ACLs. You can use access ACLs to grant permission for a specific file or
+directory. You can use default ACLs only on a directory but if a file
+inside that directory does not have an ACLs, it inherits the permissions
+of the default ACLs of the directory.
+
+You can set ACLs for per user, per group, for users not in the user
+group for the file, and via the effective right mask.
+
+Setting Access ACLs
+-------------------
+
+You can apply access ACLs to grant permission for both files and
+directories.
+
+**To set or modify Access ACLs**
+
+You can set or modify access ACLs use the following command:
+
+`# setfacl –m file `
+
+The ACL entry types are the POSIX ACLs representations of owner, group,
+and other.
+
+Permissions must be a combination of the characters `r` (read), `w`
+(write), and `x` (execute). You must specify the ACL entry in the
+following format and can specify multiple entry types separated by
+commas.
+
+ ACL Entry Description
+ ---------------------- --------------------------------------------------------------------------------------------------------------------------------------------------
+ u:uid:\<permission\> Sets the access ACLs for a user. You can specify user name or UID
+ g:gid:\<permission\> Sets the access ACLs for a group. You can specify group name or GID.
+ m:\<permission\> Sets the effective rights mask. The mask is the combination of all access permissions of the owning group and all of the user and group entries.
+ o:\<permission\> Sets the access ACLs for users other than the ones in the group for the file.
+
+If a file or directory already has an POSIX ACLs, and the setfacl
+command is used, the additional permissions are added to the existing
+POSIX ACLs or the existing rule is modified.
+
+For example, to give read and write permissions to user antony:
+
+`# setfacl -m u:antony:rw /mnt/gluster/data/testfile `
+
+Setting Default ACLs
+--------------------
+
+You can apply default ACLs only to directories. They determine the
+permissions of a file system objects that inherits from its parent
+directory when it is created.
+
+To set default ACLs
+
+You can set default ACLs for files and directories using the following
+command:
+
+`# setfacl –m –-set `
+
+For example, to set the default ACLs for the /data directory to read for
+users not in the user group:
+
+`# setfacl –m --set o::r /mnt/gluster/data `
+
+> **Note**
+>
+> An access ACLs set for an individual file can override the default
+> ACLs permissions.
+
+**Effects of a Default ACLs**
+
+The following are the ways in which the permissions of a directory's
+default ACLs are passed to the files and subdirectories in it:
+
+- A subdirectory inherits the default ACLs of the parent directory
+ both as its default ACLs and as an access ACLs.
+
+- A file inherits the default ACLs as its access ACLs.
+
+Retrieving POSIX ACLs
+=====================
+
+You can view the existing POSIX ACLs for a file or directory.
+
+**To view existing POSIX ACLs**
+
+- View the existing access ACLs of a file using the following command:
+
+ `# getfacl `
+
+ For example, to view the existing POSIX ACLs for sample.jpg
+
+ # getfacl /mnt/gluster/data/test/sample.jpg
+ # owner: antony
+ # group: antony
+ user::rw-
+ group::rw-
+ other::r--
+
+- View the default ACLs of a directory using the following command:
+
+ `# getfacl `
+
+ For example, to view the existing ACLs for /data/doc
+
+ # getfacl /mnt/gluster/data/doc
+ # owner: antony
+ # group: antony
+ user::rw-
+ user:john:r--
+ group::r--
+ mask::r--
+ other::r--
+ default:user::rwx
+ default:user:antony:rwx
+ default:group::r-x
+ default:mask::rwx
+ default:other::r-x
+
+Removing POSIX ACLs
+===================
+
+To remove all the permissions for a user, groups, or others, use the
+following command:
+
+`# setfacl -x `
+
+For example, to remove all permissions from the user antony:
+
+`# setfacl -x u:antony /mnt/gluster/data/test-file`
+
+Samba and ACLs
+==============
+
+If you are using Samba to access GlusterFS FUSE mount, then POSIX ACLs
+are enabled by default. Samba has been compiled with the
+`--with-acl-support` option, so no special flags are required when
+accessing or mounting a Samba share.
+
+NFS and ACLs
+============
+
+Currently we do not support ACLs configuration through NFS, i.e. setfacl
+and getfacl commands do not work. However, ACLs permissions set using
+Gluster Native Client is applicable on NFS mounts.
diff --git a/doc/admin-guide/en-US/markdown/admin_Hadoop.md b/doc/admin-guide/en-US/markdown/admin_Hadoop.md
new file mode 100644
index 000000000..2894fa713
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/admin_Hadoop.md
@@ -0,0 +1,170 @@
+Managing Hadoop Compatible Storage
+==================================
+
+GlusterFS provides compatibility for Apache Hadoop and it uses the
+standard file system APIs available in Hadoop to provide a new storage
+option for Hadoop deployments. Existing MapReduce based applications can
+use GlusterFS seamlessly. This new functionality opens up data within
+Hadoop deployments to any file-based or object-based application.
+
+Architecture Overview
+=====================
+
+The following diagram illustrates Hadoop integration with GlusterFS:
+
+Advantages
+==========
+
+The following are the advantages of Hadoop Compatible Storage with
+GlusterFS:
+
+- Provides simultaneous file-based and object-based access within
+ Hadoop.
+
+- Eliminates the centralized metadata server.
+
+- Provides compatibility with MapReduce applications and rewrite is
+ not required.
+
+- Provides a fault tolerant file system.
+
+Preparing to Install Hadoop Compatible Storage
+==============================================
+
+This section provides information on pre-requisites and list of
+dependencies that will be installed during installation of Hadoop
+compatible storage.
+
+Pre-requisites
+--------------
+
+The following are the pre-requisites to install Hadoop Compatible
+Storage :
+
+- Hadoop 0.20.2 is installed, configured, and is running on all the
+ machines in the cluster.
+
+- Java Runtime Environment
+
+- Maven (mandatory only if you are building the plugin from the
+ source)
+
+- JDK (mandatory only if you are building the plugin from the source)
+
+- getfattr - command line utility
+
+Installing, and Configuring Hadoop Compatible Storage
+=====================================================
+
+This section describes how to install and configure Hadoop Compatible
+Storage in your storage environment and verify that it is functioning
+correctly.
+
+1. Download `glusterfs-hadoop-0.20.2-0.1.x86_64.rpm` file to each
+ server on your cluster. You can download the file from [][].
+
+2. To install Hadoop Compatible Storage on all servers in your cluster,
+ run the following command:
+
+ `# rpm –ivh --nodeps glusterfs-hadoop-0.20.2-0.1.x86_64.rpm`
+
+ The following files will be extracted:
+
+ - /usr/local/lib/glusterfs-Hadoop-version-gluster\_plugin\_version.jar
+
+ - /usr/local/lib/conf/core-site.xml
+
+3. (Optional) To install Hadoop Compatible Storage in a different
+ location, run the following command:
+
+ `# rpm –ivh --nodeps –prefix /usr/local/glusterfs/hadoop glusterfs-hadoop- 0.20.2-0.1.x86_64.rpm`
+
+4. Edit the `conf/core-site.xml` file. The following is the sample
+ `conf/core-site.xml` file:
+
+ <configuration>
+ <property>
+ <name>fs.glusterfs.impl</name>
+ <value>org.apache.hadoop.fs.glusterfs.Gluster FileSystem</value>
+ </property>
+
+ <property>
+ <name>fs.default.name</name>
+ <value>glusterfs://fedora1:9000</value>
+ </property>
+
+ <property>
+ <name>fs.glusterfs.volname</name>
+ <value>hadoopvol</value>
+ </property>
+
+ <property>
+ <name>fs.glusterfs.mount</name>
+ <value>/mnt/glusterfs</value>
+ </property>
+
+ <property>
+ <name>fs.glusterfs.server</name>
+ <value>fedora2</value>
+ </property>
+
+ <property>
+ <name>quick.slave.io</name>
+ <value>Off</value>
+ </property>
+ </configuration>
+
+ The following are the configurable fields:
+
+ -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Property Name Default Value Description
+ ---------------------- -------------------------- ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ fs.default.name glusterfs://fedora1:9000 Any hostname in the cluster as the server and any port number.
+
+ fs.glusterfs.volname hadoopvol GlusterFS volume to mount.
+
+ fs.glusterfs.mount /mnt/glusterfs The directory used to fuse mount the volume.
+
+ fs.glusterfs.server fedora2 Any hostname or IP address on the cluster except the client/master.
+
+ quick.slave.io Off Performance tunable option. If this option is set to On, the plugin will try to perform I/O directly from the disk file system (like ext3 or ext4) the file resides on. Hence read performance will improve and job would run faster.
+ > **Note**
+ >
+ > This option is not tested widely
+ -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+5. Create a soft link in Hadoop’s library and configuration directory
+ for the downloaded files (in Step 3) using the following commands:
+
+ `# ln -s >`
+
+ For example,
+
+ `# ln –s /usr/local/lib/glusterfs-0.20.2-0.1.jar /lib/glusterfs-0.20.2-0.1.jar`
+
+ `# ln –s /usr/local/lib/conf/core-site.xml /conf/core-site.xml `
+
+6. (Optional) You can run the following command on Hadoop master to
+ build the plugin and deploy it along with core-site.xml file,
+ instead of repeating the above steps:
+
+ `# build-deploy-jar.py -d -c `
+
+Starting and Stopping the Hadoop MapReduce Daemon
+=================================================
+
+To start and stop MapReduce daemon
+
+- To start MapReduce daemon manually, enter the following command:
+
+ `# /bin/start-mapred.sh`
+
+- To stop MapReduce daemon manually, enter the following command:
+
+ `# /bin/stop-mapred.sh `
+
+> **Note**
+>
+> You must start Hadoop MapReduce daemon on all servers.
+
+ []: http://download.gluster.com/pub/gluster/glusterfs/qa-releases/3.3-beta-2/glusterfs-hadoop-0.20.2-0.1.x86_64.rpm
diff --git a/doc/admin-guide/en-US/markdown/admin_UFO.md b/doc/admin-guide/en-US/markdown/admin_UFO.md
new file mode 100644
index 000000000..3311eff01
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/admin_UFO.md
@@ -0,0 +1,1219 @@
+Managing Unified File and Object Storage
+========================================
+
+Unified File and Object Storage (UFO) unifies NAS and object storage
+technology. It provides a system for data storage that enables users to
+access the same data, both as an object and as a file, thus simplifying
+management and controlling storage costs.
+
+Unified File and Object Storage is built upon Openstack's Object Storage
+Swift. Open Stack Object Storage allows users to store and retrieve
+files and content through a simple Web Service (REST: Representational
+State Transfer) interface as objects and GlusterFS, allows users to
+store and retrieve files using Native Fuse and NFS mounts. It uses
+GlusterFS as a backend file system for Open Stack Swift. It also
+leverages on Open Stack Swift's web interface for storing and retrieving
+files over the web combined with GlusterFS features like scalability and
+high availability, replication, elastic volume management for data
+management at disk level.
+
+Unified File and Object Storage technology enables enterprises to adopt
+and deploy cloud storage solutions. It allows users to access and modify
+data as objects from a REST interface along with the ability to access
+and modify files from NAS interfaces including NFS and CIFS. In addition
+to decreasing cost and making it faster and easier to access object
+data, it also delivers massive scalability, high availability and
+replication of object storage. Infrastructure as a Service (IaaS)
+providers can utilize GlusterFS Unified File and Object Storage
+technology to enable their own cloud storage service. Enterprises can
+use this technology to accelerate the process of preparing file-based
+applications for the cloud and simplify new application development for
+cloud computing environments.
+
+OpenStack Object Storage is scalable object storage system and it is not
+a traditional file system. You will not be able to mount this system
+like traditional SAN or NAS volumes and perform POSIX compliant
+operations.
+
+Components of Object Storage
+============================
+
+The major components of Object Storage are:
+
+**Proxy Server**
+
+All REST requests to the UFO are routed through the Proxy Server.
+
+**Objects and Containers**
+
+An object is the basic storage entity and any optional metadata that
+represents the data you store. When you upload data, the data is stored
+as-is (with no compression or encryption).
+
+A container is a storage compartment for your data and provides a way
+for you to organize your data. Containers can be visualized as
+directories in a Linux system. Data must be stored in a container and
+hence objects are created within a container.
+
+It implements objects as files and directories under the container. The
+object name is a '/' separated path and UFO maps it to directories until
+the last name in the path, which is marked as a file. With this
+approach, objects can be accessed as files and directories from native
+GlusterFS (FUSE) or NFS mounts by providing the '/' separated path.
+
+**Accounts and Account Servers**
+
+The OpenStack Object Storage system is designed to be used by many
+different storage consumers. Each user is associated with one or more
+accounts and must identify themselves using an authentication system.
+While authenticating, users must provide the name of the account for
+which the authentication is requested.
+
+UFO implements accounts as GlusterFS volumes. So, when a user is granted
+read/write permission on an account, it means that that user has access
+to all the data available on that GlusterFS volume.
+
+**Authentication and Access Permissions**
+
+You must authenticate against an authentication service to receive
+OpenStack Object Storage connection parameters and an authentication
+token. The token must be passed in for all subsequent container or
+object operations. One authentication service that you can use as a
+middleware example is called `tempauth`.
+
+By default, each user has their own storage account and has full access
+to that account. Users must authenticate with their credentials as
+described above, but once authenticated they can manage containers and
+objects within that account. If a user wants to access the content from
+another account, they must have API access key or a session token
+provided by their authentication system.
+
+Advantages of using GlusterFS Unified File and Object Storage
+=============================================================
+
+The following are the advantages of using GlusterFS UFO:
+
+- No limit on upload and download files sizes as compared to Open
+ Stack Swift which limits the object size to 5GB.
+
+- A unified view of data across NAS and Object Storage technologies.
+
+- Using GlusterFS's UFO has other advantages like the following:
+
+ - High availability
+
+ - Scalability
+
+ - Replication
+
+ - Elastic Volume management
+
+Preparing to Deploy Unified File and Object Storage
+===================================================
+
+This section provides information on pre-requisites and list of
+dependencies that will be installed during the installation of Unified
+File and Object Storage.
+
+Pre-requisites
+--------------
+
+GlusterFS's Unified File and Object Storage needs `user_xattr` support
+from the underlying disk file system. Use the following command to
+enable `user_xattr` for GlusterFS bricks backend:
+
+`# mount –o remount,user_xattr `
+
+For example,
+
+`# mount –o remount,user_xattr /dev/hda1 `
+
+Dependencies
+------------
+
+The following packages are installed on GlusterFS when you install
+Unified File and Object Storage:
+
+- curl
+
+- memcached
+
+- openssl
+
+- xfsprogs
+
+- python2.6
+
+- pyxattr
+
+- python-configobj
+
+- python-setuptools
+
+- python-simplejson
+
+- python-webob
+
+- python-eventlet
+
+- python-greenlet
+
+- python-pastedeploy
+
+- python-netifaces
+
+Installing and Configuring Unified File and Object Storage
+==========================================================
+
+This section provides instructions on how to install and configure
+Unified File and Object Storage in your storage environment.
+
+Installing Unified File and Object Storage
+------------------------------------------
+
+To install Unified File and Object Storage:
+
+1. Download `rhel_install.sh` install script from [][] .
+
+2. Run `rhel_install.sh` script using the following command:
+
+ `# sh rhel_install.sh`
+
+3. Download `swift-1.4.5-1.noarch.rpm` and
+ `swift-plugin-1.0.-1.el6.noarch.rpm` files from [][].
+
+4. Install `swift-1.4.5-1.noarch.rpm` and
+ `swift-plugin-1.0.-1.el6.noarch.rpm` using the following commands:
+
+ `# rpm -ivh swift-1.4.5-1.noarch.rpm`
+
+ `# rpm -ivh swift-plugin-1.0.-1.el6.noarch.rpm`
+
+ > **Note**
+ >
+ > You must repeat the above steps on all the machines on which you
+ > want to install Unified File and Object Storage. If you install
+ > the Unified File and Object Storage on multiple servers, you can
+ > use a load balancer like pound, nginx, and so on to distribute the
+ > request across the machines.
+
+Adding Users
+------------
+
+The authentication system allows the administrator to grant different
+levels of access to different users based on the requirement. The
+following are the types of user permissions:
+
+- admin user
+
+- normal user
+
+Admin user has read and write permissions on the account. By default, a
+normal user has no read or write permissions. A normal user can only
+authenticate itself to get a Auth-Token. Read or write permission are
+provided through ACLs by the admin users.
+
+Add a new user by adding the following entry in
+`/etc/swift/proxy-server.conf` file:
+
+`user_<account-name>_<user-name> = <password> [.admin]`
+
+For example,
+
+`user_test_tester = testing .admin`
+
+> **Note**
+>
+> During installation, the installation script adds few sample users to
+> the `proxy-server.conf` file. It is highly recommended that you remove
+> all the default sample user entries from the configuration file.
+
+For more information on setting ACLs, see ?.
+
+Configuring Proxy Server
+------------------------
+
+The Proxy Server is responsible for connecting to the rest of the
+OpenStack Object Storage architecture. For each request, it looks up the
+location of the account, container, or object in the ring and route the
+request accordingly. The public API is also exposed through the proxy
+server. When objects are streamed to or from an object server, they are
+streamed directly through the proxy server to or from the user – the
+proxy server does not spool them.
+
+The configurable options pertaining to proxy server are stored in
+`/etc/swift/proxy-server.conf`. The following is the sample
+`proxy-server.conf` file:
+
+ [app:proxy-server]
+ use = egg:swift#proxy
+ allow_account_management=true
+ account_autocreate=true
+
+ [filter:tempauth]
+ use = egg:swift#tempauth user_admin_admin=admin.admin.reseller_admin
+ user_test_tester=testing.admin
+ user_test2_tester2=testing2.admin
+ user_test_tester3=testing3
+
+ [filter:healthcheck]
+ use = egg:swift#healthcheck
+
+ [filter:cache]
+ use = egg:swift#memcache
+
+By default, GlusterFS's Unified File and Object Storage is configured to
+support HTTP protocol and uses temporary authentication to authenticate
+the HTTP requests.
+
+Configuring Authentication System
+---------------------------------
+
+Proxy server must be configured to authenticate using `
+
+ `.
+
+Configuring Proxy Server for HTTPS
+----------------------------------
+
+By default, proxy server only handles HTTP request. To configure the
+proxy server to process HTTPS requests, perform the following steps:
+
+1. Create self-signed cert for SSL using the following commands:
+
+ cd /etc/swift
+ openssl req -new -x509 -nodes -out cert.crt -keyout cert.key
+
+2. Add the following lines to `/etc/swift/proxy-server.conf `under
+ [DEFAULT]
+
+ bind_port = 443
+ cert_file = /etc/swift/cert.crt
+ key_file = /etc/swift/cert.key
+
+3. Restart the servers using the following commands:
+
+ swift-init main stop
+ swift-init main start
+
+The following are the configurable options:
+
+ Option Default Description
+ ------------ ------------ -------------------------------
+ bind\_ip 0.0.0.0 IP Address for server to bind
+ bind\_port 80 Port for server to bind
+ swift\_dir /etc/swift Swift configuration directory
+ workers 1 Number of workers to fork
+ user swift swift user
+ cert\_file Path to the ssl .crt
+ key\_file Path to the ssl .key
+
+ : proxy-server.conf Default Options in the [DEFAULT] section
+
+ Option Default Description
+ ------------------------------- ----------------- -----------------------------------------------------------------------------------------------------------
+ use paste.deploy entry point for the container server. For most cases, this should be `egg:swift#container`.
+ log\_name proxy-server Label used when logging
+ log\_facility LOG\_LOCAL0 Syslog log facility
+ log\_level INFO Log level
+ log\_headers True If True, log headers in each request
+ recheck\_account\_existence 60 Cache timeout in seconds to send memcached for account existence
+ recheck\_container\_existence 60 Cache timeout in seconds to send memcached for container existence
+ object\_chunk\_size 65536 Chunk size to read from object servers
+ client\_chunk\_size 65536 Chunk size to read from clients
+ memcache\_servers 127.0.0.1:11211 Comma separated list of memcached servers ip:port
+ node\_timeout 10 Request timeout to external services
+ client\_timeout 60 Timeout to read one chunk from a client
+ conn\_timeout 0.5 Connection timeout to external services
+ error\_suppression\_interval 60 Time in seconds that must elapse since the last error for a node to be considered no longer error limited
+ error\_suppression\_limit 10 Error count to consider a node error limited
+ allow\_account\_management false Whether account `PUT`s and `DELETE`s are even callable
+
+ : proxy-server.conf Server Options in the [proxy-server] section
+
+Configuring Object Server
+-------------------------
+
+The Object Server is a very simple blob storage server that can store,
+retrieve, and delete objects stored on local devices. Objects are stored
+as binary files on the file system with metadata stored in the file’s
+extended attributes (xattrs). This requires that the underlying file
+system choice for object servers support xattrs on files.
+
+The configurable options pertaining Object Server are stored in the file
+`/etc/swift/object-server/1.conf`. The following is the sample
+`object-server/1.conf` file:
+
+ [DEFAULT]
+ devices = /srv/1/node
+ mount_check = false
+ bind_port = 6010
+ user = root
+ log_facility = LOG_LOCAL2
+
+ [pipeline:main]
+ pipeline = gluster object-server
+
+ [app:object-server]
+ use = egg:swift#object
+
+ [filter:gluster]
+ use = egg:swift#gluster
+
+ [object-replicator]
+ vm_test_mode = yes
+
+ [object-updater]
+ [object-auditor]
+
+The following are the configurable options:
+
+ Option Default Description
+ -------------- ------------ ----------------------------------------------------------------------------------------------------
+ swift\_dir /etc/swift Swift configuration directory
+ devices /srv/node Mount parent directory where devices are mounted
+ mount\_check true Whether or not check if the devices are mounted to prevent accidentally writing to the root device
+ bind\_ip 0.0.0.0 IP Address for server to bind
+ bind\_port 6000 Port for server to bind
+ workers 1 Number of workers to fork
+
+ : object-server.conf Default Options in the [DEFAULT] section
+
+ Option Default Description
+ ---------------------- --------------- ----------------------------------------------------------------------------------------------------
+ use paste.deploy entry point for the object server. For most cases, this should be `egg:swift#object`.
+ log\_name object-server log name used when logging
+ log\_facility LOG\_LOCAL0 Syslog log facility
+ log\_level INFO Logging level
+ log\_requests True Whether or not to log each request
+ user swift swift user
+ node\_timeout 3 Request timeout to external services
+ conn\_timeout 0.5 Connection timeout to external services
+ network\_chunk\_size 65536 Size of chunks to read or write over the network
+ disk\_chunk\_size 65536 Size of chunks to read or write to disk
+ max\_upload\_time 65536 Maximum time allowed to upload an object
+ slow 0 If \> 0, Minimum time in seconds for a `PUT` or `DELETE` request to complete
+
+ : object-server.conf Server Options in the [object-server] section
+
+Configuring Container Server
+----------------------------
+
+The Container Server’s primary job is to handle listings of objects. The
+listing is done by querying the GlusterFS mount point with path. This
+query returns a list of all files and directories present under that
+container.
+
+The configurable options pertaining to container server are stored in
+`/etc/swift/container-server/1.conf` file. The following is the sample
+`container-server/1.conf` file:
+
+ [DEFAULT]
+ devices = /srv/1/node
+ mount_check = false
+ bind_port = 6011
+ user = root
+ log_facility = LOG_LOCAL2
+
+ [pipeline:main]
+ pipeline = gluster container-server
+
+ [app:container-server]
+ use = egg:swift#container
+
+ [filter:gluster]
+ use = egg:swift#gluster
+
+ [container-replicator]
+ [container-updater]
+ [container-auditor]
+
+The following are the configurable options:
+
+ Option Default Description
+ -------------- ------------ ----------------------------------------------------------------------------------------------------
+ swift\_dir /etc/swift Swift configuration directory
+ devices /srv/node Mount parent directory where devices are mounted
+ mount\_check true Whether or not check if the devices are mounted to prevent accidentally writing to the root device
+ bind\_ip 0.0.0.0 IP Address for server to bind
+ bind\_port 6001 Port for server to bind
+ workers 1 Number of workers to fork
+ user swift Swift user
+
+ : container-server.conf Default Options in the [DEFAULT] section
+
+ Option Default Description
+ --------------- ------------------ ----------------------------------------------------------------------------------------------------------
+ use paste.deploy entry point for the container server. For most cases, this should be `egg:swift#container`.
+ log\_name container-server Label used when logging
+ log\_facility LOG\_LOCAL0 Syslog log facility
+ log\_level INFO Logging level
+ node\_timeout 3 Request timeout to external services
+ conn\_timeout 0.5 Connection timeout to external services
+
+ : container-server.conf Server Options in the [container-server]
+ section
+
+Configuring Account Server
+--------------------------
+
+The Account Server is very similar to the Container Server, except that
+it is responsible for listing of containers rather than objects. In UFO,
+each gluster volume is an account.
+
+The configurable options pertaining to account server are stored in
+`/etc/swift/account-server/1.conf` file. The following is the sample
+`account-server/1.conf` file:
+
+ [DEFAULT]
+ devices = /srv/1/node
+ mount_check = false
+ bind_port = 6012
+ user = root
+ log_facility = LOG_LOCAL2
+
+ [pipeline:main]
+ pipeline = gluster account-server
+
+ [app:account-server]
+ use = egg:swift#account
+
+ [filter:gluster]
+ use = egg:swift#gluster
+
+ [account-replicator]
+ vm_test_mode = yes
+
+ [account-auditor]
+ [account-reaper]
+
+The following are the configurable options:
+
+ Option Default Description
+ -------------- ------------ ----------------------------------------------------------------------------------------------------
+ swift\_dir /etc/swift Swift configuration directory
+ devices /srv/node mount parent directory where devices are mounted
+ mount\_check true Whether or not check if the devices are mounted to prevent accidentally writing to the root device
+ bind\_ip 0.0.0.0 IP Address for server to bind
+ bind\_port 6002 Port for server to bind
+ workers 1 Number of workers to fork
+ user swift Swift user
+
+ : account-server.conf Default Options in the [DEFAULT] section
+
+ Option Default Description
+ --------------- ---------------- ----------------------------------------------------------------------------------------------------------
+ use paste.deploy entry point for the container server. For most cases, this should be `egg:swift#container`.
+ log\_name account-server Label used when logging
+ log\_facility LOG\_LOCAL0 Syslog log facility
+ log\_level INFO Logging level
+
+ : account-server.conf Server Options in the [account-server] section
+
+Starting and Stopping Server
+----------------------------
+
+You must start the server manually when system reboots and whenever you
+update/modify the configuration files.
+
+- To start the server, enter the following command:
+
+ `# swift_init main start`
+
+- To stop the server, enter the following command:
+
+ `# swift_init main stop`
+
+Working with Unified File and Object Storage
+============================================
+
+This section describes the REST API for administering and managing
+Object Storage. All requests will be directed to the host and URL
+described in the `X-Storage-URL HTTP` header obtained during successful
+authentication.
+
+Configuring Authenticated Access
+--------------------------------
+
+Authentication is the process of proving identity to the system. To use
+the REST interface, you must obtain an authorization token using GET
+method and supply it with v1.0 as the path.
+
+Each REST request against the Object Storage system requires the
+addition of a specific authorization token HTTP x-header, defined as
+X-Auth-Token. The storage URL and authentication token are returned in
+the headers of the response.
+
+- To authenticate, run the following command:
+
+ GET auth/v1.0 HTTP/1.1
+ Host: <auth URL>
+ X-Auth-User: <account name>:<user name>
+ X-Auth-Key: <user-Password>
+
+ For example,
+
+ GET auth/v1.0 HTTP/1.1
+ Host: auth.example.com
+ X-Auth-User: test:tester
+ X-Auth-Key: testing
+
+ HTTP/1.1 200 OK
+ X-Storage-Url: https:/example.storage.com:443/v1/AUTH_test
+ X-Storage-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554
+ X-Auth-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554
+ Content-Length: 0
+ Date: Wed, 10 jul 2011 06:11:51 GMT
+
+ To authenticate access using cURL (for the above example), run the
+ following command:
+
+ curl -v -H 'X-Storage-User: test:tester' -H 'X-Storage-Pass:testing' -k
+ https://auth.example.com:443/auth/v1.0
+
+ The X-Auth-Url has to be parsed and used in the connection and
+ request line of all subsequent requests to the server. In the
+ example output, users connecting to server will send most
+ container/object requests with a host header of example.storage.com
+ and the request line's version and account as v1/AUTH\_test.
+
+> **Note**
+>
+> The authentication tokens are valid for a 24 hour period.
+
+Working with Accounts
+---------------------
+
+This section describes the list of operations you can perform at the
+account level of the URL.
+
+### Displaying Container Information
+
+You can list the objects of a specific container, or all containers, as
+needed using GET command. You can use the following optional parameters
+with GET request to refine the results:
+
+ Parameter Description
+ ----------- --------------------------------------------------------------------------
+ limit Limits the number of results to at most *n* value.
+ marker Returns object names greater in value than the specified marker.
+ format Specify either json or xml to return the respective serialized response.
+
+**To display container information**
+
+- List all the containers of an account using the following command:
+
+ GET /<apiversion>/<account> HTTP/1.1
+ Host: <storage URL>
+ X-Auth-Token: <authentication-token-key>
+
+ For example,
+
+ GET /v1/AUTH_test HTTP/1.1
+ Host: example.storage.com
+ X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
+
+ HTTP/1.1 200 Ok
+ Date: Wed, 13 Jul 2011 16:32:21 GMT
+ Server: Apache
+ Content-Type: text/plain; charset=UTF-8
+ Content-Length: 39
+
+ songs
+ movies
+ documents
+ reports
+
+To display container information using cURL (for the above example), run
+the following command:
+
+ curl -v -X GET -H 'X-Auth-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554'
+ https://example.storage.com:443/v1/AUTH_test -k
+
+### Displaying Account Metadata Information
+
+You can issue HEAD command to the storage service to view the number of
+containers and the total bytes stored in the account.
+
+- To display containers and storage used, run the following command:
+
+ HEAD /<apiversion>/<account> HTTP/1.1
+ Host: <storage URL>
+ X-Auth-Token: <authentication-token-key>
+
+ For example,
+
+ HEAD /v1/AUTH_test HTTP/1.1
+ Host: example.storage.com
+ X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
+
+ HTTP/1.1 204 No Content
+ Date: Wed, 13 Jul 2011 16:52:21 GMT
+ Server: Apache
+ X-Account-Container-Count: 4
+ X-Account-Total-Bytes-Used: 394792
+
+ To display account metadata information using cURL (for the above
+ example), run the following command:
+
+ curl -v -X HEAD -H 'X-Auth-Token:
+ AUTH_tkde3ad38b087b49bbbac0494f7600a554'
+ https://example.storage.com:443/v1/AUTH_test -k
+
+Working with Containers
+-----------------------
+
+This section describes the list of operations you can perform at the
+container level of the URL.
+
+### Creating Containers
+
+You can use PUT command to create containers. Containers are the storage
+folders for your data. The URL encoded name must be less than 256 bytes
+and cannot contain a forward slash '/' character.
+
+- To create a container, run the following command:
+
+ PUT /<apiversion>/<account>/<container>/ HTTP/1.1
+ Host: <storage URL>
+ X-Auth-Token: <authentication-token-key>
+
+ For example,
+
+ PUT /v1/AUTH_test/pictures/ HTTP/1.1
+ Host: example.storage.com
+ X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
+ HTTP/1.1 201 Created
+
+ Date: Wed, 13 Jul 2011 17:32:21 GMT
+ Server: Apache
+ Content-Type: text/plain; charset=UTF-8
+
+ To create container using cURL (for the above example), run the
+ following command:
+
+ curl -v -X PUT -H 'X-Auth-Token:
+ AUTH_tkde3ad38b087b49bbbac0494f7600a554'
+ https://example.storage.com:443/v1/AUTH_test/pictures -k
+
+ The status code of 201 (Created) indicates that you have
+ successfully created the container. If a container with same is
+ already existed, the status code of 202 is displayed.
+
+### Displaying Objects of a Container
+
+You can list the objects of a container using GET command. You can use
+the following optional parameters with GET request to refine the
+results:
+
+ Parameter Description
+ ----------- --------------------------------------------------------------------------------------------------------------
+ limit Limits the number of results to at most *n* value.
+ marker Returns object names greater in value than the specified marker.
+ prefix Displays the results limited to object names beginning with the substring x. beginning with the substring x.
+ path Returns the object names nested in the pseudo path.
+ format Specify either json or xml to return the respective serialized response.
+ delimiter Returns all the object names nested in the container.
+
+To display objects of a container
+
+- List objects of a specific container using the following command:
+
+<!-- -->
+
+ GET /<apiversion>/<account>/<container>[parm=value] HTTP/1.1
+ Host: <storage URL>
+ X-Auth-Token: <authentication-token-key>
+
+For example,
+
+ GET /v1/AUTH_test/images HTTP/1.1
+ Host: example.storage.com
+ X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
+
+ HTTP/1.1 200 Ok
+ Date: Wed, 13 Jul 2011 15:42:21 GMT
+ Server: Apache
+ Content-Type: text/plain; charset=UTF-8
+ Content-Length: 139
+
+ sample file.jpg
+ test-file.pdf
+ You and Me.pdf
+ Puddle of Mudd.mp3
+ Test Reports.doc
+
+To display objects of a container using cURL (for the above example),
+run the following command:
+
+ curl -v -X GET-H 'X-Auth-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554'
+ https://example.storage.com:443/v1/AUTH_test/images -k
+
+### Displaying Container Metadata Information
+
+You can issue HEAD command to the storage service to view the number of
+objects in a container and the total bytes of all the objects stored in
+the container.
+
+- To display list of objects and storage used, run the following
+ command:
+
+ HEAD /<apiversion>/<account>/<container> HTTP/1.1
+ Host: <storage URL>
+ X-Auth-Token: <authentication-token-key>
+
+ For example,
+
+ HEAD /v1/AUTH_test/images HTTP/1.1
+ Host: example.storage.com
+ X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
+
+ HTTP/1.1 204 No Content
+ Date: Wed, 13 Jul 2011 19:52:21 GMT
+ Server: Apache
+ X-Account-Object-Count: 8
+ X-Container-Bytes-Used: 472
+
+ To display list of objects and storage used in a container using
+ cURL (for the above example), run the following command:
+
+ curl -v -X HEAD -H 'X-Auth-Token:
+ AUTH_tkde3ad38b087b49bbbac0494f7600a554'
+ https://example.storage.com:443/v1/AUTH_test/images -k
+
+### Deleting Container
+
+You can use DELETE command to permanently delete containers. The
+container must be empty before it can be deleted.
+
+You can issue HEAD command to determine if it contains any objects.
+
+- To delete a container, run the following command:
+
+ DELETE /<apiversion>/<account>/<container>/ HTTP/1.1
+ Host: <storage URL>
+ X-Auth-Token: <authentication-token-key>
+
+ For example,
+
+ DELETE /v1/AUTH_test/pictures HTTP/1.1
+ Host: example.storage.com
+ X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
+
+ HTTP/1.1 204 No Content
+ Date: Wed, 13 Jul 2011 17:52:21 GMT
+ Server: Apache
+ Content-Length: 0
+ Content-Type: text/plain; charset=UTF-8
+
+ To delete a container using cURL (for the above example), run the
+ following command:
+
+ curl -v -X DELETE -H 'X-Auth-Token:
+ AUTH_tkde3ad38b087b49bbbac0494f7600a554'
+ https://example.storage.com:443/v1/AUTH_test/pictures -k
+
+ The status code of 204 (No Content) indicates that you have
+ successfully deleted the container. If that container does not
+ exist, the status code 404 (Not Found) is displayed, and if the
+ container is not empty, the status code 409 (Conflict) is displayed.
+
+### Updating Container Metadata
+
+You can update the metadata of container using POST operation, metadata
+keys should be prefixed with 'x-container-meta'.
+
+- To update the metadata of the object, run the following command:
+
+ POST /<apiversion>/<account>/<container> HTTP/1.1
+ Host: <storage URL>
+ X-Auth-Token: <Authentication-token-key>
+ X-Container-Meta-<key>: <new value>
+ X-Container-Meta-<key>: <new value>
+
+ For example,
+
+ POST /v1/AUTH_test/images HTTP/1.1
+ Host: example.storage.com
+ X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
+ X-Container-Meta-Zoo: Lion
+ X-Container-Meta-Home: Dog
+
+ HTTP/1.1 204 No Content
+ Date: Wed, 13 Jul 2011 20:52:21 GMT
+ Server: Apache
+ Content-Type: text/plain; charset=UTF-8
+
+ To update the metadata of the object using cURL (for the above
+ example), run the following command:
+
+ curl -v -X POST -H 'X-Auth-Token:
+ AUTH_tkde3ad38b087b49bbbac0494f7600a554'
+ https://example.storage.com:443/v1/AUTH_test/images -H ' X-Container-Meta-Zoo: Lion' -H 'X-Container-Meta-Home: Dog' -k
+
+ The status code of 204 (No Content) indicates the container's
+ metadata is updated successfully. If that object does not exist, the
+ status code 404 (Not Found) is displayed.
+
+### Setting ACLs on Container
+
+You can set the container access control list by using POST command on
+container with `x- container-read` and` x-container-write` keys.
+
+The ACL format is `[item[,item...]]`. Each item can be a group name to
+give access to or a referrer designation to grant or deny based on the
+HTTP Referer header.
+
+The referrer designation format is:` .r:[-]value`.
+
+The .r can also be `.ref, .referer, `or .`referrer`; though it will be
+shortened to.r for decreased character count usage. The value can be `*`
+to specify any referrer host is allowed access. The leading minus sign
+(-) indicates referrer hosts that should be denied access.
+
+Examples of valid ACLs:
+
+ .r:*
+ .r:*,bobs_account,sues_account:sue
+ bobs_account,sues_account:sue
+
+Examples of invalid ACLs:
+
+ .r:
+ .r:-
+
+By default, allowing read access via `r `will not allow listing objects
+in the container but allows retrieving objects from the container. To
+turn on listings, use the .`rlistings` directive. Also, `.r`
+designations are not allowed in headers whose names include the word
+write.
+
+For example, to set all the objects access rights to "public" inside the
+container using cURL (for the above example), run the following command:
+
+ curl -v -X POST -H 'X-Auth-Token:
+ AUTH_tkde3ad38b087b49bbbac0494f7600a554'
+ https://example.storage.com:443/v1/AUTH_test/images
+ -H 'X-Container-Read: .r:*' -k
+
+Working with Objects
+--------------------
+
+An object represents the data and any metadata for the files stored in
+the system. Through the REST interface, metadata for an object can be
+included by adding custom HTTP headers to the request and the data
+payload as the request body. Objects name should not exceed 1024 bytes
+after URL encoding.
+
+This section describes the list of operations you can perform at the
+object level of the URL.
+
+### Creating or Updating Object
+
+You can use PUT command to write or update an object's content and
+metadata.
+
+You can verify the data integrity by including an MD5checksum for the
+object's data in the ETag header. ETag header is optional and can be
+used to ensure that the object's contents are stored successfully in the
+storage system.
+
+You can assign custom metadata to objects by including additional HTTP
+headers on the PUT request. The objects created with custom metadata via
+HTTP headers are identified with the`X-Object- Meta`- prefix.
+
+- To create or update an object, run the following command:
+
+ PUT /<apiversion>/<account>/<container>/<object> HTTP/1.1
+ Host: <storage URL>
+ X-Auth-Token: <authentication-token-key>
+ ETag: da1e100dc9e7becc810986e37875ae38
+ Content-Length: 342909
+ X-Object-Meta-PIN: 2343
+
+ For example,
+
+ PUT /v1/AUTH_test/pictures/dog HTTP/1.1
+ Host: example.storage.com
+ X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
+ ETag: da1e100dc9e7becc810986e37875ae38
+
+ HTTP/1.1 201 Created
+ Date: Wed, 13 Jul 2011 18:32:21 GMT
+ Server: Apache
+ ETag: da1e100dc9e7becc810986e37875ae38
+ Content-Length: 0
+ Content-Type: text/plain; charset=UTF-8
+
+ To create or update an object using cURL (for the above example),
+ run the following command:
+
+ curl -v -X PUT -H 'X-Auth-Token:
+ AUTH_tkde3ad38b087b49bbbac0494f7600a554'
+ https://example.storage.com:443/v1/AUTH_test/pictures/dog -H 'Content-
+ Length: 0' -k
+
+ The status code of 201 (Created) indicates that you have
+ successfully created or updated the object. If there is a missing
+ content-Length or Content-Type header in the request, the status
+ code of 412 (Length Required) is displayed. (Optionally) If the MD5
+ checksum of the data written to the storage system does not match
+ the ETag value, the status code of 422 (Unprocessable Entity) is
+ displayed.
+
+#### Chunked Transfer Encoding
+
+You can upload data without knowing the size of the data to be uploaded.
+You can do this by specifying an HTTP header of Transfer-Encoding:
+chunked and without using a Content-Length header.
+
+You can use this feature while doing a DB dump, piping the output
+through gzip, and then piping the data directly into Object Storage
+without having to buffer the data to disk to compute the file size.
+
+- To create or update an object, run the following command:
+
+ PUT /<apiversion>/<account>/<container>/<object> HTTP/1.1
+ Host: <storage URL>
+ X-Auth-Token: <authentication-token-key>
+ Transfer-Encoding: chunked
+ X-Object-Meta-PIN: 2343
+
+ For example,
+
+ PUT /v1/AUTH_test/pictures/cat HTTP/1.1
+ Host: example.storage.com
+ X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
+ Transfer-Encoding: chunked
+ X-Object-Meta-PIN: 2343
+ 19
+ A bunch of data broken up
+ D
+ into chunks.
+ 0
+
+### Copying Object
+
+You can copy object from one container to another or add a new object
+and then add reference to designate the source of the data from another
+container.
+
+**To copy object from one container to another**
+
+- To add a new object and designate the source of the data from
+ another container, run the following command:
+
+ COPY /<apiversion>/<account>/<container>/<sourceobject> HTTP/1.1
+ Host: <storage URL>
+ X-Auth-Token: < authentication-token-key>
+ Destination: /<container>/<destinationobject>
+
+ For example,
+
+ COPY /v1/AUTH_test/images/dogs HTTP/1.1
+ Host: example.storage.com
+ X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
+ Destination: /photos/cats
+
+ HTTP/1.1 201 Created
+ Date: Wed, 13 Jul 2011 18:32:21 GMT
+ Server: Apache
+ Content-Length: 0
+ Content-Type: text/plain; charset=UTF-8
+
+ To copy an object using cURL (for the above example), run the
+ following command:
+
+ curl -v -X COPY -H 'X-Auth-Token:
+ AUTH_tkde3ad38b087b49bbbac0494f7600a554' -H 'Destination: /photos/cats' -k https://example.storage.com:443/v1/AUTH_test/images/dogs
+
+ The status code of 201 (Created) indicates that you have
+ successfully copied the object. If there is a missing content-Length
+ or Content-Type header in the request, the status code of 412
+ (Length Required) is displayed.
+
+ You can also use PUT command to copy object by using additional
+ header `X-Copy-From: container/obj`.
+
+- To use PUT command to copy an object, run the following command:
+
+ PUT /v1/AUTH_test/photos/cats HTTP/1.1
+ Host: example.storage.com
+ X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
+ X-Copy-From: /images/dogs
+
+ HTTP/1.1 201 Created
+ Date: Wed, 13 Jul 2011 18:32:21 GMT
+ Server: Apache
+ Content-Type: text/plain; charset=UTF-8
+
+ To copy an object using cURL (for the above example), run the
+ following command:
+
+ curl -v -X PUT -H 'X-Auth-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554'
+ -H 'X-Copy-From: /images/dogs' –k
+ https://example.storage.com:443/v1/AUTH_test/images/cats
+
+ The status code of 201 (Created) indicates that you have
+ successfully copied the object.
+
+### Displaying Object Information
+
+You can issue GET command on an object to view the object data of the
+object.
+
+- To display the content of an object run the following command:
+
+ GET /<apiversion>/<account>/<container>/<object> HTTP/1.1
+ Host: <storage URL>
+ X-Auth-Token: <Authentication-token-key>
+
+ For example,
+
+ GET /v1/AUTH_test/images/cat HTTP/1.1
+ Host: example.storage.com
+ X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
+
+ HTTP/1.1 200 Ok
+ Date: Wed, 13 Jul 2011 23:52:21 GMT
+ Server: Apache
+ Last-Modified: Thu, 14 Jul 2011 13:40:18 GMT
+ ETag: 8a964ee2a5e88be344f36c22562a6486
+ Content-Length: 534210
+ [.........]
+
+ To display the content of an object using cURL (for the above
+ example), run the following command:
+
+ curl -v -X GET -H 'X-Auth-Token:
+ AUTH_tkde3ad38b087b49bbbac0494f7600a554'
+ https://example.storage.com:443/v1/AUTH_test/images/cat -k
+
+ The status code of 200 (Ok) indicates the object's data is displayed
+ successfully. If that object does not exist, the status code 404
+ (Not Found) is displayed.
+
+### Displaying Object Metadata
+
+You can issue HEAD command on an object to view the object metadata and
+other standard HTTP headers. You must send only authorization token as
+header.
+
+- To display the metadata of the object, run the following command:
+
+<!-- -->
+
+ HEAD /<apiversion>/<account>/<container>/<object> HTTP/1.1
+ Host: <storage URL>
+ X-Auth-Token: <Authentication-token-key>
+
+For example,
+
+ HEAD /v1/AUTH_test/images/cat HTTP/1.1
+ Host: example.storage.com
+ X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
+
+ HTTP/1.1 204 No Content
+ Date: Wed, 13 Jul 2011 21:52:21 GMT
+ Server: Apache
+ Last-Modified: Thu, 14 Jul 2011 13:40:18 GMT
+ ETag: 8a964ee2a5e88be344f36c22562a6486
+ Content-Length: 512000
+ Content-Type: text/plain; charset=UTF-8
+ X-Object-Meta-House: Cat
+ X-Object-Meta-Zoo: Cat
+ X-Object-Meta-Home: Cat
+ X-Object-Meta-Park: Cat
+
+To display the metadata of the object using cURL (for the above
+example), run the following command:
+
+ curl -v -X HEAD -H 'X-Auth-Token:
+ AUTH_tkde3ad38b087b49bbbac0494f7600a554'
+ https://example.storage.com:443/v1/AUTH_test/images/cat -k
+
+The status code of 204 (No Content) indicates the object's metadata is
+displayed successfully. If that object does not exist, the status code
+404 (Not Found) is displayed.
+
+### Updating Object Metadata
+
+You can issue POST command on an object name only to set or overwrite
+arbitrary key metadata. You cannot change the object's other headers
+such as Content-Type, ETag and others using POST operation. The POST
+command will delete all the existing metadata and replace it with the
+new arbitrary key metadata.
+
+You must prefix **X-Object-Meta-** to the key names.
+
+- To update the metadata of an object, run the following command:
+
+ POST /<apiversion>/<account>/<container>/<object> HTTP/1.1
+ Host: <storage URL>
+ X-Auth-Token: <Authentication-token-key>
+ X-Object-Meta-<key>: <new value>
+ X-Object-Meta-<key>: <new value>
+
+ For example,
+
+ POST /v1/AUTH_test/images/cat HTTP/1.1
+ Host: example.storage.com
+ X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
+ X-Object-Meta-Zoo: Lion
+ X-Object-Meta-Home: Dog
+
+ HTTP/1.1 202 Accepted
+ Date: Wed, 13 Jul 2011 22:52:21 GMT
+ Server: Apache
+ Content-Length: 0
+ Content-Type: text/plain; charset=UTF-8
+
+ To update the metadata of an object using cURL (for the above
+ example), run the following command:
+
+ curl -v -X POST -H 'X-Auth-Token:
+ AUTH_tkde3ad38b087b49bbbac0494f7600a554'
+ https://example.storage.com:443/v1/AUTH_test/images/cat -H ' X-Object-
+ Meta-Zoo: Lion' -H 'X-Object-Meta-Home: Dog' -k
+
+ The status code of 202 (Accepted) indicates that you have
+ successfully updated the object's metadata. If that object does not
+ exist, the status code 404 (Not Found) is displayed.
+
+### Deleting Object
+
+You can use DELETE command to permanently delete the object.
+
+The DELETE command on an object will be processed immediately and any
+subsequent operations like GET, HEAD, POST, or DELETE on the object will
+display 404 (Not Found) error.
+
+- To delete an object, run the following command:
+
+ DELETE /<apiversion>/<account>/<container>/<object> HTTP/1.1
+ Host: <storage URL>
+ X-Auth-Token: <Authentication-token-key>
+
+ For example,
+
+ DELETE /v1/AUTH_test/pictures/cat HTTP/1.1
+ Host: example.storage.com
+ X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
+
+ HTTP/1.1 204 No Content
+ Date: Wed, 13 Jul 2011 20:52:21 GMT
+ Server: Apache
+ Content-Type: text/plain; charset=UTF-8
+
+ To delete an object using cURL (for the above example), run the
+ following command:
+
+ curl -v -X DELETE -H 'X-Auth-Token:
+ AUTH_tkde3ad38b087b49bbbac0494f7600a554'
+ https://example.storage.com:443/v1/AUTH_test/pictures/cat -k
+
+ The status code of 204 (No Content) indicates that you have
+ successfully deleted the object. If that object does not exist, the
+ status code 404 (Not Found) is displayed.
+
+ []: http://download.gluster.com/pub/gluster/glusterfs/3.2/UFO/
diff --git a/doc/admin-guide/en-US/markdown/admin_commandref.md b/doc/admin-guide/en-US/markdown/admin_commandref.md
new file mode 100644
index 000000000..4ff05f4ef
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/admin_commandref.md
@@ -0,0 +1,180 @@
+Command Reference
+=================
+
+This section describes the available commands and includes the following
+section:
+
+- gluster Command
+
+ Gluster Console Manager (command line interpreter)
+
+- glusterd Daemon
+
+ Gluster elastic volume management daemon
+
+gluster Command
+===============
+
+**NAME**
+
+gluster - Gluster Console Manager (command line interpreter)
+
+**SYNOPSIS**
+
+To run the program and display the gluster prompt:
+
+**gluster**
+
+To specify a command directly: gluster [COMMANDS] [OPTIONS]
+
+**DESCRIPTION**
+
+The Gluster Console Manager is a command line utility for elastic volume
+management. You can run the gluster command on any export server. The
+command enables administrators to perform cloud operations such as
+creating, expanding, shrinking, rebalancing, and migrating volumes
+without needing to schedule server downtime.
+
+**COMMANDS**
+
+ ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Command Description
+ ---------------------------------------------------------------------------------------------------------- ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ **Volume**
+
+ volume info [all | VOLNAME] Displays information about all volumes, or the specified volume.
+
+ volume create NEW-VOLNAME [stripe COUNT] [replica COUNT] [transport tcp | rdma | tcp,rdma] NEW-BRICK ... Creates a new volume of the specified type using the specified bricks and transport type (the default transport type is tcp).
+
+ volume delete VOLNAME Deletes the specified volume.
+
+ volume start VOLNAME Starts the specified volume.
+
+ volume stop VOLNAME [force] Stops the specified volume.
+
+ volume rename VOLNAME NEW-VOLNAME Renames the specified volume.
+
+ volume help Displays help for the volume command.
+
+ **Brick**
+
+ volume add-brick VOLNAME NEW-BRICK ... Adds the specified brick to the specified volume.
+
+ volume replace-brick VOLNAME (BRICK NEW-BRICK) start | pause | abort | status Replaces the specified brick.
+
+ volume remove-brick VOLNAME [(replica COUNT)|(stripe COUNT)] BRICK ... Removes the specified brick from the specified volume.
+
+ **Rebalance**
+
+ volume rebalance VOLNAME start Starts rebalancing the specified volume.
+
+ volume rebalance VOLNAME stop Stops rebalancing the specified volume.
+
+ volume rebalance VOLNAME status Displays the rebalance status of the specified volume.
+
+ **Log**
+
+ volume log filename VOLNAME [BRICK] DIRECTORY Sets the log directory for the corresponding volume/brick.
+
+ volume log rotate VOLNAME [BRICK] Rotates the log file for corresponding volume/brick.
+
+ volume log locate VOLNAME [BRICK] Locates the log file for corresponding volume/brick.
+
+ **Peer**
+
+ peer probe HOSTNAME Probes the specified peer.
+
+ peer detach HOSTNAME Detaches the specified peer.
+
+ peer status Displays the status of peers.
+
+ peer help Displays help for the peer command.
+
+ **Geo-replication**
+
+ volume geo-replication MASTER SLAVE start Start geo-replication between the hosts specified by MASTER and SLAVE. You can specify a local master volume as :VOLNAME.
+
+ You can specify a local slave volume as :VOLUME and a local slave directory as /DIRECTORY/SUB-DIRECTORY. You can specify a remote slave volume as DOMAIN::VOLNAME and a remote slave directory as DOMAIN:/DIRECTORY/SUB-DIRECTORY.
+
+ volume geo-replication MASTER SLAVE stop Stop geo-replication between the hosts specified by MASTER and SLAVE. You can specify a local master volume as :VOLNAME and a local master directory as /DIRECTORY/SUB-DIRECTORY.
+
+ You can specify a local slave volume as :VOLNAME and a local slave directory as /DIRECTORY/SUB-DIRECTORY. You can specify a remote slave volume as DOMAIN::VOLNAME and a remote slave directory as DOMAIN:/DIRECTORY/SUB-DIRECTORY.
+
+ volume geo-replication MASTER SLAVE config [options] Configure geo-replication options between the hosts specified by MASTER and SLAVE.
+
+ gluster-command COMMAND The path where the gluster command is installed.
+
+ gluster-log-level LOGFILELEVEL The log level for gluster processes.
+
+ log-file LOGFILE The path to the geo-replication log file.
+
+ log-level LOGFILELEVEL The log level for geo-replication.
+
+ remote-gsyncd COMMAND The path where the gsyncd binary is installed on the remote machine.
+
+ ssh-command COMMAND The ssh command to use to connect to the remote machine (the default is ssh).
+
+ rsync-command COMMAND The rsync command to use for synchronizing the files (the default is rsync).
+
+ volume\_id= UID The command to delete the existing master UID for the intermediate/slave node.
+
+ timeout SECONDS The timeout period.
+
+ sync-jobs N The number of simultaneous files/directories that can be synchronized.
+
+ ignore-deletes If this option is set to 1, a file deleted on master will not trigger a delete operation on the slave. Hence, the slave will remain as a superset of the master and can be used to recover the master in case of crash and/or accidental delete.
+
+ **Other**
+
+ help Display the command options.
+
+ quit Exit the gluster command line interface.
+ ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+**FILES**
+
+/var/lib/glusterd/\*
+
+**SEE ALSO**
+
+fusermount(1), mount.glusterfs(8), glusterfs-volgen(8), glusterfs(8),
+glusterd(8)
+
+glusterd Daemon
+===============
+
+**NAME**
+
+glusterd - Gluster elastic volume management daemon
+
+**SYNOPSIS**
+
+glusterd [OPTION...]
+
+**DESCRIPTION**
+
+The glusterd daemon is used for elastic volume management. The daemon
+must be run on all export servers.
+
+**OPTIONS**
+
+ Option Description
+ ----------------------------------- ----------------------------------------------------------------------------------------------------------------
+ **Basic**
+ -l=LOGFILE, --log-file=LOGFILE Files to use for logging (the default is /usr/local/var/log/glusterfs/glusterfs.log).
+ -L=LOGLEVEL, --log-level=LOGLEVEL Logging severity. Valid options are TRACE, DEBUG, INFO, WARNING, ERROR and CRITICAL (the default is INFO).
+ --debug Runs the program in debug mode. This option sets --no-daemon, --log-level to DEBUG, and --log-file to console.
+ -N, --no-daemon Runs the program in the foreground.
+ **Miscellaneous**
+ -?, --help Displays this help.
+ --usage Displays a short usage message.
+ -V, --version Prints the program version.
+
+**FILES**
+
+/var/lib/glusterd/\*
+
+**SEE ALSO**
+
+fusermount(1), mount.glusterfs(8), glusterfs-volgen(8), glusterfs(8),
+gluster(8)
diff --git a/doc/admin-guide/en-US/markdown/admin_console.md b/doc/admin-guide/en-US/markdown/admin_console.md
new file mode 100644
index 000000000..9b69de02d
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/admin_console.md
@@ -0,0 +1,51 @@
+Using the Gluster Console Manager – Command Line Utility
+========================================================
+
+The Gluster Console Manager is a single command line utility that
+simplifies configuration and management of your storage environment. The
+Gluster Console Manager is similar to the LVM (Logical Volume Manager)
+CLI or ZFS Command Line Interface, but across multiple storage servers.
+You can use the Gluster Console Manager online, while volumes are
+mounted and active. Gluster automatically synchronizes volume
+configuration information across all Gluster servers.
+
+Using the Gluster Console Manager, you can create new volumes, start
+volumes, and stop volumes, as required. You can also add bricks to
+volumes, remove bricks from existing volumes, as well as change
+translator settings, among other operations.
+
+You can also use the commands to create scripts for automation, as well
+as use the commands as an API to allow integration with third-party
+applications.
+
+**Running the Gluster Console Manager**
+
+You can run the Gluster Console Manager on any GlusterFS server either
+by invoking the commands or by running the Gluster CLI in interactive
+mode. You can also use the gluster command remotely using SSH.
+
+- To run commands directly:
+
+ ` # gluster peer `
+
+ For example:
+
+ ` # gluster peer status `
+
+- To run the Gluster Console Manager in interactive mode
+
+ `# gluster`
+
+ You can execute gluster commands from the Console Manager prompt:
+
+ ` gluster> `
+
+ For example, to view the status of the peer server:
+
+ \# `gluster `
+
+ `gluster > peer status `
+
+ Display the status of the peer.
+
+
diff --git a/doc/admin-guide/en-US/markdown/admin_directory_Quota.md b/doc/admin-guide/en-US/markdown/admin_directory_Quota.md
new file mode 100644
index 000000000..09c757781
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/admin_directory_Quota.md
@@ -0,0 +1,172 @@
+Managing Directory Quota
+========================
+
+Directory quotas in GlusterFS allow you to set limits on usage of disk
+space by directories or volumes. The storage administrators can control
+the disk space utilization at the directory and/or volume levels in
+GlusterFS by setting limits to allocatable disk space at any level in
+the volume and directory hierarchy. This is particularly useful in cloud
+deployments to facilitate utility billing model.
+
+> **Note**
+>
+> For now, only Hard limit is supported. Here, the limit cannot be
+> exceeded and attempts to use more disk space or inodes beyond the set
+> limit will be denied.
+
+System administrators can also monitor the resource utilization to limit
+the storage for the users depending on their role in the organization.
+
+You can set the quota at the following levels:
+
+- Directory level – limits the usage at the directory level
+
+- Volume level – limits the usage at the volume level
+
+> **Note**
+>
+> You can set the disk limit on the directory even if it is not created.
+> The disk limit is enforced immediately after creating that directory.
+> For more information on setting disk limit, see ?.
+
+Enabling Quota
+==============
+
+You must enable Quota to set disk limits.
+
+**To enable quota**
+
+- Enable the quota using the following command:
+
+ `# gluster volume quota enable `
+
+ For example, to enable quota on test-volume:
+
+ # gluster volume quota test-volume enable
+ Quota is enabled on /test-volume
+
+Disabling Quota
+===============
+
+You can disable Quota, if needed.
+
+**To disable quota:**
+
+- Disable the quota using the following command:
+
+ `# gluster volume quota disable `
+
+ For example, to disable quota translator on test-volume:
+
+ # gluster volume quota test-volume disable
+ Quota translator is disabled on /test-volume
+
+Setting or Replacing Disk Limit
+===============================
+
+You can create new directories in your storage environment and set the
+disk limit or set disk limit for the existing directories. The directory
+name should be relative to the volume with the export directory/mount
+being treated as "/".
+
+**To set or replace disk limit**
+
+- Set the disk limit using the following command:
+
+ `# gluster volume quota limit-usage /`
+
+ For example, to set limit on data directory on test-volume where
+ data is a directory under the export directory:
+
+ # gluster volume quota test-volume limit-usage /data 10GB
+ Usage limit has been set on /data
+
+ > **Note**
+ >
+ > In a multi-level directory hierarchy, the strictest disk limit
+ > will be considered for enforcement.
+
+Displaying Disk Limit Information
+=================================
+
+You can display disk limit information on all the directories on which
+the limit is set.
+
+**To display disk limit information**
+
+- Display disk limit information of all the directories on which limit
+ is set, using the following command:
+
+ `# gluster volume quota list`
+
+ For example, to see the set disks limit on test-volume:
+
+ # gluster volume quota test-volume list
+
+
+ /Test/data 10 GB 6 GB
+ /Test/data1 10 GB 4 GB
+
+- Display disk limit information on a particular directory on which
+ limit is set, using the following command:
+
+ `# gluster volume quota list `
+
+ For example, to see the set limit on /data directory of test-volume:
+
+ # gluster volume quota test-volume list /data
+
+
+ /Test/data 10 GB 6 GB
+
+Updating Memory Cache Size
+==========================
+
+For performance reasons, quota caches the directory sizes on client. You
+can set timeout indicating the maximum valid duration of directory sizes
+in cache, from the time they are populated.
+
+For example: If there are multiple clients writing to a single
+directory, there are chances that some other client might write till the
+quota limit is exceeded. However, this new file-size may not get
+reflected in the client till size entry in cache has become stale
+because of timeout. If writes happen on this client during this
+duration, they are allowed even though they would lead to exceeding of
+quota-limits, since size in cache is not in sync with the actual size.
+When timeout happens, the size in cache is updated from servers and will
+be in sync and no further writes will be allowed. A timeout of zero will
+force fetching of directory sizes from server for every operation that
+modifies file data and will effectively disables directory size caching
+on client side.
+
+**To update the memory cache size**
+
+- Update the memory cache size using the following command:
+
+ `# gluster volume set features.quota-timeout`
+
+ For example, to update the memory cache size for every 5 seconds on
+ test-volume:
+
+ # gluster volume set test-volume features.quota-timeout 5
+ Set volume successful
+
+Removing Disk Limit
+===================
+
+You can remove set disk limit, if you do not want quota anymore.
+
+**To remove disk limit**
+
+- Remove disk limit set on a particular directory using the following
+ command:
+
+ `# gluster volume quota remove `
+
+ For example, to remove the disk limit on /data directory of
+ test-volume:
+
+ # gluster volume quota test-volume remove /data
+ Usage limit set on /data is removed
+
+
diff --git a/doc/admin-guide/en-US/markdown/admin_geo-replication.md b/doc/admin-guide/en-US/markdown/admin_geo-replication.md
new file mode 100644
index 000000000..849957244
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/admin_geo-replication.md
@@ -0,0 +1,738 @@
+Managing Geo-replication
+========================
+
+Geo-replication provides a continuous, asynchronous, and incremental
+replication service from one site to another over Local Area Networks
+(LANs), Wide Area Network (WANs), and across the Internet.
+
+Geo-replication uses a master–slave model, whereby replication and
+mirroring occurs between the following partners:
+
+- Master – a GlusterFS volume
+
+- Slave – a slave which can be of the following types:
+
+ - A local directory which can be represented as file URL like
+ `file:///path/to/dir`. You can use shortened form, for example,
+ ` /path/to/dir`.
+
+ - A GlusterFS Volume - Slave volume can be either a local volume
+ like `gluster://localhost:volname` (shortened form - `:volname`)
+ or a volume served by different host like
+ `gluster://host:volname` (shortened form - `host:volname`).
+
+ > **Note**
+ >
+ > Both of the above types can be accessed remotely using SSH tunnel.
+ > To use SSH, add an SSH prefix to either a file URL or gluster type
+ > URL. For example, ` ssh://root@remote-host:/path/to/dir`
+ > (shortened form - `root@remote-host:/path/to/dir`) or
+ > `ssh://root@remote-host:gluster://localhost:volname` (shortened
+ > from - `root@remote-host::volname`).
+
+This section introduces Geo-replication, illustrates the various
+deployment scenarios, and explains how to configure the system to
+provide replication and mirroring in your environment.
+
+Replicated Volumes vs Geo-replication
+=====================================
+
+The following table lists the difference between replicated volumes and
+geo-replication:
+
+ Replicated Volumes Geo-replication
+ --------------------------------------------------------------------------------------- -----------------------------------------------------------------------------------------------------------------
+ Mirrors data across clusters Mirrors data across geographically distributed clusters
+ Provides high-availability Ensures backing up of data for disaster recovery
+ Synchronous replication (each and every file operation is sent across all the bricks) Asynchronous replication (checks for the changes in files periodically and syncs them on detecting differences)
+
+Preparing to Deploy Geo-replication
+===================================
+
+This section provides an overview of the Geo-replication deployment
+scenarios, describes how you can check the minimum system requirements,
+and explores common deployment scenarios.
+
+- ?
+
+- ?
+
+- ?
+
+- ?
+
+- ?
+
+Exploring Geo-replication Deployment Scenarios
+----------------------------------------------
+
+Geo-replication provides an incremental replication service over Local
+Area Networks (LANs), Wide Area Network (WANs), and across the Internet.
+This section illustrates the most common deployment scenarios for
+Geo-replication, including the following:
+
+- Geo-replication over LAN
+
+- Geo-replication over WAN
+
+- Geo-replication over the Internet
+
+- Multi-site cascading Geo-replication
+
+**Geo-replication over LAN**
+
+You can configure Geo-replication to mirror data over a Local Area
+Network.
+
+![ Geo-replication over LAN ][]
+
+**Geo-replication over WAN**
+
+You can configure Geo-replication to replicate data over a Wide Area
+Network.
+
+![ Geo-replication over WAN ][]
+
+**Geo-replication over Internet**
+
+You can configure Geo-replication to mirror data over the Internet.
+
+![ Geo-replication over Internet ][]
+
+**Multi-site cascading Geo-replication**
+
+You can configure Geo-replication to mirror data in a cascading fashion
+across multiple sites.
+
+![ Multi-site cascading Geo-replication ][]
+
+Geo-replication Deployment Overview
+-----------------------------------
+
+Deploying Geo-replication involves the following steps:
+
+1. Verify that your environment matches the minimum system requirement.
+ For more information, see ?.
+
+2. Determine the appropriate deployment scenario. For more information,
+ see ?.
+
+3. Start Geo-replication on master and slave systems, as required. For
+ more information, see ?.
+
+Checking Geo-replication Minimum Requirements
+---------------------------------------------
+
+Before deploying GlusterFS Geo-replication, verify that your systems
+match the minimum requirements.
+
+The following table outlines the minimum requirements for both master
+and slave nodes within your environment:
+
+ Component Master Slave
+ ------------------------ --------------------------------------------------------------------- --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Operating System GNU/Linux GNU/Linux
+ Filesystem GlusterFS 3.2 or higher GlusterFS 3.2 or higher (GlusterFS needs to be installed, but does not need to be running), ext3, ext4, or XFS (any other POSIX compliant file system would work, but has not been tested extensively)
+ Python Python 2.4 (with ctypes external module), or Python 2.5 (or higher) Python 2.4 (with ctypes external module), or Python 2.5 (or higher)
+ Secure shell OpenSSH version 4.0 (or higher) SSH2-compliant daemon
+ Remote synchronization rsync 3.0.7 or higher rsync 3.0.7 or higher
+ FUSE GlusterFS supported versions GlusterFS supported versions
+
+Setting Up the Environment for Geo-replication
+----------------------------------------------
+
+**Time Synchronization**
+
+- On bricks of a geo-replication master volume, all the servers' time
+ must be uniform. You are recommended to set up NTP (Network Time
+ Protocol) service to keep the bricks sync in time and avoid
+ out-of-time sync effect.
+
+ For example: In a Replicated volume where brick1 of the master is at
+ 12.20 hrs and brick 2 of the master is at 12.10 hrs with 10 minutes
+ time lag, all the changes in brick2 between this period may go
+ unnoticed during synchronization of files with Slave.
+
+ For more information on setting up NTP, see [][].
+
+**To setup Geo-replication for SSH**
+
+Password-less login has to be set up between the host machine (where
+geo-replication Start command will be issued) and the remote machine
+(where slave process should be launched through SSH).
+
+1. On the node where geo-replication sessions are to be set up, run the
+ following command:
+
+ `# ssh-keygen -f /var/lib/glusterd/geo-replication/secret.pem`
+
+ Press Enter twice to avoid passphrase.
+
+2. Run the following command on master for all the slave hosts:
+
+ `# ssh-copy-id -i /var/lib/glusterd/geo-replication/secret.pem.pub @`
+
+Setting Up the Environment for a Secure Geo-replication Slave
+-------------------------------------------------------------
+
+You can configure a secure slave using SSH so that master is granted a
+restricted access. With GlusterFS, you need not specify configuration
+parameters regarding the slave on the master-side configuration. For
+example, the master does not require the location of the rsync program
+on slave but the slave must ensure that rsync is in the PATH of the user
+which the master connects using SSH. The only information that master
+and slave have to negotiate are the slave-side user account, slave's
+resources that master uses as slave resources, and the master's public
+key. Secure access to the slave can be established using the following
+options:
+
+- Restricting Remote Command Execution
+
+- Using `Mountbroker` for Slaves
+
+- Using IP based Access Control
+
+**Backward Compatibility**
+
+Your existing Ge-replication environment will work with GlusterFS,
+except for the following:
+
+- The process of secure reconfiguration affects only the glusterfs
+ instance on slave. The changes are transparent to master with the
+ exception that you may have to change the SSH target to an
+ unprivileged account on slave.
+
+- The following are the some exceptions where this might not work:
+
+ - Geo-replication URLs which specify the slave resource when
+ configuring master will include the following special
+ characters: space, \*, ?, [;
+
+ - Slave must have a running instance of glusterd, even if there is
+ no gluster volume among the mounted slave resources (that is,
+ file tree slaves are used exclusively) .
+
+### Restricting Remote Command Execution
+
+If you restrict remote command execution, then the Slave audits commands
+coming from the master and the commands related to the given
+geo-replication session is allowed. The Slave also provides access only
+to the files within the slave resource which can be read or manipulated
+by the Master.
+
+To restrict remote command execution:
+
+1. Identify the location of the gsyncd helper utility on Slave. This
+ utility is installed in `PREFIX/libexec/glusterfs/gsyncd`, where
+ PREFIX is a compile-time parameter of glusterfs. For example,
+ `--prefix=PREFIX` to the configure script with the following common
+ values` /usr, /usr/local, and /opt/glusterfs/glusterfs_version`.
+
+2. Ensure that command invoked from master to slave passed through the
+ slave's gsyncd utility.
+
+ You can use either of the following two options:
+
+ - Set gsyncd with an absolute path as the shell for the account
+ which the master connects through SSH. If you need to use a
+ privileged account, then set it up by creating a new user with
+ UID 0.
+
+ - Setup key authentication with command enforcement to gsyncd. You
+ must prefix the copy of master's public key in the Slave
+ account's `authorized_keys` file with the following command:
+
+ `command=<path to gsyncd>`.
+
+ For example,
+ `command="PREFIX/glusterfs/gsyncd" ssh-rsa AAAAB3Nza....`
+
+### Using Mountbroker for Slaves
+
+`mountbroker` is a new service of glusterd. This service allows an
+unprivileged process to own a GlusterFS mount by registering a label
+(and DSL (Domain-specific language) options ) with glusterd through a
+glusterd volfile. Using CLI, you can send a mount request to glusterd to
+receive an alias (symlink) of the mounted volume.
+
+A request from the agent , the unprivileged slave agents use the
+mountbroker service of glusterd to set up an auxiliary gluster mount for
+the agent in a special environment which ensures that the agent is only
+allowed to access with special parameters that provide administrative
+level access to the particular volume.
+
+**To setup an auxiliary gluster mount for the agent**:
+
+1. Create a new group. For example, `geogroup`.
+
+2. Create a unprivileged account. For example, ` geoaccount`. Make it a
+ member of ` geogroup`.
+
+3. Create a new directory owned by root and with permissions *0711.*
+ For example, create a create mountbroker-root directory
+ `/var/mountbroker-root`.
+
+4. Add the following options to the glusterd volfile, assuming the name
+ of the slave gluster volume as `slavevol`:
+
+ `option mountbroker-root /var/mountbroker-root `
+
+ `option mountbroker-geo-replication.geoaccount slavevol`
+
+ `option geo-replication-log-group geogroup`
+
+ If you are unable to locate the glusterd volfile at
+ `/etc/glusterfs/glusterd.vol`, you can create a volfile containing
+ both the default configuration and the above options and place it at
+ `/etc/glusterfs/`.
+
+ A sample glusterd volfile along with default options:
+
+ volume management
+ type mgmt/glusterd
+ option working-directory /var/lib/glusterd
+ option transport-type socket,rdma
+ option transport.socket.keepalive-time 10
+ option transport.socket.keepalive-interval 2
+ option transport.socket.read-fail-log off
+
+ option mountbroker-root /var/mountbroker-root
+ option mountbroker-geo-replication.geoaccount slavevol
+ option geo-replication-log-group geogroup
+ end-volume
+
+ If you host multiple slave volumes on Slave, you can repeat step 2.
+ for each of them and add the following options to the `volfile`:
+
+ option mountbroker-geo-replication.geoaccount2 slavevol2
+ option mountbroker-geo-replication.geoaccount3 slavevol3
+
+5. Setup Master to access Slave as `geoaccount@Slave`.
+
+ You can add multiple slave volumes within the same account
+ (geoaccount) by providing comma-separated list (without spaces) as
+ the argument of `mountbroker-geo-replication.geogroup`. You can also
+ have multiple options of the form `mountbroker-geo-replication.*`.
+ It is recommended to use one service account per Master machine. For
+ example, if there are multiple slave volumes on Slave for the master
+ machines Master1, Master2, and Master3, then create a dedicated
+ service user on Slave for them by repeating Step 2. for each (like
+ geogroup1, geogroup2, and geogroup3), and then add the following
+ corresponding options to the volfile:
+
+ `option mountbroker-geo-replication.geoaccount1 slavevol11,slavevol12,slavevol13`
+
+ `option mountbroker-geo-replication.geoaccount2 slavevol21,slavevol22`
+
+ `option mountbroker-geo-replication.geoaccount3 slavevol31`
+
+ Now set up Master1 to ssh to geoaccount1@Slave, etc.
+
+ You must restart glusterd after making changes in the configuration
+ to effect the updates.
+
+### Using IP based Access Control
+
+You can use IP based access control method to provide access control for
+the slave resources using IP address. You can use method for both Slave
+and file tree slaves, but in the section, we are focusing on file tree
+slaves using this method.
+
+To set access control based on IP address for file tree slaves:
+
+1. Set a general restriction for accessibility of file tree resources:
+
+ `# gluster volume geo-replication '/*' config allow-network ::1,127.0.0.1 `
+
+ This will refuse all requests for spawning slave agents except for
+ requests initiated locally.
+
+2. If you want the to lease file tree at `/data/slave-tree` to Master,
+ enter the following command:
+
+ `# gluster volume geo-replicationconfig allow-network `
+
+ `MasterIP` is the IP address of Master. The slave agent spawn
+ request from master will be accepted if it is executed at
+ `/data/slave-tree`.
+
+If the Master side network configuration does not enable the Slave to
+recognize the exact IP address of Master, you can use CIDR notation to
+specify a subnet instead of a single IP address as MasterIP or even
+comma-separated lists of CIDR subnets.
+
+If you want to extend IP based access control to gluster slaves, use the
+following command:
+
+`# gluster volume geo-replication '*' config allow-network ::1,127.0.0.1`
+
+Starting Geo-replication
+========================
+
+This section describes how to configure and start Gluster
+Geo-replication in your storage environment, and verify that it is
+functioning correctly.
+
+- ?
+
+- ?
+
+- ?
+
+- ?
+
+- ?
+
+Starting Geo-replication
+------------------------
+
+To start Gluster Geo-replication
+
+- Start geo-replication between the hosts using the following command:
+
+ `# gluster volume geo-replication start`
+
+ For example:
+
+ # gluster volume geo-replication Volume1 example.com:/data/remote_dir start
+ Starting geo-replication session between Volume1
+ example.com:/data/remote_dir has been successful
+
+ > **Note**
+ >
+ > You may need to configure the service before starting Gluster
+ > Geo-replication. For more information, see ?.
+
+Verifying Successful Deployment
+-------------------------------
+
+You can use the gluster command to verify the status of Gluster
+Geo-replication in your environment.
+
+**To verify the status Gluster Geo-replication**
+
+- Verify the status by issuing the following command on host:
+
+ `# gluster volume geo-replication status`
+
+ For example:
+
+ `# gluster volume geo-replication Volume1 example.com:/data/remote_dir status`
+
+ # gluster volume geo-replication Volume1 example.com:/data/remote_dir status
+
+ MASTER SLAVE STATUS
+ ______ ______________________________ ____________
+ Volume1 root@example.com:/data/remote_dir Starting....
+
+Displaying Geo-replication Status Information
+---------------------------------------------
+
+You can display status information about a specific geo-replication
+master session, or a particular master-slave session, or all
+geo-replication sessions, as needed.
+
+**To display geo-replication status information**
+
+- Display information of all geo-replication sessions using the
+ following command:
+
+ # gluster volume geo-replication Volume1 example.com:/data/remote_dir status
+
+ MASTER SLAVE STATUS
+ ______ ______________________________ ____________
+ Volume1 root@example.com:/data/remote_dir Starting....
+
+- Display information of a particular master slave session using the
+ following command:
+
+ `# gluster volume geo-replication status`
+
+ For example, to display information of Volume1 and
+ example.com:/data/remote\_dir
+
+ `# gluster volume geo-replication Volume1 example.com:/data/remote_dir status`
+
+ The status of the geo-replication between Volume1 and
+ example.com:/data/remote\_dir is displayed.
+
+- Display information of all geo-replication sessions belonging to a
+ master
+
+ `# gluster volume geo-replication MASTER status`
+
+ For example, to display information of Volume1
+
+ # gluster volume geo-replication Volume1 example.com:/data/remote_dir status
+
+ MASTER SLAVE STATUS
+ ______ ______________________________ ____________
+ Volume1 ssh://example.com:gluster://127.0.0.1:remove_volume OK
+
+ Volume1 ssh://example.com:file:///data/remote_dir OK
+
+ The status of a session could be one of the following four:
+
+- **Starting**: This is the initial phase of the Geo-replication
+ session; it remains in this state for a minute, to make sure no
+ abnormalities are present.
+
+- **OK**: The geo-replication session is in a stable state.
+
+- **Faulty**: The geo-replication session has witnessed some
+ abnormality and the situation has to be investigated further. For
+ further information, see ? section.
+
+- **Corrupt**: The monitor thread which is monitoring the
+ geo-replication session has died. This situation should not occur
+ normally, if it persists contact Red Hat Support[][1].
+
+Configuring Geo-replication
+---------------------------
+
+To configure Gluster Geo-replication
+
+- Use the following command at the Gluster command line:
+
+ `# gluster volume geo-replication config [options]`
+
+ For more information about the options, see ?.
+
+ For example:
+
+ To view list of all option/value pair, use the following command:
+
+ `# gluster volume geo-replication Volume1 example.com:/data/remote_dir config`
+
+Stopping Geo-replication
+------------------------
+
+You can use the gluster command to stop Gluster Geo-replication (syncing
+of data from Master to Slave) in your environment.
+
+**To stop Gluster Geo-replication**
+
+- Stop geo-replication between the hosts using the following command:
+
+ `# gluster volume geo-replication stop `
+
+ For example:
+
+ # gluster volume geo-replication Volume1 example.com:/data/remote_dir stop
+ Stopping geo-replication session between Volume1 and
+ example.com:/data/remote_dir has been successful
+
+ See ? for more information about the gluster command.
+
+Restoring Data from the Slave
+=============================
+
+You can restore data from the slave to the master volume, whenever the
+master volume becomes faulty for reasons like hardware failure.
+
+The example in this section assumes that you are using the Master Volume
+(Volume1) with the following configuration:
+
+ machine1# gluster volume info
+ Type: Distribute
+ Status: Started
+ Number of Bricks: 2
+ Transport-type: tcp
+ Bricks:
+ Brick1: machine1:/export/dir16
+ Brick2: machine2:/export/dir16
+ Options Reconfigured:
+ geo-replication.indexing: on
+
+The data is syncing from master volume (Volume1) to slave directory
+(example.com:/data/remote\_dir). To view the status of this
+geo-replication session run the following command on Master:
+
+ # gluster volume geo-replication Volume1 root@example.com:/data/remote_dir status
+
+ MASTER SLAVE STATUS
+ ______ ______________________________ ____________
+ Volume1 root@example.com:/data/remote_dir OK
+
+**Before Failure**
+
+Assume that the Master volume had 100 files and was mounted at
+/mnt/gluster on one of the client machines (client). Run the following
+command on Client machine to view the list of files:
+
+ client# ls /mnt/gluster | wc –l
+ 100
+
+The slave directory (example.com) will have same data as in the master
+volume and same can be viewed by running the following command on slave:
+
+ example.com# ls /data/remote_dir/ | wc –l
+ 100
+
+**After Failure**
+
+If one of the bricks (machine2) fails, then the status of
+Geo-replication session is changed from "OK" to "Faulty". To view the
+status of this geo-replication session run the following command on
+Master:
+
+ # gluster volume geo-replication Volume1 root@example.com:/data/remote_dir status
+
+ MASTER SLAVE STATUS
+ ______ ______________________________ ____________
+ Volume1 root@example.com:/data/remote_dir Faulty
+
+Machine2 is failed and now you can see discrepancy in number of files
+between master and slave. Few files will be missing from the master
+volume but they will be available only on slave as shown below.
+
+Run the following command on Client:
+
+ client # ls /mnt/gluster | wc –l
+ 52
+
+Run the following command on slave (example.com):
+
+ Example.com# # ls /data/remote_dir/ | wc –l
+ 100
+
+**To restore data from the slave machine**
+
+1. Stop all Master's geo-replication sessions using the following
+ command:
+
+ `# gluster volume geo-replication stop`
+
+ For example:
+
+ machine1# gluster volume geo-replication Volume1
+ example.com:/data/remote_dir stop
+
+ Stopping geo-replication session between Volume1 &
+ example.com:/data/remote_dir has been successful
+
+ > **Note**
+ >
+ > Repeat `# gluster volume geo-replication stop `command on all
+ > active geo-replication sessions of master volume.
+
+2. Replace the faulty brick in the master by using the following
+ command:
+
+ `# gluster volume replace-brick start`
+
+ For example:
+
+ machine1# gluster volume replace-brick Volume1 machine2:/export/dir16 machine3:/export/dir16 start
+ Replace-brick started successfully
+
+3. Commit the migration of data using the following command:
+
+ `# gluster volume replace-brick commit force `
+
+ For example:
+
+ machine1# gluster volume replace-brick Volume1 machine2:/export/dir16 machine3:/export/dir16 commit force
+ Replace-brick commit successful
+
+4. Verify the migration of brick by viewing the volume info using the
+ following command:
+
+ `# gluster volume info `
+
+ For example:
+
+ machine1# gluster volume info
+ Volume Name: Volume1
+ Type: Distribute
+ Status: Started
+ Number of Bricks: 2
+ Transport-type: tcp
+ Bricks:
+ Brick1: machine1:/export/dir16
+ Brick2: machine3:/export/dir16
+ Options Reconfigured:
+ geo-replication.indexing: on
+
+5. Run rsync command manually to sync data from slave to master
+ volume's client (mount point).
+
+ For example:
+
+ `example.com# rsync -PavhS --xattrs --ignore-existing /data/remote_dir/ client:/mnt/gluster`
+
+ Verify that the data is synced by using the following command:
+
+ On master volume, run the following command:
+
+ Client # ls | wc –l
+ 100
+
+ On the Slave run the following command:
+
+ example.com# ls /data/remote_dir/ | wc –l
+ 100
+
+ Now Master volume and Slave directory is synced.
+
+6. Restart geo-replication session from master to slave using the
+ following command:
+
+ `# gluster volume geo-replication start `
+
+ For example:
+
+ machine1# gluster volume geo-replication Volume1
+ example.com:/data/remote_dir start
+ Starting geo-replication session between Volume1 &
+ example.com:/data/remote_dir has been successful
+
+Best Practices
+==============
+
+**Manually Setting Time**
+
+If you have to change the time on your bricks manually, then you must
+set uniform time on all bricks. This avoids the out-of-time sync issue
+described in ?. Setting time backward corrupts the geo-replication
+index, so the recommended way to set the time manually is:
+
+1. Stop geo-replication between the master and slave using the
+ following command:
+
+ `# gluster volume geo-replication sto`p
+
+2. Stop the geo-replication indexing using the following command:
+
+ `# gluster volume set geo-replication.indexing of`f
+
+3. Set uniform time on all bricks.s
+
+4. Restart your geo-replication sessions by using the following
+ command:
+
+ `# gluster volume geo-replication start `
+
+**Running Geo-replication commands in one system**
+
+It is advisable to run the geo-replication commands in one of the bricks
+in the trusted storage pool. This is because, the log files for the
+geo-replication session would be stored in the \*Server\* where the
+Geo-replication start is initiated. Hence it would be easier to locate
+the log-files when required.
+
+**Isolation**
+
+Geo-replication slave operation is not sandboxed as of now and is ran as
+a privileged service. So for the security reason, it is advised to
+create a sandbox environment (dedicated machine / dedicated virtual
+machine / chroot/container type solution) by the administrator to run
+the geo-replication slave in it. Enhancement in this regard will be
+available in follow-up minor release.
+
+ [ Geo-replication over LAN ]: images/Geo-Rep_LAN.png
+ [ Geo-replication over WAN ]: images/Geo-Rep_WAN.png
+ [ Geo-replication over Internet ]: images/Geo-Rep03_Internet.png
+ [ Multi-site cascading Geo-replication ]: images/Geo-Rep04_Cascading.png
+ []: http://docs.redhat.com/docs/en-US/Red_Hat_Enterprise_Linux/6/html/Migration_Planning_Guide/ch04s07.html
+ [1]: www.redhat.com/support/
diff --git a/doc/admin-guide/en-US/markdown/admin_managing_snapshots.md b/doc/admin-guide/en-US/markdown/admin_managing_snapshots.md
new file mode 100644
index 000000000..e76ee9151
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/admin_managing_snapshots.md
@@ -0,0 +1,66 @@
+Managing GlusterFS Volume Snapshots
+==========================
+
+This section describes how to perform common GlusterFS volume snapshot
+management operations
+
+Pre-requisites
+=====================
+
+GlusterFS volume snapshot feature is based on thinly provisioned LVM snapshot.
+To make use of snapshot feature GlusterFS volume should fulfill following
+pre-requisites:
+
+* Each brick should be on an independent thinly provisioned LVM.
+* Brick LVM should not contain any other data other than brick.
+* None of the brick should be on a thick LVM.
+
+
+Snapshot Management
+=====================
+
+
+**Snapshot creation**
+
+*gluster snapshot create \<vol-name\> \[-n \<snap-name\>\] \[-d \<description\>\]*
+
+This command will create a snapshot of a GlusterFS volume. User can provide a snap-name and a description to identify the snap. The description cannot be more than 1024 characters.
+
+Volume should be present and it should be in started state.
+
+**Restoring snaps**
+
+*gluster snapshot restore -v \<vol-name\> \<snap-name\>*
+
+This command restores an already taken snapshot of a GlusterFS volume. Snapshot restore is an offline activity therefore if the volume is online then the restore operation will fail.
+
+Once the snapshot is restored it will be deleted from the list of snapshot.
+
+**Deleting snaps**
+
+*gluster snapshot delete \<volname\>\ -s \<snap-name\> \[force\]*
+
+This command will delete the specified snapshot.
+
+**Listing of available snaps**
+
+*gluster snapshot list \[\<volname\> \[-s \<snap-name>\]\]*
+
+This command is used to list all snapshots taken, or for a specified volume. If snap-name is provided then it will list the details of that snap.
+
+**Configuring the snapshot behavior**
+
+*gluster snapshot config \[\<vol-name | all\>\]*
+
+This command will display existing config values for a volume. If volume name is not provided then config values of all the volume is displayed. System config is displayed irrespective of volume name.
+
+*gluster snapshot config \<vol-name | all\> \[\<snap-max-hard-limit\> \<count\>\] \[\<snap-max-soft-limit\> \<percentage\>\]*
+
+The above command can be used to change the existing config values. If vol-name is provided then config value of that volume is changed, else it will set/change the system limit.
+
+The system limit is the default value of the config for all the volume. Volume specific limit cannot cross the system limit. If a volume specific limit is not provided then system limit will be considered.
+
+If any of this limit is decreased and the current snap count of the system/volume is more than the limit then the command will fail. If user still want to decrease the limit then force option should be used.
+
+
+
diff --git a/doc/admin-guide/en-US/markdown/admin_managing_volumes.md b/doc/admin-guide/en-US/markdown/admin_managing_volumes.md
new file mode 100644
index 000000000..6c06e27a0
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/admin_managing_volumes.md
@@ -0,0 +1,710 @@
+Managing GlusterFS Volumes
+==========================
+
+This section describes how to perform common GlusterFS management
+operations, including the following:
+
+- ?
+
+- ?
+
+- ?
+
+- ?
+
+- ?
+
+- ?
+
+- ?
+
+- ?
+
+Tuning Volume Options
+=====================
+
+You can tune volume options, as needed, while the cluster is online and
+available.
+
+> **Note**
+>
+> Red Hat recommends you to set server.allow-insecure option to ON if
+> there are too many bricks in each volume or if there are too many
+> services which have already utilized all the privileged ports in the
+> system. Turning this option ON allows ports to accept/reject messages
+> from insecure ports. So, use this option only if your deployment
+> requires it.
+
+To tune volume options
+
+- Tune volume options using the following command:
+
+ `# gluster volume set `
+
+ For example, to specify the performance cache size for test-volume:
+
+ # gluster volume set test-volume performance.cache-size 256MB
+ Set volume successful
+
+ The following table lists the Volume options along with its
+ description and default value:
+
+ > **Note**
+ >
+ > The default options given here are subject to modification at any
+ > given time and may not be the same for all versions.
+
+ -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Option Description Default Value Available Options
+ -------------------------------------- ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ---------------------------------- ---------------------------------------------------------------------------------------
+ auth.allow IP addresses of the clients which should be allowed to access the volume. \* (allow all) Valid IP address which includes wild card patterns including \*, such as 192.168.1.\*
+
+ auth.reject IP addresses of the clients which should be denied to access the volume. NONE (reject none) Valid IP address which includes wild card patterns including \*, such as 192.168.2.\*
+
+ client.grace-timeout Specifies the duration for the lock state to be maintained on the client after a network disconnection. 10 10 - 1800 secs
+
+ cluster.self-heal-window-size Specifies the maximum number of blocks per file on which self-heal would happen simultaneously. 16 0 - 1025 blocks
+
+ cluster.data-self-heal-algorithm Specifies the type of self-heal. If you set the option as "full", the entire file is copied from source to destinations. If the option is set to "diff" the file blocks that are not in sync are copied to destinations. Reset uses a heuristic model. If the file does not exist on one of the subvolumes, or a zero-byte file exists (created by entry self-heal) the entire content has to be copied anyway, so there is no benefit from using the "diff" algorithm. If the file size is about the same as page size, the entire file can be read and written with a few operations, which will be faster than "diff" which has to read checksums and then read and write. reset full | diff | reset
+
+ cluster.min-free-disk Specifies the percentage of disk space that must be kept free. Might be useful for non-uniform bricks. 10% Percentage of required minimum free disk space
+
+ cluster.stripe-block-size Specifies the size of the stripe unit that will be read from or written to. 128 KB (for all files) size in bytes
+
+ cluster.self-heal-daemon Allows you to turn-off proactive self-heal on replicated volumes. on On | Off
+
+ cluster.ensure-durability This option makes sure the data/metadata is durable across abrupt shutdown of the brick. on On | Off
+
+ diagnostics.brick-log-level Changes the log-level of the bricks. INFO DEBUG|WARNING|ERROR|CRITICAL|NONE|TRACE
+
+ diagnostics.client-log-level Changes the log-level of the clients. INFO DEBUG|WARNING|ERROR|CRITICAL|NONE|TRACE
+
+ diagnostics.latency-measurement Statistics related to the latency of each operation would be tracked. off On | Off
+
+ diagnostics.dump-fd-stats Statistics related to file-operations would be tracked. off On | Off
+
+ feature.read-only Enables you to mount the entire volume as read-only for all the clients (including NFS clients) accessing it. off On | Off
+
+ features.lock-heal Enables self-healing of locks when the network disconnects. on On | Off
+
+ features.quota-timeout For performance reasons, quota caches the directory sizes on client. You can set timeout indicating the maximum duration of directory sizes in cache, from the time they are populated, during which they are considered valid. 0 0 - 3600 secs
+
+ geo-replication.indexing Use this option to automatically sync the changes in the filesystem from Master to Slave. off On | Off
+
+ network.frame-timeout The time frame after which the operation has to be declared as dead, if the server does not respond for a particular operation. 1800 (30 mins) 1800 secs
+
+ network.ping-timeout The time duration for which the client waits to check if the server is responsive. When a ping timeout happens, there is a network disconnect between the client and server. All resources held by server on behalf of the client get cleaned up. When a reconnection happens, all resources will need to be re-acquired before the client can resume its operations on the server. Additionally, the locks will be acquired and the lock tables updated. 42 Secs 42 Secs
+ This reconnect is a very expensive operation and should be avoided.
+
+ nfs.enable-ino32 For 32-bit nfs clients or applications that do not support 64-bit inode numbers or large files, use this option from the CLI to make Gluster NFS return 32-bit inode numbers instead of 64-bit inode numbers. Applications that will benefit are those that were either: off On | Off
+ \* Built 32-bit and run on 32-bit machines.
+
+ \* Built 32-bit on 64-bit systems.
+
+ \* Built 64-bit but use a library built 32-bit, especially relevant for python and perl scripts.
+
+ Either of the conditions above can lead to application on Linux NFS clients failing with "Invalid argument" or "Value too large for defined data type" errors.
+
+ nfs.volume-access Set the access type for the specified sub-volume. read-write read-write|read-only
+
+ nfs.trusted-write If there is an UNSTABLE write from the client, STABLE flag will be returned to force the client to not send a COMMIT request. off On | Off
+ In some environments, combined with a replicated GlusterFS setup, this option can improve write performance. This flag allows users to trust Gluster replication logic to sync data to the disks and recover when required. COMMIT requests if received will be handled in a default manner by fsyncing. STABLE writes are still handled in a sync manner.
+
+ nfs.trusted-sync All writes and COMMIT requests are treated as async. This implies that no write requests are guaranteed to be on server disks when the write reply is received at the NFS client. Trusted sync includes trusted-write behavior. off On | Off
+
+ nfs.export-dir This option can be used to export specified comma separated subdirectories in the volume. The path must be an absolute path. Along with path allowed list of IPs/hostname can be associated with each subdirectory. If provided connection will allowed only from these IPs. Format: \<dir\>[(hostspec[|hostspec|...])][,...]. Where hostspec can be an IP address, hostname or an IP range in CIDR notation. **Note**: Care must be taken while configuring this option as invalid entries and/or unreachable DNS servers can introduce unwanted delay in all the mount calls. No sub directory exported. Absolute path with allowed list of IP/hostname.
+
+ nfs.export-volumes Enable/Disable exporting entire volumes, instead if used in conjunction with nfs3.export-dir, can allow setting up only subdirectories as exports. on On | Off
+
+ nfs.rpc-auth-unix Enable/Disable the AUTH\_UNIX authentication type. This option is enabled by default for better interoperability. However, you can disable it if required. on On | Off
+
+ nfs.rpc-auth-null Enable/Disable the AUTH\_NULL authentication type. It is not recommended to change the default value for this option. on On | Off
+
+ nfs.rpc-auth-allow\<IP- Addresses\> Allow a comma separated list of addresses and/or hostnames to connect to the server. By default, all clients are disallowed. This allows you to define a general rule for all exported volumes. Reject All IP address or Host name
+
+ nfs.rpc-auth-reject IP- Addresses Reject a comma separated list of addresses and/or hostnames from connecting to the server. By default, all connections are disallowed. This allows you to define a general rule for all exported volumes. Reject All IP address or Host name
+
+ nfs.ports-insecure Allow client connections from unprivileged ports. By default only privileged ports are allowed. This is a global setting in case insecure ports are to be enabled for all exports using a single option. off On | Off
+
+ nfs.addr-namelookup Turn-off name lookup for incoming client connections using this option. In some setups, the name server can take too long to reply to DNS queries resulting in timeouts of mount requests. Use this option to turn off name lookups during address authentication. Note, turning this off will prevent you from using hostnames in rpc-auth.addr.\* filters. on On | Off
+
+ nfs.register-with- portmap For systems that need to run multiple NFS servers, you need to prevent more than one from registering with portmap service. Use this option to turn off portmap registration for Gluster NFS. on On | Off
+
+ nfs.port \<PORT- NUMBER\> Use this option on systems that need Gluster NFS to be associated with a non-default port number. 38465- 38467
+
+ nfs.disable Turn-off volume being exported by NFS off On | Off
+
+ performance.write-behind-window-size Size of the per-file write-behind buffer. 1 MB Write-behind cache size
+
+ performance.io-thread-count The number of threads in IO threads translator. 16 0 - 65
+
+ performance.flush-behind If this option is set ON, instructs write-behind translator to perform flush in background, by returning success (or any errors, if any of previous writes were failed) to application even before flush is sent to backend filesystem. On On | Off
+
+ performance.cache-max-file-size Sets the maximum file size cached by the io-cache translator. Can use the normal size descriptors of KB, MB, GB,TB or PB (for example, 6GB). Maximum size uint64. 2 \^ 64 -1 bytes size in bytes
+
+ performance.cache-min-file-size Sets the minimum file size cached by the io-cache translator. Values same as "max" above. 0B size in bytes
+
+ performance.cache-refresh-timeout The cached data for a file will be retained till 'cache-refresh-timeout' seconds, after which data re-validation is performed. 1 sec 0 - 61
+
+ performance.cache-size Size of the read cache. 32 MB size in bytes
+
+ server.allow-insecure Allow client connections from unprivileged ports. By default only privileged ports are allowed. This is a global setting in case insecure ports are to be enabled for all exports using a single option. on On | Off
+
+ server.grace-timeout Specifies the duration for the lock state to be maintained on the server after a network disconnection. 10 10 - 1800 secs
+
+ server.statedump-path Location of the state dump file. /tmp directory of the brick New directory path
+
+ storage.health-check-interval Number of seconds between health-checks done on the filesystem that is used for the brick(s). Defaults to 30 seconds, set to 0 to disable. /tmp directory of the brick New directory path
+ -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+ You can view the changed volume options using
+ the` # gluster volume info ` command. For more information, see ?.
+
+Expanding Volumes
+=================
+
+You can expand volumes, as needed, while the cluster is online and
+available. For example, you might want to add a brick to a distributed
+volume, thereby increasing the distribution and adding to the capacity
+of the GlusterFS volume.
+
+Similarly, you might want to add a group of bricks to a distributed
+replicated volume, increasing the capacity of the GlusterFS volume.
+
+> **Note**
+>
+> When expanding distributed replicated and distributed striped volumes,
+> you need to add a number of bricks that is a multiple of the replica
+> or stripe count. For example, to expand a distributed replicated
+> volume with a replica count of 2, you need to add bricks in multiples
+> of 2 (such as 4, 6, 8, etc.).
+
+**To expand a volume**
+
+1. On the first server in the cluster, probe the server to which you
+ want to add the new brick using the following command:
+
+ `# gluster peer probe `
+
+ For example:
+
+ # gluster peer probe server4
+ Probe successful
+
+2. Add the brick using the following command:
+
+ `# gluster volume add-brick `
+
+ For example:
+
+ # gluster volume add-brick test-volume server4:/exp4
+ Add Brick successful
+
+3. Check the volume information using the following command:
+
+ `# gluster volume info `
+
+ The command displays information similar to the following:
+
+ Volume Name: test-volume
+ Type: Distribute
+ Status: Started
+ Number of Bricks: 4
+ Bricks:
+ Brick1: server1:/exp1
+ Brick2: server2:/exp2
+ Brick3: server3:/exp3
+ Brick4: server4:/exp4
+
+4. Rebalance the volume to ensure that all files are distributed to the
+ new brick.
+
+ You can use the rebalance command as described in ?.
+
+Shrinking Volumes
+=================
+
+You can shrink volumes, as needed, while the cluster is online and
+available. For example, you might need to remove a brick that has become
+inaccessible in a distributed volume due to hardware or network failure.
+
+> **Note**
+>
+> Data residing on the brick that you are removing will no longer be
+> accessible at the Gluster mount point. Note however that only the
+> configuration information is removed - you can continue to access the
+> data directly from the brick, as necessary.
+
+When shrinking distributed replicated and distributed striped volumes,
+you need to remove a number of bricks that is a multiple of the replica
+or stripe count. For example, to shrink a distributed striped volume
+with a stripe count of 2, you need to remove bricks in multiples of 2
+(such as 4, 6, 8, etc.). In addition, the bricks you are trying to
+remove must be from the same sub-volume (the same replica or stripe
+set).
+
+**To shrink a volume**
+
+1. Remove the brick using the following command:
+
+ `# gluster volume remove-brick ` `start`
+
+ For example, to remove server2:/exp2:
+
+ # gluster volume remove-brick test-volume server2:/exp2
+
+ Removing brick(s) can result in data loss. Do you want to Continue? (y/n)
+
+2. Enter "y" to confirm the operation. The command displays the
+ following message indicating that the remove brick operation is
+ successfully started:
+
+ Remove Brick successful
+
+3. (Optional) View the status of the remove brick operation using the
+ following command:
+
+ `# gluster volume remove-brick `` status`
+
+ For example, to view the status of remove brick operation on
+ server2:/exp2 brick:
+
+ # gluster volume remove-brick test-volume server2:/exp2 status
+ Node Rebalanced-files size scanned status
+ --------- ---------------- ---- ------- -----------
+ 617c923e-6450-4065-8e33-865e28d9428f 34 340 162 in progress
+
+4. Check the volume information using the following command:
+
+ `# gluster volume info `
+
+ The command displays information similar to the following:
+
+ # gluster volume info
+ Volume Name: test-volume
+ Type: Distribute
+ Status: Started
+ Number of Bricks: 3
+ Bricks:
+ Brick1: server1:/exp1
+ Brick3: server3:/exp3
+ Brick4: server4:/exp4
+
+5. Rebalance the volume to ensure that all files are distributed to the
+ new brick.
+
+ You can use the rebalance command as described in ?.
+
+Migrating Volumes
+=================
+
+You can migrate the data from one brick to another, as needed, while the
+cluster is online and available.
+
+**To migrate a volume**
+
+1. Make sure the new brick, server5 in this example, is successfully
+ added to the cluster.
+
+ For more information, see ?.
+
+2. Migrate the data from one brick to another using the following
+ command:
+
+ ` # gluster volume replace-brick start`
+
+ For example, to migrate the data in server3:/exp3 to server5:/exp5
+ in test-volume:
+
+ # gluster volume replace-brick test-volume server3:/exp3 server5:exp5 start
+ Replace brick start operation successful
+
+ > **Note**
+ >
+ > You need to have the FUSE package installed on the server on which
+ > you are running the replace-brick command for the command to work.
+
+3. To pause the migration operation, if needed, use the following
+ command:
+
+ `# gluster volume replace-brick pause `
+
+ For example, to pause the data migration from server3:/exp3 to
+ server5:/exp5 in test-volume:
+
+ # gluster volume replace-brick test-volume server3:/exp3 server5:exp5 pause
+ Replace brick pause operation successful
+
+4. To abort the migration operation, if needed, use the following
+ command:
+
+ ` # gluster volume replace-brick abort `
+
+ For example, to abort the data migration from server3:/exp3 to
+ server5:/exp5 in test-volume:
+
+ # gluster volume replace-brick test-volume server3:/exp3 server5:exp5 abort
+ Replace brick abort operation successful
+
+5. Check the status of the migration operation using the following
+ command:
+
+ ` # gluster volume replace-brick status `
+
+ For example, to check the data migration status from server3:/exp3
+ to server5:/exp5 in test-volume:
+
+ # gluster volume replace-brick test-volume server3:/exp3 server5:/exp5 status
+ Current File = /usr/src/linux-headers-2.6.31-14/block/Makefile
+ Number of files migrated = 10567
+ Migration complete
+
+ The status command shows the current file being migrated along with
+ the current total number of files migrated. After completion of
+ migration, it displays Migration complete.
+
+6. Commit the migration of data from one brick to another using the
+ following command:
+
+ ` # gluster volume replace-brick commit `
+
+ For example, to commit the data migration from server3:/exp3 to
+ server5:/exp5 in test-volume:
+
+ # gluster volume replace-brick test-volume server3:/exp3 server5:/exp5 commit
+ replace-brick commit successful
+
+7. Verify the migration of brick by viewing the volume info using the
+ following command:
+
+ `# gluster volume info `
+
+ For example, to check the volume information of new brick
+ server5:/exp5 in test-volume:
+
+ # gluster volume info test-volume
+ Volume Name: testvolume
+ Type: Replicate
+ Status: Started
+ Number of Bricks: 4
+ Transport-type: tcp
+ Bricks:
+ Brick1: server1:/exp1
+ Brick2: server2:/exp2
+ Brick3: server4:/exp4
+ Brick4: server5:/exp5
+
+ The new volume details are displayed.
+
+ The new volume details are displayed.
+
+ In the above example, previously, there were bricks; 1,2,3, and 4
+ and now brick 3 is replaced by brick 5.
+
+Rebalancing Volumes
+===================
+
+After expanding or shrinking a volume (using the add-brick and
+remove-brick commands respectively), you need to rebalance the data
+among the servers. New directories created after expanding or shrinking
+of the volume will be evenly distributed automatically. For all the
+existing directories, the distribution can be fixed by rebalancing the
+layout and/or data.
+
+This section describes how to rebalance GlusterFS volumes in your
+storage environment, using the following common scenarios:
+
+- Fix Layout - Fixes the layout changes so that the files can actually
+ go to newly added nodes. For more information, see ?.
+
+- Fix Layout and Migrate Data - Rebalances volume by fixing the layout
+ changes and migrating the existing data. For more information, see
+ ?.
+
+Rebalancing Volume to Fix Layout Changes
+----------------------------------------
+
+Fixing the layout is necessary because the layout structure is static
+for a given directory. In a scenario where new bricks have been added to
+the existing volume, newly created files in existing directories will
+still be distributed only among the old bricks. The
+`# gluster volume rebalance fix-layout start `command will fix the
+layout information so that the files can also go to newly added nodes.
+When this command is issued, all the file stat information which is
+already cached will get revalidated.
+
+A fix-layout rebalance will only fix the layout changes and does not
+migrate data. If you want to migrate the existing data,
+use`# gluster volume rebalance start ` command to rebalance data among
+the servers.
+
+**To rebalance a volume to fix layout changes**
+
+- Start the rebalance operation on any one of the server using the
+ following command:
+
+ `# gluster volume rebalance fix-layout start`
+
+ For example:
+
+ # gluster volume rebalance test-volume fix-layout start
+ Starting rebalance on volume test-volume has been successful
+
+Rebalancing Volume to Fix Layout and Migrate Data
+-------------------------------------------------
+
+After expanding or shrinking a volume (using the add-brick and
+remove-brick commands respectively), you need to rebalance the data
+among the servers.
+
+**To rebalance a volume to fix layout and migrate the existing data**
+
+- Start the rebalance operation on any one of the server using the
+ following command:
+
+ `# gluster volume rebalance start`
+
+ For example:
+
+ # gluster volume rebalance test-volume start
+ Starting rebalancing on volume test-volume has been successful
+
+- Start the migration operation forcefully on any one of the server
+ using the following command:
+
+ `# gluster volume rebalance start force`
+
+ For example:
+
+ # gluster volume rebalance test-volume start force
+ Starting rebalancing on volume test-volume has been successful
+
+Displaying Status of Rebalance Operation
+----------------------------------------
+
+You can display the status information about rebalance volume operation,
+as needed.
+
+**To view status of rebalance volume**
+
+- Check the status of the rebalance operation, using the following
+ command:
+
+ `# gluster volume rebalance status`
+
+ For example:
+
+ # gluster volume rebalance test-volume status
+ Node Rebalanced-files size scanned status
+ --------- ---------------- ---- ------- -----------
+ 617c923e-6450-4065-8e33-865e28d9428f 416 1463 312 in progress
+
+ The time to complete the rebalance operation depends on the number
+ of files on the volume along with the corresponding file sizes.
+ Continue checking the rebalance status, verifying that the number of
+ files rebalanced or total files scanned keeps increasing.
+
+ For example, running the status command again might display a result
+ similar to the following:
+
+ # gluster volume rebalance test-volume status
+ Node Rebalanced-files size scanned status
+ --------- ---------------- ---- ------- -----------
+ 617c923e-6450-4065-8e33-865e28d9428f 498 1783 378 in progress
+
+ The rebalance status displays the following when the rebalance is
+ complete:
+
+ # gluster volume rebalance test-volume status
+ Node Rebalanced-files size scanned status
+ --------- ---------------- ---- ------- -----------
+ 617c923e-6450-4065-8e33-865e28d9428f 502 1873 334 completed
+
+Stopping Rebalance Operation
+----------------------------
+
+You can stop the rebalance operation, as needed.
+
+**To stop rebalance**
+
+- Stop the rebalance operation using the following command:
+
+ `# gluster volume rebalance stop`
+
+ For example:
+
+ # gluster volume rebalance test-volume stop
+ Node Rebalanced-files size scanned status
+ --------- ---------------- ---- ------- -----------
+ 617c923e-6450-4065-8e33-865e28d9428f 59 590 244 stopped
+ Stopped rebalance process on volume test-volume
+
+Stopping Volumes
+================
+
+To stop a volume
+
+1. Stop the volume using the following command:
+
+ `# gluster volume stop `
+
+ For example, to stop test-volume:
+
+ # gluster volume stop test-volume
+ Stopping volume will make its data inaccessible. Do you want to continue? (y/n)
+
+2. Enter `y` to confirm the operation. The output of the command
+ displays the following:
+
+ Stopping volume test-volume has been successful
+
+Deleting Volumes
+================
+
+To delete a volume
+
+1. Delete the volume using the following command:
+
+ `# gluster volume delete `
+
+ For example, to delete test-volume:
+
+ # gluster volume delete test-volume
+ Deleting volume will erase all information about the volume. Do you want to continue? (y/n)
+
+2. Enter `y` to confirm the operation. The command displays the
+ following:
+
+ Deleting volume test-volume has been successful
+
+Triggering Self-Heal on Replicate
+=================================
+
+In replicate module, previously you had to manually trigger a self-heal
+when a brick goes offline and comes back online, to bring all the
+replicas in sync. Now the pro-active self-heal daemon runs in the
+background, diagnoses issues and automatically initiates self-healing
+every 10 minutes on the files which requires*healing*.
+
+You can view the list of files that need *healing*, the list of files
+which are currently/previously *healed*, list of files which are in
+split-brain state, and you can manually trigger self-heal on the entire
+volume or only on the files which need *healing*.
+
+- Trigger self-heal only on the files which requires *healing*:
+
+ `# gluster volume heal `
+
+ For example, to trigger self-heal on files which requires *healing*
+ of test-volume:
+
+ # gluster volume heal test-volume
+ Heal operation on volume test-volume has been successful
+
+- Trigger self-heal on all the files of a volume:
+
+ `# gluster volume heal ` `full`
+
+ For example, to trigger self-heal on all the files of of
+ test-volume:
+
+ # gluster volume heal test-volume full
+ Heal operation on volume test-volume has been successful
+
+- View the list of files that needs *healing*:
+
+ `# gluster volume heal ` `info`
+
+ For example, to view the list of files on test-volume that needs
+ *healing*:
+
+ # gluster volume heal test-volume info
+ Brick :/gfs/test-volume_0
+ Number of entries: 0
+
+ Brick :/gfs/test-volume_1
+ Number of entries: 101
+ /95.txt
+ /32.txt
+ /66.txt
+ /35.txt
+ /18.txt
+ /26.txt
+ /47.txt
+ /55.txt
+ /85.txt
+ ...
+
+- View the list of files that are self-healed:
+
+ `# gluster volume heal ` `info healed`
+
+ For example, to view the list of files on test-volume that are
+ self-healed:
+
+ # gluster volume heal test-volume info healed
+ Brick :/gfs/test-volume_0
+ Number of entries: 0
+
+ Brick :/gfs/test-volume_1
+ Number of entries: 69
+ /99.txt
+ /93.txt
+ /76.txt
+ /11.txt
+ /27.txt
+ /64.txt
+ /80.txt
+ /19.txt
+ /41.txt
+ /29.txt
+ /37.txt
+ /46.txt
+ ...
+
+- View the list of files of a particular volume on which the self-heal
+ failed:
+
+ `# gluster volume heal ` `info failed`
+
+ For example, to view the list of files of test-volume that are not
+ self-healed:
+
+ # gluster volume heal test-volume info failed
+ Brick :/gfs/test-volume_0
+ Number of entries: 0
+
+ Brick server2:/gfs/test-volume_3
+ Number of entries: 72
+ /90.txt
+ /95.txt
+ /77.txt
+ /71.txt
+ /87.txt
+ /24.txt
+ ...
+
+- View the list of files of a particular volume which are in
+ split-brain state:
+
+ `# gluster volume heal ` `info split-brain`
+
+ For example, to view the list of files of test-volume which are in
+ split-brain state:
+
+ # gluster volume heal test-volume info split-brain
+ Brick server1:/gfs/test-volume_2
+ Number of entries: 12
+ /83.txt
+ /28.txt
+ /69.txt
+ ...
+
+ Brick :/gfs/test-volume_2
+ Number of entries: 12
+ /83.txt
+ /28.txt
+ /69.txt
+ ...
+
+
diff --git a/doc/admin-guide/en-US/markdown/admin_monitoring_workload.md b/doc/admin-guide/en-US/markdown/admin_monitoring_workload.md
new file mode 100644
index 000000000..0312bd048
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/admin_monitoring_workload.md
@@ -0,0 +1,931 @@
+Monitoring your GlusterFS Workload
+==================================
+
+You can monitor the GlusterFS volumes on different parameters.
+Monitoring volumes helps in capacity planning and performance tuning
+tasks of the GlusterFS volume. Using these information, you can identify
+and troubleshoot issues.
+
+You can use Volume Top and Profile commands to view the performance and
+identify bottlenecks/hotspots of each brick of a volume. This helps
+system administrators to get vital performance information whenever
+performance needs to be probed.
+
+You can also perform statedump of the brick processes and nfs server
+process of a volume, and also view volume status and volume information.
+
+Running GlusterFS Volume Profile Command
+========================================
+
+GlusterFS Volume Profile command provides an interface to get the
+per-brick I/O information for each File Operation (FOP) of a volume. The
+per brick information helps in identifying bottlenecks in the storage
+system.
+
+This section describes how to run GlusterFS Volume Profile command by
+performing the following operations:
+
+- ?
+
+- ?
+
+- ?
+
+Start Profiling
+---------------
+
+You must start the Profiling to view the File Operation information for
+each brick.
+
+**To start profiling:**
+
+- Start profiling using the following command:
+
+`# gluster volume profile start `
+
+For example, to start profiling on test-volume:
+
+ # gluster volume profile test-volume start
+ Profiling started on test-volume
+
+When profiling on the volume is started, the following additional
+options are displayed in the Volume Info:
+
+ diagnostics.count-fop-hits: on
+
+ diagnostics.latency-measurement: on
+
+Displaying the I/0 Information
+------------------------------
+
+You can view the I/O information of each brick.
+
+To display I/O information:
+
+- Display the I/O information using the following command:
+
+`# gluster volume profile info`
+
+For example, to see the I/O information on test-volume:
+
+ # gluster volume profile test-volume info
+ Brick: Test:/export/2
+ Cumulative Stats:
+
+ Block 1b+ 32b+ 64b+
+ Size:
+ Read: 0 0 0
+ Write: 908 28 8
+
+ Block 128b+ 256b+ 512b+
+ Size:
+ Read: 0 6 4
+ Write: 5 23 16
+
+ Block 1024b+ 2048b+ 4096b+
+ Size:
+ Read: 0 52 17
+ Write: 15 120 846
+
+ Block 8192b+ 16384b+ 32768b+
+ Size:
+ Read: 52 8 34
+ Write: 234 134 286
+
+ Block 65536b+ 131072b+
+ Size:
+ Read: 118 622
+ Write: 1341 594
+
+
+ %-latency Avg- Min- Max- calls Fop
+ latency Latency Latency
+ ___________________________________________________________
+ 4.82 1132.28 21.00 800970.00 4575 WRITE
+ 5.70 156.47 9.00 665085.00 39163 READDIRP
+ 11.35 315.02 9.00 1433947.00 38698 LOOKUP
+ 11.88 1729.34 21.00 2569638.00 7382 FXATTROP
+ 47.35 104235.02 2485.00 7789367.00 488 FSYNC
+
+ ------------------
+
+ ------------------
+
+ Duration : 335
+
+ BytesRead : 94505058
+
+ BytesWritten : 195571980
+
+Stop Profiling
+--------------
+
+You can stop profiling the volume, if you do not need profiling
+information anymore.
+
+**To stop profiling**
+
+- Stop profiling using the following command:
+
+ `# gluster volume profile stop`
+
+ For example, to stop profiling on test-volume:
+
+ `# gluster volume profile stop`
+
+ `Profiling stopped on test-volume`
+
+Running GlusterFS Volume TOP Command
+====================================
+
+GlusterFS Volume Top command allows you to view the glusterfs bricks’
+performance metrics like read, write, file open calls, file read calls,
+file write calls, directory open calls, and directory real calls. The
+top command displays up to 100 results.
+
+This section describes how to run and view the results for the following
+GlusterFS Top commands:
+
+- ?
+
+- ?
+
+- ?
+
+- ?
+
+- ?
+
+- ?
+
+- ?
+
+Viewing Open fd Count and Maximum fd Count
+------------------------------------------
+
+You can view both current open fd count (list of files that are
+currently the most opened and the count) on the brick and the maximum
+open fd count (count of files that are the currently open and the count
+of maximum number of files opened at any given point of time, since the
+servers are up and running). If the brick name is not specified, then
+open fd metrics of all the bricks belonging to the volume will be
+displayed.
+
+**To view open fd count and maximum fd count:**
+
+- View open fd count and maximum fd count using the following command:
+
+ `# gluster volume top open [brick ] [list-cnt ]`
+
+ For example, to view open fd count and maximum fd count on brick
+ server:/export of test-volume and list top 10 open calls:
+
+ `# gluster volume top open brick list-cnt `
+
+ `Brick: server:/export/dir1 `
+
+ `Current open fd's: 34 Max open fd's: 209 `
+
+ ==========Open file stats========
+
+ open file name
+ call count
+
+ 2 /clients/client0/~dmtmp/PARADOX/
+ COURSES.DB
+
+ 11 /clients/client0/~dmtmp/PARADOX/
+ ENROLL.DB
+
+ 11 /clients/client0/~dmtmp/PARADOX/
+ STUDENTS.DB
+
+ 10 /clients/client0/~dmtmp/PWRPNT/
+ TIPS.PPT
+
+ 10 /clients/client0/~dmtmp/PWRPNT/
+ PCBENCHM.PPT
+
+ 9 /clients/client7/~dmtmp/PARADOX/
+ STUDENTS.DB
+
+ 9 /clients/client1/~dmtmp/PARADOX/
+ STUDENTS.DB
+
+ 9 /clients/client2/~dmtmp/PARADOX/
+ STUDENTS.DB
+
+ 9 /clients/client0/~dmtmp/PARADOX/
+ STUDENTS.DB
+
+ 9 /clients/client8/~dmtmp/PARADOX/
+ STUDENTS.DB
+
+Viewing Highest File Read Calls
+-------------------------------
+
+You can view highest read calls on each brick. If brick name is not
+specified, then by default, list of 100 files will be displayed.
+
+**To view highest file Read calls:**
+
+- View highest file Read calls using the following command:
+
+ `# gluster volume top read [brick ] [list-cnt ] `
+
+ For example, to view highest Read calls on brick server:/export of
+ test-volume:
+
+ `# gluster volume top read brick list-cnt `
+
+ `Brick:` server:/export/dir1
+
+ ==========Read file stats========
+
+ read filename
+ call count
+
+ 116 /clients/client0/~dmtmp/SEED/LARGE.FIL
+
+ 64 /clients/client0/~dmtmp/SEED/MEDIUM.FIL
+
+ 54 /clients/client2/~dmtmp/SEED/LARGE.FIL
+
+ 54 /clients/client6/~dmtmp/SEED/LARGE.FIL
+
+ 54 /clients/client5/~dmtmp/SEED/LARGE.FIL
+
+ 54 /clients/client0/~dmtmp/SEED/LARGE.FIL
+
+ 54 /clients/client3/~dmtmp/SEED/LARGE.FIL
+
+ 54 /clients/client4/~dmtmp/SEED/LARGE.FIL
+
+ 54 /clients/client9/~dmtmp/SEED/LARGE.FIL
+
+ 54 /clients/client8/~dmtmp/SEED/LARGE.FIL
+
+Viewing Highest File Write Calls
+--------------------------------
+
+You can view list of files which has highest file write calls on each
+brick. If brick name is not specified, then by default, list of 100
+files will be displayed.
+
+**To view highest file Write calls:**
+
+- View highest file Write calls using the following command:
+
+ `# gluster volume top write [brick ] [list-cnt ] `
+
+ For example, to view highest Write calls on brick server:/export of
+ test-volume:
+
+ `# gluster volume top write brick list-cnt `
+
+ `Brick: server:/export/dir1 `
+
+ ==========Write file stats========
+ write call count filename
+
+ 83 /clients/client0/~dmtmp/SEED/LARGE.FIL
+
+ 59 /clients/client7/~dmtmp/SEED/LARGE.FIL
+
+ 59 /clients/client1/~dmtmp/SEED/LARGE.FIL
+
+ 59 /clients/client2/~dmtmp/SEED/LARGE.FIL
+
+ 59 /clients/client0/~dmtmp/SEED/LARGE.FIL
+
+ 59 /clients/client8/~dmtmp/SEED/LARGE.FIL
+
+ 59 /clients/client5/~dmtmp/SEED/LARGE.FIL
+
+ 59 /clients/client4/~dmtmp/SEED/LARGE.FIL
+
+ 59 /clients/client6/~dmtmp/SEED/LARGE.FIL
+
+ 59 /clients/client3/~dmtmp/SEED/LARGE.FIL
+
+Viewing Highest Open Calls on Directories
+-----------------------------------------
+
+You can view list of files which has highest open calls on directories
+of each brick. If brick name is not specified, then the metrics of all
+the bricks belonging to that volume will be displayed.
+
+To view list of open calls on each directory
+
+- View list of open calls on each directory using the following
+ command:
+
+ `# gluster volume top opendir [brick ] [list-cnt ] `
+
+ For example, to view open calls on brick server:/export/ of
+ test-volume:
+
+ `# gluster volume top opendir brick list-cnt `
+
+ `Brick: server:/export/dir1 `
+
+ ==========Directory open stats========
+
+ Opendir count directory name
+
+ 1001 /clients/client0/~dmtmp
+
+ 454 /clients/client8/~dmtmp
+
+ 454 /clients/client2/~dmtmp
+
+ 454 /clients/client6/~dmtmp
+
+ 454 /clients/client5/~dmtmp
+
+ 454 /clients/client9/~dmtmp
+
+ 443 /clients/client0/~dmtmp/PARADOX
+
+ 408 /clients/client1/~dmtmp
+
+ 408 /clients/client7/~dmtmp
+
+ 402 /clients/client4/~dmtmp
+
+Viewing Highest Read Calls on Directory
+---------------------------------------
+
+You can view list of files which has highest directory read calls on
+each brick. If brick name is not specified, then the metrics of all the
+bricks belonging to that volume will be displayed.
+
+**To view list of highest directory read calls on each brick**
+
+- View list of highest directory read calls on each brick using the
+ following command:
+
+ `# gluster volume top readdir [brick ] [list-cnt ] `
+
+ For example, to view highest directory read calls on brick
+ server:/export of test-volume:
+
+ `# gluster volume top readdir brick list-cnt `
+
+ `Brick: `
+
+ ==========Directory readdirp stats========
+
+ readdirp count directory name
+
+ 1996 /clients/client0/~dmtmp
+
+ 1083 /clients/client0/~dmtmp/PARADOX
+
+ 904 /clients/client8/~dmtmp
+
+ 904 /clients/client2/~dmtmp
+
+ 904 /clients/client6/~dmtmp
+
+ 904 /clients/client5/~dmtmp
+
+ 904 /clients/client9/~dmtmp
+
+ 812 /clients/client1/~dmtmp
+
+ 812 /clients/client7/~dmtmp
+
+ 800 /clients/client4/~dmtmp
+
+Viewing List of Read Performance on each Brick
+----------------------------------------------
+
+You can view the read throughput of files on each brick. If brick name
+is not specified, then the metrics of all the bricks belonging to that
+volume will be displayed. The output will be the read throughput.
+
+ ==========Read throughput file stats========
+
+ read filename Time
+ through
+ put(MBp
+ s)
+
+ 2570.00 /clients/client0/~dmtmp/PWRPNT/ -2011-01-31
+ TRIDOTS.POT 15:38:36.894610
+ 2570.00 /clients/client0/~dmtmp/PWRPNT/ -2011-01-31
+ PCBENCHM.PPT 15:38:39.815310
+ 2383.00 /clients/client2/~dmtmp/SEED/ -2011-01-31
+ MEDIUM.FIL 15:52:53.631499
+
+ 2340.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
+ MEDIUM.FIL 15:38:36.926198
+
+ 2299.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
+ LARGE.FIL 15:38:36.930445
+
+ 2259.00 /clients/client0/~dmtmp/PARADOX/ -2011-01-31
+ COURSES.X04 15:38:40.549919
+
+ 2221.00 /clients/client0/~dmtmp/PARADOX/ -2011-01-31
+ STUDENTS.VAL 15:52:53.298766
+
+ 2221.00 /clients/client3/~dmtmp/SEED/ -2011-01-31
+ COURSES.DB 15:39:11.776780
+
+ 2184.00 /clients/client3/~dmtmp/SEED/ -2011-01-31
+ MEDIUM.FIL 15:39:10.251764
+
+ 2184.00 /clients/client5/~dmtmp/WORD/ -2011-01-31
+ BASEMACH.DOC 15:39:09.336572
+
+This command will initiate a dd for the specified count and block size
+and measures the corresponding throughput.
+
+**To view list of read performance on each brick**
+
+- View list of read performance on each brick using the following
+ command:
+
+ `# gluster volume top read-perf [bs count ] [brick ] [list-cnt ]`
+
+ For example, to view read performance on brick server:/export/ of
+ test-volume, 256 block size of count 1, and list count 10:
+
+ `# gluster volume top read-perf bs 256 count 1 brick list-cnt `
+
+ `Brick: server:/export/dir1 256 bytes (256 B) copied, Throughput: 4.1 MB/s `
+
+ ==========Read throughput file stats========
+
+ read filename Time
+ through
+ put(MBp
+ s)
+
+ 2912.00 /clients/client0/~dmtmp/PWRPNT/ -2011-01-31
+ TRIDOTS.POT 15:38:36.896486
+
+ 2570.00 /clients/client0/~dmtmp/PWRPNT/ -2011-01-31
+ PCBENCHM.PPT 15:38:39.815310
+
+ 2383.00 /clients/client2/~dmtmp/SEED/ -2011-01-31
+ MEDIUM.FIL 15:52:53.631499
+
+ 2340.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
+ MEDIUM.FIL 15:38:36.926198
+
+ 2299.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
+ LARGE.FIL 15:38:36.930445
+
+ 2259.00 /clients/client0/~dmtmp/PARADOX/ -2011-01-31
+ COURSES.X04 15:38:40.549919
+
+ 2221.00 /clients/client9/~dmtmp/PARADOX/ -2011-01-31
+ STUDENTS.VAL 15:52:53.298766
+
+ 2221.00 /clients/client8/~dmtmp/PARADOX/ -2011-01-31
+ COURSES.DB 15:39:11.776780
+
+ 2184.00 /clients/client3/~dmtmp/SEED/ -2011-01-31
+ MEDIUM.FIL 15:39:10.251764
+
+ 2184.00 /clients/client5/~dmtmp/WORD/ -2011-01-31
+ BASEMACH.DOC 15:39:09.336572
+
+
+Viewing List of Write Performance on each Brick
+-----------------------------------------------
+
+You can view list of write throughput of files on each brick. If brick
+name is not specified, then the metrics of all the bricks belonging to
+that volume will be displayed. The output will be the write throughput.
+
+This command will initiate a dd for the specified count and block size
+and measures the corresponding throughput. To view list of write
+performance on each brick:
+
+- View list of write performance on each brick using the following
+ command:
+
+ `# gluster volume top write-perf [bs count ] [brick ] [list-cnt ] `
+
+ For example, to view write performance on brick server:/export/ of
+ test-volume, 256 block size of count 1, and list count 10:
+
+ `# gluster volume top write-perf bs 256 count 1 brick list-cnt `
+
+ `Brick`: server:/export/dir1
+
+ `256 bytes (256 B) copied, Throughput: 2.8 MB/s `
+
+ ==========Write throughput file stats========
+
+ write filename Time
+ throughput
+ (MBps)
+
+ 1170.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
+ SMALL.FIL 15:39:09.171494
+
+ 1008.00 /clients/client6/~dmtmp/SEED/ -2011-01-31
+ LARGE.FIL 15:39:09.73189
+
+ 949.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
+ MEDIUM.FIL 15:38:36.927426
+
+ 936.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
+ LARGE.FIL 15:38:36.933177
+ 897.00 /clients/client5/~dmtmp/SEED/ -2011-01-31
+ MEDIUM.FIL 15:39:09.33628
+
+ 897.00 /clients/client6/~dmtmp/SEED/ -2011-01-31
+ MEDIUM.FIL 15:39:09.27713
+
+ 885.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
+ SMALL.FIL 15:38:36.924271
+
+ 528.00 /clients/client5/~dmtmp/SEED/ -2011-01-31
+ LARGE.FIL 15:39:09.81893
+
+ 516.00 /clients/client6/~dmtmp/ACCESS/ -2011-01-31
+ FASTENER.MDB 15:39:01.797317
+
+Displaying Volume Information
+=============================
+
+You can display information about a specific volume, or all volumes, as
+needed.
+
+**To display volume information**
+
+- Display information about a specific volume using the following
+ command:
+
+ `# gluster volume info ``VOLNAME`
+
+ For example, to display information about test-volume:
+
+ # gluster volume info test-volume
+ Volume Name: test-volume
+ Type: Distribute
+ Status: Created
+ Number of Bricks: 4
+ Bricks:
+ Brick1: server1:/exp1
+ Brick2: server2:/exp2
+ Brick3: server3:/exp3
+ Brick4: server4:/exp4
+
+- Display information about all volumes using the following command:
+
+ `# gluster volume info all`
+
+ # gluster volume info all
+
+ Volume Name: test-volume
+ Type: Distribute
+ Status: Created
+ Number of Bricks: 4
+ Bricks:
+ Brick1: server1:/exp1
+ Brick2: server2:/exp2
+ Brick3: server3:/exp3
+ Brick4: server4:/exp4
+
+ Volume Name: mirror
+ Type: Distributed-Replicate
+ Status: Started
+ Number of Bricks: 2 X 2 = 4
+ Bricks:
+ Brick1: server1:/brick1
+ Brick2: server2:/brick2
+ Brick3: server3:/brick3
+ Brick4: server4:/brick4
+
+ Volume Name: Vol
+ Type: Distribute
+ Status: Started
+ Number of Bricks: 1
+ Bricks:
+ Brick: server:/brick6
+
+Performing Statedump on a Volume
+================================
+
+Statedump is a mechanism through which you can get details of all
+internal variables and state of the glusterfs process at the time of
+issuing the command.You can perform statedumps of the brick processes
+and nfs server process of a volume using the statedump command. The
+following options can be used to determine what information is to be
+dumped:
+
+- **mem** - Dumps the memory usage and memory pool details of the
+ bricks.
+
+- **iobuf** - Dumps iobuf details of the bricks.
+
+- **priv** - Dumps private information of loaded translators.
+
+- **callpool** - Dumps the pending calls of the volume.
+
+- **fd** - Dumps the open fd tables of the volume.
+
+- **inode** - Dumps the inode tables of the volume.
+
+**To display volume statedump**
+
+- Display statedump of a volume or NFS server using the following
+ command:
+
+ `# gluster volume statedump [nfs] [all|mem|iobuf|callpool|priv|fd|inode]`
+
+ For example, to display statedump of test-volume:
+
+ # gluster volume statedump test-volume
+ Volume statedump successful
+
+ The statedump files are created on the brick servers in the` /tmp`
+ directory or in the directory set using `server.statedump-path`
+ volume option. The naming convention of the dump file is
+ `<brick-path>.<brick-pid>.dump`.
+
+- By defult, the output of the statedump is stored at
+ ` /tmp/<brickname.PID.dump>` file on that particular server. Change
+ the directory of the statedump file using the following command:
+
+ `# gluster volume set server.statedump-path `
+
+ For example, to change the location of the statedump file of
+ test-volume:
+
+ # gluster volume set test-volume server.statedump-path /usr/local/var/log/glusterfs/dumps/
+ Set volume successful
+
+ You can view the changed path of the statedump file using the
+ following command:
+
+ `# gluster volume info `
+
+Displaying Volume Status
+========================
+
+You can display the status information about a specific volume, brick or
+all volumes, as needed. Status information can be used to understand the
+current status of the brick, nfs processes, and overall file system.
+Status information can also be used to monitor and debug the volume
+information. You can view status of the volume along with the following
+details:
+
+- **detail** - Displays additional information about the bricks.
+
+- **clients** - Displays the list of clients connected to the volume.
+
+- **mem** - Displays the memory usage and memory pool details of the
+ bricks.
+
+- **inode** - Displays the inode tables of the volume.
+
+- **fd** - Displays the open fd (file descriptors) tables of the
+ volume.
+
+- **callpool** - Displays the pending calls of the volume.
+
+**To display volume status**
+
+- Display information about a specific volume using the following
+ command:
+
+ `# gluster volume status [all| []] [detail|clients|mem|inode|fd|callpool]`
+
+ For example, to display information about test-volume:
+
+ # gluster volume status test-volume
+ STATUS OF VOLUME: test-volume
+ BRICK PORT ONLINE PID
+ --------------------------------------------------------
+ arch:/export/1 24009 Y 22445
+ --------------------------------------------------------
+ arch:/export/2 24010 Y 22450
+
+- Display information about all volumes using the following command:
+
+ `# gluster volume status all`
+
+ # gluster volume status all
+ STATUS OF VOLUME: volume-test
+ BRICK PORT ONLINE PID
+ --------------------------------------------------------
+ arch:/export/4 24010 Y 22455
+
+ STATUS OF VOLUME: test-volume
+ BRICK PORT ONLINE PID
+ --------------------------------------------------------
+ arch:/export/1 24009 Y 22445
+ --------------------------------------------------------
+ arch:/export/2 24010 Y 22450
+
+- Display additional information about the bricks using the following
+ command:
+
+ `# gluster volume status detail`
+
+ For example, to display additional information about the bricks of
+ test-volume:
+
+ # gluster volume status test-volume details
+ STATUS OF VOLUME: test-volume
+ -------------------------------------------
+ Brick : arch:/export/1
+ Port : 24009
+ Online : Y
+ Pid : 16977
+ File System : rootfs
+ Device : rootfs
+ Mount Options : rw
+ Disk Space Free : 13.8GB
+ Total Disk Space : 46.5GB
+ Inode Size : N/A
+ Inode Count : N/A
+ Free Inodes : N/A
+
+ Number of Bricks: 1
+ Bricks:
+ Brick: server:/brick6
+
+- Display the list of clients accessing the volumes using the
+ following command:
+
+ `# gluster volume status clients`
+
+ For example, to display the list of clients connected to
+ test-volume:
+
+ # gluster volume status test-volume clients
+ Brick : arch:/export/1
+ Clients connected : 2
+ Hostname Bytes Read BytesWritten
+ -------- --------- ------------
+ 127.0.0.1:1013 776 676
+ 127.0.0.1:1012 50440 51200
+
+- Display the memory usage and memory pool details of the bricks using
+ the following command:
+
+ `# gluster volume status mem`
+
+ For example, to display the memory usage and memory pool details of
+ the bricks of test-volume:
+
+ Memory status for volume : test-volume
+ ----------------------------------------------
+ Brick : arch:/export/1
+ Mallinfo
+ --------
+ Arena : 434176
+ Ordblks : 2
+ Smblks : 0
+ Hblks : 12
+ Hblkhd : 40861696
+ Usmblks : 0
+ Fsmblks : 0
+ Uordblks : 332416
+ Fordblks : 101760
+ Keepcost : 100400
+
+ Mempool Stats
+ -------------
+ Name HotCount ColdCount PaddedSizeof AllocCount MaxAlloc
+ ---- -------- --------- ------------ ---------- --------
+ test-volume-server:fd_t 0 16384 92 57 5
+ test-volume-server:dentry_t 59 965 84 59 59
+ test-volume-server:inode_t 60 964 148 60 60
+ test-volume-server:rpcsvc_request_t 0 525 6372 351 2
+ glusterfs:struct saved_frame 0 4096 124 2 2
+ glusterfs:struct rpc_req 0 4096 2236 2 2
+ glusterfs:rpcsvc_request_t 1 524 6372 2 1
+ glusterfs:call_stub_t 0 1024 1220 288 1
+ glusterfs:call_stack_t 0 8192 2084 290 2
+ glusterfs:call_frame_t 0 16384 172 1728 6
+
+- Display the inode tables of the volume using the following command:
+
+ `# gluster volume status inode`
+
+ For example, to display the inode tables of the test-volume:
+
+ # gluster volume status test-volume inode
+ inode tables for volume test-volume
+ ----------------------------------------------
+ Brick : arch:/export/1
+ Active inodes:
+ GFID Lookups Ref IA type
+ ---- ------- --- -------
+ 6f3fe173-e07a-4209-abb6-484091d75499 1 9 2
+ 370d35d7-657e-44dc-bac4-d6dd800ec3d3 1 1 2
+
+ LRU inodes:
+ GFID Lookups Ref IA type
+ ---- ------- --- -------
+ 80f98abe-cdcf-4c1d-b917-ae564cf55763 1 0 1
+ 3a58973d-d549-4ea6-9977-9aa218f233de 1 0 1
+ 2ce0197d-87a9-451b-9094-9baa38121155 1 0 2
+
+- Display the open fd tables of the volume using the following
+ command:
+
+ `# gluster volume status fd`
+
+ For example, to display the open fd tables of the test-volume:
+
+ # gluster volume status test-volume fd
+
+ FD tables for volume test-volume
+ ----------------------------------------------
+ Brick : arch:/export/1
+ Connection 1:
+ RefCount = 0 MaxFDs = 128 FirstFree = 4
+ FD Entry PID RefCount Flags
+ -------- --- -------- -----
+ 0 26311 1 2
+ 1 26310 3 2
+ 2 26310 1 2
+ 3 26311 3 2
+
+ Connection 2:
+ RefCount = 0 MaxFDs = 128 FirstFree = 0
+ No open fds
+
+ Connection 3:
+ RefCount = 0 MaxFDs = 128 FirstFree = 0
+ No open fds
+
+- Display the pending calls of the volume using the following command:
+
+ `# gluster volume status callpool`
+
+ Each call has a call stack containing call frames.
+
+ For example, to display the pending calls of test-volume:
+
+ # gluster volume status test-volume
+
+ Pending calls for volume test-volume
+ ----------------------------------------------
+ Brick : arch:/export/1
+ Pending calls: 2
+ Call Stack1
+ UID : 0
+ GID : 0
+ PID : 26338
+ Unique : 192138
+ Frames : 7
+ Frame 1
+ Ref Count = 1
+ Translator = test-volume-server
+ Completed = No
+ Frame 2
+ Ref Count = 0
+ Translator = test-volume-posix
+ Completed = No
+ Parent = test-volume-access-control
+ Wind From = default_fsync
+ Wind To = FIRST_CHILD(this)->fops->fsync
+ Frame 3
+ Ref Count = 1
+ Translator = test-volume-access-control
+ Completed = No
+ Parent = repl-locks
+ Wind From = default_fsync
+ Wind To = FIRST_CHILD(this)->fops->fsync
+ Frame 4
+ Ref Count = 1
+ Translator = test-volume-locks
+ Completed = No
+ Parent = test-volume-io-threads
+ Wind From = iot_fsync_wrapper
+ Wind To = FIRST_CHILD (this)->fops->fsync
+ Frame 5
+ Ref Count = 1
+ Translator = test-volume-io-threads
+ Completed = No
+ Parent = test-volume-marker
+ Wind From = default_fsync
+ Wind To = FIRST_CHILD(this)->fops->fsync
+ Frame 6
+ Ref Count = 1
+ Translator = test-volume-marker
+ Completed = No
+ Parent = /export/1
+ Wind From = io_stats_fsync
+ Wind To = FIRST_CHILD(this)->fops->fsync
+ Frame 7
+ Ref Count = 1
+ Translator = /export/1
+ Completed = No
+ Parent = test-volume-server
+ Wind From = server_fsync_resume
+ Wind To = bound_xl->fops->fsync
+
+
diff --git a/doc/admin-guide/en-US/markdown/admin_setting_volumes.md b/doc/admin-guide/en-US/markdown/admin_setting_volumes.md
new file mode 100644
index 000000000..4038523c8
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/admin_setting_volumes.md
@@ -0,0 +1,419 @@
+Setting up GlusterFS Server Volumes
+===================================
+
+A volume is a logical collection of bricks where each brick is an export
+directory on a server in the trusted storage pool. Most of the gluster
+management operations are performed on the volume.
+
+To create a new volume in your storage environment, specify the bricks
+that comprise the volume. After you have created a new volume, you must
+start it before attempting to mount it.
+
+- Volumes of the following types can be created in your storage
+ environment:
+
+ - Distributed - Distributed volumes distributes files throughout
+ the bricks in the volume. You can use distributed volumes where
+ the requirement is to scale storage and the redundancy is either
+ not important or is provided by other hardware/software layers.
+ For more information, see ? .
+
+ - Replicated – Replicated volumes replicates files across bricks
+ in the volume. You can use replicated volumes in environments
+ where high-availability and high-reliability are critical. For
+ more information, see ?.
+
+ - Striped – Striped volumes stripes data across bricks in the
+ volume. For best results, you should use striped volumes only in
+ high concurrency environments accessing very large files. For
+ more information, see ?.
+
+ - Distributed Striped - Distributed striped volumes stripe data
+ across two or more nodes in the cluster. You should use
+ distributed striped volumes where the requirement is to scale
+ storage and in high concurrency environments accessing very
+ large files is critical. For more information, see ?.
+
+ - Distributed Replicated - Distributed replicated volumes
+ distributes files across replicated bricks in the volume. You
+ can use distributed replicated volumes in environments where the
+ requirement is to scale storage and high-reliability is
+ critical. Distributed replicated volumes also offer improved
+ read performance in most environments. For more information, see
+ ?.
+
+ - Distributed Striped Replicated – Distributed striped replicated
+ volumes distributes striped data across replicated bricks in the
+ cluster. For best results, you should use distributed striped
+ replicated volumes in highly concurrent environments where
+ parallel access of very large files and performance is critical.
+ In this release, configuration of this volume type is supported
+ only for Map Reduce workloads. For more information, see ?.
+
+ - Striped Replicated – Striped replicated volumes stripes data
+ across replicated bricks in the cluster. For best results, you
+ should use striped replicated volumes in highly concurrent
+ environments where there is parallel access of very large files
+ and performance is critical. In this release, configuration of
+ this volume type is supported only for Map Reduce workloads. For
+ more information, see ?.
+
+**To create a new volume**
+
+- Create a new volume :
+
+ `# gluster volume create [stripe | replica ] [transport tcp | rdma | tcp, rdma] `
+
+ For example, to create a volume called test-volume consisting of
+ server3:/exp3 and server4:/exp4:
+
+ # gluster volume create test-volume server3:/exp3 server4:/exp4
+ Creation of test-volume has been successful
+ Please start the volume to access data.
+
+Creating Distributed Volumes
+============================
+
+In a distributed volumes files are spread randomly across the bricks in
+the volume. Use distributed volumes where you need to scale storage and
+redundancy is either not important or is provided by other
+hardware/software layers.
+
+> **Note**
+>
+> Disk/server failure in distributed volumes can result in a serious
+> loss of data because directory contents are spread randomly across the
+> bricks in the volume.
+
+![][]
+
+**To create a distributed volume**
+
+1. Create a trusted storage pool as described earlier in ?.
+
+2. Create the distributed volume:
+
+ `# gluster volume create [transport tcp | rdma | tcp,rdma] `
+
+ For example, to create a distributed volume with four storage
+ servers using tcp:
+
+ # gluster volume create test-volume server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4
+ Creation of test-volume has been successful
+ Please start the volume to access data.
+
+ (Optional) You can display the volume information:
+
+ # gluster volume info
+ Volume Name: test-volume
+ Type: Distribute
+ Status: Created
+ Number of Bricks: 4
+ Transport-type: tcp
+ Bricks:
+ Brick1: server1:/exp1
+ Brick2: server2:/exp2
+ Brick3: server3:/exp3
+ Brick4: server4:/exp4
+
+ For example, to create a distributed volume with four storage
+ servers over InfiniBand:
+
+ # gluster volume create test-volume transport rdma server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4
+ Creation of test-volume has been successful
+ Please start the volume to access data.
+
+ If the transport type is not specified, *tcp* is used as the
+ default. You can also set additional options if required, such as
+ auth.allow or auth.reject. For more information, see ?
+
+ > **Note**
+ >
+ > Make sure you start your volumes before you try to mount them or
+ > else client operations after the mount will hang, see ? for
+ > details.
+
+Creating Replicated Volumes
+===========================
+
+Replicated volumes create copies of files across multiple bricks in the
+volume. You can use replicated volumes in environments where
+high-availability and high-reliability are critical.
+
+> **Note**
+>
+> The number of bricks should be equal to of the replica count for a
+> replicated volume. To protect against server and disk failures, it is
+> recommended that the bricks of the volume are from different servers.
+
+![][1]
+
+**To create a replicated volume**
+
+1. Create a trusted storage pool as described earlier in ?.
+
+2. Create the replicated volume:
+
+ `# gluster volume create [replica ] [transport tcp | rdma tcp,rdma] `
+
+ For example, to create a replicated volume with two storage servers:
+
+ # gluster volume create test-volume replica 2 transport tcp server1:/exp1 server2:/exp2
+ Creation of test-volume has been successful
+ Please start the volume to access data.
+
+ If the transport type is not specified, *tcp* is used as the
+ default. You can also set additional options if required, such as
+ auth.allow or auth.reject. For more information, see ?
+
+ > **Note**
+ >
+ > Make sure you start your volumes before you try to mount them or
+ > else client operations after the mount will hang, see ? for
+ > details.
+
+Creating Striped Volumes
+========================
+
+Striped volumes stripes data across bricks in the volume. For best
+results, you should use striped volumes only in high concurrency
+environments accessing very large files.
+
+> **Note**
+>
+> The number of bricks should be a equal to the stripe count for a
+> striped volume.
+
+![][2]
+
+**To create a striped volume**
+
+1. Create a trusted storage pool as described earlier in ?.
+
+2. Create the striped volume:
+
+ `# gluster volume create [stripe ] [transport tcp | rdma | tcp,rdma] `
+
+ For example, to create a striped volume across two storage servers:
+
+ # gluster volume create test-volume stripe 2 transport tcp server1:/exp1 server2:/exp2
+ Creation of test-volume has been successful
+ Please start the volume to access data.
+
+ If the transport type is not specified, *tcp* is used as the
+ default. You can also set additional options if required, such as
+ auth.allow or auth.reject. For more information, see ?
+
+ > **Note**
+ >
+ > Make sure you start your volumes before you try to mount them or
+ > else client operations after the mount will hang, see ? for
+ > details.
+
+Creating Distributed Striped Volumes
+====================================
+
+Distributed striped volumes stripes files across two or more nodes in
+the cluster. For best results, you should use distributed striped
+volumes where the requirement is to scale storage and in high
+concurrency environments accessing very large files is critical.
+
+> **Note**
+>
+> The number of bricks should be a multiple of the stripe count for a
+> distributed striped volume.
+
+![][3]
+
+**To create a distributed striped volume**
+
+1. Create a trusted storage pool as described earlier in ?.
+
+2. Create the distributed striped volume:
+
+ `# gluster volume create [stripe ] [transport tcp | rdma | tcp,rdma] `
+
+ For example, to create a distributed striped volume across eight
+ storage servers:
+
+ # gluster volume create test-volume stripe 4 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4 server5:/exp5 server6:/exp6 server7:/exp7 server8:/exp8
+ Creation of test-volume has been successful
+ Please start the volume to access data.
+
+ If the transport type is not specified, *tcp* is used as the
+ default. You can also set additional options if required, such as
+ auth.allow or auth.reject. For more information, see ?
+
+ > **Note**
+ >
+ > Make sure you start your volumes before you try to mount them or
+ > else client operations after the mount will hang, see ? for
+ > details.
+
+Creating Distributed Replicated Volumes
+=======================================
+
+Distributes files across replicated bricks in the volume. You can use
+distributed replicated volumes in environments where the requirement is
+to scale storage and high-reliability is critical. Distributed
+replicated volumes also offer improved read performance in most
+environments.
+
+> **Note**
+>
+> The number of bricks should be a multiple of the replica count for a
+> distributed replicated volume. Also, the order in which bricks are
+> specified has a great effect on data protection. Each replica\_count
+> consecutive bricks in the list you give will form a replica set, with
+> all replica sets combined into a volume-wide distribute set. To make
+> sure that replica-set members are not placed on the same node, list
+> the first brick on every server, then the second brick on every server
+> in the same order, and so on.
+
+![][4]
+
+**To create a distributed replicated volume**
+
+1. Create a trusted storage pool as described earlier in ?.
+
+2. Create the distributed replicated volume:
+
+ `# gluster volume create [replica ] [transport tcp | rdma | tcp,rdma] `
+
+ For example, four node distributed (replicated) volume with a
+ two-way mirror:
+
+ # gluster volume create test-volume replica 2 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4
+ Creation of test-volume has been successful
+ Please start the volume to access data.
+
+ For example, to create a six node distributed (replicated) volume
+ with a two-way mirror:
+
+ # gluster volume create test-volume replica 2 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4 server5:/exp5 server6:/exp6
+ Creation of test-volume has been successful
+ Please start the volume to access data.
+
+ If the transport type is not specified, *tcp* is used as the
+ default. You can also set additional options if required, such as
+ auth.allow or auth.reject. For more information, see ?
+
+ > **Note**
+ >
+ > Make sure you start your volumes before you try to mount them or
+ > else client operations after the mount will hang, see ? for
+ > details.
+
+Creating Distributed Striped Replicated Volumes
+===============================================
+
+Distributed striped replicated volumes distributes striped data across
+replicated bricks in the cluster. For best results, you should use
+distributed striped replicated volumes in highly concurrent environments
+where parallel access of very large files and performance is critical.
+In this release, configuration of this volume type is supported only for
+Map Reduce workloads.
+
+> **Note**
+>
+> The number of bricks should be a multiples of number of stripe count
+> and replica count for a distributed striped replicated volume.
+
+**To create a distributed striped replicated volume**
+
+1. Create a trusted storage pool as described earlier in ?.
+
+2. Create a distributed striped replicated volume using the following
+ command:
+
+ `# gluster volume create [stripe ] [replica ] [transport tcp | rdma | tcp,rdma] `
+
+ For example, to create a distributed replicated striped volume
+ across eight storage servers:
+
+ # gluster volume create test-volume stripe 2 replica 2 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4 server5:/exp5 server6:/exp6 server7:/exp7 server8:/exp8
+ Creation of test-volume has been successful
+ Please start the volume to access data.
+
+ If the transport type is not specified, *tcp* is used as the
+ default. You can also set additional options if required, such as
+ auth.allow or auth.reject. For more information, see ?
+
+ > **Note**
+ >
+ > Make sure you start your volumes before you try to mount them or
+ > else client operations after the mount will hang, see ? for
+ > details.
+
+Creating Striped Replicated Volumes
+===================================
+
+Striped replicated volumes stripes data across replicated bricks in the
+cluster. For best results, you should use striped replicated volumes in
+highly concurrent environments where there is parallel access of very
+large files and performance is critical. In this release, configuration
+of this volume type is supported only for Map Reduce workloads.
+
+> **Note**
+>
+> The number of bricks should be a multiple of the replicate count and
+> stripe count for a striped replicated volume.
+
+![][5]
+
+**To create a striped replicated volume**
+
+1. Create a trusted storage pool consisting of the storage servers that
+ will comprise the volume.
+
+ For more information, see ?.
+
+2. Create a striped replicated volume :
+
+ `# gluster volume create [stripe ] [replica ] [transport tcp | rdma | tcp,rdma] `
+
+ For example, to create a striped replicated volume across four
+ storage servers:
+
+ # gluster volume create test-volume stripe 2 replica 2 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4
+ Creation of test-volume has been successful
+ Please start the volume to access data.
+
+ To create a striped replicated volume across six storage servers:
+
+ # gluster volume create test-volume stripe 3 replica 2 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4 server5:/exp5 server6:/exp6
+ Creation of test-volume has been successful
+ Please start the volume to access data.
+
+ If the transport type is not specified, *tcp* is used as the
+ default. You can also set additional options if required, such as
+ auth.allow or auth.reject. For more information, see ?
+
+ > **Note**
+ >
+ > Make sure you start your volumes before you try to mount them or
+ > else client operations after the mount will hang, see ? for
+ > details.
+
+Starting Volumes
+================
+
+You must start your volumes before you try to mount them.
+
+**To start a volume**
+
+- Start a volume:
+
+ `# gluster volume start `
+
+ For example, to start test-volume:
+
+ # gluster volume start test-volume
+ Starting test-volume has been successful
+
+ []: images/Distributed_Volume.png
+ [1]: images/Replicated_Volume.png
+ [2]: images/Striped_Volume.png
+ [3]: images/Distributed_Striped_Volume.png
+ [4]: images/Distributed_Replicated_Volume.png
+ [5]: images/Striped_Replicated_Volume.png
diff --git a/doc/admin-guide/en-US/markdown/admin_settingup_clients.md b/doc/admin-guide/en-US/markdown/admin_settingup_clients.md
new file mode 100644
index 000000000..85b28c952
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/admin_settingup_clients.md
@@ -0,0 +1,641 @@
+Accessing Data - Setting Up GlusterFS Client
+============================================
+
+You can access gluster volumes in multiple ways. You can use Gluster
+Native Client method for high concurrency, performance and transparent
+failover in GNU/Linux clients. You can also use NFS v3 to access gluster
+volumes. Extensive testing has be done on GNU/Linux clients and NFS
+implementation in other operating system, such as FreeBSD, and Mac OS X,
+as well as Windows 7 (Professional and Up) and Windows Server 2003.
+Other NFS client implementations may work with gluster NFS server.
+
+You can use CIFS to access volumes when using Microsoft Windows as well
+as SAMBA clients. For this access method, Samba packages need to be
+present on the client side.
+
+Gluster Native Client
+=====================
+
+The Gluster Native Client is a FUSE-based client running in user space.
+Gluster Native Client is the recommended method for accessing volumes
+when high concurrency and high write performance is required.
+
+This section introduces the Gluster Native Client and explains how to
+install the software on client machines. This section also describes how
+to mount volumes on clients (both manually and automatically) and how to
+verify that the volume has mounted successfully.
+
+Installing the Gluster Native Client
+------------------------------------
+
+Before you begin installing the Gluster Native Client, you need to
+verify that the FUSE module is loaded on the client and has access to
+the required modules as follows:
+
+1. Add the FUSE loadable kernel module (LKM) to the Linux kernel:
+
+ `# modprobe fuse`
+
+2. Verify that the FUSE module is loaded:
+
+ `# dmesg | grep -i fuse `
+
+ `fuse init (API version 7.13)`
+
+### Installing on Red Hat Package Manager (RPM) Distributions
+
+To install Gluster Native Client on RPM distribution-based systems
+
+1. Install required prerequisites on the client using the following
+ command:
+
+ `$ sudo yum -y install openssh-server wget fuse fuse-libs openib libibverbs`
+
+2. Ensure that TCP and UDP ports 24007 and 24008 are open on all
+ Gluster servers. Apart from these ports, you need to open one port
+ for each brick starting from port 24009. For example: if you have
+ five bricks, you need to have ports 24009 to 24013 open.
+
+ You can use the following chains with iptables:
+
+ `$ sudo iptables -A RH-Firewall-1-INPUT -m state --state NEW -m tcp -p tcp --dport 24007:24008 -j ACCEPT `
+
+ `$ sudo iptables -A RH-Firewall-1-INPUT -m state --state NEW -m tcp -p tcp --dport 24009:24014 -j ACCEPT`
+
+ > **Note**
+ >
+ > If you already have iptable chains, make sure that the above
+ > ACCEPT rules precede the DROP rules. This can be achieved by
+ > providing a lower rule number than the DROP rule.
+
+3. Download the latest glusterfs, glusterfs-fuse, and glusterfs-rdma
+ RPM files to each client. The glusterfs package contains the Gluster
+ Native Client. The glusterfs-fuse package contains the FUSE
+ translator required for mounting on client systems and the
+ glusterfs-rdma packages contain OpenFabrics verbs RDMA module for
+ Infiniband.
+
+ You can download the software at [][].
+
+4. Install Gluster Native Client on the client.
+
+ `$ sudo rpm -i glusterfs-3.3.0qa30-1.x86_64.rpm `
+
+ `$ sudo rpm -i glusterfs-fuse-3.3.0qa30-1.x86_64.rpm `
+
+ `$ sudo rpm -i glusterfs-rdma-3.3.0qa30-1.x86_64.rpm`
+
+ > **Note**
+ >
+ > The RDMA module is only required when using Infiniband.
+
+### Installing on Debian-based Distributions
+
+To install Gluster Native Client on Debian-based distributions
+
+1. Install OpenSSH Server on each client using the following command:
+
+ `$ sudo apt-get install openssh-server vim wget`
+
+2. Download the latest GlusterFS .deb file and checksum to each client.
+
+ You can download the software at [][1].
+
+3. For each .deb file, get the checksum (using the following command)
+ and compare it against the checksum for that file in the md5sum
+ file.
+
+ `$ md5sum GlusterFS_DEB_file.deb `
+
+ The md5sum of the packages is available at: [][2]
+
+4. Uninstall GlusterFS v3.1 (or an earlier version) from the client
+ using the following command:
+
+ `$ sudo dpkg -r glusterfs `
+
+ (Optional) Run `$ sudo dpkg -purge glusterfs `to purge the
+ configuration files.
+
+5. Install Gluster Native Client on the client using the following
+ command:
+
+ `$ sudo dpkg -i GlusterFS_DEB_file `
+
+ For example:
+
+ `$ sudo dpkg -i glusterfs-3.3.x.deb `
+
+6. Ensure that TCP and UDP ports 24007 and 24008 are open on all
+ Gluster servers. Apart from these ports, you need to open one port
+ for each brick starting from port 24009. For example: if you have
+ five bricks, you need to have ports 24009 to 24013 open.
+
+ You can use the following chains with iptables:
+
+ `$ sudo iptables -A RH-Firewall-1-INPUT -m state --state NEW -m tcp -p tcp --dport 24007:24008 -j ACCEPT `
+
+ `$ sudo iptables -A RH-Firewall-1-INPUT -m state --state NEW -m tcp -p tcp --dport 24009:24014 -j ACCEPT`
+
+ > **Note**
+ >
+ > If you already have iptable chains, make sure that the above
+ > ACCEPT rules precede the DROP rules. This can be achieved by
+ > providing a lower rule number than the DROP rule.
+
+### Performing a Source Installation
+
+To build and install Gluster Native Client from the source code
+
+1. Create a new directory using the following commands:
+
+ `# mkdir glusterfs `
+
+ `# cd glusterfs`
+
+2. Download the source code.
+
+ You can download the source at [][1].
+
+3. Extract the source code using the following command:
+
+ `# tar -xvzf SOURCE-FILE `
+
+4. Run the configuration utility using the following command:
+
+ `# ./configure `
+
+ `GlusterFS configure summary `
+
+ `================== `
+
+ `FUSE client : yes `
+
+ `Infiniband verbs : yes `
+
+ `epoll IO multiplex : yes `
+
+ `argp-standalone : no `
+
+ `fusermount : no `
+
+ `readline : yes`
+
+ The configuration summary shows the components that will be built
+ with Gluster Native Client.
+
+5. Build the Gluster Native Client software using the following
+ commands:
+
+ `# make `
+
+ `# make install`
+
+6. Verify that the correct version of Gluster Native Client is
+ installed, using the following command:
+
+ `# glusterfs –-version`
+
+Mounting Volumes
+----------------
+
+After installing the Gluster Native Client, you need to mount Gluster
+volumes to access data. There are two methods you can choose:
+
+- ?
+
+- ?
+
+After mounting a volume, you can test the mounted volume using the
+procedure described in ?.
+
+> **Note**
+>
+> Server names selected during creation of Volumes should be resolvable
+> in the client machine. You can use appropriate /etc/hosts entries or
+> DNS server to resolve server names to IP addresses.
+
+### Manually Mounting Volumes
+
+To manually mount a Gluster volume
+
+- To mount a volume, use the following command:
+
+ `# mount -t glusterfs HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR`
+
+ For example:
+
+ `# mount -t glusterfs server1:/test-volume /mnt/glusterfs`
+
+ > **Note**
+ >
+ > The server specified in the mount command is only used to fetch
+ > the gluster configuration volfile describing the volume name.
+ > Subsequently, the client will communicate directly with the
+ > servers mentioned in the volfile (which might not even include the
+ > one used for mount).
+ >
+ > If you see a usage message like "Usage: mount.glusterfs", mount
+ > usually requires you to create a directory to be used as the mount
+ > point. Run "mkdir /mnt/glusterfs" before you attempt to run the
+ > mount command listed above.
+
+**Mounting Options**
+
+You can specify the following options when using the
+`mount -t glusterfs` command. Note that you need to separate all options
+with commas.
+
+backupvolfile-server=server-name
+
+volfile-max-fetch-attempts=number of attempts
+
+log-level=loglevel
+
+log-file=logfile
+
+transport=transport-type
+
+direct-io-mode=[enable|disable]
+
+For example:
+
+`# mount -t glusterfs -o backupvolfile-server=volfile_server2 --volfile-max-fetch-attempts=2 log-level=WARNING,log-file=/var/log/gluster.log server1:/test-volume /mnt/glusterfs`
+
+If `backupvolfile-server` option is added while mounting fuse client,
+when the first volfile server fails, then the server specified in
+`backupvolfile-server` option is used as volfile server to mount the
+client.
+
+In `--volfile-max-fetch-attempts=X` option, specify the number of
+attempts to fetch volume files while mounting a volume. This option is
+useful when you mount a server with multiple IP addresses or when
+round-robin DNS is configured for the server-name..
+
+### Automatically Mounting Volumes
+
+You can configure your system to automatically mount the Gluster volume
+each time your system starts.
+
+The server specified in the mount command is only used to fetch the
+gluster configuration volfile describing the volume name. Subsequently,
+the client will communicate directly with the servers mentioned in the
+volfile (which might not even include the one used for mount).
+
+**To automatically mount a Gluster volume**
+
+- To mount a volume, edit the /etc/fstab file and add the following
+ line:
+
+ `HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR glusterfs defaults,_netdev 0 0 `
+
+ For example:
+
+ `server1:/test-volume /mnt/glusterfs glusterfs defaults,_netdev 0 0`
+
+**Mounting Options**
+
+You can specify the following options when updating the /etc/fstab file.
+Note that you need to separate all options with commas.
+
+log-level=loglevel
+
+log-file=logfile
+
+transport=transport-type
+
+direct-io-mode=[enable|disable]
+
+For example:
+
+`HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR glusterfs defaults,_netdev,log-level=WARNING,log-file=/var/log/gluster.log 0 0 `
+
+### Testing Mounted Volumes
+
+To test mounted volumes
+
+- Use the following command:
+
+ `# mount `
+
+ If the gluster volume was successfully mounted, the output of the
+ mount command on the client will be similar to this example:
+
+ `server1:/test-volume on /mnt/glusterfs type fuse.glusterfs (rw,allow_other,default_permissions,max_read=131072`
+
+- Use the following command:
+
+ `# df`
+
+ The output of df command on the client will display the aggregated
+ storage space from all the bricks in a volume similar to this
+ example:
+
+ `# df -h /mnt/glusterfs Filesystem Size Used Avail Use% Mounted on server1:/test-volume 28T 22T 5.4T 82% /mnt/glusterfs`
+
+- Change to the directory and list the contents by entering the
+ following:
+
+ `# cd MOUNTDIR `
+
+ `# ls`
+
+- For example,
+
+ `# cd /mnt/glusterfs `
+
+ `# ls`
+
+NFS
+===
+
+You can use NFS v3 to access to gluster volumes. Extensive testing has
+be done on GNU/Linux clients and NFS implementation in other operating
+system, such as FreeBSD, and Mac OS X, as well as Windows 7
+(Professional and Up), Windows Server 2003, and others, may work with
+gluster NFS server implementation.
+
+GlusterFS now includes network lock manager (NLM) v4. NLM enables
+applications on NFSv3 clients to do record locking on files on NFS
+server. It is started automatically whenever the NFS server is run.
+
+You must install nfs-common package on both servers and clients (only
+for Debian-based) distribution.
+
+This section describes how to use NFS to mount Gluster volumes (both
+manually and automatically) and how to verify that the volume has been
+mounted successfully.
+
+Using NFS to Mount Volumes
+--------------------------
+
+You can use either of the following methods to mount Gluster volumes:
+
+- ?
+
+- ?
+
+**Prerequisite**: Install nfs-common package on both servers and clients
+(only for Debian-based distribution), using the following command:
+
+`$ sudo aptitude install nfs-common `
+
+After mounting a volume, you can test the mounted volume using the
+procedure described in ?.
+
+### Manually Mounting Volumes Using NFS
+
+To manually mount a Gluster volume using NFS
+
+- To mount a volume, use the following command:
+
+ `# mount -t nfs -o vers=3 HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR`
+
+ For example:
+
+ `# mount -t nfs -o vers=3 server1:/test-volume /mnt/glusterfs`
+
+ > **Note**
+ >
+ > Gluster NFS server does not support UDP. If the NFS client you are
+ > using defaults to connecting using UDP, the following message
+ > appears:
+ >
+ > `requested NFS version or transport protocol is not supported`.
+
+ **To connect using TCP**
+
+- Add the following option to the mount command:
+
+ `-o mountproto=tcp `
+
+ For example:
+
+ `# mount -o mountproto=tcp -t nfs server1:/test-volume /mnt/glusterfs`
+
+**To mount Gluster NFS server from a Solaris client**
+
+- Use the following command:
+
+ `# mount -o proto=tcp,vers=3 nfs://HOSTNAME-OR-IPADDRESS:38467/VOLNAME MOUNTDIR`
+
+ For example:
+
+ ` # mount -o proto=tcp,vers=3 nfs://server1:38467/test-volume /mnt/glusterfs`
+
+### Automatically Mounting Volumes Using NFS
+
+You can configure your system to automatically mount Gluster volumes
+using NFS each time the system starts.
+
+**To automatically mount a Gluster volume using NFS**
+
+- To mount a volume, edit the /etc/fstab file and add the following
+ line:
+
+ `HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR nfs defaults,_netdev,vers=3 0 0`
+
+ For example,
+
+ `server1:/test-volume /mnt/glusterfs nfs defaults,_netdev,vers=3 0 0`
+
+ > **Note**
+ >
+ > Gluster NFS server does not support UDP. If the NFS client you are
+ > using defaults to connecting using UDP, the following message
+ > appears:
+ >
+ > `requested NFS version or transport protocol is not supported.`
+
+ To connect using TCP
+
+- Add the following entry in /etc/fstab file :
+
+ `HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR nfs defaults,_netdev,mountproto=tcp 0 0`
+
+ For example,
+
+ `server1:/test-volume /mnt/glusterfs nfs defaults,_netdev,mountproto=tcp 0 0`
+
+**To automount NFS mounts**
+
+Gluster supports \*nix standard method of automounting NFS mounts.
+Update the /etc/auto.master and /etc/auto.misc and restart the autofs
+service. After that, whenever a user or process attempts to access the
+directory it will be mounted in the background.
+
+### Testing Volumes Mounted Using NFS
+
+You can confirm that Gluster directories are mounting successfully.
+
+**To test mounted volumes**
+
+- Use the mount command by entering the following:
+
+ `# mount`
+
+ For example, the output of the mount command on the client will
+ display an entry like the following:
+
+ `server1:/test-volume on /mnt/glusterfs type nfs (rw,vers=3,addr=server1)`
+
+- Use the df command by entering the following:
+
+ `# df`
+
+ For example, the output of df command on the client will display the
+ aggregated storage space from all the bricks in a volume.
+
+ # df -h /mnt/glusterfs
+ Filesystem Size Used Avail Use% Mounted on
+ server1:/test-volume 28T 22T 5.4T 82% /mnt/glusterfs
+
+- Change to the directory and list the contents by entering the
+ following:
+
+ `# cd MOUNTDIR`
+
+ `# ls`
+
+ For example,
+
+ `
+
+ `
+
+ `# ls`
+
+CIFS
+====
+
+You can use CIFS to access to volumes when using Microsoft Windows as
+well as SAMBA clients. For this access method, Samba packages need to be
+present on the client side. You can export glusterfs mount point as the
+samba export, and then mount it using CIFS protocol.
+
+This section describes how to mount CIFS shares on Microsoft
+Windows-based clients (both manually and automatically) and how to
+verify that the volume has mounted successfully.
+
+> **Note**
+>
+> CIFS access using the Mac OS X Finder is not supported, however, you
+> can use the Mac OS X command line to access Gluster volumes using
+> CIFS.
+
+Using CIFS to Mount Volumes
+---------------------------
+
+You can use either of the following methods to mount Gluster volumes:
+
+- ?
+
+- ?
+
+After mounting a volume, you can test the mounted volume using the
+procedure described in ?.
+
+You can also use Samba for exporting Gluster Volumes through CIFS
+protocol.
+
+### Exporting Gluster Volumes Through Samba
+
+We recommend you to use Samba for exporting Gluster volumes through the
+CIFS protocol.
+
+**To export volumes through CIFS protocol**
+
+1. Mount a Gluster volume. For more information on mounting volumes,
+ see ?.
+
+2. Setup Samba configuration to export the mount point of the Gluster
+ volume.
+
+ For example, if a Gluster volume is mounted on /mnt/gluster, you
+ must edit smb.conf file to enable exporting this through CIFS. Open
+ smb.conf file in an editor and add the following lines for a simple
+ configuration:
+
+ [glustertest]
+
+ comment = For testing a Gluster volume exported through CIFS
+
+ path = /mnt/glusterfs
+
+ read only = no
+
+ guest ok = yes
+
+Save the changes and start the smb service using your systems init
+scripts (/etc/init.d/smb [re]start).
+
+> **Note**
+>
+> To be able mount from any server in the trusted storage pool, you must
+> repeat these steps on each Gluster node. For more advanced
+> configurations, see Samba documentation.
+
+### Manually Mounting Volumes Using CIFS
+
+You can manually mount Gluster volumes using CIFS on Microsoft
+Windows-based client machines.
+
+**To manually mount a Gluster volume using CIFS**
+
+1. Using Windows Explorer, choose **Tools \> Map Network Drive…** from
+ the menu. The **Map Network Drive**window appears.
+
+2. Choose the drive letter using the **Drive** drop-down list.
+
+3. Click **Browse**, select the volume to map to the network drive, and
+ click **OK**.
+
+4. Click **Finish.**
+
+The network drive (mapped to the volume) appears in the Computer window.
+
+**Alternatively, to manually mount a Gluster volume using CIFS.**
+
+- Click **Start \> Run** and enter the following:
+
+ `
+
+ `
+
+ For example:
+
+ `
+
+ `
+
+### Automatically Mounting Volumes Using CIFS
+
+You can configure your system to automatically mount Gluster volumes
+using CIFS on Microsoft Windows-based clients each time the system
+starts.
+
+**To automatically mount a Gluster volume using CIFS**
+
+The network drive (mapped to the volume) appears in the Computer window
+and is reconnected each time the system starts.
+
+1. Using Windows Explorer, choose **Tools \> Map Network Drive…** from
+ the menu. The **Map Network Drive**window appears.
+
+2. Choose the drive letter using the **Drive** drop-down list.
+
+3. Click **Browse**, select the volume to map to the network drive, and
+ click **OK**.
+
+4. Click the **Reconnect** at logon checkbox.
+
+5. Click **Finish.**
+
+### Testing Volumes Mounted Using CIFS
+
+You can confirm that Gluster directories are mounting successfully by
+navigating to the directory using Windows Explorer.
+
+ []: http://bits.gluster.com/gluster/glusterfs/3.3.0qa30/x86_64/
+ [1]: http://www.gluster.org/download/
+ [2]: http://download.gluster.com/pub/gluster/glusterfs
diff --git a/doc/admin-guide/en-US/markdown/admin_start_stop_daemon.md b/doc/admin-guide/en-US/markdown/admin_start_stop_daemon.md
new file mode 100644
index 000000000..43251cd01
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/admin_start_stop_daemon.md
@@ -0,0 +1,70 @@
+Managing the glusterd Service
+=============================
+
+After installing GlusterFS, you must start glusterd service. The
+glusterd service serves as the Gluster elastic volume manager,
+overseeing glusterfs processes, and co-ordinating dynamic volume
+operations, such as adding and removing volumes across multiple storage
+servers non-disruptively.
+
+This section describes how to start the glusterd service in the
+following ways:
+
+- ?
+
+- ?
+
+> **Note**
+>
+> You must start glusterd on all GlusterFS servers.
+
+Starting and Stopping glusterd Manually
+=======================================
+
+This section describes how to start and stop glusterd manually
+
+- To start glusterd manually, enter the following command:
+
+ `# /etc/init.d/glusterd start `
+
+- To stop glusterd manually, enter the following command:
+
+ `# /etc/init.d/glusterd stop`
+
+Starting glusterd Automatically
+===============================
+
+This section describes how to configure the system to automatically
+start the glusterd service every time the system boots.
+
+To automatically start the glusterd service every time the system boots,
+enter the following from the command line:
+
+`# chkconfig glusterd on `
+
+Red Hat-based Systems
+---------------------
+
+To configure Red Hat-based systems to automatically start the glusterd
+service every time the system boots, enter the following from the
+command line:
+
+`# chkconfig glusterd on `
+
+Debian-based Systems
+--------------------
+
+To configure Debian-based systems to automatically start the glusterd
+service every time the system boots, enter the following from the
+command line:
+
+`# update-rc.d glusterd defaults`
+
+Systems Other than Red Hat and Debain
+-------------------------------------
+
+To configure systems other than Red Hat or Debian to automatically start
+the glusterd service every time the system boots, enter the following
+entry to the*/etc/rc.local* file:
+
+`# echo "glusterd" >> /etc/rc.local `
diff --git a/doc/admin-guide/en-US/markdown/admin_storage_pools.md b/doc/admin-guide/en-US/markdown/admin_storage_pools.md
new file mode 100644
index 000000000..2a35cbea5
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/admin_storage_pools.md
@@ -0,0 +1,73 @@
+Setting up Trusted Storage Pools
+================================
+
+Before you can configure a GlusterFS volume, you must create a trusted
+storage pool consisting of the storage servers that provides bricks to a
+volume.
+
+A storage pool is a trusted network of storage servers. When you start
+the first server, the storage pool consists of that server alone. To add
+additional storage servers to the storage pool, you can use the probe
+command from a storage server that is already trusted.
+
+> **Note**
+>
+> Do not self-probe the first server/localhost.
+
+The GlusterFS service must be running on all storage servers that you
+want to add to the storage pool. See ? for more information.
+
+Adding Servers to Trusted Storage Pool
+======================================
+
+To create a trusted storage pool, add servers to the trusted storage
+pool
+
+1. The hostnames used to create the storage pool must be resolvable by
+ DNS.
+
+ To add a server to the storage pool:
+
+ `# gluster peer probe `
+
+ For example, to create a trusted storage pool of four servers, add
+ three servers to the storage pool from server1:
+
+ # gluster peer probe server2
+ Probe successful
+
+ # gluster peer probe server3
+ Probe successful
+
+ # gluster peer probe server4
+ Probe successful
+
+2. Verify the peer status from the first server using the following
+ commands:
+
+ # gluster peer status
+ Number of Peers: 3
+
+ Hostname: server2
+ Uuid: 5e987bda-16dd-43c2-835b-08b7d55e94e5
+ State: Peer in Cluster (Connected)
+
+ Hostname: server3
+ Uuid: 1e0ca3aa-9ef7-4f66-8f15-cbc348f29ff7
+ State: Peer in Cluster (Connected)
+
+ Hostname: server4
+ Uuid: 3e0caba-9df7-4f66-8e5d-cbc348f29ff7
+ State: Peer in Cluster (Connected)
+
+Removing Servers from the Trusted Storage Pool
+==============================================
+
+To remove a server from the storage pool:
+
+`# gluster peer detach`
+
+For example, to remove server4 from the trusted storage pool:
+
+ # gluster peer detach server4
+ Detach successful
diff --git a/doc/admin-guide/en-US/markdown/admin_troubleshooting.md b/doc/admin-guide/en-US/markdown/admin_troubleshooting.md
new file mode 100644
index 000000000..88fb85c24
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/admin_troubleshooting.md
@@ -0,0 +1,543 @@
+Troubleshooting GlusterFS
+=========================
+
+This section describes how to manage GlusterFS logs and most common
+troubleshooting scenarios related to GlusterFS.
+
+Managing GlusterFS Logs
+=======================
+
+This section describes how to manage GlusterFS logs by performing the
+following operation:
+
+- Rotating Logs
+
+Rotating Logs
+-------------
+
+Administrators can rotate the log file in a volume, as needed.
+
+**To rotate a log file**
+
+- Rotate the log file using the following command:
+
+ `# gluster volume log rotate `
+
+ For example, to rotate the log file on test-volume:
+
+ # gluster volume log rotate test-volume
+ log rotate successful
+
+ > **Note**
+ >
+ > When a log file is rotated, the contents of the current log file
+ > are moved to log-file- name.epoch-time-stamp.
+
+Troubleshooting Geo-replication
+===============================
+
+This section describes the most common troubleshooting scenarios related
+to GlusterFS Geo-replication.
+
+Locating Log Files
+------------------
+
+For every Geo-replication session, the following three log files are
+associated to it (four, if the slave is a gluster volume):
+
+- Master-log-file - log file for the process which monitors the Master
+ volume
+
+- Slave-log-file - log file for process which initiates the changes in
+ slave
+
+- Master-gluster-log-file - log file for the maintenance mount point
+ that Geo-replication module uses to monitor the master volume
+
+- Slave-gluster-log-file - is the slave's counterpart of it
+
+**Master Log File**
+
+To get the Master-log-file for geo-replication, use the following
+command:
+
+`gluster volume geo-replication config log-file`
+
+For example:
+
+`# gluster volume geo-replication Volume1 example.com:/data/remote_dir config log-file `
+
+**Slave Log File**
+
+To get the log file for Geo-replication on slave (glusterd must be
+running on slave machine), use the following commands:
+
+1. On master, run the following command:
+
+ `# gluster volume geo-replication Volume1 example.com:/data/remote_dir config session-owner 5f6e5200-756f-11e0-a1f0-0800200c9a66 `
+
+ Displays the session owner details.
+
+2. On slave, run the following command:
+
+ `# gluster volume geo-replication /data/remote_dir config log-file /var/log/gluster/${session-owner}:remote-mirror.log `
+
+3. Replace the session owner details (output of Step 1) to the output
+ of the Step 2 to get the location of the log file.
+
+ `/var/log/gluster/5f6e5200-756f-11e0-a1f0-0800200c9a66:remote-mirror.log`
+
+Rotating Geo-replication Logs
+-----------------------------
+
+Administrators can rotate the log file of a particular master-slave
+session, as needed. When you run geo-replication's ` log-rotate`
+command, the log file is backed up with the current timestamp suffixed
+to the file name and signal is sent to gsyncd to start logging to a new
+log file.
+
+**To rotate a geo-replication log file**
+
+- Rotate log file for a particular master-slave session using the
+ following command:
+
+ `# gluster volume geo-replication log-rotate`
+
+ For example, to rotate the log file of master `Volume1` and slave
+ `example.com:/data/remote_dir` :
+
+ # gluster volume geo-replication Volume1 example.com:/data/remote_dir log rotate
+ log rotate successful
+
+- Rotate log file for all sessions for a master volume using the
+ following command:
+
+ `# gluster volume geo-replication log-rotate`
+
+ For example, to rotate the log file of master `Volume1`:
+
+ # gluster volume geo-replication Volume1 log rotate
+ log rotate successful
+
+- Rotate log file for all sessions using the following command:
+
+ `# gluster volume geo-replication log-rotate`
+
+ For example, to rotate the log file for all sessions:
+
+ # gluster volume geo-replication log rotate
+ log rotate successful
+
+Synchronization is not complete
+-------------------------------
+
+**Description**: GlusterFS Geo-replication did not synchronize the data
+completely but still the geo- replication status displayed is OK.
+
+**Solution**: You can enforce a full sync of the data by erasing the
+index and restarting GlusterFS Geo- replication. After restarting,
+GlusterFS Geo-replication begins synchronizing all the data. All files
+are compared using checksum, which can be a lengthy and high resource
+utilization operation on large data sets. If the error situation
+persists, contact Red Hat Support.
+
+For more information about erasing index, see ?.
+
+Issues in Data Synchronization
+------------------------------
+
+**Description**: Geo-replication display status as OK, but the files do
+not get synced, only directories and symlink gets synced with the
+following error message in the log:
+
+[2011-05-02 13:42:13.467644] E [master:288:regjob] GMaster: failed to
+sync ./some\_file\`
+
+**Solution**: Geo-replication invokes rsync v3.0.0 or higher on the host
+and the remote machine. You must verify if you have installed the
+required version.
+
+Geo-replication status displays Faulty very often
+-------------------------------------------------
+
+**Description**: Geo-replication displays status as faulty very often
+with a backtrace similar to the following:
+
+2011-04-28 14:06:18.378859] E [syncdutils:131:log\_raise\_exception]
+\<top\>: FAIL: Traceback (most recent call last): File
+"/usr/local/libexec/glusterfs/python/syncdaemon/syncdutils.py", line
+152, in twraptf(\*aa) File
+"/usr/local/libexec/glusterfs/python/syncdaemon/repce.py", line 118, in
+listen rid, exc, res = recv(self.inf) File
+"/usr/local/libexec/glusterfs/python/syncdaemon/repce.py", line 42, in
+recv return pickle.load(inf) EOFError
+
+**Solution**: This error indicates that the RPC communication between
+the master gsyncd module and slave gsyncd module is broken and this can
+happen for various reasons. Check if it satisfies all the following
+pre-requisites:
+
+- Password-less SSH is set up properly between the host and the remote
+ machine.
+
+- If FUSE is installed in the machine, because geo-replication module
+ mounts the GlusterFS volume using FUSE to sync data.
+
+- If the **Slave** is a volume, check if that volume is started.
+
+- If the Slave is a plain directory, verify if the directory has been
+ created already with the required permissions.
+
+- If GlusterFS 3.2 or higher is not installed in the default location
+ (in Master) and has been prefixed to be installed in a custom
+ location, configure the `gluster-command` for it to point to the
+ exact location.
+
+- If GlusterFS 3.2 or higher is not installed in the default location
+ (in slave) and has been prefixed to be installed in a custom
+ location, configure the `remote-gsyncd-command` for it to point to
+ the exact place where gsyncd is located.
+
+Intermediate Master goes to Faulty State
+----------------------------------------
+
+**Description**: In a cascading set-up, the intermediate master goes to
+faulty state with the following log:
+
+raise RuntimeError ("aborting on uuid change from %s to %s" % \\
+RuntimeError: aborting on uuid change from af07e07c-427f-4586-ab9f-
+4bf7d299be81 to de6b5040-8f4e-4575-8831-c4f55bd41154
+
+**Solution**: In a cascading set-up the Intermediate master is loyal to
+the original primary master. The above log means that the
+geo-replication module has detected change in primary master. If this is
+the desired behavior, delete the config option volume-id in the session
+initiated from the intermediate master.
+
+Troubleshooting POSIX ACLs
+==========================
+
+This section describes the most common troubleshooting issues related to
+POSIX ACLs.
+
+setfacl command fails with “setfacl: \<file or directory name\>: Operation not supported” error
+-----------------------------------------------------------------------------------------------
+
+You may face this error when the backend file systems in one of the
+servers is not mounted with the "-o acl" option. The same can be
+confirmed by viewing the following error message in the log file of the
+server "Posix access control list is not supported".
+
+**Solution**: Remount the backend file system with "-o acl" option. For
+more information, see ?.
+
+Troubleshooting Hadoop Compatible Storage
+=========================================
+
+This section describes the most common troubleshooting issues related to
+Hadoop Compatible Storage.
+
+Time Sync
+---------
+
+Running MapReduce job may throw exceptions if the time is out-of-sync on
+the hosts in the cluster.
+
+**Solution**: Sync the time on all hosts using ntpd program.
+
+Troubleshooting NFS
+===================
+
+This section describes the most common troubleshooting issues related to
+NFS .
+
+mount command on NFS client fails with “RPC Error: Program not registered”
+--------------------------------------------------------------------------
+
+Start portmap or rpcbind service on the NFS server.
+
+This error is encountered when the server has not started correctly.
+
+On most Linux distributions this is fixed by starting portmap:
+
+`$ /etc/init.d/portmap start`
+
+On some distributions where portmap has been replaced by rpcbind, the
+following command is required:
+
+`$ /etc/init.d/rpcbind start `
+
+After starting portmap or rpcbind, gluster NFS server needs to be
+restarted.
+
+NFS server start-up fails with “Port is already in use” error in the log file."
+-------------------------------------------------------------------------------
+
+Another Gluster NFS server is running on the same machine.
+
+This error can arise in case there is already a Gluster NFS server
+running on the same machine. This situation can be confirmed from the
+log file, if the following error lines exist:
+
+ [2010-05-26 23:40:49] E [rpc-socket.c:126:rpcsvc_socket_listen] rpc-socket: binding socket failed:Address already in use
+ [2010-05-26 23:40:49] E [rpc-socket.c:129:rpcsvc_socket_listen] rpc-socket: Port is already in use
+ [2010-05-26 23:40:49] E [rpcsvc.c:2636:rpcsvc_stage_program_register] rpc-service: could not create listening connection
+ [2010-05-26 23:40:49] E [rpcsvc.c:2675:rpcsvc_program_register] rpc-service: stage registration of program failed
+ [2010-05-26 23:40:49] E [rpcsvc.c:2695:rpcsvc_program_register] rpc-service: Program registration failed: MOUNT3, Num: 100005, Ver: 3, Port: 38465
+ [2010-05-26 23:40:49] E [nfs.c:125:nfs_init_versions] nfs: Program init failed
+ [2010-05-26 23:40:49] C [nfs.c:531:notify] nfs: Failed to initialize protocols
+
+To resolve this error one of the Gluster NFS servers will have to be
+shutdown. At this time, Gluster NFS server does not support running
+multiple NFS servers on the same machine.
+
+mount command fails with “rpc.statd” related error message
+----------------------------------------------------------
+
+If the mount command fails with the following error message:
+
+mount.nfs: rpc.statd is not running but is required for remote locking.
+mount.nfs: Either use '-o nolock' to keep locks local, or start statd.
+
+Start rpc.statd
+
+For NFS clients to mount the NFS server, rpc.statd service must be
+running on the clients.
+
+Start rpc.statd service by running the following command:
+
+`$ rpc.statd `
+
+mount command takes too long to finish.
+---------------------------------------
+
+Start rpcbind service on the NFS client.
+
+The problem is that the rpcbind or portmap service is not running on the
+NFS client. The resolution for this is to start either of these services
+by running the following command:
+
+`$ /etc/init.d/portmap start`
+
+On some distributions where portmap has been replaced by rpcbind, the
+following command is required:
+
+`$ /etc/init.d/rpcbind start`
+
+NFS server glusterfsd starts but initialization fails with “nfsrpc- service: portmap registration of program failed” error message in the log.
+----------------------------------------------------------------------------------------------------------------------------------------------
+
+NFS start-up can succeed but the initialization of the NFS service can
+still fail preventing clients from accessing the mount points. Such a
+situation can be confirmed from the following error messages in the log
+file:
+
+ [2010-05-26 23:33:47] E [rpcsvc.c:2598:rpcsvc_program_register_portmap] rpc-service: Could notregister with portmap
+ [2010-05-26 23:33:47] E [rpcsvc.c:2682:rpcsvc_program_register] rpc-service: portmap registration of program failed
+ [2010-05-26 23:33:47] E [rpcsvc.c:2695:rpcsvc_program_register] rpc-service: Program registration failed: MOUNT3, Num: 100005, Ver: 3, Port: 38465
+ [2010-05-26 23:33:47] E [nfs.c:125:nfs_init_versions] nfs: Program init failed
+ [2010-05-26 23:33:47] C [nfs.c:531:notify] nfs: Failed to initialize protocols
+ [2010-05-26 23:33:49] E [rpcsvc.c:2614:rpcsvc_program_unregister_portmap] rpc-service: Could not unregister with portmap
+ [2010-05-26 23:33:49] E [rpcsvc.c:2731:rpcsvc_program_unregister] rpc-service: portmap unregistration of program failed
+ [2010-05-26 23:33:49] E [rpcsvc.c:2744:rpcsvc_program_unregister] rpc-service: Program unregistration failed: MOUNT3, Num: 100005, Ver: 3, Port: 38465
+
+1. Start portmap or rpcbind service on the NFS server.
+
+ On most Linux distributions, portmap can be started using the
+ following command:
+
+ `$ /etc/init.d/portmap start `
+
+ On some distributions where portmap has been replaced by rpcbind,
+ run the following command:
+
+ `$ /etc/init.d/rpcbind start `
+
+ After starting portmap or rpcbind, gluster NFS server needs to be
+ restarted.
+
+2. Stop another NFS server running on the same machine.
+
+ Such an error is also seen when there is another NFS server running
+ on the same machine but it is not the Gluster NFS server. On Linux
+ systems, this could be the kernel NFS server. Resolution involves
+ stopping the other NFS server or not running the Gluster NFS server
+ on the machine. Before stopping the kernel NFS server, ensure that
+ no critical service depends on access to that NFS server's exports.
+
+ On Linux, kernel NFS servers can be stopped by using either of the
+ following commands depending on the distribution in use:
+
+ `$ /etc/init.d/nfs-kernel-server stop`
+
+ `$ /etc/init.d/nfs stop`
+
+3. Restart Gluster NFS server.
+
+mount command fails with NFS server failed error.
+-------------------------------------------------
+
+mount command fails with following error
+
+*mount: mount to NFS server '10.1.10.11' failed: timed out (retrying).*
+
+Perform one of the following to resolve this issue:
+
+1. Disable name lookup requests from NFS server to a DNS server.
+
+ The NFS server attempts to authenticate NFS clients by performing a
+ reverse DNS lookup to match hostnames in the volume file with the
+ client IP addresses. There can be a situation where the NFS server
+ either is not able to connect to the DNS server or the DNS server is
+ taking too long to responsd to DNS request. These delays can result
+ in delayed replies from the NFS server to the NFS client resulting
+ in the timeout error seen above.
+
+ NFS server provides a work-around that disables DNS requests,
+ instead relying only on the client IP addresses for authentication.
+ The following option can be added for successful mounting in such
+ situations:
+
+ `option rpc-auth.addr.namelookup off `
+
+ > **Note**
+ >
+ > Note: Remember that disabling the NFS server forces authentication
+ > of clients to use only IP addresses and if the authentication
+ > rules in the volume file use hostnames, those authentication rules
+ > will fail and disallow mounting for those clients.
+
+ or
+
+2. NFS version used by the NFS client is other than version 3.
+
+ Gluster NFS server supports version 3 of NFS protocol. In recent
+ Linux kernels, the default NFS version has been changed from 3 to 4.
+ It is possible that the client machine is unable to connect to the
+ Gluster NFS server because it is using version 4 messages which are
+ not understood by Gluster NFS server. The timeout can be resolved by
+ forcing the NFS client to use version 3. The **vers** option to
+ mount command is used for this purpose:
+
+ `$ mount -o vers=3 `
+
+showmount fails with clnt\_create: RPC: Unable to receive
+---------------------------------------------------------
+
+Check your firewall setting to open ports 111 for portmap
+requests/replies and Gluster NFS server requests/replies. Gluster NFS
+server operates over the following port numbers: 38465, 38466, and
+38467.
+
+For more information, see ?.
+
+Application fails with "Invalid argument" or "Value too large for defined data type" error.
+-------------------------------------------------------------------------------------------
+
+These two errors generally happen for 32-bit nfs clients or applications
+that do not support 64-bit inode numbers or large files. Use the
+following option from the CLI to make Gluster NFS return 32-bit inode
+numbers instead: nfs.enable-ino32 \<on|off\>
+
+Applications that will benefit are those that were either:
+
+- built 32-bit and run on 32-bit machines such that they do not
+ support large files by default
+
+- built 32-bit on 64-bit systems
+
+This option is disabled by default so NFS returns 64-bit inode numbers
+by default.
+
+Applications which can be rebuilt from source are recommended to rebuild
+using the following flag with gcc:
+
+` -D_FILE_OFFSET_BITS=64`
+
+Troubleshooting File Locks
+==========================
+
+In GlusterFS 3.3 you can use `statedump` command to list the locks held
+on files. The statedump output also provides information on each lock
+with its range, basename, PID of the application holding the lock, and
+so on. You can analyze the output to know about the locks whose
+owner/application is no longer running or interested in that lock. After
+ensuring that the no application is using the file, you can clear the
+lock using the following `clear lock` command:
+
+`# `
+
+For more information on performing `statedump`, see ?
+
+**To identify locked file and clear locks**
+
+1. Perform statedump on the volume to view the files that are locked
+ using the following command:
+
+ `# gluster volume statedump inode`
+
+ For example, to display statedump of test-volume:
+
+ # gluster volume statedump test-volume
+ Volume statedump successful
+
+ The statedump files are created on the brick servers in the` /tmp`
+ directory or in the directory set using `server.statedump-path`
+ volume option. The naming convention of the dump file is
+ `<brick-path>.<brick-pid>.dump`.
+
+ The following are the sample contents of the statedump file. It
+ indicates that GlusterFS has entered into a state where there is an
+ entry lock (entrylk) and an inode lock (inodelk). Ensure that those
+ are stale locks and no resources own them.
+
+ [xlator.features.locks.vol-locks.inode]
+ path=/
+ mandatory=0
+ entrylk-count=1
+ lock-dump.domain.domain=vol-replicate-0
+ xlator.feature.locks.lock-dump.domain.entrylk.entrylk[0](ACTIVE)=type=ENTRYLK_WRLCK on basename=file1, pid = 714782904, owner=ffffff2a3c7f0000, transport=0x20e0670, , granted at Mon Feb 27 16:01:01 2012
+
+ conn.2.bound_xl./gfs/brick1.hashsize=14057
+ conn.2.bound_xl./gfs/brick1.name=/gfs/brick1/inode
+ conn.2.bound_xl./gfs/brick1.lru_limit=16384
+ conn.2.bound_xl./gfs/brick1.active_size=2
+ conn.2.bound_xl./gfs/brick1.lru_size=0
+ conn.2.bound_xl./gfs/brick1.purge_size=0
+
+ [conn.2.bound_xl./gfs/brick1.active.1]
+ gfid=538a3d4a-01b0-4d03-9dc9-843cd8704d07
+ nlookup=1
+ ref=2
+ ia_type=1
+ [xlator.features.locks.vol-locks.inode]
+ path=/file1
+ mandatory=0
+ inodelk-count=1
+ lock-dump.domain.domain=vol-replicate-0
+ inodelk.inodelk[0](ACTIVE)=type=WRITE, whence=0, start=0, len=0, pid = 714787072, owner=00ffff2a3c7f0000, transport=0x20e0670, , granted at Mon Feb 27 16:01:01 2012
+
+2. Clear the lock using the following command:
+
+ `# `
+
+ For example, to clear the entry lock on `file1` of test-volume:
+
+ # gluster volume clear-locks test-volume / kind granted entry file1
+ Volume clear-locks successful
+ vol-locks: entry blocked locks=0 granted locks=1
+
+3. Clear the inode lock using the following command:
+
+ `# `
+
+ For example, to clear the inode lock on `file1` of test-volume:
+
+ # gluster volume clear-locks test-volume /file1 kind granted inode 0,0-0
+ Volume clear-locks successful
+ vol-locks: inode blocked locks=0 granted locks=1
+
+ You can perform statedump on test-volume again to verify that the
+ above inode and entry locks are cleared.
+
+
diff --git a/doc/admin-guide/en-US/markdown/gfs_introduction.md b/doc/admin-guide/en-US/markdown/gfs_introduction.md
new file mode 100644
index 000000000..fd2c53dc9
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/gfs_introduction.md
@@ -0,0 +1,50 @@
+Introducing Gluster File System
+===============================
+
+GlusterFS is an open source, clustered file system capable of scaling to
+several petabytes and handling thousands of clients. GlusterFS can be
+flexibly combined with commodity physical, virtual, and cloud resources
+to deliver highly available and performant enterprise storage at a
+fraction of the cost of traditional solutions.
+
+GlusterFS clusters together storage building blocks over Infiniband RDMA
+and/or TCP/IP interconnect, aggregating disk and memory resources and
+managing data in a single global namespace. GlusterFS is based on a
+stackable user space design, delivering exceptional performance for
+diverse workloads.
+
+![ Virtualized Cloud Environments ][]
+
+GlusterFS is designed for today's high-performance, virtualized cloud
+environments. Unlike traditional data centers, cloud environments
+require multi-tenancy along with the ability to grow or shrink resources
+on demand. Enterprises can scale capacity, performance, and availability
+on demand, with no vendor lock-in, across on-premise, public cloud, and
+hybrid environments.
+
+GlusterFS is in production at thousands of enterprises spanning media,
+healthcare, government, education, web 2.0, and financial services. The
+following table lists the commercial offerings and its documentation
+location:
+
+ ------------------------------------------------------------------------
+ Product Documentation Location
+ ----------- ------------------------------------------------------------
+ Red Hat [][]
+ Storage
+ Software
+ Appliance
+
+ Red Hat [][1]
+ Virtual
+ Storage
+ Appliance
+
+ Red Hat [][2]
+ Storage
+ ------------------------------------------------------------------------
+
+ [ Virtualized Cloud Environments ]: images/640px-GlusterFS_Architecture.png
+ []: http://docs.redhat.com/docs/en-US/Red_Hat_Storage_Software_Appliance/index.html
+ [1]: http://docs.redhat.com/docs/en-US/Red_Hat_Virtual_Storage_Appliance/index.html
+ [2]: http://docs.redhat.com/docs/en-US/Red_Hat_Storage/index.html
diff --git a/doc/admin-guide/en-US/markdown/glossary.md b/doc/admin-guide/en-US/markdown/glossary.md
new file mode 100644
index 000000000..0febaff8f
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/glossary.md
@@ -0,0 +1,134 @@
+Glossary
+========
+
+Brick
+: A Brick is the GlusterFS basic unit of storage, represented by an
+ export directory on a server in the trusted storage pool. A Brick is
+ expressed by combining a server with an export directory in the
+ following format:
+
+ `SERVER:EXPORT`
+
+ For example:
+
+ `myhostname:/exports/myexportdir/`
+
+Cluster
+: A cluster is a group of linked computers, working together closely
+ thus in many respects forming a single computer.
+
+Distributed File System
+: A file system that allows multiple clients to concurrently access
+ data over a computer network.
+
+Filesystem
+: A method of storing and organizing computer files and their data.
+ Essentially, it organizes these files into a database for the
+ storage, organization, manipulation, and retrieval by the computer's
+ operating system.
+
+ Source: [Wikipedia][]
+
+FUSE
+: Filesystem in Userspace (FUSE) is a loadable kernel module for
+ Unix-like computer operating systems that lets non-privileged users
+ create their own file systems without editing kernel code. This is
+ achieved by running file system code in user space while the FUSE
+ module provides only a "bridge" to the actual kernel interfaces.
+
+ Source: [Wikipedia][1]
+
+Geo-Replication
+: Geo-replication provides a continuous, asynchronous, and incremental
+ replication service from site to another over Local Area Networks
+ (LAN), Wide Area Network (WAN), and across the Internet.
+
+glusterd
+: The Gluster management daemon that needs to run on all servers in
+ the trusted storage pool.
+
+Metadata
+: Metadata is data providing information about one or more other
+ pieces of data.
+
+Namespace
+: Namespace is an abstract container or environment created to hold a
+ logical grouping of unique identifiers or symbols. Each Gluster
+ volume exposes a single namespace as a POSIX mount point that
+ contains every file in the cluster.
+
+Open Source
+: Open source describes practices in production and development that
+ promote access to the end product's source materials. Some consider
+ open source a philosophy, others consider it a pragmatic
+ methodology.
+
+ Before the term open source became widely adopted, developers and
+ producers used a variety of phrases to describe the concept; open
+ source gained hold with the rise of the Internet, and the attendant
+ need for massive retooling of the computing source code.
+
+ Opening the source code enabled a self-enhancing diversity of
+ production models, communication paths, and interactive communities.
+ Subsequently, a new, three-word phrase "open source software" was
+ born to describe the environment that the new copyright, licensing,
+ domain, and consumer issues created.
+
+ Source: [Wikipedia][2]
+
+Petabyte
+: A petabyte (derived from the SI prefix peta- ) is a unit of
+ information equal to one quadrillion (short scale) bytes, or 1000
+ terabytes. The unit symbol for the petabyte is PB. The prefix peta-
+ (P) indicates a power of 1000:
+
+ 1 PB = 1,000,000,000,000,000 B = 10005 B = 1015 B.
+
+ The term "pebibyte" (PiB), using a binary prefix, is used for the
+ corresponding power of 1024.
+
+ Source: [Wikipedia][3]
+
+POSIX
+: Portable Operating System Interface (for Unix) is the name of a
+ family of related standards specified by the IEEE to define the
+ application programming interface (API), along with shell and
+ utilities interfaces for software compatible with variants of the
+ Unix operating system. Gluster exports a fully POSIX compliant file
+ system.
+
+RAID
+: Redundant Array of Inexpensive Disks (RAID) is a technology that
+ provides increased storage reliability through redundancy, combining
+ multiple low-cost, less-reliable disk drives components into a
+ logical unit where all drives in the array are interdependent.
+
+RRDNS
+: Round Robin Domain Name Service (RRDNS) is a method to distribute
+ load across application servers. RRDNS is implemented by creating
+ multiple A records with the same name and different IP addresses in
+ the zone file of a DNS server.
+
+Trusted Storage Pool
+: A storage pool is a trusted network of storage servers. When you
+ start the first server, the storage pool consists of that server
+ alone.
+
+Userspace
+: Applications running in user space don’t directly interact with
+ hardware, instead using the kernel to moderate access. Userspace
+ applications are generally more portable than applications in kernel
+ space. Gluster is a user space application.
+
+Volfile
+: Volfile is a configuration file used by glusterfs process. Volfile
+ will be usually located at `/var/lib/glusterd/vols/VOLNAME`.
+
+Volume
+: A volume is a logical collection of bricks. Most of the gluster
+ management operations happen on the volume.
+
+ [Wikipedia]: http://en.wikipedia.org/wiki/Filesystem
+ [1]: http://en.wikipedia.org/wiki/Filesystem_in_Userspace
+ [2]: http://en.wikipedia.org/wiki/Open_source
+ [3]: http://en.wikipedia.org/wiki/Petabyte
diff --git a/doc/authentication.txt b/doc/authentication.txt
index 70aafd933..73cb21d73 100644
--- a/doc/authentication.txt
+++ b/doc/authentication.txt
@@ -48,7 +48,7 @@
protocol/client:
option remote-subvolume foo-brick
* Client is connecting from a.b.c.d
-
+
protocol/server:
option auth.addr.foo-brick.allow a.b.c.d,e.f.g.h,i.j.k.l #, other ip addresses from which clients are allowed to connect to foo-brick
@@ -79,19 +79,19 @@
* reject only "user shoo from a.b.c.d"
protcol/client:
option remote-subvolume shoo-brick
-
+
protocol/server:
# observe that no "option auth.login.shoo-brick.allow shoo" given
# Also other users from a.b.c.d have to be explicitly allowed using auth.login.shoo-brick.allow ...
option auth.addr.shoo-brick.allow !a.b.c.d
- * reject only "user shoo" from a.b.c.d i.e., user shoo from a.b.c.d has to be rejected.
+ * reject only "user shoo" from a.b.c.d i.e., user shoo from a.b.c.d has to be rejected.
* same as reject only "user shoo from a.b.c.d" above, but rules have to be added whether to allow ip addresses (and users from those ips) other than a.b.c.d
****************************************************************************************************
* ip or username/password based authentication
-
+
* allow user foo or clients from a.b.c.d
protocol/client:
option remote-subvolume foo-brick
@@ -104,7 +104,7 @@
* reject user shoo or clients from a.b.c.d
protocol/client:
option remote-subvolume shoo-brick
-
+
protocol/server:
option auth.login.shoo-brick.allow <usernames other than shoo>
#for each username mentioned in the above <usernames other than shoo> list, specify password as below
diff --git a/doc/coding-standard.tex b/doc/coding-standard.tex
index 92f799c01..30d412a91 100644
--- a/doc/coding-standard.tex
+++ b/doc/coding-standard.tex
@@ -27,7 +27,7 @@ purpose. The comment should be descriptive without being overly verbose.
\textsl{Good}:
\begin{verbatim}
- DBTYPE access_mode; /* access mode for accessing
+ DBTYPE access_mode; /* access mode for accessing
* the databases, can be
* DB_HASH, DB_BTREE
* (option access-mode <mode>)
@@ -69,7 +69,7 @@ Never use a non-constant expression as the initialization value for a variable.
\section*{$\bullet$ Validate all arguments to a function}
All pointer arguments to a function must be checked for \texttt{NULL}.
-A macro named \texttt{VALIDATE} (in \texttt{common-utils.h})
+A macro named \texttt{VALIDATE} (in \texttt{common-utils.h})
takes one argument, and if it is \texttt{NULL}, writes a log message and
jumps to a label called \texttt{err} after setting op\_ret and op\_errno
appropriately. It is recommended to use this template.
@@ -142,8 +142,8 @@ for success or failure.
\begin{verbatim}
op_ret = close (_fd);
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "close on file %s failed (%s)", real_path,
+ gf_log (this->name, GF_LOG_ERROR,
+ "close on file %s failed (%s)", real_path,
strerror (errno));
op_errno = errno;
goto out;
@@ -157,9 +157,9 @@ memory allocation fails, the call should be unwound and an error
returned to the user.
\section*{$\bullet$ Use result args and reserve the return value to indicate success or failure}
-The return value of every functions must indicate success or failure (unless
-it is impossible for the function to fail --- e.g., boolean functions). If
-the function needs to return additional data, it must be returned using a
+The return value of every functions must indicate success or failure (unless
+it is impossible for the function to fail --- e.g., boolean functions). If
+the function needs to return additional data, it must be returned using a
result (pointer) argument.
\vspace{2ex}
@@ -192,11 +192,11 @@ Unless impossible, use the length-limited versions of the string functions.
\end{verbatim}
\section*{$\bullet$ No dead or commented code}
-There must be no dead code (code to which control can never be passed) or
+There must be no dead code (code to which control can never be passed) or
commented out code in the codebase.
\section*{$\bullet$ Only one unwind and return per function}
-There must be only one exit out of a function. \texttt{UNWIND} and return
+There must be only one exit out of a function. \texttt{UNWIND} and return
should happen at only point in the function.
\section*{$\bullet$ Function length or Keep functions small}
@@ -305,7 +305,7 @@ documentation.
\end{verbatim}
\subsection*{Indicating critical sections}
-To clearly show regions of code which execute with locks held, use
+To clearly show regions of code which execute with locks held, use
the following format:
\begin{verbatim}
@@ -324,7 +324,7 @@ point, \texttt{out}. At that point, the code should detect the error
that has occured and do appropriate cleanup.
\begin{verbatim}
-int32_t
+int32_t
sample_fop (call_frame_t *frame, xlator_t *this, ...)
{
char * var1 = NULL;
@@ -337,13 +337,13 @@ sample_fop (call_frame_t *frame, xlator_t *this, ...)
VALIDATE_OR_GOTO (this, out);
/* other validations */
-
+
dir = opendir (...);
if (dir == NULL) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "opendir failed on %s (%s)", loc->path,
+ gf_log (this->name, GF_LOG_ERROR,
+ "opendir failed on %s (%s)", loc->path,
strerror (op_errno));
goto out;
}
@@ -367,11 +367,10 @@ sample_fop (call_frame_t *frame, xlator_t *this, ...)
if (dir) {
closedir (dir);
dir = NULL;
- }
-
+ }
+
if (pfd) {
- if (pfd->path)
- FREE (pfd->path);
+ FREE (pfd->path);
FREE (pfd);
pfd = NULL;
}
diff --git a/doc/examples/Makefile.am b/doc/examples/Makefile.am
deleted file mode 100644
index b4c93f4c9..000000000
--- a/doc/examples/Makefile.am
+++ /dev/null
@@ -1,8 +0,0 @@
-EXTRA = README unify.vol replicate.vol stripe.vol protocol-client.vol protocol-server.vol posix-locks.vol trash.vol write-behind.vol io-threads.vol io-cache.vol read-ahead.vol filter.vol trace.vol
-EXTRA_DIST = $(EXTRA)
-
-docdir = $(datadir)/doc/$(PACKAGE_NAME)
-Examplesdir = $(docdir)/examples
-Examples_DATA = $(EXTRA)
-
-CLEANFILES =
diff --git a/doc/examples/README b/doc/examples/README
deleted file mode 100644
index 4d472ac08..000000000
--- a/doc/examples/README
+++ /dev/null
@@ -1,13 +0,0 @@
-GlusterFS's translator feature is very flexible and there are quite a lot of ways one
-can configure their filesystem to behave like.
-
-Volume Specification is a way in which GlusterFS understands how it has to work, based
-on what is written there.
-
-Going through the following URLs may give you more idea about all these.
-
-* http://www.gluster.org/docs/index.php/GlusterFS
-* http://www.gluster.org/docs/index.php/GlusterFS_Volume_Specification
-* http://www.gluster.org/docs/index.php/GlusterFS_Translators
-
-Mail us any doubts, suggestions on 'gluster-devel(at)nongnu.org'
diff --git a/doc/examples/filter.vol b/doc/examples/filter.vol
deleted file mode 100644
index ca5c59837..000000000
--- a/doc/examples/filter.vol
+++ /dev/null
@@ -1,23 +0,0 @@
-volume client
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 192.168.1.10 # IP address of the remote brick
- option remote-subvolume brick # name of the remote volume
-end-volume
-
-## In normal clustered storage type, any of the cluster translators can come here.
-#
-# Definition of other clients
-#
-# Definition of cluster translator (may be unify, afr, or unify over afr)
-#
-
-### 'Filter' translator is used on client side (or server side according to needs). This traslator makes all the below translators, (or say volumes) as read-only. Hence if one wants a 'read-only' filesystem, using filter as the top most volume will make it really fast as the fops are returned from this level itself.
-
-volume filter-ro
- type features/filter
- option root-squashing enable
-# option completely-read-only yes
-# translate-uid 1-99=0
- subvolumes client
-end-volume \ No newline at end of file
diff --git a/doc/examples/io-cache.vol b/doc/examples/io-cache.vol
deleted file mode 100644
index 5f3eca4c5..000000000
--- a/doc/examples/io-cache.vol
+++ /dev/null
@@ -1,25 +0,0 @@
-volume client
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 192.168.1.10 # IP address of the remote brick
- option remote-subvolume brick # name of the remote volume
-end-volume
-
-## In normal clustered storage type, any of the cluster translators can come here.
-#
-# Definition of other clients
-#
-# Definition of cluster translator (may be unify, replicate, or unify over replicate)
-#
-
-### 'IO-Cache' translator is best used on client side when a filesystem has file which are not modified frequently but read several times. For example, while compiling a kernel, *.h files are read while compiling every *.c file, in these case, io-cache translator comes very handy, as it keeps the whole file content in the cache, and serves from the cache.
-# One can provide the priority of the cache too.
-
-volume ioc
- type performance/io-cache
- subvolumes client # In this example it is 'client' you may have to change it according to your spec file.
- option page-size 1MB # 128KB is default
- option cache-size 64MB # 32MB is default
- option force-revalidate-timeout 5 # 1second is default
- option priority *.html:2,*:1 # default is *:0
-end-volume
diff --git a/doc/examples/io-threads.vol b/doc/examples/io-threads.vol
deleted file mode 100644
index 9954724e1..000000000
--- a/doc/examples/io-threads.vol
+++ /dev/null
@@ -1,21 +0,0 @@
-
-volume brick
- type storage/posix # POSIX FS translator
- option directory /home/export # Export this directory
-end-volume
-
-### 'IO-threads' translator gives a threading behaviour to File I/O calls. All other normal fops are having default behaviour. Loading this on server side helps to reduce the contension of network. (Which is assumed as a GlusterFS hang).
-# One can load it in client side to reduce the latency involved in case of a slow network, when loaded below write-behind.
-volume iot
- type performance/io-threads
- subvolumes brick
- option thread-count 4 # default value is 1
-end-volume
-
-volume server
- type protocol/server
- subvolumes iot brick
- option transport-type tcp # For TCP/IP transport
- option auth.addr.brick.allow 192.168.* # Allow access to "brick" volume
- option auth.addr.iot.allow 192.168.* # Allow access to "p-locks" volume
-end-volume
diff --git a/doc/examples/posix-locks.vol b/doc/examples/posix-locks.vol
deleted file mode 100644
index b9c9e7a64..000000000
--- a/doc/examples/posix-locks.vol
+++ /dev/null
@@ -1,20 +0,0 @@
-
-volume brick
- type storage/posix # POSIX FS translator
- option directory /home/export # Export this directory
-end-volume
-
-### 'Posix-locks' feature should be added on the server side (as posix volume as subvolume) because it contains the actual file.
-volume p-locks
- type features/posix-locks
- subvolumes brick
- option mandatory on
-end-volume
-
-volume server
- type protocol/server
- subvolumes p-locks brick
- option transport-type tcp
- option auth.addr.brick.allow 192.168.* # Allow access to "brick" volume
- option auth.addr.p-locks.allow 192.168.* # Allow access to "p-locks" volume
-end-volume
diff --git a/doc/examples/protocol-client.vol b/doc/examples/protocol-client.vol
deleted file mode 100644
index 43108f2c2..000000000
--- a/doc/examples/protocol-client.vol
+++ /dev/null
@@ -1,17 +0,0 @@
-volume client
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
- option remote-host 192.168.1.10 # IP address of the remote brick
-# option transport.socket.remote-port 6996 # default server port is 6996
-
-# option transport-type ib-verbs # for Infiniband verbs transport
-# option transport.ib-verbs.work-request-send-size 1048576
-# option transport.ib-verbs.work-request-send-count 16
-# option transport.ib-verbs.work-request-recv-size 1048576
-# option transport.ib-verbs.work-request-recv-count 16
-# option transport.ib-verbs.remote-port 6996 # default server port is 6996
-
- option remote-subvolume brick # name of the remote volume
-# option transport-timeout 30 # default value is 120seconds
-end-volume
diff --git a/doc/examples/protocol-server.vol b/doc/examples/protocol-server.vol
deleted file mode 100644
index 88477511f..000000000
--- a/doc/examples/protocol-server.vol
+++ /dev/null
@@ -1,25 +0,0 @@
-
-### Export volume "brick" with the contents of "/home/export" directory.
-volume brick
- type storage/posix # POSIX FS translator
- option directory /home/export # Export this directory
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
-# option transport.socket.listen-port 6996 # Default is 6996
-
-# option transport-type ib-verbs # For Infiniband Verbs transport
-# option transport.ib-verbs.work-request-send-size 131072
-# option transport.ib-verbs.work-request-send-count 64
-# option transport.ib-verbs.work-request-recv-size 131072
-# option transport.ib-verbs.work-request-recv-count 64
-# option transport.ib-verbs.listen-port 6996 # Default is 6996
-
-# option bind-address 192.168.1.10 # Default is to listen on all interfaces
-# option client-volume-filename /etc/glusterfs/glusterfs-client.vol
- subvolumes brick
- option auth.addr.brick.allow 192.168.* # Allow access to "brick" volume
-end-volume
diff --git a/doc/examples/read-ahead.vol b/doc/examples/read-ahead.vol
deleted file mode 100644
index 3ce0d95ac..000000000
--- a/doc/examples/read-ahead.vol
+++ /dev/null
@@ -1,22 +0,0 @@
-volume client
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 192.168.1.10 # IP address of the remote brick
- option remote-subvolume brick # name of the remote volume
-end-volume
-
-## In normal clustered storage type, any of the cluster translators can come here.
-#
-# Definition of other clients
-#
-# Definition of cluster translator (may be unify, replicate, or unify over replicate)
-#
-
-### 'Read-Ahead' translator is best utilized on client side, as it prefetches the file contents when the first read() call is issued.
-volume ra
- type performance/read-ahead
- subvolumes client # In this example it is 'client' you may have to change it according to your spec file.
- option page-size 1MB # default is 256KB
- option page-count 4 # default is 2
- option force-atime-update no # defalut is 'no'
-end-volume
diff --git a/doc/examples/replicate.vol b/doc/examples/replicate.vol
deleted file mode 100644
index 8c9541444..000000000
--- a/doc/examples/replicate.vol
+++ /dev/null
@@ -1,119 +0,0 @@
-### 'NOTE'
-# This file has both server spec and client spec to get an understanding of stripe's spec file. Hence can't be used as it is, as a GlusterFS spec file.
-# One need to seperate out server spec and client spec to get it working.
-
-#=========================================================================
-
-# **** server1 spec file ****
-
-### Export volume "brick" with the contents of "/home/export" directory.
-volume posix1
- type storage/posix # POSIX FS translator
- option directory /home/export1 # Export this directory
-end-volume
-
-### Add POSIX record locking support to the storage brick
-volume brick1
- type features/posix-locks
- option mandatory on # enables mandatory locking on all files
- subvolumes posix1
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
- option transport.socket.listen-port 6996 # Default is 6996
-# option client-volume-filename /etc/glusterfs/glusterfs-client.vol
- subvolumes brick1
- option auth.addr.brick1.allow * # access to "brick" volume
-end-volume
-
-
-#=========================================================================
-
-# **** server2 spec file ****
-volume posix2
- type storage/posix # POSIX FS translator
- option directory /home/export2 # Export this directory
-end-volume
-
-### Add POSIX record locking support to the storage brick
-volume brick2
- type features/posix-locks
- option mandatory on # enables mandatory locking on all files
- subvolumes posix2
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
- option transport.socket.listen-port 6997 # Default is 6996
- subvolumes brick2
- option auth.addr.brick2.allow * # Allow access to "brick" volume
-end-volume
-
-
-#=========================================================================
-
-# **** server3 spec file ****
-
-volume posix3
- type storage/posix # POSIX FS translator
- option directory /home/export3 # Export this directory
-end-volume
-
-### Add POSIX record locking support to the storage brick
-volume brick3
- type features/posix-locks
- option mandatory on # enables mandatory locking on all files
- subvolumes posix3
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
- option transport.socket.listen-port 6998 # Default is 6996
- subvolumes brick3
- option auth.addr.brick3.allow * # access to "brick" volume
-end-volume
-
-
-#=========================================================================
-
-# **** Clustered Client config file ****
-
-### Add client feature and attach to remote subvolume of server1
-volume client1
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 127.0.0.1 # IP address of the remote brick
- option transport.socket.remote-port 6996 # default server port is 6996
- option remote-subvolume brick1 # name of the remote volume
-end-volume
-
-### Add client feature and attach to remote subvolume of server2
-volume client2
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 127.0.0.1 # IP address of the remote brick
- option transport.socket.remote-port 6997 # default server port is 6996
- option remote-subvolume brick2 # name of the remote volume
-end-volume
-
-volume client3
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 127.0.0.1 # IP address of the remote brick
- option transport.socket.remote-port 6998 # default server port is 6996
- option remote-subvolume brick3 # name of the remote volume
-end-volume
-
-## Add replicate feature.
-volume replicate
- type cluster/replicate
- subvolumes client1 client2 client3
-end-volume
-
diff --git a/doc/examples/stripe.vol b/doc/examples/stripe.vol
deleted file mode 100644
index ea24cf860..000000000
--- a/doc/examples/stripe.vol
+++ /dev/null
@@ -1,121 +0,0 @@
-
-### 'NOTE'
-# This file has both server spec and client spec to get an understanding of stripe's spec file. Hence can't be used as it is, as a GlusterFS spec file.
-# One need to seperate out server spec and client spec to get it working.
-
-#=========================================================================
-
-# **** server1 spec file ****
-
-### Export volume "brick" with the contents of "/home/export" directory.
-volume posix1
- type storage/posix # POSIX FS translator
- option directory /home/export1 # Export this directory
-end-volume
-
-### Add POSIX record locking support to the storage brick
-volume brick1
- type features/posix-locks
- option mandatory on # enables mandatory locking on all files
- subvolumes posix1
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
- option transport.socket.listen-port 6996 # Default is 6996
-# option client-volume-filename /etc/glusterfs/glusterfs-client.vol
- subvolumes brick1
- option auth.addr.brick1.allow * # access to "brick" volume
-end-volume
-
-
-#=========================================================================
-
-# **** server2 spec file ****
-volume posix2
- type storage/posix # POSIX FS translator
- option directory /home/export2 # Export this directory
-end-volume
-
-### Add POSIX record locking support to the storage brick
-volume brick2
- type features/posix-locks
- option mandatory on # enables mandatory locking on all files
- subvolumes posix2
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
- option transport.socket.listen-port 6997 # Default is 6996
- subvolumes brick2
- option auth.addr.brick2.allow * # Allow access to "brick" volume
-end-volume
-
-
-#=========================================================================
-
-# **** server3 spec file ****
-
-volume posix3
- type storage/posix # POSIX FS translator
- option directory /home/export3 # Export this directory
-end-volume
-
-### Add POSIX record locking support to the storage brick
-volume brick3
- type features/posix-locks
- option mandatory on # enables mandatory locking on all files
- subvolumes posix3
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
- option transport.socket.listen-port 6998 # Default is 6996
- subvolumes brick3
- option auth.addr.brick3.allow * # access to "brick" volume
-end-volume
-
-
-#=========================================================================
-
-# **** Clustered Client config file ****
-
-### Add client feature and attach to remote subvolume of server1
-volume client1
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 127.0.0.1 # IP address of the remote brick
- option transport.socket.remote-port 6996 # default server port is 6996
- option remote-subvolume brick1 # name of the remote volume
-end-volume
-
-### Add client feature and attach to remote subvolume of server2
-volume client2
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 127.0.0.1 # IP address of the remote brick
- option transport.socket.remote-port 6997 # default server port is 6996
- option remote-subvolume brick2 # name of the remote volume
-end-volume
-
-volume client3
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 127.0.0.1 # IP address of the remote brick
- option transport.socket.remote-port 6998 # default server port is 6996
- option remote-subvolume brick3 # name of the remote volume
-end-volume
-
-## Add Stripe Feature.
-volume stripe
- type cluster/stripe
- subvolumes client1 client2 client3
- option block-size 1MB
-end-volume
-
diff --git a/doc/examples/trace.vol b/doc/examples/trace.vol
deleted file mode 100644
index 3f4864db4..000000000
--- a/doc/examples/trace.vol
+++ /dev/null
@@ -1,16 +0,0 @@
-volume client
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 192.168.1.10 # IP address of the remote brick
- option remote-subvolume brick # name of the remote volume
-end-volume
-
-### 'Trace' translator is a very handy debug tool for GlusterFS, as it can be loaded between any of the two volumes without changing the behaviour of the filesystem.
-# On client side it can be the top most volume in spec (like now) to understand what calls are made on FUSE filesystem, when a mounted filesystem is accessed.
-
-volume trace
- type debug/trace
- subvolumes client
-end-volume
-
-# 'NOTE:' By loading 'debug/trace' translator, filesystem will be very slow as it logs each and every calls to the log file.
diff --git a/doc/examples/trash.vol b/doc/examples/trash.vol
deleted file mode 100644
index 16e71be32..000000000
--- a/doc/examples/trash.vol
+++ /dev/null
@@ -1,20 +0,0 @@
-
-volume brick
- type storage/posix # POSIX FS translator
- option directory /home/export # Export this directory
-end-volume
-
-### 'Trash' translator is best used on server side as it just renames the deleted file inside 'trash-dir', and it makes 4 seperate fops for one unlink call.
-volume trashcan
- type features/trash
- subvolumes brick
- option trash-dir /.trashcan
-end-volume
-
-volume server
- type protocol/server
- subvolumes trashcan brick
- option transport-type tcp # For TCP/IP transport
- option auth.addr.brick.allow 192.168.* # Allow access to "brick" volume
- option auth.addr.trashcan.allow 192.168.* # Allow access to "p-locks" volume
-end-volume
diff --git a/doc/examples/unify.vol b/doc/examples/unify.vol
deleted file mode 100644
index 4f7415a23..000000000
--- a/doc/examples/unify.vol
+++ /dev/null
@@ -1,178 +0,0 @@
-### 'NOTE'
-# This file has both server spec and client spec to get an understanding of stripe's spec file. Hence can't be used as it is, as a GlusterFS spec file.
-# One need to seperate out server spec and client spec to get it working.
-
-
-#=========================================================================
-
-# **** server1 spec file ****
-
-### Export volume "brick" with the contents of "/home/export" directory.
-volume posix1
- type storage/posix # POSIX FS translator
- option directory /home/export1 # Export this directory
-end-volume
-
-### Add POSIX record locking support to the storage brick
-volume brick1
- type features/posix-locks
- option mandatory on # enables mandatory locking on all files
- subvolumes posix1
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
- option transport.socket.listen-port 6996 # Default is 6996
-# option client-volume-filename /etc/glusterfs/glusterfs-client.vol
- subvolumes brick1
- option auth.addr.brick1.allow * # access to "brick" volume
-end-volume
-
-
-#=========================================================================
-
-# **** server2 spec file ****
-volume posix2
- type storage/posix # POSIX FS translator
- option directory /home/export2 # Export this directory
-end-volume
-
-### Add POSIX record locking support to the storage brick
-volume brick2
- type features/posix-locks
- option mandatory on # enables mandatory locking on all files
- subvolumes posix2
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
- option transport.socket.listen-port 6997 # Default is 6996
- subvolumes brick2
- option auth.addr.brick2.allow * # Allow access to "brick" volume
-end-volume
-
-
-#=========================================================================
-
-# **** server3 spec file ****
-
-volume posix3
- type storage/posix # POSIX FS translator
- option directory /home/export3 # Export this directory
-end-volume
-
-### Add POSIX record locking support to the storage brick
-volume brick3
- type features/posix-locks
- option mandatory on # enables mandatory locking on all files
- subvolumes posix3
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
- option transport.socket.listen-port 6998 # Default is 6996
- subvolumes brick3
- option auth.addr.brick3.allow * # access to "brick" volume
-end-volume
-
-#=========================================================================
-
-# *** server for namespace ***
-### Export volume "brick" with the contents of "/home/export" directory.
-volume brick-ns
- type storage/posix # POSIX FS translator
- option directory /home/export-ns # Export this directory
-end-volume
-
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
- option transport.socket.listen-port 6999 # Default is 6996
- subvolumes brick-ns
- option auth.addr.brick-ns.allow * # access to "brick" volume
-end-volume
-
-
-#=========================================================================
-
-# **** Clustered Client config file ****
-
-### Add client feature and attach to remote subvolume of server1
-volume client1
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
- option remote-host 127.0.0.1 # IP address of the remote brick
- option transport.socket.remote-port 6996 # default server port is 6996
- option remote-subvolume brick1 # name of the remote volume
-end-volume
-
-### Add client feature and attach to remote subvolume of server2
-volume client2
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
- option remote-host 127.0.0.1 # IP address of the remote brick
- option transport.socket.remote-port 6997 # default server port is 6996
- option remote-subvolume brick2 # name of the remote volume
-end-volume
-
-volume client3
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
- option remote-host 127.0.0.1 # IP address of the remote brick
- option transport.socket.remote-port 6998 # default server port is 6996
- option remote-subvolume brick3 # name of the remote volume
-end-volume
-
-
-volume client-ns
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
- option remote-host 127.0.0.1 # IP address of the remote brick
- option transport.socket.remote-port 6999 # default server port is 6996
- option remote-subvolume brick-ns # name of the remote volume
-end-volume
-
-### Add unify feature to cluster the servers. Associate an
-### appropriate scheduler that matches your I/O demand.
-volume bricks
- type cluster/unify
- option namespace client-ns # this will not be storage child of unify.
- subvolumes client1 client2 client3
-### ** ALU Scheduler Option **
- option self-heal background # foreground off # default is foreground
- option scheduler alu
- option alu.limits.min-free-disk 5% #%
- option alu.limits.max-open-files 10000
- option alu.order disk-usage:read-usage:write-usage:open-files-usage:disk-speed-usage
- option alu.disk-usage.entry-threshold 2GB
- option alu.disk-usage.exit-threshold 128MB
- option alu.open-files-usage.entry-threshold 1024
- option alu.open-files-usage.exit-threshold 32
- option alu.read-usage.entry-threshold 20 #%
- option alu.read-usage.exit-threshold 4 #%
- option alu.write-usage.entry-threshold 20 #%
- option alu.write-usage.exit-threshold 4 #%
- option alu.disk-speed-usage.entry-threshold 0 # DO NOT SET IT. SPEED IS CONSTANT!!!.
- option alu.disk-speed-usage.exit-threshold 0 # DO NOT SET IT. SPEED IS CONSTANT!!!.
- option alu.stat-refresh.interval 10sec
- option alu.stat-refresh.num-file-create 10
-### ** Random Scheduler **
-# option scheduler random
-### ** NUFA Scheduler **
-# option scheduler nufa
-# option nufa.local-volume-name posix1
-### ** Round Robin (RR) Scheduler **
-# option scheduler rr
-# option rr.limits.min-free-disk 5% #%
-end-volume
-
diff --git a/doc/examples/write-behind.vol b/doc/examples/write-behind.vol
deleted file mode 100644
index 9c6bae11c..000000000
--- a/doc/examples/write-behind.vol
+++ /dev/null
@@ -1,26 +0,0 @@
-volume client
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 192.168.1.10 # IP address of the remote brick
- option remote-subvolume brick # name of the remote volume
-end-volume
-
-## In normal clustered storage type, any of the cluster translators can come here.
-#
-# Definition of other clients
-#
-# Definition of cluster translator (may be unify, replicate, or unify over replicate)
-#
-
-
-### 'Write-behind' translator is a performance booster for write operation. Best used on client side, as its main intension is to reduce the network latency caused for each write operation.
-
-volume wb
- type performance/write-behind
- subvolumes client # In this example it is 'client' you may have to change it according to your spec file.
- option flush-behind on # default value is 'off'
- option window-size 2MB
- option aggregate-size 1MB # default value is 0
- option enable_O_SYNC no # default is no
- option disable-for-first-nbytes 128KB #default is 1
-end-volume
diff --git a/doc/features/rdma-cm-in-3.4.0.txt b/doc/features/rdma-cm-in-3.4.0.txt
new file mode 100644
index 000000000..fd953e56b
--- /dev/null
+++ b/doc/features/rdma-cm-in-3.4.0.txt
@@ -0,0 +1,9 @@
+Following is the impact of http://review.gluster.org/#change,149.
+
+New userspace packages needed:
+librdmacm
+librdmacm-devel
+
+rdmacm needs an IPoIB address for connection establishment. This requirement results in following issues:
+* Because of bug #890502, we've to probe the peer on an IPoIB address. This imposes a restriction that all volumes created in the future have to communicate over IPoIB address (irrespective of whether they use gluster's tcp or rdma transport).
+* Currently client has an independence to choose b/w tcp and rdma transports while communicating with the server (by creating volumes with transport-type tcp,rdma). This independence was a byproduct of our ability use the normal channel used with transport-type tcp for rdma connectiion establishment handshake too. However, with new requirement of IPoIB address for connection establishment, we loose this independence (till we bring in multi-network support - where a brick can be identified by a set of ip-addresses and we can choose different pairs of ip-addresses for communication based on our requirements - in glusterd).
diff --git a/doc/features/rebalance.md b/doc/features/rebalance.md
new file mode 100644
index 000000000..29b993008
--- /dev/null
+++ b/doc/features/rebalance.md
@@ -0,0 +1,74 @@
+## Background
+
+
+For a more detailed description, view Jeff Darcy's blog post [here]
+(http://hekafs.org/index.php/2012/03/glusterfs-algorithms-distribution/)
+
+GlusterFS uses the distribute translator (DHT) to aggregate space of multiple servers. DHT distributes files among its subvolumes using a consistent hashing method providing 32-bit hashes. Each DHT subvolume is given a range in the 32-bit hash space. A hash value is calculated for every file using a combination of its name. The file is then placed in the subvolume with the hash range that contains the hash value.
+
+## What is rebalance?
+
+The rebalance process migrates files between the DHT subvolumes when necessary.
+
+## When is rebalance required?
+
+Rebalancing is required for two main cases.
+
+1. Addition/Removal of bricks
+
+2. Renaming of a file
+
+## Addition/Removal of bricks
+
+Whenever the number or order of DHT subvolumes change, the hash range given to each subvolume is recalculated. When this happens, already existing files on the volume will need to be moved to the correct subvolume based on their hash. Rebalance does this activity.
+
+Addition of bricks which increase the size of a volume will increase the number of DHT subvolumes and lead to recalculation of hash ranges (This doesn't happen when bricks are added to a volume to increase redundancy, i.e. increase replica count of a volume). This will require an explicit rebalance command to be issued to migrate the files.
+
+Removal of bricks which decrease the size of a volumes also causes the hash ranges of DHT to be recalculated. But we don't need to issue an explicit rebalance command in this case, as rebalance is done automatically by the remove-brick process if needed.
+
+## Renaming of a file
+
+Renaming of file will cause its hash to change. The file now needs to be moved to the correct subvolume based on its new hash. Rebalance does this.
+
+## How does rebalance work?
+
+At a high level, the rebalance process consists of the following 3 steps:
+
+1. Crawl the volume to access all files
+2. Calculate the hash for the file
+3. If needed move the migrate the file to the correct subvolume.
+
+
+The rebalance process has been optimized by making it distributed across the trusted storage pool. With distributed rebalance, a rebalance process is launched on each peer in the cluster. Each rebalance process will crawl files on only those bricks of the volume which are present on it, and migrate the files which need migration to the correct brick. This speeds up the rebalance process considerably.
+
+## What will happen if rebalance is not run?
+
+### Addition of bricks
+
+With the current implementation of add-brick, when the size of a volume is augmented by adding new bricks, the new bricks are not put into use immediately i.e., the hash ranges there not recalculated immediately. This means that the files will still be placed only onto the existing bricks, leaving the newly added storage space unused. Starting a rebalance process on the volume will cause the hash ranges to be recalculated with the new bricks included, which allows the newly added storage space to be used.
+
+### Renaming a file
+
+When a file rename causes the file to be hashed to a new subvolume, DHT writes a link file on the new subvolume leaving the actual file on the original subvolume. A link file is an empty file, which has an extended attribute set that points to the subvolume on which the actual file exists. So, when a client accesses the renamed file, DHT first looks for the file in the hashed subvolume and gets the link file. DHT understands the link file, and gets the actual file from the subvolume pointed to by the link file. This leads to a slight reduction in performance. A rebalance will move the actual file to the hashed subvolume, allowing clients to access the file directly once again.
+
+## Are clients affected during a rebalance process?
+
+The rebalance process is transparent to applications on the clients. Applications which have open files on the volume will not be affected by the rebalance process, even if the open file requires migration. The DHT translator on the client will hide the migration from the applications.
+
+##How are open files migrated?
+
+(A more technical description of the algorithm used can be seen in the commit message of commit a07bb18c8adeb8597f62095c5d1361c5bad01f09.)
+
+To achieve migration of open files, two things need to be assured of,
+a) any writes or changes happening to the file during migration are correctly synced to destination subvolume after the migration is complete.
+b) any further changes should be made to the destination subvolume
+
+Both of these requirements require sending notificatoins to clients. Clients are notified by overloading an attribute used in every callback functions. DHT understands these attributes in the callbacks and can be notified if a file is being migrated or not.
+
+During rebalance, a file will be in two phases
+
+1. Migration in process - In this phase the file is being migrated by the rebalance process from the source subvolume to the destination subvolume. The rebalance process will set a 'in-migration' attribute on the file, which will notify the clients' DHT translator. The clients' DHT translator will then take care to send any further changes to the destination subvolume as well. This way we satisfy the first requirement
+
+2. Migration completed - Once the file has been migrated, the rebalance process will set a 'migration-complete' attribute on the file. The clients will be notified of the completion and all further operations on the file will happen on the destination subvolume.
+
+The DHT translator handles the above and allows the applications on the clients to continue working on a file under migration.
diff --git a/doc/gluster.8 b/doc/gluster.8
new file mode 100644
index 000000000..3c78fb8b1
--- /dev/null
+++ b/doc/gluster.8
@@ -0,0 +1,125 @@
+
+.\" Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+.\" This file is part of GlusterFS.
+.\"
+.\" This file is licensed to you under your choice of the GNU Lesser
+.\" General Public License, version 3 or any later version (LGPLv3 or
+.\" later), or the GNU General Public License, version 2 (GPLv2), in all
+.\" cases as published by the Free Software Foundation.
+.\"
+.\"
+.TH Gluster 8 "Gluster command line utility" "07 March 2011" "Gluster Inc."
+.SH NAME
+gluster - Gluster Console Manager (command line utility)
+.SH SYNOPSIS
+.B gluster
+.PP
+To run the program and display gluster prompt:
+.PP
+.B gluster
+.PP
+(or)
+.PP
+To specify a command directly:
+.PP
+.B gluster
+.I [commands] [options]
+
+.SH DESCRIPTION
+The Gluster Console Manager is a command line utility for elastic volume management. You can run the gluster command on any export server. The command enables administrators to perform cloud operations, such as creating, expanding, shrinking, rebalancing, and migrating volumes without needing to schedule server downtime.
+.SH COMMANDS
+
+.SS "Volume Commands"
+.PP
+.TP
+
+\fB\ volume info [all|<VOLNAME>] \fR
+Display information about all volumes, or the specified volume.
+.TP
+\fB\ volume create <NEW-VOLNAME> [stripe <COUNT>] [replica <COUNT>] [transport <tcp|rdma|tcp,rdma>] <NEW-BRICK> ... \fR
+Create a new volume of the specified type using the specified bricks and transport type (the default transport type is tcp).
+To create a volume with both transports (tcp and rdma), give 'transport tcp,rdma' as an option.
+.TP
+\fB\ volume delete <VOLNAME> \fR
+Delete the specified volume.
+.TP
+\fB\ volume start <VOLNAME> \fR
+Start the specified volume.
+.TP
+\fB\ volume stop <VOLNAME> [force] \fR
+Stop the specified volume.
+.TP
+\fB\ volume rename <VOLNAME> <NEW-VOLNAME> \fR
+Rename the specified volume.
+.TP
+\fB\ volume set <VOLNAME> <OPTION> <PARAMETER> [<OPTION> <PARAMETER>] ... \fR
+Set the volume options.
+.TP
+\fB\ volume help \fR
+Display help for the volume command.
+.SS "Brick Commands"
+.PP
+.TP
+\fB\ volume add-brick <VOLNAME> <NEW-BRICK> ... \fR
+Add the specified brick to the specified volume.
+.TP
+\fB\ volume remove-brick <VOLNAME> <BRICK> ... \fR
+Remove the specified brick from the specified volume.
+.IP
+.B Note:
+If you remove the brick, the data stored in that brick will not be available. You can migrate data from one brick to another using
+.B replace-brick
+option.
+.TP
+\fB\ volume rebalance-brick <VOLNAME>(<BRICK> <NEW-BRICK>) start \fR
+Start rebalancing the specified volume.
+.TP
+\fB\ volume rebalance <VOLNAME> stop \fR
+Stop rebalancing the specified volume.
+.TP
+\fB\ volume rebalance <VOLNAME> status \fR
+Display the rebalance status of the specified volume.
+.TP
+\fB\ volume replace-brick <VOLNAME> (<BRICK> <NEW-BRICK>) start|pause|abort|status|commit \fR
+Replace the specified brick.
+.SS "Log Commands"
+.TP
+\fB\ volume log filename <VOLNAME> [BRICK] <DIRECTORY> \fB
+Set the log directory for the corresponding volume/brick.
+.TP
+\fB\ volume log locate <VOLNAME> [BRICK] \fB
+Locate the log file for corresponding volume/brick.
+.TP
+\fB\ volume log rotate <VOLNAME> [BRICK] \fB
+Rotate the log file for corresponding volume/brick.
+.SS "Peer Commands"
+.TP
+\fB\ peer probe <HOSTNAME> \fR
+Probe the specified peer.
+.TP
+\fB\ peer detach <HOSTNAME> \fR
+Detach the specified peer.
+.TP
+\fB\ peer status \fR
+Display the status of peers.
+.TP
+\fB\ peer help \fR
+Display help for the peer command.
+.SS "Other Commands"
+.TP
+\fB\ help \fR
+Display the command options.
+.TP
+\fB\ quit \fR
+Exit the gluster command line interface.
+
+.SH FILES
+/var/lib/glusterd/*
+.SH SEE ALSO
+.nf
+\fBfusermount\fR(1), \fBmount.glusterfs\fR(8), \fBglusterfs\fR(8), \fBglusterd\fR(8)
+\fR
+.fi
+.SH COPYRIGHT
+.nf
+Copyright(c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
diff --git a/doc/glusterd.8 b/doc/glusterd.8
new file mode 100644
index 000000000..04a43481e
--- /dev/null
+++ b/doc/glusterd.8
@@ -0,0 +1,64 @@
+.\"
+.\" Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+.\" This file is part of GlusterFS.
+.\"
+.\" This file is licensed to you under your choice of the GNU Lesser
+.\" General Public License, version 3 or any later version (LGPLv3 or
+.\" later), or the GNU General Public License, version 2 (GPLv2), in all
+.\" cases as published by the Free Software Foundation.
+.\"
+.\"
+
+.TH Glusterd 8 "Gluster elastic volume management daemon" "07 March 2011" "Gluster Inc."
+.SH NAME
+Glusterd \- Gluster elastic volume management daemon
+.SH SYNOPSIS
+.B glusterd
+.I [OPTION...]
+.SH DESCRIPTION
+The glusterd daemon is used for elastic volume management. The daemon must be run on all export servers.
+
+.SH OPTIONS
+
+.SS "Basic options"
+.PP
+.TP
+
+\fB\-l <LOGFILE>, \fB\-\-log\-file=<LOGFILE>\fR
+File to use for logging.
+.TP
+\fB\-L <LOGLEVEL>, \fB\-\-log\-level=<LOGLEVEL>\fR
+Logging severity. Valid options are TRACE, DEBUG, INFO, WARNING, ERROR and CRITICAL (the default is INFO).
+.TP
+\fB\-\-debug\fR
+Run the program in debug mode. This option sets \fB\-\-no\-daemon\fR, \fB\-\-log\-level\fR to DEBUG
+and \fB\-\-log\-file\fR to console.
+.TP
+\fB\-N, \fB\-\-no\-daemon\fR
+Run the program in the foreground.
+
+.SS "Miscellaneous Options:"
+.TP
+\fB\-?, \fB\-\-help\fR
+Display this help.
+.TP
+\fB\-\-usage\fR
+Display a short usage message.
+.TP
+\fB\-V, \fB\-\-version\fR
+Print the program version.
+
+.PP
+.SH FILES
+/var/lib/glusterd/*
+
+.SH SEE ALSO
+.nf
+\fBfusermount\fR(1), \fBmount.glusterfs\fR(8), \fBglusterfs\fR(8), \fBgluster\fR(8)
+\fR
+.fi
+.SH COPYRIGHT
+.nf
+Copyright(c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+\fR
+.fi
diff --git a/doc/glusterfs.8 b/doc/glusterfs.8
index f7b4dbfc6..60ad5709b 100644
--- a/doc/glusterfs.8
+++ b/doc/glusterfs.8
@@ -1,138 +1,164 @@
-.\" Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+.\" Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
.\" This file is part of GlusterFS.
.\"
-.\" GlusterFS is free software; you can redistribute it and/or modify
-.\" it under the terms of the GNU General Public License as published
-.\" by the Free Software Foundation; either version 3 of the License,
-.\" or (at your option) any later version.
+.\" This file is licensed to you under your choice of the GNU Lesser
+.\" General Public License, version 3 or any later version (LGPLv3 or
+.\" later), or the GNU General Public License, version 2 (GPLv2), in all
+.\" cases as published by the Free Software Foundation.
.\"
-.\" GlusterFS is distributed in the hope that it will be useful, but
-.\" WITHOUT ANY WARRANTY; without even the implied warranty of
-.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-.\" General Public License for more details.
.\"
-.\" You should have received a copy of the GNU General Public License
-.\" long with this program. If not, see
-.\" <http://www.gnu.org/licenses/>.
.\"
-.\"
-.\"
-.TH GlusterFS 8 "Cluster Filesystem" "07 December 2008" "Z Research Inc."
+.TH GlusterFS 8 "Clustered File System" "07 March 2011" "Gluster Inc."
.SH NAME
-GlusterFS \- Clustered Filesystem.
-.SH SYNOPSYS
-.B glusterfs
+GlusterFS \- clustered file system
+.SH SYNOPSIS
+.B glusterfs
.I [options] [mountpoint]
.PP
.SH DESCRIPTION
-GlusterFS is a clustered file-system capable of scaling to several peta-bytes. It aggregates various storage bricks over Infiniband RDMA or TCP/IP interconnect into one large parallel network file system. Storage bricks can be made of any commodity hardware such as x86-64 server with SATA-II RAID and Infiniband HBA.
-GlusterFS is fully POSIX compliant FileSystem. On client side, it has dependency on FUSE package, on server side, it works seemlessly on different OSes. (Currently supported on GNU/Linux, Mac OSX, FreeBSD, OpenSolaris).
+GlusterFS is a clustered file system, capable of scaling to several peta-bytes.
+It aggregates various storage bricks over Infiniband RDMA or TCP/IP and
+interconnect into one large parallel network file system. Storage bricks can
+be made of any commodity hardware, such as x86-64 server with SATA-II RAID and
+Infiniband HBA.
+
+GlusterFS is fully POSIX compliant file system. On client side, it has dependency
+on FUSE package, on server side, it works seemlessly on different operating systems.
+Currently supported on GNU/Linux and Solaris.
+
.SH OPTIONS
-.PP
-Mandatory or optional arguments to long options are also mandatory or optional
-for any corresponding short options.
+
.SS "Basic options"
.PP
-.TP
-
+.TP
\fB\-f, \fB\-\-volfile=VOLUME-FILE\fR
-File to use as VOLUME-FILE [default:/etc/glusterfs/glusterfs.vol]
+File to use as VOLUME-FILE.
.TP
\fB\-l, \fB\-\-log\-file=LOGFILE\fR
-File to use for logging [default:/var/log/glusterfs/glusterfs.log]
+File to use for logging (the default is <INSTALL-DIR>/var/log/glusterfs/<MOUNT-POINT>.log).
.TP
-\fB\-L, \fB\-\-log\-level=LOGLEVEL\fR
-Logging severity. Valid options are DEBUG, WARNING, ERROR, CRITICAL
-and NONE [default: WARNING]
+\fB\-L, \fB\-\-log\-level=LOGLEVEL\fR
+Logging severity. Valid options are TRACE, DEBUG, INFO, WARNING, ERROR and CRITICAL (the default is INFO).
.TP
\fB\-s, \fB\-\-volfile\-server=SERVER\fR
-Server to get the volume from. This option overrides \fB\-\-volfile option
+Server to get the volume from. This option overrides \fB\-\-volfile \fR option.
.TP
-\fB\-\-log\-server=LOG\-SERVER\fR
-Server to use as the central log server.
+\fB\-\-volfile\-max\-fetch\-attempts=MAX\-ATTEMPTS\fR
+Maximum number of connect attempts to server. This option should be provided with
+\fB\-\-volfile\-server\fR option (the default is 1).
.SS "Advanced options"
.PP
.TP
-
+\fB\-\-acl\fR
+Mount the filesystem with POSIX ACL support.
+.TP
\fB\-\-debug\fR
-Run in debug mode. This option sets \fB\-\-no\-daemon\fR, \fB\-\-log\-level\fR to DEBUG
-and \fB\-\-log\-file\fR to console
+Run in debug mode. This option sets \fB\-\-no\-daemon\fR, \fB\-\-log\-level\fR to DEBUG,
+and \fB\-\-log\-file\fR to console.
+.TP
+\fB\-\-enable\-ino32=BOOL\fR
+Use 32-bit inodes when mounting to workaround application that doesn't support 64-bit inodes.
+.TP
+\fB\-\-fopen\-keep\-cache\fR
+Do not purge the cache on file open.
+.TP
+\fB\-\-mac\-compat=BOOL\fR
+Provide stubs for attributes needed for seamless operation on Macs (the default is off).
.TP
\fB\-N, \fB\-\-no\-daemon\fR
-Run in foreground
+Run in the foreground.
.TP
\fB\-p, \fB\-\-pid\-file=PIDFILE\fR
-File to use as pid file
+File to use as PID file.
+.TP
+\fB\-\-read\-only\fR
+Mount the file system in 'read-only' mode.
+.TP
+\fB\-\-selinux\fR
+Enable SELinux label (extended attributes) support on inodes.
+.TP
+\fB\-S, \fB\-\-socket\-file=SOCKFILE\fR
+File to use as unix-socket.
.TP
\fB\-\-volfile\-id=KEY\fR
-KEY of the volume file to be fetched from server
+Key of the volume file to be fetched from the server.
.TP
\fB\-\-volfile\-server\-port=PORT\fR
-Port number of volfile server
+Port number of volfile server.
.TP
\fB\-\-volfile\-server\-transport=TRANSPORT\fR
-Transport type to get volume file from server [default: socket]
+Transport type to get volume file from server (the default is tcp).
.TP
\fB\-\-volume\-name=VOLUME\-NAME\fR
-Volume name to be used for MOUNT-POINT [default: top most volume in
-VOLUME-FILE]
+Volume name to be used for MOUNT-POINT (the default is top most volume in VOLUME-FILE).
.TP
-\fB\-\-xlator\-option=VOLUME\-NAME.OPTION=VALUE\fR
-Add/override a translator option for a volume with the specified value
+\fB\-\-worm\fR
+Mount the filesystem in 'worm' mode.
.TP
-\fB\-\-log\-server\-port=PORT\fR
-Listening port number of log server
+\fB\-\-xlator\-option=VOLUME\-NAME.OPTION=VALUE\fR
+Add/Override a translator option for a volume with the specified value.
.SS "Fuse options"
.PP
.TP
\fB\-\-attribute\-timeout=SECONDS\fR
-Set attribute timeout to SECONDS for inodes in fuse kernel module [default: 1]
+Set attribute timeout to SECONDS for inodes in fuse kernel module (the default is 1).
+.TP
+\fB\-\-background\-qlen=N\fR
+Set fuse module's background queue length to N (the default is 64).
+.TP
+\fB\-\-congestion\-threshold=N\fR
+Set fuse module's congestion threshold to N (the default is 48).
+.TP
+\fB\-\-direct\-io\-mode=BOOL\fR
+Enable/Disable the direct-I/O mode in fuse module (the default is enable).
+.TP
+\fB\-\-dump-fuse=PATH\f\R
+Dump fuse traffic to PATH
.TP
\fB\-\-entry\-timeout=SECONDS\fR
-Set entry timeout to SECONDS in fuse kernel module [default: 1]
+Set entry timeout to SECONDS in fuse kernel module (the default is 1).
+.TP
+\fB\-\-gid\-timeout=SECONDS\fR
+Set auxilary group list timeout to SECONDS for fuse translator (the default is 0).
.TP
-\fB\-\-disable\-direct\-io\-mode\fR
-Disable direct I/O mode in fuse kernel module
+\fB\-\-negative\-timeout=SECONDS\fR
+Set negative timeout to SECONDS in fuse kernel module (the default is 0).
+.TP
+\fB\-\-volfile-check\fR
+Enable strict volume file checking.
.SS "Miscellaneous Options"
.PP
.TP
\fB\-?, \fB\-\-help\fR
-Give this help list
+Display this help.
.TP
\fB\-\-usage\fR
-Give a short usage message
+Display a short usage message.
.TP
\fB\-V, \fB\-\-version\fR
-Print program version
+Print the program version.
.PP
.SH FILES
-/etc/glusterfs/*.vol
+/var/lib/glusterd/vols/*/*.vol
+.SH EXAMPLES
+mount a volume named foo on server bar with log level DEBUG on mount point
+/mnt/foo
-.SH SEE ALSO
-.nf
-The full documentation for \fBGlusterFS\fR is maintained as a Texinfo manual.
-If the \fBinfo\fR and \fBglusterfs\fR are properly installed on your site, the command
- \fBinfo glusterfs\fR
-should give you access to complete documentation.
+# glusterfs \-\-log\-level=DEBUG \-\-volfile\-id=foo \-\-volfile\-server=bar /mnt/foo
+.SH SEE ALSO
.nf
-\fBbison\fR(1) \fBflex\fR(1) \fBfusermount\fR(1)
-\fBhttp://www.glusterfs.org/ <URL:http://www.glusterfs.org/>
-\fR
-.fi
-.SH AUTHORS
-.nf
-\fBhttp://www.gluster.org/core-team.php <URL:http://www.gluster.org/core-team.php>
+\fBfusermount\fR(1), \fBmount.glusterfs\fR(8), \fBgluster\fR(8)
\fR
.fi
.SH COPYRIGHT
.nf
-\fBCopyright(c)2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+Copyright(c) 2006-2011 Red Hat, Inc. <http://www.redhat.com>
\fR
.fi
diff --git a/doc/glusterfs.vol.sample b/doc/glusterfs.vol.sample
deleted file mode 100644
index e126f66b3..000000000
--- a/doc/glusterfs.vol.sample
+++ /dev/null
@@ -1,61 +0,0 @@
-### file: client-volume.vol.sample
-
-#####################################
-### GlusterFS Client Volume File ##
-#####################################
-
-#### CONFIG FILE RULES:
-### "#" is comment character.
-### - Config file is case sensitive
-### - Options within a volume block can be in any order.
-### - Spaces or tabs are used as delimitter within a line.
-### - Each option should end within a line.
-### - Missing or commented fields will assume default values.
-### - Blank/commented lines are allowed.
-### - Sub-volumes should already be defined above before referring.
-
-### Add client feature and attach to remote subvolume
-volume client
- type protocol/client
- option transport-type tcp
-# option transport-type unix
-# option transport-type ib-sdp
- option remote-host 127.0.0.1 # IP address of the remote brick
-# option transport.socket.remote-port 6996 # default server port is 6996
-
-# option transport-type ib-verbs
-# option transport.ib-verbs.remote-port 6996 # default server port is 6996
-# option transport.ib-verbs.work-request-send-size 1048576
-# option transport.ib-verbs.work-request-send-count 16
-# option transport.ib-verbs.work-request-recv-size 1048576
-# option transport.ib-verbs.work-request-recv-count 16
-
-# option transport-timeout 30 # seconds to wait for a reply
- # from server for each request
- option remote-subvolume brick # name of the remote volume
-end-volume
-
-### Add readahead feature
-#volume readahead
-# type performance/read-ahead
-# option page-size 1MB # unit in bytes
-# option page-count 2 # cache per file = (page-count x page-size)
-# subvolumes client
-#end-volume
-
-### Add IO-Cache feature
-#volume iocache
-# type performance/io-cache
-# option page-size 256KB
-# option page-count 2
-# subvolumes readahead
-#end-volume
-
-### Add writeback feature
-#volume writeback
-# type performance/write-behind
-# option aggregate-size 1MB
-# option window-size 2MB
-# option flush-behind off
-# subvolumes iocache
-#end-volume
diff --git a/doc/glusterfsd.8 b/doc/glusterfsd.8
new file mode 100644
index 000000000..176d04236
--- /dev/null
+++ b/doc/glusterfsd.8
@@ -0,0 +1,136 @@
+.\" Copyright (c) 20088888888-2012 Red Hat, Inc. <http://www.redhat.com>
+.\" This file is part of GlusterFS.
+.\"
+.\" This file is licensed to you under your choice of the GNU Lesser
+.\" General Public License, version 3 or any later version (LGPLv3 or
+.\" later), or the GNU General Public License, version 2 (GPLv2), in all
+.\" cases as published by the Free Software Foundation.
+.\"
+.\"
+.\"
+.TH GlusterFS 8 "Cluster Filesystem" "19 March 2010" "Gluster Inc."
+.SH NAME
+GlusterFS \- Clustered Filesystem.
+.SH SYNOPSIS
+.B glusterfsd
+.I [options] [mountpoint]
+.PP
+.SH DESCRIPTION
+GlusterFS is a clustered file-system capable of scaling to several peta-bytes.
+It aggregates various storage bricks over Infiniband RDMA or TCP/IP
+interconnect into one large parallel network file system. Storage bricks can
+be made of any commodity hardware such as x86-64 server with SATA-II RAID and
+Infiniband HBA.
+
+GlusterFS is fully POSIX compliant FileSystem. On client side, it has dependency
+on FUSE package, on server side, it works seemlessly on different OSes.
+(Currently supported on GNU/Linux, Solaris).
+
+.SH OPTIONS
+.PP
+Mandatory or optional arguments to long options are also mandatory or optional
+for any corresponding short options.
+.SS "Basic options"
+.PP
+.TP
+
+\fB\-f, \fB\-\-volfile=VOLUME-FILE\fR
+File to use as VOLUME-FILE [default:/etc/glusterfs/glusterfs.vol]
+.TP
+\fB\-l, \fB\-\-log\-file=LOGFILE\fR
+File to use for logging [default:/var/log/glusterfs/glusterfs.log]
+.TP
+\fB\-L, \fB\-\-log\-level=LOGLEVEL\fR
+Logging severity. Valid options are TRACE, DEBUG, INFO, WARNING, ERROR and
+CRITICAL [default: WARNING]
+.TP
+\fB\-s, \fB\-\-volfile\-server=SERVER\fR
+Server to get the volume from. This option overrides \fB\-\-volfile option
+
+.SS "Advanced options"
+.PP
+.TP
+
+\fB\-\-debug\fR
+Run in debug mode. This option sets \fB\-\-no\-daemon\fR, \fB\-\-log\-level\fR to DEBUG
+and \fB\-\-log\-file\fR to console
+.TP
+\fB\-N, \fB\-\-no\-daemon\fR
+Run in foreground
+.TP
+\fB\-\-read\-only\fR
+Makes the filesystem read-only
+.TP
+\fB\-p, \fB\-\-pid\-file=PIDFILE\fR
+File to use as pid file
+.TP
+\fB\-S SOCKFILE
+Socket file to used for inter-process communication
+.TP
+\fB\-\-brick\-name DIRECTORY
+Directory to be used as export directory for GlusterFS
+.TP
+\fB\-\-brick\-port PORT
+Brick Port to be registered with Gluster portmapper
+.TP
+\fB\-\-volfile\-id=KEY\fR
+KEY of the volume file to be fetched from server
+.TP
+\fB\-\-volfile\-server\-port=PORT\fR
+Port number of volfile server
+.TP
+\fB\-\-volfile\-server\-transport=TRANSPORT\fR
+Transport type to get volume file from server [default: tcp]
+.TP
+\fB\-\-volume\-name=VOLUME\-NAME\fR
+Volume name to be used for MOUNT-POINT [default: top most volume in
+VOLUME-FILE]
+.TP
+\fB\-\-xlator\-option=VOLUME\-NAME.OPTION=VALUE\fR
+Add/override a translator option for a volume with the specified value
+
+.SS "Fuse options"
+.PP
+.TP
+
+\fB\-\-attribute\-timeout=SECONDS\fR
+Set attribute timeout to SECONDS for inodes in fuse kernel module [default: 1]
+.TP
+\fB\-\-entry\-timeout=SECONDS\fR
+Set entry timeout to SECONDS in fuse kernel module [default: 1]
+.TP
+\fB\-\-direct\-io\-mode=BOOL\fR
+Enable/Disable direct-io mode in fuse module [default: enable]
+
+.SS "Miscellaneous Options"
+.PP
+.TP
+
+\fB\-?, \fB\-\-help\fR
+Give this help list
+.TP
+\fB\-\-usage\fR
+Give a short usage message
+.TP
+\fB\-V, \fB\-\-version\fR
+Print program version
+
+.PP
+.SH FILES
+/etc/glusterfs/*.vol
+
+.SH EXAMPLES
+Start a GlusterFS server on localhost with volume name foo
+
+glusterfsd \-s localhost \-\-volfile\-id foo.server.media-disk\-1 \-p /var/lib/glusterd/vols/foo/run/server\-media\-disk\-1.pid \-S /tmp/<uniqueid>.socket \-\-brick-name /media/disk\-1 \-l /var/log/glusterfs/bricks/media\-disk\-1.log \-\-brick\-port 24009 \-\-xlator\-option foo\-server.listen-port=24009
+
+.SH SEE ALSO
+.nf
+\fBfusermount\fR(1), \fBmount.glusterfs\fR(8), \fBgluster\fR(8)
+\fR
+.fi
+.SH COPYRIGHT
+.nf
+Copyright(c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+\fR
+.fi
diff --git a/doc/glusterfsd.vol.sample b/doc/glusterfsd.vol.sample
deleted file mode 100644
index b6d8a1580..000000000
--- a/doc/glusterfsd.vol.sample
+++ /dev/null
@@ -1,47 +0,0 @@
-### file: server-volume.vol.sample
-
-#####################################
-### GlusterFS Server Volume File ##
-#####################################
-
-#### CONFIG FILE RULES:
-### "#" is comment character.
-### - Config file is case sensitive
-### - Options within a volume block can be in any order.
-### - Spaces or tabs are used as delimitter within a line.
-### - Multiple values to options will be : delimitted.
-### - Each option should end within a line.
-### - Missing or commented fields will assume default values.
-### - Blank/commented lines are allowed.
-### - Sub-volumes should already be defined above before referring.
-
-### Export volume "brick" with the contents of "/home/export" directory.
-volume brick
- type storage/posix # POSIX FS translator
- option directory /home/export # Export this directory
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp
-# option transport-type unix
-# option transport-type ib-sdp
-# option transport.socket.bind-address 192.168.1.10 # Default is to listen on all interfaces
-# option transport.socket.listen-port 6996 # Default is 6996
-
-# option transport-type ib-verbs
-# option transport.ib-verbs.bind-address 192.168.1.10 # Default is to listen on all interfaces
-# option transport.ib-verbs.listen-port 6996 # Default is 6996
-# option transport.ib-verbs.work-request-send-size 131072
-# option transport.ib-verbs.work-request-send-count 64
-# option transport.ib-verbs.work-request-recv-size 131072
-# option transport.ib-verbs.work-request-recv-count 64
-
-# option client-volume-filename /etc/glusterfs/glusterfs-client.vol
- subvolumes brick
-# NOTE: Access to any volume through protocol/server is denied by
-# default. You need to explicitly grant access through # "auth"
-# option.
- option auth.addr.brick.allow * # Allow access to "brick" volume
-end-volume
diff --git a/doc/hacker-guide/adding-fops.txt b/doc/hacker-guide/adding-fops.txt
deleted file mode 100644
index 293de2637..000000000
--- a/doc/hacker-guide/adding-fops.txt
+++ /dev/null
@@ -1,33 +0,0 @@
- HOW TO ADD A NEW FOP TO GlusterFS
- =================================
-
-Steps to be followed when adding a new FOP to GlusterFS:
-
-1. Edit glusterfs.h and add a GF_FOP_* constant.
-
-2. Edit xlator.[ch] and:
- 2a. add the new prototype for fop and callback.
- 2b. edit xlator_fops structure.
-
-3. Edit xlator.c and add to fill_defaults.
-
-4. Edit protocol.h and add struct necessary for the new FOP.
-
-5. Edit defaults.[ch] and provide default implementation.
-
-6. Edit call-stub.[ch] and provide stub implementation.
-
-7. Edit common-utils.c and add to gf_global_variable_init().
-
-8. Edit client-protocol and add your FOP.
-
-9. Edit server-protocol and add your FOP.
-
-10. Implement your FOP in any translator for which the default implementation
- is not sufficient.
-
-==========================================
-Last updated: Mon Oct 27 21:35:49 IST 2008
-
-Author: Vikas Gorur <vikas@zresearch.com>
-==========================================
diff --git a/doc/hacker-guide/bdb.txt b/doc/hacker-guide/bdb.txt
deleted file mode 100644
index fd0bd3652..000000000
--- a/doc/hacker-guide/bdb.txt
+++ /dev/null
@@ -1,70 +0,0 @@
-
-* How does file translates to key/value pair?
----------------------------------------------
-
- in bdb a file is identified by key (obtained by taking basename() of the path of
-the file) and file contents are stored as value corresponding to the key in database
-file (defaults to glusterfs_storage.db under dirname() directory).
-
-* symlinks, directories
------------------------
-
- symlinks and directories are stored as is.
-
-* db (database) files
----------------------
-
- every directory, including root directory, contains a database file called
-glusterfs_storage.db. all the regular files contained in the directory are stored
-as key/value pair inside the glusterfs_storage.db.
-
-* internal data cache
----------------------
-
- db does not provide a way to find out the size of the value corresponding to a key.
-so, bdb makes DB->get() call for key and takes the length of the value returned.
-since DB->get() also returns file contents for key, bdb maintains an internal cache and
-stores the file contents in the cache.
- every directory maintains a seperate cache.
-
-* inode number transformation
------------------------------
-
- bdb allocates a inode number to each file and directory on its own. bdb maintains a
-global counter and increments it after allocating inode number for each file
-(regular, symlink or directory). NOTE: bdb does not guarantee persistent inode numbers.
-
-* checkpoint thread
--------------------
-
- bdb creates a checkpoint thread at the time of init(). checkpoint thread does a
-periodic checkpoint on the DB_ENV. checkpoint is the mechanism, provided by db, to
-forcefully commit the logged transactions to the storage.
-
-NOTES ABOUT FOPS:
------------------
-
-lookup() -
- 1> do lstat() on the path, if lstat fails, we assume that the file being looked up
- is either a regular file or doesn't exist.
- 2> lookup in the DB of parent directory for key corresponding to path. if key exists,
- return key, with.
- NOTE: 'struct stat' stat()ed from DB file is used as a container for 'struct stat'
- of the regular file. st_ino, st_size, st_blocks are updated with file's values.
-
-readv() -
- 1> do a lookup in bctx cache. if successful, return the requested data from cache.
- 2> if cache missed, do a DB->get() the entire file content and insert to cache.
-
-writev():
- 1> flush any cached content of this file.
- 2> do a DB->put(), with DB_DBT_PARTIAL flag.
- NOTE: DB_DBT_PARTIAL is used to do partial update of a value in DB.
-
-readdir():
- 1> regular readdir() in a loop, and vomit all DB_ENV log files and DB files that
- we encounter.
- 2> if the readdir() buffer still has space, open a DB cursor and do a sequential
- DBC->get() to fill the reaadir buffer.
-
-
diff --git a/doc/hacker-guide/en-US/markdown/adding-fops.md b/doc/hacker-guide/en-US/markdown/adding-fops.md
new file mode 100644
index 000000000..3f72ed3e2
--- /dev/null
+++ b/doc/hacker-guide/en-US/markdown/adding-fops.md
@@ -0,0 +1,18 @@
+Adding a new FOP
+================
+
+Steps to be followed when adding a new FOP to GlusterFS:
+
+1. Edit `glusterfs.h` and add a `GF_FOP_*` constant.
+2. Edit `xlator.[ch]` and:
+ * add the new prototype for fop and callback.
+ * edit `xlator_fops` structure.
+3. Edit `xlator.c` and add to fill_defaults.
+4. Edit `protocol.h` and add struct necessary for the new FOP.
+5. Edit `defaults.[ch]` and provide default implementation.
+6. Edit `call-stub.[ch]` and provide stub implementation.
+7. Edit `common-utils.c` and add to gf_global_variable_init().
+8. Edit client-protocol and add your FOP.
+9. Edit server-protocol and add your FOP.
+10. Implement your FOP in any translator for which the default implementation
+ is not sufficient.
diff --git a/doc/hacker-guide/en-US/markdown/afr.md b/doc/hacker-guide/en-US/markdown/afr.md
new file mode 100644
index 000000000..1be7e39f2
--- /dev/null
+++ b/doc/hacker-guide/en-US/markdown/afr.md
@@ -0,0 +1,191 @@
+cluster/afr translator
+======================
+
+Locking
+-------
+
+Before understanding replicate, one must understand two internal FOPs:
+
+### `GF_FILE_LK`
+
+This is exactly like `fcntl(2)` locking, except the locks are in a
+separate domain from locks held by applications.
+
+### `GF_DIR_LK (loc_t *loc, char *basename)`
+
+This allows one to lock a name under a directory. For example,
+to lock /mnt/glusterfs/foo, one would use the call:
+
+```
+GF_DIR_LK ({loc_t for "/mnt/glusterfs"}, "foo")
+```
+
+If one wishes to lock *all* the names under a particular directory,
+supply the basename argument as `NULL`.
+
+The locks can either be read locks or write locks; consult the
+function prototype for more details.
+
+Both these operations are implemented by the features/locks (earlier
+known as posix-locks) translator.
+
+Basic design
+------------
+
+All FOPs can be classified into four major groups:
+
+### inode-read
+
+Operations that read an inode's data (file contents) or metadata (perms, etc.).
+
+access, getxattr, fstat, readlink, readv, stat.
+
+### inode-write
+
+Operations that modify an inode's data or metadata.
+
+chmod, chown, truncate, writev, utimens.
+
+### dir-read
+
+Operations that read a directory's contents or metadata.
+
+readdir, getdents, checksum.
+
+### dir-write
+
+Operations that modify a directory's contents or metadata.
+
+create, link, mkdir, mknod, rename, rmdir, symlink, unlink.
+
+Some of these make a subgroup in that they modify *two* different entries:
+link, rename, symlink.
+
+### Others
+
+Other operations.
+
+flush, lookup, open, opendir, statfs.
+
+Algorithms
+----------
+
+Each of the four major groups has its own algorithm:
+
+### inode-read, dir-read
+
+1. Send a request to the first child that is up:
+ * if it fails:
+ * try the next available child
+ * if we have exhausted all children:
+ * return failure
+
+### inode-write
+
+ All operations are done in parallel unless specified otherwise.
+
+1. Send a ``GF_FILE_LK`` request on all children for a write lock on the
+ appropriate region
+ (for metadata operations: entire file (0, 0) for writev:
+ (offset, offset+size of buffer))
+ * If a lock request fails on a child:
+ * unlock all children
+ * try to acquire a blocking lock (`F_SETLKW`) on each child, serially.
+ If this fails (due to `ENOTCONN` or `EINVAL`):
+ Consider this child as dead for rest of transaction.
+2. Mark all children as "pending" on all (alive) children (see below for
+meaning of "pending").
+ * If it fails on any child:
+ * mark it as dead (in transaction local state).
+3. Perform operation on all (alive) children.
+ * If it fails on any child:
+ * mark it as dead (in transaction local state).
+4. Unmark all successful children as not "pending" on all nodes.
+5. Unlock region on all (alive) children.
+
+### dir-write
+
+ The algorithm for dir-write is same as above except instead of holding
+ `GF_FILE_LK` locks we hold a GF_DIR_LK lock on the name being operated upon.
+ In case of link-type calls, we hold locks on both the operand names.
+
+"pending"
+---------
+
+The "pending" number is like a journal entry. A pending entry is an
+array of 32-bit integers stored in network byte-order as the extended
+attribute of an inode (which can be a directory as well).
+
+There are three keys corresponding to three types of pending operations:
+
+### `AFR_METADATA_PENDING`
+
+There are some metadata operations pending on this inode (perms, ctime/mtime,
+xattr, etc.).
+
+### `AFR_DATA_PENDING`
+
+There is some data pending on this inode (writev).
+
+### `AFR_ENTRY_PENDING`
+
+There are some directory operations pending on this directory
+(create, unlink, etc.).
+
+Self heal
+---------
+
+* On lookup, gather extended attribute data:
+ * If entry is a regular file:
+ * If an entry is present on one child and not on others:
+ * create entry on others.
+ * If entries exist but have different metadata (perms, etc.):
+ * consider the entry with the highest `AFR_METADATA_PENDING` number as
+ definitive and replicate its attributes on children.
+ * If entry is a directory:
+ * Consider the entry with the higest `AFR_ENTRY_PENDING` number as
+ definitive and replicate its contents on all children.
+ * If any two entries have non-matching types (i.e., one is file and
+ other is directory):
+ * Announce to the user via log that a split-brain situation has been
+ detected, and do nothing.
+* On open, gather extended attribute data:
+ * Consider the file with the highest `AFR_DATA_PENDING` number as
+ the definitive one and replicate its contents on all other
+ children.
+
+During all self heal operations, appropriate locks must be held on all
+regions/entries being affected.
+
+Inode scaling
+-------------
+
+Inode scaling is necessary because if a situation arises where an inode number
+is returned for a directory (by lookup) which was previously the inode number
+of a file (as per FUSE's table), then FUSE gets horribly confused (consult a
+FUSE expert for more details).
+
+To avoid such a situation, we distribute the 64-bit inode space equally
+among all children of replicate.
+
+To illustrate:
+
+If c1, c2, c3 are children of replicate, they each get 1/3 of the available
+inode space:
+
+------------- -- -- -- -- -- -- -- -- -- -- -- ---
+Child: c1 c2 c3 c1 c2 c3 c1 c2 c3 c1 c2 ...
+Inode number: 1 2 3 4 5 6 7 8 9 10 11 ...
+------------- -- -- -- -- -- -- -- -- -- -- -- ---
+
+Thus, if lookup on c1 returns an inode number "2", it is scaled to "4"
+(which is the second inode number in c1's space).
+
+This way we ensure that there is never a collision of inode numbers from
+two different children.
+
+This reduction of inode space doesn't really reduce the usability of
+replicate since even if we assume replicate has 1024 children (which would be a
+highly unusual scenario), each child still has a 54-bit inode space:
+$2^{54} \sim 1.8 \times 10^{16}$, which is much larger than any real
+world requirement.
diff --git a/doc/hacker-guide/en-US/markdown/coding-standard.md b/doc/hacker-guide/en-US/markdown/coding-standard.md
new file mode 100644
index 000000000..178dc142a
--- /dev/null
+++ b/doc/hacker-guide/en-US/markdown/coding-standard.md
@@ -0,0 +1,402 @@
+GlusterFS Coding Standards
+==========================
+
+Structure definitions should have a comment per member
+------------------------------------------------------
+
+Every member in a structure definition must have a comment about its
+purpose. The comment should be descriptive without being overly verbose.
+
+*Bad:*
+
+```
+gf_lock_t lock; /* lock */
+```
+
+*Good:*
+
+```
+DBTYPE access_mode; /* access mode for accessing
+ * the databases, can be
+ * DB_HASH, DB_BTREE
+ * (option access-mode <mode>)
+ */
+```
+
+Declare all variables at the beginning of the function
+------------------------------------------------------
+
+All local variables in a function must be declared immediately after the
+opening brace. This makes it easy to keep track of memory that needs to be freed
+during exit. It also helps debugging, since gdb cannot handle variables
+declared inside loops or other such blocks.
+
+Always initialize local variables
+---------------------------------
+
+Every local variable should be initialized to a sensible default value
+at the point of its declaration. All pointers should be initialized to NULL,
+and all integers should be zero or (if it makes sense) an error value.
+
+
+*Good:*
+
+```
+int ret = 0;
+char *databuf = NULL;
+int _fd = -1;
+```
+
+Initialization should always be done with a constant value
+----------------------------------------------------------
+
+Never use a non-constant expression as the initialization value for a variable.
+
+
+*Bad:*
+
+```
+pid_t pid = frame->root->pid;
+char *databuf = malloc (1024);
+```
+
+Validate all arguments to a function
+------------------------------------
+
+All pointer arguments to a function must be checked for `NULL`.
+A macro named `VALIDATE` (in `common-utils.h`)
+takes one argument, and if it is `NULL`, writes a log message and
+jumps to a label called `err` after setting op_ret and op_errno
+appropriately. It is recommended to use this template.
+
+
+*Good:*
+
+```
+VALIDATE(frame);
+VALIDATE(this);
+VALIDATE(inode);
+```
+
+Never rely on precedence of operators
+-------------------------------------
+
+Never write code that relies on the precedence of operators to execute
+correctly. Such code can be hard to read and someone else might not
+know the precedence of operators as accurately as you do.
+
+*Bad:*
+
+```
+if (op_ret == -1 && errno != ENOENT)
+```
+
+*Good:*
+
+```
+if ((op_ret == -1) && (errno != ENOENT))
+```
+
+Use exactly matching types
+--------------------------
+
+Use a variable of the exact type declared in the manual to hold the
+return value of a function. Do not use an ``equivalent'' type.
+
+
+*Bad:*
+
+```
+int len = strlen (path);
+```
+
+*Good:*
+
+```
+size_t len = strlen (path);
+```
+
+Never write code such as `foo->bar->baz`; check every pointer
+-------------------------------------------------------------
+
+Do not write code that blindly follows a chain of pointer
+references. Any pointer in the chain may be `NULL` and thus
+cause a crash. Verify that each pointer is non-null before following
+it.
+
+Check return value of all functions and system calls
+----------------------------------------------------
+
+The return value of all system calls and API functions must be checked
+for success or failure.
+
+*Bad:*
+
+```
+close (fd);
+```
+
+*Good:*
+
+```
+op_ret = close (_fd);
+if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "close on file %s failed (%s)", real_path,
+ strerror (errno));
+ op_errno = errno;
+ goto out;
+}
+```
+
+
+Gracefully handle failure of malloc
+-----------------------------------
+
+GlusterFS should never crash or exit due to lack of memory. If a
+memory allocation fails, the call should be unwound and an error
+returned to the user.
+
+*Use result args and reserve the return value to indicate success or failure:*
+
+The return value of every functions must indicate success or failure (unless
+it is impossible for the function to fail --- e.g., boolean functions). If
+the function needs to return additional data, it must be returned using a
+result (pointer) argument.
+
+*Bad:*
+
+```
+int32_t dict_get_int32 (dict_t *this, char *key);
+```
+
+*Good:*
+
+```
+int dict_get_int32 (dict_t *this, char *key, int32_t *val);
+```
+
+Always use the `n' versions of string functions
+-----------------------------------------------
+
+Unless impossible, use the length-limited versions of the string functions.
+
+*Bad:*
+
+```
+strcpy (entry_path, real_path);
+```
+
+*Good:*
+
+```
+strncpy (entry_path, real_path, entry_path_len);
+```
+
+No dead or commented code
+-------------------------
+
+There must be no dead code (code to which control can never be passed) or
+commented out code in the codebase.
+
+Only one unwind and return per function
+---------------------------------------
+
+There must be only one exit out of a function. `UNWIND` and return
+should happen at only point in the function.
+
+Function length or Keep functions small
+---------------------------------------
+
+We live in the UNIX-world where modules do one thing and do it well.
+This rule should apply to our functions also. If a function is very long, try splitting it
+into many little helper functions. The question is, in a coding
+spree, how do we know a function is long and unreadable. One rule of
+thumb given by Linus Torvalds is that, a function should be broken-up
+if you have 4 or more levels of indentation going on for more than 3-4
+lines.
+
+*Example for a helper function:*
+```
+static int
+same_owner (posix_lock_t *l1, posix_lock_t *l2)
+{
+ return ((l1->client_pid == l2->client_pid) &&
+ (l1->transport == l2->transport));
+}
+```
+
+Defining functions as static
+----------------------------
+
+Define internal functions as static only if you're
+very sure that there will not be a crash(..of any kind..) emanating in
+that function. If there is even a remote possibility, perhaps due to
+pointer derefering, etc, declare the function as non-static. This
+ensures that when a crash does happen, the function name shows up the
+in the back-trace generated by libc. However, doing so has potential
+for polluting the function namespace, so to avoid conflicts with other
+components in other parts, ensure that the function names are
+prepended with a prefix that identify the component to which it
+belongs. For eg. non-static functions in io-threads translator start
+with iot_.
+
+Ensure function calls wrap around after 80-columns
+--------------------------------------------------
+
+Place remaining arguments on the next line if needed.
+
+Functions arguments and function definition
+-------------------------------------------
+
+Place all the arguments of a function definition on the same line
+until the line goes beyond 80-cols. Arguments that extend beyind
+80-cols should be placed on the next line.
+
+Style issues
+------------
+
+### Brace placement
+
+Use K&R/Linux style of brace placement for blocks.
+
+*Good:*
+
+```
+int some_function (...)
+{
+ if (...) {
+ /* ... */
+ } else if (...) {
+ /* ... */
+ } else {
+ /* ... */
+ }
+
+ do {
+ /* ... */
+ } while (cond);
+}
+```
+
+### Indentation
+
+Use *eight* spaces for indenting blocks. Ensure that your
+file contains only spaces and not tab characters. You can do this
+in Emacs by selecting the entire file (`C-x h`) and
+running `M-x untabify`.
+
+To make Emacs indent lines automatically by eight spaces, add this
+line to your `.emacs`:
+
+```
+(add-hook 'c-mode-hook (lambda () (c-set-style "linux")))
+```
+
+### Comments
+
+Write a comment before every function describing its purpose (one-line),
+its arguments, and its return value. Mention whether it is an internal
+function or an exported function.
+
+Write a comment before every structure describing its purpose, and
+write comments about each of its members.
+
+Follow the style shown below for comments, since such comments
+can then be automatically extracted by doxygen to generate
+documentation.
+
+*Good:*
+
+```
+/**
+* hash_name -hash function for filenames
+* @par: parent inode number
+* @name: basename of inode
+* @mod: number of buckets in the hashtable
+*
+* @return: success: bucket number
+* failure: -1
+*
+* Not for external use.
+*/
+```
+
+### Indicating critical sections
+
+To clearly show regions of code which execute with locks held, use
+the following format:
+
+```
+pthread_mutex_lock (&mutex);
+{
+ /* code */
+}
+pthread_mutex_unlock (&mutex);
+```
+
+*A skeleton fop function:*
+
+This is the recommended template for any fop. In the beginning come
+the initializations. After that, the `success' control flow should be
+linear. Any error conditions should cause a `goto` to a single
+point, `out`. At that point, the code should detect the error
+that has occured and do appropriate cleanup.
+
+```
+int32_t
+sample_fop (call_frame_t *frame, xlator_t *this, ...)
+{
+ char * var1 = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ DIR * dir = NULL;
+ struct posix_fd * pfd = NULL;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+
+ /* other validations */
+
+ dir = opendir (...);
+
+ if (dir == NULL) {
+ op_errno = errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "opendir failed on %s (%s)", loc->path,
+ strerror (op_errno));
+ goto out;
+ }
+
+ /* another system call */
+ if (...) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "out of memory :(");
+ goto out;
+ }
+
+ /* ... */
+
+ out:
+ if (op_ret == -1) {
+
+ /* check for all the cleanup that needs to be
+ done */
+
+ if (dir) {
+ closedir (dir);
+ dir = NULL;
+ }
+
+ if (pfd) {
+ FREE (pfd->path);
+ FREE (pfd);
+ pfd = NULL;
+ }
+ }
+
+ STACK_UNWIND (frame, op_ret, op_errno, fd);
+ return 0;
+}
+```
diff --git a/doc/hacker-guide/en-US/markdown/posix.md b/doc/hacker-guide/en-US/markdown/posix.md
new file mode 100644
index 000000000..84c813e55
--- /dev/null
+++ b/doc/hacker-guide/en-US/markdown/posix.md
@@ -0,0 +1,59 @@
+storage/posix translator
+========================
+
+Notes
+-----
+
+### `SET_FS_ID`
+
+This is so that all filesystem checks are done with the user's
+uid/gid and not GlusterFS's uid/gid.
+
+### `MAKE_REAL_PATH`
+
+This macro concatenates the base directory of the posix volume
+('option directory') with the given path.
+
+### `need_xattr` in lookup
+
+If this flag is passed, lookup returns a xattr dictionary that contains
+the file's create time, the file's contents, and the version number
+of the file.
+
+This is a hack to increase small file performance. If an application
+wants to read a small file, it can finish its job with just a lookup
+call instead of a lookup followed by read.
+
+### `getdents`/`setdents`
+
+These are used by unify to set and get directory entries.
+
+### `ALIGN_BUF`
+
+Macro to align an address to a page boundary (4K).
+
+### `priv->export_statfs`
+
+In some cases, two exported volumes may reside on the same
+partition on the server. Sending statvfs info for both
+the volumes will lead to erroneous df output at the client,
+since free space on the partition will be counted twice.
+
+In such cases, user can disable exporting statvfs info
+on one of the volumes by setting this option.
+
+### `xattrop`
+
+This fop is used by replicate to set version numbers on files.
+
+### `getxattr`/`setxattr` hack to read/write files
+
+A key, `GLUSTERFS_FILE_CONTENT_STRING`, is handled in a special way by
+`getxattr`/`setxattr`. A getxattr with the key will return the entire
+content of the file as the value. A `setxattr` with the key will write
+the value as the entire content of the file.
+
+### `posix_checksum`
+
+This calculates a simple XOR checksum on all entry names in a
+directory that is used by unify to compare directory contents.
diff --git a/doc/hacker-guide/en-US/markdown/translator-development.md b/doc/hacker-guide/en-US/markdown/translator-development.md
new file mode 100644
index 000000000..77d1b606a
--- /dev/null
+++ b/doc/hacker-guide/en-US/markdown/translator-development.md
@@ -0,0 +1,666 @@
+Translator development
+======================
+
+Setting the Stage
+-----------------
+
+This is the first post in a series that will explain some of the details of
+writing a GlusterFS translator, using some actual code to illustrate.
+
+Before we begin, a word about environments. GlusterFS is over 300K lines of
+code spread across a few hundred files. That's no Linux kernel or anything, but
+ you're still going to be navigating through a lot of code in every
+code-editing session, so some kind of cross-referencing is *essential*. I use
+cscope with the vim bindings, and if I couldn't do Crtl+G and such to jump
+between definitions all the time my productivity would be cut in half. You may
+prefer different tools, but as I go through these examples you'll need
+something functionally similar to follow on. OK, on with the show.
+
+The first thing you need to know is that translators are not just bags of
+functions and variables. They need to have a very definite internal structure
+so that the translator-loading code can figure out where all the pieces are.
+The way it does this is to use dlsym to look for specific names within your
+shared-object file, as follow (from `xlator.c`):
+
+```
+if (!(xl->fops = dlsym (handle, "fops"))) {
+ gf_log ("xlator", GF_LOG_WARNING, "dlsym(fops) on %s",
+ dlerror ());
+ goto out;
+}
+
+if (!(xl->cbks = dlsym (handle, "cbks"))) {
+ gf_log ("xlator", GF_LOG_WARNING, "dlsym(cbks) on %s",
+ dlerror ());
+ goto out;
+}
+
+if (!(xl->init = dlsym (handle, "init"))) {
+ gf_log ("xlator", GF_LOG_WARNING, "dlsym(init) on %s",
+ dlerror ());
+ goto out;
+}
+
+if (!(xl->fini = dlsym (handle, "fini"))) {
+ gf_log ("xlator", GF_LOG_WARNING, "dlsym(fini) on %s",
+ dlerror ());
+ goto out;
+}
+```
+
+In this example, `xl` is a pointer to the in-memory object for the translator
+we're loading. As you can see, it's looking up various symbols *by name* in the
+ shared object it just loaded, and storing pointers to those symbols. Some of
+them (e.g. init are functions, while others e.g. fops are dispatch tables
+containing pointers to many functions. Together, these make up the translator's
+ public interface.
+
+Most of this glue or boilerplate can easily be found at the bottom of one of
+the source files that make up each translator. We're going to use the `rot-13`
+translator just for fun, so in this case you'd look in `rot-13.c` to see this:
+
+```
+struct xlator_fops fops = {
+ .readv = rot13_readv,
+ .writev = rot13_writev
+};
+
+struct xlator_cbks cbks = {
+};
+
+struct volume_options options[] = {
+{ .key = {"encrypt-write"},
+ .type = GF_OPTION_TYPE_BOOL
+},
+{ .key = {"decrypt-read"},
+ .type = GF_OPTION_TYPE_BOOL
+},
+{ .key = {NULL} },
+};
+```
+
+The `fops` table, defined in `xlator.h`, is one of the most important pieces.
+This table contains a pointer to each of the filesystem functions that your
+translator might implement -- `open`, `read`, `stat`, `chmod`, and so on. There
+are 82 such functions in all, but don't worry; any that you don't specify here
+will be see as null and filled with defaults from `defaults.c` when your
+translator is loaded. In this particular example, since `rot-13` is an
+exceptionally simple translator, we only fill in two entries for `readv` and
+`writev`.
+
+There are actually two other tables, also required to have predefined names,
+that are also used to find translator functions: `cbks` (which is empty in this
+ snippet) and `dumpops` (which is missing entirely). The first of these specify
+ entry points for when inodes are forgotten or file descriptors are released.
+In other words, they're destructors for objects in which your translator might
+ have an interest. Mostly you can ignore them, because the default behavior
+handles even the simpler cases of translator-specific inode/fd context
+automatically. However, if the context you attach is a complex structure
+requiring complex cleanup, you'll need to supply these functions. As for
+dumpops, that's just used if you want to provide functions to pretty-print
+various structures in logs. I've never used it myself, though I probably
+should. What's noteworthy here is that we don't even define dumpops. That's
+because all of the functions that might use these dispatch functions will check
+ for `xl->dumpops` being `NULL` before calling through it. This is in sharp
+contrast to the behavior for `fops` and `cbks1`, which *must* be present. If
+they're not, translator loading will fail because these pointers are not
+checked every time and if they're `NULL` then we'll segfault. That's why we
+provide an empty definition for cbks; it's OK for the individual function
+pointers to be NULL, but not for the whole table to be absent.
+
+The last piece I'll cover today is options. As you can see, this is a table of
+translator-specific option names and some information about their types.
+GlusterFS actually provides a pretty rich set of types (`volume_option_type_t`
+in `options.`h) which includes paths, translator names, percentages, and times
+in addition to the obvious integers and strings. Also, the `volume_option_t`
+structure can include information about alternate names, min/max/default
+values, enumerated string values, and descriptions. We don't see any of these
+here, so let's take a quick look at some more complex examples from afr.c and
+then come back to `rot-13`.
+
+```
+{ .key = {"data-self-heal-algorithm"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "",
+ .description = "Select between \"full\", \"diff\". The "
+ "\"full\" algorithm copies the entire file from "
+ "source to sink. The \"diff\" algorithm copies to "
+ "sink only those blocks whose checksums don't match "
+ "with those of source.",
+ .value = { "diff", "full", "" }
+},
+{ .key = {"data-self-heal-window-size"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 1024,
+ .default_value = "1",
+ .description = "Maximum number blocks per file for which "
+ "self-heal process would be applied simultaneously."
+},
+```
+
+When your translator is loaded, all of this information is used to parse the
+options actually provided in the volfile, and then the result is turned into a
+dictionary and stored as `xl->options`. This dictionary is then processed by
+your init function, which you can see being looked up in the first code
+fragment above. We're only going to look at a small part of the `rot-13`'s
+init for now.
+
+```
+priv->decrypt_read = 1;
+priv->encrypt_write = 1;
+
+data = dict_get (this->options, "encrypt-write");
+if (data) {
+ if (gf_string2boolean (data->data, &priv->encrypt_write
+ == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "encrypt-write takes only boolean options");
+ return -1;
+ }
+}
+```
+
+What we can see here is that we're setting some defaults in our priv structure,
+then looking to see if an `encrypt-write` option was actually provided. If so,
+we convert and store it. This is a pretty classic use of dict_get to fetch a
+field from a dictionary, and of using one of many conversion functions in
+`common-utils.c` to convert `data->data` into something we can use.
+
+So far we've covered the basic of how a translator gets loaded, how we find its
+various parts, and how we process its options. In my next Translator 101 post,
+we'll go a little deeper into other things that init and its companion fini
+might do, and how some other fields in our `xlator_t` structure (commonly
+referred to as this) are commonly used.
+
+`init`, `fini`, and private context
+-----------------------------------
+
+In the previous Translator 101 post, we looked at some of the dispatch tables
+and options processing in a translator. This time we're going to cover the rest
+ of the "shell" of a translator -- i.e. the other global parts not specific to
+handling a particular request.
+
+Let's start by looking at the relationship between a translator and its shared
+library. At a first approximation, this is the relationship between an object
+and a class in just about any object-oriented programming language. The class
+defines behaviors, but has to be instantiated as an object to have any kind of
+existence. In our case the object is an `xlator_t`. Several of these might be
+created within the same daemon, sharing all of the same code through init/fini
+and dispatch tables, but sharing *no data*. You could implement shared data (as
+ static variables in your shared libraries) but that's strongly discouraged.
+Every function in your shared library will get an `xlator_t` as an argument,
+and should use it. This lack of class-level data is one of the points where
+the analogy to common OOP systems starts to break down. Another place is the
+complete lack of inheritance. Translators inherit behavior (code) from exactly
+one shared library -- looked up and loaded using the `type` field in a volfile
+`volume ... end-volume` block -- and that's it -- not even single inheritance,
+no subclasses or superclasses, no mixins or prototypes, just the relationship
+between an object and its class. With that in mind, let's turn to the init
+function that we just barely touched on last time.
+
+```
+int32_t
+init (xlator_t *this)
+{
+ data_t *data = NULL;
+ rot_13_private_t *priv = NULL;
+
+ if (!this->children || this->children->next) {
+ gf_log ("rot13", GF_LOG_ERROR,
+ "FATAL: rot13 should have exactly one child");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+
+ priv = GF_CALLOC (sizeof (rot_13_private_t), 1, 0);
+ if (!priv)
+ return -1;
+```
+
+At the very top, we see the function signature -- we get a pointer to the
+`xlator_t` object that we're initializing, and we return an `int32_t` status.
+As with most functions in the translator API, this should be zero to indicate
+success. In this case it's safe to return -1 for failure, but watch out: in
+dispatch-table functions, the return value means the status of the *function
+call* rather than the *request*. A request error should be reflected as a
+callback with a non-zero `op_re`t value, but the dispatch function itself
+should still return zero. In fact, the handling of a non-zero return from a
+dispatch function is not all that robust (we recently had a bug report in
+HekaFS related to this) so it's something you should probably avoid
+altogether. This only underscores the difference between dispatch functions
+and `init`/`fini` functions, where non-zero returns *are* expected and handled
+logically by aborting the translator setup. We can see that down at the
+bottom, where we return -1 to indicate that we couldn't allocate our
+private-data area (more about that later).
+
+The first thing this init function does is check that the translator is being
+set up in the right kind of environment. Translators are called by parents and
+in turn call children. Some translators are "initial" translators that inject
+requests into the system from elsewhere -- e.g. mount/fuse injecting requests
+from the kernel, protocol/server injecting requests from the network. Those
+translators don't need parents, but `rot-13` does and so we check for that.
+Similarly, some translators are "final" translators that (from the perspective
+of the current process) terminate requests instead of passing them on -- e.g.
+`protocol/client` passing them to another node, `storage/posix` passing them to
+a local filesystem. Other translators "multiplex" between multiple children --
+ passing each parent request on to one (`cluster/dht`), some
+(`cluster/stripe`), or all (`cluster/afr`) of those children. `rot-13` fits
+into none of those categories either, so it checks that it has *exactly one*
+child. It might be more convenient or robust if translator shared libraries
+had standard variables describing these requirements, to be checked in a
+consistent way by the translator-loading infrastructure itself instead of by
+each separate init function, but this is the way translators work today.
+
+The last thing we see in this fragment is allocating our private data area.
+This can literally be anything we want; the infrastructure just provides the
+priv pointer as a convenience but takes no responsibility for how it's used. In
+ this case we're using `GF_CALLOC` to allocate our own `rot_13_private_t`
+structure. This gets us all the benefits of GlusterFS's memory-leak detection
+infrastructure, but the way we're calling it is not quite ideal. For one thing,
+ the first two arguments -- from `calloc(3)` -- are kind of reversed. For
+another, notice how the last argument is zero. That can actually be an
+enumerated value, to tell the GlusterFS allocator *what* type we're
+allocating. This can be very useful information for memory profiling and leak
+detection, so it's recommended that you follow the example of any
+x`xx-mem-types.h` file elsewhere in the source tree instead of just passing
+zero here (even though that works).
+
+To finish our tour of standard initialization/termination, let's look at the
+end of `init` and the beginning of `fini`:
+
+```
+ this->private = priv;
+ gf_log ("rot13", GF_LOG_DEBUG, "rot13 xlator loaded");
+ return 0;
+}
+
+void
+fini (xlator_t *this)
+{
+ rot_13_private_t *priv = this->private;
+
+ if (!priv)
+ return;
+ this->private = NULL;
+ GF_FREE (priv);
+```
+
+At the end of init we're just storing our private-data pointer in the `priv`
+field of our `xlator_t`, then returning zero to indicate that initialization
+succeeded. As is usually the case, our fini is even simpler. All it really has
+to do is `GF_FREE` our private-data pointer, which we do in a slightly
+roundabout way here. Notice how we don't even have a return value here, since
+there's nothing obvious and useful that the infrastructure could do if `fini`
+failed.
+
+That's practically everything we need to know to get our translator through
+loading, initialization, options processing, and termination. If we had defined
+ no dispatch functions, we could actually configure a daemon to use our
+translator and it would work as a basic pass-through from its parent to a
+single child. In the next post I'll cover how to build the translator and
+configure a daemon to use it, so that we can actually step through it in a
+debugger and see how it all fits together before we actually start adding
+functionality.
+
+This Time For Real
+------------------
+
+In the first two parts of this series, we learned how to write a basic
+translator skeleton that can get through loading, initialization, and option
+processing. This time we'll cover how to build that translator, configure a
+volume to use it, and run the glusterfs daemon in debug mode.
+
+Unfortunately, there's not much direct support for writing new translators. You
+can check out a GlusterFS tree and splice in your own translator directory, but
+ that's a bit painful because you'll have to update multiple makefiles plus a
+bunch of autoconf garbage. As part of the HekaFS project, I basically reverse
+engineered the truly necessary parts of the translator-building process and
+then pestered one of the Fedora glusterfs package maintainers (thanks
+daMaestro!) to add a `glusterfs-devel` package with the required headers. Since
+ then the complexity level in the HekaFS tree has crept back up a bit, but I
+still remember the simple method and still consider it the easiest way to get
+started on a new translator. For the sake of those not using Fedora, I'm going
+to describe a method that doesn't depend on that header package. What it does
+depend on is a GlusterFS source tree, much as you might have cloned from GitHub
+ or the Gluster review site. This tree doesn't have to be fully built, but you
+do need to run `autogen.sh` and configure in it. Then you can take the
+following simple makefile and put it in a directory with your actual source.
+
+```
+# Change these to match your source code.
+TARGET = rot-13.so
+OBJECTS = rot-13.o
+
+# Change these to match your environment.
+GLFS_SRC = /srv/glusterfs
+GLFS_LIB = /usr/lib64
+HOST_OS = GF_LINUX_HOST_OS
+
+# You shouldn't need to change anything below here.
+
+CFLAGS = -fPIC -Wall -O0 -g \
+ -DHAVE_CONFIG_H -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE \
+ -D$(HOST_OS) -I$(GLFS_SRC) -I$(GLFS_SRC)/contrib/uuid \
+ -I$(GLFS_SRC)/libglusterfs/src
+LDFLAGS = -shared -nostartfiles -L$(GLFS_LIB) -lglusterfs \
+ -lpthread
+
+$(TARGET): $(OBJECTS)
+ $(CC) $(OBJECTS) $(LDFLAGS) -o $(TARGET)
+```
+
+Yes, it's still Linux-specific. Mea culpa. As you can see, we're sticking with
+the `rot-13` example, so you can just copy the files from
+`xlators/encryption/rot-13/src` in your GlusterFS tree to follow on. Type
+`make` and you should be rewarded with a nice little `.so` file.
+
+```
+xlator_example$ ls -l rot-13.so
+-rwxr-xr-x. 1 jeff jeff 40784 Nov 16 16:41 rot-13.so
+```
+
+Notice that we've built with optimization level zero and debugging symbols
+included, which would not typically be the case for a packaged version of
+GlusterFS. Let's put our version of `rot-13.so` into a slightly different file
+on our system, so that it doesn't stomp on the installed version (not that
+you'd ever want to use that anyway).
+
+```
+xlator_example# ls /usr/lib64/glusterfs/3git/xlator/encryption/
+crypt.so crypt.so.0 crypt.so.0.0.0 rot-13.so rot-13.so.0
+rot-13.so.0.0.0
+xlator_example# cp rot-13.so \
+ /usr/lib64/glusterfs/3git/xlator/encryption/my-rot-13.so
+```
+
+These paths represent the current Gluster filesystem layout, which is likely to
+be deprecated in favor of the Fedora layout; your paths may vary. At this point
+ we're ready to configure a volume using our new translator. To do that, I'm
+going to suggest something that's strongly discouraged except during
+development (the Gluster guys are going to hate me for this): write our own
+volfile. Here's just about the simplest volfile you'll ever see.
+
+```
+volume my-posix
+ type storage/posix
+ option directory /srv/export
+end-volume
+
+volume my-rot13
+ type encryption/my-rot-13
+ subvolumes my-posix
+end-volume
+```
+
+All we have here is a basic brick using `/srv/export` for its data, and then
+an instance of our translator layered on top -- no client or server is
+necessary for what we're doing, and the system will automatically push a
+mount/fuse translator on top if there's no server translator. To try this out,
+all we need is the following command (assuming the directories involved already
+ exist).
+
+```
+xlator_example$ glusterfs --debug -f my.vol /srv/import
+```
+
+You should be rewarded with a whole lot of log output, including the text of
+the volfile (this is very useful for debugging problems in the field). If you
+go to another window on the same machine, you can see that you have a new
+filesystem mounted.
+
+```
+~$ df /srv/import
+Filesystem 1K-blocks Used Available Use% Mounted on
+/srv/xlator_example/my.vol
+ 114506240 2706176 105983488 3% /srv/import
+```
+
+Just for fun, write something into a file in `/srv/import`, then look at the
+corresponding file in `/srv/export` to see it all `rot-13`'ed for you.
+
+```
+~$ echo hello > /srv/import/a_file
+~$ cat /srv/export/a_file
+uryyb
+```
+
+There you have it -- functionality you control, implemented easily, layered on
+top of local storage. Now you could start adding functionality -- real
+encryption, perhaps -- and inevitably having to debug it. You could do that the
+ old-school way, with `gf_log` (preferred) or even plain old `printf`, or you
+could run daemons under `gdb` instead. Alternatively, you could wait for the
+next Translator 101 post, where we'll be doing exactly that.
+
+Debugging a Translator
+----------------------
+
+Now that we've learned what a translator looks like and how to build one, it's
+time to run one and actually watch it work. The best way to do this is good
+old-fashioned `gdb`, as follows (using some of the examples from last time).
+
+```
+xlator_example# gdb glusterfs
+GNU gdb (GDB) Red Hat Enterprise Linux (7.2-50.el6)
+...
+(gdb) r --debug -f my.vol /srv/import
+Starting program: /usr/sbin/glusterfs --debug -f my.vol /srv/import
+...
+[2011-11-23 11:23:16.495516] I [fuse-bridge.c:2971:fuse_init]
+ 0-glusterfs-fuse: FUSE inited with protocol versions:
+ glusterfs 7.13 kernel 7.13
+```
+
+If you get to this point, your glusterfs client process is already running. You
+can go to another window to see the mountpoint, do file operations, etc.
+
+```
+~# df /srv/import
+Filesystem 1K-blocks Used Available Use% Mounted on
+/root/xlator_example/my.vol
+ 114506240 2643968 106045568 3% /srv/import
+~# ls /srv/import
+a_file
+~# cat /srv/import/a_file
+hello
+```
+
+Now let's interrupt the process and see where we are.
+
+```
+^C
+Program received signal SIGINT, Interrupt.
+0x0000003a0060b3dc in pthread_cond_wait@@GLIBC_2.3.2 ()
+ from /lib64/libpthread.so.0
+(gdb) info threads
+ 5 Thread 0x7fffeffff700 (LWP 27206) 0x0000003a002dd8c7
+ in readv ()
+ from /lib64/libc.so.6
+ 4 Thread 0x7ffff50e3700 (LWP 27205) 0x0000003a0060b75b
+ in pthread_cond_timedwait@@GLIBC_2.3.2 ()
+ from /lib64/libpthread.so.0
+ 3 Thread 0x7ffff5f02700 (LWP 27204) 0x0000003a0060b3dc
+ in pthread_cond_wait@@GLIBC_2.3.2 ()
+ from /lib64/libpthread.so.0
+ 2 Thread 0x7ffff6903700 (LWP 27203) 0x0000003a0060f245
+ in sigwait ()
+ from /lib64/libpthread.so.0
+* 1 Thread 0x7ffff7957700 (LWP 27196) 0x0000003a0060b3dc
+ in pthread_cond_wait@@GLIBC_2.3.2 ()
+ from /lib64/libpthread.so.0
+```
+
+Like any non-toy server, this one has multiple threads. What are they all
+doing? Honestly, even I don't know. Thread 1 turns out to be in
+`event_dispatch_epoll`, which means it's the one handling all of our network
+I/O. Note that with socket multi-threading patch this will change, with one
+thread in `socket_poller` per connection. Thread 2 is in `glusterfs_sigwaiter`
+which means signals will be isolated to that thread. Thread 3 is in
+`syncenv_task`, so it's a worker process for synchronous requests such as
+those used by the rebalance and repair code. Thread 4 is in
+`janitor_get_next_fd`, so it's waiting for a chance to close no-longer-needed
+file descriptors on the local filesystem. (I admit I had to look that one up,
+BTW.) Lastly, thread 5 is in `fuse_thread_proc`, so it's the one fetching
+requests from our FUSE interface. You'll often see many more threads than
+this, but it's a pretty good basic set. Now, let's set a breakpoint so we can
+actually watch a request.
+
+```
+(gdb) b rot13_writev
+Breakpoint 1 at 0x7ffff50e4f0b: file rot-13.c, line 119.
+(gdb) c
+Continuing.
+```
+
+At this point we go into our other window and do something that will involve a write.
+
+```
+~# echo goodbye > /srv/import/another_file
+(back to the first window)
+[Switching to Thread 0x7fffeffff700 (LWP 27206)]
+
+Breakpoint 1, rot13_writev (frame=0x7ffff6e4402c, this=0x638440,
+ fd=0x7ffff409802c, vector=0x7fffe8000cd8, count=1, offset=0,
+ iobref=0x7fffe8001070) at rot-13.c:119
+119 rot_13_private_t *priv = (rot_13_private_t *)this->private;
+```
+
+Remember how we built with debugging symbols enabled and no optimization? That
+will be pretty important for the next few steps. As you can see, we're in
+`rot13_writev`, with several parameters.
+
+* `frame` is our always-present frame pointer for this request. Also,
+ `frame->local` will point to any local data we created and attached to the
+ request ourselves.
+* `this` is a pointer to our instance of the `rot-13` translator. You can examine
+ it if you like to see the name, type, options, parent/children, inode table,
+ and other stuff associated with it.
+* `fd` is a pointer to a file-descriptor *object* (`fd_t`, not just a
+ file-descriptor index which is what most people use "fd" for). This in turn
+ points to an inode object (`inode_t`) and we can associate our own
+ `rot-13`-specific data with either of these.
+* `vector` and `count` together describe the data buffers for this write, which
+ we'll get to in a moment.
+* `offset` is the offset into the file at which we're writing.
+* `iobref` is a buffer-reference object, which is used to track the life cycle
+ of buffers containing read/write data. If you look closely, you'll notice that
+ `vector[0].iov_base` points to the same address as `iobref->iobrefs[0].ptr`, which
+ should give you some idea of the inter-relationships between vector and iobref.
+
+OK, now what about that `vector`? We can use it to examine the data being
+written, like this.
+
+```
+(gdb) p vector[0]
+$2 = {iov_base = 0x7ffff7936000, iov_len = 8}
+(gdb) x/s 0x7ffff7936000
+0x7ffff7936000: "goodbye\n"
+```
+
+It's not always safe to view this data as a string, because it might just as
+well be binary data, but since we're generating the write this time it's safe
+and convenient. With that knowledge, let's step through things a bit.
+
+```
+(gdb) s
+120 if (priv->encrypt_write)
+(gdb)
+121 rot13_iovec (vector, count);
+(gdb)
+rot13_iovec (vector=0x7fffe8000cd8, count=1) at rot-13.c:57
+57 for (i = 0; i < count; i++) {
+(gdb)
+58 rot13 (vector[i].iov_base, vector[i].iov_len);
+(gdb)
+rot13 (buf=0x7ffff7936000 "goodbye\n", len=8) at rot-13.c:45
+45 for (i = 0; i < len; i++) {
+(gdb)
+46 if (buf[i] >= 'a' && buf[i] <= 'z')
+(gdb)
+47 buf[i] = 'a' + ((buf[i] - 'a' + 13) % 26);
+```
+
+Here we've stepped into `rot13_iovec`, which iterates through our vector
+calling `rot13`, which in turn iterates through the characters in that chunk
+doing the `rot-13` operation if/as appropriate. This is pretty straightforward
+stuff, so let's skip to the next interesting bit.
+
+```
+(gdb) fin
+Run till exit from #0 rot13 (buf=0x7ffff7936000 "goodbye\n",
+ len=8) at rot-13.c:47
+rot13_iovec (vector=0x7fffe8000cd8, count=1) at rot-13.c:57
+57 for (i = 0; i < count; i++) {
+(gdb) fin
+Run till exit from #0 rot13_iovec (vector=0x7fffe8000cd8,
+ count=1) at rot-13.c:57
+rot13_writev (frame=0x7ffff6e4402c, this=0x638440,
+ fd=0x7ffff409802c, vector=0x7fffe8000cd8, count=1,
+ offset=0, iobref=0x7fffe8001070) at rot-13.c:123
+123 STACK_WIND (frame,
+(gdb) b 129
+Breakpoint 2 at 0x7ffff50e4f35: file rot-13.c, line 129.
+(gdb) b rot13_writev_cbk
+Breakpoint 3 at 0x7ffff50e4db3: file rot-13.c, line 106.
+(gdb) c
+```
+
+So we've set breakpoints on both the callback and the statement following the
+`STACK_WIND`. Which one will we hit first?
+
+```
+Breakpoint 3, rot13_writev_cbk (frame=0x7ffff6e4402c,
+ cookie=0x7ffff6e440d8, this=0x638440, op_ret=8, op_errno=0,
+ prebuf=0x7fffefffeca0, postbuf=0x7fffefffec30)
+ at rot-13.c:106
+106 STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno,
+ prebuf, postbuf);
+(gdb) bt
+#0 rot13_writev_cbk (frame=0x7ffff6e4402c,
+ cookie=0x7ffff6e440d8, this=0x638440, op_ret=8, op_errno=0,
+ prebuf=0x7fffefffeca0, postbuf=0x7fffefffec30)
+ at rot-13.c:106
+#1 0x00007ffff52f1b37 in posix_writev (frame=0x7ffff6e440d8,
+ this=<value optimized out>, fd=<value optimized out>,
+ vector=<value optimized out>, count=1,
+ offset=<value optimized out>, iobref=0x7fffe8001070)
+ at posix.c:2217
+#2 0x00007ffff50e513e in rot13_writev (frame=0x7ffff6e4402c,
+ this=0x638440, fd=0x7ffff409802c, vector=0x7fffe8000cd8,
+ count=1, offset=0, iobref=0x7fffe8001070) at rot-13.c:123
+```
+
+Surprise! We're in `rot13_writev_cbk` now, called (indirectly) while we're
+still in `rot13_writev` before `STACK_WIND` returns (still at rot-13.c:123). If
+ you did any request cleanup here, then you need to be careful about what you
+do in the remainder of `rot13_writev` because data may have been freed etc.
+It's tempting to say you should just do the cleanup in `rot13_writev` after
+the `STACK_WIND,` but that's not valid because it's also possible that some
+other translator returned without calling `STACK_UNWIND` -- i.e. before
+`rot13_writev` is called, so then it would be the one getting null-pointer
+errors instead. To put it another way, the callback and the return from
+`STACK_WIND` can occur in either order or even simultaneously on different
+threads. Even if you were to use reference counts, you'd have to make sure to
+use locking or atomic operations to avoid races, and it's not worth it. Unless
+you *really* understand the possible flows of control and know what you're
+doing, it's better to do cleanup in the callback and nothing after
+`STACK_WIND.`
+
+At this point all that's left is a `STACK_UNWIND` and a return. The
+`STACK_UNWIND` invokes our parent's completion callback, and in this case our
+parent is FUSE so at that point the VFS layer is notified of the write being
+complete. Finally, we return through several levels of normal function calls
+until we come back to fuse_thread_proc, which waits for the next request.
+
+So that's it. For extra fun, you might want to repeat this exercise by stepping
+through some other call -- stat or setxattr might be good choices -- but you'll
+ have to use a translator that actually implements those calls to see much
+that's interesting. Then you'll pretty much know everything I knew when I
+started writing my first for-real translators, and probably even a bit more. I
+hope you've enjoyed this series, or at least found it useful, and if you have
+any suggestions for other topics I should cover please let me know (via
+comments or email, IRC or Twitter).
diff --git a/doc/hacker-guide/en-US/markdown/write-behind.md b/doc/hacker-guide/en-US/markdown/write-behind.md
new file mode 100644
index 000000000..e20682249
--- /dev/null
+++ b/doc/hacker-guide/en-US/markdown/write-behind.md
@@ -0,0 +1,56 @@
+performance/write-behind translator
+===================================
+
+Basic working
+--------------
+
+Write behind is basically a translator to lie to the application that the
+write-requests are finished, even before it is actually finished.
+
+On a regular translator tree without write-behind, control flow is like this:
+
+1. application makes a `write()` system call.
+2. VFS ==> FUSE ==> `/dev/fuse`.
+3. fuse-bridge initiates a glusterfs `writev()` call.
+4. `writev()` is `STACK_WIND()`ed upto client-protocol or storage translator.
+5. client-protocol, on receiving reply from server, starts `STACK_UNWIND()` towards the fuse-bridge.
+
+On a translator tree with write-behind, control flow is like this:
+
+1. application makes a `write()` system call.
+2. VFS ==> FUSE ==> `/dev/fuse`.
+3. fuse-bridge initiates a glusterfs `writev()` call.
+4. `writev()` is `STACK_WIND()`ed upto write-behind translator.
+5. write-behind adds the write buffer to its internal queue and does a `STACK_UNWIND()` towards the fuse-bridge.
+
+write call is completed in application's percepective. after
+`STACK_UNWIND()`ing towards the fuse-bridge, write-behind initiates a fresh
+writev() call to its child translator, whose replies will be consumed by
+write-behind itself. Write-behind _doesn't_ cache the write buffer, unless
+`option flush-behind on` is specified in volume specification file.
+
+Windowing
+---------
+
+With respect to write-behind, each write-buffer has three flags: `stack_wound`, `write_behind` and `got_reply`.
+
+* `stack_wound`: if set, indicates that write-behind has initiated `STACK_WIND()` towards child translator.
+* `write_behind`: if set, indicates that write-behind has done `STACK_UNWIND()` towards fuse-bridge.
+* `got_reply`: if set, indicates that write-behind has received reply from child translator for a `writev()` `STACK_WIND()`. a request will be destroyed by write-behind only if this flag is set.
+
+Currently pending write requests = aggregate size of requests with write_behind = 1 and got_reply = 0.
+
+window size limits the aggregate size of currently pending write requests. once
+the pending requests' size has reached the window size, write-behind blocks
+writev() calls from fuse-bridge. Blocking is only from application's
+perspective. Write-behind does `STACK_WIND()` to child translator
+straight-away, but hold behind the `STACK_UNWIND()` towards fuse-bridge.
+`STACK_UNWIND()` is done only once write-behind gets enough replies to
+accomodate for currently blocked request.
+
+Flush behind
+------------
+
+If `option flush-behind on` is specified in volume specification file, then
+write-behind sends aggregate write requests to child translator, instead of
+regular per request `STACK_WIND()`s.
diff --git a/doc/hacker-guide/lock-ahead.txt b/doc/hacker-guide/lock-ahead.txt
deleted file mode 100644
index 46232610c..000000000
--- a/doc/hacker-guide/lock-ahead.txt
+++ /dev/null
@@ -1,80 +0,0 @@
- Lock-ahead translator
- ---------------------
-
-The objective of the lock-ahead translator is to speculatively
-hold locks (inodelk and entrylk) on the universal set (0 - infinity
-in case of inodelk and all basenames in case of entrylk) even
-when a lock is requested only on a subset, in anticipation that
-further locks will be requested within the same universal set.
-
-So, for example, when cluster/replicate locks a region before
-writing to it, lock-ahead would instead lock the entire file.
-On further writes, lock-ahead can immediately return success for
-the lock requests, since the entire file has been previously locked.
-
-To avoid starvation of other clients/mountpoints, we employ a
-notify mechanism, described below.
-
-typedef struct {
- struct list_head subset_locks;
-} la_universal_lock_t;
-
-Universal lock structure is stored in the inode context.
-
-typedef struct {
- enum {LOCK_AHEAD_ENTRYLK, LOCK_AHEAD_FENTRYLK,
- LOCK_AHEAD_INODELK, LOCK_AHEAD_FINODELK};
-
- union {
- fd_t *fd;
- loc_t loc;
- };
-
- off_t l_start;
- off_t l_len;
-
- const char *basename;
-
- struct list_head universal_lock;
-} la_subset_lock_t;
-
-
-fops implemented:
-
-* inodelk/finodelk/entrylk/fentrylk:
-
-lock:
- if universal lock held:
- add subset to it (save loc_t or fd) and return success
- else:
- send lock-notify fop
- hold universal lock and return
- (set inode context, add subset to it, save loc_t or fd)
-
- if this fails:
- forward the lock request
-
-unlock:
- if subset exists in universal lock:
- delete subset lock from list
- else:
- forward it
-
-* release:
- hold subset locks (each subset lock using the saved loc_t or fd)
- and release universal lock
-
-* lock-notify (on unwind) (new fop)
- hold subset locks and release universal lock
-
-
-lock-notify in locks translator:
-
-if a subset lock in entrylk/inodelk cannot be satisfied
-because of a universal lock held by someone else:
- unwind the lock-notify fop
-
-==============================================
-$ Last updated: Tue Feb 17 11:31:18 IST 2009 $
-$ Author: Vikas Gorur <vikas@zresearch.com> $
-==============================================
diff --git a/doc/hacker-guide/posix.txt b/doc/hacker-guide/posix.txt
deleted file mode 100644
index d0132abfe..000000000
--- a/doc/hacker-guide/posix.txt
+++ /dev/null
@@ -1,59 +0,0 @@
----------------
-* storage/posix
----------------
-
-- SET_FS_ID
-
- This is so that all filesystem checks are done with the user's
- uid/gid and not GlusterFS's uid/gid.
-
-- MAKE_REAL_PATH
-
- This macro concatenates the base directory of the posix volume
- ('option directory') with the given path.
-
-- need_xattr in lookup
-
- If this flag is passed, lookup returns a xattr dictionary that contains
- the file's create time, the file's contents, and the version number
- of the file.
-
- This is a hack to increase small file performance. If an application
- wants to read a small file, it can finish its job with just a lookup
- call instead of a lookup followed by read.
-
-- getdents/setdents
-
- These are used by unify to set and get directory entries.
-
-- ALIGN_BUF
-
- Macro to align an address to a page boundary (4K).
-
-- priv->export_statfs
-
- In some cases, two exported volumes may reside on the same
- partition on the server. Sending statvfs info for both
- the volumes will lead to erroneous df output at the client,
- since free space on the partition will be counted twice.
-
- In such cases, user can disable exporting statvfs info
- on one of the volumes by setting this option.
-
-- xattrop
-
- This fop is used by replicate to set version numbers on files.
-
-- getxattr/setxattr hack to read/write files
-
- A key, GLUSTERFS_FILE_CONTENT_STRING, is handled in a special way by
- getxattr/setxattr. A getxattr with the key will return the entire
- content of the file as the value. A setxattr with the key will write
- the value as the entire content of the file.
-
-- posix_checksum
-
- This calculates a simple XOR checksum on all entry names in a
- directory that is used by unify to compare directory contents.
-
-
diff --git a/doc/hacker-guide/write-behind.txt b/doc/hacker-guide/write-behind.txt
deleted file mode 100644
index 498e95480..000000000
--- a/doc/hacker-guide/write-behind.txt
+++ /dev/null
@@ -1,45 +0,0 @@
-basic working
---------------
-
- write behind is basically a translator to lie to the application that the write-requests are finished, even before it is actually finished.
-
- on a regular translator tree without write-behind, control flow is like this:
-
- 1. application makes a write() system call.
- 2. VFS ==> FUSE ==> /dev/fuse.
- 3. fuse-bridge initiates a glusterfs writev() call.
- 4. writev() is STACK_WIND()ed upto client-protocol or storage translator.
- 5. client-protocol, on recieving reply from server, starts STACK_UNWIND() towards the fuse-bridge.
-
- on a translator tree with write-behind, control flow is like this:
-
- 1. application makes a write() system call.
- 2. VFS ==> FUSE ==> /dev/fuse.
- 3. fuse-bridge initiates a glusterfs writev() call.
- 4. writev() is STACK_WIND()ed upto write-behind translator.
- 5. write-behind adds the write buffer to its internal queue and does a STACK_UNWIND() towards the fuse-bridge.
-
- write call is completed in application's percepective. after STACK_UNWIND()ing towards the fuse-bridge, write-behind initiates a fresh writev() call to its child translator, whose replies will be consumed by write-behind itself. write-behind _doesn't_ cache the write buffer, unless 'option flush-behind on' is specified in volume specification file.
-
-windowing
----------
-
- write respect to write-behind, each write-buffer has three flags: 'stack_wound', 'write_behind' and 'got_reply'.
-
- stack_wound: if set, indicates that write-behind has initiated STACK_WIND() towards child translator.
-
- write_behind: if set, indicates that write-behind has done STACK_UNWIND() towards fuse-bridge.
-
- got_reply: if set, indicates that write-behind has recieved reply from child translator for a writev() STACK_WIND(). a request will be destroyed by write-behind only if this flag is set.
-
- currently pending write requests = aggregate size of requests with write_behind = 1 and got_reply = 0.
-
- window size limits the aggregate size of currently pending write requests. once the pending requests' size has reached the window size, write-behind blocks writev() calls from fuse-bridge.
- blocking is only from application's perspective. write-behind does STACK_WIND() to child translator straight-away, but hold behind the STACK_UNWIND() towards fuse-bridge. STACK_UNWIND() is done only once write-behind gets enough replies to accomodate for currently blocked request.
-
-flush behind
-------------
-
- if 'option flush-behind on' is specified in volume specification file, then write-behind sends aggregate write requests to child translator, instead of regular per request STACK_WIND()s.
-
-
diff --git a/doc/user-guide/Makefile.am b/doc/legacy/Makefile.am
index 800e7321d..b2caabaa2 100644
--- a/doc/user-guide/Makefile.am
+++ b/doc/legacy/Makefile.am
@@ -1,3 +1,3 @@
info_TEXINFOS = user-guide.texi
-CLEANFILES = *~
+CLEANFILES = *~
DISTCLEANFILES = .deps/*.P *.info *vti
diff --git a/doc/user-guide/advanced-stripe.odg b/doc/legacy/advanced-stripe.odg
index 7686d7091..7686d7091 100644
--- a/doc/user-guide/advanced-stripe.odg
+++ b/doc/legacy/advanced-stripe.odg
Binary files differ
diff --git a/doc/user-guide/advanced-stripe.pdf b/doc/legacy/advanced-stripe.pdf
index ec8b03dcf..ec8b03dcf 100644
--- a/doc/user-guide/advanced-stripe.pdf
+++ b/doc/legacy/advanced-stripe.pdf
Binary files differ
diff --git a/doc/booster.txt b/doc/legacy/booster.txt
index 684ac8965..051401a28 100644
--- a/doc/booster.txt
+++ b/doc/legacy/booster.txt
@@ -1,6 +1,6 @@
Introduction
============
-* booster is a LD_PRELOADable library which boosts read/write performance by bypassing fuse for
+* booster is a LD_PRELOADable library which boosts read/write performance by bypassing fuse for
read() and write() calls.
Requirements
@@ -14,29 +14,29 @@ Design
* contents of client volume-file.
* mount point.
-* LD_PRELOADed booster.so maintains an hash table storing mount-points and libglusterfsclient handles
- so that handles are reused for files from same mount point.
+* LD_PRELOADed booster.so maintains an hash table storing mount-points and libglusterfsclient handles
+ so that handles are reused for files from same mount point.
* it also maintains a fdtable. fdtable maps the fd (integer) returned to application to fd (pointer to fd struct)
used by libglusterfsclient. application is returned the same fd as the one returned from libc apis.
* During fork, these tables are overwritten to enable creation of fresh glusterfs context in child.
-
+
Working
=======
-* application willing to use booster LD_PRELOADs booster.so which is a wrapper library implementing
+* application willing to use booster LD_PRELOADs booster.so which is a wrapper library implementing
open, read and write.
-* application should specify the path to logfile through the environment variable GLFS_BOOSTER_LOGFILE. If
+* application should specify the path to logfile through the environment variable GLFS_BOOSTER_LOGFILE. If
not specified, logging is done to /dev/stderr.
* open call does,
* real_open on the file.
* fgetxattr(fd).
- * store the volume-file content got in the dictionary to a temparory file.
- * look in the hashtable for the mount-point, if already present get the libglusterfsclient handle from the
- hashtable. Otherwise get a new handle from libglusterfsclient (be careful about mount point not present in
- the hashtable and multiple glusterfs_inits running simultaneously for the same mount-point there by using
+ * store the volume-file content got in the dictionary to a temporary file.
+ * look in the hashtable for the mount-point, if already present get the libglusterfsclient handle from the
+ hashtable. Otherwise get a new handle from libglusterfsclient (be careful about mount point not present in
+ the hashtable and multiple glusterfs_inits running simultaneously for the same mount-point there by using
multiple handles for the same mount point).
* real_close (fd).
* delete temporary volume-volfile.
@@ -51,4 +51,4 @@ Working
* close call does,
* remove the fd from the fdtable.
-* other calls use real_calls.
+* other calls use real_calls.
diff --git a/doc/user-guide/colonO-icon.jpg b/doc/legacy/colonO-icon.jpg
index 3e66f7a27..3e66f7a27 100644
--- a/doc/user-guide/colonO-icon.jpg
+++ b/doc/legacy/colonO-icon.jpg
Binary files differ
diff --git a/doc/errno.list.bsd.txt b/doc/legacy/errno.list.bsd.txt
index 350af25e4..350af25e4 100644
--- a/doc/errno.list.bsd.txt
+++ b/doc/legacy/errno.list.bsd.txt
diff --git a/doc/errno.list.linux.txt b/doc/legacy/errno.list.linux.txt
index baa50792d..cc868644b 100644
--- a/doc/errno.list.linux.txt
+++ b/doc/legacy/errno.list.linux.txt
@@ -95,7 +95,7 @@ extern "C" {
/**
* @defgroup apr_errno Error Codes
- * @ingroup APR
+ * @ingroup APR
* @{
*/
@@ -110,7 +110,7 @@ typedef int apr_status_t;
* @param buf A buffer to hold the error string.
* @param bufsize Size of the buffer to hold the string.
*/
-APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
+APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
apr_size_t bufsize);
#if defined(DOXYGEN)
@@ -130,7 +130,7 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
* Fold an apr_status_t code back to the native platform defined error.
* @param e The apr_status_t folded platform os error code.
* @warning macro implementation; the statcode argument may be evaluated
- * multiple times. If the statcode was not created by apr_get_os_error
+ * multiple times. If the statcode was not created by apr_get_os_error
* or APR_FROM_OS_ERROR, the results are undefined.
*/
#define APR_TO_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e - APR_OS_START_SYSERR)
@@ -166,7 +166,7 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
* @warning This is a macro implementation; the statcode argument may be evaluated
* multiple times. If the statcode was not created by apr_get_os_error
* or APR_FROM_OS_ERROR, the results are undefined. This macro sets
- * errno, or calls a WSASetLastError() style function, unfolding
+ * errno, or calls a WSASetLastError() style function, unfolding
* socketcode with APR_TO_OS_ERROR.
*/
@@ -206,12 +206,12 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
#define APR_OS_START_CANONERR (APR_OS_START_USERERR \
+ (APR_OS_ERRSPACE_SIZE * 10))
/**
- * APR_OS_START_EAIERR folds EAI_ error codes from getaddrinfo() into
+ * APR_OS_START_EAIERR folds EAI_ error codes from getaddrinfo() into
* apr_status_t values.
*/
#define APR_OS_START_EAIERR (APR_OS_START_CANONERR + APR_OS_ERRSPACE_SIZE)
/**
- * APR_OS_START_SYSERR folds platform-specific system error values into
+ * APR_OS_START_SYSERR folds platform-specific system error values into
* apr_status_t values.
*/
#define APR_OS_START_SYSERR (APR_OS_START_EAIERR + APR_OS_ERRSPACE_SIZE)
@@ -219,13 +219,13 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
/** no error. */
#define APR_SUCCESS 0
-/**
+/**
* @defgroup APR_Error APR Error Values
* <PRE>
* <b>APR ERROR VALUES</b>
- * APR_ENOSTAT APR was unable to perform a stat on the file
+ * APR_ENOSTAT APR was unable to perform a stat on the file
* APR_ENOPOOL APR was not provided a pool with which to allocate memory
- * APR_EBADDATE APR was given an invalid date
+ * APR_EBADDATE APR was given an invalid date
* APR_EINVALSOCK APR was given an invalid socket
* APR_ENOPROC APR was not given a process structure
* APR_ENOTIME APR was not given a time structure
@@ -236,7 +236,7 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
* APR_ENOTHREAD APR was not given a thread structure
* APR_ENOTHDKEY APR was not given a thread key structure
* APR_ENOSHMAVAIL There is no more shared memory available
- * APR_EDSOOPEN APR was unable to open the dso object. For more
+ * APR_EDSOOPEN APR was unable to open the dso object. For more
* information call apr_dso_error().
* APR_EGENERAL General failure (specific information not available)
* APR_EBADIP The specified IP address is invalid
@@ -256,17 +256,17 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
* APR_INCOMPLETE The operation was incomplete although some processing
* was performed and the results are partially valid
* APR_BADCH Getopt found an option not in the option string
- * APR_BADARG Getopt found an option that is missing an argument
+ * APR_BADARG Getopt found an option that is missing an argument
* and an argument was specified in the option string
* APR_EOF APR has encountered the end of the file
* APR_NOTFOUND APR was unable to find the socket in the poll structure
* APR_ANONYMOUS APR is using anonymous shared memory
* APR_FILEBASED APR is using a file name as the key to the shared memory
* APR_KEYBASED APR is using a shared key as the key to the shared memory
- * APR_EINIT Ininitalizer value. If no option has been found, but
+ * APR_EINIT Ininitalizer value. If no option has been found, but
* the status variable requires a value, this should be used
- * APR_ENOTIMPL The APR function has not been implemented on this
- * platform, either because nobody has gotten to it yet,
+ * APR_ENOTIMPL The APR function has not been implemented on this
+ * platform, either because nobody has gotten to it yet,
* or the function is impossible on this platform.
* APR_EMISMATCH Two passwords do not match.
* APR_EABSOLUTE The given path was absolute.
@@ -334,7 +334,7 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
#define APR_ENOTENOUGHENTROPY (APR_OS_START_ERROR + 28)
/** @} */
-/**
+/**
* @defgroup APR_STATUS_IS Status Value Tests
* @warning For any particular error condition, more than one of these tests
* may match. This is because platform-specific error codes may not
@@ -345,16 +345,16 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
* adjust the order of the tests accordingly.
* @{
*/
-/**
- * APR was unable to perform a stat on the file
+/**
+ * APR was unable to perform a stat on the file
* @warning always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_ENOSTAT(s) ((s) == APR_ENOSTAT)
-/**
- * APR was not provided a pool with which to allocate memory
+/**
+ * APR was not provided a pool with which to allocate memory
* @warning always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_ENOPOOL(s) ((s) == APR_ENOPOOL)
/** APR was given an invalid date */
@@ -386,8 +386,8 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
/** The specified netmask is invalid */
#define APR_STATUS_IS_EBADMASK(s) ((s) == APR_EBADMASK)
/* empty slot: +18 */
-/**
- * APR was unable to open the dso object.
+/**
+ * APR was unable to open the dso object.
* For more information call apr_dso_error().
*/
#if defined(WIN32)
@@ -425,7 +425,7 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
/** @} */
-/**
+/**
* @addtogroup APR_Error
* @{
*/
@@ -466,7 +466,7 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
/** @see APR_STATUS_IS_KEYBASED */
#define APR_KEYBASED (APR_OS_START_STATUS + 21)
/** @see APR_STATUS_IS_EINIT */
-#define APR_EINIT (APR_OS_START_STATUS + 22)
+#define APR_EINIT (APR_OS_START_STATUS + 22)
/** @see APR_STATUS_IS_ENOTIMPL */
#define APR_ENOTIMPL (APR_OS_START_STATUS + 23)
/** @see APR_STATUS_IS_EMISMATCH */
@@ -475,156 +475,156 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
#define APR_EBUSY (APR_OS_START_STATUS + 25)
/** @} */
-/**
+/**
* @addtogroup APR_STATUS_IS
* @{
*/
-/**
- * Program is currently executing in the child
+/**
+ * Program is currently executing in the child
* @warning
* always use this test, as platform-specific variances may meet this
* more than one error code */
#define APR_STATUS_IS_INCHILD(s) ((s) == APR_INCHILD)
-/**
- * Program is currently executing in the parent
+/**
+ * Program is currently executing in the parent
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_INPARENT(s) ((s) == APR_INPARENT)
-/**
- * The thread is detached
+/**
+ * The thread is detached
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_DETACH(s) ((s) == APR_DETACH)
-/**
- * The thread is not detached
+/**
+ * The thread is not detached
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_NOTDETACH(s) ((s) == APR_NOTDETACH)
-/**
+/**
* The child has finished executing
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_CHILD_DONE(s) ((s) == APR_CHILD_DONE)
-/**
+/**
* The child has not finished executing
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_CHILD_NOTDONE(s) ((s) == APR_CHILD_NOTDONE)
-/**
+/**
* The operation did not finish before the timeout
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_TIMEUP(s) ((s) == APR_TIMEUP)
-/**
+/**
* The operation was incomplete although some processing was performed
* and the results are partially valid.
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_INCOMPLETE(s) ((s) == APR_INCOMPLETE)
/* empty slot: +9 */
/* empty slot: +10 */
/* empty slot: +11 */
-/**
+/**
* Getopt found an option not in the option string
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_BADCH(s) ((s) == APR_BADCH)
-/**
- * Getopt found an option not in the option string and an argument was
+/**
+ * Getopt found an option not in the option string and an argument was
* specified in the option string
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_BADARG(s) ((s) == APR_BADARG)
-/**
+/**
* APR has encountered the end of the file
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_EOF(s) ((s) == APR_EOF)
-/**
+/**
* APR was unable to find the socket in the poll structure
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_NOTFOUND(s) ((s) == APR_NOTFOUND)
/* empty slot: +16 */
/* empty slot: +17 */
/* empty slot: +18 */
-/**
+/**
* APR is using anonymous shared memory
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_ANONYMOUS(s) ((s) == APR_ANONYMOUS)
-/**
+/**
* APR is using a file name as the key to the shared memory
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_FILEBASED(s) ((s) == APR_FILEBASED)
-/**
+/**
* APR is using a shared key as the key to the shared memory
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_KEYBASED(s) ((s) == APR_KEYBASED)
-/**
- * Ininitalizer value. If no option has been found, but
+/**
+ * Ininitalizer value. If no option has been found, but
* the status variable requires a value, this should be used
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_EINIT(s) ((s) == APR_EINIT)
-/**
- * The APR function has not been implemented on this
- * platform, either because nobody has gotten to it yet,
+/**
+ * The APR function has not been implemented on this
+ * platform, either because nobody has gotten to it yet,
* or the function is impossible on this platform.
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_ENOTIMPL(s) ((s) == APR_ENOTIMPL)
-/**
+/**
* Two passwords do not match.
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_EMISMATCH(s) ((s) == APR_EMISMATCH)
-/**
+/**
* The given lock was busy
* @warning always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_EBUSY(s) ((s) == APR_EBUSY)
/** @} */
-/**
+/**
* @addtogroup APR_Error APR Error Values
* @{
*/
@@ -713,8 +713,8 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
#define APR_ESPIPE (APR_OS_START_CANONERR + 12)
#endif
-/**
- * @see APR_STATUS_IS_EAGAIN
+/**
+ * @see APR_STATUS_IS_EAGAIN
* @warning use APR_STATUS_IS_EAGAIN instead of just testing this value
*/
#ifdef EAGAIN
@@ -753,7 +753,7 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
#define APR_EINPROGRESS (APR_OS_START_CANONERR + 17)
#endif
-/**
+/**
* @see APR_STATUS_IS_ECONNABORTED
* @warning use APR_STATUS_IS_ECONNABORTED instead of just testing this value
*/
@@ -771,7 +771,7 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
#define APR_ECONNRESET (APR_OS_START_CANONERR + 19)
#endif
-/** @see APR_STATUS_IS_ETIMEDOUT
+/** @see APR_STATUS_IS_ETIMEDOUT
* @deprecated */
#ifdef ETIMEDOUT
#define APR_ETIMEDOUT ETIMEDOUT
@@ -849,7 +849,7 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
*/
#define APR_OS2_STATUS(e) (APR_FROM_OS_ERROR(e))
-/* These can't sit in a private header, so in spite of the extra size,
+/* These can't sit in a private header, so in spite of the extra size,
* they need to be made available here.
*/
#define SOCBASEERR 10000
@@ -946,10 +946,10 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
|| (s) == APR_OS_START_SYSERR + SOCECONNRESET)
/* XXX deprecated */
#define APR_STATUS_IS_ETIMEDOUT(s) ((s) == APR_ETIMEDOUT \
- || (s) == APR_OS_START_SYSERR + SOCETIMEDOUT)
+ || (s) == APR_OS_START_SYSERR + SOCETIMEDOUT)
#undef APR_STATUS_IS_TIMEUP
#define APR_STATUS_IS_TIMEUP(s) ((s) == APR_TIMEUP \
- || (s) == APR_OS_START_SYSERR + SOCETIMEDOUT)
+ || (s) == APR_OS_START_SYSERR + SOCETIMEDOUT)
#define APR_STATUS_IS_EHOSTUNREACH(s) ((s) == APR_EHOSTUNREACH \
|| (s) == APR_OS_START_SYSERR + SOCEHOSTUNREACH)
#define APR_STATUS_IS_ENETUNREACH(s) ((s) == APR_ENETUNREACH \
@@ -1182,7 +1182,7 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
#define apr_get_netos_error() (errno)
#define apr_set_netos_error(e) (errno = (e))
-/**
+/**
* @addtogroup APR_STATUS_IS
* @{
*/
@@ -1246,15 +1246,15 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
/** operation now in progress */
#define APR_STATUS_IS_EINPROGRESS(s) ((s) == APR_EINPROGRESS)
-/**
- * Software caused connection abort
+/**
+ * Software caused connection abort
* @remark
- * EPROTO on certain older kernels really means ECONNABORTED, so we need to
+ * EPROTO on certain older kernels really means ECONNABORTED, so we need to
* ignore it for them. See discussion in new-httpd archives nh.9701 & nh.9603
*
- * There is potentially a bug in Solaris 2.x x<6, and other boxes that
+ * There is potentially a bug in Solaris 2.x x<6, and other boxes that
* implement tcp sockets in userland (i.e. on top of STREAMS). On these
- * systems, EPROTO can actually result in a fatal loop. See PR#981 for
+ * systems, EPROTO can actually result in a fatal loop. See PR#981 for
* example. It's hard to handle both uses of EPROTO.
*/
#ifdef EPROTO
diff --git a/doc/errno.list.macosx.txt b/doc/legacy/errno.list.macosx.txt
index 728753ac7..4954e03d8 100644
--- a/doc/errno.list.macosx.txt
+++ b/doc/legacy/errno.list.macosx.txt
@@ -34,7 +34,7 @@ extern "C" {
/**
* @defgroup apr_errno Error Codes
- * @ingroup APR
+ * @ingroup APR
* @{
*/
@@ -49,7 +49,7 @@ typedef int apr_status_t;
* @param buf A buffer to hold the error string.
* @param bufsize Size of the buffer to hold the string.
*/
-APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
+APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
apr_size_t bufsize);
#if defined(DOXYGEN)
@@ -69,7 +69,7 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
* Fold an apr_status_t code back to the native platform defined error.
* @param e The apr_status_t folded platform os error code.
* @warning macro implementation; the statcode argument may be evaluated
- * multiple times. If the statcode was not created by apr_get_os_error
+ * multiple times. If the statcode was not created by apr_get_os_error
* or APR_FROM_OS_ERROR, the results are undefined.
*/
#define APR_TO_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e - APR_OS_START_SYSERR)
@@ -105,7 +105,7 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
* @warning This is a macro implementation; the statcode argument may be evaluated
* multiple times. If the statcode was not created by apr_get_os_error
* or APR_FROM_OS_ERROR, the results are undefined. This macro sets
- * errno, or calls a WSASetLastError() style function, unfolding
+ * errno, or calls a WSASetLastError() style function, unfolding
* socketcode with APR_TO_OS_ERROR.
*/
@@ -145,12 +145,12 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
#define APR_OS_START_CANONERR (APR_OS_START_USERERR \
+ (APR_OS_ERRSPACE_SIZE * 10))
/**
- * APR_OS_START_EAIERR folds EAI_ error codes from getaddrinfo() into
+ * APR_OS_START_EAIERR folds EAI_ error codes from getaddrinfo() into
* apr_status_t values.
*/
#define APR_OS_START_EAIERR (APR_OS_START_CANONERR + APR_OS_ERRSPACE_SIZE)
/**
- * APR_OS_START_SYSERR folds platform-specific system error values into
+ * APR_OS_START_SYSERR folds platform-specific system error values into
* apr_status_t values.
*/
#define APR_OS_START_SYSERR (APR_OS_START_EAIERR + APR_OS_ERRSPACE_SIZE)
@@ -158,13 +158,13 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
/** no error. */
#define APR_SUCCESS 0
-/**
+/**
* @defgroup APR_Error APR Error Values
* <PRE>
* <b>APR ERROR VALUES</b>
- * APR_ENOSTAT APR was unable to perform a stat on the file
+ * APR_ENOSTAT APR was unable to perform a stat on the file
* APR_ENOPOOL APR was not provided a pool with which to allocate memory
- * APR_EBADDATE APR was given an invalid date
+ * APR_EBADDATE APR was given an invalid date
* APR_EINVALSOCK APR was given an invalid socket
* APR_ENOPROC APR was not given a process structure
* APR_ENOTIME APR was not given a time structure
@@ -175,7 +175,7 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
* APR_ENOTHREAD APR was not given a thread structure
* APR_ENOTHDKEY APR was not given a thread key structure
* APR_ENOSHMAVAIL There is no more shared memory available
- * APR_EDSOOPEN APR was unable to open the dso object. For more
+ * APR_EDSOOPEN APR was unable to open the dso object. For more
* information call apr_dso_error().
* APR_EGENERAL General failure (specific information not available)
* APR_EBADIP The specified IP address is invalid
@@ -195,17 +195,17 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
* APR_INCOMPLETE The operation was incomplete although some processing
* was performed and the results are partially valid
* APR_BADCH Getopt found an option not in the option string
- * APR_BADARG Getopt found an option that is missing an argument
+ * APR_BADARG Getopt found an option that is missing an argument
* and an argument was specified in the option string
* APR_EOF APR has encountered the end of the file
* APR_NOTFOUND APR was unable to find the socket in the poll structure
* APR_ANONYMOUS APR is using anonymous shared memory
* APR_FILEBASED APR is using a file name as the key to the shared memory
* APR_KEYBASED APR is using a shared key as the key to the shared memory
- * APR_EINIT Ininitalizer value. If no option has been found, but
+ * APR_EINIT Ininitalizer value. If no option has been found, but
* the status variable requires a value, this should be used
- * APR_ENOTIMPL The APR function has not been implemented on this
- * platform, either because nobody has gotten to it yet,
+ * APR_ENOTIMPL The APR function has not been implemented on this
+ * platform, either because nobody has gotten to it yet,
* or the function is impossible on this platform.
* APR_EMISMATCH Two passwords do not match.
* APR_EABSOLUTE The given path was absolute.
@@ -273,7 +273,7 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
#define APR_ENOTENOUGHENTROPY (APR_OS_START_ERROR + 28)
/** @} */
-/**
+/**
* @defgroup APR_STATUS_IS Status Value Tests
* @warning For any particular error condition, more than one of these tests
* may match. This is because platform-specific error codes may not
@@ -284,16 +284,16 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
* adjust the order of the tests accordingly.
* @{
*/
-/**
- * APR was unable to perform a stat on the file
+/**
+ * APR was unable to perform a stat on the file
* @warning always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_ENOSTAT(s) ((s) == APR_ENOSTAT)
-/**
- * APR was not provided a pool with which to allocate memory
+/**
+ * APR was not provided a pool with which to allocate memory
* @warning always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_ENOPOOL(s) ((s) == APR_ENOPOOL)
/** APR was given an invalid date */
@@ -325,8 +325,8 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
/** The specified netmask is invalid */
#define APR_STATUS_IS_EBADMASK(s) ((s) == APR_EBADMASK)
/* empty slot: +18 */
-/**
- * APR was unable to open the dso object.
+/**
+ * APR was unable to open the dso object.
* For more information call apr_dso_error().
*/
#if defined(WIN32)
@@ -364,7 +364,7 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
/** @} */
-/**
+/**
* @addtogroup APR_Error
* @{
*/
@@ -405,7 +405,7 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
/** @see APR_STATUS_IS_KEYBASED */
#define APR_KEYBASED (APR_OS_START_STATUS + 21)
/** @see APR_STATUS_IS_EINIT */
-#define APR_EINIT (APR_OS_START_STATUS + 22)
+#define APR_EINIT (APR_OS_START_STATUS + 22)
/** @see APR_STATUS_IS_ENOTIMPL */
#define APR_ENOTIMPL (APR_OS_START_STATUS + 23)
/** @see APR_STATUS_IS_EMISMATCH */
@@ -414,156 +414,156 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
#define APR_EBUSY (APR_OS_START_STATUS + 25)
/** @} */
-/**
+/**
* @addtogroup APR_STATUS_IS
* @{
*/
-/**
- * Program is currently executing in the child
+/**
+ * Program is currently executing in the child
* @warning
* always use this test, as platform-specific variances may meet this
* more than one error code */
#define APR_STATUS_IS_INCHILD(s) ((s) == APR_INCHILD)
-/**
- * Program is currently executing in the parent
+/**
+ * Program is currently executing in the parent
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_INPARENT(s) ((s) == APR_INPARENT)
-/**
- * The thread is detached
+/**
+ * The thread is detached
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_DETACH(s) ((s) == APR_DETACH)
-/**
- * The thread is not detached
+/**
+ * The thread is not detached
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_NOTDETACH(s) ((s) == APR_NOTDETACH)
-/**
+/**
* The child has finished executing
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_CHILD_DONE(s) ((s) == APR_CHILD_DONE)
-/**
+/**
* The child has not finished executing
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_CHILD_NOTDONE(s) ((s) == APR_CHILD_NOTDONE)
-/**
+/**
* The operation did not finish before the timeout
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_TIMEUP(s) ((s) == APR_TIMEUP)
-/**
+/**
* The operation was incomplete although some processing was performed
* and the results are partially valid.
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_INCOMPLETE(s) ((s) == APR_INCOMPLETE)
/* empty slot: +9 */
/* empty slot: +10 */
/* empty slot: +11 */
-/**
+/**
* Getopt found an option not in the option string
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_BADCH(s) ((s) == APR_BADCH)
-/**
- * Getopt found an option not in the option string and an argument was
+/**
+ * Getopt found an option not in the option string and an argument was
* specified in the option string
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_BADARG(s) ((s) == APR_BADARG)
-/**
+/**
* APR has encountered the end of the file
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_EOF(s) ((s) == APR_EOF)
-/**
+/**
* APR was unable to find the socket in the poll structure
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_NOTFOUND(s) ((s) == APR_NOTFOUND)
/* empty slot: +16 */
/* empty slot: +17 */
/* empty slot: +18 */
-/**
+/**
* APR is using anonymous shared memory
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_ANONYMOUS(s) ((s) == APR_ANONYMOUS)
-/**
+/**
* APR is using a file name as the key to the shared memory
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_FILEBASED(s) ((s) == APR_FILEBASED)
-/**
+/**
* APR is using a shared key as the key to the shared memory
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_KEYBASED(s) ((s) == APR_KEYBASED)
-/**
- * Ininitalizer value. If no option has been found, but
+/**
+ * Ininitalizer value. If no option has been found, but
* the status variable requires a value, this should be used
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_EINIT(s) ((s) == APR_EINIT)
-/**
- * The APR function has not been implemented on this
- * platform, either because nobody has gotten to it yet,
+/**
+ * The APR function has not been implemented on this
+ * platform, either because nobody has gotten to it yet,
* or the function is impossible on this platform.
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_ENOTIMPL(s) ((s) == APR_ENOTIMPL)
-/**
+/**
* Two passwords do not match.
* @warning
* always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_EMISMATCH(s) ((s) == APR_EMISMATCH)
-/**
+/**
* The given lock was busy
* @warning always use this test, as platform-specific variances may meet this
- * more than one error code
+ * more than one error code
*/
#define APR_STATUS_IS_EBUSY(s) ((s) == APR_EBUSY)
/** @} */
-/**
+/**
* @addtogroup APR_Error APR Error Values
* @{
*/
@@ -652,8 +652,8 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
#define APR_ESPIPE (APR_OS_START_CANONERR + 12)
#endif
-/**
- * @see APR_STATUS_IS_EAGAIN
+/**
+ * @see APR_STATUS_IS_EAGAIN
* @warning use APR_STATUS_IS_EAGAIN instead of just testing this value
*/
#ifdef EAGAIN
@@ -692,7 +692,7 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
#define APR_EINPROGRESS (APR_OS_START_CANONERR + 17)
#endif
-/**
+/**
* @see APR_STATUS_IS_ECONNABORTED
* @warning use APR_STATUS_IS_ECONNABORTED instead of just testing this value
*/
@@ -710,7 +710,7 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
#define APR_ECONNRESET (APR_OS_START_CANONERR + 19)
#endif
-/** @see APR_STATUS_IS_ETIMEDOUT
+/** @see APR_STATUS_IS_ETIMEDOUT
* @deprecated */
#ifdef ETIMEDOUT
#define APR_ETIMEDOUT ETIMEDOUT
@@ -788,7 +788,7 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
*/
#define APR_OS2_STATUS(e) (APR_FROM_OS_ERROR(e))
-/* These can't sit in a private header, so in spite of the extra size,
+/* These can't sit in a private header, so in spite of the extra size,
* they need to be made available here.
*/
#define SOCBASEERR 10000
@@ -885,10 +885,10 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
|| (s) == APR_OS_START_SYSERR + SOCECONNRESET)
/* XXX deprecated */
#define APR_STATUS_IS_ETIMEDOUT(s) ((s) == APR_ETIMEDOUT \
- || (s) == APR_OS_START_SYSERR + SOCETIMEDOUT)
+ || (s) == APR_OS_START_SYSERR + SOCETIMEDOUT)
#undef APR_STATUS_IS_TIMEUP
#define APR_STATUS_IS_TIMEUP(s) ((s) == APR_TIMEUP \
- || (s) == APR_OS_START_SYSERR + SOCETIMEDOUT)
+ || (s) == APR_OS_START_SYSERR + SOCETIMEDOUT)
#define APR_STATUS_IS_EHOSTUNREACH(s) ((s) == APR_EHOSTUNREACH \
|| (s) == APR_OS_START_SYSERR + SOCEHOSTUNREACH)
#define APR_STATUS_IS_ENETUNREACH(s) ((s) == APR_ENETUNREACH \
@@ -1121,7 +1121,7 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
#define apr_get_netos_error() (errno)
#define apr_set_netos_error(e) (errno = (e))
-/**
+/**
* @addtogroup APR_STATUS_IS
* @{
*/
@@ -1185,15 +1185,15 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
/** operation now in progress */
#define APR_STATUS_IS_EINPROGRESS(s) ((s) == APR_EINPROGRESS)
-/**
- * Software caused connection abort
+/**
+ * Software caused connection abort
* @remark
- * EPROTO on certain older kernels really means ECONNABORTED, so we need to
+ * EPROTO on certain older kernels really means ECONNABORTED, so we need to
* ignore it for them. See discussion in new-httpd archives nh.9701 & nh.9603
*
- * There is potentially a bug in Solaris 2.x x<6, and other boxes that
+ * There is potentially a bug in Solaris 2.x x<6, and other boxes that
* implement tcp sockets in userland (i.e. on top of STREAMS). On these
- * systems, EPROTO can actually result in a fatal loop. See PR#981 for
+ * systems, EPROTO can actually result in a fatal loop. See PR#981 for
* example. It's hard to handle both uses of EPROTO.
*/
#ifdef EPROTO
@@ -1236,14 +1236,14 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
* Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
- *
+ *
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this
* file.
- *
+ *
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
@@ -1251,7 +1251,7 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
- *
+ *
* @APPLE_LICENSE_HEADER_END@
*/
#include <sys/errno.h>
@@ -1261,7 +1261,7 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
* Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
+ *
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
@@ -1270,10 +1270,10 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
* unlawful or unlicensed copies of an Apple operating system, or to
* circumvent, violate, or enable the circumvention or violation of, any
* terms of an Apple operating system software license agreement.
- *
+ *
* Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
+ *
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
@@ -1281,7 +1281,7 @@ APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
- *
+ *
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
@@ -1484,7 +1484,7 @@ __END_DECLS
#define ECANCELED 89 /* Operation canceled */
#define EIDRM 90 /* Identifier removed */
-#define ENOMSG 91 /* No message of desired type */
+#define ENOMSG 91 /* No message of desired type */
#define EILSEQ 92 /* Illegal byte sequence */
#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
#define ENOATTR 93 /* Attribute not found */
diff --git a/doc/errno.list.solaris.txt b/doc/legacy/errno.list.solaris.txt
index 23601e9d3..23601e9d3 100644
--- a/doc/errno.list.solaris.txt
+++ b/doc/legacy/errno.list.solaris.txt
diff --git a/doc/user-guide/fdl.texi b/doc/legacy/fdl.texi
index e33c687cd..e33c687cd 100644
--- a/doc/user-guide/fdl.texi
+++ b/doc/legacy/fdl.texi
diff --git a/doc/user-guide/fuse.odg b/doc/legacy/fuse.odg
index 61bd103c7..61bd103c7 100644
--- a/doc/user-guide/fuse.odg
+++ b/doc/legacy/fuse.odg
Binary files differ
diff --git a/doc/user-guide/fuse.pdf b/doc/legacy/fuse.pdf
index a7d13faff..a7d13faff 100644
--- a/doc/user-guide/fuse.pdf
+++ b/doc/legacy/fuse.pdf
Binary files differ
diff --git a/doc/get_put_api_using_xattr.txt b/doc/legacy/get_put_api_using_xattr.txt
index 58951f5bf..243f9f1ae 100644
--- a/doc/get_put_api_using_xattr.txt
+++ b/doc/legacy/get_put_api_using_xattr.txt
@@ -16,7 +16,7 @@ internals:
* posix handling setxattr/getxattr
- setxattr
posix setxattr does a open with O_CREAT|O_TRUNC on the <path>/<name>, writes value of the setxattr as data into the file and closes the file. when data is null, posix setxattr avoids doing write. file is closed after write.
-
+
- getxattr
posix getxattr does open with O_RDONLY on the <path>/<name>, reads the complete content of the file. file is closed after read.
diff --git a/doc/user-guide/ha.odg b/doc/legacy/ha.odg
index e4b8b72d0..e4b8b72d0 100644
--- a/doc/user-guide/ha.odg
+++ b/doc/legacy/ha.odg
Binary files differ
diff --git a/doc/user-guide/ha.pdf b/doc/legacy/ha.pdf
index e372c0ab0..e372c0ab0 100644
--- a/doc/user-guide/ha.pdf
+++ b/doc/legacy/ha.pdf
Binary files differ
diff --git a/doc/hacker-guide/Makefile.am b/doc/legacy/hacker-guide/Makefile.am
index 65c92ac23..65c92ac23 100644
--- a/doc/hacker-guide/Makefile.am
+++ b/doc/legacy/hacker-guide/Makefile.am
diff --git a/doc/hacker-guide/call-stub.txt b/doc/legacy/hacker-guide/call-stub.txt
index bca1579b2..021037a35 100644
--- a/doc/hacker-guide/call-stub.txt
+++ b/doc/legacy/hacker-guide/call-stub.txt
@@ -2,8 +2,8 @@ creating a call stub and pausing a call
---------------------------------------
libglusterfs provides seperate API to pause each of the fop. parameters to each API is
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
- NOTE: @fn should exactly take the same type and number of parameters that
+@fn - procedure to call during call_resume().
+ NOTE: @fn should exactly take the same type and number of parameters that
the corresponding regular fop takes.
rest will be the regular parameters to corresponding fop.
@@ -17,7 +17,7 @@ specific parameters.
here is the list of stub creation APIs for xlator fops.
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@loc - pointer to location structure.
NOTE: @loc will be copied to a different location, with inode_ref() to
@loc->inode and @loc->parent, if not NULL. also @loc->path will be
@@ -30,7 +30,7 @@ fop_lookup_stub (call_frame_t *frame,
int32_t need_xattr);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@loc - pointer to location structure.
NOTE: @loc will be copied to a different location, with inode_ref() to
@loc->inode and @loc->parent, if not NULL. also @loc->path will be
@@ -41,7 +41,7 @@ fop_stat_stub (call_frame_t *frame,
loc_t *loc);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@fd - file descriptor parameter to lk fop.
NOTE: @fd is stored with a fd_ref().
call_stub_t *
@@ -50,7 +50,7 @@ fop_fstat_stub (call_frame_t *frame,
fd_t *fd);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@loc - pointer to location structure.
NOTE: @loc will be copied to a different location, with inode_ref() to @loc->inode and
@loc->parent, if not NULL. also @loc->path will be copied to a different location.
@@ -62,7 +62,7 @@ fop_chmod_stub (call_frame_t *frame,
mode_t mode);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@fd - file descriptor parameter to lk fop.
NOTE: @fd is stored with a fd_ref().
@mode - mode parameter for fchmod fop.
@@ -73,7 +73,7 @@ fop_fchmod_stub (call_frame_t *frame,
mode_t mode);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@loc - pointer to location structure.
NOTE: @loc will be copied to a different location, with inode_ref() to @loc->inode and
@loc->parent, if not NULL. also @loc->path will be copied to a different location.
@@ -87,7 +87,7 @@ fop_chown_stub (call_frame_t *frame,
gid_t gid);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@fd - file descriptor parameter to lk fop.
NOTE: @fd is stored with a fd_ref().
@uid - uid parameter to fchown.
@@ -100,7 +100,7 @@ fop_fchown_stub (call_frame_t *frame,
gid_t gid);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@loc - pointer to location structure.
NOTE: @loc will be copied to a different location, with inode_ref() to
@loc->inode and @loc->parent, if not NULL. also @loc->path will be
@@ -113,7 +113,7 @@ fop_truncate_stub (call_frame_t *frame,
off_t off);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@fd - file descriptor parameter to lk fop.
NOTE: @fd is stored with a fd_ref().
@off - offset parameter to ftruncate fop.
@@ -124,7 +124,7 @@ fop_ftruncate_stub (call_frame_t *frame,
off_t off);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@loc - pointer to location structure.
NOTE: @loc will be copied to a different location, with inode_ref() to
@loc->inode and @loc->parent, if not NULL. also @loc->path will be
@@ -137,7 +137,7 @@ fop_utimens_stub (call_frame_t *frame,
struct timespec tv[2]);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@loc - pointer to location structure.
NOTE: @loc will be copied to a different location, with inode_ref() to
@loc->inode and @loc->parent, if not NULL. also @loc->path will be
@@ -150,7 +150,7 @@ fop_access_stub (call_frame_t *frame,
int32_t mask);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@loc - pointer to location structure.
NOTE: @loc will be copied to a different location, with inode_ref() to
@loc->inode and @loc->parent, if not NULL. also @loc->path will be
@@ -163,7 +163,7 @@ fop_readlink_stub (call_frame_t *frame,
size_t size);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@loc - pointer to location structure.
NOTE: @loc will be copied to a different location, with inode_ref() to
@loc->inode and @loc->parent, if not NULL. also @loc->path will be
@@ -178,7 +178,7 @@ fop_mknod_stub (call_frame_t *frame,
dev_t rdev);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@loc - pointer to location structure.
NOTE: @loc will be copied to a different location, with inode_ref() to
@loc->inode and @loc->parent, if not NULL. also @loc->path will be
@@ -191,7 +191,7 @@ fop_mkdir_stub (call_frame_t *frame,
mode_t mode);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@loc - pointer to location structure.
NOTE: @loc will be copied to a different location, with inode_ref() to
@loc->inode and @loc->parent, if not NULL. also @loc->path will be
@@ -202,7 +202,7 @@ fop_unlink_stub (call_frame_t *frame,
loc_t *loc);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@loc - pointer to location structure.
NOTE: @loc will be copied to a different location, with inode_ref() to
@loc->inode and @loc->parent, if not NULL. also @loc->path will be
@@ -213,7 +213,7 @@ fop_rmdir_stub (call_frame_t *frame,
loc_t *loc);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@linkname - linkname parameter to symlink fop.
@loc - pointer to location structure.
NOTE: @loc will be copied to a different location, with inode_ref() to
@@ -226,10 +226,10 @@ fop_symlink_stub (call_frame_t *frame,
loc_t *loc);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@oldloc - pointer to location structure.
- NOTE: @oldloc will be copied to a different location, with inode_ref() to
- @oldloc->inode and @oldloc->parent, if not NULL. also @oldloc->path will
+ NOTE: @oldloc will be copied to a different location, with inode_ref() to
+ @oldloc->inode and @oldloc->parent, if not NULL. also @oldloc->path will
be copied to a different location, if not NULL.
@newloc - pointer to location structure.
NOTE: @newloc will be copied to a different location, with inode_ref() to
@@ -242,7 +242,7 @@ fop_rename_stub (call_frame_t *frame,
loc_t *newloc);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@loc - pointer to location structure.
NOTE: @loc will be copied to a different location, with inode_ref() to
@loc->inode and @loc->parent, if not NULL. also @loc->path will be
@@ -255,7 +255,7 @@ fop_link_stub (call_frame_t *frame,
const char *newpath);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@loc - pointer to location structure.
NOTE: @loc will be copied to a different location, with inode_ref() to
@loc->inode and @loc->parent, if not NULL. also @loc->path will be
@@ -272,7 +272,7 @@ fop_create_stub (call_frame_t *frame,
mode_t mode, fd_t *fd);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@flags - flags parameter to open fop.
@loc - pointer to location structure.
NOTE: @loc will be copied to a different location, with inode_ref() to
@@ -286,7 +286,7 @@ fop_open_stub (call_frame_t *frame,
fd_t *fd);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@fd - file descriptor parameter to lk fop.
NOTE: @fd is stored with a fd_ref().
@size - size parameter to readv fop.
@@ -299,10 +299,10 @@ fop_readv_stub (call_frame_t *frame,
off_t off);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@fd - file descriptor parameter to lk fop.
NOTE: @fd is stored with a fd_ref().
-@vector - vector parameter to writev fop.
+@vector - vector parameter to writev fop.
NOTE: @vector is iov_dup()ed while creating stub. and frame->root->req_refs
dictionary is dict_ref()ed.
@count - count parameter to writev fop.
@@ -316,7 +316,7 @@ fop_writev_stub (call_frame_t *frame,
off_t off);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@fd - file descriptor parameter to flush fop.
NOTE: @fd is stored with a fd_ref().
call_stub_t *
@@ -326,7 +326,7 @@ fop_flush_stub (call_frame_t *frame,
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@fd - file descriptor parameter to lk fop.
NOTE: @fd is stored with a fd_ref().
@datasync - datasync parameter to fsync fop.
@@ -337,7 +337,7 @@ fop_fsync_stub (call_frame_t *frame,
int32_t datasync);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@loc - pointer to location structure.
NOTE: @loc will be copied to a different location, with inode_ref() to @loc->inode and
@loc->parent, if not NULL. also @loc->path will be copied to a different location.
@@ -346,11 +346,11 @@ fop_fsync_stub (call_frame_t *frame,
call_stub_t *
fop_opendir_stub (call_frame_t *frame,
fop_opendir_t fn,
- loc_t *loc,
+ loc_t *loc,
fd_t *fd);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@fd - file descriptor parameter to getdents fop.
NOTE: @fd is stored with a fd_ref().
@size - size parameter to getdents fop.
@@ -365,7 +365,7 @@ fop_getdents_stub (call_frame_t *frame,
int32_t flag);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@fd - file descriptor parameter to setdents fop.
NOTE: @fd is stored with a fd_ref().
@flags - flags parameter to setdents fop.
@@ -379,7 +379,7 @@ fop_setdents_stub (call_frame_t *frame,
int32_t count);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@fd - file descriptor parameter to setdents fop.
NOTE: @fd is stored with a fd_ref().
@datasync - datasync parameter to fsyncdir fop.
@@ -390,7 +390,7 @@ fop_fsyncdir_stub (call_frame_t *frame,
int32_t datasync);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@loc - pointer to location structure.
NOTE: @loc will be copied to a different location, with inode_ref() to
@loc->inode and @loc->parent, if not NULL. also @loc->path will be
@@ -401,9 +401,9 @@ fop_statfs_stub (call_frame_t *frame,
loc_t *loc);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
+ NOTE: @loc will be copied to a different location, with inode_ref() to
@loc->inode and @loc->parent, if not NULL. also @loc->path will be
copied to a different location.
@dict - dict parameter to setxattr fop.
@@ -416,7 +416,7 @@ fop_setxattr_stub (call_frame_t *frame,
int32_t flags);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@loc - pointer to location structure.
NOTE: @loc will be copied to a different location, with inode_ref() to
@loc->inode and @loc->parent, if not NULL. also @loc->path will be
@@ -429,7 +429,7 @@ fop_getxattr_stub (call_frame_t *frame,
const char *name);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@loc - pointer to location structure.
NOTE: @loc will be copied to a different location, with inode_ref() to
@loc->inode and @loc->parent, if not NULL. also @loc->path will be
@@ -443,7 +443,7 @@ fop_removexattr_stub (call_frame_t *frame,
const char *name);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@fd - file descriptor parameter to lk fop.
NOTE: @fd is stored with a fd_ref().
@cmd - command parameter to lk fop.
@@ -457,13 +457,13 @@ fop_lk_stub (call_frame_t *frame,
struct flock *lock);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@fd - fd parameter to gf_lk fop.
NOTE: @fd is fd_ref()ed while creating stub, if not NULL.
@cmd - cmd parameter to gf_lk fop.
@lock - lock paramater to gf_lk fop.
NOTE: @lock is copied to a different memory location while creating
- stub.
+ stub.
call_stub_t *
fop_gf_lk_stub (call_frame_t *frame,
fop_gf_lk_t fn,
@@ -472,7 +472,7 @@ fop_gf_lk_stub (call_frame_t *frame,
struct flock *lock);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@fd - file descriptor parameter to readdir fop.
NOTE: @fd is stored with a fd_ref().
@size - size parameter to readdir fop.
@@ -485,7 +485,7 @@ fop_readdir_stub (call_frame_t *frame,
off_t off);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@loc - pointer to location structure.
NOTE: @loc will be copied to a different location, with inode_ref() to
@loc->inode and @loc->parent, if not NULL. also @loc->path will be
@@ -498,7 +498,7 @@ fop_checksum_stub (call_frame_t *frame,
int32_t flags);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
@inode - inode parameter to @fn.
@@ -516,7 +516,7 @@ fop_lookup_cbk_stub (call_frame_t *frame,
struct stat *buf,
dict_t *dict);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
@buf - buf parameter to @fn.
@@ -529,7 +529,7 @@ fop_stat_cbk_stub (call_frame_t *frame,
struct stat *buf);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
@buf - buf parameter to @fn.
@@ -542,7 +542,7 @@ fop_fstat_cbk_stub (call_frame_t *frame,
struct stat *buf);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
@buf - buf parameter to @fn.
@@ -555,7 +555,7 @@ fop_chmod_cbk_stub (call_frame_t *frame,
struct stat *buf);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
@buf - buf parameter to @fn.
@@ -568,7 +568,7 @@ fop_fchmod_cbk_stub (call_frame_t *frame,
struct stat *buf);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
@buf - buf parameter to @fn.
@@ -582,7 +582,7 @@ fop_chown_cbk_stub (call_frame_t *frame,
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
@buf - buf parameter to @fn.
@@ -596,7 +596,7 @@ fop_fchown_cbk_stub (call_frame_t *frame,
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
@buf - buf parameter to @fn.
@@ -610,7 +610,7 @@ fop_truncate_cbk_stub (call_frame_t *frame,
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
@buf - buf parameter to @fn.
@@ -624,7 +624,7 @@ fop_ftruncate_cbk_stub (call_frame_t *frame,
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
@buf - buf parameter to @fn.
@@ -638,7 +638,7 @@ fop_utimens_cbk_stub (call_frame_t *frame,
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
call_stub_t *
@@ -649,7 +649,7 @@ fop_access_cbk_stub (call_frame_t *frame,
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
@path - path parameter to @fn.
@@ -663,7 +663,7 @@ fop_readlink_cbk_stub (call_frame_t *frame,
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
@inode - inode parameter to @fn.
@@ -680,7 +680,7 @@ fop_mknod_cbk_stub (call_frame_t *frame,
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
@inode - inode parameter to @fn.
@@ -697,7 +697,7 @@ fop_mkdir_cbk_stub (call_frame_t *frame,
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
call_stub_t *
@@ -708,7 +708,7 @@ fop_unlink_cbk_stub (call_frame_t *frame,
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
call_stub_t *
@@ -719,7 +719,7 @@ fop_rmdir_cbk_stub (call_frame_t *frame,
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
@inode - inode parameter to @fn.
@@ -736,7 +736,7 @@ fop_symlink_cbk_stub (call_frame_t *frame,
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
@buf - buf parameter to @fn.
@@ -750,7 +750,7 @@ fop_rename_cbk_stub (call_frame_t *frame,
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
@inode - inode parameter to @fn.
@@ -766,7 +766,7 @@ fop_link_cbk_stub (call_frame_t *frame,
struct stat *buf);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
@fd - fd parameter to @fn.
@@ -786,7 +786,7 @@ fop_create_cbk_stub (call_frame_t *frame,
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
@fd - fd parameter to @fn.
@@ -800,10 +800,10 @@ fop_open_cbk_stub (call_frame_t *frame,
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
-@vector - vector parameter to @fn.
+@vector - vector parameter to @fn.
NOTE: @vector is copied to a different memory location, if not NULL. also
frame->root->rsp_refs is dict_ref()ed.
@stbuf - stbuf parameter to @fn.
@@ -819,7 +819,7 @@ fop_readv_cbk_stub (call_frame_t *frame,
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
@stbuf - stbuf parameter to @fn.
@@ -833,7 +833,7 @@ fop_writev_cbk_stub (call_frame_t *frame,
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
call_stub_t *
@@ -843,7 +843,7 @@ fop_flush_cbk_stub (call_frame_t *frame,
int32_t op_errno);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
call_stub_t *
@@ -854,7 +854,7 @@ fop_fsync_cbk_stub (call_frame_t *frame,
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
@fd - fd parameter to @fn.
@@ -868,7 +868,7 @@ fop_opendir_cbk_stub (call_frame_t *frame,
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
@entries - entries parameter to @fn.
@@ -883,7 +883,7 @@ fop_getdents_cbk_stub (call_frame_t *frame,
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
call_stub_t *
@@ -893,7 +893,7 @@ fop_setdents_cbk_stub (call_frame_t *frame,
int32_t op_errno);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
call_stub_t *
@@ -904,7 +904,7 @@ fop_fsyncdir_cbk_stub (call_frame_t *frame,
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
@buf - buf parameter to @fn.
@@ -918,7 +918,7 @@ fop_statfs_cbk_stub (call_frame_t *frame,
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
call_stub_t *
@@ -929,7 +929,7 @@ fop_setxattr_cbk_stub (call_frame_t *frame,
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
@value - value dictionary parameter to @fn.
@@ -943,7 +943,7 @@ fop_getxattr_cbk_stub (call_frame_t *frame,
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
call_stub_t *
@@ -954,12 +954,12 @@ fop_removexattr_cbk_stub (call_frame_t *frame,
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
@lock - lock parameter to @fn.
NOTE: @lock is copied to a different memory location while creating
- stub.
+ stub.
call_stub_t *
fop_lk_cbk_stub (call_frame_t *frame,
fop_lk_cbk_t fn,
@@ -968,12 +968,12 @@ fop_lk_cbk_stub (call_frame_t *frame,
struct flock *lock);
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
@lock - lock parameter to @fn.
NOTE: @lock is copied to a different memory location while creating
- stub.
+ stub.
call_stub_t *
fop_gf_lk_cbk_stub (call_frame_t *frame,
fop_gf_lk_cbk_t fn,
@@ -983,7 +983,7 @@ fop_gf_lk_cbk_stub (call_frame_t *frame,
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
@entries - entries parameter to @fn.
@@ -996,14 +996,14 @@ fop_readdir_cbk_stub (call_frame_t *frame,
@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
+@fn - procedure to call during call_resume().
@op_ret - op_ret parameter to @fn.
@op_errno - op_errno parameter to @fn.
@file_checksum - file_checksum parameter to @fn.
- NOTE: file_checksum will be copied to a different memory location
+ NOTE: file_checksum will be copied to a different memory location
while creating stub.
@dir_checksum - dir_checksum parameter to @fn.
- NOTE: file_checksum will be copied to a different memory location
+ NOTE: file_checksum will be copied to a different memory location
while creating stub.
call_stub_t *
fop_checksum_cbk_stub (call_frame_t *frame,
@@ -1025,9 +1025,9 @@ resuming a call:
in stub->args.<operation>.<fd_t-or-inode_t-or-dict_t>. so, if any fd_t, dict_t or
inode_t pointers are assigned at stub->args.<operation>.<fd_t-or-inode_t-or-dict_t> after
fop_<operation>_stub() call, they must be <fd_t-or-inode_t-or-dict_t>_ref()ed.
-
+
call_resume does not STACK_DESTROY() for any fop.
-
+
if stub->fn is NULL, call_resume does STACK_WIND() or STACK_UNWIND() using the stub->frame.
return - call resume fails only if stub is NULL. call resume fails with errno set to EINVAL.
diff --git a/doc/hacker-guide/hacker-guide.tex b/doc/legacy/hacker-guide/hacker-guide.tex
index 90e92d28f..11101e7a8 100644
--- a/doc/hacker-guide/hacker-guide.tex
+++ b/doc/legacy/hacker-guide/hacker-guide.tex
@@ -8,7 +8,7 @@
% \headheight 117pt
% \rhead{\includegraphics{zr-logo.eps}}
-\author{Z Research}
+\author{Gluster}
\title{GlusterFS 1.3 Hacker's Guide}
\date{June 1, 2007}
@@ -22,7 +22,7 @@
\section{Coding guidelines}
GlusterFS uses Git for version control. To get the latest source do:
\begin{verbatim}
- $ git clone git://git.sv.gnu.org/gluster.git glusterfs
+ $ git clone git://git.gluster.com/glusterfs.git glusterfs
\end{verbatim}
\noindent
GlusterFS follows the GNU coding
@@ -31,7 +31,7 @@ most part.
\chapter{Major components}
\section{libglusterfs}
-\texttt{libglusterfs} contains supporting code used by all the other components.
+\texttt{libglusterfs} contains supporting code used by all the other components.
The important files here are:
\texttt{dict.c}: This is an implementation of a serializable dictionary type. It is
@@ -165,8 +165,8 @@ First we include the requisite headers.
#include "logging.h"
/*
- * This is a rot13 ``encryption'' xlator. It rot13's data when
- * writing to disk and rot13's it back when reading it.
+ * This is a rot13 ``encryption'' xlator. It rot13's data when
+ * writing to disk and rot13's it back when reading it.
* This xlator is meant as an example, not for production
* use ;) (hence no error-checking)
*/
@@ -178,7 +178,7 @@ letters. Any other byte is passed through as it is.
\begin{verbatim}
/* We only handle lower case letters for simplicity */
-static void
+static void
rot13 (char *buf, int len)
{
int i;
@@ -252,12 +252,12 @@ rot13_writev (call_frame_t *frame,
xlator_t *this,
dict_t *ctx,
struct iovec *vector,
- int32_t count,
+ int32_t count,
off_t offset)
{
rot13_iovec (vector, count);
- STACK_WIND (frame,
+ STACK_WIND (frame,
rot13_writev_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->writev,
@@ -267,9 +267,9 @@ rot13_writev (call_frame_t *frame,
\end{verbatim}
-Every xlator must define two functions and two external symbols. The functions are
+Every xlator must define two functions and two external symbols. The functions are
\texttt{init} and \texttt{fini}, and the symbols are \texttt{fops} and \texttt{mops}.
-The \texttt{init} function is called when the xlator is loaded by GlusterFS, and
+The \texttt{init} function is called when the xlator is loaded by GlusterFS, and
contains code for the xlator to initialize itself. Note that if an xlator is present
multiple times in the spec tree, the \texttt{init} function will be called each time
the xlator is loaded.
@@ -279,7 +279,7 @@ int32_t
init (xlator_t *this)
{
if (!this->children) {
- gf_log ("rot13", GF_LOG_ERROR,
+ gf_log ("rot13", GF_LOG_ERROR,
"FATAL: rot13 should have exactly one child");
return -1;
}
@@ -291,7 +291,7 @@ init (xlator_t *this)
\begin{verbatim}
-void
+void
fini (xlator_t *this)
{
return;
@@ -302,8 +302,6 @@ struct xlator_fops fops = {
.writev = rot13_writev
};
-struct xlator_mops mops = {
-};
\end{verbatim}
diff --git a/doc/hacker-guide/replicate.txt b/doc/legacy/hacker-guide/replicate.txt
index 284f373fb..133c72afa 100644
--- a/doc/hacker-guide/replicate.txt
+++ b/doc/legacy/hacker-guide/replicate.txt
@@ -5,7 +5,7 @@
Before understanding replicate, one must understand two internal FOPs:
GF_FILE_LK:
- This is exactly like fcntl(2) locking, except the locks are in a
+ This is exactly like fcntl(2) locking, except the locks are in a
separate domain from locks held by applications.
GF_DIR_LK (loc_t *loc, char *basename):
@@ -17,7 +17,7 @@ GF_DIR_LK (loc_t *loc, char *basename):
If one wishes to lock *all* the names under a particular directory,
supply the basename argument as NULL.
- The locks can either be read locks or write locks; consult the
+ The locks can either be read locks or write locks; consult the
function prototype for more details.
Both these operations are implemented by the features/locks (earlier
@@ -79,7 +79,7 @@ Each of the four major groups has its own algorithm:
All operations are done in parallel unless specified otherwise.
- (1) Send a GF_FILE_LK request on all children for a write lock on
+ (1) Send a GF_FILE_LK request on all children for a write lock on
the appropriate region
(for metadata operations: entire file (0, 0)
for writev: (offset, offset+size of buffer))
@@ -87,11 +87,11 @@ Each of the four major groups has its own algorithm:
- If a lock request fails on a child:
unlock all children
try to acquire a blocking lock (F_SETLKW) on each child, serially.
-
+
If this fails (due to ENOTCONN or EINVAL):
Consider this child as dead for rest of transaction.
- (2) Mark all children as "pending" on all (alive) children
+ (2) Mark all children as "pending" on all (alive) children
(see below for meaning of "pending").
- If it fails on any child:
@@ -105,7 +105,7 @@ Each of the four major groups has its own algorithm:
(4) Unmark all successful children as not "pending" on all nodes.
(5) Unlock region on all (alive) children.
-
+
-----------
- dir-write
-----------
@@ -121,11 +121,11 @@ Each of the four major groups has its own algorithm:
The "pending" number is like a journal entry. A pending entry is an
array of 32-bit integers stored in network byte-order as the extended
attribute of an inode (which can be a directory as well).
-
+
There are three keys corresponding to three types of pending operations:
- AFR_METADATA_PENDING
- There are some metadata operations pending on this inode (perms, ctime/mtime,
+ There are some metadata operations pending on this inode (perms, ctime/mtime,
xattr, etc.).
- AFR_DATA_PENDING
@@ -134,7 +134,7 @@ Each of the four major groups has its own algorithm:
- AFR_ENTRY_PENDING
There are some directory operations pending on this directory
(create, unlink, etc.).
-
+
-----------
* Self heal
-----------
@@ -155,7 +155,7 @@ Each of the four major groups has its own algorithm:
other is directory):
- Announce to the user via log that a split-brain situation has been
detected, and do nothing.
-
+
- On open, gather extended attribute data:
- Consider the file with the highest AFR_DATA_PENDING number as
the definitive one and replicate its contents on all other
@@ -190,7 +190,7 @@ Thus, if lookup on c1 returns an inode number "2", it is scaled to "4"
This way we ensure that there is never a collision of inode numbers from
two different children.
-This reduction of inode space doesn't really reduce the usability of
+This reduction of inode space doesn't really reduce the usability of
replicate since even if we assume replicate has 1024 children (which would be a
highly unusual scenario), each child still has a 54-bit inode space.
@@ -201,6 +201,6 @@ which is much larger than any real world requirement.
==============================================
$ Last updated: Sun Oct 12 23:17:01 IST 2008 $
-$ Author: Vikas Gorur <vikas@zresearch.com> $
+$ Author: Vikas Gorur <vikas@gluster.com> $
==============================================
diff --git a/doc/handling-options.txt b/doc/legacy/handling-options.txt
index cac1fe939..9a3b2510a 100644
--- a/doc/handling-options.txt
+++ b/doc/legacy/handling-options.txt
@@ -2,12 +2,12 @@
How to add a new option to a given volume ?
===========================================
-* Add a entry in 'struct volume_options options[]' with your key, what is
+* Add a entry in 'struct volume_options options[]' with your key, what is
the type of the 'key', etc.
-* The 'key' and corresponding 'value' given for the same by user are validated
+* The 'key' and corresponding 'value' given for the same by user are validated
before calling init() of the translator/transport/scheduler/auth-module.
-* Once the complete init() is successful, user will get a warning if he has
+* Once the complete init() is successful, user will get a warning if he has
given a 'key' which is not defined in these modules.
diff --git a/doc/mac-related-xattrs.txt b/doc/legacy/mac-related-xattrs.txt
index 805658334..92bb2ceef 100644
--- a/doc/mac-related-xattrs.txt
+++ b/doc/legacy/mac-related-xattrs.txt
@@ -1,21 +1,21 @@
-This document is intended to briefly explain how the Extended Attributes on
+This document is intended to briefly explain how the Extended Attributes on
Darwin 10.5.x releases works
----
-On Darwin other than all the normal filesystem operations, 'Finder' (like
-Explorer in Windows but a little more) keeps its information in two extended
-attributes named 'com.apple.FinderInfo' and 'com.apple.ResourceFork'. If these
-xattrs are not implemented the filesystem won't be shown on Finder, and if they
-are not implemented properly there may be issues when some of the file operations
-are done through GUI of Finder. But when a filesystem is used over mountpoint in a
-terminal, everything is fine and these xattrs are not required.
+On Darwin other than all the normal filesystem operations, 'Finder' (like
+Explorer in Windows but a little more) keeps its information in two extended
+attributes named 'com.apple.FinderInfo' and 'com.apple.ResourceFork'. If these
+xattrs are not implemented the filesystem won't be shown on Finder, and if they
+are not implemented properly there may be issues when some of the file operations
+are done through GUI of Finder. But when a filesystem is used over mountpoint in a
+terminal, everything is fine and these xattrs are not required.
-Currently the way these xattrs are implemented is simple. All the xattr calls
+Currently the way these xattrs are implemented is simple. All the xattr calls
(getxattr, setxattr, listxattr, removexattr) are passed down to underlaying filesystem,
most of the cases when exported FS is on MacOS X itself, these keys are supported, hence
-the fops succeed. But in the case of using exports of different OS on Darwin the issue is
-extended attribute prefix like 'com.apple.' may not be supported, hence the problem with
+the fops succeed. But in the case of using exports of different OS on Darwin the issue is
+extended attribute prefix like 'com.apple.' may not be supported, hence the problem with
Finder. To solve this issue, GlusterFS returns virtual default values to these keys, which
works fine on most of the cases.
diff --git a/doc/porting_guide.txt b/doc/legacy/porting_guide.txt
index 905bb4228..5705cd964 100644
--- a/doc/porting_guide.txt
+++ b/doc/legacy/porting_guide.txt
@@ -3,7 +3,7 @@
* General setup
-The configure script will detect the target platform for the build.
+The configure script will detect the target platform for the build.
All platform-specific CFLAGS, macro definitions should be done
in configure.ac
@@ -15,11 +15,11 @@ Platform-specific code can be written like this:
* Coding guidelines
-In general, avoid glibc extensions. For example, nested functions don't work
+In general, avoid glibc extensions. For example, nested functions don't work
on Mac OS X. It is best to stick to C99.
When using library calls and system calls, pay attention to the
-portability notes. As far as possible stick to POSIX-specified behavior.
+portability notes. As far as possible stick to POSIX-specified behavior.
Do not use anything expressly permitted by the specification. For example,
some fields in structures may be present only on certain platforms. Avoid
use of such things.
@@ -27,14 +27,14 @@ use of such things.
Do not pass values of constants such as F_*, O_*, errno values, etc. across
platforms.
-Please refer compat-errno.h for more details about errno handling inside
-glusterfs for cross platform.
+Please refer compat-errno.h for more details about errno handling inside
+glusterfs for cross platform.
* Specific issues
- The argp library is available only on Linux through glibc, but for other
platforms glusterfs has already included argp-standalone library which will
- statically linked during the glusterfs build.
+ statically linked during the glusterfs build.
- Extended attribute calls (setxattr, listxattr, etc.) have differing prototypes
on different platforms. See compat.h for macro definitions to resolve this, also
diff --git a/doc/replicate.lyx b/doc/legacy/replicate.lyx
index 2bbcb652a..58ba6b2e0 100644
--- a/doc/replicate.lyx
+++ b/doc/legacy/replicate.lyx
@@ -36,10 +36,10 @@ Automatic File Replication (replicate) in GlusterFS
\end_layout
\begin_layout Author
-Vikas Gorur
+Vikas Gorur
\family typewriter
\size larger
-<vikas@zresearch.com>
+<vikas@gluster.com>
\end_layout
\begin_layout Standard
@@ -77,7 +77,7 @@ The replicate translator of GlusterFS aims to keep identical copies of a file
\end_layout
\begin_layout Standard
-In the rest of the document the terms
+In the rest of the document the terms
\begin_inset Quotes eld
\end_inset
@@ -85,7 +85,7 @@ subvolume
\begin_inset Quotes erd
\end_inset
- and
+ and
\begin_inset Quotes eld
\end_inset
@@ -167,7 +167,7 @@ end{verbatim}
\begin_layout Standard
This defines an replicate volume with two subvolumes, brick1, and brick2.
- For replicate to work properly, it is essential that its subvolumes support
+ For replicate to work properly, it is essential that its subvolumes support
\series bold
extended attributes
\series default
@@ -177,7 +177,7 @@ extended attributes
\end_layout
\begin_layout Standard
-The storage volumes used as backend for replicate
+The storage volumes used as backend for replicate
\emph on
must
\emph default
@@ -262,7 +262,7 @@ replicate divides all filesystem write operations into three classes:
\begin_layout Itemize
\series bold
-data:
+data:
\series default
Operations that modify the contents of a file (write, truncate).
\end_layout
@@ -270,7 +270,7 @@ Operations that modify the contents of a file (write, truncate).
\begin_layout Itemize
\series bold
-metadata:
+metadata:
\series default
Operations that modify attributes of a file or directory (permissions, ownership
, etc.).
@@ -279,7 +279,7 @@ Operations that modify attributes of a file or directory (permissions, ownership
\begin_layout Itemize
\series bold
-entry:
+entry:
\series default
Operations that create or delete directory entries (mkdir, create, rename,
rmdir, unlink, etc.).
@@ -345,7 +345,7 @@ Self-Heal
\begin_layout Standard
replicate automatically tries to fix any inconsistencies it detects among different
copies of a file.
- It uses information in the change log to determine which copy is the
+ It uses information in the change log to determine which copy is the
\begin_inset Quotes eld
\end_inset
@@ -357,7 +357,7 @@ correct
\end_layout
\begin_layout Standard
-Self-heal is triggered when a file or directory is first
+Self-heal is triggered when a file or directory is first
\begin_inset Quotes eld
\end_inset
@@ -374,7 +374,7 @@ If the entry being accessed is a directory:
\end_layout
\begin_layout Itemize
-The contents of the
+The contents of the
\begin_inset Quotes eld
\end_inset
@@ -412,7 +412,7 @@ It may happen that one replicate client can access only some of the servers in
a cluster and another replicate client can access the remaining servers.
Or it may happen that in a cluster of two servers, one server goes down
and comes back up, but the other goes down immediately.
- Both these scenarios result in a
+ Both these scenarios result in a
\begin_inset Quotes eld
\end_inset
@@ -425,7 +425,7 @@ split-brain
\begin_layout Standard
In a split-brain situation, there will be two or more copies of a file,
- all of which are
+ all of which are
\begin_inset Quotes eld
\end_inset
@@ -484,12 +484,12 @@ split-brain
).
This means if a discrepancy is noticed in the attributes or content of
a file, the copy on the `favorite-child' will be considered the definitive
- version and its contents will
+ version and its contents will
\emph on
-overwrite
+overwrite
\emph default
the contents of all other copies.
- Use this option with caution! It is possible to
+ Use this option with caution! It is possible to
\emph on
lose data
\emph default
@@ -502,7 +502,7 @@ Self-heal options
\end_layout
\begin_layout Standard
-Setting any of these options to
+Setting any of these options to
\begin_inset Quotes eld
\end_inset
@@ -549,7 +549,7 @@ If any of these options is turned off, it disables writing of change log
entries for that class of file operations.
That is, steps 2 and 4 of the write algorithm (see above) are not done.
Note that if the change log is not written, the self-heal algorithm cannot
- determine the
+ determine the
\begin_inset Quotes eld
\end_inset
@@ -557,7 +557,7 @@ correct
\begin_inset Quotes erd
\end_inset
- version of a file and hence self-heal will only be able to fix
+ version of a file and hence self-heal will only be able to fix
\begin_inset Quotes eld
\end_inset
@@ -602,7 +602,7 @@ These options let you specify the number of lock servers to use for each
The default values are satisfactory in most cases.
If you are extra paranoid, you may want to increase the values.
However, be very cautious if you set the data- or entry- lock server counts
- to zero, since this can result in
+ to zero, since this can result in
\emph on
lost data.
@@ -610,11 +610,11 @@ lost data.
For example, if you set the data-lock-server-count to zero, and two application
s write to the same region of a file, there is a possibility that none of
your servers will have all the data.
- In other words, the copies will be
+ In other words, the copies will be
\emph on
inconsistent
\emph default
-, and
+, and
\emph on
incomplete
\emph default
@@ -790,7 +790,7 @@ s@gluster.org> (visit gluster.org for details on how to subscribe).
\end_layout
\begin_layout Standard
-Send you comments and suggestions about this document to <vikas@zresearch.com>.
+Send you comments and suggestions about this document to <vikas@gluster.com>.
\end_layout
\end_body
diff --git a/doc/replicate.pdf b/doc/legacy/replicate.pdf
index b7212af2b..b7212af2b 100644
--- a/doc/replicate.pdf
+++ b/doc/legacy/replicate.pdf
Binary files differ
diff --git a/doc/solaris-related-xattrs.txt b/doc/legacy/solaris-related-xattrs.txt
index e26efa5d1..3a4643948 100644
--- a/doc/solaris-related-xattrs.txt
+++ b/doc/legacy/solaris-related-xattrs.txt
@@ -1,42 +1,42 @@
Solaris Extended Attributes
In solaris extended attributes are logically supported as files
-within the filesystem. The file system is therefore augmented
+within the filesystem. The file system is therefore augmented
with an orthogonal namespace of file attributes. Attribute values
are accessed by file descriptors obtained through a special attribute
-interface. This type of logical view of "attributes as files" allows
-the leveraging of existing file system interface functionality to
-support the construction, deletion and manipulation of attributes.
+interface. This type of logical view of "attributes as files" allows
+the leveraging of existing file system interface functionality to
+support the construction, deletion and manipulation of attributes.
But as we have tested through this functionality provided by Solaris
we have come accross two major issues as written below.
-1. Symlink XATTR_NOFOLLOW not present for creating extended attributes
+1. Symlink XATTR_NOFOLLOW not present for creating extended attributes
directly on the symlinks like other platforms Linux,MAC-OSX,BSD etc.
- An implementation is present for O_NOFOLLOW for "openat()" call sets
- up errno ELOOP whenever encountered with a symlink and also another
+ An implementation is present for O_NOFOLLOW for "openat()" call sets
+ up errno ELOOP whenever encountered with a symlink and also another
implementation AT_SYMLINK_NOFOLLOW which is not present for calls like
"attropen(), openat()"
a snippet of test code which helped us understand this behaviour
--------------------------------------
- attrfd = attropen (path, key,
+ attrfd = attropen (path, key,
flags|AT_SYMLINK_NOFOLLOW|O_CREAT|O_WRONLY|O_NOFOLLOW, 0777);
if (attrfd >= 0) {
ftruncate (attrfd, 0);
ret = write (attrfd, value, size);
close (attrfd);
} else {
- fprintf (stderr, "Couldn't set extended attribute for %s (%d)\n",
+ fprintf (stderr, "Couldn't set extended attribute for %s (%d)\n",
path, errno);
- }
+ }
--------------------------------------
2. Extended attribute support for special files like device files, fifo files
- is not supported under solaris.
+ is not supported under solaris.
Apart from these glitches almost everything regarding porting functionality
-for extended attribute calls has been properly implemented in compat.c
+for extended attribute calls has been properly implemented in compat.c
with writing wrapper around functions over
"attropen()", "openat()", "unlinkat()"
diff --git a/doc/stat-prefetch-design.txt b/doc/legacy/stat-prefetch-design.txt
index 06d0ad37e..68ed423d3 100644
--- a/doc/stat-prefetch-design.txt
+++ b/doc/legacy/stat-prefetch-design.txt
@@ -1,64 +1,64 @@
what is stat-prefetch?
======================
It is a translator which caches the dentries read in readdir. This dentry
-list is stored in the context of fd. Later when lookup happens on
+list is stored in the context of fd. Later when lookup happens on
[parent-inode, basename (path)] combination, this list is searched for the
basename. The dentry thus searched is used to fill up the stat corresponding
to path being looked upon, thereby short-cutting lookup calls. This cache is
-preserved till closedir is called on the fd. The purpose of this translator
-is to optimize operations like 'ls -l', where a readdir is followed by
+preserved till closedir is called on the fd. The purpose of this translator
+is to optimize operations like 'ls -l', where a readdir is followed by
lookup (stat) calls on each directory entry.
-1. stat-prefetch harnesses the efficiency of short lookup calls
- (saves network roundtrip time for lookup calls from being accounted to
+1. stat-prefetch harnesses the efficiency of short lookup calls
+ (saves network roundtrip time for lookup calls from being accounted to
the stat call).
-2. To maintain the correctness, it does lookup-behind - lookup is winded to
- underlying translators after it is unwound to upper translators.
+2. To maintain the correctness, it does lookup-behind - lookup is winded to
+ underlying translators after it is unwound to upper translators.
lookup-behind is necessary as inode gets populated in server inode table
- only in lookup-cbk and also because various translators store their
+ only in lookup-cbk and also because various translators store their
contexts in inode contexts during lookup calls.
fops to be implemented:
=======================
* lookup
- 1. check the dentry cache stored in context of fds opened by the same process
+ 1. check the dentry cache stored in context of fds opened by the same process
on parent inode for basename. If found unwind with cached stat, else wind
- the lookup call to underlying translators.
- 2. stat is stored in the context of inode if the path being looked upon
+ the lookup call to underlying translators.
+ 2. stat is stored in the context of inode if the path being looked upon
happens to be directory. This stat will be used to fill postparent stat
when lookup happens on any of the directory contents.
* readdir
1. cache the direntries returned in readdir_cbk in the context of fd.
- 2. if the readdir is happening on non-expected offsets (means a seekdir/rewinddir
+ 2. if the readdir is happening on non-expected offsets (means a seekdir/rewinddir
has happened), cache has to be flushed.
- 3. delete the entry corresponding to basename of path on which fd is opened
+ 3. delete the entry corresponding to basename of path on which fd is opened
from cache stored in parent.
* chmod/fchmod
delete the entry corresponding to basename from cache stored in context of
- fds opened on parent inode, since these calls change st_mode and st_ctime of
+ fds opened on parent inode, since these calls change st_mode and st_ctime of
stat.
-
+
* chown/fchown
- delete the entry corresponding to basename from cache stored in context of
- fds opened on parent inode, since these calls change st_uid/st_gid and
+ delete the entry corresponding to basename from cache stored in context of
+ fds opened on parent inode, since these calls change st_uid/st_gid and
st_ctime of stat.
* truncate/ftruncate
- delete the entry corresponding to basename from cache stored in context of
+ delete the entry corresponding to basename from cache stored in context of
fds opened on parent inode, since these calls change st_size/st_mtime of stat.
* utimens
- delete the entry corresponding to basename from cache stored in context of
+ delete the entry corresponding to basename from cache stored in context of
fds opened on parent inode, since this call changes st_atime/st_mtime of stat.
* readlink
delete the entry corresponding to basename from cache stored in context of fds
opened on parent inode, since this call changes st_atime of stat.
-
+
* unlink
- 1. delete the entry corresponding to basename from cache stored in context of
+ 1. delete the entry corresponding to basename from cache stored in context of
fds, opened on parent directory containing the file being unlinked.
2. delete the entry corresponding to basename of parent directory from cache
of grand-parent.
@@ -66,14 +66,14 @@ fops to be implemented:
* rmdir
1. delete the entry corresponding to basename from cache stored in context of
fds opened on parent inode.
- 2. remove the entire cache from all fds opened on inode corresponding to
+ 2. remove the entire cache from all fds opened on inode corresponding to
directory being removed.
3. delete the entry correspondig to basename of parent from cache stored in
grand-parent.
* readv
delete the entry corresponding to basename from cache stored in context of fds
- opened on parent inode, since readv changes st_atime of file.
+ opened on parent inode, since readv changes st_atime of file.
* writev
delete the entry corresponding to basename from cache stored in context of fds
@@ -82,29 +82,29 @@ fops to be implemented:
* fsync
there is a confusion here as to whether fsync updates mtime/ctimes. Disk based
- filesystems (atleast ext2) just writes the times stored in inode to disk
- during fsync and not the time at which fsync is being done. But in glusterfs,
- a translator like write-behind actually sends writes during fsync which will
- change mtime/ctime. Hence stat-prefetch implements fsync to delete the entry
+ filesystems (atleast ext2) just writes the times stored in inode to disk
+ during fsync and not the time at which fsync is being done. But in glusterfs,
+ a translator like write-behind actually sends writes during fsync which will
+ change mtime/ctime. Hence stat-prefetch implements fsync to delete the entry
corresponding to basename from cache stored in context of fds opened on parent
inode.
-
+
* rename
- 1. remove entry corresponding to oldname from cache stored in fd contexts of
+ 1. remove entry corresponding to oldname from cache stored in fd contexts of
oldparent.
2. remove entry corresponding to newname from cache stored in fd contexts of
- newparent.
- 3. remove entry corresponding to oldparent from cache stored in
+ newparent.
+ 3. remove entry corresponding to oldparent from cache stored in
old-grand-parent, since removing oldname changes st_mtime and st_ctime
of oldparent stat.
- 4. remove entry corresponding to newparent from cache stored in
+ 4. remove entry corresponding to newparent from cache stored in
new-grand-parent, since adding newname changes st_mtime and st_ctime
of newparent stat.
- 5. if oldname happens to be a directory, remove entire cache from all fds
+ 5. if oldname happens to be a directory, remove entire cache from all fds
opened on it.
* create/mknod/mkdir/symlink/link
- delete entry corresponding to basename of parent directory in which these
+ delete entry corresponding to basename of parent directory in which these
operations are happening, from cache stored in context of fds opened on
grand-parent, since adding a new entry to a directory changes st_mtime
and st_ctime of parent directory.
@@ -116,13 +116,13 @@ fops to be implemented:
* setdents
1. remove entry corresponding to basename of path on which fd is opened from
cache stored in context of fds opened on parent.
- 2. for each of the entry in the direntry list, delete from cache stored in
+ 2. for each of the entry in the direntry list, delete from cache stored in
context of fd, the entry corresponding to basename of path being passed.
* getdents
1. remove entry corresponding to basename of path on which fd is opened from
- cache stored in parent, since getdents changes st_atime.
- 2. remove entries corresponding to symbolic links from cache, since readlink
+ cache stored in parent, since getdents changes st_atime.
+ 2. remove entries corresponding to symbolic links from cache, since readlink
would've changed st_atime.
* checksum
@@ -144,11 +144,11 @@ callbacks to be implemented:
limitations:
============
* since a readdir does not return extended attributes of file, if need_xattr is
- set, short-cutting of lookup does not happen and lookup is passed to
+ set, short-cutting of lookup does not happen and lookup is passed to
underlying translators.
* posix_readdir does not check whether the dentries are spanning across multiple
- mount points. Hence it is not transforming inode numbers in stat buffers if
+ mount points. Hence it is not transforming inode numbers in stat buffers if
posix is configured to allow export directory spanning on multiple mountpoints.
- This is a bug which needs to be fixed. posix_readdir should treat dentries the
+ This is a bug which needs to be fixed. posix_readdir should treat dentries the
same way as if lookup is happening on dentries.
diff --git a/doc/user-guide/stripe.odg b/doc/legacy/stripe.odg
index 79441bf14..79441bf14 100644
--- a/doc/user-guide/stripe.odg
+++ b/doc/legacy/stripe.odg
Binary files differ
diff --git a/doc/user-guide/stripe.pdf b/doc/legacy/stripe.pdf
index b94446feb..b94446feb 100644
--- a/doc/user-guide/stripe.pdf
+++ b/doc/legacy/stripe.pdf
Binary files differ
diff --git a/doc/translator-options.txt b/doc/legacy/translator-options.txt
index 5c6d76d48..3422c058a 100644
--- a/doc/translator-options.txt
+++ b/doc/legacy/translator-options.txt
@@ -1,8 +1,7 @@
mount/fuse:
* direct-io-mode GF_OPTION_TYPE_BOOL on|off|yes|no
- * macfuse-local GF_OPTION_TYPE_BOOL on|off|yes|no
* mount-point (mountpoint) GF_OPTION_TYPE_PATH <any-posix-valid-path>
- * attribute-timeout GF_OPTION_TYPE_DOUBLE 0.0
+ * attribute-timeout GF_OPTION_TYPE_DOUBLE 0.0
* entry-timeout GF_OPTION_TYPE_DOUBLE 0.0
protocol/server:
@@ -14,20 +13,20 @@ protocol/server:
protocol/client:
* username GF_OPTION_TYPE_ANY
- * password GF_OPTION_TYPE_ANY
+ * password GF_OPTION_TYPE_ANY
* transport-type GF_OPTION_TYPE_STR tcp|socket|ib-verbs|unix|ib-sdp|
tcp/client|ib-verbs/client
- * remote-host GF_OPTION_TYPE_ANY
- * remote-subvolume GF_OPTION_TYPE_ANY
- * transport-timeout GF_OPTION_TYPE_TIME 5-1013
+ * remote-host GF_OPTION_TYPE_ANY
+ * remote-subvolume GF_OPTION_TYPE_ANY
+ * transport-timeout GF_OPTION_TYPE_TIME 5-1013
cluster/replicate:
* read-subvolume GF_OPTION_TYPE_XLATOR
* favorite-child GF_OPTION_TYPE_XLATOR
- * data-self-heal GF_OPTION_TYPE_BOOL
+ * data-self-heal GF_OPTION_TYPE_BOOL
* metadata-self-heal GF_OPTION_TYPE_BOOL
- * entry-self-heal GF_OPTION_TYPE_BOOL
- * data-change-log GF_OPTION_TYPE_BOOL
+ * entry-self-heal GF_OPTION_TYPE_BOOL
+ * data-change-log GF_OPTION_TYPE_BOOL
* metadata-change-log GF_OPTION_TYPE_BOOL
* entry-change-log GF_OPTION_TYPE_BOOL
* data-lock-server-count GF_OPTION_TYPE_INT 0
@@ -35,54 +34,54 @@ cluster/replicate:
* entry-lock-server-count GF_OPTION_TYPE_INT 0
cluster/distribute:
- * lookup-unhashed GF_OPTION_TYPE_BOOL
+ * lookup-unhashed GF_OPTION_TYPE_BOOL
cluster/unify:
- * namespace GF_OPTION_TYPE_XLATOR
- * scheduler GF_OPTION_TYPE_STR alu|rr|random|nufa|switch
+ * namespace GF_OPTION_TYPE_XLATOR
+ * scheduler GF_OPTION_TYPE_STR alu|rr|random|nufa|switch
* self-heal GF_OPTION_TYPE_STR foreground|background|off
- * optimist GF_OPTION_TYPE_BOOL
+ * optimist GF_OPTION_TYPE_BOOL
cluster/nufa:
- local-volume-name GF_OPTION_TYPE_XLATOR
+ local-volume-name GF_OPTION_TYPE_XLATOR
cluster/stripe:
- * block-size GF_OPTION_TYPE_ANY
+ * block-size GF_OPTION_TYPE_ANY
* use-xattr GF_OPTION_TYPE_BOOL
debug/trace:
* include-ops (include) GF_OPTION_TYPE_STR
- * exclude-ops (exclude) GF_OPTION_TYPE_STR
+ * exclude-ops (exclude) GF_OPTION_TYPE_STR
encryption/rot-13:
* encrypt-write GF_OPTION_TYPE_BOOL
- * decrypt-read GF_OPTION_TYPE_BOOL
+ * decrypt-read GF_OPTION_TYPE_BOOL
features/path-convertor:
- * start-offset GF_OPTION_TYPE_INT 0-4095
- * end-offset GF_OPTION_TYPE_INT 1-4096
+ * start-offset GF_OPTION_TYPE_INT 0-4095
+ * end-offset GF_OPTION_TYPE_INT 1-4096
* replace-with GF_OPTION_TYPE_ANY
features/trash:
- * trash-dir GF_OPTION_TYPE_PATH
+ * trash-dir GF_OPTION_TYPE_PATH
features/locks:
- * mandatory-locks (mandatory) GF_OPTION_TYPE_BOOL
+ * mandatory-locks (mandatory) GF_OPTION_TYPE_BOOL
features/filter:
- * root-squashing GF_OPTION_TYPE_BOOL
+ * root-squashing GF_OPTION_TYPE_BOOL
* read-only GF_OPTION_TYPE_BOOL
* fixed-uid GF_OPTION_TYPE_INT
* fixed-gid GF_OPTION_TYPE_INT
- * translate-uid GF_OPTION_TYPE_ANY
+ * translate-uid GF_OPTION_TYPE_ANY
* translate-gid GF_OPTION_TYPE_ANY
- * filter-uid GF_OPTION_TYPE_ANY
- * filter-gid GF_OPTION_TYPE_ANY
+ * filter-uid GF_OPTION_TYPE_ANY
+ * filter-gid GF_OPTION_TYPE_ANY
features/quota:
* min-free-disk-limit GF_OPTION_TYPE_PERCENT
* refresh-interval GF_OPTION_TYPE_TIME
- * disk-usage-limit GF_OPTION_TYPE_SIZET
+ * disk-usage-limit GF_OPTION_TYPE_SIZET
storage/posix:
* o-direct GF_OPTION_TYPE_BOOL
@@ -105,16 +104,16 @@ storage/bdb:
* access-mode GF_OPTION_TYPE_STR
performance/read-ahead:
- * force-atime-update GF_OPTION_TYPE_BOOL
+ * force-atime-update GF_OPTION_TYPE_BOOL
* page-size GF_OPTION_TYPE_SIZET (64 * GF_UNIT_KB)-(2 * GF_UNIT_MB)
- * page-count GF_OPTION_TYPE_INT 1-16
+ * page-count GF_OPTION_TYPE_INT 1-16
performance/write-behind:
* flush-behind GF_OPTION_TYPE_BOOL
- * aggregate-size GF_OPTION_TYPE_SIZET (128 * GF_UNIT_KB)-(4 * GF_UNIT_MB)
- * window-size GF_OPTION_TYPE_SIZET (512 * GF_UNIT_KB)-(1 * GF_UNIT_GB)
- * enable-O_SYNC GF_OPTION_TYPE_BOOL
- * disable-for-first-nbytes GF_OPTION_TYPE_SIZET 1 - (1 * GF_UNIT_MB)
+ * aggregate-size GF_OPTION_TYPE_SIZET (128 * GF_UNIT_KB)-(4 * GF_UNIT_MB)
+ * window-size GF_OPTION_TYPE_SIZET (512 * GF_UNIT_KB)-(1 * GF_UNIT_GB)
+ * enable-O_SYNC GF_OPTION_TYPE_BOOL
+ * disable-for-first-nbytes GF_OPTION_TYPE_SIZET 1 - (1 * GF_UNIT_MB)
performance/symlink-cache:
@@ -122,23 +121,27 @@ performance/io-threads:
* thread-count GF_OPTION_TYPE_INT 1-32
performance/io-cache:
- * priority GF_OPTION_TYPE_ANY
- * cache-timeout (force-revalidate-timeout) GF_OPTION_TYPE_INT 0-60
- * page-size GF_OPTION_TYPE_SIZET (16 * GF_UNIT_KB)-(4 * GF_UNIT_MB)
+ * priority GF_OPTION_TYPE_ANY
+ * cache-timeout (force-revalidate-timeout) GF_OPTION_TYPE_INT 0-60
+ * page-size GF_OPTION_TYPE_SIZET (16 * GF_UNIT_KB)-(4 * GF_UNIT_MB)
* cache-size GF_OPTION_TYPE_SIZET (4 * GF_UNIT_MB)-(6 * GF_UNIT_GB)
+performance/quick-read:
+ * cache-timeout GF_OPTION_TYPE_INT 1-60
+ * max-file-size GF_OPTION_TYPE_SIZET 0-(1000 * GF_UNIT_KB)
+
auth:
- addr:
- * auth.addr.*.allow GF_OPTION_TYPE_ANY
- * auth.addr.*.reject GF_OPTION_TYPE_ANY
+ * auth.addr.*.allow GF_OPTION_TYPE_ANY
+ * auth.addr.*.reject GF_OPTION_TYPE_ANY
- login:
- * auth.login.*.allow GF_OPTION_TYPE_ANY
+ * auth.login.*.allow GF_OPTION_TYPE_ANY
* auth.login.*.password GF_OPTION_TYPE_ANY
scheduler/alu:
- * scheduler.alu.order (alu.order)
- GF_OPTION_TYPE_ANY
+ * scheduler.alu.order (alu.order)
+ GF_OPTION_TYPE_ANY
* scheduler.alu.disk-usage.entry-threshold (alu.disk-usage.entry-threshold)
GF_OPTION_TYPE_SIZET
* scheduler.alu.disk-usage.exit-threshold (alu.disk-usage.exit-threshold)
@@ -146,17 +149,17 @@ scheduler/alu:
* scheduler.alu.write-usage.entry-threshold (alu.write-usage.entry-threshold)
GF_OPTION_TYPE_SIZET
* scheduler.alu.write-usage.exit-threshold (alu.write-usage.exit-threshold)
- GF_OPTION_TYPE_SIZET
+ GF_OPTION_TYPE_SIZET
* scheduler.alu.read-usage.entry-threshold (alu.read-usage.entry-threshold)
GF_OPTION_TYPE_SIZET
* scheduler.alu.read-usage.exit-threshold (alu.read-usage.exit-threshold)
- GF_OPTION_TYPE_SIZET
+ GF_OPTION_TYPE_SIZET
* scheduler.alu.open-files-usage.entry-threshold (alu.open-files-usage.entry-threshold)
GF_OPTION_TYPE_INT
* scheduler.alu.open-files-usage.exit-threshold (alu.open-files-usage.exit-threshold)
- GF_OPTION_TYPE_INT
+ GF_OPTION_TYPE_INT
* scheduler.read-only-subvolumes (alu.read-only-subvolumes)
- GF_OPTION_TYPE_ANY
+ GF_OPTION_TYPE_ANY
* scheduler.refresh-interval (alu.refresh-interval)
GF_OPTION_TYPE_TIME
* scheduler.limits.min-free-disk (alu.limits.min-free-disk)
@@ -165,11 +168,11 @@ scheduler/alu:
GF_OPTION_TYPE_INT
scheduler/nufa:
- * scheduler.refresh-interval (nufa.refresh-interval)
+ * scheduler.refresh-interval (nufa.refresh-interval)
GF_OPTION_TYPE_TIME
- * scheduler.limits.min-free-disk (nufa.limits.min-free-disk)
+ * scheduler.limits.min-free-disk (nufa.limits.min-free-disk)
GF_OPTION_TYPE_PERCENT
- * scheduler.local-volume-name (nufa.local-volume-name)
+ * scheduler.local-volume-name (nufa.local-volume-name)
GF_OPTION_TYPE_XLATOR
scheduler/random:
@@ -201,20 +204,20 @@ transport/ib-verbs:
* transport.ib-verbs.work-request-recv-count (ib-verbs-work-request-recv-count)
GF_OPTION_TYPE_INT
* remote-port (transport.remote-port,transport.ib-verbs.remote-port)
- GF_OPTION_TYPE_INT
- * transport.ib-verbs.listen-port GF_OPTION_TYPE_INT
- * transport.ib-verbs.connect-path (connect-path) GF_OPTION_TYPE_ANY
- * transport.ib-verbs.bind-path (bind-path) GF_OPTION_TYPE_ANY
- * transport.ib-verbs.listen-path (listen-path) GF_OPTION_TYPE_ANY
+ GF_OPTION_TYPE_INT
+ * transport.ib-verbs.listen-port GF_OPTION_TYPE_INT
+ * transport.ib-verbs.connect-path (connect-path) GF_OPTION_TYPE_ANY
+ * transport.ib-verbs.bind-path (bind-path) GF_OPTION_TYPE_ANY
+ * transport.ib-verbs.listen-path (listen-path) GF_OPTION_TYPE_ANY
* transport.address-family (address-family) GF_OPTION_TYPE_STR inet|inet6|inet/inet6|
inet6/inet|unix|inet-sdp
transport/socket:
- * transport.remote-port (remote-port,transport.socket.remote-port) GF_OPTION_TYPE_INT
- * transport.socket.listen-port (listen-port) GF_OPTION_TYPE_INT
- * transport.socket.bind-address (bind-address) GF_OPTION_TYPE_ANY
- * transport.socket.connect-path (connect-path) GF_OPTION_TYPE_ANY
- * transport.socket.bind-path (bind-path) GF_OPTION_TYPE_ANY
+ * transport.remote-port (remote-port,transport.socket.remote-port) GF_OPTION_TYPE_INT
+ * transport.socket.listen-port (listen-port) GF_OPTION_TYPE_INT
+ * transport.socket.bind-address (bind-address) GF_OPTION_TYPE_ANY
+ * transport.socket.connect-path (connect-path) GF_OPTION_TYPE_ANY
+ * transport.socket.bind-path (bind-path) GF_OPTION_TYPE_ANY
* transport.socket.listen-path (listen-path) GF_OPTION_TYPE_ANY
* transport.address-family (address-family) GF_OPTION_TYPE_STR inet|inet6|
inet/inet6|inet6/inet|
diff --git a/doc/user-guide/unify.odg b/doc/legacy/unify.odg
index ccaa9bf16..ccaa9bf16 100644
--- a/doc/user-guide/unify.odg
+++ b/doc/legacy/unify.odg
Binary files differ
diff --git a/doc/user-guide/unify.pdf b/doc/legacy/unify.pdf
index c22027f66..c22027f66 100644
--- a/doc/user-guide/unify.pdf
+++ b/doc/legacy/unify.pdf
Binary files differ
diff --git a/doc/user-guide/user-guide.info b/doc/legacy/user-guide.info
index e80014c45..2bbadb351 100644
--- a/doc/user-guide/user-guide.info
+++ b/doc/legacy/user-guide.info
@@ -6,7 +6,7 @@ END-INFO-DIR-ENTRY
This is the user manual for GlusterFS 2.0.
- Copyright (C) 2007-2009 <Z> Research, Inc. Permission is granted to
+ Copyright (c) 2007-2011 Gluster, Inc. Permission is granted to
copy, distribute and/or modify this document under the terms of the GNU
Free Documentation License, Version 1.2 or any later version published
by the Free Software Foundation; with no Invariant Sections, no
@@ -21,7 +21,7 @@ GlusterFS 2.0 User Guide
This is the user manual for GlusterFS 2.0.
- Copyright (C) 2007-2009 <Z> Research, Inc. Permission is granted to
+ Copyright (c) 2007-2011 Gluster, Inc. Permission is granted to
copy, distribute and/or modify this document under the terms of the GNU
Free Documentation License, Version 1.2 or any later version published
by the Free Software Foundation; with no Invariant Sections, no
@@ -129,8 +129,8 @@ suggestions. A huge thanks to them all.
Jacques Mattheij - for Europe mirror.
Patrick Negri - for TCP non-blocking connect.
- http://gluster.org/core-team.php (<list-hacking@zresearch.com>)
- <Z> Research
+ http://gluster.org/core-team.php (<list-hacking@gluster.com>)
+ Gluster

File: user-guide.info, Node: Introduction, Next: Installation and Invocation, Prev: Acknowledgements, Up: Top
@@ -160,15 +160,15 @@ makes them all appear to be a part of the same filesystem.
=================
You can reach us through the mailing list *gluster-devel*
-(<gluster-devel@nongnu.org>).
+(<gluster-devel@nongnu.org>).
You can also find many of the developers on IRC, on the `#gluster'
-channel on Freenode (<irc.freenode.net>).
+channel on Freenode (<irc.freenode.net>).
The GlusterFS documentation wiki is also useful:
<http://gluster.org/docs/index.php/GlusterFS>
- For commercial support, you can contact <Z> Research at:
+ For commercial support, you can contact Gluster at:
3194 Winding Vista Common
Fremont, CA 94539
@@ -178,7 +178,7 @@ channel on Freenode (<irc.freenode.net>).
Toll free: +1 (888) 813 6309
Fax: +1 (510) 372 0604
- You can also email us at <support@zresearch.com>.
+ You can also email us at <support@gluster.com>.

File: user-guide.info, Node: Installation and Invocation, Next: Concepts, Prev: Introduction, Up: Top
@@ -226,7 +226,7 @@ recommended that all users use the patched FUSE.
The patched FUSE tarball can be downloaded from:
- <ftp://ftp.zresearch.com/pub/gluster/glusterfs/fuse/>
+ <ftp://ftp.gluster.com/pub/gluster/glusterfs/fuse/>
The specific changes made to FUSE are:
@@ -397,8 +397,8 @@ command-line options accepted by it.
`--volfile-server-port=<port-number>'
Listening port number of volfile server.
-`--volfile-server-transport=[socket|ib-verbs]'
- Transport type to get volfile from server. [default: `socket']
+`--volfile-server-transport=[tcp|ib-verbs]'
+ Transport type to get volfile from server. [default: `tcp']
`--xlator-options=<volume-name.option=value>'
Add/override a translator option for a volume with specified value.
@@ -467,8 +467,8 @@ filesystem to appear. Example:
`--volfile-server-port=<port-number>'
Listening port number of volfile server.
-`--volfile-server-transport=[socket|ib-verbs]'
- Transport type to get volfile from server. [default: `socket']
+`--volfile-server-transport=[tcp|ib-verbs]'
+ Transport type to get volfile from server. [default: `tcp']
`--xlator-options=<volume-name.option=value>'
Add/override a translator option for a volume with specified value.
@@ -906,8 +906,8 @@ and the client.
`non-blocking-connect [no|off|on|yes] (on)'
Whether to make the connection attempt asynchronous.
-`remote-port <n> (6996)'
- Server port to connect to.
+`remote-port <n> (24007)'
+ Server port to connect to.
`remote-host <hostname> *'
Hostname or IP address of the server. If the host name resolves to
@@ -920,7 +920,7 @@ and the client.
The local interface on which the server should listen to requests.
Default is to listen on all interfaces.
-`listen-port <n> (6996)'
+`listen-port <n> (24007)'
The local port to listen on.
4.2.1.2 IB-SDP
@@ -951,8 +951,8 @@ always best to use `ib-verbs'. Use `ib-sdp' only if you cannot get
`non-blocking-connect [no|off|on|yes] (on)'
Whether to make the connection attempt asynchronous.
-`remote-port <n> (6996)'
- Server port to connect to.
+`remote-port <n> (24007)'
+ Server port to connect to.
`remote-host <hostname> *'
Hostname or IP address of the server. If the host name resolves to
@@ -965,7 +965,7 @@ always best to use `ib-verbs'. Use `ib-sdp' only if you cannot get
The local interface on which the server should listen to requests.
Default is to listen on all interfaces.
-`listen-port <n> (6996)'
+`listen-port <n> (24007)'
The local port to listen on.
The following options are common to both the client and server
@@ -2037,11 +2037,11 @@ it means the GlusterFS mount did not succeed. Try running GlusterFS in
connections and firewall settings. To check if the server is reachable,
try:
- telnet IP-ADDRESS 6996
+ telnet IP-ADDRESS 24007
If the server is accessible, your `telnet' command should connect and
block. If not you will see an error message such as `telnet: Unable to
-connect to remote host: Connection refused'. 6996 is the default
+connect to remote host: Connection refused'. 24007 is the default
GlusterFS port. If you have changed it, then use the corresponding port
instead.
@@ -2057,7 +2057,7 @@ the `--log-file' option (See *note Client::).
=======================
`modprobe fuse' fails with: "Unknown symbol in module, or unknown
-parameter".
+parameter".
If you are using fuse-2.6.x on Redhat Enterprise Linux Work Station 4
and Advanced Server 4 with 2.6.9-42.ELlargesmp, 2.6.9-42.ELsmp,
@@ -2066,11 +2066,11 @@ module, you need to apply the following patch.
For fuse-2.6.2:
-<http://ftp.zresearch.com/pub/gluster/glusterfs/fuse/fuse-2.6.2-rhel-build.patch>
+<http://ftp.gluster.com/pub/gluster/glusterfs/fuse/fuse-2.6.2-rhel-build.patch>
For fuse-2.6.3:
-<http://ftp.zresearch.com/pub/gluster/glusterfs/fuse/fuse-2.6.3-rhel-build.patch>
+<http://ftp.gluster.com/pub/gluster/glusterfs/fuse/fuse-2.6.3-rhel-build.patch>
6.3 AppArmour and GlusterFS
===========================
@@ -2641,7 +2641,7 @@ Index
* unify (translator): Unify. (line 6)
* unify invariants: Unify. (line 16)
* write-behind (translator): Write Behind. (line 6)
-* Z Research, Inc.: Introduction. (line 36)
+* Gluster, Inc.: Introduction. (line 36)

diff --git a/doc/user-guide/user-guide.pdf b/doc/legacy/user-guide.pdf
index ed7bd2a99..ed7bd2a99 100644
--- a/doc/user-guide/user-guide.pdf
+++ b/doc/legacy/user-guide.pdf
Binary files differ
diff --git a/doc/user-guide/user-guide.texi b/doc/legacy/user-guide.texi
index 800354116..8e429853f 100644
--- a/doc/user-guide/user-guide.texi
+++ b/doc/legacy/user-guide.texi
@@ -10,7 +10,7 @@
@copying
This is the user manual for GlusterFS 2.0.
-Copyright @copyright{} 2007-2009 @email{@b{Z}} Research, Inc. Permission is granted to
+Copyright @copyright{} 2007-2011 @email{@b{Gluster}} , Inc. Permission is granted to
copy, distribute and/or modify this document under the terms of the
@acronym{GNU} Free Documentation License, Version 1.2 or any later
version published by the Free Software Foundation; with no Invariant
@@ -23,8 +23,7 @@ Documentation License''.
@title GlusterFS 2.0 User Guide [DRAFT]
@subtitle January 15, 2008
@author http://gluster.org/core-team.php
-@author @email{@b{Z}} @b{Research}
-
+@author @email{@b{Gluster}}
@page
@vskip 0pt plus 1filll
@insertcopying
@@ -37,78 +36,78 @@ Documentation License''.
@insertcopying
@menu
-* Acknowledgements::
-* Introduction::
-* Installation and Invocation::
-* Concepts::
-* Translators::
-* Usage Scenarios::
-* Troubleshooting::
-* GNU Free Documentation Licence::
-* Index::
+* Acknowledgements::
+* Introduction::
+* Installation and Invocation::
+* Concepts::
+* Translators::
+* Usage Scenarios::
+* Troubleshooting::
+* GNU Free Documentation Licence::
+* Index::
@detailmenu
--- The Detailed Node Listing ---
Installation and Invocation
-* Pre requisites::
-* Getting GlusterFS::
-* Building::
-* Running GlusterFS::
-* A Tutorial Introduction::
+* Pre requisites::
+* Getting GlusterFS::
+* Building::
+* Running GlusterFS::
+* A Tutorial Introduction::
Running GlusterFS
-* Server::
-* Client::
+* Server::
+* Client::
Concepts
-* Filesystems in Userspace::
-* Translator::
-* Volume specification file::
+* Filesystems in Userspace::
+* Translator::
+* Volume specification file::
Translators
-* Storage Translators::
-* Client and Server Translators::
-* Clustering Translators::
-* Performance Translators::
-* Features Translators::
+* Storage Translators::
+* Client and Server Translators::
+* Clustering Translators::
+* Performance Translators::
+* Features Translators::
Storage Translators
-* POSIX::
+* POSIX::
Client and Server Translators
-* Transport modules::
-* Client protocol::
-* Server protocol::
+* Transport modules::
+* Client protocol::
+* Server protocol::
Clustering Translators
-* Unify::
-* Replicate::
-* Stripe::
+* Unify::
+* Replicate::
+* Stripe::
Performance Translators
-* Read Ahead::
-* Write Behind::
-* IO Threads::
-* IO Cache::
+* Read Ahead::
+* Write Behind::
+* IO Threads::
+* IO Cache::
-Features Translators
+Features Translators
-* POSIX Locks::
-* Fixed ID::
+* POSIX Locks::
+* Fixed ID::
Miscellaneous Translators
-* ROT-13::
-* Trace::
+* ROT-13::
+* Trace::
@end detailmenu
@end menu
@@ -121,7 +120,7 @@ Miscellaneous Translators
@node Acknowledgements
@unnumbered Acknowledgements
GlusterFS continues to be a wonderful and enriching experience for all
-of us involved.
+of us involved.
GlusterFS development would not have been possible at this pace if
not for our enthusiastic users. People from around the world have
@@ -142,8 +141,8 @@ Jacques Mattheij - for Europe mirror.
Patrick Negri - for TCP non-blocking connect.
@flushright
-http://gluster.org/core-team.php (@email{list-hacking@@zresearch.com})
-@email{@b{Z}} Research
+http://gluster.org/core-team.php (@email{list-hacking@@gluster.com})
+@email{@b{Gluster}}
@end flushright
@node Introduction
@@ -167,7 +166,7 @@ Need for distributed filesystems
@end itemize
@section Contacting us
-You can reach us through the mailing list @strong{gluster-devel}
+You can reach us through the mailing list @strong{gluster-devel}
(@email{gluster-devel@@nongnu.org}).
@cindex GlusterFS mailing list
@@ -178,9 +177,9 @@ channel on Freenode (@indicateurl{irc.freenode.net}).
The GlusterFS documentation wiki is also useful: @*
@indicateurl{http://gluster.org/docs/index.php/GlusterFS}
-For commercial support, you can contact @email{@b{Z}} Research at:
+For commercial support, you can contact @email{@b{Gluster}} at:
@cindex commercial support
-@cindex Z Research, Inc.
+@cindex Gluster, Inc.
@display
3194 Winding Vista Common
@@ -192,17 +191,17 @@ Toll free: +1 (888) 813 6309
Fax: +1 (510) 372 0604
@end display
-You can also email us at @email{support@@zresearch.com}.
+You can also email us at @email{support@@gluster.com}.
@node Installation and Invocation
@chapter Installation and Invocation
@menu
-* Pre requisites::
-* Getting GlusterFS::
-* Building::
-* Running GlusterFS::
-* A Tutorial Introduction::
+* Pre requisites::
+* Getting GlusterFS::
+* Building::
+* Running GlusterFS::
+* A Tutorial Introduction::
@end menu
@node Pre requisites
@@ -248,13 +247,13 @@ our patched version of the @acronym{FUSE} kernel module. See Patched FUSE for de
@subsection Patched FUSE
-The GlusterFS project maintains a patched version of @acronym{FUSE} meant to be used
+The GlusterFS project maintains a patched version of @acronym{FUSE} meant to be used
with GlusterFS. The patches increase GlusterFS performance. It is recommended that
all users use the patched @acronym{FUSE}.
The patched @acronym{FUSE} tarball can be downloaded from:
-@indicateurl{ftp://ftp.zresearch.com/pub/gluster/glusterfs/fuse/}
+@indicateurl{ftp://ftp.gluster.com/pub/gluster/glusterfs/fuse/}
The specific changes made to @acronym{FUSE} are:
@@ -312,7 +311,7 @@ $ cd glusterfs-<version>
If you checked out the source from the Arch repository, you'll need
to run @command{./autogen.sh} first. Note that you'll need to have
-Autoconf and Automake installed for this.
+Autoconf and Automake installed for this.
Run @command{configure}.
@@ -372,8 +371,8 @@ paths with the prefix.
@section Running GlusterFS
@menu
-* Server::
-* Client::
+* Server::
+* Client::
@end menu
@node Server
@@ -387,7 +386,7 @@ of the GlusterFS server program and all the command-line options accepted by it.
@cartouche
@table @code
Basic Options
-@item -f, --volfile=<path>
+@item -f, --volfile=<path>
Use the volume file as the volume specification.
@item -s, --volfile-server=<hostname>
@@ -397,7 +396,7 @@ Basic Options
Specify the path for the log file.
@item -L, --log-level=<level>
- Set the log level for the server. Log level should be one of @acronym{DEBUG},
+ Set the log level for the server. Log level should be one of @acronym{DEBUG},
@acronym{WARNING}, @acronym{ERROR}, @acronym{CRITICAL}, or @acronym{NONE}.
Advanced Options
@@ -405,10 +404,10 @@ Advanced Options
Run in debug mode. This option sets --no-daemon, --log-level to DEBUG and
--log-file to console.
-@item -N, --no-daemon
+@item -N, --no-daemon
Run glusterfsd as a foreground process.
-@item -p, --pid-file=<path>
+@item -p, --pid-file=<path>
Path for the @acronym{PID} file.
@item --volfile-id=<key>
@@ -417,20 +416,20 @@ Advanced Options
@item --volfile-server-port=<port-number>
Listening port number of volfile server.
-@item --volfile-server-transport=[socket|ib-verbs]
- Transport type to get volfile from server. [default: @command{socket}]
+@item --volfile-server-transport=[tcp|ib-verbs]
+ Transport type to get volfile from server. [default: @command{tcp}]
@item --xlator-options=<volume-name.option=value>
Add/override a translator option for a volume with specified value.
Miscellaneous Options
-@item -?, --help
+@item -?, --help
Show this help text.
-@item --usage
+@item --usage
Display a short usage message.
-@item -V, --version
+@item -V, --version
Show version information.
@end table
@end cartouche
@@ -465,7 +464,7 @@ The command-line options are detailed below.
@table @code
Basic Options
-@item -f, --volfile=<path>
+@item -f, --volfile=<path>
Use the volume file as the volume specification.
@item -s, --volfile-server=<hostname>
@@ -475,7 +474,7 @@ Basic Options
Specify the path for the log file.
@item -L, --log-level=<level>
- Set the log level for the server. Log level should be one of @acronym{DEBUG},
+ Set the log level for the server. Log level should be one of @acronym{DEBUG},
@acronym{WARNING}, @acronym{ERROR}, @acronym{CRITICAL}, or @acronym{NONE}.
Advanced Options
@@ -483,10 +482,10 @@ Advanced Options
Run in debug mode. This option sets --no-daemon, --log-level to DEBUG and
--log-file to console.
-@item -N, --no-daemon
+@item -N, --no-daemon
Run @command{glusterfs} as a foreground process.
-@item -p, --pid-file=<path>
+@item -p, --pid-file=<path>
Path for the @acronym{PID} file.
@item --volfile-id=<key>
@@ -495,8 +494,8 @@ Advanced Options
@item --volfile-server-port=<port-number>
Listening port number of volfile server.
-@item --volfile-server-transport=[socket|ib-verbs]
- Transport type to get volfile from server. [default: @command{socket}]
+@item --volfile-server-transport=[tcp|ib-verbs]
+ Transport type to get volfile from server. [default: @command{tcp}]
@item --xlator-options=<volume-name.option=value>
Add/override a translator option for a volume with specified value.
@@ -513,14 +512,14 @@ Advanced Options
automatically if kernel supports big writes (>= 2.6.26).
@item -e, --entry-timeout=<n>
- Entry timeout for directory entries in the kernel, in seconds.
+ Entry timeout for directory entries in the kernel, in seconds.
Defaults to 1 second.
Missellaneous Options
-@item -?, --help
+@item -?, --help
Show this help information.
-@item -V, --version
+@item -V, --version
Show version information.
@end table
@end cartouche
@@ -528,7 +527,7 @@ Missellaneous Options
@node A Tutorial Introduction
@section A Tutorial Introduction
-This section will show you how to quickly get GlusterFS up and running. We'll
+This section will show you how to quickly get GlusterFS up and running. We'll
configure GlusterFS as a simple network filesystem, with one server and one client.
In this mode of usage, GlusterFS can serve as a replacement for NFS.
@@ -546,18 +545,18 @@ be run on the server will be shown with the prompt:
Our goal is to make a directory on the @emph{server} (say, @command{/export})
accessible to the @emph{client}.
-First of all, get GlusterFS installed on both the machines, as described in the
+First of all, get GlusterFS installed on both the machines, as described in the
previous sections. Make sure you have the @acronym{FUSE} kernel module loaded. You
-can ensure this by running:
+can ensure this by running:
@example
[root@@server]# modprobe fuse
@end example
Before we can run the GlusterFS client or server programs, we need to write
-two files called @emph{volume specifications} (equivalently refered to as @emph{volfiles}).
+two files called @emph{volume specifications} (equivalently refered to as @emph{volfiles}).
The volfile describes the @emph{translator tree} on a node. The next chapter will
-explain the concepts of `translator' and `volume specification' in detail. For now,
+explain the concepts of `translator' and `volume specification' in detail. For now,
just assume that the volfile is like an NFS @command{/etc/export} file.
On the server, create a text file somewhere (we'll assume the path
@@ -573,7 +572,7 @@ end-volume
volume server
type protocol/server
subvolumes colon-o
- option transport-type tcp
+ option transport-type tcp
option auth.addr.colon-o.allow *
end-volume
@end example
@@ -626,9 +625,9 @@ working as a network file system.
@chapter Concepts
@menu
-* Filesystems in Userspace::
-* Translator::
-* Volume specification file::
+* Filesystems in Userspace::
+* Translator::
+* Volume specification file::
@end menu
@node Filesystems in Userspace
@@ -640,16 +639,16 @@ is a kernel module/library that allows us to write a filesystem
completely in userspace.
@acronym{FUSE} consists of a kernel module which interacts with the userspace
-implementation using a device file @code{/dev/fuse}. When a process
+implementation using a device file @code{/dev/fuse}. When a process
makes a syscall on a @acronym{FUSE} filesystem, @acronym{VFS} hands the request to the
@acronym{FUSE} module, which writes the request to @code{/dev/fuse}. The
userspace implementation polls @code{/dev/fuse}, and when a request arrives,
processes it and writes the result back to @code{/dev/fuse}. The kernel then
-reads from the device file and returns the result to the user process.
+reads from the device file and returns the result to the user process.
In case of GlusterFS, the userspace program is the GlusterFS client.
The control flow is shown in the diagram below. The GlusterFS client
-services the request by sending it to the server, which in turn
+services the request by sending it to the server, which in turn
hands it to the local @acronym{POSIX} filesystem.
@center @image{fuse,44pc,,,.pdf}
@@ -753,7 +752,7 @@ or ``forty-two''.
line is considered the value; it is up to the translator to parse it.
@item @emph{subvolume1}, @emph{subvolume2}, @dots{}
- Volume names of sub-volumes. The sub-volumes must already have been defined earlier
+ Volume names of sub-volumes. The sub-volumes must already have been defined earlier
in the file.
@end table
@@ -798,11 +797,11 @@ end-volume
@chapter Translators
@menu
-* Storage Translators::
-* Client and Server Translators::
-* Clustering Translators::
-* Performance Translators::
-* Features Translators::
+* Storage Translators::
+* Client and Server Translators::
+* Clustering Translators::
+* Performance Translators::
+* Features Translators::
* Miscellaneous Translators::
@end menu
@@ -824,12 +823,12 @@ Other storage backends are planned for the future. One of the possibilities is a
Amazon S3 translator. Amazon S3 is an unlimited online storage service accessible
through a web services @acronym{API}. The S3 translator will allow you to access
the storage as a normal @acronym{POSIX} filesystem.
-@footnote{Some more discussion about this can be found at:
+@footnote{Some more discussion about this can be found at:
http://developer.amazonwebservices.com/connect/message.jspa?messageID=52873}
@menu
-* POSIX::
+* POSIX::
* BDB::
@end menu
@@ -844,14 +843,14 @@ filesystem as its ``backend'' to actually store files and
directories. This can be any filesystem that supports extended
attributes (@acronym{EXT3}, ReiserFS, @acronym{XFS}, ...). Extended
attributes are used by some translators to store metadata, for
-example, by the replicate and stripe translators. See
+example, by the replicate and stripe translators. See
@ref{Replicate} and @ref{Stripe}, respectively for details.
@cartouche
@table @code
@item directory <path>
The directory on the local filesystem which is to be used for storage.
-@end table
+@end table
@end cartouche
@node BDB
@@ -863,7 +862,7 @@ type storage/bdb
The @command{BDB} translator uses a @acronym{Berkeley DB} database as its
``backend'' to actually store files as key-value pair in the database and
directories as regular @acronym{POSIX} directories. Note that @acronym{BDB}
-does not provide extended attribute support for regular files. Do not use
+does not provide extended attribute support for regular files. Do not use
@acronym{BDB} as storage translator while using any translator that demands
extended attributes on ``backend''.
@@ -893,9 +892,9 @@ translator tree over the network or access a remote GlusterFS
server. These two translators implement GlusterFS's network protocol.
@menu
-* Transport modules::
-* Client protocol::
-* Server protocol::
+* Transport modules::
+* Client protocol::
+* Server protocol::
@end menu
@node Transport modules
@@ -925,7 +924,7 @@ The @acronym{TCP} client module accepts the following options:
@table @code
@item non-blocking-connect [no|off|on|yes] (on)
Whether to make the connection attempt asynchronous.
-@item remote-port <n> (6996)
+@item remote-port <n> (24007)
Server port to connect to.
@cindex DNS round robin
@item remote-host <hostname> *
@@ -942,7 +941,7 @@ The @acronym{TCP} server module accepts the following options:
@item bind-address <address> (0.0.0.0)
The local interface on which the server should listen to requests. Default is to
listen on all interfaces.
-@item listen-port <n> (6996)
+@item listen-port <n> (24007)
The local port to listen on.
@end table
@end cartouche
@@ -963,7 +962,7 @@ This module accepts the same options as @command{tcp}
@cindex infiniband transport
-InfiniBand is a scalable switched fabric interconnect mechanism
+InfiniBand is a scalable switched fabric interconnect mechanism
primarily used in high-performance computing. InfiniBand can deliver
data throughput of the order of 10 Gbit/s, with latencies of 4-5 ms.
@@ -971,7 +970,7 @@ The @command{ib-verbs} transport accesses the InfiniBand hardware through
the ``verbs'' @acronym{API}, which is the lowest level of software access possible
and which gives the highest performance. On InfiniBand hardware, it is always
best to use @command{ib-verbs}. Use @command{ib-sdp} only if you cannot get
-@command{ib-verbs} working for some reason.
+@command{ib-verbs} working for some reason.
The @command{ib-verbs} client module accepts the following options:
@@ -979,7 +978,7 @@ The @command{ib-verbs} client module accepts the following options:
@table @code
@item non-blocking-connect [no|off|on|yes] (on)
Whether to make the connection attempt asynchronous.
-@item remote-port <n> (6996)
+@item remote-port <n> (24007)
Server port to connect to.
@cindex DNS round robin
@item remote-host <hostname> *
@@ -996,7 +995,7 @@ The @command{ib-verbs} server module accepts the following options:
@item bind-address <address> (0.0.0.0)
The local interface on which the server should listen to requests. Default is to
listen on all interfaces.
-@item listen-port <n> (6996)
+@item listen-port <n> (24007)
The local port to listen on.
@end table
@end cartouche
@@ -1050,7 +1049,7 @@ translator tree.
@item transport-type [tcp,ib-sdp,ib-verbs] (tcp)
The transport type to use. You should use the client versions of all the
-transport modules (@command{tcp}, @command{ib-sdp},
+transport modules (@command{tcp}, @command{ib-sdp},
@command{ib-verbs}).
@item remote-subvolume <volume_name> *
The name of the volume on the remote host to attach to. Note that
@@ -1076,7 +1075,7 @@ remote GlusterFS clients.
@table @code
@item client-volume-filename <path> (<CONFDIR>/glusterfs-client.vol)
The volume specification file to use for the client. This is the file the
-client will receive when it is invoked with the @command{--server} option
+client will receive when it is invoked with the @command{--server} option
(@ref{Client}).
@item transport-type [tcp,ib-verbs,ib-sdp] (tcp)
@@ -1107,9 +1106,9 @@ translator allows a file to be spread across many server nodes. The following se
look at each of these translators in detail.
@menu
-* Unify::
-* Replicate::
-* Stripe::
+* Unify::
+* Replicate::
+* Stripe::
@end menu
@node Unify
@@ -1122,7 +1121,7 @@ type cluster/unify
The unify translator presents a `unified' view of all its sub-volumes. That is,
it makes the union of all its sub-volumes appear as a single volume. It is the
-unify translator that gives GlusterFS the ability to access an arbitrarily
+unify translator that gives GlusterFS the ability to access an arbitrarily
large amount of storage.
For unify to work correctly, certain invariants need to be maintained across
@@ -1145,7 +1144,7 @@ Looking at the second requirement, you might wonder how one can
accomplish storing redundant copies of a file, if no file can exist
multiple times. To answer, we must remember that these invariants are
from @emph{unify's perspective}. A translator such as replicate at a lower
-level in the translator tree than unify may subvert this picture.
+level in the translator tree than unify may subvert this picture.
The first invariant might seem quite tedious to ensure. We shall see
later that this is not so, since unify's @emph{self-heal} mechanism
@@ -1242,8 +1241,8 @@ are allowed. For example: @command{option alu.limits.min-free-disk 5GB}.
@item alu.read-usage.exit-threshold <%> (5)
@item alu.open-files-usage.entry-threshold <n> (1000)
@item alu.open-files-usage.exit-threshold <n> (100)
-@item alu.limits.min-free-disk <%>
-@item alu.limits.max-open-files <n>
+@item alu.limits.min-free-disk <%>
+@item alu.limits.max-open-files <n>
@end table
@end cartouche
@@ -1260,7 +1259,7 @@ files are mostly similar in size and I/O access pattern, this
scheduler is a good choice. RR scheduler checks for free disk space
on the server before scheduling, so you can know when to add
another server node. The default value of min-free-disk is 5% and is
-checked on file creation calls, with atleast 10 seconds (by default)
+checked on file creation calls, with atleast 10 seconds (by default)
elapsing between two checks.
Options:
@@ -1316,7 +1315,7 @@ than a specified amount (5% by default) then @acronym{NUFA} schedules files
among the other child volumes in a round-robin fashion.
@acronym{NUFA} is named after the similar strategy used for memory access,
-@acronym{NUMA}@footnote{Non-Uniform Memory Access:
+@acronym{NUMA}@footnote{Non-Uniform Memory Access:
@indicateurl{http://en.wikipedia.org/wiki/Non-Uniform_Memory_Access}}.
@cartouche
@@ -1326,7 +1325,7 @@ Minimum disk space that must be free (local or remote) for @acronym{NUFA} to sch
file to it.
@item nufa.refresh-interval <t> (10 seconds)
Time between two successive free disk space checks.
-@item nufa.local-volume-name <volume>
+@item nufa.local-volume-name <volume>
The name of the volume corresponding to the local system. This volume must be
one of the children of the unify volume. This option is mandatory.
@end table
@@ -1398,13 +1397,13 @@ end-volume
@end example
This sample configuration will replicate all directories and files on
-brick1, brick2 and brick3.
+brick1, brick2 and brick3.
All the read operations happen from the first alive child. If all the
three sub-volumes are up, reads will be done from brick1; if brick1 is
down read will be done from brick2. In case read() was being done on
brick1 and it goes down, replicate transparently falls back to
-brick2.
+brick2.
The next release of GlusterFS will add the following features:
@itemize
@@ -1497,7 +1496,7 @@ type cluster/stripe
The stripe translator distributes the contents of a file over its
sub-volumes. It does this by creating a file equal in size to the
total size of the file on each of its sub-volumes. It then writes only
-a part of the file to each sub-volume, leaving the rest of it empty.
+a part of the file to each sub-volume, leaving the rest of it empty.
These empty regions are called `holes' in Unix terminology. The holes
do not consume any disk space.
@@ -1505,14 +1504,14 @@ The diagram below makes this clear.
@center @image{stripe,44pc,,,.pdf}
-You can configure stripe so that only filenames matching a pattern
-are striped. You can also configure the size of the data to be stored
+You can configure stripe so that only filenames matching a pattern
+are striped. You can also configure the size of the data to be stored
on each sub-volume.
@cartouche
@table @code
@item block-size <pattern>:<size> (*:0 no striping)
-Distribute files matching @command{<pattern>} over the sub-volumes,
+Distribute files matching @command{<pattern>} over the sub-volumes,
storing at least @command{<size>} on each sub-volume. For example,
@example
@@ -1531,9 +1530,9 @@ different sizes for different file name patterns.
@section Performance Translators
@menu
-* Read Ahead::
-* Write Behind::
-* IO Threads::
+* Read Ahead::
+* Write Behind::
+* IO Threads::
* IO Cache::
* Booster::
@end menu
@@ -1548,9 +1547,9 @@ type performance/read-ahead
The read-ahead translator pre-fetches data in advance on every read.
This benefits applications that mostly process files in sequential order,
since the next block of data will already be available by the time the
-application is done with the current one.
+application is done with the current one.
-Additionally, the read-ahead translator also behaves as a read-aggregator.
+Additionally, the read-ahead translator also behaves as a read-aggregator.
Many small read operations are combined and issued as fewer, larger read
requests to the server.
@@ -1558,7 +1557,7 @@ Read-ahead deals in ``pages'' as the unit of data fetched. The page size
is configurable, as is the ``page count'', which is the number of pages
that are pre-fetched.
-Read-ahead is best used with InfiniBand (using the ib-verbs transport).
+Read-ahead is best used with InfiniBand (using the ib-verbs transport).
On FastEthernet and Gigabit Ethernet networks,
GlusterFS can achieve the link-maximum throughput even without
read-ahead, making it quite superflous.
@@ -1578,7 +1577,7 @@ The unit of data that is pre-fetched.
The number of pages that are pre-fetched.
@item force-atime-update [on|off|yes|no] (off|no)
Whether to force an access time (atime) update on the file on every read. Without
-this, the atime will be slightly imprecise, as it will reflect the time when
+this, the atime will be slightly imprecise, as it will reflect the time when
the read-ahead translator read the data, not when the application actually read it.
@end table
@end cartouche
@@ -1597,7 +1596,7 @@ write-behind translator, successive write requests can be pipelined.
This mode of write-behind operation is best used on the client side, to
enable decreased write latency for the application.
-The write-behind translator can also aggregate write requests. If the
+The write-behind translator can also aggregate write requests. If the
@command{aggregate-size} option is specified, then successive writes upto that
size are accumulated and written in a single operation. This mode of operation
is best used on the server side, as this will decrease the disk's head movement
@@ -1660,7 +1659,7 @@ It caches data upto @command{cache-size} bytes. The cache is maintained as
a prioritized least-recently-used (@acronym{LRU}) list, with priorities determined
by user-specified patterns to match filenames.
-When the IO cache translator detects a write operation, the
+When the IO cache translator detects a write operation, the
cache for that file is flushed.
The IO cache translator periodically verifies the consistency of
@@ -1716,11 +1715,11 @@ can start your application as:
The booster translator accepts no options.
@node Features Translators
-@section Features Translators
+@section Features Translators
@menu
-* POSIX Locks::
-* Fixed ID::
+* POSIX Locks::
+* Fixed ID::
@end menu
@node POSIX Locks
@@ -1784,8 +1783,8 @@ The @acronym{GID} to send to the server
@section Miscellaneous Translators
@menu
-* ROT-13::
-* Trace::
+* ROT-13::
+* Trace::
@end menu
@node ROT-13
@@ -1800,7 +1799,7 @@ contents using the @acronym{ROT-13} algorithm. @acronym{ROT-13} is a trivial
algorithm that rotates each alphabet by thirteen places. Thus, 'A' becomes 'N',
'B' becomes 'O', and 'Z' becomes 'M'.
-It goes without saying that you shouldn't use this translator if you need
+It goes without saying that you shouldn't use this translator if you need
@emph{real} encryption (a future release of GlusterFS will have real encryption
translators).
@@ -1817,7 +1816,7 @@ Whether to decrypt on read
@subsection Trace
@cindex trace (translator)
@example
-type debug/trace
+type debug/trace
@end example
The trace translator is intended for debugging purposes. When loaded, it
@@ -1828,23 +1827,23 @@ level of DEBUG (See @ref{Running GlusterFS}) for trace to work.
Sample trace output (lines have been wrapped for readability):
@cartouche
@example
-2007-10-30 00:08:58 D [trace.c:1579:trace_opendir] trace: callid: 68
-(*this=0x8059e40, loc=0x8091984 @{path=/iozone3_283, inode=0x8091f00@},
+2007-10-30 00:08:58 D [trace.c:1579:trace_opendir] trace: callid: 68
+(*this=0x8059e40, loc=0x8091984 @{path=/iozone3_283, inode=0x8091f00@},
fd=0x8091d50)
-2007-10-30 00:08:58 D [trace.c:630:trace_opendir_cbk] trace:
+2007-10-30 00:08:58 D [trace.c:630:trace_opendir_cbk] trace:
(*this=0x8059e40, op_ret=4, op_errno=1, fd=0x8091d50)
-2007-10-30 00:08:58 D [trace.c:1602:trace_readdir] trace: callid: 69
+2007-10-30 00:08:58 D [trace.c:1602:trace_readdir] trace: callid: 69
(*this=0x8059e40, size=4096, offset=0 fd=0x8091d50)
-2007-10-30 00:08:58 D [trace.c:215:trace_readdir_cbk] trace:
+2007-10-30 00:08:58 D [trace.c:215:trace_readdir_cbk] trace:
(*this=0x8059e40, op_ret=0, op_errno=0, count=4)
-2007-10-30 00:08:58 D [trace.c:1624:trace_closedir] trace: callid: 71
+2007-10-30 00:08:58 D [trace.c:1624:trace_closedir] trace: callid: 71
(*this=0x8059e40, *fd=0x8091d50)
-2007-10-30 00:08:58 D [trace.c:809:trace_closedir_cbk] trace:
+2007-10-30 00:08:58 D [trace.c:809:trace_closedir_cbk] trace:
(*this=0x8059e40, op_ret=0, op_errno=1)
@end example
@end cartouche
@@ -1872,7 +1871,7 @@ scheduling on a single storage volume. Alternatively users can choose
to have two separate volumes and hence two mount points, but the
applications may demand a single storage system to host both.
-This document explains how to mix file level scheduling with stripe.
+This document explains how to mix file level scheduling with stripe.
@subsection Configuration Brief
@@ -1905,17 +1904,17 @@ addresses / access control fields to match your environment.
type storage/posix
option directory /export/for-unify
end-volume
-
+
volume posix-stripe
type storage/posix
option directory /export/for-stripe
end-volume
-
+
volume posix-namespace
type storage/posix
option directory /export/for-namespace
end-volume
-
+
volume server
type protocol/server
option transport-type tcp
@@ -1964,7 +1963,7 @@ addresses / access control fields to match your environment.
option remote-host 192.168.1.4
option remote-subvolume posix-unify
end-volume
-
+
volume client-stripe-1
type protocol/client
option transport-type tcp
@@ -1992,13 +1991,13 @@ addresses / access control fields to match your environment.
option remote-host 192.168.1.4
option remote-subvolume posix-stripe
end-volume
-
+
volume unify
type cluster/unify
option scheduler rr
subvolumes cluster-unify-1 cluster-unify-2 cluster-unify-3 cluster-unify-4
end-volume
-
+
volume stripe
type cluster/stripe
option block-size *.img:2MB # All files ending with .img are striped with 2MB stripe block size.
@@ -2047,13 +2046,13 @@ concludes with the suggested procedure to report bugs in GlusterFS.
@subsection Server errors
@example
-glusterfsd: FATAL: could not open specfile:
+glusterfsd: FATAL: could not open specfile:
'/etc/glusterfs/glusterfsd.vol'
@end example
-The GlusterFS server expects the volume specification file to be
+The GlusterFS server expects the volume specification file to be
at @command{/etc/glusterfs/glusterfsd.vol}. The example
-specification file will be installed as
+specification file will be installed as
@command{/etc/glusterfs/glusterfsd.vol.sample}. You need to edit
it and rename it, or provide a different specification file using
the @command{--spec-file} command line option (See @ref{Server}).
@@ -2061,7 +2060,7 @@ the @command{--spec-file} command line option (See @ref{Server}).
@vskip 4ex
@example
-gf_log_init: failed to open logfile "/usr/var/log/glusterfs/glusterfsd.log"
+gf_log_init: failed to open logfile "/usr/var/log/glusterfs/glusterfsd.log"
(Permission denied)
@end example
@@ -2073,7 +2072,7 @@ file using the @command{--log-file} option (See @ref{Server}).
@subsection Client errors
@example
-fusermount: failed to access mountpoint /mnt:
+fusermount: failed to access mountpoint /mnt:
Transport endpoint is not connected
@end example
@@ -2103,19 +2102,19 @@ connections and firewall settings. To check if the server is reachable,
try:
@example
-telnet IP-ADDRESS 6996
+telnet IP-ADDRESS 24007
@end example
If the server is accessible, your `telnet' command should connect and
block. If not you will see an error message such as @command{telnet: Unable to
-connect to remote host: Connection refused}. 6996 is the default
+connect to remote host: Connection refused}. 24007 is the default
GlusterFS port. If you have changed it, then use the corresponding
port instead.
@vskip 4ex
@example
-gf_log_init: failed to open logfile "/usr/var/log/glusterfs/glusterfs.log"
+gf_log_init: failed to open logfile "/usr/var/log/glusterfs/glusterfs.log"
(Permission denied)
@end example
@@ -2135,11 +2134,11 @@ module, you need to apply the following patch.
For fuse-2.6.2:
-@indicateurl{http://ftp.zresearch.com/pub/gluster/glusterfs/fuse/fuse-2.6.2-rhel-build.patch}
+@indicateurl{http://ftp.gluster.com/pub/gluster/glusterfs/fuse/fuse-2.6.2-rhel-build.patch}
For fuse-2.6.3:
-@indicateurl{http://ftp.zresearch.com/pub/gluster/glusterfs/fuse/fuse-2.6.3-rhel-build.patch}
+@indicateurl{http://ftp.gluster.com/pub/gluster/glusterfs/fuse/fuse-2.6.3-rhel-build.patch}
@section AppArmour and GlusterFS
@cindex AppArmour
diff --git a/doc/user-guide/xlator.odg b/doc/legacy/xlator.odg
index 179a65f6e..179a65f6e 100644
--- a/doc/user-guide/xlator.odg
+++ b/doc/legacy/xlator.odg
Binary files differ
diff --git a/doc/user-guide/xlator.pdf b/doc/legacy/xlator.pdf
index a07e14d67..a07e14d67 100644
--- a/doc/user-guide/xlator.pdf
+++ b/doc/legacy/xlator.pdf
Binary files differ
diff --git a/doc/logging.txt b/doc/logging.txt
new file mode 100644
index 000000000..b4ee45996
--- /dev/null
+++ b/doc/logging.txt
@@ -0,0 +1,66 @@
+
+New logging framework in glusterfs is targeted for end users like
+customers, community members, testers etc. This aims to bring clear,
+understandable logs called user logs whereas the current logging are
+considered as developer logs. The new framework brings with following
+features
+
+* Each message is logged with proper well defined error code and each
+ error code has well known error message.
+* A logbook has defined error code and error messages. It helps to
+ keep track of possible causes and remedies
+* Log are sent to syslog. The syslog application can be configured to
+ pass them to centralized logging system
+* It brings
+ - Remove repeated log messages
+ - Send alerts to users on certain events
+ - Run a program on events
+ - Call home service on events
+
+
+Log book:
+=========
+A log book is a JSON formatted file error-codes.json located in top
+level of glusterfs source repository. At compile time, gen-headers.py
+generates libglusterfs/src/gf-error-codes.h using the log book and
+gf-error-codes.h.template file. libglusterfs/src/gf-error-codes.h
+consists of header definitions and helper functions to get message by
+code for given locale. Currently it has _gf_get_message() function
+returns message for locale 'en'.
+
+New entry to log book is added like
+
+{
+ "IO_ERROR": {"code": 2233,
+ "message": {"en": "I/O error occurred"}},
+ "SETUP_ERROR": {"code": 2240,
+ "message": {"en": "Setup error"}},
+}
+
+
+Logging:
+========
+The framework provides two functions
+
+void gf_openlog (const char *ident, int option, int facility);
+void gf_syslog (int error_code, int facility_priority, char *format, ...);
+
+Consumers need to call gf_openlog() prior to gf_syslog() like the way
+traditional syslog function calls. error_code is mandatory when using
+gf_syslog(). For example,
+
+gf_openlog (NULL, -1, -1);
+gf_syslog (GF_ERR_DEV, LOG_ERR, "error reading configuration file");
+
+The logs are sent in CEE format (http://cee.mitre.org/) to syslog.
+Its targeted to rsyslog syslog server.
+
+This log framework is enabled at compile time by default. This can be
+disabled by passing '--disable-syslog' to ./configure or '--without
+syslog' to rpmbuild
+
+Even though its enabled at compile time, its required to have
+/etc/glusterfs/logger.conf file to make it into effect before starting
+gluster services
+
+Currently all gluster logs are sent with error code GF_ERR_DEV.
diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8
index 7eda3f6a8..01b7f7554 100644
--- a/doc/mount.glusterfs.8
+++ b/doc/mount.glusterfs.8
@@ -1,86 +1,78 @@
-.\" Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+.\" Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
.\" This file is part of GlusterFS.
.\"
-.\" GlusterFS is free software; you can redistribute it and/or modify
-.\" it under the terms of the GNU General Public License as published
-.\" by the Free Software Foundation; either version 3 of the License,
-.\" or (at your option) any later version.
+.\" This file is licensed to you under your choice of the GNU Lesser
+.\" General Public License, version 3 or any later version (LGPLv3 or
+.\" later), or the GNU General Public License, version 2 (GPLv2), in all
+.\" cases as published by the Free Software Foundation.
.\"
-.\" GlusterFS is distributed in the hope that it will be useful, but
-.\" WITHOUT ANY WARRANTY; without even the implied warranty of
-.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-.\" General Public License for more details.
.\"
-.\" You should have received a copy of the GNU General Public License
-.\" long with this program. If not, see
-.\" <http://www.gnu.org/licenses/>.
.\"
-.\"
-.\"
-.TH GlusterFS 8 "Cluster Filesystem" "07 December 2008" "Z Research Inc."
+.TH GlusterFS 8 "Cluster Filesystem" "18 March 2010" "Gluster Inc."
.SH NAME
-GlusterFS \- Clustered Filesystem.
-.SH SYNOPSYS
-.B mount.glusterfs
-.I <volumeserver>:<volumeid/volumeport> -o <options> <mountpoint>
+mount.glusterfs - script to mount native GlusterFS volume
+.SH SYNOPSIS
+.B mount -t glusterfs
+.I [-o <options>] <volumeserver>:<volumeid> <mountpoint>
+.TP
+.B mount -t glusterfs
+.I [-o <options>] <path/to/volumefile> <mountpoint>
.PP
.SH DESCRIPTION
-This tool is part of \fBglusterfs\fR(8)
+This tool is part of \fBglusterfs\fR(8) package, which is used to mount using
+GlusterFS native binary.
+
+\fBmount.glusterfs\fR is meant to be used by the mount(8) command for mounting
+native GlusterFS client. This subcommand, however, can also be used as a
+standalone command with limited functionality.
+
.SH OPTIONS
.PP
.SS "Basic options"
.PP
-.TP
-
-\fBlog\-file=\fRvalue
+.TP
+\fBlog\-file=\fRLOG-FILE
File to use for logging [default:/var/log/glusterfs/glusterfs.log]
.TP
-\fBlog\-level=\fRvalue
-Logging severity. Valid options are TRACE, DEBUG, WARNING, ERROR, CRITICAL
-NORMAL and NONE [default: NORMAL]
+\fBlog\-level=\fRLOG-LEVEL
+Logging severity. Valid options are TRACE, DEBUG, WARNING, ERROR, CRITICAL
+INFO and NONE [default: INFO]
.TP
-\fBlog\-server=\fRvalue
-Server to use as the central log server.
-
+\fBro\fR
+Mount the filesystem read-only
+.PP
.SS "Advanced options"
.PP
.TP
-
-\fBvolfile\-id=\fRvalue
+\fBvolfile\-id=\fRKEY
Volume key or name of the volume file to be fetched from server
.TP
-\fBserver\-port=\fRvalue
-Port number of volfile server [default: 6996]
+\fBtransport=\fRTRANSPORT-TYPE
+Transport type to get volume file from server [default: tcp]
.TP
-\fBtransport=\fRvalue
-Transport type to get volume file from server [default: socket]
-.TP
-\fBvolume\-name=\fRvalue
-Volume name to be used for MOUNT-POINT [default: top most volume in
+\fBvolume\-name=\fRVOLUME-NAME
+Volume name to be used for MOUNT-POINT [default: top most volume in
VOLUME-FILE]
.TP
-\fBlog\-server\-port=\fRvalue
-Listening port number of log server
-.TP
-\fBdirect\-io\-mode=\fRvalue
+\fBdirect\-io\-mode=\fRdisable
Disable direct I/O mode in fuse kernel module
.TP
-
.PP
.SH FILES
-.TP
+.TP
.I /etc/fstab
-file system table
+A typical GlusterFS entry in /etc/fstab looks like below
+
+server1.gluster.com:mirror /mnt/mirror glusterfs log-file=/var/log/mirror.vol,ro,defaults 0 0
+
.TP
.I /etc/mtab
-table of mounted file systems
+An example entry of a GlusterFS mountpoint in /etc/mtab looks like below
-.SH SEE ALSO
-.BR glusterfs(8),
-.BR mount(8)
+mirror.vol /mnt/glusterfs fuse.glusterfs rw,allow_other,default_permissions,max_read=131072 0 0
-.SH AUTHORS
-.BR http://www.gluster.org/core-team.php <URL:http://www.gluster.org/core-team.php>
+.SH SEE ALSO
+\fBglusterfs\fR(8), \fBmount\fR(8), \fBgluster\fR(8)
.SH COPYRIGHT
-.BR Copyright(c)2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+Copyright(c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
diff --git a/doc/qa/qa-client.vol b/doc/qa/qa-client.vol
deleted file mode 100644
index 176dda589..000000000
--- a/doc/qa/qa-client.vol
+++ /dev/null
@@ -1,170 +0,0 @@
-# This spec file should be used for testing before any release
-#
-
-# 1st client
-volume client1
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
-# option transport.ib-verbs.work-request-send-size 131072
-# option transport.ib-verbs.work-request-send-count 64
-# option transport.ib-verbs.work-request-recv-size 131072
-# option transport.ib-verbs.work-request-recv-count 64
- option remote-host 127.0.0.1
- option remote-subvolume ra1
-end-volume
-
-# 2nd client
-volume client2
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
- option remote-host 127.0.0.1
- option remote-subvolume ra2
-end-volume
-
-# 3rd client
-volume client3
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
- option remote-host 127.0.0.1
- option remote-subvolume ra3
-end-volume
-
-# 4th client
-volume client4
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
- option remote-host 127.0.0.1
- option remote-subvolume ra4
-end-volume
-
-# 5th client
-volume client5
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
- option remote-host 127.0.0.1
- option remote-subvolume ra5
-end-volume
-
-# 6th client
-volume client6
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
- option remote-host 127.0.0.1
- option remote-subvolume ra6
-end-volume
-
-# 7th client
-volume client7
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
- option remote-host 127.0.0.1
- option remote-subvolume ra7
-end-volume
-
-# 8th client
-volume client8
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
- option remote-host 127.0.0.1
- option remote-subvolume ra8
-end-volume
-
-# 1st Stripe (client1 client2)
-volume stripe1
- type cluster/stripe
- subvolumes client1 client2
- option block-size 128KB # all striped in 128kB block
-end-volume
-
-# 2st Stripe (client3 client4)
-volume stripe2
- type cluster/stripe
- subvolumes client3 client4
- option block-size 128KB # all striped in 128kB block
-end-volume
-
-# 3st Stripe (client5 client6)
-volume stripe3
- type cluster/stripe
- subvolumes client5 client6
- option block-size 128KB # all striped in 128kB block
-end-volume
-
-# 4st Stripe (client7 client8)
-volume stripe4
- type cluster/stripe
- subvolumes client7 client8
- option block-size 128KB # all striped in 128kB block
-end-volume
-
-
-# 1st replicate
-volume replicate1
- type cluster/replicate
- subvolumes stripe1 stripe2
-end-volume
-
-# 2nd replicate
-volume replicate2
- type cluster/replicate
- subvolumes stripe3 stripe4
-end-volume
-
-volume ns
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option remote-subvolume brick-ns
-end-volume
-
-# Unify
-volume unify0
- type cluster/unify
- subvolumes replicate1 replicate2
-# subvolumes stripe1 stripe3
- option namespace ns
- option scheduler rr # random # alu # nufa
- option rr.limits.min-free-disk 1GB
-# option alu.order x
-# option alu.x.entry-threshold
-# option alu.x.exit-threshold
-end-volume
-
-
-# ==== Performance Translators ====
-# The default options for performance translators should be the best for 90+% of the cases
-volume iot
- type performance/io-threads
- subvolumes unify0
-end-volume
-
-volume wb
- type performance/write-behind
- subvolumes iot
-end-volume
-
-volume ioc
- type performance/io-cache
- subvolumes wb
-end-volume
-
-volume ra
- type performance/read-ahead
- subvolumes ioc
-end-volume
diff --git a/doc/qa/qa-high-avail-client.vol b/doc/qa/qa-high-avail-client.vol
deleted file mode 100644
index 69cb8dd30..000000000
--- a/doc/qa/qa-high-avail-client.vol
+++ /dev/null
@@ -1,17 +0,0 @@
-volume client
- type protocol/client
- option transport-type tcp
- option remote-host localhost
- option transport.socket.remote-port 7001
- option remote-subvolume server1-iot
-end-volume
-
-volume ra
- type performance/read-ahead
- subvolumes client
-end-volume
-
-volume wb
- type performance/write-behind
- subvolumes ra
-end-volume
diff --git a/doc/qa/qa-high-avail-server.vol b/doc/qa/qa-high-avail-server.vol
deleted file mode 100644
index 3556b9dae..000000000
--- a/doc/qa/qa-high-avail-server.vol
+++ /dev/null
@@ -1,344 +0,0 @@
-
-# -- server 1 --
-volume server1-posix1
- type storage/posix
- option directory /tmp/ha-export1/
-end-volume
-
-volume server1-ns1
- type storage/posix
- option directory /tmp/ha-export-ns1/
-end-volume
-
-volume server1-client2
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7002
- option remote-subvolume server2-posix2
-end-volume
-
-volume server1-ns2
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7002
- option remote-subvolume server2-ns2
-end-volume
-
-volume server1-client3
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7003
- option remote-subvolume server3-posix3
-end-volume
-
-volume server1-ns3
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7003
- option remote-subvolume server3-ns3
-end-volume
-
-volume server1-io1
- type performance/io-threads
- option thread-count 8
- subvolumes server1-posix1
-end-volume
-
-
-volume server1-io2
- type performance/io-threads
- option thread-count 8
- subvolumes server1-client2
-end-volume
-
-volume server1-io3
- type performance/io-threads
- option thread-count 8
- subvolumes server1-client3
-end-volume
-
-volume server1-ns-io1
- type performance/io-threads
- option thread-count 8
- subvolumes server1-ns1
-end-volume
-
-volume server1-ns-io2
- type performance/io-threads
- option thread-count 8
- subvolumes server1-ns2
-end-volume
-
-volume server1-ns-io3
- type performance/io-threads
- option thread-count 8
- subvolumes server1-ns3
-end-volume
-
-volume server1-ns-replicate
- type cluster/replicate
- subvolumes server1-ns-io1 server1-ns-io2 server1-ns-io3
-end-volume
-
-volume server1-storage-replicate
- type cluster/replicate
- subvolumes server1-io1 server1-io2 server1-io3
-end-volume
-
-volume server1-unify
- type cluster/unify
- #option self-heal off
- subvolumes server1-storage-replicate
- option namespace server1-ns-replicate
- option scheduler rr
-end-volume
-
-volume server1-iot
- type performance/io-threads
- option thread-count 8
- subvolumes server1-unify
-end-volume
-
-volume server1
- type protocol/server
- option transport-type tcp
- subvolumes server1-iot
- option transport.socket.listen-port 7001
- option auth.addr.server1-posix1.allow *
- option auth.addr.server1-ns1.allow *
- option auth.addr.server1-iot.allow *
-end-volume
-
-
-# == Server2 ==
-volume server2-client1
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7001
- option remote-subvolume server1-posix1
-end-volume
-
-volume server2-ns1
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7001
- option remote-subvolume server1-ns1
-end-volume
-
-volume server2-posix2
- type storage/posix
- option directory /tmp/ha-export2/
-end-volume
-
-volume server2-ns2
- type storage/posix
- option directory /tmp/ha-export-ns2/
-end-volume
-
-volume server2-client3
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7003
- option remote-subvolume server3-posix3
-end-volume
-
-volume server2-ns3
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7003
- option remote-subvolume server3-ns3
-end-volume
-
-volume server2-io1
- type performance/io-threads
- option thread-count 8
- subvolumes server2-client1
-end-volume
-
-
-volume server2-io2
- type performance/io-threads
- option thread-count 8
- subvolumes server2-posix2
-end-volume
-
-volume server2-io3
- type performance/io-threads
- option thread-count 8
- subvolumes server2-client3
-end-volume
-
-volume server2-ns-io1
- type performance/io-threads
- option thread-count 8
- subvolumes server2-ns1
-end-volume
-
-volume server2-ns-io2
- type performance/io-threads
- option thread-count 8
- subvolumes server2-ns2
-end-volume
-
-volume server2-ns-io3
- type performance/io-threads
- option thread-count 8
- subvolumes server2-ns3
-end-volume
-
-volume server2-ns-replicate
- type cluster/replicate
- subvolumes server2-ns-io1 server2-ns-io2 server2-ns-io3
-end-volume
-
-volume server2-storage-replicate
- type cluster/replicate
- subvolumes server2-io2 server2-io3 server2-io1
-end-volume
-
-volume server2-unify
- type cluster/unify
- option self-heal off
- subvolumes server2-storage-replicate
- option namespace server2-ns-replicate
- option scheduler rr
-end-volume
-
-volume server2-iot
- type performance/io-threads
- option thread-count 8
- subvolumes server2-unify
-end-volume
-
-volume server2
- type protocol/server
- option transport-type tcp
- subvolumes server2-iot
- option transport.socket.listen-port 7002
- option auth.addr.server2-posix2.allow *
- option auth.addr.server2-ns2.allow *
- option auth.addr.server2-iot.allow *
-end-volume
-
-# == server 3 ==
-volume server3-client1
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7001
- option remote-subvolume server1-posix1
-end-volume
-
-volume server3-ns1
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7001
- option remote-subvolume server1-ns1
-end-volume
-
-volume server3-client2
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7002
- option remote-subvolume server2-posix2
-end-volume
-
-volume server3-ns2
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7002
- option remote-subvolume server2-ns2
-end-volume
-
-volume server3-posix3
- type storage/posix
- option directory /tmp/ha-export3/
-end-volume
-
-volume server3-ns3
- type storage/posix
- option directory /tmp/ha-export-ns3/
-end-volume
-
-volume server3-io1
- type performance/io-threads
- option thread-count 8
- subvolumes server3-client1
-end-volume
-
-
-volume server3-io2
- type performance/io-threads
- option thread-count 8
- subvolumes server3-client2
-end-volume
-
-volume server3-io3
- type performance/io-threads
- option thread-count 8
- subvolumes server3-posix3
-end-volume
-
-volume server3-ns-io1
- type performance/io-threads
- option thread-count 8
- subvolumes server3-ns1
-end-volume
-
-volume server3-ns-io2
- type performance/io-threads
- option thread-count 8
- subvolumes server3-ns2
-end-volume
-
-volume server3-ns-io3
- type performance/io-threads
- option thread-count 8
- subvolumes server3-ns3
-end-volume
-
-volume server3-ns-replicate
- type cluster/replicate
- subvolumes server3-ns-io1 server3-ns-io2 server3-ns-io3
-end-volume
-
-volume server3-storage-replicate
- type cluster/replicate
- subvolumes server3-io3 server3-io2 server3-io1
-end-volume
-
-volume server3-unify
- type cluster/unify
- option self-heal off
- subvolumes server3-storage-replicate
- option namespace server3-ns-replicate
- option scheduler rr
-end-volume
-
-volume server3-iot
- type performance/io-threads
- option thread-count 8
- subvolumes server3-unify
-end-volume
-
-volume server3
- type protocol/server
- option transport-type tcp
- subvolumes server3-iot
- option transport.socket.listen-port 7003
- option auth.addr.server3-posix3.allow *
- option auth.addr.server3-ns3.allow *
- option auth.addr.server3-iot.allow *
-end-volume
-
diff --git a/doc/qa/qa-server.vol b/doc/qa/qa-server.vol
deleted file mode 100644
index 1c245c324..000000000
--- a/doc/qa/qa-server.vol
+++ /dev/null
@@ -1,284 +0,0 @@
-# This spec file should be used for testing before any release
-#
-
-# Namespace posix
-volume brick-ns
- type storage/posix # POSIX FS translator
- option directory /tmp/export-ns # Export this directory
-end-volume
-
-# 1st server
-
-volume brick1
- type storage/posix # POSIX FS translator
- option directory /tmp/export1 # Export this directory
-end-volume
-
-# == Posix-Locks ==
- volume plocks1
- type features/posix-locks
-# option mandatory on
- subvolumes brick1
- end-volume
-
-volume iot1
- type performance/io-threads
- subvolumes plocks1 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb1
- type performance/write-behind
- subvolumes iot1
-# option <key> <value>
-end-volume
-
-volume ra1
- type performance/read-ahead
- subvolumes wb1
-# option <key> <value>
-end-volume
-
-volume brick2
- type storage/posix # POSIX FS translator
- option directory /tmp/export2 # Export this directory
-end-volume
-
-# == TrashCan Translator ==
-# volume trash2
-# type features/trash
-# option trash-dir /.trashcan
-# subvolumes brick2
-# end-volume
-
-# == Posix-Locks ==
-volume plocks2
- type features/posix-locks
-# option <something> <something>
- subvolumes brick2
-end-volume
-
-volume iot2
- type performance/io-threads
- subvolumes plocks2 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb2
- type performance/write-behind
- subvolumes iot2
-# option <key> <value>
-end-volume
-
-volume ra2
- type performance/read-ahead
- subvolumes wb2
-# option <key> <value>
-end-volume
-
-volume brick3
- type storage/posix # POSIX FS translator
- option directory /tmp/export3 # Export this directory
-end-volume
-
-# == TrashCan Translator ==
-# volume trash3
-# type features/trash
-# option trash-dir /.trashcan
-# subvolumes brick3
-# end-volume
-
-# == Posix-Locks ==
-volume plocks3
- type features/posix-locks
-# option <something> <something>
- subvolumes brick3
-end-volume
-
-volume iot3
- type performance/io-threads
- subvolumes plocks3 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb3
- type performance/write-behind
- subvolumes iot3
-# option <key> <value>
-end-volume
-
-volume ra3
- type performance/read-ahead
- subvolumes wb3
-# option <key> <value>
-end-volume
-
-volume brick4
- type storage/posix # POSIX FS translator
- option directory /tmp/export4 # Export this directory
-end-volume
-
-# == Posix-Locks ==
-volume plocks4
- type features/posix-locks
-# option <something> <something>
- subvolumes brick4
-end-volume
-
-volume iot4
- type performance/io-threads
- subvolumes plocks4 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb4
- type performance/write-behind
- subvolumes iot4
-# option <key> <value>
-end-volume
-
-volume ra4
- type performance/read-ahead
- subvolumes wb4
-# option <key> <value>
-end-volume
-
-volume brick5
- type storage/posix # POSIX FS translator
- option directory /tmp/export5 # Export this directory
-end-volume
-
-
-# == Posix-Locks ==
-volume plocks5
- type features/posix-locks
-# option <something> <something>
- subvolumes brick5
-end-volume
-
-volume iot5
- type performance/io-threads
- subvolumes plocks5 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb5
- type performance/write-behind
- subvolumes iot5
-# option <key> <value>
-end-volume
-
-volume ra5
- type performance/read-ahead
- subvolumes wb5
-# option <key> <value>
-end-volume
-
-volume brick6
- type storage/posix # POSIX FS translator
- option directory /tmp/export6 # Export this directory
-end-volume
-
-# == Posix-Locks ==
-volume plocks6
- type features/posix-locks
-# option <something> <something>
- subvolumes brick6
-end-volume
-
-volume iot6
- type performance/io-threads
- subvolumes plocks6 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb6
- type performance/write-behind
- subvolumes iot6
-# option <key> <value>
-end-volume
-
-volume ra6
- type performance/read-ahead
- subvolumes wb6
-# option <key> <value>
-end-volume
-
-volume brick7
- type storage/posix # POSIX FS translator
- option directory /tmp/export7 # Export this directory
-end-volume
-
-# == Posix-Locks ==
-volume plocks7
- type features/posix-locks
-# option <something> <something>
- subvolumes brick7
-end-volume
-
-volume iot7
- type performance/io-threads
- subvolumes plocks7 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb7
- type performance/write-behind
- subvolumes iot7
-# option <key> <value>
-end-volume
-
-volume ra7
- type performance/read-ahead
- subvolumes wb7
-# option <key> <value>
-end-volume
-
-volume brick8
- type storage/posix # POSIX FS translator
- option directory /tmp/export8 # Export this directory
-end-volume
-
-# == Posix-Locks ==
-volume plocks8
- type features/posix-locks
-# option <something> <something>
- subvolumes brick8
-end-volume
-
-volume iot8
- type performance/io-threads
- subvolumes plocks8 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb8
- type performance/write-behind
- subvolumes iot8
-# option <key> <value>
-end-volume
-
-volume ra8
- type performance/read-ahead
- subvolumes wb8
-# option <key> <value>
-end-volume
-
-volume server8
- type protocol/server
- subvolumes ra8 ra1 ra2 ra3 ra4 ra5 ra6 ra7 brick-ns
- option transport-type tcp # For TCP/IP transport
-# option transport-type ib-sdp # For Infiniband transport
-# option transport-type ib-verbs # For ib-verbs transport
- option client-volume-filename /examples/qa-client.vol
- option auth.addr.ra1.allow * # Allow access to "stat8" volume
- option auth.addr.ra2.allow * # Allow access to "stat8" volume
- option auth.addr.ra3.allow * # Allow access to "stat8" volume
- option auth.addr.ra4.allow * # Allow access to "stat8" volume
- option auth.addr.ra5.allow * # Allow access to "stat8" volume
- option auth.addr.ra6.allow * # Allow access to "stat8" volume
- option auth.addr.ra7.allow * # Allow access to "stat8" volume
- option auth.addr.ra8.allow * # Allow access to "stat8" volume
- option auth.addr.brick-ns.allow * # Allow access to "stat8" volume
-end-volume
-
diff --git a/doc/rpc-for-glusterfs.changes-done.txt b/doc/rpc-for-glusterfs.changes-done.txt
new file mode 100644
index 000000000..6bbbca788
--- /dev/null
+++ b/doc/rpc-for-glusterfs.changes-done.txt
@@ -0,0 +1,18 @@
+This document serves as a basic coding standard/practise for further
+developments after proper protocol layer is implemented.
+
+With this release we are bringing abstraction based on xlator driven
+operation and protocol driven operation. ie, all the client side (fuse)
+operations are xlator driven operations and will come with 'op' value
+taken from 'libglusterfs/'.
+
+All the server protocol driven operations are driven by which ever
+version of protocol is used.
+
+All the currently implemented fops will remain, and 'getspec' being generated
+by top level and passes through translator graph, is treated as an 'fop'.
+
+All new 'gluster' and 'glusterd' related calls will be _mgmt_ calls instead of
+fops. All release, releasedir and forget are treated as fops (but they won't
+come with requirement to use STACK_WIND and STACK_UNWIND).
+
diff --git a/doc/split-brain.md b/doc/split-brain.md
new file mode 100644
index 000000000..b0d938e26
--- /dev/null
+++ b/doc/split-brain.md
@@ -0,0 +1,251 @@
+Steps to recover from File split-brain.
+======================================
+
+Quick Start:
+============
+1. Get the path of the file that is in split-brain:
+> It can be obtained either by
+> a) The command `gluster volume heal info split-brain`.
+> b) Identify the files for which file operations performed
+ from the client keep failing with Input/Output error.
+
+2. Close the applications that opened this file from the mount point.
+In case of VMs, they need to be powered-off.
+
+3. Decide on the correct copy:
+> This is done by observing the afr changelog extended attributes of the file on
+the bricks using the getfattr command; then identifying the type of split-brain
+(data split-brain, metadata split-brain, entry split-brain or split-brain due to
+gfid-mismatch); and finally determining which of the bricks contains the 'good copy'
+of the file.
+> `getfattr -d -m . -e hex <file-path-on-brick>`.
+It is also possible that one brick might contain the correct data while the
+other might contain the correct metadata.
+
+4. Reset the relevant extended attribute on the brick(s) that contains the
+'bad copy' of the file data/metadata using the setfattr command.
+> `setfattr -n <attribute-name> -v <attribute-value> <file-path-on-brick>`
+
+5. Trigger self-heal on the file by performing lookup from the client:
+> `ls -l <file-path-on-gluster-mount>`
+
+Detailed Instructions for steps 3 through 5:
+===========================================
+To understand how to resolve split-brain we need to know how to interpret the
+afr changelog extended attributes.
+
+Execute `getfattr -d -m . -e hex <file-path-on-brick>`
+
+* Example:
+[root@store3 ~]# getfattr -d -e hex -m. brick-a/file.txt
+\#file: brick-a/file.txt
+security.selinux=0x726f6f743a6f626a6563745f723a66696c655f743a733000
+trusted.afr.vol-client-2=0x000000000000000000000000
+trusted.afr.vol-client-3=0x000000000200000000000000
+trusted.gfid=0x307a5c9efddd4e7c96e94fd4bcdcbd1b
+
+The extended attributes with `trusted.afr.<volname>-client-<subvolume-index>`
+are used by afr to maintain changelog of the file.The values of the
+`trusted.afr.<volname>-client-<subvolume-index>` are calculated by the glusterfs
+client (fuse or nfs-server) processes. When the glusterfs client modifies a file
+or directory, the client contacts each brick and updates the changelog extended
+attribute according to the response of the brick.
+
+'subvolume-index' is nothing but (brick number - 1) in
+`gluster volume info <volname>` output.
+
+* Example:
+[root@pranithk-laptop ~]# gluster volume info vol
+ Volume Name: vol
+ Type: Distributed-Replicate
+ Volume ID: 4f2d7849-fbd6-40a2-b346-d13420978a01
+ Status: Created
+ Number of Bricks: 4 x 2 = 8
+ Transport-type: tcp
+ Bricks:
+ brick-a: pranithk-laptop:/gfs/brick-a
+ brick-b: pranithk-laptop:/gfs/brick-b
+ brick-c: pranithk-laptop:/gfs/brick-c
+ brick-d: pranithk-laptop:/gfs/brick-d
+ brick-e: pranithk-laptop:/gfs/brick-e
+ brick-f: pranithk-laptop:/gfs/brick-f
+ brick-g: pranithk-laptop:/gfs/brick-g
+ brick-h: pranithk-laptop:/gfs/brick-h
+
+In the example above:
+```
+Brick | Replica set | Brick subvolume index
+----------------------------------------------------------------------------
+-/gfs/brick-a | 0 | 0
+-/gfs/brick-b | 0 | 1
+-/gfs/brick-c | 1 | 2
+-/gfs/brick-d | 1 | 3
+-/gfs/brick-e | 2 | 4
+-/gfs/brick-f | 2 | 5
+-/gfs/brick-g | 3 | 6
+-/gfs/brick-h | 3 | 7
+```
+
+Each file in a brick maintains the changelog of itself and that of the files
+present in all the other bricks in it's replica set as seen by that brick.
+
+In the example volume given above, all files in brick-a will have 2 entries,
+one for itself and the other for the file present in it's replica pair, i.e.brick-b:
+trusted.afr.vol-client-0=0x000000000000000000000000 -->changelog for itself (brick-a)
+trusted.afr.vol-client-1=0x000000000000000000000000 -->changelog for brick-b as seen by brick-a
+
+Likewise, all files in brick-b will have:
+trusted.afr.vol-client-0=0x000000000000000000000000 -->changelog for brick-a as seen by brick-b
+trusted.afr.vol-client-1=0x000000000000000000000000 -->changelog for itself (brick-b)
+
+The same can be extended for other replica pairs.
+
+Interpreting Changelog (roughly pending operation count) Value:
+Each extended attribute has a value which is 24 hexa decimal digits.
+First 8 digits represent changelog of data. Second 8 digits represent changelog
+of metadata. Last 8 digits represent Changelog of directory entries.
+
+Pictorially representing the same, we have:
+```
+0x 000003d7 00000001 00000000
+ | | |
+ | | \_ changelog of directory entries
+ | \_ changelog of metadata
+ \ _ changelog of data
+```
+
+
+For Directories metadata and entry changelogs are valid.
+For regular files data and metadata changelogs are valid.
+For special files like device files etc metadata changelog is valid.
+When a file split-brain happens it could be either data split-brain or
+meta-data split-brain or both. When a split-brain happens the changelog of the
+file would be something like this:
+
+* Example:(Lets consider both data, metadata split-brain on same file).
+[root@pranithk-laptop vol]# getfattr -d -m . -e hex /gfs/brick-?/a
+getfattr: Removing leading '/' from absolute path names
+\#file: gfs/brick-a/a
+trusted.afr.vol-client-0=0x000000000000000000000000
+trusted.afr.vol-client-1=0x000003d70000000100000000
+trusted.gfid=0x80acdbd886524f6fbefa21fc356fed57
+\#file: gfs/brick-b/a
+trusted.afr.vol-client-0=0x000003b00000000100000000
+trusted.afr.vol-client-1=0x000000000000000000000000
+trusted.gfid=0x80acdbd886524f6fbefa21fc356fed57
+
+###Observations:
+
+####According to changelog extended attributes on file /gfs/brick-a/a:
+The first 8 digits of trusted.afr.vol-client-0 are all
+zeros (0x00000000................), and the first 8 digits of
+trusted.afr.vol-client-1 are not all zeros (0x000003d7................).
+So the changelog on /gfs/brick-a/a implies that some data operations succeeded
+on itself but failed on /gfs/brick-b/a.
+
+The second 8 digits of trusted.afr.vol-client-0 are
+all zeros (0x........00000000........), and the second 8 digits of
+trusted.afr.vol-client-1 are not all zeros (0x........00000001........).
+So the changelog on /gfs/brick-a/a implies that some metadata operations succeeded
+on itself but failed on /gfs/brick-b/a.
+
+####According to Changelog extended attributes on file /gfs/brick-b/a:
+The first 8 digits of trusted.afr.vol-client-0 are not all
+zeros (0x000003b0................), and the first 8 digits of
+trusted.afr.vol-client-1 are all zeros (0x00000000................).
+So the changelog on /gfs/brick-b/a implies that some data operations succeeded
+on itself but failed on /gfs/brick-a/a.
+
+The second 8 digits of trusted.afr.vol-client-0 are not
+all zeros (0x........00000001........), and the second 8 digits of
+trusted.afr.vol-client-1 are all zeros (0x........00000000........).
+So the changelog on /gfs/brick-b/a implies that some metadata operations succeeded
+on itself but failed on /gfs/brick-a/a.
+
+Since both the copies have data, metadata changes that are not on the other
+file, it is in both data and metadata split-brain.
+
+Deciding on the correct copy:
+-----------------------------
+The user may have to inspect stat,getfattr output of the files to decide which
+metadata to retain and contents of the file to decide which data to retain.
+Continuing with the example above, lets say we want to retain the data
+of /gfs/brick-a/a and metadata of /gfs/brick-b/a.
+
+Resetting the relevant changelogs to resolve the split-brain:
+-------------------------------------------------------------
+For resolving data-split-brain:
+We need to change the changelog extended attributes on the files as if some data
+operations succeeded on /gfs/brick-a/a but failed on /gfs/brick-b/a. But
+/gfs/brick-b/a should NOT have any changelog which says some data operations
+succeeded on /gfs/brick-b/a but failed on /gfs/brick-a/a. We need to reset the
+data part of the changelog on trusted.afr.vol-client-0 of /gfs/brick-b/a.
+
+For resolving metadata-split-brain:
+We need to change the changelog extended attributes on the files as if some
+metadata operations succeeded on /gfs/brick-b/a but failed on /gfs/brick-a/a.
+But /gfs/brick-a/a should NOT have any changelog which says some metadata
+operations succeeded on /gfs/brick-a/a but failed on /gfs/brick-b/a.
+We need to reset metadata part of the changelog on
+trusted.afr.vol-client-1 of /gfs/brick-a/a
+
+So, the intended changes are:
+On /gfs/brick-b/a:
+For trusted.afr.vol-client-0
+0x000003b00000000100000000 to 0x000000000000000100000000
+(Note that the metadata part is still not all zeros)
+Hence execute
+`setfattr -n trusted.afr.vol-client-0 -v 0x000000000000000100000000 /gfs/brick-b/a`
+
+On /gfs/brick-a/a:
+For trusted.afr.vol-client-1
+0x0000000000000000ffffffff to 0x000003d70000000000000000
+(Note that the data part is still not all zeros)
+Hence execute
+`setfattr -n trusted.afr.vol-client-1 -v 0x000003d70000000000000000 /gfs/brick-a/a`
+
+Thus after the above operations are done, the changelogs look like this:
+[root@pranithk-laptop vol]# getfattr -d -m . -e hex /gfs/brick-?/a
+getfattr: Removing leading '/' from absolute path names
+\#file: gfs/brick-a/a
+trusted.afr.vol-client-0=0x000000000000000000000000
+trusted.afr.vol-client-1=0x000003d70000000000000000
+trusted.gfid=0x80acdbd886524f6fbefa21fc356fed57
+
+\#file: gfs/brick-b/a
+trusted.afr.vol-client-0=0x000000000000000100000000
+trusted.afr.vol-client-1=0x000000000000000000000000
+trusted.gfid=0x80acdbd886524f6fbefa21fc356fed57
+
+
+Triggering Self-heal:
+---------------------
+Perform `ls -l <file-path-on-gluster-mount>` to trigger healing.
+
+Fixing Directory entry split-brain:
+----------------------------------
+Afr has the ability to conservatively merge different entries in the directories
+when there is a split-brain on directory.
+If on one brick directory 'd' has entries '1', '2' and has entries '3', '4' on
+the other brick then afr will merge all of the entries in the directory to have
+'1', '2', '3', '4' entries in the same directory.
+(Note: this may result in deleted files to re-appear in case the split-brain
+happens because of deletion of files on the directory)
+Split-brain resolution needs human intervention when there is at least one entry
+which has same file name but different gfid in that directory.
+Example:
+On brick-a the directory has entries '1' (with gfid g1), '2' and on brick-b
+directory has entries '1' (with gfid g2) and '3'.
+These kinds of directory split-brains need human intervention to resolve.
+The user needs to remove either file '1' on brick-a or the file '1' on brick-b
+to resolve the split-brain. In addition, the corresponding gfid-link file also
+needs to be removed.The gfid-link files are present in the .glusterfs folder
+in the top-level directory of the brick. If the gfid of the file is
+0x307a5c9efddd4e7c96e94fd4bcdcbd1b (the trusted.gfid extended attribute got
+from the getfattr command earlier),the gfid-link file can be found at
+> /gfs/brick-a/.glusterfs/30/7a/307a5c9efddd4e7c96e94fd4bcdcbd1b
+
+####Word of caution:
+Before deleting the gfid-link, we have to ensure that there are no hard links
+to the file present on that brick. If hard-links exist,they must be deleted as
+well.
diff --git a/error-codes.json b/error-codes.json
new file mode 100644
index 000000000..5121049d3
--- /dev/null
+++ b/error-codes.json
@@ -0,0 +1,4 @@
+{
+ "ERR_DEV": {"code": 9999,
+ "message": {"en": "devel error"}}
+}
diff --git a/extras/FreeBSD/Makefile b/extras/FreeBSD/Makefile
index c662b9c65..26fe85c88 100644
--- a/extras/FreeBSD/Makefile
+++ b/extras/FreeBSD/Makefile
@@ -10,17 +10,17 @@ DISTVERSION= 2.0.0rc1
PORTVERSION= 2.0.0rc1
PORTREVISION= 1
CATEGORIES= sysutils
-MASTER_SITES= ftp://ftp.zresearch.com/pub/gluster/glusterfs/2.0/2.0.0/ \
+MASTER_SITES= ftp://ftp.gluster.com/pub/gluster/glusterfs/2.0/2.0.0/ \
http://europe.gluster.org/glusterfs/2.0/2.0.0/
-MAINTAINER= harsha@zresearch.com
+MAINTAINER= harsha@gluster.com
COMMENT= GlusterFS is a clustered file-system
BUILD_DEPENDS= fusefs-libs>2.6.3:${PORTSDIR}/sysutils/fusefs-libs
LIB_DEPENDS= fuse.2:${PORTSDIR}/sysutils/fusefs-libs
RUN_DEPENDS= ${LOCALBASE}/modules/fuse.ko:${PORTSDIR}/sysutils/fusefs-kmod
-MAN8= glusterfs.8
+MAN8= glusterfs.8 gluster.8 glusterd.8 glusterfs-volgen.8
GNU_CONFIGURE= yes
USE_LDCONFIG= yes
USE_AUTOTOOLS= libtool:15
diff --git a/extras/LinuxRPM/Makefile.am b/extras/LinuxRPM/Makefile.am
new file mode 100644
index 000000000..1dafa982b
--- /dev/null
+++ b/extras/LinuxRPM/Makefile.am
@@ -0,0 +1,57 @@
+
+GFS_TAR = ../../glusterfs-$(VERSION).tar.gz
+
+.PHONY: all
+
+all:
+ @echo "To build RPMS run 'make glusterrpms'"
+
+.PHONY: glusterrpms prep srcrpm testsrpm clean
+
+glusterrpms: prep srcrpm rpms
+ -rm -rf rpmbuild
+
+prep:
+ if [ ! -e $(GFS_TAR) ]; then \
+ $(MAKE) -C ../.. dist; \
+ fi
+ -mkdir -p rpmbuild/SPECS
+ -mkdir -p rpmbuild/RPMS
+ -mkdir -p rpmbuild/SRPMS
+ -rm -rf rpmbuild/SOURCES
+ @if [ -d /d/cache/glusterfs -a -e /d/cache/glusterfs/sources ]; then \
+ echo "copying glusterfs rpm files from local cache..." ; \
+ mkdir -p ./rpmbuild/SOURCES; \
+ cp /d/cache/glusterfs/* ./rpmbuild/SOURCES/ ; \
+ elif [ -x /usr/bin/git ]; then \
+ echo "fetching glusterfs rpm files from fedora git repo..."; \
+ cd ./rpmbuild && git clone git://pkgs.fedoraproject.org/glusterfs.git > /dev/null && mv glusterfs SOURCES; \
+ else \
+ echo "glusterfs rpm files not fetched, you don't have git installed!" ; \
+ exit 1 ; \
+ fi
+ cp ../../*.tar.gz ./rpmbuild/SOURCES
+ cp ../../glusterfs.spec ./rpmbuild/SPECS
+
+srcrpm:
+ rpmbuild --define '_topdir $(shell pwd)/rpmbuild' -bs rpmbuild/SPECS/glusterfs.spec
+ mv rpmbuild/SRPMS/* .
+
+rpms:
+ rpmbuild --define '_topdir $(shell pwd)/rpmbuild' -bb rpmbuild/SPECS/glusterfs.spec
+ mv rpmbuild/RPMS/*/* .
+
+# EPEL-5 does not like new versions of rpmbuild and requires some
+# _source_* defines
+
+testsrpm: prep
+ rpmbuild --define '_topdir $(shell pwd)/rpmbuild' \
+ --define '_source_payload w9.gzdio' \
+ --define '_source_filedigest_algorithm 1' \
+ -bs rpmbuild/SPECS/glusterfs.spec
+ mv rpmbuild/SRPMS/* ../..
+ -rm -rf rpmbuild
+
+clean:
+ -rm -rf rpmbuild
+ -rm -f *.rpm
diff --git a/extras/MacOSX/Portfile b/extras/MacOSX/Portfile
index 0094ee4c4..1262a44da 100644
--- a/extras/MacOSX/Portfile
+++ b/extras/MacOSX/Portfile
@@ -5,12 +5,12 @@ PortSystem 1.0
name glusterfs
version 2.0.0rc8
categories fuse
-maintainers amar@zresearch.com
+maintainers amar@gluster.com
description GlusterFS
long_description GlusterFS is a cluster file system, flexible to tune it for your needs.
homepage http://www.gluster.org/
platforms darwin
-master_sites http://ftp.zresearch.com/pub/gluster/glusterfs/2.0/2.0.0
+master_sites http://ftp.gluster.com/pub/gluster/glusterfs/2.0/2.0.0
configure.args --disable-bdb
checksums md5 33c2d02344d4fab422e80cfb637e0b48
@@ -18,7 +18,7 @@ checksums md5 33c2d02344d4fab422e80cfb637e0b48
post-destroot {
file mkdir ${destroot}/Library/LaunchDaemons/
file copy ${worksrcpath}/extras/glusterfs-server.plist \
- ${destroot}/Library/LaunchDaemons/com.zresearch.glusterfs.plist
+ ${destroot}/Library/LaunchDaemons/com.gluster.glusterfs.plist
file mkdir ${destroot}/sbin/
file copy ${worksrcpath}/xlators/mount/fuse/utils/mount_glusterfs \
diff --git a/extras/MacOSX/README.MacOSX b/extras/MacOSX/README.MacOSX
index cc38958d8..ec7abc2cc 100644
--- a/extras/MacOSX/README.MacOSX
+++ b/extras/MacOSX/README.MacOSX
@@ -24,7 +24,7 @@ system. Run,
bash# sudo /Library/Filesystems/fusefs.fs/Support/uninstall-macfuse-core.sh
After this, install MacFUSE (mostly through .dmg available in macfuse homepage
-or if Z Research Inc provides any custom built .dmg)
+or if Gluster Inc provides any custom built .dmg)
Make sure the .dmg of glusterfs is built against the installed MacFUSE version
(if not, any operations over mountpoint gives EIO ie, Input/Output Error). If
@@ -40,7 +40,7 @@ after the entry is added in /etc/fstab, it can be mounted by 'mount' command.
To start the server process one can use the 'launchd' mechanism. Follow below
steps after installation
- bash# launchctl load /Library/LaunchDaemons/com.zresearch.glusterfs.plist
+ bash# launchctl load /Library/LaunchDaemons/com.gluster.glusterfs.plist
No need to run the command if the machine reboots, as it will be loaded
automatically by launchd process.
@@ -51,17 +51,17 @@ Now copy the server volume file in the proper path
NOTE: (If glusterfs is installed in different path other than '/opt/local'
update the volume file at the corresponding path, and also need to
- update the /Library/LaunchDaemons/com.zresearch.glusterfs.plist with
+ update the /Library/LaunchDaemons/com.gluster.glusterfs.plist with
the proper path)
Once the volume file is updated, administrator can start the server process by
running,
- bash# launchctl start com.zresearch.glusterfs
+ bash# launchctl start com.gluster.glusterfs
and stop like
- bash# launchctl stop com.zresearch.glusterfs
+ bash# launchctl stop com.gluster.glusterfs
NOTE: To start the process by default when the loaded, one need to add the
following lines to .plist file
diff --git a/extras/Makefile.am b/extras/Makefile.am
index 9be397300..cf619329b 100644
--- a/extras/Makefile.am
+++ b/extras/Makefile.am
@@ -1,11 +1,20 @@
-docdir = $(datadir)/doc/glusterfs/
-EditorModedir = $(docdir)/
+EditorModedir = $(docdir)
EditorMode_DATA = glusterfs-mode.el glusterfs.vim
-SUBDIRS = init.d benchmarking
+SUBDIRS = init.d systemd benchmarking hook-scripts $(OCF_SUBDIR) LinuxRPM geo-rep
-EXTRA_DIST = specgen.scm MacOSX/Portfile glusterfs-mode.el glusterfs.vim migrate-unify-to-distribute.sh backend-xattr-sanitize.sh
+confdir = $(sysconfdir)/glusterfs
+conf_DATA = glusterfs-logrotate gluster-rsyslog-7.2.conf gluster-rsyslog-5.8.conf \
+ logger.conf.example glusterfs-georep-logrotate
-CLEANFILES =
+voldir = $(sysconfdir)/glusterfs
+vol_DATA = glusterd.vol
+EXTRA_DIST = specgen.scm MacOSX/Portfile glusterfs-mode.el glusterfs.vim \
+ migrate-unify-to-distribute.sh backend-xattr-sanitize.sh \
+ backend-cleanup.sh disk_usage_sync.sh quota-remove-xattr.sh \
+ quota-metadata-cleanup.sh glusterfs-logrotate clear_xattrs.sh \
+ group-virt.example glusterd-sysconfig gluster-rsyslog-7.2.conf \
+ gluster-rsyslog-5.8.conf logger.conf.example glusterd.vol \
+ glusterfs-georep-logrotate
diff --git a/extras/Solaris/Prototype b/extras/Solaris/Prototype
index 369234210..cb2d99d8c 100644
--- a/extras/Solaris/Prototype
+++ b/extras/Solaris/Prototype
@@ -9,6 +9,9 @@ d none /usr/sfw/share 0755 root bin
d none /usr/sfw/share/man 0755 root bin
d none /usr/sfw/share/man/man8 0755 root bin
f none /usr/sfw/share/man/man8/glusterfs.8 0644 root root
+f none /usr/sfw/share/man/man8/gluster.8 0644 root root
+f none /usr/sfw/share/man/man8/glusterd.8 0644 root root
+f none /usr/sfw/share/man/man8/glusterfs-volgen.8 0644 root root
d none /usr/sfw/share/doc 0755 root bin
d none /usr/sfw/share/doc/glusterfs 0755 root root
d none /usr/sfw/share/doc/glusterfs/examples 0755 root root
diff --git a/extras/Solaris/README.solaris b/extras/Solaris/README.solaris
index 0cea45bdc..5a4e4336d 100644
--- a/extras/Solaris/README.solaris
+++ b/extras/Solaris/README.solaris
@@ -10,7 +10,7 @@ $ which glusterfs
$ glusterfs --version
glusterfs 2.0.0rc1 built on Jan 16 2009 03:36:59
Repository revision: glusterfs--mainline--3.0--patch-844
-Copyright (c) 2006, 2007, 2008 Z RESEARCH Inc. <http://www.zresearch.com>
+Copyright (c) 2006-2011 Gluster Inc. <http://www.gluster.com>
GlusterFS comes with ABSOLUTELY NO WARRANTY.
You may redistribute copies of GlusterFS under the terms of the GNU General Public License.
diff --git a/extras/Solaris/pkginfo b/extras/Solaris/pkginfo
index ffa1f2d9a..0da4907d8 100644
--- a/extras/Solaris/pkginfo
+++ b/extras/Solaris/pkginfo
@@ -4,9 +4,9 @@ VERSION="2.0.0rc1"
ARCH="i386"
CLASSES="none"
CATEGORY="system"
-VENDOR="ZResearch Inc"
+VENDOR="Gluster Inc"
PSTAMP="16thJan08"
-EMAIL="harsha@zresearch.com"
+EMAIL="harsha@gluster.com"
ISTATES="S s 1 2 3"
RSTATES="S s 1 2 3"
BASEDIR="/"
diff --git a/extras/Ubuntu/README.Ubuntu b/extras/Ubuntu/README.Ubuntu
new file mode 100644
index 000000000..0c5b7828d
--- /dev/null
+++ b/extras/Ubuntu/README.Ubuntu
@@ -0,0 +1,24 @@
+Bug 765014 - Mounting from localhost in fstab fails at boot on ubuntu
+(https://bugzilla.redhat.com/show_bug.cgi?id=765014)
+(https://bugs.launchpad.net/ubuntu/+source/glusterfs/+bug/876648)
+
+Ubuntu uses upstart instead of init to bootstrap the system and it has a unique
+way of handling fstab, using a program called mountall(8). As a result of using
+a debian initscript to start glusterd, glusterfs mounts in fstab are tried before
+the glusterd service is running. In the case where the client is also a server
+and the volume is mounted from localhost, the mount fails at boot time. To
+correct this we need to launch glusterd using upstart and block the glusterfs
+mounting event until glusterd is started.
+
+The glusterd.conf file contains the necessary configuration for upstart to
+manage the glusterd service. It should be placed in /etc/init/glusterd.conf
+on Ubuntu systems, and then the old initscript /etc/init.d/glusterd can be
+removed. An additional upstart job, mounting-glusterfs.conf, is also required
+to block mounting glusterfs volumes until the glusterd service is available.
+Both of these upstart jobs need to be placed in /etc/init to resolve the issue.
+
+Starting with Ubuntu 12.04, Precise Pangolin, these upstart jobs will be
+included with the glusterfs-server package in the Ubuntu repository.
+
+This affects all versions of glusterfs on the Ubuntu platform since at least
+10.04, Lucid Lynx.
diff --git a/extras/Ubuntu/glusterd.conf b/extras/Ubuntu/glusterd.conf
new file mode 100644
index 000000000..aa99502b0
--- /dev/null
+++ b/extras/Ubuntu/glusterd.conf
@@ -0,0 +1,10 @@
+author "Louis Zuckerman <me@louiszuckerman.com>"
+description "GlusterFS Management Daemon"
+
+start on runlevel [2345]
+stop on runlevel [016]
+
+expect fork
+
+exec /usr/sbin/glusterd -p /var/run/glusterd.pid
+
diff --git a/extras/Ubuntu/mounting-glusterfs.conf b/extras/Ubuntu/mounting-glusterfs.conf
new file mode 100644
index 000000000..3c59c0f63
--- /dev/null
+++ b/extras/Ubuntu/mounting-glusterfs.conf
@@ -0,0 +1,7 @@
+author "Louis Zuckerman <me@louiszuckerman.com>"
+description "Block the mounting event for glusterfs filesystems until glusterd is running"
+
+start on mounting TYPE=glusterfs
+task
+exec start wait-for-state WAIT_FOR=glusterd WAITER=mounting-glusterfs
+
diff --git a/extras/backend-cleanup.sh b/extras/backend-cleanup.sh
new file mode 100644
index 000000000..dc504860b
--- /dev/null
+++ b/extras/backend-cleanup.sh
@@ -0,0 +1,28 @@
+#!/bin/sh
+
+# This script can be used to cleanup the 'cluster/distribute' translator's
+# stale link files. One may choose to run this only when number of subvolumes
+# to distribute volume gets increased (or decreased)
+#
+# This script has to be run on the servers, which are exporting the data to
+# GlusterFS
+#
+# (c) 2010 Gluster Inc <http://www.gluster.com/>
+
+set -e
+
+# Change the below variable as per the setup.
+export_directory="/export/glusterfs"
+
+clean_dir()
+{
+ # Clean the 'link' files on backend
+ find "${export_directory}" -type f -perm +01000 -exec rm -v '{}' \;
+}
+
+main()
+{
+ clean_dir ;
+}
+
+main "$@"
diff --git a/extras/benchmarking/Makefile.am b/extras/benchmarking/Makefile.am
index 04cc06182..bfcc59277 100644
--- a/extras/benchmarking/Makefile.am
+++ b/extras/benchmarking/Makefile.am
@@ -1,7 +1,5 @@
-docdir = $(datadir)/doc/$(PACKAGE_NAME)/benchmarking
-
-benchmarkingdir = $(docdir)
+benchmarkingdir = $(docdir)/benchmarking
benchmarking_DATA = rdd.c glfs-bm.c README launch-script.sh local-script.sh
diff --git a/extras/benchmarking/glfs-bm.c b/extras/benchmarking/glfs-bm.c
index 66b32b3f0..dc717f33c 100644
--- a/extras/benchmarking/glfs-bm.c
+++ b/extras/benchmarking/glfs-bm.c
@@ -1,28 +1,18 @@
-/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#define _GNU_SOURCE
#define __USE_FILE_OFFSET64
#define _FILE_OFFSET_BITS 64
#include <stdio.h>
#include <argp.h>
-#include <libglusterfsclient.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
@@ -42,7 +32,6 @@ struct state {
char need_iface_xattr:1;
char need_mode_posix:1;
- char need_mode_libglusterfsclient:1;
char prefix[512];
long int count;
@@ -50,7 +39,6 @@ struct state {
size_t block_size;
char *specfile;
- void *libglusterfsclient_context;
long int io_size;
};
@@ -132,21 +120,6 @@ parse_opts (int key, char *arg,
return -1;
}
break;
- case 'm':
- if (strcasecmp (arg, "posix") == 0) {
- state->need_mode_posix = 1;
- state->need_mode_libglusterfsclient = 0;
- } else if (strcasecmp (arg, "libglusterfsclient") == 0) {
- state->need_mode_posix = 0;
- state->need_mode_libglusterfsclient = 1;
- } else if (strcasecmp (arg, "both") == 0) {
- state->need_mode_posix = 1;
- state->need_mode_libglusterfsclient = 1;
- } else {
- fprintf (stderr, "unknown mode: %s\n", arg);
- return -1;
- }
- break;
case 'b':
{
size_t block_size = atoi (arg);
@@ -342,170 +315,6 @@ do_mode_posix_iface_xattr (struct state *state)
return 0;
}
-
-int
-do_mode_libglusterfsclient_iface_fileio_write (struct state *state)
-{
- long int i;
- int ret = -1;
- char block[state->block_size];
-
- for (i=0; i<state->count; i++) {
- long fd = 0;
- char filename[512];
-
- sprintf (filename, "/%s.%06ld", state->prefix, i);
-
- fd = glusterfs_open (state->libglusterfsclient_context,
- filename, O_CREAT|O_WRONLY, 0);
-
- if (fd == 0) {
- fprintf (stderr, "open(%s) => %s\n", filename, strerror (errno));
- break;
- }
- ret = glusterfs_write (fd, block, state->block_size);
- if (ret == -1) {
- fprintf (stderr, "glusterfs_write(%s) => %s\n", filename, strerror (errno));
- glusterfs_close (fd);
- break;
- }
- glusterfs_close (fd);
- state->io_size += ret;
- }
-
- return i;
-}
-
-
-int
-do_mode_libglusterfsclient_iface_fileio_read (struct state *state)
-{
- long int i;
- int ret = -1;
- char block[state->block_size];
-
- for (i=0; i<state->count; i++) {
- long fd = 0;
- char filename[512];
-
- sprintf (filename, "/%s.%06ld", state->prefix, i);
-
- fd = glusterfs_open (state->libglusterfsclient_context,
- filename, O_RDONLY, 0);
-
- if (fd == 0) {
- fprintf (stderr, "glusterfs_open(%s) => %s\n", filename, strerror (errno));
- break;
- }
- ret = glusterfs_read (fd, block, state->block_size);
- if (ret == -1) {
- fprintf (stderr, "glusterfs_read(%s) => %s\n", filename, strerror (errno));
- glusterfs_close (fd);
- break;
- }
- glusterfs_close (fd);
- state->io_size += ret;
- }
-
- return i;
-}
-
-
-int
-do_mode_libglusterfsclient_iface_fileio (struct state *state)
-{
- if (state->need_op_write)
- MEASURE (do_mode_libglusterfsclient_iface_fileio_write, state);
-
- if (state->need_op_read)
- MEASURE (do_mode_libglusterfsclient_iface_fileio_read, state);
-
- return 0;
-}
-
-
-int
-do_mode_libglusterfsclient_iface_xattr_write (struct state *state)
-{
- long int i;
- int ret = -1;
- char block[state->block_size];
- char *dname = NULL, *dirc = NULL;
- char *bname = NULL, *basec = NULL;
-
- asprintf (&dirc, "/%s", state->prefix);
- asprintf (&basec, "/%s", state->prefix);
- dname = dirname (dirc);
- bname = basename (basec);
-
- for (i=0; i<state->count; i++) {
- char key[512];
-
- sprintf (key, "glusterfs.file.%s.%06ld", bname, i);
-
- ret = glusterfs_setxattr (state->libglusterfsclient_context,
- dname, key, block, state->block_size, 0);
-
- if (ret < 0) {
- fprintf (stderr, "glusterfs_setxattr (%s, %s, %p) => %s\n",
- dname, key, block, strerror (errno));
- break;
- }
- state->io_size += state->block_size;
- }
-
- return i;
-
-}
-
-
-int
-do_mode_libglusterfsclient_iface_xattr_read (struct state *state)
-{
- long int i;
- int ret = -1;
- char block[state->block_size];
- char *dname = NULL, *dirc = NULL;
- char *bname = NULL, *basec = NULL;
-
- dirc = strdup (state->prefix);
- basec = strdup (state->prefix);
- dname = dirname (dirc);
- bname = basename (basec);
-
- for (i=0; i<state->count; i++) {
- char key[512];
-
- sprintf (key, "glusterfs.file.%s.%06ld", bname, i);
-
- ret = glusterfs_getxattr (state->libglusterfsclient_context,
- dname, key, block, state->block_size);
-
- if (ret < 0) {
- fprintf (stderr, "glusterfs_getxattr (%s, %s, %p) => %s\n",
- dname, key, block, strerror (errno));
- break;
- }
- state->io_size += ret;
- }
-
- return i;
-}
-
-
-int
-do_mode_libglusterfsclient_iface_xattr (struct state *state)
-{
- if (state->need_op_write)
- MEASURE (do_mode_libglusterfsclient_iface_xattr_write, state);
-
- if (state->need_op_read)
- MEASURE (do_mode_libglusterfsclient_iface_xattr_read, state);
-
- return 0;
-}
-
-
int
do_mode_posix (struct state *state)
{
@@ -520,44 +329,8 @@ do_mode_posix (struct state *state)
int
-do_mode_libglusterfsclient (struct state *state)
-{
- glusterfs_init_ctx_t ctx = {
- .logfile = "/dev/stderr",
- .loglevel = "error",
- .lookup_timeout = 60,
- .stat_timeout = 60,
- };
-
- ctx.specfile = state->specfile;
- if (state->specfile) {
- state->libglusterfsclient_context = glusterfs_init (&ctx);
-
- if (!state->libglusterfsclient_context) {
- fprintf (stdout, "Unable to initialize glusterfs context, skipping libglusterfsclient mode\n");
- return -1;
- }
- } else {
- fprintf (stdout, "glusterfs volume specification file not provided, skipping libglusterfsclient mode\n");
- return -1;
- }
-
- if (state->need_iface_fileio)
- do_mode_libglusterfsclient_iface_fileio (state);
-
- if (state->need_iface_xattr)
- do_mode_libglusterfsclient_iface_xattr (state);
-
- return 0;
-}
-
-
-int
do_actions (struct state *state)
{
- if (state->need_mode_libglusterfsclient)
- do_mode_libglusterfsclient (state);
-
if (state->need_mode_posix)
do_mode_posix (state);
@@ -569,8 +342,6 @@ static struct argp_option options[] = {
"WRITE|READ|BOTH - defaults to BOTH"},
{"iface", 'i', "INTERFACE", 0,
"FILEIO|XATTR|BOTH - defaults to FILEIO"},
- {"mode", 'm', "MODE", 0,
- "POSIX|LIBGLUSTERFSCLIENT|BOTH - defaults to POSIX"},
{"block", 'b', "BLOCKSIZE", 0,
"<NUM> - defaults to 4096"},
{"specfile", 's', "SPECFILE", 0,
@@ -601,7 +372,6 @@ main (int argc, char *argv[])
state.need_iface_xattr = 0;
state.need_mode_posix = 1;
- state.need_mode_libglusterfsclient = 0;
state.block_size = 4096;
diff --git a/extras/benchmarking/rdd.c b/extras/benchmarking/rdd.c
index d30660b50..a667c6a1d 100644
--- a/extras/benchmarking/rdd.c
+++ b/extras/benchmarking/rdd.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>
@@ -36,172 +26,278 @@
#define UNIX_PATH_MAX 108
#endif
+#define UNIT_KB 1024ULL
+#define UNIT_MB UNIT_KB*1024ULL
+#define UNIT_GB UNIT_MB*1024ULL
+#define UNIT_TB UNIT_GB*1024ULL
+#define UNIT_PB UNIT_TB*1024ULL
+
+#define UNIT_KB_STRING "KB"
+#define UNIT_MB_STRING "MB"
+#define UNIT_GB_STRING "GB"
+#define UNIT_TB_STRING "TB"
+#define UNIT_PB_STRING "PB"
+
struct rdd_file {
- char path[UNIX_PATH_MAX];
- struct stat st;
- int fd;
+ char path[UNIX_PATH_MAX];
+ struct stat st;
+ int fd;
};
-
+
struct rdd_config {
- long iters;
- long max_ops_per_seq;
- size_t max_bs;
- size_t min_bs;
- int thread_count;
- pthread_t *threads;
- pthread_barrier_t barrier;
- pthread_mutex_t lock;
- struct rdd_file in_file;
- struct rdd_file out_file;
+ long iters;
+ long max_ops_per_seq;
+ size_t max_bs;
+ size_t min_bs;
+ int thread_count;
+ pthread_t *threads;
+ pthread_barrier_t barrier;
+ pthread_mutex_t lock;
+ struct rdd_file in_file;
+ struct rdd_file out_file;
+ ssize_t file_size;
};
static struct rdd_config rdd_config;
enum rdd_keys {
- RDD_MIN_BS_KEY = 1,
- RDD_MAX_BS_KEY,
+ RDD_MIN_BS_KEY = 1,
+ RDD_MAX_BS_KEY,
};
-
+
static error_t
rdd_parse_opts (int key, char *arg,
- struct argp_state *_state)
+ struct argp_state *_state)
{
- switch (key) {
- case 'o':
- {
- int len = 0;
- len = strlen (arg);
- if (len > UNIX_PATH_MAX) {
- fprintf (stderr, "output file name too long (%s)\n",
+ switch (key) {
+ case 'o':
+ {
+ int len = 0;
+ len = strlen (arg);
+ if (len > UNIX_PATH_MAX) {
+ fprintf (stderr, "output file name too long (%s)\n",
arg);
- return -1;
- }
-
- strncpy (rdd_config.out_file.path, arg, len);
- }
- break;
-
- case 'i':
- {
- int len = 0;
- len = strlen (arg);
- if (len > UNIX_PATH_MAX) {
- fprintf (stderr, "input file name too long (%s)\n",
+ return -1;
+ }
+
+ strncpy (rdd_config.out_file.path, arg, len);
+ }
+ break;
+
+ case 'i':
+ {
+ int len = 0;
+ len = strlen (arg);
+ if (len > UNIX_PATH_MAX) {
+ fprintf (stderr, "input file name too long (%s)\n",
arg);
- return -1;
- }
-
- strncpy (rdd_config.in_file.path, arg, len);
- }
- break;
-
- case RDD_MIN_BS_KEY:
- {
- char *tmp = NULL;
- long bs = 0;
- bs = strtol (arg, &tmp, 10);
- if ((bs == LONG_MAX) || (bs == LONG_MIN) || (tmp && *tmp)) {
- fprintf (stderr, "invalid argument for minimum block"
+ return -1;
+ }
+
+ strncpy (rdd_config.in_file.path, arg, len);
+ rdd_config.in_file.path[len] = '\0';
+ }
+ break;
+
+ case 'f':
+ {
+ char *tmp = NULL;
+ unsigned long long fs = 0;
+ if (string2bytesize (arg, &fs) == -1) {
+ fprintf (stderr, "invalid argument for file size "
+ "(%s)\n", arg);
+ return -1;
+ }
+
+ rdd_config.file_size = fs;
+ }
+ break;
+
+ case RDD_MIN_BS_KEY:
+ {
+ char *tmp = NULL;
+ long bs = 0;
+ bs = strtol (arg, &tmp, 10);
+ if ((bs == LONG_MAX) || (bs == LONG_MIN) || (tmp && *tmp)) {
+ fprintf (stderr, "invalid argument for minimum block"
"size (%s)\n", arg);
- return -1;
- }
-
- rdd_config.min_bs = bs;
- }
- break;
-
- case RDD_MAX_BS_KEY:
- {
- char *tmp = NULL;
- long bs = 0;
- bs = strtol (arg, &tmp, 10);
- if ((bs == LONG_MAX) || (bs == LONG_MIN) || (tmp && *tmp)) {
- fprintf (stderr, "invalid argument for maximum block"
+ return -1;
+ }
+
+ rdd_config.min_bs = bs;
+ }
+ break;
+
+ case RDD_MAX_BS_KEY:
+ {
+ char *tmp = NULL;
+ long bs = 0;
+ bs = strtol (arg, &tmp, 10);
+ if ((bs == LONG_MAX) || (bs == LONG_MIN) || (tmp && *tmp)) {
+ fprintf (stderr, "invalid argument for maximum block"
"size (%s)\n", arg);
- return -1;
- }
-
- rdd_config.max_bs = bs;
- }
- break;
-
- case 'r':
- {
- char *tmp = NULL;
- long iters = 0;
- iters = strtol (arg, &tmp, 10);
- if ((iters == LONG_MAX) ||
- (iters == LONG_MIN) ||
+ return -1;
+ }
+
+ rdd_config.max_bs = bs;
+ }
+ break;
+
+ case 'r':
+ {
+ char *tmp = NULL;
+ long iters = 0;
+ iters = strtol (arg, &tmp, 10);
+ if ((iters == LONG_MAX) ||
+ (iters == LONG_MIN) ||
(tmp && *tmp)) {
- fprintf (stderr, "invalid argument for iterations"
+ fprintf (stderr, "invalid argument for iterations"
"(%s)\n", arg);
- return -1;
- }
-
- rdd_config.iters = iters;
- }
- break;
-
- case 'm':
- {
- char *tmp = NULL;
- long max_ops = 0;
- max_ops = strtol (arg, &tmp, 10);
- if ((max_ops == LONG_MAX) ||
- (max_ops == LONG_MIN) ||
+ return -1;
+ }
+
+ rdd_config.iters = iters;
+ }
+ break;
+
+ case 'm':
+ {
+ char *tmp = NULL;
+ long max_ops = 0;
+ max_ops = strtol (arg, &tmp, 10);
+ if ((max_ops == LONG_MAX) ||
+ (max_ops == LONG_MIN) ||
(tmp && *tmp)) {
- fprintf (stderr, "invalid argument for max-ops"
+ fprintf (stderr, "invalid argument for max-ops"
"(%s)\n", arg);
- return -1;
- }
-
- rdd_config.max_ops_per_seq = max_ops;
- }
- break;
-
- case 't':
- {
- char *tmp = NULL;
- long threads = 0;
- threads = strtol (arg, &tmp, 10);
- if ((threads == LONG_MAX) ||
- (threads == LONG_MIN) ||
+ return -1;
+ }
+
+ rdd_config.max_ops_per_seq = max_ops;
+ }
+ break;
+
+ case 't':
+ {
+ char *tmp = NULL;
+ long threads = 0;
+ threads = strtol (arg, &tmp, 10);
+ if ((threads == LONG_MAX) ||
+ (threads == LONG_MIN) ||
(tmp && *tmp)) {
- fprintf (stderr, "invalid argument for thread count"
+ fprintf (stderr, "invalid argument for thread count"
"(%s)\n", arg);
- return -1;
- }
+ return -1;
+ }
- rdd_config.thread_count = threads;
- }
- break;
+ rdd_config.thread_count = threads;
+ }
+ break;
- case ARGP_KEY_NO_ARGS:
- break;
- case ARGP_KEY_ARG:
- break;
- case ARGP_KEY_END:
- if (_state->argc == 1) {
- argp_usage (_state);
- }
+ case ARGP_KEY_NO_ARGS:
+ break;
+ case ARGP_KEY_ARG:
+ break;
+ case ARGP_KEY_END:
+ if (_state->argc == 1) {
+ argp_usage (_state);
+ }
- }
+ }
- return 0;
+ return 0;
+}
+
+int
+string2bytesize (const char *str, unsigned long long *n)
+{
+ unsigned long long value = 0ULL;
+ char *tail = NULL;
+ int old_errno = 0;
+ const char *s = NULL;
+
+ if (str == NULL || n == NULL)
+ {
+ errno = EINVAL;
+ return -1;
+ }
+
+ for (s = str; *s != '\0'; s++)
+ {
+ if (isspace (*s))
+ {
+ continue;
+ }
+ if (*s == '-')
+ {
+ return -1;
+ }
+ break;
+ }
+
+ old_errno = errno;
+ errno = 0;
+ value = strtoull (str, &tail, 10);
+
+ if (errno == ERANGE || errno == EINVAL)
+ {
+ return -1;
+ }
+
+ if (errno == 0)
+ {
+ errno = old_errno;
+ }
+
+ if (tail[0] != '\0')
+ {
+ if (strcasecmp (tail, UNIT_KB_STRING) == 0)
+ {
+ value *= UNIT_KB;
+ }
+ else if (strcasecmp (tail, UNIT_MB_STRING) == 0)
+ {
+ value *= UNIT_MB;
+ }
+ else if (strcasecmp (tail, UNIT_GB_STRING) == 0)
+ {
+ value *= UNIT_GB;
+ }
+ else if (strcasecmp (tail, UNIT_TB_STRING) == 0)
+ {
+ value *= UNIT_TB;
+ }
+ else if (strcasecmp (tail, UNIT_PB_STRING) == 0)
+ {
+ value *= UNIT_PB;
+ }
+
+ else
+ {
+ return -1;
+ }
+ }
+
+ *n = value;
+
+ return 0;
}
static struct argp_option rdd_options[] = {
- {"if", 'i', "INPUT_FILE", 0, "input-file"},
- {"of", 'o', "OUTPUT_FILE", 0, "output-file"},
- {"threads", 't', "COUNT", 0, "number of threads to spawn (defaults to 2)"},
- {"min-bs", RDD_MIN_BS_KEY, "MIN_BLOCK_SIZE", 0,
- "Minimum block size in bytes (defaults to 1024)"},
- {"max-bs", RDD_MAX_BS_KEY, "MAX_BLOCK_SIZE", 0,
- "Maximum block size in bytes (defaults to 4096)"},
- {"iters", 'r', "ITERS", 0,
- "Number of read-write sequences (defaults to 1000000)"},
- {"max-ops", 'm', "MAXOPS", 0,
- "maximum number of read-writes to be performed in a sequence (defaults to 1)"},
- {0, 0, 0, 0, 0}
+ {"if", 'i', "INPUT_FILE", 0, "input-file"},
+ {"of", 'o', "OUTPUT_FILE", 0, "output-file"},
+ {"threads", 't', "COUNT", 0, "number of threads to spawn (defaults to 2)"},
+ {"min-bs", RDD_MIN_BS_KEY, "MIN_BLOCK_SIZE", 0,
+ "Minimum block size in bytes (defaults to 1024)"},
+ {"max-bs", RDD_MAX_BS_KEY, "MAX_BLOCK_SIZE", 0,
+ "Maximum block size in bytes (defaults to 4096)"},
+ {"iters", 'r', "ITERS", 0,
+ "Number of read-write sequences (defaults to 1000000)"},
+ {"max-ops", 'm', "MAXOPS", 0,
+ "maximum number of read-writes to be performed in a sequence (defaults to 1)"},
+ {"file-size", 'f', "FILESIZE", 0,
+ "the size of the file which will be created and upon it I/O will be done"
+ " (defaults to 100MB"},
+ {0, 0, 0, 0, 0}
};
static struct argp argp = {
@@ -216,277 +312,341 @@ static struct argp argp = {
static void
rdd_default_config (void)
{
- rdd_config.thread_count = 2;
- rdd_config.iters = 1000000;
- rdd_config.max_bs = 4096;
- rdd_config.min_bs = 1024;
- rdd_config.in_file.fd = rdd_config.out_file.fd = -1;
- rdd_config.max_ops_per_seq = 1;
-
- return;
+ char *tmp_path = "rdd.in";
+
+ rdd_config.thread_count = 2;
+ rdd_config.iters = 1000000;
+ rdd_config.max_bs = 4096;
+ rdd_config.min_bs = 1024;
+ rdd_config.in_file.fd = rdd_config.out_file.fd = -1;
+ rdd_config.max_ops_per_seq = 1;
+ strncpy (rdd_config.in_file.path, tmp_path, strlen (tmp_path));
+ rdd_config.file_size = 104857600;
+
+ return;
}
static char
rdd_valid_config (void)
{
- char ret = 1;
- int fd = -1;
-
- fd = open (rdd_config.in_file.path, O_RDONLY);
- if (fd == -1) {
- ret = 0;
- goto out;
- }
- close (fd);
-
- if (rdd_config.min_bs > rdd_config.max_bs) {
- ret = 0;
- goto out;
- }
-
- if (strlen (rdd_config.out_file.path) == 0) {
- sprintf (rdd_config.out_file.path, "%s.rddout",
+ char ret = 1;
+ int fd = -1;
+
+ fd = open (rdd_config.in_file.path, O_RDONLY);
+ if (fd == -1 && (errno != ENOENT)) {
+ fprintf (stderr, "open: (%s)", strerror (errno));
+ ret = 0;
+ goto out;
+ }
+ close (fd);
+
+ if (rdd_config.min_bs > rdd_config.max_bs) {
+ fprintf (stderr, "minimum blocksize %ld is greater than the "
+ "maximum blocksize %ld", rdd_config.min_bs,
+ rdd_config.max_bs);
+ ret = 0;
+ goto out;
+ }
+
+ if (strlen (rdd_config.out_file.path) == 0) {
+ sprintf (rdd_config.out_file.path, "%s.rddout",
rdd_config.in_file.path);
- }
+ }
out:
- return ret;
+ return ret;
}
static void *
rdd_read_write (void *arg)
{
- int i = 0, ret = 0;
- size_t bs = 0;
- off_t offset = 0;
- long rand = 0;
- long max_ops = 0;
- char *buf = NULL;
-
- buf = calloc (1, rdd_config.max_bs);
- if (!buf) {
- fprintf (stderr, "calloc failed (%s)\n", strerror (errno));
- ret = -1;
- goto out;
- }
-
- for (i = 0; i < rdd_config.iters; i++)
- {
- pthread_mutex_lock (&rdd_config.lock);
- {
- int bytes = 0;
- rand = random ();
-
- if (rdd_config.min_bs == rdd_config.max_bs) {
- bs = rdd_config.max_bs;
- } else {
- bs = rdd_config.min_bs +
- (rand %
- (rdd_config.max_bs -
+ int i = 0, ret = 0;
+ size_t bs = 0;
+ off_t offset = 0;
+ long rand = 0;
+ long max_ops = 0;
+ char *buf = NULL;
+
+ buf = calloc (1, rdd_config.max_bs);
+ if (!buf) {
+ fprintf (stderr, "calloc failed (%s)\n", strerror (errno));
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; i < rdd_config.iters; i++)
+ {
+ pthread_mutex_lock (&rdd_config.lock);
+ {
+ int bytes = 0;
+ rand = random ();
+
+ if (rdd_config.min_bs == rdd_config.max_bs) {
+ bs = rdd_config.max_bs;
+ } else {
+ bs = rdd_config.min_bs +
+ (rand %
+ (rdd_config.max_bs -
rdd_config.min_bs));
- }
-
- offset = rand % rdd_config.in_file.st.st_size;
- max_ops = rand % rdd_config.max_ops_per_seq;
- if (!max_ops) {
- max_ops ++;
- }
-
- ret = lseek (rdd_config.in_file.fd, offset, SEEK_SET);
- if (ret != offset) {
- fprintf (stderr, "lseek failed (%s)\n",
+ }
+
+ offset = rand % rdd_config.in_file.st.st_size;
+ max_ops = rand % rdd_config.max_ops_per_seq;
+ if (!max_ops) {
+ max_ops ++;
+ }
+
+ ret = lseek (rdd_config.in_file.fd, offset, SEEK_SET);
+ if (ret != offset) {
+ fprintf (stderr, "lseek failed (%s)\n",
strerror (errno));
- ret = -1;
- goto unlock;
- }
+ ret = -1;
+ goto unlock;
+ }
- ret = lseek (rdd_config.out_file.fd, offset, SEEK_SET);
- if (ret != offset) {
- fprintf (stderr, "lseek failed (%s)\n",
+ ret = lseek (rdd_config.out_file.fd, offset, SEEK_SET);
+ if (ret != offset) {
+ fprintf (stderr, "lseek failed (%s)\n",
strerror (errno));
- ret = -1;
- goto unlock;
- }
-
- while (max_ops--)
- {
- bytes = read (rdd_config.in_file.fd, buf, bs);
- if (!bytes) {
- break;
- }
-
- if (bytes == -1) {
- fprintf (stderr, "read failed (%s)\n",
+ ret = -1;
+ goto unlock;
+ }
+
+ while (max_ops--)
+ {
+ bytes = read (rdd_config.in_file.fd, buf, bs);
+ if (!bytes) {
+ break;
+ }
+
+ if (bytes == -1) {
+ fprintf (stderr, "read failed (%s)\n",
strerror (errno));
- ret = -1;
- goto unlock;
- }
+ ret = -1;
+ goto unlock;
+ }
- if (write (rdd_config.out_file.fd, buf, bytes)
+ if (write (rdd_config.out_file.fd, buf, bytes)
!= bytes) {
- fprintf (stderr, "write failed (%s)\n",
+ fprintf (stderr, "write failed (%s)\n",
strerror (errno));
- ret = -1;
- goto unlock;
- }
- }
- }
- unlock:
- pthread_mutex_unlock (&rdd_config.lock);
- if (ret == -1) {
- goto out;
- }
- ret = 0;
- }
+ ret = -1;
+ goto unlock;
+ }
+ }
+ }
+ unlock:
+ pthread_mutex_unlock (&rdd_config.lock);
+ if (ret == -1) {
+ goto out;
+ }
+ ret = 0;
+ }
out:
- free (buf);
- pthread_barrier_wait (&rdd_config.barrier);
+ free (buf);
+ pthread_barrier_wait (&rdd_config.barrier);
- return NULL;
+ return NULL;
}
+static void
+cleanup (void)
+{
+ close (rdd_config.in_file.fd);
+ close (rdd_config.out_file.fd);
+ rdd_config.in_file.fd = rdd_config.out_file.fd = -1;
+}
+
+static int
+check_and_create (void)
+{
+ int ret = -1;
+ char buf[4096] = {0,};
+ struct stat stbuf = {0,};
+ int fd[2] = {-1,};
+ size_t total_size = -1;
+
+ total_size = rdd_config.file_size;
+
+ ret = stat (rdd_config.in_file.path, &stbuf);
+ if (ret == -1 && (errno != ENOENT))
+ goto out;
+
+ fd[1] = open (rdd_config.in_file.path, O_CREAT | O_WRONLY | O_TRUNC);
+ if (fd[1] == -1)
+ goto out;
+
+ fd[0] = open ("/dev/urandom", O_RDONLY);
+ if (fd[0] == -1)
+ goto out;
+
+ while (total_size > 0) {
+ if (total_size >= 4096) {
+ ret = read (fd[0], buf, 4096);
+ if (ret == -1)
+ goto out;
+ ret = write (fd[1], buf, 4096);
+ if (ret == -1)
+ goto out;
+ total_size = total_size - 4096;
+ } else {
+ ret = read (fd[0], buf, total_size);
+ if (ret == -1)
+ goto out;
+ ret = write (fd[1], buf, total_size);
+ if (ret == -1)
+ goto out;
+ total_size = total_size - total_size;
+ }
+
+ }
+
+ ret = 0;
+
+out:
+ if (fd[0] > 0)
+ close (fd[0]);
+ if (fd[1] > 0)
+ close (fd[1]);
+ return ret;
+}
static int
rdd_spawn_threads (void)
{
- int i = 0, ret = -1, fd = -1;
- char buf[4096];
+ int i = 0, ret = -1, fd = -1;
+ char buf[4096];
- fd = open (rdd_config.in_file.path, O_RDONLY);
- if (fd < 0) {
- fprintf (stderr, "cannot open %s (%s)\n",
+ ret = check_and_create ();
+ if (ret == -1)
+ goto out;
+
+ fd = open (rdd_config.in_file.path, O_RDONLY);
+ if (fd < 0) {
+ fprintf (stderr, "cannot open %s (%s)\n",
rdd_config.in_file.path, strerror (errno));
- ret = -1;
- goto out;
- }
- ret = fstat (fd, &rdd_config.in_file.st);
- if (ret != 0) {
- close (fd);
- fprintf (stderr, "cannot stat %s (%s)\n",
+ ret = -1;
+ goto out;
+ }
+ ret = fstat (fd, &rdd_config.in_file.st);
+ if (ret != 0) {
+ close (fd);
+ fprintf (stderr, "cannot stat %s (%s)\n",
rdd_config.in_file.path, strerror (errno));
- ret = -1;
- goto out;
- }
- rdd_config.in_file.fd = fd;
+ ret = -1;
+ goto out;
+ }
+ rdd_config.in_file.fd = fd;
- fd = open (rdd_config.out_file.path, O_WRONLY | O_CREAT,
+ fd = open (rdd_config.out_file.path, O_WRONLY | O_CREAT | O_TRUNC,
S_IRWXU | S_IROTH);
- if (fd < 0) {
- close (rdd_config.in_file.fd);
- rdd_config.in_file.fd = -1;
- fprintf (stderr, "cannot open %s (%s)\n",
+ if (fd < 0) {
+ close (rdd_config.in_file.fd);
+ rdd_config.in_file.fd = -1;
+ fprintf (stderr, "cannot open %s (%s)\n",
rdd_config.out_file.path, strerror (errno));
- ret = -1;
- goto out;
- }
- rdd_config.out_file.fd = fd;
-
- while ((ret = read (rdd_config.in_file.fd, buf, 4096)) > 0) {
- if (write (rdd_config.out_file.fd, buf, ret) != ret) {
- fprintf (stderr, "write failed (%s)\n",
+ ret = -1;
+ goto out;
+ }
+ rdd_config.out_file.fd = fd;
+
+ while ((ret = read (rdd_config.in_file.fd, buf, 4096)) > 0) {
+ if (write (rdd_config.out_file.fd, buf, ret) != ret) {
+ fprintf (stderr, "write failed (%s)\n",
strerror (errno));
- close (rdd_config.in_file.fd);
- close (rdd_config.out_file.fd);
- rdd_config.in_file.fd = rdd_config.out_file.fd = -1;
- ret = -1;
- goto out;
- }
- }
-
- rdd_config.threads = calloc (rdd_config.thread_count,
+ cleanup ();
+ ret = -1;
+ goto out;
+ }
+ }
+
+ rdd_config.threads = calloc (rdd_config.thread_count,
sizeof (pthread_t));
- if (rdd_config.threads == NULL) {
- fprintf (stderr, "calloc() failed (%s)\n", strerror (errno));
+ if (rdd_config.threads == NULL) {
+ fprintf (stderr, "calloc() failed (%s)\n", strerror (errno));
- ret = -1;
- close (rdd_config.in_file.fd);
- close (rdd_config.out_file.fd);
- rdd_config.in_file.fd = rdd_config.out_file.fd = -1;
- goto out;
- }
+ ret = -1;
+ cleanup ();
+ goto out;
+ }
- ret = pthread_barrier_init (&rdd_config.barrier, NULL,
+ ret = pthread_barrier_init (&rdd_config.barrier, NULL,
rdd_config.thread_count + 1);
- if (ret != 0) {
- fprintf (stderr, "pthread_barrier_init() failed (%s)\n",
+ if (ret != 0) {
+ fprintf (stderr, "pthread_barrier_init() failed (%s)\n",
strerror (ret));
- free (rdd_config.threads);
- close (rdd_config.in_file.fd);
- close (rdd_config.out_file.fd);
- rdd_config.in_file.fd = rdd_config.out_file.fd = -1;
- ret = -1;
- goto out;
- }
-
- ret = pthread_mutex_init (&rdd_config.lock, NULL);
- if (ret != 0) {
- fprintf (stderr, "pthread_mutex_init() failed (%s)\n",
+ free (rdd_config.threads);
+ cleanup ();
+ ret = -1;
+ goto out;
+ }
+
+ ret = pthread_mutex_init (&rdd_config.lock, NULL);
+ if (ret != 0) {
+ fprintf (stderr, "pthread_mutex_init() failed (%s)\n",
strerror (ret));
- free (rdd_config.threads);
- pthread_barrier_destroy (&rdd_config.barrier);
- close (rdd_config.in_file.fd);
- close (rdd_config.out_file.fd);
- rdd_config.in_file.fd = rdd_config.out_file.fd = -1;
- ret = -1;
- goto out;
- }
-
- for (i = 0; i < rdd_config.thread_count; i++)
- {
- ret = pthread_create (&rdd_config.threads[i], NULL,
+ free (rdd_config.threads);
+ pthread_barrier_destroy (&rdd_config.barrier);
+ cleanup ();
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; i < rdd_config.thread_count; i++)
+ {
+ ret = pthread_create (&rdd_config.threads[i], NULL,
rdd_read_write, NULL);
- if (ret != 0) {
- fprintf (stderr, "pthread_create failed (%s)\n",
+ if (ret != 0) {
+ fprintf (stderr, "pthread_create failed (%s)\n",
strerror (errno));
- exit (1);
- }
- }
+ exit (1);
+ }
+ }
out:
- return ret;
+ return ret;
}
-
static void
rdd_wait_for_completion (void)
{
- pthread_barrier_wait (&rdd_config.barrier);
+ pthread_barrier_wait (&rdd_config.barrier);
}
-int
+int
main (int argc, char *argv[])
{
- int ret = -1;
+ int ret = -1;
- rdd_default_config ();
+ rdd_default_config ();
- ret = argp_parse (&argp, argc, argv, 0, 0, NULL);
- if (ret != 0) {
- ret = -1;
- fprintf (stderr, "%s: argp_parse() failed\n", argv[0]);
- goto err;
- }
+ ret = argp_parse (&argp, argc, argv, 0, 0, NULL);
+ if (ret != 0) {
+ ret = -1;
+ fprintf (stderr, "%s: argp_parse() failed\n", argv[0]);
+ goto err;
+ }
- if (!rdd_valid_config ()) {
- ret = -1;
- fprintf (stderr, "%s: configuration validation failed\n",
+ if (!rdd_valid_config ()) {
+ ret = -1;
+ fprintf (stderr, "%s: configuration validation failed\n",
argv[0]);
- goto err;
- }
+ goto err;
+ }
- ret = rdd_spawn_threads ();
- if (ret != 0) {
- fprintf (stderr, "%s: spawning threads failed\n", argv[0]);
- goto err;
- }
+ ret = rdd_spawn_threads ();
+ if (ret != 0) {
+ fprintf (stderr, "%s: spawning threads failed\n", argv[0]);
+ goto err;
+ }
- rdd_wait_for_completion ();
+ rdd_wait_for_completion ();
err:
- return ret;
-}
+ return ret;
+}
diff --git a/extras/clear_xattrs.sh b/extras/clear_xattrs.sh
new file mode 100755
index 000000000..dd04731e8
--- /dev/null
+++ b/extras/clear_xattrs.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+# Clear the trusted.gfid xattr in the brick tree
+
+# This script must be run only on a stopped brick/volume
+# Stop the volume to make sure no rebalance/replace-brick
+# operations are on-going
+
+# Not much error checking
+remove_xattrs ()
+{
+ find "$1" -exec setfattr -h -x "trusted.gfid" '{}' \; > /dev/null 2>&1;
+ find "$1" -exec setfattr -h -x "trusted.glusterfs.volume-id" '{}' \; > /dev/null 2>&1;
+}
+
+main ()
+{
+ if [ -z "$1" ]; then
+ echo "Usage: $0 <brick_path(s)>";
+ exit 1;
+ fi
+
+ export PATH;
+ which getfattr > /dev/null 2>&1;
+ if [ $? -ne 0 ]; then
+ echo "attr package missing";
+ exit 2;
+ fi
+
+ which setfattr > /dev/null 2>&1;
+ if [ $? -ne 0 ]; then
+ echo "attr package missing";
+ exit 2;
+ fi
+
+ for brick in "$@";
+ do
+ stat "$brick" > /dev/null 2>&1;
+ if [ $? -ne 0 ]; then
+ echo "brick: $brick does not exist";
+ exit 3;
+ fi
+ if [ ! -d "$brick" ]; then
+ echo "$brick: not a directory";
+ exit 4;
+ fi
+ echo "xattr clean-up in progress: $brick";
+ remove_xattrs "$brick";
+ echo "$brick ready to be used as a glusterfs brick";
+ done;
+}
+
+main "$@"; \ No newline at end of file
diff --git a/extras/contri-add.sh b/extras/contri-add.sh
new file mode 100755
index 000000000..7db5edd5d
--- /dev/null
+++ b/extras/contri-add.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+# This script adds contributions of files/directories in backend to volume
+# size.
+# It can also be used to debug by passing dir as first argument, in which case
+# it will just add contributions from immediate children of a directory and
+# displays only if added contributions from immediate children is different
+# from size stored in directory.
+# For Eg., find <backend-directory> -type d -exec ./contri-add.sh dir \{} \;
+# will list all the directories which have descrepancies in their
+# size/contributions.
+
+usage ()
+{
+ echo >&2 "usage: $0 <file|dir> <list-of-backend-directories>"
+}
+
+add_contributions ()
+{
+ local var=0
+ local count=0
+
+ SIZE=`getfattr -h -e hex -n trusted.glusterfs.quota.size $2 2>&1 | sed -e '/^#/d' | sed -e '/^getfattr/d' | sed -e '/^$/d' | cut -d'=' -f 2`
+ CONTRI=`getfattr -h -e hex -d -m trusted.glusterfs.quota.*.contri $2 2>&1 | sed -e '/^#/d' | sed -e '/^getfattr/d' | sed -e '/^$/d' | cut -d'=' -f 2`
+
+ if [ $1 == "file" ]; then
+ PATHS=`find $2 ! -type d | sed -e "\|^$2$|d" | sed -e '/^[ \t]*$/d'`
+ else
+ PATHS=`find $2 -maxdepth 1 | sed -e "\|^$2$|d" | sed -e '/^[ \t]*$/d'`
+ fi
+
+ if [ -z "$PATHS" ]; then
+ return 0
+ fi
+
+ CONTRIBUTIONS=`echo $PATHS | xargs getfattr -h -e hex -d -m trusted.glusterfs.quota.*.contri 2>&1 | sed -e '/^#/d' | sed -e '/^getfattr/d' | sed -e '/^$/d' | cut -d'=' -f 2 | sed -e 's/^[ \t]*\([^ \t]*\)/\1/g'`
+
+ if [ -n "$CONTRIBUTIONS" ]; then
+ for i in $CONTRIBUTIONS; do
+ count=$(($count + 1))
+ var=$(($var + $i))
+ done
+ fi
+
+ if [ $1 == "file" ] || [ $var -ne $(($SIZE)) ] || [ $(($SIZE)) -ne $(($CONTRI)) ]; then
+ if [ $1 == "dir" ]; then
+ TMP_PATH=`echo $2 | sed -e "s/\/home\/export\/[0-9]*/\/mnt\/raghu/g"`
+ stat $TMP_PATH > /dev/null
+ fi
+
+ echo "file count $count"
+ echo "added contribution of $2=$var"
+ echo "size stored in xattrs on $2=$(($SIZE))"
+ echo "contribution of $2 to its parent directory=$(($CONTRI))"
+ echo "=============================================================="
+ fi
+}
+
+
+main ()
+{
+ [ $# -lt 1 ] && usage
+
+ TYPE=$1
+
+ shift 1
+
+ for i in $@; do
+ add_contributions $TYPE $i
+ done
+}
+
+main $@ \ No newline at end of file
diff --git a/extras/disk_usage_sync.sh b/extras/disk_usage_sync.sh
new file mode 100755
index 000000000..85ee158f8
--- /dev/null
+++ b/extras/disk_usage_sync.sh
@@ -0,0 +1,86 @@
+#!/bin/sh
+
+# This script can be used to sync disk usage before activating quotas on GlusterFS.
+# There are two scenarios where quotas are used and this script has to be used accordingly -
+#
+# 1. Server side
+# The script needs to be run with backend export directory as the argument. This script updates
+# the current disk usage of the backend directory in a way that is intelligible to the GlusterFS quota
+# translator. Make sure you run this script before starting glusterfsd (GlusterFS server process):
+# * for the first time
+# * after any server outage (reboot, etc.)
+
+# 2. Client side
+# The script needs to be run with the client mount point as the argument. It updates the current disk
+# of the GlusterFS volume is a way that is intelligible to the GlusterFS quota translator. Make sure
+# you run this script after a fresh mount of the GlusterFS volume on the client:
+# * For the first time
+# * After any client outage (reboot, remount, etc.)
+
+# Please note that this script is dependent on the 'attr' package, more specifically 'setfattr' to set
+# extended attributes of files.
+# GlusterFS
+#
+# (c) 2010 Gluster Inc <http://www.gluster.com/>
+
+PROGRAM_NAME="disk_usage_sync.sh"
+
+#check if setfattr is available
+check_for_attr ()
+{
+ `command -v setfattr >/dev/null`
+ if [ "${?}" -gt 0 ]; then
+ echo >&2 "This script requires the 'attr' package to run. Either it has not been installed or is not present currently in the system path."
+ exit 1
+ fi
+
+}
+
+usage () {
+ echo >&2 "$PROGRAM_NAME - Command used to sync disk usage information before activating quotas on GlusterFS.
+
+usage: $PROGRAM_NAME <target-directory>"
+
+exit 1
+}
+
+TARGET_DIR=$1
+EXT_ATTR_NAME="trusted.glusterfs-quota-du"
+
+#output of du in number of bytes
+get_disk_usage ()
+{
+ if [ ! -d $TARGET_DIR ]; then
+ echo >&2 "Error: $TARGET_DIR does not exist."
+ exit 1
+ fi
+
+ DISK_USAGE=`du -bc $TARGET_DIR | grep 'total' | cut -f1`
+ if [ "${?}" -gt 0 ]; then
+ exit 1
+ fi
+
+}
+
+#set the extended attribute of the root directory with du size in bytes
+set_disk_usage ()
+{
+ ` setfattr -n $EXT_ATTR_NAME -v $DISK_USAGE $TARGET_DIR`
+ if [ "${?}" -gt 0 ]; then
+ exit 1
+ fi
+}
+
+main ()
+{
+ [ $# -lt 1 ] && usage
+
+ check_for_attr
+
+ get_disk_usage
+ set_disk_usage
+
+ printf "Disk Usage information has been sync'd successfully.\n"
+}
+
+main "$@"
diff --git a/extras/file_size_contri.sh b/extras/file_size_contri.sh
new file mode 100755
index 000000000..4f52a9a89
--- /dev/null
+++ b/extras/file_size_contri.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+# This script checks whether the contribution and disk-usage of a file is same.
+
+CONTRIBUTION_HEX=`getfattr -h -e hex -d -m trusted.glusterfs.quota.*.contri $1 2>&1 | sed -e '/^#/d' | sed -e '/^getfattr/d' | sed -e '/^$/d' | cut -d'=' -f 2`
+
+BLOCKS=`stat -c %b $1`
+SIZE=$(($BLOCKS * 512))
+
+CONTRIBUTION=`printf "%d" $CONTRIBUTION_HEX`
+
+if [ $CONTRIBUTION -ne $SIZE ]; then
+ printf "contribution of %s:%d\n" $1 $CONTRIBUTION
+ echo "size of $1: $SIZE"
+ echo "==================================================="
+fi
+
diff --git a/extras/generate-xdr-files.sh b/extras/generate-xdr-files.sh
new file mode 100755
index 000000000..bc02f77c9
--- /dev/null
+++ b/extras/generate-xdr-files.sh
@@ -0,0 +1,98 @@
+#!/bin/sh
+
+_init ()
+{
+ xfile="$1";
+ # TODO: check the validity of .x file
+
+ cfile="${1%.x}.c";
+ hfile="${1%.x}.h";
+
+ tmp_cfile="$1.c";
+
+ tmp1_hfile="$1.h.tmp";
+ tmp1_cfile="$1.c.tmp";
+
+}
+
+append_licence_header ()
+{
+ src_file=$1;
+ dst_file=$2;
+
+ cat >$dst_file <<EOF
+/*
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xdr-common.h"
+#include "compat.h"
+
+#if defined(__GNUC__)
+#if __GNUC__ >= 4
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#endif
+#endif
+
+EOF
+
+ cat $src_file >> $dst_file;
+
+}
+
+main ()
+{
+ if [ $# -ne 1 ]; then
+ echo "wrong number of arguments given"
+ echo " $0 <XDR-definition-file>.x"
+ exit 1;
+ fi
+
+
+ echo -n "writing the XDR routine file ($tmp_cfile) ... ";
+ rm -f $tmp_cfile;
+ rpcgen -c -o $tmp_cfile $xfile;
+
+ # get rid of warnings in xdr .c file due to "unused variable 'buf'"
+ sed -i -e 's:buf;$:buf;\
+ buf = NULL;:' $tmp_cfile;
+
+ sed -i '/int i;/d' $tmp_cfile;
+
+ echo "OK";
+
+ # no need for a temporary file here as there are no changes from glusterfs
+ echo -n "writing the XDR header file ($hfile) ... ";
+ rm -f $hfile;
+ rpcgen -h -o $hfile $xfile;
+
+ # the '#ifdef' part of file should be fixed
+ sed -i -e 's/-/_/g' $hfile;
+
+ echo "OK";
+
+ echo -n "writing licence header to the generated files ... ";
+ # Write header to temp file and append generated file
+ append_licence_header $hfile $tmp1_hfile;
+ append_licence_header $tmp_cfile $tmp1_cfile;
+ echo "OK"
+
+ # now move the destination file to actual original file
+ echo -n "updating existing files ... ";
+ mv $tmp1_hfile $hfile;
+ mv $tmp1_cfile $cfile;
+
+ # remove unwanted temporary files (if any)
+ rm -f $tmp_cfile $tmp1_cfile $tmp1_hfile
+
+ echo "OK"
+
+}
+
+_init "$@" && main "$@";
diff --git a/extras/geo-rep/Makefile.am b/extras/geo-rep/Makefile.am
new file mode 100644
index 000000000..fc5f56d54
--- /dev/null
+++ b/extras/geo-rep/Makefile.am
@@ -0,0 +1,2 @@
+EXTRA_DIST = gsync-sync-gfid.c gsync-upgrade.sh generate-gfid-file.sh \
+ get-gfid.sh slave-upgrade.sh
diff --git a/extras/geo-rep/generate-gfid-file.sh b/extras/geo-rep/generate-gfid-file.sh
new file mode 100644
index 000000000..c6739fbf1
--- /dev/null
+++ b/extras/geo-rep/generate-gfid-file.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+#Usage: generate-gfid-file.sh <master-volfile-server:master-volume> <path-to-get-gfid.sh> <output-file>
+
+function get_gfids()
+{
+ GET_GFID_CMD=$1
+ OUTPUT_FILE=$2
+ find . -exec $GET_GFID_CMD {} \; > $OUTPUT_FILE
+}
+
+function mount_client()
+{
+ local T; # temporary mount
+ local i; # inode number
+
+ VOLFILE_SERVER=$1;
+ VOLUME=$2;
+ GFID_CMD=$3;
+ OUTPUT=$4;
+
+ T=$(mktemp -d);
+
+ glusterfs -s $VOLFILE_SERVER --volfile-id $VOLUME $T;
+
+ i=$(stat -c '%i' $T);
+
+ [ "x$i" = "x1" ] || fatal "could not mount volume $MASTER on $T";
+
+ cd $T;
+
+ get_gfids $GFID_CMD $OUTPUT
+
+ cd -;
+
+ umount $T || fatal "could not umount $MASTER from $T";
+
+ rmdir $T || warn "rmdir of $T failed";
+}
+
+
+function main()
+{
+ SLAVE=$1
+ GET_GFID_CMD=$2
+ OUTPUT=$3
+
+ VOLFILE_SERVER=`echo $SLAVE | sed -e 's/\(.*\):.*/\1/'`
+ VOLUME_NAME=`echo $SLAVE | sed -e 's/.*:\(.*\)/\1/'`
+
+ mount_client $VOLFILE_SERVER $VOLUME_NAME $GET_GFID_CMD $OUTPUT
+}
+
+main "$@";
diff --git a/extras/geo-rep/get-gfid.sh b/extras/geo-rep/get-gfid.sh
new file mode 100755
index 000000000..a4d609b0b
--- /dev/null
+++ b/extras/geo-rep/get-gfid.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+ATTR_STR=`getfattr -h $1 -n glusterfs.gfid.string`
+GLFS_PATH=`echo $ATTR_STR | sed -e 's/# file: \(.*\) glusterfs.gfid.string*/\1/g'`
+GFID=`echo $ATTR_STR | sed -e 's/.*glusterfs.gfid.string="\(.*\)"/\1/g'`
+
+echo "$GFID $GLFS_PATH"
diff --git a/extras/geo-rep/gsync-sync-gfid.c b/extras/geo-rep/gsync-sync-gfid.c
new file mode 100644
index 000000000..601f4720e
--- /dev/null
+++ b/extras/geo-rep/gsync-sync-gfid.c
@@ -0,0 +1,106 @@
+
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <attr/xattr.h>
+#include <libgen.h>
+#include <ctype.h>
+#include <stdlib.h>
+
+#ifndef UUID_CANONICAL_FORM_LEN
+#define UUID_CANONICAL_FORM_LEN 36
+#endif
+
+#ifndef GF_FUSE_AUX_GFID_HEAL
+#define GF_FUSE_AUX_GFID_HEAL "glusterfs.gfid.heal"
+#endif
+
+#define GLFS_LINE_MAX (PATH_MAX + (2 * UUID_CANONICAL_FORM_LEN))
+
+int
+main (int argc, char *argv[])
+{
+ char *file = NULL;
+ char *tmp = NULL;
+ char *tmp1 = NULL;
+ char *parent_dir = NULL;
+ char *gfid = NULL;
+ char *bname = NULL;
+ int ret = -1;
+ int len = 0;
+ FILE *fp = NULL;
+ char line[GLFS_LINE_MAX] = {0,};
+ char *path = NULL;
+ void *blob = NULL;
+ void *tmp_blob = NULL;
+
+ if (argc != 2) {
+ /* each line in the file has the following format
+ * uuid-in-canonical-form path-relative-to-gluster-mount.
+ * Both uuid and relative path are from master mount.
+ */
+ fprintf (stderr, "usage: %s <file-of-paths-to-be-synced>\n",
+ argv[0]);
+ goto out;
+ }
+
+ file = argv[1];
+
+ fp = fopen (file, "r");
+ if (fp == NULL) {
+ fprintf (stderr, "cannot open %s for reading (%s)\n",
+ file, strerror (errno));
+ goto out;
+ }
+
+ while (fgets (line, GLFS_LINE_MAX, fp) != NULL) {
+ tmp = line;
+ path = gfid = line;
+
+ path += UUID_CANONICAL_FORM_LEN + 1;
+
+ while(isspace (*path))
+ path++;
+
+ if ((strlen (line) < GLFS_LINE_MAX) &&
+ (line[strlen (line) - 1] == '\n'))
+ line[strlen (line) - 1] = '\0';
+
+ line[UUID_CANONICAL_FORM_LEN] = '\0';
+
+ tmp = strdup (path);
+ tmp1 = strdup (path);
+ parent_dir = dirname (tmp);
+ bname = basename (tmp1);
+
+ /* gfid + '\0' + bname + '\0' */
+ len = UUID_CANONICAL_FORM_LEN + 1 + strlen (bname) + 1;
+
+ blob = calloc (1, len);
+
+ memcpy (blob, gfid, UUID_CANONICAL_FORM_LEN);
+
+ tmp_blob = blob + UUID_CANONICAL_FORM_LEN + 1;
+
+ memcpy (tmp_blob, bname, strlen (bname));
+
+ ret = setxattr (parent_dir, GF_FUSE_AUX_GFID_HEAL, blob, len,
+ 0);
+ if (ret < 0) {
+ fprintf (stderr, "setxattr on %s/%s failed (%s)\n",
+ parent_dir, bname, strerror (errno));
+ }
+ memset (line, 0, GLFS_LINE_MAX);
+
+ free (blob);
+ free (tmp); free (tmp1);
+ blob = NULL;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
diff --git a/extras/geo-rep/gsync-upgrade.sh b/extras/geo-rep/gsync-upgrade.sh
new file mode 100644
index 000000000..b17948736
--- /dev/null
+++ b/extras/geo-rep/gsync-upgrade.sh
@@ -0,0 +1,123 @@
+#!/bin/bash
+#usage: gsync-upgrade.sh <slave-volfile-server:slave-volume> <gfid-file>
+# <path-to-gsync-sync-gfid> <ssh-identity-file>
+#<slave-volfile-server>: a machine on which gluster cli can fetch slave volume info.
+# slave-volfile-server defaults to localhost.
+#
+#<gfid-file>: a file containing paths and their associated gfids
+# on master. The paths are relative to master mount point
+# (not absolute). An example extract of <gfid-file> can be,
+#
+# <extract>
+# 22114455-57c5-46e9-a783-c40f83a72b09 /dir
+# 25772386-3eb8-4550-a802-c3fdc938ca80 /dir/file
+# </extract>
+#
+#<ssh-identity-file>: file from which the identity (private key) for public key authentication is read.
+
+SLAVE_MOUNT='/tmp/glfs_slave'
+
+function SSH()
+{
+ HOST=$1
+ SSHKEY=$2
+
+ shift 2
+
+ ssh -qi $SSHKEY \
+ -oPasswordAuthentication=no \
+ -oStrictHostKeyChecking=no \
+ "$HOST" "$@";
+}
+
+function get_bricks()
+{
+ SSHKEY=$3
+
+ SSH $1 $SSHKEY "gluster volume info $2" | grep -E 'Brick[0-9]+' | sed -e 's/[^:]*:\(.*\)/\1/g'
+}
+
+function cleanup_brick()
+{
+ HOST=$1
+ BRICK=$2
+ SSHKEY=$3
+
+ # TODO: write a C program to receive a list of files and does cleanup on
+ # them instead of spawning a new setfattr process for each file if
+ # performance is bad.
+ SSH -i $SSHKEY $HOST "rm -rf $BRICK/.glusterfs/* && find $BRICK -exec setfattr -x trusted.gfid {} \;"
+}
+
+function cleanup_slave()
+{
+ SSHKEY=$2
+
+ VOLFILE_SERVER=`echo $1 | sed -e 's/\(.*\):.*/\1/'`
+ VOLUME_NAME=`echo $1 | sed -e 's/.*:\(.*\)/\1/'`
+
+ BRICKS=`get_bricks $VOLFILE_SERVER $VOLUME_NAME $SSHKEY`
+
+ for i in $BRICKS; do
+ HOST=`echo $i | sed -e 's/\(.*\):.*/\1/'`
+ BRICK=`echo $i | sed -e 's/.*:\(.*\)/\1/'`
+ cleanup_brick $HOST $BRICK $SSHKEY
+ done
+
+ SSH -i $SSHKEY $VOLFILE_SERVER "gluster --mode=script volume stop $VOLUME_NAME; gluster volume start $VOLUME_NAME";
+
+}
+
+function mount_client()
+{
+ local T; # temporary mount
+ local i; # inode number
+ GFID_FILE=$3
+ SYNC_CMD=$4
+
+ T=$(mktemp -d);
+
+ glusterfs --aux-gfid-mount -s $1 --volfile-id $2 $T;
+
+ i=$(stat -c '%i' $T);
+
+ [ "x$i" = "x1" ] || fatal "could not mount volume $MASTER on $T";
+
+ cd $T;
+
+ $SYNC_CMD $GFID_FILE
+
+ cd -;
+
+ umount -l $T || fatal "could not umount $MASTER from $T";
+
+ rmdir $T || warn "rmdir of $T failed";
+}
+
+function sync_gfids()
+{
+ SLAVE=$1
+ GFID_FILE=$2
+
+ SLAVE_VOLFILE_SERVER=`echo $SLAVE | sed -e 's/\(.*\):.*/\1/'`
+ SLAVE_VOLUME_NAME=`echo $SLAVE | sed -e 's/.*:\(.*\)/\1/'`
+
+ if [ "x$SLAVE_VOLFILE_SERVER" = "x" ]; then
+ SLAVE_VOLFILE_SERVER="localhost"
+ fi
+
+ mount_client $SLAVE_VOLFILE_SERVER $SLAVE_VOLUME_NAME $GFID_FILE $3
+}
+
+function upgrade()
+{
+ SLAVE=$1
+ GFID_FILE=$2
+ SYNC_CMD=$3
+ SSHKEY=$4
+
+ cleanup_slave $SLAVE $SSHKEY
+ sync_gfids $SLAVE $GFID_FILE $SYNC_CMD
+}
+
+upgrade "$@"
diff --git a/extras/geo-rep/slave-upgrade.sh b/extras/geo-rep/slave-upgrade.sh
new file mode 100644
index 000000000..6198f408a
--- /dev/null
+++ b/extras/geo-rep/slave-upgrade.sh
@@ -0,0 +1,102 @@
+#!/bin/bash
+#usage: slave-upgrade.sh <volfile-server:volname> <gfid-file>
+# <path-to-gsync-sync-gfid>
+#<slave-volfile-server>: a machine on which gluster cli can fetch slave volume info.
+# slave-volfile-server defaults to localhost.
+#
+#<gfid-file>: a file containing paths and their associated gfids
+# on master. The paths are relative to master mount point
+# (not absolute). An example extract of <gfid-file> can be,
+#
+# <extract>
+# 22114455-57c5-46e9-a783-c40f83a72b09 /dir
+# 25772386-3eb8-4550-a802-c3fdc938ca80 /dir/file
+# </extract>
+
+function get_bricks()
+{
+ gluster volume info $1 | grep -E 'Brick[0-9]+' | sed -e 's/[^:]*:\(.*\)/\1/g'
+}
+
+function cleanup_brick()
+{
+ HOST=$1
+ BRICK=$2
+
+ # TODO: write a C program to receive a list of files and does cleanup on
+ # them instead of spawning a new setfattr process for each file if
+ # performance is bad.
+ ssh $HOST "rm -rf $BRICK/.glusterfs/* && find $BRICK -exec setfattr -x trusted.gfid {} \; 2>/dev/null"
+}
+
+function cleanup_slave()
+{
+ VOLUME_NAME=`echo $1 | sed -e 's/.*:\(.*\)/\1/'`
+
+ BRICKS=`get_bricks $VOLUME_NAME`
+
+ for i in $BRICKS; do
+ HOST=`echo $i | sed -e 's/\(.*\):.*/\1/'`
+ BRICK=`echo $i | sed -e 's/.*:\(.*\)/\1/'`
+ cleanup_brick $HOST $BRICK
+ done
+
+ # Now restart the volume
+ gluster --mode=script volume stop $VOLUME_NAME;
+ gluster volume start $VOLUME_NAME;
+}
+
+function mount_client()
+{
+ local T; # temporary mount
+ local i; # inode number
+
+ VOLUME_NAME=$2;
+ GFID_FILE=$3
+ SYNC_CMD=$4
+
+ T=$(mktemp -d);
+
+ glusterfs --aux-gfid-mount -s $1 --volfile-id $VOLUME_NAME $T;
+
+ i=$(stat -c '%i' $T);
+
+ cd $T;
+
+ $SYNC_CMD $GFID_FILE
+
+ cd -;
+
+ umount $T || fatal "could not umount $MASTER from $T";
+
+ rmdir $T || warn "rmdir of $T failed";
+}
+
+function sync_gfids()
+{
+ SLAVE=$1
+ GFID_FILE=$2
+ SYNC_CMD=$3
+
+ SLAVE_VOLFILE_SERVER=`echo $SLAVE | sed -e 's/\(.*\):.*/\1/'`
+ SLAVE_VOLUME_NAME=`echo $SLAVE | sed -e 's/.*:\(.*\)/\1/'`
+
+ if [ "x$SLAVE_VOLFILE_SERVER" = "x" ]; then
+ SLAVE_VOLFILE_SERVER="localhost"
+ fi
+
+ mount_client $SLAVE_VOLFILE_SERVER $SLAVE_VOLUME_NAME $GFID_FILE $SYNC_CMD
+}
+
+function upgrade()
+{
+ SLAVE=$1
+ GFID_FILE=$2
+ SYNC_CMD=$3
+
+ cleanup_slave $SLAVE
+
+ sync_gfids $SLAVE $GFID_FILE $SYNC_CMD
+}
+
+upgrade "$@"
diff --git a/extras/gluster-rsyslog-5.8.conf b/extras/gluster-rsyslog-5.8.conf
new file mode 100644
index 000000000..2519999bc
--- /dev/null
+++ b/extras/gluster-rsyslog-5.8.conf
@@ -0,0 +1,51 @@
+##### gluster.conf #####
+
+#
+## If you want to log every message to the log file instead of
+## intelligently suppressing repeated messages, set off to
+## RepeatedMsgReduction. This change requires rsyslog restart
+## (eg. run 'service rsyslog restart')
+#
+#$RepeatedMsgReduction off
+$RepeatedMsgReduction on
+
+#
+## The mmcount module provides the capability to count log messages by
+## severity or json property of given app-name. The count value is added
+## into the log message as json property named '$msgid'
+#
+$ModLoad mmcount
+$mmcountKey gf_code # start counting value of gf_code
+
+$template Glusterfsd_dynLogFile,"/var/log/glusterfs/bricks/%app-name%.log"
+$template Gluster_dynLogFile,"/var/log/glusterfs/%app-name%.log"
+
+$template GLFS_Template,"%msgid%/%syslogfacility-text:::uppercase%/%syslogseverity-text:::uppercase% [%TIMESTAMP:::date-rfc3339%] %msg:::sp-if-no-1st-sp%%msg:::drop-last-lf%\n"
+
+#
+## Pass logs to mmcount if app-name is 'gluster'
+#
+if $app-name contains 'gluster' then :mmcount:
+
+if $app-name contains 'glusterfsd' then ?Glusterfsd_dynLogFile;GLFS_Template
+if $app-name contains 'gluster' and not ( $app-name contains 'glusterfsd' ) then ?Gluster_dynLogFile;GLFS_Template
+
+#
+## Sample configuration to send a email alert for every 50th mmcount
+#
+#$ModLoad ommail
+#$ActionMailSMTPServer smtp.example.com
+#$ActionMailFrom rsyslog@example.com
+#$ActionMailTo glusteradmin@example.com
+#$template mailSubject,"50th message of gf_code=9999 on %hostname%"
+#$template mailBody,"RSYSLOG Alert\r\nmsg='%msg%'"
+#$ActionMailSubject mailSubject
+#$ActionExecOnlyOnceEveryInterval 30
+#if $app-name == 'glusterfsd' and $msgid != 0 and $msgid % 50 == 0 \
+#then :ommail:;RSYSLOG_SyslogProtocol23Format
+#
+
+#
+## discard logs where app-name is 'gluster' as we processed already
+#
+if $app-name contains 'gluster' then ~
diff --git a/extras/gluster-rsyslog-7.2.conf b/extras/gluster-rsyslog-7.2.conf
new file mode 100644
index 000000000..8b2841543
--- /dev/null
+++ b/extras/gluster-rsyslog-7.2.conf
@@ -0,0 +1,76 @@
+##### gluster.conf #####
+#
+## If you want to log every message to the log file instead of
+## intelligently suppressing repeated messages, set off to
+## RepeatedMsgReduction. This change requires rsyslog restart
+## (eg. run 'service rsyslog restart')
+#
+#$RepeatedMsgReduction off
+$RepeatedMsgReduction on
+
+$ModLoad mmjsonparse
+*.* :mmjsonparse:
+
+#
+## The mmcount module provides the capability to count log messages by
+## severity or json property of given app-name. The count value is added
+## into the log message as json property named 'mmcount'
+##
+## More info at http://www.rsyslog.com/doc/mmcount.html
+#
+#module(load="mmcount")
+#action(type="mmcount" appname="glusterd" key="!gf_code") # count each value of gf_code of appname glusterd
+#action(type="mmcount" appname="glusterfsd" key="!gf_code") # count each value of gf_code of appname glusterfsd
+#action(type="mmcount" appname="glusterfs" key="!gf_code") # count each value of gf_code of appname glusterfs
+
+template (name="Glusterfsd_dynLogFile" type="string" string="/var/log/glusterfs/bricks/%app-name%.log")
+template (name="Gluster_dynLogFile" type="string" string="/var/log/glusterfs/%app-name%.log")
+
+template(name="GLFS_template" type="list") {
+ property(name="$!mmcount")
+ constant(value="/")
+ property(name="syslogfacility-text" caseConversion="upper")
+ constant(value="/")
+ property(name="syslogseverity-text" caseConversion="upper")
+ constant(value=" ")
+ constant(value="[")
+ property(name="timereported" dateFormat="rfc3339")
+ constant(value="] ")
+ constant(value="[")
+ property(name="$!gf_code")
+ constant(value="] ")
+ constant(value="[")
+ property(name="$!gf_message")
+ constant(value="] ")
+ property(name="$!msg")
+ constant(value="\n")
+}
+
+if $app-name contains 'glusterfsd' then {
+ action(type="omfile"
+ DynaFile="Glusterfsd_dynLogFile"
+ Template="GLFS_template")
+ stop
+}
+
+if $app-name contains 'gluster' then {
+ action(type="omfile"
+ DynaFile="Gluster_dynLogFile"
+ Template="GLFS_template")
+ stop
+}
+
+#
+## send email for every 50th mmcount
+#$ModLoad ommail
+#if $app-name == 'glusterfsd' and $!mmcount <> 0 and $!mmcount % 50 == 0 then {
+# $ActionMailSMTPServer smtp.example.com
+# $ActionMailFrom rsyslog@example.com
+# $ActionMailTo glusteradmin@example.com
+# $template mailSubject,"50th message of gf_code=9999 on %hostname%"
+# $template mailBody,"RSYSLOG Alert\r\nmsg='%msg%'"
+# $ActionMailSubject mailSubject
+# $ActionExecOnlyOnceEveryInterval 30
+# :ommail:;RSYSLOG_SyslogProtocol23Format
+#}
+#
diff --git a/extras/glusterd-sysconfig b/extras/glusterd-sysconfig
new file mode 100644
index 000000000..8237c5711
--- /dev/null
+++ b/extras/glusterd-sysconfig
@@ -0,0 +1,6 @@
+## Set custom log file and log level (bellow are defaults)
+# LOG_FILE='/var/log/glusterfs/glusterd.log'
+# LOG_LEVEL='INFO'
+
+## Set custom options for glusterd
+# GLUSTERD_OPTIONS=''
diff --git a/extras/glusterd.vol b/extras/glusterd.vol
new file mode 100644
index 000000000..9bac52ab7
--- /dev/null
+++ b/extras/glusterd.vol
@@ -0,0 +1,9 @@
+volume management
+ type mgmt/glusterd
+ option working-directory /var/lib/glusterd
+ option transport-type socket,rdma
+ option transport.socket.keepalive-time 10
+ option transport.socket.keepalive-interval 2
+ option transport.socket.read-fail-log off
+# option base-port 49152
+end-volume
diff --git a/extras/glusterfs-georep-logrotate b/extras/glusterfs-georep-logrotate
new file mode 100644
index 000000000..6a69ab1e3
--- /dev/null
+++ b/extras/glusterfs-georep-logrotate
@@ -0,0 +1,18 @@
+
+rotate 52
+missingok
+
+compress
+delaycompress
+notifempty
+
+/var/log/glusterfs/geo-replication/*/*.log {
+}
+
+
+/var/log/glusterfs/geo-replication-slaves/*.log {
+}
+
+
+/var/log/glusterfs/geo-replication-slaves/*/*.log {
+}
diff --git a/extras/glusterfs-logrotate b/extras/glusterfs-logrotate
new file mode 100644
index 000000000..373ec2e0b
--- /dev/null
+++ b/extras/glusterfs-logrotate
@@ -0,0 +1,27 @@
+# perform the log rotate every week
+weekly
+# keep the backup of 52 weeks
+rotate 52
+missingok
+
+# compress the logs, but from the .2 onwards
+compress
+delaycompress
+notifempty
+
+# Rotate client logs
+/var/log/glusterfs/*.log {
+ sharedscripts
+ postrotate
+ /usr/bin/killall -HUP glusterfs > /dev/null 2>&1 || true
+ /usr/bin/killall -HUP glusterd > /dev/null 2>&1 || true
+ endscript
+}
+
+# Rotate server logs
+/var/log/glusterfs/bricks/*.log {
+ sharedscripts
+ postrotate
+ /usr/bin/killall -HUP glusterfsd > /dev/null 2>&1 || true
+ endscript
+}
diff --git a/extras/glusterfs-mode.el b/extras/glusterfs-mode.el
index e65fbf460..d4f6dc568 100644
--- a/extras/glusterfs-mode.el
+++ b/extras/glusterfs-mode.el
@@ -1,4 +1,4 @@
-;;; Copyright (C) 2007, 2008 Z RESEARCH Inc. <http://www.zresearch.com>
+;;; Copyright (C) 2007-2011 Gluster Inc. <http://www.gluster.com>
;;;
;;; This program is free software; you can redistribute it and/or modify
;;; it under the terms of the GNU General Public License as published by
diff --git a/extras/glusterfs.vim b/extras/glusterfs.vim
index 42fa6b573..899cc6551 100644
--- a/extras/glusterfs.vim
+++ b/extras/glusterfs.vim
@@ -1,20 +1,11 @@
" glusterfs.vim: GNU Vim Syntax file for GlusterFS .vol specification
-" Copyright (C) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
-" This file is part of GlusterFS.
+" Copyright (c) 2007777777Red Hat, Inc. <http://www.redhat.com>
+" This file is part of GlusterFS.
"
-" GlusterFS is free software; you can redistribute it and/or modify
-" it under the terms of the GNU General Public License as published
-" by the Free Software Foundation; either version 3 of the License,
-" or (at your option) any later version.
-"
-" GlusterFS is distributed in the hope that it will be useful, but
-" WITHOUT ANY WARRANTY; without even the implied warranty of
-" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-" General Public License for more details.
-"
-" You should have received a copy of the GNU General Public License
-" along with this program. If not, see
-" <http://www.gnu.org/licenses/>.
+" This file is licensed to you under your choice of the GNU Lesser
+" General Public License, version 3 or any later version (LGPLv3 or
+" later), or the GNU General Public License, version 2 (GPLv2), in all
+" cases as published by the Free Software Foundation.
"
" Last Modified: Wed Aug 1 00:47:10 IST 2007
" Version: 0.8
diff --git a/extras/gnfs-loganalyse.py b/extras/gnfs-loganalyse.py
new file mode 100644
index 000000000..71e79b6be
--- /dev/null
+++ b/extras/gnfs-loganalyse.py
@@ -0,0 +1,260 @@
+#!/bin/python
+"""
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+
+"""
+
+import os
+import string
+import sys
+
+
+class NFSRequest:
+ def requestIsEntryOp (self):
+ op = self.op
+ if op == "CREATE" or op == "LOOKUP" or op == "REMOVE" or op == "LINK" or op == "RENAME" or op == "MKDIR" or op == "RMDIR" or op == "SYMLINK" or op == "MKNOD":
+ return 1
+ else:
+ return 0
+
+ def __init__ (self, logline, linecount):
+ self.calllinecount = 0
+ self.xid = ""
+ self.op = ""
+ self.opdata = ""
+ self.replydata = ""
+ self.replylinecount = 0
+ self.timestamp = ""
+ self.entryname = ""
+ self.gfid = ""
+ self.replygfid = ""
+
+ tokens = logline.strip ().split (" ")
+ self.timestamp = tokens[0] + " " + tokens[1]
+ if "XID:" not in tokens:
+ return None
+
+ if "args:" not in tokens:
+ return None
+
+ self.calllinecount = linecount
+
+ xididx = tokens.index ("XID:")
+ self.xid = tokens [xididx+1].strip(",")
+
+ opidx = tokens.index ("args:")
+ self.op = tokens [opidx-1].strip (":")
+ self.opdata = " ".join(tokens [opidx+1:])
+ if self.requestIsEntryOp ():
+ nameidx = tokens.index ("name:")
+ self.entryname = tokens[nameidx + 1].strip (",")
+ gfididx = tokens.index ("gfid")
+ self.gfid = tokens[gfididx +1].strip(",")
+
+
+ def getXID (self):
+ return self.xid
+
+ def setReply (self, logline, linecount):
+ tokens = logline.strip ().split (" ")
+ timestamp = tokens[0] + " " + tokens[1]
+ statidx = tokens.index ("NFS:")
+ self.replydata = " TimeStamp: " + timestamp + " " + " ".join (tokens [statidx+1:])
+ self.replylinecount = linecount
+ if "gfid" in tokens:
+ gfididx = tokens.index ("gfid")
+ self.replygfid = tokens [gfididx + 1].strip(",")
+
+ def dump (self):
+ print "ReqLine: " + str(self.calllinecount) + " TimeStamp: " + self.timestamp + ", XID: " + self.xid + " " + self.op + " ARGS: " + self.opdata + " RepLine: " + str(self.replylinecount) + " " + self.replydata
+
+class NFSLogAnalyzer:
+
+ def __init__ (self, optn, trackfilename, tracknamefh, stats):
+ self.stats = stats
+ self.xid_request_map = {}
+ self.orphan_replies = {}
+ self.rqlist = []
+ self.CALL = 1
+ self.REPLY = 2
+ self.optn = optn
+ self.trackfilename = trackfilename
+ self.tracknamefh = tracknamefh
+ self.trackedfilehandles = []
+
+ def handle_call_line (self, logline, linecount):
+ newreq = NFSRequest (logline, linecount)
+ xid = newreq.getXID ()
+ if (self.optn == SYNTHESIZE):
+ self.xid_request_map [xid] = newreq
+ self.rqlist.append(newreq)
+ elif self.optn == TRACKFILENAME:
+ if newreq.requestIsEntryOp():
+ if newreq.entryname == self.trackfilename:
+ self.xid_request_map [xid] = newreq
+ self.rqlist.append(newreq)
+ else:
+ del newreq
+ elif self.tracknamefh == ENABLE_TRACKNAME_FH:
+ if len (self.trackedfilehandles) > 0:
+ if newreq.gfid in self.trackedfilehandles:
+ self.xid_request_map [xid] = newreq
+ self.rqlist.append(newreq)
+ else:
+ del newreq
+ else:
+ del newreq
+ else:
+ del newreq
+
+
+ def handle_reply_line (self, logline, linecount):
+ tokens = logline.strip ().split (" ")
+
+ xididx = tokens.index ("XID:")
+ xid = tokens [xididx + 1].strip(",")
+ if xid not in self.xid_request_map.keys ():
+ self.orphan_replies [xid] = logline
+ else:
+ rq = self.xid_request_map [xid]
+ rq.setReply (logline, linecount)
+ if rq.requestIsEntryOp() and rq.entryname == self.trackfilename:
+ self.trackedfilehandles.append (rq.replygfid)
+
+ def analyzeLine (self, logline, linecount):
+ tokens = logline.strip ().split (" ")
+ msgtype = 0
+
+ if "XID:" not in tokens:
+ return
+
+ if "args:" in tokens:
+ msgtype = self.CALL
+ elif "NFS:" in tokens:
+ msgtype = self.REPLY
+
+ if msgtype == self.CALL:
+ self.handle_call_line (logline, linecount)
+ elif msgtype == self.REPLY:
+ self.handle_reply_line (logline, linecount)
+
+ def getStats (self):
+ if self.stats == 0:
+ return
+ rcount = len (self.xid_request_map.keys ())
+ orphancount = len (self.orphan_replies.keys ())
+ print "Requests: " + str(rcount) + ", Orphans: " + str(orphancount)
+
+ def dump (self):
+ self.getStats ()
+ for rq in self.rqlist:
+ rq.dump ()
+ del rq
+
+ self.rqlist = []
+ self.orphan_replies = {}
+ self.xid_request_map = {}
+
+
+linecount = 0
+
+SYNTHESIZE = 1
+TRACKFILENAME = 2
+
+ENABLESTATS = 1
+DISABLESTATS = 0
+
+ENABLE_TRACKNAME_FH = 1
+DISABLE_TRACKNAME_FH = 0
+
+progmsgcount = 1000
+dumpinterval = 200000
+operation = SYNTHESIZE
+stats = ENABLESTATS
+tracknamefh = DISABLE_TRACKNAME_FH
+trackfilename = ""
+
+"""
+Print the progress of the analysing operations every X number of lines read from
+the logs, where X is the argument provided to this option.
+
+Use this to print a status message every say 10000 lines processed or 100000
+lines processed to know how much longer the processing will go on for.
+
+
+USAGE: --progress <NUMLINES>
+"""
+if "--progress" in sys.argv:
+ idx = sys.argv.index ("--progress")
+ progmsgcount = int(sys.argv[idx+1])
+
+"""
+The replies for a NFS request can be separated by hundreds and even thousands
+of other NFS requests and replies. These can be spread over many hundreds and
+thousands of log lines. This script maintains a memory dict to map each request
+to its reply using the XID. Because this is in-core, there is a limit to the
+number of entries in the dict. At regular intervals, it dumps the mapped
+requests and the replies into the stdout. The requests whose replies were not
+found at the point of dumping are left as orphans, i.e. without info about the
+replies. Use this option to tune the number of lines to maximize the number of
+requests whose replies are found while balancing the dict size with memory
+on the machine. The default works fine for most cases.
+
+USAGE: --dump <NUMLINES>
+"""
+if "--dump" in sys.argv:
+ idx = sys.argv.index ("--dump")
+ dumpinterval = int(sys.argv[idx+1])
+
+"""
+The default operation of the script is to output all the requests mapped to
+their replies in a single line. This operation mode can be changed by this
+argument. It is used to print only those operations that were performed on the
+filename given as the argument to this option. Only those entry operations are
+printed which contain this filename.
+
+USAGE: --trackfilename <filename>
+"""
+if "--trackfilename" in sys.argv:
+ idx = sys.argv.index ("--trackfilename")
+ trackfilename = sys.argv[idx + 1]
+ operation = TRACKFILENAME
+
+"""
+At every dump interval, some stats are printed about the dumped lines.
+Use this option to disable printing that to avoid cluttering the
+output.
+"""
+if "--nostats" in sys.argv:
+ stats = DISABLESTATS
+
+"""
+While tracking a file using --trackfilename, we're only given those
+operations which contain the filename. This excludes a large number
+of operations which operate on that file using its filehandle instead of
+the filename. This option enables outputting those operations also. It
+tracks every single file handle that was ever seen in the log for a given
+filename.
+
+USAGE: --trackfilename
+"""
+if "--tracknamefh" in sys.argv:
+ tracknamefh = ENABLE_TRACKNAME_FH
+
+la = NFSLogAnalyzer (operation, trackfilename, tracknamefh, stats)
+
+for line in sys.stdin:
+ linecount = linecount + 1
+ if linecount % dumpinterval == 0:
+ sys.stderr.write ("Dumping data..\n")
+ la.dump ()
+
+ if linecount % progmsgcount == 0:
+ sys.stderr.write ("Integrating line: "+ str(linecount) + "\n")
+ la.analyzeLine (line, linecount)
diff --git a/extras/group-virt.example b/extras/group-virt.example
new file mode 100644
index 000000000..7dc777f2d
--- /dev/null
+++ b/extras/group-virt.example
@@ -0,0 +1,6 @@
+quick-read=off
+read-ahead=off
+io-cache=off
+stat-prefetch=off
+eager-lock=enable
+remote-dio=enable
diff --git a/extras/hook-scripts/Makefile.am b/extras/hook-scripts/Makefile.am
new file mode 100644
index 000000000..f6bded20c
--- /dev/null
+++ b/extras/hook-scripts/Makefile.am
@@ -0,0 +1 @@
+EXTRA_DIST = S29CTDBsetup.sh S30samba-start.sh S30samba-stop.sh S30samba-set.sh S56glusterd-geo-rep-create-post.sh
diff --git a/extras/hook-scripts/S29CTDBsetup.sh b/extras/hook-scripts/S29CTDBsetup.sh
new file mode 100644
index 000000000..e256be1f3
--- /dev/null
+++ b/extras/hook-scripts/S29CTDBsetup.sh
@@ -0,0 +1,69 @@
+#! /bin/bash
+#non-portable - RHS-2.0 only
+# - The script mounts the 'meta-vol' on start 'event' on a known
+# directory (eg. /gluster/lock)
+# - Adds the necessary configuration changes for ctdb in smb.conf and
+# restarts smb service.
+# - P.S: There are other 'tasks' that need to be done outside this script
+# to get CTDB based failover up and running.
+
+SMB_CONF=/etc/samba/smb.conf
+
+CTDB_MNT=/gluster/lock
+PROGNAME="ctdb"
+OPTSPEC="volname:"
+VOL=
+# $META is the volume that will be used by CTDB as a shared filesystem.
+# It is not desirable to use this volume for storing 'data' as well.
+# META is set to 'all' (viz. a keyword and hence not a legal volume name)
+# to prevent the script from running for volumes it was not intended.
+# User needs to set META to the volume that serves CTDB lockfile.
+META="all"
+
+function sighup_samba () {
+ pid=`cat /var/run/smbd.pid`
+ if [ $pid != " " ]
+ then
+ kill -HUP $pid;
+ else
+ /etc/init.d/smb start
+ fi
+}
+
+function parse_args () {
+ ARGS=$(getopt -l $OPTSPEC -name $PROGNAME $@)
+ eval set -- "$ARGS"
+
+ while true; do
+ case $1 in
+ --volname)
+ shift
+ VOL=$1
+ ;;
+
+ *)
+ shift
+ break
+ ;;
+
+ esac
+
+ shift
+ done
+}
+
+function add_glusterfs_ctdb_options () {
+ PAT="Share Definitions"
+ GLUSTER_CTDB_CONFIG="# ctdb config for glusterfs\n\tclustering = yes\n\tidmap backend = tdb2\n\tprivate dir = "$CTDB_MNT"\n"
+
+ sed -i /"$PAT"/i\ "$GLUSTER_CTDB_CONFIG" $SMB_CONF
+}
+
+parse_args $@
+if [ "$META" = "$VOL" ]
+then
+ add_glusterfs_ctdb_options
+ sighup_samba
+ mount -t glusterfs `hostname`:$VOL "$CTDB_MNT" &
+fi
+
diff --git a/extras/hook-scripts/S30samba-set.sh b/extras/hook-scripts/S30samba-set.sh
new file mode 100755
index 000000000..6b11f5a4f
--- /dev/null
+++ b/extras/hook-scripts/S30samba-set.sh
@@ -0,0 +1,109 @@
+#!/bin/bash
+
+#Need to be copied to hooks/<HOOKS_VER>/set/post/
+
+#TODO: All gluster and samba paths are assumed for fedora like systems.
+#Some efforts are required to make it work on other distros.
+
+#The preferred way of creating a smb share of a gluster volume has changed.
+#The old method was to create a fuse mount of the volume and share the mount
+#point through samba.
+#
+#New method eliminates the requirement of fuse mount and changes in fstab.
+#glusterfs_vfs plugin for samba makes call to libgfapi to access the volume.
+#
+#This hook script enables user to enable or disable smb share by volume set
+#option. Keys "user.cifs" and "user.smb" both are valid, but user.smb is
+#preferred.
+
+
+PROGNAME="Ssamba-set"
+OPTSPEC="volname:"
+VOL=
+
+enable_smb=""
+
+function parse_args () {
+ ARGS=$(getopt -l $OPTSPEC -o "o" -name $PROGNAME $@)
+ eval set -- "$ARGS"
+
+ while true; do
+ case $1 in
+ --volname)
+ shift
+ VOL=$1
+ ;;
+ *)
+ shift
+ for pair in $@; do
+ read key value < <(echo "$pair" | tr "=" " ")
+ case "$key" in
+ "user.cifs")
+ enable_smb=$value
+ ;;
+ "user.smb")
+ enable_smb=$value
+ ;;
+ *)
+ ;;
+ esac
+ done
+
+ shift
+ break
+ ;;
+ esac
+ shift
+ done
+}
+
+function add_samba_share () {
+ volname=$1
+ STRING="\n[gluster-$volname]\n"
+ STRING+="comment = For samba share of volume $volname\n"
+ STRING+="vfs objects = glusterfs\n"
+ STRING+="glusterfs:volume = $volname\n"
+ STRING+="glusterfs:logfile = /var/log/samba/glusterfs-$volname.%%M.log\n"
+ STRING+="glusterfs:loglevel = 7\n"
+ STRING+="path = /\n"
+ STRING+="read only = no\n"
+ STRING+="guest ok = yes\n"
+ printf "$STRING" >> /etc/samba/smb.conf
+}
+
+function sighup_samba () {
+ pid=`cat /var/run/smbd.pid`
+ if [ "x$pid" != "x" ]
+ then
+ kill -HUP "$pid";
+ else
+ /etc/init.d/smb start
+ fi
+}
+
+function del_samba_share () {
+ volname=$1
+ sed -i "/\[gluster-$volname\]/,/^$/d" /etc/samba/smb.conf
+}
+
+function is_volume_started () {
+ volname=$1
+ echo "$(grep status /var/lib/glusterd/vols/"$volname"/info |\
+ cut -d"=" -f2)"
+}
+
+parse_args $@
+if [ "0" = $(is_volume_started "$VOL") ]; then
+ exit 0
+fi
+
+if [ "$enable_smb" = "enable" ]; then
+ if ! grep --quiet "\[gluster-$VOL\]" /etc/samba/smb.conf ; then
+ add_samba_share $VOL
+ sighup_samba
+ fi
+
+elif [ "$enable_smb" = "disable" ]; then
+ del_samba_share $VOL
+ sighup_samba
+fi
diff --git a/extras/hook-scripts/S30samba-start.sh b/extras/hook-scripts/S30samba-start.sh
new file mode 100755
index 000000000..34fde0ef8
--- /dev/null
+++ b/extras/hook-scripts/S30samba-start.sh
@@ -0,0 +1,110 @@
+#!/bin/bash
+
+#Need to be copied to hooks/<HOOKS_VER>/start/post
+
+#TODO: All gluster and samba paths are assumed for fedora like systems.
+#Some efforts are required to make it work on other distros.
+
+#The preferred way of creating a smb share of a gluster volume has changed.
+#The old method was to create a fuse mount of the volume and share the mount
+#point through samba.
+#
+#New method eliminates the requirement of fuse mount and changes in fstab.
+#glusterfs_vfs plugin for samba makes call to libgfapi to access the volume.
+#
+#This hook script automagically creates shares for volume on every volume start
+#event by adding the entries in smb.conf file and sending SIGHUP to samba.
+#
+#In smb.conf:
+#glusterfs vfs plugin has to be specified as required vfs object.
+#Path value is relative to the root of gluster volume;"/" signifies complete
+#volume.
+
+PROGNAME="Ssamba-start"
+OPTSPEC="volname:"
+VOL=
+CONFIGFILE=
+LOGFILEBASE=
+PIDDIR=
+
+function parse_args () {
+ ARGS=$(getopt -l $OPTSPEC -name $PROGNAME $@)
+ eval set -- "$ARGS"
+
+ while true; do
+ case $1 in
+ --volname)
+ shift
+ VOL=$1
+ ;;
+ *)
+ shift
+ break
+ ;;
+ esac
+ shift
+ done
+}
+
+function find_config_info () {
+ cmdout=`smbd -b | grep smb.conf`
+ if [ $? -ne 0 ];then
+ echo "Samba is not installed"
+ exit 1
+ fi
+ CONFIGFILE=`echo $cmdout | awk {'print $2'}`
+ PIDDIR=`smbd -b | grep PIDDIR | awk {'print $2'}`
+ LOGFILEBASE=`smbd -b | grep 'LOGFILEBASE' | awk '{print $2}'`
+}
+
+function add_samba_share () {
+ volname=$1
+ STRING="\n[gluster-$volname]\n"
+ STRING+="comment = For samba share of volume $volname\n"
+ STRING+="vfs objects = glusterfs\n"
+ STRING+="glusterfs:volume = $volname\n"
+ STRING+="glusterfs:logfile = $LOGFILEBASE/glusterfs-$volname.%%M.log\n"
+ STRING+="glusterfs:loglevel = 7\n"
+ STRING+="path = /\n"
+ STRING+="read only = no\n"
+ STRING+="guest ok = yes\n"
+ printf "$STRING" >> ${CONFIGFILE}
+}
+
+function sighup_samba () {
+ pid=`cat ${PIDDIR}/smbd.pid`
+ if [ "x$pid" != "x" ]
+ then
+ kill -HUP "$pid";
+ else
+ /etc/init.d/smb condrestart
+ fi
+}
+
+function get_smb () {
+ volname=$1
+ uservalue=
+
+ usercifsvalue=$(grep user.cifs /var/lib/glusterd/vols/"$volname"/info |\
+ cut -d"=" -f2)
+ usersmbvalue=$(grep user.smb /var/lib/glusterd/vols/"$volname"/info |\
+ cut -d"=" -f2)
+
+ if [[ $usercifsvalue = "disable" || $usersmbvalue = "disable" ]]; then
+ uservalue="disable"
+ fi
+ echo "$uservalue"
+}
+
+parse_args $@
+if [ $(get_smb "$VOL") = "disable" ]; then
+ exit 0
+fi
+
+#Find smb.conf, smbd pid directory and smbd logfile path
+find_config_info
+
+if ! grep --quiet "\[gluster-$VOL\]" ${CONFIGFILE} ; then
+ add_samba_share $VOL
+ sighup_samba
+fi
diff --git a/extras/hook-scripts/S30samba-stop.sh b/extras/hook-scripts/S30samba-stop.sh
new file mode 100755
index 000000000..8950eea43
--- /dev/null
+++ b/extras/hook-scripts/S30samba-stop.sh
@@ -0,0 +1,71 @@
+#! /bin/bash
+
+#Need to be copied to hooks/<HOOKS_VER>/stop/pre
+
+#TODO: All gluster and samba paths are assumed for fedora like systems.
+#Some efforts are required to make it work on other distros.
+
+#The preferred way of creating a smb share of a gluster volume has changed.
+#The old method was to create a fuse mount of the volume and share the mount
+#point through samba.
+#
+#New method eliminates the requirement of fuse mount and changes in fstab.
+#glusterfs_vfs plugin for samba makes call to libgfapi to access the volume.
+#
+#This hook script automagically removes shares for volume on every volume stop
+#event by removing the volume related entries(if any) in smb.conf file.
+
+PROGNAME="Ssamba-stop"
+OPTSPEC="volname:"
+VOL=
+CONFIGFILE=
+PIDDIR=
+
+function parse_args () {
+ ARGS=$(getopt -l $OPTSPEC -name $PROGNAME $@)
+ eval set -- "$ARGS"
+
+ while true; do
+ case $1 in
+ --volname)
+ shift
+ VOL=$1
+ ;;
+ *)
+ shift
+ break
+ ;;
+ esac
+ shift
+ done
+}
+
+function find_config_info () {
+ cmdout=`smbd -b | grep smb.conf`
+ if [ $? -ne 0 ];then
+ echo "Samba is not installed"
+ exit 1
+ fi
+ CONFIGFILE=`echo $cmdout | awk {'print $2'}`
+ PIDDIR=`smbd -b | grep PIDDIR | awk {'print $2'}`
+}
+
+function del_samba_share () {
+ volname=$1
+ sed -i "/\[gluster-$volname\]/,/^$/d" ${CONFIGFILE}
+}
+
+function sighup_samba () {
+ pid=`cat ${PIDDIR}/smbd.pid`
+ if [ "x$pid" != "x" ]
+ then
+ kill -HUP $pid;
+ else
+ /etc/init.d/smb condrestart
+ fi
+}
+
+parse_args $@
+find_config_info
+del_samba_share $VOL
+sighup_samba
diff --git a/extras/hook-scripts/S40ufo-stop.py b/extras/hook-scripts/S40ufo-stop.py
new file mode 100755
index 000000000..107f19683
--- /dev/null
+++ b/extras/hook-scripts/S40ufo-stop.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python
+
+import os
+from optparse import OptionParser
+
+if __name__ == '__main__':
+ # check if swift is installed
+ try:
+ from gluster.swift.common.Glusterfs import get_mnt_point, unmount
+ except ImportError:
+ import sys
+ sys.exit("Openstack Swift does not appear to be installed properly");
+
+ op = OptionParser(usage="%prog [options...]")
+ op.add_option('--volname', dest='vol', type=str)
+ op.add_option('--last', dest='last', type=str)
+ (opts, args) = op.parse_args()
+
+
+ mnt_point = get_mnt_point(opts.vol)
+ if mnt_point:
+ unmount(mnt_point)
+ else:
+ sys.exit("get_mnt_point returned none for mount point")
diff --git a/extras/hook-scripts/S56glusterd-geo-rep-create-post.sh b/extras/hook-scripts/S56glusterd-geo-rep-create-post.sh
new file mode 100755
index 000000000..1369c22fc
--- /dev/null
+++ b/extras/hook-scripts/S56glusterd-geo-rep-create-post.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+key_val_pair1=`echo $2 | cut -d ' ' -f 1`
+key_val_pair2=`echo $2 | cut -d ' ' -f 2`
+key_val_pair3=`echo $2 | cut -d ' ' -f 3`
+
+key=`echo $key_val_pair1 | cut -d '=' -f 1`
+val=`echo $key_val_pair1 | cut -d '=' -f 2`
+if [ "$key" != "is_push_pem" ]; then
+ exit;
+fi
+if [ "$val" != '1' ]; then
+ exit;
+fi
+
+key=`echo $key_val_pair2 | cut -d '=' -f 1`
+val=`echo $key_val_pair2 | cut -d '=' -f 2`
+if [ "$key" != "pub_file" ]; then
+ exit;
+fi
+if [ "$val" == "" ]; then
+ exit;
+fi
+pub_file=`echo $val`
+pub_file_tmp=`echo $val`_tmp
+
+key=`echo $key_val_pair3 | cut -d '=' -f 1`
+val=`echo $key_val_pair3 | cut -d '=' -f 2`
+if [ "$key" != "slave_ip" ]; then
+ exit;
+fi
+if [ "$val" == "" ]; then
+ exit;
+fi
+slave_ip=`echo $val`
+
+if [ -f $pub_file ]; then
+ scp $pub_file $slave_ip:$pub_file_tmp
+ ssh $slave_ip "mv $pub_file_tmp $pub_file"
+ ssh $slave_ip "gluster system:: copy file /geo-replication/common_secret.pem.pub > /dev/null"
+ ssh $slave_ip "gluster system:: execute add_secret_pub > /dev/null"
+fi
diff --git a/extras/init.d/Makefile.am b/extras/init.d/Makefile.am
index 1922e6bab..38898fddd 100644
--- a/extras/init.d/Makefile.am
+++ b/extras/init.d/Makefile.am
@@ -1,17 +1,22 @@
-EXTRA_DIST = glusterfsd-Debian glusterfsd-Redhat glusterfsd-SuSE glusterfs-server.plist
+EXTRA_DIST = glusterd-Debian glusterd-Redhat glusterd-SuSE glusterd.plist rhel5-load-fuse.modules
CLEANFILES =
-initdir = /etc/init.d
+INIT_DIR = @initdir@
+SYSTEMD_DIR = @systemddir@
+LAUNCHD_DIR = @launchddir@
$(GF_DISTRIBUTION):
- $(mkdir_p) $(DESTDIR)$(initdir)
- $(INSTALL_PROGRAM) glusterfsd-$(GF_DISTRIBUTION) $(DESTDIR)$(initdir)/glusterfsd
+ @if [ ! -d $(SYSTEMD_DIR) ]; then \
+ $(mkdir_p) $(DESTDIR)$(INIT_DIR); \
+ $(INSTALL_PROGRAM) glusterd-$(GF_DISTRIBUTION) $(DESTDIR)$(INIT_DIR)/glusterd; \
+ fi
install-exec-local: $(GF_DISTRIBUTION)
install-data-local:
if GF_DARWIN_HOST_OS
- cp glusterfs-server.plist /Library/LaunchDaemons/com.zresearch.glusterfs.plist
+ $(mkdir_p) $(DESTDIR)$(LAUNCHD_DIR)
+ $(INSTALL_PROGRAM) glusterd.plist $(DESTDIR)$(LAUNCHD_DIR)/com.gluster.glusterd.plist
endif
diff --git a/extras/init.d/glusterfsd-Debian.in b/extras/init.d/glusterd-Debian.in
index a0c83d535..918f8592c 100755
--- a/extras/init.d/glusterfsd-Debian.in
+++ b/extras/init.d/glusterd-Debian.in
@@ -1,24 +1,23 @@
#!/bin/sh
### BEGIN INIT INFO
-# Provides: glusterfsd
+# Provides: glusterd
# Required-Start: $local_fs $network
# Required-Stop: $local_fs $network
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
-# Short-Description: gluster server
-# Description: This file starts / stops the gluster server
+# Short-Description: Gluster File System service for volume management
+# Description: Gluster File System service for volume management
### END INIT INFO
# Author: Chris AtLee <chris@atlee.ca>
# Patched by: Matthias Albert < matthias@linux4experts.de>
PATH=/sbin:/usr/sbin:/bin:/usr/bin
-NAME=glusterfsd
+NAME=glusterd
SCRIPTNAME=/etc/init.d/$NAME
DAEMON=@prefix@/sbin/$NAME
PIDFILE=/var/run/$NAME.pid
-CONFIGFILE=/etc/glusterfs/glusterfsd.vol
-GLUSTERFS_OPTS="-f $CONFIGFILE"
+GLUSTERD_OPTS=""
PID=`test -f $PIDFILE && cat $PIDFILE`
@@ -31,24 +30,16 @@ test -x $DAEMON || exit 0
# Define LSB log_* functions.
. /lib/lsb/init-functions
-check_config()
-{
- if [ ! -f "$CONFIGFILE" ]; then
- echo "Config file $CONFIGFILE is missing...exiting!"
- exit 0
- fi
-}
do_start()
{
- check_config;
pidofproc -p $PIDFILE $DAEMON >/dev/null
status=$?
if [ $status -eq 0 ]; then
- log_success_msg "glusterfs server is already running with pid $PID"
+ log_success_msg "glusterd service is already running with pid $PID"
else
- log_daemon_msg "Starting glusterfs server" "glusterfsd"
- start-stop-daemon --start --quiet --oknodo --pidfile $PIDFILE --startas $DAEMON -- -p $PIDFILE $GLUSTERFS_OPTS
+ log_daemon_msg "Starting glusterd service" "glusterd"
+ start-stop-daemon --start --quiet --oknodo --pidfile $PIDFILE --startas $DAEMON -- -p $PIDFILE $GLUSTERD_OPTS
log_end_msg $?
start_daemon -p $PIDFILE $DAEMON -f $CONFIGFILE
return $?
@@ -57,7 +48,7 @@ do_start()
do_stop()
{
- log_daemon_msg "Stopping glusterfs server" "glusterfsd"
+ log_daemon_msg "Stopping glusterd service" "glusterd"
start-stop-daemon --stop --quiet --oknodo --pidfile $PIDFILE
log_end_msg $?
rm -f $PIDFILE
@@ -70,9 +61,9 @@ do_status()
pidofproc -p $PIDFILE $DAEMON >/dev/null
status=$?
if [ $status -eq 0 ]; then
- log_success_msg "glusterfs server is running with pid $PID"
+ log_success_msg "glusterd service is running with pid $PID"
else
- log_failure_msg "glusterfs server is not running."
+ log_failure_msg "glusterd service is not running."
fi
exit $status
}
diff --git a/extras/init.d/glusterd-Redhat.in b/extras/init.d/glusterd-Redhat.in
new file mode 100755
index 000000000..e320708ae
--- /dev/null
+++ b/extras/init.d/glusterd-Redhat.in
@@ -0,0 +1,142 @@
+#!/bin/bash
+#
+# glusterd Startup script for the glusterfs server
+#
+# chkconfig: - 20 80
+# description: Clustered file-system server
+
+### BEGIN INIT INFO
+# Provides: glusterd
+# Required-Start: $local_fs $network
+# Required-Stop: $local_fs $network
+# Should-Start:
+# Should-Stop:
+# Default-Start:
+# Default-Stop: 0 1 2 3 4 5 6
+# Short-Description: glusterfs server
+# Description: Clustered file-system server
+### END INIT INFO
+#
+
+# Source function library.
+. /etc/rc.d/init.d/functions
+
+BASE=glusterd
+
+# Fedora File System Layout dictates /run
+[ -e /run ] && RUNDIR="/run"
+PIDFILE="${RUNDIR:-/var/run}/${BASE}.pid"
+
+PID=`test -f $PIDFILE && cat $PIDFILE`
+
+# Overwriteable from sysconfig
+LOG_LEVEL=''
+LOG_FILE=''
+GLUSTERD_OPTIONS=''
+GLUSTERD_NOFILE='65536'
+
+[ -f /etc/sysconfig/${BASE} ] && . /etc/sysconfig/${BASE}
+
+[ ! -z $LOG_LEVEL ] && GLUSTERD_OPTIONS="${GLUSTERD_OPTIONS} --log-level ${LOG_LEVEL}"
+[ ! -z $LOG_FILE ] && GLUSTERD_OPTIONS="${GLUSTERD_OPTIONS} --log-file ${LOG_FILE}"
+
+GLUSTERFSD=glusterfsd
+GLUSTERFS=glusterfs
+GLUSTERD_BIN=@prefix@/sbin/$BASE
+GLUSTERD_OPTS="--pid-file=$PIDFILE ${GLUSTERD_OPTIONS}"
+GLUSTERD="$GLUSTERD_BIN $GLUSTERD_OPTS"
+RETVAL=0
+
+LOCKFILE=/var/lock/subsys/${BASE}
+
+# Start the service $BASE
+start()
+{
+ if pidofproc -p $PIDFILE $GLUSTERD_BIN &> /dev/null; then
+ echo "glusterd service is already running with pid $PID"
+ return 0
+ else
+ ulimit -n $GLUSTERD_NOFILE
+ echo -n $"Starting $BASE:"
+ daemon $GLUSTERD
+ RETVAL=$?
+ echo
+ [ $RETVAL -eq 0 ] && touch $LOCKFILE
+ return $RETVAL
+ fi
+}
+
+# Stop the service $BASE
+stop()
+{
+ echo -n $"Stopping $BASE:"
+ if pidofproc -p $PIDFILE $GLUSTERD_BIN &> /dev/null; then
+ killproc -p $PIDFILE $BASE
+ else
+ killproc $BASE
+ fi
+ RETVAL=$?
+ [ $RETVAL -eq 0 ] && rm -f $LOCKFILE
+ return $RETVAL
+}
+
+restart()
+{
+ stop
+ start
+}
+
+reload()
+{
+ restart
+}
+
+force_reload()
+{
+ restart
+}
+
+rh_status()
+{
+ status $BASE
+}
+
+rh_status_q()
+{
+ rh_status &>/dev/null
+}
+
+
+### service arguments ###
+case $1 in
+ start)
+ rh_status_q && exit 0
+ $1
+ ;;
+ stop)
+ rh_status_q || exit 0
+ $1
+ ;;
+ restart)
+ $1
+ ;;
+ reload)
+ rh_status_q || exit 7
+ $1
+ ;;
+ force-reload)
+ force_reload
+ ;;
+ status)
+ rh_status
+ ;;
+ condrestart|try-restart)
+ rh_status_q || exit 0
+ restart
+ ;;
+ *)
+ echo $"Usage: $0 {start|stop|status|restart|condrestart|try-restart|reload|force-reload}"
+ exit 1
+esac
+
+exit $?
diff --git a/extras/init.d/glusterfsd-SuSE.in b/extras/init.d/glusterd-SuSE.in
index 43552bb49..6259bab00 100755
--- a/extras/init.d/glusterfsd-SuSE.in
+++ b/extras/init.d/glusterd-SuSE.in
@@ -1,31 +1,30 @@
#!/bin/bash
#
### BEGIN INIT INFO
-# Provides: glusterfsd
-# Required-Start: $local_fs $network
-# Required-Stop:
+# Provides: glusterd
+# Required-Start: $remote_fs $network
+# Required-Stop: $remote_fs $network
# Default-Start: 3 5
# Default-Stop:
-# Short-Description: GlusterFS server daemon
-# Description: All necessary services for GlusterFS clients
+# Short-Description: Gluster File System service for volume management
+# Description: Gluster File System service for volume management
### END INIT INFO
# Get function from functions library
. /etc/rc.status
-BASE=glusterfsd
-GLUSTERFSD_BIN=@prefix@/sbin/$BASE
-CONFIGFILE=/etc/glusterfs/glusterfsd.vol
-GLUSTERFSD_OPTS="-f $CONFIGFILE"
-GSERVER="$GLUSTERFSD_BIN $GLUSTERFSD_OPTS"
+BASE=glusterd
+GLUSTERD_BIN=@prefix@/sbin/$BASE
+GLUSTERD_OPTS=""
+GLUSTERD="$GLUSTERD_BIN $GLUSTERD_OPTS"
RETVAL=0
# Start the service $BASE
start()
{
echo -n $"Starting $BASE:"
- startproc $GSERVER
+ startproc $GLUSTERD
return $?
}
@@ -33,7 +32,7 @@ start()
stop()
{
echo $"Stopping $BASE:"
- killproc $BASE
+ killproc $BASE
return $?
}
@@ -55,20 +54,24 @@ case $1 in
rc_status -v
;;
status)
- echo -n " glusterfsd"
+ echo -n " glusterd"
if ! checkproc $BASE ;then
echo " not running"
rc_failed 3
fi
rc_status -v
;;
+ reload)
+ rc_failed 3
+ rc_status -v
+ ;;
restart)
$0 stop
$0 start
rc_status
;;
*)
- echo $"Usage: $0 {start|stop|status|restart}."
+ echo $"Usage: $0 {start|stop|status|reload|restart}."
exit 1
esac
diff --git a/extras/init.d/glusterfs-server.plist.in b/extras/init.d/glusterd.plist.in
index 4d2287c57..7385fa486 100644
--- a/extras/init.d/glusterfs-server.plist.in
+++ b/extras/init.d/glusterd.plist.in
@@ -3,13 +3,11 @@
<plist version="1.0">
<dict>
<key>Label</key>
- <string>com.zresearch.glusterfs</string>
+ <string>com.gluster.glusterd</string>
<key>ProgramArguments</key>
<array>
- <string>@prefix@/sbin/glusterfsd</string>
+ <string>@prefix@/sbin/glusterd</string>
<string>-N</string>
- <string>-f</string>
- <string>@prefix@/etc/glusterfs/server.vol</string>
</array>
</dict>
</plist>
diff --git a/extras/init.d/glusterfsd-Redhat.in b/extras/init.d/glusterfsd-Redhat.in
deleted file mode 100755
index f53de8f21..000000000
--- a/extras/init.d/glusterfsd-Redhat.in
+++ /dev/null
@@ -1,54 +0,0 @@
-#!/bin/bash
-#
-# chkconfig: 35 90 12
-# description: Glusterfsd server
-#
-
-# Get function from functions library
-. /etc/rc.d/init.d/functions
-
-BASE=glusterfsd
-GLUSTERFSD_BIN=@prefix@/sbin/$BASE
-CONFIGFILE=/etc/glusterfs/glusterfsd.vol
-GLUSTERFSD_OPTS="-f $CONFIGFILE"
-GSERVER="$GLUSTERFSD_BIN $GLUSTERFSD_OPTS"
-RETVAL=0
-
-# Start the service $BASE
-start()
-{
- echo $"Starting $BASE:"
- daemon $GSERVER
- RETVAL=$?
- [ $RETVAL -ne 0 ] && exit $RETVAL
-}
-
-# Stop the service $BASE
-stop()
-{
- echo $"Stopping $BASE:"
- killproc $BASE
-}
-
-
-### service arguments ###
-case $1 in
- start)
- start
- ;;
- stop)
- stop
- ;;
- status)
- status $BASE
- ;;
- restart)
- $0 stop
- $0 start
- ;;
- *)
- echo $"Usage: $0 {start|stop|status|restart}."
- exit 1
-esac
-
-exit 0
diff --git a/extras/init.d/rhel5-load-fuse.modules b/extras/init.d/rhel5-load-fuse.modules
new file mode 100755
index 000000000..ee194db99
--- /dev/null
+++ b/extras/init.d/rhel5-load-fuse.modules
@@ -0,0 +1,7 @@
+#!/bin/sh
+#
+# fusermount-glusterfs requires the /dev/fuse character device. The fuse module
+# provides this and is loaded on demand in newer Linux distributions.
+#
+
+[ -c /dev/fuse ] || /sbin/modprobe fuse
diff --git a/extras/logger.conf.example b/extras/logger.conf.example
new file mode 100644
index 000000000..248be5bda
--- /dev/null
+++ b/extras/logger.conf.example
@@ -0,0 +1,13 @@
+#
+# Sample logger.conf file to configure enhanced Logging in GlusterFS
+#
+# To enable enhanced logging capabilities,
+#
+# 1. rename this file to /etc/glusterfs/logger.conf
+#
+# 2. rename /etc/rsyslog.d/gluster.conf.example to
+# /etc/rsyslog.d/gluster.conf
+#
+# This change requires restart of all gluster services/volumes and
+# rsyslog.
+#
diff --git a/extras/ocf/Makefile.am b/extras/ocf/Makefile.am
new file mode 100644
index 000000000..c49a835fb
--- /dev/null
+++ b/extras/ocf/Makefile.am
@@ -0,0 +1,11 @@
+EXTRA_DIST = glusterd.in volume.in
+
+# The root of the OCF resource agent hierarchy
+# Per the OCF standard, it's always "lib",
+# not "lib64" (even on 64-bit platforms).
+ocfdir = $(prefix)/lib/ocf
+
+# The ceph provider directory
+radir = $(ocfdir)/resource.d/$(PACKAGE_NAME)
+
+ra_SCRIPTS = glusterd volume
diff --git a/extras/ocf/glusterd.in b/extras/ocf/glusterd.in
new file mode 100755
index 000000000..c119a285d
--- /dev/null
+++ b/extras/ocf/glusterd.in
@@ -0,0 +1,212 @@
+#!/bin/sh
+#
+# glusterd
+#
+# Description: Manages a glusterd server as a (typically cloned)
+# HA resource
+#
+# Authors: Florian Haas (hastexo Professional Services GmbH)
+#
+# License: GNU General Public License (GPL)
+
+#######################################################################
+# Initialization:
+
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+# Convenience variables
+# When sysconfdir and localstatedir aren't passed in as
+# configure flags, they're defined in terms of prefix
+prefix=@prefix@
+#######################################################################
+
+
+OCF_RESKEY_binary_default="glusterd"
+OCF_RESKEY_pid_default="@localstatedir@/run/glusterd.pid"
+OCF_RESKEY_socket_default=""
+OCF_RESKEY_additional_parameters_default=""
+
+: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
+: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
+
+glusterd_meta_data() {
+ cat <<EOF
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="glusterd" version="0.1">
+ <version>0.1</version>
+ <longdesc lang="en">
+ </longdesc>
+ <shortdesc lang="en">Manages a Gluster server</shortdesc>
+ <parameters>
+ <parameter name="binary">
+ <longdesc lang="en">
+ Name of the glusterd executable. Specify a full absolute
+ path if the binary is not in your \$PATH.
+ </longdesc>
+ <shortdesc lang="en">glusterd executable</shortdesc>
+ <content type="string" default="$OCF_RESKEY_binary_default"/>
+ </parameter>
+ <parameter name="pid">
+ <longdesc lang="en">
+ Path to the glusterd PID file.
+ </longdesc>
+ <shortdesc lang="en">PID file</shortdesc>
+ <content type="string" default="$OCF_RESKEY_pid_default"/>
+ </parameter>
+ <parameter name="socket">
+ <longdesc lang="en">
+ Path to the glusterd UNIX socket file. If unspecified,
+ glusterd will not listen on any socket.
+ </longdesc>
+ <shortdesc lang="en">Socket file</shortdesc>
+ <content type="string"/>
+ </parameter>
+ </parameters>
+ <actions>
+ <action name="start" timeout="20" />
+ <action name="stop" timeout="20" />
+ <action name="monitor" timeout="20" interval="10" />
+ <action name="reload" timeout="20" />
+ <action name="meta-data" timeout="5" />
+ <action name="validate-all" timeout="20" />
+ </actions>
+</resource-agent>
+EOF
+
+}
+
+glusterd_start() {
+ local glusterd_options
+ # exit immediately if configuration is not valid
+ glusterd_validate_all || exit $?
+
+ # if resource is already running, bail out early
+ if glusterd_monitor; then
+ ocf_log info "Resource is already running"
+ return $OCF_SUCCESS
+ fi
+
+ # actually start up the resource here (make sure to immediately
+ # exit with an $OCF_ERR_ error code if anything goes seriously
+ # wrong)
+ glusterd_options="-p $OCF_RESKEY_pid"
+ if [ -n "$OCF_RESKEY_socket" ]; then
+ glusterd_options="$glusterd_options -S $OCF_RESKEY_socket"
+ fi
+ if [ -n "$OCF_RESKEY_additional_parameters" ]; then
+ glusterd_options="$glusterd_options $OCF_RESKEY_additional_parameters"
+ fi
+
+ ocf_run $OCF_RESKEY_binary $glusterd_options || exit $OCF_ERR_GENERIC
+
+ # After the resource has been started, check whether it started up
+ # correctly. If the resource starts asynchronously, the agent may
+ # spin on the monitor function here -- if the resource does not
+ # start up within the defined timeout, the cluster manager will
+ # consider the start action failed
+ while ! glusterd_monitor; do
+ ocf_log debug "Resource has not started yet, waiting"
+ sleep 1
+ done
+
+ # only return $OCF_SUCCESS if _everything_ succeeded as expected
+ return $OCF_SUCCESS
+}
+
+glusterd_stop() {
+ local rc
+ local pid
+
+ # exit immediately if configuration is not valid
+ glusterd_validate_all || exit $?
+
+ glusterd_monitor
+ rc=$?
+ case "$rc" in
+ "$OCF_SUCCESS")
+ # Currently running. Normal, expected behavior.
+ ocf_log debug "Resource is currently running"
+ ;;
+ "$OCF_NOT_RUNNING")
+ # Currently not running. Nothing to do.
+ ocf_log info "Resource is already stopped"
+ return $OCF_SUCCESS
+ ;;
+ esac
+
+ # actually shut down the resource here (make sure to immediately
+ # exit with an $OCF_ERR_ error code if anything goes seriously
+ # wrong)
+ pid=`cat $OCF_RESKEY_pid`
+ ocf_run kill -s TERM $pid || exit OCF_ERR_GENERIC
+
+ # After the resource has been stopped, check whether it shut down
+ # correctly. If the resource stops asynchronously, the agent may
+ # spin on the monitor function here -- if the resource does not
+ # shut down within the defined timeout, the cluster manager will
+ # consider the stop action failed
+ while glusterd_monitor; do
+ ocf_log debug "Resource has not stopped yet, waiting"
+ sleep 1
+ done
+
+ # only return $OCF_SUCCESS if _everything_ succeeded as expected
+ return $OCF_SUCCESS
+
+}
+
+glusterd_monitor() {
+ local pid
+
+ [ -e $OCF_RESKEY_pid ] || return $OCF_NOT_RUNNING
+
+ pid=`cat $OCF_RESKEY_pid`
+ ocf_run kill -s 0 $pid || return $OCF_NOT_RUNNING
+
+ ocf_log debug "$OCF_RESKEY_binary running with PID $pid"
+ return $OCF_SUCCESS
+}
+
+glusterd_validate_all() {
+ # Test for required binaries
+ check_binary $OCF_RESKEY_binary
+
+ return $OCF_SUCCESS
+}
+
+
+
+# Make sure meta-data and usage always succeed
+case $__OCF_ACTION in
+meta-data) glusterd_meta_data
+ exit $OCF_SUCCESS
+ ;;
+usage|help) glusterd_usage
+ exit $OCF_SUCCESS
+ ;;
+esac
+
+# Anything other than meta-data and usage must pass validation
+glusterd_validate_all || exit $?
+
+# Translate each action into the appropriate function call
+case $__OCF_ACTION in
+start) glusterd_start;;
+stop) glusterd_stop;;
+status|monitor) glusterd_monitor;;
+reload) ocf_log info "Reloading..."
+ glusterd_start
+ ;;
+validate-all) ;;
+notify) exit $OCF_SUCCESS;;
+*) glusterd_usage
+ exit $OCF_ERR_UNIMPLEMENTED
+ ;;
+esac
+rc=$?
+
+# The resource agent may optionally log a debug message
+ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION returned $rc"
+exit $rc
diff --git a/extras/ocf/volume.in b/extras/ocf/volume.in
new file mode 100755
index 000000000..72fd1213a
--- /dev/null
+++ b/extras/ocf/volume.in
@@ -0,0 +1,246 @@
+#!/bin/sh
+#
+# glusterd
+#
+# Description: Manages a glusterd server as a (typically cloned)
+# HA resource
+#
+# Authors: Florian Haas (hastexo Professional Services GmbH)
+#
+# License: GNU General Public License (GPL)
+
+#######################################################################
+# Initialization:
+
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+# Convenience variables
+# When sysconfdir and localstatedir aren't passed in as
+# configure flags, they're defined in terms of prefix
+prefix=@prefix@
+SHORTHOSTNAME=`hostname -s`
+#######################################################################
+
+OCF_RESKEY_binary_default="gluster"
+
+: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
+
+volume_meta_data() {
+ cat <<EOF
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="volume" version="0.1">
+ <version>0.1</version>
+ <longdesc lang="en">
+Manages a GlusterFS volume and monitors its bricks. When a resource of
+this type is configured as a clone (as is commonly the case), then it
+must have clone ordering enabled.
+ </longdesc>
+ <shortdesc lang="en">Manages a GlusterFS volume</shortdesc>
+ <parameters>
+ <parameter name="volname" required="1">
+ <longdesc lang="en">
+ The name of the volume to manage.
+ </longdesc>
+ <shortdesc lang="en">volume name</shortdesc>
+ <content type="string"/>
+ </parameter>
+ <parameter name="binary">
+ <longdesc lang="en">
+ Name of the gluster executable. Specify a full absolute
+ path if the binary is not in your \$PATH.
+ </longdesc>
+ <shortdesc lang="en">gluster executable</shortdesc>
+ <content type="string" default="$OCF_RESKEY_binary_default"/>
+ </parameter>
+ </parameters>
+ <actions>
+ <action name="start" timeout="20" />
+ <action name="stop" timeout="20" />
+ <action name="monitor" timeout="20" interval="10" />
+ <action name="reload" timeout="20" />
+ <action name="meta-data" timeout="5" />
+ <action name="validate-all" timeout="20" />
+ </actions>
+</resource-agent>
+EOF
+
+}
+
+volume_getdir() {
+ local voldir
+ voldir="@sysconfdir@/glusterd/vols/${OCF_RESKEY_volname}"
+
+ [ -d ${voldir} ] || return 1
+
+ echo "${voldir}"
+ return 0
+}
+
+volume_getbricks() {
+ local infofile
+ local voldir
+ voldir=`volume_getdir`
+ infofile="${voldir}/info"
+
+ [ -e ${infofile} ] || return 1
+
+ echo "`sed -n -e "s/^brick-.\+=${SHORTHOSTNAME}://p" < ${infofile}`"
+ return 0
+}
+
+volume_getpids() {
+ local bricks
+ local piddir
+ local pidfile
+ local infofile
+ local voldir
+
+ voldir=`volume_getdir`
+ bricks=`volume_getbricks`
+ piddir="${voldir}/run"
+
+ for brick in ${bricks}; do
+ pidfile="${piddir}/${SHORTHOSTNAME}${brick}.pid"
+ [ -e $pidfile ] || return 1
+ cat $pidfile
+ done
+
+ return 0
+}
+
+volume_start() {
+ local volume_options
+
+ # exit immediately if configuration is not valid
+ volume_validate_all || exit $?
+
+ # if resource is already running, bail out early
+ if volume_monitor; then
+ ocf_log info "Resource is already running"
+ return $OCF_SUCCESS
+ fi
+
+ # actually start up the resource here
+ ocf_run "$OCF_RESKEY_binary" \
+ volume start "$OCF_RESKEY_volname" force || exit $OCF_ERR_GENERIC
+
+ # After the resource has been started, check whether it started up
+ # correctly. If the resource starts asynchronously, the agent may
+ # spin on the monitor function here -- if the resource does not
+ # start up within the defined timeout, the cluster manager will
+ # consider the start action failed
+ while ! volume_monitor; do
+ ocf_log debug "Resource has not started yet, waiting"
+ sleep 1
+ done
+
+ # only return $OCF_SUCCESS if _everything_ succeeded as expected
+ return $OCF_SUCCESS
+}
+
+volume_stop() {
+ local rc
+ local pid
+
+ # exit immediately if configuration is not valid
+ volume_validate_all || exit $?
+
+ volume_monitor
+ rc=$?
+ case "$rc" in
+ "$OCF_SUCCESS")
+ # Currently running. Normal, expected behavior.
+ ocf_log debug "Resource is currently running"
+ ;;
+ "$OCF_NOT_RUNNING")
+ # Currently not running. Nothing to do.
+ ocf_log info "Resource is already stopped"
+ return $OCF_SUCCESS
+ ;;
+ esac
+
+ # actually shut down the resource here (make sure to immediately
+ # exit with an $OCF_ERR_ error code if anything goes seriously
+ # wrong)
+ pids=`volume_getpids`
+ for pid in $pids; do
+ ocf_run kill -s TERM $pid
+ done
+
+ # After the resource has been stopped, check whether it shut down
+ # correctly. If the resource stops asynchronously, the agent may
+ # spin on the monitor function here -- if the resource does not
+ # shut down within the defined timeout, the cluster manager will
+ # consider the stop action failed
+ while volume_monitor; do
+ ocf_log debug "Resource has not stopped yet, waiting"
+ sleep 1
+ done
+
+ # only return $OCF_SUCCESS if _everything_ succeeded as expected
+ return $OCF_SUCCESS
+
+}
+
+volume_monitor() {
+ local pid
+
+ pids=`volume_getpids` || return $OCF_NOT_RUNNING
+
+ for pid in $pids; do
+ ocf_run kill -s 0 $pid || return $OCF_NOT_RUNNING
+ done
+
+ ocf_log debug "Local bricks for volume ${OCF_RESKEY_volname} running with PIDs $pids"
+ return $OCF_SUCCESS
+}
+
+volume_validate_all() {
+ # Test for configuration errors first
+ if [ -z "${OCF_RESKEY_volname}" ]; then
+ ocf_log err 'Missing required parameter "volname"'
+ return $OCF_ERR_CONFIGURED
+ fi
+
+ # Test for required binaries
+ check_binary $OCF_RESKEY_binary
+
+ return $OCF_SUCCESS
+}
+
+
+
+# Make sure meta-data and usage always succeed
+case $__OCF_ACTION in
+meta-data) volume_meta_data
+ exit $OCF_SUCCESS
+ ;;
+usage|help) volume_usage
+ exit $OCF_SUCCESS
+ ;;
+esac
+
+# Anything other than meta-data and usage must pass validation
+volume_validate_all || exit $?
+
+# Translate each action into the appropriate function call
+case $__OCF_ACTION in
+start) volume_start;;
+stop) volume_stop;;
+status|monitor) volume_monitor;;
+reload) ocf_log info "Reloading..."
+ volume_start
+ ;;
+validate-all) ;;
+notify) exit $OCF_SUCCESS;;
+*) volume_usage
+ exit $OCF_ERR_UNIMPLEMENTED
+ ;;
+esac
+rc=$?
+
+# The resource agent may optionally log a debug message
+ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION returned $rc"
+exit $rc
diff --git a/extras/profiler/glusterfs-profiler b/extras/profiler/glusterfs-profiler
new file mode 100755
index 000000000..65d445864
--- /dev/null
+++ b/extras/profiler/glusterfs-profiler
@@ -0,0 +1,817 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+
+# texttable - module for creating simple ASCII tables
+# Incorporated from texttable.py downloaded from
+# http://jefke.free.fr/stuff/python/texttable/texttable-0.7.0.tar.gz
+
+import sys
+import string
+
+try:
+ if sys.version >= '2.3':
+ import textwrap
+ elif sys.version >= '2.2':
+ from optparse import textwrap
+ else:
+ from optik import textwrap
+except ImportError:
+ sys.stderr.write("Can't import textwrap module!\n")
+ raise
+
+try:
+ True, False
+except NameError:
+ (True, False) = (1, 0)
+
+def len(iterable):
+ """Redefining len here so it will be able to work with non-ASCII characters
+ """
+ if not isinstance(iterable, str):
+ return iterable.__len__()
+
+ try:
+ return len(unicode(iterable, 'utf'))
+ except:
+ return iterable.__len__()
+
+class ArraySizeError(Exception):
+ """Exception raised when specified rows don't fit the required size
+ """
+
+ def __init__(self, msg):
+ self.msg = msg
+ Exception.__init__(self, msg, '')
+
+ def __str__(self):
+ return self.msg
+
+class Texttable:
+
+ BORDER = 1
+ HEADER = 1 << 1
+ HLINES = 1 << 2
+ VLINES = 1 << 3
+
+ def __init__(self, max_width=80):
+ """Constructor
+
+ - max_width is an integer, specifying the maximum width of the table
+ - if set to 0, size is unlimited, therefore cells won't be wrapped
+ """
+
+ if max_width <= 0:
+ max_width = False
+ self._max_width = max_width
+ self._deco = Texttable.VLINES | Texttable.HLINES | Texttable.BORDER | \
+ Texttable.HEADER
+ self.set_chars(['-', '|', '+', '='])
+ self.reset()
+
+ def reset(self):
+ """Reset the instance
+
+ - reset rows and header
+ """
+
+ self._hline_string = None
+ self._row_size = None
+ self._header = []
+ self._rows = []
+
+ def header(self, array):
+ """Specify the header of the table
+ """
+
+ self._check_row_size(array)
+ self._header = map(str, array)
+
+ def add_row(self, array):
+ """Add a row in the rows stack
+
+ - cells can contain newlines and tabs
+ """
+
+ self._check_row_size(array)
+ self._rows.append(map(str, array))
+
+ def add_rows(self, rows, header=True):
+ """Add several rows in the rows stack
+
+ - The 'rows' argument can be either an iterator returning arrays,
+ or a by-dimensional array
+ - 'header' specifies if the first row should be used as the header
+ of the table
+ """
+
+ # nb: don't use 'iter' on by-dimensional arrays, to get a
+ # usable code for python 2.1
+ if header:
+ if hasattr(rows, '__iter__') and hasattr(rows, 'next'):
+ self.header(rows.next())
+ else:
+ self.header(rows[0])
+ rows = rows[1:]
+ for row in rows:
+ self.add_row(row)
+
+ def set_chars(self, array):
+ """Set the characters used to draw lines between rows and columns
+
+ - the array should contain 4 fields:
+
+ [horizontal, vertical, corner, header]
+
+ - default is set to:
+
+ ['-', '|', '+', '=']
+ """
+
+ if len(array) != 4:
+ raise ArraySizeError, "array should contain 4 characters"
+ array = [ x[:1] for x in [ str(s) for s in array ] ]
+ (self._char_horiz, self._char_vert,
+ self._char_corner, self._char_header) = array
+
+ def set_deco(self, deco):
+ """Set the table decoration
+
+ - 'deco' can be a combinaison of:
+
+ Texttable.BORDER: Border around the table
+ Texttable.HEADER: Horizontal line below the header
+ Texttable.HLINES: Horizontal lines between rows
+ Texttable.VLINES: Vertical lines between columns
+
+ All of them are enabled by default
+
+ - example:
+
+ Texttable.BORDER | Texttable.HEADER
+ """
+
+ self._deco = deco
+
+ def set_cols_align(self, array):
+ """Set the desired columns alignment
+
+ - the elements of the array should be either "l", "c" or "r":
+
+ * "l": column flushed left
+ * "c": column centered
+ * "r": column flushed right
+ """
+
+ self._check_row_size(array)
+ self._align = array
+
+ def set_cols_valign(self, array):
+ """Set the desired columns vertical alignment
+
+ - the elements of the array should be either "t", "m" or "b":
+
+ * "t": column aligned on the top of the cell
+ * "m": column aligned on the middle of the cell
+ * "b": column aligned on the bottom of the cell
+ """
+
+ self._check_row_size(array)
+ self._valign = array
+
+ def set_cols_width(self, array):
+ """Set the desired columns width
+
+ - the elements of the array should be integers, specifying the
+ width of each column. For example:
+
+ [10, 20, 5]
+ """
+
+ self._check_row_size(array)
+ try:
+ array = map(int, array)
+ if reduce(min, array) <= 0:
+ raise ValueError
+ except ValueError:
+ sys.stderr.write("Wrong argument in column width specification\n")
+ raise
+ self._width = array
+
+ def draw(self):
+ """Draw the table
+
+ - the table is returned as a whole string
+ """
+
+ if not self._header and not self._rows:
+ return
+ self._compute_cols_width()
+ self._check_align()
+ out = ""
+ if self._has_border():
+ out += self._hline()
+ if self._header:
+ out += self._draw_line(self._header, isheader=True)
+ if self._has_header():
+ out += self._hline_header()
+ length = 0
+ for row in self._rows:
+ length += 1
+ out += self._draw_line(row)
+ if self._has_hlines() and length < len(self._rows):
+ out += self._hline()
+ if self._has_border():
+ out += self._hline()
+ return out[:-1]
+
+ def _check_row_size(self, array):
+ """Check that the specified array fits the previous rows size
+ """
+
+ if not self._row_size:
+ self._row_size = len(array)
+ elif self._row_size != len(array):
+ raise ArraySizeError, "array should contain %d elements" \
+ % self._row_size
+
+ def _has_vlines(self):
+ """Return a boolean, if vlines are required or not
+ """
+
+ return self._deco & Texttable.VLINES > 0
+
+ def _has_hlines(self):
+ """Return a boolean, if hlines are required or not
+ """
+
+ return self._deco & Texttable.HLINES > 0
+
+ def _has_border(self):
+ """Return a boolean, if border is required or not
+ """
+
+ return self._deco & Texttable.BORDER > 0
+
+ def _has_header(self):
+ """Return a boolean, if header line is required or not
+ """
+
+ return self._deco & Texttable.HEADER > 0
+
+ def _hline_header(self):
+ """Print header's horizontal line
+ """
+
+ return self._build_hline(True)
+
+ def _hline(self):
+ """Print an horizontal line
+ """
+
+ if not self._hline_string:
+ self._hline_string = self._build_hline()
+ return self._hline_string
+
+ def _build_hline(self, is_header=False):
+ """Return a string used to separated rows or separate header from
+ rows
+ """
+ horiz = self._char_horiz
+ if (is_header):
+ horiz = self._char_header
+ # compute cell separator
+ s = "%s%s%s" % (horiz, [horiz, self._char_corner][self._has_vlines()],
+ horiz)
+ # build the line
+ l = string.join([horiz*n for n in self._width], s)
+ # add border if needed
+ if self._has_border():
+ l = "%s%s%s%s%s\n" % (self._char_corner, horiz, l, horiz,
+ self._char_corner)
+ else:
+ l += "\n"
+ return l
+
+ def _len_cell(self, cell):
+ """Return the width of the cell
+
+ Special characters are taken into account to return the width of the
+ cell, such like newlines and tabs
+ """
+
+ cell_lines = cell.split('\n')
+ maxi = 0
+ for line in cell_lines:
+ length = 0
+ parts = line.split('\t')
+ for part, i in zip(parts, range(1, len(parts) + 1)):
+ length = length + len(part)
+ if i < len(parts):
+ length = (length/8 + 1)*8
+ maxi = max(maxi, length)
+ return maxi
+
+ def _compute_cols_width(self):
+ """Return an array with the width of each column
+
+ If a specific width has been specified, exit. If the total of the
+ columns width exceed the table desired width, another width will be
+ computed to fit, and cells will be wrapped.
+ """
+
+ if hasattr(self, "_width"):
+ return
+ maxi = []
+ if self._header:
+ maxi = [ self._len_cell(x) for x in self._header ]
+ for row in self._rows:
+ for cell,i in zip(row, range(len(row))):
+ try:
+ maxi[i] = max(maxi[i], self._len_cell(cell))
+ except (TypeError, IndexError):
+ maxi.append(self._len_cell(cell))
+ items = len(maxi)
+ length = reduce(lambda x,y: x+y, maxi)
+ if self._max_width and length + items*3 + 1 > self._max_width:
+ maxi = [(self._max_width - items*3 -1) / items \
+ for n in range(items)]
+ self._width = maxi
+
+ def _check_align(self):
+ """Check if alignment has been specified, set default one if not
+ """
+
+ if not hasattr(self, "_align"):
+ self._align = ["l"]*self._row_size
+ if not hasattr(self, "_valign"):
+ self._valign = ["t"]*self._row_size
+
+ def _draw_line(self, line, isheader=False):
+ """Draw a line
+
+ Loop over a single cell length, over all the cells
+ """
+
+ line = self._splitit(line, isheader)
+ space = " "
+ out = ""
+ for i in range(len(line[0])):
+ if self._has_border():
+ out += "%s " % self._char_vert
+ length = 0
+ for cell, width, align in zip(line, self._width, self._align):
+ length += 1
+ cell_line = cell[i]
+ fill = width - len(cell_line)
+ if isheader:
+ align = "c"
+ if align == "r":
+ out += "%s " % (fill * space + cell_line)
+ elif align == "c":
+ out += "%s " % (fill/2 * space + cell_line \
+ + (fill/2 + fill%2) * space)
+ else:
+ out += "%s " % (cell_line + fill * space)
+ if length < len(line):
+ out += "%s " % [space, self._char_vert][self._has_vlines()]
+ out += "%s\n" % ['', self._char_vert][self._has_border()]
+ return out
+
+ def _splitit(self, line, isheader):
+ """Split each element of line to fit the column width
+
+ Each element is turned into a list, result of the wrapping of the
+ string to the desired width
+ """
+
+ line_wrapped = []
+ for cell, width in zip(line, self._width):
+ array = []
+ for c in cell.split('\n'):
+ array.extend(textwrap.wrap(unicode(c, 'utf'), width))
+ line_wrapped.append(array)
+ max_cell_lines = reduce(max, map(len, line_wrapped))
+ for cell, valign in zip(line_wrapped, self._valign):
+ if isheader:
+ valign = "t"
+ if valign == "m":
+ missing = max_cell_lines - len(cell)
+ cell[:0] = [""] * (missing / 2)
+ cell.extend([""] * (missing / 2 + missing % 2))
+ elif valign == "b":
+ cell[:0] = [""] * (max_cell_lines - len(cell))
+ else:
+ cell.extend([""] * (max_cell_lines - len(cell)))
+ return line_wrapped
+
+
+# Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+# This file is part of GlusterFS.
+
+# GlusterFS is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published
+# by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
+
+# GlusterFS is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see
+# <http://www.gnu.org/licenses/>.
+
+graph_available = True
+
+try:
+ import numpy as np
+ import matplotlib.pyplot as plt
+except ImportError:
+ graph_available = False
+
+import re
+import sys
+
+from optparse import OptionParser
+
+# Global dict-of-dict holding the latency data
+# latency[xlator-name][op-name]
+
+latencies = {}
+counts = {}
+totals = {}
+
+def collect_data (f):
+ """Collect latency data from the file object f and store it in
+ the global variable @latencies"""
+
+ # example dump file line:
+ # fuse.latency.TRUNCATE=3147.000,4
+
+ for line in f:
+ m = re.search ("(\w+)\.\w+.(\w+)=(\w+\.\w+),(\w+),(\w+.\w+)", line)
+ if m and float(m.group(3)) != 0:
+ xlator = m.group(1)
+ op = m.group(2)
+ time = m.group(3)
+ count = m.group(4)
+ total = m.group(5)
+
+ if not xlator in latencies.keys():
+ latencies[xlator] = dict()
+
+ if not xlator in counts.keys():
+ counts[xlator] = dict()
+
+ if not xlator in totals.keys():
+ totals[xlator] = dict()
+
+ latencies[xlator][op] = time
+ counts[xlator][op] = count
+ totals[xlator][op] = total
+
+
+def calc_latency_heights (xlator_order):
+ heights = map (lambda x: [], xlator_order)
+
+ N = len (xlator_order)
+ for i in range (N):
+ xl = xlator_order[i]
+
+ k = latencies[xl].keys()
+ k.sort()
+
+ if i == len (xlator_order) - 1:
+ # bottom-most xlator
+ heights[i] = [float (latencies[xl][key]) for key in k]
+
+ else:
+ next_xl = xlator_order[i+1]
+ this_xl_time = [latencies[xl][key] for key in k]
+ next_xl_time = [latencies[next_xl][key] for key in k]
+
+ heights[i] = map (lambda x, y: float (x) - float (y),
+ this_xl_time, next_xl_time)
+ return heights
+
+# have sufficient number of colors
+colors = ["violet", "blue", "green", "yellow", "orange", "red"]
+
+def latency_profile (title, xlator_order, mode):
+ heights = calc_latency_heights (xlator_order)
+
+ N = len (latencies[xlator_order[0]].keys())
+ Nxl = len (xlator_order)
+ ind = np.arange (N)
+ width = 0.35
+
+ pieces = map (lambda x: [], xlator_order)
+ bottoms = map (lambda x: [], xlator_order)
+
+ bottoms[Nxl-1] = map (lambda x: 0, latencies[xlator_order[0]].keys())
+
+ k = latencies[xlator_order[0]].keys()
+ k.sort()
+
+ for i in range (Nxl-1):
+ xl = xlator_order[i+1]
+ bottoms[i] = [float(latencies[xl][key]) for key in k]
+
+ if mode == 'text':
+ print "\n%sLatency profile for %s\n" % (' '*20, title)
+ print "Average latency (microseconds):\n"
+
+ table = Texttable()
+
+ table.set_cols_align(["l", "r"] + ["r"] * len(xlator_order))
+ rows = []
+
+ header = ['OP', 'OP Average (us)'] + xlator_order
+ rows = []
+
+ for op in k:
+ sum = reduce (lambda x, y: x + y, [heights[xlator_order.index(xl)][k.index(op)] for xl in xlator_order],
+ 0)
+
+ row = [op]
+ row += ["%5.2f" % sum]
+
+ for xl in xlator_order:
+ op_index = k.index(op)
+ row += ["%5.2f" % (heights[xlator_order.index(xl)][op_index])]
+
+ rows.append(row)
+
+ def row_sort(r1, r2):
+ v1 = float(r1[1])
+ v2 = float(r2[1])
+
+ if v1 < v2:
+ return -1
+ elif v1 == v2:
+ return 0
+ else:
+ return 1
+
+ rows.sort(row_sort, reverse=True)
+ table.add_rows([header] + rows)
+ print table.draw()
+
+ elif mode == 'graph':
+ for i in range(Nxl):
+ pieces[i] = plt.bar (ind, heights[i], width, color=colors[i],
+ bottom=bottoms[i])
+
+ plt.ylabel ("Average Latency (microseconds)")
+ plt.title ("Latency Profile for '%s'" % title)
+ k = latencies[xlator_order[0]].keys()
+ k.sort ()
+ plt.xticks (ind+width/2., k)
+
+ m = round (max(map (float, latencies[xlator_order[0]].values())), -2)
+ plt.yticks (np.arange(0, m + m*0.1, m/10))
+ plt.legend (map (lambda p: p[0], pieces), xlator_order)
+
+ plt.show ()
+ else:
+ print "Unknown mode specified!"
+ sys.exit(1)
+
+
+def fop_distribution (title, xlator_order, mode):
+ plt.ylabel ("Percentage of calls")
+ plt.title ("FOP distribution for '%s'" % title)
+ k = counts[xlator_order[0]].keys()
+ k.sort ()
+
+ N = len (latencies[xlator_order[0]].keys())
+ ind = np.arange(N)
+ width = 0.35
+
+ total = 0
+ top_xl = xlator_order[0]
+ for op in k:
+ total += int(counts[top_xl][op])
+
+ heights = []
+
+ for op in k:
+ heights.append (float(counts[top_xl][op])/total * 100)
+
+ if mode == 'text':
+ print "\n%sFOP distribution for %s\n" % (' '*20, title)
+ print "Total number of calls: %d\n" % total
+
+ table = Texttable()
+
+ table.set_cols_align(["l", "r", "r"])
+
+ rows = []
+ header = ["OP", "% of Calls", "Count"]
+
+ for op in k:
+ row = [op, "%5.2f" % (float(counts[top_xl][op])/total * 100), counts[top_xl][op]]
+ rows.append(row)
+
+ def row_sort(r1, r2):
+ v1 = float(r1[1])
+ v2 = float(r2[1])
+
+ if v1 < v2:
+ return -1
+ elif v1 == v2:
+ return 0
+ else:
+ return 1
+
+ rows.sort(row_sort, reverse=True)
+ table.add_rows([header] + rows)
+ print table.draw()
+
+ elif mode == 'graph':
+ bars = plt.bar (ind, heights, width, color="red")
+
+ for bar in bars:
+ height = bar.get_height()
+ plt.text (bar.get_x()+bar.get_width()/2., 1.05*height,
+ "%d%%" % int(height))
+
+ plt.xticks(ind+width/2., k)
+ plt.yticks(np.arange (0, 110, 10))
+
+ plt.show()
+ else:
+ print "mode not specified!"
+ sys.exit(1)
+
+
+def calc_workload_heights (xlator_order, scaling):
+ workload_heights = map (lambda x: [], xlator_order)
+
+ top_xl = xlator_order[0]
+
+ N = len (xlator_order)
+ for i in range (N):
+ xl = xlator_order[i]
+
+ k = totals[xl].keys()
+ k.sort()
+
+ if i == len (xlator_order) - 1:
+ # bottom-most xlator
+ workload_heights[i] = [float (totals[xl][key]) / float(totals[top_xl][key]) * scaling[k.index(key)] for key in k]
+
+ else:
+ next_xl = xlator_order[i+1]
+ this_xl_time = [float(totals[xl][key]) / float(totals[top_xl][key]) * scaling[k.index(key)] for key in k]
+ next_xl_time = [float(totals[next_xl][key]) / float(totals[top_xl][key]) * scaling[k.index(key)] for key in k]
+
+ workload_heights[i] = map (lambda x, y: (float (x) - float (y)),
+ this_xl_time, next_xl_time)
+
+ return workload_heights
+
+def workload_profile(title, xlator_order, mode):
+ plt.ylabel ("Percentage of Total Time")
+ plt.title ("Workload Profile for '%s'" % title)
+ k = totals[xlator_order[0]].keys()
+ k.sort ()
+
+ N = len(totals[xlator_order[0]].keys())
+ Nxl = len(xlator_order)
+ ind = np.arange(N)
+ width = 0.35
+
+ total = 0
+ top_xl = xlator_order[0]
+ for op in k:
+ total += float(totals[top_xl][op])
+
+ p_heights = []
+
+ for op in k:
+ p_heights.append (float(totals[top_xl][op])/total * 100)
+
+ heights = calc_workload_heights (xlator_order, p_heights)
+
+ pieces = map (lambda x: [], xlator_order)
+ bottoms = map (lambda x: [], xlator_order)
+
+ bottoms[Nxl-1] = map (lambda x: 0, totals[xlator_order[0]].keys())
+
+ for i in range (Nxl-1):
+ xl = xlator_order[i+1]
+ k = totals[xl].keys()
+ k.sort()
+
+ bottoms[i] = [float(totals[xl][key]) / float(totals[top_xl][key]) * p_heights[k.index(key)] for key in k]
+
+ if mode == 'text':
+ print "\n%sWorkload profile for %s\n" % (' '*20, title)
+ print "Total Time: %d microseconds = %.1f seconds = %.1f minutes\n" % (total, total / 1000000.0, total / 6000000.0)
+
+ table = Texttable()
+ table.set_cols_align(["l", "r"] + ["r"] * len(xlator_order))
+ rows = []
+
+ header = ['OP', 'OP Total (%)'] + xlator_order
+ rows = []
+
+ for op in k:
+ sum = reduce (lambda x, y: x + y, [heights[xlator_order.index(xl)][k.index(op)] for xl in xlator_order],
+ 0)
+ row = [op]
+ row += ["%5.2f" % sum]
+
+ for xl in xlator_order:
+ op_index = k.index(op)
+ row += ["%5.2f" % heights[xlator_order.index(xl)][op_index]]
+
+ rows.append(row)
+
+ def row_sort(r1, r2):
+ v1 = float(r1[1])
+ v2 = float(r2[1])
+
+ if v1 < v2:
+ return -1
+ elif v1 == v2:
+ return 0
+ else:
+ return 1
+
+ rows.sort(row_sort, reverse=True)
+ table.add_rows([header] + rows)
+ print table.draw()
+
+ elif mode == 'graph':
+ for i in range(Nxl):
+ pieces[i] = plt.bar (ind, heights[i], width, color=colors[i],
+ bottom=bottoms[i])
+
+ for key in k:
+ bar = pieces[Nxl-1][k.index(key)]
+ plt.text (bar.get_x() + bar.get_width()/2., 1.05*p_heights[k.index(key)],
+ "%d%%" % int(p_heights[k.index(key)]))
+
+ plt.xticks(ind+width/2., k)
+ plt.yticks(np.arange (0, 110, 10))
+ plt.legend (map (lambda p: p[0], pieces), xlator_order)
+
+ plt.show()
+ else:
+ print "Unknown mode specified!"
+ sys.exit(1)
+
+
+def main ():
+ parser = OptionParser(usage="usage: %prog [-l | -d | -w] -x <xlator order> <state dump file>")
+ parser.add_option("-l", "--latency", dest="latency", action="store_true",
+ help="Produce latency profile")
+ parser.add_option("-d", "--distribution", dest="distribution", action="store_true",
+ help="Produce distribution of FOPs")
+ parser.add_option("-w", "--workload", dest="workload", action="store_true",
+ help="Produce workload profile")
+ parser.add_option("-t", "--title", dest="title", help="Set the title of the graph")
+ parser.add_option("-x", "--xlator-order", dest="xlator_order", help="Specify the order of xlators")
+ parser.add_option("-m", "--mode", dest="mode", help="Output format, can be text[default] or graph")
+
+ (options, args) = parser.parse_args()
+
+ if len(args) != 1:
+ parser.error("Incorrect number of arguments")
+
+ if (options.xlator_order):
+ xlator_order = options.xlator_order.split()
+ else:
+ print "xlator order must be specified"
+ sys.exit(1)
+
+ collect_data(file (args[0], 'r'))
+
+ mode = 'text'
+ if (options.mode):
+ mode = options.mode
+ if options.mode == 'graph' and graph_available == False:
+ print "matplotlib not available, falling back to text mode"
+ mode = 'text'
+
+ if (options.latency):
+ latency_profile (options.title, xlator_order, mode)
+
+ if (options.distribution):
+ fop_distribution(options.title, xlator_order, mode)
+
+ if (options.workload):
+ workload_profile(options.title, xlator_order, mode)
+
+main ()
diff --git a/extras/prot_filter.py b/extras/prot_filter.py
new file mode 100755
index 000000000..7dccacf15
--- /dev/null
+++ b/extras/prot_filter.py
@@ -0,0 +1,144 @@
+#!/usr/bin/python
+
+"""
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+"""
+
+"""
+ INSTRUCTIONS
+ Put this in /usr/lib64/glusterfs/$version/filter to have it run automatically,
+ or else you'll have to run it by hand every time you change the volume
+ configuration. Give it a list of volume names on which to enable the
+ protection functionality; it will deliberately ignore client volfiles for
+ other volumes, and all server volfiles. It *will* include internal client
+ volfiles such as those used for NFS or rebalance/self-heal; this is a
+ deliberate choice so that it will catch deletions from those sources as well.
+"""
+
+volume_list = [ "jdtest" ]
+
+import copy
+import string
+import sys
+import types
+
+class Translator:
+ def __init__ (self, name):
+ self.name = name
+ self.xl_type = ""
+ self.opts = {}
+ self.subvols = []
+ self.dumped = False
+ def __repr__ (self):
+ return "<Translator %s>" % self.name
+
+def load (path):
+ # If it's a string, open it; otherwise, assume it's already a
+ # file-like object (most notably from urllib*).
+ if type(path) in types.StringTypes:
+ fp = file(path,"r")
+ else:
+ fp = path
+ all_xlators = {}
+ xlator = None
+ last_xlator = None
+ while True:
+ text = fp.readline()
+ if text == "":
+ break
+ text = text.split()
+ if not len(text):
+ continue
+ if text[0] == "volume":
+ if xlator:
+ raise RuntimeError, "nested volume definition"
+ xlator = Translator(text[1])
+ continue
+ if not xlator:
+ raise RuntimeError, "text outside volume definition"
+ if text[0] == "type":
+ xlator.xl_type = text[1]
+ continue
+ if text[0] == "option":
+ xlator.opts[text[1]] = string.join(text[2:])
+ continue
+ if text[0] == "subvolumes":
+ for sv in text[1:]:
+ xlator.subvols.append(all_xlators[sv])
+ continue
+ if text[0] == "end-volume":
+ all_xlators[xlator.name] = xlator
+ last_xlator = xlator
+ xlator = None
+ continue
+ raise RuntimeError, "unrecognized keyword %s" % text[0]
+ if xlator:
+ raise RuntimeError, "unclosed volume definition"
+ return all_xlators, last_xlator
+
+def generate (graph, last, stream=sys.stdout):
+ for sv in last.subvols:
+ if not sv.dumped:
+ generate(graph,sv,stream)
+ print >> stream, ""
+ sv.dumped = True
+ print >> stream, "volume %s" % last.name
+ print >> stream, " type %s" % last.xl_type
+ for k, v in last.opts.iteritems():
+ print >> stream, " option %s %s" % (k, v)
+ if last.subvols:
+ print >> stream, " subvolumes %s" % string.join(
+ [ sv.name for sv in last.subvols ])
+ print >> stream, "end-volume"
+
+def push_filter (graph, old_xl, filt_type, opts={}):
+ new_type = "-" + filt_type.split("/")[1]
+ old_type = "-" + old_xl.xl_type.split("/")[1]
+ pos = old_xl.name.find(old_type)
+ if pos >= 0:
+ new_name = old_xl.name
+ old_name = new_name[:pos] + new_type + new_name[len(old_type)+pos:]
+ else:
+ new_name = old_xl.name + old_type
+ old_name = old_xl.name + new_type
+ new_xl = Translator(new_name)
+ new_xl.xl_type = old_xl.xl_type
+ new_xl.opts = old_xl.opts
+ new_xl.subvols = old_xl.subvols
+ graph[new_xl.name] = new_xl
+ old_xl.name = old_name
+ old_xl.xl_type = filt_type
+ old_xl.opts = opts
+ old_xl.subvols = [new_xl]
+ graph[old_xl.name] = old_xl
+
+if __name__ == "__main__":
+ path = sys.argv[1]
+ # Alow an override for debugging.
+ for extra in sys.argv[2:]:
+ volume_list.append(extra)
+ graph, last = load(path)
+ for v in volume_list:
+ if graph.has_key(v):
+ break
+ else:
+ print "No configured volumes found - aborting."
+ sys.exit(0)
+ for v in graph.values():
+ if v.xl_type == "cluster/distribute":
+ push_filter(graph,v,"features/prot_dht")
+ elif v.xl_type == "protocol/client":
+ push_filter(graph,v,"features/prot_client")
+ # We push debug/trace so that every fop gets a real frame, because DHT
+ # gets confused if STACK_WIND_TAIL causes certain fops to be invoked
+ # from anything other than a direct child.
+ for v in graph.values():
+ if v.xl_type == "features/prot_client":
+ push_filter(graph,v,"debug/trace")
+ generate(graph,last,stream=open(path,"w"))
diff --git a/extras/quota-metadata-cleanup.sh b/extras/quota-metadata-cleanup.sh
new file mode 100755
index 000000000..37ad8bc31
--- /dev/null
+++ b/extras/quota-metadata-cleanup.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+# This script is used to cleanup xattrs setup by quota-translator in (a)
+# backend directory(ies). It takes a single or list of backend directories
+# as argument(s).
+
+usage ()
+{
+ echo >&2 "usage: $0 <list-of-backend-directories>"
+}
+
+main ()
+{
+ [ $# -lt 1 ] && usage
+
+ INSTALL_DIR=`dirname $0`
+
+ for i in $@; do
+ find $i -exec $INSTALL_DIR/quota-remove-xattr.sh '{}' \;
+ done
+
+}
+
+main $@
diff --git a/extras/quota-remove-xattr.sh b/extras/quota-remove-xattr.sh
new file mode 100755
index 000000000..7191f9bd4
--- /dev/null
+++ b/extras/quota-remove-xattr.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+# This script is used to remove xattrs set by quota translator on a path.
+# It is generally invoked from quota-metadata-cleanup.sh, but can
+# also be used stand-alone.
+
+usage ()
+{
+ echo >&2 "usage: $0 <path>"
+}
+
+main ()
+{
+ [ $# -ne 1 ] && usage $0
+
+ XATTR_KEY_VALUE_PAIRS=`getfattr -h -d -m 'trusted.glusterfs.quota' $1 2>/dev/null | sed -e '/^# file/d'`
+
+ for i in $XATTR_KEY_VALUE_PAIRS; do
+ XATTR_KEY=`echo $i | sed -e 's/\([^=]*\).*/\1/g'`
+ setfattr -h -x $XATTR_KEY $1
+ done
+}
+
+main $@
diff --git a/extras/rebalance.py b/extras/rebalance.py
new file mode 100755
index 000000000..80c614c5d
--- /dev/null
+++ b/extras/rebalance.py
@@ -0,0 +1,299 @@
+#!/usr/bin/python
+
+import atexit
+import copy
+import optparse
+import os
+import pipes
+import shutil
+import string
+import subprocess
+import sys
+import tempfile
+import volfilter
+
+# It's just more convenient to have named fields.
+class Brick:
+ def __init__ (self, path, name):
+ self.path = path
+ self.sv_name = name
+ self.size = 0
+ self.curr_size = 0
+ self.good_size = 0
+ def set_size (self, size):
+ self.size = size
+ def set_range (self, rs, re):
+ self.r_start = rs
+ self.r_end = re
+ self.curr_size = self.r_end - self.r_start + 1
+ def __repr__ (self):
+ value = self.path[:]
+ value += "(%d," % self.size
+ if self.curr_size:
+ value += "0x%x,0x%x)" % (self.r_start, self.r_end)
+ else:
+ value += "-)"
+ return value
+
+def get_bricks (host, vol):
+ t = pipes.Template()
+ t.prepend("gluster --remote-host=%s system getspec %s"%(host,vol),".-")
+ return t.open(None,"r")
+
+def generate_stanza (vf, all_xlators, cur_subvol):
+ sv_list = []
+ for sv in cur_subvol.subvols:
+ generate_stanza(vf,all_xlators,sv)
+ sv_list.append(sv.name)
+ vf.write("volume %s\n"%cur_subvol.name)
+ vf.write(" type %s\n"%cur_subvol.type)
+ for kvpair in cur_subvol.opts.iteritems():
+ vf.write(" option %s %s\n"%kvpair)
+ if sv_list:
+ vf.write(" subvolumes %s\n"%string.join(sv_list))
+ vf.write("end-volume\n\n")
+
+
+def mount_brick (localpath, all_xlators, dht_subvol):
+
+ # Generate a volfile.
+ vf_name = localpath + ".vol"
+ vf = open(vf_name,"w")
+ generate_stanza(vf,all_xlators,dht_subvol)
+ vf.flush()
+ vf.close()
+
+ # Create a brick directory and mount the brick there.
+ os.mkdir(localpath)
+ subprocess.call(["glusterfs","-f",vf_name,localpath])
+
+# We use the command-line tools because there's no getxattr support in the
+# Python standard library (which is ridiculous IMO). Adding the xattr package
+# from PyPI would create a new and difficult dependency because the bits to
+# satisfy it don't seem to exist in Fedora. We already expect the command-line
+# tools to be there, so it's safer just to rely on them.
+#
+# We might have to revisit this if we get as far as actually issuing millions
+# of setxattr requests. Even then, it might be better to do that part with a C
+# program which has only a build-time dependency.
+def get_range (brick):
+ t = pipes.Template()
+ cmd = "getfattr -e hex -n trusted.glusterfs.dht %s 2> /dev/null"
+ t.prepend(cmd%brick,".-")
+ t.append("grep ^trusted.glusterfs.dht=","--")
+ f = t.open(None,"r")
+ try:
+ value = f.readline().rstrip().split('=')[1][2:]
+ except:
+ print "could not get layout for %s (might be OK)" % brick
+ return None
+ v_start = int("0x"+value[16:24],16)
+ v_end = int("0x"+value[24:32],16)
+ return (v_start, v_end)
+
+def calc_sizes (bricks, total):
+ leftover = 1 << 32
+ for b in bricks:
+ if b.size:
+ b.good_size = (b.size << 32) / total
+ leftover -= b.good_size
+ else:
+ b.good_size = 0
+ if leftover:
+ # Add the leftover to an old brick if we can.
+ for b in bricks:
+ if b.good_size:
+ b.good_size += leftover
+ break
+ else:
+ # Fine, just add it wherever.
+ bricks[0].good_size += leftover
+
+# Normalization means sorting the bricks by r_start and (b) ensuring that there
+# are no gaps.
+def normalize (in_bricks):
+ out_bricks = []
+ curr_hash = 0
+ used = 0
+ while curr_hash < (1<<32):
+ curr_best = None
+ for b in in_bricks:
+ if b.r_start == curr_hash:
+ used += 1
+ out_bricks.append(b)
+ in_bricks.remove(b)
+ curr_hash = b.r_end + 1
+ break
+ else:
+ print "gap found at 0x%08x" % curr_hash
+ sys.exit(1)
+ return out_bricks + in_bricks, used
+
+def get_score (bricks):
+ score = 0
+ curr_hash = 0
+ for b in bricks:
+ if not b.curr_size:
+ curr_hash += b.good_size
+ continue
+ new_start = curr_hash
+ curr_hash += b.good_size
+ new_end = curr_hash - 1
+ if new_start > b.r_start:
+ max_start = new_start
+ else:
+ max_start = b.r_start
+ if new_end < b.r_end:
+ min_end = new_end
+ else:
+ min_end = b.r_end
+ if max_start <= min_end:
+ score += (min_end - max_start + 1)
+ return score
+
+if __name__ == "__main__":
+
+ my_usage = "%prog [options] server volume [directory]"
+ parser = optparse.OptionParser(usage=my_usage)
+ parser.add_option("-f", "--free-space", dest="free_space",
+ default=False, action="store_true",
+ help="use free space instead of total space")
+ parser.add_option("-l", "--leave-mounted", dest="leave_mounted",
+ default=False, action="store_true",
+ help="leave subvolumes mounted")
+ parser.add_option("-v", "--verbose", dest="verbose",
+ default=False, action="store_true",
+ help="verbose output")
+ options, args = parser.parse_args()
+
+ if len(args) == 3:
+ fix_dir = args[2]
+ else:
+ if len(args) != 2:
+ parser.print_help()
+ sys.exit(1)
+ fix_dir = None
+ hostname, volname = args[:2]
+
+ # Make sure stuff gets cleaned up, even if there are exceptions.
+ orig_dir = os.getcwd()
+ work_dir = tempfile.mkdtemp()
+ bricks = []
+ def cleanup_workdir ():
+ os.chdir(orig_dir)
+ if options.verbose:
+ print "Cleaning up %s" % work_dir
+ for b in bricks:
+ subprocess.call(["umount",b.path])
+ shutil.rmtree(work_dir)
+ if not options.leave_mounted:
+ atexit.register(cleanup_workdir)
+ os.chdir(work_dir)
+
+ # Mount each brick individually, so we can issue brick-specific calls.
+ if options.verbose:
+ print "Mounting subvolumes..."
+ index = 0
+ volfile_pipe = get_bricks(hostname,volname)
+ all_xlators, last_xlator = volfilter.load(volfile_pipe)
+ for dht_vol in all_xlators.itervalues():
+ if dht_vol.type == "cluster/distribute":
+ break
+ else:
+ print "no DHT volume found"
+ sys.exit(1)
+ for sv in dht_vol.subvols:
+ #print "found subvol %s" % sv.name
+ lpath = "%s/brick%s" % (work_dir, index)
+ index += 1
+ mount_brick(lpath,all_xlators,sv)
+ bricks.append(Brick(lpath,sv.name))
+ if index == 0:
+ print "no bricks"
+ sys.exit(1)
+
+ # Collect all of the sizes.
+ if options.verbose:
+ print "Collecting information..."
+ total = 0
+ for b in bricks:
+ info = os.statvfs(b.path)
+ # We want a standard unit even if different bricks use
+ # different block sizes. The size is chosen to avoid overflows
+ # for very large bricks with very small block sizes, but also
+ # accommodate filesystems which use very large block sizes to
+ # cheat on benchmarks.
+ blocksper100mb = 104857600 / info[0]
+ if options.free_space:
+ size = info[3] / blocksper100mb
+ else:
+ size = info[2] / blocksper100mb
+ if size <= 0:
+ print "brick %s has invalid size %d" % (b.path, size)
+ sys.exit(1)
+ b.set_size(size)
+ total += size
+
+ # Collect all of the layout information.
+ for b in bricks:
+ hash_range = get_range(b.path)
+ if hash_range is not None:
+ rs, re = hash_range
+ if rs > re:
+ print "%s has backwards hash range" % b.path
+ sys.exit(1)
+ b.set_range(hash_range[0],hash_range[1])
+
+ if options.verbose:
+ print "Calculating new layouts..."
+ calc_sizes(bricks,total)
+ bricks, used = normalize(bricks)
+
+ # We can't afford O(n!) here, but O(n^2) should be OK and the result
+ # should be almost as good.
+ while used < len(bricks):
+ best_place = used
+ best_score = get_score(bricks)
+ for i in xrange(used):
+ new_bricks = bricks[:]
+ del new_bricks[used]
+ new_bricks.insert(i,bricks[used])
+ new_score = get_score(new_bricks)
+ if new_score > best_score:
+ best_place = i
+ best_score = new_score
+ if best_place != used:
+ nb = bricks[used]
+ del bricks[used]
+ bricks.insert(best_place,nb)
+ used += 1
+
+ # Finalize whatever we decided on.
+ curr_hash = 0
+ for b in bricks:
+ b.r_start = curr_hash
+ curr_hash += b.good_size
+ b.r_end = curr_hash - 1
+
+ print "Here are the xattr values for your size-weighted layout:"
+ for b in bricks:
+ print " %s: 0x0000000200000000%08x%08x" % (
+ b.sv_name, b.r_start, b.r_end)
+
+ if fix_dir:
+ if options.verbose:
+ print "Fixing layout for %s" % fix_dir
+ for b in bricks:
+ value = "0x0000000200000000%08x%08x" % (
+ b.r_start, b.r_end)
+ path = "%s/%s" % (b.path, fix_dir)
+ cmd = "setfattr -n trusted.glusterfs.dht -v %s %s" % (
+ value, path)
+ print cmd
+
+ if options.leave_mounted:
+ print "The following subvolumes are still mounted:"
+ for b in bricks:
+ print "%s on %s" % (b.sv_name, b.path)
+ print "Don't forget to clean up when you're done."
+
diff --git a/extras/rpc-coverage.sh b/extras/rpc-coverage.sh
new file mode 100755
index 000000000..9148a73e2
--- /dev/null
+++ b/extras/rpc-coverage.sh
@@ -0,0 +1,480 @@
+#!/bin/bash
+
+# This script can be used to provoke 35 fops (if afr is used),
+# 28 fops (if afr is not used) (-fstat,-readdirp, and lk,xattrop calls)
+# Pending are 7 procedures.
+# getspec, fsyncdir, access, fentrylk, fsetxattr, fgetxattr, rchecksum
+# TODO: add commands which can generate fops for missing fops
+
+## Script tests below File Operations over RPC (when afr is used)
+
+# CREATE
+# ENTRYLK
+# FINODELK
+# FLUSH
+# FSTAT
+# FSYNC
+# FTRUNCATE
+# FXATTROP
+# GETXATTR
+# INODELK
+# LINK
+# LK
+# LOOKUP
+# MKDIR
+# MKNOD
+# OPEN
+# OPENDIR
+# READ
+# READDIR
+# READDIRP
+# READLINK
+# RELEASE
+# RELEASEDIR
+# REMOVEXATTR
+# RENAME
+# RMDIR
+# SETATTR
+# SETXATTR
+# STAT
+# STATFS
+# SYMLINK
+# TRUNCATE
+# UNLINK
+# WRITE
+# XATTROP
+
+#set -e;
+set -o pipefail;
+
+function fail() {
+ echo "$*: failed.";
+ exit 1;
+}
+
+function test_mkdir()
+{
+ mkdir -p $PFX/dir;
+ test $(stat -c '%F' $PFX/dir) == "directory" || fail "mkdir"
+}
+
+
+function test_create()
+{
+ : > $PFX/dir/file;
+
+ test "$(stat -c '%F' $PFX/dir/file)" == "regular empty file" || fail "create"
+}
+
+
+function test_statfs()
+{
+ local size;
+
+ size=$(stat -f -c '%s' $PFX/dir/file);
+ test "x$size" != "x0" || fail "statfs"
+}
+
+
+function test_open()
+{
+ exec 4<$PFX/dir/file || fail "open"
+ exec 4>&- || fail "open"
+}
+
+
+function test_write()
+{
+ dd if=/dev/zero of=$PFX/dir/file bs=65536 count=16 2>/dev/null;
+ test $(stat -c '%s' $PFX/dir/file) == 1048576 || fail "open"
+}
+
+
+function test_read()
+{
+ local count;
+
+ count=$(dd if=$PFX/dir/file bs=64k count=16 2>/dev/null | wc -c);
+ test $count == 1048576 || fail "read"
+}
+
+
+function test_truncate()
+{
+ truncate -s 512 $PFX/dir/file;
+ test $(stat -c '%s' $PFX/dir/file) == 512 || fail "truncate"
+
+ truncate -s 0 $PFX/dir/file;
+ test $(stat -c '%s' $PFX/dir/file) == 0 || fail "truncate"
+}
+
+
+function test_fstat()
+{
+ local msg;
+
+ export PFX;
+ msg=$(sh -c 'tail -f $PFX/dir/file --pid=$$ & sleep 1 && echo hooha > $PFX/dir/file && sleep 1');
+ test "x$msg" == "xhooha" || fail "fstat"
+}
+
+
+function test_mknod()
+{
+ mknod -m 0666 $PFX/dir/block b 13 42;
+ test "$(stat -c '%F %a %t %T' $PFX/dir/block)" == "block special file 666 d 2a" \
+ || fail "mknod for block device"
+
+ mknod -m 0666 $PFX/dir/char c 13 42;
+ test "$(stat -c '%F %a %t %T' $PFX/dir/char)" == "character special file 666 d 2a" \
+ || fail "mknod for character device"
+
+ mknod -m 0666 $PFX/dir/fifo p;
+ test "$(stat -c '%F %a' $PFX/dir/fifo)" == "fifo 666" || \
+ fail "mknod for fifo"
+}
+
+
+function test_symlink()
+{
+ local msg;
+
+ ln -s $PFX/dir/file $PFX/dir/symlink;
+ test "$(stat -c '%F' $PFX/dir/symlink)" == "symbolic link" || fail "Creation of symlink"
+
+ msg=$(cat $PFX/dir/symlink);
+ test "x$msg" == "xhooha" || fail "Content match for symlink"
+}
+
+
+function test_hardlink()
+{
+ local ino1;
+ local ino2;
+ local nlink1;
+ local nlink2;
+ local msg;
+
+ ln $PFX/dir/file $PFX/dir/hardlink;
+
+ ino1=$(stat -c '%i' $PFX/dir/file);
+ nlink1=$(stat -c '%h' $PFX/dir/file);
+ ino2=$(stat -c '%i' $PFX/dir/hardlink);
+ nlink2=$(stat -c '%h' $PFX/dir/hardlink);
+
+ test $ino1 == $ino2 || fail "Inode comparison for hardlink"
+ test $nlink1 == 2 || fail "Link count for hardlink"
+ test $nlink2 == 2 || fail "Link count for hardlink"
+
+ msg=$(cat $PFX/dir/hardlink);
+
+ test "x$msg" == "xhooha" || fail "Content match for hardlinks"
+}
+
+
+function test_rename()
+{
+ local ino1;
+ local ino2;
+ local ino3;
+ local msg;
+
+ #### file
+
+ ino1=$(stat -c '%i' $PFX/dir/file);
+
+ mv $PFX/dir/file $PFX/dir/file2 || fail "mv"
+ msg=$(cat $PFX/dir/file2);
+ test "x$msg" == "xhooha" || fail "File contents comparison after mv"
+
+ ino2=$(stat -c '%i' $PFX/dir/file2);
+ test $ino1 == $ino2 || fail "Inode comparison after mv"
+
+ mv $PFX/dir/file2 $PFX/dir/file;
+ msg=$(cat $PFX/dir/file);
+ test "x$msg" == "xhooha" || fail "File contents comparison after mv"
+
+ ino3=$(stat -c '%i' $PFX/dir/file);
+ test $ino1 == $ino3 || fail "Inode comparison after mv"
+
+ #### dir
+
+ ino1=$(stat -c '%i' $PFX/dir);
+
+ mv $PFX/dir $PFX/dir2 || fail "Directory mv"
+ ino2=$(stat -c '%i' $PFX/dir2);
+ test $ino1 == $ino2 || fail "Inode comparison after directory mv"
+
+ mv $PFX/dir2 $PFX/dir || fail "Directory mv"
+ ino3=$(stat -c '%i' $PFX/dir);
+ test $ino1 == $ino3 || fail "Inode comparison after directory mv"
+}
+
+
+function test_chmod()
+{
+ local mode0;
+ local mode1;
+ local mode2;
+
+
+ #### file
+
+ mode0=$(stat -c '%a' $PFX/dir/file);
+ chmod 0753 $PFX/dir/file || fail "chmod"
+
+ mode1=$(stat -c '%a' $PFX/dir/file);
+ test 0$mode1 == 0753 || fail "Mode comparison after chmod"
+
+ chmod 0$mode0 $PFX/dir/file || fail "chmod"
+ mode2=$(stat -c '%a' $PFX/dir/file);
+ test 0$mode2 == 0$mode0 || fail "Mode comparison after chmod"
+
+ #### dir
+
+ mode0=$(stat -c '%a' $PFX/dir);
+ chmod 0753 $PFX/dir || fail "chmod"
+
+ mode1=$(stat -c '%a' $PFX/dir);
+ test 0$mode1 == 0753 || fail "Mode comparison after chmod"
+
+ chmod 0$mode0 $PFX/dir || fail "chmod"
+ mode2=$(stat -c '%a' $PFX/dir);
+ test 0$mode2 == 0$mode0 || fail "Mode comparison after chmod"
+}
+
+
+function test_chown()
+{
+ local user1;
+ local user2;
+ local group1;
+ local group2;
+
+ #### file
+
+ user1=$(stat -c '%u' $PFX/dir/file);
+ group1=$(stat -c '%g' $PFX/dir/file);
+
+ chown 13:42 $PFX/dir/file || fail "chown"
+
+ user2=$(stat -c '%u' $PFX/dir/file);
+ group2=$(stat -c '%g' $PFX/dir/file);
+
+ test $user2 == 13 || fail "User comparison after chown"
+ test $group2 == 42 || fail "Group comparison after chown"
+
+ chown $user1:$group1 $PFX/dir/file || fail "chown"
+
+ user2=$(stat -c '%u' $PFX/dir/file);
+ group2=$(stat -c '%g' $PFX/dir/file);
+
+ test $user2 == $user1 || fail "User comparison after chown"
+ test $group2 == $group1 || fail "Group comparison after chown"
+
+ #### dir
+
+ user1=$(stat -c '%u' $PFX/dir);
+ group1=$(stat -c '%g' $PFX/dir);
+
+ chown 13:42 $PFX/dir || fail "chown"
+
+ user2=$(stat -c '%u' $PFX/dir);
+ group2=$(stat -c '%g' $PFX/dir);
+
+ test $user2 == 13 || fail "User comparison after chown"
+ test $group2 == 42 || fail "Group comparison after chown"
+
+ chown $user1:$group1 $PFX/dir || fail "chown"
+
+ user2=$(stat -c '%u' $PFX/dir);
+ group2=$(stat -c '%g' $PFX/dir);
+
+ test $user2 == $user1 || fail "User comparison after chown"
+ test $group2 == $group1 || fail "Group comparison after chown"
+}
+
+
+function test_utimes()
+{
+ local acc0;
+ local acc1;
+ local acc2;
+ local mod0;
+ local mod1;
+ local mod2;
+
+ #### file
+
+ acc0=$(stat -c '%X' $PFX/dir/file);
+ mod0=$(stat -c '%Y' $PFX/dir/file);
+
+ sleep 1;
+ touch -a $PFX/dir/file || fail "atime change on file"
+
+ acc1=$(stat -c '%X' $PFX/dir/file);
+ mod1=$(stat -c '%Y' $PFX/dir/file);
+
+ sleep 1;
+ touch -m $PFX/dir/file || fail "mtime change on file"
+
+ acc2=$(stat -c '%X' $PFX/dir/file);
+ mod2=$(stat -c '%Y' $PFX/dir/file);
+
+ test $acc0 != $acc1 || fail "atime mismatch comparison on file"
+ test $acc1 == $acc2 || fail "atime match comparison on file"
+ test $mod0 == $mod1 || fail "mtime match comparison on file"
+ test $mod1 != $mod2 || fail "mtime mismatch comparison on file"
+
+ #### dir
+
+ acc0=$(stat -c '%X' $PFX/dir);
+ mod0=$(stat -c '%Y' $PFX/dir);
+
+ sleep 1;
+ touch -a $PFX/dir || fail "atime change on directory"
+
+ acc1=$(stat -c '%X' $PFX/dir);
+ mod1=$(stat -c '%Y' $PFX/dir);
+
+ sleep 1;
+ touch -m $PFX/dir || fail "mtime change on directory"
+
+ acc2=$(stat -c '%X' $PFX/dir);
+ mod2=$(stat -c '%Y' $PFX/dir);
+
+ test $acc0 != $acc1 || fail "atime mismatch comparison on directory"
+ test $acc1 == $acc2 || fail "atime match comparison on directory"
+ test $mod0 == $mod1 || fail "mtime match comparison on directory"
+ test $mod1 != $mod2 || fail "mtime mismatch comparison on directory"
+}
+
+
+function test_locks()
+{
+ exec 200>$PFX/dir/lockfile || fail "exec"
+
+ ## exclusive locks test
+ flock -e 200 || fail "flock -e"
+ ! flock -n -e $PFX/dir/lockfile -c true || fail "! flock -n -e"
+ ! flock -n -s $PFX/dir/lockfile -c true || fail "! flock -n -s"
+ flock -u 200 || fail "flock -u"
+
+ ## shared locks test
+ flock -s 200 || fail "flock -s"
+ ! flock -n -e $PFX/dir/lockfile -c true || fail "! flock -n -e"
+ flock -n -s $PFX/dir/lockfile -c true || fail "! flock -n -s"
+ flock -u 200 || fail "flock -u"
+
+ exec 200>&- || fail "exec"
+
+}
+
+
+function test_readdir()
+{
+ /bin/ls $PFX/dir >/dev/null || fail "ls"
+}
+
+
+function test_setxattr()
+{
+ setfattr -n trusted.testing -v c00k33 $PFX/dir/file || fail "setfattr"
+}
+
+
+function test_listxattr()
+{
+ getfattr -m trusted $PFX/dir/file 2>/dev/null | grep -q trusted.testing || fail "getfattr"
+}
+
+
+function test_getxattr()
+{
+ getfattr -n trusted.testing $PFX/dir/file 2>/dev/null | grep -q c00k33 || fail "getfattr"
+}
+
+
+function test_removexattr()
+{
+ setfattr -x trusted.testing $PFX/dir/file || fail "setfattr remove"
+ getfattr -n trusted.testing $PFXf/dir/file 2>&1 | grep -q "No such attribute"
+}
+
+
+function test_unlink()
+{
+ rm $PFX/dir/file || fail "rm"
+}
+
+
+function test_rmdir()
+{
+ rm -rf $PFX || fail "rm -rf"
+}
+
+
+function run_tests()
+{
+ test_mkdir;
+ test_create;
+ test_statfs;
+ test_open;
+ test_write;
+ test_read;
+ test_truncate;
+ test_fstat;
+ test_mknod;
+ test_hardlink;
+ test_symlink;
+ test_rename;
+ test_chmod;
+ test_chown;
+ test_utimes;
+ test_locks;
+ test_readdir;
+ test_setxattr;
+ test_listxattr;
+ test_getxattr;
+ test_removexattr;
+ test_unlink;
+ test_rmdir;
+}
+
+
+function _init()
+{
+ DIR=$(pwd);
+}
+
+
+function parse_cmdline()
+{
+ if [ "x$1" == "x" ] ; then
+ echo "Usage: $0 /path/mount"
+ exit 1
+ fi
+
+ DIR=$1;
+
+ if [ ! -d "$DIR" ] ; then
+ echo "$DIR: not a directory"
+ exit 1
+ fi
+
+ PFX="$DIR/coverage";
+ rm -rvf $PFX;
+}
+
+
+function main()
+{
+ parse_cmdline "$@";
+
+ run_tests;
+
+ exit 0;
+}
+
+
+_init && main "$@";
diff --git a/extras/specgen.scm b/extras/specgen.scm
index 32aff6480..0edbb6f62 100755
--- a/extras/specgen.scm
+++ b/extras/specgen.scm
@@ -1,7 +1,7 @@
#!/usr/bin/guile -s
!#
-;;; Copyright (C) 2007-2009 Z RESEARCH Inc. <http://www.zresearch.com>
+;;; Copyright (c) 2007-2011 Gluster Inc. <http://www.gluster.com>
;;;
;;; This program is free software; you can redistribute it and/or modify
;;; it under the terms of the GNU General Public License as published by
diff --git a/extras/stripe-merge.c b/extras/stripe-merge.c
index 3f8e4b124..74bd47e30 100644
--- a/extras/stripe-merge.c
+++ b/extras/stripe-merge.c
@@ -1,48 +1,494 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/*
+ * stripe-merge.c
+ *
+ * This program recovers an original file based on the striped files stored on
+ * the individual bricks of a striped volume. The file format and stripe
+ * geometry is validated through the extended attributes stored in the file.
+ *
+ * TODO: Support optional xattr recovery (i.e., user xattrs). Perhaps provide a
+ * command-line flag to toggle this behavior.
+ */
+
#include <stdio.h>
-#include <unistd.h>
-#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <errno.h>
+#include <string.h>
+#include <attr/xattr.h>
+#include <fnmatch.h>
-int
-main (int argc, char *argv[])
+#define ATTRNAME_STRIPE_INDEX "trusted.*.stripe-index"
+#define ATTRNAME_STRIPE_COUNT "trusted.*.stripe-count"
+#define ATTRNAME_STRIPE_SIZE "trusted.*.stripe-size"
+#define ATTRNAME_STRIPE_COALESCE "trusted.*.stripe-coalesce"
+
+#define INVALID_FD -1
+#define INVALID_MODE UINT32_MAX
+
+struct file_stripe_info {
+ int stripe_count;
+ int stripe_size;
+ int coalesce;
+ mode_t mode;
+ int fd[0];
+};
+
+static int close_files(struct file_stripe_info *);
+
+static struct
+file_stripe_info *alloc_file_stripe_info(int count)
{
- int fds[argc-1];
- char buf[argc-1][4096];
int i;
- int max_ret, ret;
+ struct file_stripe_info *finfo;
- if (argc < 2) {
- printf ("Usage: %s file1 file2 ... >file\n", argv[0]);
- return 1;
+ finfo = calloc(1, sizeof(struct file_stripe_info) +
+ (sizeof(int) * count));
+ if (!finfo)
+ return NULL;
+
+ for (i = 0; i < count; i++)
+ finfo->fd[i] = INVALID_FD;
+
+ finfo->mode = INVALID_MODE;
+ finfo->coalesce = INVALID_FD;
+
+ return finfo;
+}
+
+/*
+ * Search for an attribute matching the provided pattern. Return a count for
+ * the total number of matching entries (including 0). Allocate a buffer for
+ * the first matching entry found.
+ */
+static int
+get_stripe_attr_name(const char *path, const char *pattern, char **attrname)
+{
+ char attrbuf[4096];
+ char *ptr, *match = NULL;
+ int len, r, match_count = 0;
+
+ if (!path || !pattern || !attrname)
+ return -1;
+
+ len = listxattr(path, attrbuf, sizeof(attrbuf));
+ if (len < 0)
+ return len;
+
+ ptr = attrbuf;
+ while (ptr) {
+ r = fnmatch(pattern, ptr, 0);
+ if (!r) {
+ if (!match)
+ match = ptr;
+ match_count++;
+ } else if (r != FNM_NOMATCH) {
+ return -1;
+ }
+
+ len -= strlen(ptr) + 1;
+ if (len > 0)
+ ptr += strlen(ptr) + 1;
+ else
+ ptr = NULL;
}
- for (i=0; i<argc-1; i++) {
- fds[i] = open (argv[i+1], O_RDONLY);
- if (fds[i] == -1) {
- perror (argv[i+1]);
- return 1;
+ if (match)
+ *attrname = strdup(match);
+
+ return match_count;
+}
+
+/*
+ * Get the integer representation of a named attribute.
+ */
+static int
+get_stripe_attr_val(const char *path, const char *attr, int *val)
+{
+ char attrbuf[4096];
+ int len;
+
+ if (!path || !attr || !val)
+ return -1;
+
+ len = getxattr(path, attr, attrbuf, sizeof(attrbuf));
+ if (len < 0)
+ return len;
+
+ *val = atoi(attrbuf);
+
+ return 0;
+}
+
+/*
+ * Get an attribute name/value (assumed to be an integer) pair based on a
+ * specified search pattern. A buffer is allocated for the exact attr name
+ * returned. Optionally, skip the pattern search if a buffer is provided
+ * (which should contain an attribute name).
+ *
+ * Returns the attribute count or -1 on error. The value parameter is set only
+ * when a single attribute is found.
+ */
+static int
+get_attr(const char *path, const char *pattern, char **buf, int *val)
+{
+ int count = 1;
+
+ if (!buf)
+ return -1;
+
+ if (!*buf) {
+ count = get_stripe_attr_name(path, pattern, buf);
+ if (count > 1) {
+ /* pattern isn't good enough */
+ fprintf(stderr, "ERROR: duplicate attributes found "
+ "matching pattern: %s\n", pattern);
+ free(*buf);
+ *buf = NULL;
+ return count;
+ } else if (count < 1) {
+ return count;
}
}
- max_ret = 0;
+ if (get_stripe_attr_val(path, *buf, val) < 0)
+ return -1;
+
+ return count;
+}
+
+/*
+ * validate_and_open_files()
+ *
+ * Open the provided source files and validate the extended attributes. Verify
+ * that the geometric attributes are consistent across all of the files and
+ * print a warning if any files are missing. We proceed without error in the
+ * latter case to support partial recovery.
+ */
+static struct
+file_stripe_info *validate_and_open_files(char *paths[], int count)
+{
+ int i, val, tmp;
+ struct stat sbuf;
+ char *stripe_count_attr = NULL;
+ char *stripe_size_attr = NULL;
+ char *stripe_index_attr = NULL;
+ char *stripe_coalesce_attr = NULL;
+ struct file_stripe_info *finfo = NULL;
+
+ for (i = 0; i < count; i++) {
+ if (!paths[i])
+ goto err;
+
+ /*
+ * Check the stripe count first so we can allocate the info
+ * struct with the appropriate number of fds.
+ */
+ if (get_attr(paths[i], ATTRNAME_STRIPE_COUNT,
+ &stripe_count_attr, &val) != 1) {
+ fprintf(stderr, "ERROR: %s: attribute: '%s'\n",
+ paths[i], ATTRNAME_STRIPE_COUNT);
+ goto err;
+ }
+ if (!finfo) {
+ finfo = alloc_file_stripe_info(val);
+ if (!finfo)
+ goto err;
+
+ if (val != count)
+ fprintf(stderr, "WARNING: %s: stripe-count "
+ "(%d) != file count (%d). Result may "
+ "be incomplete.\n", paths[i], val,
+ count);
+
+ finfo->stripe_count = val;
+ } else if (val != finfo->stripe_count) {
+ fprintf(stderr, "ERROR %s: invalid stripe count: %d "
+ "(expected %d)\n", paths[i], val,
+ finfo->stripe_count);
+ goto err;
+ }
+
+ /*
+ * Get and validate the chunk size.
+ */
+ if (get_attr(paths[i], ATTRNAME_STRIPE_SIZE, &stripe_size_attr,
+ &val) != 1) {
+ fprintf(stderr, "ERROR: %s: attribute: '%s'\n",
+ paths[i], ATTRNAME_STRIPE_SIZE);
+ goto err;
+ }
+
+ if (!finfo->stripe_size) {
+ finfo->stripe_size = val;
+ } else if (val != finfo->stripe_size) {
+ fprintf(stderr, "ERROR: %s: invalid stripe size: %d "
+ "(expected %d)\n", paths[i], val,
+ finfo->stripe_size);
+ goto err;
+ }
+
+ /*
+ * stripe-coalesce is a backward compatible attribute. If the
+ * attribute does not exist, assume a value of zero for the
+ * traditional stripe format.
+ */
+ tmp = get_attr(paths[i], ATTRNAME_STRIPE_COALESCE,
+ &stripe_coalesce_attr, &val);
+ if (!tmp) {
+ val = 0;
+ } else if (tmp != 1) {
+ fprintf(stderr, "ERROR: %s: attribute: '%s'\n",
+ paths[i], ATTRNAME_STRIPE_COALESCE);
+ goto err;
+ }
+
+ if (finfo->coalesce == INVALID_FD) {
+ finfo->coalesce = val;
+ } else if (val != finfo->coalesce) {
+ fprintf(stderr, "ERROR: %s: invalid coalesce flag\n",
+ paths[i]);
+ goto err;
+ }
+
+ /*
+ * Get/validate the stripe index and open the file in the
+ * appropriate fd slot.
+ */
+ if (get_attr(paths[i], ATTRNAME_STRIPE_INDEX,
+ &stripe_index_attr, &val) != 1) {
+ fprintf(stderr, "ERROR: %s: attribute: '%s'\n",
+ paths[i], ATTRNAME_STRIPE_INDEX);
+ goto err;
+ }
+ if (finfo->fd[val] != INVALID_FD) {
+ fprintf(stderr, "ERROR: %s: duplicate stripe index: "
+ "%d\n", paths[i], val);
+ goto err;
+ }
+
+ finfo->fd[val] = open(paths[i], O_RDONLY);
+ if (finfo->fd[val] < 0)
+ goto err;
+
+ /*
+ * Get the creation mode for the file.
+ */
+ if (fstat(finfo->fd[val], &sbuf) < 0)
+ goto err;
+ if (finfo->mode == INVALID_MODE) {
+ finfo->mode = sbuf.st_mode;
+ } else if (sbuf.st_mode != finfo->mode) {
+ fprintf(stderr, "ERROR: %s: invalid mode\n", paths[i]);
+ goto err;
+ }
+ }
+
+ free(stripe_count_attr);
+ free(stripe_size_attr);
+ free(stripe_index_attr);
+ free(stripe_coalesce_attr);
+
+ return finfo;
+err:
+
+ free(stripe_count_attr);
+ free(stripe_size_attr);
+ free(stripe_index_attr);
+ free(stripe_coalesce_attr);
+
+ if (finfo) {
+ close_files(finfo);
+ free(finfo);
+ }
+
+ return NULL;
+}
+
+static int
+close_files(struct file_stripe_info *finfo)
+{
+ int i, ret;
+
+ if (!finfo)
+ return -1;
+
+ for (i = 0; i < finfo->stripe_count; i++) {
+ if (finfo->fd[i] == INVALID_FD)
+ continue;
+
+ ret = close(finfo->fd[i]);
+ if (ret < 0)
+ return ret;
+ }
+
+ return ret;
+}
+
+/*
+ * Generate the original file using files striped in the coalesced format.
+ * Data in the striped files is stored at a coalesced offset based on the
+ * stripe number.
+ *
+ * Walk through the finfo fds (which are already ordered) and and iteratively
+ * copy stripe_size bytes from the source files to the target file. If a source
+ * file is missing, seek past the associated stripe_size bytes in the target
+ * file.
+ */
+static int
+generate_file_coalesce(int target, struct file_stripe_info *finfo)
+{
+ char *buf;
+ int ret = 0;
+ int r, w, i;
+
+ buf = malloc(finfo->stripe_size);
+ if (!buf)
+ return -1;
+
+ i = 0;
+ while (1) {
+ if (finfo->fd[i] == INVALID_FD) {
+ if (lseek(target, finfo->stripe_size, SEEK_CUR) < 0)
+ break;
+
+ i = (i + 1) % finfo->stripe_count;
+ continue;
+ }
+
+ r = read(finfo->fd[i], buf, finfo->stripe_size);
+ if (r < 0) {
+ ret = r;
+ break;
+ }
+ if (!r)
+ break;
+
+ w = write(target, buf, r);
+ if (w < 0) {
+ ret = w;
+ break;
+ }
+
+ i = (i + 1) % finfo->stripe_count;
+ }
+
+ free(buf);
+ return ret;
+}
+
+/*
+ * Generate the original file using files striped with the traditional stripe
+ * format. Data in the striped files is stored at the equivalent offset from
+ * the source file.
+ */
+static int
+generate_file_traditional(int target, struct file_stripe_info *finfo)
+{
+ int i, j, max_ret, ret;
+ char buf[finfo->stripe_count][4096];
+
do {
char newbuf[4096] = {0, };
- int j;
max_ret = 0;
- for (i=0; i<argc-1; i++) {
- memset (buf[i], 0, 4096);
- ret = read (fds[i], buf[i], 4096);
+ for (i = 0; i < finfo->stripe_count; i++) {
+ memset(buf[i], 0, 4096);
+ ret = read(finfo->fd[i], buf[i], 4096);
if (ret > max_ret)
max_ret = ret;
}
- for (i=0; i<max_ret;i++)
- for (j=0; j<argc-1; j++)
+ for (i = 0; i < max_ret; i++)
+ for (j = 0; j < finfo->stripe_count; j++)
newbuf[i] |= buf[j][i];
- write (1, newbuf, max_ret);
+ write(target, newbuf, max_ret);
} while (max_ret);
return 0;
}
+static int
+generate_file(int target, struct file_stripe_info *finfo)
+{
+ if (finfo->coalesce)
+ return generate_file_coalesce(target, finfo);
+
+ return generate_file_traditional(target, finfo);
+}
+
+static void
+usage(char *name)
+{
+ fprintf(stderr, "Usage: %s [-o <outputfile>] <inputfile1> "
+ "<inputfile2> ...\n", name);
+}
+
+int
+main(int argc, char *argv[])
+{
+ int file_count, opt;
+ char *opath = NULL;
+ int targetfd;
+ struct file_stripe_info *finfo;
+
+ while ((opt = getopt(argc, argv, "o:")) != -1) {
+ switch (opt) {
+ case 'o':
+ opath = optarg;
+ break;
+ default:
+ usage(argv[0]);
+ return -1;
+ }
+ }
+
+ file_count = argc - optind;
+
+ if (!opath || !file_count) {
+ usage(argv[0]);
+ return -1;
+ }
+
+ finfo = validate_and_open_files(&argv[optind], file_count);
+ if (!finfo)
+ goto err;
+
+ targetfd = open(opath, O_RDWR|O_CREAT, finfo->mode);
+ if (targetfd < 0)
+ goto err;
+
+ if (generate_file(targetfd, finfo) < 0)
+ goto err;
+
+ if (fsync(targetfd) < 0)
+ fprintf(stderr, "ERROR: %s\n", strerror(errno));
+ if (close(targetfd) < 0)
+ fprintf(stderr, "ERROR: %s\n", strerror(errno));
+
+ close_files(finfo);
+ free(finfo);
+
+ return 0;
+
+err:
+ if (finfo) {
+ close_files(finfo);
+ free(finfo);
+ }
+
+ return -1;
+}
+
diff --git a/extras/systemd/Makefile.am b/extras/systemd/Makefile.am
new file mode 100644
index 000000000..3fc656b82
--- /dev/null
+++ b/extras/systemd/Makefile.am
@@ -0,0 +1,11 @@
+
+CLEANFILES =
+
+SYSTEMD_DIR = @systemddir@
+
+install-exec-local:
+ @if [ -d $(SYSTEMD_DIR) ]; then \
+ $(mkdir_p) $(DESTDIR)$(SYSTEMD_DIR); \
+ $(INSTALL_PROGRAM) glusterd.service $(DESTDIR)$(SYSTEMD_DIR)/; \
+ fi
+
diff --git a/extras/systemd/glusterd.service.in b/extras/systemd/glusterd.service.in
new file mode 100644
index 000000000..fc8d8c9a2
--- /dev/null
+++ b/extras/systemd/glusterd.service.in
@@ -0,0 +1,14 @@
+[Unit]
+Description=GlusterFS, a clustered file-system server
+After=network.target rpcbind.service
+Before=network-online.target
+
+[Service]
+Type=forking
+PIDFile=/run/glusterd.pid
+LimitNOFILE=65536
+ExecStart=@prefix@/sbin/glusterd -p /run/glusterd.pid
+KillMode=process
+
+[Install]
+WantedBy=multi-user.target
diff --git a/extras/test/bug-920583.t b/extras/test/bug-920583.t
new file mode 100755
index 000000000..eedbb800a
--- /dev/null
+++ b/extras/test/bug-920583.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+##Copy this file to tests/bugs before running run.sh (cp extras/test/bug-920583.t tests/bugs/)
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+logdir=`gluster --print-logdir`
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 replica 2 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+function log-file-name()
+{
+ logfilename=$M0".log"
+ echo ${logfilename:1} | tr / -
+}
+
+log_file=$logdir"/"`log-file-name`
+
+lookup_unhashed_count=`grep "adding option 'lookup-unhashed'" $log_file | wc -l`
+no_child_down_count=`grep "adding option 'assert-no-child-down'" $log_file | wc -l`
+mount -t glusterfs $H0:/$V0 $M0 -o "xlator-option=*dht.assert-no-child-down=yes,xlator-option=*dht.lookup-unhashed=yes"
+touch $M0/file1;
+
+new_lookup_unhashed_count=`grep "adding option 'lookup-unhashed'" $log_file | wc -l`
+new_no_child_down_count=`grep "adding option 'assert-no-child-down'" $log_file | wc -l`
+EXPECT "1" expr $new_lookup_unhashed_count - $lookup_unhashed_count
+EXPECT "1" expr $new_no_child_down_count - $no_child_down_count
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/extras/test/gluster_commands.sh b/extras/test/gluster_commands.sh
new file mode 100755
index 000000000..cb2a55fd5
--- /dev/null
+++ b/extras/test/gluster_commands.sh
@@ -0,0 +1,265 @@
+#!/bin/bash
+
+
+# Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+
+# This script tests the basics gluster cli commands.
+
+echo "Starting glusterd"
+glusterd
+if [ $? -ne 0 ]; then
+ echo "Could not start glusterd.Exiting"
+ exit;
+else
+ echo "glusterd started"
+fi
+
+if [ ! -d "/exports" ]; then
+ mkdir /exports;
+ mkdir /exports/exp{1..10};
+else
+ mkdir /exports/exp{1..10};
+fi
+
+if [ ! -d "/mnt/client" ]; then
+ mkdir /mnt/client -p;
+fi
+
+
+set -e #exit at the first error that happens
+
+# create distribute volume and try start, mount, add-brick, replace-brick, remove-brick, stop, unmount, delete
+
+echo "Creating distribute volume........"
+gluster volume create vol `hostname`:/exports/exp1
+gluster volume info
+
+echo "Starting distribute volume........"
+gluster volume start vol
+gluster volume info
+sleep 1
+mount -t glusterfs `hostname`:vol /mnt/client
+sleep 1
+df -h
+
+echo "adding-brick......."
+gluster volume add-brick vol `hostname`:/exports/exp2
+gluster volume info
+sleep 1
+umount /mnt/client
+mount -t glusterfs `hostname`:vol /mnt/client
+df -h
+sleep 1
+
+echo "replacing brick......"
+gluster volume replace-brick vol `hostname`:/exports/exp1 `hostname`:/exports/exp3 start
+#sleep for 5 seconds
+sleep 5
+gluster volume replace-brick vol `hostname`:/exports/exp1 `hostname`:/exports/exp3 status
+gluster volume replace-brick vol `hostname`:/exports/exp1 `hostname`:/exports/exp3 pause
+gluster volume replace-brick vol `hostname`:/exports/exp1 `hostname`:/exports/exp3 status
+gluster volume replace-brick vol `hostname`:/exports/exp1 `hostname`:/exports/exp3 start
+#sleep for 5 seconds
+sleep 5
+gluster volume replace-brick vol `hostname`:/exports/exp1 `hostname`:/exports/exp3 status
+gluster volume replace-brick vol `hostname`:/exports/exp1 `hostname`:/exports/exp3 commit
+
+
+echo "replcing brick for abort operation"
+gluster volume replace-brick vol `hostname`:/exports/exp3 `hostname`:/exports/exp1 start
+#sleep for 5 seconds
+sleep 5
+gluster volume replace-brick vol `hostname`:/exports/exp3 `hostname`:/exports/exp1 status
+gluster volume replace-brick vol `hostname`:/exports/exp3 `hostname`:/exports/exp1 pause
+gluster volume replace-brick vol `hostname`:/exports/exp3 `hostname`:/exports/exp1 status
+gluster volume replace-brick vol `hostname`:/exports/exp3 `hostname`:/exports/exp1 start
+#sleep for 5 seconds
+sleep 5
+gluster volume replace-brick vol `hostname`:/exports/exp3 `hostname`:/exports/exp1 status
+gluster volume replace-brick vol `hostname`:/exports/exp3 `hostname`:/exports/exp1 abort
+
+
+gluster volume info
+sleep 1
+df -h
+sleep 1
+
+echo "removing brick......."
+gluster --mode=script volume remove-brick vol `hostname`:/exports/exp2
+gluster volume info
+sleep 1
+df -h
+sleep 1
+
+echo "stopping distribute volume......"
+gluster --mode=script volume stop vol
+gluster volume info
+sleep 1
+umount /mnt/client
+df -h
+
+echo "deleting distribute volume......"
+gluster --mode=script volume delete vol
+gluster volume info
+sleep 1
+
+# create replicate volume and try start, mount, add-brick, replace-brick, remove-brick, stop, unmount, delete
+echo "creating replicate volume......"
+gluster volume create mirror replica 2 `hostname`:/exports/exp1 `hostname`:/exports/exp2
+gluster volume info
+sleep 1
+
+echo "starting replicate volume......"
+gluster volume start mirror
+gluster volume info
+sleep 1
+mount -t glusterfs `hostname`:mirror /mnt/client
+sleep 1
+df -h
+sleep 1
+
+echo "adding-brick......."
+gluster volume add-brick mirror `hostname`:/exports/exp3 `hostname`:/exports/exp4
+gluster volume info
+sleep 1
+df -h
+sleep 1
+
+echo "replacing-brick....."
+gluster volume replace-brick mirror `hostname`:/exports/exp1 `hostname`:/exports/exp5 start
+#sleep for 5 seconds
+sleep 5
+gluster volume replace-brick mirror `hostname`:/exports/exp1 `hostname`:/exports/exp5 status
+gluster volume replace-brick mirror `hostname`:/exports/exp1 `hostname`:/exports/exp5 pause
+gluster volume replace-brick mirror `hostname`:/exports/exp1 `hostname`:/exports/exp5 status
+gluster volume replace-brick mirror `hostname`:/exports/exp1 `hostname`:/exports/exp5 start
+#sleep for 5 seconds
+sleep 5
+gluster volume replace-brick mirror `hostname`:/exports/exp1 `hostname`:/exports/exp5 status
+gluster volume replace-brick mirror `hostname`:/exports/exp1 `hostname`:/exports/exp5 commit
+gluster volume info
+sleep 1
+df -h
+sleep 1
+
+echo "replacing brick for abort operation"
+gluster volume replace-brick mirror `hostname`:/exports/exp5 `hostname`:/exports/exp1 start
+#sleep for 5 seconds
+sleep 5
+gluster volume replace-brick mirror `hostname`:/exports/exp5 `hostname`:/exports/exp1 status
+gluster volume replace-brick mirror `hostname`:/exports/exp5 `hostname`:/exports/exp1 pause
+gluster volume replace-brick mirror `hostname`:/exports/exp5 `hostname`:/exports/exp1 status
+gluster volume replace-brick mirror `hostname`:/exports/exp5 `hostname`:/exports/exp1 start
+#sleep for 5 seconds
+sleep 5
+gluster volume replace-brick mirror `hostname`:/exports/exp5 `hostname`:/exports/exp1 status
+gluster volume replace-brick mirror `hostname`:/exports/exp5 `hostname`:/exports/exp1 abort
+
+gluster volume info
+sleep 1
+df -h
+sleep 1
+
+echo "removeing-brick....."
+gluster --mode=script volume remove-brick mirror `hostname`:/exports/exp3 `hostname`:/exports/exp4
+gluster volume info
+sleep 1
+df -h
+sleep 1
+
+echo "stopping replicate volume....."
+gluster --mode=script volume stop mirror
+gluster volume info
+sleep 1
+umount /mnt/client
+df -h
+
+echo "deleting replicate volume....."
+gluster --mode=script volume delete mirror
+gluster volume info
+sleep 1
+
+# create stripe volume and try start, mount, add-brick, replace-brick, remove-brick, stop, unmount, delete
+
+echo "creating stripe volume....."
+gluster volume create str stripe 2 `hostname`:/exports/exp1 `hostname`:/exports/exp2
+gluster volume info
+sleep 1
+
+echo "starting stripe volume....."
+gluster volume start str
+gluster volume info
+sleep 1
+mount -t glusterfs `hostname`:str /mnt/client
+sleep 1
+df -h
+sleep 1
+
+echo "adding brick...."
+gluster volume add-brick str `hostname`:/exports/exp3 `hostname`:/exports/exp4
+gluster volume info
+sleep 1
+df -h
+sleep 1
+
+echo "replacing brick....."
+gluster volume replace-brick str `hostname`:/exports/exp1 `hostname`:/exports/exp5 start
+#sleep for 5 seconds
+sleep 5
+gluster volume replace-brick str `hostname`:/exports/exp1 `hostname`:/exports/exp5 status
+gluster volume replace-brick str `hostname`:/exports/exp1 `hostname`:/exports/exp5 pause
+gluster volume replace-brick str `hostname`:/exports/exp1 `hostname`:/exports/exp5 status
+gluster volume replace-brick str `hostname`:/exports/exp1 `hostname`:/exports/exp5 start
+#sleep for 5 seconds
+sleep 5
+gluster volume replace-brick str `hostname`:/exports/exp1 `hostname`:/exports/exp5 status
+gluster volume replace-brick str `hostname`:/exports/exp1 `hostname`:/exports/exp5 commit
+
+gluster volume info
+sleep 1
+df -h
+sleep 1
+
+echo "replacing brick for abort operation"
+gluster volume replace-brick str `hostname`:/exports/exp5 `hostname`:/exports/exp1 start
+#sleep for 5 seconds
+sleep 5
+gluster volume replace-brick str `hostname`:/exports/exp5 `hostname`:/exports/exp1 status
+gluster volume replace-brick str `hostname`:/exports/exp5 `hostname`:/exports/exp1 pause
+gluster volume replace-brick str `hostname`:/exports/exp5 `hostname`:/exports/exp1 status
+gluster volume replace-brick str `hostname`:/exports/exp5 `hostname`:/exports/exp1 start
+#sleep for 5 seconds
+sleep 5
+gluster volume replace-brick str `hostname`:/exports/exp5 `hostname`:/exports/exp1 status
+gluster volume replace-brick str `hostname`:/exports/exp5 `hostname`:/exports/exp1 abort
+
+gluster volume info
+sleep 1
+df -h
+sleep 1
+
+echo "removing-brick....."
+gluster --mode=script volume remove-brick str `hostname`:/exports/exp3 `hostname`:/exports/exp4
+gluster volume info
+sleep 1
+df -h
+sleep 1
+
+echo "stopping stripe volume....."
+gluster --mode=script volume stop str
+gluster volume info
+sleep 1
+umount /mnt/client
+df -h
+
+echo "deleting stripe volume....."
+gluster --mode=script volume delete str
+gluster volume info
+
diff --git a/extras/test/ld-preload-test/ld-preload-lib.c b/extras/test/ld-preload-test/ld-preload-lib.c
index 8a7504e9c..88afd14c3 100644
--- a/extras/test/ld-preload-test/ld-preload-lib.c
+++ b/extras/test/ld-preload-test/ld-preload-lib.c
@@ -1,23 +1,13 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
/* LD PRELOAD'able library
* A very simple library that intercepts booster supported system calls
* and prints a log message to stdout.
diff --git a/extras/test/ld-preload-test/ld-preload-test.c b/extras/test/ld-preload-test/ld-preload-test.c
index d4e9c675c..78772f598 100644
--- a/extras/test/ld-preload-test/ld-preload-test.c
+++ b/extras/test/ld-preload-test/ld-preload-test.c
@@ -1,23 +1,13 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
/*
* LD PRELOAD Test Tool
*
diff --git a/extras/test/open-fd-tests.c b/extras/test/open-fd-tests.c
new file mode 100644
index 000000000..4184079d0
--- /dev/null
+++ b/extras/test/open-fd-tests.c
@@ -0,0 +1,64 @@
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <attr/xattr.h>
+#include <errno.h>
+#include <string.h>
+
+int
+main (int argc, char *argv[])
+{
+ int ret = -1;
+ int fd = 0;
+ char *filename = NULL;
+ int loop = 0;
+ struct stat stbuf = {0,};
+ char string[1024] = {0,};
+
+ if (argc > 1)
+ filename = argv[1];
+
+ if (!filename)
+ filename = "temp-fd-test-file";
+
+ fd = open (filename, O_RDWR|O_CREAT|O_TRUNC);
+ if (fd < 0) {
+ fd = 0;
+ fprintf (stderr, "open failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ while (loop < 1000) {
+ /* Use it as a mechanism to test time delays */
+ memset (string, 0, 1024);
+ scanf ("%s", string);
+
+ ret = write (fd, string, strlen (string));
+ if (ret != strlen (string)) {
+ fprintf (stderr, "write failed : %s (%s %d)\n",
+ strerror (errno), string, loop);
+ goto out;
+ }
+
+ ret = write (fd, "\n", 1);
+ if (ret != 1) {
+ fprintf (stderr, "write failed : %s (%d)\n",
+ strerror (errno), loop);
+ goto out;
+ }
+
+ loop++;
+ }
+
+ fprintf (stdout, "finishing the test after %d loops\n", loop);
+
+ ret = 0;
+out:
+ if (fd)
+ close (fd);
+
+ return ret;
+}
diff --git a/extras/test/run.sh b/extras/test/run.sh
new file mode 100755
index 000000000..4b3839cf6
--- /dev/null
+++ b/extras/test/run.sh
@@ -0,0 +1,22 @@
+#!/bin/sh
+
+# Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+# Running gluster sanity test which starts glusterd and runs gluster commands, and exit at the first failure.
+$PWD/gluster_commands.sh
+
+if [ $? -ne 0 ]; then
+ echo "sanity failed"
+else
+ echo "sanity passed"
+fi
+
+# Stopping glusterd
+$PWD/stop_glusterd.sh
+
diff --git a/extras/test/stop_glusterd.sh b/extras/test/stop_glusterd.sh
new file mode 100755
index 000000000..a2db13f42
--- /dev/null
+++ b/extras/test/stop_glusterd.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+# Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+#This script stops the glusterd running on the machine. Helpful for gluster sanity script
+
+killall -9 glusterd
+
+if [ $? -ne 0 ]; then
+ echo "Error: Could not kill glusterd. Either glusterd is not running or kill it manually"
+else
+ echo "Killed glusterd"
+fi
diff --git a/extras/test/test-ffop.c b/extras/test/test-ffop.c
new file mode 100644
index 000000000..2d174d452
--- /dev/null
+++ b/extras/test/test-ffop.c
@@ -0,0 +1,870 @@
+#include <stdio.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <attr/xattr.h>
+#include <errno.h>
+#include <string.h>
+#include <dirent.h>
+
+int fd_based_fops_1 (char *filename); //for fd based fops after unlink
+int fd_based_fops_2 (char *filename); //for fd based fops before unlink
+int dup_fd_based_fops (char *filename); // fops based on fd after dup
+int path_based_fops (char *filename); //for fops based on path
+int dir_based_fops (char *filename); // for fops which operate on directory
+int link_based_fops (char *filename); //for fops which operate in link files (symlinks)
+int test_open_modes (char *filename); // to test open syscall with open modes available.
+int generic_open_read_write (char *filename, int flag); // generic function which does open write and read.
+
+int
+main (int argc, char *argv[])
+{
+ int ret = -1;
+ char filename[255] = {0,};
+
+ if (argc > 1)
+ strcpy(filename, argv[1]);
+ else
+ strcpy(filename, "temp-xattr-test-file");
+
+ ret = fd_based_fops_1 (strcat(filename, "_1"));
+ if (ret < 0)
+ fprintf (stderr, "fd based file operation 1 failed\n");
+ else
+ fprintf (stdout, "fd based file operation 1 passed\n");
+
+ ret = fd_based_fops_2 (strcat(filename, "_2"));
+ if (ret < 0)
+ fprintf (stderr, "fd based file operation 2 failed\n");
+ else
+ fprintf (stdout, "fd based file operation 2 passed\n");
+
+ ret = dup_fd_based_fops (strcat (filename, "_3"));
+ if (ret < 0)
+ fprintf (stderr, "dup fd based file operation failed\n");
+ else
+ fprintf (stdout, "dup fd based file operation passed\n");
+
+ ret = path_based_fops (strcat (filename, "_4"));
+ if (ret < 0)
+ fprintf (stderr, "path based file operation failed\n");
+ else
+ fprintf (stdout, "path based file operation passed\n");
+
+ ret = dir_based_fops (strcat (filename, "_5"));
+ if (ret < 0)
+ fprintf (stderr, "directory based file operation failed\n");
+ else
+ fprintf (stdout, "directory based file operation passed\n");
+
+ ret = link_based_fops (strcat (filename, "_5"));
+ if (ret < 0)
+ fprintf (stderr, "link based file operation failed\n");
+ else
+ fprintf (stdout, "link based file operation passed\n");
+
+ ret = test_open_modes (strcat (filename, "_5"));
+ if (ret < 0)
+ fprintf (stderr, "testing modes of 'open' call failed\n");
+ else
+ fprintf (stdout, "testing modes of 'open' call passed\n");
+
+out:
+ return ret;
+}
+
+int
+fd_based_fops_1 (char *filename)
+{
+ int fd = 0;
+ int ret = -1;
+ struct stat stbuf = {0,};
+ char wstr[50] = {0,};
+ char rstr[50] = {0,};
+
+ fd = open (filename, O_RDWR|O_CREAT);
+ if (fd < 0) {
+ fd = 0;
+ fprintf (stderr, "open failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = unlink (filename);
+ if (ret < 0) {
+ fprintf (stderr, "unlink failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ strcpy (wstr, "This is my string\n");
+ ret = write (fd, wstr, strlen(wstr));
+ if (ret <= 0) {
+ ret = -1;
+ fprintf (stderr, "write failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = lseek (fd, 0, SEEK_SET);
+ if (ret < 0) {
+ fprintf (stderr, "lseek failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = read (fd, rstr, strlen(wstr));
+ if (ret <= 0) {
+ ret = -1;
+ fprintf (stderr, "read failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = memcmp (rstr, wstr, strlen (wstr));
+ if (ret != 0) {
+ ret = -1;
+ fprintf (stderr, "read returning junk\n");
+ goto out;
+ }
+
+ ret = ftruncate (fd, 0);
+ if (ret < 0) {
+ fprintf (stderr, "ftruncate failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = fstat (fd, &stbuf);
+ if (ret < 0) {
+ fprintf (stderr, "fstat failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = fchmod (fd, 0640);
+ if (ret < 0) {
+ fprintf (stderr, "fchmod failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = fchown (fd, 10001, 10001);
+ if (ret < 0) {
+ fprintf (stderr, "fchown failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = fsync (fd);
+ if (ret < 0) {
+ fprintf (stderr, "fsync failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = fsetxattr (fd, "trusted.xattr-test", "working", 8, 0);
+ if (ret < 0) {
+ fprintf (stderr, "fsetxattr failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = fdatasync (fd);
+ if (ret < 0) {
+ fprintf (stderr, "fdatasync failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = flistxattr (fd, NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf (stderr, "flistxattr failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = fgetxattr (fd, "trusted.xattr-test", NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf (stderr, "fgetxattr failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = fremovexattr (fd, "trusted.xattr-test");
+ if (ret < 0) {
+ fprintf (stderr, "fremovexattr failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (fd)
+ close (fd);
+
+ return ret;
+}
+
+
+int
+fd_based_fops_2 (char *filename)
+{
+ int fd = 0;
+ int ret = -1;
+ struct stat stbuf = {0,};
+ char wstr[50] = {0,};
+ char rstr[50] = {0,};
+
+ fd = open (filename, O_RDWR|O_CREAT);
+ if (fd < 0) {
+ fd = 0;
+ fprintf (stderr, "open failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = ftruncate (fd, 0);
+
+ if (ret < 0) {
+ fprintf (stderr, "ftruncate failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ strcpy (wstr, "This is my second string\n");
+ ret = write (fd, wstr, strlen (wstr));
+ if (ret < 0) {
+ ret = -1;
+ fprintf (stderr, "write failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ lseek (fd, 0, SEEK_SET);
+ if (ret < 0) {
+ fprintf (stderr, "lseek failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = read (fd, rstr, strlen (wstr));
+ if (ret <= 0) {
+ ret = -1;
+ fprintf (stderr, "read failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = memcmp (rstr, wstr, strlen (wstr));
+ if (ret != 0) {
+ ret = -1;
+ fprintf (stderr, "read returning junk\n");
+ goto out;
+ }
+
+ ret = fstat (fd, &stbuf);
+ if (ret < 0) {
+ fprintf (stderr, "fstat failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = fchmod (fd, 0640);
+ if (ret < 0) {
+ fprintf (stderr, "fchmod failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = fchown (fd, 10001, 10001);
+ if (ret < 0) {
+ fprintf (stderr, "fchown failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = fsync (fd);
+ if (ret < 0) {
+ fprintf (stderr, "fsync failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = fsetxattr (fd, "trusted.xattr-test", "working", 8, 0);
+ if (ret < 0) {
+ fprintf (stderr, "fsetxattr failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = fdatasync (fd);
+ if (ret < 0) {
+ fprintf (stderr, "fdatasync failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = flistxattr (fd, NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf (stderr, "flistxattr failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = fgetxattr (fd, "trusted.xattr-test", NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf (stderr, "fgetxattr failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = fremovexattr (fd, "trusted.xattr-test");
+ if (ret < 0) {
+ fprintf (stderr, "fremovexattr failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+out:
+ if (fd)
+ close (fd);
+ unlink (filename);
+
+ return ret;
+}
+
+int
+path_based_fops (char *filename)
+{
+ int ret = -1;
+ int fd = 0;
+ struct stat stbuf = {0,};
+ char newfilename[255] = {0,};
+
+ fd = creat (filename, 0644);
+ if (fd < 0) {
+ fprintf (stderr, "creat failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = truncate (filename, 0);
+ if (ret < 0) {
+ fprintf (stderr, "truncate failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = stat (filename, &stbuf);
+ if (ret < 0) {
+ fprintf (stderr, "stat failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = chmod (filename, 0640);
+ if (ret < 0) {
+ fprintf (stderr, "chmod failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = chown (filename, 10001, 10001);
+ if (ret < 0) {
+ fprintf (stderr, "chown failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = setxattr (filename, "trusted.xattr-test", "working", 8, 0);
+ if (ret < 0) {
+ fprintf (stderr, "setxattr failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = listxattr (filename, NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf (stderr, "listxattr failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = getxattr (filename, "trusted.xattr-test", NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf (stderr, "getxattr failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = removexattr (filename, "trusted.xattr-test");
+ if (ret < 0) {
+ fprintf (stderr, "removexattr failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = access (filename, R_OK|W_OK);
+ if (ret < 0) {
+ fprintf (stderr, "access failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ strcpy (newfilename, filename);
+ strcat(newfilename, "_new");
+ ret = rename (filename, newfilename);
+ if (ret < 0) {
+ fprintf (stderr, "rename failed: %s\n", strerror (errno));
+ goto out;
+ }
+ unlink (newfilename);
+
+out:
+ if (fd)
+ close (fd);
+
+ unlink (filename);
+ return ret;
+}
+
+int
+dup_fd_based_fops (char *filename)
+{
+ int fd = 0;
+ int newfd = 0;
+ int ret = -1;
+ struct stat stbuf = {0,};
+ char wstr[50] = {0,};
+ char rstr[50] = {0,};
+
+ fd = open (filename, O_RDWR|O_CREAT);
+ if (fd < 0) {
+ fd = 0;
+ fprintf (stderr, "open failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ newfd = dup (fd);
+ if (newfd < 0) {
+ ret = -1;
+ fprintf (stderr, "dup failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ close (fd);
+
+ strcpy (wstr, "This is my string\n");
+ ret = write (newfd, wstr, strlen(wstr));
+ if (ret <= 0) {
+ ret = -1;
+ fprintf (stderr, "write failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = lseek (newfd, 0, SEEK_SET);
+ if (ret < 0) {
+ fprintf (stderr, "lseek failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = read (newfd, rstr, strlen(wstr));
+ if (ret <= 0) {
+ ret = -1;
+ fprintf (stderr, "read failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = memcmp (rstr, wstr, strlen (wstr));
+ if (ret != 0) {
+ ret = -1;
+ fprintf (stderr, "read returning junk\n");
+ goto out;
+ }
+
+ ret = ftruncate (newfd, 0);
+ if (ret < 0) {
+ fprintf (stderr, "ftruncate failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = fstat (newfd, &stbuf);
+ if (ret < 0) {
+ fprintf (stderr, "fstat failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = fchmod (newfd, 0640);
+ if (ret < 0) {
+ fprintf (stderr, "fchmod failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = fchown (newfd, 10001, 10001);
+ if (ret < 0) {
+ fprintf (stderr, "fchown failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = fsync (newfd);
+ if (ret < 0) {
+ fprintf (stderr, "fsync failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = fsetxattr (newfd, "trusted.xattr-test", "working", 8, 0);
+ if (ret < 0) {
+ fprintf (stderr, "fsetxattr failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = fdatasync (newfd);
+ if (ret < 0) {
+ fprintf (stderr, "fdatasync failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = flistxattr (newfd, NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf (stderr, "flistxattr failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = fgetxattr (newfd, "trusted.xattr-test", NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf (stderr, "fgetxattr failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = fremovexattr (newfd, "trusted.xattr-test");
+ if (ret < 0) {
+ fprintf (stderr, "fremovexattr failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (newfd)
+ close (newfd);
+ ret = unlink (filename);
+ if (ret < 0) {
+ fprintf (stderr, "unlink failed : %s\n", strerror (errno));
+ goto out;
+ }
+
+ return ret;
+}
+
+int
+dir_based_fops (char *dirname)
+{
+ int ret = -1;
+ DIR *dp = NULL;
+ char buff[255] = {0,};
+ struct dirent *dbuff = {0,};
+ struct stat stbuff = {0,};
+ char newdname[255] = {0,};
+ char *cwd = NULL;
+
+ ret = mkdir (dirname, 0755);
+ if (ret < 0) {
+ fprintf (stderr, "mkdir failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ dp = opendir (dirname);
+ if (dp == NULL) {
+ fprintf (stderr, "opendir failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ dbuff = readdir (dp);
+ if (NULL == dbuff) {
+ fprintf (stderr, "readdir failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = closedir (dp);
+ if (ret < 0) {
+ fprintf (stderr, "closedir failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = stat (dirname, &stbuff);
+ if (ret < 0) {
+ fprintf (stderr, "stat failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = chmod (dirname, 0744);
+ if (ret < 0) {
+ fprintf (stderr, "chmod failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = chown (dirname, 10001, 10001);
+ if (ret < 0) {
+ fprintf (stderr, "chmod failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = setxattr (dirname, "trusted.xattr-test", "working", 8, 0);
+ if (ret < 0) {
+ fprintf (stderr, "setxattr failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = listxattr (dirname, NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf (stderr, "listxattr failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = getxattr (dirname, "trusted.xattr-test", NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf (stderr, "getxattr failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = removexattr (dirname, "trusted.xattr-test");
+ if (ret < 0) {
+ fprintf (stderr, "removexattr failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ strcpy (newdname, dirname);
+ strcat (newdname, "/../");
+ ret = chdir (newdname);
+ if (ret < 0) {
+ fprintf (stderr, "chdir failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ cwd = getcwd (buff, 255);
+ if (NULL == cwd) {
+ fprintf (stderr, "getcwd failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ strcpy (newdname, dirname);
+ strcat (newdname, "new");
+ ret = rename (dirname, newdname);
+ if (ret < 0) {
+ fprintf (stderr, "rename failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = rmdir (newdname);
+ if (ret < 0) {
+ fprintf (stderr, "rmdir failed: %s\n", strerror (errno));
+ return ret;
+ }
+
+out:
+ rmdir (dirname);
+ return ret;
+}
+
+int
+link_based_fops (char *filename)
+{
+ int ret = -1;
+ int fd = 0;
+ char newname[255] = {0,};
+ char linkname[255] = {0,};
+ struct stat lstbuf = {0,};
+
+ fd = creat (filename, 0644);
+ if (fd < 0) {
+ fd = 0;
+ fprintf (stderr, "creat failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ strcpy (newname, filename);
+ strcat (newname, "_hlink");
+ ret = link (filename, newname);
+ if (ret < 0) {
+ fprintf (stderr, "link failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = unlink (filename);
+ if (ret < 0) {
+ fprintf (stderr, "unlink failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ strcpy (linkname, filename);
+ strcat (linkname, "_slink");
+ ret = symlink (newname, linkname);
+ if (ret < 0) {
+ fprintf (stderr, "symlink failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = lstat (linkname, &lstbuf);
+ if (ret < 0) {
+ fprintf (stderr, "lstbuf failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = lchown (linkname, 10001, 10001);
+ if (ret < 0) {
+ fprintf (stderr, "lchown failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = lsetxattr (linkname, "trusted.lxattr-test", "working", 8, 0);
+ if (ret < 0) {
+ fprintf (stderr, "lsetxattr failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = llistxattr (linkname, NULL, 0);
+ if (ret < 0) {
+ ret = -1;
+ fprintf (stderr, "llistxattr failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = lgetxattr (linkname, "trusted.lxattr-test", NULL, 0);
+ if (ret < 0) {
+ ret = -1;
+ fprintf (stderr, "lgetxattr failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = lremovexattr (linkname, "trusted.lxattr-test");
+ if (ret < 0) {
+ fprintf (stderr, "lremovexattr failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+
+out:
+ if (fd)
+ close(fd);
+ unlink (linkname);
+ unlink (newname);
+}
+
+int
+test_open_modes (char *filename)
+{
+ int ret = -1;
+
+ ret = generic_open_read_write (filename, O_CREAT|O_WRONLY);
+ if (3 != ret) {
+ fprintf (stderr, "flag O_CREAT|O_WRONLY failed: \n");
+ goto out;
+ }
+
+ ret = generic_open_read_write (filename, O_CREAT|O_RDWR);
+ if (ret != 0) {
+ fprintf (stderr, "flag O_CREAT|O_RDWR failed\n");
+ goto out;
+ }
+
+ ret = generic_open_read_write (filename, O_CREAT|O_RDONLY);
+ if (ret != 0) {
+ fprintf (stderr, "flag O_CREAT|O_RDONLY failed\n");
+ goto out;
+ }
+
+ ret = creat (filename, 0644);
+ close (ret);
+ ret = generic_open_read_write (filename, O_WRONLY);
+ if (3 != ret) {
+ fprintf (stderr, "flag O_WRONLY failed\n");
+ goto out;
+ }
+
+ ret = creat (filename, 0644);
+ close (ret);
+ ret = generic_open_read_write (filename, O_RDWR);
+ if (0 != ret) {
+ fprintf (stderr, "flag O_RDWR failed\n");
+ goto out;
+ }
+
+ ret = creat (filename, 0644);
+ close (ret);
+ ret = generic_open_read_write (filename, O_RDONLY);
+ if (0 != ret) {
+ fprintf (stderr, "flag O_RDONLY failed\n");
+ goto out;
+ }
+
+ ret = creat (filename, 0644);
+ close (ret);
+ ret = generic_open_read_write (filename, O_TRUNC|O_WRONLY);
+ if (3 != ret) {
+ fprintf (stderr, "flag O_TRUNC|O_WRONLY failed\n");
+ goto out;
+ }
+
+#if 0 /* undefined behaviour, unable to reliably test */
+ ret = creat (filename, 0644);
+ close (ret);
+ ret = generic_open_read_write (filename, O_TRUNC|O_RDONLY);
+ if (0 != ret) {
+ fprintf (stderr, "flag O_TRUNC|O_RDONLY failed\n");
+ goto out;
+ }
+#endif
+
+ ret = generic_open_read_write (filename, O_CREAT|O_RDWR|O_SYNC);
+ if (0 != ret) {
+ fprintf (stderr, "flag O_CREAT|O_RDWR|O_SYNC failed\n");
+ goto out;
+ }
+
+ ret = creat (filename, 0644);
+ close (ret);
+ ret = generic_open_read_write (filename, O_CREAT|O_EXCL);
+ if (0 != ret) {
+ fprintf (stderr, "flag O_CREAT|O_EXCL failed\n");
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+int generic_open_read_write (char *filename, int flag)
+{
+ int fd = 0;
+ int ret = -1;
+ char wstring[50] = {0,};
+ char rstring[50] = {0,};
+
+ fd = open (filename, flag);
+ if (fd < 0) {
+ if (flag == O_CREAT|O_EXCL && errno == EEXIST) {
+ unlink (filename);
+ return 0;
+ }
+ else {
+ fd = 0;
+ fprintf (stderr, "open failed: %s\n", strerror (errno));
+ return 1;
+ }
+ }
+
+ strcpy (wstring, "My string to write\n");
+ ret = write (fd, wstring, strlen(wstring));
+ if (ret <= 0) {
+ if (errno != EBADF) {
+ fprintf (stderr, "write failed: %s\n", strerror (errno));
+ close (fd);
+ unlink(filename);
+ return 2;
+ }
+ }
+
+ ret = lseek (fd, 0, SEEK_SET);
+ if (ret < 0) {
+ close (fd);
+ unlink(filename);
+ return 4;
+ }
+
+ ret = read (fd, rstring, strlen(wstring));
+ if (ret < 0) {
+ close (fd);
+ unlink (filename);
+ return 3;
+ }
+
+ /* Compare the rstring with wstring. But we do not want to return
+ * error when the flag is either O_RDONLY, O_CREAT|O_RDONLY or
+ * O_TRUNC|O_RDONLY. Because in that case we are not writing
+ * anything to the file.*/
+
+ ret = memcmp (wstring, rstring, strlen (wstring));
+ if (0 != ret && !(flag == O_CREAT|O_RDONLY || flag == O_RDONLY ||\
+ flag == O_TRUNC|O_RDONLY)) {
+ fprintf (stderr, "read is returning junk\n");
+ close (fd);
+ unlink (filename);
+ return 4;
+ }
+
+ close (fd);
+ unlink (filename);
+ return 0;
+}
diff --git a/extras/volfilter.py b/extras/volfilter.py
new file mode 100644
index 000000000..0ca456a78
--- /dev/null
+++ b/extras/volfilter.py
@@ -0,0 +1,167 @@
+# Copyright (c) 2010-2011 Red Hat, Inc.
+#
+# This file is part of HekaFS.
+#
+# HekaFS is free software: you can redistribute it and/or modify it under the
+# terms of the GNU General Public License, version 3, as published by the Free
+# Software Foundation.
+#
+# HekaFS is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License * along
+# with HekaFS. If not, see <http://www.gnu.org/licenses/>.
+
+import copy
+import string
+import sys
+import types
+
+good_xlators = [
+ "cluster/afr",
+ "cluster/dht",
+ "cluster/distribute",
+ "cluster/replicate",
+ "cluster/stripe",
+ "debug/io-stats",
+ "features/access-control",
+ "features/locks",
+ "features/marker",
+ "features/uidmap",
+ "performance/io-threads",
+ "protocol/client",
+ "protocol/server",
+ "storage/posix",
+]
+
+def copy_stack (old_xl,suffix,recursive=False):
+ if recursive:
+ new_name = old_xl.name + "-" + suffix
+ else:
+ new_name = suffix
+ new_xl = Translator(new_name)
+ new_xl.type = old_xl.type
+ # The results with normal assignment here are . . . amusing.
+ new_xl.opts = copy.deepcopy(old_xl.opts)
+ for sv in old_xl.subvols:
+ new_xl.subvols.append(copy_stack(sv,suffix,True))
+ # Patch up the path at the bottom.
+ if new_xl.type == "storage/posix":
+ new_xl.opts["directory"] += ("/" + suffix)
+ return new_xl
+
+def cleanup (parent, graph):
+ if parent.type in good_xlators:
+ # Temporary fix so that HekaFS volumes can use the
+ # SSL-enabled multi-threaded socket transport.
+ if parent.type == "protocol/server":
+ parent.type = "protocol/server2"
+ parent.opts["transport-type"] = "ssl"
+ elif parent.type == "protocol/client":
+ parent.type = "protocol/client2"
+ parent.opts["transport-type"] = "ssl"
+ sv = []
+ for child in parent.subvols:
+ sv.append(cleanup(child,graph))
+ parent.subvols = sv
+ else:
+ parent = cleanup(parent.subvols[0],graph)
+ return parent
+
+class Translator:
+ def __init__ (self, name):
+ self.name = name
+ self.type = ""
+ self.opts = {}
+ self.subvols = []
+ self.dumped = False
+ def __repr__ (self):
+ return "<Translator %s>" % self.name
+
+def load (path):
+ # If it's a string, open it; otherwise, assume it's already a
+ # file-like object (most notably from urllib*).
+ if type(path) in types.StringTypes:
+ fp = file(path,"r")
+ else:
+ fp = path
+ all_xlators = {}
+ xlator = None
+ last_xlator = None
+ while True:
+ text = fp.readline()
+ if text == "":
+ break
+ text = text.split()
+ if not len(text):
+ continue
+ if text[0] == "volume":
+ if xlator:
+ raise RuntimeError, "nested volume definition"
+ xlator = Translator(text[1])
+ continue
+ if not xlator:
+ raise RuntimeError, "text outside volume definition"
+ if text[0] == "type":
+ xlator.type = text[1]
+ continue
+ if text[0] == "option":
+ xlator.opts[text[1]] = string.join(text[2:])
+ continue
+ if text[0] == "subvolumes":
+ for sv in text[1:]:
+ xlator.subvols.append(all_xlators[sv])
+ continue
+ if text[0] == "end-volume":
+ all_xlators[xlator.name] = xlator
+ last_xlator = xlator
+ xlator = None
+ continue
+ raise RuntimeError, "unrecognized keyword %s" % text[0]
+ if xlator:
+ raise RuntimeError, "unclosed volume definition"
+ return all_xlators, last_xlator
+
+def generate (graph, last, stream=sys.stdout):
+ for sv in last.subvols:
+ if not sv.dumped:
+ generate(graph,sv,stream)
+ print >> stream, ""
+ sv.dumped = True
+ print >> stream, "volume %s" % last.name
+ print >> stream, " type %s" % last.type
+ for k, v in last.opts.iteritems():
+ print >> stream, " option %s %s" % (k, v)
+ if last.subvols:
+ print >> stream, " subvolumes %s" % string.join(
+ [ sv.name for sv in last.subvols ])
+ print >> stream, "end-volume"
+
+def push_filter (graph, old_xl, filt_type, opts={}):
+ suffix = "-" + old_xl.type.split("/")[1]
+ if len(old_xl.name) > len(suffix):
+ if old_xl.name[-len(suffix):] == suffix:
+ old_xl.name = old_xl.name[:-len(suffix)]
+ new_xl = Translator(old_xl.name+suffix)
+ new_xl.type = old_xl.type
+ new_xl.opts = old_xl.opts
+ new_xl.subvols = old_xl.subvols
+ graph[new_xl.name] = new_xl
+ old_xl.name += ("-" + filt_type.split("/")[1])
+ old_xl.type = filt_type
+ old_xl.opts = opts
+ old_xl.subvols = [new_xl]
+ graph[old_xl.name] = old_xl
+
+def delete (graph, victim):
+ if len(victim.subvols) != 1:
+ raise RuntimeError, "attempt to delete non-unary translator"
+ for xl in graph.itervalues():
+ while xl.subvols.count(victim):
+ i = xl.subvols.index(victim)
+ xl.subvols[i] = victim.subvols[0]
+
+if __name__ == "__main__":
+ graph, last = load(sys.argv[1])
+ generate(graph,last)
diff --git a/extras/who-wrote-glusterfs/gitdm.aliases b/extras/who-wrote-glusterfs/gitdm.aliases
new file mode 100644
index 000000000..784a3e3bc
--- /dev/null
+++ b/extras/who-wrote-glusterfs/gitdm.aliases
@@ -0,0 +1,48 @@
+#
+# This is the email aliases file, mapping secondary addresses onto a single,
+# canonical address. This file should probably match the contents of .mailmap
+# in the root of the git repository.
+#
+# Format: <alias> <real>
+
+amar@gluster.com amarts@redhat.com
+amar@del.gluster.com amarts@redhat.com
+avati@amp.gluster.com avati@redhat.com
+avati@blackhole.gluster.com avati@redhat.com
+avati@dev.gluster.com avati@redhat.com
+avati@gluster.com avati@redhat.com
+wheelear@gmail.com awheeler@redhat.com
+anush@gluster.com ashetty@redhat.com
+csaba@gluster.com csaba@redhat.com
+csaba@lowlife.hu csaba@redhat.com
+csaba@zresearch.com csaba@redhat.com
+harsha@gluster.com fharshav@redhat.com
+harsha@zresearch.com fharshav@redhat.com
+harsha@dev.gluster.com fharshav@redhat.com
+harsha@harshavardhana.net fharshav@redhat.com
+kkeithle@f16node1.kkeithle.usersys.redhat.com kkeithle@redhat.com
+kaushal@gluster.com kaushal@redhat.com
+kaushikbv@gluster.com kbudiger@redhat.com
+krishna@gluster.com ksriniva@redhat.com
+krishna@zresearch.com ksriniva@redhat.com
+krishna@guest-laptop ksriniva@redhat.com
+kp@gluster.com kparthas@redhat.com
+me@louiszuckerman.com louiszuckerman@gmail.com
+msvbhat@gmail.com vbhat@redhat.com
+vishwanath@gluster.com vbhat@redhat.com
+pavan@dev.gluster.com pavan@gluster.com
+zaitcev@yahoo.com zaitcev@kotori.zaitcev.us
+pranithk@gluster.com pkarampu@redhat.com
+raghavendrabhat@gluster.com raghavendra@redhat.com
+raghavendra@gluster.com rgowdapp@redhat.com
+raghavendra@zresearch.com rgowdapp@redhat.com
+rahulcssjce@gmail.com rahulcs@redhat.com
+rajesh@gluster.com rajesh@redhat.com
+rajesh.amaravathi@gmail.com rajesh@redhat.com
+shehjart@zresearch.com shehjart@gluster.com
+venky@gluster.com vshankar@redhat.com
+vijay@gluster.com vbellur@redhat.com
+vijay@dev.gluster.com vbellur@redhat.com
+vijaykumar.koppad@gmail.com vkoppad@redhat.com
+vikas@zresearch.com vikas@gluster.com
+shishirng@gluster.com sgowda@redhat.com
diff --git a/extras/who-wrote-glusterfs/gitdm.config b/extras/who-wrote-glusterfs/gitdm.config
new file mode 100644
index 000000000..e1ff2bd5b
--- /dev/null
+++ b/extras/who-wrote-glusterfs/gitdm.config
@@ -0,0 +1,8 @@
+#
+# This is the gitdm configuration file for GlusterFS.
+# See the gitdm.config in the gitdm repositofy for additional options and
+# comments.
+#
+
+EmailAliases gitdm.aliases
+EmailMap gitdm.domain-map
diff --git a/extras/who-wrote-glusterfs/gitdm.domain-map b/extras/who-wrote-glusterfs/gitdm.domain-map
new file mode 100644
index 000000000..f1c305898
--- /dev/null
+++ b/extras/who-wrote-glusterfs/gitdm.domain-map
@@ -0,0 +1,15 @@
+#
+# Here is a set of mappings of domain names onto employer names.
+#
+active.by ActiveCloud
+cern.ch CERN
+gluster.com Red Hat
+gooddata.com GoodData
+hastexo.com hastexo
+ibm.com IBM
+linbit.com LINBIT
+netbsd.org NetBSD
+netdirect.ca Net Direct
+redhat.com Red Hat
+stepping-stone.ch stepping stone GmbH
+zresearch.com Red Hat
diff --git a/extras/who-wrote-glusterfs/who-wrote-glusterfs.sh b/extras/who-wrote-glusterfs/who-wrote-glusterfs.sh
new file mode 100755
index 000000000..487f5874b
--- /dev/null
+++ b/extras/who-wrote-glusterfs/who-wrote-glusterfs.sh
@@ -0,0 +1,50 @@
+#!/bin/sh
+#
+# Gather statistics on "Who wrote GlusterFS". The idea comes from the excellent
+# articles on http://lwn.net/ named "Who wrote <linux-version>?".
+#
+# gitdm comes from git://git.lwn.net/gitdm.git by Jonathan Corbet.
+#
+# Confguration files used:
+# - gitdm.config: main configuration file, pointing to the others
+# - gitdm.aliases: merge users with different emailaddresses into one
+# - gitdm.domain-map: map domain names from emailaddresses to companies
+#
+
+DIRNAME=$(dirname $0)
+
+GITDM_REPO=git://git.lwn.net/gitdm.git
+GITDM_DIR=${DIRNAME}/gitdm
+GITDM_CMD="python ${GITDM_DIR}/gitdm"
+
+error()
+{
+ local ret=${?}
+ printf "${@}\n" > /dev/stderr
+ return ${ret}
+}
+
+check_gitdm()
+{
+ if [ ! -e "${GITDM_DIR}/gitdm" ]
+ then
+ git clone --quiet git://git.lwn.net/gitdm.git ${DIRNAME}/gitdm
+ fi
+}
+
+# The first argument is the revision-range (see 'git rev-list --help').
+# REV can be empty, and the statistics will be calculated over the whole
+# current branch.
+REV=${1}
+shift
+# all remaining options are passed to gitdm, see the gitdm script for an
+# explanation of the accepted options.
+GITDM_OPTS=${@}
+
+if ! check_gitdm
+then
+ error "Could not find 'gitdm', exiting..."
+ exit 1
+fi
+
+git log --numstat -M ${REV} | ${GITDM_CMD} -b ${DIRNAME} -n ${GITDM_OPTS}
diff --git a/format-patch.sh b/format-patch.sh
deleted file mode 100755
index 86737042f..000000000
--- a/format-patch.sh
+++ /dev/null
@@ -1,60 +0,0 @@
-#!/bin/bash
-
-
-function is_num()
-{
- local num;
-
- num="$1";
-
- [ -z "$(echo $num | sed -e 's/[0-9]//g')" ]
-}
-
-
-function guess_branch()
-{
- local branch;
- local src_branch;
-
- branch=$(git branch | grep '*' | cut -f2 -d' ');
-
- if [ $branch = "master" ] ; then
- src_branch="master";
- else
- src_branch=$(cat .git/logs/refs/heads/$branch | head -n 1 \
- | sed -r -e 's/.*( [^ ]*)$/\1/g' | cut -f2 -d/);
- fi
-
- echo $src_branch
-}
-
-
-function main()
-{
- local branch;
- local bug;
-
- branch=$(guess_branch);
- echo
- echo "Patches are always to be associated with a bug ID. If there is no "
- echo "bug filed in bugzilla for this patch, it is highly suggested to file"
- echo "a new bug with a description and reasoning of this patchset. If this"
- echo "is a new feature, then file a new enhancement bug with a brief "
- echo "summary of the feature as the description."
- echo
- echo -n "Enter bug ID (from http://bugs.gluster.com/): "
- read bug;
-
- [ -z "$bug" ] || is_num $bug || {
- log "bug ID should be a valid bug number";
- exit;
- }
-
- if [ -z "$bug" ]; then
- git format-patch "$@";
- else
- git format-patch --subject-prefix="PATCH BUG:$bug" "$@";
- fi
-}
-
-main "$@"
diff --git a/gen-headers.py b/gen-headers.py
new file mode 100755
index 000000000..ef9fa7711
--- /dev/null
+++ b/gen-headers.py
@@ -0,0 +1,54 @@
+#!/usr/bin/python
+
+import sys
+try:
+ import json
+except ImportError:
+ import simplejson as json
+from string import Template
+
+
+def getLogBook(logFile='error-codes.json'):
+ fp = open(logFile)
+ return json.load(fp)
+
+
+def genCHeader(logBook,
+ infile='gf-error-codes.h.template',
+ outfile='gf-error-codes.h'):
+ fp = open('gf-error-codes.h.template')
+ s = fp.read()
+ fp.close()
+ template = Template(s)
+
+ defineLines = []
+ caseLines = []
+ for name, value in logBook.iteritems():
+ nameDef = "GF_%s" % (name.upper(),)
+ code = value['code']
+ msgNameDef = "%s_MSG" % (nameDef,)
+ msg = value['message']['en']
+
+ defineLines.append("#define %-20s %d" % (nameDef, code))
+ defineLines.append("#define %-20s %s" % (msgNameDef,
+ json.dumps(msg)))
+ caseLines.append(" case %s: return _(%s);" % \
+ (nameDef, msgNameDef))
+
+ d = {'DEFINES': "\n".join(defineLines),
+ 'CASES': "\n".join(caseLines)}
+ #print template.substitute(d)
+
+ fp = open(outfile, 'w')
+ fp.write(template.substitute(d))
+ fp.close()
+
+
+if __name__ == "__main__":
+ try:
+ logBook = getLogBook()
+ genCHeader(logBook)
+ sys.exit(0)
+ except IOError, e:
+ print str(e)
+ sys.exit(-1)
diff --git a/geo-replication/Makefile.am b/geo-replication/Makefile.am
new file mode 100644
index 000000000..556951d9f
--- /dev/null
+++ b/geo-replication/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = syncdaemon src
+
+CLEANFILES =
diff --git a/geo-replication/src/Makefile.am b/geo-replication/src/Makefile.am
new file mode 100644
index 000000000..324d8869f
--- /dev/null
+++ b/geo-replication/src/Makefile.am
@@ -0,0 +1,33 @@
+
+gsyncddir = $(libexecdir)/glusterfs
+
+gsyncd_SCRIPTS = gverify.sh peer_add_secret_pub peer_gsec_create
+
+# peer_gsec_create and peer_add_secret_pub are not added to
+# EXTRA_DIST as it's derived from a .in file
+EXTRA_DIST = gverify.sh
+
+gsyncd_PROGRAMS = gsyncd
+
+gsyncd_SOURCES = gsyncd.c procdiggy.c
+
+gsyncd_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(GF_GLUSTERFS_LIBS)
+
+gsyncd_LDFLAGS = $(GF_LDFLAGS)
+
+noinst_HEADERS = procdiggy.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src\
+ -DGSYNCD_PREFIX=\"$(libexecdir)/glusterfs\"\
+ -DUSE_LIBGLUSTERFS\
+ -DSBIN_DIR=\"$(sbindir)\" -DPYTHON=\"$(PYTHON)\"
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+
+CLEANFILES =
+
+$(top_builddir)/libglusterfs/src/libglusterfs.la:
+ $(MAKE) -C $(top_builddir)/libglusterfs/src/ all
diff --git a/geo-replication/src/gsyncd.c b/geo-replication/src/gsyncd.c
new file mode 100644
index 000000000..0830e7f9b
--- /dev/null
+++ b/geo-replication/src/gsyncd.c
@@ -0,0 +1,408 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/param.h> /* for PATH_MAX */
+
+/* NOTE (USE_LIBGLUSTERFS):
+ * ------------------------
+ * When USE_LIBGLUSTERFS debugging sumbol is passed; perform
+ * glusterfs translator like initialization so that glusterfs
+ * globals, contexts are valid when glustefs api's are invoked.
+ * We unconditionally pass then while building gsyncd binary.
+ */
+#ifdef USE_LIBGLUSTERFS
+#include "glusterfs.h"
+#include "globals.h"
+#endif
+
+#include "common-utils.h"
+#include "run.h"
+#include "procdiggy.h"
+
+#define _GLUSTERD_CALLED_ "_GLUSTERD_CALLED_"
+#define _GSYNCD_DISPATCHED_ "_GSYNCD_DISPATCHED_"
+#define GSYNCD_CONF_TEMPLATE "geo-replication/gsyncd_template.conf"
+#define GSYNCD_PY "gsyncd.py"
+#define RSYNC "rsync"
+
+int restricted = 0;
+
+static int
+duplexpand (void **buf, size_t tsiz, size_t *len)
+{
+ size_t osiz = tsiz * *len;
+ char *p = realloc (*buf, osiz << 1);
+ if (!p) {
+ free(*buf);
+ return -1;
+ }
+
+ memset (p + osiz, 0, osiz);
+ *buf = p;
+ *len <<= 1;
+
+ return 0;
+}
+
+static int
+str2argv (char *str, char ***argv)
+{
+ char *p = NULL;
+ char *savetok = NULL;
+ int argc = 0;
+ size_t argv_len = 32;
+ int ret = 0;
+
+ assert (str);
+ str = strdup (str);
+ if (!str)
+ return -1;
+
+ *argv = calloc (argv_len, sizeof (**argv));
+ if (!*argv)
+ goto error;
+
+ while ((p = strtok_r (str, " ", &savetok))) {
+ str = NULL;
+
+ argc++;
+ if (argc == argv_len) {
+ ret = duplexpand ((void *)argv,
+ sizeof (**argv),
+ &argv_len);
+ if (ret == -1)
+ goto error;
+ }
+ (*argv)[argc - 1] = p;
+ }
+
+ return argc;
+
+ error:
+ fprintf (stderr, "out of memory\n");
+ return -1;
+}
+
+static int
+invoke_gsyncd (int argc, char **argv)
+{
+ char config_file[PATH_MAX] = {0,};
+ size_t gluster_workdir_len = 0;
+ runner_t runner = {0,};
+ int i = 0;
+ int j = 0;
+ char *nargv[argc + 4];
+ char *python = NULL;
+
+ if (restricted) {
+ size_t len;
+ /* in restricted mode we forcibly use the system-wide config */
+ runinit (&runner);
+ runner_add_args (&runner, SBIN_DIR"/gluster",
+ "--log-file=-", "system::", "getwd",
+ NULL);
+ runner_redir (&runner, STDOUT_FILENO, RUN_PIPE);
+ if (runner_start (&runner) == 0 &&
+ fgets (config_file, PATH_MAX,
+ runner_chio (&runner, STDOUT_FILENO)) != NULL &&
+ (len = strlen (config_file)) &&
+ config_file[len - 1] == '\n' &&
+ runner_end (&runner) == 0)
+ gluster_workdir_len = len - 1;
+
+ if (gluster_workdir_len) {
+ if (gluster_workdir_len + 1 + strlen (GSYNCD_CONF_TEMPLATE) + 1 >
+ PATH_MAX)
+ goto error;
+ config_file[gluster_workdir_len] = '/';
+ strcat (config_file, GSYNCD_CONF_TEMPLATE);
+ } else
+ goto error;
+
+ if (setenv ("_GSYNCD_RESTRICTED_", "1", 1) == -1)
+ goto error;
+ }
+
+ if (chdir ("/") == -1)
+ goto error;
+
+ j = 0;
+ python = getenv("PYTHON");
+ if(!python)
+ python = PYTHON;
+ nargv[j++] = python;
+ nargv[j++] = GSYNCD_PREFIX"/python/syncdaemon/"GSYNCD_PY;
+ for (i = 1; i < argc; i++)
+ nargv[j++] = argv[i];
+ if (config_file[0]) {
+ nargv[j++] = "-c";
+ nargv[j++] = config_file;
+ }
+ nargv[j++] = NULL;
+
+ execvp (python, nargv);
+
+ fprintf (stderr, "exec of '%s' failed\n", python);
+ return 127;
+
+ error:
+ fprintf (stderr, "gsyncd initializaion failed\n");
+ return 1;
+}
+
+
+static int
+find_gsyncd (pid_t pid, pid_t ppid, char *name, void *data)
+{
+ char buf[NAME_MAX * 2] = {0,};
+ char path[PATH_MAX] = {0,};
+ char *p = NULL;
+ int zeros = 0;
+ int ret = 0;
+ int fd = -1;
+ pid_t *pida = (pid_t *)data;
+
+ if (ppid != pida[0])
+ return 0;
+
+ sprintf (path, PROC"/%d/cmdline", pid);
+ fd = open (path, O_RDONLY);
+ if (fd == -1)
+ return 0;
+ ret = read (fd, buf, sizeof (buf));
+ close (fd);
+ if (ret == -1)
+ return 0;
+ for (zeros = 0, p = buf; zeros < 2 && p < buf + ret; p++)
+ zeros += !*p;
+
+ ret = 0;
+ switch (zeros) {
+ case 2:
+ if ((strcmp (basename (buf), basename (PYTHON)) ||
+ strcmp (basename (buf + strlen (buf) + 1), GSYNCD_PY)) == 0) {
+ ret = 1;
+ break;
+ }
+ /* fallthrough */
+ case 1:
+ if (strcmp (basename (buf), GSYNCD_PY) == 0)
+ ret = 1;
+ }
+
+ if (ret == 1) {
+ if (pida[1] != -1) {
+ fprintf (stderr, GSYNCD_PY" sibling is not unique");
+ return -1;
+ }
+ pida[1] = pid;
+ }
+
+ return 0;
+}
+
+static int
+invoke_rsync (int argc, char **argv)
+{
+ int i = 0;
+ char path[PATH_MAX] = {0,};
+ pid_t pid = -1;
+ pid_t ppid = -1;
+ pid_t pida[] = {-1, -1};
+ char *name = NULL;
+ char buf[PATH_MAX + 1] = {0,};
+ int ret = 0;
+
+ assert (argv[argc] == NULL);
+
+ if (argc < 2 || strcmp (argv[1], "--server") != 0)
+ goto error;
+
+ for (i = 2; i < argc && argv[i][0] == '-'; i++);
+
+ if (!(i == argc - 2 && strcmp (argv[i], ".") == 0 && argv[i + 1][0] == '/')) {
+ fprintf (stderr, "need an rsync invocation without protected args\n");
+ goto error;
+ }
+
+ /* look up sshd we are spawned from */
+ for (pid = getpid () ;; pid = ppid) {
+ ppid = pidinfo (pid, &name);
+ if (ppid < 0) {
+ fprintf (stderr, "sshd ancestor not found\n");
+ goto error;
+ }
+ if (strcmp (name, "sshd") == 0) {
+ GF_FREE (name);
+ break;
+ }
+ GF_FREE (name);
+ }
+ /* look up "ssh-sibling" gsyncd */
+ pida[0] = pid;
+ ret = prociter (find_gsyncd, pida);
+ if (ret == -1 || pida[1] == -1) {
+ fprintf (stderr, "gsyncd sibling not found\n");
+ goto error;
+ }
+ /* check if rsync target matches gsyncd target */
+ sprintf (path, PROC"/%d/cwd", pida[1]);
+ ret = readlink (path, buf, sizeof (buf));
+ if (ret == -1 || ret == sizeof (buf))
+ goto error;
+ if (strcmp (argv[argc - 1], "/") == 0 /* root dir cannot be a target */ ||
+ (strcmp (argv[argc - 1], path) /* match against gluster target */ &&
+ strcmp (argv[argc - 1], buf) /* match against file target */) != 0) {
+ fprintf (stderr, "rsync target does not match "GEOREP" session\n");
+ goto error;
+ }
+
+ argv[0] = RSYNC;
+
+ execvp (RSYNC, argv);
+
+ fprintf (stderr, "exec of "RSYNC" failed\n");
+ return 127;
+
+ error:
+ fprintf (stderr, "disallowed "RSYNC" invocation\n");
+ return 1;
+}
+
+static int
+invoke_gluster (int argc, char **argv)
+{
+ int i = 0;
+ int j = 0;
+ int optsover = 0;
+ char *ov = NULL;
+
+ for (i = 1; i < argc; i++) {
+ ov = strtail (argv[i], "--");
+ if (ov && !optsover) {
+ if (*ov == '\0')
+ optsover = 1;
+ continue;
+ }
+ switch (++j) {
+ case 1:
+ if (strcmp (argv[i], "volume") != 0)
+ goto error;
+ break;
+ case 2:
+ if (strcmp (argv[i], "info") != 0)
+ goto error;
+ break;
+ case 3:
+ break;
+ default:
+ goto error;
+ }
+ }
+
+ argv[0] = "gluster";
+ execvp (SBIN_DIR"/gluster", argv);
+ fprintf (stderr, "exec of gluster failed\n");
+ return 127;
+
+ error:
+ fprintf (stderr, "disallowed gluster invocation\n");
+ return 1;
+}
+
+struct invocable {
+ char *name;
+ int (*invoker) (int argc, char **argv);
+};
+
+struct invocable invocables[] = {
+ { "rsync", invoke_rsync },
+ { "gsyncd", invoke_gsyncd },
+ { "gluster", invoke_gluster },
+ { NULL, NULL}
+};
+
+int
+main (int argc, char **argv)
+{
+ char *evas = NULL;
+ struct invocable *i = NULL;
+ char *b = NULL;
+ char *sargv = NULL;
+
+#ifdef USE_LIBGLUSTERFS
+ glusterfs_ctx_t *ctx = NULL;
+
+ ctx = glusterfs_ctx_new ();
+ if (!ctx)
+ return ENOMEM;
+
+ if (glusterfs_globals_init (ctx))
+ return 1;
+
+ THIS->ctx = ctx;
+#endif
+
+ evas = getenv (_GLUSTERD_CALLED_);
+ if (evas && strcmp (evas, "1") == 0)
+ /* OK, we know glusterd called us, no need to look for further config
+ * ... altough this conclusion should not inherit to our children
+ */
+ unsetenv (_GLUSTERD_CALLED_);
+ else {
+ /* we regard all gsyncd invocations unsafe
+ * that do not come from glusterd and
+ * therefore restrict it
+ */
+ restricted = 1;
+
+ if (!getenv (_GSYNCD_DISPATCHED_)) {
+ evas = getenv ("SSH_ORIGINAL_COMMAND");
+ if (evas)
+ sargv = evas;
+ else {
+ evas = getenv ("SHELL");
+ if (evas && strcmp (basename (evas), "gsyncd") == 0 &&
+ argc == 3 && strcmp (argv[1], "-c") == 0)
+ sargv = argv[2];
+ }
+ }
+
+ }
+
+ if (!(sargv && restricted))
+ return invoke_gsyncd (argc, argv);
+
+ argc = str2argv (sargv, &argv);
+ if (argc == -1 || setenv (_GSYNCD_DISPATCHED_, "1", 1) == -1) {
+ fprintf (stderr, "internal error\n");
+ return 1;
+ }
+
+ b = basename (argv[0]);
+ for (i = invocables; i->name; i++) {
+ if (strcmp (b, i->name) == 0)
+ return i->invoker (argc, argv);
+ }
+
+ fprintf (stderr, "invoking %s in restricted SSH session is not allowed\n",
+ b);
+
+ return 1;
+}
diff --git a/geo-replication/src/gverify.sh b/geo-replication/src/gverify.sh
new file mode 100755
index 000000000..bd1b25f24
--- /dev/null
+++ b/geo-replication/src/gverify.sh
@@ -0,0 +1,160 @@
+#!/bin/bash
+
+# Script to verify the Master and Slave Gluster compatibility.
+# To use ./gverify <master volume> <slave host> <slave volume>
+# Returns 0 if master and slave compatible.
+
+# Considering buffer_size 100MB
+BUFFER_SIZE=104857600;
+slave_log_file=`gluster --print-logdir`/geo-replication-slaves/slave.log
+
+function SSHM()
+{
+ ssh -q \
+ -oPasswordAuthentication=no \
+ -oStrictHostKeyChecking=no \
+ -oControlMaster=yes \
+ "$@";
+}
+
+function cmd_master()
+{
+ VOL=$1;
+ local cmd_line;
+ cmd_line=$(cat <<EOF
+function do_verify() {
+v=\$1;
+d=\$(mktemp -d 2>/dev/null);
+glusterfs -s localhost --xlator-option="*dht.lookup-unhashed=off" --volfile-id \$v -l $slave_log_file \$d;
+i=\$(stat -c "%i" \$d);
+if [[ "\$i" -ne "1" ]]; then
+echo 0:0;
+exit 1;
+fi;
+cd \$d;
+available_size=\$(df \$d | tail -1 | awk "{print \\\$2}");
+umount -l \$d;
+rmdir \$d;
+ver=\$(gluster --version | head -1 | cut -f2 -d " ");
+echo \$available_size:\$ver;
+};
+cd /tmp;
+[ x$VOL != x ] && do_verify $VOL;
+EOF
+);
+
+echo $cmd_line;
+}
+
+function cmd_slave()
+{
+ VOL=$1;
+ local cmd_line;
+ cmd_line=$(cat <<EOF
+function do_verify() {
+v=\$1;
+d=\$(mktemp -d 2>/dev/null);
+glusterfs -s localhost --xlator-option="*dht.lookup-unhashed=off" --volfile-id \$v -l $slave_log_file \$d;
+i=\$(stat -c "%i" \$d);
+if [[ "\$i" -ne "1" ]]; then
+echo 0:0;
+exit 1;
+fi;
+cd \$d;
+available_size=\$(df \$d | tail -1 | awk "{print \\\$4}");
+no_of_files=\$(find \$d -maxdepth 0 -empty);
+umount -l \$d;
+rmdir \$d;
+ver=\$(gluster --version | head -1 | cut -f2 -d " ");
+echo \$available_size:\$ver:\$no_of_files:;
+};
+cd /tmp;
+[ x$VOL != x ] && do_verify $VOL;
+EOF
+);
+
+echo $cmd_line;
+}
+
+function master_stats()
+{
+ MASTERVOL=$1;
+ local cmd_line;
+ cmd_line=$(cmd_master $MASTERVOL);
+ bash -c "$cmd_line";
+}
+
+
+function slave_stats()
+{
+ SLAVEHOST=$1;
+ SLAVEVOL=$2;
+ local cmd_line;
+ cmd_line=$(cmd_slave $SLAVEVOL);
+ SSHM $SLAVEHOST bash -c "'$cmd_line'";
+}
+
+
+function main()
+{
+ log_file=$4
+ > $log_file
+
+ # Use FORCE_BLOCKER flag in the error message to differentiate
+ # between the errors which the force command should bypass
+
+ ping -w 5 $2;
+ if [ $? -ne 0 ]; then
+ echo "FORCE_BLOCKER|$2 not reachable." > $log_file
+ exit 1;
+ fi;
+
+ ssh -oNumberOfPasswordPrompts=0 $2 "echo Testing_Passwordless_SSH";
+ if [ $? -ne 0 ]; then
+ echo "FORCE_BLOCKER|Passwordless ssh login has not been setup with $2." > $log_file
+ exit 1;
+ fi;
+
+ ERRORS=0;
+ master_data=$(master_stats $1);
+ slave_data=$(slave_stats $2 $3);
+ master_size=$(echo $master_data | cut -f1 -d':');
+ slave_size=$(echo $slave_data | cut -f1 -d':');
+ master_version=$(echo $master_data | cut -f2 -d':');
+ slave_version=$(echo $slave_data | cut -f2 -d':');
+ slave_no_of_files=$(echo $slave_data | cut -f3 -d':');
+
+ if [[ "x$master_size" = "x" || "x$master_version" = "x" || "$master_size" -eq "0" ]]; then
+ echo "FORCE_BLOCKER|Unable to fetch master volume details. Please check the master cluster and master volume." > $log_file;
+ exit 1;
+ fi;
+
+ if [[ "x$slave_size" = "x" || "x$slave_version" = "x" || "$slave_size" -eq "0" ]]; then
+ echo "FORCE_BLOCKER|Unable to fetch slave volume details. Please check the slave cluster and slave volume." > $log_file;
+ exit 1;
+ fi;
+
+ # The above checks are mandatory and force command should be blocked
+ # if they fail. The checks below can be bypassed if force option is
+ # provided hence no FORCE_BLOCKER flag.
+
+ if [ ! $slave_size -ge $(($master_size - $BUFFER_SIZE )) ]; then
+ echo "Total size of master is greater than available size of slave." >> $log_file;
+ ERRORS=$(($ERRORS + 1));
+ fi;
+
+ if [ -z $slave_no_of_files ]; then
+ echo "$2::$3 is not empty. Please delete existing files in $2::$3 and retry, or use force to continue without deleting the existing files." >> $log_file;
+ ERRORS=$(($ERRORS + 1));
+ fi;
+
+ if [[ $master_version > $slave_version ]]; then
+ echo "Gluster version mismatch between master and slave." >> $log_file;
+ ERRORS=$(($ERRORS + 1));
+ fi;
+
+ exit $ERRORS;
+}
+
+
+main "$@";
diff --git a/geo-replication/src/peer_add_secret_pub.in b/geo-replication/src/peer_add_secret_pub.in
new file mode 100644
index 000000000..c036cf334
--- /dev/null
+++ b/geo-replication/src/peer_add_secret_pub.in
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+if [ ! -d ~/.ssh ]; then
+ mkdir ~/.ssh;
+ chmod 700 ~/.ssh
+ chown root:root ~/.ssh
+fi
+
+cat "$GLUSTERD_WORKING_DIR"/geo-replication/common_secret.pem.pub >> ~/.ssh/authorized_keys
diff --git a/geo-replication/src/peer_gsec_create.in b/geo-replication/src/peer_gsec_create.in
new file mode 100755
index 000000000..ef630bd44
--- /dev/null
+++ b/geo-replication/src/peer_gsec_create.in
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+
+if [ ! -f "$GLUSTERD_WORKING_DIR"/geo-replication/secret.pem.pub ]; then
+ \rm -rf "$GLUSTERD_WORKING_DIR"/geo-replication/secret.pem*
+ ssh-keygen -N '' -f "$GLUSTERD_WORKING_DIR"/geo-replication/secret.pem > /dev/null
+fi
+
+output=`echo command=\"@libexecdir@/glusterfs/gsyncd\" " "``cat "$GLUSTERD_WORKING_DIR"/geo-replication/secret.pem.pub`
+echo $output
diff --git a/geo-replication/src/procdiggy.c b/geo-replication/src/procdiggy.c
new file mode 100644
index 000000000..1eba414c1
--- /dev/null
+++ b/geo-replication/src/procdiggy.c
@@ -0,0 +1,121 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <ctype.h>
+#include <sys/param.h> /* for PATH_MAX */
+
+#include "common-utils.h"
+#include "procdiggy.h"
+
+pid_t
+pidinfo (pid_t pid, char **name)
+{
+ char buf[NAME_MAX * 2] = {0,};
+ FILE *f = NULL;
+ char path[PATH_MAX] = {0,};
+ char *p = NULL;
+ int ret = 0;
+
+ sprintf (path, PROC"/%d/status", pid);
+
+ f = fopen (path, "r");
+ if (!f)
+ return -1;
+
+ if (name)
+ *name = NULL;
+ for (;;) {
+ size_t len;
+ memset (buf, 0, sizeof (buf));
+ if (fgets (buf, sizeof (buf), f) == NULL ||
+ (len = strlen (buf)) == 0 ||
+ buf[len - 1] != '\n') {
+ pid = -1;
+ goto out;
+ }
+ buf[len - 1] = '\0';
+
+ if (name && !*name) {
+ p = strtail (buf, "Name:");
+ if (p) {
+ while (isspace (*++p));
+ *name = gf_strdup (p);
+ if (!*name) {
+ pid = -2;
+ goto out;
+ }
+ continue;
+ }
+ }
+
+ p = strtail (buf, "PPid:");
+ if (p)
+ break;
+ }
+
+ while (isspace (*++p));
+ ret = gf_string2int (p, &pid);
+ if (ret == -1)
+ pid = -1;
+
+ out:
+ fclose (f);
+ if (pid == -1 && name && *name)
+ GF_FREE (name);
+ if (pid == -2)
+ fprintf (stderr, "out of memory\n");
+ return pid;
+}
+
+int
+prociter (int (*proch) (pid_t pid, pid_t ppid, char *tmpname, void *data),
+ void *data)
+{
+ char *name = NULL;
+ DIR *d = NULL;
+ struct dirent *de = NULL;
+ pid_t pid = -1;
+ pid_t ppid = -1;
+ int ret = 0;
+
+ d = opendir (PROC);
+ if (!d)
+ return -1;
+ while (errno = 0, de = readdir (d)) {
+ if (gf_string2int (de->d_name, &pid) != -1 && pid >= 0) {
+ ppid = pidinfo (pid, &name);
+ switch (ppid) {
+ case -1: continue;
+ case -2: ret = -1; break;
+ }
+ ret = proch (pid, ppid, name, data);
+ GF_FREE (name);
+ if (ret)
+ break;
+ }
+ }
+ closedir (d);
+ if (!de && errno) {
+ fprintf (stderr, "failed to traverse "PROC" (%s)\n",
+ strerror (errno));
+ ret = -1;
+ }
+
+ return ret;
+}
diff --git a/geo-replication/src/procdiggy.h b/geo-replication/src/procdiggy.h
new file mode 100644
index 000000000..56dfc4eb2
--- /dev/null
+++ b/geo-replication/src/procdiggy.h
@@ -0,0 +1,20 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifdef __NetBSD__
+#include <sys/syslimits.h>
+#endif /* __NetBSD__ */
+
+#define PROC "/proc"
+
+pid_t pidinfo (pid_t pid, char **name);
+
+int prociter (int (*proch) (pid_t pid, pid_t ppid, char *name, void *data),
+ void *data);
+
diff --git a/geo-replication/syncdaemon/Makefile.am b/geo-replication/syncdaemon/Makefile.am
new file mode 100644
index 000000000..83f969639
--- /dev/null
+++ b/geo-replication/syncdaemon/Makefile.am
@@ -0,0 +1,7 @@
+syncdaemondir = $(libexecdir)/glusterfs/python/syncdaemon
+
+syncdaemon_PYTHON = gconf.py gsyncd.py __init__.py master.py README.md repce.py \
+ resource.py configinterface.py syncdutils.py monitor.py libcxattr.py \
+ $(top_builddir)/contrib/ipaddr-py/ipaddr.py libgfchangelog.py
+
+CLEANFILES =
diff --git a/geo-replication/syncdaemon/README.md b/geo-replication/syncdaemon/README.md
new file mode 100644
index 000000000..67f346ace
--- /dev/null
+++ b/geo-replication/syncdaemon/README.md
@@ -0,0 +1,58 @@
+gsycnd, the Gluster Syncdaemon
+==============================
+
+REQUIREMENTS
+------------
+
+_gsyncd_ is a program which can operate either in _master_ or in _slave_ mode.
+Requirements are categorized according to this.
+
+* supported OS is GNU/Linux
+* Python >= 2.5, or 2.4 with Ctypes (see below) (both)
+* OpenSSH >= 4.0 (master) / SSH2 compliant sshd (eg. openssh) (slave)
+* rsync (both)
+* glusterfs: with marker and changelog support (master & slave);
+* FUSE: glusterfs fuse module with auxilary gfid based access support
+
+INSTALLATION
+------------
+
+As of now, the supported way of operation is running from the source directory or using the RPMs given.
+
+If you use Python 2.4.x, you need to install the [Ctypes module](http://python.net/crew/theller/ctypes/).
+
+CONFIGURATION
+-------------
+
+gsyncd tunables are a subset of the long command-line options; for listing them,
+type
+
+ gsyncd.py --help
+
+and see the long options up to "--config-file". (The leading double dash should be omitted;
+interim underscores and dashes are interchangeable.) The set of options bear some resemblance
+to those of glusterfs and rsync.
+
+The config file format matches the following syntax:
+
+ <option1>: <value1>
+ <option2>: <value2>
+ # comment
+
+By default (unless specified by the option `-c`), gsyncd looks for config file at _conf/gsyncd_template.conf_
+in the source tree.
+
+USAGE
+-----
+
+gsyncd is a utilitly for continous mirroring, ie. it mirrors master to slave incrementally.
+Assume we have a gluster volume _pop_ at localhost. We try to set up the mirroring for volume
+_pop_ using gsyncd for gluster volume _moz_ on remote machine/cluster @ example.com. The
+respective gsyncd invocations are (demoing some syntax sugaring):
+
+`gsyncd.py :pop example.com::moz`
+
+gsyncd has to be available on both sides; it's location on the remote side has to be specified
+via the "--remote-gsyncd" option (or "remote-gsyncd" config file parameter). (This option can also be
+used for setting options on the remote side, although the suggested mode of operation is to
+set parameters like log file / pid file in the configuration file.)
diff --git a/geo-replication/syncdaemon/__codecheck.py b/geo-replication/syncdaemon/__codecheck.py
new file mode 100644
index 000000000..e3386afba
--- /dev/null
+++ b/geo-replication/syncdaemon/__codecheck.py
@@ -0,0 +1,46 @@
+import os
+import os.path
+import sys
+import tempfile
+import shutil
+
+ipd = tempfile.mkdtemp(prefix = 'codecheck-aux')
+
+try:
+ # add a fake ipaddr module, we don't want to
+ # deal with the real one (just test our code)
+ f = open(os.path.join(ipd, 'ipaddr.py'), 'w')
+ f.write("""
+class IPAddress(object):
+ pass
+class IPNetwork(list):
+ pass
+""")
+ f.close()
+ sys.path.append(ipd)
+
+ fl = os.listdir(os.path.dirname(sys.argv[0]) or '.')
+ fl.sort()
+ for f in fl:
+ if f[-3:] != '.py' or f[0] == '_':
+ continue
+ m = f[:-3]
+ sys.stdout.write('importing %s ...' % m)
+ __import__(m)
+ print(' OK.')
+
+ def sys_argv_set(a):
+ sys.argv = sys.argv[:1] + a
+
+ gsyncd = sys.modules['gsyncd']
+ for a in [['--help'], ['--version'], ['--canonicalize-escape-url', '/foo']]:
+ print('>>> invoking program with args: %s' % ' '.join(a))
+ pid = os.fork()
+ if not pid:
+ sys_argv_set(a)
+ gsyncd.main()
+ _, r = os.waitpid(pid, 0)
+ if r:
+ raise RuntimeError('invocation failed')
+finally:
+ shutil.rmtree(ipd)
diff --git a/geo-replication/syncdaemon/__init__.py b/geo-replication/syncdaemon/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/geo-replication/syncdaemon/__init__.py
diff --git a/geo-replication/syncdaemon/configinterface.py b/geo-replication/syncdaemon/configinterface.py
new file mode 100644
index 000000000..a326e8246
--- /dev/null
+++ b/geo-replication/syncdaemon/configinterface.py
@@ -0,0 +1,224 @@
+try:
+ import ConfigParser
+except ImportError:
+ # py 3
+ import configparser as ConfigParser
+import re
+from string import Template
+
+from syncdutils import escape, unescape, norm, update_file, GsyncdError
+
+SECT_ORD = '__section_order__'
+SECT_META = '__meta__'
+config_version = 2.0
+
+re_type = type(re.compile(''))
+
+
+class MultiDict(object):
+ """a virtual dict-like class which functions as the union of underlying dicts"""
+
+ def __init__(self, *dd):
+ self.dicts = dd
+
+ def __getitem__(self, key):
+ val = None
+ for d in self.dicts:
+ if d.get(key) != None:
+ val = d[key]
+ if val == None:
+ raise KeyError(key)
+ return val
+
+
+class GConffile(object):
+ """A high-level interface to ConfigParser which flattens the two-tiered
+ config layout by implenting automatic section dispatch based on initial
+ parameters.
+
+ Also ensure section ordering in terms of their time of addition -- a compat
+ hack for Python < 2.7.
+ """
+
+ def _normconfig(self):
+ """normalize config keys by s/-/_/g"""
+ for n, s in self.config._sections.items():
+ if n.find('__') == 0:
+ continue
+ s2 = type(s)()
+ for k, v in s.items():
+ if k.find('__') != 0:
+ k = norm(k)
+ s2[k] = v
+ self.config._sections[n] = s2
+
+ def __init__(self, path, peers, *dd):
+ """
+ - .path: location of config file
+ - .config: underlying ConfigParser instance
+ - .peers: on behalf of whom we flatten .config
+ (master, or master-slave url pair)
+ - .auxdicts: template subtituents
+ """
+ self.peers = peers
+ self.path = path
+ self.auxdicts = dd
+ self.config = ConfigParser.RawConfigParser()
+ self.config.read(path)
+ self._normconfig()
+
+ def section(self, rx=False):
+ """get the section name of the section representing .peers in .config"""
+ peers = self.peers
+ if not peers:
+ peers = ['.', '.']
+ rx = True
+ if rx:
+ st = 'peersrx'
+ else:
+ st = 'peers'
+ return ' '.join([st] + [escape(u) for u in peers])
+
+ @staticmethod
+ def parse_section(section):
+ """retrieve peers sequence encoded by section name
+ (as urls or regexen, depending on section type)
+ """
+ sl = section.split()
+ st = sl.pop(0)
+ sl = [unescape(u) for u in sl]
+ if st == 'peersrx':
+ sl = [re.compile(u) for u in sl]
+ return sl
+
+ def ord_sections(self):
+ """Return an ordered list of sections.
+
+ Ordering happens based on the auxiliary
+ SECT_ORD section storing indices for each
+ section added through the config API.
+
+ To not to go corrupt in case of manually
+ written config files, we take care to append
+ also those sections which are not registered
+ in SECT_ORD.
+
+ Needed for python 2.{4,5,6} where ConfigParser
+ cannot yet order sections/options internally.
+ """
+ so = {}
+ if self.config.has_section(SECT_ORD):
+ so = self.config._sections[SECT_ORD]
+ so2 = {}
+ for k, v in so.items():
+ if k != '__name__':
+ so2[k] = int(v)
+ tv = 0
+ if so2:
+ tv = max(so2.values()) + 1
+ ss = [s for s in self.config.sections() if s.find('__') != 0]
+ for s in ss:
+ if s in so.keys():
+ continue
+ so2[s] = tv
+ tv += 1
+ def scmp(x, y):
+ return cmp(*(so2[s] for s in (x, y)))
+ ss.sort(scmp)
+ return ss
+
+ def update_to(self, dct, allow_unresolved=False):
+ """update @dct from key/values of ours.
+
+ key/values are collected from .config by filtering the regexp sections
+ according to match, and from .section. The values are treated as templates,
+ which are substituted from .auxdicts and (in case of regexp sections)
+ match groups.
+ """
+ if not self.peers:
+ raise GsyncdError('no peers given, cannot select matching options')
+ def update_from_sect(sect, mud):
+ for k, v in self.config._sections[sect].items():
+ if k == '__name__':
+ continue
+ if allow_unresolved:
+ dct[k] = Template(v).safe_substitute(mud)
+ else:
+ dct[k] = Template(v).substitute(mud)
+ for sect in self.ord_sections():
+ sp = self.parse_section(sect)
+ if isinstance(sp[0], re_type) and len(sp) == len(self.peers):
+ match = True
+ mad = {}
+ for i in range(len(sp)):
+ m = sp[i].search(self.peers[i])
+ if not m:
+ match = False
+ break
+ for j in range(len(m.groups())):
+ mad['match%d_%d' % (i+1, j+1)] = m.groups()[j]
+ if match:
+ update_from_sect(sect, MultiDict(dct, mad, *self.auxdicts))
+ if self.config.has_section(self.section()):
+ update_from_sect(self.section(), MultiDict(dct, *self.auxdicts))
+
+ def get(self, opt=None):
+ """print the matching key/value pairs from .config,
+ or if @opt given, the value for @opt (according to the
+ logic described in .update_to)
+ """
+ d = {}
+ self.update_to(d, allow_unresolved = True)
+ if opt:
+ opt = norm(opt)
+ v = d.get(opt)
+ if v:
+ print(v)
+ else:
+ for k, v in d.iteritems():
+ if k == '__name__':
+ continue
+ print("%s: %s" % (k, v))
+
+ def write(self, trfn, opt, *a, **kw):
+ """update on-disk config transactionally
+
+ @trfn is the transaction function
+ """
+ def mergeconf(f):
+ self.config = ConfigParser.RawConfigParser()
+ self.config.readfp(f)
+ self._normconfig()
+ if not self.config.has_section(SECT_META):
+ self.config.add_section(SECT_META)
+ self.config.set(SECT_META, 'version', config_version)
+ return trfn(norm(opt), *a, **kw)
+ def updateconf(f):
+ self.config.write(f)
+ update_file(self.path, updateconf, mergeconf)
+
+ def _set(self, opt, val, rx=False):
+ """set @opt to @val in .section"""
+ sect = self.section(rx)
+ if not self.config.has_section(sect):
+ self.config.add_section(sect)
+ # regarding SECT_ORD, cf. ord_sections
+ if not self.config.has_section(SECT_ORD):
+ self.config.add_section(SECT_ORD)
+ self.config.set(SECT_ORD, sect, len(self.config._sections[SECT_ORD]))
+ self.config.set(sect, opt, val)
+ return True
+
+ def set(self, opt, *a, **kw):
+ """perform ._set transactionally"""
+ self.write(self._set, opt, *a, **kw)
+
+ def _delete(self, opt, rx=False):
+ """delete @opt from .section"""
+ sect = self.section(rx)
+ if self.config.has_section(sect):
+ return self.config.remove_option(sect, opt)
+
+ def delete(self, opt, *a, **kw):
+ """perform ._delete transactionally"""
+ self.write(self._delete, opt, *a, **kw)
diff --git a/geo-replication/syncdaemon/gconf.py b/geo-replication/syncdaemon/gconf.py
new file mode 100644
index 000000000..fe5795f16
--- /dev/null
+++ b/geo-replication/syncdaemon/gconf.py
@@ -0,0 +1,15 @@
+import os
+
+class GConf(object):
+ """singleton class to store globals
+ shared between gsyncd modules"""
+
+ ssh_ctl_dir = None
+ ssh_ctl_args = None
+ cpid = None
+ pid_file_owned = False
+ log_exit = False
+ permanent_handles = []
+ log_metadata = {}
+
+gconf = GConf()
diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py
new file mode 100644
index 000000000..7fcc3165a
--- /dev/null
+++ b/geo-replication/syncdaemon/gsyncd.py
@@ -0,0 +1,534 @@
+#!/usr/bin/env python
+
+import os
+import os.path
+import glob
+import sys
+import time
+import logging
+import signal
+import shutil
+import optparse
+import fcntl
+import fnmatch
+from optparse import OptionParser, SUPPRESS_HELP
+from logging import Logger, handlers
+from errno import EEXIST, ENOENT
+
+from ipaddr import IPAddress, IPNetwork
+
+from gconf import gconf
+from syncdutils import FreeObject, norm, grabpidfile, finalize, log_raise_exception
+from syncdutils import GsyncdError, select, set_term_handler, privileged, update_file
+from configinterface import GConffile
+import resource
+from monitor import monitor
+
+class GLogger(Logger):
+ """Logger customizations for gsyncd.
+
+ It implements a log format similar to that of glusterfs.
+ """
+
+ def makeRecord(self, name, level, *a):
+ rv = Logger.makeRecord(self, name, level, *a)
+ rv.nsecs = (rv.created - int(rv.created)) * 1000000
+ fr = sys._getframe(4)
+ callee = fr.f_locals.get('self')
+ if callee:
+ ctx = str(type(callee)).split("'")[1].split('.')[-1]
+ else:
+ ctx = '<top>'
+ if not hasattr(rv, 'funcName'):
+ rv.funcName = fr.f_code.co_name
+ rv.lvlnam = logging.getLevelName(level)[0]
+ rv.ctx = ctx
+ return rv
+
+ @classmethod
+ def setup(cls, **kw):
+ lbl = kw.get('label', "")
+ if lbl:
+ lbl = '(' + lbl + ')'
+ lprm = {'datefmt': "%Y-%m-%d %H:%M:%S",
+ 'format': "[%(asctime)s.%(nsecs)d] %(lvlnam)s [%(module)s" + lbl + ":%(lineno)s:%(funcName)s] %(ctx)s: %(message)s"}
+ lprm.update(kw)
+ lvl = kw.get('level', logging.INFO)
+ lprm['level'] = lvl
+ logging.root = cls("root", lvl)
+ logging.setLoggerClass(cls)
+ logging.getLogger().handlers = []
+ logging.getLogger().setLevel(lprm['level'])
+
+ if 'filename' in lprm:
+ try:
+ logging_handler = handlers.WatchedFileHandler(lprm['filename'])
+ formatter = logging.Formatter(fmt=lprm['format'],
+ datefmt=lprm['datefmt'])
+ logging_handler.setFormatter(formatter)
+ logging.getLogger().addHandler(logging_handler)
+ except AttributeError:
+ # Python version < 2.6 will not have WatchedFileHandler
+ # so fallback to logging without any handler.
+ # Note: logrotate will not work if Python version is < 2.6
+ logging.basicConfig(**lprm)
+ else:
+ # If filename not passed(not available in lprm) then it may be
+ # streaming.(Ex: {"stream": "/dev/stdout"})
+ logging.basicConfig(**lprm)
+
+ @classmethod
+ def _gsyncd_loginit(cls, **kw):
+ lkw = {}
+ if gconf.log_level:
+ lkw['level'] = gconf.log_level
+ if kw.get('log_file'):
+ if kw['log_file'] in ('-', '/dev/stderr'):
+ lkw['stream'] = sys.stderr
+ elif kw['log_file'] == '/dev/stdout':
+ lkw['stream'] = sys.stdout
+ else:
+ lkw['filename'] = kw['log_file']
+
+ cls.setup(label=kw.get('label'), **lkw)
+
+ lkw.update({'saved_label': kw.get('label')})
+ gconf.log_metadata = lkw
+ gconf.log_exit = True
+
+def startup(**kw):
+ """set up logging, pidfile grabbing, daemonization"""
+ if getattr(gconf, 'pid_file', None) and kw.get('go_daemon') != 'postconn':
+ if not grabpidfile():
+ sys.stderr.write("pidfile is taken, exiting.\n")
+ sys.exit(2)
+ gconf.pid_file_owned = True
+
+ if kw.get('go_daemon') == 'should':
+ x, y = os.pipe()
+ gconf.cpid = os.fork()
+ if gconf.cpid:
+ os.close(x)
+ sys.exit()
+ os.close(y)
+ os.setsid()
+ dn = os.open(os.devnull, os.O_RDWR)
+ for f in (sys.stdin, sys.stdout, sys.stderr):
+ os.dup2(dn, f.fileno())
+ if getattr(gconf, 'pid_file', None):
+ if not grabpidfile(gconf.pid_file + '.tmp'):
+ raise GsyncdError("cannot grab temporary pidfile")
+ os.rename(gconf.pid_file + '.tmp', gconf.pid_file)
+ # wait for parent to terminate
+ # so we can start up with
+ # no messing from the dirty
+ # ol' bustard
+ select((x,), (), ())
+ os.close(x)
+
+ GLogger._gsyncd_loginit(**kw)
+
+
+def _unlink(path):
+ try:
+ os.unlink(path)
+ except (OSError, IOError):
+ if sys.exc_info()[1].errno == ENOENT:
+ pass
+ else:
+ raise GsyncdError('Unlink error: %s' % path)
+
+
+def main():
+ """main routine, signal/exception handling boilerplates"""
+ gconf.starttime = time.time()
+ set_term_handler()
+ GLogger.setup()
+ excont = FreeObject(exval = 0)
+ try:
+ try:
+ main_i()
+ except:
+ log_raise_exception(excont)
+ finally:
+ finalize(exval = excont.exval)
+
+def main_i():
+ """internal main routine
+
+ parse command line, decide what action will be taken;
+ we can either:
+ - query/manipulate configuration
+ - format gsyncd urls using gsyncd's url parsing engine
+ - start service in following modes, in given stages:
+ - monitor: startup(), monitor()
+ - master: startup(), connect_remote(), connect(), service_loop()
+ - slave: startup(), connect(), service_loop()
+ """
+ rconf = {'go_daemon': 'should'}
+
+ def store_abs(opt, optstr, val, parser):
+ if val and val != '-':
+ val = os.path.abspath(val)
+ setattr(parser.values, opt.dest, val)
+ def store_local(opt, optstr, val, parser):
+ rconf[opt.dest] = val
+ def store_local_curry(val):
+ return lambda o, oo, vx, p: store_local(o, oo, val, p)
+ def store_local_obj(op, dmake):
+ return lambda o, oo, vx, p: store_local(o, oo, FreeObject(op=op, **dmake(vx)), p)
+
+ op = OptionParser(usage="%prog [options...] <master> <slave>", version="%prog 0.0.1")
+ op.add_option('--gluster-command-dir', metavar='DIR', default='')
+ op.add_option('--gluster-log-file', metavar='LOGF', default=os.devnull, type=str, action='callback', callback=store_abs)
+ op.add_option('--gluster-log-level', metavar='LVL')
+ op.add_option('--gluster-params', metavar='PRMS', default='')
+ op.add_option('--glusterd-uuid', metavar='UUID', type=str, default='', help=SUPPRESS_HELP)
+ op.add_option('--gluster-cli-options', metavar='OPTS', default='--log-file=-')
+ op.add_option('--mountbroker', metavar='LABEL')
+ op.add_option('-p', '--pid-file', metavar='PIDF', type=str, action='callback', callback=store_abs)
+ op.add_option('-l', '--log-file', metavar='LOGF', type=str, action='callback', callback=store_abs)
+ op.add_option('--log-file-mbr', metavar='LOGF', type=str, action='callback', callback=store_abs)
+ op.add_option('--state-file', metavar='STATF', type=str, action='callback', callback=store_abs)
+ op.add_option('--state-detail-file', metavar='STATF', type=str, action='callback', callback=store_abs)
+ op.add_option('--ignore-deletes', default=False, action='store_true')
+ op.add_option('--isolated-slave', default=False, action='store_true')
+ op.add_option('--use-rsync-xattrs', default=False, action='store_true')
+ op.add_option('-L', '--log-level', metavar='LVL')
+ op.add_option('-r', '--remote-gsyncd', metavar='CMD', default=os.path.abspath(sys.argv[0]))
+ op.add_option('--volume-id', metavar='UUID')
+ op.add_option('--slave-id', metavar='ID')
+ op.add_option('--session-owner', metavar='ID')
+ op.add_option('--local-id', metavar='ID', help=SUPPRESS_HELP, default='')
+ op.add_option('--local-path', metavar='PATH', help=SUPPRESS_HELP, default='')
+ op.add_option('-s', '--ssh-command', metavar='CMD', default='ssh')
+ op.add_option('--rsync-command', metavar='CMD', default='rsync')
+ op.add_option('--rsync-options', metavar='OPTS', default='')
+ op.add_option('--rsync-ssh-options', metavar='OPTS', default='--compress')
+ op.add_option('--timeout', metavar='SEC', type=int, default=120)
+ op.add_option('--connection-timeout', metavar='SEC', type=int, default=60, help=SUPPRESS_HELP)
+ op.add_option('--sync-jobs', metavar='N', type=int, default=3)
+ op.add_option('--turns', metavar='N', type=int, default=0, help=SUPPRESS_HELP)
+ op.add_option('--allow-network', metavar='IPS', default='')
+ op.add_option('--socketdir', metavar='DIR')
+ op.add_option('--state-socket-unencoded', metavar='SOCKF', type=str, action='callback', callback=store_abs)
+ op.add_option('--checkpoint', metavar='LABEL', default='')
+ # tunables for failover/failback mechanism:
+ # None - gsyncd behaves as normal
+ # blind - gsyncd works with xtime pairs to identify
+ # candidates for synchronization
+ # wrapup - same as normal mode but does not assign
+ # xtimes to orphaned files
+ # see crawl() for usage of the above tunables
+ op.add_option('--special-sync-mode', type=str, help=SUPPRESS_HELP)
+
+ # changelog or xtime? (TODO: Change the default)
+ op.add_option('--change-detector', metavar='MODE', type=str, default='xtime')
+ # sleep interval for change detection (xtime crawl uses a hardcoded 1 second sleep time)
+ op.add_option('--change-interval', metavar='SEC', type=int, default=3)
+ # working directory for changelog based mechanism
+ op.add_option('--working-dir', metavar='DIR', type=str, action='callback', callback=store_abs)
+
+ op.add_option('-c', '--config-file', metavar='CONF', type=str, action='callback', callback=store_local)
+ # duh. need to specify dest or value will be mapped to None :S
+ op.add_option('--monitor', dest='monitor', action='callback', callback=store_local_curry(True))
+ op.add_option('--resource-local', dest='resource_local', type=str, action='callback', callback=store_local)
+ op.add_option('--resource-remote', dest='resource_remote', type=str, action='callback', callback=store_local)
+ op.add_option('--feedback-fd', dest='feedback_fd', type=int, help=SUPPRESS_HELP, action='callback', callback=store_local)
+ op.add_option('--listen', dest='listen', help=SUPPRESS_HELP, action='callback', callback=store_local_curry(True))
+ op.add_option('-N', '--no-daemon', dest="go_daemon", action='callback', callback=store_local_curry('dont'))
+ op.add_option('--verify', type=str, dest="verify", action='callback', callback=store_local)
+ op.add_option('--create', type=str, dest="create", action='callback', callback=store_local)
+ op.add_option('--delete', dest='delete', action='callback', callback=store_local_curry(True))
+ op.add_option('--debug', dest="go_daemon", action='callback', callback=lambda *a: (store_local_curry('dont')(*a),
+ setattr(a[-1].values, 'log_file', '-'),
+ setattr(a[-1].values, 'log_level', 'DEBUG'))),
+ op.add_option('--path', type=str, action='append')
+
+ for a in ('check', 'get'):
+ op.add_option('--config-' + a, metavar='OPT', type=str, dest='config', action='callback',
+ callback=store_local_obj(a, lambda vx: {'opt': vx}))
+ op.add_option('--config-get-all', dest='config', action='callback', callback=store_local_obj('get', lambda vx: {'opt': None}))
+ for m in ('', '-rx', '-glob'):
+ # call this code 'Pythonic' eh?
+ # have to define a one-shot local function to be able to inject (a value depending on the)
+ # iteration variable into the inner lambda
+ def conf_mod_opt_regex_variant(rx):
+ op.add_option('--config-set' + m, metavar='OPT VAL', type=str, nargs=2, dest='config', action='callback',
+ callback=store_local_obj('set', lambda vx: {'opt': vx[0], 'val': vx[1], 'rx': rx}))
+ op.add_option('--config-del' + m, metavar='OPT', type=str, dest='config', action='callback',
+ callback=store_local_obj('del', lambda vx: {'opt': vx, 'rx': rx}))
+ conf_mod_opt_regex_variant(m and m[1:] or False)
+
+ op.add_option('--normalize-url', dest='url_print', action='callback', callback=store_local_curry('normal'))
+ op.add_option('--canonicalize-url', dest='url_print', action='callback', callback=store_local_curry('canon'))
+ op.add_option('--canonicalize-escape-url', dest='url_print', action='callback', callback=store_local_curry('canon_esc'))
+
+ tunables = [ norm(o.get_opt_string()[2:]) for o in op.option_list if o.callback in (store_abs, 'store_true', None) and o.get_opt_string() not in ('--version', '--help') ]
+ remote_tunables = [ 'listen', 'go_daemon', 'timeout', 'session_owner', 'config_file', 'use_rsync_xattrs' ]
+ rq_remote_tunables = { 'listen': True }
+
+ # precedence for sources of values: 1) commandline, 2) cfg file, 3) defaults
+ # -- for this to work out we need to tell apart defaults from explicitly set
+ # options... so churn out the defaults here and call the parser with virgin
+ # values container.
+ defaults = op.get_default_values()
+ opts, args = op.parse_args(values=optparse.Values())
+ args_orig = args[:]
+ r = rconf.get('resource_local')
+ if r:
+ if len(args) == 0:
+ args.append(None)
+ args[0] = r
+ r = rconf.get('resource_remote')
+ if r:
+ if len(args) == 0:
+ raise GsyncdError('local resource unspecfied')
+ elif len(args) == 1:
+ args.append(None)
+ args[1] = r
+ confdata = rconf.get('config')
+ if not (len(args) == 2 or \
+ (len(args) == 1 and rconf.get('listen')) or \
+ (len(args) <= 2 and confdata) or \
+ rconf.get('url_print')):
+ sys.stderr.write("error: incorrect number of arguments\n\n")
+ sys.stderr.write(op.get_usage() + "\n")
+ sys.exit(1)
+
+ verify = rconf.get('verify')
+ if verify:
+ logging.info (verify)
+ logging.info ("Able to spawn gsyncd.py")
+ return
+
+ restricted = os.getenv('_GSYNCD_RESTRICTED_')
+
+ if restricted:
+ allopts = {}
+ allopts.update(opts.__dict__)
+ allopts.update(rconf)
+ bannedtuns = set(allopts.keys()) - set(remote_tunables)
+ if bannedtuns:
+ raise GsyncdError('following tunables cannot be set with restricted SSH invocaton: ' + \
+ ', '.join(bannedtuns))
+ for k, v in rq_remote_tunables.items():
+ if not k in allopts or allopts[k] != v:
+ raise GsyncdError('tunable %s is not set to value %s required for restricted SSH invocaton' % \
+ (k, v))
+
+ confrx = getattr(confdata, 'rx', None)
+ def makersc(aa, check=True):
+ if not aa:
+ return ([], None, None)
+ ra = [resource.parse_url(u) for u in aa]
+ local = ra[0]
+ remote = None
+ if len(ra) > 1:
+ remote = ra[1]
+ if check and not local.can_connect_to(remote):
+ raise GsyncdError("%s cannot work with %s" % (local.path, remote and remote.path))
+ return (ra, local, remote)
+ if confrx:
+ # peers are regexen, don't try to parse them
+ if confrx == 'glob':
+ args = [ '\A' + fnmatch.translate(a) for a in args ]
+ canon_peers = args
+ namedict = {}
+ else:
+ dc = rconf.get('url_print')
+ rscs, local, remote = makersc(args_orig, not dc)
+ if dc:
+ for r in rscs:
+ print(r.get_url(**{'normal': {},
+ 'canon': {'canonical': True},
+ 'canon_esc': {'canonical': True, 'escaped': True}}[dc]))
+ return
+ pa = ([], [], [])
+ urlprms = ({}, {'canonical': True}, {'canonical': True, 'escaped': True})
+ for x in rscs:
+ for i in range(len(pa)):
+ pa[i].append(x.get_url(**urlprms[i]))
+ _, canon_peers, canon_esc_peers = pa
+ # creating the namedict, a dict representing various ways of referring to / repreenting
+ # peers to be fillable in config templates
+ mods = (lambda x: x, lambda x: x[0].upper() + x[1:], lambda x: 'e' + x[0].upper() + x[1:])
+ if remote:
+ rmap = { local: ('local', 'master'), remote: ('remote', 'slave') }
+ else:
+ rmap = { local: ('local', 'slave') }
+ namedict = {}
+ for i in range(len(rscs)):
+ x = rscs[i]
+ for name in rmap[x]:
+ for j in range(3):
+ namedict[mods[j](name)] = pa[j][i]
+ namedict[name + 'vol'] = x.volume
+ if name == 'remote':
+ namedict['remotehost'] = x.remotehost
+ if not 'config_file' in rconf:
+ rconf['config_file'] = os.path.join(os.path.dirname(sys.argv[0]), "conf/gsyncd_template.conf")
+ gcnf = GConffile(rconf['config_file'], canon_peers, defaults.__dict__, opts.__dict__, namedict)
+
+ checkpoint_change = False
+ if confdata:
+ opt_ok = norm(confdata.opt) in tunables + [None]
+ if confdata.op == 'check':
+ if opt_ok:
+ sys.exit(0)
+ else:
+ sys.exit(1)
+ elif not opt_ok:
+ raise GsyncdError("not a valid option: " + confdata.opt)
+ if confdata.op == 'get':
+ gcnf.get(confdata.opt)
+ elif confdata.op == 'set':
+ gcnf.set(confdata.opt, confdata.val, confdata.rx)
+ elif confdata.op == 'del':
+ gcnf.delete(confdata.opt, confdata.rx)
+ # when modifying checkpoint, it's important to make a log
+ # of that, so in that case we go on to set up logging even
+ # if its just config invocation
+ if confdata.opt == 'checkpoint' and confdata.op in ('set', 'del') and \
+ not confdata.rx:
+ checkpoint_change = True
+ if not checkpoint_change:
+ return
+
+ gconf.__dict__.update(defaults.__dict__)
+ gcnf.update_to(gconf.__dict__)
+ gconf.__dict__.update(opts.__dict__)
+ gconf.configinterface = gcnf
+
+ delete = rconf.get('delete')
+ if delete:
+ logging.info ('geo-replication delete')
+ # Delete pid file, status file, socket file
+ cleanup_paths = []
+ if getattr(gconf, 'pid_file', None):
+ cleanup_paths.append(gconf.pid_file)
+
+ if getattr(gconf, 'state_file', None):
+ cleanup_paths.append(gconf.state_file)
+
+ if getattr(gconf, 'state_detail_file', None):
+ cleanup_paths.append(gconf.state_detail_file)
+
+ if getattr(gconf, 'state_socket_unencoded', None):
+ cleanup_paths.append(gconf.state_socket_unencoded)
+
+ cleanup_paths.append(rconf['config_file'][:-11] + "*");
+
+ # Cleanup changelog working dirs
+ if getattr(gconf, 'working_dir', None):
+ try:
+ shutil.rmtree(gconf.working_dir)
+ except (IOError, OSError):
+ if sys.exc_info()[1].errno == ENOENT:
+ pass
+ else:
+ raise GsyncdError('Error while removing working dir: %s' % gconf.working_dir)
+
+ for path in cleanup_paths:
+ # To delete temp files
+ for f in glob.glob(path + "*"):
+ _unlink(f)
+ return
+
+ if restricted and gconf.allow_network:
+ ssh_conn = os.getenv('SSH_CONNECTION')
+ if not ssh_conn:
+ #legacy env var
+ ssh_conn = os.getenv('SSH_CLIENT')
+ if ssh_conn:
+ allowed_networks = [ IPNetwork(a) for a in gconf.allow_network.split(',') ]
+ client_ip = IPAddress(ssh_conn.split()[0])
+ allowed = False
+ for nw in allowed_networks:
+ if client_ip in nw:
+ allowed = True
+ break
+ if not allowed:
+ raise GsyncdError("client IP address is not allowed")
+
+ ffd = rconf.get('feedback_fd')
+ if ffd:
+ fcntl.fcntl(ffd, fcntl.F_SETFD, fcntl.FD_CLOEXEC)
+
+ #normalize loglevel
+ lvl0 = gconf.log_level
+ if isinstance(lvl0, str):
+ lvl1 = lvl0.upper()
+ lvl2 = logging.getLevelName(lvl1)
+ # I have _never_ _ever_ seen such an utterly braindead
+ # error condition
+ if lvl2 == "Level " + lvl1:
+ raise GsyncdError('cannot recognize log level "%s"' % lvl0)
+ gconf.log_level = lvl2
+
+ if not privileged() and gconf.log_file_mbr:
+ gconf.log_file = gconf.log_file_mbr
+
+ if checkpoint_change:
+ try:
+ GLogger._gsyncd_loginit(log_file=gconf.log_file, label='conf')
+ if confdata.op == 'set':
+ logging.info('checkpoint %s set' % confdata.val)
+ elif confdata.op == 'del':
+ logging.info('checkpoint info was reset')
+ except IOError:
+ if sys.exc_info()[1].errno == ENOENT:
+ # directory of log path is not present,
+ # which happens if we get here from
+ # a peer-multiplexed "config-set checkpoint"
+ # (as that directory is created only on the
+ # original node)
+ pass
+ else:
+ raise
+ return
+
+ create = rconf.get('create')
+ if create:
+ if getattr(gconf, 'state_file', None):
+ update_file(gconf.state_file, lambda f: f.write(create + '\n'))
+ return
+
+ go_daemon = rconf['go_daemon']
+ be_monitor = rconf.get('monitor')
+
+ rscs, local, remote = makersc(args)
+ if not be_monitor and isinstance(remote, resource.SSH) and \
+ go_daemon == 'should':
+ go_daemon = 'postconn'
+ log_file = None
+ else:
+ log_file = gconf.log_file
+ if be_monitor:
+ label = 'monitor'
+ elif remote:
+ #master
+ label = gconf.local_path
+ else:
+ label = 'slave'
+ startup(go_daemon=go_daemon, log_file=log_file, label=label)
+ resource.Popen.init_errhandler()
+
+ if be_monitor:
+ return monitor(*rscs)
+
+ logging.info("syncing: %s" % " -> ".join(r.url for r in rscs))
+ if remote:
+ go_daemon = remote.connect_remote(go_daemon=go_daemon)
+ if go_daemon:
+ startup(go_daemon=go_daemon, log_file=gconf.log_file)
+ # complete remote connection in child
+ remote.connect_remote(go_daemon='done')
+ local.connect()
+ if ffd:
+ os.close(ffd)
+ local.service_loop(*[r for r in [remote] if r])
+
+
+if __name__ == "__main__":
+ main()
diff --git a/geo-replication/syncdaemon/libcxattr.py b/geo-replication/syncdaemon/libcxattr.py
new file mode 100644
index 000000000..b5b6956ae
--- /dev/null
+++ b/geo-replication/syncdaemon/libcxattr.py
@@ -0,0 +1,87 @@
+import os
+from ctypes import *
+from ctypes.util import find_library
+
+class Xattr(object):
+ """singleton that wraps the extended attribues system
+ interface for python using ctypes
+
+ Just implement it to the degree we need it, in particular
+ - we need just the l*xattr variants, ie. we never want symlinks to be
+ followed
+ - don't need size discovery for getxattr, as we always know the exact
+ sizes we expect
+ """
+
+ libc = CDLL(find_library("libc"))
+
+ @classmethod
+ def geterrno(cls):
+ return c_int.in_dll(cls.libc, 'errno').value
+
+ @classmethod
+ def raise_oserr(cls):
+ errn = cls.geterrno()
+ raise OSError(errn, os.strerror(errn))
+
+ @classmethod
+ def _query_xattr(cls, path, siz, syscall, *a):
+ if siz:
+ buf = create_string_buffer('\0' * siz)
+ else:
+ buf = None
+ ret = getattr(cls.libc, syscall)(*((path,) + a + (buf, siz)))
+ if ret == -1:
+ cls.raise_oserr()
+ if siz:
+ return buf.raw[:ret]
+ else:
+ return ret
+
+ @classmethod
+ def lgetxattr(cls, path, attr, siz=0):
+ return cls._query_xattr( path, siz, 'lgetxattr', attr)
+
+ @classmethod
+ def lgetxattr_buf(cls, path, attr):
+ """lgetxattr variant with size discovery"""
+ size = cls.lgetxattr(path, attr)
+ if size == -1:
+ cls.raise_oserr()
+ if size == 0:
+ return ''
+ return cls.lgetxattr(path, attr, size)
+
+ @classmethod
+ def llistxattr(cls, path, siz=0):
+ ret = cls._query_xattr(path, siz, 'llistxattr')
+ if isinstance(ret, str):
+ ret = ret.split('\0')
+ return ret
+
+ @classmethod
+ def lsetxattr(cls, path, attr, val):
+ ret = cls.libc.lsetxattr(path, attr, val, len(val), 0)
+ if ret == -1:
+ cls.raise_oserr()
+
+ @classmethod
+ def lsetxattr_l(cls, path, attr, val):
+ """ lazy lsetxattr(): caller handles errno """
+ cls.libc.lsetxattr(path, attr, val, len(val), 0)
+
+ @classmethod
+ def lremovexattr(cls, path, attr):
+ ret = cls.libc.lremovexattr(path, attr)
+ if ret == -1:
+ cls.raise_oserr()
+
+ @classmethod
+ def llistxattr_buf(cls, path):
+ """listxattr variant with size discovery"""
+ size = cls.llistxattr(path)
+ if size == -1:
+ cls.raise_oserr()
+ if size == 0:
+ return []
+ return cls.llistxattr(path, size)
diff --git a/geo-replication/syncdaemon/libgfchangelog.py b/geo-replication/syncdaemon/libgfchangelog.py
new file mode 100644
index 000000000..68ec3baf1
--- /dev/null
+++ b/geo-replication/syncdaemon/libgfchangelog.py
@@ -0,0 +1,64 @@
+import os
+from ctypes import *
+from ctypes.util import find_library
+
+class Changes(object):
+ libgfc = CDLL(find_library("gfchangelog"), use_errno=True)
+
+ @classmethod
+ def geterrno(cls):
+ return get_errno()
+
+ @classmethod
+ def raise_oserr(cls):
+ errn = cls.geterrno()
+ raise OSError(errn, os.strerror(errn))
+
+ @classmethod
+ def _get_api(cls, call):
+ return getattr(cls.libgfc, call)
+
+ @classmethod
+ def cl_register(cls, brick, path, log_file, log_level, retries = 0):
+ ret = cls._get_api('gf_changelog_register')(brick, path,
+ log_file, log_level, retries)
+ if ret == -1:
+ cls.raise_oserr()
+
+ @classmethod
+ def cl_scan(cls):
+ ret = cls._get_api('gf_changelog_scan')()
+ if ret == -1:
+ cls.raise_oserr()
+
+ @classmethod
+ def cl_startfresh(cls):
+ ret = cls._get_api('gf_changelog_start_fresh')()
+ if ret == -1:
+ cls.raise_oserr()
+
+ @classmethod
+ def cl_getchanges(cls):
+ """ remove hardcoding for path name length """
+ def clsort(f):
+ return f.split('.')[-1]
+ changes = []
+ buf = create_string_buffer('\0', 4096)
+ call = cls._get_api('gf_changelog_next_change')
+
+ while True:
+ ret = call(buf, 4096)
+ if ret in (0, -1):
+ break;
+ changes.append(buf.raw[:ret-1])
+ if ret == -1:
+ cls.raise_oserr()
+ # cleanup tracker
+ cls.cl_startfresh()
+ return sorted(changes, key=clsort)
+
+ @classmethod
+ def cl_done(cls, clfile):
+ ret = cls._get_api('gf_changelog_done')(clfile)
+ if ret == -1:
+ cls.raise_oserr()
diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
new file mode 100644
index 000000000..95810a61e
--- /dev/null
+++ b/geo-replication/syncdaemon/master.py
@@ -0,0 +1,1022 @@
+import os
+import sys
+import time
+import stat
+import random
+import signal
+import json
+import logging
+import socket
+import string
+import errno
+from shutil import copyfileobj
+from errno import ENOENT, ENODATA, EPIPE, EEXIST
+from threading import currentThread, Condition, Lock
+from datetime import datetime
+
+from gconf import gconf
+from tempfile import mkdtemp, NamedTemporaryFile
+from syncdutils import FreeObject, Thread, GsyncdError, boolify, escape, \
+ unescape, select, gauxpfx, md5hex, selfkill, entry2pb, \
+ lstat, errno_wrap
+
+URXTIME = (-1, 0)
+
+# Utility functions to help us to get to closer proximity
+# of the DRY principle (no, don't look for elevated or
+# perspectivistic things here)
+
+def _xtime_now():
+ t = time.time()
+ sec = int(t)
+ nsec = int((t - sec) * 1000000)
+ return (sec, nsec)
+
+def _volinfo_hook_relax_foreign(self):
+ volinfo_sys = self.get_sys_volinfo()
+ fgn_vi = volinfo_sys[self.KFGN]
+ if fgn_vi:
+ expiry = fgn_vi['timeout'] - int(time.time()) + 1
+ logging.info('foreign volume info found, waiting %d sec for expiry' % \
+ expiry)
+ time.sleep(expiry)
+ volinfo_sys = self.get_sys_volinfo()
+ return volinfo_sys
+
+
+# The API!
+
+def gmaster_builder(excrawl=None):
+ """produce the GMaster class variant corresponding
+ to sync mode"""
+ this = sys.modules[__name__]
+ modemixin = gconf.special_sync_mode
+ if not modemixin:
+ modemixin = 'normal'
+ changemixin = isinstance(excrawl, str) and excrawl or gconf.change_detector
+ logging.info('setting up %s change detection mode' % changemixin)
+ modemixin = getattr(this, modemixin.capitalize() + 'Mixin')
+ crawlmixin = getattr(this, 'GMaster' + changemixin.capitalize() + 'Mixin')
+ sendmarkmixin = boolify(gconf.use_rsync_xattrs) and SendmarkRsyncMixin or SendmarkNormalMixin
+ purgemixin = boolify(gconf.ignore_deletes) and PurgeNoopMixin or PurgeNormalMixin
+ class _GMaster(crawlmixin, modemixin, sendmarkmixin, purgemixin):
+ pass
+ return _GMaster
+
+
+# Mixin classes that implement the data format
+# and logic particularities of the certain
+# sync modes
+
+class NormalMixin(object):
+ """normal geo-rep behavior"""
+
+ minus_infinity = URXTIME
+
+ # following staticmethods ideally would be
+ # methods of an xtime object (in particular,
+ # implementing the hooks needed for comparison
+ # operators), but at this point we don't yet
+ # have a dedicated xtime class
+
+ @staticmethod
+ def serialize_xtime(xt):
+ return "%d.%d" % tuple(xt)
+
+ @staticmethod
+ def deserialize_xtime(xt):
+ return tuple(int(x) for x in xt.split("."))
+
+ @staticmethod
+ def native_xtime(xt):
+ return xt
+
+ @staticmethod
+ def xtime_geq(xt0, xt1):
+ return xt0 >= xt1
+
+ def make_xtime_opts(self, is_master, opts):
+ if not 'create' in opts:
+ opts['create'] = is_master
+ if not 'default_xtime' in opts:
+ opts['default_xtime'] = URXTIME
+
+ def xtime_low(self, server, path, **opts):
+ xt = server.xtime(path, self.uuid)
+ if isinstance(xt, int) and xt != ENODATA:
+ return xt
+ if xt == ENODATA or xt < self.volmark:
+ if opts['create']:
+ xt = _xtime_now()
+ server.aggregated.set_xtime(path, self.uuid, xt)
+ else:
+ xt = opts['default_xtime']
+ return xt
+
+ def keepalive_payload_hook(self, timo, gap):
+ # first grab a reference as self.volinfo
+ # can be changed in main thread
+ vi = self.volinfo
+ if vi:
+ # then have a private copy which we can mod
+ vi = vi.copy()
+ vi['timeout'] = int(time.time()) + timo
+ else:
+ # send keep-alives more frequently to
+ # avoid a delay in announcing our volume info
+ # to slave if it becomes established in the
+ # meantime
+ gap = min(10, gap)
+ return (vi, gap)
+
+ def volinfo_hook(self):
+ return self.get_sys_volinfo()
+
+ def xtime_reversion_hook(self, path, xtl, xtr):
+ if xtr > xtl:
+ raise GsyncdError("timestamp corruption for " + path)
+
+ def need_sync(self, e, xte, xtrd):
+ return xte > xtrd
+
+ def set_slave_xtime(self, path, mark):
+ self.slave.server.set_xtime(path, self.uuid, mark)
+ self.slave.server.set_xtime_remote(path, self.uuid, mark)
+
+class PartialMixin(NormalMixin):
+ """a variant tuned towards operation with a master
+ that has partial info of the slave (brick typically)"""
+
+ def xtime_reversion_hook(self, path, xtl, xtr):
+ pass
+
+class RecoverMixin(NormalMixin):
+ """a variant that differs from normal in terms
+ of ignoring non-indexed files"""
+
+ @staticmethod
+ def make_xtime_opts(is_master, opts):
+ if not 'create' in opts:
+ opts['create'] = False
+ if not 'default_xtime' in opts:
+ opts['default_xtime'] = URXTIME
+
+ def keepalive_payload_hook(self, timo, gap):
+ return (None, gap)
+
+ def volinfo_hook(self):
+ return _volinfo_hook_relax_foreign(self)
+
+# Further mixins for certain tunable behaviors
+
+class SendmarkNormalMixin(object):
+
+ def sendmark_regular(self, *a, **kw):
+ return self.sendmark(*a, **kw)
+
+class SendmarkRsyncMixin(object):
+
+ def sendmark_regular(self, *a, **kw):
+ pass
+
+
+class PurgeNormalMixin(object):
+
+ def purge_missing(self, path, names):
+ self.slave.server.purge(path, names)
+
+class PurgeNoopMixin(object):
+
+ def purge_missing(self, path, names):
+ pass
+
+class GMasterCommon(object):
+ """abstract class impementling master role"""
+
+ KFGN = 0
+ KNAT = 1
+
+ def get_sys_volinfo(self):
+ """query volume marks on fs root
+
+ err out on multiple foreign masters
+ """
+ fgn_vis, nat_vi = self.master.server.aggregated.foreign_volume_infos(), \
+ self.master.server.aggregated.native_volume_info()
+ fgn_vi = None
+ if fgn_vis:
+ if len(fgn_vis) > 1:
+ raise GsyncdError("cannot work with multiple foreign masters")
+ fgn_vi = fgn_vis[0]
+ return fgn_vi, nat_vi
+
+ @property
+ def uuid(self):
+ if self.volinfo:
+ return self.volinfo['uuid']
+
+ @property
+ def volmark(self):
+ if self.volinfo:
+ return self.volinfo['volume_mark']
+
+ def xtime(self, path, *a, **opts):
+ """get amended xtime
+
+ as of amending, we can create missing xtime, or
+ determine a valid value if what we get is expired
+ (as of the volume mark expiry); way of amendig
+ depends on @opts and on subject of query (master
+ or slave).
+ """
+ if a:
+ rsc = a[0]
+ else:
+ rsc = self.master
+ self.make_xtime_opts(rsc == self.master, opts)
+ return self.xtime_low(rsc.server, path, **opts)
+
+ def get_initial_crawl_data(self):
+ # while persisting only 'files_syncd' is non-zero, rest of
+ # the stats are nulls. lets keep it that way in case they
+ # are needed to be used some day...
+ default_data = {'files_syncd': 0,
+ 'files_remaining': 0,
+ 'bytes_remaining': 0,
+ 'purges_remaining': 0}
+ if getattr(gconf, 'state_detail_file', None):
+ try:
+ return json.load(open(gconf.state_detail_file))
+ except (IOError, OSError):
+ ex = sys.exc_info()[1]
+ if ex.errno == ENOENT:
+ # Create file with initial data
+ with open(gconf.state_detail_file, 'wb') as f:
+ json.dump(default_data, f)
+ return default_data
+ else:
+ raise
+ return default_data
+
+ def update_crawl_data(self):
+ if getattr(gconf, 'state_detail_file', None):
+ try:
+ same_dir = os.path.dirname(gconf.state_detail_file)
+ with NamedTemporaryFile(dir=same_dir, delete=False) as tmp:
+ json.dump(self.total_crawl_stats, tmp)
+ os.rename(tmp.name, gconf.state_detail_file)
+ except (IOError, OSError):
+ raise
+
+ def __init__(self, master, slave):
+ self.master = master
+ self.slave = slave
+ self.jobtab = {}
+ self.syncer = Syncer(slave)
+ # crawls vs. turns:
+ # - self.crawls is simply the number of crawl() invocations on root
+ # - one turn is a maximal consecutive sequence of crawls so that each
+ # crawl in it detects a change to be synced
+ # - self.turns is the number of turns since start
+ # - self.total_turns is a limit so that if self.turns reaches it, then
+ # we exit (for diagnostic purposes)
+ # so, eg., if the master fs changes unceasingly, self.turns will remain 0.
+ self.crawls = 0
+ self.turns = 0
+ self.total_turns = int(gconf.turns)
+ self.crawl_start = datetime.now()
+ self.lastreport = {'crawls': 0, 'turns': 0, 'time': 0}
+ self.total_crawl_stats = None
+ self.start = None
+ self.change_seen = None
+ # the actual volinfo we make use of
+ self.volinfo = None
+ self.terminate = False
+ self.sleep_interval = 1
+ self.checkpoint_thread = None
+
+ def init_keep_alive(cls):
+ """start the keep-alive thread """
+ timo = int(gconf.timeout or 0)
+ if timo > 0:
+ def keep_alive():
+ while True:
+ vi, gap = cls.keepalive_payload_hook(timo, timo * 0.5)
+ cls.slave.server.keep_alive(vi)
+ time.sleep(gap)
+ t = Thread(target=keep_alive)
+ t.start()
+
+ def should_crawl(cls):
+ return (gconf.glusterd_uuid in cls.master.server.node_uuid())
+
+ def register(self):
+ self.register()
+
+ def crawlwrap(self, oneshot=False):
+ if oneshot:
+ # it's important to do this during the oneshot crawl as
+ # for a passive gsyncd (ie. in a replicate scenario)
+ # the keepalive thread would keep the connection alive.
+ self.init_keep_alive()
+
+ # no need to maintain volinfo state machine.
+ # in a cascading setup, each geo-replication session is
+ # independent (ie. 'volume-mark' and 'xtime' are not
+ # propogated). This is beacuse the slave's xtime is now
+ # stored on the master itself. 'volume-mark' just identifies
+ # that we are in a cascading setup and need to enable
+ # 'geo-replication.ignore-pid-check' option.
+ volinfo_sys = self.volinfo_hook()
+ self.volinfo = volinfo_sys[self.KNAT]
+ inter_master = volinfo_sys[self.KFGN]
+ logging.info("%s master with volume id %s ..." % \
+ (inter_master and "intermediate" or "primary",
+ self.uuid))
+ gconf.configinterface.set('volume_id', self.uuid)
+ if self.volinfo:
+ if self.volinfo['retval']:
+ raise GsyncdError("master is corrupt")
+ self.start_checkpoint_thread()
+ else:
+ raise GsyncdError("master volinfo unavailable")
+ self.total_crawl_stats = self.get_initial_crawl_data()
+ self.lastreport['time'] = time.time()
+ logging.info('crawl interval: %d seconds' % self.sleep_interval)
+
+ t0 = time.time()
+ crawl = self.should_crawl()
+ while not self.terminate:
+ if self.start:
+ logging.debug("... crawl #%d done, took %.6f seconds" % \
+ (self.crawls, time.time() - self.start))
+ self.start = time.time()
+ should_display_info = self.start - self.lastreport['time'] >= 60
+ if should_display_info:
+ logging.info("%d crawls, %d turns",
+ self.crawls - self.lastreport['crawls'],
+ self.turns - self.lastreport['turns'])
+ self.lastreport.update(crawls = self.crawls,
+ turns = self.turns,
+ time = self.start)
+ t1 = time.time()
+ if int(t1 - t0) >= 60: #lets hardcode this check to 60 seconds
+ crawl = self.should_crawl()
+ t0 = t1
+ if not crawl:
+ time.sleep(5)
+ continue
+ self.crawl()
+ if oneshot:
+ return
+ time.sleep(self.sleep_interval)
+
+ @classmethod
+ def _checkpt_param(cls, chkpt, prm, xtimish=True):
+ """use config backend to lookup a parameter belonging to
+ checkpoint @chkpt"""
+ cprm = getattr(gconf, 'checkpoint_' + prm, None)
+ if not cprm:
+ return
+ chkpt_mapped, val = cprm.split(':', 1)
+ if unescape(chkpt_mapped) != chkpt:
+ return
+ if xtimish:
+ val = cls.deserialize_xtime(val)
+ return val
+
+ @classmethod
+ def _set_checkpt_param(cls, chkpt, prm, val, xtimish=True):
+ """use config backend to store a parameter associated
+ with checkpoint @chkpt"""
+ if xtimish:
+ val = cls.serialize_xtime(val)
+ gconf.configinterface.set('checkpoint_' + prm, "%s:%s" % (escape(chkpt), val))
+
+ @staticmethod
+ def humantime(*tpair):
+ """format xtime-like (sec, nsec) pair to human readable format"""
+ ts = datetime.fromtimestamp(float('.'.join(str(n) for n in tpair))).\
+ strftime("%Y-%m-%d %H:%M:%S")
+ if len(tpair) > 1:
+ ts += '.' + str(tpair[1])
+ return ts
+
+ def get_extra_info(self):
+ str_info = '\nUptime=%s;FilesSyncd=%d;FilesPending=%d;BytesPending=%d;DeletesPending=%d;' % \
+ (self._crawl_time_format(datetime.now() - self.crawl_start), \
+ self.total_crawl_stats['files_syncd'], \
+ self.total_crawl_stats['files_remaining'], \
+ self.total_crawl_stats['bytes_remaining'], \
+ self.total_crawl_stats['purges_remaining'])
+ str_info += '\0'
+ logging.debug(str_info)
+ return str_info
+
+ def _crawl_time_format(self, crawl_time):
+ # Ex: 5 years, 4 days, 20:23:10
+ years, days = divmod(crawl_time.days, 365.25)
+ years = int(years)
+ days = int(days)
+
+ date=""
+ m, s = divmod(crawl_time.seconds, 60)
+ h, m = divmod(m, 60)
+
+ if years != 0:
+ date += "%s %s " % (years, "year" if years == 1 else "years")
+ if days != 0:
+ date += "%s %s " % (days, "day" if days == 1 else "days")
+
+ date += "%s:%s:%s" % (string.zfill(h, 2), string.zfill(m, 2), string.zfill(s, 2))
+ return date
+
+ def checkpt_service(self, chan, chkpt, tgt):
+ """checkpoint service loop
+
+ monitor and verify checkpoint status for @chkpt, and listen
+ for incoming requests for whom we serve a pretty-formatted
+ status report"""
+ if not chkpt:
+ # dummy loop for the case when there is no checkpt set
+ while True:
+ select([chan], [], [])
+ conn, _ = chan.accept()
+ conn.send(self.get_extra_info())
+ conn.close()
+ completed = self._checkpt_param(chkpt, 'completed', xtimish=False)
+ if completed:
+ completed = tuple(int(x) for x in completed.split('.'))
+ while True:
+ s,_,_ = select([chan], [], [], (not completed) and 5 or None)
+ # either request made and we re-check to not
+ # give back stale data, or we still hunting for completion
+ if self.native_xtime(tgt) and self.native_xtime(tgt) < self.volmark:
+ # indexing has been reset since setting the checkpoint
+ status = "is invalid"
+ else:
+ xtr = self.xtime('.', self.slave)
+ if isinstance(xtr, int):
+ raise GsyncdError("slave root directory is unaccessible (%s)",
+ os.strerror(xtr))
+ ncompleted = self.xtime_geq(xtr, tgt)
+ if completed and not ncompleted: # stale data
+ logging.warn("completion time %s for checkpoint %s became stale" % \
+ (self.humantime(*completed), chkpt))
+ completed = None
+ gconf.confdata.delete('checkpoint-completed')
+ if ncompleted and not completed: # just reaching completion
+ completed = "%.6f" % time.time()
+ self._set_checkpt_param(chkpt, 'completed', completed, xtimish=False)
+ completed = tuple(int(x) for x in completed.split('.'))
+ logging.info("checkpoint %s completed" % chkpt)
+ status = completed and \
+ "completed at " + self.humantime(completed[0]) or \
+ "not reached yet"
+ if s:
+ conn = None
+ try:
+ conn, _ = chan.accept()
+ try:
+ conn.send(" | checkpoint %s %s %s" % (chkpt, status, self.get_extra_info()))
+ except:
+ exc = sys.exc_info()[1]
+ if (isinstance(exc, OSError) or isinstance(exc, IOError)) and \
+ exc.errno == EPIPE:
+ logging.debug('checkpoint client disconnected')
+ else:
+ raise
+ finally:
+ if conn:
+ conn.close()
+
+ def start_checkpoint_thread(self):
+ """prepare and start checkpoint service"""
+ if self.checkpoint_thread or not (
+ getattr(gconf, 'state_socket_unencoded', None) and getattr(gconf, 'socketdir', None)
+ ):
+ return
+ chan = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+ state_socket = os.path.join(gconf.socketdir, md5hex(gconf.state_socket_unencoded) + ".socket")
+ try:
+ os.unlink(state_socket)
+ except:
+ if sys.exc_info()[0] == OSError:
+ pass
+ chan.bind(state_socket)
+ chan.listen(1)
+ checkpt_tgt = None
+ if gconf.checkpoint:
+ checkpt_tgt = self._checkpt_param(gconf.checkpoint, 'target')
+ if not checkpt_tgt:
+ checkpt_tgt = self.xtime('.')
+ if isinstance(checkpt_tgt, int):
+ raise GsyncdError("master root directory is unaccessible (%s)",
+ os.strerror(checkpt_tgt))
+ self._set_checkpt_param(gconf.checkpoint, 'target', checkpt_tgt)
+ logging.debug("checkpoint target %s has been determined for checkpoint %s" % \
+ (repr(checkpt_tgt), gconf.checkpoint))
+ t = Thread(target=self.checkpt_service, args=(chan, gconf.checkpoint, checkpt_tgt))
+ t.start()
+ self.checkpoint_thread = t
+
+ def add_job(self, path, label, job, *a, **kw):
+ """insert @job function to job table at @path with @label"""
+ if self.jobtab.get(path) == None:
+ self.jobtab[path] = []
+ self.jobtab[path].append((label, a, lambda : job(*a, **kw)))
+
+ def add_failjob(self, path, label):
+ """invoke .add_job with a job that does nothing just fails"""
+ logging.debug('salvaged: ' + label)
+ self.add_job(path, label, lambda: False)
+
+ def wait(self, path, *args):
+ """perform jobs registered for @path
+
+ Reset jobtab entry for @path,
+ determine success as the conjuction of
+ success of all the jobs. In case of
+ success, call .sendmark on @path
+ """
+ jobs = self.jobtab.pop(path, [])
+ succeed = True
+ for j in jobs:
+ ret = j[-1]()
+ if not ret:
+ succeed = False
+ if succeed and not args[0] == None:
+ self.sendmark(path, *args)
+ return succeed
+
+ def sendmark(self, path, mark, adct=None):
+ """update slave side xtime for @path to master side xtime
+
+ also can send a setattr payload (see Server.setattr).
+ """
+ if adct:
+ self.slave.server.setattr(path, adct)
+ self.set_slave_xtime(path, mark)
+
+class GMasterChangelogMixin(GMasterCommon):
+ """ changelog based change detection and syncing """
+
+ # index for change type and entry
+ IDX_START = 0
+ IDX_END = 2
+
+ POS_GFID = 0
+ POS_TYPE = 1
+ POS_ENTRY1 = 2
+ POS_ENTRY2 = 3 # renames
+
+ _CL_TYPE_DATA_PFX = "D "
+ _CL_TYPE_METADATA_PFX = "M "
+ _CL_TYPE_ENTRY_PFX = "E "
+
+ TYPE_GFID = [_CL_TYPE_DATA_PFX] # ignoring metadata ops
+ TYPE_ENTRY = [_CL_TYPE_ENTRY_PFX]
+
+ # flat directory heirarchy for gfid based access
+ FLAT_DIR_HIERARCHY = '.'
+
+ # maximum retries per changelog before giving up
+ MAX_RETRIES = 10
+
+ def fallback_xsync(self):
+ logging.info('falling back to xsync mode')
+ gconf.configinterface.set('change-detector', 'xsync')
+ selfkill()
+
+ def setup_working_dir(self):
+ workdir = os.path.join(gconf.working_dir, md5hex(gconf.local_path))
+ logfile = os.path.join(workdir, 'changes.log')
+ logging.debug('changelog working dir %s (log: %s)' % (workdir, logfile))
+ return (workdir, logfile)
+
+ # update stats from *this* crawl
+ def update_cumulative_stats(self, files_pending):
+ self.total_crawl_stats['files_remaining'] = files_pending['count']
+ self.total_crawl_stats['bytes_remaining'] = files_pending['bytes']
+ self.total_crawl_stats['purges_remaining'] = files_pending['purge']
+
+ # sync data
+ def syncdata(self, datas):
+ logging.debug('datas: %s' % (datas))
+ for data in datas:
+ logging.debug('candidate for syncing %s' % data)
+ pb = self.syncer.add(data)
+ def regjob(se, xte, pb):
+ rv = pb.wait()
+ if rv[0]:
+ logging.debug('synced ' + se)
+ return True
+ else:
+ if rv[1] in [23, 24]:
+ # stat to check if the file exist
+ st = lstat(se)
+ if isinstance(st, int):
+ # file got unlinked in the interim
+ return True
+ logging.warn('Rsync: %s [errcode: %d]' % (se, rv[1]))
+ self.add_job(self.FLAT_DIR_HIERARCHY, 'reg', regjob, data, None, pb)
+ if self.wait(self.FLAT_DIR_HIERARCHY, None):
+ return True
+
+ def process_change(self, change, done, retry):
+ pfx = gauxpfx()
+ clist = []
+ entries = []
+ datas = set()
+
+ # basic crawl stats: files and bytes
+ files_pending = {'count': 0, 'purge': 0, 'bytes': 0, 'files': []}
+ try:
+ f = open(change, "r")
+ clist = f.readlines()
+ f.close()
+ except IOError:
+ raise
+
+ def edct(op, **ed):
+ dct = {}
+ dct['op'] = op
+ for k in ed:
+ if k == 'stat':
+ st = ed[k]
+ dst = dct['stat'] = {}
+ dst['uid'] = st.st_uid
+ dst['gid'] = st.st_gid
+ dst['mode'] = st.st_mode
+ else:
+ dct[k] = ed[k]
+ return dct
+
+ # regular file update: bytes & count
+ def _update_reg(entry, size):
+ if not entry in files_pending['files']:
+ files_pending['count'] += 1
+ files_pending['bytes'] += size
+ files_pending['files'].append(entry)
+ # updates for directories, symlinks etc..
+ def _update_rest():
+ files_pending['count'] += 1
+
+ # entry count
+ def entry_update(entry, size, mode):
+ if stat.S_ISREG(mode):
+ _update_reg(entry, size)
+ else:
+ _update_rest()
+ # purge count
+ def purge_update():
+ files_pending['purge'] += 1
+
+ for e in clist:
+ e = e.strip()
+ et = e[self.IDX_START:self.IDX_END]
+ ec = e[self.IDX_END:].split(' ')
+ if et in self.TYPE_ENTRY:
+ ty = ec[self.POS_TYPE]
+ en = unescape(os.path.join(pfx, ec[self.POS_ENTRY1]))
+ gfid = ec[self.POS_GFID]
+ # definitely need a better way bucketize entry ops
+ if ty in ['UNLINK', 'RMDIR']:
+ purge_update()
+ entries.append(edct(ty, gfid=gfid, entry=en))
+ continue
+ go = os.path.join(pfx, gfid)
+ st = lstat(go)
+ if isinstance(st, int):
+ if ty == 'RENAME':
+ entries.append(edct('UNLINK', gfid=gfid, entry=en))
+ else:
+ logging.debug('file %s got purged in the interim' % go)
+ continue
+ entry_update(go, st.st_size, st.st_mode)
+ if ty in ['CREATE', 'MKDIR', 'MKNOD']:
+ entries.append(edct(ty, stat=st, entry=en, gfid=gfid))
+ elif ty == 'LINK':
+ entries.append(edct(ty, stat=st, entry=en, gfid=gfid))
+ elif ty == 'SYMLINK':
+ rl = errno_wrap(os.readlink, [en], [ENOENT])
+ if isinstance(rl, int):
+ continue
+ entries.append(edct(ty, stat=st, entry=en, gfid=gfid, link=rl))
+ elif ty == 'RENAME':
+ e2 = unescape(os.path.join(pfx, ec[self.POS_ENTRY2]))
+ entries.append(edct(ty, gfid=gfid, entry=en, entry1=e2, stat=st))
+ else:
+ logging.warn('ignoring %s [op %s]' % (gfid, ty))
+ elif et in self.TYPE_GFID:
+ go = os.path.join(pfx, ec[0])
+ st = lstat(go)
+ if isinstance(st, int):
+ logging.debug('file %s got purged in the interim' % go)
+ continue
+ entry_update(go, st.st_size, st.st_mode)
+ datas.update([go])
+ logging.debug('entries: %s' % repr(entries))
+ if not retry:
+ self.update_cumulative_stats(files_pending)
+ # sync namespace
+ if (entries):
+ self.slave.server.entry_ops(entries)
+ # sync data
+ if self.syncdata(datas):
+ if done:
+ self.master.server.changelog_done(change)
+ return True
+
+ def sync_done(self):
+ self.total_crawl_stats['files_syncd'] += self.total_crawl_stats['files_remaining']
+ self.total_crawl_stats['files_remaining'] = 0
+ self.total_crawl_stats['bytes_remaining'] = 0
+ self.total_crawl_stats['purges_remaining'] = 0
+ self.update_crawl_data()
+
+ def process(self, changes, done=1):
+ for change in changes:
+ tries = 0
+ retry = False
+ while True:
+ logging.debug('processing change %s' % change)
+ if self.process_change(change, done, retry):
+ self.sync_done()
+ break
+ retry = True
+ tries += 1
+ if tries == self.MAX_RETRIES:
+ logging.warn('changelog %s could not be processed - moving on...' % os.path.basename(change))
+ self.sync_done()
+ if done:
+ self.master.server.changelog_done(change)
+ break
+ # it's either entry_ops() or Rsync that failed to do it's
+ # job. Mostly it's entry_ops() [which currently has a problem
+ # of failing to create an entry but failing to return an errno]
+ # Therefore we do not know if it's either Rsync or the freaking
+ # entry_ops() that failed... so we retry the _whole_ changelog
+ # again.
+ # TODO: remove entry retries when it's gets fixed.
+ logging.warn('incomplete sync, retrying changelog: %s' % change)
+ time.sleep(0.5)
+ self.turns += 1
+
+ def upd_stime(self, stime):
+ if not stime == URXTIME:
+ self.sendmark(self.FLAT_DIR_HIERARCHY, stime)
+
+ def crawl(self):
+ changes = []
+ try:
+ self.master.server.changelog_scan()
+ self.crawls += 1
+ except OSError:
+ self.fallback_xsync()
+ changes = self.master.server.changelog_getchanges()
+ if changes:
+ xtl = self.xtime(self.FLAT_DIR_HIERARCHY)
+ if isinstance(xtl, int):
+ raise GsyncdError('master is corrupt')
+ logging.debug('processing changes %s' % repr(changes))
+ self.process(changes)
+ self.upd_stime(xtl)
+
+ def register(self):
+ (workdir, logfile) = self.setup_working_dir()
+ self.sleep_interval = int(gconf.change_interval)
+ # register with the changelog library
+ try:
+ # 9 == log level (DEBUG)
+ # 5 == connection retries
+ self.master.server.changelog_register(gconf.local_path,
+ workdir, logfile, 9, 5)
+ except OSError:
+ self.fallback_xsync()
+ # control should not reach here
+ raise
+
+class GMasterXsyncMixin(GMasterChangelogMixin):
+ """
+
+ This crawl needs to be xtime based (as of now
+ it's not. this is beacuse we generate CHANGELOG
+ file during each crawl which is then processed
+ by process_change()).
+ For now it's used as a one-shot initial sync
+ mechanism and only syncs directories, regular
+ files and symlinks.
+ """
+
+ def register(self):
+ self.sleep_interval = 60
+ self.tempdir = self.setup_working_dir()[0]
+ self.tempdir = os.path.join(self.tempdir, 'xsync')
+ logging.info('xsync temp directory: %s' % self.tempdir)
+ try:
+ os.makedirs(self.tempdir)
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno == EEXIST and os.path.isdir(self.tempdir):
+ pass
+ else:
+ raise
+
+ def write_entry_change(self, prefix, data=[]):
+ self.fh.write("%s %s\n" % (prefix, ' '.join(data)))
+
+ def open(self):
+ try:
+ self.xsync_change = os.path.join(self.tempdir, 'XSYNC-CHANGELOG.' + str(int(time.time())))
+ self.fh = open(self.xsync_change, 'w')
+ except IOError:
+ raise
+
+ def close(self):
+ self.fh.close()
+
+ def fname(self):
+ return self.xsync_change
+
+ def crawl(self, path='.', xtr=None, done=0):
+ """ generate a CHANGELOG file consumable by process_change """
+ if path == '.':
+ self.open()
+ self.crawls += 1
+ if not xtr:
+ # get the root stime and use it for all comparisons
+ xtr = self.xtime('.', self.slave)
+ if isinstance(xtr, int):
+ if xtr != ENOENT:
+ raise GsyncdError('slave is corrupt')
+ xtr = self.minus_infinity
+ xtl = self.xtime(path)
+ if isinstance(xtl, int):
+ raise GsyncdError('master is corrupt')
+ if xtr == xtl:
+ if path == '.':
+ self.close()
+ return
+ self.xtime_reversion_hook(path, xtl, xtr)
+ logging.debug("entering " + path)
+ dem = self.master.server.entries(path)
+ pargfid = self.master.server.gfid(path)
+ if isinstance(pargfid, int):
+ logging.warn('skipping directory %s' % (path))
+ for e in dem:
+ bname = e
+ e = os.path.join(path, e)
+ st = lstat(e)
+ if isinstance(st, int):
+ logging.warn('%s got purged in the interim..' % e)
+ continue
+ gfid = self.master.server.gfid(e)
+ if isinstance(gfid, int):
+ logging.warn('skipping entry %s..' % (e))
+ continue
+ xte = self.xtime(e)
+ if isinstance(xte, int):
+ raise GsyncdError('master is corrupt')
+ if not self.need_sync(e, xte, xtr):
+ continue
+ mo = st.st_mode
+ if stat.S_ISDIR(mo):
+ self.write_entry_change("E", [gfid, 'MKDIR', escape(os.path.join(pargfid, bname))])
+ self.crawl(e, xtr)
+ elif stat.S_ISLNK(mo):
+ rl = errno_wrap(os.readlink, [en], [ENOENT])
+ if isinstance(rl, int):
+ continue
+ self.write_entry_change("E", [gfid, 'SYMLINK', escape(os.path.join(pargfid, bname)), rl])
+ else:
+ # if a file has a hardlink, create a Changelog entry as 'LINK' so the slave
+ # side will decide if to create the new entry, or to create link.
+ if st.st_nlink == 1:
+ self.write_entry_change("E", [gfid, 'MKNOD', escape(os.path.join(pargfid, bname))])
+ else:
+ self.write_entry_change("E", [gfid, 'LINK', escape(os.path.join(pargfid, bname))])
+ if stat.S_ISREG(mo):
+ self.write_entry_change("D", [gfid])
+
+ if path == '.':
+ logging.info('processing xsync changelog %s' % self.fname())
+ self.close()
+ self.process([self.fname()], done)
+ self.upd_stime(xtl)
+
+class BoxClosedErr(Exception):
+ pass
+
+class PostBox(list):
+ """synchronized collection for storing things thought of as "requests" """
+
+ def __init__(self, *a):
+ list.__init__(self, *a)
+ # too bad Python stdlib does not have read/write locks...
+ # it would suffivce to grab the lock in .append as reader, in .close as writer
+ self.lever = Condition()
+ self.open = True
+ self.done = False
+
+ def wait(self):
+ """wait on requests to be processed"""
+ self.lever.acquire()
+ if not self.done:
+ self.lever.wait()
+ self.lever.release()
+ return self.result
+
+ def wakeup(self, data):
+ """wake up requestors with the result"""
+ self.result = data
+ self.lever.acquire()
+ self.done = True
+ self.lever.notifyAll()
+ self.lever.release()
+
+ def append(self, e):
+ """post a request"""
+ self.lever.acquire()
+ if not self.open:
+ raise BoxClosedErr
+ list.append(self, e)
+ self.lever.release()
+
+ def close(self):
+ """prohibit the posting of further requests"""
+ self.lever.acquire()
+ self.open = False
+ self.lever.release()
+
+class Syncer(object):
+ """a staged queue to relay rsync requests to rsync workers
+
+ By "staged queue" its meant that when a consumer comes to the
+ queue, it takes _all_ entries, leaving the queue empty.
+ (I don't know if there is an official term for this pattern.)
+
+ The queue uses a PostBox to accumulate incoming items.
+ When a consumer (rsync worker) comes, a new PostBox is
+ set up and the old one is passed on to the consumer.
+
+ Instead of the simplistic scheme of having one big lock
+ which synchronizes both the addition of new items and
+ PostBox exchanges, use a separate lock to arbitrate consumers,
+ and rely on PostBox's synchronization mechanisms take
+ care about additions.
+
+ There is a corner case racy situation, producers vs. consumers,
+ which is not handled by this scheme: namely, when the PostBox
+ exchange occurs in between being passed to the producer for posting
+ and the post placement. But that's what Postbox.close is for:
+ such a posting will find the PostBox closed, in which case
+ the producer can re-try posting against the actual PostBox of
+ the queue.
+
+ To aid accumlation of items in the PostBoxen before grabbed
+ by an rsync worker, the worker goes to sleep a bit after
+ each completed syncjob.
+ """
+
+ def __init__(self, slave):
+ """spawn worker threads"""
+ self.slave = slave
+ self.lock = Lock()
+ self.pb = PostBox()
+ self.bytes_synced = 0
+ for i in range(int(gconf.sync_jobs)):
+ t = Thread(target=self.syncjob)
+ t.start()
+
+ def syncjob(self):
+ """the life of a worker"""
+ while True:
+ pb = None
+ while True:
+ self.lock.acquire()
+ if self.pb:
+ pb, self.pb = self.pb, PostBox()
+ self.lock.release()
+ if pb:
+ break
+ time.sleep(0.5)
+ pb.close()
+ po = self.slave.rsync(pb)
+ if po.returncode == 0:
+ ret = (True, 0)
+ elif po.returncode in (23, 24):
+ # partial transfer (cf. rsync(1)), that's normal
+ ret = (False, po.returncode)
+ else:
+ po.errfail()
+ pb.wakeup(ret)
+
+ def add(self, e):
+ while True:
+ pb = self.pb
+ try:
+ pb.append(e)
+ return pb
+ except BoxClosedErr:
+ pass
diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py
new file mode 100644
index 000000000..0c3a42fa6
--- /dev/null
+++ b/geo-replication/syncdaemon/monitor.py
@@ -0,0 +1,244 @@
+import os
+import sys
+import time
+import signal
+import logging
+import uuid
+import xml.etree.ElementTree as XET
+from subprocess import PIPE
+from resource import Popen, FILE, GLUSTER, SSH
+from threading import Lock
+from gconf import gconf
+from syncdutils import update_file, select, waitpid, set_term_handler, is_host_local, GsyncdError
+from syncdutils import escape, Thread, finalize, memoize
+
+class Volinfo(object):
+ def __init__(self, vol, host='localhost', prelude=[]):
+ po = Popen(prelude + ['gluster', '--xml', '--remote-host=' + host, 'volume', 'info', vol],
+ stdout=PIPE, stderr=PIPE)
+ vix = po.stdout.read()
+ po.wait()
+ po.terminate_geterr()
+ vi = XET.fromstring(vix)
+ if vi.find('opRet').text != '0':
+ if prelude:
+ via = '(via %s) ' % prelude.join(' ')
+ else:
+ via = ' '
+ raise GsyncdError('getting volume info of %s%s failed with errorcode %s',
+ (vol, via, vi.find('opErrno').text))
+ self.tree = vi
+ self.volume = vol
+ self.host = host
+
+ def get(self, elem):
+ return self.tree.findall('.//' + elem)
+
+ @property
+ @memoize
+ def bricks(self):
+ def bparse(b):
+ host, dirp = b.text.split(':', 2)
+ return {'host': host, 'dir': dirp}
+ return [ bparse(b) for b in self.get('brick') ]
+
+ @property
+ @memoize
+ def uuid(self):
+ ids = self.get('id')
+ if len(ids) != 1:
+ raise GsyncdError("volume info of %s obtained from %s: ambiguous uuid",
+ self.volume, self.host)
+ return ids[0].text
+
+
+class Monitor(object):
+ """class which spawns and manages gsyncd workers"""
+
+ ST_INIT = 'Initializing...'
+ ST_STABLE = 'Stable'
+ ST_FAULTY = 'faulty'
+ ST_INCON = 'inconsistent'
+ _ST_ORD = [ST_STABLE, ST_INIT, ST_FAULTY, ST_INCON]
+
+ def __init__(self):
+ self.lock = Lock()
+ self.state = {}
+
+ def set_state(self, state, w=None):
+ """set the state that can be used by external agents
+ like glusterd for status reporting"""
+ computestate = lambda: self.state and self._ST_ORD[max(self._ST_ORD.index(s) for s in self.state.values())]
+ if w:
+ self.lock.acquire()
+ old_state = computestate()
+ self.state[w] = state
+ state = computestate()
+ self.lock.release()
+ if state != old_state:
+ self.set_state(state)
+ else:
+ logging.info('new state: %s' % state)
+ if getattr(gconf, 'state_file', None):
+ update_file(gconf.state_file, lambda f: f.write(state + '\n'))
+
+ @staticmethod
+ def terminate():
+ # relax one SIGTERM by setting a handler that sets back
+ # standard handler
+ set_term_handler(lambda *a: set_term_handler())
+ # give a chance to graceful exit
+ os.kill(-os.getpid(), signal.SIGTERM)
+
+ def monitor(self, w, argv, cpids):
+ """the monitor loop
+
+ Basic logic is a blantantly simple blunt heuristics:
+ if spawned client survives 60 secs, it's considered OK.
+ This servers us pretty well as it's not vulneralbe to
+ any kind of irregular behavior of the child...
+
+ ... well, except for one: if children is hung up on
+ waiting for some event, it can survive aeons, still
+ will be defunct. So we tweak the above logic to
+ expect the worker to send us a signal within 60 secs
+ (in the form of closing its end of a pipe). The worker
+ does this when it's done with the setup stage
+ ready to enter the service loop (note it's the setup
+ stage which is vulnerable to hangs -- the full
+ blown worker blows up on EPIPE if the net goes down,
+ due to the keep-alive thread)
+ """
+
+ self.set_state(self.ST_INIT, w)
+ ret = 0
+ def nwait(p, o=0):
+ p2, r = waitpid(p, o)
+ if not p2:
+ return
+ return r
+ def exit_signalled(s):
+ """ child teminated due to receipt of SIGUSR1 """
+ return (os.WIFSIGNALED(s) and (os.WTERMSIG(s) == signal.SIGUSR1))
+ def exit_status(s):
+ if os.WIFEXITED(s):
+ return os.WEXITSTATUS(s)
+ return 1
+ conn_timeout = int(gconf.connection_timeout)
+ while ret in (0, 1):
+ logging.info('-' * conn_timeout)
+ logging.info('starting gsyncd worker')
+ pr, pw = os.pipe()
+ cpid = os.fork()
+ if cpid == 0:
+ os.close(pr)
+ os.execv(sys.executable, argv + ['--feedback-fd', str(pw),
+ '--local-path', w[0],
+ '--local-id', '.' + escape(w[0]),
+ '--resource-remote', w[1]])
+ self.lock.acquire()
+ cpids.add(cpid)
+ self.lock.release()
+ os.close(pw)
+ t0 = time.time()
+ so = select((pr,), (), (), conn_timeout)[0]
+ os.close(pr)
+ if so:
+ ret = nwait(cpid, os.WNOHANG)
+ if ret != None:
+ logging.debug("worker died before establishing connection")
+ else:
+ logging.debug("worker seems to be connected (?? racy check)")
+ while time.time() < t0 + conn_timeout:
+ ret = nwait(cpid, os.WNOHANG)
+ if ret != None:
+ logging.debug("worker died in startup phase")
+ break
+ time.sleep(1)
+ else:
+ logging.debug("worker not confirmed in %d sec, aborting it" % \
+ conn_timeout)
+ self.terminate()
+ time.sleep(1)
+ os.kill(cpid, signal.SIGKILL)
+ ret = nwait(cpid)
+ if ret == None:
+ self.set_state(self.ST_STABLE, w)
+ ret = nwait(cpid)
+ if exit_signalled(ret):
+ ret = 0
+ else:
+ ret = exit_status(ret)
+ if ret in (0,1):
+ self.set_state(self.ST_FAULTY, w)
+ time.sleep(10)
+ self.set_state(self.ST_INCON, w)
+ return ret
+
+ def multiplex(self, wspx, suuid):
+ argv = sys.argv[:]
+ for o in ('-N', '--no-daemon', '--monitor'):
+ while o in argv:
+ argv.remove(o)
+ argv.extend(('-N', '-p', '', '--slave-id', suuid))
+ argv.insert(0, os.path.basename(sys.executable))
+
+ cpids = set()
+ ta = []
+ for wx in wspx:
+ def wmon(w):
+ cpid, _ = self.monitor(w, argv, cpids)
+ terminate()
+ time.sleep(1)
+ self.lock.acquire()
+ for cpid in cpids:
+ os.kill(cpid, signal.SIGKILL)
+ self.lock.release()
+ finalize(exval=1)
+ t = Thread(target = wmon, args=[wx])
+ t.start()
+ ta.append(t)
+ for t in ta:
+ t.join()
+
+def distribute(*resources):
+ master, slave = resources
+ mvol = Volinfo(master.volume, master.host)
+ logging.debug('master bricks: ' + repr(mvol.bricks))
+ prelude = []
+ si = slave
+ if isinstance(slave, SSH):
+ prelude = gconf.ssh_command.split() + [slave.remote_addr]
+ si = slave.inner_rsc
+ logging.debug('slave SSH gateway: ' + slave.remote_addr)
+ if isinstance(si, FILE):
+ sbricks = {'host': 'localhost', 'dir': si.path}
+ suuid = uuid.uuid5(uuid.NAMESPACE_URL, slave.get_url(canonical=True))
+ elif isinstance(si, GLUSTER):
+ svol = Volinfo(si.volume, si.host, prelude)
+ sbricks = svol.bricks
+ suuid = svol.uuid
+ else:
+ raise GsyncdError("unkown slave type " + slave.url)
+ logging.info('slave bricks: ' + repr(sbricks))
+ if isinstance(si, FILE):
+ slaves = [ slave.url ]
+ else:
+ slavenodes = set(b['host'] for b in sbricks)
+ if isinstance(slave, SSH) and not gconf.isolated_slave:
+ rap = SSH.parse_ssh_address(slave)
+ slaves = [ 'ssh://' + rap['user'] + '@' + h + ':' + si.url for h in slavenodes ]
+ else:
+ slavevols = [ h + ':' + si.volume for h in slavenodes ]
+ if isinstance(slave, SSH):
+ slaves = [ 'ssh://' + rap.remote_addr + ':' + v for v in slavevols ]
+ else:
+ slaves = slavevols
+
+ workerspex = [ (brick['dir'], slaves[idx % len(slaves)]) for idx, brick in enumerate(mvol.bricks) if is_host_local(brick['host']) ]
+ logging.info('worker specs: ' + repr(workerspex))
+ return workerspex, suuid
+
+def monitor(*resources):
+ """oh yeah, actually Monitor is used as singleton, too"""
+ return Monitor().multiplex(*distribute(*resources))
diff --git a/geo-replication/syncdaemon/repce.py b/geo-replication/syncdaemon/repce.py
new file mode 100644
index 000000000..755fb61df
--- /dev/null
+++ b/geo-replication/syncdaemon/repce.py
@@ -0,0 +1,225 @@
+import os
+import sys
+import time
+import logging
+from threading import Condition
+try:
+ import thread
+except ImportError:
+ # py 3
+ import _thread as thread
+try:
+ from Queue import Queue
+except ImportError:
+ # py 3
+ from queue import Queue
+try:
+ import cPickle as pickle
+except ImportError:
+ # py 3
+ import pickle
+
+from syncdutils import Thread, select
+
+pickle_proto = -1
+repce_version = 1.0
+
+def ioparse(i, o):
+ if isinstance(i, int):
+ i = os.fdopen(i)
+ # rely on duck typing for recognizing
+ # streams as that works uniformly
+ # in py2 and py3
+ if hasattr(o, 'fileno'):
+ o = o.fileno()
+ return (i, o)
+
+def send(out, *args):
+ """pickle args and write out wholly in one syscall
+
+ ie. not use the ability of pickle to dump directly to
+ a stream, as that would potentially mess up messages
+ by interleaving them
+ """
+ os.write(out, pickle.dumps(args, pickle_proto))
+
+def recv(inf):
+ """load an object from input stream"""
+ return pickle.load(inf)
+
+
+class RepceServer(object):
+ """RePCe is Hungarian for canola, http://hu.wikipedia.org/wiki/Repce
+
+ ... also our homebrewed RPC backend where the transport layer is
+ reduced to a pair of filehandles.
+
+ This is the server component.
+ """
+
+ def __init__(self, obj, i, o, wnum=6):
+ """register a backend object .obj to which incoming messages
+ are dispatched, also incoming/outcoming streams
+ """
+ self.obj = obj
+ self.inf, self.out = ioparse(i, o)
+ self.wnum = wnum
+ self.q = Queue()
+
+ def service_loop(self):
+ """fire up worker threads, get messages and dispatch among them"""
+ for i in range(self.wnum):
+ t = Thread(target=self.worker)
+ t.start()
+ try:
+ while True:
+ self.q.put(recv(self.inf))
+ except EOFError:
+ logging.info("terminating on reaching EOF.")
+
+ def worker(self):
+ """life of a worker
+
+ Get message, extract its id, method name and arguments
+ (kwargs not supported), call method on .obj.
+ Send back message id + return value.
+ If method call throws an exception, rescue it, and send
+ back the exception as result (with flag marking it as
+ exception).
+ """
+ while True:
+ in_data = self.q.get(True)
+ rid = in_data[0]
+ rmeth = in_data[1]
+ exc = False
+ if rmeth == '__repce_version__':
+ res = repce_version
+ else:
+ try:
+ res = getattr(self.obj, rmeth)(*in_data[2:])
+ except:
+ res = sys.exc_info()[1]
+ exc = True
+ logging.exception("call failed: ")
+ send(self.out, rid, exc, res)
+
+
+class RepceJob(object):
+ """class representing message status we can use
+ for waiting on reply"""
+
+ def __init__(self, cbk):
+ """
+ - .rid: (process-wise) unique id
+ - .cbk: what we do upon receiving reply
+ """
+ self.rid = (os.getpid(), thread.get_ident(), time.time())
+ self.cbk = cbk
+ self.lever = Condition()
+ self.done = False
+
+ def __repr__(self):
+ return ':'.join([str(x) for x in self.rid])
+
+ def wait(self):
+ self.lever.acquire()
+ if not self.done:
+ self.lever.wait()
+ self.lever.release()
+ return self.result
+
+ def wakeup(self, data):
+ self.result = data
+ self.lever.acquire()
+ self.done = True
+ self.lever.notify()
+ self.lever.release()
+
+
+class RepceClient(object):
+ """RePCe is Hungarian for canola, http://hu.wikipedia.org/wiki/Repce
+
+ ... also our homebrewed RPC backend where the transport layer is
+ reduced to a pair of filehandles.
+
+ This is the client component.
+ """
+
+ def __init__(self, i, o):
+ self.inf, self.out = ioparse(i, o)
+ self.jtab = {}
+ t = Thread(target = self.listen)
+ t.start()
+
+ def listen(self):
+ while True:
+ select((self.inf,), (), ())
+ rid, exc, res = recv(self.inf)
+ rjob = self.jtab.pop(rid)
+ if rjob.cbk:
+ rjob.cbk(rjob, [exc, res])
+
+ def push(self, meth, *args, **kw):
+ """wrap arguments in a RepceJob, send them to server
+ and return the RepceJob
+
+ @cbk to pass on RepceJob can be given as kwarg.
+ """
+ cbk = kw.get('cbk')
+ if not cbk:
+ def cbk(rj, res):
+ if res[0]:
+ raise res[1]
+ rjob = RepceJob(cbk)
+ self.jtab[rjob.rid] = rjob
+ logging.debug("call %s %s%s ..." % (repr(rjob), meth, repr(args)))
+ send(self.out, rjob.rid, meth, *args)
+ return rjob
+
+ def __call__(self, meth, *args):
+ """RePCe client is callabe, calling it implements a synchronous remote call
+
+ We do a .push with a cbk which does a wakeup upon receiving anwser, then wait
+ on the RepceJob.
+ """
+ rjob = self.push(meth, *args, **{'cbk': lambda rj, res: rj.wakeup(res)})
+ exc, res = rjob.wait()
+ if exc:
+ logging.error('call %s (%s) failed on peer with %s' % (repr(rjob), meth, str(type(res).__name__)))
+ raise res
+ logging.debug("call %s %s -> %s" % (repr(rjob), meth, repr(res)))
+ return res
+
+ class mprx(object):
+ """method proxy, standard trick to implement rubyesque method_missing
+ in Python
+
+ A class is a closure factory, you know what I mean, or go read some SICP.
+ """
+ def __init__(self, ins, meth):
+ self.ins = ins
+ self.meth = meth
+
+ def __call__(self, *a):
+ return self.ins(self.meth, *a)
+
+ def __getattr__(self, meth):
+ """this implements transparent method dispatch to remote object,
+ so that you don't need to call the RepceClient instance like
+
+ rclient('how_old_are_you_if_born_in', 1979)
+
+ but you can make it into an ordinary method call like
+
+ rclient.how_old_are_you_if_born_in(1979)
+ """
+ return self.mprx(self, meth)
+
+ def __version__(self):
+ """used in handshake to verify compatibility"""
+ d = {'proto': self('__repce_version__')}
+ try:
+ d['object'] = self('version')
+ except AttributeError:
+ pass
+ return d
diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
new file mode 100644
index 000000000..faf62f868
--- /dev/null
+++ b/geo-replication/syncdaemon/resource.py
@@ -0,0 +1,1172 @@
+import re
+import os
+import sys
+import stat
+import time
+import fcntl
+import errno
+import types
+import struct
+import socket
+import logging
+import tempfile
+import threading
+import subprocess
+from errno import EEXIST, ENOENT, ENODATA, ENOTDIR, ELOOP, EISDIR, ENOTEMPTY, ESTALE, EINVAL
+from select import error as SelectError
+
+from gconf import gconf
+import repce
+from repce import RepceServer, RepceClient
+from master import gmaster_builder
+import syncdutils
+from syncdutils import GsyncdError, select, privileged, boolify, funcode
+from syncdutils import umask, entry2pb, gauxpfx, errno_wrap, lstat
+
+UrlRX = re.compile('\A(\w+)://([^ *?[]*)\Z')
+HostRX = re.compile('[a-z\d](?:[a-z\d.-]*[a-z\d])?', re.I)
+UserRX = re.compile("[\w!\#$%&'*+-\/=?^_`{|}~]+")
+
+def sup(x, *a, **kw):
+ """a rubyesque "super" for python ;)
+
+ invoke caller method in parent class with given args.
+ """
+ return getattr(super(type(x), x), sys._getframe(1).f_code.co_name)(*a, **kw)
+
+def desugar(ustr):
+ """transform sugared url strings to standard <scheme>://<urlbody> form
+
+ parsing logic enforces the constraint that sugared forms should contatin
+ a ':' or a '/', which ensures that sugared urls do not conflict with
+ gluster volume names.
+ """
+ m = re.match('([^:]*):(.*)', ustr)
+ if m:
+ if not m.groups()[0]:
+ return "gluster://localhost" + ustr
+ elif '@' in m.groups()[0] or re.search('[:/]', m.groups()[1]):
+ return "ssh://" + ustr
+ else:
+ return "gluster://" + ustr
+ else:
+ if ustr[0] != '/':
+ raise GsyncdError("cannot resolve sugared url '%s'" % ustr)
+ ap = os.path.normpath(ustr)
+ if ap.startswith('//'):
+ ap = ap[1:]
+ return "file://" + ap
+
+def gethostbyname(hnam):
+ """gethostbyname wrapper"""
+ try:
+ return socket.gethostbyname(hnam)
+ except socket.gaierror:
+ ex = sys.exc_info()[1]
+ raise GsyncdError("failed to resolve %s: %s" % \
+ (hnam, ex.strerror))
+
+def parse_url(ustr):
+ """instantiate an url object by scheme-to-class dispatch
+
+ The url classes taken into consideration are the ones in
+ this module whose names are full-caps.
+ """
+ m = UrlRX.match(ustr)
+ if not m:
+ ustr = desugar(ustr)
+ m = UrlRX.match(ustr)
+ if not m:
+ raise GsyncdError("malformed url")
+ sch, path = m.groups()
+ this = sys.modules[__name__]
+ if not hasattr(this, sch.upper()):
+ raise GsyncdError("unknown url scheme " + sch)
+ return getattr(this, sch.upper())(path)
+
+
+class _MetaXattr(object):
+ """singleton class, a lazy wrapper around the
+ libcxattr module
+
+ libcxattr (a heavy import due to ctypes) is
+ loaded only when when the single
+ instance is tried to be used.
+
+ This reduces runtime for those invocations
+ which do not need filesystem manipulation
+ (eg. for config, url parsing)
+ """
+
+ def __getattr__(self, meth):
+ from libcxattr import Xattr as LXattr
+ xmeth = [ m for m in dir(LXattr) if m[0] != '_' ]
+ if not meth in xmeth:
+ return
+ for m in xmeth:
+ setattr(self, m, getattr(LXattr, m))
+ return getattr(self, meth)
+
+class _MetaChangelog(object):
+ def __getattr__(self, meth):
+ from libgfchangelog import Changes as LChanges
+ xmeth = [ m for m in dir(LChanges) if m[0] != '_' ]
+ if not meth in xmeth:
+ return
+ for m in xmeth:
+ setattr(self, m, getattr(LChanges, m))
+ return getattr(self, meth)
+
+Xattr = _MetaXattr()
+Changes = _MetaChangelog()
+
+
+class Popen(subprocess.Popen):
+ """customized subclass of subprocess.Popen with a ring
+ buffer for children error output"""
+
+ @classmethod
+ def init_errhandler(cls):
+ """start the thread which handles children's error output"""
+ cls.errstore = {}
+ def tailer():
+ while True:
+ errstore = cls.errstore.copy()
+ try:
+ poe, _ ,_ = select([po.stderr for po in errstore], [], [], 1)
+ except (ValueError, SelectError):
+ continue
+ for po in errstore:
+ if po.stderr not in poe:
+ continue
+ po.lock.acquire()
+ try:
+ if po.on_death_row:
+ continue
+ la = errstore[po]
+ try:
+ fd = po.stderr.fileno()
+ except ValueError: # file is already closed
+ continue
+ l = os.read(fd, 1024)
+ if not l:
+ continue
+ tots = len(l)
+ for lx in la:
+ tots += len(lx)
+ while tots > 1<<20 and la:
+ tots -= len(la.pop(0))
+ la.append(l)
+ finally:
+ po.lock.release()
+ t = syncdutils.Thread(target = tailer)
+ t.start()
+ cls.errhandler = t
+
+ @classmethod
+ def fork(cls):
+ """fork wrapper that restarts errhandler thread in child"""
+ pid = os.fork()
+ if not pid:
+ cls.init_errhandler()
+ return pid
+
+ def __init__(self, args, *a, **kw):
+ """customizations for subprocess.Popen instantiation
+
+ - 'close_fds' is taken to be the default
+ - if child's stderr is chosen to be managed,
+ register it with the error handler thread
+ """
+ self.args = args
+ if 'close_fds' not in kw:
+ kw['close_fds'] = True
+ self.lock = threading.Lock()
+ self.on_death_row = False
+ try:
+ sup(self, args, *a, **kw)
+ except:
+ ex = sys.exc_info()[1]
+ if not isinstance(ex, OSError):
+ raise
+ raise GsyncdError("""execution of "%s" failed with %s (%s)""" % \
+ (args[0], errno.errorcode[ex.errno], os.strerror(ex.errno)))
+ if kw.get('stderr') == subprocess.PIPE:
+ assert(getattr(self, 'errhandler', None))
+ self.errstore[self] = []
+
+ def errlog(self):
+ """make a log about child's failure event"""
+ filling = ""
+ if self.elines:
+ filling = ", saying:"
+ logging.error("""command "%s" returned with %s%s""" % \
+ (" ".join(self.args), repr(self.returncode), filling))
+ lp = ''
+ def logerr(l):
+ logging.error(self.args[0] + "> " + l)
+ for l in self.elines:
+ ls = l.split('\n')
+ ls[0] = lp + ls[0]
+ lp = ls.pop()
+ for ll in ls:
+ logerr(ll)
+ if lp:
+ logerr(lp)
+
+ def errfail(self):
+ """fail nicely if child did not terminate with success"""
+ self.errlog()
+ syncdutils.finalize(exval = 1)
+
+ def terminate_geterr(self, fail_on_err = True):
+ """kill child, finalize stderr harvesting (unregister
+ from errhandler, set up .elines), fail on error if
+ asked for
+ """
+ self.lock.acquire()
+ try:
+ self.on_death_row = True
+ finally:
+ self.lock.release()
+ elines = self.errstore.pop(self)
+ if self.poll() == None:
+ self.terminate()
+ if self.poll() == None:
+ time.sleep(0.1)
+ self.kill()
+ self.wait()
+ while True:
+ if not select([self.stderr],[],[],0.1)[0]:
+ break
+ b = os.read(self.stderr.fileno(), 1024)
+ if b:
+ elines.append(b)
+ else:
+ break
+ self.stderr.close()
+ self.elines = elines
+ if fail_on_err and self.returncode != 0:
+ self.errfail()
+
+
+class Server(object):
+ """singleton implemening those filesystem access primitives
+ which are needed for geo-replication functionality
+
+ (Singleton in the sense it's a class which has only static
+ and classmethods and is used directly, without instantiation.)
+ """
+
+ GX_NSPACE_PFX = (privileged() and "trusted" or "system")
+ GX_NSPACE = GX_NSPACE_PFX + ".glusterfs"
+ NTV_FMTSTR = "!" + "B"*19 + "II"
+ FRGN_XTRA_FMT = "I"
+ FRGN_FMTSTR = NTV_FMTSTR + FRGN_XTRA_FMT
+ GX_GFID_CANONICAL_LEN = 37 # canonical gfid len + '\0'
+
+ local_path = ''
+
+ @classmethod
+ def _fmt_mknod(cls, l):
+ return "!II%dsI%dsIII" % (cls.GX_GFID_CANONICAL_LEN, l+1)
+ @classmethod
+ def _fmt_mkdir(cls, l):
+ return "!II%dsI%dsII" % (cls.GX_GFID_CANONICAL_LEN, l+1)
+ @classmethod
+ def _fmt_symlink(cls, l1, l2):
+ return "!II%dsI%ds%ds" % (cls.GX_GFID_CANONICAL_LEN, l1+1, l2+1)
+
+ def _pathguard(f):
+ """decorator method that checks
+ the path argument of the decorated
+ functions to make sure it does not
+ point out of the managed tree
+ """
+
+ fc = funcode(f)
+ pi = list(fc.co_varnames).index('path')
+ def ff(*a):
+ path = a[pi]
+ ps = path.split('/')
+ if path[0] == '/' or '..' in ps:
+ raise ValueError('unsafe path')
+ a = list(a)
+ a[pi] = os.path.join(a[0].local_path, path)
+ return f(*a)
+ return ff
+
+ @classmethod
+ @_pathguard
+ def entries(cls, path):
+ """directory entries in an array"""
+ # prevent symlinks being followed
+ if not stat.S_ISDIR(os.lstat(path).st_mode):
+ raise OSError(ENOTDIR, os.strerror(ENOTDIR))
+ return os.listdir(path)
+
+ @classmethod
+ @_pathguard
+ def purge(cls, path, entries=None):
+ """force-delete subtrees
+
+ If @entries is not specified, delete
+ the whole subtree under @path (including
+ @path).
+
+ Otherwise, @entries should be a
+ a sequence of children of @path, and
+ the effect is identical with a joint
+ @entries-less purge on them, ie.
+
+ for e in entries:
+ cls.purge(os.path.join(path, e))
+ """
+ me_also = entries == None
+ if not entries:
+ try:
+ # if it's a symlink, prevent
+ # following it
+ try:
+ os.unlink(path)
+ return
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno == EISDIR:
+ entries = os.listdir(path)
+ else:
+ raise
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno in (ENOTDIR, ENOENT, ELOOP):
+ try:
+ os.unlink(path)
+ return
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno == ENOENT:
+ return
+ raise
+ else:
+ raise
+ for e in entries:
+ cls.purge(os.path.join(path, e))
+ if me_also:
+ os.rmdir(path)
+
+ @classmethod
+ @_pathguard
+ def _create(cls, path, ctor):
+ """path creation backend routine"""
+ try:
+ ctor(path)
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno == EEXIST:
+ cls.purge(path)
+ return ctor(path)
+ raise
+
+ @classmethod
+ @_pathguard
+ def mkdir(cls, path):
+ cls._create(path, os.mkdir)
+
+ @classmethod
+ @_pathguard
+ def symlink(cls, lnk, path):
+ cls._create(path, lambda p: os.symlink(lnk, p))
+
+ @classmethod
+ @_pathguard
+ def xtime(cls, path, uuid):
+ """query xtime extended attribute
+
+ Return xtime of @path for @uuid as a pair of integers.
+ "Normal" errors due to non-existent @path or extended attribute
+ are tolerated and errno is returned in such a case.
+ """
+
+ try:
+ return struct.unpack('!II', Xattr.lgetxattr(path, '.'.join([cls.GX_NSPACE, uuid, 'xtime']), 8))
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno in (ENOENT, ENODATA, ENOTDIR):
+ return ex.errno
+ else:
+ raise
+
+ @classmethod
+ def gfid(cls, gfidpath):
+ return errno_wrap(Xattr.lgetxattr, [gfidpath, 'glusterfs.gfid.string', cls.GX_GFID_CANONICAL_LEN], [ENOENT])
+
+ @classmethod
+ def node_uuid(cls, path='.'):
+ try:
+ uuid_l = Xattr.lgetxattr_buf(path, '.'.join([cls.GX_NSPACE, 'node-uuid']))
+ return uuid_l[:-1].split(' ')
+ except OSError:
+ raise
+
+ @classmethod
+ def xtime_vec(cls, path, *uuids):
+ """vectored version of @xtime
+
+ accepts a list of uuids and returns a dictionary
+ with uuid as key(s) and xtime as value(s)
+ """
+ xt = {}
+ for uuid in uuids:
+ xtu = cls.xtime(path, uuid)
+ if xtu == ENODATA:
+ xtu = None
+ if isinstance(xtu, int):
+ return xtu
+ xt[uuid] = xtu
+ return xt
+
+ @classmethod
+ @_pathguard
+ def set_xtime(cls, path, uuid, mark):
+ """set @mark as xtime for @uuid on @path"""
+ Xattr.lsetxattr(path, '.'.join([cls.GX_NSPACE, uuid, 'xtime']), struct.pack('!II', *mark))
+
+ @classmethod
+ @_pathguard
+ def set_xtime_remote(cls, path, uuid, mark):
+ """
+ set @mark as xtime for @uuid on @path
+ the difference b/w this and set_xtime() being
+ set_xtime() being overloaded to set the xtime
+ on the brick (this method sets xtime on the
+ remote slave)
+ """
+ Xattr.lsetxattr(path, '.'.join([cls.GX_NSPACE, uuid, 'xtime']), struct.pack('!II', *mark))
+
+ @classmethod
+ def set_xtime_vec(cls, path, mark_dct):
+ """vectored (or dictered) version of set_xtime
+
+ ignore values that match @ignore
+ """
+ for u,t in mark_dct.items():
+ cls.set_xtime(path, u, t)
+
+ @classmethod
+ def entry_ops(cls, entries):
+ pfx = gauxpfx()
+ logging.debug('entries: %s' % repr(entries))
+ # regular file
+ def entry_pack_reg(gf, bn, st):
+ blen = len(bn)
+ mo = st['mode']
+ return struct.pack(cls._fmt_mknod(blen),
+ st['uid'], st['gid'],
+ gf, mo, bn,
+ stat.S_IMODE(mo), 0, umask())
+ # mkdir
+ def entry_pack_mkdir(gf, bn, st):
+ blen = len(bn)
+ mo = st['mode']
+ return struct.pack(cls._fmt_mkdir(blen),
+ st['uid'], st['gid'],
+ gf, mo, bn,
+ stat.S_IMODE(mo), umask())
+ #symlink
+ def entry_pack_symlink(gf, bn, lnk, st):
+ blen = len(bn)
+ llen = len(lnk)
+ return struct.pack(cls._fmt_symlink(blen, llen),
+ st['uid'], st['gid'],
+ gf, st['mode'], bn, lnk)
+ def entry_purge(entry, gfid):
+ # This is an extremely racy code and needs to be fixed ASAP.
+ # The GFID check here is to be sure that the pargfid/bname
+ # to be purged is the GFID gotten from the changelog.
+ # (a stat(changelog_gfid) would also be valid here)
+ # The race here is between the GFID check and the purge.
+ disk_gfid = cls.gfid(entry)
+ if isinstance(disk_gfid, int):
+ return
+ if not gfid == disk_gfid:
+ return
+ er = errno_wrap(os.unlink, [entry], [ENOENT, EISDIR])
+ if isinstance(er, int):
+ if er == EISDIR:
+ er = errno_wrap(os.rmdir, [entry], [ENOENT, ENOTEMPTY])
+ if er == ENOTEMPTY:
+ return er
+ for e in entries:
+ blob = None
+ op = e['op']
+ gfid = e['gfid']
+ entry = e['entry']
+ (pg, bname) = entry2pb(entry)
+ if op in ['RMDIR', 'UNLINK']:
+ while True:
+ er = entry_purge(entry, gfid)
+ if isinstance(er, int):
+ time.sleep(1)
+ else:
+ break
+ elif op in ['CREATE', 'MKNOD']:
+ blob = entry_pack_reg(gfid, bname, e['stat'])
+ elif op == 'MKDIR':
+ blob = entry_pack_mkdir(gfid, bname, e['stat'])
+ elif op == 'LINK':
+ slink = os.path.join(pfx, gfid)
+ st = lstat(slink)
+ if isinstance(st, int):
+ (pg, bname) = entry2pb(entry)
+ blob = entry_pack_reg(gfid, bname, e['stat'])
+ else:
+ errno_wrap(os.link, [slink, entry], [ENOENT, EEXIST])
+ elif op == 'SYMLINK':
+ blob = entry_pack_symlink(gfid, bname, e['link'], e['stat'])
+ elif op == 'RENAME':
+ en = e['entry1']
+ st = lstat(entry)
+ if isinstance(st, int):
+ (pg, bname) = entry2pb(en)
+ blob = entry_pack_reg(gfid, bname, e['stat'])
+ else:
+ errno_wrap(os.rename, [entry, en], [ENOENT, EEXIST])
+ if blob:
+ errno_wrap(Xattr.lsetxattr_l, [pg, 'glusterfs.gfid.newfile', blob], [EEXIST], [ENOENT, ESTALE, EINVAL])
+
+ @classmethod
+ def changelog_register(cls, cl_brick, cl_dir, cl_log, cl_level, retries = 0):
+ Changes.cl_register(cl_brick, cl_dir, cl_log, cl_level, retries)
+
+ @classmethod
+ def changelog_scan(cls):
+ Changes.cl_scan()
+
+ @classmethod
+ def changelog_getchanges(cls):
+ return Changes.cl_getchanges()
+
+ @classmethod
+ def changelog_done(cls, clfile):
+ Changes.cl_done(clfile)
+
+ @classmethod
+ @_pathguard
+ def setattr(cls, path, adct):
+ """set file attributes
+
+ @adct is a dict, where 'own', 'mode' and 'times'
+ keys are looked for and values used to perform
+ chown, chmod or utimes on @path.
+ """
+ own = adct.get('own')
+ if own:
+ os.lchown(path, *own)
+ mode = adct.get('mode')
+ if mode:
+ os.chmod(path, stat.S_IMODE(mode))
+ times = adct.get('times')
+ if times:
+ os.utime(path, times)
+
+ @staticmethod
+ def pid():
+ return os.getpid()
+
+ last_keep_alive = 0
+ @classmethod
+ def keep_alive(cls, dct):
+ """process keepalive messages.
+
+ Return keep-alive counter (number of received keep-alive
+ messages).
+
+ Now the "keep-alive" message can also have a payload which is
+ used to set a foreign volume-mark on the underlying file system.
+ """
+ if dct:
+ key = '.'.join([cls.GX_NSPACE, 'volume-mark', dct['uuid']])
+ val = struct.pack(cls.FRGN_FMTSTR,
+ *(dct['version'] +
+ tuple(int(x,16) for x in re.findall('(?:[\da-f]){2}', dct['uuid'])) +
+ (dct['retval'],) + dct['volume_mark'][0:2] + (dct['timeout'],)))
+ Xattr.lsetxattr('.', key, val)
+ cls.last_keep_alive += 1
+ return cls.last_keep_alive
+
+ @staticmethod
+ def version():
+ """version used in handshake"""
+ return 1.0
+
+
+class SlaveLocal(object):
+ """mix-in class to implement some factes of a slave server
+
+ ("mix-in" is sort of like "abstract class", ie. it's not
+ instantiated just included in the ancesty DAG. I use "mix-in"
+ to indicate that it's not used as an abstract base class,
+ rather just taken in to implement additional functionality
+ on the basis of the assumed availability of certain interfaces.)
+ """
+
+ def can_connect_to(self, remote):
+ """determine our position in the connectibility matrix"""
+ return not remote
+
+ def service_loop(self):
+ """start a RePCe server serving self's server
+
+ stop servicing if a timeout is configured and got no
+ keep-alime in that inteval
+ """
+
+ if boolify(gconf.use_rsync_xattrs) and not privileged():
+ raise GsyncdError("using rsync for extended attributes is not supported")
+
+ repce = RepceServer(self.server, sys.stdin, sys.stdout, int(gconf.sync_jobs))
+ t = syncdutils.Thread(target=lambda: (repce.service_loop(),
+ syncdutils.finalize()))
+ t.start()
+ logging.info("slave listening")
+ if gconf.timeout and int(gconf.timeout) > 0:
+ while True:
+ lp = self.server.last_keep_alive
+ time.sleep(int(gconf.timeout))
+ if lp == self.server.last_keep_alive:
+ logging.info("connection inactive for %d seconds, stopping" % int(gconf.timeout))
+ break
+ else:
+ select((), (), ())
+
+class SlaveRemote(object):
+ """mix-in class to implement an interface to a remote slave"""
+
+ def connect_remote(self, rargs=[], **opts):
+ """connects to a remote slave
+
+ Invoke an auxiliary utility (slave gsyncd, possibly wrapped)
+ which sets up the connection and set up a RePCe client to
+ communicate throuh its stdio.
+ """
+ slave = opts.get('slave', self.url)
+ extra_opts = []
+ so = getattr(gconf, 'session_owner', None)
+ if so:
+ extra_opts += ['--session-owner', so]
+ if boolify(gconf.use_rsync_xattrs):
+ extra_opts.append('--use-rsync-xattrs')
+ po = Popen(rargs + gconf.remote_gsyncd.split() + extra_opts + \
+ ['-N', '--listen', '--timeout', str(gconf.timeout), slave],
+ stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ gconf.transport = po
+ return self.start_fd_client(po.stdout, po.stdin, **opts)
+
+ def start_fd_client(self, i, o, **opts):
+ """set up RePCe client, handshake with server
+
+ It's cut out as a separate method to let
+ subclasses hook into client startup
+ """
+ self.server = RepceClient(i, o)
+ rv = self.server.__version__()
+ exrv = {'proto': repce.repce_version, 'object': Server.version()}
+ da0 = (rv, exrv)
+ da1 = ({}, {})
+ for i in range(2):
+ for k, v in da0[i].iteritems():
+ da1[i][k] = int(v)
+ if da1[0] != da1[1]:
+ raise GsyncdError("RePCe major version mismatch: local %s, remote %s" % (exrv, rv))
+
+ def rsync(self, files, *args):
+ """invoke rsync"""
+ if not files:
+ raise GsyncdError("no files to sync")
+ logging.debug("files: " + ", ".join(files))
+ argv = gconf.rsync_command.split() + \
+ ['-avR0', '--inplace', '--files-from=-', '--super','--stats', '--numeric-ids', '--no-implied-dirs'] + \
+ gconf.rsync_options.split() + (boolify(gconf.use_rsync_xattrs) and ['--xattrs'] or []) + \
+ ['.'] + list(args)
+ po = Popen(argv, stdin=subprocess.PIPE,stderr=subprocess.PIPE)
+ for f in files:
+ po.stdin.write(f)
+ po.stdin.write('\0')
+
+ po.stdin.close()
+ po.wait()
+ po.terminate_geterr(fail_on_err = False)
+
+ return po
+
+
+class AbstractUrl(object):
+ """abstract base class for url scheme classes"""
+
+ def __init__(self, path, pattern):
+ m = re.search(pattern, path)
+ if not m:
+ raise GsyncdError("malformed path")
+ self.path = path
+ return m.groups()
+
+ @property
+ def scheme(self):
+ return type(self).__name__.lower()
+
+ def canonical_path(self):
+ return self.path
+
+ def get_url(self, canonical=False, escaped=False):
+ """format self's url in various styles"""
+ if canonical:
+ pa = self.canonical_path()
+ else:
+ pa = self.path
+ u = "://".join((self.scheme, pa))
+ if escaped:
+ u = syncdutils.escape(u)
+ return u
+
+ @property
+ def url(self):
+ return self.get_url()
+
+
+ ### Concrete resource classes ###
+
+
+class FILE(AbstractUrl, SlaveLocal, SlaveRemote):
+ """scheme class for file:// urls
+
+ can be used to represent a file slave server
+ on slave side, or interface to a remote file
+ file server on master side
+ """
+
+ class FILEServer(Server):
+ """included server flavor"""
+ pass
+
+ server = FILEServer
+
+ def __init__(self, path):
+ sup(self, path, '^/')
+
+ def connect(self):
+ """inhibit the resource beyond"""
+ os.chdir(self.path)
+
+ def rsync(self, files):
+ return sup(self, files, self.path)
+
+
+class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote):
+ """scheme class for gluster:// urls
+
+ can be used to represent a gluster slave server
+ on slave side, or interface to a remote gluster
+ slave on master side, or to represent master
+ (slave-ish features come from the mixins, master
+ functionality is outsourced to GMaster from master)
+ """
+
+ class GLUSTERServer(Server):
+ "server enhancements for a glusterfs backend"""
+
+ @classmethod
+ def _attr_unpack_dict(cls, xattr, extra_fields = ''):
+ """generic volume mark fetching/parsing backed"""
+ fmt_string = cls.NTV_FMTSTR + extra_fields
+ buf = Xattr.lgetxattr('.', xattr, struct.calcsize(fmt_string))
+ vm = struct.unpack(fmt_string, buf)
+ m = re.match('(.{8})(.{4})(.{4})(.{4})(.{12})', "".join(['%02x' % x for x in vm[2:18]]))
+ uuid = '-'.join(m.groups())
+ volinfo = { 'version': vm[0:2],
+ 'uuid' : uuid,
+ 'retval' : vm[18],
+ 'volume_mark': vm[19:21],
+ }
+ if extra_fields:
+ return volinfo, vm[-len(extra_fields):]
+ else:
+ return volinfo
+
+ @classmethod
+ def foreign_volume_infos(cls):
+ """return list of valid (not expired) foreign volume marks"""
+ dict_list = []
+ xattr_list = Xattr.llistxattr_buf('.')
+ for ele in xattr_list:
+ if ele.find('.'.join([cls.GX_NSPACE, 'volume-mark', ''])) == 0:
+ d, x = cls._attr_unpack_dict(ele, cls.FRGN_XTRA_FMT)
+ now = int(time.time())
+ if x[0] > now:
+ logging.debug("volinfo[%s] expires: %d (%d sec later)" % \
+ (d['uuid'], x[0], x[0] - now))
+ d['timeout'] = x[0]
+ dict_list.append(d)
+ else:
+ try:
+ Xattr.lremovexattr('.', ele)
+ except OSError:
+ pass
+ return dict_list
+
+ @classmethod
+ def native_volume_info(cls):
+ """get the native volume mark of the underlying gluster volume"""
+ try:
+ return cls._attr_unpack_dict('.'.join([cls.GX_NSPACE, 'volume-mark']))
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno != ENODATA:
+ raise
+
+ server = GLUSTERServer
+
+ def __init__(self, path):
+ self.host, self.volume = sup(self, path, '^(%s):(.+)' % HostRX.pattern)
+
+ def canonical_path(self):
+ return ':'.join([gethostbyname(self.host), self.volume])
+
+ def can_connect_to(self, remote):
+ """determine our position in the connectibility matrix"""
+ return not remote or \
+ (isinstance(remote, SSH) and isinstance(remote.inner_rsc, GLUSTER))
+
+ class Mounter(object):
+ """Abstract base class for mounter backends"""
+
+ def __init__(self, params):
+ self.params = params
+ self.mntpt = None
+
+ @classmethod
+ def get_glusterprog(cls):
+ return os.path.join(gconf.gluster_command_dir, cls.glusterprog)
+
+ def umount_l(self, d):
+ """perform lazy umount"""
+ po = Popen(self.make_umount_argv(d), stderr=subprocess.PIPE)
+ po.wait()
+ return po
+
+ @classmethod
+ def make_umount_argv(cls, d):
+ raise NotImplementedError
+
+ def make_mount_argv(self, *a):
+ raise NotImplementedError
+
+ def cleanup_mntpt(self, *a):
+ pass
+
+ def handle_mounter(self, po):
+ po.wait()
+
+ def inhibit(self, *a):
+ """inhibit a gluster filesystem
+
+ Mount glusterfs over a temporary mountpoint,
+ change into the mount, and lazy unmount the
+ filesystem.
+ """
+
+ mpi, mpo = os.pipe()
+ mh = Popen.fork()
+ if mh:
+ os.close(mpi)
+ fcntl.fcntl(mpo, fcntl.F_SETFD, fcntl.FD_CLOEXEC)
+ d = None
+ margv = self.make_mount_argv(*a)
+ if self.mntpt:
+ # mntpt is determined pre-mount
+ d = self.mntpt
+ os.write(mpo, d + '\0')
+ po = Popen(margv, **self.mountkw)
+ self.handle_mounter(po)
+ po.terminate_geterr()
+ logging.debug('auxiliary glusterfs mount in place')
+ if not d:
+ # mntpt is determined during mount
+ d = self.mntpt
+ os.write(mpo, d + '\0')
+ os.write(mpo, 'M')
+ t = syncdutils.Thread(target=lambda: os.chdir(d))
+ t.start()
+ tlim = gconf.starttime + int(gconf.connection_timeout)
+ while True:
+ if not t.isAlive():
+ break
+ if time.time() >= tlim:
+ syncdutils.finalize(exval = 1)
+ time.sleep(1)
+ os.close(mpo)
+ _, rv = syncdutils.waitpid(mh, 0)
+ if rv:
+ rv = (os.WIFEXITED(rv) and os.WEXITSTATUS(rv) or 0) - \
+ (os.WIFSIGNALED(rv) and os.WTERMSIG(rv) or 0)
+ logging.warn('stale mount possibly left behind on ' + d)
+ raise GsyncdError("cleaning up temp mountpoint %s failed with status %d" % \
+ (d, rv))
+ else:
+ rv = 0
+ try:
+ os.setsid()
+ os.close(mpo)
+ mntdata = ''
+ while True:
+ c = os.read(mpi, 1)
+ if not c:
+ break
+ mntdata += c
+ if mntdata:
+ mounted = False
+ if mntdata[-1] == 'M':
+ mntdata = mntdata[:-1]
+ assert(mntdata)
+ mounted = True
+ assert(mntdata[-1] == '\0')
+ mntpt = mntdata[:-1]
+ assert(mntpt)
+ if mounted:
+ po = self.umount_l(mntpt)
+ po.terminate_geterr(fail_on_err = False)
+ if po.returncode != 0:
+ po.errlog()
+ rv = po.returncode
+ self.cleanup_mntpt(mntpt)
+ except:
+ logging.exception('mount cleanup failure:')
+ rv = 200
+ os._exit(rv)
+ logging.debug('auxiliary glusterfs mount prepared')
+
+ class DirectMounter(Mounter):
+ """mounter backend which calls mount(8), umount(8) directly"""
+
+ mountkw = {'stderr': subprocess.PIPE}
+ glusterprog = 'glusterfs'
+
+ @staticmethod
+ def make_umount_argv(d):
+ return ['umount', '-l', d]
+
+ def make_mount_argv(self):
+ self.mntpt = tempfile.mkdtemp(prefix = 'gsyncd-aux-mount-')
+ return [self.get_glusterprog()] + ['--' + p for p in self.params] + [self.mntpt]
+
+ def cleanup_mntpt(self, mntpt = None):
+ if not mntpt:
+ mntpt = self.mntpt
+ os.rmdir(mntpt)
+
+ class MountbrokerMounter(Mounter):
+ """mounter backend using the mountbroker gluster service"""
+
+ mountkw = {'stderr': subprocess.PIPE, 'stdout': subprocess.PIPE}
+ glusterprog = 'gluster'
+
+ @classmethod
+ def make_cli_argv(cls):
+ return [cls.get_glusterprog()] + gconf.gluster_cli_options.split() + ['system::']
+
+ @classmethod
+ def make_umount_argv(cls, d):
+ return cls.make_cli_argv() + ['umount', d, 'lazy']
+
+ def make_mount_argv(self, label):
+ return self.make_cli_argv() + \
+ ['mount', label, 'user-map-root=' + syncdutils.getusername()] + self.params
+
+ def handle_mounter(self, po):
+ self.mntpt = po.stdout.readline()[:-1]
+ po.stdout.close()
+ sup(self, po)
+ if po.returncode != 0:
+ # if cli terminated with error due to being
+ # refused by glusterd, what it put
+ # out on stdout is a diagnostic message
+ logging.error('glusterd answered: %s' % self.mntpt)
+
+ def connect(self):
+ """inhibit the resource beyond
+
+ Choose mounting backend (direct or mountbroker),
+ set up glusterfs parameters and perform the mount
+ with given backend
+ """
+
+ label = getattr(gconf, 'mountbroker', None)
+ if not label and not privileged():
+ label = syncdutils.getusername()
+ mounter = label and self.MountbrokerMounter or self.DirectMounter
+ params = gconf.gluster_params.split() + \
+ (gconf.gluster_log_level and ['log-level=' + gconf.gluster_log_level] or []) + \
+ ['log-file=' + gconf.gluster_log_file, 'volfile-server=' + self.host,
+ 'volfile-id=' + self.volume, 'client-pid=-1']
+ mounter(params).inhibit(*[l for l in [label] if l])
+
+ def connect_remote(self, *a, **kw):
+ sup(self, *a, **kw)
+ self.slavedir = "/proc/%d/cwd" % self.server.pid()
+
+ def gmaster_instantiate_tuple(self, slave):
+ """return a tuple of the 'one shot' and the 'main crawl' class instance"""
+ return (gmaster_builder('xsync')(self, slave), gmaster_builder()(self, slave))
+
+ def service_loop(self, *args):
+ """enter service loop
+
+ - if slave given, instantiate GMaster and
+ pass control to that instance, which implements
+ master behavior
+ - else do that's what's inherited
+ """
+ if args:
+ slave = args[0]
+ if gconf.local_path:
+ class brickserver(FILE.FILEServer):
+ local_path = gconf.local_path
+ aggregated = self.server
+ @classmethod
+ def entries(cls, path):
+ e = super(brickserver, cls).entries(path)
+ # on the brick don't mess with /.glusterfs
+ if path == '.':
+ try:
+ e.remove('.glusterfs')
+ except ValueError:
+ pass
+ return e
+ if gconf.slave_id:
+ # define {,set_}xtime in slave, thus preempting
+ # the call to remote, so that it takes data from
+ # the local brick
+ slave.server.xtime = types.MethodType(lambda _self, path, uuid: brickserver.xtime(path, uuid + '.' + gconf.slave_id), slave.server)
+ slave.server.set_xtime = types.MethodType(lambda _self, path, uuid, mark: brickserver.set_xtime(path, uuid + '.' + gconf.slave_id, mark), slave.server)
+ (g1, g2) = self.gmaster_instantiate_tuple(slave)
+ g1.master.server = brickserver
+ g2.master.server = brickserver
+ else:
+ (g1, g2) = self.gmaster_instantiate_tuple(slave)
+ g1.master.server.aggregated = gmaster.master.server
+ g2.master.server.aggregated = gmaster.master.server
+ # bad bad bad: bad way to do things like this
+ # need to make this elegant
+ # register the crawlers and start crawling
+ g1.register()
+ g2.register()
+ g1.crawlwrap(oneshot=True)
+ g2.crawlwrap()
+ else:
+ sup(self, *args)
+
+ def rsync(self, files):
+ return sup(self, files, self.slavedir)
+
+
+class SSH(AbstractUrl, SlaveRemote):
+ """scheme class for ssh:// urls
+
+ interface to remote slave on master side
+ implementing an ssh based proxy
+ """
+
+ def __init__(self, path):
+ self.remote_addr, inner_url = sup(self, path,
+ '^((?:%s@)?%s):(.+)' % tuple([ r.pattern for r in (UserRX, HostRX) ]))
+ self.inner_rsc = parse_url(inner_url)
+ self.volume = inner_url[1:]
+
+ @staticmethod
+ def parse_ssh_address(self):
+ m = re.match('([^@]+)@(.+)', self.remote_addr)
+ if m:
+ u, h = m.groups()
+ else:
+ u, h = syncdutils.getusername(), self.remote_addr
+ self.remotehost = h
+ return {'user': u, 'host': h}
+
+ def canonical_path(self):
+ rap = self.parse_ssh_address(self)
+ remote_addr = '@'.join([rap['user'], gethostbyname(rap['host'])])
+ return ':'.join([remote_addr, self.inner_rsc.get_url(canonical=True)])
+
+ def can_connect_to(self, remote):
+ """determine our position in the connectibility matrix"""
+ return False
+
+ def start_fd_client(self, *a, **opts):
+ """customizations for client startup
+
+ - be a no-op if we are to daemonize (client startup is deferred
+ to post-daemon stage)
+ - determine target url for rsync after consulting server
+ """
+ if opts.get('deferred'):
+ return a
+ sup(self, *a)
+ ityp = type(self.inner_rsc)
+ if ityp == FILE:
+ slavepath = self.inner_rsc.path
+ elif ityp == GLUSTER:
+ slavepath = "/proc/%d/cwd" % self.server.pid()
+ else:
+ raise NotImplementedError
+ self.slaveurl = ':'.join([self.remote_addr, slavepath])
+
+ def connect_remote(self, go_daemon=None):
+ """connect to inner slave url through outer ssh url
+
+ Wrap the connecting utility in ssh.
+
+ Much care is put into daemonizing: in that case
+ ssh is started before daemonization, but
+ RePCe client is to be created after that (as ssh
+ interactive password auth would be defeated by
+ a daemonized ssh, while client should be present
+ only in the final process). In that case the action
+ is taken apart to two parts, this method is ivoked
+ once pre-daemon, once post-daemon. Use @go_daemon
+ to deiced what part to perform.
+
+ [NB. ATM gluster product does not makes use of interactive
+ authentication.]
+ """
+ if go_daemon == 'done':
+ return self.start_fd_client(*self.fd_pair)
+
+ syncdutils.setup_ssh_ctl(tempfile.mkdtemp(prefix='gsyncd-aux-ssh-'),
+ self.remote_addr,
+ self.inner_rsc.url)
+
+ deferred = go_daemon == 'postconn'
+ ret = sup(self, gconf.ssh_command.split() + gconf.ssh_ctl_args + [self.remote_addr],
+ slave=self.inner_rsc.url, deferred=deferred)
+
+ if deferred:
+ # send a message to peer so that we can wait for
+ # the answer from which we know connection is
+ # established and we can proceed with daemonization
+ # (doing that too early robs the ssh passwd prompt...)
+ # However, we'd better not start the RepceClient
+ # before daemonization (that's not preserved properly
+ # in daemon), we just do a an ad-hoc linear put/get.
+ i, o = ret
+ inf = os.fdopen(i)
+ repce.send(o, None, '__repce_version__')
+ select((inf,), (), ())
+ repce.recv(inf)
+ # hack hack hack: store a global reference to the file
+ # to save it from getting GC'd which implies closing it
+ gconf.permanent_handles.append(inf)
+ self.fd_pair = (i, o)
+ return 'should'
+
+ def rsync(self, files):
+ return sup(self, files, '-e', " ".join(gconf.ssh_command.split() + gconf.ssh_ctl_args),
+ *(gconf.rsync_ssh_options.split() + [self.slaveurl]))
diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py
new file mode 100644
index 000000000..348eb38c1
--- /dev/null
+++ b/geo-replication/syncdaemon/syncdutils.py
@@ -0,0 +1,438 @@
+import os
+import sys
+import pwd
+import time
+import fcntl
+import shutil
+import logging
+import socket
+from threading import Lock, Thread as baseThread
+from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, ECONNABORTED, EINTR, ENOENT, EPERM, ESTALE, errorcode
+from signal import signal, SIGTERM, SIGKILL
+from time import sleep
+import select as oselect
+from os import waitpid as owaitpid
+
+try:
+ from cPickle import PickleError
+except ImportError:
+ # py 3
+ from pickle import PickleError
+
+from gconf import gconf
+
+try:
+ # py 3
+ from urllib import parse as urllib
+except ImportError:
+ import urllib
+
+try:
+ from hashlib import md5 as md5
+except ImportError:
+ # py 2.4
+ from md5 import new as md5
+
+# auxillary gfid based access prefix
+_CL_AUX_GFID_PFX = ".gfid/"
+GF_OP_RETRIES = 20
+
+def escape(s):
+ """the chosen flavor of string escaping, used all over
+ to turn whatever data to creatable representation"""
+ return urllib.quote_plus(s)
+
+def unescape(s):
+ """inverse of .escape"""
+ return urllib.unquote_plus(s)
+
+def norm(s):
+ if s:
+ return s.replace('-', '_')
+
+def update_file(path, updater, merger = lambda f: True):
+ """update a file in a transaction-like manner"""
+
+ fr = fw = None
+ try:
+ fd = os.open(path, os.O_CREAT|os.O_RDWR)
+ try:
+ fr = os.fdopen(fd, 'r+b')
+ except:
+ os.close(fd)
+ raise
+ fcntl.lockf(fr, fcntl.LOCK_EX)
+ if not merger(fr):
+ return
+
+ tmpp = path + '.tmp.' + str(os.getpid())
+ fd = os.open(tmpp, os.O_CREAT|os.O_EXCL|os.O_WRONLY)
+ try:
+ fw = os.fdopen(fd, 'wb', 0)
+ except:
+ os.close(fd)
+ raise
+ updater(fw)
+ os.fsync(fd)
+ os.rename(tmpp, path)
+ finally:
+ for fx in (fr, fw):
+ if fx:
+ fx.close()
+
+def create_manifest(fname, content):
+ """
+ Create manifest file for SSH Control Path
+ """
+ fd = None
+ try:
+ fd = os.open(fname, os.O_CREAT|os.O_RDWR)
+ try:
+ os.write(fd, content)
+ except:
+ os.close(fd)
+ raise
+ finally:
+ if fd != None:
+ os.close(fd)
+
+def setup_ssh_ctl(ctld, remote_addr, resource_url):
+ """
+ Setup GConf ssh control path parameters
+ """
+ gconf.ssh_ctl_dir = ctld
+ content = "SLAVE_HOST=%s\nSLAVE_RESOURCE_URL=%s" % (remote_addr,
+ resource_url)
+ content_md5 = md5hex(content)
+ fname = os.path.join(gconf.ssh_ctl_dir,
+ "%s.mft" % content_md5)
+
+ create_manifest(fname, content)
+ ssh_ctl_path = os.path.join(gconf.ssh_ctl_dir,
+ "%s.sock" % content_md5)
+ gconf.ssh_ctl_args = ["-oControlMaster=auto", "-S", ssh_ctl_path]
+
+def grabfile(fname, content=None):
+ """open @fname + contest for its fcntl lock
+
+ @content: if given, set the file content to it
+ """
+ # damn those messy open() mode codes
+ fd = os.open(fname, os.O_CREAT|os.O_RDWR)
+ f = os.fdopen(fd, 'r+b', 0)
+ try:
+ fcntl.lockf(f, fcntl.LOCK_EX|fcntl.LOCK_NB)
+ except:
+ ex = sys.exc_info()[1]
+ f.close()
+ if isinstance(ex, IOError) and ex.errno in (EACCES, EAGAIN):
+ # cannot grab, it's taken
+ return
+ raise
+ if content:
+ try:
+ f.truncate()
+ f.write(content)
+ except:
+ f.close()
+ raise
+ gconf.permanent_handles.append(f)
+ return f
+
+def grabpidfile(fname=None, setpid=True):
+ """.grabfile customization for pid files"""
+ if not fname:
+ fname = gconf.pid_file
+ content = None
+ if setpid:
+ content = str(os.getpid()) + '\n'
+ return grabfile(fname, content=content)
+
+final_lock = Lock()
+
+def finalize(*a, **kw):
+ """all those messy final steps we go trough upon termination
+
+ Do away with pidfile, ssh control dir and logging.
+ """
+ final_lock.acquire()
+ if getattr(gconf, 'pid_file', None):
+ rm_pidf = gconf.pid_file_owned
+ if gconf.cpid:
+ # exit path from parent branch of daemonization
+ rm_pidf = False
+ while True:
+ f = grabpidfile(setpid=False)
+ if not f:
+ # child has already taken over pidfile
+ break
+ if os.waitpid(gconf.cpid, os.WNOHANG)[0] == gconf.cpid:
+ # child has terminated
+ rm_pidf = True
+ break;
+ time.sleep(0.1)
+ if rm_pidf:
+ try:
+ os.unlink(gconf.pid_file)
+ except:
+ ex = sys.exc_info()[1]
+ if ex.errno == ENOENT:
+ pass
+ else:
+ raise
+ if gconf.ssh_ctl_dir and not gconf.cpid:
+ shutil.rmtree(gconf.ssh_ctl_dir)
+ if getattr(gconf, 'state_socket', None):
+ try:
+ os.unlink(gconf.state_socket)
+ except:
+ if sys.exc_info()[0] == OSError:
+ pass
+ if gconf.log_exit:
+ logging.info("exiting.")
+ sys.stdout.flush()
+ sys.stderr.flush()
+ os._exit(kw.get('exval', 0))
+
+def log_raise_exception(excont):
+ """top-level exception handler
+
+ Try to some fancy things to cover up we face with an error.
+ Translate some weird sounding but well understood exceptions
+ into human-friendly lingo
+ """
+ is_filelog = False
+ for h in logging.getLogger().handlers:
+ fno = getattr(getattr(h, 'stream', None), 'fileno', None)
+ if fno and not os.isatty(fno()):
+ is_filelog = True
+
+ exc = sys.exc_info()[1]
+ if isinstance(exc, SystemExit):
+ excont.exval = exc.code or 0
+ raise
+ else:
+ logtag = None
+ if isinstance(exc, GsyncdError):
+ if is_filelog:
+ logging.error(exc.args[0])
+ sys.stderr.write('failure: ' + exc.args[0] + '\n')
+ elif isinstance(exc, PickleError) or isinstance(exc, EOFError) or \
+ ((isinstance(exc, OSError) or isinstance(exc, IOError)) and \
+ exc.errno == EPIPE):
+ logging.error('connection to peer is broken')
+ if hasattr(gconf, 'transport'):
+ gconf.transport.wait()
+ if gconf.transport.returncode == 127:
+ logging.warn("!!!!!!!!!!!!!")
+ logging.warn('!!! getting "No such file or directory" errors '
+ "is most likely due to MISCONFIGURATION, please consult "
+ "http://access.redhat.com/knowledge/docs/en-US/Red_Hat_Storage/2.0/html/Administration_Guide/chap-User_Guide-Geo_Rep-Preparation-Settingup_Environment.html")
+ logging.warn("!!!!!!!!!!!!!")
+ gconf.transport.terminate_geterr()
+ elif isinstance(exc, OSError) and exc.errno in (ENOTCONN, ECONNABORTED):
+ logging.error('glusterfs session went down [%s]', errorcode[exc.errno])
+ else:
+ logtag = "FAIL"
+ if not logtag and logging.getLogger().isEnabledFor(logging.DEBUG):
+ logtag = "FULL EXCEPTION TRACE"
+ if logtag:
+ logging.exception(logtag + ": ")
+ sys.stderr.write("failed with %s.\n" % type(exc).__name__)
+ excont.exval = 1
+ sys.exit(excont.exval)
+
+
+class FreeObject(object):
+ """wildcard class for which any attribute can be set"""
+
+ def __init__(self, **kw):
+ for k,v in kw.items():
+ setattr(self, k, v)
+
+class Thread(baseThread):
+ """thread class flavor for gsyncd
+
+ - always a daemon thread
+ - force exit for whole program if thread
+ function coughs up an exception
+ """
+ def __init__(self, *a, **kw):
+ tf = kw.get('target')
+ if tf:
+ def twrap(*aa):
+ excont = FreeObject(exval = 0)
+ try:
+ tf(*aa)
+ except:
+ try:
+ log_raise_exception(excont)
+ finally:
+ finalize(exval = excont.exval)
+ kw['target'] = twrap
+ baseThread.__init__(self, *a, **kw)
+ self.setDaemon(True)
+
+class GsyncdError(Exception):
+ pass
+
+def getusername(uid = None):
+ if uid == None:
+ uid = os.geteuid()
+ return pwd.getpwuid(uid).pw_name
+
+def privileged():
+ return os.geteuid() == 0
+
+def boolify(s):
+ """
+ Generic string to boolean converter
+
+ return
+ - Quick return if string 's' is of type bool
+ - True if it's in true_list
+ - False if it's in false_list
+ - Warn if it's not present in either and return False
+ """
+ true_list = ['true', 'yes', '1', 'on']
+ false_list = ['false', 'no', '0', 'off']
+
+ if isinstance(s, bool):
+ return s
+
+ rv = False
+ lstr = s.lower()
+ if lstr in true_list:
+ rv = True
+ elif not lstr in false_list:
+ logging.warn("Unknown string (%s) in string to boolean conversion defaulting to False\n" % (s))
+
+ return rv
+
+def eintr_wrap(func, exc, *a):
+ """
+ wrapper around syscalls resilient to interrupt caused
+ by signals
+ """
+ while True:
+ try:
+ return func(*a)
+ except exc:
+ ex = sys.exc_info()[1]
+ if not ex.args[0] == EINTR:
+ raise
+
+def select(*a):
+ return eintr_wrap(oselect.select, oselect.error, *a)
+
+def waitpid (*a):
+ return eintr_wrap(owaitpid, OSError, *a)
+
+def set_term_handler(hook=lambda *a: finalize(*a, **{'exval': 1})):
+ signal(SIGTERM, hook)
+
+def is_host_local(host):
+ locaddr = False
+ for ai in socket.getaddrinfo(host, None):
+ # cf. http://github.com/gluster/glusterfs/blob/ce111f47/xlators/mgmt/glusterd/src/glusterd-utils.c#L125
+ if ai[0] == socket.AF_INET:
+ if ai[-1][0].split(".")[0] == "127":
+ locaddr = True
+ break
+ elif ai[0] == socket.AF_INET6:
+ if ai[-1][0] == "::1":
+ locaddr = True
+ break
+ else:
+ continue
+ try:
+ # use ICMP socket to avoid net.ipv4.ip_nonlocal_bind issue,
+ # cf. https://bugzilla.redhat.com/show_bug.cgi?id=890587
+ s = socket.socket(ai[0], socket.SOCK_RAW, socket.IPPROTO_ICMP)
+ except socket.error:
+ ex = sys.exc_info()[1]
+ if ex.errno != EPERM:
+ raise
+ f = None
+ try:
+ f = open("/proc/sys/net/ipv4/ip_nonlocal_bind")
+ if int(f.read()) != 0:
+ raise GsyncdError(
+ "non-local bind is set and not allowed to create raw sockets, "
+ "cannot determine if %s is local" % host)
+ s = socket.socket(ai[0], socket.SOCK_DGRAM)
+ finally:
+ if f:
+ f.close()
+ try:
+ s.bind(ai[-1])
+ locaddr = True
+ break
+ except:
+ pass
+ s.close()
+ return locaddr
+
+def funcode(f):
+ fc = getattr(f, 'func_code', None)
+ if not fc:
+ # python 3
+ fc = f.__code__
+ return fc
+
+def memoize(f):
+ fc = funcode(f)
+ fn = fc.co_name
+ def ff(self, *a, **kw):
+ rv = getattr(self, '_' + fn, None)
+ if rv == None:
+ rv = f(self, *a, **kw)
+ setattr(self, '_' + fn, rv)
+ return rv
+ return ff
+
+def umask():
+ return os.umask(0)
+
+def entry2pb(e):
+ return e.rsplit('/', 1)
+
+def gauxpfx():
+ return _CL_AUX_GFID_PFX
+
+def md5hex(s):
+ return md5(s).hexdigest()
+
+def selfkill(sig=SIGTERM):
+ os.kill(os.getpid(), sig)
+
+def errno_wrap(call, arg=[], errnos=[], retry_errnos=[ESTALE]):
+ """ wrapper around calls resilient to errnos.
+ retry in case of ESTALE by default.
+ """
+ nr_tries = 0
+ while True:
+ try:
+ return call(*arg)
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno in errnos:
+ return ex.errno
+ if not ex.errno in retry_errnos:
+ raise
+ nr_tries += 1
+ if nr_tries == GF_OP_RETRIES:
+ # probably a screwed state, cannot do much...
+ logging.warn('reached maximum retries (%s)...' % repr(arg))
+ return
+ time.sleep(0.250) # retry the call
+
+def lstat(e):
+ try:
+ return os.lstat(e)
+ except (IOError, OSError):
+ ex = sys.exc_info()[1]
+ if ex.errno == ENOENT:
+ return ex.errno
+ else:
+ raise
diff --git a/gf-error-codes.h.template b/gf-error-codes.h.template
new file mode 100644
index 000000000..ab6020d64
--- /dev/null
+++ b/gf-error-codes.h.template
@@ -0,0 +1,33 @@
+/***************************************************************/
+/** **/
+/** DO NOT EDIT THIS FILE **/
+/** THIS IS AUTO-GENERATED FROM LOG BOOK **/
+/** YOUR CHANGES WILL BE LOST IN NEXT BUILD **/
+/** **/
+/***************************************************************/
+
+#ifndef _GF_ERROR_CODES_H
+#define _GF_ERROR_CODES_H
+
+#include <libintl.h>
+
+#define _(STRING) gettext(STRING)
+
+
+/** START: ERROR CODE DEFINITIONS **/
+$DEFINES
+/** END: ERROR CODE DEFINITIONS **/
+
+
+/** START: FUNCTION RETURNS MESSAGE OF GIVEN ERROR CODE **/
+const char *
+_gf_get_message (int code) {
+ switch (code) {
+$CASES
+ default: return NULL;
+ }
+}
+/** END: FUNCTION RETURNS MESSAGE OF GIVEN ERROR CODE **/
+
+
+#endif
diff --git a/glusterfs-api.pc.in b/glusterfs-api.pc.in
new file mode 100644
index 000000000..fab4a57d5
--- /dev/null
+++ b/glusterfs-api.pc.in
@@ -0,0 +1,12 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+
+Name: glusterfs-api
+Description: GlusterFS API
+/* This is the API version, NOT package version */
+Version: 6
+Libs: -L${libdir} -lgfapi -lglusterfs -lgfrpc -lgfxdr
+Cflags: -I${includedir}/glusterfs -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64
diff --git a/glusterfs-guts/src/Makefile.am b/glusterfs-guts/src/Makefile.am
deleted file mode 100644
index bb8c7b176..000000000
--- a/glusterfs-guts/src/Makefile.am
+++ /dev/null
@@ -1,17 +0,0 @@
-sbin_PROGRAMS = glusterfs-guts
-
-glusterfs_guts_SOURCES = glusterfs-guts.c fuse-bridge.c guts-replay.c guts-trace.c \
- fuse-extra.c guts-extra.c guts-parse.c guts-tables.c
-
-noinst_HEADERS = fuse_kernel.h fuse-extra.h glusterfs-guts.h glusterfs-fuse.h guts-lowlevel.h \
- guts-parse.h guts-replay.h guts-tables.h guts-trace.h
-
-glusterfs_guts_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -lfuse
-
-AM_CFLAGS = -fPIC -Wall -pthread
-
-AM_CPPFLAGS = -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -DFUSE_USE_VERSION=26 \
- -I$(top_srcdir)/libglusterfs/src -DDATADIR=\"$(localstatedir)\" \
- -DCONFDIR=\"$(sysconfdir)/glusterfs\"
-
-CLEANFILES =
diff --git a/glusterfs-guts/src/fuse-bridge.c b/glusterfs-guts/src/fuse-bridge.c
deleted file mode 100644
index f0bf05ebd..000000000
--- a/glusterfs-guts/src/fuse-bridge.c
+++ /dev/null
@@ -1,2724 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#include <stdint.h>
-#include <signal.h>
-#include <pthread.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif /* _CONFIG_H */
-
-#include "glusterfs.h"
-#include "logging.h"
-#include "xlator.h"
-#include "glusterfs.h"
-#include "transport.h"
-#include "defaults.h"
-#include "common-utils.h"
-
-#include <fuse/fuse_lowlevel.h>
-
-#include "fuse-extra.h"
-#include "list.h"
-
-#include "guts-lowlevel.h"
-
-#define BIG_FUSE_CHANNEL_SIZE 1048576
-
-struct fuse_private {
- int fd;
- struct fuse *fuse;
- struct fuse_session *se;
- struct fuse_chan *ch;
- char *mountpoint;
-};
-
-char glusterfs_fuse_direct_io_mode = 1;
-float glusterfs_fuse_entry_timeout = 1.0;
-float glusterfs_fuse_attr_timeout = 1.0;
-
-#define FI_TO_FD(fi) ((fd_t *)((long)fi->fh))
-
-#define FUSE_FOP(state, ret, op, args ...) \
-do { \
- call_frame_t *frame = get_call_frame_for_req (state, 1); \
- xlator_t *xl = frame->this->children ? \
- frame->this->children->xlator : NULL; \
- dict_t *refs = frame->root->req_refs; \
- frame->root->state = state; \
- STACK_WIND (frame, ret, xl, xl->fops->op, args); \
- dict_unref (refs); \
-} while (0)
-
-#define FUSE_FOP_NOREPLY(state, op, args ...) \
-do { \
- call_frame_t *_frame = get_call_frame_for_req (state, 0); \
- xlator_t *xl = _frame->this->children->xlator; \
- _frame->root->req_refs = NULL; \
- STACK_WIND (_frame, fuse_nop_cbk, xl, xl->fops->op, args); \
-} while (0)
-
-typedef struct {
- loc_t loc;
- inode_t *parent;
- inode_t *inode;
- char *name;
-} fuse_loc_t;
-
-typedef struct {
- void *pool;
- xlator_t *this;
- inode_table_t *itable;
- fuse_loc_t fuse_loc;
- fuse_loc_t fuse_loc2;
- fuse_req_t req;
-
- int32_t flags;
- off_t off;
- size_t size;
- unsigned long nlookup;
- fd_t *fd;
- dict_t *dict;
- char *name;
- char is_revalidate;
-} fuse_state_t;
-
-
-static void
-loc_wipe (loc_t *loc)
-{
- if (loc->inode) {
- inode_unref (loc->inode);
- loc->inode = NULL;
- }
- if (loc->path) {
- FREE (loc->path);
- loc->path = NULL;
- }
-}
-
-
-static inode_t *
-dummy_inode (inode_table_t *table)
-{
- inode_t *dummy;
-
- dummy = CALLOC (1, sizeof (*dummy));
- ERR_ABORT (dummy);
-
- dummy->table = table;
-
- INIT_LIST_HEAD (&dummy->list);
- INIT_LIST_HEAD (&dummy->inode_hash);
- INIT_LIST_HEAD (&dummy->fds);
- INIT_LIST_HEAD (&dummy->dentry.name_hash);
- INIT_LIST_HEAD (&dummy->dentry.inode_list);
-
- dummy->ref = 1;
- dummy->ctx = get_new_dict ();
-
- LOCK_INIT (&dummy->lock);
- return dummy;
-}
-
-static void
-fuse_loc_wipe (fuse_loc_t *fuse_loc)
-{
- loc_wipe (&fuse_loc->loc);
- if (fuse_loc->name) {
- FREE (fuse_loc->name);
- fuse_loc->name = NULL;
- }
- if (fuse_loc->inode) {
- inode_unref (fuse_loc->inode);
- fuse_loc->inode = NULL;
- }
- if (fuse_loc->parent) {
- inode_unref (fuse_loc->parent);
- fuse_loc->parent = NULL;
- }
-}
-
-
-static void
-free_state (fuse_state_t *state)
-{
- fuse_loc_wipe (&state->fuse_loc);
-
- fuse_loc_wipe (&state->fuse_loc2);
-
- if (state->dict) {
- dict_unref (state->dict);
- state->dict = (void *)0xaaaaeeee;
- }
- if (state->name) {
- FREE (state->name);
- state->name = NULL;
- }
-#ifdef DEBUG
- memset (state, 0x90, sizeof (*state));
-#endif
- FREE (state);
- state = NULL;
-}
-
-
-static int32_t
-fuse_nop_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- if (frame->root->state)
- free_state (frame->root->state);
-
- frame->root->state = EEEEKS;
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-fuse_state_t *
-state_from_req (fuse_req_t req)
-{
- fuse_state_t *state;
- transport_t *trans = fuse_req_userdata (req);
-
- state = (void *)calloc (1, sizeof (*state));
- ERR_ABORT (state);
- state->pool = trans->xl->ctx->pool;
- state->itable = trans->xl->itable;
- state->req = req;
- state->this = trans->xl;
-
- return state;
-}
-
-
-static call_frame_t *
-get_call_frame_for_req (fuse_state_t *state, char d)
-{
- call_pool_t *pool = state->pool;
- fuse_req_t req = state->req;
- const struct fuse_ctx *ctx = NULL;
- call_ctx_t *cctx = NULL;
- transport_t *trans = NULL;
-
- cctx = CALLOC (1, sizeof (*cctx));
- ERR_ABORT (cctx);
- cctx->frames.root = cctx;
-
- if (req) {
- ctx = fuse_req_ctx(req);
-
- cctx->uid = ctx->uid;
- cctx->gid = ctx->gid;
- cctx->pid = ctx->pid;
- cctx->unique = req_callid (req);
- }
-
- if (req) {
- trans = fuse_req_userdata (req);
- cctx->frames.this = trans->xl;
- cctx->trans = trans;
- } else {
- cctx->frames.this = state->this;
- }
-
- if (d) {
- cctx->req_refs = dict_ref (get_new_dict ());
- dict_set (cctx->req_refs, NULL, trans->buf);
- cctx->req_refs->is_locked = 1;
- }
-
- cctx->pool = pool;
- LOCK (&pool->lock);
- list_add (&cctx->all_frames, &pool->all_frames);
- UNLOCK (&pool->lock);
-
- return &cctx->frames;
-}
-
-
-static void
-fuse_loc_fill (fuse_loc_t *fuse_loc,
- fuse_state_t *state,
- ino_t ino,
- const char *name)
-{
- size_t n;
- inode_t *inode, *parent = NULL;
-
- /* resistance against multiple invocation of loc_fill not to get
- reference leaks via inode_search() */
- inode = fuse_loc->inode;
- if (!inode) {
- inode = inode_search (state->itable, ino, name);
- }
- fuse_loc->inode = inode;
-
- if (name) {
- if (!fuse_loc->name)
- fuse_loc->name = strdup (name);
-
- parent = fuse_loc->parent;
- if (!parent) {
- if (inode)
- parent = inode_parent (inode, ino);
- else
- parent = inode_search (state->itable, ino, NULL);
- }
- }
- fuse_loc->parent = parent;
-
- if (inode) {
- fuse_loc->loc.inode = inode_ref (inode);
- fuse_loc->loc.ino = inode->ino;
- }
-
- if (parent) {
- n = inode_path (parent, name, NULL, 0) + 1;
- fuse_loc->loc.path = CALLOC (1, n);
- ERR_ABORT (fuse_loc->loc.path);
- inode_path (parent, name, (char *)fuse_loc->loc.path, n);
- } else if (inode) {
- n = inode_path (inode, NULL, NULL, 0) + 1;
- fuse_loc->loc.path = CALLOC (1, n);
- ERR_ABORT (fuse_loc->loc.path);
- inode_path (inode, NULL, (char *)fuse_loc->loc.path, n);
- }
-}
-
-static int32_t
-fuse_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *stat,
- dict_t *dict);
-
-static int32_t
-fuse_entry_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- fuse_state_t *state;
- fuse_req_t req;
- struct fuse_entry_param e = {0, };
-
- state = frame->root->state;
- req = state->req;
-
- if (!op_ret) {
- if (inode->ino == 1)
- buf->st_ino = 1;
- }
-
- if (!op_ret && inode && inode->ino && buf && inode->ino != buf->st_ino) {
- /* temporary workaround to handle AFR returning differnt inode number */
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRId64": %s => inode number changed %"PRId64" -> %"PRId64,
- frame->root->unique, state->fuse_loc.loc.path,
- inode->ino, buf->st_ino);
- inode_unref (state->fuse_loc.loc.inode);
- state->fuse_loc.loc.inode = dummy_inode (state->itable);
- state->is_revalidate = 2;
-
- STACK_WIND (frame, fuse_lookup_cbk,
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->lookup,
- &state->fuse_loc.loc,
- 0);
-
- return 0;
- }
-
- if (op_ret == 0) {
- ino_t ino = buf->st_ino;
- inode_t *fuse_inode;
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": %s => %"PRId64, frame->root->unique,
- state->fuse_loc.loc.path, ino);
-
- try_again:
- fuse_inode = inode_update (state->itable, state->fuse_loc.parent,
- state->fuse_loc.name, buf);
-
- if (fuse_inode->ctx) {
- /* if the inode was already in the hash, checks to flush out
- old name hashes */
- if ((fuse_inode->st_mode ^ buf->st_mode) & S_IFMT) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRId64": %s => %"PRId64" Rehashing %x/%x",
- frame->root->unique,
- state->fuse_loc.loc.path, ino, (S_IFMT & buf->st_ino),
- (S_IFMT & fuse_inode->st_mode));
-
- fuse_inode->st_mode = buf->st_mode;
- inode_unhash_name (state->itable, fuse_inode);
- inode_unref (fuse_inode);
- goto try_again;
- }
- if (buf->st_nlink == 1) {
- /* no other name hashes should exist */
- if (!list_empty (&fuse_inode->dentry.inode_list)) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRId64": %s => %"PRId64" Rehashing because st_nlink less than dentry maps",
- frame->root->unique,
- state->fuse_loc.loc.path, ino);
- inode_unhash_name (state->itable, fuse_inode);
- inode_unref (fuse_inode);
- goto try_again;
- }
- if ((state->fuse_loc.parent != fuse_inode->dentry.parent) ||
- strcmp (state->fuse_loc.name, fuse_inode->dentry.name)) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRId64": %s => %"PRId64" Rehashing because single st_nlink does not match dentry map",
- frame->root->unique,
- state->fuse_loc.loc.path, ino);
- inode_unhash_name (state->itable, fuse_inode);
- inode_unref (fuse_inode);
- goto try_again;
- }
- }
- }
-
- if ((fuse_inode->ctx != inode->ctx) &&
- list_empty (&fuse_inode->fds)) {
- dict_t *swap = inode->ctx;
- inode->ctx = fuse_inode->ctx;
- fuse_inode->ctx = swap;
- fuse_inode->generation = inode->generation;
- fuse_inode->st_mode = buf->st_mode;
- }
-
- inode_lookup (fuse_inode);
-
- inode_unref (fuse_inode);
-
- /* TODO: make these timeouts configurable (via meta?) */
- e.ino = fuse_inode->ino;
- e.generation = buf->st_ctime;
- e.entry_timeout = glusterfs_fuse_entry_timeout;
- e.attr_timeout = glusterfs_fuse_attr_timeout;
- e.attr = *buf;
- e.attr.st_blksize = BIG_FUSE_CHANNEL_SIZE;
- if (state->fuse_loc.parent)
- fuse_reply_entry (req, &e);
- else
- fuse_reply_attr (req, buf, glusterfs_fuse_attr_timeout);
- } else {
- if (state->is_revalidate == -1 && op_errno == ENOENT) {
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": %s => -1 (%d)", frame->root->unique,
- state->fuse_loc.loc.path, op_errno);
- } else {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRId64": %s => -1 (%d)", frame->root->unique,
- state->fuse_loc.loc.path, op_errno);
- }
-
- if (state->is_revalidate == 1) {
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "unlinking stale dentry for `%s'",
- state->fuse_loc.loc.path);
-
- if (state->fuse_loc.parent)
- inode_unlink (state->itable, state->fuse_loc.parent,
- state->fuse_loc.name);
-
- inode_unref (state->fuse_loc.loc.inode);
- state->fuse_loc.loc.inode = dummy_inode (state->itable);
- state->is_revalidate = 2;
-
- STACK_WIND (frame, fuse_lookup_cbk,
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->lookup,
- &state->fuse_loc.loc, 0);
-
- return 0;
- }
-
- fuse_reply_err (req, op_errno);
- }
-
- free_state (state);
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-
-static int32_t
-fuse_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *stat,
- dict_t *dict)
-{
- fuse_entry_cbk (frame, cookie, this, op_ret, op_errno, inode, stat);
- return 0;
-}
-
-
-static void
-fuse_lookup (fuse_req_t req,
- fuse_ino_t par,
- const char *name)
-{
- fuse_state_t *state;
-
- state = state_from_req (req);
-
- fuse_loc_fill (&state->fuse_loc, state, par, name);
-
- if (!state->fuse_loc.loc.inode) {
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": LOOKUP %s", req_callid (req),
- state->fuse_loc.loc.path);
-
- state->fuse_loc.loc.inode = dummy_inode (state->itable);
- /* to differntiate in entry_cbk what kind of call it is */
- state->is_revalidate = -1;
- } else {
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": LOOKUP %s(%"PRId64")", req_callid (req),
- state->fuse_loc.loc.path, state->fuse_loc.loc.inode->ino);
- state->is_revalidate = 1;
- }
-
- FUSE_FOP (state, fuse_lookup_cbk, lookup,
- &state->fuse_loc.loc, 0);
-}
-
-
-static void
-fuse_forget (fuse_req_t req,
- fuse_ino_t ino,
- unsigned long nlookup)
-{
- inode_t *fuse_inode;
- fuse_state_t *state;
-
- if (ino == 1) {
- fuse_reply_none (req);
- return;
- }
-
- state = state_from_req (req);
- fuse_inode = inode_search (state->itable, ino, NULL);
- inode_forget (fuse_inode, nlookup);
- inode_unref (fuse_inode);
-
- free_state (state);
- fuse_reply_none (req);
-}
-
-
-static int32_t
-fuse_attr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- fuse_state_t *state;
- fuse_req_t req;
-
- state = frame->root->state;
- req = state->req;
-
- if (op_ret == 0) {
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": %s => %"PRId64, frame->root->unique,
- state->fuse_loc.loc.path ? state->fuse_loc.loc.path : "ERR",
- buf->st_ino);
- /* TODO: make these timeouts configurable via meta */
- /* TODO: what if the inode number has changed by now */
- buf->st_blksize = BIG_FUSE_CHANNEL_SIZE;
- fuse_reply_attr (req, buf, glusterfs_fuse_attr_timeout);
- } else {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRId64"; %s => -1 (%d)", frame->root->unique,
- state->fuse_loc.loc.path ? state->fuse_loc.loc.path : "ERR",
- op_errno);
- fuse_reply_err (req, op_errno);
- }
-
- free_state (state);
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-
-static void
-fuse_getattr (fuse_req_t req,
- fuse_ino_t ino,
- struct fuse_file_info *fi)
-{
- fuse_state_t *state;
-
- state = state_from_req (req);
-
- if (ino == 1) {
- fuse_loc_fill (&state->fuse_loc, state, ino, NULL);
- if (state->fuse_loc.loc.inode)
- state->is_revalidate = 1;
- else
- state->is_revalidate = -1;
- FUSE_FOP (state,
- fuse_lookup_cbk, lookup, &state->fuse_loc.loc, 0);
- return;
- }
-
- fuse_loc_fill (&state->fuse_loc, state, ino, NULL);
- if (!state->fuse_loc.loc.inode) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRId64": GETATTR %"PRId64" (%s) (fuse_loc_fill() returned NULL inode)",
- req_callid (req), (int64_t)ino, state->fuse_loc.loc.path);
- fuse_reply_err (req, EINVAL);
- return;
- }
-
- if (list_empty (&state->fuse_loc.loc.inode->fds) ||
- S_ISDIR (state->fuse_loc.loc.inode->st_mode)) {
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": GETATTR %"PRId64" (%s)",
- req_callid (req), (int64_t)ino, state->fuse_loc.loc.path);
-
- FUSE_FOP (state,
- fuse_attr_cbk,
- stat,
- &state->fuse_loc.loc);
- } else {
- fd_t *fd = list_entry (state->fuse_loc.loc.inode->fds.next,
- fd_t, inode_list);
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": FGETATTR %"PRId64" (%s/%p)",
- req_callid (req), (int64_t)ino, state->fuse_loc.loc.path, fd);
-
- FUSE_FOP (state,
- fuse_attr_cbk,
- fstat, fd);
- }
-}
-
-
-static int32_t
-fuse_fd_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- fuse_state_t *state;
- fuse_req_t req;
-
- state = frame->root->state;
- req = state->req;
- fd = state->fd;
-
- if (op_ret >= 0) {
- struct fuse_file_info fi = {0, };
-
- LOCK (&fd->inode->lock);
- list_add (&fd->inode_list, &fd->inode->fds);
- UNLOCK (&fd->inode->lock);
-
- fi.fh = (unsigned long) fd;
- fi.flags = state->flags;
-
- if (!S_ISDIR (fd->inode->st_mode)) {
- if ((fi.flags & 3) && glusterfs_fuse_direct_io_mode)
- fi.direct_io = 1;
- }
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": %s => %p", frame->root->unique,
- state->fuse_loc.loc.path, fd);
-
- if (fuse_reply_open (req, &fi) == -ENOENT) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING, "open() got EINTR");
- state->req = 0;
-
- if (S_ISDIR (fd->inode->st_mode))
- FUSE_FOP_NOREPLY (state, closedir, fd);
- else
- FUSE_FOP_NOREPLY (state, close, fd);
- }
- } else {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRId64": %s => -1 (%d)", frame->root->unique,
- state->fuse_loc.loc.path, op_errno);
- fuse_reply_err (req, op_errno);
- fd_destroy (fd);
- }
-
- free_state (state);
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-
-
-static void
-do_chmod (fuse_req_t req,
- fuse_ino_t ino,
- struct stat *attr,
- struct fuse_file_info *fi)
-{
- fuse_state_t *state = state_from_req (req);
-
- if (fi) {
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": FCHMOD %p", req_callid (req), FI_TO_FD (fi));
-
- FUSE_FOP (state,
- fuse_attr_cbk,
- fchmod,
- FI_TO_FD (fi),
- attr->st_mode);
- } else {
- fuse_loc_fill (&state->fuse_loc, state, ino, NULL);
- if (!state->fuse_loc.loc.inode) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRId64": CHMOD %"PRId64" (%s) (fuse_loc_fill() returned NULL inode)",
- req_callid (req), (int64_t)ino, state->fuse_loc.loc.path);
- fuse_reply_err (req, EINVAL);
- return;
- }
-
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": CHMOD %s", req_callid (req),
- state->fuse_loc.loc.path);
-
- FUSE_FOP (state,
- fuse_attr_cbk,
- chmod,
- &state->fuse_loc.loc,
- attr->st_mode);
- }
-}
-
-static void
-do_chown (fuse_req_t req,
- fuse_ino_t ino,
- struct stat *attr,
- int valid,
- struct fuse_file_info *fi)
-{
- fuse_state_t *state;
-
- uid_t uid = (valid & FUSE_SET_ATTR_UID) ? attr->st_uid : (uid_t) -1;
- gid_t gid = (valid & FUSE_SET_ATTR_GID) ? attr->st_gid : (gid_t) -1;
-
- state = state_from_req (req);
-
- if (fi) {
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": FCHOWN %p", req_callid (req), FI_TO_FD (fi));
-
- FUSE_FOP (state,
- fuse_attr_cbk,
- fchown,
- FI_TO_FD (fi),
- uid,
- gid);
- } else {
- fuse_loc_fill (&state->fuse_loc, state, ino, NULL);
- if (!state->fuse_loc.loc.inode) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRId64": CHOWN %"PRId64" (%s) (fuse_loc_fill() returned NULL inode)",
- req_callid (req), (int64_t)ino, state->fuse_loc.loc.path);
- fuse_reply_err (req, EINVAL);
- return;
- }
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": CHOWN %s", req_callid (req),
- state->fuse_loc.loc.path);
-
- FUSE_FOP (state,
- fuse_attr_cbk,
- chown,
- &state->fuse_loc.loc,
- uid,
- gid);
- }
-}
-
-static void
-do_truncate (fuse_req_t req,
- fuse_ino_t ino,
- struct stat *attr,
- struct fuse_file_info *fi)
-{
- fuse_state_t *state;
-
- state = state_from_req (req);
-
- if (fi) {
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": FTRUNCATE %p/%"PRId64, req_callid (req),
- FI_TO_FD (fi), attr->st_size);
-
- FUSE_FOP (state,
- fuse_attr_cbk,
- ftruncate,
- FI_TO_FD (fi),
- attr->st_size);
- } else {
- fuse_loc_fill (&state->fuse_loc, state, ino, NULL);
- if (!state->fuse_loc.loc.inode) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRId64": TRUNCATE %s/%"PRId64" (fuse_loc_fill() returned NULL inode)",
- req_callid (req), state->fuse_loc.loc.path, attr->st_size);
- fuse_reply_err (req, EINVAL);
- return;
- }
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": TRUNCATE %s/%"PRId64, req_callid (req),
- state->fuse_loc.loc.path, attr->st_size);
-
- FUSE_FOP (state,
- fuse_attr_cbk,
- truncate,
- &state->fuse_loc.loc,
- attr->st_size);
- }
-
- return;
-}
-
-static void
-do_utimes (fuse_req_t req,
- fuse_ino_t ino,
- struct stat *attr)
-{
- fuse_state_t *state;
-
- struct timespec tv[2];
-#ifdef FUSE_STAT_HAS_NANOSEC
- tv[0] = ST_ATIM(attr);
- tv[1] = ST_MTIM(attr);
-#else
- tv[0].tv_sec = attr->st_atime;
- tv[0].tv_nsec = 0;
- tv[1].tv_sec = attr->st_mtime;
- tv[1].tv_nsec = 0;
-#endif
-
- state = state_from_req (req);
- fuse_loc_fill (&state->fuse_loc, state, ino, NULL);
- if (!state->fuse_loc.loc.inode) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRId64": UTIMENS %s (fuse_loc_fill() returned NULL inode)",
- req_callid (req), state->fuse_loc.loc.path);
- fuse_reply_err (req, EINVAL);
- return;
- }
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": UTIMENS %s", req_callid (req),
- state->fuse_loc.loc.path);
-
- FUSE_FOP (state,
- fuse_attr_cbk,
- utimens,
- &state->fuse_loc.loc,
- tv);
-}
-
-static void
-fuse_setattr (fuse_req_t req,
- fuse_ino_t ino,
- struct stat *attr,
- int valid,
- struct fuse_file_info *fi)
-{
-
- if (valid & FUSE_SET_ATTR_MODE)
- do_chmod (req, ino, attr, fi);
- else if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID))
- do_chown (req, ino, attr, valid, fi);
- else if (valid & FUSE_SET_ATTR_SIZE)
- do_truncate (req, ino, attr, fi);
- else if ((valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) == (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME))
- do_utimes (req, ino, attr);
-
- if (!valid)
- fuse_getattr (req, ino, fi);
-}
-
-
-static int32_t
-fuse_err_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- fuse_state_t *state = frame->root->state;
- fuse_req_t req = state->req;
-
- if (op_ret == 0) {
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": %s => 0", frame->root->unique,
- state->fuse_loc.loc.path ? state->fuse_loc.loc.path : "ERR");
- fuse_reply_err (req, 0);
- } else {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRId64": %s => -1 (%d)", frame->root->unique,
- state->fuse_loc.loc.path ? state->fuse_loc.loc.path : "ERR",
- op_errno);
- fuse_reply_err (req, op_errno);
- }
-
- if (state->fd)
- fd_destroy (state->fd);
-
- free_state (state);
- STACK_DESTROY (frame->root);
-
- return 0;
-}
-
-
-
-static int32_t
-fuse_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- fuse_state_t *state = frame->root->state;
- fuse_req_t req = state->req;
-
- if (op_ret == 0)
- inode_unlink (state->itable, state->fuse_loc.parent, state->fuse_loc.name);
-
- if (op_ret == 0) {
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": %s => 0", frame->root->unique,
- state->fuse_loc.loc.path);
-
- fuse_reply_err (req, 0);
- } else {
- gf_log ("glusterfs-fuse", (op_errno == ENOTEMPTY) ? GF_LOG_DEBUG : GF_LOG_ERROR,
- "%"PRId64": %s => -1 (%d)", frame->root->unique,
- state->fuse_loc.loc.path, op_errno);
-
- fuse_reply_err (req, op_errno);
- }
-
- free_state (state);
- STACK_DESTROY (frame->root);
-
- return 0;
-}
-
-
-static void
-fuse_access (fuse_req_t req,
- fuse_ino_t ino,
- int mask)
-{
- fuse_state_t *state;
-
- state = state_from_req (req);
-
- fuse_loc_fill (&state->fuse_loc, state, ino, NULL);
- if (!state->fuse_loc.loc.inode) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRId64": ACCESS %"PRId64" (%s) (fuse_loc_fill() returned NULL inode)",
- req_callid (req), (int64_t)ino, state->fuse_loc.loc.path);
- fuse_reply_err (req, EINVAL);
- return;
- }
-
- FUSE_FOP (state,
- fuse_err_cbk,
- access,
- &state->fuse_loc.loc,
- mask);
-
- return;
-}
-
-
-
-static int32_t
-fuse_readlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- const char *linkname)
-{
- fuse_state_t *state = frame->root->state;
- fuse_req_t req = state->req;
-
- if (op_ret > 0) {
- ((char *)linkname)[op_ret] = '\0';
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": %s => %s", frame->root->unique,
- state->fuse_loc.loc.path, linkname);
-
- fuse_reply_readlink(req, linkname);
- } else {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRId64": %s => -1 (%d)", frame->root->unique,
- state->fuse_loc.loc.path, op_errno);
- fuse_reply_err(req, op_errno);
- }
-
- free_state (state);
- STACK_DESTROY (frame->root);
-
- return 0;
-}
-
-static void
-fuse_readlink (fuse_req_t req,
- fuse_ino_t ino)
-{
- fuse_state_t *state;
-
- state = state_from_req (req);
- fuse_loc_fill (&state->fuse_loc, state, ino, NULL);
- if (!state->fuse_loc.loc.inode) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRId64" READLINK %s/%"PRId64" (fuse_loc_fill() returned NULL inode)",
- req_callid (req), state->fuse_loc.loc.path, state->fuse_loc.loc.inode->ino);
- fuse_reply_err (req, EINVAL);
- return;
- }
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64" READLINK %s/%"PRId64, req_callid (req),
- state->fuse_loc.loc.path, state->fuse_loc.loc.inode->ino);
-
- FUSE_FOP (state,
- fuse_readlink_cbk,
- readlink,
- &state->fuse_loc.loc,
- 4096);
-
- return;
-}
-
-
-static void
-fuse_mknod (fuse_req_t req,
- fuse_ino_t par,
- const char *name,
- mode_t mode,
- dev_t rdev)
-{
- fuse_state_t *state;
-
- state = state_from_req (req);
- fuse_loc_fill (&state->fuse_loc, state, par, name);
-
- state->fuse_loc.loc.inode = dummy_inode (state->itable);
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": MKNOD %s", req_callid (req),
- state->fuse_loc.loc.path);
-
- FUSE_FOP (state,
- fuse_entry_cbk,
- mknod,
- &state->fuse_loc.loc,
- mode,
- rdev);
-
- return;
-}
-
-
-static void
-fuse_mkdir (fuse_req_t req,
- fuse_ino_t par,
- const char *name,
- mode_t mode)
-{
- fuse_state_t *state;
-
- state = state_from_req (req);
- fuse_loc_fill (&state->fuse_loc, state, par, name);
-
- state->fuse_loc.loc.inode = dummy_inode (state->itable);
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": MKDIR %s", req_callid (req),
- state->fuse_loc.loc.path);
-
- FUSE_FOP (state,
- fuse_entry_cbk,
- mkdir,
- &state->fuse_loc.loc,
- mode);
-
- return;
-}
-
-
-static void
-fuse_unlink (fuse_req_t req,
- fuse_ino_t par,
- const char *name)
-{
- fuse_state_t *state;
-
- state = state_from_req (req);
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": UNLINK %s", req_callid (req),
- state->fuse_loc.loc.path);
-
- fuse_loc_fill (&state->fuse_loc, state, par, name);
- if (!state->fuse_loc.loc.inode) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRId64": UNLINK %s (fuse_loc_fill() returned NULL inode)", req_callid (req),
- state->fuse_loc.loc.path);
- fuse_reply_err (req, EINVAL);
- return;
- }
-
- FUSE_FOP (state,
- fuse_unlink_cbk,
- unlink,
- &state->fuse_loc.loc);
-
- return;
-}
-
-
-static void
-fuse_rmdir (fuse_req_t req,
- fuse_ino_t par,
- const char *name)
-{
- fuse_state_t *state;
-
- state = state_from_req (req);
- fuse_loc_fill (&state->fuse_loc, state, par, name);
- if (!state->fuse_loc.loc.inode) {
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": RMDIR %s (fuse_loc_fill() returned NULL inode)", req_callid (req),
- state->fuse_loc.loc.path);
- fuse_reply_err (req, EINVAL);
- return;
- }
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": RMDIR %s", req_callid (req),
- state->fuse_loc.loc.path);
-
- FUSE_FOP (state,
- fuse_unlink_cbk,
- rmdir,
- &state->fuse_loc.loc);
-
- return;
-}
-
-
-static void
-fuse_symlink (fuse_req_t req,
- const char *linkname,
- fuse_ino_t par,
- const char *name)
-{
- fuse_state_t *state;
-
- state = state_from_req (req);
- fuse_loc_fill (&state->fuse_loc, state, par, name);
-
- state->fuse_loc.loc.inode = dummy_inode (state->itable);
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": SYMLINK %s -> %s", req_callid (req),
- state->fuse_loc.loc.path, linkname);
-
- FUSE_FOP (state,
- fuse_entry_cbk,
- symlink,
- linkname,
- &state->fuse_loc.loc);
- return;
-}
-
-
-int32_t
-fuse_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- fuse_state_t *state = frame->root->state;
- fuse_req_t req = state->req;
-
- if (op_ret == 0) {
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": %s -> %s => 0", frame->root->unique,
- state->fuse_loc.loc.path,
- state->fuse_loc2.loc.path);
-
- inode_t *inode;
- {
- /* ugly ugly - to stay blind to situation where
- rename happens on a new inode
- */
- buf->st_ino = state->fuse_loc.loc.ino;
- }
- inode = inode_rename (state->itable,
- state->fuse_loc.parent,
- state->fuse_loc.name,
- state->fuse_loc2.parent,
- state->fuse_loc2.name,
- buf);
-
- inode_unref (inode);
- fuse_reply_err (req, 0);
- } else {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRId64": %s -> %s => -1 (%d)", frame->root->unique,
- state->fuse_loc.loc.path,
- state->fuse_loc2.loc.path, op_errno);
- fuse_reply_err (req, op_errno);
- }
-
- free_state (state);
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-
-static void
-fuse_rename (fuse_req_t req,
- fuse_ino_t oldpar,
- const char *oldname,
- fuse_ino_t newpar,
- const char *newname)
-{
- fuse_state_t *state;
-
- state = state_from_req (req);
-
- fuse_loc_fill (&state->fuse_loc, state, oldpar, oldname);
- if (!state->fuse_loc.loc.inode) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "for %s %"PRId64": RENAME `%s' -> `%s' (fuse_loc_fill() returned NULL inode)",
- state->fuse_loc.loc.path, req_callid (req), state->fuse_loc.loc.path,
- state->fuse_loc2.loc.path);
-
- fuse_reply_err (req, EINVAL);
- return;
- }
-
- fuse_loc_fill (&state->fuse_loc2, state, newpar, newname);
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": RENAME `%s' -> `%s'",
- req_callid (req), state->fuse_loc.loc.path,
- state->fuse_loc2.loc.path);
-
- FUSE_FOP (state,
- fuse_rename_cbk,
- rename,
- &state->fuse_loc.loc,
- &state->fuse_loc2.loc);
-
- return;
-}
-
-
-static void
-fuse_link (fuse_req_t req,
- fuse_ino_t ino,
- fuse_ino_t par,
- const char *name)
-{
- fuse_state_t *state;
-
- state = state_from_req (req);
-
- fuse_loc_fill (&state->fuse_loc, state, par, name);
- fuse_loc_fill (&state->fuse_loc2, state, ino, NULL);
- if (!state->fuse_loc2.loc.inode) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "fuse_loc_fill() returned NULL inode for %s %"PRId64": LINK %s %s",
- state->fuse_loc2.loc.path, req_callid (req),
- state->fuse_loc2.loc.path, state->fuse_loc.loc.path);
- fuse_reply_err (req, EINVAL);
- return;
- }
-
- state->fuse_loc.loc.inode = inode_ref (state->fuse_loc2.loc.inode);
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": LINK %s %s", req_callid (req),
- state->fuse_loc2.loc.path, state->fuse_loc.loc.path);
-
- FUSE_FOP (state,
- fuse_entry_cbk,
- link,
- &state->fuse_loc2.loc,
- state->fuse_loc.loc.path);
-
- return;
-}
-
-
-static int32_t
-fuse_create_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct stat *buf)
-{
- fuse_state_t *state = frame->root->state;
- fuse_req_t req = state->req;
-
- struct fuse_file_info fi = {0, };
- struct fuse_entry_param e = {0, };
-
- fd = state->fd;
-
- fi.flags = state->flags;
- if (op_ret >= 0) {
- inode_t *fuse_inode;
- fi.fh = (unsigned long) fd;
-
- if ((fi.flags & 3) && glusterfs_fuse_direct_io_mode)
- fi.direct_io = 1;
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": %s => %p", frame->root->unique,
- state->fuse_loc.loc.path, fd);
-
- fuse_inode = inode_update (state->itable,
- state->fuse_loc.parent,
- state->fuse_loc.name,
- buf);
- if (fuse_inode->ctx) {
- inode_unhash_name (state->itable, fuse_inode);
- inode_unref (fuse_inode);
-
- fuse_inode = inode_update (state->itable,
- state->fuse_loc.parent,
- state->fuse_loc.name,
- buf);
- }
-
-
- {
- if (fuse_inode->ctx != inode->ctx) {
- dict_t *swap = inode->ctx;
- inode->ctx = fuse_inode->ctx;
- fuse_inode->ctx = swap;
- fuse_inode->generation = inode->generation;
- fuse_inode->st_mode = buf->st_mode;
- }
-
- inode_lookup (fuse_inode);
-
- /* list_del (&fd->inode_list); */
-
- LOCK (&fuse_inode->lock);
- list_add (&fd->inode_list, &fuse_inode->fds);
- inode_unref (fd->inode);
- fd->inode = inode_ref (fuse_inode);
- UNLOCK (&fuse_inode->lock);
-
- // inode_destroy (inode);
- }
-
- inode_unref (fuse_inode);
-
- e.ino = fuse_inode->ino;
- e.generation = buf->st_ctime;
- e.entry_timeout = glusterfs_fuse_entry_timeout;
- e.attr_timeout = glusterfs_fuse_attr_timeout;
- e.attr = *buf;
- e.attr.st_blksize = BIG_FUSE_CHANNEL_SIZE;
-
- fi.keep_cache = 0;
-
- // if (fi.flags & 1)
- // fi.direct_io = 1;
-
- if (fuse_reply_create (req, &e, &fi) == -ENOENT) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING, "create() got EINTR");
- /* TODO: forget this node too */
- state->req = 0;
- FUSE_FOP_NOREPLY (state, close, fd);
- }
- } else {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRId64": %s => -1 (%d)", req_callid (req),
- state->fuse_loc.loc.path, op_errno);
- fuse_reply_err (req, op_errno);
- fd_destroy (fd);
- }
-
- free_state (state);
- STACK_DESTROY (frame->root);
-
- return 0;
-}
-
-
-static void
-fuse_create (fuse_req_t req,
- fuse_ino_t par,
- const char *name,
- mode_t mode,
- struct fuse_file_info *fi)
-{
- fuse_state_t *state;
- fd_t *fd;
-
- state = state_from_req (req);
- state->flags = fi->flags;
-
- fuse_loc_fill (&state->fuse_loc, state, par, name);
- state->fuse_loc.loc.inode = dummy_inode (state->itable);
-
- fd = fd_create (state->fuse_loc.loc.inode);
- state->fd = fd;
-
-
- LOCK (&fd->inode->lock);
- list_del_init (&fd->inode_list);
- UNLOCK (&fd->inode->lock);
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": CREATE %s", req_callid (req),
- state->fuse_loc.loc.path);
-
- FUSE_FOP (state,
- fuse_create_cbk,
- create,
- &state->fuse_loc.loc,
- state->flags,
- mode, fd);
-
- return;
-}
-
-
-static void
-fuse_open (fuse_req_t req,
- fuse_ino_t ino,
- struct fuse_file_info *fi)
-{
- fuse_state_t *state;
- fd_t *fd;
-
- state = state_from_req (req);
- state->flags = fi->flags;
-
- fuse_loc_fill (&state->fuse_loc, state, ino, NULL);
- if (!state->fuse_loc.loc.inode) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRId64": OPEN %s (fuse_loc_fill() returned NULL inode)", req_callid (req),
- state->fuse_loc.loc.path);
-
- fuse_reply_err (req, EINVAL);
- return;
- }
-
-
- fd = fd_create (state->fuse_loc.loc.inode);
- state->fd = fd;
-
- LOCK (&fd->inode->lock);
- list_del_init (&fd->inode_list);
- UNLOCK (&fd->inode->lock);
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": OPEN %s", req_callid (req),
- state->fuse_loc.loc.path);
-
- FUSE_FOP (state,
- fuse_fd_cbk,
- open,
- &state->fuse_loc.loc,
- fi->flags, fd);
-
- return;
-}
-
-
-static int32_t
-fuse_readv_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct stat *stbuf)
-{
- fuse_state_t *state = frame->root->state;
- fuse_req_t req = state->req;
-
- if (op_ret >= 0) {
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": READ => %d/%d,%"PRId64"/%"PRId64, frame->root->unique,
- op_ret, state->size, state->off, stbuf->st_size);
-
- fuse_reply_vec (req, vector, count);
- } else {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRId64": READ => -1 (%d)", frame->root->unique, op_errno);
-
- fuse_reply_err (req, op_errno);
- }
-
- free_state (state);
- STACK_DESTROY (frame->root);
-
- return 0;
-}
-
-static void
-fuse_readv (fuse_req_t req,
- fuse_ino_t ino,
- size_t size,
- off_t off,
- struct fuse_file_info *fi)
-{
- fuse_state_t *state;
-
- state = state_from_req (req);
- state->size = size;
- state->off = off;
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": READ (%p, size=%d, offset=%"PRId64")",
- req_callid (req), FI_TO_FD (fi), size, off);
-
- FUSE_FOP (state,
- fuse_readv_cbk,
- readv,
- FI_TO_FD (fi),
- size,
- off);
-
-}
-
-
-static int32_t
-fuse_writev_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *stbuf)
-{
- fuse_state_t *state = frame->root->state;
- fuse_req_t req = state->req;
-
- if (op_ret >= 0) {
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": WRITE => %d/%d,%"PRId64"/%"PRId64, frame->root->unique,
- op_ret, state->size, state->off, stbuf->st_size);
-
- fuse_reply_write (req, op_ret);
- } else {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRId64": WRITE => -1 (%d)", frame->root->unique, op_errno);
-
- fuse_reply_err (req, op_errno);
- }
-
- free_state (state);
- STACK_DESTROY (frame->root);
-
- return 0;
-}
-
-
-static void
-fuse_write (fuse_req_t req,
- fuse_ino_t ino,
- const char *buf,
- size_t size,
- off_t off,
- struct fuse_file_info *fi)
-{
- fuse_state_t *state;
- struct iovec vector;
-
- state = state_from_req (req);
- state->size = size;
- state->off = off;
-
- vector.iov_base = (void *)buf;
- vector.iov_len = size;
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": WRITE (%p, size=%d, offset=%"PRId64")",
- req_callid (req), FI_TO_FD (fi), size, off);
-
- FUSE_FOP (state,
- fuse_writev_cbk,
- writev,
- FI_TO_FD (fi),
- &vector,
- 1,
- off);
- return;
-}
-
-
-static void
-fuse_flush (fuse_req_t req,
- fuse_ino_t ino,
- struct fuse_file_info *fi)
-{
- fuse_state_t *state;
-
- state = state_from_req (req);
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": FLUSH %p", req_callid (req), FI_TO_FD (fi));
-
- FUSE_FOP (state,
- fuse_err_cbk,
- flush,
- FI_TO_FD (fi));
-
- return;
-}
-
-
-static void
-fuse_release (fuse_req_t req,
- fuse_ino_t ino,
- struct fuse_file_info *fi)
-{
- fuse_state_t *state;
-
- state = state_from_req (req);
- state->fd = FI_TO_FD (fi);
-
- LOCK (&state->fd->inode->lock);
- list_del_init (&state->fd->inode_list);
- UNLOCK (&state->fd->inode->lock);
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": CLOSE %p", req_callid (req), FI_TO_FD (fi));
-
- FUSE_FOP (state, fuse_err_cbk, close, state->fd);
- return;
-}
-
-
-static void
-fuse_fsync (fuse_req_t req,
- fuse_ino_t ino,
- int datasync,
- struct fuse_file_info *fi)
-{
- fuse_state_t *state;
-
- state = state_from_req (req);
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": FSYNC %p", req_callid (req), FI_TO_FD (fi));
-
- FUSE_FOP (state,
- fuse_err_cbk,
- fsync,
- FI_TO_FD (fi),
- datasync);
-
- return;
-}
-
-static void
-fuse_opendir (fuse_req_t req,
- fuse_ino_t ino,
- struct fuse_file_info *fi)
-{
- fuse_state_t *state;
- fd_t *fd;
-
- state = state_from_req (req);
- fuse_loc_fill (&state->fuse_loc, state, ino, NULL);
- if (!state->fuse_loc.loc.inode) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRId64": OPEN %s (fuse_loc_fill() returned NULL inode)", req_callid (req),
- state->fuse_loc.loc.path);
-
- fuse_reply_err (req, EINVAL);
- return;
- }
-
-
- fd = fd_create (state->fuse_loc.loc.inode);
- state->fd = fd;
-
- LOCK (&fd->inode->lock);
- list_del_init (&fd->inode_list);
- UNLOCK (&fd->inode->lock);
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": OPEN %s", req_callid (req),
- state->fuse_loc.loc.path);
-
- FUSE_FOP (state,
- fuse_fd_cbk,
- opendir,
- &state->fuse_loc.loc, fd);
-}
-
-#if 0
-
-void
-fuse_dir_reply (fuse_req_t req,
- size_t size,
- off_t off,
- fd_t *fd)
-{
- char *buf;
- size_t size_limited;
- data_t *buf_data;
-
- buf_data = dict_get (fd->ctx, "__fuse__getdents__internal__@@!!");
- buf = buf_data->data;
- size_limited = size;
-
- if (size_limited > (buf_data->len - off))
- size_limited = (buf_data->len - off);
-
- if (off > buf_data->len) {
- size_limited = 0;
- off = 0;
- }
-
- fuse_reply_buf (req, buf + off, size_limited);
-}
-
-
-static int32_t
-fuse_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entries,
- int32_t count)
-{
- fuse_state_t *state = frame->root->state;
- fuse_req_t req = state->req;
-
- if (op_ret < 0) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRId64": READDIR => -1 (%d)",
- frame->root->unique, op_errno);
-
- fuse_reply_err (state->req, op_errno);
- } else {
- dir_entry_t *trav;
- size_t size = 0;
- char *buf;
- data_t *buf_data;
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": READDIR => %d entries",
- frame->root->unique, count);
-
- for (trav = entries->next; trav; trav = trav->next) {
- size += fuse_add_direntry (req, NULL, 0, trav->name, NULL, 0);
- }
-
- buf = CALLOC (1, size);
- ERR_ABORT (buf);
- buf_data = data_from_dynptr (buf, size);
- size = 0;
-
- for (trav = entries->next; trav; trav = trav->next) {
- size_t entry_size;
- entry_size = fuse_add_direntry (req, NULL, 0, trav->name, NULL, 0);
- fuse_add_direntry (req, buf + size, entry_size, trav->name,
- &trav->buf, entry_size + size);
- size += entry_size;
- }
-
- dict_set (state->fd->ctx,
- "__fuse__getdents__internal__@@!!",
- buf_data);
-
- fuse_dir_reply (state->req, state->size, state->off, state->fd);
- }
-
- free_state (state);
- STACK_DESTROY (frame->root);
-
- return 0;
-}
-
-static void
-fuse_getdents (fuse_req_t req,
- fuse_ino_t ino,
- struct fuse_file_info *fi,
- size_t size,
- off_t off,
- int32_t flag)
-{
- fuse_state_t *state;
- fd_t *fd = FI_TO_FD (fi);
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": GETDENTS %p", req_callid (req), FI_TO_FD (fi));
-
- if (!off)
- dict_del (fd->ctx, "__fuse__getdents__internal__@@!!");
-
- if (dict_get (fd->ctx, "__fuse__getdents__internal__@@!!")) {
- fuse_dir_reply (req, size, off, fd);
- return;
- }
-
- state = state_from_req (req);
-
- state->size = size;
- state->off = off;
- state->fd = fd;
-
- FUSE_FOP (state,
- fuse_getdents_cbk,
- getdents,
- fd,
- size,
- off,
- 0);
-}
-
-#endif
-
-static int32_t
-fuse_readdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *buf)
-{
- fuse_state_t *state = frame->root->state;
- fuse_req_t req = state->req;
-
- if (op_ret >= 0) {
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": READDIR => %d/%d,%"PRId64, frame->root->unique,
- op_ret, state->size, state->off);
-
- fuse_reply_buf (req, (void *)buf, op_ret);
- } else {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRId64": READDIR => -1 (%d)", frame->root->unique, op_errno);
-
- fuse_reply_err (req, op_errno);
- }
-
- free_state (state);
- STACK_DESTROY (frame->root);
-
- return 0;
-
-}
-
-static void
-fuse_readdir (fuse_req_t req,
- fuse_ino_t ino,
- size_t size,
- off_t off,
- struct fuse_file_info *fi)
-{
- fuse_state_t *state;
-
- state = state_from_req (req);
- state->size = size;
- state->off = off;
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": READDIR (%p, size=%d, offset=%"PRId64")",
- req_callid (req), FI_TO_FD (fi), size, off);
-
- FUSE_FOP (state,
- fuse_readdir_cbk,
- readdir,
- FI_TO_FD (fi),
- size,
- off);
-}
-
-
-static void
-fuse_releasedir (fuse_req_t req,
- fuse_ino_t ino,
- struct fuse_file_info *fi)
-{
- fuse_state_t *state;
-
- state = state_from_req (req);
- state->fd = FI_TO_FD (fi);
-
- LOCK (&state->fd->inode->lock);
- list_del_init (&state->fd->inode_list);
- UNLOCK (&state->fd->inode->lock);
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": CLOSEDIR %p", req_callid (req), FI_TO_FD (fi));
-
- FUSE_FOP (state, fuse_err_cbk, closedir, state->fd);
-}
-
-
-static void
-fuse_fsyncdir (fuse_req_t req,
- fuse_ino_t ino,
- int datasync,
- struct fuse_file_info *fi)
-{
- fuse_state_t *state;
-
- state = state_from_req (req);
-
- FUSE_FOP (state,
- fuse_err_cbk,
- fsyncdir,
- FI_TO_FD (fi),
- datasync);
-
- return;
-}
-
-
-static int32_t
-fuse_statfs_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct statvfs *buf)
-{
- fuse_state_t *state = frame->root->state;
- fuse_req_t req = state->req;
-
- /*
- Filesystems (like ZFS on solaris) reports
- different ->f_frsize and ->f_bsize. Old coreutils
- df tools use statfs() and do not see ->f_frsize.
- the ->f_blocks, ->f_bavail and ->f_bfree are
- w.r.t ->f_frsize and not ->f_bsize which makes the
- df tools report wrong values.
-
- Scale the block counts to match ->f_bsize.
- */
- /* TODO: with old coreutils, f_bsize is taken from stat()'s st_blksize
- * so the df with old coreutils this wont work :(
- */
-
- if (op_ret == 0) {
-
- buf->f_blocks *= buf->f_frsize;
- buf->f_blocks /= BIG_FUSE_CHANNEL_SIZE;
-
- buf->f_bavail *= buf->f_frsize;
- buf->f_bavail /= BIG_FUSE_CHANNEL_SIZE;
-
- buf->f_bfree *= buf->f_frsize;
- buf->f_bfree /= BIG_FUSE_CHANNEL_SIZE;
-
- buf->f_frsize = buf->f_bsize = BIG_FUSE_CHANNEL_SIZE;
-
- fuse_reply_statfs (req, buf);
-
- } else {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRId64": ERR => -1 (%d)", frame->root->unique, op_errno);
- fuse_reply_err (req, op_errno);
- }
-
- free_state (state);
- STACK_DESTROY (frame->root);
-
- return 0;
-}
-
-
-static void
-fuse_statfs (fuse_req_t req,
- fuse_ino_t ino)
-{
- fuse_state_t *state;
-
- state = state_from_req (req);
- fuse_loc_fill (&state->fuse_loc, state, 1, NULL);
- if (!state->fuse_loc.loc.inode) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRId64": STATFS (fuse_loc_fill() returned NULL inode)", req_callid (req));
-
- fuse_reply_err (req, EINVAL);
- return;
- }
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": STATFS", req_callid (req));
-
- FUSE_FOP (state,
- fuse_statfs_cbk,
- statfs,
- &state->fuse_loc.loc);
-}
-
-static void
-fuse_setxattr (fuse_req_t req,
- fuse_ino_t ino,
- const char *name,
- const char *value,
- size_t size,
- int flags)
-{
- fuse_state_t *state;
-
- state = state_from_req (req);
- state->size = size;
- fuse_loc_fill (&state->fuse_loc, state, ino, NULL);
- if (!state->fuse_loc.loc.inode) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRId64": SETXATTR %s/%"PRId64" (%s) (fuse_loc_fill() returned NULL inode)",
- req_callid (req),
- state->fuse_loc.loc.path, (int64_t)ino, name);
-
- fuse_reply_err (req, EINVAL);
- return;
- }
-
- state->dict = get_new_dict ();
-
- dict_set (state->dict, (char *)name,
- bin_to_data ((void *)value, size));
- dict_ref (state->dict);
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": SETXATTR %s/%"PRId64" (%s)", req_callid (req),
- state->fuse_loc.loc.path, (int64_t)ino, name);
-
- FUSE_FOP (state,
- fuse_err_cbk,
- setxattr,
- &state->fuse_loc.loc,
- state->dict,
- flags);
-
- return;
-}
-
-
-static int32_t
-fuse_xattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
- int32_t ret = op_ret;
- char *value = "";
- fuse_state_t *state = frame->root->state;
- fuse_req_t req = state->req;
-
- if (ret >= 0) {
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": %s => %d", frame->root->unique,
- state->fuse_loc.loc.path, op_ret);
-
- /* if successful */
- if (state->name) {
- /* if callback for getxattr */
- data_t *value_data = dict_get (dict, state->name);
- if (value_data) {
- ret = value_data->len; /* Don't return the value for '\0' */
- value = value_data->data;
-
- if (state->size) {
- /* if callback for getxattr and asks for value */
- fuse_reply_buf (req, value, ret);
- } else {
- /* if callback for getxattr and asks for value length only */
- fuse_reply_xattr (req, ret);
- }
- } else {
- fuse_reply_err (req, ENODATA);
- }
- } else {
- /* if callback for listxattr */
- int32_t len = 0;
- data_pair_t *trav = dict->members_list;
- while (trav) {
- len += strlen (trav->key) + 1;
- trav = trav->next;
- }
- value = alloca (len + 1);
- ERR_ABORT (value);
- len = 0;
- trav = dict->members_list;
- while (trav) {
- strcpy (value + len, trav->key);
- value[len + strlen(trav->key)] = '\0';
- len += strlen (trav->key) + 1;
- trav = trav->next;
- }
- if (state->size) {
- /* if callback for listxattr and asks for list of keys */
- fuse_reply_buf (req, value, len);
- } else {
- /* if callback for listxattr and asks for length of keys only */
- fuse_reply_xattr (req, len);
- }
- }
- } else {
- /* if failure - no need to check if listxattr or getxattr */
- if (op_errno != ENODATA) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRId64": %s => -1 (%d)", frame->root->unique,
- state->fuse_loc.loc.path, op_errno);
- } else {
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": %s => -1 (%d)", frame->root->unique,
- state->fuse_loc.loc.path, op_errno);
- }
-
- fuse_reply_err (req, op_errno);
- }
-
- free_state (state);
- STACK_DESTROY (frame->root);
-
- return 0;
-}
-
-
-static void
-fuse_getxattr (fuse_req_t req,
- fuse_ino_t ino,
- const char *name,
- size_t size)
-{
- fuse_state_t *state;
-
- state = state_from_req (req);
- state->size = size;
- state->name = strdup (name);
- fuse_loc_fill (&state->fuse_loc, state, ino, NULL);
- if (!state->fuse_loc.loc.inode) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRId64": GETXATTR %s/%"PRId64" (%s) (fuse_loc_fill() returned NULL inode)",
- req_callid (req), state->fuse_loc.loc.path, (int64_t)ino, name);
-
- fuse_reply_err (req, EINVAL);
- return;
- }
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": GETXATTR %s/%"PRId64" (%s)", req_callid (req),
- state->fuse_loc.loc.path, (int64_t)ino, name);
-
- FUSE_FOP (state,
- fuse_xattr_cbk,
- getxattr,
- &state->fuse_loc.loc);
-
- return;
-}
-
-
-static void
-fuse_listxattr (fuse_req_t req,
- fuse_ino_t ino,
- size_t size)
-{
- fuse_state_t *state;
-
- state = state_from_req (req);
- state->size = size;
- fuse_loc_fill (&state->fuse_loc, state, ino, NULL);
- if (!state->fuse_loc.loc.inode) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRId64": LISTXATTR %s/%"PRId64" (fuse_loc_fill() returned NULL inode)",
- req_callid (req), state->fuse_loc.loc.path, (int64_t)ino);
-
- fuse_reply_err (req, EINVAL);
- return;
- }
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": LISTXATTR %s/%"PRId64, req_callid (req),
- state->fuse_loc.loc.path, (int64_t)ino);
-
- FUSE_FOP (state,
- fuse_xattr_cbk,
- getxattr,
- &state->fuse_loc.loc);
-
- return;
-}
-
-
-static void
-fuse_removexattr (fuse_req_t req,
- fuse_ino_t ino,
- const char *name)
-
-{
- fuse_state_t *state;
-
- state = state_from_req (req);
- fuse_loc_fill (&state->fuse_loc, state, ino, NULL);
- if (!state->fuse_loc.loc.inode) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRId64": REMOVEXATTR %s/%"PRId64" (%s) (fuse_loc_fill() returned NULL inode)",
- req_callid (req), state->fuse_loc.loc.path, (int64_t)ino, name);
-
- fuse_reply_err (req, EINVAL);
- return;
- }
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": REMOVEXATTR %s/%"PRId64" (%s)", req_callid (req),
- state->fuse_loc.loc.path, (int64_t)ino, name);
-
- FUSE_FOP (state,
- fuse_err_cbk,
- removexattr,
- &state->fuse_loc.loc,
- name);
-
- return;
-}
-
-static int32_t
-fuse_getlk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct flock *lock)
-{
- fuse_state_t *state = frame->root->state;
-
- if (op_ret == 0) {
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": ERR => 0", frame->root->unique);
- fuse_reply_lock (state->req, lock);
- } else {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRId64": ERR => -1 (%d)", frame->root->unique, op_errno);
- fuse_reply_err (state->req, op_errno);
- }
-
- free_state (state);
- STACK_DESTROY (frame->root);
-
- return 0;
-}
-
-static void
-fuse_getlk (fuse_req_t req,
- fuse_ino_t ino,
- struct fuse_file_info *fi,
- struct flock *lock)
-{
- fuse_state_t *state;
-
- state = state_from_req (req);
- state->req = req;
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": GETLK %p", req_callid (req), FI_TO_FD (fi));
-
- FUSE_FOP (state,
- fuse_getlk_cbk,
- lk,
- FI_TO_FD (fi),
- F_GETLK,
- lock);
-
- return;
-}
-
-static int32_t
-fuse_setlk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct flock *lock)
-{
- fuse_state_t *state = frame->root->state;
-
- if (op_ret == 0) {
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": ERR => 0", frame->root->unique);
- fuse_reply_err (state->req, 0);
- } else {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRId64": ERR => -1 (%d)", frame->root->unique, op_errno);
- fuse_reply_err (state->req, op_errno);
- }
-
- free_state (state);
- STACK_DESTROY (frame->root);
-
- return 0;
-}
-
-static void
-fuse_setlk (fuse_req_t req,
- fuse_ino_t ino,
- struct fuse_file_info *fi,
- struct flock *lock,
- int sleep)
-{
- fuse_state_t *state;
-
- state = state_from_req (req);
- state->req = req;
-
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRId64": SETLK %p (sleep=%d)", req_callid (req), FI_TO_FD (fi),
- sleep);
-
- FUSE_FOP (state,
- fuse_setlk_cbk,
- lk,
- FI_TO_FD(fi),
- (sleep ? F_SETLKW : F_SETLK),
- lock);
-
- return;
-}
-
-
-int32_t
-fuse_forget_notify (call_frame_t *frame, xlator_t *this,
- inode_t *inode)
-{
- return 0;
-}
-
-struct xlator_fops fuse_xl_fops = {
- .forget = fuse_forget_notify
-};
-
-static void
-fuse_init (void *data, struct fuse_conn_info *conn)
-{
- transport_t *trans = data;
- struct fuse_private *priv = trans->private;
- xlator_t *xl = trans->xl;
- int32_t ret;
-
- xl->name = "fuse";
- xl->fops = &fuse_xl_fops;
- xl->itable = inode_table_new (0, xl);
- xl->notify = default_notify;
- ret = xlator_tree_init (xl);
- if (ret == 0) {
-
- } else {
- fuse_unmount (priv->mountpoint, priv->ch);
- exit (1);
- }
-}
-
-
-static void
-fuse_destroy (void *data)
-{
-
-}
-
-struct fuse_lowlevel_ops fuse_ops = {
- .init = fuse_init,
- .destroy = fuse_destroy,
- .lookup = fuse_lookup,
- .forget = fuse_forget,
- .getattr = fuse_getattr,
- .setattr = fuse_setattr,
- .opendir = fuse_opendir,
- .readdir = fuse_readdir,
- .releasedir = fuse_releasedir,
- .access = fuse_access,
- .readlink = fuse_readlink,
- .mknod = fuse_mknod,
- .mkdir = fuse_mkdir,
- .unlink = fuse_unlink,
- .rmdir = fuse_rmdir,
- .symlink = fuse_symlink,
- .rename = fuse_rename,
- .link = fuse_link,
- .create = fuse_create,
- .open = fuse_open,
- .read = fuse_readv,
- .write = fuse_write,
- .flush = fuse_flush,
- .release = fuse_release,
- .fsync = fuse_fsync,
- .fsyncdir = fuse_fsyncdir,
- .statfs = fuse_statfs,
- .setxattr = fuse_setxattr,
- .getxattr = fuse_getxattr,
- .listxattr = fuse_listxattr,
- .removexattr = fuse_removexattr,
- .getlk = fuse_getlk,
- .setlk = fuse_setlk
-};
-
-
-static int32_t
-fuse_transport_disconnect (transport_t *this)
-{
- struct fuse_private *priv = this->private;
-
- gf_log ("glusterfs-fuse",
- GF_LOG_DEBUG,
- "cleaning up fuse transport in disconnect handler");
-
- fuse_session_remove_chan (priv->ch);
- fuse_session_destroy (priv->se);
- fuse_unmount (priv->mountpoint, priv->ch);
-
- FREE (priv);
- priv = NULL;
- this->private = NULL;
-
- /* TODO: need graceful exit. every xlator should be ->fini()'ed
- and come out of main poll loop cleanly
- */
- exit (0);
-
- return -1;
-}
-
-
-static int32_t
-fuse_transport_init (transport_t *this,
- dict_t *options,
- event_notify_fn_t notify)
-{
- char *mountpoint = strdup (data_to_str (dict_get (options,
- "mountpoint")));
- char *source;
- asprintf (&source, "fsname=glusterfs");
- char *argv[] = { "glusterfs",
-
-#ifndef GF_DARWIN_HOST_OS
- "-o", "nonempty",
-#endif
- "-o", "allow_other",
- "-o", "default_permissions",
- "-o", source,
- "-o", "max_readahead=1048576",
- "-o", "max_read=1048576",
- "-o", "max_write=1048576",
- NULL };
-#ifdef GF_DARWIN_HOST_OS
- int argc = 13;
-#else
- int argc = 15;
-#endif
-
- struct fuse_args args = FUSE_ARGS_INIT(argc,
- argv);
- struct fuse_private *priv = NULL;
- int32_t res;
-
- priv = CALLOC (1, sizeof (*priv));
- ERR_ABORT (priv);
-
-
- this->notify = notify;
- this->private = (void *)priv;
-
- priv->ch = fuse_mount (mountpoint, &args);
- if (!priv->ch) {
- gf_log ("glusterfs-fuse",
- GF_LOG_ERROR, "fuse_mount failed (%s)\n", strerror (errno));
- fuse_opt_free_args(&args);
- goto err_free;
- }
-
- priv->se = fuse_lowlevel_new (&args, &fuse_ops, sizeof (fuse_ops), this);
- fuse_opt_free_args(&args);
-
- res = fuse_set_signal_handlers (priv->se);
- if (res == -1) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR, "fuse_set_signal_handlers failed");
- goto err;
- }
-
- fuse_session_add_chan (priv->se, priv->ch);
-
- priv->fd = fuse_chan_fd (priv->ch);
- this->buf = data_ref (data_from_dynptr (NULL, 0));
- this->buf->is_locked = 1;
-
- priv->mountpoint = mountpoint;
-
- transport_ref (this);
- //poll_register (this->xl_private, priv->fd, this);
-
- return 0;
-
- err:
- fuse_unmount (mountpoint, priv->ch);
- err_free:
- FREE (mountpoint);
- mountpoint = NULL;
- return -1;
-}
-
-void
-guts_log_req (void *, int32_t);
-
-static void *
-fuse_thread_proc (void *data)
-{
- transport_t *trans = data;
- struct fuse_private *priv = trans->private;
- int32_t res = 0;
- data_t *buf = trans->buf;
- int32_t ref = 0;
- size_t chan_size = fuse_chan_bufsize (priv->ch);
- char *recvbuf = CALLOC (1, chan_size);
- ERR_ABORT (recvbuf);
-
- while (!fuse_session_exited (priv->se)) {
- int32_t fuse_chan_receive (struct fuse_chan * ch,
- char *buf,
- int32_t size);
-
-
- res = fuse_chan_receive (priv->ch,
- recvbuf,
- chan_size);
-
- if (res == -1) {
- transport_disconnect (trans);
- }
-
- buf = trans->buf;
-
- if (res && res != -1) {
- if (buf->len < (res)) {
- if (buf->data) {
- FREE (buf->data);
- buf->data = NULL;
- }
- buf->data = CALLOC (1, res);
- ERR_ABORT (buf->data);
- buf->len = res;
- }
- memcpy (buf->data, recvbuf, res); // evil evil
- guts_log_req (buf->data, res);
- fuse_session_process (priv->se,
- buf->data,
- res,
- priv->ch);
- }
-
- LOCK (&buf->lock);
- ref = buf->refcount;
- UNLOCK (&buf->lock);
- if (1) {
- data_unref (buf);
-
- trans->buf = data_ref (data_from_dynptr (NULL, 0));
- trans->buf->is_locked = 1;
- }
- }
-
- exit (0);
-
- return NULL;
-}
-
-
-static int32_t
-fuse_transport_notify (xlator_t *xl,
- int32_t event,
- void *data,
- ...)
-{
- transport_t *trans = data;
- struct fuse_private *priv = trans->private;
- int32_t res = 0;
- data_t *buf;
- int32_t ref = 0;
-
- if (event == GF_EVENT_POLLERR) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR, "got GF_EVENT_POLLERR");
- transport_disconnect (trans);
- return -1;
- }
-
- if (event != GF_EVENT_POLLIN) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING, "Ignoring notify event %d",
- event);
- return 0;
- }
-
- if (!fuse_session_exited(priv->se)) {
- static size_t chan_size = 0;
-
- int32_t fuse_chan_receive (struct fuse_chan * ch,
- char *buf,
- int32_t size);
- if (!chan_size)
- chan_size = fuse_chan_bufsize (priv->ch) ;
-
- buf = trans->buf;
-
- if (!buf->data) {
- buf->data = MALLOC (chan_size);
- ERR_ABORT (buf->data);
- buf->len = chan_size;
- }
-
- res = fuse_chan_receive (priv->ch,
- buf->data,
- chan_size);
- /* if (res == -1) {
- transport_destroy (trans);
- */
- if (res && res != -1) {
- /* trace the request and log it to tio file */
- guts_log_req (buf->data, res);
- fuse_session_process (priv->se,
- buf->data,
- res,
- priv->ch);
- }
-
- LOCK (&buf->lock);
- ref = buf->refcount;
- UNLOCK (&buf->lock);
- /* TODO do the check with a lock */
- if (ref > 1) {
- data_unref (buf);
-
- // trans->buf = data_ref (data_from_dynptr (malloc (fuse_chan_bufsize (priv->ch)),
- trans->buf = data_ref (data_from_dynptr (NULL, 0));
- trans->buf->data = MALLOC (chan_size);
- ERR_ABORT (trans->buf->data);
- trans->buf->len = chan_size;
- trans->buf->is_locked = 1;
- }
- } else {
- transport_disconnect (trans);
- }
-
- /*
- if (fuse_session_exited (priv->se)) {
- transport_destroy (trans);
- res = -1;
- }*/
-
- return res >= 0 ? 0 : res;
-}
-
-static void
-fuse_transport_fini (transport_t *this)
-{
-
-}
-
-static struct transport_ops fuse_transport_ops = {
- .disconnect = fuse_transport_disconnect,
-};
-
-static transport_t fuse_transport = {
- .ops = &fuse_transport_ops,
- .private = NULL,
- .xl = NULL,
- .init = fuse_transport_init,
- .fini = fuse_transport_fini,
- .notify = fuse_transport_notify
-};
-
-
-transport_t *
-glusterfs_mount (glusterfs_ctx_t *ctx,
- const char *mount_point)
-{
- dict_t *options = get_new_dict ();
- transport_t *new_fuse = CALLOC (1, sizeof (*new_fuse));
- ERR_ABORT (new_fuse);
-
- memcpy (new_fuse, &fuse_transport, sizeof (*new_fuse));
- new_fuse->ops = &fuse_transport_ops;
- new_fuse->xl_private = ctx;
-
- dict_set (options,
- "mountpoint",
- str_to_data ((char *)mount_point));
-
- return (new_fuse->init (new_fuse,
- options,
- fuse_transport_notify) == 0 ? new_fuse : NULL);
-}
-
-int32_t
-fuse_thread (pthread_t *thread, void *data)
-{
- return pthread_create (thread, NULL, fuse_thread_proc, data);
-}
-
-
diff --git a/glusterfs-guts/src/fuse-extra.c b/glusterfs-guts/src/fuse-extra.c
deleted file mode 100644
index 95bd0f3ad..000000000
--- a/glusterfs-guts/src/fuse-extra.c
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif /* _CONFIG_H */
-
-#include "fuse-extra.h"
-#include "common-utils.h"
-#include <stdio.h>
-#include <pthread.h>
-#include <stdlib.h>
-#include <string.h>
-#include "common-utils.h"
-
-struct fuse_req;
-struct fuse_ll;
-
-struct fuse_req {
- struct fuse_ll *f;
- uint64_t unique;
- int ctr;
- pthread_mutex_t lock;
- struct fuse_ctx ctx;
- struct fuse_chan *ch;
- int interrupted;
- union {
- struct {
- uint64_t unique;
- } i;
- struct {
- fuse_interrupt_func_t func;
- void *data;
- } ni;
- } u;
- struct fuse_req *next;
- struct fuse_req *prev;
-};
-
-struct fuse_ll {
- int debug;
- int allow_root;
- struct fuse_lowlevel_ops op;
- int got_init;
- void *userdata;
- uid_t owner;
- struct fuse_conn_info conn;
- struct fuse_req list;
- struct fuse_req interrupts;
- pthread_mutex_t lock;
- int got_destroy;
-};
-
-struct fuse_out_header {
- uint32_t len;
- int32_t error;
- uint64_t unique;
-};
-
-uint64_t req_callid (fuse_req_t req)
-{
- return req->unique;
-}
-
-static void destroy_req(fuse_req_t req)
-{
- pthread_mutex_destroy (&req->lock);
- FREE (req);
-}
-
-static void list_del_req(struct fuse_req *req)
-{
- struct fuse_req *prev = req->prev;
- struct fuse_req *next = req->next;
- prev->next = next;
- next->prev = prev;
-}
-
-static void
-free_req (fuse_req_t req)
-{
- int ctr;
- struct fuse_ll *f = req->f;
-
- pthread_mutex_lock(&req->lock);
- req->u.ni.func = NULL;
- req->u.ni.data = NULL;
- pthread_mutex_unlock(&req->lock);
-
- pthread_mutex_lock(&f->lock);
- list_del_req(req);
- ctr = --req->ctr;
- pthread_mutex_unlock(&f->lock);
- if (!ctr)
- destroy_req(req);
-}
-
-int32_t
-fuse_reply_vec (fuse_req_t req,
- struct iovec *vector,
- int32_t count)
-{
- int32_t error = 0;
- struct fuse_out_header out;
- struct iovec *iov;
- int res;
-
- iov = alloca ((count + 1) * sizeof (*vector));
- out.unique = req->unique;
- out.error = error;
- iov[0].iov_base = &out;
- iov[0].iov_len = sizeof(struct fuse_out_header);
- memcpy (&iov[1], vector, count * sizeof (*vector));
- count++;
- out.len = iov_length(iov, count);
- res = fuse_chan_send(req->ch, iov, count);
- free_req(req);
-
- return res;
-}
diff --git a/glusterfs-guts/src/fuse-extra.h b/glusterfs-guts/src/fuse-extra.h
deleted file mode 100644
index 30e3a3f76..000000000
--- a/glusterfs-guts/src/fuse-extra.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _FUSE_EXTRA_H
-#define _FUSE_EXTRA_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif /* _CONFIG_H */
-
-#include <stdlib.h>
-#include <fuse/fuse_lowlevel.h>
-
-uint64_t req_callid (fuse_req_t req);
-
-int32_t
-fuse_reply_vec (fuse_req_t req,
- struct iovec *vector,
- int32_t count);
-
-#endif /* _FUSE_EXTRA_H */
diff --git a/glusterfs-guts/src/glusterfs-fuse.h b/glusterfs-guts/src/glusterfs-fuse.h
deleted file mode 100644
index 76d1a9333..000000000
--- a/glusterfs-guts/src/glusterfs-fuse.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef __GLUSTERFS_FUSE_H__
-#define __GLUSTERFS_FUSE_H__
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif /* _CONFIG_H */
-
-#define DEFAULT_LOG_FILE DATADIR"/log/glusterfs/glusterfs.log"
-#define DEFAULT_GLUSTERFS_CLIENT_VOL CONFDIR "/glusterfs-client.vol"
-
-#define SPEC_LOCAL_FILE 1
-#define SPEC_REMOTE_FILE 2
-
-#if 0
-#define GF_YES 1
-#define GF_NO 0
-#endif
-
-#ifdef GF_LOG_FUSE_ARGS
-#undef GF_LOG_FUSE_ARGS
-#endif
-
-struct gf_spec_location {
- int32_t where;
- union {
- char *file;
- struct {
- char *ip;
- char *port;
- char *transport;
- }server;
- }spec;
-};
-
-transport_t * glusterfs_mount (glusterfs_ctx_t *ctx,
- const char *mount_point);
-
-#endif /* __GLUSTERFS_FUSE_H__ */
diff --git a/glusterfs-guts/src/glusterfs-guts.c b/glusterfs-guts/src/glusterfs-guts.c
deleted file mode 100644
index b1a1d355f..000000000
--- a/glusterfs-guts/src/glusterfs-guts.c
+++ /dev/null
@@ -1,400 +0,0 @@
-/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <argp.h>
-#include <string.h>
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "glusterfs-guts.h"
-
-/* argp initializations */
-static char doc[] = "glusterfs-guts is unit testing suite for glusterfs";
-static char argp_doc[] = "";
-const char *argp_program_version = PACKAGE_NAME " " PACKAGE_VERSION " built on " __DATE__;
-const char *argp_program_bug_address = PACKAGE_BUGREPORT;
-
-guts_ctx_t guts_ctx;
-error_t parse_opts (int32_t key, char *arg, struct argp_state *_state);
-
-static struct argp_option options[] = {
- {"spec-file", 'f', "VOLUMESPEC-FILE", 0,\
- "Load VOLUMESPEC-FILE."},
- {"threads", 't', "NUMBER", 0,\
- "Load NUMBER of threads."},
- {"tio-file", 'i', "FILE", 0,\
- "Replay fops from FILE."},
- {"tio-directory", 'I', "DIRECTORY", 0,\
- "Replay fops from files in DIRECTORY. Valid option only when using more than one thread."},
- {"log-level", 'L', "LOGLEVEL", 0,
- "LOGLEVEL should be one of DEBUG, WARNING, [ERROR], CRITICAL, NONE"},
- {"log-file", 'l', "LOGFILE", 0, \
- "Specify the file to redirect logs"},
- {"trace", 'T', "MOUNTPOINT", 0, \
- "Run guts in trace mode. Guts mounts glusterfs on MOUNTPOINT specified"},
- {"output", 'o', "OUTPUT-TIOFILE", 0, \
- "Write trace io output to OUTPUT-TIOFILE. Valid only when run in trace(-T) mode."},
- {"version", 'V', 0, 0,\
- "print version information"},
- { 0, }
-};
-
-static struct argp argp = { options, parse_opts, argp_doc, doc };
-
-/* guts_print_version - used by argument parser routine to print version information for guts */
-static int32_t
-guts_print_version (void)
-{
- printf ("%s\n", argp_program_version);
- printf ("Copyright (c) 2006-2009 Z RESEARCH Inc. <http://www.zresearch.com>\n");
- printf ("GlusterFS comes with ABSOLUTELY NO WARRANTY.\nYou may redistribute copies of GlusterFS under the terms of the GNU General Public License.\n");
- exit (0);
-}
-
-/* parse_opts - argument parsing helper routine for argp library */
-error_t
-parse_opts (int32_t key, char *arg, struct argp_state *_state)
-{
- guts_ctx_t *state = _state->input;
-
- switch (key) {
- case 'f':
- if (!state->specfile) {
- state->specfile = strdup (arg);
- }
- break;
-
- case 't':
- if (!state->threads) {
- state->threads = strtol (arg, NULL, 0);
- }
- break;
-
- case 'i':
- if (state->threads == 1) {
- state->file = strdup (arg);
- } else {
- fprintf (stderr, "glusterfs-guts: -i option is valid only when guts is running single thread\n");
- exit (1);
- }
- break;
-
- case 'I':
- if (state->threads > 1) {
- state->directory = strdup (arg);
- } else {
- fprintf (stderr, "glusterfs-guts: -I option is valid only when guts is running multiple threads\n");
- exit (1);
- }
- break;
-
- case 'L':
- /* set log level */
- if (!strncasecmp (arg, "DEBUG", strlen ("DEBUG"))) {
- state->loglevel = GF_LOG_DEBUG;
- } else if (!strncasecmp (arg, "WARNING", strlen ("WARNING"))) {
- state->loglevel = GF_LOG_WARNING;
- } else if (!strncasecmp (arg, "CRITICAL", strlen ("CRITICAL"))) {
- state->loglevel = GF_LOG_CRITICAL;
- } else if (!strncasecmp (arg, "NONE", strlen ("NONE"))) {
- state->loglevel = GF_LOG_NONE;
- } else if (!strncasecmp (arg, "ERROR", strlen ("ERROR"))) {
- state->loglevel = GF_LOG_ERROR;
- } else {
- fprintf (stderr, "glusterfs-guts: Unrecognized log-level \"%s\", possible values are \"DEBUG|WARNING|[ERROR]|CRITICAL|NONE\"\n", arg);
- exit (EXIT_FAILURE);
- }
- break;
- case 'l':
- /* set log file */
- state->logfile = strdup (arg);
- break;
-
- case 'T':
- state->trace = 1;
- state->mountpoint = strdup (arg);
- break;
-
- case 'o':
- state->file = strdup (arg);
- break;
-
- case 'V':
- guts_print_version ();
- break;
-
- }
- return 0;
-}
-
-/* get_xlator_graph - creates a translator graph and returns the pointer to the root of the xlator tree
- *
- * @ctx: guts context structure
- * @conf: file handle to volume specfile
- *
- * returns pointer to the root of the translator tree
- */
-static xlator_t *
-get_xlator_graph (glusterfs_ctx_t *ctx,
- FILE *conf)
-{
- xlator_t *tree, *trav = NULL;
-
- tree = file_to_xlator_tree (ctx, conf);
- trav = tree;
-
- if (tree == NULL) {
- gf_log ("glusterfs-guts",
- GF_LOG_ERROR,
- "specification file parsing failed, exiting");
- return NULL;
- }
-
- tree = trav;
-
- return tree;
-}
-
-/* get_spec_fp - get file handle to volume spec file specified.
- *
- * @ctx: guts context structure
- *
- * returns FILE pointer to the volume spec file.
- */
-static FILE *
-get_spec_fp (guts_ctx_t *ctx)
-{
- char *specfile = ctx->specfile;
- FILE *conf = NULL;
-
- specfile = ctx->specfile;
-
- conf = fopen (specfile, "r");
-
- if (!conf) {
- perror (specfile);
- return NULL;
- }
- gf_log ("glusterfs-guts",
- GF_LOG_DEBUG,
- "loading spec from %s",
- specfile);
-
- return conf;
-}
-
-static void *
-guts_thread_main (void *ctx)
-{
- guts_thread_ctx_t *tctx = (guts_thread_ctx_t *) ctx;
-
- printf ("starting thread main with %s:\n", tctx->file);
- guts_replay (tctx);
- printf ("ending thread main.\n");
-
- return NULL;
-}
-
-/* guts_create_threads - creates different threads based on thread number specified in ctx and assigns a
- * tio file to each thread and attaches each thread to the graph created by main().
- * @ctx: guts_ctx_t which contains the context corresponding to the current run of guts
- *
- * returns the guts_threads_t structure which contains handles to the different threads created.
- *
- */
-static guts_threads_t *
-guts_create_threads (guts_ctx_t *ctx)
-{
- guts_threads_t *threads = NULL;
- int32_t thread_count = ctx->threads;
-
- threads = CALLOC (1, sizeof (*threads));
- ERR_ABORT (threads);
-
-
- INIT_LIST_HEAD (&(threads->threads));
-
- if (thread_count == 1) {
- /* special case: we have only one thread and we are given a tio-file as argument instead of a directory.
- * handling differently */
- guts_thread_ctx_t *thread = NULL;
- thread = CALLOC (1, sizeof (*thread));
- ERR_ABORT (thread);
- list_add (&thread->threads, &threads->threads);
- thread->file = strdup (ctx->file);
- thread->ctx = ctx;
- } else {
- /* look for .tio files in the directory given and assign to each of the threads */
- DIR *dir = opendir (ctx->directory);
-
- if (!dir) {
- gf_log ("guts",
- GF_LOG_ERROR,
- "failed to open directory %s", ctx->directory);
- } else {
- guts_thread_ctx_t *thread = NULL;
- struct dirent *dirp = NULL;
- /* to pass through "." and ".." */
- readdir (dir);
- readdir (dir);
-
- while (thread_count > 0) {
- char pathname[256] = {0,};
-
- thread = CALLOC (1, sizeof (*thread));
- ERR_ABORT (thread);
- dirp = NULL;
-
- list_add (&thread->threads, &threads->threads);
- dirp = readdir (dir);
- if (dirp) {
- sprintf (pathname, "%s/%s", ctx->directory, dirp->d_name);
- printf ("file name for thread(%d) is %s\n", thread_count, pathname);
- thread->file = strdup (pathname);
- thread->ctx = ctx;
- } else if (thread_count > 0) {
- gf_log ("guts",
- GF_LOG_ERROR,
- "number of tio files less than %d, number of threads specified", ctx->threads);
- /* TODO: cleanup */
- return NULL;
- }
- --thread_count;
- }
- }
- }
- return threads;
-}
-
-/* guts_start_threads - starts all the threads in @threads.
- *
- * @threads: guts_threads_t structure containing the handles to threads created by guts_create_threads.
- *
- * returns <0 on error.
- *
- */
-static void
-guts_start_threads (guts_threads_t *gthreads)
-{
- guts_thread_ctx_t *thread = NULL;
- list_for_each_entry (thread, &gthreads->threads, threads) {
- if (pthread_create (&thread->pthread, NULL, guts_thread_main, (void *)thread) < 0) {
- gf_log ("guts",
- GF_LOG_ERROR,
- "failed to start thread");
- } else {
- gf_log ("guts",
- GF_LOG_DEBUG,
- "started thread with file %s", thread->file);
- }
- }
-}
-
-static int32_t
-guts_join_threads (guts_threads_t *gthreads)
-{
- guts_thread_ctx_t *thread = NULL;
- list_for_each_entry (thread, &gthreads->threads, threads) {
- if (pthread_join (thread->pthread, NULL) < 0) {
- gf_log ("guts",
- GF_LOG_ERROR,
- "failed to join thread");
- } else {
- gf_log ("guts",
- GF_LOG_DEBUG,
- "joined thread with file %s", thread->file);
- }
- }
- return 0;
-}
-
-
-int32_t
-main (int32_t argc, char *argv[])
-{
- /* glusterfs_ctx_t is required to be passed to
- * 1. get_xlator_graph
- * 2. glusterfs_mount
- */
- glusterfs_ctx_t gfs_ctx = {
- .logfile = DATADIR "/log/glusterfs/glusterfs-guts.log",
- .loglevel = GF_LOG_DEBUG,
- .poll_type = SYS_POLL_TYPE_EPOLL,
- };
-
- guts_ctx_t guts_ctx = {0,};
- FILE *specfp = NULL;
- xlator_t *graph = NULL;
- guts_threads_t *threads = NULL;
-
- argp_parse (&argp, argc, argv, 0, 0, &guts_ctx);
-
- if (gf_log_init (gfs_ctx.logfile) == -1 ) {
- fprintf (stderr,
- "glusterfs-guts: failed to open logfile \"%s\"\n",
- gfs_ctx.logfile);
- return -1;
- }
- gf_log_set_loglevel (gfs_ctx.loglevel);
-
- specfp = get_spec_fp (&guts_ctx);
- if (!specfp) {
- fprintf (stderr,
- "glusterfs-guts: could not open specfile\n");
- return -1;
- }
-
- graph = get_xlator_graph (&gfs_ctx, specfp);
- if (!graph) {
- gf_log ("guts", GF_LOG_ERROR,
- "Unable to get xlator graph");
- return -1;
- }
- fclose (specfp);
-
- guts_ctx.graph = graph;
-
- if (guts_ctx.trace) {
- return guts_trace (&guts_ctx);
- } else {
- /* now that we have the xlator graph, we need to create as many threads as requested and assign a tio file
- * to each of the threads and tell each thread to attach to the graph we just created. */
-
- if (!guts_ctx.file && !guts_ctx.directory) {
- fprintf (stderr,
- "glusterfs-guts: no tio file specified");
- return -1;
- }
-
- threads = guts_create_threads (&guts_ctx);
-
- if (threads) {
- guts_start_threads (threads);
- guts_join_threads (threads);
- } else {
- gf_log ("guts", GF_LOG_ERROR,
- "unable to create threads");
- return 0;
- }
- }
-
- return 0;
-}
-
diff --git a/glusterfs-guts/src/glusterfs-guts.h b/glusterfs-guts/src/glusterfs-guts.h
deleted file mode 100644
index fd1bc52b8..000000000
--- a/glusterfs-guts/src/glusterfs-guts.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef __GLUSTERFS_GUTS_H
-#define __GLUSTERFS_GUTS_H
-
-#include "xlator.h"
-#include "transport.h"
-#include "glusterfs.h"
-#include "glusterfs-fuse.h"
-#include "timer.h"
-
-#ifdef DEFAULT_LOG_FILE
-#undef DEFAULT_LOG_FILE
-#endif
-
-#define DEFAULT_LOG_FILE DATADIR"/log/glusterfs/glusterfs-guts.log"
-
-
-typedef struct {
- int32_t threads; /* number of threads to start in replay mode */
- char *logfile; /* logfile path */
- int32_t loglevel; /* logging level */
- char *directory; /* path to directory containing tio files, when threads > 1 */
- char *file; /* path to tio file, when threads == 1 during replay. in trace mode, path to tio output */
- char *specfile; /* path to specfile to load translator tree */
- xlator_t *graph; /* translator tree after the specfile is loaded */
- int32_t trace; /* if trace == 1, glusterfs-guts runs in trace mode, otherwise in replay mode */
- char *mountpoint; /* valid only when trace == 1, mounpoint to mount glusterfs */
-} guts_ctx_t;
-
-
-typedef struct {
- struct list_head threads;
- pthread_t pthread;
- xlator_t *tree;
- char *file;
- guts_ctx_t *ctx;
-} guts_thread_ctx_t;
-
-typedef struct {
- struct list_head threads;
-} guts_threads_t;
-
-int32_t guts_replay (guts_thread_ctx_t *);
-int32_t guts_trace (guts_ctx_t *);
-#endif
diff --git a/glusterfs-guts/src/guts-extra.c b/glusterfs-guts/src/guts-extra.c
deleted file mode 100644
index e1eb4f3ce..000000000
--- a/glusterfs-guts/src/guts-extra.c
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
diff --git a/glusterfs-guts/src/guts-lowlevel.h b/glusterfs-guts/src/guts-lowlevel.h
deleted file mode 100644
index 0061d0c23..000000000
--- a/glusterfs-guts/src/guts-lowlevel.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _GUTS_LOWLEVEL_H_
-#define _GUTS_LOWLEVEL_H_
-
-int
-guts_reply_err (fuse_req_t req,
- int err);
-
-int
-guts_reply_none (fuse_req_t req);
-
-int
-guts_reply_entry (fuse_req_t req,
- const struct fuse_entry_param *e);
-
-int
-guts_reply_create (fuse_req_t req,
- const struct fuse_entry_param *e,
- const struct fuse_file_info *f);
-
-int
-guts_reply_attr (fuse_req_t req,
- const struct stat *attr,
- double attr_timeout);
-
-int
-guts_reply_readlink (fuse_req_t req,
- const char *linkname);
-
-int
-guts_reply_open (fuse_req_t req,
- const struct fuse_file_info *f);
-
-int
-guts_reply_write (fuse_req_t req,
- size_t count);
-
-int
-guts_reply_buf (fuse_req_t req,
- const char *buf,
- size_t size);
-
-int
-guts_reply_statfs (fuse_req_t req,
- const struct statvfs *stbuf);
-
-int
-guts_reply_xattr (fuse_req_t req,
- size_t count);
-
-int
-guts_reply_lock (fuse_req_t req,
- struct flock *lock);
-
-/* exploiting the macros to reduce coding work ;) */
-#define fuse_reply_entry guts_reply_entry
-#define fuse_reply_err guts_reply_err
-#define fuse_reply_none guts_reply_none
-#define fuse_reply_attr guts_reply_attr
-#define fuse_reply_open guts_reply_open
-#define fuse_reply_readlink guts_reply_readlink
-#define fuse_reply_create guts_reply_create
-#define fuse_reply_write guts_reply_write
-#define fuse_reply_buf guts_reply_buf
-#define fuse_reply_statfs guts_reply_statfs
-#define fuse_reply_xattr guts_reply_xattr
-#define fuse_reply_lock guts_reply_lock
-
-#endif
diff --git a/glusterfs-guts/src/guts-parse.c b/glusterfs-guts/src/guts-parse.c
deleted file mode 100644
index 3be324a7a..000000000
--- a/glusterfs-guts/src/guts-parse.c
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "guts-parse.h"
-#include "guts-tables.h"
-
-/* unavoidable usage of global data.. :'( */
-static int32_t tio_fd = 0;
-
-int32_t
-guts_tio_init (const char *filename)
-{
- tio_fd = open (filename, O_WRONLY | O_CREAT);
-
- if (tio_fd < 0) {
- gf_log ("guts",
- GF_LOG_ERROR,
- "failed to open tio file %s", filename);
- }
-
- return tio_fd;
-}
-
-void
-guts_reply_dump (fuse_req_t req,
- const void *arg,
- int32_t len)
-{
- uint8_t *buf = NULL;
- uint8_t *ibuf = NULL;
- uint32_t buf_size = REP_HEADER_FULL_LEN + len;
-
- ibuf = buf = CALLOC (1, buf_size);
-
- /* being paranoid, checking for both ibuf and buf.. ;) */
- if (ibuf && buf) {
- memcpy (ibuf, REP_BEGIN, strlen (REP_BEGIN));
- ibuf += strlen (REP_BEGIN);
- memcpy (ibuf, req, sizeof (struct fuse_req));
- ibuf += sizeof (struct fuse_req);
- memcpy (ibuf, &len, sizeof (len));
- ibuf += sizeof (len);
- memcpy (ibuf, arg, len);
-
- gf_full_write (tio_fd, buf, buf_size);
-
- free (buf);
- } else {
- gf_log ("glusterfs-guts", GF_LOG_DEBUG,
- "failed to allocate memory while dumping reply");
- }
-}
-
-void
-guts_req_dump (struct fuse_in_header *in,
- const void *arg,
- int32_t len)
-{
- /* GUTS_REQUEST_BEGIN:<fuse_in_header>:<arg-len>:<args>:GUTS_REQUEST_END */
- uint8_t *buf = NULL;
- uint8_t *ibuf = NULL;
- uint32_t buf_size = REQ_HEADER_FULL_LEN + len;
-
- ibuf = buf = CALLOC (1, buf_size);
-
- if (ibuf && buf) {
- memcpy (ibuf, REQ_BEGIN, strlen (REQ_BEGIN));
- ibuf += strlen (REQ_BEGIN);
- memcpy (ibuf, in, sizeof (*in));
- ibuf += sizeof (*in);
- memcpy (ibuf, &len, sizeof (len));
- ibuf += sizeof (len);
- memcpy (ibuf, arg, len);
-
- gf_full_write (tio_fd, buf, buf_size);
-
- free (buf);
- } else {
- gf_log ("glusterfs-guts", GF_LOG_DEBUG,
- "failed to allocate memory while dumping reply");
- }
-}
-
-
-
-guts_req_t *
-guts_read_entry (guts_replay_ctx_t *ctx)
-{
- guts_req_t *req = NULL;
- guts_reply_t *reply = NULL;
- uint8_t begin[256] = {0,};
- int32_t ret = 0;
- int32_t fd = ctx->tio_fd;
-
- while (!req) {
- req = guts_get_request (ctx);
-
- if (!req) {
- ret = read (fd, begin, strlen (REQ_BEGIN));
-
- if (ret == 0) {
- gf_log ("glusterfs-guts", GF_LOG_DEBUG,
- "guts replay finished");
- req = NULL;
- }
-
- if (is_request (begin)) {
- req = CALLOC (1, sizeof (*req));
- ERR_ABORT (req);
- gf_full_read (fd, (char *)req, REQ_HEADER_LEN);
-
- req->arg = CALLOC (1, req->arg_len + 1);
- ERR_ABORT (req->arg);
- gf_full_read (fd, req->arg, req->arg_len);
- gf_log ("guts",
- GF_LOG_DEBUG,
- "%s: fop %s (%d)\n",
- begin, guts_log[req->header.opcode].name, req->header.opcode);
- guts_add_request (ctx, req);
- req = guts_get_request (ctx);
- } else {
- /* whenever a reply is read, we put it to a hash table and we would like to retrieve it whenever
- * we get a reply for any call
- */
- reply = CALLOC (1, sizeof (*reply));
- ERR_ABORT (reply);
- gf_full_read (fd, (char *)reply, REP_HEADER_LEN);
-
- reply->arg = CALLOC (1, reply->arg_len + 1);
- ERR_ABORT (reply->arg);
- gf_full_read (fd, reply->arg, reply->arg_len);
-
- /* add a new reply to */
- ret = guts_add_reply (ctx, reply);
- gf_log ("guts",
- GF_LOG_DEBUG,
- "got a reply with unique: %ld", reply->req.unique);
- }
- }
- }
- return req;
-}
-
-guts_reply_t *
-guts_read_reply (guts_replay_ctx_t *ctx,
- uint64_t unique)
-{
- guts_req_t *req = NULL;
- guts_reply_t *reply = NULL, *rep = NULL;
- uint8_t begin[256] = {0,};
- int32_t ret = 0;
- int32_t fd = ctx->tio_fd;
-
- while (!rep) {
-
- ret = read (fd, begin, strlen (REQ_BEGIN));
-
- if (ret == 0) {
- printf ("\ndone\n");
- return NULL;
- }
-
- if (is_request (begin)) {
- req = CALLOC (1, sizeof (*req));
- ERR_ABORT (req);
- gf_full_read (fd, (char *)req, REQ_HEADER_LEN);
-
- req->arg = CALLOC (1, req->arg_len + 1);
- ERR_ABORT (req->arg);
- gf_full_read (fd, req->arg, req->arg_len);
- gf_log ("guts",
- GF_LOG_DEBUG,
- "%s: fop %s (%d)\n",
- begin, guts_log[req->header.opcode].name, req->header.opcode);
-
- ret = guts_add_request (ctx, req);
-
- } else {
- /* whenever a reply is read, we put it to a hash table and we would like to retrieve it whenever
- * we get a reply for any call
- */
- reply = CALLOC (1, sizeof (*reply));
- ERR_ABORT (reply);
- gf_full_read (fd, (char *)reply, REP_HEADER_LEN);
-
- reply->arg = CALLOC (1, reply->arg_len + 1);
- ERR_ABORT (reply->arg);
- gf_full_read (fd, reply->arg, reply->arg_len);
-
- /* add a new reply to */
- if (reply->req.unique == unique) {
- return reply;
- } else {
- ret = guts_add_reply (ctx, reply);
- gf_log ("guts",
- GF_LOG_DEBUG,
- "got a reply with unique: %ld", reply->req.unique);
- }
- }
- }
- return NULL;
-}
diff --git a/glusterfs-guts/src/guts-parse.h b/glusterfs-guts/src/guts-parse.h
deleted file mode 100644
index 68486ff14..000000000
--- a/glusterfs-guts/src/guts-parse.h
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _GUTS_PARSE_H_
-#define _GUTS_PARSE_H_
-
-#include "glusterfs.h"
-#include "glusterfs-guts.h"
-#include "fuse_kernel.h"
-#include <fuse/fuse_lowlevel.h>
-#include "list.h"
-
-#ifndef _FUSE_OPAQUE_
-#define _FUSE_OPAQUE_
-
-struct fuse_private {
- int fd;
- struct fuse *fuse;
- struct fuse_session *se;
- struct fuse_chan *ch;
- char *mountpoint;
-};
-
-struct fuse_req {
- struct fuse_ll *f;
- uint64_t unique;
- int ctr;
- pthread_mutex_t lock;
- struct fuse_ctx ctx;
- struct fuse_chan *ch;
- int interrupted;
- union {
- struct {
- uint64_t unique;
- } i;
- struct {
- fuse_interrupt_func_t func;
- void *data;
- } ni;
- } u;
- struct fuse_req *next;
- struct fuse_req *prev;
-};
-
-struct fuse_ll {
- int debug;
- int allow_root;
- struct fuse_lowlevel_ops op;
- int got_init;
- void *userdata;
- uid_t owner;
- struct fuse_conn_info conn;
- struct fuse_req list;
- struct fuse_req interrupts;
- pthread_mutex_t lock;
- int got_destroy;
-};
-#endif
-
-#define REQ_BEGIN "GUTS_REQ_BEGIN:"
-#define REQ_HEADER_FULL_LEN (strlen(REQ_BEGIN) + sizeof (struct fuse_in_header) + sizeof (int32_t))
-
-#define REP_BEGIN "GUTS_REP_BEGIN:"
-#define REP_HEADER_FULL_LEN (strlen(REP_BEGIN) + sizeof (struct fuse_req) + sizeof (int32_t))
-
-#define REQ_HEADER_LEN (sizeof (struct fuse_in_header) + sizeof (int32_t))
-#define REP_HEADER_LEN (sizeof (struct fuse_req) + sizeof (int32_t))
-
-#define is_request(begin) (0==strcmp(begin, REQ_BEGIN)?1:0)
-
-typedef void (*func_t)(struct fuse_in_header *, const void *);
-
-typedef struct {
- func_t func;
- const char *name;
-} guts_log_t;
-
-typedef struct {
- struct fuse_in_header header;
- int32_t arg_len;
- struct list_head list;
- void *arg;
-} guts_req_t;
-
-typedef struct {
- struct fuse_req req;
- int32_t arg_len;
- void *arg;
-} guts_reply_t;
-
-struct guts_replay_ctx {
- int32_t tio_fd;
- struct fuse_ll *guts_ll;
- dict_t *replies;
- dict_t *inodes;
- dict_t *fds;
- struct list_head requests;
- dict_t *requests_dict;
-};
-
-typedef struct guts_replay_ctx guts_replay_ctx_t;
-
-extern guts_log_t guts_log[];
-
-int32_t
-guts_tio_init (const char *);
-
-void
-guts_req_dump (struct fuse_in_header *,
- const void *,
- int32_t);
-
-guts_req_t *
-guts_read_entry (guts_replay_ctx_t *ctx);
-
-void
-guts_reply_dump (fuse_req_t,
- const void *,
- int32_t);
-
-guts_reply_t *
-guts_read_reply (guts_replay_ctx_t *ctx,
- uint64_t unique);
-
-#endif /* _GUTS_PARSE_H_ */
diff --git a/glusterfs-guts/src/guts-replay.c b/glusterfs-guts/src/guts-replay.c
deleted file mode 100644
index 0fdce0fdc..000000000
--- a/glusterfs-guts/src/guts-replay.c
+++ /dev/null
@@ -1,834 +0,0 @@
-/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include "glusterfs-guts.h"
-#include "guts-parse.h"
-#include <signal.h>
-#include "guts-tables.h"
-#include "guts-replay.h"
-#include "guts-trace.h"
-
-static void
-convert_attr (const struct fuse_setattr_in *attr,
- struct stat *stbuf)
-{
- stbuf->st_mode = attr->mode;
- stbuf->st_uid = attr->uid;
- stbuf->st_gid = attr->gid;
- stbuf->st_size = attr->size;
- stbuf->st_atime = attr->atime;
- /*
- ST_ATIM_NSEC_SET (stbuf, attr->atimensec);
- ST_MTIM_NSEC_SET (stbuf, attr->mtimensec);*/
-}
-
-static void
-guts_replay_lookup (fuse_req_t req,
- fuse_ino_t ino,
- const void *inargs)
-{
- char *name = (char *) inargs;
-
- if (req->f->op.lookup)
- req->f->op.lookup(req, ino, name);
- else
- guts_reply_err (req, ENOSYS);
-
-}
-
-static void
-guts_replay_forget (fuse_req_t req,
- fuse_ino_t ino,
- const void *inargs)
-{
- struct fuse_forget_in *arg = (struct fuse_forget_in *) inargs;
-
- if (req->f->op.forget)
- req->f->op.forget (req, ino, arg->nlookup);
-
-}
-
-static void
-guts_replay_getattr (fuse_req_t req,
- fuse_ino_t ino,
- const void *inargs)
-{
- (void) inargs;
-
- if (req->f->op.getattr)
- req->f->op.getattr (req, ino, NULL);
- else
- guts_reply_err (req, ENOSYS);
-}
-
-static void
-guts_replay_setattr (fuse_req_t req,
- fuse_ino_t ino,
- const void *inargs)
-{
- struct fuse_setattr_in *arg = (struct fuse_setattr_in *)inargs;
-
- if (req->f->op.setattr) {
- struct fuse_file_info *fi = NULL;
- struct fuse_file_info fi_store;
- struct stat stbuf;
- memset (&stbuf, 0, sizeof (stbuf));
- convert_attr (arg, &stbuf);
- if (arg->valid & FATTR_FH) {
- arg->valid &= ~FATTR_FH;
- memset (&fi_store, 0, sizeof (fi_store));
- fi = &fi_store;
- fi->fh = arg->fh;
- fi->fh_old = fi->fh;
- }
- req->f->op.setattr (req, ino, &stbuf, arg->valid, fi);
- } else
- guts_reply_err (req, ENOSYS);
-}
-
-static void
-guts_replay_access (fuse_req_t req,
- fuse_ino_t ino,
- const void *inargs)
-{
- struct fuse_access_in *arg = (struct fuse_access_in *)inargs;
-
- if (req->f->op.access)
- req->f->op.access (req, ino, arg->mask);
- else
- guts_reply_err (req, ENOSYS);
-}
-
-static void
-guts_replay_readlink (fuse_req_t req,
- fuse_ino_t ino,
- const void *inargs)
-{
- (void) inargs;
-
- if (req->f->op.readlink)
- req->f->op.readlink (req, ino);
- else
- guts_reply_err (req, ENOSYS);
-}
-
-
-static void
-guts_replay_mknod (fuse_req_t req,
- fuse_ino_t ino,
- const void *inargs)
-{
- struct fuse_mknod_in *arg = (struct fuse_mknod_in *) inargs;
-
- if (req->f->op.mknod)
- req->f->op.mknod (req, ino, PARAM(arg), arg->mode, arg->rdev);
- else
- guts_reply_err (req, ENOSYS);
-}
-
-static void
-guts_replay_mkdir (fuse_req_t req,
- fuse_ino_t ino,
- const void *inargs)
-{
- struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *) inargs;
-
- if (req->f->op.mkdir)
- req->f->op.mkdir (req, ino, PARAM(arg), arg->mode);
- else
- guts_reply_err (req, ENOSYS);
-}
-
-static void
-guts_replay_unlink (fuse_req_t req,
- fuse_ino_t ino,
- const void *inargs)
-{
- char *name = (char *)inargs;
-
- if (req->f->op.unlink) {
-
- req->f->op.unlink (req, ino, name);
- } else
- guts_reply_err (req, ENOSYS);
-}
-
-static void
-guts_replay_rmdir (fuse_req_t req,
- fuse_ino_t ino,
- const void *inargs)
-{
- char *name = (char *)inargs;
-
- if (req->f->op.rmdir) {
- req->f->op.rmdir (req, ino, name);
- } else
- guts_reply_err (req, ENOSYS);
-}
-
-static void
-guts_replay_symlink (fuse_req_t req,
- fuse_ino_t ino,
- const void *inargs)
-{
- char *name = (char *) inargs;
- char *linkname = ((char *) inargs) + strlen ((char *) inargs) + 1;
-
- if (req->f->op.symlink) {
- req->f->op.symlink (req, linkname, ino, name);
- } else
- guts_reply_err (req, ENOSYS);
-}
-
-
-
-static void
-guts_replay_rename (fuse_req_t req,
- fuse_ino_t ino,
- const void *inargs)
-{
- struct fuse_rename_in *arg = (struct fuse_rename_in *) inargs;
- char *oldname = PARAM(arg);
- char *newname = oldname + strlen (oldname) + 1;
-
- if (req->f->op.rename) {
- req->f->op.rename (req, ino, oldname, arg->newdir, newname);
- } else
- guts_reply_err (req, ENOSYS);
-
-}
-
-static void
-guts_replay_link (fuse_req_t req,
- fuse_ino_t ino,
- const void *inargs)
-{
- struct fuse_link_in *arg = (struct fuse_link_in *) inargs;
-
- if (req->f->op.link) {
- guts_replay_ctx_t *ctx = (guts_replay_ctx_t *) req->u.ni.data;
- fuse_ino_t old_ino = guts_inode_search (ctx, arg->oldnodeid);
-
- req->f->op.link (req, old_ino, ino, PARAM(arg));
- } else
- guts_reply_err (req, ENOSYS);
-}
-
-
-static void
-guts_replay_create (fuse_req_t req,
- fuse_ino_t ino,
- const void *inargs)
-{
- struct guts_create_in *arg = (struct guts_create_in *) inargs;
-
- if (req->f->op.create) {
- struct fuse_file_info fi;
- memset (&fi, 0, sizeof (fi));
- fi.flags = arg->open_in.flags;
-
- req->f->op.create (req, ino, arg->name, arg->open_in.mode, &fi);
- } else
- guts_reply_err (req, ENOSYS);
-
-}
-
-static void
-guts_replay_open (fuse_req_t req,
- fuse_ino_t ino,
- const void *inargs)
-{
- struct fuse_open_in *arg = (struct fuse_open_in *) inargs;
- struct fuse_file_info fi;
-
- memset (&fi, 0, sizeof (fi));
- fi.flags = arg->flags;
-
- if (req->f->op.open) {
- /* TODO: how efficient is using dict_get here?? */
- req->f->op.open (req, ino, &fi);
- } else
- guts_reply_open (req, &fi);
-}
-
-
-static void
-guts_replay_read(fuse_req_t req,
- fuse_ino_t ino,
- const void *inarg)
-{
- struct fuse_read_in *arg = (struct fuse_read_in *) inarg;
-
- if (req->f->op.read){
- struct fuse_file_info fi;
- guts_replay_ctx_t *ctx = req->u.ni.data;
-
- memset (&fi, 0, sizeof (fi));
- /* TODO: how efficient is using dict_get here?? */
- fi.fh = (unsigned long) guts_fd_search (ctx, arg->fh);
- if (!fi.fh) {
- /* TODO: make it more meaningful and organized */
- printf ("readv called without opening the file\n");
- guts_reply_err (req, EBADFD);
- } else {
- fi.fh_old = fi.fh;
- req->f->op.read (req, ino, arg->size, arg->offset, &fi);
- }
- } else
- guts_reply_err (req, ENOSYS);
-}
-
-static void
-guts_replay_write(fuse_req_t req,
- fuse_ino_t ino,
- const void *inarg)
-{
- struct fuse_write_in *arg = (struct fuse_write_in *) inarg;
- struct fuse_file_info fi;
- guts_replay_ctx_t *ctx = (guts_replay_ctx_t *) req->u.ni.data;
-
- memset (&fi, 0, sizeof (fi));
- fi.fh = (unsigned long) guts_fd_search (ctx, arg->fh);
-
- if (!fi.fh) {
- /* TODO: make it more meaningful and organized */
- printf ("writev called without opening the file\n");
- guts_reply_err (req, EBADFD);
- } else {
- fi.fh_old = fi.fh;
- fi.writepage = arg->write_flags & 1;
- if (req->f->op.write)
- req->f->op.write (req, ino, PARAM(arg), arg->size, arg->offset, &fi);
- else
- guts_reply_err (req, ENOSYS);
- }
-}
-
-static void
-guts_replay_flush(fuse_req_t req,
- fuse_ino_t ino,
- const void *inarg)
-{
- struct fuse_flush_in *arg = (struct fuse_flush_in *) inarg;
- struct fuse_file_info fi;
- guts_replay_ctx_t *ctx = (guts_replay_ctx_t *) req->u.ni.data;
-
- memset (&fi, 0, sizeof (fi));
- fi.fh = (unsigned long) guts_fd_search (ctx, arg->fh);
- if (!fi.fh) {
- printf ("flush called without calling open\n");
- guts_reply_err (req, EBADFD);
- } else {
- fi.fh_old = fi.fh;
- fi.flush = 1;
-
- if (req->f->conn.proto_minor >= 7)
- fi.lock_owner = arg->lock_owner;
-
- if (req->f->op.flush)
- req->f->op.flush (req, ino, &fi);
- else
- guts_reply_err (req, ENOSYS);
- }
-}
-
-static void
-guts_replay_release(fuse_req_t req,
- fuse_ino_t ino,
- const void *inarg)
-{
- struct fuse_release_in *arg = (struct fuse_release_in *) inarg;
- struct fuse_file_info fi;
- guts_replay_ctx_t *ctx = (guts_replay_ctx_t *) req->u.ni.data;
-
- memset (&fi, 0, sizeof (fi));
- fi.flags = arg->flags;
- fi.fh = (unsigned long) guts_fd_search (ctx, arg->fh);
-
- if (!fi.fh) {
- printf ("release called without calling open\n");
- guts_reply_err (req, EBADFD);
- } else {
- fi.fh_old = fi.fh;
- if (req->f->conn.proto_minor >= 8) {
- fi.flush = (arg->release_flags & FUSE_RELEASE_FLUSH) ? 1 : 0;
- fi.lock_owner = arg->lock_owner;
- }
- if (req->f->op.release)
- req->f->op.release (req, ino, &fi);
- else
- guts_reply_err (req, ENOSYS);
- }
-}
-
-static void
-guts_replay_fsync(fuse_req_t req,
- fuse_ino_t ino,
- const void *inarg)
-{
- struct fuse_fsync_in *arg = (struct fuse_fsync_in *) inarg;
- struct fuse_file_info fi;
- guts_replay_ctx_t *ctx = (guts_replay_ctx_t *) req->u.ni.data;
-
- memset (&fi, 0, sizeof (fi));
- fi.fh = (unsigned long) guts_fd_search (ctx, arg->fh);
- fi.fh_old = fi.fh;
-
- if (req->f->op.fsync)
- req->f->op.fsync (req, ino, arg->fsync_flags & 1, &fi);
- else
- guts_reply_err (req, ENOSYS);
-}
-
-static void
-guts_replay_opendir (fuse_req_t req,
- fuse_ino_t ino,
- const void *inarg)
-{
- struct fuse_open_in *arg = (struct fuse_open_in *) inarg;
- struct fuse_file_info fi;
-
- memset (&fi, 0, sizeof (fi));
- fi.flags = arg->flags;
-
- if (req->f->op.opendir) {
- req->f->op.opendir (req, ino, &fi);
- } else
- guts_reply_open (req, &fi);
-}
-
-static void
-guts_replay_readdir(fuse_req_t req,
- fuse_ino_t ino,
- const void *inarg)
-{
- struct fuse_read_in *arg = (struct fuse_read_in *) inarg;
- struct fuse_file_info fi;
- guts_replay_ctx_t *ctx = (guts_replay_ctx_t *) req->u.ni.data;
-
- memset (&fi, 0, sizeof (fi));
- fi.fh = (unsigned long) guts_fd_search (ctx, arg->fh);
-
- if (!fi.fh) {
- /* TODO: make it more meaningful and organized */
- printf ("readdir called without opening the file\n");
- guts_reply_err (req, EBADFD);
- } else {
- fi.fh_old = fi.fh;
-
- if (req->f->op.readdir)
- req->f->op.readdir (req, ino, arg->size, arg->offset, &fi);
- else
- guts_reply_err (req, ENOSYS);
- }
-
-}
-
-static void
-guts_replay_releasedir(fuse_req_t req,
- fuse_ino_t ino,
- const void *inarg)
-{
- struct fuse_release_in *arg = (struct fuse_release_in *) inarg;
- struct fuse_file_info fi;
- guts_replay_ctx_t *ctx = (guts_replay_ctx_t *) req->u.ni.data;
-
- memset (&fi, 0, sizeof (fi));
- fi.flags = arg->flags;
- fi.fh = (unsigned long) guts_fd_search (ctx, arg->fh);
- if (!fi.fh) {
- printf ("releasedir called without calling opendir\n");
- guts_reply_err (req, EBADFD);
- } else {
-
- fi.fh_old = fi.fh;
- if (req->f->op.releasedir)
- req->f->op.releasedir (req, ino, &fi);
- else
- guts_reply_err (req, ENOSYS);
- }
-}
-
-static void
-guts_replay_fsyncdir(fuse_req_t req,
- fuse_ino_t ino,
- const void *inarg)
-{
- struct fuse_fsync_in *arg = (struct fuse_fsync_in *) inarg;
- struct fuse_file_info fi;
- guts_replay_ctx_t *ctx = (guts_replay_ctx_t *) req->u.ni.data;
-
- memset (&fi, 0, sizeof (fi));
- fi.fh = (unsigned long) guts_fd_search (ctx, arg->fh);
- fi.fh_old = fi.fh;
-
- if (req->f->op.fsyncdir)
- req->f->op.fsyncdir (req, ino, arg->fsync_flags & 1, &fi);
- else
- guts_reply_err (req, ENOSYS);
-}
-
-static void
-guts_replay_statfs (fuse_req_t req,
- fuse_ino_t ino,
- const void *inargs)
-{
- (void) ino;
- (void) inargs;
-
- if (req->f->op.statfs) {
- req->f->op.statfs (req, ino);
- } else {
- struct statvfs buf = {
- .f_namemax = 255,
- .f_bsize = 512,
- };
- guts_reply_statfs (req, &buf);
- }
-}
-
-static void
-guts_replay_setxattr(fuse_req_t req,
- fuse_ino_t ino,
- const void *inarg)
-{
- struct fuse_setxattr_in *arg = (struct fuse_setxattr_in *) inarg;
- char *name = PARAM(arg);
- char *value = name + strlen(name) + 1;
-
- if (req->f->op.setxattr)
- req->f->op.setxattr (req, ino, name, value, arg->size, arg->flags);
- else
- guts_reply_err (req, ENOSYS);
-}
-
-static void
-guts_replay_getxattr(fuse_req_t req,
- fuse_ino_t ino,
- const void *inarg)
-{
- struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *) inarg;
-
- if (req->f->op.getxattr)
- req->f->op.getxattr (req, ino, PARAM(arg), arg->size);
- else
- guts_reply_err (req, ENOSYS);
-}
-
-static void
-guts_replay_listxattr (fuse_req_t req,
- fuse_ino_t ino,
- const void *inargs)
-{
- struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *) inargs;
-
- if (req->f->op.listxattr)
- req->f->op.listxattr (req, ino, arg->size);
- else
- guts_reply_err (req, ENOSYS);
-}
-
-static void
-guts_replay_removexattr(fuse_req_t req,
- fuse_ino_t ino,
- const void *inargs)
-{
- char *name = (char *)inargs;
-
- if (req->f->op.removexattr)
- req->f->op.removexattr (req, ino, name);
- else
- guts_reply_err (req, ENOSYS);
-}
-
-guts_replay_t guts_replay_fop[] = {
- [FUSE_LOOKUP] = { guts_replay_lookup, "lookup" },
- [FUSE_FORGET] = { guts_replay_forget, "forget" },
- [FUSE_GETATTR] = { guts_replay_getattr, "getattr" },
- [FUSE_SETATTR] = { guts_replay_setattr, "setattr" },
- [FUSE_ACCESS] = { guts_replay_access, "access" },
- [FUSE_READLINK] = { guts_replay_readlink, "readlink" },
- [FUSE_MKNOD] = { guts_replay_mknod, "mknod" },
- [FUSE_MKDIR] = { guts_replay_mkdir, "mkdir" },
- [FUSE_UNLINK] = { guts_replay_unlink, "unlink" },
- [FUSE_RMDIR] = { guts_replay_rmdir, "rmdir" },
- [FUSE_SYMLINK] = { guts_replay_symlink, "symlink" },
- [FUSE_RENAME] = { guts_replay_rename, "rename" },
- [FUSE_LINK] = { guts_replay_link, "link" },
- [FUSE_CREATE] = { guts_replay_create, "create" },
- [FUSE_OPEN] = { guts_replay_open, "open" },
- [FUSE_READ] = { guts_replay_read, "read" },
- [FUSE_WRITE] = { guts_replay_write, "write" },
- [FUSE_FLUSH] = { guts_replay_flush, "flush" },
- [FUSE_RELEASE] = { guts_replay_release, "release" },
- [FUSE_FSYNC] = { guts_replay_fsync, "fsync" },
- [FUSE_OPENDIR] = { guts_replay_opendir, "opendir" },
- [FUSE_READDIR] = { guts_replay_readdir, "readdir" },
- [FUSE_RELEASEDIR] = { guts_replay_releasedir, "releasedir" },
- [FUSE_FSYNCDIR] = { guts_replay_fsyncdir, "fsyncdir" },
- [FUSE_STATFS] = { guts_replay_statfs, "statfs" },
- [FUSE_SETXATTR] = { guts_replay_setxattr, "setxattr" },
- [FUSE_GETXATTR] = { guts_replay_getxattr, "getxattr" },
- [FUSE_LISTXATTR] = { guts_replay_listxattr, "listxattr" },
- [FUSE_REMOVEXATTR] = { guts_replay_removexattr, "removexattr" },
-};
-
-static inline void
-list_init_req (struct fuse_req *req)
-{
- req->next = req;
- req->prev = req;
-}
-
-
-static int32_t
-guts_transport_notify (xlator_t *xl,
- int32_t event,
- void *data,
- ...)
-{
- /* dummy, nobody has got anything to notify me.. ;) */
- return 0;
-}
-
-static int32_t
-guts_transport_init (transport_t *this,
- dict_t *options,
- event_notify_fn_t notify)
-{
- struct fuse_private *priv = CALLOC (1, sizeof (*priv));
- ERR_ABORT (priv);
-
- this->notify = NULL;
- this->private = (void *)priv;
-
- /* fuse channel */
- priv->ch = NULL;
-
- /* fuse session */
- priv->se = NULL;
-
- /* fuse channel fd */
- priv->fd = -1;
-
- this->buf = data_ref (data_from_dynptr (NULL, 0));
- this->buf->is_locked = 1;
-
- priv->mountpoint = NULL;
-
- transport_ref (this);
-
- return 0;
-}
-
-static void
-guts_transport_fini (transport_t *this)
-{
-
-}
-
-static int32_t
-guts_transport_disconnect (transport_t *this)
-{
- struct fuse_private *priv = this->private;
-
- gf_log ("glusterfs-guts",
- GF_LOG_DEBUG,
- "cleaning up fuse transport in disconnect handler");
-
- FREE (priv);
- priv = NULL;
- this->private = NULL;
-
- /* TODO: need graceful exit. every xlator should be ->fini()'ed
- and come out of main poll loop cleanly
- */
- return -1;
-}
-
-static struct transport_ops guts_transport_ops = {
- .disconnect = guts_transport_disconnect,
-};
-
-static transport_t guts_transport = {
- .ops = &guts_transport_ops,
- .private = NULL,
- .xl = NULL,
- .init = guts_transport_init,
- .fini = guts_transport_fini,
- .notify = guts_transport_notify
-};
-
-static inline xlator_t *
-fuse_graph (xlator_t *graph)
-{
- xlator_t *top = NULL;
- xlator_list_t *xlchild;
-
- top = CALLOC (1, sizeof (*top));
- ERR_ABORT (top);
-
- xlchild = CALLOC (1, sizeof(*xlchild));
- ERR_ABORT (xlchild);
- xlchild->xlator = graph;
- top->children = xlchild;
- top->ctx = graph->ctx;
- top->next = graph;
- graph->parent = top;
-
- return top;
-}
-
-static guts_replay_ctx_t *
-guts_replay_init (guts_thread_ctx_t *thread)
-{
- guts_replay_ctx_t *ctx = NULL;
- int32_t fd = open (thread->file, O_RDONLY);
-
- if (fd < 0) {
- gf_log ("glusterfs-guts", GF_LOG_DEBUG,
- "failed to open tio_file %s", thread->file);
- return ctx;
- } else {
- struct fuse_ll *guts_ll = CALLOC (1, sizeof (*guts_ll));
- ERR_ABORT (guts_ll);
-
- ctx = CALLOC (1, sizeof (*ctx));
- ERR_ABORT (ctx);
-
- if (ctx) {
- /* equivalent to fuse_new_session () */
- guts_ll->conn.async_read = 1;
- guts_ll->conn.max_write = UINT_MAX;
- guts_ll->conn.max_readahead = UINT_MAX;
- memcpy (&guts_ll->op, &fuse_ops, sizeof (struct fuse_lowlevel_ops));
- list_init_req (&guts_ll->list);
- list_init_req (&guts_ll->interrupts);
- guts_ll->owner = getuid ();
- guts_ll->userdata = thread;
-
- /* TODO: need to create transport_t object which whole of the glusterfs
- * so desperately depends on */
- transport_t *guts_trans = CALLOC (1, sizeof (*guts_trans));
-
- if (guts_trans) {
- memcpy (guts_trans, &guts_transport, sizeof (*guts_trans));
- guts_trans->ops = &guts_transport_ops;
- } else {
- gf_log ("glusterfs-guts", GF_LOG_ERROR,
- "failed to allocate memory for guts transport object");
- return NULL;
- }
-
- glusterfs_ctx_t *glfs_ctx = CALLOC (1, sizeof (*glfs_ctx));;
- if (glfs_ctx) {
- guts_trans->xl_private = glfs_ctx;
- guts_trans->xl = fuse_graph (thread->ctx->graph);
- }else {
- gf_log ("glusterfs-guts", GF_LOG_ERROR,
- "failed to allocate memory for glusterfs_ctx_t object");
- return NULL;
- }
-
- call_pool_t *pool = CALLOC (1, sizeof (call_pool_t));
- if (pool) {
- glfs_ctx->pool = pool;
- LOCK_INIT (&pool->lock);
- INIT_LIST_HEAD (&pool->all_frames);
- } else {
- gf_log ("glusterfs-guts", GF_LOG_ERROR,
- "failed to allocate memory for guts call pool");
- return NULL;
- }
-
- guts_trans->xl->ctx = glfs_ctx;
- guts_trans->init (guts_trans, NULL, guts_transport_notify);
- guts_ll->userdata = guts_trans;
-
- /* call fuse_init */
- guts_ll->op.init (guts_trans, NULL);
-
- {
- ctx->guts_ll = guts_ll;
- ctx->tio_fd = fd;
- ctx->inodes = get_new_dict ();
- ctx->fds = get_new_dict ();
- ctx->replies = get_new_dict ();
- INIT_LIST_HEAD(&ctx->requests);
- ctx->requests_dict = get_new_dict ();
- }
- } else {
- gf_log ("glusterfs-guts", GF_LOG_ERROR,
- "failed to allocate memory for guts_ctx_t object");
- return NULL;
- }
- }
-
- return ctx;
-}
-
-int32_t
-guts_replay (guts_thread_ctx_t *thread)
-{
- guts_req_t *entry = NULL;
- guts_replay_ctx_t *ctx = guts_replay_init (thread);
-
- if (!ctx) {
- gf_log ("glusterfs-guts", GF_LOG_ERROR,
- "failed to initialize guts_replay");
- return -1;
- } else {
- while ((entry = guts_read_entry (ctx))) {
- /* here we go ... execute the request */
- fuse_req_t req = CALLOC (1, sizeof (struct fuse_req));
- ino_t ino = entry->header.nodeid;
- void *arg = entry->arg;
-
- if (req) {
- req->f = ctx->guts_ll;
- req->unique = entry->header.unique;
- req->ctx.uid = entry->header.uid;
- req->ctx.pid = entry->header.pid;
-
- /* req->u.ni.data is unused void *, while running in replay mode. Making use of available real-estate
- * to store useful information of thread specific guts_replay_ctx */
- req->u.ni.data = (void *) ctx;
- /* req->ch is of type 'struct fuse_chan', which fuse uses only at the
- * time of the response it gets and is useful in sending the reply data to correct channel
- * in /dev/fuse. This is not useful for us, so we ignore it by keeping it NULL */
- list_init_req (req);
-
- fuse_ino_t new_ino = guts_inode_search (ctx, ino);
-
- if (guts_replay_fop[entry->header.opcode].func) {
- printf ("operation: %s && inode: %ld\n", guts_replay_fop[entry->header.opcode].name, new_ino);
- guts_replay_fop[entry->header.opcode].func (req, new_ino, arg);
- }
-
- if (entry->arg)
- free (entry->arg);
- free (entry);
- } else {
- gf_log ("glusterfs-guts", GF_LOG_ERROR,
- "failed to allocate memory for fuse_req_t object");
- return -1;
- }
- }
- }
- return 0;
-}
diff --git a/glusterfs-guts/src/guts-replay.h b/glusterfs-guts/src/guts-replay.h
deleted file mode 100644
index 81936e5c7..000000000
--- a/glusterfs-guts/src/guts-replay.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg)))
-
-void guts_reply_err (fuse_req_t, error_t);
-void guts_reply_open (fuse_req_t, struct fuse_file_info *);
-void guts_reply_statfs (fuse_req_t, struct statvfs *);
-
-typedef void (*guts_replay_fop_t)(fuse_req_t, fuse_ino_t, const void *);
-
-typedef struct {
- guts_replay_fop_t func;
- const char *name;
-} guts_replay_t;
-
-extern struct fuse_lowlevel_ops fuse_ops;
-
diff --git a/glusterfs-guts/src/guts-tables.c b/glusterfs-guts/src/guts-tables.c
deleted file mode 100644
index 71f3927a9..000000000
--- a/glusterfs-guts/src/guts-tables.c
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "guts-parse.h"
-#include "dict.h"
-#include "guts-tables.h"
-
-
-int32_t
-guts_attr_cmp (const struct stat *attr,
- const struct stat *old_attr)
-{
- return 0;
-}
-
-int32_t
-guts_statvfs_cmp (const struct statvfs *stbuf,
- const struct statvfs *old_stbuf)
-{
- return 0;
-}
-
-int32_t
-guts_flock_cmp (struct flock *lock,
- struct flock *old_lock)
-{
- return 0;
-}
-
-
-guts_req_t *
-guts_lookup_request (guts_replay_ctx_t *ctx, uint64_t unique)
-{
- guts_req_t *req = NULL;
-
- if (unique == 0) {
- if (list_empty (&ctx->requests))
- req = NULL;
- else {
- /* pick an entry from list, move it out of the list and return it to the caller */
- char *key = NULL;
-
- req = list_entry (ctx->requests.next, guts_req_t, list);
- list_del (&req->list);
-
- asprintf (&key, "%llu", req->header.unique);
-
- dict_set (ctx->requests_dict, key, data_from_static_ptr (req));
-
- if (key)
- free (key);
- }
- } else {
- char *key = NULL;
- data_t *req_data = NULL;
-
- asprintf (&key, "%llu", unique);
-
- req_data = dict_get (ctx->requests_dict, key);
-
- if (req_data)
- req = data_to_ptr (req_data);
-
- if (key)
- free (key);
- }
- return req;
-}
-
-guts_req_t *
-guts_get_request (guts_replay_ctx_t *ctx)
-{
- return guts_lookup_request (ctx, 0);
-}
-
-int32_t
-guts_add_request (guts_replay_ctx_t *ctx,
- guts_req_t *req)
-{
- list_add_tail (&req->list, &ctx->requests);
- return 0;
-}
-
-int32_t
-guts_add_reply (guts_replay_ctx_t *ctx,
- guts_reply_t *reply)
-{
- char *key = NULL;
- asprintf (&key, "%llu", reply->req.unique);
-
- dict_set (ctx->replies, key, data_from_static_ptr(reply));
-
- if (key)
- free(key);
-
- return 0;
-}
-
-
-guts_reply_t *
-guts_lookup_reply (guts_replay_ctx_t *ctx,
- uint64_t unique)
-{
- char *key = NULL;
- data_t *reply_data = NULL;
- guts_reply_t *new_reply = NULL;
-
- asprintf (&key, "%llu", unique);
- reply_data = dict_get (ctx->replies, key);
-
- if (reply_data) {
- new_reply = data_to_ptr (reply_data);
- dict_del (ctx->replies, key);
- } else {
- /* reply has not yet been read from tio file */
- new_reply = guts_read_reply (ctx, unique);
-
- if (!new_reply) {
- /* failed to fetch reply for 'unique' from tio file */
- new_reply;
- }
- }
-
- if (key)
- free(key);
-
- return new_reply;
-
-}
-
-int32_t
-guts_inode_update (guts_replay_ctx_t *ctx,
- fuse_ino_t old_ino,
- fuse_ino_t new_ino)
-{
- char *key = NULL;
- asprintf (&key, "%ld", old_ino);
- dict_set (ctx->inodes, key, data_from_uint64 (new_ino));
-
- if (key)
- free(key);
-
- return 0;
-}
-
-fuse_ino_t
-guts_inode_search (guts_replay_ctx_t *ctx,
- fuse_ino_t old_ino)
-{
- char *key = NULL;
- data_t *ino_data = NULL;
- fuse_ino_t new_ino = 0;
-
- asprintf (&key, "%ld", old_ino);
- ino_data = dict_get (ctx->inodes, key);
-
- if (ino_data)
- new_ino = data_to_uint64 (ino_data);
- else if (old_ino != /* TODO: FIXME */1 ) {
- new_ino = 0;
- } else
- new_ino = old_ino;
-
- if (key)
- free(key);
-
- return new_ino;
-}
-
-int32_t
-guts_fd_add (guts_replay_ctx_t *ctx,
- unsigned long old_fd,
- fd_t *new_fd)
-{
- char *key = NULL;
- asprintf (&key, "%ld", old_fd);
- dict_set (ctx->fds, key, data_from_static_ptr (new_fd));
-
- if (key)
- free(key);
-
- return 0;
-}
-
-fd_t *
-guts_fd_search (guts_replay_ctx_t *ctx,
- unsigned long old_fd)
-{
- char *key = NULL;
- data_t *fd_data = NULL;
- fd_t *new_fd = NULL;
-
- asprintf (&key, "%ld", old_fd);
- fd_data = dict_get (ctx->fds, key);
-
- if (fd_data)
- new_fd = data_to_ptr (fd_data);
-
- if (key)
- free(key);
-
- return new_fd;
-}
-
-int32_t
-guts_delete_fd (guts_replay_ctx_t *ctx,
- unsigned long old_fd)
-{
- char *key = NULL;
- data_t *fd_data = NULL;
-
- asprintf (&key, "%ld", old_fd);
- fd_data = dict_get (ctx->fds, key);
-
- if (fd_data)
- dict_del (ctx->fds, key);
-
- if (key)
- free(key);
-
- return 0;
-}
-
-inline int32_t
-guts_get_opcode (guts_replay_ctx_t *ctx,
- uint64_t unique)
-{
- guts_req_t *req = guts_lookup_request (ctx, unique);
-
- return ((req == NULL) ? -1 : req->header.opcode);
-
-}
diff --git a/glusterfs-guts/src/guts-tables.h b/glusterfs-guts/src/guts-tables.h
deleted file mode 100644
index 8aa89078a..000000000
--- a/glusterfs-guts/src/guts-tables.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _GUTS_TABLES_H_
-#define _GUTS_TABLES_H_
-
-
-int32_t
-guts_attr_cmp (const struct stat *attr,
- const struct stat *old_attr);
-
-int32_t
-guts_statvfs_cmp (const struct statvfs *stbuf,
- const struct statvfs *old_stbuf);
-
-int32_t
-guts_inode_update (guts_replay_ctx_t *ctx,
- fuse_ino_t old_ino,
- fuse_ino_t new_ino);
-
-fuse_ino_t
-guts_inode_search (guts_replay_ctx_t *ctx,
- fuse_ino_t old_ino);
-
-int32_t
-guts_add_request (guts_replay_ctx_t *,
- guts_req_t *);
-
-guts_req_t *
-guts_get_request (guts_replay_ctx_t *ctx);
-
-guts_req_t *
-guts_lookup_request (guts_replay_ctx_t *ctx,
- uint64_t unique);
-
-guts_reply_t *
-guts_lookup_reply (guts_replay_ctx_t *ctx,
- uint64_t unique);
-
-int32_t
-guts_add_reply (guts_replay_ctx_t *ctx,
- guts_reply_t *reply);
-
-int32_t
-guts_flock_cmp (struct flock *lock,
- struct flock *old_lock);
-
-fd_t *
-guts_fd_search (guts_replay_ctx_t *ctx,
- unsigned long old_fd);
-
-int32_t
-guts_delete_fd (guts_replay_ctx_t *,
- unsigned long);
-
-int32_t
-guts_get_opcode (guts_replay_ctx_t *ctx,
- uint64_t unique);
-int32_t
-guts_fd_add (guts_replay_ctx_t *ctx,
- unsigned long old_fd,
- fd_t *new_fd);
-
-#endif
diff --git a/glusterfs-guts/src/guts-trace.c b/glusterfs-guts/src/guts-trace.c
deleted file mode 100644
index 40e2194ee..000000000
--- a/glusterfs-guts/src/guts-trace.c
+++ /dev/null
@@ -1,650 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "glusterfs-guts.h"
-#include <signal.h>
-
-#include "guts-parse.h"
-#include "guts-tables.h"
-#include "guts-trace.h"
-
-static xlator_t *
-fuse_graph (xlator_t *graph)
-{
- xlator_t *top = NULL;
- xlator_list_t *xlchild;
-
- top = CALLOC (1, sizeof (*top));
- ERR_ABORT (top);
- xlchild = CALLOC (1, sizeof(*xlchild));
- ERR_ABORT (xlchild);
- xlchild->xlator = graph;
- top->children = xlchild;
- top->ctx = graph->ctx;
- top->next = graph;
- graph->parent = top;
-
- return top;
-}
-
-int32_t
-fuse_thread (pthread_t *thread, void *data);
-
-int32_t
-guts_trace (guts_ctx_t *guts_ctx)
-{
- transport_t *mp = NULL;
- glusterfs_ctx_t ctx = {
- .poll_type = SYS_POLL_TYPE_EPOLL,
- };
- xlator_t *graph = NULL;
- call_pool_t *pool = NULL;
- int32_t ret = -1;
- pthread_t thread;
- /* Ignore SIGPIPE */
- signal (SIGPIPE, SIG_IGN);
-
-#if HAVE_BACKTRACE
- /* Handle SIGABORT and SIGSEGV */
- signal (SIGSEGV, gf_print_trace);
- signal (SIGABRT, gf_print_trace);
-#endif /* HAVE_BACKTRACE */
-
- ret = guts_tio_init (guts_ctx->file);
-
- if (ret < 0) {
- gf_log ("glusterfs-guts", GF_LOG_ERROR,
- "running in trace mode: failed to open tio file %s", guts_ctx->file);
- return -1;
- }
-
- pool = ctx.pool = CALLOC (1, sizeof (call_pool_t));
- ERR_ABORT (ctx.pool);
- LOCK_INIT (&pool->lock);
- INIT_LIST_HEAD (&pool->all_frames);
-
- /* glusterfs_mount has to be ideally placed after all the initialisation stuff */
- if (!(mp = glusterfs_mount (&ctx, guts_ctx->mountpoint))) {
- gf_log ("glusterfs-guts", GF_LOG_ERROR, "Unable to mount glusterfs");
- return -1;
- }
-
- gf_timer_registry_init (&ctx);
- graph = guts_ctx->graph;
-
- if (!graph) {
- gf_log ("glusterfs-guts", GF_LOG_ERROR,
- "Unable to get xlator graph for mount_point %s", guts_ctx->mountpoint);
- transport_disconnect (mp);
- return -1;
- }
-
- ctx.graph = graph;
-
- mp->xl = fuse_graph (graph);
- mp->xl->ctx = &ctx;
-
- fuse_thread (&thread, mp);
-
- while (!poll_iteration (&ctx));
-
- return 0;
-}
-
-
-
-static void
-guts_name (struct fuse_in_header *in,
- const void *inargs)
-{
- char *name = (char *) inargs;
-
- guts_req_dump (in, name, strlen (name));
-}
-
-static void
-guts_noarg (struct fuse_in_header *in,
- const void *inargs)
-{
- guts_req_dump (in, NULL, 0);
-}
-
-
-static void
-guts_setattr (struct fuse_in_header *in,
- const void *inargs)
-{
- struct fuse_setattr_in *arg = (struct fuse_setattr_in *)inargs;
- guts_req_dump (in, arg, sizeof (*arg));
-}
-
-static void
-guts_access (struct fuse_in_header *in,
- const void *inargs)
-{
- struct fuse_access_in *arg = (struct fuse_access_in *)inargs;
- guts_req_dump (in, arg, sizeof (*arg));
-}
-
-
-static void
-guts_mknod (struct fuse_in_header *in,
- const void *inargs)
-{
- struct fuse_mknod_in *arg = (struct fuse_mknod_in *) inargs;
- guts_req_dump (in, arg, sizeof (*arg));
-}
-
-static void
-guts_mkdir (struct fuse_in_header *in,
- const void *inargs)
-{
- struct fuse_mkdir_in *arg = (struct fuse_mkdir_in *) inargs;
- guts_req_dump (in, arg, sizeof (*arg));
-}
-
-
-static void
-guts_symlink (struct fuse_in_header *in,
- const void *inargs)
-{
- char *name = (char *) inargs;
- char *linkname = ((char *) inargs) + strlen ((char *) inargs) + 1;
- struct guts_symlink_in symlink_in;
-
- strcpy (symlink_in.name, name);
- strcpy (symlink_in.linkname, linkname);
- guts_req_dump (in, &symlink_in, sizeof (symlink_in));
-}
-
-static void
-guts_rename (struct fuse_in_header *in,
- const void *inargs)
-{
- struct fuse_rename_in *arg = (struct fuse_rename_in *) inargs;
- char *oldname = PARAM(arg);
- char *newname = oldname + strlen (oldname) + 1;
- struct guts_rename_in rename_in;
-
- memset (&rename_in, 0, sizeof (rename_in));
- memcpy (&rename_in, arg, sizeof (*arg));
- strcpy (rename_in.oldname, oldname);
- strcpy (rename_in.newname, newname);
-
- guts_req_dump (in, &rename_in, sizeof (rename_in));
-
-}
-
-static void
-guts_link (struct fuse_in_header *in,
- const void *inargs)
-{
- struct fuse_link_in *arg = (struct fuse_link_in *) inargs;
-
- guts_req_dump (in, arg, sizeof (*arg));
-}
-
-
-static void
-guts_open (struct fuse_in_header *in,
- const void *inargs)
-{
- struct fuse_open_in *arg = (struct fuse_open_in *) inargs;
-
- guts_req_dump (in, arg, sizeof (*arg));
-}
-
-static void
-guts_create (struct fuse_in_header *in,
- const void *inargs)
-{
- struct guts_create_in create_in;
- struct fuse_open_in *arg = (struct fuse_open_in *) inargs;
- char *name = PARAM (arg);
-
- memset (&create_in, 0, sizeof (create_in));
- memcpy (&create_in.open_in, arg, sizeof (*arg));
- memcpy (&create_in.name, name, strlen (name));
-
- guts_req_dump (in, &create_in, sizeof (create_in));
-}
-
-
-static void
-guts_read(struct fuse_in_header *in,
- const void *inarg)
-{
- struct fuse_read_in *arg = (struct fuse_read_in *) inarg;
- guts_req_dump (in, arg, sizeof (*arg));
-}
-
-static void
-guts_write(struct fuse_in_header *in,
- const void *inarg)
-{
- /* TODO: where the hell is the data to be written??? */
- struct fuse_write_in *arg = (struct fuse_write_in *) inarg;
- guts_req_dump (in, arg, sizeof (*arg));
-}
-
-static void
-guts_flush(struct fuse_in_header *in,
- const void *inarg)
-{
- struct fuse_flush_in *arg = (struct fuse_flush_in *) inarg;
- guts_req_dump (in, arg, sizeof (*arg));
-}
-
-static void
-guts_release(struct fuse_in_header *in,
- const void *inarg)
-{
- struct fuse_release_in *arg = (struct fuse_release_in *) inarg;
- guts_req_dump (in, arg, sizeof (*arg));
-}
-
-static void
-guts_fsync(struct fuse_in_header *in,
- const void *inarg)
-{
- struct fuse_fsync_in *arg = (struct fuse_fsync_in *) inarg;
- guts_req_dump (in, arg, sizeof (*arg));
-}
-
-
-static void
-guts_readdir(struct fuse_in_header *in,
- const void *inarg)
-{
- struct fuse_read_in *arg = (struct fuse_read_in *) inarg;
- guts_req_dump (in, arg, sizeof (*arg));
-}
-
-static void
-guts_releasedir(struct fuse_in_header *in,
- const void *inarg)
-{
- struct fuse_release_in *arg = (struct fuse_release_in *) inarg;
- guts_req_dump (in, arg, sizeof (*arg));
-}
-
-static void
-guts_fsyncdir(struct fuse_in_header *in,
- const void *inarg)
-{
- struct fuse_fsync_in *arg = (struct fuse_fsync_in *) inarg;
- guts_req_dump (in, arg, sizeof (*arg));
-}
-
-
-static void
-guts_setxattr(struct fuse_in_header *in,
- const void *inarg)
-{
- struct fuse_setxattr_in *arg = (struct fuse_setxattr_in *) inarg;
- char *name = PARAM(arg);
- char *value = name + strlen(name) + 1;
- struct guts_xattr_in setxattr_in;
-
- memset (&setxattr_in, 0, sizeof (setxattr_in));
- memcpy (&setxattr_in, arg, sizeof (*arg));
- strcpy (setxattr_in.name, name);
- strcpy (setxattr_in.value, value);
-
- guts_req_dump (in, &setxattr_in, sizeof (setxattr_in));
-
-}
-
-static void
-guts_getxattr(struct fuse_in_header *in,
- const void *inarg)
-{
- struct fuse_getxattr_in *arg = (struct fuse_getxattr_in *) inarg;
- guts_req_dump (in, arg, sizeof (*arg));
-}
-
-guts_log_t guts_log[] = {
- [FUSE_LOOKUP] = { guts_name, "lookup" },
- [FUSE_GETATTR] = { guts_noarg, "getattr" },
- [FUSE_SETATTR] = { guts_setattr, "setattr" },
- [FUSE_ACCESS] = { guts_access, "access" },
- [FUSE_READLINK] = { guts_noarg, "readlink" },
- [FUSE_MKNOD] = { guts_mknod, "mknod" },
- [FUSE_MKDIR] = { guts_mkdir, "mkdir" },
- [FUSE_UNLINK] = { guts_name, "unlink" },
- [FUSE_RMDIR] = { guts_name, "rmdir" },
- [FUSE_SYMLINK] = { guts_symlink, "symlink" },
- [FUSE_RENAME] = { guts_rename, "rename" },
- [FUSE_LINK] = { guts_link, "link" },
- [FUSE_CREATE] = { guts_create, "create" },
- [FUSE_OPEN] = { guts_open, "open" },
- [FUSE_READ] = { guts_read, "read" },
- [FUSE_WRITE] = { guts_write, "write" },
- [FUSE_FLUSH] = { guts_flush, "flush" },
- [FUSE_RELEASE] = { guts_release, "release" },
- [FUSE_FSYNC] = { guts_fsync, "fsync" },
- [FUSE_OPENDIR] = { guts_open, "opendir" },
- [FUSE_READDIR] = { guts_readdir, "readdir" },
- [FUSE_RELEASEDIR] = { guts_releasedir, "releasedir" },
- [FUSE_FSYNCDIR] = { guts_fsyncdir, "fsyncdir" },
- [FUSE_STATFS] = { guts_noarg, "statfs" },
- [FUSE_SETXATTR] = { guts_setxattr, "setxattr" },
- [FUSE_GETXATTR] = { guts_getxattr, "getxattr" },
- [FUSE_LISTXATTR] = { guts_getxattr, "listxattr" },
- [FUSE_REMOVEXATTR] = { guts_name, "removexattr" },
-};
-
-/* used for actual tracing task */
-
-int32_t
-guts_log_req (void *buf,
- int32_t len)
-{
- struct fuse_in_header *in = buf;
- const void *inargs = NULL;
- int32_t header_len = sizeof (struct fuse_in_header);
-
- if (header_len < len ) {
- inargs = buf + header_len;
- gf_log ("guts-gimmik", GF_LOG_ERROR,
- "unique: %llu, opcode: %s (%i), nodeid: %lu, insize: %zu\n",
- (unsigned long long) in->unique, "<null>",
- /*opname((enum fuse_opcode) in->opcode),*/ in->opcode,
- (unsigned long) in->nodeid, len);
- if (guts_log[in->opcode].func)
- guts_log[in->opcode].func (in, inargs);
-
- } else {
- gf_log ("guts", GF_LOG_ERROR,
- "header is longer than the buffer passed");
- }
-
- return 0;
-}
-
-
-int
-guts_reply_err (fuse_req_t req, int err)
-{
- if (IS_TRACE(req)) {
- /* we are tracing calls, just dump the reply to file and continue with fuse_reply_err() */
- guts_reply_dump (req, &err, sizeof (err));
- return fuse_reply_err (req, err);
- } else {
- /* we are replaying. ;) */
- guts_replay_ctx_t *ctx = (guts_replay_ctx_t *) req->u.ni.data;
- int32_t opcode = guts_get_opcode (ctx, req->unique);
-
- /* see if we are called by close/closedir, if yes remove do a guts_fd_delete () */
- if (opcode == FUSE_RELEASEDIR || opcode == FUSE_RELEASE) {
- guts_req_t *request = guts_lookup_request (ctx, req->unique);
- struct fuse_release_in *arg = request->arg;
-
- guts_delete_fd (ctx, arg->fh);
- } else if (err == -1) {
- /* error while replaying?? just quit as of now
- * TODO: this is not the right way */
- printf (":O - glusterfs-guts: replay failed\n");
- exit (0);
- }
-
- return 0;
- }
-}
-
-void
-guts_reply_none (fuse_req_t req)
-{
- if (IS_TRACE(req)) {
- guts_reply_dump (req, NULL, 0);
- fuse_reply_none (req);
- } else {
- return;
- }
-}
-
-int
-guts_reply_entry (fuse_req_t req,
- const struct fuse_entry_param *e)
-{
- if (IS_TRACE(req)) {
- guts_reply_dump (req, e, sizeof (*e));
- return fuse_reply_entry (req, e);
- } else {
- /* TODO: is dict_set() the best solution for this case?? */
- guts_replay_ctx_t *ctx = (guts_replay_ctx_t *) req->u.ni.data;
- guts_reply_t *reply = guts_lookup_reply (ctx, req->unique);
- struct fuse_entry_param *old_entry = (struct fuse_entry_param *)reply->arg;
- guts_inode_update (ctx, old_entry->ino, e->ino);
- return 0;
- }
-}
-
-int
-guts_reply_create (fuse_req_t req,
- const struct fuse_entry_param *e,
- const struct fuse_file_info *f)
-{
- if (IS_TRACE(req)) {
- struct guts_create_out create_out;
-
- memset (&create_out, 0, sizeof (create_out));
- memcpy (&create_out.e, e, sizeof (*e));
- memcpy (&create_out.f, f, sizeof (*f));
-
- guts_reply_dump (req, &create_out, sizeof (create_out));
- return fuse_reply_create (req, e, f);
- } else {
- guts_replay_ctx_t *ctx = (guts_replay_ctx_t *) req->u.ni.data;
- guts_reply_t *reply = guts_lookup_reply (ctx, req->unique);
- struct guts_create_out *old_createout = (struct guts_create_out *) reply->arg;
- struct fuse_file_info *old_f = &old_createout->f;
-
- /* add a new fd and map it to the file handle, as stored in tio file */
- guts_fd_add (ctx, old_f->fh, (fd_t *)(long)f->fh);
-
- return 0;
- }
-}
-
-
-int
-guts_reply_attr (fuse_req_t req,
- const struct stat *attr,
- double attr_timeout)
-{
- if (IS_TRACE(req)) {
- struct guts_attr_out attr_out;
-
- memcpy (&attr_out.attr, attr, sizeof (*attr));
- attr_out.attr_timeout = attr_timeout;
-
- guts_reply_dump (req, &attr_out, sizeof (attr_out));
- return fuse_reply_attr (req, attr, attr_timeout);
- } else {
- guts_replay_ctx_t *ctx = (guts_replay_ctx_t *) req->u.ni.data;
- guts_reply_t *reply = guts_lookup_reply (ctx, req->unique);
- struct guts_attr_out *old_attrout = (struct guts_attr_out *) reply->arg;
-
- if (!guts_attr_cmp (attr, &old_attrout->attr))
- return 0;
- else {
- gf_log ("glusterfs-guts", GF_LOG_ERROR,
- "attr failed.");
- return -1;
- }
- }
-}
-
-int
-guts_reply_readlink (fuse_req_t req,
- const char *linkname)
-{
- if (IS_TRACE(req)) {
- guts_reply_dump (req, linkname, strlen (linkname));
- return fuse_reply_readlink (req, linkname);
- } else {
- guts_replay_ctx_t *ctx = (guts_replay_ctx_t *) req->u.ni.data;
- guts_reply_t *reply = guts_lookup_reply (ctx, req->unique);
- char *old_linkname = (char *) reply->arg;
- if (!strcmp (linkname, old_linkname))
- return 0;
- else {
- gf_log ("glusterfs-guts", GF_LOG_ERROR,
- "readlink failed. linkname in tio file: %s \n linkname recieved on replay: %s",
- old_linkname, linkname);
- return -1;
- }
- }
-}
-
-int
-guts_reply_open (fuse_req_t req,
- const struct fuse_file_info *f)
-{
- if (IS_TRACE(req)) {
- guts_reply_dump (req, f, sizeof (*f));
- return fuse_reply_open (req, f);
- } else {
- /* the fd we recieve here is the valid fd for our current session, map the indicative number we have
- * in mapping */
- guts_replay_ctx_t *ctx = (guts_replay_ctx_t *) req->u.ni.data;
- guts_reply_t *reply = guts_lookup_reply (ctx, req->unique);
-
- if (reply) {
- struct fuse_file_info *old_f = reply->arg;
-
- /* add a new fd and map it to the file handle, as stored in tio file */
- guts_fd_add (ctx, old_f->fh, (fd_t *)(long)f->fh);
- }
-
- return 0;
- }
-}
-
-int
-guts_reply_write (fuse_req_t req,
- size_t count)
-{
- if (IS_TRACE(req)) {
- guts_reply_dump (req, &count, sizeof (count));
- return fuse_reply_write (req, count);
- } else {
- guts_replay_ctx_t *ctx = (guts_replay_ctx_t *) req->u.ni.data;
- guts_reply_t *reply = guts_lookup_reply (ctx, req->unique);
- size_t *old_count = reply->arg;
- if (count == *old_count)
- return 0;
- else {
- gf_log ("glusterfs-guts", GF_LOG_ERROR,
- "writev failed. old writev count: %d \n writev count on replay: %d",
- old_count, count);
- return -1;
- }
- }
-}
-
-int
-guts_reply_buf (fuse_req_t req,
- const char *buf,
- size_t size)
-{
- if (IS_TRACE(req)) {
- guts_reply_dump (req, buf, size);
- return fuse_reply_buf (req, buf, size);
- } else {
- guts_replay_ctx_t *ctx = (guts_replay_ctx_t *) req->u.ni.data;
- guts_reply_t *reply = guts_lookup_reply (ctx, req->unique);
- char *old_buf = reply->arg;
- size_t old_size = reply->arg_len;
- if ((size == old_size) && (!memcmp (buf, old_buf, size)))
- return 0;
- else {
- gf_log ("glusterfs-guts", GF_LOG_ERROR,
- "readv failed. old readv size: %d \n readv size on replay: %d",
- old_size, size);
- return -1;
- }
- }
-}
-
-int
-guts_reply_statfs (fuse_req_t req,
- const struct statvfs *stbuf)
-{
- if (IS_TRACE(req)) {
- guts_reply_dump (req, stbuf, sizeof (*stbuf));
- return fuse_reply_statfs (req, stbuf);
- } else {
- guts_replay_ctx_t *ctx = (guts_replay_ctx_t *) req->u.ni.data;
- guts_reply_t *reply = guts_lookup_reply (ctx, req->unique);
- struct statvfs *old_stbuf = reply->arg;
-
- if (!guts_statvfs_cmp (old_stbuf, stbuf))
- return 0;
- else {
- gf_log ("glusterfs-guts", GF_LOG_ERROR,
- "statfs failed.");
- return -1;
- }
- }
-}
-
-int
-guts_reply_xattr (fuse_req_t req,
- size_t count)
-{
- if (IS_TRACE(req)) {
- guts_reply_dump (req, &count, sizeof (count));
- return fuse_reply_xattr (req, count);
- } else {
- guts_replay_ctx_t *ctx = (guts_replay_ctx_t *) req->u.ni.data;
- guts_reply_t *reply = guts_lookup_reply (ctx, req->unique);
- size_t *old_count = reply->arg;
- if (count == *old_count)
- return 0;
- else {
- gf_log ("glusterfs-guts", GF_LOG_ERROR,
- "xattr failed. old xattr count: %d \n xattr count on replay: %d",
- old_count, count);
- return -1;
- }
- }
-}
-
-int
-guts_reply_lock (fuse_req_t req,
- struct flock *lock)
-{
- if (IS_TRACE(req)) {
- guts_reply_dump (req, lock , sizeof (*lock));
- return fuse_reply_lock (req, lock);
- } else {
- guts_replay_ctx_t *ctx = (guts_replay_ctx_t *) req->u.ni.data;
- guts_reply_t *reply = guts_lookup_reply (ctx, req->unique);
- struct flock *old_lock = (struct flock *)reply->arg;
- if (!guts_flock_cmp (lock, old_lock))
- return 0;
- else {
- gf_log ("glusterfs-guts", GF_LOG_ERROR,
- "lock failed.");
- return -1;
- }
- }
-}
diff --git a/glusterfs-guts/src/guts-trace.h b/glusterfs-guts/src/guts-trace.h
deleted file mode 100644
index 2b6a064b8..000000000
--- a/glusterfs-guts/src/guts-trace.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#define IS_TRACE(req) (req->ch != NULL)
-
-#define PARAM(inarg) (((char *)(inarg)) + sizeof(*(inarg)))
-
-struct guts_symlink_in {
- char name[NAME_MAX];
- char linkname[NAME_MAX];
-};
-
-struct guts_create_in {
- struct fuse_open_in open_in;
- char name[NAME_MAX];
-};
-
-struct guts_xattr_in {
- struct fuse_setxattr_in xattr;
- char name[NAME_MAX];
- char value[NAME_MAX];
-};
-
-struct guts_rename_in {
- struct fuse_rename_in rename;
- char oldname[NAME_MAX];
- char newname[NAME_MAX];
-};
-
-struct guts_create_out {
- struct fuse_entry_param e;
- struct fuse_file_info f;
-};
-
-struct guts_attr_out {
- struct stat attr;
- double attr_timeout;
-};
diff --git a/glusterfs-hadoop/0.20.2/conf/core-site.xml b/glusterfs-hadoop/0.20.2/conf/core-site.xml
new file mode 100644
index 000000000..d7f75fca7
--- /dev/null
+++ b/glusterfs-hadoop/0.20.2/conf/core-site.xml
@@ -0,0 +1,38 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+ <property>
+ <name>fs.glusterfs.impl</name>
+ <value>org.apache.hadoop.fs.glusterfs.GlusterFileSystem</value>
+ </property>
+
+ <property>
+ <name>fs.default.name</name>
+ <value>glusterfs://192.168.1.36:9000</value>
+ </property>
+
+ <property>
+ <name>fs.glusterfs.volname</name>
+ <value>volume-dist-rep</value>
+ </property>
+
+ <property>
+ <name>fs.glusterfs.mount</name>
+ <value>/mnt/glusterfs</value>
+ </property>
+
+ <property>
+ <name>fs.glusterfs.server</name>
+ <value>192.168.1.36</value>
+ </property>
+
+ <property>
+ <name>quick.slave.io</name>
+ <value>Off</value>
+ </property>
+
+</configuration>
diff --git a/glusterfs-hadoop/0.20.2/pom.xml b/glusterfs-hadoop/0.20.2/pom.xml
new file mode 100644
index 000000000..fe661d40f
--- /dev/null
+++ b/glusterfs-hadoop/0.20.2/pom.xml
@@ -0,0 +1,36 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>org.apache.hadoop.fs.glusterfs</groupId>
+ <artifactId>glusterfs</artifactId>
+ <packaging>jar</packaging>
+ <version>0.20.2-0.1</version>
+ <name>glusterfs</name>
+ <url>http://maven.apache.org</url>
+ <dependencies>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>3.8.1</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-core</artifactId>
+ <version>0.20.2</version>
+ </dependency>
+ </dependencies>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>2.3.2</version>
+ <configuration>
+ <source>1.5</source>
+ <target>1.5</target>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+</project>
diff --git a/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSBrickClass.java b/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSBrickClass.java
new file mode 100644
index 000000000..e633b8aae
--- /dev/null
+++ b/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSBrickClass.java
@@ -0,0 +1,109 @@
+/**
+ *
+ * Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
+ * This file is part of GlusterFS.
+ *
+ * Licensed under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.fs.glusterfs;
+
+import java.io.*;
+
+public class GlusterFSBrickClass {
+ String host;
+ String exportedFile;
+ long start;
+ long end;
+ boolean isChunked;
+ int stripeSize; // Stripe size in bytes
+ int nrStripes; // number of stripes
+ int switchCount; // for SR, DSR - number of replicas of each stripe
+ // -1 for others
+
+ public GlusterFSBrickClass (String brick, long start, long len, boolean flag,
+ int stripeSize, int nrStripes, int switchCount)
+ throws IOException {
+ this.host = brick2host(brick);
+ this.exportedFile = brick2file(brick);
+ this.start = start;
+ this.end = start + len;
+ this.isChunked = flag;
+ this.stripeSize = stripeSize;
+ this.nrStripes = nrStripes;
+ this.switchCount = switchCount;
+ }
+
+ public boolean isChunked () {
+ return isChunked;
+ }
+
+ public String brickIsLocal(String hostname) {
+ String path = null;
+ File f = null;
+ if (host.equals(hostname))
+ path = exportedFile;
+
+ return path;
+ }
+
+ public int[] getBrickNumberInTree(long start, int len) {
+ long end = len;
+ int startNodeInTree = ((int) (start / stripeSize)) % nrStripes;
+ int endNodeInTree = ((int) ((start + len) / stripeSize)) % nrStripes;
+
+ if (startNodeInTree != endNodeInTree) {
+ end = (start - (start % stripeSize)) + stripeSize;
+ end -= start;
+ }
+
+ return new int[] {startNodeInTree, endNodeInTree, (int) end};
+ }
+
+ public boolean brickHasFilePart(int nodeInTree, int nodeLoc) {
+ if (switchCount == -1)
+ return (nodeInTree == nodeLoc);
+
+ nodeInTree *= switchCount;
+ for (int i = nodeInTree; i < (nodeInTree + switchCount); i++) {
+ if (i == nodeLoc)
+ return true;
+ }
+
+ return false;
+ }
+
+ public String brick2host (String brick)
+ throws IOException {
+ String[] hf = null;
+
+ hf = brick.split(":");
+ if (hf.length != 2)
+ throw new IOException("Error getting hostname from brick");
+
+ return hf[0];
+ }
+
+ public String brick2file (String brick)
+ throws IOException {
+ String[] hf = null;
+
+ hf = brick.split(":");
+ if (hf.length != 2)
+ throw new IOException("Error getting hostname from brick");
+
+ return hf[1];
+ }
+
+}
diff --git a/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSBrickRepl.java b/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSBrickRepl.java
new file mode 100644
index 000000000..11454e636
--- /dev/null
+++ b/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSBrickRepl.java
@@ -0,0 +1,52 @@
+/**
+ *
+ * Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
+ * This file is part of GlusterFS.
+ *
+ * Licensed under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.fs.glusterfs;
+
+import java.io.*;
+
+public class GlusterFSBrickRepl {
+ private String[] replHost;
+ private long start;
+ private long len;
+ private int cnt;
+
+ GlusterFSBrickRepl(int replCount, long start, long len) {
+ this.replHost = new String[replCount];
+ this.start = start;
+ this.len = len;
+ this.cnt = 0;
+ }
+
+ public void addHost (String host) {
+ this.replHost[cnt++] = host;
+ }
+
+ public String[] getReplHosts () {
+ return this.replHost;
+ }
+
+ public long getStartLen () {
+ return this.start;
+ }
+
+ public long getOffLen () {
+ return this.len;
+ }
+}
diff --git a/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSXattr.java b/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSXattr.java
new file mode 100644
index 000000000..455dda97e
--- /dev/null
+++ b/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSXattr.java
@@ -0,0 +1,472 @@
+/**
+ *
+ * Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
+ * This file is part of GlusterFS.
+ *
+ * Licensed under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.fs.glusterfs;
+
+import java.net.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.io.*;
+import java.util.HashMap;
+import java.util.TreeMap;
+import java.util.ArrayList;
+import java.util.Iterator;
+
+import org.apache.hadoop.fs.BlockLocation;
+
+public class GlusterFSXattr {
+
+ public enum LAYOUT { D, S, R, DS, DR, SR, DSR }
+ public enum CMD { GET_HINTS, GET_REPLICATION, GET_BLOCK_SIZE, CHECK_FOR_QUICK_IO }
+
+ private static String hostname;
+
+ public GlusterFSXattr() { }
+
+ public static String brick2host (String brick)
+ throws IOException {
+ String[] hf = null;
+
+ hf = brick.split(":");
+ if (hf.length != 2) {
+ System.out.println("brick not of format hostname:path");
+ throw new IOException("Error getting hostname from brick");
+ }
+
+ return hf[0];
+ }
+
+ public static String brick2file (String brick)
+ throws IOException {
+ String[] hf = null;
+
+ hf = brick.split(":");
+ if (hf.length != 2) {
+ System.out.println("brick not of format hostname:path");
+ throw new IOException("Error getting hostname from brick");
+ }
+
+ return hf[1];
+ }
+
+ public static BlockLocation[] getPathInfo (String filename, long start, long len)
+ throws IOException {
+ HashMap<String, ArrayList<String>> vol = null;
+ HashMap<String, Integer> meta = new HashMap<String, Integer>();
+
+ vol = execGetFattr(filename, meta, CMD.GET_HINTS);
+
+ return getHints(vol, meta, start, len, null);
+ }
+
+ public static long getBlockSize (String filename)
+ throws IOException {
+ HashMap<String, ArrayList<String>> vol = null;
+ HashMap<String, Integer> meta = new HashMap<String, Integer>();
+
+ vol = execGetFattr(filename, meta, CMD.GET_BLOCK_SIZE);
+
+ if (!meta.containsKey("block-size"))
+ return 0;
+
+ return (long) meta.get("block-size");
+
+ }
+
+ public static short getReplication (String filename)
+ throws IOException {
+ HashMap<String, ArrayList<String>> vol = null;
+ HashMap<String, Integer> meta = new HashMap<String, Integer>();
+
+ vol = execGetFattr(filename, meta, CMD.GET_REPLICATION);
+
+ return (short) getReplicationFromLayout(vol, meta);
+
+ }
+
+ public static TreeMap<Integer, GlusterFSBrickClass> quickIOPossible (String filename, long start,
+ long len)
+ throws IOException {
+ String realpath = null;
+ HashMap<String, ArrayList<String>> vol = null;
+ HashMap<String, Integer> meta = new HashMap<String, Integer>();
+ TreeMap<Integer, GlusterFSBrickClass> hnts = new TreeMap<Integer, GlusterFSBrickClass>();
+
+ vol = execGetFattr(filename, meta, CMD.GET_HINTS);
+ getHints(vol, meta, start, len, hnts);
+
+ if (hnts.size() == 0)
+ return null; // BOOM !!
+
+ // DEBUG - dump hnts here
+ return hnts;
+ }
+
+ public static HashMap<String, ArrayList<String>> execGetFattr (String filename,
+ HashMap<String, Integer> meta,
+ CMD cmd)
+ throws IOException {
+ Process p = null;
+ BufferedReader brInput = null;
+ String s = null;
+ String cmdOut = null;
+ String getfattrCmd = null;
+ String xlator = null;
+ String enclosingXl = null;
+ String enclosingXlVol = null;
+ String key = null;
+ String layout = "";
+ int rcount = 0;
+ int scount = 0;
+ int dcount = 0;
+ int count = 0;
+
+ HashMap<String, ArrayList<String>> vol = new HashMap<String, ArrayList<String>>();
+
+ getfattrCmd = "getfattr -m . -n trusted.glusterfs.pathinfo " + filename;
+
+ p = Runtime.getRuntime().exec(getfattrCmd);
+ brInput = new BufferedReader(new InputStreamReader(p.getInputStream()));
+
+ cmdOut = "";
+ while ( (s = brInput.readLine()) != null )
+ cmdOut += s;
+
+ /**
+ * TODO: Use a single regex for extracting posix paths as well
+ * as xlator counts for layout matching.
+ */
+
+ Pattern pattern = Pattern.compile("<(.*?)[:\\(](.*?)>");
+ Matcher matcher = pattern.matcher(cmdOut);
+
+ Pattern p_px = Pattern.compile(".*?:(.*)");
+ Matcher m_px;
+ String gibberish_path;
+
+ s = null;
+ while (matcher.find()) {
+ xlator = matcher.group(1);
+ if (xlator.equalsIgnoreCase("posix")) {
+ if (enclosingXl.equalsIgnoreCase("replicate"))
+ count = rcount;
+ else if (enclosingXl.equalsIgnoreCase("stripe"))
+ count = scount;
+ else if (enclosingXl.equalsIgnoreCase("distribute"))
+ count = dcount;
+ else
+ throw new IOException("Unknown Translator: " + enclosingXl);
+
+ key = enclosingXl + "-" + count;
+
+ if (vol.get(key) == null)
+ vol.put(key, new ArrayList<String>());
+
+ gibberish_path = matcher.group(2);
+
+ /* extract posix path from the gibberish string */
+ m_px = p_px.matcher(gibberish_path);
+ if (!m_px.find())
+ throw new IOException("Cannot extract posix path");
+
+ vol.get(key).add(m_px.group(1));
+ continue;
+ }
+
+ enclosingXl = xlator;
+ enclosingXlVol = matcher.group(2);
+
+ if (xlator.equalsIgnoreCase("replicate"))
+ if (rcount++ != 0)
+ continue;
+
+ if (xlator.equalsIgnoreCase("stripe")) {
+ if (scount++ != 0)
+ continue;
+
+
+ Pattern ps = Pattern.compile("\\[(\\d+)\\]");
+ Matcher ms = ps.matcher(enclosingXlVol);
+
+ if (ms.find()) {
+ if (((cmd == CMD.GET_BLOCK_SIZE) || (cmd == CMD.GET_HINTS))
+ && (meta != null))
+ meta.put("block-size", Integer.parseInt(ms.group(1)));
+ } else
+ throw new IOException("Cannot get stripe size");
+ }
+
+ if (xlator.equalsIgnoreCase("distribute"))
+ if (dcount++ != 0)
+ continue;
+
+ layout += xlator.substring(0, 1);
+ }
+
+ if ((dcount == 0) && (scount == 0) && (rcount == 0))
+ throw new IOException("Cannot get layout");
+
+ if (meta != null) {
+ meta.put("dcount", dcount);
+ meta.put("scount", scount);
+ meta.put("rcount", rcount);
+ }
+
+ vol.put("layout", new ArrayList<String>(1));
+ vol.get("layout").add(layout);
+
+ return vol;
+ }
+
+ static BlockLocation[] getHints (HashMap<String, ArrayList<String>> vol,
+ HashMap<String, Integer> meta,
+ long start, long len,
+ TreeMap<Integer, GlusterFSBrickClass> hnts)
+ throws IOException {
+ String brick = null;
+ String key = null;
+ boolean done = false;
+ int i = 0;
+ int counter = 0;
+ int stripeSize = 0;
+ long stripeStart = 0;
+ long stripeEnd = 0;
+ int nrAllocs = 0;
+ int allocCtr = 0;
+ BlockLocation[] result = null;
+ ArrayList<String> brickList = null;
+ ArrayList<String> stripedBricks = null;
+ Iterator<String> it = null;
+
+ String[] blks = null;
+ GlusterFSBrickRepl[] repl = null;
+ int dcount, scount, rcount;
+
+ LAYOUT l = LAYOUT.valueOf(vol.get("layout").get(0));
+ dcount = meta.get("dcount");
+ scount = meta.get("scount");
+ rcount = meta.get("rcount");
+
+ switch (l) {
+ case D:
+ key = "DISTRIBUTE-" + dcount;
+ brick = vol.get(key).get(0);
+
+ if (hnts == null) {
+ result = new BlockLocation[1];
+ result[0] = new BlockLocation(null, new String[] {brick2host(brick)}, start, len);
+ } else
+ hnts.put(0, new GlusterFSBrickClass(brick, start, len, false, -1, -1, -1));
+ break;
+
+ case R:
+ case DR:
+ /* just the name says it's striped - the volume isn't */
+ stripedBricks = new ArrayList<String>();
+
+ for (i = 1; i <= rcount; i++) {
+ key = "REPLICATE-" + i;
+ brickList = vol.get(key);
+ it = brickList.iterator();
+ while (it.hasNext()) {
+ stripedBricks.add(it.next());
+ }
+ }
+
+ nrAllocs = stripedBricks.size();
+ if (hnts == null) {
+ result = new BlockLocation[1];
+ blks = new String[nrAllocs];
+ }
+
+ for (i = 0; i < nrAllocs; i++) {
+ if (hnts == null)
+ blks[i] = brick2host(stripedBricks.get(i));
+ else
+ hnts.put(i, new GlusterFSBrickClass(stripedBricks.get(i), start, len, false, -1, -1, -1));
+ }
+
+ if (hnts == null)
+ result[0] = new BlockLocation(null, blks, start, len);
+
+ break;
+
+ case SR:
+ case DSR:
+ int rsize = 0;
+ ArrayList<ArrayList<String>> replicas = new ArrayList<ArrayList<String>>();
+
+ stripedBricks = new ArrayList<String>();
+
+ if (rcount == 0)
+ throw new IOException("got replicated volume with replication count 0");
+
+ for (i = 1; i <= rcount; i++) {
+ key = "REPLICATE-" + i;
+ brickList = vol.get(key);
+ it = brickList.iterator();
+ replicas.add(i - 1, new ArrayList<String>());
+ while (it.hasNext()) {
+ replicas.get(i - 1).add(it.next());
+ }
+ }
+
+ stripeSize = meta.get("block-size");
+
+ nrAllocs = (int) (((len - start) / stripeSize) + 1);
+ if (hnts == null) {
+ result = new BlockLocation[nrAllocs];
+ repl = new GlusterFSBrickRepl[nrAllocs];
+ }
+
+ // starting stripe position
+ counter = (int) ((start / stripeSize) % rcount);
+ stripeStart = start;
+
+ key = null;
+ int currAlloc = 0;
+ boolean hntsDone = false;
+ while ((stripeStart < len) && !done) {
+ stripeEnd = (stripeStart - (stripeStart % stripeSize)) + stripeSize - 1;
+ if (stripeEnd > start + len) {
+ stripeEnd = start + len - 1;
+ done = true;
+ }
+
+ rsize = replicas.get(counter).size();
+
+ if (hnts == null)
+ repl[allocCtr] = new GlusterFSBrickRepl(rsize, stripeStart, (stripeEnd - stripeStart));
+
+ for (i = 0; i < rsize; i++) {
+ brick = replicas.get(counter).get(i);
+ currAlloc = (allocCtr * rsize) + i;
+
+ if (hnts == null)
+ repl[allocCtr].addHost(brick2host(brick));
+ else
+ if (currAlloc <= (rsize * rcount) - 1) {
+ hnts.put(currAlloc, new GlusterFSBrickClass(brick, stripeStart,
+ (stripeEnd - stripeStart),
+ true, stripeSize, rcount, rsize));
+ } else
+ hntsDone = true;
+ }
+
+ if (hntsDone)
+ break;
+
+ stripeStart = stripeEnd + 1;
+
+ allocCtr++;
+ counter++;
+
+ if (counter >= replicas.size())
+ counter = 0;
+ }
+
+ if (hnts == null)
+ for (int k = 0; k < nrAllocs; k++)
+ result[k] = new BlockLocation(null, repl[k].getReplHosts(), repl[k].getStartLen(), repl[k].getOffLen());
+
+ break;
+
+ case S:
+ case DS:
+ if (scount == 0)
+ throw new IOException("got striped volume with stripe count 0");
+
+ stripedBricks = new ArrayList<String>();
+ stripeSize = meta.get("block-size");
+
+ key = "STRIPE-" + scount;
+ brickList = vol.get(key);
+ it = brickList.iterator();
+ while (it.hasNext()) {
+ stripedBricks.add(it.next());
+ }
+
+ nrAllocs = (int) ((len - start) / stripeSize) + 1;
+ if (hnts == null)
+ result = new BlockLocation[nrAllocs];
+
+ // starting stripe position
+ counter = (int) ((start / stripeSize) % stripedBricks.size());
+ stripeStart = start;
+
+ key = null;
+ while ((stripeStart < len) && !done) {
+ brick = stripedBricks.get(counter);
+
+ stripeEnd = (stripeStart - (stripeStart % stripeSize)) + stripeSize - 1;
+ if (stripeEnd > start + len) {
+ stripeEnd = start + len - 1;
+ done = true;
+ }
+
+ if (hnts == null)
+ result[allocCtr] = new BlockLocation(null, new String[] {brick2host(brick)},
+ stripeStart, (stripeEnd - stripeStart));
+ else
+ if (allocCtr <= stripedBricks.size()) {
+ hnts.put(allocCtr, new GlusterFSBrickClass(brick, stripeStart, (stripeEnd - stripeStart),
+ true, stripeSize, stripedBricks.size(), -1));
+ } else
+ break;
+
+ stripeStart = stripeEnd + 1;
+
+ counter++;
+ allocCtr++;
+
+ if (counter >= stripedBricks.size())
+ counter = 0;
+ }
+
+ break;
+ }
+
+ return result;
+ }
+
+ /* TODO: use meta{dcount,scount,rcount} for checking */
+ public static int getReplicationFromLayout (HashMap<String, ArrayList<String>> vol,
+ HashMap<String, Integer> meta)
+ throws IOException {
+ int replication = 0;
+ LAYOUT l = LAYOUT.valueOf(vol.get("layout").get(0));
+
+ switch (l) {
+ case D:
+ case S:
+ case DS:
+ replication = 1;
+ break;
+
+ case R:
+ case DR:
+ case SR:
+ case DSR:
+ final String key = "REPLICATION-1";
+ replication = vol.get(key).size();
+ }
+
+ return replication;
+ }
+}
diff --git a/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFUSEInputStream.java b/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFUSEInputStream.java
new file mode 100644
index 000000000..e92237aee
--- /dev/null
+++ b/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFUSEInputStream.java
@@ -0,0 +1,205 @@
+/**
+ *
+ * Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
+ * This file is part of GlusterFS.
+ *
+ * Licensed under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.fs.glusterfs;
+
+import java.io.*;
+import java.util.TreeMap;
+
+import org.apache.hadoop.fs.FSInputStream;
+import org.apache.hadoop.fs.FileSystem;
+
+
+public class GlusterFUSEInputStream extends FSInputStream {
+ File f;
+ boolean lastActive;
+ long pos;
+ boolean closed;
+ String thisHost;
+ RandomAccessFile fuseInputStream;
+ RandomAccessFile fsInputStream;
+ GlusterFSBrickClass thisBrick;
+ int nodeLocation;
+ TreeMap<Integer, GlusterFSBrickClass> hnts;
+
+ public GlusterFUSEInputStream (File f, TreeMap<Integer, GlusterFSBrickClass> hnts,
+ String hostname) throws IOException {
+ this.f = f;
+ this.pos = 0;
+ this.closed = false;
+ this.hnts = hnts;
+ this.thisHost = hostname;
+ this.fsInputStream = null;
+ this.fuseInputStream = new RandomAccessFile(f.getPath(), "r");
+
+ this.lastActive = true; // true == FUSE, false == backed file
+
+ String directFilePath = null;
+ if (this.hnts != null) {
+ directFilePath = findLocalFile(f.getPath(), this.hnts);
+ if (directFilePath != null) {
+ this.fsInputStream = new RandomAccessFile(directFilePath, "r");
+ this.lastActive = !this.lastActive;
+ }
+ }
+ }
+
+ public String findLocalFile (String path, TreeMap<Integer, GlusterFSBrickClass> hnts) {
+ int i = 0;
+ String actFilePath = null;
+ GlusterFSBrickClass gfsBrick = null;
+
+ gfsBrick = hnts.get(0);
+
+ /* do a linear search for the matching host not worrying
+ about file stripes */
+ for (i = 0; i < hnts.size(); i++) {
+ gfsBrick = hnts.get(i);
+ actFilePath = gfsBrick.brickIsLocal(this.thisHost);
+ if (actFilePath != null) {
+ this.thisBrick = gfsBrick;
+ this.nodeLocation = i;
+ break;
+ }
+ }
+
+ return actFilePath;
+ }
+
+ public long getPos () throws IOException {
+ return pos;
+ }
+
+ public synchronized int available () throws IOException {
+ return (int) ((f.length()) - getPos());
+ }
+
+ public void seek (long pos) throws IOException {
+ fuseInputStream.seek(pos);
+ if (fsInputStream != null)
+ fsInputStream.seek(pos);
+ }
+
+ public boolean seekToNewSource (long pos) throws IOException {
+ return false;
+ }
+
+ public RandomAccessFile chooseStream (long start, int[] nlen)
+ throws IOException {
+ GlusterFSBrickClass gfsBrick = null;
+ RandomAccessFile in = fuseInputStream;
+ boolean oldActiveStream = lastActive;
+ lastActive = true;
+
+ if ((hnts != null) && (fsInputStream != null)) {
+ gfsBrick = hnts.get(0);
+ if (!gfsBrick.isChunked()) {
+ in = fsInputStream;
+ lastActive = false;
+ } else {
+ // find the current location in the tree and the amount of data it can serve
+ int[] nodeInTree = thisBrick.getBrickNumberInTree(start, nlen[0]);
+
+ // does this node hold the byte ranges we have been requested for ?
+ if ((nodeInTree[2] != 0) && thisBrick.brickHasFilePart(nodeInTree[0], nodeLocation)) {
+ in = fsInputStream;
+ nlen[0] = nodeInTree[2]; // the amount of data that can be read from the stripe
+ lastActive = false;
+ }
+ }
+ }
+
+ return in;
+ }
+
+ public synchronized int read () throws IOException {
+ int byteRead = 0;
+ RandomAccessFile in = null;
+
+ if (closed)
+ throw new IOException("Stream Closed.");
+
+ int[] nlen = { 1 };
+
+ in = chooseStream(getPos(), nlen);
+
+ byteRead = in.read();
+ if (byteRead >= 0) {
+ pos++;
+ syncStreams(1);
+ }
+
+ return byteRead;
+ }
+
+ public synchronized int read (byte buff[], int off, int len) throws
+ IOException {
+ int result = 0;
+ RandomAccessFile in = null;
+
+ if (closed)
+ throw new IOException("Stream Closed.");
+
+ int[] nlen = {len}; // hack to make len mutable
+ in = chooseStream(pos, nlen);
+
+ result = in.read(buff, off, nlen[0]);
+ if (result > 0) {
+ pos += result;
+ syncStreams(result);
+ }
+
+ return result;
+ }
+
+ /**
+ * TODO: use seek() insted of skipBytes(); skipBytes does I/O
+ */
+ public void syncStreams (int bytes) throws IOException {
+ if ((hnts != null) && (hnts.get(0).isChunked()) && (fsInputStream != null))
+ if (!this.lastActive)
+ fuseInputStream.skipBytes(bytes);
+ else
+ fsInputStream.skipBytes(bytes);
+ }
+
+ public synchronized void close () throws IOException {
+ if (closed)
+ throw new IOException("Stream closed.");
+
+ super.close();
+ if (fsInputStream != null)
+ fsInputStream.close();
+ fuseInputStream.close();
+
+ closed = true;
+ }
+
+ // Not supported - mark () and reset ()
+
+ public boolean markSupported () {
+ return false;
+ }
+
+ public void mark (int readLimit) {}
+
+ public void reset () throws IOException {
+ throw new IOException("Mark/Reset not supported.");
+ }
+}
diff --git a/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFUSEOutputStream.java b/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFUSEOutputStream.java
new file mode 100644
index 000000000..5192a0a56
--- /dev/null
+++ b/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFUSEOutputStream.java
@@ -0,0 +1,86 @@
+/**
+ *
+ * Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
+ * This file is part of GlusterFS.
+ *
+ * Licensed under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.fs.glusterfs;
+
+import java.io.*;
+
+import org.apache.hadoop.fs.FSOutputSummer;
+import org.apache.hadoop.fs.FileSystem;
+
+public class GlusterFUSEOutputStream extends OutputStream {
+ File f;
+ long pos;
+ boolean closed;
+ OutputStream fuseOutputStream;
+
+ public GlusterFUSEOutputStream (String file, boolean append) throws
+ IOException {
+ this.f = new File(file); /* not needed ? */
+ this.pos = 0;
+ this.fuseOutputStream = new FileOutputStream(file, append);
+ this.closed = false;
+ }
+
+ public long getPos () throws IOException {
+ return pos;
+ }
+
+ public void write (int v) throws IOException {
+ if (closed)
+ throw new IOException("Stream closed.");
+
+ byte[] b = new byte[1];
+ b[0] = (byte) v;
+
+ write(b, 0, 1);
+ }
+
+ public void write (byte b[]) throws IOException {
+ if (closed)
+ throw new IOException("Stream closed.");
+
+ fuseOutputStream.write(b, 0, b.length);
+ pos += (long) b.length;
+ }
+
+ public void write (byte b[], int off, int len) throws IOException {
+ if (closed)
+ throw new IOException("Stream closed.");
+
+ fuseOutputStream.write(b, off, len);
+ pos += (long) len;
+ }
+
+ public void flush () throws IOException {
+ if (closed)
+ throw new IOException("Stream closed.");
+
+ fuseOutputStream.flush();
+ }
+
+ public void close () throws IOException {
+ if (closed)
+ throw new IOException("Stream closed.");
+
+ flush();
+ fuseOutputStream.close();
+ closed = true;
+ }
+}
diff --git a/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFileSystem.java b/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFileSystem.java
new file mode 100644
index 000000000..b0501cced
--- /dev/null
+++ b/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFileSystem.java
@@ -0,0 +1,492 @@
+/**
+ *
+ * Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
+ * This file is part of GlusterFS.
+ *
+ * Licensed under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ *
+ */
+
+/**
+ * Implements the Hadoop FileSystem Interface to allow applications to store
+ * files on GlusterFS and run Map/Reduce jobs on the data.
+ */
+
+package org.apache.hadoop.fs.glusterfs;
+
+import java.io.*;
+import java.net.*;
+
+import java.util.regex.*;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.BlockLocation;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.util.Progressable;
+
+import java.util.TreeMap;
+
+/*
+ * This package provides interface for hadoop jobs (incl. Map/Reduce)
+ * to access files in GlusterFS backed file system via FUSE mount
+ */
+public class GlusterFileSystem extends FileSystem {
+
+ private FileSystem glusterFs = null;
+ private URI uri = null;
+ private Path workingDir = null;
+ private String glusterMount = null;
+ private boolean mounted = false;
+
+ /* for quick IO */
+ private boolean quickSlaveIO = false;
+
+ /* extended attribute class */
+ private GlusterFSXattr xattr = null;
+
+ /* hostname of this machine */
+ private static String hostname;
+
+ public GlusterFileSystem () {
+
+ }
+
+ public URI getUri () {
+ return uri;
+ }
+
+ public boolean FUSEMount (String volname, String server, String mount)
+ throws IOException, InterruptedException {
+ boolean ret = true;
+ int retVal = 0;
+ Process p = null;
+ String s = null;
+ String mountCmd = null;
+
+ mountCmd = "mount -t glusterfs " + server + ":" + "/" + volname + " " + mount;
+
+ try {
+ p = Runtime.getRuntime().exec(mountCmd);
+
+ retVal = p.waitFor();
+ if (retVal != 0)
+ ret = false;
+
+ } catch (IOException e) {
+ System.out.println ("Problem mounting FUSE mount on: " + mount);
+ e.printStackTrace();
+ System.exit(-1);
+ }
+
+ return ret;
+ }
+
+ public void initialize (URI uri, Configuration conf) throws IOException {
+ boolean ret = false;
+ String volName = null;
+ String remoteGFSServer = null;
+ String needQuickRead = null;
+
+ if (this.mounted)
+ return;
+
+ System.out.println("Initializing GlusterFS");
+
+ try {
+ volName = conf.get("fs.glusterfs.volname", "");
+ glusterMount = conf.get("fs.glusterfs.mount", "");
+ remoteGFSServer = conf.get("fs.glusterfs.server", "");
+ needQuickRead = conf.get("quick.slave.io", "");
+
+ /*
+ * bail out if we do not have enough information to do a FUSE
+ * mount
+ */
+ if ( (volName.length() == 0) || (remoteGFSServer.length() == 0) ||
+ (glusterMount.length() == 0) )
+ System.exit (-1);
+
+ ret = FUSEMount(volName, remoteGFSServer, glusterMount);
+ if (!ret) {
+ System.out.println("Failed to initialize GlusterFS");
+ System.exit(-1);
+ }
+
+ if ((needQuickRead.length() != 0)
+ && (needQuickRead.equalsIgnoreCase("yes")
+ || needQuickRead.equalsIgnoreCase("on")
+ || needQuickRead.equals("1")))
+ this.quickSlaveIO = true;
+
+ this.mounted = true;
+ this.glusterFs = FileSystem.getLocal(conf);
+ this.workingDir = new Path(glusterMount);
+ this.uri = URI.create(uri.getScheme() + "://" + uri.getAuthority());
+
+ this.xattr = new GlusterFSXattr();
+
+ InetAddress addr = InetAddress.getLocalHost();
+ this.hostname = addr.getHostName();
+
+ setConf(conf);
+
+ } catch (Exception e) {
+ e.printStackTrace();
+ System.out.println("Unable to initialize GlusterFS");
+ System.exit(-1);
+ }
+ }
+
+ @Deprecated
+ public String getName () {
+ return getUri().toString();
+ }
+
+ public Path getWorkingDirectory () {
+ return this.workingDir;
+ }
+
+ public Path getHomeDirectory () {
+ return this.workingDir;
+ }
+
+ public Path makeAbsolute (Path path) {
+ String pth = path.toUri().getPath();
+ if (pth.startsWith(workingDir.toUri().getPath())) {
+ return path;
+ }
+
+ return new Path(workingDir + "/" + pth);
+ }
+
+ public void setWorkingDirectory (Path dir) {
+ this.workingDir = makeAbsolute(dir);
+ }
+
+ public boolean exists (Path path) throws IOException {
+ Path absolute = makeAbsolute(path);
+ File f = new File(absolute.toUri().getPath());
+
+ return f.exists();
+ }
+
+ public boolean mkdirs (Path path, FsPermission permission
+ ) throws IOException {
+ boolean created = false;
+ Path absolute = makeAbsolute(path);
+ File f = new File(absolute.toUri().getPath());
+
+ if (f.exists()) {
+ System.out.println("Directory " + f.getPath() + " already exist");
+ return true;
+ }
+
+ return f.mkdirs();
+ }
+
+ @Deprecated
+ public boolean isDirectory (Path path) throws IOException {
+ Path absolute = makeAbsolute(path);
+ File f = new File(absolute.toUri().getPath());
+
+ return f.isDirectory();
+ }
+
+ public boolean isFile (Path path) throws IOException {
+ return !isDirectory(path);
+ }
+
+ public Path[] listPaths (Path path) throws IOException {
+ Path absolute = makeAbsolute(path);
+ File f = new File (absolute.toUri().getPath());
+ String relPath = path.toUri().getPath();
+ String[] fileList = null;
+ Path[] filePath = null;
+ int fileCnt = 0;
+
+ fileList = f.list();
+
+ filePath = new Path[fileList.length];
+
+ for (; fileCnt < fileList.length; fileCnt++) {
+ filePath[fileCnt] = new Path(relPath + "/" + fileList[fileCnt]);
+ }
+
+ return filePath;
+ }
+
+ public FileStatus[] listStatus (Path path) throws IOException {
+ int fileCnt = 0;
+ Path absolute = makeAbsolute(path);
+ String relpath = path.toUri().getPath();
+ String[] strFileList = null;
+ FileStatus[] fileStatus = null;
+ File f = new File(absolute.toUri().getPath());
+
+ if (!f.exists()) {
+ return null;
+ }
+
+ if (f.isFile())
+ return new FileStatus[] {
+ getFileStatus(path)
+ };
+
+ if (relpath.charAt(relpath.length() - 1) != '/')
+ relpath += "/";
+
+ strFileList = f.list();
+
+ fileStatus = new FileStatus[strFileList.length];
+
+ for (; fileCnt < strFileList.length; fileCnt++) {
+ fileStatus[fileCnt] = getFileStatusFromFileString(relpath + strFileList[fileCnt]);
+ }
+
+ return fileStatus;
+ }
+
+ public FileStatus getFileStatusFromFileString (String path)
+ throws IOException {
+ Path nPath = new Path(path);
+ return getFileStatus(nPath);
+ }
+
+ public FileStatus getFileStatus (Path path) throws IOException {
+ Path absolute = makeAbsolute(path);
+ File f = new File(absolute.toUri().getPath());
+
+ if (!f.exists ())
+ throw new FileNotFoundException("File " + f.getPath() + " does not exist.");
+
+ if (f.isDirectory ())
+ return new FileStatus(0, true, 1, 0, f.lastModified(), path.makeQualified(this));
+ else
+ return new FileStatus(f.length(), false, 0, getDefaultBlockSize(),
+ f.lastModified(), path.makeQualified(this));
+
+ }
+
+ /*
+ * creates a new file in glusterfs namespace. internally the file
+ * descriptor is an instance of OutputStream class.
+ */
+ public FSDataOutputStream create (Path path, FsPermission permission,
+ boolean overwrite, int bufferSize,
+ short replication, long blockSize,
+ Progressable progress)
+ throws IOException {
+ Path absolute = makeAbsolute(path);
+ Path parent = null;
+ File f = null;
+ File fParent = null;
+ FSDataOutputStream glusterFileStream = null;
+
+ f = new File(absolute.toUri().getPath());
+
+ if (f.exists ()) {
+ if (overwrite)
+ f.delete ();
+ else
+ throw new IOException(f.getPath() + " already exist");
+ }
+
+ parent = path.getParent();
+ fParent = new File ((makeAbsolute(parent)).toUri().getPath());
+ if ((parent != null) && (fParent != null) && (!fParent.exists()))
+ if (!fParent.mkdirs())
+ throw new IOException("cannot create parent directory: " + fParent.getPath());
+
+ glusterFileStream = new FSDataOutputStream(new GlusterFUSEOutputStream
+ (f.getPath(), false));
+
+ return glusterFileStream;
+ }
+
+ /*
+ * open the file in read mode (internally the file descriptor is an
+ * instance of InputStream class).
+ *
+ * if quick read mode is set then read the file by by-passing FUSE
+ * if we are on same slave where the file exist
+ */
+ public FSDataInputStream open (Path path) throws IOException {
+ Path absolute = makeAbsolute(path);
+ File f = new File(absolute.toUri().getPath());
+ FSDataInputStream glusterFileStream = null;
+ TreeMap<Integer, GlusterFSBrickClass> hnts = null;
+
+ if (!f.exists())
+ throw new IOException("File " + f.getPath() + " does not exist.");
+
+ if (quickSlaveIO)
+ hnts = xattr.quickIOPossible(f.getPath(), 0, f.length());
+
+ glusterFileStream = new FSDataInputStream(new GlusterFUSEInputStream(f, hnts, hostname));
+ return glusterFileStream;
+ }
+
+ public FSDataInputStream open (Path path, int bufferSize) throws IOException {
+ return open(path);
+ }
+
+ public FSDataOutputStream append (Path f, int bufferSize, Progressable progress)
+ throws IOException {
+ throw new IOException ("append not supported (as yet).");
+ }
+
+ public boolean rename (Path src, Path dst) throws IOException {
+ Path absoluteSrc = makeAbsolute(src);
+ Path absoluteDst = makeAbsolute(dst);
+
+ File fSrc = new File(absoluteSrc.toUri().getPath());
+ File fDst = new File(absoluteDst.toUri().getPath());
+
+ if (fDst.isDirectory()) {
+ fDst = null;
+ String newPath = absoluteDst.toUri().getPath() + "/" + fSrc.getName();
+ fDst = new File(newPath);
+ }
+ return fSrc.renameTo(fDst);
+ }
+
+ @Deprecated
+ public boolean delete (Path path) throws IOException {
+ return delete(path, true);
+ }
+
+ public boolean delete (Path path, boolean recursive) throws IOException {
+ Path absolute = makeAbsolute(path);
+ File f = new File(absolute.toUri().getPath());
+
+ if (f.isFile())
+ return f.delete();
+
+ FileStatus[] dirEntries = listStatus(absolute);
+ if ((!recursive) && (dirEntries != null) && (dirEntries.length != 0))
+ throw new IOException ("Directory " + path.toString() + " is not empty");
+
+ if (dirEntries != null)
+ for (int i = 0; i < dirEntries.length; i++)
+ delete(new Path(absolute, dirEntries[i].getPath()), recursive);
+
+ return f.delete();
+ }
+
+ @Deprecated
+ public long getLength (Path path) throws IOException {
+ Path absolute = makeAbsolute(path);
+ File f = new File(absolute.toUri().getPath());
+
+ if (!f.exists())
+ throw new IOException(f.getPath() + " does not exist.");
+
+ return f.length();
+ }
+
+ @Deprecated
+ public short getReplication (Path path) throws IOException {
+ Path absolute = makeAbsolute(path);
+ File f = new File(absolute.toUri().getPath());
+
+ if (!f.exists())
+ throw new IOException(f.getPath() + " does not exist.");
+
+ return xattr.getReplication(f.getPath());
+ }
+
+ public short getDefaultReplication (Path path) throws IOException {
+ return getReplication(path);
+ }
+
+ public boolean setReplication (Path path, short replication)
+ throws IOException {
+ return true;
+ }
+
+ public long getBlockSize (Path path) throws IOException {
+ long blkSz;
+ Path absolute = makeAbsolute(path);
+ File f = new File(absolute.toUri().getPath());
+
+ blkSz = xattr.getBlockSize(f.getPath());
+ if (blkSz == 0)
+ blkSz = getLength(path);
+
+ return blkSz;
+ }
+
+ public long getDefaultBlockSize () {
+ return 1 << 26; /* default's from hdfs, kfs */
+ }
+
+ @Deprecated
+ public void lock (Path path, boolean shared) throws IOException {
+ }
+
+ @Deprecated
+ public void release (Path path) throws IOException {
+ }
+
+ public BlockLocation[] getFileBlockLocations (FileStatus file, long start, long len)
+ throws IOException {
+
+ Path absolute = makeAbsolute(file.getPath());
+ File f = new File(absolute.toUri().getPath());
+ BlockLocation[] result = null;
+
+ if (file == null)
+ return null;
+
+ result = xattr.getPathInfo(f.getPath(), start, len);
+ if (result == null) {
+ System.out.println("Problem getting destination host for file "
+ + f.getPath());
+ return null;
+ }
+
+ return result;
+ }
+
+ // getFileBlockLocations (FileStatus, long, long) is called by hadoop
+ public BlockLocation[] getFileBlockLocations (Path p, long start, long len)
+ throws IOException {
+ return null;
+ }
+
+ public void copyFromLocalFile (boolean delSrc, Path src, Path dst)
+ throws IOException {
+ FileUtil.copy(glusterFs, src, this, dst, delSrc, getConf());
+ }
+
+ public void copyToLocalFile (boolean delSrc, Path src, Path dst)
+ throws IOException {
+ FileUtil.copy(this, src, glusterFs, dst, delSrc, getConf());
+ }
+
+ public Path startLocalOutput (Path fsOutputFile, Path tmpLocalFile)
+ throws IOException {
+ return tmpLocalFile;
+ }
+
+ public void completeLocalOutput (Path fsOutputFile, Path tmpLocalFile)
+ throws IOException {
+ moveFromLocalFile(tmpLocalFile, fsOutputFile);
+ }
+}
diff --git a/glusterfs-hadoop/0.20.2/src/test/java/org/apache/hadoop/fs/glusterfs/AppTest.java b/glusterfs-hadoop/0.20.2/src/test/java/org/apache/hadoop/fs/glusterfs/AppTest.java
new file mode 100644
index 000000000..21e188c52
--- /dev/null
+++ b/glusterfs-hadoop/0.20.2/src/test/java/org/apache/hadoop/fs/glusterfs/AppTest.java
@@ -0,0 +1,38 @@
+package org.apache.hadoop.fs.glusterfs;
+
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+
+/**
+ * Unit test for simple App.
+ */
+public class AppTest
+ extends TestCase
+{
+ /**
+ * Create the test case
+ *
+ * @param testName name of the test case
+ */
+ public AppTest( String testName )
+ {
+ super( testName );
+ }
+
+ /**
+ * @return the suite of tests being tested
+ */
+ public static Test suite()
+ {
+ return new TestSuite( AppTest.class );
+ }
+
+ /**
+ * Rigourous Test :-)
+ */
+ public void testApp()
+ {
+ assertTrue( true );
+ }
+}
diff --git a/glusterfs-hadoop/0.20.2/tools/build-deploy-jar.py b/glusterfs-hadoop/0.20.2/tools/build-deploy-jar.py
new file mode 100755
index 000000000..c20e53b39
--- /dev/null
+++ b/glusterfs-hadoop/0.20.2/tools/build-deploy-jar.py
@@ -0,0 +1,212 @@
+#!/usr/bin/python
+
+##
+ #
+ # Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
+ # This file is part of GlusterFS.
+ #
+ # Licensed under the Apache License, Version 2.0
+ # (the "License"); you may not use this file except in compliance with
+ # the License. You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ # implied. See the License for the specific language governing
+ # permissions and limitations under the License.
+ #
+ ##
+
+import getopt
+import glob
+import sys, os
+import shutil
+import subprocess, shlex
+
+def usage():
+ print "usage: python build-deploy-jar.py [-b/--build] -d/--dir <hadoop-home> [-c/--core] [-m/--mapred] [-h/--henv]"
+
+def addSlash(s):
+ if not (s[-1] == '/'):
+ s = s + '/'
+
+ return s
+
+def whereis(program):
+ abspath = None
+ for path in (os.environ.get('PATH', '')).split(':'):
+ abspath = os.path.join(path, program)
+ if os.path.exists(abspath) and not os.path.isdir(abspath):
+ return abspath
+
+ return None
+
+def getLatestJar(targetdir):
+ glusterfsJar = glob.glob(targetdir + "*.jar")
+ if len(glusterfsJar) == 0:
+ print "No GlusterFS jar file found in %s ... exiting" % (targetdir)
+ return None
+
+ # pick up the latest jar file - just in case ...
+ stat = latestJar = None
+ ctime = 0
+
+ for jar in glusterfsJar:
+ stat = os.stat(jar)
+ if stat.st_ctime > ctime:
+ latestJar = jar
+ ctime = stat.st_ctime
+
+ return latestJar
+
+# build the glusterfs hadoop plugin using maven
+def build_jar(targetdir):
+ location = whereis('mvn')
+
+ if location == None:
+ print "Cannot find maven to build glusterfs hadoop jar"
+ print "please install maven or if it's already installed then fix your PATH environ"
+ return None
+
+ # do a clean packaging
+ if os.path.exists(targetdir) and os.path.isdir(targetdir):
+ print "Cleaning up directories ... [ " + targetdir + " ]"
+ shutil.rmtree(targetdir)
+
+ print "Building glusterfs jar ..."
+ process = subprocess.Popen(['package'], shell=True,
+ executable=location, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+ process.wait()
+ if not process.returncode == 0:
+ print "Building glusterfs jar failed ... exiting"
+ return None
+
+ latestJar = getLatestJar(targetdir)
+ return latestJar
+
+def rcopy(f, host, libdir):
+ print " * doing remote copy to host %s" % (host)
+ scpCmd = "scp %s %s:%s" % (f, host, libdir)
+
+ os.system(scpCmd);
+
+def deployInSlave(f, confdir, libdir, cc, cm, he):
+ slavefile = confdir + "slaves"
+
+ ccFile = confdir + "core-site.xml"
+ cmFile = confdir + "mapred-site.xml"
+ heFile = confdir + "hadoop-env.sh"
+
+ sf = open(slavefile, 'r')
+ for host in sf:
+ host = host.rstrip('\n')
+ print " >>> Deploying %s on %s ..." % (os.path.basename(f), host)
+ rcopy(f, host, libdir)
+
+ if cc:
+ print " >>> Deploying [%s] on %s ..." % (os.path.basename(ccFile), host)
+ rcopy(ccFile, host, confdir)
+
+ if cm:
+ print " >>> Deploying [%s] on %s ..." % (os.path.basename(cmFile), host)
+ rcopy(cmFile, host, confdir)
+
+ if he:
+ print " >>> Deploying [%s] on %s ..." % (os.path.basename(heFile), host)
+ rcopy(heFile, host, confdir);
+
+ print "<<< Done\n"
+
+ sf.close()
+
+def deployInMaster(f, confdir, libdir):
+ import socket
+ masterfile = confdir + "masters"
+
+ mf = open(masterfile, 'r')
+ for host in mf:
+ host = host.rstrip('\n')
+ print " >>> Deploying %s on %s ..." % (os.path.basename(f), host)
+ h = host
+ try:
+ socket.inet_aton(host)
+ h = socket.getfqdn(host)
+ except socket.error:
+ pass
+
+ if h == socket.gethostname() or h == 'localhost':
+ # local cp
+ print " * doing local copy"
+ shutil.copy(f, libdir)
+ else:
+ # scp the file
+ rcopy(f, h, libdir)
+
+ print "<<< Done\n"
+
+ mf.close()
+
+if __name__ == '__main__':
+ opt = args = []
+ try:
+ opt, args = getopt.getopt(sys.argv[1:], "bd:cmh", ["build", "dir=", "core", "mapred", "henv"]);
+ except getopt.GetoptError, err:
+ print str(err)
+ usage()
+ sys.exit(1)
+
+ needbuild = hadoop_dir = copyCore = copyMapred = copyHadoopEnv = None
+
+ for k, v in opt:
+ if k in ("-b", "--build"):
+ needbuild = True
+ elif k in ("-d", "--dir"):
+ hadoop_dir = v
+ elif k in ("-c", "--core"):
+ copyCore = True
+ elif k in ("-m", "--mapred"):
+ copyMapred = True
+ elif k in ("-h", "--henv"):
+ copyHadoopEnv = True
+ else:
+ pass
+
+ if hadoop_dir == None:
+ print 'hadoop directory missing'
+ usage()
+ sys.exit(1)
+
+ os.chdir(os.path.dirname(sys.argv[0]) + '/..')
+ targetdir = './target/'
+
+ if needbuild:
+ jar = build_jar(targetdir)
+ if jar == None:
+ sys.exit(1)
+ else:
+ jar = getLatestJar(targetdir)
+ if jar == None:
+ print "Maybe you want to build it ? with -b option"
+ sys.exit(1)
+
+ print ""
+ print "*** Deploying %s *** " % (jar)
+
+ # copy jar to local hadoop distribution (master)
+ hadoop_home = addSlash(hadoop_dir)
+ if not (os.path.exists(hadoop_home) and os.path.isdir(hadoop_home)):
+ print "path " + hadoop_home + " does not exist or is not adiretory";
+ sys.exit(1);
+
+ hadoop_conf = hadoop_home + "conf/"
+ hadoop_lib = hadoop_home + "lib/"
+
+ print " >>> Scanning hadoop master file for host(s) to deploy"
+ deployInMaster(jar, hadoop_conf, hadoop_lib)
+
+ print ""
+ print " >>> Scanning hadoop slave file for host(s) to deploy"
+ deployInSlave(jar, hadoop_conf, hadoop_lib, copyCore, copyMapred, copyHadoopEnv)
diff --git a/glusterfs-hadoop/COPYING b/glusterfs-hadoop/COPYING
new file mode 100644
index 000000000..d64569567
--- /dev/null
+++ b/glusterfs-hadoop/COPYING
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/glusterfs-hadoop/README b/glusterfs-hadoop/README
new file mode 100644
index 000000000..3026f11c0
--- /dev/null
+++ b/glusterfs-hadoop/README
@@ -0,0 +1,182 @@
+GlusterFS Hadoop Plugin
+=======================
+
+INTRODUCTION
+------------
+
+This document describes how to use GlusterFS (http://www.gluster.org/) as a backing store with Hadoop.
+
+
+REQUIREMENTS
+------------
+
+* Supported OS is GNU/Linux
+* GlusterFS and Hadoop installed on all machines in the cluster
+* Java Runtime Environment (JRE)
+* Maven (needed if you are building the plugin from source)
+* JDK (needed if you are building the plugin from source)
+
+NOTE: Plugin relies on two *nix command line utilities to function properly. They are:
+
+* mount: Used to mount GlusterFS volumes.
+* getfattr: Used to fetch Extended Attributes of a file
+
+Make sure they are installed on all hosts in the cluster and their locations are in $PATH
+environment variable.
+
+
+INSTALLATION
+------------
+
+** NOTE: Example below is for Hadoop version 0.20.2 ($GLUSTER_HOME/hdfs/0.20.2) **
+
+* Building the plugin from source [Maven (http://maven.apache.org/) and JDK is required to build the plugin]
+
+ Change to glusterfs-hadoop directory in the GlusterFS source tree and build the plugin.
+
+ # cd $GLUSTER_HOME/hdfs/0.20.2
+ # mvn package
+
+ On a successful build the plugin will be present in the `target` directory.
+ (NOTE: version number will be a part of the plugin)
+
+ # ls target/
+ classes glusterfs-0.20.2-0.1.jar maven-archiver surefire-reports test-classes
+ ^^^^^^^^^^^^^^^^^^
+
+ Copy the plugin to lib/ directory in your $HADOOP_HOME dir.
+
+ # cp target/glusterfs-0.20.2-0.1.jar $HADOOP_HOME/lib
+
+ Copy the sample configuration file that ships with this source (conf/core-site.xml) to conf
+ directory in your $HADOOP_HOME dir.
+
+ # cp conf/core-site.xml $HADOOP_HOME/conf
+
+* Installing the plugin from RPM
+
+ See the plugin documentation for installing from RPM.
+
+
+CLUSTER INSTALLATION
+--------------------
+
+ In case it is tedious to do the above steps(s) on all hosts in the cluster; use the build-and-deploy.py script to
+ build the plugin in one place and deploy it (along with the configuration file on all other hosts).
+
+ This should be run on the host which is that hadoop master [Job Tracker].
+
+* STEPS (You would have done Step 1 and 2 anyway while deploying Hadoop)
+
+ 1. Edit conf/slaves file in your hadoop distribution; one line for each slave.
+ 2. Setup password-less ssh b/w hadoop master and slave(s).
+ 3. Edit conf/core-site.xml with all glusterfs related configurations (see CONFIGURATION)
+ 4. Run the following
+ # cd $GLUSTER_HOME/hdfs/0.20.2/tools
+ # python ./build-and-deploy.py -b -d /path/to/hadoop/home -c
+
+ This will build the plugin and copy it (and the config file) to all slaves (mentioned in $HADOOP_HOME/conf/slaves).
+
+ Script options:
+ -b : build the plugin
+ -d : location of hadoop directory
+ -c : deploy core-site.xml
+ -m : deploy mapred-site.xml
+ -h : deploy hadoop-env.sh
+
+
+CONFIGURATION
+-------------
+
+ All plugin configuration is done in a single XML file (core-site.xml) with <name><value> tags in each <property>
+ block.
+
+ Brief explanation of the tunables and the values they accept (change them where-ever needed) are mentioned below
+
+ name: fs.glusterfs.impl
+ value: org.apache.hadoop.fs.glusterfs.GlusterFileSystem
+
+ The default FileSystem API to use (there is little reason to modify this).
+
+ name: fs.default.name
+ value: glusterfs://server:port
+
+ The default name that hadoop uses to represent file as a URI (typically a server:port tuple). Use any host
+ in the cluster as the server and any port number. This option has to be in server:port format for hadoop
+ to create file URI; but is not used by plugin.
+
+ name: fs.glusterfs.volname
+ value: volume-dist-rep
+
+ The volume to mount.
+
+
+ name: fs.glusterfs.mount
+ value: /mnt/glusterfs
+
+ This is the directory that the plugin will use to mount (FUSE mount) the volume.
+
+ name: fs.glusterfs.server
+ value: 192.168.1.36, hackme.zugzug.org
+
+ To mount a volume the plugin needs to know the hostname or the IP of a GlusterFS server in the cluster.
+ Mention it here.
+
+ name: quick.slave.io
+ value: [On/Off], [Yes/No], [1/0]
+
+ NOTE: This option is not tested as of now.
+
+ This is a performance tunable option. Hadoop schedules jobs to hosts that contain the file data part. The job
+ then does I/O on the file (via FUSE in case of GlusterFS). When this option is set, the plugin will try to
+ do I/O directly from the backed filesystem (ext3, ext4 etc..) the file resides on. Hence read performance
+ will improve and job would run faster.
+
+
+USAGE
+-----
+
+ Once configured, start Hadoop Map/Reduce daemons
+
+ # cd $HADOOP_HOME
+ # ./bin/start-mapred.sh
+
+ If the map/reduce job/task trackers are up, all I/O will be done to GlusterFS.
+
+
+FOR HACKERS
+-----------
+
+* Source Layout
+
+** version specific: hdfs/<version> **
+./src
+./src/main
+./src/main/java
+./src/main/java/org
+./src/main/java/org/apache
+./src/main/java/org/apache/hadoop
+./src/main/java/org/apache/hadoop/fs
+./src/main/java/org/apache/hadoop/fs/glusterfs
+./src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSBrickClass.java
+./src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSXattr.java <--- Fetch/Parse Extended Attributes of a file
+./src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFUSEInputStream.java <--- Input Stream (instantiated during open() calls; quick read from backed FS)
+./src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSBrickRepl.java
+./src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFUSEOutputStream.java <--- Output Stream (instantiated during creat() calls)
+./src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFileSystem.java <--- Entry Point for the plugin (extends Hadoop FileSystem class)
+./src/test
+./src/test/java
+./src/test/java/org
+./src/test/java/org/apache
+./src/test/java/org/apache/hadoop
+./src/test/java/org/apache/hadoop/fs
+./src/test/java/org/apache/hadoop/fs/glusterfs
+./src/test/java/org/apache/hadoop/fs/glusterfs/AppTest.java <--- Your test cases go here (if any :-))
+./tools/build-deploy-jar.py <--- Build and Deployment Script
+./conf
+./conf/core-site.xml <--- Sample configuration file
+./pom.xml <--- build XML file (used by maven)
+
+** toplevel: hdfs/ **
+./COPYING <--- License
+./README <--- This file
diff --git a/glusterfs.spec.in b/glusterfs.spec.in
index 390c27d54..040500c5f 100644
--- a/glusterfs.spec.in
+++ b/glusterfs.spec.in
@@ -1,84 +1,271 @@
-# if you make changes, the it is advised to increment this number, and provide
-# a descriptive suffix to identify who owns or what the change represents
-# e.g. release_version 2.MSW
-%define release 1%{?dist}
+%global _hardened_build 1
-# if you wish to compile an rpm without ibverbs support, compile like this...
-# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without ibverbs
-%{?_without_ibverbs:%define _without_ibverbs --disable-ibverbs}
+%global _for_fedora_koji_builds 0
-# if you wish to compile an rpm without building the client RPMs...
-# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without client
-%{?_without_client:%define _without_client --disable-fuse-client}
+# uncomment and add '%' to use the prereltag for pre-releases
+# global prereltag beta4
-# if you wish to compile an rpm without BDB translator...
-# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without bdb
-%{?_without_bdb:%define _without_bdb --disable-bdb}
+# if you wish to compile an rpm without rdma support, compile like this...
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without rdma
+%{?_without_rdma:%global _without_rdma --disable-ibverbs}
-# if you wish to compile an rpm without libglusterfsclient...
-# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without libglfsclient
-%{?_without_libglfsclient:%define _without_libglfsclient --disable-libglusterclient}
+# No RDMA Support on s390(x)
+%ifarch s390 s390x
+%global _without_rdma --disable-ibverbs
+%endif
-# if you wish to compile an rpm without libglusterfsclient...
+# if you wish to compile an rpm without epoll...
# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without epoll
-%{?_without_epoll:%define _without_epoll --disable-epoll}
-
-Summary: Cluster File System
-Name: @PACKAGE_NAME@
-Version: @PACKAGE_VERSION@
-Release: %{release}
-License: GPLv3+
-Group: System Environment/Base
-Vendor: Z RESEARCH Inc
-Packager: @PACKAGE_BUGREPORT@
-URL: http://www.gluster.org/docs/index.php/GlusterFS
-Source0: ftp://ftp.gluster.com/pub/gluster/glusterfs/2.0/@PACKAGE_VERSION@/@PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz
-BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root
-Requires(post): /sbin/chkconfig
-Requires(preun): /sbin/service, /sbin/chkconfig
+%{?_without_epoll:%global _without_epoll --disable-epoll}
+
+# if you wish to compile an rpm without fusermount...
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without fusermount
+%{?_without_fusermount:%global _without_fusermount --disable-fusermount}
+
+# if you wish to compile an rpm without geo-replication support, compile like this...
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without georeplication
+%{?_without_georeplication:%global _without_georeplication --disable-geo-replication}
+
+# if you wish to compile an rpm without the OCF resource agents...
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without ocf
+%{?_without_ocf:%global _without_ocf --without-ocf}
+
+# if you wish to build rpms without syslog logging, compile like this
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@tar.gz --without syslog
+%{?_without_syslog:%global _without_syslog --disable-syslog}
+
+# disable syslog forcefully as rhel <= 6 doesn't have rsyslog or rsyslog-mmcount
+%if ( 0%{?rhel} && 0%{?rhel} <= 6 )
+%global _without_syslog --disable-syslog
+%endif
+
+# there is no systemtap support! Perhaps some day there will be
+%global _without_systemtap --enable-systemtap=no
+
+# if you wish to compile an rpm without the BD map support...
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without bd
+%{?_without_bd:%global _without_bd --disable-bd-xlator}
+
+%if ( 0%{?rhel} && 0%{?rhel} < 6 )
+%define _without_bd --disable-bd-xlator
+%endif
+
+# if you wish to compile an rpm without the qemu-block support...
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without qemu-block
+%{?_without_qemu_block:%global _without_qemu_block --disable-qemu-block}
+
+%if ( 0%{?rhel} && 0%{?rhel} < 6 )
+# xlators/features/qemu-block fails to build on RHEL5, disable it
+%define _without_qemu_block --disable-qemu-block
+%endif
+
+%if ( 0%{?fedora} && 0%{?fedora} > 16 ) || ( 0%{?rhel} && 0%{?rhel} > 6 )
+%global _with_systemd true
+%endif
+
+# From https://fedoraproject.org/wiki/Packaging:Python#Macros
+%if ( 0%{?rhel} && 0%{?rhel} <= 5 )
+%{!?python_sitelib: %global python_sitelib %(%{__python} -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")}
+%{!?python_sitearch: %global python_sitearch %(%{__python} -c "from distutils.sysconfig import get_python_lib; print(get_python_lib(1))")}
+%endif
+
+Summary: Cluster File System
+%if ( 0%{_for_fedora_koji_builds} )
+Name: glusterfs
+Version: 3.4.1
+Release: 3%{?prereltag:.%{prereltag}}%{?dist}
+Vendor: Fedora Project
+%else
+Name: @PACKAGE_NAME@
+Version: @PACKAGE_VERSION@
+Release: 1%{?dist}
+Vendor: glusterfs.org
+%endif
+License: GPLv2 or LGPLv3+
+Group: System Environment/Base
+URL: http://www.gluster.org/docs/index.php/GlusterFS
+%if ( 0%{_for_fedora_koji_builds} )
+Source0: http://download.gluster.org/pub/gluster/glusterfs/3.4/%{version}%{?prereltag}/glusterfs-%{version}%{?prereltag}.tar.gz
+Source1: glusterd.sysconfig
+Source2: glusterfsd.sysconfig
+Source3: glusterfs-fuse.logrotate
+Source4: glusterd.logrotate
+Source5: glusterfsd.logrotate
+Source6: rhel5-load-fuse-modules
+Source11: glusterfsd.service
+Source13: glusterfsd.init
+Patch0: %{name}-3.2.5.configure.ac.patch
+Patch1: %{name}-3.3.0.libglusterfs.Makefile.patch
+Patch2: %{name}-3.3.1.rpc.rpcxprt.rdma.name.c.patch
+%else
+Source0: @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz
+%endif
+
+BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
+
+%if ( 0%{?rhel} && 0%{?rhel} <= 5 )
+BuildRequires: python-simplejson
+%endif
+%if ( 0%{?_with_systemd:1} )
+%if ( 0%{_for_fedora_koji_builds} )
+%global glusterfsd_service %{S:%{SOURCE11}}
+%endif
+BuildRequires: systemd-units
+Requires(post): systemd-units
+Requires(preun): systemd-units
+Requires(postun): systemd-units
+%define _init_enable() /bin/systemctl enable %1.service ;
+%define _init_disable() /bin/systemctl disable %1.service ;
+%define _init_restart() /bin/systemctl try-restart %1.service ;
+%define _init_stop() /bin/systemctl stop %1.service ;
+%define _init_install() %{__install} -D -p -m 0644 %1 %{buildroot}%{_unitdir}/%2.service ;
+# can't seem to make a generic macro that works
+%define _init_glusterd %{_unitdir}/glusterd.service
+%define _init_glusterfsd %{_unitdir}/glusterfsd.service
+%else
+%if ( 0%{_for_fedora_koji_builds} )
+%global glusterfsd_service %{S:%{SOURCE13}}
+%endif
+Requires(post): /sbin/chkconfig
+Requires(preun): /sbin/service
+Requires(preun): /sbin/chkconfig
Requires(postun): /sbin/service
+%define _init_enable() /sbin/chkconfig --add %1 ;
+%define _init_disable() /sbin/chkconfig --del %1 ;
+%define _init_restart() /sbin/service %1 condrestart &>/dev/null ;
+%define _init_stop() /sbin/service %1 stop &>/dev/null ;
+%define _init_install() %{__install} -D -p -m 0755 %1 %{buildroot}%{_sysconfdir}/init.d/%2 ;
+# can't seem to make a generic macro that works
+%define _init_glusterd %{_sysconfdir}/init.d/glusterd
+%define _init_glusterfsd %{_sysconfdir}/init.d/glusterfsd
+%endif
+
+Requires: %{name}-libs = %{version}-%{release}
+BuildRequires: bison flex
+BuildRequires: gcc make automake libtool
+BuildRequires: ncurses-devel readline-devel
+BuildRequires: libxml2-devel openssl-devel
+BuildRequires: libaio-devel
+BuildRequires: python-devel
+BuildRequires: python-ctypes
+%if ( 0%{!?_without_systemtap:1} )
+BuildRequires: systemtap-sdt-devel
+%endif
+%if ( 0%{!?_without_bd:1} )
+BuildRequires: lvm2-devel
+%endif
+%if ( 0%{!?_without_qemu_block:1} )
+BuildRequires: glib2-devel
+%endif
-%{!?_without_ibverbs:BuildRequires: libibverbs-devel}
-%{!?_without_bdb:BuildRequires: db4-devel}
-%{!?_without_client:BuildRequires: fuse-devel}
+Obsoletes: hekafs <= 0.7
+Obsoletes: %{name}-libs <= 2.0.0
+Obsoletes: %{name}-common < %{version}-%{release}
+Obsoletes: %{name}-core < %{version}-%{release}
+Provides: %{name}-libs = %{version}-%{release}
+Provides: %{name}-common = %{version}-%{release}
+Provides: %{name}-core = %{version}-%{release}
-BuildRequires: bison flex
-BuildRequires: gcc make
+# We do not want to generate useless provides and requires for xlator .so files
+# Filter all generated:
+#
+# TODO: RHEL5 does not have a convenient solution
+%if ( 0%{?rhel} == 6 )
+ # filter_setup exists in RHEL6 only
+ %filter_provides_in %{_libdir}/glusterfs/%{version}/
+ %global __filter_from_req %{?__filter_from_req} | %{__grep} -v -P '^(?!lib).*\.so.*$'
+ %filter_setup
+%else
+ # modern rpm and current Fedora do not generate requires if the provides
+ # are filtered
+ %global __provides_exclude_from ^%{_libdir}/glusterfs/%{version}/.*$
+%endif
+
+%if ( 0%{?rhel} && 0%{?rhel} < 6 )
+ # _sharedstatedir is not provided by RHEL5
+ %define _sharedstatedir /var/lib
+%endif
%description
GlusterFS is a clustered file-system capable of scaling to several
-peta-bytes. It aggregates various storage bricks over Infiniband RDMA
+petabytes. It aggregates various storage bricks over Infiniband RDMA
or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file system in
+system. GlusterFS is one of the most sophisticated file systems in
terms of features and extensibility. It borrows a powerful concept
called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in userspace and easily manageable.
+is in user space and easily manageable.
-%package common
-Summary: GlusterFS common files for both the client and the server
-Group: System Environment/Libraries
-Obsoletes: glusterfs-libs <= 2.0.0
-Provides: glusterfs-libs = %{version}-%{release}
+This package includes the glusterfs binary, the glusterfsd daemon and the
+gluster command line, libglusterfs and glusterfs translator modules common to
+both GlusterFS server and client framework.
+
+%package libs
+Summary: GlusterFS common libraries
+Group: Applications/File
+%if ( 0%{!?_without_syslog:1} )
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 6 )
+Requires: rsyslog-mmjsonparse
+%endif
+%if ( 0%{?rhel} && 0%{?rhel} == 6 )
+Requires: rsyslog-mmcount
+%endif
+%endif
-%description common
+%description libs
GlusterFS is a clustered file-system capable of scaling to several
-peta-bytes. It aggregates various storage bricks over Infiniband RDMA
+petabytes. It aggregates various storage bricks over Infiniband RDMA
or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file system in
+system. GlusterFS is one of the most sophisticated file systems in
terms of features and extensibility. It borrows a powerful concept
called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in userspace and easily manageable.
+is in user space and easily manageable.
+
+This package provides the base GlusterFS libraries
+
+%package cli
+Summary: GlusterFS CLI
+Group: Applications/File
+Requires: %{name}-libs = %{version}-%{release}
+
+%description cli
+GlusterFS is a clustered file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides the GlusterFS CLI application and its man page
+
+%if ( 0%{!?_without_rdma:1} )
+%package rdma
+Summary: GlusterFS rdma support for ib-verbs
+Group: Applications/File
+BuildRequires: libibverbs-devel
+BuildRequires: librdmacm-devel
+Requires: %{name} = %{version}-%{release}
+
+%description rdma
+GlusterFS is a clustered file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
-This package includes the glusterfs binary, libglusterfs and glusterfs
-translator modules common to both GlusterFS server and client framework.
+This package provides support to ib-verbs library.
+%endif
-%package client
-Summary: GlusterFS Client
-Group: Applications/File
-Requires: fuse
-Requires: %{name}-common = %{version}-%{release}
+%if ( 0%{!?_without_georeplication:1} )
+%package geo-replication
+Summary: GlusterFS Geo-replication
+Group: Applications/File
+Requires: %{name} = %{version}-%{release}
+Requires: %{name}-server = %{version}-%{release}
-%description client
+%description geo-replication
GlusterFS is a clustered file-system capable of scaling to several
peta-bytes. It aggregates various storage bricks over Infiniband RDMA
or TCP/IP interconnect into one large parallel network file
@@ -87,129 +274,900 @@ terms of features and extensibility. It borrows a powerful concept
called Translators from GNU Hurd kernel. Much of the code in GlusterFS
is in userspace and easily manageable.
-This package provides the FUSE based GlusterFS client.
+This package provides support to geo-replication.
+%endif
+
+%package fuse
+Summary: Fuse client
+Group: Applications/File
+BuildRequires: fuse-devel
+Requires: %{name} = %{version}-%{release}
+
+Obsoletes: %{name}-client < %{version}-%{release}
+Provides: %{name}-client = %{version}-%{release}
+
+%description fuse
+GlusterFS is a clustered file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides support to FUSE based clients.
%package server
-Summary: GlusterFS Server
-Group: System Environment/Daemons
-Requires: %{name}-common = %{version}-%{release}
+Summary: Clustered file-system server
+Group: System Environment/Daemons
+Requires: %{name} = %{version}-%{release}
+Requires: %{name}-cli = %{version}-%{release}
+Requires: %{name}-libs = %{version}-%{release}
+Requires: %{name}-fuse = %{version}-%{release}
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 )
+Requires: rpcbind
+%else
+Requires: portmap
+%endif
%description server
GlusterFS is a clustered file-system capable of scaling to several
-peta-bytes. It aggregates various storage bricks over Infiniband RDMA
+petabytes. It aggregates various storage bricks over Infiniband RDMA
or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file system in
+system. GlusterFS is one of the most sophisticated file systems in
terms of features and extensibility. It borrows a powerful concept
called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in userspace and easily manageable.
+is in user space and easily manageable.
This package provides the glusterfs server daemon.
+%package api
+Summary: Clustered file-system api library
+Group: System Environment/Daemons
+Requires: %{name} = %{version}-%{release}
+Requires: %{name}-devel = %{version}-%{release}
+# we provide the Python package/namespace 'gluster'
+Provides: python-gluster = %{version}-%{release}
+
+%description api
+GlusterFS is a clustered file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides the glusterfs libgfapi library.
+
+%if ( 0%{!?_without_ocf:1} )
+%package resource-agents
+Summary: OCF Resource Agents for GlusterFS
+License: GPLv3+
+%if ( ! ( 0%{?rhel} && 0%{?rhel} < 6 ) )
+# EL5 does not support noarch sub-packages
+BuildArch: noarch
+%endif
+# this Group handling comes from the Fedora resource-agents package
+%if ( 0%{?fedora} || 0%{?centos_version} || 0%{?rhel} )
+Group: System Environment/Base
+%else
+Group: Productivity/Clustering/HA
+%endif
+# for glusterd
+Requires: glusterfs-server
+# depending on the distribution, we need pacemaker or resource-agents
+Requires: %{_prefix}/lib/ocf/resource.d
+
+%description resource-agents
+GlusterFS is a clustered file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides the resource agents which plug glusterd into
+Open Cluster Framework (OCF) compliant cluster resource managers,
+like Pacemaker.
+%endif
+
%package devel
-Summary: GlusterFS Development Libraries
-Group: Development/Libraries
-Requires: %{name}-common = %{version}-%{release}
+Summary: Development Libraries
+Group: Development/Libraries
+Requires: %{name} = %{version}-%{release}
%description devel
GlusterFS is a clustered file-system capable of scaling to several
-peta-bytes. It aggregates various storage bricks over Infiniband RDMA
+petabytes. It aggregates various storage bricks over Infiniband RDMA
or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file system in
+system. GlusterFS is one of the most sophisticated file systems in
terms of features and extensibility. It borrows a powerful concept
called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in userspace and easily manageable.
+is in user space and easily manageable.
+
+This package provides the development libraries and include files.
+
+%package api-devel
+Summary: Development Libraries
+Group: Development/Libraries
+Requires: %{name} = %{version}-%{release}
+Requires: %{name}-devel = %{version}-%{release}
+
+%description api-devel
+GlusterFS is a clustered file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
-This package provides the development libraries.
+This package provides the api include files.
+%package regression-tests
+Summary: Development Tools
+Group: Development/Tools
+Requires: %{name} = %{version}-%{release}
+Requires: %{name}-fuse = %{version}-%{release}
+Requires: %{name}-server = %{version}-%{release}
+Requires: perl(App::Prove) perl(Test::Harness) gcc util-linux-ng lvm2
+Requires: python attr dbench git nfs-utils xfsprogs
+
+%description regression-tests
+The Gluster Test Framework, is a suite of scripts used for
+regression testing of Gluster.
%prep
-%setup -q -n %{name}-%{version}
+%setup -q -n %{name}-%{version}%{?prereltag}
+%if ( 0%{_for_fedora_koji_builds} )
+#%patch0 -p0
+%patch1 -p0 -F4
+%if ( "%{version}" == "3.3.1" )
+%patch2 -p1
+%endif
+%endif
%build
+./autogen.sh
+%configure \
+ %{?_without_rdma} \
+ %{?_without_epoll} \
+ %{?_without_fusermount} \
+ %{?_without_georeplication} \
+ %{?_without_ocf} \
+ %{?_without_syslog} \
+ %{?_without_bd} \
+ %{?_without_qemu_block} \
+ %{?_without_systemtap}
-%configure %{?_without_ibverbs} %{?_without_bdb} %{?_without_libglfsclient} %{?_without_client} %{?_without_epoll}
+# fix hardening and remove rpath in shlibs
+%if ( 0%{?fedora} && 0%{?fedora} > 17 ) || ( 0%{?rhel} && 0%{?rhel} > 6 )
+%{__sed} -i 's| \\\$compiler_flags |&\\\$LDFLAGS |' libtool
+%endif
+%{__sed} -i 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|' libtool
+%{__sed} -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|' libtool
-# Remove rpath
-sed -i 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|g' libtool
-sed -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|g' libtool
%{__make} %{?_smp_mflags}
+pushd api/examples
+FLAGS="$RPM_OPT_FLAGS" %{__python} setup.py build
+popd
%install
-%{__rm} -rf %{buildroot}
+%{__rm} -rf %{buildroot}
%{__make} install DESTDIR=%{buildroot}
+# install the gfapi Python library in /usr/lib/python*/site-packages
+pushd api/examples
+%{__python} setup.py install --skip-build --verbose --root %{buildroot}
+popd
+# Install include directory
%{__mkdir_p} %{buildroot}%{_includedir}/glusterfs
-%{__mkdir_p} %{buildroot}/var/log/glusterfs
%{__install} -p -m 0644 libglusterfs/src/*.h \
%{buildroot}%{_includedir}/glusterfs/
+%{__install} -p -m 0644 contrib/uuid/*.h \
+ %{buildroot}%{_includedir}/glusterfs/
+# Following needed by hekafs multi-tenant translator
+%{__mkdir_p} %{buildroot}%{_includedir}/glusterfs/rpc
+%{__install} -p -m 0644 rpc/rpc-lib/src/*.h \
+ %{buildroot}%{_includedir}/glusterfs/rpc/
+%{__install} -p -m 0644 rpc/xdr/src/*.h \
+ %{buildroot}%{_includedir}/glusterfs/rpc/
+%{__mkdir_p} %{buildroot}%{_includedir}/glusterfs/server
+%{__install} -p -m 0644 xlators/protocol/server/src/*.h \
+ %{buildroot}%{_includedir}/glusterfs/server/
+%if ( 0%{_for_fedora_koji_builds} )
+%{__install} -D -p -m 0644 %{SOURCE1} \
+ %{buildroot}%{_sysconfdir}/sysconfig/glusterd
+%{__install} -D -p -m 0644 %{SOURCE2} \
+ %{buildroot}%{_sysconfdir}/sysconfig/glusterfsd
+%else
+%{__install} -D -p -m 0644 extras/glusterd-sysconfig \
+ %{buildroot}%{_sysconfdir}/sysconfig/glusterd
+%endif
+
+%if ( 0%{_for_fedora_koji_builds} )
+%if ( 0%{?rhel} && 0%{?rhel} <= 5 )
+%{__install} -D -p -m 0755 %{SOURCE6} \
+ %{buildroot}%{_sysconfdir}/sysconfig/modules/glusterfs-fuse.modules
+%endif
+%endif
+
+%{__mkdir_p} %{buildroot}%{_localstatedir}/log/glusterd
+%{__mkdir_p} %{buildroot}%{_localstatedir}/log/glusterfs
+%{__mkdir_p} %{buildroot}%{_localstatedir}/log/glusterfsd
+%{__mkdir_p} %{buildroot}%{_localstatedir}/run/gluster
# Remove unwanted files from all the shared libraries
-find %{buildroot}%{_libdir}/glusterfs -name '*.la' | xargs rm -f
+find %{buildroot}%{_libdir} -name '*.a' -delete
+find %{buildroot}%{_libdir} -name '*.la' -delete
+
+# Remove installed docs, they're included by %%doc
+%{__rm} -rf %{buildroot}%{_datadir}/doc/glusterfs/
+head -50 ChangeLog > ChangeLog.head && mv ChangeLog.head ChangeLog
+cat << EOM >> ChangeLog
+
+More commit messages for this ChangeLog can be found at
+https://forge.gluster.org/glusterfs-core/glusterfs/commits/v%{version}%{?prereltag}
+EOM
+
+# Remove benchmarking and other unpackaged files
+%{__rm} -rf %{buildroot}/benchmarking
+%{__rm} -f %{buildroot}/glusterfs-mode.el
+%{__rm} -f %{buildroot}/glusterfs.vim
+
+# Create working directory
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd
+
+# Update configuration file to /var/lib working directory
+sed -i 's|option working-directory /etc/glusterd|option working-directory %{_sharedstatedir}/glusterd|g' \
+ %{buildroot}%{_sysconfdir}/glusterfs/glusterd.vol
+
+# Install glusterfsd .service or init.d file
+%if ( 0%{_for_fedora_koji_builds} )
+%_init_install %{glusterfsd_service} glusterfsd
+%endif
+
+%if ( 0%{_for_fedora_koji_builds} )
+# Client logrotate entry
+%{__install} -D -p -m 0644 %{SOURCE3} \
+ %{buildroot}%{_sysconfdir}/logrotate.d/glusterfs-fuse
+
+# Server logrotate entry
+%{__install} -D -p -m 0644 %{SOURCE4} \
+ %{buildroot}%{_sysconfdir}/logrotate.d/glusterd
+# Legacy server logrotate entry
+%{__install} -D -p -m 0644 %{SOURCE5} \
+ %{buildroot}%{_sysconfdir}/logrotate.d/glusterfsd
+%else
+%{__install} -D -p -m 0644 extras/glusterfs-logrotate \
+ %{buildroot}%{_sysconfdir}/logrotate.d/glusterfs
+%endif
+
+%if ( 0%{!?_without_georeplication:1} )
+# geo-rep ghosts
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/geo-replication
+touch %{buildroot}%{_sharedstatedir}/glusterd/geo-replication/gsyncd_template.conf
+%{__install} -D -p -m 0644 extras/glusterfs-georep-logrotate \
+ %{buildroot}%{_sysconfdir}/logrotate.d/glusterfs-georep
+%endif
+
+%if ( 0%{!?_without_syslog:1} )
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 6 )
+%{__install} -D -p -m 0644 extras/gluster-rsyslog-7.2.conf \
+ %{buildroot}%{_sysconfdir}/rsyslog.d/gluster.conf.example
+%endif
+
+%if ( 0%{?rhel} && 0%{?rhel} == 6 )
+%{__install} -D -p -m 0644 extras/gluster-rsyslog-5.8.conf \
+ %{buildroot}%{_sysconfdir}/rsyslog.d/gluster.conf.example
+%endif
+
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 )
+%{__install} -D -p -m 0644 extras/logger.conf.example \
+ %{buildroot}%{_sysconfdir}/glusterfs/logger.conf.example
+%endif
+%endif
+
+# the rest of the ghosts
+touch %{buildroot}%{_sharedstatedir}/glusterd/glusterd.info
+touch %{buildroot}%{_sharedstatedir}/glusterd/options
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/stop
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/stop/post
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/stop/pre
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/start
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/start/post
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/start/pre
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/remove-brick
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/remove-brick/post
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/remove-brick/pre
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/add-brick
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/add-brick/post
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/add-brick/pre
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/set
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/set/post
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/set/pre
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/create
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/create/post
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/create/pre
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/delete
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/delete/post
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/delete/pre
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/copy-file
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/copy-file/post
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/copy-file/pre
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/gsync-create
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/gsync-create/post
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/gsync-create/pre
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/glustershd
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/peers
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/vols
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/groups
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/nfs/run
+touch %{buildroot}%{_sharedstatedir}/glusterd/nfs/nfs-server.vol
+touch %{buildroot}%{_sharedstatedir}/glusterd/nfs/run/nfs.pid
+
+find ./tests ./run-tests.sh -type f | cpio -pd %{buildroot}%{_prefix}/share/glusterfs
%clean
%{__rm} -rf %{buildroot}
-%post common
-/sbin/ldconfig -n %{_libdir}
+%post
+/sbin/ldconfig
+%if ( 0%{!?_without_syslog:1} )
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 )
+%_init_restart rsyslog
+%endif
+%endif
+
+%postun
+/sbin/ldconfig
+%if ( 0%{!?_without_syslog:1} )
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 )
+%_init_restart rsyslog
+%endif
+%endif
+
+%files
+%defattr(-,root,root,-)
+%doc ChangeLog COPYING-GPLV2 COPYING-LGPLV3 INSTALL README THANKS
+%config(noreplace) %{_sysconfdir}/logrotate.d/*
+%config(noreplace) %{_sysconfdir}/sysconfig/*
+%if ( 0%{!?_without_syslog:1} )
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 )
+%{_sysconfdir}/rsyslog.d/gluster.conf.example
+%endif
+%endif
+%{_libdir}/glusterfs
+%{_sbindir}/glusterfs*
+%{_mandir}/man8/*gluster*.8*
+%exclude %{_mandir}/man8/gluster.8*
+%dir %{_localstatedir}/log/glusterfs
+%dir %{_localstatedir}/run/gluster
+%dir %{_sharedstatedir}/glusterd
+%if ( 0%{!?_without_rdma:1} )
+%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/rpc-transport/rdma*
+%endif
+# server-side, etc., xlators in other RPMs
+%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mount/api*
+%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mount/fuse*
+%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/storage*
+%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/posix*
+%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol/server*
+%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mgmt*
+%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/nfs*
+# sample xlators not generally used or usable
+%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/encryption/rot-13*
+%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/mac-compat*
+%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/testing/performance/symlink-cache*
-%postun common
-/sbin/ldconfig -n %{_libdir}
+%post libs
+/sbin/ldconfig
-%post server
-/sbin/chkconfig --add glusterfsd
+%postun libs
+/sbin/ldconfig
-%preun server
-if [ $1 -eq 0 ]; then
- /sbin/service glusterfsd stop &>/dev/null || :
- /sbin/chkconfig --del glusterfsd
+%files libs
+%{_libdir}/*.so.*
+%exclude %{_libdir}/libgfapi.*
+
+%files cli
+%{_sbindir}/gluster
+%{_mandir}/man8/gluster.8*
+
+%if ( 0%{!?_without_rdma:1} )
+%files rdma
+%defattr(-,root,root,-)
+%{_libdir}/glusterfs/%{version}%{?prereltag}/rpc-transport/rdma*
+%endif
+
+%if ( 0%{!?_without_georeplication:1} )
+%post geo-replication
+#restart glusterd.
+if [ $1 -ge 1 ]; then
+ %_init_restart glusterd
fi
-%files common
+%files geo-replication
%defattr(-,root,root)
-%doc AUTHORS ChangeLog COPYING INSTALL NEWS README
-%doc /usr/share/doc/glusterfs
-%if 0%{!?_without_client:1}
-%exclude %{_libdir}/glusterfs/%{version}/xlator/mount
+%{_libexecdir}/glusterfs/gsyncd
+%{_libexecdir}/glusterfs/python/syncdaemon/*
+%{_libexecdir}/glusterfs/gverify.sh
+%{_libexecdir}/glusterfs/peer_add_secret_pub
+%{_libexecdir}/glusterfs/peer_gsec_create
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/geo-replication
+%ghost %attr(0644,-,-) %{_sharedstatedir}/glusterd/geo-replication/gsyncd_template.conf
%endif
-%{_libdir}/glusterfs
-%{_libdir}/*.so.*
-%{_sbindir}/glusterfs*
-%{_mandir}/man8/glusterfs.8*
-%dir /var/log/glusterfs
-%if 0%{!?_without_client:1}
-%files client
-%defattr(-,root,root)
+%files fuse
+%defattr(-,root,root,-)
+%if ( 0%{_for_fedora_koji_builds} )
+%config(noreplace) %{_sysconfdir}/logrotate.d/glusterfs-fuse
+%endif
+%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mount/fuse*
/sbin/mount.glusterfs
-%{_libdir}/glusterfs/%{version}/xlator/mount
-%endif
+%if ( 0%{!?_without_fusermount:1} )
+%{_bindir}/fusermount-glusterfs
+%endif
+%if ( 0%{_for_fedora_koji_builds} )
+%if ( 0%{?rhel} && 0%{?rhel} <= 5 )
+%{_sysconfdir}/sysconfig/modules/glusterfs-fuse.modules
+%endif
+%endif
%files server
+%defattr(-,root,root,-)
+%doc extras/clear_xattrs.sh
+%if ( 0%{_for_fedora_koji_builds} )
+%config(noreplace) %{_sysconfdir}/logrotate.d/glusterd
+%endif
+%config(noreplace) %{_sysconfdir}/sysconfig/glusterd
+%config(noreplace) %{_sysconfdir}/glusterfs
+# Legacy configs
+%if ( 0%{_for_fedora_koji_builds} )
+%config(noreplace) %{_sysconfdir}/logrotate.d/glusterfsd
+%config(noreplace) %{_sysconfdir}/sysconfig/glusterfsd
+%endif
+# init files
+%_init_glusterd
+%if ( 0%{_for_fedora_koji_builds} )
+%_init_glusterfsd
+%endif
+# binaries
+%{_sbindir}/glusterd
+%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/storage*
+%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/posix*
+%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol/server*
+%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mgmt*
+%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/nfs*
+%ghost %attr(0644,-,-) %config(noreplace) %{_sharedstatedir}/glusterd/glusterd.info
+%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/options
+# This is really ugly, but I have no idea how to mark these directories in an
+# other way. They should belong to the glusterfs-server package, but don't
+# exist after installation. They are generated on the first start...
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/post
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/pre
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/pre
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/post
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/pre
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/set
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/set/post
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/set/pre
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/pre
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/post
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/glustershd
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/vols
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/peers
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/groups
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/nfs
+%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/nfs-server.vol
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/nfs/run
+%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/run/nfs.pid
+
+%post api
+/sbin/ldconfig
+
+%postun api
+/sbin/ldconfig
+
+%files api
+%exclude %{_libdir}/*.so
+%{_libdir}/libgfapi.*
+%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mount/api*
+%{python_sitelib}/*
+
+%if ( 0%{!?_without_ocf:1} )
+%files resource-agents
%defattr(-,root,root)
-%config %{_sysconfdir}/glusterfs
-%{_sysconfdir}/init.d/glusterfsd
+# /usr/lib is the standard for OCF, also on x86_64
+%{_prefix}/lib/ocf/resource.d/glusterfs
+%endif
%files devel
%defattr(-,root,root,-)
%{_includedir}/glusterfs
-%{_includedir}/libglusterfsclient.h
%exclude %{_includedir}/glusterfs/y.tab.h
-%{_libdir}/*.a
-%exclude %{_libdir}/*.la
+%exclude %{_includedir}/glusterfs/api
+%exclude %{_libdir}/libgfapi.so
%{_libdir}/*.so
+%files api-devel
+%{_libdir}/pkgconfig/glusterfs-api.pc
+%{_libdir}/pkgconfig/libgfchangelog.pc
+%{_libdir}/libgfapi.so
+%{_includedir}/glusterfs/api/*
+
+%files regression-tests
+%defattr(-,root,root,-)
+%{_prefix}/share/glusterfs/*
+%exclude %{_prefix}/share/glusterfs/tests/basic/rpm.t
+
+%post server
+# Legacy server
+%_init_enable glusterd
+%_init_enable glusterfsd
+
+# Genuine Fedora (and EPEL) builds never put gluster files in /etc; if
+# there are any files in /etc from a prior gluster.org install, move them
+# to /var/lib. (N.B. Starting with 3.3.0 all gluster files are in /var/lib
+# in gluster.org RPMs.) Be careful to copy them on the off chance that
+# /etc and /var/lib are on separate file systems
+if [ -d /etc/glusterd -a ! -h %{_sharedstatedir}/glusterd ]; then
+ %{__mkdir_p} %{_sharedstatedir}/glusterd
+ cp -a /etc/glusterd %{_sharedstatedir}/glusterd
+ rm -rf /etc/glusterd
+ ln -sf %{_sharedstatedir}/glusterd /etc/glusterd
+fi
+
+# Rename old volfiles in an RPM-standard way. These aren't actually
+# considered package config files, so %config doesn't work for them.
+if [ -d %{_sharedstatedir}/glusterd/vols ]; then
+ for file in $(find %{_sharedstatedir}/glusterd/vols -name '*.vol'); do
+ newfile=${file}.rpmsave
+ echo "warning: ${file} saved as ${newfile}"
+ cp ${file} ${newfile}
+ done
+fi
+
+# add marker translator
+# but first make certain that there are no old libs around to bite us
+# BZ 834847
+if [ -e /etc/ld.so.conf.d/glusterfs.conf ]; then
+ rm -f /etc/ld.so.conf.d/glusterfs.conf
+ /sbin/ldconfig
+fi
+pidof -c -o %PPID -x glusterd &> /dev/null
+if [ $? -eq 0 ]; then
+ kill -9 `pgrep -f gsyncd.py` &> /dev/null
+
+ killall glusterd &> /dev/null
+ glusterd --xlator-option *.upgrade=on -N
+else
+ glusterd --xlator-option *.upgrade=on -N
+fi
+
+%preun server
+if [ $1 -eq 0 ]; then
+ if [ -f %_init_glusterfsd ]; then
+ %_init_stop glusterfsd
+ fi
+ %_init_stop glusterd
+ if [ -f %_init_glusterfsd ]; then
+ %_init_disable glusterfsd
+ fi
+ %_init_disable glusterd
+fi
+if [ $1 -ge 1 ]; then
+ if [ -f %_init_glusterfsd ]; then
+ %_init_restart glusterfsd
+ fi
+ %_init_restart glusterd
+fi
+
%changelog
-* Wed Jul 01 2009 Harshavardhana <harsha@gluster.com> - 2.1
-- Removed mod_glusterfs.so and added new --without epoll build
- option.
+* Wed Oct 11 2013 Harshavardhana <fharshav@redhat.com>
+- Add '_sharedstatedir' macro to `/var/lib` on <= RHEL5 (#1003184)
+
+* Wed Oct 9 2013 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- Sync with Fedora glusterfs.spec 3.4.1-2+
+
+* Wed Oct 9 2013 Niels de Vos <ndevos@redhat.com>
+- glusterfs-api-devel requires glusterfs-devel (#1016938, #1017094)
+
+* Mon Sep 30 2013 Niels de Vos <ndevos@redhat.com>
+- Package gfapi.py into the Python site-packages path (#1005146)
+
+* Tue Sep 17 2013 Harshavardhana <fharshav@redhat.com>
+- Provide a new package called "glusterfs-regression-tests" for standalone
+ regression testing.
+
+* Thu Aug 22 2013 Niels de Vos <ndevos@redhat.com>
+- Correct the day/date for some entries in this changelog (#1000019)
+
+* Wed Aug 7 2013 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- Sync with Fedora glusterfs.spec
+- add Requires
+- add -cli subpackage,
+- fix other minor differences with Fedora glusterfs.spec
+
+* Tue Jul 30 2013 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- Sync with Fedora glusterfs.spec, add glusterfs-libs RPM for oVirt/qemu-kvm
+
+* Thu Jul 25 2013 Csaba Henk <csaba@redhat.com>
+- Added peer_add_secret_pub and peer_gsec_create to %{_libexecdir}/glusterfs
+
+* Thu Jul 25 2013 Aravinda VK <avishwan@redhat.com>
+- Added gverify.sh to %{_libexecdir}/glusterfs directory.
+
+* Thu Jul 25 2013 Harshavardhana <fharshav@redhat.com>
+- Allow to build with '--without bd' to disable 'bd' xlator
+
+* Thu Jun 27 2013 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- fix the hardening fix for shlibs, use %%{__sed} macro, shorter ChangeLog
+
+* Wed Jun 26 2013 Niels de Vos <ndevos@redhat.com>
+- move the mount/api xlator to glusterfs-api
+
+* Fri Jun 7 2013 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- Sync with Fedora glusterfs.spec, remove G4S/UFO and Swift
+
+* Mon Mar 4 2013 Niels de Vos <ndevos@redhat.com>
+- Package /var/run/gluster so that statedumps can be created
+
+* Wed Feb 6 2013 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- Sync with Fedora glusterfs.spec
+
+* Tue Dec 11 2012 Filip Pytloun <filip.pytloun@gooddata.com>
+- add sysconfig file
+
+* Thu Oct 25 2012 Niels de Vos <ndevos@redhat.com>
+- Add a sub-package for the OCF resource agents
+
+* Wed Sep 05 2012 Niels de Vos <ndevos@redhat.com>
+- Don't use python-ctypes on SLES (from Jörg Petersen)
+
+* Tue Jul 10 2012 Niels de Vos <ndevos@redhat.com>
+- Include extras/clear_xattrs.sh in the glusterfs-server sub-package
+
+* Thu Jun 07 2012 Niels de Vos <ndevos@redhat.com>
+- Mark /var/lib/glusterd as owned by glusterfs, subdirs belong to -server
+
+* Wed May 9 2012 Kaleb S. KEITHLEY <kkeithle[at]redhat.com>
+- Add BuildRequires: libxml2-devel so that configure will DTRT on for
+- Fedora's Koji build system
+
+* Wed Nov 9 2011 Joe Julian <me@joejulian.name> - git master
+- Merge fedora specfile into gluster's spec.in.
+- Add conditionals to allow the same spec file to be used for both 3.1 and 3.2
+- http://bugs.gluster.com/show_bug.cgi?id=2970
+
+* Wed Oct 5 2011 Joe Julian <me@joejulian.name> - 3.2.4-1
+- Update to 3.2.4
+- Removed the $local_fs requirement from the init scripts as in RHEL/CentOS that's provided
+- by netfs, which needs to be started after glusterd.
+
+* Sun Sep 25 2011 Joe Julian <me@joejulian.name> - 3.2.3-2
+- Merged in upstream changes
+- Fixed version reporting 3.2git
+- Added nfs init script (disabled by default)
+
+* Thu Sep 1 2011 Joe Julian <me@joejulian.name> - 3.2.3-1
+- Update to 3.2.3
+
+* Tue Jul 19 2011 Joe Julian <me@joejulian.name> - 3.2.2-3
+- Add readline and libtermcap dependencies
+
+* Tue Jul 19 2011 Joe Julian <me@joejulian.name> - 3.2.2-2
+- Critical patch to prevent glusterd from walking outside of its own volume during rebalance
+
+* Thu Jul 14 2011 Joe Julian <me@joejulian.name> - 3.2.2-1
+- Update to 3.2.2
+
+* Wed Jul 13 2011 Joe Julian <me@joejulian.name> - 3.2.1-2
+- fix hardcoded path to gsyncd in source to match the actual file location
+
+* Tue Jun 21 2011 Joe Julian <me@joejulian.name> - 3.2.1
+- Update to 3.2.1
+
+* Mon Jun 20 2011 Joe Julian <me@joejulian.name> - 3.1.5
+- Update to 3.1.5
+
+* Tue May 31 2011 Joe Julian <me@joejulian.name> - 3.1.5-qa1.4
+- Current git
+
+* Sun May 29 2011 Joe Julian <me@joejulian.name> - 3.1.5-qa1.2
+- set _sharedstatedir to /var/lib for FHS compliance in RHEL5/CentOS5
+- mv /etc/glusterd, if it exists, to the new state dir for upgrading from gluster packaging
+
+* Sat May 28 2011 Joe Julian <me@joejulian.name> - 3.1.5-qa1.1
+- Update to 3.1.5-qa1
+- Add patch to remove optimization disabling
+- Add patch to remove forced 64 bit compile
+- Obsolete glusterfs-core to allow for upgrading from gluster packaging
+
+* Sat Mar 19 2011 Jonathan Steffan <jsteffan@fedoraproject.org> - 3.1.3-1
+- Update to 3.1.3
+- Merge in more upstream SPEC changes
+- Remove patches from GlusterFS bugzilla #2309 and #2311
+- Remove inode-gen.patch
+
+* Sun Feb 06 2011 Jonathan Steffan <jsteffan@fedoraproject.org> - 3.1.2-3
+- Add back in legacy SPEC elements to support older branches
+
+* Thu Feb 03 2011 Jonathan Steffan <jsteffan@fedoraproject.org> - 3.1.2-2
+- Add patches from CloudFS project
+
+* Tue Jan 25 2011 Jonathan Steffan <jsteffan@fedoraproject.org> - 3.1.2-1
+- Update to 3.1.2
+
+* Wed Jan 5 2011 Dan Horák <dan[at]danny.cz> - 3.1.1-3
+- no InfiniBand on s390(x)
+
+* Sat Jan 1 2011 Jonathan Steffan <jsteffan@fedoraproject.org> - 3.1.1-2
+- Update to support readline
+- Update to not parallel build
+
+* Mon Dec 27 2010 Silas Sewell <silas@sewell.ch> - 3.1.1-1
+- Update to 3.1.1
+- Change package names to mirror upstream
+
+* Mon Dec 20 2010 Jonathan Steffan <jsteffan@fedoraproject.org> - 3.0.7-1
+- Update to 3.0.7
+
+* Wed Jul 28 2010 Jonathan Steffan <jsteffan@fedoraproject.org> - 3.0.5-1
+- Update to 3.0.x
+
+* Sat Apr 10 2010 Jonathan Steffan <jsteffan@fedoraproject.org> - 2.0.9-2
+- Move python version requires into a proper BuildRequires otherwise
+ the spec always turned off python bindings as python is not part
+ of buildsys-build and the chroot will never have python unless we
+ require it
+- Temporarily set -D_FORTIFY_SOURCE=1 until upstream fixes code
+ GlusterFS Bugzilla #197 (#555728)
+- Move glusterfs-volgen to devel subpackage (#555724)
+- Update description (#554947)
+
+* Sat Jan 2 2010 Jonathan Steffan <jsteffan@fedoraproject.org> - 2.0.9-1
+- Update to 2.0.9
+
+* Sun Nov 8 2009 Jonathan Steffan <jsteffan@fedoraproject.org> - 2.0.8-1
+- Update to 2.0.8
+- Remove install of glusterfs-volgen, it's properly added to
+ automake upstream now
+
+* Sat Oct 31 2009 Jonathan Steffan <jsteffan@fedoraproject.org> - 2.0.7-1
+- Update to 2.0.7
+- Install glusterfs-volgen, until it's properly added to automake
+ by upstream
+- Add macro to be able to ship more docs
+
+* Thu Sep 17 2009 Peter Lemenkov <lemenkov@gmail.com> 2.0.6-2
+- Rebuilt with new fuse
+
+* Sat Sep 12 2009 Matthias Saou <http://freshrpms.net/> 2.0.6-1
+- Update to 2.0.6.
+- No longer default to disable the client on RHEL5 (#522192).
+- Update spec file URLs.
+
+* Mon Jul 27 2009 Matthias Saou <http://freshrpms.net/> 2.0.4-1
+- Update to 2.0.4.
+
+* Thu Jun 11 2009 Matthias Saou <http://freshrpms.net/> 2.0.1-2
+- Remove libglusterfs/src/y.tab.c to fix koji F11/devel builds.
+
+* Sat May 16 2009 Matthias Saou <http://freshrpms.net/> 2.0.1-1
+- Update to 2.0.1.
+
+* Thu May 7 2009 Matthias Saou <http://freshrpms.net/> 2.0.0-1
+- Update to 2.0.0 final.
+
+* Wed Apr 29 2009 Matthias Saou <http://freshrpms.net/> 2.0.0-0.3.rc8
+- Move glusterfsd to common, since the client has a symlink to it.
+
+* Fri Apr 24 2009 Matthias Saou <http://freshrpms.net/> 2.0.0-0.2.rc8
+- Update to 2.0.0rc8.
+
+* Sun Apr 12 2009 Matthias Saou <http://freshrpms.net/> 2.0.0-0.2.rc7
+- Update glusterfsd init script to the new style init.
+- Update files to match the new default vol file names.
+- Include logrotate for glusterfsd, use a pid file by default.
+- Include logrotate for glusterfs, using killall for lack of anything better.
+
+* Sat Apr 11 2009 Matthias Saou <http://freshrpms.net/> 2.0.0-0.1.rc7
+- Update to 2.0.0rc7.
+- Rename "libs" to "common" and move the binary, man page and log dir there.
+
+* Tue Feb 24 2009 Fedora Release Engineering <rel-eng@lists.fedoraproject.org>
+- Rebuilt for https://fedoraproject.org/wiki/Fedora_11_Mass_Rebuild
+
+* Mon Feb 16 2009 Matthias Saou <http://freshrpms.net/> 2.0.0-0.1.rc1
+- Update to 2.0.0rc1.
+- Include new libglusterfsclient.h.
+
+* Mon Feb 16 2009 Matthias Saou <http://freshrpms.net/> 1.3.12-1
+- Update to 1.3.12.
+- Remove no longer needed ocreat patch.
+
+* Thu Jul 17 2008 Matthias Saou <http://freshrpms.net/> 1.3.10-1
+- Update to 1.3.10.
+- Remove mount patch, it's been included upstream now.
+
+* Fri May 16 2008 Matthias Saou <http://freshrpms.net/> 1.3.9-1
+- Update to 1.3.9.
+
+* Fri May 9 2008 Matthias Saou <http://freshrpms.net/> 1.3.8-1
+- Update to 1.3.8 final.
+
+* Wed Apr 23 2008 Matthias Saou <http://freshrpms.net/> 1.3.8-0.10
+- Include short patch to include fixes from latest TLA 751.
+
+* Tue Apr 22 2008 Matthias Saou <http://freshrpms.net/> 1.3.8-0.9
+- Update to 1.3.8pre6.
+- Include glusterfs binary in both the client and server packages, now that
+ glusterfsd is a symlink to it instead of a separate binary.
+* Sun Feb 3 2008 Matthias Saou <http://freshrpms.net/> 1.3.8-0.8
+- Add python version check and disable bindings for version < 2.4.
+
+* Sun Feb 3 2008 Matthias Saou <http://freshrpms.net/> 1.3.8-0.7
+- Add --without client rpmbuild option, make it the default for RHEL (no fuse).
+ (I hope "rhel" is the proper default macro name, couldn't find it...)
+
+* Wed Jan 30 2008 Matthias Saou <http://freshrpms.net/> 1.3.8-0.6
+- Add --without ibverbs rpmbuild option to the package.
+
+* Mon Jan 14 2008 Matthias Saou <http://freshrpms.net/> 1.3.8-0.5
+- Update to current TLA again, patch-636 which fixes the known segfaults.
+
+* Thu Jan 10 2008 Matthias Saou <http://freshrpms.net/> 1.3.8-0.4
+- Downgrade to glusterfs--mainline--2.5--patch-628 which is more stable.
+
+* Tue Jan 8 2008 Matthias Saou <http://freshrpms.net/> 1.3.8-0.3
+- Update to current TLA snapshot.
+- Include umount.glusterfs wrapper script (really needed? dunno).
+- Include patch to mount wrapper to avoid multiple identical mounts.
+
+* Sun Dec 30 2007 Matthias Saou <http://freshrpms.net/> 1.3.8-0.1
+- Update to current TLA snapshot, which includes "volume-name=" fstab option.
+
+* Mon Dec 3 2007 Matthias Saou <http://freshrpms.net/> 1.3.7-6
+- Re-add the /var/log/glusterfs directory in the client sub-package (required).
+- Include custom patch to support vol= in fstab for -n glusterfs client option.
+
+* Mon Nov 26 2007 Matthias Saou <http://freshrpms.net/> 1.3.7-4
+- Re-enable libibverbs.
+- Check and update License field to GPLv3+.
+- Add glusterfs-common obsoletes, to provide upgrade path from old packages.
+- Include patch to add mode to O_CREATE opens.
-* Thu Apr 16 2009 Harshavardhana <harsha@gluster.com> - 2.0
-- Galore of updates including new packages added common,
- client,server splitting the original package. rpmbuild
- fully restructured to adhere to Fedora rpm standards.
- Older changelog removed as there were warnings when
- tried with 'rpmlint'.
+* Thu Nov 22 2007 Matthias Saou <http://freshrpms.net/> 1.3.7-3
+- Remove Makefile* files from examples.
+- Include RHEL/Fedora type init script, since the included ones don't do.
+* Wed Nov 21 2007 Matthias Saou <http://freshrpms.net/> 1.3.7-1
+- Major spec file cleanup.
+- Add missing %%clean section.
+- Fix ldconfig calls (weren't set for the proper sub-package).
+* Sat Aug 4 2007 Matt Paine <matt@mattsoftware.com> - 1.3.pre7
+- Added support to build rpm without ibverbs support (use --without ibverbs
+ switch)
+* Sun Jul 15 2007 Matt Paine <matt@mattsoftware.com> - 1.3.pre6
+- Initial spec file
diff --git a/glusterfsd/src/Makefile.am b/glusterfsd/src/Makefile.am
index 060917930..05a10dee3 100644
--- a/glusterfsd/src/Makefile.am
+++ b/glusterfsd/src/Makefile.am
@@ -1,13 +1,19 @@
sbin_PROGRAMS = glusterfsd
-glusterfsd_SOURCES = glusterfsd.c fetch-spec.c
-glusterfsd_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(GF_LDADD)
-glusterfsd_LDFLAGS = $(GF_LDFLAGS) $(GF_GLUSTERFS_LDFLAGS)
-noinst_HEADERS = glusterfsd.h
+glusterfsd_SOURCES = glusterfsd.c glusterfsd-mgmt.c
+glusterfsd_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la \
+ $(GF_LDADD) $(GF_GLUSTERFS_LIBS)
+glusterfsd_LDFLAGS = $(GF_LDFLAGS)
+noinst_HEADERS = glusterfsd.h glusterfsd-mem-types.h
-AM_CFLAGS = -fPIC -Wall -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS)\
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
-I$(top_srcdir)/libglusterfs/src -DDATADIR=\"$(localstatedir)\" \
- -DCONFDIR=\"$(sysconfdir)/glusterfs\" $(GF_GLUSTERFS_CFLAGS)
+ -DCONFDIR=\"$(sysconfdir)/glusterfs\" \
+ -I$(top_srcdir)/rpc/rpc-lib/src -I$(top_srcdir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_GLUSTERFS_CFLAGS)
CLEANFILES =
@@ -16,9 +22,14 @@ $(top_builddir)/libglusterfs/src/libglusterfs.la:
uninstall-local:
rm -f $(DESTDIR)$(sbindir)/glusterfs
+ rm -f $(DESTDIR)$(sbindir)/glusterd
install-data-local:
$(INSTALL) -d -m 755 $(DESTDIR)$(localstatedir)/run
+ $(INSTALL) -d -m 755 $(DESTDIR)$(localstatedir)/run/gluster
$(INSTALL) -d -m 755 $(DESTDIR)$(localstatedir)/log/glusterfs
+ $(INSTALL) -d -m 755 $(DESTDIR)$(sbindir)
rm -f $(DESTDIR)$(sbindir)/glusterfs
+ rm -f $(DESTDIR)$(sbindir)/glusterd
ln -s glusterfsd $(DESTDIR)$(sbindir)/glusterfs
+ ln -s glusterfsd $(DESTDIR)$(sbindir)/glusterd
diff --git a/glusterfsd/src/fetch-spec.c b/glusterfsd/src/fetch-spec.c
deleted file mode 100644
index 3e9712d8d..000000000
--- a/glusterfsd/src/fetch-spec.c
+++ /dev/null
@@ -1,296 +0,0 @@
-/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <stdio.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <stdlib.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif /* _CONFIG_H */
-
-#include "glusterfs.h"
-#include "stack.h"
-#include "dict.h"
-#include "transport.h"
-#include "event.h"
-#include "defaults.h"
-
-
-static int
-fetch_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- char *spec_data)
-{
- FILE *spec_fp = NULL;
-
- spec_fp = frame->local;
-
- if (op_ret >= 0) {
- fwrite (spec_data, strlen (spec_data), 1, spec_fp);
- fflush (spec_fp);
- fclose (spec_fp);
- }
- else {
- gf_log (frame->this->name, GF_LOG_ERROR,
- "GETSPEC from server returned -1 (%s)",
- strerror (op_errno));
- }
-
- frame->local = NULL;
- STACK_DESTROY (frame->root);
-
- /* exit the child process */
- exit (op_ret);
-}
-
-
-static int
-fetch_notify (xlator_t *this_xl, int event, void *data, ...)
-{
- int ret = 0;
- call_frame_t *frame = NULL;
- static int failed_connects = 0;
-
- switch (event)
- {
- case GF_EVENT_CHILD_UP:
- frame = create_frame (this_xl, this_xl->ctx->pool);
- frame->local = this_xl->private;
-
- STACK_WIND (frame, fetch_cbk,
- this_xl->children->xlator,
- this_xl->children->xlator->mops->getspec,
- this_xl->ctx->cmd_args.volfile_id,
- 0);
- break;
- case GF_EVENT_CHILD_DOWN:
- failed_connects++;
- if (failed_connects
- >= this_xl->ctx->cmd_args.max_connect_attempts) {
- exit (1);
- }
- break;
- default:
- ret = default_notify (this_xl, event, data);
- break;
- }
-
- return ret;
-}
-
-
-static int
-fetch_init (xlator_t *xl)
-{
- return 0;
-}
-
-static xlator_t *
-get_shrub (glusterfs_ctx_t *ctx,
- const char *remote_host,
- const char *transport,
- uint32_t remote_port)
-{
- volume_opt_list_t *vol_opt = NULL;
- xlator_t *trav = NULL;
- int ret = 0;
- xlator_t *top = NULL;
- xlator_t *trans = NULL;
- xlator_list_t *parent = NULL, *tmp = NULL;
-
- top = CALLOC (1, sizeof (*top));
- ERR_ABORT (top);
- trans = CALLOC (1, sizeof (*trans));
- ERR_ABORT (trans);
-
- INIT_LIST_HEAD (&top->volume_options);
- INIT_LIST_HEAD (&trans->volume_options);
-
- top->name = "top";
- top->ctx = ctx;
- top->next = trans;
- top->init = fetch_init;
- top->notify = fetch_notify;
- top->children = (void *) CALLOC (1, sizeof (*top->children));
- ERR_ABORT (top->children);
- top->children->xlator = trans;
-
- trans->name = "trans";
- trans->ctx = ctx;
- trans->prev = top;
- trans->init = fetch_init;
- trans->notify = default_notify;
- trans->options = get_new_dict ();
-
- parent = CALLOC (1, sizeof(*parent));
- parent->xlator = top;
- if (trans->parents == NULL)
- trans->parents = parent;
- else {
- tmp = trans->parents;
- while (tmp->next)
- tmp = tmp->next;
- tmp->next = parent;
- }
-
- /* TODO: log on failure to set dict */
- if (remote_host) {
- ret = dict_set (trans->options, "remote-host",
- str_to_data ((char *)remote_host));
- }
-
- if (remote_port)
- ret = dict_set_uint32 (trans->options, "remote-port",
- remote_port);
-
- /* 'option remote-subvolume <x>' is needed here even though
- * its not used
- */
- ret = dict_set_static_ptr (trans->options, "remote-subvolume",
- "brick");
- ret = dict_set_static_ptr (trans->options, "disable-handshake", "on");
- ret = dict_set_static_ptr (trans->options, "non-blocking-io", "off");
-
- if (transport) {
- char *transport_type = CALLOC (1, strlen (transport) + 10);
- ERR_ABORT (transport_type);
- strcpy(transport_type, transport);
-
- if (strchr (transport_type, ':'))
- *(strchr (transport_type, ':')) = '\0';
-
- ret = dict_set_dynstr (trans->options, "transport-type",
- transport_type);
- }
-
- xlator_set_type (trans, "protocol/client");
-
- trav = top;
- while (trav) {
- /* Get the first volume_option */
- if (!list_empty (&trav->volume_options)) {
- list_for_each_entry (vol_opt,
- &trav->volume_options, list)
- break;
- if ((ret =
- validate_xlator_volume_options (trav,
- vol_opt->given_opt)) < 0) {
- gf_log (trav->name, GF_LOG_ERROR,
- "validating translator failed");
- return NULL;
- }
- }
- trav = trav->next;
- }
-
- if (xlator_tree_init (top) != 0)
- return NULL;
-
- return top;
-}
-
-
-static int
-_fetch (glusterfs_ctx_t *ctx,
- FILE *spec_fp,
- const char *remote_host,
- const char *transport,
- uint32_t remote_port)
-{
- xlator_t *this = NULL;
-
- this = get_shrub (ctx, remote_host, transport, remote_port);
- if (this == NULL)
- return -1;
-
- this->private = spec_fp;
-
- event_dispatch (ctx->event_pool);
-
- return 0;
-}
-
-
-static int
-_fork_and_fetch (glusterfs_ctx_t *ctx,
- FILE *spec_fp,
- const char *remote_host,
- const char *transport,
- uint32_t remote_port)
-{
- int ret;
-
- ret = fork ();
- switch (ret) {
- case -1:
- perror ("fork()");
- break;
- case 0:
- /* child */
- ret = _fetch (ctx, spec_fp, remote_host,
- transport, remote_port);
- if (ret == -1)
- exit (ret);
- default:
- /* parent */
- wait (&ret);
- ret = WEXITSTATUS (ret);
- }
- return ret;
-}
-
-FILE *
-fetch_spec (glusterfs_ctx_t *ctx)
-{
- char *remote_host = NULL;
- char *transport = NULL;
- FILE *spec_fp;
- int32_t ret;
-
- spec_fp = tmpfile ();
-
- if (!spec_fp) {
- perror ("tmpfile ()");
- return NULL;
- }
-
- remote_host = ctx->cmd_args.volfile_server;
- transport = ctx->cmd_args.volfile_server_transport;
- if (!transport)
- transport = "tcp";
-
- ret = _fork_and_fetch (ctx, spec_fp, remote_host, transport,
- ctx->cmd_args.volfile_server_port);
-
- if (!ret) {
- fseek (spec_fp, 0, SEEK_SET);
- }
- else {
- fclose (spec_fp);
- spec_fp = NULL;
- }
-
- return spec_fp;
-}
diff --git a/glusterfsd/src/glusterfsd-mem-types.h b/glusterfsd/src/glusterfsd-mem-types.h
new file mode 100644
index 000000000..7135c0ada
--- /dev/null
+++ b/glusterfsd/src/glusterfsd-mem-types.h
@@ -0,0 +1,27 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __GLUSTERFSD_MEM_TYPES_H__
+#define __GLUSTERFSD_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+#define GF_MEM_TYPE_START (gf_common_mt_end + 1)
+
+enum gfd_mem_types_ {
+ gfd_mt_xlator_list_t = GF_MEM_TYPE_START,
+ gfd_mt_xlator_t,
+ gfd_mt_server_cmdline_t,
+ gfd_mt_xlator_cmdline_option_t,
+ gfd_mt_char,
+ gfd_mt_call_pool_t,
+ gfd_mt_end
+
+};
+#endif
diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
new file mode 100644
index 000000000..1c9220927
--- /dev/null
+++ b/glusterfsd/src/glusterfsd-mgmt.c
@@ -0,0 +1,2105 @@
+/*
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif /* _CONFIG_H */
+
+#include "glusterfs.h"
+#include "stack.h"
+#include "dict.h"
+#include "event.h"
+#include "defaults.h"
+
+#include "rpc-clnt.h"
+#include "protocol-common.h"
+#include "glusterfs3.h"
+#include "portmap-xdr.h"
+#include "xdr-generic.h"
+
+#include "glusterfsd.h"
+#include "rpcsvc.h"
+#include "cli1-xdr.h"
+#include "statedump.h"
+#include "syncop.h"
+#include "xlator.h"
+
+static gf_boolean_t is_mgmt_rpc_reconnect = _gf_false;
+
+int glusterfs_mgmt_pmap_signin (glusterfs_ctx_t *ctx);
+int glusterfs_volfile_fetch (glusterfs_ctx_t *ctx);
+int glusterfs_process_volfp (glusterfs_ctx_t *ctx, FILE *fp);
+int glusterfs_graph_unknown_options (glusterfs_graph_t *graph);
+int emancipate(glusterfs_ctx_t *ctx, int ret);
+
+int
+mgmt_cbk_spec (struct rpc_clnt *rpc, void *mydata, void *data)
+{
+ glusterfs_ctx_t *ctx = NULL;
+ xlator_t *this = NULL;
+
+ this = mydata;
+ ctx = glusterfsd_ctx;
+ gf_log ("mgmt", GF_LOG_INFO, "Volume file changed");
+
+ glusterfs_volfile_fetch (ctx);
+ return 0;
+}
+
+
+int
+mgmt_cbk_event (struct rpc_clnt *rpc, void *mydata, void *data)
+{
+ return 0;
+}
+
+struct iobuf *
+glusterfs_serialize_reply (rpcsvc_request_t *req, void *arg,
+ struct iovec *outmsg, xdrproc_t xdrproc)
+{
+ struct iobuf *iob = NULL;
+ ssize_t retlen = -1;
+ ssize_t xdr_size = 0;
+
+ /* First, get the io buffer into which the reply in arg will
+ * be serialized.
+ */
+ xdr_size = xdr_sizeof (xdrproc, arg);
+ iob = iobuf_get2 (req->svc->ctx->iobuf_pool, xdr_size);
+ if (!iob) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to get iobuf");
+ goto ret;
+ }
+
+ iobuf_to_iovec (iob, outmsg);
+ /* Use the given serializer to translate the give C structure in arg
+ * to XDR format which will be written into the buffer in outmsg.
+ */
+ /* retlen is used to received the error since size_t is unsigned and we
+ * need -1 for error notification during encoding.
+ */
+ retlen = xdr_serialize_generic (*outmsg, arg, xdrproc);
+ if (retlen == -1) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to encode message");
+ goto ret;
+ }
+
+ outmsg->iov_len = retlen;
+ret:
+ if (retlen == -1) {
+ iob = NULL;
+ }
+
+ return iob;
+}
+
+int
+glusterfs_submit_reply (rpcsvc_request_t *req, void *arg,
+ struct iovec *payload, int payloadcount,
+ struct iobref *iobref, xdrproc_t xdrproc)
+{
+ struct iobuf *iob = NULL;
+ int ret = -1;
+ struct iovec rsp = {0,};
+ char new_iobref = 0;
+
+ if (!req) {
+ GF_ASSERT (req);
+ goto out;
+ }
+
+ if (!iobref) {
+ iobref = iobref_new ();
+ if (!iobref) {
+ gf_log (THIS->name, GF_LOG_ERROR, "out of memory");
+ goto out;
+ }
+
+ new_iobref = 1;
+ }
+
+ iob = glusterfs_serialize_reply (req, arg, &rsp, xdrproc);
+ if (!iob) {
+ gf_log_callingfn (THIS->name, GF_LOG_ERROR, "Failed to serialize reply");
+ } else {
+ iobref_add (iobref, iob);
+ }
+
+ ret = rpcsvc_submit_generic (req, &rsp, 1, payload, payloadcount,
+ iobref);
+
+ /* Now that we've done our job of handing the message to the RPC layer
+ * we can safely unref the iob in the hope that RPC layer must have
+ * ref'ed the iob on receiving into the txlist.
+ */
+ if (ret == -1) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Reply submission failed");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (iob)
+ iobuf_unref (iob);
+
+ if (new_iobref && iobref)
+ iobref_unref (iobref);
+
+ return ret;
+}
+
+int
+glusterfs_terminate_response_send (rpcsvc_request_t *req, int op_ret)
+{
+ gd1_mgmt_brick_op_rsp rsp = {0,};
+ dict_t *dict = NULL;
+ int ret = 0;
+
+ rsp.op_ret = op_ret;
+ rsp.op_errno = 0;
+ rsp.op_errstr = "";
+ dict = dict_new ();
+
+ if (dict)
+ ret = dict_allocate_and_serialize (dict, &rsp.output.output_val,
+ &rsp.output.output_len);
+
+
+ if (ret == 0)
+ ret = glusterfs_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+
+ GF_FREE (rsp.output.output_val);
+ if (dict)
+ dict_unref (dict);
+ return ret;
+}
+
+int
+glusterfs_handle_terminate (rpcsvc_request_t *req)
+{
+
+ glusterfs_terminate_response_send (req, 0);
+ cleanup_and_exit (SIGTERM);
+ return 0;
+}
+
+int
+glusterfs_translator_info_response_send (rpcsvc_request_t *req, int ret,
+ char *msg, dict_t *output)
+{
+ gd1_mgmt_brick_op_rsp rsp = {0,};
+ gf_boolean_t free_ptr = _gf_false;
+ GF_ASSERT (req);
+
+ rsp.op_ret = ret;
+ rsp.op_errno = 0;
+ if (ret && msg && msg[0])
+ rsp.op_errstr = msg;
+ else
+ rsp.op_errstr = "";
+
+ ret = -1;
+ if (output) {
+ ret = dict_allocate_and_serialize (output,
+ &rsp.output.output_val,
+ &rsp.output.output_len);
+ }
+ if (!ret)
+ free_ptr = _gf_true;
+
+ glusterfs_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+ ret = 0;
+ if (free_ptr)
+ GF_FREE (rsp.output.output_val);
+ return ret;
+}
+
+int
+glusterfs_xlator_op_response_send (rpcsvc_request_t *req, int op_ret,
+ char *msg, dict_t *output)
+{
+ gd1_mgmt_brick_op_rsp rsp = {0,};
+ int ret = -1;
+ gf_boolean_t free_ptr = _gf_false;
+ GF_ASSERT (req);
+
+ rsp.op_ret = op_ret;
+ rsp.op_errno = 0;
+ if (op_ret && msg && msg[0])
+ rsp.op_errstr = msg;
+ else
+ rsp.op_errstr = "";
+
+ if (output) {
+ ret = dict_allocate_and_serialize (output,
+ &rsp.output.output_val,
+ &rsp.output.output_len);
+ }
+ if (!ret)
+ free_ptr = _gf_true;
+
+ ret = glusterfs_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+
+ if (free_ptr)
+ GF_FREE (rsp.output.output_val);
+
+ return ret;
+}
+
+int
+glusterfs_handle_translator_info_get (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gd1_mgmt_brick_op_req xlator_req = {0,};
+ dict_t *dict = NULL;
+ xlator_t *this = NULL;
+ gf1_cli_top_op top_op = 0;
+ uint32_t blk_size = 0;
+ uint32_t blk_count = 0;
+ double time = 0;
+ double throughput = 0;
+ xlator_t *any = NULL;
+ xlator_t *xlator = NULL;
+ glusterfs_graph_t *active = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ char msg[2048] = {0,};
+ dict_t *output = NULL;
+
+ GF_ASSERT (req);
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = xdr_to_generic (req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ dict = dict_new ();
+ ret = dict_unserialize (xlator_req.input.input_val,
+ xlator_req.input.input_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "top-op", (int32_t *)&top_op);
+ if ((!ret) && (GF_CLI_TOP_READ_PERF == top_op ||
+ GF_CLI_TOP_WRITE_PERF == top_op)) {
+ ret = dict_get_uint32 (dict, "blk-size", &blk_size);
+ if (ret)
+ goto cont;
+ ret = dict_get_uint32 (dict, "blk-cnt", &blk_count);
+ if (ret)
+ goto cont;
+
+ if (GF_CLI_TOP_READ_PERF == top_op) {
+ ret = glusterfs_volume_top_read_perf
+ (blk_size, blk_count, xlator_req.name,
+ &throughput, &time);
+ } else if ( GF_CLI_TOP_WRITE_PERF == top_op) {
+ ret = glusterfs_volume_top_write_perf
+ (blk_size, blk_count, xlator_req.name,
+ &throughput, &time);
+ }
+ ret = dict_set_double (dict, "time", time);
+ if (ret)
+ goto cont;
+ ret = dict_set_double (dict, "throughput", throughput);
+ if (ret)
+ goto cont;
+ }
+cont:
+ ctx = glusterfsd_ctx;
+ GF_ASSERT (ctx);
+ active = ctx->active;
+ any = active->first;
+
+ xlator = xlator_search_by_name (any, xlator_req.name);
+ if (!xlator) {
+ snprintf (msg, sizeof (msg), "xlator %s is not loaded",
+ xlator_req.name);
+ goto out;
+ }
+
+ output = dict_new ();
+ ret = xlator->notify (xlator, GF_EVENT_TRANSLATOR_INFO, dict, output);
+
+out:
+ ret = glusterfs_translator_info_response_send (req, ret, msg, output);
+
+ free (xlator_req.name);
+ free (xlator_req.input.input_val);
+ if (output)
+ dict_unref (output);
+ if (dict)
+ dict_unref (dict);
+ return ret;
+}
+
+int
+glusterfs_volume_top_write_perf (uint32_t blk_size, uint32_t blk_count,
+ char *brick_path, double *throughput,
+ double *time)
+{
+ int32_t fd = -1;
+ int32_t input_fd = -1;
+ char export_path[PATH_MAX];
+ char *buf = NULL;
+ int32_t iter = 0;
+ int32_t ret = -1;
+ uint64_t total_blks = 0;
+ struct timeval begin, end = {0,};
+
+ GF_ASSERT (brick_path);
+ GF_ASSERT (throughput);
+ GF_ASSERT (time);
+ if (!(blk_size > 0) || ! (blk_count > 0))
+ goto out;
+
+ snprintf (export_path, sizeof (export_path), "%s/%s",
+ brick_path, ".gf-tmp-stats-perf");
+
+ fd = open (export_path, O_CREAT|O_RDWR, S_IRWXU);
+ if (-1 == fd) {
+ ret = -1;
+ gf_log ("glusterd", GF_LOG_ERROR, "Could not open tmp file");
+ goto out;
+ }
+
+ buf = GF_MALLOC (blk_size * sizeof(*buf), gf_common_mt_char);
+ if (!buf) {
+ ret = -1;
+ goto out;
+ }
+
+ input_fd = open ("/dev/zero", O_RDONLY);
+ if (-1 == input_fd) {
+ ret = -1;
+ gf_log ("glusterd",GF_LOG_ERROR, "Unable to open input file");
+ goto out;
+ }
+
+ gettimeofday (&begin, NULL);
+ for (iter = 0; iter < blk_count; iter++) {
+ ret = read (input_fd, buf, blk_size);
+ if (ret != blk_size) {
+ ret = -1;
+ goto out;
+ }
+ ret = write (fd, buf, blk_size);
+ if (ret != blk_size) {
+ ret = -1;
+ goto out;
+ }
+ total_blks += ret;
+ }
+ ret = 0;
+ if (total_blks != ((uint64_t)blk_size * blk_count)) {
+ gf_log ("glusterd", GF_LOG_WARNING, "Error in write");
+ ret = -1;
+ goto out;
+ }
+
+ gettimeofday (&end, NULL);
+ *time = (end.tv_sec - begin.tv_sec) * 1e6
+ + (end.tv_usec - begin.tv_usec);
+ *throughput = total_blks / *time;
+ gf_log ("glusterd", GF_LOG_INFO, "Throughput %.2f Mbps time %.2f secs "
+ "bytes written %"PRId64, *throughput, *time, total_blks);
+
+out:
+ if (fd >= 0)
+ close (fd);
+ if (input_fd >= 0)
+ close (input_fd);
+ GF_FREE (buf);
+ unlink (export_path);
+
+ return ret;
+}
+
+int
+glusterfs_volume_top_read_perf (uint32_t blk_size, uint32_t blk_count,
+ char *brick_path, double *throughput,
+ double *time)
+{
+ int32_t fd = -1;
+ int32_t input_fd = -1;
+ int32_t output_fd = -1;
+ char export_path[PATH_MAX];
+ char *buf = NULL;
+ int32_t iter = 0;
+ int32_t ret = -1;
+ uint64_t total_blks = 0;
+ struct timeval begin, end = {0,};
+
+ GF_ASSERT (brick_path);
+ GF_ASSERT (throughput);
+ GF_ASSERT (time);
+ if (!(blk_size > 0) || ! (blk_count > 0))
+ goto out;
+
+ snprintf (export_path, sizeof (export_path), "%s/%s",
+ brick_path, ".gf-tmp-stats-perf");
+ fd = open (export_path, O_CREAT|O_RDWR, S_IRWXU);
+ if (-1 == fd) {
+ ret = -1;
+ gf_log ("glusterd", GF_LOG_ERROR, "Could not open tmp file");
+ goto out;
+ }
+
+ buf = GF_MALLOC (blk_size * sizeof(*buf), gf_common_mt_char);
+ if (!buf) {
+ ret = -1;
+ gf_log ("glusterd", GF_LOG_ERROR, "Could not allocate memory");
+ goto out;
+ }
+
+ input_fd = open ("/dev/zero", O_RDONLY);
+ if (-1 == input_fd) {
+ ret = -1;
+ gf_log ("glusterd", GF_LOG_ERROR, "Could not open input file");
+ goto out;
+ }
+
+ output_fd = open ("/dev/null", O_RDWR);
+ if (-1 == output_fd) {
+ ret = -1;
+ gf_log ("glusterd", GF_LOG_ERROR, "Could not open output file");
+ goto out;
+ }
+
+ for (iter = 0; iter < blk_count; iter++) {
+ ret = read (input_fd, buf, blk_size);
+ if (ret != blk_size) {
+ ret = -1;
+ goto out;
+ }
+ ret = write (fd, buf, blk_size);
+ if (ret != blk_size) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = fsync (fd);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR, "could not flush cache");
+ goto out;
+ }
+ ret = lseek (fd, 0L, 0);
+ if (ret != 0) {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "could not seek back to start");
+ ret = -1;
+ goto out;
+ }
+ gettimeofday (&begin, NULL);
+ for (iter = 0; iter < blk_count; iter++) {
+ ret = read (fd, buf, blk_size);
+ if (ret != blk_size) {
+ ret = -1;
+ goto out;
+ }
+ ret = write (output_fd, buf, blk_size);
+ if (ret != blk_size) {
+ ret = -1;
+ goto out;
+ }
+ total_blks += ret;
+ }
+ ret = 0;
+ if (total_blks != ((uint64_t)blk_size * blk_count)) {
+ ret = -1;
+ gf_log ("glusterd", GF_LOG_WARNING, "Error in read");
+ goto out;
+ }
+
+ gettimeofday (&end, NULL);
+ *time = (end.tv_sec - begin.tv_sec) * 1e6
+ + (end.tv_usec - begin.tv_usec);
+ *throughput = total_blks / *time;
+ gf_log ("glusterd", GF_LOG_INFO, "Throughput %.2f Mbps time %.2f secs "
+ "bytes read %"PRId64, *throughput, *time, total_blks);
+
+out:
+ if (fd >= 0)
+ close (fd);
+ if (input_fd >= 0)
+ close (input_fd);
+ if (output_fd >= 0)
+ close (output_fd);
+ GF_FREE (buf);
+ unlink (export_path);
+
+ return ret;
+}
+
+int
+glusterfs_handle_translator_op (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gd1_mgmt_brick_op_req xlator_req = {0,};
+ dict_t *input = NULL;
+ xlator_t *xlator = NULL;
+ xlator_t *any = NULL;
+ dict_t *output = NULL;
+ char key[2048] = {0};
+ char *xname = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ glusterfs_graph_t *active = NULL;
+ xlator_t *this = NULL;
+ int i = 0;
+ int count = 0;
+
+ GF_ASSERT (req);
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = xdr_to_generic (req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ ctx = glusterfsd_ctx;
+ active = ctx->active;
+ any = active->first;
+ input = dict_new ();
+ ret = dict_unserialize (xlator_req.input.input_val,
+ xlator_req.input.input_len,
+ &input);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ goto out;
+ } else {
+ input->extra_stdfree = xlator_req.input.input_val;
+ }
+
+ ret = dict_get_int32 (input, "count", &count);
+
+ output = dict_new ();
+ if (!output) {
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; i < count; i++) {
+ snprintf (key, sizeof (key), "xl-%d", i);
+ ret = dict_get_str (input, key, &xname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Couldn't get "
+ "xlator %s ", key);
+ goto out;
+ }
+ xlator = xlator_search_by_name (any, xname);
+ if (!xlator) {
+ gf_log (this->name, GF_LOG_ERROR, "xlator %s is not "
+ "loaded", xname);
+ goto out;
+ }
+ }
+ for (i = 0; i < count; i++) {
+ snprintf (key, sizeof (key), "xl-%d", i);
+ ret = dict_get_str (input, key, &xname);
+ xlator = xlator_search_by_name (any, xname);
+ XLATOR_NOTIFY (xlator, GF_EVENT_TRANSLATOR_OP, input, output);
+ if (ret)
+ break;
+ }
+out:
+ glusterfs_xlator_op_response_send (req, ret, "", output);
+ if (input)
+ dict_unref (input);
+ if (output)
+ dict_unref (output);
+ free (xlator_req.name); //malloced by xdr
+
+ return 0;
+}
+
+
+int
+glusterfs_handle_defrag (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gd1_mgmt_brick_op_req xlator_req = {0,};
+ dict_t *dict = NULL;
+ xlator_t *xlator = NULL;
+ xlator_t *any = NULL;
+ dict_t *output = NULL;
+ char msg[2048] = {0};
+ glusterfs_ctx_t *ctx = NULL;
+ glusterfs_graph_t *active = NULL;
+ xlator_t *this = NULL;
+
+ GF_ASSERT (req);
+ this = THIS;
+ GF_ASSERT (this);
+
+ ctx = glusterfsd_ctx;
+ GF_ASSERT (ctx);
+
+ active = ctx->active;
+ if (!active) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ any = active->first;
+ ret = xdr_to_generic (req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ ret = dict_unserialize (xlator_req.input.input_val,
+ xlator_req.input.input_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ goto out;
+ }
+ xlator = xlator_search_by_name (any, xlator_req.name);
+ if (!xlator) {
+ snprintf (msg, sizeof (msg), "xlator %s is not loaded",
+ xlator_req.name);
+ goto out;
+ }
+
+ output = dict_new ();
+ if (!output) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = xlator->notify (xlator, GF_EVENT_VOLUME_DEFRAG, dict, output);
+
+ ret = glusterfs_translator_info_response_send (req, ret,
+ msg, output);
+out:
+ if (dict)
+ dict_unref (dict);
+ free (xlator_req.input.input_val); // malloced by xdr
+ if (output)
+ dict_unref (output);
+ free (xlator_req.name); //malloced by xdr
+
+ return ret;
+
+}
+int
+glusterfs_handle_brick_status (rpcsvc_request_t *req)
+{
+ int ret = -1;
+ gd1_mgmt_brick_op_req brick_req = {0,};
+ gd1_mgmt_brick_op_rsp rsp = {0,};
+ glusterfs_ctx_t *ctx = NULL;
+ glusterfs_graph_t *active = NULL;
+ xlator_t *this = NULL;
+ xlator_t *any = NULL;
+ xlator_t *xlator = NULL;
+ dict_t *dict = NULL;
+ dict_t *output = NULL;
+ char *volname = NULL;
+ char *xname = NULL;
+ uint32_t cmd = 0;
+ char *msg = NULL;
+
+ GF_ASSERT (req);
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = xdr_to_generic (req->msg[0], &brick_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ dict = dict_new ();
+ ret = dict_unserialize (brick_req.input.input_val,
+ brick_req.input.input_len, &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to unserialize "
+ "req-buffer to dictionary");
+ goto out;
+ }
+
+ ret = dict_get_uint32 (dict, "cmd", &cmd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Couldn't get status op");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Couldn't get volname");
+ goto out;
+ }
+
+ ctx = glusterfsd_ctx;
+ GF_ASSERT (ctx);
+ active = ctx->active;
+ any = active->first;
+
+ ret = gf_asprintf (&xname, "%s-server", volname);
+ if (-1 == ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Out of memory");
+ goto out;
+ }
+
+ xlator = xlator_search_by_name (any, xname);
+ if (!xlator) {
+ gf_log (this->name, GF_LOG_ERROR, "xlator %s is not loaded",
+ xname);
+ ret = -1;
+ goto out;
+ }
+
+
+ output = dict_new ();
+ switch (cmd & GF_CLI_STATUS_MASK) {
+ case GF_CLI_STATUS_MEM:
+ ret = 0;
+ gf_proc_dump_mem_info_to_dict (output);
+ gf_proc_dump_mempool_info_to_dict (ctx, output);
+ break;
+
+ case GF_CLI_STATUS_CLIENTS:
+ ret = xlator->dumpops->priv_to_dict (xlator, output);
+ break;
+
+ case GF_CLI_STATUS_INODE:
+ ret = xlator->dumpops->inode_to_dict (xlator, output);
+ break;
+
+ case GF_CLI_STATUS_FD:
+ ret = xlator->dumpops->fd_to_dict (xlator, output);
+ break;
+
+ case GF_CLI_STATUS_CALLPOOL:
+ ret = 0;
+ gf_proc_dump_pending_frames_to_dict (ctx->pool, output);
+ break;
+
+ default:
+ ret = -1;
+ msg = gf_strdup ("Unknown status op");
+ break;
+ }
+ rsp.op_ret = ret;
+ rsp.op_errno = 0;
+ if (ret && msg)
+ rsp.op_errstr = msg;
+ else
+ rsp.op_errstr = "";
+
+ ret = dict_allocate_and_serialize (output, &rsp.output.output_val,
+ &rsp.output.output_len);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to serialize output dict to rsp");
+ goto out;
+ }
+
+ glusterfs_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+ ret = 0;
+
+out:
+ if (dict)
+ dict_unref (dict);
+ if (output)
+ dict_unref (output);
+ free (brick_req.input.input_val);
+ GF_FREE (xname);
+ GF_FREE (msg);
+ GF_FREE (rsp.output.output_val);
+
+ return ret;
+}
+
+
+int
+glusterfs_handle_node_status (rpcsvc_request_t *req)
+{
+ int ret = -1;
+ gd1_mgmt_brick_op_req node_req = {0,};
+ gd1_mgmt_brick_op_rsp rsp = {0,};
+ glusterfs_ctx_t *ctx = NULL;
+ glusterfs_graph_t *active = NULL;
+ xlator_t *any = NULL;
+ xlator_t *node = NULL;
+ xlator_t *subvol = NULL;
+ dict_t *dict = NULL;
+ dict_t *output = NULL;
+ char *volname = NULL;
+ char *node_name = NULL;
+ char *subvol_name = NULL;
+ uint32_t cmd = 0;
+ char *msg = NULL;
+
+ GF_ASSERT (req);
+
+ ret = xdr_to_generic (req->msg[0], &node_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ dict = dict_new ();
+ ret = dict_unserialize (node_req.input.input_val,
+ node_req.input.input_len, &dict);
+ if (ret < 0) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to unserialize "
+ "req buffer to dictionary");
+ goto out;
+ }
+
+ ret = dict_get_uint32 (dict, "cmd", &cmd);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Couldn't get status op");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Couldn't get volname");
+ goto out;
+ }
+
+ ctx = glusterfsd_ctx;
+ GF_ASSERT (ctx);
+ active = ctx->active;
+ any = active->first;
+
+ if ((cmd & GF_CLI_STATUS_NFS) != 0)
+ ret = gf_asprintf (&node_name, "%s", "nfs-server");
+ else if ((cmd & GF_CLI_STATUS_SHD) != 0)
+ ret = gf_asprintf (&node_name, "%s", "glustershd");
+ else {
+ ret = -1;
+ goto out;
+ }
+ if (ret == -1) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Failed to set node xlator name");
+ goto out;
+ }
+
+ node = xlator_search_by_name (any, node_name);
+ if (!node) {
+ ret = -1;
+ gf_log (THIS->name, GF_LOG_ERROR, "%s xlator is not loaded",
+ node_name);
+ goto out;
+ }
+
+ if ((cmd & GF_CLI_STATUS_NFS) != 0)
+ ret = gf_asprintf (&subvol_name, "%s", volname);
+ else if ((cmd & GF_CLI_STATUS_SHD) != 0)
+ ret = gf_asprintf (&subvol_name, "%s-replicate-0", volname);
+ else {
+ ret = -1;
+ goto out;
+ }
+ if (ret == -1) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Failed to set node xlator name");
+ goto out;
+ }
+
+ subvol = xlator_search_by_name (node, subvol_name);
+ if (!subvol) {
+ ret = -1;
+ gf_log (THIS->name, GF_LOG_ERROR, "%s xlator is not loaded",
+ subvol_name);
+ goto out;
+ }
+
+ output = dict_new ();
+ switch (cmd & GF_CLI_STATUS_MASK) {
+ case GF_CLI_STATUS_MEM:
+ ret = 0;
+ gf_proc_dump_mem_info_to_dict (output);
+ gf_proc_dump_mempool_info_to_dict (ctx, output);
+ break;
+
+ case GF_CLI_STATUS_CLIENTS:
+ // clients not availbale for SHD
+ if ((cmd & GF_CLI_STATUS_SHD) != 0)
+ break;
+
+ ret = dict_set_str (output, "volname", volname);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Error setting volname to dict");
+ goto out;
+ }
+ ret = node->dumpops->priv_to_dict (node, output);
+ break;
+
+ case GF_CLI_STATUS_INODE:
+ ret = 0;
+ inode_table_dump_to_dict (subvol->itable, "conn0",
+ output);
+ ret = dict_set_int32 (output, "conncount", 1);
+ break;
+
+ case GF_CLI_STATUS_FD:
+ // cannot find fd-tables in nfs-server graph
+ // TODO: finish once found
+ break;
+
+ case GF_CLI_STATUS_CALLPOOL:
+ ret = 0;
+ gf_proc_dump_pending_frames_to_dict (ctx->pool, output);
+ break;
+
+ default:
+ ret = -1;
+ msg = gf_strdup ("Unknown status op");
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
+ break;
+ }
+ rsp.op_ret = ret;
+ rsp.op_errno = 0;
+ if (ret && msg)
+ rsp.op_errstr = msg;
+ else
+ rsp.op_errstr = "";
+
+ ret = dict_allocate_and_serialize (output, &rsp.output.output_val,
+ &rsp.output.output_len);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Failed to serialize output dict to rsp");
+ goto out;
+ }
+
+ glusterfs_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+ ret = 0;
+
+out:
+ if (dict)
+ dict_unref (dict);
+ free (node_req.input.input_val);
+ GF_FREE (msg);
+ GF_FREE (rsp.output.output_val);
+ GF_FREE (node_name);
+ GF_FREE (subvol_name);
+
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterfs_handle_nfs_profile (rpcsvc_request_t *req)
+{
+ int ret = -1;
+ gd1_mgmt_brick_op_req nfs_req = {0,};
+ gd1_mgmt_brick_op_rsp rsp = {0,};
+ dict_t *dict = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ glusterfs_graph_t *active = NULL;
+ xlator_t *any = NULL;
+ xlator_t *nfs = NULL;
+ xlator_t *subvol = NULL;
+ char *volname = NULL;
+ dict_t *output = NULL;
+
+ GF_ASSERT (req);
+
+ ret = xdr_to_generic (req->msg[0], &nfs_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ dict = dict_new ();
+ ret = dict_unserialize (nfs_req.input.input_val,
+ nfs_req.input.input_len, &dict);
+ if (ret < 0) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to "
+ "unserialize req-buffer to dict");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Couldn't get volname");
+ goto out;
+ }
+
+ ctx = glusterfsd_ctx;
+ GF_ASSERT (ctx);
+
+ active = ctx->active;
+ any = active->first;
+
+ // is this needed?
+ // are problems possible by searching for subvol directly from "any"?
+ nfs = xlator_search_by_name (any, "nfs-server");
+ if (!nfs) {
+ ret = -1;
+ gf_log (THIS->name, GF_LOG_ERROR, "xlator nfs-server is "
+ "not loaded");
+ goto out;
+ }
+
+ subvol = xlator_search_by_name (nfs, volname);
+ if (!subvol) {
+ ret = -1;
+ gf_log (THIS->name, GF_LOG_ERROR, "xlator %s is no loaded",
+ volname);
+ goto out;
+ }
+
+ output = dict_new ();
+ ret = subvol->notify (subvol, GF_EVENT_TRANSLATOR_INFO, dict, output);
+
+ rsp.op_ret = ret;
+ rsp.op_errno = 0;
+ rsp.op_errstr = "";
+
+ ret = dict_allocate_and_serialize (output, &rsp.output.output_val,
+ &rsp.output.output_len);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Failed to serialize output dict to rsp");
+ goto out;
+ }
+
+ glusterfs_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+ ret = 0;
+
+out:
+ free (nfs_req.input.input_val);
+ if (dict)
+ dict_unref (dict);
+ if (output)
+ dict_unref (output);
+ GF_FREE (rsp.output.output_val);
+
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterfs_handle_volume_barrier_op (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gd1_mgmt_brick_op_req xlator_req = {0,};
+ dict_t *dict = NULL;
+ xlator_t *xlator = NULL;
+ xlator_t *any = NULL;
+ dict_t *output = NULL;
+ char msg[2048] = {0};
+ glusterfs_ctx_t *ctx = NULL;
+ glusterfs_graph_t *active = NULL;
+ xlator_t *this = NULL;
+
+ GF_ASSERT (req);
+ this = THIS;
+ GF_ASSERT (this);
+
+ ctx = glusterfsd_ctx;
+ GF_ASSERT (ctx);
+
+ active = ctx->active;
+ if (!active) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ any = active->first;
+ ret = xdr_to_generic (req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ ret = dict_unserialize (xlator_req.input.input_val,
+ xlator_req.input.input_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ goto out;
+ }
+ xlator = xlator_search_by_name (any, xlator_req.name);
+ if (!xlator) {
+ snprintf (msg, sizeof (msg), "xlator %s is not loaded",
+ xlator_req.name);
+ goto out;
+ }
+
+ output = dict_new ();
+ if (!output) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = xlator->notify (xlator, GF_EVENT_VOLUME_BARRIER_OP,
+ dict, output);
+
+ ret = glusterfs_translator_info_response_send (req, ret,
+ msg, output);
+out:
+ if (dict)
+ dict_unref (dict);
+ free (xlator_req.input.input_val); // malloced by xdr
+ if (output)
+ dict_unref (output);
+ free (xlator_req.name); //malloced by xdr
+
+ return ret;
+
+}
+int
+glusterfs_handle_rpc_msg (rpcsvc_request_t *req)
+{
+ int ret = -1;
+ /* for now, nothing */
+ return ret;
+}
+
+rpcclnt_cb_actor_t mgmt_cbk_actors[] = {
+ [GF_CBK_FETCHSPEC] = {"FETCHSPEC", GF_CBK_FETCHSPEC, mgmt_cbk_spec },
+ [GF_CBK_EVENT_NOTIFY] = {"EVENTNOTIFY", GF_CBK_EVENT_NOTIFY,
+ mgmt_cbk_event},
+};
+
+
+struct rpcclnt_cb_program mgmt_cbk_prog = {
+ .progname = "GlusterFS Callback",
+ .prognum = GLUSTER_CBK_PROGRAM,
+ .progver = GLUSTER_CBK_VERSION,
+ .actors = mgmt_cbk_actors,
+ .numactors = GF_CBK_MAXVALUE,
+};
+
+char *clnt_pmap_procs[GF_PMAP_MAXVALUE] = {
+ [GF_PMAP_NULL] = "NULL",
+ [GF_PMAP_PORTBYBRICK] = "PORTBYBRICK",
+ [GF_PMAP_BRICKBYPORT] = "BRICKBYPORT",
+ [GF_PMAP_SIGNIN] = "SIGNIN",
+ [GF_PMAP_SIGNOUT] = "SIGNOUT",
+ [GF_PMAP_SIGNUP] = "SIGNUP",
+};
+
+
+rpc_clnt_prog_t clnt_pmap_prog = {
+ .progname = "Gluster Portmap",
+ .prognum = GLUSTER_PMAP_PROGRAM,
+ .progver = GLUSTER_PMAP_VERSION,
+ .procnames = clnt_pmap_procs,
+};
+
+char *clnt_handshake_procs[GF_HNDSK_MAXVALUE] = {
+ [GF_HNDSK_NULL] = "NULL",
+ [GF_HNDSK_SETVOLUME] = "SETVOLUME",
+ [GF_HNDSK_GETSPEC] = "GETSPEC",
+ [GF_HNDSK_PING] = "PING",
+ [GF_HNDSK_EVENT_NOTIFY] = "EVENTNOTIFY",
+};
+
+rpc_clnt_prog_t clnt_handshake_prog = {
+ .progname = "GlusterFS Handshake",
+ .prognum = GLUSTER_HNDSK_PROGRAM,
+ .progver = GLUSTER_HNDSK_VERSION,
+ .procnames = clnt_handshake_procs,
+};
+
+rpcsvc_actor_t glusterfs_actors[] = {
+ [GLUSTERD_BRICK_NULL] = {"NULL", GLUSTERD_BRICK_NULL, glusterfs_handle_rpc_msg, NULL, 0, DRC_NA},
+ [GLUSTERD_BRICK_TERMINATE] = {"TERMINATE", GLUSTERD_BRICK_TERMINATE, glusterfs_handle_terminate, NULL, 0, DRC_NA},
+ [GLUSTERD_BRICK_XLATOR_INFO] = {"TRANSLATOR INFO", GLUSTERD_BRICK_XLATOR_INFO, glusterfs_handle_translator_info_get, NULL, 0, DRC_NA},
+ [GLUSTERD_BRICK_XLATOR_OP] = {"TRANSLATOR OP", GLUSTERD_BRICK_XLATOR_OP, glusterfs_handle_translator_op, NULL, 0, DRC_NA},
+ [GLUSTERD_BRICK_STATUS] = {"STATUS", GLUSTERD_BRICK_STATUS, glusterfs_handle_brick_status, NULL, 0, DRC_NA},
+ [GLUSTERD_BRICK_XLATOR_DEFRAG] = {"TRANSLATOR DEFRAG", GLUSTERD_BRICK_XLATOR_DEFRAG, glusterfs_handle_defrag, NULL, 0, DRC_NA},
+ [GLUSTERD_NODE_PROFILE] = {"NFS PROFILE", GLUSTERD_NODE_PROFILE, glusterfs_handle_nfs_profile, NULL, 0, DRC_NA},
+ [GLUSTERD_NODE_STATUS] = {"NFS STATUS", GLUSTERD_NODE_STATUS, glusterfs_handle_node_status, NULL, 0, DRC_NA},
+ [GLUSTERD_VOLUME_BARRIER_OP] = {"VOLUME BARRIER OP", GLUSTERD_VOLUME_BARRIER_OP, glusterfs_handle_volume_barrier_op, NULL, 0, DRC_NA},
+};
+
+struct rpcsvc_program glusterfs_mop_prog = {
+ .progname = "Gluster Brick operations",
+ .prognum = GD_BRICK_PROGRAM,
+ .progver = GD_BRICK_VERSION,
+ .actors = glusterfs_actors,
+ .numactors = GLUSTERD_BRICK_MAXVALUE,
+ .synctask = _gf_true,
+};
+
+int
+mgmt_submit_request (void *req, call_frame_t *frame,
+ glusterfs_ctx_t *ctx,
+ rpc_clnt_prog_t *prog, int procnum,
+ fop_cbk_fn_t cbkfn, xdrproc_t xdrproc)
+{
+ int ret = -1;
+ int count = 0;
+ struct iovec iov = {0, };
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ ssize_t xdr_size = 0;
+
+ iobref = iobref_new ();
+ if (!iobref) {
+ goto out;
+ }
+
+ if (req) {
+ xdr_size = xdr_sizeof (xdrproc, req);
+
+ iobuf = iobuf_get2 (ctx->iobuf_pool, xdr_size);
+ if (!iobuf) {
+ goto out;
+ };
+
+ iobref_add (iobref, iobuf);
+
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_pagesize (iobuf);
+
+ /* Create the xdr payload */
+ ret = xdr_serialize_generic (iov, req, xdrproc);
+ if (ret == -1) {
+ gf_log (THIS->name, GF_LOG_WARNING, "failed to create XDR payload");
+ goto out;
+ }
+ iov.iov_len = ret;
+ count = 1;
+ }
+
+ /* Send the msg */
+ ret = rpc_clnt_submit (ctx->mgmt, prog, procnum, cbkfn,
+ &iov, count,
+ NULL, 0, iobref, frame, NULL, 0, NULL, 0, NULL);
+
+out:
+ if (iobref)
+ iobref_unref (iobref);
+
+ if (iobuf)
+ iobuf_unref (iobuf);
+ return ret;
+}
+
+
+/* XXX: move these into @ctx */
+static char *oldvolfile = NULL;
+static int oldvollen = 0;
+
+
+
+int
+mgmt_getspec_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_getspec_rsp rsp = {0,};
+ call_frame_t *frame = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ int ret = 0;
+ ssize_t size = 0;
+ FILE *tmpfp = NULL;
+ char *volfilebuf = NULL;
+
+ frame = myframe;
+ ctx = frame->this->ctx;
+
+ if (-1 == req->rpc_status) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR, "XDR decoding error");
+ ret = -1;
+ goto out;
+ }
+
+ if (-1 == rsp.op_ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "failed to get the 'volume file' from server");
+ ret = rsp.op_errno;
+ goto out;
+ }
+
+ ret = 0;
+ size = rsp.op_ret;
+
+ if (size == oldvollen && (memcmp (oldvolfile, rsp.spec, size) == 0)) {
+ gf_log (frame->this->name, GF_LOG_INFO,
+ "No change in volfile, continuing");
+ goto out;
+ }
+
+ tmpfp = tmpfile ();
+ if (!tmpfp) {
+ ret = -1;
+ goto out;
+ }
+
+ fwrite (rsp.spec, size, 1, tmpfp);
+ fflush (tmpfp);
+ if (ferror (tmpfp)) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Check if only options have changed. No need to reload the
+ * volfile if topology hasn't changed.
+ * glusterfs_volfile_reconfigure returns 3 possible return states
+ * return 0 =======> reconfiguration of options has succeeded
+ * return 1 =======> the graph has to be reconstructed and all the xlators should be inited
+ * return -1(or -ve) =======> Some Internal Error occurred during the operation
+ */
+
+ ret = glusterfs_volfile_reconfigure (oldvollen, tmpfp, ctx, oldvolfile);
+ if (ret == 0) {
+ gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
+ "No need to re-load volfile, reconfigure done");
+ if (oldvolfile)
+ volfilebuf = GF_REALLOC (oldvolfile, size);
+ else
+ volfilebuf = GF_CALLOC (1, size, gf_common_mt_char);
+ if (!volfilebuf) {
+ ret = -1;
+ goto out;
+ }
+ oldvolfile = volfilebuf;
+ oldvollen = size;
+ memcpy (oldvolfile, rsp.spec, size);
+ goto out;
+ }
+
+ if (ret < 0) {
+ gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG, "Reconfigure failed !!");
+ goto out;
+ }
+
+ ret = glusterfs_process_volfp (ctx, tmpfp);
+ /* tmpfp closed */
+ tmpfp = NULL;
+ if (ret)
+ goto out;
+
+ if (oldvolfile)
+ volfilebuf = GF_REALLOC (oldvolfile, size);
+ else
+ volfilebuf = GF_CALLOC (1, size, gf_common_mt_char);
+
+ if (!volfilebuf) {
+ ret = -1;
+ goto out;
+ }
+ oldvolfile = volfilebuf;
+ oldvollen = size;
+ memcpy (oldvolfile, rsp.spec, size);
+ if (!is_mgmt_rpc_reconnect) {
+ glusterfs_mgmt_pmap_signin (ctx);
+ is_mgmt_rpc_reconnect = _gf_true;
+ }
+
+out:
+ STACK_DESTROY (frame->root);
+
+ free (rsp.spec);
+
+ emancipate (ctx, ret);
+
+ // Stop if server is running at an unsupported op-version
+ if (ENOTSUP == ret) {
+ gf_log ("mgmt", GF_LOG_ERROR, "Server is operating at an "
+ "op-version which is not supported");
+ cleanup_and_exit (0);
+ }
+
+ if (ret && ctx && !ctx->active) {
+ /* Do it only for the first time */
+ /* Failed to get the volume file, something wrong,
+ restart the process */
+ gf_log ("mgmt", GF_LOG_ERROR,
+ "failed to fetch volume file (key:%s)",
+ ctx->cmd_args.volfile_id);
+ cleanup_and_exit (0);
+ }
+
+
+ if (tmpfp)
+ fclose (tmpfp);
+
+ return 0;
+}
+
+
+int
+glusterfs_volfile_fetch (glusterfs_ctx_t *ctx)
+{
+ cmd_args_t *cmd_args = NULL;
+ gf_getspec_req req = {0, };
+ int ret = 0;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+
+ cmd_args = &ctx->cmd_args;
+
+ frame = create_frame (THIS, ctx->pool);
+
+ req.key = cmd_args->volfile_id;
+ req.flags = 0;
+
+ dict = dict_new ();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ // Set the supported min and max op-versions, so glusterd can make a
+ // decision
+ ret = dict_set_int32 (dict, "min-op-version", GD_OP_VERSION_MIN);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to set min-op-version"
+ " in request dict");
+ goto out;
+ }
+
+ ret = dict_set_int32 (dict, "max-op-version", GD_OP_VERSION_MAX);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to set max-op-version"
+ " in request dict");
+ goto out;
+ }
+
+ ret = dict_allocate_and_serialize (dict, &req.xdata.xdata_val,
+ &req.xdata.xdata_len);
+ if (ret < 0) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Failed to serialize dictionary");
+ goto out;
+ }
+
+ ret = mgmt_submit_request (&req, frame, ctx, &clnt_handshake_prog,
+ GF_HNDSK_GETSPEC, mgmt_getspec_cbk,
+ (xdrproc_t)xdr_gf_getspec_req);
+out:
+ return ret;
+}
+
+int32_t
+mgmt_event_notify_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_event_notify_rsp rsp = {0,};
+ call_frame_t *frame = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ int ret = 0;
+
+ frame = myframe;
+ ctx = frame->this->ctx;
+
+ if (-1 == req->rpc_status) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_event_notify_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR, "XDR decoding error");
+ ret = -1;
+ goto out;
+ }
+
+ if (-1 == rsp.op_ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "failed to get the rsp from server");
+ ret = -1;
+ goto out;
+ }
+out:
+ free (rsp.dict.dict_val); //malloced by xdr
+ return ret;
+
+}
+
+int32_t
+glusterfs_rebalance_event_notify_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf_event_notify_rsp rsp = {0,};
+ call_frame_t *frame = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ int ret = 0;
+
+ frame = myframe;
+ ctx = frame->this->ctx;
+
+ if (-1 == req->rpc_status) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "failed to get the rsp from server");
+ ret = -1;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_event_notify_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR, "XDR decoding error");
+ ret = -1;
+ goto out;
+ }
+
+ if (-1 == rsp.op_ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Received error (%s) from server",
+ strerror (rsp.op_errno));
+ ret = -1;
+ goto out;
+ }
+out:
+ free (rsp.dict.dict_val); //malloced by xdr
+ return ret;
+
+}
+
+int32_t
+glusterfs_rebalance_event_notify (dict_t *dict)
+{
+ glusterfs_ctx_t *ctx = NULL;
+ gf_event_notify_req req = {0,};
+ int32_t ret = -1;
+ cmd_args_t *cmd_args = NULL;
+ call_frame_t *frame = NULL;
+
+ ctx = glusterfsd_ctx;
+ cmd_args = &ctx->cmd_args;
+
+ frame = create_frame (THIS, ctx->pool);
+
+ req.op = GF_EN_DEFRAG_STATUS;
+
+ if (dict) {
+ ret = dict_set_str (dict, "volname", cmd_args->volfile_id);
+ if (ret)
+ gf_log ("", GF_LOG_ERROR, "failed to set volname");
+
+ ret = dict_allocate_and_serialize (dict, &req.dict.dict_val,
+ &req.dict.dict_len);
+ }
+
+ ret = mgmt_submit_request (&req, frame, ctx, &clnt_handshake_prog,
+ GF_HNDSK_EVENT_NOTIFY,
+ glusterfs_rebalance_event_notify_cbk,
+ (xdrproc_t)xdr_gf_event_notify_req);
+
+ GF_FREE (req.dict.dict_val);
+
+ STACK_DESTROY (frame->root);
+ return ret;
+}
+
+static int
+mgmt_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
+ void *data)
+{
+ xlator_t *this = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ int ret = 0;
+ server_cmdline_t *server = NULL;
+ rpc_transport_t *rpc_trans = NULL;
+ int need_term = 0;
+ int emval = 0;
+
+ this = mydata;
+ rpc_trans = rpc->conn.trans;
+ ctx = this->ctx;
+
+ switch (event) {
+ case RPC_CLNT_DISCONNECT:
+ if (!ctx->active) {
+ gf_log ("glusterfsd-mgmt", GF_LOG_ERROR,
+ "failed to connect with remote-host: %s (%s)",
+ ctx->cmd_args.volfile_server,
+ strerror (errno));
+ server = ctx->cmd_args.curr_server;
+ if (server->list.next == &ctx->cmd_args.volfile_servers) {
+ need_term = 1;
+ emval = ENOTCONN;
+ gf_log("glusterfsd-mgmt", GF_LOG_INFO,
+ "Exhausted all volfile servers");
+ break;
+ }
+ server = list_entry (server->list.next, typeof(*server),
+ list);
+ ctx->cmd_args.curr_server = server;
+ ctx->cmd_args.volfile_server = server->volfile_server;
+
+ ret = dict_set_str (rpc_trans->options,
+ "remote-host",
+ server->volfile_server);
+ if (ret != 0) {
+ gf_log ("glusterfsd-mgmt", GF_LOG_ERROR,
+ "failed to set remote-host: %s",
+ server->volfile_server);
+ need_term = 1;
+ emval = ENOTCONN;
+ break;
+ }
+ gf_log ("glusterfsd-mgmt", GF_LOG_INFO,
+ "connecting to next volfile server %s",
+ server->volfile_server);
+ }
+ break;
+ case RPC_CLNT_CONNECT:
+ rpc_clnt_set_connected (&((struct rpc_clnt*)ctx->mgmt)->conn);
+
+ ret = glusterfs_volfile_fetch (ctx);
+ if (ret) {
+ emval = ret;
+ if (!ctx->active) {
+ need_term = 1;
+ gf_log ("glusterfsd-mgmt", GF_LOG_ERROR,
+ "failed to fetch volume file (key:%s)",
+ ctx->cmd_args.volfile_id);
+ break;
+
+ }
+ }
+
+ if (is_mgmt_rpc_reconnect)
+ glusterfs_mgmt_pmap_signin (ctx);
+
+ break;
+ default:
+ break;
+ }
+
+ if (need_term) {
+ emancipate (ctx, emval);
+ cleanup_and_exit (1);
+ }
+
+ return 0;
+}
+
+int
+glusterfs_rpcsvc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
+ void *data)
+{
+ if (!xl || !data) {
+ goto out;
+ }
+
+ switch (event) {
+ case RPCSVC_EVENT_ACCEPT:
+ {
+ break;
+ }
+ case RPCSVC_EVENT_DISCONNECT:
+ {
+ break;
+ }
+
+ default:
+ break;
+ }
+
+out:
+ return 0;
+}
+
+int
+glusterfs_listener_init (glusterfs_ctx_t *ctx)
+{
+ cmd_args_t *cmd_args = NULL;
+ rpcsvc_t *rpc = NULL;
+ dict_t *options = NULL;
+ int ret = -1;
+
+ cmd_args = &ctx->cmd_args;
+
+ if (ctx->listener)
+ return 0;
+
+ if (!cmd_args->sock_file)
+ return 0;
+
+ ret = rpcsvc_transport_unix_options_build (&options,
+ cmd_args->sock_file);
+ if (ret)
+ goto out;
+
+ rpc = rpcsvc_init (THIS, ctx, options, 8);
+ if (rpc == NULL) {
+ goto out;
+ }
+
+ ret = rpcsvc_register_notify (rpc, glusterfs_rpcsvc_notify, THIS);
+ if (ret) {
+ goto out;
+ }
+
+ ret = rpcsvc_create_listeners (rpc, options, "glusterfsd");
+ if (ret < 1) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = rpcsvc_program_register (rpc, &glusterfs_mop_prog);
+ if (ret) {
+ goto out;
+ }
+
+ ctx->listener = rpc;
+
+out:
+ return ret;
+}
+
+int
+glusterfs_listener_stop (glusterfs_ctx_t *ctx)
+{
+ cmd_args_t *cmd_args = NULL;
+ rpcsvc_t *rpc = NULL;
+ rpcsvc_listener_t *listener = NULL;
+ rpcsvc_listener_t *next = NULL;
+ int ret = 0;
+ xlator_t *this = NULL;
+
+ GF_ASSERT (ctx);
+
+ rpc = ctx->listener;
+ ctx->listener = NULL;
+
+ (void) rpcsvc_program_unregister(rpc, &glusterfs_mop_prog);
+
+ list_for_each_entry_safe (listener, next, &rpc->listeners, list) {
+ rpcsvc_listener_destroy (listener);
+ }
+
+ (void) rpcsvc_unregister_notify (rpc, glusterfs_rpcsvc_notify, THIS);
+
+ GF_FREE (rpc);
+
+ cmd_args = &ctx->cmd_args;
+ if (cmd_args->sock_file) {
+ ret = unlink (cmd_args->sock_file);
+ if (ret && (ENOENT == errno)) {
+ ret = 0;
+ }
+ }
+
+ if (ret) {
+ this = THIS;
+ gf_log (this->name, GF_LOG_ERROR, "Failed to unlink listener "
+ "socket %s, error: %s", cmd_args->sock_file,
+ strerror (errno));
+ }
+ return ret;
+}
+
+int
+glusterfs_mgmt_notify (int32_t op, void *data, ...)
+{
+ int ret = 0;
+ switch (op)
+ {
+ case GF_EN_DEFRAG_STATUS:
+ ret = glusterfs_rebalance_event_notify ((dict_t*) data);
+ break;
+
+ default:
+ gf_log ("", GF_LOG_ERROR, "Invalid op");
+ break;
+ }
+
+ return ret;
+}
+
+int
+glusterfs_mgmt_init (glusterfs_ctx_t *ctx)
+{
+ cmd_args_t *cmd_args = NULL;
+ struct rpc_clnt *rpc = NULL;
+ dict_t *options = NULL;
+ int ret = -1;
+ int port = GF_DEFAULT_BASE_PORT;
+ char *host = NULL;
+
+ cmd_args = &ctx->cmd_args;
+
+ if (ctx->mgmt)
+ return 0;
+
+ if (cmd_args->volfile_server_port)
+ port = cmd_args->volfile_server_port;
+
+ host = "localhost";
+ if (cmd_args->volfile_server)
+ host = cmd_args->volfile_server;
+
+ ret = rpc_transport_inet_options_build (&options, host, port);
+ if (ret)
+ goto out;
+
+ rpc = rpc_clnt_new (options, THIS->ctx, THIS->name, 8);
+ if (!rpc) {
+ ret = -1;
+ gf_log (THIS->name, GF_LOG_WARNING, "failed to create rpc clnt");
+ goto out;
+ }
+
+ ret = rpc_clnt_register_notify (rpc, mgmt_rpc_notify, THIS);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to register notify function");
+ goto out;
+ }
+
+ ret = rpcclnt_cbk_program_register (rpc, &mgmt_cbk_prog, THIS);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to register callback function");
+ goto out;
+ }
+
+ ctx->notify = glusterfs_mgmt_notify;
+
+ /* This value should be set before doing the 'rpc_clnt_start()' as
+ the notify function uses this variable */
+ ctx->mgmt = rpc;
+
+ ret = rpc_clnt_start (rpc);
+out:
+ return ret;
+}
+
+static int
+mgmt_pmap_signin2_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ pmap_signin_rsp rsp = {0,};
+ call_frame_t *frame = NULL;
+ int ret = 0;
+
+ frame = myframe;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_pmap_signin_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR, "XDR decode error");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 == rsp.op_ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "failed to register the port with glusterd");
+ goto out;
+ }
+out:
+ STACK_DESTROY (frame->root);
+ return 0;
+
+}
+
+static int
+mgmt_pmap_signin_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ pmap_signin_rsp rsp = {0,};
+ call_frame_t *frame = NULL;
+ int ret = 0;
+ pmap_signin_req pmap_req = {0, };
+ cmd_args_t *cmd_args = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ char brick_name[PATH_MAX] = {0,};
+
+ frame = myframe;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_pmap_signin_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR, "XDR decode error");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 == rsp.op_ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "failed to register the port with glusterd");
+ goto out;
+ }
+
+ ctx = glusterfsd_ctx;
+ cmd_args = &ctx->cmd_args;
+
+ if (!cmd_args->brick_port2) {
+ /* We are done with signin process */
+ goto out;
+ }
+
+ snprintf (brick_name, PATH_MAX, "%s.rdma", cmd_args->brick_name);
+ pmap_req.port = cmd_args->brick_port2;
+ pmap_req.brick = brick_name;
+
+ ret = mgmt_submit_request (&pmap_req, frame, ctx, &clnt_pmap_prog,
+ GF_PMAP_SIGNIN, mgmt_pmap_signin2_cbk,
+ (xdrproc_t)xdr_pmap_signin_req);
+ if (ret)
+ goto out;
+
+ return 0;
+
+out:
+
+ STACK_DESTROY (frame->root);
+ return 0;
+}
+
+int
+glusterfs_mgmt_pmap_signin (glusterfs_ctx_t *ctx)
+{
+ call_frame_t *frame = NULL;
+ pmap_signin_req req = {0, };
+ int ret = -1;
+ cmd_args_t *cmd_args = NULL;
+
+ frame = create_frame (THIS, ctx->pool);
+ cmd_args = &ctx->cmd_args;
+
+ if (!cmd_args->brick_port || !cmd_args->brick_name) {
+ gf_log ("fsd-mgmt", GF_LOG_DEBUG,
+ "portmapper signin arguments not given");
+ goto out;
+ }
+
+ req.port = cmd_args->brick_port;
+ req.brick = cmd_args->brick_name;
+
+ ret = mgmt_submit_request (&req, frame, ctx, &clnt_pmap_prog,
+ GF_PMAP_SIGNIN, mgmt_pmap_signin_cbk,
+ (xdrproc_t)xdr_pmap_signin_req);
+
+out:
+ return ret;
+}
+
+
+static int
+mgmt_pmap_signout_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ pmap_signout_rsp rsp = {0,};
+ int ret = 0;
+ glusterfs_ctx_t *ctx = NULL;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ ctx = glusterfsd_ctx;
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_pmap_signout_rsp);
+ if (ret < 0) {
+ gf_log (THIS->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 == rsp.op_ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "failed to register the port with glusterd");
+ goto out;
+ }
+out:
+ return 0;
+}
+
+
+int
+glusterfs_mgmt_pmap_signout (glusterfs_ctx_t *ctx)
+{
+ int ret = 0;
+ pmap_signout_req req = {0, };
+ call_frame_t *frame = NULL;
+ cmd_args_t *cmd_args = NULL;
+
+ frame = create_frame (THIS, ctx->pool);
+ cmd_args = &ctx->cmd_args;
+
+ if (!cmd_args->brick_port || !cmd_args->brick_name) {
+ gf_log ("fsd-mgmt", GF_LOG_DEBUG,
+ "portmapper signout arguments not given");
+ goto out;
+ }
+
+ req.port = cmd_args->brick_port;
+ req.brick = cmd_args->brick_name;
+
+ ret = mgmt_submit_request (&req, frame, ctx, &clnt_pmap_prog,
+ GF_PMAP_SIGNOUT, mgmt_pmap_signout_cbk,
+ (xdrproc_t)xdr_pmap_signout_req);
+out:
+ return ret;
+}
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
index 30a9149b4..3cb8f0f51 100644
--- a/glusterfsd/src/glusterfsd.c
+++ b/glusterfsd/src/glusterfsd.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2006-2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -24,6 +14,8 @@
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/resource.h>
+#include <sys/file.h>
+#include <sys/wait.h>
#include <netdb.h>
#include <signal.h>
#include <libgen.h>
@@ -32,6 +24,12 @@
#include <stdint.h>
#include <pthread.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+#include <semaphore.h>
+#include <errno.h>
+#include <pwd.h>
#ifndef _CONFIG_H
#define _CONFIG_H
@@ -53,7 +51,6 @@
#include "compat.h"
#include "logging.h"
#include "dict.h"
-#include "protocol.h"
#include "list.h"
#include "timer.h"
#include "glusterfsd.h"
@@ -63,1233 +60,1911 @@
#include "event.h"
#include "globals.h"
#include "statedump.h"
-
+#include "latency.h"
+#include "glusterfsd-mem-types.h"
+#include "syscall.h"
+#include "call-stub.h"
#include <fnmatch.h>
+#include "rpc-clnt.h"
+#include "syncop.h"
+#include "client_t.h"
-extern int
-gf_log_central_init (glusterfs_ctx_t *ctx, const char *remote_host,
- const char *transport, uint32_t remote_port);
+#include "daemon.h"
+/* process mode definitions */
+#define GF_SERVER_PROCESS 0
+#define GF_CLIENT_PROCESS 1
+#define GF_GLUSTERD_PROCESS 2
/* using argp for command line parsing */
static char gf_doc[] = "";
static char argp_doc[] = "--volfile-server=SERVER [MOUNT-POINT]\n" \
- "--volfile=VOLFILE [MOUNT-POINT]";
-const char *argp_program_version = "" \
- PACKAGE_NAME" "PACKAGE_VERSION" built on "__DATE__" "__TIME__ \
- "\nRepository revision: " GLUSTERFS_REPOSITORY_REVISION "\n" \
- "Copyright (c) 2006-2009 Z RESEARCH Inc. " \
- "<http://www.zresearch.com>\n" \
- "GlusterFS comes with ABSOLUTELY NO WARRANTY.\n" \
- "You may redistribute copies of GlusterFS under the terms of "\
- "the GNU General Public License.";
+ "--volfile=VOLFILE [MOUNT-POINT]";
+const char *argp_program_version = ""
+ PACKAGE_NAME" "PACKAGE_VERSION" built on "__DATE__" "__TIME__
+ "\nRepository revision: " GLUSTERFS_REPOSITORY_REVISION "\n"
+ "Copyright (c) 2006-2013 Red Hat, Inc. <http://www.redhat.com/>\n"
+ "GlusterFS comes with ABSOLUTELY NO WARRANTY.\n"
+ "It is licensed to you under your choice of the GNU Lesser\n"
+ "General Public License, version 3 or any later version (LGPLv3\n"
+ "or later), or the GNU General Public License, version 2 (GPLv2),\n"
+ "in all cases as published by the Free Software Foundation.";
const char *argp_program_bug_address = "<" PACKAGE_BUGREPORT ">";
-error_t parse_opts (int32_t key, char *arg, struct argp_state *_state);
+static error_t parse_opts (int32_t key, char *arg, struct argp_state *_state);
static struct argp_option gf_options[] = {
- {0, 0, 0, 0, "Basic options:"},
- {"volfile-server", ARGP_VOLFILE_SERVER_KEY, "SERVER", 0,
- "Server to get the volume file from. This option overrides "
- "--volfile option"},
- {"volfile-max-fetch-attempts", ARGP_VOLFILE_MAX_FETCH_ATTEMPTS,
- "MAX-ATTEMPTS", 0, "Maximum number of connect attempts to server. "
- "This option should be provided with --volfile-server option"
- "[default: 1]"},
- {"volfile", ARGP_VOLUME_FILE_KEY, "VOLFILE", 0,
- "File to use as VOLUME_FILE [default: "DEFAULT_CLIENT_VOLUME_FILE" or "
- DEFAULT_SERVER_VOLUME_FILE"]"},
- {"spec-file", ARGP_VOLUME_FILE_KEY, "VOLFILE", OPTION_HIDDEN,
- "File to use as VOLFILE [default : "DEFAULT_CLIENT_VOLUME_FILE" or "
- DEFAULT_SERVER_VOLUME_FILE"]"},
- {"log-server", ARGP_LOG_SERVER_KEY, "LOG SERVER", 0,
- "Server to use as the central log server."
- "--log-server option"},
-
- {"log-level", ARGP_LOG_LEVEL_KEY, "LOGLEVEL", 0,
- "Logging severity. Valid options are DEBUG, NORMAL, WARNING, ERROR, "
- "CRITICAL and NONE [default: NORMAL]"},
- {"log-file", ARGP_LOG_FILE_KEY, "LOGFILE", 0,
- "File to use for logging [default: "
- DEFAULT_LOG_FILE_DIRECTORY "/" PACKAGE_NAME ".log" "]"},
-
- {0, 0, 0, 0, "Advanced Options:"},
- {"volfile-server-port", ARGP_VOLFILE_SERVER_PORT_KEY, "PORT", 0,
- "Listening port number of volfile server"},
- {"volfile-server-transport", ARGP_VOLFILE_SERVER_TRANSPORT_KEY,
- "TRANSPORT", 0,
- "Transport type to get volfile from server [default: socket]"},
- {"volfile-id", ARGP_VOLFILE_ID_KEY, "KEY", 0,
- "'key' of the volfile to be fetched from server"},
- {"log-server-port", ARGP_LOG_SERVER_PORT_KEY, "PORT", 0,
- "Listening port number of log server"},
- {"pid-file", ARGP_PID_FILE_KEY, "PIDFILE", 0,
- "File to use as pid file"},
- {"no-daemon", ARGP_NO_DAEMON_KEY, 0, 0,
- "Run in foreground"},
- {"run-id", ARGP_RUN_ID_KEY, "RUN-ID", OPTION_HIDDEN,
- "Run ID for the process, used by scripts to keep track of process "
- "they started, defaults to none"},
- {"debug", ARGP_DEBUG_KEY, 0, 0,
- "Run in debug mode. This option sets --no-daemon, --log-level "
- "to DEBUG and --log-file to console"},
- {"volume-name", ARGP_VOLUME_NAME_KEY, "VOLUME-NAME", 0,
- "Volume name to be used for MOUNT-POINT [default: top most volume "
- "in VOLFILE]"},
- {"xlator-option", ARGP_XLATOR_OPTION_KEY,"VOLUME-NAME.OPTION=VALUE", 0,
- "Add/override a translator option for a volume with specified value"},
-
- {0, 0, 0, 0, "Fuse options:"},
- {"disable-direct-io-mode", ARGP_DISABLE_DIRECT_IO_MODE_KEY, 0, 0,
- "Disable direct I/O mode in fuse kernel module"
- " [default if big writes are supported]"},
- {"entry-timeout", ARGP_ENTRY_TIMEOUT_KEY, "SECONDS", 0,
- "Set entry timeout to SECONDS in fuse kernel module [default: 1]"},
- {"attribute-timeout", ARGP_ATTRIBUTE_TIMEOUT_KEY, "SECONDS", 0,
- "Set attribute timeout to SECONDS for inodes in fuse kernel module "
- "[default: 1]"},
- {"volfile-check", ARGP_VOLFILE_CHECK_KEY, 0, 0,
- "Enable strict volume file checking"},
+ {0, 0, 0, 0, "Basic options:"},
+ {"volfile-server", ARGP_VOLFILE_SERVER_KEY, "SERVER", 0,
+ "Server to get the volume file from. This option overrides "
+ "--volfile option"},
+ {"volfile", ARGP_VOLUME_FILE_KEY, "VOLFILE", 0,
+ "File to use as VOLUME_FILE"},
+ {"spec-file", ARGP_VOLUME_FILE_KEY, "VOLFILE", OPTION_HIDDEN,
+ "File to use as VOLUME FILE"},
+
+ {"log-level", ARGP_LOG_LEVEL_KEY, "LOGLEVEL", 0,
+ "Logging severity. Valid options are DEBUG, INFO, WARNING, ERROR, "
+ "CRITICAL, TRACE and NONE [default: INFO]"},
+ {"log-file", ARGP_LOG_FILE_KEY, "LOGFILE", 0,
+ "File to use for logging [default: "
+ DEFAULT_LOG_FILE_DIRECTORY "/" PACKAGE_NAME ".log" "]"},
+
+ {0, 0, 0, 0, "Advanced Options:"},
+ {"volfile-server-port", ARGP_VOLFILE_SERVER_PORT_KEY, "PORT", 0,
+ "Listening port number of volfile server"},
+ {"volfile-server-transport", ARGP_VOLFILE_SERVER_TRANSPORT_KEY,
+ "TRANSPORT", 0,
+ "Transport type to get volfile from server [default: socket]"},
+ {"volfile-id", ARGP_VOLFILE_ID_KEY, "KEY", 0,
+ "'key' of the volfile to be fetched from server"},
+ {"pid-file", ARGP_PID_FILE_KEY, "PIDFILE", 0,
+ "File to use as pid file"},
+ {"socket-file", ARGP_SOCK_FILE_KEY, "SOCKFILE", 0,
+ "File to use as unix-socket"},
+ {"no-daemon", ARGP_NO_DAEMON_KEY, 0, 0,
+ "Run in foreground"},
+ {"run-id", ARGP_RUN_ID_KEY, "RUN-ID", OPTION_HIDDEN,
+ "Run ID for the process, used by scripts to keep track of process "
+ "they started, defaults to none"},
+ {"debug", ARGP_DEBUG_KEY, 0, 0,
+ "Run in debug mode. This option sets --no-daemon, --log-level "
+ "to DEBUG and --log-file to console"},
+ {"volume-name", ARGP_VOLUME_NAME_KEY, "XLATOR-NAME", 0,
+ "Translator name to be used for MOUNT-POINT [default: top most volume "
+ "definition in VOLFILE]"},
+ {"xlator-option", ARGP_XLATOR_OPTION_KEY,"XLATOR-NAME.OPTION=VALUE", 0,
+ "Add/override an option for a translator in volume file with specified"
+ " value"},
+ {"read-only", ARGP_READ_ONLY_KEY, 0, 0,
+ "Mount the filesystem in 'read-only' mode"},
+ {"acl", ARGP_ACL_KEY, 0, 0,
+ "Mount the filesystem with POSIX ACL support"},
+ {"selinux", ARGP_SELINUX_KEY, 0, 0,
+ "Enable SELinux label (extened attributes) support on inodes"},
+#ifdef GF_LINUX_HOST_OS
+ {"aux-gfid-mount", ARGP_AUX_GFID_MOUNT_KEY, 0, 0,
+ "Enable access to filesystem through gfid directly"},
+#endif
+ {"enable-ino32", ARGP_INODE32_KEY, "BOOL", OPTION_ARG_OPTIONAL,
+ "Use 32-bit inodes when mounting to workaround broken applications"
+ "that don't support 64-bit inodes"},
+ {"worm", ARGP_WORM_KEY, 0, 0,
+ "Mount the filesystem in 'worm' mode"},
+ {"mac-compat", ARGP_MAC_COMPAT_KEY, "BOOL", OPTION_ARG_OPTIONAL,
+ "Provide stubs for attributes needed for seamless operation on Macs "
#ifdef GF_DARWIN_HOST_OS
- {"non-local", ARGP_NON_LOCAL_KEY, 0, 0,
- "Mount the macfuse volume without '-o local' option"},
+ "[default: \"on\" on client side, else \"off\"]"
+#else
+ "[default: \"off\"]"
#endif
- {0, 0, 0, 0, "Miscellaneous Options:"},
- {0, }
+ },
+ {"brick-name", ARGP_BRICK_NAME_KEY, "BRICK-NAME", OPTION_HIDDEN,
+ "Brick name to be registered with Gluster portmapper" },
+ {"brick-port", ARGP_BRICK_PORT_KEY, "BRICK-PORT", OPTION_HIDDEN,
+ "Brick Port to be registered with Gluster portmapper" },
+ {"fopen-keep-cache", ARGP_FOPEN_KEEP_CACHE_KEY, "BOOL", OPTION_ARG_OPTIONAL,
+ "Do not purge the cache on file open"},
+
+ {0, 0, 0, 0, "Fuse options:"},
+ {"direct-io-mode", ARGP_DIRECT_IO_MODE_KEY, "BOOL", OPTION_ARG_OPTIONAL,
+ "Use direct I/O mode in fuse kernel module"
+ " [default: \"off\" if big writes are supported, else "
+ "\"on\" for fds not opened with O_RDONLY]"},
+ {"entry-timeout", ARGP_ENTRY_TIMEOUT_KEY, "SECONDS", 0,
+ "Set entry timeout to SECONDS in fuse kernel module [default: 1]"},
+ {"negative-timeout", ARGP_NEGATIVE_TIMEOUT_KEY, "SECONDS", 0,
+ "Set negative timeout to SECONDS in fuse kernel module [default: 0]"},
+ {"attribute-timeout", ARGP_ATTRIBUTE_TIMEOUT_KEY, "SECONDS", 0,
+ "Set attribute timeout to SECONDS for inodes in fuse kernel module "
+ "[default: 1]"},
+ {"gid-timeout", ARGP_GID_TIMEOUT_KEY, "SECONDS", 0,
+ "Set auxilary group list timeout to SECONDS for fuse translator "
+ "[default: 0]"},
+ {"background-qlen", ARGP_FUSE_BACKGROUND_QLEN_KEY, "N", 0,
+ "Set fuse module's background queue length to N "
+ "[default: 64]"},
+ {"congestion-threshold", ARGP_FUSE_CONGESTION_THRESHOLD_KEY, "N", 0,
+ "Set fuse module's congestion threshold to N "
+ "[default: 48]"},
+ {"client-pid", ARGP_CLIENT_PID_KEY, "PID", OPTION_HIDDEN,
+ "client will authenticate itself with process id PID to server"},
+ {"user-map-root", ARGP_USER_MAP_ROOT_KEY, "USER", OPTION_HIDDEN,
+ "replace USER with root in messages"},
+ {"dump-fuse", ARGP_DUMP_FUSE_KEY, "PATH", 0,
+ "Dump fuse traffic to PATH"},
+ {"volfile-check", ARGP_VOLFILE_CHECK_KEY, 0, 0,
+ "Enable strict volume file checking"},
+ {"mem-accounting", ARGP_MEM_ACCOUNTING_KEY, 0, OPTION_HIDDEN,
+ "Enable internal memory accounting"},
+ {"fuse-mountopts", ARGP_FUSE_MOUNTOPTS_KEY, "OPTIONS", OPTION_HIDDEN,
+ "Extra mount options to pass to FUSE"},
+ {"use-readdirp", ARGP_FUSE_USE_READDIRP_KEY, "BOOL", OPTION_ARG_OPTIONAL,
+ "Use readdirp mode in fuse kernel module"
+ " [default: \"off\"]"},
+ {0, 0, 0, 0, "Miscellaneous Options:"},
+ {0, }
};
static struct argp argp = { gf_options, parse_opts, argp_doc, gf_doc };
+int glusterfs_pidfile_cleanup (glusterfs_ctx_t *ctx);
+int glusterfs_volumes_init (glusterfs_ctx_t *ctx);
+int glusterfs_mgmt_init (glusterfs_ctx_t *ctx);
+int glusterfs_listener_init (glusterfs_ctx_t *ctx);
+int glusterfs_listener_stop (glusterfs_ctx_t *ctx);
+
-static void
-_gf_dump_details (int argc, char **argv)
+static int
+set_fuse_mount_options (glusterfs_ctx_t *ctx, dict_t *options)
{
- extern FILE *gf_log_logfile;
- int i = 0;
- char timestr[256];
- time_t utime = 0;
- struct tm *tm = NULL;
- pid_t mypid = 0;
- struct utsname uname_buf = {{0, }, };
- int uname_ret = -1;
-
- utime = time (NULL);
- tm = localtime (&utime);
- mypid = getpid ();
- uname_ret = uname (&uname_buf);
-
- /* Which git? What time? */
- strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm);
- fprintf (gf_log_logfile,
- "========================================"
- "========================================\n");
- fprintf (gf_log_logfile, "Version : %s %s built on %s %s\n",
- PACKAGE_NAME, PACKAGE_VERSION, __DATE__, __TIME__);
- fprintf (gf_log_logfile, "git: %s\n",
- GLUSTERFS_REPOSITORY_REVISION);
- fprintf (gf_log_logfile, "Starting Time: %s\n", timestr);
- fprintf (gf_log_logfile, "Command line : ");
- for (i = 0; i < argc; i++) {
- fprintf (gf_log_logfile, "%s ", argv[i]);
+ int ret = 0;
+ cmd_args_t *cmd_args = NULL;
+ char *mount_point = NULL;
+ char cwd[PATH_MAX] = {0,};
+
+ cmd_args = &ctx->cmd_args;
+
+ /* Check if mount-point is absolute path,
+ * if not convert to absolute path by concating with CWD
+ */
+ if (cmd_args->mount_point[0] != '/') {
+ if (getcwd (cwd, PATH_MAX) != NULL) {
+ ret = gf_asprintf (&mount_point, "%s/%s", cwd,
+ cmd_args->mount_point);
+ if (ret == -1) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "Could not create absolute mountpoint "
+ "path");
+ goto err;
+ }
+ } else {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "Could not get current working directory");
+ goto err;
+ }
+ } else
+ mount_point = gf_strdup (cmd_args->mount_point);
+
+ ret = dict_set_dynstr (options, ZR_MOUNTPOINT_OPT, mount_point);
+ if (ret < 0) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "failed to set mount-point to options dictionary");
+ goto err;
}
- fprintf (gf_log_logfile, "\nPID : %d\n", mypid);
-
- if (uname_ret == 0) {
- fprintf (gf_log_logfile, "System name : %s\n", uname_buf.sysname);
- fprintf (gf_log_logfile, "Nodename : %s\n", uname_buf.nodename);
- fprintf (gf_log_logfile, "Kernel Release : %s\n", uname_buf.release);
- fprintf (gf_log_logfile, "Hardware Identifier: %s\n", uname_buf.machine);
- }
+ if (cmd_args->fuse_attribute_timeout >= 0) {
+ ret = dict_set_double (options, ZR_ATTR_TIMEOUT_OPT,
+ cmd_args->fuse_attribute_timeout);
+ if (ret < 0) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "failed to set dict value for key %s",
+ ZR_ATTR_TIMEOUT_OPT);
+ goto err;
+ }
+ }
- fprintf (gf_log_logfile, "\n");
- fflush (gf_log_logfile);
-}
+ if (cmd_args->fuse_entry_timeout >= 0) {
+ ret = dict_set_double (options, ZR_ENTRY_TIMEOUT_OPT,
+ cmd_args->fuse_entry_timeout);
+ if (ret < 0) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "failed to set dict value for key %s",
+ ZR_ENTRY_TIMEOUT_OPT);
+ goto err;
+ }
+ }
-static xlator_t *
-gf_get_first_xlator (xlator_t *list)
-{
- xlator_t *trav = NULL, *head = NULL;
-
- trav = list;
- do {
- if (trav->prev == NULL) {
- head = trav;
+ if (cmd_args->fuse_negative_timeout >= 0) {
+ ret = dict_set_double (options, ZR_NEGATIVE_TIMEOUT_OPT,
+ cmd_args->fuse_negative_timeout);
+ if (ret < 0) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "failed to set dict value for key %s",
+ ZR_NEGATIVE_TIMEOUT_OPT);
+ goto err;
}
-
- trav = trav->prev;
- } while (trav != NULL);
-
- return head;
-}
+ }
-static xlator_t *
-_add_fuse_mount (xlator_t *graph)
-{
- int ret = 0;
- cmd_args_t *cmd_args = NULL;
- xlator_t *top = NULL;
- glusterfs_ctx_t *ctx = NULL;
- xlator_list_t *xlchild = NULL;
-
- ctx = graph->ctx;
- cmd_args = &ctx->cmd_args;
-
- xlchild = CALLOC (sizeof (*xlchild), 1);
- ERR_ABORT (xlchild);
- xlchild->xlator = graph;
-
- top = CALLOC (1, sizeof (*top));
- top->name = strdup ("fuse");
- if (xlator_set_type (top, ZR_XLATOR_FUSE) == -1) {
- fprintf (stderr,
- "MOUNT-POINT %s initialization failed",
- cmd_args->mount_point);
- gf_log ("glusterfs", GF_LOG_ERROR,
- "MOUNT-POINT %s initialization failed",
- cmd_args->mount_point);
- return NULL;
- }
- top->children = xlchild;
- top->ctx = graph->ctx;
- top->next = gf_get_first_xlator (graph);
- top->options = get_new_dict ();
-
- ret = dict_set_static_ptr (top->options, ZR_MOUNTPOINT_OPT,
- cmd_args->mount_point);
- if (ret < 0) {
- gf_log ("glusterfs", GF_LOG_DEBUG,
- "failed to set mount-point to options dictionary");
- }
-
- if (cmd_args->fuse_attribute_timeout)
- ret = dict_set_double (top->options, ZR_ATTR_TIMEOUT_OPT,
- cmd_args->fuse_attribute_timeout);
- if (cmd_args->fuse_entry_timeout)
- ret = dict_set_double (top->options, ZR_ENTRY_TIMEOUT_OPT,
- cmd_args->fuse_entry_timeout);
-
- if (cmd_args->volfile_check)
- ret = dict_set_int32 (top->options, ZR_STRICT_VOLFILE_CHECK,
+ if (cmd_args->client_pid_set) {
+ ret = dict_set_int32 (options, "client-pid",
+ cmd_args->client_pid);
+ if (ret < 0) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "failed to set dict value for key %s",
+ "client-pid");
+ goto err;
+ }
+ }
+
+ if (cmd_args->uid_map_root) {
+ ret = dict_set_int32 (options, "uid-map-root",
+ cmd_args->uid_map_root);
+ if (ret < 0) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "failed to set dict value for key %s",
+ "uid-map-root");
+ goto err;
+ }
+ }
+
+ if (cmd_args->volfile_check) {
+ ret = dict_set_int32 (options, ZR_STRICT_VOLFILE_CHECK,
cmd_args->volfile_check);
+ if (ret < 0) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "failed to set dict value for key %s",
+ ZR_STRICT_VOLFILE_CHECK);
+ goto err;
+ }
+ }
-#ifdef GF_DARWIN_HOST_OS
- /* On Darwin machines, O_APPEND is not handled,
- * which may corrupt the data
- */
- if (cmd_args->fuse_direct_io_mode_flag == _gf_true) {
- gf_log ("glusterfs", GF_LOG_DEBUG,
- "'direct-io-mode' in fuse causes data corruption "
- "if O_APPEND is used. disabling 'direct-io-mode'");
- }
- ret = dict_set_static_ptr (top->options, ZR_DIRECT_IO_OPT, "disable");
-
- if (cmd_args->non_local)
- ret = dict_set_uint32 (top->options, "macfuse-local",
- cmd_args->non_local);
-
-#else /* ! DARWIN HOST OS */
- if (cmd_args->fuse_direct_io_mode_flag == _gf_true) {
- ret = dict_set_static_ptr (top->options, ZR_DIRECT_IO_OPT,
- "enable");
- } else {
- ret = dict_set_static_ptr (top->options, ZR_DIRECT_IO_OPT,
- "disable");
- }
+ if (cmd_args->dump_fuse) {
+ ret = dict_set_static_ptr (options, ZR_DUMP_FUSE,
+ cmd_args->dump_fuse);
+ if (ret < 0) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "failed to set dict value for key %s",
+ ZR_DUMP_FUSE);
+ goto err;
+ }
+ }
-#endif /* GF_DARWIN_HOST_OS */
-
- graph->parents = CALLOC (1, sizeof (xlator_list_t));
- graph->parents->xlator = top;
-
- return top;
-}
+ if (cmd_args->acl) {
+ ret = dict_set_static_ptr (options, "acl", "on");
+ if (ret < 0) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "failed to set dict value for key acl");
+ goto err;
+ }
+ }
+ if (cmd_args->selinux) {
+ ret = dict_set_static_ptr (options, "selinux", "on");
+ if (ret < 0) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "failed to set dict value for key selinux");
+ goto err;
+ }
+ }
-static FILE *
-_get_specfp (glusterfs_ctx_t *ctx)
-{
- int ret = 0;
- cmd_args_t *cmd_args = NULL;
- FILE *specfp = NULL;
- struct stat statbuf;
-
- cmd_args = &ctx->cmd_args;
-
- if (cmd_args->volfile_server) {
- specfp = fetch_spec (ctx);
-
- if (specfp == NULL) {
- fprintf (stderr,
- "error while getting volume file from "
- "server %s\n", cmd_args->volfile_server);
- gf_log ("glusterfs", GF_LOG_ERROR,
- "error while getting volume file from "
- "server %s", cmd_args->volfile_server);
+ if (cmd_args->aux_gfid_mount) {
+ ret = dict_set_static_ptr (options, "virtual-gfid-access",
+ "on");
+ if (ret < 0) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "failed to set dict value for key "
+ "aux-gfid-mount");
+ goto err;
+ }
+ }
+
+ if (cmd_args->enable_ino32) {
+ ret = dict_set_static_ptr (options, "enable-ino32", "on");
+ if (ret < 0) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "failed to set dict value for key enable-ino32");
+ goto err;
+ }
+ }
+
+ if (cmd_args->read_only) {
+ ret = dict_set_static_ptr (options, "read-only", "on");
+ if (ret < 0) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "failed to set dict value for key read-only");
+ goto err;
+ }
+ }
+
+ switch (cmd_args->fopen_keep_cache) {
+ case GF_OPTION_ENABLE:
+ ret = dict_set_static_ptr(options, "fopen-keep-cache",
+ "on");
+ if (ret < 0) {
+ gf_log("glusterfsd", GF_LOG_ERROR,
+ "failed to set dict value for key "
+ "fopen-keep-cache");
+ goto err;
}
- else {
- gf_log ("glusterfs", GF_LOG_DEBUG,
- "loading volume file from server %s",
- cmd_args->volfile_server);
+ break;
+ case GF_OPTION_DISABLE:
+ ret = dict_set_static_ptr(options, "fopen-keep-cache",
+ "off");
+ if (ret < 0) {
+ gf_log("glusterfsd", GF_LOG_ERROR,
+ "failed to set dict value for key "
+ "fopen-keep-cache");
+ goto err;
}
-
- return specfp;
- }
-
- ret = stat (cmd_args->volume_file, &statbuf);
- if (ret == -1) {
- fprintf (stderr, "%s: %s\n",
- cmd_args->volume_file, strerror (errno));
- gf_log ("glusterfs", GF_LOG_ERROR,
- "%s: %s", cmd_args->volume_file, strerror (errno));
- return NULL;
- }
- if (!(S_ISREG (statbuf.st_mode) || S_ISLNK (statbuf.st_mode))) {
- fprintf (stderr,
- "provide a valid volume file\n");
- gf_log ("glusterfs", GF_LOG_ERROR,
- "provide a valid volume file");
- return NULL;
- }
- if ((specfp = fopen (cmd_args->volume_file, "r")) == NULL) {
- fprintf (stderr, "volume file %s: %s\n",
- cmd_args->volume_file,
- strerror (errno));
- gf_log ("glusterfs", GF_LOG_ERROR,
- "volume file %s: %s",
- cmd_args->volume_file,
- strerror (errno));
- return NULL;
+ break;
+ case GF_OPTION_DEFERRED: /* default */
+ default:
+ gf_log ("glusterfsd", GF_LOG_DEBUG,
+ "fopen-keep-cache mode %d",
+ cmd_args->fopen_keep_cache);
+ break;
}
-
- gf_log ("glusterfs", GF_LOG_DEBUG,
- "loading volume file %s", cmd_args->volume_file);
-
- return specfp;
-}
-static xlator_t *
-_parse_specfp (glusterfs_ctx_t *ctx,
- FILE *specfp)
-{
- int spec_fd = 0;
- cmd_args_t *cmd_args = NULL;
- xlator_t *tree = NULL, *trav = NULL, *new_tree = NULL;
-
- cmd_args = &ctx->cmd_args;
-
- fseek (specfp, 0L, SEEK_SET);
-
- tree = file_to_xlator_tree (ctx, specfp);
- trav = tree;
-
- if (tree == NULL) {
- if (cmd_args->volfile_server) {
- fprintf (stderr,
- "error in parsing volume file given by "
- "server %s\n", cmd_args->volfile_server);
- gf_log ("glusterfs", GF_LOG_ERROR,
- "error in parsing volume file given by "
- "server %s", cmd_args->volfile_server);
- }
- else {
- fprintf (stderr,
- "error in parsing volume file %s\n",
- cmd_args->volume_file);
- gf_log ("glusterfs", GF_LOG_ERROR,
- "error in parsing volume file %s",
- cmd_args->volume_file);
+ if (cmd_args->gid_timeout) {
+ ret = dict_set_int32(options, "gid-timeout",
+ cmd_args->gid_timeout);
+ if (ret < 0) {
+ gf_log("glusterfsd", GF_LOG_ERROR, "failed to set dict "
+ "value for key gid-timeout");
+ goto err;
}
- return NULL;
}
-
- spec_fd = fileno (specfp);
- get_checksum_for_file (spec_fd, &ctx->volfile_checksum);
-
- /* if volume_name is given, then we attach to it */
- if (cmd_args->volume_name) {
- while (trav) {
- if (strcmp (trav->name, cmd_args->volume_name) == 0) {
- new_tree = trav;
- break;
- }
- trav = trav->next;
+ if (cmd_args->background_qlen) {
+ ret = dict_set_int32 (options, "background-qlen",
+ cmd_args->background_qlen);
+ if (ret < 0) {
+ gf_log("glusterfsd", GF_LOG_ERROR, "failed to set dict "
+ "value for key background-qlen");
+ goto err;
}
-
- if (!trav) {
- if (cmd_args->volfile_server) {
- fprintf (stderr,
- "volume %s not found in volume "
- "file given by server %s\n",
- cmd_args->volume_name,
- cmd_args->volfile_server);
- gf_log ("glusterfs", GF_LOG_ERROR,
- "volume %s not found in volume "
- "file given by server %s",
- cmd_args->volume_name,
- cmd_args->volfile_server);
- } else {
- fprintf (stderr,
- "volume %s not found in volume "
- "file %s\n",
- cmd_args->volume_name,
- cmd_args->volume_file);
- gf_log ("glusterfs", GF_LOG_ERROR,
- "volume %s not found in volume "
- "file %s", cmd_args->volume_name,
- cmd_args->volume_file);
- }
- return NULL;
+ }
+ if (cmd_args->congestion_threshold) {
+ ret = dict_set_int32 (options, "congestion-threshold",
+ cmd_args->congestion_threshold);
+ if (ret < 0) {
+ gf_log("glusterfsd", GF_LOG_ERROR, "failed to set dict "
+ "value for key congestion-threshold");
+ goto err;
}
- tree = trav;
}
- return tree;
-}
-static int
-_log_if_option_is_invalid (xlator_t *xl, data_pair_t *pair)
-{
- volume_opt_list_t *vol_opt = NULL;
- volume_option_t *opt = NULL;
- int i = 0;
- int index = 0;
- int found = 0;
-
- /* Get the first volume_option */
- list_for_each_entry (vol_opt, &xl->volume_options, list) {
- /* Warn for extra option */
- if (!vol_opt->given_opt)
- break;
+ switch (cmd_args->fuse_direct_io_mode) {
+ case GF_OPTION_DISABLE: /* disable */
+ ret = dict_set_static_ptr (options, ZR_DIRECT_IO_OPT,
+ "disable");
+ if (ret < 0) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "failed to set 'disable' for key %s",
+ ZR_DIRECT_IO_OPT);
+ goto err;
+ }
+ break;
+ case GF_OPTION_ENABLE: /* enable */
+ ret = dict_set_static_ptr (options, ZR_DIRECT_IO_OPT,
+ "enable");
+ if (ret < 0) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "failed to set 'enable' for key %s",
+ ZR_DIRECT_IO_OPT);
+ goto err;
+ }
+ break;
+ case GF_OPTION_DEFERRED: /* default */
+ default:
+ gf_log ("", GF_LOG_DEBUG, "fuse direct io type %d",
+ cmd_args->fuse_direct_io_mode);
+ break;
+ }
- opt = vol_opt->given_opt;
- for (index = 0;
- ((index < ZR_OPTION_MAX_ARRAY_SIZE) &&
- (opt[index].key && opt[index].key[0])); index++)
- for (i = 0; (i < ZR_VOLUME_MAX_NUM_KEY) &&
- opt[index].key[i]; i++) {
- if (fnmatch (opt[index].key[i],
- pair->key,
- FNM_NOESCAPE) == 0) {
- found = 1;
- break;
- }
- }
- }
+ if (!cmd_args->no_daemon_mode) {
+ ret = dict_set_static_ptr (options, "sync-to-mount",
+ "enable");
+ if (ret < 0) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "failed to set dict value for key sync-mtab");
+ goto err;
+ }
+ }
- if (!found) {
- gf_log (xl->name, GF_LOG_WARNING,
- "option '%s' is not recognized",
- pair->key);
- }
- return 0;
+ if (cmd_args->use_readdirp) {
+ ret = dict_set_str (options, "use-readdirp",
+ cmd_args->use_readdirp);
+ if (ret < 0) {
+ gf_log ("glusterfsd", GF_LOG_ERROR, "failed to set dict"
+ " value for key use-readdirp");
+ goto err;
+ }
+ }
+ ret = 0;
+err:
+ return ret;
}
-static int
-_xlator_graph_init (xlator_t *xl)
+int
+create_fuse_mount (glusterfs_ctx_t *ctx)
{
- volume_opt_list_t *vol_opt = NULL;
- data_pair_t *pair = NULL;
- xlator_t *trav = NULL;
- int ret = -1;
-
- trav = xl;
-
- while (trav->prev)
- trav = trav->prev;
-
- /* Validate phase */
- while (trav) {
- /* Get the first volume_option */
- list_for_each_entry (vol_opt,
- &trav->volume_options, list)
- break;
- if ((ret =
- validate_xlator_volume_options (trav,
- vol_opt->given_opt)) < 0) {
- gf_log (trav->name, GF_LOG_ERROR,
- "validating translator failed");
- return ret;
- }
- trav = trav->next;
- }
+ int ret = 0;
+ cmd_args_t *cmd_args = NULL;
+ xlator_t *master = NULL;
-
- trav = xl;
- while (trav->prev)
- trav = trav->prev;
- /* Initialization phase */
- while (trav) {
- if (!trav->ready) {
- if ((ret = xlator_tree_init (trav)) < 0) {
- gf_log ("glusterfs", GF_LOG_ERROR,
- "initializing translator failed");
- return ret;
- }
- }
- trav = trav->next;
- }
-
- /* No error in this phase, just bunch of warning if at all */
- trav = xl;
-
- while (trav->prev)
- trav = trav->prev;
-
- /* Validate again phase */
- while (trav) {
- pair = trav->options->members_list;
- while (pair) {
- _log_if_option_is_invalid (trav, pair);
- pair = pair->next;
- }
- trav = trav->next;
- }
+ cmd_args = &ctx->cmd_args;
- return ret;
+ if (!cmd_args->mount_point) {
+ gf_log ("", GF_LOG_TRACE,
+ "mount point not found, not a client process");
+ return 0;
+ }
+
+ if (ctx->process_mode != GF_CLIENT_PROCESS) {
+ gf_log("glusterfsd", GF_LOG_ERROR,
+ "Not a client process, not performing mount operation");
+ return -1;
+ }
+
+ master = GF_CALLOC (1, sizeof (*master),
+ gfd_mt_xlator_t);
+ if (!master)
+ goto err;
+
+ master->name = gf_strdup ("fuse");
+ if (!master->name)
+ goto err;
+
+ if (xlator_set_type (master, "mount/fuse") == -1) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "MOUNT-POINT %s initialization failed",
+ cmd_args->mount_point);
+ goto err;
+ }
+
+ master->ctx = ctx;
+ master->options = get_new_dict ();
+ if (!master->options)
+ goto err;
+
+ ret = set_fuse_mount_options (ctx, master->options);
+ if (ret)
+ goto err;
+
+ if (cmd_args->fuse_mountopts) {
+ ret = dict_set_static_ptr (master->options, ZR_FUSE_MOUNTOPTS,
+ cmd_args->fuse_mountopts);
+ if (ret < 0) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "failed to set dict value for key %s",
+ ZR_FUSE_MOUNTOPTS);
+ goto err;
+ }
+ }
+
+ ret = xlator_init (master);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG, "failed to initialize fuse translator");
+ goto err;
+ }
+
+ ctx->master = master;
+
+ return 0;
+
+err:
+ if (master) {
+ xlator_destroy (master);
+ }
+
+ return 1;
}
-int
-glusterfs_graph_init (xlator_t *graph, int fuse)
-{
- volume_opt_list_t *vol_opt = NULL;
- if (fuse) {
- /* FUSE needs to be initialized earlier than the
- other translators */
- list_for_each_entry (vol_opt,
- &graph->volume_options, list)
- break;
- if (validate_xlator_volume_options (graph,
- vol_opt->given_opt) == -1) {
- gf_log (graph->name, GF_LOG_ERROR,
- "validating translator failed");
- return -1;
- }
- if (xlator_init (graph) != 0)
- return -1;
-
- graph->ready = 1;
- }
- if (_xlator_graph_init (graph) == -1)
- return -1;
-
- /* check server or fuse is given */
- if (graph->ctx->top == NULL) {
- fprintf (stderr, "no valid translator loaded at the top, or"
- "no mount point given. exiting\n");
- gf_log ("glusterfs", GF_LOG_ERROR,
- "no valid translator loaded at the top or "
- "no mount point given. exiting");
- return -1;
- }
+static FILE *
+get_volfp (glusterfs_ctx_t *ctx)
+{
+ int ret = 0;
+ cmd_args_t *cmd_args = NULL;
+ FILE *specfp = NULL;
+ struct stat statbuf;
+
+ cmd_args = &ctx->cmd_args;
+
+ ret = sys_lstat (cmd_args->volfile, &statbuf);
+ if (ret == -1) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "%s: %s", cmd_args->volfile, strerror (errno));
+ return NULL;
+ }
+
+ if ((specfp = fopen (cmd_args->volfile, "r")) == NULL) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "volume file %s: %s",
+ cmd_args->volfile,
+ strerror (errno));
+ return NULL;
+ }
+
+ gf_log ("glusterfsd", GF_LOG_DEBUG,
+ "loading volume file %s", cmd_args->volfile);
- return 0;
+ return specfp;
}
static int
-gf_remember_xlator_option (struct list_head *options, char *arg)
+gf_remember_backup_volfile_server (char *arg)
{
- glusterfs_ctx_t *ctx = NULL;
- cmd_args_t *cmd_args = NULL;
- xlator_cmdline_option_t *option = NULL;
- int ret = -1;
- char *dot = NULL;
- char *equals = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ cmd_args_t *cmd_args = NULL;
+ int ret = -1;
+ server_cmdline_t *server = NULL;
- ctx = get_global_ctx_ptr ();
- cmd_args = &ctx->cmd_args;
+ ctx = glusterfsd_ctx;
+ if (!ctx)
+ goto out;
+ cmd_args = &ctx->cmd_args;
- option = CALLOC (1, sizeof (xlator_cmdline_option_t));
- INIT_LIST_HEAD (&option->cmd_args);
+ if(!cmd_args)
+ goto out;
- dot = strchr (arg, '.');
- if (!dot)
- goto out;
+ server = GF_CALLOC (1, sizeof (server_cmdline_t),
+ gfd_mt_server_cmdline_t);
+ if (!server)
+ goto out;
- option->volume = CALLOC ((dot - arg), sizeof (char));
- strncpy (option->volume, arg, (dot - arg));
+ INIT_LIST_HEAD(&server->list);
- equals = strchr (arg, '=');
- if (!equals)
- goto out;
+ server->volfile_server = gf_strdup(arg);
- option->key = CALLOC ((equals - dot), sizeof (char));
- strncpy (option->key, dot + 1, (equals - dot - 1));
+ if (!cmd_args->volfile_server) {
+ cmd_args->volfile_server = server->volfile_server;
+ cmd_args->curr_server = server;
+ }
- if (!*(equals + 1))
- goto out;
+ if (!server->volfile_server) {
+ gf_log ("", GF_LOG_WARNING,
+ "xlator option %s is invalid", arg);
+ goto out;
+ }
- option->value = strdup (equals + 1);
-
- list_add (&option->cmd_args, &cmd_args->xlator_options);
+ list_add_tail (&server->list, &cmd_args->volfile_servers);
- ret = 0;
+ ret = 0;
out:
- if (ret == -1) {
- if (option) {
- if (option->volume)
- FREE (option->volume);
- if (option->key)
- FREE (option->key);
- if (option->value)
- FREE (option->value);
-
- FREE (option);
- }
- }
+ if (ret == -1) {
+ if (server) {
+ GF_FREE (server->volfile_server);
+ GF_FREE (server);
+ }
+ }
- return ret;
-}
+ return ret;
+}
-static void
-gf_add_cmdline_options (xlator_t *graph, cmd_args_t *cmd_args)
+static int
+gf_remember_xlator_option (char *arg)
{
- int ret = 0;
- xlator_t *trav = graph;
- xlator_cmdline_option_t *cmd_option = NULL;
-
- while (trav) {
- list_for_each_entry (cmd_option,
- &cmd_args->xlator_options, cmd_args) {
- if (!fnmatch (cmd_option->volume,
- trav->name, FNM_NOESCAPE)) {
- ret = dict_set_str (trav->options,
- cmd_option->key,
- cmd_option->value);
- if (ret == 0) {
- gf_log ("glusterfs", GF_LOG_WARNING,
- "adding option '%s' for "
- "volume '%s' with value '%s'",
- cmd_option->key, trav->name,
- cmd_option->value);
- } else {
- gf_log ("glusterfs", GF_LOG_WARNING,
- "adding option '%s' for "
- "volume '%s' failed: %s",
- cmd_option->key, trav->name,
- strerror (-ret));
- }
- }
- }
- trav = trav->next;
- }
+ glusterfs_ctx_t *ctx = NULL;
+ cmd_args_t *cmd_args = NULL;
+ xlator_cmdline_option_t *option = NULL;
+ int ret = -1;
+ char *dot = NULL;
+ char *equals = NULL;
+
+ ctx = glusterfsd_ctx;
+ cmd_args = &ctx->cmd_args;
+
+ option = GF_CALLOC (1, sizeof (xlator_cmdline_option_t),
+ gfd_mt_xlator_cmdline_option_t);
+ if (!option)
+ goto out;
+
+ INIT_LIST_HEAD (&option->cmd_args);
+
+ dot = strchr (arg, '.');
+ if (!dot) {
+ gf_log ("", GF_LOG_WARNING,
+ "xlator option %s is invalid", arg);
+ goto out;
+ }
+
+ option->volume = GF_CALLOC ((dot - arg) + 1, sizeof (char),
+ gfd_mt_char);
+ if (!option->volume)
+ goto out;
+
+ strncpy (option->volume, arg, (dot - arg));
+
+ equals = strchr (arg, '=');
+ if (!equals) {
+ gf_log ("", GF_LOG_WARNING,
+ "xlator option %s is invalid", arg);
+ goto out;
+ }
+
+ option->key = GF_CALLOC ((equals - dot) + 1, sizeof (char),
+ gfd_mt_char);
+ if (!option->key)
+ goto out;
+
+ strncpy (option->key, dot + 1, (equals - dot - 1));
+
+ if (!*(equals + 1)) {
+ gf_log ("", GF_LOG_WARNING,
+ "xlator option %s is invalid", arg);
+ goto out;
+ }
+
+ option->value = gf_strdup (equals + 1);
+
+ list_add (&option->cmd_args, &cmd_args->xlator_options);
+
+ ret = 0;
+out:
+ if (ret == -1) {
+ if (option) {
+ GF_FREE (option->volume);
+ GF_FREE (option->key);
+ GF_FREE (option->value);
+
+ GF_FREE (option);
+ }
+ }
+
+ return ret;
}
-error_t
+
+static error_t
parse_opts (int key, char *arg, struct argp_state *state)
{
- cmd_args_t *cmd_args = NULL;
- uint32_t n = 0;
- double d = 0.0;
-
- cmd_args = state->input;
-
- switch (key) {
- case ARGP_VOLFILE_SERVER_KEY:
- cmd_args->volfile_server = strdup (arg);
- break;
-
- case ARGP_VOLFILE_MAX_FETCH_ATTEMPTS:
- n = 0;
-
- if (gf_string2uint_base10 (arg, &n) == 0) {
- cmd_args->max_connect_attempts = n;
- break;
- }
-
- argp_failure (state, -1, 0,
- "Invalid limit on connect attempts %s", arg);
- break;
+ cmd_args_t *cmd_args = NULL;
+ uint32_t n = 0;
+ double d = 0.0;
+ gf_boolean_t b = _gf_false;
+ char *pwd = NULL;
+ char tmp_buf[2048] = {0,};
+ char *tmp_str = NULL;
+ char *port_str = NULL;
+ struct passwd *pw = NULL;
+
+ cmd_args = state->input;
+
+ switch (key) {
+ case ARGP_VOLFILE_SERVER_KEY:
+ gf_remember_backup_volfile_server (arg);
- case ARGP_VOLUME_FILE_KEY:
- cmd_args->volume_file = strdup (arg);
- break;
-
- case ARGP_LOG_SERVER_KEY:
- cmd_args->log_server = strdup (arg);
- break;
-
- case ARGP_LOG_LEVEL_KEY:
- if (strcasecmp (arg, ARGP_LOG_LEVEL_NONE_OPTION) == 0) {
- cmd_args->log_level = GF_LOG_NONE;
- break;
- }
- if (strcasecmp (arg, ARGP_LOG_LEVEL_CRITICAL_OPTION) == 0) {
- cmd_args->log_level = GF_LOG_CRITICAL;
- break;
- }
- if (strcasecmp (arg, ARGP_LOG_LEVEL_ERROR_OPTION) == 0) {
- cmd_args->log_level = GF_LOG_ERROR;
- break;
- }
- if (strcasecmp (arg, ARGP_LOG_LEVEL_WARNING_OPTION) == 0) {
- cmd_args->log_level = GF_LOG_WARNING;
- break;
- }
- if (strcasecmp (arg, ARGP_LOG_LEVEL_NORMAL_OPTION) == 0) {
- cmd_args->log_level = GF_LOG_NORMAL;
- break;
- }
- if (strcasecmp (arg, ARGP_LOG_LEVEL_DEBUG_OPTION) == 0) {
- cmd_args->log_level = GF_LOG_DEBUG;
- break;
- }
- if (strcasecmp (arg, ARGP_LOG_LEVEL_TRACE_OPTION) == 0) {
- cmd_args->log_level = GF_LOG_TRACE;
- break;
- }
-
- argp_failure (state, -1, 0, "unknown log level %s", arg);
- break;
-
- case ARGP_LOG_FILE_KEY:
- cmd_args->log_file = strdup (arg);
- break;
-
- case ARGP_VOLFILE_SERVER_PORT_KEY:
- n = 0;
-
- if (gf_string2uint_base10 (arg, &n) == 0) {
- cmd_args->volfile_server_port = n;
- break;
- }
-
- argp_failure (state, -1, 0,
- "unknown volfile server port %s", arg);
- break;
+ break;
- case ARGP_LOG_SERVER_PORT_KEY:
- n = 0;
-
- if (gf_string2uint_base10 (arg, &n) == 0) {
- cmd_args->log_server_port = n;
- break;
- }
-
- argp_failure (state, -1, 0,
- "unknown log server port %s", arg);
- break;
-
- case ARGP_VOLFILE_SERVER_TRANSPORT_KEY:
- cmd_args->volfile_server_transport = strdup (arg);
- break;
-
- case ARGP_VOLFILE_ID_KEY:
- cmd_args->volfile_id = strdup (arg);
- break;
+ case ARGP_READ_ONLY_KEY:
+ cmd_args->read_only = 1;
+ break;
- case ARGP_PID_FILE_KEY:
- cmd_args->pid_file = strdup (arg);
- break;
-
- case ARGP_NO_DAEMON_KEY:
- cmd_args->no_daemon_mode = ENABLE_NO_DAEMON_MODE;
- break;
-
- case ARGP_RUN_ID_KEY:
- cmd_args->run_id = strdup (arg);
- break;
-
- case ARGP_DEBUG_KEY:
- cmd_args->debug_mode = ENABLE_DEBUG_MODE;
- break;
-
- case ARGP_DISABLE_DIRECT_IO_MODE_KEY:
- cmd_args->fuse_direct_io_mode_flag = _gf_false;
- break;
-
- case ARGP_ENTRY_TIMEOUT_KEY:
- d = 0.0;
-
- gf_string2double (arg, &d);
- if (!(d < 0.0)) {
- cmd_args->fuse_entry_timeout = d;
- break;
- }
-
- argp_failure (state, -1, 0, "unknown entry timeout %s", arg);
- break;
-
- case ARGP_ATTRIBUTE_TIMEOUT_KEY:
- d = 0.0;
+ case ARGP_ACL_KEY:
+ cmd_args->acl = 1;
+ gf_remember_xlator_option ("*-md-cache.cache-posix-acl=true");
+ break;
- gf_string2double (arg, &d);
- if (!(d < 0.0)) {
- cmd_args->fuse_attribute_timeout = d;
- break;
- }
-
- argp_failure (state, -1, 0,
- "unknown attribute timeout %s", arg);
- break;
-
- case ARGP_VOLFILE_CHECK_KEY:
+ case ARGP_SELINUX_KEY:
+ cmd_args->selinux = 1;
+ gf_remember_xlator_option ("*-md-cache.cache-selinux=true");
+ break;
+
+ case ARGP_AUX_GFID_MOUNT_KEY:
+ cmd_args->aux_gfid_mount = 1;
+ break;
+
+ case ARGP_INODE32_KEY:
+ cmd_args->enable_ino32 = 1;
+ break;
+
+ case ARGP_WORM_KEY:
+ cmd_args->worm = 1;
+ break;
+
+ case ARGP_MAC_COMPAT_KEY:
+ if (!arg)
+ arg = "on";
+
+ if (gf_string2boolean (arg, &b) == 0) {
+ cmd_args->mac_compat = b;
+
+ break;
+ }
+
+ argp_failure (state, -1, 0,
+ "invalid value \"%s\" for mac-compat", arg);
+ break;
+
+ case ARGP_VOLUME_FILE_KEY:
+ GF_FREE (cmd_args->volfile);
+
+ if (arg[0] != '/') {
+ pwd = getcwd (NULL, PATH_MAX);
+ if (!pwd) {
+ argp_failure (state, -1, errno,
+ "getcwd failed with error no %d",
+ errno);
+ break;
+ }
+ snprintf (tmp_buf, 1024, "%s/%s", pwd, arg);
+ cmd_args->volfile = gf_strdup (tmp_buf);
+ free (pwd);
+ } else {
+ cmd_args->volfile = gf_strdup (arg);
+ }
+
+ break;
+
+ case ARGP_LOG_LEVEL_KEY:
+ if (strcasecmp (arg, ARGP_LOG_LEVEL_NONE_OPTION) == 0) {
+ cmd_args->log_level = GF_LOG_NONE;
+ break;
+ }
+ if (strcasecmp (arg, ARGP_LOG_LEVEL_CRITICAL_OPTION) == 0) {
+ cmd_args->log_level = GF_LOG_CRITICAL;
+ break;
+ }
+ if (strcasecmp (arg, ARGP_LOG_LEVEL_ERROR_OPTION) == 0) {
+ cmd_args->log_level = GF_LOG_ERROR;
+ break;
+ }
+ if (strcasecmp (arg, ARGP_LOG_LEVEL_WARNING_OPTION) == 0) {
+ cmd_args->log_level = GF_LOG_WARNING;
+ break;
+ }
+ if (strcasecmp (arg, ARGP_LOG_LEVEL_INFO_OPTION) == 0) {
+ cmd_args->log_level = GF_LOG_INFO;
+ break;
+ }
+ if (strcasecmp (arg, ARGP_LOG_LEVEL_DEBUG_OPTION) == 0) {
+ cmd_args->log_level = GF_LOG_DEBUG;
+ break;
+ }
+ if (strcasecmp (arg, ARGP_LOG_LEVEL_TRACE_OPTION) == 0) {
+ cmd_args->log_level = GF_LOG_TRACE;
+ break;
+ }
+
+ argp_failure (state, -1, 0, "unknown log level %s", arg);
+ break;
+
+ case ARGP_LOG_FILE_KEY:
+ cmd_args->log_file = gf_strdup (arg);
+ break;
+
+ case ARGP_VOLFILE_SERVER_PORT_KEY:
+ n = 0;
+
+ if (gf_string2uint_base10 (arg, &n) == 0) {
+ cmd_args->volfile_server_port = n;
+ break;
+ }
+
+ argp_failure (state, -1, 0,
+ "unknown volfile server port %s", arg);
+ break;
+
+ case ARGP_VOLFILE_SERVER_TRANSPORT_KEY:
+ cmd_args->volfile_server_transport = gf_strdup (arg);
+ break;
+
+ case ARGP_VOLFILE_ID_KEY:
+ cmd_args->volfile_id = gf_strdup (arg);
+ break;
+
+ case ARGP_PID_FILE_KEY:
+ cmd_args->pid_file = gf_strdup (arg);
+ break;
+
+ case ARGP_SOCK_FILE_KEY:
+ cmd_args->sock_file = gf_strdup (arg);
+ break;
+
+ case ARGP_NO_DAEMON_KEY:
+ cmd_args->no_daemon_mode = ENABLE_NO_DAEMON_MODE;
+ break;
+
+ case ARGP_RUN_ID_KEY:
+ cmd_args->run_id = gf_strdup (arg);
+ break;
+
+ case ARGP_DEBUG_KEY:
+ cmd_args->debug_mode = ENABLE_DEBUG_MODE;
+ break;
+
+ case ARGP_DIRECT_IO_MODE_KEY:
+ if (!arg)
+ arg = "on";
+
+ if (gf_string2boolean (arg, &b) == 0) {
+ cmd_args->fuse_direct_io_mode = b;
+
+ break;
+ }
+
+ argp_failure (state, -1, 0,
+ "unknown direct I/O mode setting \"%s\"", arg);
+ break;
+
+ case ARGP_ENTRY_TIMEOUT_KEY:
+ d = 0.0;
+
+ gf_string2double (arg, &d);
+ if (!(d < 0.0)) {
+ cmd_args->fuse_entry_timeout = d;
+ break;
+ }
+
+ argp_failure (state, -1, 0, "unknown entry timeout %s", arg);
+ break;
+
+ case ARGP_NEGATIVE_TIMEOUT_KEY:
+ d = 0.0;
+
+ gf_string2double (arg, &d);
+ if (!(d < 0.0)) {
+ cmd_args->fuse_negative_timeout = d;
+ break;
+ }
+
+ argp_failure (state, -1, 0, "unknown negative timeout %s", arg);
+ break;
+
+ case ARGP_ATTRIBUTE_TIMEOUT_KEY:
+ d = 0.0;
+
+ gf_string2double (arg, &d);
+ if (!(d < 0.0)) {
+ cmd_args->fuse_attribute_timeout = d;
+ break;
+ }
+
+ argp_failure (state, -1, 0,
+ "unknown attribute timeout %s", arg);
+ break;
+
+ case ARGP_CLIENT_PID_KEY:
+ if (gf_string2int (arg, &cmd_args->client_pid) == 0) {
+ cmd_args->client_pid_set = 1;
+ break;
+ }
+
+ argp_failure (state, -1, 0,
+ "unknown client pid %s", arg);
+ break;
+
+ case ARGP_USER_MAP_ROOT_KEY:
+ pw = getpwnam (arg);
+ if (pw)
+ cmd_args->uid_map_root = pw->pw_uid;
+ else
+ argp_failure (state, -1, 0,
+ "user %s does not exist", arg);
+ break;
+
+ case ARGP_VOLFILE_CHECK_KEY:
cmd_args->volfile_check = 1;
break;
- case ARGP_VOLUME_NAME_KEY:
- cmd_args->volume_name = strdup (arg);
- break;
+ case ARGP_VOLUME_NAME_KEY:
+ cmd_args->volume_name = gf_strdup (arg);
+ break;
- case ARGP_XLATOR_OPTION_KEY:
- gf_remember_xlator_option (&cmd_args->xlator_options, arg);
- break;
+ case ARGP_XLATOR_OPTION_KEY:
+ if (gf_remember_xlator_option (arg))
+ argp_failure (state, -1, 0, "invalid xlator option %s",
+ arg);
-#ifdef GF_DARWIN_HOST_OS
- case ARGP_NON_LOCAL_KEY:
- cmd_args->non_local = _gf_true;
- break;
+ break;
-#endif /* DARWIN */
+ case ARGP_KEY_NO_ARGS:
+ break;
+
+ case ARGP_KEY_ARG:
+ if (state->arg_num >= 1)
+ argp_usage (state);
+
+ cmd_args->mount_point = gf_strdup (arg);
+ break;
+
+ case ARGP_DUMP_FUSE_KEY:
+ cmd_args->dump_fuse = gf_strdup (arg);
+ break;
+ case ARGP_BRICK_NAME_KEY:
+ cmd_args->brick_name = gf_strdup (arg);
+ break;
+ case ARGP_BRICK_PORT_KEY:
+ n = 0;
+
+ port_str = strtok_r (arg, ",", &tmp_str);
+ if (gf_string2uint_base10 (port_str, &n) == 0) {
+ cmd_args->brick_port = n;
+ port_str = strtok_r (NULL, ",", &tmp_str);
+ if (port_str) {
+ if (gf_string2uint_base10 (port_str, &n) == 0)
+ cmd_args->brick_port2 = n;
+ break;
+
+ argp_failure (state, -1, 0,
+ "wrong brick (listen) port %s", arg);
+ }
+ break;
+ }
+
+ argp_failure (state, -1, 0,
+ "unknown brick (listen) port %s", arg);
+ break;
+
+ case ARGP_MEM_ACCOUNTING_KEY:
+ /* TODO: it should have got handled much earlier */
+ //gf_mem_acct_enable_set (THIS->ctx);
+ break;
+
+ case ARGP_FOPEN_KEEP_CACHE_KEY:
+ if (!arg)
+ arg = "on";
+
+ if (gf_string2boolean (arg, &b) == 0) {
+ cmd_args->fopen_keep_cache = b;
+
+ break;
+ }
+
+ argp_failure (state, -1, 0,
+ "unknown cache setting \"%s\"", arg);
- case ARGP_KEY_NO_ARGS:
break;
-
- case ARGP_KEY_ARG:
- if (state->arg_num >= 1)
- argp_usage (state);
-
- cmd_args->mount_point = strdup (arg);
+
+ case ARGP_GID_TIMEOUT_KEY:
+ if (!gf_string2int(arg, &cmd_args->gid_timeout))
+ break;
+
+ argp_failure(state, -1, 0, "unknown group list timeout %s", arg);
break;
+ case ARGP_FUSE_BACKGROUND_QLEN_KEY:
+ if (!gf_string2int (arg, &cmd_args->background_qlen))
+ break;
+
+ argp_failure (state, -1, 0,
+ "unknown background qlen option %s", arg);
+ break;
+ case ARGP_FUSE_CONGESTION_THRESHOLD_KEY:
+ if (!gf_string2int (arg, &cmd_args->congestion_threshold))
+ break;
+
+ argp_failure (state, -1, 0,
+ "unknown congestion threshold option %s", arg);
+ break;
+
+ case ARGP_FUSE_MOUNTOPTS_KEY:
+ cmd_args->fuse_mountopts = gf_strdup (arg);
+ break;
+
+ case ARGP_FUSE_USE_READDIRP_KEY:
+ if (!arg)
+ arg = "yes";
+
+ if (gf_string2boolean (arg, &b) == 0) {
+ if (b) {
+ cmd_args->use_readdirp = "yes";
+ } else {
+ cmd_args->use_readdirp = "no";
+ }
+
+ break;
+ }
+
+ argp_failure (state, -1, 0,
+ "unknown use-readdirp setting \"%s\"", arg);
+ break;
+
}
- return 0;
+ return 0;
}
-void
+void
cleanup_and_exit (int signum)
{
- glusterfs_ctx_t *ctx = NULL;
- xlator_t *trav = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ xlator_t *trav = NULL;
+
+ ctx = glusterfsd_ctx;
+
+ if (!ctx)
+ return;
+
+ gf_log_callingfn ("", GF_LOG_WARNING,
+ "received signum (%d), shutting down", signum);
+
+ if (ctx->cleanup_started)
+ return;
+
+ ctx->cleanup_started = 1;
+ glusterfs_mgmt_pmap_signout (ctx);
+
+ /* below part is a racy code where the rpcsvc object is freed.
+ * But in another thread (epoll thread), upon poll error in the
+ * socket the transports are cleaned up where again rpcsvc object
+ * is accessed (which is already freed by the below function).
+ * Since the process is about to be killed dont execute the function
+ * below.
+ */
+ /* if (ctx->listener) { */
+ /* (void) glusterfs_listener_stop (ctx); */
+ /* } */
+
+ /* Call fini() of FUSE xlator first:
+ * so there are no more requests coming and
+ * 'umount' of mount point is done properly */
+ trav = ctx->master;
+ if (trav && trav->fini) {
+ THIS = trav;
+ trav->fini (trav);
+ }
- ctx = get_global_ctx_ptr ();
-
- gf_log ("glusterfs", GF_LOG_WARNING, "shutting down");
+ glusterfs_pidfile_cleanup (ctx);
- if (ctx->pidfp) {
- gf_unlockfd (fileno (ctx->pidfp));
- fclose (ctx->pidfp);
- ctx->pidfp = NULL;
- }
-
- if (ctx->specfp) {
- fclose (ctx->specfp);
- ctx->specfp = NULL;
- }
+ exit (0);
+#if 0
+ /* TODO: Properly do cleanup_and_exit(), with synchronization */
+ if (ctx->mgmt) {
+ /* cleanup the saved-frames before last unref */
+ rpc_clnt_connection_cleanup (&ctx->mgmt->conn);
+ rpc_clnt_unref (ctx->mgmt);
+ }
- if (ctx->cmd_args.pid_file) {
- unlink (ctx->cmd_args.pid_file);
- ctx->cmd_args.pid_file = NULL;
- }
-
- if (ctx->graph) {
- trav = ctx->graph;
- ctx->graph = NULL;
- while (trav) {
- trav->fini (trav);
- trav = trav->next;
- }
- exit (0);
- } else {
- gf_log ("glusterfs", GF_LOG_DEBUG, "no graph present");
- }
+ /* call fini() of each xlator */
+ trav = NULL;
+ if (ctx->active)
+ trav = ctx->active->top;
+ while (trav) {
+ if (trav->fini) {
+ THIS = trav;
+ trav->fini (trav);
+ }
+ trav = trav->next;
+ }
+#endif
}
-static char *
-zr_build_process_uuid ()
+static void
+reincarnate (int signum)
{
- char tmp_str[1024] = {0,};
- char hostname[256] = {0,};
- struct timeval tv = {0,};
- struct tm now = {0, };
- char now_str[32];
-
- if (-1 == gettimeofday(&tv, NULL)) {
- gf_log ("", GF_LOG_ERROR,
- "gettimeofday: failed %s",
- strerror (errno));
- }
+ int ret = 0;
+ glusterfs_ctx_t *ctx = NULL;
+ cmd_args_t *cmd_args = NULL;
- if (-1 == gethostname (hostname, 256)) {
- gf_log ("", GF_LOG_ERROR,
- "gethostname: failed %s",
- strerror (errno));
- }
+ ctx = glusterfsd_ctx;
+ cmd_args = &ctx->cmd_args;
+
+ if (cmd_args->volfile_server) {
+ gf_log ("glusterfsd", GF_LOG_INFO,
+ "Fetching the volume file from server...");
+ ret = glusterfs_volfile_fetch (ctx);
+ } else {
+ gf_log ("glusterfsd", GF_LOG_DEBUG,
+ "Not reloading volume specification file on SIGHUP");
+ }
+
+ /* Also, SIGHUP should do logrotate */
+ gf_log_logrotate (1);
+
+ if (ret < 0)
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "volume initialization failed.");
- localtime_r (&tv.tv_sec, &now);
- strftime (now_str, 32, "%Y/%m/%d-%H:%M:%S", &now);
- snprintf (tmp_str, 1024, "%s-%d-%s:%ld",
- hostname, getpid(), now_str, tv.tv_usec);
-
- return strdup (tmp_str);
+ return;
}
-#define GF_SERVER_PROCESS 0
-#define GF_CLIENT_PROCESS 1
+void
+emancipate (glusterfs_ctx_t *ctx, int ret)
+{
+ /* break free from the parent */
+ if (ctx->daemon_pipe[1] != -1) {
+ write (ctx->daemon_pipe[1], (void *) &ret, sizeof (ret));
+ close (ctx->daemon_pipe[1]);
+ ctx->daemon_pipe[1] = -1;
+ }
+}
static uint8_t
gf_get_process_mode (char *exec_name)
{
- char *dup_execname = NULL, *base = NULL;
- uint8_t ret = 0;
-
- dup_execname = strdup (exec_name);
- base = basename (dup_execname);
-
- if (!strncmp (base, "glusterfsd", 10)) {
- ret = GF_SERVER_PROCESS;
- } else {
- ret = GF_CLIENT_PROCESS;
- }
-
- free (dup_execname);
+ char *dup_execname = NULL, *base = NULL;
+ uint8_t ret = 0;
+
+ dup_execname = gf_strdup (exec_name);
+ base = basename (dup_execname);
+
+ if (!strncmp (base, "glusterfsd", 10)) {
+ ret = GF_SERVER_PROCESS;
+ } else if (!strncmp (base, "glusterd", 8)) {
+ ret = GF_GLUSTERD_PROCESS;
+ } else {
+ ret = GF_CLIENT_PROCESS;
+ }
+
+ GF_FREE (dup_execname);
+
+ return ret;
+}
+
+
+static int
+glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx)
+{
+ cmd_args_t *cmd_args = NULL;
+ struct rlimit lim = {0, };
+ int ret = -1;
+
+ xlator_mem_acct_init (THIS, gfd_mt_end);
+
+ ctx->process_uuid = generate_glusterfs_ctx_id ();
+ if (!ctx->process_uuid) {
+ gf_log ("", GF_LOG_CRITICAL,
+ "ERROR: glusterfs uuid generation failed");
+ goto out;
+ }
+
+ ctx->page_size = 128 * GF_UNIT_KB;
+
+ ctx->iobuf_pool = iobuf_pool_new ();
+ if (!ctx->iobuf_pool) {
+ gf_log ("", GF_LOG_CRITICAL,
+ "ERROR: glusterfs iobuf pool creation failed");
+ goto out;
+ }
+
+ ctx->event_pool = event_pool_new (DEFAULT_EVENT_POOL_SIZE);
+ if (!ctx->event_pool) {
+ gf_log ("", GF_LOG_CRITICAL,
+ "ERROR: glusterfs event pool creation failed");
+ goto out;
+ }
+
+ ctx->pool = GF_CALLOC (1, sizeof (call_pool_t), gfd_mt_call_pool_t);
+ if (!ctx->pool) {
+ gf_log ("", GF_LOG_CRITICAL,
+ "ERROR: glusterfs call pool creation failed");
+ goto out;
+ }
+
+ INIT_LIST_HEAD (&ctx->pool->all_frames);
+ LOCK_INIT (&ctx->pool->lock);
+
+ /* frame_mem_pool size 112 * 4k */
+ ctx->pool->frame_mem_pool = mem_pool_new (call_frame_t, 4096);
+ if (!ctx->pool->frame_mem_pool) {
+ gf_log ("", GF_LOG_CRITICAL,
+ "ERROR: glusterfs frame pool creation failed");
+ goto out;
+ }
+ /* stack_mem_pool size 256 * 1024 */
+ ctx->pool->stack_mem_pool = mem_pool_new (call_stack_t, 1024);
+ if (!ctx->pool->stack_mem_pool) {
+ gf_log ("", GF_LOG_CRITICAL,
+ "ERROR: glusterfs stack pool creation failed");
+ goto out;
+ }
+
+ ctx->stub_mem_pool = mem_pool_new (call_stub_t, 1024);
+ if (!ctx->stub_mem_pool) {
+ gf_log ("", GF_LOG_CRITICAL,
+ "ERROR: glusterfs stub pool creation failed");
+ goto out;
+ }
+
+ ctx->dict_pool = mem_pool_new (dict_t, GF_MEMPOOL_COUNT_OF_DICT_T);
+ if (!ctx->dict_pool)
+ goto out;
+
+ ctx->dict_pair_pool = mem_pool_new (data_pair_t,
+ GF_MEMPOOL_COUNT_OF_DATA_PAIR_T);
+ if (!ctx->dict_pair_pool)
+ goto out;
+
+ ctx->dict_data_pool = mem_pool_new (data_t, GF_MEMPOOL_COUNT_OF_DATA_T);
+ if (!ctx->dict_data_pool)
+ goto out;
+
+ pthread_mutex_init (&(ctx->lock), NULL);
- return ret;
+ ctx->clienttable = gf_clienttable_alloc();
+ if (!ctx->clienttable)
+ goto out;
+
+ cmd_args = &ctx->cmd_args;
+
+ /* parsing command line arguments */
+ cmd_args->log_level = DEFAULT_LOG_LEVEL;
+
+ cmd_args->mac_compat = GF_OPTION_DISABLE;
+#ifdef GF_DARWIN_HOST_OS
+ /* On Darwin machines, O_APPEND is not handled,
+ * which may corrupt the data
+ */
+ cmd_args->fuse_direct_io_mode = GF_OPTION_DISABLE;
+#else
+ cmd_args->fuse_direct_io_mode = GF_OPTION_DEFERRED;
+#endif
+ cmd_args->fuse_attribute_timeout = -1;
+ cmd_args->fuse_entry_timeout = -1;
+ cmd_args->fopen_keep_cache = GF_OPTION_DEFERRED;
+
+ INIT_LIST_HEAD (&cmd_args->xlator_options);
+ INIT_LIST_HEAD (&cmd_args->volfile_servers);
+
+ lim.rlim_cur = RLIM_INFINITY;
+ lim.rlim_max = RLIM_INFINITY;
+ setrlimit (RLIMIT_CORE, &lim);
+
+ ret = 0;
+out:
+
+ if (ret && ctx) {
+ if (ctx->pool) {
+ mem_pool_destroy (ctx->pool->frame_mem_pool);
+ mem_pool_destroy (ctx->pool->stack_mem_pool);
+ }
+ GF_FREE (ctx->pool);
+ mem_pool_destroy (ctx->stub_mem_pool);
+ mem_pool_destroy (ctx->dict_pool);
+ mem_pool_destroy (ctx->dict_data_pool);
+ mem_pool_destroy (ctx->dict_pair_pool);
+ }
+
+ return ret;
}
-int
-set_log_file_path (cmd_args_t *cmd_args)
+static int
+logging_init (glusterfs_ctx_t *ctx, const char *progpath)
{
- int i = 0;
- int ret = 0;
- int port = 0;
- char *tmp_ptr = NULL;
- char tmp_str[1024] = {0,};
-
- if (cmd_args->mount_point) {
- for (i = 1; i < strlen (cmd_args->mount_point); i++) {
- tmp_str[i-1] = cmd_args->mount_point[i];
- if (cmd_args->mount_point[i] == '/')
- tmp_str[i-1] = '-';
+ cmd_args_t *cmd_args = NULL;
+ int ret = 0;
+ char ident[1024] = {0,};
+ char *progname = NULL;
+ char *ptr = NULL;
+
+ cmd_args = &ctx->cmd_args;
+
+ if (cmd_args->log_file == NULL) {
+ ret = gf_set_log_file_path (cmd_args);
+ if (ret == -1) {
+ fprintf (stderr, "ERROR: failed to set the log file path\n");
+ return -1;
}
- ret = asprintf (&cmd_args->log_file,
- DEFAULT_LOG_FILE_DIRECTORY "/%s.log",
- tmp_str);
- if (-1 == ret) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "asprintf failed while setting up log-file");
+ }
+
+#ifdef GF_USE_SYSLOG
+ progname = gf_strdup (progpath);
+ snprintf (ident, 1024, "%s_%s", basename(progname),
+ basename(cmd_args->log_file));
+ GF_FREE (progname);
+ /* remove .log suffix */
+ if (NULL != (ptr = strrchr(ident, '.'))) {
+ if (strcmp(ptr, ".log") == 0) {
+ /* note: ptr points to location in ident only */
+ ptr[0] = '\0';
}
- goto done;
- }
-
- if (cmd_args->volume_file) {
- for (i = 0; i < strlen (cmd_args->volume_file); i++) {
- tmp_str[i] = cmd_args->volume_file[i];
- if (cmd_args->volume_file[i] == '/')
- tmp_str[i] = '-';
+ }
+ ptr = ident;
+#endif
+
+ if (gf_log_init (ctx, cmd_args->log_file, ptr) == -1) {
+ fprintf (stderr, "ERROR: failed to open logfile %s\n",
+ cmd_args->log_file);
+ return -1;
+ }
+
+ gf_log_set_loglevel (cmd_args->log_level);
+
+ return 0;
+}
+
+void
+gf_check_and_set_mem_acct (glusterfs_ctx_t *ctx, int argc, char *argv[])
+{
+ int i = 0;
+ for (i = 0; i < argc; i++) {
+ if (strcmp (argv[i], "--mem-accounting") == 0) {
+ gf_mem_acct_enable_set (ctx);
+ break;
}
- ret = asprintf (&cmd_args->log_file,
- DEFAULT_LOG_FILE_DIRECTORY "/%s.log",
- tmp_str);
- if (-1 == ret) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "asprintf failed while setting up log-file");
+ }
+}
+
+int
+parse_cmdline (int argc, char *argv[], glusterfs_ctx_t *ctx)
+{
+ int process_mode = 0;
+ int ret = 0;
+ struct stat stbuf = {0, };
+ char timestr[32];
+ char tmp_logfile[1024] = { 0 };
+ char *tmp_logfile_dyn = NULL;
+ char *tmp_logfilebase = NULL;
+ cmd_args_t *cmd_args = NULL;
+
+ cmd_args = &ctx->cmd_args;
+
+ argp_parse (&argp, argc, argv, ARGP_IN_ORDER, NULL, cmd_args);
+
+ if (ENABLE_DEBUG_MODE == cmd_args->debug_mode) {
+ cmd_args->log_level = GF_LOG_DEBUG;
+ cmd_args->log_file = "/dev/stderr";
+ cmd_args->no_daemon_mode = ENABLE_NO_DAEMON_MODE;
+ }
+
+ process_mode = gf_get_process_mode (argv[0]);
+ ctx->process_mode = process_mode;
+
+ /* Make sure after the parsing cli, if '--volfile-server' option is
+ given, then '--volfile-id' is mandatory */
+ if (cmd_args->volfile_server && !cmd_args->volfile_id) {
+ gf_log ("glusterfs", GF_LOG_CRITICAL,
+ "ERROR: '--volfile-id' is mandatory if '-s' OR "
+ "'--volfile-server' option is given");
+ ret = -1;
+ goto out;
+ }
+
+ if ((cmd_args->volfile_server == NULL)
+ && (cmd_args->volfile == NULL)) {
+ if (process_mode == GF_SERVER_PROCESS)
+ cmd_args->volfile = gf_strdup (DEFAULT_SERVER_VOLFILE);
+ else if (process_mode == GF_GLUSTERD_PROCESS)
+ cmd_args->volfile = gf_strdup (DEFAULT_GLUSTERD_VOLFILE);
+ else
+ cmd_args->volfile = gf_strdup (DEFAULT_CLIENT_VOLFILE);
+
+ /* Check if the volfile exists, if not give usage output
+ and exit */
+ ret = stat (cmd_args->volfile, &stbuf);
+ if (ret) {
+ gf_log ("glusterfs", GF_LOG_CRITICAL,
+ "ERROR: parsing the volfile failed (%s)\n",
+ strerror (errno));
+ /* argp_usage (argp.) */
+ fprintf (stderr, "USAGE: %s [options] [mountpoint]\n",
+ argv[0]);
+ goto out;
}
- goto done;
}
-
- if (cmd_args->volfile_server) {
- port = 1;
- tmp_ptr = "default";
-
- if (cmd_args->volfile_server_port)
- port = cmd_args->volfile_server_port;
- if (cmd_args->volfile_id)
- tmp_ptr = cmd_args->volfile_id;
-
- ret = asprintf (&cmd_args->log_file,
- DEFAULT_LOG_FILE_DIRECTORY "/%s-%s-%d.log",
- cmd_args->volfile_server, tmp_ptr, port);
- if (-1 == ret) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "asprintf failed while setting up log-file");
+
+ if (cmd_args->run_id) {
+ ret = sys_lstat (cmd_args->log_file, &stbuf);
+ /* If its /dev/null, or /dev/stdout, /dev/stderr,
+ * let it use the same, no need to alter
+ */
+ if (((ret == 0) &&
+ (S_ISREG (stbuf.st_mode) || S_ISLNK (stbuf.st_mode))) ||
+ (ret == -1)) {
+ /* Have separate logfile per run */
+ gf_time_fmt (timestr, sizeof timestr, time (NULL),
+ gf_timefmt_FT);
+ sprintf (tmp_logfile, "%s.%s.%d",
+ cmd_args->log_file, timestr, getpid ());
+
+ /* Create symlink to actual log file */
+ sys_unlink (cmd_args->log_file);
+
+ tmp_logfile_dyn = gf_strdup (tmp_logfile);
+ tmp_logfilebase = basename (tmp_logfile_dyn);
+ ret = sys_symlink (tmp_logfilebase,
+ cmd_args->log_file);
+ if (ret == -1) {
+ fprintf (stderr, "ERROR: symlink of logfile failed\n");
+ goto out;
+ }
+
+ GF_FREE (cmd_args->log_file);
+ cmd_args->log_file = gf_strdup (tmp_logfile);
+
+ GF_FREE (tmp_logfile_dyn);
}
}
- done:
+
+#ifdef GF_DARWIN_HOST_OS
+ if (cmd_args->mount_point)
+ cmd_args->mac_compat = GF_OPTION_DEFERRED;
+#endif
+
+ ret = 0;
+out:
return ret;
}
-int
-main (int argc, char *argv[])
+
+int
+glusterfs_pidfile_setup (glusterfs_ctx_t *ctx)
{
- glusterfs_ctx_t *ctx = NULL;
- cmd_args_t *cmd_args = NULL;
- call_pool_t *pool = NULL;
- struct stat stbuf;
- char tmp_logfile[1024] = { 0 };
- char *tmp_logfile_dyn = NULL;
- char *tmp_logfilebase = NULL;
- char timestr[256] = { 0 };
- time_t utime;
- struct tm *tm = NULL;
- int ret = 0;
- struct rlimit lim;
- FILE *specfp = NULL;
- xlator_t *graph = NULL;
- xlator_t *trav = NULL;
- int fuse_volume_found = 0;
- int xl_count = 0;
- uint8_t process_mode = 0;
-
- ret = glusterfs_globals_init ();
- if (ret)
+ cmd_args_t *cmd_args = NULL;
+ int ret = -1;
+ FILE *pidfp = NULL;
+
+ cmd_args = &ctx->cmd_args;
+
+ if (!cmd_args->pid_file)
+ return 0;
+
+ pidfp = fopen (cmd_args->pid_file, "a+");
+ if (!pidfp) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "pidfile %s error (%s)",
+ cmd_args->pid_file, strerror (errno));
+ goto out;
+ }
+
+ ret = lockf (fileno (pidfp), F_TLOCK, 0);
+ if (ret) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "pidfile %s lock error (%s)",
+ cmd_args->pid_file, strerror (errno));
+ goto out;
+ }
+
+ gf_log ("glusterfsd", GF_LOG_TRACE,
+ "pidfile %s lock acquired",
+ cmd_args->pid_file);
+
+ ret = lockf (fileno (pidfp), F_ULOCK, 0);
+ if (ret) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "pidfile %s unlock error (%s)",
+ cmd_args->pid_file, strerror (errno));
+ goto out;
+ }
+
+ ctx->pidfp = pidfp;
+
+ ret = 0;
+out:
+ if (ret && pidfp)
+ fclose (pidfp);
+
+ return ret;
+}
+
+
+int
+glusterfs_pidfile_cleanup (glusterfs_ctx_t *ctx)
+{
+ cmd_args_t *cmd_args = NULL;
+
+ cmd_args = &ctx->cmd_args;
+
+ if (!ctx->pidfp)
+ return 0;
+
+ gf_log ("glusterfsd", GF_LOG_TRACE,
+ "pidfile %s cleanup",
+ cmd_args->pid_file);
+
+ if (ctx->cmd_args.pid_file) {
+ unlink (ctx->cmd_args.pid_file);
+ ctx->cmd_args.pid_file = NULL;
+ }
+
+ lockf (fileno (ctx->pidfp), F_ULOCK, 0);
+ fclose (ctx->pidfp);
+ ctx->pidfp = NULL;
+
+ return 0;
+}
+
+int
+glusterfs_pidfile_update (glusterfs_ctx_t *ctx)
+{
+ cmd_args_t *cmd_args = NULL;
+ int ret = 0;
+ FILE *pidfp = NULL;
+
+ cmd_args = &ctx->cmd_args;
+
+ pidfp = ctx->pidfp;
+ if (!pidfp)
+ return 0;
+
+ ret = lockf (fileno (pidfp), F_TLOCK, 0);
+ if (ret) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "pidfile %s lock failed",
+ cmd_args->pid_file);
return ret;
+ }
- utime = time (NULL);
- ctx = glusterfs_ctx_get ();
- process_mode = gf_get_process_mode (argv[0]);
- set_global_ctx_ptr (ctx);
- ctx->process_uuid = zr_build_process_uuid ();
- cmd_args = &ctx->cmd_args;
-
- /* parsing command line arguments */
- cmd_args->log_level = DEFAULT_LOG_LEVEL;
- cmd_args->fuse_direct_io_mode_flag = _gf_true;
-
- INIT_LIST_HEAD (&cmd_args->xlator_options);
-
- argp_parse (&argp, argc, argv, ARGP_IN_ORDER, NULL, cmd_args);
-
- if (ENABLE_DEBUG_MODE == cmd_args->debug_mode) {
- cmd_args->log_level = GF_LOG_DEBUG;
- cmd_args->log_file = "/dev/stdout";
- cmd_args->no_daemon_mode = ENABLE_NO_DAEMON_MODE;
- }
+ ret = ftruncate (fileno (pidfp), 0);
+ if (ret) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "pidfile %s truncation failed",
+ cmd_args->pid_file);
+ return ret;
+ }
- if ((cmd_args->volfile_server == NULL)
- && (cmd_args->volume_file == NULL)) {
- if (process_mode == GF_SERVER_PROCESS)
- cmd_args->volume_file = strdup (DEFAULT_SERVER_VOLUME_FILE);
- else
- cmd_args->volume_file = strdup (DEFAULT_CLIENT_VOLUME_FILE);
- }
+ ret = fprintf (pidfp, "%d\n", getpid ());
+ if (ret <= 0) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "pidfile %s write failed",
+ cmd_args->pid_file);
+ return ret;
+ }
- if (cmd_args->log_file == NULL) {
- ret = set_log_file_path (cmd_args);
- if (-1 == ret) {
- fprintf (stderr, "failed to set the log file path.. "
- "exiting\n");
- return -1;
+ ret = fflush (pidfp);
+ if (ret) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "pidfile %s write failed",
+ cmd_args->pid_file);
+ return ret;
+ }
+
+ gf_log ("glusterfsd", GF_LOG_DEBUG,
+ "pidfile %s updated with pid %d",
+ cmd_args->pid_file, getpid ());
+
+ return 0;
+}
+
+
+void *
+glusterfs_sigwaiter (void *arg)
+{
+ sigset_t set;
+ int ret = 0;
+ int sig = 0;
+
+
+ sigemptyset (&set);
+ sigaddset (&set, SIGINT); /* cleanup_and_exit */
+ sigaddset (&set, SIGTERM); /* cleanup_and_exit */
+ sigaddset (&set, SIGHUP); /* reincarnate */
+ sigaddset (&set, SIGUSR1); /* gf_proc_dump_info */
+ sigaddset (&set, SIGUSR2); /* gf_latency_toggle */
+
+ for (;;) {
+ ret = sigwait (&set, &sig);
+ if (ret)
+ continue;
+
+
+ switch (sig) {
+ case SIGINT:
+ case SIGTERM:
+ cleanup_and_exit (sig);
+ break;
+ case SIGHUP:
+ reincarnate (sig);
+ break;
+ case SIGUSR1:
+ gf_proc_dump_info (sig, glusterfsd_ctx);
+ break;
+ case SIGUSR2:
+ gf_latency_toggle (sig, glusterfsd_ctx);
+ break;
+ default:
+
+ break;
}
}
- ctx->page_size = 128 * GF_UNIT_KB;
- ctx->iobuf_pool = iobuf_pool_new (8 * GF_UNIT_MB, ctx->page_size + 4096);
- ctx->event_pool = event_pool_new (DEFAULT_EVENT_POOL_SIZE);
- pthread_mutex_init (&(ctx->lock), NULL);
- pool = ctx->pool = CALLOC (1, sizeof (call_pool_t));
- ERR_ABORT (ctx->pool);
- LOCK_INIT (&pool->lock);
- INIT_LIST_HEAD (&pool->all_frames);
-
- if (cmd_args->pid_file != NULL) {
- ctx->pidfp = fopen (cmd_args->pid_file, "a+");
- if (ctx->pidfp == NULL) {
- fprintf (stderr,
- "unable to open pid file %s. %s. exiting\n",
- cmd_args->pid_file, strerror (errno));
- /* do cleanup and exit ?! */
- return -1;
- }
- ret = gf_lockfd (fileno (ctx->pidfp));
- if (ret == -1) {
- fprintf (stderr, "unable to lock pid file %s. %s. "
- "Is another instance of %s running?!\n"
- "exiting\n", cmd_args->pid_file,
- strerror (errno), argv[0]);
- fclose (ctx->pidfp);
- return -1;
- }
- ret = ftruncate (fileno (ctx->pidfp), 0);
- if (ret == -1) {
- fprintf (stderr,
- "unable to truncate file %s. %s. exiting\n",
- cmd_args->pid_file, strerror (errno));
- gf_unlockfd (fileno (ctx->pidfp));
- fclose (ctx->pidfp);
- return -1;
- }
- }
-
- /* initializing logs */
- if (cmd_args->run_id) {
- ret = stat (cmd_args->log_file, &stbuf);
- /* If its /dev/null, or /dev/stdout, /dev/stderr,
- * let it use the same, no need to alter
- */
- if (((ret == 0) &&
- (S_ISREG (stbuf.st_mode) || S_ISLNK (stbuf.st_mode))) ||
- (ret == -1)) {
- /* Have seperate logfile per run */
- tm = localtime (&utime);
- strftime (timestr, 256, "%Y%m%d.%H%M%S", tm);
- sprintf (tmp_logfile, "%s.%s.%d",
- cmd_args->log_file, timestr, getpid ());
-
- /* Create symlink to actual log file */
- unlink (cmd_args->log_file);
-
- tmp_logfile_dyn = strdup (tmp_logfile);
- tmp_logfilebase = basename (tmp_logfile_dyn);
- ret = symlink (tmp_logfilebase, cmd_args->log_file);
- if (-1 == ret) {
- fprintf (stderr, "symlink of logfile failed");
- } else {
- FREE (cmd_args->log_file);
- cmd_args->log_file = strdup (tmp_logfile);
+ return NULL;
+}
+
+
+void
+glusterfsd_print_trace (int signum)
+{
+ gf_print_trace (signum, glusterfsd_ctx);
+}
+
+
+int
+glusterfs_signals_setup (glusterfs_ctx_t *ctx)
+{
+ sigset_t set;
+ int ret = 0;
+
+ sigemptyset (&set);
+
+ /* common setting for all threads */
+ signal (SIGSEGV, glusterfsd_print_trace);
+ signal (SIGABRT, glusterfsd_print_trace);
+ signal (SIGILL, glusterfsd_print_trace);
+ signal (SIGTRAP, glusterfsd_print_trace);
+ signal (SIGFPE, glusterfsd_print_trace);
+ signal (SIGBUS, glusterfsd_print_trace);
+ signal (SIGINT, cleanup_and_exit);
+ signal (SIGPIPE, SIG_IGN);
+
+ /* block these signals from non-sigwaiter threads */
+ sigaddset (&set, SIGTERM); /* cleanup_and_exit */
+ sigaddset (&set, SIGHUP); /* reincarnate */
+ sigaddset (&set, SIGUSR1); /* gf_proc_dump_info */
+ sigaddset (&set, SIGUSR2); /* gf_latency_toggle */
+
+ ret = pthread_sigmask (SIG_BLOCK, &set, NULL);
+ if (ret) {
+ gf_log ("glusterfsd", GF_LOG_WARNING,
+ "failed to execute pthread_signmask %s",
+ strerror (errno));
+ return ret;
+ }
+
+ ret = pthread_create (&ctx->sigwaiter, NULL, glusterfs_sigwaiter,
+ (void *) &set);
+ if (ret) {
+ /*
+ TODO:
+ fallback to signals getting handled by other threads.
+ setup the signal handlers
+ */
+ gf_log ("glusterfsd", GF_LOG_WARNING,
+ "failed to create pthread %s",
+ strerror (errno));
+ return ret;
+ }
+
+ return ret;
+}
+
+
+int
+daemonize (glusterfs_ctx_t *ctx)
+{
+ int ret = -1;
+ cmd_args_t *cmd_args = NULL;
+ int cstatus = 0;
+ int err = 0;
+
+ cmd_args = &ctx->cmd_args;
+
+ ret = glusterfs_pidfile_setup (ctx);
+ if (ret)
+ goto out;
+
+ if (cmd_args->no_daemon_mode)
+ goto postfork;
+
+ if (cmd_args->debug_mode)
+ goto postfork;
+
+ ret = pipe (ctx->daemon_pipe);
+ if (ret) {
+ /* If pipe() fails, retain daemon_pipe[] = {-1, -1}
+ and parent will just not wait for child status
+ */
+ ctx->daemon_pipe[0] = -1;
+ ctx->daemon_pipe[1] = -1;
+ }
+
+ ret = os_daemon_return (0, 0);
+ switch (ret) {
+ case -1:
+ if (ctx->daemon_pipe[0] != -1) {
+ close (ctx->daemon_pipe[0]);
+ close (ctx->daemon_pipe[1]);
+ }
+
+ gf_log ("daemonize", GF_LOG_ERROR,
+ "Daemonization failed: %s", strerror(errno));
+ goto out;
+ case 0:
+ /* child */
+ /* close read */
+ close (ctx->daemon_pipe[0]);
+ break;
+ default:
+ /* parent */
+ /* close write */
+ close (ctx->daemon_pipe[1]);
+
+ if (ctx->mnt_pid > 0) {
+ ret = waitpid (ctx->mnt_pid, &cstatus, 0);
+ if (!(ret == ctx->mnt_pid && cstatus == 0)) {
+ gf_log ("daemonize", GF_LOG_ERROR,
+ "mount failed");
+ exit (1);
}
+ }
+
+ err = 1;
+ read (ctx->daemon_pipe[0], (void *)&err, sizeof (err));
+ _exit (err);
+ }
+
+postfork:
+ ret = glusterfs_pidfile_update (ctx);
+ if (ret)
+ goto out;
+
+ glusterfs_signals_setup (ctx);
+out:
+ return ret;
+}
+
+
+int
+glusterfs_process_volfp (glusterfs_ctx_t *ctx, FILE *fp)
+{
+ glusterfs_graph_t *graph = NULL;
+ int ret = -1;
+ xlator_t *trav = NULL;
+
+ graph = glusterfs_graph_construct (fp);
+ if (!graph) {
+ gf_log ("", GF_LOG_ERROR, "failed to construct the graph");
+ goto out;
+ }
+
+ for (trav = graph->first; trav; trav = trav->next) {
+ if (strcmp (trav->type, "mount/fuse") == 0) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "fuse xlator cannot be specified "
+ "in volume file");
+ goto out;
+ }
+ }
+
+ ret = glusterfs_graph_prepare (graph, ctx);
+ if (ret) {
+ glusterfs_graph_destroy (graph);
+ goto out;
+ }
+
+ ret = glusterfs_graph_activate (graph, ctx);
+
+ if (ret) {
+ glusterfs_graph_destroy (graph);
+ goto out;
+ }
+
+ gf_log_dump_graph (fp, graph);
+
+ ret = 0;
+out:
+ if (fp)
+ fclose (fp);
+
+ if (ret && !ctx->active) {
+ /* there is some error in setting up the first graph itself */
+ cleanup_and_exit (0);
+ }
+
+ return ret;
+}
+
+
+int
+glusterfs_volumes_init (glusterfs_ctx_t *ctx)
+{
+ FILE *fp = NULL;
+ cmd_args_t *cmd_args = NULL;
+ int ret = 0;
+
+ cmd_args = &ctx->cmd_args;
+
+ if (cmd_args->sock_file) {
+ ret = glusterfs_listener_init (ctx);
+ if (ret)
+ goto out;
+ }
+
+ if (cmd_args->volfile_server) {
+ ret = glusterfs_mgmt_init (ctx);
+ /* return, do not emancipate() yet */
+ return ret;
+ }
+
+ fp = get_volfp (ctx);
+
+ if (!fp) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "Cannot reach volume specification file");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterfs_process_volfp (ctx, fp);
+ if (ret)
+ goto out;
+
+out:
+ emancipate (ctx, ret);
+ return ret;
+}
+
+
+/* This is the only legal global pointer */
+glusterfs_ctx_t *glusterfsd_ctx;
+
+int
+main (int argc, char *argv[])
+{
+ glusterfs_ctx_t *ctx = NULL;
+ int ret = -1;
+ char cmdlinestr[PATH_MAX] = {0,};
+
+ ctx = glusterfs_ctx_new ();
+ if (!ctx) {
+ gf_log ("glusterfs", GF_LOG_CRITICAL,
+ "ERROR: glusterfs context not initialized");
+ return ENOMEM;
+ }
+ glusterfsd_ctx = ctx;
- FREE (tmp_logfile_dyn);
- }
- }
-
- gf_global_variable_init ();
-
- if (gf_log_init (cmd_args->log_file) == -1) {
- fprintf (stderr,
- "failed to open logfile %s. exiting\n",
- cmd_args->log_file);
- return -1;
- }
- gf_log_set_loglevel (cmd_args->log_level);
- gf_proc_dump_init();
-
- /* setting up environment */
- lim.rlim_cur = RLIM_INFINITY;
- lim.rlim_max = RLIM_INFINITY;
- if (setrlimit (RLIMIT_CORE, &lim) == -1) {
- fprintf (stderr, "ignoring %s\n",
- strerror (errno));
- }
#ifdef DEBUG
- mtrace ();
+ gf_mem_acct_enable_set (ctx);
+#else
+ /* Enable memory accounting on the fly based on argument */
+ gf_check_and_set_mem_acct (ctx, argc, argv);
#endif
- signal (SIGUSR1, (sighandler_t) gf_proc_dump_info);
- signal (SIGSEGV, gf_print_trace);
- signal (SIGABRT, gf_print_trace);
- signal (SIGPIPE, SIG_IGN);
- signal (SIGHUP, gf_log_logrotate);
- signal (SIGTERM, cleanup_and_exit);
- /* This is used to dump details */
- /* signal (SIGUSR2, (sighandler_t) glusterfs_stats); */
-
- /* getting and parsing volume file */
- if ((specfp = _get_specfp (ctx)) == NULL) {
- /* _get_specfp() prints necessary error message */
- gf_log ("glusterfs", GF_LOG_ERROR, "exiting\n");
- argp_help (&argp, stderr, ARGP_HELP_SEE, (char *) argv[0]);
- return -1;
- }
- if ((graph = _parse_specfp (ctx, specfp)) == NULL) {
- /* _parse_specfp() prints necessary error message */
- fprintf (stderr, "exiting\n");
- gf_log ("glusterfs", GF_LOG_ERROR, "exiting");
- return -1;
- }
- ctx->specfp = specfp;
-
- /* check whether MOUNT-POINT argument and fuse volume are given
- * at same time or not. If not, add argument MOUNT-POINT to graph
- * as top volume if given
- */
- trav = graph;
- fuse_volume_found = 0;
-
- while (trav) {
- if (strcmp (trav->type, ZR_XLATOR_FUSE) == 0) {
- if (dict_get (trav->options,
- ZR_MOUNTPOINT_OPT) != NULL) {
- trav->ctx = graph->ctx;
- fuse_volume_found = 1;
- }
- }
+ ret = glusterfs_globals_init (ctx);
+ if (ret)
+ return ret;
- xl_count++; /* Getting this value right is very important */
- trav = trav->next;
- }
-
- ctx->xl_count = xl_count + 1;
-
- if (!fuse_volume_found && (cmd_args->mount_point != NULL)) {
- if ((graph = _add_fuse_mount (graph)) == NULL) {
- /* _add_fuse_mount() prints necessary
- * error message
- */
- fprintf (stderr, "exiting\n");
- gf_log ("glusterfs", GF_LOG_ERROR, "exiting");
- return -1;
- }
- }
-
- /* daemonize now */
- if (!cmd_args->no_daemon_mode) {
- if (daemon (0, 0) == -1) {
- fprintf (stderr, "unable to run in daemon mode: %s",
- strerror (errno));
- gf_log ("glusterfs", GF_LOG_ERROR,
- "unable to run in daemon mode: %s",
- strerror (errno));
- return -1;
- }
-
- /* we are daemon now */
- _gf_dump_details (argc, argv);
-
- /* update pid file, if given */
- if (cmd_args->pid_file != NULL) {
- fprintf (ctx->pidfp, "%d\n", getpid ());
- fflush (ctx->pidfp);
- /* we close pid file on exit */
- }
- } else {
- /*
- * Need to have this line twice because PID is different
- * in daemon and non-daemon cases.
- */
+ THIS->ctx = ctx;
- _gf_dump_details (argc, argv);
- }
+ ret = glusterfs_ctx_defaults_init (ctx);
+ if (ret)
+ goto out;
- gf_log_volume_file (ctx->specfp);
+ ret = parse_cmdline (argc, argv, ctx);
+ if (ret)
+ goto out;
- gf_log ("glusterfs", GF_LOG_DEBUG,
- "running in pid %d", getpid ());
-
- gf_timer_registry_init (ctx);
+ ret = logging_init (ctx, argv[0]);
+ if (ret)
+ goto out;
+
+ /* log the version of glusterfs running here along with the actual
+ command line options. */
+ {
+ int i = 0;
+ strcpy (cmdlinestr, argv[0]);
+ for (i = 1; i < argc; i++) {
+ strcat (cmdlinestr, " ");
+ strcat (cmdlinestr, argv[i]);
+ }
+ gf_log (argv[0], GF_LOG_INFO,
+ "Started running %s version %s (%s)",
+ argv[0], PACKAGE_VERSION, cmdlinestr);
+ }
- /* override xlator options with command line options
- * where applicable
- */
- gf_add_cmdline_options (graph, cmd_args);
+ gf_proc_dump_init();
- ctx->graph = graph;
- if (glusterfs_graph_init (graph, fuse_volume_found) != 0) {
- gf_log ("glusterfs", GF_LOG_ERROR,
- "translator initialization failed. exiting");
- return -1;
- }
+ ret = create_fuse_mount (ctx);
+ if (ret)
+ goto out;
- /* Send PARENT_UP notify to all the translators now */
- graph->notify (graph, GF_EVENT_PARENT_UP, ctx->graph);
+ ret = daemonize (ctx);
+ if (ret)
+ goto out;
- gf_log ("glusterfs", GF_LOG_NORMAL, "Successfully started");
-
- if (cmd_args->log_server) {
- gf_log_central_init (ctx, cmd_args->log_server,
- "socket", cmd_args->log_server_port);
- } else if (cmd_args->volfile_server) {
- gf_log_central_init (ctx, cmd_args->volfile_server,
- "socket", cmd_args->volfile_server_port);
+ ctx->env = syncenv_new (0, 0, 0);
+ if (!ctx->env) {
+ gf_log ("", GF_LOG_ERROR,
+ "Could not create new sync-environment");
+ goto out;
}
- event_dispatch (ctx->event_pool);
+ ret = glusterfs_volumes_init (ctx);
+ if (ret)
+ goto out;
- return 0;
+ ret = event_dispatch (ctx->event_pool);
+
+out:
+// glusterfs_ctx_destroy (ctx);
+
+ return ret;
}
diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h
index d661335e3..9e2a0e56e 100644
--- a/glusterfsd/src/glusterfsd.h
+++ b/glusterfsd/src/glusterfsd.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __GLUSTERFSD_H__
#define __GLUSTERFSD_H__
@@ -24,11 +14,12 @@
#define _CONFIG_H
#include "config.h"
#endif
+#include "rpcsvc.h"
+#include "glusterd1-xdr.h"
-#define DEFAULT_CLIENT_VOLUME_FILE CONFDIR "/glusterfs.vol"
-#define DEFAULT_SERVER_VOLUME_FILE CONFDIR "/glusterfsd.vol"
-#define DEFAULT_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs"
-#define DEFAULT_LOG_LEVEL GF_LOG_NORMAL
+#define DEFAULT_GLUSTERD_VOLFILE CONFDIR "/glusterd.vol"
+#define DEFAULT_CLIENT_VOLFILE CONFDIR "/glusterfs.vol"
+#define DEFAULT_SERVER_VOLFILE CONFDIR "/glusterfsd.vol"
#define DEFAULT_EVENT_POOL_SIZE 16384
@@ -37,47 +28,86 @@
#define ARGP_LOG_LEVEL_CRITICAL_OPTION "CRITICAL"
#define ARGP_LOG_LEVEL_ERROR_OPTION "ERROR"
#define ARGP_LOG_LEVEL_WARNING_OPTION "WARNING"
-#define ARGP_LOG_LEVEL_NORMAL_OPTION "NORMAL"
+#define ARGP_LOG_LEVEL_INFO_OPTION "INFO"
#define ARGP_LOG_LEVEL_DEBUG_OPTION "DEBUG"
#define ENABLE_NO_DAEMON_MODE 1
#define ENABLE_DEBUG_MODE 1
-#define ZR_XLATOR_FUSE "mount/fuse"
-#define ZR_MOUNTPOINT_OPT "mountpoint"
-#define ZR_ATTR_TIMEOUT_OPT "attribute-timeout"
-#define ZR_ENTRY_TIMEOUT_OPT "entry-timeout"
-#define ZR_DIRECT_IO_OPT "direct-io-mode"
-#define ZR_STRICT_VOLFILE_CHECK "strict-volfile-check"
+#define GF_MEMPOOL_COUNT_OF_DICT_T 4096
+/* Considering 4 key/value pairs in a dictionary on an average */
+#define GF_MEMPOOL_COUNT_OF_DATA_T (GF_MEMPOOL_COUNT_OF_DICT_T * 4)
+#define GF_MEMPOOL_COUNT_OF_DATA_PAIR_T (GF_MEMPOOL_COUNT_OF_DICT_T * 4)
enum argp_option_keys {
- ARGP_VOLFILE_SERVER_KEY = 's',
- ARGP_VOLUME_FILE_KEY = 'f',
- ARGP_LOG_LEVEL_KEY = 'L',
- ARGP_LOG_FILE_KEY = 'l',
- ARGP_VOLFILE_SERVER_PORT_KEY = 131,
- ARGP_VOLFILE_SERVER_TRANSPORT_KEY = 132,
- ARGP_PID_FILE_KEY = 'p',
- ARGP_NO_DAEMON_KEY = 'N',
- ARGP_RUN_ID_KEY = 'r',
- ARGP_DEBUG_KEY = 133,
- ARGP_DISABLE_DIRECT_IO_MODE_KEY = 134,
- ARGP_ENTRY_TIMEOUT_KEY = 135,
- ARGP_ATTRIBUTE_TIMEOUT_KEY = 136,
- ARGP_VOLUME_NAME_KEY = 137,
- ARGP_XLATOR_OPTION_KEY = 138,
+ ARGP_VOLFILE_SERVER_KEY = 's',
+ ARGP_VOLUME_FILE_KEY = 'f',
+ ARGP_LOG_LEVEL_KEY = 'L',
+ ARGP_LOG_FILE_KEY = 'l',
+ ARGP_VOLFILE_SERVER_PORT_KEY = 131,
+ ARGP_VOLFILE_SERVER_TRANSPORT_KEY = 132,
+ ARGP_PID_FILE_KEY = 'p',
+ ARGP_SOCK_FILE_KEY = 'S',
+ ARGP_NO_DAEMON_KEY = 'N',
+ ARGP_RUN_ID_KEY = 'r',
+ ARGP_DEBUG_KEY = 133,
+ ARGP_NEGATIVE_TIMEOUT_KEY = 134,
+ ARGP_ENTRY_TIMEOUT_KEY = 135,
+ ARGP_ATTRIBUTE_TIMEOUT_KEY = 136,
+ ARGP_VOLUME_NAME_KEY = 137,
+ ARGP_XLATOR_OPTION_KEY = 138,
+ ARGP_DIRECT_IO_MODE_KEY = 139,
#ifdef GF_DARWIN_HOST_OS
- ARGP_NON_LOCAL_KEY = 139,
+ ARGP_NON_LOCAL_KEY = 140,
#endif /* DARWIN */
- ARGP_VOLFILE_ID_KEY = 143,
- ARGP_VOLFILE_CHECK_KEY = 144,
- ARGP_VOLFILE_MAX_FETCH_ATTEMPTS = 145,
- ARGP_LOG_SERVER_KEY = 146,
- ARGP_LOG_SERVER_PORT_KEY = 147,
+ ARGP_VOLFILE_ID_KEY = 143,
+ ARGP_VOLFILE_CHECK_KEY = 144,
+ ARGP_VOLFILE_MAX_FETCH_ATTEMPTS = 145,
+ ARGP_LOG_SERVER_KEY = 146,
+ ARGP_LOG_SERVER_PORT_KEY = 147,
+ ARGP_READ_ONLY_KEY = 148,
+ ARGP_MAC_COMPAT_KEY = 149,
+ ARGP_DUMP_FUSE_KEY = 150,
+ ARGP_BRICK_NAME_KEY = 151,
+ ARGP_BRICK_PORT_KEY = 152,
+ ARGP_CLIENT_PID_KEY = 153,
+ ARGP_ACL_KEY = 154,
+ ARGP_WORM_KEY = 155,
+ ARGP_USER_MAP_ROOT_KEY = 156,
+ ARGP_MEM_ACCOUNTING_KEY = 157,
+ ARGP_SELINUX_KEY = 158,
+ ARGP_FOPEN_KEEP_CACHE_KEY = 159,
+ ARGP_GID_TIMEOUT_KEY = 160,
+ ARGP_FUSE_BACKGROUND_QLEN_KEY = 161,
+ ARGP_FUSE_CONGESTION_THRESHOLD_KEY = 162,
+ ARGP_INODE32_KEY = 163,
+ ARGP_FUSE_MOUNTOPTS_KEY = 164,
+ ARGP_FUSE_USE_READDIRP_KEY = 165,
+ ARGP_AUX_GFID_MOUNT_KEY = 166,
+};
+
+struct _gfd_vol_top_priv_t {
+ rpcsvc_request_t *req;
+ gd1_mgmt_brick_op_req xlator_req;
+ uint32_t blk_count;
+ uint32_t blk_size;
+ double throughput;
+ double time;
+ int32_t ret;
};
+typedef struct _gfd_vol_top_priv_t gfd_vol_top_priv_t;
-/* Moved here from fetch-spec.h */
-FILE *fetch_spec (glusterfs_ctx_t *ctx);
+int glusterfs_mgmt_pmap_signout (glusterfs_ctx_t *ctx);
+int glusterfs_mgmt_pmap_signin (glusterfs_ctx_t *ctx);
+int glusterfs_volfile_fetch (glusterfs_ctx_t *ctx);
+void cleanup_and_exit (int signum);
+int glusterfs_volume_top_write_perf (uint32_t blk_size, uint32_t blk_count,
+ char *brick_path, double *throughput,
+ double *time);
+int glusterfs_volume_top_read_perf (uint32_t blk_size, uint32_t blk_count,
+ char *brick_path, double *throughput,
+ double *time);
+extern glusterfs_ctx_t *glusterfsd_ctx;
#endif /* __GLUSTERFSD_H__ */
diff --git a/libgfchangelog.pc.in b/libgfchangelog.pc.in
new file mode 100644
index 000000000..d654280d0
--- /dev/null
+++ b/libgfchangelog.pc.in
@@ -0,0 +1,11 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+
+Name: libgfchangelog
+Description: GlusterFS Changelog Consumer Library
+Version: @VERSION@
+Libs: -L${libdir} -lgfchangelog -lglusterfs
+Cflags: -I${includedir}/glusterfs/gfchangelog -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64
diff --git a/libglusterfs/src/Makefile.am b/libglusterfs/src/Makefile.am
index a3fb923ca..907399ae6 100644
--- a/libglusterfs/src/Makefile.am
+++ b/libglusterfs/src/Makefile.am
@@ -1,21 +1,56 @@
-libglusterfs_la_CFLAGS = -fPIC -Wall -g -shared -nostartfiles $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS)
+libglusterfs_la_CFLAGS = -Wall $(GF_CFLAGS) \
+ $(GF_DARWIN_LIBGLUSTERFS_CFLAGS) \
+ -DDATADIR=\"$(localstatedir)\"
-libglusterfs_la_CPPFLAGS = -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -D_GNU_SOURCE -DXLATORDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator\" -DSCHEDULERDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/scheduler\" -DTRANSPORTDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/transport\" -D$(GF_HOST_OS) -DLIBDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/auth\"
+libglusterfs_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 \
+ -DXLATORDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator\" \
+ -I$(top_srcdir)/rpc/rpc-lib/src/ -I$(CONTRIBDIR)/rbtree
libglusterfs_la_LIBADD = @LEXLIB@
lib_LTLIBRARIES = libglusterfs.la
-libglusterfs_la_SOURCES = dict.c spec.lex.c y.tab.c xlator.c logging.c hashfn.c defaults.c scheduler.c common-utils.c transport.c timer.c inode.c call-stub.c compat.c authenticate.c fd.c compat-errno.c event.c mem-pool.c gf-dirent.c syscall.c iobuf.c globals.c statedump.c stack.c
-
-noinst_HEADERS = common-utils.h defaults.h dict.h glusterfs.h hashfn.h logging.h protocol.h scheduler.h xlator.h transport.h stack.h timer.h list.h inode.h call-stub.h compat.h authenticate.h fd.h revision.h compat-errno.h event.h mem-pool.h byte-order.h gf-dirent.h locking.h syscall.h iobuf.h globals.h statedump.h
-
-EXTRA_DIST = spec.l spec.y
-
-spec.lex.c: spec.l y.tab.h
- $(LEX) -t $(srcdir)/spec.l > $@
-
-y.tab.c y.tab.h: spec.y
- $(YACC) -d $(srcdir)/spec.y
-
-CLEANFILES = spec.lex.c y.tab.c y.tab.h
+CONTRIB_BUILDDIR = $(top_builddir)/contrib
+
+libglusterfs_la_SOURCES = dict.c xlator.c logging.c \
+ hashfn.c defaults.c common-utils.c timer.c inode.c call-stub.c \
+ compat.c fd.c compat-errno.c event.c mem-pool.c gf-dirent.c syscall.c \
+ iobuf.c globals.c statedump.c stack.c checksum.c daemon.c timespec.c \
+ $(CONTRIBDIR)/rbtree/rb.c rbthash.c store.c latency.c \
+ graph.c $(CONTRIBDIR)/uuid/clear.c $(CONTRIBDIR)/uuid/copy.c \
+ $(CONTRIBDIR)/uuid/gen_uuid.c $(CONTRIBDIR)/uuid/pack.c \
+ $(CONTRIBDIR)/uuid/parse.c $(CONTRIBDIR)/uuid/unparse.c \
+ $(CONTRIBDIR)/uuid/uuid_time.c $(CONTRIBDIR)/uuid/compare.c \
+ $(CONTRIBDIR)/uuid/isnull.c $(CONTRIBDIR)/uuid/unpack.c syncop.c \
+ graph-print.c trie.c run.c options.c fd-lk.c circ-buff.c \
+ event-history.c gidcache.c ctx.c client_t.c event-poll.c event-epoll.c \
+ $(CONTRIBDIR)/libgen/basename_r.c $(CONTRIBDIR)/libgen/dirname_r.c \
+ $(CONTRIBDIR)/stdlib/gf_mkostemp.c
+
+
+nodist_libglusterfs_la_SOURCES = y.tab.c graph.lex.c gf-error-codes.h
+
+BUILT_SOURCES = graph.lex.c
+
+noinst_HEADERS = common-utils.h defaults.h dict.h glusterfs.h hashfn.h timespec.h \
+ logging.h xlator.h stack.h timer.h list.h inode.h call-stub.h compat.h \
+ fd.h revision.h compat-errno.h event.h mem-pool.h byte-order.h \
+ gf-dirent.h locking.h syscall.h iobuf.h globals.h statedump.h \
+ checksum.h daemon.h $(CONTRIBDIR)/rbtree/rb.h store.h\
+ rbthash.h iatt.h latency.h mem-types.h $(CONTRIBDIR)/uuid/uuidd.h \
+ $(CONTRIBDIR)/uuid/uuid.h $(CONTRIBDIR)/uuid/uuidP.h \
+ $(CONTRIB_BUILDDIR)/uuid/uuid_types.h syncop.h graph-utils.h trie.h \
+ run.h options.h lkowner.h fd-lk.h circ-buff.h event-history.h \
+ gidcache.h client_t.h glusterfs-acl.h
+
+EXTRA_DIST = graph.l graph.y
+
+graph.lex.c: graph.l y.tab.h
+ $(LEX) -Pgraphyy -t $(srcdir)/graph.l > $@
+
+y.tab.c: y.tab.h
+y.tab.h: graph.y
+ $(YACC) -p graphyy -d $(srcdir)/graph.y
+
+CLEANFILES = graph.lex.c y.tab.c y.tab.h
+CONFIG_CLEAN_FILES = $(CONTRIB_BUILDDIR)/uuid/uuid_types.h
diff --git a/libglusterfs/src/authenticate.c b/libglusterfs/src/authenticate.c
deleted file mode 100644
index 24c840b5e..000000000
--- a/libglusterfs/src/authenticate.c
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE
-#endif
-
-#include <stdio.h>
-#include <dlfcn.h>
-#include <errno.h>
-#include "authenticate.h"
-
-static void
-init (dict_t *this,
- char *key,
- data_t *value,
- void *data)
-{
- void *handle = NULL;
- char *auth_file = NULL;
- auth_handle_t *auth_handle = NULL;
- auth_fn_t authenticate = NULL;
- int *error = NULL;
- int ret = 0;
-
- /* It gets over written */
- error = data;
-
- if (!strncasecmp (key, "ip", strlen ("ip"))) {
- gf_log ("authenticate", GF_LOG_ERROR,
- "AUTHENTICATION MODULE \"IP\" HAS BEEN REPLACED "
- "BY \"ADDR\"");
- dict_set (this, key, data_from_dynptr (NULL, 0));
- /* TODO: 1.3.x backword compatibility */
- // *error = -1;
- // return;
- key = "addr";
- }
-
- ret = asprintf (&auth_file, "%s/%s.so", LIBDIR, key);
- if (-1 == ret) {
- gf_log ("authenticate", GF_LOG_ERROR, "asprintf failed");
- dict_set (this, key, data_from_dynptr (NULL, 0));
- *error = -1;
- return;
- }
-
- handle = dlopen (auth_file, RTLD_LAZY);
- if (!handle) {
- gf_log ("authenticate", GF_LOG_ERROR, "dlopen(%s): %s\n",
- auth_file, dlerror ());
- dict_set (this, key, data_from_dynptr (NULL, 0));
- FREE (auth_file);
- *error = -1;
- return;
- }
- FREE (auth_file);
-
- authenticate = dlsym (handle, "gf_auth");
- if (!authenticate) {
- gf_log ("authenticate", GF_LOG_ERROR,
- "dlsym(gf_auth) on %s\n", dlerror ());
- dict_set (this, key, data_from_dynptr (NULL, 0));
- *error = -1;
- return;
- }
-
- auth_handle = CALLOC (1, sizeof (*auth_handle));
- if (!auth_handle) {
- gf_log ("authenticate", GF_LOG_ERROR, "Out of memory");
- dict_set (this, key, data_from_dynptr (NULL, 0));
- *error = -1;
- return;
- }
- auth_handle->vol_opt = CALLOC (1, sizeof (volume_opt_list_t));
- auth_handle->vol_opt->given_opt = dlsym (handle, "options");
- if (auth_handle->vol_opt->given_opt == NULL) {
- gf_log ("authenticate", GF_LOG_DEBUG,
- "volume option validation not specified");
- }
-
- auth_handle->authenticate = authenticate;
- auth_handle->handle = handle;
-
- dict_set (this, key,
- data_from_dynptr (auth_handle, sizeof (*auth_handle)));
-}
-
-static void
-fini (dict_t *this,
- char *key,
- data_t *value,
- void *data)
-{
- auth_handle_t *handle = data_to_ptr (value);
- if (handle) {
- dlclose (handle->handle);
- }
-}
-
-int32_t
-gf_auth_init (xlator_t *xl, dict_t *auth_modules)
-{
- int ret = 0;
- auth_handle_t *handle = NULL;
- data_pair_t *pair = NULL;
- dict_foreach (auth_modules, init, &ret);
- if (!ret) {
- pair = auth_modules->members_list;
- while (pair) {
- handle = data_to_ptr (pair->value);
- if (handle) {
- list_add_tail (&(handle->vol_opt->list),
- &(xl->volume_options));
- if (-1 ==
- validate_xlator_volume_options (xl,
- handle->vol_opt->given_opt)) {
- gf_log ("authenticate", GF_LOG_ERROR,
- "volume option validation "
- "failed");
- ret = -1;
- }
- }
- pair = pair->next;
- }
- }
- if (ret) {
- gf_log (xl->name, GF_LOG_ERROR, "authentication init failed");
- dict_foreach (auth_modules, fini, &ret);
- ret = -1;
- }
- return ret;
-}
-
-static dict_t *__input_params;
-static dict_t *__config_params;
-
-void
-map (dict_t *this,
- char *key,
- data_t *value,
- void *data)
-{
- dict_t *res = data;
- auth_fn_t authenticate;
- auth_handle_t *handle = NULL;
-
- if (value && (handle = data_to_ptr (value)) &&
- (authenticate = handle->authenticate)) {
- dict_set (res, key,
- int_to_data (authenticate (__input_params,
- __config_params)));
- } else {
- dict_set (res, key, int_to_data (AUTH_DONT_CARE));
- }
-}
-
-void
-reduce (dict_t *this,
- char *key,
- data_t *value,
- void *data)
-{
- int64_t val = 0;
- int64_t *res = data;
- if (!data)
- return;
-
- val = data_to_int64 (value);
- switch (val)
- {
- case AUTH_ACCEPT:
- if (AUTH_DONT_CARE == *res)
- *res = AUTH_ACCEPT;
- break;
-
- case AUTH_REJECT:
- *res = AUTH_REJECT;
- break;
-
- case AUTH_DONT_CARE:
- break;
- }
-}
-
-
-auth_result_t
-gf_authenticate (dict_t *input_params,
- dict_t *config_params,
- dict_t *auth_modules)
-{
- dict_t *results = NULL;
- int64_t result = AUTH_DONT_CARE;
-
- results = get_new_dict ();
- __input_params = input_params;
- __config_params = config_params;
-
- dict_foreach (auth_modules, map, results);
-
- dict_foreach (results, reduce, &result);
- if (AUTH_DONT_CARE == result) {
- data_t *peerinfo_data = dict_get (input_params, "peer-info");
- char *name = NULL;
-
- if (peerinfo_data) {
- peer_info_t *peerinfo = data_to_ptr (peerinfo_data);
- name = peerinfo->identifier;
- }
-
- gf_log ("auth", GF_LOG_ERROR,
- "no authentication module is interested in "
- "accepting remote-client %s", name);
- result = AUTH_REJECT;
- }
-
- dict_destroy (results);
- return result;
-}
-
-void
-gf_auth_fini (dict_t *auth_modules)
-{
- int32_t dummy;
-
- dict_foreach (auth_modules, fini, &dummy);
-}
diff --git a/libglusterfs/src/authenticate.h b/libglusterfs/src/authenticate.h
deleted file mode 100644
index 253989b51..000000000
--- a/libglusterfs/src/authenticate.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _AUTHENTICATE_H
-#define _AUTHENTICATE_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE
-#endif
-
-#include <stdio.h>
-#include <fnmatch.h>
-#include "dict.h"
-#include "compat.h"
-#include "list.h"
-#include "transport.h"
-#include "xlator.h"
-
-typedef enum {
- AUTH_ACCEPT,
- AUTH_REJECT,
- AUTH_DONT_CARE
-} auth_result_t;
-
-typedef auth_result_t (*auth_fn_t) (dict_t *input_params,
- dict_t *config_params);
-
-typedef struct {
- void *handle;
- auth_fn_t authenticate;
- volume_opt_list_t *vol_opt;
-} auth_handle_t;
-
-auth_result_t gf_authenticate (dict_t *input_params,
- dict_t *config_params,
- dict_t *auth_modules);
-int32_t gf_auth_init (xlator_t *xl, dict_t *auth_modules);
-void gf_auth_fini (dict_t *auth_modules);
-
-#endif /* _AUTHENTICATE_H */
diff --git a/libglusterfs/src/byte-order.h b/libglusterfs/src/byte-order.h
index cf93a6738..4101db2c7 100644
--- a/libglusterfs/src/byte-order.h
+++ b/libglusterfs/src/byte-order.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _BYTE_ORDER_H
@@ -38,6 +29,23 @@ static uint64_t (*hton64) (uint64_t);
#define ntoh32 hton32
#define ntoh64 hton64
+static uint16_t (*htole16) (uint16_t);
+static uint32_t (*htole32) (uint32_t);
+static uint64_t (*htole64) (uint64_t);
+
+#define letoh16 htole16
+#define letoh32 htole32
+#define letoh64 htole64
+
+static uint16_t (*htobe16) (uint16_t);
+static uint32_t (*htobe32) (uint32_t);
+static uint64_t (*htobe64) (uint64_t);
+
+#define betoh16 htobe16
+#define betoh32 htobe32
+#define betoh64 htobe64
+
+
#define do_swap2(x) (((x&LS1) << 8)|(((x&MS1) >> 8)))
#define do_swap4(x) ((do_swap2(x&LS2) << 16)|(do_swap2((x&MS2) >> 16)))
#define do_swap8(x) ((do_swap4(x&LS4) << 32)|(do_swap4((x&MS4) >> 32)))
@@ -86,15 +94,17 @@ __noswap64 (uint64_t x)
static inline uint16_t
-__byte_order_init16 (uint16_t i)
+__byte_order_n16 (uint16_t i)
{
uint32_t num = 1;
if (((char *)(&num))[0] == 1) {
+ /* cpu is le */
hton16 = __swap16;
hton32 = __swap32;
hton64 = __swap64;
} else {
+ /* cpu is be */
hton16 = __noswap16;
hton32 = __noswap32;
hton64 = __noswap64;
@@ -105,15 +115,17 @@ __byte_order_init16 (uint16_t i)
static inline uint32_t
-__byte_order_init32 (uint32_t i)
+__byte_order_n32 (uint32_t i)
{
uint32_t num = 1;
if (((char *)(&num))[0] == 1) {
+ /* cpu is le */
hton16 = __swap16;
hton32 = __swap32;
hton64 = __swap64;
} else {
+ /* cpu is be */
hton16 = __noswap16;
hton32 = __noswap32;
hton64 = __noswap64;
@@ -124,15 +136,17 @@ __byte_order_init32 (uint32_t i)
static inline uint64_t
-__byte_order_init64 (uint64_t i)
+__byte_order_n64 (uint64_t i)
{
uint32_t num = 1;
if (((char *)(&num))[0] == 1) {
+ /* cpu is le */
hton16 = __swap16;
hton32 = __swap32;
hton64 = __swap64;
} else {
+ /* cpu is be */
hton16 = __noswap16;
hton32 = __noswap32;
hton64 = __noswap64;
@@ -142,9 +156,146 @@ __byte_order_init64 (uint64_t i)
}
-static uint16_t (*hton16) (uint16_t) = __byte_order_init16;
-static uint32_t (*hton32) (uint32_t) = __byte_order_init32;
-static uint64_t (*hton64) (uint64_t) = __byte_order_init64;
+static uint16_t (*hton16) (uint16_t) = __byte_order_n16;
+static uint32_t (*hton32) (uint32_t) = __byte_order_n32;
+static uint64_t (*hton64) (uint64_t) = __byte_order_n64;
+
+
+static inline uint16_t
+__byte_order_le16 (uint16_t i)
+{
+ uint32_t num = 1;
+
+ if (((char *)(&num))[0] == 1) {
+ /* cpu is le */
+ htole16 = __noswap16;
+ htole32 = __noswap32;
+ htole64 = __noswap64;
+ } else {
+ /* cpu is be */
+ htole16 = __swap16;
+ htole32 = __swap32;
+ htole64 = __swap64;
+ }
+
+ return htole16 (i);
+}
+
+
+static inline uint32_t
+__byte_order_le32 (uint32_t i)
+{
+ uint32_t num = 1;
+
+ if (((char *)(&num))[0] == 1) {
+ /* cpu is le */
+ htole16 = __noswap16;
+ htole32 = __noswap32;
+ htole64 = __noswap64;
+ } else {
+ /* cpu is be */
+ htole16 = __swap16;
+ htole32 = __swap32;
+ htole64 = __swap64;
+ }
+
+ return htole32 (i);
+}
+
+
+static inline uint64_t
+__byte_order_le64 (uint64_t i)
+{
+ uint32_t num = 1;
+
+ if (((char *)(&num))[0] == 1) {
+ /* cpu is le */
+ htole16 = __noswap16;
+ htole32 = __noswap32;
+ htole64 = __noswap64;
+ } else {
+ /* cpu is be */
+ htole16 = __swap16;
+ htole32 = __swap32;
+ htole64 = __swap64;
+ }
+
+ return htole64 (i);
+}
+
+
+static uint16_t (*htole16) (uint16_t) = __byte_order_le16;
+static uint32_t (*htole32) (uint32_t) = __byte_order_le32;
+static uint64_t (*htole64) (uint64_t) = __byte_order_le64;
+
+
+static inline uint16_t
+__byte_order_be16 (uint16_t i)
+{
+ uint32_t num = 1;
+
+ if (((char *)(&num))[0] == 1) {
+ /* cpu is le */
+ htobe16 = __swap16;
+ htobe32 = __swap32;
+ htobe64 = __swap64;
+ } else {
+ /* cpu is be */
+ htobe16 = __noswap16;
+ htobe32 = __noswap32;
+ htobe64 = __noswap64;
+ }
+
+ return htobe16 (i);
+}
+
+
+static inline uint32_t
+__byte_order_be32 (uint32_t i)
+{
+ uint32_t num = 1;
+
+ if (((char *)(&num))[0] == 1) {
+ /* cpu is le */
+ htobe16 = __swap16;
+ htobe32 = __swap32;
+ htobe64 = __swap64;
+ } else {
+ /* cpu is be */
+ htobe16 = __noswap16;
+ htobe32 = __noswap32;
+ htobe64 = __noswap64;
+ }
+
+ return htobe32 (i);
+}
+
+
+static inline uint64_t
+__byte_order_be64 (uint64_t i)
+{
+ uint32_t num = 1;
+
+ if (((char *)(&num))[0] == 1) {
+ /* cpu is le */
+ htobe16 = __swap16;
+ htobe32 = __swap32;
+ htobe64 = __swap64;
+ } else {
+ /* cpu is be */
+ htobe16 = __noswap16;
+ htobe32 = __noswap32;
+ htobe64 = __noswap64;
+ }
+
+ return htobe64 (i);
+}
+
+
+static uint16_t (*htobe16) (uint16_t) = __byte_order_be16;
+static uint32_t (*htobe32) (uint32_t) = __byte_order_be32;
+static uint64_t (*htobe64) (uint64_t) = __byte_order_be64;
+
#endif /* _BYTE_ORDER_H */
diff --git a/libglusterfs/src/call-stub.c b/libglusterfs/src/call-stub.c
index 05f58a700..ac79cf071 100644
--- a/libglusterfs/src/call-stub.c
+++ b/libglusterfs/src/call-stub.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -22,4150 +13,2826 @@
#include "config.h"
#endif
+#include <openssl/md5.h>
+#include <inttypes.h>
+
#include "call-stub.h"
+#include "mem-types.h"
static call_stub_t *
stub_new (call_frame_t *frame,
- char wind,
- glusterfs_fop_t fop)
+ char wind,
+ glusterfs_fop_t fop)
{
- call_stub_t *new = NULL;
+ call_stub_t *new = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- new = CALLOC (1, sizeof (*new));
- GF_VALIDATE_OR_GOTO ("call-stub", new, out);
+ new = mem_get0 (frame->this->ctx->stub_mem_pool);
+ GF_VALIDATE_OR_GOTO ("call-stub", new, out);
- new->frame = frame;
- new->wind = wind;
- new->fop = fop;
+ new->frame = frame;
+ new->wind = wind;
+ new->fop = fop;
+ new->stub_mem_pool = frame->this->ctx->stub_mem_pool;
+ INIT_LIST_HEAD (&new->list);
- INIT_LIST_HEAD (&new->list);
+ INIT_LIST_HEAD (&new->args_cbk.entries);
out:
- return new;
+ return new;
}
call_stub_t *
-fop_lookup_stub (call_frame_t *frame,
- fop_lookup_t fn,
- loc_t *loc,
- dict_t *xattr_req)
+fop_lookup_stub (call_frame_t *frame, fop_lookup_t fn, loc_t *loc,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
- stub = stub_new (frame, 1, GF_FOP_LOOKUP);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 1, GF_FOP_LOOKUP);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.lookup.fn = fn;
+ stub->fn.lookup = fn;
- if (xattr_req)
- stub->args.lookup.xattr_req = dict_ref (xattr_req);
+ loc_copy (&stub->args.loc, loc);
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
- loc_copy (&stub->args.lookup.loc, loc);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_lookup_cbk_stub (call_frame_t *frame,
- fop_lookup_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf,
- dict_t *dict)
+fop_lookup_cbk_stub (call_frame_t *frame, fop_lookup_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ dict_t *xdata, struct iatt *postparent)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_LOOKUP);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.lookup_cbk.fn = fn;
- stub->args.lookup_cbk.op_ret = op_ret;
- stub->args.lookup_cbk.op_errno = op_errno;
- if (inode)
- stub->args.lookup_cbk.inode = inode_ref (inode);
- if (buf)
- stub->args.lookup_cbk.buf = *buf;
- if (dict)
- stub->args.lookup_cbk.dict = dict_ref (dict);
+ call_stub_t *stub = NULL;
+
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+
+ stub = stub_new (frame, 0, GF_FOP_LOOKUP);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+
+ stub->fn_cbk.lookup = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ if (inode)
+ stub->args_cbk.inode = inode_ref (inode);
+ if (buf)
+ stub->args_cbk.stat = *buf;
+ if (postparent)
+ stub->args_cbk.postparent = *postparent;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_stat_stub (call_frame_t *frame,
- fop_stat_t fn,
- loc_t *loc)
+fop_stat_stub (call_frame_t *frame, fop_stat_t fn,
+ loc_t *loc, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ call_stub_t *stub = NULL;
+
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
- stub = stub_new (frame, 1, GF_FOP_STAT);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 1, GF_FOP_STAT);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.stat.fn = fn;
- loc_copy (&stub->args.stat.loc, loc);
+ stub->fn.stat = fn;
+ loc_copy (&stub->args.loc, loc);
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_stat_cbk_stub (call_frame_t *frame,
- fop_stat_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+fop_stat_cbk_stub (call_frame_t *frame, fop_stat_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *buf, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_STAT);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.stat_cbk.fn = fn;
- stub->args.stat_cbk.op_ret = op_ret;
- stub->args.stat_cbk.op_errno = op_errno;
- if (op_ret == 0)
- stub->args.stat_cbk.buf = *buf;
+ call_stub_t *stub = NULL;
+
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+
+ stub = stub_new (frame, 0, GF_FOP_STAT);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+
+ stub->fn_cbk.stat = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ if (op_ret == 0)
+ stub->args_cbk.stat = *buf;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_fstat_stub (call_frame_t *frame,
- fop_fstat_t fn,
- fd_t *fd)
+fop_fstat_stub (call_frame_t *frame, fop_fstat_t fn,
+ fd_t *fd, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 1, GF_FOP_FSTAT);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 1, GF_FOP_FSTAT);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.fstat.fn = fn;
+ stub->fn.fstat = fn;
- if (fd)
- stub->args.fstat.fd = fd_ref (fd);
+ if (fd)
+ stub->args.fd = fd_ref (fd);
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_fstat_cbk_stub (call_frame_t *frame,
- fop_fstat_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+fop_fstat_cbk_stub (call_frame_t *frame, fop_fstat_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *buf, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 0, GF_FOP_FSTAT);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 0, GF_FOP_FSTAT);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.fstat_cbk.fn = fn;
- stub->args.fstat_cbk.op_ret = op_ret;
- stub->args.fstat_cbk.op_errno = op_errno;
- if (buf)
- stub->args.fstat_cbk.buf = *buf;
+ stub->fn_cbk.fstat = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ if (buf)
+ stub->args_cbk.stat = *buf;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_chmod_stub (call_frame_t *frame,
- fop_chmod_t fn,
- loc_t *loc,
- mode_t mode)
+fop_truncate_stub (call_frame_t *frame, fop_truncate_t fn,
+ loc_t *loc, off_t off, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
- stub = stub_new (frame, 1, GF_FOP_CHMOD);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 1, GF_FOP_TRUNCATE);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.chmod.fn = fn;
- loc_copy (&stub->args.chmod.loc, loc);
- stub->args.chmod.mode = mode;
+ stub->fn.truncate = fn;
+ loc_copy (&stub->args.loc, loc);
+ stub->args.offset = off;
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_chmod_cbk_stub (call_frame_t *frame,
- fop_chmod_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+fop_truncate_cbk_stub (call_frame_t *frame, fop_truncate_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 0, GF_FOP_CHMOD);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 0, GF_FOP_TRUNCATE);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.chmod_cbk.fn = fn;
- stub->args.chmod_cbk.op_ret = op_ret;
- stub->args.chmod_cbk.op_errno = op_errno;
- if (buf)
- stub->args.chmod_cbk.buf = *buf;
+ stub->fn_cbk.truncate = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ if (prebuf)
+ stub->args_cbk.prestat = *prebuf;
+ if (postbuf)
+ stub->args_cbk.poststat = *postbuf;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_fchmod_stub (call_frame_t *frame,
- fop_fchmod_t fn,
- fd_t *fd,
- mode_t mode)
+fop_ftruncate_stub (call_frame_t *frame, fop_ftruncate_t fn,
+ fd_t *fd, off_t off, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 1, GF_FOP_FCHMOD);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 1, GF_FOP_FTRUNCATE);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.fchmod.fn = fn;
- if (fd)
- stub->args.fchmod.fd = fd_ref (fd);
- stub->args.fchmod.mode = mode;
+ stub->fn.ftruncate = fn;
+ if (fd)
+ stub->args.fd = fd_ref (fd);
+
+ stub->args.offset = off;
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_fchmod_cbk_stub (call_frame_t *frame,
- fop_fchmod_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+fop_ftruncate_cbk_stub (call_frame_t *frame, fop_ftruncate_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
+
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ stub = stub_new (frame, 0, GF_FOP_FTRUNCATE);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub = stub_new (frame, 0, GF_FOP_FCHMOD);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub->fn_cbk.ftruncate = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ if (prebuf)
+ stub->args_cbk.prestat = *prebuf;
+ if (postbuf)
+ stub->args_cbk.poststat = *postbuf;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
- stub->args.fchmod_cbk.fn = fn;
- stub->args.fchmod_cbk.op_ret = op_ret;
- stub->args.fchmod_cbk.op_errno = op_errno;
- if (buf)
- stub->args.fchmod_cbk.buf = *buf;
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_chown_stub (call_frame_t *frame,
- fop_chown_t fn,
- loc_t *loc,
- uid_t uid,
- gid_t gid)
+fop_access_stub (call_frame_t *frame, fop_access_t fn,
+ loc_t *loc, int32_t mask, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
- stub = stub_new (frame, 1, GF_FOP_CHOWN);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 1, GF_FOP_ACCESS);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.chown.fn = fn;
- loc_copy (&stub->args.chown.loc, loc);
- stub->args.chown.uid = uid;
- stub->args.chown.gid = gid;
+ stub->fn.access = fn;
+ loc_copy (&stub->args.loc, loc);
+ stub->args.mask = mask;
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_chown_cbk_stub (call_frame_t *frame,
- fop_chown_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+fop_access_cbk_stub (call_frame_t *frame, fop_access_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 0, GF_FOP_CHOWN);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 0, GF_FOP_ACCESS);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.chown_cbk.fn = fn;
- stub->args.chown_cbk.op_ret = op_ret;
- stub->args.chown_cbk.op_errno = op_errno;
- if (buf)
- stub->args.chown_cbk.buf = *buf;
+ stub->fn_cbk.access = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_fchown_stub (call_frame_t *frame,
- fop_fchown_t fn,
- fd_t *fd,
- uid_t uid,
- gid_t gid)
+fop_readlink_stub (call_frame_t *frame, fop_readlink_t fn,
+ loc_t *loc, size_t size, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
- stub = stub_new (frame, 1, GF_FOP_FCHOWN);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 1, GF_FOP_READLINK);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.fchown.fn = fn;
- if (fd)
- stub->args.fchown.fd = fd_ref (fd);
- stub->args.fchown.uid = uid;
- stub->args.fchown.gid = gid;
+ stub->fn.readlink = fn;
+ loc_copy (&stub->args.loc, loc);
+ stub->args.size = size;
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_fchown_cbk_stub (call_frame_t *frame,
- fop_fchown_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+fop_readlink_cbk_stub (call_frame_t *frame, fop_readlink_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ const char *path, struct iatt *stbuf, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 0, GF_FOP_FCHOWN);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 0, GF_FOP_READLINK);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.fchown_cbk.fn = fn;
- stub->args.fchown_cbk.op_ret = op_ret;
- stub->args.fchown_cbk.op_errno = op_errno;
- if (buf)
- stub->args.fchown_cbk.buf = *buf;
+ stub->fn_cbk.readlink = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ if (path)
+ stub->args_cbk.buf = gf_strdup (path);
+ if (stbuf)
+ stub->args_cbk.stat = *stbuf;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
-/* truncate */
-
call_stub_t *
-fop_truncate_stub (call_frame_t *frame,
- fop_truncate_t fn,
- loc_t *loc,
- off_t off)
+fop_mknod_stub (call_frame_t *frame, fop_mknod_t fn, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
- stub = stub_new (frame, 1, GF_FOP_TRUNCATE);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 1, GF_FOP_MKNOD);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.truncate.fn = fn;
- loc_copy (&stub->args.truncate.loc, loc);
- stub->args.truncate.off = off;
+ stub->fn.mknod = fn;
+ loc_copy (&stub->args.loc, loc);
+ stub->args.mode = mode;
+ stub->args.rdev = rdev;
+ stub->args.umask = umask;
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_truncate_cbk_stub (call_frame_t *frame,
- fop_truncate_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+fop_mknod_cbk_stub (call_frame_t *frame, fop_mknod_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 0, GF_FOP_TRUNCATE);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 0, GF_FOP_MKNOD);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+
+ stub->fn_cbk.mknod = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ if (inode)
+ stub->args_cbk.inode = inode_ref (inode);
+ if (buf)
+ stub->args_cbk.stat = *buf;
+ if (preparent)
+ stub->args_cbk.preparent = *preparent;
+ if (postparent)
+ stub->args_cbk.postparent = *postparent;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
- stub->args.truncate_cbk.fn = fn;
- stub->args.truncate_cbk.op_ret = op_ret;
- stub->args.truncate_cbk.op_errno = op_errno;
- if (buf)
- stub->args.truncate_cbk.buf = *buf;
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_ftruncate_stub (call_frame_t *frame,
- fop_ftruncate_t fn,
- fd_t *fd,
- off_t off)
+fop_mkdir_stub (call_frame_t *frame, fop_mkdir_t fn,
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
- stub = stub_new (frame, 1, GF_FOP_FTRUNCATE);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 1, GF_FOP_MKDIR);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.ftruncate.fn = fn;
- if (fd)
- stub->args.ftruncate.fd = fd_ref (fd);
+ stub->fn.mkdir = fn;
+ loc_copy (&stub->args.loc, loc);
+ stub->args.mode = mode;
+ stub->args.umask = umask;
- stub->args.ftruncate.off = off;
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_ftruncate_cbk_stub (call_frame_t *frame,
- fop_ftruncate_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+fop_mkdir_cbk_stub (call_frame_t *frame, fop_mkdir_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 0, GF_FOP_FTRUNCATE);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 0, GF_FOP_MKDIR);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.ftruncate_cbk.fn = fn;
- stub->args.ftruncate_cbk.op_ret = op_ret;
- stub->args.ftruncate_cbk.op_errno = op_errno;
- if (buf)
- stub->args.ftruncate_cbk.buf = *buf;
+ stub->fn_cbk.mkdir = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ if (inode)
+ stub->args_cbk.inode = inode_ref (inode);
+ if (buf)
+ stub->args_cbk.stat = *buf;
+ if (preparent)
+ stub->args_cbk.preparent = *preparent;
+ if (postparent)
+ stub->args_cbk.postparent = *postparent;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_utimens_stub (call_frame_t *frame,
- fop_utimens_t fn,
- loc_t *loc,
- struct timespec tv[2])
+fop_unlink_stub (call_frame_t *frame, fop_unlink_t fn,
+ loc_t *loc, int xflag, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
- stub = stub_new (frame, 1, GF_FOP_UTIMENS);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 1, GF_FOP_UNLINK);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.utimens.fn = fn;
- loc_copy (&stub->args.utimens.loc, loc);
- stub->args.utimens.tv[0] = tv[0];
- stub->args.utimens.tv[1] = tv[1];
+ stub->fn.unlink = fn;
+ loc_copy (&stub->args.loc, loc);
+ stub->args.xflag = xflag;
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_utimens_cbk_stub (call_frame_t *frame,
- fop_utimens_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+fop_unlink_cbk_stub (call_frame_t *frame, fop_unlink_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 0, GF_FOP_UTIMENS);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 0, GF_FOP_UNLINK);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.utimens_cbk.fn = fn;
- stub->args.utimens_cbk.op_ret = op_ret;
- stub->args.utimens_cbk.op_errno = op_errno;
- if (buf)
- stub->args.utimens_cbk.buf = *buf;
+ stub->fn_cbk.unlink = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ if (preparent)
+ stub->args_cbk.preparent = *preparent;
+ if (postparent)
+ stub->args_cbk.postparent = *postparent;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
+
call_stub_t *
-fop_access_stub (call_frame_t *frame,
- fop_access_t fn,
- loc_t *loc,
- int32_t mask)
+fop_rmdir_stub (call_frame_t *frame, fop_rmdir_t fn,
+ loc_t *loc, int flags, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
- stub = stub_new (frame, 1, GF_FOP_ACCESS);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 1, GF_FOP_RMDIR);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.access.fn = fn;
- loc_copy (&stub->args.access.loc, loc);
- stub->args.access.mask = mask;
+ stub->fn.rmdir = fn;
+ loc_copy (&stub->args.loc, loc);
+ stub->args.flags = flags;
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_access_cbk_stub (call_frame_t *frame,
- fop_access_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno)
+fop_rmdir_cbk_stub (call_frame_t *frame, fop_rmdir_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 0, GF_FOP_ACCESS);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 0, GF_FOP_RMDIR);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.access_cbk.fn = fn;
- stub->args.access_cbk.op_ret = op_ret;
- stub->args.access_cbk.op_errno = op_errno;
+ stub->fn_cbk.rmdir = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ if (preparent)
+ stub->args_cbk.preparent = *preparent;
+ if (postparent)
+ stub->args_cbk.postparent = *postparent;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_readlink_stub (call_frame_t *frame,
- fop_readlink_t fn,
- loc_t *loc,
- size_t size)
+fop_symlink_stub (call_frame_t *frame, fop_symlink_t fn,
+ const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
+
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", linkname, out);
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
-
- stub = stub_new (frame, 1, GF_FOP_READLINK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 1, GF_FOP_SYMLINK);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.readlink.fn = fn;
- loc_copy (&stub->args.readlink.loc, loc);
- stub->args.readlink.size = size;
+ stub->fn.symlink = fn;
+ stub->args.linkname = gf_strdup (linkname);
+ stub->args.umask = umask;
+ loc_copy (&stub->args.loc, loc);
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_readlink_cbk_stub (call_frame_t *frame,
- fop_readlink_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- const char *path)
+fop_symlink_cbk_stub (call_frame_t *frame, fop_symlink_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 0, GF_FOP_READLINK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 0, GF_FOP_SYMLINK);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.readlink_cbk.fn = fn;
- stub->args.readlink_cbk.op_ret = op_ret;
- stub->args.readlink_cbk.op_errno = op_errno;
- if (path)
- stub->args.readlink_cbk.buf = strdup (path);
+ stub->fn_cbk.symlink = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ if (inode)
+ stub->args_cbk.inode = inode_ref (inode);
+ if (buf)
+ stub->args_cbk.stat = *buf;
+ if (preparent)
+ stub->args_cbk.preparent = *preparent;
+ if (postparent)
+ stub->args_cbk.postparent = *postparent;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_mknod_stub (call_frame_t *frame,
- fop_mknod_t fn,
- loc_t *loc,
- mode_t mode,
- dev_t rdev)
+fop_rename_stub (call_frame_t *frame, fop_rename_t fn,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", oldloc, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", newloc, out);
- stub = stub_new (frame, 1, GF_FOP_MKNOD);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 1, GF_FOP_RENAME);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.mknod.fn = fn;
- loc_copy (&stub->args.mknod.loc, loc);
- stub->args.mknod.mode = mode;
- stub->args.mknod.rdev = rdev;
+ stub->fn.rename = fn;
+ loc_copy (&stub->args.loc, oldloc);
+ loc_copy (&stub->args.loc2, newloc);
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_mknod_cbk_stub (call_frame_t *frame,
- fop_mknod_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
+fop_rename_cbk_stub (call_frame_t *frame, fop_rename_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 0, GF_FOP_MKNOD);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 0, GF_FOP_RENAME);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.mknod_cbk.fn = fn;
- stub->args.mknod_cbk.op_ret = op_ret;
- stub->args.mknod_cbk.op_errno = op_errno;
- if (inode)
- stub->args.mknod_cbk.inode = inode_ref (inode);
- if (buf)
- stub->args.mknod_cbk.buf = *buf;
+ stub->fn_cbk.rename = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ if (buf)
+ stub->args_cbk.stat = *buf;
+ if (preoldparent)
+ stub->args_cbk.preparent = *preoldparent;
+ if (postoldparent)
+ stub->args_cbk.postparent = *postoldparent;
+ if (prenewparent)
+ stub->args_cbk.preparent2 = *prenewparent;
+ if (postnewparent)
+ stub->args_cbk.postparent2 = *postnewparent;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_mkdir_stub (call_frame_t *frame,
- fop_mkdir_t fn,
- loc_t *loc,
- mode_t mode)
+fop_link_stub (call_frame_t *frame, fop_link_t fn,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", oldloc, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", newloc, out);
- stub = stub_new (frame, 1, GF_FOP_MKDIR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 1, GF_FOP_LINK);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.mkdir.fn = fn;
- loc_copy (&stub->args.mkdir.loc, loc);
- stub->args.mkdir.mode = mode;
+ stub->fn.link = fn;
+ loc_copy (&stub->args.loc, oldloc);
+ loc_copy (&stub->args.loc2, newloc);
+
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_mkdir_cbk_stub (call_frame_t *frame,
- fop_mkdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
+fop_link_cbk_stub (call_frame_t *frame, fop_link_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 0, GF_FOP_MKDIR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 0, GF_FOP_LINK);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.mkdir_cbk.fn = fn;
- stub->args.mkdir_cbk.op_ret = op_ret;
- stub->args.mkdir_cbk.op_errno = op_errno;
- if (inode)
- stub->args.mkdir_cbk.inode = inode_ref (inode);
- if (buf)
- stub->args.mkdir_cbk.buf = *buf;
+ stub->fn_cbk.link = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ if (inode)
+ stub->args_cbk.inode = inode_ref (inode);
+ if (buf)
+ stub->args_cbk.stat = *buf;
+ if (preparent)
+ stub->args_cbk.preparent = *preparent;
+ if (postparent)
+ stub->args_cbk.postparent = *postparent;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_unlink_stub (call_frame_t *frame,
- fop_unlink_t fn,
- loc_t *loc)
+fop_create_stub (call_frame_t *frame, fop_create_t fn,
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
- stub = stub_new (frame, 1, GF_FOP_UNLINK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 1, GF_FOP_CREATE);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.unlink.fn = fn;
- loc_copy (&stub->args.unlink.loc, loc);
+ stub->fn.create = fn;
+ loc_copy (&stub->args.loc, loc);
+ stub->args.flags = flags;
+ stub->args.mode = mode;
+ stub->args.umask = umask;
+ if (fd)
+ stub->args.fd = fd_ref (fd);
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_unlink_cbk_stub (call_frame_t *frame,
- fop_unlink_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno)
+fop_create_cbk_stub (call_frame_t *frame, fop_create_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ fd_t *fd, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 0, GF_FOP_UNLINK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 0, GF_FOP_CREATE);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.unlink_cbk.fn = fn;
- stub->args.unlink_cbk.op_ret = op_ret;
- stub->args.unlink_cbk.op_errno = op_errno;
+ stub->fn_cbk.create = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ if (fd)
+ stub->args_cbk.fd = fd_ref (fd);
+ if (inode)
+ stub->args_cbk.inode = inode_ref (inode);
+ if (buf)
+ stub->args_cbk.stat = *buf;
+ if (preparent)
+ stub->args_cbk.preparent = *preparent;
+ if (postparent)
+ stub->args_cbk.postparent = *postparent;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_rmdir_stub (call_frame_t *frame,
- fop_rmdir_t fn,
- loc_t *loc)
+fop_open_stub (call_frame_t *frame, fop_open_t fn,
+ loc_t *loc, int32_t flags, fd_t *fd, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
- stub = stub_new (frame, 1, GF_FOP_RMDIR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 1, GF_FOP_OPEN);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.rmdir.fn = fn;
- loc_copy (&stub->args.rmdir.loc, loc);
+ stub->fn.open = fn;
+ loc_copy (&stub->args.loc, loc);
+ stub->args.flags = flags;
+ if (fd)
+ stub->args.fd = fd_ref (fd);
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_rmdir_cbk_stub (call_frame_t *frame,
- fop_rmdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno)
+fop_open_cbk_stub (call_frame_t *frame, fop_open_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ fd_t *fd, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 0, GF_FOP_RMDIR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 0, GF_FOP_OPEN);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.rmdir_cbk.fn = fn;
- stub->args.rmdir_cbk.op_ret = op_ret;
- stub->args.rmdir_cbk.op_errno = op_errno;
+ stub->fn_cbk.open = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ if (fd)
+ stub->args_cbk.fd = fd_ref (fd);
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_symlink_stub (call_frame_t *frame,
- fop_symlink_t fn,
- const char *linkname,
- loc_t *loc)
+fop_readv_stub (call_frame_t *frame, fop_readv_t fn,
+ fd_t *fd, size_t size, off_t off, uint32_t flags,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
+
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
- GF_VALIDATE_OR_GOTO ("call-stub", linkname, out);
+ stub = stub_new (frame, 1, GF_FOP_READ);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub = stub_new (frame, 1, GF_FOP_SYMLINK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub->fn.readv = fn;
+ if (fd)
+ stub->args.fd = fd_ref (fd);
+ stub->args.size = size;
+ stub->args.offset = off;
+ stub->args.flags = flags;
- stub->args.symlink.fn = fn;
- stub->args.symlink.linkname = strdup (linkname);
- loc_copy (&stub->args.symlink.loc, loc);
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_symlink_cbk_stub (call_frame_t *frame,
- fop_symlink_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
+fop_readv_cbk_stub (call_frame_t *frame, fop_readv_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf,
+ struct iobref *iobref, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 0, GF_FOP_SYMLINK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 0, GF_FOP_READ);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.symlink_cbk.fn = fn;
- stub->args.symlink_cbk.op_ret = op_ret;
- stub->args.symlink_cbk.op_errno = op_errno;
- if (inode)
- stub->args.symlink_cbk.inode = inode_ref (inode);
- if (buf)
- stub->args.symlink_cbk.buf = *buf;
+ stub->fn_cbk.readv = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ if (op_ret >= 0) {
+ stub->args_cbk.vector = iov_dup (vector, count);
+ stub->args_cbk.count = count;
+ stub->args_cbk.stat = *stbuf;
+ stub->args_cbk.iobref = iobref_ref (iobref);
+ }
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_rename_stub (call_frame_t *frame,
- fop_rename_t fn,
- loc_t *oldloc,
- loc_t *newloc)
+fop_writev_stub (call_frame_t *frame, fop_writev_t fn,
+ fd_t *fd, struct iovec *vector, int32_t count, off_t off,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", oldloc, out);
- GF_VALIDATE_OR_GOTO ("call-stub", newloc, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", vector, out);
- stub = stub_new (frame, 1, GF_FOP_RENAME);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 1, GF_FOP_WRITE);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.rename.fn = fn;
- loc_copy (&stub->args.rename.old, oldloc);
- loc_copy (&stub->args.rename.new, newloc);
+ stub->fn.writev = fn;
+ if (fd)
+ stub->args.fd = fd_ref (fd);
+ stub->args.vector = iov_dup (vector, count);
+ stub->args.count = count;
+ stub->args.offset = off;
+ stub->args.flags = flags;
+ stub->args.iobref = iobref_ref (iobref);
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_rename_cbk_stub (call_frame_t *frame,
- fop_rename_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+fop_writev_cbk_stub (call_frame_t *frame, fop_writev_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 0, GF_FOP_RENAME);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 0, GF_FOP_WRITE);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.rename_cbk.fn = fn;
- stub->args.rename_cbk.op_ret = op_ret;
- stub->args.rename_cbk.op_errno = op_errno;
- if (buf)
- stub->args.rename_cbk.buf = *buf;
+ stub->fn_cbk.writev = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ if (op_ret >= 0)
+ stub->args_cbk.poststat = *postbuf;
+ if (prebuf)
+ stub->args_cbk.prestat = *prebuf;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_link_stub (call_frame_t *frame,
- fop_link_t fn,
- loc_t *oldloc,
- loc_t *newloc)
+fop_flush_stub (call_frame_t *frame, fop_flush_t fn,
+ fd_t *fd, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", oldloc, out);
- GF_VALIDATE_OR_GOTO ("call-stub", newloc, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_LINK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub->args.link.fn = fn;
- loc_copy (&stub->args.link.oldloc, oldloc);
- loc_copy (&stub->args.link.newloc, newloc);
+ stub = stub_new (frame, 1, GF_FOP_FLUSH);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub->fn.flush = fn;
+ if (fd)
+ stub->args.fd = fd_ref (fd);
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_link_cbk_stub (call_frame_t *frame,
- fop_link_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
+fop_flush_cbk_stub (call_frame_t *frame, fop_flush_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 0, GF_FOP_LINK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 0, GF_FOP_FLUSH);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.link_cbk.fn = fn;
- stub->args.link_cbk.op_ret = op_ret;
- stub->args.link_cbk.op_errno = op_errno;
- if (inode)
- stub->args.link_cbk.inode = inode_ref (inode);
- if (buf)
- stub->args.link_cbk.buf = *buf;
+ stub->fn_cbk.flush = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_create_stub (call_frame_t *frame,
- fop_create_t fn,
- loc_t *loc,
- int32_t flags,
- mode_t mode, fd_t *fd)
+fop_fsync_stub (call_frame_t *frame, fop_fsync_t fn,
+ fd_t *fd, int32_t datasync, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 1, GF_FOP_CREATE);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 1, GF_FOP_FSYNC);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.create.fn = fn;
- loc_copy (&stub->args.create.loc, loc);
- stub->args.create.flags = flags;
- stub->args.create.mode = mode;
- if (fd)
- stub->args.create.fd = fd_ref (fd);
+ stub->fn.fsync = fn;
+ if (fd)
+ stub->args.fd = fd_ref (fd);
+ stub->args.datasync = datasync;
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_create_cbk_stub (call_frame_t *frame,
- fop_create_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct stat *buf)
+fop_fsync_cbk_stub (call_frame_t *frame, fop_fsync_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_CREATE);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.create_cbk.fn = fn;
- stub->args.create_cbk.op_ret = op_ret;
- stub->args.create_cbk.op_errno = op_errno;
- if (fd)
- stub->args.create_cbk.fd = fd_ref (fd);
- if (inode)
- stub->args.create_cbk.inode = inode_ref (inode);
- if (buf)
- stub->args.create_cbk.buf = *buf;
+ call_stub_t *stub = NULL;
+
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+
+ stub = stub_new (frame, 0, GF_FOP_FSYNC);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+
+ stub->fn_cbk.fsync = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ if (prebuf)
+ stub->args_cbk.prestat = *prebuf;
+ if (postbuf)
+ stub->args_cbk.poststat = *postbuf;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_open_stub (call_frame_t *frame,
- fop_open_t fn,
- loc_t *loc,
- int32_t flags, fd_t *fd)
+fop_opendir_stub (call_frame_t *frame, fop_opendir_t fn,
+ loc_t *loc, fd_t *fd, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
- stub = stub_new (frame, 1, GF_FOP_OPEN);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 1, GF_FOP_OPENDIR);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.open.fn = fn;
- loc_copy (&stub->args.open.loc, loc);
- stub->args.open.flags = flags;
- if (fd)
- stub->args.open.fd = fd_ref (fd);
+ stub->fn.opendir = fn;
+ loc_copy (&stub->args.loc, loc);
+ if (fd)
+ stub->args.fd = fd_ref (fd);
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_open_cbk_stub (call_frame_t *frame,
- fop_open_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-
+fop_opendir_cbk_stub (call_frame_t *frame, fop_opendir_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ fd_t *fd, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
+
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ stub = stub_new (frame, 0, GF_FOP_OPENDIR);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub = stub_new (frame, 0, GF_FOP_OPEN);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub->fn_cbk.opendir = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
- stub->args.open_cbk.fn = fn;
- stub->args.open_cbk.op_ret = op_ret;
- stub->args.open_cbk.op_errno = op_errno;
- if (fd)
- stub->args.open_cbk.fd = fd_ref (fd);
+ if (fd)
+ stub->args_cbk.fd = fd_ref (fd);
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_readv_stub (call_frame_t *frame,
- fop_readv_t fn,
- fd_t *fd,
- size_t size,
- off_t off)
+fop_fsyncdir_stub (call_frame_t *frame, fop_fsyncdir_t fn,
+ fd_t *fd, int32_t datasync, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 1, GF_FOP_READ);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 1, GF_FOP_FSYNCDIR);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.readv.fn = fn;
- if (fd)
- stub->args.readv.fd = fd_ref (fd);
- stub->args.readv.size = size;
- stub->args.readv.off = off;
+ stub->fn.fsyncdir = fn;
+ if (fd)
+ stub->args.fd = fd_ref (fd);
+ stub->args.datasync = datasync;
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_readv_cbk_stub (call_frame_t *frame,
- fop_readv_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct stat *stbuf,
- struct iobref *iobref)
-
+fop_fsyncdir_cbk_stub (call_frame_t *frame, fop_fsyncdir_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_READ);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.readv_cbk.fn = fn;
- stub->args.readv_cbk.op_ret = op_ret;
- stub->args.readv_cbk.op_errno = op_errno;
- if (op_ret >= 0) {
- stub->args.readv_cbk.vector = iov_dup (vector, count);
- stub->args.readv_cbk.count = count;
- stub->args.readv_cbk.stbuf = *stbuf;
- stub->args.readv_cbk.iobref = iobref_ref (iobref);
- }
+ call_stub_t *stub = NULL;
+
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+
+ stub = stub_new (frame, 0, GF_FOP_FSYNCDIR);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+
+ stub->fn_cbk.fsyncdir = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_writev_stub (call_frame_t *frame,
- fop_writev_t fn,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t off,
- struct iobref *iobref)
+fop_statfs_stub (call_frame_t *frame, fop_statfs_t fn,
+ loc_t *loc, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", vector, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
- stub = stub_new (frame, 1, GF_FOP_WRITE);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 1, GF_FOP_STATFS);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.writev.fn = fn;
- if (fd)
- stub->args.writev.fd = fd_ref (fd);
- stub->args.writev.vector = iov_dup (vector, count);
- stub->args.writev.count = count;
- stub->args.writev.off = off;
- stub->args.writev.iobref = iobref_ref (iobref);
+ stub->fn.statfs = fn;
+ loc_copy (&stub->args.loc, loc);
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_writev_cbk_stub (call_frame_t *frame,
- fop_writev_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *stbuf)
-
+fop_statfs_cbk_stub (call_frame_t *frame, fop_statfs_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct statvfs *buf, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 0, GF_FOP_WRITE);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 0, GF_FOP_STATFS);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.writev_cbk.fn = fn;
- stub->args.writev_cbk.op_ret = op_ret;
- stub->args.writev_cbk.op_errno = op_errno;
- if (op_ret >= 0)
- stub->args.writev_cbk.stbuf = *stbuf;
+ stub->fn_cbk.statfs = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ if (op_ret == 0)
+ stub->args_cbk.statvfs = *buf;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_flush_stub (call_frame_t *frame,
- fop_flush_t fn,
- fd_t *fd)
+fop_setxattr_stub (call_frame_t *frame, fop_setxattr_t fn,
+ loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
- stub = stub_new (frame, 1, GF_FOP_FLUSH);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 1, GF_FOP_SETXATTR);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.flush.fn = fn;
- if (fd)
- stub->args.flush.fd = fd_ref (fd);
+ stub->fn.setxattr = fn;
+ loc_copy (&stub->args.loc, loc);
+ /* TODO */
+ if (dict)
+ stub->args.xattr = dict_ref (dict);
+ stub->args.flags = flags;
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_flush_cbk_stub (call_frame_t *frame,
- fop_flush_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno)
-
+fop_setxattr_cbk_stub (call_frame_t *frame,
+ fop_setxattr_cbk_t fn,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 0, GF_FOP_FLUSH);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 0, GF_FOP_SETXATTR);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.flush_cbk.fn = fn;
- stub->args.flush_cbk.op_ret = op_ret;
- stub->args.flush_cbk.op_errno = op_errno;
+ stub->fn_cbk.setxattr = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
-
-
call_stub_t *
-fop_fsync_stub (call_frame_t *frame,
- fop_fsync_t fn,
- fd_t *fd,
- int32_t datasync)
+fop_getxattr_stub (call_frame_t *frame, fop_getxattr_t fn,
+ loc_t *loc, const char *name, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
+
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ stub = stub_new (frame, 1, GF_FOP_GETXATTR);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub = stub_new (frame, 1, GF_FOP_FSYNC);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub->fn.getxattr = fn;
+ loc_copy (&stub->args.loc, loc);
- stub->args.fsync.fn = fn;
- if (fd)
- stub->args.fsync.fd = fd_ref (fd);
- stub->args.fsync.datasync = datasync;
+ if (name)
+ stub->args.name = gf_strdup (name);
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_fsync_cbk_stub (call_frame_t *frame,
- fop_fsync_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno)
-
+fop_getxattr_cbk_stub (call_frame_t *frame, fop_getxattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 0, GF_FOP_FSYNC);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 0, GF_FOP_GETXATTR);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.fsync_cbk.fn = fn;
- stub->args.fsync_cbk.op_ret = op_ret;
- stub->args.fsync_cbk.op_errno = op_errno;
+ stub->fn_cbk.getxattr = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ /* TODO */
+ if (dict)
+ stub->args_cbk.xattr = dict_ref (dict);
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_opendir_stub (call_frame_t *frame,
- fop_opendir_t fn,
- loc_t *loc, fd_t *fd)
+fop_fsetxattr_stub (call_frame_t *frame, fop_fsetxattr_t fn,
+ fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", fd, out);
- stub = stub_new (frame, 1, GF_FOP_OPENDIR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 1, GF_FOP_FSETXATTR);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.opendir.fn = fn;
- loc_copy (&stub->args.opendir.loc, loc);
- if (fd)
- stub->args.opendir.fd = fd_ref (fd);
+ stub->fn.fsetxattr = fn;
+ stub->args.fd = fd_ref (fd);
+
+ if (dict)
+ stub->args.xattr = dict_ref (dict);
+ stub->args.flags = flags;
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_opendir_cbk_stub (call_frame_t *frame,
- fop_opendir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-
+fop_fsetxattr_cbk_stub (call_frame_t *frame, fop_fsetxattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_OPENDIR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub->args.opendir_cbk.fn = fn;
- stub->args.opendir_cbk.op_ret = op_ret;
- stub->args.opendir_cbk.op_errno = op_errno;
+ stub = stub_new (frame, 0, GF_FOP_FSETXATTR);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- if (fd)
- stub->args.opendir_cbk.fd = fd_ref (fd);
+ stub->fn_cbk.fsetxattr = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_getdents_stub (call_frame_t *frame,
- fop_getdents_t fn,
- fd_t *fd,
- size_t size,
- off_t off,
- int32_t flag)
+fop_fgetxattr_stub (call_frame_t *frame, fop_fgetxattr_t fn,
+ fd_t *fd, const char *name, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", fd, out);
- stub = stub_new (frame, 1, GF_FOP_GETDENTS);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 1, GF_FOP_FGETXATTR);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.getdents.fn = fn;
- stub->args.getdents.size = size;
- stub->args.getdents.off = off;
- if (fd)
- stub->args.getdents.fd = fd_ref (fd);
- stub->args.getdents.flag = flag;
+ stub->fn.fgetxattr = fn;
+ stub->args.fd = fd_ref (fd);
+
+ if (name)
+ stub->args.name = gf_strdup (name);
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_getdents_cbk_stub (call_frame_t *frame,
- fop_getdents_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entries,
- int32_t count)
-
+fop_fgetxattr_cbk_stub (call_frame_t *frame, fop_fgetxattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 0, GF_FOP_GETDENTS);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 0, GF_FOP_GETXATTR);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.getdents_cbk.fn = fn;
- stub->args.getdents_cbk.op_ret = op_ret;
- stub->args.getdents_cbk.op_errno = op_errno;
- if (op_ret >= 0) {
- stub->args.getdents_cbk.entries.next = entries->next;
- /* FIXME: are entries not needed in the caller after
- * creating stub? */
- entries->next = NULL;
- }
+ stub->fn_cbk.fgetxattr = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
- stub->args.getdents_cbk.count = count;
+ if (dict)
+ stub->args_cbk.xattr = dict_ref (dict);
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_fsyncdir_stub (call_frame_t *frame,
- fop_fsyncdir_t fn,
- fd_t *fd,
- int32_t datasync)
+fop_removexattr_stub (call_frame_t *frame, fop_removexattr_t fn,
+ loc_t *loc, const char *name, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", name, out);
- stub = stub_new (frame, 1, GF_FOP_FSYNCDIR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 1, GF_FOP_REMOVEXATTR);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.fsyncdir.fn = fn;
- if (fd)
- stub->args.fsyncdir.fd = fd_ref (fd);
- stub->args.fsyncdir.datasync = datasync;
+ stub->fn.removexattr = fn;
+ loc_copy (&stub->args.loc, loc);
+ stub->args.name = gf_strdup (name);
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_fsyncdir_cbk_stub (call_frame_t *frame,
- fop_fsyncdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno)
-
+fop_removexattr_cbk_stub (call_frame_t *frame, fop_removexattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 0, GF_FOP_FSYNCDIR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 0, GF_FOP_REMOVEXATTR);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.fsyncdir_cbk.fn = fn;
- stub->args.fsyncdir_cbk.op_ret = op_ret;
- stub->args.fsyncdir_cbk.op_errno = op_errno;
+ stub->fn_cbk.removexattr = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_statfs_stub (call_frame_t *frame,
- fop_statfs_t fn,
- loc_t *loc)
+fop_fremovexattr_stub (call_frame_t *frame, fop_fremovexattr_t fn,
+ fd_t *fd, const char *name, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", fd, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", name, out);
- stub = stub_new (frame, 1, GF_FOP_STATFS);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 1, GF_FOP_FREMOVEXATTR);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.statfs.fn = fn;
- loc_copy (&stub->args.statfs.loc, loc);
+ stub->fn.fremovexattr = fn;
+ stub->args.fd = fd_ref (fd);
+ stub->args.name = gf_strdup (name);
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_statfs_cbk_stub (call_frame_t *frame,
- fop_statfs_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct statvfs *buf)
-
+fop_fremovexattr_cbk_stub (call_frame_t *frame, fop_fremovexattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 0, GF_FOP_STATFS);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 0, GF_FOP_FREMOVEXATTR);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.statfs_cbk.fn = fn;
- stub->args.statfs_cbk.op_ret = op_ret;
- stub->args.statfs_cbk.op_errno = op_errno;
- if (op_ret == 0)
- stub->args.statfs_cbk.buf = *buf;
+ stub->fn_cbk.fremovexattr = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_setxattr_stub (call_frame_t *frame,
- fop_setxattr_t fn,
- loc_t *loc,
- dict_t *dict,
- int32_t flags)
+fop_lk_stub (call_frame_t *frame, fop_lk_t fn,
+ fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", lock, out);
- stub = stub_new (frame, 1, GF_FOP_SETXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 1, GF_FOP_LK);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.setxattr.fn = fn;
- loc_copy (&stub->args.setxattr.loc, loc);
- /* TODO */
- if (dict)
- stub->args.setxattr.dict = dict_ref (dict);
- stub->args.setxattr.flags = flags;
+ stub->fn.lk = fn;
+ if (fd)
+ stub->args.fd = fd_ref (fd);
+ stub->args.cmd = cmd;
+ stub->args.lock = *lock;
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_setxattr_cbk_stub (call_frame_t *frame,
- fop_setxattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno)
+fop_lk_cbk_stub (call_frame_t *frame, fop_lk_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct gf_flock *lock, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 0, GF_FOP_SETXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 0, GF_FOP_LK);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.setxattr_cbk.fn = fn;
- stub->args.setxattr_cbk.op_ret = op_ret;
- stub->args.setxattr_cbk.op_errno = op_errno;
+ stub->fn_cbk.lk = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ if (op_ret == 0)
+ stub->args_cbk.lock = *lock;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
+
call_stub_t *
-fop_getxattr_stub (call_frame_t *frame,
- fop_getxattr_t fn,
- loc_t *loc,
- const char *name)
+fop_inodelk_stub (call_frame_t *frame, fop_inodelk_t fn,
+ const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", lock, out);
- stub = stub_new (frame, 1, GF_FOP_GETXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 1, GF_FOP_INODELK);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.getxattr.fn = fn;
- loc_copy (&stub->args.getxattr.loc, loc);
+ stub->fn.inodelk = fn;
- if (name)
- stub->args.getxattr.name = strdup (name);
+ if (volume)
+ stub->args.volume = gf_strdup (volume);
+
+ loc_copy (&stub->args.loc, loc);
+ stub->args.cmd = cmd;
+ stub->args.lock = *lock;
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_getxattr_cbk_stub (call_frame_t *frame,
- fop_getxattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
+fop_inodelk_cbk_stub (call_frame_t *frame, fop_inodelk_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
+
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ stub = stub_new (frame, 0, GF_FOP_INODELK);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub = stub_new (frame, 0, GF_FOP_GETXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub->fn_cbk.inodelk = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
- stub->args.getxattr_cbk.fn = fn;
- stub->args.getxattr_cbk.op_ret = op_ret;
- stub->args.getxattr_cbk.op_errno = op_errno;
- /* TODO */
- if (dict)
- stub->args.getxattr_cbk.dict = dict_ref (dict);
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_fsetxattr_stub (call_frame_t *frame,
- fop_fsetxattr_t fn,
- fd_t *fd,
- dict_t *dict,
- int32_t flags)
+fop_finodelk_stub (call_frame_t *frame, fop_finodelk_t fn,
+ const char *volume, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
+
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", lock, out);
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", fd, out);
+ stub = stub_new (frame, 1, GF_FOP_FINODELK);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub = stub_new (frame, 1, GF_FOP_FSETXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub->fn.finodelk = fn;
- stub->args.fsetxattr.fn = fn;
- stub->args.fsetxattr.fd = fd_ref (fd);
+ if (fd)
+ stub->args.fd = fd_ref (fd);
- /* TODO */
- if (dict)
- stub->args.fsetxattr.dict = dict_ref (dict);
- stub->args.fsetxattr.flags = flags;
+ if (volume)
+ stub->args.volume = gf_strdup (volume);
+
+ stub->args.cmd = cmd;
+ stub->args.lock = *lock;
+
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_fsetxattr_cbk_stub (call_frame_t *frame,
- fop_fsetxattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno)
+fop_finodelk_cbk_stub (call_frame_t *frame, fop_inodelk_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 0, GF_FOP_FSETXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 0, GF_FOP_FINODELK);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.fsetxattr_cbk.fn = fn;
- stub->args.fsetxattr_cbk.op_ret = op_ret;
- stub->args.fsetxattr_cbk.op_errno = op_errno;
+ stub->fn_cbk.finodelk = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_fgetxattr_stub (call_frame_t *frame,
- fop_fgetxattr_t fn,
- fd_t *fd,
- const char *name)
+fop_entrylk_stub (call_frame_t *frame, fop_entrylk_t fn,
+ const char *volume, loc_t *loc, const char *name,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
+
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", fd, out);
+ stub = stub_new (frame, 1, GF_FOP_ENTRYLK);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub = stub_new (frame, 1, GF_FOP_FGETXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub->fn.entrylk = fn;
- stub->args.fgetxattr.fn = fn;
- stub->args.fgetxattr.fd = fd_ref (fd);
+ if (volume)
+ stub->args.volume = gf_strdup (volume);
- if (name)
- stub->args.fgetxattr.name = strdup (name);
+ loc_copy (&stub->args.loc, loc);
+
+ stub->args.entrylkcmd = cmd;
+ stub->args.entrylktype = type;
+
+ if (name)
+ stub->args.name = gf_strdup (name);
+
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_fgetxattr_cbk_stub (call_frame_t *frame,
- fop_fgetxattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
+fop_entrylk_cbk_stub (call_frame_t *frame, fop_entrylk_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 0, GF_FOP_GETXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 0, GF_FOP_ENTRYLK);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.fgetxattr_cbk.fn = fn;
- stub->args.fgetxattr_cbk.op_ret = op_ret;
- stub->args.fgetxattr_cbk.op_errno = op_errno;
+ stub->fn_cbk.entrylk = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
- /* TODO */
- if (dict)
- stub->args.fgetxattr_cbk.dict = dict_ref (dict);
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_removexattr_stub (call_frame_t *frame,
- fop_removexattr_t fn,
- loc_t *loc,
- const char *name)
+fop_fentrylk_stub (call_frame_t *frame, fop_fentrylk_t fn,
+ const char *volume, fd_t *fd, const char *name,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
- GF_VALIDATE_OR_GOTO ("call-stub", name, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 1, GF_FOP_REMOVEXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 1, GF_FOP_FENTRYLK);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.removexattr.fn = fn;
- loc_copy (&stub->args.removexattr.loc, loc);
- stub->args.removexattr.name = strdup (name);
+ stub->fn.fentrylk = fn;
+
+ if (volume)
+ stub->args.volume = gf_strdup (volume);
+
+ if (fd)
+ stub->args.fd = fd_ref (fd);
+ stub->args.entrylkcmd = cmd;
+ stub->args.entrylktype = type;
+ if (name)
+ stub->args.name = gf_strdup (name);
+
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_removexattr_cbk_stub (call_frame_t *frame,
- fop_removexattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno)
+fop_fentrylk_cbk_stub (call_frame_t *frame, fop_fentrylk_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
+
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ stub = stub_new (frame, 0, GF_FOP_FENTRYLK);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub = stub_new (frame, 0, GF_FOP_REMOVEXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub->fn_cbk.fentrylk = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
- stub->args.removexattr_cbk.fn = fn;
- stub->args.removexattr_cbk.op_ret = op_ret;
- stub->args.removexattr_cbk.op_errno = op_errno;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_lk_stub (call_frame_t *frame,
- fop_lk_t fn,
- fd_t *fd,
- int32_t cmd,
- struct flock *lock)
+fop_readdirp_cbk_stub (call_frame_t *frame, fop_readdirp_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", lock, out);
-
- stub = stub_new (frame, 1, GF_FOP_LK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.lk.fn = fn;
- if (fd)
- stub->args.lk.fd = fd_ref (fd);
- stub->args.lk.cmd = cmd;
- stub->args.lk.lock = *lock;
-out:
- return stub;
-}
+ call_stub_t *stub = NULL;
+ gf_dirent_t *stub_entry = NULL, *entry = NULL;
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-call_stub_t *
-fop_lk_cbk_stub (call_frame_t *frame,
- fop_lk_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct flock *lock)
+ stub = stub_new (frame, 0, GF_FOP_READDIRP);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-{
- call_stub_t *stub = NULL;
+ stub->fn_cbk.readdirp = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", entries, out);
- stub = stub_new (frame, 0, GF_FOP_LK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.lk_cbk.fn = fn;
- stub->args.lk_cbk.op_ret = op_ret;
- stub->args.lk_cbk.op_errno = op_errno;
- if (op_ret == 0)
- stub->args.lk_cbk.lock = *lock;
+ if (op_ret > 0) {
+ list_for_each_entry (entry, &entries->list, list) {
+ stub_entry = gf_dirent_for_name (entry->d_name);
+ if (!stub_entry)
+ goto out;
+ stub_entry->d_off = entry->d_off;
+ stub_entry->d_ino = entry->d_ino;
+ stub_entry->d_stat = entry->d_stat;
+ if (entry->inode)
+ stub_entry->inode = inode_ref (entry->inode);
+ list_add_tail (&stub_entry->list,
+ &stub->args_cbk.entries.list);
+ }
+ }
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
+
call_stub_t *
-fop_inodelk_stub (call_frame_t *frame, fop_inodelk_t fn,
- const char *volume, loc_t *loc, int32_t cmd, struct flock *lock)
+fop_readdir_cbk_stub (call_frame_t *frame, fop_readdir_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
+ gf_dirent_t *stub_entry = NULL, *entry = NULL;
- if (!frame || !lock)
- return NULL;
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 1, GF_FOP_INODELK);
- if (!stub)
- return NULL;
+ stub = stub_new (frame, 0, GF_FOP_READDIR);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.inodelk.fn = fn;
+ stub->fn_cbk.readdir = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
- if (volume)
- stub->args.inodelk.volume = strdup (volume);
+ GF_VALIDATE_OR_GOTO ("call-stub", entries, out);
- loc_copy (&stub->args.inodelk.loc, loc);
- stub->args.inodelk.cmd = cmd;
- stub->args.inodelk.lock = *lock;
+ if (op_ret > 0) {
+ list_for_each_entry (entry, &entries->list, list) {
+ stub_entry = gf_dirent_for_name (entry->d_name);
+ if (!stub_entry)
+ goto out;
+ stub_entry->d_off = entry->d_off;
+ stub_entry->d_ino = entry->d_ino;
- return stub;
+ list_add_tail (&stub_entry->list,
+ &stub->args_cbk.entries.list);
+ }
+ }
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
+out:
+ return stub;
}
+
call_stub_t *
-fop_inodelk_cbk_stub (call_frame_t *frame, fop_inodelk_cbk_t fn,
- int32_t op_ret, int32_t op_errno)
+fop_readdir_stub (call_frame_t *frame, fop_readdir_t fn,
+ fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- if (!frame)
- return NULL;
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_INODELK);
- if (!stub)
- return NULL;
+ stub = stub_new (frame, 1, GF_FOP_READDIR);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.inodelk_cbk.fn = fn;
- stub->args.inodelk_cbk.op_ret = op_ret;
- stub->args.inodelk_cbk.op_errno = op_errno;
+ stub->fn.readdir = fn;
+ stub->args.fd = fd_ref (fd);
+ stub->args.size = size;
+ stub->args.offset = off;
- return stub;
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
+out:
+ return stub;
}
call_stub_t *
-fop_finodelk_stub (call_frame_t *frame, fop_finodelk_t fn,
- const char *volume, fd_t *fd, int32_t cmd, struct flock *lock)
+fop_readdirp_stub (call_frame_t *frame, fop_readdirp_t fn,
+ fd_t *fd, size_t size, off_t off, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- if (!frame || !lock)
- return NULL;
+ stub = stub_new (frame, 1, GF_FOP_READDIRP);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub = stub_new (frame, 1, GF_FOP_FINODELK);
- if (!stub)
- return NULL;
+ stub->fn.readdirp = fn;
+ stub->args.fd = fd_ref (fd);
+ stub->args.size = size;
+ stub->args.offset = off;
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
+out:
+ return stub;
+}
- stub->args.finodelk.fn = fn;
- if (fd)
- stub->args.finodelk.fd = fd_ref (fd);
+call_stub_t *
+fop_rchecksum_stub (call_frame_t *frame, fop_rchecksum_t fn,
+ fd_t *fd, off_t offset, int32_t len, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- if (volume)
- stub->args.finodelk.volume = strdup (volume);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", fd, out);
- stub->args.finodelk.cmd = cmd;
- stub->args.finodelk.lock = *lock;
+ stub = stub_new (frame, 1, GF_FOP_RCHECKSUM);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- return stub;
+ stub->fn.rchecksum = fn;
+ stub->args.fd = fd_ref (fd);
+ stub->args.offset = offset;
+ stub->args.size = len;
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
+out:
+ return stub;
}
call_stub_t *
-fop_finodelk_cbk_stub (call_frame_t *frame, fop_inodelk_cbk_t fn,
- int32_t op_ret, int32_t op_errno)
+fop_rchecksum_cbk_stub (call_frame_t *frame, fop_rchecksum_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ uint32_t weak_checksum, uint8_t *strong_checksum,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- if (!frame)
- return NULL;
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 0, GF_FOP_FINODELK);
- if (!stub)
- return NULL;
+ stub = stub_new (frame, 0, GF_FOP_RCHECKSUM);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.finodelk_cbk.fn = fn;
- stub->args.finodelk_cbk.op_ret = op_ret;
- stub->args.finodelk_cbk.op_errno = op_errno;
+ stub->fn_cbk.rchecksum = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
- return stub;
+ if (op_ret >= 0) {
+ stub->args_cbk.weak_checksum =
+ weak_checksum;
+ stub->args_cbk.strong_checksum =
+ memdup (strong_checksum, MD5_DIGEST_LENGTH);
+ }
+
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
+out:
+ return stub;
}
call_stub_t *
-fop_entrylk_stub (call_frame_t *frame, fop_entrylk_t fn,
- const char *volume, loc_t *loc, const char *name,
- entrylk_cmd cmd, entrylk_type type)
+fop_xattrop_cbk_stub (call_frame_t *frame, fop_xattrop_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- if (!frame)
- return NULL;
-
- stub = stub_new (frame, 1, GF_FOP_ENTRYLK);
- if (!stub)
- return NULL;
+ call_stub_t *stub = NULL;
- stub->args.entrylk.fn = fn;
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- if (volume)
- stub->args.entrylk.volume = strdup (volume);
+ stub = stub_new (frame, 0, GF_FOP_XATTROP);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- loc_copy (&stub->args.entrylk.loc, loc);
+ stub->fn_cbk.xattrop = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
- stub->args.entrylk.cmd = cmd;
- stub->args.entrylk.type = type;
- if (name)
- stub->args.entrylk.name = strdup (name);
-
- return stub;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
+out:
+ return stub;
}
+
call_stub_t *
-fop_entrylk_cbk_stub (call_frame_t *frame, fop_entrylk_cbk_t fn,
- int32_t op_ret, int32_t op_errno)
+fop_fxattrop_cbk_stub (call_frame_t *frame, fop_fxattrop_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xattr, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- if (!frame)
- return NULL;
+ call_stub_t *stub = NULL;
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 0, GF_FOP_ENTRYLK);
- if (!stub)
- return NULL;
+ stub = stub_new (frame, 0, GF_FOP_FXATTROP);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.entrylk_cbk.fn = fn;
- stub->args.entrylk_cbk.op_ret = op_ret;
- stub->args.entrylk_cbk.op_errno = op_errno;
+ stub->fn_cbk.fxattrop = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ if (xattr)
+ stub->args_cbk.xattr = dict_ref (xattr);
- return stub;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
+out:
+ return stub;
}
call_stub_t *
-fop_fentrylk_stub (call_frame_t *frame, fop_fentrylk_t fn,
- const char *volume, fd_t *fd, const char *name,
- entrylk_cmd cmd, entrylk_type type)
+fop_xattrop_stub (call_frame_t *frame, fop_xattrop_t fn,
+ loc_t *loc, gf_xattrop_flags_t optype,
+ dict_t *xattr, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- if (!frame)
- return NULL;
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", xattr, out);
- stub = stub_new (frame, 1, GF_FOP_FENTRYLK);
- if (!stub)
- return NULL;
+ stub = stub_new (frame, 1, GF_FOP_XATTROP);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.fentrylk.fn = fn;
+ stub->fn.xattrop = fn;
- if (volume)
- stub->args.fentrylk.volume = strdup (volume);
+ loc_copy (&stub->args.loc, loc);
- if (fd)
- stub->args.fentrylk.fd = fd_ref (fd);
- stub->args.fentrylk.cmd = cmd;
- stub->args.fentrylk.type = type;
- if (name)
- stub->args.fentrylk.name = strdup (name);
+ stub->args.optype = optype;
+ stub->args.xattr = dict_ref (xattr);
- return stub;
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
+out:
+ return stub;
}
+
call_stub_t *
-fop_fentrylk_cbk_stub (call_frame_t *frame, fop_fentrylk_cbk_t fn,
- int32_t op_ret, int32_t op_errno)
+fop_fxattrop_stub (call_frame_t *frame, fop_fxattrop_t fn,
+ fd_t *fd, gf_xattrop_flags_t optype,
+ dict_t *xattr, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- if (!frame)
- return NULL;
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", xattr, out);
- stub = stub_new (frame, 0, GF_FOP_FENTRYLK);
- if (!stub)
- return NULL;
+ stub = stub_new (frame, 1, GF_FOP_FXATTROP);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.fentrylk_cbk.fn = fn;
- stub->args.fentrylk_cbk.op_ret = op_ret;
- stub->args.fentrylk_cbk.op_errno = op_errno;
+ stub->fn.fxattrop = fn;
- return stub;
-}
+ stub->args.fd = fd_ref (fd);
+ stub->args.optype = optype;
+ stub->args.xattr = dict_ref (xattr);
-call_stub_t *
-fop_setdents_stub (call_frame_t *frame,
- fop_setdents_t fn,
- fd_t *fd,
- int32_t flags,
- dir_entry_t *entries,
- int32_t count)
-{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 1, GF_FOP_SETDENTS);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- if (fd)
- stub->args.setdents.fd = fd_ref (fd);
- stub->args.setdents.fn = fn;
- stub->args.setdents.flags = flags;
- stub->args.setdents.count = count;
- if (entries) {
- stub->args.setdents.entries.next = entries->next;
- entries->next = NULL;
- }
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
+
call_stub_t *
-fop_setdents_cbk_stub (call_frame_t *frame,
- fop_setdents_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno)
-{
- call_stub_t *stub = NULL;
+fop_setattr_cbk_stub (call_frame_t *frame, fop_setattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 0, GF_FOP_SETDENTS);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 0, GF_FOP_SETATTR);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.setdents_cbk.fn = fn;
- stub->args.setdents_cbk.op_ret = op_ret;
- stub->args.setdents_cbk.op_errno = op_errno;
-out:
- return stub;
-
-}
+ stub->fn_cbk.setattr = fn;
-call_stub_t *
-fop_readdir_cbk_stub (call_frame_t *frame,
- fop_readdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *entries)
-{
- call_stub_t *stub = NULL;
- gf_dirent_t *stub_entry = NULL, *entry = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_READDIR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.readdir_cbk.fn = fn;
- stub->args.readdir_cbk.op_ret = op_ret;
- stub->args.readdir_cbk.op_errno = op_errno;
- INIT_LIST_HEAD (&stub->args.readdir_cbk.entries.list);
-
- /* This check must come after the init of head above
- * so we're sure the list is empty for list_empty.
- */
- if (!entries)
- goto out;
-
- if (op_ret > 0) {
- list_for_each_entry (entry, &entries->list, list) {
- stub_entry = gf_dirent_for_name (entry->d_name);
- ERR_ABORT (stub_entry);
- stub_entry->d_off = entry->d_off;
- stub_entry->d_ino = entry->d_ino;
-
- list_add_tail (&stub_entry->list,
- &stub->args.readdir_cbk.entries.list);
- }
- }
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+
+ if (statpre)
+ stub->args_cbk.prestat = *statpre;
+ if (statpost)
+ stub->args_cbk.poststat = *statpost;
+
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
+
call_stub_t *
-fop_readdir_stub (call_frame_t *frame,
- fop_readdir_t fn,
- fd_t *fd,
- size_t size,
- off_t off)
+fop_fsetattr_cbk_stub (call_frame_t *frame, fop_setattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_READDIR);
- stub->args.readdir.fn = fn;
- stub->args.readdir.fd = fd_ref (fd);
- stub->args.readdir.size = size;
- stub->args.readdir.off = off;
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- return stub;
-}
-call_stub_t *
-fop_checksum_stub (call_frame_t *frame,
- fop_checksum_t fn,
- loc_t *loc,
- int32_t flags)
-{
- call_stub_t *stub = NULL;
+ stub = stub_new (frame, 0, GF_FOP_FSETATTR);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ stub->fn_cbk.fsetattr = fn;
- stub = stub_new (frame, 1, GF_FOP_CHECKSUM);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
- stub->args.checksum.fn = fn;
- loc_copy (&stub->args.checksum.loc, loc);
- stub->args.checksum.flags = flags;
+ if (statpre)
+ stub->args_cbk.prestat = *statpre;
+ if (statpost)
+ stub->args_cbk.poststat = *statpost;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_checksum_cbk_stub (call_frame_t *frame,
- fop_checksum_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *file_checksum,
- uint8_t *dir_checksum)
+fop_setattr_stub (call_frame_t *frame, fop_setattr_t fn,
+ loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
+
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", fn, out);
+
+ stub = stub_new (frame, 1, GF_FOP_SETATTR);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+
+ stub->fn.setattr = fn;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ loc_copy (&stub->args.loc, loc);
- stub = stub_new (frame, 0, GF_FOP_CHECKSUM);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ if (stbuf)
+ stub->args.stat = *stbuf;
- stub->args.checksum_cbk.fn = fn;
- stub->args.checksum_cbk.op_ret = op_ret;
- stub->args.checksum_cbk.op_errno = op_errno;
- if (op_ret >= 0)
- {
- stub->args.checksum_cbk.file_checksum =
- memdup (file_checksum, NAME_MAX);
+ stub->args.valid = valid;
- stub->args.checksum_cbk.dir_checksum =
- memdup (dir_checksum, NAME_MAX);
- }
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_xattrop_cbk_stub (call_frame_t *frame,
- fop_xattrop_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno)
+fop_fsetattr_stub (call_frame_t *frame, fop_fsetattr_t fn,
+ fd_t *fd, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_XATTROP);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", fn, out);
- stub->args.xattrop_cbk.fn = fn;
- stub->args.xattrop_cbk.op_ret = op_ret;
- stub->args.xattrop_cbk.op_errno = op_errno;
+ stub = stub_new (frame, 1, GF_FOP_FSETATTR);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-out:
- return stub;
-}
+ stub->fn.fsetattr = fn;
+ if (fd)
+ stub->args.fd = fd_ref (fd);
-call_stub_t *
-fop_fxattrop_cbk_stub (call_frame_t *frame,
- fop_fxattrop_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *xattr)
-{
- call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ if (stbuf)
+ stub->args.stat = *stbuf;
- stub = stub_new (frame, 1, GF_FOP_FXATTROP);
- stub->args.fxattrop_cbk.fn = fn;
- stub->args.fxattrop_cbk.op_ret = op_ret;
- stub->args.fxattrop_cbk.op_errno = op_errno;
- if (xattr)
- stub->args.fxattrop_cbk.xattr = dict_ref (xattr);
+ stub->args.valid = valid;
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_xattrop_stub (call_frame_t *frame,
- fop_xattrop_t fn,
- loc_t *loc,
- gf_xattrop_flags_t optype,
- dict_t *xattr)
+fop_fallocate_cbk_stub(call_frame_t *frame, fop_fallocate_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- if (!frame || !xattr)
- return NULL;
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- stub = stub_new (frame, 1, GF_FOP_XATTROP);
- if (!stub)
- return NULL;
+ stub = stub_new (frame, 0, GF_FOP_FALLOCATE);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.xattrop.fn = fn;
-
- loc_copy (&stub->args.xattrop.loc, loc);
+ stub->fn_cbk.fallocate = fn;
- stub->args.xattrop.optype = optype;
- stub->args.xattrop.xattr = dict_ref (xattr);
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
- return stub;
+ if (statpre)
+ stub->args_cbk.prestat = *statpre;
+ if (statpost)
+ stub->args_cbk.poststat = *statpost;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
+out:
+ return stub;
}
call_stub_t *
-fop_fxattrop_stub (call_frame_t *frame,
- fop_fxattrop_t fn,
- fd_t *fd,
- gf_xattrop_flags_t optype,
- dict_t *xattr)
+fop_fallocate_stub(call_frame_t *frame, fop_fallocate_t fn, fd_t *fd,
+ int32_t mode, off_t offset, size_t len, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- if (!frame || !xattr)
- return NULL;
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", fn, out);
- stub = stub_new (frame, 1, GF_FOP_FXATTROP);
- if (!stub)
- return NULL;
+ stub = stub_new (frame, 1, GF_FOP_FALLOCATE);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.fxattrop.fn = fn;
-
- stub->args.fxattrop.fd = fd_ref (fd);
+ stub->fn.fallocate = fn;
- stub->args.fxattrop.optype = optype;
- stub->args.fxattrop.xattr = dict_ref (xattr);
+ if (fd)
+ stub->args.fd = fd_ref (fd);
- return stub;
-}
+ stub->args.flags = mode;
+ stub->args.offset = offset;
+ stub->args.size = len;
+
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
+out:
+ return stub;
+}
call_stub_t *
-fop_lock_notify_cbk_stub (call_frame_t *frame, fop_lock_notify_cbk_t fn,
- int32_t op_ret, int32_t op_errno)
+fop_discard_cbk_stub(call_frame_t *frame, fop_discard_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
+
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_LOCK_NOTIFY);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new (frame, 0, GF_FOP_DISCARD);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.lock_notify_cbk.fn = fn;
- stub->args.lock_notify_cbk.op_ret = op_ret;
- stub->args.lock_notify_cbk.op_errno = op_errno;
+ stub->fn_cbk.discard = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+
+ if (statpre)
+ stub->args_cbk.prestat = *statpre;
+ if (statpost)
+ stub->args_cbk.poststat = *statpost;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_lock_notify_stub (call_frame_t *frame, fop_lock_notify_t fn,
- loc_t *loc, int32_t timeout)
+fop_discard_stub(call_frame_t *frame, fop_discard_t fn, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- if (!frame)
- return NULL;
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", fn, out);
- stub = stub_new (frame, 1, GF_FOP_LOCK_NOTIFY);
- if (!stub)
- return NULL;
+ stub = stub_new (frame, 1, GF_FOP_DISCARD);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.lock_notify.fn = fn;
-
- loc_copy (&stub->args.lock_notify.loc, loc);
+ stub->fn.discard = fn;
- stub->args.lock_notify.timeout = timeout;
+ if (fd)
+ stub->args.fd = fd_ref (fd);
- return stub;
-}
+ stub->args.offset = offset;
+ stub->args.size = len;
+
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
+out:
+ return stub;
+}
call_stub_t *
-fop_lock_fnotify_cbk_stub (call_frame_t *frame, fop_lock_fnotify_cbk_t fn,
- int32_t op_ret, int32_t op_errno)
+fop_zerofill_cbk_stub(call_frame_t *frame, fop_zerofill_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
+
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+
+ stub = stub_new (frame, 0, GF_FOP_ZEROFILL);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_LOCK_FNOTIFY);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub->fn_cbk.zerofill = fn;
- stub->args.lock_fnotify_cbk.fn = fn;
- stub->args.lock_fnotify_cbk.op_ret = op_ret;
- stub->args.lock_fnotify_cbk.op_errno = op_errno;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ if (statpre)
+ stub->args_cbk.prestat = *statpre;
+ if (statpost)
+ stub->args_cbk.poststat = *statpost;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_lock_fnotify_stub (call_frame_t *frame, fop_lock_fnotify_t fn,
- fd_t *fd, int32_t timeout)
+fop_zerofill_stub(call_frame_t *frame, fop_zerofill_t fn, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
+
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", fn, out);
- if (!frame)
- return NULL;
+ stub = stub_new (frame, 1, GF_FOP_ZEROFILL);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub = stub_new (frame, 1, GF_FOP_LOCK_FNOTIFY);
- if (!stub)
- return NULL;
+ stub->fn.zerofill = fn;
- stub->args.lock_fnotify.fn = fn;
+ if (fd)
+ stub->args.fd = fd_ref (fd);
- stub->args.lock_fnotify.fd = fd_ref (fd);
- stub->args.lock_fnotify.timeout = timeout;
+ stub->args.offset = offset;
+ stub->args.size = len;
+
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
+out:
+ return stub;
- return stub;
}
static void
call_resume_wind (call_stub_t *stub)
{
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- switch (stub->fop) {
- case GF_FOP_OPEN:
- {
- stub->args.open.fn (stub->frame,
- stub->frame->this,
- &stub->args.open.loc,
- stub->args.open.flags, stub->args.open.fd);
- break;
- }
- case GF_FOP_CREATE:
- {
- stub->args.create.fn (stub->frame,
- stub->frame->this,
- &stub->args.create.loc,
- stub->args.create.flags,
- stub->args.create.mode,
- stub->args.create.fd);
- break;
- }
- case GF_FOP_STAT:
- {
- stub->args.stat.fn (stub->frame,
- stub->frame->this,
- &stub->args.stat.loc);
- break;
- }
- case GF_FOP_READLINK:
- {
- stub->args.readlink.fn (stub->frame,
- stub->frame->this,
- &stub->args.readlink.loc,
- stub->args.readlink.size);
- break;
- }
-
- case GF_FOP_MKNOD:
- {
- stub->args.mknod.fn (stub->frame,
- stub->frame->this,
- &stub->args.mknod.loc,
- stub->args.mknod.mode,
- stub->args.mknod.rdev);
- }
- break;
-
- case GF_FOP_MKDIR:
- {
- stub->args.mkdir.fn (stub->frame,
- stub->frame->this,
- &stub->args.mkdir.loc,
- stub->args.mkdir.mode);
- }
- break;
-
- case GF_FOP_UNLINK:
- {
- stub->args.unlink.fn (stub->frame,
- stub->frame->this,
- &stub->args.unlink.loc);
- }
- break;
-
- case GF_FOP_RMDIR:
- {
- stub->args.rmdir.fn (stub->frame,
- stub->frame->this,
- &stub->args.rmdir.loc);
- }
- break;
-
- case GF_FOP_SYMLINK:
- {
- stub->args.symlink.fn (stub->frame,
- stub->frame->this,
- stub->args.symlink.linkname,
- &stub->args.symlink.loc);
- }
- break;
-
- case GF_FOP_RENAME:
- {
- stub->args.rename.fn (stub->frame,
- stub->frame->this,
- &stub->args.rename.old,
- &stub->args.rename.new);
- }
- break;
-
- case GF_FOP_LINK:
- {
- stub->args.link.fn (stub->frame,
- stub->frame->this,
- &stub->args.link.oldloc,
- &stub->args.link.newloc);
- }
- break;
-
- case GF_FOP_CHMOD:
- {
- stub->args.chmod.fn (stub->frame,
- stub->frame->this,
- &stub->args.chmod.loc,
- stub->args.chmod.mode);
- }
- break;
-
- case GF_FOP_CHOWN:
- {
- stub->args.chown.fn (stub->frame,
- stub->frame->this,
- &stub->args.chown.loc,
- stub->args.chown.uid,
- stub->args.chown.gid);
- break;
- }
- case GF_FOP_TRUNCATE:
- {
- stub->args.truncate.fn (stub->frame,
- stub->frame->this,
- &stub->args.truncate.loc,
- stub->args.truncate.off);
- break;
- }
-
- case GF_FOP_READ:
- {
- stub->args.readv.fn (stub->frame,
- stub->frame->this,
- stub->args.readv.fd,
- stub->args.readv.size,
- stub->args.readv.off);
- break;
- }
-
- case GF_FOP_WRITE:
- {
- stub->args.writev.fn (stub->frame,
- stub->frame->this,
- stub->args.writev.fd,
- stub->args.writev.vector,
- stub->args.writev.count,
- stub->args.writev.off,
- stub->args.writev.iobref);
- break;
- }
-
- case GF_FOP_STATFS:
- {
- stub->args.statfs.fn (stub->frame,
- stub->frame->this,
- &stub->args.statfs.loc);
- break;
- }
- case GF_FOP_FLUSH:
- {
- stub->args.flush.fn (stub->frame,
- stub->frame->this,
- stub->args.flush.fd);
- break;
- }
-
- case GF_FOP_FSYNC:
- {
- stub->args.fsync.fn (stub->frame,
- stub->frame->this,
- stub->args.fsync.fd,
- stub->args.fsync.datasync);
- break;
- }
-
- case GF_FOP_SETXATTR:
- {
- stub->args.setxattr.fn (stub->frame,
- stub->frame->this,
- &stub->args.setxattr.loc,
- stub->args.setxattr.dict,
- stub->args.setxattr.flags);
- break;
- }
-
- case GF_FOP_GETXATTR:
- {
- stub->args.getxattr.fn (stub->frame,
- stub->frame->this,
- &stub->args.getxattr.loc,
- stub->args.getxattr.name);
- break;
- }
-
- case GF_FOP_FSETXATTR:
- {
- stub->args.fsetxattr.fn (stub->frame,
- stub->frame->this,
- stub->args.fsetxattr.fd,
- stub->args.fsetxattr.dict,
- stub->args.fsetxattr.flags);
- break;
- }
-
- case GF_FOP_FGETXATTR:
- {
- stub->args.fgetxattr.fn (stub->frame,
- stub->frame->this,
- stub->args.fgetxattr.fd,
- stub->args.fgetxattr.name);
- break;
- }
-
- case GF_FOP_REMOVEXATTR:
- {
- stub->args.removexattr.fn (stub->frame,
- stub->frame->this,
- &stub->args.removexattr.loc,
- stub->args.removexattr.name);
- break;
- }
-
- case GF_FOP_OPENDIR:
- {
- stub->args.opendir.fn (stub->frame,
- stub->frame->this,
- &stub->args.opendir.loc,
- stub->args.opendir.fd);
- break;
- }
-
- case GF_FOP_GETDENTS:
- {
- stub->args.getdents.fn (stub->frame,
- stub->frame->this,
- stub->args.getdents.fd,
- stub->args.getdents.size,
- stub->args.getdents.off,
- stub->args.getdents.flag);
- break;
- }
-
- case GF_FOP_FSYNCDIR:
- {
- stub->args.fsyncdir.fn (stub->frame,
- stub->frame->this,
- stub->args.fsyncdir.fd,
- stub->args.fsyncdir.datasync);
- break;
- }
-
- case GF_FOP_ACCESS:
- {
- stub->args.access.fn (stub->frame,
- stub->frame->this,
- &stub->args.access.loc,
- stub->args.access.mask);
- break;
- }
-
- case GF_FOP_FTRUNCATE:
- {
- stub->args.ftruncate.fn (stub->frame,
- stub->frame->this,
- stub->args.ftruncate.fd,
- stub->args.ftruncate.off);
- break;
- }
-
- case GF_FOP_FSTAT:
- {
- stub->args.fstat.fn (stub->frame,
- stub->frame->this,
- stub->args.fstat.fd);
- break;
- }
-
- case GF_FOP_LK:
- {
- stub->args.lk.fn (stub->frame,
- stub->frame->this,
- stub->args.lk.fd,
- stub->args.lk.cmd,
- &stub->args.lk.lock);
- break;
- }
-
- case GF_FOP_INODELK:
- {
- stub->args.inodelk.fn (stub->frame,
- stub->frame->this,
- stub->args.inodelk.volume,
- &stub->args.inodelk.loc,
- stub->args.inodelk.cmd,
- &stub->args.inodelk.lock);
- break;
- }
-
- case GF_FOP_FINODELK:
- {
- stub->args.finodelk.fn (stub->frame,
- stub->frame->this,
- stub->args.finodelk.volume,
- stub->args.finodelk.fd,
- stub->args.finodelk.cmd,
- &stub->args.finodelk.lock);
- break;
- }
-
- case GF_FOP_ENTRYLK:
- {
- stub->args.entrylk.fn (stub->frame,
- stub->frame->this,
- stub->args.entrylk.volume,
- &stub->args.entrylk.loc,
- stub->args.entrylk.name,
- stub->args.entrylk.cmd,
- stub->args.entrylk.type);
- break;
- }
-
- case GF_FOP_FENTRYLK:
- {
- stub->args.fentrylk.fn (stub->frame,
- stub->frame->this,
- stub->args.fentrylk.volume,
- stub->args.fentrylk.fd,
- stub->args.fentrylk.name,
- stub->args.fentrylk.cmd,
- stub->args.fentrylk.type);
- break;
- }
-
- case GF_FOP_UTIMENS:
- {
- stub->args.utimens.fn (stub->frame,
- stub->frame->this,
- &stub->args.utimens.loc,
- stub->args.utimens.tv);
- break;
- }
-
-
- break;
- case GF_FOP_FCHMOD:
- {
- stub->args.fchmod.fn (stub->frame,
- stub->frame->this,
- stub->args.fchmod.fd,
- stub->args.fchmod.mode);
- break;
- }
-
- case GF_FOP_FCHOWN:
- {
- stub->args.fchown.fn (stub->frame,
- stub->frame->this,
- stub->args.fchown.fd,
- stub->args.fchown.uid,
- stub->args.fchown.gid);
- break;
- }
-
- case GF_FOP_LOOKUP:
- {
- stub->args.lookup.fn (stub->frame,
- stub->frame->this,
- &stub->args.lookup.loc,
- stub->args.lookup.xattr_req);
- break;
- }
-
- case GF_FOP_SETDENTS:
- {
- stub->args.setdents.fn (stub->frame,
- stub->frame->this,
- stub->args.setdents.fd,
- stub->args.setdents.flags,
- &stub->args.setdents.entries,
- stub->args.setdents.count);
- break;
- }
-
- case GF_FOP_CHECKSUM:
- {
- stub->args.checksum.fn (stub->frame,
- stub->frame->this,
- &stub->args.checksum.loc,
- stub->args.checksum.flags);
- break;
- }
- case GF_FOP_READDIR:
- {
- stub->args.readdir.fn (stub->frame,
- stub->frame->this,
- stub->args.readdir.fd,
- stub->args.readdir.size,
- stub->args.readdir.off);
- break;
- }
- case GF_FOP_XATTROP:
- {
- stub->args.xattrop.fn (stub->frame,
- stub->frame->this,
- &stub->args.xattrop.loc,
- stub->args.xattrop.optype,
- stub->args.xattrop.xattr);
-
- break;
- }
- case GF_FOP_FXATTROP:
- {
- stub->args.fxattrop.fn (stub->frame,
- stub->frame->this,
- stub->args.fxattrop.fd,
- stub->args.fxattrop.optype,
- stub->args.fxattrop.xattr);
-
- break;
- }
- case GF_FOP_LOCK_NOTIFY:
- {
- stub->args.lock_notify.fn (stub->frame,
- stub->frame->this,
- &stub->args.lock_notify.loc,
- stub->args.lock_notify.timeout);
- break;
- }
- case GF_FOP_LOCK_FNOTIFY:
- {
- stub->args.lock_fnotify.fn (stub->frame,
- stub->frame->this,
- stub->args.lock_fnotify.fd,
- stub->args.lock_fnotify.timeout);
- break;
- }
- default:
- {
- gf_log ("call-stub",
- GF_LOG_DEBUG,
- "Invalid value of FOP");
- }
- break;
- }
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+
+ switch (stub->fop) {
+ case GF_FOP_OPEN:
+ stub->fn.open (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.flags,
+ stub->args.fd, stub->args.xdata);
+ break;
+ case GF_FOP_CREATE:
+ stub->fn.create (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.flags,
+ stub->args.mode, stub->args.umask,
+ stub->args.fd, stub->args.xdata);
+ break;
+ case GF_FOP_STAT:
+ stub->fn.stat (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.xdata);
+ break;
+ case GF_FOP_READLINK:
+ stub->fn.readlink (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.size,
+ stub->args.xdata);
+ break;
+ case GF_FOP_MKNOD:
+ stub->fn.mknod (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.mode,
+ stub->args.rdev, stub->args.umask,
+ stub->args.xdata);
+ break;
+ case GF_FOP_MKDIR:
+ stub->fn.mkdir (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.mode,
+ stub->args.umask, stub->args.xdata);
+ break;
+ case GF_FOP_UNLINK:
+ stub->fn.unlink (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.xflag,
+ stub->args.xdata);
+ break;
+ case GF_FOP_RMDIR:
+ stub->fn.rmdir (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.flags,
+ stub->args.xdata);
+ break;
+ case GF_FOP_SYMLINK:
+ stub->fn.symlink (stub->frame, stub->frame->this,
+ stub->args.linkname, &stub->args.loc,
+ stub->args.umask, stub->args.xdata);
+ break;
+ case GF_FOP_RENAME:
+ stub->fn.rename (stub->frame, stub->frame->this,
+ &stub->args.loc, &stub->args.loc2,
+ stub->args.xdata);
+ break;
+ case GF_FOP_LINK:
+ stub->fn.link (stub->frame, stub->frame->this,
+ &stub->args.loc, &stub->args.loc2,
+ stub->args.xdata);
+ break;
+ case GF_FOP_TRUNCATE:
+ stub->fn.truncate (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.offset,
+ stub->args.xdata);
+ break;
+ case GF_FOP_READ:
+ stub->fn.readv (stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.size,
+ stub->args.offset, stub->args.flags,
+ stub->args.xdata);
+ break;
+ case GF_FOP_WRITE:
+ stub->fn.writev (stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.vector,
+ stub->args.count, stub->args.offset,
+ stub->args.flags, stub->args.iobref,
+ stub->args.xdata);
+ break;
+ case GF_FOP_STATFS:
+ stub->fn.statfs (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.xdata);
+ break;
+ case GF_FOP_FLUSH:
+ stub->fn.flush (stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.xdata);
+ break;
+ case GF_FOP_FSYNC:
+ stub->fn.fsync (stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.datasync,
+ stub->args.xdata);
+ break;
+ case GF_FOP_SETXATTR:
+ stub->fn.setxattr (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.xattr,
+ stub->args.flags, stub->args.xdata);
+ break;
+ case GF_FOP_GETXATTR:
+ stub->fn.getxattr (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.name,
+ stub->args.xdata);
+ break;
+ case GF_FOP_FSETXATTR:
+ stub->fn.fsetxattr (stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.xattr,
+ stub->args.flags, stub->args.xdata);
+ break;
+ case GF_FOP_FGETXATTR:
+ stub->fn.fgetxattr (stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.name,
+ stub->args.xdata);
+ break;
+ case GF_FOP_REMOVEXATTR:
+ stub->fn.removexattr (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.name,
+ stub->args.xdata);
+ break;
+ case GF_FOP_FREMOVEXATTR:
+ stub->fn.fremovexattr (stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.name,
+ stub->args.xdata);
+ break;
+ case GF_FOP_OPENDIR:
+ stub->fn.opendir (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.fd,
+ stub->args.xdata);
+ break;
+ case GF_FOP_FSYNCDIR:
+ stub->fn.fsyncdir (stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.datasync,
+ stub->args.xdata);
+ break;
+ case GF_FOP_ACCESS:
+ stub->fn.access (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.mask,
+ stub->args.xdata);
+ break;
+
+ case GF_FOP_FTRUNCATE:
+ stub->fn.ftruncate (stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.offset,
+ stub->args.xdata);
+ break;
+ case GF_FOP_FSTAT:
+ stub->fn.fstat (stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.xdata);
+ break;
+ case GF_FOP_LK:
+ stub->fn.lk (stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.cmd,
+ &stub->args.lock, stub->args.xdata);
+ break;
+ case GF_FOP_INODELK:
+ stub->fn.inodelk (stub->frame, stub->frame->this,
+ stub->args.volume, &stub->args.loc,
+ stub->args.cmd, &stub->args.lock,
+ stub->args.xdata);
+ break;
+ case GF_FOP_FINODELK:
+ stub->fn.finodelk (stub->frame, stub->frame->this,
+ stub->args.volume, stub->args.fd,
+ stub->args.cmd, &stub->args.lock,
+ stub->args.xdata);
+ break;
+ case GF_FOP_ENTRYLK:
+ stub->fn.entrylk (stub->frame, stub->frame->this,
+ stub->args.volume, &stub->args.loc,
+ stub->args.name, stub->args.entrylkcmd,
+ stub->args.entrylktype, stub->args.xdata);
+ break;
+ case GF_FOP_FENTRYLK:
+ stub->fn.fentrylk (stub->frame, stub->frame->this,
+ stub->args.volume, stub->args.fd,
+ stub->args.name, stub->args.entrylkcmd,
+ stub->args.entrylktype, stub->args.xdata);
+ break;
+ case GF_FOP_LOOKUP:
+ stub->fn.lookup (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.xdata);
+ break;
+ case GF_FOP_RCHECKSUM:
+ stub->fn.rchecksum (stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.offset,
+ stub->args.size, stub->args.xdata);
+ break;
+ case GF_FOP_READDIR:
+ stub->fn.readdir (stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.size,
+ stub->args.offset, stub->args.xdata);
+ break;
+ case GF_FOP_READDIRP:
+ stub->fn.readdirp (stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.size,
+ stub->args.offset, stub->args.xdata);
+ break;
+ case GF_FOP_XATTROP:
+ stub->fn.xattrop (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.optype,
+ stub->args.xattr, stub->args.xdata);
+ break;
+ case GF_FOP_FXATTROP:
+ stub->fn.fxattrop (stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.optype,
+ stub->args.xattr, stub->args.xdata);
+ break;
+ case GF_FOP_SETATTR:
+ stub->fn.setattr (stub->frame, stub->frame->this,
+ &stub->args.loc, &stub->args.stat,
+ stub->args.valid, stub->args.xdata);
+ break;
+ case GF_FOP_FSETATTR:
+ stub->fn.fsetattr (stub->frame, stub->frame->this,
+ stub->args.fd, &stub->args.stat,
+ stub->args.valid, stub->args.xdata);
+ break;
+ case GF_FOP_FALLOCATE:
+ stub->fn.fallocate(stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.flags,
+ stub->args.offset, stub->args.size,
+ stub->args.xdata);
+ break;
+ case GF_FOP_DISCARD:
+ stub->fn.discard(stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.offset,
+ stub->args.size, stub->args.xdata);
+ break;
+ case GF_FOP_ZEROFILL:
+ stub->fn.zerofill(stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.offset,
+ stub->args.size, stub->args.xdata);
+ break;
+
+ default:
+ gf_log_callingfn ("call-stub", GF_LOG_ERROR,
+ "Invalid value of FOP (%d)",
+ stub->fop);
+ break;
+ }
out:
- return;
+ return;
}
+#define STUB_UNWIND(stb, fop, args ...) do { \
+ if (stb->fn_cbk.fop) \
+ stb->fn_cbk.fop (stb->frame, stb->frame->cookie, \
+ stb->frame->this, stb->args_cbk.op_ret, \
+ stb->args_cbk.op_errno, args); \
+ else \
+ STACK_UNWIND_STRICT (fop, stb->frame, stb->args_cbk.op_ret, \
+ stb->args_cbk.op_errno, args); \
+ } while (0)
+
static void
call_resume_unwind (call_stub_t *stub)
{
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- switch (stub->fop) {
- case GF_FOP_OPEN:
- {
- if (!stub->args.open_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.open_cbk.op_ret,
- stub->args.open_cbk.op_errno,
- stub->args.open_cbk.fd);
- else
- stub->args.open_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.open_cbk.op_ret,
- stub->args.open_cbk.op_errno,
- stub->args.open_cbk.fd);
- break;
- }
-
- case GF_FOP_CREATE:
- {
- if (!stub->args.create_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.create_cbk.op_ret,
- stub->args.create_cbk.op_errno,
- stub->args.create_cbk.fd,
- stub->args.create_cbk.inode,
- &stub->args.create_cbk.buf);
- else
- stub->args.create_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.create_cbk.op_ret,
- stub->args.create_cbk.op_errno,
- stub->args.create_cbk.fd,
- stub->args.create_cbk.inode,
- &stub->args.create_cbk.buf);
-
- break;
- }
-
- case GF_FOP_STAT:
- {
- if (!stub->args.stat_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.stat_cbk.op_ret,
- stub->args.stat_cbk.op_errno,
- &stub->args.stat_cbk.buf);
- else
- stub->args.stat_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.stat_cbk.op_ret,
- stub->args.stat_cbk.op_errno,
- &stub->args.stat_cbk.buf);
-
- break;
- }
-
- case GF_FOP_READLINK:
- {
- if (!stub->args.readlink_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.readlink_cbk.op_ret,
- stub->args.readlink_cbk.op_errno,
- stub->args.readlink_cbk.buf);
- else
- stub->args.readlink_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.readlink_cbk.op_ret,
- stub->args.readlink_cbk.op_errno,
- stub->args.readlink_cbk.buf);
-
- break;
- }
-
- case GF_FOP_MKNOD:
- {
- if (!stub->args.mknod_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.mknod_cbk.op_ret,
- stub->args.mknod_cbk.op_errno,
- stub->args.mknod_cbk.inode,
- &stub->args.mknod_cbk.buf);
- else
- stub->args.mknod_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.mknod_cbk.op_ret,
- stub->args.mknod_cbk.op_errno,
- stub->args.mknod_cbk.inode,
- &stub->args.mknod_cbk.buf);
- break;
- }
-
- case GF_FOP_MKDIR:
- {
- if (!stub->args.mkdir_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.mkdir_cbk.op_ret,
- stub->args.mkdir_cbk.op_errno,
- stub->args.mkdir_cbk.inode,
- &stub->args.mkdir_cbk.buf);
- else
- stub->args.mkdir_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.mkdir_cbk.op_ret,
- stub->args.mkdir_cbk.op_errno,
- stub->args.mkdir_cbk.inode,
- &stub->args.mkdir_cbk.buf);
-
- if (stub->args.mkdir_cbk.inode)
- inode_unref (stub->args.mkdir_cbk.inode);
-
- break;
- }
-
- case GF_FOP_UNLINK:
- {
- if (!stub->args.unlink_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.unlink_cbk.op_ret,
- stub->args.unlink_cbk.op_errno);
- else
- stub->args.unlink_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.unlink_cbk.op_ret,
- stub->args.unlink_cbk.op_errno);
- break;
- }
-
- case GF_FOP_RMDIR:
- {
- if (!stub->args.rmdir_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.rmdir_cbk.op_ret,
- stub->args.rmdir_cbk.op_errno);
- else
- stub->args.unlink_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.rmdir_cbk.op_ret,
- stub->args.rmdir_cbk.op_errno);
- break;
- }
-
- case GF_FOP_SYMLINK:
- {
- if (!stub->args.symlink_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.symlink_cbk.op_ret,
- stub->args.symlink_cbk.op_errno,
- stub->args.symlink_cbk.inode,
- &stub->args.symlink_cbk.buf);
- else
- stub->args.symlink_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.symlink_cbk.op_ret,
- stub->args.symlink_cbk.op_errno,
- stub->args.symlink_cbk.inode,
- &stub->args.symlink_cbk.buf);
- }
- break;
-
- case GF_FOP_RENAME:
- {
-#if 0
- if (!stub->args.rename_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.rename_cbk.op_ret,
- stub->args.rename_cbk.op_errno,
- &stub->args.rename_cbk.buf);
- else
- stub->args.rename_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.rename_cbk.op_ret,
- stub->args.rename_cbk.op_errno,
- &stub->args.rename_cbk.buf);
-#endif
- break;
- }
-
- case GF_FOP_LINK:
- {
- if (!stub->args.link_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.link_cbk.op_ret,
- stub->args.link_cbk.op_errno,
- stub->args.link_cbk.inode,
- &stub->args.link_cbk.buf);
- else
- stub->args.link_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.link_cbk.op_ret,
- stub->args.link_cbk.op_errno,
- stub->args.link_cbk.inode,
- &stub->args.link_cbk.buf);
- break;
- }
-
- case GF_FOP_CHMOD:
- {
- if (!stub->args.chmod_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.chmod_cbk.op_ret,
- stub->args.chmod_cbk.op_errno,
- &stub->args.chmod_cbk.buf);
- else
- stub->args.chmod_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.chmod_cbk.op_ret,
- stub->args.chmod_cbk.op_errno,
- &stub->args.chmod_cbk.buf);
- break;
- }
-
- case GF_FOP_CHOWN:
- {
- if (!stub->args.chown_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.chown_cbk.op_ret,
- stub->args.chown_cbk.op_errno,
- &stub->args.chown_cbk.buf);
- else
- stub->args.chown_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.chown_cbk.op_ret,
- stub->args.chown_cbk.op_errno,
- &stub->args.chown_cbk.buf);
- break;
- }
-
- case GF_FOP_TRUNCATE:
- {
- if (!stub->args.truncate_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.truncate_cbk.op_ret,
- stub->args.truncate_cbk.op_errno,
- &stub->args.truncate_cbk.buf);
- else
- stub->args.truncate_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.truncate_cbk.op_ret,
- stub->args.truncate_cbk.op_errno,
- &stub->args.truncate_cbk.buf);
- break;
- }
-
- case GF_FOP_READ:
- {
- if (!stub->args.readv_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.readv_cbk.op_ret,
- stub->args.readv_cbk.op_errno,
- stub->args.readv_cbk.vector,
- stub->args.readv_cbk.count,
- &stub->args.readv_cbk.stbuf,
- stub->args.readv_cbk.iobref);
- else
- stub->args.readv_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.readv_cbk.op_ret,
- stub->args.readv_cbk.op_errno,
- stub->args.readv_cbk.vector,
- stub->args.readv_cbk.count,
- &stub->args.readv_cbk.stbuf,
- stub->args.readv_cbk.iobref);
- }
- break;
-
- case GF_FOP_WRITE:
- {
- if (!stub->args.writev_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.writev_cbk.op_ret,
- stub->args.writev_cbk.op_errno,
- &stub->args.writev_cbk.stbuf);
- else
- stub->args.writev_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.writev_cbk.op_ret,
- stub->args.writev_cbk.op_errno,
- &stub->args.writev_cbk.stbuf);
- break;
- }
-
- case GF_FOP_STATFS:
- {
- if (!stub->args.statfs_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.statfs_cbk.op_ret,
- stub->args.statfs_cbk.op_errno,
- &(stub->args.statfs_cbk.buf));
- else
- stub->args.statfs_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.statfs_cbk.op_ret,
- stub->args.statfs_cbk.op_errno,
- &(stub->args.statfs_cbk.buf));
- }
- break;
-
- case GF_FOP_FLUSH:
- {
- if (!stub->args.flush_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.flush_cbk.op_ret,
- stub->args.flush_cbk.op_errno);
- else
- stub->args.flush_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.flush_cbk.op_ret,
- stub->args.flush_cbk.op_errno);
-
- break;
- }
-
- case GF_FOP_FSYNC:
- {
- if (!stub->args.fsync_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fsync_cbk.op_ret,
- stub->args.fsync_cbk.op_errno);
- else
- stub->args.fsync_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fsync_cbk.op_ret,
- stub->args.fsync_cbk.op_errno);
- break;
- }
-
- case GF_FOP_SETXATTR:
- {
- if (!stub->args.setxattr_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.setxattr_cbk.op_ret,
- stub->args.setxattr_cbk.op_errno);
-
- else
- stub->args.setxattr_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.setxattr_cbk.op_ret,
- stub->args.setxattr_cbk.op_errno);
-
- break;
- }
-
- case GF_FOP_GETXATTR:
- {
- if (!stub->args.getxattr_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.getxattr_cbk.op_ret,
- stub->args.getxattr_cbk.op_errno,
- stub->args.getxattr_cbk.dict);
- else
- stub->args.getxattr_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.getxattr_cbk.op_ret,
- stub->args.getxattr_cbk.op_errno,
- stub->args.getxattr_cbk.dict);
- break;
- }
-
- case GF_FOP_FSETXATTR:
- {
- if (!stub->args.fsetxattr_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fsetxattr_cbk.op_ret,
- stub->args.fsetxattr_cbk.op_errno);
-
- else
- stub->args.fsetxattr_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fsetxattr_cbk.op_ret,
- stub->args.fsetxattr_cbk.op_errno);
-
- break;
- }
-
- case GF_FOP_FGETXATTR:
- {
- if (!stub->args.fgetxattr_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fgetxattr_cbk.op_ret,
- stub->args.fgetxattr_cbk.op_errno,
- stub->args.fgetxattr_cbk.dict);
- else
- stub->args.fgetxattr_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fgetxattr_cbk.op_ret,
- stub->args.fgetxattr_cbk.op_errno,
- stub->args.fgetxattr_cbk.dict);
- break;
- }
-
- case GF_FOP_REMOVEXATTR:
- {
- if (!stub->args.removexattr_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.removexattr_cbk.op_ret,
- stub->args.removexattr_cbk.op_errno);
- else
- stub->args.removexattr_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.removexattr_cbk.op_ret,
- stub->args.removexattr_cbk.op_errno);
-
- break;
- }
-
- case GF_FOP_OPENDIR:
- {
- if (!stub->args.opendir_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.opendir_cbk.op_ret,
- stub->args.opendir_cbk.op_errno,
- stub->args.opendir_cbk.fd);
- else
- stub->args.opendir_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.opendir_cbk.op_ret,
- stub->args.opendir_cbk.op_errno,
- stub->args.opendir_cbk.fd);
- break;
- }
-
- case GF_FOP_GETDENTS:
- {
- if (!stub->args.getdents_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.getdents_cbk.op_ret,
- stub->args.getdents_cbk.op_errno,
- &stub->args.getdents_cbk.entries,
- stub->args.getdents_cbk.count);
- else
- stub->args.getdents_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.getdents_cbk.op_ret,
- stub->args.getdents_cbk.op_errno,
- &stub->args.getdents_cbk.entries,
- stub->args.getdents_cbk.count);
- break;
- }
-
- case GF_FOP_FSYNCDIR:
- {
- if (!stub->args.fsyncdir_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fsyncdir_cbk.op_ret,
- stub->args.fsyncdir_cbk.op_errno);
- else
- stub->args.fsyncdir_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fsyncdir_cbk.op_ret,
- stub->args.fsyncdir_cbk.op_errno);
- break;
- }
-
- case GF_FOP_ACCESS:
- {
- if (!stub->args.access_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.access_cbk.op_ret,
- stub->args.access_cbk.op_errno);
- else
- stub->args.access_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.access_cbk.op_ret,
- stub->args.access_cbk.op_errno);
-
- break;
- }
-
- case GF_FOP_FTRUNCATE:
- {
- if (!stub->args.ftruncate_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.ftruncate_cbk.op_ret,
- stub->args.ftruncate_cbk.op_errno,
- &stub->args.ftruncate_cbk.buf);
- else
- stub->args.ftruncate_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.ftruncate_cbk.op_ret,
- stub->args.ftruncate_cbk.op_errno,
- &stub->args.ftruncate_cbk.buf);
- break;
- }
-
- case GF_FOP_FSTAT:
- {
- if (!stub->args.fstat_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fstat_cbk.op_ret,
- stub->args.fstat_cbk.op_errno,
- &stub->args.fstat_cbk.buf);
- else
- stub->args.fstat_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fstat_cbk.op_ret,
- stub->args.fstat_cbk.op_errno,
- &stub->args.fstat_cbk.buf);
-
- break;
- }
-
- case GF_FOP_LK:
- {
- if (!stub->args.lk_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.lk_cbk.op_ret,
- stub->args.lk_cbk.op_errno,
- &stub->args.lk_cbk.lock);
- else
- stub->args.lk_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.lk_cbk.op_ret,
- stub->args.lk_cbk.op_errno,
- &stub->args.lk_cbk.lock);
- break;
- }
-
- case GF_FOP_INODELK:
- {
- if (!stub->args.inodelk_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.inodelk_cbk.op_ret,
- stub->args.inodelk_cbk.op_errno);
-
- else
- stub->args.inodelk_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.inodelk_cbk.op_ret,
- stub->args.inodelk_cbk.op_errno);
- break;
- }
-
- case GF_FOP_FINODELK:
- {
- if (!stub->args.finodelk_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.finodelk_cbk.op_ret,
- stub->args.finodelk_cbk.op_errno);
-
- else
- stub->args.finodelk_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.finodelk_cbk.op_ret,
- stub->args.finodelk_cbk.op_errno);
- break;
- }
-
- case GF_FOP_ENTRYLK:
- {
- if (!stub->args.entrylk_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.entrylk_cbk.op_ret,
- stub->args.entrylk_cbk.op_errno);
-
- else
- stub->args.entrylk_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.entrylk_cbk.op_ret,
- stub->args.entrylk_cbk.op_errno);
- break;
- }
-
- case GF_FOP_FENTRYLK:
- {
- if (!stub->args.fentrylk_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fentrylk_cbk.op_ret,
- stub->args.fentrylk_cbk.op_errno);
-
- else
- stub->args.fentrylk_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fentrylk_cbk.op_ret,
- stub->args.fentrylk_cbk.op_errno);
- break;
- }
-
- case GF_FOP_UTIMENS:
- {
- if (!stub->args.utimens_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.utimens_cbk.op_ret,
- stub->args.utimens_cbk.op_errno,
- &stub->args.utimens_cbk.buf);
- else
- stub->args.utimens_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.utimens_cbk.op_ret,
- stub->args.utimens_cbk.op_errno,
- &stub->args.utimens_cbk.buf);
-
- break;
- }
-
-
- break;
- case GF_FOP_FCHMOD:
- {
- if (!stub->args.fchmod_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fchmod_cbk.op_ret,
- stub->args.fchmod_cbk.op_errno,
- &stub->args.fchmod_cbk.buf);
- else
- stub->args.fchmod_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fchmod_cbk.op_ret,
- stub->args.fchmod_cbk.op_errno,
- &stub->args.fchmod_cbk.buf);
- break;
- }
-
- case GF_FOP_FCHOWN:
- {
- if (!stub->args.fchown_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fchown_cbk.op_ret,
- stub->args.fchown_cbk.op_errno,
- &stub->args.fchown_cbk.buf);
- else
- stub->args.fchown_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fchown_cbk.op_ret,
- stub->args.fchown_cbk.op_errno,
- &stub->args.fchown_cbk.buf);
- break;
- }
-
- case GF_FOP_LOOKUP:
- {
- if (!stub->args.lookup_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.lookup_cbk.op_ret,
- stub->args.lookup_cbk.op_errno,
- stub->args.lookup_cbk.inode,
- &stub->args.lookup_cbk.buf,
- stub->args.lookup_cbk.dict);
- else
- stub->args.lookup_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.lookup_cbk.op_ret,
- stub->args.lookup_cbk.op_errno,
- stub->args.lookup_cbk.inode,
- &stub->args.lookup_cbk.buf,
- stub->args.lookup_cbk.dict);
- /* FIXME NULL should not be passed */
-
- if (stub->args.lookup_cbk.dict)
- dict_unref (stub->args.lookup_cbk.dict);
- if (stub->args.lookup_cbk.inode)
- inode_unref (stub->args.lookup_cbk.inode);
-
- break;
- }
- case GF_FOP_SETDENTS:
- {
- if (!stub->args.setdents_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.setdents_cbk.op_ret,
- stub->args.setdents_cbk.op_errno);
- else
- stub->args.setdents_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.setdents_cbk.op_ret,
- stub->args.setdents_cbk.op_errno);
- break;
- }
-
- case GF_FOP_CHECKSUM:
- {
- if (!stub->args.checksum_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.checksum_cbk.op_ret,
- stub->args.checksum_cbk.op_errno,
- stub->args.checksum_cbk.file_checksum,
- stub->args.checksum_cbk.dir_checksum);
- else
- stub->args.checksum_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.checksum_cbk.op_ret,
- stub->args.checksum_cbk.op_errno,
- stub->args.checksum_cbk.file_checksum,
- stub->args.checksum_cbk.dir_checksum);
- if (stub->args.checksum_cbk.op_ret >= 0)
- {
- FREE (stub->args.checksum_cbk.file_checksum);
- FREE (stub->args.checksum_cbk.dir_checksum);
- }
-
- break;
- }
-
- case GF_FOP_READDIR:
- {
- if (!stub->args.readdir_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.readdir_cbk.op_ret,
- stub->args.readdir_cbk.op_errno,
- &stub->args.readdir_cbk.entries);
- else
- stub->args.readdir_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.readdir_cbk.op_ret,
- stub->args.readdir_cbk.op_errno,
- &stub->args.readdir_cbk.entries);
-
- if (stub->args.readdir_cbk.op_ret > 0)
- gf_dirent_free (&stub->args.readdir_cbk.entries);
-
- break;
- }
-
- case GF_FOP_XATTROP:
- {
- if (!stub->args.xattrop_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.xattrop_cbk.op_ret,
- stub->args.xattrop_cbk.op_errno);
- else
- stub->args.xattrop_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.xattrop_cbk.op_ret,
- stub->args.xattrop_cbk.op_errno,
- stub->args.xattrop_cbk.xattr);
-
- if (stub->args.xattrop_cbk.xattr)
- dict_unref (stub->args.xattrop_cbk.xattr);
-
- break;
- }
- case GF_FOP_FXATTROP:
- {
- if (!stub->args.fxattrop_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fxattrop_cbk.op_ret,
- stub->args.fxattrop_cbk.op_errno);
- else
- stub->args.fxattrop_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fxattrop_cbk.op_ret,
- stub->args.fxattrop_cbk.op_errno,
- stub->args.fxattrop_cbk.xattr);
-
- if (stub->args.fxattrop_cbk.xattr)
- dict_unref (stub->args.fxattrop_cbk.xattr);
-
- break;
- }
- case GF_FOP_LOCK_NOTIFY:
- {
- if (!stub->args.lock_notify_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.lock_notify_cbk.op_ret,
- stub->args.lock_notify_cbk.op_errno);
- else
- stub->args.lock_notify_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.lock_notify_cbk.op_ret,
- stub->args.lock_notify_cbk.op_errno);
- break;
- }
- case GF_FOP_LOCK_FNOTIFY:
- {
- if (!stub->args.lock_fnotify_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.lock_fnotify_cbk.op_ret,
- stub->args.lock_fnotify_cbk.op_errno);
- else
- stub->args.lock_fnotify_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.lock_fnotify_cbk.op_ret,
- stub->args.lock_fnotify_cbk.op_errno);
- break;
- }
- case GF_FOP_MAXVALUE:
- {
- gf_log ("call-stub",
- GF_LOG_DEBUG,
- "Invalid value of FOP");
- }
- break;
- }
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+
+ switch (stub->fop) {
+ case GF_FOP_OPEN:
+ STUB_UNWIND (stub, open, stub->args_cbk.fd,
+ stub->args_cbk.xdata);
+ break;
+ case GF_FOP_CREATE:
+ STUB_UNWIND (stub, create, stub->args_cbk.fd,
+ stub->args_cbk.inode, &stub->args_cbk.stat,
+ &stub->args_cbk.preparent,
+ &stub->args_cbk.postparent,
+ stub->args_cbk.xdata);
+ break;
+ case GF_FOP_STAT:
+ STUB_UNWIND (stub, stat, &stub->args_cbk.stat,
+ stub->args_cbk.xdata);
+ break;
+ case GF_FOP_READLINK:
+ STUB_UNWIND (stub, readlink, stub->args_cbk.buf,
+ &stub->args_cbk.stat, stub->args.xdata);
+ break;
+ case GF_FOP_MKNOD:
+ STUB_UNWIND (stub, mknod, stub->args_cbk.inode,
+ &stub->args_cbk.stat, &stub->args_cbk.preparent,
+ &stub->args_cbk.postparent, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_MKDIR:
+ STUB_UNWIND (stub, mkdir, stub->args_cbk.inode,
+ &stub->args_cbk.stat, &stub->args_cbk.preparent,
+ &stub->args_cbk.postparent, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_UNLINK:
+ STUB_UNWIND (stub, unlink, &stub->args_cbk.preparent,
+ &stub->args_cbk.postparent, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_RMDIR:
+ STUB_UNWIND (stub, rmdir, &stub->args_cbk.preparent,
+ &stub->args_cbk.postparent, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_SYMLINK:
+ STUB_UNWIND (stub, symlink, stub->args_cbk.inode,
+ &stub->args_cbk.stat, &stub->args_cbk.preparent,
+ &stub->args_cbk.postparent, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_RENAME:
+ STUB_UNWIND (stub, rename, &stub->args_cbk.stat,
+ &stub->args_cbk.preparent,
+ &stub->args_cbk.postparent,
+ &stub->args_cbk.preparent2,
+ &stub->args_cbk.postparent2,
+ stub->args_cbk.xdata);
+ break;
+ case GF_FOP_LINK:
+ STUB_UNWIND (stub, link, stub->args_cbk.inode,
+ &stub->args_cbk.stat, &stub->args_cbk.preparent,
+ &stub->args_cbk.postparent, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_TRUNCATE:
+ STUB_UNWIND (stub, truncate, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_READ:
+ STUB_UNWIND (stub, readv, stub->args_cbk.vector,
+ stub->args_cbk.count, &stub->args_cbk.stat,
+ stub->args_cbk.iobref, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_WRITE:
+ STUB_UNWIND (stub, writev, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_STATFS:
+ STUB_UNWIND (stub, statfs, &stub->args_cbk.statvfs,
+ stub->args_cbk.xdata);
+ break;
+ case GF_FOP_FLUSH:
+ STUB_UNWIND (stub, flush, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_FSYNC:
+ STUB_UNWIND (stub, fsync, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_SETXATTR:
+ STUB_UNWIND (stub, setxattr, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_GETXATTR:
+ STUB_UNWIND (stub, getxattr, stub->args_cbk.xattr,
+ stub->args_cbk.xdata);
+ break;
+ case GF_FOP_FSETXATTR:
+ STUB_UNWIND (stub, fsetxattr, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_FGETXATTR:
+ STUB_UNWIND (stub, fgetxattr, stub->args_cbk.xattr,
+ stub->args_cbk.xdata);
+ break;
+ case GF_FOP_REMOVEXATTR:
+ STUB_UNWIND (stub, removexattr, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_FREMOVEXATTR:
+ STUB_UNWIND (stub, fremovexattr, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_OPENDIR:
+ STUB_UNWIND (stub, opendir, stub->args_cbk.fd,
+ stub->args_cbk.xdata);
+ break;
+ case GF_FOP_FSYNCDIR:
+ STUB_UNWIND (stub, fsyncdir, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_ACCESS:
+ STUB_UNWIND (stub, access, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_FTRUNCATE:
+ STUB_UNWIND (stub, ftruncate, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_FSTAT:
+ STUB_UNWIND (stub, fstat, &stub->args_cbk.stat,
+ stub->args_cbk.xdata);
+ break;
+ case GF_FOP_LK:
+ STUB_UNWIND (stub, lk, &stub->args_cbk.lock,
+ stub->args_cbk.xdata);
+ break;
+ case GF_FOP_INODELK:
+ STUB_UNWIND (stub, inodelk, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_FINODELK:
+ STUB_UNWIND (stub, finodelk, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_ENTRYLK:
+ STUB_UNWIND (stub, entrylk, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_FENTRYLK:
+ STUB_UNWIND (stub, fentrylk, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_LOOKUP:
+ STUB_UNWIND (stub, lookup, stub->args_cbk.inode,
+ &stub->args_cbk.stat, stub->args_cbk.xdata,
+ &stub->args_cbk.postparent);
+ break;
+ case GF_FOP_RCHECKSUM:
+ STUB_UNWIND (stub, rchecksum, stub->args_cbk.weak_checksum,
+ stub->args_cbk.strong_checksum, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_READDIR:
+ STUB_UNWIND (stub, readdir, &stub->args_cbk.entries,
+ stub->args_cbk.xdata);
+ break;
+ case GF_FOP_READDIRP:
+ STUB_UNWIND (stub, readdir, &stub->args_cbk.entries,
+ stub->args_cbk.xdata);
+ break;
+ case GF_FOP_XATTROP:
+ STUB_UNWIND (stub, xattrop, stub->args_cbk.xattr,
+ stub->args_cbk.xdata);
+ break;
+ case GF_FOP_FXATTROP:
+ STUB_UNWIND (stub, fxattrop, stub->args_cbk.xattr,
+ stub->args_cbk.xdata);
+ break;
+ case GF_FOP_SETATTR:
+ STUB_UNWIND (stub, setattr, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_FSETATTR:
+ STUB_UNWIND (stub, fsetattr, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_FALLOCATE:
+ STUB_UNWIND(stub, fallocate, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_DISCARD:
+ STUB_UNWIND(stub, discard, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_ZEROFILL:
+ STUB_UNWIND(stub, zerofill, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
+
+ default:
+ gf_log_callingfn ("call-stub", GF_LOG_ERROR,
+ "Invalid value of FOP (%d)",
+ stub->fop);
+ break;
+ }
out:
- return;
+ return;
}
static void
-call_stub_destroy_wind (call_stub_t *stub)
+call_stub_wipe_args (call_stub_t *stub)
{
- switch (stub->fop) {
- case GF_FOP_OPEN:
- {
- loc_wipe (&stub->args.open.loc);
- if (stub->args.open.fd)
- fd_unref (stub->args.open.fd);
- break;
- }
- case GF_FOP_CREATE:
- {
- loc_wipe (&stub->args.create.loc);
- if (stub->args.create.fd)
- fd_unref (stub->args.create.fd);
- break;
- }
- case GF_FOP_STAT:
- {
- loc_wipe (&stub->args.stat.loc);
- break;
- }
- case GF_FOP_READLINK:
- {
- loc_wipe (&stub->args.readlink.loc);
- break;
- }
-
- case GF_FOP_MKNOD:
- {
- loc_wipe (&stub->args.mknod.loc);
- }
- break;
-
- case GF_FOP_MKDIR:
- {
- loc_wipe (&stub->args.mkdir.loc);
- }
- break;
-
- case GF_FOP_UNLINK:
- {
- loc_wipe (&stub->args.unlink.loc);
- }
- break;
-
- case GF_FOP_RMDIR:
- {
- loc_wipe (&stub->args.rmdir.loc);
- }
- break;
-
- case GF_FOP_SYMLINK:
- {
- FREE (stub->args.symlink.linkname);
- loc_wipe (&stub->args.symlink.loc);
- }
- break;
-
- case GF_FOP_RENAME:
- {
- loc_wipe (&stub->args.rename.old);
- loc_wipe (&stub->args.rename.new);
- }
- break;
-
- case GF_FOP_LINK:
- {
- loc_wipe (&stub->args.link.oldloc);
- loc_wipe (&stub->args.link.newloc);
- }
- break;
-
- case GF_FOP_CHMOD:
- {
- loc_wipe (&stub->args.chmod.loc);
- }
- break;
-
- case GF_FOP_CHOWN:
- {
- loc_wipe (&stub->args.chown.loc);
- break;
- }
- case GF_FOP_TRUNCATE:
- {
- loc_wipe (&stub->args.truncate.loc);
- break;
- }
-
- case GF_FOP_READ:
- {
- if (stub->args.readv.fd)
- fd_unref (stub->args.readv.fd);
- break;
- }
-
- case GF_FOP_WRITE:
- {
- struct iobref *iobref = stub->args.writev.iobref;
- if (stub->args.writev.fd)
- fd_unref (stub->args.writev.fd);
- FREE (stub->args.writev.vector);
- if (iobref)
- iobref_unref (iobref);
- break;
- }
-
- case GF_FOP_STATFS:
- {
- loc_wipe (&stub->args.statfs.loc);
- break;
- }
- case GF_FOP_FLUSH:
- {
- if (stub->args.flush.fd)
- fd_unref (stub->args.flush.fd);
- break;
- }
-
- case GF_FOP_FSYNC:
- {
- if (stub->args.fsync.fd)
- fd_unref (stub->args.fsync.fd);
- break;
- }
+ loc_wipe (&stub->args.loc);
- case GF_FOP_SETXATTR:
- {
- loc_wipe (&stub->args.setxattr.loc);
- if (stub->args.setxattr.dict)
- dict_unref (stub->args.setxattr.dict);
- break;
- }
-
- case GF_FOP_GETXATTR:
- {
- if (stub->args.getxattr.name)
- FREE (stub->args.getxattr.name);
- loc_wipe (&stub->args.getxattr.loc);
- break;
- }
+ loc_wipe (&stub->args.loc2);
- case GF_FOP_FSETXATTR:
- {
- fd_unref (stub->args.fsetxattr.fd);
- if (stub->args.fsetxattr.dict)
- dict_unref (stub->args.fsetxattr.dict);
- break;
- }
-
- case GF_FOP_FGETXATTR:
- {
- if (stub->args.fgetxattr.name)
- FREE (stub->args.fgetxattr.name);
- fd_unref (stub->args.fgetxattr.fd);
- break;
- }
+ if (stub->args.fd)
+ fd_unref (stub->args.fd);
- case GF_FOP_REMOVEXATTR:
- {
- loc_wipe (&stub->args.removexattr.loc);
- FREE (stub->args.removexattr.name);
- break;
- }
-
- case GF_FOP_OPENDIR:
- {
- loc_wipe (&stub->args.opendir.loc);
- if (stub->args.opendir.fd)
- fd_unref (stub->args.opendir.fd);
- break;
- }
+ GF_FREE ((char *)stub->args.linkname);
- case GF_FOP_GETDENTS:
- {
- if (stub->args.getdents.fd)
- fd_unref (stub->args.getdents.fd);
- break;
- }
-
- case GF_FOP_FSYNCDIR:
- {
- if (stub->args.fsyncdir.fd)
- fd_unref (stub->args.fsyncdir.fd);
- break;
- }
-
- case GF_FOP_ACCESS:
- {
- loc_wipe (&stub->args.access.loc);
- break;
- }
-
- case GF_FOP_FTRUNCATE:
- {
- if (stub->args.ftruncate.fd)
- fd_unref (stub->args.ftruncate.fd);
- break;
- }
-
- case GF_FOP_FSTAT:
- {
- if (stub->args.fstat.fd)
- fd_unref (stub->args.fstat.fd);
- break;
- }
-
- case GF_FOP_LK:
- {
- if (stub->args.lk.fd)
- fd_unref (stub->args.lk.fd);
- break;
- }
+ GF_FREE (stub->args.vector);
- case GF_FOP_INODELK:
- {
- if (stub->args.inodelk.volume)
- FREE (stub->args.inodelk.volume);
+ if (stub->args.iobref)
+ iobref_unref (stub->args.iobref);
- loc_wipe (&stub->args.inodelk.loc);
- break;
- }
- case GF_FOP_FINODELK:
- {
- if (stub->args.finodelk.volume)
- FREE (stub->args.finodelk.volume);
-
- if (stub->args.finodelk.fd)
- fd_unref (stub->args.finodelk.fd);
- break;
- }
- case GF_FOP_ENTRYLK:
- {
- if (stub->args.entrylk.volume)
- FREE (stub->args.entrylk.volume);
-
- if (stub->args.entrylk.name)
- FREE (stub->args.entrylk.name);
- loc_wipe (&stub->args.entrylk.loc);
- break;
- }
- case GF_FOP_FENTRYLK:
- {
- if (stub->args.fentrylk.volume)
- FREE (stub->args.fentrylk.volume);
+ if (stub->args.xattr)
+ dict_unref (stub->args.xattr);
- if (stub->args.fentrylk.name)
- FREE (stub->args.fentrylk.name);
+ GF_FREE ((char *)stub->args.name);
- if (stub->args.fentrylk.fd)
- fd_unref (stub->args.fentrylk.fd);
- break;
- }
- case GF_FOP_UTIMENS:
- {
- loc_wipe (&stub->args.utimens.loc);
- break;
- }
- break;
- case GF_FOP_FCHMOD:
- {
- if (stub->args.fchmod.fd)
- fd_unref (stub->args.fchmod.fd);
- break;
- }
-
- case GF_FOP_FCHOWN:
- {
- if (stub->args.fchown.fd)
- fd_unref (stub->args.fchown.fd);
- break;
- }
-
- case GF_FOP_LOOKUP:
- {
- loc_wipe (&stub->args.lookup.loc);
- if (stub->args.lookup.xattr_req)
- dict_unref (stub->args.lookup.xattr_req);
- break;
- }
-
- case GF_FOP_SETDENTS:
- {
- dir_entry_t *entry, *next;
- if (stub->args.setdents.fd)
- fd_unref (stub->args.setdents.fd);
- entry = stub->args.setdents.entries.next;
- while (entry) {
- next = entry->next;
- FREE (entry->name);
- FREE (entry);
- entry = next;
- }
- break;
- }
+ GF_FREE ((char *)stub->args.volume);
- case GF_FOP_CHECKSUM:
- {
- loc_wipe (&stub->args.checksum.loc);
- break;
- }
- break;
- case GF_FOP_READDIR:
- {
- if (stub->args.readdir.fd)
- fd_unref (stub->args.readdir.fd);
- break;
- }
- case GF_FOP_XATTROP:
- {
- loc_wipe (&stub->args.xattrop.loc);
- dict_unref (stub->args.xattrop.xattr);
- break;
- }
- case GF_FOP_FXATTROP:
- {
- if (stub->args.fxattrop.fd)
- fd_unref (stub->args.fxattrop.fd);
- dict_unref (stub->args.xattrop.xattr);
- break;
- }
- case GF_FOP_LOCK_NOTIFY:
- {
- loc_wipe (&stub->args.lock_notify.loc);
- break;
- }
- case GF_FOP_LOCK_FNOTIFY:
- {
- if (stub->args.lock_fnotify.fd)
- fd_unref (stub->args.lock_fnotify.fd);
- break;
- }
- case GF_FOP_MAXVALUE:
- {
- gf_log ("call-stub",
- GF_LOG_DEBUG,
- "Invalid value of FOP");
- }
- break;
- default:
- break;
- }
+ if (stub->args.xdata)
+ dict_unref (stub->args.xdata);
}
static void
-call_stub_destroy_unwind (call_stub_t *stub)
+call_stub_wipe_args_cbk (call_stub_t *stub)
{
- switch (stub->fop) {
- case GF_FOP_OPEN:
- {
- if (stub->args.open_cbk.fd)
- fd_unref (stub->args.open_cbk.fd);
- }
- break;
-
- case GF_FOP_CREATE:
- {
- if (stub->args.create_cbk.fd)
- fd_unref (stub->args.create_cbk.fd);
-
- if (stub->args.create_cbk.inode)
- inode_unref (stub->args.create_cbk.inode);
- }
- break;
-
- case GF_FOP_STAT:
- break;
-
- case GF_FOP_READLINK:
- {
- if (stub->args.readlink_cbk.buf)
- FREE (stub->args.readlink_cbk.buf);
- }
- break;
-
- case GF_FOP_MKNOD:
- {
- if (stub->args.mknod_cbk.inode)
- inode_unref (stub->args.mknod_cbk.inode);
- }
- break;
-
- case GF_FOP_MKDIR:
- {
- if (stub->args.mkdir_cbk.inode)
- inode_unref (stub->args.mkdir_cbk.inode);
- }
- break;
-
- case GF_FOP_UNLINK:
- break;
-
- case GF_FOP_RMDIR:
- break;
-
- case GF_FOP_SYMLINK:
- {
- if (stub->args.symlink_cbk.inode)
- inode_unref (stub->args.symlink_cbk.inode);
- }
- break;
-
- case GF_FOP_RENAME:
- break;
-
- case GF_FOP_LINK:
- {
- if (stub->args.link_cbk.inode)
- inode_unref (stub->args.link_cbk.inode);
- }
- break;
-
- case GF_FOP_CHMOD:
- break;
-
- case GF_FOP_CHOWN:
- break;
-
- case GF_FOP_TRUNCATE:
- break;
-
- case GF_FOP_READ:
- {
- if (stub->args.readv_cbk.op_ret >= 0) {
- struct iobref *iobref = stub->args.readv_cbk.iobref;
- FREE (stub->args.readv_cbk.vector);
-
- if (iobref) {
- iobref_unref (iobref);
- }
- }
- }
- break;
-
- case GF_FOP_WRITE:
- break;
-
- case GF_FOP_STATFS:
- break;
-
- case GF_FOP_FLUSH:
- break;
-
- case GF_FOP_FSYNC:
- break;
+ if (stub->args_cbk.inode)
+ inode_unref (stub->args_cbk.inode);
- case GF_FOP_SETXATTR:
- break;
-
- case GF_FOP_GETXATTR:
- {
- if (stub->args.getxattr_cbk.dict)
- dict_unref (stub->args.getxattr_cbk.dict);
- }
- break;
-
- case GF_FOP_FSETXATTR:
- break;
+ GF_FREE ((char *)stub->args_cbk.buf);
- case GF_FOP_FGETXATTR:
- {
- if (stub->args.fgetxattr_cbk.dict)
- dict_unref (stub->args.fgetxattr_cbk.dict);
- }
- break;
-
- case GF_FOP_REMOVEXATTR:
- break;
-
- case GF_FOP_OPENDIR:
- {
- if (stub->args.opendir_cbk.fd)
- fd_unref (stub->args.opendir_cbk.fd);
- }
- break;
-
- case GF_FOP_GETDENTS:
- {
- dir_entry_t *tmp = NULL, *entries = NULL;
-
- entries = &stub->args.getdents_cbk.entries;
- if (stub->args.getdents_cbk.op_ret >= 0) {
- while (entries->next) {
- tmp = entries->next;
- entries->next = entries->next->next;
- FREE (tmp->name);
- FREE (tmp);
- }
- }
- }
- break;
-
- case GF_FOP_FSYNCDIR:
- break;
-
- case GF_FOP_ACCESS:
- break;
-
- case GF_FOP_FTRUNCATE:
- break;
-
- case GF_FOP_FSTAT:
- break;
-
- case GF_FOP_LK:
- break;
+ GF_FREE (stub->args_cbk.vector);
- case GF_FOP_INODELK:
- break;
+ if (stub->args_cbk.iobref)
+ iobref_unref (stub->args_cbk.iobref);
- case GF_FOP_FINODELK:
- break;
+ if (stub->args_cbk.fd)
+ fd_unref (stub->args_cbk.fd);
- case GF_FOP_ENTRYLK:
- break;
+ if (stub->args_cbk.xattr)
+ dict_unref (stub->args_cbk.xattr);
- case GF_FOP_FENTRYLK:
- break;
+ GF_FREE (stub->args_cbk.strong_checksum);
- case GF_FOP_UTIMENS:
- break;
+ if (stub->args_cbk.xdata)
+ dict_unref (stub->args_cbk.xdata);
- case GF_FOP_FCHMOD:
- break;
-
- case GF_FOP_FCHOWN:
- break;
-
- case GF_FOP_LOOKUP:
- {
- if (stub->args.lookup_cbk.inode)
- inode_unref (stub->args.lookup_cbk.inode);
-
- if (stub->args.lookup_cbk.dict)
- dict_unref (stub->args.lookup_cbk.dict);
- }
- break;
-
- case GF_FOP_SETDENTS:
- break;
-
- case GF_FOP_CHECKSUM:
- {
- if (stub->args.checksum_cbk.op_ret >= 0) {
- FREE (stub->args.checksum_cbk.file_checksum);
- FREE (stub->args.checksum_cbk.dir_checksum);
- }
- }
- break;
-
- case GF_FOP_READDIR:
- {
- if (stub->args.readdir_cbk.op_ret > 0) {
- gf_dirent_free (&stub->args.readdir_cbk.entries);
- }
- }
- break;
-
- case GF_FOP_XATTROP:
- {
- if (stub->args.xattrop_cbk.xattr)
- dict_unref (stub->args.xattrop_cbk.xattr);
- }
- break;
-
- case GF_FOP_FXATTROP:
- {
- if (stub->args.fxattrop_cbk.xattr)
- dict_unref (stub->args.fxattrop_cbk.xattr);
- }
- break;
-
- case GF_FOP_MAXVALUE:
- {
- gf_log ("call-stub",
- GF_LOG_DEBUG,
- "Invalid value of FOP");
- }
- break;
-
- default:
- break;
- }
+ if (!list_empty (&stub->args_cbk.entries.list))
+ gf_dirent_free (&stub->args_cbk.entries);
}
-
+
void
call_stub_destroy (call_stub_t *stub)
{
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- if (stub->wind) {
- call_stub_destroy_wind (stub);
- } else {
- call_stub_destroy_unwind (stub);
- }
-
- FREE (stub);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+
+ if (stub->wind)
+ call_stub_wipe_args (stub);
+ else
+ call_stub_wipe_args_cbk (stub);
+
+ stub->stub_mem_pool = NULL;
+
+ mem_put (stub);
out:
- return;
+ return;
}
+
void
call_resume (call_stub_t *stub)
{
xlator_t *old_THIS = NULL;
- errno = EINVAL;
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ errno = EINVAL;
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- list_del_init (&stub->list);
+ list_del_init (&stub->list);
old_THIS = THIS;
THIS = stub->frame->this;
@@ -4177,9 +2844,30 @@ call_resume (call_stub_t *stub)
}
THIS = old_THIS;
- call_stub_destroy (stub);
+ call_stub_destroy (stub);
out:
- return;
+ return;
}
+void
+call_unwind_error (call_stub_t *stub, int op_ret, int op_errno)
+{
+ xlator_t *old_THIS = NULL;
+
+ list_del_init (&stub->list);
+
+ old_THIS = THIS;
+ THIS = stub->frame->this;
+ {
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ call_resume_unwind (stub);
+ }
+ THIS = old_THIS;
+
+ call_stub_destroy (stub);
+
+ return;
+
+}
diff --git a/libglusterfs/src/call-stub.h b/libglusterfs/src/call-stub.h
index 4a8db9c5f..45bef8044 100644
--- a/libglusterfs/src/call-stub.h
+++ b/libglusterfs/src/call-stub.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CALL_STUB_H_
@@ -34,598 +25,164 @@ typedef struct {
char wind;
call_frame_t *frame;
glusterfs_fop_t fop;
+ struct mem_pool *stub_mem_pool; /* pointer to stub mempool in ctx_t */
union {
- /* lookup */
- struct {
- fop_lookup_t fn;
- loc_t loc;
- dict_t *xattr_req;
- } lookup;
- struct {
- fop_lookup_cbk_t fn;
- int32_t op_ret, op_errno;
- inode_t *inode;
- struct stat buf;
- dict_t *dict;
- } lookup_cbk;
-
- /* stat */
- struct {
- fop_stat_t fn;
- loc_t loc;
- } stat;
- struct {
- fop_stat_cbk_t fn;
- int32_t op_ret, op_errno;
- struct stat buf;
- } stat_cbk;
-
- /* fstat */
- struct {
- fop_fstat_t fn;
- fd_t *fd;
- } fstat;
- struct {
- fop_fstat_cbk_t fn;
- int32_t op_ret, op_errno;
- struct stat buf;
- } fstat_cbk;
-
- /* chmod */
- struct {
- fop_chmod_t fn;
- loc_t loc;
- mode_t mode;
- } chmod;
- struct {
- fop_chmod_cbk_t fn;
- int32_t op_ret, op_errno;
- struct stat buf;
- } chmod_cbk;
-
- /* fchmod */
- struct {
- fop_fchmod_t fn;
- fd_t *fd;
- mode_t mode;
- } fchmod;
- struct {
- fop_fchmod_cbk_t fn;
- int32_t op_ret, op_errno;
- struct stat buf;
- } fchmod_cbk;
-
- /* chown */
- struct {
- fop_chown_t fn;
- loc_t loc;
- uid_t uid;
- gid_t gid;
- } chown;
- struct {
- fop_chown_cbk_t fn;
- int32_t op_ret, op_errno;
- struct stat buf;
- } chown_cbk;
-
- /* fchown */
- struct {
- fop_fchown_t fn;
- fd_t *fd;
- uid_t uid;
- gid_t gid;
- } fchown;
- struct {
- fop_fchown_cbk_t fn;
- int32_t op_ret, op_errno;
- struct stat buf;
- } fchown_cbk;
-
- /* truncate */
- struct {
- fop_truncate_t fn;
- loc_t loc;
- off_t off;
- } truncate;
- struct {
- fop_truncate_cbk_t fn;
- int32_t op_ret, op_errno;
- struct stat buf;
- } truncate_cbk;
-
- /* ftruncate */
- struct {
- fop_ftruncate_t fn;
- fd_t *fd;
- off_t off;
- } ftruncate;
- struct {
- fop_ftruncate_cbk_t fn;
- int32_t op_ret, op_errno;
- struct stat buf;
- } ftruncate_cbk;
-
- /* utimens */
- struct {
- fop_utimens_t fn;
- loc_t loc;
- struct timespec tv[2];
- } utimens;
- struct {
- fop_utimens_cbk_t fn;
- int32_t op_ret, op_errno;
- struct stat buf;
- } utimens_cbk;
-
- /* access */
- struct {
- fop_access_t fn;
- loc_t loc;
- int32_t mask;
- } access;
- struct {
- fop_access_cbk_t fn;
- int32_t op_ret, op_errno;
- } access_cbk;
-
- /* readlink */
- struct {
- fop_readlink_t fn;
- loc_t loc;
- size_t size;
- } readlink;
- struct {
- fop_readlink_cbk_t fn;
- int32_t op_ret, op_errno;
- const char *buf;
- } readlink_cbk;
-
- /* mknod */
- struct {
- fop_mknod_t fn;
- loc_t loc;
- mode_t mode;
- dev_t rdev;
- } mknod;
- struct {
- fop_mknod_cbk_t fn;
- int32_t op_ret, op_errno;
- inode_t *inode;
- struct stat buf;
- } mknod_cbk;
-
- /* mkdir */
- struct {
- fop_mkdir_t fn;
- loc_t loc;
- mode_t mode;
- } mkdir;
- struct {
- fop_mkdir_cbk_t fn;
- int32_t op_ret, op_errno;
- inode_t *inode;
- struct stat buf;
- } mkdir_cbk;
-
- /* unlink */
- struct {
- fop_unlink_t fn;
- loc_t loc;
- } unlink;
- struct {
- fop_unlink_cbk_t fn;
- int32_t op_ret, op_errno;
- } unlink_cbk;
-
- /* rmdir */
- struct {
- fop_rmdir_t fn;
- loc_t loc;
- } rmdir;
- struct {
- fop_rmdir_cbk_t fn;
- int32_t op_ret, op_errno;
- } rmdir_cbk;
-
- /* symlink */
- struct {
- fop_symlink_t fn;
- const char *linkname;
- loc_t loc;
- } symlink;
- struct {
- fop_symlink_cbk_t fn;
- int32_t op_ret, op_errno;
- inode_t *inode;
- struct stat buf;
- } symlink_cbk;
-
- /* rename */
- struct {
- fop_rename_t fn;
- loc_t old;
- loc_t new;
- } rename;
- struct {
- fop_rename_cbk_t fn;
- int32_t op_ret, op_errno;
- struct stat buf;
- } rename_cbk;
-
- /* link */
- struct {
- fop_link_t fn;
- loc_t oldloc;
- loc_t newloc;
- } link;
- struct {
- fop_link_cbk_t fn;
- int32_t op_ret, op_errno;
- inode_t *inode;
- struct stat buf;
- } link_cbk;
-
- /* create */
- struct {
- fop_create_t fn;
- loc_t loc;
- int32_t flags;
- mode_t mode;
- fd_t *fd;
- } create;
- struct {
- fop_create_cbk_t fn;
- int32_t op_ret, op_errno;
- fd_t *fd;
- inode_t *inode;
- struct stat buf;
- } create_cbk;
-
- /* open */
- struct {
- fop_open_t fn;
- loc_t loc;
- int32_t flags;
- fd_t *fd;
- } open;
- struct {
- fop_open_cbk_t fn;
- int32_t op_ret, op_errno;
- fd_t *fd;
- } open_cbk;
-
- /* readv */
- struct {
- fop_readv_t fn;
- fd_t *fd;
- size_t size;
- off_t off;
- } readv;
- struct {
- fop_readv_cbk_t fn;
- int32_t op_ret;
- int32_t op_errno;
- struct iovec *vector;
- int32_t count;
- struct stat stbuf;
- struct iobref *iobref;
- } readv_cbk;
-
- /* writev */
- struct {
- fop_writev_t fn;
- fd_t *fd;
- struct iovec *vector;
- int32_t count;
- off_t off;
- struct iobref *iobref;
- } writev;
- struct {
- fop_writev_cbk_t fn;
- int32_t op_ret, op_errno;
- struct stat stbuf;
- } writev_cbk;
-
- /* flush */
- struct {
- fop_flush_t fn;
- fd_t *fd;
- } flush;
- struct {
- fop_flush_cbk_t fn;
- int32_t op_ret, op_errno;
- } flush_cbk;
-
- /* fsync */
- struct {
- fop_fsync_t fn;
- fd_t *fd;
- int32_t datasync;
- } fsync;
- struct {
- fop_fsync_cbk_t fn;
- int32_t op_ret, op_errno;
- } fsync_cbk;
-
- /* opendir */
- struct {
- fop_opendir_t fn;
- loc_t loc;
- fd_t *fd;
- } opendir;
- struct {
- fop_opendir_cbk_t fn;
- int32_t op_ret, op_errno;
- fd_t *fd;
- } opendir_cbk;
-
- /* getdents */
- struct {
- fop_getdents_t fn;
- fd_t *fd;
- size_t size;
- off_t off;
- int32_t flag;
- } getdents;
- struct {
- fop_getdents_cbk_t fn;
- int32_t op_ret;
- int32_t op_errno;
- dir_entry_t entries;
- int32_t count;
- } getdents_cbk;
-
- /* setdents */
- struct {
- fop_setdents_t fn;
- fd_t *fd;
- int32_t flags;
- dir_entry_t entries;
- int32_t count;
- } setdents;
- struct {
- fop_setdents_cbk_t fn;
- int32_t op_ret;
- int32_t op_errno;
- } setdents_cbk;
-
- /* fsyncdir */
- struct {
- fop_fsyncdir_t fn;
- fd_t *fd;
- int32_t datasync;
- } fsyncdir;
- struct {
- fop_fsyncdir_cbk_t fn;
- int32_t op_ret, op_errno;
- } fsyncdir_cbk;
-
- /* statfs */
- struct {
- fop_statfs_t fn;
- loc_t loc;
- } statfs;
- struct {
- fop_statfs_cbk_t fn;
- int32_t op_ret, op_errno;
- struct statvfs buf;
- } statfs_cbk;
-
- /* setxattr */
- struct {
- fop_setxattr_t fn;
- loc_t loc;
- dict_t *dict;
- int32_t flags;
- } setxattr;
- struct {
- fop_setxattr_cbk_t fn;
- int32_t op_ret, op_errno;
- } setxattr_cbk;
-
- /* getxattr */
- struct {
- fop_getxattr_t fn;
- loc_t loc;
- const char *name;
- } getxattr;
- struct {
- fop_getxattr_cbk_t fn;
- int32_t op_ret, op_errno;
- dict_t *dict;
- } getxattr_cbk;
-
- /* fsetxattr */
- struct {
- fop_fsetxattr_t fn;
- fd_t *fd;
- dict_t *dict;
- int32_t flags;
- } fsetxattr;
- struct {
- fop_fsetxattr_cbk_t fn;
- int32_t op_ret, op_errno;
- } fsetxattr_cbk;
-
- /* fgetxattr */
- struct {
- fop_fgetxattr_t fn;
- fd_t *fd;
- const char *name;
- } fgetxattr;
- struct {
- fop_fgetxattr_cbk_t fn;
- int32_t op_ret, op_errno;
- dict_t *dict;
- } fgetxattr_cbk;
-
- /* removexattr */
- struct {
- fop_removexattr_t fn;
- loc_t loc;
- const char *name;
- } removexattr;
- struct {
- fop_removexattr_cbk_t fn;
- int32_t op_ret, op_errno;
- } removexattr_cbk;
-
- /* lk */
- struct {
- fop_lk_t fn;
- fd_t *fd;
- int32_t cmd;
- struct flock lock;
- } lk;
- struct {
- fop_lk_cbk_t fn;
- int32_t op_ret, op_errno;
- struct flock lock;
- } lk_cbk;
-
- /* inodelk */
- struct {
- fop_inodelk_t fn;
- const char *volume;
- loc_t loc;
- int32_t cmd;
- struct flock lock;
- } inodelk;
-
- struct {
- fop_inodelk_cbk_t fn;
- int32_t op_ret, op_errno;
- } inodelk_cbk;
-
- /* finodelk */
- struct {
- fop_finodelk_t fn;
- const char *volume;
- fd_t *fd;
- int32_t cmd;
- struct flock lock;
- } finodelk;
-
- struct {
- fop_finodelk_cbk_t fn;
- int32_t op_ret, op_errno;
- } finodelk_cbk;
-
- /* entrylk */
- struct {
- fop_entrylk_t fn;
- loc_t loc;
- const char *volume;
- const char *name;
- entrylk_cmd cmd;
- entrylk_type type;
- } entrylk;
-
- struct {
- fop_entrylk_cbk_t fn;
- int32_t op_ret, op_errno;
- } entrylk_cbk;
-
- /* fentrylk */
- struct {
- fop_fentrylk_t fn;
- fd_t *fd;
- const char *volume;
- const char *name;
- entrylk_cmd cmd;
- entrylk_type type;
- } fentrylk;
-
- struct {
- fop_fentrylk_cbk_t fn;
- int32_t op_ret, op_errno;
- } fentrylk_cbk;
-
- /* readdir */
- struct {
- fop_readdir_t fn;
- fd_t *fd;
- size_t size;
- off_t off;
- } readdir;
- struct {
- fop_readdir_cbk_t fn;
- int32_t op_ret, op_errno;
- gf_dirent_t entries;
- } readdir_cbk;
-
- /* checksum */
- struct {
- fop_checksum_t fn;
- loc_t loc;
- int32_t flags;
- } checksum;
- struct {
- fop_checksum_cbk_t fn;
- int32_t op_ret, op_errno;
- uint8_t *file_checksum;
- uint8_t *dir_checksum;
- } checksum_cbk;
-
- /* xattrop */
- struct {
- fop_xattrop_t fn;
- loc_t loc;
- gf_xattrop_flags_t optype;
- dict_t *xattr;
- } xattrop;
- struct {
- fop_xattrop_cbk_t fn;
- int32_t op_ret;
- int32_t op_errno;
- dict_t *xattr;
- } xattrop_cbk;
-
- /* fxattrop */
- struct {
- fop_fxattrop_t fn;
- fd_t *fd;
- gf_xattrop_flags_t optype;
- dict_t *xattr;
- } fxattrop;
- struct {
- fop_fxattrop_cbk_t fn;
- int32_t op_ret;
- int32_t op_errno;
- dict_t *xattr;
- } fxattrop_cbk;
-
- struct {
- fop_lock_notify_t fn;
- loc_t loc;
- int32_t timeout;
- } lock_notify;
- struct {
- fop_lock_notify_cbk_t fn;
- int32_t op_ret;
- int32_t op_errno;
- } lock_notify_cbk;
-
- struct {
- fop_lock_fnotify_t fn;
- fd_t *fd;
- int32_t timeout;
- } lock_fnotify;
- struct {
- fop_lock_fnotify_cbk_t fn;
- int32_t op_ret;
- int32_t op_errno;
- } lock_fnotify_cbk;
+ fop_lookup_t lookup;
+ fop_stat_t stat;
+ fop_fstat_t fstat;
+ fop_truncate_t truncate;
+ fop_ftruncate_t ftruncate;
+ fop_access_t access;
+ fop_readlink_t readlink;
+ fop_mknod_t mknod;
+ fop_mkdir_t mkdir;
+ fop_unlink_t unlink;
+ fop_rmdir_t rmdir;
+ fop_symlink_t symlink;
+ fop_rename_t rename;
+ fop_link_t link;
+ fop_create_t create;
+ fop_open_t open;
+ fop_readv_t readv;
+ fop_writev_t writev;
+ fop_flush_t flush;
+ fop_fsync_t fsync;
+ fop_opendir_t opendir;
+ fop_fsyncdir_t fsyncdir;
+ fop_statfs_t statfs;
+ fop_setxattr_t setxattr;
+ fop_getxattr_t getxattr;
+ fop_fgetxattr_t fgetxattr;
+ fop_fsetxattr_t fsetxattr;
+ fop_removexattr_t removexattr;
+ fop_fremovexattr_t fremovexattr;
+ fop_lk_t lk;
+ fop_inodelk_t inodelk;
+ fop_finodelk_t finodelk;
+ fop_entrylk_t entrylk;
+ fop_fentrylk_t fentrylk;
+ fop_readdir_t readdir;
+ fop_readdirp_t readdirp;
+ fop_rchecksum_t rchecksum;
+ fop_xattrop_t xattrop;
+ fop_fxattrop_t fxattrop;
+ fop_setattr_t setattr;
+ fop_fsetattr_t fsetattr;
+ fop_fallocate_t fallocate;
+ fop_discard_t discard;
+ fop_zerofill_t zerofill;
+ } fn;
+ union {
+ fop_lookup_cbk_t lookup;
+ fop_stat_cbk_t stat;
+ fop_fstat_cbk_t fstat;
+ fop_truncate_cbk_t truncate;
+ fop_ftruncate_cbk_t ftruncate;
+ fop_access_cbk_t access;
+ fop_readlink_cbk_t readlink;
+ fop_mknod_cbk_t mknod;
+ fop_mkdir_cbk_t mkdir;
+ fop_unlink_cbk_t unlink;
+ fop_rmdir_cbk_t rmdir;
+ fop_symlink_cbk_t symlink;
+ fop_rename_cbk_t rename;
+ fop_link_cbk_t link;
+ fop_create_cbk_t create;
+ fop_open_cbk_t open;
+ fop_readv_cbk_t readv;
+ fop_writev_cbk_t writev;
+ fop_flush_cbk_t flush;
+ fop_fsync_cbk_t fsync;
+ fop_opendir_cbk_t opendir;
+ fop_fsyncdir_cbk_t fsyncdir;
+ fop_statfs_cbk_t statfs;
+ fop_setxattr_cbk_t setxattr;
+ fop_getxattr_cbk_t getxattr;
+ fop_fgetxattr_cbk_t fgetxattr;
+ fop_fsetxattr_cbk_t fsetxattr;
+ fop_removexattr_cbk_t removexattr;
+ fop_fremovexattr_cbk_t fremovexattr;
+ fop_lk_cbk_t lk;
+ fop_inodelk_cbk_t inodelk;
+ fop_finodelk_cbk_t finodelk;
+ fop_entrylk_cbk_t entrylk;
+ fop_fentrylk_cbk_t fentrylk;
+ fop_readdir_cbk_t readdir;
+ fop_readdirp_cbk_t readdirp;
+ fop_rchecksum_cbk_t rchecksum;
+ fop_xattrop_cbk_t xattrop;
+ fop_fxattrop_cbk_t fxattrop;
+ fop_setattr_cbk_t setattr;
+ fop_fsetattr_cbk_t fsetattr;
+ fop_fallocate_cbk_t fallocate;
+ fop_discard_cbk_t discard;
+ fop_zerofill_cbk_t zerofill;
+ } fn_cbk;
+
+ struct {
+ loc_t loc; // @old in rename(), link()
+ loc_t loc2; // @new in rename(), link()
+ fd_t *fd;
+ off_t offset;
+ int mask;
+ size_t size;
+ mode_t mode;
+ dev_t rdev;
+ mode_t umask;
+ int xflag;
+ int flags;
+ const char *linkname;
+ struct iovec *vector;
+ int count;
+ struct iobref *iobref;
+ int datasync;
+ dict_t *xattr;
+ const char *name;
+ int cmd;
+ struct gf_flock lock;
+ const char *volume;
+ entrylk_cmd entrylkcmd;
+ entrylk_type entrylktype;
+ gf_xattrop_flags_t optype;
+ int valid;
+ struct iatt stat;
+ dict_t *xdata;
} args;
+
+ struct {
+ int op_ret;
+ int op_errno;
+ inode_t *inode;
+ struct iatt stat;
+ struct iatt prestat;
+ struct iatt poststat;
+ struct iatt preparent; // @preoldparent in rename_cbk
+ struct iatt postparent; // @postoldparent in rename_cbk
+ struct iatt preparent2; // @prenewparent in rename_cbk
+ struct iatt postparent2; // @postnewparent in rename_cbk
+ const char *buf;
+ struct iovec *vector;
+ int count;
+ struct iobref *iobref;
+ fd_t *fd;
+ struct statvfs statvfs;
+ dict_t *xattr;
+ struct gf_flock lock;
+ gf_dirent_t entries;
+ uint32_t weak_checksum;
+ uint8_t *strong_checksum;
+ dict_t *xdata;
+ } args_cbk;
} call_stub_t;
+
call_stub_t *
fop_lookup_stub (call_frame_t *frame,
fop_lookup_t fn,
loc_t *loc,
- dict_t *xattr_req);
+ dict_t *xdata);
call_stub_t *
fop_lookup_cbk_stub (call_frame_t *frame,
@@ -633,148 +190,87 @@ fop_lookup_cbk_stub (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf,
- dict_t *dict);
+ struct iatt *buf,
+ dict_t *xdata,
+ struct iatt *postparent);
call_stub_t *
fop_stat_stub (call_frame_t *frame,
fop_stat_t fn,
- loc_t *loc);
+ loc_t *loc, dict_t *xdata);
call_stub_t *
fop_stat_cbk_stub (call_frame_t *frame,
fop_stat_cbk_t fn,
int32_t op_ret,
int32_t op_errno,
- struct stat *buf);
+ struct iatt *buf, dict_t *xdata);
call_stub_t *
fop_fstat_stub (call_frame_t *frame,
fop_fstat_t fn,
- fd_t *fd);
+ fd_t *fd, dict_t *xdata);
call_stub_t *
fop_fstat_cbk_stub (call_frame_t *frame,
fop_fstat_cbk_t fn,
int32_t op_ret,
int32_t op_errno,
- struct stat *buf);
-call_stub_t *
-fop_chmod_stub (call_frame_t *frame,
- fop_chmod_t fn,
- loc_t *loc,
- mode_t mode);
-call_stub_t *
-fop_chmod_cbk_stub (call_frame_t *frame,
- fop_chmod_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
-call_stub_t *
-fop_fchmod_stub (call_frame_t *frame,
- fop_fchmod_t fn,
- fd_t *fd,
- mode_t mode);
-call_stub_t *
-fop_fchmod_cbk_stub (call_frame_t *frame,
- fop_fchmod_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
-call_stub_t *
-fop_chown_stub (call_frame_t *frame,
- fop_chown_t fn,
- loc_t *loc,
- uid_t uid,
- gid_t gid);
-
-call_stub_t *
-fop_chown_cbk_stub (call_frame_t *frame,
- fop_chown_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
-
-call_stub_t *
-fop_fchown_stub (call_frame_t *frame,
- fop_fchown_t fn,
- fd_t *fd,
- uid_t uid,
- gid_t gid);
-
-call_stub_t *
-fop_fchown_cbk_stub (call_frame_t *frame,
- fop_fchown_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
+ struct iatt *buf, dict_t *xdata);
call_stub_t *
fop_truncate_stub (call_frame_t *frame,
fop_truncate_t fn,
loc_t *loc,
- off_t off);
+ off_t off, dict_t *xdata);
call_stub_t *
fop_truncate_cbk_stub (call_frame_t *frame,
fop_truncate_cbk_t fn,
int32_t op_ret,
int32_t op_errno,
- struct stat *buf);
+ struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
call_stub_t *
fop_ftruncate_stub (call_frame_t *frame,
fop_ftruncate_t fn,
fd_t *fd,
- off_t off);
+ off_t off, dict_t *xdata);
call_stub_t *
fop_ftruncate_cbk_stub (call_frame_t *frame,
fop_ftruncate_cbk_t fn,
int32_t op_ret,
int32_t op_errno,
- struct stat *buf);
-
-call_stub_t *
-fop_utimens_stub (call_frame_t *frame,
- fop_utimens_t fn,
- loc_t *loc,
- struct timespec tv[2]);
-
-call_stub_t *
-fop_utimens_cbk_stub (call_frame_t *frame,
- fop_utimens_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
+ struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
call_stub_t *
fop_access_stub (call_frame_t *frame,
fop_access_t fn,
loc_t *loc,
- int32_t mask);
+ int32_t mask, dict_t *xdata);
call_stub_t *
fop_access_cbk_stub (call_frame_t *frame,
fop_access_cbk_t fn,
int32_t op_ret,
- int32_t op_errno);
+ int32_t op_errno, dict_t *xdata);
call_stub_t *
fop_readlink_stub (call_frame_t *frame,
fop_readlink_t fn,
loc_t *loc,
- size_t size);
+ size_t size, dict_t *xdata);
call_stub_t *
fop_readlink_cbk_stub (call_frame_t *frame,
fop_readlink_cbk_t fn,
int32_t op_ret,
int32_t op_errno,
- const char *path);
+ const char *path,
+ struct iatt *buf, dict_t *xdata);
call_stub_t *
-fop_mknod_stub (call_frame_t *frame,
- fop_mknod_t fn,
- loc_t *loc,
- mode_t mode,
- dev_t rdev);
+fop_mknod_stub (call_frame_t *frame, fop_mknod_t fn, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata);
call_stub_t *
fop_mknod_cbk_stub (call_frame_t *frame,
@@ -782,13 +278,13 @@ fop_mknod_cbk_stub (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf);
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
call_stub_t *
-fop_mkdir_stub (call_frame_t *frame,
- fop_mkdir_t fn,
- loc_t *loc,
- mode_t mode);
+fop_mkdir_stub (call_frame_t *frame, fop_mkdir_t fn, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata);
call_stub_t *
fop_mkdir_cbk_stub (call_frame_t *frame,
@@ -796,35 +292,37 @@ fop_mkdir_cbk_stub (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf);
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
call_stub_t *
-fop_unlink_stub (call_frame_t *frame,
- fop_unlink_t fn,
- loc_t *loc);
+fop_unlink_stub (call_frame_t *frame, fop_unlink_t fn,
+ loc_t *loc, int xflag, dict_t *xdata);
call_stub_t *
fop_unlink_cbk_stub (call_frame_t *frame,
fop_unlink_cbk_t fn,
int32_t op_ret,
- int32_t op_errno);
+ int32_t op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
call_stub_t *
-fop_rmdir_stub (call_frame_t *frame,
- fop_rmdir_t fn,
- loc_t *loc);
+fop_rmdir_stub (call_frame_t *frame, fop_rmdir_t fn,
+ loc_t *loc, int flags, dict_t *xdata);
call_stub_t *
fop_rmdir_cbk_stub (call_frame_t *frame,
fop_rmdir_cbk_t fn,
int32_t op_ret,
- int32_t op_errno);
+ int32_t op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
call_stub_t *
-fop_symlink_stub (call_frame_t *frame,
- fop_symlink_t fn,
- const char *linkname,
- loc_t *loc);
+fop_symlink_stub (call_frame_t *frame, fop_symlink_t fn,
+ const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata);
call_stub_t *
fop_symlink_cbk_stub (call_frame_t *frame,
@@ -832,26 +330,32 @@ fop_symlink_cbk_stub (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf);
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
call_stub_t *
fop_rename_stub (call_frame_t *frame,
fop_rename_t fn,
loc_t *oldloc,
- loc_t *newloc);
+ loc_t *newloc, dict_t *xdata);
call_stub_t *
fop_rename_cbk_stub (call_frame_t *frame,
fop_rename_cbk_t fn,
int32_t op_ret,
int32_t op_errno,
- struct stat *buf);
+ struct iatt *buf,
+ struct iatt *preoldparent,
+ struct iatt *postoldparent,
+ struct iatt *prenewparent,
+ struct iatt *postnewparent, dict_t *xdata);
call_stub_t *
fop_link_stub (call_frame_t *frame,
fop_link_t fn,
loc_t *oldloc,
- loc_t *newloc);
+ loc_t *newloc, dict_t *xdata);
call_stub_t *
fop_link_cbk_stub (call_frame_t *frame,
@@ -859,14 +363,14 @@ fop_link_cbk_stub (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf);
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
call_stub_t *
-fop_create_stub (call_frame_t *frame,
- fop_create_t fn,
- loc_t *loc,
- int32_t flags,
- mode_t mode, fd_t *fd);
+fop_create_stub (call_frame_t *frame, fop_create_t fn,
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *xdata);
call_stub_t *
fop_create_cbk_stub (call_frame_t *frame,
@@ -875,28 +379,31 @@ fop_create_cbk_stub (call_frame_t *frame,
int32_t op_errno,
fd_t *fd,
inode_t *inode,
- struct stat *buf);
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
call_stub_t *
fop_open_stub (call_frame_t *frame,
fop_open_t fn,
loc_t *loc,
int32_t flags,
- fd_t *fd);
+ fd_t *fd,
+ dict_t *xdata);
call_stub_t *
fop_open_cbk_stub (call_frame_t *frame,
fop_open_cbk_t fn,
int32_t op_ret,
int32_t op_errno,
- fd_t *fd);
+ fd_t *fd, dict_t *xdata);
call_stub_t *
fop_readv_stub (call_frame_t *frame,
fop_readv_t fn,
fd_t *fd,
size_t size,
- off_t off);
+ off_t off, uint32_t flags, dict_t *xdata);
call_stub_t *
fop_readv_cbk_stub (call_frame_t *frame,
@@ -905,8 +412,8 @@ fop_readv_cbk_stub (call_frame_t *frame,
int32_t op_errno,
struct iovec *vector,
int32_t count,
- struct stat *stbuf,
- struct iobref *iobref);
+ struct iatt *stbuf,
+ struct iobref *iobref, dict_t *xdata);
call_stub_t *
fop_writev_stub (call_frame_t *frame,
@@ -914,296 +421,347 @@ fop_writev_stub (call_frame_t *frame,
fd_t *fd,
struct iovec *vector,
int32_t count,
- off_t off,
- struct iobref *iobref);
+ off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata);
call_stub_t *
fop_writev_cbk_stub (call_frame_t *frame,
fop_writev_cbk_t fn,
int32_t op_ret,
int32_t op_errno,
- struct stat *stbuf);
+ struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
call_stub_t *
fop_flush_stub (call_frame_t *frame,
fop_flush_t fn,
- fd_t *fd);
+ fd_t *fd, dict_t *xdata);
call_stub_t *
fop_flush_cbk_stub (call_frame_t *frame,
fop_flush_cbk_t fn,
int32_t op_ret,
- int32_t op_errno);
+ int32_t op_errno, dict_t *xdata);
call_stub_t *
fop_fsync_stub (call_frame_t *frame,
fop_fsync_t fn,
fd_t *fd,
- int32_t datasync);
+ int32_t datasync, dict_t *xdata);
call_stub_t *
fop_fsync_cbk_stub (call_frame_t *frame,
fop_fsync_cbk_t fn,
int32_t op_ret,
- int32_t op_errno);
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
call_stub_t *
fop_opendir_stub (call_frame_t *frame,
fop_opendir_t fn,
- loc_t *loc, fd_t *fd);
+ loc_t *loc, fd_t *fd, dict_t *xdata);
call_stub_t *
fop_opendir_cbk_stub (call_frame_t *frame,
fop_opendir_cbk_t fn,
int32_t op_ret,
int32_t op_errno,
- fd_t *fd);
-
-call_stub_t *
-fop_getdents_stub (call_frame_t *frame,
- fop_getdents_t fn,
- fd_t *fd,
- size_t size,
- off_t off,
- int32_t flag);
-
-call_stub_t *
-fop_getdents_cbk_stub (call_frame_t *frame,
- fop_getdents_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entries,
- int32_t count);
-
-call_stub_t *
-fop_setdents_stub (call_frame_t *frame,
- fop_setdents_t fn,
- fd_t *fd,
- int32_t flags,
- dir_entry_t *entries,
- int32_t count);
-
-call_stub_t *
-fop_setdents_cbk_stub (call_frame_t *frame,
- fop_setdents_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno);
+ fd_t *fd, dict_t *xdata);
call_stub_t *
fop_fsyncdir_stub (call_frame_t *frame,
fop_fsyncdir_t fn,
fd_t *fd,
- int32_t datasync);
+ int32_t datasync, dict_t *xdata);
call_stub_t *
fop_fsyncdir_cbk_stub (call_frame_t *frame,
fop_fsyncdir_cbk_t fn,
int32_t op_ret,
- int32_t op_errno);
+ int32_t op_errno, dict_t *xdata);
call_stub_t *
fop_statfs_stub (call_frame_t *frame,
fop_statfs_t fn,
- loc_t *loc);
+ loc_t *loc, dict_t *xdata);
call_stub_t *
fop_statfs_cbk_stub (call_frame_t *frame,
fop_statfs_cbk_t fn,
int32_t op_ret,
int32_t op_errno,
- struct statvfs *buf);
+ struct statvfs *buf, dict_t *xdata);
call_stub_t *
fop_setxattr_stub (call_frame_t *frame,
fop_setxattr_t fn,
loc_t *loc,
dict_t *dict,
- int32_t flags);
+ int32_t flags, dict_t *xdata);
call_stub_t *
fop_setxattr_cbk_stub (call_frame_t *frame,
fop_setxattr_cbk_t fn,
int32_t op_ret,
- int32_t op_errno);
+ int32_t op_errno, dict_t *xdata);
call_stub_t *
fop_getxattr_stub (call_frame_t *frame,
fop_getxattr_t fn,
loc_t *loc,
- const char *name);
+ const char *name, dict_t *xdata);
call_stub_t *
fop_getxattr_cbk_stub (call_frame_t *frame,
fop_getxattr_cbk_t fn,
int32_t op_ret,
int32_t op_errno,
- dict_t *value);
+ dict_t *value, dict_t *xdata);
call_stub_t *
fop_fsetxattr_stub (call_frame_t *frame,
fop_fsetxattr_t fn,
fd_t *fd,
dict_t *dict,
- int32_t flags);
+ int32_t flags, dict_t *xdata);
call_stub_t *
fop_fsetxattr_cbk_stub (call_frame_t *frame,
fop_fsetxattr_cbk_t fn,
int32_t op_ret,
- int32_t op_errno);
+ int32_t op_errno, dict_t *xdata);
call_stub_t *
fop_fgetxattr_stub (call_frame_t *frame,
fop_fgetxattr_t fn,
fd_t *fd,
- const char *name);
+ const char *name, dict_t *xdata);
call_stub_t *
fop_fgetxattr_cbk_stub (call_frame_t *frame,
fop_fgetxattr_cbk_t fn,
int32_t op_ret,
int32_t op_errno,
- dict_t *value);
+ dict_t *value, dict_t *xdata);
call_stub_t *
fop_removexattr_stub (call_frame_t *frame,
fop_removexattr_t fn,
loc_t *loc,
- const char *name);
+ const char *name, dict_t *xdata);
call_stub_t *
fop_removexattr_cbk_stub (call_frame_t *frame,
fop_removexattr_cbk_t fn,
int32_t op_ret,
- int32_t op_errno);
+ int32_t op_errno, dict_t *xdata);
+
+
+call_stub_t *
+fop_fremovexattr_stub (call_frame_t *frame,
+ fop_fremovexattr_t fn,
+ fd_t *fd,
+ const char *name, dict_t *xdata);
+
+call_stub_t *
+fop_fremovexattr_cbk_stub (call_frame_t *frame,
+ fop_fremovexattr_cbk_t fn,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
call_stub_t *
fop_lk_stub (call_frame_t *frame,
fop_lk_t fn,
fd_t *fd,
int32_t cmd,
- struct flock *lock);
+ struct gf_flock *lock, dict_t *xdata);
call_stub_t *
fop_lk_cbk_stub (call_frame_t *frame,
fop_lk_cbk_t fn,
int32_t op_ret,
int32_t op_errno,
- struct flock *lock);
+ struct gf_flock *lock, dict_t *xdata);
call_stub_t *
fop_inodelk_stub (call_frame_t *frame, fop_inodelk_t fn,
- const char *volume, loc_t *loc, int32_t cmd,
- struct flock *lock);
+ const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata);
call_stub_t *
fop_finodelk_stub (call_frame_t *frame, fop_finodelk_t fn,
- const char *volume, fd_t *fd, int32_t cmd,
- struct flock *lock);
+ const char *volume, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata);
call_stub_t *
fop_entrylk_stub (call_frame_t *frame, fop_entrylk_t fn,
const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type);
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
call_stub_t *
fop_fentrylk_stub (call_frame_t *frame, fop_fentrylk_t fn,
const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type);
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
call_stub_t *
fop_inodelk_cbk_stub (call_frame_t *frame, fop_inodelk_cbk_t fn,
- int32_t op_ret, int32_t op_errno);
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
call_stub_t *
fop_finodelk_cbk_stub (call_frame_t *frame, fop_inodelk_cbk_t fn,
- int32_t op_ret, int32_t op_errno);
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
call_stub_t *
fop_entrylk_cbk_stub (call_frame_t *frame, fop_entrylk_cbk_t fn,
- int32_t op_ret, int32_t op_errno);
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
call_stub_t *
fop_fentrylk_cbk_stub (call_frame_t *frame, fop_entrylk_cbk_t fn,
- int32_t op_ret, int32_t op_errno);
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
call_stub_t *
fop_readdir_stub (call_frame_t *frame,
fop_readdir_t fn,
fd_t *fd,
size_t size,
- off_t off);
+ off_t off, dict_t *xdata);
+
+call_stub_t *
+fop_readdirp_stub (call_frame_t *frame,
+ fop_readdirp_t fn,
+ fd_t *fd,
+ size_t size,
+ off_t off,
+ dict_t *xdata);
+
+call_stub_t *
+fop_readdirp_cbk_stub (call_frame_t *frame,
+ fop_readdir_cbk_t fn,
+ int32_t op_ret,
+ int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata);
call_stub_t *
fop_readdir_cbk_stub (call_frame_t *frame,
fop_readdir_cbk_t fn,
int32_t op_ret,
int32_t op_errno,
- gf_dirent_t *entries);
+ gf_dirent_t *entries, dict_t *xdata);
call_stub_t *
-fop_checksum_stub (call_frame_t *frame,
- fop_checksum_t fn,
- loc_t *loc,
- int32_t flags);
+fop_rchecksum_stub (call_frame_t *frame,
+ fop_rchecksum_t fn,
+ fd_t *fd, off_t offset,
+ int32_t len, dict_t *xdata);
call_stub_t *
-fop_checksum_cbk_stub (call_frame_t *frame,
- fop_checksum_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *file_checksum,
- uint8_t *dir_checksum);
+fop_rchecksum_cbk_stub (call_frame_t *frame,
+ fop_rchecksum_cbk_t fn,
+ int32_t op_ret,
+ int32_t op_errno,
+ uint32_t weak_checksum,
+ uint8_t *strong_checksum, dict_t *xdata);
call_stub_t *
fop_xattrop_stub (call_frame_t *frame,
fop_xattrop_t fn,
loc_t *loc,
gf_xattrop_flags_t optype,
- dict_t *xattr);
+ dict_t *xattr, dict_t *xdata);
call_stub_t *
fop_xattrop_stub_cbk_stub (call_frame_t *frame,
fop_xattrop_cbk_t fn,
int32_t op_ret,
- int32_t op_errno);
+ int32_t op_errno, dict_t *xdata);
call_stub_t *
fop_fxattrop_stub (call_frame_t *frame,
fop_fxattrop_t fn,
fd_t *fd,
gf_xattrop_flags_t optype,
- dict_t *xattr);
+ dict_t *xattr, dict_t *xdata);
call_stub_t *
fop_fxattrop_stub_cbk_stub (call_frame_t *frame,
fop_xattrop_cbk_t fn,
int32_t op_ret,
- int32_t op_errno);
+ int32_t op_errno, dict_t *xdata);
call_stub_t *
-fop_lock_notify_stub_cbk_stub (call_frame_t *frame,
- fop_lock_notify_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno);
+fop_setattr_stub (call_frame_t *frame,
+ fop_setattr_t fn,
+ loc_t *loc,
+ struct iatt *stbuf,
+ int32_t valid, dict_t *xdata);
call_stub_t *
-fop_lock_notify_stub (call_frame_t *frame,
- fop_lock_notify_t fn,
- loc_t *loc,
- int32_t timeout);
+fop_setattr_cbk_stub (call_frame_t *frame,
+ fop_setattr_cbk_t fn,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata);
+
+call_stub_t *
+fop_fsetattr_stub (call_frame_t *frame,
+ fop_fsetattr_t fn,
+ fd_t *fd,
+ struct iatt *stbuf,
+ int32_t valid, dict_t *xdata);
+
+call_stub_t *
+fop_fsetattr_cbk_stub (call_frame_t *frame,
+ fop_setattr_cbk_t fn,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata);
+
+call_stub_t *
+fop_fallocate_stub(call_frame_t *frame,
+ fop_fallocate_t fn,
+ fd_t *fd,
+ int32_t mode, off_t offset,
+ size_t len, dict_t *xdata);
+
+call_stub_t *
+fop_fallocate_cbk_stub(call_frame_t *frame,
+ fop_fallocate_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost,
+ dict_t *xdata);
+
+call_stub_t *
+fop_discard_stub(call_frame_t *frame,
+ fop_discard_t fn,
+ fd_t *fd,
+ off_t offset,
+ size_t len, dict_t *xdata);
+
+call_stub_t *
+fop_discard_cbk_stub(call_frame_t *frame,
+ fop_discard_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost,
+ dict_t *xdata);
call_stub_t *
-fop_lock_fnotify_stub_cbk_stub (call_frame_t *frame,
- fop_lock_fnotify_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno);
+fop_zerofill_stub(call_frame_t *frame,
+ fop_zerofill_t fn,
+ fd_t *fd,
+ off_t offset,
+ size_t len, dict_t *xdata);
call_stub_t *
-fop_lock_fnotify_stub (call_frame_t *frame,
- fop_lock_fnotify_t fn,
- fd_t *fd,
- int32_t timeout);
+fop_zerofill_cbk_stub(call_frame_t *frame,
+ fop_zerofill_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost,
+ dict_t *xdata);
void call_resume (call_stub_t *stub);
void call_stub_destroy (call_stub_t *stub);
+void call_unwind_error (call_stub_t *stub, int op_ret, int op_errno);
#endif
diff --git a/libglusterfs/src/checksum.c b/libglusterfs/src/checksum.c
new file mode 100644
index 000000000..e14a3044c
--- /dev/null
+++ b/libglusterfs/src/checksum.c
@@ -0,0 +1,65 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <openssl/md5.h>
+#include <stdint.h>
+
+#include "glusterfs.h"
+
+/*
+ * The "weak" checksum required for the rsync algorithm,
+ * adapted from the rsync source code. The following comment
+ * appears there:
+ *
+ * "a simple 32 bit checksum that can be upadted from either end
+ * (inspired by Mark Adler's Adler-32 checksum)"
+ *
+ * Note: these functions are only called to compute checksums on
+ * pathnames; they don't need to handle arbitrarily long strings of
+ * data. Thus int32_t and uint32_t are sufficient
+ */
+
+uint32_t
+gf_rsync_weak_checksum (unsigned char *buf, size_t len)
+{
+ int32_t i = 0;
+ uint32_t s1, s2;
+
+ uint32_t csum;
+
+ s1 = s2 = 0;
+ if (len >= 4) {
+ for (; i < (len-4); i+=4) {
+ s2 += 4*(s1 + buf[i]) + 3*buf[i+1] + 2*buf[i+2] + buf[i+3];
+ s1 += buf[i+0] + buf[i+1] + buf[i+2] + buf[i+3];
+ }
+ }
+
+ for (; i < len; i++) {
+ s1 += buf[i];
+ s2 += s1;
+ }
+
+ csum = (s1 & 0xffff) + (s2 << 16);
+
+ return csum;
+}
+
+
+/*
+ * The "strong" checksum required for the rsync algorithm,
+ * adapted from the rsync source code.
+ */
+
+void
+gf_rsync_strong_checksum (unsigned char *data, size_t len, unsigned char *md5)
+{
+ MD5(data, len, md5);
+}
diff --git a/libglusterfs/src/checksum.h b/libglusterfs/src/checksum.h
new file mode 100644
index 000000000..bf7eeede8
--- /dev/null
+++ b/libglusterfs/src/checksum.h
@@ -0,0 +1,20 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CHECKSUM_H__
+#define __CHECKSUM_H__
+
+uint32_t
+gf_rsync_weak_checksum (unsigned char *buf, size_t len);
+
+void
+gf_rsync_strong_checksum (unsigned char *buf, size_t len, unsigned char *sum);
+
+#endif /* __CHECKSUM_H__ */
diff --git a/libglusterfs/src/circ-buff.c b/libglusterfs/src/circ-buff.c
new file mode 100644
index 000000000..484ce7dc9
--- /dev/null
+++ b/libglusterfs/src/circ-buff.c
@@ -0,0 +1,202 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "circ-buff.h"
+
+void
+cb_destroy_data (circular_buffer_t *cb,
+ void (*destroy_buffer_data) (void *data))
+{
+ if (destroy_buffer_data)
+ destroy_buffer_data (cb->data);
+ GF_FREE (cb->data);
+ return;
+}
+
+
+/* hold lock while calling this function */
+int
+__cb_add_entry_buffer (buffer_t *buffer, void *item)
+{
+ circular_buffer_t *ptr = NULL;
+ int ret = -1;
+ //DO we really need the assert here?
+ GF_ASSERT (buffer->used_len <= buffer->size_buffer);
+
+ if (buffer->use_once == _gf_true &&
+ buffer->used_len == buffer->size_buffer) {
+ gf_log ("", GF_LOG_WARNING, "buffer %p is use once buffer",
+ buffer);
+ return -1;
+ } else {
+ if (buffer->used_len == buffer->size_buffer) {
+ if (buffer->cb[buffer->w_index]) {
+ ptr = buffer->cb[buffer->w_index];
+ if (ptr->data) {
+ cb_destroy_data (ptr,
+ buffer->destroy_buffer_data);
+ ptr->data = NULL;
+ GF_FREE (ptr);
+ }
+ buffer->cb[buffer->w_index] = NULL;
+ ptr = NULL;
+ }
+ }
+
+ buffer->cb[buffer->w_index] =
+ GF_CALLOC (1, sizeof (circular_buffer_t),
+ gf_common_mt_circular_buffer_t);
+ if (!buffer->cb[buffer->w_index])
+ return -1;
+
+ buffer->cb[buffer->w_index]->data = item;
+ ret = gettimeofday (&buffer->cb[buffer->w_index]->tv, NULL);
+ if (ret == -1)
+ gf_log_callingfn ("", GF_LOG_WARNING, "getting time of"
+ "the day failed");
+ buffer->w_index++;
+ buffer->w_index %= buffer->size_buffer;
+ //used_buffer size cannot be greater than the total buffer size
+
+ if (buffer->used_len < buffer->size_buffer)
+ buffer->used_len++;
+ return buffer->w_index;
+ }
+}
+
+int
+cb_add_entry_buffer (buffer_t *buffer, void *item)
+{
+ int write_index = -1;
+
+ pthread_mutex_lock (&buffer->lock);
+ {
+ write_index = __cb_add_entry_buffer (buffer, item);
+ }
+ pthread_mutex_unlock (&buffer->lock);
+
+ return write_index;
+}
+
+void
+cb_buffer_show (buffer_t *buffer)
+{
+ pthread_mutex_lock (&buffer->lock);
+ {
+ gf_log ("", GF_LOG_DEBUG, "w_index: %d, size: %"GF_PRI_SIZET
+ " used_buffer: %d", buffer->w_index,
+ buffer->size_buffer,
+ buffer->used_len);
+ }
+ pthread_mutex_unlock (&buffer->lock);
+}
+
+void
+cb_buffer_dump (buffer_t *buffer, void *data,
+ int (fn) (circular_buffer_t *buffer, void *data))
+{
+ int index = 0;
+ circular_buffer_t *entry = NULL;
+ int entries = 0;
+ int ul = 0;
+ int w_ind = 0;
+ int size_buff = 0;
+ int i = 0;
+
+ ul = buffer->used_len;
+ w_ind = buffer->w_index;
+ size_buff = buffer->size_buffer;
+
+ pthread_mutex_lock (&buffer->lock);
+ {
+ if (buffer->use_once == _gf_false) {
+ index = (size_buff + (w_ind - ul))%size_buff;
+ for (entries = 0; entries < buffer->used_len;
+ entries++) {
+ entry = buffer->cb[index];
+ if (entry)
+ fn (entry, data);
+ else
+ gf_log_callingfn ("", GF_LOG_WARNING,
+ "Null entry in "
+ "circular buffer at "
+ "index %d.", index);
+
+ index++;
+ index %= buffer->size_buffer;
+ }
+ } else {
+ for (i = 0; i < buffer->used_len ; i++) {
+ entry = buffer->cb[i];
+ fn (entry, data);
+ }
+ }
+ }
+ pthread_mutex_unlock (&buffer->lock);
+}
+
+buffer_t *
+cb_buffer_new (size_t buffer_size, gf_boolean_t use_once,
+ void (*destroy_buffer_data) (void *data))
+{
+ buffer_t *buffer = NULL;
+
+ buffer = GF_CALLOC (1, sizeof (*buffer), gf_common_mt_buffer_t);
+ if (!buffer) {
+ gf_log ("", GF_LOG_ERROR, "could not allocate the "
+ "buffer");
+ goto out;
+ }
+
+ buffer->cb = GF_CALLOC (buffer_size,
+ sizeof (circular_buffer_t *),
+ gf_common_mt_circular_buffer_t);
+ if (!buffer->cb) {
+ gf_log ("", GF_LOG_ERROR, "could not allocate the "
+ "memory for the circular buffer");
+ GF_FREE (buffer);
+ buffer = NULL;
+ goto out;
+ }
+
+ buffer->w_index = 0;
+ buffer->size_buffer = buffer_size;
+ buffer->use_once = use_once;
+ buffer->used_len = 0;
+ buffer->destroy_buffer_data = destroy_buffer_data;
+ pthread_mutex_init (&buffer->lock, NULL);
+
+out:
+ return buffer;
+}
+
+void
+cb_buffer_destroy (buffer_t *buffer)
+{
+ int i = 0;
+ circular_buffer_t *ptr = NULL;
+ if (buffer) {
+ if (buffer->cb) {
+ for (i = 0; i < buffer->used_len ; i++) {
+ ptr = buffer->cb[i];
+ if (ptr->data) {
+ cb_destroy_data (ptr,
+ buffer->destroy_buffer_data);
+ ptr->data = NULL;
+ GF_FREE (ptr);
+ }
+ }
+ GF_FREE (buffer->cb);
+ }
+ pthread_mutex_destroy (&buffer->lock);
+ GF_FREE (buffer);
+ }
+}
+
diff --git a/libglusterfs/src/circ-buff.h b/libglusterfs/src/circ-buff.h
new file mode 100644
index 000000000..e3459f5e3
--- /dev/null
+++ b/libglusterfs/src/circ-buff.h
@@ -0,0 +1,64 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CB_H
+#define _CB_H
+
+#include "common-utils.h"
+#include "logging.h"
+#include "mem-types.h"
+
+#define BUFFER_SIZE 10
+#define TOTAL_SIZE BUFFER_SIZE + 1
+
+
+struct _circular_buffer {
+ struct timeval tv;
+ void *data;
+};
+
+typedef struct _circular_buffer circular_buffer_t;
+
+struct _buffer {
+ unsigned int w_index;
+ size_t size_buffer;
+ gf_boolean_t use_once;
+ /* This variable is assigned the proper value at the time of initing */
+ /* the buffer. It indicates, whether the buffer should be used once */
+ /* it becomes full. */
+
+ int used_len;
+ /* indicates the amount of circular buffer used. */
+
+ circular_buffer_t **cb;
+ void (*destroy_buffer_data) (void *data);
+ pthread_mutex_t lock;
+};
+
+typedef struct _buffer buffer_t;
+
+int
+cb_add_entry_buffer (buffer_t *buffer, void *item);
+
+void
+cb_buffer_show (buffer_t *buffer);
+
+buffer_t *
+cb_buffer_new (size_t buffer_size,gf_boolean_t use_buffer_once,
+ void (*destroy_data) (void *data));
+
+void
+cb_buffer_destroy (buffer_t *buffer);
+
+void
+cb_buffer_dump (buffer_t *buffer, void *data,
+ int (fn) (circular_buffer_t *buffer, void *data));
+
+#endif /* _CB_H */
diff --git a/libglusterfs/src/client_t.c b/libglusterfs/src/client_t.c
new file mode 100644
index 000000000..06447dc5d
--- /dev/null
+++ b/libglusterfs/src/client_t.c
@@ -0,0 +1,890 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterfs.h"
+#include "dict.h"
+#include "statedump.h"
+#include "client_t.h"
+#include "list.h"
+#include "rpcsvc.h"
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+
+static int
+gf_client_chain_client_entries (cliententry_t *entries, uint32_t startidx,
+ uint32_t endcount)
+{
+ uint32_t i = 0;
+
+ if (!entries) {
+ gf_log_callingfn ("client_t", GF_LOG_WARNING, "!entries");
+ return -1;
+ }
+
+ /* Chain only till the second to last entry because we want to
+ * ensure that the last entry has GF_CLIENTTABLE_END.
+ */
+ for (i = startidx; i < (endcount - 1); i++)
+ entries[i].next_free = i + 1;
+
+ /* i has already been incremented upto the last entry. */
+ entries[i].next_free = GF_CLIENTTABLE_END;
+
+ return 0;
+}
+
+
+static int
+gf_client_clienttable_expand (clienttable_t *clienttable, uint32_t nr)
+{
+ cliententry_t *oldclients = NULL;
+ uint32_t oldmax_clients = -1;
+ int ret = -1;
+
+ if (clienttable == NULL || nr < 0) {
+ gf_log_callingfn ("client_t", GF_LOG_ERROR, "invalid argument");
+ ret = EINVAL;
+ goto out;
+ }
+
+ /* expand size by power-of-two...
+ this originally came from .../xlators/protocol/server/src/server.c
+ where it was not commented */
+ nr /= (1024 / sizeof (cliententry_t));
+ nr = gf_roundup_next_power_of_two (nr + 1);
+ nr *= (1024 / sizeof (cliententry_t));
+
+ oldclients = clienttable->cliententries;
+ oldmax_clients = clienttable->max_clients;
+
+ clienttable->cliententries = GF_CALLOC (nr, sizeof (cliententry_t),
+ gf_common_mt_cliententry_t);
+ if (!clienttable->cliententries) {
+ ret = ENOMEM;
+ goto out;
+ }
+ clienttable->max_clients = nr;
+
+ if (oldclients) {
+ uint32_t cpy = oldmax_clients * sizeof (cliententry_t);
+ memcpy (clienttable->cliententries, oldclients, cpy);
+ }
+
+ gf_client_chain_client_entries (clienttable->cliententries, oldmax_clients,
+ clienttable->max_clients);
+
+ /* Now that expansion is done, we must update the client list
+ * head pointer so that the client allocation functions can continue
+ * using the expanded table.
+ */
+ clienttable->first_free = oldmax_clients;
+ GF_FREE (oldclients);
+ ret = 0;
+out:
+ return ret;
+}
+
+
+clienttable_t *
+gf_clienttable_alloc (void)
+{
+ clienttable_t *clienttable = NULL;
+
+ clienttable =
+ GF_CALLOC (1, sizeof (clienttable_t), gf_common_mt_clienttable_t);
+ if (!clienttable)
+ return NULL;
+
+ LOCK_INIT (&clienttable->lock);
+ gf_client_clienttable_expand (clienttable, GF_CLIENTTABLE_INITIAL_SIZE);
+ return clienttable;
+}
+
+
+void
+gf_client_clienttable_destroy (clienttable_t *clienttable)
+{
+ client_t *client = NULL;
+ cliententry_t *cliententries = NULL;
+ uint32_t client_count = 0;
+ int32_t i = 0;
+
+ if (!clienttable) {
+ gf_log_callingfn ("client_t", GF_LOG_WARNING, "!clienttable");
+ return;
+ }
+
+ LOCK (&clienttable->lock);
+ {
+ client_count = clienttable->max_clients;
+ clienttable->max_clients = 0;
+ cliententries = clienttable->cliententries;
+ clienttable->cliententries = NULL;
+ }
+ UNLOCK (&clienttable->lock);
+
+ if (cliententries != NULL) {
+ for (i = 0; i < client_count; i++) {
+ client = cliententries[i].client;
+ if (client != NULL) {
+ gf_client_unref (client);
+ }
+ }
+
+ GF_FREE (cliententries);
+ LOCK_DESTROY (&clienttable->lock);
+ GF_FREE (clienttable);
+ }
+}
+
+client_t *
+gf_client_get (xlator_t *this, struct rpcsvc_auth_data *cred, char *client_uid)
+{
+ client_t *client = NULL;
+ cliententry_t *cliententry = NULL;
+ clienttable_t *clienttable = NULL;
+ unsigned int i = 0;
+
+ if (this == NULL || client_uid == NULL) {
+ gf_log_callingfn ("client_t", GF_LOG_ERROR, "invalid argument");
+ errno = EINVAL;
+ return NULL;
+ }
+
+ gf_log (this->name, GF_LOG_INFO, "client_uid=%s", client_uid);
+
+ clienttable = this->ctx->clienttable;
+
+ LOCK (&clienttable->lock);
+ {
+ for (; i < clienttable->max_clients; i++) {
+ client = clienttable->cliententries[i].client;
+ if (client == NULL)
+ continue;
+ /*
+ * look for matching client_uid, _and_
+ * if auth was used, matching auth flavour and data
+ */
+ if (strcmp (client_uid, client->client_uid) == 0 &&
+ (cred->flavour != AUTH_NONE &&
+ (cred->flavour == client->auth.flavour &&
+ (size_t) cred->datalen == client->auth.len &&
+ memcmp (cred->authdata,
+ client->auth.data,
+ client->auth.len) == 0))) {
+#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)
+ __sync_add_and_fetch(&client->ref.bind, 1);
+#else
+ LOCK (&client->ref.lock);
+ {
+ ++client->ref.bind;
+ }
+ UNLOCK (&client->ref.lock);
+#endif
+ break;
+ }
+ }
+ if (client) {
+ gf_client_ref (client);
+ goto unlock;
+ }
+ client = GF_CALLOC (1, sizeof(client_t), gf_common_mt_client_t);
+ if (client == NULL) {
+ errno = ENOMEM;
+ goto unlock;
+ }
+
+ client->this = this;
+
+ LOCK_INIT (&client->scratch_ctx.lock);
+ LOCK_INIT (&client->ref.lock);
+
+ client->client_uid = gf_strdup (client_uid);
+ if (client->client_uid == NULL) {
+ GF_FREE (client);
+ client = NULL;
+ errno = ENOMEM;
+ goto unlock;
+ }
+ client->scratch_ctx.count = GF_CLIENTCTX_INITIAL_SIZE;
+ client->scratch_ctx.ctx =
+ GF_CALLOC (GF_CLIENTCTX_INITIAL_SIZE,
+ sizeof (struct client_ctx),
+ gf_common_mt_client_ctx);
+ if (client->scratch_ctx.ctx == NULL) {
+ GF_FREE (client->client_uid);
+ GF_FREE (client);
+ client = NULL;
+ errno = ENOMEM;
+ goto unlock;
+ }
+
+ /* no need to do these atomically here */
+ client->ref.bind = client->ref.count = 1;
+
+ client->auth.flavour = cred->flavour;
+ if (cred->flavour != AUTH_NONE) {
+ client->auth.data =
+ GF_CALLOC (1, cred->datalen,
+ gf_common_mt_client_t);
+ if (client->auth.data == NULL) {
+ GF_FREE (client->scratch_ctx.ctx);
+ GF_FREE (client->client_uid);
+ GF_FREE (client);
+ client = NULL;
+ errno = ENOMEM;
+ goto unlock;
+ }
+ memcpy (client->auth.data, cred->authdata,
+ cred->datalen);
+ client->auth.len = cred->datalen;
+ }
+
+ client->tbl_index = clienttable->first_free;
+ cliententry = &clienttable->cliententries[client->tbl_index];
+ cliententry->client = client;
+ clienttable->first_free = cliententry->next_free;
+ cliententry->next_free = GF_CLIENTENTRY_ALLOCATED;
+ gf_client_ref (client);
+ }
+unlock:
+ UNLOCK (&clienttable->lock);
+
+ return client;
+}
+
+void
+gf_client_put (client_t *client, gf_boolean_t *detached)
+{
+ gf_boolean_t unref = _gf_false;
+ int bind_ref;
+
+ if (detached)
+ *detached = _gf_false;
+
+#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)
+ bind_ref = __sync_sub_and_fetch(&client->ref.bind, 1);
+#else
+ LOCK (&client->ref.lock);
+ {
+ bind_ref = --client->ref.bind;
+ }
+ UNLOCK (&client->ref.lock);
+#endif
+ if (bind_ref == 0)
+ unref = _gf_true;
+
+ if (unref) {
+ gf_log (THIS->name, GF_LOG_INFO, "Shutting down connection %s",
+ client->client_uid);
+ if (detached)
+ *detached = _gf_true;
+ gf_client_unref (client);
+ }
+}
+
+
+client_t *
+gf_client_ref (client_t *client)
+{
+ if (!client) {
+ gf_log_callingfn ("client_t", GF_LOG_ERROR, "null client");
+ return NULL;
+ }
+
+#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)
+ __sync_add_and_fetch(&client->ref.count, 1);
+#else
+ LOCK (&client->ref.lock);
+ {
+ ++client->ref.count;
+ }
+ UNLOCK (&client->ref.lock);
+#endif
+ return client;
+}
+
+
+static void
+client_destroy (client_t *client)
+{
+ clienttable_t *clienttable = NULL;
+ glusterfs_graph_t *gtrav = NULL;
+ xlator_t *xtrav = NULL;
+
+ if (client == NULL){
+ gf_log_callingfn ("xlator", GF_LOG_ERROR, "invalid argument");
+ goto out;
+ }
+
+ clienttable = client->this->ctx->clienttable;
+
+ LOCK_DESTROY (&client->scratch_ctx.lock);
+ LOCK_DESTROY (&client->ref.lock);
+
+ LOCK (&clienttable->lock);
+ {
+ clienttable->cliententries[client->tbl_index].client = NULL;
+ clienttable->cliententries[client->tbl_index].next_free =
+ clienttable->first_free;
+ clienttable->first_free = client->tbl_index;
+ }
+ UNLOCK (&clienttable->lock);
+
+ list_for_each_entry (gtrav, &client->this->ctx->graphs, list) {
+ xtrav = gtrav->top;
+ while (xtrav != NULL) {
+ if (xtrav->cbks->client_destroy != NULL)
+ xtrav->cbks->client_destroy (xtrav, client);
+ xtrav = xtrav->next;
+ }
+ }
+ GF_FREE (client->auth.data);
+ GF_FREE (client->scratch_ctx.ctx);
+ GF_FREE (client->client_uid);
+ GF_FREE (client);
+out:
+ return;
+}
+
+
+int
+gf_client_disconnect (client_t *client)
+{
+ int ret = 0;
+ glusterfs_graph_t *gtrav = NULL;
+ xlator_t *xtrav = NULL;
+
+ list_for_each_entry (gtrav, &client->this->ctx->graphs, list) {
+ xtrav = gtrav->top;
+ while (xtrav != NULL) {
+ if (xtrav->cbks->client_disconnect != NULL)
+ if (xtrav->cbks->client_disconnect (xtrav, client) != 0)
+ ret = -1;
+ xtrav = xtrav->next;
+ }
+ }
+
+ return ret;
+}
+
+
+void
+gf_client_unref (client_t *client)
+{
+ int refcount;
+
+ if (!client) {
+ gf_log_callingfn ("client_t", GF_LOG_ERROR, "client is NULL");
+ return;
+ }
+
+#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)
+ refcount = __sync_sub_and_fetch(&client->ref.count, 1);
+#else
+ LOCK (&client->ref.lock);
+ {
+ refcount = --client->ref.count;
+ }
+ UNLOCK (&client->ref.lock);
+#endif
+ if (refcount == 0) {
+ client_destroy (client);
+ }
+}
+
+
+static int
+client_ctx_set_int (client_t *client, void *key, void *value)
+{
+ int index = 0;
+ int ret = 0;
+ int set_idx = -1;
+
+ for (index = 0; index < client->scratch_ctx.count; index++) {
+ if (!client->scratch_ctx.ctx[index].ctx_key) {
+ if (set_idx == -1)
+ set_idx = index;
+ /* dont break, to check if key already exists
+ further on */
+ }
+ if (client->scratch_ctx.ctx[index].ctx_key == key) {
+ set_idx = index;
+ break;
+ }
+ }
+
+ if (set_idx == -1) {
+ ret = -1;
+ goto out;
+ }
+
+ client->scratch_ctx.ctx[set_idx].ctx_key = key;
+ client->scratch_ctx.ctx[set_idx].ctx_value = value;
+
+out:
+ return ret;
+}
+
+
+int
+client_ctx_set (client_t *client, void *key, void *value)
+{
+ int ret = 0;
+
+ if (!client || !key)
+ return -1;
+
+ LOCK (&client->scratch_ctx.lock);
+ {
+ ret = client_ctx_set_int (client, key, value);
+ }
+ UNLOCK (&client->scratch_ctx.lock);
+
+ return ret;
+}
+
+
+static int
+client_ctx_get_int (client_t *client, void *key, void **value)
+{
+ int index = 0;
+ int ret = 0;
+
+ for (index = 0; index < client->scratch_ctx.count; index++) {
+ if (client->scratch_ctx.ctx[index].ctx_key == key)
+ break;
+ }
+
+ if (index == client->scratch_ctx.count) {
+ ret = -1;
+ goto out;
+ }
+
+ if (value)
+ *value = client->scratch_ctx.ctx[index].ctx_value;
+
+out:
+ return ret;
+}
+
+
+int
+client_ctx_get (client_t *client, void *key, void **value)
+{
+ int ret = 0;
+
+ if (!client || !key)
+ return -1;
+
+ LOCK (&client->scratch_ctx.lock);
+ {
+ ret = client_ctx_get_int (client, key, value);
+ }
+ UNLOCK (&client->scratch_ctx.lock);
+
+ return ret;
+}
+
+
+static int
+client_ctx_del_int (client_t *client, void *key, void **value)
+{
+ int index = 0;
+ int ret = 0;
+
+ for (index = 0; index < client->scratch_ctx.count; index++) {
+ if (client->scratch_ctx.ctx[index].ctx_key == key)
+ break;
+ }
+
+ if (index == client->scratch_ctx.count) {
+ ret = -1;
+ goto out;
+ }
+
+ if (value)
+ *value = client->scratch_ctx.ctx[index].ctx_value;
+
+ client->scratch_ctx.ctx[index].ctx_key = 0;
+ client->scratch_ctx.ctx[index].ctx_value = 0;
+
+out:
+ return ret;
+}
+
+
+int
+client_ctx_del (client_t *client, void *key, void **value)
+{
+ int ret = 0;
+
+ if (!client || !key)
+ return -1;
+
+ LOCK (&client->scratch_ctx.lock);
+ {
+ ret = client_ctx_del_int (client, key, value);
+ }
+ UNLOCK (&client->scratch_ctx.lock);
+
+ return ret;
+}
+
+
+void
+client_dump (client_t *client, char *prefix)
+{
+ char key[GF_DUMP_MAX_BUF_LEN];
+
+ if (!client)
+ return;
+
+ memset(key, 0, sizeof key);
+ gf_proc_dump_write("refcount", "%d", client->ref.count);
+}
+
+
+void
+cliententry_dump (cliententry_t *cliententry, char *prefix)
+{
+ if (!cliententry)
+ return;
+
+ if (GF_CLIENTENTRY_ALLOCATED != cliententry->next_free)
+ return;
+
+ if (cliententry->client)
+ client_dump(cliententry->client, prefix);
+}
+
+
+void
+clienttable_dump (clienttable_t *clienttable, char *prefix)
+{
+ int i = 0;
+ int ret = -1;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0};
+
+ if (!clienttable)
+ return;
+
+ ret = TRY_LOCK (&clienttable->lock);
+ {
+ if (ret) {
+ gf_log ("client_t", GF_LOG_WARNING,
+ "Unable to acquire lock");
+ return;
+ }
+ memset(key, 0, sizeof key);
+ gf_proc_dump_build_key(key, prefix, "maxclients");
+ gf_proc_dump_write(key, "%d", clienttable->max_clients);
+ gf_proc_dump_build_key(key, prefix, "first_free");
+ gf_proc_dump_write(key, "%d", clienttable->first_free);
+ for ( i = 0 ; i < clienttable->max_clients; i++) {
+ if (GF_CLIENTENTRY_ALLOCATED ==
+ clienttable->cliententries[i].next_free) {
+ gf_proc_dump_build_key(key, prefix,
+ "cliententry[%d]", i);
+ gf_proc_dump_add_section(key);
+ cliententry_dump(&clienttable->cliententries[i],
+ key);
+ }
+ }
+ }
+ UNLOCK(&clienttable->lock);
+}
+
+
+void
+client_ctx_dump (client_t *client, char *prefix)
+{
+#if 0 /* TBD, FIXME */
+ struct client_ctx *client_ctx = NULL;
+ xlator_t *xl = NULL;
+ int i = 0;
+
+ if ((client == NULL) || (client->ctx == NULL)) {
+ goto out;
+ }
+
+ LOCK (&client->ctx_lock);
+ if (client->ctx != NULL) {
+ client_ctx = GF_CALLOC (client->inode->table->xl->graph->ctx_count,
+ sizeof (*client_ctx),
+ gf_common_mt_client_ctx);
+ if (client_ctx == NULL) {
+ goto unlock;
+ }
+
+ for (i = 0; i < client->inode->table->xl->graph->ctx_count; i++) {
+ client_ctx[i] = client->ctx[i];
+ }
+ }
+unlock:
+ UNLOCK (&client->ctx_lock);
+
+ if (client_ctx == NULL) {
+ goto out;
+ }
+
+ for (i = 0; i < client->inode->table->xl->graph->ctx_count; i++) {
+ if (client_ctx[i].xl_key) {
+ xl = (xlator_t *)(long)client_ctx[i].xl_key;
+ if (xl->dumpops && xl->dumpops->clientctx)
+ xl->dumpops->clientctx (xl, client);
+ }
+ }
+out:
+ GF_FREE (client_ctx);
+#endif
+}
+
+
+/*
+ * the following functions are here to preserve legacy behavior of the
+ * protocol/server xlator dump, but perhaps they should just be folded
+ * into the client dump instead?
+ */
+int
+gf_client_dump_fdtables_to_dict (xlator_t *this, dict_t *dict)
+{
+ clienttable_t *clienttable = NULL;
+ int count = 0;
+ int ret = -1;
+#ifdef NOTYET
+ client_t *client = NULL;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0,};
+#endif
+
+ GF_VALIDATE_OR_GOTO (THIS->name, this, out);
+ GF_VALIDATE_OR_GOTO (this->name, dict, out);
+
+ clienttable = this->ctx->clienttable;
+
+ if (!clienttable)
+ return -1;
+
+#ifdef NOTYET
+ ret = TRY_LOCK (&clienttable->lock);
+ {
+ if (ret) {
+ gf_log ("client_t", GF_LOG_WARNING,
+ "Unable to acquire lock");
+ return -1;
+ }
+ for ( ; count < clienttable->max_clients; count++) {
+ if (GF_CLIENTENTRY_ALLOCATED !=
+ clienttable->cliententries[count].next_free)
+ continue;
+ client = clienttable->cliententries[count].client;
+ memset(key, 0, sizeof key);
+ snprintf (key, sizeof key, "conn%d", count++);
+ fdtable_dump_to_dict (client->server_ctx.fdtable,
+ key, dict);
+ }
+ }
+ UNLOCK(&clienttable->lock);
+#endif
+
+ ret = dict_set_int32 (dict, "conncount", count);
+out:
+ return ret;
+}
+
+int
+gf_client_dump_fdtables (xlator_t *this)
+{
+ client_t *client = NULL;
+ clienttable_t *clienttable = NULL;
+ int count = 1;
+ int ret = -1;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0,};
+
+ GF_VALIDATE_OR_GOTO (THIS->name, this, out);
+
+ clienttable = this->ctx->clienttable;
+
+ if (!clienttable)
+ return -1;
+
+ ret = TRY_LOCK (&clienttable->lock);
+ {
+ if (ret) {
+ gf_log ("client_t", GF_LOG_WARNING,
+ "Unable to acquire lock");
+ return -1;
+ }
+
+
+ for ( ; count < clienttable->max_clients; count++) {
+ if (GF_CLIENTENTRY_ALLOCATED !=
+ clienttable->cliententries[count].next_free)
+ continue;
+ client = clienttable->cliententries[count].client;
+ memset(key, 0, sizeof key);
+ if (client->client_uid) {
+ gf_proc_dump_build_key (key, "conn",
+ "%d.id", count);
+ gf_proc_dump_write (key, "%s",
+ client->client_uid);
+ }
+
+ gf_proc_dump_build_key (key, "conn", "%d.ref",
+ count);
+ gf_proc_dump_write (key, "%d", client->ref.count);
+ if (client->bound_xl) {
+ gf_proc_dump_build_key (key, "conn",
+ "%d.bound_xl", count);
+ gf_proc_dump_write (key, "%s",
+ client->bound_xl->name);
+ }
+
+#ifdef NOTYET
+ gf_proc_dump_build_key (key, "conn","%d.id", count);
+ fdtable_dump (client->server_ctx.fdtable, key);
+#endif
+ }
+ }
+
+ UNLOCK(&clienttable->lock);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+int
+gf_client_dump_inodes_to_dict (xlator_t *this, dict_t *dict)
+{
+ client_t *client = NULL;
+ clienttable_t *clienttable = NULL;
+ xlator_t *prev_bound_xl = NULL;
+ char key[32] = {0,};
+ int count = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO (THIS->name, this, out);
+ GF_VALIDATE_OR_GOTO (this->name, dict, out);
+
+ clienttable = this->ctx->clienttable;
+
+ if (!clienttable)
+ return -1;
+
+ ret = TRY_LOCK (&clienttable->lock);
+ {
+ if (ret) {
+ gf_log ("client_t", GF_LOG_WARNING,
+ "Unable to acquire lock");
+ return -1;
+ }
+ for ( ; count < clienttable->max_clients; count++) {
+ if (GF_CLIENTENTRY_ALLOCATED !=
+ clienttable->cliententries[count].next_free)
+ continue;
+ client = clienttable->cliententries[count].client;
+ memset(key, 0, sizeof key);
+ if (client->bound_xl && client->bound_xl->itable) {
+ /* Presently every brick contains only
+ * one bound_xl for all connections.
+ * This will lead to duplicating of
+ * the inode lists, if listing is
+ * done for every connection. This
+ * simple check prevents duplication
+ * in the present case. If need arises
+ * the check can be improved.
+ */
+ if (client->bound_xl == prev_bound_xl)
+ continue;
+ prev_bound_xl = client->bound_xl;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "conn%d", count);
+ inode_table_dump_to_dict (client->bound_xl->itable,
+ key, dict);
+ }
+ }
+ }
+ UNLOCK(&clienttable->lock);
+
+ ret = dict_set_int32 (dict, "conncount", count);
+
+out:
+ if (prev_bound_xl)
+ prev_bound_xl = NULL;
+ return ret;
+}
+
+int
+gf_client_dump_inodes (xlator_t *this)
+{
+ client_t *client = NULL;
+ clienttable_t *clienttable = NULL;
+ xlator_t *prev_bound_xl = NULL;
+ int count = 1;
+ int ret = -1;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0,};
+
+ GF_VALIDATE_OR_GOTO (THIS->name, this, out);
+
+ clienttable = this->ctx->clienttable;
+
+ if (!clienttable)
+ goto out;
+
+ ret = TRY_LOCK (&clienttable->lock);
+ {
+ if (ret) {
+ gf_log ("client_t", GF_LOG_WARNING,
+ "Unable to acquire lock");
+ goto out;
+ }
+
+ for ( ; count < clienttable->max_clients; count++) {
+ if (GF_CLIENTENTRY_ALLOCATED !=
+ clienttable->cliententries[count].next_free)
+ continue;
+ client = clienttable->cliententries[count].client;
+ memset(key, 0, sizeof key);
+ if (client->bound_xl && client->bound_xl->itable) {
+ /* Presently every brick contains only
+ * one bound_xl for all connections.
+ * This will lead to duplicating of
+ * the inode lists, if listing is
+ * done for every connection. This
+ * simple check prevents duplication
+ * in the present case. If need arises
+ * the check can be improved.
+ */
+ if (client->bound_xl == prev_bound_xl)
+ continue;
+ prev_bound_xl = client->bound_xl;
+
+ gf_proc_dump_build_key(key, "conn",
+ "%d.bound_xl.%s", count,
+ client->bound_xl->name);
+ inode_table_dump(client->bound_xl->itable,key);
+ }
+ }
+ }
+ UNLOCK(&clienttable->lock);
+
+ ret = 0;
+out:
+ return ret;
+}
+
diff --git a/libglusterfs/src/client_t.h b/libglusterfs/src/client_t.h
new file mode 100644
index 000000000..f7812f8f0
--- /dev/null
+++ b/libglusterfs/src/client_t.h
@@ -0,0 +1,135 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CLIENT_T_H
+#define _CLIENT_T_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "locking.h" /* for gf_lock_t, not included by glusterfs.h */
+
+struct client_ctx {
+ void *ctx_key;
+ void *ctx_value;
+};
+
+typedef struct _client_t {
+ struct {
+ /* e.g. protocol/server stashes its ctx here */
+ gf_lock_t lock;
+ unsigned short count;
+ struct client_ctx *ctx;
+ } scratch_ctx;
+ struct {
+ gf_lock_t lock;
+ volatile int bind;
+ volatile int count;
+ } ref;
+ xlator_t *bound_xl;
+ xlator_t *this;
+ int tbl_index;
+ char *client_uid;
+ struct {
+ int flavour;
+ size_t len;
+ char *data;
+ } auth;
+} client_t;
+
+#define GF_CLIENTCTX_INITIAL_SIZE 8
+
+struct client_table_entry {
+ client_t *client;
+ int next_free;
+};
+typedef struct client_table_entry cliententry_t;
+
+struct clienttable {
+ unsigned int max_clients;
+ gf_lock_t lock;
+ cliententry_t *cliententries;
+ int first_free;
+};
+typedef struct clienttable clienttable_t;
+
+#define GF_CLIENTTABLE_INITIAL_SIZE 32
+
+/* Signifies no more entries in the client table. */
+#define GF_CLIENTTABLE_END -1
+
+/* This is used to invalidate
+ * the next_free value in an cliententry that has been allocated
+ */
+#define GF_CLIENTENTRY_ALLOCATED -2
+
+struct rpcsvc_auth_data;
+
+client_t *
+gf_client_get (xlator_t *this, struct rpcsvc_auth_data *cred, char *client_uid);
+
+void
+gf_client_put (client_t *client, gf_boolean_t *detached);
+
+clienttable_t *
+gf_clienttable_alloc (void);
+
+void
+gf_client_clienttable_destroy (clienttable_t *clienttable);
+
+client_t *
+gf_client_ref (client_t *client);
+
+void
+gf_client_unref (client_t *client);
+
+int
+gf_client_dump_fdtable_to_dict (xlator_t *this, dict_t *dict);
+
+int
+gf_client_dump_fdtable (xlator_t *this);
+
+int
+gf_client_dump_inodes_to_dict (xlator_t *this, dict_t *dict);
+
+int
+gf_client_dump_inodes (xlator_t *this);
+
+int
+client_ctx_set (client_t *client, void *key, void *value);
+
+int
+client_ctx_get (client_t *client, void *key, void **value);
+
+int
+client_ctx_del (client_t *client, void *key, void **value);
+
+void
+client_ctx_dump (client_t *client, char *prefix);
+
+int
+gf_client_dump_fdtables_to_dict (xlator_t *this, dict_t *dict);
+
+int
+gf_client_dump_fdtables (xlator_t *this);
+
+int
+gf_client_dump_inodes_to_dict (xlator_t *this, dict_t *dict);
+
+int
+gf_client_dump_inodes (xlator_t *this);
+
+int
+gf_client_disconnect (client_t *client);
+
+#endif /* _CLIENT_T_H */
diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c
index 17ac79840..827475282 100644
--- a/libglusterfs/src/common-utils.c
+++ b/libglusterfs/src/common-utils.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -36,421 +27,577 @@
#include <time.h>
#include <locale.h>
#include <sys/socket.h>
+#include <sys/wait.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <signal.h>
+#include <stdlib.h>
+
+#if defined GF_BSD_HOST_OS || defined GF_DARWIN_HOST_OS
+#include <sys/sysctl.h>
+#endif
#include "logging.h"
#include "common-utils.h"
#include "revision.h"
#include "glusterfs.h"
#include "stack.h"
+#include "globals.h"
+#include "lkowner.h"
+#include "syscall.h"
+#include <ifaddrs.h>
+
+#ifndef AI_ADDRCONFIG
+#define AI_ADDRCONFIG 0
+#endif /* AI_ADDRCONFIG */
typedef int32_t (*rw_op_t)(int32_t fd, char *buf, int32_t size);
typedef int32_t (*rwv_op_t)(int32_t fd, const struct iovec *buf, int32_t size);
-static glusterfs_ctx_t *gf_global_ctx;
-
struct dnscache6 {
- struct addrinfo *first;
- struct addrinfo *next;
+ struct addrinfo *first;
+ struct addrinfo *next;
};
-int32_t
-gf_resolve_ip6 (const char *hostname,
- uint16_t port,
- int family,
- void **dnscache,
- struct addrinfo **addr_info)
-{
- int32_t ret = 0;
- struct addrinfo hints;
- struct dnscache6 *cache = NULL;
- char service[NI_MAXSERV], host[NI_MAXHOST];
-
- if (!hostname) {
- gf_log ("resolver", GF_LOG_WARNING, "hostname is NULL");
- return -1;
- }
+void
+md5_wrapper(const unsigned char *data, size_t len, char *md5)
+{
+ unsigned short i = 0;
+ unsigned short lim = MD5_DIGEST_LENGTH*2+1;
+ unsigned char scratch[MD5_DIGEST_LENGTH] = {0,};
+ MD5(data, len, scratch);
+ for (; i < MD5_DIGEST_LENGTH; i++)
+ snprintf(md5 + i * 2, lim-i*2, "%02x", scratch[i]);
+}
- if (!*dnscache) {
- *dnscache = CALLOC (1, sizeof (struct dnscache6));
- }
+/* works similar to mkdir(1) -p.
+ */
+int
+mkdir_p (char *path, mode_t mode, gf_boolean_t allow_symlinks)
+{
+ int i = 0;
+ int ret = -1;
+ char dir[PATH_MAX] = {0,};
+ struct stat stbuf = {0,};
- cache = *dnscache;
- if (cache->first && !cache->next) {
- freeaddrinfo(cache->first);
- cache->first = cache->next = NULL;
- gf_log ("resolver", GF_LOG_TRACE,
- "flushing DNS cache");
- }
+ strcpy (dir, path);
+ i = (dir[0] == '/')? 1: 0;
+ do {
+ if (path[i] != '/' && path[i] != '\0')
+ continue;
+
+ dir[i] = '\0';
+ ret = mkdir (dir, mode);
+ if (ret && errno != EEXIST) {
+ gf_log ("", GF_LOG_ERROR, "Failed due to reason %s",
+ strerror (errno));
+ goto out;
+ }
+
+ if (ret && errno == EEXIST && !allow_symlinks) {
+ ret = lstat (dir, &stbuf);
+ if (ret)
+ goto out;
+
+ if (S_ISLNK (stbuf.st_mode)) {
+ ret = -1;
+ gf_log ("", GF_LOG_ERROR, "%s is a symlink",
+ dir);
+ goto out;
+ }
+ }
+ dir[i] = '/';
+
+ } while (path[i++] != '\0');
+
+ ret = stat (dir, &stbuf);
+ if (ret || !S_ISDIR (stbuf.st_mode)) {
+ ret = -1;
+ gf_log ("", GF_LOG_ERROR, "Failed to create directory, "
+ "possibly some of the components were not directories");
+ goto out;
+ }
+
+ ret = 0;
+out:
+
+ return ret;
+}
+
+int
+gf_lstat_dir (const char *path, struct stat *stbuf_in)
+{
+ int ret = -1;
+ struct stat stbuf = {0,};
+
+ if (path == NULL) {
+ errno = EINVAL;
+ goto out;
+ }
+
+ ret = sys_lstat (path, &stbuf);
+ if (ret)
+ goto out;
+
+ if (!S_ISDIR (stbuf.st_mode)) {
+ errno = ENOTDIR;
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+
+out:
+ if (!ret && stbuf_in)
+ *stbuf_in = stbuf;
+
+ return ret;
+}
+
+int
+log_base2 (unsigned long x)
+{
+ int val = 0;
+
+ while (x > 1) {
+ x /= 2;
+ val++;
+ }
+
+ return val;
+}
+
+
+int32_t
+gf_resolve_ip6 (const char *hostname,
+ uint16_t port,
+ int family,
+ void **dnscache,
+ struct addrinfo **addr_info)
+{
+ int32_t ret = 0;
+ struct addrinfo hints;
+ struct dnscache6 *cache = NULL;
+ char service[NI_MAXSERV], host[NI_MAXHOST];
+
+ if (!hostname) {
+ gf_log_callingfn ("resolver", GF_LOG_WARNING, "hostname is NULL");
+ return -1;
+ }
- if (!cache->first) {
- char *port_str = NULL;
- gf_log ("resolver", GF_LOG_TRACE,
- "DNS cache not present, freshly probing hostname: %s",
- hostname);
+ if (!*dnscache) {
+ *dnscache = GF_CALLOC (1, sizeof (struct dnscache6),
+ gf_common_mt_dnscache6);
+ if (!*dnscache)
+ return -1;
+ }
- memset(&hints, 0, sizeof(hints));
- hints.ai_family = family;
- hints.ai_socktype = SOCK_STREAM;
- hints.ai_flags = AI_ADDRCONFIG;
+ cache = *dnscache;
+ if (cache->first && !cache->next) {
+ freeaddrinfo(cache->first);
+ cache->first = cache->next = NULL;
+ gf_log ("resolver", GF_LOG_TRACE,
+ "flushing DNS cache");
+ }
- ret = asprintf (&port_str, "%d", port);
+ if (!cache->first) {
+ char *port_str = NULL;
+ gf_log ("resolver", GF_LOG_TRACE,
+ "DNS cache not present, freshly probing hostname: %s",
+ hostname);
+
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = family;
+ hints.ai_socktype = SOCK_STREAM;
+#ifndef __NetBSD__
+ hints.ai_flags = AI_ADDRCONFIG;
+#endif
+
+ ret = gf_asprintf (&port_str, "%d", port);
if (-1 == ret) {
gf_log ("resolver", GF_LOG_ERROR, "asprintf failed");
return -1;
}
- if ((ret = getaddrinfo(hostname, port_str, &hints, &cache->first)) != 0) {
- gf_log ("resolver", GF_LOG_ERROR,
- "getaddrinfo failed (%s)", gai_strerror (ret));
-
- free (*dnscache);
- *dnscache = NULL;
- free (port_str);
- return -1;
- }
- free (port_str);
-
- cache->next = cache->first;
- }
+ if ((ret = getaddrinfo(hostname, port_str, &hints, &cache->first)) != 0) {
+ gf_log ("resolver", GF_LOG_ERROR,
+ "getaddrinfo failed (%s)", gai_strerror (ret));
- if (cache->next) {
- ret = getnameinfo((struct sockaddr *)cache->next->ai_addr,
- cache->next->ai_addrlen,
- host, sizeof (host),
- service, sizeof (service),
- NI_NUMERICHOST);
- if (ret != 0) {
- gf_log ("resolver",
- GF_LOG_ERROR,
- "getnameinfo failed (%s)", gai_strerror (ret));
- goto err;
- }
-
- gf_log ("resolver", GF_LOG_TRACE,
- "returning ip-%s (port-%s) for hostname: %s and port: %d",
- host, service, hostname, port);
-
- *addr_info = cache->next;
- }
+ GF_FREE (*dnscache);
+ *dnscache = NULL;
+ GF_FREE (port_str);
+ return -1;
+ }
+ GF_FREE (port_str);
- cache->next = cache->next->ai_next;
- if (cache->next) {
- ret = getnameinfo((struct sockaddr *)cache->next->ai_addr,
- cache->next->ai_addrlen,
- host, sizeof (host),
- service, sizeof (service),
- NI_NUMERICHOST);
- if (ret != 0) {
- gf_log ("resolver",
- GF_LOG_ERROR,
- "getnameinfo failed (%s)", gai_strerror (ret));
- goto err;
- }
-
- gf_log ("resolver", GF_LOG_TRACE,
- "next DNS query will return: ip-%s port-%s", host, service);
- }
+ cache->next = cache->first;
+ }
- return 0;
+ if (cache->next) {
+ ret = getnameinfo((struct sockaddr *)cache->next->ai_addr,
+ cache->next->ai_addrlen,
+ host, sizeof (host),
+ service, sizeof (service),
+ NI_NUMERICHOST);
+ if (ret != 0) {
+ gf_log ("resolver", GF_LOG_ERROR,
+ "getnameinfo failed (%s)", gai_strerror (ret));
+ goto err;
+ }
+
+ gf_log ("resolver", GF_LOG_DEBUG,
+ "returning ip-%s (port-%s) for hostname: %s and port: %d",
+ host, service, hostname, port);
+
+ *addr_info = cache->next;
+ }
+
+ if (cache->next)
+ cache->next = cache->next->ai_next;
+ if (cache->next) {
+ ret = getnameinfo((struct sockaddr *)cache->next->ai_addr,
+ cache->next->ai_addrlen,
+ host, sizeof (host),
+ service, sizeof (service),
+ NI_NUMERICHOST);
+ if (ret != 0) {
+ gf_log ("resolver", GF_LOG_ERROR,
+ "getnameinfo failed (%s)", gai_strerror (ret));
+ goto err;
+ }
+
+ gf_log ("resolver", GF_LOG_DEBUG,
+ "next DNS query will return: ip-%s port-%s", host, service);
+ }
+
+ return 0;
err:
- freeaddrinfo (cache->first);
- cache->first = cache->next = NULL;
- free (cache);
- *dnscache = NULL;
- return -1;
+ freeaddrinfo (cache->first);
+ cache->first = cache->next = NULL;
+ GF_FREE (cache);
+ *dnscache = NULL;
+ return -1;
}
-char *gf_fop_list[GF_FOP_MAXVALUE];
-char *gf_mop_list[GF_MOP_MAXVALUE];
-char *gf_cbk_list[GF_CBK_MAXVALUE];
-void
-gf_global_variable_init()
-{
- gf_fop_list[GF_FOP_STAT] = "STAT"; /* 0 */
- gf_fop_list[GF_FOP_READLINK] = "READLINK"; /* 1 */
- gf_fop_list[GF_FOP_MKNOD] = "MKNOD"; /* 2 */
- gf_fop_list[GF_FOP_MKDIR] = "MKDIR";
- gf_fop_list[GF_FOP_UNLINK] = "UNLINK";
- gf_fop_list[GF_FOP_RMDIR] = "RMDIR"; /* 5 */
- gf_fop_list[GF_FOP_SYMLINK] = "SYMLINK";
- gf_fop_list[GF_FOP_RENAME] = "RENAME";
- gf_fop_list[GF_FOP_LINK] = "LINK";
- gf_fop_list[GF_FOP_CHMOD] = "CHMOD";
- gf_fop_list[GF_FOP_CHOWN] = "CHOWN"; /* 10 */
- gf_fop_list[GF_FOP_TRUNCATE] = "TRUNCATE";
- gf_fop_list[GF_FOP_OPEN] = "OPEN";
- gf_fop_list[GF_FOP_READ] = "READ";
- gf_fop_list[GF_FOP_WRITE] = "WRITE";
- gf_fop_list[GF_FOP_STATFS] = "STATFS"; /* 15 */
- gf_fop_list[GF_FOP_FLUSH] = "FLUSH";
- gf_fop_list[GF_FOP_FSYNC] = "FSYNC";
- gf_fop_list[GF_FOP_SETXATTR] = "SETXATTR";
- gf_fop_list[GF_FOP_GETXATTR] = "GETXATTR"; /* 20 */
- gf_fop_list[GF_FOP_REMOVEXATTR] = "REMOVEXATTR";
- gf_fop_list[GF_FOP_OPENDIR] = "OPENDIR";
- gf_fop_list[GF_FOP_GETDENTS] = "GETDENTS";
- gf_fop_list[GF_FOP_FSYNCDIR] = "FSYNCDIR"; /* 25 */
- gf_fop_list[GF_FOP_ACCESS] = "ACCESS";
- gf_fop_list[GF_FOP_CREATE] = "CREATE";
- gf_fop_list[GF_FOP_FTRUNCATE] = "FTRUNCATE";
- gf_fop_list[GF_FOP_FSTAT] = "FSTAT";
- gf_fop_list[GF_FOP_LK] = "LK"; /* 30 */
- gf_fop_list[GF_FOP_UTIMENS] = "UTIMENS";
- gf_fop_list[GF_FOP_FCHMOD] = "FCHMOD";
- gf_fop_list[GF_FOP_FCHOWN] = "FCHOWN";
- gf_fop_list[GF_FOP_LOOKUP] = "LOOKUP";
- gf_fop_list[GF_FOP_SETDENTS] = "SETDENTS"; /* 35 */
- gf_fop_list[GF_FOP_READDIR] = "READDIR";
- gf_fop_list[GF_FOP_INODELK] = "INODELK";
- gf_fop_list[GF_FOP_FINODELK] = "FINODELK";
- gf_fop_list[GF_FOP_ENTRYLK] = "ENTRYLK";
- gf_fop_list[GF_FOP_FENTRYLK] = "FENTRYLK"; /* 40 */
- gf_fop_list[GF_FOP_CHECKSUM] = "CHECKSUM"; /* 41 */
- gf_fop_list[GF_FOP_XATTROP] = "XATTROP";
- gf_fop_list[GF_FOP_FXATTROP] = "FXATTROP";
- gf_fop_list[GF_FOP_LOCK_NOTIFY] = "LOCK_NOTIFY";
- gf_fop_list[GF_FOP_LOCK_FNOTIFY]= "LOCK_FNOTIFY";
- gf_fop_list[GF_FOP_FSETXATTR] = "FSETXATTR";
- gf_fop_list[GF_FOP_FGETXATTR] = "FGETXATTR";
-
- gf_mop_list[GF_MOP_SETVOLUME] = "SETVOLUME"; /* 0 */
- gf_mop_list[GF_MOP_GETVOLUME] = "GETVOLUME"; /* 1 */
- gf_mop_list[GF_MOP_STATS] = "STATS";
- gf_mop_list[GF_MOP_SETSPEC] = "SETSPEC";
- gf_mop_list[GF_MOP_GETSPEC] = "GETSPEC";
- gf_mop_list[GF_MOP_LOG] = "LOG";
- gf_mop_list[GF_MOP_PING] = "PING";
-
- gf_cbk_list[GF_CBK_FORGET] = "FORGET";
- gf_cbk_list[GF_CBK_RELEASE] = "RELEASE";
- gf_cbk_list[GF_CBK_RELEASEDIR] = "RELEASEDIR";
- /* Are there any more variables to be included? All global
- variables initialization should go here */
-
- return;
+struct xldump {
+ int lineno;
+ FILE *logfp;
+};
+
+
+static int
+nprintf (struct xldump *dump, const char *fmt, ...)
+{
+ va_list ap;
+ int ret = 0;
+
+
+ ret += fprintf (dump->logfp, "%3d: ", ++dump->lineno);
+
+ va_start (ap, fmt);
+ ret += vfprintf (dump->logfp, fmt, ap);
+ va_end (ap);
+
+ ret += fprintf (dump->logfp, "\n");
+
+ return ret;
}
-void
-set_global_ctx_ptr (glusterfs_ctx_t *ctx)
+
+static int
+xldump_options (dict_t *this, char *key, data_t *value, void *d)
{
- gf_global_ctx = ctx;
+ nprintf (d, " option %s %s", key, value->data);
+ return 0;
}
-/*
- * Don't use this function other than in glusterfsd.c. libglusterfsclient does
- * not set gf_global_ctx since there can be multiple glusterfs-contexts
- * initialized in a single process. Instead access the context from ctx member
- * of the xlator object.
- */
-glusterfs_ctx_t *
-get_global_ctx_ptr (void)
+static void
+xldump_subvolumes (xlator_t *this, void *d)
{
- return gf_global_ctx;
+ xlator_list_t *subv = NULL;
+ int len = 0;
+ char *subvstr = NULL;
+
+ subv = this->children;
+ if (!this->children)
+ return;
+
+ for (subv = this->children; subv; subv = subv->next)
+ len += (strlen (subv->xlator->name) + 1);
+
+ subvstr = GF_CALLOC (1, len, gf_common_mt_strdup);
+
+ len = 0;
+ for (subv = this->children; subv; subv= subv->next)
+ len += sprintf (subvstr + len, "%s%s", subv->xlator->name,
+ subv->next ? " " : "");
+
+ nprintf (d, " subvolumes %s", subvstr);
+
+ GF_FREE (subvstr);
}
-void
-gf_log_volume_file (FILE *specfp)
+
+static void
+xldump (xlator_t *each, void *d)
{
- extern FILE *gf_log_logfile;
- int lcount = 0;
- char data[GF_UNIT_KB];
-
- fseek (specfp, 0L, SEEK_SET);
-
- fprintf (gf_log_logfile, "Given volfile:\n");
- fprintf (gf_log_logfile,
- "+---------------------------------------"
- "---------------------------------------+\n");
- while (fgets (data, GF_UNIT_KB, specfp) != NULL){
- lcount++;
- fprintf (gf_log_logfile, "%3d: %s", lcount, data);
- }
- fprintf (gf_log_logfile,
- "\n+---------------------------------------"
- "---------------------------------------+\n");
- fflush (gf_log_logfile);
- fseek (specfp, 0L, SEEK_SET);
+ nprintf (d, "volume %s", each->name);
+ nprintf (d, " type %s", each->type);
+ dict_foreach (each->options, xldump_options, d);
+
+ xldump_subvolumes (each, d);
+
+ nprintf (d, "end-volume");
+ nprintf (d, "");
}
-static void
+
+void
+gf_log_dump_graph (FILE *specfp, glusterfs_graph_t *graph)
+{
+ glusterfs_ctx_t *ctx;
+ struct xldump xld = {0, };
+
+
+ ctx = THIS->ctx;
+ xld.logfp = ctx->log.gf_log_logfile;
+
+ fprintf (ctx->log.gf_log_logfile, "Final graph:\n");
+ fprintf (ctx->log.gf_log_logfile,
+ "+---------------------------------------"
+ "---------------------------------------+\n");
+
+ xlator_foreach_depth_first (graph->top, xldump, &xld);
+
+ fprintf (ctx->log.gf_log_logfile,
+ "+---------------------------------------"
+ "---------------------------------------+\n");
+ fflush (ctx->log.gf_log_logfile);
+}
+
+static void
gf_dump_config_flags (int fd)
{
int ret = 0;
- /* TODO: 'ret' is not checked properly, add this later */
- ret = write (fd, "configuration details:\n", 23);
+ ret = write (fd, "configuration details:\n", 23);
+ if (ret == -1)
+ goto out;
/* have argp */
#ifdef HAVE_ARGP
- ret = write (fd, "argp 1\n", 7);
+ ret = write (fd, "argp 1\n", 7);
+ if (ret == -1)
+ goto out;
#endif
/* ifdef if found backtrace */
-#ifdef HAVE_BACKTRACE
- ret = write (fd, "backtrace 1\n", 12);
+#ifdef HAVE_BACKTRACE
+ ret = write (fd, "backtrace 1\n", 12);
+ if (ret == -1)
+ goto out;
#endif
/* Berkeley-DB version has cursor->get() */
-#ifdef HAVE_BDB_CURSOR_GET
- ret = write (fd, "bdb->cursor->get 1\n", 19);
+#ifdef HAVE_BDB_CURSOR_GET
+ ret = write (fd, "bdb->cursor->get 1\n", 19);
+ if (ret == -1)
+ goto out;
#endif
/* Define to 1 if you have the <db.h> header file. */
-#ifdef HAVE_DB_H
- ret = write (fd, "db.h 1\n", 7);
+#ifdef HAVE_DB_H
+ ret = write (fd, "db.h 1\n", 7);
+ if (ret == -1)
+ goto out;
#endif
/* Define to 1 if you have the <dlfcn.h> header file. */
-#ifdef HAVE_DLFCN_H
- ret = write (fd, "dlfcn 1\n", 8);
+#ifdef HAVE_DLFCN_H
+ ret = write (fd, "dlfcn 1\n", 8);
+ if (ret == -1)
+ goto out;
#endif
/* define if fdatasync exists */
-#ifdef HAVE_FDATASYNC
- ret = write (fd, "fdatasync 1\n", 12);
+#ifdef HAVE_FDATASYNC
+ ret = write (fd, "fdatasync 1\n", 12);
+ if (ret == -1)
+ goto out;
#endif
/* Define to 1 if you have the `pthread' library (-lpthread). */
-#ifdef HAVE_LIBPTHREAD
- ret = write (fd, "libpthread 1\n", 13);
+#ifdef HAVE_LIBPTHREAD
+ ret = write (fd, "libpthread 1\n", 13);
+ if (ret == -1)
+ goto out;
#endif
/* define if llistxattr exists */
-#ifdef HAVE_LLISTXATTR
- ret = write (fd, "llistxattr 1\n", 13);
+#ifdef HAVE_LLISTXATTR
+ ret = write (fd, "llistxattr 1\n", 13);
+ if (ret == -1)
+ goto out;
#endif
/* define if found setfsuid setfsgid */
-#ifdef HAVE_SET_FSID
- ret = write (fd, "setfsid 1\n", 10);
+#ifdef HAVE_SET_FSID
+ ret = write (fd, "setfsid 1\n", 10);
+ if (ret == -1)
+ goto out;
#endif
/* define if found spinlock */
-#ifdef HAVE_SPINLOCK
- ret = write (fd, "spinlock 1\n", 11);
+#ifdef HAVE_SPINLOCK
+ ret = write (fd, "spinlock 1\n", 11);
+ if (ret == -1)
+ goto out;
#endif
/* Define to 1 if you have the <sys/epoll.h> header file. */
-#ifdef HAVE_SYS_EPOLL_H
- ret = write (fd, "epoll.h 1\n", 10);
+#ifdef HAVE_SYS_EPOLL_H
+ ret = write (fd, "epoll.h 1\n", 10);
+ if (ret == -1)
+ goto out;
#endif
/* Define to 1 if you have the <sys/extattr.h> header file. */
-#ifdef HAVE_SYS_EXTATTR_H
- ret = write (fd, "extattr.h 1\n", 12);
+#ifdef HAVE_SYS_EXTATTR_H
+ ret = write (fd, "extattr.h 1\n", 12);
+ if (ret == -1)
+ goto out;
#endif
/* Define to 1 if you have the <sys/xattr.h> header file. */
-#ifdef HAVE_SYS_XATTR_H
- ret = write (fd, "xattr.h 1\n", 10);
+#ifdef HAVE_SYS_XATTR_H
+ ret = write (fd, "xattr.h 1\n", 10);
+ if (ret == -1)
+ goto out;
#endif
/* define if found st_atim.tv_nsec */
#ifdef HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC
- ret = write (fd, "st_atim.tv_nsec 1\n", 18);
+ ret = write (fd, "st_atim.tv_nsec 1\n", 18);
+ if (ret == -1)
+ goto out;
#endif
/* define if found st_atimespec.tv_nsec */
#ifdef HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC
- ret = write (fd, "st_atimespec.tv_nsec 1\n",23);
+ ret = write (fd, "st_atimespec.tv_nsec 1\n",23);
+ if (ret == -1)
+ goto out;
#endif
/* Define to the full name and version of this package. */
-#ifdef PACKAGE_STRING
- {
- char msg[128];
- sprintf (msg, "package-string: %s\n", PACKAGE_STRING);
- ret = write (fd, msg, strlen (msg));
- }
+#ifdef PACKAGE_STRING
+ {
+ char msg[128];
+ sprintf (msg, "package-string: %s\n", PACKAGE_STRING);
+ ret = write (fd, msg, strlen (msg));
+ if (ret == -1)
+ goto out;
+ }
#endif
- return;
+out:
+ return;
}
/* Obtain a backtrace and print it to stdout. */
/* TODO: It looks like backtrace_symbols allocates memory,
it may be problem because mostly memory allocation/free causes 'sigsegv' */
+
void
-gf_print_trace (int32_t signum)
+gf_print_trace (int32_t signum, glusterfs_ctx_t *ctx)
{
- extern FILE *gf_log_logfile;
- struct tm *tm = NULL;
char msg[1024] = {0,};
- char timestr[256] = {0,};
- time_t utime = 0;
+ char timestr[64] = {0,};
int ret = 0;
int fd = 0;
- fd = fileno (gf_log_logfile);
-
- /* Pending frames, (if any), list them in order */
- ret = write (fd, "pending frames:\n", 16);
- {
- extern glusterfs_ctx_t *gf_global_ctx;
- glusterfs_ctx_t *ctx = gf_global_ctx;
- struct list_head *trav = ((call_pool_t *)ctx->pool)->all_frames.next;
- while (trav != (&((call_pool_t *)ctx->pool)->all_frames)) {
- call_frame_t *tmp = (call_frame_t *)(&((call_stack_t *)trav)->frames);
- if ((tmp->root->type == GF_OP_TYPE_FOP_REQUEST) ||
- (tmp->root->type == GF_OP_TYPE_FOP_REPLY))
- sprintf (msg,"frame : type(%d) op(%s)\n",
- tmp->root->type,
- gf_fop_list[tmp->root->op]);
- if ((tmp->root->type == GF_OP_TYPE_MOP_REQUEST) ||
- (tmp->root->type == GF_OP_TYPE_MOP_REPLY))
- sprintf (msg,"frame : type(%d) op(%s)\n",
- tmp->root->type,
- gf_mop_list[tmp->root->op]);
- if ((tmp->root->type == GF_OP_TYPE_CBK_REQUEST) ||
- (tmp->root->type == GF_OP_TYPE_CBK_REPLY))
- sprintf (msg,"frame : type(%d) op(%s)\n",
- tmp->root->type,
- gf_cbk_list[tmp->root->op]);
-
- ret = write (fd, msg, strlen (msg));
- trav = trav->next;
- }
- ret = write (fd, "\n", 1);
- }
+ fd = fileno (ctx->log.gf_log_logfile);
+
+ /* Now every gf_log call will just write to a buffer and when the
+ * buffer becomes full, its written to the log-file. Suppose the process
+ * crashes and prints the backtrace in the log-file, then the previous
+ * log information will still be in the buffer itself. So flush the
+ * contents of the buffer to the log file before printing the backtrace
+ * which helps in debugging.
+ */
+ fflush (ctx->log.gf_log_logfile);
+ /* Pending frames, (if any), list them in order */
+ ret = write (fd, "pending frames:\n", 16);
+ if (ret < 0)
+ goto out;
- sprintf (msg, "patchset: %s\n", GLUSTERFS_REPOSITORY_REVISION);
- ret = write (fd, msg, strlen (msg));
+ {
+ struct list_head *trav = ((call_pool_t *)ctx->pool)->all_frames.next;
+ while (trav != (&((call_pool_t *)ctx->pool)->all_frames)) {
+ call_frame_t *tmp = (call_frame_t *)(&((call_stack_t *)trav)->frames);
+ if (tmp->root->type == GF_OP_TYPE_FOP)
+ sprintf (msg,"frame : type(%d) op(%s)\n",
+ tmp->root->type,
+ gf_fop_list[tmp->root->op]);
+ else
+ sprintf (msg,"frame : type(%d) op(%d)\n",
+ tmp->root->type,
+ tmp->root->op);
+
+ ret = write (fd, msg, strlen (msg));
+ if (ret < 0)
+ goto out;
+
+ trav = trav->next;
+ }
+ ret = write (fd, "\n", 1);
+ if (ret < 0)
+ goto out;
+ }
+
+ sprintf (msg, "patchset: %s\n", GLUSTERFS_REPOSITORY_REVISION);
+ ret = write (fd, msg, strlen (msg));
+ if (ret < 0)
+ goto out;
- sprintf (msg, "signal received: %d\n", signum);
- ret = write (fd, msg, strlen (msg));
+ sprintf (msg, "signal received: %d\n", signum);
+ ret = write (fd, msg, strlen (msg));
+ if (ret < 0)
+ goto out;
{
- /* Dump the timestamp of the crash too, so the previous logs
+ /* Dump the timestamp of the crash too, so the previous logs
can be related */
- utime = time (NULL);
- tm = localtime (&utime);
- strftime (timestr, 256, "%Y-%m-%d %H:%M:%S\n", tm);
+ gf_time_fmt (timestr, sizeof timestr, time (NULL), gf_timefmt_FT);
ret = write (fd, "time of crash: ", 15);
+ if (ret < 0)
+ goto out;
ret = write (fd, timestr, strlen (timestr));
+ if (ret < 0)
+ goto out;
}
- gf_dump_config_flags (fd);
+ gf_dump_config_flags (fd);
#if HAVE_BACKTRACE
- /* Print 'backtrace' */
- {
- void *array[200];
- size_t size;
-
- size = backtrace (array, 200);
- backtrace_symbols_fd (&array[1], size-1, fd);
- sprintf (msg, "---------\n");
- ret = write (fd, msg, strlen (msg));
- }
+ /* Print 'backtrace' */
+ {
+ void *array[200];
+ size_t size;
+
+ size = backtrace (array, 200);
+ backtrace_symbols_fd (&array[1], size-1, fd);
+ sprintf (msg, "---------\n");
+ ret = write (fd, msg, strlen (msg));
+ if (ret < 0)
+ goto out;
+ }
#endif /* HAVE_BACKTRACE */
-
- /* Send a signal to terminate the process */
- signal (signum, SIG_DFL);
- raise (signum);
+
+out:
+ /* Send a signal to terminate the process */
+ signal (signum, SIG_DFL);
+ raise (signum);
}
void
@@ -462,963 +609,1009 @@ trap (void)
char *
gf_trim (char *string)
{
- register char *s, *t;
-
- if (string == NULL)
- {
- return NULL;
- }
-
- for (s = string; isspace (*s); s++)
- ;
-
- if (*s == 0)
- return s;
-
- t = s + strlen (s) - 1;
- while (t > s && isspace (*t))
- t--;
- *++t = '\0';
-
- return s;
-}
-
-int
-gf_strsplit (const char *str, const char *delim,
- char ***tokens, int *token_count)
-{
- char *_running = NULL;
- char *running = NULL;
- char *token = NULL;
- char **token_list = NULL;
- int count = 0;
- int i = 0;
- int j = 0;
-
- if (str == NULL || delim == NULL || tokens == NULL || token_count == NULL)
- {
- return -1;
- }
-
- if ((_running = strdup (str)) == NULL)
- {
- return -1;
- }
- running = _running;
-
- while ((token = strsep (&running, delim)) != NULL)
- {
- if (token[0] != '\0')
- count++;
- }
- free (_running);
-
- if ((_running = strdup (str)) == NULL)
- {
- return -1;
- }
- running = _running;
-
- if ((token_list = CALLOC (count, sizeof (char *))) == NULL)
- {
- free (_running);
- return -1;
- }
-
- while ((token = strsep (&running, delim)) != NULL)
- {
- if (token[0] == '\0')
- continue;
-
- if ((token_list[i++] = strdup (token)) == NULL)
- goto free_exit;
- }
-
- free (_running);
-
- *tokens = token_list;
- *token_count = count;
- return 0;
-
+ register char *s, *t;
+
+ if (string == NULL) {
+ return NULL;
+ }
+
+ for (s = string; isspace (*s); s++)
+ ;
+
+ if (*s == 0)
+ return s;
+
+ t = s + strlen (s) - 1;
+ while (t > s && isspace (*t))
+ t--;
+ *++t = '\0';
+
+ return s;
+}
+
+int
+gf_strsplit (const char *str, const char *delim,
+ char ***tokens, int *token_count)
+{
+ char *_running = NULL;
+ char *running = NULL;
+ char *token = NULL;
+ char **token_list = NULL;
+ int count = 0;
+ int i = 0;
+ int j = 0;
+
+ if (str == NULL || delim == NULL || tokens == NULL || token_count == NULL) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
+ return -1;
+ }
+
+ _running = gf_strdup (str);
+ if (_running == NULL)
+ return -1;
+
+ running = _running;
+
+ while ((token = strsep (&running, delim)) != NULL) {
+ if (token[0] != '\0')
+ count++;
+ }
+ GF_FREE (_running);
+
+ _running = gf_strdup (str);
+ if (_running == NULL)
+ return -1;
+
+ running = _running;
+
+ if ((token_list = GF_CALLOC (count, sizeof (char *),
+ gf_common_mt_char)) == NULL) {
+ GF_FREE (_running);
+ return -1;
+ }
+
+ while ((token = strsep (&running, delim)) != NULL) {
+ if (token[0] == '\0')
+ continue;
+
+ token_list[i] = gf_strdup (token);
+ if (token_list[i] == NULL)
+ goto free_exit;
+ i++;
+ }
+
+ GF_FREE (_running);
+
+ *tokens = token_list;
+ *token_count = count;
+ return 0;
+
free_exit:
- free (_running);
- for (j = 0; j < i; j++)
- {
- free (token_list[j]);
- }
- free (token_list);
- return -1;
+ GF_FREE (_running);
+ for (j = 0; j < i; j++)
+ GF_FREE (token_list[j]);
+
+ GF_FREE (token_list);
+ return -1;
+}
+
+int
+gf_strstr (const char *str, const char *delim, const char *match)
+{
+ char *tmp = NULL;
+ char *save_ptr = NULL;
+ char *tmp_str = NULL;
+
+ int ret = 0;
+
+ tmp_str = strdup (str);
+
+ if (str == NULL || delim == NULL || match == NULL || tmp_str == NULL) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
+ ret = -1;
+ goto out;
+ }
+
+
+ tmp = strtok_r (tmp_str, delim, &save_ptr);
+
+ while (tmp) {
+ ret = strcmp (tmp, match);
+
+ if (ret == 0)
+ break;
+
+ tmp = strtok_r (NULL, delim, &save_ptr);
+ }
+
+out:
+ free (tmp_str);
+
+ return ret;
+
}
-int
+int
gf_volume_name_validate (const char *volume_name)
{
- const char *vname = NULL;
-
- if (volume_name == NULL)
- {
- return -1;
- }
-
- if (!isalpha (volume_name[0]))
- {
- return 1;
- }
-
- for (vname = &volume_name[1]; *vname != '\0'; vname++)
- {
- if (!(isalnum (*vname) || *vname == '_'))
- return 1;
- }
-
- return 0;
+ const char *vname = NULL;
+
+ if (volume_name == NULL) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
+ return -1;
+ }
+
+ if (!isalpha (volume_name[0]))
+ return 1;
+
+ for (vname = &volume_name[1]; *vname != '\0'; vname++) {
+ if (!(isalnum (*vname) || *vname == '_'))
+ return 1;
+ }
+
+ return 0;
}
-int
+int
gf_string2time (const char *str, uint32_t *n)
{
- unsigned long value = 0;
- char *tail = NULL;
- int old_errno = 0;
- const char *s = NULL;
-
- if (str == NULL || n == NULL)
- {
- errno = EINVAL;
- return -1;
- }
-
- for (s = str; *s != '\0'; s++)
- {
- if (isspace (*s))
- {
- continue;
- }
- if (*s == '-')
- {
- return -1;
- }
- break;
- }
-
- old_errno = errno;
- errno = 0;
- value = strtol (str, &tail, 0);
-
- if (errno == ERANGE || errno == EINVAL)
- {
- return -1;
- }
-
- if (errno == 0)
- {
- errno = old_errno;
- }
-
- if (!((tail[0] == '\0') ||
- ((tail[0] == 's') && (tail[1] == '\0')) ||
- ((tail[0] == 's') && (tail[1] == 'e') && (tail[2] == 'c') && (tail[3] == '\0'))))
- {
- return -1;
- }
-
- *n = value;
-
- return 0;
-}
+ unsigned long value = 0;
+ char *tail = NULL;
+ int old_errno = 0;
+ const char *s = NULL;
+
+ if (str == NULL || n == NULL) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
+ errno = EINVAL;
+ return -1;
+ }
+
+ for (s = str; *s != '\0'; s++) {
+ if (isspace (*s))
+ continue;
+ if (*s == '-')
+ return -1;
+ break;
+ }
+ old_errno = errno;
+ errno = 0;
+ value = strtol (str, &tail, 0);
+ if (str == tail)
+ errno = EINVAL;
+
+ if (errno == ERANGE || errno == EINVAL)
+ return -1;
+
+ if (errno == 0)
+ errno = old_errno;
+
+ if (!((tail[0] == '\0') ||
+ ((tail[0] == 's') && (tail[1] == '\0')) ||
+ ((tail[0] == 's') && (tail[1] == 'e') &&
+ (tail[2] == 'c') && (tail[3] == '\0'))))
+ return -1;
+
+ *n = value;
+
+ return 0;
+}
-int
-gf_string2percent (const char *str, uint32_t *n)
+int
+gf_string2percent (const char *str, double *n)
{
- unsigned long value = 0;
- char *tail = NULL;
- int old_errno = 0;
- const char *s = NULL;
-
- if (str == NULL || n == NULL)
- {
- errno = EINVAL;
- return -1;
- }
-
- for (s = str; *s != '\0'; s++)
- {
- if (isspace (*s))
- {
- continue;
- }
- if (*s == '-')
- {
- return -1;
- }
- break;
- }
-
- old_errno = errno;
- errno = 0;
- value = strtol (str, &tail, 0);
-
- if (errno == ERANGE || errno == EINVAL)
- {
- return -1;
- }
-
- if (errno == 0)
- {
- errno = old_errno;
- }
-
- if (!((tail[0] == '\0') ||
- ((tail[0] == '%') && (tail[1] == '\0'))))
- {
- return -1;
- }
-
- *n = value;
-
- return 0;
+ double value = 0;
+ char *tail = NULL;
+ int old_errno = 0;
+ const char *s = NULL;
+
+ if (str == NULL || n == NULL) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
+ errno = EINVAL;
+ return -1;
+ }
+
+ for (s = str; *s != '\0'; s++) {
+ if (isspace (*s))
+ continue;
+ if (*s == '-')
+ return -1;
+ break;
+ }
+
+ old_errno = errno;
+ errno = 0;
+ value = strtod (str, &tail);
+ if (str == tail)
+ errno = EINVAL;
+
+ if (errno == ERANGE || errno == EINVAL)
+ return -1;
+
+ if (errno == 0)
+ errno = old_errno;
+
+ if (!((tail[0] == '\0') ||
+ ((tail[0] == '%') && (tail[1] == '\0'))))
+ return -1;
+
+ *n = value;
+
+ return 0;
}
-static int
+static int
_gf_string2long (const char *str, long *n, int base)
{
- long value = 0;
- char *tail = NULL;
- int old_errno = 0;
-
- if (str == NULL || n == NULL)
- {
- errno = EINVAL;
- return -1;
- }
-
- old_errno = errno;
- errno = 0;
- value = strtol (str, &tail, base);
-
- if (errno == ERANGE || errno == EINVAL)
- {
- return -1;
- }
-
- if (errno == 0)
- {
- errno = old_errno;
- }
+ long value = 0;
+ char *tail = NULL;
+ int old_errno = 0;
+
+ if (str == NULL || n == NULL) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
+ errno = EINVAL;
+ return -1;
+ }
- if (tail[0] != '\0')
- {
- /* bala: invalid integer format */
- return -1;
- }
-
- *n = value;
-
- return 0;
+ old_errno = errno;
+ errno = 0;
+ value = strtol (str, &tail, base);
+ if (str == tail)
+ errno = EINVAL;
+
+ if (errno == ERANGE || errno == EINVAL)
+ return -1;
+
+ if (errno == 0)
+ errno = old_errno;
+
+ if (tail[0] != '\0')
+ return -1;
+
+ *n = value;
+
+ return 0;
}
-static int
+static int
_gf_string2ulong (const char *str, unsigned long *n, int base)
{
- unsigned long value = 0;
- char *tail = NULL;
- int old_errno = 0;
- const char *s = NULL;
-
- if (str == NULL || n == NULL)
- {
- errno = EINVAL;
- return -1;
- }
-
- for (s = str; *s != '\0'; s++)
- {
- if (isspace (*s))
- {
- continue;
- }
- if (*s == '-')
- {
- /* bala: we do not support suffixed (-) sign and
- invalid integer format */
- return -1;
- }
- break;
- }
-
- old_errno = errno;
- errno = 0;
- value = strtoul (str, &tail, base);
-
- if (errno == ERANGE || errno == EINVAL)
- {
- return -1;
- }
-
- if (errno == 0)
- {
- errno = old_errno;
- }
-
- if (tail[0] != '\0')
- {
- /* bala: invalid integer format */
- return -1;
- }
-
- *n = value;
-
- return 0;
+ unsigned long value = 0;
+ char *tail = NULL;
+ int old_errno = 0;
+ const char *s = NULL;
+
+ if (str == NULL || n == NULL) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
+ errno = EINVAL;
+ return -1;
+ }
+
+ for (s = str; *s != '\0'; s++) {
+ if (isspace (*s))
+ continue;
+ if (*s == '-')
+ return -1;
+ break;
+ }
+
+ old_errno = errno;
+ errno = 0;
+ value = strtoul (str, &tail, base);
+ if (str == tail)
+ errno = EINVAL;
+
+ if (errno == ERANGE || errno == EINVAL)
+ return -1;
+
+ if (errno == 0)
+ errno = old_errno;
+
+ if (tail[0] != '\0')
+ return -1;
+
+ *n = value;
+
+ return 0;
}
-static int
+static int
_gf_string2uint (const char *str, unsigned int *n, int base)
{
- unsigned long value = 0;
- char *tail = NULL;
- int old_errno = 0;
- const char *s = NULL;
-
- if (str == NULL || n == NULL)
- {
- errno = EINVAL;
- return -1;
- }
-
- for (s = str; *s != '\0'; s++)
- {
- if (isspace (*s))
- {
- continue;
- }
- if (*s == '-')
- {
- /* bala: we do not support suffixed (-) sign and
- invalid integer format */
- return -1;
- }
- break;
- }
-
- old_errno = errno;
- errno = 0;
- value = strtoul (str, &tail, base);
-
- if (errno == ERANGE || errno == EINVAL)
- {
- return -1;
- }
-
- if (errno == 0)
- {
- errno = old_errno;
- }
-
- if (tail[0] != '\0')
- {
- /* bala: invalid integer format */
- return -1;
- }
-
- *n = (unsigned int)value;
-
- return 0;
+ unsigned long value = 0;
+ char *tail = NULL;
+ int old_errno = 0;
+ const char *s = NULL;
+
+ if (str == NULL || n == NULL) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
+ errno = EINVAL;
+ return -1;
+ }
+
+ for (s = str; *s != '\0'; s++) {
+ if (isspace (*s))
+ continue;
+ if (*s == '-')
+ return -1;
+ break;
+ }
+
+ old_errno = errno;
+ errno = 0;
+ value = strtoul (str, &tail, base);
+ if (str == tail)
+ errno = EINVAL;
+
+ if (errno == ERANGE || errno == EINVAL)
+ return -1;
+
+ if (errno == 0)
+ errno = old_errno;
+
+ if (tail[0] != '\0')
+ return -1;
+
+ *n = (unsigned int)value;
+
+ return 0;
}
-static int
+static int
_gf_string2double (const char *str, double *n)
{
- double value = 0.0;
- char *tail = NULL;
- int old_errno = 0;
-
- if (str == NULL || n == NULL) {
- errno = EINVAL;
- return -1;
- }
-
- old_errno = errno;
- errno = 0;
- value = strtod (str, &tail);
-
- if (errno == ERANGE || errno == EINVAL) {
- return -1;
- }
-
- if (errno == 0) {
- errno = old_errno;
- }
-
- if (tail[0] != '\0') {
- return -1;
- }
-
- *n = value;
-
- return 0;
+ double value = 0.0;
+ char *tail = NULL;
+ int old_errno = 0;
+
+ if (str == NULL || n == NULL) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
+ errno = EINVAL;
+ return -1;
+ }
+
+ old_errno = errno;
+ errno = 0;
+ value = strtod (str, &tail);
+ if (str == tail)
+ errno = EINVAL;
+
+ if (errno == ERANGE || errno == EINVAL)
+ return -1;
+
+ if (errno == 0)
+ errno = old_errno;
+
+ if (tail[0] != '\0')
+ return -1;
+
+ *n = value;
+
+ return 0;
}
-static int
+static int
_gf_string2longlong (const char *str, long long *n, int base)
{
- long long value = 0;
- char *tail = NULL;
- int old_errno = 0;
-
- if (str == NULL || n == NULL)
- {
- errno = EINVAL;
- return -1;
- }
-
- old_errno = errno;
- errno = 0;
- value = strtoll (str, &tail, base);
-
- if (errno == ERANGE || errno == EINVAL)
- {
- return -1;
- }
-
- if (errno == 0)
- {
- errno = old_errno;
- }
-
- if (tail[0] != '\0')
- {
- /* bala: invalid integer format */
- return -1;
- }
-
- *n = value;
-
- return 0;
+ long long value = 0;
+ char *tail = NULL;
+ int old_errno = 0;
+
+ if (str == NULL || n == NULL) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
+ errno = EINVAL;
+ return -1;
+ }
+
+ old_errno = errno;
+ errno = 0;
+ value = strtoll (str, &tail, base);
+ if (str == tail)
+ errno = EINVAL;
+
+ if (errno == ERANGE || errno == EINVAL)
+ return -1;
+
+ if (errno == 0)
+ errno = old_errno;
+
+ if (tail[0] != '\0')
+ return -1;
+
+ *n = value;
+
+ return 0;
}
-static int
+static int
_gf_string2ulonglong (const char *str, unsigned long long *n, int base)
{
- unsigned long long value = 0;
- char *tail = NULL;
- int old_errno = 0;
- const char *s = NULL;
-
- if (str == NULL || n == NULL)
- {
- errno = EINVAL;
- return -1;
- }
-
- for (s = str; *s != '\0'; s++)
- {
- if (isspace (*s))
- {
- continue;
- }
- if (*s == '-')
- {
- /* bala: we do not support suffixed (-) sign and
- invalid integer format */
- return -1;
- }
- break;
- }
-
- old_errno = errno;
- errno = 0;
- value = strtoull (str, &tail, base);
-
- if (errno == ERANGE || errno == EINVAL)
- {
- return -1;
- }
-
- if (errno == 0)
- {
- errno = old_errno;
- }
-
- if (tail[0] != '\0')
- {
- /* bala: invalid integer format */
- return -1;
- }
-
- *n = value;
-
- return 0;
+ unsigned long long value = 0;
+ char *tail = NULL;
+ int old_errno = 0;
+ const char *s = NULL;
+
+ if (str == NULL || n == NULL) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
+ errno = EINVAL;
+ return -1;
+ }
+
+ for (s = str; *s != '\0'; s++) {
+ if (isspace (*s))
+ continue;
+ if (*s == '-')
+ return -1;
+ break;
+ }
+
+ old_errno = errno;
+ errno = 0;
+ value = strtoull (str, &tail, base);
+ if (str == tail)
+ errno = EINVAL;
+
+ if (errno == ERANGE || errno == EINVAL)
+ return -1;
+
+ if (errno == 0)
+ errno = old_errno;
+
+ if (tail[0] != '\0')
+ return -1;
+
+ *n = value;
+
+ return 0;
}
-int
+int
gf_string2long (const char *str, long *n)
{
- return _gf_string2long (str, n, 0);
+ return _gf_string2long (str, n, 0);
}
-int
+int
gf_string2ulong (const char *str, unsigned long *n)
{
- return _gf_string2ulong (str, n, 0);
+ return _gf_string2ulong (str, n, 0);
}
-int
+int
gf_string2int (const char *str, int *n)
{
- return _gf_string2long (str, (long *) n, 0);
+ long l = 0;
+ int ret = 0;
+
+ ret = _gf_string2long (str, &l, 0);
+
+ *n = l;
+ return ret;
}
-int
+int
gf_string2uint (const char *str, unsigned int *n)
{
- return _gf_string2uint (str, n, 0);
+ return _gf_string2uint (str, n, 0);
}
int
gf_string2double (const char *str, double *n)
{
- return _gf_string2double (str, n);
+ return _gf_string2double (str, n);
}
-int
+int
gf_string2longlong (const char *str, long long *n)
{
- return _gf_string2longlong (str, n, 0);
+ return _gf_string2longlong (str, n, 0);
}
-int
+int
gf_string2ulonglong (const char *str, unsigned long long *n)
{
- return _gf_string2ulonglong (str, n, 0);
+ return _gf_string2ulonglong (str, n, 0);
}
-int
+int
gf_string2int8 (const char *str, int8_t *n)
{
- long l = 0L;
- int rv = 0;
-
- rv = _gf_string2long (str, &l, 0);
- if (rv != 0)
- return rv;
-
- if (l >= INT8_MIN && l <= INT8_MAX)
- {
- *n = (int8_t) l;
- return 0;
- }
-
- errno = ERANGE;
- return -1;
+ long l = 0L;
+ int rv = 0;
+
+ rv = _gf_string2long (str, &l, 0);
+ if (rv != 0)
+ return rv;
+
+ if ((l >= INT8_MIN) && (l <= INT8_MAX)) {
+ *n = (int8_t) l;
+ return 0;
+ }
+
+ errno = ERANGE;
+ return -1;
}
-int
+int
gf_string2int16 (const char *str, int16_t *n)
{
- long l = 0L;
- int rv = 0;
-
- rv = _gf_string2long (str, &l, 0);
- if (rv != 0)
- return rv;
-
- if (l >= INT16_MIN && l <= INT16_MAX)
- {
- *n = (int16_t) l;
- return 0;
- }
-
- errno = ERANGE;
- return -1;
+ long l = 0L;
+ int rv = 0;
+
+ rv = _gf_string2long (str, &l, 0);
+ if (rv != 0)
+ return rv;
+
+ if ((l >= INT16_MIN) && (l <= INT16_MAX)) {
+ *n = (int16_t) l;
+ return 0;
+ }
+
+ errno = ERANGE;
+ return -1;
}
-int
+int
gf_string2int32 (const char *str, int32_t *n)
{
- long l = 0L;
- int rv = 0;
-
- rv = _gf_string2long (str, &l, 0);
- if (rv != 0)
- return rv;
-
- if (l >= INT32_MIN && l <= INT32_MAX)
- {
- *n = (int32_t) l;
- return 0;
- }
-
- errno = ERANGE;
- return -1;
+ long l = 0L;
+ int rv = 0;
+
+ rv = _gf_string2long (str, &l, 0);
+ if (rv != 0)
+ return rv;
+
+ if ((l >= INT32_MIN) && (l <= INT32_MAX)) {
+ *n = (int32_t) l;
+ return 0;
+ }
+
+ errno = ERANGE;
+ return -1;
}
-int
+int
gf_string2int64 (const char *str, int64_t *n)
{
- long long l = 0LL;
- int rv = 0;
-
- rv = _gf_string2longlong (str, &l, 0);
- if (rv != 0)
- return rv;
-
- if (l >= INT64_MIN && l <= INT64_MAX)
- {
- *n = (int64_t) l;
- return 0;
- }
-
- errno = ERANGE;
- return -1;
+ long long l = 0LL;
+ int rv = 0;
+
+ rv = _gf_string2longlong (str, &l, 0);
+ if (rv != 0)
+ return rv;
+
+ if ((l >= INT64_MIN) && (l <= INT64_MAX)) {
+ *n = (int64_t) l;
+ return 0;
+ }
+
+ errno = ERANGE;
+ return -1;
}
-int
+int
gf_string2uint8 (const char *str, uint8_t *n)
{
- unsigned long l = 0L;
- int rv = 0;
-
- rv = _gf_string2ulong (str, &l, 0);
- if (rv != 0)
- return rv;
-
- if (l >= 0 && l <= UINT8_MAX)
- {
- *n = (uint8_t) l;
- return 0;
- }
-
- errno = ERANGE;
- return -1;
+ unsigned long l = 0L;
+ int rv = 0;
+
+ rv = _gf_string2ulong (str, &l, 0);
+ if (rv != 0)
+ return rv;
+
+ if (l >= 0 && l <= UINT8_MAX) {
+ *n = (uint8_t) l;
+ return 0;
+ }
+
+ errno = ERANGE;
+ return -1;
}
-int
+int
gf_string2uint16 (const char *str, uint16_t *n)
{
- unsigned long l = 0L;
- int rv = 0;
-
- rv = _gf_string2ulong (str, &l, 0);
- if (rv != 0)
- return rv;
-
- if (l >= 0 && l <= UINT16_MAX)
- {
- *n = (uint16_t) l;
- return 0;
- }
-
- errno = ERANGE;
- return -1;
+ unsigned long l = 0L;
+ int rv = 0;
+
+ rv = _gf_string2ulong (str, &l, 0);
+ if (rv != 0)
+ return rv;
+
+ if (l >= 0 && l <= UINT16_MAX) {
+ *n = (uint16_t) l;
+ return 0;
+ }
+
+ errno = ERANGE;
+ return -1;
}
-int
+int
gf_string2uint32 (const char *str, uint32_t *n)
{
- unsigned long l = 0L;
- int rv = 0;
-
- rv = _gf_string2ulong (str, &l, 0);
- if (rv != 0)
- return rv;
-
- if (l >= 0 && l <= UINT32_MAX)
- {
- *n = (uint32_t) l;
- return 0;
- }
-
- errno = ERANGE;
- return -1;
+ unsigned long l = 0L;
+ int rv = 0;
+
+ rv = _gf_string2ulong (str, &l, 0);
+ if (rv != 0)
+ return rv;
+
+ if (l >= 0 && l <= UINT32_MAX) {
+ *n = (uint32_t) l;
+ return 0;
+ }
+
+ errno = ERANGE;
+ return -1;
}
-int
+int
gf_string2uint64 (const char *str, uint64_t *n)
{
- unsigned long long l = 0ULL;
- int rv = 0;
-
- rv = _gf_string2ulonglong (str, &l, 0);
- if (rv != 0)
- return rv;
-
- if (l >= 0 && l <= UINT64_MAX)
- {
- *n = (uint64_t) l;
- return 0;
- }
-
- errno = ERANGE;
- return -1;
+ unsigned long long l = 0ULL;
+ int rv = 0;
+
+ rv = _gf_string2ulonglong (str, &l, 0);
+ if (rv != 0)
+ return rv;
+
+ if (l >= 0 && l <= UINT64_MAX) {
+ *n = (uint64_t) l;
+ return 0;
+ }
+
+ errno = ERANGE;
+ return -1;
}
-int
+int
gf_string2ulong_base10 (const char *str, unsigned long *n)
{
- return _gf_string2ulong (str, n, 10);
+ return _gf_string2ulong (str, n, 10);
}
-int
+int
gf_string2uint_base10 (const char *str, unsigned int *n)
{
- return _gf_string2uint (str, n, 10);
+ return _gf_string2uint (str, n, 10);
}
-int
+int
gf_string2uint8_base10 (const char *str, uint8_t *n)
{
- unsigned long l = 0L;
- int rv = 0;
-
- rv = _gf_string2ulong (str, &l, 10);
- if (rv != 0)
- return rv;
-
- if (l >= 0 && l <= UINT8_MAX)
- {
- *n = (uint8_t) l;
- return 0;
- }
-
- errno = ERANGE;
- return -1;
+ unsigned long l = 0L;
+ int rv = 0;
+
+ rv = _gf_string2ulong (str, &l, 10);
+ if (rv != 0)
+ return rv;
+
+ if (l >= 0 && l <= UINT8_MAX) {
+ *n = (uint8_t) l;
+ return 0;
+ }
+
+ errno = ERANGE;
+ return -1;
}
-int
+int
gf_string2uint16_base10 (const char *str, uint16_t *n)
{
- unsigned long l = 0L;
- int rv = 0;
-
- rv = _gf_string2ulong (str, &l, 10);
- if (rv != 0)
- return rv;
-
- if (l >= 0 && l <= UINT16_MAX)
- {
- *n = (uint16_t) l;
- return 0;
- }
-
- errno = ERANGE;
- return -1;
+ unsigned long l = 0L;
+ int rv = 0;
+
+ rv = _gf_string2ulong (str, &l, 10);
+ if (rv != 0)
+ return rv;
+
+ if (l >= 0 && l <= UINT16_MAX) {
+ *n = (uint16_t) l;
+ return 0;
+ }
+
+ errno = ERANGE;
+ return -1;
}
-int
+int
gf_string2uint32_base10 (const char *str, uint32_t *n)
{
- unsigned long l = 0L;
- int rv = 0;
-
- rv = _gf_string2ulong (str, &l, 10);
- if (rv != 0)
- return rv;
-
- if (l >= 0 && l <= UINT32_MAX)
- {
- *n = (uint32_t) l;
- return 0;
- }
-
- errno = ERANGE;
- return -1;
+ unsigned long l = 0L;
+ int rv = 0;
+
+ rv = _gf_string2ulong (str, &l, 10);
+ if (rv != 0)
+ return rv;
+
+ if (l >= 0 && l <= UINT32_MAX) {
+ *n = (uint32_t) l;
+ return 0;
+ }
+
+ errno = ERANGE;
+ return -1;
}
-int
+int
gf_string2uint64_base10 (const char *str, uint64_t *n)
{
- unsigned long long l = 0ULL;
- int rv = 0;
-
- rv = _gf_string2ulonglong (str, &l, 10);
- if (rv != 0)
- return rv;
-
- if (l >= 0 && l <= UINT64_MAX)
- {
- *n = (uint64_t) l;
- return 0;
- }
-
- errno = ERANGE;
- return -1;
+ unsigned long long l = 0ULL;
+ int rv = 0;
+
+ rv = _gf_string2ulonglong (str, &l, 10);
+ if (rv != 0)
+ return rv;
+
+ if (l >= 0 && l <= UINT64_MAX) {
+ *n = (uint64_t) l;
+ return 0;
+ }
+
+ errno = ERANGE;
+ return -1;
+}
+
+char *
+gf_uint64_2human_readable (uint64_t n)
+{
+ int ret = 0;
+ char *str = NULL;
+
+ if (n >= GF_UNIT_PB) {
+ ret = gf_asprintf (&str, "%.1lfPB", ((double) n)/GF_UNIT_PB);
+ if (ret < 0)
+ goto err;
+ } else if (n >= GF_UNIT_TB) {
+ ret = gf_asprintf (&str, "%.1lfTB", ((double) n)/GF_UNIT_TB);
+ if (ret < 0)
+ goto err;
+ } else if (n >= GF_UNIT_GB) {
+ ret = gf_asprintf (&str, "%.1lfGB", ((double) n)/GF_UNIT_GB);
+ if (ret < 0)
+ goto err;
+ } else if (n >= GF_UNIT_MB) {
+ ret = gf_asprintf (&str, "%.1lfMB", ((double) n)/GF_UNIT_MB);
+ if (ret < 0)
+ goto err;
+ } else if (n >= GF_UNIT_KB) {
+ ret = gf_asprintf (&str, "%.1lfKB", ((double) n)/GF_UNIT_KB);
+ if (ret < 0)
+ goto err;
+ } else {
+ ret = gf_asprintf (&str, "%luBytes", n);
+ if (ret < 0)
+ goto err;
+ }
+ return str;
+err:
+ return NULL;
}
-int
+int
gf_string2bytesize (const char *str, uint64_t *n)
{
- uint64_t value = 0ULL;
- char *tail = NULL;
- int old_errno = 0;
- const char *s = NULL;
-
- if (str == NULL || n == NULL)
- {
- errno = EINVAL;
- return -1;
- }
-
- for (s = str; *s != '\0'; s++)
- {
- if (isspace (*s))
- {
- continue;
- }
- if (*s == '-')
- {
- /* bala: we do not support suffixed (-) sign and
- invalid integer format */
- return -1;
- }
- break;
- }
-
- old_errno = errno;
- errno = 0;
- value = strtoull (str, &tail, 10);
-
- if (errno == ERANGE || errno == EINVAL)
- {
- return -1;
- }
-
- if (errno == 0)
- {
- errno = old_errno;
- }
-
- if (tail[0] != '\0')
- {
- if (strcasecmp (tail, GF_UNIT_KB_STRING) == 0)
- {
- value *= GF_UNIT_KB;
- }
- else if (strcasecmp (tail, GF_UNIT_MB_STRING) == 0)
- {
- value *= GF_UNIT_MB;
- }
- else if (strcasecmp (tail, GF_UNIT_GB_STRING) == 0)
- {
- value *= GF_UNIT_GB;
- }
- else if (strcasecmp (tail, GF_UNIT_TB_STRING) == 0)
- {
- value *= GF_UNIT_TB;
- }
- else if (strcasecmp (tail, GF_UNIT_PB_STRING) == 0)
- {
- value *= GF_UNIT_PB;
- }
- else
- {
- /* bala: invalid integer format */
- return -1;
- }
- }
-
- *n = value;
-
- return 0;
+ double value = 0.0;
+ char *tail = NULL;
+ int old_errno = 0;
+ const char *s = NULL;
+
+ if (str == NULL || n == NULL) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
+ errno = EINVAL;
+ return -1;
+ }
+
+ for (s = str; *s != '\0'; s++) {
+ if (isspace (*s))
+ continue;
+ if (*s == '-')
+ return -1;
+ break;
+ }
+
+ old_errno = errno;
+ errno = 0;
+ value = strtod (str, &tail);
+ if (str == tail)
+ errno = EINVAL;
+
+ if (errno == ERANGE || errno == EINVAL)
+ return -1;
+
+ if (errno == 0)
+ errno = old_errno;
+
+ if (tail[0] != '\0')
+ {
+ if (strcasecmp (tail, GF_UNIT_KB_STRING) == 0)
+ value *= GF_UNIT_KB;
+ else if (strcasecmp (tail, GF_UNIT_MB_STRING) == 0)
+ value *= GF_UNIT_MB;
+ else if (strcasecmp (tail, GF_UNIT_GB_STRING) == 0)
+ value *= GF_UNIT_GB;
+ else if (strcasecmp (tail, GF_UNIT_TB_STRING) == 0)
+ value *= GF_UNIT_TB;
+ else if (strcasecmp (tail, GF_UNIT_PB_STRING) == 0)
+ value *= GF_UNIT_PB;
+ else
+ return -1;
+ }
+
+ if ((UINT64_MAX - value) < 0) {
+ errno = ERANGE;
+ return -1;
+ }
+
+ *n = (uint64_t) value;
+
+ return 0;
+}
+
+int
+gf_string2percent_or_bytesize (const char *str,
+ uint64_t *n,
+ gf_boolean_t *is_percent)
+{
+ double value = 0ULL;
+ char *tail = NULL;
+ int old_errno = 0;
+ const char *s = NULL;
+
+ if (str == NULL || n == NULL) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING,
+ "argument invalid");
+ errno = EINVAL;
+ return -1;
+ }
+
+ for (s = str; *s != '\0'; s++) {
+ if (isspace (*s))
+ continue;
+ if (*s == '-')
+ return -1;
+ break;
+ }
+
+ old_errno = errno;
+ errno = 0;
+ value = strtod (str, &tail);
+ if (str == tail)
+ errno = EINVAL;
+
+ if (errno == ERANGE || errno == EINVAL)
+ return -1;
+
+ if (errno == 0)
+ errno = old_errno;
+
+ if (tail[0] != '\0') {
+ if (strcasecmp (tail, GF_UNIT_KB_STRING) == 0)
+ value *= GF_UNIT_KB;
+ else if (strcasecmp (tail, GF_UNIT_MB_STRING) == 0)
+ value *= GF_UNIT_MB;
+ else if (strcasecmp (tail, GF_UNIT_GB_STRING) == 0)
+ value *= GF_UNIT_GB;
+ else if (strcasecmp (tail, GF_UNIT_TB_STRING) == 0)
+ value *= GF_UNIT_TB;
+ else if (strcasecmp (tail, GF_UNIT_PB_STRING) == 0)
+ value *= GF_UNIT_PB;
+ else if (strcasecmp (tail, GF_UNIT_PERCENT_STRING) == 0)
+ *is_percent = _gf_true;
+ else
+ return -1;
+ }
+
+ /* Error out if we cannot store the value in uint64 */
+ if ((UINT64_MAX - value) < 0) {
+ errno = ERANGE;
+ return -1;
+ }
+
+ *n = (uint64_t) value;
+
+ return 0;
}
-int64_t
+int64_t
gf_str_to_long_long (const char *number)
{
- int64_t unit = 1;
- int64_t ret = 0;
- char *endptr = NULL ;
- if (!number)
- return 0;
-
- ret = strtoll (number, &endptr, 0);
-
- if (endptr) {
- switch (*endptr) {
- case 'G':
- case 'g':
- if ((* (endptr + 1) == 'B') ||(* (endptr + 1) == 'b'))
- unit = 1024 * 1024 * 1024;
- break;
- case 'M':
- case 'm':
- if ((* (endptr + 1) == 'B') ||(* (endptr + 1) == 'b'))
- unit = 1024 * 1024;
- break;
- case 'K':
- case 'k':
- if ((* (endptr + 1) == 'B') ||(* (endptr + 1) == 'b'))
- unit = 1024;
- break;
- case '%':
- unit = 1;
- break;
- default:
- unit = 1;
- break;
- }
- }
- return ret * unit;
+ int64_t unit = 1;
+ int64_t ret = 0;
+ char *endptr = NULL ;
+ if (!number)
+ return 0;
+
+ ret = strtoll (number, &endptr, 0);
+
+ if (endptr) {
+ switch (*endptr) {
+ case 'G':
+ case 'g':
+ if ((* (endptr + 1) == 'B') ||(* (endptr + 1) == 'b'))
+ unit = 1024 * 1024 * 1024;
+ break;
+ case 'M':
+ case 'm':
+ if ((* (endptr + 1) == 'B') ||(* (endptr + 1) == 'b'))
+ unit = 1024 * 1024;
+ break;
+ case 'K':
+ case 'k':
+ if ((* (endptr + 1) == 'B') ||(* (endptr + 1) == 'b'))
+ unit = 1024;
+ break;
+ case '%':
+ unit = 1;
+ break;
+ default:
+ unit = 1;
+ break;
+ }
+ }
+ return ret * unit;
}
-int
+int
gf_string2boolean (const char *str, gf_boolean_t *b)
{
- if (str == NULL) {
- return -1;
- }
-
- if ((strcasecmp (str, "1") == 0) ||
- (strcasecmp (str, "on") == 0) ||
- (strcasecmp (str, "yes") == 0) ||
- (strcasecmp (str, "true") == 0) ||
- (strcasecmp (str, "enable") == 0)) {
- *b = _gf_true;
- return 0;
- }
-
- if ((strcasecmp (str, "0") == 0) ||
- (strcasecmp (str, "off") == 0) ||
- (strcasecmp (str, "no") == 0) ||
- (strcasecmp (str, "false") == 0) ||
- (strcasecmp (str, "disable") == 0)) {
- *b = _gf_false;
- return 0;
- }
-
- return -1;
+ if (str == NULL) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
+ return -1;
+ }
+
+ if ((strcasecmp (str, "1") == 0) ||
+ (strcasecmp (str, "on") == 0) ||
+ (strcasecmp (str, "yes") == 0) ||
+ (strcasecmp (str, "true") == 0) ||
+ (strcasecmp (str, "enable") == 0)) {
+ *b = _gf_true;
+ return 0;
+ }
+
+ if ((strcasecmp (str, "0") == 0) ||
+ (strcasecmp (str, "off") == 0) ||
+ (strcasecmp (str, "no") == 0) ||
+ (strcasecmp (str, "false") == 0) ||
+ (strcasecmp (str, "disable") == 0)) {
+ *b = _gf_false;
+ return 0;
+ }
+
+ return -1;
}
-int
+int
gf_lockfd (int fd)
{
- struct flock fl;
-
- fl.l_type = F_WRLCK;
- fl.l_whence = SEEK_SET;
- fl.l_start = 0;
- fl.l_len = 0;
-
- return fcntl (fd, F_SETLK, &fl);
+ struct gf_flock fl;
+
+ fl.l_type = F_WRLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 0;
+ fl.l_len = 0;
+
+ return fcntl (fd, F_SETLK, &fl);
}
-int
+int
gf_unlockfd (int fd)
{
- struct flock fl;
-
- fl.l_type = F_UNLCK;
- fl.l_whence = SEEK_SET;
- fl.l_start = 0;
- fl.l_len = 0;
-
- return fcntl (fd, F_SETLK, &fl);
+ struct gf_flock fl;
+
+ fl.l_type = F_UNLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 0;
+ fl.l_len = 0;
+
+ return fcntl (fd, F_SETLK, &fl);
}
-
+
static void
compute_checksum (char *buf, size_t size, uint32_t *checksum)
{
@@ -1431,7 +1624,7 @@ compute_checksum (char *buf, size_t size, uint32_t *checksum)
checksum_buf [0] = 0xba;
checksum_buf [1] = 0xbe;
checksum_buf [2] = 0xb0;
- checksum_buf [3] = 0x0b;
+ checksum_buf [3] = 0x0b;
}
for (ret = 0; ret < (size - 4); ret += 4) {
@@ -1444,14 +1637,14 @@ compute_checksum (char *buf, size_t size, uint32_t *checksum)
for (ret = 0; ret <= (size % 4); ret++) {
checksum_buf[ret] ^= (buf[(size - 4) + ret] << ret);
}
-
+
return;
}
#define GF_CHECKSUM_BUF_SIZE 1024
int
-get_checksum_for_file (int fd, uint32_t *checksum)
+get_checksum_for_file (int fd, uint32_t *checksum)
{
int ret = -1;
char buf[GF_CHECKSUM_BUF_SIZE] = {0,};
@@ -1461,7 +1654,7 @@ get_checksum_for_file (int fd, uint32_t *checksum)
do {
ret = read (fd, &buf, GF_CHECKSUM_BUF_SIZE);
if (ret > 0)
- compute_checksum (buf, GF_CHECKSUM_BUF_SIZE,
+ compute_checksum (buf, GF_CHECKSUM_BUF_SIZE,
checksum);
} while (ret > 0);
@@ -1470,3 +1663,1246 @@ get_checksum_for_file (int fd, uint32_t *checksum)
return ret;
}
+
+
+int
+get_checksum_for_path (char *path, uint32_t *checksum)
+{
+ int ret = -1;
+ int fd = -1;
+
+ GF_ASSERT (path);
+ GF_ASSERT (checksum);
+
+ fd = open (path, O_RDWR);
+
+ if (fd == -1) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Unable to open %s, errno: %d",
+ path, errno);
+ goto out;
+ }
+
+ ret = get_checksum_for_file (fd, checksum);
+
+out:
+ if (fd != -1)
+ close (fd);
+
+ return ret;
+}
+
+char *
+strtail (char *str, const char *pattern)
+{
+ int i = 0;
+
+ for (i = 0; str[i] == pattern[i] && str[i]; i++);
+
+ if (pattern[i] == '\0')
+ return str + i;
+
+ return NULL;
+}
+
+void
+skipwhite (char **s)
+{
+ while (isspace (**s))
+ (*s)++;
+}
+
+char *
+nwstrtail (char *str, char *pattern)
+{
+ for (;;) {
+ skipwhite (&str);
+ skipwhite (&pattern);
+
+ if (*str != *pattern || !*str)
+ break;
+
+ str++;
+ pattern++;
+ }
+
+ return *pattern ? NULL : str;
+}
+
+void
+skipword (char **s)
+{
+ if (!*s)
+ return;
+
+ skipwhite (s);
+
+ while (!isspace(**s))
+ (*s)++;
+}
+
+char *
+get_nth_word (const char *str, int n)
+{
+ char buf[4096] = {0};
+ char *start = NULL;
+ char *word = NULL;
+ int i = 0;
+ int word_len = 0;
+ const char *end = NULL;
+
+ if (!str)
+ goto out;
+
+ snprintf (buf, sizeof (buf), "%s", str);
+ start = buf;
+
+ for (i = 0; i < n-1; i++)
+ skipword (&start);
+
+ skipwhite (&start);
+ end = strpbrk ((const char *)start, " \t\n\0");
+
+ if (!end)
+ goto out;
+
+ word_len = abs (end - start);
+
+ word = GF_CALLOC (1, word_len + 1, gf_common_mt_strdup);
+ if (!word)
+ goto out;
+
+ strncpy (word, start, word_len);
+ *(word + word_len) = '\0';
+ out:
+ return word;
+}
+
+/* Syntax formed according to RFC 1912 (RFC 1123 & 952 are more restrictive) *
+ <hname> ::= <gen-name>*["."<gen-name>] *
+ <gen-name> ::= <let-or-digit> <[*[<let-or-digit-or-hyphen>]<let-or-digit>] */
+char
+valid_host_name (char *address, int length)
+{
+ int i = 0;
+ int str_len = 0;
+ char ret = 1;
+ char *dup_addr = NULL;
+ char *temp_str = NULL;
+ char *save_ptr = NULL;
+
+ if ((length > _POSIX_HOST_NAME_MAX) || (length < 1)) {
+ ret = 0;
+ goto out;
+ }
+
+ dup_addr = gf_strdup (address);
+ if (!dup_addr) {
+ ret = 0;
+ goto out;
+ }
+
+ if (!isalnum (dup_addr[length - 1]) && (dup_addr[length - 1] != '*')) {
+ ret = 0;
+ goto out;
+ }
+
+ /* Check for consecutive dots, which is invalid in a hostname and is
+ * ignored by strtok()
+ */
+ if (strstr (dup_addr, "..")) {
+ ret = 0;
+ goto out;
+ }
+
+ /* gen-name */
+ temp_str = strtok_r (dup_addr, ".", &save_ptr);
+ do {
+ str_len = strlen (temp_str);
+
+ if (!isalnum (temp_str[0]) ||
+ !isalnum (temp_str[str_len-1])) {
+ ret = 0;
+ goto out;
+ }
+ for (i = 1; i < str_len; i++) {
+ if (!isalnum (temp_str[i]) && (temp_str[i] != '-')) {
+ ret = 0;
+ goto out;
+ }
+ }
+ } while ((temp_str = strtok_r (NULL, ".", &save_ptr)));
+
+out:
+ GF_FREE (dup_addr);
+ return ret;
+}
+
+/* Matches all ipv4 address, if wildcard_acc is true '*' wildcard pattern for*
+ subnets is considerd as valid strings as well */
+char
+valid_ipv4_address (char *address, int length, gf_boolean_t wildcard_acc)
+{
+ int octets = 0;
+ int value = 0;
+ char *tmp = NULL, *ptr = NULL, *prev = NULL, *endptr = NULL;
+ char ret = 1;
+ int is_wildcard = 0;
+
+ tmp = gf_strdup (address);
+
+ /*
+ * To prevent cases where last character is '.' and which have
+ * consecutive dots like ".." as strtok ignore consecutive
+ * delimeters.
+ */
+ if (length <= 0 ||
+ (strstr (address, "..")) ||
+ (!isdigit (tmp[length - 1]) && (tmp[length - 1] != '*'))) {
+ ret = 0;
+ goto out;
+ }
+
+ prev = tmp;
+ prev = strtok_r (tmp, ".", &ptr);
+
+ while (prev != NULL) {
+ octets++;
+ if (wildcard_acc && !strcmp (prev, "*")) {
+ is_wildcard = 1;
+ } else {
+ value = strtol (prev, &endptr, 10);
+ if ((value > 255) || (value < 0) ||
+ (endptr != NULL && *endptr != '\0')) {
+ ret = 0;
+ goto out;
+ }
+ }
+ prev = strtok_r (NULL, ".", &ptr);
+ }
+
+ if ((octets > 4) || (octets < 4 && !is_wildcard)) {
+ ret = 0;
+ }
+
+out:
+ GF_FREE (tmp);
+ return ret;
+}
+
+char
+valid_ipv6_address (char *address, int length, gf_boolean_t wildcard_acc)
+{
+ int hex_numbers = 0;
+ int value = 0;
+ int i = 0;
+ char *tmp = NULL, *ptr = NULL, *prev = NULL, *endptr = NULL;
+ char ret = 1;
+ int is_wildcard = 0;
+ int is_compressed = 0;
+
+ tmp = gf_strdup (address);
+
+ /* Check for compressed form */
+ if (length <= 0 || tmp[length - 1] == ':') {
+ ret = 0;
+ goto out;
+ }
+ for (i = 0; i < (length - 1) ; i++) {
+ if (tmp[i] == ':' && tmp[i + 1] == ':') {
+ if (is_compressed == 0)
+ is_compressed = 1;
+ else {
+ ret = 0;
+ goto out;
+ }
+ }
+ }
+
+ prev = strtok_r (tmp, ":", &ptr);
+
+ while (prev != NULL) {
+ hex_numbers++;
+ if (wildcard_acc && !strcmp (prev, "*")) {
+ is_wildcard = 1;
+ } else {
+ value = strtol (prev, &endptr, 16);
+ if ((value > 0xffff) || (value < 0)
+ || (endptr != NULL && *endptr != '\0')) {
+ ret = 0;
+ goto out;
+ }
+ }
+ prev = strtok_r (NULL, ":", &ptr);
+ }
+
+ if ((hex_numbers > 8) || (hex_numbers < 8 && !is_wildcard
+ && !is_compressed)) {
+ ret = 0;
+ }
+
+out:
+ GF_FREE (tmp);
+ return ret;
+}
+
+char
+valid_internet_address (char *address, gf_boolean_t wildcard_acc)
+{
+ char ret = 0;
+ int length = 0;
+
+ if (address == NULL) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
+ goto out;
+ }
+
+ length = strlen (address);
+ if (length == 0)
+ goto out;
+
+ if (valid_ipv4_address (address, length, wildcard_acc)
+ || valid_ipv6_address (address, length, wildcard_acc)
+ || valid_host_name (address, length))
+ ret = 1;
+
+out:
+ return ret;
+}
+
+/**
+ * gf_sock_union_equal_addr - check if two given gf_sock_unions have same addr
+ *
+ * @param a - first sock union
+ * @param b - second sock union
+ * @return _gf_true if a and b have same ipv{4,6} addr, _gf_false otherwise
+ */
+gf_boolean_t
+gf_sock_union_equal_addr (union gf_sock_union *a,
+ union gf_sock_union *b)
+{
+ if (!a || !b) {
+ gf_log ("common-utils", GF_LOG_ERROR, "Invalid arguments"
+ " to gf_sock_union_equal_addr");
+ return _gf_false;
+ }
+
+ if (a->storage.ss_family != b->storage.ss_family)
+ return _gf_false;
+
+ switch (a->storage.ss_family) {
+ case AF_INET:
+ if (a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr)
+ return _gf_true;
+ else
+ return _gf_false;
+
+ case AF_INET6:
+ if (memcmp ((void *)(&a->sin6.sin6_addr),
+ (void *)(&b->sin6.sin6_addr),
+ sizeof (a->sin6.sin6_addr)))
+ return _gf_false;
+ else
+ return _gf_true;
+
+ default:
+ gf_log ("common-utils", GF_LOG_DEBUG,
+ "Unsupported/invalid address family");
+ break;
+ }
+
+ return _gf_false;
+}
+
+/*Thread safe conversion function*/
+char *
+uuid_utoa (uuid_t uuid)
+{
+ char *uuid_buffer = glusterfs_uuid_buf_get(THIS->ctx);
+ uuid_unparse (uuid, uuid_buffer);
+ return uuid_buffer;
+}
+
+/*Re-entrant conversion function*/
+char *
+uuid_utoa_r (uuid_t uuid, char *dst)
+{
+ if(!dst)
+ return NULL;
+ uuid_unparse (uuid, dst);
+ return dst;
+}
+
+/*Thread safe conversion function*/
+char *
+lkowner_utoa (gf_lkowner_t *lkowner)
+{
+ char *lkowner_buffer = glusterfs_lkowner_buf_get(THIS->ctx);
+ lkowner_unparse (lkowner, lkowner_buffer, GF_LKOWNER_BUF_SIZE);
+ return lkowner_buffer;
+}
+
+/*Re-entrant conversion function*/
+char *
+lkowner_utoa_r (gf_lkowner_t *lkowner, char *dst, int len)
+{
+ if(!dst)
+ return NULL;
+ lkowner_unparse (lkowner, dst, len);
+ return dst;
+}
+
+void* gf_array_elem (void *a, int index, size_t elem_size)
+{
+ uint8_t* ptr = a;
+ return (void*)(ptr + index * elem_size);
+}
+
+void
+gf_elem_swap (void *x, void *y, size_t l) {
+ uint8_t *a = x, *b = y, c;
+ while(l--) {
+ c = *a;
+ *a++ = *b;
+ *b++ = c;
+ }
+}
+
+void
+gf_array_insertionsort (void *A, int l, int r, size_t elem_size,
+ gf_cmp cmp)
+{
+ int i = l;
+ int N = r+1;
+ void *Temp = NULL;
+ int j = 0;
+
+ for(i = l; i < N; i++) {
+ Temp = gf_array_elem (A, i, elem_size);
+ j = i - 1;
+ while((cmp (Temp, gf_array_elem (A, j, elem_size))
+ < 0) && j>=0) {
+ gf_elem_swap (Temp, gf_array_elem (A, j, elem_size),
+ elem_size);
+ Temp = gf_array_elem (A, j, elem_size);
+ j = j-1;
+ }
+ }
+}
+
+int
+gf_is_str_int (const char *value)
+{
+ int flag = 0;
+ char *str = NULL;
+ char *fptr = NULL;
+
+ GF_VALIDATE_OR_GOTO (THIS->name, value, out);
+
+ str = gf_strdup (value);
+ if (!str)
+ goto out;
+
+ fptr = str;
+
+ while (*str) {
+ if (!isdigit(*str)) {
+ flag = 1;
+ goto out;
+ }
+ str++;
+ }
+
+out:
+ GF_FREE (fptr);
+
+ return flag;
+}
+/*
+ * rounds up nr to power of two. If nr is already a power of two, just returns
+ * nr
+ */
+
+inline int32_t
+gf_roundup_power_of_two (int32_t nr)
+{
+ int32_t result = 1;
+
+ if (nr < 0) {
+ gf_log ("common-utils", GF_LOG_WARNING,
+ "negative number passed");
+ result = -1;
+ goto out;
+ }
+
+ while (result < nr)
+ result *= 2;
+
+out:
+ return result;
+}
+
+/*
+ * rounds up nr to next power of two. If nr is already a power of two, next
+ * power of two is returned.
+ */
+
+inline int32_t
+gf_roundup_next_power_of_two (int32_t nr)
+{
+ int32_t result = 1;
+
+ if (nr < 0) {
+ gf_log ("common-utils", GF_LOG_WARNING,
+ "negative number passed");
+ result = -1;
+ goto out;
+ }
+
+ while (result <= nr)
+ result *= 2;
+
+out:
+ return result;
+}
+
+int
+validate_brick_name (char *brick)
+{
+ char *delimiter = NULL;
+ int ret = 0;
+ delimiter = strrchr (brick, ':');
+ if (!delimiter || delimiter == brick
+ || *(delimiter+1) != '/')
+ ret = -1;
+
+ return ret;
+}
+
+char *
+get_host_name (char *word, char **host)
+{
+ char *delimiter = NULL;
+ delimiter = strrchr (word, ':');
+ if (delimiter)
+ *delimiter = '\0';
+ else
+ return NULL;
+ *host = word;
+ return *host;
+}
+
+
+char *
+get_path_name (char *word, char **path)
+{
+ char *delimiter = NULL;
+ delimiter = strchr (word, '/');
+ if (!delimiter)
+ return NULL;
+ *path = delimiter;
+ return *path;
+}
+
+void
+gf_path_strip_trailing_slashes (char *path)
+{
+ int i = 0;
+ int len = 0;
+
+ if (!path)
+ return;
+
+ len = strlen (path);
+ for (i = len - 1; i > 0; i--) {
+ if (path[i] != '/')
+ break;
+ }
+
+ if (i < (len -1))
+ path [i+1] = '\0';
+
+ return;
+}
+
+uint64_t
+get_mem_size ()
+{
+ uint64_t memsize = -1;
+
+#if defined GF_LINUX_HOST_OS || defined GF_SOLARIS_HOST_OS
+
+ uint64_t page_size = 0;
+ uint64_t num_pages = 0;
+
+ page_size = sysconf (_SC_PAGESIZE);
+ num_pages = sysconf (_SC_PHYS_PAGES);
+
+ memsize = page_size * num_pages;
+#endif
+
+#if defined GF_BSD_HOST_OS || defined GF_DARWIN_HOST_OS
+
+ size_t len = sizeof(memsize);
+ int name [] = { CTL_HW, HW_PHYSMEM };
+
+ sysctl (name, 2, &memsize, &len, NULL, 0);
+#endif
+ return memsize;
+}
+
+/* Strips all whitespace characters in a string and returns length of new string
+ * on success
+ */
+int
+gf_strip_whitespace (char *str, int len)
+{
+ int i = 0;
+ int new_len = 0;
+ char *new_str = NULL;
+
+ GF_ASSERT (str);
+
+ new_str = GF_CALLOC (1, len + 1, gf_common_mt_char);
+ if (new_str == NULL)
+ return -1;
+
+ for (i = 0; i < len; i++) {
+ if (!isspace (str[i]))
+ new_str[new_len++] = str[i];
+ }
+ new_str[new_len] = '\0';
+
+ if (new_len != len) {
+ memset (str, 0, len);
+ strncpy (str, new_str, new_len);
+ }
+
+ GF_FREE (new_str);
+ return new_len;
+}
+
+int
+gf_canonicalize_path (char *path)
+{
+ int ret = -1;
+ int path_len = 0;
+ int dir_path_len = 0;
+ char *tmppath = NULL;
+ char *dir = NULL;
+ char *tmpstr = NULL;
+
+ if (!path || *path != '/')
+ goto out;
+
+ tmppath = gf_strdup (path);
+ if (!tmppath)
+ goto out;
+
+ /* Strip the extra slashes and return */
+ bzero (path, strlen(path));
+ path[0] = '/';
+ dir = strtok_r(tmppath, "/", &tmpstr);
+
+ while (dir) {
+ dir_path_len = strlen(dir);
+ strncpy ((path + path_len + 1), dir, dir_path_len);
+ path_len += dir_path_len + 1;
+ dir = strtok_r (NULL, "/", &tmpstr);
+ if (dir)
+ strncpy ((path + path_len), "/", 1);
+ }
+ path[path_len] = '\0';
+ ret = 0;
+
+ out:
+ if (ret)
+ gf_log ("common-utils", GF_LOG_ERROR,
+ "Path manipulation failed");
+
+ GF_FREE(tmppath);
+
+ return ret;
+}
+
+static const char *__gf_timefmts[] = {
+ "%F %T",
+ "%Y/%m/%d-%T",
+ "%b %d %T",
+ "%F %H%M%S"
+};
+
+static const char *__gf_zerotimes[] = {
+ "0000-00-00 00:00:00",
+ "0000/00/00-00:00:00",
+ "xxx 00 00:00:00",
+ "0000-00-00 000000"
+};
+
+void
+_gf_timestuff (gf_timefmts *fmt, const char ***fmts, const char ***zeros)
+{
+ *fmt = gf_timefmt_last;
+ *fmts = __gf_timefmts;
+ *zeros = __gf_zerotimes;
+}
+
+
+char *
+generate_glusterfs_ctx_id (void)
+{
+ char tmp_str[1024] = {0,};
+ char hostname[256] = {0,};
+ struct timeval tv = {0,};
+ char now_str[32];
+
+ if (gettimeofday (&tv, NULL) == -1) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "gettimeofday: failed %s",
+ strerror (errno));
+ }
+
+ if (gethostname (hostname, 256) == -1) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "gethostname: failed %s",
+ strerror (errno));
+ }
+
+ gf_time_fmt (now_str, sizeof now_str, tv.tv_sec, gf_timefmt_Ymd_T);
+ snprintf (tmp_str, sizeof tmp_str, "%s-%d-%s:%"
+#ifdef GF_DARWIN_HOST_OS
+ PRId32,
+#else
+ "ld",
+#endif
+ hostname, getpid(), now_str, tv.tv_usec);
+
+ return gf_strdup (tmp_str);
+}
+
+char *
+gf_get_reserved_ports ()
+{
+ char *ports_info = NULL;
+#if defined GF_LINUX_HOST_OS
+ int proc_fd = -1;
+ char *proc_file = "/proc/sys/net/ipv4/ip_local_reserved_ports";
+ char buffer[4096] = {0,};
+ int32_t ret = -1;
+
+ proc_fd = open (proc_file, O_RDONLY);
+ if (proc_fd == -1) {
+ /* What should be done in this case? error out from here
+ * and thus stop the glusterfs process from starting or
+ * continue with older method of using any of the available
+ * port? For now 2nd option is considered.
+ */
+ gf_log ("glusterfs", GF_LOG_WARNING, "could not open "
+ "the file /proc/sys/net/ipv4/ip_local_reserved_ports "
+ "for getting reserved ports info (%s)",
+ strerror (errno));
+ goto out;
+ }
+
+ ret = read (proc_fd, buffer, sizeof (buffer));
+ if (ret < 0) {
+ gf_log ("glusterfs", GF_LOG_WARNING, "could not "
+ "read the file %s for getting reserved ports "
+ "info (%s)", proc_file, strerror (errno));
+ goto out;
+ }
+ ports_info = gf_strdup (buffer);
+
+out:
+ if (proc_fd != -1)
+ close (proc_fd);
+#endif /* GF_LINUX_HOST_OS */
+ return ports_info;
+}
+
+int
+gf_process_reserved_ports (gf_boolean_t *ports)
+{
+ int ret = -1;
+#if defined GF_LINUX_HOST_OS
+ char *ports_info = NULL;
+ char *tmp = NULL;
+ char *blocked_port = NULL;
+
+ ports_info = gf_get_reserved_ports ();
+ if (!ports_info) {
+ gf_log ("glusterfs", GF_LOG_WARNING, "Not able to get reserved "
+ "ports, hence there is a possibility that glusterfs "
+ "may consume reserved port");
+ goto out;
+ }
+
+ blocked_port = strtok_r (ports_info, ",\n",&tmp);
+
+ while (blocked_port) {
+ gf_ports_reserved (blocked_port, ports);
+ blocked_port = strtok_r (NULL, ",\n", &tmp);
+ }
+
+ ret = 0;
+
+out:
+ GF_FREE (ports_info);
+#endif /* GF_LINUX_HOST_OS */
+ return ret;
+}
+
+gf_boolean_t
+gf_ports_reserved (char *blocked_port, gf_boolean_t *ports)
+{
+ gf_boolean_t result = _gf_false;
+ char *range_port = NULL;
+ int16_t tmp_port1, tmp_port2 = -1;
+
+ if (strstr (blocked_port, "-") == NULL) {
+ /* get rid of the new line character*/
+ if (blocked_port[strlen(blocked_port) -1] == '\n')
+ blocked_port[strlen(blocked_port) -1] = '\0';
+ if (gf_string2int16 (blocked_port, &tmp_port1) == 0) {
+ if (tmp_port1 > (GF_CLIENT_PORT_CEILING - 1)
+ || tmp_port1 < 0) {
+ gf_log ("glusterfs-socket", GF_LOG_WARNING,
+ "invalid port %d", tmp_port1);
+ result = _gf_true;
+ goto out;
+ } else {
+ gf_log ("glusterfs", GF_LOG_DEBUG,
+ "blocking port %d", tmp_port1);
+ ports[tmp_port1] = _gf_true;
+ }
+ } else {
+ gf_log ("glusterfs-socket", GF_LOG_WARNING, "%s is "
+ "not a valid port identifier", blocked_port);
+ result = _gf_true;
+ goto out;
+ }
+ } else {
+ range_port = strtok (blocked_port, "-");
+ if (!range_port){
+ result = _gf_true;
+ goto out;
+ }
+ if (gf_string2int16 (range_port, &tmp_port1) == 0) {
+ if (tmp_port1 > (GF_CLIENT_PORT_CEILING - 1))
+ tmp_port1 = GF_CLIENT_PORT_CEILING - 1;
+ if (tmp_port1 < 0)
+ tmp_port1 = 0;
+ }
+ range_port = strtok (NULL, "-");
+ if (!range_port) {
+ result = _gf_true;
+ goto out;
+ }
+ /* get rid of the new line character*/
+ if (range_port[strlen(range_port) -1] == '\n')
+ range_port[strlen(range_port) - 1] = '\0';
+ if (gf_string2int16 (range_port, &tmp_port2) == 0) {
+ if (tmp_port2 >
+ (GF_CLIENT_PORT_CEILING - 1))
+ tmp_port2 = GF_CLIENT_PORT_CEILING - 1;
+ if (tmp_port2 < 0)
+ tmp_port2 = 0;
+ }
+ gf_log ("glusterfs", GF_LOG_DEBUG, "lower: %d, higher: %d",
+ tmp_port1, tmp_port2);
+ for (; tmp_port1 <= tmp_port2; tmp_port1++)
+ ports[tmp_port1] = _gf_true;
+ }
+
+out:
+ return result;
+}
+
+/* Takes in client ip{v4,v6} and returns associated hostname, if any
+ * Also, allocates memory for the hostname.
+ * Returns: 0 for success, -1 for failure
+ */
+int
+gf_get_hostname_from_ip (char *client_ip, char **hostname)
+{
+ int ret = -1;
+ struct sockaddr *client_sockaddr = NULL;
+ struct sockaddr_in client_sock_in = {0};
+ struct sockaddr_in6 client_sock_in6 = {0};
+ char client_hostname[NI_MAXHOST] = {0};
+ char *client_ip_copy = NULL;
+ char *tmp = NULL;
+ char *ip = NULL;
+
+ /* if ipv4, reverse lookup the hostname to
+ * allow FQDN based rpc authentication
+ */
+ if (valid_ipv4_address (client_ip, strlen (client_ip), 0) == _gf_false) {
+ /* most times, we get a.b.c.d:port form, so check that */
+ client_ip_copy = gf_strdup (client_ip);
+ if (!client_ip_copy)
+ goto out;
+
+ ip = strtok_r (client_ip_copy, ":", &tmp);
+ } else {
+ ip = client_ip;
+ }
+
+ if (valid_ipv4_address (ip, strlen (ip), 0) == _gf_true) {
+ client_sockaddr = (struct sockaddr *)&client_sock_in;
+ client_sock_in.sin_family = AF_INET;
+ ret = inet_pton (AF_INET, ip,
+ (void *)&client_sock_in.sin_addr.s_addr);
+
+ } else if (valid_ipv6_address (ip, strlen (ip), 0) == _gf_true) {
+ client_sockaddr = (struct sockaddr *) &client_sock_in6;
+
+ client_sock_in6.sin6_family = AF_INET6;
+ ret = inet_pton (AF_INET6, ip,
+ (void *)&client_sock_in6.sin6_addr);
+ } else {
+ goto out;
+ }
+
+ if (ret != 1) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = getnameinfo (client_sockaddr,
+ sizeof (*client_sockaddr),
+ client_hostname, sizeof (client_hostname),
+ NULL, 0, 0);
+ if (ret) {
+ gf_log ("common-utils", GF_LOG_ERROR,
+ "Could not lookup hostname of %s : %s",
+ client_ip, gai_strerror (ret));
+ ret = -1;
+ goto out;
+ }
+
+ *hostname = gf_strdup ((char *)client_hostname);
+ out:
+ if (client_ip_copy)
+ GF_FREE (client_ip_copy);
+
+ return ret;
+}
+
+gf_boolean_t
+gf_interface_search (char *ip)
+{
+ int32_t ret = -1;
+ gf_boolean_t found = _gf_false;
+ struct ifaddrs *ifaddr, *ifa;
+ int family;
+ char host[NI_MAXHOST];
+ xlator_t *this = NULL;
+ char *pct = NULL;
+
+ this = THIS;
+
+ ret = getifaddrs (&ifaddr);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "getifaddrs() failed: %s\n",
+ gai_strerror(ret));
+ goto out;
+ }
+
+ for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
+ if (!ifa->ifa_addr) {
+ /*
+ * This seemingly happens if an interface hasn't
+ * been bound to a particular protocol (seen with
+ * TUN devices).
+ */
+ continue;
+ }
+ family = ifa->ifa_addr->sa_family;
+
+ if (family != AF_INET && family != AF_INET6)
+ continue;
+
+ ret = getnameinfo (ifa->ifa_addr,
+ (family == AF_INET) ? sizeof(struct sockaddr_in) :
+ sizeof(struct sockaddr_in6),
+ host, NI_MAXHOST, NULL, 0, NI_NUMERICHOST);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "getnameinfo() failed: %s\n",
+ gai_strerror(ret));
+ goto out;
+ }
+
+ /*
+ * Sometimes the address comes back as addr%eth0 or
+ * similar. Since % is an invalid character, we can
+ * strip it out with confidence that doing so won't
+ * harm anything.
+ */
+ pct = index(host,'%');
+ if (pct) {
+ *pct = '\0';
+ }
+
+ if (strncmp (ip, host, NI_MAXHOST) == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s is local address at interface %s",
+ ip, ifa->ifa_name);
+ found = _gf_true;
+ goto out;
+ }
+ }
+out:
+ if(ifaddr)
+ freeifaddrs (ifaddr);
+ return found;
+}
+
+char *
+get_ip_from_addrinfo (struct addrinfo *addr, char **ip)
+{
+ char buf[64];
+ void *in_addr = NULL;
+ struct sockaddr_in *s4 = NULL;
+ struct sockaddr_in6 *s6 = NULL;
+
+ switch (addr->ai_family)
+ {
+ case AF_INET:
+ s4 = (struct sockaddr_in *)addr->ai_addr;
+ in_addr = &s4->sin_addr;
+ break;
+
+ case AF_INET6:
+ s6 = (struct sockaddr_in6 *)addr->ai_addr;
+ in_addr = &s6->sin6_addr;
+ break;
+
+ default:
+ gf_log ("glusterd", GF_LOG_ERROR, "Invalid family");
+ return NULL;
+ }
+
+ if (!inet_ntop(addr->ai_family, in_addr, buf, sizeof(buf))) {
+ gf_log ("glusterd", GF_LOG_ERROR, "String conversion failed");
+ return NULL;
+ }
+
+ *ip = strdup (buf);
+ return *ip;
+}
+
+gf_boolean_t
+gf_is_loopback_localhost (const struct sockaddr *sa, char *hostname)
+{
+ GF_ASSERT (sa);
+
+ gf_boolean_t is_local = _gf_false;
+ const struct in_addr *addr4 = NULL;
+ const struct in6_addr *addr6 = NULL;
+ uint8_t *ap = NULL;
+ struct in6_addr loopbackaddr6 = IN6ADDR_LOOPBACK_INIT;
+
+ switch (sa->sa_family) {
+ case AF_INET:
+ addr4 = &(((struct sockaddr_in *)sa)->sin_addr);
+ ap = (uint8_t*)&addr4->s_addr;
+ if (ap[0] == 127)
+ is_local = _gf_true;
+ break;
+
+ case AF_INET6:
+ addr6 = &(((struct sockaddr_in6 *)sa)->sin6_addr);
+ if (memcmp (addr6, &loopbackaddr6,
+ sizeof (loopbackaddr6)) == 0)
+ is_local = _gf_true;
+ break;
+
+ default:
+ if (hostname)
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "unknown address family %d for %s",
+ sa->sa_family, hostname);
+ break;
+ }
+
+ return is_local;
+}
+
+gf_boolean_t
+gf_is_local_addr (char *hostname)
+{
+ int32_t ret = -1;
+ struct addrinfo *result = NULL;
+ struct addrinfo *res = NULL;
+ gf_boolean_t found = _gf_false;
+ char *ip = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ ret = getaddrinfo (hostname, NULL, NULL, &result);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "error in getaddrinfo: %s\n",
+ gai_strerror(ret));
+ goto out;
+ }
+
+ for (res = result; res != NULL; res = res->ai_next) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s ",
+ get_ip_from_addrinfo (res, &ip));
+
+ found = gf_is_loopback_localhost (res->ai_addr, hostname)
+ || gf_interface_search (ip);
+ if (found)
+ goto out;
+ }
+
+out:
+ if (result)
+ freeaddrinfo (result);
+
+ if (!found)
+ gf_log (this->name, GF_LOG_DEBUG, "%s is not local", hostname);
+
+ return found;
+}
+
+gf_boolean_t
+gf_is_same_address (char *name1, char *name2)
+{
+ struct addrinfo *addr1 = NULL;
+ struct addrinfo *addr2 = NULL;
+ struct addrinfo *p = NULL;
+ struct addrinfo *q = NULL;
+ gf_boolean_t ret = _gf_false;
+ int gai_err = 0;
+
+ gai_err = getaddrinfo(name1,NULL,NULL,&addr1);
+ if (gai_err != 0) {
+ gf_log (name1, GF_LOG_WARNING,
+ "error in getaddrinfo: %s\n", gai_strerror(gai_err));
+ goto out;
+ }
+
+ gai_err = getaddrinfo(name2,NULL,NULL,&addr2);
+ if (gai_err != 0) {
+ gf_log (name2, GF_LOG_WARNING,
+ "error in getaddrinfo: %s\n", gai_strerror(gai_err));
+ goto out;
+ }
+
+ for (p = addr1; p; p = p->ai_next) {
+ for (q = addr2; q; q = q->ai_next) {
+ if (p->ai_addrlen != q->ai_addrlen) {
+ continue;
+ }
+ if (memcmp(p->ai_addr,q->ai_addr,p->ai_addrlen)) {
+ continue;
+ }
+ ret = _gf_true;
+ goto out;
+ }
+ }
+
+out:
+ if (addr1) {
+ freeaddrinfo(addr1);
+ }
+ if (addr2) {
+ freeaddrinfo(addr2);
+ }
+ return ret;
+
+}
+
+
+/* Sets log file path from user provided arguments */
+int
+gf_set_log_file_path (cmd_args_t *cmd_args)
+{
+ int i = 0;
+ int j = 0;
+ int ret = 0;
+ char tmp_str[1024] = {0,};
+
+ if (!cmd_args)
+ goto done;
+
+ if (cmd_args->mount_point) {
+ j = 0;
+ i = 0;
+ if (cmd_args->mount_point[0] == '/')
+ i = 1;
+ for (; i < strlen (cmd_args->mount_point); i++,j++) {
+ tmp_str[j] = cmd_args->mount_point[i];
+ if (cmd_args->mount_point[i] == '/')
+ tmp_str[j] = '-';
+ }
+
+ ret = gf_asprintf (&cmd_args->log_file,
+ DEFAULT_LOG_FILE_DIRECTORY "/%s.log",
+ tmp_str);
+ if (ret > 0)
+ ret = 0;
+ goto done;
+ }
+
+ if (cmd_args->volfile) {
+ j = 0;
+ i = 0;
+ if (cmd_args->volfile[0] == '/')
+ i = 1;
+ for (; i < strlen (cmd_args->volfile); i++,j++) {
+ tmp_str[j] = cmd_args->volfile[i];
+ if (cmd_args->volfile[i] == '/')
+ tmp_str[j] = '-';
+ }
+ ret = gf_asprintf (&cmd_args->log_file,
+ DEFAULT_LOG_FILE_DIRECTORY "/%s.log",
+ tmp_str);
+ if (ret > 0)
+ ret = 0;
+ goto done;
+ }
+
+ if (cmd_args->volfile_server) {
+
+ ret = gf_asprintf (&cmd_args->log_file,
+ DEFAULT_LOG_FILE_DIRECTORY "/%s-%s-%d.log",
+ cmd_args->volfile_server,
+ cmd_args->volfile_id, getpid());
+ if (ret > 0)
+ ret = 0;
+ }
+done:
+ return ret;
+}
+
+int
+gf_thread_create (pthread_t *thread, const pthread_attr_t *attr,
+ void *(*start_routine)(void *), void *arg)
+{
+ sigset_t set, old;
+ int ret;
+
+ sigemptyset (&set);
+
+ sigfillset (&set);
+ sigdelset (&set, SIGSEGV);
+ sigdelset (&set, SIGBUS);
+ sigdelset (&set, SIGILL);
+ sigdelset (&set, SIGSYS);
+ sigdelset (&set, SIGFPE);
+ sigdelset (&set, SIGABRT);
+
+ pthread_sigmask (SIG_BLOCK, &set, &old);
+
+ ret = pthread_create (thread, attr, start_routine, arg);
+
+ pthread_sigmask (SIG_SETMASK, &old, NULL);
+
+ return ret;
+}
diff --git a/libglusterfs/src/common-utils.h b/libglusterfs/src/common-utils.h
index 248efc83c..3c99a4212 100644
--- a/libglusterfs/src/common-utils.h
+++ b/libglusterfs/src/common-utils.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _COMMON_UTILS_H
@@ -32,9 +23,10 @@
#include <string.h>
#include <assert.h>
#include <pthread.h>
+#include <openssl/md5.h>
#ifndef GF_BSD_HOST_OS
#include <alloca.h>
-#endif
+#endif
void trap (void);
@@ -47,6 +39,8 @@ void trap (void);
#include "glusterfs.h"
#include "locking.h"
#include "mem-pool.h"
+#include "uuid.h"
+
#define min(a,b) ((a)<(b)?(a):(b))
@@ -67,78 +61,186 @@ void trap (void);
#define GF_UNIT_TB_STRING "TB"
#define GF_UNIT_PB_STRING "PB"
+#define GF_UNIT_PERCENT_STRING "%"
+
+#define GEOREP "geo-replication"
+#define GHADOOP "glusterfs-hadoop"
+
+#define GF_SELINUX_XATTR_KEY "security.selinux"
+
+#define WIPE(statp) do { typeof(*statp) z = {0,}; if (statp) *statp = z; } while (0)
+
+#define IS_EXT_FS(fs_name) \
+ (!strcmp (fs_name, "ext2") || \
+ !strcmp (fs_name, "ext3") || \
+ !strcmp (fs_name, "ext4"))
-#define ERR_ABORT(ptr) \
- if (ptr == NULL) { \
- abort (); \
- }
+/* Defining this here as it is needed by glusterd for setting
+ * nfs port in volume status.
+ */
+#define GF_NFS3_PORT 2049
+#define GF_CLIENT_PORT_CEILING 1024
-enum _gf_boolean
+enum _gf_boolean
{
- _gf_false = 0,
+ _gf_false = 0,
_gf_true = 1
};
+/*
+ * we could have initialized these as +ve values and treated
+ * them as negative while comparing etc.. (which would have
+ * saved us with the pain of assigning values), but since we
+ * only have a couple of clients that use this feature, it's
+ * okay.
+ */
+enum _gf_client_pid
+{
+ GF_CLIENT_PID_MAX = 0,
+ GF_CLIENT_PID_GSYNCD = -1,
+ GF_CLIENT_PID_HADOOP = -2,
+ GF_CLIENT_PID_DEFRAG = -3,
+};
+
typedef enum _gf_boolean gf_boolean_t;
+typedef enum _gf_client_pid gf_client_pid_t;
+typedef int (*gf_cmp) (void *, void *);
void gf_global_variable_init(void);
-void set_global_ctx_ptr (glusterfs_ctx_t *ctx);
-glusterfs_ctx_t *get_global_ctx_ptr (void);
in_addr_t gf_resolve_ip (const char *hostname, void **dnscache);
-void gf_log_volume_file (FILE *specfp);
-void gf_print_trace (int32_t signal);
-
-extern char *gf_fop_list[GF_FOP_MAXVALUE];
-extern char *gf_mop_list[GF_MOP_MAXVALUE];
-extern char *gf_cbk_list[GF_CBK_MAXVALUE];
+void gf_log_dump_graph (FILE *specfp, glusterfs_graph_t *graph);
+void gf_print_trace (int32_t signal, glusterfs_ctx_t *ctx);
+int gf_set_log_file_path (cmd_args_t *cmd_args);
#define VECTORSIZE(count) (count * (sizeof (struct iovec)))
#define STRLEN_0(str) (strlen(str) + 1)
+
#define VALIDATE_OR_GOTO(arg,label) do { \
if (!arg) { \
errno = EINVAL; \
- gf_log ((this ? this->name : "(Govinda! Govinda!)"), \
- GF_LOG_ERROR, \
- "invalid argument: " #arg); \
+ gf_log_callingfn ((this ? (this->name) : \
+ "(Govinda! Govinda!)"), \
+ GF_LOG_WARNING, \
+ "invalid argument: " #arg); \
goto label; \
} \
- } while (0);
-
-#define GF_VALIDATE_OR_GOTO(name,arg,label) do { \
- if (!arg) { \
- errno = EINVAL; \
- gf_log (name, GF_LOG_ERROR, \
- "invalid argument: " #arg); \
- goto label; \
- } \
- } while (0);
-
-#define GF_VALIDATE_OR_GOTO_WITH_ERROR(name, arg, label, error) do { \
- if (!arg) { \
- errno = error; \
- gf_log (name, GF_LOG_ERROR, \
- "invalid argument: " #arg); \
- goto label; \
- } \
- }while (0);
-
-#define GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO(name,arg,label) \
- do { \
- GF_VALIDATE_OR_GOTO (name, arg, label); \
- if ((arg[0]) != '/') { \
- errno = EINVAL; \
- gf_log (name, GF_LOG_ERROR, \
- "invalid argument: " #arg); \
- goto label; \
- } \
- } while (0);
+ } while (0)
+
+#define GF_VALIDATE_OR_GOTO(name,arg,label) do { \
+ if (!arg) { \
+ errno = EINVAL; \
+ gf_log_callingfn (name, GF_LOG_ERROR, \
+ "invalid argument: " #arg); \
+ goto label; \
+ } \
+ } while (0)
+
+#define GF_VALIDATE_OR_GOTO_WITH_ERROR(name, arg, label, errno, error) do { \
+ if (!arg) { \
+ errno = error; \
+ gf_log_callingfn (name, GF_LOG_ERROR, \
+ "invalid argument: " #arg); \
+ goto label; \
+ } \
+ }while (0)
+
+#define GF_ASSERT_AND_GOTO_WITH_ERROR(name, arg, label, errno, error) do { \
+ if (!arg) { \
+ GF_ASSERT (0); \
+ errno = error; \
+ goto label; \
+ } \
+ }while (0)
+
+#define GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO(name,arg,label) \
+ do { \
+ GF_VALIDATE_OR_GOTO (name, arg, label); \
+ if ((arg[0]) != '/') { \
+ errno = EINVAL; \
+ gf_log_callingfn (name, GF_LOG_ERROR, \
+ "invalid argument: " #arg); \
+ goto label; \
+ } \
+ } while (0)
+
+#define GF_REMOVE_SLASH_FROM_PATH(path, string) \
+ do { \
+ int i = 0; \
+ for (i = 1; i < strlen (path); i++) { \
+ string[i-1] = path[i]; \
+ if (string[i-1] == '/') \
+ string[i-1] = '-'; \
+ } \
+ } while (0)
+
+#define GF_IF_INTERNAL_XATTR_GOTO(pattern, dict, op_errno, label) \
+ do { \
+ if (!dict) { \
+ gf_log (this->name, GF_LOG_ERROR, \
+ "setxattr dict is null"); \
+ goto label; \
+ } \
+ if (dict_foreach_fnmatch (dict, pattern, \
+ dict_null_foreach_fn, \
+ NULL) > 0) { \
+ op_errno = EPERM; \
+ gf_log (this->name, GF_LOG_ERROR, \
+ "attempt to set internal" \
+ " xattr: %s: %s", pattern, \
+ strerror (op_errno)); \
+ goto label; \
+ } \
+ } while (0)
+
+#define GF_IF_NATIVE_XATTR_GOTO(pattern, key, op_errno, label) \
+ do { \
+ if (!key) { \
+ gf_log (this->name, GF_LOG_ERROR, \
+ "no key for removexattr"); \
+ goto label; \
+ } \
+ if (!fnmatch (pattern, key, 0)) { \
+ op_errno = EPERM; \
+ gf_log (this->name, GF_LOG_ERROR, \
+ "attempt to remove internal " \
+ "xattr: %s: %s", key, \
+ strerror (op_errno)); \
+ goto label; \
+ } \
+ } while (0)
+
#define GF_FILE_CONTENT_REQUESTED(_xattr_req,_content_limit) \
(dict_get_uint64 (_xattr_req, "glusterfs.content", _content_limit) == 0)
+#ifdef DEBUG
+#define GF_ASSERT(x) assert (x);
+#else
+#define GF_ASSERT(x) \
+ do { \
+ if (!(x)) { \
+ gf_log_callingfn ("", GF_LOG_ERROR, \
+ "Assertion failed: " #x); \
+ } \
+ } while (0)
+#endif
+
+#define GF_UUID_ASSERT(u) \
+ if (uuid_is_null (u))\
+ GF_ASSERT (!"uuid null");
+
+union gf_sock_union {
+ struct sockaddr_storage storage;
+ struct sockaddr_in6 sin6;
+ struct sockaddr_in sin;
+ struct sockaddr sa;
+};
+
+#define GF_HIDDEN_PATH ".glusterfs"
+
static inline void
iov_free (struct iovec *vector, int count)
{
@@ -147,7 +249,7 @@ iov_free (struct iovec *vector, int count)
for (i = 0; i < count; i++)
FREE (vector[i].iov_base);
- FREE (vector);
+ GF_FREE (vector);
}
@@ -165,14 +267,14 @@ iov_length (const struct iovec *vector, int count)
static inline struct iovec *
-iov_dup (struct iovec *vector, int count)
+iov_dup (const struct iovec *vector, int count)
{
int bytecount = 0;
int i;
struct iovec *newvec = NULL;
bytecount = (count * sizeof (struct iovec));
- newvec = MALLOC (bytecount);
+ newvec = GF_MALLOC (bytecount, gf_common_mt_iovec);
if (!newvec)
return NULL;
@@ -245,6 +347,65 @@ iov_unload (char *buf, const struct iovec *vector, int count)
}
+static inline size_t
+iov_load (const struct iovec *vector, int count, char *buf, int size)
+{
+ size_t left = size;
+ size_t cp = 0;
+ int ret = 0;
+ int i = 0;
+
+ while (left && i < count) {
+ cp = min (vector[i].iov_len, left);
+ if (vector[i].iov_base != buf + (size - left))
+ memcpy (vector[i].iov_base, buf + (size - left), cp);
+ ret += cp;
+ left -= cp;
+ if (left)
+ i++;
+ }
+
+ return ret;
+}
+
+
+static inline size_t
+iov_copy (const struct iovec *dst, int dcnt,
+ const struct iovec *src, int scnt)
+{
+ size_t ret = 0;
+ size_t left = 0;
+ size_t min_i = 0;
+ int s_i = 0, s_ii = 0;
+ int d_i = 0, d_ii = 0;
+
+ ret = min (iov_length (dst, dcnt), iov_length (src, scnt));
+ left = ret;
+
+ while (left) {
+ min_i = min (dst[d_i].iov_len - d_ii, src[s_i].iov_len - s_ii);
+ memcpy (dst[d_i].iov_base + d_ii, src[s_i].iov_base + s_ii,
+ min_i);
+
+ d_ii += min_i;
+ if (d_ii == dst[d_i].iov_len) {
+ d_ii = 0;
+ d_i++;
+ }
+
+ s_ii += min_i;
+ if (s_ii == src[s_i].iov_len) {
+ s_ii = 0;
+ s_i++;
+ }
+
+ left -= min_i;
+ }
+
+ return ret;
+}
+
+
static inline int
mem_0filled (const char *buf, size_t size)
{
@@ -282,7 +443,7 @@ memdup (const void *ptr, size_t size)
{
void *newptr = NULL;
- newptr = MALLOC (size);
+ newptr = GF_MALLOC (size, gf_common_mt_memdup);
if (!newptr)
return NULL;
@@ -290,9 +451,55 @@ memdup (const void *ptr, size_t size)
return newptr;
}
+typedef enum {
+ gf_timefmt_default = 0,
+ gf_timefmt_FT = 0, /* YYYY-MM-DD hh:mm:ss */
+ gf_timefmt_Ymd_T, /* YYYY/MM-DD-hh:mm:ss */
+ gf_timefmt_bdT, /* ddd DD hh:mm:ss */
+ gf_timefmt_F_HMS, /* YYYY-MM-DD hhmmss */
+ gf_timefmt_last
+} gf_timefmts;
+
+static inline void
+gf_time_fmt (char *dst, size_t sz_dst, time_t utime, unsigned int fmt)
+{
+ extern void _gf_timestuff (gf_timefmts *, const char ***, const char ***);
+ static gf_timefmts timefmt_last = (gf_timefmts) -1;
+ static const char **fmts;
+ static const char **zeros;
+ struct tm tm;
+
+ if (timefmt_last == -1)
+ _gf_timestuff (&timefmt_last, &fmts, &zeros);
+ if (timefmt_last < fmt) fmt = gf_timefmt_default;
+ if (utime && gmtime_r (&utime, &tm) != NULL) {
+ strftime (dst, sz_dst, fmts[fmt], &tm);
+ } else {
+ strncpy (dst, "N/A", sz_dst);
+ }
+}
+
+int
+mkdir_p (char *path, mode_t mode, gf_boolean_t allow_symlinks);
+/*
+ * rounds up nr to power of two. If nr is already a power of two, just returns
+ * nr
+ */
+
+int
+gf_lstat_dir (const char *path, struct stat *stbuf_in);
+
+int32_t gf_roundup_power_of_two (int32_t nr);
+
+/*
+ * rounds up nr to next power of two. If nr is already a power of two, next
+ * power of two is returned.
+ */
+
+int32_t gf_roundup_next_power_of_two (int32_t nr);
char *gf_trim (char *string);
-int gf_strsplit (const char *str, const char *delim,
+int gf_strsplit (const char *str, const char *delim,
char ***tokens, int *token_count);
int gf_volume_name_validate (const char *volume_name);
@@ -313,6 +520,8 @@ int gf_string2uint16 (const char *str, uint16_t *n);
int gf_string2uint32 (const char *str, uint32_t *n);
int gf_string2uint64 (const char *str, uint64_t *n);
+int gf_strstr (const char *str, const char *delim, const char *match);
+
int gf_string2ulong_base10 (const char *str, unsigned long *n);
int gf_string2uint_base10 (const char *str, unsigned int *n);
int gf_string2uint8_base10 (const char *str, uint8_t *n);
@@ -321,15 +530,65 @@ int gf_string2uint32_base10 (const char *str, uint32_t *n);
int gf_string2uint64_base10 (const char *str, uint64_t *n);
int gf_string2bytesize (const char *str, uint64_t *n);
+int gf_string2percent_or_bytesize (const char *str, uint64_t *n,
+ gf_boolean_t *is_percent);
int gf_string2boolean (const char *str, gf_boolean_t *b);
-int gf_string2percent (const char *str, uint32_t *n);
+int gf_string2percent (const char *str, double *n);
int gf_string2time (const char *str, uint32_t *n);
int gf_lockfd (int fd);
int gf_unlockfd (int fd);
int get_checksum_for_file (int fd, uint32_t *checksum);
+int log_base2 (unsigned long x);
+
+int get_checksum_for_path (char *path, uint32_t *checksum);
+
+char *strtail (char *str, const char *pattern);
+void skipwhite (char **s);
+char *nwstrtail (char *str, char *pattern);
+void skip_word (char **str);
+/* returns a new string with nth word of given string. n>=1 */
+char *get_nth_word (const char *str, int n);
+
+char valid_host_name (char *address, int length);
+char valid_ipv4_address (char *address, int length, gf_boolean_t wildcard_acc);
+char valid_ipv6_address (char *address, int length, gf_boolean_t wildcard_acc);
+char valid_internet_address (char *address, gf_boolean_t wildcard_acc);
+char valid_ipv4_wildcard_check (char *address);
+char valid_ipv6_wildcard_check (char *address);
+char valid_wildcard_internet_address (char *address);
+gf_boolean_t gf_sock_union_equal_addr (union gf_sock_union *a,
+ union gf_sock_union *b);
+
+char *uuid_utoa (uuid_t uuid);
+char *uuid_utoa_r (uuid_t uuid, char *dst);
+char *lkowner_utoa (gf_lkowner_t *lkowner);
+char *lkowner_utoa_r (gf_lkowner_t *lkowner, char *dst, int len);
+
+void gf_array_insertionsort (void *a, int l, int r, size_t elem_size,
+ gf_cmp cmp);
+int gf_is_str_int (const char *value);
+
+char *gf_uint64_2human_readable (uint64_t);
+int validate_brick_name (char *brick);
+char *get_host_name (char *word, char **host);
+char *get_path_name (char *word, char **path);
+void gf_path_strip_trailing_slashes (char *path);
+uint64_t get_mem_size ();
+int gf_strip_whitespace (char *str, int len);
+int gf_canonicalize_path (char *path);
+char *generate_glusterfs_ctx_id (void);
+char *gf_get_reserved_ports();
+int gf_process_reserved_ports (gf_boolean_t ports[]);
+gf_boolean_t gf_ports_reserved (char *blocked_port, gf_boolean_t *ports);
+int gf_get_hostname_from_ip (char *client_ip, char **hostname);
+gf_boolean_t gf_is_local_addr (char *hostname);
+gf_boolean_t gf_is_same_address (char *host1, char *host2);
+void md5_wrapper(const unsigned char *data, size_t len, char *md5);
+
+int gf_thread_create (pthread_t *thread, const pthread_attr_t *attr,
+ void *(*start_routine)(void *), void *arg);
#endif /* _COMMON_UTILS_H */
-
diff --git a/libglusterfs/src/compat-errno.c b/libglusterfs/src/compat-errno.c
index 63acb361a..fd5cc49ce 100644
--- a/libglusterfs/src/compat-errno.c
+++ b/libglusterfs/src/compat-errno.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -27,868 +18,870 @@
#include "compat-errno.h"
-static int32_t gf_error_to_errno_array[1024];
+static int32_t gf_error_to_errno_array[1024];
static int32_t gf_errno_to_error_array[1024];
static int32_t gf_compat_errno_init_done;
#ifdef GF_SOLARIS_HOST_OS
-static void
+static void
init_compat_errno_arrays ()
{
-/* ENOMSG 35 / * No message of desired type */
- gf_error_to_errno_array[GF_ERROR_CODE_NOMSG] = ENOMSG;
- gf_errno_to_error_array[ENOMSG] = GF_ERROR_CODE_NOMSG;
+/* ENOMSG 35 / * No message of desired type */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOMSG] = ENOMSG;
+ gf_errno_to_error_array[ENOMSG] = GF_ERROR_CODE_NOMSG;
-/* EIDRM 36 / * Identifier removed */
- gf_error_to_errno_array[GF_ERROR_CODE_IDRM] = EIDRM;
- gf_errno_to_error_array[EIDRM] = GF_ERROR_CODE_IDRM;
+/* EIDRM 36 / * Identifier removed */
+ gf_error_to_errno_array[GF_ERROR_CODE_IDRM] = EIDRM;
+ gf_errno_to_error_array[EIDRM] = GF_ERROR_CODE_IDRM;
-/* ECHRNG 37 / * Channel number out of range */
- gf_error_to_errno_array[GF_ERROR_CODE_CHRNG] = ECHRNG;
- gf_errno_to_error_array[ECHRNG] = GF_ERROR_CODE_CHRNG;
+/* ECHRNG 37 / * Channel number out of range */
+ gf_error_to_errno_array[GF_ERROR_CODE_CHRNG] = ECHRNG;
+ gf_errno_to_error_array[ECHRNG] = GF_ERROR_CODE_CHRNG;
-/* EL2NSYNC 38 / * Level 2 not synchronized */
- gf_error_to_errno_array[GF_ERROR_CODE_L2NSYNC] = EL2NSYNC;
- gf_errno_to_error_array[EL2NSYNC] = GF_ERROR_CODE_L2NSYNC;
+/* EL2NSYNC 38 / * Level 2 not synchronized */
+ gf_error_to_errno_array[GF_ERROR_CODE_L2NSYNC] = EL2NSYNC;
+ gf_errno_to_error_array[EL2NSYNC] = GF_ERROR_CODE_L2NSYNC;
-/* EL3HLT 39 / * Level 3 halted */
- gf_error_to_errno_array[GF_ERROR_CODE_L3HLT] = EL3HLT;
- gf_errno_to_error_array[EL3HLT] = GF_ERROR_CODE_L3HLT;
+/* EL3HLT 39 / * Level 3 halted */
+ gf_error_to_errno_array[GF_ERROR_CODE_L3HLT] = EL3HLT;
+ gf_errno_to_error_array[EL3HLT] = GF_ERROR_CODE_L3HLT;
-/* EL3RST 40 / * Level 3 reset */
- gf_error_to_errno_array[GF_ERROR_CODE_L3RST] = EL3RST;
- gf_errno_to_error_array[EL3RST] = GF_ERROR_CODE_L3RST;
+/* EL3RST 40 / * Level 3 reset */
+ gf_error_to_errno_array[GF_ERROR_CODE_L3RST] = EL3RST;
+ gf_errno_to_error_array[EL3RST] = GF_ERROR_CODE_L3RST;
-/* ELNRNG 41 / * Link number out of range */
- gf_error_to_errno_array[GF_ERROR_CODE_LNRNG] = ELNRNG;
- gf_errno_to_error_array[ELNRNG] = GF_ERROR_CODE_LNRNG;
+/* ELNRNG 41 / * Link number out of range */
+ gf_error_to_errno_array[GF_ERROR_CODE_LNRNG] = ELNRNG;
+ gf_errno_to_error_array[ELNRNG] = GF_ERROR_CODE_LNRNG;
-/* EUNATCH 42 / * Protocol driver not attached */
- gf_error_to_errno_array[GF_ERROR_CODE_UNATCH] = EUNATCH;
- gf_errno_to_error_array[EUNATCH] = GF_ERROR_CODE_UNATCH;
+/* EUNATCH 42 / * Protocol driver not attached */
+ gf_error_to_errno_array[GF_ERROR_CODE_UNATCH] = EUNATCH;
+ gf_errno_to_error_array[EUNATCH] = GF_ERROR_CODE_UNATCH;
-/* ENOCSI 43 / * No CSI structure available */
- gf_error_to_errno_array[GF_ERROR_CODE_NOCSI] = ENOCSI;
- gf_errno_to_error_array[ENOCSI] = GF_ERROR_CODE_NOCSI;
+/* ENOCSI 43 / * No CSI structure available */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOCSI] = ENOCSI;
+ gf_errno_to_error_array[ENOCSI] = GF_ERROR_CODE_NOCSI;
-/* EL2HLT 44 / * Level 2 halted */
- gf_error_to_errno_array[GF_ERROR_CODE_L2HLT] = EL2HLT;
- gf_errno_to_error_array[EL2HLT] = GF_ERROR_CODE_L2HLT;
+/* EL2HLT 44 / * Level 2 halted */
+ gf_error_to_errno_array[GF_ERROR_CODE_L2HLT] = EL2HLT;
+ gf_errno_to_error_array[EL2HLT] = GF_ERROR_CODE_L2HLT;
-/* EDEADLK 45 / * Deadlock condition. */
- gf_error_to_errno_array[GF_ERROR_CODE_DEADLK] = EDEADLK;
- gf_errno_to_error_array[EDEADLK] = GF_ERROR_CODE_DEADLK;
+/* EDEADLK 45 / * Deadlock condition. */
+ gf_error_to_errno_array[GF_ERROR_CODE_DEADLK] = EDEADLK;
+ gf_errno_to_error_array[EDEADLK] = GF_ERROR_CODE_DEADLK;
-/* ENOLCK 46 / * No record locks available. */
- gf_error_to_errno_array[GF_ERROR_CODE_NOLCK] = ENOLCK;
- gf_errno_to_error_array[ENOLCK] = GF_ERROR_CODE_NOLCK;
+/* ENOLCK 46 / * No record locks available. */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOLCK] = ENOLCK;
+ gf_errno_to_error_array[ENOLCK] = GF_ERROR_CODE_NOLCK;
-/* ECANCELED 47 / * Operation canceled */
- gf_error_to_errno_array[GF_ERROR_CODE_CANCELED] = ECANCELED;
- gf_errno_to_error_array[ECANCELED] = GF_ERROR_CODE_CANCELED;
+/* ECANCELED 47 / * Operation canceled */
+ gf_error_to_errno_array[GF_ERROR_CODE_CANCELED] = ECANCELED;
+ gf_errno_to_error_array[ECANCELED] = GF_ERROR_CODE_CANCELED;
-/* ENOTSUP 48 / * Operation not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTSUPP] = ENOTSUP;
- gf_errno_to_error_array[ENOTSUP] = GF_ERROR_CODE_NOTSUPP;
+/* ENOTSUP 48 / * Operation not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTSUPP] = ENOTSUP;
+ gf_errno_to_error_array[ENOTSUP] = GF_ERROR_CODE_NOTSUPP;
/* Filesystem Quotas */
-/* EDQUOT 49 / * Disc quota exceeded */
- gf_error_to_errno_array[GF_ERROR_CODE_DQUOT] = EDQUOT;
- gf_errno_to_error_array[EDQUOT] = GF_ERROR_CODE_DQUOT;
+/* EDQUOT 49 / * Disc quota exceeded */
+ gf_error_to_errno_array[GF_ERROR_CODE_DQUOT] = EDQUOT;
+ gf_errno_to_error_array[EDQUOT] = GF_ERROR_CODE_DQUOT;
/* Convergent Error Returns */
-/* EBADE 50 / * invalid exchange */
- gf_error_to_errno_array[GF_ERROR_CODE_BADE] = EBADE;
- gf_errno_to_error_array[EBADE] = GF_ERROR_CODE_BADE;
-/* EBADR 51 / * invalid request descriptor */
- gf_error_to_errno_array[GF_ERROR_CODE_BADR] = EBADR;
- gf_errno_to_error_array[EBADR] = GF_ERROR_CODE_BADR;
-/* EXFULL 52 / * exchange full */
- gf_error_to_errno_array[GF_ERROR_CODE_XFULL] = EXFULL;
- gf_errno_to_error_array[EXFULL] = GF_ERROR_CODE_XFULL;
-/* ENOANO 53 / * no anode */
- gf_error_to_errno_array[GF_ERROR_CODE_NOANO] = ENOANO;
- gf_errno_to_error_array[ENOANO] = GF_ERROR_CODE_NOANO;
-/* EBADRQC 54 / * invalid request code */
- gf_error_to_errno_array[GF_ERROR_CODE_BADRQC] = EBADRQC;
- gf_errno_to_error_array[EBADRQC] = GF_ERROR_CODE_BADRQC;
-/* EBADSLT 55 / * invalid slot */
- gf_error_to_errno_array[GF_ERROR_CODE_BADSLT] = EBADSLT;
- gf_errno_to_error_array[EBADSLT] = GF_ERROR_CODE_BADSLT;
-/* EDEADLOCK 56 / * file locking deadlock error */
+/* EBADE 50 / * invalid exchange */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADE] = EBADE;
+ gf_errno_to_error_array[EBADE] = GF_ERROR_CODE_BADE;
+/* EBADR 51 / * invalid request descriptor */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADR] = EBADR;
+ gf_errno_to_error_array[EBADR] = GF_ERROR_CODE_BADR;
+/* EXFULL 52 / * exchange full */
+ gf_error_to_errno_array[GF_ERROR_CODE_XFULL] = EXFULL;
+ gf_errno_to_error_array[EXFULL] = GF_ERROR_CODE_XFULL;
+/* ENOANO 53 / * no anode */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOANO] = ENOANO;
+ gf_errno_to_error_array[ENOANO] = GF_ERROR_CODE_NOANO;
+/* EBADRQC 54 / * invalid request code */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADRQC] = EBADRQC;
+ gf_errno_to_error_array[EBADRQC] = GF_ERROR_CODE_BADRQC;
+/* EBADSLT 55 / * invalid slot */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADSLT] = EBADSLT;
+ gf_errno_to_error_array[EBADSLT] = GF_ERROR_CODE_BADSLT;
+/* EDEADLOCK 56 / * file locking deadlock error */
/* This is same as EDEADLK on linux */
- gf_error_to_errno_array[GF_ERROR_CODE_DEADLK] = EDEADLOCK;
- gf_errno_to_error_array[EDEADLOCK] = GF_ERROR_CODE_DEADLK;
+ gf_error_to_errno_array[GF_ERROR_CODE_DEADLK] = EDEADLOCK;
+ gf_errno_to_error_array[EDEADLOCK] = GF_ERROR_CODE_DEADLK;
-/* EBFONT 57 / * bad font file fmt */
- gf_error_to_errno_array[GF_ERROR_CODE_BFONT] = EBFONT;
- gf_errno_to_error_array[EBFONT] = GF_ERROR_CODE_BFONT;
+/* EBFONT 57 / * bad font file fmt */
+ gf_error_to_errno_array[GF_ERROR_CODE_BFONT] = EBFONT;
+ gf_errno_to_error_array[EBFONT] = GF_ERROR_CODE_BFONT;
/* Interprocess Robust Locks */
-/* EOWNERDEAD 58 / * process died with the lock */
- gf_error_to_errno_array[GF_ERROR_CODE_OWNERDEAD] = EOWNERDEAD;
- gf_errno_to_error_array[EOWNERDEAD] = GF_ERROR_CODE_OWNERDEAD;
-/* ENOTRECOVERABLE 59 / * lock is not recoverable */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTRECOVERABLE] = ENOTRECOVERABLE;
- gf_errno_to_error_array[ENOTRECOVERABLE] = GF_ERROR_CODE_NOTRECOVERABLE;
+/* EOWNERDEAD 58 / * process died with the lock */
+ gf_error_to_errno_array[GF_ERROR_CODE_OWNERDEAD] = EOWNERDEAD;
+ gf_errno_to_error_array[EOWNERDEAD] = GF_ERROR_CODE_OWNERDEAD;
+/* ENOTRECOVERABLE 59 / * lock is not recoverable */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTRECOVERABLE] = ENOTRECOVERABLE;
+ gf_errno_to_error_array[ENOTRECOVERABLE] = GF_ERROR_CODE_NOTRECOVERABLE;
/* stream problems */
-/* ENOSTR 60 / * Device not a stream */
- gf_error_to_errno_array[GF_ERROR_CODE_NOSTR] = ENOSTR;
- gf_errno_to_error_array[ENOSTR] = GF_ERROR_CODE_NOSTR;
-/* ENODATA 61 / * no data (for no delay io) */
- gf_error_to_errno_array[GF_ERROR_CODE_NODATA] = ENODATA;
- gf_errno_to_error_array[ENODATA] = GF_ERROR_CODE_NODATA;
-/* ETIME 62 / * timer expired */
- gf_error_to_errno_array[GF_ERROR_CODE_TIME] = ETIME;
- gf_errno_to_error_array[ETIME] = GF_ERROR_CODE_TIME;
-/* ENOSR 63 / * out of streams resources */
- gf_error_to_errno_array[GF_ERROR_CODE_NOSR] = ENOSR;
- gf_errno_to_error_array[ENOSR] = GF_ERROR_CODE_NOSR;
-
-/* ENONET 64 / * Machine is not on the network */
- gf_error_to_errno_array[GF_ERROR_CODE_NONET] = ENONET;
- gf_errno_to_error_array[ENONET] = GF_ERROR_CODE_NONET;
-/* ENOPKG 65 / * Package not installed */
- gf_error_to_errno_array[GF_ERROR_CODE_NOPKG] = ENOPKG;
- gf_errno_to_error_array[ENOPKG] = GF_ERROR_CODE_NOPKG;
-/* EREMOTE 66 / * The object is remote */
- gf_error_to_errno_array[GF_ERROR_CODE_REMOTE] = EREMOTE;
- gf_errno_to_error_array[EREMOTE] = GF_ERROR_CODE_REMOTE;
-/* ENOLINK 67 / * the link has been severed */
- gf_error_to_errno_array[GF_ERROR_CODE_NOLINK] = ENOLINK;
- gf_errno_to_error_array[ENOLINK] = GF_ERROR_CODE_NOLINK;
-/* EADV 68 / * advertise error */
- gf_error_to_errno_array[GF_ERROR_CODE_ADV] = EADV;
- gf_errno_to_error_array[EADV] = GF_ERROR_CODE_ADV;
-/* ESRMNT 69 / * srmount error */
- gf_error_to_errno_array[GF_ERROR_CODE_SRMNT] = ESRMNT;
- gf_errno_to_error_array[ESRMNT] = GF_ERROR_CODE_SRMNT;
-
-/* ECOMM 70 / * Communication error on send */
- gf_error_to_errno_array[GF_ERROR_CODE_COMM] = ECOMM;
- gf_errno_to_error_array[ECOMM] = GF_ERROR_CODE_COMM;
-/* EPROTO 71 / * Protocol error */
- gf_error_to_errno_array[GF_ERROR_CODE_PROTO] = EPROTO;
- gf_errno_to_error_array[EPROTO] = GF_ERROR_CODE_PROTO;
+/* ENOSTR 60 / * Device not a stream */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOSTR] = ENOSTR;
+ gf_errno_to_error_array[ENOSTR] = GF_ERROR_CODE_NOSTR;
+/* ENODATA 61 / * no data (for no delay io) */
+ gf_error_to_errno_array[GF_ERROR_CODE_NODATA] = ENODATA;
+ gf_errno_to_error_array[ENODATA] = GF_ERROR_CODE_NODATA;
+/* ETIME 62 / * timer expired */
+ gf_error_to_errno_array[GF_ERROR_CODE_TIME] = ETIME;
+ gf_errno_to_error_array[ETIME] = GF_ERROR_CODE_TIME;
+/* ENOSR 63 / * out of streams resources */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOSR] = ENOSR;
+ gf_errno_to_error_array[ENOSR] = GF_ERROR_CODE_NOSR;
+
+/* ENONET 64 / * Machine is not on the network */
+ gf_error_to_errno_array[GF_ERROR_CODE_NONET] = ENONET;
+ gf_errno_to_error_array[ENONET] = GF_ERROR_CODE_NONET;
+/* ENOPKG 65 / * Package not installed */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOPKG] = ENOPKG;
+ gf_errno_to_error_array[ENOPKG] = GF_ERROR_CODE_NOPKG;
+/* EREMOTE 66 / * The object is remote */
+ gf_error_to_errno_array[GF_ERROR_CODE_REMOTE] = EREMOTE;
+ gf_errno_to_error_array[EREMOTE] = GF_ERROR_CODE_REMOTE;
+/* ENOLINK 67 / * the link has been severed */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOLINK] = ENOLINK;
+ gf_errno_to_error_array[ENOLINK] = GF_ERROR_CODE_NOLINK;
+/* EADV 68 / * advertise error */
+ gf_error_to_errno_array[GF_ERROR_CODE_ADV] = EADV;
+ gf_errno_to_error_array[EADV] = GF_ERROR_CODE_ADV;
+/* ESRMNT 69 / * srmount error */
+ gf_error_to_errno_array[GF_ERROR_CODE_SRMNT] = ESRMNT;
+ gf_errno_to_error_array[ESRMNT] = GF_ERROR_CODE_SRMNT;
+
+/* ECOMM 70 / * Communication error on send */
+ gf_error_to_errno_array[GF_ERROR_CODE_COMM] = ECOMM;
+ gf_errno_to_error_array[ECOMM] = GF_ERROR_CODE_COMM;
+/* EPROTO 71 / * Protocol error */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROTO] = EPROTO;
+ gf_errno_to_error_array[EPROTO] = GF_ERROR_CODE_PROTO;
/* Interprocess Robust Locks */
-/* ELOCKUNMAPPED 72 / * locked lock was unmapped */
- gf_error_to_errno_array[GF_ERROR_CODE_LOCKUNMAPPED] = ELOCKUNMAPPED;
- gf_errno_to_error_array[ELOCKUNMAPPED] = GF_ERROR_CODE_LOCKUNMAPPED;
-
-/* ENOTACTIVE 73 / * Facility is not active */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTACTIVE] = ENOTACTIVE;
- gf_errno_to_error_array[ENOTACTIVE] = GF_ERROR_CODE_NOTACTIVE;
-/* EMULTIHOP 74 / * multihop attempted */
- gf_error_to_errno_array[GF_ERROR_CODE_MULTIHOP] = EMULTIHOP;
- gf_errno_to_error_array[EMULTIHOP] = GF_ERROR_CODE_MULTIHOP;
-/* EBADMSG 77 / * trying to read unreadable message */
- gf_error_to_errno_array[GF_ERROR_CODE_BADMSG] = EBADMSG;
- gf_errno_to_error_array[EBADMSG] = GF_ERROR_CODE_BADMSG;
-/* ENAMETOOLONG 78 / * path name is too long */
- gf_error_to_errno_array[GF_ERROR_CODE_NAMETOOLONG] = ENAMETOOLONG;
- gf_errno_to_error_array[ENAMETOOLONG] = GF_ERROR_CODE_NAMETOOLONG;
-/* EOVERFLOW 79 / * value too large to be stored in data type */
- gf_error_to_errno_array[GF_ERROR_CODE_OVERFLOW] = EOVERFLOW;
- gf_errno_to_error_array[EOVERFLOW] = GF_ERROR_CODE_OVERFLOW;
-/* ENOTUNIQ 80 / * given log. name not unique */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTUNIQ] = ENOTUNIQ;
- gf_errno_to_error_array[ENOTUNIQ] = GF_ERROR_CODE_NOTUNIQ;
-/* EBADFD 81 / * f.d. invalid for this operation */
- gf_error_to_errno_array[GF_ERROR_CODE_BADFD] = EBADFD;
- gf_errno_to_error_array[EBADFD] = GF_ERROR_CODE_BADFD;
-/* EREMCHG 82 / * Remote address changed */
- gf_error_to_errno_array[GF_ERROR_CODE_REMCHG] = EREMCHG;
- gf_errno_to_error_array[EREMCHG] = GF_ERROR_CODE_REMCHG;
+/* ELOCKUNMAPPED 72 / * locked lock was unmapped */
+ gf_error_to_errno_array[GF_ERROR_CODE_LOCKUNMAPPED] = ELOCKUNMAPPED;
+ gf_errno_to_error_array[ELOCKUNMAPPED] = GF_ERROR_CODE_LOCKUNMAPPED;
+
+/* ENOTACTIVE 73 / * Facility is not active */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTACTIVE] = ENOTACTIVE;
+ gf_errno_to_error_array[ENOTACTIVE] = GF_ERROR_CODE_NOTACTIVE;
+/* EMULTIHOP 74 / * multihop attempted */
+ gf_error_to_errno_array[GF_ERROR_CODE_MULTIHOP] = EMULTIHOP;
+ gf_errno_to_error_array[EMULTIHOP] = GF_ERROR_CODE_MULTIHOP;
+/* EBADMSG 77 / * trying to read unreadable message */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADMSG] = EBADMSG;
+ gf_errno_to_error_array[EBADMSG] = GF_ERROR_CODE_BADMSG;
+/* ENAMETOOLONG 78 / * path name is too long */
+ gf_error_to_errno_array[GF_ERROR_CODE_NAMETOOLONG] = ENAMETOOLONG;
+ gf_errno_to_error_array[ENAMETOOLONG] = GF_ERROR_CODE_NAMETOOLONG;
+/* EOVERFLOW 79 / * value too large to be stored in data type */
+ gf_error_to_errno_array[GF_ERROR_CODE_OVERFLOW] = EOVERFLOW;
+ gf_errno_to_error_array[EOVERFLOW] = GF_ERROR_CODE_OVERFLOW;
+/* ENOTUNIQ 80 / * given log. name not unique */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTUNIQ] = ENOTUNIQ;
+ gf_errno_to_error_array[ENOTUNIQ] = GF_ERROR_CODE_NOTUNIQ;
+/* EBADFD 81 / * f.d. invalid for this operation */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADFD] = EBADFD;
+ gf_errno_to_error_array[EBADFD] = GF_ERROR_CODE_BADFD;
+/* EREMCHG 82 / * Remote address changed */
+ gf_error_to_errno_array[GF_ERROR_CODE_REMCHG] = EREMCHG;
+ gf_errno_to_error_array[EREMCHG] = GF_ERROR_CODE_REMCHG;
/* shared library problems */
-/* ELIBACC 83 / * Can't access a needed shared lib. */
- gf_error_to_errno_array[GF_ERROR_CODE_LIBACC] = ELIBACC;
- gf_errno_to_error_array[ELIBACC] = GF_ERROR_CODE_LIBACC;
-/* ELIBBAD 84 / * Accessing a corrupted shared lib. */
- gf_error_to_errno_array[GF_ERROR_CODE_LIBBAD] = ELIBBAD;
- gf_errno_to_error_array[ELIBBAD] = GF_ERROR_CODE_LIBBAD;
-/* ELIBSCN 85 / * .lib section in a.out corrupted. */
- gf_error_to_errno_array[GF_ERROR_CODE_LIBSCN] = ELIBSCN;
- gf_errno_to_error_array[ELIBSCN] = GF_ERROR_CODE_LIBSCN;
-/* ELIBMAX 86 / * Attempting to link in too many libs. */
- gf_error_to_errno_array[GF_ERROR_CODE_LIBMAX] = ELIBMAX;
- gf_errno_to_error_array[ELIBMAX] = GF_ERROR_CODE_LIBMAX;
-/* ELIBEXEC 87 / * Attempting to exec a shared library. */
- gf_error_to_errno_array[GF_ERROR_CODE_LIBEXEC] = ELIBEXEC;
- gf_errno_to_error_array[ELIBEXEC] = GF_ERROR_CODE_LIBEXEC;
-/* EILSEQ 88 / * Illegal byte sequence. */
- gf_error_to_errno_array[GF_ERROR_CODE_ILSEQ] = EILSEQ;
- gf_errno_to_error_array[EILSEQ] = GF_ERROR_CODE_ILSEQ;
-/* ENOSYS 89 / * Unsupported file system operation */
- gf_error_to_errno_array[GF_ERROR_CODE_NOSYS] = ENOSYS;
- gf_errno_to_error_array[ENOSYS] = GF_ERROR_CODE_NOSYS;
-/* ELOOP 90 / * Symbolic link loop */
- gf_error_to_errno_array[GF_ERROR_CODE_LOOP] = ELOOP;
- gf_errno_to_error_array[ELOOP] = GF_ERROR_CODE_LOOP;
-/* ERESTART 91 / * Restartable system call */
- gf_error_to_errno_array[GF_ERROR_CODE_RESTART] = ERESTART;
- gf_errno_to_error_array[ERESTART] = GF_ERROR_CODE_RESTART;
-/* ESTRPIPE 92 / * if pipe/FIFO, don't sleep in stream head */
- gf_error_to_errno_array[GF_ERROR_CODE_STRPIPE] = ESTRPIPE;
- gf_errno_to_error_array[ESTRPIPE] = GF_ERROR_CODE_STRPIPE;
-/* ENOTEMPTY 93 / * directory not empty */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTEMPTY] = ENOTEMPTY;
- gf_errno_to_error_array[ENOTEMPTY] = GF_ERROR_CODE_NOTEMPTY;
-/* EUSERS 94 / * Too many users (for UFS) */
- gf_error_to_errno_array[GF_ERROR_CODE_USERS] = EUSERS;
- gf_errno_to_error_array[EUSERS] = GF_ERROR_CODE_USERS;
+/* ELIBACC 83 / * Can't access a needed shared lib. */
+ gf_error_to_errno_array[GF_ERROR_CODE_LIBACC] = ELIBACC;
+ gf_errno_to_error_array[ELIBACC] = GF_ERROR_CODE_LIBACC;
+/* ELIBBAD 84 / * Accessing a corrupted shared lib. */
+ gf_error_to_errno_array[GF_ERROR_CODE_LIBBAD] = ELIBBAD;
+ gf_errno_to_error_array[ELIBBAD] = GF_ERROR_CODE_LIBBAD;
+/* ELIBSCN 85 / * .lib section in a.out corrupted. */
+ gf_error_to_errno_array[GF_ERROR_CODE_LIBSCN] = ELIBSCN;
+ gf_errno_to_error_array[ELIBSCN] = GF_ERROR_CODE_LIBSCN;
+/* ELIBMAX 86 / * Attempting to link in too many libs. */
+ gf_error_to_errno_array[GF_ERROR_CODE_LIBMAX] = ELIBMAX;
+ gf_errno_to_error_array[ELIBMAX] = GF_ERROR_CODE_LIBMAX;
+/* ELIBEXEC 87 / * Attempting to exec a shared library. */
+ gf_error_to_errno_array[GF_ERROR_CODE_LIBEXEC] = ELIBEXEC;
+ gf_errno_to_error_array[ELIBEXEC] = GF_ERROR_CODE_LIBEXEC;
+/* EILSEQ 88 / * Illegal byte sequence. */
+ gf_error_to_errno_array[GF_ERROR_CODE_ILSEQ] = EILSEQ;
+ gf_errno_to_error_array[EILSEQ] = GF_ERROR_CODE_ILSEQ;
+/* ENOSYS 89 / * Unsupported file system operation */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOSYS] = ENOSYS;
+ gf_errno_to_error_array[ENOSYS] = GF_ERROR_CODE_NOSYS;
+/* ELOOP 90 / * Symbolic link loop */
+ gf_error_to_errno_array[GF_ERROR_CODE_LOOP] = ELOOP;
+ gf_errno_to_error_array[ELOOP] = GF_ERROR_CODE_LOOP;
+/* ERESTART 91 / * Restartable system call */
+ gf_error_to_errno_array[GF_ERROR_CODE_RESTART] = ERESTART;
+ gf_errno_to_error_array[ERESTART] = GF_ERROR_CODE_RESTART;
+/* ESTRPIPE 92 / * if pipe/FIFO, don't sleep in stream head */
+ gf_error_to_errno_array[GF_ERROR_CODE_STRPIPE] = ESTRPIPE;
+ gf_errno_to_error_array[ESTRPIPE] = GF_ERROR_CODE_STRPIPE;
+/* ENOTEMPTY 93 / * directory not empty */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTEMPTY] = ENOTEMPTY;
+ gf_errno_to_error_array[ENOTEMPTY] = GF_ERROR_CODE_NOTEMPTY;
+/* EUSERS 94 / * Too many users (for UFS) */
+ gf_error_to_errno_array[GF_ERROR_CODE_USERS] = EUSERS;
+ gf_errno_to_error_array[EUSERS] = GF_ERROR_CODE_USERS;
/* BSD Networking Software */
- /* argument errors */
-/* ENOTSOCK 95 / * Socket operation on non-socket */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTSOCK] = ENOTSOCK;
- gf_errno_to_error_array[ENOTSOCK] = GF_ERROR_CODE_NOTSOCK;
-/* EDESTADDRREQ 96 / * Destination address required */
- gf_error_to_errno_array[GF_ERROR_CODE_DESTADDRREQ] = EDESTADDRREQ;
- gf_errno_to_error_array[EDESTADDRREQ] = GF_ERROR_CODE_DESTADDRREQ;
-/* EMSGSIZE 97 / * Message too long */
- gf_error_to_errno_array[GF_ERROR_CODE_MSGSIZE] = EMSGSIZE;
- gf_errno_to_error_array[EMSGSIZE] = GF_ERROR_CODE_MSGSIZE;
-/* EPROTOTYPE 98 / * Protocol wrong type for socket */
- gf_error_to_errno_array[GF_ERROR_CODE_PROTOTYPE] = EPROTOTYPE;
- gf_errno_to_error_array[EPROTOTYPE] = GF_ERROR_CODE_PROTOTYPE;
-/* ENOPROTOOPT 99 / * Protocol not available */
- gf_error_to_errno_array[GF_ERROR_CODE_NOPROTOOPT] = ENOPROTOOPT;
- gf_errno_to_error_array[ENOPROTOOPT] = GF_ERROR_CODE_NOPROTOOPT;
-/* EPROTONOSUPPORT 120 / * Protocol not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_PROTONOSUPPORT] = EPROTONOSUPPORT;
- gf_errno_to_error_array[EPROTONOSUPPORT] = GF_ERROR_CODE_PROTONOSUPPORT;
-/* ESOCKTNOSUPPORT 121 / * Socket type not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_SOCKTNOSUPPORT] = ESOCKTNOSUPPORT;
- gf_errno_to_error_array[ESOCKTNOSUPPORT] = GF_ERROR_CODE_SOCKTNOSUPPORT;
-
-/* EOPNOTSUPP 122 / * Operation not supported on socket */
- gf_error_to_errno_array[GF_ERROR_CODE_OPNOTSUPP] = EOPNOTSUPP;
- gf_errno_to_error_array[EOPNOTSUPP] = GF_ERROR_CODE_OPNOTSUPP;
-/* EPFNOSUPPORT 123 / * Protocol family not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_PFNOSUPPORT] = EPFNOSUPPORT;
- gf_errno_to_error_array[EPFNOSUPPORT] = GF_ERROR_CODE_PFNOSUPPORT;
-/* EAFNOSUPPORT 124 / * Address family not supported by */
- /* protocol family */
- gf_error_to_errno_array[GF_ERROR_CODE_AFNOSUPPORT] = EAFNOSUPPORT;
- gf_errno_to_error_array[EAFNOSUPPORT] = GF_ERROR_CODE_AFNOSUPPORT;
-/* EADDRINUSE 125 / * Address already in use */
- gf_error_to_errno_array[GF_ERROR_CODE_ADDRINUSE] = EADDRINUSE;
- gf_errno_to_error_array[EADDRINUSE] = GF_ERROR_CODE_ADDRINUSE;
-/* EADDRNOTAVAIL 126 / * Can't assign requested address */
- /* operational errors */
- gf_error_to_errno_array[GF_ERROR_CODE_ADDRNOTAVAIL] = EADDRNOTAVAIL;
- gf_errno_to_error_array[EADDRNOTAVAIL] = GF_ERROR_CODE_ADDRNOTAVAIL;
-/* ENETDOWN 127 / * Network is down */
- gf_error_to_errno_array[GF_ERROR_CODE_NETDOWN] = ENETDOWN;
- gf_errno_to_error_array[ENETDOWN] = GF_ERROR_CODE_NETDOWN;
-/* ENETUNREACH 128 / * Network is unreachable */
- gf_error_to_errno_array[GF_ERROR_CODE_NETUNREACH] = ENETUNREACH;
- gf_errno_to_error_array[ENETUNREACH] = GF_ERROR_CODE_NETUNREACH;
-/* ENETRESET 129 / * Network dropped connection because */
- /* of reset */
- gf_error_to_errno_array[GF_ERROR_CODE_NETRESET] = ENETRESET;
- gf_errno_to_error_array[ENETRESET] = GF_ERROR_CODE_NETRESET;
-/* ECONNABORTED 130 / * Software caused connection abort */
- gf_error_to_errno_array[GF_ERROR_CODE_CONNABORTED] = ECONNABORTED;
- gf_errno_to_error_array[ECONNABORTED] = GF_ERROR_CODE_CONNABORTED;
-/* ECONNRESET 131 / * Connection reset by peer */
- gf_error_to_errno_array[GF_ERROR_CODE_CONNRESET] = ECONNRESET;
- gf_errno_to_error_array[ECONNRESET] = GF_ERROR_CODE_CONNRESET;
-/* ENOBUFS 132 / * No buffer space available */
- gf_error_to_errno_array[GF_ERROR_CODE_NOBUFS] = ENOBUFS;
- gf_errno_to_error_array[ENOBUFS] = GF_ERROR_CODE_NOBUFS;
-/* EISCONN 133 / * Socket is already connected */
- gf_error_to_errno_array[GF_ERROR_CODE_ISCONN] = EISCONN;
- gf_errno_to_error_array[EISCONN] = GF_ERROR_CODE_ISCONN;
-/* ENOTCONN 134 / * Socket is not connected */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTCONN] = ENOTCONN;
- gf_errno_to_error_array[ENOTCONN] = GF_ERROR_CODE_NOTCONN;
+ /* argument errors */
+/* ENOTSOCK 95 / * Socket operation on non-socket */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTSOCK] = ENOTSOCK;
+ gf_errno_to_error_array[ENOTSOCK] = GF_ERROR_CODE_NOTSOCK;
+/* EDESTADDRREQ 96 / * Destination address required */
+ gf_error_to_errno_array[GF_ERROR_CODE_DESTADDRREQ] = EDESTADDRREQ;
+ gf_errno_to_error_array[EDESTADDRREQ] = GF_ERROR_CODE_DESTADDRREQ;
+/* EMSGSIZE 97 / * Message too long */
+ gf_error_to_errno_array[GF_ERROR_CODE_MSGSIZE] = EMSGSIZE;
+ gf_errno_to_error_array[EMSGSIZE] = GF_ERROR_CODE_MSGSIZE;
+/* EPROTOTYPE 98 / * Protocol wrong type for socket */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROTOTYPE] = EPROTOTYPE;
+ gf_errno_to_error_array[EPROTOTYPE] = GF_ERROR_CODE_PROTOTYPE;
+/* ENOPROTOOPT 99 / * Protocol not available */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOPROTOOPT] = ENOPROTOOPT;
+ gf_errno_to_error_array[ENOPROTOOPT] = GF_ERROR_CODE_NOPROTOOPT;
+/* EPROTONOSUPPORT 120 / * Protocol not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROTONOSUPPORT] = EPROTONOSUPPORT;
+ gf_errno_to_error_array[EPROTONOSUPPORT] = GF_ERROR_CODE_PROTONOSUPPORT;
+/* ESOCKTNOSUPPORT 121 / * Socket type not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_SOCKTNOSUPPORT] = ESOCKTNOSUPPORT;
+ gf_errno_to_error_array[ESOCKTNOSUPPORT] = GF_ERROR_CODE_SOCKTNOSUPPORT;
+
+/* EOPNOTSUPP 122 / * Operation not supported on socket */
+ gf_error_to_errno_array[GF_ERROR_CODE_OPNOTSUPP] = EOPNOTSUPP;
+ gf_errno_to_error_array[EOPNOTSUPP] = GF_ERROR_CODE_OPNOTSUPP;
+/* EPFNOSUPPORT 123 / * Protocol family not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_PFNOSUPPORT] = EPFNOSUPPORT;
+ gf_errno_to_error_array[EPFNOSUPPORT] = GF_ERROR_CODE_PFNOSUPPORT;
+/* EAFNOSUPPORT 124 / * Address family not supported by */
+ /* protocol family */
+ gf_error_to_errno_array[GF_ERROR_CODE_AFNOSUPPORT] = EAFNOSUPPORT;
+ gf_errno_to_error_array[EAFNOSUPPORT] = GF_ERROR_CODE_AFNOSUPPORT;
+/* EADDRINUSE 125 / * Address already in use */
+ gf_error_to_errno_array[GF_ERROR_CODE_ADDRINUSE] = EADDRINUSE;
+ gf_errno_to_error_array[EADDRINUSE] = GF_ERROR_CODE_ADDRINUSE;
+/* EADDRNOTAVAIL 126 / * Can't assign requested address */
+ /* operational errors */
+ gf_error_to_errno_array[GF_ERROR_CODE_ADDRNOTAVAIL] = EADDRNOTAVAIL;
+ gf_errno_to_error_array[EADDRNOTAVAIL] = GF_ERROR_CODE_ADDRNOTAVAIL;
+/* ENETDOWN 127 / * Network is down */
+ gf_error_to_errno_array[GF_ERROR_CODE_NETDOWN] = ENETDOWN;
+ gf_errno_to_error_array[ENETDOWN] = GF_ERROR_CODE_NETDOWN;
+/* ENETUNREACH 128 / * Network is unreachable */
+ gf_error_to_errno_array[GF_ERROR_CODE_NETUNREACH] = ENETUNREACH;
+ gf_errno_to_error_array[ENETUNREACH] = GF_ERROR_CODE_NETUNREACH;
+/* ENETRESET 129 / * Network dropped connection because */
+ /* of reset */
+ gf_error_to_errno_array[GF_ERROR_CODE_NETRESET] = ENETRESET;
+ gf_errno_to_error_array[ENETRESET] = GF_ERROR_CODE_NETRESET;
+/* ECONNABORTED 130 / * Software caused connection abort */
+ gf_error_to_errno_array[GF_ERROR_CODE_CONNABORTED] = ECONNABORTED;
+ gf_errno_to_error_array[ECONNABORTED] = GF_ERROR_CODE_CONNABORTED;
+/* ECONNRESET 131 / * Connection reset by peer */
+ gf_error_to_errno_array[GF_ERROR_CODE_CONNRESET] = ECONNRESET;
+ gf_errno_to_error_array[ECONNRESET] = GF_ERROR_CODE_CONNRESET;
+/* ENOBUFS 132 / * No buffer space available */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOBUFS] = ENOBUFS;
+ gf_errno_to_error_array[ENOBUFS] = GF_ERROR_CODE_NOBUFS;
+/* EISCONN 133 / * Socket is already connected */
+ gf_error_to_errno_array[GF_ERROR_CODE_ISCONN] = EISCONN;
+ gf_errno_to_error_array[EISCONN] = GF_ERROR_CODE_ISCONN;
+/* ENOTCONN 134 / * Socket is not connected */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTCONN] = ENOTCONN;
+ gf_errno_to_error_array[ENOTCONN] = GF_ERROR_CODE_NOTCONN;
/* XENIX has 135 - 142 */
-/* ESHUTDOWN 143 / * Can't send after socket shutdown */
- gf_error_to_errno_array[GF_ERROR_CODE_SHUTDOWN] = ESHUTDOWN;
- gf_errno_to_error_array[ESHUTDOWN] = GF_ERROR_CODE_SHUTDOWN;
-/* ETOOMANYREFS 144 / * Too many references: can't splice */
- gf_error_to_errno_array[GF_ERROR_CODE_TOOMANYREFS] = ETOOMANYREFS;
- gf_errno_to_error_array[ETOOMANYREFS] = GF_ERROR_CODE_TOOMANYREFS;
-/* ETIMEDOUT 145 / * Connection timed out */
- gf_error_to_errno_array[GF_ERROR_CODE_TIMEDOUT] = ETIMEDOUT;
- gf_errno_to_error_array[ETIMEDOUT] = GF_ERROR_CODE_TIMEDOUT;
-
-/* ECONNREFUSED 146 / * Connection refused */
- gf_error_to_errno_array[GF_ERROR_CODE_CONNREFUSED] = ECONNREFUSED;
- gf_errno_to_error_array[ECONNREFUSED] = GF_ERROR_CODE_CONNREFUSED;
-/* EHOSTDOWN 147 / * Host is down */
- gf_error_to_errno_array[GF_ERROR_CODE_HOSTDOWN] = EHOSTDOWN;
- gf_errno_to_error_array[EHOSTDOWN] = GF_ERROR_CODE_HOSTDOWN;
-/* EHOSTUNREACH 148 / * No route to host */
- gf_error_to_errno_array[GF_ERROR_CODE_HOSTUNREACH] = EHOSTUNREACH;
- gf_errno_to_error_array[EHOSTUNREACH] = GF_ERROR_CODE_HOSTUNREACH;
-/* EALREADY 149 / * operation already in progress */
- gf_error_to_errno_array[GF_ERROR_CODE_ALREADY] = EALREADY;
- gf_errno_to_error_array[EALREADY] = GF_ERROR_CODE_ALREADY;
-/* EINPROGRESS 150 / * operation now in progress */
- gf_error_to_errno_array[GF_ERROR_CODE_INPROGRESS] = EINPROGRESS;
- gf_errno_to_error_array[EINPROGRESS] = GF_ERROR_CODE_INPROGRESS;
+/* ESHUTDOWN 143 / * Can't send after socket shutdown */
+ gf_error_to_errno_array[GF_ERROR_CODE_SHUTDOWN] = ESHUTDOWN;
+ gf_errno_to_error_array[ESHUTDOWN] = GF_ERROR_CODE_SHUTDOWN;
+/* ETOOMANYREFS 144 / * Too many references: can't splice */
+ gf_error_to_errno_array[GF_ERROR_CODE_TOOMANYREFS] = ETOOMANYREFS;
+ gf_errno_to_error_array[ETOOMANYREFS] = GF_ERROR_CODE_TOOMANYREFS;
+/* ETIMEDOUT 145 / * Connection timed out */
+ gf_error_to_errno_array[GF_ERROR_CODE_TIMEDOUT] = ETIMEDOUT;
+ gf_errno_to_error_array[ETIMEDOUT] = GF_ERROR_CODE_TIMEDOUT;
+
+/* ECONNREFUSED 146 / * Connection refused */
+ gf_error_to_errno_array[GF_ERROR_CODE_CONNREFUSED] = ECONNREFUSED;
+ gf_errno_to_error_array[ECONNREFUSED] = GF_ERROR_CODE_CONNREFUSED;
+/* EHOSTDOWN 147 / * Host is down */
+ gf_error_to_errno_array[GF_ERROR_CODE_HOSTDOWN] = EHOSTDOWN;
+ gf_errno_to_error_array[EHOSTDOWN] = GF_ERROR_CODE_HOSTDOWN;
+/* EHOSTUNREACH 148 / * No route to host */
+ gf_error_to_errno_array[GF_ERROR_CODE_HOSTUNREACH] = EHOSTUNREACH;
+ gf_errno_to_error_array[EHOSTUNREACH] = GF_ERROR_CODE_HOSTUNREACH;
+/* EALREADY 149 / * operation already in progress */
+ gf_error_to_errno_array[GF_ERROR_CODE_ALREADY] = EALREADY;
+ gf_errno_to_error_array[EALREADY] = GF_ERROR_CODE_ALREADY;
+/* EINPROGRESS 150 / * operation now in progress */
+ gf_error_to_errno_array[GF_ERROR_CODE_INPROGRESS] = EINPROGRESS;
+ gf_errno_to_error_array[EINPROGRESS] = GF_ERROR_CODE_INPROGRESS;
/* SUN Network File System */
-/* ESTALE 151 / * Stale NFS file handle */
- gf_error_to_errno_array[GF_ERROR_CODE_STALE] = ESTALE;
- gf_errno_to_error_array[ESTALE] = GF_ERROR_CODE_STALE;
+/* ESTALE 151 / * Stale NFS file handle */
+ gf_error_to_errno_array[GF_ERROR_CODE_STALE] = ESTALE;
+ gf_errno_to_error_array[ESTALE] = GF_ERROR_CODE_STALE;
- return ;
+ return ;
}
#endif /* GF_SOLARIS_HOST_OS */
#ifdef GF_DARWIN_HOST_OS
-static void
+static void
init_compat_errno_arrays ()
{
- /* EDEADLK 11 / * Resource deadlock would occur */
- gf_error_to_errno_array[GF_ERROR_CODE_DEADLK] = EDEADLK;
- gf_errno_to_error_array[EDEADLK] = GF_ERROR_CODE_DEADLK;
-
- /* EAGAIN 35 / * Try Again */
- gf_error_to_errno_array[GF_ERROR_CODE_AGAIN] = EAGAIN;
- gf_errno_to_error_array[EAGAIN] = GF_ERROR_CODE_AGAIN;
-
- /* EINPROGRESS 36 / * Operation now in progress */
- gf_error_to_errno_array[GF_ERROR_CODE_INPROGRESS] = EINPROGRESS;
- gf_errno_to_error_array[EINPROGRESS] = GF_ERROR_CODE_INPROGRESS;
-
- /* EALREADY 37 / * Operation already in progress */
- gf_error_to_errno_array[GF_ERROR_CODE_ALREADY] = EALREADY;
- gf_errno_to_error_array[EALREADY] = GF_ERROR_CODE_ALREADY;
-
- /* ENOTSOCK 38 / * Socket operation on non-socket */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTSOCK] = ENOTSOCK;
- gf_errno_to_error_array[ENOTSOCK] = GF_ERROR_CODE_NOTSOCK;
-
- /* EDESTADDRREQ 39 / * Destination address required */
- gf_error_to_errno_array[GF_ERROR_CODE_DESTADDRREQ] = EDESTADDRREQ;
- gf_errno_to_error_array[EDESTADDRREQ] = GF_ERROR_CODE_DESTADDRREQ;
-
- /* EMSGSIZE 40 / * Message too long */
- gf_error_to_errno_array[GF_ERROR_CODE_MSGSIZE] = EMSGSIZE;
- gf_errno_to_error_array[EMSGSIZE] = GF_ERROR_CODE_MSGSIZE;
-
- /* EPROTOTYPE 41 / * Protocol wrong type for socket */
- gf_error_to_errno_array[GF_ERROR_CODE_PROTOTYPE] = EPROTOTYPE;
- gf_errno_to_error_array[EPROTOTYPE] = GF_ERROR_CODE_PROTOTYPE;
-
- /* ENOPROTOOPT 42 / * Protocol not available */
- gf_error_to_errno_array[GF_ERROR_CODE_NOPROTOOPT] = ENOPROTOOPT;
- gf_errno_to_error_array[ENOPROTOOPT] = GF_ERROR_CODE_NOPROTOOPT;
-
- /* EPROTONOSUPPORT 43 / * Protocol not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_PROTONOSUPPORT] = EPROTONOSUPPORT;
- gf_errno_to_error_array[EPROTONOSUPPORT] = GF_ERROR_CODE_PROTONOSUPPORT;
-
- /* ESOCKTNOSUPPORT 44 / * Socket type not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_SOCKTNOSUPPORT] = ESOCKTNOSUPPORT;
- gf_errno_to_error_array[ESOCKTNOSUPPORT] = GF_ERROR_CODE_SOCKTNOSUPPORT;
-
- /* EOPNOTSUPP 45 / * Operation not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_OPNOTSUPP] = EOPNOTSUPP;
- gf_errno_to_error_array[EOPNOTSUPP] = GF_ERROR_CODE_OPNOTSUPP;
-
- /* EPFNOSUPPORT 46 / * Protocol family not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_PFNOSUPPORT] = EPFNOSUPPORT;
- gf_errno_to_error_array[EPFNOSUPPORT] = GF_ERROR_CODE_PFNOSUPPORT;
-
- /* EAFNOSUPPORT 47 / * Address family not supported by protocol family */
- gf_error_to_errno_array[GF_ERROR_CODE_AFNOSUPPORT] = EAFNOSUPPORT;
- gf_errno_to_error_array[EAFNOSUPPORT] = GF_ERROR_CODE_AFNOSUPPORT;
-
- /* EADDRINUSE 48 / * Address already in use */
- gf_error_to_errno_array[GF_ERROR_CODE_ADDRINUSE] = EADDRINUSE;
- gf_errno_to_error_array[EADDRINUSE] = GF_ERROR_CODE_ADDRINUSE;
-
- /* EADDRNOTAVAIL 49 / * Can't assign requested address */
- gf_error_to_errno_array[GF_ERROR_CODE_ADDRNOTAVAIL] = EADDRNOTAVAIL;
- gf_errno_to_error_array[EADDRNOTAVAIL] = GF_ERROR_CODE_ADDRNOTAVAIL;
-
- /* ENETDOWN 50 / * Network is down */
- gf_error_to_errno_array[GF_ERROR_CODE_NETDOWN] = ENETDOWN;
- gf_errno_to_error_array[ENETDOWN] = GF_ERROR_CODE_NETDOWN;
-
- /* ENETUNREACH 51 / * Network is unreachable */
- gf_error_to_errno_array[GF_ERROR_CODE_NETUNREACH] = ENETUNREACH;
- gf_errno_to_error_array[ENETUNREACH] = GF_ERROR_CODE_NETUNREACH;
-
- /* ENETRESET 52 / * Network dropped connection on reset */
- gf_error_to_errno_array[GF_ERROR_CODE_NETRESET] = ENETRESET;
- gf_errno_to_error_array[ENETRESET] = GF_ERROR_CODE_NETRESET;
-
- /* ECONNABORTED 53 / * Software caused connection abort */
- gf_error_to_errno_array[GF_ERROR_CODE_CONNABORTED] = ECONNABORTED;
- gf_errno_to_error_array[ECONNABORTED] = GF_ERROR_CODE_CONNABORTED;
-
- /* ECONNRESET 54 / * Connection reset by peer */
- gf_error_to_errno_array[GF_ERROR_CODE_CONNRESET] = ECONNRESET;
- gf_errno_to_error_array[ECONNRESET] = GF_ERROR_CODE_CONNRESET;
-
- /* ENOBUFS 55 / * No buffer space available */
- gf_error_to_errno_array[GF_ERROR_CODE_NOBUFS] = ENOBUFS;
- gf_errno_to_error_array[ENOBUFS] = GF_ERROR_CODE_NOBUFS;
-
- /* EISCONN 56 / * Socket is already connected */
- gf_error_to_errno_array[GF_ERROR_CODE_ISCONN] = EISCONN;
- gf_errno_to_error_array[EISCONN] = GF_ERROR_CODE_ISCONN;
-
- /* ENOTCONN 57 / * Socket is not connected */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTCONN] = ENOTCONN;
- gf_errno_to_error_array[ENOTCONN] = GF_ERROR_CODE_NOTCONN;
-
- /* ESHUTDOWN 58 / * Can't send after socket shutdown */
- gf_error_to_errno_array[GF_ERROR_CODE_SHUTDOWN] = ESHUTDOWN;
- gf_errno_to_error_array[ESHUTDOWN] = GF_ERROR_CODE_SHUTDOWN;
-
- /* ETOOMANYREFS 59 / * Too many references: can't splice */
- gf_error_to_errno_array[GF_ERROR_CODE_TOOMANYREFS] = ETOOMANYREFS;
- gf_errno_to_error_array[ETOOMANYREFS] = GF_ERROR_CODE_TOOMANYREFS;
-
- /* ETIMEDOUT 60 / * Operation timed out */
- gf_error_to_errno_array[GF_ERROR_CODE_TIMEDOUT] = ETIMEDOUT;
- gf_errno_to_error_array[ETIMEDOUT] = GF_ERROR_CODE_TIMEDOUT;
-
- /* ECONNREFUSED 61 / * Connection refused */
- gf_error_to_errno_array[GF_ERROR_CODE_CONNREFUSED] = ECONNREFUSED;
- gf_errno_to_error_array[ECONNREFUSED] = GF_ERROR_CODE_CONNREFUSED;
-
- /* ELOOP 62 / * Too many levels of symbolic links */
- gf_error_to_errno_array[GF_ERROR_CODE_LOOP] = ELOOP;
- gf_errno_to_error_array[ELOOP] = GF_ERROR_CODE_LOOP;
-
- /* ENAMETOOLONG 63 / * File name too long */
- gf_error_to_errno_array[GF_ERROR_CODE_NAMETOOLONG] = ENAMETOOLONG;
- gf_errno_to_error_array[ENAMETOOLONG] = GF_ERROR_CODE_NAMETOOLONG;
-
- /* EHOSTDOWN 64 / * Host is down */
- gf_error_to_errno_array[GF_ERROR_CODE_HOSTDOWN] = EHOSTDOWN;
- gf_errno_to_error_array[EHOSTDOWN] = GF_ERROR_CODE_HOSTDOWN;
-
- /* EHOSTUNREACH 65 / * No route to host */
- gf_error_to_errno_array[GF_ERROR_CODE_HOSTUNREACH] = EHOSTUNREACH;
- gf_errno_to_error_array[EHOSTUNREACH] = GF_ERROR_CODE_HOSTUNREACH;
-
- /* ENOTEMPTY 66 / * Directory not empty */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTEMPTY] = ENOTEMPTY;
- gf_errno_to_error_array[ENOTEMPTY] = GF_ERROR_CODE_NOTEMPTY;
-
- /* EPROCLIM 67 / * Too many processes */
- gf_error_to_errno_array[GF_ERROR_CODE_PROCLIM] = EPROCLIM;
- gf_errno_to_error_array[EPROCLIM] = GF_ERROR_CODE_PROCLIM;
-
- /* EUSERS 68 / * Too many users */
- gf_error_to_errno_array[GF_ERROR_CODE_USERS] = EUSERS;
- gf_errno_to_error_array[EUSERS] = GF_ERROR_CODE_USERS;
-
- /* EDQUOT 69 / * Disc quota exceeded */
- gf_error_to_errno_array[GF_ERROR_CODE_DQUOT] = EDQUOT;
- gf_errno_to_error_array[EDQUOT] = GF_ERROR_CODE_DQUOT;
-
- /* ESTALE 70 / * Stale NFS file handle */
- gf_error_to_errno_array[GF_ERROR_CODE_STALE] = ESTALE;
- gf_errno_to_error_array[ESTALE] = GF_ERROR_CODE_STALE;
-
- /* EREMOTE 71 / * Too many levels of remote in path */
- gf_error_to_errno_array[GF_ERROR_CODE_REMOTE] = EREMOTE;
- gf_errno_to_error_array[EREMOTE] = GF_ERROR_CODE_REMOTE;
-
- /* EBADRPC 72 / * RPC struct is bad */
- gf_error_to_errno_array[GF_ERROR_CODE_BADRPC] = EBADRPC;
- gf_errno_to_error_array[EBADRPC] = GF_ERROR_CODE_BADRPC;
-
- /* ERPCMISMATCH 73 / * RPC version wrong */
- gf_error_to_errno_array[GF_ERROR_CODE_RPCMISMATCH] = ERPCMISMATCH;
- gf_errno_to_error_array[ERPCMISMATCH] = GF_ERROR_CODE_RPCMISMATCH;
-
- /* EPROGUNAVAIL 74 / * RPC prog. not avail */
- gf_error_to_errno_array[GF_ERROR_CODE_PROGUNAVAIL] = EPROGUNAVAIL;
- gf_errno_to_error_array[EPROGUNAVAIL] = GF_ERROR_CODE_PROGUNAVAIL;
-
- /* EPROGMISMATCH 75 / * Program version wrong */
- gf_error_to_errno_array[GF_ERROR_CODE_PROGMISMATCH] = EPROGMISMATCH;
- gf_errno_to_error_array[EPROGMISMATCH] = GF_ERROR_CODE_PROGMISMATCH;
-
- /* EPROCUNAVAIL 76 / * Bad procedure for program */
- gf_error_to_errno_array[GF_ERROR_CODE_PROCUNAVAIL] = EPROCUNAVAIL;
- gf_errno_to_error_array[EPROCUNAVAIL] = GF_ERROR_CODE_PROCUNAVAIL;
-
- /* ENOLCK 77 / * No locks available */
- gf_error_to_errno_array[GF_ERROR_CODE_NOLCK] = ENOLCK;
- gf_errno_to_error_array[ENOLCK] = GF_ERROR_CODE_NOLCK;
-
- /* ENOSYS 78 / * Function not implemented */
- gf_error_to_errno_array[GF_ERROR_CODE_NOSYS] = ENOSYS;
- gf_errno_to_error_array[ENOSYS] = GF_ERROR_CODE_NOSYS;
-
- /* EFTYPE 79 / * Inappropriate file type or format */
- gf_error_to_errno_array[GF_ERROR_CODE_FTYPE] = EFTYPE;
- gf_errno_to_error_array[EFTYPE] = GF_ERROR_CODE_FTYPE;
-
- /* EAUTH 80 / * Authentication error */
- gf_error_to_errno_array[GF_ERROR_CODE_AUTH] = EAUTH;
- gf_errno_to_error_array[EAUTH] = GF_ERROR_CODE_AUTH;
+ /* EDEADLK 11 / * Resource deadlock would occur */
+ gf_error_to_errno_array[GF_ERROR_CODE_DEADLK] = EDEADLK;
+ gf_errno_to_error_array[EDEADLK] = GF_ERROR_CODE_DEADLK;
+
+ /* EAGAIN 35 / * Try Again */
+ gf_error_to_errno_array[GF_ERROR_CODE_AGAIN] = EAGAIN;
+ gf_errno_to_error_array[EAGAIN] = GF_ERROR_CODE_AGAIN;
+
+ /* EINPROGRESS 36 / * Operation now in progress */
+ gf_error_to_errno_array[GF_ERROR_CODE_INPROGRESS] = EINPROGRESS;
+ gf_errno_to_error_array[EINPROGRESS] = GF_ERROR_CODE_INPROGRESS;
+
+ /* EALREADY 37 / * Operation already in progress */
+ gf_error_to_errno_array[GF_ERROR_CODE_ALREADY] = EALREADY;
+ gf_errno_to_error_array[EALREADY] = GF_ERROR_CODE_ALREADY;
+
+ /* ENOTSOCK 38 / * Socket operation on non-socket */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTSOCK] = ENOTSOCK;
+ gf_errno_to_error_array[ENOTSOCK] = GF_ERROR_CODE_NOTSOCK;
+
+ /* EDESTADDRREQ 39 / * Destination address required */
+ gf_error_to_errno_array[GF_ERROR_CODE_DESTADDRREQ] = EDESTADDRREQ;
+ gf_errno_to_error_array[EDESTADDRREQ] = GF_ERROR_CODE_DESTADDRREQ;
+
+ /* EMSGSIZE 40 / * Message too long */
+ gf_error_to_errno_array[GF_ERROR_CODE_MSGSIZE] = EMSGSIZE;
+ gf_errno_to_error_array[EMSGSIZE] = GF_ERROR_CODE_MSGSIZE;
+
+ /* EPROTOTYPE 41 / * Protocol wrong type for socket */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROTOTYPE] = EPROTOTYPE;
+ gf_errno_to_error_array[EPROTOTYPE] = GF_ERROR_CODE_PROTOTYPE;
+
+ /* ENOPROTOOPT 42 / * Protocol not available */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOPROTOOPT] = ENOPROTOOPT;
+ gf_errno_to_error_array[ENOPROTOOPT] = GF_ERROR_CODE_NOPROTOOPT;
+
+ /* EPROTONOSUPPORT 43 / * Protocol not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROTONOSUPPORT] = EPROTONOSUPPORT;
+ gf_errno_to_error_array[EPROTONOSUPPORT] = GF_ERROR_CODE_PROTONOSUPPORT;
+
+ /* ESOCKTNOSUPPORT 44 / * Socket type not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_SOCKTNOSUPPORT] = ESOCKTNOSUPPORT;
+ gf_errno_to_error_array[ESOCKTNOSUPPORT] = GF_ERROR_CODE_SOCKTNOSUPPORT;
+
+ /* EOPNOTSUPP 45 / * Operation not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_OPNOTSUPP] = EOPNOTSUPP;
+ gf_errno_to_error_array[EOPNOTSUPP] = GF_ERROR_CODE_OPNOTSUPP;
+
+ /* EPFNOSUPPORT 46 / * Protocol family not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_PFNOSUPPORT] = EPFNOSUPPORT;
+ gf_errno_to_error_array[EPFNOSUPPORT] = GF_ERROR_CODE_PFNOSUPPORT;
+
+ /* EAFNOSUPPORT 47 / * Address family not supported by protocol family */
+ gf_error_to_errno_array[GF_ERROR_CODE_AFNOSUPPORT] = EAFNOSUPPORT;
+ gf_errno_to_error_array[EAFNOSUPPORT] = GF_ERROR_CODE_AFNOSUPPORT;
+
+ /* EADDRINUSE 48 / * Address already in use */
+ gf_error_to_errno_array[GF_ERROR_CODE_ADDRINUSE] = EADDRINUSE;
+ gf_errno_to_error_array[EADDRINUSE] = GF_ERROR_CODE_ADDRINUSE;
+
+ /* EADDRNOTAVAIL 49 / * Can't assign requested address */
+ gf_error_to_errno_array[GF_ERROR_CODE_ADDRNOTAVAIL] = EADDRNOTAVAIL;
+ gf_errno_to_error_array[EADDRNOTAVAIL] = GF_ERROR_CODE_ADDRNOTAVAIL;
+
+ /* ENETDOWN 50 / * Network is down */
+ gf_error_to_errno_array[GF_ERROR_CODE_NETDOWN] = ENETDOWN;
+ gf_errno_to_error_array[ENETDOWN] = GF_ERROR_CODE_NETDOWN;
+
+ /* ENETUNREACH 51 / * Network is unreachable */
+ gf_error_to_errno_array[GF_ERROR_CODE_NETUNREACH] = ENETUNREACH;
+ gf_errno_to_error_array[ENETUNREACH] = GF_ERROR_CODE_NETUNREACH;
+
+ /* ENETRESET 52 / * Network dropped connection on reset */
+ gf_error_to_errno_array[GF_ERROR_CODE_NETRESET] = ENETRESET;
+ gf_errno_to_error_array[ENETRESET] = GF_ERROR_CODE_NETRESET;
+
+ /* ECONNABORTED 53 / * Software caused connection abort */
+ gf_error_to_errno_array[GF_ERROR_CODE_CONNABORTED] = ECONNABORTED;
+ gf_errno_to_error_array[ECONNABORTED] = GF_ERROR_CODE_CONNABORTED;
+
+ /* ECONNRESET 54 / * Connection reset by peer */
+ gf_error_to_errno_array[GF_ERROR_CODE_CONNRESET] = ECONNRESET;
+ gf_errno_to_error_array[ECONNRESET] = GF_ERROR_CODE_CONNRESET;
+
+ /* ENOBUFS 55 / * No buffer space available */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOBUFS] = ENOBUFS;
+ gf_errno_to_error_array[ENOBUFS] = GF_ERROR_CODE_NOBUFS;
+
+ /* EISCONN 56 / * Socket is already connected */
+ gf_error_to_errno_array[GF_ERROR_CODE_ISCONN] = EISCONN;
+ gf_errno_to_error_array[EISCONN] = GF_ERROR_CODE_ISCONN;
+
+ /* ENOTCONN 57 / * Socket is not connected */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTCONN] = ENOTCONN;
+ gf_errno_to_error_array[ENOTCONN] = GF_ERROR_CODE_NOTCONN;
+
+ /* ESHUTDOWN 58 / * Can't send after socket shutdown */
+ gf_error_to_errno_array[GF_ERROR_CODE_SHUTDOWN] = ESHUTDOWN;
+ gf_errno_to_error_array[ESHUTDOWN] = GF_ERROR_CODE_SHUTDOWN;
+
+ /* ETOOMANYREFS 59 / * Too many references: can't splice */
+ gf_error_to_errno_array[GF_ERROR_CODE_TOOMANYREFS] = ETOOMANYREFS;
+ gf_errno_to_error_array[ETOOMANYREFS] = GF_ERROR_CODE_TOOMANYREFS;
+
+ /* ETIMEDOUT 60 / * Operation timed out */
+ gf_error_to_errno_array[GF_ERROR_CODE_TIMEDOUT] = ETIMEDOUT;
+ gf_errno_to_error_array[ETIMEDOUT] = GF_ERROR_CODE_TIMEDOUT;
+
+ /* ECONNREFUSED 61 / * Connection refused */
+ gf_error_to_errno_array[GF_ERROR_CODE_CONNREFUSED] = ECONNREFUSED;
+ gf_errno_to_error_array[ECONNREFUSED] = GF_ERROR_CODE_CONNREFUSED;
+
+ /* ELOOP 62 / * Too many levels of symbolic links */
+ gf_error_to_errno_array[GF_ERROR_CODE_LOOP] = ELOOP;
+ gf_errno_to_error_array[ELOOP] = GF_ERROR_CODE_LOOP;
+
+ /* ENAMETOOLONG 63 / * File name too long */
+ gf_error_to_errno_array[GF_ERROR_CODE_NAMETOOLONG] = ENAMETOOLONG;
+ gf_errno_to_error_array[ENAMETOOLONG] = GF_ERROR_CODE_NAMETOOLONG;
+
+ /* EHOSTDOWN 64 / * Host is down */
+ gf_error_to_errno_array[GF_ERROR_CODE_HOSTDOWN] = EHOSTDOWN;
+ gf_errno_to_error_array[EHOSTDOWN] = GF_ERROR_CODE_HOSTDOWN;
+
+ /* EHOSTUNREACH 65 / * No route to host */
+ gf_error_to_errno_array[GF_ERROR_CODE_HOSTUNREACH] = EHOSTUNREACH;
+ gf_errno_to_error_array[EHOSTUNREACH] = GF_ERROR_CODE_HOSTUNREACH;
+
+ /* ENOTEMPTY 66 / * Directory not empty */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTEMPTY] = ENOTEMPTY;
+ gf_errno_to_error_array[ENOTEMPTY] = GF_ERROR_CODE_NOTEMPTY;
+
+ /* EPROCLIM 67 / * Too many processes */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROCLIM] = EPROCLIM;
+ gf_errno_to_error_array[EPROCLIM] = GF_ERROR_CODE_PROCLIM;
+
+ /* EUSERS 68 / * Too many users */
+ gf_error_to_errno_array[GF_ERROR_CODE_USERS] = EUSERS;
+ gf_errno_to_error_array[EUSERS] = GF_ERROR_CODE_USERS;
+
+ /* EDQUOT 69 / * Disc quota exceeded */
+ gf_error_to_errno_array[GF_ERROR_CODE_DQUOT] = EDQUOT;
+ gf_errno_to_error_array[EDQUOT] = GF_ERROR_CODE_DQUOT;
+
+ /* ESTALE 70 / * Stale NFS file handle */
+ gf_error_to_errno_array[GF_ERROR_CODE_STALE] = ESTALE;
+ gf_errno_to_error_array[ESTALE] = GF_ERROR_CODE_STALE;
+
+ /* EREMOTE 71 / * Too many levels of remote in path */
+ gf_error_to_errno_array[GF_ERROR_CODE_REMOTE] = EREMOTE;
+ gf_errno_to_error_array[EREMOTE] = GF_ERROR_CODE_REMOTE;
+
+ /* EBADRPC 72 / * RPC struct is bad */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADRPC] = EBADRPC;
+ gf_errno_to_error_array[EBADRPC] = GF_ERROR_CODE_BADRPC;
+
+ /* ERPCMISMATCH 73 / * RPC version wrong */
+ gf_error_to_errno_array[GF_ERROR_CODE_RPCMISMATCH] = ERPCMISMATCH;
+ gf_errno_to_error_array[ERPCMISMATCH] = GF_ERROR_CODE_RPCMISMATCH;
+
+ /* EPROGUNAVAIL 74 / * RPC prog. not avail */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROGUNAVAIL] = EPROGUNAVAIL;
+ gf_errno_to_error_array[EPROGUNAVAIL] = GF_ERROR_CODE_PROGUNAVAIL;
+
+ /* EPROGMISMATCH 75 / * Program version wrong */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROGMISMATCH] = EPROGMISMATCH;
+ gf_errno_to_error_array[EPROGMISMATCH] = GF_ERROR_CODE_PROGMISMATCH;
+
+ /* EPROCUNAVAIL 76 / * Bad procedure for program */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROCUNAVAIL] = EPROCUNAVAIL;
+ gf_errno_to_error_array[EPROCUNAVAIL] = GF_ERROR_CODE_PROCUNAVAIL;
+
+ /* ENOLCK 77 / * No locks available */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOLCK] = ENOLCK;
+ gf_errno_to_error_array[ENOLCK] = GF_ERROR_CODE_NOLCK;
- /* ENEEDAUTH 81 / * Need authenticator */
- gf_error_to_errno_array[GF_ERROR_CODE_NEEDAUTH] = ENEEDAUTH;
- gf_errno_to_error_array[ENEEDAUTH] = GF_ERROR_CODE_NEEDAUTH;
+ /* ENOSYS 78 / * Function not implemented */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOSYS] = ENOSYS;
+ gf_errno_to_error_array[ENOSYS] = GF_ERROR_CODE_NOSYS;
+
+ /* EFTYPE 79 / * Inappropriate file type or format */
+ gf_error_to_errno_array[GF_ERROR_CODE_FTYPE] = EFTYPE;
+ gf_errno_to_error_array[EFTYPE] = GF_ERROR_CODE_FTYPE;
+
+ /* EAUTH 80 / * Authentication error */
+ gf_error_to_errno_array[GF_ERROR_CODE_AUTH] = EAUTH;
+ gf_errno_to_error_array[EAUTH] = GF_ERROR_CODE_AUTH;
+
+ /* ENEEDAUTH 81 / * Need authenticator */
+ gf_error_to_errno_array[GF_ERROR_CODE_NEEDAUTH] = ENEEDAUTH;
+ gf_errno_to_error_array[ENEEDAUTH] = GF_ERROR_CODE_NEEDAUTH;
/* Intelligent device errors */
-/* EPWROFF 82 / * Device power is off */
- gf_error_to_errno_array[GF_ERROR_CODE_PWROFF] = EPWROFF;
- gf_errno_to_error_array[EPWROFF] = GF_ERROR_CODE_PWROFF;
-/* EDEVERR 83 / * Device error, e.g. paper out */
- gf_error_to_errno_array[GF_ERROR_CODE_DEVERR] = EDEVERR;
- gf_errno_to_error_array[EDEVERR] = GF_ERROR_CODE_DEVERR;
-
- /* EOVERFLOW 84 / * Value too large to be stored in data type */
- gf_error_to_errno_array[GF_ERROR_CODE_OVERFLOW] = EOVERFLOW;
- gf_errno_to_error_array[EOVERFLOW] = GF_ERROR_CODE_OVERFLOW;
+/* EPWROFF 82 / * Device power is off */
+ gf_error_to_errno_array[GF_ERROR_CODE_PWROFF] = EPWROFF;
+ gf_errno_to_error_array[EPWROFF] = GF_ERROR_CODE_PWROFF;
+/* EDEVERR 83 / * Device error, e.g. paper out */
+ gf_error_to_errno_array[GF_ERROR_CODE_DEVERR] = EDEVERR;
+ gf_errno_to_error_array[EDEVERR] = GF_ERROR_CODE_DEVERR;
+
+ /* EOVERFLOW 84 / * Value too large to be stored in data type */
+ gf_error_to_errno_array[GF_ERROR_CODE_OVERFLOW] = EOVERFLOW;
+ gf_errno_to_error_array[EOVERFLOW] = GF_ERROR_CODE_OVERFLOW;
/* Program loading errors */
-/* EBADEXEC 85 / * Bad executable */
- gf_error_to_errno_array[GF_ERROR_CODE_BADEXEC] = EBADEXEC;
- gf_errno_to_error_array[EBADEXEC] = GF_ERROR_CODE_BADEXEC;
-
-/* EBADARCH 86 / * Bad CPU type in executable */
- gf_error_to_errno_array[GF_ERROR_CODE_BADARCH] = EBADARCH;
- gf_errno_to_error_array[EBADARCH] = GF_ERROR_CODE_BADARCH;
-
-/* ESHLIBVERS 87 / * Shared library version mismatch */
- gf_error_to_errno_array[GF_ERROR_CODE_SHLIBVERS] = ESHLIBVERS;
- gf_errno_to_error_array[ESHLIBVERS] = GF_ERROR_CODE_SHLIBVERS;
-
-/* EBADMACHO 88 / * Malformed Macho file */
- gf_error_to_errno_array[GF_ERROR_CODE_BADMACHO] = EBADMACHO;
- gf_errno_to_error_array[EBADMACHO] = GF_ERROR_CODE_BADMACHO;
-
-#if 0
- /* EDOOFUS 88 / * Programming error */
- gf_error_to_errno_array[GF_ERROR_CODE_DOOFUS] = EDOOFUS;
- gf_errno_to_error_array[EDOOFUS] = GF_ERROR_CODE_DOOFUS;
+/* EBADEXEC 85 / * Bad executable */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADEXEC] = EBADEXEC;
+ gf_errno_to_error_array[EBADEXEC] = GF_ERROR_CODE_BADEXEC;
+
+/* EBADARCH 86 / * Bad CPU type in executable */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADARCH] = EBADARCH;
+ gf_errno_to_error_array[EBADARCH] = GF_ERROR_CODE_BADARCH;
+
+/* ESHLIBVERS 87 / * Shared library version mismatch */
+ gf_error_to_errno_array[GF_ERROR_CODE_SHLIBVERS] = ESHLIBVERS;
+ gf_errno_to_error_array[ESHLIBVERS] = GF_ERROR_CODE_SHLIBVERS;
+
+/* EBADMACHO 88 / * Malformed Macho file */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADMACHO] = EBADMACHO;
+ gf_errno_to_error_array[EBADMACHO] = GF_ERROR_CODE_BADMACHO;
+
+#ifdef EDOOFUS
+ /* EDOOFUS 88 / * Programming error */
+ gf_error_to_errno_array[GF_ERROR_CODE_DOOFUS] = EDOOFUS;
+ gf_errno_to_error_array[EDOOFUS] = GF_ERROR_CODE_DOOFUS;
#endif
- /* ECANCELED 89 / * Operation canceled */
- gf_error_to_errno_array[GF_ERROR_CODE_CANCELED] = ECANCELED;
- gf_errno_to_error_array[ECANCELED] = GF_ERROR_CODE_CANCELED;
+ /* ECANCELED 89 / * Operation canceled */
+ gf_error_to_errno_array[GF_ERROR_CODE_CANCELED] = ECANCELED;
+ gf_errno_to_error_array[ECANCELED] = GF_ERROR_CODE_CANCELED;
- /* EIDRM 90 / * Identifier removed */
- gf_error_to_errno_array[GF_ERROR_CODE_IDRM] = EIDRM;
- gf_errno_to_error_array[EIDRM] = GF_ERROR_CODE_IDRM;
- /* ENOMSG 91 / * No message of desired type */
- gf_error_to_errno_array[GF_ERROR_CODE_NOMSG] = ENOMSG;
- gf_errno_to_error_array[ENOMSG] = GF_ERROR_CODE_NOMSG;
+ /* EIDRM 90 / * Identifier removed */
+ gf_error_to_errno_array[GF_ERROR_CODE_IDRM] = EIDRM;
+ gf_errno_to_error_array[EIDRM] = GF_ERROR_CODE_IDRM;
+ /* ENOMSG 91 / * No message of desired type */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOMSG] = ENOMSG;
+ gf_errno_to_error_array[ENOMSG] = GF_ERROR_CODE_NOMSG;
- /* EILSEQ 92 / * Illegal byte sequence */
- gf_error_to_errno_array[GF_ERROR_CODE_ILSEQ] = EILSEQ;
- gf_errno_to_error_array[EILSEQ] = GF_ERROR_CODE_ILSEQ;
+ /* EILSEQ 92 / * Illegal byte sequence */
+ gf_error_to_errno_array[GF_ERROR_CODE_ILSEQ] = EILSEQ;
+ gf_errno_to_error_array[EILSEQ] = GF_ERROR_CODE_ILSEQ;
- /* ENOATTR 93 / * Attribute not found */
- gf_error_to_errno_array[GF_ERROR_CODE_NOATTR] = ENOATTR;
- gf_errno_to_error_array[ENOATTR] = GF_ERROR_CODE_NOATTR;
+ /* ENOATTR 93 / * Attribute not found */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOATTR] = ENOATTR;
+ gf_errno_to_error_array[ENOATTR] = GF_ERROR_CODE_NOATTR;
- /* EBADMSG 94 / * Bad message */
- gf_error_to_errno_array[GF_ERROR_CODE_BADMSG] = EBADMSG;
- gf_errno_to_error_array[EBADMSG] = GF_ERROR_CODE_BADMSG;
+ /* EBADMSG 94 / * Bad message */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADMSG] = EBADMSG;
+ gf_errno_to_error_array[EBADMSG] = GF_ERROR_CODE_BADMSG;
- /* EMULTIHOP 95 / * Reserved */
- gf_error_to_errno_array[GF_ERROR_CODE_MULTIHOP] = EMULTIHOP;
- gf_errno_to_error_array[EMULTIHOP] = GF_ERROR_CODE_MULTIHOP;
+ /* EMULTIHOP 95 / * Reserved */
+ gf_error_to_errno_array[GF_ERROR_CODE_MULTIHOP] = EMULTIHOP;
+ gf_errno_to_error_array[EMULTIHOP] = GF_ERROR_CODE_MULTIHOP;
- /* ENODATA 96 / * No message available on STREAM */
- gf_error_to_errno_array[GF_ERROR_CODE_NEEDAUTH] = ENEEDAUTH;
- gf_errno_to_error_array[ENEEDAUTH] = GF_ERROR_CODE_NEEDAUTH;
+ /* ENODATA 96 / * No message available on STREAM */
+ gf_error_to_errno_array[GF_ERROR_CODE_NEEDAUTH] = ENEEDAUTH;
+ gf_errno_to_error_array[ENEEDAUTH] = GF_ERROR_CODE_NEEDAUTH;
- /* ENOLINK 97 / * Reserved */
- gf_error_to_errno_array[GF_ERROR_CODE_NOLINK] = ENOLINK;
- gf_errno_to_error_array[ENOLINK] = GF_ERROR_CODE_NOLINK;
+ /* ENOLINK 97 / * Reserved */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOLINK] = ENOLINK;
+ gf_errno_to_error_array[ENOLINK] = GF_ERROR_CODE_NOLINK;
- /* ENOSR 98 / * No STREAM resources */
- gf_error_to_errno_array[GF_ERROR_CODE_NOSR] = ENOSR;
- gf_errno_to_error_array[ENOSR] = GF_ERROR_CODE_NOSR;
+ /* ENOSR 98 / * No STREAM resources */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOSR] = ENOSR;
+ gf_errno_to_error_array[ENOSR] = GF_ERROR_CODE_NOSR;
- /* ENOSTR 99 / * Not a STREAM */
- gf_error_to_errno_array[GF_ERROR_CODE_NOSTR] = ENOSTR;
- gf_errno_to_error_array[ENOSTR] = GF_ERROR_CODE_NOSTR;
+ /* ENOSTR 99 / * Not a STREAM */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOSTR] = ENOSTR;
+ gf_errno_to_error_array[ENOSTR] = GF_ERROR_CODE_NOSTR;
-/* EPROTO 100 / * Protocol error */
- gf_error_to_errno_array[GF_ERROR_CODE_PROTO] = EPROTO;
- gf_errno_to_error_array[EPROTO] = GF_ERROR_CODE_PROTO;
-/* ETIME 101 / * STREAM ioctl timeout */
- gf_error_to_errno_array[GF_ERROR_CODE_TIME] = ETIME;
- gf_errno_to_error_array[ETIME] = GF_ERROR_CODE_TIME;
+/* EPROTO 100 / * Protocol error */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROTO] = EPROTO;
+ gf_errno_to_error_array[EPROTO] = GF_ERROR_CODE_PROTO;
+/* ETIME 101 / * STREAM ioctl timeout */
+ gf_error_to_errno_array[GF_ERROR_CODE_TIME] = ETIME;
+ gf_errno_to_error_array[ETIME] = GF_ERROR_CODE_TIME;
/* This value is only discrete when compiling __DARWIN_UNIX03, or KERNEL */
-/* EOPNOTSUPP 102 / * Operation not supported on socket */
- gf_error_to_errno_array[GF_ERROR_CODE_OPNOTSUPP] = EOPNOTSUPP;
- gf_errno_to_error_array[EOPNOTSUPP] = GF_ERROR_CODE_OPNOTSUPP;
+/* EOPNOTSUPP 102 / * Operation not supported on socket */
+ gf_error_to_errno_array[GF_ERROR_CODE_OPNOTSUPP] = EOPNOTSUPP;
+ gf_errno_to_error_array[EOPNOTSUPP] = GF_ERROR_CODE_OPNOTSUPP;
-/* ENOPOLICY 103 / * No such policy registered */
- gf_error_to_errno_array[GF_ERROR_CODE_NOPOLICY] = ENOPOLICY;
- gf_errno_to_error_array[ENOPOLICY] = GF_ERROR_CODE_NOPOLICY;
+/* ENOPOLICY 103 / * No such policy registered */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOPOLICY] = ENOPOLICY;
+ gf_errno_to_error_array[ENOPOLICY] = GF_ERROR_CODE_NOPOLICY;
- return ;
+ return ;
}
#endif /* GF_DARWIN_HOST_OS */
#ifdef GF_BSD_HOST_OS
-static void
+static void
init_compat_errno_arrays ()
{
- /* Quite a bit of things changed in FreeBSD - current */
-
- /* EAGAIN 35 / * Try Again */
- gf_error_to_errno_array[GF_ERROR_CODE_AGAIN] = EAGAIN;
- gf_errno_to_error_array[EAGAIN] = GF_ERROR_CODE_AGAIN;
-
- /* EDEADLK 11 / * Resource deadlock would occur */
- gf_error_to_errno_array[GF_ERROR_CODE_DEADLK] = EDEADLK;
- gf_errno_to_error_array[EDEADLK] = GF_ERROR_CODE_DEADLK;
-
- /* EINPROGRESS 36 / * Operation now in progress */
- gf_error_to_errno_array[GF_ERROR_CODE_INPROGRESS] = EINPROGRESS;
- gf_errno_to_error_array[EINPROGRESS] = GF_ERROR_CODE_INPROGRESS;
-
- /* EALREADY 37 / * Operation already in progress */
- gf_error_to_errno_array[GF_ERROR_CODE_ALREADY] = EALREADY;
- gf_errno_to_error_array[EALREADY] = GF_ERROR_CODE_ALREADY;
-
- /* ENOTSOCK 38 / * Socket operation on non-socket */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTSOCK] = ENOTSOCK;
- gf_errno_to_error_array[ENOTSOCK] = GF_ERROR_CODE_NOTSOCK;
-
- /* EDESTADDRREQ 39 / * Destination address required */
- gf_error_to_errno_array[GF_ERROR_CODE_DESTADDRREQ] = EDESTADDRREQ;
- gf_errno_to_error_array[EDESTADDRREQ] = GF_ERROR_CODE_DESTADDRREQ;
-
- /* EMSGSIZE 40 / * Message too long */
- gf_error_to_errno_array[GF_ERROR_CODE_MSGSIZE] = EMSGSIZE;
- gf_errno_to_error_array[EMSGSIZE] = GF_ERROR_CODE_MSGSIZE;
-
- /* EPROTOTYPE 41 / * Protocol wrong type for socket */
- gf_error_to_errno_array[GF_ERROR_CODE_PROTOTYPE] = EPROTOTYPE;
- gf_errno_to_error_array[EPROTOTYPE] = GF_ERROR_CODE_PROTOTYPE;
-
- /* ENOPROTOOPT 42 / * Protocol not available */
- gf_error_to_errno_array[GF_ERROR_CODE_NOPROTOOPT] = ENOPROTOOPT;
- gf_errno_to_error_array[ENOPROTOOPT] = GF_ERROR_CODE_NOPROTOOPT;
-
- /* EPROTONOSUPPORT 43 / * Protocol not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_PROTONOSUPPORT] = EPROTONOSUPPORT;
- gf_errno_to_error_array[EPROTONOSUPPORT] = GF_ERROR_CODE_PROTONOSUPPORT;
-
- /* ESOCKTNOSUPPORT 44 / * Socket type not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_SOCKTNOSUPPORT] = ESOCKTNOSUPPORT;
- gf_errno_to_error_array[ESOCKTNOSUPPORT] = GF_ERROR_CODE_SOCKTNOSUPPORT;
-
- /* EOPNOTSUPP 45 / * Operation not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_OPNOTSUPP] = EOPNOTSUPP;
- gf_errno_to_error_array[EOPNOTSUPP] = GF_ERROR_CODE_OPNOTSUPP;
-
- /* EPFNOSUPPORT 46 / * Protocol family not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_PFNOSUPPORT] = EPFNOSUPPORT;
- gf_errno_to_error_array[EPFNOSUPPORT] = GF_ERROR_CODE_PFNOSUPPORT;
-
- /* EAFNOSUPPORT 47 / * Address family not supported by protocol family */
- gf_error_to_errno_array[GF_ERROR_CODE_AFNOSUPPORT] = EAFNOSUPPORT;
- gf_errno_to_error_array[EAFNOSUPPORT] = GF_ERROR_CODE_AFNOSUPPORT;
-
- /* EADDRINUSE 48 / * Address already in use */
- gf_error_to_errno_array[GF_ERROR_CODE_ADDRINUSE] = EADDRINUSE;
- gf_errno_to_error_array[EADDRINUSE] = GF_ERROR_CODE_ADDRINUSE;
-
- /* EADDRNOTAVAIL 49 / * Can't assign requested address */
- gf_error_to_errno_array[GF_ERROR_CODE_ADDRNOTAVAIL] = EADDRNOTAVAIL;
- gf_errno_to_error_array[EADDRNOTAVAIL] = GF_ERROR_CODE_ADDRNOTAVAIL;
-
- /* ENETDOWN 50 / * Network is down */
- gf_error_to_errno_array[GF_ERROR_CODE_NETDOWN] = ENETDOWN;
- gf_errno_to_error_array[ENETDOWN] = GF_ERROR_CODE_NETDOWN;
-
- /* ENETUNREACH 51 / * Network is unreachable */
- gf_error_to_errno_array[GF_ERROR_CODE_NETUNREACH] = ENETUNREACH;
- gf_errno_to_error_array[ENETUNREACH] = GF_ERROR_CODE_NETUNREACH;
-
- /* ENETRESET 52 / * Network dropped connection on reset */
- gf_error_to_errno_array[GF_ERROR_CODE_NETRESET] = ENETRESET;
- gf_errno_to_error_array[ENETRESET] = GF_ERROR_CODE_NETRESET;
-
- /* ECONNABORTED 53 / * Software caused connection abort */
- gf_error_to_errno_array[GF_ERROR_CODE_CONNABORTED] = ECONNABORTED;
- gf_errno_to_error_array[ECONNABORTED] = GF_ERROR_CODE_CONNABORTED;
-
- /* ECONNRESET 54 / * Connection reset by peer */
- gf_error_to_errno_array[GF_ERROR_CODE_CONNRESET] = ECONNRESET;
- gf_errno_to_error_array[ECONNRESET] = GF_ERROR_CODE_CONNRESET;
-
- /* ENOBUFS 55 / * No buffer space available */
- gf_error_to_errno_array[GF_ERROR_CODE_NOBUFS] = ENOBUFS;
- gf_errno_to_error_array[ENOBUFS] = GF_ERROR_CODE_NOBUFS;
-
- /* EISCONN 56 / * Socket is already connected */
- gf_error_to_errno_array[GF_ERROR_CODE_ISCONN] = EISCONN;
- gf_errno_to_error_array[EISCONN] = GF_ERROR_CODE_ISCONN;
-
- /* ENOTCONN 57 / * Socket is not connected */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTCONN] = ENOTCONN;
- gf_errno_to_error_array[ENOTCONN] = GF_ERROR_CODE_NOTCONN;
-
- /* ESHUTDOWN 58 / * Can't send after socket shutdown */
- gf_error_to_errno_array[GF_ERROR_CODE_SHUTDOWN] = ESHUTDOWN;
- gf_errno_to_error_array[ESHUTDOWN] = GF_ERROR_CODE_SHUTDOWN;
-
- /* ETOOMANYREFS 59 / * Too many references: can't splice */
- gf_error_to_errno_array[GF_ERROR_CODE_TOOMANYREFS] = ETOOMANYREFS;
- gf_errno_to_error_array[ETOOMANYREFS] = GF_ERROR_CODE_TOOMANYREFS;
-
- /* ETIMEDOUT 60 / * Operation timed out */
- gf_error_to_errno_array[GF_ERROR_CODE_TIMEDOUT] = ETIMEDOUT;
- gf_errno_to_error_array[ETIMEDOUT] = GF_ERROR_CODE_TIMEDOUT;
-
- /* ECONNREFUSED 61 / * Connection refused */
- gf_error_to_errno_array[GF_ERROR_CODE_CONNREFUSED] = ECONNREFUSED;
- gf_errno_to_error_array[ECONNREFUSED] = GF_ERROR_CODE_CONNREFUSED;
-
- /* ELOOP 62 / * Too many levels of symbolic links */
- gf_error_to_errno_array[GF_ERROR_CODE_LOOP] = ELOOP;
- gf_errno_to_error_array[ELOOP] = GF_ERROR_CODE_LOOP;
-
- /* ENAMETOOLONG 63 / * File name too long */
- gf_error_to_errno_array[GF_ERROR_CODE_NAMETOOLONG] = ENAMETOOLONG;
- gf_errno_to_error_array[ENAMETOOLONG] = GF_ERROR_CODE_NAMETOOLONG;
-
- /* EHOSTDOWN 64 / * Host is down */
- gf_error_to_errno_array[GF_ERROR_CODE_HOSTDOWN] = EHOSTDOWN;
- gf_errno_to_error_array[EHOSTDOWN] = GF_ERROR_CODE_HOSTDOWN;
-
- /* EHOSTUNREACH 65 / * No route to host */
- gf_error_to_errno_array[GF_ERROR_CODE_HOSTUNREACH] = EHOSTUNREACH;
- gf_errno_to_error_array[EHOSTUNREACH] = GF_ERROR_CODE_HOSTUNREACH;
-
- /* ENOTEMPTY 66 / * Directory not empty */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTEMPTY] = ENOTEMPTY;
- gf_errno_to_error_array[ENOTEMPTY] = GF_ERROR_CODE_NOTEMPTY;
-
- /* EPROCLIM 67 / * Too many processes */
- gf_error_to_errno_array[GF_ERROR_CODE_PROCLIM] = EPROCLIM;
- gf_errno_to_error_array[EPROCLIM] = GF_ERROR_CODE_PROCLIM;
-
- /* EUSERS 68 / * Too many users */
- gf_error_to_errno_array[GF_ERROR_CODE_USERS] = EUSERS;
- gf_errno_to_error_array[EUSERS] = GF_ERROR_CODE_USERS;
-
- /* EDQUOT 69 / * Disc quota exceeded */
- gf_error_to_errno_array[GF_ERROR_CODE_DQUOT] = EDQUOT;
- gf_errno_to_error_array[EDQUOT] = GF_ERROR_CODE_DQUOT;
+ /* Quite a bit of things changed in FreeBSD - current */
+
+ /* EAGAIN 35 / * Try Again */
+ gf_error_to_errno_array[GF_ERROR_CODE_AGAIN] = EAGAIN;
+ gf_errno_to_error_array[EAGAIN] = GF_ERROR_CODE_AGAIN;
+
+ /* EDEADLK 11 / * Resource deadlock would occur */
+ gf_error_to_errno_array[GF_ERROR_CODE_DEADLK] = EDEADLK;
+ gf_errno_to_error_array[EDEADLK] = GF_ERROR_CODE_DEADLK;
+
+ /* EINPROGRESS 36 / * Operation now in progress */
+ gf_error_to_errno_array[GF_ERROR_CODE_INPROGRESS] = EINPROGRESS;
+ gf_errno_to_error_array[EINPROGRESS] = GF_ERROR_CODE_INPROGRESS;
+
+ /* EALREADY 37 / * Operation already in progress */
+ gf_error_to_errno_array[GF_ERROR_CODE_ALREADY] = EALREADY;
+ gf_errno_to_error_array[EALREADY] = GF_ERROR_CODE_ALREADY;
+
+ /* ENOTSOCK 38 / * Socket operation on non-socket */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTSOCK] = ENOTSOCK;
+ gf_errno_to_error_array[ENOTSOCK] = GF_ERROR_CODE_NOTSOCK;
+
+ /* EDESTADDRREQ 39 / * Destination address required */
+ gf_error_to_errno_array[GF_ERROR_CODE_DESTADDRREQ] = EDESTADDRREQ;
+ gf_errno_to_error_array[EDESTADDRREQ] = GF_ERROR_CODE_DESTADDRREQ;
+
+ /* EMSGSIZE 40 / * Message too long */
+ gf_error_to_errno_array[GF_ERROR_CODE_MSGSIZE] = EMSGSIZE;
+ gf_errno_to_error_array[EMSGSIZE] = GF_ERROR_CODE_MSGSIZE;
+
+ /* EPROTOTYPE 41 / * Protocol wrong type for socket */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROTOTYPE] = EPROTOTYPE;
+ gf_errno_to_error_array[EPROTOTYPE] = GF_ERROR_CODE_PROTOTYPE;
+
+ /* ENOPROTOOPT 42 / * Protocol not available */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOPROTOOPT] = ENOPROTOOPT;
+ gf_errno_to_error_array[ENOPROTOOPT] = GF_ERROR_CODE_NOPROTOOPT;
+
+ /* EPROTONOSUPPORT 43 / * Protocol not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROTONOSUPPORT] = EPROTONOSUPPORT;
+ gf_errno_to_error_array[EPROTONOSUPPORT] = GF_ERROR_CODE_PROTONOSUPPORT;
+
+ /* ESOCKTNOSUPPORT 44 / * Socket type not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_SOCKTNOSUPPORT] = ESOCKTNOSUPPORT;
+ gf_errno_to_error_array[ESOCKTNOSUPPORT] = GF_ERROR_CODE_SOCKTNOSUPPORT;
+
+ /* EOPNOTSUPP 45 / * Operation not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_OPNOTSUPP] = EOPNOTSUPP;
+ gf_errno_to_error_array[EOPNOTSUPP] = GF_ERROR_CODE_OPNOTSUPP;
+
+ /* EPFNOSUPPORT 46 / * Protocol family not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_PFNOSUPPORT] = EPFNOSUPPORT;
+ gf_errno_to_error_array[EPFNOSUPPORT] = GF_ERROR_CODE_PFNOSUPPORT;
+
+ /* EAFNOSUPPORT 47 / * Address family not supported by protocol family */
+ gf_error_to_errno_array[GF_ERROR_CODE_AFNOSUPPORT] = EAFNOSUPPORT;
+ gf_errno_to_error_array[EAFNOSUPPORT] = GF_ERROR_CODE_AFNOSUPPORT;
+
+ /* EADDRINUSE 48 / * Address already in use */
+ gf_error_to_errno_array[GF_ERROR_CODE_ADDRINUSE] = EADDRINUSE;
+ gf_errno_to_error_array[EADDRINUSE] = GF_ERROR_CODE_ADDRINUSE;
+
+ /* EADDRNOTAVAIL 49 / * Can't assign requested address */
+ gf_error_to_errno_array[GF_ERROR_CODE_ADDRNOTAVAIL] = EADDRNOTAVAIL;
+ gf_errno_to_error_array[EADDRNOTAVAIL] = GF_ERROR_CODE_ADDRNOTAVAIL;
+
+ /* ENETDOWN 50 / * Network is down */
+ gf_error_to_errno_array[GF_ERROR_CODE_NETDOWN] = ENETDOWN;
+ gf_errno_to_error_array[ENETDOWN] = GF_ERROR_CODE_NETDOWN;
+
+ /* ENETUNREACH 51 / * Network is unreachable */
+ gf_error_to_errno_array[GF_ERROR_CODE_NETUNREACH] = ENETUNREACH;
+ gf_errno_to_error_array[ENETUNREACH] = GF_ERROR_CODE_NETUNREACH;
- /* ESTALE 70 / * Stale NFS file handle */
- gf_error_to_errno_array[GF_ERROR_CODE_STALE] = ESTALE;
- gf_errno_to_error_array[ESTALE] = GF_ERROR_CODE_STALE;
+ /* ENETRESET 52 / * Network dropped connection on reset */
+ gf_error_to_errno_array[GF_ERROR_CODE_NETRESET] = ENETRESET;
+ gf_errno_to_error_array[ENETRESET] = GF_ERROR_CODE_NETRESET;
- /* EREMOTE 71 / * Too many levels of remote in path */
- gf_error_to_errno_array[GF_ERROR_CODE_REMOTE] = EREMOTE;
- gf_errno_to_error_array[EREMOTE] = GF_ERROR_CODE_REMOTE;
+ /* ECONNABORTED 53 / * Software caused connection abort */
+ gf_error_to_errno_array[GF_ERROR_CODE_CONNABORTED] = ECONNABORTED;
+ gf_errno_to_error_array[ECONNABORTED] = GF_ERROR_CODE_CONNABORTED;
- /* EBADRPC 72 / * RPC struct is bad */
- gf_error_to_errno_array[GF_ERROR_CODE_BADRPC] = EBADRPC;
- gf_errno_to_error_array[EBADRPC] = GF_ERROR_CODE_BADRPC;
+ /* ECONNRESET 54 / * Connection reset by peer */
+ gf_error_to_errno_array[GF_ERROR_CODE_CONNRESET] = ECONNRESET;
+ gf_errno_to_error_array[ECONNRESET] = GF_ERROR_CODE_CONNRESET;
- /* ERPCMISMATCH 73 / * RPC version wrong */
- gf_error_to_errno_array[GF_ERROR_CODE_RPCMISMATCH] = ERPCMISMATCH;
- gf_errno_to_error_array[ERPCMISMATCH] = GF_ERROR_CODE_RPCMISMATCH;
+ /* ENOBUFS 55 / * No buffer space available */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOBUFS] = ENOBUFS;
+ gf_errno_to_error_array[ENOBUFS] = GF_ERROR_CODE_NOBUFS;
- /* EPROGUNAVAIL 74 / * RPC prog. not avail */
- gf_error_to_errno_array[GF_ERROR_CODE_PROGUNAVAIL] = EPROGUNAVAIL;
- gf_errno_to_error_array[EPROGUNAVAIL] = GF_ERROR_CODE_PROGUNAVAIL;
+ /* EISCONN 56 / * Socket is already connected */
+ gf_error_to_errno_array[GF_ERROR_CODE_ISCONN] = EISCONN;
+ gf_errno_to_error_array[EISCONN] = GF_ERROR_CODE_ISCONN;
- /* EPROGMISMATCH 75 / * Program version wrong */
- gf_error_to_errno_array[GF_ERROR_CODE_PROGMISMATCH] = EPROGMISMATCH;
- gf_errno_to_error_array[EPROGMISMATCH] = GF_ERROR_CODE_PROGMISMATCH;
+ /* ENOTCONN 57 / * Socket is not connected */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTCONN] = ENOTCONN;
+ gf_errno_to_error_array[ENOTCONN] = GF_ERROR_CODE_NOTCONN;
- /* EPROCUNAVAIL 76 / * Bad procedure for program */
- gf_error_to_errno_array[GF_ERROR_CODE_PROCUNAVAIL] = EPROCUNAVAIL;
- gf_errno_to_error_array[EPROCUNAVAIL] = GF_ERROR_CODE_PROCUNAVAIL;
+ /* ESHUTDOWN 58 / * Can't send after socket shutdown */
+ gf_error_to_errno_array[GF_ERROR_CODE_SHUTDOWN] = ESHUTDOWN;
+ gf_errno_to_error_array[ESHUTDOWN] = GF_ERROR_CODE_SHUTDOWN;
- /* ENOLCK 77 / * No locks available */
- gf_error_to_errno_array[GF_ERROR_CODE_NOLCK] = ENOLCK;
- gf_errno_to_error_array[ENOLCK] = GF_ERROR_CODE_NOLCK;
+ /* ETOOMANYREFS 59 / * Too many references: can't splice */
+ gf_error_to_errno_array[GF_ERROR_CODE_TOOMANYREFS] = ETOOMANYREFS;
+ gf_errno_to_error_array[ETOOMANYREFS] = GF_ERROR_CODE_TOOMANYREFS;
- /* ENOSYS 78 / * Function not implemented */
- gf_error_to_errno_array[GF_ERROR_CODE_NOSYS] = ENOSYS;
- gf_errno_to_error_array[ENOSYS] = GF_ERROR_CODE_NOSYS;
+ /* ETIMEDOUT 60 / * Operation timed out */
+ gf_error_to_errno_array[GF_ERROR_CODE_TIMEDOUT] = ETIMEDOUT;
+ gf_errno_to_error_array[ETIMEDOUT] = GF_ERROR_CODE_TIMEDOUT;
- /* EFTYPE 79 / * Inappropriate file type or format */
- gf_error_to_errno_array[GF_ERROR_CODE_FTYPE] = EFTYPE;
- gf_errno_to_error_array[EFTYPE] = GF_ERROR_CODE_FTYPE;
+ /* ECONNREFUSED 61 / * Connection refused */
+ gf_error_to_errno_array[GF_ERROR_CODE_CONNREFUSED] = ECONNREFUSED;
+ gf_errno_to_error_array[ECONNREFUSED] = GF_ERROR_CODE_CONNREFUSED;
- /* EAUTH 80 / * Authentication error */
- gf_error_to_errno_array[GF_ERROR_CODE_AUTH] = EAUTH;
- gf_errno_to_error_array[EAUTH] = GF_ERROR_CODE_AUTH;
+ /* ELOOP 62 / * Too many levels of symbolic links */
+ gf_error_to_errno_array[GF_ERROR_CODE_LOOP] = ELOOP;
+ gf_errno_to_error_array[ELOOP] = GF_ERROR_CODE_LOOP;
- /* ENEEDAUTH 81 / * Need authenticator */
- gf_error_to_errno_array[GF_ERROR_CODE_NEEDAUTH] = ENEEDAUTH;
- gf_errno_to_error_array[ENEEDAUTH] = GF_ERROR_CODE_NEEDAUTH;
+ /* ENAMETOOLONG 63 / * File name too long */
+ gf_error_to_errno_array[GF_ERROR_CODE_NAMETOOLONG] = ENAMETOOLONG;
+ gf_errno_to_error_array[ENAMETOOLONG] = GF_ERROR_CODE_NAMETOOLONG;
- /* EIDRM 82 / * Identifier removed */
- gf_error_to_errno_array[GF_ERROR_CODE_IDRM] = EIDRM;
- gf_errno_to_error_array[EIDRM] = GF_ERROR_CODE_IDRM;
+ /* EHOSTDOWN 64 / * Host is down */
+ gf_error_to_errno_array[GF_ERROR_CODE_HOSTDOWN] = EHOSTDOWN;
+ gf_errno_to_error_array[EHOSTDOWN] = GF_ERROR_CODE_HOSTDOWN;
- /* ENOMSG 83 / * No message of desired type */
- gf_error_to_errno_array[GF_ERROR_CODE_NOMSG] = ENOMSG;
- gf_errno_to_error_array[ENOMSG] = GF_ERROR_CODE_NOMSG;
+ /* EHOSTUNREACH 65 / * No route to host */
+ gf_error_to_errno_array[GF_ERROR_CODE_HOSTUNREACH] = EHOSTUNREACH;
+ gf_errno_to_error_array[EHOSTUNREACH] = GF_ERROR_CODE_HOSTUNREACH;
- /* EOVERFLOW 84 / * Value too large to be stored in data type */
- gf_error_to_errno_array[GF_ERROR_CODE_OVERFLOW] = EOVERFLOW;
- gf_errno_to_error_array[EOVERFLOW] = GF_ERROR_CODE_OVERFLOW;
+ /* ENOTEMPTY 66 / * Directory not empty */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTEMPTY] = ENOTEMPTY;
+ gf_errno_to_error_array[ENOTEMPTY] = GF_ERROR_CODE_NOTEMPTY;
- /* ECANCELED 85 / * Operation canceled */
- gf_error_to_errno_array[GF_ERROR_CODE_CANCELED] = ECANCELED;
- gf_errno_to_error_array[ECANCELED] = GF_ERROR_CODE_CANCELED;
+ /* EPROCLIM 67 / * Too many processes */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROCLIM] = EPROCLIM;
+ gf_errno_to_error_array[EPROCLIM] = GF_ERROR_CODE_PROCLIM;
- /* EILSEQ 86 / * Illegal byte sequence */
- gf_error_to_errno_array[GF_ERROR_CODE_ILSEQ] = EILSEQ;
- gf_errno_to_error_array[EILSEQ] = GF_ERROR_CODE_ILSEQ;
+ /* EUSERS 68 / * Too many users */
+ gf_error_to_errno_array[GF_ERROR_CODE_USERS] = EUSERS;
+ gf_errno_to_error_array[EUSERS] = GF_ERROR_CODE_USERS;
- /* ENOATTR 87 / * Attribute not found */
- gf_error_to_errno_array[GF_ERROR_CODE_NOATTR] = ENOATTR;
- gf_errno_to_error_array[ENOATTR] = GF_ERROR_CODE_NOATTR;
-
- /* EDOOFUS 88 / * Programming error */
- gf_error_to_errno_array[GF_ERROR_CODE_DOOFUS] = EDOOFUS;
- gf_errno_to_error_array[EDOOFUS] = GF_ERROR_CODE_DOOFUS;
+ /* EDQUOT 69 / * Disc quota exceeded */
+ gf_error_to_errno_array[GF_ERROR_CODE_DQUOT] = EDQUOT;
+ gf_errno_to_error_array[EDQUOT] = GF_ERROR_CODE_DQUOT;
- /* EBADMSG 89 / * Bad message */
- gf_error_to_errno_array[GF_ERROR_CODE_BADMSG] = EBADMSG;
- gf_errno_to_error_array[EBADMSG] = GF_ERROR_CODE_BADMSG;
+ /* ESTALE 70 / * Stale NFS file handle */
+ gf_error_to_errno_array[GF_ERROR_CODE_STALE] = ESTALE;
+ gf_errno_to_error_array[ESTALE] = GF_ERROR_CODE_STALE;
- /* EMULTIHOP 90 / * Multihop attempted */
- gf_error_to_errno_array[GF_ERROR_CODE_MULTIHOP] = EMULTIHOP;
- gf_errno_to_error_array[EMULTIHOP] = GF_ERROR_CODE_MULTIHOP;
+ /* EREMOTE 71 / * Too many levels of remote in path */
+ gf_error_to_errno_array[GF_ERROR_CODE_REMOTE] = EREMOTE;
+ gf_errno_to_error_array[EREMOTE] = GF_ERROR_CODE_REMOTE;
- /* ENOLINK 91 / * Link has been severed */
- gf_error_to_errno_array[GF_ERROR_CODE_NOLINK] = ENOLINK;
- gf_errno_to_error_array[ENOLINK] = GF_ERROR_CODE_NOLINK;
+ /* EBADRPC 72 / * RPC struct is bad */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADRPC] = EBADRPC;
+ gf_errno_to_error_array[EBADRPC] = GF_ERROR_CODE_BADRPC;
- /* EPROTO 92 / * Protocol error */
- gf_error_to_errno_array[GF_ERROR_CODE_PROTO] = EPROTO;
- gf_errno_to_error_array[EPROTO] = GF_ERROR_CODE_PROTO;
+ /* ERPCMISMATCH 73 / * RPC version wrong */
+ gf_error_to_errno_array[GF_ERROR_CODE_RPCMISMATCH] = ERPCMISMATCH;
+ gf_errno_to_error_array[ERPCMISMATCH] = GF_ERROR_CODE_RPCMISMATCH;
+ /* EPROGUNAVAIL 74 / * RPC prog. not avail */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROGUNAVAIL] = EPROGUNAVAIL;
+ gf_errno_to_error_array[EPROGUNAVAIL] = GF_ERROR_CODE_PROGUNAVAIL;
- return ;
+ /* EPROGMISMATCH 75 / * Program version wrong */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROGMISMATCH] = EPROGMISMATCH;
+ gf_errno_to_error_array[EPROGMISMATCH] = GF_ERROR_CODE_PROGMISMATCH;
+
+ /* EPROCUNAVAIL 76 / * Bad procedure for program */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROCUNAVAIL] = EPROCUNAVAIL;
+ gf_errno_to_error_array[EPROCUNAVAIL] = GF_ERROR_CODE_PROCUNAVAIL;
+
+ /* ENOLCK 77 / * No locks available */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOLCK] = ENOLCK;
+ gf_errno_to_error_array[ENOLCK] = GF_ERROR_CODE_NOLCK;
+
+ /* ENOSYS 78 / * Function not implemented */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOSYS] = ENOSYS;
+ gf_errno_to_error_array[ENOSYS] = GF_ERROR_CODE_NOSYS;
+
+ /* EFTYPE 79 / * Inappropriate file type or format */
+ gf_error_to_errno_array[GF_ERROR_CODE_FTYPE] = EFTYPE;
+ gf_errno_to_error_array[EFTYPE] = GF_ERROR_CODE_FTYPE;
+
+ /* EAUTH 80 / * Authentication error */
+ gf_error_to_errno_array[GF_ERROR_CODE_AUTH] = EAUTH;
+ gf_errno_to_error_array[EAUTH] = GF_ERROR_CODE_AUTH;
+
+ /* ENEEDAUTH 81 / * Need authenticator */
+ gf_error_to_errno_array[GF_ERROR_CODE_NEEDAUTH] = ENEEDAUTH;
+ gf_errno_to_error_array[ENEEDAUTH] = GF_ERROR_CODE_NEEDAUTH;
+
+ /* EIDRM 82 / * Identifier removed */
+ gf_error_to_errno_array[GF_ERROR_CODE_IDRM] = EIDRM;
+ gf_errno_to_error_array[EIDRM] = GF_ERROR_CODE_IDRM;
+
+ /* ENOMSG 83 / * No message of desired type */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOMSG] = ENOMSG;
+ gf_errno_to_error_array[ENOMSG] = GF_ERROR_CODE_NOMSG;
+
+ /* EOVERFLOW 84 / * Value too large to be stored in data type */
+ gf_error_to_errno_array[GF_ERROR_CODE_OVERFLOW] = EOVERFLOW;
+ gf_errno_to_error_array[EOVERFLOW] = GF_ERROR_CODE_OVERFLOW;
+
+ /* ECANCELED 85 / * Operation canceled */
+ gf_error_to_errno_array[GF_ERROR_CODE_CANCELED] = ECANCELED;
+ gf_errno_to_error_array[ECANCELED] = GF_ERROR_CODE_CANCELED;
+
+ /* EILSEQ 86 / * Illegal byte sequence */
+ gf_error_to_errno_array[GF_ERROR_CODE_ILSEQ] = EILSEQ;
+ gf_errno_to_error_array[EILSEQ] = GF_ERROR_CODE_ILSEQ;
+
+ /* ENOATTR 87 / * Attribute not found */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOATTR] = ENOATTR;
+ gf_errno_to_error_array[ENOATTR] = GF_ERROR_CODE_NOATTR;
+
+#ifdef EDOOFUS
+ /* EDOOFUS 88 / * Programming error */
+ gf_error_to_errno_array[GF_ERROR_CODE_DOOFUS] = EDOOFUS;
+ gf_errno_to_error_array[EDOOFUS] = GF_ERROR_CODE_DOOFUS;
+#endif
+
+ /* EBADMSG 89 / * Bad message */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADMSG] = EBADMSG;
+ gf_errno_to_error_array[EBADMSG] = GF_ERROR_CODE_BADMSG;
+
+ /* EMULTIHOP 90 / * Multihop attempted */
+ gf_error_to_errno_array[GF_ERROR_CODE_MULTIHOP] = EMULTIHOP;
+ gf_errno_to_error_array[EMULTIHOP] = GF_ERROR_CODE_MULTIHOP;
+
+ /* ENOLINK 91 / * Link has been severed */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOLINK] = ENOLINK;
+ gf_errno_to_error_array[ENOLINK] = GF_ERROR_CODE_NOLINK;
+
+ /* EPROTO 92 / * Protocol error */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROTO] = EPROTO;
+ gf_errno_to_error_array[EPROTO] = GF_ERROR_CODE_PROTO;
+
+
+ return ;
}
#endif /* GF_BSD_HOST_OS */
#ifdef GF_LINUX_HOST_OS
-static void
+static void
init_compat_errno_arrays ()
{
- /* Things are fine. Everything should work seemlessly on GNU/Linux machines */
- return ;
+ /* Things are fine. Everything should work seemlessly on GNU/Linux machines */
+ return ;
}
#endif /* GF_LINUX_HOST_OS */
@@ -896,43 +889,42 @@ init_compat_errno_arrays ()
static void
init_errno_arrays ()
{
- int i;
- for (i=0; i < GF_ERROR_CODE_UNKNOWN; i++) {
- gf_errno_to_error_array[i] = i;
- gf_error_to_errno_array[i] = i;
- }
- /* Now change the order if it needs to be. */
- init_compat_errno_arrays();
-
- return;
+ int i;
+ for (i=0; i < GF_ERROR_CODE_UNKNOWN; i++) {
+ gf_errno_to_error_array[i] = i;
+ gf_error_to_errno_array[i] = i;
+ }
+ /* Now change the order if it needs to be. */
+ init_compat_errno_arrays();
+
+ return;
}
-int32_t
+int32_t
gf_errno_to_error (int32_t op_errno)
{
- if (!gf_compat_errno_init_done) {
- init_errno_arrays ();
- gf_compat_errno_init_done = 1;
- }
+ if (!gf_compat_errno_init_done) {
+ init_errno_arrays ();
+ gf_compat_errno_init_done = 1;
+ }
- if ((op_errno > GF_ERROR_CODE_SUCCESS) && (op_errno < GF_ERROR_CODE_UNKNOWN))
- return gf_errno_to_error_array[op_errno];
+ if ((op_errno > GF_ERROR_CODE_SUCCESS) && (op_errno < GF_ERROR_CODE_UNKNOWN))
+ return gf_errno_to_error_array[op_errno];
- return op_errno;
+ return op_errno;
}
-int32_t
+int32_t
gf_error_to_errno (int32_t error)
{
- if (!gf_compat_errno_init_done) {
- init_errno_arrays ();
- gf_compat_errno_init_done = 1;
- }
+ if (!gf_compat_errno_init_done) {
+ init_errno_arrays ();
+ gf_compat_errno_init_done = 1;
+ }
- if ((error > GF_ERROR_CODE_SUCCESS) && (error < GF_ERROR_CODE_UNKNOWN))
- return gf_error_to_errno_array[error];
+ if ((error > GF_ERROR_CODE_SUCCESS) && (error < GF_ERROR_CODE_UNKNOWN))
+ return gf_error_to_errno_array[error];
- return error;
+ return error;
}
-
diff --git a/libglusterfs/src/compat-errno.h b/libglusterfs/src/compat-errno.h
index e1b2fec2a..65e52081d 100644
--- a/libglusterfs/src/compat-errno.h
+++ b/libglusterfs/src/compat-errno.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __COMPAT_ERRNO_H__
@@ -199,9 +190,9 @@
/* Solaris */
-/* ENOTACTIVE 73 / * Facility is not active */
-#define GF_ERROR_CODE_NOTACTIVE 801
-/* ELOCKUNMAPPED 72 / * locked lock was unmapped */
+/* ENOTACTIVE 73 / * Facility is not active */
+#define GF_ERROR_CODE_NOTACTIVE 801
+/* ELOCKUNMAPPED 72 / * locked lock was unmapped */
#define GF_ERROR_CODE_LOCKUNMAPPED 802
/* BSD system */
@@ -231,8 +222,8 @@
#define EBADFD EBADRPC
#endif /* EBADFD */
-/* These functions are defined for all the OS flags, but content will
- * be different for each OS flag.
+/* These functions are defined for all the OS flags, but content will
+ * be different for each OS flag.
*/
int32_t gf_errno_to_error (int32_t op_errno);
int32_t gf_error_to_errno (int32_t error);
diff --git a/libglusterfs/src/compat.c b/libglusterfs/src/compat.c
index bad62b563..eb6d8d4b7 100644
--- a/libglusterfs/src/compat.c
+++ b/libglusterfs/src/compat.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -35,404 +26,520 @@
#include "compat.h"
#include "common-utils.h"
+#include "iatt.h"
+#include "inode.h"
-#ifdef GF_DARWIN_HOST_OS
+#ifdef GF_SOLARIS_HOST_OS
+int
+solaris_fsetxattr(int fd, const char* key, const char *value, size_t size,
+ int flags)
+{
+ int attrfd = -1;
+ int ret = 0;
+
+ attrfd = openat (fd, key, flags|O_CREAT|O_WRONLY|O_XATTR, 0777);
+ if (attrfd >= 0) {
+ ftruncate (attrfd, 0);
+ ret = write (attrfd, value, size);
+ close (attrfd);
+ } else {
+ if (errno != ENOENT)
+ gf_log ("libglusterfs", GF_LOG_ERROR,
+ "Couldn't set extended attribute for %d (%d)",
+ fd, errno);
+ return -1;
+ }
+
+ return 0;
+}
-#define GF_FINDER_INFO_XATTR "com.apple.FinderInfo"
-#define GF_RESOURCE_FORK_XATTR "com.apple.ResourceFork"
-#define GF_FINDER_INFO_SIZE 32
-static const char gf_finder_info_content[GF_FINDER_INFO_SIZE] = {
- 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
- 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
- 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
- 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
-};
+int
+solaris_fgetxattr(int fd, const char* key, char *value, size_t size)
+{
+ int attrfd = -1;
+ int ret = 0;
+
+ attrfd = openat (fd, key, O_RDONLY|O_XATTR);
+ if (attrfd >= 0) {
+ if (size == 0) {
+ struct stat buf;
+ fstat (attrfd, &buf);
+ ret = buf.st_size;
+ } else {
+ ret = read (attrfd, value, size);
+ }
+ close (attrfd);
+ } else {
+ if (errno != ENOENT)
+ gf_log ("libglusterfs", GF_LOG_INFO,
+ "Couldn't read extended attribute for the file %d (%d)",
+ fd, errno);
+ if (errno == ENOENT)
+ errno = ENODATA;
+ return -1;
+ }
+
+ return ret;
+}
+/* Solaris does not support xattr for symlinks and dev files. Since gfid and
+ other trusted attributes are stored as xattrs, we need to provide support for
+ them. A mapped regular file is stored in the /.glusterfs_xattr_inode of the export dir.
+ All xattr ops related to the special files are redirected to this map file.
+*/
-int32_t
-gf_darwin_compat_listxattr (int len, dict_t *dict, int size)
+int
+make_export_path (const char *real_path, char **path)
{
- data_t *data = NULL;
- if (len == -1)
- len = 0;
-
- data = dict_get (dict, GF_FINDER_INFO_XATTR);
- if (!data) {
- dict_set (dict, GF_FINDER_INFO_XATTR,
- bin_to_data ((void *)gf_finder_info_content,
- GF_FINDER_INFO_SIZE));
- len += strlen (GF_FINDER_INFO_XATTR);
- }
-
- data = dict_get (dict, GF_RESOURCE_FORK_XATTR);
- if (!data) {
- dict_set (dict, GF_RESOURCE_FORK_XATTR, str_to_data (""));
- len += strlen (GF_RESOURCE_FORK_XATTR);
- }
-
- return len;
+ int ret = -1;
+ char *tmp = NULL;
+ char *export_path = NULL;
+ char *dup = NULL;
+ char *ptr = NULL;
+ char *freeptr = NULL;
+ uuid_t gfid = {0, };
+
+ export_path = GF_CALLOC (1, sizeof (char) * PATH_MAX, 0);
+ if (!export_path)
+ goto out;
+
+ dup = gf_strdup (real_path);
+ if (!dup)
+ goto out;
+
+ freeptr = dup;
+ ret = solaris_getxattr ("/", GFID_XATTR_KEY, gfid, 16);
+ /* Return value of getxattr */
+ if (ret == 16) {
+ if (__is_root_gfid (gfid)){
+ strcat (export_path, "/");
+ ret = 0;
+ goto done;
+ }
+ }
+
+ do {
+ ptr = strtok_r (dup, "/", &tmp);
+ if (!ptr)
+ break;
+ strcat (export_path, dup);
+ ret = solaris_getxattr (export_path, GFID_XATTR_KEY, gfid, 16);
+ if (ret == 16) {
+ if (__is_root_gfid (gfid)) {
+ ret = 0;
+ goto done;
+ }
+ }
+ strcat (export_path, "/");
+ dup = tmp;
+ } while (ptr);
+
+ goto out;
+
+done:
+ if (!ret) {
+ *path = export_path;
+ }
+out:
+ GF_FREE (freeptr);
+ if (ret && export_path)
+ GF_FREE (export_path);
+
+ return ret;
}
-
-int32_t
-gf_darwin_compat_getxattr (const char *key, dict_t *dict)
+int
+solaris_xattr_resolve_path (const char *real_path, char **path)
{
- data_t *data = NULL;
-
- if (strcmp(key, GF_FINDER_INFO_XATTR) == 0) {
- data = dict_get (dict, GF_FINDER_INFO_XATTR);
- if (!data) {
- dict_set (dict, GF_FINDER_INFO_XATTR,
- bin_to_data ((void *)gf_finder_info_content,
- GF_FINDER_INFO_SIZE));
- return GF_FINDER_INFO_SIZE;
- }
- return 0;
- }
-
- if (strcmp(key, GF_RESOURCE_FORK_XATTR) == 0) {
- data = dict_get (dict, GF_RESOURCE_FORK_XATTR);
- if (!data) {
- /* Always null */
- dict_set (dict, GF_RESOURCE_FORK_XATTR,
- str_to_data (""));
- return 0;
- }
- return 0;
- }
- return -1;
+ int ret = -1;
+ char *export_path = NULL;
+ char xattr_path[PATH_MAX] = {0, };
+ struct stat lstatbuf = {0, };
+ struct iatt stbuf = {0, };
+ struct stat statbuf = {0, };
+
+ ret = lstat (real_path, &lstatbuf);
+ if (ret != 0 )
+ return ret;
+ iatt_from_stat (&stbuf, &lstatbuf);
+ if (IA_ISREG(stbuf.ia_type) || IA_ISDIR(stbuf.ia_type))
+ return -1;
+
+ ret = make_export_path (real_path, &export_path);
+ if (!ret && export_path) {
+ strcat (export_path, "/"GF_SOLARIS_XATTR_DIR);
+ if (lstat (export_path, &statbuf)) {
+ ret = mkdir (export_path, 0777);
+ if (ret && (errno != EEXIST)) {
+ gf_log (THIS->name, GF_LOG_DEBUG, "mkdir failed,"
+ " errno: %d", errno);
+ goto out;
+ }
+ }
+
+ snprintf(xattr_path, PATH_MAX, "%s%s%lu", export_path,
+ "/", stbuf.ia_ino);
+
+ ret = lstat (xattr_path, &statbuf);
+
+ if (ret) {
+ ret = mknod (xattr_path, S_IFREG|O_WRONLY, 0);
+ if (ret && (errno != EEXIST)) {
+ gf_log (THIS->name, GF_LOG_WARNING,"Failed to create "
+ "mapped file %s, error %d", xattr_path,
+ errno);
+ goto out;
+ }
+ }
+ *path = gf_strdup (xattr_path);
+ }
+out:
+ GF_FREE (export_path);
+ if (*path)
+ return 0;
+ else
+ return -1;
}
-
-int32_t
-gf_darwin_compat_setxattr (dict_t *dict)
+int
+solaris_setxattr(const char *path, const char* key, const char *value,
+ size_t size, int flags)
{
- data_t *data = NULL;
+ int attrfd = -1;
+ int ret = 0;
+ char *mapped_path = NULL;
+
+ ret = solaris_xattr_resolve_path (path, &mapped_path);
+ if (!ret) {
+ attrfd = attropen (mapped_path, key, flags|O_CREAT|O_WRONLY,
+ 0777);
+ } else {
+ attrfd = attropen (path, key, flags|O_CREAT|O_WRONLY, 0777);
+ }
+ if (attrfd >= 0) {
+ ftruncate (attrfd, 0);
+ ret = write (attrfd, value, size);
+ close (attrfd);
+ ret = 0;
+ } else {
+ if (errno != ENOENT)
+ gf_log ("libglusterfs", GF_LOG_ERROR,
+ "Couldn't set extended attribute for %s (%d)",
+ path, errno);
+ ret = -1;
+ }
+ GF_FREE (mapped_path);
+ return ret;
+}
- data = dict_get (dict, GF_FINDER_INFO_XATTR);
- if (data)
- return 0;
- data = dict_get (dict, GF_RESOURCE_FORK_XATTR);
- if (data)
- return 0;
- return -1;
+int
+solaris_listxattr(const char *path, char *list, size_t size)
+{
+ int attrdirfd = -1;
+ ssize_t len = 0;
+ DIR *dirptr = NULL;
+ struct dirent *dent = NULL;
+ int newfd = -1;
+ char *mapped_path = NULL;
+ int ret = -1;
+
+ ret = solaris_xattr_resolve_path (path, &mapped_path);
+ if (!ret) {
+ attrdirfd = attropen (mapped_path, ".", O_RDONLY, 0);
+ } else {
+ attrdirfd = attropen (path, ".", O_RDONLY, 0);
+ }
+ if (attrdirfd >= 0) {
+ newfd = dup(attrdirfd);
+ dirptr = fdopendir(newfd);
+ if (dirptr) {
+ while ((dent = readdir(dirptr))) {
+ size_t listlen = strlen(dent->d_name);
+ if (!strcmp(dent->d_name, ".") ||
+ !strcmp(dent->d_name, "..")) {
+ /* we don't want "." and ".." here */
+ continue;
+ }
+ if (size == 0) {
+ /* return the current size of the list
+ of extended attribute names*/
+ len += listlen + 1;
+ } else {
+ /* check size and copy entry + null
+ into list. */
+ if ((len + listlen + 1) > size) {
+ errno = ERANGE;
+ len = -1;
+ break;
+ } else {
+ strncpy(list + len, dent->d_name, listlen);
+ len += listlen;
+ list[len] = '\0';
+ ++len;
+ }
+ }
+ }
+
+ if (closedir(dirptr) == -1) {
+ close (attrdirfd);
+ len = -1;
+ goto out;
+ }
+ } else {
+ close (attrdirfd);
+ len = -1;
+ goto out;
+ }
+ close (attrdirfd);
+ }
+out:
+ GF_FREE (mapped_path);
+ return len;
}
-#endif /* DARWIN */
+int
+solaris_flistxattr(int fd, char *list, size_t size)
+{
+ int attrdirfd = -1;
+ ssize_t len = 0;
+ DIR *dirptr = NULL;
+ struct dirent *dent = NULL;
+ int newfd = -1;
+
+ attrdirfd = openat (fd, ".", O_RDONLY, 0);
+ if (attrdirfd >= 0) {
+ newfd = dup(attrdirfd);
+ dirptr = fdopendir(newfd);
+ if (dirptr) {
+ while ((dent = readdir(dirptr))) {
+ size_t listlen = strlen(dent->d_name);
+ if (!strcmp(dent->d_name, ".") ||
+ !strcmp(dent->d_name, "..")) {
+ /* we don't want "." and ".." here */
+ continue;
+ }
+ if (size == 0) {
+ /* return the current size of the list
+ of extended attribute names*/
+ len += listlen + 1;
+ } else {
+ /* check size and copy entry + null
+ into list. */
+ if ((len + listlen + 1) > size) {
+ errno = ERANGE;
+ len = -1;
+ break;
+ } else {
+ strncpy(list + len, dent->d_name, listlen);
+ len += listlen;
+ list[len] = '\0';
+ ++len;
+ }
+ }
+ }
+
+ if (closedir(dirptr) == -1) {
+ close (attrdirfd);
+ return -1;
+ }
+ } else {
+ close (attrdirfd);
+ return -1;
+ }
+ close (attrdirfd);
+ }
+ return len;
+}
-#ifdef GF_SOLARIS_HOST_OS
-int
-solaris_fsetxattr(int fd,
- const char* key,
- const char *value,
- size_t size,
- int flags)
+int
+solaris_removexattr(const char *path, const char* key)
{
- int attrfd = -1;
- int ret = 0;
-
- attrfd = openat (fd, key, flags|O_CREAT|O_WRONLY|O_XATTR, 0777);
- if (attrfd >= 0) {
- ftruncate (attrfd, 0);
- ret = write (attrfd, value, size);
- close (attrfd);
- } else {
- if (errno != ENOENT)
- gf_log ("libglusterfs", GF_LOG_ERROR,
- "Couldn't set extended attribute for %d (%d)",
- fd, errno);
- return -1;
- }
-
- return 0;
+ int ret = -1;
+ int attrfd = -1;
+ char *mapped_path = NULL;
+
+ ret = solaris_xattr_resolve_path (path, &mapped_path);
+ if (!ret) {
+ attrfd = attropen (mapped_path, ".", O_RDONLY, 0);
+ } else {
+ attrfd = attropen (path, ".", O_RDONLY, 0);
+ }
+ if (attrfd >= 0) {
+ ret = unlinkat (attrfd, key, 0);
+ close (attrfd);
+ } else {
+ if (errno == ENOENT)
+ errno = ENODATA;
+ ret = -1;
+ }
+
+ GF_FREE (mapped_path);
+
+ return ret;
}
-
-int
-solaris_fgetxattr(int fd,
- const char* key,
- char *value,
- size_t size)
+int
+solaris_getxattr(const char *path,
+ const char* key,
+ char *value,
+ size_t size)
{
- int attrfd = -1;
- int ret = 0;
-
- attrfd = openat (fd, key, O_RDONLY|O_XATTR);
- if (attrfd >= 0) {
- if (size == 0) {
- struct stat buf;
- fstat (attrfd, &buf);
- ret = buf.st_size;
- } else {
- ret = read (attrfd, value, size);
- }
- close (attrfd);
- } else {
- if (errno == ENOENT)
- errno = ENODATA;
- if (errno != ENOENT)
- gf_log ("libglusterfs", GF_LOG_DEBUG,
- "Couldn't read extended attribute for the file %d (%d)",
- fd, errno);
- return -1;
- }
-
- return ret;
+ int attrfd = -1;
+ int ret = 0;
+ char *mapped_path = NULL;
+
+ ret = solaris_xattr_resolve_path (path, &mapped_path);
+ if (!ret) {
+ attrfd = attropen (mapped_path, key, O_RDONLY, 0);
+ } else {
+ attrfd = attropen (path, key, O_RDONLY, 0);
+ }
+
+ if (attrfd >= 0) {
+ if (size == 0) {
+ struct stat buf;
+ fstat (attrfd, &buf);
+ ret = buf.st_size;
+ } else {
+ ret = read (attrfd, value, size);
+ }
+ close (attrfd);
+ } else {
+ if (errno != ENOENT)
+ gf_log ("libglusterfs", GF_LOG_INFO,
+ "Couldn't read extended attribute for the file %s (%s)",
+ path, strerror (errno));
+ if (errno == ENOENT)
+ errno = ENODATA;
+ ret = -1;
+ }
+ GF_FREE (mapped_path);
+ return ret;
}
-int
-solaris_setxattr(const char *path,
- const char* key,
- const char *value,
- size_t size,
- int flags)
+char* strsep(char** str, const char* delims)
{
- int attrfd = -1;
- int ret = 0;
-
- attrfd = attropen (path, key, flags|O_CREAT|O_WRONLY, 0777);
- if (attrfd >= 0) {
- ftruncate (attrfd, 0);
- ret = write (attrfd, value, size);
- close (attrfd);
- } else {
- if (errno != ENOENT)
- gf_log ("libglusterfs", GF_LOG_ERROR,
- "Couldn't set extended attribute for %s (%d)",
- path, errno);
- return -1;
- }
-
- return 0;
+ char* token;
+
+ if (*str==NULL) {
+ /* No more tokens */
+ return NULL;
+ }
+
+ token=*str;
+ while (**str!='\0') {
+ if (strchr(delims,**str)!=NULL) {
+ **str='\0';
+ (*str)++;
+ return token;
+ }
+ (*str)++;
+ }
+ /* There is no other token */
+ *str=NULL;
+ return token;
}
+/* Code comes from libiberty */
int
-solaris_listxattr(const char *path,
- char *list,
- size_t size)
+vasprintf (char **result, const char *format, va_list args)
{
- int attrdirfd = -1;
- ssize_t len = 0;
- DIR *dirptr = NULL;
- struct dirent *dent = NULL;
- int newfd = -1;
-
- attrdirfd = attropen (path, ".", O_RDONLY, 0);
- if (attrdirfd >= 0) {
- newfd = dup(attrdirfd);
- dirptr = fdopendir(newfd);
- if (dirptr) {
- while ((dent = readdir(dirptr))) {
- size_t listlen = strlen(dent->d_name);
- if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..")) {
- /* we don't want "." and ".." here */
- continue;
- }
- if (size == 0) {
- /* return the current size of the list of extended attribute names*/
- len += listlen + 1;
- } else {
- /* check size and copy entrie + nul into list. */
- if ((len + listlen + 1) > size) {
- errno = ERANGE;
- len = -1;
- break;
- } else {
- strncpy(list + len, dent->d_name, listlen);
- len += listlen;
- list[len] = '\0';
- ++len;
- }
- }
- }
-
- if (closedir(dirptr) == -1) {
- close (attrdirfd);
- return -1;
- }
- } else {
- close (attrdirfd);
- return -1;
- }
- close (attrdirfd);
- }
- return len;
+ return gf_vasprintf(result, format, args);
}
-
int
-solaris_flistxattr(int fd,
- char *list,
- size_t size)
+asprintf (char **buf, const char *fmt, ...)
{
- int attrdirfd = -1;
- ssize_t len = 0;
- DIR *dirptr = NULL;
- struct dirent *dent = NULL;
- int newfd = -1;
-
- attrdirfd = openat (fd, ".", O_RDONLY, 0);
- if (attrdirfd >= 0) {
- newfd = dup(attrdirfd);
- dirptr = fdopendir(newfd);
- if (dirptr) {
- while ((dent = readdir(dirptr))) {
- size_t listlen = strlen(dent->d_name);
- if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..")) {
- /* we don't want "." and ".." here */
- continue;
- }
- if (size == 0) {
- /* return the current size of the list of extended attribute names*/
- len += listlen + 1;
- } else {
- /* check size and copy entrie + nul into list. */
- if ((len + listlen + 1) > size) {
- errno = ERANGE;
- len = -1;
- break;
- } else {
- strncpy(list + len, dent->d_name, listlen);
- len += listlen;
- list[len] = '\0';
- ++len;
- }
- }
- }
-
- if (closedir(dirptr) == -1) {
- close (attrdirfd);
- return -1;
- }
- } else {
- close (attrdirfd);
- return -1;
- }
- close (attrdirfd);
- }
- return len;
-}
+ int status;
+ va_list ap;
+ va_start (ap, fmt);
+ status = vasprintf (buf, fmt, ap);
+ va_end (ap);
+ return status;
+}
-int
-solaris_removexattr(const char *path,
- const char* key)
+int solaris_unlink (const char *path)
{
- int ret = -1;
- int attrfd = attropen (path, ".", O_RDONLY, 0);
- if (attrfd >= 0) {
- ret = unlinkat (attrfd, key, 0);
- close (attrfd);
- } else {
- if (errno == ENOENT)
- errno = ENODATA;
- return -1;
- }
-
- return ret;
+ char *mapped_path = NULL;
+ struct stat stbuf = {0, };
+ int ret = -1;
+
+ ret = solaris_xattr_resolve_path (path, &mapped_path);
+
+
+ if (!ret && mapped_path) {
+ if (lstat(path, &stbuf)) {
+ gf_log (THIS->name, GF_LOG_WARNING, "Stat failed on mapped"
+ " file %s with error %d", mapped_path, errno);
+ goto out;
+ }
+ if (stbuf.st_nlink == 1) {
+ if(remove (mapped_path))
+ gf_log (THIS->name, GF_LOG_WARNING, "Failed to remove mapped "
+ "file %s. Errno %d", mapped_path, errno);
+ }
+
+ }
+
+out:
+ GF_FREE (mapped_path);
+
+ return unlink (path);
}
-int
-solaris_getxattr(const char *path,
- const char* key,
- char *value,
- size_t size)
+int
+solaris_rename (const char *old_path, const char *new_path)
{
- int attrfd = -1;
- int ret = 0;
-
- attrfd = attropen (path, key, O_RDONLY, 0);
- if (attrfd >= 0) {
- if (size == 0) {
- struct stat buf;
- fstat (attrfd, &buf);
- ret = buf.st_size;
- } else {
- ret = read (attrfd, value, size);
- }
- close (attrfd);
- } else {
- if (errno == ENOENT)
- errno = ENODATA;
- if (errno != ENOENT)
- gf_log ("libglusterfs", GF_LOG_DEBUG,
- "Couldn't read extended attribute for the file %s (%d)",
- path, errno);
- return -1;
- }
- return ret;
-}
+ char *mapped_path = NULL;
+ int ret = -1;
+ ret = solaris_xattr_resolve_path (new_path, &mapped_path);
-int
-asprintf(char **string_ptr, const char *format, ...)
-{
- va_list arg;
- char *str;
- int size;
- int rv;
-
- if (!string_ptr || !format)
- return -1;
-
- va_start(arg, format);
- size = vsnprintf(NULL, 0, format, arg);
- size++;
- va_start(arg, format);
- str = MALLOC(size);
- if (str == NULL) {
- va_end(arg);
- /*
- * Strictly speaking, GNU asprintf doesn't do this,
- * but the caller isn't checking the return value.
- */
- gf_log ("libglusterfs", GF_LOG_CRITICAL, "failed to allocate memory");
- return -1;
- }
- rv = vsnprintf(str, size, format, arg);
- va_end(arg);
-
- *string_ptr = str;
- return (rv);
-}
-char* strsep(char** str, const char* delims)
+ if (!ret && mapped_path) {
+ if (!remove (mapped_path))
+ gf_log (THIS->name, GF_LOG_WARNING, "Failed to remove mapped "
+ "file %s. Errno %d", mapped_path, errno);
+ GF_FREE (mapped_path);
+ }
+
+ return rename(old_path, new_path);
+
+}
+
+char *
+mkdtemp (char *tempstring)
{
- char* token;
-
- if (*str==NULL) {
- /* No more tokens */
- return NULL;
- }
-
- token=*str;
- while (**str!='\0') {
- if (strchr(delims,**str)!=NULL) {
- **str='\0';
- (*str)++;
- return token;
- }
- (*str)++;
- }
- /* There is no other token */
- *str=NULL;
- return token;
+ char *new_string = NULL;
+ int ret = 0;
+
+ new_string = mkstemp (tempstring);
+ if (!new_string)
+ goto out;
+
+ ret = mkdir (new_string, 0700);
+ if (ret < 0)
+ new_string = NULL;
+
+out:
+ return new_string;
}
#endif /* GF_SOLARIS_HOST_OS */
#ifndef HAVE_STRNLEN
-size_t
-strnlen(const char *string, size_t maxlen)
+size_t
+strnlen(const char *string, size_t maxlen)
{
- int len = 0;
- while ((len < maxlen) && string[len])
- len++;
- return len;
+ int len = 0;
+ while ((len < maxlen) && string[len])
+ len++;
+ return len;
}
#endif /* STRNLEN */
diff --git a/libglusterfs/src/compat.h b/libglusterfs/src/compat.h
index 228daf5da..2bd982541 100644
--- a/libglusterfs/src/compat.h
+++ b/libglusterfs/src/compat.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __COMPAT_H__
@@ -29,7 +20,7 @@
#include "dict.h"
#ifndef LLONG_MAX
-#define LLONG_MAX LONG_LONG_MAX /* compat with old gcc */
+#define LLONG_MAX __LONG_LONG_MAX__ /* compat with old gcc */
#endif /* LLONG_MAX */
@@ -41,11 +32,17 @@
#include <linux/limits.h>
#include <sys/xattr.h>
#include <endian.h>
+#ifdef HAVE_FALLOC_H
+#include <linux/falloc.h>
+#else
+#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */
+#define FALLOC_FL_PUNCH_HOLE 0x02 /* de-allocates range */
+#endif
#ifndef HAVE_LLISTXATTR
-/* This part is valid only incase of old glibc which doesn't support
+/* This part is valid only incase of old glibc which doesn't support
* 'llistxattr()' system calls.
*/
@@ -57,8 +54,8 @@
#endif /* HAVE_LLISTXATTR */
#endif /* GF_LINUX_HOST_OS */
-#ifdef GF_BSD_HOST_OS
-/* In case of FreeBSD */
+#ifdef GF_BSD_HOST_OS
+/* In case of FreeBSD and NetBSD */
#define UNIX_PATH_MAX 104
#include <sys/types.h>
@@ -66,16 +63,21 @@
#include <sys/un.h>
#include <sys/endian.h>
#include <sys/extattr.h>
+#ifdef HAVE_SYS_XATTR_H
+#include <sys/xattr.h>
+#endif /* HAVE_SYS_XATTR_H */
#include <limits.h>
#include <libgen.h>
+#ifndef XATTR_CREATE
enum {
ATTR_CREATE = 1,
#define XATTR_CREATE ATTR_CREATE
ATTR_REPLACE = 2
#define XATTR_REPLACE ATTR_REPLACE
};
+#endif /* XATTR_CREATE */
#ifndef sighandler_t
@@ -88,7 +90,7 @@ enum {
#ifndef EUCLEAN
#define EUCLEAN 0
-#endif
+#endif
#include <netinet/in.h>
#ifndef s6_addr16
@@ -96,7 +98,7 @@ enum {
#endif
#ifndef s6_addr32
#define s6_addr32 __u6_addr.__u6_addr32
-#endif
+#endif
/* Posix dictates NAME_MAX to be used */
# ifndef NAME_MAX
@@ -106,7 +108,7 @@ enum {
# define NAME_MAX 255
# endif
# endif
-
+
#define F_GETLK64 F_GETLK
#define F_SETLK64 F_SETLK
#define F_SETLKW64 F_SETLKW
@@ -146,10 +148,10 @@ enum {
#include <netinet/in.h>
#ifndef s6_addr16
#define s6_addr16 __u6_addr.__u6_addr16
-#endif
+#endif
#ifndef s6_addr32
#define s6_addr32 __u6_addr.__u6_addr32
-#endif
+#endif
/* Posix dictates NAME_MAX to be used */
# ifndef NAME_MAX
@@ -164,6 +166,10 @@ enum {
#define F_SETLK64 F_SETLK
#define F_SETLKW64 F_SETLKW
+#ifndef FTW_CONTINUE
+ #define FTW_CONTINUE 0
+#endif
+
int32_t gf_darwin_compat_listxattr (int len, dict_t *dict, int size);
int32_t gf_darwin_compat_getxattr (const char *key, dict_t *dict);
int32_t gf_darwin_compat_setxattr (dict_t *dict);
@@ -173,7 +179,7 @@ int32_t gf_darwin_compat_setxattr (dict_t *dict);
#ifdef GF_SOLARIS_HOST_OS
#define UNIX_PATH_MAX 108
-#define EUCLEAN 117
+#define EUCLEAN 117
#include <sys/un.h>
#include <limits.h>
@@ -185,8 +191,9 @@ int32_t gf_darwin_compat_setxattr (dict_t *dict);
#ifndef lchmod
#define lchmod chmod
-#endif
+#endif
+#define lgetxattr(path, key, value, size) solaris_getxattr(path,key,value,size)
enum {
ATTR_CREATE = 1,
#define XATTR_CREATE ATTR_CREATE
@@ -203,10 +210,10 @@ enum {
# ifndef NAME_MAX
# ifdef MAXNAMLEN
# define NAME_MAX MAXNAMLEN
-# else
+# else
# define NAME_MAX 255
# endif
-# endif
+# endif
#include <netinet/in.h>
#ifndef s6_addr16
@@ -218,20 +225,73 @@ enum {
#define lutimes(filename,times) utimes(filename,times)
-int asprintf(char **string_ptr, const char *format, ...);
+#ifndef SEEK_SET
+#define SEEK_SET 0
+#endif
+
+enum {
+ DT_UNKNOWN = 0,
+# define DT_UNKNOWN DT_UNKNOWN
+ DT_FIFO = 1,
+# define DT_FIFO DT_FIFO
+ DT_CHR = 2,
+# define DT_CHR DT_CHR
+ DT_DIR = 4,
+# define DT_DIR DT_DIR
+ DT_BLK = 6,
+# define DT_BLK DT_BLK
+ DT_REG = 8,
+# define DT_REG DT_REG
+ DT_LNK = 10,
+# define DT_LNK DT_LNK
+ DT_SOCK = 12,
+# define DT_SOCK DT_SOCK
+ DT_WHT = 14
+# define DT_WHT DT_WHT
+};
+
+#ifndef _PATH_MOUNTED
+ #define _PATH_MOUNTED "/etc/mtab"
+#endif
+
+#ifndef O_ASYNC
+ #ifdef FASYNC
+ #define O_ASYNC FASYNC
+ #else
+ #define O_ASYNC 0
+ #endif
+#endif
+
+#ifndef FTW_CONTINUE
+ #define FTW_CONTINUE 0
+#endif
+
+int asprintf(char **string_ptr, const char *format, ...);
+
+int vasprintf (char **result, const char *format, va_list args);
char* strsep(char** str, const char* delims);
int solaris_listxattr(const char *path, char *list, size_t size);
int solaris_removexattr(const char *path, const char* key);
-int solaris_getxattr(const char *path, const char* key,
+int solaris_getxattr(const char *path, const char* key,
char *value, size_t size);
-int solaris_setxattr(const char *path, const char* key, const char *value,
+int solaris_setxattr(const char *path, const char* key, const char *value,
size_t size, int flags);
int solaris_fgetxattr(int fd, const char* key,
char *value, size_t size);
-int solaris_fsetxattr(int fd, const char* key, const char *value,
+int solaris_fsetxattr(int fd, const char* key, const char *value,
size_t size, int flags);
int solaris_flistxattr(int fd, char *list, size_t size);
+int solaris_rename (const char *oldpath, const char *newpath);
+
+int solaris_unlink (const char *pathname);
+
+char *mkdtemp (char *temp);
+
+#define GF_SOLARIS_XATTR_DIR ".glusterfs_xattr_inode"
+
+int solaris_xattr_resolve_path (const char *real_path, char **path);
+
#endif /* GF_SOLARIS_HOST_OS */
#ifndef HAVE_ARGP
@@ -241,7 +301,7 @@ int solaris_flistxattr(int fd, char *list, size_t size);
#endif /* HAVE_ARGP */
#ifndef HAVE_STRNLEN
-size_t strnlen(const char *string, size_t maxlen);
+size_t strnlen(const char *string, size_t maxlen);
#endif /* STRNLEN */
#ifndef strdupa
@@ -253,9 +313,9 @@ size_t strnlen(const char *string, size_t maxlen);
char *__new = (char *) __builtin_alloca (__len); \
(char *) memcpy (__new, __old, __len); \
}))
-#endif
+#endif
-#define ALIGN(x) (((x) + sizeof (uint64_t) - 1) & ~(sizeof (uint64_t) - 1))
+#define GF_DIR_ALIGN(x) (((x) + sizeof (uint64_t) - 1) & ~(sizeof (uint64_t) - 1))
#include <sys/types.h>
#include <dirent.h>
@@ -263,50 +323,35 @@ size_t strnlen(const char *string, size_t maxlen);
static inline int32_t
dirent_size (struct dirent *entry)
{
+ int32_t size = -1;
+
#ifdef GF_BSD_HOST_OS
- return ALIGN (24 /* FIX MEEEE!!! */ + entry->d_namlen);
+ size = GF_DIR_ALIGN (24 /* FIX MEEEE!!! */ + entry->d_namlen);
#endif
#ifdef GF_DARWIN_HOST_OS
- return ALIGN (24 /* FIX MEEEE!!! */ + entry->d_namlen);
+ size = GF_DIR_ALIGN (24 /* FIX MEEEE!!! */ + entry->d_namlen);
#endif
#ifdef GF_LINUX_HOST_OS
- return ALIGN (24 /* FIX MEEEE!!! */ + entry->d_reclen);
+ size = GF_DIR_ALIGN (24 /* FIX MEEEE!!! */ + entry->d_reclen);
#endif
#ifdef GF_SOLARIS_HOST_OS
- return ALIGN (24 /* FIX MEEEE!!! */ + entry->d_reclen);
-#endif
-}
-
-
-static inline int32_t
-gf_compat_getxattr (const char *key, dict_t *dict)
-{
-#ifdef GF_DARWIN_HOST_OS
- return gf_darwin_compat_getxattr (key, dict);
+ size = GF_DIR_ALIGN (24 /* FIX MEEEE!!! */ + entry->d_reclen);
#endif
- return -1;
+ return size;
}
+#ifdef THREAD_UNSAFE_BASENAME
+char *basename_r(const char *);
+#define basename(path) basename_r(path)
+#endif /* THREAD_UNSAFE_BASENAME */
-static inline int32_t
-gf_compat_setxattr (dict_t *dict)
-{
-#ifdef GF_DARWIN_HOST_OS
- return gf_darwin_compat_setxattr (dict);
-#endif
- return -1;
-}
-
-
-static inline int32_t
-gf_compat_listxattr (int len, dict_t *dict, int size)
-{
-#ifdef GF_DARWIN_HOST_OS
- return gf_darwin_compat_listxattr (len, dict, size);
-#endif
- return len;
-}
+#ifdef THREAD_UNSAFE_DIRNAME
+char *dirname_r(char *path);
+#define dirname(path) dirname_r(path)
+#endif /* THREAD_UNSAFE_DIRNAME */
+int gf_mkostemp (char *tmpl, int suffixlen, int flags);
+#define mkostemp(tmpl, flags) gf_mkostemp(tmpl, 0, flags);
#ifdef HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC
/* Linux, Solaris, Cygwin */
@@ -333,4 +378,25 @@ gf_compat_listxattr (int len, dict_t *dict, int size)
#define ST_CTIM_NSEC_SET(stbuf, val) do { } while (0);
#endif
+#ifndef IXDR_GET_LONG
+#define IXDR_GET_LONG(buf) ((long)IXDR_GET_U_INT32(buf))
+#endif
+
+#ifndef IXDR_PUT_LONG
+#define IXDR_PUT_LONG(buf, v) ((long)IXDR_PUT_INT32(buf, (long)(v)))
+#endif
+
+#ifndef IXDR_GET_U_LONG
+#define IXDR_GET_U_LONG(buf) ((u_long)IXDR_GET_LONG(buf))
+#endif
+
+#ifndef IXDR_PUT_U_LONG
+#define IXDR_PUT_U_LONG(buf, v) IXDR_PUT_LONG(buf, (long)(v))
+#endif
+
+#if defined(__GNUC__) && !defined(RELAX_POISONING)
+/* Use run API, see run.h */
+#pragma GCC poison system popen
+#endif
+
#endif /* __COMPAT_H__ */
diff --git a/libglusterfs/src/ctx.c b/libglusterfs/src/ctx.c
new file mode 100644
index 000000000..0082601d4
--- /dev/null
+++ b/libglusterfs/src/ctx.c
@@ -0,0 +1,48 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif /* !_CONFIG_H */
+
+#include <pthread.h>
+
+#include "glusterfs.h"
+
+glusterfs_ctx_t *
+glusterfs_ctx_new ()
+{
+ int ret = 0;
+ glusterfs_ctx_t *ctx = NULL;
+
+ /* no GF_CALLOC here, gf_acct_mem_set_enable is not
+ yet decided at this point */
+ ctx = calloc (1, sizeof (*ctx));
+ if (!ctx) {
+ ret = -1;
+ goto out;
+ }
+
+ INIT_LIST_HEAD (&ctx->graphs);
+ INIT_LIST_HEAD (&ctx->mempool_list);
+
+ ctx->daemon_pipe[0] = -1;
+ ctx->daemon_pipe[1] = -1;
+
+ ret = pthread_mutex_init (&ctx->lock, NULL);
+ if (ret) {
+ free (ctx);
+ ctx = NULL;
+ }
+out:
+ return ctx;
+}
+
diff --git a/libglusterfs/src/daemon.c b/libglusterfs/src/daemon.c
new file mode 100644
index 000000000..348e3ad40
--- /dev/null
+++ b/libglusterfs/src/daemon.c
@@ -0,0 +1,66 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdio.h>
+#include "daemon.h"
+
+int
+os_daemon_return (int nochdir, int noclose)
+{
+ pid_t pid = -1;
+ int ret = -1;
+ FILE *ptr = NULL;
+
+ ret = fork();
+ if (ret)
+ return ret;
+
+ pid = setsid();
+
+ if (pid == -1) {
+ ret = -1;
+ goto out;
+ }
+
+ if (!nochdir)
+ ret = chdir("/");
+
+ if (!noclose) {
+ ptr = freopen (DEVNULLPATH, "r", stdin);
+ if (!ptr)
+ goto out;
+
+ ptr = freopen (DEVNULLPATH, "w", stdout);
+ if (!ptr)
+ goto out;
+
+ ptr = freopen (DEVNULLPATH, "w", stderr);
+ if (!ptr)
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+os_daemon (int nochdir, int noclose)
+{
+ int ret = -1;
+
+ ret = os_daemon_return (nochdir, noclose);
+ if (ret <= 0)
+ return ret;
+
+ _exit (0);
+}
diff --git a/libglusterfs/src/daemon.h b/libglusterfs/src/daemon.h
new file mode 100644
index 000000000..80836a326
--- /dev/null
+++ b/libglusterfs/src/daemon.h
@@ -0,0 +1,23 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _DAEMON_H
+#define _DAEMON_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#define DEVNULLPATH "/dev/null"
+
+int os_daemon_return(int nochdir, int noclose);
+int os_daemon(int nochdir, int noclose);
+#endif /*_DAEMON_H */
diff --git a/libglusterfs/src/defaults.c b/libglusterfs/src/defaults.c
index ef8104e59..2ebb25150 100644
--- a/libglusterfs/src/defaults.c
+++ b/libglusterfs/src/defaults.c
@@ -1,28 +1,22 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
/* libglusterfs/src/defaults.c:
- This file contains functions, which are used to fill the 'fops' and 'mops'
+ This file contains functions, which are used to fill the 'fops', 'cbk'
structures in the xlator structures, if they are not written. Here, all the
function calls are plainly forwared to the first child of the xlator, and
all the *_cbk function does plain STACK_UNWIND of the frame, and returns.
+ This function also implements *_resume () functions, which does same
+ operation as a fop().
+
All the functions are plain enough to understand.
*/
@@ -33,1494 +27,1432 @@
#include "xlator.h"
-static int32_t
-default_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf,
- dict_t *dict)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- inode,
- buf,
- dict);
- return 0;
+/* _CBK function section */
+
+int32_t
+default_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf,
+ xdata, postparent);
+ return 0;
}
int32_t
-default_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req)
+default_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
- STACK_WIND (frame,
- default_lookup_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup,
- loc,
- xattr_req);
- return 0;
+ STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
}
int32_t
-default_forget (xlator_t *this,
- inode_t *inode)
+default_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
{
- return 0;
+ STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
}
-static int32_t
-default_stat_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+int32_t
+default_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
+ STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
}
int32_t
-default_stat (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
+default_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
{
- STACK_WIND (frame,
- default_stat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat,
- loc);
- return 0;
+ STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, xdata);
+ return 0;
}
-static int32_t
-default_chmod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+int32_t
+default_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *path,
+ struct iatt *buf, dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
+ STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, buf,
+ xdata);
+ return 0;
}
+
int32_t
-default_chmod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
+default_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- STACK_WIND (frame,
- default_chmod_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->chmod,
- loc,
- mode);
- return 0;
+ STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode,
+ buf, preparent, postparent, xdata);
+ return 0;
}
+int32_t
+default_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode,
+ buf, preparent, postparent, xdata);
+ return 0;
+}
-static int32_t
-default_fchmod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+int32_t
+default_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
+ STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, preparent,
+ postparent, xdata);
+ return 0;
}
int32_t
-default_fchmod (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- mode_t mode)
+default_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent,
+ dict_t *xdata)
{
- STACK_WIND (frame,
- default_fchmod_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fchmod,
- fd,
- mode);
- return 0;
+ STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, preparent,
+ postparent, xdata);
+ return 0;
}
-static int32_t
-default_chown_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+
+int32_t
+default_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
+ STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
}
+
int32_t
-default_chown (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- uid_t uid,
- gid_t gid)
+default_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- STACK_WIND (frame,
- default_chown_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->chown,
- loc,
- uid,
- gid);
- return 0;
+ STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf, preoldparent,
+ postoldparent, prenewparent, postnewparent, xdata);
+ return 0;
}
-static int32_t
-default_fchown_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+
+int32_t
+default_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent,
+ dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
+ STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
}
+
int32_t
-default_fchown (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- uid_t uid,
- gid_t gid)
+default_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent,
+ dict_t *xdata)
{
- STACK_WIND (frame,
- default_fchown_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fchown,
- fd,
- uid,
- gid);
- return 0;
+ STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
}
-static int32_t
-default_truncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+int32_t
+default_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+ return 0;
}
int32_t
-default_truncate (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset)
+default_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
{
- STACK_WIND (frame,
- default_truncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate,
- loc,
- offset);
- return 0;
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
+ stbuf, iobref, xdata);
+ return 0;
}
-static int32_t
-default_ftruncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+
+int32_t
+default_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
}
+
int32_t
-default_ftruncate (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset)
+default_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
{
- STACK_WIND (frame,
- default_ftruncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate,
- fd,
- offset);
- return 0;
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
+ return 0;
}
+
+
int32_t
-default_utimens_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+default_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
+ STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
}
+int32_t
+default_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
int32_t
-default_utimens (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- struct timespec tv[2])
+default_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata)
{
- STACK_WIND (frame,
- default_utimens_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->utimens,
- loc,
- tv);
- return 0;
+ STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata);
+ return 0;
}
-static int32_t
-default_access_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+int32_t
+default_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
+ STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, xdata);
+ return 0;
}
int32_t
-default_access (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t mask)
+default_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
{
- STACK_WIND (frame,
- default_access_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->access,
- loc,
- mask);
- return 0;
+ STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
+ return 0;
}
-static int32_t
-default_readlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- const char *path)
+int32_t
+default_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- path);
- return 0;
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
+
int32_t
-default_readlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- size_t size)
+default_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
{
- STACK_WIND (frame,
- default_readlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readlink,
- loc,
- size);
- return 0;
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
-static int32_t
-default_mknod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
+
+int32_t
+default_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- inode,
- buf);
- return 0;
+ STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
+
int32_t
-default_mknod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode,
- dev_t rdev)
+default_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- STACK_WIND (frame,
- default_mknod_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod,
- loc, mode, rdev);
- return 0;
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
-static int32_t
-default_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- inode,
- buf);
- return 0;
+int32_t
+default_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
int32_t
-default_mkdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
+default_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- STACK_WIND (frame,
- default_mkdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir,
- loc, mode);
- return 0;
+ STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
-static int32_t
-default_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+
+int32_t
+default_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
+
int32_t
-default_unlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
+default_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
{
- STACK_WIND (frame,
- default_unlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink,
- loc);
- return 0;
+ STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
-static int32_t
-default_rmdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+int32_t
+default_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
+ STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata);
+ return 0;
}
int32_t
-default_rmdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
+default_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
{
- STACK_WIND (frame,
- default_rmdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir,
- loc);
- return 0;
+ STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, xdata);
+ return 0;
}
-static int32_t
-default_symlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
+int32_t
+default_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
- return 0;
+ STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno, xdata);
+ return 0;
}
int32_t
-default_symlink (call_frame_t *frame,
- xlator_t *this,
- const char *linkpath,
- loc_t *loc)
+default_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
{
- STACK_WIND (frame,
- default_symlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink,
- linkpath, loc);
- return 0;
+ STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+default_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fentrylk, frame, op_ret, op_errno, xdata);
+ return 0;
}
-static int32_t
-default_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+int32_t
+default_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, uint32_t weak_checksum,
+ uint8_t *strong_checksum,
+ dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
+ STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno, weak_checksum,
+ strong_checksum, xdata);
+ return 0;
}
+
int32_t
-default_rename (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
+default_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
- STACK_WIND (frame,
- default_rename_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename,
- oldloc, newloc);
- return 0;
+ STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries, xdata);
+ return 0;
}
-static int32_t
-default_link_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
+int32_t
+default_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
- return 0;
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
+ return 0;
}
int32_t
-default_link (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
+default_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost,
+ dict_t *xdata)
{
- STACK_WIND (frame,
- default_link_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link,
- oldloc, newloc);
- return 0;
+ STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, statpre,
+ statpost, xdata);
+ return 0;
}
+int32_t
+default_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno, statpre,
+ statpost, xdata);
+ return 0;
+}
-static int32_t
-default_create_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct stat *buf)
+int32_t
+default_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
+ STACK_UNWIND_STRICT(fallocate, frame, op_ret, op_errno, pre, post, xdata);
return 0;
}
int32_t
-default_create (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- mode_t mode, fd_t *fd)
+default_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
{
- STACK_WIND (frame, default_create_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create,
- loc, flags, mode, fd);
+ STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, pre, post, xdata);
return 0;
}
-static int32_t
-default_open_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
+int32_t
+default_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- fd);
- return 0;
+ STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, pre,
+ post, xdata);
+ return 0;
}
+
int32_t
-default_open (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags, fd_t *fd)
+default_getspec_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, char *spec_data)
{
- STACK_WIND (frame,
- default_open_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open,
- loc, flags, fd);
- return 0;
+ STACK_UNWIND_STRICT (getspec, frame, op_ret, op_errno, spec_data);
+ return 0;
}
-static int32_t
-default_readv_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct stat *stbuf,
- struct iobref *iobref)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- vector,
- count,
- stbuf,
- iobref);
- return 0;
+/* RESUME */
+
+int32_t
+default_fgetxattr_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ STACK_WIND (frame, default_fgetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+ return 0;
}
int32_t
-default_readv (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset)
+default_fsetxattr_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata)
{
- STACK_WIND (frame,
- default_readv_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv,
- fd,
- size,
- offset);
- return 0;
+ STACK_WIND (frame, default_fsetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+ return 0;
}
+int32_t
+default_setxattr_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ STACK_WIND (frame, default_setxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
+ return 0;
+}
-static int32_t
-default_writev_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *stbuf)
+int32_t
+default_statfs_resume (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- stbuf);
- return 0;
+ STACK_WIND (frame, default_statfs_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->statfs, loc, xdata);
+ return 0;
}
int32_t
-default_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t off,
- struct iobref *iobref)
-{
- STACK_WIND (frame,
- default_writev_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev,
- fd,
- vector,
- count,
- off,
- iobref);
- return 0;
+default_fsyncdir_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t flags, dict_t *xdata)
+{
+ STACK_WIND (frame, default_fsyncdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsyncdir, fd, flags, xdata);
+ return 0;
}
-static int32_t
-default_flush_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+int32_t
+default_opendir_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
+ STACK_WIND (frame, default_opendir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+ return 0;
}
int32_t
-default_flush (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
+default_fstat_resume (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- STACK_WIND (frame,
- default_flush_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush,
- fd);
- return 0;
+ STACK_WIND (frame, default_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ return 0;
}
+int32_t
+default_fsync_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t flags, dict_t *xdata)
+{
+ STACK_WIND (frame, default_fsync_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, flags, xdata);
+ return 0;
+}
-static int32_t
-default_fsync_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+int32_t
+default_flush_resume (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
+ STACK_WIND (frame, default_flush_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
+ return 0;
}
int32_t
-default_fsync (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags)
+default_writev_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t off,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
- STACK_WIND (frame,
- default_fsync_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync,
- fd,
- flags);
- return 0;
+ STACK_WIND (frame, default_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count, off,
+ flags, iobref, xdata);
+ return 0;
}
-static int32_t
-default_fstat_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+int32_t
+default_readv_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
+ STACK_WIND (frame, default_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
+ return 0;
}
+
int32_t
-default_fstat (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
+default_open_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata)
{
- STACK_WIND (frame,
- default_fstat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat,
- fd);
- return 0;
+ STACK_WIND (frame, default_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
}
-static int32_t
-default_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
+int32_t
+default_create_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- fd);
- return 0;
+ STACK_WIND (frame, default_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask,
+ fd, xdata);
+ return 0;
}
int32_t
-default_opendir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, fd_t *fd)
+default_link_resume (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
{
- STACK_WIND (frame,
- default_opendir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->opendir,
- loc, fd);
- return 0;
+ STACK_WIND (frame, default_link_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+ return 0;
+}
+
+int32_t
+default_rename_resume (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ STACK_WIND (frame, default_rename_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+ return 0;
}
-static int32_t
-default_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entries,
- int32_t count)
+int
+default_symlink_resume (call_frame_t *frame, xlator_t *this,
+ const char *linkpath, loc_t *loc, mode_t umask,
+ dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- entries,
- count);
- return 0;
+ STACK_WIND (frame, default_symlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask,
+ xdata);
+ return 0;
}
int32_t
-default_getdents (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset,
- int32_t flag)
+default_rmdir_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int flags, dict_t *xdata)
{
- STACK_WIND (frame,
- default_getdents_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getdents,
- fd,
- size,
- offset,
- flag);
- return 0;
+ STACK_WIND (frame, default_rmdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata);
+ return 0;
+}
+
+int32_t
+default_unlink_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int xflag, dict_t *xdata)
+{
+ STACK_WIND (frame, default_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ return 0;
}
+int
+default_mkdir_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, mode_t umask, dict_t *xdata)
+{
+ STACK_WIND (frame, default_mkdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
+ return 0;
+}
-static int32_t
-default_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+
+int
+default_mknod_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
+ STACK_WIND (frame, default_mknod_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask,
+ xdata);
+ return 0;
}
int32_t
-default_setdents (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags,
- dir_entry_t *entries,
- int32_t count)
+default_readlink_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ size_t size, dict_t *xdata)
{
- STACK_WIND (frame,
- default_setdents_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setdents,
- fd,
- flags,
- entries,
- count);
- return 0;
+ STACK_WIND (frame, default_readlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readlink, loc, size, xdata);
+ return 0;
}
-static int32_t
-default_fsyncdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+int32_t
+default_access_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t mask, dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
+ STACK_WIND (frame, default_access_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->access, loc, mask, xdata);
+ return 0;
}
int32_t
-default_fsyncdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags)
+default_ftruncate_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, dict_t *xdata)
{
- STACK_WIND (frame,
- default_fsyncdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsyncdir,
- fd,
- flags);
- return 0;
+ STACK_WIND (frame, default_ftruncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
}
+int32_t
+default_getxattr_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+ return 0;
+}
-static int32_t
-default_statfs_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct statvfs *buf)
+
+int32_t
+default_xattrop_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
+ STACK_WIND (frame, default_xattrop_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->xattrop, loc, flags, dict, xdata);
+ return 0;
}
int32_t
-default_statfs (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
+default_fxattrop_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- STACK_WIND (frame,
- default_statfs_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->statfs,
- loc);
- return 0;
+ STACK_WIND (frame, default_fxattrop_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fxattrop, fd, flags, dict, xdata);
+ return 0;
}
+int32_t
+default_removexattr_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ STACK_WIND (frame, default_removexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+ return 0;
+}
-static int32_t
-default_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+int32_t
+default_fremovexattr_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
+ STACK_WIND (frame, default_fremovexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+ return 0;
}
int32_t
-default_setxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- int32_t flags)
+default_lk_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
- STACK_WIND (frame,
- default_setxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr,
- loc,
- dict,
- flags);
- return 0;
+ STACK_WIND (frame, default_lk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lk, fd, cmd, lock, xdata);
+ return 0;
}
-static int32_t
-default_fsetxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+int32_t
+default_inodelk_resume (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *lock,
+ dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
+ STACK_WIND (frame, default_inodelk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->inodelk,
+ volume, loc, cmd, lock, xdata);
+ return 0;
}
int32_t
-default_fsetxattr (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- dict_t *dict,
- int32_t flags)
+default_finodelk_resume (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock,
+ dict_t *xdata)
{
- STACK_WIND (frame,
- default_fsetxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr,
- fd,
- dict,
- flags);
- return 0;
+ STACK_WIND (frame, default_finodelk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ volume, fd, cmd, lock, xdata);
+ return 0;
+}
+
+int32_t
+default_entrylk_resume (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ STACK_WIND (frame, default_entrylk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->entrylk,
+ volume, loc, basename, cmd, type, xdata);
+ return 0;
}
+int32_t
+default_fentrylk_resume (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, const char *basename,
+ entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ STACK_WIND (frame, default_fentrylk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fentrylk,
+ volume, fd, basename, cmd, type, xdata);
+ return 0;
+}
-static int32_t
-default_fgetxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
+int32_t
+default_rchecksum_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, int32_t len,
+ dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- dict);
- return 0;
+ STACK_WIND (frame, default_rchecksum_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rchecksum, fd, offset, len, xdata);
+ return 0;
}
int32_t
-default_fgetxattr (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- const char *name)
+default_readdir_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t off,
+ dict_t *xdata)
{
- STACK_WIND (frame,
- default_fgetxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr,
- fd,
- name);
- return 0;
+ STACK_WIND (frame, default_readdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdir, fd, size, off, xdata);
+ return 0;
}
-static int32_t
-default_getxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
+
+int32_t
+default_readdirp_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t off, dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- dict);
- return 0;
+ STACK_WIND (frame, default_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata);
+ return 0;
}
int32_t
-default_getxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
+default_setattr_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid,
+ dict_t *xdata)
{
- STACK_WIND (frame,
- default_getxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr,
- loc,
- name);
- return 0;
+ STACK_WIND (frame, default_setattr_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid, xdata);
+ return 0;
}
int32_t
-default_xattrop_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
+default_truncate_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ off_t offset,
+ dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, dict);
- return 0;
+ STACK_WIND (frame, default_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
}
int32_t
-default_xattrop (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- gf_xattrop_flags_t flags,
- dict_t *dict)
+default_stat_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
{
- STACK_WIND (frame,
- default_xattrop_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->xattrop,
- loc,
- flags,
- dict);
- return 0;
+ STACK_WIND (frame, default_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
}
int32_t
-default_fxattrop_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
+default_lookup_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, dict);
- return 0;
+ STACK_WIND (frame, default_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ return 0;
}
int32_t
-default_fxattrop (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- gf_xattrop_flags_t flags,
- dict_t *dict)
+default_fsetattr_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid,
+ dict_t *xdata)
{
- STACK_WIND (frame,
- default_fxattrop_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fxattrop,
- fd,
- flags,
- dict);
- return 0;
+ STACK_WIND (frame, default_fsetattr_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, valid, xdata);
+ return 0;
}
+int32_t
+default_fallocate_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t keep_size, off_t offset, size_t len, dict_t *xdata)
+{
+ STACK_WIND(frame, default_fallocate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, keep_size, offset, len,
+ xdata);
+ return 0;
+}
-static int32_t
-default_removexattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+int32_t
+default_discard_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
+ STACK_WIND(frame, default_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len,
+ xdata);
+ return 0;
}
int32_t
-default_removexattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
+default_zerofill_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata)
{
- STACK_WIND (frame,
- default_removexattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr,
- loc,
- name);
- return 0;
+ STACK_WIND(frame, default_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len,
+ xdata);
+ return 0;
}
-static int32_t
-default_lk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct flock *lock)
+
+/* FOPS */
+
+int32_t
+default_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- lock);
- return 0;
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+ return 0;
}
int32_t
-default_lk (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t cmd,
- struct flock *lock)
+default_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
{
- STACK_WIND (frame,
- default_lk_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk,
- fd,
- cmd,
- lock);
- return 0;
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags,
+ xdata);
+ return 0;
}
+int32_t
+default_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags,
+ xdata);
+ return 0;
+}
-static int32_t
-default_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+int32_t
+default_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->statfs, loc, xdata);
+ return 0;
+}
+int32_t
+default_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsyncdir, fd, flags, xdata);
+ return 0;
}
+int32_t
+default_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xdata)
+{
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+ return 0;
+}
int32_t
-default_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd,
- struct flock *lock)
-{
- STACK_WIND (frame,
- default_inodelk_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->inodelk,
- volume, loc, cmd, lock);
- return 0;
+default_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ return 0;
}
+int32_t
+default_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
+{
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, flags, xdata);
+ return 0;
+}
+
+int32_t
+default_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
+ return 0;
+}
-static int32_t
-default_finodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+int32_t
+default_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+{
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count,
+ off, flags, iobref, xdata);
+ return 0;
+}
+int32_t
+default_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset,
+ flags, xdata);
+ return 0;
}
int32_t
-default_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct flock *lock)
+default_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
{
- STACK_WIND (frame,
- default_finodelk_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk,
- volume, fd, cmd, lock);
- return 0;
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
}
+int32_t
+default_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode,
+ umask, fd, xdata);
+ return 0;
+}
-static int32_t
-default_entrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+int32_t
+default_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+ return 0;
+}
+int32_t
+default_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc,
+ xdata);
+ return 0;
+}
+
+
+int
+default_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->symlink, linkpath, loc,
+ umask, xdata);
+ return 0;
}
int32_t
-default_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+default_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
- STACK_WIND (frame, default_entrylk_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->entrylk,
- volume, loc, basename, cmd, type);
- return 0;
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata);
+ return 0;
+}
+
+int32_t
+default_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
+{
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ return 0;
+}
+
+int
+default_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask,
+ xdata);
+ return 0;
}
-static int32_t
-default_fentrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+int
+default_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev,
+ umask, xdata);
+ return 0;
}
int32_t
-default_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+default_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, dict_t *xdata)
{
- STACK_WIND (frame, default_fentrylk_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fentrylk,
- volume, fd, basename, cmd, type);
- return 0;
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readlink, loc, size, xdata);
+ return 0;
}
-/* Management operations */
+int32_t
+default_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask, dict_t *xdata)
+{
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->access, loc, mask, xdata);
+ return 0;
+}
-static int32_t
-default_stats_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct xlator_stats *stats)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- stats);
- return 0;
+int32_t
+default_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata)
+{
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
+}
+
+int32_t
+default_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+ return 0;
}
int32_t
-default_stats (call_frame_t *frame,
- xlator_t *this,
- int32_t flags)
+default_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- STACK_WIND (frame,
- default_stats_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->mops->stats,
- flags);
- return 0;
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->xattrop, loc, flags, dict,
+ xdata);
+ return 0;
}
-static int32_t
-default_getspec_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- char *spec_data)
+int32_t
+default_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- spec_data);
- return 0;
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fxattrop, fd, flags, dict,
+ xdata);
+ return 0;
+}
+
+int32_t
+default_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name,
+ xdata);
+ return 0;
}
+int32_t
+default_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name,
+ xdata);
+ return 0;
+}
int32_t
-default_getspec (call_frame_t *frame,
- xlator_t *this,
- const char *key,
- int32_t flags)
+default_lk (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
- STACK_WIND (frame,
- default_getspec_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->mops->getspec,
- key, flags);
- return 0;
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lk, fd, cmd, lock, xdata);
+ return 0;
}
-static int32_t
-default_log_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+int32_t
+default_inodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *lock,
+ dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->inodelk, volume, loc, cmd,
+ lock, xdata);
+ return 0;
}
+int32_t
+default_finodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk, volume, fd, cmd,
+ lock, xdata);
+ return 0;
+}
int32_t
-default_log (call_frame_t *frame,
- xlator_t *this,
- const char *msg)
+default_entrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
{
- STACK_WIND (frame,
- default_log_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->mops->log,
- msg);
- return 0;
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->entrylk, volume, loc,
+ basename, cmd, type, xdata);
+ return 0;
}
+int32_t
+default_fentrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, const char *basename,
+ entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fentrylk, volume, fd,
+ basename, cmd, type, xdata);
+ return 0;
+}
-static int32_t
-default_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *file_checksum,
- uint8_t *dir_checksum)
+int32_t
+default_rchecksum (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ int32_t len,
+ dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- file_checksum,
- dir_checksum);
- return 0;
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rchecksum, fd, offset, len,
+ xdata);
+ return 0;
}
int32_t
-default_checksum (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flag)
+default_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t off,
+ dict_t *xdata)
{
- STACK_WIND (frame,
- default_checksum_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->checksum,
- loc,
- flag);
- return 0;
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdir, fd, size, off,
+ xdata);
+ return 0;
}
int32_t
-default_readdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *entries)
+default_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t off, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, entries);
- return 0;
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, off,
+ xdata);
+ return 0;
}
+int32_t
+default_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid,
+ dict_t *xdata)
+{
+ STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid,
+ xdata);
+ return 0;
+}
int32_t
-default_readdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t off)
+default_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- STACK_WIND (frame,
- default_readdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdir,
- fd, size, off);
- return 0;
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
}
+int32_t
+default_stat (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
+}
int32_t
-default_lock_notify_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+default_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ return 0;
}
+int32_t
+default_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid,
+ dict_t *xdata)
+{
+ STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, valid,
+ xdata);
+ return 0;
+}
int32_t
-default_lock_fnotify_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+default_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t keep_size, off_t offset, size_t len, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, keep_size, offset,
+ len, xdata);
return 0;
}
-
int32_t
-default_lock_notify (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t timeout)
+default_discard(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata)
{
- STACK_WIND (frame,
- default_lock_notify_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->lock_notify,
- loc, timeout);
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len,
+ xdata);
return 0;
}
+int32_t
+default_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len,
+ xdata);
+ return 0;
+}
+
int32_t
-default_lock_fnotify (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t timeout)
+default_forget (xlator_t *this, inode_t *inode)
{
- STACK_WIND (frame,
- default_lock_notify_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->lock_fnotify,
- fd, timeout);
- return 0;
+ gf_log_callingfn (this->name, GF_LOG_WARNING, "xlator does not "
+ "implement forget_cbk");
+ return 0;
+}
+
+
+int32_t
+default_releasedir (xlator_t *this, fd_t *fd)
+{
+ gf_log_callingfn (this->name, GF_LOG_WARNING, "xlator does not "
+ "implement releasedir_cbk");
+ return 0;
+}
+
+int32_t
+default_release (xlator_t *this, fd_t *fd)
+{
+ gf_log_callingfn (this->name, GF_LOG_WARNING, "xlator does not "
+ "implement release_cbk");
+ return 0;
}
+/* End of FOP/_CBK/_RESUME section */
+
+
+/* Management operations */
+
+int32_t
+default_getspec (call_frame_t *frame, xlator_t *this, const char *key,
+ int32_t flags)
+{
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getspec, key, flags);
+ return 0;
+}
/* notify */
int
default_notify (xlator_t *this, int32_t event, void *data, ...)
{
- switch (event)
- {
- case GF_EVENT_PARENT_UP:
- {
- xlator_list_t *list = this->children;
-
- while (list)
- {
- xlator_notify (list->xlator, event, this);
- list = list->next;
- }
- }
- break;
- case GF_EVENT_CHILD_DOWN:
- case GF_EVENT_CHILD_UP:
- default:
- {
- xlator_list_t *parent = this->parents;
- while (parent) {
- if (parent->xlator->ready)
+ switch (event)
+ {
+ case GF_EVENT_PARENT_UP:
+ case GF_EVENT_PARENT_DOWN:
+ {
+ xlator_list_t *list = this->children;
+
+ while (list) {
+ xlator_notify (list->xlator, event, this);
+ list = list->next;
+ }
+ }
+ break;
+ case GF_EVENT_CHILD_CONNECTING:
+ case GF_EVENT_CHILD_MODIFIED:
+ case GF_EVENT_CHILD_DOWN:
+ case GF_EVENT_CHILD_UP:
+ case GF_EVENT_AUTH_FAILED:
+ {
+ xlator_list_t *parent = this->parents;
+ /* Handle case of CHILD_* & AUTH_FAILED event specially, send it to fuse */
+ if (!parent && this->ctx && this->ctx->master)
+ xlator_notify (this->ctx->master, event, this->graph, NULL);
+
+ while (parent) {
+ if (parent->xlator->init_succeeded)
xlator_notify (parent->xlator, event,
this, NULL);
- parent = parent->next;
- }
- }
- }
+ parent = parent->next;
+ }
+ }
+ break;
+ default:
+ {
+ xlator_list_t *parent = this->parents;
+ while (parent) {
+ if (parent->xlator->init_succeeded)
+ xlator_notify (parent->xlator, event,
+ this, NULL);
+ parent = parent->next;
+ }
+ }
+ }
- return 0;
+ return 0;
}
int32_t
-default_releasedir (xlator_t *this,
- fd_t *fd)
+default_mem_acct_init (xlator_t *this)
{
- return 0;
-}
+ int ret = -1;
-int32_t
-default_release (xlator_t *this,
- fd_t *fd)
-{
- return 0;
-}
+ ret = xlator_mem_acct_init (this, gf_common_mt_end);
+ return ret;
+}
diff --git a/libglusterfs/src/defaults.h b/libglusterfs/src/defaults.h
index 33d974520..0747027bc 100644
--- a/libglusterfs/src/defaults.h
+++ b/libglusterfs/src/defaults.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
/* libglusterfs/src/defaults.h:
@@ -31,271 +22,702 @@
#include "xlator.h"
-/* Management Operations */
+int32_t default_notify (xlator_t *this,
+ int32_t event,
+ void *data,
+ ...);
-int32_t default_stats (call_frame_t *frame,
- xlator_t *this,
- int32_t flags);
+int32_t default_forget (xlator_t *this, inode_t *inode);
-int32_t default_getspec (call_frame_t *frame,
- xlator_t *this,
- const char *key,
- int32_t flag);
+int32_t default_release (xlator_t *this, fd_t *fd);
-int32_t
-default_log (call_frame_t *frame,
- xlator_t *this,
- const char *msg);
+int32_t default_releasedir (xlator_t *this, fd_t *fd);
-int32_t default_checksum (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flag);
+/* Management Operations */
+
+int32_t default_getspec (call_frame_t *frame,
+ xlator_t *this,
+ const char *key,
+ int32_t flag);
+
+int32_t default_rchecksum (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd, off_t offset,
+ int32_t len, dict_t *xdata);
/* FileSystem operations */
int32_t default_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req);
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *xdata);
int32_t default_stat (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc);
+ xlator_t *this,
+ loc_t *loc, dict_t *xdata);
int32_t default_fstat (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd);
-
-int32_t default_chmod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode);
-
-int32_t default_fchmod (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- mode_t mode);
-
-int32_t default_chown (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- uid_t uid,
- gid_t gid);
-
-int32_t default_fchown (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- uid_t uid,
- gid_t gid);
+ xlator_t *this,
+ fd_t *fd, dict_t *xdata);
int32_t default_truncate (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset);
+ xlator_t *this,
+ loc_t *loc,
+ off_t offset, dict_t *xdata);
int32_t default_ftruncate (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset);
-
-int32_t default_utimens (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- struct timespec tv[2]);
+ xlator_t *this,
+ fd_t *fd,
+ off_t offset, dict_t *xdata);
int32_t default_access (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t mask);
+ xlator_t *this,
+ loc_t *loc,
+ int32_t mask, dict_t *xdata);
int32_t default_readlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- size_t size);
+ xlator_t *this,
+ loc_t *loc,
+ size_t size, dict_t *xdata);
-int32_t default_mknod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode,
- dev_t rdev);
+int32_t default_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata);
-int32_t default_mkdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode);
+int32_t default_mkdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata);
int32_t default_unlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc);
+ xlator_t *this,
+ loc_t *loc, int xflag, dict_t *xdata);
-int32_t default_rmdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc);
+int32_t default_rmdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int xflag, dict_t *xdata);
-int32_t default_symlink (call_frame_t *frame,
- xlator_t *this,
- const char *linkpath,
- loc_t *loc);
+int32_t default_symlink (call_frame_t *frame, xlator_t *this,
+ const char *linkpath, loc_t *loc, mode_t umask,
+ dict_t *xdata);
int32_t default_rename (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc);
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata);
int32_t default_link (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc);
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata);
-int32_t default_create (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- mode_t mode, fd_t *fd);
+int32_t default_create (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *xdata);
int32_t default_open (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags, fd_t *fd);
+ xlator_t *this,
+ loc_t *loc,
+ int32_t flags, fd_t *fd,
+ dict_t *xdata);
int32_t default_readv (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset);
+ xlator_t *this,
+ fd_t *fd,
+ size_t size,
+ off_t offset,
+ uint32_t flags, dict_t *xdata);
int32_t default_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t offset,
- struct iobref *iobref);
+ xlator_t *this,
+ fd_t *fd,
+ struct iovec *vector,
+ int32_t count,
+ off_t offset,
+ uint32_t flags,
+ struct iobref *iobref, dict_t *xdata);
int32_t default_flush (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd);
+ xlator_t *this,
+ fd_t *fd, dict_t *xdata);
int32_t default_fsync (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t datasync);
+ xlator_t *this,
+ fd_t *fd,
+ int32_t datasync, dict_t *xdata);
int32_t default_opendir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, fd_t *fd);
-
-int32_t default_getdents (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset,
- int32_t flag);
+ xlator_t *this,
+ loc_t *loc, fd_t *fd, dict_t *xdata);
int32_t default_fsyncdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t datasync);
+ xlator_t *this,
+ fd_t *fd,
+ int32_t datasync, dict_t *xdata);
int32_t default_statfs (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc);
+ xlator_t *this,
+ loc_t *loc, dict_t *xdata);
int32_t default_setxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- int32_t flags);
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *dict,
+ int32_t flags, dict_t *xdata);
int32_t default_getxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name);
+ xlator_t *this,
+ loc_t *loc,
+ const char *name, dict_t *xdata);
int32_t default_fsetxattr (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
dict_t *dict,
- int32_t flags);
+ int32_t flags, dict_t *xdata);
int32_t default_fgetxattr (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- const char *name);
+ const char *name, dict_t *xdata);
int32_t default_removexattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name);
+ xlator_t *this,
+ loc_t *loc,
+ const char *name, dict_t *xdata);
+
+int32_t default_fremovexattr (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ const char *name, dict_t *xdata);
int32_t default_lk (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t cmd,
- struct flock *flock);
+ xlator_t *this,
+ fd_t *fd,
+ int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
int32_t default_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd,
- struct flock *flock);
+ const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
int32_t default_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd,
- struct flock *flock);
+ const char *volume, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
int32_t default_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type);
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
int32_t default_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type);
+ const char *volume, fd_t *fd, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
int32_t default_readdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size, off_t off);
-
-int32_t default_setdents (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags,
- dir_entry_t *entries,
- int32_t count);
+ xlator_t *this,
+ fd_t *fd,
+ size_t size, off_t off, dict_t *xdata);
+
+int32_t default_readdirp (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size, off_t off, dict_t *xdata);
int32_t default_xattrop (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- gf_xattrop_flags_t flags,
- dict_t *dict);
+ xlator_t *this,
+ loc_t *loc,
+ gf_xattrop_flags_t flags,
+ dict_t *dict, dict_t *xdata);
int32_t default_fxattrop (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ gf_xattrop_flags_t flags,
+ dict_t *dict, dict_t *xdata);
+
+int32_t default_setattr (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ struct iatt *stbuf,
+ int32_t valid, dict_t *xdata);
+
+int32_t default_fsetattr (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ struct iatt *stbuf,
+ int32_t valid, dict_t *xdata);
+
+int32_t default_fallocate(call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- gf_xattrop_flags_t flags,
- dict_t *dict);
+ int32_t keep_size, off_t offset,
+ size_t len, dict_t *xdata);
+
+int32_t default_discard(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ off_t offset,
+ size_t len, dict_t *xdata);
+
+int32_t default_zerofill(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ off_t offset,
+ size_t len, dict_t *xdata);
+
+
+/* Resume */
+int32_t default_getspec_resume (call_frame_t *frame,
+ xlator_t *this,
+ const char *key,
+ int32_t flag);
+
+int32_t default_rchecksum_resume (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd, off_t offset,
+ int32_t len, dict_t *xdata);
+
+/* FileSystem operations */
+int32_t default_lookup_resume (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *xdata);
+
+int32_t default_stat_resume (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, dict_t *xdata);
+
+int32_t default_fstat_resume (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd, dict_t *xdata);
+
+int32_t default_truncate_resume (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ off_t offset, dict_t *xdata);
+
+int32_t default_ftruncate_resume (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ off_t offset, dict_t *xdata);
+
+int32_t default_access_resume (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t mask, dict_t *xdata);
+
+int32_t default_readlink_resume (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ size_t size, dict_t *xdata);
+
+int32_t default_mknod_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask,
+ dict_t *xdata);
+
+int32_t default_mkdir_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, mode_t umask, dict_t *xdata);
+
+int32_t default_unlink_resume (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, int xflag, dict_t *xdata);
+
+int32_t default_rmdir_resume (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int xflag, dict_t *xdata);
+
+int32_t default_symlink_resume (call_frame_t *frame, xlator_t *this,
+ const char *linkpath, loc_t *loc, mode_t umask,
+ dict_t *xdata);
+
+int32_t default_rename_resume (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata);
+
+int32_t default_link_resume (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata);
+
+int32_t default_create_resume (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *xdata);
+
+int32_t default_open_resume (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata);
+
+int32_t default_readv_resume (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata);
+
+int32_t default_writev_resume (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ struct iovec *vector,
+ int32_t count,
+ off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata);
+
+int32_t default_flush_resume (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd, dict_t *xdata);
+
+int32_t default_fsync_resume (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t datasync, dict_t *xdata);
+
+int32_t default_opendir_resume (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, fd_t *fd, dict_t *xdata);
+
+int32_t default_fsyncdir_resume (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t datasync, dict_t *xdata);
+
+int32_t default_statfs_resume (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, dict_t *xdata);
+
+int32_t default_setxattr_resume (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *dict,
+ int32_t flags, dict_t *xdata);
+
+int32_t default_getxattr_resume (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ const char *name, dict_t *xdata);
+
+int32_t default_fsetxattr_resume (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ dict_t *dict,
+ int32_t flags, dict_t *xdata);
+
+int32_t default_fgetxattr_resume (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ const char *name, dict_t *xdata);
+
+int32_t default_removexattr_resume (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ const char *name, dict_t *xdata);
+
+int32_t default_fremovexattr_resume (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ const char *name, dict_t *xdata);
+
+int32_t default_lk_resume (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+int32_t default_inodelk_resume (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+int32_t default_finodelk_resume (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+int32_t default_entrylk_resume (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
+
+int32_t default_fentrylk_resume (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
+
+int32_t default_readdir_resume (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size, off_t off, dict_t *xdata);
+
+int32_t default_readdirp_resume (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size, off_t off, dict_t *xdata);
+
+int32_t default_xattrop_resume (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ gf_xattrop_flags_t flags,
+ dict_t *dict, dict_t *xdata);
+
+int32_t default_fxattrop_resume (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ gf_xattrop_flags_t flags,
+ dict_t *dict, dict_t *xdata);
+int32_t default_rchecksum_resume (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd, off_t offset,
+ int32_t len, dict_t *xdata);
+
+int32_t default_setattr_resume (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ struct iatt *stbuf,
+ int32_t valid, dict_t *xdata);
+
+int32_t default_fsetattr_resume (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ struct iatt *stbuf,
+ int32_t valid, dict_t *xdata);
+
+int32_t default_fallocate_resume(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t keep_size, off_t offset,
+ size_t len, dict_t *xdata);
+
+int32_t default_discard_resume(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ off_t offset,
+ size_t len, dict_t *xdata);
+
+int32_t default_zerofill_resume(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ off_t offset,
+ size_t len, dict_t *xdata);
+
+
+/* _cbk */
int32_t
-default_lock_notify (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t timeout);
+default_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent);
int32_t
-default_lock_fnotify (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t timeout);
+default_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata);
-int32_t default_notify (xlator_t *this,
- int32_t event,
- void *data,
- ...);
+int32_t
+default_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+int32_t
+default_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+int32_t
+default_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *path,
+ struct iatt *buf, dict_t *xdata);
+
+
+int32_t
+default_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int32_t
+default_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int32_t
+default_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int32_t
+default_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+
+int32_t
+default_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+
+int32_t
+default_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent, dict_t *xdata);
+
+
+int32_t
+default_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
-int32_t default_forget (xlator_t *this,
- inode_t *inode);
+int32_t
+default_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int32_t
+default_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata);
+
+int32_t
+default_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref, dict_t *xdata);
+
+
+int32_t
+default_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+
+int32_t
+default_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
-int32_t default_release (xlator_t *this,
- fd_t *fd);
-int32_t default_releasedir (xlator_t *this,
- fd_t *fd);
+
+int32_t
+default_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+int32_t
+default_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata);
+
+int32_t
+default_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata);
+
+int32_t
+default_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf, dict_t *xdata);
+
+
+int32_t
+default_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+
+int32_t
+default_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+
+
+int32_t
+default_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata);
+
+
+int32_t
+default_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata);
+
+int32_t
+default_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata);
+
+int32_t
+default_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata);
+
+
+int32_t
+default_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata);
+
+int32_t
+default_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+
+int32_t
+default_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+
+int32_t
+default_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, uint32_t weak_checksum,
+ uint8_t *strong_checksum, dict_t *xdata);
+
+
+int32_t
+default_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, dict_t *xdata);
+
+
+int32_t
+default_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, dict_t *xdata);
+
+int32_t
+default_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata);
+
+int32_t
+default_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata);
+
+int32_t default_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata);
+
+int32_t default_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata);
+
+int32_t default_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata);
+
+int32_t
+default_getspec_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, char *spec_data);
+
+int32_t
+default_mem_acct_init (xlator_t *this);
#endif /* _DEFAULTS_H */
diff --git a/libglusterfs/src/dict.c b/libglusterfs/src/dict.c
index fd1be7318..3b7ddce5e 100644
--- a/libglusterfs/src/dict.c
+++ b/libglusterfs/src/dict.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <unistd.h>
@@ -22,6 +13,8 @@
#include <stdlib.h>
#include <stdio.h>
#include <inttypes.h>
+#include <limits.h>
+#include <fnmatch.h>
#ifndef _CONFIG_H
#define _CONFIG_H
@@ -35,842 +28,663 @@
#include "logging.h"
#include "compat.h"
#include "byte-order.h"
-
-data_pair_t *
-get_new_data_pair ()
-{
- data_pair_t *data_pair_ptr = NULL;
-
- data_pair_ptr = (data_pair_t *) CALLOC (1, sizeof (data_pair_t));
- ERR_ABORT (data_pair_ptr);
-
- return data_pair_ptr;
-}
+#include "globals.h"
data_t *
get_new_data ()
{
- data_t *data = NULL;
+ data_t *data = NULL;
- data = (data_t *) CALLOC (1, sizeof (data_t));
- if (!data) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "calloc () returned NULL");
- return NULL;
- }
+ data = mem_get0 (THIS->ctx->dict_data_pool);
+ if (!data) {
+ return NULL;
+ }
- LOCK_INIT (&data->lock);
- return data;
+ LOCK_INIT (&data->lock);
+ return data;
}
dict_t *
get_new_dict_full (int size_hint)
{
- dict_t *dict = CALLOC (1, sizeof (dict_t));
+ dict_t *dict = mem_get0 (THIS->ctx->dict_pool);
- if (!dict) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "calloc () returned NULL");
- return NULL;
- }
-
- dict->hash_size = size_hint;
- dict->members = CALLOC (size_hint, sizeof (data_pair_t *));
+ if (!dict) {
+ return NULL;
+ }
- if (!dict->members) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "calloc () returned NULL");
- return NULL;
- }
+ dict->hash_size = size_hint;
+ if (size_hint == 1) {
+ /*
+ * This is the only case we ever see currently. If we ever
+ * need to support resizing the hash table, the resize function
+ * will have to take into account the possibility that
+ * "members" is not separately allocated (i.e. don't just call
+ * realloc() blindly.
+ */
+ dict->members = &dict->members_internal;
+ }
+ else {
+ /*
+ * We actually need to allocate space for size_hint *pointers*
+ * but we actually allocate space for one *structure*. Since
+ * a data_pair_t consists of five pointers, we're wasting four
+ * pointers' worth for N=1, and will overrun what we allocated
+ * for N>5. If anybody ever starts using size_hint, we'll need
+ * to fix this.
+ */
+ GF_ASSERT (size_hint <=
+ (sizeof(data_pair_t) / sizeof(data_pair_t *)));
+ dict->members = mem_get0 (THIS->ctx->dict_pair_pool);
+ if (!dict->members) {
+ mem_put (dict);
+ return NULL;
+ }
+ }
- LOCK_INIT (&dict->lock);
+ LOCK_INIT (&dict->lock);
- return dict;
+ return dict;
}
dict_t *
get_new_dict (void)
{
- return get_new_dict_full (1);
+ return get_new_dict_full (1);
}
dict_t *
dict_new (void)
{
- dict_t *dict = NULL;
-
- dict = get_new_dict_full(1);
-
- if (dict)
- dict_ref (dict);
-
- return dict;
+ dict_t *dict = NULL;
+
+ dict = get_new_dict_full(1);
+
+ if (dict)
+ dict_ref (dict);
+
+ return dict;
}
-int32_t
+int32_t
is_data_equal (data_t *one,
- data_t *two)
+ data_t *two)
{
- if (!one || !two || !one->data || !two->data)
- return 1;
+ if (!one || !two || !one->data || !two->data) {
+ gf_log_callingfn ("dict", GF_LOG_ERROR,
+ "input arguments are provided "
+ "with value data_t as NULL");
+ return -1;
+ }
- if (one == two)
- return 1;
+ if (one == two)
+ return 1;
- if (one->len != two->len)
- return 0;
+ if (one->len != two->len)
+ return 0;
- if (one->data == two->data)
- return 1;
+ if (one->data == two->data)
+ return 1;
- if (memcmp (one->data, two->data, one->len) == 0)
- return 1;
+ if (memcmp (one->data, two->data, one->len) == 0)
+ return 1;
- return 0;
+ return 0;
}
void
data_destroy (data_t *data)
{
- if (data) {
- LOCK_DESTROY (&data->lock);
+ if (data) {
+ LOCK_DESTROY (&data->lock);
- if (!data->is_static) {
- if (data->data)
- FREE (data->data);
- if (data->vec)
- FREE (data->vec);
- }
+ if (!data->is_static) {
+ if (data->data) {
+ if (data->is_stdalloc)
+ free (data->data);
+ else
+ GF_FREE (data->data);
+ }
+ }
- data->len = 0xbabababa;
- if (!data->is_const)
- FREE (data);
- }
+ data->len = 0xbabababa;
+ if (!data->is_const)
+ mem_put (data);
+ }
}
data_t *
data_copy (data_t *old)
{
- if (!old) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@old is NULL");
- return NULL;
- }
+ if (!old) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING,
+ "old is NULL");
+ return NULL;
+ }
- data_t *newdata = (data_t *) CALLOC (1, sizeof (*newdata));
+ data_t *newdata = mem_get0 (THIS->ctx->dict_data_pool);
+ if (!newdata) {
+ return NULL;
+ }
- if (!newdata) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@newdata - NULL returned by CALLOC");
- return NULL;
- }
+ if (old) {
+ newdata->len = old->len;
+ if (old->data) {
+ newdata->data = memdup (old->data, old->len);
+ if (!newdata->data)
+ goto err_out;
+ }
+ }
- if (old) {
- newdata->len = old->len;
- if (old->data)
- newdata->data = memdup (old->data, old->len);
- if (old->vec)
- newdata->vec = memdup (old->vec, old->len * (sizeof (void *) +
- sizeof (size_t)));
- if (!old->data && !old->vec) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@newdata->data || @newdata->vec got NULL from CALLOC()");
- return NULL;
- }
- }
+ LOCK_INIT (&newdata->lock);
+ return newdata;
+
+err_out:
- return newdata;
+ FREE (newdata->data);
+ mem_put (newdata);
+
+ return NULL;
}
static data_pair_t *
_dict_lookup (dict_t *this, char *key)
{
- if (!this || !key) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@this=%p @key=%p", this, key);
- return NULL;
- }
+ if (!this || !key) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING,
+ "!this || !key (%s)", key);
+ return NULL;
+ }
- int hashval = SuperFastHash (key, strlen (key)) % this->hash_size;
- data_pair_t *pair;
-
- for (pair = this->members[hashval]; pair != NULL; pair = pair->hash_next) {
- if (pair->key && !strcmp (pair->key, key))
- return pair;
- }
-
- return NULL;
+ int hashval = SuperFastHash (key, strlen (key)) % this->hash_size;
+ data_pair_t *pair;
+
+ for (pair = this->members[hashval]; pair != NULL; pair = pair->hash_next) {
+ if (pair->key && !strcmp (pair->key, key))
+ return pair;
+ }
+
+ return NULL;
}
+int32_t
+dict_lookup (dict_t *this, char *key, data_t **data)
+{
+ if (!this || !key || !data) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING,
+ "!this || !key || !data");
+ return -1;
+ }
+
+ data_pair_t *tmp = NULL;
+ LOCK (&this->lock);
+ {
+ tmp = _dict_lookup (this, key);
+ }
+ UNLOCK (&this->lock);
+
+ if (!tmp)
+ return -1;
+
+ *data = tmp->value;
+ return 0;
+}
static int32_t
-_dict_set (dict_t *this,
- char *key,
- data_t *value)
-{
- int hashval;
- data_pair_t *pair;
- char key_free = 0;
- int tmp = 0;
+_dict_set (dict_t *this, char *key, data_t *value, gf_boolean_t replace)
+{
+ int hashval;
+ data_pair_t *pair;
+ char key_free = 0;
+ int tmp = 0;
int ret = 0;
- if (!key) {
- ret = asprintf (&key, "ref:%p", value);
+ if (!key) {
+ ret = gf_asprintf (&key, "ref:%p", value);
if (-1 == ret) {
- gf_log ("dict", GF_LOG_ERROR, "asprintf failed");
+ gf_log ("dict", GF_LOG_WARNING, "asprintf failed %s", key);
return -1;
}
- key_free = 1;
- }
+ key_free = 1;
+ }
- tmp = SuperFastHash (key, strlen (key));
- hashval = (tmp % this->hash_size);
- pair = _dict_lookup (this, key);
-
- if (pair) {
- data_t *unref_data = pair->value;
- pair->value = data_ref (value);
- data_unref (unref_data);
- if (key_free)
- FREE (key);
- /* Indicates duplicate key */
- return 0;
- }
- pair = (data_pair_t *) CALLOC (1, sizeof (*pair));
- if (!pair) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@pair - NULL returned by CALLOC");
- return -1;
- }
+ tmp = SuperFastHash (key, strlen (key));
+ hashval = (tmp % this->hash_size);
+
+ /* Search for a existing key if 'replace' is asked for */
+ if (replace) {
+ pair = _dict_lookup (this, key);
+
+ if (pair) {
+ data_t *unref_data = pair->value;
+ pair->value = data_ref (value);
+ data_unref (unref_data);
+ if (key_free)
+ GF_FREE (key);
+ /* Indicates duplicate key */
+ return 0;
+ }
+ }
- pair->key = (char *) CALLOC (1, strlen (key) + 1);
- if (!pair->key) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@pair->key - NULL returned by CALLOC");
- return -1;
- }
+ if (this->free_pair_in_use) {
+ pair = mem_get0 (THIS->ctx->dict_pair_pool);
+ if (!pair) {
+ if (key_free)
+ GF_FREE (key);
+ return -1;
+ }
+ }
+ else {
+ pair = &this->free_pair;
+ this->free_pair_in_use = _gf_true;
+ }
+
+ if (key_free) {
+ /* It's ours. Use it. */
+ pair->key = key;
+ key_free = 0;
+ }
+ else {
+ pair->key = (char *) GF_CALLOC (1, strlen (key) + 1,
+ gf_common_mt_char);
+ if (!pair->key) {
+ if (pair == &this->free_pair) {
+ this->free_pair_in_use = _gf_false;
+ }
+ else {
+ mem_put (pair);
+ }
+ return -1;
+ }
+ strcpy (pair->key, key);
+ }
+ pair->value = data_ref (value);
+
+ pair->hash_next = this->members[hashval];
+ this->members[hashval] = pair;
+
+ pair->next = this->members_list;
+ pair->prev = NULL;
+ if (this->members_list)
+ this->members_list->prev = pair;
+ this->members_list = pair;
+ this->count++;
- strcpy (pair->key, key);
- pair->value = data_ref (value);
-
- pair->hash_next = this->members[hashval];
- this->members[hashval] = pair;
-
- pair->next = this->members_list;
- pair->prev = NULL;
- if (this->members_list)
- this->members_list->prev = pair;
- this->members_list = pair;
- this->count++;
-
- if (key_free)
- FREE (key);
- return 0;
+ if (key_free)
+ GF_FREE (key);
+ return 0;
}
int32_t
dict_set (dict_t *this,
- char *key,
- data_t *value)
+ char *key,
+ data_t *value)
{
- int32_t ret;
+ int32_t ret;
- if (!this || !value) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@this=%p @value=%p", this, value);
- return -1;
- }
+ if (!this || !value) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING,
+ "!this || !value for key=%s", key);
+ return -1;
+ }
- LOCK (&this->lock);
+ LOCK (&this->lock);
- ret = _dict_set (this, key, value);
+ ret = _dict_set (this, key, value, 1);
- UNLOCK (&this->lock);
+ UNLOCK (&this->lock);
- return ret;
+ return ret;
}
-data_t *
-dict_get (dict_t *this,
- char *key)
+int32_t
+dict_add (dict_t *this, char *key, data_t *value)
{
- data_pair_t *pair;
+ int32_t ret;
- if (!this || !key) {
- gf_log ("dict", GF_LOG_DEBUG,
- "@this=%p @key=%p", this, key);
- return NULL;
- }
+ if (!this || !value) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING,
+ "!this || !value for key=%s", key);
+ return -1;
+ }
- LOCK (&this->lock);
+ LOCK (&this->lock);
- pair = _dict_lookup (this, key);
+ ret = _dict_set (this, key, value, 0);
- UNLOCK (&this->lock);
+ UNLOCK (&this->lock);
- if (pair)
- return pair->value;
-
- return NULL;
+ return ret;
}
-void
-dict_del (dict_t *this,
- char *key)
-{
- if (!this || !key) {
- gf_log ("dict", GF_LOG_DEBUG,
- "@this=%p @key=%p", this, key);
- return;
- }
- LOCK (&this->lock);
-
- int hashval = SuperFastHash (key, strlen (key)) % this->hash_size;
- data_pair_t *pair = this->members[hashval];
- data_pair_t *prev = NULL;
-
- while (pair) {
- if (strcmp (pair->key, key) == 0) {
- if (prev)
- prev->hash_next = pair->hash_next;
- else
- this->members[hashval] = pair->hash_next;
-
- data_unref (pair->value);
-
- if (pair->prev)
- pair->prev->next = pair->next;
- else
- this->members_list = pair->next;
-
- if (pair->next)
- pair->next->prev = pair->prev;
-
- FREE (pair->key);
- FREE (pair);
- this->count--;
- break;
- }
-
- prev = pair;
- pair = pair->hash_next;
- }
-
- UNLOCK (&this->lock);
-
- return;
-}
-
-void
-dict_destroy (dict_t *this)
+data_t *
+dict_get (dict_t *this, char *key)
{
- if (!this) {
- gf_log ("dict", GF_LOG_DEBUG,
- "@this=%p", this);
- return;
- }
+ data_pair_t *pair;
- data_pair_t *pair = this->members_list;
- data_pair_t *prev = this->members_list;
-
- LOCK_DESTROY (&this->lock);
+ if (!this || !key) {
+ gf_log_callingfn ("dict", GF_LOG_INFO,
+ "!this || key=%s", (key) ? key : "()");
+ return NULL;
+ }
- while (prev) {
- pair = pair->next;
- data_unref (prev->value);
- FREE (prev->key);
- FREE (prev);
- prev = pair;
- }
+ LOCK (&this->lock);
- FREE (this->members);
+ pair = _dict_lookup (this, key);
- if (this->extra_free)
- FREE (this->extra_free);
+ UNLOCK (&this->lock);
- if (!this->is_static)
- FREE (this);
+ if (pair)
+ return pair->value;
- return;
+ return NULL;
}
void
-dict_unref (dict_t *this)
+dict_del (dict_t *this, char *key)
{
- int32_t ref;
-
- if (!this) {
- gf_log ("dict", GF_LOG_DEBUG,
- "@this=%p", this);
- return;
- }
-
- LOCK (&this->lock);
-
- this->refcount--;
- ref = this->refcount;
-
- UNLOCK (&this->lock);
-
- if (!ref)
- dict_destroy (this);
-}
+ if (!this || !key) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING,
+ "!this || key=%s", key);
+ return;
+ }
-dict_t *
-dict_ref (dict_t *this)
-{
- if (!this) {
- gf_log ("dict", GF_LOG_DEBUG,
- "@this=%p", this);
- return NULL;
- }
+ LOCK (&this->lock);
- LOCK (&this->lock);
+ int hashval = SuperFastHash (key, strlen (key)) % this->hash_size;
+ data_pair_t *pair = this->members[hashval];
+ data_pair_t *prev = NULL;
+
+ while (pair) {
+ if (strcmp (pair->key, key) == 0) {
+ if (prev)
+ prev->hash_next = pair->hash_next;
+ else
+ this->members[hashval] = pair->hash_next;
+
+ data_unref (pair->value);
+
+ if (pair->prev)
+ pair->prev->next = pair->next;
+ else
+ this->members_list = pair->next;
+
+ if (pair->next)
+ pair->next->prev = pair->prev;
+
+ GF_FREE (pair->key);
+ if (pair == &this->free_pair) {
+ this->free_pair_in_use = _gf_false;
+ }
+ else {
+ mem_put (pair);
+ }
+ this->count--;
+ break;
+ }
- this->refcount++;
+ prev = pair;
+ pair = pair->hash_next;
+ }
- UNLOCK (&this->lock);
+ UNLOCK (&this->lock);
- return this;
+ return;
}
void
-data_unref (data_t *this)
+dict_destroy (dict_t *this)
{
- int32_t ref;
-
- if (!this) {
- gf_log ("dict", GF_LOG_DEBUG,
- "@this=%p", this);
- return;
- }
-
- LOCK (&this->lock);
-
- this->refcount--;
- ref = this->refcount;
+ if (!this) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is NULL");
+ return;
+ }
- UNLOCK (&this->lock);
+ data_pair_t *pair = this->members_list;
+ data_pair_t *prev = this->members_list;
- if (!ref)
- data_destroy (this);
-}
+ LOCK_DESTROY (&this->lock);
-data_t *
-data_ref (data_t *this)
-{
- if (!this) {
- gf_log ("dict", GF_LOG_DEBUG,
- "@this=%p", this);
- return NULL;
- }
+ while (prev) {
+ pair = pair->next;
+ data_unref (prev->value);
+ GF_FREE (prev->key);
+ if (prev != &this->free_pair) {
+ mem_put (prev);
+ }
+ prev = pair;
+ }
- LOCK (&this->lock);
+ if (this->members != &this->members_internal) {
+ mem_put (this->members);
+ }
- this->refcount++;
+ GF_FREE (this->extra_free);
+ free (this->extra_stdfree);
- UNLOCK (&this->lock);
+ if (!this->is_static)
+ mem_put (this);
- return this;
+ return;
}
-/*
- Serialization format:
- ----
- Count:8
- Key_len:8:Value_len:8
- Key
- Value
- .
- .
- .
-*/
-
-int32_t
-dict_serialized_length_old (dict_t *this)
+void
+dict_unref (dict_t *this)
{
+ int32_t ref;
- if (!this) {
- gf_log ("dict", GF_LOG_DEBUG,
- "@this=%p", this);
- return -1;
- }
+ if (!this) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is NULL");
+ return;
+ }
- int32_t len = 9; /* count + \n */
- int32_t count = this->count;
- data_pair_t *pair = this->members_list;
-
- while (count) {
- len += 18;
- len += strlen (pair->key) + 1;
- if (pair->value->vec) {
- int i;
- for (i=0; i<pair->value->len; i++) {
- len += pair->value->vec[i].iov_len;
- }
- } else {
- len += pair->value->len;
- }
- pair = pair->next;
- count--;
- }
+ LOCK (&this->lock);
- return len;
-}
+ this->refcount--;
+ ref = this->refcount;
-int32_t
-dict_serialize_old (dict_t *this, char *buf)
-{
- if (!this || !buf) {
- gf_log ("dict", GF_LOG_DEBUG,
- "@this=%p @buf=%p", this, buf);
- return -1;
- }
+ UNLOCK (&this->lock);
- data_pair_t *pair = this->members_list;
- int32_t count = this->count;
- uint64_t dcount = this->count;
-
- // FIXME: magic numbers
-
- sprintf (buf, "%08"PRIx64"\n", dcount);
- buf += 9;
- while (count) {
- uint64_t keylen = strlen (pair->key) + 1;
- uint64_t vallen = pair->value->len;
-
- sprintf (buf, "%08"PRIx64":%08"PRIx64"\n", keylen, vallen);
- buf += 18;
- memcpy (buf, pair->key, keylen);
- buf += keylen;
- memcpy (buf, pair->value->data, pair->value->len);
- buf += pair->value->len;
- pair = pair->next;
- count--;
- }
- return (0);
+ if (!ref)
+ dict_destroy (this);
}
-
dict_t *
-dict_unserialize_old (char *buf, int32_t size, dict_t **fill)
+dict_ref (dict_t *this)
{
- int32_t ret = 0;
- int32_t cnt = 0;
-
- if (!buf || fill == NULL || !*fill) {
- gf_log ("dict", GF_LOG_ERROR,
- "@buf=%p @fill=%p @*fill=%p", buf, fill, *fill);
- return NULL;
- }
-
- uint64_t count;
- ret = sscanf (buf, "%"SCNx64"\n", &count);
- (*fill)->count = 0;
-
- if (!ret){
- gf_log ("dict",
- GF_LOG_ERROR,
- "sscanf on buf failed");
- goto err;
- }
- buf += 9;
-
- if (count == 0) {
- gf_log ("dict",
- GF_LOG_ERROR,
- "count == 0");
- goto err;
- }
+ if (!this) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is NULL");
+ return NULL;
+ }
- for (cnt = 0; cnt < count; cnt++) {
- data_t *value = NULL;
- char *key = NULL;
- uint64_t key_len, value_len;
-
- ret = sscanf (buf, "%"SCNx64":%"SCNx64"\n", &key_len, &value_len);
- if (ret != 2) {
- gf_log ("dict",
- GF_LOG_ERROR,
- "sscanf for key_len and value_len failed");
- goto err;
- }
- buf += 18;
-
- key = buf;
- buf += key_len;
-
- value = get_new_data ();
- value->len = value_len;
- value->data = buf;
- value->is_static = 1;
- buf += value_len;
-
- dict_set (*fill, key, value);
- }
+ LOCK (&this->lock);
- goto ret;
+ this->refcount++;
-err:
- FREE (*fill);
- *fill = NULL;
+ UNLOCK (&this->lock);
-ret:
- return *fill;
+ return this;
}
-
-int32_t
-dict_iovec_len (dict_t *this)
+void
+data_unref (data_t *this)
{
- if (!this) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@this=%p", this);
- return -1;
- }
+ int32_t ref;
- int32_t len = 0;
- data_pair_t *pair = this->members_list;
+ if (!this) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is NULL");
+ return;
+ }
- len++; /* initial header */
- while (pair) {
- len++; /* pair header */
- len++; /* key */
+ LOCK (&this->lock);
- if (pair->value->vec)
- len += pair->value->len;
- else
- len++;
- pair = pair->next;
- }
+ this->refcount--;
+ ref = this->refcount;
- return len;
+ UNLOCK (&this->lock);
+
+ if (!ref)
+ data_destroy (this);
}
-int32_t
-dict_to_iovec (dict_t *this,
- struct iovec *vec,
- int32_t count)
+data_t *
+data_ref (data_t *this)
{
- if (!this || !vec) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@this=%p @vec=%p", this, vec);
- return -1;
- }
-
- int32_t i = 0;
- data_pair_t *pair = this->members_list;
-
- vec[0].iov_len = 9;
- if (vec[0].iov_base)
- sprintf (vec[0].iov_base,
- "%08"PRIx64"\n",
- (int64_t)this->count);
- i++;
-
- while (pair) {
- int64_t keylen = strlen (pair->key) + 1;
- int64_t vallen = 0;
-
- if (pair->value->vec) {
- int i;
+ if (!this) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is NULL");
+ return NULL;
+ }
- for (i=0; i<pair->value->len; i++) {
- vallen += pair->value->vec[i].iov_len;
- }
- } else {
- vallen = pair->value->len;
- }
+ LOCK (&this->lock);
- vec[i].iov_len = 18;
- if (vec[i].iov_base)
- sprintf (vec[i].iov_base,
- "%08"PRIx64":%08"PRIx64"\n",
- keylen,
- vallen);
- i++;
-
- vec[i].iov_len = keylen;
- vec[i].iov_base = pair->key;
- i++;
-
- if (pair->value->vec) {
- int k;
-
- for (k=0; k<pair->value->len; k++) {
- vec[i].iov_len = pair->value->vec[k].iov_len;
- vec[i].iov_base = pair->value->vec[k].iov_base;
- i++;
- }
- } else {
- vec[i].iov_len = pair->value->len;
- vec[i].iov_base = pair->value->data;
- i++;
- }
+ this->refcount++;
- pair = pair->next;
- }
+ UNLOCK (&this->lock);
- return 0;
+ return this;
}
data_t *
int_to_data (int64_t value)
{
int ret = 0;
- data_t *data = get_new_data ();
+ data_t *data = get_new_data ();
- if (!data) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@data - NULL returned by CALLOC");
- return NULL;
- }
+ if (!data) {
+ return NULL;
+ }
- ret = asprintf (&data->data, "%"PRId64, value);
+ ret = gf_asprintf (&data->data, "%"PRId64, value);
if (-1 == ret) {
- gf_log ("dict", GF_LOG_ERROR, "asprintf failed");
+ gf_log ("dict", GF_LOG_DEBUG, "asprintf failed");
return NULL;
}
- data->len = strlen (data->data) + 1;
+ data->len = strlen (data->data) + 1;
- return data;
+ return data;
}
data_t *
data_from_int64 (int64_t value)
{
int ret = 0;
- data_t *data = get_new_data ();
+ data_t *data = get_new_data ();
- if (!data) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@data - NULL returned by CALLOC");
- return NULL;
- }
- ret = asprintf (&data->data, "%"PRId64, value);
+ if (!data) {
+ return NULL;
+ }
+ ret = gf_asprintf (&data->data, "%"PRId64, value);
if (-1 == ret) {
- gf_log ("dict", GF_LOG_ERROR, "asprintf failed");
+ gf_log ("dict", GF_LOG_DEBUG, "asprintf failed");
return NULL;
}
- data->len = strlen (data->data) + 1;
+ data->len = strlen (data->data) + 1;
- return data;
+ return data;
}
data_t *
data_from_int32 (int32_t value)
{
int ret = 0;
- data_t *data = get_new_data ();
+ data_t *data = get_new_data ();
- if (!data) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@data - NULL returned by CALLOC");
- return NULL;
- }
- ret = asprintf (&data->data, "%"PRId32, value);
+ if (!data) {
+ return NULL;
+ }
+ ret = gf_asprintf (&data->data, "%"PRId32, value);
if (-1 == ret) {
- gf_log ("dict", GF_LOG_ERROR, "asprintf failed");
+ gf_log ("dict", GF_LOG_DEBUG, "asprintf failed");
return NULL;
}
- data->len = strlen (data->data) + 1;
+ data->len = strlen (data->data) + 1;
- return data;
+ return data;
}
data_t *
data_from_int16 (int16_t value)
{
int ret = 0;
- data_t *data = get_new_data ();
+ data_t *data = get_new_data ();
- if (!data) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@data - NULL returned by CALLOC");
- return NULL;
- }
- ret = asprintf (&data->data, "%"PRId16, value);
+ if (!data) {
+ return NULL;
+ }
+ ret = gf_asprintf (&data->data, "%"PRId16, value);
if (-1 == ret) {
- gf_log ("dict", GF_LOG_ERROR, "asprintf failed");
+ gf_log ("dict", GF_LOG_DEBUG, "asprintf failed");
return NULL;
}
- data->len = strlen (data->data) + 1;
+ data->len = strlen (data->data) + 1;
- return data;
+ return data;
}
data_t *
data_from_int8 (int8_t value)
{
int ret = 0;
- data_t *data = get_new_data ();
+ data_t *data = get_new_data ();
- if (!data) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@data - NULL returned by CALLOC");
- return NULL;
- }
- ret = asprintf (&data->data, "%d", value);
+ if (!data) {
+ return NULL;
+ }
+ ret = gf_asprintf (&data->data, "%d", value);
if (-1 == ret) {
- gf_log ("dict", GF_LOG_ERROR, "asprintf failed");
+ gf_log ("dict", GF_LOG_DEBUG, "asprintf failed");
return NULL;
}
- data->len = strlen (data->data) + 1;
+ data->len = strlen (data->data) + 1;
- return data;
+ return data;
}
data_t *
data_from_uint64 (uint64_t value)
{
int ret = 0;
- data_t *data = get_new_data ();
+ data_t *data = get_new_data ();
- if (!data) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@data - NULL returned by CALLOC");
- return NULL;
- }
- ret = asprintf (&data->data, "%"PRIu64, value);
+ if (!data) {
+ return NULL;
+ }
+ ret = gf_asprintf (&data->data, "%"PRIu64, value);
if (-1 == ret) {
- gf_log ("dict", GF_LOG_ERROR, "asprintf failed");
+ gf_log ("dict", GF_LOG_DEBUG, "asprintf failed");
return NULL;
}
- data->len = strlen (data->data) + 1;
+ data->len = strlen (data->data) + 1;
- return data;
+ return data;
}
static data_t *
data_from_double (double value)
{
- data_t *data = NULL;
- int ret = 0;
+ data_t *data = NULL;
+ int ret = 0;
- data = get_new_data ();
+ data = get_new_data ();
- if (!data) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@data - NULL returned by CALLOC");
- return NULL;
- }
+ if (!data) {
+ return NULL;
+ }
- ret = asprintf (&data->data, "%f", value);
- if (ret == -1) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@data - allocation failed by ASPRINTF");
- return NULL;
- }
- data->len = strlen (data->data) + 1;
+ ret = gf_asprintf (&data->data, "%f", value);
+ if (ret == -1) {
+ return NULL;
+ }
+ data->len = strlen (data->data) + 1;
- return data;
+ return data;
}
@@ -878,22 +692,20 @@ data_t *
data_from_uint32 (uint32_t value)
{
int ret = 0;
- data_t *data = get_new_data ();
+ data_t *data = get_new_data ();
- if (!data) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@data - NULL returned by CALLOC");
- return NULL;
- }
- ret = asprintf (&data->data, "%"PRIu32, value);
+ if (!data) {
+ return NULL;
+ }
+ ret = gf_asprintf (&data->data, "%"PRIu32, value);
if (-1 == ret) {
- gf_log ("dict", GF_LOG_ERROR, "asprintf failed");
+ gf_log ("dict", GF_LOG_DEBUG, "asprintf failed");
return NULL;
}
- data->len = strlen (data->data) + 1;
+ data->len = strlen (data->data) + 1;
- return data;
+ return data;
}
@@ -901,44 +713,38 @@ data_t *
data_from_uint16 (uint16_t value)
{
int ret = 0;
- data_t *data = get_new_data ();
+ data_t *data = get_new_data ();
- if (!data) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@data - NULL returned by CALLOC");
- return NULL;
- }
- ret = asprintf (&data->data, "%"PRIu16, value);
+ if (!data) {
+ return NULL;
+ }
+ ret = gf_asprintf (&data->data, "%"PRIu16, value);
if (-1 == ret) {
- gf_log ("dict", GF_LOG_ERROR, "asprintf failed");
return NULL;
}
- data->len = strlen (data->data) + 1;
+ data->len = strlen (data->data) + 1;
- return data;
+ return data;
}
data_t *
data_from_ptr (void *value)
{
- if (!value) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@value=%p", value);
- return NULL;
- }
+ if (!value) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING, "value is NULL");
+ return NULL;
+ }
- data_t *data = get_new_data ();
+ data_t *data = get_new_data ();
- if (!data) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@data - NULL returned by CALLOC");
- return NULL;
- }
+ if (!data) {
+ return NULL;
+ }
- data->data = value;
- return data;
+ data->data = value;
+ return data;
}
data_t *
@@ -947,292 +753,517 @@ data_from_static_ptr (void *value)
/*
this is valid to set 0 as value..
- if (!value) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@value=%p", value);
- return NULL;
- }
+ if (!value) {
+ gf_log ("dict", GF_LOG_CRITICAL,
+ "@value=%p", value);
+ return NULL;
+ }
*/
- data_t *data = get_new_data ();
+ data_t *data = get_new_data ();
- if (!data) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@data - NULL returned by CALLOC");
- return NULL;
- }
+ if (!data) {
+ return NULL;
+ }
- data->is_static = 1;
- data->data = value;
+ data->is_static = 1;
+ data->data = value;
- return data;
+ return data;
}
data_t *
str_to_data (char *value)
{
- if (!value) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@value=%p", value);
- return NULL;
- }
- data_t *data = get_new_data ();
+ if (!value) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING, "value is NULL");
+ return NULL;
+ }
+ data_t *data = get_new_data ();
- if (!data) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@data - NULL returned by CALLOC");
- return NULL;
- }
- data->len = strlen (value) + 1;
+ if (!data) {
+ return NULL;
+ }
+ data->len = strlen (value) + 1;
- data->data = value;
- data->is_static = 1;
+ data->data = value;
+ data->is_static = 1;
- return data;
+ return data;
}
data_t *
data_from_dynstr (char *value)
{
- if (!value) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@value=%p", value);
- return NULL;
- }
+ if (!value) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING, "value is NULL");
+ return NULL;
+ }
- data_t *data = get_new_data ();
+ data_t *data = get_new_data ();
- data->len = strlen (value) + 1;
- data->data = value;
+ data->len = strlen (value) + 1;
+ data->data = value;
- return data;
+ return data;
+}
+
+data_t *
+data_from_dynmstr (char *value)
+{
+ if (!value) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING, "value is NULL");
+ return NULL;
+ }
+
+ data_t *data = get_new_data ();
+
+ data->len = strlen (value) + 1;
+ data->data = value;
+ data->is_stdalloc = 1;
+
+ return data;
}
data_t *
data_from_dynptr (void *value, int32_t len)
{
- data_t *data = get_new_data ();
+ data_t *data = get_new_data ();
+
+ if (!data)
+ return NULL;
- data->len = len;
- data->data = value;
+ data->len = len;
+ data->data = value;
- return data;
+ return data;
}
data_t *
bin_to_data (void *value, int32_t len)
{
- if (!value) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@value=%p", value);
- return NULL;
- }
+ if (!value) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING, "value is NULL");
+ return NULL;
+ }
+
+ data_t *data = get_new_data ();
- data_t *data = get_new_data ();
+ if (!data)
+ return NULL;
- data->is_static = 1;
- data->len = len;
- data->data = value;
+ data->is_static = 1;
+ data->len = len;
+ data->data = value;
- return data;
+ return data;
}
int64_t
data_to_int64 (data_t *data)
{
- if (!data)
- return -1;
+ if (!data) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING, "data is NULL");
+ return -1;
+ }
+
+ char *str = alloca (data->len + 1);
+ if (!str)
+ return -1;
- char *str = alloca (data->len + 1);
- ERR_ABORT (str);
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
- return (int64_t) strtoull (str, NULL, 0);
+ memcpy (str, data->data, data->len);
+ str[data->len] = '\0';
+ return (int64_t) strtoull (str, NULL, 0);
}
int32_t
data_to_int32 (data_t *data)
{
- if (!data)
- return -1;
+ if (!data) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING, "data is NULL");
+ return -1;
+ }
- char *str = alloca (data->len + 1);
- ERR_ABORT (str);
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
+ char *str = alloca (data->len + 1);
+ if (!str)
+ return -1;
- return strtoul (str, NULL, 0);
+ memcpy (str, data->data, data->len);
+ str[data->len] = '\0';
+
+ return strtoul (str, NULL, 0);
}
int16_t
data_to_int16 (data_t *data)
{
- if (!data)
- return -1;
+ int16_t value = 0;
+
+ if (!data) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING, "data is NULL");
+ return -1;
+ }
+
+ char *str = alloca (data->len + 1);
+ if (!str)
+ return -1;
+
+ memcpy (str, data->data, data->len);
+ str[data->len] = '\0';
- char *str = alloca (data->len + 1);
- ERR_ABORT (str);
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
+ errno = 0;
+ value = strtol (str, NULL, 0);
- return strtol (str, NULL, 0);
+ if ((value > SHRT_MAX) || (value < SHRT_MIN)) {
+ errno = ERANGE;
+ gf_log_callingfn ("dict", GF_LOG_WARNING,
+ "Error in data conversion: "
+ "detected overflow");
+ return -1;
+ }
+
+ return (int16_t)value;
}
int8_t
data_to_int8 (data_t *data)
{
- if (!data)
- return -1;
+ int8_t value = 0;
+
+ if (!data) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING, "data is NULL");
+ return -1;
+ }
- char *str = alloca (data->len + 1);
- ERR_ABORT (str);
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
+ char *str = alloca (data->len + 1);
+ if (!str)
+ return -1;
- return (int8_t)strtol (str, NULL, 0);
+ memcpy (str, data->data, data->len);
+ str[data->len] = '\0';
+
+ errno = 0;
+ value = strtol (str, NULL, 0);
+
+ if ((value > SCHAR_MAX) || (value < SCHAR_MIN)) {
+ errno = ERANGE;
+ gf_log_callingfn ("dict", GF_LOG_WARNING,
+ "Error in data conversion: "
+ "detected overflow");
+ return -1;
+ }
+
+ return (int8_t)value;
}
uint64_t
data_to_uint64 (data_t *data)
{
- if (!data)
- return -1;
- char *str = alloca (data->len + 1);
- ERR_ABORT (str);
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
+ if (!data)
+ return -1;
+ char *str = alloca (data->len + 1);
+ if (!str)
+ return -1;
+
+ memcpy (str, data->data, data->len);
+ str[data->len] = '\0';
- return strtoll (str, NULL, 0);
+ return strtoll (str, NULL, 0);
}
uint32_t
data_to_uint32 (data_t *data)
{
- if (!data)
- return -1;
+ if (!data)
+ return -1;
+
+ char *str = alloca (data->len + 1);
+ if (!str)
+ return -1;
- char *str = alloca (data->len + 1);
- ERR_ABORT (str);
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
+ memcpy (str, data->data, data->len);
+ str[data->len] = '\0';
- return strtol (str, NULL, 0);
+ return strtol (str, NULL, 0);
}
uint16_t
data_to_uint16 (data_t *data)
{
- if (!data)
+ uint16_t value = 0;
+
+ if (!data)
+ return -1;
+
+ char *str = alloca (data->len + 1);
+ if (!str)
+ return -1;
+
+ memcpy (str, data->data, data->len);
+ str[data->len] = '\0';
+
+ errno = 0;
+ value = strtol (str, NULL, 0);
+
+ if ((USHRT_MAX - value) < 0) {
+ errno = ERANGE;
+ gf_log_callingfn ("dict", GF_LOG_WARNING,
+ "Error in data conversion: "
+ "overflow detected");
return -1;
+ }
+
+ return (uint16_t)value;
+}
+
+uint8_t
+data_to_uint8 (data_t *data)
+{
+ uint32_t value = 0;
+
+ if (!data) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING, "data is NULL");
+ return -1;
+ }
- char *str = alloca (data->len + 1);
- ERR_ABORT (str);
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
+ char *str = alloca (data->len + 1);
+ if (!str)
+ return -1;
- return strtol (str, NULL, 0);
+ memcpy (str, data->data, data->len);
+ str[data->len] = '\0';
+
+ errno = 0;
+ value = strtol (str, NULL, 0);
+
+ if ((UCHAR_MAX - value) < 0) {
+ errno = ERANGE;
+ gf_log_callingfn ("dict", GF_LOG_WARNING,
+ "data conversion overflow detected (%s)",
+ strerror(errno));
+ return -1;
+ }
+
+ return (uint8_t) value;
}
char *
data_to_str (data_t *data)
{
- if (!data) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@data=%p", data);
- return NULL;
- }
- return data->data;
+ if (!data) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING, "data is NULL");
+ return NULL;
+ }
+ return data->data;
}
void *
data_to_ptr (data_t *data)
{
- if (!data) {
- return NULL;
- }
- return data->data;
+ if (!data) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING, "data is NULL");
+ return NULL;
+ }
+ return data->data;
}
void *
data_to_bin (data_t *data)
{
- if (!data) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@data=%p", data);
- return NULL;
- }
- return data->data;
+ if (!data) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING, "data is NULL");
+ return NULL;
+ }
+ return data->data;
}
-void
+int
+dict_null_foreach_fn (dict_t *d, char *k,
+ data_t *v, void *tmp)
+{
+ return 0;
+}
+
+int
dict_foreach (dict_t *dict,
- void (*fn)(dict_t *this,
- char *key,
- data_t *value,
- void *data),
- void *data)
-{
- if (!data) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@data=%p", data);
- return;
- }
+ int (*fn)(dict_t *this,
+ char *key,
+ data_t *value,
+ void *data),
+ void *data)
+{
+ if (!dict) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING,
+ "dict is NULL");
+ return -1;
+ }
- data_pair_t *pairs = dict->members_list;
+ int ret = -1;
+ data_pair_t *pairs = NULL;
+ data_pair_t *next = NULL;
- while (pairs) {
- fn (dict, pairs->key, pairs->value, data);
- pairs = pairs->next;
- }
+ pairs = dict->members_list;
+ while (pairs) {
+ next = pairs->next;
+ ret = fn (dict, pairs->key, pairs->value, data);
+ if (ret == -1)
+ return -1;
+ pairs = next;
+ }
+
+ return 0;
}
+/* return values:
+ -1 = failure,
+ 0 = no matches found,
+ +n = n number of matches
+*/
+int
+dict_foreach_fnmatch (dict_t *dict, char *pattern,
+ int (*fn)(dict_t *this,
+ char *key,
+ data_t *value,
+ void *data),
+ void *data)
+{
+ if (!dict) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING,
+ "dict is NULL");
+ return 0;
+ }
+
+ int ret = -1;
+ int count = 0;
+ data_pair_t *pairs = NULL;
+ data_pair_t *next = NULL;
+
+ pairs = dict->members_list;
+ while (pairs) {
+ next = pairs->next;
+ if (!fnmatch (pattern, pairs->key, 0)) {
+ ret = fn (dict, pairs->key, pairs->value, data);
+ if (ret == -1)
+ return -1;
+ count++;
+ }
+ pairs = next;
+ }
+
+ return count;
+}
+
+
+/**
+ * dict_keys_join - pack the keys of the dictionary in a buffer.
+ *
+ * @value : buffer in which the keys will be packed (can be NULL)
+ * @size : size of the buffer which is sent (can be 0, in which case buffer
+ * is not packed but only length is returned)
+ * @dict : dictionary of which all the keys will be packed
+ * @filter_fn : keys matched in filter_fn() is counted.
+ *
+ * @return : @length of string after joining keys.
+ *
+ */
+
+int
+dict_keys_join (void *value, int size, dict_t *dict,
+ int (*filter_fn)(char *k))
+{
+ int len = 0;
+ data_pair_t *pairs = NULL;
+ data_pair_t *next = NULL;
+
+ pairs = dict->members_list;
+ while (pairs) {
+ next = pairs->next;
+
+ if (filter_fn && filter_fn (pairs->key)){
+ pairs = next;
+ continue;
+ }
+
+ if (value && (size > len))
+ strncpy (value + len, pairs->key, size - len);
+
+ len += (strlen (pairs->key) + 1);
+
+ pairs = next;
+ }
+
+ return len;
+}
-static void
+static int
_copy (dict_t *unused,
char *key,
data_t *value,
void *newdict)
{
- dict_set ((dict_t *)newdict, key, (value));
+ return dict_set ((dict_t *)newdict, key, (value));
+}
+
+static int
+_remove (dict_t *dict,
+ char *key,
+ data_t *value,
+ void *unused)
+{
+ dict_del ((dict_t *)dict, key);
+ return 0;
}
dict_t *
dict_copy (dict_t *dict,
- dict_t *new)
+ dict_t *new)
{
- if (!dict) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@data=%p", dict);
- return NULL;
- }
+ if (!dict) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is NULL");
+ return NULL;
+ }
- if (!new)
- new = get_new_dict_full (dict->hash_size);
+ if (!new)
+ new = get_new_dict_full (dict->hash_size);
- dict_foreach (dict, _copy, new);
+ dict_foreach (dict, _copy, new);
- return new;
+ return new;
+}
+
+int
+dict_reset (dict_t *dict)
+{
+ int32_t ret = -1;
+ if (!dict) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is NULL");
+ goto out;
+ }
+ dict_foreach (dict, _remove, NULL);
+ ret = 0;
+out:
+ return ret;
}
dict_t *
dict_copy_with_ref (dict_t *dict,
- dict_t *new)
+ dict_t *new)
{
- dict_t *local_new = NULL;
+ dict_t *local_new = NULL;
- GF_VALIDATE_OR_GOTO("dict", dict, fail);
+ GF_VALIDATE_OR_GOTO("dict", dict, fail);
- if (new == NULL) {
- local_new = dict_new ();
- GF_VALIDATE_OR_GOTO("dict", local_new, fail);
- new = local_new;
- }
+ if (new == NULL) {
+ local_new = dict_new ();
+ GF_VALIDATE_OR_GOTO("dict", local_new, fail);
+ new = local_new;
+ }
- dict_foreach (dict, _copy, new);
+ dict_foreach (dict, _copy, new);
fail:
- return new;
+ return new;
}
/*
@@ -1241,7 +1272,7 @@ fail:
/**
* Common cleaned up interface:
- *
+ *
* Return value: 0 success
* -val error, val = errno
*/
@@ -1250,506 +1281,508 @@ fail:
static int
dict_get_with_ref (dict_t *this, char *key, data_t **data)
{
- data_pair_t * pair = NULL;
- int ret = -ENOENT;
+ data_pair_t * pair = NULL;
+ int ret = -ENOENT;
- if (!this || !key || !data) {
- ret = -EINVAL;
- goto err;
- }
+ if (!this || !key || !data) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING,
+ "dict OR key (%s) is NULL", key);
+ ret = -EINVAL;
+ goto err;
+ }
- LOCK (&this->lock);
- {
- pair = _dict_lookup (this, key);
- }
- UNLOCK (&this->lock);
+ LOCK (&this->lock);
+ {
+ pair = _dict_lookup (this, key);
+ }
+ UNLOCK (&this->lock);
- if (pair) {
- ret = 0;
- *data = data_ref (pair->value);
- }
+ if (pair) {
+ ret = 0;
+ *data = data_ref (pair->value);
+ }
-err:
- return ret;
+err:
+ return ret;
}
static int
_data_to_ptr (data_t *data, void **val)
{
- int ret = 0;
+ int ret = 0;
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- *val = data->data;
+ *val = data->data;
err:
- return ret;
+ return ret;
}
static int
_data_to_int8 (data_t *data, int8_t *val)
{
- int ret = 0;
- char * str = NULL;
+ int ret = 0;
+ char * str = NULL;
- if (!data || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- str = alloca (data->len + 1);
- if (!str) {
- ret = -ENOMEM;
- goto err;
- }
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
+ str = alloca (data->len + 1);
+ if (!str) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ memcpy (str, data->data, data->len);
+ str[data->len] = '\0';
- errno = 0;
- *val = strtol (str, NULL, 0);
- if (errno != 0)
- ret = -errno;
+ errno = 0;
+ *val = strtol (str, NULL, 0);
+ if (errno != 0)
+ ret = -errno;
err:
- return ret;
+ return ret;
}
static int
_data_to_int16 (data_t *data, int16_t *val)
{
- int ret = 0;
- char * str = NULL;
+ int ret = 0;
+ char * str = NULL;
- if (!data || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- str = alloca (data->len + 1);
- if (!str) {
- ret = -ENOMEM;
- goto err;
- }
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
+ str = alloca (data->len + 1);
+ if (!str) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ memcpy (str, data->data, data->len);
+ str[data->len] = '\0';
- errno = 0;
- *val = strtol (str, NULL, 0);
- if (errno != 0)
- ret = -errno;
+ errno = 0;
+ *val = strtol (str, NULL, 0);
+ if (errno != 0)
+ ret = -errno;
err:
- return ret;
+ return ret;
}
static int
_data_to_int32 (data_t *data, int32_t *val)
{
- int ret = 0;
- char * str = NULL;
+ int ret = 0;
+ char * str = NULL;
- if (!data || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- str = alloca (data->len + 1);
- if (!str) {
- ret = -ENOMEM;
- goto err;
- }
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
+ str = alloca (data->len + 1);
+ if (!str) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ memcpy (str, data->data, data->len);
+ str[data->len] = '\0';
- errno = 0;
- *val = strtol (str, NULL, 0);
- if (errno != 0)
- ret = -errno;
+ errno = 0;
+ *val = strtol (str, NULL, 0);
+ if (errno != 0)
+ ret = -errno;
err:
- return ret;
+ return ret;
}
static int
_data_to_int64 (data_t *data, int64_t *val)
{
- int ret = 0;
- char * str = NULL;
+ int ret = 0;
+ char * str = NULL;
- if (!data || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- str = alloca (data->len + 1);
- if (!str) {
- ret = -ENOMEM;
- goto err;
- }
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
+ str = alloca (data->len + 1);
+ if (!str) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ memcpy (str, data->data, data->len);
+ str[data->len] = '\0';
- errno = 0;
- *val = strtoll (str, NULL, 0);
- if (errno != 0)
- ret = -errno;
+ errno = 0;
+ *val = strtoll (str, NULL, 0);
+ if (errno != 0)
+ ret = -errno;
err:
- return ret;
+ return ret;
}
static int
_data_to_uint16 (data_t *data, uint16_t *val)
{
- int ret = 0;
- char * str = NULL;
+ int ret = 0;
+ char * str = NULL;
- if (!data || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- str = alloca (data->len + 1);
- if (!str) {
- ret = -ENOMEM;
- goto err;
- }
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
+ str = alloca (data->len + 1);
+ if (!str) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ memcpy (str, data->data, data->len);
+ str[data->len] = '\0';
- errno = 0;
- *val = strtoul (str, NULL, 0);
- if (errno != 0)
- ret = -errno;
+ errno = 0;
+ *val = strtoul (str, NULL, 0);
+ if (errno != 0)
+ ret = -errno;
err:
- return ret;
+ return ret;
}
static int
_data_to_uint32 (data_t *data, uint32_t *val)
{
- int ret = 0;
- char * str = NULL;
+ int ret = 0;
+ char * str = NULL;
- if (!data || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- str = alloca (data->len + 1);
- if (!str) {
- ret = -ENOMEM;
- goto err;
- }
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
+ str = alloca (data->len + 1);
+ if (!str) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ memcpy (str, data->data, data->len);
+ str[data->len] = '\0';
- errno = 0;
- *val = strtoul (str, NULL, 0);
- if (errno != 0)
- ret = -errno;
+ errno = 0;
+ *val = strtoul (str, NULL, 0);
+ if (errno != 0)
+ ret = -errno;
err:
- return ret;
+ return ret;
}
static int
_data_to_uint64 (data_t *data, uint64_t *val)
{
- int ret = 0;
- char * str = NULL;
+ int ret = 0;
+ char * str = NULL;
- if (!data || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- str = alloca (data->len + 1);
- if (!str) {
- ret = -ENOMEM;
- goto err;
- }
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
+ str = alloca (data->len + 1);
+ if (!str) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ memcpy (str, data->data, data->len);
+ str[data->len] = '\0';
- errno = 0;
- *val = strtoull (str, NULL, 0);
- if (errno != 0)
- ret = -errno;
+ errno = 0;
+ *val = strtoull (str, NULL, 0);
+ if (errno != 0)
+ ret = -errno;
err:
- return ret;
+ return ret;
}
static int
_data_to_double (data_t *data, double *val)
{
- int ret = 0;
- char * str = NULL;
+ int ret = 0;
+ char * str = NULL;
- if (!data || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- str = alloca (data->len + 1);
- if (!str) {
- ret = -ENOMEM;
- goto err;
- }
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
+ str = alloca (data->len + 1);
+ if (!str) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ memcpy (str, data->data, data->len);
+ str[data->len] = '\0';
- errno = 0;
- *val = strtod (str, NULL);
- if (errno != 0)
- ret = -errno;
+ errno = 0;
+ *val = strtod (str, NULL);
+ if (errno != 0)
+ ret = -errno;
err:
- return ret;
+ return ret;
}
int
dict_get_int8 (dict_t *this, char *key, int8_t *val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t * data = NULL;
+ int ret = 0;
- if (!this || !key || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!this || !key || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret != 0) {
- goto err;
- }
+ ret = dict_get_with_ref (this, key, &data);
+ if (ret != 0) {
+ goto err;
+ }
+
+ ret = _data_to_int8 (data, val);
- ret = _data_to_int8 (data, val);
-
err:
- if (data)
- data_unref (data);
- return ret;
+ if (data)
+ data_unref (data);
+ return ret;
}
int
dict_set_int8 (dict_t *this, char *key, int8_t val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t * data = NULL;
+ int ret = 0;
- data = data_from_int8 (val);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ data = data_from_int8 (val);
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_set (this, key, data);
err:
- return ret;
+ return ret;
}
int
dict_get_int16 (dict_t *this, char *key, int16_t *val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t * data = NULL;
+ int ret = 0;
- if (!this || !key || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!this || !key || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret != 0) {
- goto err;
- }
+ ret = dict_get_with_ref (this, key, &data);
+ if (ret != 0) {
+ goto err;
+ }
+
+ ret = _data_to_int16 (data, val);
- ret = _data_to_int16 (data, val);
-
err:
- if (data)
- data_unref (data);
- return ret;
+ if (data)
+ data_unref (data);
+ return ret;
}
int
dict_set_int16 (dict_t *this, char *key, int16_t val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t * data = NULL;
+ int ret = 0;
- data = data_from_int16 (val);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ data = data_from_int16 (val);
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_set (this, key, data);
err:
- return ret;
+ return ret;
}
int
dict_get_int32 (dict_t *this, char *key, int32_t *val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t * data = NULL;
+ int ret = 0;
- if (!this || !key || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!this || !key || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret != 0) {
- goto err;
- }
+ ret = dict_get_with_ref (this, key, &data);
+ if (ret != 0) {
+ goto err;
+ }
+
+ ret = _data_to_int32 (data, val);
- ret = _data_to_int32 (data, val);
-
err:
- if (data)
- data_unref (data);
- return ret;
+ if (data)
+ data_unref (data);
+ return ret;
}
int
dict_set_int32 (dict_t *this, char *key, int32_t val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t * data = NULL;
+ int ret = 0;
- data = data_from_int32 (val);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ data = data_from_int32 (val);
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_set (this, key, data);
err:
- return ret;
+ return ret;
}
int
dict_get_int64 (dict_t *this, char *key, int64_t *val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t * data = NULL;
+ int ret = 0;
- if (!this || !key || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!this || !key || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret != 0) {
- goto err;
- }
+ ret = dict_get_with_ref (this, key, &data);
+ if (ret != 0) {
+ goto err;
+ }
+
+ ret = _data_to_int64 (data, val);
- ret = _data_to_int64 (data, val);
-
err:
- if (data)
- data_unref (data);
- return ret;
+ if (data)
+ data_unref (data);
+ return ret;
}
int
dict_set_int64 (dict_t *this, char *key, int64_t val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t * data = NULL;
+ int ret = 0;
- data = data_from_int64 (val);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ data = data_from_int64 (val);
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_set (this, key, data);
err:
- return ret;
+ return ret;
}
int
dict_get_uint16 (dict_t *this, char *key, uint16_t *val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t * data = NULL;
+ int ret = 0;
- if (!this || !key || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!this || !key || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret != 0) {
- goto err;
- }
+ ret = dict_get_with_ref (this, key, &data);
+ if (ret != 0) {
+ goto err;
+ }
+
+ ret = _data_to_uint16 (data, val);
- ret = _data_to_uint16 (data, val);
-
err:
- if (data)
- data_unref (data);
- return ret;
+ if (data)
+ data_unref (data);
+ return ret;
}
int
dict_set_uint16 (dict_t *this, char *key, uint16_t val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t * data = NULL;
+ int ret = 0;
- data = data_from_uint16 (val);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ data = data_from_uint16 (val);
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_set (this, key, data);
err:
- return ret;
+ return ret;
}
int
dict_get_uint32 (dict_t *this, char *key, uint32_t *val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t * data = NULL;
+ int ret = 0;
- if (!this || !key || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!this || !key || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret != 0) {
- goto err;
- }
+ ret = dict_get_with_ref (this, key, &data);
+ if (ret != 0) {
+ goto err;
+ }
+
+ ret = _data_to_uint32 (data, val);
- ret = _data_to_uint32 (data, val);
-
err:
- if (data)
- data_unref (data);
- return ret;
+ if (data)
+ data_unref (data);
+ return ret;
}
@@ -1757,336 +1790,448 @@ err:
int
dict_set_uint32 (dict_t *this, char *key, uint32_t val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t * data = NULL;
+ int ret = 0;
- data = data_from_uint32 (val);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ data = data_from_uint32 (val);
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_set (this, key, data);
err:
- return ret;
+ return ret;
}
int
dict_get_uint64 (dict_t *this, char *key, uint64_t *val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t * data = NULL;
+ int ret = 0;
- if (!this || !key || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!this || !key || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret != 0) {
- goto err;
- }
+ ret = dict_get_with_ref (this, key, &data);
+ if (ret != 0) {
+ goto err;
+ }
+
+ ret = _data_to_uint64 (data, val);
- ret = _data_to_uint64 (data, val);
-
err:
- if (data)
- data_unref (data);
- return ret;
+ if (data)
+ data_unref (data);
+ return ret;
}
int
dict_set_uint64 (dict_t *this, char *key, uint64_t val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t * data = NULL;
+ int ret = 0;
- data = data_from_uint64 (val);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ data = data_from_uint64 (val);
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_set (this, key, data);
err:
- return ret;
+ return ret;
}
int
dict_get_double (dict_t *this, char *key, double *val)
{
- data_t *data = NULL;
- int ret = 0;
+ data_t *data = NULL;
+ int ret = 0;
- if (!this || !key || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!this || !key || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret != 0) {
- goto err;
- }
+ ret = dict_get_with_ref (this, key, &data);
+ if (ret != 0) {
+ goto err;
+ }
+
+ ret = _data_to_double (data, val);
- ret = _data_to_double (data, val);
-
err:
- if (data)
- data_unref (data);
- return ret;
+ if (data)
+ data_unref (data);
+ return ret;
}
int
dict_set_double (dict_t *this, char *key, double val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t * data = NULL;
+ int ret = 0;
- data = data_from_double (val);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ data = data_from_double (val);
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_set (this, key, data);
err:
- return ret;
+ return ret;
}
int
dict_set_static_ptr (dict_t *this, char *key, void *ptr)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t * data = NULL;
+ int ret = 0;
- data = data_from_static_ptr (ptr);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ data = data_from_static_ptr (ptr);
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_set (this, key, data);
err:
- return ret;
+ return ret;
}
int
dict_set_dynptr (dict_t *this, char *key, void *ptr, size_t len)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t * data = NULL;
+ int ret = 0;
- data = data_from_dynptr (ptr, len);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ data = data_from_dynptr (ptr, len);
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_set (this, key, data);
err:
- return ret;
+ return ret;
}
int
dict_get_ptr (dict_t *this, char *key, void **ptr)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t * data = NULL;
+ int ret = 0;
- if (!this || !key || !ptr) {
- ret = -EINVAL;
- goto err;
- }
+ if (!this || !key || !ptr) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret != 0) {
- goto err;
- }
+ ret = dict_get_with_ref (this, key, &data);
+ if (ret != 0) {
+ goto err;
+ }
- ret = _data_to_ptr (data, ptr);
- if (ret != 0) {
- goto err;
- }
+ ret = _data_to_ptr (data, ptr);
+ if (ret != 0) {
+ goto err;
+ }
-err:
- if (data)
- data_unref (data);
+err:
+ if (data)
+ data_unref (data);
- return ret;
+ return ret;
+}
+
+int
+dict_get_ptr_and_len (dict_t *this, char *key, void **ptr, int *len)
+{
+ data_t * data = NULL;
+ int ret = 0;
+
+ if (!this || !key || !ptr) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ ret = dict_get_with_ref (this, key, &data);
+ if (ret != 0) {
+ goto err;
+ }
+
+ *len = data->len;
+
+ ret = _data_to_ptr (data, ptr);
+ if (ret != 0) {
+ goto err;
+ }
+
+err:
+ if (data)
+ data_unref (data);
+
+ return ret;
}
int
dict_set_ptr (dict_t *this, char *key, void *ptr)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t * data = NULL;
+ int ret = 0;
- data = data_from_ptr (ptr);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ data = data_from_ptr (ptr);
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_set (this, key, data);
err:
- return ret;
+ return ret;
}
int
dict_get_str (dict_t *this, char *key, char **str)
{
- data_t * data = NULL;
- int ret = -EINVAL;
+ data_t * data = NULL;
+ int ret = -EINVAL;
- if (!this || !key || !str) {
- goto err;
- }
+ if (!this || !key || !str) {
+ goto err;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret < 0) {
- goto err;
- }
+ ret = dict_get_with_ref (this, key, &data);
+ if (ret < 0) {
+ goto err;
+ }
- if (!data || !data->data) {
- goto err;
- }
- *str = data->data;
+ if (!data || !data->data) {
+ goto err;
+ }
+ *str = data->data;
-err:
- if (data)
- data_unref (data);
+err:
+ if (data)
+ data_unref (data);
- return ret;
+ return ret;
}
int
dict_set_str (dict_t *this, char *key, char *str)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t * data = NULL;
+ int ret = 0;
- data = str_to_data (str);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ data = str_to_data (str);
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_set (this, key, data);
err:
- return ret;
+ return ret;
}
int
dict_set_dynstr (dict_t *this, char *key, char *str)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t * data = NULL;
+ int ret = 0;
- data = data_from_dynstr (str);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ data = data_from_dynstr (str);
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ ret = dict_set (this, key, data);
+
+err:
+ return ret;
+}
+
+/*
+ for malloced strings we should do a free instead of GF_FREE
+*/
+int
+dict_set_dynmstr (dict_t *this, char *key, char *str)
+{
+ data_t * data = NULL;
+ int ret = 0;
+
+ data = data_from_dynmstr (str);
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_set (this, key, data);
err:
- return ret;
+ return ret;
}
int
dict_get_bin (dict_t *this, char *key, void **bin)
{
- data_t * data = NULL;
- int ret = -EINVAL;
+ data_t * data = NULL;
+ int ret = -EINVAL;
- if (!this || !key || !bin) {
- goto err;
- }
+ if (!this || !key || !bin) {
+ goto err;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret < 0) {
- goto err;
- }
+ ret = dict_get_with_ref (this, key, &data);
+ if (ret < 0) {
+ goto err;
+ }
- if (!data || !data->data) {
- goto err;
- }
- *bin = data->data;
+ if (!data || !data->data) {
+ goto err;
+ }
+ *bin = data->data;
-err:
- if (data)
- data_unref (data);
+err:
+ if (data)
+ data_unref (data);
- return ret;
+ return ret;
}
int
dict_set_bin (dict_t *this, char *key, void *ptr, size_t size)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t * data = NULL;
+ int ret = 0;
- if (!ptr || (size < 0)) {
- ret = -EINVAL;
- goto err;
- }
+ if (!ptr || (size < 0)) {
+ ret = -EINVAL;
+ goto err;
+ }
- data = bin_to_data (ptr, size);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ data = bin_to_data (ptr, size);
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- data->data = ptr;
- data->len = size;
- data->is_static = 0;
+ data->data = ptr;
+ data->len = size;
+ data->is_static = 0;
- ret = dict_set (this, key, data);
+ ret = dict_set (this, key, data);
err:
- return ret;
+ return ret;
}
int
dict_set_static_bin (dict_t *this, char *key, void *ptr, size_t size)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t * data = NULL;
+ int ret = 0;
- if (!ptr || (size < 0)) {
- ret = -EINVAL;
- goto err;
- }
+ if (!ptr || (size < 0)) {
+ ret = -EINVAL;
+ goto err;
+ }
- data = bin_to_data (ptr, size);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ data = bin_to_data (ptr, size);
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ data->data = ptr;
+ data->len = size;
+ data->is_static = 1;
+
+ ret = dict_set (this, key, data);
+
+err:
+ return ret;
+}
+
+
+/**
+ * dict_get_str_boolean - get a boolean value based on string representation.
+ *
+ * @this : dictionary
+ * @key : dictionary key queried
+ * @default_val : default value if key not found
+ *
+ * @return : @default_val if key not found
+ * : boolean interpretation of @this[@key] if it makes sense
+ * (ie., "on", "true", "enable" ...)
+ * : -1 if error occurs or @this[@key] doesn't make sens as
+ * boolean
+ *
+ * So if you query a boolean option, then via @default_val you can choose
+ * between following patterns:
+ *
+ * - fall back to _gf_false if @key is not set [@default_val = 0]
+ * - fall back to _gf_true if @key is not set [@default_val = 1]
+ * - regard as failure if @key is not set [@default_val = -1]
+ * - handle specially (not as error) if @key is not set
+ * [@default_val = anything else]
+ */
- data->data = ptr;
- data->len = size;
- data->is_static = 1;
+int
+dict_get_str_boolean (dict_t *this, char *key, int default_val)
+{
+ data_t *data = NULL;
+ gf_boolean_t boo = _gf_false;
+ int ret = 0;
+
+ ret = dict_get_with_ref (this, key, &data);
+ if (ret < 0) {
+ if (ret == -ENOENT)
+ ret = default_val;
+ else
+ ret = -1;
+ goto err;
+ }
+
+ GF_ASSERT (data);
+
+ if (!data->data) {
+ ret = -1;
+ goto err;
+ }
+
+ ret = gf_string2boolean (data->data, &boo);
+ if (ret == -1)
+ goto err;
- ret = dict_set (this, key, data);
+ ret = boo;
err:
- return ret;
+ if (data)
+ data_unref (data);
+
+ return ret;
}
+
/**
* Serialization format:
* -------- -------- -------- ----------- -------------
@@ -2111,81 +2256,67 @@ err:
int
_dict_serialized_length (dict_t *this)
{
- int ret = -EINVAL;
- int count = 0;
- int len = 0;
- int i = 0;
- data_pair_t * pair = NULL;
+ int ret = -EINVAL;
+ int count = 0;
+ int len = 0;
+ data_pair_t * pair = NULL;
- len = DICT_HDR_LEN;
- count = this->count;
+ len = DICT_HDR_LEN;
+ count = this->count;
- if (count < 0) {
- gf_log ("dict", GF_LOG_ERROR, "count (%d) < 0!", count);
- goto out;
- }
+ if (count < 0) {
+ gf_log ("dict", GF_LOG_ERROR, "count (%d) < 0!", count);
+ goto out;
+ }
- pair = this->members_list;
+ pair = this->members_list;
- while (count) {
- if (!pair) {
- gf_log ("dict", GF_LOG_ERROR,
- "less than count data pairs found!");
- goto out;
- }
+ while (count) {
+ if (!pair) {
+ gf_log ("dict", GF_LOG_ERROR,
+ "less than count data pairs found!");
+ goto out;
+ }
- len += DICT_DATA_HDR_KEY_LEN + DICT_DATA_HDR_VAL_LEN;
+ len += DICT_DATA_HDR_KEY_LEN + DICT_DATA_HDR_VAL_LEN;
- if (!pair->key) {
- gf_log ("dict", GF_LOG_ERROR, "pair->key is null!");
- goto out;
- }
+ if (!pair->key) {
+ gf_log ("dict", GF_LOG_ERROR, "pair->key is null!");
+ goto out;
+ }
- len += strlen (pair->key) + 1 /* for '\0' */;
+ len += strlen (pair->key) + 1 /* for '\0' */;
- if (!pair->value) {
- gf_log ("dict", GF_LOG_ERROR,
- "pair->value is null!");
- goto out;
- }
+ if (!pair->value) {
+ gf_log ("dict", GF_LOG_ERROR,
+ "pair->value is null!");
+ goto out;
+ }
- if (pair->value->vec) {
- for (i = 0; i < pair->value->len; i++) {
- if (pair->value->vec[i].iov_len < 0) {
- gf_log ("dict", GF_LOG_ERROR,
- "iov_len (%"GF_PRI_SIZET") < 0!",
- pair->value->vec[i].iov_len);
- goto out;
- }
-
- len += pair->value->vec[i].iov_len;
- }
- } else {
- if (pair->value->len < 0) {
- gf_log ("dict", GF_LOG_ERROR,
- "value->len (%d) < 0",
- pair->value->len);
- goto out;
- }
-
- len += pair->value->len;
- }
+ if (pair->value->len < 0) {
+ gf_log ("dict", GF_LOG_ERROR,
+ "value->len (%d) < 0",
+ pair->value->len);
+ goto out;
+ }
- pair = pair->next;
- count--;
- }
-
- ret = len;
+ len += pair->value->len;
+
+ pair = pair->next;
+ count--;
+ }
+
+ ret = len;
out:
- return ret;
+ return ret;
}
/**
- * _dict_serialize - serialize a dictionary into a buffer. This procedure has
+ * _dict_serialize - serialize a dictionary into a buffer. This procedure has
* to be called with this->lock held.
*
* @this: dict to serialize
- * @buf: buffer to serialize into. This must be
+ * @buf: buffer to serialize into. This must be
* atleast dict_serialized_length (this) large
*
* @return: success: 0
@@ -2195,74 +2326,80 @@ out:
int
_dict_serialize (dict_t *this, char *buf)
{
- int ret = -1;
- data_pair_t * pair = NULL;
- int32_t count = 0;
- int32_t keylen = 0;
- int32_t vallen = 0;
-
- if (!buf) {
- gf_log ("dict", GF_LOG_ERROR,
- "buf is null!");
- goto out;
- }
-
- count = this->count;
- if (count < 0) {
- gf_log ("dict", GF_LOG_ERROR, "count (%d) < 0!", count);
- goto out;
- }
+ int ret = -1;
+ data_pair_t * pair = NULL;
+ int32_t count = 0;
+ int32_t keylen = 0;
+ int32_t vallen = 0;
+ int32_t netword = 0;
- *(int32_t *) buf = hton32 (count);
- buf += DICT_HDR_LEN;
- pair = this->members_list;
- while (count) {
- if (!pair) {
- gf_log ("dict", GF_LOG_ERROR,
- "less than count data pairs found!");
- goto out;
- }
+ if (!buf) {
+ gf_log ("dict", GF_LOG_ERROR,
+ "buf is null!");
+ goto out;
+ }
- if (!pair->key) {
- gf_log ("dict", GF_LOG_ERROR,
- "pair->key is null!");
- goto out;
- }
- keylen = strlen (pair->key);
- *(int32_t *) buf = hton32 (keylen);
- buf += DICT_DATA_HDR_KEY_LEN;
+ count = this->count;
+ if (count < 0) {
+ gf_log ("dict", GF_LOG_ERROR, "count (%d) < 0!", count);
+ goto out;
+ }
- if (!pair->value) {
- gf_log ("dict", GF_LOG_ERROR,
- "pair->value is null!");
- goto out;
- }
+ netword = hton32 (count);
+ memcpy (buf, &netword, sizeof(netword));
+ buf += DICT_HDR_LEN;
+ pair = this->members_list;
- vallen = pair->value->len;
- *(int32_t *) buf = hton32 (vallen);
- buf += DICT_DATA_HDR_VAL_LEN;
+ while (count) {
+ if (!pair) {
+ gf_log ("dict", GF_LOG_ERROR,
+ "less than count data pairs found!");
+ goto out;
+ }
- memcpy (buf, pair->key, keylen);
- buf += keylen;
- *buf++ = '\0';
+ if (!pair->key) {
+ gf_log ("dict", GF_LOG_ERROR,
+ "pair->key is null!");
+ goto out;
+ }
- if (!pair->value->data) {
- gf_log ("dict", GF_LOG_ERROR,
- "pair->value->data is null!");
- goto out;
- }
- memcpy (buf, pair->value->data, vallen);
- buf += vallen;
+ keylen = strlen (pair->key);
+ netword = hton32 (keylen);
+ memcpy (buf, &netword, sizeof(netword));
+ buf += DICT_DATA_HDR_KEY_LEN;
- pair = pair->next;
- count--;
- }
+ if (!pair->value) {
+ gf_log ("dict", GF_LOG_ERROR,
+ "pair->value is null!");
+ goto out;
+ }
+
+ vallen = pair->value->len;
+ netword = hton32 (vallen);
+ memcpy (buf, &netword, sizeof(netword));
+ buf += DICT_DATA_HDR_VAL_LEN;
+
+ memcpy (buf, pair->key, keylen);
+ buf += keylen;
+ *buf++ = '\0';
+
+ if (!pair->value->data) {
+ gf_log ("dict", GF_LOG_ERROR,
+ "pair->value->data is null!");
+ goto out;
+ }
+ memcpy (buf, pair->value->data, vallen);
+ buf += vallen;
- ret = 0;
+ pair = pair->next;
+ count--;
+ }
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
@@ -2277,13 +2414,13 @@ out:
int
dict_serialized_length (dict_t *this)
{
- int ret = -EINVAL;
+ int ret = -EINVAL;
+
+ if (!this) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is null!");
+ goto out;
+ }
- if (!this) {
- gf_log ("dict", GF_LOG_ERROR, "this is null!");
- goto out;
- }
-
LOCK (&this->lock);
{
ret = _dict_serialized_length (this);
@@ -2291,14 +2428,14 @@ dict_serialized_length (dict_t *this)
UNLOCK (&this->lock);
out:
- return ret;
+ return ret;
}
/**
* dict_serialize - serialize a dictionary into a buffer
*
* @this: dict to serialize
- * @buf: buffer to serialize into. This must be
+ * @buf: buffer to serialize into. This must be
* atleast dict_serialized_length (this) large
*
* @return: success: 0
@@ -2308,18 +2445,12 @@ out:
int
dict_serialize (dict_t *this, char *buf)
{
- int ret = -1;
-
- if (!this) {
- gf_log ("dict", GF_LOG_ERROR,
- "this is null!");
- goto out;
- }
- if (!buf) {
- gf_log ("dict", GF_LOG_ERROR,
- "buf is null!");
- goto out;
- }
+ int ret = -1;
+
+ if (!this || !buf) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is null!");
+ goto out;
+ }
LOCK (&this->lock);
{
@@ -2327,7 +2458,7 @@ dict_serialize (dict_t *this, char *buf)
}
UNLOCK (&this->lock);
out:
- return ret;
+ return ret;
}
@@ -2337,7 +2468,7 @@ out:
* @buf: buf containing serialized dict
* @size: size of the @buf
* @fill: dict to fill in
- *
+ *
* @return: success: 0
* failure: -errno
*/
@@ -2345,103 +2476,120 @@ out:
int32_t
dict_unserialize (char *orig_buf, int32_t size, dict_t **fill)
{
- char *buf = NULL;
- int ret = -1;
- int32_t count = 0;
- int i = 0;
-
- data_t * value = NULL;
- char * key = NULL;
- int32_t keylen = 0;
- int32_t vallen = 0;
+ char *buf = NULL;
+ int ret = -1;
+ int32_t count = 0;
+ int i = 0;
+ data_t * value = NULL;
+ char * key = NULL;
+ int32_t keylen = 0;
+ int32_t vallen = 0;
+ int32_t hostord = 0;
- buf = orig_buf;
-
- if (!buf) {
- gf_log ("dict", GF_LOG_ERROR,
- "buf is null!");
- goto out;
- }
-
- if (size == 0) {
- gf_log ("dict", GF_LOG_ERROR,
- "size is 0!");
- goto out;
- }
-
- if (!fill) {
- gf_log ("dict", GF_LOG_ERROR,
- "fill is null!");
- goto out;
- }
+ buf = orig_buf;
- if (!*fill) {
- gf_log ("dict", GF_LOG_ERROR,
- "*fill is null!");
- goto out;
- }
+ if (!buf) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING, "buf is null!");
+ goto out;
+ }
- if ((buf + DICT_HDR_LEN) > (orig_buf + size)) {
- gf_log ("dict", GF_LOG_ERROR,
- "undersized buffer passsed");
- goto out;
- }
+ if (size == 0) {
+ gf_log_callingfn ("dict", GF_LOG_ERROR,
+ "size is 0!");
+ goto out;
+ }
- count = ntoh32 (*(int32_t *) buf);
- buf += DICT_HDR_LEN;
+ if (!fill) {
+ gf_log_callingfn ("dict", GF_LOG_ERROR,
+ "fill is null!");
+ goto out;
+ }
- if (count < 0) {
- gf_log ("dict", GF_LOG_ERROR,
- "count (%d) <= 0", count);
- goto out;
- }
+ if (!*fill) {
+ gf_log_callingfn ("dict", GF_LOG_ERROR,
+ "*fill is null!");
+ goto out;
+ }
- /* count will be set by the dict_set's below */
- (*fill)->count = 0;
+ if ((buf + DICT_HDR_LEN) > (orig_buf + size)) {
+ gf_log_callingfn ("dict", GF_LOG_ERROR,
+ "undersized buffer passed. "
+ "available (%lu) < required (%lu)",
+ (long)(orig_buf + size),
+ (long)(buf + DICT_HDR_LEN));
+ goto out;
+ }
- for (i = 0; i < count; i++) {
- if ((buf + DICT_DATA_HDR_KEY_LEN) > (orig_buf + size)) {
- gf_log ("dict", GF_LOG_ERROR,
- "undersized buffer passsed");
- goto out;
- }
- keylen = ntoh32 (*(int32_t *) buf);
- buf += DICT_DATA_HDR_KEY_LEN;
+ memcpy (&hostord, buf, sizeof(hostord));
+ count = ntoh32 (hostord);
+ buf += DICT_HDR_LEN;
- if ((buf + DICT_DATA_HDR_VAL_LEN) > (orig_buf + size)) {
- gf_log ("dict", GF_LOG_ERROR,
- "undersized buffer passsed");
- goto out;
- }
- vallen = ntoh32 (*(int32_t *) buf);
- buf += DICT_DATA_HDR_VAL_LEN;
-
- if ((buf + keylen) > (orig_buf + size)) {
- gf_log ("dict", GF_LOG_ERROR,
- "undersized buffer passsed");
- goto out;
- }
- key = buf;
- buf += keylen + 1; /* for '\0' */
+ if (count < 0) {
+ gf_log ("dict", GF_LOG_ERROR,
+ "count (%d) <= 0", count);
+ goto out;
+ }
- if ((buf + vallen) > (orig_buf + size)) {
- gf_log ("dict", GF_LOG_ERROR,
- "undersized buffer passsed");
- goto out;
- }
- value = get_new_data ();
- value->len = vallen;
- value->data = buf;
- value->is_static = 1;
- buf += vallen;
+ /* count will be set by the dict_set's below */
+ (*fill)->count = 0;
+
+ for (i = 0; i < count; i++) {
+ if ((buf + DICT_DATA_HDR_KEY_LEN) > (orig_buf + size)) {
+ gf_log_callingfn ("dict", GF_LOG_ERROR,
+ "undersized buffer passed. "
+ "available (%lu) < required (%lu)",
+ (long)(orig_buf + size),
+ (long)(buf + DICT_DATA_HDR_KEY_LEN));
+ goto out;
+ }
+ memcpy (&hostord, buf, sizeof(hostord));
+ keylen = ntoh32 (hostord);
+ buf += DICT_DATA_HDR_KEY_LEN;
+
+ if ((buf + DICT_DATA_HDR_VAL_LEN) > (orig_buf + size)) {
+ gf_log_callingfn ("dict", GF_LOG_ERROR,
+ "undersized buffer passed. "
+ "available (%lu) < required (%lu)",
+ (long)(orig_buf + size),
+ (long)(buf + DICT_DATA_HDR_VAL_LEN));
+ goto out;
+ }
+ memcpy (&hostord, buf, sizeof(hostord));
+ vallen = ntoh32 (hostord);
+ buf += DICT_DATA_HDR_VAL_LEN;
+
+ if ((buf + keylen) > (orig_buf + size)) {
+ gf_log_callingfn ("dict", GF_LOG_ERROR,
+ "undersized buffer passed. "
+ "available (%lu) < required (%lu)",
+ (long)(orig_buf + size),
+ (long)(buf + keylen));
+ goto out;
+ }
+ key = buf;
+ buf += keylen + 1; /* for '\0' */
+
+ if ((buf + vallen) > (orig_buf + size)) {
+ gf_log_callingfn ("dict", GF_LOG_ERROR,
+ "undersized buffer passed. "
+ "available (%lu) < required (%lu)",
+ (long)(orig_buf + size),
+ (long)(buf + vallen));
+ goto out;
+ }
+ value = get_new_data ();
+ value->len = vallen;
+ value->data = memdup (buf, vallen);
+ value->is_static = 0;
+ buf += vallen;
- dict_set (*fill, key, value);
- }
+ dict_add (*fill, key, value);
+ }
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
@@ -2450,47 +2598,41 @@ out:
*
* @this: dict to serialize
* @buf: pointer to pointer to character. The allocated buffer is stored in
- * this pointer. The buffer has to be freed by the caller.
+ * this pointer. The buffer has to be freed by the caller.
*
* @return: success: 0
* failure: -errno
*/
int32_t
-dict_allocate_and_serialize (dict_t *this, char **buf, size_t *length)
+dict_allocate_and_serialize (dict_t *this, char **buf, u_int *length)
{
- int ret = -EINVAL;
- ssize_t len = 0;
+ int ret = -EINVAL;
+ ssize_t len = 0;
+
+ if (!this || !buf) {
+ gf_log_callingfn ("dict", GF_LOG_DEBUG,
+ "dict OR buf is NULL");
+ goto out;
+ }
- if (!this) {
- gf_log ("dict", GF_LOG_DEBUG,
- "NULL passed as this pointer");
- goto out;
- }
- if (!buf) {
- gf_log ("dict", GF_LOG_DEBUG,
- "NULL passed as buf");
- goto out;
- }
-
LOCK (&this->lock);
- {
+ {
len = _dict_serialized_length (this);
if (len < 0) {
ret = len;
goto unlock;
}
- *buf = CALLOC (1, len);
+ *buf = GF_CALLOC (1, len, gf_common_mt_char);
if (*buf == NULL) {
ret = -ENOMEM;
- gf_log ("dict", GF_LOG_ERROR, "out of memory");
goto unlock;
}
ret = _dict_serialize (this, *buf);
if (ret < 0) {
- FREE (*buf);
+ GF_FREE (*buf);
*buf = NULL;
goto unlock;
}
@@ -2502,5 +2644,138 @@ dict_allocate_and_serialize (dict_t *this, char **buf, size_t *length)
unlock:
UNLOCK (&this->lock);
out:
- return ret;
+ return ret;
+}
+
+/**
+ * _dict_serialize_value_with_delim: serialize the values in the dictionary
+ * into a buffer separated by delimiter (except the last)
+ *
+ * @this : dictionary to serialize
+ * @buf : the buffer to store the serialized data
+ * @serz_len : the length of the serialized data (excluding the last delimiter)
+ * @delimiter : the delimiter to separate the values
+ *
+ * @return : 0 -> success
+ * : -errno -> faliure
+ */
+int
+_dict_serialize_value_with_delim (dict_t *this, char *buf, int32_t *serz_len,
+ char delimiter)
+{
+ int ret = -1;
+ int32_t count = 0;
+ int32_t vallen = 0;
+ int32_t total_len = 0;
+ data_pair_t *pair = NULL;
+
+ if (!buf) {
+ gf_log ("dict", GF_LOG_ERROR, "buf is null");
+ goto out;
+ }
+
+ count = this->count;
+ if (count < 0) {
+ gf_log ("dict", GF_LOG_ERROR, "count (%d) < 0", count);
+ goto out;
+ }
+
+ pair = this->members_list;
+
+ while (count) {
+ if (!pair) {
+ gf_log ("dict", GF_LOG_ERROR,
+ "less than count data pairs found");
+ goto out;
+ }
+
+ if (!pair->key || !pair->value) {
+ gf_log ("dict", GF_LOG_ERROR,
+ "key or value is null");
+ goto out;
+ }
+
+ if (!pair->value->data) {
+ gf_log ("dict", GF_LOG_ERROR,
+ "null value found in dict");
+ goto out;
+ }
+
+ vallen = pair->value->len - 1; // length includes \0
+ memcpy (buf, pair->value->data, vallen);
+ buf += vallen;
+ *buf++ = delimiter;
+
+ total_len += (vallen + 1);
+
+ pair = pair->next;
+ count--;
+ }
+
+ *--buf = '\0'; // remove the last delimiter
+ total_len--; // adjust the length
+ ret = 0;
+
+ if (serz_len)
+ *serz_len = total_len;
+
+ out:
+ return ret;
+}
+
+int
+dict_serialize_value_with_delim (dict_t *this, char *buf, int32_t *serz_len,
+ char delimiter)
+{
+ int ret = -1;
+
+ if (!this || !buf) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is null!");
+ goto out;
+ }
+
+ LOCK (&this->lock);
+ {
+ ret = _dict_serialize_value_with_delim (this, buf, serz_len, delimiter);
+ }
+ UNLOCK (&this->lock);
+out:
+ return ret;
+}
+
+void
+dict_dump (dict_t *this)
+{
+ int ret = 0;
+ int dumplen = 0;
+ data_pair_t *trav = NULL;
+ char dump[64*1024]; /* This is debug only, hence
+ performance should not matter */
+
+ if (!this) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING, "dict NULL");
+ goto out;
+ }
+
+ dump[0] = '\0'; /* the array is not initialized to '\0' */
+
+ /* There is a possibility of issues if data is binary, ignore it
+ for now as debugging is more important */
+ for (trav = this->members_list; trav; trav = trav->next) {
+ ret = snprintf (&dump[dumplen], ((64*1024) - dumplen - 1),
+ "(%s:%s)", trav->key, trav->value->data);
+ if ((ret == -1) || !ret)
+ break;
+
+ dumplen += ret;
+ /* snprintf doesn't append a trailing '\0', add it here */
+ dump[dumplen] = '\0';
+ }
+
+ if (dumplen)
+ gf_log_callingfn ("dict", GF_LOG_INFO,
+ "dict=%p (%s)", this, dump);
+
+out:
+ return;
}
diff --git a/libglusterfs/src/dict.h b/libglusterfs/src/dict.h
index e2d87166a..9b41b5a7d 100644
--- a/libglusterfs/src/dict.h
+++ b/libglusterfs/src/dict.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _DICT_H
@@ -35,60 +26,102 @@ typedef struct _data data_t;
typedef struct _dict dict_t;
typedef struct _data_pair data_pair_t;
+
+#define GF_PROTOCOL_DICT_SERIALIZE(this,from_dict,to,len,ope,labl) do { \
+ int ret = 0; \
+ \
+ if (!from_dict) \
+ break; \
+ \
+ ret = dict_allocate_and_serialize (from_dict, to, &len);\
+ if (ret < 0) { \
+ gf_log (this->name, GF_LOG_WARNING, \
+ "failed to get serialized dict (%s)", \
+ (#from_dict)); \
+ ope = EINVAL; \
+ goto labl; \
+ } \
+ } while (0)
+
+
+#define GF_PROTOCOL_DICT_UNSERIALIZE(xl,to,buff,len,ret,ope,labl) do { \
+ if (!len) \
+ break; \
+ to = dict_new(); \
+ GF_VALIDATE_OR_GOTO (xl->name, to, labl); \
+ \
+ ret = dict_unserialize (buff, len, &to); \
+ if (ret < 0) { \
+ gf_log (xl->name, GF_LOG_WARNING, \
+ "failed to unserialize dictionary (%s)", \
+ (#to)); \
+ \
+ ope = EINVAL; \
+ goto labl; \
+ } \
+ \
+ } while (0)
+
struct _data {
- unsigned char is_static:1;
- unsigned char is_const:1;
- int32_t len;
- struct iovec *vec;
- char *data;
- int32_t refcount;
- gf_lock_t lock;
+ unsigned char is_static:1;
+ unsigned char is_const:1;
+ unsigned char is_stdalloc:1;
+ int32_t len;
+ char *data;
+ int32_t refcount;
+ gf_lock_t lock;
};
struct _data_pair {
- struct _data_pair *hash_next;
- struct _data_pair *prev;
- struct _data_pair *next;
- data_t *value;
- char *key;
+ struct _data_pair *hash_next;
+ struct _data_pair *prev;
+ struct _data_pair *next;
+ data_t *value;
+ char *key;
};
struct _dict {
- unsigned char is_static:1;
- int32_t hash_size;
- int32_t count;
- int32_t refcount;
- data_pair_t **members;
- data_pair_t *members_list;
- char *extra_free;
- gf_lock_t lock;
+ unsigned char is_static:1;
+ int32_t hash_size;
+ int32_t count;
+ int32_t refcount;
+ data_pair_t **members;
+ data_pair_t *members_list;
+ char *extra_free;
+ char *extra_stdfree;
+ gf_lock_t lock;
+ data_pair_t *members_internal;
+ data_pair_t free_pair;
+ gf_boolean_t free_pair_in_use;
};
int32_t is_data_equal (data_t *one, data_t *two);
void data_destroy (data_t *data);
+/* function to set a key/value pair (overwrite existing if matches the key */
int32_t dict_set (dict_t *this, char *key, data_t *value);
+/* function to set a new key/value pair (without checking for duplicate) */
+int32_t dict_add (dict_t *this, char *key, data_t *value);
+
data_t *dict_get (dict_t *this, char *key);
void dict_del (dict_t *this, char *key);
+int dict_reset (dict_t *dict);
int32_t dict_serialized_length (dict_t *dict);
int32_t dict_serialize (dict_t *dict, char *buf);
int32_t dict_unserialize (char *buf, int32_t size, dict_t **fill);
-int32_t
-dict_allocate_and_serialize (dict_t *this, char **buf, size_t *length);
+int32_t dict_allocate_and_serialize (dict_t *this, char **buf, u_int *length);
-int32_t dict_iovec_len (dict_t *dict);
-int32_t dict_to_iovec (dict_t *dict, struct iovec *vec, int32_t count);
-
void dict_destroy (dict_t *dict);
void dict_unref (dict_t *dict);
dict_t *dict_ref (dict_t *dict);
data_t *data_ref (data_t *data);
void data_unref (data_t *data);
-/*
+int32_t dict_lookup (dict_t *this, char *key, data_t **data);
+/*
TODO: provide converts for differnt byte sizes, signedness, and void *
*/
data_t *int_to_data (int64_t value);
@@ -107,6 +140,7 @@ int8_t data_to_int8 (data_t *data);
uint64_t data_to_uint64 (data_t *data);
uint32_t data_to_uint32 (data_t *data);
uint16_t data_to_uint16 (data_t *data);
+uint8_t data_to_uint8 (data_t *data);
data_t *data_from_ptr (void *value);
data_t *data_from_static_ptr (void *value);
@@ -125,25 +159,36 @@ void *data_to_bin (data_t *data);
void *data_to_ptr (data_t *data);
data_t *get_new_data ();
+data_t * data_copy (data_t *old);
dict_t *get_new_dict_full (int size_hint);
dict_t *get_new_dict ();
-data_pair_t *get_new_data_pair ();
+int dict_foreach (dict_t *this,
+ int (*fn)(dict_t *this,
+ char *key,
+ data_t *value,
+ void *data),
+ void *data);
+
+int dict_foreach_fnmatch (dict_t *dict, char *pattern,
+ int (*fn)(dict_t *this,
+ char *key,
+ data_t *value,
+ void *data),
+ void *data);
-void dict_foreach (dict_t *this,
- void (*fn)(dict_t *this,
- char *key,
- data_t *value,
- void *data),
- void *data);
+int dict_null_foreach_fn (dict_t *d, char *k,
+ data_t *v, void *tmp);
-dict_t *dict_copy (dict_t *this,
- dict_t *new);
+dict_t *dict_copy (dict_t *this, dict_t *new);
+int dict_keys_join (void *value, int size, dict_t *dict,
+ int (*filter_fn)(char *key));
/* CLEANED UP FUNCTIONS DECLARATIONS */
GF_MUST_CHECK dict_t *dict_new (void);
-dict_t *dict_copy_with_ref (dict_t *this,
- dict_t *new);
+dict_t *dict_copy_with_ref (dict_t *this, dict_t *new);
+
+GF_MUST_CHECK int dict_reset (dict_t *dict);
GF_MUST_CHECK int dict_get_int8 (dict_t *this, char *key, int8_t *val);
GF_MUST_CHECK int dict_set_int8 (dict_t *this, char *key, int8_t val);
@@ -171,6 +216,7 @@ GF_MUST_CHECK int dict_set_double (dict_t *this, char *key, double val);
GF_MUST_CHECK int dict_set_static_ptr (dict_t *this, char *key, void *ptr);
GF_MUST_CHECK int dict_get_ptr (dict_t *this, char *key, void **ptr);
+GF_MUST_CHECK int dict_get_ptr_and_len (dict_t *this, char *key, void **ptr, int *len);
GF_MUST_CHECK int dict_set_ptr (dict_t *this, char *key, void *ptr);
GF_MUST_CHECK int dict_set_dynptr (dict_t *this, char *key, void *ptr, size_t size);
@@ -179,7 +225,13 @@ GF_MUST_CHECK int dict_set_bin (dict_t *this, char *key, void *ptr, size_t size)
GF_MUST_CHECK int dict_set_static_bin (dict_t *this, char *key, void *ptr, size_t size);
GF_MUST_CHECK int dict_set_str (dict_t *this, char *key, char *str);
+GF_MUST_CHECK int dict_set_dynmstr (dict_t *this, char *key, char *str);
GF_MUST_CHECK int dict_set_dynstr (dict_t *this, char *key, char *str);
GF_MUST_CHECK int dict_get_str (dict_t *this, char *key, char **str);
+GF_MUST_CHECK int dict_get_str_boolean (dict_t *this, char *key, int default_val);
+GF_MUST_CHECK int dict_serialize_value_with_delim (dict_t *this, char *buf, int32_t *serz_len,
+ char delimiter);
+
+void dict_dump (dict_t *dict);
#endif
diff --git a/libglusterfs/src/event-epoll.c b/libglusterfs/src/event-epoll.c
new file mode 100644
index 000000000..06b323624
--- /dev/null
+++ b/libglusterfs/src/event-epoll.c
@@ -0,0 +1,463 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <sys/poll.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+
+#include "logging.h"
+#include "event.h"
+#include "mem-pool.h"
+#include "common-utils.h"
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+
+#ifdef HAVE_SYS_EPOLL_H
+#include <sys/epoll.h>
+
+
+static int
+__event_getindex (struct event_pool *event_pool, int fd, int idx)
+{
+ int ret = -1;
+ int i = 0;
+
+ GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+
+ if (idx > -1 && idx < event_pool->used) {
+ if (event_pool->reg[idx].fd == fd)
+ ret = idx;
+ }
+
+ for (i=0; ret == -1 && i<event_pool->used; i++) {
+ if (event_pool->reg[i].fd == fd) {
+ ret = i;
+ break;
+ }
+ }
+
+out:
+ return ret;
+}
+
+
+static struct event_pool *
+event_pool_new_epoll (int count)
+{
+ struct event_pool *event_pool = NULL;
+ int epfd = -1;
+
+ event_pool = GF_CALLOC (1, sizeof (*event_pool),
+ gf_common_mt_event_pool);
+
+ if (!event_pool)
+ goto out;
+
+ event_pool->count = count;
+ event_pool->reg = GF_CALLOC (event_pool->count,
+ sizeof (*event_pool->reg),
+ gf_common_mt_reg);
+
+ if (!event_pool->reg) {
+ GF_FREE (event_pool);
+ event_pool = NULL;
+ goto out;
+ }
+
+ epfd = epoll_create (count);
+
+ if (epfd == -1) {
+ gf_log ("epoll", GF_LOG_ERROR, "epoll fd creation failed (%s)",
+ strerror (errno));
+ GF_FREE (event_pool->reg);
+ GF_FREE (event_pool);
+ event_pool = NULL;
+ goto out;
+ }
+
+ event_pool->fd = epfd;
+
+ event_pool->count = count;
+
+ pthread_mutex_init (&event_pool->mutex, NULL);
+ pthread_cond_init (&event_pool->cond, NULL);
+
+out:
+ return event_pool;
+}
+
+
+int
+event_register_epoll (struct event_pool *event_pool, int fd,
+ event_handler_t handler,
+ void *data, int poll_in, int poll_out)
+{
+ int idx = -1;
+ int ret = -1;
+ struct epoll_event epoll_event = {0, };
+ struct event_data *ev_data = (void *)&epoll_event.data;
+
+
+ GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+
+ pthread_mutex_lock (&event_pool->mutex);
+ {
+ if (event_pool->count == event_pool->used) {
+ event_pool->count *= 2;
+
+ event_pool->reg = GF_REALLOC (event_pool->reg,
+ event_pool->count *
+ sizeof (*event_pool->reg));
+
+ if (!event_pool->reg) {
+ gf_log ("epoll", GF_LOG_ERROR,
+ "event registry re-allocation failed");
+ goto unlock;
+ }
+ }
+
+ idx = event_pool->used;
+ event_pool->used++;
+
+ event_pool->reg[idx].fd = fd;
+ event_pool->reg[idx].events = EPOLLPRI;
+ event_pool->reg[idx].handler = handler;
+ event_pool->reg[idx].data = data;
+
+ switch (poll_in) {
+ case 1:
+ event_pool->reg[idx].events |= EPOLLIN;
+ break;
+ case 0:
+ event_pool->reg[idx].events &= ~EPOLLIN;
+ break;
+ case -1:
+ /* do nothing */
+ break;
+ default:
+ gf_log ("epoll", GF_LOG_ERROR,
+ "invalid poll_in value %d", poll_in);
+ break;
+ }
+
+ switch (poll_out) {
+ case 1:
+ event_pool->reg[idx].events |= EPOLLOUT;
+ break;
+ case 0:
+ event_pool->reg[idx].events &= ~EPOLLOUT;
+ break;
+ case -1:
+ /* do nothing */
+ break;
+ default:
+ gf_log ("epoll", GF_LOG_ERROR,
+ "invalid poll_out value %d", poll_out);
+ break;
+ }
+
+ event_pool->changed = 1;
+
+ epoll_event.events = event_pool->reg[idx].events;
+ ev_data->fd = fd;
+ ev_data->idx = idx;
+
+ ret = epoll_ctl (event_pool->fd, EPOLL_CTL_ADD, fd,
+ &epoll_event);
+
+ if (ret == -1) {
+ gf_log ("epoll", GF_LOG_ERROR,
+ "failed to add fd(=%d) to epoll fd(=%d) (%s)",
+ fd, event_pool->fd, strerror (errno));
+ goto unlock;
+ }
+
+ pthread_cond_broadcast (&event_pool->cond);
+ }
+unlock:
+ pthread_mutex_unlock (&event_pool->mutex);
+
+out:
+ return ret;
+}
+
+
+static int
+event_unregister_epoll (struct event_pool *event_pool, int fd, int idx_hint)
+{
+ int idx = -1;
+ int ret = -1;
+
+ struct epoll_event epoll_event = {0, };
+ struct event_data *ev_data = (void *)&epoll_event.data;
+ int lastidx = -1;
+
+ GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+
+ pthread_mutex_lock (&event_pool->mutex);
+ {
+ idx = __event_getindex (event_pool, fd, idx_hint);
+
+ if (idx == -1) {
+ gf_log ("epoll", GF_LOG_ERROR,
+ "index not found for fd=%d (idx_hint=%d)",
+ fd, idx_hint);
+ errno = ENOENT;
+ goto unlock;
+ }
+
+ ret = epoll_ctl (event_pool->fd, EPOLL_CTL_DEL, fd, NULL);
+
+ /* if ret is -1, this array member should never be accessed */
+ /* if it is 0, the array member might be used by idx_cache
+ * in which case the member should not be accessed till
+ * it is reallocated
+ */
+
+ event_pool->reg[idx].fd = -1;
+
+ if (ret == -1) {
+ gf_log ("epoll", GF_LOG_ERROR,
+ "fail to del fd(=%d) from epoll fd(=%d) (%s)",
+ fd, event_pool->fd, strerror (errno));
+ goto unlock;
+ }
+
+ lastidx = event_pool->used - 1;
+ if (lastidx == idx) {
+ event_pool->used--;
+ goto unlock;
+ }
+
+ epoll_event.events = event_pool->reg[lastidx].events;
+ ev_data->fd = event_pool->reg[lastidx].fd;
+ ev_data->idx = idx;
+
+ ret = epoll_ctl (event_pool->fd, EPOLL_CTL_MOD, ev_data->fd,
+ &epoll_event);
+ if (ret == -1) {
+ gf_log ("epoll", GF_LOG_ERROR,
+ "fail to modify fd(=%d) index %d to %d (%s)",
+ ev_data->fd, event_pool->used, idx,
+ strerror (errno));
+ goto unlock;
+ }
+
+ /* just replace the unregistered idx by last one */
+ event_pool->reg[idx] = event_pool->reg[lastidx];
+ event_pool->used--;
+ }
+unlock:
+ pthread_mutex_unlock (&event_pool->mutex);
+
+out:
+ return ret;
+}
+
+
+static int
+event_select_on_epoll (struct event_pool *event_pool, int fd, int idx_hint,
+ int poll_in, int poll_out)
+{
+ int idx = -1;
+ int ret = -1;
+
+ struct epoll_event epoll_event = {0, };
+ struct event_data *ev_data = (void *)&epoll_event.data;
+
+
+ GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+
+ pthread_mutex_lock (&event_pool->mutex);
+ {
+ idx = __event_getindex (event_pool, fd, idx_hint);
+
+ if (idx == -1) {
+ gf_log ("epoll", GF_LOG_ERROR,
+ "index not found for fd=%d (idx_hint=%d)",
+ fd, idx_hint);
+ errno = ENOENT;
+ goto unlock;
+ }
+
+ switch (poll_in) {
+ case 1:
+ event_pool->reg[idx].events |= EPOLLIN;
+ break;
+ case 0:
+ event_pool->reg[idx].events &= ~EPOLLIN;
+ break;
+ case -1:
+ /* do nothing */
+ break;
+ default:
+ gf_log ("epoll", GF_LOG_ERROR,
+ "invalid poll_in value %d", poll_in);
+ break;
+ }
+
+ switch (poll_out) {
+ case 1:
+ event_pool->reg[idx].events |= EPOLLOUT;
+ break;
+ case 0:
+ event_pool->reg[idx].events &= ~EPOLLOUT;
+ break;
+ case -1:
+ /* do nothing */
+ break;
+ default:
+ gf_log ("epoll", GF_LOG_ERROR,
+ "invalid poll_out value %d", poll_out);
+ break;
+ }
+
+ epoll_event.events = event_pool->reg[idx].events;
+ ev_data->fd = fd;
+ ev_data->idx = idx;
+
+ ret = epoll_ctl (event_pool->fd, EPOLL_CTL_MOD, fd,
+ &epoll_event);
+ if (ret == -1) {
+ gf_log ("epoll", GF_LOG_ERROR,
+ "failed to modify fd(=%d) events to %d",
+ fd, epoll_event.events);
+ }
+ }
+unlock:
+ pthread_mutex_unlock (&event_pool->mutex);
+
+out:
+ return ret;
+}
+
+
+static int
+event_dispatch_epoll_handler (struct event_pool *event_pool,
+ struct epoll_event *events, int i)
+{
+ struct event_data *event_data = NULL;
+ event_handler_t handler = NULL;
+ void *data = NULL;
+ int idx = -1;
+ int ret = -1;
+
+
+ event_data = (void *)&events[i].data;
+ handler = NULL;
+ data = NULL;
+
+ pthread_mutex_lock (&event_pool->mutex);
+ {
+ idx = __event_getindex (event_pool, event_data->fd,
+ event_data->idx);
+
+ if (idx == -1) {
+ gf_log ("epoll", GF_LOG_ERROR,
+ "index not found for fd(=%d) (idx_hint=%d)",
+ event_data->fd, event_data->idx);
+ goto unlock;
+ }
+
+ handler = event_pool->reg[idx].handler;
+ data = event_pool->reg[idx].data;
+ }
+unlock:
+ pthread_mutex_unlock (&event_pool->mutex);
+
+ if (handler)
+ ret = handler (event_data->fd, event_data->idx, data,
+ (events[i].events & (EPOLLIN|EPOLLPRI)),
+ (events[i].events & (EPOLLOUT)),
+ (events[i].events & (EPOLLERR|EPOLLHUP)));
+ return ret;
+}
+
+
+static int
+event_dispatch_epoll (struct event_pool *event_pool)
+{
+ struct epoll_event *events = NULL;
+ int size = 0;
+ int i = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+
+ while (1) {
+ pthread_mutex_lock (&event_pool->mutex);
+ {
+ while (event_pool->used == 0)
+ pthread_cond_wait (&event_pool->cond,
+ &event_pool->mutex);
+
+ if (event_pool->used > event_pool->evcache_size) {
+ GF_FREE (event_pool->evcache);
+
+ event_pool->evcache = events = NULL;
+
+ event_pool->evcache_size =
+ event_pool->used + 256;
+
+ events = GF_CALLOC (event_pool->evcache_size,
+ sizeof (struct epoll_event),
+ gf_common_mt_epoll_event);
+ if (!events)
+ break;
+
+ event_pool->evcache = events;
+ }
+ }
+ pthread_mutex_unlock (&event_pool->mutex);
+
+ ret = epoll_wait (event_pool->fd, event_pool->evcache,
+ event_pool->evcache_size, -1);
+
+ if (ret == 0)
+ /* timeout */
+ continue;
+
+ if (ret == -1 && errno == EINTR)
+ /* sys call */
+ continue;
+
+ size = ret;
+
+ for (i = 0; i < size; i++) {
+ if (!events || !events[i].events)
+ continue;
+
+ ret = event_dispatch_epoll_handler (event_pool,
+ events, i);
+ }
+ }
+
+out:
+ return ret;
+}
+
+
+struct event_ops event_ops_epoll = {
+ .new = event_pool_new_epoll,
+ .event_register = event_register_epoll,
+ .event_select_on = event_select_on_epoll,
+ .event_unregister = event_unregister_epoll,
+ .event_dispatch = event_dispatch_epoll
+};
+
+#endif
diff --git a/libglusterfs/src/event-history.c b/libglusterfs/src/event-history.c
new file mode 100644
index 000000000..82baa521a
--- /dev/null
+++ b/libglusterfs/src/event-history.c
@@ -0,0 +1,83 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "event-history.h"
+
+eh_t *
+eh_new (size_t buffer_size, gf_boolean_t use_buffer_once,
+ void (*destroy_buffer_data) (void *data))
+{
+ eh_t *history = NULL;
+ buffer_t *buffer = NULL;
+
+ history = GF_CALLOC (1, sizeof (eh_t), gf_common_mt_eh_t);
+ if (!history) {
+ gf_log ("", GF_LOG_ERROR, "allocating history failed.");
+ goto out;
+ }
+
+ buffer = cb_buffer_new (buffer_size, use_buffer_once,
+ destroy_buffer_data);
+ if (!buffer) {
+ gf_log ("", GF_LOG_ERROR, "allocating circular buffer failed");
+ GF_FREE (history);
+ history = NULL;
+ }
+
+ history->buffer = buffer;
+
+ pthread_mutex_init (&history->lock, NULL);
+out:
+ return history;
+}
+
+void
+eh_dump (eh_t *history, void *data,
+ int (dump_fn) (circular_buffer_t *buffer, void *data))
+{
+ if (!history) {
+ gf_log ("", GF_LOG_DEBUG, "history is NULL");
+ goto out;
+ }
+
+ cb_buffer_dump (history->buffer, data, dump_fn);
+
+out:
+ return;
+}
+
+int
+eh_save_history (eh_t *history, void *data)
+{
+ int ret = -1;
+
+ ret = cb_add_entry_buffer (history->buffer, data);
+
+ return ret;
+}
+
+int
+eh_destroy (eh_t *history)
+{
+ if (!history) {
+ gf_log ("", GF_LOG_INFO, "history for the xlator is "
+ "NULL");
+ return -1;
+ }
+
+ cb_buffer_destroy (history->buffer);
+ history->buffer = NULL;
+
+ pthread_mutex_destroy (&history->lock);
+
+ GF_FREE (history);
+
+ return 0;
+}
diff --git a/libglusterfs/src/event-history.h b/libglusterfs/src/event-history.h
new file mode 100644
index 000000000..b64f63b5e
--- /dev/null
+++ b/libglusterfs/src/event-history.h
@@ -0,0 +1,44 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _EH_H
+#define _EH_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "mem-types.h"
+#include "circ-buff.h"
+
+struct event_hist
+{
+ buffer_t *buffer;
+ pthread_mutex_t lock;
+};
+
+typedef struct event_hist eh_t;
+
+void
+eh_dump (eh_t *event , void *data,
+ int (fn) (circular_buffer_t *buffer, void *data));
+
+eh_t *
+eh_new (size_t buffer_size, gf_boolean_t use_buffer_once,
+ void (*destroy_data) (void *data));
+
+int
+eh_save_history (eh_t *history, void *string);
+
+int
+eh_destroy (eh_t *history);
+
+#endif /* _EH_H */
diff --git a/libglusterfs/src/event-poll.c b/libglusterfs/src/event-poll.c
new file mode 100644
index 000000000..7f7f560d0
--- /dev/null
+++ b/libglusterfs/src/event-poll.c
@@ -0,0 +1,451 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <sys/poll.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+
+#include "logging.h"
+#include "event.h"
+#include "mem-pool.h"
+#include "common-utils.h"
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+static int
+event_register_poll (struct event_pool *event_pool, int fd,
+ event_handler_t handler,
+ void *data, int poll_in, int poll_out);
+
+
+static int
+__flush_fd (int fd, int idx, void *data,
+ int poll_in, int poll_out, int poll_err)
+{
+ char buf[64];
+ int ret = -1;
+
+ if (!poll_in)
+ return ret;
+
+ do {
+ ret = read (fd, buf, 64);
+ if (ret == -1 && errno != EAGAIN) {
+ gf_log ("poll", GF_LOG_ERROR,
+ "read on %d returned error (%s)",
+ fd, strerror (errno));
+ }
+ } while (ret == 64);
+
+ return ret;
+}
+
+
+static int
+__event_getindex (struct event_pool *event_pool, int fd, int idx)
+{
+ int ret = -1;
+ int i = 0;
+
+ GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+
+ if (idx > -1 && idx < event_pool->used) {
+ if (event_pool->reg[idx].fd == fd)
+ ret = idx;
+ }
+
+ for (i=0; ret == -1 && i<event_pool->used; i++) {
+ if (event_pool->reg[i].fd == fd) {
+ ret = i;
+ break;
+ }
+ }
+
+out:
+ return ret;
+}
+
+
+static struct event_pool *
+event_pool_new_poll (int count)
+{
+ struct event_pool *event_pool = NULL;
+ int ret = -1;
+
+ event_pool = GF_CALLOC (1, sizeof (*event_pool),
+ gf_common_mt_event_pool);
+
+ if (!event_pool)
+ return NULL;
+
+ event_pool->count = count;
+ event_pool->reg = GF_CALLOC (event_pool->count,
+ sizeof (*event_pool->reg),
+ gf_common_mt_reg);
+
+ if (!event_pool->reg) {
+ GF_FREE (event_pool);
+ return NULL;
+ }
+
+ pthread_mutex_init (&event_pool->mutex, NULL);
+
+ ret = pipe (event_pool->breaker);
+
+ if (ret == -1) {
+ gf_log ("poll", GF_LOG_ERROR,
+ "pipe creation failed (%s)", strerror (errno));
+ GF_FREE (event_pool->reg);
+ GF_FREE (event_pool);
+ return NULL;
+ }
+
+ ret = fcntl (event_pool->breaker[0], F_SETFL, O_NONBLOCK);
+ if (ret == -1) {
+ gf_log ("poll", GF_LOG_ERROR,
+ "could not set pipe to non blocking mode (%s)",
+ strerror (errno));
+ close (event_pool->breaker[0]);
+ close (event_pool->breaker[1]);
+ event_pool->breaker[0] = event_pool->breaker[1] = -1;
+
+ GF_FREE (event_pool->reg);
+ GF_FREE (event_pool);
+ return NULL;
+ }
+
+ ret = fcntl (event_pool->breaker[1], F_SETFL, O_NONBLOCK);
+ if (ret == -1) {
+ gf_log ("poll", GF_LOG_ERROR,
+ "could not set pipe to non blocking mode (%s)",
+ strerror (errno));
+
+ close (event_pool->breaker[0]);
+ close (event_pool->breaker[1]);
+ event_pool->breaker[0] = event_pool->breaker[1] = -1;
+
+ GF_FREE (event_pool->reg);
+ GF_FREE (event_pool);
+ return NULL;
+ }
+
+ ret = event_register_poll (event_pool, event_pool->breaker[0],
+ __flush_fd, NULL, 1, 0);
+ if (ret == -1) {
+ gf_log ("poll", GF_LOG_ERROR,
+ "could not register pipe fd with poll event loop");
+ close (event_pool->breaker[0]);
+ close (event_pool->breaker[1]);
+ event_pool->breaker[0] = event_pool->breaker[1] = -1;
+
+ GF_FREE (event_pool->reg);
+ GF_FREE (event_pool);
+ return NULL;
+ }
+
+ return event_pool;
+}
+
+
+static int
+event_register_poll (struct event_pool *event_pool, int fd,
+ event_handler_t handler,
+ void *data, int poll_in, int poll_out)
+{
+ int idx = -1;
+
+ GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+
+ pthread_mutex_lock (&event_pool->mutex);
+ {
+ if (event_pool->count == event_pool->used)
+ {
+ event_pool->count += 256;
+
+ event_pool->reg = GF_REALLOC (event_pool->reg,
+ event_pool->count *
+ sizeof (*event_pool->reg));
+ if (!event_pool->reg)
+ goto unlock;
+ }
+
+ idx = event_pool->used++;
+
+ event_pool->reg[idx].fd = fd;
+ event_pool->reg[idx].events = POLLPRI;
+ event_pool->reg[idx].handler = handler;
+ event_pool->reg[idx].data = data;
+
+ switch (poll_in) {
+ case 1:
+ event_pool->reg[idx].events |= POLLIN;
+ break;
+ case 0:
+ event_pool->reg[idx].events &= ~POLLIN;
+ break;
+ case -1:
+ /* do nothing */
+ break;
+ default:
+ gf_log ("poll", GF_LOG_ERROR,
+ "invalid poll_in value %d", poll_in);
+ break;
+ }
+
+ switch (poll_out) {
+ case 1:
+ event_pool->reg[idx].events |= POLLOUT;
+ break;
+ case 0:
+ event_pool->reg[idx].events &= ~POLLOUT;
+ break;
+ case -1:
+ /* do nothing */
+ break;
+ default:
+ gf_log ("poll", GF_LOG_ERROR,
+ "invalid poll_out value %d", poll_out);
+ break;
+ }
+
+ event_pool->changed = 1;
+
+ }
+unlock:
+ pthread_mutex_unlock (&event_pool->mutex);
+
+out:
+ return idx;
+}
+
+
+static int
+event_unregister_poll (struct event_pool *event_pool, int fd, int idx_hint)
+{
+ int idx = -1;
+
+ GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+
+ pthread_mutex_lock (&event_pool->mutex);
+ {
+ idx = __event_getindex (event_pool, fd, idx_hint);
+
+ if (idx == -1) {
+ gf_log ("poll", GF_LOG_ERROR,
+ "index not found for fd=%d (idx_hint=%d)",
+ fd, idx_hint);
+ errno = ENOENT;
+ goto unlock;
+ }
+
+ event_pool->reg[idx] = event_pool->reg[--event_pool->used];
+ event_pool->changed = 1;
+ }
+unlock:
+ pthread_mutex_unlock (&event_pool->mutex);
+
+out:
+ return idx;
+}
+
+
+static int
+event_select_on_poll (struct event_pool *event_pool, int fd, int idx_hint,
+ int poll_in, int poll_out)
+{
+ int idx = -1;
+
+ GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+
+ pthread_mutex_lock (&event_pool->mutex);
+ {
+ idx = __event_getindex (event_pool, fd, idx_hint);
+
+ if (idx == -1) {
+ gf_log ("poll", GF_LOG_ERROR,
+ "index not found for fd=%d (idx_hint=%d)",
+ fd, idx_hint);
+ errno = ENOENT;
+ goto unlock;
+ }
+
+ switch (poll_in) {
+ case 1:
+ event_pool->reg[idx].events |= POLLIN;
+ break;
+ case 0:
+ event_pool->reg[idx].events &= ~POLLIN;
+ break;
+ case -1:
+ /* do nothing */
+ break;
+ default:
+ /* TODO: log error */
+ break;
+ }
+
+ switch (poll_out) {
+ case 1:
+ event_pool->reg[idx].events |= POLLOUT;
+ break;
+ case 0:
+ event_pool->reg[idx].events &= ~POLLOUT;
+ break;
+ case -1:
+ /* do nothing */
+ break;
+ default:
+ /* TODO: log error */
+ break;
+ }
+
+ if (poll_in + poll_out > -2)
+ event_pool->changed = 1;
+ }
+unlock:
+ pthread_mutex_unlock (&event_pool->mutex);
+
+out:
+ return idx;
+}
+
+
+static int
+event_dispatch_poll_handler (struct event_pool *event_pool,
+ struct pollfd *ufds, int i)
+{
+ event_handler_t handler = NULL;
+ void *data = NULL;
+ int idx = -1;
+ int ret = 0;
+
+ handler = NULL;
+ data = NULL;
+
+ pthread_mutex_lock (&event_pool->mutex);
+ {
+ idx = __event_getindex (event_pool, ufds[i].fd, i);
+
+ if (idx == -1) {
+ gf_log ("poll", GF_LOG_ERROR,
+ "index not found for fd=%d (idx_hint=%d)",
+ ufds[i].fd, i);
+ goto unlock;
+ }
+
+ handler = event_pool->reg[idx].handler;
+ data = event_pool->reg[idx].data;
+ }
+unlock:
+ pthread_mutex_unlock (&event_pool->mutex);
+
+ if (handler)
+ ret = handler (ufds[i].fd, idx, data,
+ (ufds[i].revents & (POLLIN|POLLPRI)),
+ (ufds[i].revents & (POLLOUT)),
+ (ufds[i].revents & (POLLERR|POLLHUP|POLLNVAL)));
+
+ return ret;
+}
+
+
+static int
+event_dispatch_poll_resize (struct event_pool *event_pool,
+ struct pollfd *ufds, int size)
+{
+ int i = 0;
+
+ pthread_mutex_lock (&event_pool->mutex);
+ {
+ if (event_pool->changed == 0) {
+ goto unlock;
+ }
+
+ if (event_pool->used > event_pool->evcache_size) {
+ GF_FREE (event_pool->evcache);
+
+ event_pool->evcache = ufds = NULL;
+
+ event_pool->evcache_size = event_pool->used;
+
+ ufds = GF_CALLOC (sizeof (struct pollfd),
+ event_pool->evcache_size,
+ gf_common_mt_pollfd);
+ if (!ufds)
+ goto unlock;
+ event_pool->evcache = ufds;
+ }
+
+ for (i = 0; i < event_pool->used; i++) {
+ ufds[i].fd = event_pool->reg[i].fd;
+ ufds[i].events = event_pool->reg[i].events;
+ ufds[i].revents = 0;
+ }
+
+ size = i;
+ }
+unlock:
+ pthread_mutex_unlock (&event_pool->mutex);
+
+ return size;
+}
+
+
+static int
+event_dispatch_poll (struct event_pool *event_pool)
+{
+ struct pollfd *ufds = NULL;
+ int size = 0;
+ int i = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+
+ while (1) {
+ size = event_dispatch_poll_resize (event_pool, ufds, size);
+ ufds = event_pool->evcache;
+
+ ret = poll (ufds, size, 1);
+
+ if (ret == 0)
+ /* timeout */
+ continue;
+
+ if (ret == -1 && errno == EINTR)
+ /* sys call */
+ continue;
+
+ for (i = 0; i < size; i++) {
+ if (!ufds[i].revents)
+ continue;
+
+ event_dispatch_poll_handler (event_pool, ufds, i);
+ }
+ }
+
+out:
+ return -1;
+}
+
+
+struct event_ops event_ops_poll = {
+ .new = event_pool_new_poll,
+ .event_register = event_register_poll,
+ .event_select_on = event_select_on_poll,
+ .event_unregister = event_unregister_poll,
+ .event_dispatch = event_dispatch_poll
+};
diff --git a/libglusterfs/src/event.c b/libglusterfs/src/event.c
index e53002e5a..0197e7948 100644
--- a/libglusterfs/src/event.c
+++ b/libglusterfs/src/event.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <sys/poll.h>
@@ -28,951 +19,99 @@
#include "logging.h"
#include "event.h"
#include "mem-pool.h"
-
+#include "common-utils.h"
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
#endif
-static int
-event_register_poll (struct event_pool *event_pool, int fd,
- event_handler_t handler,
- void *data, int poll_in, int poll_out);
-
-
-static int
-__flush_fd (int fd, int idx, void *data,
- int poll_in, int poll_out, int poll_err)
-{
- char buf[64];
- int ret = -1;
-
- if (!poll_in)
- return ret;
-
- do {
- ret = read (fd, buf, 64);
- if (ret == -1 && errno != EAGAIN) {
- gf_log ("poll", GF_LOG_ERROR,
- "read on %d returned error (%s)",
- fd, strerror (errno));
- }
- } while (ret == 64);
-
- return ret;
-}
-
-
-static int
-__event_getindex (struct event_pool *event_pool, int fd, int idx)
-{
- int ret = -1;
- int i = 0;
-
- if (event_pool == NULL) {
- gf_log ("event", GF_LOG_ERROR, "invalid argument");
- return -1;
- }
-
- if (idx > -1 && idx < event_pool->used) {
- if (event_pool->reg[idx].fd == fd)
- ret = idx;
- }
-
- for (i=0; ret == -1 && i<event_pool->used; i++) {
- if (event_pool->reg[i].fd == fd) {
- ret = i;
- break;
- }
- }
-
- return ret;
-}
-
-
-static struct event_pool *
-event_pool_new_poll (int count)
-{
- struct event_pool *event_pool = NULL;
- int ret = -1;
-
- event_pool = CALLOC (1, sizeof (*event_pool));
-
- if (!event_pool)
- return NULL;
-
- event_pool->count = count;
- event_pool->reg = CALLOC (event_pool->count,
- sizeof (*event_pool->reg));
-
- if (!event_pool->reg) {
- gf_log ("poll", GF_LOG_CRITICAL,
- "failed to allocate event registry");
- free (event_pool);
- return NULL;
- }
-
- pthread_mutex_init (&event_pool->mutex, NULL);
-
- ret = pipe (event_pool->breaker);
-
- if (ret == -1) {
- gf_log ("poll", GF_LOG_ERROR,
- "pipe creation failed (%s)", strerror (errno));
- free (event_pool->reg);
- free (event_pool);
- return NULL;
- }
-
- ret = fcntl (event_pool->breaker[0], F_SETFL, O_NONBLOCK);
- if (ret == -1) {
- gf_log ("poll", GF_LOG_ERROR,
- "could not set pipe to non blocking mode (%s)",
- strerror (errno));
- close (event_pool->breaker[0]);
- close (event_pool->breaker[1]);
- event_pool->breaker[0] = event_pool->breaker[1] = -1;
-
- free (event_pool->reg);
- free (event_pool);
- return NULL;
- }
-
- ret = fcntl (event_pool->breaker[1], F_SETFL, O_NONBLOCK);
- if (ret == -1) {
- gf_log ("poll", GF_LOG_ERROR,
- "could not set pipe to non blocking mode (%s)",
- strerror (errno));
-
- close (event_pool->breaker[0]);
- close (event_pool->breaker[1]);
- event_pool->breaker[0] = event_pool->breaker[1] = -1;
-
- free (event_pool->reg);
- free (event_pool);
- return NULL;
- }
-
- ret = event_register_poll (event_pool, event_pool->breaker[0],
- __flush_fd, NULL, 1, 0);
- if (ret == -1) {
- gf_log ("poll", GF_LOG_ERROR,
- "could not register pipe fd with poll event loop");
- close (event_pool->breaker[0]);
- close (event_pool->breaker[1]);
- event_pool->breaker[0] = event_pool->breaker[1] = -1;
-
- free (event_pool->reg);
- free (event_pool);
- return NULL;
- }
-
- return event_pool;
-}
-
-
-static int
-event_register_poll (struct event_pool *event_pool, int fd,
- event_handler_t handler,
- void *data, int poll_in, int poll_out)
-{
- int idx = -1;
-
- if (event_pool == NULL) {
- gf_log ("event", GF_LOG_ERROR, "invalid argument");
- return -1;
- }
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- if (event_pool->count == event_pool->used)
- {
- event_pool->count += 256;
-
- event_pool->reg = realloc (event_pool->reg,
- event_pool->count *
- sizeof (*event_pool->reg));
- }
-
- idx = event_pool->used++;
-
- event_pool->reg[idx].fd = fd;
- event_pool->reg[idx].events = POLLPRI;
- event_pool->reg[idx].handler = handler;
- event_pool->reg[idx].data = data;
-
- switch (poll_in) {
- case 1:
- event_pool->reg[idx].events |= POLLIN;
- break;
- case 0:
- event_pool->reg[idx].events &= ~POLLIN;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- gf_log ("poll", GF_LOG_ERROR,
- "invalid poll_in value %d", poll_in);
- break;
- }
-
- switch (poll_out) {
- case 1:
- event_pool->reg[idx].events |= POLLOUT;
- break;
- case 0:
- event_pool->reg[idx].events &= ~POLLOUT;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- gf_log ("poll", GF_LOG_ERROR,
- "invalid poll_out value %d", poll_out);
- break;
- }
-
- event_pool->changed = 1;
-
- }
- pthread_mutex_unlock (&event_pool->mutex);
-
- return idx;
-}
-
-
-static int
-event_unregister_poll (struct event_pool *event_pool, int fd, int idx_hint)
-{
- int idx = -1;
-
- if (event_pool == NULL) {
- gf_log ("event", GF_LOG_ERROR, "invalid argument");
- return -1;
- }
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- idx = __event_getindex (event_pool, fd, idx_hint);
-
- if (idx == -1) {
- gf_log ("poll", GF_LOG_ERROR,
- "index not found for fd=%d (idx_hint=%d)",
- fd, idx_hint);
- errno = ENOENT;
- goto unlock;
- }
-
- event_pool->reg[idx] = event_pool->reg[--event_pool->used];
- event_pool->changed = 1;
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
-
- return idx;
-}
-
-
-static int
-event_select_on_poll (struct event_pool *event_pool, int fd, int idx_hint,
- int poll_in, int poll_out)
-{
- int idx = -1;
-
- if (event_pool == NULL) {
- gf_log ("event", GF_LOG_ERROR, "invalid argument");
- return -1;
- }
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- idx = __event_getindex (event_pool, fd, idx_hint);
-
- if (idx == -1) {
- gf_log ("poll", GF_LOG_ERROR,
- "index not found for fd=%d (idx_hint=%d)",
- fd, idx_hint);
- errno = ENOENT;
- goto unlock;
- }
-
- switch (poll_in) {
- case 1:
- event_pool->reg[idx].events |= POLLIN;
- break;
- case 0:
- event_pool->reg[idx].events &= ~POLLIN;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- /* TODO: log error */
- break;
- }
-
- switch (poll_out) {
- case 1:
- event_pool->reg[idx].events |= POLLOUT;
- break;
- case 0:
- event_pool->reg[idx].events &= ~POLLOUT;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- /* TODO: log error */
- break;
- }
-
- if (poll_in + poll_out > -2)
- event_pool->changed = 1;
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
-
- return idx;
-}
-
-
-static int
-event_dispatch_poll_handler (struct event_pool *event_pool,
- struct pollfd *ufds, int i)
-{
- event_handler_t handler = NULL;
- void *data = NULL;
- int idx = -1;
- int ret = 0;
-
- handler = NULL;
- data = NULL;
- idx = -1;
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- idx = __event_getindex (event_pool, ufds[i].fd, i);
-
- if (idx == -1) {
- gf_log ("poll", GF_LOG_ERROR,
- "index not found for fd=%d (idx_hint=%d)",
- ufds[i].fd, i);
- goto unlock;
- }
-
- handler = event_pool->reg[idx].handler;
- data = event_pool->reg[idx].data;
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
-
- if (handler)
- ret = handler (ufds[i].fd, idx, data,
- (ufds[i].revents & (POLLIN|POLLPRI)),
- (ufds[i].revents & (POLLOUT)),
- (ufds[i].revents & (POLLERR|POLLHUP|POLLNVAL)));
-
- return ret;
-}
-
-
-static int
-event_dispatch_poll_resize (struct event_pool *event_pool,
- struct pollfd *ufds, int size)
-{
- int i = 0;
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- if (event_pool->changed == 0) {
- goto unlock;
- }
-
- if (event_pool->used > event_pool->evcache_size) {
- if (event_pool->evcache)
- free (event_pool->evcache);
-
- event_pool->evcache = ufds = NULL;
-
- event_pool->evcache_size = event_pool->used;
-
- ufds = CALLOC (sizeof (struct pollfd),
- event_pool->evcache_size);
- event_pool->evcache = ufds;
- }
-
- for (i = 0; i < event_pool->used; i++) {
- ufds[i].fd = event_pool->reg[i].fd;
- ufds[i].events = event_pool->reg[i].events;
- ufds[i].revents = 0;
- }
-
- size = i;
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
-
- return size;
-}
-
-
-static int
-event_dispatch_poll (struct event_pool *event_pool)
-{
- struct pollfd *ufds = NULL;
- int size = 0;
- int i = 0;
- int ret = -1;
-
-
- if (event_pool == NULL) {
- gf_log ("event", GF_LOG_ERROR, "invalid argument");
- return -1;
- }
-
- while (1) {
- size = event_dispatch_poll_resize (event_pool, ufds, size);
- ufds = event_pool->evcache;
-
- ret = poll (ufds, size, 1);
-
- if (ret == 0)
- /* timeout */
- continue;
-
- if (ret == -1 && errno == EINTR)
- /* sys call */
- continue;
-
- for (i = 0; i < size; i++) {
- if (!ufds[i].revents)
- continue;
-
- event_dispatch_poll_handler (event_pool, ufds, i);
- }
- }
-
- return -1;
-}
-
-
-static struct event_ops event_ops_poll = {
- .new = event_pool_new_poll,
- .event_register = event_register_poll,
- .event_select_on = event_select_on_poll,
- .event_unregister = event_unregister_poll,
- .event_dispatch = event_dispatch_poll
-};
-
-
-
-#ifdef HAVE_SYS_EPOLL_H
-#include <sys/epoll.h>
-
-
-static struct event_pool *
-event_pool_new_epoll (int count)
-{
- struct event_pool *event_pool = NULL;
- int epfd = -1;
-
- event_pool = CALLOC (1, sizeof (*event_pool));
-
- if (!event_pool)
- return NULL;
-
- event_pool->count = count;
- event_pool->reg = CALLOC (event_pool->count,
- sizeof (*event_pool->reg));
-
- if (!event_pool->reg) {
- gf_log ("epoll", GF_LOG_CRITICAL,
- "event registry allocation failed");
- free (event_pool);
- return NULL;
- }
-
- epfd = epoll_create (count);
-
- if (epfd == -1) {
- gf_log ("epoll", GF_LOG_ERROR, "epoll fd creation failed (%s)",
- strerror (errno));
- free (event_pool->reg);
- free (event_pool);
- return NULL;
- }
-
- event_pool->fd = epfd;
-
- event_pool->count = count;
-
- pthread_mutex_init (&event_pool->mutex, NULL);
- pthread_cond_init (&event_pool->cond, NULL);
-
- return event_pool;
-}
-
-
-int
-event_register_epoll (struct event_pool *event_pool, int fd,
- event_handler_t handler,
- void *data, int poll_in, int poll_out)
-{
- int idx = -1;
- int ret = -1;
- struct epoll_event epoll_event = {0, };
- struct event_data *ev_data = (void *)&epoll_event.data;
-
-
- if (event_pool == NULL) {
- gf_log ("event", GF_LOG_ERROR, "invalid argument");
- return -1;
- }
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- if (event_pool->count == event_pool->used) {
- event_pool->count *= 2;
-
- event_pool->reg = realloc (event_pool->reg,
- event_pool->count *
- sizeof (*event_pool->reg));
-
- if (!event_pool->reg) {
- gf_log ("epoll", GF_LOG_ERROR,
- "event registry re-allocation failed");
- goto unlock;
- }
- }
-
- idx = event_pool->used;
- event_pool->used++;
-
- event_pool->reg[idx].fd = fd;
- event_pool->reg[idx].events = EPOLLPRI;
- event_pool->reg[idx].handler = handler;
- event_pool->reg[idx].data = data;
-
- switch (poll_in) {
- case 1:
- event_pool->reg[idx].events |= EPOLLIN;
- break;
- case 0:
- event_pool->reg[idx].events &= ~EPOLLIN;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- gf_log ("epoll", GF_LOG_ERROR,
- "invalid poll_in value %d", poll_in);
- break;
- }
-
- switch (poll_out) {
- case 1:
- event_pool->reg[idx].events |= EPOLLOUT;
- break;
- case 0:
- event_pool->reg[idx].events &= ~EPOLLOUT;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- gf_log ("epoll", GF_LOG_ERROR,
- "invalid poll_out value %d", poll_out);
- break;
- }
-
- event_pool->changed = 1;
-
- epoll_event.events = event_pool->reg[idx].events;
- ev_data->fd = fd;
- ev_data->idx = idx;
-
- ret = epoll_ctl (event_pool->fd, EPOLL_CTL_ADD, fd,
- &epoll_event);
-
- if (ret == -1) {
- gf_log ("epoll", GF_LOG_ERROR,
- "failed to add fd(=%d) to epoll fd(=%d) (%s)",
- fd, event_pool->fd, strerror (errno));
- goto unlock;
- }
-
- pthread_cond_broadcast (&event_pool->cond);
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
-
- return ret;
-}
-
-
-static int
-event_unregister_epoll (struct event_pool *event_pool, int fd, int idx_hint)
-{
- int idx = -1;
- int ret = -1;
-
- struct epoll_event epoll_event = {0, };
- struct event_data *ev_data = (void *)&epoll_event.data;
- int lastidx = -1;
-
- if (event_pool == NULL) {
- gf_log ("event", GF_LOG_ERROR, "invalid argument");
- return -1;
- }
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- idx = __event_getindex (event_pool, fd, idx_hint);
-
- if (idx == -1) {
- gf_log ("epoll", GF_LOG_ERROR,
- "index not found for fd=%d (idx_hint=%d)",
- fd, idx_hint);
- errno = ENOENT;
- goto unlock;
- }
-
- ret = epoll_ctl (event_pool->fd, EPOLL_CTL_DEL, fd, NULL);
-
- /* if ret is -1, this array member should never be accessed */
- /* if it is 0, the array member might be used by idx_cache
- * in which case the member should not be accessed till
- * it is reallocated
- */
-
- event_pool->reg[idx].fd = -1;
-
- if (ret == -1) {
- gf_log ("epoll", GF_LOG_ERROR,
- "fail to del fd(=%d) from epoll fd(=%d) (%s)",
- fd, event_pool->fd, strerror (errno));
- goto unlock;
- }
-
- lastidx = event_pool->used - 1;
- if (lastidx == idx) {
- event_pool->used--;
- goto unlock;
- }
-
- epoll_event.events = event_pool->reg[lastidx].events;
- ev_data->fd = event_pool->reg[lastidx].fd;
- ev_data->idx = idx;
-
- ret = epoll_ctl (event_pool->fd, EPOLL_CTL_MOD, ev_data->fd,
- &epoll_event);
- if (ret == -1) {
- gf_log ("epoll", GF_LOG_ERROR,
- "fail to modify fd(=%d) index %d to %d (%s)",
- ev_data->fd, event_pool->used, idx,
- strerror (errno));
- goto unlock;
- }
-
- /* just replace the unregistered idx by last one */
- event_pool->reg[idx] = event_pool->reg[lastidx];
- event_pool->used--;
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
-
- return ret;
-}
-
-
-static int
-event_select_on_epoll (struct event_pool *event_pool, int fd, int idx_hint,
- int poll_in, int poll_out)
-{
- int idx = -1;
- int ret = -1;
-
- struct epoll_event epoll_event = {0, };
- struct event_data *ev_data = (void *)&epoll_event.data;
-
-
- if (event_pool == NULL) {
- gf_log ("event", GF_LOG_ERROR, "invalid argument");
- return -1;
- }
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- idx = __event_getindex (event_pool, fd, idx_hint);
-
- if (idx == -1) {
- gf_log ("epoll", GF_LOG_ERROR,
- "index not found for fd=%d (idx_hint=%d)",
- fd, idx_hint);
- errno = ENOENT;
- goto unlock;
- }
-
- switch (poll_in) {
- case 1:
- event_pool->reg[idx].events |= EPOLLIN;
- break;
- case 0:
- event_pool->reg[idx].events &= ~EPOLLIN;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- gf_log ("epoll", GF_LOG_ERROR,
- "invalid poll_in value %d", poll_in);
- break;
- }
-
- switch (poll_out) {
- case 1:
- event_pool->reg[idx].events |= EPOLLOUT;
- break;
- case 0:
- event_pool->reg[idx].events &= ~EPOLLOUT;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- gf_log ("epoll", GF_LOG_ERROR,
- "invalid poll_out value %d", poll_out);
- break;
- }
-
- epoll_event.events = event_pool->reg[idx].events;
- ev_data->fd = fd;
- ev_data->idx = idx;
-
- ret = epoll_ctl (event_pool->fd, EPOLL_CTL_MOD, fd,
- &epoll_event);
- if (ret == -1) {
- gf_log ("epoll", GF_LOG_ERROR,
- "failed to modify fd(=%d) events to %d",
- fd, epoll_event.events);
- }
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
-
- return ret;
-}
-
-
-static int
-event_dispatch_epoll_handler (struct event_pool *event_pool,
- struct epoll_event *events, int i)
-{
- struct event_data *event_data = NULL;
- event_handler_t handler = NULL;
- void *data = NULL;
- int idx = -1;
- int ret = -1;
-
-
- event_data = (void *)&events[i].data;
- handler = NULL;
- data = NULL;
- idx = -1;
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- idx = __event_getindex (event_pool, event_data->fd,
- event_data->idx);
-
- if (idx == -1) {
- gf_log ("epoll", GF_LOG_ERROR,
- "index not found for fd(=%d) (idx_hint=%d)",
- event_data->fd, event_data->idx);
- goto unlock;
- }
-
- handler = event_pool->reg[idx].handler;
- data = event_pool->reg[idx].data;
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
-
- if (handler)
- ret = handler (event_data->fd, event_data->idx, data,
- (events[i].events & (EPOLLIN|EPOLLPRI)),
- (events[i].events & (EPOLLOUT)),
- (events[i].events & (EPOLLERR|EPOLLHUP)));
- return ret;
-}
-
-
-static int
-event_dispatch_epoll (struct event_pool *event_pool)
-{
- struct epoll_event *events = NULL;
- int size = 0;
- int i = 0;
- int ret = -1;
-
-
- if (event_pool == NULL) {
- gf_log ("event", GF_LOG_ERROR, "invalid argument");
- return -1;
- }
-
- while (1) {
- pthread_mutex_lock (&event_pool->mutex);
- {
- while (event_pool->used == 0)
- pthread_cond_wait (&event_pool->cond,
- &event_pool->mutex);
-
- if (event_pool->used > event_pool->evcache_size) {
- if (event_pool->evcache)
- free (event_pool->evcache);
-
- event_pool->evcache = events = NULL;
-
- event_pool->evcache_size =
- event_pool->used + 256;
-
- events = CALLOC (event_pool->evcache_size,
- sizeof (struct epoll_event));
-
- event_pool->evcache = events;
- }
- }
- pthread_mutex_unlock (&event_pool->mutex);
-
- ret = epoll_wait (event_pool->fd, event_pool->evcache,
- event_pool->evcache_size, -1);
-
- if (ret == 0)
- /* timeout */
- continue;
-
- if (ret == -1 && errno == EINTR)
- /* sys call */
- continue;
-
- size = ret;
-
- for (i = 0; i < size; i++) {
- if (!events[i].events)
- continue;
-
- ret = event_dispatch_epoll_handler (event_pool,
- events, i);
- }
- }
-
- return -1;
-}
-
-
-static struct event_ops event_ops_epoll = {
- .new = event_pool_new_epoll,
- .event_register = event_register_epoll,
- .event_select_on = event_select_on_epoll,
- .event_unregister = event_unregister_epoll,
- .event_dispatch = event_dispatch_epoll
-};
-
-#endif
struct event_pool *
event_pool_new (int count)
{
- struct event_pool *event_pool = NULL;
+ struct event_pool *event_pool = NULL;
+ extern struct event_ops event_ops_poll;
#ifdef HAVE_SYS_EPOLL_H
- event_pool = event_ops_epoll.new (count);
+ extern struct event_ops event_ops_epoll;
+
+ event_pool = event_ops_epoll.new (count);
- if (event_pool) {
- event_pool->ops = &event_ops_epoll;
- } else {
- gf_log ("event", GF_LOG_WARNING,
- "failing back to poll based event handling");
- }
+ if (event_pool) {
+ event_pool->ops = &event_ops_epoll;
+ } else {
+ gf_log ("event", GF_LOG_WARNING,
+ "falling back to poll based event handling");
+ }
#endif
- if (!event_pool) {
- event_pool = event_ops_poll.new (count);
+ if (!event_pool) {
+ event_pool = event_ops_poll.new (count);
- if (event_pool)
- event_pool->ops = &event_ops_poll;
- }
+ if (event_pool)
+ event_pool->ops = &event_ops_poll;
+ }
- return event_pool;
+ return event_pool;
}
int
event_register (struct event_pool *event_pool, int fd,
- event_handler_t handler,
- void *data, int poll_in, int poll_out)
+ event_handler_t handler,
+ void *data, int poll_in, int poll_out)
{
- int ret = -1;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("event", event_pool, out);
- if (event_pool == NULL) {
- gf_log ("event", GF_LOG_ERROR, "invalid argument");
- return -1;
- }
-
- ret = event_pool->ops->event_register (event_pool, fd, handler, data,
- poll_in, poll_out);
- return ret;
+ ret = event_pool->ops->event_register (event_pool, fd, handler, data,
+ poll_in, poll_out);
+out:
+ return ret;
}
int
event_unregister (struct event_pool *event_pool, int fd, int idx)
{
- int ret = -1;
+ int ret = -1;
- if (event_pool == NULL) {
- gf_log ("event", GF_LOG_ERROR, "invalid argument");
- return -1;
- }
-
- ret = event_pool->ops->event_unregister (event_pool, fd, idx);
+ GF_VALIDATE_OR_GOTO ("event", event_pool, out);
- return ret;
+ ret = event_pool->ops->event_unregister (event_pool, fd, idx);
+
+out:
+ return ret;
}
int
event_select_on (struct event_pool *event_pool, int fd, int idx_hint,
- int poll_in, int poll_out)
+ int poll_in, int poll_out)
{
- int ret = -1;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("event", event_pool, out);
- if (event_pool == NULL) {
- gf_log ("event", GF_LOG_ERROR, "invalid argument");
- return -1;
- }
-
- ret = event_pool->ops->event_select_on (event_pool, fd, idx_hint,
- poll_in, poll_out);
- return ret;
+ ret = event_pool->ops->event_select_on (event_pool, fd, idx_hint,
+ poll_in, poll_out);
+out:
+ return ret;
}
int
event_dispatch (struct event_pool *event_pool)
{
- int ret = -1;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("event", event_pool, out);
- if (event_pool == NULL) {
- gf_log ("event", GF_LOG_ERROR, "invalid argument");
- return -1;
- }
-
- ret = event_pool->ops->event_dispatch (event_pool);
+ ret = event_pool->ops->event_dispatch (event_pool);
- return ret;
+out:
+ return ret;
}
diff --git a/libglusterfs/src/event.h b/libglusterfs/src/event.h
index 19565e5ed..7ed182492 100644
--- a/libglusterfs/src/event.h
+++ b/libglusterfs/src/event.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _EVENT_H_
@@ -30,52 +21,51 @@
struct event_pool;
struct event_ops;
struct event_data {
- int fd;
- int idx;
-} __attribute__ ((__packed__));
+ int fd;
+ int idx;
+} __attribute__ ((__packed__, __may_alias__));
typedef int (*event_handler_t) (int fd, int idx, void *data,
int poll_in, int poll_out, int poll_err);
struct event_pool {
- struct event_ops *ops;
+ struct event_ops *ops;
- int fd;
- int breaker[2];
+ int fd;
+ int breaker[2];
- int count;
- struct {
- int fd;
- int events;
- void *data;
- event_handler_t handler;
- } *reg;
+ int count;
+ struct {
+ int fd;
+ int events;
+ void *data;
+ event_handler_t handler;
+ } *reg;
- int used;
- int idx_cache;
- int changed;
+ int used;
+ int changed;
- pthread_mutex_t mutex;
- pthread_cond_t cond;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
- void *evcache;
- int evcache_size;
+ void *evcache;
+ int evcache_size;
};
struct event_ops {
- struct event_pool * (*new) (int count);
+ struct event_pool * (*new) (int count);
- int (*event_register) (struct event_pool *event_pool, int fd,
- event_handler_t handler,
- void *data, int poll_in, int poll_out);
+ int (*event_register) (struct event_pool *event_pool, int fd,
+ event_handler_t handler,
+ void *data, int poll_in, int poll_out);
- int (*event_select_on) (struct event_pool *event_pool, int fd, int idx,
- int poll_in, int poll_out);
+ int (*event_select_on) (struct event_pool *event_pool, int fd, int idx,
+ int poll_in, int poll_out);
- int (*event_unregister) (struct event_pool *event_pool, int fd, int idx);
+ int (*event_unregister) (struct event_pool *event_pool, int fd, int idx);
- int (*event_dispatch) (struct event_pool *event_pool);
+ int (*event_dispatch) (struct event_pool *event_pool);
};
struct event_pool * event_pool_new (int count);
diff --git a/libglusterfs/src/fd-lk.c b/libglusterfs/src/fd-lk.c
new file mode 100644
index 000000000..caf2bb38e
--- /dev/null
+++ b/libglusterfs/src/fd-lk.c
@@ -0,0 +1,490 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "fd-lk.h"
+#include "common-utils.h"
+
+
+int32_t
+_fd_lk_delete_lock (fd_lk_ctx_node_t *lock)
+{
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("fd-lk", lock, out);
+
+ list_del_init (&lock->next);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int32_t
+_fd_lk_destroy_lock (fd_lk_ctx_node_t *lock)
+{
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("fd-lk", lock, out);
+
+ GF_FREE (lock);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+_fd_lk_destroy_lock_list (fd_lk_ctx_t *lk_ctx)
+{
+ int ret = -1;
+ fd_lk_ctx_node_t *lk = NULL;
+ fd_lk_ctx_node_t *tmp = NULL;
+
+ GF_VALIDATE_OR_GOTO ("fd-lk", lk_ctx, out);
+
+ list_for_each_entry_safe (lk, tmp, &lk_ctx->lk_list, next) {
+ _fd_lk_delete_lock (lk);
+ _fd_lk_destroy_lock (lk);
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+fd_lk_ctx_unref (fd_lk_ctx_t *lk_ctx)
+{
+ int ref = -1;
+
+ GF_VALIDATE_OR_GOTO ("fd-lk", lk_ctx, err);
+
+ LOCK (&lk_ctx->lock);
+ {
+ ref = --lk_ctx->ref;
+ if (ref < 0)
+ GF_ASSERT (!ref);
+ if (ref == 0)
+ _fd_lk_destroy_lock_list (lk_ctx);
+ }
+ UNLOCK (&lk_ctx->lock);
+
+ if (ref == 0) {
+ LOCK_DESTROY (&lk_ctx->lock);
+ GF_FREE (lk_ctx);
+ }
+
+ return 0;
+err:
+ return -1;
+}
+
+fd_lk_ctx_t *
+_fd_lk_ctx_ref (fd_lk_ctx_t *lk_ctx)
+{
+ if (!lk_ctx) {
+ gf_log_callingfn ("fd", GF_LOG_WARNING,
+ "invalid argument");
+ return NULL;
+ }
+
+ ++lk_ctx->ref;
+
+ return lk_ctx;
+}
+
+fd_lk_ctx_t *
+fd_lk_ctx_ref (fd_lk_ctx_t *lk_ctx)
+{
+ fd_lk_ctx_t *new_lk_ctx = NULL;
+
+ if (!lk_ctx) {
+ gf_log_callingfn ("fd", GF_LOG_WARNING,
+ "invalid argument");
+ return NULL;
+ }
+
+ LOCK (&lk_ctx->lock);
+ {
+ new_lk_ctx = _fd_lk_ctx_ref (lk_ctx);
+ }
+ UNLOCK (&lk_ctx->lock);
+
+ return new_lk_ctx;
+}
+
+fd_lk_ctx_t *
+fd_lk_ctx_try_ref (fd_lk_ctx_t *lk_ctx)
+{
+ int ret = -1;
+ fd_lk_ctx_t *new_lk_ctx = NULL;
+
+ if (!lk_ctx) {
+ goto out;
+ }
+
+ ret = TRY_LOCK (&lk_ctx->lock);
+ if (ret)
+ goto out;
+
+ new_lk_ctx = _fd_lk_ctx_ref (lk_ctx);
+ UNLOCK (&lk_ctx->lock);
+
+out:
+ return new_lk_ctx;
+}
+
+fd_lk_ctx_t *
+fd_lk_ctx_create ()
+{
+ fd_lk_ctx_t *fd_lk_ctx = NULL;
+
+ fd_lk_ctx = GF_CALLOC (1, sizeof (fd_lk_ctx_t),
+ gf_common_mt_fd_lk_ctx_t);
+ if (!fd_lk_ctx)
+ goto out;
+
+ INIT_LIST_HEAD (&fd_lk_ctx->lk_list);
+
+ LOCK_INIT (&fd_lk_ctx->lock);
+
+ fd_lk_ctx = fd_lk_ctx_ref (fd_lk_ctx);
+out:
+ return fd_lk_ctx;
+}
+
+int
+_fd_lk_insert_lock (fd_lk_ctx_t *lk_ctx,
+ fd_lk_ctx_node_t *lock)
+{
+ list_add_tail (&lock->next, &lk_ctx->lk_list);
+ return 0;
+}
+
+static off_t
+_fd_lk_get_lock_len (off_t start, off_t end)
+{
+ if (end == LLONG_MAX)
+ return 0;
+ else
+ return (end - start + 1);
+}
+
+fd_lk_ctx_node_t *
+fd_lk_ctx_node_new (int32_t cmd, struct gf_flock *flock)
+{
+ fd_lk_ctx_node_t *new_lock = NULL;
+
+ /* TODO: get from mem-pool */
+ new_lock = GF_CALLOC (1, sizeof (fd_lk_ctx_node_t),
+ gf_common_mt_fd_lk_ctx_node_t);
+ if (!new_lock)
+ goto out;
+
+ new_lock->cmd = cmd;
+
+ if (flock) {
+ new_lock->fl_type = flock->l_type;
+ new_lock->fl_start = flock->l_start;
+
+ if (flock->l_len == 0)
+ new_lock->fl_end = LLONG_MAX;
+ else
+ new_lock->fl_end = flock->l_start + flock->l_len - 1;
+
+ memcpy (&new_lock->user_flock, flock,
+ sizeof (struct gf_flock));
+ }
+
+ INIT_LIST_HEAD (&new_lock->next);
+out:
+ return new_lock;
+}
+
+int32_t
+_fd_lk_delete_unlck_locks (fd_lk_ctx_t *lk_ctx)
+{
+ int32_t ret = -1;
+ fd_lk_ctx_node_t *tmp = NULL;
+ fd_lk_ctx_node_t *lk = NULL;
+
+ GF_VALIDATE_OR_GOTO ("fd-lk", lk_ctx, out);
+
+ list_for_each_entry_safe (lk, tmp, &lk_ctx->lk_list, next) {
+ if (lk->fl_type == F_UNLCK) {
+ _fd_lk_delete_lock (lk);
+ _fd_lk_destroy_lock (lk);
+ }
+ }
+out:
+ return ret;
+}
+
+int
+fd_lk_overlap (fd_lk_ctx_node_t *l1,
+ fd_lk_ctx_node_t *l2)
+{
+ if (l1->fl_end >= l2->fl_start &&
+ l2->fl_end >= l1->fl_start)
+ return 1;
+
+ return 0;
+}
+
+fd_lk_ctx_node_t *
+_fd_lk_add_locks (fd_lk_ctx_node_t *l1,
+ fd_lk_ctx_node_t *l2)
+{
+ fd_lk_ctx_node_t *sum = NULL;
+
+ sum = fd_lk_ctx_node_new (0, NULL);
+ if (!sum)
+ goto out;
+
+ sum->fl_start = min (l1->fl_start, l2->fl_start);
+ sum->fl_end = max (l1->fl_end, l2->fl_end);
+
+ sum->user_flock.l_start = sum->fl_start;
+ sum->user_flock.l_len = _fd_lk_get_lock_len (sum->fl_start,
+ sum->fl_end);
+out:
+ return sum;
+}
+
+/* Subtract two locks */
+struct _values {
+ fd_lk_ctx_node_t *locks[3];
+};
+
+int32_t
+_fd_lk_sub_locks (struct _values *v,
+ fd_lk_ctx_node_t *big,
+ fd_lk_ctx_node_t *small)
+{
+ int32_t ret = -1;
+
+ if ((big->fl_start == small->fl_start) &&
+ (big->fl_end == small->fl_end)) {
+ /* both edges coincide with big */
+ v->locks[0] = fd_lk_ctx_node_new (small->cmd, NULL);
+ if (!v->locks[0])
+ goto out;
+
+ memcpy (v->locks[0], big, sizeof (fd_lk_ctx_node_t));
+
+ v->locks[0]->fl_type = small->fl_type;
+ v->locks[0]->user_flock.l_type = small->fl_type;
+ } else if ((small->fl_start > big->fl_start) &&
+ (small->fl_end < big->fl_end)) {
+ /* small lock is completely inside big lock,
+ break it down into 3 different locks. */
+ v->locks[0] = fd_lk_ctx_node_new (big->cmd, NULL);
+ if (!v->locks[0])
+ goto out;
+
+ v->locks[1] = fd_lk_ctx_node_new (small->cmd, NULL);
+ if (!v->locks[1])
+ goto out;
+
+ v->locks[2] = fd_lk_ctx_node_new (big->cmd, NULL);
+ if (!v->locks[2])
+ goto out;
+
+ memcpy (v->locks[0], big, sizeof (fd_lk_ctx_node_t));
+ v->locks[0]->fl_end = small->fl_start - 1;
+ v->locks[0]->user_flock.l_len =
+ _fd_lk_get_lock_len (v->locks[0]->fl_start,
+ v->locks[0]->fl_end);
+
+ memcpy (v->locks[1], small, sizeof (fd_lk_ctx_node_t));
+
+ memcpy (v->locks[2], big, sizeof (fd_lk_ctx_node_t));
+ v->locks[2]->fl_start = small->fl_end + 1;
+ v->locks[2]->user_flock.l_len =
+ _fd_lk_get_lock_len (v->locks[2]->fl_start,
+ v->locks[2]->fl_end);
+ } else if (small->fl_start == big->fl_start) {
+ /* One of the ends co-incide, break the
+ locks into two seperate parts */
+ v->locks[0] = fd_lk_ctx_node_new (small->cmd, NULL);
+ if (!v->locks[0])
+ goto out;
+
+ v->locks[1] = fd_lk_ctx_node_new (big->cmd, NULL);
+ if (!v->locks[1])
+ goto out;
+
+ memcpy (v->locks[0], small, sizeof (fd_lk_ctx_node_t));
+
+ memcpy (v->locks[1], big, sizeof (fd_lk_ctx_node_t));
+ v->locks[1]->fl_start = small->fl_end + 1;
+ v->locks[1]->user_flock.l_start = small->fl_end + 1;
+ } else if (small->fl_end == big->fl_end) {
+ /* One of the ends co-incide, break the
+ locks into two seperate parts */
+ v->locks[0] = fd_lk_ctx_node_new (small->cmd, NULL);
+ if (!v->locks[0])
+ goto out;
+
+ v->locks[1] = fd_lk_ctx_node_new (big->cmd, NULL);
+ if (!v->locks[1])
+ goto out;
+
+ memcpy (v->locks[0], big, sizeof (fd_lk_ctx_node_t));
+ v->locks[0]->fl_end = small->fl_start - 1;
+ v->locks[0]->user_flock.l_len =
+ _fd_lk_get_lock_len (v->locks[0]->fl_start,
+ v->locks[0]->fl_end);
+
+ memcpy (v->locks[1], small, sizeof (fd_lk_ctx_node_t));
+ } else {
+ /* We should never come to this case */
+ GF_ASSERT (!"Invalid case");
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+static void
+_fd_lk_insert_and_merge (fd_lk_ctx_t *lk_ctx,
+ fd_lk_ctx_node_t *lock)
+{
+ int32_t ret = -1;
+ int32_t i = 0;
+ fd_lk_ctx_node_t *entry = NULL;
+ fd_lk_ctx_node_t *t = NULL;
+ fd_lk_ctx_node_t *sum = NULL;
+ struct _values v = {.locks = {0, 0, 0 }};
+
+ list_for_each_entry_safe (entry, t, &lk_ctx->lk_list, next) {
+ if (!fd_lk_overlap (entry, lock))
+ continue;
+
+ if (entry->fl_type == lock->fl_type) {
+ sum = _fd_lk_add_locks (entry, lock);
+ if (!sum)
+ return;
+ sum->fl_type = entry->fl_type;
+ sum->user_flock.l_type = entry->fl_type;
+ _fd_lk_delete_lock (entry);
+ _fd_lk_destroy_lock (entry);
+ _fd_lk_destroy_lock (lock);
+ _fd_lk_insert_and_merge (lk_ctx, sum);
+ return;
+ } else {
+ sum = _fd_lk_add_locks (entry, lock);
+ sum->fl_type = lock->fl_type;
+ sum->user_flock.l_type = lock->fl_type;
+ ret = _fd_lk_sub_locks (&v, sum, lock);
+ if (ret)
+ return;
+ _fd_lk_delete_lock (entry);
+ _fd_lk_destroy_lock (entry);
+
+ _fd_lk_delete_lock (lock);
+ _fd_lk_destroy_lock (lock);
+
+ _fd_lk_destroy_lock (sum);
+
+ for (i = 0; i < 3; i++) {
+ if (!v.locks[i])
+ continue;
+
+ INIT_LIST_HEAD (&v.locks[i]->next);
+ _fd_lk_insert_and_merge (lk_ctx, v.locks[i]);
+ }
+ _fd_lk_delete_unlck_locks (lk_ctx);
+ return;
+ }
+ }
+
+ /* no conflicts, so just insert */
+ if (lock->fl_type != F_UNLCK) {
+ _fd_lk_insert_lock (lk_ctx, lock);
+ } else {
+ _fd_lk_destroy_lock (lock);
+ }
+}
+
+static void
+print_lock_list (fd_lk_ctx_t *lk_ctx)
+{
+ fd_lk_ctx_node_t *lk = NULL;
+
+ gf_log ("fd-lk", GF_LOG_DEBUG, "lock list:");
+
+ list_for_each_entry (lk, &lk_ctx->lk_list, next)
+ gf_log ("fd-lk", GF_LOG_DEBUG, "owner = %s, "
+ "cmd = %s fl_type = %s, fs_start = %"PRId64", "
+ "fs_end = %"PRId64", user_flock: l_type = %s, "
+ "l_start = %"PRId64", l_len = %"PRId64", ",
+ lkowner_utoa (&lk->user_flock.l_owner),
+ get_lk_cmd (lk->cmd), get_lk_type (lk->fl_type),
+ lk->fl_start, lk->fl_end,
+ get_lk_type (lk->user_flock.l_type),
+ lk->user_flock.l_start,
+ lk->user_flock.l_len);
+}
+
+int
+fd_lk_insert_and_merge (fd_t *fd, int32_t cmd,
+ struct gf_flock *flock)
+{
+ int32_t ret = -1;
+ fd_lk_ctx_t *lk_ctx = NULL;
+ fd_lk_ctx_node_t *lk = NULL;
+
+ GF_VALIDATE_OR_GOTO ("fd-lk", fd, out);
+ GF_VALIDATE_OR_GOTO ("fd-lk", flock, out);
+
+ lk_ctx = fd_lk_ctx_ref (fd->lk_ctx);
+ lk = fd_lk_ctx_node_new (cmd, flock);
+
+ gf_log ("fd-lk", GF_LOG_DEBUG,
+ "new lock requrest: owner = %s, fl_type = %s, "
+ "fs_start = %"PRId64", fs_end = %"PRId64", "
+ "user_flock: l_type = %s, l_start = %"PRId64", "
+ "l_len = %"PRId64, lkowner_utoa (&flock->l_owner),
+ get_lk_type (lk->fl_type), lk->fl_start,
+ lk->fl_end, get_lk_type (lk->user_flock.l_type),
+ lk->user_flock.l_start,
+ lk->user_flock.l_len);
+
+ LOCK (&lk_ctx->lock);
+ {
+ _fd_lk_insert_and_merge (lk_ctx, lk);
+ print_lock_list (lk_ctx);
+ }
+ UNLOCK (&lk_ctx->lock);
+
+ fd_lk_ctx_unref (lk_ctx);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+gf_boolean_t
+fd_lk_ctx_empty (fd_lk_ctx_t *lk_ctx)
+{
+ gf_boolean_t verdict = _gf_true;
+
+ if (!lk_ctx)
+ return _gf_true;
+
+ LOCK (&lk_ctx->lock);
+ {
+ verdict = list_empty (&lk_ctx->lk_list);
+ }
+ UNLOCK (&lk_ctx->lock);
+
+ return verdict;
+}
diff --git a/libglusterfs/src/fd-lk.h b/libglusterfs/src/fd-lk.h
new file mode 100644
index 000000000..1d2ff794c
--- /dev/null
+++ b/libglusterfs/src/fd-lk.h
@@ -0,0 +1,70 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _FD_LK_H
+#define _FD_LK_H
+
+#include "fd.h"
+#include "locking.h"
+#include "list.h"
+#include "logging.h"
+#include "mem-pool.h"
+#include "mem-types.h"
+#include "glusterfs.h"
+#include "common-utils.h"
+
+#define get_lk_type(type) \
+ type == F_UNLCK ? "F_UNLCK" : (type == F_RDLCK ? "F_RDLCK" : "F_WRLCK")
+
+#define get_lk_cmd(cmd) \
+ cmd == F_SETLKW ? "F_SETLKW" : (cmd == F_SETLK ? "F_SETLK" : "F_GETLK")
+
+struct _fd;
+
+struct fd_lk_ctx {
+ struct list_head lk_list;
+ int ref;
+ gf_lock_t lock;
+};
+typedef struct fd_lk_ctx fd_lk_ctx_t;
+
+struct fd_lk_ctx_node {
+ int32_t cmd;
+ struct gf_flock user_flock;
+ off_t fl_start;
+ off_t fl_end;
+ short fl_type;
+ struct list_head next;
+};
+typedef struct fd_lk_ctx_node fd_lk_ctx_node_t;
+
+fd_lk_ctx_t *
+_fd_lk_ctx_ref (fd_lk_ctx_t *lk_ctx);
+
+fd_lk_ctx_t *
+fd_lk_ctx_ref (fd_lk_ctx_t *lk_ctx);
+
+fd_lk_ctx_t *
+fd_lk_ctx_try_ref (fd_lk_ctx_t *lk_ctx);
+
+fd_lk_ctx_t *
+fd_lk_ctx_create ();
+
+int
+fd_lk_insert_and_merge (struct _fd *lk_ctx, int32_t cmd,
+ struct gf_flock *flock);
+
+int
+fd_lk_ctx_unref (fd_lk_ctx_t *lk_ctx);
+
+gf_boolean_t
+fd_lk_ctx_empty (fd_lk_ctx_t *lk_ctx);
+
+#endif /* _FD_LK_H */
diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c
index 5b2d32bff..36cc4d056 100644
--- a/libglusterfs/src/fd.c
+++ b/libglusterfs/src/fd.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include "fd.h"
@@ -30,33 +21,12 @@
#endif
-static uint32_t
+static int
gf_fd_fdtable_expand (fdtable_t *fdtable, uint32_t nr);
-static fd_t *
-_fd_ref (fd_t *fd);
-
-/*
- Allocate in memory chunks of power of 2 starting from 1024B
- Assumes fdtable->lock is held
-*/
-static inline uint32_t
-gf_roundup_power_of_two (uint32_t nr)
-{
- uint32_t result = 1;
-
- if (nr < 0) {
- gf_log ("server-protocol/fd",
- GF_LOG_ERROR,
- "Negative number passed");
- return -1;
- }
-
- while (result <= nr)
- result *= 2;
- return result;
-}
+fd_t *
+__fd_ref (fd_t *fd);
static int
gf_fd_chain_fd_entries (fdentry_t *entries, uint32_t startidx,
@@ -64,8 +34,10 @@ gf_fd_chain_fd_entries (fdentry_t *entries, uint32_t startidx,
{
uint32_t i = 0;
- if (!entries)
+ if (!entries) {
+ gf_log_callingfn ("fd", GF_LOG_WARNING, "!entries");
return -1;
+ }
/* Chain only till the second to last entry because we want to
* ensure that the last entry has GF_FDTABLE_END.
@@ -80,33 +52,38 @@ gf_fd_chain_fd_entries (fdentry_t *entries, uint32_t startidx,
}
-static uint32_t
+static int
gf_fd_fdtable_expand (fdtable_t *fdtable, uint32_t nr)
{
- fdentry_t *oldfds = NULL;
- uint32_t oldmax_fds = -1;
-
- if (fdtable == NULL || nr < 0)
- {
- gf_log ("fd", GF_LOG_ERROR, "invalid argument");
- return EINVAL;
- }
-
- nr /= (1024 / sizeof (fdentry_t));
- nr = gf_roundup_power_of_two (nr + 1);
- nr *= (1024 / sizeof (fdentry_t));
-
- oldfds = fdtable->fdentries;
- oldmax_fds = fdtable->max_fds;
-
- fdtable->fdentries = CALLOC (nr, sizeof (fdentry_t));
- ERR_ABORT (fdtable->fdentries);
- fdtable->max_fds = nr;
-
- if (oldfds) {
- uint32_t cpy = oldmax_fds * sizeof (fdentry_t);
- memcpy (fdtable->fdentries, oldfds, cpy);
- }
+ fdentry_t *oldfds = NULL;
+ uint32_t oldmax_fds = -1;
+ int ret = -1;
+
+ if (fdtable == NULL || nr < 0) {
+ gf_log_callingfn ("fd", GF_LOG_ERROR, "invalid argument");
+ ret = EINVAL;
+ goto out;
+ }
+
+ nr /= (1024 / sizeof (fdentry_t));
+ nr = gf_roundup_next_power_of_two (nr + 1);
+ nr *= (1024 / sizeof (fdentry_t));
+
+ oldfds = fdtable->fdentries;
+ oldmax_fds = fdtable->max_fds;
+
+ fdtable->fdentries = GF_CALLOC (nr, sizeof (fdentry_t),
+ gf_common_mt_fdentry_t);
+ if (!fdtable->fdentries) {
+ ret = ENOMEM;
+ goto out;
+ }
+ fdtable->max_fds = nr;
+
+ if (oldfds) {
+ uint32_t cpy = oldmax_fds * sizeof (fdentry_t);
+ memcpy (fdtable->fdentries, oldfds, cpy);
+ }
gf_fd_chain_fd_entries (fdtable->fdentries, oldmax_fds,
fdtable->max_fds);
@@ -116,41 +93,47 @@ gf_fd_fdtable_expand (fdtable_t *fdtable, uint32_t nr)
* using the expanded table.
*/
fdtable->first_free = oldmax_fds;
- FREE (oldfds);
- return 0;
+ GF_FREE (oldfds);
+ ret = 0;
+out:
+ return ret;
}
+
fdtable_t *
-gf_fd_fdtable_alloc (void)
+gf_fd_fdtable_alloc (void)
{
- fdtable_t *fdtable = NULL;
+ fdtable_t *fdtable = NULL;
- fdtable = CALLOC (1, sizeof (*fdtable));
- if (!fdtable)
- return NULL;
+ fdtable = GF_CALLOC (1, sizeof (*fdtable), gf_common_mt_fdtable_t);
+ if (!fdtable)
+ return NULL;
- pthread_mutex_init (&fdtable->lock, NULL);
+ pthread_mutex_init (&fdtable->lock, NULL);
- pthread_mutex_lock (&fdtable->lock);
- {
- gf_fd_fdtable_expand (fdtable, 0);
- }
- pthread_mutex_unlock (&fdtable->lock);
+ pthread_mutex_lock (&fdtable->lock);
+ {
+ gf_fd_fdtable_expand (fdtable, 0);
+ }
+ pthread_mutex_unlock (&fdtable->lock);
- return fdtable;
+ return fdtable;
}
-fdentry_t *
+
+static fdentry_t *
__gf_fd_fdtable_get_all_fds (fdtable_t *fdtable, uint32_t *count)
{
fdentry_t *fdentries = NULL;
if (count == NULL) {
+ gf_log_callingfn ("fd", GF_LOG_WARNING, "!count");
goto out;
}
fdentries = fdtable->fdentries;
- fdtable->fdentries = calloc (fdtable->max_fds, sizeof (fdentry_t));
+ fdtable->fdentries = GF_CALLOC (fdtable->max_fds, sizeof (fdentry_t),
+ gf_common_mt_fdentry_t);
gf_fd_chain_fd_entries (fdtable->fdentries, 0, fdtable->max_fds);
*count = fdtable->max_fds;
@@ -158,10 +141,12 @@ out:
return fdentries;
}
+
fdentry_t *
gf_fd_fdtable_get_all_fds (fdtable_t *fdtable, uint32_t *count)
{
fdentry_t *entries = NULL;
+
if (fdtable) {
pthread_mutex_lock (&fdtable->lock);
{
@@ -173,26 +158,76 @@ gf_fd_fdtable_get_all_fds (fdtable_t *fdtable, uint32_t *count)
return entries;
}
-void
+
+static fdentry_t *
+__gf_fd_fdtable_copy_all_fds (fdtable_t *fdtable, uint32_t *count)
+{
+ fdentry_t *fdentries = NULL;
+ int i = 0;
+
+ if (count == NULL) {
+ gf_log_callingfn ("fd", GF_LOG_WARNING, "!count");
+ goto out;
+ }
+
+ fdentries = GF_CALLOC (fdtable->max_fds, sizeof (fdentry_t),
+ gf_common_mt_fdentry_t);
+ if (fdentries == NULL) {
+ goto out;
+ }
+
+ *count = fdtable->max_fds;
+
+ for (i = 0; i < fdtable->max_fds; i++) {
+ if (fdtable->fdentries[i].fd != NULL) {
+ fdentries[i].fd = fd_ref (fdtable->fdentries[i].fd);
+ }
+ }
+
+out:
+ return fdentries;
+}
+
+
+fdentry_t *
+gf_fd_fdtable_copy_all_fds (fdtable_t *fdtable, uint32_t *count)
+{
+ fdentry_t *entries = NULL;
+
+ if (fdtable) {
+ pthread_mutex_lock (&fdtable->lock);
+ {
+ entries = __gf_fd_fdtable_copy_all_fds (fdtable, count);
+ }
+ pthread_mutex_unlock (&fdtable->lock);
+ }
+
+ return entries;
+}
+
+
+void
gf_fd_fdtable_destroy (fdtable_t *fdtable)
{
struct list_head list = {0, };
fd_t *fd = NULL;
fdentry_t *fdentries = NULL;
uint32_t fd_count = 0;
- int32_t i = 0;
+ int32_t i = 0;
INIT_LIST_HEAD (&list);
- if (!fdtable)
+ if (!fdtable) {
+ gf_log_callingfn ("fd", GF_LOG_WARNING, "!fdtable");
return;
+ }
- pthread_mutex_lock (&fdtable->lock);
- {
+ pthread_mutex_lock (&fdtable->lock);
+ {
fdentries = __gf_fd_fdtable_get_all_fds (fdtable, &fd_count);
- FREE (fdtable->fdentries);
- }
- pthread_mutex_unlock (&fdtable->lock);
+ GF_FREE (fdtable->fdentries);
+ }
+ pthread_mutex_unlock (&fdtable->lock);
if (fdentries != NULL) {
for (i = 0; i < fd_count; i++) {
@@ -202,53 +237,53 @@ gf_fd_fdtable_destroy (fdtable_t *fdtable)
}
}
- FREE (fdentries);
- pthread_mutex_destroy (&fdtable->lock);
- FREE (fdtable);
- }
+ GF_FREE (fdentries);
+ pthread_mutex_destroy (&fdtable->lock);
+ GF_FREE (fdtable);
+ }
}
-int32_t
+
+int
gf_fd_unused_get (fdtable_t *fdtable, fd_t *fdptr)
{
- int32_t fd = -1;
+ int32_t fd = -1;
fdentry_t *fde = NULL;
- int error;
+ int error;
int alloc_attempts = 0;
-
- if (fdtable == NULL || fdptr == NULL)
- {
- gf_log ("fd", GF_LOG_ERROR, "invalid argument");
- return EINVAL;
- }
-
- pthread_mutex_lock (&fdtable->lock);
- {
-fd_alloc_try_again:
+
+ if (fdtable == NULL || fdptr == NULL) {
+ gf_log_callingfn ("fd", GF_LOG_ERROR, "invalid argument");
+ return EINVAL;
+ }
+
+ pthread_mutex_lock (&fdtable->lock);
+ {
+ fd_alloc_try_again:
if (fdtable->first_free != GF_FDTABLE_END) {
fde = &fdtable->fdentries[fdtable->first_free];
fd = fdtable->first_free;
fdtable->first_free = fde->next_free;
fde->next_free = GF_FDENTRY_ALLOCATED;
fde->fd = fdptr;
- } else {
+ } else {
/* If this is true, there is something
* seriously wrong with our data structures.
*/
if (alloc_attempts >= 2) {
- gf_log ("server-protocol.c", GF_LOG_ERROR,
- "Multiple attempts to expand fd table"
+ gf_log ("fd", GF_LOG_ERROR,
+ "multiple attempts to expand fd table"
" have failed.");
goto out;
}
error = gf_fd_fdtable_expand (fdtable,
fdtable->max_fds + 1);
- if (error) {
- gf_log ("server-protocol.c",
- GF_LOG_ERROR,
- "Cannot expand fdtable:%s", strerror (error));
+ if (error) {
+ gf_log ("fd", GF_LOG_ERROR,
+ "Cannot expand fdtable: %s",
+ strerror (error));
goto out;
- }
+ }
++alloc_attempts;
/* At this point, the table stands expanded
* with the first_free referring to the first
@@ -257,41 +292,43 @@ fd_alloc_try_again:
* above logic should just work.
*/
goto fd_alloc_try_again;
- }
- }
+ }
+ }
out:
- pthread_mutex_unlock (&fdtable->lock);
+ pthread_mutex_unlock (&fdtable->lock);
- return fd;
+ return fd;
}
-inline void
+inline void
gf_fd_put (fdtable_t *fdtable, int32_t fd)
{
- fd_t *fdptr = NULL;
+ fd_t *fdptr = NULL;
fdentry_t *fde = NULL;
- if (fdtable == NULL || fd < 0)
- {
- gf_log ("fd", GF_LOG_ERROR, "invalid argument");
- return;
- }
-
- if (!(fd < fdtable->max_fds))
- {
- gf_log ("fd", GF_LOG_ERROR, "invalid argument");
- return;
- }
-
- pthread_mutex_lock (&fdtable->lock);
- {
+ if (fd == -2)
+ /* anonymous fd */
+ return;
+
+ if (fdtable == NULL || fd < 0) {
+ gf_log_callingfn ("fd", GF_LOG_ERROR, "invalid argument");
+ return;
+ }
+
+ if (!(fd < fdtable->max_fds)) {
+ gf_log_callingfn ("fd", GF_LOG_ERROR, "invalid argument");
+ return;
+ }
+
+ pthread_mutex_lock (&fdtable->lock);
+ {
fde = &fdtable->fdentries[fd];
/* If the entry is not allocated, put operation must return
* without doing anything.
* This has the potential of masking out any bugs in a user of
* fd that ends up calling gf_fd_put twice for the same fd or
- * for an unallocated fd, but thats a price we have to pay for
+ * for an unallocated fd, but it is a price we have to pay for
* ensuring sanity of our fd-table.
*/
if (fde->next_free != GF_FDENTRY_ALLOCATED)
@@ -300,150 +337,213 @@ gf_fd_put (fdtable_t *fdtable, int32_t fd)
fde->fd = NULL;
fde->next_free = fdtable->first_free;
fdtable->first_free = fd;
- }
+ }
+unlock_out:
+ pthread_mutex_unlock (&fdtable->lock);
+
+ if (fdptr) {
+ fd_unref (fdptr);
+ }
+}
+
+
+inline void
+gf_fdptr_put (fdtable_t *fdtable, fd_t *fd)
+{
+ fdentry_t *fde = NULL;
+ int32_t i = 0;
+
+ if ((fdtable == NULL) || (fd == NULL)) {
+ gf_log_callingfn ("fd", GF_LOG_ERROR, "invalid argument");
+ return;
+ }
+
+ pthread_mutex_lock (&fdtable->lock);
+ {
+ for (i = 0; i < fdtable->max_fds; i++) {
+ if (fdtable->fdentries[i].fd == fd) {
+ fde = &fdtable->fdentries[i];
+ break;
+ }
+ }
+
+ if (fde == NULL) {
+ gf_log_callingfn ("fd", GF_LOG_WARNING,
+ "fd (%p) is not present in fdtable", fd);
+ goto unlock_out;
+ }
+
+ /* If the entry is not allocated, put operation must return
+ * without doing anything.
+ * This has the potential of masking out any bugs in a user of
+ * fd that ends up calling gf_fd_put twice for the same fd or
+ * for an unallocated fd, but it is a price we have to pay for
+ * ensuring sanity of our fd-table.
+ */
+ if (fde->next_free != GF_FDENTRY_ALLOCATED)
+ goto unlock_out;
+ fde->fd = NULL;
+ fde->next_free = fdtable->first_free;
+ fdtable->first_free = i;
+ }
unlock_out:
- pthread_mutex_unlock (&fdtable->lock);
+ pthread_mutex_unlock (&fdtable->lock);
- if (fdptr) {
- fd_unref (fdptr);
- }
+ if ((fd != NULL) && (fde != NULL)) {
+ fd_unref (fd);
+ }
}
fd_t *
gf_fd_fdptr_get (fdtable_t *fdtable, int64_t fd)
{
- fd_t *fdptr = NULL;
-
- if (fdtable == NULL || fd < 0)
- {
- gf_log ("fd", GF_LOG_ERROR, "invalid argument");
- errno = EINVAL;
- return NULL;
- }
-
- if (!(fd < fdtable->max_fds))
- {
- gf_log ("fd", GF_LOG_ERROR, "invalid argument");
- errno = EINVAL;
- return NULL;
- }
-
- pthread_mutex_lock (&fdtable->lock);
- {
- fdptr = fdtable->fdentries[fd].fd;
- if (fdptr) {
- fd_ref (fdptr);
- }
- }
- pthread_mutex_unlock (&fdtable->lock);
-
- return fdptr;
+ fd_t *fdptr = NULL;
+
+ if (fdtable == NULL || fd < 0) {
+ gf_log_callingfn ("fd", GF_LOG_ERROR, "invalid argument");
+ errno = EINVAL;
+ return NULL;
+ }
+
+ if (!(fd < fdtable->max_fds)) {
+ gf_log_callingfn ("fd", GF_LOG_ERROR, "invalid argument");
+ errno = EINVAL;
+ return NULL;
+ }
+
+ pthread_mutex_lock (&fdtable->lock);
+ {
+ fdptr = fdtable->fdentries[fd].fd;
+ if (fdptr) {
+ fd_ref (fdptr);
+ }
+ }
+ pthread_mutex_unlock (&fdtable->lock);
+
+ return fdptr;
}
+
fd_t *
-_fd_ref (fd_t *fd)
+__fd_ref (fd_t *fd)
{
- ++fd->refcount;
-
- return fd;
+ ++fd->refcount;
+
+ return fd;
}
+
fd_t *
fd_ref (fd_t *fd)
{
- fd_t *refed_fd = NULL;
+ fd_t *refed_fd = NULL;
- if (!fd) {
- gf_log ("fd", GF_LOG_ERROR, "@fd=%p", fd);
- return NULL;
- }
+ if (!fd) {
+ gf_log_callingfn ("fd", GF_LOG_ERROR, "null fd");
+ return NULL;
+ }
+
+ LOCK (&fd->inode->lock);
+ refed_fd = __fd_ref (fd);
+ UNLOCK (&fd->inode->lock);
- LOCK (&fd->inode->lock);
- refed_fd = _fd_ref (fd);
- UNLOCK (&fd->inode->lock);
-
- return refed_fd;
+ return refed_fd;
}
+
fd_t *
-_fd_unref (fd_t *fd)
+__fd_unref (fd_t *fd)
{
- assert (fd->refcount);
+ GF_ASSERT (fd->refcount);
+
+ --fd->refcount;
- --fd->refcount;
+ if (fd->refcount == 0) {
+ list_del_init (&fd->inode_list);
+ }
- if (fd->refcount == 0){
- list_del_init (&fd->inode_list);
- }
-
- return fd;
+ return fd;
}
+
static void
fd_destroy (fd_t *fd)
{
xlator_t *xl = NULL;
- int i = 0;
+ int i = 0;
+ xlator_t *old_THIS = NULL;
if (fd == NULL){
- gf_log ("xlator", GF_LOG_ERROR, "invalid arugument");
+ gf_log_callingfn ("xlator", GF_LOG_ERROR, "invalid argument");
goto out;
}
-
+
if (fd->inode == NULL){
- gf_log ("xlator", GF_LOG_ERROR, "fd->inode is NULL");
+ gf_log_callingfn ("xlator", GF_LOG_ERROR, "fd->inode is NULL");
goto out;
}
- if (!fd->_ctx)
- goto out;
-
- if (S_ISDIR (fd->inode->st_mode)) {
- for (i = 0; i < fd->inode->table->xl->ctx->xl_count; i++) {
- if (fd->_ctx[i].key) {
- xl = (xlator_t *)(long)fd->_ctx[i].key;
- if (xl->cbks->releasedir)
- xl->cbks->releasedir (xl, fd);
- }
- }
+ if (!fd->_ctx)
+ goto out;
+
+ if (IA_ISDIR (fd->inode->ia_type)) {
+ for (i = 0; i < fd->xl_count; i++) {
+ if (fd->_ctx[i].key) {
+ xl = fd->_ctx[i].xl_key;
+ old_THIS = THIS;
+ THIS = xl;
+ if (xl->cbks->releasedir)
+ xl->cbks->releasedir (xl, fd);
+ THIS = old_THIS;
+ }
+ }
} else {
- for (i = 0; i < fd->inode->table->xl->ctx->xl_count; i++) {
- if (fd->_ctx[i].key) {
- xl = (xlator_t *)(long)fd->_ctx[i].key;
- if (xl->cbks->release)
- xl->cbks->release (xl, fd);
- }
- }
- }
-
+ for (i = 0; i < fd->xl_count; i++) {
+ if (fd->_ctx[i].key) {
+ xl = fd->_ctx[i].xl_key;
+ old_THIS = THIS;
+ THIS = xl;
+ if (xl->cbks->release)
+ xl->cbks->release (xl, fd);
+ THIS = old_THIS;
+ }
+ }
+ }
+
LOCK_DESTROY (&fd->lock);
- FREE (fd->_ctx);
+ GF_FREE (fd->_ctx);
+ LOCK (&fd->inode->lock);
+ {
+ fd->inode->fd_count--;
+ }
+ UNLOCK (&fd->inode->lock);
inode_unref (fd->inode);
fd->inode = (inode_t *)0xaaaaaaaa;
- FREE (fd);
-
+ fd_lk_ctx_unref (fd->lk_ctx);
+ mem_put (fd);
out:
return;
}
+
void
fd_unref (fd_t *fd)
{
int32_t refcount = 0;
if (!fd) {
- gf_log ("fd.c", GF_LOG_ERROR, "fd is NULL");
+ gf_log_callingfn ("fd", GF_LOG_ERROR, "fd is NULL");
return;
}
-
+
LOCK (&fd->inode->lock);
{
- _fd_unref (fd);
+ __fd_unref (fd);
refcount = fd->refcount;
}
UNLOCK (&fd->inode->lock);
-
+
if (refcount == 0) {
fd_destroy (fd);
}
@@ -451,129 +551,336 @@ fd_unref (fd_t *fd)
return ;
}
+
fd_t *
-fd_bind (fd_t *fd)
+__fd_bind (fd_t *fd)
{
- inode_t *inode = fd->inode;
+ list_del_init (&fd->inode_list);
+ list_add (&fd->inode_list, &fd->inode->fd_list);
+ fd->inode->fd_count++;
- if (!fd) {
- gf_log ("fd.c", GF_LOG_ERROR, "fd is NULL");
+ return fd;
+}
+
+
+fd_t *
+fd_bind (fd_t *fd)
+{
+ if (!fd || !fd->inode) {
+ gf_log_callingfn ("fd", GF_LOG_ERROR, "!fd || !fd->inode");
return NULL;
}
- LOCK (&inode->lock);
+ LOCK (&fd->inode->lock);
{
- list_add (&fd->inode_list, &inode->fd_list);
+ fd = __fd_bind (fd);
}
- UNLOCK (&inode->lock);
-
+ UNLOCK (&fd->inode->lock);
+
return fd;
}
-fd_t *
-fd_create (inode_t *inode, pid_t pid)
+
+static fd_t *
+__fd_create (inode_t *inode, uint64_t pid)
{
fd_t *fd = NULL;
-
+
if (inode == NULL) {
- gf_log ("fd", GF_LOG_ERROR, "invalid argument");
+ gf_log_callingfn ("fd", GF_LOG_ERROR, "invalid argument");
return NULL;
}
-
- fd = CALLOC (1, sizeof (fd_t));
- ERR_ABORT (fd);
-
- fd->_ctx = CALLOC (1, (sizeof (struct _fd_ctx) *
- inode->table->xl->ctx->xl_count));
+
+ fd = mem_get0 (inode->table->fd_mem_pool);
+ if (!fd)
+ goto out;
+
+ fd->xl_count = inode->table->xl->graph->xl_count + 1;
+
+ fd->_ctx = GF_CALLOC (1, (sizeof (struct _fd_ctx) * fd->xl_count),
+ gf_common_mt_fd_ctx);
+ if (!fd->_ctx)
+ goto free_fd;
+
+ fd->lk_ctx = fd_lk_ctx_create ();
+ if (!fd->lk_ctx)
+ goto free_fd_ctx;
+
fd->inode = inode_ref (inode);
fd->pid = pid;
INIT_LIST_HEAD (&fd->inode_list);
-
+
LOCK_INIT (&fd->lock);
+out:
+ return fd;
+
+free_fd_ctx:
+ GF_FREE (fd->_ctx);
+free_fd:
+ mem_put (fd);
+
+ return NULL;
+}
+
+
+fd_t *
+fd_create (inode_t *inode, pid_t pid)
+{
+ fd_t *fd = NULL;
+
+ fd = __fd_create (inode, (uint64_t)pid);
+ if (!fd)
+ goto out;
+
+ fd = fd_ref (fd);
+
+out:
+ return fd;
+}
+
+fd_t *
+fd_create_uint64 (inode_t *inode, uint64_t pid)
+{
+ fd_t *fd = NULL;
+
+ fd = __fd_create (inode, pid);
+ if (!fd)
+ goto out;
+
+ fd = fd_ref (fd);
+
+out:
+ return fd;
+}
+
+
+static fd_t *
+__fd_lookup (inode_t *inode, uint64_t pid)
+{
+ fd_t *iter_fd = NULL;
+ fd_t *fd = NULL;
+
+ if (list_empty (&inode->fd_list))
+ return NULL;
+
+
+ list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
+ if (iter_fd->anonymous)
+ /* If someone was interested in getting an
+ anonymous fd (or was OK getting an anonymous fd),
+ they can as well call fd_anonymous() directly */
+ continue;
+
+ if (!pid || iter_fd->pid == pid) {
+ fd = __fd_ref (iter_fd);
+ break;
+ }
+ }
+
+ return fd;
+}
+
+
+fd_t *
+fd_lookup (inode_t *inode, pid_t pid)
+{
+ fd_t *fd = NULL;
+
+ if (!inode) {
+ gf_log_callingfn ("fd", GF_LOG_WARNING, "!inode");
+ return NULL;
+ }
LOCK (&inode->lock);
- fd = _fd_ref (fd);
+ {
+ fd = __fd_lookup (inode, (uint64_t)pid);
+ }
UNLOCK (&inode->lock);
return fd;
}
fd_t *
-fd_lookup (inode_t *inode, pid_t pid)
+fd_lookup_uint64 (inode_t *inode, uint64_t pid)
{
fd_t *fd = NULL;
- fd_t *iter_fd = NULL;
+
+ if (!inode) {
+ gf_log_callingfn ("fd", GF_LOG_WARNING, "!inode");
+ return NULL;
+ }
LOCK (&inode->lock);
{
- if (list_empty (&inode->fd_list)) {
- fd = NULL;
- } else {
- list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
- if (pid) {
- if (iter_fd->pid == pid) {
- fd = _fd_ref (iter_fd);
- break;
- }
- } else {
- fd = _fd_ref (iter_fd);
- break;
- }
- }
+ fd = __fd_lookup (inode, pid);
+ }
+ UNLOCK (&inode->lock);
+
+ return fd;
+}
+
+static fd_t *
+__fd_lookup_anonymous (inode_t *inode)
+{
+ fd_t *iter_fd = NULL;
+ fd_t *fd = NULL;
+
+ if (list_empty (&inode->fd_list))
+ return NULL;
+
+ list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
+ if (iter_fd->anonymous) {
+ fd = __fd_ref (iter_fd);
+ break;
}
}
+
+ return fd;
+}
+
+static fd_t *
+__fd_anonymous (inode_t *inode)
+{
+ fd_t *fd = NULL;
+
+ fd = __fd_lookup_anonymous (inode);
+
+ /* if (fd); then we already have increased the refcount in
+ __fd_lookup_anonymous(), so no need of one more fd_ref().
+ if (!fd); then both create and bind wont bump up the ref
+ count, so we have to call fd_ref() after bind. */
+ if (!fd) {
+ fd = __fd_create (inode, 0);
+
+ if (!fd)
+ return NULL;
+
+ fd->anonymous = _gf_true;
+
+ __fd_bind (fd);
+
+ __fd_ref (fd);
+ }
+
+ return fd;
+}
+
+
+fd_t *
+fd_anonymous (inode_t *inode)
+{
+ fd_t *fd = NULL;
+
+ LOCK (&inode->lock);
+ {
+ fd = __fd_anonymous (inode);
+ }
+ UNLOCK (&inode->lock);
+
+ return fd;
+}
+
+fd_t*
+fd_lookup_anonymous (inode_t *inode)
+{
+ fd_t *fd = NULL;
+
+ if (!inode) {
+ gf_log_callingfn ("fd", GF_LOG_WARNING, "!inode");
+ return NULL;
+ }
+
+ LOCK (&inode->lock);
+ {
+ fd = __fd_lookup_anonymous (inode);
+ }
UNLOCK (&inode->lock);
-
return fd;
}
+gf_boolean_t
+fd_is_anonymous (fd_t *fd)
+{
+ return (fd && fd->anonymous);
+}
+
+
uint8_t
fd_list_empty (inode_t *inode)
{
- uint8_t empty = 0;
+ uint8_t empty = 0;
LOCK (&inode->lock);
{
empty = list_empty (&inode->fd_list);
}
UNLOCK (&inode->lock);
-
+
return empty;
}
+
int
__fd_ctx_set (fd_t *fd, xlator_t *xlator, uint64_t value)
{
- int index = 0;
- int ret = 0;
- int set_idx = -1;
+ int index = 0, new_xl_count = 0;
+ int ret = 0;
+ int set_idx = -1;
+ void *begin = NULL;
+ size_t diff = 0;
+ struct _fd_ctx *tmp = NULL;
if (!fd || !xlator)
return -1;
-
- for (index = 0; index < xlator->ctx->xl_count; index++) {
+
+ for (index = 0; index < fd->xl_count; index++) {
if (!fd->_ctx[index].key) {
if (set_idx == -1)
set_idx = index;
/* dont break, to check if key already exists
further on */
}
- if (fd->_ctx[index].key == (uint64_t)(long) xlator) {
+ if (fd->_ctx[index].xl_key == xlator) {
set_idx = index;
break;
}
}
-
+
if (set_idx == -1) {
- ret = -1;
- goto out;
+ set_idx = fd->xl_count;
+
+ new_xl_count = fd->xl_count + xlator->graph->xl_count;
+
+ tmp = GF_REALLOC (fd->_ctx,
+ (sizeof (struct _fd_ctx)
+ * new_xl_count));
+ if (tmp == NULL) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING,
+ "realloc of fd->_ctx for fd "
+ "(ptr: %p) failed, cannot set the key"
+ , fd);
+ ret = -1;
+ goto out;
+ }
+
+ fd->_ctx = tmp;
+
+ begin = fd->_ctx;
+ begin += (fd->xl_count * sizeof (struct _fd_ctx));
+
+ diff = (new_xl_count - fd->xl_count )
+ * sizeof (struct _fd_ctx);
+
+ memset (begin, 0, diff);
+
+ fd->xl_count = new_xl_count;
}
-
- fd->_ctx[set_idx].key = (uint64_t)(long) xlator;
- fd->_ctx[set_idx].value = value;
+
+ fd->_ctx[set_idx].xl_key = xlator;
+ fd->_ctx[set_idx].value1 = value;
out:
- return ret;
+ return ret;
}
@@ -582,9 +889,11 @@ fd_ctx_set (fd_t *fd, xlator_t *xlator, uint64_t value)
{
int ret = 0;
- if (!fd || !xlator)
+ if (!fd || !xlator) {
+ gf_log_callingfn ("", GF_LOG_WARNING, "%p %p", fd, xlator);
return -1;
-
+ }
+
LOCK (&fd->lock);
{
ret = __fd_ctx_set (fd, xlator, value);
@@ -595,40 +904,40 @@ fd_ctx_set (fd_t *fd, xlator_t *xlator, uint64_t value)
}
-int
+int
__fd_ctx_get (fd_t *fd, xlator_t *xlator, uint64_t *value)
{
- int index = 0;
+ int index = 0;
int ret = 0;
- if (!fd || !xlator)
- return -1;
-
- for (index = 0; index < xlator->ctx->xl_count; index++) {
- if (fd->_ctx[index].key == (uint64_t)(long)xlator)
+ if (!fd || !xlator)
+ return -1;
+
+ for (index = 0; index < fd->xl_count; index++) {
+ if (fd->_ctx[index].xl_key == xlator)
break;
}
-
- if (index == xlator->ctx->xl_count) {
+
+ if (index == fd->xl_count) {
ret = -1;
goto out;
}
- if (value)
- *value = fd->_ctx[index].value;
-
+ if (value)
+ *value = fd->_ctx[index].value1;
+
out:
- return ret;
+ return ret;
}
-int
+int
fd_ctx_get (fd_t *fd, xlator_t *xlator, uint64_t *value)
{
int ret = 0;
- if (!fd || !xlator)
- return -1;
+ if (!fd || !xlator)
+ return -1;
LOCK (&fd->lock);
{
@@ -640,44 +949,44 @@ fd_ctx_get (fd_t *fd, xlator_t *xlator, uint64_t *value)
}
-int
+int
__fd_ctx_del (fd_t *fd, xlator_t *xlator, uint64_t *value)
{
- int index = 0;
+ int index = 0;
int ret = 0;
- if (!fd || !xlator)
- return -1;
-
- for (index = 0; index < xlator->ctx->xl_count; index++) {
- if (fd->_ctx[index].key == (uint64_t)(long)xlator)
+ if (!fd || !xlator)
+ return -1;
+
+ for (index = 0; index < fd->xl_count; index++) {
+ if (fd->_ctx[index].xl_key == xlator)
break;
}
-
- if (index == xlator->ctx->xl_count) {
+
+ if (index == fd->xl_count) {
ret = -1;
goto out;
}
-
- if (value)
- *value = fd->_ctx[index].value;
-
+
+ if (value)
+ *value = fd->_ctx[index].value1;
+
fd->_ctx[index].key = 0;
- fd->_ctx[index].value = 0;
+ fd->_ctx[index].value1 = 0;
out:
- return ret;
+ return ret;
}
-int
+int
fd_ctx_del (fd_t *fd, xlator_t *xlator, uint64_t *value)
{
int ret = 0;
- if (!fd || !xlator)
- return -1;
-
+ if (!fd || !xlator)
+ return -1;
+
LOCK (&fd->lock);
{
ret = __fd_ctx_del (fd, xlator, value);
@@ -695,18 +1004,18 @@ fd_dump (fd_t *fd, char *prefix)
if (!fd)
return;
-
+
memset(key, 0, sizeof(key));
- gf_proc_dump_build_key(key, prefix, "pid");
- gf_proc_dump_write(key, "%d", fd->pid);
- gf_proc_dump_build_key(key, prefix, "refcount");
- gf_proc_dump_write(key, "%d", fd->refcount);
- gf_proc_dump_build_key(key, prefix, "flags");
- gf_proc_dump_write(key, "%d", fd->flags);
+ gf_proc_dump_write("pid", "%llu", fd->pid);
+ gf_proc_dump_write("refcount", "%d", fd->refcount);
+ gf_proc_dump_write("flags", "%d", fd->flags);
+
if (fd->inode) {
- gf_proc_dump_build_key(key, prefix, "inode");
- gf_proc_dump_write(key, "%ld", fd->inode->ino);
+ gf_proc_dump_build_key (key, "inode", NULL);
+ gf_proc_dump_add_section(key);
+ inode_dump (fd->inode, key);
}
+
}
@@ -719,10 +1028,11 @@ fdentry_dump (fdentry_t *fdentry, char *prefix)
if (GF_FDENTRY_ALLOCATED != fdentry->next_free)
return;
- if (fdentry->fd)
+ if (fdentry->fd)
fd_dump(fdentry->fd, prefix);
}
+
void
fdtable_dump (fdtable_t *fdtable, char *prefix)
{
@@ -732,13 +1042,11 @@ fdtable_dump (fdtable_t *fdtable, char *prefix)
if (!fdtable)
return;
-
- ret = pthread_mutex_trylock (&fdtable->lock);
- if (ret) {
- gf_log ("fd", GF_LOG_WARNING, "Unable to acquire lock");
- return;
- }
+ ret = pthread_mutex_trylock (&fdtable->lock);
+
+ if (ret)
+ goto out;
memset(key, 0, sizeof(key));
gf_proc_dump_build_key(key, prefix, "refcount");
@@ -749,8 +1057,8 @@ fdtable_dump (fdtable_t *fdtable, char *prefix)
gf_proc_dump_write(key, "%d", fdtable->first_free);
for ( i = 0 ; i < fdtable->max_fds; i++) {
- if (GF_FDENTRY_ALLOCATED ==
- fdtable->fdentries[i].next_free) {
+ if (GF_FDENTRY_ALLOCATED ==
+ fdtable->fdentries[i].next_free) {
gf_proc_dump_build_key(key, prefix, "fdentry[%d]", i);
gf_proc_dump_add_section(key);
fdentry_dump(&fdtable->fdentries[i], key);
@@ -758,5 +1066,150 @@ fdtable_dump (fdtable_t *fdtable, char *prefix)
}
pthread_mutex_unlock(&fdtable->lock);
+
+out:
+ if (ret != 0)
+ gf_proc_dump_write ("Unable to dump the fdtable",
+ "(Lock acquistion failed) %p", fdtable);
+ return;
}
+
+void
+fd_ctx_dump (fd_t *fd, char *prefix)
+{
+ struct _fd_ctx *fd_ctx = NULL;
+ xlator_t *xl = NULL;
+ int i = 0;
+
+
+ if ((fd == NULL) || (fd->_ctx == NULL)) {
+ goto out;
+ }
+
+ LOCK (&fd->lock);
+ {
+ if (fd->_ctx != NULL) {
+ fd_ctx = GF_CALLOC (fd->xl_count, sizeof (*fd_ctx),
+ gf_common_mt_fd_ctx);
+ if (fd_ctx == NULL) {
+ goto unlock;
+ }
+
+ for (i = 0; i < fd->xl_count; i++) {
+ fd_ctx[i] = fd->_ctx[i];
+ }
+ }
+ }
+unlock:
+ UNLOCK (&fd->lock);
+
+ if (fd_ctx == NULL) {
+ goto out;
+ }
+
+ for (i = 0; i < fd->xl_count; i++) {
+ if (fd_ctx[i].xl_key) {
+ xl = (xlator_t *)(long)fd_ctx[i].xl_key;
+ if (xl->dumpops && xl->dumpops->fdctx)
+ xl->dumpops->fdctx (xl, fd);
+ }
+ }
+
+out:
+ GF_FREE (fd_ctx);
+
+ return;
+}
+
+void
+fdentry_dump_to_dict (fdentry_t *fdentry, char *prefix, dict_t *dict,
+ int *openfds)
+{
+ char key[GF_DUMP_MAX_BUF_LEN] = {0,};
+ int ret = -1;
+
+ if (!fdentry)
+ return;
+ if (!dict)
+ return;
+
+ if (GF_FDENTRY_ALLOCATED != fdentry->next_free)
+ return;
+
+ if (fdentry->fd) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pid", prefix);
+ ret = dict_set_int32 (dict, key, fdentry->fd->pid);
+ if (ret)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.refcount", prefix);
+ ret = dict_set_int32 (dict, key, fdentry->fd->refcount);
+ if (ret)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.flags", prefix);
+ ret = dict_set_int32 (dict, key, fdentry->fd->flags);
+
+ (*openfds)++;
+ }
+ return;
+}
+
+void
+fdtable_dump_to_dict (fdtable_t *fdtable, char *prefix, dict_t *dict)
+{
+ char key[GF_DUMP_MAX_BUF_LEN] = {0,};
+ int i = 0;
+ int openfds = 0;
+ int ret = -1;
+
+ if (!fdtable)
+ return;
+ if (!dict)
+ return;
+
+ ret = pthread_mutex_trylock (&fdtable->lock);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.fdtable.refcount", prefix);
+ ret = dict_set_int32 (dict, key, fdtable->refcount);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.fdtable.maxfds", prefix);
+ ret = dict_set_uint32 (dict, key, fdtable->max_fds);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.fdtable.firstfree", prefix);
+ ret = dict_set_int32 (dict, key, fdtable->first_free);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < fdtable->max_fds; i++) {
+ if (GF_FDENTRY_ALLOCATED ==
+ fdtable->fdentries[i].next_free) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.fdtable.fdentry%d",
+ prefix, i);
+ fdentry_dump_to_dict (&fdtable->fdentries[i], key,
+ dict, &openfds);
+ }
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.fdtable.openfds", prefix);
+ ret = dict_set_int32 (dict, key, openfds);
+
+out:
+ pthread_mutex_unlock (&fdtable->lock);
+ return;
+}
diff --git a/libglusterfs/src/fd.h b/libglusterfs/src/fd.h
index 1770658e8..c1b9157d8 100644
--- a/libglusterfs/src/fd.h
+++ b/libglusterfs/src/fd.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _FD_H
@@ -30,20 +21,28 @@
#include <unistd.h>
#include "glusterfs.h"
#include "locking.h"
+#include "fd-lk.h"
+#include "common-utils.h"
+
+#define GF_ANON_FD_NO -2
struct _inode;
struct _dict;
+struct fd_lk_ctx;
+
struct _fd_ctx {
- uint64_t key;
- uint64_t value;
+ union {
+ uint64_t key;
+ void *xl_key;
+ };
+ union {
+ uint64_t value1;
+ void *ptr1;
+ };
};
-/* If this structure changes, please have mercy on the booster maintainer
- * and update the fd_t struct in booster/src/booster-fd.h.
- * See the comment there to know why.
- */
struct _fd {
- pid_t pid;
+ uint64_t pid;
int32_t flags;
int32_t refcount;
struct list_head inode_list;
@@ -51,15 +50,20 @@ struct _fd {
gf_lock_t lock; /* used ONLY for manipulating
'struct _fd_ctx' array (_ctx).*/
struct _fd_ctx *_ctx;
+ int xl_count; /* Number of xl referred in this fd */
+ struct fd_lk_ctx *lk_ctx;
+ gf_boolean_t anonymous; /* geo-rep anonymous fd */
};
typedef struct _fd fd_t;
+
struct fd_table_entry {
fd_t *fd;
int next_free;
};
typedef struct fd_table_entry fdentry_t;
+
struct _fdtable {
int refcount;
uint32_t max_fds;
@@ -69,6 +73,7 @@ struct _fdtable {
};
typedef struct _fdtable fdtable_t;
+
/* Signifies no more entries in the fd table. */
#define GF_FDTABLE_END -1
@@ -80,58 +85,105 @@ typedef struct _fdtable fdtable_t;
#include "logging.h"
#include "xlator.h"
-inline void
+
+void
gf_fd_put (fdtable_t *fdtable, int32_t fd);
+
fd_t *
gf_fd_fdptr_get (fdtable_t *fdtable, int64_t fd);
+
fdtable_t *
gf_fd_fdtable_alloc (void);
-int32_t
+
+int
gf_fd_unused_get (fdtable_t *fdtable, fd_t *fdptr);
+
fdentry_t *
gf_fd_fdtable_get_all_fds (fdtable_t *fdtable, uint32_t *count);
-void
+
+void
gf_fd_fdtable_destroy (fdtable_t *fdtable);
+
+fd_t *
+__fd_ref (fd_t *fd);
+
+
fd_t *
fd_ref (fd_t *fd);
+
void
fd_unref (fd_t *fd);
+
fd_t *
fd_create (struct _inode *inode, pid_t pid);
fd_t *
+fd_create_uint64 (struct _inode *inode, uint64_t pid);
+
+fd_t *
fd_lookup (struct _inode *inode, pid_t pid);
+fd_t *
+fd_lookup_uint64 (struct _inode *inode, uint64_t pid);
+
+fd_t*
+fd_lookup_anonymous (inode_t *inode);
+
+fd_t *
+fd_anonymous (inode_t *inode);
+
+
+gf_boolean_t
+fd_is_anonymous (fd_t *fd);
+
+
uint8_t
fd_list_empty (struct _inode *inode);
+
fd_t *
fd_bind (fd_t *fd);
+
int
fd_ctx_set (fd_t *fd, xlator_t *xlator, uint64_t value);
-int
+
+int
fd_ctx_get (fd_t *fd, xlator_t *xlator, uint64_t *value);
-int
+
+int
fd_ctx_del (fd_t *fd, xlator_t *xlator, uint64_t *value);
int
+__fd_ctx_del (fd_t *fd, xlator_t *xlator, uint64_t *value);
+
+
+int
__fd_ctx_set (fd_t *fd, xlator_t *xlator, uint64_t value);
-int
+
+int
__fd_ctx_get (fd_t *fd, xlator_t *xlator, uint64_t *value);
-int
-__fd_ctx_del (fd_t *fd, xlator_t *xlator, uint64_t *value);
+
+void
+fd_ctx_dump (fd_t *fd, char *prefix);
+
+fdentry_t *
+gf_fd_fdtable_copy_all_fds (fdtable_t *fdtable, uint32_t *count);
+
+
+void
+gf_fdptr_put (fdtable_t *fdtable, fd_t *fd);
#endif /* _FD_H */
diff --git a/libglusterfs/src/gf-dirent.c b/libglusterfs/src/gf-dirent.c
index 546c2f82b..bb028c967 100644
--- a/libglusterfs/src/gf-dirent.c
+++ b/libglusterfs/src/gf-dirent.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -29,54 +20,35 @@
#include <stdint.h>
#include "compat.h"
#include "xlator.h"
-#include "byte-order.h"
-#include "protocol.h"
-
-
-struct gf_dirent_nb {
- uint64_t d_ino;
- uint64_t d_off;
- uint32_t d_len;
- uint32_t d_type;
- struct gf_stat d_stat;
- char d_name[0];
-} __attribute__((packed));
-
-
-int
-gf_dirent_nb_size (gf_dirent_t *entries)
-{
- return (sizeof (struct gf_dirent_nb) + strlen (entries->d_name) + 1);
-}
-
gf_dirent_t *
gf_dirent_for_name (const char *name)
{
- gf_dirent_t *gf_dirent = NULL;
+ gf_dirent_t *gf_dirent = NULL;
- /* TODO: use mem-pool */
- gf_dirent = CALLOC (gf_dirent_size (name), 1);
- if (!gf_dirent)
- return NULL;
+ /* TODO: use mem-pool */
+ gf_dirent = GF_CALLOC (gf_dirent_size (name), 1,
+ gf_common_mt_gf_dirent_t);
+ if (!gf_dirent)
+ return NULL;
- INIT_LIST_HEAD (&gf_dirent->list);
- strcpy (gf_dirent->d_name, name);
+ INIT_LIST_HEAD (&gf_dirent->list);
+ strcpy (gf_dirent->d_name, name);
- gf_dirent->d_off = 0;
- gf_dirent->d_ino = -1;
- gf_dirent->d_type = 0;
- gf_dirent->d_len = strlen (name);
+ gf_dirent->d_off = 0;
+ gf_dirent->d_ino = -1;
+ gf_dirent->d_type = 0;
+ gf_dirent->d_len = strlen (name);
- return gf_dirent;
+ return gf_dirent;
}
void
gf_dirent_free (gf_dirent_t *entries)
{
- gf_dirent_t *entry = NULL;
- gf_dirent_t *tmp = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
if (!entries)
return;
@@ -84,87 +56,37 @@ gf_dirent_free (gf_dirent_t *entries)
if (list_empty (&entries->list))
return;
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- list_del (&entry->list);
- FREE (entry);
- }
-}
-
-
-int
-gf_dirent_serialize (gf_dirent_t *entries, char *buf, size_t buf_size)
-{
- struct gf_dirent_nb *entry_nb = NULL;
- gf_dirent_t *entry = NULL;
- int size = 0;
- int entry_size = 0;
-
-
- list_for_each_entry (entry, &entries->list, list) {
- entry_size = gf_dirent_nb_size (entry);
+ list_for_each_entry_safe (entry, tmp, &entries->list, list) {
+ if (entry->dict)
+ dict_unref (entry->dict);
+ if (entry->inode)
+ inode_unref (entry->inode);
- if (buf && (size + entry_size <= buf_size)) {
- entry_nb = (void *) (buf + size);
-
- entry_nb->d_ino = hton64 (entry->d_ino);
- entry_nb->d_off = hton64 (entry->d_off);
- entry_nb->d_len = hton32 (entry->d_len);
- entry_nb->d_type = hton32 (entry->d_type);
-
- gf_stat_from_stat (&entry_nb->d_stat, &entry->d_stat);
-
- strcpy (entry_nb->d_name, entry->d_name);
- }
- size += entry_size;
- }
-
- return size;
+ list_del (&entry->list);
+ GF_FREE (entry);
+ }
}
-
+/* TODO: Currently, with this function, we will be breaking the
+ policy of 1-1 mapping of kernel nlookup refs with our inode_t's
+ nlookup count.
+ Need more thoughts before finalizing this function
+*/
int
-gf_dirent_unserialize (gf_dirent_t *entries, const char *buf, size_t buf_size)
+gf_link_inodes_from_dirent (xlator_t *this, inode_t *parent,
+ gf_dirent_t *entries)
{
- struct gf_dirent_nb *entry_nb = NULL;
- int remaining_size = 0;
- int least_dirent_size = 0;
- int count = 0;
- gf_dirent_t *entry = NULL;
- int entry_strlen = 0;
- int entry_len = 0;
-
-
- remaining_size = buf_size;
- least_dirent_size = (sizeof (struct gf_dirent_nb) + 2);
-
- while (remaining_size >= least_dirent_size) {
- entry_nb = (void *)(buf + (buf_size - remaining_size));
-
- entry_strlen = strnlen (entry_nb->d_name, remaining_size);
- if (entry_strlen == remaining_size) {
- break;
- }
-
- entry_len = sizeof (gf_dirent_t) + entry_strlen + 1;
- entry = CALLOC (1, entry_len);
- if (!entry) {
- break;
- }
-
- entry->d_ino = ntoh64 (entry_nb->d_ino);
- entry->d_off = ntoh64 (entry_nb->d_off);
- entry->d_len = ntoh32 (entry_nb->d_len);
- entry->d_type = ntoh32 (entry_nb->d_type);
-
- gf_stat_to_stat (&entry_nb->d_stat, &entry->d_stat);
-
- strcpy (entry->d_name, entry_nb->d_name);
-
- list_add_tail (&entry->list, &entries->list);
-
- remaining_size -= (sizeof (*entry_nb) + entry_strlen + 1);
- count++;
- }
-
- return count;
+ gf_dirent_t *entry = NULL;
+ inode_t *link_inode = NULL;
+
+ list_for_each_entry (entry, &entries->list, list) {
+ if (entry->inode) {
+ link_inode = inode_link (entry->inode, parent,
+ entry->d_name, &entry->d_stat);
+ inode_lookup (link_inode);
+ inode_unref (link_inode);
+ }
+ }
+
+ return 0;
}
diff --git a/libglusterfs/src/gf-dirent.h b/libglusterfs/src/gf-dirent.h
index baf08a58b..588d522db 100644
--- a/libglusterfs/src/gf-dirent.h
+++ b/libglusterfs/src/gf-dirent.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -26,13 +17,16 @@
#include "config.h"
#endif
+#include "iatt.h"
+#include "inode.h"
+
#define gf_dirent_size(name) (sizeof (gf_dirent_t) + strlen (name) + 1)
struct _dir_entry_t {
struct _dir_entry_t *next;
char *name;
char *link;
- struct stat buf;
+ struct iatt buf;
};
@@ -48,14 +42,17 @@ struct _gf_dirent_t {
uint64_t d_off;
uint32_t d_len;
uint32_t d_type;
- struct stat d_stat;
- char d_name[0];
+ struct iatt d_stat;
+ dict_t *dict;
+ inode_t *inode;
+ char d_name[];
};
+#define DT_ISDIR(mode) (mode == DT_DIR)
gf_dirent_t *gf_dirent_for_name (const char *name);
void gf_dirent_free (gf_dirent_t *entries);
-int gf_dirent_serialize (gf_dirent_t *entries, char *buf, size_t size);
-int gf_dirent_unserialize (gf_dirent_t *entries, const char *buf, size_t size);
+int gf_link_inodes_from_dirent (xlator_t *this, inode_t *parent,
+ gf_dirent_t *entries);
#endif /* _GF_DIRENT_H */
diff --git a/libglusterfs/src/gidcache.c b/libglusterfs/src/gidcache.c
new file mode 100644
index 000000000..c5bdda925
--- /dev/null
+++ b/libglusterfs/src/gidcache.c
@@ -0,0 +1,192 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "gidcache.h"
+#include "mem-pool.h"
+
+/*
+ * We treat this as a very simple set-associative LRU cache, with entries aged
+ * out after a configurable interval. Hardly rocket science, but lots of
+ * details to worry about.
+ */
+#define BUCKET_START(p,n) ((p) + ((n) * AUX_GID_CACHE_ASSOC))
+
+/*
+ * Initialize the cache.
+ */
+int gid_cache_init(gid_cache_t *cache, uint32_t timeout)
+{
+ if (!cache)
+ return -1;
+
+ LOCK_INIT(&cache->gc_lock);
+ cache->gc_max_age = timeout;
+ cache->gc_nbuckets = AUX_GID_CACHE_BUCKETS;
+ memset(cache->gc_cache, 0, sizeof(gid_list_t) * AUX_GID_CACHE_SIZE);
+
+ return 0;
+}
+
+/*
+ * Reconfigure the cache timeout.
+ */
+int gid_cache_reconf(gid_cache_t *cache, uint32_t timeout)
+{
+ if (!cache)
+ return -1;
+
+ LOCK(&cache->gc_lock);
+ cache->gc_max_age = timeout;
+ UNLOCK(&cache->gc_lock);
+
+ return 0;
+}
+
+/*
+ * Look up an ID in the cache. If found, return the actual cache entry to avoid
+ * an additional allocation and memory copy. The caller should copy the data and
+ * release (unlock) the cache as soon as possible.
+ */
+const gid_list_t *gid_cache_lookup(gid_cache_t *cache, uint64_t id)
+{
+ int bucket;
+ int i;
+ time_t now;
+ const gid_list_t *agl;
+
+ LOCK(&cache->gc_lock);
+ now = time(NULL);
+ bucket = id % cache->gc_nbuckets;
+ agl = BUCKET_START(cache->gc_cache, bucket);
+ for (i = 0; i < AUX_GID_CACHE_ASSOC; i++, agl++) {
+ if (!agl->gl_list)
+ continue;
+ if (agl->gl_id != id)
+ continue;
+
+ /*
+ * We don't put new entries in the cache when expiration=0, but
+ * there might be entries still in there if expiration was
+ * changed very recently. Writing the check this way ensures
+ * that they're not used.
+ */
+ if (now < agl->gl_deadline) {
+ return agl;
+ }
+
+ /*
+ * We're not going to find any more UID matches, and reaping
+ * is handled further down to maintain LRU order.
+ */
+ break;
+ }
+ UNLOCK(&cache->gc_lock);
+ return NULL;
+}
+
+/*
+ * Release an entry found via lookup.
+ */
+void gid_cache_release(gid_cache_t *cache, const gid_list_t *agl)
+{
+ UNLOCK(&cache->gc_lock);
+}
+
+/*
+ * Add a new list entry to the cache. If an entry for this ID already exists,
+ * update it.
+ */
+int gid_cache_add(gid_cache_t *cache, gid_list_t *gl)
+{
+ gid_list_t *agl;
+ int bucket;
+ int i;
+ time_t now;
+
+ if (!gl || !gl->gl_list)
+ return -1;
+
+ if (!cache->gc_max_age)
+ return 0;
+
+ LOCK(&cache->gc_lock);
+ now = time(NULL);
+
+ /*
+ * Scan for the first free entry or one that matches this id. The id
+ * check is added to address a bug where the cache might contain an
+ * expired entry for this id. Since lookup occurs in LRU order and
+ * does not reclaim entries, it will always return failure on discovery
+ * of an expired entry. This leads to duplicate entries being added,
+ * which still do not satisfy lookups until the expired entry (and
+ * everything before it) is reclaimed.
+ *
+ * We address this through reuse of an entry already allocated to this
+ * id, whether expired or not, since we have obviously already received
+ * more recent data. The entry is repopulated with the new data and a new
+ * deadline and is pushed forward to reside as the last populated entry in
+ * the bucket.
+ */
+ bucket = gl->gl_id % cache->gc_nbuckets;
+ agl = BUCKET_START(cache->gc_cache, bucket);
+ for (i = 0; i < AUX_GID_CACHE_ASSOC; ++i, ++agl) {
+ if (agl->gl_id == gl->gl_id)
+ break;
+ if (!agl->gl_list)
+ break;
+ }
+
+ /*
+ * The way we allocate free entries naturally places the newest
+ * ones at the highest indices, so evicting the lowest makes
+ * sense, but that also means we can't just replace it with the
+ * one that caused the eviction. That would cause us to thrash
+ * the first entry while others remain idle. Therefore, we
+ * need to slide the other entries down and add the new one at
+ * the end just as if the *last* slot had been free.
+ *
+ * Deadline expiration is also handled here, since the oldest
+ * expired entry will be in the first position. This does mean
+ * the bucket can stay full of expired entries if we're idle
+ * but, if the small amount of extra memory or scan time before
+ * we decide to evict someone ever become issues, we could
+ * easily add a reaper thread.
+ */
+
+ if (i >= AUX_GID_CACHE_ASSOC) {
+ /* cache full, evict the first (LRU) entry */
+ i = 0;
+ agl = BUCKET_START(cache->gc_cache, bucket);
+ GF_FREE(agl->gl_list);
+ } else if (agl->gl_list) {
+ /* evict the old entry we plan to reuse */
+ GF_FREE(agl->gl_list);
+ }
+
+ /*
+ * If we have evicted an entry, slide the subsequent populated entries
+ * back and populate the last entry.
+ */
+ for (; i < AUX_GID_CACHE_ASSOC - 1; i++) {
+ if (!agl[1].gl_list)
+ break;
+ agl[0] = agl[1];
+ agl++;
+ }
+
+ agl->gl_id = gl->gl_id;
+ agl->gl_count = gl->gl_count;
+ agl->gl_list = gl->gl_list;
+ agl->gl_deadline = now + cache->gc_max_age;
+
+ UNLOCK(&cache->gc_lock);
+
+ return 1;
+}
diff --git a/libglusterfs/src/gidcache.h b/libglusterfs/src/gidcache.h
new file mode 100644
index 000000000..9379f8e8b
--- /dev/null
+++ b/libglusterfs/src/gidcache.h
@@ -0,0 +1,53 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __GIDCACHE_H__
+#define __GIDCACHE_H__
+
+#include "glusterfs.h"
+#include "locking.h"
+
+/*
+ * TBD: make the cache size tunable
+ *
+ * The current size represents a pretty trivial amount of memory, and should
+ * provide good hit rates even for quite busy systems. If we ever want to
+ * support really large cache sizes, we'll need to do dynamic allocation
+ * instead of just defining an array within a private structure. It doesn't make
+ * a whole lot of sense to change the associativity, because it won't improve
+ * hit rates all that much and will increase the maintenance cost as we have
+ * to scan more entries with every lookup/update.
+ */
+
+#define AUX_GID_CACHE_ASSOC 4
+#define AUX_GID_CACHE_BUCKETS 256
+#define AUX_GID_CACHE_SIZE (AUX_GID_CACHE_ASSOC * AUX_GID_CACHE_BUCKETS)
+
+typedef struct {
+ uint64_t gl_id;
+ int gl_count;
+ gid_t *gl_list;
+ time_t gl_deadline;
+} gid_list_t;
+
+typedef struct {
+ gf_lock_t gc_lock;
+ uint32_t gc_max_age;
+ unsigned int gc_nbuckets;
+ gid_list_t gc_cache[AUX_GID_CACHE_SIZE];
+} gid_cache_t;
+
+int gid_cache_init(gid_cache_t *, uint32_t);
+int gid_cache_reconf(gid_cache_t *, uint32_t);
+const gid_list_t *gid_cache_lookup(gid_cache_t *, uint64_t);
+void gid_cache_release(gid_cache_t *, const gid_list_t *);
+int gid_cache_add(gid_cache_t *, gid_list_t *);
+
+#endif /* __GIDCACHE_H__ */
diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c
index ae7fdcd61..259c5c885 100644
--- a/libglusterfs/src/globals.c
+++ b/libglusterfs/src/globals.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -24,54 +15,78 @@
#include <pthread.h>
-#include "globals.h"
#include "glusterfs.h"
+#include "globals.h"
#include "xlator.h"
-
-
-/* CTX */
-static glusterfs_ctx_t *glusterfs_ctx;
-
-
-int
-glusterfs_ctx_init ()
-{
- int ret = 0;
-
- if (glusterfs_ctx)
- goto out;
-
- glusterfs_ctx = CALLOC (1, sizeof (*glusterfs_ctx));
- if (!glusterfs_ctx) {
- ret = -1;
- goto out;
- }
-
- ret = pthread_mutex_init (&glusterfs_ctx->lock, NULL);
-
-out:
- return ret;
-}
-
-
-glusterfs_ctx_t *
-glusterfs_ctx_get ()
-{
- return glusterfs_ctx;
-
-}
-
-
+#include "mem-pool.h"
+#include "syncop.h"
+
+const char *gf_fop_list[GF_FOP_MAXVALUE] = {
+ [GF_FOP_NULL] = "NULL",
+ [GF_FOP_STAT] = "STAT",
+ [GF_FOP_READLINK] = "READLINK",
+ [GF_FOP_MKNOD] = "MKNOD",
+ [GF_FOP_MKDIR] = "MKDIR",
+ [GF_FOP_UNLINK] = "UNLINK",
+ [GF_FOP_RMDIR] = "RMDIR",
+ [GF_FOP_SYMLINK] = "SYMLINK",
+ [GF_FOP_RENAME] = "RENAME",
+ [GF_FOP_LINK] = "LINK",
+ [GF_FOP_TRUNCATE] = "TRUNCATE",
+ [GF_FOP_OPEN] = "OPEN",
+ [GF_FOP_READ] = "READ",
+ [GF_FOP_WRITE] = "WRITE",
+ [GF_FOP_STATFS] = "STATFS",
+ [GF_FOP_FLUSH] = "FLUSH",
+ [GF_FOP_FSYNC] = "FSYNC",
+ [GF_FOP_SETXATTR] = "SETXATTR",
+ [GF_FOP_GETXATTR] = "GETXATTR",
+ [GF_FOP_REMOVEXATTR] = "REMOVEXATTR",
+ [GF_FOP_OPENDIR] = "OPENDIR",
+ [GF_FOP_FSYNCDIR] = "FSYNCDIR",
+ [GF_FOP_ACCESS] = "ACCESS",
+ [GF_FOP_CREATE] = "CREATE",
+ [GF_FOP_FTRUNCATE] = "FTRUNCATE",
+ [GF_FOP_FSTAT] = "FSTAT",
+ [GF_FOP_LK] = "LK",
+ [GF_FOP_LOOKUP] = "LOOKUP",
+ [GF_FOP_READDIR] = "READDIR",
+ [GF_FOP_INODELK] = "INODELK",
+ [GF_FOP_FINODELK] = "FINODELK",
+ [GF_FOP_ENTRYLK] = "ENTRYLK",
+ [GF_FOP_FENTRYLK] = "FENTRYLK",
+ [GF_FOP_XATTROP] = "XATTROP",
+ [GF_FOP_FXATTROP] = "FXATTROP",
+ [GF_FOP_FSETXATTR] = "FSETXATTR",
+ [GF_FOP_FGETXATTR] = "FGETXATTR",
+ [GF_FOP_RCHECKSUM] = "RCHECKSUM",
+ [GF_FOP_SETATTR] = "SETATTR",
+ [GF_FOP_FSETATTR] = "FSETATTR",
+ [GF_FOP_READDIRP] = "READDIRP",
+ [GF_FOP_GETSPEC] = "GETSPEC",
+ [GF_FOP_FORGET] = "FORGET",
+ [GF_FOP_RELEASE] = "RELEASE",
+ [GF_FOP_RELEASEDIR] = "RELEASEDIR",
+ [GF_FOP_FREMOVEXATTR]= "FREMOVEXATTR",
+ [GF_FOP_FALLOCATE] = "FALLOCATE",
+ [GF_FOP_DISCARD] = "DISCARD",
+ [GF_FOP_ZEROFILL] = "ZEROFILL",
+};
/* THIS */
xlator_t global_xlator;
static pthread_key_t this_xlator_key;
+static pthread_key_t synctask_key;
+static pthread_key_t uuid_buf_key;
+static char global_uuid_buf[GF_UUID_BUF_SIZE];
+static pthread_key_t lkowner_buf_key;
+static char global_lkowner_buf[GF_LKOWNER_BUF_SIZE];
+
void
glusterfs_this_destroy (void *ptr)
{
- if (ptr)
- FREE (ptr);
+ FREE (ptr);
}
@@ -82,12 +97,15 @@ glusterfs_this_init ()
ret = pthread_key_create (&this_xlator_key, glusterfs_this_destroy);
if (ret != 0) {
+ gf_log ("", GF_LOG_WARNING, "failed to create the pthread key");
return ret;
}
global_xlator.name = "glusterfs";
global_xlator.type = "global";
+ INIT_LIST_HEAD (&global_xlator.volume_options);
+
return ret;
}
@@ -148,78 +166,196 @@ glusterfs_this_set (xlator_t *this)
return 0;
}
+/* SYNCOPCTX */
+static pthread_key_t syncopctx_key;
+
+static void
+syncopctx_key_destroy (void *ptr)
+{
+ struct syncopctx *opctx = ptr;
-/* IS_CENTRAL_LOG */
+ if (opctx) {
+ if (opctx->groups)
+ GF_FREE (opctx->groups);
-static pthread_key_t central_log_flag_key;
+ GF_FREE (opctx);
+ }
-void
-glusterfs_central_log_flag_destroy (void *ptr)
+ return;
+}
+
+void *
+syncopctx_getctx ()
+{
+ void *opctx = NULL;
+
+ opctx = pthread_getspecific (syncopctx_key);
+
+ return opctx;
+}
+
+int
+syncopctx_setctx (void *ctx)
+{
+ int ret = 0;
+
+ ret = pthread_setspecific (syncopctx_key, ctx);
+
+ return ret;
+}
+
+static int
+syncopctx_init (void)
{
- if (ptr)
- FREE (ptr);
+ int ret;
+
+ ret = pthread_key_create (&syncopctx_key, syncopctx_key_destroy);
+
+ return ret;
}
+/* SYNCTASK */
int
-glusterfs_central_log_flag_init ()
+synctask_init ()
{
- int ret = 0;
+ int ret = 0;
- ret = pthread_key_create (&central_log_flag_key,
- glusterfs_central_log_flag_destroy);
+ ret = pthread_key_create (&synctask_key, NULL);
+
+ return ret;
+}
+
+void *
+synctask_get ()
+{
+ void *synctask = NULL;
+
+ synctask = pthread_getspecific (synctask_key);
+
+ return synctask;
+}
- if (ret != 0) {
- return ret;
- }
- pthread_setspecific (central_log_flag_key, (void *) 0);
+int
+synctask_set (void *synctask)
+{
+ int ret = 0;
+
+ pthread_setspecific (synctask_key, synctask);
return ret;
}
+//UUID_BUFFER
void
-glusterfs_central_log_flag_set ()
+glusterfs_uuid_buf_destroy (void *ptr)
{
- pthread_setspecific (central_log_flag_key, (void *) 1);
+ FREE (ptr);
}
+int
+glusterfs_uuid_buf_init ()
+{
+ int ret = 0;
+
+ ret = pthread_key_create (&uuid_buf_key,
+ glusterfs_uuid_buf_destroy);
+ return ret;
+}
-long
-glusterfs_central_log_flag_get ()
+char *
+glusterfs_uuid_buf_get ()
{
- long flag = 0;
+ char *buf;
+ int ret = 0;
- flag = (long) pthread_getspecific (central_log_flag_key);
-
- return flag;
+ buf = pthread_getspecific (uuid_buf_key);
+ if(!buf) {
+ buf = MALLOC (GF_UUID_BUF_SIZE);
+ ret = pthread_setspecific (uuid_buf_key, (void *) buf);
+ if (ret)
+ buf = global_uuid_buf;
+ }
+ return buf;
}
+/* LKOWNER_BUFFER */
void
-glusterfs_central_log_flag_unset ()
+glusterfs_lkowner_buf_destroy (void *ptr)
{
- pthread_setspecific (central_log_flag_key, (void *) 0);
+ FREE (ptr);
}
+int
+glusterfs_lkowner_buf_init ()
+{
+ int ret = 0;
+
+ ret = pthread_key_create (&lkowner_buf_key,
+ glusterfs_lkowner_buf_destroy);
+ return ret;
+}
+
+char *
+glusterfs_lkowner_buf_get ()
+{
+ char *buf;
+ int ret = 0;
+
+ buf = pthread_getspecific (lkowner_buf_key);
+ if(!buf) {
+ buf = MALLOC (GF_LKOWNER_BUF_SIZE);
+ ret = pthread_setspecific (lkowner_buf_key, (void *) buf);
+ if (ret)
+ buf = global_lkowner_buf;
+ }
+ return buf;
+}
int
-glusterfs_globals_init ()
+glusterfs_globals_init (glusterfs_ctx_t *ctx)
{
int ret = 0;
- ret = glusterfs_ctx_init ();
- if (ret)
- goto out;
+ gf_log_globals_init (ctx);
ret = glusterfs_this_init ();
- if (ret)
+ if (ret) {
+ gf_log ("", GF_LOG_CRITICAL,
+ "ERROR: glusterfs-translator init failed");
goto out;
+ }
+
+ ret = glusterfs_uuid_buf_init ();
+ if(ret) {
+ gf_log ("", GF_LOG_CRITICAL,
+ "ERROR: glusterfs uuid buffer init failed");
+ goto out;
+ }
- ret = glusterfs_central_log_flag_init ();
- if (ret)
+ ret = glusterfs_lkowner_buf_init ();
+ if(ret) {
+ gf_log ("", GF_LOG_CRITICAL,
+ "ERROR: glusterfs lkowner buffer init failed");
goto out;
+ }
+
+ ret = synctask_init ();
+ if (ret) {
+ gf_log ("", GF_LOG_CRITICAL,
+ "ERROR: glusterfs synctask init failed");
+ goto out;
+ }
+
+ ret = syncopctx_init ();
+ if (ret) {
+ gf_log ("", GF_LOG_CRITICAL,
+ "ERROR: glusterfs syncopctx init failed");
+ goto out;
+ }
out:
return ret;
}
diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h
index 2f284ca9b..3085db21c 100644
--- a/libglusterfs/src/globals.h
+++ b/libglusterfs/src/globals.h
@@ -1,32 +1,38 @@
/*
- Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _GLOBALS_H
#define _GLOBALS_H
-#include "glusterfs.h"
-#include "xlator.h"
-
-/* CTX */
-#define CTX (glusterfs_ctx_get())
+#define GF_DEFAULT_BASE_PORT 24007
+
+#define GD_OP_VERSION_KEY "operating-version"
+#define GD_MIN_OP_VERSION_KEY "minimum-operating-version"
+#define GD_MAX_OP_VERSION_KEY "maximum-operating-version"
+
+/* Gluster versions - OP-VERSION mapping
+ *
+ * 3.3.0 - 1
+ * 3.4.0 - 2
+ * 3.next (3.5?) - 3
+ *
+ * TODO: Change above comment once gluster version is finalised
+ * TODO: Finalize the op-version ranges
+ */
+#define GD_OP_VERSION_MIN 1 /* MIN is the fresh start op-version, mostly
+ should not change */
+#define GD_OP_VERSION_MAX 3 /* MAX VERSION is the maximum count in VME table,
+ should keep changing with introduction of newer
+ versions */
-glusterfs_ctx_t *glusterfs_ctx_get ();
+#include "xlator.h"
/* THIS */
#define THIS (*__glusterfs_this_location())
@@ -35,12 +41,22 @@ xlator_t **__glusterfs_this_location ();
xlator_t *glusterfs_this_get ();
int glusterfs_this_set (xlator_t *);
-void glusterfs_central_log_flag_set ();
-long glusterfs_central_log_flag_get ();
-void glusterfs_central_log_flag_unset ();
+/* syncopctx */
+void *syncopctx_getctx ();
+int syncopctx_setctx (void *ctx);
+
+/* task */
+void *synctask_get ();
+int synctask_set (void *);
+/* uuid_buf */
+char *glusterfs_uuid_buf_get();
+/* lkowner_buf */
+char *glusterfs_lkowner_buf_get();
/* init */
-int glusterfs_globals_init (void);
+int glusterfs_globals_init (glusterfs_ctx_t *ctx);
+
+extern const char *gf_fop_list[];
#endif /* !_GLOBALS_H */
diff --git a/libglusterfs/src/glusterfs-acl.h b/libglusterfs/src/glusterfs-acl.h
new file mode 100644
index 000000000..b7de1cdb4
--- /dev/null
+++ b/libglusterfs/src/glusterfs-acl.h
@@ -0,0 +1,81 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERFS_ACL_H
+#define _GLUSTERFS_ACL_H
+
+#include <stdint.h>
+#include <sys/types.h> /* For uid_t */
+
+#include "locking.h" /* For gf_lock_t in struct posix_acl_conf */
+
+#define ACL_PROGRAM 100227
+#define ACLV3_VERSION 3
+
+#define POSIX_ACL_MINIMAL_ACE_COUNT 3
+
+#define POSIX_ACL_READ (0x04)
+#define POSIX_ACL_WRITE (0x02)
+#define POSIX_ACL_EXECUTE (0x01)
+
+#define POSIX_ACL_UNDEFINED_TAG (0x00)
+#define POSIX_ACL_USER_OBJ (0x01)
+#define POSIX_ACL_USER (0x02)
+#define POSIX_ACL_GROUP_OBJ (0x04)
+#define POSIX_ACL_GROUP (0x08)
+#define POSIX_ACL_MASK (0x10)
+#define POSIX_ACL_OTHER (0x20)
+
+#define POSIX_ACL_UNDEFINED_ID (-1)
+
+#define POSIX_ACL_VERSION (0x02)
+
+#define POSIX_ACL_ACCESS_XATTR "system.posix_acl_access"
+#define POSIX_ACL_DEFAULT_XATTR "system.posix_acl_default"
+
+struct posix_acl_xattr_entry {
+ uint16_t tag;
+ uint16_t perm;
+ uint32_t id;
+};
+
+struct posix_acl_xattr_header {
+ uint32_t version;
+ struct posix_acl_xattr_entry entries[];
+};
+
+struct posix_ace {
+ uint16_t tag;
+ uint16_t perm;
+ uint32_t id;
+};
+
+
+struct posix_acl {
+ int refcnt;
+ int count;
+ struct posix_ace entries[];
+};
+
+struct posix_acl_ctx {
+ uid_t uid;
+ gid_t gid;
+ mode_t perm;
+ struct posix_acl *acl_access;
+ struct posix_acl *acl_default;
+};
+
+struct posix_acl_conf {
+ gf_lock_t acl_lock;
+ uid_t super_uid;
+ struct posix_acl *minimal_acl;
+};
+
+#endif /* _GLUSTERFS_ACL_H */
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index e522c3176..2f1e12ee7 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _GLUSTERFS_H
@@ -41,16 +32,23 @@
#include <arpa/inet.h>
#include <sys/poll.h>
#include <pthread.h>
+#include <limits.h> /* For PATH_MAX */
#include "list.h"
#include "logging.h"
+#include "lkowner.h"
#define GF_YES 1
#define GF_NO 0
#ifndef O_LARGEFILE
/* savannah bug #20053, patch for compiling on darwin */
-#define O_LARGEFILE 0
+#define O_LARGEFILE 0100000 /* from bits/fcntl.h */
+#endif
+
+#ifndef O_FMODE_EXEC
+/* redhat bug 843080, added from linux/fs.h */
+#define O_FMODE_EXEC 040 //0x20
#endif
#ifndef O_DIRECT
@@ -63,223 +61,434 @@
#define O_DIRECTORY 0
#endif
+#ifndef EBADFD
+/* Mac OS X does not have EBADFD */
+#define EBADFD EBADF
+#endif
+
+#ifndef FNM_EXTMATCH
+#define FNM_EXTMATCH 0
+#endif
+
+#define GLUSTERD_MAX_SNAP_NAME 256
+#define ZR_MOUNTPOINT_OPT "mountpoint"
+#define ZR_ATTR_TIMEOUT_OPT "attribute-timeout"
+#define ZR_ENTRY_TIMEOUT_OPT "entry-timeout"
+#define ZR_NEGATIVE_TIMEOUT_OPT "negative-timeout"
+#define ZR_DIRECT_IO_OPT "direct-io-mode"
+#define ZR_STRICT_VOLFILE_CHECK "strict-volfile-check"
+#define ZR_DUMP_FUSE "dump-fuse"
+#define ZR_FUSE_MOUNTOPTS "fuse-mountopts"
+
+#define GF_XATTR_CLRLK_CMD "glusterfs.clrlk"
+#define GF_XATTR_PATHINFO_KEY "trusted.glusterfs.pathinfo"
+#define GF_XATTR_NODE_UUID_KEY "trusted.glusterfs.node-uuid"
+#define GF_XATTR_VOL_ID_KEY "trusted.glusterfs.volume-id"
+#define GF_XATTR_LOCKINFO_KEY "trusted.glusterfs.lockinfo"
+#define GF_XATTR_GET_REAL_FILENAME_KEY "user.glusterfs.get_real_filename:"
+
+#define GF_READDIR_SKIP_DIRS "readdir-filter-directories"
+
+#define BD_XATTR_KEY "user.glusterfs"
+
+#define XATTR_IS_PATHINFO(x) (strncmp (x, GF_XATTR_PATHINFO_KEY, \
+ strlen (GF_XATTR_PATHINFO_KEY)) == 0)
+#define XATTR_IS_NODE_UUID(x) (strncmp (x, GF_XATTR_NODE_UUID_KEY, \
+ strlen (GF_XATTR_NODE_UUID_KEY)) == 0)
+#define XATTR_IS_LOCKINFO(x) (strncmp (x, GF_XATTR_LOCKINFO_KEY, \
+ strlen (GF_XATTR_LOCKINFO_KEY)) == 0)
+
+#define XATTR_IS_BD(x) (strncmp (x, BD_XATTR_KEY, strlen (BD_XATTR_KEY)) == 0)
+
+#define GF_XATTR_LINKINFO_KEY "trusted.distribute.linkinfo"
+#define GFID_XATTR_KEY "trusted.gfid"
+#define VIRTUAL_GFID_XATTR_KEY_STR "glusterfs.gfid.string"
+#define VIRTUAL_GFID_XATTR_KEY "glusterfs.gfid"
+#define UUID_CANONICAL_FORM_LEN 36
+
+#define GLUSTERFS_INTERNAL_FOP_KEY "glusterfs-internal-fop"
+
#define ZR_FILE_CONTENT_STR "glusterfs.file."
#define ZR_FILE_CONTENT_STRLEN 15
+#define GLUSTERFS_WRITE_IS_APPEND "glusterfs.write-is-append"
#define GLUSTERFS_OPEN_FD_COUNT "glusterfs.open-fd-count"
+#define GLUSTERFS_INODELK_COUNT "glusterfs.inodelk-count"
+#define GLUSTERFS_ENTRYLK_COUNT "glusterfs.entrylk-count"
+#define GLUSTERFS_POSIXLK_COUNT "glusterfs.posixlk-count"
+#define GLUSTERFS_PARENT_ENTRYLK "glusterfs.parent-entrylk"
+#define GLUSTERFS_INODELK_DOM_COUNT "glusterfs.inodelk-dom-count"
+#define QUOTA_SIZE_KEY "trusted.glusterfs.quota.size"
+#define GFID_TO_PATH_KEY "glusterfs.gfid2path"
+#define GF_XATTR_STIME_PATTERN "trusted.glusterfs.*.stime"
+
+/* Index xlator related */
+#define GF_XATTROP_INDEX_GFID "glusterfs.xattrop_index_gfid"
+#define GF_BASE_INDICES_HOLDER_GFID "glusterfs.base_indicies_holder_gfid"
+
+#define GF_GFIDLESS_LOOKUP "gfidless-lookup"
+/* replace-brick and pump related internal xattrs */
+#define RB_PUMP_CMD_START "glusterfs.pump.start"
+#define RB_PUMP_CMD_PAUSE "glusterfs.pump.pause"
+#define RB_PUMP_CMD_COMMIT "glusterfs.pump.commit"
+#define RB_PUMP_CMD_ABORT "glusterfs.pump.abort"
+#define RB_PUMP_CMD_STATUS "glusterfs.pump.status"
+
+#define GLUSTERFS_RDMA_INLINE_THRESHOLD (2048)
+#define GLUSTERFS_RDMA_MAX_HEADER_SIZE (228) /* (sizeof (rdma_header_t) \
+ + RDMA_MAX_SEGMENTS \
+ * sizeof (rdma_read_chunk_t))
+ */
+
+#define GLUSTERFS_RPC_REPLY_SIZE 24
#define ZR_FILE_CONTENT_REQUEST(key) (!strncmp(key, ZR_FILE_CONTENT_STR, \
- ZR_FILE_CONTENT_STRLEN))
+ ZR_FILE_CONTENT_STRLEN))
-/* TODO: Should we use PATH-MAX? On some systems it may save space */
-#define ZR_PATH_MAX 4096
+#define DEFAULT_VAR_RUN_DIRECTORY DATADIR "/run/gluster"
+#define GF_REPLICATE_TRASH_DIR ".landfill"
+/* GlusterFS's maximum supported Auxilary GIDs */
+/* TODO: Keeping it to 200, so that we can fit in 2KB buffer for auth data
+ * in RPC server code, if there is ever need for having more aux-gids, then
+ * we have to add aux-gid in payload of actors */
+#define GF_MAX_AUX_GROUPS 65536
+
+#define GF_UUID_BUF_SIZE 50
+
+#define GF_REBALANCE_TID_KEY "rebalance-id"
+#define GF_REMOVE_BRICK_TID_KEY "remove-brick-id"
+#define GF_REPLACE_BRICK_TID_KEY "replace-brick-id"
+
+#define UUID_CANONICAL_FORM_LEN 36
+
+/* Adding this here instead of any glusterd*.h files as it is also required by
+ * cli
+ */
+#define DEFAULT_GLUSTERD_SOCKFILE DATADIR "/run/glusterd.socket"
/* NOTE: add members ONLY at the end (just before _MAXVALUE) */
typedef enum {
- GF_FOP_STAT, /* 0 */
- GF_FOP_READLINK, /* 1 */
- GF_FOP_MKNOD, /* 2 */
+ GF_FOP_NULL = 0,
+ GF_FOP_STAT,
+ GF_FOP_READLINK,
+ GF_FOP_MKNOD,
GF_FOP_MKDIR,
GF_FOP_UNLINK,
- GF_FOP_RMDIR, /* 5 */
+ GF_FOP_RMDIR,
GF_FOP_SYMLINK,
GF_FOP_RENAME,
GF_FOP_LINK,
- GF_FOP_CHMOD,
- GF_FOP_CHOWN, /* 10 */
GF_FOP_TRUNCATE,
GF_FOP_OPEN,
GF_FOP_READ,
GF_FOP_WRITE,
- GF_FOP_STATFS, /* 15 */
+ GF_FOP_STATFS,
GF_FOP_FLUSH,
- GF_FOP_FSYNC,
+ GF_FOP_FSYNC, /* 15 */
GF_FOP_SETXATTR,
GF_FOP_GETXATTR,
- GF_FOP_REMOVEXATTR,/* 20 */
+ GF_FOP_REMOVEXATTR,
GF_FOP_OPENDIR,
- GF_FOP_GETDENTS,
GF_FOP_FSYNCDIR,
GF_FOP_ACCESS,
- GF_FOP_CREATE, /* 25 */
+ GF_FOP_CREATE,
GF_FOP_FTRUNCATE,
- GF_FOP_FSTAT,
+ GF_FOP_FSTAT, /* 25 */
GF_FOP_LK,
- GF_FOP_UTIMENS,
- GF_FOP_FCHMOD, /* 30 */
- GF_FOP_FCHOWN,
GF_FOP_LOOKUP,
- GF_FOP_SETDENTS,
GF_FOP_READDIR,
- GF_FOP_INODELK, /* 35 */
+ GF_FOP_INODELK,
GF_FOP_FINODELK,
- GF_FOP_ENTRYLK,
- GF_FOP_FENTRYLK,
- GF_FOP_CHECKSUM,
- GF_FOP_XATTROP, /* 40 */
+ GF_FOP_ENTRYLK,
+ GF_FOP_FENTRYLK,
+ GF_FOP_XATTROP,
GF_FOP_FXATTROP,
- GF_FOP_LOCK_NOTIFY,
- GF_FOP_LOCK_FNOTIFY,
GF_FOP_FGETXATTR,
GF_FOP_FSETXATTR,
+ GF_FOP_RCHECKSUM,
+ GF_FOP_SETATTR,
+ GF_FOP_FSETATTR,
+ GF_FOP_READDIRP,
+ GF_FOP_FORGET,
+ GF_FOP_RELEASE,
+ GF_FOP_RELEASEDIR,
+ GF_FOP_GETSPEC,
+ GF_FOP_FREMOVEXATTR,
+ GF_FOP_FALLOCATE,
+ GF_FOP_DISCARD,
+ GF_FOP_ZEROFILL,
GF_FOP_MAXVALUE,
} glusterfs_fop_t;
-/* NOTE: add members ONLY at the end (just before _MAXVALUE) */
-typedef enum {
- GF_MOP_SETVOLUME, /* 0 */
- GF_MOP_GETVOLUME, /* 1 */
- GF_MOP_STATS,
- GF_MOP_SETSPEC,
- GF_MOP_GETSPEC,
- GF_MOP_PING,
- GF_MOP_LOG,
- GF_MOP_MAXVALUE /* 5 */
-} glusterfs_mop_t;
typedef enum {
- GF_CBK_FORGET, /* 0 */
- GF_CBK_RELEASE, /* 1 */
- GF_CBK_RELEASEDIR, /* 2 */
- GF_CBK_MAXVALUE /* 3 */
-} glusterfs_cbk_t;
+ GF_MGMT_NULL = 0,
+ GF_MGMT_MAXVALUE,
+} glusterfs_mgmt_t;
typedef enum {
- GF_OP_TYPE_FOP_REQUEST = 1,
- GF_OP_TYPE_MOP_REQUEST,
- GF_OP_TYPE_CBK_REQUEST,
- GF_OP_TYPE_FOP_REPLY,
- GF_OP_TYPE_MOP_REPLY,
- GF_OP_TYPE_CBK_REPLY
-} glusterfs_op_type_t;
+ GF_OP_TYPE_NULL = 0,
+ GF_OP_TYPE_FOP,
+ GF_OP_TYPE_MGMT,
+ GF_OP_TYPE_MAX,
+} gf_op_type_t;
/* NOTE: all the miscellaneous flags used by GlusterFS should be listed here */
typedef enum {
GF_LK_GETLK = 0,
GF_LK_SETLK,
GF_LK_SETLKW,
+ GF_LK_RESLK_LCK,
+ GF_LK_RESLK_LCKW,
+ GF_LK_RESLK_UNLCK,
+ GF_LK_GETLK_FD,
} glusterfs_lk_cmds_t;
+
typedef enum {
GF_LK_F_RDLCK = 0,
GF_LK_F_WRLCK,
- GF_LK_F_UNLCK
+ GF_LK_F_UNLCK,
+ GF_LK_EOL,
} glusterfs_lk_types_t;
typedef enum {
- GF_LOCK_POSIX,
+ F_RESLK_LCK = 200,
+ F_RESLK_LCKW,
+ F_RESLK_UNLCK,
+ F_GETLK_FD,
+} glusterfs_lk_recovery_cmds_t;
+
+typedef enum {
+ GF_LOCK_POSIX,
GF_LOCK_INTERNAL
} gf_lk_domain_t;
+
typedef enum {
- ENTRYLK_LOCK,
- ENTRYLK_UNLOCK,
- ENTRYLK_LOCK_NB
+ ENTRYLK_LOCK,
+ ENTRYLK_UNLOCK,
+ ENTRYLK_LOCK_NB
} entrylk_cmd;
+
typedef enum {
- ENTRYLK_RDLCK,
- ENTRYLK_WRLCK
+ ENTRYLK_RDLCK,
+ ENTRYLK_WRLCK
} entrylk_type;
-typedef enum {
- GF_GET_ALL = 1,
- GF_GET_DIR_ONLY,
- GF_GET_SYMLINK_ONLY,
- GF_GET_REGULAR_FILES_ONLY,
-} glusterfs_getdents_flags_t;
typedef enum {
- GF_XATTROP_ADD_ARRAY,
+ GF_XATTROP_ADD_ARRAY,
+ GF_XATTROP_ADD_ARRAY64,
+ GF_XATTROP_OR_ARRAY,
+ GF_XATTROP_AND_ARRAY
} gf_xattrop_flags_t;
+
#define GF_SET_IF_NOT_PRESENT 0x1 /* default behaviour */
#define GF_SET_OVERWRITE 0x2 /* Overwrite with the buf given */
#define GF_SET_DIR_ONLY 0x4
#define GF_SET_EPOCH_TIME 0x8 /* used by afr dir lookup selfheal */
+/* key value which quick read uses to get small files in lookup cbk */
+#define GF_CONTENT_KEY "glusterfs.content"
struct _xlator_cmdline_option {
- struct list_head cmd_args;
- char *volume;
- char *key;
- char *value;
+ struct list_head cmd_args;
+ char *volume;
+ char *key;
+ char *value;
};
typedef struct _xlator_cmdline_option xlator_cmdline_option_t;
+struct _server_cmdline {
+ struct list_head list;
+ char *volfile_server;
+};
+typedef struct _server_cmdline server_cmdline_t;
+
+#define GF_OPTION_ENABLE _gf_true
+#define GF_OPTION_DISABLE _gf_false
+#define GF_OPTION_DEFERRED 2
+
struct _cmd_args {
- /* basic options */
- char *volfile_server;
- char *volume_file;
- char *log_server;
- gf_loglevel_t log_level;
- char *log_file;
+ /* basic options */
+ char *volfile_server;
+ server_cmdline_t *curr_server;
+ /* List of backup volfile servers, including original */
+ struct list_head volfile_servers;
+ char *volfile;
+ char *log_server;
+ gf_loglevel_t log_level;
+ char *log_file;
int32_t max_connect_attempts;
- /* advanced options */
- uint32_t volfile_server_port;
- char *volfile_server_transport;
+ /* advanced options */
+ uint32_t volfile_server_port;
+ char *volfile_server_transport;
uint32_t log_server_port;
- char *pid_file;
- int no_daemon_mode;
- char *run_id;
- int debug_mode;
- struct list_head xlator_options; /* list of xlator_option_t */
-
- /* fuse options */
- int fuse_direct_io_mode_flag;
+ char *pid_file;
+ char *sock_file;
+ int no_daemon_mode;
+ char *run_id;
+ int debug_mode;
+ int read_only;
+ int acl;
+ int selinux;
+ int enable_ino32;
+ int worm;
+ int mac_compat;
+ int fopen_keep_cache;
+ int gid_timeout;
+ int aux_gfid_mount;
+ struct list_head xlator_options; /* list of xlator_option_t */
+
+ /* fuse options */
+ int fuse_direct_io_mode;
+ char *use_readdirp;
int volfile_check;
- double fuse_entry_timeout;
- double fuse_attribute_timeout;
- char *volume_name;
- int non_local; /* Used only by darwin os,
- used for '-o local' option */
- char *icon_name; /* This string will appear as
- Desktop icon name when mounted
- on darwin */
- int fuse_nodev;
- int fuse_nosuid;
-
- /* key args */
- char *mount_point;
- char *volfile_id;
+ double fuse_entry_timeout;
+ double fuse_negative_timeout;
+ double fuse_attribute_timeout;
+ char *volume_name;
+ int fuse_nodev;
+ int fuse_nosuid;
+ char *dump_fuse;
+ pid_t client_pid;
+ int client_pid_set;
+ unsigned uid_map_root;
+ int background_qlen;
+ int congestion_threshold;
+ char *fuse_mountopts;
+
+ /* key args */
+ char *mount_point;
+ char *volfile_id;
+
+ /* required for portmap */
+ int brick_port;
+ char *brick_name;
+ int brick_port2;
};
typedef struct _cmd_args cmd_args_t;
-struct _glusterfs_ctx {
- cmd_args_t cmd_args;
- char *process_uuid;
- FILE *specfp;
- FILE *pidfp;
- char fin;
- void *timer;
- void *ib;
- void *pool;
- void *graph;
- void *top; /* either fuse or server protocol */
- void *event_pool;
- void *iobuf_pool;
- pthread_mutex_t lock;
- int xl_count;
- uint32_t volfile_checksum;
- size_t page_size;
+
+struct _glusterfs_graph {
+ struct list_head list;
+ char graph_uuid[128];
+ struct timeval dob;
+ void *first;
+ void *top; /* selected by -n */
+ int xl_count;
+ int id; /* Used in logging */
+ int used; /* Should be set when fuse gets
+ first CHILD_UP */
+ uint32_t volfile_checksum;
};
+typedef struct _glusterfs_graph glusterfs_graph_t;
+
+typedef int32_t (*glusterfsd_mgmt_event_notify_fn_t) (int32_t event, void *data,
+ ...);
+struct _glusterfs_ctx {
+ cmd_args_t cmd_args;
+ char *process_uuid;
+ FILE *pidfp;
+ char fin;
+ void *timer;
+ void *ib;
+ struct call_pool *pool;
+ void *event_pool;
+ void *iobuf_pool;
+ pthread_mutex_t lock;
+ size_t page_size;
+ struct list_head graphs; /* double linked list of graphs - one per volfile parse */
+ glusterfs_graph_t *active; /* the latest graph in use */
+ void *master; /* fuse, or libglusterfsclient (however, not protocol/server) */
+ void *mgmt; /* xlator implementing MOPs for centralized logging, volfile server */
+ void *listener; /* listener of the commands from glusterd */
+ unsigned char measure_latency; /* toggle switch for latency measurement */
+ pthread_t sigwaiter;
+ struct mem_pool *stub_mem_pool;
+ unsigned char cleanup_started;
+ int graph_id; /* Incremented per graph, value should
+ indicate how many times the graph has
+ got changed */
+ pid_t mnt_pid; /* pid of the mount agent */
+ int process_mode; /*mode in which process is runninng*/
+ struct syncenv *env; /* The env pointer to the synctasks */
+
+ struct list_head mempool_list; /* used to keep a global list of
+ mempools, used to log details of
+ mempool in statedump */
+ char *statedump_path;
+
+ struct mem_pool *dict_pool;
+ struct mem_pool *dict_pair_pool;
+ struct mem_pool *dict_data_pool;
+
+ glusterfsd_mgmt_event_notify_fn_t notify; /* Used for xlators to make
+ call to fsd-mgmt */
+ gf_log_handle_t log; /* all logging related variables */
+
+ int mem_acct_enable;
+
+ int daemon_pipe[2];
+
+ struct client_disconnect *client_disconnect;
+ struct clienttable *clienttable;
+};
typedef struct _glusterfs_ctx glusterfs_ctx_t;
+glusterfs_ctx_t *glusterfs_ctx_new (void);
+
typedef enum {
- GF_EVENT_PARENT_UP = 1,
- GF_EVENT_POLLIN,
- GF_EVENT_POLLOUT,
- GF_EVENT_POLLERR,
- GF_EVENT_CHILD_UP,
- GF_EVENT_CHILD_DOWN,
- GF_EVENT_CHILD_CONNECTING,
- GF_EVENT_TRANSPORT_CLEANUP,
- GF_EVENT_TRANSPORT_CONNECTED,
- GF_EVENT_VOLFILE_MODIFIED,
+ GF_EVENT_PARENT_UP = 1,
+ GF_EVENT_POLLIN,
+ GF_EVENT_POLLOUT,
+ GF_EVENT_POLLERR,
+ GF_EVENT_CHILD_UP,
+ GF_EVENT_CHILD_DOWN,
+ GF_EVENT_CHILD_CONNECTING,
+ GF_EVENT_CHILD_MODIFIED,
+ GF_EVENT_TRANSPORT_CLEANUP,
+ GF_EVENT_TRANSPORT_CONNECTED,
+ GF_EVENT_VOLFILE_MODIFIED,
+ GF_EVENT_GRAPH_NEW,
+ GF_EVENT_TRANSLATOR_INFO,
+ GF_EVENT_TRANSLATOR_OP,
+ GF_EVENT_AUTH_FAILED,
+ GF_EVENT_VOLUME_DEFRAG,
+ GF_EVENT_PARENT_DOWN,
+ GF_EVENT_VOLUME_BARRIER_OP,
+ GF_EVENT_MAXVAL,
} glusterfs_event_t;
+struct gf_flock {
+ short l_type;
+ short l_whence;
+ off_t l_start;
+ off_t l_len;
+ pid_t l_pid;
+ gf_lkowner_t l_owner;
+};
+
#define GF_MUST_CHECK __attribute__((warn_unused_result))
+/*
+ * Some macros (e.g. ALLOC_OR_GOTO) set variables in function scope, but the
+ * calling function might not only declare the variable to keep the macro happy
+ * and not use it otherwise. In such cases, the following can be used to
+ * suppress the "set but not used" warning that would otherwise occur.
+ */
+#define GF_UNUSED __attribute__((unused))
+
+int glusterfs_graph_prepare (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx);
+int glusterfs_graph_destroy (glusterfs_graph_t *graph);
+int glusterfs_graph_activate (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx);
+glusterfs_graph_t *glusterfs_graph_construct (FILE *fp);
+glusterfs_graph_t *glusterfs_graph_new ();
+int glusterfs_graph_reconfigure (glusterfs_graph_t *oldgraph,
+ glusterfs_graph_t *newgraph);
#endif /* _GLUSTERFS_H */
diff --git a/libglusterfs/src/graph-print.c b/libglusterfs/src/graph-print.c
new file mode 100644
index 000000000..d860d63b3
--- /dev/null
+++ b/libglusterfs/src/graph-print.c
@@ -0,0 +1,200 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/uio.h>
+
+#include "common-utils.h"
+#include "xlator.h"
+#include "graph-utils.h"
+
+
+
+struct gf_printer {
+ ssize_t (*write) (struct gf_printer *gp, char *buf, size_t len);
+ void *priv;
+ int len;
+};
+
+static ssize_t
+gp_write_file (struct gf_printer *gp, char *buf, size_t len)
+{
+ FILE *f = gp->priv;
+
+ if (fwrite (buf, len, 1, f) != 1) {
+ gf_log ("graph-print", GF_LOG_ERROR, "fwrite failed (%s)",
+ strerror (errno));
+
+ return -1;
+ }
+
+ return len;
+}
+
+static ssize_t
+gp_write_buf (struct gf_printer *gp, char *buf, size_t len)
+{
+ struct iovec *iov = gp->priv;
+
+ if (iov->iov_len < len) {
+ gf_log ("graph-print", GF_LOG_ERROR, "buffer full");
+
+ return -1;
+ }
+
+ memcpy (iov->iov_base, buf, len);
+ iov->iov_base += len;
+ iov->iov_len -= len;
+
+ return len;
+}
+
+static int
+gpprintf (struct gf_printer *gp, const char *format, ...)
+{
+ va_list arg;
+ char *str = NULL;
+ int ret = 0;
+
+ va_start (arg, format);
+ ret = gf_vasprintf (&str, format, arg);
+ va_end (arg);
+
+ if (ret < 0)
+ return ret;
+
+ ret = gp->write (gp, str, ret);
+
+ GF_FREE (str);
+
+ return ret;
+}
+
+#define GPPRINTF(gp, fmt, ...) do { \
+ ret = gpprintf (gp, fmt, ## __VA_ARGS__); \
+ if (ret == -1) \
+ goto out; \
+ else \
+ gp->len += ret; \
+ } while (0)
+
+static int
+_print_volume_options (dict_t *d, char *k, data_t *v,
+ void *tmp)
+{
+ struct gf_printer *gp = tmp;
+ int ret = 0;
+ GPPRINTF (gp, " option %s %s\n", k, v->data);
+ return 0;
+out:
+ /* means, it is a failure */
+ return -1;
+}
+
+static int
+glusterfs_graph_print (struct gf_printer *gp, glusterfs_graph_t *graph)
+{
+ xlator_t *trav = NULL;
+ xlator_list_t *xch = NULL;
+ int ret = 0;
+ ssize_t len = 0;
+
+ if (!graph->first)
+ return 0;
+
+ for (trav = graph->first; trav->next; trav = trav->next);
+ for (; trav; trav = trav->prev) {
+ GPPRINTF (gp, "volume %s\n type %s\n", trav->name,
+ trav->type);
+
+ ret = dict_foreach (trav->options, _print_volume_options, gp);
+ if (ret)
+ goto out;
+
+ if (trav->children) {
+ GPPRINTF (gp, " subvolumes");
+
+ for (xch = trav->children; xch; xch = xch->next)
+ GPPRINTF (gp, " %s", xch->xlator->name);
+
+ GPPRINTF (gp, "\n");
+ }
+
+ GPPRINTF (gp, "end-volume\n");
+ if (trav != graph->first)
+ GPPRINTF (gp, "\n");
+ }
+
+out:
+ len = gp->len;
+ if (ret == -1) {
+ gf_log ("graph-print", GF_LOG_ERROR, "printing failed");
+
+ return -1;
+ }
+
+ return len;
+
+#undef GPPRINTF
+}
+
+int
+glusterfs_graph_print_file (FILE *file, glusterfs_graph_t *graph)
+{
+ struct gf_printer gp = { .write = gp_write_file,
+ .priv = file
+ };
+
+ return glusterfs_graph_print (&gp, graph);
+}
+
+char *
+glusterfs_graph_print_buf (glusterfs_graph_t *graph)
+{
+ FILE *f = NULL;
+ struct iovec iov = {0,};
+ int len = 0;
+ char *buf = NULL;
+ struct gf_printer gp = { .write = gp_write_buf,
+ .priv = &iov
+ };
+
+ f = fopen ("/dev/null", "a");
+ if (!f) {
+ gf_log ("graph-print", GF_LOG_ERROR,
+ "cannot open /dev/null (%s)", strerror (errno));
+
+ return NULL;
+ }
+ len = glusterfs_graph_print_file (f, graph);
+ fclose (f);
+ if (len == -1)
+ return NULL;
+
+ buf = GF_CALLOC (1, len + 1, gf_common_mt_graph_buf);
+ if (!buf) {
+ return NULL;
+ }
+ iov.iov_base = buf;
+ iov.iov_len = len;
+
+ len = glusterfs_graph_print (&gp, graph);
+ if (len == -1) {
+ GF_FREE (buf);
+
+ return NULL;
+ }
+
+ return buf;
+}
diff --git a/libglusterfs/src/graph-utils.h b/libglusterfs/src/graph-utils.h
new file mode 100644
index 000000000..207664fdb
--- /dev/null
+++ b/libglusterfs/src/graph-utils.h
@@ -0,0 +1,20 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GRAPH_H_
+#define _GRAPH_H_
+
+int glusterfs_graph_print_file (FILE *file, glusterfs_graph_t *graph);
+
+char *glusterfs_graph_print_buf (glusterfs_graph_t *graph);
+
+int glusterfs_xlator_link (xlator_t *pxl, xlator_t *cxl);
+void glusterfs_graph_set_first (glusterfs_graph_t *graph, xlator_t *xl);
+#endif
diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c
new file mode 100644
index 000000000..e76df1ca5
--- /dev/null
+++ b/libglusterfs/src/graph.c
@@ -0,0 +1,751 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include <dlfcn.h>
+#include <netdb.h>
+#include <fnmatch.h>
+#include "defaults.h"
+
+
+#if 0
+static void
+_gf_dump_details (int argc, char **argv)
+{
+ extern FILE *gf_log_logfile;
+ int i = 0;
+ char timestr[64];
+ time_t utime = 0;
+ pid_t mypid = 0;
+ struct utsname uname_buf = {{0, }, };
+ int uname_ret = -1;
+
+ mypid = getpid ();
+ uname_ret = uname (&uname_buf);
+
+ utime = time (NULL);
+ gf_time_fmt (timestr, sizeof timestr, utime, gf_timefmt_FT);
+ fprintf (gf_log_logfile,
+ "========================================"
+ "========================================\n");
+ fprintf (gf_log_logfile, "Version : %s %s built on %s %s\n",
+ PACKAGE_NAME, PACKAGE_VERSION, __DATE__, __TIME__);
+ fprintf (gf_log_logfile, "git: %s\n",
+ GLUSTERFS_REPOSITORY_REVISION);
+ fprintf (gf_log_logfile, "Starting Time: %s\n", timestr);
+ fprintf (gf_log_logfile, "Command line : ");
+ for (i = 0; i < argc; i++) {
+ fprintf (gf_log_logfile, "%s ", argv[i]);
+ }
+
+ fprintf (gf_log_logfile, "\nPID : %d\n", mypid);
+
+ if (uname_ret == 0) {
+ fprintf (gf_log_logfile, "System name : %s\n",
+ uname_buf.sysname);
+ fprintf (gf_log_logfile, "Nodename : %s\n",
+ uname_buf.nodename);
+ fprintf (gf_log_logfile, "Kernel Release : %s\n",
+ uname_buf.release);
+ fprintf (gf_log_logfile, "Hardware Identifier: %s\n",
+ uname_buf.machine);
+ }
+
+
+ fprintf (gf_log_logfile, "\n");
+ fflush (gf_log_logfile);
+}
+#endif
+
+
+
+int
+glusterfs_xlator_link (xlator_t *pxl, xlator_t *cxl)
+{
+ xlator_list_t *xlchild = NULL;
+ xlator_list_t *xlparent = NULL;
+ xlator_list_t **tmp = NULL;
+
+ xlparent = (void *) GF_CALLOC (1, sizeof (*xlparent),
+ gf_common_mt_xlator_list_t);
+ if (!xlparent)
+ return -1;
+
+ xlchild = (void *) GF_CALLOC (1, sizeof (*xlchild),
+ gf_common_mt_xlator_list_t);
+ if (!xlchild) {
+ GF_FREE (xlparent);
+
+ return -1;
+ }
+
+ xlparent->xlator = pxl;
+ for (tmp = &cxl->parents; *tmp; tmp = &(*tmp)->next);
+ *tmp = xlparent;
+
+ xlchild->xlator = cxl;
+ for (tmp = &pxl->children; *tmp; tmp = &(*tmp)->next);
+ *tmp = xlchild;
+
+ return 0;
+}
+
+
+void
+glusterfs_graph_set_first (glusterfs_graph_t *graph, xlator_t *xl)
+{
+ xl->next = graph->first;
+ if (graph->first)
+ ((xlator_t *)graph->first)->prev = xl;
+ graph->first = xl;
+
+ graph->xl_count++;
+}
+
+
+int
+glusterfs_graph_insert (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx,
+ const char *type, const char *name,
+ gf_boolean_t autoload)
+{
+ xlator_t *ixl = NULL;
+
+ if (!ctx->master) {
+ gf_log ("glusterfs", GF_LOG_ERROR,
+ "volume \"%s\" can be added from command line only "
+ "on client side", type);
+
+ return -1;
+ }
+
+ ixl = GF_CALLOC (1, sizeof (*ixl), gf_common_mt_xlator_t);
+ if (!ixl)
+ return -1;
+
+ ixl->ctx = ctx;
+ ixl->graph = graph;
+ ixl->options = get_new_dict ();
+ if (!ixl->options)
+ goto err;
+
+ ixl->name = gf_strdup (name);
+ if (!ixl->name)
+ goto err;
+
+ ixl->is_autoloaded = autoload;
+
+ if (xlator_set_type (ixl, type) == -1) {
+ gf_log ("glusterfs", GF_LOG_ERROR,
+ "%s (%s) initialization failed",
+ name, type);
+ return -1;
+ }
+
+ if (glusterfs_xlator_link (ixl, graph->top) == -1)
+ goto err;
+ glusterfs_graph_set_first (graph, ixl);
+ graph->top = ixl;
+
+ return 0;
+err:
+ xlator_destroy (ixl);
+ return -1;
+}
+
+int
+glusterfs_graph_acl (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
+{
+ int ret = 0;
+ cmd_args_t *cmd_args = NULL;
+
+ cmd_args = &ctx->cmd_args;
+
+ if (!cmd_args->acl)
+ return 0;
+
+ ret = glusterfs_graph_insert (graph, ctx, "system/posix-acl",
+ "posix-acl-autoload", 1);
+ return ret;
+}
+
+int
+glusterfs_graph_worm (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
+{
+ int ret = 0;
+ cmd_args_t *cmd_args = NULL;
+
+ cmd_args = &ctx->cmd_args;
+
+ if (!cmd_args->worm)
+ return 0;
+
+ ret = glusterfs_graph_insert (graph, ctx, "features/worm",
+ "worm-autoload", 1);
+ return ret;
+}
+
+int
+glusterfs_graph_mac_compat (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
+{
+ int ret = 0;
+ cmd_args_t *cmd_args = NULL;
+
+ cmd_args = &ctx->cmd_args;
+
+ if (cmd_args->mac_compat == GF_OPTION_DISABLE)
+ return 0;
+
+ ret = glusterfs_graph_insert (graph, ctx, "features/mac-compat",
+ "mac-compat-autoload", 1);
+
+ return ret;
+}
+
+int
+glusterfs_graph_gfid_access (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
+{
+ int ret = 0;
+ cmd_args_t *cmd_args = NULL;
+
+ cmd_args = &ctx->cmd_args;
+
+ if (!cmd_args->aux_gfid_mount)
+ return 0;
+
+ ret = glusterfs_graph_insert (graph, ctx, "features/gfid-access",
+ "gfid-access-autoload", 1);
+ return ret;
+}
+
+static void
+gf_add_cmdline_options (glusterfs_graph_t *graph, cmd_args_t *cmd_args)
+{
+ int ret = 0;
+ xlator_t *trav = NULL;
+ xlator_cmdline_option_t *cmd_option = NULL;
+
+ trav = graph->first;
+
+ while (trav) {
+ list_for_each_entry (cmd_option,
+ &cmd_args->xlator_options, cmd_args) {
+ if (!fnmatch (cmd_option->volume,
+ trav->name, FNM_NOESCAPE)) {
+ ret = dict_set_str (trav->options,
+ cmd_option->key,
+ cmd_option->value);
+ if (ret == 0) {
+ gf_log (trav->name, GF_LOG_INFO,
+ "adding option '%s' for "
+ "volume '%s' with value '%s'",
+ cmd_option->key, trav->name,
+ cmd_option->value);
+ } else {
+ gf_log (trav->name, GF_LOG_WARNING,
+ "adding option '%s' for "
+ "volume '%s' failed: %s",
+ cmd_option->key, trav->name,
+ strerror (-ret));
+ }
+ }
+ }
+ trav = trav->next;
+ }
+}
+
+
+int
+glusterfs_graph_validate_options (glusterfs_graph_t *graph)
+{
+ xlator_t *trav = NULL;
+ int ret = -1;
+ char *errstr = NULL;
+
+ trav = graph->first;
+
+ while (trav) {
+ if (list_empty (&trav->volume_options))
+ continue;
+
+ ret = xlator_options_validate (trav, trav->options, &errstr);
+ if (ret) {
+ gf_log (trav->name, GF_LOG_ERROR,
+ "validation failed: %s", errstr);
+ return ret;
+ }
+ trav = trav->next;
+ }
+
+ return 0;
+}
+
+
+int
+glusterfs_graph_init (glusterfs_graph_t *graph)
+{
+ xlator_t *trav = NULL;
+ int ret = -1;
+
+ trav = graph->first;
+
+ while (trav) {
+ ret = xlator_init (trav);
+ if (ret) {
+ gf_log (trav->name, GF_LOG_ERROR,
+ "initializing translator failed");
+ return ret;
+ }
+ trav = trav->next;
+ }
+
+ return 0;
+}
+
+
+static int
+_log_if_unknown_option (dict_t *dict, char *key, data_t *value, void *data)
+{
+ volume_option_t *found = NULL;
+ xlator_t *xl = NULL;
+
+ xl = data;
+
+ found = xlator_volume_option_get (xl, key);
+
+ if (!found) {
+ gf_log (xl->name, GF_LOG_WARNING,
+ "option '%s' is not recognized", key);
+ }
+
+ return 0;
+}
+
+
+static void
+_xlator_check_unknown_options (xlator_t *xl, void *data)
+{
+ dict_foreach (xl->options, _log_if_unknown_option, xl);
+}
+
+
+int
+glusterfs_graph_unknown_options (glusterfs_graph_t *graph)
+{
+ xlator_foreach (graph->first, _xlator_check_unknown_options, NULL);
+ return 0;
+}
+
+
+void
+fill_uuid (char *uuid, int size)
+{
+ char hostname[256] = {0,};
+ struct timeval tv = {0,};
+ char now_str[64];
+
+ if (gettimeofday (&tv, NULL) == -1) {
+ gf_log ("graph", GF_LOG_ERROR,
+ "gettimeofday: failed %s",
+ strerror (errno));
+ }
+
+ if (gethostname (hostname, 256) == -1) {
+ gf_log ("graph", GF_LOG_ERROR,
+ "gethostname: failed %s",
+ strerror (errno));
+ }
+
+ gf_time_fmt (now_str, sizeof now_str, tv.tv_sec, gf_timefmt_Ymd_T);
+ snprintf (uuid, size, "%s-%d-%s:%"GF_PRI_SUSECONDS,
+ hostname, getpid(), now_str, tv.tv_usec);
+
+ return;
+}
+
+
+int
+glusterfs_graph_settop (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
+{
+ const char *volume_name = NULL;
+ xlator_t *trav = NULL;
+
+ volume_name = ctx->cmd_args.volume_name;
+
+ if (!volume_name) {
+ graph->top = graph->first;
+ return 0;
+ }
+
+ for (trav = graph->first; trav; trav = trav->next) {
+ if (strcmp (trav->name, volume_name) == 0) {
+ graph->top = trav;
+ return 0;
+ }
+ }
+
+ return -1;
+}
+
+
+int
+glusterfs_graph_parent_up (glusterfs_graph_t *graph)
+{
+ xlator_t *trav = NULL;
+ int ret = -1;
+
+ trav = graph->first;
+
+ while (trav) {
+ if (!xlator_has_parent (trav)) {
+ ret = xlator_notify (trav, GF_EVENT_PARENT_UP, trav);
+ }
+
+ if (ret)
+ break;
+
+ trav = trav->next;
+ }
+
+ return ret;
+}
+
+
+int
+glusterfs_graph_prepare (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
+{
+ xlator_t *trav = NULL;
+ int ret = 0;
+
+ /* XXX: CHECKSUM */
+
+ /* XXX: attach to -n volname */
+ ret = glusterfs_graph_settop (graph, ctx);
+ if (ret) {
+ gf_log ("graph", GF_LOG_ERROR, "glusterfs graph settop failed");
+ return -1;
+ }
+
+ /* XXX: WORM VOLUME */
+ ret = glusterfs_graph_worm (graph, ctx);
+ if (ret) {
+ gf_log ("graph", GF_LOG_ERROR, "glusterfs graph worm failed");
+ return -1;
+ }
+ ret = glusterfs_graph_acl (graph, ctx);
+ if (ret) {
+ gf_log ("graph", GF_LOG_ERROR, "glusterfs graph ACL failed");
+ return -1;
+ }
+
+ /* XXX: MAC COMPAT */
+ ret = glusterfs_graph_mac_compat (graph, ctx);
+ if (ret) {
+ gf_log ("graph", GF_LOG_ERROR, "glusterfs graph mac compat failed");
+ return -1;
+ }
+
+ /* XXX: gfid-access */
+ ret = glusterfs_graph_gfid_access (graph, ctx);
+ if (ret) {
+ gf_log ("graph", GF_LOG_ERROR,
+ "glusterfs graph 'gfid-access' failed");
+ return -1;
+ }
+
+ /* XXX: this->ctx setting */
+ for (trav = graph->first; trav; trav = trav->next) {
+ trav->ctx = ctx;
+ }
+
+ /* XXX: DOB setting */
+ gettimeofday (&graph->dob, NULL);
+
+ fill_uuid (graph->graph_uuid, 128);
+
+ graph->id = ctx->graph_id++;
+
+ /* XXX: --xlator-option additions */
+ gf_add_cmdline_options (graph, &ctx->cmd_args);
+
+
+ return 0;
+}
+
+
+int
+glusterfs_graph_activate (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
+{
+ int ret = 0;
+
+ /* XXX: all xlator options validation */
+ ret = glusterfs_graph_validate_options (graph);
+ if (ret) {
+ gf_log ("graph", GF_LOG_ERROR, "validate options failed");
+ return ret;
+ }
+
+ /* XXX: perform init () */
+ ret = glusterfs_graph_init (graph);
+ if (ret) {
+ gf_log ("graph", GF_LOG_ERROR, "init failed");
+ return ret;
+ }
+
+ ret = glusterfs_graph_unknown_options (graph);
+ if (ret) {
+ gf_log ("graph", GF_LOG_ERROR, "unknown options failed");
+ return ret;
+ }
+
+ /* XXX: log full graph (_gf_dump_details) */
+
+ list_add (&graph->list, &ctx->graphs);
+ ctx->active = graph;
+
+ /* XXX: attach to master and set active pointer */
+ if (ctx->master) {
+ ret = xlator_notify (ctx->master, GF_EVENT_GRAPH_NEW, graph);
+ if (ret) {
+ gf_log ("graph", GF_LOG_ERROR,
+ "graph new notification failed");
+ return ret;
+ }
+ ((xlator_t *)ctx->master)->next = graph->top;
+ }
+
+ /* XXX: perform parent up */
+ ret = glusterfs_graph_parent_up (graph);
+ if (ret) {
+ gf_log ("graph", GF_LOG_ERROR, "parent up notification failed");
+ return ret;
+ }
+
+ return 0;
+}
+
+
+int
+xlator_equal_rec (xlator_t *xl1, xlator_t *xl2)
+{
+ xlator_list_t *trav1 = NULL;
+ xlator_list_t *trav2 = NULL;
+ int ret = 0;
+
+ if (xl1 == NULL || xl2 == NULL) {
+ gf_log ("xlator", GF_LOG_DEBUG, "invalid argument");
+ return -1;
+ }
+
+ trav1 = xl1->children;
+ trav2 = xl2->children;
+
+ while (trav1 && trav2) {
+ ret = xlator_equal_rec (trav1->xlator, trav2->xlator);
+ if (ret) {
+ gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
+ "xlators children not equal");
+ goto out;
+ }
+
+ trav1 = trav1->next;
+ trav2 = trav2->next;
+ }
+
+ if (trav1 || trav2) {
+ ret = -1;
+ goto out;
+ }
+
+ if (strcmp (xl1->name, xl2->name)) {
+ ret = -1;
+ goto out;
+ }
+
+ /* type could have changed even if xlator names match,
+ e.g cluster/distrubte and cluster/nufa share the same
+ xlator name
+ */
+ if (strcmp (xl1->type, xl2->type)) {
+ ret = -1;
+ goto out;
+ }
+out :
+ return ret;
+}
+
+
+gf_boolean_t
+is_graph_topology_equal (glusterfs_graph_t *graph1, glusterfs_graph_t *graph2)
+{
+ xlator_t *trav1 = NULL;
+ xlator_t *trav2 = NULL;
+ gf_boolean_t ret = _gf_true;
+
+ trav1 = graph1->first;
+ trav2 = graph2->first;
+
+ ret = xlator_equal_rec (trav1, trav2);
+
+ if (ret) {
+ gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
+ "graphs are not equal");
+ ret = _gf_false;
+ goto out;
+ }
+
+ ret = _gf_true;
+ gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
+ "graphs are equal");
+
+out:
+ return ret;
+}
+
+
+/* Function has 3types of return value 0, -ve , 1
+ * return 0 =======> reconfiguration of options has succeeded
+ * return 1 =======> the graph has to be reconstructed and all the xlators should be inited
+ * return -1(or -ve) =======> Some Internal Error occurred during the operation
+ */
+int
+glusterfs_volfile_reconfigure (int oldvollen, FILE *newvolfile_fp,
+ glusterfs_ctx_t *ctx, const char *oldvolfile)
+{
+ glusterfs_graph_t *oldvolfile_graph = NULL;
+ glusterfs_graph_t *newvolfile_graph = NULL;
+ FILE *oldvolfile_fp = NULL;
+ gf_boolean_t active_graph_found = _gf_true;
+
+ int ret = -1;
+
+ if (!oldvollen) {
+ ret = 1; // Has to call INIT for the whole graph
+ goto out;
+ }
+
+ if (!ctx) {
+ gf_log ("glusterfsd-mgmt", GF_LOG_ERROR,
+ "ctx is NULL");
+ goto out;
+ }
+
+ oldvolfile_graph = ctx->active;
+ if (!oldvolfile_graph) {
+ active_graph_found = _gf_false;
+ gf_log ("glusterfsd-mgmt", GF_LOG_ERROR,
+ "glusterfs_ctx->active is NULL");
+
+ oldvolfile_fp = tmpfile ();
+ if (!oldvolfile_fp) {
+ gf_log ("glusterfsd-mgmt", GF_LOG_ERROR, "Unable to "
+ "create temporary volfile: (%s)",
+ strerror (errno));
+ goto out;
+ }
+
+ fwrite (oldvolfile, oldvollen, 1, oldvolfile_fp);
+ fflush (oldvolfile_fp);
+ if (ferror (oldvolfile_fp)) {
+ goto out;
+ }
+
+ oldvolfile_graph = glusterfs_graph_construct (oldvolfile_fp);
+ if (!oldvolfile_graph)
+ goto out;
+ }
+
+ newvolfile_graph = glusterfs_graph_construct (newvolfile_fp);
+ if (!newvolfile_graph) {
+ goto out;
+ }
+
+ if (!is_graph_topology_equal (oldvolfile_graph,
+ newvolfile_graph)) {
+
+ ret = 1;
+ gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
+ "Graph topology not equal(should call INIT)");
+ goto out;
+ }
+
+ gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
+ "Only options have changed in the new "
+ "graph");
+
+ /* */
+ ret = glusterfs_graph_reconfigure (oldvolfile_graph,
+ newvolfile_graph);
+ if (ret) {
+ gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
+ "Could not reconfigure new options in old graph");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (oldvolfile_fp)
+ fclose (oldvolfile_fp);
+
+ /* Do not simply destroy the old graph here. If the oldgraph
+ is constructed here in this function itself instead of getting
+ it from ctx->active (which happens only of ctx->active is NULL),
+ then destroy the old graph. If some i/o is still happening in
+ the old graph and the old graph is obtained from ctx->active,
+ then destroying the graph will cause problems.
+ */
+ if (!active_graph_found && oldvolfile_graph)
+ glusterfs_graph_destroy (oldvolfile_graph);
+ if (newvolfile_graph)
+ glusterfs_graph_destroy (newvolfile_graph);
+
+ return ret;
+}
+
+
+int
+glusterfs_graph_reconfigure (glusterfs_graph_t *oldgraph,
+ glusterfs_graph_t *newgraph)
+{
+ xlator_t *old_xl = NULL;
+ xlator_t *new_xl = NULL;
+
+ GF_ASSERT (oldgraph);
+ GF_ASSERT (newgraph);
+
+ old_xl = oldgraph->first;
+ while (old_xl->is_autoloaded) {
+ old_xl = old_xl->children->xlator;
+ }
+
+ new_xl = newgraph->first;
+ while (new_xl->is_autoloaded) {
+ new_xl = new_xl->children->xlator;
+ }
+
+ return xlator_tree_reconfigure (old_xl, new_xl);
+}
+
+int
+glusterfs_graph_destroy (glusterfs_graph_t *graph)
+{
+ xlator_tree_free (graph->first);
+
+ if (graph) {
+ list_del_init (&graph->list);
+ GF_FREE (graph);
+ }
+
+ return 0;
+}
diff --git a/libglusterfs/src/graph.l b/libglusterfs/src/graph.l
new file mode 100644
index 000000000..e4eba9cbe
--- /dev/null
+++ b/libglusterfs/src/graph.l
@@ -0,0 +1,79 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+%x STRING
+%option yylineno
+%option noinput
+%{
+
+#define YYSTYPE char *
+#include "xlator.h"
+#include "y.tab.h"
+#include <string.h>
+#define START_STRSIZE 32
+
+static char *text;
+static int text_asize;
+static int text_size;
+
+void append_string(const char *str, int size)
+{
+ int new_size = text_size + size + 1;
+ if (new_size > text_asize) {
+ new_size += START_STRSIZE - 1;
+ new_size &= -START_STRSIZE;
+ if (!text) {
+ text = GF_CALLOC (1, new_size,
+ gf_common_mt_char);
+ } else {
+ text = GF_REALLOC (text, new_size);
+ }
+ if (!text) {
+ gf_log ("parser", GF_LOG_ERROR,
+ "out of memory");
+ return;
+ }
+ text_asize = new_size;
+ }
+ memcpy(text + text_size, str, size);
+ text_size += size;
+ text[text_size] = 0;
+}
+
+%}
+
+VOLUME [v][o][l][u][m][e]
+END [e][n][d]
+SUB [s][u][b]
+OPTION [o][p][t][i][o][n]
+TYPE [t][y][p][e]
+%%
+\#.* ;
+{VOLUME} return VOLUME_BEGIN;
+{TYPE} return TYPE;
+{END}[-]{VOLUME} return VOLUME_END;
+{SUB}{VOLUME}[Ss] return SUBVOLUME;
+{OPTION} return OPTION;
+\" BEGIN(STRING);
+<STRING>{
+ [^\n\"\\]* { append_string (yytext, yyleng); }
+ \\. { append_string (yytext + 1, yyleng - 1); }
+ \" {
+ if (0) {
+ yyunput (0, NULL);
+ }
+ BEGIN (INITIAL);
+ graphyylval = text;
+ return STRING_TOK;
+ }
+}
+[^ \t\r\n\"\\]+ { graphyylval = gf_strdup (yytext) ; return ID; }
+[ \t\r\n]+ ;
+%%
diff --git a/libglusterfs/src/graph.y b/libglusterfs/src/graph.y
new file mode 100644
index 000000000..a220abeb9
--- /dev/null
+++ b/libglusterfs/src/graph.y
@@ -0,0 +1,620 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+%token VOLUME_BEGIN VOLUME_END OPTION NEWLINE SUBVOLUME ID WHITESPACE COMMENT TYPE STRING_TOK
+
+%{
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <pthread.h>
+
+#define RELAX_POISONING
+
+#include "xlator.h"
+#include "graph-utils.h"
+#include "logging.h"
+
+static int new_volume (char *name);
+static int volume_type (char *type);
+static int volume_option (char *key, char *value);
+static int volume_sub (char *sub);
+static int volume_end (void);
+static void sub_error (void);
+static void type_error (void);
+static void option_error (void);
+
+#define YYSTYPE char *
+#define GF_CMD_BUFFER_LEN (8 * GF_UNIT_KB)
+
+int graphyyerror (const char *);
+int graphyylex ();
+%}
+
+
+%%
+VOLUMES: VOLUME | VOLUMES VOLUME;
+
+VOLUME: VOLUME_HEADER VOLUME_DATA VOLUME_FOOTER;
+VOLUME_HEADER: VOLUME_BEGIN WORD {if (new_volume ($2) == -1) { YYABORT; }};
+VOLUME_FOOTER: VOLUME_END {if (volume_end () == -1) { YYABORT; }};
+
+VOLUME_DATA: TYPE_LINE OPTIONS_LINE SUBVOLUME_LINE OPTIONS_LINE |
+ TYPE_LINE SUBVOLUME_LINE OPTIONS_LINE |
+ TYPE_LINE OPTIONS_LINE SUBVOLUME_LINE |
+ TYPE_LINE SUBVOLUME_LINE |
+ TYPE_LINE OPTIONS_LINE |
+ OPTIONS_LINE SUBVOLUME_LINE OPTIONS_LINE | /* error case */
+ OPTIONS_LINE; /* error case */
+
+TYPE_LINE: TYPE WORD {if (volume_type ($2) == -1) { YYABORT; }} | TYPE { type_error(); YYABORT; };
+
+SUBVOLUME_LINE: SUBVOLUME WORDS | SUBVOLUME { sub_error (); YYABORT; };
+
+OPTIONS_LINE: OPTION_LINE | OPTIONS_LINE OPTION_LINE;
+
+OPTION_LINE: OPTION WORD WORD {if (volume_option ($2, $3) == -1) { YYABORT; }} |
+ OPTION WORD { option_error (); YYABORT; } |
+ OPTION { option_error (); YYABORT; };
+
+WORDS: WORD {if (volume_sub ($1) == -1) {YYABORT; }} | WORDS WORD { if (volume_sub ($2) == -1) { YYABORT; }};
+WORD: ID | STRING_TOK ;
+%%
+
+xlator_t *curr;
+glusterfs_graph_t *construct;
+
+
+static void
+type_error (void)
+{
+ extern int graphyylineno;
+
+ gf_log ("parser", GF_LOG_ERROR,
+ "Volume %s, before line %d: Please specify volume type",
+ curr->name, graphyylineno);
+ return;
+}
+
+
+static void
+sub_error (void)
+{
+ extern int graphyylineno;
+
+ gf_log ("parser", GF_LOG_ERROR,
+ "Volume %s, before line %d: Please specify subvolumes",
+ curr->name, graphyylineno);
+ return;
+}
+
+
+static void
+option_error (void)
+{
+ extern int graphyylineno;
+
+ gf_log ("parser", GF_LOG_ERROR,
+ "Volume %s, before line %d: Please specify "
+ "option <key> <value>",
+ curr->name, graphyylineno);
+ return;
+}
+
+
+static int
+new_volume (char *name)
+{
+ extern int graphyylineno;
+ xlator_t *trav = NULL;
+ int ret = 0;
+
+ if (!name) {
+ gf_log ("parser", GF_LOG_DEBUG,
+ "Invalid argument name: '%s'", name);
+ ret = -1;
+ goto out;
+ }
+
+ if (curr) {
+ gf_log ("parser", GF_LOG_ERROR,
+ "new volume (%s) defintion in line %d unexpected",
+ name, graphyylineno);
+ ret = -1;
+ goto out;
+ }
+
+ curr = (void *) GF_CALLOC (1, sizeof (*curr),
+ gf_common_mt_xlator_t);
+
+ if (!curr) {
+ gf_log ("parser", GF_LOG_ERROR, "Out of memory");
+ ret = -1;
+ goto out;
+ }
+
+ trav = construct->first;
+
+ while (trav) {
+ if (!strcmp (name, trav->name)) {
+ gf_log ("parser", GF_LOG_ERROR,
+ "Line %d: volume '%s' defined again",
+ graphyylineno, name);
+ ret = -1;
+ goto out;
+ }
+ trav = trav->next;
+ }
+
+ curr->name = gf_strdup (name);
+ if (!curr->name) {
+ GF_FREE (curr);
+ ret = -1;
+ goto out;
+ }
+
+ curr->options = get_new_dict ();
+
+ if (!curr->options) {
+ GF_FREE (curr->name);
+ GF_FREE (curr);
+ ret = -1;
+ goto out;
+ }
+
+ curr->next = construct->first;
+ if (curr->next)
+ curr->next->prev = curr;
+
+ curr->graph = construct;
+
+ construct->first = curr;
+
+ construct->xl_count++;
+
+ gf_log ("parser", GF_LOG_TRACE, "New node for '%s'", name);
+
+out:
+ GF_FREE (name);
+
+ return ret;
+}
+
+
+static int
+volume_type (char *type)
+{
+ extern int graphyylineno;
+ int32_t ret = 0;
+
+ if (!type) {
+ gf_log ("parser", GF_LOG_DEBUG, "Invalid argument type");
+ ret = -1;
+ goto out;
+ }
+
+ ret = xlator_set_type (curr, type);
+ if (ret) {
+ gf_log ("parser", GF_LOG_ERROR,
+ "Volume '%s', line %d: type '%s' is not valid or "
+ "not found on this machine",
+ curr->name, graphyylineno, type);
+ ret = -1;
+ goto out;
+ }
+
+ gf_log ("parser", GF_LOG_TRACE, "Type:%s:%s", curr->name, type);
+
+out:
+ GF_FREE (type);
+
+ return 0;
+}
+
+
+static int
+volume_option (char *key, char *value)
+{
+ extern int graphyylineno;
+ int ret = 0;
+ char *set_value = NULL;
+
+ if (!key || !value){
+ gf_log ("parser", GF_LOG_ERROR, "Invalid argument");
+ ret = -1;
+ goto out;
+ }
+
+ set_value = gf_strdup (value);
+ ret = dict_set_dynstr (curr->options, key, set_value);
+
+ if (ret == 1) {
+ gf_log ("parser", GF_LOG_ERROR,
+ "Volume '%s', line %d: duplicate entry "
+ "('option %s') present",
+ curr->name, graphyylineno, key);
+ ret = -1;
+ goto out;
+ }
+
+ gf_log ("parser", GF_LOG_TRACE, "Option:%s:%s:%s",
+ curr->name, key, value);
+
+out:
+ GF_FREE (key);
+ GF_FREE (value);
+
+ return 0;
+}
+
+
+static int
+volume_sub (char *sub)
+{
+ extern int graphyylineno;
+ xlator_t *trav = NULL;
+ int ret = 0;
+
+ if (!sub) {
+ gf_log ("parser", GF_LOG_ERROR, "Invalid subvolumes argument");
+ ret = -1;
+ goto out;
+ }
+
+ trav = construct->first;
+
+ while (trav) {
+ if (!strcmp (sub, trav->name))
+ break;
+ trav = trav->next;
+ }
+
+ if (!trav) {
+ gf_log ("parser", GF_LOG_ERROR,
+ "Volume '%s', line %d: subvolume '%s' is not defined "
+ "prior to usage",
+ curr->name, graphyylineno, sub);
+ ret = -1;
+ goto out;
+ }
+
+ if (trav == curr) {
+ gf_log ("parser", GF_LOG_ERROR,
+ "Volume '%s', line %d: has '%s' itself as subvolume",
+ curr->name, graphyylineno, sub);
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterfs_xlator_link (curr, trav);
+ if (ret) {
+ gf_log ("parser", GF_LOG_ERROR, "Out of memory");
+ ret = -1;
+ goto out;
+ }
+
+ gf_log ("parser", GF_LOG_TRACE, "child:%s->%s", curr->name, sub);
+
+out:
+ GF_FREE (sub);
+
+ return 0;
+}
+
+
+static int
+volume_end (void)
+{
+ if (!curr->fops) {
+ gf_log ("parser", GF_LOG_ERROR,
+ "\"type\" not specified for volume %s", curr->name);
+ return -1;
+ }
+ gf_log ("parser", GF_LOG_TRACE, "end:%s", curr->name);
+
+ curr = NULL;
+ return 0;
+}
+
+
+int
+graphyywrap ()
+{
+ return 1;
+}
+
+
+int
+graphyyerror (const char *str)
+{
+ extern char *graphyytext;
+ extern int graphyylineno;
+
+ if (curr && curr->name && graphyytext) {
+ if (!strcmp (graphyytext, "volume")) {
+ gf_log ("parser", GF_LOG_ERROR,
+ "'end-volume' not defined for volume '%s'",
+ curr->name);
+ } else if (!strcmp (graphyytext, "type")) {
+ gf_log ("parser", GF_LOG_ERROR,
+ "line %d: duplicate 'type' defined for "
+ "volume '%s'",
+ graphyylineno, curr->name);
+ } else if (!strcmp (graphyytext, "subvolumes")) {
+ gf_log ("parser", GF_LOG_ERROR,
+ "line %d: duplicate 'subvolumes' defined for "
+ "volume '%s'",
+ graphyylineno, curr->name);
+ } else if (curr) {
+ gf_log ("parser", GF_LOG_ERROR,
+ "syntax error: line %d (volume '%s'): \"%s\""
+ "\nallowed tokens are 'volume', 'type', "
+ "'subvolumes', 'option', 'end-volume'()",
+ graphyylineno, curr->name,
+ graphyytext);
+ } else {
+ gf_log ("parser", GF_LOG_ERROR,
+ "syntax error: line %d (just after volume "
+ "'%s'): \"%s\"\n(%s)",
+ graphyylineno, curr->name,
+ graphyytext,
+ "allowed tokens are 'volume', 'type', "
+ "'subvolumes', 'option', 'end-volume'");
+ }
+ } else {
+ gf_log ("parser", GF_LOG_ERROR,
+ "syntax error in line %d: \"%s\" \n"
+ "(allowed tokens are 'volume', 'type', "
+ "'subvolumes', 'option', 'end-volume')\n",
+ graphyylineno, graphyytext);
+ }
+
+ return -1;
+}
+
+
+static int
+execute_cmd (char *cmd, char **result, size_t size)
+{
+ FILE *fpp = NULL;
+ int i = 0;
+ int status = 0;
+ int character = 0;
+ char *buf = *result;
+
+ fpp = popen (cmd, "r");
+ if (!fpp) {
+ gf_log ("parser", GF_LOG_ERROR, "%s: failed to popen", cmd);
+ return -1;
+ }
+
+ while ((character = fgetc (fpp)) != EOF) {
+ if (i == size) {
+ size *= 2;
+ buf = *result = GF_REALLOC (*result, size);
+ }
+
+ buf[i++] = character;
+ }
+
+ if (i > 0) {
+ i--;
+ buf[i] = '\0';
+ }
+
+ status = pclose (fpp);
+ if (status == -1 || !WIFEXITED (status) ||
+ ((WEXITSTATUS (status)) != 0)) {
+ i = -1;
+ buf[0] = '\0';
+ }
+
+ return i;
+}
+
+
+static int
+preprocess (FILE *srcfp, FILE *dstfp)
+{
+ int ret = 0;
+ int i = 0;
+ char *cmd = NULL;
+ char *result = NULL;
+ size_t cmd_buf_size = GF_CMD_BUFFER_LEN;
+ char escaped = 0;
+ char in_backtick = 0;
+ int line = 1;
+ int column = 0;
+ int character = 0;
+
+
+ fseek (srcfp, 0L, SEEK_SET);
+ fseek (dstfp, 0L, SEEK_SET);
+
+ cmd = GF_CALLOC (cmd_buf_size, 1,
+ gf_common_mt_char);
+ if (cmd == NULL) {
+ gf_log ("parser", GF_LOG_ERROR, "Out of memory");
+ return -1;
+ }
+
+ result = GF_CALLOC (cmd_buf_size * 2, 1,
+ gf_common_mt_char);
+ if (result == NULL) {
+ GF_FREE (cmd);
+ gf_log ("parser", GF_LOG_ERROR, "Out of memory");
+ return -1;
+ }
+
+ while ((character = fgetc (srcfp)) != EOF) {
+ if ((character == '`') && !escaped) {
+ if (in_backtick) {
+ cmd[i] = '\0';
+ result[0] = '\0';
+
+ ret = execute_cmd (cmd, &result,
+ 2 * cmd_buf_size);
+ if (ret < 0) {
+ ret = -1;
+ goto out;
+ }
+ fwrite (result, ret, 1, dstfp);
+ } else {
+ i = 0;
+ cmd[i] = '\0';
+ }
+
+ in_backtick = !in_backtick;
+ } else {
+ if (in_backtick) {
+ if (i == cmd_buf_size) {
+ cmd_buf_size *= 2;
+ cmd = GF_REALLOC (cmd, cmd_buf_size);
+ if (cmd == NULL) {
+ GF_FREE (result);
+ return -1;
+ }
+
+ result = GF_REALLOC (result,
+ 2 * cmd_buf_size);
+ if (result == NULL) {
+ GF_FREE (cmd);
+ return -1;
+ }
+ }
+
+ cmd[i++] = character;
+ } else {
+ fputc (character, dstfp);
+ }
+ }
+
+ if (character == '\\') {
+ escaped = !escaped;
+ } else {
+ escaped = 0;
+ }
+
+ if (character == '\n') {
+ line++;
+ column = 0;
+ } else {
+ column++;
+ }
+ }
+
+ if (in_backtick) {
+ gf_log ("parser", GF_LOG_ERROR,
+ "Unterminated backtick in volume specfication file at line (%d), column (%d).",
+ line, column);
+ ret = -1;
+ }
+
+out:
+ fseek (srcfp, 0L, SEEK_SET);
+ fseek (dstfp, 0L, SEEK_SET);
+
+ GF_FREE (cmd);
+ GF_FREE (result);
+
+ return ret;
+}
+
+
+extern FILE *graphyyin;
+
+glusterfs_graph_t *
+glusterfs_graph_new ()
+{
+ glusterfs_graph_t *graph = NULL;
+
+ graph = GF_CALLOC (1, sizeof (*graph),
+ gf_common_mt_glusterfs_graph_t);
+ if (!graph)
+ return NULL;
+
+ INIT_LIST_HEAD (&graph->list);
+
+ gettimeofday (&graph->dob, NULL);
+
+ return graph;
+}
+
+
+glusterfs_graph_t *
+glusterfs_graph_construct (FILE *fp)
+{
+ int ret = 0;
+ int tmp_fd = -1;
+ glusterfs_graph_t *graph = NULL;
+ FILE *tmp_file = NULL;
+ char template[PATH_MAX] = {0};
+ static pthread_mutex_t graph_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+ graph = glusterfs_graph_new ();
+ if (!graph)
+ goto err;
+
+ strcpy (template, "/tmp/tmp.XXXXXX");
+ tmp_fd = mkstemp (template);
+ if (-1 == tmp_fd)
+ goto err;
+
+ ret = unlink (template);
+ if (ret < 0) {
+ gf_log ("parser", GF_LOG_WARNING, "Unable to delete file: %s",
+ template);
+ }
+
+ tmp_file = fdopen (tmp_fd, "w+b");
+ if (!tmp_file)
+ goto err;
+
+ ret = preprocess (fp, tmp_file);
+ if (ret < 0) {
+ gf_log ("parser", GF_LOG_ERROR, "parsing of backticks failed");
+ goto err;
+ }
+
+ pthread_mutex_lock (&graph_mutex);
+ {
+ graphyyin = tmp_file;
+ construct = graph;
+ ret = yyparse ();
+ construct = NULL;
+ }
+ pthread_mutex_unlock (&graph_mutex);
+
+ if (ret == 1) {
+ gf_log ("parser", GF_LOG_DEBUG,
+ "parsing of volfile failed, please review it "
+ "once more");
+ goto err;
+ }
+
+ fclose (tmp_file);
+ return graph;
+err:
+ if (tmp_file) {
+ fclose (tmp_file);
+ } else {
+ gf_log ("parser", GF_LOG_ERROR, "cannot create temporary file");
+ if (-1 != tmp_fd)
+ close (tmp_fd);
+ }
+
+ glusterfs_graph_destroy (graph);
+ return NULL;
+}
+
diff --git a/libglusterfs/src/hashfn.c b/libglusterfs/src/hashfn.c
index cf0e72a9f..f79165b22 100644
--- a/libglusterfs/src/hashfn.c
+++ b/libglusterfs/src/hashfn.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <stdint.h>
@@ -31,7 +22,7 @@
#define DM_DELTA 0x9E3779B9
#define DM_FULLROUNDS 10 /* 32 is overkill, 16 is strong crypto */
-#define DM_PARTROUNDS 6 /* 6 gets complete mixing */
+#define DM_PARTROUNDS 6 /* 6 gets complete mixing */
uint32_t
@@ -52,48 +43,48 @@ ReallySimpleHash (char *path, int len)
/* In any case make sure, you return 1 */
uint32_t SuperFastHash (const char * data, int32_t len) {
- uint32_t hash = len, tmp;
- int32_t rem;
-
- if (len <= 1 || data == NULL) return 1;
-
- rem = len & 3;
- len >>= 2;
-
- /* Main loop */
- for (;len > 0; len--) {
- hash += get16bits (data);
- tmp = (get16bits (data+2) << 11) ^ hash;
- hash = (hash << 16) ^ tmp;
- data += 2*sizeof (uint16_t);
- hash += hash >> 11;
- }
-
- /* Handle end cases */
- switch (rem) {
- case 3: hash += get16bits (data);
- hash ^= hash << 16;
- hash ^= data[sizeof (uint16_t)] << 18;
- hash += hash >> 11;
- break;
- case 2: hash += get16bits (data);
- hash ^= hash << 11;
- hash += hash >> 17;
- break;
- case 1: hash += *data;
- hash ^= hash << 10;
- hash += hash >> 1;
- }
-
- /* Force "avalanching" of final 127 bits */
- hash ^= hash << 3;
- hash += hash >> 5;
- hash ^= hash << 4;
- hash += hash >> 17;
- hash ^= hash << 25;
- hash += hash >> 6;
-
- return hash;
+ uint32_t hash = len, tmp;
+ int32_t rem;
+
+ if (len <= 1 || data == NULL) return 1;
+
+ rem = len & 3;
+ len >>= 2;
+
+ /* Main loop */
+ for (;len > 0; len--) {
+ hash += get16bits (data);
+ tmp = (get16bits (data+2) << 11) ^ hash;
+ hash = (hash << 16) ^ tmp;
+ data += 2*sizeof (uint16_t);
+ hash += hash >> 11;
+ }
+
+ /* Handle end cases */
+ switch (rem) {
+ case 3: hash += get16bits (data);
+ hash ^= hash << 16;
+ hash ^= data[sizeof (uint16_t)] << 18;
+ hash += hash >> 11;
+ break;
+ case 2: hash += get16bits (data);
+ hash ^= hash << 11;
+ hash += hash >> 17;
+ break;
+ case 1: hash += *data;
+ hash ^= hash << 10;
+ hash += hash >> 1;
+ }
+
+ /* Force "avalanching" of final 127 bits */
+ hash ^= hash << 3;
+ hash += hash >> 5;
+ hash ^= hash << 4;
+ hash += hash >> 17;
+ hash ^= hash << 25;
+ hash += hash >> 6;
+
+ return hash;
}
@@ -102,95 +93,95 @@ uint32_t SuperFastHash (const char * data, int32_t len) {
static int
dm_round (int rounds, uint32_t *array, uint32_t *h0, uint32_t *h1)
{
- uint32_t sum = 0;
- int n = 0;
- uint32_t b0 = 0;
- uint32_t b1 = 0;
-
- b0 = *h0;
- b1 = *h1;
-
- n = rounds;
-
- do {
- sum += DM_DELTA;
- b0 += ((b1 << 4) + array[0])
- ^ (b1 + sum)
- ^ ((b1 >> 5) + array[1]);
- b1 += ((b0 << 4) + array[2])
- ^ (b0 + sum)
- ^ ((b0 >> 5) + array[3]);
- } while (--n);
-
- *h0 += b0;
- *h1 += b1;
-
- return 0;
+ uint32_t sum = 0;
+ int n = 0;
+ uint32_t b0 = 0;
+ uint32_t b1 = 0;
+
+ b0 = *h0;
+ b1 = *h1;
+
+ n = rounds;
+
+ do {
+ sum += DM_DELTA;
+ b0 += ((b1 << 4) + array[0])
+ ^ (b1 + sum)
+ ^ ((b1 >> 5) + array[1]);
+ b1 += ((b0 << 4) + array[2])
+ ^ (b0 + sum)
+ ^ ((b0 >> 5) + array[3]);
+ } while (--n);
+
+ *h0 += b0;
+ *h1 += b1;
+
+ return 0;
}
uint32_t
__pad (int len)
{
- uint32_t pad = 0;
+ uint32_t pad = 0;
- pad = (uint32_t) len | ((uint32_t) len << 8);
- pad |= pad << 16;
+ pad = (uint32_t) len | ((uint32_t) len << 8);
+ pad |= pad << 16;
- return pad;
+ return pad;
}
uint32_t
gf_dm_hashfn (const char *msg, int len)
{
- uint32_t h0 = 0x9464a485;
- uint32_t h1 = 0x542e1a94;
- uint32_t array[4];
- uint32_t pad = 0;
- int i = 0;
- int j = 0;
- int full_quads = 0;
- int full_words = 0;
- int full_bytes = 0;
- uint32_t *intmsg = NULL;
- int word = 0;
-
-
- intmsg = (uint32_t *) msg;
- pad = __pad (len);
-
- full_bytes = len;
- full_words = len / 4;
- full_quads = len / 16;
-
- for (i = 0; i < full_quads; i++) {
- for (j = 0; j < 4; j++) {
- word = *intmsg;
- array[j] = word;
- intmsg++;
- full_words--;
- full_bytes -= 4;
- }
- dm_round (DM_PARTROUNDS, &array[0], &h0, &h1);
- }
-
- for (j = 0; j < 4; j++) {
- if (full_words) {
- word = *intmsg;
- array[j] = word;
- intmsg++;
- full_words--;
- full_bytes -= 4;
- } else {
- array[j] = pad;
- while (full_bytes) {
- array[j] <<= 8;
- array[j] |= msg[len - full_bytes];
- full_bytes--;
- }
- }
- }
- dm_round (DM_FULLROUNDS, &array[0], &h0, &h1);
-
- return h0 ^ h1;
+ uint32_t h0 = 0x9464a485;
+ uint32_t h1 = 0x542e1a94;
+ uint32_t array[4];
+ uint32_t pad = 0;
+ int i = 0;
+ int j = 0;
+ int full_quads = 0;
+ int full_words = 0;
+ int full_bytes = 0;
+ uint32_t *intmsg = NULL;
+ int word = 0;
+
+
+ intmsg = (uint32_t *) msg;
+ pad = __pad (len);
+
+ full_bytes = len;
+ full_words = len / 4;
+ full_quads = len / 16;
+
+ for (i = 0; i < full_quads; i++) {
+ for (j = 0; j < 4; j++) {
+ word = *intmsg;
+ array[j] = word;
+ intmsg++;
+ full_words--;
+ full_bytes -= 4;
+ }
+ dm_round (DM_PARTROUNDS, &array[0], &h0, &h1);
+ }
+
+ for (j = 0; j < 4; j++) {
+ if (full_words) {
+ word = *intmsg;
+ array[j] = word;
+ intmsg++;
+ full_words--;
+ full_bytes -= 4;
+ } else {
+ array[j] = pad;
+ while (full_bytes) {
+ array[j] <<= 8;
+ array[j] |= msg[len - full_bytes];
+ full_bytes--;
+ }
+ }
+ }
+ dm_round (DM_FULLROUNDS, &array[0], &h0, &h1);
+
+ return h0 ^ h1;
}
diff --git a/libglusterfs/src/hashfn.h b/libglusterfs/src/hashfn.h
index b8adcc6cc..06ae37e79 100644
--- a/libglusterfs/src/hashfn.h
+++ b/libglusterfs/src/hashfn.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __HASHFN_H__
@@ -32,6 +23,5 @@ uint32_t SuperFastHash (const char * data, int32_t len);
uint32_t gf_dm_hashfn (const char *msg, int len);
-uint32_t
-ReallySimpleHash (char *path, int len);
+uint32_t ReallySimpleHash (char *path, int len);
#endif /* __HASHFN_H__ */
diff --git a/libglusterfs/src/iatt.h b/libglusterfs/src/iatt.h
new file mode 100644
index 000000000..60ae59047
--- /dev/null
+++ b/libglusterfs/src/iatt.h
@@ -0,0 +1,331 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _IATT_H
+#define _IATT_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h> /* for iatt <--> stat conversions */
+#include <unistd.h>
+
+#include "compat.h"
+#include "uuid.h"
+
+typedef enum {
+ IA_INVAL = 0,
+ IA_IFREG,
+ IA_IFDIR,
+ IA_IFLNK,
+ IA_IFBLK,
+ IA_IFCHR,
+ IA_IFIFO,
+ IA_IFSOCK
+} ia_type_t;
+
+
+typedef struct {
+ uint8_t suid:1;
+ uint8_t sgid:1;
+ uint8_t sticky:1;
+ struct {
+ uint8_t read:1;
+ uint8_t write:1;
+ uint8_t exec:1;
+ } owner, group, other;
+} ia_prot_t;
+
+
+struct iatt {
+ uint64_t ia_ino; /* inode number */
+ uuid_t ia_gfid;
+ uint64_t ia_dev; /* backing device ID */
+ ia_type_t ia_type; /* type of file */
+ ia_prot_t ia_prot; /* protection */
+ uint32_t ia_nlink; /* Link count */
+ uint32_t ia_uid; /* user ID of owner */
+ uint32_t ia_gid; /* group ID of owner */
+ uint64_t ia_rdev; /* device ID (if special file) */
+ uint64_t ia_size; /* file size in bytes */
+ uint32_t ia_blksize; /* blocksize for filesystem I/O */
+ uint64_t ia_blocks; /* number of 512B blocks allocated */
+ uint32_t ia_atime; /* last access time */
+ uint32_t ia_atime_nsec;
+ uint32_t ia_mtime; /* last modification time */
+ uint32_t ia_mtime_nsec;
+ uint32_t ia_ctime; /* last status change time */
+ uint32_t ia_ctime_nsec;
+};
+
+
+#define IA_ISREG(t) (t == IA_IFREG)
+#define IA_ISDIR(t) (t == IA_IFDIR)
+#define IA_ISLNK(t) (t == IA_IFLNK)
+#define IA_ISBLK(t) (t == IA_IFBLK)
+#define IA_ISCHR(t) (t == IA_IFCHR)
+#define IA_ISFIFO(t) (t == IA_IFIFO)
+#define IA_ISSOCK(t) (t == IA_IFSOCK)
+
+#define IA_PROT_RUSR(prot) ((prot).owner.read == 1)
+#define IA_PROT_WUSR(prot) ((prot).owner.write == 1)
+#define IA_PROT_XUSR(prot) ((prot).owner.exec == 1)
+
+#define IA_PROT_RGRP(prot) ((prot).group.read == 1)
+#define IA_PROT_WGRP(prot) ((prot).group.write == 1)
+#define IA_PROT_XGRP(prot) ((prot).group.exec == 1)
+
+#define IA_PROT_ROTH(prot) ((prot).other.read == 1)
+#define IA_PROT_WOTH(prot) ((prot).other.write == 1)
+#define IA_PROT_XOTH(prot) ((prot).other.exec == 1)
+
+#define IA_PROT_SUID(prot) ((prot).suid == 1)
+#define IA_PROT_SGID(prot) ((prot).sgid == 1)
+#define IA_PROT_STCKY(prot) ((prot).sticky == 1)
+
+#define IA_FILE_OR_DIR(t) (IA_ISREG(t) || IA_ISDIR(t))
+
+static inline uint32_t
+ia_major (uint64_t ia_dev)
+{
+ return (uint32_t) (ia_dev >> 32);
+}
+
+
+static inline uint32_t
+ia_minor (uint64_t ia_dev)
+{
+ return (uint32_t) (ia_dev & 0xffffffff);
+}
+
+
+static inline uint64_t
+ia_makedev (uint32_t ia_maj, uint32_t ia_min)
+{
+ return ((((uint64_t) ia_maj) << 32) | ia_min);
+}
+
+
+static inline ia_prot_t
+ia_prot_from_st_mode (mode_t mode)
+{
+ ia_prot_t ia_prot = {0, };
+
+ if (mode & S_ISUID)
+ ia_prot.suid = 1;
+ if (mode & S_ISGID)
+ ia_prot.sgid = 1;
+ if (mode & S_ISVTX)
+ ia_prot.sticky = 1;
+
+ if (mode & S_IRUSR)
+ ia_prot.owner.read = 1;
+ if (mode & S_IWUSR)
+ ia_prot.owner.write = 1;
+ if (mode & S_IXUSR)
+ ia_prot.owner.exec = 1;
+
+ if (mode & S_IRGRP)
+ ia_prot.group.read = 1;
+ if (mode & S_IWGRP)
+ ia_prot.group.write = 1;
+ if (mode & S_IXGRP)
+ ia_prot.group.exec = 1;
+
+ if (mode & S_IROTH)
+ ia_prot.other.read = 1;
+ if (mode & S_IWOTH)
+ ia_prot.other.write = 1;
+ if (mode & S_IXOTH)
+ ia_prot.other.exec = 1;
+
+ return ia_prot;
+}
+
+
+static inline ia_type_t
+ia_type_from_st_mode (mode_t mode)
+{
+ ia_type_t type = IA_INVAL;
+
+ if (S_ISREG (mode))
+ type = IA_IFREG;
+ if (S_ISDIR (mode))
+ type = IA_IFDIR;
+ if (S_ISLNK (mode))
+ type = IA_IFLNK;
+ if (S_ISBLK (mode))
+ type = IA_IFBLK;
+ if (S_ISCHR (mode))
+ type = IA_IFCHR;
+ if (S_ISFIFO (mode))
+ type = IA_IFIFO;
+ if (S_ISSOCK (mode))
+ type = IA_IFSOCK;
+
+ return type;
+}
+
+
+static inline mode_t
+st_mode_from_ia (ia_prot_t prot, ia_type_t type)
+{
+ mode_t st_mode = 0;
+ uint32_t type_bit = 0;
+ uint32_t prot_bit = 0;
+
+ switch (type) {
+ case IA_IFREG:
+ type_bit = S_IFREG;
+ break;
+ case IA_IFDIR:
+ type_bit = S_IFDIR;
+ break;
+ case IA_IFLNK:
+ type_bit = S_IFLNK;
+ break;
+ case IA_IFBLK:
+ type_bit = S_IFBLK;
+ break;
+ case IA_IFCHR:
+ type_bit = S_IFCHR;
+ break;
+ case IA_IFIFO:
+ type_bit = S_IFIFO;
+ break;
+ case IA_IFSOCK:
+ type_bit = S_IFSOCK;
+ break;
+ case IA_INVAL:
+ break;
+ }
+
+ if (prot.suid)
+ prot_bit |= S_ISUID;
+ if (prot.sgid)
+ prot_bit |= S_ISGID;
+ if (prot.sticky)
+ prot_bit |= S_ISVTX;
+
+ if (prot.owner.read)
+ prot_bit |= S_IRUSR;
+ if (prot.owner.write)
+ prot_bit |= S_IWUSR;
+ if (prot.owner.exec)
+ prot_bit |= S_IXUSR;
+
+ if (prot.group.read)
+ prot_bit |= S_IRGRP;
+ if (prot.group.write)
+ prot_bit |= S_IWGRP;
+ if (prot.group.exec)
+ prot_bit |= S_IXGRP;
+
+ if (prot.other.read)
+ prot_bit |= S_IROTH;
+ if (prot.other.write)
+ prot_bit |= S_IWOTH;
+ if (prot.other.exec)
+ prot_bit |= S_IXOTH;
+
+ st_mode = (type_bit | prot_bit);
+
+ return st_mode;
+}
+
+
+static inline int
+iatt_from_stat (struct iatt *iatt, struct stat *stat)
+{
+ iatt->ia_dev = stat->st_dev;
+ iatt->ia_ino = stat->st_ino;
+
+ iatt->ia_type = ia_type_from_st_mode (stat->st_mode);
+ iatt->ia_prot = ia_prot_from_st_mode (stat->st_mode);
+
+ iatt->ia_nlink = stat->st_nlink;
+ iatt->ia_uid = stat->st_uid;
+ iatt->ia_gid = stat->st_gid;
+
+ iatt->ia_rdev = ia_makedev (major (stat->st_rdev),
+ minor (stat->st_rdev));
+
+ iatt->ia_size = stat->st_size;
+ iatt->ia_blksize = stat->st_blksize;
+ iatt->ia_blocks = stat->st_blocks;
+
+ /* There is a possibility that the backend FS (like XFS) can
+ allocate blocks beyond EOF for better performance reasons, which
+ results in 'st_blocks' with higher values than what is consumed by
+ the file descriptor. This would break few logic inside GlusterFS,
+ like quota behavior etc, thus we need the exact number of blocks
+ which are consumed by the file to the higher layers inside GlusterFS.
+ Currently, this logic won't work for sparse files (ie, file with
+ holes)
+ */
+ {
+ uint64_t maxblocks;
+
+ maxblocks = (iatt->ia_size + 511) / 512;
+
+ if (iatt->ia_blocks > maxblocks)
+ iatt->ia_blocks = maxblocks;
+ }
+
+ iatt->ia_atime = stat->st_atime;
+ iatt->ia_atime_nsec = ST_ATIM_NSEC (stat);
+
+ iatt->ia_mtime = stat->st_mtime;
+ iatt->ia_mtime_nsec = ST_MTIM_NSEC (stat);
+
+ iatt->ia_ctime = stat->st_ctime;
+ iatt->ia_ctime_nsec = ST_CTIM_NSEC (stat);
+
+ return 0;
+}
+
+
+static inline int
+iatt_to_stat (struct iatt *iatt, struct stat *stat)
+{
+ stat->st_dev = iatt->ia_dev;
+ stat->st_ino = iatt->ia_ino;
+
+ stat->st_mode = st_mode_from_ia (iatt->ia_prot, iatt->ia_type);
+
+ stat->st_nlink = iatt->ia_nlink;
+ stat->st_uid = iatt->ia_uid;
+ stat->st_gid = iatt->ia_gid;
+
+ stat->st_rdev = makedev (ia_major (iatt->ia_rdev),
+ ia_minor (iatt->ia_rdev));
+
+ stat->st_size = iatt->ia_size;
+ stat->st_blksize = iatt->ia_blksize;
+ stat->st_blocks = iatt->ia_blocks;
+
+ stat->st_atime = iatt->ia_atime;
+ ST_ATIM_NSEC_SET (stat, iatt->ia_atime_nsec);
+
+ stat->st_mtime = iatt->ia_mtime;
+ ST_MTIM_NSEC_SET (stat, iatt->ia_mtime_nsec);
+
+ stat->st_ctime = iatt->ia_ctime;
+ ST_CTIM_NSEC_SET (stat, iatt->ia_ctime_nsec);
+
+ return 0;
+}
+
+
+#endif /* _IATT_H */
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
index ff65d101e..15e0ccf78 100644
--- a/libglusterfs/src/inode.c
+++ b/libglusterfs/src/inode.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -23,6 +14,7 @@
#endif
#include "inode.h"
+#include "fd.h"
#include "common-utils.h"
#include "statedump.h"
#include <pthread.h>
@@ -32,21 +24,21 @@
#include <time.h>
#include <assert.h>
-/* TODO:
+/* TODO:
move latest accessed dentry to list_head of inode
*/
-#define INODE_DUMP_LIST(head, key_buf, key_prefix, list_type) \
-{ \
- int i = 1;\
- inode_t *inode = NULL;\
- list_for_each_entry (inode, head, list) {\
- gf_proc_dump_build_key(key_buf, key_prefix, "%s.%d",list_type,\
- i++);\
- gf_proc_dump_add_section(key_buf);\
- inode_dump(inode, key);\
- }\
-}
+#define INODE_DUMP_LIST(head, key_buf, key_prefix, list_type) \
+ { \
+ int i = 1; \
+ inode_t *inode = NULL; \
+ list_for_each_entry (inode, head, list) { \
+ gf_proc_dump_build_key(key_buf, key_prefix, \
+ "%s.%d",list_type, i++); \
+ gf_proc_dump_add_section(key_buf); \
+ inode_dump(inode, key); \
+ } \
+ }
static inode_t *
__inode_unref (inode_t *inode);
@@ -54,10 +46,11 @@ __inode_unref (inode_t *inode);
static int
inode_table_prune (inode_table_t *table);
+void
+fd_dump (struct list_head *head, char *prefix);
+
static int
-hash_name (ino_t par,
- const char *name,
- int mod)
+hash_dentry (inode_t *parent, const char *name, int mod)
{
int hash = 0;
int ret = 0;
@@ -68,21 +61,20 @@ hash_name (ino_t par,
hash = (hash << 5) - hash + *name;
}
}
- ret = (hash + par) % mod;
+ ret = (hash + (unsigned long)parent) % mod;
return ret;
}
static int
-hash_inode (ino_t ino,
- int mod)
+hash_gfid (uuid_t uuid, int mod)
{
- int hash = 0;
+ int ret = 0;
- hash = ino % mod;
+ ret = uuid[15] + (uuid[14] << 8);
- return hash;
+ return ret;
}
@@ -92,21 +84,28 @@ __dentry_hash (dentry_t *dentry)
inode_table_t *table = NULL;
int hash = 0;
+ if (!dentry) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "dentry not found");
+ return;
+ }
+
table = dentry->inode->table;
- hash = hash_name (dentry->parent->ino, dentry->name,
- table->hashsize);
+ hash = hash_dentry (dentry->parent, dentry->name,
+ table->hashsize);
list_del_init (&dentry->hash);
list_add (&dentry->hash, &table->name_hash[hash]);
-
- list_del_init (&dentry->parent_list);
- list_add (&dentry->parent_list, &dentry->parent->child_list);
}
static int
__is_dentry_hashed (dentry_t *dentry)
{
+ if (!dentry) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "dentry not found");
+ return 0;
+ }
+
return !list_empty (&dentry->hash);
}
@@ -114,6 +113,11 @@ __is_dentry_hashed (dentry_t *dentry)
static void
__dentry_unhash (dentry_t *dentry)
{
+ if (!dentry) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "dentry not found");
+ return;
+ }
+
list_del_init (&dentry->hash);
}
@@ -121,120 +125,165 @@ __dentry_unhash (dentry_t *dentry)
static void
__dentry_unset (dentry_t *dentry)
{
+ if (!dentry) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "dentry not found");
+ return;
+ }
+
__dentry_unhash (dentry);
list_del_init (&dentry->inode_list);
- if (dentry->name)
- FREE (dentry->name);
+ GF_FREE (dentry->name);
if (dentry->parent) {
- list_del_init (&dentry->parent_list);
__inode_unref (dentry->parent);
dentry->parent = NULL;
}
- FREE (dentry);
+ mem_put (dentry);
}
-static void
-__inode_unhash (inode_t *inode)
+static int
+__foreach_ancestor_dentry (dentry_t *dentry,
+ int (per_dentry_fn) (dentry_t *dentry,
+ void *data),
+ void *data)
{
- list_del_init (&inode->hash);
-}
+ inode_t *parent = NULL;
+ dentry_t *each = NULL;
+ int ret = 0;
+ if (!dentry) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "dentry not found");
+ return 0;
+ }
-static int
-__is_inode_hashed (inode_t *inode)
-{
- return !list_empty (&inode->hash);
+ ret = per_dentry_fn (dentry, data);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_WARNING, "per dentry fn returned %d", ret);
+ goto out;
+ }
+
+ parent = dentry->parent;
+ if (!parent) {
+ gf_log (THIS->name, GF_LOG_WARNING, "parent not found");
+ goto out;
+ }
+
+ list_for_each_entry (each, &parent->dentry_list, inode_list) {
+ ret = __foreach_ancestor_dentry (each, per_dentry_fn, data);
+ if (ret)
+ goto out;
+ }
+out:
+ return ret;
}
-static void
-__inode_hash (inode_t *inode)
+static int
+__check_cycle (dentry_t *a_dentry, void *data)
{
- inode_table_t *table = NULL;
- int hash = 0;
+ inode_t *link_inode = NULL;
- table = inode->table;
- hash = hash_inode (inode->ino, table->hashsize);
+ link_inode = data;
- list_del_init (&inode->hash);
- list_add (&inode->hash, &table->inode_hash[hash]);
+ if (a_dentry->parent == link_inode)
+ return 1;
+
+ return 0;
}
-static inode_t *
-__inode_search (inode_table_t *table,
- ino_t ino)
+static int
+__is_dentry_cyclic (dentry_t *dentry)
{
- int hash = 0;
+ int ret = 0;
inode_t *inode = NULL;
- inode_t *tmp = NULL;
+ char *name = "<nul>";
- hash = hash_inode (ino, table->hashsize);
+ ret = __foreach_ancestor_dentry (dentry, __check_cycle,
+ dentry->inode);
+ if (ret) {
+ inode = dentry->inode;
- list_for_each_entry (tmp, &table->inode_hash[hash], hash) {
- if (tmp->ino == ino) {
- inode = tmp;
- break;
- }
+ if (dentry->name)
+ name = dentry->name;
+
+ gf_log (dentry->inode->table->name, GF_LOG_CRITICAL,
+ "detected cyclic loop formation during inode linkage."
+ " inode (%s) linking under itself as %s",
+ uuid_utoa (inode->gfid), name);
}
- return inode;
+ return ret;
}
-static dentry_t *
-__dentry_search_for_inode (inode_t *inode,
- ino_t par,
- const char *name)
+static void
+__inode_unhash (inode_t *inode)
{
- dentry_t *dentry = NULL;
- dentry_t *tmp = NULL;
+ if (!inode) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
+ return;
+ }
- list_for_each_entry (tmp, &inode->dentry_list, inode_list) {
- if (tmp->parent->ino == par && !strcmp (tmp->name, name)) {
- dentry = tmp;
- break;
- }
+ list_del_init (&inode->hash);
+}
+
+
+static int
+__is_inode_hashed (inode_t *inode)
+{
+ if (!inode) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
+ return 0;
}
- return dentry;
+ return !list_empty (&inode->hash);
}
-dentry_t *
-dentry_search_for_inode (inode_t *inode,
- ino_t par,
- const char *name)
+static void
+__inode_hash (inode_t *inode)
{
- dentry_t *dentry = NULL;
- pthread_mutex_lock (&inode->table->lock);
- {
- dentry = __dentry_search_for_inode (inode, par, name);
+ inode_table_t *table = NULL;
+ int hash = 0;
+
+ if (!inode) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
+ return;
}
- pthread_mutex_unlock (&inode->table->lock);
-
- return dentry;
+
+ table = inode->table;
+ hash = hash_gfid (inode->gfid, 65536);
+
+ list_del_init (&inode->hash);
+ list_add (&inode->hash, &table->inode_hash[hash]);
}
static dentry_t *
-__dentry_search (inode_table_t *table,
- ino_t par,
- const char *name)
+__dentry_search_for_inode (inode_t *inode, uuid_t pargfid, const char *name)
{
- int hash = 0;
dentry_t *dentry = NULL;
dentry_t *tmp = NULL;
- hash = hash_name (par, name, table->hashsize);
+ if (!inode || !name) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode || name not found");
+ return NULL;
+ }
- list_for_each_entry (tmp, &table->name_hash[hash], hash) {
- if (tmp->parent->ino == par && !strcmp (tmp->name, name)) {
+ /* earlier, just the ino was sent, which could have been 0, now
+ we deal with gfid, and if sent gfid is null or 0, no need to
+ continue with the check */
+ if (!pargfid || uuid_is_null (pargfid))
+ return NULL;
+
+ list_for_each_entry (tmp, &inode->dentry_list, inode_list) {
+ if ((uuid_compare (tmp->parent->gfid, pargfid) == 0) &&
+ !strcmp (tmp->name, name)) {
dentry = tmp;
break;
}
@@ -247,31 +296,45 @@ __dentry_search (inode_table_t *table,
static void
__inode_destroy (inode_t *inode)
{
- int index = 0;
+ int index = 0;
xlator_t *xl = NULL;
+ xlator_t *old_THIS = NULL;
- if (!inode->_ctx)
- goto noctx;
+ if (!inode) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
+ return;
+ }
+
+ if (!inode->_ctx) {
+ gf_log (THIS->name, GF_LOG_WARNING, "_ctx not found");
+ goto noctx;
+ }
- for (index = 0; index < inode->table->xl->ctx->xl_count; index++) {
- if (inode->_ctx[index].key) {
- xl = (xlator_t *)(long)inode->_ctx[index].key;
- if (xl->cbks->forget)
- xl->cbks->forget (xl, inode);
- }
- }
+ for (index = 0; index < inode->table->ctxcount; index++) {
+ if (inode->_ctx[index].xl_key) {
+ xl = (xlator_t *)(long)inode->_ctx[index].xl_key;
+ old_THIS = THIS;
+ THIS = xl;
+ if (xl->cbks->forget)
+ xl->cbks->forget (xl, inode);
+ THIS = old_THIS;
+ }
+ }
- FREE (inode->_ctx);
+ GF_FREE (inode->_ctx);
noctx:
LOCK_DESTROY (&inode->lock);
// memset (inode, 0xb, sizeof (*inode));
- FREE (inode);
+ mem_put (inode);
}
static void
__inode_activate (inode_t *inode)
{
+ if (!inode)
+ return;
+
list_move (&inode->list, &inode->table->active);
inode->table->active_size++;
}
@@ -282,9 +345,11 @@ __inode_passivate (inode_t *inode)
{
dentry_t *dentry = NULL;
dentry_t *t = NULL;
- inode_table_t *table = NULL;
- table = inode->table;
+ if (!inode) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
+ return;
+ }
list_move_tail (&inode->list, &inode->table->lru);
inode->table->lru_size++;
@@ -301,15 +366,16 @@ __inode_retire (inode_t *inode)
{
dentry_t *dentry = NULL;
dentry_t *t = NULL;
- inode_table_t *table = NULL;
- table = inode->table;
+ if (!inode) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
+ return;
+ }
list_move_tail (&inode->list, &inode->table->purge);
inode->table->purge_size++;
__inode_unhash (inode);
- assert (list_empty (&inode->child_list));
list_for_each_entry_safe (dentry, t, &inode->dentry_list, inode_list) {
__dentry_unset (dentry);
@@ -320,17 +386,20 @@ __inode_retire (inode_t *inode)
static inode_t *
__inode_unref (inode_t *inode)
{
- if (inode->ino == 1)
+ if (!inode)
+ return NULL;
+
+ if (__is_root_gfid(inode->gfid))
return inode;
- assert (inode->ref);
+ GF_ASSERT (inode->ref);
--inode->ref;
if (!inode->ref) {
inode->table->active_size--;
- if (inode->nlookup && __is_inode_hashed (inode))
+ if (inode->nlookup)
__inode_passivate (inode);
else
__inode_retire (inode);
@@ -343,6 +412,9 @@ __inode_unref (inode_t *inode)
static inode_t *
__inode_ref (inode_t *inode)
{
+ if (!inode)
+ return NULL;
+
if (!inode->ref) {
inode->table->lru_size--;
__inode_activate (inode);
@@ -358,6 +430,9 @@ inode_unref (inode_t *inode)
{
inode_table_t *table = NULL;
+ if (!inode)
+ return NULL;
+
table = inode->table;
pthread_mutex_lock (&table->lock);
@@ -377,6 +452,9 @@ inode_ref (inode_t *inode)
{
inode_table_t *table = NULL;
+ if (!inode)
+ return NULL;
+
table = inode->table;
pthread_mutex_lock (&table->lock);
@@ -390,25 +468,38 @@ inode_ref (inode_t *inode)
static dentry_t *
-__dentry_create (inode_t *inode,
- inode_t *parent,
- const char *name)
+__dentry_create (inode_t *inode, inode_t *parent, const char *name)
{
dentry_t *newd = NULL;
- newd = (void *) CALLOC (1, sizeof (*newd));
+ if (!inode || !parent || !name) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING,
+ "inode || parent || name not found");
+ return NULL;
+ }
+
+ newd = mem_get0 (parent->table->dentry_pool);
+ if (newd == NULL) {
+ goto out;
+ }
INIT_LIST_HEAD (&newd->inode_list);
- INIT_LIST_HEAD (&newd->parent_list);
INIT_LIST_HEAD (&newd->hash);
- list_add (&newd->parent_list, &parent->child_list);
- newd->parent = __inode_ref (parent);
- newd->name = strdup (name);
+ newd->name = gf_strdup (name);
+ if (newd->name == NULL) {
+ mem_put (newd);
+ newd = NULL;
+ goto out;
+ }
+
+ if (parent)
+ newd->parent = __inode_ref (parent);
list_add (&newd->inode_list, &inode->dentry_list);
newd->inode = inode;
+out:
return newd;
}
@@ -418,9 +509,15 @@ __inode_create (inode_table_t *table)
{
inode_t *newi = NULL;
- newi = (void *) CALLOC (1, sizeof (*newi));
- if (!newi)
+ if (!table) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "table not found");
return NULL;
+ }
+
+ newi = mem_get0 (table->inode_pool);
+ if (!newi) {
+ goto out;
+ }
newi->table = table;
@@ -430,14 +527,21 @@ __inode_create (inode_table_t *table)
INIT_LIST_HEAD (&newi->list);
INIT_LIST_HEAD (&newi->hash);
INIT_LIST_HEAD (&newi->dentry_list);
- INIT_LIST_HEAD (&newi->child_list);
+ newi->_ctx = GF_CALLOC (1,
+ (sizeof (struct _inode_ctx) * table->ctxcount),
+ gf_common_mt_inode_ctx);
+ if (newi->_ctx == NULL) {
+ LOCK_DESTROY (&newi->lock);
+ mem_put (newi);
+ newi = NULL;
+ goto out;
+ }
list_add (&newi->list, &table->lru);
table->lru_size++;
- newi->_ctx = CALLOC (1, (sizeof (struct _inode_ctx) *
- table->xl->ctx->xl_count));
+out:
return newi;
}
@@ -448,10 +552,17 @@ inode_new (inode_table_t *table)
{
inode_t *inode = NULL;
+ if (!table) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
+ return NULL;
+ }
+
pthread_mutex_lock (&table->lock);
{
inode = __inode_create (table);
- __inode_ref (inode);
+ if (inode != NULL) {
+ __inode_ref (inode);
+ }
}
pthread_mutex_unlock (&table->lock);
@@ -462,6 +573,9 @@ inode_new (inode_table_t *table)
static inode_t *
__inode_lookup (inode_t *inode)
{
+ if (!inode)
+ return NULL;
+
inode->nlookup++;
return inode;
@@ -471,7 +585,10 @@ __inode_lookup (inode_t *inode)
static inode_t *
__inode_forget (inode_t *inode, uint64_t nlookup)
{
- assert (inode->nlookup >= nlookup);
+ if (!inode)
+ return NULL;
+
+ GF_ASSERT (inode->nlookup >= nlookup);
inode->nlookup -= nlookup;
@@ -482,25 +599,48 @@ __inode_forget (inode_t *inode, uint64_t nlookup)
}
-inode_t *
-inode_search (inode_table_t *table,
- ino_t ino,
- const char *name)
+dentry_t *
+__dentry_grep (inode_table_t *table, inode_t *parent, const char *name)
{
- inode_t *inode = NULL;
+ int hash = 0;
dentry_t *dentry = NULL;
+ dentry_t *tmp = NULL;
+
+ if (!table || !name || !parent)
+ return NULL;
+
+ hash = hash_dentry (parent, name, table->hashsize);
+
+ list_for_each_entry (tmp, &table->name_hash[hash], hash) {
+ if (tmp->parent == parent && !strcmp (tmp->name, name)) {
+ dentry = tmp;
+ break;
+ }
+ }
+
+ return dentry;
+}
+
+
+inode_t *
+inode_grep (inode_table_t *table, inode_t *parent, const char *name)
+{
+ inode_t *inode = NULL;
+ dentry_t *dentry = NULL;
+
+ if (!table || !parent || !name) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING,
+ "table || parent || name not found");
+ return NULL;
+ }
pthread_mutex_lock (&table->lock);
{
- if (!name) {
- inode = __inode_search (table, ino);
- } else {
- dentry = __dentry_search (table, ino, name);
-
- if (dentry)
- inode = dentry->inode;
- }
-
+ dentry = __dentry_grep (table, parent, name);
+
+ if (dentry)
+ inode = dentry->inode;
+
if (inode)
__inode_ref (inode);
}
@@ -510,138 +650,255 @@ inode_search (inode_table_t *table,
}
-static void
-__copy_dentries (inode_t *oldi, inode_t *newi)
+inode_t *
+inode_resolve (inode_table_t *table, char *path)
{
- dentry_t *dentry = NULL;
- dentry_t *newd = NULL;
- dentry_t *tmp = NULL;
+ char *tmp = NULL, *bname = NULL, *str = NULL, *saveptr = NULL;
+ inode_t *inode = NULL, *parent = NULL;
- list_for_each_entry (dentry, &oldi->dentry_list, inode_list) {
- tmp = __dentry_search_for_inode (newi, dentry->parent->ino,
- dentry->name);
+ if ((path == NULL) || (table == NULL)) {
+ goto out;
+ }
- if (!tmp) {
- newd = __dentry_create (newi, dentry->parent,
- dentry->name);
- } else {
- newd = tmp;
+ parent = inode_ref (table->root);
+ str = tmp = gf_strdup (path);
+
+ while (1) {
+ bname = strtok_r (str, "/", &saveptr);
+ if (bname == NULL) {
+ break;
}
- if (__is_dentry_hashed (dentry)) {
- __dentry_unhash (dentry);
- __dentry_hash (newd);
+ if (inode != NULL) {
+ inode_unref (inode);
}
+
+ inode = inode_grep (table, parent, bname);
+ if (inode == NULL) {
+ break;
+ }
+
+ if (parent != NULL) {
+ inode_unref (parent);
+ }
+
+ parent = inode_ref (inode);
+ str = NULL;
}
+
+ inode_unref (parent);
+ GF_FREE (tmp);
+out:
+ return inode;
}
-static void
-__adopt_children (inode_t *oldi, inode_t *newi)
+int
+inode_grep_for_gfid (inode_table_t *table, inode_t *parent, const char *name,
+ uuid_t gfid, ia_type_t *type)
{
- dentry_t *dentry = NULL;
+ inode_t *inode = NULL;
+ dentry_t *dentry = NULL;
+ int ret = -1;
+
+ if (!table || !parent || !name) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING,
+ "table || parent || name not found");
+ return ret;
+ }
- list_for_each_entry (dentry, &oldi->child_list, parent_list) {
- assert (dentry->parent == oldi);
- __inode_unref (dentry->parent);
- dentry->parent = __inode_ref (newi);
+ pthread_mutex_lock (&table->lock);
+ {
+ dentry = __dentry_grep (table, parent, name);
+
+ if (dentry)
+ inode = dentry->inode;
+
+ if (inode) {
+ uuid_copy (gfid, inode->gfid);
+ *type = inode->ia_type;
+ ret = 0;
+ }
}
+ pthread_mutex_unlock (&table->lock);
- list_splice_init (&oldi->child_list, &newi->child_list);
+ return ret;
}
-static void
-__inode_replace (inode_t *oldi, inode_t *newi)
+/* return 1 if gfid is of root, 0 if not */
+gf_boolean_t
+__is_root_gfid (uuid_t gfid)
{
- gf_log (oldi->table->name, GF_LOG_DEBUG,
- "inode(%"PRId64") replaced (%"PRId64"",
- oldi->ino, newi->ino);
+ uuid_t root;
+
+ memset (root, 0, 16);
+ root[15] = 1;
- __copy_dentries (oldi, newi);
- __adopt_children (oldi, newi);
+ if (uuid_compare (gfid, root) == 0)
+ return _gf_true;
- newi->nlookup = oldi->nlookup;
- newi->generation = oldi->generation;
+ return _gf_false;
+}
+
+
+inode_t *
+__inode_find (inode_table_t *table, uuid_t gfid)
+{
+ inode_t *inode = NULL;
+ inode_t *tmp = NULL;
+ int hash = 0;
+
+ if (!table) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "table not found");
+ goto out;
+ }
+
+ if (__is_root_gfid (gfid))
+ return table->root;
+
+ hash = hash_gfid (gfid, 65536);
+
+ list_for_each_entry (tmp, &table->inode_hash[hash], hash) {
+ if (uuid_compare (tmp->gfid, gfid) == 0) {
+ inode = tmp;
+ break;
+ }
+ }
+
+out:
+ return inode;
+}
+
+
+inode_t *
+inode_find (inode_table_t *table, uuid_t gfid)
+{
+ inode_t *inode = NULL;
- oldi->nlookup = 0;
- oldi->generation = 0;
+ if (!table) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "table not found");
+ return NULL;
+ }
- __inode_unhash (oldi);
+ pthread_mutex_lock (&table->lock);
+ {
+ inode = __inode_find (table, gfid);
+ if (inode)
+ __inode_ref (inode);
+ }
+ pthread_mutex_unlock (&table->lock);
- if (newi->ino == 1)
- newi->table->root = newi;
+ return inode;
}
static inode_t *
-__inode_link (inode_t *inode,
- inode_t *parent,
- const char *name,
- struct stat *stbuf)
+__inode_link (inode_t *inode, inode_t *parent, const char *name,
+ struct iatt *iatt)
{
dentry_t *dentry = NULL;
dentry_t *old_dentry = NULL;
inode_t *old_inode = NULL;
inode_table_t *table = NULL;
+ inode_t *link_inode = NULL;
+
+ if (!inode)
+ return NULL;
table = inode->table;
+ if (!table)
+ return NULL;
+
+ if (parent) {
+ /* We should prevent inode linking between different
+ inode tables. This can cause errors which is very
+ hard to catch/debug. */
+ if (inode->table != parent->table) {
+ GF_ASSERT (!"link attempted b/w inodes of diff table");
+ }
+ }
+
+ link_inode = inode;
- if (inode->ino)
- assert (inode->ino == stbuf->st_ino);
+ if (!__is_inode_hashed (inode)) {
+ if (!iatt)
+ return NULL;
- inode->ino = stbuf->st_ino;
- inode->st_mode = stbuf->st_mode;
+ if (uuid_is_null (iatt->ia_gfid))
+ return NULL;
- old_inode = __inode_search (table, stbuf->st_ino);
+ old_inode = __inode_find (table, iatt->ia_gfid);
- if (old_inode && old_inode != inode) {
- __inode_ref (old_inode);
- __inode_replace (old_inode, inode);
- __inode_unref (old_inode);
+ if (old_inode) {
+ link_inode = old_inode;
+ } else {
+ uuid_copy (inode->gfid, iatt->ia_gfid);
+ inode->ia_type = iatt->ia_type;
+ __inode_hash (inode);
+ }
}
- __inode_hash (inode);
+ if (name) {
+ if (!strcmp(name, ".") || !strcmp(name, ".."))
+ return link_inode;
+ }
+
+ /* use only link_inode beyond this point */
if (parent) {
- dentry = __dentry_search_for_inode (inode, parent->ino, name);
- if (!dentry) {
- dentry = __dentry_create (inode, parent, name);
- }
+ old_dentry = __dentry_grep (table, parent, name);
+
+ if (!old_dentry || old_dentry->inode != link_inode) {
+ dentry = __dentry_create (link_inode, parent, name);
+ if (!dentry) {
+ gf_log_callingfn (THIS->name, GF_LOG_ERROR,
+ "dentry create failed on "
+ "inode %s with parent %s",
+ uuid_utoa (link_inode->gfid),
+ uuid_utoa (parent->gfid));
+ return NULL;
+ }
+ if (old_inode && __is_dentry_cyclic (dentry)) {
+ __dentry_unset (dentry);
+ return NULL;
+ }
+ __dentry_hash (dentry);
- old_dentry = __dentry_search (table, parent->ino, name);
- if (old_dentry) {
- __dentry_unhash (old_dentry);
+ if (old_dentry)
+ __dentry_unset (old_dentry);
}
+ }
- __dentry_hash (dentry);
- } else if (inode->ino != 1) {
- gf_log (table->name, GF_LOG_DEBUG,
- "child (%"PRId64") without a parent!", inode->ino);
- }
-
- return inode;
+ return link_inode;
}
-int
-inode_link (inode_t *inode,
- inode_t *parent,
- const char *name,
- struct stat *stbuf)
+inode_t *
+inode_link (inode_t *inode, inode_t *parent, const char *name,
+ struct iatt *iatt)
{
inode_table_t *table = NULL;
+ inode_t *linked_inode = NULL;
+
+ if (!inode) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
+ return NULL;
+ }
table = inode->table;
pthread_mutex_lock (&table->lock);
{
- inode = __inode_link (inode, parent, name, stbuf);
+ linked_inode = __inode_link (inode, parent, name, iatt);
+
+ if (linked_inode)
+ __inode_ref (linked_inode);
}
pthread_mutex_unlock (&table->lock);
inode_table_prune (table);
- return 0;
+ return linked_inode;
}
@@ -649,20 +906,17 @@ int
inode_lookup (inode_t *inode)
{
inode_table_t *table = NULL;
- inode_t *lookup_inode = NULL;
+
+ if (!inode) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
+ return -1;
+ }
table = inode->table;
- lookup_inode = inode;
pthread_mutex_lock (&table->lock);
{
- if (!__is_inode_hashed (inode)) {
- lookup_inode = __inode_search (table, inode->ino);
- if (lookup_inode == NULL)
- lookup_inode = inode;
- }
-
- __inode_lookup (lookup_inode);
+ __inode_lookup (inode);
}
pthread_mutex_unlock (&table->lock);
@@ -674,20 +928,17 @@ int
inode_forget (inode_t *inode, uint64_t nlookup)
{
inode_table_t *table = NULL;
- inode_t *forget_inode = NULL;
+
+ if (!inode) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
+ return -1;
+ }
table = inode->table;
- forget_inode = inode;
pthread_mutex_lock (&table->lock);
{
- if (!__is_inode_hashed (inode)) {
- forget_inode = __inode_search (table, inode->ino);
- if (forget_inode == NULL)
- forget_inode = inode;
- }
-
- __inode_forget (forget_inode, nlookup);
+ __inode_forget (inode, nlookup);
}
pthread_mutex_unlock (&table->lock);
@@ -696,42 +947,86 @@ inode_forget (inode_t *inode, uint64_t nlookup)
return 0;
}
+/*
+ * Invalidate an inode. This is invoked when a translator decides that an inode's
+ * cache is no longer valid. Any translator interested in taking action in this
+ * situation can define the invalidate callback.
+ */
+int
+inode_invalidate(inode_t *inode)
+{
+ int ret = 0;
+ xlator_t *xl = NULL;
+ xlator_t *old_THIS = NULL;
+
+ if (!inode) {
+ gf_log_callingfn(THIS->name, GF_LOG_WARNING, "inode not found");
+ return -1;
+ }
+
+ /*
+ * The master xlator is not in the graph but it can define an invalidate
+ * handler.
+ */
+ xl = inode->table->xl->ctx->master;
+ if (xl && xl->cbks->invalidate) {
+ old_THIS = THIS;
+ THIS = xl;
+ ret = xl->cbks->invalidate(xl, inode);
+ THIS = old_THIS;
+ if (ret)
+ return ret;
+ }
+
+ xl = inode->table->xl->graph->first;
+ while (xl) {
+ old_THIS = THIS;
+ THIS = xl;
+ if (xl->cbks->invalidate)
+ ret = xl->cbks->invalidate(xl, inode);
+ THIS = old_THIS;
+
+ if (ret)
+ break;
+
+ xl = xl->next;
+ }
+
+ return ret;
+}
+
static void
-__inode_unlink (inode_t *inode,
- inode_t *parent,
- const char *name)
+__inode_unlink (inode_t *inode, inode_t *parent, const char *name)
{
dentry_t *dentry = NULL;
- dentry = __dentry_search_for_inode (inode, parent->ino, name);
+ if (!inode || !parent || !name)
+ return;
+
+ dentry = __dentry_search_for_inode (inode, parent->gfid, name);
- /* dentry NULL for corrupted backend */
- if (dentry)
- __dentry_unset (dentry);
+ /* dentry NULL for corrupted backend */
+ if (dentry)
+ __dentry_unset (dentry);
}
-
+
void
-inode_unlink (inode_t *inode,
- inode_t *parent,
- const char *name)
+inode_unlink (inode_t *inode, inode_t *parent, const char *name)
{
inode_table_t *table = NULL;
- inode_t *unlink_inode = NULL;
+
+ if (!inode) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
+ return;
+ }
table = inode->table;
- unlink_inode = inode;
pthread_mutex_lock (&table->lock);
{
- if (!__is_inode_hashed (inode)) {
- unlink_inode = __inode_search (table, inode->ino);
- if (unlink_inode == NULL)
- unlink_inode = inode;
- }
-
- __inode_unlink (unlink_inode, parent, name);
+ __inode_unlink (inode, parent, name);
}
pthread_mutex_unlock (&table->lock);
@@ -740,33 +1035,21 @@ inode_unlink (inode_t *inode,
int
-inode_rename (inode_table_t *table,
- inode_t *srcdir,
- const char *srcname,
- inode_t *dstdir,
- const char *dstname,
- inode_t *inode,
- struct stat *stbuf)
+inode_rename (inode_table_t *table, inode_t *srcdir, const char *srcname,
+ inode_t *dstdir, const char *dstname, inode_t *inode,
+ struct iatt *iatt)
{
- dentry_t *old_dst = NULL;
- inode_t *rename_inode = NULL;
+ if (!inode) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
+ return -1;
+ }
- rename_inode = inode;
+ table = inode->table;
pthread_mutex_lock (&table->lock);
{
- if (!__is_inode_hashed (inode)) {
- rename_inode = __inode_search (table, inode->ino);
- if (rename_inode == NULL)
- rename_inode = inode;
- }
-
- old_dst = __dentry_search (table, dstdir->ino, dstname);
- if (old_dst)
- __dentry_unset (old_dst);
-
- __inode_unlink (rename_inode, srcdir, srcname);
- __inode_link (rename_inode, dstdir, dstname, stbuf);
+ __inode_link (inode, dstdir, dstname, iatt);
+ __inode_unlink (inode, srcdir, srcname);
}
pthread_mutex_unlock (&table->lock);
@@ -779,46 +1062,57 @@ inode_rename (inode_table_t *table,
static dentry_t *
__dentry_search_arbit (inode_t *inode)
{
- dentry_t *dentry = NULL;
- dentry_t *trav = NULL;
+ dentry_t *dentry = NULL;
+ dentry_t *trav = NULL;
- list_for_each_entry (trav, &inode->dentry_list, inode_list) {
- if (__is_dentry_hashed (trav)) {
- dentry = trav;
- break;
- }
- }
+ if (!inode)
+ return NULL;
- if (!dentry) {
- list_for_each_entry (trav, &inode->dentry_list, inode_list) {
- dentry = trav;
- break;
- }
- }
+ list_for_each_entry (trav, &inode->dentry_list, inode_list) {
+ if (__is_dentry_hashed (trav)) {
+ dentry = trav;
+ break;
+ }
+ }
- return dentry;
+ if (!dentry) {
+ list_for_each_entry (trav, &inode->dentry_list, inode_list) {
+ dentry = trav;
+ break;
+ }
+ }
+
+ return dentry;
}
inode_t *
-inode_parent (inode_t *inode, ino_t par, const char *name)
+inode_parent (inode_t *inode, uuid_t pargfid, const char *name)
{
inode_t *parent = NULL;
inode_table_t *table = NULL;
dentry_t *dentry = NULL;
+ if (!inode) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
+ return NULL;
+ }
+
table = inode->table;
pthread_mutex_lock (&table->lock);
{
- if (par && name) {
- dentry = __dentry_search_for_inode (inode, par, name);
+ if (pargfid && !uuid_is_null (pargfid) && name) {
+ dentry = __dentry_search_for_inode (inode, pargfid, name);
} else {
dentry = __dentry_search_arbit (inode);
}
if (dentry)
- parent = __inode_ref (dentry->parent);
+ parent = dentry->parent;
+
+ if (parent)
+ __inode_ref (parent);
}
pthread_mutex_unlock (&table->lock);
@@ -826,108 +1120,136 @@ inode_parent (inode_t *inode, ino_t par, const char *name)
}
-int32_t
-inode_path (inode_t *inode,
- const char *name,
- char **bufp)
+int
+__inode_path (inode_t *inode, const char *name, char **bufp)
{
inode_table_t *table = NULL;
- dentry_t *trav = NULL;
- size_t i = 0, size = 0;
- int64_t ret = 0;
- int len = 0;
- char *buf = NULL;
-
+ inode_t *itrav = NULL;
+ dentry_t *trav = NULL;
+ size_t i = 0, size = 0;
+ int64_t ret = 0;
+ int len = 0;
+ char *buf = NULL;
+
+ if (!inode || uuid_is_null (inode->gfid)) {
+ GF_ASSERT (0);
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING, "invalid inode");
+ return -1;
+ }
+
table = inode->table;
- pthread_mutex_lock (&table->lock);
- {
- for (trav = __dentry_search_arbit (inode); trav;
- trav = __dentry_search_arbit (trav->parent)) {
- i ++; /* "/" */
- i += strlen (trav->name);
- if (i > PATH_MAX) {
- gf_log ("inode", GF_LOG_CRITICAL,
- "possible infinite loop detected, "
- "forcing break. name=(%s)", name);
- ret = -ENOENT;
- goto unlock;
- }
+ itrav = inode;
+ for (trav = __dentry_search_arbit (itrav); trav;
+ trav = __dentry_search_arbit (itrav)) {
+ itrav = trav->parent;
+ i ++; /* "/" */
+ i += strlen (trav->name);
+ if (i > PATH_MAX) {
+ gf_log (table->name, GF_LOG_CRITICAL,
+ "possible infinite loop detected, "
+ "forcing break. name=(%s)", name);
+ ret = -ENOENT;
+ goto out;
}
-
- if ((inode->ino != 1) &&
- (i == 0)) {
- gf_log (table->name, GF_LOG_DEBUG,
- "no dentry for non-root inode %"PRId64,
- inode->ino);
- ret = -ENOENT;
- goto unlock;
- }
+ }
+
+ if (!__is_root_gfid (itrav->gfid)) {
+ /* "<gfid:00000000-0000-0000-0000-000000000000>"/path */
+ i += GFID_STR_PFX_LEN;
+ }
+
+ if (name) {
+ i++;
+ i += strlen (name);
+ }
+
+ ret = i;
+ size = i + 1;
+ buf = GF_CALLOC (size, sizeof (char), gf_common_mt_char);
+ if (buf) {
+
+ buf[size - 1] = 0;
if (name) {
- i++;
- i += strlen (name);
+ len = strlen (name);
+ strncpy (buf + (i - len), name, len);
+ buf[i-len-1] = '/';
+ i -= (len + 1);
}
- ret = i;
- size = i + 1;
- buf = CALLOC (size, sizeof (char));
- if (buf) {
+ itrav = inode;
+ for (trav = __dentry_search_arbit (itrav); trav;
+ trav = __dentry_search_arbit (itrav)) {
+ itrav = trav->parent;
+ len = strlen (trav->name);
+ strncpy (buf + (i - len), trav->name, len);
+ buf[i-len-1] = '/';
+ i -= (len + 1);
+ }
- buf[size - 1] = 0;
+ if (!__is_root_gfid (itrav->gfid)) {
+ snprintf (&buf[i-GFID_STR_PFX_LEN], GFID_STR_PFX_LEN,
+ INODE_PATH_FMT, uuid_utoa (itrav->gfid));
+ buf[i-1] = '>';
+ }
- if (name) {
- len = strlen (name);
- strncpy (buf + (i - len), name, len);
- buf[i-len-1] = '/';
- i -= (len + 1);
- }
+ *bufp = buf;
+ } else {
+ ret = -ENOMEM;
+ }
- for (trav = __dentry_search_arbit (inode); trav;
- trav = __dentry_search_arbit (trav->parent)) {
- len = strlen (trav->name);
- strncpy (buf + (i - len), trav->name, len);
- buf[i-len-1] = '/';
- i -= (len + 1);
- }
- *bufp = buf;
+out:
+ if (__is_root_gfid (inode->gfid) && !name) {
+ ret = 1;
+ GF_FREE (buf);
+ buf = GF_CALLOC (ret + 1, sizeof (char), gf_common_mt_char);
+ if (buf) {
+ strcpy (buf, "/");
+ *bufp = buf;
} else {
- gf_log (table->name, GF_LOG_ERROR,
- "out of memory");
- ret = -ENOMEM;
- }
+ ret = -ENOMEM;
+ }
}
-unlock:
- pthread_mutex_unlock (&table->lock);
- if (inode->ino == 1 && !name) {
- ret = 1;
- if (buf) {
- FREE (buf);
- }
- buf = CALLOC (ret + 1, sizeof (char));
- if (buf) {
- strcpy (buf, "/");
- *bufp = buf;
- } else {
- gf_log (table->name, GF_LOG_ERROR,
- "out of memory");
- ret = -ENOMEM;
- }
- }
+ if (ret < 0)
+ *bufp = NULL;
+ return ret;
+}
+
+
+int
+inode_path (inode_t *inode, const char *name, char **bufp)
+{
+ inode_table_t *table = NULL;
+ int ret = -1;
+
+ if (!inode)
+ return -1;
+
+ table = inode->table;
+
+ pthread_mutex_lock (&table->lock);
+ {
+ ret = __inode_path (inode, name, bufp);
+ }
+ pthread_mutex_unlock (&table->lock);
return ret;
}
+
static int
inode_table_prune (inode_table_t *table)
{
int ret = 0;
struct list_head purge = {0, };
- inode_t *del = NULL;
- inode_t *tmp = NULL;
- inode_t *entry = NULL;
+ inode_t *del = NULL;
+ inode_t *tmp = NULL;
+ inode_t *entry = NULL;
+ if (!table)
+ return -1;
INIT_LIST_HEAD (&purge);
@@ -935,7 +1257,7 @@ inode_table_prune (inode_table_t *table)
{
while (table->lru_limit
&& table->lru_size > (table->lru_limit)) {
-
+
entry = list_entry (table->lru.next, inode_t, list);
table->lru_size--;
@@ -945,7 +1267,7 @@ inode_table_prune (inode_table_t *table)
}
list_splice_init (&table->purge, &purge);
- table->purge_size = 0;
+ table->purge_size = 0;
}
pthread_mutex_unlock (&table->lock);
@@ -964,15 +1286,19 @@ inode_table_prune (inode_table_t *table)
static void
__inode_table_init_root (inode_table_t *table)
{
- inode_t *root = NULL;
- struct stat stbuf = {0, };
+ inode_t *root = NULL;
+ struct iatt iatt = {0, };
+
+ if (!table)
+ return;
root = __inode_create (table);
- stbuf.st_ino = 1;
- stbuf.st_mode = S_IFDIR|0755;
+ iatt.ia_gfid[15] = 1;
+ iatt.ia_ino = 1;
+ iatt.ia_type = IA_IFDIR;
- __inode_link (root, NULL, NULL, &stbuf);
+ __inode_link (root, NULL, NULL, &iatt);
table->root = root;
}
@@ -981,40 +1307,59 @@ inode_table_t *
inode_table_new (size_t lru_limit, xlator_t *xl)
{
inode_table_t *new = NULL;
- int ret = 0;
+ int ret = -1;
int i = 0;
- new = (void *)calloc (1, sizeof (*new));
+ new = (void *)GF_CALLOC(1, sizeof (*new), gf_common_mt_inode_table_t);
if (!new)
return NULL;
new->xl = xl;
+ new->ctxcount = xl->graph->xl_count + 1;
new->lru_limit = lru_limit;
new->hashsize = 14057; /* TODO: Random Number?? */
- new->inode_hash = (void *)calloc (new->hashsize,
- sizeof (struct list_head));
- if (!new->inode_hash) {
- FREE (new);
- return NULL;
- }
+ /* In case FUSE is initing the inode table. */
+ if (lru_limit == 0)
+ lru_limit = DEFAULT_INODE_MEMPOOL_ENTRIES;
- new->name_hash = (void *)calloc (new->hashsize,
- sizeof (struct list_head));
- if (!new->name_hash) {
- FREE (new->inode_hash);
- FREE (new);
- return NULL;
- }
+ new->inode_pool = mem_pool_new (inode_t, lru_limit);
- for (i=0; i<new->hashsize; i++) {
+ if (!new->inode_pool)
+ goto out;
+
+ new->dentry_pool = mem_pool_new (dentry_t, lru_limit);
+
+ if (!new->dentry_pool)
+ goto out;
+
+ new->inode_hash = (void *)GF_CALLOC (65536,
+ sizeof (struct list_head),
+ gf_common_mt_list_head);
+ if (!new->inode_hash)
+ goto out;
+
+ new->name_hash = (void *)GF_CALLOC (new->hashsize,
+ sizeof (struct list_head),
+ gf_common_mt_list_head);
+ if (!new->name_hash)
+ goto out;
+
+ /* if number of fd open in one process is more than this,
+ we may hit perf issues */
+ new->fd_mem_pool = mem_pool_new (fd_t, 1024);
+
+ if (!new->fd_mem_pool)
+ goto out;
+
+ for (i = 0; i < 65536; i++) {
INIT_LIST_HEAD (&new->inode_hash[i]);
}
- for (i=0; i<new->hashsize; i++) {
+ for (i = 0; i < new->hashsize; i++) {
INIT_LIST_HEAD (&new->name_hash[i]);
}
@@ -1022,16 +1367,31 @@ inode_table_new (size_t lru_limit, xlator_t *xl)
INIT_LIST_HEAD (&new->lru);
INIT_LIST_HEAD (&new->purge);
- ret = asprintf (&new->name, "%s/inode", xl->name);
+ ret = gf_asprintf (&new->name, "%s/inode", xl->name);
if (-1 == ret) {
/* TODO: This should be ok to continue, check with avati */
;
}
- __inode_table_init_root (new);
+ __inode_table_init_root (new);
pthread_mutex_init (&new->lock, NULL);
+ ret = 0;
+out:
+ if (ret) {
+ if (new) {
+ GF_FREE (new->inode_hash);
+ GF_FREE (new->name_hash);
+ if (new->dentry_pool)
+ mem_pool_destroy (new->dentry_pool);
+ if (new->inode_pool)
+ mem_pool_destroy (new->inode_pool);
+ GF_FREE (new);
+ new = NULL;
+ }
+ }
+
return new;
}
@@ -1039,236 +1399,489 @@ inode_table_new (size_t lru_limit, xlator_t *xl)
inode_t *
inode_from_path (inode_table_t *itable, const char *path)
{
- inode_t *inode = NULL;
- inode_t *parent = NULL;
- inode_t *root = NULL;
- inode_t *curr = NULL;
- char *pathname = NULL;
- char *component = NULL, *next_component = NULL;
- char *strtokptr = NULL;
-
- /* top-down approach */
- root = itable->root;
- parent = inode_ref (root);
- pathname = strdup (path);
- component = strtok_r (pathname, "/", &strtokptr);
-
- if (component == NULL)
- /* root inode */
- inode = inode_ref (parent);
-
- while (component) {
- curr = inode_search (itable, parent->ino, component);
-
- if (curr == NULL) {
- component = strtok_r (NULL, "/", &strtokptr);
- break;
- }
-
- next_component = strtok_r (NULL, "/", &strtokptr);
-
- if (next_component) {
- inode_unref (parent);
- parent = curr;
- curr = NULL;
- } else {
- inode = curr;
- }
-
- component = next_component;
- }
-
- if (parent)
- inode_unref (parent);
+ inode_t *inode = NULL;
+ inode_t *parent = NULL;
+ inode_t *root = NULL;
+ inode_t *curr = NULL;
+ char *pathname = NULL;
+ char *component = NULL, *next_component = NULL;
+ char *strtokptr = NULL;
+
+ if (!itable || !path)
+ return NULL;
- if (pathname)
- free (pathname);
+ /* top-down approach */
+ pathname = gf_strdup (path);
+ if (pathname == NULL) {
+ goto out;
+ }
+
+ root = itable->root;
+ parent = inode_ref (root);
+ component = strtok_r (pathname, "/", &strtokptr);
+
+ if (component == NULL)
+ /* root inode */
+ inode = inode_ref (parent);
+
+ while (component) {
+ curr = inode_grep (itable, parent, component);
+
+ if (curr == NULL) {
+ strtok_r (NULL, "/", &strtokptr);
+ break;
+ }
+
+ next_component = strtok_r (NULL, "/", &strtokptr);
+
+ if (next_component) {
+ inode_unref (parent);
+ parent = curr;
+ curr = NULL;
+ } else {
+ inode = curr;
+ }
+
+ component = next_component;
+ }
+
+ if (parent)
+ inode_unref (parent);
- return inode;
+ GF_FREE (pathname);
+
+out:
+ return inode;
}
+
int
-__inode_ctx_put (inode_t *inode, xlator_t *xlator, uint64_t value)
+__inode_ctx_set2 (inode_t *inode, xlator_t *xlator, uint64_t *value1_p,
+ uint64_t *value2_p)
{
int ret = 0;
int index = 0;
- int put_idx = -1;
+ int set_idx = -1;
+
+ if (!inode || !xlator)
+ return -1;
- for (index = 0; index < xlator->ctx->xl_count; index++) {
- if (!inode->_ctx[index].key) {
- if (put_idx == -1)
- put_idx = index;
+ for (index = 0; index < inode->table->ctxcount; index++) {
+ if (!inode->_ctx[index].xl_key) {
+ if (set_idx == -1)
+ set_idx = index;
/* dont break, to check if key already exists
further on */
}
- if (inode->_ctx[index].key == (uint64_t)(long) xlator) {
- put_idx = index;
+ if (inode->_ctx[index].xl_key == xlator) {
+ set_idx = index;
break;
}
}
-
- if (put_idx == -1) {
+
+ if (set_idx == -1) {
ret = -1;
goto out;;
}
- inode->_ctx[put_idx].key = (uint64_t)(long) xlator;
- inode->_ctx[put_idx].value = value;
+ inode->_ctx[set_idx].xl_key = xlator;
+ if (value1_p)
+ inode->_ctx[set_idx].value1 = *value1_p;
+ if (value2_p)
+ inode->_ctx[set_idx].value2 = *value2_p;
out:
return ret;
}
int
-inode_ctx_put (inode_t *inode, xlator_t *xlator, uint64_t value)
+__inode_ctx_set0 (inode_t *inode, xlator_t *xlator, uint64_t *value1_p)
+{
+ return __inode_ctx_set2 (inode, xlator, value1_p, NULL);
+}
+
+int
+__inode_ctx_set1 (inode_t *inode, xlator_t *xlator, uint64_t *value2_p)
+{
+ return __inode_ctx_set2 (inode, xlator, NULL, value2_p);
+}
+
+
+int
+inode_ctx_set2 (inode_t *inode, xlator_t *xlator, uint64_t *value1_p,
+ uint64_t *value2_p)
{
int ret = 0;
- if (!inode || !xlator)
- return -1;
+ if (!inode || !xlator)
+ return -1;
LOCK (&inode->lock);
{
- ret = __inode_ctx_put (inode, xlator, value);
+ ret = __inode_ctx_set2 (inode, xlator, value1_p, value2_p);
}
UNLOCK (&inode->lock);
- return ret;
+ return ret;
}
int
-__inode_ctx_get (inode_t *inode, xlator_t *xlator, uint64_t *value)
+inode_ctx_set1 (inode_t *inode, xlator_t *xlator, uint64_t *value2_p)
{
- int index = 0;
int ret = 0;
- for (index = 0; index < xlator->ctx->xl_count; index++) {
- if (inode->_ctx[index].key == (uint64_t)(long)xlator)
- break;
+
+ if (!inode || !xlator)
+ return -1;
+
+ LOCK (&inode->lock);
+ {
+ ret = __inode_ctx_set1 (inode, xlator, value2_p);
+ }
+ UNLOCK (&inode->lock);
+
+ return ret;
+}
+int
+inode_ctx_set0 (inode_t *inode, xlator_t *xlator, uint64_t *value1_p)
+{
+ int ret = 0;
+
+ if (!inode || !xlator)
+ return -1;
+
+ LOCK (&inode->lock);
+ {
+ ret = __inode_ctx_set0 (inode, xlator, value1_p);
}
+ UNLOCK (&inode->lock);
- if (index == xlator->ctx->xl_count) {
- ret = -1;
+ return ret;
+}
+
+
+int
+__inode_ctx_get2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2)
+{
+ int index = 0;
+ int ret = -1;
+
+ if (!inode || !xlator)
goto out;
+
+ for (index = 0; index < inode->table->ctxcount; index++) {
+ if (inode->_ctx[index].xl_key == xlator)
+ break;
}
- if (value)
- *value = inode->_ctx[index].value;
+ if (index == inode->table->ctxcount)
+ goto out;
+ if (inode->_ctx[index].value1) {
+ if (value1)
+ *value1 = inode->_ctx[index].value1;
+ ret = 0;
+ }
+ if (inode->_ctx[index].value2) {
+ if (value2)
+ *value2 = inode->_ctx[index].value2;
+ ret = 0;
+ }
out:
return ret;
}
-int
-inode_ctx_get (inode_t *inode, xlator_t *xlator, uint64_t *value)
+
+
+int
+__inode_ctx_get0 (inode_t *inode, xlator_t *xlator, uint64_t *value1)
{
+ uint64_t tmp_value = 0;
int ret = 0;
- if (!inode || !xlator)
- return -1;
+ ret = __inode_ctx_get2 (inode, xlator, &tmp_value, NULL);
+ if (!ret)
+ *value1 = tmp_value;
+
+ return ret;
+}
+
+int
+__inode_ctx_get1 (inode_t *inode, xlator_t *xlator, uint64_t *value2)
+{
+ uint64_t tmp_value = 0;
+ int ret = 0;
+
+ ret = __inode_ctx_get2 (inode, xlator, NULL, &tmp_value);
+ if (!ret)
+ *value2 = tmp_value;
+
+ return ret;
+}
+
+
+int
+inode_ctx_get2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2)
+{
+ int ret = 0;
+
+ if (!inode || !xlator)
+ return -1;
LOCK (&inode->lock);
{
- ret = __inode_ctx_get (inode, xlator, value);
+ ret = __inode_ctx_get2 (inode, xlator, value1, value2);
}
UNLOCK (&inode->lock);
- return ret;
+ return ret;
}
+int
+inode_ctx_get1 (inode_t *inode, xlator_t *xlator, uint64_t *value2)
+{
+ int ret = 0;
+
+ if (!inode || !xlator)
+ return -1;
+
+ LOCK (&inode->lock);
+ {
+ ret = __inode_ctx_get1 (inode, xlator, value2);
+ }
+ UNLOCK (&inode->lock);
-int
-inode_ctx_del (inode_t *inode, xlator_t *xlator, uint64_t *value)
+ return ret;
+}
+
+int
+inode_ctx_get0 (inode_t *inode, xlator_t *xlator, uint64_t *value1)
{
- int index = 0;
int ret = 0;
- if (!inode || !xlator)
- return -1;
+ if (!inode || !xlator)
+ return -1;
+
+ LOCK (&inode->lock);
+ {
+ ret = __inode_ctx_get0 (inode, xlator, value1);
+ }
+ UNLOCK (&inode->lock);
+
+ return ret;
+}
+
+
+int
+inode_ctx_del2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2)
+{
+ int index = 0;
+ int ret = 0;
+
+ if (!inode || !xlator)
+ return -1;
LOCK (&inode->lock);
{
- for (index = 0; index < xlator->ctx->xl_count; index++) {
- if (inode->_ctx[index].key == (uint64_t)(long)xlator)
+ for (index = 0; index < inode->table->ctxcount;
+ index++) {
+ if (inode->_ctx[index].xl_key == xlator)
break;
}
- if (index == xlator->ctx->xl_count) {
+ if (index == inode->table->ctxcount) {
ret = -1;
goto unlock;
}
- if (value)
- *value = inode->_ctx[index].value;
+ if (inode->_ctx[index].value1 && value1)
+ *value1 = inode->_ctx[index].value1;
+ if (inode->_ctx[index].value2 && value2)
+ *value2 = inode->_ctx[index].value2;
- inode->_ctx[index].key = 0;
- inode->_ctx[index].value = 0;
+ inode->_ctx[index].key = 0;
+ inode->_ctx[index].value1 = 0;
+ inode->_ctx[index].value2 = 0;
}
unlock:
UNLOCK (&inode->lock);
- return ret;
+ return ret;
+}
+
+/* function behavior:
+ - if value1 is set, value1 in ctx is reset to 0 with current value passed
+ back in value1 address.
+ - if value2 is set, value2 in ctx is reset to 0 with current value passed
+ back in value2 address.
+ - if both are set, both fields are reset.
+*/
+static int
+__inode_ctx_reset2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2)
+{
+ int index = 0;
+ int ret = 0;
+
+ if (!inode || !xlator)
+ return -1;
+
+ LOCK (&inode->lock);
+ {
+ for (index = 0; index < inode->table->ctxcount;
+ index++) {
+ if (inode->_ctx[index].xl_key == xlator)
+ break;
+ }
+
+ if (index == inode->table->ctxcount) {
+ ret = -1;
+ goto unlock;
+ }
+
+ if (inode->_ctx[index].value1 && value1) {
+ *value1 = inode->_ctx[index].value1;
+ inode->_ctx[index].value1 = 0;
+ }
+ if (inode->_ctx[index].value2 && value2) {
+ *value2 = inode->_ctx[index].value2;
+ inode->_ctx[index].value2 = 0;
+ }
+ }
+unlock:
+ UNLOCK (&inode->lock);
+
+ return ret;
+}
+
+int
+inode_ctx_reset2 (inode_t *inode, xlator_t *xlator, uint64_t *value1_p,
+ uint64_t *value2_p)
+{
+ uint64_t tmp_value1 = 0;
+ uint64_t tmp_value2 = 0;
+ int ret = 0;
+
+ ret = __inode_ctx_reset2 (inode, xlator, &tmp_value1, &tmp_value2);
+ if (!ret) {
+ if (value1_p)
+ *value1_p = tmp_value1;
+ if (value2_p)
+ *value2_p = tmp_value2;
+ }
+ return ret;
+}
+
+int
+inode_ctx_reset1 (inode_t *inode, xlator_t *xlator, uint64_t *value2_p)
+{
+ uint64_t tmp_value2 = 0;
+ int ret = 0;
+
+ ret = __inode_ctx_reset2 (inode, xlator, NULL, &tmp_value2);
+
+ if (!ret && value2_p)
+ *value2_p = tmp_value2;
+
+ return ret;
+
+}
+int
+inode_ctx_reset0 (inode_t *inode, xlator_t *xlator, uint64_t *value1_p)
+{
+ uint64_t tmp_value1 = 0;
+ int ret = 0;
+
+ ret = __inode_ctx_reset2 (inode, xlator, &tmp_value1, NULL);
+
+ if (!ret && value1_p)
+ *value1_p = tmp_value1;
+
+ return ret;
}
+
void
inode_dump (inode_t *inode, char *prefix)
{
- char key[GF_DUMP_MAX_BUF_LEN];
- int ret = -1;
- xlator_t *xl = NULL;
- int i = 0;
-
- if (!inode)
+ int ret = -1;
+ xlator_t *xl = NULL;
+ int i = 0;
+ fd_t *fd = NULL;
+ struct _inode_ctx *inode_ctx = NULL;
+ struct list_head fd_list;
+
+ if (!inode)
return;
- ret = TRY_LOCK(&inode->lock);
+ INIT_LIST_HEAD (&fd_list);
+ ret = TRY_LOCK(&inode->lock);
if (ret != 0) {
- gf_log("", GF_LOG_WARNING, "Unable to dump inode"
- " errno: %d", errno);
return;
}
- gf_proc_dump_build_key(key, prefix, "nlookup");
- gf_proc_dump_write(key, "%ld", inode->nlookup);
- gf_proc_dump_build_key(key, prefix, "generation");
- gf_proc_dump_write(key, "%ld", inode->generation);
- gf_proc_dump_build_key(key, prefix, "ref");
- gf_proc_dump_write(key, "%u", inode->ref);
- gf_proc_dump_build_key(key, prefix, "ino");
- gf_proc_dump_write(key, "%ld", inode->ino);
- gf_proc_dump_build_key(key, prefix, "st_mode");
- gf_proc_dump_write(key, "%d", inode->st_mode);
- UNLOCK(&inode->lock);
- if (!inode->_ctx)
- goto out;
-
- for (i = 0; i < inode->table->xl->ctx->xl_count; i++) {
- if (inode->_ctx[i].key) {
- xl = (xlator_t *)(long)inode->_ctx[i].key;
- if (xl->dumpops && xl->dumpops->inodectx)
- xl->dumpops->inodectx (xl, inode);
- }
- }
+ {
+ gf_proc_dump_write("gfid", "%s", uuid_utoa (inode->gfid));
+ gf_proc_dump_write("nlookup", "%ld", inode->nlookup);
+ gf_proc_dump_write("fd-count", "%u", inode->fd_count);
+ gf_proc_dump_write("ref", "%u", inode->ref);
+ gf_proc_dump_write("ia_type", "%d", inode->ia_type);
+ if (inode->_ctx) {
+ inode_ctx = GF_CALLOC (inode->table->ctxcount,
+ sizeof (*inode_ctx),
+ gf_common_mt_inode_ctx);
+ if (inode_ctx == NULL) {
+ goto unlock;
+ }
+
+ for (i = 0; i < inode->table->ctxcount;
+ i++) {
+ inode_ctx[i] = inode->_ctx[i];
+ }
+ }
+
+ if (dump_options.xl_options.dump_fdctx != _gf_true)
+ goto unlock;
+
+
+ list_for_each_entry (fd, &inode->fd_list, inode_list) {
+ fd_ctx_dump (fd, prefix);
+ }
+ }
+unlock:
+ UNLOCK(&inode->lock);
+
+ if (inode_ctx && (dump_options.xl_options.dump_inodectx == _gf_true)) {
+ for (i = 0; i < inode->table->ctxcount; i++) {
+ if (inode_ctx[i].xl_key) {
+ xl = (xlator_t *)(long)inode_ctx[i].xl_key;
+ if (xl->dumpops && xl->dumpops->inodectx)
+ xl->dumpops->inodectx (xl, inode);
+ }
+ }
+ }
+
+ GF_FREE (inode_ctx);
-out:
return;
}
void
inode_table_dump (inode_table_t *itable, char *prefix)
{
-
+
char key[GF_DUMP_MAX_BUF_LEN];
int ret = 0;
if (!itable)
return;
-
+
memset(key, 0, sizeof(key));
- ret = pthread_mutex_trylock(&itable->lock);
+ ret = pthread_mutex_trylock(&itable->lock);
if (ret != 0) {
- gf_log("", GF_LOG_WARNING, "Unable to dump inode table"
- " errno: %d", errno);
return;
}
@@ -1276,7 +1889,7 @@ inode_table_dump (inode_table_t *itable, char *prefix)
gf_proc_dump_write(key, "%d", itable->hashsize);
gf_proc_dump_build_key(key, prefix, "name");
gf_proc_dump_write(key, "%s", itable->name);
-
+
gf_proc_dump_build_key(key, prefix, "lru_limit");
gf_proc_dump_write(key, "%d", itable->lru_limit);
gf_proc_dump_build_key(key, prefix, "active_size");
@@ -1293,3 +1906,98 @@ inode_table_dump (inode_table_t *itable, char *prefix)
pthread_mutex_unlock(&itable->lock);
}
+void
+inode_dump_to_dict (inode_t *inode, char *prefix, dict_t *dict)
+{
+ int ret = -1;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0,};
+
+ ret = TRY_LOCK (&inode->lock);
+ if (ret)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.gfid", prefix);
+ ret = dict_set_dynstr (dict, key, gf_strdup (uuid_utoa (inode->gfid)));
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.nlookup", prefix);
+ ret = dict_set_uint64 (dict, key, inode->nlookup);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.ref", prefix);
+ ret = dict_set_uint32 (dict, key, inode->ref);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.ia_type", prefix);
+ ret = dict_set_int32 (dict, key, inode->ia_type);
+
+out:
+ UNLOCK (&inode->lock);
+ return;
+}
+
+void
+inode_table_dump_to_dict (inode_table_t *itable, char *prefix, dict_t *dict)
+{
+ char key[GF_DUMP_MAX_BUF_LEN] = {0,};
+ int ret = 0;
+ inode_t *inode = NULL;
+ int count = 0;
+
+ ret = pthread_mutex_trylock (&itable->lock);
+ if (ret)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.itable.active_size", prefix);
+ ret = dict_set_uint32 (dict, key, itable->active_size);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.itable.lru_size", prefix);
+ ret = dict_set_uint32 (dict, key, itable->lru_size);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.itable.purge_size", prefix);
+ ret = dict_set_uint32 (dict, key, itable->purge_size);
+ if (ret)
+ goto out;
+
+ list_for_each_entry (inode, &itable->active, list) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.itable.active%d", prefix,
+ count++);
+ inode_dump_to_dict (inode, key, dict);
+ }
+ count = 0;
+
+ list_for_each_entry (inode, &itable->lru, list) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.itable.lru%d", prefix,
+ count++);
+ inode_dump_to_dict (inode, key, dict);
+ }
+ count = 0;
+
+ list_for_each_entry (inode, &itable->purge, list) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.itable.purge%d", prefix,
+ count++);
+ inode_dump_to_dict (inode, key, dict);
+ }
+
+out:
+ pthread_mutex_unlock (&itable->lock);
+
+ return;
+}
diff --git a/libglusterfs/src/inode.h b/libglusterfs/src/inode.h
index e71c6d899..a88976265 100644
--- a/libglusterfs/src/inode.h
+++ b/libglusterfs/src/inode.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _INODE_H
@@ -28,6 +19,8 @@
#include <stdint.h>
#include <sys/types.h>
+#define DEFAULT_INODE_MEMPOOL_ENTRIES 32 * 1024
+#define INODE_PATH_FMT "<gfid:%s>"
struct _inode_table;
typedef struct _inode_table inode_table_t;
@@ -39,6 +32,8 @@ typedef struct _dentry dentry_t;
#include "list.h"
#include "xlator.h"
+#include "iatt.h"
+#include "uuid.h"
struct _inode_table {
@@ -57,43 +52,60 @@ struct _inode_table {
uint32_t lru_size; /* count of inodes in lru list */
struct list_head purge; /* list of inodes to be purged soon */
uint32_t purge_size; /* count of inodes in purge list */
+
+ struct mem_pool *inode_pool; /* memory pool for inodes */
+ struct mem_pool *dentry_pool; /* memory pool for dentrys */
+ struct mem_pool *fd_mem_pool; /* memory pool for fd_t */
+ int ctxcount; /* number of slots in inode->ctx */
};
struct _dentry {
struct list_head inode_list; /* list of dentries of inode */
struct list_head hash; /* hash table pointers */
- struct list_head parent_list; /* list of dentries under the parent */
inode_t *inode; /* inode of this directory entry */
char *name; /* name of the directory entry */
inode_t *parent; /* directory of the entry */
};
-//#define ZR_INODE_CTX_VALUE_LEN 2
struct _inode_ctx {
- uint64_t key;
- uint64_t value;
- //uint64_t value[ZR_INODE_CTX_VALUE_LEN];
+ union {
+ uint64_t key;
+ xlator_t *xl_key;
+ };
+ /* if value1 is 0, then field is not set.. */
+ union {
+ uint64_t value1;
+ void *ptr1;
+ };
+ /* if value2 is 0, then field is not set.. */
+ union {
+ uint64_t value2;
+ void *ptr2;
+ };
};
struct _inode {
- inode_table_t *table; /* the table this inode belongs to */
- gf_lock_t lock;
- uint64_t nlookup;
- uint64_t generation;
- uint32_t ref; /* reference count on this inode */
- ino_t ino; /* inode number in the storage (persistent) */
- mode_t st_mode; /* what kind of file */
- struct list_head fd_list; /* list of open files on this inode */
- struct list_head dentry_list; /* list of directory entries for this inode */
- struct list_head child_list; /* list of directory entries under this inode */
- struct list_head hash; /* hash table pointers */
- struct list_head list; /* active/lru/purge */
-
- struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */
+ inode_table_t *table; /* the table this inode belongs to */
+ uuid_t gfid;
+ gf_lock_t lock;
+ uint64_t nlookup;
+ uint32_t fd_count; /* Open fd count */
+ uint32_t ref; /* reference count on this inode */
+ ia_type_t ia_type; /* what kind of file */
+ struct list_head fd_list; /* list of open files on this inode */
+ struct list_head dentry_list; /* list of directory entries for this inode */
+ struct list_head hash; /* hash table pointers */
+ struct list_head list; /* active/lru/purge */
+
+ struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */
};
+#define UUID0_STR "00000000-0000-0000-0000-000000000000"
+#define GFID_STR_PFX "<gfid:" UUID0_STR ">"
+#define GFID_STR_PFX_LEN (sizeof (GFID_STR_PFX) - 1)
+
inode_table_t *
inode_table_new (size_t lru_limit, xlator_t *xl);
@@ -101,19 +113,14 @@ inode_t *
inode_new (inode_table_t *table);
inode_t *
-inode_search (inode_table_t *table, ino_t ino, const char *name);
-
-int
inode_link (inode_t *inode, inode_t *parent,
- const char *name, struct stat *stbuf);
+ const char *name, struct iatt *stbuf);
void
-inode_unlink (inode_t *inode,
- inode_t *parent,
- const char *name);
+inode_unlink (inode_t *inode, inode_t *parent, const char *name);
inode_t *
-inode_parent (inode_t *inode, ino_t par, const char *name);
+inode_parent (inode_t *inode, uuid_t pargfid, const char *name);
inode_t *
inode_ref (inode_t *inode);
@@ -125,46 +132,120 @@ int
inode_lookup (inode_t *inode);
int
-inode_forget (inode_t *inode,
- uint64_t nlookup);
+inode_forget (inode_t *inode, uint64_t nlookup);
int
-inode_rename (inode_table_t *table,
- inode_t *olddir,
- const char *oldname,
- inode_t *newdir,
- const char *newname,
- inode_t *inode,
- struct stat *stbuf);
+inode_invalidate(inode_t *inode);
+int
+inode_rename (inode_table_t *table, inode_t *olddir, const char *oldname,
+ inode_t *newdir, const char *newname,
+ inode_t *inode, struct iatt *stbuf);
-int32_t
-inode_path (inode_t *inode,
- const char *name,
- char **bufp);
+inode_t *
+inode_grep (inode_table_t *table, inode_t *parent, const char *name);
+
+int
+inode_grep_for_gfid (inode_table_t *table, inode_t *parent, const char *name,
+ uuid_t gfid, ia_type_t *type);
inode_t *
-inode_from_path (inode_table_t *table,
- const char *path);
+inode_find (inode_table_t *table, uuid_t gfid);
-dentry_t *
-dentry_search_for_inode (inode_t *inode,
- ino_t par,
- const char *name);
+int
+inode_path (inode_t *inode, const char *name, char **bufp);
int
-__inode_ctx_put (inode_t *inode, xlator_t *xlator, uint64_t value);
+__inode_path (inode_t *inode, const char *name, char **bufp);
+inode_t *
+inode_from_path (inode_table_t *table, const char *path);
+
+inode_t *
+inode_resolve (inode_table_t *table, char *path);
+
+/* deal with inode ctx's both values */
+
+int
+inode_ctx_set2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2);
+int
+__inode_ctx_set2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2);
+
+int
+inode_ctx_get2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2);
int
-inode_ctx_put (inode_t *inode, xlator_t *xlator, uint64_t value);
+__inode_ctx_get2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2);
int
-__inode_ctx_get (inode_t *inode, xlator_t *xlator, uint64_t *value);
+inode_ctx_del2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2);
+
+int
+inode_ctx_reset2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2);
+
+/* deal with inode ctx's 1st value */
+
+int
+inode_ctx_set0 (inode_t *inode, xlator_t *xlator, uint64_t *value1);
+
+int
+__inode_ctx_set0 (inode_t *inode, xlator_t *xlator, uint64_t *value1);
+
+int
+inode_ctx_get0 (inode_t *inode, xlator_t *xlator, uint64_t *value1);
+int
+__inode_ctx_get0 (inode_t *inode, xlator_t *xlator, uint64_t *value1);
+
+int
+inode_ctx_reset0 (inode_t *inode, xlator_t *xlator, uint64_t *value1);
+
+/* deal with inode ctx's 2st value */
+
+int
+inode_ctx_set1 (inode_t *inode, xlator_t *xlator, uint64_t *value2);
+
+int
+__inode_ctx_set1 (inode_t *inode, xlator_t *xlator, uint64_t *value2);
+
+int
+inode_ctx_get1 (inode_t *inode, xlator_t *xlator, uint64_t *value2);
+int
+__inode_ctx_get1 (inode_t *inode, xlator_t *xlator, uint64_t *value2);
+
+int
+inode_ctx_reset1 (inode_t *inode, xlator_t *xlator, uint64_t *value2);
+
+
+static inline int
+__inode_ctx_put(inode_t *inode, xlator_t *this, uint64_t v)
+{
+ return __inode_ctx_set0 (inode, this, &v);
+}
+
+static inline int
+inode_ctx_put(inode_t *inode, xlator_t *this, uint64_t v)
+{
+ return inode_ctx_set0 (inode, this, &v);
+}
+
+#define __inode_ctx_set(i,x,v_p) __inode_ctx_set0(i,x,v_p)
+
+#define inode_ctx_set(i,x,v_p) inode_ctx_set0(i,x,v_p)
+
+#define inode_ctx_reset(i,x,v) inode_ctx_reset0(i,x,v)
+
+#define __inode_ctx_get(i,x,v) __inode_ctx_get0(i,x,v)
+
+#define inode_ctx_get(i,x,v) inode_ctx_get0(i,x,v)
-int
-inode_ctx_get (inode_t *inode, xlator_t *xlator, uint64_t *value);
+#define inode_ctx_del(i,x,v) inode_ctx_del2(i,x,v,0)
-int
-inode_ctx_del (inode_t *inode, xlator_t *xlator, uint64_t *value);
+gf_boolean_t
+__is_root_gfid (uuid_t gfid);
#endif /* _INODE_H */
diff --git a/libglusterfs/src/iobuf.c b/libglusterfs/src/iobuf.c
index e9408f86b..a89e96267 100644
--- a/libglusterfs/src/iobuf.c
+++ b/libglusterfs/src/iobuf.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -27,21 +18,70 @@
TODO: implement destroy margins and prefetching of arenas
*/
+#define IOBUF_ARENA_MAX_INDEX (sizeof (gf_iobuf_init_config) / \
+ (sizeof (struct iobuf_init_config)))
+
+/* Make sure this array is sorted based on pagesize */
+struct iobuf_init_config gf_iobuf_init_config[] = {
+ /* { pagesize, num_pages }, */
+ {128, 1024},
+ {512, 512},
+ {2 * 1024, 512},
+ {8 * 1024, 128},
+ {32 * 1024, 64},
+ {128 * 1024, 32},
+ {256 * 1024, 8},
+ {1 * 1024 * 1024, 2},
+};
+
+int
+gf_iobuf_get_arena_index (size_t page_size)
+{
+ int i = -1;
+
+ for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
+ if (page_size <= gf_iobuf_init_config[i].pagesize)
+ break;
+ }
+
+ if (i >= IOBUF_ARENA_MAX_INDEX)
+ i = -1;
+
+ return i;
+}
+
+size_t
+gf_iobuf_get_pagesize (size_t page_size)
+{
+ int i = 0;
+ size_t size = 0;
+
+ for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
+ size = gf_iobuf_init_config[i].pagesize;
+ if (page_size <= size)
+ break;
+ }
+
+ if (i >= IOBUF_ARENA_MAX_INDEX)
+ size = -1;
+
+ return size;
+}
+
void
__iobuf_arena_init_iobufs (struct iobuf_arena *iobuf_arena)
{
- size_t arena_size = 0;
- size_t page_size = 0;
int iobuf_cnt = 0;
struct iobuf *iobuf = NULL;
int offset = 0;
int i = 0;
- arena_size = iobuf_arena->iobuf_pool->arena_size;
- page_size = iobuf_arena->iobuf_pool->page_size;
- iobuf_cnt = arena_size / page_size;
+ GF_VALIDATE_OR_GOTO ("iobuf", iobuf_arena, out);
- iobuf_arena->iobufs = CALLOC (sizeof (*iobuf), iobuf_cnt);
+ iobuf_cnt = iobuf_arena->page_count;
+
+ iobuf_arena->iobufs = GF_CALLOC (sizeof (*iobuf), iobuf_cnt,
+ gf_common_mt_iobuf);
if (!iobuf_arena->iobufs)
return;
@@ -57,67 +97,74 @@ __iobuf_arena_init_iobufs (struct iobuf_arena *iobuf_arena)
list_add (&iobuf->list, &iobuf_arena->passive.list);
iobuf_arena->passive_cnt++;
- offset += page_size;
+ offset += iobuf_arena->page_size;
iobuf++;
}
+
+out:
+ return;
}
void
__iobuf_arena_destroy_iobufs (struct iobuf_arena *iobuf_arena)
{
- size_t arena_size = 0;
- size_t page_size = 0;
int iobuf_cnt = 0;
struct iobuf *iobuf = NULL;
int i = 0;
- arena_size = iobuf_arena->iobuf_pool->arena_size;
- page_size = iobuf_arena->iobuf_pool->page_size;
- iobuf_cnt = arena_size / page_size;
+ GF_VALIDATE_OR_GOTO ("iobuf", iobuf_arena, out);
- if (!iobuf_arena->iobufs)
+ iobuf_cnt = iobuf_arena->page_count;
+
+ if (!iobuf_arena->iobufs) {
+ gf_log_callingfn (THIS->name, GF_LOG_ERROR, "iobufs not found");
return;
+ }
iobuf = iobuf_arena->iobufs;
for (i = 0; i < iobuf_cnt; i++) {
- assert (iobuf->ref == 0);
+ GF_ASSERT (iobuf->ref == 0);
list_del_init (&iobuf->list);
iobuf++;
}
- FREE (iobuf_arena->iobufs);
+ GF_FREE (iobuf_arena->iobufs);
+
+out:
+ return;
}
void
__iobuf_arena_destroy (struct iobuf_arena *iobuf_arena)
{
- struct iobuf_pool *iobuf_pool = NULL;
-
- if (!iobuf_arena)
- return;
-
- iobuf_pool = iobuf_arena->iobuf_pool;
+ GF_VALIDATE_OR_GOTO ("iobuf", iobuf_arena, out);
__iobuf_arena_destroy_iobufs (iobuf_arena);
if (iobuf_arena->mem_base
&& iobuf_arena->mem_base != MAP_FAILED)
- munmap (iobuf_arena->mem_base, iobuf_pool->arena_size);
+ munmap (iobuf_arena->mem_base, iobuf_arena->arena_size);
- FREE (iobuf_arena);
+ GF_FREE (iobuf_arena);
+out:
+ return;
}
struct iobuf_arena *
-__iobuf_arena_alloc (struct iobuf_pool *iobuf_pool)
+__iobuf_arena_alloc (struct iobuf_pool *iobuf_pool, size_t page_size,
+ int32_t num_iobufs)
{
struct iobuf_arena *iobuf_arena = NULL;
- size_t arena_size = 0;
+ size_t rounded_size = 0;
- iobuf_arena = CALLOC (sizeof (*iobuf_arena), 1);
+ GF_VALIDATE_OR_GOTO ("iobuf", iobuf_pool, out);
+
+ iobuf_arena = GF_CALLOC (sizeof (*iobuf_arena), 1,
+ gf_common_mt_iobuf_arena);
if (!iobuf_arena)
goto err;
@@ -126,15 +173,26 @@ __iobuf_arena_alloc (struct iobuf_pool *iobuf_pool)
INIT_LIST_HEAD (&iobuf_arena->passive.list);
iobuf_arena->iobuf_pool = iobuf_pool;
- arena_size = iobuf_pool->arena_size;
- iobuf_arena->mem_base = mmap (NULL, arena_size, PROT_READ|PROT_WRITE,
+ rounded_size = gf_iobuf_get_pagesize (page_size);
+
+ iobuf_arena->page_size = rounded_size;
+ iobuf_arena->page_count = num_iobufs;
+
+ iobuf_arena->arena_size = rounded_size * num_iobufs;
+
+ iobuf_arena->mem_base = mmap (NULL, iobuf_arena->arena_size,
+ PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
- if (iobuf_arena->mem_base == MAP_FAILED)
+ if (iobuf_arena->mem_base == MAP_FAILED) {
+ gf_log (THIS->name, GF_LOG_WARNING, "maping failed");
goto err;
+ }
__iobuf_arena_init_iobufs (iobuf_arena);
- if (!iobuf_arena->iobufs)
+ if (!iobuf_arena->iobufs) {
+ gf_log (THIS->name, GF_LOG_ERROR, "init failed");
goto err;
+ }
iobuf_pool->arena_cnt++;
@@ -142,56 +200,87 @@ __iobuf_arena_alloc (struct iobuf_pool *iobuf_pool)
err:
__iobuf_arena_destroy (iobuf_arena);
+
+out:
return NULL;
}
struct iobuf_arena *
-__iobuf_arena_unprune (struct iobuf_pool *iobuf_pool)
+__iobuf_arena_unprune (struct iobuf_pool *iobuf_pool, size_t page_size)
{
- struct iobuf_arena *iobuf_arena = NULL;
- struct iobuf_arena *tmp = NULL;
+ struct iobuf_arena *iobuf_arena = NULL;
+ struct iobuf_arena *tmp = NULL;
+ int index = 0;
+
+ GF_VALIDATE_OR_GOTO ("iobuf", iobuf_pool, out);
- list_for_each_entry (tmp, &iobuf_pool->purge.list, list) {
+ index = gf_iobuf_get_arena_index (page_size);
+ if (index == -1) {
+ gf_log ("iobuf", GF_LOG_ERROR, "page_size (%zu) of "
+ "iobufs in arena being added is greater than max "
+ "available", page_size);
+ return NULL;
+ }
+
+ list_for_each_entry (tmp, &iobuf_pool->purge[index], list) {
list_del_init (&tmp->list);
iobuf_arena = tmp;
break;
}
-
+out:
return iobuf_arena;
}
struct iobuf_arena *
-__iobuf_pool_add_arena (struct iobuf_pool *iobuf_pool)
+__iobuf_pool_add_arena (struct iobuf_pool *iobuf_pool, size_t page_size,
+ int32_t num_pages)
{
- struct iobuf_arena *iobuf_arena = NULL;
+ struct iobuf_arena *iobuf_arena = NULL;
+ int index = 0;
+
+ index = gf_iobuf_get_arena_index (page_size);
+ if (index == -1) {
+ gf_log ("iobuf", GF_LOG_ERROR, "page_size (%zu) of "
+ "iobufs in arena being added is greater than max "
+ "available", page_size);
+ return NULL;
+ }
- iobuf_arena = __iobuf_arena_unprune (iobuf_pool);
+ iobuf_arena = __iobuf_arena_unprune (iobuf_pool, page_size);
if (!iobuf_arena)
- iobuf_arena = __iobuf_arena_alloc (iobuf_pool);
+ iobuf_arena = __iobuf_arena_alloc (iobuf_pool, page_size,
+ num_pages);
- if (!iobuf_arena)
+ if (!iobuf_arena) {
+ gf_log (THIS->name, GF_LOG_WARNING, "arena not found");
return NULL;
+ }
- list_add_tail (&iobuf_arena->list, &iobuf_pool->arenas.list);
+ list_add_tail (&iobuf_arena->list, &iobuf_pool->arenas[index]);
return iobuf_arena;
}
struct iobuf_arena *
-iobuf_pool_add_arena (struct iobuf_pool *iobuf_pool)
+iobuf_pool_add_arena (struct iobuf_pool *iobuf_pool, size_t page_size,
+ int32_t num_pages)
{
struct iobuf_arena *iobuf_arena = NULL;
+ GF_VALIDATE_OR_GOTO ("iobuf", iobuf_pool, out);
+
pthread_mutex_lock (&iobuf_pool->mutex);
{
- iobuf_arena = __iobuf_pool_add_arena (iobuf_pool);
+ iobuf_arena = __iobuf_pool_add_arena (iobuf_pool, page_size,
+ num_pages);
}
pthread_mutex_unlock (&iobuf_pool->mutex);
+out:
return iobuf_arena;
}
@@ -200,91 +289,167 @@ void
iobuf_pool_destroy (struct iobuf_pool *iobuf_pool)
{
struct iobuf_arena *iobuf_arena = NULL;
- struct iobuf_arena *tmp = NULL;
+ struct iobuf_arena *tmp = NULL;
+ int i = 0;
- if (!iobuf_pool)
- return;
+ GF_VALIDATE_OR_GOTO ("iobuf", iobuf_pool, out);
- list_for_each_entry_safe (iobuf_arena, tmp, &iobuf_pool->arenas.list,
- list) {
-
- list_del_init (&iobuf_arena->list);
- iobuf_pool->arena_cnt--;
+ for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
+ list_for_each_entry_safe (iobuf_arena, tmp,
+ &iobuf_pool->arenas[i], list) {
+ list_del_init (&iobuf_arena->list);
+ iobuf_pool->arena_cnt--;
+ __iobuf_arena_destroy (iobuf_arena);
+ }
- __iobuf_arena_destroy (iobuf_arena);
}
+
+out:
+ return;
}
+static void
+iobuf_create_stdalloc_arena (struct iobuf_pool *iobuf_pool)
+{
+ struct iobuf_arena *iobuf_arena = NULL;
+
+ /* No locking required here as its called only once during init */
+ iobuf_arena = GF_CALLOC (sizeof (*iobuf_arena), 1,
+ gf_common_mt_iobuf_arena);
+ if (!iobuf_arena)
+ goto err;
+
+ INIT_LIST_HEAD (&iobuf_arena->list);
+ INIT_LIST_HEAD (&iobuf_arena->active.list);
+ INIT_LIST_HEAD (&iobuf_arena->passive.list);
+
+ iobuf_arena->iobuf_pool = iobuf_pool;
+
+ iobuf_arena->page_size = 0x7fffffff;
+
+ list_add_tail (&iobuf_arena->list,
+ &iobuf_pool->arenas[IOBUF_ARENA_MAX_INDEX]);
+
+err:
+ return;
+}
struct iobuf_pool *
-iobuf_pool_new (size_t arena_size, size_t page_size)
+iobuf_pool_new (void)
{
struct iobuf_pool *iobuf_pool = NULL;
+ int i = 0;
+ size_t page_size = 0;
+ size_t arena_size = 0;
+ int32_t num_pages = 0;
- if (arena_size < page_size)
- return NULL;
-
- iobuf_pool = CALLOC (sizeof (*iobuf_pool), 1);
+ iobuf_pool = GF_CALLOC (sizeof (*iobuf_pool), 1,
+ gf_common_mt_iobuf_pool);
if (!iobuf_pool)
- return NULL;
+ goto out;
pthread_mutex_init (&iobuf_pool->mutex, NULL);
- INIT_LIST_HEAD (&iobuf_pool->arenas.list);
- INIT_LIST_HEAD (&iobuf_pool->filled.list);
- INIT_LIST_HEAD (&iobuf_pool->purge.list);
+ for (i = 0; i <= IOBUF_ARENA_MAX_INDEX; i++) {
+ INIT_LIST_HEAD (&iobuf_pool->arenas[i]);
+ INIT_LIST_HEAD (&iobuf_pool->filled[i]);
+ INIT_LIST_HEAD (&iobuf_pool->purge[i]);
+ }
- iobuf_pool->arena_size = arena_size;
- iobuf_pool->page_size = page_size;
+ iobuf_pool->default_page_size = 128 * GF_UNIT_KB;
- iobuf_pool_add_arena (iobuf_pool);
+ arena_size = 0;
+ for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
+ page_size = gf_iobuf_init_config[i].pagesize;
+ num_pages = gf_iobuf_init_config[i].num_pages;
+
+ iobuf_pool_add_arena (iobuf_pool, page_size, num_pages);
+
+ arena_size += page_size * num_pages;
+ }
+
+ /* Need an arena to handle all the bigger iobuf requests */
+ iobuf_create_stdalloc_arena (iobuf_pool);
+
+ iobuf_pool->arena_size = arena_size;
+out:
return iobuf_pool;
}
void
-__iobuf_pool_prune (struct iobuf_pool *iobuf_pool)
+__iobuf_arena_prune (struct iobuf_pool *iobuf_pool,
+ struct iobuf_arena *iobuf_arena, int index)
{
- struct iobuf_arena *iobuf_arena = NULL;
- struct iobuf_arena *tmp = NULL;
-
- if (list_empty (&iobuf_pool->arenas.list))
- /* buffering - preserve this one arena (if at all)
- for __iobuf_arena_unprune */
- return;
+ GF_VALIDATE_OR_GOTO ("iobuf", iobuf_pool, out);
+
+ /* code flow comes here only if the arena is in purge list and we can
+ * free the arena only if we have atleast one arena in 'arenas' list
+ * (ie, at least few iobufs free in arena), that way, there won't
+ * be spurious mmap/unmap of buffers
+ */
+ if (list_empty (&iobuf_pool->arenas[index]))
+ goto out;
- list_for_each_entry_safe (iobuf_arena, tmp, &iobuf_pool->purge.list,
- list) {
- if (iobuf_arena->active_cnt)
- continue;
+ /* All cases matched, destroy */
+ list_del_init (&iobuf_arena->list);
+ iobuf_pool->arena_cnt--;
- list_del_init (&iobuf_arena->list);
- iobuf_pool->arena_cnt--;
+ __iobuf_arena_destroy (iobuf_arena);
- __iobuf_arena_destroy (iobuf_arena);
- }
+out:
+ return;
}
void
iobuf_pool_prune (struct iobuf_pool *iobuf_pool)
{
+ struct iobuf_arena *iobuf_arena = NULL;
+ struct iobuf_arena *tmp = NULL;
+ int i = 0;
+
+ GF_VALIDATE_OR_GOTO ("iobuf", iobuf_pool, out);
+
pthread_mutex_lock (&iobuf_pool->mutex);
{
- __iobuf_pool_prune (iobuf_pool);
+ for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
+ if (list_empty (&iobuf_pool->arenas[i])) {
+ continue;
+ }
+
+ list_for_each_entry_safe (iobuf_arena, tmp,
+ &iobuf_pool->purge[i], list) {
+ __iobuf_arena_prune (iobuf_pool, iobuf_arena, i);
+ }
+ }
}
pthread_mutex_unlock (&iobuf_pool->mutex);
+
+out:
+ return;
}
struct iobuf_arena *
-__iobuf_select_arena (struct iobuf_pool *iobuf_pool)
+__iobuf_select_arena (struct iobuf_pool *iobuf_pool, size_t page_size)
{
- struct iobuf_arena *iobuf_arena = NULL;
- struct iobuf_arena *trav = NULL;
+ struct iobuf_arena *iobuf_arena = NULL;
+ struct iobuf_arena *trav = NULL;
+ int index = 0;
+
+ GF_VALIDATE_OR_GOTO ("iobuf", iobuf_pool, out);
+
+ index = gf_iobuf_get_arena_index (page_size);
+ if (index == -1) {
+ gf_log ("iobuf", GF_LOG_ERROR, "page_size (%zu) of "
+ "iobufs in arena being added is greater than max "
+ "available", page_size);
+ return NULL;
+ }
/* look for unused iobuf from the head-most arena */
- list_for_each_entry (trav, &iobuf_pool->arenas.list, list) {
+ list_for_each_entry (trav, &iobuf_pool->arenas[index], list) {
if (trav->passive_cnt) {
iobuf_arena = trav;
break;
@@ -292,10 +457,12 @@ __iobuf_select_arena (struct iobuf_pool *iobuf_pool)
}
if (!iobuf_arena) {
- /* all arenas were full */
- iobuf_arena = __iobuf_pool_add_arena (iobuf_pool);
+ /* all arenas were full, find the right count to add */
+ iobuf_arena = __iobuf_pool_add_arena (iobuf_pool, page_size,
+ gf_iobuf_init_config[index].num_pages);
}
+out:
return iobuf_arena;
}
@@ -317,12 +484,14 @@ __iobuf_unref (struct iobuf *iobuf)
return iobuf;
}
-
struct iobuf *
-__iobuf_get (struct iobuf_arena *iobuf_arena)
+__iobuf_get (struct iobuf_arena *iobuf_arena, size_t page_size)
{
- struct iobuf *iobuf = NULL;
- struct iobuf_pool *iobuf_pool = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iobuf_pool *iobuf_pool = NULL;
+ int index = 0;
+
+ GF_VALIDATE_OR_GOTO ("iobuf", iobuf_arena, out);
iobuf_pool = iobuf_arena->iobuf_pool;
@@ -335,51 +504,178 @@ __iobuf_get (struct iobuf_arena *iobuf_arena)
list_add (&iobuf->list, &iobuf_arena->active.list);
iobuf_arena->active_cnt++;
+ /* no resetting requied for this element */
+ iobuf_arena->alloc_cnt++;
+
+ if (iobuf_arena->max_active < iobuf_arena->active_cnt)
+ iobuf_arena->max_active = iobuf_arena->active_cnt;
+
if (iobuf_arena->passive_cnt == 0) {
+ index = gf_iobuf_get_arena_index (page_size);
+ if (index == -1) {
+ gf_log ("iobuf", GF_LOG_ERROR, "page_size (%zu) of "
+ "iobufs in arena being added is greater "
+ "than max available", page_size);
+ goto out;
+ }
+
list_del (&iobuf_arena->list);
- list_add (&iobuf_arena->list, &iobuf_pool->filled.list);
+ list_add (&iobuf_arena->list, &iobuf_pool->filled[index]);
}
+out:
return iobuf;
}
-
struct iobuf *
-iobuf_get (struct iobuf_pool *iobuf_pool)
+iobuf_get_from_stdalloc (struct iobuf_pool *iobuf_pool, size_t page_size)
{
- struct iobuf *iobuf = NULL;
+ struct iobuf *iobuf = NULL;
struct iobuf_arena *iobuf_arena = NULL;
+ struct iobuf_arena *trav = NULL;
+ int ret = -1;
+
+ /* The first arena in the 'MAX-INDEX' will always be used for misc */
+ list_for_each_entry (trav, &iobuf_pool->arenas[IOBUF_ARENA_MAX_INDEX],
+ list) {
+ iobuf_arena = trav;
+ break;
+ }
+
+ iobuf = GF_CALLOC (1, sizeof (*iobuf), gf_common_mt_iobuf);
+ if (!iobuf)
+ goto out;
+
+ /* 4096 is the alignment */
+ iobuf->free_ptr = GF_CALLOC (1, ((page_size + GF_IOBUF_ALIGN_SIZE) - 1),
+ gf_common_mt_char);
+ if (!iobuf->free_ptr)
+ goto out;
+
+ iobuf->ptr = GF_ALIGN_BUF (iobuf->free_ptr, GF_IOBUF_ALIGN_SIZE);
+ iobuf->iobuf_arena = iobuf_arena;
+ LOCK_INIT (&iobuf->lock);
+
+ /* Hold a ref because you are allocating and using it */
+ iobuf->ref = 1;
+
+ ret = 0;
+out:
+ if (ret && iobuf) {
+ GF_FREE (iobuf->free_ptr);
+ GF_FREE (iobuf);
+ iobuf = NULL;
+ }
+
+ return iobuf;
+}
+
+
+struct iobuf *
+iobuf_get2 (struct iobuf_pool *iobuf_pool, size_t page_size)
+{
+ struct iobuf *iobuf = NULL;
+ struct iobuf_arena *iobuf_arena = NULL;
+ size_t rounded_size = 0;
+
+ if (page_size == 0) {
+ page_size = iobuf_pool->default_page_size;
+ }
+
+ rounded_size = gf_iobuf_get_pagesize (page_size);
+ if (rounded_size == -1) {
+ /* make sure to provide the requested buffer with standard
+ memory allocations */
+ iobuf = iobuf_get_from_stdalloc (iobuf_pool, page_size);
+
+ gf_log ("iobuf", GF_LOG_DEBUG, "request for iobuf of size %zu "
+ "is serviced using standard calloc() (%p) as it "
+ "exceeds the maximum available buffer size",
+ page_size, iobuf);
+
+ iobuf_pool->request_misses++;
+ return iobuf;
+ }
pthread_mutex_lock (&iobuf_pool->mutex);
{
/* most eligible arena for picking an iobuf */
- iobuf_arena = __iobuf_select_arena (iobuf_pool);
+ iobuf_arena = __iobuf_select_arena (iobuf_pool, rounded_size);
if (!iobuf_arena)
goto unlock;
- iobuf = __iobuf_get (iobuf_arena);
+ iobuf = __iobuf_get (iobuf_arena, rounded_size);
if (!iobuf)
goto unlock;
__iobuf_ref (iobuf);
- }
+ }
unlock:
pthread_mutex_unlock (&iobuf_pool->mutex);
return iobuf;
}
+struct iobuf *
+iobuf_get (struct iobuf_pool *iobuf_pool)
+{
+ struct iobuf *iobuf = NULL;
+ struct iobuf_arena *iobuf_arena = NULL;
+
+ GF_VALIDATE_OR_GOTO ("iobuf", iobuf_pool, out);
+
+ pthread_mutex_lock (&iobuf_pool->mutex);
+ {
+ /* most eligible arena for picking an iobuf */
+ iobuf_arena = __iobuf_select_arena (iobuf_pool,
+ iobuf_pool->default_page_size);
+ if (!iobuf_arena) {
+ gf_log (THIS->name, GF_LOG_WARNING, "arena not found");
+ goto unlock;
+ }
+
+ iobuf = __iobuf_get (iobuf_arena,
+ iobuf_pool->default_page_size);
+ if (!iobuf) {
+ gf_log (THIS->name, GF_LOG_WARNING, "iobuf not found");
+ goto unlock;
+ }
+
+ __iobuf_ref (iobuf);
+ }
+unlock:
+ pthread_mutex_unlock (&iobuf_pool->mutex);
+
+out:
+ return iobuf;
+}
void
__iobuf_put (struct iobuf *iobuf, struct iobuf_arena *iobuf_arena)
{
struct iobuf_pool *iobuf_pool = NULL;
+ int index = 0;
+
+ GF_VALIDATE_OR_GOTO ("iobuf", iobuf_arena, out);
+ GF_VALIDATE_OR_GOTO ("iobuf", iobuf, out);
iobuf_pool = iobuf_arena->iobuf_pool;
+ index = gf_iobuf_get_arena_index (iobuf_arena->page_size);
+ if (index == -1) {
+ gf_log ("iobuf", GF_LOG_DEBUG, "freeing the iobuf (%p) "
+ "allocated with standard calloc()", iobuf);
+
+ /* free up properly without bothering about lists and all */
+ LOCK_DESTROY (&iobuf->lock);
+ GF_FREE (iobuf->free_ptr);
+ GF_FREE (iobuf);
+ return;
+ }
+
if (iobuf_arena->passive_cnt == 0) {
list_del (&iobuf_arena->list);
- list_add_tail (&iobuf_arena->list, &iobuf_pool->arenas.list);
+ list_add_tail (&iobuf_arena->list, &iobuf_pool->arenas[index]);
}
list_del_init (&iobuf->list);
@@ -390,8 +686,11 @@ __iobuf_put (struct iobuf *iobuf, struct iobuf_arena *iobuf_arena)
if (iobuf_arena->active_cnt == 0) {
list_del (&iobuf_arena->list);
- list_add_tail (&iobuf_arena->list, &iobuf_pool->purge.list);
+ list_add_tail (&iobuf_arena->list, &iobuf_pool->purge[index]);
+ __iobuf_arena_prune (iobuf_pool, iobuf_arena, index);
}
+out:
+ return;
}
@@ -401,16 +700,19 @@ iobuf_put (struct iobuf *iobuf)
struct iobuf_arena *iobuf_arena = NULL;
struct iobuf_pool *iobuf_pool = NULL;
- if (!iobuf)
- return;
+ GF_VALIDATE_OR_GOTO ("iobuf", iobuf, out);
iobuf_arena = iobuf->iobuf_arena;
- if (!iobuf_arena)
+ if (!iobuf_arena) {
+ gf_log (THIS->name, GF_LOG_WARNING, "arena not found");
return;
+ }
iobuf_pool = iobuf_arena->iobuf_pool;
- if (!iobuf_pool)
+ if (!iobuf_pool) {
+ gf_log (THIS->name, GF_LOG_WARNING, "iobuf pool not found");
return;
+ }
pthread_mutex_lock (&iobuf_pool->mutex);
{
@@ -418,7 +720,8 @@ iobuf_put (struct iobuf *iobuf)
}
pthread_mutex_unlock (&iobuf_pool->mutex);
- iobuf_pool_prune (iobuf_pool);
+out:
+ return;
}
@@ -427,8 +730,7 @@ iobuf_unref (struct iobuf *iobuf)
{
int ref = 0;
- if (!iobuf)
- return;
+ GF_VALIDATE_OR_GOTO ("iobuf", iobuf, out);
LOCK (&iobuf->lock);
{
@@ -439,14 +741,16 @@ iobuf_unref (struct iobuf *iobuf)
if (!ref)
iobuf_put (iobuf);
+
+out:
+ return;
}
struct iobuf *
iobuf_ref (struct iobuf *iobuf)
{
- if (!iobuf)
- return NULL;
+ GF_VALIDATE_OR_GOTO ("iobuf", iobuf, out);
LOCK (&iobuf->lock);
{
@@ -454,6 +758,7 @@ iobuf_ref (struct iobuf *iobuf)
}
UNLOCK (&iobuf->lock);
+out:
return iobuf;
}
@@ -463,7 +768,8 @@ iobref_new ()
{
struct iobref *iobref = NULL;
- iobref = CALLOC (sizeof (*iobref), 1);
+ iobref = GF_CALLOC (sizeof (*iobref), 1,
+ gf_common_mt_iobref);
if (!iobref)
return NULL;
@@ -478,8 +784,7 @@ iobref_new ()
struct iobref *
iobref_ref (struct iobref *iobref)
{
- if (!iobref)
- return NULL;
+ GF_VALIDATE_OR_GOTO ("iobuf", iobref, out);
LOCK (&iobref->lock);
{
@@ -487,6 +792,7 @@ iobref_ref (struct iobref *iobref)
}
UNLOCK (&iobref->lock);
+out:
return iobref;
}
@@ -497,10 +803,9 @@ iobref_destroy (struct iobref *iobref)
int i = 0;
struct iobuf *iobuf = NULL;
- if (!iobref)
- return;
+ GF_VALIDATE_OR_GOTO ("iobuf", iobref, out);
- for (i = 0; i < 8; i++) {
+ for (i = 0; i < GF_IOBREF_IOBUF_COUNT; i++) {
iobuf = iobref->iobrefs[i];
iobref->iobrefs[i] = NULL;
@@ -508,7 +813,10 @@ iobref_destroy (struct iobref *iobref)
iobuf_unref (iobuf);
}
- FREE (iobref);
+ GF_FREE (iobref);
+
+out:
+ return;
}
@@ -517,8 +825,7 @@ iobref_unref (struct iobref *iobref)
{
int ref = 0;
- if (!iobref)
- return;
+ GF_VALIDATE_OR_GOTO ("iobuf", iobref, out);
LOCK (&iobref->lock);
{
@@ -528,6 +835,32 @@ iobref_unref (struct iobref *iobref)
if (!ref)
iobref_destroy (iobref);
+
+out:
+ return;
+}
+
+
+void
+iobref_clear (struct iobref *iobref)
+{
+ int i = 0;
+
+ GF_VALIDATE_OR_GOTO ("iobuf", iobref, out);
+
+ for (; i < GF_IOBREF_IOBUF_COUNT; i++) {
+ if (iobref->iobrefs[i] != NULL) {
+ iobuf_unref (iobref->iobrefs[i]);
+ } else {
+ /** iobuf's are attched serially */
+ break;
+ }
+ }
+
+ iobref_unref (iobref);
+
+ out:
+ return;
}
@@ -537,7 +870,10 @@ __iobref_add (struct iobref *iobref, struct iobuf *iobuf)
int i = 0;
int ret = -ENOMEM;
- for (i = 0; i < 8; i++) {
+ GF_VALIDATE_OR_GOTO ("iobuf", iobref, out);
+ GF_VALIDATE_OR_GOTO ("iobuf", iobuf, out);
+
+ for (i = 0; i < GF_IOBREF_IOBUF_COUNT; i++) {
if (iobref->iobrefs[i] == NULL) {
iobref->iobrefs[i] = iobuf_ref (iobuf);
ret = 0;
@@ -545,6 +881,7 @@ __iobref_add (struct iobref *iobref, struct iobuf *iobuf)
}
}
+out:
return ret;
}
@@ -552,13 +889,10 @@ __iobref_add (struct iobref *iobref, struct iobuf *iobuf)
int
iobref_add (struct iobref *iobref, struct iobuf *iobuf)
{
- int ret = 0;
-
- if (!iobref)
- return -EINVAL;
+ int ret = -EINVAL;
- if (!iobuf)
- return -EINVAL;
+ GF_VALIDATE_OR_GOTO ("iobuf", iobref, out);
+ GF_VALIDATE_OR_GOTO ("iobuf", iobuf, out);
LOCK (&iobref->lock);
{
@@ -566,6 +900,7 @@ iobref_add (struct iobref *iobref, struct iobuf *iobuf)
}
UNLOCK (&iobref->lock);
+out:
return ret;
}
@@ -574,12 +909,15 @@ int
iobref_merge (struct iobref *to, struct iobref *from)
{
int i = 0;
- int ret = 0;
+ int ret = -1;
struct iobuf *iobuf = NULL;
+ GF_VALIDATE_OR_GOTO ("iobuf", to, out);
+ GF_VALIDATE_OR_GOTO ("iobuf", from, out);
+
LOCK (&from->lock);
{
- for (i = 0; i < 8; i++) {
+ for (i = 0; i < GF_IOBREF_IOBUF_COUNT; i++) {
iobuf = from->iobrefs[i];
if (!iobuf)
@@ -593,6 +931,7 @@ iobref_merge (struct iobref *to, struct iobref *from)
}
UNLOCK (&from->lock);
+out:
return ret;
}
@@ -602,16 +941,19 @@ iobuf_size (struct iobuf *iobuf)
{
size_t size = 0;
- if (!iobuf)
- goto out;
+ GF_VALIDATE_OR_GOTO ("iobuf", iobuf, out);
- if (!iobuf->iobuf_arena)
+ if (!iobuf->iobuf_arena) {
+ gf_log (THIS->name, GF_LOG_WARNING, "arena not found");
goto out;
+ }
- if (!iobuf->iobuf_arena->iobuf_pool)
+ if (!iobuf->iobuf_arena->iobuf_pool) {
+ gf_log (THIS->name, GF_LOG_WARNING, "pool not found");
goto out;
+ }
- size = iobuf->iobuf_arena->iobuf_pool->page_size;
+ size = iobuf->iobuf_arena->page_size;
out:
return size;
}
@@ -623,120 +965,149 @@ iobref_size (struct iobref *iobref)
size_t size = 0;
int i = 0;
- if (!iobref)
- goto out;
+ GF_VALIDATE_OR_GOTO ("iobuf", iobref, out);
LOCK (&iobref->lock);
{
- for (i = 0; i < 8; i++) {
+ for (i = 0; i < GF_IOBREF_IOBUF_COUNT; i++) {
if (iobref->iobrefs[i])
size += iobuf_size (iobref->iobrefs[i]);
}
}
UNLOCK (&iobref->lock);
+
out:
return size;
}
-void
+void
iobuf_info_dump (struct iobuf *iobuf, const char *key_prefix)
{
char key[GF_DUMP_MAX_BUF_LEN];
struct iobuf my_iobuf;
int ret = 0;
- if (!iobuf)
- return;
+ GF_VALIDATE_OR_GOTO ("iobuf", iobuf, out);
memset(&my_iobuf, 0, sizeof(my_iobuf));
-
+
ret = TRY_LOCK(&iobuf->lock);
if (ret) {
- gf_log("", GF_LOG_WARNING, "Unable to dump iobuf"
- " errno: %d", errno);
return;
}
memcpy(&my_iobuf, iobuf, sizeof(my_iobuf));
UNLOCK(&iobuf->lock);
- gf_proc_dump_build_key(key, key_prefix,"ref");
+ gf_proc_dump_build_key(key, key_prefix,"ref");
gf_proc_dump_write(key, "%d", my_iobuf.ref);
- gf_proc_dump_build_key(key, key_prefix,"ptr");
+ gf_proc_dump_build_key(key, key_prefix,"ptr");
gf_proc_dump_write(key, "%p", my_iobuf.ptr);
+out:
+ return;
}
-void
+void
iobuf_arena_info_dump (struct iobuf_arena *iobuf_arena, const char *key_prefix)
{
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 1;
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 1;
struct iobuf *trav;
- if (!iobuf_arena)
- return;
+ GF_VALIDATE_OR_GOTO ("iobuf", iobuf_arena, out);
gf_proc_dump_build_key(key, key_prefix,"mem_base");
gf_proc_dump_write(key, "%p", iobuf_arena->mem_base);
- gf_proc_dump_build_key(key, key_prefix, "active_cnt");
+ gf_proc_dump_build_key(key, key_prefix, "active_cnt");
gf_proc_dump_write(key, "%d", iobuf_arena->active_cnt);
- gf_proc_dump_build_key(key, key_prefix, "passive_cnt");
+ gf_proc_dump_build_key(key, key_prefix, "passive_cnt");
gf_proc_dump_write(key, "%d", iobuf_arena->passive_cnt);
- list_for_each_entry (trav, &iobuf_arena->active.list, list) {
+ gf_proc_dump_build_key(key, key_prefix, "alloc_cnt");
+ gf_proc_dump_write(key, "%"PRIu64, iobuf_arena->alloc_cnt);
+ gf_proc_dump_build_key(key, key_prefix, "max_active");
+ gf_proc_dump_write(key, "%"PRIu64, iobuf_arena->max_active);
+ gf_proc_dump_build_key(key, key_prefix, "page_size");
+ gf_proc_dump_write(key, "%"PRIu64, iobuf_arena->page_size);
+ list_for_each_entry (trav, &iobuf_arena->active.list, list) {
gf_proc_dump_build_key(key, key_prefix,"active_iobuf.%d", i++);
gf_proc_dump_add_section(key);
iobuf_info_dump(trav, key);
}
- i = 1;
- list_for_each_entry (trav, &iobuf_arena->passive.list, list) {
- gf_proc_dump_build_key(key, key_prefix,
- "passive_iobuf.%d",i++);
- gf_proc_dump_add_section(key);
- iobuf_info_dump(trav, key);
- }
-
+out:
+ return;
}
void
iobuf_stats_dump (struct iobuf_pool *iobuf_pool)
{
-
char msg[1024];
- struct iobuf_arena *trav;
+ struct iobuf_arena *trav = NULL;
int i = 1;
+ int j = 0;
int ret = -1;
- if (!iobuf_pool)
- return;
+ GF_VALIDATE_OR_GOTO ("iobuf", iobuf_pool, out);
memset(msg, 0, sizeof(msg));
ret = pthread_mutex_trylock(&iobuf_pool->mutex);
if (ret) {
- gf_log("", GF_LOG_WARNING, "Unable to dump iobuf pool"
- " errno: %d", errno);
return;
}
gf_proc_dump_add_section("iobuf.global");
- gf_proc_dump_write("iobuf.global.iobuf_pool","%p", iobuf_pool);
- gf_proc_dump_write("iobuf.global.iobuf_pool.page_size", "%d",
- iobuf_pool->page_size);
- gf_proc_dump_write("iobuf.global.iobuf_pool.arena_size", "%d",
- iobuf_pool->arena_size);
- gf_proc_dump_write("iobuf.global.iobuf_pool.arena_cnt", "%d",
- iobuf_pool->arena_cnt);
-
- list_for_each_entry (trav, &iobuf_pool->arenas.list, list) {
- snprintf(msg, sizeof(msg), "iobuf.global.iobuf_pool.arena.%d",
- i);
- gf_proc_dump_add_section(msg);
- iobuf_arena_info_dump(trav,msg);
- i++;
- }
-
+ gf_proc_dump_write("iobuf_pool","%p", iobuf_pool);
+ gf_proc_dump_write("iobuf_pool.default_page_size", "%d",
+ iobuf_pool->default_page_size);
+ gf_proc_dump_write("iobuf_pool.arena_size", "%d",
+ iobuf_pool->arena_size);
+ gf_proc_dump_write("iobuf_pool.arena_cnt", "%d",
+ iobuf_pool->arena_cnt);
+ gf_proc_dump_write("iobuf_pool.request_misses", "%"PRId64,
+ iobuf_pool->request_misses);
+
+ for (j = 0; j < IOBUF_ARENA_MAX_INDEX; j++) {
+ list_for_each_entry (trav, &iobuf_pool->arenas[j], list) {
+ snprintf(msg, sizeof(msg),
+ "arena.%d", i);
+ gf_proc_dump_add_section(msg);
+ iobuf_arena_info_dump(trav,msg);
+ i++;
+ }
+ list_for_each_entry (trav, &iobuf_pool->purge[j], list) {
+ snprintf(msg, sizeof(msg),
+ "purge.%d", i);
+ gf_proc_dump_add_section(msg);
+ iobuf_arena_info_dump(trav,msg);
+ i++;
+ }
+ list_for_each_entry (trav, &iobuf_pool->filled[j], list) {
+ snprintf(msg, sizeof(msg),
+ "filled.%d", i);
+ gf_proc_dump_add_section(msg);
+ iobuf_arena_info_dump(trav,msg);
+ i++;
+ }
+
+ }
+
pthread_mutex_unlock(&iobuf_pool->mutex);
+out:
+ return;
+}
+
+
+void
+iobuf_to_iovec(struct iobuf *iob, struct iovec *iov)
+{
+ GF_VALIDATE_OR_GOTO ("iobuf", iob, out);
+ GF_VALIDATE_OR_GOTO ("iobuf", iov, out);
+
+ iov->iov_base = iobuf_ptr (iob);
+ iov->iov_len = iobuf_pagesize (iob);
+
+out:
return;
}
diff --git a/libglusterfs/src/iobuf.h b/libglusterfs/src/iobuf.h
index de95da324..5595309e1 100644
--- a/libglusterfs/src/iobuf.h
+++ b/libglusterfs/src/iobuf.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _IOBUF_H_
@@ -24,6 +15,10 @@
#include "common-utils.h"
#include <pthread.h>
#include <sys/mman.h>
+#include <sys/uio.h>
+
+#define GF_VARIABLE_IOBUF_COUNT 32
+#define GF_IOBREF_IOBUF_COUNT 16
/* Lets try to define the new anonymous mapping
* flag, in case the system is still using the
@@ -36,6 +31,11 @@
#define MAP_ANONYMOUS MAP_ANON
#endif
+#define GF_ALIGN_BUF(ptr,bound) ((void *)((unsigned long)(ptr + bound - 1) & \
+ (unsigned long)(~(bound - 1))))
+
+#define GF_IOBUF_ALIGN_SIZE 512
+
/* one allocatable unit for the consumers of the IOBUF API */
/* each unit hosts @page_size bytes of memory */
struct iobuf;
@@ -48,6 +48,10 @@ struct iobuf_arena;
/* expandable and contractable pool of memory, internally broken into arenas */
struct iobuf_pool;
+struct iobuf_init_config {
+ size_t pagesize;
+ int32_t num_pages;
+};
struct iobuf {
union {
@@ -63,6 +67,9 @@ struct iobuf {
int ref; /* 0 == passive, >0 == active */
void *ptr; /* usable memory region by the consumer */
+
+ void *free_ptr; /* in case of stdalloc, this is the
+ one to be freed */
};
@@ -74,6 +81,13 @@ struct iobuf_arena {
struct iobuf_arena *prev;
};
};
+
+ size_t page_size; /* size of all iobufs in this arena */
+ size_t arena_size; /* this is equal to
+ (iobuf_pool->arena_size / page_size)
+ * page_size */
+ size_t page_count;
+
struct iobuf_pool *iobuf_pool;
void *mem_base;
@@ -85,33 +99,50 @@ struct iobuf_arena {
int passive_cnt;
struct iobuf passive; /* head node iobuf
(unused by itself) */
+ uint64_t alloc_cnt; /* total allocs in this pool */
+ int max_active; /* max active buffers at a given time */
};
struct iobuf_pool {
pthread_mutex_t mutex;
- size_t page_size; /* size of all iobufs in this pool */
- size_t arena_size; /* size of memory region in arena */
+ size_t arena_size; /* size of memory region in
+ arena */
+ size_t default_page_size; /* default size of iobuf */
int arena_cnt;
- struct iobuf_arena arenas; /* head node arena
- (unused by itself) */
- struct iobuf_arena filled; /* arenas without free iobufs */
- struct iobuf_arena purge; /* arenas which can be purged */
-};
+ struct list_head arenas[GF_VARIABLE_IOBUF_COUNT];
+ /* array of arenas. Each element of the array is a list of arenas
+ holding iobufs of particular page_size */
+ struct list_head filled[GF_VARIABLE_IOBUF_COUNT];
+ /* array of arenas without free iobufs */
+ struct list_head purge[GF_VARIABLE_IOBUF_COUNT];
+ /* array of of arenas which can be purged */
+ uint64_t request_misses; /* mostly the requests for higher
+ value of iobufs */
+};
-struct iobuf_pool *iobuf_pool_new (size_t arena_size, size_t page_size);
+
+struct iobuf_pool *iobuf_pool_new (void);
+void iobuf_pool_destroy (struct iobuf_pool *iobuf_pool);
struct iobuf *iobuf_get (struct iobuf_pool *iobuf_pool);
void iobuf_unref (struct iobuf *iobuf);
+struct iobuf *iobuf_ref (struct iobuf *iobuf);
+void iobuf_pool_destroy (struct iobuf_pool *iobuf_pool);
+void iobuf_to_iovec(struct iobuf *iob, struct iovec *iov);
+
+#define iobuf_ptr(iob) ((iob)->ptr)
+#define iobpool_default_pagesize(iobpool) ((iobpool)->default_page_size)
+#define iobuf_pagesize(iob) (iob->iobuf_arena->page_size)
struct iobref {
gf_lock_t lock;
int ref;
- struct iobuf *iobrefs[8];
+ struct iobuf *iobrefs[GF_IOBREF_IOBUF_COUNT];
};
struct iobref *iobref_new ();
@@ -119,10 +150,12 @@ struct iobref *iobref_ref (struct iobref *iobref);
void iobref_unref (struct iobref *iobref);
int iobref_add (struct iobref *iobref, struct iobuf *iobuf);
int iobref_merge (struct iobref *to, struct iobref *from);
-
+void iobref_clear (struct iobref *iobref);
size_t iobuf_size (struct iobuf *iobuf);
size_t iobref_size (struct iobref *iobref);
void iobuf_stats_dump (struct iobuf_pool *iobuf_pool);
+struct iobuf *
+iobuf_get2 (struct iobuf_pool *iobuf_pool, size_t page_size);
#endif /* !_IOBUF_H_ */
diff --git a/libglusterfs/src/latency.c b/libglusterfs/src/latency.c
new file mode 100644
index 000000000..b22f72950
--- /dev/null
+++ b/libglusterfs/src/latency.c
@@ -0,0 +1,189 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+/*
+ * This file contains functions to support dumping of
+ * latencies of FOPs broken down by subvolumes.
+ */
+
+#include "glusterfs.h"
+#include "stack.h"
+#include "xlator.h"
+#include "common-utils.h"
+#include "statedump.h"
+
+
+void
+gf_set_fop_from_fn_pointer (call_frame_t *frame, struct xlator_fops *fops, void *fn)
+{
+ glusterfs_fop_t fop = -1;
+
+ if (fops->stat == *(fop_stat_t *)&fn)
+ fop = GF_FOP_STAT;
+ else if (fops->readlink == *(fop_readlink_t *)&fn)
+ fop = GF_FOP_READLINK;
+ else if (fops->mknod == *(fop_mknod_t *)&fn)
+ fop = GF_FOP_MKNOD;
+ else if (fops->mkdir == *(fop_mkdir_t *)&fn)
+ fop = GF_FOP_MKDIR;
+ else if (fops->unlink == *(fop_unlink_t *)&fn)
+ fop = GF_FOP_UNLINK;
+ else if (fops->rmdir == *(fop_rmdir_t *)&fn)
+ fop = GF_FOP_RMDIR;
+ else if (fops->symlink == *(fop_symlink_t *)&fn)
+ fop = GF_FOP_SYMLINK;
+ else if (fops->rename == *(fop_rename_t *)&fn)
+ fop = GF_FOP_RENAME;
+ else if (fops->link == *(fop_link_t *)&fn)
+ fop = GF_FOP_LINK;
+ else if (fops->truncate == *(fop_truncate_t *)&fn)
+ fop = GF_FOP_TRUNCATE;
+ else if (fops->open == *(fop_open_t *)&fn)
+ fop = GF_FOP_OPEN;
+ else if (fops->readv == *(fop_readv_t *)&fn)
+ fop = GF_FOP_READ;
+ else if (fops->writev == *(fop_writev_t *)&fn)
+ fop = GF_FOP_WRITE;
+ else if (fops->statfs == *(fop_statfs_t *)&fn)
+ fop = GF_FOP_STATFS;
+ else if (fops->flush == *(fop_flush_t *)&fn)
+ fop = GF_FOP_FLUSH;
+ else if (fops->fsync == *(fop_fsync_t *)&fn)
+ fop = GF_FOP_FSYNC;
+ else if (fops->setxattr == *(fop_setxattr_t *)&fn)
+ fop = GF_FOP_SETXATTR;
+ else if (fops->getxattr == *(fop_getxattr_t *)&fn)
+ fop = GF_FOP_GETXATTR;
+ else if (fops->removexattr == *(fop_removexattr_t *)&fn)
+ fop = GF_FOP_REMOVEXATTR;
+ else if (fops->opendir == *(fop_opendir_t *)&fn)
+ fop = GF_FOP_OPENDIR;
+ else if (fops->fsyncdir == *(fop_fsyncdir_t *)&fn)
+ fop = GF_FOP_FSYNCDIR;
+ else if (fops->access == *(fop_access_t *)&fn)
+ fop = GF_FOP_ACCESS;
+ else if (fops->create == *(fop_create_t *)&fn)
+ fop = GF_FOP_CREATE;
+ else if (fops->ftruncate == *(fop_ftruncate_t *)&fn)
+ fop = GF_FOP_FTRUNCATE;
+ else if (fops->fstat == *(fop_fstat_t *)&fn)
+ fop = GF_FOP_FSTAT;
+ else if (fops->lk == *(fop_lk_t *)&fn)
+ fop = GF_FOP_LK;
+ else if (fops->lookup == *(fop_lookup_t *)&fn)
+ fop = GF_FOP_LOOKUP;
+ else if (fops->readdir == *(fop_readdir_t *)&fn)
+ fop = GF_FOP_READDIR;
+ else if (fops->inodelk == *(fop_inodelk_t *)&fn)
+ fop = GF_FOP_INODELK;
+ else if (fops->finodelk == *(fop_finodelk_t *)&fn)
+ fop = GF_FOP_FINODELK;
+ else if (fops->entrylk == *(fop_entrylk_t *)&fn)
+ fop = GF_FOP_ENTRYLK;
+ else if (fops->fentrylk == *(fop_fentrylk_t *)&fn)
+ fop = GF_FOP_FENTRYLK;
+ else if (fops->xattrop == *(fop_xattrop_t *)&fn)
+ fop = GF_FOP_XATTROP;
+ else if (fops->fxattrop == *(fop_fxattrop_t *)&fn)
+ fop = GF_FOP_FXATTROP;
+ else if (fops->fgetxattr == *(fop_fgetxattr_t *)&fn)
+ fop = GF_FOP_FGETXATTR;
+ else if (fops->fsetxattr == *(fop_fsetxattr_t *)&fn)
+ fop = GF_FOP_FSETXATTR;
+ else if (fops->rchecksum == *(fop_rchecksum_t *)&fn)
+ fop = GF_FOP_RCHECKSUM;
+ else if (fops->setattr == *(fop_setattr_t *)&fn)
+ fop = GF_FOP_SETATTR;
+ else if (fops->fsetattr == *(fop_fsetattr_t *)&fn)
+ fop = GF_FOP_FSETATTR;
+ else if (fops->readdirp == *(fop_readdirp_t *)&fn)
+ fop = GF_FOP_READDIRP;
+ else if (fops->getspec == *(fop_getspec_t *)&fn)
+ fop = GF_FOP_GETSPEC;
+ else
+ fop = -1;
+
+ frame->op = fop;
+}
+
+
+void
+gf_update_latency (call_frame_t *frame)
+{
+ double elapsed;
+ struct timeval *begin, *end;
+
+ fop_latency_t *lat;
+
+ begin = &frame->begin;
+ end = &frame->end;
+
+ elapsed = (end->tv_sec - begin->tv_sec) * 1e6
+ + (end->tv_usec - begin->tv_usec);
+
+ lat = &frame->this->latencies[frame->op];
+
+ lat->total += elapsed;
+ lat->count++;
+ lat->mean = lat->mean + (elapsed - lat->mean) / lat->count;
+}
+
+void
+gf_latency_begin (call_frame_t *frame, void *fn)
+{
+ gf_set_fop_from_fn_pointer (frame, frame->this->fops, fn);
+
+ gettimeofday (&frame->begin, NULL);
+}
+
+
+void
+gf_latency_end (call_frame_t *frame)
+{
+ gettimeofday (&frame->end, NULL);
+
+ gf_update_latency (frame);
+}
+
+void
+gf_proc_dump_latency_info (xlator_t *xl)
+{
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i;
+
+ snprintf (key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.latency", xl->name);
+ gf_proc_dump_add_section (key_prefix);
+
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ gf_proc_dump_build_key (key, key_prefix,
+ (char *)gf_fop_list[i]);
+
+ gf_proc_dump_write (key, "%.03f,%"PRId64",%.03f",
+ xl->latencies[i].mean,
+ xl->latencies[i].count,
+ xl->latencies[i].total);
+ }
+
+ memset (xl->latencies, 0, sizeof (xl->latencies));
+}
+
+
+void
+gf_latency_toggle (int signum, glusterfs_ctx_t *ctx)
+{
+ if (ctx) {
+ ctx->measure_latency = !ctx->measure_latency;
+ gf_log ("[core]", GF_LOG_INFO,
+ "Latency measurement turned %s",
+ ctx->measure_latency ? "on" : "off");
+ }
+}
diff --git a/libglusterfs/src/latency.h b/libglusterfs/src/latency.h
new file mode 100644
index 000000000..81acbf484
--- /dev/null
+++ b/libglusterfs/src/latency.h
@@ -0,0 +1,28 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __LATENCY_H__
+#define __LATENCY_H__
+
+#include "glusterfs.h"
+
+typedef struct fop_latency {
+ uint64_t min; /* min time for the call (microseconds) */
+ uint64_t max; /* max time for the call (microseconds) */
+ double total; /* total time (microseconds) */
+ double std; /* standard deviation */
+ double mean; /* mean (microseconds) */
+ uint64_t count;
+} fop_latency_t;
+
+void
+gf_latency_toggle (int signum, glusterfs_ctx_t *ctx);
+
+#endif /* __LATENCY_H__ */
diff --git a/libglusterfs/src/list.h b/libglusterfs/src/list.h
index 9ef897fd9..392c22ceb 100644
--- a/libglusterfs/src/list.h
+++ b/libglusterfs/src/list.h
@@ -1,26 +1,16 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _LLIST_H
#define _LLIST_H
-
struct list_head {
struct list_head *next;
struct list_head *prev;
@@ -54,6 +44,31 @@ list_add_tail (struct list_head *new, struct list_head *head)
}
+/* This function will insert the element to the list in a order.
+ Order will be based on the compare function provided as a input.
+ If element to be inserted in ascending order compare should return:
+ 0: if both the arguments are equal
+ >0: if first argument is greater than second argument
+ <0: if first argument is less than second argument */
+static inline void
+list_add_order (struct list_head *new, struct list_head *head,
+ int (*compare)(struct list_head *, struct list_head *))
+{
+ struct list_head *pos = head->prev;
+
+ while ( pos != head ) {
+ if (compare(new, pos) >= 0)
+ break;
+
+ /* Iterate the list in the reverse order. This will have
+ better efficiency if the elements are inserted in the
+ ascending order */
+ pos = pos->prev;
+ }
+
+ list_add (new, pos);
+}
+
static inline void
list_del (struct list_head *old)
{
@@ -120,6 +135,7 @@ list_splice (struct list_head *list, struct list_head *head)
}
+/* Splice moves @list to the head of the list at @head. */
static inline void
list_splice_init (struct list_head *list, struct list_head *head)
{
@@ -131,11 +147,43 @@ list_splice_init (struct list_head *list, struct list_head *head)
}
+static inline void
+__list_append (struct list_head *list, struct list_head *head)
+{
+ (head->prev)->next = (list->next);
+ (list->next)->prev = (head->prev);
+ (head->prev) = (list->prev);
+ (list->prev)->next = head;
+}
+
+
+static inline void
+list_append (struct list_head *list, struct list_head *head)
+{
+ if (list_empty (list))
+ return;
+
+ __list_append (list, head);
+}
+
+
+/* Append moves @list to the end of @head */
+static inline void
+list_append_init (struct list_head *list, struct list_head *head)
+{
+ if (list_empty (list))
+ return;
+
+ __list_append (list, head);
+ INIT_LIST_HEAD (list);
+}
+
+
#define list_entry(ptr, type, member) \
((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
-#define list_for_each(pos, head) \
+#define list_for_each(pos, head) \
for (pos = (head)->next; pos != (head); pos = pos->next)
@@ -151,4 +199,16 @@ list_splice_init (struct list_head *list, struct list_head *head)
&pos->member != (head); \
pos = n, n = list_entry(n->member.next, typeof(*n), member))
+#define list_for_each_entry_reverse(pos, head, member) \
+ for (pos = list_entry((head)->prev, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.prev, typeof(*pos), member))
+
+
+#define list_for_each_entry_safe_reverse(pos, n, head, member) \
+ for (pos = list_entry((head)->prev, typeof(*pos), member), \
+ n = list_entry(pos->member.prev, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = n, n = list_entry(n->member.prev, typeof(*n), member))
+
#endif /* _LLIST_H */
diff --git a/libglusterfs/src/lkowner.h b/libglusterfs/src/lkowner.h
new file mode 100644
index 000000000..969d13e50
--- /dev/null
+++ b/libglusterfs/src/lkowner.h
@@ -0,0 +1,83 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _LK_OWNER_H
+#define _LK_OWNER_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#define GF_MAX_LOCK_OWNER_LEN 1024 /* 1kB as per NLM */
+
+/* 16strings-16strings-... */
+#define GF_LKOWNER_BUF_SIZE ((GF_MAX_LOCK_OWNER_LEN * 2) + \
+ (GF_MAX_LOCK_OWNER_LEN / 8))
+
+typedef struct gf_lkowner_ {
+ int len;
+ char data[GF_MAX_LOCK_OWNER_LEN];
+} gf_lkowner_t;
+
+
+/* LKOWNER to string functions */
+static inline void
+lkowner_unparse (gf_lkowner_t *lkowner, char *buf, int buf_len)
+{
+ int i = 0;
+ int j = 0;
+
+ for (i = 0; i < lkowner->len; i++) {
+ if (i && !(i % 8)) {
+ buf[j] = '-';
+ j++;
+ }
+ sprintf (&buf[j], "%02hhx", lkowner->data[i]);
+ j += 2;
+ if (j == buf_len)
+ break;
+ }
+ if (j < buf_len)
+ buf[j] = '\0';
+}
+
+static inline void
+set_lk_owner_from_ptr (gf_lkowner_t *lkowner, void *data)
+{
+ int i = 0;
+ int j = 0;
+
+ lkowner->len = sizeof (unsigned long);
+ for (i = 0, j = 0; i < lkowner->len; i++, j += 8) {
+ lkowner->data[i] = (char)((((unsigned long)data) >> j) & 0xff);
+ }
+}
+
+static inline void
+set_lk_owner_from_uint64 (gf_lkowner_t *lkowner, uint64_t data)
+{
+ int i = 0;
+ int j = 0;
+
+ lkowner->len = 8;
+ for (i = 0, j = 0; i < lkowner->len; i++, j += 8) {
+ lkowner->data[i] = (char)((data >> j) & 0xff);
+ }
+}
+
+/* Return true if the locks have the same owner */
+static inline int
+is_same_lkowner (gf_lkowner_t *l1, gf_lkowner_t *l2)
+{
+ return ((l1->len == l2->len) && !memcmp(l1->data, l2->data, l1->len));
+}
+
+#endif /* _LK_OWNER_H */
diff --git a/libglusterfs/src/locking.h b/libglusterfs/src/locking.h
index 3c3ea79c4..79c6992af 100644
--- a/libglusterfs/src/locking.h
+++ b/libglusterfs/src/locking.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _LOCKING_H
diff --git a/libglusterfs/src/logging.c b/libglusterfs/src/logging.c
index 7e1e56f87..5deb90cda 100644
--- a/libglusterfs/src/logging.c
+++ b/libglusterfs/src/logging.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -31,576 +22,1016 @@
#include <string.h>
#include <stdlib.h>
+#ifdef GF_USE_SYSLOG
+#include <libintl.h>
+#include <syslog.h>
+#include <sys/stat.h>
+#include "gf-error-codes.h"
+
+#define GF_JSON_MSG_LENGTH 8192
+#define GF_SYSLOG_CEE_FORMAT \
+ "@cee: {\"msg\": \"%s\", \"gf_code\": \"%u\", \"gf_message\": \"%s\"}"
+#define GF_LOG_CONTROL_FILE "/etc/glusterfs/logger.conf"
+#endif /* GF_USE_SYSLOG */
+
#include "xlator.h"
#include "logging.h"
#include "defaults.h"
+#include "glusterfs.h"
-static pthread_mutex_t logfile_mutex;
-static char *filename = NULL;
-static uint8_t logrotate = 0;
+#ifdef GF_LINUX_HOST_OS
+#include <syslog.h>
+#endif
-static FILE *logfile = NULL;
-static gf_loglevel_t loglevel = GF_LOG_MAX;
+#ifdef HAVE_BACKTRACE
+#include <execinfo.h>
+#endif
-gf_loglevel_t gf_log_loglevel; /* extern'd */
-FILE *gf_log_logfile;
+/* Ideally this should get moved to logging.h */
+struct _msg_queue {
+ struct list_head msgs;
+};
+struct _log_msg {
+ const char *msg;
+ struct list_head queue;
+};
-void
+void
gf_log_logrotate (int signum)
{
- logrotate = 1;
+ THIS->ctx->log.logrotate = 1;
}
+void
+gf_log_enable_syslog (void)
+{
+ THIS->ctx->log.gf_log_syslog = 1;
+}
-gf_loglevel_t
-gf_log_get_loglevel (void)
+void
+gf_log_disable_syslog (void)
{
- return loglevel;
+ THIS->ctx->log.gf_log_syslog = 0;
}
+gf_loglevel_t
+gf_log_get_loglevel (void)
+{
+ return THIS->ctx->log.loglevel;
+}
void
gf_log_set_loglevel (gf_loglevel_t level)
{
- gf_log_loglevel = loglevel = level;
+ THIS->ctx->log.loglevel = level;
}
-void
-gf_log_fini (void)
+gf_loglevel_t
+gf_log_get_xl_loglevel (void *this)
{
- pthread_mutex_destroy (&logfile_mutex);
+ xlator_t *xl = this;
+ if (!xl)
+ return 0;
+ return xl->loglevel;
}
+void
+gf_log_set_xl_loglevel (void *this, gf_loglevel_t level)
+{
+ xlator_t *xl = this;
+ if (!xl)
+ return;
+ xl->ctx->log.gf_log_xl_log_set = 1;
+ xl->loglevel = level;
+}
-int
-gf_log_init (const char *file)
+void
+gf_log_fini (void)
{
- if (!file){
- fprintf (stderr, "gf_log_init: no filename specified\n");
- return -1;
- }
-
- pthread_mutex_init (&logfile_mutex, NULL);
-
- filename = strdup (file);
- if (!filename) {
- fprintf (stderr, "gf_log_init: strdup error\n");
- return -1;
- }
-
- logfile = fopen (file, "a");
- if (!logfile){
- fprintf (stderr,
- "gf_log_init: failed to open logfile \"%s\" (%s)\n",
- file,
- strerror (errno));
- return -1;
- }
-
- gf_log_logfile = logfile;
-
- return 0;
+ pthread_mutex_destroy (&THIS->ctx->log.logfile_mutex);
}
-static int
-dummy_init (xlator_t *xl)
-{
- return 0;
+#ifdef GF_USE_SYSLOG
+/**
+ * gf_get_error_message -function to get error message for given error code
+ * @error_code: error code defined by log book
+ *
+ * @return: success: string
+ * failure: NULL
+ */
+const char *
+gf_get_error_message (int error_code) {
+ return _gf_get_message (error_code);
}
-static int
-gf_log_notify (xlator_t *this_xl, int event, void *data, ...)
+/**
+ * gf_openlog -function to open syslog specific to gluster based on
+ * existence of file /etc/glusterfs/logger.conf
+ * @ident: optional identification string similar to openlog()
+ * @option: optional value to option to openlog(). Passing -1 uses
+ * 'LOG_PID | LOG_NDELAY' as default
+ * @facility: optional facility code similar to openlog(). Passing -1
+ * uses LOG_DAEMON as default
+ *
+ * @return: void
+ */
+void
+gf_openlog (const char *ident, int option, int facility)
{
- int ret = 0;
-
- switch (event) {
- case GF_EVENT_CHILD_UP:
- break;
-
- case GF_EVENT_CHILD_DOWN:
- break;
-
- default:
- ret = default_notify (this_xl, event, data);
- break;
- }
-
- return ret;
+ int _option = option;
+ int _facility = facility;
+
+ if (-1 == _option) {
+ _option = LOG_PID | LOG_NDELAY;
+ }
+ if (-1 == _facility) {
+ _facility = LOG_LOCAL1;
+ }
+
+ setlocale(LC_ALL, "");
+ bindtextdomain("gluster", "/usr/share/locale");
+ textdomain("gluster");
+
+ openlog(ident, _option, _facility);
}
-/*
- * Get a dummy xlator for the purpose of central logging.
- * An xlator is needed because a transport cannot exist without
- * an xlator.
+/**
+ * _json_escape -function to convert string to json encoded string
+ * @str: input string
+ * @buf: buffer to store encoded string
+ * @len: length of @buf
+ *
+ * @return: success: last unprocessed character position by pointer in @str
+ * failure: NULL
+ *
+ * Internal function. Heavily inspired by _ul_str_escape() function in
+ * libumberlog
+ *
+ * Sample output:
+ * [1] str = "devel error"
+ * buf = "devel error"
+ * [2] str = "devel error"
+ * buf = "devel\terror"
+ * [3] str = "I/O error on "/tmp/foo" file"
+ * buf = "I/O error on \"/tmp/foo\" file"
+ * [4] str = "I/O erroron /tmp/bar file"
+ * buf = "I/O error\u001bon /tmp/bar file"
+ *
*/
-
-static xlator_t *
-__get_dummy_xlator (glusterfs_ctx_t *ctx, const char *remote_host,
- const char *transport, uint32_t remote_port)
+char *
+_json_escape(const char *str, char *buf, size_t len)
{
- volume_opt_list_t *vol_opt = NULL;
- xlator_t * trav = NULL;
-
- int ret = 0;
-
- xlator_t * top = NULL;
- xlator_t * trans = NULL;
- xlator_list_t * parent = NULL;
- xlator_list_t * tmp = NULL;
-
- top = CALLOC (1, sizeof (*top));
- if (!top)
- goto out;
+ static const unsigned char json_exceptions[UCHAR_MAX + 1] =
+ {
+ [0x01] = 1, [0x02] = 1, [0x03] = 1, [0x04] = 1,
+ [0x05] = 1, [0x06] = 1, [0x07] = 1, [0x08] = 1,
+ [0x09] = 1, [0x0a] = 1, [0x0b] = 1, [0x0c] = 1,
+ [0x0d] = 1, [0x0e] = 1, [0x0f] = 1, [0x10] = 1,
+ [0x11] = 1, [0x12] = 1, [0x13] = 1, [0x14] = 1,
+ [0x15] = 1, [0x16] = 1, [0x17] = 1, [0x18] = 1,
+ [0x19] = 1, [0x1a] = 1, [0x1b] = 1, [0x1c] = 1,
+ [0x1d] = 1, [0x1e] = 1, [0x1f] = 1,
+ ['\\'] = 1, ['"'] = 1
+ };
+ static const char json_hex_chars[16] = "0123456789abcdef";
+ unsigned char *p = NULL;
+ size_t pos = 0;
+
+ if (!str || !buf || len <= 0) {
+ return NULL;
+ }
- trans = CALLOC (1, sizeof (*trans));
- if (!trans)
- goto out;
-
- INIT_LIST_HEAD (&top->volume_options);
- INIT_LIST_HEAD (&trans->volume_options);
-
- top->name = "log-dummy";
- top->ctx = ctx;
- top->next = trans;
- top->init = dummy_init;
- top->notify = gf_log_notify;
- top->children = (void *) CALLOC (1, sizeof (*top->children));
-
- if (!top->children)
- goto out;
+ for (p = (unsigned char *)str;
+ *p && (pos + 1) < len;
+ p++)
+ {
+ if (json_exceptions[*p] == 0) {
+ buf[pos++] = *p;
+ continue;
+ }
- top->children->xlator = trans;
-
- trans->name = "log-transport";
- trans->ctx = ctx;
- trans->prev = top;
- trans->init = dummy_init;
- trans->notify = default_notify;
- trans->options = get_new_dict ();
-
- parent = CALLOC (1, sizeof(*parent));
-
- if (!parent)
- goto out;
+ if ((pos + 2) >= len) {
+ break;
+ }
- parent->xlator = top;
-
- if (trans->parents == NULL)
- trans->parents = parent;
- else {
- tmp = trans->parents;
- while (tmp->next)
- tmp = tmp->next;
- tmp->next = parent;
- }
-
- /* TODO: log on failure to set dict */
- if (remote_host) {
- ret = dict_set (trans->options, "remote-host",
- str_to_data ((char *)remote_host));
- }
-
- if (remote_port)
- ret = dict_set_uint32 (trans->options, "remote-port",
- remote_port);
-
- /*
- * 'option remote-subvolume <x>' is needed here even though
- * its not used
- */
- ret = dict_set_static_ptr (trans->options, "remote-subvolume",
- "brick");
- ret = dict_set_static_ptr (trans->options, "disable-handshake", "on");
- ret = dict_set_static_ptr (trans->options, "non-blocking-io", "off");
-
- if (transport) {
- char *transport_type = CALLOC (1, strlen (transport) + 10);
- ERR_ABORT (transport_type);
- strcpy(transport_type, transport);
-
- if (strchr (transport_type, ':'))
- *(strchr (transport_type, ':')) = '\0';
-
- ret = dict_set_dynstr (trans->options, "transport-type",
- transport_type);
- }
-
- xlator_set_type (trans, "protocol/client");
-
- trav = top;
- while (trav) {
- /* Get the first volume_option */
- if (!list_empty (&trav->volume_options)) {
- list_for_each_entry (vol_opt,
- &trav->volume_options, list)
- break;
- if ((ret =
- validate_xlator_volume_options (trav,
- vol_opt->given_opt)) < 0) {
- gf_log (trav->name, GF_LOG_ERROR,
- "validating translator failed");
- return NULL;
+ switch (*p)
+ {
+ case '\b':
+ buf[pos++] = '\\';
+ buf[pos++] = 'b';
+ break;
+ case '\n':
+ buf[pos++] = '\\';
+ buf[pos++] = 'n';
+ break;
+ case '\r':
+ buf[pos++] = '\\';
+ buf[pos++] = 'r';
+ break;
+ case '\t':
+ buf[pos++] = '\\';
+ buf[pos++] = 't';
+ break;
+ case '\\':
+ buf[pos++] = '\\';
+ buf[pos++] = '\\';
+ break;
+ case '"':
+ buf[pos++] = '\\';
+ buf[pos++] = '"';
+ break;
+ default:
+ if ((pos + 6) >= len) {
+ buf[pos] = '\0';
+ return (char *)p;
}
+ buf[pos++] = '\\';
+ buf[pos++] = 'u';
+ buf[pos++] = '0';
+ buf[pos++] = '0';
+ buf[pos++] = json_hex_chars[(*p) >> 4];
+ buf[pos++] = json_hex_chars[(*p) & 0xf];
+ break;
}
- trav = trav->next;
- }
-
- if (xlator_tree_init (top) != 0)
- return NULL;
+ }
-out:
- return top;
+ buf[pos] = '\0';
+ return (char *)p;
}
-/*
- * Initialize logging to a central server.
- * If successful, log messages will be written both to
- * the local file and to the remote server.
+/**
+ * gf_syslog -function to submit message to syslog specific to gluster
+ * @error_code: error code defined by log book
+ * @facility_priority: facility_priority of syslog()
+ * @format: optional format string to syslog()
+ *
+ * @return: void
*/
+void
+gf_syslog (int error_code, int facility_priority, char *format, ...)
+{
+ char *msg = NULL;
+ char json_msg[GF_JSON_MSG_LENGTH];
+ GF_UNUSED char *p = NULL;
+ const char *error_message = NULL;
+ char json_error_message[GF_JSON_MSG_LENGTH];
+ va_list ap;
+
+ error_message = gf_get_error_message (error_code);
+
+ va_start (ap, format);
+ if (format) {
+ vasprintf (&msg, format, ap);
+ p = _json_escape (msg, json_msg, GF_JSON_MSG_LENGTH);
+ if (error_message) {
+ p = _json_escape (error_message, json_error_message,
+ GF_JSON_MSG_LENGTH);
+ syslog (facility_priority, GF_SYSLOG_CEE_FORMAT,
+ json_msg, error_code, json_error_message);
+ } else {
+ /* ignore the error code because no error message for it
+ and use normal syslog */
+ syslog (facility_priority, "%s", msg);
+ }
+ free (msg);
+ } else {
+ if (error_message) {
+ /* no user message: treat error_message as msg */
+ syslog (facility_priority, GF_SYSLOG_CEE_FORMAT,
+ json_error_message, error_code,
+ json_error_message);
+ } else {
+ /* cannot produce log as neither error_message nor
+ msg available */
+ }
+ }
+ va_end (ap);
+}
+#endif /* GF_USE_SYSLOG */
-static xlator_t * logging_xl = NULL;
-static int __central_log_enabled = 0;
+void
+gf_log_globals_init (void *data)
+{
+ glusterfs_ctx_t *ctx = data;
-static pthread_t logging_thread;
+ pthread_mutex_init (&ctx->log.logfile_mutex, NULL);
-struct _msg_queue {
- struct list_head msgs;
-};
+ ctx->log.loglevel = GF_LOG_INFO;
+ ctx->log.gf_log_syslog = 1;
+ ctx->log.sys_log_level = GF_LOG_CRITICAL;
+
+#ifndef GF_USE_SYSLOG
+#ifdef GF_LINUX_HOST_OS
+ /* For the 'syslog' output. one can grep 'GlusterFS' in syslog
+ for serious logs */
+ openlog ("GlusterFS", LOG_PID, LOG_DAEMON);
+#endif
+#endif
+}
-static struct _msg_queue msg_queue;
+int
+gf_log_init (void *data, const char *file, const char *ident)
+{
+ glusterfs_ctx_t *ctx = NULL;
+ int fd = -1;
-static pthread_cond_t msg_cond;
-static pthread_mutex_t msg_cond_mutex;
-static pthread_mutex_t msg_queue_mutex;
+ ctx = data;
-struct _log_msg {
- const char *msg;
- struct list_head queue;
-};
+#if defined(GF_USE_SYSLOG)
+ {
+ /* use default ident and option */
+ /* TODO: make FACILITY configurable than LOG_DAEMON */
+ struct stat buf;
+
+ if (stat (GF_LOG_CONTROL_FILE, &buf) == 0) {
+ /* use syslog logging */
+ ctx->log.log_control_file_found = 1;
+ if (ident) {
+ /* we need to keep this value as */
+ /* syslog uses it on every logging */
+ ctx->log.ident = gf_strdup (ident);
+ gf_openlog (ctx->log.ident, -1, LOG_DAEMON);
+ } else {
+ gf_openlog (NULL, -1, LOG_DAEMON);
+ }
+ } else {
+ /* use old style logging */
+ ctx->log.log_control_file_found = 0;
+ }
+ }
+#endif
+ if (!file){
+ fprintf (stderr, "ERROR: no filename specified\n");
+ return -1;
+ }
-int32_t
-gf_log_central_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- struct _log_msg *msg = NULL;
+ if (strcmp (file, "-") == 0) {
+ ctx->log.gf_log_logfile = stderr;
+ ctx->log.logfile = stderr;
+ return 0;
+ }
- msg = (struct _log_msg *) cookie;
+ ctx->log.filename = gf_strdup (file);
+ if (!ctx->log.filename) {
+ fprintf (stderr, "ERROR: updating log-filename failed: %s\n",
+ strerror (errno));
+ return -1;
+ }
+
+ fd = open (file, O_CREAT | O_RDONLY, S_IRUSR | S_IWUSR);
+ if (fd < 0) {
+ fprintf (stderr, "ERROR: failed to create logfile \"%s\" (%s)\n",
+ file, strerror (errno));
+ return -1;
+ }
+ close (fd);
- FREE (msg->msg);
+ ctx->log.logfile = fopen (file, "a");
+ if (!ctx->log.logfile){
+ fprintf (stderr, "ERROR: failed to open logfile \"%s\" (%s)\n",
+ file, strerror (errno));
+ return -1;
+ }
- STACK_DESTROY (frame->root);
+ ctx->log.gf_log_logfile = ctx->log.logfile;
return 0;
}
+void
+set_sys_log_level (gf_loglevel_t level)
+{
+ THIS->ctx->log.sys_log_level = level;
+}
-void *
-logging_thread_loop (void *arg)
+int
+_gf_log_nomem (const char *domain, const char *file,
+ const char *function, int line, gf_loglevel_t level,
+ size_t size)
{
- struct _log_msg *msg;
+ const char *basename = NULL;
+ xlator_t *this = NULL;
+ struct timeval tv = {0,};
+ int ret = 0;
+ char msg[8092] = {0,};
+ char timestr[256] = {0,};
+ char callstr[4096] = {0,};
+ glusterfs_ctx_t *ctx = NULL;
+
+ this = THIS;
+ ctx = this->ctx;
+
+ if (ctx->log.gf_log_xl_log_set) {
+ if (this->loglevel && (level > this->loglevel))
+ goto out;
+ }
+ if (level > ctx->log.loglevel)
+ goto out;
- call_frame_t *frame = NULL;
+ static char *level_strings[] = {"", /* NONE */
+ "M", /* EMERGENCY */
+ "A", /* ALERT */
+ "C", /* CRITICAL */
+ "E", /* ERROR */
+ "W", /* WARNING */
+ "N", /* NOTICE */
+ "I", /* INFO */
+ "D", /* DEBUG */
+ "T", /* TRACE */
+ ""};
- while (1) {
- pthread_mutex_lock (&msg_cond_mutex);
- {
- pthread_cond_wait (&msg_cond, &msg_cond_mutex);
-
- while (!list_empty (&msg_queue.msgs)) {
- pthread_mutex_lock (&msg_queue_mutex);
- {
- msg = list_entry (msg_queue.msgs.next,
- struct _log_msg,
- queue);
-
- list_del_init (&msg->queue);
- }
- pthread_mutex_unlock (&msg_queue_mutex);
-
- frame = create_frame (logging_xl,
- logging_xl->ctx->pool);
-
- frame->local = logging_xl->private;
-
- STACK_WIND_COOKIE (frame, (void *) msg,
- gf_log_central_cbk,
- logging_xl->children->xlator,
- logging_xl->children->xlator->mops->log,
- msg->msg);
- }
+ if (!domain || !file || !function) {
+ fprintf (stderr,
+ "logging: %s:%s():%d: invalid argument\n",
+ __FILE__, __PRETTY_FUNCTION__, __LINE__);
+ return -1;
+ }
+ basename = strrchr (file, '/');
+ if (basename)
+ basename++;
+ else
+ basename = file;
+
+#if HAVE_BACKTRACE
+ /* Print 'calling function' */
+ do {
+ void *array[5];
+ char **callingfn = NULL;
+ size_t bt_size = 0;
+
+ bt_size = backtrace (array, 5);
+ if (bt_size)
+ callingfn = backtrace_symbols (&array[2], bt_size-2);
+ if (!callingfn)
+ break;
+
+ if (bt_size == 5)
+ snprintf (callstr, 4096, "(-->%s (-->%s (-->%s)))",
+ callingfn[2], callingfn[1], callingfn[0]);
+ if (bt_size == 4)
+ snprintf (callstr, 4096, "(-->%s (-->%s))",
+ callingfn[1], callingfn[0]);
+ if (bt_size == 3)
+ snprintf (callstr, 4096, "(-->%s)", callingfn[0]);
+
+ free (callingfn);
+ } while (0);
+#endif /* HAVE_BACKTRACE */
+
+#if defined(GF_USE_SYSLOG)
+ if (ctx->log.log_control_file_found)
+ {
+ int priority;
+ /* treat GF_LOG_TRACE and GF_LOG_NONE as LOG_DEBUG and
+ other level as is */
+ if (GF_LOG_TRACE == level || GF_LOG_NONE == level) {
+ priority = LOG_DEBUG;
+ } else {
+ priority = level - 1;
}
- pthread_mutex_unlock (&msg_cond_mutex);
+ gf_syslog (GF_ERR_DEV, priority,
+ "[%s:%d:%s] %s %s: no memory "
+ "available for size (%"GF_PRI_SIZET")",
+ basename, line, function, callstr, domain,
+ size);
+ goto out;
+ }
+#endif /* GF_USE_SYSLOG */
+ ret = gettimeofday (&tv, NULL);
+ if (-1 == ret)
+ goto out;
+ gf_time_fmt (timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS, tv.tv_usec);
+
+ ret = sprintf (msg, "[%s] %s [%s:%d:%s] %s %s: no memory "
+ "available for size (%"GF_PRI_SIZET")",
+ timestr, level_strings[level],
+ basename, line, function, callstr,
+ domain, size);
+ if (-1 == ret) {
+ goto out;
}
- return NULL;
-}
+ pthread_mutex_lock (&ctx->log.logfile_mutex);
+ {
+ if (ctx->log.logfile) {
+ fprintf (ctx->log.logfile, "%s\n", msg);
+ } else {
+ fprintf (stderr, "%s\n", msg);
+ }
+#ifdef GF_LINUX_HOST_OS
+ /* We want only serious log in 'syslog', not our debug
+ and trace logs */
+ if (ctx->log.gf_log_syslog && level &&
+ (level <= ctx->log.sys_log_level))
+ syslog ((level-1), "%s\n", msg);
+#endif
+ }
+
+ pthread_mutex_unlock (&ctx->log.logfile_mutex);
+out:
+ return ret;
+ }
int
-gf_log_central_init (glusterfs_ctx_t *ctx, const char *remote_host,
- const char *transport, uint32_t remote_port)
+_gf_log_callingfn (const char *domain, const char *file, const char *function,
+ int line, gf_loglevel_t level, const char *fmt, ...)
{
- logging_xl = __get_dummy_xlator (ctx, remote_host, transport,
- remote_port);
-
- if (!logging_xl) {
- goto out;
+ const char *basename = NULL;
+ xlator_t *this = NULL;
+ char *str1 = NULL;
+ char *str2 = NULL;
+ char *msg = NULL;
+ char timestr[256] = {0,};
+ char callstr[4096] = {0,};
+ struct timeval tv = {0,};
+ size_t len = 0;
+ int ret = 0;
+ va_list ap;
+ glusterfs_ctx_t *ctx = NULL;
+
+ this = THIS;
+ ctx = this->ctx;
+
+ if (ctx->log.gf_log_xl_log_set) {
+ if (this->loglevel && (level > this->loglevel))
+ goto out;
}
+ if (level > ctx->log.loglevel)
+ goto out;
- __central_log_enabled = 1;
+ static char *level_strings[] = {"", /* NONE */
+ "M", /* EMERGENCY */
+ "A", /* ALERT */
+ "C", /* CRITICAL */
+ "E", /* ERROR */
+ "W", /* WARNING */
+ "N", /* NOTICE */
+ "I", /* INFO */
+ "D", /* DEBUG */
+ "T", /* TRACE */
+ ""};
- INIT_LIST_HEAD (&msg_queue.msgs);
+ if (!domain || !file || !function || !fmt) {
+ fprintf (stderr,
+ "logging: %s:%s():%d: invalid argument\n",
+ __FILE__, __PRETTY_FUNCTION__, __LINE__);
+ return -1;
+ }
- pthread_cond_init (&msg_cond, NULL);
- pthread_mutex_init (&msg_cond_mutex, NULL);
- pthread_mutex_init (&msg_queue_mutex, NULL);
+ basename = strrchr (file, '/');
+ if (basename)
+ basename++;
+ else
+ basename = file;
+
+#if HAVE_BACKTRACE
+ /* Print 'calling function' */
+ do {
+ void *array[5];
+ char **callingfn = NULL;
+ size_t size = 0;
+
+ size = backtrace (array, 5);
+ if (size)
+ callingfn = backtrace_symbols (&array[2], size-2);
+ if (!callingfn)
+ break;
- pthread_create (&logging_thread, NULL, logging_thread_loop, NULL);
+ if (size == 5)
+ snprintf (callstr, 4096, "(-->%s (-->%s (-->%s)))",
+ callingfn[2], callingfn[1], callingfn[0]);
+ if (size == 4)
+ snprintf (callstr, 4096, "(-->%s (-->%s))",
+ callingfn[1], callingfn[0]);
+ if (size == 3)
+ snprintf (callstr, 4096, "(-->%s)", callingfn[0]);
+
+ free (callingfn);
+ } while (0);
+#endif /* HAVE_BACKTRACE */
+
+#if defined(GF_USE_SYSLOG)
+ if (ctx->log.log_control_file_found)
+ {
+ int priority;
+ /* treat GF_LOG_TRACE and GF_LOG_NONE as LOG_DEBUG and
+ other level as is */
+ if (GF_LOG_TRACE == level || GF_LOG_NONE == level) {
+ priority = LOG_DEBUG;
+ } else {
+ priority = level - 1;
+ }
-out:
- return 0;
-}
+ va_start (ap, fmt);
+ vasprintf (&str2, fmt, ap);
+ va_end (ap);
+ gf_syslog (GF_ERR_DEV, priority,
+ "[%s:%d:%s] %s %d-%s: %s",
+ basename, line, function,
+ callstr,
+ ((this->graph) ? this->graph->id:0), domain,
+ str2);
-int
-gf_log_central (const char *msg)
-{
- struct _log_msg *lm = NULL;
+ goto out;
+ }
+#endif /* GF_USE_SYSLOG */
+ ret = gettimeofday (&tv, NULL);
+ if (-1 == ret)
+ goto out;
+ va_start (ap, fmt);
+ gf_time_fmt (timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS, tv.tv_usec);
+
+ ret = gf_asprintf (&str1, "[%s] %s [%s:%d:%s] %s %d-%s: ",
+ timestr, level_strings[level],
+ basename, line, function, callstr,
+ ((this->graph) ? this->graph->id:0), domain);
+ if (-1 == ret) {
+ goto out;
+ }
- lm = CALLOC (1, sizeof (*lm));
-
- if (!lm)
+ ret = vasprintf (&str2, fmt, ap);
+ if (-1 == ret) {
goto out;
+ }
- INIT_LIST_HEAD (&lm->queue);
+ va_end (ap);
- lm->msg = strdup (msg);
+ len = strlen (str1);
+ msg = GF_MALLOC (len + strlen (str2) + 1, gf_common_mt_char);
- pthread_mutex_lock (&msg_queue_mutex);
- {
- list_add_tail (&lm->queue, &msg_queue.msgs);
- }
- pthread_mutex_unlock (&msg_queue_mutex);
-
- pthread_cond_signal (&msg_cond);
+ strcpy (msg, str1);
+ strcpy (msg + len, str2);
-out:
- return 0;
-}
+ pthread_mutex_lock (&ctx->log.logfile_mutex);
+ {
+ if (ctx->log.logfile) {
+ fprintf (ctx->log.logfile, "%s\n", msg);
+ } else {
+ fprintf (stderr, "%s\n", msg);
+ }
+#ifdef GF_LINUX_HOST_OS
+ /* We want only serious log in 'syslog', not our debug
+ and trace logs */
+ if (ctx->log.gf_log_syslog && level &&
+ (level <= ctx->log.sys_log_level))
+ syslog ((level-1), "%s\n", msg);
+#endif
+ }
-void
-gf_log_lock (void)
-{
- pthread_mutex_lock (&logfile_mutex);
-}
+ pthread_mutex_unlock (&ctx->log.logfile_mutex);
+out:
+ GF_FREE (msg);
-void
-gf_log_unlock (void)
-{
- pthread_mutex_unlock (&logfile_mutex);
-}
+ GF_FREE (str1);
+ FREE (str2);
-void
-gf_log_cleanup (void)
-{
- pthread_mutex_destroy (&logfile_mutex);
+ return ret;
}
-
int
_gf_log (const char *domain, const char *file, const char *function, int line,
- gf_loglevel_t level, const char *fmt, ...)
+ gf_loglevel_t level, const char *fmt, ...)
{
- const char *basename = NULL;
- FILE *new_logfile = NULL;
- va_list ap;
- time_t utime = 0;
- struct tm *tm = NULL;
- char timestr[256];
-
- char *str1, *str2, *msg;
- size_t len = 0;
- int ret = 0;
-
- static char *level_strings[] = {"", /* NONE */
- "C", /* CRITICAL */
- "E", /* ERROR */
- "W", /* WARNING */
- "N", /* NORMAL */
- "D", /* DEBUG */
+ const char *basename = NULL;
+ FILE *new_logfile = NULL;
+ va_list ap;
+ char timestr[256] = {0,};
+ struct timeval tv = {0,};
+ char *str1 = NULL;
+ char *str2 = NULL;
+ char *msg = NULL;
+ size_t len = 0;
+ int ret = 0;
+ int fd = -1;
+ xlator_t *this = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+
+ this = THIS;
+ ctx = this->ctx;
+
+ if (ctx->log.gf_log_xl_log_set) {
+ if (this->loglevel && (level > this->loglevel))
+ goto out;
+ }
+ if (level > ctx->log.loglevel)
+ goto out;
+
+ static char *level_strings[] = {"", /* NONE */
+ "M", /* EMERGENCY */
+ "A", /* ALERT */
+ "C", /* CRITICAL */
+ "E", /* ERROR */
+ "W", /* WARNING */
+ "N", /* NOTICE */
+ "I", /* INFO */
+ "D", /* DEBUG */
"T", /* TRACE */
- ""};
-
- if (!domain || !file || !function || !fmt) {
- fprintf (stderr,
- "logging: %s:%s():%d: invalid argument\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__);
- return -1;
- }
-
- if (!logfile) {
- fprintf (stderr, "no logfile set\n");
- return (-1);
- }
-
- if (logrotate) {
- logrotate = 0;
-
- new_logfile = fopen (filename, "a");
- if (!new_logfile) {
- gf_log ("logrotate", GF_LOG_CRITICAL,
- "failed to open logfile %s (%s)",
- filename, strerror (errno));
- goto log;
- }
-
- fclose (logfile);
- gf_log_logfile = logfile = new_logfile;
- }
+ ""};
-log:
- utime = time (NULL);
- tm = localtime (&utime);
-
- if (level > loglevel) {
- goto out;
- }
-
- pthread_mutex_lock (&logfile_mutex);
- {
- va_start (ap, fmt);
-
- strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm);
-
- basename = strrchr (file, '/');
- if (basename)
- basename++;
- else
- basename = file;
-
- ret = asprintf (&str1, "[%s] %s [%s:%d:%s] %s: ",
- timestr, level_strings[level],
- basename, line, function,
- domain);
- if (-1 == ret) {
- goto out;
- }
+ if (!domain || !file || !function || !fmt) {
+ fprintf (stderr,
+ "logging: %s:%s():%d: invalid argument\n",
+ __FILE__, __PRETTY_FUNCTION__, __LINE__);
+ return -1;
+ }
- ret = vasprintf (&str2, fmt, ap);
- if (-1 == ret) {
- goto out;
+ basename = strrchr (file, '/');
+ if (basename)
+ basename++;
+ else
+ basename = file;
+
+#if defined(GF_USE_SYSLOG)
+ if (ctx->log.log_control_file_found)
+ {
+ int priority;
+ /* treat GF_LOG_TRACE and GF_LOG_NONE as LOG_DEBUG and
+ other level as is */
+ if (GF_LOG_TRACE == level || GF_LOG_NONE == level) {
+ priority = LOG_DEBUG;
+ } else {
+ priority = level - 1;
}
- va_end (ap);
+ va_start (ap, fmt);
+ vasprintf (&str2, fmt, ap);
+ va_end (ap);
- len = strlen (str1);
- msg = malloc (len + strlen (str2) + 1);
-
- strcpy (msg, str1);
- strcpy (msg + len, str2);
+ gf_syslog (GF_ERR_DEV, priority,
+ "[%s:%d:%s] %d-%s: %s",
+ basename, line, function,
+ ((this->graph) ? this->graph->id:0), domain, str2);
+ goto err;
+ }
+#endif /* GF_USE_SYSLOG */
- fprintf (logfile, "%s\n", msg);
- fflush (logfile);
- }
- pthread_mutex_unlock (&logfile_mutex);
+ if (ctx->log.logrotate) {
+ ctx->log.logrotate = 0;
- if (__central_log_enabled &&
- ((glusterfs_central_log_flag_get ()) == 0)) {
-
- glusterfs_central_log_flag_set ();
+ fd = open (ctx->log.filename,
+ O_CREAT | O_RDONLY, S_IRUSR | S_IWUSR);
+ if (fd < 0) {
+ gf_log ("logrotate", GF_LOG_ERROR,
+ "%s", strerror (errno));
+ return -1;
+ }
+ close (fd);
+
+ new_logfile = fopen (ctx->log.filename, "a");
+ if (!new_logfile) {
+ gf_log ("logrotate", GF_LOG_CRITICAL,
+ "failed to open logfile %s (%s)",
+ ctx->log.filename, strerror (errno));
+ goto log;
+ }
+
+ pthread_mutex_lock (&ctx->log.logfile_mutex);
{
- gf_log_central (msg);
+ if (ctx->log.logfile)
+ fclose (ctx->log.logfile);
+
+ ctx->log.gf_log_logfile = ctx->log.logfile = new_logfile;
}
- glusterfs_central_log_flag_unset ();
+ pthread_mutex_unlock (&ctx->log.logfile_mutex);
+
}
-
- FREE (msg);
- FREE (str1);
- FREE (str2);
+log:
+ ret = gettimeofday (&tv, NULL);
+ if (-1 == ret)
+ goto out;
+ va_start (ap, fmt);
+ gf_time_fmt (timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS, tv.tv_usec);
+
+ ret = gf_asprintf (&str1, "[%s] %s [%s:%d:%s] %d-%s: ",
+ timestr, level_strings[level],
+ basename, line, function,
+ ((this->graph)?this->graph->id:0), domain);
+ if (-1 == ret) {
+ goto err;
+ }
-out:
- return (0);
-}
+ ret = vasprintf (&str2, fmt, ap);
+ if (-1 == ret) {
+ goto err;
+ }
+ va_end (ap);
-struct _client_log {
- char *identifier;
- FILE *file;
- struct list_head list;
-};
+ len = strlen (str1);
+ msg = GF_MALLOC (len + strlen (str2) + 1, gf_common_mt_char);
+
+ strcpy (msg, str1);
+ strcpy (msg + len, str2);
+
+ pthread_mutex_lock (&ctx->log.logfile_mutex);
+ {
+
+ if (ctx->log.logfile) {
+ fprintf (ctx->log.logfile, "%s\n", msg);
+ fflush (ctx->log.logfile);
+ } else {
+ fprintf (stderr, "%s\n", msg);
+ fflush (stderr);
+ }
+
+#ifdef GF_LINUX_HOST_OS
+ /* We want only serious log in 'syslog', not our debug
+ and trace logs */
+ if (ctx->log.gf_log_syslog && level &&
+ (level <= ctx->log.sys_log_level))
+ syslog ((level-1), "%s\n", msg);
+#endif
+ }
+
+ pthread_mutex_unlock (&ctx->log.logfile_mutex);
-struct _client_log *client_logs = NULL;
+err:
+ GF_FREE (msg);
+ GF_FREE (str1);
-static void
-client_log_init (struct _client_log *cl, char *identifier)
+ FREE (str2);
+
+out:
+ return (0);
+}
+
+int
+_gf_log_eh (const char *function, const char *fmt, ...)
{
- int ret = 0;
- char *path = NULL;
+ int ret = -1;
+ va_list ap;
+ char *str1 = NULL;
+ char *str2 = NULL;
+ char *msg = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ ret = gf_asprintf (&str1, "[%d] %s: ",
+ ((this->graph)?this->graph->id:0),
+ function);
+ if (-1 == ret) {
+ goto out;
+ }
- cl->identifier = identifier;
+ va_start (ap, fmt);
- ret = asprintf (&path, "%s.client-%s", filename, identifier);
+ ret = vasprintf (&str2, fmt, ap);
if (-1 == ret) {
- return;
+ goto out;
}
- cl->file = fopen (path, "a");
- FREE (path);
-
- INIT_LIST_HEAD (&cl->list);
-}
+ va_end (ap);
+
+ msg = GF_MALLOC (strlen (str1) + strlen (str2) + 1, gf_common_mt_char);
+ if (!msg) {
+ ret = -1;
+ goto out;
+ }
+
+ strcpy (msg, str1);
+ strcat (msg, str2);
+
+ ret = eh_save_history (this->history, msg);
+
+out:
+ GF_FREE (str1);
+
+ /* Use FREE instead of GF_FREE since str2 was allocated by vasprintf */
+ if (str2)
+ FREE (str2);
+
+ return ret;
+}
-static FILE *
-__logfile_for_client (char *identifier)
+int
+gf_cmd_log_init (const char *filename)
{
- struct _client_log *client = NULL;
+ int fd = -1;
+ xlator_t *this = NULL;
+ glusterfs_ctx_t *ctx = NULL;
- if (!client_logs) {
- client = CALLOC (1, sizeof (*client));
- client_log_init (client, identifier);
+ this = THIS;
+ ctx = this->ctx;
- client_logs = client;
+ if (!filename){
+ gf_log (this->name, GF_LOG_CRITICAL, "gf_cmd_log_init: no "
+ "filename specified\n");
+ return -1;
}
- list_for_each_entry (client, &client_logs->list, list) {
- if (!strcmp (client->identifier, identifier))
- break;
+ ctx->log.cmd_log_filename = gf_strdup (filename);
+ if (!ctx->log.cmd_log_filename) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "gf_cmd_log_init: strdup error\n");
+ return -1;
+ }
+ /* close and reopen cmdlogfile for log rotate*/
+ if (ctx->log.cmdlogfile) {
+ fclose (ctx->log.cmdlogfile);
+ ctx->log.cmdlogfile = NULL;
+ }
+
+ fd = open (ctx->log.cmd_log_filename,
+ O_CREAT | O_RDONLY, S_IRUSR | S_IWUSR);
+ if (fd < 0) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "%s", strerror (errno));
+ return -1;
}
+ close (fd);
+
+ ctx->log.cmdlogfile = fopen (ctx->log.cmd_log_filename, "a");
+ if (!ctx->log.cmdlogfile){
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "gf_cmd_log_init: failed to open logfile \"%s\" "
+ "(%s)\n", ctx->log.cmd_log_filename, strerror (errno));
+ return -1;
+ }
+ return 0;
+}
- if (!client) {
- client = CALLOC (1, sizeof (*client));
+int
+gf_cmd_log (const char *domain, const char *fmt, ...)
+{
+ va_list ap;
+ char timestr[64];
+ struct timeval tv = {0,};
+ char *str1 = NULL;
+ char *str2 = NULL;
+ char *msg = NULL;
+ size_t len = 0;
+ int ret = 0;
+ glusterfs_ctx_t *ctx = NULL;
+
+ ctx = THIS->ctx;
+ if (!ctx->log.cmdlogfile)
+ return -1;
+
+
+ if (!domain || !fmt) {
+ gf_log ("glusterd", GF_LOG_TRACE,
+ "logging: invalid argument\n");
+ return -1;
+ }
- client_log_init (client, identifier);
+ ret = gettimeofday (&tv, NULL);
+ if (ret == -1)
+ goto out;
+ va_start (ap, fmt);
+ gf_time_fmt (timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr), 256 - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS, tv.tv_usec);
+
+ ret = gf_asprintf (&str1, "[%s] %s : ",
+ timestr, domain);
+ if (ret == -1) {
+ goto out;
+ }
- list_add_tail (&client->list, &client_logs->list);
+ ret = vasprintf (&str2, fmt, ap);
+ if (ret == -1) {
+ goto out;
}
- return client->file;
-}
+ va_end (ap);
+ len = strlen (str1);
+ msg = GF_MALLOC (len + strlen (str2) + 1, gf_common_mt_char);
-int
-gf_log_from_client (const char *msg, char *identifier)
-{
- FILE *client_log = NULL;
+ strcpy (msg, str1);
+ strcpy (msg + len, str2);
- client_log = __logfile_for_client (identifier);
+ fprintf (ctx->log.cmdlogfile, "%s\n", msg);
+ fflush (ctx->log.cmdlogfile);
- fprintf (client_log, "%s\n", msg);
- fflush (client_log);
+out:
+ GF_FREE (msg);
- return 0;
+ GF_FREE (str1);
+
+ FREE (str2);
+
+ return (0);
}
diff --git a/libglusterfs/src/logging.h b/libglusterfs/src/logging.h
index 45b4618ae..cc806a767 100644
--- a/libglusterfs/src/logging.h
+++ b/libglusterfs/src/logging.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef __LOGGING_H__
#define __LOGGING_H__
@@ -27,103 +17,149 @@
#endif
#include <stdint.h>
-#include <stdio.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <pthread.h>
+
+#ifdef GF_DARWIN_HOST_OS
+#define GF_PRI_FSBLK "u"
+#define GF_PRI_DEV PRId32
+#define GF_PRI_NLINK PRIu16
+#define GF_PRI_SUSECONDS "06d"
+#else
+#define GF_PRI_FSBLK PRIu64
+#define GF_PRI_DEV PRIu64
+#define GF_PRI_NLINK PRIu32
+#define GF_PRI_SUSECONDS "06ld"
+#endif
+#define GF_PRI_BLKSIZE PRId32
+#define GF_PRI_SIZET "zu"
+
+
+#if 0
+/* Syslog definitions :-) */
+#define LOG_EMERG 0 /* system is unusable */
+#define LOG_ALERT 1 /* action must be taken immediately */
+#define LOG_CRIT 2 /* critical conditions */
+#define LOG_ERR 3 /* error conditions */
+#define LOG_WARNING 4 /* warning conditions */
+#define LOG_NOTICE 5 /* normal but significant condition */
+#define LOG_INFO 6 /* informational */
+#define LOG_DEBUG 7 /* debug-level messages */
+#endif
-#define GF_PRI_FSBLK PRId64
-#define GF_PRI_BLKSIZE "ld"
-#if GF_LINUX_HOST_OS
+typedef enum {
+ GF_LOG_NONE,
+ GF_LOG_EMERG,
+ GF_LOG_ALERT,
+ GF_LOG_CRITICAL, /* fatal errors */
+ GF_LOG_ERROR, /* major failures (not necessarily fatal) */
+ GF_LOG_WARNING, /* info about normal operation */
+ GF_LOG_NOTICE,
+ GF_LOG_INFO, /* Normal information */
+ GF_LOG_DEBUG, /* internal errors */
+ GF_LOG_TRACE, /* full trace of operation */
+} gf_loglevel_t;
-# if __WORDSIZE == 64
-# define GF_PRI_SIZET "lu"
-# define GF_PRI_NLINK "lu"
-# else
-# define GF_PRI_SIZET "u"
-# define GF_PRI_NLINK "u"
-# endif /* __WORDSIZE */
+#define DEFAULT_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs"
+#define DEFAULT_LOG_LEVEL GF_LOG_INFO
-#elif GF_DARWIN_HOST_OS
+typedef struct gf_log_handle_ {
+ pthread_mutex_t logfile_mutex;
+ uint8_t logrotate;
+ gf_loglevel_t loglevel;
+ int gf_log_syslog;
+ gf_loglevel_t sys_log_level;
+ char gf_log_xl_log_set;
+ char *filename;
+ FILE *logfile;
+ FILE *gf_log_logfile;
+ char *cmd_log_filename;
+ FILE *cmdlogfile;
+#ifdef GF_USE_SYSLOG
+ int log_control_file_found;
+ char *ident;
+#endif /* GF_USE_SYSLOG */
-/* Noticed that size_t and ino_t are different on OSX, need to fix the warnings */
-# define GF_PRI_SIZET "lu"
-# define GF_PRI_NLINK "u"
+} gf_log_handle_t;
-# undef GF_PRI_FSBLK
-# define GF_PRI_FSBLK "u"
-
-# undef GF_PRI_BLKSIZE
-# define GF_PRI_BLKSIZE "u"
+void gf_log_globals_init (void *ctx);
+int gf_log_init (void *data, const char *filename, const char *ident);
-# if __DARWIN_64_BIT_INO_T == 0
-# error '64 bit ino_t is must for GlusterFS to work, Compile with "CFLAGS=-D__DARWIN_64_BIT_INO_T"'
-# endif /* __DARWIN_64_BIT_INO_T */
+void gf_log_logrotate (int signum);
-#else /* !LINUX && !DARWIN */
+void gf_log_cleanup (void);
-/* BSD and Solaris : Change as per testing there.. */
-# define GF_PRI_SIZET "lu"
-# define GF_PRI_NLINK "u"
+int _gf_log (const char *domain, const char *file,
+ const char *function, int32_t line, gf_loglevel_t level,
+ const char *fmt, ...)
+ __attribute__ ((__format__ (__printf__, 6, 7)));
+int _gf_log_callingfn (const char *domain, const char *file,
+ const char *function, int32_t line, gf_loglevel_t level,
+ const char *fmt, ...)
+ __attribute__ ((__format__ (__printf__, 6, 7)));
-#endif /* LINUX_OS */
+int _gf_log_nomem (const char *domain, const char *file,
+ const char *function, int line, gf_loglevel_t level,
+ size_t size);
-#define GF_PRI_DEV GF_PRI_FSBLK
+int _gf_log_eh (const char *function, const char *fmt, ...);
-typedef enum {
- GF_LOG_NONE,
- GF_LOG_CRITICAL, /* fatal errors */
- GF_LOG_ERROR, /* major failures (not necessarily fatal) */
- GF_LOG_WARNING, /* info about normal operation */
- GF_LOG_INFO, /* Normal information */
-#define GF_LOG_NORMAL GF_LOG_INFO
- GF_LOG_DEBUG, /* internal errors */
- GF_LOG_TRACE, /* full trace of operation */
-} gf_loglevel_t;
-#define GF_LOG_MAX GF_LOG_DEBUG
-extern gf_loglevel_t gf_log_loglevel;
+#define FMT_WARN(fmt...) do { if (0) printf (fmt); } while (0)
+
+#define gf_log(dom, levl, fmt...) do { \
+ FMT_WARN (fmt); \
+ _gf_log (dom, __FILE__, __FUNCTION__, __LINE__, \
+ levl, ##fmt); \
+ } while (0)
+
+#define gf_log_eh(fmt...) do { \
+ FMT_WARN (fmt); \
+ _gf_log_eh (__FUNCTION__, ##fmt); \
+ } while (0)
+
+#define gf_log_callingfn(dom, levl, fmt...) do { \
+ FMT_WARN (fmt); \
+ _gf_log_callingfn (dom, __FILE__, __FUNCTION__, __LINE__, \
+ levl, ##fmt); \
+ } while (0)
+
+
+/* No malloc or calloc should be called in this function */
+#define gf_log_nomem(dom, levl, size) do { \
+ _gf_log_nomem (dom, __FILE__, __FUNCTION__, __LINE__, \
+ levl, size); \
+ } while (0)
-#define gf_log(dom, levl, fmt...) do { \
- if (levl <= gf_log_loglevel) \
- _gf_log (dom, __FILE__, __FUNCTION__, __LINE__, \
- levl, ##fmt); \
- if (0) { \
- printf (fmt); \
- } \
-} while (0)
/* Log once in GF_UNIVERSAL_ANSWER times */
#define GF_LOG_OCCASIONALLY(var, args...) if (!(var++%GF_UNIVERSAL_ANSWER)) { \
- gf_log (args); \
+ gf_log (args); \
}
-
-void
-gf_log_logrotate (int signum);
-
-int gf_log_init (const char *filename);
-
-int
-_gf_log (const char *domain, const char *file, const char *function,
- int32_t line, gf_loglevel_t level, const char *fmt, ...);
+void gf_log_disable_syslog (void);
+void gf_log_enable_syslog (void);
+gf_loglevel_t gf_log_get_loglevel (void);
+void gf_log_set_loglevel (gf_loglevel_t level);
+gf_loglevel_t gf_log_get_xl_loglevel (void *xl);
+void gf_log_set_xl_loglevel (void *xl, gf_loglevel_t level);
-int
-gf_log_from_client (const char *msg, char *identifier);
+int gf_cmd_log (const char *domain, const char *fmt, ...)
+ __attribute__ ((__format__ (__printf__, 2, 3)));
-void gf_log_lock (void);
-void gf_log_unlock (void);
+int gf_cmd_log_init (const char *filename);
-gf_loglevel_t
-gf_log_get_loglevel (void);
-void
-gf_log_set_loglevel (gf_loglevel_t level);
+void set_sys_log_level (gf_loglevel_t level);
-#define GF_DEBUG(xl, format, args...) \
- gf_log ((xl)->name, GF_LOG_DEBUG, format, ##args)
-#define GF_INFO(xl, format, args...) \
- gf_log ((xl)->name, GF_LOG_INFO, format, ##args)
-#define GF_WARNING(xl, format, args...) \
- gf_log ((xl)->name, GF_LOG_WARNING, format, ##args)
-#define GF_ERROR(xl, format, args...) \
- gf_log ((xl)->name, GF_LOG_ERROR, format, ##args)
+#define GF_DEBUG(xl, format, args...) \
+ gf_log ((xl)->name, GF_LOG_DEBUG, format, ##args)
+#define GF_INFO(xl, format, args...) \
+ gf_log ((xl)->name, GF_LOG_INFO, format, ##args)
+#define GF_WARNING(xl, format, args...) \
+ gf_log ((xl)->name, GF_LOG_WARNING, format, ##args)
+#define GF_ERROR(xl, format, args...) \
+ gf_log ((xl)->name, GF_LOG_ERROR, format, ##args)
#endif /* __LOGGING_H__ */
diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c
index 3937fcc37..b901dd7a8 100644
--- a/libglusterfs/src/mem-pool.c
+++ b/libglusterfs/src/mem-pool.c
@@ -1,106 +1,401 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include "mem-pool.h"
#include "logging.h"
+#include "xlator.h"
#include <stdlib.h>
+#include <stdarg.h>
-
-#define GF_MEM_POOL_PAD_BOUNDARY (sizeof(struct list_head))
+#define GF_MEM_POOL_LIST_BOUNDARY (sizeof(struct list_head))
+#define GF_MEM_POOL_PTR (sizeof(struct mem_pool*))
+#define GF_MEM_POOL_PAD_BOUNDARY (GF_MEM_POOL_LIST_BOUNDARY + GF_MEM_POOL_PTR + sizeof(int))
#define mem_pool_chunkhead2ptr(head) ((head) + GF_MEM_POOL_PAD_BOUNDARY)
#define mem_pool_ptr2chunkhead(ptr) ((ptr) - GF_MEM_POOL_PAD_BOUNDARY)
+#define is_mem_chunk_in_use(ptr) (*ptr == 1)
+#define mem_pool_from_ptr(ptr) ((ptr) + GF_MEM_POOL_LIST_BOUNDARY)
+
+#define GF_MEM_HEADER_SIZE (4 + sizeof (size_t) + sizeof (xlator_t *) + 4 + 8)
+#define GF_MEM_TRAILER_SIZE 8
+
+#define GF_MEM_HEADER_MAGIC 0xCAFEBABE
+#define GF_MEM_TRAILER_MAGIC 0xBAADF00D
+
+#define GLUSTERFS_ENV_MEM_ACCT_STR "GLUSTERFS_DISABLE_MEM_ACCT"
+
+void
+gf_mem_acct_enable_set (void *data)
+{
+ glusterfs_ctx_t *ctx = NULL;
+
+ ctx = data;
+
+ GF_ASSERT (ctx);
+
+ ctx->mem_acct_enable = 1;
+
+ return;
+}
+
+void
+gf_mem_set_acct_info (xlator_t *xl, char **alloc_ptr,
+ size_t size, uint32_t type)
+{
+
+ char *ptr = NULL;
+
+ if (!alloc_ptr)
+ return;
+
+ ptr = (char *) (*alloc_ptr);
+
+ GF_ASSERT (xl != NULL);
+
+ GF_ASSERT (xl->mem_acct.rec != NULL);
+
+ GF_ASSERT (type <= xl->mem_acct.num_types);
+
+ LOCK(&xl->mem_acct.rec[type].lock);
+ {
+ xl->mem_acct.rec[type].size += size;
+ xl->mem_acct.rec[type].num_allocs++;
+ xl->mem_acct.rec[type].total_allocs++;
+ xl->mem_acct.rec[type].max_size =
+ max (xl->mem_acct.rec[type].max_size,
+ xl->mem_acct.rec[type].size);
+ xl->mem_acct.rec[type].max_num_allocs =
+ max (xl->mem_acct.rec[type].max_num_allocs,
+ xl->mem_acct.rec[type].num_allocs);
+ }
+ UNLOCK(&xl->mem_acct.rec[type].lock);
+
+ *(uint32_t *)(ptr) = type;
+ ptr = ptr + 4;
+ memcpy (ptr, &size, sizeof(size_t));
+ ptr += sizeof (size_t);
+ memcpy (ptr, &xl, sizeof(xlator_t *));
+ ptr += sizeof (xlator_t *);
+ *(uint32_t *)(ptr) = GF_MEM_HEADER_MAGIC;
+ ptr = ptr + 4;
+ ptr = ptr + 8; //padding
+ *(uint32_t *) (ptr + size) = GF_MEM_TRAILER_MAGIC;
+
+ *alloc_ptr = (void *)ptr;
+ return;
+}
+
+
+void *
+__gf_calloc (size_t nmemb, size_t size, uint32_t type)
+{
+ size_t tot_size = 0;
+ size_t req_size = 0;
+ char *ptr = NULL;
+ xlator_t *xl = NULL;
+
+ if (!THIS->ctx->mem_acct_enable)
+ return CALLOC (nmemb, size);
+
+ xl = THIS;
+
+ req_size = nmemb * size;
+ tot_size = req_size + GF_MEM_HEADER_SIZE + GF_MEM_TRAILER_SIZE;
+
+ ptr = calloc (1, tot_size);
+
+ if (!ptr) {
+ gf_log_nomem ("", GF_LOG_ALERT, tot_size);
+ return NULL;
+ }
+ gf_mem_set_acct_info (xl, &ptr, req_size, type);
+
+ return (void *)ptr;
+}
+
+void *
+__gf_malloc (size_t size, uint32_t type)
+{
+ size_t tot_size = 0;
+ char *ptr = NULL;
+ xlator_t *xl = NULL;
+
+ if (!THIS->ctx->mem_acct_enable)
+ return MALLOC (size);
+
+ xl = THIS;
+
+ tot_size = size + GF_MEM_HEADER_SIZE + GF_MEM_TRAILER_SIZE;
+
+ ptr = malloc (tot_size);
+ if (!ptr) {
+ gf_log_nomem ("", GF_LOG_ALERT, tot_size);
+ return NULL;
+ }
+ gf_mem_set_acct_info (xl, &ptr, size, type);
+
+ return (void *)ptr;
+}
+
+void *
+__gf_realloc (void *ptr, size_t size)
+{
+ size_t tot_size = 0;
+ char *orig_ptr = NULL;
+ xlator_t *xl = NULL;
+ uint32_t type = 0;
+
+ if (!THIS->ctx->mem_acct_enable)
+ return REALLOC (ptr, size);
+
+ tot_size = size + GF_MEM_HEADER_SIZE + GF_MEM_TRAILER_SIZE;
+
+ orig_ptr = (char *)ptr - 8 - 4;
+
+ GF_ASSERT (*(uint32_t *)orig_ptr == GF_MEM_HEADER_MAGIC);
+
+ orig_ptr = orig_ptr - sizeof(xlator_t *);
+ xl = *((xlator_t **)orig_ptr);
+
+ orig_ptr = (char *)ptr - GF_MEM_HEADER_SIZE;
+ type = *(uint32_t *)orig_ptr;
+
+ ptr = realloc (orig_ptr, tot_size);
+ if (!ptr) {
+ gf_log_nomem ("", GF_LOG_ALERT, tot_size);
+ return NULL;
+ }
+
+ gf_mem_set_acct_info (xl, (char **)&ptr, size, type);
+
+ return (void *)ptr;
+}
+
+int
+gf_vasprintf (char **string_ptr, const char *format, va_list arg)
+{
+ va_list arg_save;
+ char *str = NULL;
+ int size = 0;
+ int rv = 0;
+
+ if (!string_ptr || !format)
+ return -1;
+
+ va_copy (arg_save, arg);
+
+ size = vsnprintf (NULL, 0, format, arg);
+ size++;
+ str = GF_MALLOC (size, gf_common_mt_asprintf);
+ if (str == NULL) {
+ /* log is done in GF_MALLOC itself */
+ return -1;
+ }
+ rv = vsnprintf (str, size, format, arg_save);
+
+ *string_ptr = str;
+ return (rv);
+}
+
+int
+gf_asprintf (char **string_ptr, const char *format, ...)
+{
+ va_list arg;
+ int rv = 0;
+
+ va_start (arg, format);
+ rv = gf_vasprintf (string_ptr, format, arg);
+ va_end (arg);
+
+ return rv;
+}
+
+void
+__gf_free (void *free_ptr)
+{
+ size_t req_size = 0;
+ char *ptr = NULL;
+ uint32_t type = 0;
+ xlator_t *xl = NULL;
+
+ if (!THIS->ctx->mem_acct_enable) {
+ FREE (free_ptr);
+ return;
+ }
+
+ if (!free_ptr)
+ return;
+
+ ptr = (char *)free_ptr - 8 - 4;
+
+ //Possible corruption, assert here
+ GF_ASSERT (GF_MEM_HEADER_MAGIC == *(uint32_t *)ptr);
+
+ *(uint32_t *)ptr = 0;
+
+ ptr = ptr - sizeof(xlator_t *);
+ memcpy (&xl, ptr, sizeof(xlator_t *));
+
+ //gf_free expects xl to be available
+ GF_ASSERT (xl != NULL);
+
+ if (!xl->mem_acct.rec) {
+ ptr = (char *)free_ptr - GF_MEM_HEADER_SIZE;
+ goto free;
+ }
+
+
+ ptr = ptr - sizeof(size_t);
+ memcpy (&req_size, ptr, sizeof (size_t));
+ ptr = ptr - 4;
+ type = *(uint32_t *)ptr;
+
+ // This points to a memory overrun
+ GF_ASSERT (GF_MEM_TRAILER_MAGIC ==
+ *(uint32_t *)((char *)free_ptr + req_size));
+
+ *(uint32_t *) ((char *)free_ptr + req_size) = 0;
+
+ LOCK (&xl->mem_acct.rec[type].lock);
+ {
+ xl->mem_acct.rec[type].size -= req_size;
+ xl->mem_acct.rec[type].num_allocs--;
+ }
+ UNLOCK (&xl->mem_acct.rec[type].lock);
+free:
+ FREE (ptr);
+}
+
struct mem_pool *
mem_pool_new_fn (unsigned long sizeof_type,
- unsigned long count)
+ unsigned long count, char *name)
{
- struct mem_pool *mem_pool = NULL;
- unsigned long padded_sizeof_type = 0;
- void *pool = NULL;
- int i = 0;
- struct list_head *list = NULL;
-
- if (!sizeof_type || !count) {
- gf_log ("mem-pool", GF_LOG_ERROR, "invalid argument");
- return NULL;
- }
+ struct mem_pool *mem_pool = NULL;
+ unsigned long padded_sizeof_type = 0;
+ void *pool = NULL;
+ int i = 0;
+ int ret = 0;
+ struct list_head *list = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+
+ if (!sizeof_type || !count) {
+ gf_log_callingfn ("mem-pool", GF_LOG_ERROR, "invalid argument");
+ return NULL;
+ }
padded_sizeof_type = sizeof_type + GF_MEM_POOL_PAD_BOUNDARY;
-
- mem_pool = CALLOC (sizeof (*mem_pool), 1);
- if (!mem_pool)
- return NULL;
- LOCK_INIT (&mem_pool->lock);
- INIT_LIST_HEAD (&mem_pool->list);
+ mem_pool = GF_CALLOC (sizeof (*mem_pool), 1, gf_common_mt_mem_pool);
+ if (!mem_pool)
+ return NULL;
+
+ ret = gf_asprintf (&mem_pool->name, "%s:%s", THIS->name, name);
+ if (ret < 0)
+ return NULL;
+
+ if (!mem_pool->name) {
+ GF_FREE (mem_pool);
+ return NULL;
+ }
+
+ LOCK_INIT (&mem_pool->lock);
+ INIT_LIST_HEAD (&mem_pool->list);
+ INIT_LIST_HEAD (&mem_pool->global_list);
- mem_pool->padded_sizeof_type = padded_sizeof_type;
- mem_pool->cold_count = count;
+ mem_pool->padded_sizeof_type = padded_sizeof_type;
+ mem_pool->cold_count = count;
mem_pool->real_sizeof_type = sizeof_type;
- pool = CALLOC (count, padded_sizeof_type);
- if (!pool) {
- FREE (mem_pool);
- return NULL;
+ pool = GF_CALLOC (count, padded_sizeof_type, gf_common_mt_long);
+ if (!pool) {
+ GF_FREE (mem_pool->name);
+ GF_FREE (mem_pool);
+ return NULL;
}
- for (i = 0; i < count; i++) {
- list = pool + (i * (padded_sizeof_type));
- INIT_LIST_HEAD (list);
- list_add_tail (list, &mem_pool->list);
- }
+ for (i = 0; i < count; i++) {
+ list = pool + (i * (padded_sizeof_type));
+ INIT_LIST_HEAD (list);
+ list_add_tail (list, &mem_pool->list);
+ }
+
+ mem_pool->pool = pool;
+ mem_pool->pool_end = pool + (count * (padded_sizeof_type));
+
+ /* add this pool to the global list */
+ ctx = THIS->ctx;
+ if (!ctx)
+ goto out;
- mem_pool->pool = pool;
- mem_pool->pool_end = pool + (count * (padded_sizeof_type));
+ list_add (&mem_pool->global_list, &ctx->mempool_list);
- return mem_pool;
+out:
+ return mem_pool;
}
+void*
+mem_get0 (struct mem_pool *mem_pool)
+{
+ void *ptr = NULL;
+
+ if (!mem_pool) {
+ gf_log_callingfn ("mem-pool", GF_LOG_ERROR, "invalid argument");
+ return NULL;
+ }
+
+ ptr = mem_get(mem_pool);
+
+ if (ptr)
+ memset(ptr, 0, mem_pool->real_sizeof_type);
+
+ return ptr;
+}
void *
mem_get (struct mem_pool *mem_pool)
{
- struct list_head *list = NULL;
- void *ptr = NULL;
-
- if (!mem_pool) {
- gf_log ("mem-pool", GF_LOG_ERROR, "invalid argument");
- return NULL;
- }
-
- LOCK (&mem_pool->lock);
- {
- if (mem_pool->cold_count) {
- list = mem_pool->list.next;
- list_del (list);
-
- mem_pool->hot_count++;
- mem_pool->cold_count--;
-
- ptr = list;
+ struct list_head *list = NULL;
+ void *ptr = NULL;
+ int *in_use = NULL;
+ struct mem_pool **pool_ptr = NULL;
+
+ if (!mem_pool) {
+ gf_log_callingfn ("mem-pool", GF_LOG_ERROR, "invalid argument");
+ return NULL;
+ }
+
+ LOCK (&mem_pool->lock);
+ {
+ mem_pool->alloc_count++;
+ if (mem_pool->cold_count) {
+ list = mem_pool->list.next;
+ list_del (list);
+
+ mem_pool->hot_count++;
+ mem_pool->cold_count--;
+
+ if (mem_pool->max_alloc < mem_pool->hot_count)
+ mem_pool->max_alloc = mem_pool->hot_count;
+
+ ptr = list;
+ in_use = (ptr + GF_MEM_POOL_LIST_BOUNDARY +
+ GF_MEM_POOL_PTR);
+ *in_use = 1;
+
goto fwd_addr_out;
- }
+ }
/* This is a problem area. If we've run out of
* chunks in our slab above, we need to allocate
* enough memory to service this request.
- * The problem is, these indvidual chunks will fail
+ * The problem is, these individual chunks will fail
* the first address range check in __is_member. Now, since
* we're not allocating a full second slab, we wont have
* enough info perform the range check in __is_member.
@@ -117,73 +412,104 @@ mem_get (struct mem_pool *mem_pool)
* because it is too much work knowing that a better slab
* allocator is coming RSN.
*/
- ptr = MALLOC (mem_pool->real_sizeof_type);
+ mem_pool->pool_misses++;
+ mem_pool->curr_stdalloc++;
+ if (mem_pool->max_stdalloc < mem_pool->curr_stdalloc)
+ mem_pool->max_stdalloc = mem_pool->curr_stdalloc;
+ ptr = GF_CALLOC (1, mem_pool->padded_sizeof_type,
+ gf_common_mt_mem_pool);
/* Memory coming from the heap need not be transformed from a
* chunkhead to a usable pointer since it is not coming from
* the pool.
*/
- goto unlocked_out;
- }
+ }
fwd_addr_out:
+ pool_ptr = mem_pool_from_ptr (ptr);
+ *pool_ptr = (struct mem_pool *)mem_pool;
ptr = mem_pool_chunkhead2ptr (ptr);
-unlocked_out:
UNLOCK (&mem_pool->lock);
- return ptr;
+ return ptr;
}
static int
__is_member (struct mem_pool *pool, void *ptr)
{
- if (!pool || !ptr) {
- gf_log ("mem-pool", GF_LOG_ERROR, "invalid argument");
- return -1;
- }
-
- if (ptr < pool->pool || ptr >= pool->pool_end)
- return 0;
-
- if ((mem_pool_ptr2chunkhead (ptr) - pool->pool)
- % pool->padded_sizeof_type)
- return -1;
-
- return 1;
+ if (!pool || !ptr) {
+ gf_log_callingfn ("mem-pool", GF_LOG_ERROR, "invalid argument");
+ return -1;
+ }
+
+ if (ptr < pool->pool || ptr >= pool->pool_end)
+ return 0;
+
+ if ((mem_pool_ptr2chunkhead (ptr) - pool->pool)
+ % pool->padded_sizeof_type)
+ return -1;
+
+ return 1;
}
void
-mem_put (struct mem_pool *pool, void *ptr)
+mem_put (void *ptr)
{
- struct list_head *list = NULL;
-
- if (!pool || !ptr) {
- gf_log ("mem-pool", GF_LOG_ERROR, "invalid argument");
- return;
- }
-
- LOCK (&pool->lock);
- {
-
- switch (__is_member (pool, ptr))
- {
- case 1:
- list = mem_pool_ptr2chunkhead (ptr);
- pool->hot_count--;
- pool->cold_count++;
- list_add (list, &pool->list);
- break;
- case -1:
+ struct list_head *list = NULL;
+ int *in_use = NULL;
+ void *head = NULL;
+ struct mem_pool **tmp = NULL;
+ struct mem_pool *pool = NULL;
+
+ if (!ptr) {
+ gf_log_callingfn ("mem-pool", GF_LOG_ERROR, "invalid argument");
+ return;
+ }
+
+ list = head = mem_pool_ptr2chunkhead (ptr);
+ tmp = mem_pool_from_ptr (head);
+ if (!tmp) {
+ gf_log_callingfn ("mem-pool", GF_LOG_ERROR,
+ "ptr header is corrupted");
+ return;
+ }
+
+ pool = *tmp;
+ if (!pool) {
+ gf_log_callingfn ("mem-pool", GF_LOG_ERROR,
+ "mem-pool ptr is NULL");
+ return;
+ }
+ LOCK (&pool->lock);
+ {
+
+ switch (__is_member (pool, ptr))
+ {
+ case 1:
+ in_use = (head + GF_MEM_POOL_LIST_BOUNDARY +
+ GF_MEM_POOL_PTR);
+ if (!is_mem_chunk_in_use(in_use)) {
+ gf_log_callingfn ("mem-pool", GF_LOG_CRITICAL,
+ "mem_put called on freed ptr %p of mem "
+ "pool %p", ptr, pool);
+ break;
+ }
+ pool->hot_count--;
+ pool->cold_count++;
+ *in_use = 0;
+ list_add (list, &pool->list);
+ break;
+ case -1:
/* For some reason, the address given is within
* the address range of the mem-pool but does not align
* with the expected start of a chunk that includes
* the list headers also. Sounds like a problem in
* layers of clouds up above us. ;)
*/
- abort ();
- break;
- case 0:
+ abort ();
+ break;
+ case 0:
/* The address is outside the range of the mem-pool. We
* assume here that this address was allocated at a
* point when the mem-pool was out of chunks in mem_get
@@ -192,26 +518,32 @@ mem_put (struct mem_pool *pool, void *ptr)
* not have enough info to distinguish between the two
* situations.
*/
- FREE (ptr);
- break;
- default:
- /* log error */
- break;
- }
- }
- UNLOCK (&pool->lock);
+ pool->curr_stdalloc--;
+ GF_FREE (list);
+ break;
+ default:
+ /* log error */
+ break;
+ }
+ }
+ UNLOCK (&pool->lock);
}
-
void
mem_pool_destroy (struct mem_pool *pool)
{
if (!pool)
return;
+ gf_log (THIS->name, GF_LOG_INFO, "size=%lu max=%d total=%"PRIu64,
+ pool->padded_sizeof_type, pool->max_alloc, pool->alloc_count);
+
+ list_del (&pool->global_list);
+
LOCK_DESTROY (&pool->lock);
- FREE (pool->pool);
- FREE (pool);
+ GF_FREE (pool->name);
+ GF_FREE (pool->pool);
+ GF_FREE (pool);
return;
}
diff --git a/libglusterfs/src/mem-pool.h b/libglusterfs/src/mem-pool.h
index 152c17bb4..31f49f75c 100644
--- a/libglusterfs/src/mem-pool.h
+++ b/libglusterfs/src/mem-pool.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _MEM_POOL_H_
@@ -22,36 +13,184 @@
#include "list.h"
#include "locking.h"
+#include "logging.h"
+#include "mem-types.h"
+#include <stdlib.h>
+#include <inttypes.h>
+#include <string.h>
+#include <stdarg.h>
+
+
+struct mem_acct {
+ uint32_t num_types;
+ struct mem_acct_rec *rec;
+};
+
+struct mem_acct_rec {
+ size_t size;
+ size_t max_size;
+ uint32_t num_allocs;
+ uint32_t total_allocs;
+ uint32_t max_num_allocs;
+ gf_lock_t lock;
+};
+
+
+void *
+__gf_calloc (size_t cnt, size_t size, uint32_t type);
+
+void *
+__gf_malloc (size_t size, uint32_t type);
+
+void *
+__gf_realloc (void *ptr, size_t size);
+
+int
+gf_vasprintf (char **string_ptr, const char *format, va_list arg);
+
+int
+gf_asprintf (char **string_ptr, const char *format, ...);
+
+void
+__gf_free (void *ptr);
+
+
+static inline
+void* __gf_default_malloc (size_t size)
+{
+ void *ptr = NULL;
+
+ ptr = malloc (size);
+ if (!ptr)
+ gf_log_nomem ("", GF_LOG_ALERT, size);
+
+ return ptr;
+}
+
+static inline
+void* __gf_default_calloc (int cnt, size_t size)
+{
+ void *ptr = NULL;
+
+ ptr = calloc (cnt, size);
+ if (!ptr)
+ gf_log_nomem ("", GF_LOG_ALERT, (cnt * size));
+
+ return ptr;
+}
+
+static inline
+void* __gf_default_realloc (void *oldptr, size_t size)
+{
+ void *ptr = NULL;
+ ptr = realloc (oldptr, size);
+ if (!ptr)
+ gf_log_nomem ("", GF_LOG_ALERT, size);
-#define MALLOC(size) malloc(size)
-#define CALLOC(cnt,size) calloc(cnt,size)
+ return ptr;
+}
-#define FREE(ptr) \
- if (ptr != NULL) { \
- free ((void *)ptr); \
- ptr = (void *)0xeeeeeeee; \
- }
+#define MALLOC(size) __gf_default_malloc(size)
+#define CALLOC(cnt,size) __gf_default_calloc(cnt,size)
+#define REALLOC(ptr,size) __gf_default_realloc(ptr,size)
+
+#define FREE(ptr) \
+ if (ptr != NULL) { \
+ free ((void *)ptr); \
+ ptr = (void *)0xeeeeeeee; \
+ }
+
+#define GF_CALLOC(nmemb, size, type) __gf_calloc (nmemb, size, type)
+
+#define GF_MALLOC(size, type) __gf_malloc (size, type)
+
+#define GF_REALLOC(ptr, size) __gf_realloc (ptr, size)
+
+#define GF_FREE(free_ptr) __gf_free (free_ptr)
+
+static inline
+char *gf_strndup (const char *src, size_t len)
+{
+ char *dup_str = NULL;
+
+ if (!src) {
+ goto out;
+ }
+
+ dup_str = GF_CALLOC (1, len + 1, gf_common_mt_strdup);
+ if (!dup_str) {
+ goto out;
+ }
+
+ memcpy (dup_str, src, len);
+out:
+ return dup_str;
+}
+
+static inline
+char * gf_strdup (const char *src)
+{
+
+ char *dup_str = NULL;
+ size_t len = 0;
+
+ len = strlen (src) + 1;
+
+ dup_str = GF_CALLOC(1, len, gf_common_mt_strdup);
+
+ if (!dup_str)
+ return NULL;
+
+ memcpy (dup_str, src, len);
+
+ return dup_str;
+}
+
+static inline void *
+gf_memdup (const void *src, size_t size)
+{
+ void *dup_mem = NULL;
+
+ dup_mem = GF_CALLOC(1, size, gf_common_mt_strdup);
+ if (!dup_mem)
+ goto out;
+
+ memcpy (dup_mem, src, size);
+
+out:
+ return dup_mem;
+}
struct mem_pool {
- struct list_head list;
- int hot_count;
- int cold_count;
- gf_lock_t lock;
- unsigned long padded_sizeof_type;
- void *pool;
- void *pool_end;
+ struct list_head list;
+ int hot_count;
+ int cold_count;
+ gf_lock_t lock;
+ unsigned long padded_sizeof_type;
+ void *pool;
+ void *pool_end;
int real_sizeof_type;
+ uint64_t alloc_count;
+ uint64_t pool_misses;
+ int max_alloc;
+ int curr_stdalloc;
+ int max_stdalloc;
+ char *name;
+ struct list_head global_list;
};
struct mem_pool *
-mem_pool_new_fn (unsigned long sizeof_type, unsigned long count);
+mem_pool_new_fn (unsigned long sizeof_type, unsigned long count, char *name);
-#define mem_pool_new(type,count) mem_pool_new_fn (sizeof(type), count)
+#define mem_pool_new(type,count) mem_pool_new_fn (sizeof(type), count, #type)
-void mem_put (struct mem_pool *pool, void *ptr);
+void mem_put (void *ptr);
void *mem_get (struct mem_pool *pool);
+void *mem_get0 (struct mem_pool *pool);
void mem_pool_destroy (struct mem_pool *pool);
+void gf_mem_acct_enable_set (void *ctx);
+
#endif /* _MEM_POOL_H */
diff --git a/libglusterfs/src/mem-types.h b/libglusterfs/src/mem-types.h
new file mode 100644
index 000000000..666bd120a
--- /dev/null
+++ b/libglusterfs/src/mem-types.h
@@ -0,0 +1,125 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __MEM_TYPES_H__
+#define __MEM_TYPES_H__
+
+
+enum gf_common_mem_types_ {
+ gf_common_mt_call_stub_t = 0,
+ gf_common_mt_dnscache6 = 1,
+ gf_common_mt_data_pair_t = 2,
+ gf_common_mt_data_t = 3,
+ gf_common_mt_dict_t = 4,
+ gf_common_mt_event_pool = 5,
+ gf_common_mt_reg = 6,
+ gf_common_mt_pollfd = 7,
+ gf_common_mt_epoll_event = 8,
+ gf_common_mt_fdentry_t = 9,
+ gf_common_mt_fdtable_t = 10,
+ gf_common_mt_fd_t = 11,
+ gf_common_mt_fd_ctx = 12,
+ gf_common_mt_gf_dirent_t = 13,
+ gf_common_mt_glusterfs_ctx_t = 14,
+ gf_common_mt_dentry_t = 15,
+ gf_common_mt_inode_t = 16,
+ gf_common_mt_inode_ctx = 17,
+ gf_common_mt_list_head = 18,
+ gf_common_mt_inode_table_t = 19,
+ gf_common_mt_xlator_t = 20,
+ gf_common_mt_xlator_list_t = 21,
+ gf_common_mt_log_msg = 22,
+ gf_common_mt_client_log = 23,
+ gf_common_mt_volume_opt_list_t = 24,
+ gf_common_mt_gf_hdr_common_t = 25,
+ gf_common_mt_call_frame_t = 26,
+ gf_common_mt_call_stack_t = 27,
+ gf_common_mt_gf_timer_t = 28,
+ gf_common_mt_gf_timer_registry_t = 29,
+ gf_common_mt_transport = 30,
+ gf_common_mt_transport_msg = 31,
+ gf_common_mt_auth_handle_t = 32,
+ gf_common_mt_iobuf = 33,
+ gf_common_mt_iobuf_arena = 34,
+ gf_common_mt_iobref = 35,
+ gf_common_mt_iobuf_pool = 36,
+ gf_common_mt_iovec = 37,
+ gf_common_mt_memdup = 38,
+ gf_common_mt_asprintf = 39,
+ gf_common_mt_strdup = 40,
+ gf_common_mt_socket_private_t = 41,
+ gf_common_mt_ioq = 42,
+ gf_common_mt_transport_t = 43,
+ gf_common_mt_socket_local_t = 44,
+ gf_common_mt_char = 45,
+ gf_common_mt_rbthash_table_t = 46,
+ gf_common_mt_rbthash_bucket = 47,
+ gf_common_mt_mem_pool = 48,
+ gf_common_mt_long = 49,
+ gf_common_mt_rpcsvc_auth_list = 50,
+ gf_common_mt_rpcsvc_t = 51,
+ gf_common_mt_rpcsvc_conn_t = 52,
+ gf_common_mt_rpcsvc_program_t = 53,
+ gf_common_mt_rpcsvc_listener_t = 54,
+ gf_common_mt_rpcsvc_wrapper_t = 55,
+ gf_common_mt_rpcsvc_stage_t = 56,
+ gf_common_mt_rpcclnt_t = 57,
+ gf_common_mt_rpcclnt_savedframe_t = 58,
+ gf_common_mt_rpc_trans_t = 59,
+ gf_common_mt_rpc_trans_pollin_t = 60,
+ gf_common_mt_rpc_trans_handover_t = 61,
+ gf_common_mt_rpc_trans_reqinfo_t = 62,
+ gf_common_mt_rpc_trans_rsp_t = 63,
+ gf_common_mt_glusterfs_graph_t = 64,
+ gf_common_mt_rdma_private_t = 65,
+ gf_common_mt_rdma_ioq_t = 66,
+ gf_common_mt_rpc_transport_t = 67,
+ gf_common_mt_rdma_local_t = 68,
+ gf_common_mt_rdma_post_t = 69,
+ gf_common_mt_qpent = 70,
+ gf_common_mt_rdma_device_t = 71,
+ gf_common_mt_rdma_context_t = 72,
+ gf_common_mt_sge = 73,
+ gf_common_mt_rpcclnt_cb_program_t = 74,
+ gf_common_mt_libxl_marker_local = 75,
+ gf_common_mt_graph_buf = 76,
+ gf_common_mt_trie_trie = 77,
+ gf_common_mt_trie_data = 78,
+ gf_common_mt_trie_node = 79,
+ gf_common_mt_trie_buf = 80,
+ gf_common_mt_trie_end = 81,
+ gf_common_mt_run_argv = 82,
+ gf_common_mt_run_logbuf = 83,
+ gf_common_mt_fd_lk_ctx_t = 84,
+ gf_common_mt_fd_lk_ctx_node_t = 85,
+ gf_common_mt_buffer_t = 86,
+ gf_common_mt_circular_buffer_t = 87,
+ gf_common_mt_eh_t = 88,
+ gf_common_mt_store_handle_t = 89,
+ gf_common_mt_store_iter_t = 90,
+ gf_common_mt_drc_client_t = 91,
+ gf_common_mt_drc_globals_t = 92,
+ gf_common_mt_drc_rbtree_node_t = 93,
+ gf_common_mt_iov_base_t = 94,
+ gf_common_mt_groups_t = 95,
+ gf_common_mt_cliententry_t = 96,
+ gf_common_mt_clienttable_t = 97,
+ gf_common_mt_client_t = 98,
+ gf_common_mt_client_ctx = 99,
+ gf_common_mt_lock_table = 100,
+ gf_common_mt_locker = 101,
+ gf_common_mt_auxgids = 102,
+ gf_common_mt_syncopctx = 103,
+ gf_common_mt_uuid_t = 104,
+ gf_common_mt_mgmt_v3_lock_obj_t = 105,
+ gf_common_mt_txn_opinfo_obj_t = 106,
+ gf_common_mt_end = 107
+};
+#endif
diff --git a/libglusterfs/src/options.c b/libglusterfs/src/options.c
new file mode 100644
index 000000000..842b6413a
--- /dev/null
+++ b/libglusterfs/src/options.c
@@ -0,0 +1,1127 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <fnmatch.h>
+
+#include "xlator.h"
+#include "defaults.h"
+
+#define GF_OPTION_LIST_EMPTY(_opt) (_opt->value[0] == NULL)
+
+
+static int
+xlator_option_validate_path (xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
+{
+ int ret = -1;
+ char errstr[256];
+
+ if (strstr (value, "../")) {
+ snprintf (errstr, 256,
+ "invalid path given '%s'",
+ value);
+ gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ goto out;
+ }
+
+ /* Make sure the given path is valid */
+ if (value[0] != '/') {
+ snprintf (errstr, 256,
+ "option %s %s: '%s' is not an "
+ "absolute path name",
+ key, value, value);
+ gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret && op_errstr)
+ *op_errstr = gf_strdup (errstr);
+ return ret;
+}
+
+static int
+xlator_option_validate_int (xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
+{
+ long long inputll = 0;
+ int ret = -1;
+ char errstr[256];
+
+ /* Check the range */
+ if (gf_string2longlong (value, &inputll) != 0) {
+ snprintf (errstr, 256,
+ "invalid number format \"%s\" in option \"%s\"",
+ value, key);
+ gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ goto out;
+ }
+
+ if ((opt->min == 0) && (opt->max == 0) &&
+ (opt->validate == GF_OPT_VALIDATE_BOTH)) {
+ gf_log (xl->name, GF_LOG_TRACE,
+ "no range check required for 'option %s %s'",
+ key, value);
+ ret = 0;
+ goto out;
+ }
+
+ if ((opt->validate == GF_OPT_VALIDATE_MIN)) {
+ if (inputll < opt->min) {
+ snprintf (errstr, 256,
+ "'%lld' in 'option %s %s' is smaller than "
+ "minimum value '%.0f'", inputll, key,
+ value, opt->min);
+ gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ goto out;
+ }
+ } else if ((opt->validate == GF_OPT_VALIDATE_MAX)) {
+ if ((inputll > opt->max)) {
+ snprintf (errstr, 256,
+ "'%lld' in 'option %s %s' is greater than "
+ "maximum value '%.0f'", inputll, key,
+ value, opt->max);
+ gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ goto out;
+ }
+ } else if ((inputll < opt->min) || (inputll > opt->max)) {
+ snprintf (errstr, 256,
+ "'%lld' in 'option %s %s' is out of range "
+ "[%.0f - %.0f]",
+ inputll, key, value, opt->min, opt->max);
+ gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret && op_errstr)
+ *op_errstr = gf_strdup (errstr);
+ return ret;
+}
+
+
+static int
+xlator_option_validate_sizet (xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
+{
+ uint64_t size = 0;
+ int ret = 0;
+ char errstr[256];
+
+ /* Check the range */
+ if (gf_string2bytesize (value, &size) != 0) {
+ snprintf (errstr, 256,
+ "invalid number format \"%s\" in option \"%s\"",
+ value, key);
+ gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ ret = -1;
+ goto out;
+ }
+
+ if ((opt->min == 0) && (opt->max == 0)) {
+ gf_log (xl->name, GF_LOG_TRACE,
+ "no range check required for 'option %s %s'",
+ key, value);
+ goto out;
+ }
+
+ if ((size < opt->min) || (size > opt->max)) {
+ if ((strncmp (key, "cache-size", 10) == 0) &&
+ (size > opt->max)) {
+ snprintf (errstr, 256, "Cache size %"PRId64" is out of "
+ "range [%.0f - %.0f]",
+ size, opt->min, opt->max);
+ gf_log (xl->name, GF_LOG_WARNING, "%s", errstr);
+ } else {
+ snprintf (errstr, 256,
+ "'%"PRId64"' in 'option %s %s' "
+ "is out of range [%.0f - %.0f]",
+ size, key, value, opt->min, opt->max);
+ gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ ret = -1;
+ }
+ }
+
+out:
+ if (ret && op_errstr)
+ *op_errstr = gf_strdup (errstr);
+ return ret;
+}
+
+
+static int
+xlator_option_validate_bool (xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
+{
+ int ret = -1;
+ char errstr[256];
+ gf_boolean_t bool;
+
+
+ /* Check if the value is one of
+ '0|1|on|off|no|yes|true|false|enable|disable' */
+
+ if (gf_string2boolean (value, &bool) != 0) {
+ snprintf (errstr, 256,
+ "option %s %s: '%s' is not a valid boolean value",
+ key, value, value);
+ gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret && op_errstr)
+ *op_errstr = gf_strdup (errstr);
+ return ret;
+}
+
+
+static int
+xlator_option_validate_xlator (xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
+{
+ int ret = -1;
+ char errstr[256];
+ xlator_t *xlopt = NULL;
+
+
+ /* Check if the value is one of the xlators */
+ xlopt = xl;
+ while (xlopt->prev)
+ xlopt = xlopt->prev;
+
+ while (xlopt) {
+ if (strcmp (value, xlopt->name) == 0) {
+ ret = 0;
+ break;
+ }
+ xlopt = xlopt->next;
+ }
+
+ if (!xlopt) {
+ snprintf (errstr, 256,
+ "option %s %s: '%s' is not a valid volume name",
+ key, value, value);
+ gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret && op_errstr)
+ *op_errstr = gf_strdup (errstr);
+ return ret;
+}
+
+void
+set_error_str (char *errstr, size_t len, volume_option_t *opt, const char *key,
+ const char *value)
+{
+ int i = 0;
+ char given_array[4096] = {0,};
+
+ for (i = 0; (i < ZR_OPTION_MAX_ARRAY_SIZE) && opt->value[i];) {
+ strcat (given_array, opt->value[i]);
+ if (((++i) < ZR_OPTION_MAX_ARRAY_SIZE) &&
+ (opt->value[i]))
+ strcat (given_array, ", ");
+ else
+ strcat (given_array, ".");
+ }
+ snprintf (errstr, len, "option %s %s: '%s' is not valid "
+ "(possible options are %s)", key, value, value, given_array);
+ return;
+}
+
+int
+is_all_whitespaces (const char *value)
+{
+ int i = 0;
+ size_t len = 0;
+
+ if (value == NULL)
+ return -1;
+
+ len = strlen (value);
+
+ for (i = 0; i < len; i++) {
+ if (value[i] == ' ')
+ continue;
+ else
+ return 0;
+ }
+
+ return 1;
+}
+
+static int
+xlator_option_validate_str (xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
+{
+ int ret = -1;
+ int i = 0;
+ char errstr[4096] = {0,};
+
+ /* Check if the '*str' is valid */
+ if (GF_OPTION_LIST_EMPTY(opt)) {
+ ret = 0;
+ goto out;
+ }
+
+ if (is_all_whitespaces (value) == 1)
+ goto out;
+
+ for (i = 0; (i < ZR_OPTION_MAX_ARRAY_SIZE) && opt->value[i]; i++) {
+ #ifdef GF_DARWIN_HOST_OS
+ if (fnmatch (opt->value[i], value, 0) == 0) {
+ ret = 0;
+ break;
+ }
+ #else
+ if (fnmatch (opt->value[i], value, FNM_EXTMATCH) == 0) {
+ ret = 0;
+ break;
+ }
+ #endif
+ }
+
+ if ((i == ZR_OPTION_MAX_ARRAY_SIZE) || (!opt->value[i]))
+ goto out;
+ /* enter here only if
+ * 1. reached end of opt->value array and haven't
+ * validated input
+ * OR
+ * 2. valid input list is less than
+ * ZR_OPTION_MAX_ARRAY_SIZE and input has not
+ * matched all possible input values.
+ */
+
+ ret = 0;
+
+out:
+ if (ret) {
+ set_error_str (errstr, sizeof (errstr), opt, key, value);
+ gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ if (op_errstr)
+ *op_errstr = gf_strdup (errstr);
+ }
+ return ret;
+}
+
+
+static int
+xlator_option_validate_percent (xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
+{
+ double percent = 0;
+ int ret = -1;
+ char errstr[256];
+
+ /* Check if the value is valid percentage */
+ if (gf_string2percent (value, &percent) != 0) {
+ snprintf (errstr, 256,
+ "invalid percent format \"%s\" in \"option %s\"",
+ value, key);
+ gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ goto out;
+ }
+
+ if ((percent < 0.0) || (percent > 100.0)) {
+ snprintf (errstr, 256,
+ "'%lf' in 'option %s %s' is out of range [0 - 100]",
+ percent, key, value);
+ gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret && op_errstr)
+ *op_errstr = gf_strdup (errstr);
+ return ret;
+}
+
+static int
+xlator_option_validate_percent_or_sizet (xlator_t *xl, const char *key,
+ const char *value,
+ volume_option_t *opt, char **op_errstr)
+{
+ int ret = -1;
+ char errstr[256];
+ uint64_t size = 0;
+ gf_boolean_t is_percent = _gf_false;
+
+ if (gf_string2percent_or_bytesize (value, &size, &is_percent) == 0) {
+ if (is_percent) {
+ ret = 0;
+ goto out;
+ }
+ /* Check the range */
+ if ((opt->min == 0) && (opt->max == 0)) {
+ gf_log (xl->name, GF_LOG_TRACE,
+ "no range check required for "
+ "'option %s %s'",
+ key, value);
+ ret = 0;
+ goto out;
+ }
+ if ((size < opt->min) || (size > opt->max)) {
+ snprintf (errstr, 256,
+ "'%"PRId64"' in 'option %s %s'"
+ " is out of range [%.0f - %.0f]",
+ size, key, value, opt->min, opt->max);
+ gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ goto out;
+ }
+ ret = 0;
+ goto out;
+ }
+
+ /* If control reaches here, invalid argument */
+
+ snprintf (errstr, 256,
+ "invalid number format \"%s\" in \"option %s\"",
+ value, key);
+ gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+
+
+out:
+ if (ret && op_errstr)
+ *op_errstr = gf_strdup (errstr);
+ return ret;
+}
+
+
+static int
+xlator_option_validate_time (xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
+{
+ int ret = -1;
+ char errstr[256];
+ uint32_t input_time = 0;
+
+ /* Check if the value is valid time */
+ if (gf_string2time (value, &input_time) != 0) {
+ snprintf (errstr, 256,
+ "invalid time format \"%s\" in "
+ "\"option %s\"",
+ value, key);
+ gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ goto out;
+ }
+
+ if ((opt->min == 0) && (opt->max == 0)) {
+ gf_log (xl->name, GF_LOG_TRACE,
+ "no range check required for "
+ "'option %s %s'",
+ key, value);
+ ret = 0;
+ goto out;
+ }
+
+ if ((input_time < opt->min) || (input_time > opt->max)) {
+ snprintf (errstr, 256,
+ "'%"PRIu32"' in 'option %s %s' is "
+ "out of range [%.0f - %.0f]",
+ input_time, key, value,
+ opt->min, opt->max);
+ gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret && op_errstr)
+ *op_errstr = gf_strdup (errstr);
+ return ret;
+}
+
+
+static int
+xlator_option_validate_double (xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
+{
+ double input = 0.0;
+ int ret = -1;
+ char errstr[256];
+
+ /* Check the range */
+ if (gf_string2double (value, &input) != 0) {
+ snprintf (errstr, 256,
+ "invalid number format \"%s\" in option \"%s\"",
+ value, key);
+ gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ goto out;
+ }
+
+ if ((opt->min == 0) && (opt->max == 0) &&
+ (opt->validate == GF_OPT_VALIDATE_BOTH)) {
+ gf_log (xl->name, GF_LOG_TRACE,
+ "no range check required for 'option %s %s'",
+ key, value);
+ ret = 0;
+ goto out;
+ }
+
+ if ((opt->validate == GF_OPT_VALIDATE_MIN)) {
+ if (input < opt->min) {
+ snprintf (errstr, 256,
+ "'%f' in 'option %s %s' is smaller than "
+ "minimum value '%f'", input, key,
+ value, opt->min);
+ gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ goto out;
+ }
+ } else if ((opt->validate == GF_OPT_VALIDATE_MAX)) {
+ if ((input > opt->max)) {
+ snprintf (errstr, 256,
+ "'%f' in 'option %s %s' is greater than "
+ "maximum value '%f'", input, key,
+ value, opt->max);
+ gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ goto out;
+ }
+ } else if ((input < opt->min) || (input > opt->max)) {
+ snprintf (errstr, 256,
+ "'%f' in 'option %s %s' is out of range "
+ "[%f - %f]",
+ input, key, value, opt->min, opt->max);
+ gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret && op_errstr)
+ *op_errstr = gf_strdup (errstr);
+ return ret;
+}
+
+
+static int
+xlator_option_validate_addr (xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
+{
+ int ret = -1;
+ char errstr[256];
+
+ if (!valid_internet_address ((char *)value, _gf_false)) {
+ snprintf (errstr, 256,
+ "option %s %s: '%s' is not a valid internet-address,"
+ " it does not conform to standards.",
+ key, value, value);
+ gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ if (op_errstr)
+ *op_errstr = gf_strdup (errstr);
+ }
+
+ ret = 0;
+
+ return ret;
+}
+
+static int
+xlator_option_validate_addr_list (xlator_t *xl, const char *key,
+ const char *value, volume_option_t *opt,
+ char **op_errstr)
+{
+ int ret = -1;
+ char *dup_val = NULL;
+ char *addr_tok = NULL;
+ char *save_ptr = NULL;
+ char errstr[4096] = {0,};
+
+ dup_val = gf_strdup (value);
+ if (!dup_val)
+ goto out;
+
+ addr_tok = strtok_r (dup_val, ",", &save_ptr);
+ if (addr_tok == NULL)
+ goto out;
+ while (addr_tok) {
+ if (!valid_internet_address (addr_tok, _gf_true))
+ goto out;
+
+ addr_tok = strtok_r (NULL, ",", &save_ptr);
+ }
+ ret = 0;
+
+out:
+ if (ret) {
+ snprintf (errstr, sizeof (errstr), "option %s %s: '%s' is not "
+ "a valid internet-address-list", key, value, value);
+ gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ if (op_errstr)
+ *op_errstr = gf_strdup (errstr);
+ }
+ GF_FREE (dup_val);
+
+ return ret;
+}
+
+/*XXX: the rules to validate are as per block-size required for stripe xlator */
+static int
+gf_validate_size (const char *sizestr, volume_option_t *opt)
+{
+ uint64_t value = 0;
+ int ret = 0;
+
+ GF_ASSERT (opt);
+
+ if (gf_string2bytesize (sizestr, &value) != 0 ||
+ value < opt->min ||
+ value % 512) {
+ ret = -1;
+ goto out;
+ }
+
+ out:
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+static int
+gf_validate_number (const char *numstr, volume_option_t *opt)
+{
+ int32_t value;
+ return gf_string2int32 (numstr, &value);
+}
+
+/* Parses the string to be of the form <key1>:<value1>,<key2>:<value2>... *
+ * takes two optional validaters key_validator and value_validator */
+static int
+validate_list_elements (const char *string, volume_option_t *opt,
+ int (key_validator)( const char *),
+ int (value_validator)( const char *, volume_option_t *))
+{
+
+ char *dup_string = NULL;
+ char *str_sav = NULL;
+ char *substr_sav = NULL;
+ char *str_ptr = NULL;
+ char *key = NULL;
+ char *value = NULL;
+ int ret = 0;
+
+ GF_ASSERT (string);
+
+ dup_string = gf_strdup (string);
+ if (NULL == dup_string)
+ goto out;
+
+ str_ptr = strtok_r (dup_string, ",", &str_sav);
+ if (str_ptr == NULL) {
+ ret = -1;
+ goto out;
+ }
+ while (str_ptr) {
+
+ key = strtok_r (str_ptr, ":", &substr_sav);
+ if (!key ||
+ (key_validator && key_validator(key))) {
+ ret = -1;
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "invalid list '%s', key '%s' not valid.",
+ string, key);
+ goto out;
+ }
+
+ value = strtok_r (NULL, ":", &substr_sav);
+ if (!value ||
+ (value_validator && value_validator(value, opt))) {
+ ret = -1;
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "invalid list '%s', value '%s' not valid.",
+ string, key);
+ goto out;
+ }
+
+ str_ptr = strtok_r (NULL, ",", &str_sav);
+ substr_sav = NULL;
+ }
+
+ out:
+ GF_FREE (dup_string);
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+static int
+xlator_option_validate_priority_list (xlator_t *xl, const char *key,
+ const char *value, volume_option_t *opt,
+ char **op_errstr)
+{
+ int ret =0;
+ char errstr[1024] = {0, };
+
+ GF_ASSERT (value);
+
+ ret = validate_list_elements (value, opt, NULL, &gf_validate_number);
+ if (ret) {
+ snprintf (errstr, 1024,
+ "option %s %s: '%s' is not a valid "
+ "priority-list", key, value, value);
+ *op_errstr = gf_strdup (errstr);
+ }
+
+ return ret;
+}
+
+static int
+xlator_option_validate_size_list (xlator_t *xl, const char *key,
+ const char *value, volume_option_t *opt,
+ char **op_errstr)
+{
+
+ int ret = 0;
+ char errstr[1024] = {0, };
+
+ GF_ASSERT (value);
+
+ ret = gf_validate_size (value, opt);
+ if (ret)
+ ret = validate_list_elements (value, opt, NULL, &gf_validate_size);
+
+ if (ret) {
+ snprintf (errstr, 1024,
+ "option %s %s: '%s' is not a valid "
+ "size-list", key, value, value);
+ *op_errstr = gf_strdup (errstr);
+ }
+
+ return ret;
+
+}
+
+static int
+xlator_option_validate_any (xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
+{
+ return 0;
+}
+
+typedef int (xlator_option_validator_t) (xlator_t *xl, const char *key,
+ const char *value,
+ volume_option_t *opt, char **operrstr);
+
+int
+xlator_option_validate (xlator_t *xl, char *key, char *value,
+ volume_option_t *opt, char **op_errstr)
+{
+ int ret = -1;
+ xlator_option_validator_t *validate;
+ xlator_option_validator_t *validators[] = {
+ [GF_OPTION_TYPE_PATH] = xlator_option_validate_path,
+ [GF_OPTION_TYPE_INT] = xlator_option_validate_int,
+ [GF_OPTION_TYPE_SIZET] = xlator_option_validate_sizet,
+ [GF_OPTION_TYPE_BOOL] = xlator_option_validate_bool,
+ [GF_OPTION_TYPE_XLATOR] = xlator_option_validate_xlator,
+ [GF_OPTION_TYPE_STR] = xlator_option_validate_str,
+ [GF_OPTION_TYPE_PERCENT] = xlator_option_validate_percent,
+ [GF_OPTION_TYPE_PERCENT_OR_SIZET] =
+ xlator_option_validate_percent_or_sizet,
+ [GF_OPTION_TYPE_TIME] = xlator_option_validate_time,
+ [GF_OPTION_TYPE_DOUBLE] = xlator_option_validate_double,
+ [GF_OPTION_TYPE_INTERNET_ADDRESS] = xlator_option_validate_addr,
+ [GF_OPTION_TYPE_INTERNET_ADDRESS_LIST] =
+ xlator_option_validate_addr_list,
+ [GF_OPTION_TYPE_PRIORITY_LIST] =
+ xlator_option_validate_priority_list,
+ [GF_OPTION_TYPE_SIZE_LIST] = xlator_option_validate_size_list,
+ [GF_OPTION_TYPE_ANY] = xlator_option_validate_any,
+ [GF_OPTION_TYPE_MAX] = NULL,
+ };
+
+ if (opt->type < 0 || opt->type >= GF_OPTION_TYPE_MAX) {
+ gf_log (xl->name, GF_LOG_ERROR,
+ "unknown option type '%d'", opt->type);
+ goto out;
+ }
+
+ validate = validators[opt->type];
+
+ ret = validate (xl, key, value, opt, op_errstr);
+out:
+ return ret;
+}
+
+
+volume_option_t *
+xlator_volume_option_get_list (volume_opt_list_t *vol_list, const char *key)
+{
+ volume_option_t *opt = NULL;
+ volume_opt_list_t *opt_list = NULL;
+ volume_option_t *found = NULL;
+ int index = 0;
+ int i = 0;
+ char *cmp_key = NULL;
+
+ if (!vol_list->given_opt) {
+ opt_list = list_entry (vol_list->list.next, volume_opt_list_t,
+ list);
+ opt = opt_list->given_opt;
+ } else
+ opt = vol_list->given_opt;
+
+ for (index = 0; opt[index].key[0]; index++) {
+ for (i = 0; i < ZR_VOLUME_MAX_NUM_KEY; i++) {
+ cmp_key = opt[index].key[i];
+ if (!cmp_key)
+ break;
+ if (fnmatch (cmp_key, key, FNM_NOESCAPE) == 0) {
+ found = &opt[index];
+ goto out;
+ }
+ }
+ }
+out:
+ return found;
+}
+
+
+volume_option_t *
+xlator_volume_option_get (xlator_t *xl, const char *key)
+{
+ volume_opt_list_t *vol_list = NULL;
+ volume_option_t *found = NULL;
+
+ list_for_each_entry (vol_list, &xl->volume_options, list) {
+ found = xlator_volume_option_get_list (vol_list, key);
+ if (found)
+ break;
+ }
+
+ return found;
+}
+
+
+static int
+xl_opt_validate (dict_t *dict, char *key, data_t *value, void *data)
+{
+ xlator_t *xl = NULL;
+ volume_opt_list_t *vol_opt = NULL;
+ volume_option_t *opt = NULL;
+ int ret = 0;
+ char *errstr = NULL;
+
+ struct {
+ xlator_t *this;
+ volume_opt_list_t *vol_opt;
+ char *errstr;
+ } *stub;
+
+ stub = data;
+ xl = stub->this;
+ vol_opt = stub->vol_opt;
+
+ opt = xlator_volume_option_get_list (vol_opt, key);
+ if (!opt)
+ return 0;
+
+ ret = xlator_option_validate (xl, key, value->data, opt, &errstr);
+ if (ret)
+ gf_log (xl->name, GF_LOG_WARNING, "validate of %s returned %d",
+ key, ret);
+
+ if (errstr)
+ /* possible small leak of previously set stub->errstr */
+ stub->errstr = errstr;
+
+ if (fnmatch (opt->key[0], key, FNM_NOESCAPE) != 0) {
+ gf_log (xl->name, GF_LOG_WARNING, "option '%s' is deprecated, "
+ "preferred is '%s', continuing with correction",
+ key, opt->key[0]);
+ dict_set (dict, opt->key[0], value);
+ dict_del (dict, key);
+ }
+ return 0;
+}
+
+
+int
+xlator_options_validate_list (xlator_t *xl, dict_t *options,
+ volume_opt_list_t *vol_opt, char **op_errstr)
+{
+ int ret = 0;
+ struct {
+ xlator_t *this;
+ volume_opt_list_t *vol_opt;
+ char *errstr;
+ } stub;
+
+ stub.this = xl;
+ stub.vol_opt = vol_opt;
+ stub.errstr = NULL;
+
+ dict_foreach (options, xl_opt_validate, &stub);
+ if (stub.errstr) {
+ ret = -1;
+ if (op_errstr)
+ *op_errstr = stub.errstr;
+ }
+
+ return ret;
+}
+
+
+int
+xlator_options_validate (xlator_t *xl, dict_t *options, char **op_errstr)
+{
+ int ret = 0;
+ volume_opt_list_t *vol_opt = NULL;
+
+
+ if (!xl) {
+ gf_log (THIS->name, GF_LOG_DEBUG, "'this' not a valid ptr");
+ ret = -1;
+ goto out;
+ }
+
+ if (list_empty (&xl->volume_options))
+ goto out;
+
+ list_for_each_entry (vol_opt, &xl->volume_options, list) {
+ ret = xlator_options_validate_list (xl, options, vol_opt,
+ op_errstr);
+ }
+out:
+ return ret;
+}
+
+
+int
+xlator_validate_rec (xlator_t *xlator, char **op_errstr)
+{
+ int ret = -1;
+ xlator_list_t *trav = NULL;
+ xlator_t *old_THIS = NULL;
+
+ GF_VALIDATE_OR_GOTO ("xlator", xlator, out);
+
+ trav = xlator->children;
+
+ while (trav) {
+ if (xlator_validate_rec (trav->xlator, op_errstr)) {
+ gf_log ("xlator", GF_LOG_WARNING, "validate_rec failed");
+ goto out;
+ }
+
+ trav = trav->next;
+ }
+
+ if (xlator_dynload (xlator))
+ gf_log (xlator->name, GF_LOG_DEBUG, "Did not load the symbols");
+
+ old_THIS = THIS;
+ THIS = xlator;
+
+ /* Need this here, as this graph has not yet called init() */
+ if (!xlator->mem_acct.num_types) {
+ if (!xlator->mem_acct_init)
+ xlator->mem_acct_init = default_mem_acct_init;
+ xlator->mem_acct_init (xlator);
+ }
+
+ ret = xlator_options_validate (xlator, xlator->options, op_errstr);
+ THIS = old_THIS;
+
+ if (ret) {
+ gf_log (xlator->name, GF_LOG_INFO, "%s", *op_errstr);
+ goto out;
+ }
+
+ gf_log (xlator->name, GF_LOG_DEBUG, "Validated options");
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+int
+graph_reconf_validateopt (glusterfs_graph_t *graph, char **op_errstr)
+{
+ xlator_t *xlator = NULL;
+ int ret = -1;
+
+ GF_ASSERT (graph);
+
+ xlator = graph->first;
+
+ ret = xlator_validate_rec (xlator, op_errstr);
+
+ return ret;
+}
+
+
+static int
+xlator_reconfigure_rec (xlator_t *old_xl, xlator_t *new_xl)
+{
+ xlator_list_t *trav1 = NULL;
+ xlator_list_t *trav2 = NULL;
+ int32_t ret = -1;
+ xlator_t *old_THIS = NULL;
+
+ GF_VALIDATE_OR_GOTO ("xlator", old_xl, out);
+ GF_VALIDATE_OR_GOTO ("xlator", new_xl, out);
+
+ trav1 = old_xl->children;
+ trav2 = new_xl->children;
+
+ while (trav1 && trav2) {
+ ret = xlator_reconfigure_rec (trav1->xlator, trav2->xlator);
+ if (ret)
+ goto out;
+
+ gf_log (trav1->xlator->name, GF_LOG_DEBUG, "reconfigured");
+
+ trav1 = trav1->next;
+ trav2 = trav2->next;
+ }
+
+ if (old_xl->reconfigure) {
+ old_THIS = THIS;
+ THIS = old_xl;
+
+ ret = old_xl->reconfigure (old_xl, new_xl->options);
+
+ THIS = old_THIS;
+
+ if (ret)
+ goto out;
+ } else {
+ gf_log (old_xl->name, GF_LOG_DEBUG, "No reconfigure() found");
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+int
+xlator_tree_reconfigure (xlator_t *old_xl, xlator_t *new_xl)
+{
+ xlator_t *new_top = NULL;
+ xlator_t *old_top = NULL;
+
+ GF_ASSERT (old_xl);
+ GF_ASSERT (new_xl);
+
+ old_top = old_xl;
+ new_top = new_xl;
+
+ return xlator_reconfigure_rec (old_top, new_top);
+}
+
+
+int
+xlator_option_info_list (volume_opt_list_t *list, char *key,
+ char **def_val, char **descr)
+{
+ int ret = -1;
+ volume_option_t *opt = NULL;
+
+
+ opt = xlator_volume_option_get_list (list, key);
+ if (!opt)
+ goto out;
+
+ if (def_val)
+ *def_val = opt->default_value;
+ if (descr)
+ *descr = opt->description;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+static int
+pass (char *in, char **out)
+{
+ *out = in;
+ return 0;
+}
+
+
+static int
+xl_by_name (char *in, xlator_t **out)
+{
+ xlator_t *xl = NULL;
+
+ xl = xlator_search_by_name (THIS, in);
+
+ if (!xl)
+ return -1;
+ *out = xl;
+ return 0;
+}
+
+
+static int
+pc_or_size (char *in, double *out)
+{
+ double pc = 0;
+ int ret = 0;
+ uint64_t size = 0;
+
+ if (gf_string2percent (in, &pc) == 0) {
+ if (pc > 100.0) {
+ ret = gf_string2bytesize (in, &size);
+ if (!ret)
+ *out = size;
+ } else {
+ *out = pc;
+ }
+ } else {
+ ret = gf_string2bytesize (in, &size);
+ if (!ret)
+ *out = size;
+ }
+ return ret;
+}
+
+DEFINE_INIT_OPT(char *, str, pass);
+DEFINE_INIT_OPT(uint64_t, uint64, gf_string2uint64);
+DEFINE_INIT_OPT(int64_t, int64, gf_string2int64);
+DEFINE_INIT_OPT(uint32_t, uint32, gf_string2uint32);
+DEFINE_INIT_OPT(int32_t, int32, gf_string2int32);
+DEFINE_INIT_OPT(uint64_t, size, gf_string2bytesize);
+DEFINE_INIT_OPT(double, percent, gf_string2percent);
+DEFINE_INIT_OPT(double, percent_or_size, pc_or_size);
+DEFINE_INIT_OPT(gf_boolean_t, bool, gf_string2boolean);
+DEFINE_INIT_OPT(xlator_t *, xlator, xl_by_name);
+DEFINE_INIT_OPT(char *, path, pass);
+DEFINE_INIT_OPT(double, double, gf_string2double);
+
+
+
+DEFINE_RECONF_OPT(char *, str, pass);
+DEFINE_RECONF_OPT(uint64_t, uint64, gf_string2uint64);
+DEFINE_RECONF_OPT(int64_t, int64, gf_string2int64);
+DEFINE_RECONF_OPT(uint32_t, uint32, gf_string2uint32);
+DEFINE_RECONF_OPT(int32_t, int32, gf_string2int32);
+DEFINE_RECONF_OPT(uint64_t, size, gf_string2bytesize);
+DEFINE_RECONF_OPT(double, percent, gf_string2percent);
+DEFINE_RECONF_OPT(double, percent_or_size, pc_or_size);
+DEFINE_RECONF_OPT(gf_boolean_t, bool, gf_string2boolean);
+DEFINE_RECONF_OPT(xlator_t *, xlator, xl_by_name);
+DEFINE_RECONF_OPT(char *, path, pass);
+DEFINE_RECONF_OPT(double, double, gf_string2double);
diff --git a/libglusterfs/src/options.h b/libglusterfs/src/options.h
new file mode 100644
index 000000000..e2a25baa9
--- /dev/null
+++ b/libglusterfs/src/options.h
@@ -0,0 +1,259 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _OPTIONS_H
+#define _OPTIONS_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include "xlator.h"
+/* Add possible new type of option you may need */
+typedef enum {
+ GF_OPTION_TYPE_ANY = 0,
+ GF_OPTION_TYPE_STR,
+ GF_OPTION_TYPE_INT,
+ GF_OPTION_TYPE_SIZET,
+ GF_OPTION_TYPE_PERCENT,
+ GF_OPTION_TYPE_PERCENT_OR_SIZET,
+ GF_OPTION_TYPE_BOOL,
+ GF_OPTION_TYPE_XLATOR,
+ GF_OPTION_TYPE_PATH,
+ GF_OPTION_TYPE_TIME,
+ GF_OPTION_TYPE_DOUBLE,
+ GF_OPTION_TYPE_INTERNET_ADDRESS,
+ GF_OPTION_TYPE_INTERNET_ADDRESS_LIST,
+ GF_OPTION_TYPE_PRIORITY_LIST,
+ GF_OPTION_TYPE_SIZE_LIST,
+ GF_OPTION_TYPE_MAX,
+} volume_option_type_t;
+
+typedef enum {
+ GF_OPT_VALIDATE_BOTH = 0,
+ GF_OPT_VALIDATE_MIN,
+ GF_OPT_VALIDATE_MAX,
+} opt_validate_type_t;
+
+#define ZR_VOLUME_MAX_NUM_KEY 4
+#define ZR_OPTION_MAX_ARRAY_SIZE 64
+
+/* Each translator should define this structure */
+typedef struct volume_options {
+ char *key[ZR_VOLUME_MAX_NUM_KEY];
+ /* different key, same meaning */
+ volume_option_type_t type;
+ double min; /* 0 means no range */
+ double max; /* 0 means no range */
+ char *value[ZR_OPTION_MAX_ARRAY_SIZE];
+ /* If specified, will check for one of
+ the value from this array */
+ char *default_value;
+ char *description; /* about the key */
+ /* Required for int options where only the min value
+ * is given and is 0. This will cause validation not to
+ * happen
+ */
+ opt_validate_type_t validate;
+} volume_option_t;
+
+
+typedef struct vol_opt_list {
+ struct list_head list;
+ volume_option_t *given_opt;
+} volume_opt_list_t;
+
+
+int xlator_tree_reconfigure (xlator_t *old_xl, xlator_t *new_xl);
+int xlator_validate_rec (xlator_t *xlator, char **op_errstr);
+int graph_reconf_validateopt (glusterfs_graph_t *graph, char **op_errstr);
+int xlator_option_info_list (volume_opt_list_t *list, char *key,
+ char **def_val, char **descr);
+/*
+int validate_xlator_volume_options (xlator_t *xl, dict_t *options,
+ volume_option_t *opt, char **op_errstr);
+*/
+int xlator_options_validate_list (xlator_t *xl, dict_t *options,
+ volume_opt_list_t *list, char **op_errstr);
+int xlator_option_validate (xlator_t *xl, char *key, char *value,
+ volume_option_t *opt, char **op_errstr);
+int xlator_options_validate (xlator_t *xl, dict_t *options, char **errstr);
+volume_option_t *
+xlator_volume_option_get (xlator_t *xl, const char *key);
+
+volume_option_t *
+xlator_volume_option_get_list (volume_opt_list_t *vol_list, const char *key);
+
+
+#define DECLARE_INIT_OPT(type_t, type) \
+int \
+xlator_option_init_##type (xlator_t *this, dict_t *options, char *key, \
+ type_t *val_p);
+
+DECLARE_INIT_OPT(char *, str);
+DECLARE_INIT_OPT(uint64_t, uint64);
+DECLARE_INIT_OPT(int64_t, int64);
+DECLARE_INIT_OPT(uint32_t, uint32);
+DECLARE_INIT_OPT(int32_t, int32);
+DECLARE_INIT_OPT(uint64_t, size);
+DECLARE_INIT_OPT(double, percent);
+DECLARE_INIT_OPT(double, percent_or_size);
+DECLARE_INIT_OPT(gf_boolean_t, bool);
+DECLARE_INIT_OPT(xlator_t *, xlator);
+DECLARE_INIT_OPT(char *, path);
+DECLARE_INIT_OPT(double, double);
+
+
+#define DEFINE_INIT_OPT(type_t, type, conv) \
+int \
+xlator_option_init_##type (xlator_t *this, dict_t *options, char *key, \
+ type_t *val_p) \
+{ \
+ int ret = 0; \
+ volume_option_t *opt = NULL; \
+ char *def_value = NULL; \
+ char *set_value = NULL; \
+ char *value = NULL; \
+ xlator_t *old_THIS = NULL; \
+ \
+ opt = xlator_volume_option_get (this, key); \
+ if (!opt) { \
+ gf_log (this->name, GF_LOG_WARNING, \
+ "unknown option: %s", key); \
+ ret = -1; \
+ return ret; \
+ } \
+ def_value = opt->default_value; \
+ ret = dict_get_str (options, key, &set_value); \
+ \
+ if (def_value) \
+ value = def_value; \
+ if (set_value) \
+ value = set_value; \
+ if (!value) { \
+ gf_log (this->name, GF_LOG_TRACE, "option %s not set", \
+ key); \
+ *val_p = (type_t)0; \
+ return 0; \
+ } \
+ if (value == def_value) { \
+ gf_log (this->name, GF_LOG_TRACE, \
+ "option %s using default value %s", \
+ key, value); \
+ } else { \
+ gf_log (this->name, GF_LOG_DEBUG, \
+ "option %s using set value %s", \
+ key, value); \
+ } \
+ old_THIS = THIS; \
+ THIS = this; \
+ ret = conv (value, val_p); \
+ THIS = old_THIS; \
+ if (ret) \
+ return ret; \
+ ret = xlator_option_validate (this, key, value, opt, NULL); \
+ return ret; \
+}
+
+#define GF_OPTION_INIT(key, val, type, err_label) do { \
+ int val_ret = 0; \
+ val_ret = xlator_option_init_##type (THIS, THIS->options, \
+ key, &(val)); \
+ if (val_ret) \
+ goto err_label; \
+ } while (0)
+
+
+
+#define DECLARE_RECONF_OPT(type_t, type) \
+int \
+xlator_option_reconf_##type (xlator_t *this, dict_t *options, char *key,\
+ type_t *val_p);
+
+DECLARE_RECONF_OPT(char *, str);
+DECLARE_RECONF_OPT(uint64_t, uint64);
+DECLARE_RECONF_OPT(int64_t, int64);
+DECLARE_RECONF_OPT(uint32_t, uint32);
+DECLARE_RECONF_OPT(int32_t, int32);
+DECLARE_RECONF_OPT(uint64_t, size);
+DECLARE_RECONF_OPT(double, percent);
+DECLARE_RECONF_OPT(double, percent_or_size);
+DECLARE_RECONF_OPT(gf_boolean_t, bool);
+DECLARE_RECONF_OPT(xlator_t *, xlator);
+DECLARE_RECONF_OPT(char *, path);
+DECLARE_RECONF_OPT(double, double);
+
+
+#define DEFINE_RECONF_OPT(type_t, type, conv) \
+int \
+xlator_option_reconf_##type (xlator_t *this, dict_t *options, char *key, \
+ type_t *val_p) \
+{ \
+ int ret = 0; \
+ volume_option_t *opt = NULL; \
+ char *def_value = NULL; \
+ char *set_value = NULL; \
+ char *value = NULL; \
+ xlator_t *old_THIS = NULL; \
+ \
+ opt = xlator_volume_option_get (this, key); \
+ if (!opt) { \
+ gf_log (this->name, GF_LOG_WARNING, \
+ "unknown option: %s", key); \
+ ret = -1; \
+ return ret; \
+ } \
+ def_value = opt->default_value; \
+ ret = dict_get_str (options, key, &set_value); \
+ \
+ if (def_value) \
+ value = def_value; \
+ if (set_value) \
+ value = set_value; \
+ if (!value) { \
+ gf_log (this->name, GF_LOG_TRACE, "option %s not set", \
+ key); \
+ *val_p = (type_t)0; \
+ return 0; \
+ } \
+ if (value == def_value) { \
+ gf_log (this->name, GF_LOG_TRACE, \
+ "option %s using default value %s", \
+ key, value); \
+ } else { \
+ gf_log (this->name, GF_LOG_DEBUG, \
+ "option %s using set value %s", \
+ key, value); \
+ } \
+ old_THIS = THIS; \
+ THIS = this; \
+ ret = conv (value, val_p); \
+ THIS = old_THIS; \
+ if (ret) \
+ return ret; \
+ ret = xlator_option_validate (this, key, value, opt, NULL); \
+ return ret; \
+}
+
+#define GF_OPTION_RECONF(key, val, opt, type, err_label) do { \
+ int val_ret = 0; \
+ val_ret = xlator_option_reconf_##type (THIS, opt, key, \
+ &(val)); \
+ if (val_ret) \
+ goto err_label; \
+ } while (0)
+
+
+#endif /* !_OPTIONS_H */
diff --git a/libglusterfs/src/protocol.h b/libglusterfs/src/protocol.h
deleted file mode 100644
index f03759f37..000000000
--- a/libglusterfs/src/protocol.h
+++ /dev/null
@@ -1,945 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _PROTOCOL_H
-#define _PROTOCOL_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <inttypes.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/statvfs.h>
-#include <unistd.h>
-#include <fcntl.h>
-
-#include "byte-order.h"
-
-/* Any changes in the protocol structure or adding new '[f,m]ops' needs to
- * bump the protocol version by "0.1"
- */
-
-#define GF_PROTOCOL_VERSION "2.1"
-
-struct gf_stat {
- uint64_t ino;
- uint64_t size;
- uint64_t blocks;
- uint32_t dev;
- uint32_t rdev;
- uint32_t mode;
- uint32_t nlink;
- uint32_t uid;
- uint32_t gid;
- uint32_t blksize;
- uint32_t atime;
- uint32_t atime_nsec;
- uint32_t mtime;
- uint32_t mtime_nsec;
- uint32_t ctime;
- uint32_t ctime_nsec;
-} __attribute__((packed));
-
-
-static inline void
-gf_stat_to_stat (struct gf_stat *gf_stat, struct stat *stat)
-{
- stat->st_dev = ntoh32 (gf_stat->dev);
- stat->st_ino = ntoh64 (gf_stat->ino);
- stat->st_mode = ntoh32 (gf_stat->mode);
- stat->st_nlink = ntoh32 (gf_stat->nlink);
- stat->st_uid = ntoh32 (gf_stat->uid);
- stat->st_gid = ntoh32 (gf_stat->gid);
- stat->st_rdev = ntoh32 (gf_stat->rdev);
- stat->st_size = ntoh64 (gf_stat->size);
- stat->st_blksize = ntoh32 (gf_stat->blksize);
- stat->st_blocks = ntoh64 (gf_stat->blocks);
- stat->st_atime = ntoh32 (gf_stat->atime);
- stat->st_mtime = ntoh32 (gf_stat->mtime);
- stat->st_ctime = ntoh32 (gf_stat->ctime);
- /* TODO: handle nsec */
-}
-
-
-static inline void
-gf_stat_from_stat (struct gf_stat *gf_stat, struct stat *stat)
-{
- gf_stat->dev = hton32 (stat->st_dev);
- gf_stat->ino = hton64 (stat->st_ino);
- gf_stat->mode = hton32 (stat->st_mode);
- gf_stat->nlink = hton32 (stat->st_nlink);
- gf_stat->uid = hton32 (stat->st_uid);
- gf_stat->gid = hton32 (stat->st_gid);
- gf_stat->rdev = hton32 (stat->st_rdev);
- gf_stat->size = hton64 (stat->st_size);
- gf_stat->blksize = hton32 (stat->st_blksize);
- gf_stat->blocks = hton64 (stat->st_blocks);
- gf_stat->atime = hton32 (stat->st_atime);
- gf_stat->mtime = hton32 (stat->st_mtime);
- gf_stat->ctime = hton32 (stat->st_ctime);
- /* TODO: handle nsec */
-}
-
-
-struct gf_statfs {
- uint64_t bsize;
- uint64_t frsize;
- uint64_t blocks;
- uint64_t bfree;
- uint64_t bavail;
- uint64_t files;
- uint64_t ffree;
- uint64_t favail;
- uint64_t fsid;
- uint64_t flag;
- uint64_t namemax;
-} __attribute__((packed));
-
-
-static inline void
-gf_statfs_to_statfs (struct gf_statfs *gf_stat, struct statvfs *stat)
-{
- stat->f_bsize = ntoh64 (gf_stat->bsize);
- stat->f_frsize = ntoh64 (gf_stat->frsize);
- stat->f_blocks = ntoh64 (gf_stat->blocks);
- stat->f_bfree = ntoh64 (gf_stat->bfree);
- stat->f_bavail = ntoh64 (gf_stat->bavail);
- stat->f_files = ntoh64 (gf_stat->files);
- stat->f_ffree = ntoh64 (gf_stat->ffree);
- stat->f_favail = ntoh64 (gf_stat->favail);
- stat->f_fsid = ntoh64 (gf_stat->fsid);
- stat->f_flag = ntoh64 (gf_stat->flag);
- stat->f_namemax = ntoh64 (gf_stat->namemax);
-}
-
-
-static inline void
-gf_statfs_from_statfs (struct gf_statfs *gf_stat, struct statvfs *stat)
-{
- gf_stat->bsize = hton64 (stat->f_bsize);
- gf_stat->frsize = hton64 (stat->f_frsize);
- gf_stat->blocks = hton64 (stat->f_blocks);
- gf_stat->bfree = hton64 (stat->f_bfree);
- gf_stat->bavail = hton64 (stat->f_bavail);
- gf_stat->files = hton64 (stat->f_files);
- gf_stat->ffree = hton64 (stat->f_ffree);
- gf_stat->favail = hton64 (stat->f_favail);
- gf_stat->fsid = hton64 (stat->f_fsid);
- gf_stat->flag = hton64 (stat->f_flag);
- gf_stat->namemax = hton64 (stat->f_namemax);
-}
-
-
-struct gf_flock {
- uint16_t type;
- uint16_t whence;
- uint64_t start;
- uint64_t len;
- uint32_t pid;
-} __attribute__((packed));
-
-
-static inline void
-gf_flock_to_flock (struct gf_flock *gf_flock, struct flock *flock)
-{
- flock->l_type = ntoh16 (gf_flock->type);
- flock->l_whence = ntoh16 (gf_flock->whence);
- flock->l_start = ntoh64 (gf_flock->start);
- flock->l_len = ntoh64 (gf_flock->len);
- flock->l_pid = ntoh32 (gf_flock->pid);
-}
-
-
-static inline void
-gf_flock_from_flock (struct gf_flock *gf_flock, struct flock *flock)
-{
- gf_flock->type = hton16 (flock->l_type);
- gf_flock->whence = hton16 (flock->l_whence);
- gf_flock->start = hton64 (flock->l_start);
- gf_flock->len = hton64 (flock->l_len);
- gf_flock->pid = hton32 (flock->l_pid);
-}
-
-
-struct gf_timespec {
- uint32_t tv_sec;
- uint32_t tv_nsec;
-} __attribute__((packed));
-
-
-static inline void
-gf_timespec_to_timespec (struct gf_timespec *gf_ts, struct timespec *ts)
-{
-
- ts[0].tv_sec = ntoh32 (gf_ts[0].tv_sec);
- ts[0].tv_nsec = ntoh32 (gf_ts[0].tv_nsec);
- ts[1].tv_sec = ntoh32 (gf_ts[1].tv_sec);
- ts[1].tv_nsec = ntoh32 (gf_ts[1].tv_nsec);
-}
-
-
-static inline void
-gf_timespec_from_timespec (struct gf_timespec *gf_ts, struct timespec *ts)
-{
- gf_ts[0].tv_sec = hton32 (ts[0].tv_sec);
- gf_ts[0].tv_nsec = hton32 (ts[0].tv_nsec);
- gf_ts[1].tv_sec = hton32 (ts[1].tv_sec);
- gf_ts[1].tv_nsec = hton32 (ts[1].tv_nsec);
-}
-
-
-#define GF_O_ACCMODE 003
-#define GF_O_RDONLY 00
-#define GF_O_WRONLY 01
-#define GF_O_RDWR 02
-#define GF_O_CREAT 0100
-#define GF_O_EXCL 0200
-#define GF_O_NOCTTY 0400
-#define GF_O_TRUNC 01000
-#define GF_O_APPEND 02000
-#define GF_O_NONBLOCK 04000
-#define GF_O_SYNC 010000
-#define GF_O_ASYNC 020000
-
-#define GF_O_DIRECT 040000
-#define GF_O_DIRECTORY 0200000
-#define GF_O_NOFOLLOW 0400000
-#define GF_O_NOATIME 01000000
-#define GF_O_CLOEXEC 02000000
-
-#define GF_O_LARGEFILE 0100000
-
-#define XLATE_BIT(from, to, bit) do { \
- if (from & bit) \
- to = to | GF_##bit; \
- } while (0)
-
-#define UNXLATE_BIT(from, to, bit) do { \
- if (from & GF_##bit) \
- to = to | bit; \
- } while (0)
-
-#define XLATE_ACCESSMODE(from, to) do { \
- switch (from & O_ACCMODE) { \
- case O_RDONLY: to |= GF_O_RDONLY; \
- break; \
- case O_WRONLY: to |= GF_O_WRONLY; \
- break; \
- case O_RDWR: to |= GF_O_RDWR; \
- break; \
- } \
- } while (0)
-
-#define UNXLATE_ACCESSMODE(from, to) do { \
- switch (from & GF_O_ACCMODE) { \
- case GF_O_RDONLY: to |= O_RDONLY; \
- break; \
- case GF_O_WRONLY: to |= O_WRONLY; \
- break; \
- case GF_O_RDWR: to |= O_RDWR; \
- break; \
- } \
- } while (0)
-
-static inline uint32_t
-gf_flags_from_flags (uint32_t flags)
-{
- uint32_t gf_flags = 0;
-
- XLATE_ACCESSMODE (flags, gf_flags);
-
- XLATE_BIT (flags, gf_flags, O_CREAT);
- XLATE_BIT (flags, gf_flags, O_EXCL);
- XLATE_BIT (flags, gf_flags, O_NOCTTY);
- XLATE_BIT (flags, gf_flags, O_TRUNC);
- XLATE_BIT (flags, gf_flags, O_APPEND);
- XLATE_BIT (flags, gf_flags, O_NONBLOCK);
- XLATE_BIT (flags, gf_flags, O_SYNC);
- XLATE_BIT (flags, gf_flags, O_ASYNC);
-
- XLATE_BIT (flags, gf_flags, O_DIRECT);
- XLATE_BIT (flags, gf_flags, O_DIRECTORY);
- XLATE_BIT (flags, gf_flags, O_NOFOLLOW);
-#ifdef O_NOATIME
- XLATE_BIT (flags, gf_flags, O_NOATIME);
-#endif
-#ifdef O_CLOEXEC
- XLATE_BIT (flags, gf_flags, O_CLOEXEC);
-#endif
- XLATE_BIT (flags, gf_flags, O_LARGEFILE);
-
- return gf_flags;
-}
-
-static inline uint32_t
-gf_flags_to_flags (uint32_t gf_flags)
-{
- uint32_t flags = 0;
-
- UNXLATE_ACCESSMODE (gf_flags, flags);
-
- UNXLATE_BIT (gf_flags, flags, O_CREAT);
- UNXLATE_BIT (gf_flags, flags, O_EXCL);
- UNXLATE_BIT (gf_flags, flags, O_NOCTTY);
- UNXLATE_BIT (gf_flags, flags, O_TRUNC);
- UNXLATE_BIT (gf_flags, flags, O_APPEND);
- UNXLATE_BIT (gf_flags, flags, O_NONBLOCK);
- UNXLATE_BIT (gf_flags, flags, O_SYNC);
- UNXLATE_BIT (gf_flags, flags, O_ASYNC);
-
- UNXLATE_BIT (gf_flags, flags, O_DIRECT);
- UNXLATE_BIT (gf_flags, flags, O_DIRECTORY);
- UNXLATE_BIT (gf_flags, flags, O_NOFOLLOW);
-#ifdef O_NOATIME
- UNXLATE_BIT (gf_flags, flags, O_NOATIME);
-#endif
-#ifdef O_CLOEXEC
- UNXLATE_BIT (gf_flags, flags, O_CLOEXEC);
-#endif
- UNXLATE_BIT (gf_flags, flags, O_LARGEFILE);
-
- return flags;
-}
-
-
-typedef struct {
- uint64_t ino;
- char path[0]; /* NULL terminated */
-} __attribute__((packed)) gf_fop_stat_req_t;;
-typedef struct {
- struct gf_stat stat;
-} __attribute__((packed)) gf_fop_stat_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- uint32_t size;
- char path[0]; /* NULL terminated */
-} __attribute__((packed)) gf_fop_readlink_req_t;
-typedef struct {
- char path[0]; /* NULL terminated */
-} __attribute__((packed)) gf_fop_readlink_rsp_t;
-
-
-typedef struct {
- uint64_t par;
- uint64_t dev;
- uint32_t mode;
- char path[0]; /* NULL terminated */
- char bname[0]; /* NULL terminated */
-} __attribute__((packed)) gf_fop_mknod_req_t;
-typedef struct {
- struct gf_stat stat;
-} __attribute__((packed)) gf_fop_mknod_rsp_t;
-
-
-typedef struct {
- uint64_t par;
- uint32_t mode;
- char path[0]; /* NULL terminated */
- char bname[0]; /* NULL terminated */
-} __attribute__((packed)) gf_fop_mkdir_req_t;
-typedef struct {
- struct gf_stat stat;
-} __attribute__((packed)) gf_fop_mkdir_rsp_t;
-
-
-typedef struct {
- uint64_t par;
- char path[0]; /* NULL terminated */
- char bname[0]; /* NULL terminated */
-} __attribute__((packed)) gf_fop_unlink_req_t;
-typedef struct {
-} __attribute__((packed)) gf_fop_unlink_rsp_t;
-
-
-typedef struct {
- uint64_t par;
- char path[0];
- char bname[0]; /* NULL terminated */
-} __attribute__((packed)) gf_fop_rmdir_req_t;
-typedef struct {
-} __attribute__((packed)) gf_fop_rmdir_rsp_t;
-
-
-typedef struct {
- uint64_t par;
- char path[0];
- char bname[0];
- char linkname[0];
-} __attribute__((packed)) gf_fop_symlink_req_t;
-typedef struct {
- struct gf_stat stat;
-}__attribute__((packed)) gf_fop_symlink_rsp_t;
-
-
-typedef struct {
- uint64_t oldpar;
- uint64_t newpar;
- char oldpath[0];
- char oldbname[0]; /* NULL terminated */
- char newpath[0];
- char newbname[0]; /* NULL terminated */
-} __attribute__((packed)) gf_fop_rename_req_t;
-typedef struct {
- struct gf_stat stat;
-} __attribute__((packed)) gf_fop_rename_rsp_t;
-
-
-typedef struct {
- uint64_t oldino;
- uint64_t newpar;
- char oldpath[0];
- char newpath[0];
- char newbname[0];
-}__attribute__((packed)) gf_fop_link_req_t;
-typedef struct {
- struct gf_stat stat;
-} __attribute__((packed)) gf_fop_link_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- uint32_t mode;
- char path[0];
-} __attribute__((packed)) gf_fop_chmod_req_t;
-typedef struct {
- struct gf_stat stat;
-} __attribute__((packed)) gf_fop_chmod_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- uint32_t uid;
- uint32_t gid;
- char path[0];
-} __attribute__((packed)) gf_fop_chown_req_t;
-typedef struct {
- struct gf_stat stat;
-} __attribute__((packed)) gf_fop_chown_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- uint64_t offset;
- char path[0];
-} __attribute__((packed)) gf_fop_truncate_req_t;
-typedef struct {
- struct gf_stat stat;
-} __attribute__((packed)) gf_fop_truncate_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- uint32_t flags;
- char path[0];
-} __attribute__((packed)) gf_fop_open_req_t;
-typedef struct {
- int64_t fd;
-} __attribute__((packed)) gf_fop_open_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- int64_t fd;
- uint64_t offset;
- uint32_t size;
-} __attribute__((packed)) gf_fop_read_req_t;
-typedef struct {
- struct gf_stat stat;
- char buf[0];
-} __attribute__((packed)) gf_fop_read_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- int64_t fd;
- uint64_t offset;
- uint32_t size;
-} __attribute__((packed)) gf_fop_write_req_t;
-typedef struct {
- struct gf_stat stat;
-} __attribute__((packed)) gf_fop_write_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- char path[0];
-} __attribute__((packed)) gf_fop_statfs_req_t;
-typedef struct {
- struct gf_statfs statfs;
-} __attribute__((packed)) gf_fop_statfs_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- int64_t fd;
-} __attribute__((packed)) gf_fop_flush_req_t;
-typedef struct { } __attribute__((packed)) gf_fop_flush_rsp_t;
-
-
-typedef struct fsync_req {
- uint64_t ino;
- int64_t fd;
- uint32_t data;
-} __attribute__((packed)) gf_fop_fsync_req_t;
-typedef struct {
-} __attribute__((packed)) gf_fop_fsync_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- uint32_t flags;
- uint32_t dict_len;
- char dict[0];
- char path[0];
-} __attribute__((packed)) gf_fop_setxattr_req_t;
-typedef struct { } __attribute__((packed)) gf_fop_setxattr_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- int64_t fd;
- uint32_t flags;
- uint32_t dict_len;
- char dict[0];
-} __attribute__((packed)) gf_fop_fsetxattr_req_t;
-typedef struct { } __attribute__((packed)) gf_fop_fsetxattr_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- uint32_t flags;
- uint32_t dict_len;
- char dict[0];
- char path[0];
-} __attribute__((packed)) gf_fop_xattrop_req_t;
-
-typedef struct {
- uint32_t dict_len;
- char dict[0];
-} __attribute__((packed)) gf_fop_xattrop_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- int64_t fd;
- uint32_t flags;
- uint32_t dict_len;
- char dict[0];
-} __attribute__((packed)) gf_fop_fxattrop_req_t;
-
-typedef struct {
- uint32_t dict_len;
- char dict[0];
-} __attribute__((packed)) gf_fop_fxattrop_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- uint32_t namelen;
- char path[0];
- char name[0];
-} __attribute__((packed)) gf_fop_getxattr_req_t;
-typedef struct {
- uint32_t dict_len;
- char dict[0];
-} __attribute__((packed)) gf_fop_getxattr_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- int64_t fd;
- uint32_t namelen;
- char name[0];
-} __attribute__((packed)) gf_fop_fgetxattr_req_t;
-typedef struct {
- uint32_t dict_len;
- char dict[0];
-} __attribute__((packed)) gf_fop_fgetxattr_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- char path[0];
- char name[0];
-} __attribute__((packed)) gf_fop_removexattr_req_t;
-typedef struct { } __attribute__((packed)) gf_fop_removexattr_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- char path[0];
-} __attribute__((packed)) gf_fop_opendir_req_t;
-typedef struct {
- int64_t fd;
-} __attribute__((packed)) gf_fop_opendir_rsp_t;
-
-
-typedef struct fsyncdir_req {
- uint64_t ino;
- int64_t fd;
- int32_t data;
-} __attribute__((packed)) gf_fop_fsyncdir_req_t;
-typedef struct {
-} __attribute__((packed)) gf_fop_fsyncdir_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- int64_t fd;
- uint64_t offset;
- uint32_t size;
-} __attribute__((packed)) gf_fop_readdir_req_t;
-typedef struct {
- uint32_t size;
- char buf[0];
-} __attribute__((packed)) gf_fop_readdir_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- uint32_t mask;
- char path[0];
-} __attribute__((packed)) gf_fop_access_req_t;
-typedef struct {
-} __attribute__((packed)) gf_fop_access_rsp_t;
-
-
-typedef struct {
- uint64_t par;
- uint32_t flags;
- uint32_t mode;
- char path[0];
- char bname[0];
-} __attribute__((packed)) gf_fop_create_req_t;
-typedef struct {
- struct gf_stat stat;
- uint64_t fd;
-} __attribute__((packed)) gf_fop_create_rsp_t;
-
-
-
-typedef struct {
- uint64_t ino;
- int64_t fd;
- uint64_t offset;
-} __attribute__((packed)) gf_fop_ftruncate_req_t;
-typedef struct {
- struct gf_stat stat;
-} __attribute__((packed)) gf_fop_ftruncate_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- int64_t fd;
-} __attribute__((packed)) gf_fop_fstat_req_t;
-typedef struct {
- struct gf_stat stat;
-} __attribute__((packed)) gf_fop_fstat_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- int64_t fd;
- uint32_t cmd;
- uint32_t type;
- struct gf_flock flock;
-} __attribute__((packed)) gf_fop_lk_req_t;
-typedef struct {
- struct gf_flock flock;
-} __attribute__((packed)) gf_fop_lk_rsp_t;
-
-typedef struct {
- uint64_t ino;
- uint32_t cmd;
- uint32_t type;
- struct gf_flock flock;
- char path[0];
- char volume[0];
-} __attribute__((packed)) gf_fop_inodelk_req_t;
-typedef struct {
-} __attribute__((packed)) gf_fop_inodelk_rsp_t;
-
-typedef struct {
- uint64_t ino;
- int64_t fd;
- uint32_t cmd;
- uint32_t type;
- struct gf_flock flock;
- char volume[0];
-} __attribute__((packed)) gf_fop_finodelk_req_t;
-typedef struct {
-} __attribute__((packed)) gf_fop_finodelk_rsp_t;
-
-typedef struct {
- uint64_t ino;
- uint32_t cmd;
- uint32_t type;
- uint64_t namelen;
- char path[0];
- char name[0];
- char volume[0];
-} __attribute__((packed)) gf_fop_entrylk_req_t;
-typedef struct {
-} __attribute__((packed)) gf_fop_entrylk_rsp_t;
-
-typedef struct {
- uint64_t ino;
- int64_t fd;
- uint32_t cmd;
- uint32_t type;
- uint64_t namelen;
- char name[0];
- char volume[0];
-} __attribute__((packed)) gf_fop_fentrylk_req_t;
-typedef struct {
-} __attribute__((packed)) gf_fop_fentrylk_rsp_t;
-
-typedef struct {
- uint64_t ino;
- struct gf_timespec tv[2];
- char path[0];
-} __attribute__((packed)) gf_fop_utimens_req_t;
-typedef struct {
- struct gf_stat stat;
-} __attribute__((packed)) gf_fop_utimens_rsp_t;
-
-typedef struct {
- uint64_t ino;
- uint64_t fd;
- uint32_t mode;
-} __attribute__((packed)) gf_fop_fchmod_req_t;
-typedef struct {
- struct gf_stat stat;
-} __attribute__((packed)) gf_fop_fchmod_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- int64_t fd;
- uint32_t uid;
- uint32_t gid;
-} __attribute__((packed)) gf_fop_fchown_req_t;
-typedef struct {
- struct gf_stat stat;
-} __attribute__((packed)) gf_fop_fchown_rsp_t;
-
-
-typedef struct {
- uint64_t ino; /* NOTE: used only in case of 'root' lookup */
- uint64_t par;
- uint32_t flags;
- uint32_t dictlen;
- char path[0];
- char bname[0];
- char dict[0];
-} __attribute__((packed)) gf_fop_lookup_req_t;
-typedef struct {
- struct gf_stat stat;
- uint32_t dict_len;
- char dict[0];
-} __attribute__((packed)) gf_fop_lookup_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- int64_t fd;
- uint32_t flags;
- uint32_t count;
- char buf[0];
-} __attribute__((packed)) gf_fop_setdents_req_t;
-typedef struct { } __attribute__((packed)) gf_fop_setdents_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- int64_t fd;
- uint64_t offset;
- uint32_t size;
- uint32_t flags;
-} __attribute__((packed)) gf_fop_getdents_req_t;
-typedef struct {
- uint32_t count;
- char buf[0];
-} __attribute__((packed)) gf_fop_getdents_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- uint32_t flag;
- char path[0];
-} __attribute__((packed)) gf_fop_checksum_req_t;
-typedef struct {
- unsigned char fchecksum[0];
- unsigned char dchecksum[0];
-} __attribute__((packed)) gf_fop_checksum_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- int32_t timeout;
-} __attribute__((packed)) gf_fop_lock_notify_req_t;
-typedef struct { } __attribute__((packed)) gf_fop_lock_notify_rsp_t;
-
-
-typedef struct {
- int64_t fd;
- int32_t timeout;
-} __attribute__((packed)) gf_fop_lock_fnotify_req_t;
-typedef struct { } __attribute__((packed)) gf_fop_lock_fnotify_rsp_t;
-
-
-typedef struct {
- char name[0];
-} __attribute__((packed)) gf_mop_lock_req_t;
-typedef struct {} __attribute__((packed)) gf_mop_lock_rsp_t;
-
-typedef struct {
- char name[0];
-} __attribute__((packed)) gf_mop_unlock_req_t;
-typedef struct {} __attribute__((packed)) gf_mop_unlock_rsp_t;
-
-typedef struct {
- char pattern[0];
-} __attribute__((packed)) gf_mop_listlocks_req_t;
-typedef struct {} __attribute__((packed)) gf_mop_listlocks_rsp_t;
-
-typedef struct {
- uint32_t flags;
-} __attribute__((packed)) gf_mop_stats_req_t;
-typedef struct {
- char buf[0];
-} __attribute__((packed)) gf_mop_stats_rsp_t;
-
-typedef struct {
- uint32_t flags;
- uint32_t keylen;
- char key[0];
-} __attribute__((packed)) gf_mop_getspec_req_t;
-typedef struct {
- char spec[0];
-} __attribute__((packed)) gf_mop_getspec_rsp_t;
-
-
-typedef struct {
- uint32_t msglen;
- char msg[0];
-} __attribute__((packed)) gf_mop_log_req_t;
-typedef struct {
-} __attribute__((packed)) gf_mop_log_rsp_t;
-
-
-typedef struct {
- uint32_t dict_len;
- char buf[0];
-} __attribute__((packed)) gf_mop_setvolume_req_t;
-typedef struct {
- uint32_t dict_len;
- char buf[0];
-} __attribute__((packed)) gf_mop_setvolume_rsp_t;
-
-
-typedef struct {
-} __attribute__((packed)) gf_mop_ping_req_t;
-typedef struct {
-} __attribute__((packed)) gf_mop_ping_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- int64_t fd;
-} __attribute__((packed)) gf_cbk_releasedir_req_t;
-typedef struct {
-} __attribute__((packed)) gf_cbk_releasedir_rsp_t;
-
-
-typedef struct {
- uint64_t ino;
- int64_t fd;
-} __attribute__((packed)) gf_cbk_release_req_t;
-typedef struct {
-} __attribute__((packed)) gf_cbk_release_rsp_t;
-
-
-typedef struct {
- uint32_t count;
- uint64_t ino_array[0];
-} __attribute__((packed)) gf_cbk_forget_req_t;
-typedef struct { } __attribute__((packed)) gf_cbk_forget_rsp_t;
-
-
-typedef struct {
- uint32_t pid;
- uint32_t uid;
- uint32_t gid;
-} __attribute__ ((packed)) gf_hdr_req_t;
-
-
-typedef struct {
- uint32_t op_ret;
- uint32_t op_errno;
-} __attribute__ ((packed)) gf_hdr_rsp_t;
-
-
-typedef struct {
- uint64_t callid;
- uint32_t type;
- uint32_t op;
- uint32_t size;
- union {
- gf_hdr_req_t req;
- gf_hdr_rsp_t rsp;
- } __attribute__ ((packed));
-} __attribute__ ((packed)) gf_hdr_common_t;
-
-
-static inline gf_hdr_common_t *
-__gf_hdr_new (int size)
-{
- gf_hdr_common_t *hdr = NULL;
-
- /* TODO: use mem-pool */
- hdr = CALLOC (sizeof (gf_hdr_common_t) + size, 1);
-
- if (!hdr) {
- return NULL;
- }
-
- hdr->size = hton32 (size);
-
- return hdr;
-}
-
-
-#define gf_hdr_len(type, x) (sizeof (gf_hdr_common_t) + sizeof (*type) + x)
-#define gf_hdr_new(type, x) __gf_hdr_new (sizeof (*type) + x)
-
-
-static inline void *
-gf_param (gf_hdr_common_t *hdr)
-{
- return ((void *)hdr) + sizeof (*hdr);
-}
-
-#endif
diff --git a/libglusterfs/src/rbthash.c b/libglusterfs/src/rbthash.c
new file mode 100644
index 000000000..0d7b9e521
--- /dev/null
+++ b/libglusterfs/src/rbthash.c
@@ -0,0 +1,458 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#include "rbthash.h"
+#include "rb.h"
+#include "locking.h"
+#include "mem-pool.h"
+#include "logging.h"
+
+#include <pthread.h>
+#include <string.h>
+
+
+int
+rbthash_comparator (void *entry1, void *entry2, void *param)
+{
+ int ret = 0;
+ rbthash_entry_t *e1 = NULL;
+ rbthash_entry_t *e2 = NULL;
+
+ if ((!entry1) || (!entry2) || (!param))
+ return -1;
+
+ e1 = (rbthash_entry_t *)entry1;
+ e2 = (rbthash_entry_t *)entry2;
+
+ if (e1->keylen != e2->keylen) {
+ if (e1->keylen < e2->keylen)
+ ret = -1;
+ else if (e1->keylen > e2->keylen)
+ ret = 1;
+ } else
+ ret = memcmp (e1->key, e2->key, e1->keylen);
+
+ return ret;
+}
+
+
+int
+__rbthash_init_buckets (rbthash_table_t *tbl, int buckets)
+{
+ int i = 0;
+ int ret = -1;
+
+ if (!tbl)
+ return -1;
+
+ for (; i < buckets; i++) {
+ LOCK_INIT (&tbl->buckets[i].bucketlock);
+ tbl->buckets[i].bucket = rb_create ((rb_comparison_func *)rbthash_comparator, tbl, NULL);
+ if (!tbl->buckets[i].bucket) {
+ gf_log (GF_RBTHASH, GF_LOG_ERROR, "Failed to create rb"
+ " table bucket");
+ ret = -1;
+ goto err;
+ }
+ }
+
+ ret = 0;
+err:
+ return ret;
+}
+
+
+/*
+ * rbthash_table_init - Initialize a RBT based hash table
+ * @buckets - Number of buckets in the hash table
+ * @hfunc - hashing function
+ * @dfunc - destroyer for data in the RBT
+ * @expected_entries - Number of entries expected in RBT. Mutually exclusive
+ * with entrypool.
+ * @entrypool - Memory pool in lieu of expected_entries.
+ */
+
+rbthash_table_t *
+rbthash_table_init (int buckets, rbt_hasher_t hfunc,
+ rbt_data_destroyer_t dfunc,
+ unsigned long expected_entries,
+ struct mem_pool *entrypool)
+{
+ rbthash_table_t *newtab = NULL;
+ int ret = -1;
+
+ if (!hfunc) {
+ gf_log (GF_RBTHASH, GF_LOG_ERROR, "Hash function not given");
+ return NULL;
+ }
+
+ if (!entrypool && !expected_entries) {
+ gf_log (GF_RBTHASH, GF_LOG_ERROR,
+ "Both mem-pool and expected entries not provided");
+ return NULL;
+ }
+
+ if (entrypool && expected_entries) {
+ gf_log (GF_RBTHASH, GF_LOG_ERROR,
+ "Both mem-pool and expected entries are provided");
+ return NULL;
+ }
+
+
+ newtab = GF_CALLOC (1, sizeof (*newtab),
+ gf_common_mt_rbthash_table_t);
+ if (!newtab)
+ return NULL;
+
+ newtab->buckets = GF_CALLOC (buckets, sizeof (struct rbthash_bucket),
+ gf_common_mt_rbthash_bucket);
+ if (!newtab->buckets) {
+ goto free_newtab;
+ }
+
+ if (expected_entries) {
+ newtab->entrypool =
+ mem_pool_new (rbthash_entry_t, expected_entries);
+ if (!newtab->entrypool) {
+ gf_log (GF_RBTHASH, GF_LOG_ERROR,
+ "Failed to allocate mem-pool");
+ goto free_buckets;
+ }
+ newtab->pool_alloced = _gf_true;
+ } else {
+ newtab->entrypool = entrypool;
+ }
+
+ LOCK_INIT (&newtab->tablelock);
+ INIT_LIST_HEAD (&newtab->list);
+ newtab->numbuckets = buckets;
+ ret = __rbthash_init_buckets (newtab, buckets);
+
+ if (ret == -1) {
+ gf_log (GF_RBTHASH, GF_LOG_ERROR, "Failed to init buckets");
+ if (newtab->pool_alloced)
+ mem_pool_destroy (newtab->entrypool);
+ } else {
+ gf_log (GF_RBTHASH, GF_LOG_TRACE, "Inited hash table: buckets:"
+ " %d", buckets);
+ }
+
+ newtab->hashfunc = hfunc;
+ newtab->dfunc = dfunc;
+
+free_buckets:
+ if (ret == -1)
+ GF_FREE (newtab->buckets);
+
+free_newtab:
+ if (ret == -1) {
+ GF_FREE (newtab);
+ newtab = NULL;
+ }
+
+ return newtab;
+}
+
+rbthash_entry_t *
+rbthash_init_entry (rbthash_table_t *tbl, void *data, void *key, int keylen)
+{
+ int ret = -1;
+ rbthash_entry_t *entry = NULL;
+
+ if ((!tbl) || (!data) || (!key))
+ return NULL;
+
+ entry = mem_get (tbl->entrypool);
+ if (!entry) {
+ gf_log (GF_RBTHASH, GF_LOG_ERROR, "Failed to get entry from"
+ " mem-pool");
+ goto ret;
+ }
+
+ entry->data = data;
+ entry->key = GF_CALLOC (keylen, sizeof (char), gf_common_mt_char);
+ if (!entry->key) {
+ goto free_entry;
+ }
+
+ INIT_LIST_HEAD (&entry->list);
+ memcpy (entry->key, key, keylen);
+ entry->keylen = keylen;
+ entry->keyhash = tbl->hashfunc (entry->key, entry->keylen);
+ gf_log (GF_RBTHASH, GF_LOG_TRACE, "HASH: %u", entry->keyhash);
+
+ ret = 0;
+free_entry:
+ if (ret == -1) {
+ mem_put (entry);
+ entry = NULL;
+ }
+
+ret:
+ return entry;
+}
+
+
+void
+rbthash_deinit_entry (rbthash_table_t *tbl, rbthash_entry_t *entry)
+{
+
+ if (!entry)
+ return;
+
+ GF_FREE (entry->key);
+
+ if (tbl) {
+ if ((entry->data) && (tbl->dfunc))
+ tbl->dfunc (entry->data);
+
+ LOCK (&tbl->tablelock);
+ {
+ list_del_init (&entry->list);
+ }
+ UNLOCK (&tbl->tablelock);
+
+ mem_put (entry);
+ }
+
+ return;
+}
+
+
+static inline struct rbthash_bucket *
+rbthash_entry_bucket (rbthash_table_t *tbl, rbthash_entry_t * entry)
+{
+ int nbucket = 0;
+
+ nbucket = (entry->keyhash % tbl->numbuckets);
+ gf_log (GF_RBTHASH, GF_LOG_TRACE, "BUCKET: %d", nbucket);
+ return &tbl->buckets[nbucket];
+}
+
+
+int
+rbthash_insert_entry (rbthash_table_t *tbl, rbthash_entry_t *entry)
+{
+ struct rbthash_bucket *bucket = NULL;
+ int ret = -1;
+
+ if ((!tbl) || (!entry))
+ return -1;
+
+ bucket = rbthash_entry_bucket (tbl, entry);
+ if (!bucket) {
+ gf_log (GF_RBTHASH, GF_LOG_ERROR, "Failed to get bucket");
+ goto err;
+ }
+
+ ret = 0;
+ LOCK (&bucket->bucketlock);
+ {
+ if (!rb_probe (bucket->bucket, (void *)entry)) {
+ gf_log (GF_RBTHASH, GF_LOG_ERROR, "Failed to insert"
+ " entry");
+ ret = -1;
+ }
+ }
+ UNLOCK (&bucket->bucketlock);
+
+err:
+ return ret;
+}
+
+
+int
+rbthash_insert (rbthash_table_t *tbl, void *data, void *key, int keylen)
+{
+ rbthash_entry_t *entry = NULL;
+ int ret = -1;
+
+ if ((!tbl) || (!data) || (!key))
+ return -1;
+
+ entry = rbthash_init_entry (tbl, data, key, keylen);
+ if (!entry) {
+ gf_log (GF_RBTHASH, GF_LOG_ERROR, "Failed to init entry");
+ goto err;
+ }
+
+ ret = rbthash_insert_entry (tbl, entry);
+
+ if (ret == -1) {
+ gf_log (GF_RBTHASH, GF_LOG_ERROR, "Failed to insert entry");
+ rbthash_deinit_entry (tbl, entry);
+ }
+
+ LOCK (&tbl->tablelock);
+ {
+ list_add_tail (&entry->list, &tbl->list);
+ }
+ UNLOCK (&tbl->tablelock);
+
+err:
+ return ret;
+}
+
+static inline struct rbthash_bucket *
+rbthash_key_bucket (rbthash_table_t *tbl, void *key, int keylen)
+{
+ uint32_t keyhash = 0;
+ int nbucket = 0;
+
+ if ((!tbl) || (!key))
+ return NULL;
+
+ keyhash = tbl->hashfunc (key, keylen);
+ gf_log (GF_RBTHASH, GF_LOG_TRACE, "HASH: %u", keyhash);
+ nbucket = (keyhash % tbl->numbuckets);
+ gf_log (GF_RBTHASH, GF_LOG_TRACE, "BUCKET: %u", nbucket);
+
+ return &tbl->buckets[nbucket];
+}
+
+
+void *
+rbthash_get (rbthash_table_t *tbl, void *key, int keylen)
+{
+ struct rbthash_bucket *bucket = NULL;
+ rbthash_entry_t *entry = NULL;
+ rbthash_entry_t searchentry = {0, };
+
+ if ((!tbl) || (!key))
+ return NULL;
+
+ bucket = rbthash_key_bucket (tbl, key, keylen);
+ if (!bucket) {
+ gf_log (GF_RBTHASH, GF_LOG_ERROR, "Failed to get bucket");
+ return NULL;
+ }
+
+ searchentry.key = key;
+ searchentry.keylen = keylen;
+ LOCK (&bucket->bucketlock);
+ {
+ entry = rb_find (bucket->bucket, &searchentry);
+ }
+ UNLOCK (&bucket->bucketlock);
+
+ if (!entry)
+ return NULL;
+
+ return entry->data;
+}
+
+
+void *
+rbthash_remove (rbthash_table_t *tbl, void *key, int keylen)
+{
+ struct rbthash_bucket *bucket = NULL;
+ rbthash_entry_t *entry = NULL;
+ rbthash_entry_t searchentry = {0, };
+ void *dataref = NULL;
+
+ if ((!tbl) || (!key))
+ return NULL;
+
+ bucket = rbthash_key_bucket (tbl, key, keylen);
+ if (!bucket) {
+ gf_log (GF_RBTHASH, GF_LOG_ERROR, "Failed to get bucket");
+ return NULL;
+ }
+
+ searchentry.key = key;
+ searchentry.keylen = keylen;
+
+ LOCK (&bucket->bucketlock);
+ {
+ entry = rb_delete (bucket->bucket, &searchentry);
+ }
+ UNLOCK (&bucket->bucketlock);
+
+ if (!entry)
+ return NULL;
+
+ GF_FREE (entry->key);
+ dataref = entry->data;
+
+ LOCK (&tbl->tablelock);
+ {
+ list_del_init (&entry->list);
+ }
+ UNLOCK (&tbl->tablelock);
+
+ mem_put (entry);
+
+ return dataref;
+}
+
+
+void
+rbthash_entry_deiniter (void *entry, void *rbparam)
+{
+ if (!entry)
+ return;
+
+ rbthash_deinit_entry (rbparam, entry);
+}
+
+
+void
+rbthash_table_destroy_buckets (rbthash_table_t *tbl)
+{
+ int x = 0;
+ if (!tbl)
+ return;
+
+ for (;x < tbl->numbuckets; x++) {
+ LOCK_DESTROY (&tbl->buckets[x].bucketlock);
+ rb_destroy (tbl->buckets[x].bucket, rbthash_entry_deiniter);
+ }
+
+ return;
+}
+
+
+void
+rbthash_table_destroy (rbthash_table_t *tbl)
+{
+ if (!tbl)
+ return;
+
+ rbthash_table_destroy_buckets (tbl);
+ if (tbl->pool_alloced)
+ mem_pool_destroy (tbl->entrypool);
+
+ GF_FREE (tbl->buckets);
+ GF_FREE (tbl);
+}
+
+
+void
+rbthash_table_traverse (rbthash_table_t *tbl, rbt_traverse_t traverse,
+ void *mydata)
+{
+ rbthash_entry_t *entry = NULL;
+
+ if ((tbl == NULL) || (traverse == NULL)) {
+ goto out;
+ }
+
+ LOCK (&tbl->tablelock);
+ {
+ list_for_each_entry (entry, &tbl->list, list) {
+ traverse (entry->data, mydata);
+ }
+ }
+ UNLOCK (&tbl->tablelock);
+
+out:
+ return;
+}
diff --git a/libglusterfs/src/rbthash.h b/libglusterfs/src/rbthash.h
new file mode 100644
index 000000000..b093ce998
--- /dev/null
+++ b/libglusterfs/src/rbthash.h
@@ -0,0 +1,77 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __RBTHASH_TABLE_H_
+#define __RBTHASH_TABLE_H_
+#include "rb.h"
+#include "locking.h"
+#include "mem-pool.h"
+#include "logging.h"
+#include "common-utils.h"
+#include "list.h"
+
+#include <pthread.h>
+
+#define GF_RBTHASH_MEMPOOL 16384 //1048576
+#define GF_RBTHASH "rbthash"
+
+struct rbthash_bucket {
+ struct rb_table *bucket;
+ gf_lock_t bucketlock;
+};
+
+typedef struct rbthash_entry {
+ void *data;
+ void *key;
+ int keylen;
+ uint32_t keyhash;
+ struct list_head list;
+} rbthash_entry_t;
+
+typedef uint32_t (*rbt_hasher_t) (void *data, int len);
+typedef void (*rbt_data_destroyer_t) (void *data);
+typedef void (*rbt_traverse_t) (void *data, void *mydata);
+
+typedef struct rbthash_table {
+ int size;
+ int numbuckets;
+ struct mem_pool *entrypool;
+ gf_lock_t tablelock;
+ struct rbthash_bucket *buckets;
+ rbt_hasher_t hashfunc;
+ rbt_data_destroyer_t dfunc;
+ gf_boolean_t pool_alloced;
+ struct list_head list;
+} rbthash_table_t;
+
+extern rbthash_table_t *
+rbthash_table_init (int buckets, rbt_hasher_t hfunc,
+ rbt_data_destroyer_t dfunc, unsigned long expected_entries,
+ struct mem_pool *entrypool);
+
+extern int
+rbthash_insert (rbthash_table_t *tbl, void *data, void *key, int keylen);
+
+extern void *
+rbthash_get (rbthash_table_t *tbl, void *key, int keylen);
+
+extern void *
+rbthash_remove (rbthash_table_t *tbl, void *key, int keylen);
+
+extern void *
+rbthash_replace (rbthash_table_t *tbl, void *key, int keylen, void *newdata);
+
+extern void
+rbthash_table_destroy (rbthash_table_t *tbl);
+
+extern void
+rbthash_table_traverse (rbthash_table_t *tbl, rbt_traverse_t traverse,
+ void *mydata);
+#endif
diff --git a/libglusterfs/src/revision.h b/libglusterfs/src/revision.h
index 03c887fc0..c1ea4a9e9 100644
--- a/libglusterfs/src/revision.h
+++ b/libglusterfs/src/revision.h
@@ -1 +1 @@
-#define GLUSTERFS_REPOSITORY_REVISION "git://git.sv.gnu.org/gluster.git"
+#define GLUSTERFS_REPOSITORY_REVISION "git://git.gluster.com/glusterfs.git"
diff --git a/libglusterfs/src/run.c b/libglusterfs/src/run.c
new file mode 100644
index 000000000..4fd2a3a0d
--- /dev/null
+++ b/libglusterfs/src/run.c
@@ -0,0 +1,513 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <assert.h>
+#include <signal.h>
+#include <sys/wait.h>
+#include <sys/resource.h>
+
+#ifdef RUN_STANDALONE
+#define GF_CALLOC(n, s, t) calloc(n, s)
+#define GF_ASSERT(cond) assert(cond)
+#define GF_REALLOC(p, s) realloc(p, s)
+#define GF_FREE(p) free(p)
+#define gf_strdup(s) strdup(s)
+#define gf_vasprintf(p, f, va) vasprintf(p, f, va)
+#define gf_loglevel_t int
+#define gf_log(dom, levl, fmt, args...) printf("LOG: " fmt "\n", ##args)
+#define LOG_DEBUG 0
+#ifdef __linux__
+#define GF_LINUX_HOST_OS
+#endif
+#else /* ! RUN_STANDALONE */
+#include "glusterfs.h"
+#include "common-utils.h"
+#endif
+
+#include "run.h"
+
+void
+runinit (runner_t *runner)
+{
+ int i = 0;
+
+ runner->argvlen = 64;
+ runner->argv = GF_CALLOC (runner->argvlen,
+ sizeof (*runner->argv),
+ gf_common_mt_run_argv);
+ runner->runerr = runner->argv ? 0 : errno;
+ runner->chpid = -1;
+ for (i = 0; i < 3; i++) {
+ runner->chfd[i] = -1;
+ runner->chio[i] = NULL;
+ }
+}
+
+FILE *
+runner_chio (runner_t *runner, int fd)
+{
+ GF_ASSERT (fd > 0 && fd < 3);
+
+ if ((fd > 0) && (fd < 3))
+ return runner->chio[fd];
+
+ return NULL;
+}
+
+static void
+runner_insert_arg (runner_t *runner, char *arg)
+{
+ int i = 0;
+
+ GF_ASSERT (arg);
+
+ if (runner->runerr)
+ return;
+
+ for (i = 0; i < runner->argvlen; i++) {
+ if (runner->argv[i] == NULL)
+ break;
+ }
+ GF_ASSERT (i < runner->argvlen);
+
+ if (i == runner->argvlen - 1) {
+ runner->argv = GF_REALLOC (runner->argv,
+ runner->argvlen * 2 * sizeof (*runner->argv));
+ if (!runner->argv) {
+ runner->runerr = errno;
+ return;
+ }
+ memset (/* "+" is aware of the type of its left side,
+ * no need to multiply with type-size */
+ runner->argv + runner->argvlen,
+ 0, runner->argvlen * sizeof (*runner->argv));
+ runner->argvlen *= 2;
+ }
+
+ runner->argv[i] = arg;
+}
+
+void
+runner_add_arg (runner_t *runner, const char *arg)
+{
+ arg = gf_strdup (arg);
+ if (!arg) {
+ runner->runerr = errno;
+ return;
+ }
+
+ runner_insert_arg (runner, (char *)arg);
+}
+
+static void
+runner_va_add_args (runner_t *runner, va_list argp)
+{
+ const char *arg;
+
+ while ((arg = va_arg (argp, const char *)))
+ runner_add_arg (runner, arg);
+}
+
+void
+runner_add_args (runner_t *runner, ...)
+{
+ va_list argp;
+
+ va_start (argp, runner);
+ runner_va_add_args (runner, argp);
+ va_end (argp);
+}
+
+void
+runner_argprintf (runner_t *runner, const char *format, ...)
+{
+ va_list argva;
+ char *arg = NULL;
+ int ret = 0;
+
+ va_start (argva, format);
+ ret = gf_vasprintf (&arg, format, argva);
+ va_end (argva);
+
+ if (ret < 0) {
+ runner->runerr = errno;
+ return;
+ }
+
+ runner_insert_arg (runner, arg);
+}
+
+void
+runner_log (runner_t *runner, const char *dom, gf_loglevel_t lvl,
+ const char *msg)
+{
+ char *buf = NULL;
+ size_t len = 0;
+ int i = 0;
+
+ if (runner->runerr)
+ return;
+
+ for (i = 0;; i++) {
+ if (runner->argv[i] == NULL)
+ break;
+ len += (strlen (runner->argv[i]) + 1);
+ }
+
+ buf = GF_CALLOC (1, len + 1, gf_common_mt_run_logbuf);
+ if (!buf) {
+ runner->runerr = errno;
+ return;
+ }
+ for (i = 0;; i++) {
+ if (runner->argv[i] == NULL)
+ break;
+ strcat (buf, runner->argv[i]);
+ strcat (buf, " ");
+ }
+ if (len > 0)
+ buf[len - 1] = '\0';
+
+ gf_log_callingfn (dom, lvl, "%s: %s", msg, buf);
+
+ GF_FREE (buf);
+}
+
+void
+runner_redir (runner_t *runner, int fd, int tgt_fd)
+{
+ GF_ASSERT (fd > 0 && fd < 3);
+
+ if ((fd > 0) && (fd < 3))
+ runner->chfd[fd] = (tgt_fd >= 0) ? tgt_fd : -2;
+}
+
+int
+runner_start (runner_t *runner)
+{
+ int pi[3][2] = {{-1, -1}, {-1, -1}, {-1, -1}};
+ int xpi[2];
+ int ret = 0;
+ int errno_priv = 0;
+ int i = 0;
+ sigset_t set;
+
+ if (runner->runerr) {
+ errno = runner->runerr;
+ return -1;
+ }
+
+ GF_ASSERT (runner->argv[0]);
+
+ /* set up a channel to child to communicate back
+ * possible execve(2) failures
+ */
+ ret = pipe(xpi);
+ if (ret != -1)
+ ret = fcntl (xpi[1], F_SETFD, FD_CLOEXEC);
+
+ for (i = 0; i < 3; i++) {
+ if (runner->chfd[i] != -2)
+ continue;
+ ret = pipe (pi[i]);
+ if (ret != -1) {
+ runner->chio[i] = fdopen (pi[i][i ? 0 : 1], i ? "r" : "w");
+ if (!runner->chio[i])
+ ret = -1;
+ }
+ }
+
+ if (ret != -1)
+ runner->chpid = fork ();
+ switch (runner->chpid) {
+ case -1:
+ errno_priv = errno;
+ close (xpi[0]);
+ close (xpi[1]);
+ for (i = 0; i < 3; i++) {
+ close (pi[i][0]);
+ close (pi[i][1]);
+ }
+ errno = errno_priv;
+ return -1;
+ case 0:
+ for (i = 0; i < 3; i++)
+ close (pi[i][i ? 0 : 1]);
+ close (xpi[0]);
+ ret = 0;
+
+ for (i = 0; i < 3; i++) {
+ if (ret == -1)
+ break;
+ switch (runner->chfd[i]) {
+ case -1:
+ /* no redir */
+ break;
+ case -2:
+ /* redir to pipe */
+ ret = dup2 (pi[i][i ? 1 : 0], i);
+ break;
+ default:
+ /* redir to file */
+ ret = dup2 (runner->chfd[i], i);
+ }
+ }
+
+ if (ret != -1 ) {
+#ifdef GF_LINUX_HOST_OS
+ DIR *d = NULL;
+ struct dirent *de = NULL;
+ char *e = NULL;
+
+ d = opendir ("/proc/self/fd");
+ if (d) {
+ while ((de = readdir (d))) {
+ i = strtoul (de->d_name, &e, 10);
+ if (*e == '\0' && i > 2 &&
+ i != dirfd (d) && i != xpi[1])
+ close (i);
+ }
+ closedir (d);
+ } else
+ ret = -1;
+#else
+ struct rlimit rl;
+ ret = getrlimit (RLIMIT_NOFILE, &rl);
+ GF_ASSERT (ret == 0);
+
+ for (i = 3; i < rl.rlim_cur; i++) {
+ if (i != xpi[1])
+ close (i);
+ }
+#endif
+ }
+
+ if (ret != -1) {
+ /* save child from inheriting our singal handling */
+ sigemptyset (&set);
+ sigprocmask (SIG_SETMASK, &set, NULL);
+
+ execvp (runner->argv[0], runner->argv);
+ }
+ ret = write (xpi[1], &errno, sizeof (errno));
+ _exit (1);
+ }
+
+ errno_priv = errno;
+ for (i = 0; i < 3; i++)
+ close (pi[i][i ? 1 : 0]);
+ close (xpi[1]);
+ if (ret == -1) {
+ for (i = 0; i < 3; i++) {
+ if (runner->chio[i]) {
+ fclose (runner->chio[i]);
+ runner->chio[i] = NULL;
+ }
+ }
+ } else {
+ ret = read (xpi[0], (char *)&errno_priv, sizeof (errno_priv));
+ close (xpi[0]);
+ if (ret <= 0)
+ return 0;
+ GF_ASSERT (ret == sizeof (errno_priv));
+ }
+ errno = errno_priv;
+ return -1;
+}
+
+int
+runner_end_reuse (runner_t *runner)
+{
+ int i = 0;
+ int ret = -1;
+ int chstat = 0;
+
+ if (runner->chpid > 0) {
+ if (waitpid (runner->chpid, &chstat, 0) == runner->chpid)
+ ret = chstat;
+ }
+
+ for (i = 0; i < 3; i++) {
+ if (runner->chio[i]) {
+ fclose (runner->chio[i]);
+ runner->chio[i] = NULL;
+ }
+ }
+
+ return ret;
+}
+
+int
+runner_end (runner_t *runner)
+{
+ int i = 0;
+ int ret = -1;
+ char **p = NULL;
+
+ ret = runner_end_reuse (runner);
+
+ if (runner->argv) {
+ for (p = runner->argv; *p; p++)
+ GF_FREE (*p);
+ GF_FREE (runner->argv);
+ }
+ for (i = 0; i < 3; i++)
+ close (runner->chfd[i]);
+
+ return ret;
+}
+
+static int
+runner_run_generic (runner_t *runner, int (*rfin)(runner_t *runner))
+{
+ int ret = 0;
+
+ ret = runner_start (runner);
+
+ return -(rfin (runner) || ret);
+}
+
+int
+runner_run (runner_t *runner)
+{
+ return runner_run_generic (runner, runner_end);
+}
+
+
+int
+runner_run_nowait (runner_t *runner)
+{
+ int pid;
+
+ pid = fork ();
+
+ if (!pid) {
+ setsid ();
+ _exit (runner_start (runner));
+ }
+
+ if (pid > 0)
+ runner->chpid = pid;
+ return runner_end (runner);
+}
+
+
+int
+runner_run_reuse (runner_t *runner)
+{
+ return runner_run_generic (runner, runner_end_reuse);
+}
+
+int
+runcmd (const char *arg, ...)
+{
+ runner_t runner;
+ va_list argp;
+
+ runinit (&runner);
+ /* ISO C requires a named argument before '...' */
+ runner_add_arg (&runner, arg);
+
+ va_start (argp, arg);
+ runner_va_add_args (&runner, argp);
+ va_end (argp);
+
+ return runner_run (&runner);
+}
+
+#ifdef RUN_DO_TESTS
+static void
+TBANNER (const char *txt)
+{
+ printf("######\n### testing %s\n", txt);
+}
+
+int
+main (int argc, char **argv)
+{
+ runner_t runner;
+ char buf[80];
+ char *wdbuf;;
+ int ret;
+ int fd;
+ long pathmax = pathconf ("/", _PC_PATH_MAX);
+ struct timeval tv = {0,};
+ struct timeval *tvp = NULL;
+
+ wdbuf = malloc (pathmax);
+ assert (wdbuf);
+ getcwd (wdbuf, pathmax);
+
+ TBANNER ("basic functionality");
+ runcmd ("echo", "a", "b", NULL);
+
+ TBANNER ("argv extension");
+ runcmd ("echo", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10",
+ "11", "12", "13", "14", "15", "16", "17", "18", "19", "20",
+ "21", "22", "23", "24", "25", "26", "27", "28", "29", "30",
+ "31", "32", "33", "34", "35", "36", "37", "38", "39", "40",
+ "41", "42", "43", "44", "45", "46", "47", "48", "49", "50",
+ "51", "52", "53", "54", "55", "56", "57", "58", "59", "60",
+ "61", "62", "63", "64", "65", "66", "67", "68", "69", "70",
+ "71", "72", "73", "74", "75", "76", "77", "78", "79", "80",
+ "81", "82", "83", "84", "85", "86", "87", "88", "89", "90",
+ "91", "92", "93", "94", "95", "96", "97", "98", "99", "100", NULL);
+
+ TBANNER ("add_args, argprintf, log, and popen-style functionality");
+ runinit (&runner);
+ runner_add_args (&runner, "echo", "pid:", NULL);
+ runner_argprintf (&runner, "%d\n", getpid());
+ runner_add_arg (&runner, "wd:");
+ runner_add_arg (&runner, wdbuf);
+ runner_redir (&runner, 1, RUN_PIPE);
+ runner_start (&runner);
+ runner_log (&runner, "(x)", LOG_DEBUG, "starting program");
+ while (fgets (buf, sizeof(buf), runner_chio (&runner, 1)))
+ printf ("got: %s", buf);
+ runner_end (&runner);
+
+ TBANNER ("execve error reporting");
+ ret = runcmd ("bafflavvitty", NULL);
+ printf ("%d %d [%s]\n", ret, errno, strerror (errno));
+
+ TBANNER ("output redirection");
+ fd = mkstemp ("/tmp/foof");
+ assert (fd != -1);
+ runinit (&runner);
+ runner_add_args (&runner, "echo", "foo", NULL);
+ runner_redir (&runner, 1, fd);
+ ret = runner_run (&runner);
+ printf ("%d", ret);
+ if (ret != 0)
+ printf (" %d [%s]", errno, strerror (errno));
+ putchar ('\n');
+
+ if (argc > 1) {
+ tv.tv_sec = strtoul (argv[1], NULL, 10);
+ if (tv.tv_sec > 0)
+ tvp = &tv;
+ select (0, 0, 0, 0, tvp);
+ }
+
+ return 0;
+}
+#endif
diff --git a/libglusterfs/src/run.h b/libglusterfs/src/run.h
new file mode 100644
index 000000000..d7554ef6d
--- /dev/null
+++ b/libglusterfs/src/run.h
@@ -0,0 +1,194 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __RUN_H__
+#define __RUN_H__
+
+#define RUN_PIPE -1
+
+struct runner {
+ char **argv;
+ unsigned argvlen;
+ int runerr;
+ pid_t chpid;
+ int chfd[3];
+ FILE *chio[3];
+};
+
+typedef struct runner runner_t;
+
+/**
+ * initialize runner_t instance.
+ *
+ * @param runner pointer to runner_t instance
+ */
+void runinit (runner_t *runner);
+
+/**
+ * get FILE pointer to which child's stdio is redirected.
+ *
+ * @param runner pointer to runner_t instance
+ * @param fd specifies which standard file descriptor is
+ * is asked for
+ *
+ * @see runner_redir()
+ */
+FILE *runner_chio (runner_t *runner, int fd);
+
+/**
+ * add an argument.
+ *
+ * 'arg' is duplicated.
+ *
+ * Errors are deferred, no error handling is necessary.
+ *
+ * @param runner pointer to runner_t instance
+ * @param arg command line argument
+ */
+void runner_add_arg (runner_t *runner, const char *arg);
+
+/**
+ * add a sequence of arguments.
+ *
+ * Variadic function, calls runner_add_arg() on each vararg.
+ * Argument sequence MUST be NULL terminated.
+ *
+ * Errors are deferred, no error handling is necessary.
+ *
+ * @param runner pointer to runner_t instance
+ *
+ * @see runner_add_arg()
+ */
+void runner_add_args (runner_t *runner, ...);
+
+/**
+ * add an argument with printf style formatting.
+ *
+ * Errors are deferred, no error handling is necessary.
+ *
+ * @param runner pointer to runner_t instance
+ * @param format printf style format specifier
+ */
+void runner_argprintf (runner_t *runner, const char *format, ...);
+
+/**
+ * log a message about the command to be run.
+ *
+ * @param runner pointer to runner_t instance
+ *
+ * @param dom log domain
+ * @param lvl log level
+ * @param msg message with which the command is prefixed in log
+ *
+ * @see gf_log()
+ */
+void runner_log (runner_t *runner, const char *dom, gf_loglevel_t lvl,
+ const char *msg);
+
+/**
+ * set up redirection for child.
+ *
+ * @param runner pointer to runner_t instance
+ *
+ * @param fd fd of child to redirect (0, 1, or 2)
+ * @param tgt_fd fd on parent side to redirect to.
+ * Note that runner_end() will close tgt_fd,
+ * if user needs it in another context it should
+ * be dup'd beforehand.
+ * RUN_PIPE can be used for requiring a
+ * pipe from child to parent. The FILE
+ * created for this purpose will be
+ * accessible via runner_chio() (after
+ * runner_start() has been invoked).
+ *
+ * @see runner_start(), dup(2), runner_chio(), runner_start()
+ */
+void
+runner_redir (runner_t *runner, int fd, int tgt_fd);
+
+/**
+ * spawn child with accumulated arg list.
+ *
+ * @param runner pointer to runner_t instance
+ *
+ * @return 0 on succesful spawn
+ * -1 on failure (either due to earlier errors or execve(2) failing)
+ *
+ * @see runner_cout()
+ */
+int runner_start (runner_t *runner);
+
+/**
+ * complete operation and free resources.
+ *
+ * If child exists, waits for it. Redirections will be closed.
+ * Dynamically allocated memory shall be freed.
+ *
+ * @param runner pointer to runner_t instance
+ *
+ * @return 0 if child terminated successfully
+ * -1 if there is no running child
+ * n > 0 if child failed; value to be interpreted as status
+ * in waitpid(2)
+ *
+ * @see waitpid(2)
+ */
+int runner_end (runner_t *runner);
+
+/**
+ * variant of runner_end() which does not free internal data
+ * so that the runner instance can be run again.
+ *
+ * @see runner_end()
+ */
+int runner_end_reuse (runner_t *runner);
+
+/**
+ * spawn and child, take it to completion and free resources.
+ *
+ * Essentially it's a concatenation of runner_start() and runner_end()
+ * with simplified return semantics.
+ *
+ * @param runner pointer to runner_t instance
+ *
+ * @return 0 on success
+ * -1 on failuire
+ *
+ * @see runner_start(), runner_end()
+ */
+int runner_run (runner_t *runner);
+
+/**
+ * variant for runner_run() which does not wait for acknowledgement
+ * from child, and always assumes it succeeds.
+ */
+int runner_run_nowait (runner_t *runner);
+
+/**
+ * variant of runner_run() which does not free internal data
+ * so that the runner instance can be run again.
+ *
+ * @see runner_run()
+ */
+int runner_run_reuse (runner_t *runner);
+
+/**
+ * run a command with args.
+ *
+ * Variadic function, child process is spawned with
+ * the given sequence of args and waited for.
+ * Argument sequence MUST be NULL terminated.
+ *
+ * @return 0 on success
+ * -1 on failure
+ */
+int runcmd (const char *arg, ...);
+
+#endif
diff --git a/libglusterfs/src/scheduler.c b/libglusterfs/src/scheduler.c
deleted file mode 100644
index f799b52fc..000000000
--- a/libglusterfs/src/scheduler.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <dlfcn.h>
-#include <netdb.h>
-#include "xlator.h"
-#include "scheduler.h"
-#include "list.h"
-
-struct sched_ops *
-get_scheduler (xlator_t *xl, const char *name)
-{
- struct sched_ops *tmp_sched = NULL;
- volume_opt_list_t *vol_opt = NULL;
- char *sched_file = NULL;
- void *handle = NULL;
- int ret = 0;
-
- if (name == NULL) {
- gf_log ("scheduler", GF_LOG_ERROR,
- "'name' not specified, EINVAL");
- return NULL;
- }
-
- ret = asprintf (&sched_file, "%s/%s.so", SCHEDULERDIR, name);
- if (-1 == ret) {
- gf_log ("scheduler", GF_LOG_ERROR, "asprintf failed");
- return NULL;
- }
-
- gf_log ("scheduler", GF_LOG_DEBUG,
- "attempt to load file %s.so", name);
-
- handle = dlopen (sched_file, RTLD_LAZY);
- if (!handle) {
- gf_log ("scheduler", GF_LOG_ERROR,
- "dlopen(%s): %s", sched_file, dlerror ());
- return NULL;
- }
-
- tmp_sched = dlsym (handle, "sched");
- if (!tmp_sched) {
- gf_log ("scheduler", GF_LOG_ERROR,
- "dlsym(sched) on %s", dlerror ());
- return NULL;
- }
-
- vol_opt = CALLOC (1, sizeof (volume_opt_list_t));
- vol_opt->given_opt = dlsym (handle, "options");
- if (vol_opt->given_opt == NULL) {
- gf_log ("scheduler", GF_LOG_DEBUG,
- "volume option validation not specified");
- } else {
- list_add_tail (&vol_opt->list, &xl->volume_options);
- if (validate_xlator_volume_options (xl, vol_opt->given_opt)
- == -1) {
- gf_log ("scheduler", GF_LOG_ERROR,
- "volume option validation failed");
- return NULL;
- }
- }
-
- return tmp_sched;
-}
diff --git a/libglusterfs/src/scheduler.h b/libglusterfs/src/scheduler.h
deleted file mode 100644
index 5b5628955..000000000
--- a/libglusterfs/src/scheduler.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _SCHEDULER_H
-#define _SCHEDULER_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-
-struct sched_ops {
- int32_t (*init) (xlator_t *this);
- void (*fini) (xlator_t *this);
- void (*update) (xlator_t *this);
- xlator_t *(*schedule) (xlator_t *this, const void *path);
- void (*notify) (xlator_t *xl, int32_t event, void *data);
-};
-
-extern struct sched_ops *get_scheduler (xlator_t *xl, const char *name);
-
-#endif /* _SCHEDULER_H */
diff --git a/libglusterfs/src/spec.l b/libglusterfs/src/spec.l
deleted file mode 100644
index 341689c3a..000000000
--- a/libglusterfs/src/spec.l
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-%x STRING
-%option yylineno
-%option noinput
-%{
-
-#define YYSTYPE char *
-#include "xlator.h"
-#include "y.tab.h"
-#include <string.h>
-#define START_STRSIZE 32
-
-static char *text;
-static int text_asize;
-static int text_size;
-
-void new_string(void)
-{
- text = malloc(START_STRSIZE);
- text_asize = START_STRSIZE;
- text_size = 0;
- *text = 0;
-}
-
-void append_string(const char *str, int size)
-{
- int new_size = text_size + size + 1;
- if (new_size > text_asize) {
- new_size += START_STRSIZE - 1;
- new_size &= -START_STRSIZE;
- text = realloc(text, new_size);
- text_asize = new_size;
- }
- memcpy(text + text_size, str, size);
- text_size += size;
- text[text_size] = 0;
-}
-
-void alloc_string(const char *str, int size)
-{
- text = malloc(size + 1);
- memcpy(text, str, size);
- text[size] = 0;
-}
-
-%}
-
-VOLUME [v][o][l][u][m][e]
-END [e][n][d]
-SUB [s][u][b]
-OPTION [o][p][t][i][o][n]
-TYPE [t][y][p][e]
-%%
-\#.* ;
-{VOLUME} return SECTION_BEGIN;
-{TYPE} return TYPE;
-{END}[-]{VOLUME} return SECTION_END;
-{SUB}{VOLUME}[Ss] return SUBSECTION;
-{OPTION} return OPTION;
-\" BEGIN(STRING);
-<STRING>{
- [^\n\"\\]* { append_string (yytext, yyleng); }
- \\. { append_string (yytext + 1, yyleng - 1); }
- \" {
- if (0) {
- yyunput (0, NULL);
- }
- BEGIN (INITIAL);
- yylval = text;
- return STRING_TOK;
- }
-}
-[^ \t\r\n\"\\]+ { yylval = strdup (yytext) ; return ID; }
-[ \t\r\n]+ ;
-%%
diff --git a/libglusterfs/src/spec.y b/libglusterfs/src/spec.y
deleted file mode 100644
index f1cffe5d9..000000000
--- a/libglusterfs/src/spec.y
+++ /dev/null
@@ -1,606 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-%token SECTION_BEGIN SECTION_END OPTION NEWLINE SUBSECTION ID WHITESPACE COMMENT TYPE STRING_TOK
-
-%{
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <sys/mman.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-
-#include "xlator.h"
-#include "logging.h"
-
-static int new_section (char *name);
-static int section_type (char *type);
-static int section_option (char *key, char *value);
-static int section_sub (char *sub);
-static int section_end (void);
-static void sub_error (void);
-static void type_error (void);
-static void option_error (void);
-
-#define YYSTYPE char *
-#define GF_CMD_BUFFER_LEN (8 * GF_UNIT_KB)
-
-int yyerror (const char *);
-int yylex ();
-%}
-
-
-%%
-SECTIONS: SECTION | SECTIONS SECTION;
-
-SECTION: SECTION_HEADER SECTION_DATA SECTION_FOOTER;
-SECTION_HEADER: SECTION_BEGIN WORD {if( -1 == new_section ($2)) { YYABORT; } };
-SECTION_FOOTER: SECTION_END {if( -1 == section_end ()) { YYABORT; } };
-
-SECTION_DATA: TYPE_LINE OPTIONS_LINE SUBSECTION_LINE OPTIONS_LINE |
- TYPE_LINE SUBSECTION_LINE OPTIONS_LINE |
- TYPE_LINE OPTIONS_LINE SUBSECTION_LINE |
- TYPE_LINE SUBSECTION_LINE |
- TYPE_LINE OPTIONS_LINE |
- OPTIONS_LINE SUBSECTION_LINE OPTIONS_LINE | /* error case */
- OPTIONS_LINE; /* error case */
-
-TYPE_LINE: TYPE WORD {if ( -1 == section_type ($2)) { YYABORT; }} | TYPE { type_error(); YYABORT; };
-
-SUBSECTION_LINE: SUBSECTION WORDS | SUBSECTION { sub_error (); YYABORT; };
-
-OPTIONS_LINE: OPTION_LINE | OPTIONS_LINE OPTION_LINE;
-
-OPTION_LINE: OPTION WORD WORD {if(-1 == section_option($2,$3)){YYABORT;} } |
- OPTION WORD { option_error (); YYABORT; } |
- OPTION { option_error (); YYABORT; };
-
-WORDS: WORD {if (-1 == section_sub ($1)) {YYABORT; } } | WORDS WORD { if (-1 == section_sub ($2)) { YYABORT; } };
-WORD: ID | STRING_TOK ;
-%%
-
-xlator_t *complete_tree = NULL;
-xlator_t *tree = NULL;
-glusterfs_ctx_t *gctx;
-
-static void
-type_error (void)
-{
- extern int yylineno;
-
- fprintf (stderr, "Volume '%s', before line %d: Please specify volume 'type'.",
- complete_tree->name, yylineno);
- gf_log ("parser", GF_LOG_ERROR,
- "Volume '%s', before line %d: Please specify volume 'type'.",
- complete_tree->name, yylineno);
- return;
-}
-
-static void
-sub_error (void)
-{
- extern int yylineno;
-
- fprintf (stderr, "Volume %s, before line %d: Please specify subvolumes for the volume. ",
- complete_tree->name, yylineno);
- gf_log ("parser", GF_LOG_ERROR,
- "Volume %s, before line %d: Please specify subvolumes for the volume. ",
- complete_tree->name, yylineno);
- return;
-}
-
-static void
-option_error (void)
-{
- extern int yylineno;
-
- fprintf (stderr, "Volume %s, before line %d: Please specify "
- "option <key> <value> \n",
- complete_tree->name, yylineno);
- gf_log ("parser", GF_LOG_ERROR,
- "Volume %s, before line %d: Please specify "
- "option <key> <value>",
- complete_tree->name, yylineno);
- return;
-}
-
-static int
-cut_tree (xlator_t *tree)
-{
- xlator_t *trav = tree, *prev = tree;
-
- if (!tree) {
- gf_log ("parser", GF_LOG_DEBUG, "Translator tree not found");
- return -1;
- }
-
- gf_log ("parser", GF_LOG_DEBUG, "Failed to build translator graph");
-
- while (prev) {
- trav = prev->next;
- dict_destroy (prev->options);
- FREE (prev->name);
- FREE (prev);
- prev = trav;
- }
-
- return 0;
-}
-
-
-static int
-new_section (char *name)
-{
- extern int yylineno;
- xlator_t *trav = complete_tree;
- xlator_t *node = (void *) calloc (1, sizeof (*node));
-
- if (!name) {
- gf_log ("parser", GF_LOG_DEBUG,
- "Invalid argument name: '%s'", name);
- return -1;
- }
-
- while (trav) {
- if (!strcmp (name, trav->name)) {
- fprintf (stderr,
- "Line %d: volume '%s' defined again\n",
- yylineno, name);
- gf_log ("parser", GF_LOG_ERROR,
- "Line %d: volume '%s' defined again",
- yylineno, name);
- return -1;
- }
- trav = trav->next;
- }
-
- node->ctx = gctx;
- node->name = name;
- node->next = complete_tree;
- if (complete_tree)
- complete_tree->prev = node;
- node->options = get_new_dict ();
- complete_tree = node;
-
- tree = node;
- gf_log ("parser", GF_LOG_TRACE, "New node for '%s'", name);
-
- return 0;
-}
-
-static int
-section_type (char *type)
-{
- extern int yylineno;
- int32_t ret = -1;
- if (!type) {
- gf_log ("parser", GF_LOG_DEBUG, "Invalid argument type");
- return -1;
- }
-
- ret = xlator_set_type (tree, type);
- if (ret) {
- fprintf (stderr, "Volume '%s', line %d: type '%s' is not "
- "valid or not found on this machine\n",
- complete_tree->name, yylineno, type);
- gf_log ("parser", GF_LOG_ERROR,
- "Volume '%s', line %d: type '%s' is not valid or "
- "not found on this machine",
- complete_tree->name, yylineno, type);
- return -1;
- }
- gf_log ("parser", GF_LOG_TRACE, "Type:%s:%s", tree->name, type);
-
- return 0;
-}
-
-
-static int
-section_option (char *key, char *value)
-{
- extern int yylineno;
-
- int ret = 0;
-
- if (!key || !value){
- fprintf (stderr, "Invalid argument\n");
- gf_log ("parser", GF_LOG_ERROR, "Invalid argument");
- return -1;
- }
-
- ret = dict_set (tree->options, key, str_to_data (value));
-
- if (ret == 1) {
- gf_log ("parser", GF_LOG_ERROR,
- "Volume '%s', line %d: duplicate entry "
- "('option %s') present",
- tree->name, yylineno, key);
- return -1;
- }
- gf_log ("parser", GF_LOG_TRACE, "Option:%s:%s:%s",
- tree->name, key, value);
-
- return 0;
-}
-
-static int
-section_sub (char *sub)
-{
- extern int yylineno;
- xlator_t *trav = complete_tree;
- xlator_list_t *xlchild, *tmp, *xlparent;
-
- if (!sub) {
- fprintf (stderr, "Invalid subvolumes argument\n");
- gf_log ("parser", GF_LOG_ERROR, "Invalid subvolumes argument");
- return -1;
- }
-
- while (trav) {
- if (!strcmp (sub, trav->name))
- break;
- trav = trav->next;
- }
- if (!trav) {
- fprintf (stderr,
- "Volume '%s', line %d: subvolume '%s' is not "
- "defined prior to usage\n",
- complete_tree->name, yylineno, sub);
- gf_log ("parser", GF_LOG_ERROR,
- "Volume '%s', line %d: subvolume '%s' is not defined "
- "prior to usage",
- complete_tree->name, yylineno, sub);
- return -1;
- }
-
- if (trav == tree) {
- fprintf (stderr, "Volume '%s', line %d: has '%s' itself as "
- "subvolume\n",
- complete_tree->name, yylineno, sub);
- gf_log ("parser", GF_LOG_ERROR,
- "Volume '%s', line %d: has '%s' itself as subvolume",
- complete_tree->name, yylineno, sub);
- return -1;
- }
-
- xlparent = (void *) calloc (1, sizeof (*xlparent));
- xlparent->xlator = tree;
-
- tmp = trav->parents;
- if (tmp == NULL) {
- trav->parents = xlparent;
- } else {
- while (tmp->next)
- tmp = tmp->next;
- tmp->next = xlparent;
- }
-
- xlchild = (void *) calloc (1, sizeof(*xlchild));
- xlchild->xlator = trav;
-
- tmp = tree->children;
- if (tmp == NULL) {
- tree->children = xlchild;
- } else {
- while (tmp->next)
- tmp = tmp->next;
- tmp->next = xlchild;
- }
-
- gf_log ("parser", GF_LOG_TRACE, "child:%s->%s", tree->name, sub);
-
- return 0;
-}
-
-static int
-section_end (void)
-{
- if (!tree->fops || !tree->mops) {
- fprintf (stderr,
- "\"type\" not specified for volume %s\n", tree->name);
- gf_log ("parser", GF_LOG_ERROR,
- "\"type\" not specified for volume %s", tree->name);
- return -1;
- }
- gf_log ("parser", GF_LOG_TRACE, "end:%s", tree->name);
-
- tree = NULL;
- return 0;
-}
-
-int
-yywrap ()
-{
- return 1;
-}
-
-int
-yyerror (const char *str)
-{
- extern char *yytext;
- extern int yylineno;
-
- if (complete_tree && complete_tree->name)
- {
- if (!strcmp (yytext, "volume"))
- {
- fprintf (stderr,
- "'end-volume' not defined for volume '%s'\n",
- complete_tree->name);
- gf_log ("parser", GF_LOG_ERROR,
- "'end-volume' not defined for volume '%s'",
- complete_tree->name);
- }
- else if (!strcmp (yytext, "type"))
- {
- fprintf (stderr, "line %d: duplicate 'type' defined "
- "for volume '%s'",
- yylineno, complete_tree->name);
- gf_log ("parser", GF_LOG_ERROR,
- "line %d: duplicate 'type' defined for "
- "volume '%s'",
- yylineno, complete_tree->name);
- }
- else if (!strcmp (yytext, "subvolumes"))
- {
- fprintf (stderr, "line %d: duplicate 'subvolumes' "
- "defined for volume '%s'",
- yylineno, complete_tree->name);
- gf_log ("parser", GF_LOG_ERROR,
- "line %d: duplicate 'subvolumes' defined for "
- "volume '%s'",
- yylineno, complete_tree->name);
- }
- else if (tree)
- {
- fprintf (stderr,
- "syntax error: line %d (volume '%s'): \"%s\""
- "\nallowed tokens are 'volume', 'type', "
- "'subvolumes', 'option', 'end-volume'",
- yylineno, complete_tree->name,
- yytext);
-
- gf_log ("parser", GF_LOG_ERROR,
- "syntax error: line %d (volume '%s'): \"%s\""
- "\nallowed tokens are 'volume', 'type', "
- "'subvolumes', 'option', 'end-volume'()",
- yylineno, complete_tree->name,
- yytext);
- }
- else
- {
- fprintf (stderr,
- "syntax error: line %d (just after volume "
- "'%s'): \"%s\"\n(%s)",
- yylineno, complete_tree->name,
- yytext,
- "allowed tokens are 'volume', 'type', "
- "'subvolumes', 'option', 'end-volume'");
- gf_log ("parser", GF_LOG_ERROR,
- "syntax error: line %d (just after volume "
- "'%s'): \"%s\"\n(%s)",
- yylineno, complete_tree->name,
- yytext,
- "allowed tokens are 'volume', 'type', "
- "'subvolumes', 'option', 'end-volume'");
- }
- }
- else
- {
- fprintf (stderr,
- "syntax error in line %d: \"%s\" \n"
- "(allowed tokens are 'volume', 'type', "
- "'subvolumes', 'option', 'end-volume')\n",
- yylineno, yytext);
- gf_log ("parser", GF_LOG_ERROR,
- "syntax error in line %d: \"%s\" \n"
- "(allowed tokens are 'volume', 'type', "
- "'subvolumes', 'option', 'end-volume')\n",
- yylineno, yytext);
- }
-
- cut_tree (tree);
- complete_tree = NULL;
- return 0;
-}
-
-static int
-execute_cmd (char *cmd, char **result, size_t size)
-{
- FILE *fpp = NULL;
- int i = 0, status = 0;
- int character = 0;
- char *buf = *result;
-
- fpp = popen (cmd, "r");
- if (!fpp) {
- gf_log ("parser", GF_LOG_ERROR, "%s: failed to popen", cmd);
- return -1;
- }
-
- while ((character = fgetc (fpp)) != EOF) {
- if (i == size) {
- size *= 2;
- buf = *result = realloc (*result, size);
- }
-
- buf[i++] = character;
- }
-
- if (i > 0) {
- i--;
- buf[i] = '\0';
- }
-
- status = pclose (fpp);
- if (status == -1 || !WIFEXITED (status) ||
- ((WEXITSTATUS (status)) != 0)) {
- i = -1;
- buf[0] = '\0';
- }
-
- return i;
-}
-
-static int
-parse_backtick (FILE *srcfp, FILE *dstfp)
-{
- int ret = 0, i = 0;
- char *cmd = NULL, *result = NULL;
- size_t cmd_buf_size = GF_CMD_BUFFER_LEN;
- char escaped = 0, in_backtick = 0;
- int line = 1, column = 0, backtick_line = 0, backtick_column = 0;
- int character = 0;
-
- fseek (srcfp, 0L, SEEK_SET);
- fseek (dstfp, 0L, SEEK_SET);
-
- cmd = CALLOC (cmd_buf_size, 1);
- if (cmd == NULL) {
- return -1;
- }
-
- result = CALLOC (cmd_buf_size * 2, 1);
- if (result == NULL) {
- return -1;
- }
-
- while ((character = fgetc (srcfp)) != EOF) {
- if ((character == '`') && !escaped) {
- if (in_backtick) {
- cmd[i] = '\0';
- result[0] = '\0';
-
- ret = execute_cmd (cmd, &result,
- 2 * cmd_buf_size);
- if (ret < 0) {
- ret = -1;
- goto out;
- }
- fwrite (result, ret, 1, dstfp);
- } else {
- i = 0;
- cmd[i] = '\0';
-
- backtick_column = column;
- backtick_line = line;
- }
-
- in_backtick = !in_backtick;
- } else {
- if (in_backtick) {
- if (i == cmd_buf_size) {
- cmd_buf_size *= 2;
- cmd = realloc (cmd, cmd_buf_size);
- if (cmd == NULL) {
- return -1;
- }
-
- result = realloc (result,
- 2 * cmd_buf_size);
- if (result == NULL) {
- return -1;
- }
- }
-
- cmd[i++] = character;
- } else {
- fputc (character, dstfp);
- }
- }
-
- if (character == '\\') {
- escaped = !escaped;
- } else {
- escaped = 0;
- }
-
- if (character == '\n') {
- line++;
- column = 0;
- } else {
- column++;
- }
- }
-
- if (in_backtick) {
- gf_log ("parser", GF_LOG_ERROR,
- "Unterminated backtick in volume specfication file at line (%d), column (%d).",
- line, column);
- ret = -1;
- }
-
-out:
- fseek (srcfp, 0L, SEEK_SET);
- fseek (dstfp, 0L, SEEK_SET);
- free (cmd);
- free (result);
-
- return ret;
-}
-
-extern FILE *yyin;
-xlator_t *
-file_to_xlator_tree (glusterfs_ctx_t *ctx,
- FILE *fp)
-{
- int32_t ret = 0;
- xlator_t *tmp_tree = NULL;
- FILE *tmp_file = NULL;
- char *buffer = NULL;
-
- tmp_file = tmpfile ();
- if (NULL == tmp_file) {
- gf_log ("parser", GF_LOG_ERROR,
- "cannot create temparory file");
- return NULL;
- }
-
- ret = parse_backtick (fp, tmp_file);
- if (ret < 0) {
- gf_log ("parser", GF_LOG_ERROR,
- "parsing of backticks failed");
- fclose (tmp_file);
- FREE (buffer);
- return NULL;
- }
-
- gctx = ctx;
- yyin = tmp_file;
- ret = yyparse ();
-
- fclose (tmp_file);
- FREE (buffer);
-
- if (1 == ret) {
- gf_log ("parser", GF_LOG_DEBUG,
- "parsing of volfile failed, please review it "
- "once more");
- tree = complete_tree = NULL;
- return NULL;
- }
-
- tmp_tree = complete_tree;
- tree = complete_tree = NULL;
-
- return tmp_tree;
-}
diff --git a/libglusterfs/src/stack.c b/libglusterfs/src/stack.c
index 453248479..37b338f51 100644
--- a/libglusterfs/src/stack.c
+++ b/libglusterfs/src/stack.c
@@ -1,27 +1,18 @@
/*
- Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include "statedump.h"
#include "stack.h"
static inline
-int call_frames_count (call_frame_t *call_frame)
+int call_frames_count (call_frame_t *call_frame)
{
call_frame_t *pos;
int32_t count = 0;
@@ -35,46 +26,105 @@ int call_frames_count (call_frame_t *call_frame)
return count;
}
+call_frame_t *
+create_frame (xlator_t *xl, call_pool_t *pool)
+{
+ call_stack_t *stack = NULL;
+
+ if (!xl || !pool) {
+ return NULL;
+ }
+
+ stack = mem_get0 (pool->stack_mem_pool);
+ if (!stack)
+ return NULL;
+
+ stack->pool = pool;
+ stack->frames.root = stack;
+ stack->frames.this = xl;
+ stack->ctx = xl->ctx;
+
+ if (stack->ctx->measure_latency) {
+ if (gettimeofday (&stack->tv, NULL) == -1)
+ gf_log ("stack", GF_LOG_ERROR, "gettimeofday () failed."
+ " (%s)", strerror (errno));
+ memcpy (&stack->frames.begin, &stack->tv, sizeof (stack->tv));
+ }
+
+ LOCK (&pool->lock);
+ {
+ list_add (&stack->all_frames, &pool->all_frames);
+ pool->cnt++;
+ }
+ UNLOCK (&pool->lock);
+
+ LOCK_INIT (&stack->frames.lock);
+ LOCK_INIT (&stack->stack_lock);
+
+ return &stack->frames;
+}
+
void
gf_proc_dump_call_frame (call_frame_t *call_frame, const char *key_buf,...)
{
-
+
char prefix[GF_DUMP_MAX_BUF_LEN];
va_list ap;
- char key[GF_DUMP_MAX_BUF_LEN];
call_frame_t my_frame;
int ret = -1;
+ char timestr[256] = {0,};
if (!call_frame)
return;
- assert(key_buf);
+ GF_ASSERT (key_buf);
memset(prefix, 0, sizeof(prefix));
memset(&my_frame, 0, sizeof(my_frame));
va_start(ap, key_buf);
- vsnprintf(prefix, GF_DUMP_MAX_BUF_LEN, key_buf, ap);
+ vsnprintf(prefix, GF_DUMP_MAX_BUF_LEN, key_buf, ap);
va_end(ap);
ret = TRY_LOCK(&call_frame->lock);
- if (ret) {
- gf_log("", GF_LOG_WARNING, "Unable to dump call frame"
- " errno: %d", errno);
- return;
- }
+ if (ret)
+ goto out;
memcpy(&my_frame, call_frame, sizeof(my_frame));
UNLOCK(&call_frame->lock);
- gf_proc_dump_build_key(key, prefix,"ref_count");
- gf_proc_dump_write(key, "%d", my_frame.ref_count);
- gf_proc_dump_build_key(key, prefix,"translator");
- gf_proc_dump_write(key, "%s", my_frame.this->name);
- gf_proc_dump_build_key(key, prefix,"complete");
- gf_proc_dump_write(key, "%d", my_frame.complete);
- if (my_frame.parent) {
- gf_proc_dump_build_key(key, prefix,"parent");
- gf_proc_dump_write(key, "%s", my_frame.parent->this->name);
+ if (my_frame.this->ctx->measure_latency) {
+ gf_time_fmt (timestr, sizeof timestr, my_frame.begin.tv_sec,
+ gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr),
+ sizeof timestr - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS, my_frame.begin.tv_usec);
+ gf_proc_dump_write("frame-creation-time", "%s", timestr);
+ }
+
+ gf_proc_dump_write("ref_count", "%d", my_frame.ref_count);
+ gf_proc_dump_write("translator", "%s", my_frame.this->name);
+ gf_proc_dump_write("complete", "%d", my_frame.complete);
+ if (my_frame.parent)
+ gf_proc_dump_write("parent", "%s", my_frame.parent->this->name);
+
+ if (my_frame.wind_from)
+ gf_proc_dump_write("wind_from", "%s", my_frame.wind_from);
+
+ if (my_frame.wind_to)
+ gf_proc_dump_write("wind_to", "%s", my_frame.wind_to);
+
+ if (my_frame.unwind_from)
+ gf_proc_dump_write("unwind_from", "%s", my_frame.unwind_from);
+
+ if (my_frame.unwind_to)
+ gf_proc_dump_write("unwind_to", "%s", my_frame.unwind_to);
+
+ ret = 0;
+out:
+ if (ret) {
+ gf_proc_dump_write("Unable to dump the frame information",
+ "(Lock acquisition failed) %p", my_frame);
+ return;
}
}
@@ -86,45 +136,43 @@ gf_proc_dump_call_stack (call_stack_t *call_stack, const char *key_buf,...)
va_list ap;
call_frame_t *trav;
int32_t cnt, i;
- char key[GF_DUMP_MAX_BUF_LEN];
+ char timestr[256] = {0,};
if (!call_stack)
return;
- assert(key_buf);
+ GF_ASSERT (key_buf);
cnt = call_frames_count(&call_stack->frames);
memset(prefix, 0, sizeof(prefix));
va_start(ap, key_buf);
- vsnprintf(prefix, GF_DUMP_MAX_BUF_LEN, key_buf, ap);
+ vsnprintf(prefix, GF_DUMP_MAX_BUF_LEN, key_buf, ap);
va_end(ap);
- gf_proc_dump_build_key(key, prefix,"uid");
- gf_proc_dump_write(key, "%d", call_stack->uid);
- gf_proc_dump_build_key(key, prefix,"gid");
- gf_proc_dump_write(key, "%d", call_stack->gid);
- gf_proc_dump_build_key(key, prefix,"pid");
- gf_proc_dump_write(key, "%d", call_stack->pid);
- gf_proc_dump_build_key(key, prefix,"unique");
- gf_proc_dump_write(key, "%Ld", call_stack->unique);
-
- gf_proc_dump_build_key(key, prefix,"op");
- if ((call_stack->type == GF_OP_TYPE_FOP_REQUEST) ||
- (call_stack->type == GF_OP_TYPE_FOP_REPLY)) {
- gf_proc_dump_write(key, "%s", gf_fop_list[call_stack->op]);
- } else if ((call_stack->type == GF_OP_TYPE_MOP_REQUEST) ||
- (call_stack->type == GF_OP_TYPE_MOP_REPLY)) {
- gf_proc_dump_write(key, "%s", gf_mop_list[call_stack->op]);
- } else if ((call_stack->type == GF_OP_TYPE_CBK_REQUEST) ||
- (call_stack->type == GF_OP_TYPE_CBK_REPLY)) {
- gf_proc_dump_write(key, "%s", gf_cbk_list[call_stack->op]);
- }
-
- gf_proc_dump_build_key(key, prefix,"type");
- gf_proc_dump_write(key, "%d", call_stack->type);
- gf_proc_dump_build_key(key, prefix,"cnt");
- gf_proc_dump_write(key, "%d", cnt);
+ if (call_stack->ctx->measure_latency) {
+ gf_time_fmt (timestr, sizeof timestr, call_stack->tv.tv_sec,
+ gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr),
+ sizeof timestr - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS, call_stack->tv.tv_usec);
+ gf_proc_dump_write("callstack-creation-time", "%s", timestr);
+ }
+
+ gf_proc_dump_write("uid", "%d", call_stack->uid);
+ gf_proc_dump_write("gid", "%d", call_stack->gid);
+ gf_proc_dump_write("pid", "%d", call_stack->pid);
+ gf_proc_dump_write("unique", "%Ld", call_stack->unique);
+ gf_proc_dump_write("lk-owner", "%s", lkowner_utoa (&call_stack->lk_owner));
+
+ if (call_stack->type == GF_OP_TYPE_FOP)
+ gf_proc_dump_write("op", "%s",
+ (char *)gf_fop_list[call_stack->op]);
+ else
+ gf_proc_dump_write("op", "stack");
+
+ gf_proc_dump_write("type", "%d", call_stack->type);
+ gf_proc_dump_write("cnt", "%d", cnt);
trav = &call_stack->frames;
@@ -132,7 +180,7 @@ gf_proc_dump_call_stack (call_stack_t *call_stack, const char *key_buf,...)
if (trav) {
gf_proc_dump_add_section("%s.frame.%d", prefix, i);
gf_proc_dump_call_frame(trav, "%s.frame.%d", prefix, i);
- trav = trav->next;
+ trav = trav->next;
}
}
}
@@ -140,25 +188,24 @@ gf_proc_dump_call_stack (call_stack_t *call_stack, const char *key_buf,...)
void
gf_proc_dump_pending_frames (call_pool_t *call_pool)
{
-
+
call_stack_t *trav = NULL;
int i = 1;
int ret = -1;
+ gf_boolean_t section_added = _gf_true;
if (!call_pool)
return;
ret = TRY_LOCK (&(call_pool->lock));
- if (ret) {
- gf_log("", GF_LOG_WARNING, "Unable to dump call pool"
- " errno: %d", errno);
- return;
- }
+ if (ret)
+ goto out;
+
-
gf_proc_dump_add_section("global.callpool");
- gf_proc_dump_write("global.callpool","%p", call_pool);
- gf_proc_dump_write("global.callpool.cnt","%d", call_pool->cnt);
+ section_added = _gf_true;
+ gf_proc_dump_write("callpool_address","%p", call_pool);
+ gf_proc_dump_write("callpool.cnt","%d", call_pool->cnt);
list_for_each_entry (trav, &call_pool->all_frames, all_frames) {
@@ -166,6 +213,220 @@ gf_proc_dump_pending_frames (call_pool_t *call_pool)
gf_proc_dump_call_stack(trav, "global.callpool.stack.%d", i);
i++;
}
- UNLOCK (&(call_pool->lock));
+ UNLOCK (&(call_pool->lock));
+
+ ret = 0;
+out:
+ if (ret) {
+ if (_gf_false == section_added)
+ gf_proc_dump_add_section("global.callpool");
+ gf_proc_dump_write("Unable to dump the callpool",
+ "(Lock acquisition failed) %p",
+ call_pool);
+ }
+ return;
}
+void
+gf_proc_dump_call_frame_to_dict (call_frame_t *call_frame,
+ char *prefix, dict_t *dict)
+{
+ int ret = -1;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0,};
+ call_frame_t tmp_frame = {0,};
+
+ if (!call_frame || !dict)
+ return;
+
+ ret = TRY_LOCK (&call_frame->lock);
+ if (ret)
+ return;
+ memcpy (&tmp_frame, call_frame, sizeof (tmp_frame));
+ UNLOCK (&call_frame->lock);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.refcount", prefix);
+ ret = dict_set_int32 (dict, key, tmp_frame.ref_count);
+ if (ret)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.translator", prefix);
+ ret = dict_set_dynstr (dict, key, gf_strdup (tmp_frame.this->name));
+ if (ret)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.complete", prefix);
+ ret = dict_set_int32 (dict, key, tmp_frame.complete);
+ if (ret)
+ return;
+
+ if (tmp_frame.parent) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.parent", prefix);
+ ret = dict_set_dynstr (dict, key,
+ gf_strdup (tmp_frame.parent->this->name));
+ if (ret)
+ return;
+ }
+
+ if (tmp_frame.wind_from) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.windfrom", prefix);
+ ret = dict_set_dynstr (dict, key,
+ gf_strdup (tmp_frame.wind_from));
+ if (ret)
+ return;
+ }
+
+ if (tmp_frame.wind_to) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.windto", prefix);
+ ret = dict_set_dynstr (dict, key,
+ gf_strdup (tmp_frame.wind_to));
+ if (ret)
+ return;
+ }
+
+ if (tmp_frame.unwind_from) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.unwindfrom", prefix);
+ ret = dict_set_dynstr (dict, key,
+ gf_strdup (tmp_frame.unwind_from));
+ if (ret)
+ return;
+ }
+
+ if (tmp_frame.unwind_to) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.unwind_to", prefix);
+ ret = dict_set_dynstr (dict, key,
+ gf_strdup (tmp_frame.unwind_to));
+ }
+
+ return;
+}
+
+void
+gf_proc_dump_call_stack_to_dict (call_stack_t *call_stack,
+ char *prefix, dict_t *dict)
+{
+ int ret = -1;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0,};
+ call_frame_t *trav = NULL;
+ int count = 0;
+ int i = 0;
+
+ if (!call_stack || !dict)
+ return;
+
+ count = call_frames_count (&call_stack->frames);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.uid", prefix);
+ ret = dict_set_int32 (dict, key, call_stack->uid);
+ if (ret)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.gid", prefix);
+ ret = dict_set_int32 (dict, key, call_stack->gid);
+ if (ret)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pid", prefix);
+ ret = dict_set_int32 (dict, key, call_stack->pid);
+ if (ret)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.unique", prefix);
+ ret = dict_set_uint64 (dict, key, call_stack->unique);
+ if (ret)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.op", prefix);
+ if (call_stack->type == GF_OP_TYPE_FOP)
+ ret = dict_set_str (dict, key,
+ (char *)gf_fop_list[call_stack->op]);
+ else
+ ret = dict_set_str (dict, key, "other");
+
+ if (ret)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.type", prefix);
+ ret = dict_set_int32 (dict, key, call_stack->type);
+ if (ret)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.count", prefix);
+ ret = dict_set_int32 (dict, key, count);
+ if (ret)
+ return;
+
+ trav = &call_stack->frames;
+ for (i = 0; i < count; i++) {
+ if (trav) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.frame%d",
+ prefix, i);
+ gf_proc_dump_call_frame_to_dict (trav, key, dict);
+ trav = trav->next;
+ }
+ }
+
+ return;
+}
+
+void
+gf_proc_dump_pending_frames_to_dict (call_pool_t *call_pool, dict_t *dict)
+{
+ int ret = -1;
+ call_stack_t *trav = NULL;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0,};
+ int i = 0;
+
+ if (!call_pool || !dict)
+ return;
+
+ ret = TRY_LOCK (&call_pool->lock);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_WARNING, "Unable to dump call pool"
+ " to dict. errno: %d", errno);
+ return;
+ }
+
+ ret = dict_set_int32 (dict, "callpool.count", call_pool->cnt);
+ if (ret)
+ goto out;
+
+ list_for_each_entry (trav, &call_pool->all_frames, all_frames) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "callpool.stack%d", i);
+ gf_proc_dump_call_stack_to_dict (trav, key, dict);
+ i++;
+ }
+
+out:
+ UNLOCK (&call_pool->lock);
+
+ return;
+}
+
+gf_boolean_t
+__is_fuse_call (call_frame_t *frame)
+{
+ gf_boolean_t is_fuse_call = _gf_false;
+ GF_ASSERT (frame);
+ GF_ASSERT (frame->root);
+
+ if (NFS_PID != frame->root->pid)
+ is_fuse_call = _gf_true;
+ return is_fuse_call;
+}
diff --git a/libglusterfs/src/stack.h b/libglusterfs/src/stack.h
index 942972556..f2d2ef950 100644
--- a/libglusterfs/src/stack.h
+++ b/libglusterfs/src/stack.h
@@ -1,20 +1,11 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
/*
@@ -34,256 +25,438 @@ struct _call_stack_t;
typedef struct _call_stack_t call_stack_t;
struct _call_frame_t;
typedef struct _call_frame_t call_frame_t;
-struct _call_pool_t;
-typedef struct _call_pool_t call_pool_t;
+struct call_pool;
+typedef struct call_pool call_pool_t;
+
+#include <sys/time.h>
#include "xlator.h"
#include "dict.h"
#include "list.h"
#include "common-utils.h"
#include "globals.h"
+#include "lkowner.h"
+#include "client_t.h"
-
+#define NFS_PID 1
+#define LOW_PRIO_PROC_PID -1
typedef int32_t (*ret_fn_t) (call_frame_t *frame,
- call_frame_t *prev_frame,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- ...);
-
-struct _call_pool_t {
- union {
- struct list_head all_frames;
- struct {
- call_stack_t *next_call;
- call_stack_t *prev_call;
- } all_stacks;
- };
- int64_t cnt;
- gf_lock_t lock;
+ call_frame_t *prev_frame,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ ...);
+
+struct call_pool {
+ union {
+ struct list_head all_frames;
+ struct {
+ call_stack_t *next_call;
+ call_stack_t *prev_call;
+ } all_stacks;
+ };
+ int64_t cnt;
+ gf_lock_t lock;
+ struct mem_pool *frame_mem_pool;
+ struct mem_pool *stack_mem_pool;
};
struct _call_frame_t {
- call_stack_t *root; /* stack root */
- call_frame_t *parent; /* previous BP */
- call_frame_t *next;
- call_frame_t *prev; /* maintainence list */
- void *local; /* local variables */
- xlator_t *this; /* implicit object */
- ret_fn_t ret; /* op_return address */
- int32_t ref_count;
- gf_lock_t lock;
- void *cookie; /* unique cookie */
- gf_boolean_t complete;
+ call_stack_t *root; /* stack root */
+ call_frame_t *parent; /* previous BP */
+ call_frame_t *next;
+ call_frame_t *prev; /* maintenance list */
+ void *local; /* local variables */
+ xlator_t *this; /* implicit object */
+ ret_fn_t ret; /* op_return address */
+ int32_t ref_count;
+ gf_lock_t lock;
+ void *cookie; /* unique cookie */
+ gf_boolean_t complete;
+
+ glusterfs_fop_t op;
+ struct timeval begin; /* when this frame was created */
+ struct timeval end; /* when this frame completed */
+ const char *wind_from;
+ const char *wind_to;
+ const char *unwind_from;
+ const char *unwind_to;
};
+#define SMALL_GROUP_COUNT 128
+
struct _call_stack_t {
- union {
- struct list_head all_frames;
- struct {
- call_stack_t *next_call;
- call_stack_t *prev_call;
- };
- };
- call_pool_t *pool;
- void *trans;
- uint64_t unique;
- void *state; /* pointer to request state */
- uid_t uid;
- gid_t gid;
- pid_t pid;
- call_frame_t frames;
-
- int32_t op;
- int8_t type;
+ union {
+ struct list_head all_frames;
+ struct {
+ call_stack_t *next_call;
+ call_stack_t *prev_call;
+ };
+ };
+ call_pool_t *pool;
+ gf_lock_t stack_lock;
+ client_t *client;
+ uint64_t unique;
+ void *state; /* pointer to request state */
+ uid_t uid;
+ gid_t gid;
+ pid_t pid;
+ uint16_t ngrps;
+ uint32_t groups_small[SMALL_GROUP_COUNT];
+ uint32_t *groups_large;
+ uint32_t *groups;
+ gf_lkowner_t lk_owner;
+ glusterfs_ctx_t *ctx;
+
+ call_frame_t frames;
+
+ int32_t op;
+ int8_t type;
+ struct timeval tv;
};
+#define frame_set_uid_gid(frm, u, g) \
+ do { \
+ if (frm) { \
+ (frm)->root->uid = u; \
+ (frm)->root->gid = g; \
+ (frm)->root->ngrps = 0; \
+ } \
+ } while (0); \
+
+
+struct xlator_fops;
+
+void
+gf_latency_begin (call_frame_t *frame, void *fn);
+
+void
+gf_latency_end (call_frame_t *frame);
+
static inline void
FRAME_DESTROY (call_frame_t *frame)
{
- if (frame->next)
- frame->next->prev = frame->prev;
- if (frame->prev)
- frame->prev->next = frame->next;
- if (frame->local)
- FREE (frame->local);
- LOCK_DESTROY (&frame->lock);
- FREE (frame);
+ void *local = NULL;
+ if (frame->next)
+ frame->next->prev = frame->prev;
+ if (frame->prev)
+ frame->prev->next = frame->next;
+ if (frame->local) {
+ local = frame->local;
+ frame->local = NULL;
+
+ }
+
+ LOCK_DESTROY (&frame->lock);
+ mem_put (frame);
+
+ if (local)
+ mem_put (local);
}
static inline void
STACK_DESTROY (call_stack_t *stack)
{
- LOCK (&stack->pool->lock);
- {
- list_del_init (&stack->all_frames);
- stack->pool->cnt--;
- }
- UNLOCK (&stack->pool->lock);
+ void *local = NULL;
- if (stack->frames.local)
- FREE (stack->frames.local);
+ LOCK (&stack->pool->lock);
+ {
+ list_del_init (&stack->all_frames);
+ stack->pool->cnt--;
+ }
+ UNLOCK (&stack->pool->lock);
- LOCK_DESTROY (&stack->frames.lock);
+ if (stack->frames.local) {
+ local = stack->frames.local;
+ stack->frames.local = NULL;
+ }
- while (stack->frames.next) {
- FRAME_DESTROY (stack->frames.next);
- }
- FREE (stack);
+ LOCK_DESTROY (&stack->frames.lock);
+ LOCK_DESTROY (&stack->stack_lock);
+
+ while (stack->frames.next) {
+ FRAME_DESTROY (stack->frames.next);
+ }
+
+ GF_FREE (stack->groups_large);
+
+ mem_put (stack);
+
+ if (local)
+ mem_put (local);
}
+static inline void
+STACK_RESET (call_stack_t *stack)
+{
+ void *local = NULL;
+
+ if (stack->frames.local) {
+ local = stack->frames.local;
+ stack->frames.local = NULL;
+ }
+
+ while (stack->frames.next) {
+ FRAME_DESTROY (stack->frames.next);
+ }
+
+ if (local)
+ mem_put (local);
+}
#define cbk(x) cbk_##x
+#define FRAME_SU_DO(frm, local_type) \
+ do { \
+ local_type *__local = (frm)->local; \
+ __local->uid = frm->root->uid; \
+ __local->gid = frm->root->gid; \
+ frm->root->uid = 0; \
+ frm->root->gid = 0; \
+ } while (0); \
+
+#define FRAME_SU_UNDO(frm, local_type) \
+ do { \
+ local_type *__local = (frm)->local; \
+ frm->root->uid = __local->uid; \
+ frm->root->gid = __local->gid; \
+ } while (0); \
+
/* make a call */
-#define STACK_WIND(frame, rfn, obj, fn, params ...) \
- do { \
- call_frame_t *_new = NULL; \
+#define STACK_WIND(frame, rfn, obj, fn, params ...) \
+ do { \
+ call_frame_t *_new = NULL; \
+ xlator_t *old_THIS = NULL; \
+ \
+ _new = mem_get0 (frame->root->pool->frame_mem_pool); \
+ if (!_new) { \
+ gf_log ("stack", GF_LOG_ERROR, "alloc failed"); \
+ break; \
+ } \
+ typeof(fn##_cbk) tmp_cbk = rfn; \
+ _new->root = frame->root; \
+ _new->this = obj; \
+ _new->ret = (ret_fn_t) tmp_cbk; \
+ _new->parent = frame; \
+ _new->cookie = _new; \
+ _new->wind_from = __FUNCTION__; \
+ _new->wind_to = #fn; \
+ _new->unwind_to = #rfn; \
+ \
+ LOCK_INIT (&_new->lock); \
+ LOCK(&frame->root->stack_lock); \
+ { \
+ _new->next = frame->root->frames.next; \
+ _new->prev = &frame->root->frames; \
+ if (frame->root->frames.next) \
+ frame->root->frames.next->prev = _new; \
+ frame->root->frames.next = _new; \
+ frame->ref_count++; \
+ } \
+ UNLOCK(&frame->root->stack_lock); \
+ old_THIS = THIS; \
+ THIS = obj; \
+ if (frame->this->ctx->measure_latency) \
+ gf_latency_begin (_new, fn); \
+ fn (_new, obj, params); \
+ THIS = old_THIS; \
+ } while (0)
+
+
+/* make a call without switching frames */
+#define STACK_WIND_TAIL(frame, obj, fn, params ...) \
+ do { \
xlator_t *old_THIS = NULL; \
- \
- _new = CALLOC (1, sizeof (call_frame_t)); \
- ERR_ABORT (_new); \
- typeof(fn##_cbk) tmp_cbk = rfn; \
- _new->root = frame->root; \
- _new->next = frame->root->frames.next; \
- _new->prev = &frame->root->frames; \
- if (frame->root->frames.next) \
- frame->root->frames.next->prev = _new; \
- frame->root->frames.next = _new; \
- _new->this = obj; \
- _new->ret = (ret_fn_t) tmp_cbk; \
- _new->parent = frame; \
- _new->cookie = _new; \
- LOCK_INIT (&_new->lock); \
- frame->ref_count++; \
- \
+ \
+ frame->this = obj; \
+ frame->wind_to = #fn; \
old_THIS = THIS; \
THIS = obj; \
- fn (_new, obj, params); \
+ fn (frame, obj, params); \
THIS = old_THIS; \
- } while (0)
+ } while (0)
/* make a call with a cookie */
-#define STACK_WIND_COOKIE(frame, rfn, cky, obj, fn, params ...) \
- do { \
+#define STACK_WIND_COOKIE(frame, rfn, cky, obj, fn, params ...) \
+ do { \
call_frame_t *_new = NULL; \
xlator_t *old_THIS = NULL; \
\
- _new = CALLOC (1, sizeof (call_frame_t)); \
- ERR_ABORT (_new); \
- typeof(fn##_cbk) tmp_cbk = rfn; \
- _new->root = frame->root; \
- _new->next = frame->root->frames.next; \
- _new->prev = &frame->root->frames; \
- if (frame->root->frames.next) \
- frame->root->frames.next->prev = _new; \
- frame->root->frames.next = _new; \
- _new->this = obj; \
- _new->ret = (ret_fn_t) tmp_cbk; \
- _new->parent = frame; \
- _new->cookie = cky; \
- LOCK_INIT (&_new->lock); \
- frame->ref_count++; \
- fn##_cbk = rfn; \
- \
+ _new = mem_get0 (frame->root->pool->frame_mem_pool); \
+ if (!_new) { \
+ gf_log ("stack", GF_LOG_ERROR, "alloc failed"); \
+ break; \
+ } \
+ typeof(fn##_cbk) tmp_cbk = rfn; \
+ _new->root = frame->root; \
+ _new->this = obj; \
+ _new->ret = (ret_fn_t) tmp_cbk; \
+ _new->parent = frame; \
+ _new->cookie = cky; \
+ _new->wind_from = __FUNCTION__; \
+ _new->wind_to = #fn; \
+ _new->unwind_to = #rfn; \
+ LOCK_INIT (&_new->lock); \
+ LOCK(&frame->root->stack_lock); \
+ { \
+ frame->ref_count++; \
+ _new->next = frame->root->frames.next; \
+ _new->prev = &frame->root->frames; \
+ if (frame->root->frames.next) \
+ frame->root->frames.next->prev = _new; \
+ frame->root->frames.next = _new; \
+ } \
+ UNLOCK(&frame->root->stack_lock); \
+ fn##_cbk = rfn; \
old_THIS = THIS; \
THIS = obj; \
- fn (_new, obj, params); \
+ if (obj->ctx->measure_latency) \
+ gf_latency_begin (_new, fn); \
+ fn (_new, obj, params); \
THIS = old_THIS; \
- } while (0)
+ } while (0)
/* return from function */
-#define STACK_UNWIND(frame, params ...) \
- do { \
- ret_fn_t fn = NULL; \
- call_frame_t *_parent = NULL; \
+#define STACK_UNWIND(frame, params ...) \
+ do { \
+ ret_fn_t fn = NULL; \
+ call_frame_t *_parent = NULL; \
xlator_t *old_THIS = NULL; \
- \
+ if (!frame) { \
+ gf_log ("stack", GF_LOG_CRITICAL, "!frame"); \
+ break; \
+ } \
fn = frame->ret; \
_parent = frame->parent; \
- _parent->ref_count--; \
+ LOCK(&frame->root->stack_lock); \
+ { \
+ _parent->ref_count--; \
+ } \
+ UNLOCK(&frame->root->stack_lock); \
old_THIS = THIS; \
THIS = _parent->this; \
frame->complete = _gf_true; \
- fn (_parent, frame->cookie, _parent->this, params); \
+ frame->unwind_from = __FUNCTION__; \
+ if (frame->this->ctx->measure_latency) \
+ gf_latency_end (frame); \
+ fn (_parent, frame->cookie, _parent->this, params); \
THIS = old_THIS; \
- } while (0)
+ } while (0)
-static inline call_frame_t *
-copy_frame (call_frame_t *frame)
-{
- call_stack_t *newstack = NULL;
- call_stack_t *oldstack = NULL;
-
- if (!frame) {
- return NULL;
- }
-
- newstack = (void *) CALLOC (1, sizeof (*newstack));
- if (newstack == NULL) {
- return NULL;
- }
-
- oldstack = frame->root;
-
- newstack->uid = oldstack->uid;
- newstack->gid = oldstack->gid;
- newstack->pid = oldstack->pid;
- newstack->unique = oldstack->unique;
-
- newstack->frames.this = frame->this;
- newstack->frames.root = newstack;
- newstack->pool = oldstack->pool;
+/* return from function in type-safe way */
+#define STACK_UNWIND_STRICT(op, frame, params ...) \
+ do { \
+ fop_##op##_cbk_t fn = NULL; \
+ call_frame_t *_parent = NULL; \
+ xlator_t *old_THIS = NULL; \
+ \
+ if (!frame) { \
+ gf_log ("stack", GF_LOG_CRITICAL, "!frame"); \
+ break; \
+ } \
+ fn = (fop_##op##_cbk_t )frame->ret; \
+ _parent = frame->parent; \
+ LOCK(&frame->root->stack_lock); \
+ { \
+ _parent->ref_count--; \
+ } \
+ UNLOCK(&frame->root->stack_lock); \
+ old_THIS = THIS; \
+ THIS = _parent->this; \
+ frame->complete = _gf_true; \
+ frame->unwind_from = __FUNCTION__; \
+ if (frame->this->ctx->measure_latency) \
+ gf_latency_end (frame); \
+ fn (_parent, frame->cookie, _parent->this, params); \
+ THIS = old_THIS; \
+ } while (0)
- LOCK_INIT (&newstack->frames.lock);
- LOCK (&oldstack->pool->lock);
- {
- list_add (&newstack->all_frames, &oldstack->all_frames);
- newstack->pool->cnt++;
-
+static inline int
+call_stack_alloc_groups (call_stack_t *stack, int ngrps)
+{
+ if (ngrps <= SMALL_GROUP_COUNT) {
+ stack->groups = stack->groups_small;
+ } else {
+ stack->groups_large = GF_CALLOC (sizeof (gid_t), ngrps,
+ gf_common_mt_groups_t);
+ if (!stack->groups_large)
+ return -1;
+ stack->groups = stack->groups_large;
}
- UNLOCK (&oldstack->pool->lock);
- return &newstack->frames;
+ stack->ngrps = ngrps;
+
+ return 0;
}
static inline call_frame_t *
-create_frame (xlator_t *xl, call_pool_t *pool)
+copy_frame (call_frame_t *frame)
{
- call_stack_t *stack = NULL;
+ call_stack_t *newstack = NULL;
+ call_stack_t *oldstack = NULL;
- if (!xl || !pool) {
- return NULL;
- }
+ if (!frame) {
+ return NULL;
+ }
- stack = CALLOC (1, sizeof (*stack));
- if (!stack)
- return NULL;
+ newstack = mem_get0 (frame->root->pool->stack_mem_pool);
+ if (newstack == NULL) {
+ return NULL;
+ }
- stack->pool = pool;
- stack->frames.root = stack;
- stack->frames.this = xl;
+ oldstack = frame->root;
- LOCK (&pool->lock);
- {
- list_add (&stack->all_frames, &pool->all_frames);
- pool->cnt++;
+ newstack->uid = oldstack->uid;
+ newstack->gid = oldstack->gid;
+ newstack->pid = oldstack->pid;
+ newstack->ngrps = oldstack->ngrps;
+ newstack->op = oldstack->op;
+ newstack->type = oldstack->type;
+ if (call_stack_alloc_groups (newstack, oldstack->ngrps) != 0) {
+ mem_put (newstack);
+ return NULL;
}
- UNLOCK (&pool->lock);
+ memcpy (newstack->groups, oldstack->groups,
+ sizeof (gid_t) * oldstack->ngrps);
+ newstack->unique = oldstack->unique;
+
+ newstack->frames.this = frame->this;
+ newstack->frames.root = newstack;
+ newstack->pool = oldstack->pool;
+ newstack->lk_owner = oldstack->lk_owner;
+ newstack->ctx = oldstack->ctx;
+
+ if (newstack->ctx->measure_latency) {
+ if (gettimeofday (&newstack->tv, NULL) == -1)
+ gf_log ("stack", GF_LOG_ERROR, "gettimeofday () failed."
+ " (%s)", strerror (errno));
+ memcpy (&newstack->frames.begin, &newstack->tv,
+ sizeof (newstack->tv));
+ }
- LOCK_INIT (&stack->frames.lock);
+ LOCK_INIT (&newstack->frames.lock);
+ LOCK_INIT (&newstack->stack_lock);
- return &stack->frames;
-}
+ LOCK (&oldstack->pool->lock);
+ {
+ list_add (&newstack->all_frames, &oldstack->all_frames);
+ newstack->pool->cnt++;
+ }
+ UNLOCK (&oldstack->pool->lock);
-void
-gf_proc_dump_pending_frames(call_pool_t *call_pool);
+ return &newstack->frames;
+}
+void gf_proc_dump_pending_frames(call_pool_t *call_pool);
+void gf_proc_dump_pending_frames_to_dict (call_pool_t *call_pool,
+ dict_t *dict);
+call_frame_t *create_frame (xlator_t *xl, call_pool_t *pool);
+gf_boolean_t __is_fuse_call (call_frame_t *frame);
#endif /* _STACK_H */
diff --git a/libglusterfs/src/statedump.c b/libglusterfs/src/statedump.c
index 6839c28f6..8175faba4 100644
--- a/libglusterfs/src/statedump.c
+++ b/libglusterfs/src/statedump.c
@@ -1,118 +1,237 @@
/*
- Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <stdarg.h>
-#include <malloc.h>
#include "glusterfs.h"
#include "logging.h"
#include "iobuf.h"
#include "statedump.h"
#include "stack.h"
+#include "common-utils.h"
+
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif /* MALLOC_H */
+
+/* We don't want gf_log in this function because it may cause
+ 'deadlock' with statedump. This is because statedump happens
+ inside a signal handler and cannot afford to block on a lock.*/
+#ifdef gf_log
+# undef gf_log
+#endif
+
+#define GF_PROC_DUMP_IS_OPTION_ENABLED(opt) \
+ (dump_options.dump_##opt == _gf_true)
+#define GF_PROC_DUMP_IS_XL_OPTION_ENABLED(opt) \
+ (dump_options.xl_options.dump_##opt == _gf_true)
+
+extern xlator_t global_xlator;
static pthread_mutex_t gf_proc_dump_mutex;
static int gf_dump_fd = -1;
+gf_dump_options_t dump_options;
+
-static void
+static void
gf_proc_dump_lock (void)
{
- pthread_mutex_lock(&gf_proc_dump_mutex);
+ pthread_mutex_lock (&gf_proc_dump_mutex);
}
-static void
+
+static void
gf_proc_dump_unlock (void)
{
- pthread_mutex_unlock(&gf_proc_dump_mutex);
-}
+ pthread_mutex_unlock (&gf_proc_dump_mutex);
+}
static int
-gf_proc_dump_open (void)
+gf_proc_dump_open (char *tmpname)
{
- char path[256];
int dump_fd = -1;
- memset(path, 0, sizeof(path));
- snprintf(path, sizeof(path), "%s.%d",GF_DUMP_LOGFILE_ROOT, getpid());
-
- dump_fd = open(path, O_CREAT|O_RDWR|O_TRUNC|O_APPEND, 0600);
- if (dump_fd < 0) {
- gf_log("", GF_LOG_ERROR, "Unable to open file: %s"
- " errno: %d", path, errno);
+ dump_fd = mkstemp (tmpname);
+ if (dump_fd < 0)
return -1;
- }
- gf_dump_fd = dump_fd;
+ gf_dump_fd = dump_fd;
return 0;
}
static void
gf_proc_dump_close (void)
{
- close(gf_dump_fd);
+ close (gf_dump_fd);
gf_dump_fd = -1;
}
-void
-gf_proc_dump_add_section (char *key, ...)
+static int
+gf_proc_dump_set_path (char *dump_options_file)
{
-
+ int ret = -1;
+ FILE *fp = NULL;
+ char buf[256];
+ char *key = NULL, *value = NULL;
+ char *saveptr = NULL;
+
+ fp = fopen (dump_options_file, "r");
+ if (!fp)
+ goto out;
+
+ ret = fscanf (fp, "%s", buf);
+
+ while (ret != EOF) {
+ key = strtok_r (buf, "=", &saveptr);
+ if (!key) {
+ ret = fscanf (fp, "%s", buf);
+ continue;
+ }
+
+ value = strtok_r (NULL, "=", &saveptr);
+
+ if (!value) {
+ ret = fscanf (fp, "%s", buf);
+ continue;
+ }
+ if (!strcmp (key, "path")) {
+ dump_options.dump_path = gf_strdup (value);
+ break;
+ }
+ }
+
+out:
+ if (fp)
+ fclose (fp);
+ return ret;
+}
+
+int
+gf_proc_dump_add_section (char *key, ...)
+{
+
char buf[GF_DUMP_MAX_BUF_LEN];
va_list ap;
- int ret;
- assert(key);
-
- memset(buf, 0, sizeof(buf));
- snprintf(buf, GF_DUMP_MAX_BUF_LEN, "\n[");
- va_start(ap, key);
- vsnprintf(buf + strlen(buf),
- GF_DUMP_MAX_BUF_LEN - strlen(buf), key, ap);
- va_end(ap);
- snprintf(buf + strlen(buf),
- GF_DUMP_MAX_BUF_LEN - strlen(buf), "]\n");
- ret = write(gf_dump_fd, buf, strlen(buf));
+ GF_ASSERT(key);
+
+ memset (buf, 0, sizeof(buf));
+ snprintf (buf, GF_DUMP_MAX_BUF_LEN, "\n[");
+ va_start (ap, key);
+ vsnprintf (buf + strlen(buf),
+ GF_DUMP_MAX_BUF_LEN - strlen (buf), key, ap);
+ va_end (ap);
+ snprintf (buf + strlen(buf),
+ GF_DUMP_MAX_BUF_LEN - strlen (buf), "]\n");
+ return write (gf_dump_fd, buf, strlen (buf));
}
-void
-gf_proc_dump_write (char *key, char *value,...)
-{
-
- char buf[GF_DUMP_MAX_BUF_LEN];
- int offset = 0;
- va_list ap;
- int ret;
-
- offset = strlen(key);
-
- memset(buf, 0, GF_DUMP_MAX_BUF_LEN);
- snprintf(buf, GF_DUMP_MAX_BUF_LEN, "%s",key);
- snprintf(buf + offset, GF_DUMP_MAX_BUF_LEN - offset, "=");
+
+int
+gf_proc_dump_write (char *key, char *value,...)
+{
+
+ char buf[GF_DUMP_MAX_BUF_LEN];
+ int offset = 0;
+ va_list ap;
+
+ GF_ASSERT (key);
+
+ offset = strlen (key);
+
+ memset (buf, 0, GF_DUMP_MAX_BUF_LEN);
+ snprintf (buf, GF_DUMP_MAX_BUF_LEN, "%s", key);
+ snprintf (buf + offset, GF_DUMP_MAX_BUF_LEN - offset, "=");
offset += 1;
- va_start(ap, value);
- vsnprintf(buf + offset, GF_DUMP_MAX_BUF_LEN - offset, value, ap);
- va_end(ap);
+ va_start (ap, value);
+ vsnprintf (buf + offset, GF_DUMP_MAX_BUF_LEN - offset, value, ap);
+ va_end (ap);
+
+ offset = strlen (buf);
+ snprintf (buf + offset, GF_DUMP_MAX_BUF_LEN - offset, "\n");
+ return write (gf_dump_fd, buf, strlen (buf));
+}
+
+static void
+gf_proc_dump_xlator_mem_info (xlator_t *xl)
+{
+ int i = 0;
+ struct mem_acct rec = {0,};
+
+ if (!xl)
+ return;
- offset = strlen(buf);
- snprintf(buf + offset, GF_DUMP_MAX_BUF_LEN - offset, "\n");
- ret = write(gf_dump_fd, buf, strlen(buf));
+ if (!xl->mem_acct.rec)
+ return;
+
+ gf_proc_dump_add_section ("%s.%s - Memory usage", xl->type, xl->name);
+ gf_proc_dump_write ("num_types", "%d", xl->mem_acct.num_types);
+
+ for (i = 0; i < xl->mem_acct.num_types; i++) {
+ if (!(memcmp (&xl->mem_acct.rec[i], &rec,
+ sizeof (struct mem_acct))))
+ continue;
+
+ gf_proc_dump_add_section ("%s.%s - usage-type %d memusage",
+ xl->type, xl->name, i);
+ gf_proc_dump_write ("size", "%u", xl->mem_acct.rec[i].size);
+ gf_proc_dump_write ("num_allocs", "%u",
+ xl->mem_acct.rec[i].num_allocs);
+ gf_proc_dump_write ("max_size", "%u",
+ xl->mem_acct.rec[i].max_size);
+ gf_proc_dump_write ("max_num_allocs", "%u",
+ xl->mem_acct.rec[i].max_num_allocs);
+ gf_proc_dump_write ("total_allocs", "%u",
+ xl->mem_acct.rec[i].total_allocs);
+ }
+
+ return;
}
+static void
+gf_proc_dump_xlator_mem_info_only_in_use (xlator_t *xl)
+{
+ int i = 0;
+
+ if (!xl)
+ return;
+
+ if (!xl->mem_acct.rec)
+ return;
+
+ gf_proc_dump_add_section ("%s.%s - Memory usage", xl->type, xl->name);
+ gf_proc_dump_write ("num_types", "%d", xl->mem_acct.num_types);
+
+ for (i = 0; i < xl->mem_acct.num_types; i++) {
+ if (!xl->mem_acct.rec[i].size)
+ continue;
+
+ gf_proc_dump_add_section ("%s.%s - usage-type %d", xl->type,
+ xl->name,i);
+
+ gf_proc_dump_write ("size", "%u",
+ xl->mem_acct.rec[i].size);
+ gf_proc_dump_write ("max_size", "%u",
+ xl->mem_acct.rec[i].max_size);
+ gf_proc_dump_write ("num_allocs", "%u",
+ xl->mem_acct.rec[i].num_allocs);
+ gf_proc_dump_write ("max_num_allocs", "%u",
+ xl->mem_acct.rec[i].max_num_allocs);
+ gf_proc_dump_write ("total_allocs", "%u",
+ xl->mem_acct.rec[i].total_allocs);
+ }
+
+ return;
+}
+
+
/* Currently this dumps only mallinfo. More can be built on here */
void
@@ -121,93 +240,594 @@ gf_proc_dump_mem_info ()
#ifdef HAVE_MALLOC_STATS
struct mallinfo info;
- memset(&info, 0, sizeof(struct mallinfo));
- info = mallinfo();
-
- gf_proc_dump_add_section("mallinfo");
- gf_proc_dump_write("mallinfo_arena", "%d", info.arena);
- gf_proc_dump_write("mallinfo_ordblks", "%d", info.ordblks);
- gf_proc_dump_write("mallinfo_smblks","%d", info.smblks);
- gf_proc_dump_write("mallinfo_hblks","%d", info.hblks);
- gf_proc_dump_write("mallinfo_hblkhd", "%d", info.hblkhd);
- gf_proc_dump_write("mallinfo_usmblks","%d", info.usmblks);
- gf_proc_dump_write("mallinfo_fsmblks","%d", info.fsmblks);
- gf_proc_dump_write("mallinfo_uordblks","%d", info.uordblks);
- gf_proc_dump_write("mallinfo_fordblks", "%d", info.fordblks);
- gf_proc_dump_write("mallinfo_keepcost", "%d", info.keepcost);
+ memset (&info, 0, sizeof (struct mallinfo));
+ info = mallinfo ();
+
+ gf_proc_dump_add_section ("mallinfo");
+ gf_proc_dump_write ("mallinfo_arena", "%d", info.arena);
+ gf_proc_dump_write ("mallinfo_ordblks", "%d", info.ordblks);
+ gf_proc_dump_write ("mallinfo_smblks", "%d", info.smblks);
+ gf_proc_dump_write ("mallinfo_hblks", "%d", info.hblks);
+ gf_proc_dump_write ("mallinfo_hblkhd", "%d", info.hblkhd);
+ gf_proc_dump_write ("mallinfo_usmblks", "%d", info.usmblks);
+ gf_proc_dump_write ("mallinfo_fsmblks", "%d", info.fsmblks);
+ gf_proc_dump_write ("mallinfo_uordblks", "%d", info.uordblks);
+ gf_proc_dump_write ("mallinfo_fordblks", "%d", info.fordblks);
+ gf_proc_dump_write ("mallinfo_keepcost", "%d", info.keepcost);
+#endif
+ gf_proc_dump_xlator_mem_info(&global_xlator);
+
+}
+
+void
+gf_proc_dump_mem_info_to_dict (dict_t *dict)
+{
+ if (!dict)
+ return;
+#ifdef HAVE_MALLOC_STATS
+ struct mallinfo info;
+ int ret = -1;
+
+ memset (&info, 0, sizeof(struct mallinfo));
+ info = mallinfo ();
+
+ ret = dict_set_int32 (dict, "mallinfo.arena", info.arena);
+ if (ret)
+ return;
+
+ ret = dict_set_int32 (dict, "mallinfo.ordblks", info.ordblks);
+ if (ret)
+ return;
+
+ ret = dict_set_int32 (dict, "mallinfo.smblks", info.smblks);
+ if (ret)
+ return;
+
+ ret = dict_set_int32 (dict, "mallinfo.hblks", info.hblks);
+ if (ret)
+ return;
+
+ ret = dict_set_int32 (dict, "mallinfo.hblkhd", info.hblkhd);
+ if (ret)
+ return;
+
+ ret = dict_set_int32 (dict, "mallinfo.usmblks", info.usmblks);
+ if (ret)
+ return;
+
+ ret = dict_set_int32 (dict, "mallinfo.fsmblks", info.fsmblks);
+ if (ret)
+ return;
+
+ ret = dict_set_int32 (dict, "mallinfo.uordblks", info.uordblks);
+ if (ret)
+ return;
+
+ ret = dict_set_int32 (dict, "mallinfo.fordblks", info.fordblks);
+ if (ret)
+ return;
+
+ ret = dict_set_int32 (dict, "mallinfo.keepcost", info.keepcost);
+ if (ret)
+ return;
#endif
+ return;
+}
+
+void
+gf_proc_dump_mempool_info (glusterfs_ctx_t *ctx)
+{
+ struct mem_pool *pool = NULL;
+
+ gf_proc_dump_add_section ("mempool");
+
+ list_for_each_entry (pool, &ctx->mempool_list, global_list) {
+ gf_proc_dump_write ("-----", "-----");
+ gf_proc_dump_write ("pool-name", "%s", pool->name);
+ gf_proc_dump_write ("hot-count", "%d", pool->hot_count);
+ gf_proc_dump_write ("cold-count", "%d", pool->cold_count);
+ gf_proc_dump_write ("padded_sizeof", "%lu",
+ pool->padded_sizeof_type);
+ gf_proc_dump_write ("alloc-count", "%"PRIu64, pool->alloc_count);
+ gf_proc_dump_write ("max-alloc", "%d", pool->max_alloc);
+
+ gf_proc_dump_write ("pool-misses", "%"PRIu64, pool->pool_misses);
+ gf_proc_dump_write ("max-stdalloc", "%d", pool->max_stdalloc);
+ }
+}
+void
+gf_proc_dump_mempool_info_to_dict (glusterfs_ctx_t *ctx, dict_t *dict)
+{
+ struct mem_pool *pool = NULL;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0,};
+ int count = 0;
+ int ret = -1;
+
+ if (!ctx || !dict)
+ return;
+
+ list_for_each_entry (pool, &ctx->mempool_list, global_list) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "pool%d.name", count);
+ ret = dict_set_str (dict, key, pool->name);
+ if (ret)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "pool%d.hotcount", count);
+ ret = dict_set_int32 (dict, key, pool->hot_count);
+ if (ret)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "pool%d.coldcount", count);
+ ret = dict_set_int32 (dict, key, pool->cold_count);
+ if (ret)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "pool%d.paddedsizeof", count);
+ ret = dict_set_uint64 (dict, key, pool->padded_sizeof_type);
+ if (ret)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "pool%d.alloccount", count);
+ ret = dict_set_uint64 (dict, key, pool->alloc_count);
+ if (ret)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "pool%d.max_alloc", count);
+ ret = dict_set_int32 (dict, key, pool->max_alloc);
+ if (ret)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "pool%d.max-stdalloc", count);
+ ret = dict_set_int32 (dict, key, pool->max_stdalloc);
+ if (ret)
+ return;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "pool%d.pool-misses", count);
+ ret = dict_set_uint64 (dict, key, pool->pool_misses);
+ if (ret)
+ return;
+ count++;
+ }
+ ret = dict_set_int32 (dict, "mempool-count", count);
+
+ return;
}
+void gf_proc_dump_latency_info (xlator_t *xl);
void
-gf_proc_dump_xlator_info (xlator_t *this_xl)
+gf_proc_dump_xlator_info (xlator_t *top)
+{
+ xlator_t *trav = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ char itable_key[1024] = {0,};
+
+ if (!top)
+ return;
+
+ ctx = top->ctx;
+
+ trav = top;
+ while (trav) {
+
+ if (ctx->measure_latency)
+ gf_proc_dump_latency_info (trav);
+
+ gf_proc_dump_xlator_mem_info(trav);
+
+ if (GF_PROC_DUMP_IS_XL_OPTION_ENABLED (inode) &&
+ (trav->itable)) {
+ snprintf (itable_key, 1024, "%d.%s.itable",
+ ctx->graph_id, trav->name);
+ }
+
+ if (!trav->dumpops) {
+ trav = trav->next;
+ continue;
+ }
+
+ if (trav->dumpops->priv &&
+ GF_PROC_DUMP_IS_XL_OPTION_ENABLED (priv))
+ trav->dumpops->priv (trav);
+
+ if (GF_PROC_DUMP_IS_XL_OPTION_ENABLED (inode) &&
+ (trav->dumpops->inode))
+ trav->dumpops->inode (trav);
+
+ if (trav->dumpops->fd &&
+ GF_PROC_DUMP_IS_XL_OPTION_ENABLED (fd))
+ trav->dumpops->fd (trav);
+
+ if (trav->dumpops->history &&
+ GF_PROC_DUMP_IS_XL_OPTION_ENABLED (history))
+ trav->dumpops->history (trav);
+
+ trav = trav->next;
+ }
+
+ return;
+}
+
+static void
+gf_proc_dump_oldgraph_xlator_info (xlator_t *top)
{
+ xlator_t *trav = NULL;
- if (!this_xl)
+ if (!top)
return;
- while (this_xl) {
- if (!this_xl->dumpops) {
- this_xl = this_xl->next;
+ trav = top;
+ while (trav) {
+ gf_proc_dump_xlator_mem_info_only_in_use (trav);
+
+ if (GF_PROC_DUMP_IS_XL_OPTION_ENABLED (inode) &&
+ (trav->itable)) {
+ /*TODO: dump inode table info if necessary by
+ printing the graph id (taken by glusterfs_cbtx_t)
+ in the key
+ */
+ }
+
+ if (!trav->dumpops) {
+ trav = trav->next;
continue;
}
- if (this_xl->dumpops->priv)
- this_xl->dumpops->priv(this_xl);
- if (this_xl->dumpops->inode)
- this_xl->dumpops->inode(this_xl);
- if (this_xl->dumpops->fd)
- this_xl->dumpops->fd(this_xl);
- this_xl = this_xl->next;
+
+ if (GF_PROC_DUMP_IS_XL_OPTION_ENABLED (inode) &&
+ (trav->dumpops->inode))
+ trav->dumpops->inode (trav);
+
+ if (trav->dumpops->fd &&
+ GF_PROC_DUMP_IS_XL_OPTION_ENABLED (fd))
+ trav->dumpops->fd (trav);
+
+ trav = trav->next;
}
return;
}
+static int
+gf_proc_dump_enable_all_options ()
+{
+
+ GF_PROC_DUMP_SET_OPTION (dump_options.dump_mem, _gf_true);
+ GF_PROC_DUMP_SET_OPTION (dump_options.dump_iobuf, _gf_true);
+ GF_PROC_DUMP_SET_OPTION (dump_options.dump_callpool, _gf_true);
+ GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_priv, _gf_true);
+ GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_inode, _gf_true);
+ GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_fd, _gf_true);
+ GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_inodectx,
+ _gf_true);
+ GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_fdctx, _gf_true);
+ GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_history,
+ _gf_true);
+
+ return 0;
+}
+
+gf_boolean_t
+is_gf_proc_dump_all_disabled ()
+{
+ gf_boolean_t all_disabled = _gf_true;
+
+ GF_CHECK_DUMP_OPTION_ENABLED (dump_options.dump_mem, all_disabled, out);
+ GF_CHECK_DUMP_OPTION_ENABLED (dump_options.dump_iobuf, all_disabled, out);
+ GF_CHECK_DUMP_OPTION_ENABLED (dump_options.dump_callpool, all_disabled,
+ out);
+ GF_CHECK_DUMP_OPTION_ENABLED (dump_options.xl_options.dump_priv,
+ all_disabled, out);
+ GF_CHECK_DUMP_OPTION_ENABLED (dump_options.xl_options.dump_inode,
+ all_disabled, out);
+ GF_CHECK_DUMP_OPTION_ENABLED (dump_options.xl_options.dump_fd,
+ all_disabled, out);
+ GF_CHECK_DUMP_OPTION_ENABLED (dump_options.xl_options.dump_inodectx,
+ all_disabled, out);
+ GF_CHECK_DUMP_OPTION_ENABLED (dump_options.xl_options.dump_fdctx,
+ all_disabled, out);
+ GF_CHECK_DUMP_OPTION_ENABLED (dump_options.xl_options.dump_history,
+ all_disabled, out);
+
+out:
+ return all_disabled;
+}
+
+/* These options are dumped by default if glusterdump.options
+ file exists and it is emtpty
+*/
+static int
+gf_proc_dump_enable_default_options ()
+{
+ GF_PROC_DUMP_SET_OPTION (dump_options.dump_mem, _gf_true);
+ GF_PROC_DUMP_SET_OPTION (dump_options.dump_callpool, _gf_true);
+
+ return 0;
+}
+
+static int
+gf_proc_dump_disable_all_options ()
+{
+
+ GF_PROC_DUMP_SET_OPTION (dump_options.dump_mem, _gf_false);
+ GF_PROC_DUMP_SET_OPTION (dump_options.dump_iobuf, _gf_false);
+ GF_PROC_DUMP_SET_OPTION (dump_options.dump_callpool, _gf_false);
+ GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_priv, _gf_false);
+ GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_inode,
+ _gf_false);
+ GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_fd, _gf_false);
+ GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_inodectx,
+ _gf_false);
+ GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_fdctx, _gf_false);
+ GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_history,
+ _gf_false);
+ return 0;
+}
+
+static int
+gf_proc_dump_parse_set_option (char *key, char *value)
+{
+ gf_boolean_t *opt_key = NULL;
+ gf_boolean_t opt_value = _gf_false;
+ char buf[GF_DUMP_MAX_BUF_LEN];
+ int ret = -1;
+
+ if (!strcasecmp (key, "all")) {
+ (void)gf_proc_dump_enable_all_options ();
+ return 0;
+ } else if (!strcasecmp (key, "mem")) {
+ opt_key = &dump_options.dump_mem;
+ } else if (!strcasecmp (key, "iobuf")) {
+ opt_key = &dump_options.dump_iobuf;
+ } else if (!strcasecmp (key, "callpool")) {
+ opt_key = &dump_options.dump_callpool;
+ } else if (!strcasecmp (key, "priv")) {
+ opt_key = &dump_options.xl_options.dump_priv;
+ } else if (!strcasecmp (key, "fd")) {
+ opt_key = &dump_options.xl_options.dump_fd;
+ } else if (!strcasecmp (key, "inode")) {
+ opt_key = &dump_options.xl_options.dump_inode;
+ } else if (!strcasecmp (key, "inodectx")) {
+ opt_key = &dump_options.xl_options.dump_inodectx;
+ } else if (!strcasecmp (key, "fdctx")) {
+ opt_key = &dump_options.xl_options.dump_fdctx;
+ } else if (!strcasecmp (key, "history")) {
+ opt_key = &dump_options.xl_options.dump_history;
+ }
+
+ if (!opt_key) {
+ //None of dump options match the key, return back
+ snprintf (buf, sizeof (buf), "[Warning]:None of the options "
+ "matched key : %s\n", key);
+ ret = write (gf_dump_fd, buf, strlen (buf));
+
+ if (ret >= 0)
+ ret = -1;
+ goto out;
+
+ }
+
+ opt_value = (strncasecmp (value, "yes", 3) ?
+ _gf_false: _gf_true);
+
+ GF_PROC_DUMP_SET_OPTION (*opt_key, opt_value);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static int
+gf_proc_dump_options_init ()
+{
+ int ret = -1;
+ FILE *fp = NULL;
+ char buf[256];
+ char *key = NULL, *value = NULL;
+ char *saveptr = NULL;
+ char dump_option_file[PATH_MAX];
+
+ /* glusterd will create a file glusterdump.<pid>.options and
+ sets the statedump options for the process and the file is removed
+ after the statedump is taken. Direct issue of SIGUSR1 does not have
+ mechanism for considering the statedump options. So to have a way
+ of configuring the statedump of all the glusterfs processes through
+ both cli command and SIGUSR1, glusterdump.options file is searched
+ and the options mentioned in it are given the higher priority.
+ */
+ snprintf (dump_option_file, sizeof (dump_option_file),
+ DEFAULT_VAR_RUN_DIRECTORY
+ "/glusterdump.options");
+ fp = fopen (dump_option_file, "r");
+ if (!fp) {
+ snprintf (dump_option_file, sizeof (dump_option_file),
+ DEFAULT_VAR_RUN_DIRECTORY
+ "/glusterdump.%d.options", getpid ());
+
+ fp = fopen (dump_option_file, "r");
+
+ if (!fp) {
+ //ENOENT, return success
+ (void) gf_proc_dump_enable_all_options ();
+ return 0;
+ }
+ }
+
+ (void) gf_proc_dump_disable_all_options ();
+
+ // swallow the errors if setting statedump file path is failed.
+ ret = gf_proc_dump_set_path (dump_option_file);
+
+ ret = fscanf (fp, "%s", buf);
+
+ while (ret != EOF) {
+ key = strtok_r (buf, "=", &saveptr);
+ if (!key) {
+ ret = fscanf (fp, "%s", buf);
+ continue;
+ }
+
+ value = strtok_r (NULL, "=", &saveptr);
+
+ if (!value) {
+ ret = fscanf (fp, "%s", buf);
+ continue;
+ }
+
+ gf_proc_dump_parse_set_option (key, value);
+ }
+
+ if (is_gf_proc_dump_all_disabled ())
+ (void) gf_proc_dump_enable_default_options ();
+
+ if (fp)
+ fclose (fp);
+
+ return 0;
+}
void
-gf_proc_dump_info (int signum)
-{
- int ret = -1;
- glusterfs_ctx_t *ctx = NULL;
-
- gf_proc_dump_lock();
- ret = gf_proc_dump_open();
- if (ret < 0)
+gf_proc_dump_info (int signum, glusterfs_ctx_t *ctx)
+{
+ int i = 0;
+ int ret = -1;
+ glusterfs_graph_t *trav = NULL;
+ char brick_name[PATH_MAX] = {0,};
+ char timestr[256] = {0,};
+ char sign_string[512] = {0,};
+ char tmp_dump_name[PATH_MAX] = {0,};
+ char path[PATH_MAX] = {0,};
+ struct timeval tv = {0,};
+
+ gf_proc_dump_lock ();
+
+ if (!ctx)
+ goto out;
+
+ if (ctx->cmd_args.brick_name) {
+ GF_REMOVE_SLASH_FROM_PATH (ctx->cmd_args.brick_name, brick_name);
+ } else
+ strncpy (brick_name, "glusterdump", sizeof (brick_name));
+
+ ret = gf_proc_dump_options_init ();
+ if (ret < 0)
goto out;
- gf_proc_dump_mem_info();
- ctx = get_global_ctx_ptr();
- if (ctx) {
- iobuf_stats_dump(ctx->iobuf_pool);
- gf_proc_dump_pending_frames(ctx->pool);
- gf_proc_dump_xlator_info(ctx->graph);
- }
-
- gf_proc_dump_close();
+
+ snprintf (path, sizeof (path), "%s/%s.%d.dump.%"PRIu64,
+ ((dump_options.dump_path != NULL)?dump_options.dump_path:
+ ((ctx->statedump_path != NULL)?ctx->statedump_path:
+ DEFAULT_VAR_RUN_DIRECTORY)), brick_name, getpid(),
+ (uint64_t) time (NULL));
+
+ snprintf (tmp_dump_name, PATH_MAX, "%s/dumpXXXXXX",
+ ((dump_options.dump_path != NULL)?dump_options.dump_path:
+ ((ctx->statedump_path != NULL)?ctx->statedump_path:
+ DEFAULT_VAR_RUN_DIRECTORY)));
+
+ ret = gf_proc_dump_open (tmp_dump_name);
+ if (ret < 0)
+ goto out;
+
+ //continue even though gettimeofday() has failed
+ ret = gettimeofday (&tv, NULL);
+ if (0 == ret) {
+ gf_time_fmt (timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr),
+ sizeof timestr - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS, tv.tv_usec);
+ }
+
+ snprintf (sign_string, sizeof (sign_string), "DUMP-START-TIME: %s\n",
+ timestr);
+
+ //swallow the errors of write for start and end marker
+ ret = write (gf_dump_fd, sign_string, strlen (sign_string));
+
+ memset (sign_string, 0, sizeof (sign_string));
+ memset (timestr, 0, sizeof (timestr));
+ memset (&tv, 0, sizeof (tv));
+
+ if (GF_PROC_DUMP_IS_OPTION_ENABLED (mem)) {
+ gf_proc_dump_mem_info ();
+ gf_proc_dump_mempool_info (ctx);
+ }
+
+ if (GF_PROC_DUMP_IS_OPTION_ENABLED (iobuf))
+ iobuf_stats_dump (ctx->iobuf_pool);
+ if (GF_PROC_DUMP_IS_OPTION_ENABLED (callpool))
+ gf_proc_dump_pending_frames (ctx->pool);
+
+ if (ctx->master) {
+ gf_proc_dump_add_section ("fuse");
+ gf_proc_dump_xlator_info (ctx->master);
+ }
+
+ if (ctx->active) {
+ gf_proc_dump_add_section ("active graph - %d", ctx->graph_id);
+ gf_proc_dump_xlator_info (ctx->active->top);
+ }
+
+ i = 0;
+ list_for_each_entry (trav, &ctx->graphs, list) {
+ if (trav == ctx->active)
+ continue;
+
+ gf_proc_dump_add_section ("oldgraph[%d]", i);
+
+ gf_proc_dump_oldgraph_xlator_info (trav->top);
+ i++;
+ }
+
+ ret = gettimeofday (&tv, NULL);
+ if (0 == ret) {
+ gf_time_fmt (timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr),
+ sizeof timestr - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS, tv.tv_usec);
+ }
+
+ snprintf (sign_string, sizeof (sign_string), "\nDUMP-END-TIME: %s",
+ timestr);
+ ret = write (gf_dump_fd, sign_string, strlen (sign_string));
+
out:
- gf_proc_dump_unlock();
+ if (gf_dump_fd != -1)
+ gf_proc_dump_close ();
+ rename (tmp_dump_name, path);
+ GF_FREE (dump_options.dump_path);
+ dump_options.dump_path = NULL;
+ gf_proc_dump_unlock ();
return;
}
-void
+
+void
gf_proc_dump_fini (void)
{
- pthread_mutex_destroy(&gf_proc_dump_mutex);
+ pthread_mutex_destroy (&gf_proc_dump_mutex);
}
void
gf_proc_dump_init ()
{
- pthread_mutex_init(&gf_proc_dump_mutex, NULL);
+ pthread_mutex_init (&gf_proc_dump_mutex, NULL);
- return;
+ return;
}
+
void
gf_proc_dump_cleanup (void)
{
- pthread_mutex_destroy(&gf_proc_dump_mutex);
+ pthread_mutex_destroy (&gf_proc_dump_mutex);
}
-
diff --git a/libglusterfs/src/statedump.h b/libglusterfs/src/statedump.h
index 1997c65b9..8342b120a 100644
--- a/libglusterfs/src/statedump.h
+++ b/libglusterfs/src/statedump.h
@@ -1,33 +1,40 @@
/*
- Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef STATEDUMP_H
-#define STATEDUMP_H
+#define STATEDUMP_H
#include <stdarg.h>
#include "inode.h"
#define GF_DUMP_MAX_BUF_LEN 4096
-#define GF_DUMP_LOGFILE_ROOT "/tmp/glusterdump"
-#define GF_DUMP_LOGFILE_ROOT_LEN 256
+typedef struct gf_dump_xl_options_ {
+ gf_boolean_t dump_priv;
+ gf_boolean_t dump_inode;
+ gf_boolean_t dump_fd;
+ gf_boolean_t dump_inodectx;
+ gf_boolean_t dump_fdctx;
+ gf_boolean_t dump_history;
+} gf_dump_xl_options_t;
+
+typedef struct gf_dump_options_ {
+ gf_boolean_t dump_mem;
+ gf_boolean_t dump_iobuf;
+ gf_boolean_t dump_callpool;
+ gf_dump_xl_options_t xl_options; //options for all xlators
+ char *dump_path;
+} gf_dump_options_t;
+
+extern gf_dump_options_t dump_options;
static inline
void _gf_proc_dump_build_key (char *key, const char *prefix, char *fmt,...)
@@ -39,38 +46,49 @@ void _gf_proc_dump_build_key (char *key, const char *prefix, char *fmt,...)
va_start(ap, fmt);
vsnprintf(buf, GF_DUMP_MAX_BUF_LEN, fmt, ap);
va_end(ap);
- snprintf(key, GF_DUMP_MAX_BUF_LEN, "%s.%s", prefix, buf);
+ snprintf(key, GF_DUMP_MAX_BUF_LEN, "%s.%s", prefix, buf);
}
-#define gf_proc_dump_build_key(key, key_prefix, fmt...) \
-{\
- _gf_proc_dump_build_key(key, key_prefix, ##fmt);\
-}
+#define gf_proc_dump_build_key(key, key_prefix, fmt...) \
+ { \
+ _gf_proc_dump_build_key(key, key_prefix, ##fmt); \
+ }
+
+#define GF_PROC_DUMP_SET_OPTION(opt,val) opt = val
+
+#define GF_CHECK_DUMP_OPTION_ENABLED(option_dump, var, label) \
+ do { \
+ if (option_dump == _gf_true) { \
+ var = _gf_false; \
+ goto label; \
+ } \
+ } while (0);
+
+void gf_proc_dump_init();
+
+void gf_proc_dump_fini(void);
+
+void gf_proc_dump_cleanup(void);
+
+void gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx);
+
+int gf_proc_dump_add_section(char *key,...);
-void
-gf_proc_dump_init();
+int gf_proc_dump_write(char *key, char *value,...);
-void
-gf_proc_dump_fini(void);
+void inode_table_dump(inode_table_t *itable, char *prefix);
-void
-gf_proc_dump_cleanup(void);
+void inode_table_dump_to_dict (inode_table_t *itable, char *prefix, dict_t *dict);
-void
-gf_proc_dump_info(int signum);
+void fdtable_dump(fdtable_t *fdtable, char *prefix);
-void
-gf_proc_dump_add_section(char *key,...);
+void fdtable_dump_to_dict (fdtable_t *fdtable, char *prefix, dict_t *dict);
-void
-gf_proc_dump_write(char *key, char *value,...);
+void inode_dump(inode_t *inode, char *prefix);
-void
-inode_table_dump(inode_table_t *itable, char *prefix);
+void gf_proc_dump_mem_info_to_dict (dict_t *dict);
-void
-fdtable_dump(fdtable_t *fdtable, char *prefix);
+void gf_proc_dump_mempool_info_to_dict (glusterfs_ctx_t *ctx, dict_t *dict);
-void
-inode_dump(inode_t *inode, char *prefix);
+void glusterd_init (int sig);
#endif /* STATEDUMP_H */
diff --git a/libglusterfs/src/store.c b/libglusterfs/src/store.c
new file mode 100644
index 000000000..1e6601837
--- /dev/null
+++ b/libglusterfs/src/store.c
@@ -0,0 +1,709 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <inttypes.h>
+#include <libgen.h>
+
+#include "glusterfs.h"
+#include "store.h"
+#include "dict.h"
+#include "xlator.h"
+
+int32_t
+gf_store_mkdir (char *path)
+{
+ int32_t ret = -1;
+
+ ret = mkdir (path, 0777);
+
+ if ((-1 == ret) && (EEXIST != errno)) {
+ gf_log ("", GF_LOG_ERROR, "mkdir() failed on path %s,"
+ "errno: %s", path, strerror (errno));
+ } else {
+ ret = 0;
+ }
+
+ return ret;
+}
+
+int32_t
+gf_store_handle_create_on_absence (gf_store_handle_t **shandle,
+ char *path)
+{
+ GF_ASSERT (shandle);
+ int32_t ret = 0;
+
+ if (*shandle == NULL) {
+ ret = gf_store_handle_new (path, shandle);
+
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to create store"
+ " handle for path: %s", path);
+ }
+ }
+ return ret;
+}
+
+int32_t
+gf_store_mkstemp (gf_store_handle_t *shandle)
+{
+ int fd = -1;
+ char tmppath[PATH_MAX] = {0,};
+
+ GF_ASSERT (shandle);
+ GF_ASSERT (shandle->path);
+
+ snprintf (tmppath, sizeof (tmppath), "%s.tmp", shandle->path);
+ fd = open (tmppath, O_RDWR | O_CREAT | O_TRUNC | O_SYNC, 0600);
+ if (fd <= 0) {
+ gf_log ("", GF_LOG_ERROR, "Failed to open %s, error: %s",
+ tmppath, strerror (errno));
+ }
+
+ return fd;
+}
+
+int
+gf_store_sync_direntry (char *path)
+{
+ int ret = -1;
+ int dirfd = -1;
+ char *dir = NULL;
+ char *pdir = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ dir = gf_strdup (path);
+ if (!dir)
+ goto out;
+
+ pdir = dirname (dir);
+ dirfd = open (pdir, O_RDONLY);
+ if (dirfd == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to open directory "
+ "%s, due to %s", pdir, strerror (errno));
+ goto out;
+ }
+
+ ret = fsync (dirfd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to fsync %s, due to "
+ "%s", pdir, strerror (errno));
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (dirfd >= 0) {
+ ret = close (dirfd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to close "
+ "%s, due to %s", pdir, strerror (errno));
+ }
+ }
+
+ if (dir)
+ GF_FREE (dir);
+
+ return ret;
+}
+
+int32_t
+gf_store_rename_tmppath (gf_store_handle_t *shandle)
+{
+ int32_t ret = -1;
+ char tmppath[PATH_MAX] = {0,};
+
+ GF_ASSERT (shandle);
+ GF_ASSERT (shandle->path);
+
+ snprintf (tmppath, sizeof (tmppath), "%s.tmp", shandle->path);
+ ret = rename (tmppath, shandle->path);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to rename %s to %s, "
+ "error: %s", tmppath, shandle->path, strerror (errno));
+ goto out;
+ }
+
+ ret = gf_store_sync_direntry (tmppath);
+out:
+ return ret;
+}
+
+int32_t
+gf_store_unlink_tmppath (gf_store_handle_t *shandle)
+{
+ int32_t ret = -1;
+ char tmppath[PATH_MAX] = {0,};
+
+ GF_ASSERT (shandle);
+ GF_ASSERT (shandle->path);
+
+ snprintf (tmppath, sizeof (tmppath), "%s.tmp", shandle->path);
+ ret = unlink (tmppath);
+ if (ret && (errno != ENOENT)) {
+ gf_log ("", GF_LOG_ERROR, "Failed to mv %s to %s, error: %s",
+ tmppath, shandle->path, strerror (errno));
+ } else {
+ ret = 0;
+ }
+
+ return ret;
+}
+
+int
+gf_store_read_and_tokenize (FILE *file, char *str, char **iter_key,
+ char **iter_val, gf_store_op_errno_t *store_errno)
+{
+ int32_t ret = -1;
+ char *savetok = NULL;
+ char *key = NULL;
+ char *value = NULL;
+ char *temp = NULL;
+ size_t str_len = 0;
+
+ GF_ASSERT (file);
+ GF_ASSERT (str);
+ GF_ASSERT (iter_key);
+ GF_ASSERT (iter_val);
+ GF_ASSERT (store_errno);
+
+ temp = fgets (str, PATH_MAX, file);
+ if (temp == NULL || feof (file)) {
+ ret = -1;
+ *store_errno = GD_STORE_EOF;
+ goto out;
+ }
+
+ str_len = strlen(str);
+ str[str_len - 1] = '\0';
+ /* Truncate the "\n", as fgets stores "\n" in str */
+
+ key = strtok_r (str, "=", &savetok);
+ if (!key) {
+ ret = -1;
+ *store_errno = GD_STORE_KEY_NULL;
+ goto out;
+ }
+
+ value = strtok_r (NULL, "=", &savetok);
+ if (!value) {
+ ret = -1;
+ *store_errno = GD_STORE_VALUE_NULL;
+ goto out;
+ }
+
+ *iter_key = key;
+ *iter_val = value;
+ *store_errno = GD_STORE_SUCCESS;
+ ret = 0;
+out:
+ return ret;
+}
+
+int32_t
+gf_store_retrieve_value (gf_store_handle_t *handle, char *key, char **value)
+{
+ int32_t ret = -1;
+ char *scan_str = NULL;
+ char *iter_key = NULL;
+ char *iter_val = NULL;
+ char *free_str = NULL;
+ struct stat st = {0,};
+ gf_store_op_errno_t store_errno = GD_STORE_SUCCESS;
+
+ GF_ASSERT (handle);
+
+ if (handle->locked == F_ULOCK)
+ /* no locking is used handle->fd gets closed() after usage */
+ handle->fd = open (handle->path, O_RDWR);
+ else
+ /* handle->fd is valid already, kept open for lockf() */
+ lseek (handle->fd, 0, SEEK_SET);
+
+ if (handle->fd == -1) {
+ gf_log ("", GF_LOG_ERROR, "Unable to open file %s errno: %s",
+ handle->path, strerror (errno));
+ goto out;
+ }
+ if (!handle->read)
+ handle->read = fdopen (dup(handle->fd), "r");
+ else
+ fseek (handle->read, 0, SEEK_SET);
+
+ if (!handle->read) {
+ gf_log ("", GF_LOG_ERROR, "Unable to open file %s errno: %s",
+ handle->path, strerror (errno));
+ goto out;
+ }
+
+ ret = fstat (handle->fd, &st);
+ if (ret < 0) {
+ gf_log ("", GF_LOG_WARNING, "stat on file %s failed",
+ handle->path);
+ ret = -1;
+ store_errno = GD_STORE_STAT_FAILED;
+ goto out;
+ }
+
+ /* "st.st_size + 1" is used as we are fetching each
+ * line of a file using fgets, fgets will append "\0"
+ * to the end of the string
+ */
+ scan_str = GF_CALLOC (1, st.st_size + 1,
+ gf_common_mt_char);
+
+ if (scan_str == NULL) {
+ ret = -1;
+ store_errno = GD_STORE_ENOMEM;
+ goto out;
+ }
+
+ free_str = scan_str;
+
+ do {
+ ret = gf_store_read_and_tokenize (handle->read, scan_str,
+ &iter_key, &iter_val,
+ &store_errno);
+ if (ret < 0) {
+ gf_log ("", GF_LOG_TRACE, "error while reading key "
+ "'%s': %s", key,
+ gf_store_strerror (store_errno));
+ goto out;
+ }
+
+ gf_log ("", GF_LOG_TRACE, "key %s read", iter_key);
+
+ if (!strcmp (key, iter_key)) {
+ gf_log ("", GF_LOG_DEBUG, "key %s found", key);
+ ret = 0;
+ if (iter_val)
+ *value = gf_strdup (iter_val);
+ goto out;
+ }
+ } while (1);
+out:
+ if (handle->read) {
+ fclose (handle->read);
+ handle->read = NULL;
+ }
+
+ if (handle->fd > 0 && handle->locked == F_ULOCK) {
+ /* only invalidate handle->fd if not locked */
+ close (handle->fd);
+ }
+
+ GF_FREE (free_str);
+
+ return ret;
+}
+
+int32_t
+gf_store_save_value (int fd, char *key, char *value)
+{
+ int32_t ret = -1;
+ int dup_fd = -1;
+ FILE *fp = NULL;
+
+ GF_ASSERT (fd > 0);
+ GF_ASSERT (key);
+ GF_ASSERT (value);
+
+ dup_fd = dup (fd);
+ if (dup_fd == -1)
+ goto out;
+
+ fp = fdopen (dup_fd, "a+");
+ if (fp == NULL) {
+ gf_log ("", GF_LOG_WARNING, "fdopen failed.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = fprintf (fp, "%s=%s\n", key, value);
+ if (ret < 0) {
+ gf_log ("", GF_LOG_WARNING, "Unable to store key: %s,"
+ "value: %s, error: %s", key, value,
+ strerror (errno));
+ ret = -1;
+ goto out;
+ }
+
+ ret = fflush (fp);
+ if (feof (fp)) {
+ gf_log ("", GF_LOG_WARNING,
+ "fflush failed, error: %s",
+ strerror (errno));
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (fp)
+ fclose (fp);
+
+ gf_log ("", GF_LOG_DEBUG, "returning: %d", ret);
+ return ret;
+}
+
+int32_t
+gf_store_handle_new (char *path, gf_store_handle_t **handle)
+{
+ int32_t ret = -1;
+ gf_store_handle_t *shandle = NULL;
+ int fd = -1;
+ char *spath = NULL;
+
+ shandle = GF_CALLOC (1, sizeof (*shandle), gf_common_mt_store_handle_t);
+ if (!shandle)
+ goto out;
+
+ spath = gf_strdup (path);
+
+ if (!spath)
+ goto out;
+
+ fd = open (path, O_RDWR | O_CREAT | O_APPEND, 0600);
+ if (fd <= 0) {
+ gf_log ("", GF_LOG_ERROR, "Failed to open file: %s, error: %s",
+ path, strerror (errno));
+ goto out;
+ }
+
+ ret = gf_store_sync_direntry (spath);
+ if (ret)
+ goto out;
+
+ shandle->path = spath;
+ shandle->locked = F_ULOCK;
+ *handle = shandle;
+
+ ret = 0;
+out:
+ if (fd > 0)
+ close (fd);
+
+ if (ret == -1) {
+ GF_FREE (spath);
+ GF_FREE (shandle);
+ }
+
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+gf_store_handle_retrieve (char *path, gf_store_handle_t **handle)
+{
+ int32_t ret = -1;
+ struct stat statbuf = {0};
+
+ ret = stat (path, &statbuf);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Path corresponding to "
+ "%s, returned error: (%s)",
+ path, strerror (errno));
+ goto out;
+ }
+ ret = gf_store_handle_new (path, handle);
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+gf_store_handle_destroy (gf_store_handle_t *handle)
+{
+ int32_t ret = -1;
+
+ if (!handle) {
+ ret = 0;
+ goto out;
+ }
+
+ GF_FREE (handle->path);
+
+ GF_FREE (handle);
+
+ ret = 0;
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+int32_t
+gf_store_iter_new (gf_store_handle_t *shandle, gf_store_iter_t **iter)
+{
+ int32_t ret = -1;
+ FILE *fp = NULL;
+ gf_store_iter_t *tmp_iter = NULL;
+
+ GF_ASSERT (shandle);
+ GF_ASSERT (iter);
+
+ fp = fopen (shandle->path, "r");
+ if (!fp) {
+ gf_log ("", GF_LOG_ERROR, "Unable to open file %s errno: %d",
+ shandle->path, errno);
+ goto out;
+ }
+
+ tmp_iter = GF_CALLOC (1, sizeof (*tmp_iter),
+ gf_common_mt_store_iter_t);
+ if (!tmp_iter)
+ goto out;
+
+ strncpy (tmp_iter->filepath, shandle->path, sizeof (tmp_iter->filepath));
+ tmp_iter->filepath[sizeof (tmp_iter->filepath) - 1] = 0;
+ tmp_iter->file = fp;
+
+ *iter = tmp_iter;
+ tmp_iter = NULL;
+ ret = 0;
+
+out:
+ if (ret && fp)
+ fclose (fp);
+
+ GF_FREE (tmp_iter);
+
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ return ret;
+}
+
+int32_t
+gf_store_validate_key_value (char *storepath, char *key, char *val,
+ gf_store_op_errno_t *op_errno)
+{
+ int ret = 0;
+
+ GF_ASSERT (op_errno);
+ GF_ASSERT (storepath);
+
+ if ((key == NULL) && (val == NULL)) {
+ ret = -1;
+ gf_log ("", GF_LOG_ERROR, "Glusterd store may be corrupted, "
+ "Invalid key and value (null) in %s", storepath);
+ *op_errno = GD_STORE_KEY_VALUE_NULL;
+ } else if (key == NULL) {
+ ret = -1;
+ gf_log ("", GF_LOG_ERROR, "Glusterd store may be corrupted, "
+ "Invalid key (null) in %s", storepath);
+ *op_errno = GD_STORE_KEY_NULL;
+ } else if (val == NULL) {
+ ret = -1;
+ gf_log ("", GF_LOG_ERROR, "Glusterd store may be corrupted, "
+ "Invalid value (null) for key %s in %s", key,
+ storepath);
+ *op_errno = GD_STORE_VALUE_NULL;
+ } else {
+ ret = 0;
+ *op_errno = GD_STORE_SUCCESS;
+ }
+
+ return ret;
+}
+
+int32_t
+gf_store_iter_get_next (gf_store_iter_t *iter, char **key, char **value,
+ gf_store_op_errno_t *op_errno)
+{
+ int32_t ret = -1;
+ char *scan_str = NULL;
+ char *iter_key = NULL;
+ char *iter_val = NULL;
+ struct stat st = {0,};
+ gf_store_op_errno_t store_errno = GD_STORE_SUCCESS;
+
+ GF_ASSERT (iter);
+ GF_ASSERT (key);
+ GF_ASSERT (value);
+
+ ret = stat (iter->filepath, &st);
+ if (ret < 0) {
+ gf_log ("", GF_LOG_WARNING, "stat on file failed");
+ ret = -1;
+ store_errno = GD_STORE_STAT_FAILED;
+ goto out;
+ }
+
+ /* "st.st_size + 1" is used as we are fetching each
+ * line of a file using fgets, fgets will append "\0"
+ * to the end of the string
+ */
+ scan_str = GF_CALLOC (1, st.st_size + 1,
+ gf_common_mt_char);
+ if (!scan_str) {
+ ret = -1;
+ store_errno = GD_STORE_ENOMEM;
+ goto out;
+ }
+
+ ret = gf_store_read_and_tokenize (iter->file, scan_str,
+ &iter_key, &iter_val,
+ &store_errno);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = gf_store_validate_key_value (iter->filepath, iter_key,
+ iter_val, &store_errno);
+ if (ret)
+ goto out;
+
+ *key = gf_strdup (iter_key);
+ if (!*key) {
+ ret = -1;
+ store_errno = GD_STORE_ENOMEM;
+ goto out;
+ }
+ *value = gf_strdup (iter_val);
+ if (!*value) {
+ ret = -1;
+ store_errno = GD_STORE_ENOMEM;
+ goto out;
+ }
+ ret = 0;
+
+out:
+ GF_FREE (scan_str);
+ if (ret) {
+ GF_FREE (*key);
+ GF_FREE (*value);
+ *key = NULL;
+ *value = NULL;
+ }
+ if (op_errno)
+ *op_errno = store_errno;
+
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ return ret;
+}
+
+int32_t
+gf_store_iter_get_matching (gf_store_iter_t *iter, char *key, char **value)
+{
+ int32_t ret = -1;
+ char *tmp_key = NULL;
+ char *tmp_value = NULL;
+
+ ret = gf_store_iter_get_next (iter, &tmp_key, &tmp_value, NULL);
+ while (!ret) {
+ if (!strncmp (key, tmp_key, strlen (key))){
+ *value = tmp_value;
+ GF_FREE (tmp_key);
+ goto out;
+ }
+ GF_FREE (tmp_key);
+ tmp_key = NULL;
+ GF_FREE (tmp_value);
+ tmp_value = NULL;
+ ret = gf_store_iter_get_next (iter, &tmp_key, &tmp_value,
+ NULL);
+ }
+out:
+ return ret;
+}
+
+int32_t
+gf_store_iter_destroy (gf_store_iter_t *iter)
+{
+ int32_t ret = -1;
+
+ if (!iter)
+ return 0;
+
+ /* gf_store_iter_new will not return a valid iter object with iter->file
+ * being NULL*/
+ ret = fclose (iter->file);
+ if (ret)
+ gf_log ("", GF_LOG_ERROR, "Unable to close file: %s, ret: %d, "
+ "errno: %d" ,iter->filepath, ret, errno);
+
+ GF_FREE (iter);
+ return ret;
+}
+
+char*
+gf_store_strerror (gf_store_op_errno_t op_errno)
+{
+ switch (op_errno) {
+ case GD_STORE_SUCCESS:
+ return "Success";
+ case GD_STORE_KEY_NULL:
+ return "Invalid Key";
+ case GD_STORE_VALUE_NULL:
+ return "Invalid Value";
+ case GD_STORE_KEY_VALUE_NULL:
+ return "Invalid Key and Value";
+ case GD_STORE_EOF:
+ return "No data";
+ case GD_STORE_ENOMEM:
+ return "No memory";
+ default:
+ return "Invalid errno";
+ }
+ return "Invalid errno";
+}
+
+int
+gf_store_lock (gf_store_handle_t *sh)
+{
+ int ret;
+
+ GF_ASSERT (sh);
+ GF_ASSERT (sh->path);
+ GF_ASSERT (sh->locked == F_ULOCK);
+
+ sh->fd = open (sh->path, O_RDWR);
+ if (sh->fd == -1) {
+ gf_log ("", GF_LOG_ERROR, "Failed to open '%s': %s", sh->path,
+ strerror (errno));
+ return -1;
+ }
+
+ ret = lockf (sh->fd, F_LOCK, 0);
+ if (ret)
+ gf_log ("", GF_LOG_ERROR, "Failed to gain lock on '%s': %s",
+ sh->path, strerror (errno));
+ else
+ /* sh->locked is protected by the lockf(sh->fd) above */
+ sh->locked = F_LOCK;
+
+ return ret;
+}
+
+void
+gf_store_unlock (gf_store_handle_t *sh)
+{
+ GF_ASSERT (sh);
+ GF_ASSERT (sh->locked == F_LOCK);
+
+ sh->locked = F_ULOCK;
+ lockf (sh->fd, F_ULOCK, 0);
+ close (sh->fd);
+}
+
+int
+gf_store_locked_local (gf_store_handle_t *sh)
+{
+ GF_ASSERT (sh);
+
+ return (sh->locked == F_LOCK);
+}
diff --git a/libglusterfs/src/store.h b/libglusterfs/src/store.h
new file mode 100644
index 000000000..337103ff7
--- /dev/null
+++ b/libglusterfs/src/store.h
@@ -0,0 +1,112 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_STORE_H_
+#define _GLUSTERD_STORE_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+
+struct gf_store_handle_ {
+ char *path;
+ int fd;
+ FILE *read;
+ int locked; /* state of lockf() */
+};
+
+typedef struct gf_store_handle_ gf_store_handle_t;
+
+struct gf_store_iter_ {
+ FILE *file;
+ char filepath[PATH_MAX];
+};
+
+typedef struct gf_store_iter_ gf_store_iter_t;
+
+typedef enum {
+ GD_STORE_SUCCESS,
+ GD_STORE_KEY_NULL,
+ GD_STORE_VALUE_NULL,
+ GD_STORE_KEY_VALUE_NULL,
+ GD_STORE_EOF,
+ GD_STORE_ENOMEM,
+ GD_STORE_STAT_FAILED
+} gf_store_op_errno_t;
+
+int32_t
+gf_store_mkdir (char *path);
+
+int32_t
+gf_store_handle_create_on_absence (gf_store_handle_t **shandle, char *path);
+
+int32_t
+gf_store_mkstemp (gf_store_handle_t *shandle);
+
+int
+gf_store_sync_direntry (char *path);
+
+int32_t
+gf_store_rename_tmppath (gf_store_handle_t *shandle);
+
+int32_t
+gf_store_unlink_tmppath (gf_store_handle_t *shandle);
+
+int
+gf_store_read_and_tokenize (FILE *file, char *str, char **iter_key,
+ char **iter_val, gf_store_op_errno_t *store_errno);
+
+int32_t
+gf_store_retrieve_value (gf_store_handle_t *handle, char *key, char **value);
+
+int32_t
+gf_store_save_value (int fd, char *key, char *value);
+
+int32_t
+gf_store_handle_new (char *path, gf_store_handle_t **handle);
+
+int
+gf_store_handle_retrieve (char *path, gf_store_handle_t **handle);
+
+int32_t
+gf_store_handle_destroy (gf_store_handle_t *handle);
+
+int32_t
+gf_store_iter_new (gf_store_handle_t *shandle, gf_store_iter_t **iter);
+
+int32_t
+gf_store_validate_key_value (char *storepath, char *key, char *val,
+ gf_store_op_errno_t *op_errno);
+
+int32_t
+gf_store_iter_get_next (gf_store_iter_t *iter, char **key, char **value,
+ gf_store_op_errno_t *op_errno);
+
+int32_t
+gf_store_iter_get_matching (gf_store_iter_t *iter, char *key, char **value);
+
+int32_t
+gf_store_iter_destroy (gf_store_iter_t *iter);
+
+char*
+gf_store_strerror (gf_store_op_errno_t op_errno);
+
+int
+gf_store_lock (gf_store_handle_t *sh);
+
+void
+gf_store_unlock (gf_store_handle_t *sh);
+
+int
+gf_store_locked_local (gf_store_handle_t *sh);
+
+#endif
diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c
new file mode 100644
index 000000000..c1620bb70
--- /dev/null
+++ b/libglusterfs/src/syncop.c
@@ -0,0 +1,2237 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "syncop.h"
+
+int
+syncopctx_setfsuid (void *uid)
+{
+ struct syncopctx *opctx = NULL;
+ int ret = 0;
+
+ /* In args check */
+ if (!uid) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ opctx = syncopctx_getctx ();
+
+ /* alloc for this thread the first time */
+ if (!opctx) {
+ opctx = GF_CALLOC (1, sizeof (*opctx), gf_common_mt_syncopctx);
+ if (!opctx) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncopctx_setctx (opctx);
+ if (ret != 0) {
+ GF_FREE (opctx);
+ opctx = NULL;
+ goto out;
+ }
+ }
+
+out:
+ if (opctx && uid) {
+ opctx->uid = *(uid_t *)uid;
+ opctx->valid |= SYNCOPCTX_UID;
+ }
+
+ return ret;
+}
+
+int
+syncopctx_setfsgid (void *gid)
+{
+ struct syncopctx *opctx = NULL;
+ int ret = 0;
+
+ /* In args check */
+ if (!gid) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ opctx = syncopctx_getctx ();
+
+ /* alloc for this thread the first time */
+ if (!opctx) {
+ opctx = GF_CALLOC (1, sizeof (*opctx), gf_common_mt_syncopctx);
+ if (!opctx) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncopctx_setctx (opctx);
+ if (ret != 0) {
+ GF_FREE (opctx);
+ opctx = NULL;
+ goto out;
+ }
+ }
+
+out:
+ if (opctx && gid) {
+ opctx->gid = *(gid_t *)gid;
+ opctx->valid |= SYNCOPCTX_GID;
+ }
+
+ return ret;
+}
+
+int
+syncopctx_setfsgroups (int count, const void *groups)
+{
+ struct syncopctx *opctx = NULL;
+ gid_t *tmpgroups = NULL;
+ int ret = 0;
+
+ /* In args check */
+ if (count != 0 && !groups) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ opctx = syncopctx_getctx ();
+
+ /* alloc for this thread the first time */
+ if (!opctx) {
+ opctx = GF_CALLOC (1, sizeof (*opctx), gf_common_mt_syncopctx);
+ if (!opctx) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncopctx_setctx (opctx);
+ if (ret != 0) {
+ GF_FREE (opctx);
+ opctx = NULL;
+ goto out;
+ }
+ }
+
+ /* resize internal groups as required */
+ if (count && opctx->grpsize < count) {
+ if (opctx->groups) {
+ tmpgroups = GF_REALLOC (opctx->groups,
+ (sizeof (gid_t) * count));
+ /* NOTE: Not really required to zero the reallocation,
+ * as ngrps controls the validity of data,
+ * making a note irrespective */
+ if (tmpgroups == NULL) {
+ opctx->grpsize = 0;
+ GF_FREE (opctx->groups);
+ opctx->groups = NULL;
+ ret = -1;
+ goto out;
+ }
+ }
+ else {
+ tmpgroups = GF_CALLOC (count, sizeof (gid_t),
+ gf_common_mt_syncopctx);
+ if (tmpgroups == NULL) {
+ opctx->grpsize = 0;
+ ret = -1;
+ goto out;
+ }
+ }
+
+ opctx->groups = tmpgroups;
+ opctx->grpsize = count;
+ }
+
+ /* copy out the groups passed */
+ if (count)
+ memcpy (opctx->groups, groups, (sizeof (gid_t) * count));
+
+ /* set/reset the ngrps, this is where reset of groups is handled */
+ opctx->ngrps = count;
+ opctx->valid |= SYNCOPCTX_GROUPS;
+
+out:
+ return ret;
+}
+
+static void
+__run (struct synctask *task)
+{
+ struct syncenv *env = NULL;
+
+ env = task->env;
+
+ list_del_init (&task->all_tasks);
+ switch (task->state) {
+ case SYNCTASK_INIT:
+ case SYNCTASK_SUSPEND:
+ break;
+ case SYNCTASK_RUN:
+ gf_log (task->xl->name, GF_LOG_DEBUG,
+ "re-running already running task");
+ env->runcount--;
+ break;
+ case SYNCTASK_WAIT:
+ env->waitcount--;
+ break;
+ case SYNCTASK_DONE:
+ gf_log (task->xl->name, GF_LOG_WARNING,
+ "running completed task");
+ return;
+ case SYNCTASK_ZOMBIE:
+ gf_log (task->xl->name, GF_LOG_WARNING,
+ "attempted to wake up zombie!!");
+ return;
+ }
+
+ list_add_tail (&task->all_tasks, &env->runq);
+ env->runcount++;
+ task->state = SYNCTASK_RUN;
+}
+
+
+static void
+__wait (struct synctask *task)
+{
+ struct syncenv *env = NULL;
+
+ env = task->env;
+
+ list_del_init (&task->all_tasks);
+ switch (task->state) {
+ case SYNCTASK_INIT:
+ case SYNCTASK_SUSPEND:
+ break;
+ case SYNCTASK_RUN:
+ env->runcount--;
+ break;
+ case SYNCTASK_WAIT:
+ gf_log (task->xl->name, GF_LOG_WARNING,
+ "re-waiting already waiting task");
+ env->waitcount--;
+ break;
+ case SYNCTASK_DONE:
+ gf_log (task->xl->name, GF_LOG_WARNING,
+ "running completed task");
+ return;
+ case SYNCTASK_ZOMBIE:
+ gf_log (task->xl->name, GF_LOG_WARNING,
+ "attempted to sleep a zombie!!");
+ return;
+ }
+
+ list_add_tail (&task->all_tasks, &env->waitq);
+ env->waitcount++;
+ task->state = SYNCTASK_WAIT;
+}
+
+
+void
+synctask_yield (struct synctask *task)
+{
+ xlator_t *oldTHIS = THIS;
+
+#if defined(__NetBSD__) && defined(_UC_TLSBASE)
+ /* Preserve pthread private pointer through swapcontex() */
+ task->proc->sched.uc_flags &= ~_UC_TLSBASE;
+#endif
+
+ if (task->state != SYNCTASK_DONE)
+ task->state = SYNCTASK_SUSPEND;
+ if (swapcontext (&task->ctx, &task->proc->sched) < 0) {
+ gf_log ("syncop", GF_LOG_ERROR,
+ "swapcontext failed (%s)", strerror (errno));
+ }
+
+ THIS = oldTHIS;
+}
+
+
+void
+synctask_wake (struct synctask *task)
+{
+ struct syncenv *env = NULL;
+
+ env = task->env;
+
+ pthread_mutex_lock (&env->mutex);
+ {
+ task->woken = 1;
+
+ if (task->slept)
+ __run (task);
+
+ pthread_cond_broadcast (&env->cond);
+ }
+ pthread_mutex_unlock (&env->mutex);
+}
+
+void
+synctask_wrap (struct synctask *old_task)
+{
+ struct synctask *task = NULL;
+
+ /* Do not trust the pointer received. It may be
+ wrong and can lead to crashes. */
+
+ task = synctask_get ();
+ task->ret = task->syncfn (task->opaque);
+ if (task->synccbk)
+ task->synccbk (task->ret, task->frame, task->opaque);
+
+ task->state = SYNCTASK_DONE;
+
+ synctask_yield (task);
+}
+
+
+void
+synctask_destroy (struct synctask *task)
+{
+ if (!task)
+ return;
+
+ FREE (task->stack);
+
+ if (task->opframe)
+ STACK_DESTROY (task->opframe->root);
+
+ if (task->synccbk == NULL) {
+ pthread_mutex_destroy (&task->mutex);
+ pthread_cond_destroy (&task->cond);
+ }
+
+ FREE (task);
+}
+
+
+void
+synctask_done (struct synctask *task)
+{
+ if (task->synccbk) {
+ synctask_destroy (task);
+ return;
+ }
+
+ pthread_mutex_lock (&task->mutex);
+ {
+ task->state = SYNCTASK_ZOMBIE;
+ task->done = 1;
+ pthread_cond_broadcast (&task->cond);
+ }
+ pthread_mutex_unlock (&task->mutex);
+}
+
+
+int
+synctask_setid (struct synctask *task, uid_t uid, gid_t gid)
+{
+ if (!task)
+ return -1;
+
+ if (uid != -1)
+ task->uid = uid;
+
+ if (gid != -1)
+ task->gid = gid;
+
+ return 0;
+}
+
+
+struct synctask *
+synctask_create (struct syncenv *env, synctask_fn_t fn, synctask_cbk_t cbk,
+ call_frame_t *frame, void *opaque)
+{
+ struct synctask *newtask = NULL;
+ xlator_t *this = THIS;
+
+ VALIDATE_OR_GOTO (env, err);
+ VALIDATE_OR_GOTO (fn, err);
+
+ newtask = CALLOC (1, sizeof (*newtask));
+ if (!newtask)
+ return NULL;
+
+ newtask->frame = frame;
+ if (!frame) {
+ newtask->opframe = create_frame (this, this->ctx->pool);
+ } else {
+ newtask->opframe = copy_frame (frame);
+ }
+ if (!newtask->opframe)
+ goto err;
+ newtask->env = env;
+ newtask->xl = this;
+ newtask->syncfn = fn;
+ newtask->synccbk = cbk;
+ newtask->opaque = opaque;
+
+ /* default to the uid/gid of the passed frame */
+ newtask->uid = newtask->opframe->root->uid;
+ newtask->gid = newtask->opframe->root->gid;
+
+ INIT_LIST_HEAD (&newtask->all_tasks);
+ INIT_LIST_HEAD (&newtask->waitq);
+
+ if (getcontext (&newtask->ctx) < 0) {
+ gf_log ("syncop", GF_LOG_ERROR,
+ "getcontext failed (%s)",
+ strerror (errno));
+ goto err;
+ }
+
+ newtask->stack = CALLOC (1, env->stacksize);
+ if (!newtask->stack) {
+ gf_log ("syncop", GF_LOG_ERROR,
+ "out of memory for stack");
+ goto err;
+ }
+
+ newtask->ctx.uc_stack.ss_sp = newtask->stack;
+ newtask->ctx.uc_stack.ss_size = env->stacksize;
+
+ makecontext (&newtask->ctx, (void (*)(void)) synctask_wrap, 2, newtask);
+
+ newtask->state = SYNCTASK_INIT;
+
+ newtask->slept = 1;
+
+ if (!cbk) {
+ pthread_mutex_init (&newtask->mutex, NULL);
+ pthread_cond_init (&newtask->cond, NULL);
+ newtask->done = 0;
+ }
+
+ synctask_wake (newtask);
+ /*
+ * Make sure someone's there to execute anything we just put on the
+ * run queue.
+ */
+ syncenv_scale(env);
+
+ return newtask;
+err:
+ if (newtask) {
+ FREE (newtask->stack);
+ if (newtask->opframe)
+ STACK_DESTROY (newtask->opframe->root);
+ FREE (newtask);
+ }
+
+ return NULL;
+}
+
+
+int
+synctask_join (struct synctask *task)
+{
+ int ret = 0;
+
+ pthread_mutex_lock (&task->mutex);
+ {
+ while (!task->done)
+ pthread_cond_wait (&task->cond, &task->mutex);
+ }
+ pthread_mutex_unlock (&task->mutex);
+
+ ret = task->ret;
+
+ synctask_destroy (task);
+
+ return ret;
+}
+
+
+int
+synctask_new (struct syncenv *env, synctask_fn_t fn, synctask_cbk_t cbk,
+ call_frame_t *frame, void *opaque)
+{
+ struct synctask *newtask = NULL;
+ int ret = 0;
+
+ newtask = synctask_create (env, fn, cbk, frame, opaque);
+ if (!newtask)
+ return -1;
+
+ if (!cbk)
+ ret = synctask_join (newtask);
+
+ return ret;
+}
+
+
+struct synctask *
+syncenv_task (struct syncproc *proc)
+{
+ struct syncenv *env = NULL;
+ struct synctask *task = NULL;
+ struct timespec sleep_till = {0, };
+ int ret = 0;
+
+ env = proc->env;
+
+ pthread_mutex_lock (&env->mutex);
+ {
+ while (list_empty (&env->runq)) {
+ sleep_till.tv_sec = time (NULL) + SYNCPROC_IDLE_TIME;
+ ret = pthread_cond_timedwait (&env->cond, &env->mutex,
+ &sleep_till);
+ if (!list_empty (&env->runq))
+ break;
+ if ((ret == ETIMEDOUT) &&
+ (env->procs > env->procmin)) {
+ task = NULL;
+ env->procs--;
+ memset (proc, 0, sizeof (*proc));
+ goto unlock;
+ }
+ }
+
+ task = list_entry (env->runq.next, struct synctask, all_tasks);
+
+ list_del_init (&task->all_tasks);
+ env->runcount--;
+
+ task->woken = 0;
+ task->slept = 0;
+
+ task->proc = proc;
+ }
+unlock:
+ pthread_mutex_unlock (&env->mutex);
+
+ return task;
+}
+
+
+void
+synctask_switchto (struct synctask *task)
+{
+ struct syncenv *env = NULL;
+
+ env = task->env;
+
+ synctask_set (task);
+ THIS = task->xl;
+
+#if defined(__NetBSD__) && defined(_UC_TLSBASE)
+ /* Preserve pthread private pointer through swapcontex() */
+ task->ctx.uc_flags &= ~_UC_TLSBASE;
+#endif
+
+ if (swapcontext (&task->proc->sched, &task->ctx) < 0) {
+ gf_log ("syncop", GF_LOG_ERROR,
+ "swapcontext failed (%s)", strerror (errno));
+ }
+
+ if (task->state == SYNCTASK_DONE) {
+ synctask_done (task);
+ return;
+ }
+
+ pthread_mutex_lock (&env->mutex);
+ {
+ if (task->woken) {
+ __run (task);
+ } else {
+ task->slept = 1;
+ __wait (task);
+ }
+ }
+ pthread_mutex_unlock (&env->mutex);
+}
+
+void *
+syncenv_processor (void *thdata)
+{
+ struct syncenv *env = NULL;
+ struct syncproc *proc = NULL;
+ struct synctask *task = NULL;
+
+ proc = thdata;
+ env = proc->env;
+
+ for (;;) {
+ task = syncenv_task (proc);
+ if (!task)
+ break;
+
+ synctask_switchto (task);
+
+ syncenv_scale (env);
+ }
+
+ return NULL;
+}
+
+
+void
+syncenv_scale (struct syncenv *env)
+{
+ int diff = 0;
+ int scale = 0;
+ int i = 0;
+ int ret = 0;
+
+ pthread_mutex_lock (&env->mutex);
+ {
+ if (env->procs > env->runcount)
+ goto unlock;
+
+ scale = env->runcount;
+ if (scale > env->procmax)
+ scale = env->procmax;
+ if (scale > env->procs)
+ diff = scale - env->procs;
+ while (diff) {
+ diff--;
+ for (; (i < env->procmax); i++) {
+ if (env->proc[i].processor == 0)
+ break;
+ }
+
+ env->proc[i].env = env;
+ ret = gf_thread_create (&env->proc[i].processor, NULL,
+ syncenv_processor, &env->proc[i]);
+ if (ret)
+ break;
+ env->procs++;
+ i++;
+ }
+ }
+unlock:
+ pthread_mutex_unlock (&env->mutex);
+}
+
+
+void
+syncenv_destroy (struct syncenv *env)
+{
+
+}
+
+
+struct syncenv *
+syncenv_new (size_t stacksize, int procmin, int procmax)
+{
+ struct syncenv *newenv = NULL;
+ int ret = 0;
+ int i = 0;
+
+ if (!procmin || procmin < 0)
+ procmin = SYNCENV_PROC_MIN;
+ if (!procmax || procmax > SYNCENV_PROC_MAX)
+ procmax = SYNCENV_PROC_MAX;
+
+ if (procmin > procmax)
+ return NULL;
+
+ newenv = CALLOC (1, sizeof (*newenv));
+
+ if (!newenv)
+ return NULL;
+
+ pthread_mutex_init (&newenv->mutex, NULL);
+ pthread_cond_init (&newenv->cond, NULL);
+
+ INIT_LIST_HEAD (&newenv->runq);
+ INIT_LIST_HEAD (&newenv->waitq);
+
+ newenv->stacksize = SYNCENV_DEFAULT_STACKSIZE;
+ if (stacksize)
+ newenv->stacksize = stacksize;
+ newenv->procmin = procmin;
+ newenv->procmax = procmax;
+
+ for (i = 0; i < newenv->procmin; i++) {
+ newenv->proc[i].env = newenv;
+ ret = gf_thread_create (&newenv->proc[i].processor, NULL,
+ syncenv_processor, &newenv->proc[i]);
+ if (ret)
+ break;
+ newenv->procs++;
+ }
+
+ if (ret != 0)
+ syncenv_destroy (newenv);
+
+ return newenv;
+}
+
+
+int
+synclock_init (synclock_t *lock)
+{
+ if (!lock)
+ return -1;
+
+ pthread_cond_init (&lock->cond, 0);
+ lock->lock = 0;
+ INIT_LIST_HEAD (&lock->waitq);
+
+ return pthread_mutex_init (&lock->guard, 0);
+}
+
+
+int
+synclock_destroy (synclock_t *lock)
+{
+ if (!lock)
+ return -1;
+
+ pthread_cond_destroy (&lock->cond);
+ return pthread_mutex_destroy (&lock->guard);
+}
+
+
+static int
+__synclock_lock (struct synclock *lock)
+{
+ struct synctask *task = NULL;
+
+ if (!lock)
+ return -1;
+
+ task = synctask_get ();
+
+ while (lock->lock) {
+ if (task) {
+ /* called within a synctask */
+ list_add_tail (&task->waitq, &lock->waitq);
+ pthread_mutex_unlock (&lock->guard);
+ synctask_yield (task);
+ /* task is removed from waitq in unlock,
+ * under lock->guard.*/
+ pthread_mutex_lock (&lock->guard);
+ } else {
+ /* called by a non-synctask */
+ pthread_cond_wait (&lock->cond, &lock->guard);
+ }
+ }
+
+ lock->lock = _gf_true;
+ lock->owner = task;
+
+ return 0;
+}
+
+
+int
+synclock_lock (synclock_t *lock)
+{
+ int ret = 0;
+
+ pthread_mutex_lock (&lock->guard);
+ {
+ ret = __synclock_lock (lock);
+ }
+ pthread_mutex_unlock (&lock->guard);
+
+ return ret;
+}
+
+
+int
+synclock_trylock (synclock_t *lock)
+{
+ int ret = 0;
+
+ errno = 0;
+
+ pthread_mutex_lock (&lock->guard);
+ {
+ if (lock->lock) {
+ errno = EBUSY;
+ ret = -1;
+ goto unlock;
+ }
+
+ ret = __synclock_lock (lock);
+ }
+unlock:
+ pthread_mutex_unlock (&lock->guard);
+
+ return ret;
+}
+
+
+static int
+__synclock_unlock (synclock_t *lock)
+{
+ struct synctask *task = NULL;
+ struct synctask *curr = NULL;
+
+ if (!lock)
+ return -1;
+
+ curr = synctask_get ();
+
+ if (lock->owner != curr) {
+ /* warn ? */
+ }
+
+ lock->lock = _gf_false;
+
+ /* There could be both synctasks and non synctasks
+ waiting (or none, or either). As a mid-approach
+ between maintaining too many waiting counters
+ at one extreme and a thundering herd on unlock
+ at the other, call a cond_signal (which wakes
+ one waiter) and first synctask waiter. So at
+ most we have two threads waking up to grab the
+ just released lock.
+ */
+ pthread_cond_signal (&lock->cond);
+ if (!list_empty (&lock->waitq)) {
+ task = list_entry (lock->waitq.next, struct synctask, waitq);
+ list_del_init (&task->waitq);
+ synctask_wake (task);
+ }
+
+ return 0;
+}
+
+
+int
+synclock_unlock (synclock_t *lock)
+{
+ int ret = 0;
+
+ pthread_mutex_lock (&lock->guard);
+ {
+ ret = __synclock_unlock (lock);
+ }
+ pthread_mutex_unlock (&lock->guard);
+
+ return ret;
+}
+
+/* Barriers */
+
+int
+syncbarrier_init (struct syncbarrier *barrier)
+{
+ if (!barrier) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ pthread_cond_init (&barrier->cond, 0);
+ barrier->count = 0;
+ INIT_LIST_HEAD (&barrier->waitq);
+
+ return pthread_mutex_init (&barrier->guard, 0);
+}
+
+
+int
+syncbarrier_destroy (struct syncbarrier *barrier)
+{
+ if (!barrier) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ pthread_cond_destroy (&barrier->cond);
+ return pthread_mutex_destroy (&barrier->guard);
+}
+
+
+static int
+__syncbarrier_wait (struct syncbarrier *barrier, int waitfor)
+{
+ struct synctask *task = NULL;
+
+ if (!barrier) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ task = synctask_get ();
+
+ while (barrier->count < waitfor) {
+ if (task) {
+ /* called within a synctask */
+ list_add_tail (&task->waitq, &barrier->waitq);
+ pthread_mutex_unlock (&barrier->guard);
+ synctask_yield (task);
+ pthread_mutex_lock (&barrier->guard);
+ } else {
+ /* called by a non-synctask */
+ pthread_cond_wait (&barrier->cond, &barrier->guard);
+ }
+ }
+
+ barrier->count = 0;
+
+ return 0;
+}
+
+
+int
+syncbarrier_wait (struct syncbarrier *barrier, int waitfor)
+{
+ int ret = 0;
+
+ pthread_mutex_lock (&barrier->guard);
+ {
+ ret = __syncbarrier_wait (barrier, waitfor);
+ }
+ pthread_mutex_unlock (&barrier->guard);
+
+ return ret;
+}
+
+
+static int
+__syncbarrier_wake (struct syncbarrier *barrier)
+{
+ struct synctask *task = NULL;
+
+ if (!barrier) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ barrier->count++;
+
+ pthread_cond_signal (&barrier->cond);
+ if (!list_empty (&barrier->waitq)) {
+ task = list_entry (barrier->waitq.next, struct synctask, waitq);
+ list_del_init (&task->waitq);
+ synctask_wake (task);
+ }
+
+ return 0;
+}
+
+
+int
+syncbarrier_wake (struct syncbarrier *barrier)
+{
+ int ret = 0;
+
+ pthread_mutex_lock (&barrier->guard);
+ {
+ ret = __syncbarrier_wake (barrier);
+ }
+ pthread_mutex_unlock (&barrier->guard);
+
+ return ret;
+}
+
+
+/* FOPS */
+
+
+int
+syncop_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *iatt, dict_t *xdata, struct iatt *parent)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ if (op_ret == 0) {
+ args->iatt1 = *iatt;
+ args->iatt2 = *parent;
+ if (xdata)
+ args->xdata = dict_ref (xdata);
+ }
+
+ __wake (args);
+
+ return 0;
+}
+
+
+int
+syncop_lookup (xlator_t *subvol, loc_t *loc, dict_t *xdata_req,
+ struct iatt *iatt, dict_t **xdata_rsp, struct iatt *parent)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_lookup_cbk, subvol->fops->lookup,
+ loc, xdata_req);
+
+ if (iatt)
+ *iatt = args.iatt1;
+ if (parent)
+ *parent = args.iatt2;
+ if (xdata_rsp)
+ *xdata_rsp = args.xdata;
+ else if (args.xdata)
+ dict_unref (args.xdata);
+
+ errno = args.op_errno;
+ return args.op_ret;
+}
+
+static gf_dirent_t *
+entry_copy (gf_dirent_t *source)
+{
+ gf_dirent_t *sink = NULL;
+
+ sink = gf_dirent_for_name (source->d_name);
+
+ sink->d_off = source->d_off;
+ sink->d_ino = source->d_ino;
+ sink->d_type = source->d_type;
+ sink->d_stat = source->d_stat;
+
+ if (source->inode)
+ sink->inode = inode_ref (source->inode);
+ return sink;
+}
+
+int32_t
+syncop_readdirp_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+
+ int count = 0;
+
+ args = cookie;
+
+ INIT_LIST_HEAD (&args->entries.list);
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ if (op_ret >= 0) {
+ list_for_each_entry (entry, &entries->list, list) {
+ tmp = entry_copy (entry);
+ gf_log (this->name, GF_LOG_TRACE,
+ "adding entry=%s, count=%d",
+ tmp->d_name, count);
+ list_add_tail (&tmp->list, &(args->entries.list));
+ count++;
+ }
+ }
+
+ __wake (args);
+
+ return 0;
+
+}
+
+int
+syncop_readdirp (xlator_t *subvol,
+ fd_t *fd,
+ size_t size,
+ off_t off,
+ dict_t *dict,
+ gf_dirent_t *entries)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_readdirp_cbk, subvol->fops->readdirp,
+ fd, size, off, dict);
+
+ if (entries)
+ list_splice_init (&args.entries.list, &entries->list);
+ /* TODO: need to free all the 'args.entries' in 'else' case */
+
+ errno = args.op_errno;
+ return args.op_ret;
+
+}
+
+int32_t
+syncop_readdir_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+
+ int count = 0;
+
+ args = cookie;
+
+ INIT_LIST_HEAD (&args->entries.list);
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ if (op_ret >= 0) {
+ list_for_each_entry (entry, &entries->list, list) {
+ tmp = entry_copy (entry);
+ gf_log (this->name, GF_LOG_TRACE,
+ "adding entry=%s, count=%d",
+ tmp->d_name, count);
+ list_add_tail (&tmp->list, &(args->entries.list));
+ count++;
+ }
+ }
+
+ __wake (args);
+
+ return 0;
+
+}
+
+int
+syncop_readdir (xlator_t *subvol,
+ fd_t *fd,
+ size_t size,
+ off_t off,
+ gf_dirent_t *entries)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_readdir_cbk, subvol->fops->readdir,
+ fd, size, off, NULL);
+
+ if (entries)
+ list_splice_init (&args.entries.list, &entries->list);
+ /* TODO: need to free all the 'args.entries' in 'else' case */
+
+ errno = args.op_errno;
+ return args.op_ret;
+
+}
+
+int32_t
+syncop_opendir_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ __wake (args);
+
+ return 0;
+}
+
+int
+syncop_opendir (xlator_t *subvol,
+ loc_t *loc,
+ fd_t *fd)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_opendir_cbk, subvol->fops->opendir,
+ loc, fd, NULL);
+
+ errno = args.op_errno;
+ return args.op_ret;
+
+}
+
+int
+syncop_fsyncdir_cbk (call_frame_t *frame, void* cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ __wake (args);
+
+ return 0;
+}
+
+int
+syncop_fsyncdir (xlator_t *subvol, fd_t *fd, int datasync)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_fsyncdir_cbk, subvol->fops->fsyncdir,
+ fd, datasync, NULL);
+
+ errno = args.op_errno;
+ return args.op_ret;
+}
+
+int
+syncop_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ __wake (args);
+
+ return 0;
+}
+
+int
+syncop_removexattr (xlator_t *subvol, loc_t *loc, const char *name)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_removexattr_cbk, subvol->fops->removexattr,
+ loc, name, NULL);
+
+ errno = args.op_errno;
+ return args.op_ret;
+}
+
+int
+syncop_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ __wake (args);
+
+ return 0;
+}
+
+int
+syncop_fremovexattr (xlator_t *subvol, fd_t *fd, const char *name)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_fremovexattr_cbk,
+ subvol->fops->fremovexattr, fd, name, NULL);
+
+ errno = args.op_errno;
+ return args.op_ret;
+}
+
+int
+syncop_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ __wake (args);
+
+ return 0;
+}
+
+
+int
+syncop_setxattr (xlator_t *subvol, loc_t *loc, dict_t *dict, int32_t flags)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_setxattr_cbk, subvol->fops->setxattr,
+ loc, dict, flags, NULL);
+
+ errno = args.op_errno;
+ return args.op_ret;
+}
+
+int
+syncop_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ __wake (args);
+
+ return 0;
+}
+
+
+int
+syncop_fsetxattr (xlator_t *subvol, fd_t *fd, dict_t *dict, int32_t flags)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_fsetxattr_cbk, subvol->fops->fsetxattr,
+ fd, dict, flags, NULL);
+
+ errno = args.op_errno;
+ return args.op_ret;
+}
+
+int
+syncop_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (op_ret >= 0)
+ args->xattr = dict_ref (dict);
+
+ __wake (args);
+
+ return 0;
+}
+
+int
+syncop_listxattr (xlator_t *subvol, loc_t *loc, dict_t **dict)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_getxattr_cbk, subvol->fops->getxattr,
+ loc, NULL, NULL);
+
+ if (dict)
+ *dict = args.xattr;
+ else if (args.xattr)
+ dict_unref (args.xattr);
+
+ errno = args.op_errno;
+ return args.op_ret;
+}
+
+int
+syncop_getxattr (xlator_t *subvol, loc_t *loc, dict_t **dict, const char *key)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_getxattr_cbk, subvol->fops->getxattr,
+ loc, key, NULL);
+
+ if (dict)
+ *dict = args.xattr;
+ else if (args.xattr)
+ dict_unref (args.xattr);
+
+ errno = args.op_errno;
+ return args.op_ret;
+}
+
+int
+syncop_fgetxattr (xlator_t *subvol, fd_t *fd, dict_t **dict, const char *key)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_getxattr_cbk, subvol->fops->fgetxattr,
+ fd, key, NULL);
+
+ if (dict)
+ *dict = args.xattr;
+ else if (args.xattr)
+ dict_unref (args.xattr);
+
+ errno = args.op_errno;
+ return args.op_ret;
+}
+
+int
+syncop_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct statvfs *buf, dict_t *xdata)
+
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ if (op_ret == 0) {
+ args->statvfs_buf = *buf;
+ }
+
+ __wake (args);
+
+ return 0;
+}
+
+
+int
+syncop_statfs (xlator_t *subvol, loc_t *loc, struct statvfs *buf)
+
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_statfs_cbk, subvol->fops->statfs,
+ loc, NULL);
+
+ if (buf)
+ *buf = args.statvfs_buf;
+
+ errno = args.op_errno;
+ return args.op_ret;
+}
+
+int
+syncop_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ if (op_ret == 0) {
+ args->iatt1 = *preop;
+ args->iatt2 = *postop;
+ }
+
+ __wake (args);
+
+ return 0;
+}
+
+
+int
+syncop_setattr (xlator_t *subvol, loc_t *loc, struct iatt *iatt, int valid,
+ struct iatt *preop, struct iatt *postop)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_setattr_cbk, subvol->fops->setattr,
+ loc, iatt, valid, NULL);
+
+ if (preop)
+ *preop = args.iatt1;
+ if (postop)
+ *postop = args.iatt2;
+
+ errno = args.op_errno;
+ return args.op_ret;
+}
+
+
+int
+syncop_fsetattr (xlator_t *subvol, fd_t *fd, struct iatt *iatt, int valid,
+ struct iatt *preop, struct iatt *postop)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_setattr_cbk, subvol->fops->fsetattr,
+ fd, iatt, valid, NULL);
+
+ if (preop)
+ *preop = args.iatt1;
+ if (postop)
+ *postop = args.iatt2;
+
+ errno = args.op_errno;
+ return args.op_ret;
+}
+
+
+int32_t
+syncop_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ __wake (args);
+
+ return 0;
+}
+
+int
+syncop_open (xlator_t *subvol, loc_t *loc, int32_t flags, fd_t *fd)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_open_cbk, subvol->fops->open,
+ loc, flags, fd, NULL);
+
+ errno = args.op_errno;
+ return args.op_ret;
+
+}
+
+
+int32_t
+syncop_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ INIT_LIST_HEAD (&args->entries.list);
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ if (args->op_ret >= 0) {
+ if (iobref)
+ args->iobref = iobref_ref (iobref);
+ args->vector = iov_dup (vector, count);
+ args->count = count;
+ }
+
+ __wake (args);
+
+ return 0;
+
+}
+
+int
+syncop_readv (xlator_t *subvol, fd_t *fd, size_t size, off_t off,
+ uint32_t flags, struct iovec **vector, int *count,
+ struct iobref **iobref)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_readv_cbk, subvol->fops->readv,
+ fd, size, off, flags, NULL);
+
+ if (args.op_ret < 0)
+ goto out;
+
+ if (vector)
+ *vector = args.vector;
+ else
+ GF_FREE (args.vector);
+
+ if (count)
+ *count = args.count;
+
+ /* Do we need a 'ref' here? */
+ if (iobref)
+ *iobref = args.iobref;
+ else if (args.iobref)
+ iobref_unref (args.iobref);
+
+out:
+ errno = args.op_errno;
+ return args.op_ret;
+
+}
+
+int
+syncop_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ __wake (args);
+
+ return 0;
+}
+
+int
+syncop_writev (xlator_t *subvol, fd_t *fd, const struct iovec *vector,
+ int32_t count, off_t offset, struct iobref *iobref,
+ uint32_t flags)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_writev_cbk, subvol->fops->writev,
+ fd, (struct iovec *) vector, count, offset, flags, iobref,
+ NULL);
+
+ errno = args.op_errno;
+ return args.op_ret;
+}
+
+int syncop_write (xlator_t *subvol, fd_t *fd, const char *buf, int size,
+ off_t offset, struct iobref *iobref, uint32_t flags)
+{
+ struct syncargs args = {0,};
+ struct iovec vec = {0,};
+
+ vec.iov_len = size;
+ vec.iov_base = (void *)buf;
+
+ SYNCOP (subvol, (&args), syncop_writev_cbk, subvol->fops->writev,
+ fd, &vec, 1, offset, flags, iobref, NULL);
+
+ errno = args.op_errno;
+ return args.op_ret;
+}
+
+
+int
+syncop_close (fd_t *fd)
+{
+ if (fd)
+ fd_unref (fd);
+ return 0;
+}
+
+int32_t
+syncop_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ if (buf)
+ args->iatt1 = *buf;
+
+ __wake (args);
+
+ return 0;
+}
+
+int
+syncop_create (xlator_t *subvol, loc_t *loc, int32_t flags, mode_t mode,
+ fd_t *fd, dict_t *xdata, struct iatt *iatt)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_create_cbk, subvol->fops->create,
+ loc, flags, mode, 0, fd, xdata);
+
+ errno = args.op_errno;
+ if (iatt)
+ *iatt = args.iatt1;
+
+ return args.op_ret;
+
+}
+
+int
+syncop_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ __wake (args);
+
+ return 0;
+}
+
+int
+syncop_unlink (xlator_t *subvol, loc_t *loc)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_unlink_cbk, subvol->fops->unlink, loc,
+ 0, NULL);
+
+ errno = args.op_errno;
+ return args.op_ret;
+}
+
+int
+syncop_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ __wake (args);
+
+ return 0;
+}
+
+int
+syncop_rmdir (xlator_t *subvol, loc_t *loc)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_rmdir_cbk, subvol->fops->rmdir, loc,
+ 0, NULL);
+
+ errno = args.op_errno;
+ return args.op_ret;
+}
+
+
+int
+syncop_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ __wake (args);
+
+ return 0;
+}
+
+
+int
+syncop_link (xlator_t *subvol, loc_t *oldloc, loc_t *newloc)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_link_cbk, subvol->fops->link,
+ oldloc, newloc, NULL);
+
+ errno = args.op_errno;
+
+ return args.op_ret;
+}
+
+
+int
+syncop_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ __wake (args);
+
+ return 0;
+}
+
+
+int
+syncop_rename (xlator_t *subvol, loc_t *oldloc, loc_t *newloc)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_rename_cbk, subvol->fops->rename,
+ oldloc, newloc, NULL);
+
+ errno = args.op_errno;
+
+ return args.op_ret;
+}
+
+
+int
+syncop_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ __wake (args);
+
+ return 0;
+}
+
+int
+syncop_ftruncate (xlator_t *subvol, fd_t *fd, off_t offset)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_ftruncate_cbk, subvol->fops->ftruncate,
+ fd, offset, NULL);
+
+ errno = args.op_errno;
+ return args.op_ret;
+}
+
+int
+syncop_truncate (xlator_t *subvol, loc_t *loc, off_t offset)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_ftruncate_cbk, subvol->fops->truncate,
+ loc, offset, NULL);
+
+ errno = args.op_errno;
+ return args.op_ret;
+}
+
+int
+syncop_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ __wake (args);
+
+ return 0;
+
+}
+
+int
+syncop_fsync (xlator_t *subvol, fd_t *fd, int dataonly)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_fsync_cbk, subvol->fops->fsync,
+ fd, dataonly, NULL);
+
+ errno = args.op_errno;
+ return args.op_ret;
+
+}
+
+
+int
+syncop_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ __wake (args);
+
+ return 0;
+
+}
+
+int
+syncop_flush (xlator_t *subvol, fd_t *fd)
+{
+ struct syncargs args = {0};
+
+ SYNCOP (subvol, (&args), syncop_flush_cbk, subvol->fops->flush,
+ fd, NULL);
+
+ errno = args.op_errno;
+ return args.op_ret;
+
+}
+
+int
+syncop_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (op_ret == 0)
+ args->iatt1 = *stbuf;
+
+ __wake (args);
+
+ return 0;
+
+}
+
+int
+syncop_fstat (xlator_t *subvol, fd_t *fd, struct iatt *stbuf)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_fstat_cbk, subvol->fops->fstat,
+ fd, NULL);
+
+ if (stbuf)
+ *stbuf = args.iatt1;
+
+ errno = args.op_errno;
+ return args.op_ret;
+
+}
+
+int
+syncop_stat (xlator_t *subvol, loc_t *loc, struct iatt *stbuf)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_fstat_cbk, subvol->fops->stat,
+ loc, NULL);
+
+ if (stbuf)
+ *stbuf = args.iatt1;
+
+ errno = args.op_errno;
+ return args.op_ret;
+
+}
+
+int32_t
+syncop_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (buf)
+ args->iatt1 = *buf;
+
+ __wake (args);
+
+ return 0;
+}
+
+int
+syncop_symlink (xlator_t *subvol, loc_t *loc, const char *newpath, dict_t *dict,
+ struct iatt *iatt)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_symlink_cbk, subvol->fops->symlink,
+ newpath, loc, 0, dict);
+
+ errno = args.op_errno;
+ if (iatt)
+ *iatt = args.iatt1;
+
+ return args.op_ret;
+
+}
+
+int
+syncop_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, const char *path,
+ struct iatt *stbuf, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ if ((op_ret != -1) && path)
+ args->buffer = gf_strdup (path);
+
+ __wake (args);
+
+ return 0;
+}
+
+int
+syncop_readlink (xlator_t *subvol, loc_t *loc, char **buffer, size_t size)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_readlink_cbk, subvol->fops->readlink,
+ loc, size, NULL);
+
+ if (buffer)
+ *buffer = args.buffer;
+ else GF_FREE (args.buffer);
+
+ errno = args.op_errno;
+ return args.op_ret;
+}
+
+int
+syncop_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ if (buf)
+ args->iatt1 = *buf;
+
+ __wake (args);
+
+ return 0;
+}
+
+int
+syncop_mknod (xlator_t *subvol, loc_t *loc, mode_t mode, dev_t rdev,
+ dict_t *dict, struct iatt *iatt)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_mknod_cbk, subvol->fops->mknod,
+ loc, mode, rdev, 0, dict);
+
+ errno = args.op_errno;
+ if (iatt)
+ *iatt = args.iatt1;
+
+ return args.op_ret;
+
+}
+
+
+int
+syncop_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (buf)
+ args->iatt1 = *buf;
+
+ __wake (args);
+
+ return 0;
+}
+
+
+int
+syncop_mkdir (xlator_t *subvol, loc_t *loc, mode_t mode, dict_t *dict,
+ struct iatt *iatt)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_mkdir_cbk, subvol->fops->mkdir,
+ loc, mode, 0, dict);
+
+ errno = args.op_errno;
+ if (iatt)
+ *iatt = args.iatt1;
+
+ return args.op_ret;
+
+}
+
+int
+syncop_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ __wake (args);
+
+ return 0;
+}
+
+int
+syncop_access (xlator_t *subvol, loc_t *loc, int32_t mask)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_access_cbk, subvol->fops->access,
+ loc, mask, NULL);
+
+ errno = args.op_errno;
+ return args.op_ret;
+}
+
+
+int
+syncop_fallocate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ __wake (args);
+
+ return 0;
+}
+
+int
+syncop_fallocate(xlator_t *subvol, fd_t *fd, int32_t keep_size, off_t offset,
+ size_t len)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_fallocate_cbk, subvol->fops->fallocate,
+ fd, keep_size, offset, len, NULL);
+
+ errno = args.op_errno;
+ return args.op_ret;
+}
+
+
+int
+syncop_discard_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ __wake (args);
+
+ return 0;
+}
+
+int
+syncop_discard(xlator_t *subvol, fd_t *fd, off_t offset, size_t len)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_discard_cbk, subvol->fops->discard,
+ fd, offset, len, NULL);
+
+ errno = args.op_errno;
+ return args.op_ret;
+}
+
+int
+syncop_zerofill_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ __wake (args);
+
+ return 0;
+}
+
+int
+syncop_zerofill(xlator_t *subvol, fd_t *fd, off_t offset, size_t len)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_zerofill_cbk, subvol->fops->zerofill,
+ fd, offset, len, NULL);
+
+ errno = args.op_errno;
+ return args.op_ret;
+}
+
+
+int
+syncop_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct gf_flock *flock,
+ dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (flock)
+ args->flock = *flock;
+ __wake (args);
+
+ return 0;
+}
+
+
+int
+syncop_lk (xlator_t *subvol, fd_t *fd, int cmd, struct gf_flock *flock)
+{
+ struct syncargs args = {0, };
+
+ SYNCOP (subvol, (&args), syncop_lk_cbk, subvol->fops->lk,
+ fd, cmd, flock, NULL);
+
+ errno = args.op_errno;
+ *flock = args.flock;
+
+ return args.op_ret;
+}
diff --git a/libglusterfs/src/syncop.h b/libglusterfs/src/syncop.h
new file mode 100644
index 000000000..f790981f0
--- /dev/null
+++ b/libglusterfs/src/syncop.h
@@ -0,0 +1,412 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _SYNCOP_H
+#define _SYNCOP_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include <sys/time.h>
+#include <pthread.h>
+#include <ucontext.h>
+
+#define SYNCENV_PROC_MAX 16
+#define SYNCENV_PROC_MIN 2
+#define SYNCPROC_IDLE_TIME 600
+
+/*
+ * Flags for syncopctx valid elements
+ */
+#define SYNCOPCTX_UID 0x00000001
+#define SYNCOPCTX_GID 0x00000002
+#define SYNCOPCTX_GROUPS 0x00000004
+
+struct synctask;
+struct syncproc;
+struct syncenv;
+
+
+typedef int (*synctask_cbk_t) (int ret, call_frame_t *frame, void *opaque);
+
+typedef int (*synctask_fn_t) (void *opaque);
+
+
+typedef enum {
+ SYNCTASK_INIT = 0,
+ SYNCTASK_RUN,
+ SYNCTASK_SUSPEND,
+ SYNCTASK_WAIT,
+ SYNCTASK_DONE,
+ SYNCTASK_ZOMBIE,
+} synctask_state_t;
+
+/* for one sequential execution of @syncfn */
+struct synctask {
+ struct list_head all_tasks;
+ struct syncenv *env;
+ xlator_t *xl;
+ call_frame_t *frame;
+ call_frame_t *opframe;
+ synctask_cbk_t synccbk;
+ synctask_fn_t syncfn;
+ synctask_state_t state;
+ void *opaque;
+ void *stack;
+ int woken;
+ int slept;
+ int ret;
+
+ uid_t uid;
+ gid_t gid;
+
+ ucontext_t ctx;
+ struct syncproc *proc;
+
+ pthread_mutex_t mutex; /* for synchronous spawning of synctask */
+ pthread_cond_t cond;
+ int done;
+
+ struct list_head waitq; /* can wait only "once" at a time */
+};
+
+
+struct syncproc {
+ pthread_t processor;
+ ucontext_t sched;
+ struct syncenv *env;
+ struct synctask *current;
+};
+
+/* hosts the scheduler thread and framework for executing synctasks */
+struct syncenv {
+ struct syncproc proc[SYNCENV_PROC_MAX];
+ int procs;
+
+ struct list_head runq;
+ int runcount;
+ struct list_head waitq;
+ int waitcount;
+
+ int procmin;
+ int procmax;
+
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+
+ size_t stacksize;
+};
+
+
+struct synclock {
+ pthread_mutex_t guard; /* guard the remaining members, pair @cond */
+ pthread_cond_t cond; /* waiting non-synctasks */
+ struct list_head waitq; /* waiting synctasks */
+ gf_boolean_t lock; /* _gf_true or _gf_false, lock status */
+ struct synctask *owner; /* NULL if current owner is not a synctask */
+};
+typedef struct synclock synclock_t;
+
+
+struct syncbarrier {
+ pthread_mutex_t guard; /* guard the remaining members, pair @cond */
+ pthread_cond_t cond; /* waiting non-synctasks */
+ struct list_head waitq; /* waiting synctasks */
+ int count; /* count the number of wakes */
+};
+typedef struct syncbarrier syncbarrier_t;
+
+
+struct syncargs {
+ int op_ret;
+ int op_errno;
+ struct iatt iatt1;
+ struct iatt iatt2;
+ dict_t *xattr;
+ gf_dirent_t entries;
+ struct statvfs statvfs_buf;
+ struct iovec *vector;
+ int count;
+ struct iobref *iobref;
+ char *buffer;
+ dict_t *xdata;
+ struct gf_flock flock;
+
+ /* some more _cbk needs */
+ uuid_t uuid;
+ char *errstr;
+ dict_t *dict;
+ pthread_mutex_t lock_dict;
+
+ syncbarrier_t barrier;
+
+ /* do not touch */
+ struct synctask *task;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ int done;
+};
+
+struct syncopctx {
+ unsigned int valid; /* valid flags for elements that are set */
+ uid_t uid;
+ gid_t gid;
+ int grpsize;
+ int ngrps;
+ gid_t *groups;
+};
+
+#define __yawn(args) do { \
+ args->task = synctask_get (); \
+ if (args->task) \
+ break; \
+ pthread_mutex_init (&args->mutex, NULL); \
+ pthread_cond_init (&args->cond, NULL); \
+ args->done = 0; \
+ } while (0)
+
+
+#define __wake(args) do { \
+ if (args->task) { \
+ synctask_wake (args->task); \
+ } else { \
+ pthread_mutex_lock (&args->mutex); \
+ { \
+ args->done = 1; \
+ pthread_cond_signal (&args->cond); \
+ } \
+ pthread_mutex_unlock (&args->mutex); \
+ } \
+ } while (0)
+
+
+#define __yield(args) do { \
+ if (args->task) { \
+ synctask_yield (args->task); \
+ } else { \
+ pthread_mutex_lock (&args->mutex); \
+ { \
+ while (!args->done) \
+ pthread_cond_wait (&args->cond, \
+ &args->mutex); \
+ } \
+ pthread_mutex_unlock (&args->mutex); \
+ pthread_mutex_destroy (&args->mutex); \
+ pthread_cond_destroy (&args->cond); \
+ } \
+ } while (0)
+
+
+#define SYNCOP(subvol, stb, cbk, op, params ...) do { \
+ struct synctask *task = NULL; \
+ call_frame_t *frame = NULL; \
+ \
+ task = synctask_get (); \
+ stb->task = task; \
+ if (task) \
+ frame = task->opframe; \
+ else \
+ frame = syncop_create_frame (THIS); \
+ \
+ if (task) { \
+ frame->root->uid = task->uid; \
+ frame->root->gid = task->gid; \
+ } \
+ \
+ __yawn (stb); \
+ \
+ STACK_WIND_COOKIE (frame, cbk, (void *)stb, subvol, \
+ op, params); \
+ \
+ __yield (stb); \
+ if (task) \
+ STACK_RESET (frame->root); \
+ else \
+ STACK_DESTROY (frame->root); \
+ } while (0)
+
+
+#define SYNCENV_DEFAULT_STACKSIZE (2 * 1024 * 1024)
+
+struct syncenv * syncenv_new (size_t stacksize, int procmin, int procmax);
+void syncenv_destroy (struct syncenv *);
+void syncenv_scale (struct syncenv *env);
+
+int synctask_new (struct syncenv *, synctask_fn_t, synctask_cbk_t, call_frame_t* frame, void *);
+struct synctask *synctask_create (struct syncenv *, synctask_fn_t,
+ synctask_cbk_t, call_frame_t *, void *);
+int synctask_join (struct synctask *task);
+void synctask_wake (struct synctask *task);
+void synctask_yield (struct synctask *task);
+void synctask_waitfor (struct synctask *task, int count);
+
+#define synctask_barrier_init(args) syncbarrier_init (&args->barrier)
+#define synctask_barrier_wait(args, n) syncbarrier_wait (&args->barrier, n)
+#define synctask_barrier_wake(args) syncbarrier_wake (&args->barrier)
+
+int synctask_setid (struct synctask *task, uid_t uid, gid_t gid);
+#define SYNCTASK_SETID(uid, gid) synctask_setid (synctask_get(), uid, gid);
+
+int syncopctx_setfsuid (void *uid);
+int syncopctx_setfsgid (void *gid);
+int syncopctx_setfsgroups (int count, const void *groups);
+
+static inline call_frame_t *
+syncop_create_frame (xlator_t *this)
+{
+ call_frame_t *frame = NULL;
+ int ngrps = -1;
+ struct syncopctx *opctx = NULL;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ return NULL;
+
+ frame->root->pid = getpid ();
+
+ opctx = syncopctx_getctx ();
+ if (opctx && (opctx->valid & SYNCOPCTX_UID))
+ frame->root->uid = opctx->uid;
+ else
+ frame->root->uid = geteuid ();
+
+ if (opctx && (opctx->valid & SYNCOPCTX_GID))
+ frame->root->gid = opctx->gid;
+ else
+ frame->root->gid = getegid ();
+
+ if (opctx && (opctx->valid & SYNCOPCTX_GROUPS)) {
+ ngrps = opctx->ngrps;
+
+ if (ngrps != 0 && opctx->groups != NULL) {
+ if (call_stack_alloc_groups (frame->root, ngrps) != 0) {
+ STACK_DESTROY (frame->root);
+ return NULL;
+ }
+
+ memcpy (frame->root->groups, opctx->groups,
+ (sizeof (gid_t) * ngrps));
+ }
+ }
+ else {
+ ngrps = getgroups (0, 0);
+ if (ngrps < 0) {
+ STACK_DESTROY (frame->root);
+ return NULL;
+ }
+
+ if (call_stack_alloc_groups (frame->root, ngrps) != 0) {
+ STACK_DESTROY (frame->root);
+ return NULL;
+ }
+
+ if (getgroups (ngrps, frame->root->groups) < 0) {
+ STACK_DESTROY (frame->root);
+ return NULL;
+ }
+ }
+
+ return frame;
+}
+
+int synclock_init (synclock_t *lock);
+int synclock_destory (synclock_t *lock);
+int synclock_lock (synclock_t *lock);
+int synclock_trylock (synclock_t *lock);
+int synclock_unlock (synclock_t *lock);
+
+
+int syncbarrier_init (syncbarrier_t *barrier);
+int syncbarrier_wait (syncbarrier_t *barrier, int waitfor);
+int syncbarrier_wake (syncbarrier_t *barrier);
+int syncbarrier_destroy (syncbarrier_t *barrier);
+
+int syncop_lookup (xlator_t *subvol, loc_t *loc, dict_t *xattr_req,
+ /* out */
+ struct iatt *iatt, dict_t **xattr_rsp, struct iatt *parent);
+
+int syncop_readdirp (xlator_t *subvol, fd_t *fd, size_t size, off_t off,
+ dict_t *dict,
+ /* out */
+ gf_dirent_t *entries);
+
+int syncop_readdir (xlator_t *subvol, fd_t *fd, size_t size, off_t off,
+ gf_dirent_t *entries);
+
+int syncop_opendir (xlator_t *subvol, loc_t *loc, fd_t *fd);
+
+int syncop_setattr (xlator_t *subvol, loc_t *loc, struct iatt *iatt, int valid,
+ /* out */
+ struct iatt *preop, struct iatt *postop);
+
+int syncop_fsetattr (xlator_t *subvol, fd_t *fd, struct iatt *iatt, int valid,
+ /* out */
+ struct iatt *preop, struct iatt *postop);
+
+int syncop_statfs (xlator_t *subvol, loc_t *loc, struct statvfs *buf);
+
+int syncop_setxattr (xlator_t *subvol, loc_t *loc, dict_t *dict, int32_t flags);
+int syncop_fsetxattr (xlator_t *subvol, fd_t *fd, dict_t *dict, int32_t flags);
+int syncop_listxattr (xlator_t *subvol, loc_t *loc, dict_t **dict);
+int syncop_getxattr (xlator_t *xl, loc_t *loc, dict_t **dict, const char *key);
+int syncop_fgetxattr (xlator_t *xl, fd_t *fd, dict_t **dict, const char *key);
+int syncop_removexattr (xlator_t *subvol, loc_t *loc, const char *name);
+int syncop_fremovexattr (xlator_t *subvol, fd_t *fd, const char *name);
+
+int syncop_create (xlator_t *subvol, loc_t *loc, int32_t flags, mode_t mode,
+ fd_t *fd, dict_t *dict, struct iatt *iatt);
+int syncop_open (xlator_t *subvol, loc_t *loc, int32_t flags, fd_t *fd);
+int syncop_close (fd_t *fd);
+
+int syncop_write (xlator_t *subvol, fd_t *fd, const char *buf, int size,
+ off_t offset, struct iobref *iobref, uint32_t flags);
+int syncop_writev (xlator_t *subvol, fd_t *fd, const struct iovec *vector,
+ int32_t count, off_t offset, struct iobref *iobref,
+ uint32_t flags);
+int syncop_readv (xlator_t *subvol, fd_t *fd, size_t size, off_t off,
+ uint32_t flags,
+ /* out */
+ struct iovec **vector, int *count, struct iobref **iobref);
+
+int syncop_ftruncate (xlator_t *subvol, fd_t *fd, off_t offset);
+int syncop_truncate (xlator_t *subvol, loc_t *loc, off_t offset);
+
+int syncop_unlink (xlator_t *subvol, loc_t *loc);
+int syncop_rmdir (xlator_t *subvol, loc_t *loc);
+
+int syncop_fsync (xlator_t *subvol, fd_t *fd, int dataonly);
+int syncop_flush (xlator_t *subvol, fd_t *fd);
+int syncop_fstat (xlator_t *subvol, fd_t *fd, struct iatt *stbuf);
+int syncop_stat (xlator_t *subvol, loc_t *loc, struct iatt *stbuf);
+
+int syncop_symlink (xlator_t *subvol, loc_t *loc, const char *newpath,
+ dict_t *dict, struct iatt *iatt);
+int syncop_readlink (xlator_t *subvol, loc_t *loc, char **buffer, size_t size);
+int syncop_mknod (xlator_t *subvol, loc_t *loc, mode_t mode, dev_t rdev,
+ dict_t *dict, struct iatt *iatt);
+int syncop_mkdir (xlator_t *subvol, loc_t *loc, mode_t mode, dict_t *dict,
+ struct iatt *iatt);
+int syncop_link (xlator_t *subvol, loc_t *oldloc, loc_t *newloc);
+int syncop_fsyncdir (xlator_t *subvol, fd_t *fd, int datasync);
+int syncop_access (xlator_t *subvol, loc_t *loc, int32_t mask);
+int syncop_fallocate(xlator_t *subvol, fd_t *fd, int32_t keep_size, off_t offset,
+ size_t len);
+int syncop_discard(xlator_t *subvol, fd_t *fd, off_t offset, size_t len);
+
+int syncop_zerofill(xlator_t *subvol, fd_t *fd, off_t offset, size_t len);
+
+int syncop_rename (xlator_t *subvol, loc_t *oldloc, loc_t *newloc);
+
+int syncop_lk (xlator_t *subvol, fd_t *fd, int cmd, struct gf_flock *flock);
+
+#endif /* _SYNCOP_H */
diff --git a/libglusterfs/src/syscall.c b/libglusterfs/src/syscall.c
index d350710e2..e8954cc23 100644
--- a/libglusterfs/src/syscall.c
+++ b/libglusterfs/src/syscall.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -43,7 +34,7 @@ sys_stat (const char *path, struct stat *buf)
}
-int
+int
sys_fstat (int fd, struct stat *buf)
{
return fstat (fd, buf);
@@ -64,14 +55,14 @@ sys_readdir (DIR *dir)
}
-ssize_t
+ssize_t
sys_readlink (const char *path, char *buf, size_t bufsiz)
{
return readlink (path, buf, bufsiz);
}
-int
+int
sys_closedir (DIR *dir)
{
return closedir (dir);
@@ -85,28 +76,31 @@ sys_mknod (const char *pathname, mode_t mode, dev_t dev)
}
-int
+int
sys_mkdir (const char *pathname, mode_t mode)
{
return mkdir (pathname, mode);
}
-int
+int
sys_unlink (const char *pathname)
{
+#ifdef GF_SOLARIS_HOST_OS
+ return solaris_unlink (pathname);
+#endif
return unlink (pathname);
}
-int
+int
sys_rmdir (const char *pathname)
{
return rmdir (pathname);
}
-int
+int
sys_symlink (const char *oldpath, const char *newpath)
{
return symlink (oldpath, newpath);
@@ -116,11 +110,14 @@ sys_symlink (const char *oldpath, const char *newpath)
int
sys_rename (const char *oldpath, const char *newpath)
{
+#ifdef GF_SOLARIS_HOST_OS
+ return solaris_rename (oldpath, newpath);
+#endif
return rename (oldpath, newpath);
}
-int
+int
sys_link (const char *oldpath, const char *newpath)
{
return link (oldpath, newpath);
@@ -141,7 +138,7 @@ sys_fchmod (int fd, mode_t mode)
}
-int
+int
sys_chown (const char *path, uid_t owner, gid_t group)
{
return chown (path, owner, group);
@@ -162,21 +159,21 @@ sys_lchown (const char *path, uid_t owner, gid_t group)
}
-int
+int
sys_truncate (const char *path, off_t length)
{
return truncate (path, length);
}
-int
+int
sys_ftruncate (int fd, off_t length)
{
return ftruncate (fd, length);
}
-int
+int
sys_utimes (const char *filename, const struct timeval times[2])
{
return utimes (filename, times);
@@ -211,7 +208,7 @@ sys_read (int fd, void *buf, size_t count)
}
-ssize_t
+ssize_t
sys_write (int fd, const void *buf, size_t count)
{
return write (fd, buf, count);
@@ -232,21 +229,26 @@ sys_statvfs (const char *path, struct statvfs *buf)
}
-int
+int
sys_close (int fd)
{
- return close (fd);
+ int ret = -1;
+
+ if (fd >= 0)
+ ret = close (fd);
+
+ return ret;
}
-int
+int
sys_fsync (int fd)
{
return fsync (fd);
}
-int
+int
sys_fdatasync (int fd)
{
#ifdef HAVE_FDATASYNC
@@ -257,44 +259,44 @@ sys_fdatasync (int fd)
}
-int
-sys_lsetxattr (const char *path, const char *name, const void *value,
- size_t size, int flags)
+int
+sys_lsetxattr (const char *path, const char *name, const void *value,
+ size_t size, int flags)
{
-
-#ifdef GF_LINUX_HOST_OS
+
+#if defined(GF_LINUX_HOST_OS) || defined(__NetBSD__)
return lsetxattr (path, name, value, size, flags);
#endif
#ifdef GF_BSD_HOST_OS
- return extattr_set_link (path, EXTATTR_NAMESPACE_USER,
+ return extattr_set_link (path, EXTATTR_NAMESPACE_USER,
name, value, size);
#endif
-
+
#ifdef GF_SOLARIS_HOST_OS
return solaris_setxattr (path, name, value, size, flags);
#endif
#ifdef GF_DARWIN_HOST_OS
- return setxattr (path, name, value, size, 0,
+ return setxattr (path, name, value, size, 0,
flags|XATTR_NOFOLLOW);
#endif
-
+
}
ssize_t
-sys_llistxattr (const char *path, char *list, size_t size)
+sys_llistxattr (const char *path, char *list, size_t size)
{
-
-#ifdef GF_LINUX_HOST_OS
+
+#if defined(GF_LINUX_HOST_OS) || defined(__NetBSD__)
return llistxattr (path, list, size);
#endif
#ifdef GF_BSD_HOST_OS
return extattr_list_link (path, EXTATTR_NAMESPACE_USER, list, size);
#endif
-
+
#ifdef GF_SOLARIS_HOST_OS
return solaris_listxattr (path, list, size);
#endif
@@ -302,23 +304,23 @@ sys_llistxattr (const char *path, char *list, size_t size)
#ifdef GF_DARWIN_HOST_OS
return listxattr (path, list, size, XATTR_NOFOLLOW);
#endif
-
+
}
ssize_t
-sys_lgetxattr (const char *path, const char *name, void *value, size_t size)
+sys_lgetxattr (const char *path, const char *name, void *value, size_t size)
{
-
-#ifdef GF_LINUX_HOST_OS
+
+#if defined(GF_LINUX_HOST_OS) || defined(__NetBSD__)
return lgetxattr (path, name, value, size);
#endif
#ifdef GF_BSD_HOST_OS
- return extattr_get_link (path, EXTATTR_NAMESPACE_USER, name, value,
+ return extattr_get_link (path, EXTATTR_NAMESPACE_USER, name, value,
size);
#endif
-
+
#ifdef GF_SOLARIS_HOST_OS
return solaris_getxattr (path, name, value, size);
#endif
@@ -330,19 +332,19 @@ sys_lgetxattr (const char *path, const char *name, void *value, size_t size)
}
-ssize_t
-sys_fgetxattr (int filedes, const char *name, void *value, size_t size)
+ssize_t
+sys_fgetxattr (int filedes, const char *name, void *value, size_t size)
{
-
-#ifdef GF_LINUX_HOST_OS
+
+#if defined(GF_LINUX_HOST_OS) || defined(__NetBSD__)
return fgetxattr (filedes, name, value, size);
#endif
#ifdef GF_BSD_HOST_OS
- return extattr_get_fd (filedes, EXTATTR_NAMESPACE_USER, name,
+ return extattr_get_fd (filedes, EXTATTR_NAMESPACE_USER, name,
value, size);
#endif
-
+
#ifdef GF_SOLARIS_HOST_OS
return solaris_fgetxattr (filedes, name, value, size);
#endif
@@ -354,20 +356,46 @@ sys_fgetxattr (int filedes, const char *name, void *value, size_t size)
}
-int
-sys_fsetxattr (int filedes, const char *name, const void *value,
+int
+sys_fremovexattr (int filedes, const char *name)
+{
+
+#if defined(GF_LINUX_HOST_OS) || defined(__NetBSD__)
+ return fremovexattr (filedes, name);
+#endif
+
+ errno = ENOSYS;
+ return -1;
+#if 0 /* TODO: to port to other OSes, fill in each of below */
+#ifdef GF_BSD_HOST_OS
+ return extattr_remove_fd (filedes, EXTATTR_NAMESPACE_USER, name);
+#endif
+
+#ifdef GF_SOLARIS_HOST_OS
+ return solaris_fremovexattr (filedes, name);
+#endif
+
+#ifdef GF_DARWIN_HOST_OS
+ return fremovexattr (filedes, name, 0);
+#endif
+#endif
+}
+
+
+int
+sys_fsetxattr (int filedes, const char *name, const void *value,
size_t size, int flags)
{
-#ifdef GF_LINUX_HOST_OS
+#if defined(GF_LINUX_HOST_OS) || defined(__NetBSD__)
return fsetxattr (filedes, name, value, size, flags);
#endif
#ifdef GF_BSD_HOST_OS
- return extattr_set_fd (filedes, EXTATTR_NAMESPACE_USER, name,
+ return extattr_set_fd (filedes, EXTATTR_NAMESPACE_USER, name,
value, size);
#endif
-
+
#ifdef GF_SOLARIS_HOST_OS
return solaris_fsetxattr (filedes, name, value, size, flags);
#endif
@@ -379,11 +407,11 @@ sys_fsetxattr (int filedes, const char *name, const void *value,
}
-ssize_t
-sys_flistxattr (int filedes, char *list, size_t size)
+ssize_t
+sys_flistxattr (int filedes, char *list, size_t size)
{
-
-#ifdef GF_LINUX_HOST_OS
+
+#if defined(GF_LINUX_HOST_OS) || defined(__NetBSD__)
return flistxattr (filedes, list, size);
#endif
@@ -402,18 +430,18 @@ sys_flistxattr (int filedes, char *list, size_t size)
}
-int
+int
sys_lremovexattr (const char *path, const char *name)
{
-
-#ifdef GF_LINUX_HOST_OS
+
+#if defined(GF_LINUX_HOST_OS) || defined(__NetBSD__)
return lremovexattr (path, name);
#endif
#ifdef GF_BSD_HOST_OS
return extattr_delete_link (path, EXTATTR_NAMESPACE_USER, name);
#endif
-
+
#ifdef GF_SOLARIS_HOST_OS
return solaris_removexattr (path, name);
#endif
@@ -425,8 +453,31 @@ sys_lremovexattr (const char *path, const char *name)
}
-int
+int
sys_access (const char *pathname, int mode)
{
return access (pathname, mode);
}
+
+
+int
+sys_fallocate(int fd, int mode, off_t offset, off_t len)
+{
+#ifdef HAVE_FALLOCATE
+ return fallocate(fd, mode, offset, len);
+#endif
+
+#ifdef HAVE_POSIX_FALLOCATE
+ if (mode) {
+ /* keep size not supported */
+ errno = EOPNOTSUPP;
+ return -1;
+ }
+
+ return posix_fallocate(fd, offset, len);
+#endif
+
+ errno = ENOSYS;
+ return -1;
+}
+
diff --git a/libglusterfs/src/syscall.h b/libglusterfs/src/syscall.h
index d9cbb8fd4..f1c9f58c3 100644
--- a/libglusterfs/src/syscall.h
+++ b/libglusterfs/src/syscall.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __SYSCALL_H__
@@ -26,7 +17,7 @@ sys_lstat (const char *path, struct stat *buf);
int
sys_stat (const char *path, struct stat *buf);
-int
+int
sys_fstat (int fd, struct stat *buf);
DIR *
@@ -35,31 +26,31 @@ sys_opendir (const char *name);
struct dirent *
sys_readdir (DIR *dir);
-ssize_t
+ssize_t
sys_readlink (const char *path, char *buf, size_t bufsiz);
-int
+int
sys_closedir (DIR *dir);
int
sys_mknod (const char *pathname, mode_t mode, dev_t dev);
-int
+int
sys_mkdir (const char *pathname, mode_t mode);
-int
+int
sys_unlink (const char *pathname);
-int
+int
sys_rmdir (const char *pathname);
-int
+int
sys_symlink (const char *oldpath, const char *newpath);
int
sys_rename (const char *oldpath, const char *newpath);
-int
+int
sys_link (const char *oldpath, const char *newpath);
int
@@ -68,7 +59,7 @@ sys_chmod (const char *path, mode_t mode);
int
sys_fchmod (int fd, mode_t mode);
-int
+int
sys_chown (const char *path, uid_t owner, gid_t group);
int
@@ -77,13 +68,13 @@ sys_fchown (int fd, uid_t owner, gid_t group);
int
sys_lchown (const char *path, uid_t owner, gid_t group);
-int
+int
sys_truncate (const char *path, off_t length);
-int
+int
sys_ftruncate (int fd, off_t length);
-int
+int
sys_utimes (const char *filename, const struct timeval times[2]);
int
@@ -98,7 +89,7 @@ sys_writev (int fd, const struct iovec *iov, int iovcnt);
ssize_t
sys_read (int fd, void *buf, size_t count);
-ssize_t
+ssize_t
sys_write (int fd, const void *buf, size_t count);
off_t
@@ -107,42 +98,47 @@ sys_lseek (int fd, off_t offset, int whence);
int
sys_statvfs (const char *path, struct statvfs *buf);
-int
+int
sys_close (int fd);
-int
+int
sys_fsync (int fd);
-int
+int
sys_fdatasync (int fd);
-int
-sys_lsetxattr (const char *path, const char *name, const void *value,
- size_t size, int flags);
+int
+sys_lsetxattr (const char *path, const char *name, const void *value,
+ size_t size, int flags);
ssize_t
-sys_llistxattr (const char *path, char *list, size_t size);
+sys_llistxattr (const char *path, char *list, size_t size);
ssize_t
-sys_lgetxattr (const char *path, const char *name, void *value, size_t size);
+sys_lgetxattr (const char *path, const char *name, void *value, size_t size);
-ssize_t
-sys_fgetxattr (int filedes, const char *name, void *value, size_t size);
+ssize_t
+sys_fgetxattr (int filedes, const char *name, void *value, size_t size);
-int
-sys_fsetxattr (int filedes, const char *name, const void *value,
+int
+sys_fsetxattr (int filedes, const char *name, const void *value,
size_t size, int flags);
-ssize_t
-sys_flistxattr (int filedes, char *list, size_t size);
+ssize_t
+sys_flistxattr (int filedes, char *list, size_t size);
-int
+int
sys_lremovexattr (const char *path, const char *name);
-int
+int
+sys_fremovexattr (int filedes, const char *name);
+
+int
sys_access (const char *pathname, int mode);
-int
+int
sys_ftruncate (int fd, off_t length);
+int sys_fallocate(int fd, int mode, off_t offset, off_t len);
+
#endif /* __SYSCALL_H__ */
diff --git a/libglusterfs/src/timer.c b/libglusterfs/src/timer.c
index a84fa8cdf..a059cc212 100644
--- a/libglusterfs/src/timer.c
+++ b/libglusterfs/src/timer.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -25,45 +16,43 @@
#include "timer.h"
#include "logging.h"
#include "common-utils.h"
-
-#define TS(tv) ((((unsigned long long) tv.tv_sec) * 1000000) + (tv.tv_usec))
+#include "globals.h"
+#include "timespec.h"
gf_timer_t *
gf_timer_call_after (glusterfs_ctx_t *ctx,
- struct timeval delta,
- gf_timer_cbk_t cbk,
+ struct timespec delta,
+ gf_timer_cbk_t callbk,
void *data)
{
gf_timer_registry_t *reg = NULL;
gf_timer_t *event = NULL;
gf_timer_t *trav = NULL;
- unsigned long long at = 0L;
-
+ uint64_t at = 0;
+
if (ctx == NULL)
{
- gf_log ("timer", GF_LOG_ERROR, "invalid argument");
+ gf_log_callingfn ("timer", GF_LOG_ERROR, "invalid argument");
return NULL;
}
reg = gf_timer_registry_init (ctx);
if (!reg) {
- gf_log ("timer", GF_LOG_ERROR, "!reg");
+ gf_log_callingfn ("timer", GF_LOG_ERROR, "!reg");
return NULL;
}
- event = CALLOC (1, sizeof (*event));
+ event = GF_CALLOC (1, sizeof (*event), gf_common_mt_gf_timer_t);
if (!event) {
- gf_log ("timer", GF_LOG_CRITICAL, "Not enough memory");
return NULL;
}
- gettimeofday (&event->at, NULL);
- event->at.tv_usec = ((event->at.tv_usec + delta.tv_usec) % 1000000);
- event->at.tv_sec += ((event->at.tv_usec + delta.tv_usec) / 1000000);
- event->at.tv_sec += delta.tv_sec;
+ timespec_now (&event->at);
+ timespec_adjust_delta (&event->at, delta);
at = TS (event->at);
- event->cbk = cbk;
+ event->callbk = callbk;
event->data = data;
+ event->xl = THIS;
pthread_mutex_lock (&reg->lock);
{
trav = reg->active.prev;
@@ -87,10 +76,10 @@ gf_timer_call_stale (gf_timer_registry_t *reg,
{
if (reg == NULL || event == NULL)
{
- gf_log ("timer", GF_LOG_ERROR, "invalid argument");
+ gf_log_callingfn ("timer", GF_LOG_ERROR, "invalid argument");
return 0;
}
-
+
event->next->prev = event->prev;
event->prev->next = event->next;
event->next = &reg->stale;
@@ -106,16 +95,17 @@ gf_timer_call_cancel (glusterfs_ctx_t *ctx,
gf_timer_t *event)
{
gf_timer_registry_t *reg = NULL;
-
+
if (ctx == NULL || event == NULL)
{
- gf_log ("timer", GF_LOG_ERROR, "invalid argument");
+ gf_log_callingfn ("timer", GF_LOG_ERROR, "invalid argument");
return 0;
}
-
+
reg = gf_timer_registry_init (ctx);
if (!reg) {
gf_log ("timer", GF_LOG_ERROR, "!reg");
+ GF_FREE (event);
return 0;
}
@@ -126,7 +116,7 @@ gf_timer_call_cancel (glusterfs_ctx_t *ctx,
}
pthread_mutex_unlock (&reg->lock);
- FREE (event);
+ GF_FREE (event);
return 0;
}
@@ -134,13 +124,14 @@ void *
gf_timer_proc (void *ctx)
{
gf_timer_registry_t *reg = NULL;
-
+ const struct timespec sleepts = {.tv_sec = 1, .tv_nsec = 0, };
+
if (ctx == NULL)
{
- gf_log ("timer", GF_LOG_ERROR, "invalid argument");
+ gf_log_callingfn ("timer", GF_LOG_ERROR, "invalid argument");
return NULL;
}
-
+
reg = gf_timer_registry_init (ctx);
if (!reg) {
gf_log ("timer", GF_LOG_ERROR, "!reg");
@@ -148,14 +139,14 @@ gf_timer_proc (void *ctx)
}
while (!reg->fin) {
- unsigned long long now;
- struct timeval now_tv;
+ uint64_t now;
+ struct timespec now_ts;
gf_timer_t *event = NULL;
- gettimeofday (&now_tv, NULL);
- now = TS (now_tv);
+ timespec_now (&now_ts);
+ now = TS (now_ts);
while (1) {
- unsigned long long at;
+ uint64_t at;
char need_cbk = 0;
pthread_mutex_lock (&reg->lock);
@@ -168,13 +159,15 @@ gf_timer_proc (void *ctx)
}
}
pthread_mutex_unlock (&reg->lock);
+ if (event->xl)
+ THIS = event->xl;
if (need_cbk)
- event->cbk (event->data);
+ event->callbk (event->data);
else
break;
}
- usleep (1000000);
+ nanosleep (&sleepts, NULL);
}
pthread_mutex_lock (&reg->lock);
@@ -189,7 +182,7 @@ gf_timer_proc (void *ctx)
}
pthread_mutex_unlock (&reg->lock);
pthread_mutex_destroy (&reg->lock);
- FREE (((glusterfs_ctx_t *)ctx)->timer);
+ GF_FREE (((glusterfs_ctx_t *)ctx)->timer);
return NULL;
}
@@ -197,24 +190,28 @@ gf_timer_proc (void *ctx)
gf_timer_registry_t *
gf_timer_registry_init (glusterfs_ctx_t *ctx)
{
- if (ctx == NULL)
- {
- gf_log ("timer", GF_LOG_ERROR, "invalid argument");
+ if (ctx == NULL) {
+ gf_log_callingfn ("timer", GF_LOG_ERROR, "invalid argument");
return NULL;
}
-
+
if (!ctx->timer) {
gf_timer_registry_t *reg = NULL;
- ctx->timer = reg = CALLOC (1, sizeof (*reg));
- ERR_ABORT (reg);
+ reg = GF_CALLOC (1, sizeof (*reg),
+ gf_common_mt_gf_timer_registry_t);
+ if (!reg)
+ goto out;
+
pthread_mutex_init (&reg->lock, NULL);
reg->active.next = &reg->active;
reg->active.prev = &reg->active;
reg->stale.next = &reg->stale;
reg->stale.prev = &reg->stale;
- pthread_create (&reg->th, NULL, gf_timer_proc, ctx);
+ ctx->timer = reg;
+ gf_thread_create (&reg->th, NULL, gf_timer_proc, ctx);
}
+out:
return ctx->timer;
}
diff --git a/libglusterfs/src/timer.h b/libglusterfs/src/timer.h
index 4a8cdff21..2f963adbf 100644
--- a/libglusterfs/src/timer.h
+++ b/libglusterfs/src/timer.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _TIMER_H
@@ -26,24 +17,26 @@
#endif
#include "glusterfs.h"
+#include "xlator.h"
#include <sys/time.h>
#include <pthread.h>
typedef void (*gf_timer_cbk_t) (void *);
struct _gf_timer {
- struct _gf_timer *next, *prev;
- struct timeval at;
- gf_timer_cbk_t cbk;
- void *data;
+ struct _gf_timer *next, *prev;
+ struct timespec at;
+ gf_timer_cbk_t callbk;
+ void *data;
+ xlator_t *xl;
};
struct _gf_timer_registry {
- pthread_t th;
- char fin;
- struct _gf_timer stale;
- struct _gf_timer active;
- pthread_mutex_t lock;
+ pthread_t th;
+ char fin;
+ struct _gf_timer stale;
+ struct _gf_timer active;
+ pthread_mutex_t lock;
};
typedef struct _gf_timer gf_timer_t;
@@ -51,13 +44,13 @@ typedef struct _gf_timer_registry gf_timer_registry_t;
gf_timer_t *
gf_timer_call_after (glusterfs_ctx_t *ctx,
- struct timeval delta,
- gf_timer_cbk_t cbk,
- void *data);
+ struct timespec delta,
+ gf_timer_cbk_t cbk,
+ void *data);
int32_t
gf_timer_call_cancel (glusterfs_ctx_t *ctx,
- gf_timer_t *event);
+ gf_timer_t *event);
void *
gf_timer_proc (void *data);
diff --git a/libglusterfs/src/timespec.c b/libglusterfs/src/timespec.c
new file mode 100644
index 000000000..a0c281a1e
--- /dev/null
+++ b/libglusterfs/src/timespec.c
@@ -0,0 +1,68 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <stdio.h>
+#include <inttypes.h>
+#if defined GF_LINUX_HOST_OS || defined GF_SOLARIS_HOST_OS || defined GF_BSD_HOST_OS
+#include <time.h>
+#include <sys/time.h>
+#endif
+
+#if defined GF_DARWIN_HOST_OS
+#include <mach/mach_time.h>
+#endif
+
+#include "logging.h"
+#include "time.h"
+
+
+void tv2ts (struct timeval tv, struct timespec *ts)
+{
+ ts->tv_sec = tv.tv_sec;
+ ts->tv_nsec = tv.tv_usec * 1000;
+}
+
+void timespec_now (struct timespec *ts)
+{
+#if defined GF_LINUX_HOST_OS || defined GF_SOLARIS_HOST_OS || defined GF_BSD_HOST_OS
+
+ if (0 == clock_gettime(CLOCK_MONOTONIC, ts))
+ return;
+ else {
+ struct timeval tv;
+ if (0 == gettimeofday(&tv, NULL))
+ tv2ts(tv, ts);
+ }
+#elif defined GF_DARWIN_HOST_OS
+ mach_timebase_info_data_t tb = { 0 };
+ static double timebase = 0.0;
+ uint64_t time = 0;
+ mach_timebase_info (&tb);
+
+ timebase *= info.numer;
+ timebase /= info.denom;
+
+ time = mach_absolute_time();
+ time *= timebase;
+
+ ts->tv_sec = (time * NANO);
+ ts->tv_nsec = (time - (ts.tv_sec * GIGA));
+
+#endif /* Platform verification */
+ gf_log_callingfn ("timer", GF_LOG_DEBUG, "%"PRIu64".%09"PRIu64,
+ ts->tv_sec, ts->tv_nsec);
+}
+
+void timespec_adjust_delta (struct timespec *ts, struct timespec delta)
+{
+ ts->tv_nsec = ((ts->tv_nsec + delta.tv_nsec) % 1000000000);
+ ts->tv_sec += ((ts->tv_nsec + delta.tv_nsec) / 1000000000);
+ ts->tv_sec += delta.tv_sec;
+}
diff --git a/libglusterfs/src/timespec.h b/libglusterfs/src/timespec.h
new file mode 100644
index 000000000..490255df9
--- /dev/null
+++ b/libglusterfs/src/timespec.h
@@ -0,0 +1,24 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __INCLUDE_TIMESPEC_H__
+#define __INCLUDE_TIMESPEC_H__
+
+#include <stdint.h>
+
+#define TS(ts) ((ts.tv_sec * 1000000000LL) + ts.tv_nsec)
+#define NANO (+1.0E-9)
+#define GIGA UINT64_C(1000000000)
+
+void tv2ts (struct timeval tv, struct timespec *ts);
+void timespec_now (struct timespec *ts);
+void timespec_adjust_delta (struct timespec *ts, struct timespec delta);
+
+#endif /* __INCLUDE_TIMESPEC_H__ */
diff --git a/libglusterfs/src/transport.c b/libglusterfs/src/transport.c
deleted file mode 100644
index 7464ffd66..000000000
--- a/libglusterfs/src/transport.c
+++ /dev/null
@@ -1,441 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <dlfcn.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <sys/poll.h>
-#include <fnmatch.h>
-#include <stdint.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "logging.h"
-#include "transport.h"
-#include "glusterfs.h"
-#include "xlator.h"
-#include "list.h"
-
-
-transport_t *
-transport_load (dict_t *options,
- xlator_t *xl)
-{
- struct transport *trans = NULL, *return_trans = NULL;
- char *addr_family = NULL;
- char *name = NULL;
- void *handle = NULL;
- char *type = NULL;
- char str[] = "ERROR";
- int32_t ret = -1;
- int8_t is_tcp = 0, is_unix = 0, is_ibsdp = 0;
- volume_opt_list_t *vol_opt = NULL;
-
- GF_VALIDATE_OR_GOTO("transport", options, fail);
- GF_VALIDATE_OR_GOTO("transport", xl, fail);
-
- trans = CALLOC (1, sizeof (struct transport));
- GF_VALIDATE_OR_GOTO("transport", trans, fail);
-
- trans->xl = xl;
- type = str;
-
- /* Backward compatibility */
- ret = dict_get_str (options, "transport-type", &type);
- if (ret < 0) {
- ret = dict_set_str (options, "transport-type", "socket");
- if (ret < 0)
- gf_log ("dict", GF_LOG_DEBUG,
- "setting transport-type failed");
- ret = dict_get_str (options, "transport.address-family",
- &addr_family);
- if (ret < 0) {
- ret = dict_get_str (options, "address-family",
- &addr_family);
- }
-
- if (ret < 0) {
- ret = dict_set_str (options,
- "transport.address-family",
- "inet");
- if (ret < 0) {
- gf_log ("dict", GF_LOG_ERROR,
- "setting address-family failed");
- }
- }
-
- gf_log ("transport", GF_LOG_WARNING,
- "missing 'option transport-type'. defaulting to "
- "\"socket\" (%s)", addr_family?addr_family:"inet");
- } else {
- {
- /* Backword compatibility to handle * /client,
- * * /server.
- */
- char *tmp = strchr (type, '/');
- if (tmp)
- *tmp = '\0';
- }
-
- is_tcp = strcmp (type, "tcp");
- is_unix = strcmp (type, "unix");
- is_ibsdp = strcmp (type, "ib-sdp");
- if ((is_tcp == 0) ||
- (is_unix == 0) ||
- (is_ibsdp == 0)) {
- if (is_tcp == 0)
- ret = dict_set_str (options,
- "transport.address-family",
- "inet");
- if (is_unix == 0)
- ret = dict_set_str (options,
- "transport.address-family",
- "unix");
- if (is_ibsdp == 0)
- ret = dict_set_str (options,
- "transport.address-family",
- "inet-sdp");
-
- if (ret < 0)
- gf_log ("dict", GF_LOG_DEBUG,
- "setting address-family failed");
-
- ret = dict_set_str (options,
- "transport-type", "socket");
- if (ret < 0)
- gf_log ("dict", GF_LOG_DEBUG,
- "setting transport-type failed");
- }
- }
-
- ret = dict_get_str (options, "transport-type", &type);
- if (ret < 0) {
- FREE (trans);
- gf_log ("transport", GF_LOG_ERROR,
- "'option transport-type <xx>' missing in volume '%s'",
- xl->name);
- goto fail;
- }
-
- ret = asprintf (&name, "%s/%s.so", TRANSPORTDIR, type);
- if (-1 == ret) {
- gf_log ("transport", GF_LOG_ERROR, "asprintf failed");
- goto fail;
- }
- gf_log ("transport", GF_LOG_DEBUG,
- "attempt to load file %s", name);
-
- handle = dlopen (name, RTLD_NOW|RTLD_GLOBAL);
- if (handle == NULL) {
- gf_log ("transport", GF_LOG_ERROR, "%s", dlerror ());
- gf_log ("transport", GF_LOG_ERROR,
- "volume '%s': transport-type '%s' is not valid or "
- "not found on this machine",
- xl->name, type);
- FREE (name);
- FREE (trans);
- goto fail;
- }
- FREE (name);
-
- trans->ops = dlsym (handle, "tops");
- if (trans->ops == NULL) {
- gf_log ("transport", GF_LOG_ERROR,
- "dlsym (transport_ops) on %s", dlerror ());
- FREE (trans);
- goto fail;
- }
-
- trans->init = dlsym (handle, "init");
- if (trans->init == NULL) {
- gf_log ("transport", GF_LOG_ERROR,
- "dlsym (gf_transport_init) on %s", dlerror ());
- FREE (trans);
- goto fail;
- }
-
- trans->fini = dlsym (handle, "fini");
- if (trans->fini == NULL) {
- gf_log ("transport", GF_LOG_ERROR,
- "dlsym (gf_transport_fini) on %s", dlerror ());
- FREE (trans);
- goto fail;
- }
-
- vol_opt = CALLOC (1, sizeof (volume_opt_list_t));
- vol_opt->given_opt = dlsym (handle, "options");
- if (vol_opt->given_opt == NULL) {
- gf_log ("transport", GF_LOG_DEBUG,
- "volume option validation not specified");
- } else {
- list_add_tail (&vol_opt->list, &xl->volume_options);
- if (-1 ==
- validate_xlator_volume_options (xl,
- vol_opt->given_opt)) {
- gf_log ("transport", GF_LOG_ERROR,
- "volume option validation failed");
- FREE (trans);
- goto fail;
- }
- }
-
- ret = trans->init (trans);
- if (ret != 0) {
- gf_log ("transport", GF_LOG_ERROR,
- "'%s' initialization failed", type);
- FREE (trans);
- goto fail;
- }
-
- pthread_mutex_init (&trans->lock, NULL);
- return_trans = trans;
-fail:
- return return_trans;
-}
-
-
-int32_t
-transport_submit (transport_t *this, char *buf, int32_t len,
- struct iovec *vector, int count,
- struct iobref *iobref)
-{
- int32_t ret = -1;
- transport_t *peer_trans = NULL;
- struct iobuf *iobuf = NULL;
- struct transport_msg *msg = NULL;
-
- if (this->peer_trans) {
- peer_trans = this->peer_trans;
-
- msg = CALLOC (1, sizeof (*msg));
- if (!msg) {
- return -ENOMEM;
- }
-
- msg->hdr = buf;
- msg->hdrlen = len;
-
- if (vector) {
- iobuf = iobuf_get (this->xl->ctx->iobuf_pool);
- if (!iobuf) {
- FREE (msg->hdr);
- FREE (msg);
- return -ENOMEM;
- }
-
- iov_unload (iobuf->ptr, vector, count);
- msg->iobuf = iobuf;
- }
-
- pthread_mutex_lock (&peer_trans->handover.mutex);
- {
- list_add_tail (&msg->list, &peer_trans->handover.msgs);
- pthread_cond_broadcast (&peer_trans->handover.cond);
- }
- pthread_mutex_unlock (&peer_trans->handover.mutex);
-
- return 0;
- }
-
- GF_VALIDATE_OR_GOTO("transport", this, fail);
- GF_VALIDATE_OR_GOTO("transport", this->ops, fail);
-
- ret = this->ops->submit (this, buf, len, vector, count, iobref);
-fail:
- return ret;
-}
-
-
-int32_t
-transport_connect (transport_t *this)
-{
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO("transport", this, fail);
-
- ret = this->ops->connect (this);
-fail:
- return ret;
-}
-
-
-int32_t
-transport_listen (transport_t *this)
-{
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO("transport", this, fail);
-
- ret = this->ops->listen (this);
-fail:
- return ret;
-}
-
-
-int32_t
-transport_disconnect (transport_t *this)
-{
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO("transport", this, fail);
-
- ret = this->ops->disconnect (this);
-fail:
- return ret;
-}
-
-
-int32_t
-transport_destroy (transport_t *this)
-{
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO("transport", this, fail);
-
- if (this->fini)
- this->fini (this);
-
- pthread_mutex_destroy (&this->lock);
- FREE (this);
-fail:
- return ret;
-}
-
-
-transport_t *
-transport_ref (transport_t *this)
-{
- transport_t *return_this = NULL;
-
- GF_VALIDATE_OR_GOTO("transport", this, fail);
-
- pthread_mutex_lock (&this->lock);
- {
- this->refcount ++;
- }
- pthread_mutex_unlock (&this->lock);
-
- return_this = this;
-fail:
- return return_this;
-}
-
-
-int32_t
-transport_receive (transport_t *this, char **hdr_p, size_t *hdrlen_p,
- struct iobuf **iobuf_p)
-{
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO("transport", this, fail);
-
- if (this->peer_trans) {
- *hdr_p = this->handover.msg->hdr;
- *hdrlen_p = this->handover.msg->hdrlen;
- *iobuf_p = this->handover.msg->iobuf;
-
- return 0;
- }
-
- ret = this->ops->receive (this, hdr_p, hdrlen_p, iobuf_p);
-fail:
- return ret;
-}
-
-
-int32_t
-transport_unref (transport_t *this)
-{
- int32_t refcount = 0;
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO("transport", this, fail);
-
- pthread_mutex_lock (&this->lock);
- {
- refcount = --this->refcount;
- }
- pthread_mutex_unlock (&this->lock);
-
- if (refcount == 0) {
- this->xl->notify (this->xl, GF_EVENT_TRANSPORT_CLEANUP, this);
- transport_destroy (this);
- }
-
- ret = 0;
-fail:
- return ret;
-}
-
-
-void *
-transport_peerproc (void *trans_data)
-{
- transport_t *trans = NULL;
- struct transport_msg *msg = NULL;
-
- trans = trans_data;
-
- while (1) {
- pthread_mutex_lock (&trans->handover.mutex);
- {
- while (list_empty (&trans->handover.msgs))
- pthread_cond_wait (&trans->handover.cond,
- &trans->handover.mutex);
-
- msg = list_entry (trans->handover.msgs.next,
- struct transport_msg, list);
-
- list_del_init (&msg->list);
- }
- pthread_mutex_unlock (&trans->handover.mutex);
-
- trans->handover.msg = msg;
-
- trans->xl->notify (trans->xl, GF_EVENT_POLLIN, trans);
-
- FREE (msg);
- }
-}
-
-
-int
-transport_setpeer (transport_t *trans, transport_t *peer_trans)
-{
- trans->peer_trans = peer_trans;
-
- INIT_LIST_HEAD (&trans->handover.msgs);
- pthread_cond_init (&trans->handover.cond, NULL);
- pthread_mutex_init (&trans->handover.mutex, NULL);
- pthread_create (&trans->handover.thread, NULL,
- transport_peerproc, trans);
-
- peer_trans->peer_trans = trans;
-
- INIT_LIST_HEAD (&peer_trans->handover.msgs);
- pthread_cond_init (&peer_trans->handover.cond, NULL);
- pthread_mutex_init (&peer_trans->handover.mutex, NULL);
- pthread_create (&peer_trans->handover.thread, NULL,
- transport_peerproc, peer_trans);
-
- return 0;
-}
diff --git a/libglusterfs/src/transport.h b/libglusterfs/src/transport.h
deleted file mode 100644
index 2d85c76cc..000000000
--- a/libglusterfs/src/transport.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef __TRANSPORT_H__
-#define __TRANSPORT_H__
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <inttypes.h>
-
-struct transport_ops;
-typedef struct transport transport_t;
-
-#include "xlator.h"
-#include "dict.h"
-#include "compat.h"
-
-typedef struct peer_info {
- struct sockaddr_storage sockaddr;
- socklen_t sockaddr_len;
- char identifier[UNIX_PATH_MAX];
-}peer_info_t;
-
-struct transport_msg {
- struct list_head list;
- char *hdr;
- int hdrlen;
- struct iobuf *iobuf;
-};
-
-struct transport {
- struct transport_ops *ops;
- void *private;
- void *xl_private;
- pthread_mutex_t lock;
- int32_t refcount;
-
- xlator_t *xl;
- void *dnscache;
- data_t *buf;
- int32_t (*init) (transport_t *this);
- void (*fini) (transport_t *this);
- /* int (*notify) (transport_t *this, int event, void *data); */
- peer_info_t peerinfo;
- peer_info_t myinfo;
-
- transport_t *peer_trans;
- struct {
- pthread_mutex_t mutex;
- pthread_cond_t cond;
- pthread_t thread;
- struct list_head msgs;
- struct transport_msg *msg;
- } handover;
-
-};
-
-struct transport_ops {
- int32_t (*receive) (transport_t *this, char **hdr_p, size_t *hdrlen_p,
- struct iobuf **iobuf_p);
- int32_t (*submit) (transport_t *this, char *buf, int len,
- struct iovec *vector, int count,
- struct iobref *iobref);
- int32_t (*connect) (transport_t *this);
- int32_t (*listen) (transport_t *this);
- int32_t (*disconnect) (transport_t *this);
-};
-
-
-int32_t transport_listen (transport_t *this);
-int32_t transport_connect (transport_t *this);
-int32_t transport_disconnect (transport_t *this);
-int32_t transport_notify (transport_t *this, int event);
-int32_t transport_submit (transport_t *this, char *buf, int len,
- struct iovec *vector, int count,
- struct iobref *iobref);
-int32_t transport_receive (transport_t *this, char **hdr_p, size_t *hdrlen_p,
- struct iobuf **iobuf_p);
-int32_t transport_destroy (transport_t *this);
-
-transport_t *transport_load (dict_t *options, xlator_t *xl);
-transport_t *transport_ref (transport_t *trans);
-int32_t transport_unref (transport_t *trans);
-
-int transport_setpeer (transport_t *trans, transport_t *trans_peer);
-
-#endif /* __TRANSPORT_H__ */
diff --git a/libglusterfs/src/trie.c b/libglusterfs/src/trie.c
new file mode 100644
index 000000000..f96bbebf6
--- /dev/null
+++ b/libglusterfs/src/trie.c
@@ -0,0 +1,387 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include "common-utils.h"
+#include "trie.h"
+
+#define DISTANCE_EDIT 1
+#define DISTANCE_INS 1
+#define DISTANCE_DEL 1
+
+
+struct trienode {
+ char id;
+ char eow;
+ int depth;
+ void *data;
+ struct trie *trie;
+ struct trienode *parent;
+ struct trienode *subnodes[255];
+};
+
+struct trie {
+ struct trienode root;
+ int nodecnt;
+ size_t len;
+};
+
+
+trie_t *
+trie_new ()
+{
+ trie_t *trie = NULL;
+
+ trie = GF_CALLOC (1, sizeof (*trie), gf_common_mt_trie_trie);
+ if (!trie)
+ return NULL;
+
+ trie->root.trie = trie;
+
+ return trie;
+}
+
+
+static trienode_t *
+trie_subnode (trienode_t *node, int id)
+{
+ trienode_t *subnode = NULL;
+
+ subnode = node->subnodes[id];
+ if (!subnode) {
+ subnode = GF_CALLOC (1, sizeof (*subnode),
+ gf_common_mt_trie_node);
+ if (!subnode)
+ return NULL;
+
+ subnode->id = id;
+ subnode->depth = node->depth + 1;
+ node->subnodes[id] = subnode;
+ subnode->parent = node;
+ subnode->trie = node->trie;
+ node->trie->nodecnt++;
+ }
+
+ return subnode;
+}
+
+
+int
+trie_add (trie_t *trie, const char *dword)
+{
+ trienode_t *node = NULL;
+ int i = 0;
+ char id = 0;
+ trienode_t *subnode = NULL;
+
+ node = &trie->root;
+
+ for (i = 0; i < strlen (dword); i++) {
+ id = dword[i];
+
+ subnode = trie_subnode (node, id);
+ if (!subnode)
+ return -1;
+ node = subnode;
+ }
+
+ node->eow = 1;
+
+ return 0;
+}
+
+static void
+trienode_free (trienode_t *node)
+{
+ trienode_t *trav = NULL;
+ int i = 0;
+
+ for (i = 0; i < 255; i++) {
+ trav = node->subnodes[i];
+
+ if (trav)
+ trienode_free (trav);
+ }
+
+ GF_FREE (node->data);
+ GF_FREE (node);
+}
+
+
+void
+trie_destroy (trie_t *trie)
+{
+ trienode_free ((trienode_t *)trie);
+}
+
+
+void
+trie_destroy_bynode (trienode_t *node)
+{
+ trie_destroy (node->trie);
+}
+
+
+static int
+trienode_walk (trienode_t *node, int (*fn)(trienode_t *node, void *data),
+ void *data, int eowonly)
+{
+ trienode_t *trav = NULL;
+ int i = 0;
+ int cret = 0;
+ int ret = 0;
+
+ if (!eowonly || node->eow)
+ ret = fn (node, data);
+
+ if (ret)
+ goto out;
+
+ for (i = 0; i < 255; i++) {
+ trav = node->subnodes[i];
+ if (!trav)
+ continue;
+
+ cret = trienode_walk (trav, fn, data, eowonly);
+ if (cret < 0) {
+ ret = cret;
+ goto out;
+ }
+ ret += cret;
+ }
+
+out:
+ return ret;
+}
+
+
+static int
+trie_walk (trie_t *trie, int (*fn)(trienode_t *node, void *data),
+ void *data, int eowonly)
+{
+ return trienode_walk (&trie->root, fn, data, eowonly);
+}
+
+
+static void
+print_node (trienode_t *node, char **buf)
+{
+ if (!node->parent)
+ return;
+
+ if (node->parent) {
+ print_node (node->parent, buf);
+ *(*buf)++ = node->id;
+ }
+}
+
+
+int
+trienode_get_word (trienode_t *node, char **bufp)
+{
+ char *buf = NULL;
+
+ buf = GF_CALLOC (1, node->depth + 1, gf_common_mt_trie_buf);
+ if (!buf)
+ return -1;
+ *bufp = buf;
+
+ print_node (node, &buf);
+
+ return 0;
+}
+
+
+static int
+calc_dist (trienode_t *node, void *data)
+{
+ const char *word = NULL;
+ int i = 0;
+ int *row = NULL;
+ int *uprow = NULL;
+ int distu = 0;
+ int distl = 0;
+ int distul = 0;
+
+ word = data;
+
+ node->data = GF_CALLOC (node->trie->len, sizeof (int),
+ gf_common_mt_trie_data);
+ if (!node->data)
+ return -1;
+ row = node->data;
+
+ if (!node->parent) {
+ for (i = 0; i < node->trie->len; i++)
+ row[i] = i+1;
+
+ return 0;
+ }
+
+ uprow = node->parent->data;
+
+ distu = node->depth; /* up node */
+ distul = node->parent->depth; /* up-left node */
+
+ for (i = 0; i < node->trie->len; i++) {
+ distl = uprow[i]; /* left node */
+
+ if (word[i] == node->id)
+ row[i] = distul;
+ else
+ row[i] = min ((distul + DISTANCE_EDIT),
+ min ((distu + DISTANCE_DEL),
+ (distl + DISTANCE_INS)));
+
+ distu = row[i];
+ distul = distl;
+ }
+
+ return 0;
+}
+
+
+int
+trienode_get_dist (trienode_t *node)
+{
+ int *row = NULL;
+
+ row = node->data;
+
+ return row[node->trie->len - 1];
+}
+
+
+struct trienodevec_w {
+ struct trienodevec *vec;
+ const char *word;
+};
+
+
+static void
+trienodevec_clear (struct trienodevec *nodevec)
+{
+ memset(nodevec->nodes, 0, sizeof (*nodevec->nodes) * nodevec->cnt);
+}
+
+
+static int
+collect_closest (trienode_t *node, void *data)
+{
+ struct trienodevec_w *nodevec_w = NULL;
+ struct trienodevec *nodevec = NULL;
+ int dist = 0;
+ int i = 0;
+
+ nodevec_w = data;
+ nodevec = nodevec_w->vec;
+
+ if (calc_dist (node, (void *)nodevec_w->word))
+ return -1;
+
+ if (!node->eow || !nodevec->cnt)
+ return 0;
+
+ dist = trienode_get_dist (node);
+
+ /*
+ * I thought that when descending further after some dictionary word dw,
+ * if we see that child's distance is bigger than it was for dw, then we
+ * can prune this branch, as it can contain only worse nodes.
+ *
+ * This conjecture fails, see eg:
+ *
+ * d("AB", "B") = 1;
+ * d("AB", "BA") = 2;
+ * d("AB", "BAB") = 1;
+ *
+ * -- if both "B" and "BAB" are in dict., then pruning at "BA" * would
+ * miss "BAB".
+ *
+ * (example courtesy of Richard Bann <richardbann at gmail.com>)
+
+ if (node->parent->eow && dist > trienode_get_dist (node->parent))
+ return 1;
+
+ */
+
+ if (nodevec->nodes[0] &&
+ dist < trienode_get_dist (nodevec->nodes[0])) {
+ /* improving over the findings so far */
+ trienodevec_clear (nodevec);
+ nodevec->nodes[0] = node;
+ } else if (!nodevec->nodes[0] ||
+ dist == trienode_get_dist (nodevec->nodes[0])) {
+ /* as good as the best so far, add if there is free space */
+ for (i = 0; i < nodevec->cnt; i++) {
+ if (!nodevec->nodes[i]) {
+ nodevec->nodes[i] = node;
+ break;
+ }
+ }
+ }
+
+ return 0;
+}
+
+
+int
+trie_measure (trie_t *trie, const char *word, trienode_t **nodes,
+ int nodecnt)
+{
+ struct trienodevec nodevec = {0,};
+
+ nodevec.nodes = nodes;
+ nodevec.cnt = nodecnt;
+
+ return trie_measure_vec (trie, word, &nodevec);
+}
+
+
+int
+trie_measure_vec (trie_t *trie, const char *word, struct trienodevec *nodevec)
+{
+ struct trienodevec_w nodevec_w = {0,};
+ int ret = 0;
+
+ trie->len = strlen (word);
+
+ trienodevec_clear (nodevec);
+ nodevec_w.vec = nodevec;
+ nodevec_w.word = word;
+
+ ret = trie_walk (trie, collect_closest, &nodevec_w, 0);
+ if (ret > 0)
+ ret = 0;
+
+ return ret;
+}
+
+
+static int
+trienode_reset (trienode_t *node, void *data)
+{
+ GF_FREE (node->data);
+
+ return 0;
+}
+
+
+void
+trie_reset_search (trie_t *trie)
+{
+ trie->len = 0;
+
+ trie_walk (trie, trienode_reset, NULL, 0);
+}
diff --git a/libglusterfs/src/trie.h b/libglusterfs/src/trie.h
new file mode 100644
index 000000000..0356e6621
--- /dev/null
+++ b/libglusterfs/src/trie.h
@@ -0,0 +1,51 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _TRIE_H_
+#define _TRIE_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+struct trienode;
+typedef struct trienode trienode_t;
+
+struct trie;
+typedef struct trie trie_t;
+
+struct trienodevec {
+ trienode_t **nodes;
+ unsigned cnt;
+};
+
+
+trie_t *trie_new ();
+
+int trie_add (trie_t *trie, const char *word);
+
+void trie_destroy (trie_t *trie);
+
+void trie_destroy_bynode (trienode_t *node);
+
+int trie_measure (trie_t *trie, const char *word, trienode_t **nodes,
+ int nodecnt);
+
+int trie_measure_vec (trie_t *trie, const char *word,
+ struct trienodevec *nodevec);
+
+void trie_reset_search (trie_t *trie);
+
+int trienode_get_dist (trienode_t *node);
+
+int trienode_get_word (trienode_t *node, char **buf);
+
+#endif
diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
index def1ad1df..a277c58a8 100644
--- a/libglusterfs/src/xlator.c
+++ b/libglusterfs/src/xlator.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -28,893 +19,438 @@
#include <fnmatch.h>
#include "defaults.h"
-
#define SET_DEFAULT_FOP(fn) do { \
- if (!xl->fops->fn) \
- xl->fops->fn = default_##fn; \
- } while (0)
-
-#define SET_DEFAULT_MOP(fn) do { \
- if (!xl->mops->fn) \
- xl->mops->fn = default_##fn; \
- } while (0)
+ if (!xl->fops->fn) \
+ xl->fops->fn = default_##fn; \
+ } while (0)
#define SET_DEFAULT_CBK(fn) do { \
- if (!xl->cbks->fn) \
- xl->cbks->fn = default_##fn; \
- } while (0)
-
+ if (!xl->cbks->fn) \
+ xl->cbks->fn = default_##fn; \
+ } while (0)
-#define GF_OPTION_LIST_EMPTY(_opt) (_opt->value[0] == NULL)
static void
fill_defaults (xlator_t *xl)
{
- if (xl == NULL) {
- gf_log ("xlator", GF_LOG_DEBUG, "invalid argument");
- return;
- }
+ if (xl == NULL) {
+ gf_log_callingfn ("xlator", GF_LOG_WARNING, "invalid argument");
+ return;
+ }
- SET_DEFAULT_FOP (create);
- SET_DEFAULT_FOP (open);
- SET_DEFAULT_FOP (stat);
- SET_DEFAULT_FOP (readlink);
- SET_DEFAULT_FOP (mknod);
- SET_DEFAULT_FOP (mkdir);
- SET_DEFAULT_FOP (unlink);
- SET_DEFAULT_FOP (rmdir);
- SET_DEFAULT_FOP (symlink);
- SET_DEFAULT_FOP (rename);
- SET_DEFAULT_FOP (link);
- SET_DEFAULT_FOP (chmod);
- SET_DEFAULT_FOP (chown);
- SET_DEFAULT_FOP (truncate);
- SET_DEFAULT_FOP (utimens);
- SET_DEFAULT_FOP (readv);
- SET_DEFAULT_FOP (writev);
- SET_DEFAULT_FOP (statfs);
- SET_DEFAULT_FOP (flush);
- SET_DEFAULT_FOP (fsync);
- SET_DEFAULT_FOP (setxattr);
- SET_DEFAULT_FOP (getxattr);
- SET_DEFAULT_FOP (fsetxattr);
- SET_DEFAULT_FOP (fgetxattr);
- SET_DEFAULT_FOP (removexattr);
- SET_DEFAULT_FOP (opendir);
- SET_DEFAULT_FOP (readdir);
- SET_DEFAULT_FOP (fsyncdir);
- SET_DEFAULT_FOP (access);
- SET_DEFAULT_FOP (ftruncate);
- SET_DEFAULT_FOP (fstat);
- SET_DEFAULT_FOP (lk);
- SET_DEFAULT_FOP (inodelk);
- SET_DEFAULT_FOP (finodelk);
- SET_DEFAULT_FOP (entrylk);
- SET_DEFAULT_FOP (fentrylk);
- SET_DEFAULT_FOP (lookup);
- SET_DEFAULT_FOP (fchown);
- SET_DEFAULT_FOP (fchmod);
- SET_DEFAULT_FOP (setdents);
- SET_DEFAULT_FOP (getdents);
- SET_DEFAULT_FOP (checksum);
- SET_DEFAULT_FOP (xattrop);
- SET_DEFAULT_FOP (fxattrop);
- SET_DEFAULT_FOP (lock_notify);
- SET_DEFAULT_FOP (lock_fnotify);
-
- SET_DEFAULT_MOP (log);
- SET_DEFAULT_MOP (stats);
-
- SET_DEFAULT_CBK (release);
- SET_DEFAULT_CBK (releasedir);
- SET_DEFAULT_CBK (forget);
-
- if (!xl->notify)
- xl->notify = default_notify;
-
- return;
+ SET_DEFAULT_FOP (create);
+ SET_DEFAULT_FOP (open);
+ SET_DEFAULT_FOP (stat);
+ SET_DEFAULT_FOP (readlink);
+ SET_DEFAULT_FOP (mknod);
+ SET_DEFAULT_FOP (mkdir);
+ SET_DEFAULT_FOP (unlink);
+ SET_DEFAULT_FOP (rmdir);
+ SET_DEFAULT_FOP (symlink);
+ SET_DEFAULT_FOP (rename);
+ SET_DEFAULT_FOP (link);
+ SET_DEFAULT_FOP (truncate);
+ SET_DEFAULT_FOP (readv);
+ SET_DEFAULT_FOP (writev);
+ SET_DEFAULT_FOP (statfs);
+ SET_DEFAULT_FOP (flush);
+ SET_DEFAULT_FOP (fsync);
+ SET_DEFAULT_FOP (setxattr);
+ SET_DEFAULT_FOP (getxattr);
+ SET_DEFAULT_FOP (fsetxattr);
+ SET_DEFAULT_FOP (fgetxattr);
+ SET_DEFAULT_FOP (removexattr);
+ SET_DEFAULT_FOP (fremovexattr);
+ SET_DEFAULT_FOP (opendir);
+ SET_DEFAULT_FOP (readdir);
+ SET_DEFAULT_FOP (readdirp);
+ SET_DEFAULT_FOP (fsyncdir);
+ SET_DEFAULT_FOP (access);
+ SET_DEFAULT_FOP (ftruncate);
+ SET_DEFAULT_FOP (fstat);
+ SET_DEFAULT_FOP (lk);
+ SET_DEFAULT_FOP (inodelk);
+ SET_DEFAULT_FOP (finodelk);
+ SET_DEFAULT_FOP (entrylk);
+ SET_DEFAULT_FOP (fentrylk);
+ SET_DEFAULT_FOP (lookup);
+ SET_DEFAULT_FOP (rchecksum);
+ SET_DEFAULT_FOP (xattrop);
+ SET_DEFAULT_FOP (fxattrop);
+ SET_DEFAULT_FOP (setattr);
+ SET_DEFAULT_FOP (fsetattr);
+ SET_DEFAULT_FOP (fallocate);
+ SET_DEFAULT_FOP (discard);
+ SET_DEFAULT_FOP (zerofill);
+
+ SET_DEFAULT_FOP (getspec);
+
+ SET_DEFAULT_CBK (release);
+ SET_DEFAULT_CBK (releasedir);
+ SET_DEFAULT_CBK (forget);
+
+ if (!xl->notify)
+ xl->notify = default_notify;
+
+ if (!xl->mem_acct_init)
+ xl->mem_acct_init = default_mem_acct_init;
+
+ return;
}
-/* RFC 1123 & 952 */
-static char
-valid_host_name (char *address, int length)
+
+int
+xlator_set_type_virtual (xlator_t *xl, const char *type)
{
- int i = 0;
- char ret = 1;
+ GF_VALIDATE_OR_GOTO ("xlator", xl, out);
+ GF_VALIDATE_OR_GOTO ("xlator", type, out);
- if ((length > 75) || (length == 1)) {
- ret = 0;
- goto out;
- }
+ xl->type = gf_strdup (type);
- if (!isalnum (address[length - 1])) {
- ret = 0;
- goto out;
- }
-
- for (i = 0; i < length; i++) {
- if (!isalnum (address[i]) && (address[i] != '.')
- && (address[i] != '-')) {
- ret = 0;
- goto out;
- }
- }
+ if (xl->type)
+ return 0;
out:
- return ret;
+ return -1;
}
-static char
-valid_ipv4_address (char *address, int length)
+
+int
+xlator_volopt_dynload (char *xlator_type, void **dl_handle,
+ volume_opt_list_t *opt_list)
{
- int octets = 0;
- int value = 0;
- char *tmp = NULL, *ptr = NULL, *prev = NULL, *endptr = NULL;
- char ret = 1;
-
- prev = tmp = strdup (address);
- prev = strtok_r (tmp, ".", &ptr);
-
- while (prev != NULL)
- {
- octets++;
- value = strtol (prev, &endptr, 10);
- if ((value > 255) || (value < 0) || (endptr != NULL)) {
- ret = 0;
- goto out;
- }
-
- prev = strtok_r (NULL, ".", &ptr);
- }
+ int ret = -1;
+ char *name = NULL;
+ void *handle = NULL;
- if (octets != 4) {
- ret = 0;
+ GF_VALIDATE_OR_GOTO ("xlator", xlator_type, out);
+
+ ret = gf_asprintf (&name, "%s/%s.so", XLATORDIR, xlator_type);
+ if (-1 == ret) {
+ gf_log ("xlator", GF_LOG_ERROR, "asprintf failed");
+ goto out;
}
-out:
- FREE (tmp);
- return ret;
-}
+ ret = -1;
-static char
-valid_ipv6_address (char *address, int length)
-{
- int hex_numbers = 0;
- int value = 0;
- char *tmp = NULL, *ptr = NULL, *prev = NULL, *endptr = NULL;
- char ret = 1;
-
- tmp = strdup (address);
- prev = strtok_r (tmp, ":", &ptr);
-
- while (prev != NULL)
- {
- hex_numbers++;
- value = strtol (prev, &endptr, 16);
- if ((value > 0xffff) || (value < 0) || (endptr != NULL)) {
- ret = 0;
- goto out;
- }
-
- prev = strtok_r (NULL, ":", &ptr);
+ gf_log ("xlator", GF_LOG_TRACE, "attempt to load file %s", name);
+
+ handle = dlopen (name, RTLD_NOW|RTLD_GLOBAL);
+ if (!handle) {
+ gf_log ("xlator", GF_LOG_WARNING, "%s", dlerror ());
+ goto out;
}
-
- if (hex_numbers > 8) {
- ret = 0;
+
+ if (!(opt_list->given_opt = dlsym (handle, "options"))) {
+ dlerror ();
+ gf_log ("xlator", GF_LOG_ERROR,
+ "Failed to load xlator opt table");
+ goto out;
}
-out:
- FREE (tmp);
+ *dl_handle = handle;
+
+ ret = 0;
+ out:
+ GF_FREE (name);
+
+ gf_log ("xlator", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
+
}
-static char
-valid_internet_address (char *address)
+
+int
+xlator_dynload (xlator_t *xl)
{
- char ret = 0;
- int length = 0;
+ int ret = -1;
+ char *name = NULL;
+ void *handle = NULL;
+ volume_opt_list_t *vol_opt = NULL;
+ class_methods_t *vtbl = NULL;
+
+ GF_VALIDATE_OR_GOTO ("xlator", xl, out);
- if (address == NULL) {
+ INIT_LIST_HEAD (&xl->volume_options);
+
+ ret = gf_asprintf (&name, "%s/%s.so", XLATORDIR, xl->type);
+ if (-1 == ret) {
+ gf_log ("xlator", GF_LOG_ERROR, "asprintf failed");
goto out;
}
- length = strlen (address);
- if (length == 0) {
+ ret = -1;
+
+ gf_log ("xlator", GF_LOG_TRACE, "attempt to load file %s", name);
+
+ handle = dlopen (name, RTLD_NOW|RTLD_GLOBAL);
+ if (!handle) {
+ gf_log ("xlator", GF_LOG_WARNING, "%s", dlerror ());
goto out;
}
+ xl->dlhandle = handle;
- if (valid_ipv4_address (address, length)
- || valid_ipv6_address (address, length)
- || valid_host_name (address, length)) {
- ret = 1;
+ if (!(xl->fops = dlsym (handle, "fops"))) {
+ gf_log ("xlator", GF_LOG_WARNING, "dlsym(fops) on %s",
+ dlerror ());
+ goto out;
}
-out:
- return ret;
-}
+ if (!(xl->cbks = dlsym (handle, "cbks"))) {
+ gf_log ("xlator", GF_LOG_WARNING, "dlsym(cbks) on %s",
+ dlerror ());
+ goto out;
+ }
-int
-_volume_option_value_validate (xlator_t *xl,
- data_pair_t *pair,
- volume_option_t *opt)
-{
- int i = 0;
- int ret = -1;
- uint64_t input_size = 0;
- long long inputll = 0;
-
- /* Key is valid, validate the option */
- switch (opt->type) {
- case GF_OPTION_TYPE_PATH:
- {
- if (strstr (pair->value->data, "../")) {
- gf_log (xl->name, GF_LOG_ERROR,
- "invalid path given '%s'",
- pair->value->data);
- ret = -1;
+ /*
+ * If class_methods exists, its contents override any definitions of
+ * init or fini for that translator. Otherwise, we fall back to the
+ * older method of looking for init and fini directly.
+ */
+ vtbl = dlsym(handle,"class_methods");
+ if (vtbl) {
+ xl->init = vtbl->init;
+ xl->fini = vtbl->fini;
+ xl->reconfigure = vtbl->reconfigure;
+ xl->notify = vtbl->notify;
+ }
+ else {
+ if (!(*VOID(&xl->init) = dlsym (handle, "init"))) {
+ gf_log ("xlator", GF_LOG_WARNING, "dlsym(init) on %s",
+ dlerror ());
goto out;
}
- /* Make sure the given path is valid */
- if (pair->value->data[0] != '/') {
- gf_log (xl->name, GF_LOG_WARNING,
- "option %s %s: '%s' is not an "
- "absolute path name",
- pair->key, pair->value->data,
- pair->value->data);
- }
- ret = 0;
- }
- break;
- case GF_OPTION_TYPE_INT:
- {
- /* Check the range */
- if (gf_string2longlong (pair->value->data,
- &inputll) != 0) {
- gf_log (xl->name, GF_LOG_ERROR,
- "invalid number format \"%s\" in "
- "\"option %s\"",
- pair->value->data, pair->key);
- goto out;
- }
-
- if ((opt->min == 0) && (opt->max == 0)) {
- gf_log (xl->name, GF_LOG_DEBUG,
- "no range check required for "
- "'option %s %s'",
- pair->key, pair->value->data);
- ret = 0;
- break;
- }
- if ((inputll < opt->min) ||
- (inputll > opt->max)) {
- gf_log (xl->name, GF_LOG_WARNING,
- "'%lld' in 'option %s %s' is out of "
- "range [%"PRId64" - %"PRId64"]",
- inputll, pair->key,
- pair->value->data,
- opt->min, opt->max);
- }
- ret = 0;
- }
- break;
- case GF_OPTION_TYPE_SIZET:
- {
- /* Check the range */
- if (gf_string2bytesize (pair->value->data,
- &input_size) != 0) {
- gf_log (xl->name, GF_LOG_ERROR,
- "invalid size format \"%s\" in "
- "\"option %s\"",
- pair->value->data, pair->key);
- goto out;
- }
-
- if ((opt->min == 0) && (opt->max == 0)) {
- gf_log (xl->name, GF_LOG_DEBUG,
- "no range check required for "
- "'option %s %s'",
- pair->key, pair->value->data);
- ret = 0;
- break;
- }
- if ((input_size < opt->min) ||
- (input_size > opt->max)) {
- gf_log (xl->name, GF_LOG_ERROR,
- "'%"PRId64"' in 'option %s %s' is "
- "out of range [%"PRId64" - %"PRId64"]",
- input_size, pair->key,
- pair->value->data,
- opt->min, opt->max);
- }
- ret = 0;
- }
- break;
- case GF_OPTION_TYPE_BOOL:
- {
- /* Check if the value is one of
- '0|1|on|off|no|yes|true|false|enable|disable' */
- gf_boolean_t bool_value;
- if (gf_string2boolean (pair->value->data,
- &bool_value) != 0) {
- gf_log (xl->name, GF_LOG_ERROR,
- "option %s %s: '%s' is not a valid "
- "boolean value",
- pair->key, pair->value->data,
- pair->value->data);
- goto out;
- }
- ret = 0;
- }
- break;
- case GF_OPTION_TYPE_XLATOR:
- {
- /* Check if the value is one of the xlators */
- xlator_t *xlopt = xl;
- while (xlopt->prev)
- xlopt = xlopt->prev;
-
- while (xlopt) {
- if (strcmp (pair->value->data,
- xlopt->name) == 0) {
- ret = 0;
- break;
- }
- xlopt = xlopt->next;
- }
- if (!xlopt) {
- gf_log (xl->name, GF_LOG_ERROR,
- "option %s %s: '%s' is not a "
- "valid volume name",
- pair->key, pair->value->data,
- pair->value->data);
- }
- ret = 0;
- }
- break;
- case GF_OPTION_TYPE_STR:
- {
- /* Check if the '*str' is valid */
- if (GF_OPTION_LIST_EMPTY(opt)) {
- ret = 0;
+ if (!(*VOID(&(xl->fini)) = dlsym (handle, "fini"))) {
+ gf_log ("xlator", GF_LOG_WARNING, "dlsym(fini) on %s",
+ dlerror ());
goto out;
}
-
- for (i = 0; (i < ZR_OPTION_MAX_ARRAY_SIZE) &&
- opt->value[i]; i++) {
- if (strcasecmp (opt->value[i],
- pair->value->data) == 0) {
- ret = 0;
- break;
- }
- }
-
- if ((i == ZR_OPTION_MAX_ARRAY_SIZE)
- || ((i < ZR_OPTION_MAX_ARRAY_SIZE)
- && (!opt->value[i]))) {
- /* enter here only if
- * 1. reached end of opt->value array and haven't
- * validated input
- * OR
- * 2. valid input list is less than
- * ZR_OPTION_MAX_ARRAY_SIZE and input has not
- * matched all possible input values.
- */
- char given_array[4096] = {0,};
- for (i = 0; (i < ZR_OPTION_MAX_ARRAY_SIZE) &&
- opt->value[i]; i++) {
- strcat (given_array, opt->value[i]);
- strcat (given_array, ", ");
- }
-
- gf_log (xl->name, GF_LOG_ERROR,
- "option %s %s: '%s' is not valid "
- "(possible options are %s)",
- pair->key, pair->value->data,
- pair->value->data, given_array);
-
- goto out;
- }
- }
- break;
- case GF_OPTION_TYPE_PERCENT:
- {
- uint32_t percent = 0;
-
-
- /* Check if the value is valid percentage */
- if (gf_string2percent (pair->value->data,
- &percent) != 0) {
- gf_log (xl->name, GF_LOG_ERROR,
- "invalid percent format \"%s\" "
- "in \"option %s\"",
- pair->value->data, pair->key);
- goto out;
- }
-
- if ((percent < 0) || (percent > 100)) {
- gf_log (xl->name, GF_LOG_ERROR,
- "'%d' in 'option %s %s' is out of "
- "range [0 - 100]",
- percent, pair->key,
- pair->value->data);
- }
- ret = 0;
- }
- break;
- case GF_OPTION_TYPE_PERCENT_OR_SIZET:
- {
- uint32_t percent = 0;
- uint64_t input_size = 0;
-
- /* Check if the value is valid percentage */
- if (gf_string2percent (pair->value->data,
- &percent) == 0) {
- if (percent > 100) {
- gf_log (xl->name, GF_LOG_DEBUG,
- "value given was greater than 100, "
- "assuming this is actually a size");
- if (gf_string2bytesize (pair->value->data,
- &input_size) == 0) {
- /* Check the range */
- if ((opt->min == 0) &&
- (opt->max == 0)) {
- gf_log (xl->name, GF_LOG_DEBUG,
- "no range check "
- "required for "
- "'option %s %s'",
- pair->key,
- pair->value->data);
- // It is a size
- ret = 0;
- goto out;
- }
- if ((input_size < opt->min) ||
- (input_size > opt->max)) {
- gf_log (xl->name, GF_LOG_ERROR,
- "'%"PRId64"' in "
- "'option %s %s' is out"
- " of range [%"PRId64""
- "- %"PRId64"]",
- input_size, pair->key,
- pair->value->data,
- opt->min, opt->max);
- }
- // It is a size
- ret = 0;
- goto out;
- } else {
- // It's not a percent or size
- gf_log (xl->name, GF_LOG_ERROR,
- "invalid number format \"%s\" "
- "in \"option %s\"",
- pair->value->data, pair->key);
- }
-
- }
- // It is a percent
- ret = 0;
- goto out;
- } else {
- if (gf_string2bytesize (pair->value->data,
- &input_size) == 0) {
- /* Check the range */
- if ((opt->min == 0) && (opt->max == 0)) {
- gf_log (xl->name, GF_LOG_DEBUG,
- "no range check required for "
- "'option %s %s'",
- pair->key, pair->value->data);
- // It is a size
- ret = 0;
- goto out;
- }
- if ((input_size < opt->min) ||
- (input_size > opt->max)) {
- gf_log (xl->name, GF_LOG_ERROR,
- "'%"PRId64"' in 'option %s %s'"
- " is out of range [%"PRId64" -"
- " %"PRId64"]",
- input_size, pair->key,
- pair->value->data,
- opt->min, opt->max);
- }
- } else {
- // It's not a percent or size
- gf_log (xl->name, GF_LOG_ERROR,
- "invalid number format \"%s\" "
- "in \"option %s\"",
- pair->value->data, pair->key);
- }
- //It is a size
- ret = 0;
- goto out;
- }
-
- }
- break;
- case GF_OPTION_TYPE_TIME:
- {
- uint32_t input_time = 0;
-
- /* Check if the value is valid percentage */
- if (gf_string2time (pair->value->data,
- &input_time) != 0) {
- gf_log (xl->name,
- GF_LOG_ERROR,
- "invalid time format \"%s\" in "
- "\"option %s\"",
- pair->value->data, pair->key);
- goto out;
- }
-
- if ((opt->min == 0) && (opt->max == 0)) {
- gf_log (xl->name, GF_LOG_DEBUG,
- "no range check required for "
- "'option %s %s'",
- pair->key, pair->value->data);
- ret = 0;
- goto out;
- }
- if ((input_time < opt->min) ||
- (input_time > opt->max)) {
- gf_log (xl->name, GF_LOG_ERROR,
- "'%"PRIu32"' in 'option %s %s' is "
- "out of range [%"PRId64" - %"PRId64"]",
- input_time, pair->key,
- pair->value->data,
- opt->min, opt->max);
- }
- ret = 0;
- }
- break;
- case GF_OPTION_TYPE_DOUBLE:
- {
- double input_time = 0.0;
-
- /* Check if the value is valid double */
- if (gf_string2double (pair->value->data,
- &input_time) != 0) {
- gf_log (xl->name,
- GF_LOG_ERROR,
- "invalid time format \"%s\" in \"option %s\"",
- pair->value->data, pair->key);
- goto out;
- }
-
- if (input_time < 0.0) {
- gf_log (xl->name,
- GF_LOG_ERROR,
- "invalid time format \"%s\" in \"option %s\"",
- pair->value->data, pair->key);
- goto out;
- }
-
- if ((opt->min == 0) && (opt->max == 0)) {
- gf_log (xl->name, GF_LOG_DEBUG,
- "no range check required for 'option %s %s'",
- pair->key, pair->value->data);
- ret = 0;
- goto out;
- }
- ret = 0;
- }
- break;
- case GF_OPTION_TYPE_INTERNET_ADDRESS:
- {
- if (valid_internet_address (pair->value->data)) {
- ret = 0;
+ if (!(*VOID(&(xl->reconfigure)) = dlsym (handle,
+ "reconfigure"))) {
+ gf_log ("xlator", GF_LOG_TRACE,
+ "dlsym(reconfigure) on %s -- neglecting",
+ dlerror());
+ }
+ if (!(*VOID(&(xl->notify)) = dlsym (handle, "notify"))) {
+ gf_log ("xlator", GF_LOG_TRACE,
+ "dlsym(notify) on %s -- neglecting",
+ dlerror ());
}
- }
- break;
- case GF_OPTION_TYPE_ANY:
- /* NO CHECK */
- ret = 0;
- break;
- }
-
-out:
- return ret;
-}
-int
-validate_xlator_volume_options (xlator_t *xl, volume_option_t *opt)
-{
- int i = 0;
- int ret = -1;
- int index = 0;
- volume_option_t *trav = NULL;
- data_pair_t *pairs = NULL;
-
- if (!opt) {
- ret = 0;
- goto out;
- }
+ }
- /* First search for not supported options, if any report error */
- pairs = xl->options->members_list;
- while (pairs) {
- ret = -1;
- for (index = 0;
- opt[index].key && opt[index].key[0] ; index++) {
- trav = &(opt[index]);
- for (i = 0 ;
- (i < ZR_VOLUME_MAX_NUM_KEY) &&
- trav->key[i]; i++) {
- /* Check if the key is valid */
- if (fnmatch (trav->key[i],
- pairs->key, FNM_NOESCAPE) == 0) {
- ret = 0;
- break;
- }
- }
- if (!ret) {
- if (i) {
- gf_log (xl->name, GF_LOG_WARNING,
- "option '%s' is deprecated, "
- "preferred is '%s', continuing"
- " with correction",
- trav->key[i], trav->key[0]);
- /* TODO: some bytes lost */
- pairs->key = strdup (trav->key[0]);
- }
- break;
- }
- }
- if (!ret) {
- ret = _volume_option_value_validate (xl, pairs, trav);
- if (-1 == ret) {
- goto out;
- }
- }
-
- pairs = pairs->next;
- }
-
- ret = 0;
- out:
- return ret;
-}
+ if (!(xl->dumpops = dlsym (handle, "dumpops"))) {
+ gf_log ("xlator", GF_LOG_TRACE,
+ "dlsym(dumpops) on %s -- neglecting", dlerror ());
+ }
-int32_t
-xlator_set_type (xlator_t *xl,
- const char *type)
-{
- int ret = 0;
- char *name = NULL;
- void *handle = NULL;
- volume_opt_list_t *vol_opt = NULL;
-
- if (xl == NULL || type == NULL) {
- gf_log ("xlator", GF_LOG_DEBUG, "invalid argument");
- return -1;
- }
+ if (!(*VOID(&(xl->mem_acct_init)) = dlsym (handle, "mem_acct_init"))) {
+ gf_log (xl->name, GF_LOG_TRACE,
+ "dlsym(mem_acct_init) on %s -- neglecting",
+ dlerror ());
+ }
- xl->type = strdup (type);
+ vol_opt = GF_CALLOC (1, sizeof (volume_opt_list_t),
+ gf_common_mt_volume_opt_list_t);
- ret = asprintf (&name, "%s/%s.so", XLATORDIR, type);
- if (-1 == ret) {
- gf_log ("xlator", GF_LOG_ERROR, "asprintf failed");
- return -1;
+ if (!vol_opt) {
+ goto out;
}
- gf_log ("xlator", GF_LOG_TRACE, "attempt to load file %s", name);
+ if (!(vol_opt->given_opt = dlsym (handle, "options"))) {
+ dlerror ();
+ gf_log (xl->name, GF_LOG_TRACE,
+ "Strict option validation not enforced -- neglecting");
+ }
+ INIT_LIST_HEAD (&vol_opt->list);
+ list_add_tail (&vol_opt->list, &xl->volume_options);
- handle = dlopen (name, RTLD_NOW|RTLD_GLOBAL);
- if (!handle) {
- gf_log ("xlator", GF_LOG_DEBUG, "%s", dlerror ());
- return -1;
- }
+ fill_defaults (xl);
- if (!(xl->fops = dlsym (handle, "fops"))) {
- gf_log ("xlator", GF_LOG_DEBUG, "dlsym(fops) on %s",
- dlerror ());
- return -1;
- }
+ ret = 0;
- if (!(xl->mops = dlsym (handle, "mops"))) {
- gf_log ("xlator", GF_LOG_DEBUG, "dlsym(mops) on %s",
- dlerror ());
- return -1;
- }
+out:
+ GF_FREE (name);
+ return ret;
+}
- if (!(xl->cbks = dlsym (handle, "cbks"))) {
- gf_log ("xlator", GF_LOG_DEBUG, "dlsym(cbks) on %s",
- dlerror ());
- return -1;
- }
- if (!(xl->init = dlsym (handle, "init"))) {
- gf_log ("xlator", GF_LOG_DEBUG, "dlsym(init) on %s",
- dlerror ());
- return -1;
- }
+int
+xlator_set_type (xlator_t *xl, const char *type)
+{
+ int ret = 0;
- if (!(xl->fini = dlsym (handle, "fini"))) {
- gf_log ("xlator", GF_LOG_DEBUG, "dlsym(fini) on %s",
- dlerror ());
- return -1;
- }
+ ret = xlator_set_type_virtual (xl, type);
+ if (!ret)
+ ret = xlator_dynload (xl);
- if (!(xl->notify = dlsym (handle, "notify"))) {
- gf_log ("xlator", GF_LOG_DEBUG,
- "dlsym(notify) on %s -- neglecting", dlerror ());
- }
+ return ret;
+}
- if (!(xl->dumpops = dlsym (handle, "dumpops"))) {
- gf_log ("xlator", GF_LOG_DEBUG,
- "dlsym(dumpops) on %s -- neglecting", dlerror ());
- }
- INIT_LIST_HEAD (&xl->volume_options);
+void
+xlator_foreach (xlator_t *this,
+ void (*fn)(xlator_t *each,
+ void *data),
+ void *data)
+{
+ xlator_t *first = NULL;
+ xlator_t *old_THIS = NULL;
+
+ GF_VALIDATE_OR_GOTO ("xlator", this, out);
+ GF_VALIDATE_OR_GOTO ("xlator", fn, out);
- vol_opt = CALLOC (1, sizeof (volume_opt_list_t));
+ first = this;
- if (!(vol_opt->given_opt = dlsym (handle, "options"))) {
- dlerror ();
- gf_log (xl->name, GF_LOG_DEBUG,
- "Strict option validation not enforced -- neglecting");
- }
- list_add_tail (&vol_opt->list, &xl->volume_options);
+ while (first->prev)
+ first = first->prev;
+
+ while (first) {
+ old_THIS = THIS;
+ THIS = first;
- fill_defaults (xl);
+ fn (first, data);
- FREE (name);
- return 0;
+ THIS = old_THIS;
+ first = first->next;
+ }
+
+out:
+ return;
}
void
-xlator_foreach (xlator_t *this,
- void (*fn)(xlator_t *each,
- void *data),
- void *data)
+xlator_foreach_depth_first (xlator_t *this,
+ void (*fn)(xlator_t *each, void *data),
+ void *data)
{
- xlator_t *first = NULL;
+ xlator_list_t *subv = NULL;
- if (this == NULL || fn == NULL || data == NULL) {
- gf_log ("xlator", GF_LOG_DEBUG, "invalid argument");
- return;
- }
+ subv = this->children;
- first = this;
-
- while (first->prev)
- first = first->prev;
-
- while (first) {
- fn (first, data);
- first = first->next;
+ while (subv) {
+ xlator_foreach_depth_first (subv->xlator, fn, data);
+ subv = subv->next;
}
+
+ fn (this, data);
}
xlator_t *
xlator_search_by_name (xlator_t *any, const char *name)
{
- xlator_t *search = NULL;
+ xlator_t *search = NULL;
- if (any == NULL || name == NULL) {
- gf_log ("xlator", GF_LOG_DEBUG, "invalid argument");
- return NULL;
- }
+ GF_VALIDATE_OR_GOTO ("xlator", any, out);
+ GF_VALIDATE_OR_GOTO ("xlator", name, out);
- search = any;
+ search = any;
- while (search->prev)
- search = search->prev;
+ while (search->prev)
+ search = search->prev;
- while (search) {
- if (!strcmp (search->name, name))
- break;
- search = search->next;
- }
+ while (search) {
+ if (!strcmp (search->name, name))
+ break;
+ search = search->next;
+ }
- return search;
+out:
+ return search;
}
-
-static int32_t
-xlator_init_rec (xlator_t *xl)
+static int
+__xlator_init(xlator_t *xl)
{
- xlator_list_t *trav = NULL;
- int32_t ret = 0;
-
- if (xl == NULL) {
- gf_log ("xlator", GF_LOG_DEBUG, "invalid argument");
- return 0;
- }
+ xlator_t *old_THIS = NULL;
+ int ret = 0;
- trav = xl->children;
+ old_THIS = THIS;
+ THIS = xl;
- while (trav) {
- ret = 0;
- ret = xlator_init_rec (trav->xlator);
- if (ret != 0)
- break;
- gf_log (trav->xlator->name, GF_LOG_TRACE,
- "Initialization done");
- trav = trav->next;
- }
+ ret = xl->init (xl);
- if (!ret && !xl->ready) {
- ret = -1;
- if (xl->init) {
- ret = xlator_init (xl);
- if (ret) {
- gf_log ("xlator", GF_LOG_ERROR,
- "Initialization of volume '%s' failed,"
- " review your volfile again",
- xl->name);
- } else {
- xl->init_succeeded = 1;
- }
- } else {
- gf_log (xl->name, GF_LOG_DEBUG, "No init() found");
- }
- /* This 'xl' is checked */
- xl->ready = 1;
- }
+ THIS = old_THIS;
- return ret;
+ return ret;
}
-int32_t
-xlator_tree_init (xlator_t *xl)
+int
+xlator_init (xlator_t *xl)
{
- xlator_t *top = NULL;
- int32_t ret = 0;
+ int32_t ret = -1;
- if (xl == NULL) {
- gf_log ("xlator", GF_LOG_DEBUG, "invalid argument");
- return 0;
- }
+ GF_VALIDATE_OR_GOTO ("xlator", xl, out);
- top = xl;
-/*
- while (top->parents)
- top = top->parents->xlator;
-*/
- ret = xlator_init_rec (top);
+ if (xl->mem_acct_init)
+ xl->mem_acct_init (xl);
- if (ret == 0 && top->notify) {
- top->notify (top, GF_EVENT_PARENT_UP, NULL);
- }
+ if (!xl->init) {
+ gf_log (xl->name, GF_LOG_WARNING, "No init() found");
+ goto out;
+ }
+
+ ret = __xlator_init (xl);
+
+ if (ret) {
+ gf_log (xl->name, GF_LOG_ERROR,
+ "Initialization of volume '%s' failed,"
+ " review your volfile again",
+ xl->name);
+ goto out;
+ }
+
+ xl->init_succeeded = 1;
- return ret;
+ ret = 0;
+out:
+ return ret;
}
static void
xlator_fini_rec (xlator_t *xl)
{
- xlator_list_t *trav = NULL;
+ xlator_list_t *trav = NULL;
+ xlator_t *old_THIS = NULL;
- if (xl == NULL) {
- gf_log ("xlator", GF_LOG_DEBUG, "invalid argument");
- return;
- }
+ GF_VALIDATE_OR_GOTO ("xlator", xl, out);
- trav = xl->children;
+ trav = xl->children;
- while (trav) {
- if (!trav->xlator->init_succeeded) {
- break;
- }
+ while (trav) {
+ if (!trav->xlator->init_succeeded) {
+ break;
+ }
- xlator_fini_rec (trav->xlator);
- gf_log (trav->xlator->name, GF_LOG_DEBUG, "fini done");
- trav = trav->next;
- }
+ xlator_fini_rec (trav->xlator);
+ gf_log (trav->xlator->name, GF_LOG_DEBUG, "fini done");
+ trav = trav->next;
+ }
- if (xl->init_succeeded) {
- if (xl->fini) {
- xl->fini (xl);
- } else {
- gf_log (xl->name, GF_LOG_DEBUG, "No fini() found");
- }
- xl->init_succeeded = 0;
- }
+ if (xl->init_succeeded) {
+ if (xl->fini) {
+ old_THIS = THIS;
+ THIS = xl;
+
+ xl->fini (xl);
+
+ if (xl->local_pool)
+ mem_pool_destroy (xl->local_pool);
+
+ THIS = old_THIS;
+ } else {
+ gf_log (xl->name, GF_LOG_DEBUG, "No fini() found");
+ }
+ xl->init_succeeded = 0;
+ }
+
+out:
+ return;
}
@@ -936,57 +472,98 @@ xlator_notify (xlator_t *xl, int event, void *data, ...)
int
-xlator_init (xlator_t *xl)
+xlator_mem_acct_init (xlator_t *xl, int num_types)
{
- xlator_t *old_THIS = NULL;
- int ret = 0;
+ int i = 0;
+ int ret = 0;
- old_THIS = THIS;
- THIS = xl;
+ if (!xl)
+ return -1;
- ret = xl->init (xl);
+ if (!xl->ctx->mem_acct_enable)
+ return 0;
- THIS = old_THIS;
+ xl->mem_acct.num_types = num_types;
- return ret;
+ xl->mem_acct.rec = CALLOC(num_types, sizeof(struct mem_acct_rec));
+
+ if (!xl->mem_acct.rec) {
+ return -1;
+ }
+
+ for (i = 0; i < num_types; i++) {
+ ret = LOCK_INIT(&(xl->mem_acct.rec[i].lock));
+ if (ret) {
+ fprintf(stderr, "Unable to lock..errno : %d",errno);
+ }
+ }
+
+ return 0;
}
void
xlator_tree_fini (xlator_t *xl)
{
- xlator_t *top = NULL;
+ xlator_t *top = NULL;
- if (xl == NULL) {
- gf_log ("xlator", GF_LOG_DEBUG, "invalid argument");
- return;
- }
+ GF_VALIDATE_OR_GOTO ("xlator", xl, out);
+
+ top = xl;
+ xlator_fini_rec (top);
- top = xl;
- xlator_fini_rec (top);
+out:
+ return;
+}
+
+int
+xlator_list_destroy (xlator_list_t *list)
+{
+ xlator_list_t *next = NULL;
+
+ while (list) {
+ next = list->next;
+ GF_FREE (list);
+ list = next;
+ }
+
+ return 0;
}
int
xlator_tree_free (xlator_t *tree)
{
- xlator_t *trav = tree, *prev = tree;
-
- if (!tree) {
- gf_log ("parser", GF_LOG_ERROR, "Translator tree not found");
- return -1;
- }
-
- while (prev) {
- trav = prev->next;
- dict_destroy (prev->options);
- FREE (prev->name);
- FREE (prev->type);
- FREE (prev);
- prev = trav;
- }
-
- return 0;
+ volume_opt_list_t *vol_opt = NULL;
+ volume_opt_list_t *tmp = NULL;
+ xlator_t *trav = tree;
+ xlator_t *prev = tree;
+
+ if (!tree) {
+ gf_log ("parser", GF_LOG_ERROR, "Translator tree not found");
+ return -1;
+ }
+
+ while (prev) {
+ trav = prev->next;
+ if (prev->dlhandle)
+ dlclose (prev->dlhandle);
+ dict_unref (prev->options);
+ GF_FREE (prev->name);
+ GF_FREE (prev->type);
+ xlator_list_destroy (prev->children);
+ xlator_list_destroy (prev->parents);
+
+ list_for_each_entry_safe (vol_opt, tmp, &prev->volume_options,
+ list) {
+ list_del_init (&vol_opt->list);
+ GF_FREE (vol_opt);
+ }
+ GF_FREE (prev);
+ prev = trav;
+ }
+
+ return 0;
}
@@ -998,40 +575,264 @@ loc_wipe (loc_t *loc)
loc->inode = NULL;
}
if (loc->path) {
- FREE (loc->path);
+ GF_FREE ((char *)loc->path);
loc->path = NULL;
}
-
+
if (loc->parent) {
inode_unref (loc->parent);
loc->parent = NULL;
}
+
+ memset (loc, 0, sizeof (*loc));
}
+int
+loc_path (loc_t *loc, const char *bname)
+{
+ int ret = 0;
+
+ if (loc->path)
+ goto out;
+
+ ret = -1;
+
+ if (bname && !strlen (bname))
+ bname = NULL;
+
+ if (!bname)
+ goto inode_path;
+
+ if (loc->parent && !uuid_is_null (loc->parent->gfid)) {
+ ret = inode_path (loc->parent, bname, (char**)&loc->path);
+ } else if (!uuid_is_null (loc->pargfid)) {
+ ret = gf_asprintf ((char**)&loc->path, INODE_PATH_FMT"/%s",
+ uuid_utoa (loc->pargfid), bname);
+ }
+
+ if (loc->path)
+ goto out;
+
+inode_path:
+ if (loc->inode && !uuid_is_null (loc->inode->gfid)) {
+ ret = inode_path (loc->inode, NULL, (char **)&loc->path);
+ } else if (!uuid_is_null (loc->gfid)) {
+ ret = gf_asprintf ((char**)&loc->path, INODE_PATH_FMT,
+ uuid_utoa (loc->gfid));
+ }
+out:
+ return ret;
+}
+
+void
+loc_gfid (loc_t *loc, uuid_t gfid)
+{
+ if (!gfid)
+ goto out;
+ uuid_clear (gfid);
+
+ if (!loc)
+ goto out;
+ else if (!uuid_is_null (loc->gfid))
+ uuid_copy (gfid, loc->gfid);
+ else if (loc->inode && (!uuid_is_null (loc->inode->gfid)))
+ uuid_copy (gfid, loc->inode->gfid);
+out:
+ return;
+}
+
+char*
+loc_gfid_utoa (loc_t *loc)
+{
+ uuid_t gfid;
+ loc_gfid (loc, gfid);
+ return uuid_utoa (gfid);
+}
int
loc_copy (loc_t *dst, loc_t *src)
{
- int ret = -1;
+ int ret = -1;
- dst->ino = src->ino;
+ GF_VALIDATE_OR_GOTO ("xlator", dst, err);
+ GF_VALIDATE_OR_GOTO ("xlator", src, err);
- if (src->inode)
- dst->inode = inode_ref (src->inode);
+ uuid_copy (dst->gfid, src->gfid);
+ uuid_copy (dst->pargfid, src->pargfid);
+ uuid_copy (dst->gfid, src->gfid);
- if (src->parent)
- dst->parent = inode_ref (src->parent);
+ if (src->inode)
+ dst->inode = inode_ref (src->inode);
- dst->path = strdup (src->path);
+ if (src->parent)
+ dst->parent = inode_ref (src->parent);
- if (!dst->path)
- goto out;
+ if (src->path) {
+ dst->path = gf_strdup (src->path);
- dst->name = strrchr (dst->path, '/');
- if (dst->name)
- dst->name++;
+ if (!dst->path)
+ goto out;
+
+ if (src->name)
+ dst->name = strrchr (dst->path, '/');
+ if (dst->name)
+ dst->name++;
+ }
- ret = 0;
+ ret = 0;
out:
- return ret;
+ if (ret == -1)
+ loc_wipe (dst);
+
+err:
+ return ret;
}
+
+gf_boolean_t
+loc_is_root (loc_t *loc)
+{
+ if (loc && __is_root_gfid (loc->gfid)) {
+ return _gf_true;
+ } else if (loc && loc->inode && __is_root_gfid (loc->inode->gfid)) {
+ return _gf_true;
+ }
+ return _gf_false;
+}
+
+int
+xlator_destroy (xlator_t *xl)
+{
+ volume_opt_list_t *vol_opt = NULL;
+ volume_opt_list_t *tmp = NULL;
+
+ if (!xl)
+ return 0;
+
+ GF_FREE (xl->name);
+ GF_FREE (xl->type);
+ if (xl->dlhandle)
+ dlclose (xl->dlhandle);
+ if (xl->options)
+ dict_destroy (xl->options);
+
+ xlator_list_destroy (xl->children);
+
+ xlator_list_destroy (xl->parents);
+
+ list_for_each_entry_safe (vol_opt, tmp, &xl->volume_options, list) {
+ list_del_init (&vol_opt->list);
+ GF_FREE (vol_opt);
+ }
+
+ GF_FREE (xl);
+
+ return 0;
+}
+
+
+int
+is_gf_log_command (xlator_t *this, const char *name, char *value)
+{
+ xlator_t *trav = NULL;
+ char key[1024] = {0,};
+ int ret = -1;
+ int log_level = -1;
+ gf_boolean_t syslog_flag = 0;
+ glusterfs_ctx_t *ctx = NULL;
+
+ if (!strcmp ("trusted.glusterfs.syslog", name)) {
+ ret = gf_string2boolean (value, &syslog_flag);
+ if (ret) {
+ ret = EOPNOTSUPP;
+ goto out;
+ }
+ if (syslog_flag)
+ gf_log_enable_syslog ();
+ else
+ gf_log_disable_syslog ();
+
+ goto out;
+ }
+
+ if (fnmatch ("trusted.glusterfs*set-log-level", name, FNM_NOESCAPE))
+ goto out;
+
+ log_level = glusterd_check_log_level (value);
+ if (log_level == -1) {
+ ret = EOPNOTSUPP;
+ goto out;
+ }
+
+ /* Some crude way to change the log-level of process */
+ if (!strcmp (name, "trusted.glusterfs.set-log-level")) {
+ /* */
+ gf_log ("glusterfs", gf_log_get_loglevel(),
+ "setting log level to %d (old-value=%d)",
+ log_level, gf_log_get_loglevel());
+ gf_log_set_loglevel (log_level);
+ ret = 0;
+ goto out;
+ }
+
+ if (!strcmp (name, "trusted.glusterfs.fuse.set-log-level")) {
+ /* */
+ gf_log (this->name, gf_log_get_xl_loglevel (this),
+ "setting log level to %d (old-value=%d)",
+ log_level, gf_log_get_xl_loglevel (this));
+ gf_log_set_xl_loglevel (this, log_level);
+ ret = 0;
+ goto out;
+ }
+
+ ctx = this->ctx;
+ if (!ctx)
+ goto out;
+ if (!ctx->active)
+ goto out;
+ trav = ctx->active->top;
+
+ while (trav) {
+ snprintf (key, 1024, "trusted.glusterfs.%s.set-log-level",
+ trav->name);
+ if (fnmatch (name, key, FNM_NOESCAPE) == 0) {
+ gf_log (trav->name, gf_log_get_xl_loglevel (trav),
+ "setting log level to %d (old-value=%d)",
+ log_level, gf_log_get_xl_loglevel (trav));
+ gf_log_set_xl_loglevel (trav, log_level);
+ ret = 0;
+ }
+ trav = trav->next;
+ }
+out:
+ return ret;
+}
+
+
+int
+glusterd_check_log_level (const char *value)
+{
+ int log_level = -1;
+
+ if (!strcasecmp (value, "CRITICAL")) {
+ log_level = GF_LOG_CRITICAL;
+ } else if (!strcasecmp (value, "ERROR")) {
+ log_level = GF_LOG_ERROR;
+ } else if (!strcasecmp (value, "WARNING")) {
+ log_level = GF_LOG_WARNING;
+ } else if (!strcasecmp (value, "INFO")) {
+ log_level = GF_LOG_INFO;
+ } else if (!strcasecmp (value, "DEBUG")) {
+ log_level = GF_LOG_DEBUG;
+ } else if (!strcasecmp (value, "TRACE")) {
+ log_level = GF_LOG_TRACE;
+ } else if (!strcasecmp (value, "NONE")) {
+ log_level = GF_LOG_NONE;
+ }
+
+ if (log_level == -1)
+ gf_log (THIS->name, GF_LOG_ERROR, "Invalid log-level. possible values "
+ "are DEBUG|WARNING|ERROR|CRITICAL|NONE|TRACE");
+
+ return log_level;
+}
+
diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h
index 4adbec2e5..b57e5873e 100644
--- a/libglusterfs/src/xlator.h
+++ b/libglusterfs/src/xlator.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _XLATOR_H
@@ -29,16 +20,30 @@
#include <stdint.h>
#include <inttypes.h>
-
-#include "glusterfs.h"
+#include "event-history.h"
#include "logging.h"
#include "common-utils.h"
#include "dict.h"
#include "compat.h"
#include "list.h"
+#include "latency.h"
#define FIRST_CHILD(xl) (xl->children->xlator)
+#define GF_SET_ATTR_MODE 0x1
+#define GF_SET_ATTR_UID 0x2
+#define GF_SET_ATTR_GID 0x4
+#define GF_SET_ATTR_SIZE 0x8
+#define GF_SET_ATTR_ATIME 0x10
+#define GF_SET_ATTR_MTIME 0x20
+
+#define gf_attr_mode_set(mode) ((mode) & GF_SET_ATTR_MODE)
+#define gf_attr_uid_set(mode) ((mode) & GF_SET_ATTR_UID)
+#define gf_attr_gid_set(mode) ((mode) & GF_SET_ATTR_GID)
+#define gf_attr_size_set(mode) ((mode) & GF_SET_ATTR_SIZE)
+#define gf_attr_atime_set(mode) ((mode) & GF_SET_ATTR_ATIME)
+#define gf_attr_mtime_set(mode) ((mode) & GF_SET_ATTR_MTIME)
+
struct _xlator;
typedef struct _xlator xlator_t;
struct _dir_entry_t;
@@ -49,10 +54,8 @@ struct _loc;
typedef struct _loc loc_t;
-typedef int32_t (*event_notify_fn_t) (xlator_t *this,
- int32_t event,
- void *data,
- ...);
+typedef int32_t (*event_notify_fn_t) (xlator_t *this, int32_t event, void *data,
+ ...);
#include "list.h"
#include "gf-dirent.h"
@@ -61,749 +64,734 @@ typedef int32_t (*event_notify_fn_t) (xlator_t *this,
#include "inode.h"
#include "fd.h"
#include "globals.h"
+#include "iatt.h"
+#include "options.h"
+#include "client_t.h"
+
struct _loc {
- const char *path;
- const char *name;
- ino_t ino;
- inode_t *inode;
- inode_t *parent;
+ const char *path;
+ const char *name;
+ inode_t *inode;
+ inode_t *parent;
+ /* Currently all location based operations are through 'gfid' of inode.
+ * But the 'inode->gfid' only gets set in higher most layer (as in,
+ * 'fuse', 'protocol/server', or 'nfs/server'). So if translators want
+ * to send fops on a inode before the 'inode->gfid' is set, they have to
+ * make use of below 'gfid' fields
+ */
+ uuid_t gfid;
+ uuid_t pargfid;
};
-struct xlator_stats {
- uint64_t nr_files; /* Number of files open via this xlator */
- uint64_t free_disk; /* Mega bytes */
- uint64_t total_disk_size; /* Mega Bytes */
- uint64_t disk_usage; /* Mega bytes */
- uint64_t disk_speed; /* MHz or Mbps */
- uint64_t nr_clients; /* Number of client nodes */
- uint64_t write_usage;
- uint64_t read_usage; /* add more stats here */
-};
+typedef int32_t (*fop_getspec_cbk_t) (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ char *spec_data);
+typedef int32_t (*fop_rchecksum_cbk_t) (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ uint32_t weak_checksum,
+ uint8_t *strong_checksum,
+ dict_t *xdata);
-typedef int32_t (*mop_stats_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct xlator_stats *stats);
-
-typedef int32_t (*mop_getspec_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- char *spec_data);
-
-typedef int32_t (*mop_log_cbk_t) (call_frame_t *frame,
- void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno);
-
-typedef int32_t (*fop_checksum_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *file_checksum,
- uint8_t *dir_checksum);
-
-typedef int32_t (*mop_setvolume_t) (call_frame_t *frame,
- xlator_t *this,
- const char *volume);
-
-typedef int32_t (*mop_stats_t) (call_frame_t *frame,
- xlator_t *this,
- int32_t flags);
-
-typedef int32_t (*mop_getspec_t) (call_frame_t *frame,
- xlator_t *this,
- const char *key,
- int32_t flag);
-
-typedef int32_t (*mop_log_t) (call_frame_t *frame,
- xlator_t *this,
- const char *msg);
-
-typedef int32_t (*fop_checksum_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flag);
-
-struct xlator_mops {
- mop_stats_t stats;
- mop_getspec_t getspec;
- mop_log_t log;
-
- mop_log_cbk_t log_cbk;
- mop_stats_cbk_t stats_cbk;
- mop_getspec_cbk_t getspec_cbk;
-};
+typedef int32_t (*fop_getspec_t) (call_frame_t *frame,
+ xlator_t *this,
+ const char *key,
+ int32_t flag);
+
+typedef int32_t (*fop_rchecksum_t) (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd, off_t offset,
+ int32_t len, dict_t *xdata);
typedef int32_t (*fop_lookup_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf,
- dict_t *xattr);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct iatt *buf,
+ dict_t *xdata,
+ struct iatt *postparent);
typedef int32_t (*fop_stat_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *buf, dict_t *xdata);
typedef int32_t (*fop_fstat_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
-
-typedef int32_t (*fop_chmod_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
-
-typedef int32_t (*fop_fchmod_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
-
-typedef int32_t (*fop_chown_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
-
-typedef int32_t (*fop_fchown_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *buf, dict_t *xdata);
typedef int32_t (*fop_truncate_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
typedef int32_t (*fop_ftruncate_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
-
-typedef int32_t (*fop_utimens_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
typedef int32_t (*fop_access_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
typedef int32_t (*fop_readlink_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- const char *path);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ const char *path,
+ struct iatt *buf, dict_t *xdata);
typedef int32_t (*fop_mknod_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
typedef int32_t (*fop_mkdir_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
typedef int32_t (*fop_unlink_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
typedef int32_t (*fop_rmdir_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
typedef int32_t (*fop_symlink_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
typedef int32_t (*fop_rename_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *buf,
+ struct iatt *preoldparent,
+ struct iatt *postoldparent,
+ struct iatt *prenewparent,
+ struct iatt *postnewparent, dict_t *xdata);
typedef int32_t (*fop_link_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
typedef int32_t (*fop_create_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct stat *buf);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd,
+ inode_t *inode,
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
typedef int32_t (*fop_open_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd, dict_t *xdata);
typedef int32_t (*fop_readv_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct stat *stbuf,
- struct iobref *iobref);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vector,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref, dict_t *xdata);
typedef int32_t (*fop_writev_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *stbuf);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
typedef int32_t (*fop_flush_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
typedef int32_t (*fop_fsync_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
typedef int32_t (*fop_opendir_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd);
-
-typedef int32_t (*fop_getdents_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entries,
- int32_t count);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd, dict_t *xdata);
typedef int32_t (*fop_fsyncdir_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
typedef int32_t (*fop_statfs_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct statvfs *buf);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct statvfs *buf, dict_t *xdata);
typedef int32_t (*fop_setxattr_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
typedef int32_t (*fop_getxattr_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict, dict_t *xdata);
typedef int32_t (*fop_fsetxattr_cbk_t) (call_frame_t *frame,
void *cookie,
xlator_t *this,
int32_t op_ret,
- int32_t op_errno);
+ int32_t op_errno, dict_t *xdata);
typedef int32_t (*fop_fgetxattr_cbk_t) (call_frame_t *frame,
void *cookie,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- dict_t *dict);
+ dict_t *dict, dict_t *xdata);
typedef int32_t (*fop_removexattr_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+typedef int32_t (*fop_fremovexattr_cbk_t) (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
typedef int32_t (*fop_lk_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct flock *flock);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct gf_flock *flock, dict_t *xdata);
typedef int32_t (*fop_inodelk_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
typedef int32_t (*fop_finodelk_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
typedef int32_t (*fop_entrylk_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
typedef int32_t (*fop_fentrylk_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno);
-
-typedef int32_t (*fop_setdents_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
typedef int32_t (*fop_readdir_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *entries);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata);
+
+typedef int32_t (*fop_readdirp_cbk_t) (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata);
typedef int32_t (*fop_xattrop_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *xattr);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xattr, dict_t *xdata);
typedef int32_t (*fop_fxattrop_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *xattr);
-
-typedef int32_t (*fop_lock_notify_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno);
-
-typedef int32_t (*fop_lock_fnotify_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno);
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xattr, dict_t *xdata);
+
+
+typedef int32_t (*fop_setattr_cbk_t) (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *preop_stbuf,
+ struct iatt *postop_stbuf, dict_t *xdata);
+
+typedef int32_t (*fop_fsetattr_cbk_t) (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *preop_stbuf,
+ struct iatt *postop_stbuf, dict_t *xdata);
+
+typedef int32_t (*fop_fallocate_cbk_t) (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *preop_stbuf,
+ struct iatt *postop_stbuf, dict_t *xdata);
+
+typedef int32_t (*fop_discard_cbk_t) (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *preop_stbuf,
+ struct iatt *postop_stbuf, dict_t *xdata);
+
+typedef int32_t (*fop_zerofill_cbk_t) (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *preop_stbuf,
+ struct iatt *postop_stbuf, dict_t *xdata);
typedef int32_t (*fop_lookup_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req);
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *xdata);
typedef int32_t (*fop_stat_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc);
+ xlator_t *this,
+ loc_t *loc, dict_t *xdata);
typedef int32_t (*fop_fstat_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd);
-
-typedef int32_t (*fop_chmod_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode);
-
-typedef int32_t (*fop_fchmod_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- mode_t mode);
-
-typedef int32_t (*fop_chown_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- uid_t uid,
- gid_t gid);
-
-typedef int32_t (*fop_fchown_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- uid_t uid,
- gid_t gid);
+ xlator_t *this,
+ fd_t *fd, dict_t *xdata);
typedef int32_t (*fop_truncate_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset);
+ xlator_t *this,
+ loc_t *loc,
+ off_t offset, dict_t *xdata);
typedef int32_t (*fop_ftruncate_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset);
-
-typedef int32_t (*fop_utimens_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- struct timespec tv[2]);
+ xlator_t *this,
+ fd_t *fd,
+ off_t offset, dict_t *xdata);
typedef int32_t (*fop_access_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t mask);
+ xlator_t *this,
+ loc_t *loc,
+ int32_t mask, dict_t *xdata);
typedef int32_t (*fop_readlink_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- size_t size);
-
-typedef int32_t (*fop_mknod_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode,
- dev_t rdev);
-
-typedef int32_t (*fop_mkdir_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode);
-
-typedef int32_t (*fop_unlink_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc);
-
-typedef int32_t (*fop_rmdir_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc);
-
-typedef int32_t (*fop_symlink_t) (call_frame_t *frame,
- xlator_t *this,
- const char *linkname,
- loc_t *loc);
+ xlator_t *this,
+ loc_t *loc,
+ size_t size, dict_t *xdata);
+
+typedef int32_t (*fop_mknod_t) (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, dev_t rdev,
+ mode_t umask, dict_t *xdata);
+
+typedef int32_t (*fop_mkdir_t) (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, mode_t umask, dict_t *xdata);
+
+typedef int32_t (*fop_unlink_t) (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int xflags, dict_t *xdata);
+
+typedef int32_t (*fop_rmdir_t) (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int xflags, dict_t *xdata);
+
+typedef int32_t (*fop_symlink_t) (call_frame_t *frame, xlator_t *this,
+ const char *linkname, loc_t *loc,
+ mode_t umask, dict_t *xdata);
typedef int32_t (*fop_rename_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc);
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata);
typedef int32_t (*fop_link_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc);
-
-typedef int32_t (*fop_create_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- mode_t mode,
- fd_t *fd);
-
-typedef int32_t (*fop_open_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- fd_t *fd);
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata);
+
+typedef int32_t (*fop_create_t) (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *xdata);
+
+/* Tell subsequent writes on the fd_t to fsync after every writev fop without
+ * requiring a fsync fop.
+ */
+#define GF_OPEN_FSYNC 0x01
+
+/* Tell write-behind to disable writing behind despite O_SYNC not being set.
+ */
+#define GF_OPEN_NOWB 0x02
+
+typedef int32_t (*fop_open_t) (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata);
typedef int32_t (*fop_readv_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset);
+ xlator_t *this,
+ fd_t *fd,
+ size_t size,
+ off_t offset,
+ uint32_t flags, dict_t *xdata);
typedef int32_t (*fop_writev_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t offset,
- struct iobref *iobref);
+ xlator_t *this,
+ fd_t *fd,
+ struct iovec *vector,
+ int32_t count,
+ off_t offset,
+ uint32_t flags,
+ struct iobref *iobref, dict_t *xdata);
typedef int32_t (*fop_flush_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd);
+ xlator_t *this,
+ fd_t *fd, dict_t *xdata);
typedef int32_t (*fop_fsync_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t datasync);
+ xlator_t *this,
+ fd_t *fd,
+ int32_t datasync, dict_t *xdata);
typedef int32_t (*fop_opendir_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- fd_t *fd);
-
-typedef int32_t (*fop_getdents_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset,
- int32_t flag);
+ xlator_t *this,
+ loc_t *loc,
+ fd_t *fd, dict_t *xdata);
typedef int32_t (*fop_fsyncdir_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t datasync);
+ xlator_t *this,
+ fd_t *fd,
+ int32_t datasync, dict_t *xdata);
typedef int32_t (*fop_statfs_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc);
+ xlator_t *this,
+ loc_t *loc, dict_t *xdata);
typedef int32_t (*fop_setxattr_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- int32_t flags);
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *dict,
+ int32_t flags, dict_t *xdata);
typedef int32_t (*fop_getxattr_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name);
+ xlator_t *this,
+ loc_t *loc,
+ const char *name, dict_t *xdata);
typedef int32_t (*fop_fsetxattr_t) (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
dict_t *dict,
- int32_t flags);
+ int32_t flags, dict_t *xdata);
typedef int32_t (*fop_fgetxattr_t) (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
- const char *name);
+ const char *name, dict_t *xdata);
typedef int32_t (*fop_removexattr_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name);
+ xlator_t *this,
+ loc_t *loc,
+ const char *name, dict_t *xdata);
+
+typedef int32_t (*fop_fremovexattr_t) (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ const char *name, dict_t *xdata);
typedef int32_t (*fop_lk_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t cmd,
- struct flock *flock);
+ xlator_t *this,
+ fd_t *fd,
+ int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
typedef int32_t (*fop_inodelk_t) (call_frame_t *frame,
- xlator_t *this,
+ xlator_t *this,
const char *volume,
- loc_t *loc,
- int32_t cmd,
- struct flock *flock);
+ loc_t *loc,
+ int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
typedef int32_t (*fop_finodelk_t) (call_frame_t *frame,
- xlator_t *this,
+ xlator_t *this,
const char *volume,
- fd_t *fd,
- int32_t cmd,
- struct flock *flock);
+ fd_t *fd,
+ int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
typedef int32_t (*fop_entrylk_t) (call_frame_t *frame,
- xlator_t *this,
+ xlator_t *this,
const char *volume, loc_t *loc,
- const char *basename, entrylk_cmd cmd,
- entrylk_type type);
+ const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata);
typedef int32_t (*fop_fentrylk_t) (call_frame_t *frame,
- xlator_t *this,
+ xlator_t *this,
const char *volume, fd_t *fd,
- const char *basename, entrylk_cmd cmd,
- entrylk_type type);
-
-typedef int32_t (*fop_setdents_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags,
- dir_entry_t *entries,
- int32_t count);
+ const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata);
typedef int32_t (*fop_readdir_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset);
+ xlator_t *this,
+ fd_t *fd,
+ size_t size,
+ off_t offset, dict_t *xdata);
+
+typedef int32_t (*fop_readdirp_t) (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size,
+ off_t offset,
+ dict_t *xdata);
typedef int32_t (*fop_xattrop_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- gf_xattrop_flags_t optype,
- dict_t *xattr);
+ xlator_t *this,
+ loc_t *loc,
+ gf_xattrop_flags_t optype,
+ dict_t *xattr, dict_t *xdata);
typedef int32_t (*fop_fxattrop_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- gf_xattrop_flags_t optype,
- dict_t *xattr);
-
-typedef int32_t (*fop_lock_notify_t) (call_frame_t *frame,
- xlator_t *this, loc_t *loc,
- int32_t timeout);
-
-typedef int32_t (*fop_lock_fnotify_t) (call_frame_t *frame,
- xlator_t *this, fd_t *fd,
- int32_t timeout);
+ xlator_t *this,
+ fd_t *fd,
+ gf_xattrop_flags_t optype,
+ dict_t *xattr, dict_t *xdata);
+
+typedef int32_t (*fop_setattr_t) (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ struct iatt *stbuf,
+ int32_t valid, dict_t *xdata);
+
+typedef int32_t (*fop_fsetattr_t) (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ struct iatt *stbuf,
+ int32_t valid, dict_t *xdata);
+
+typedef int32_t (*fop_fallocate_t) (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t keep_size,
+ off_t offset,
+ size_t len,
+ dict_t *xdata);
+
+typedef int32_t (*fop_discard_t) (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ off_t offset,
+ size_t len,
+ dict_t *xdata);
+typedef int32_t (*fop_zerofill_t) (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ off_t offset,
+ size_t len,
+ dict_t *xdata);
struct xlator_fops {
- fop_lookup_t lookup;
- fop_stat_t stat;
- fop_fstat_t fstat;
- fop_chmod_t chmod;
- fop_fchmod_t fchmod;
- fop_chown_t chown;
- fop_fchown_t fchown;
- fop_truncate_t truncate;
- fop_ftruncate_t ftruncate;
- fop_utimens_t utimens;
- fop_access_t access;
- fop_readlink_t readlink;
- fop_mknod_t mknod;
- fop_mkdir_t mkdir;
- fop_unlink_t unlink;
- fop_rmdir_t rmdir;
- fop_symlink_t symlink;
- fop_rename_t rename;
- fop_link_t link;
- fop_create_t create;
- fop_open_t open;
- fop_readv_t readv;
- fop_writev_t writev;
- fop_flush_t flush;
- fop_fsync_t fsync;
- fop_opendir_t opendir;
- fop_readdir_t readdir;
- fop_fsyncdir_t fsyncdir;
- fop_statfs_t statfs;
- fop_setxattr_t setxattr;
- fop_getxattr_t getxattr;
- fop_fsetxattr_t fsetxattr;
- fop_fgetxattr_t fgetxattr;
- fop_removexattr_t removexattr;
- fop_lk_t lk;
- fop_inodelk_t inodelk;
- fop_finodelk_t finodelk;
- fop_entrylk_t entrylk;
- fop_fentrylk_t fentrylk;
- fop_setdents_t setdents;
- fop_getdents_t getdents;
- fop_checksum_t checksum;
- fop_xattrop_t xattrop;
- fop_fxattrop_t fxattrop;
- fop_lock_notify_t lock_notify;
- fop_lock_fnotify_t lock_fnotify;
-
- /* these entries are used for a typechecking hack in STACK_WIND _only_ */
- fop_lookup_cbk_t lookup_cbk;
- fop_stat_cbk_t stat_cbk;
- fop_fstat_cbk_t fstat_cbk;
- fop_chmod_cbk_t chmod_cbk;
- fop_fchmod_cbk_t fchmod_cbk;
- fop_chown_cbk_t chown_cbk;
- fop_fchown_cbk_t fchown_cbk;
- fop_truncate_cbk_t truncate_cbk;
- fop_ftruncate_cbk_t ftruncate_cbk;
- fop_utimens_cbk_t utimens_cbk;
- fop_access_cbk_t access_cbk;
- fop_readlink_cbk_t readlink_cbk;
- fop_mknod_cbk_t mknod_cbk;
- fop_mkdir_cbk_t mkdir_cbk;
- fop_unlink_cbk_t unlink_cbk;
- fop_rmdir_cbk_t rmdir_cbk;
- fop_symlink_cbk_t symlink_cbk;
- fop_rename_cbk_t rename_cbk;
- fop_link_cbk_t link_cbk;
- fop_create_cbk_t create_cbk;
- fop_open_cbk_t open_cbk;
- fop_readv_cbk_t readv_cbk;
- fop_writev_cbk_t writev_cbk;
- fop_flush_cbk_t flush_cbk;
- fop_fsync_cbk_t fsync_cbk;
- fop_opendir_cbk_t opendir_cbk;
- fop_readdir_cbk_t readdir_cbk;
- fop_fsyncdir_cbk_t fsyncdir_cbk;
- fop_statfs_cbk_t statfs_cbk;
- fop_setxattr_cbk_t setxattr_cbk;
- fop_getxattr_cbk_t getxattr_cbk;
- fop_fsetxattr_cbk_t fsetxattr_cbk;
- fop_fgetxattr_cbk_t fgetxattr_cbk;
- fop_removexattr_cbk_t removexattr_cbk;
- fop_lk_cbk_t lk_cbk;
- fop_inodelk_cbk_t inodelk_cbk;
- fop_finodelk_cbk_t finodelk_cbk;
- fop_entrylk_cbk_t entrylk_cbk;
- fop_fentrylk_cbk_t fentrylk_cbk;
- fop_setdents_cbk_t setdents_cbk;
- fop_getdents_cbk_t getdents_cbk;
- fop_checksum_cbk_t checksum_cbk;
- fop_xattrop_cbk_t xattrop_cbk;
- fop_fxattrop_cbk_t fxattrop_cbk;
- fop_lock_notify_cbk_t lock_notify_cbk;
- fop_lock_fnotify_cbk_t lock_fnotify_cbk;
+ fop_lookup_t lookup;
+ fop_stat_t stat;
+ fop_fstat_t fstat;
+ fop_truncate_t truncate;
+ fop_ftruncate_t ftruncate;
+ fop_access_t access;
+ fop_readlink_t readlink;
+ fop_mknod_t mknod;
+ fop_mkdir_t mkdir;
+ fop_unlink_t unlink;
+ fop_rmdir_t rmdir;
+ fop_symlink_t symlink;
+ fop_rename_t rename;
+ fop_link_t link;
+ fop_create_t create;
+ fop_open_t open;
+ fop_readv_t readv;
+ fop_writev_t writev;
+ fop_flush_t flush;
+ fop_fsync_t fsync;
+ fop_opendir_t opendir;
+ fop_readdir_t readdir;
+ fop_readdirp_t readdirp;
+ fop_fsyncdir_t fsyncdir;
+ fop_statfs_t statfs;
+ fop_setxattr_t setxattr;
+ fop_getxattr_t getxattr;
+ fop_fsetxattr_t fsetxattr;
+ fop_fgetxattr_t fgetxattr;
+ fop_removexattr_t removexattr;
+ fop_fremovexattr_t fremovexattr;
+ fop_lk_t lk;
+ fop_inodelk_t inodelk;
+ fop_finodelk_t finodelk;
+ fop_entrylk_t entrylk;
+ fop_fentrylk_t fentrylk;
+ fop_rchecksum_t rchecksum;
+ fop_xattrop_t xattrop;
+ fop_fxattrop_t fxattrop;
+ fop_setattr_t setattr;
+ fop_fsetattr_t fsetattr;
+ fop_getspec_t getspec;
+ fop_fallocate_t fallocate;
+ fop_discard_t discard;
+ fop_zerofill_t zerofill;
+
+ /* these entries are used for a typechecking hack in STACK_WIND _only_ */
+ fop_lookup_cbk_t lookup_cbk;
+ fop_stat_cbk_t stat_cbk;
+ fop_fstat_cbk_t fstat_cbk;
+ fop_truncate_cbk_t truncate_cbk;
+ fop_ftruncate_cbk_t ftruncate_cbk;
+ fop_access_cbk_t access_cbk;
+ fop_readlink_cbk_t readlink_cbk;
+ fop_mknod_cbk_t mknod_cbk;
+ fop_mkdir_cbk_t mkdir_cbk;
+ fop_unlink_cbk_t unlink_cbk;
+ fop_rmdir_cbk_t rmdir_cbk;
+ fop_symlink_cbk_t symlink_cbk;
+ fop_rename_cbk_t rename_cbk;
+ fop_link_cbk_t link_cbk;
+ fop_create_cbk_t create_cbk;
+ fop_open_cbk_t open_cbk;
+ fop_readv_cbk_t readv_cbk;
+ fop_writev_cbk_t writev_cbk;
+ fop_flush_cbk_t flush_cbk;
+ fop_fsync_cbk_t fsync_cbk;
+ fop_opendir_cbk_t opendir_cbk;
+ fop_readdir_cbk_t readdir_cbk;
+ fop_readdirp_cbk_t readdirp_cbk;
+ fop_fsyncdir_cbk_t fsyncdir_cbk;
+ fop_statfs_cbk_t statfs_cbk;
+ fop_setxattr_cbk_t setxattr_cbk;
+ fop_getxattr_cbk_t getxattr_cbk;
+ fop_fsetxattr_cbk_t fsetxattr_cbk;
+ fop_fgetxattr_cbk_t fgetxattr_cbk;
+ fop_removexattr_cbk_t removexattr_cbk;
+ fop_fremovexattr_cbk_t fremovexattr_cbk;
+ fop_lk_cbk_t lk_cbk;
+ fop_inodelk_cbk_t inodelk_cbk;
+ fop_finodelk_cbk_t finodelk_cbk;
+ fop_entrylk_cbk_t entrylk_cbk;
+ fop_fentrylk_cbk_t fentrylk_cbk;
+ fop_rchecksum_cbk_t rchecksum_cbk;
+ fop_xattrop_cbk_t xattrop_cbk;
+ fop_fxattrop_cbk_t fxattrop_cbk;
+ fop_setattr_cbk_t setattr_cbk;
+ fop_fsetattr_cbk_t fsetattr_cbk;
+ fop_getspec_cbk_t getspec_cbk;
+ fop_fallocate_cbk_t fallocate_cbk;
+ fop_discard_cbk_t discard_cbk;
+ fop_zerofill_cbk_t zerofill_cbk;
};
typedef int32_t (*cbk_forget_t) (xlator_t *this,
- inode_t *inode);
+ inode_t *inode);
typedef int32_t (*cbk_release_t) (xlator_t *this,
- fd_t *fd);
+ fd_t *fd);
+
+typedef int32_t (*cbk_invalidate_t)(xlator_t *this, inode_t *inode);
+
+typedef int32_t (*cbk_client_t)(xlator_t *this, client_t *client);
struct xlator_cbks {
- cbk_forget_t forget;
- cbk_release_t release;
- cbk_release_t releasedir;
+ cbk_forget_t forget;
+ cbk_release_t release;
+ cbk_release_t releasedir;
+ cbk_invalidate_t invalidate;
+ cbk_client_t client_destroy;
+ cbk_client_t client_disconnect;
};
typedef int32_t (*dumpop_priv_t) (xlator_t *this);
@@ -814,94 +802,120 @@ typedef int32_t (*dumpop_fd_t) (xlator_t *this);
typedef int32_t (*dumpop_inodectx_t) (xlator_t *this, inode_t *ino);
-
+typedef int32_t (*dumpop_fdctx_t) (xlator_t *this, fd_t *fd);
+
+typedef int32_t (*dumpop_priv_to_dict_t) (xlator_t *this, dict_t *dict);
+
+typedef int32_t (*dumpop_inode_to_dict_t) (xlator_t *this, dict_t *dict);
+
+typedef int32_t (*dumpop_fd_to_dict_t) (xlator_t *this, dict_t *dict);
+
+typedef int32_t (*dumpop_inodectx_to_dict_t) (xlator_t *this, inode_t *ino,
+ dict_t *dict);
+
+typedef int32_t (*dumpop_fdctx_to_dict_t) (xlator_t *this, fd_t *fd,
+ dict_t *dict);
+
+typedef int32_t (*dumpop_eh_t) (xlator_t *this);
+
struct xlator_dumpops {
- dumpop_priv_t priv;
- dumpop_inode_t inode;
- dumpop_fd_t fd;
- dumpop_inodectx_t inodectx;
+ dumpop_priv_t priv;
+ dumpop_inode_t inode;
+ dumpop_fd_t fd;
+ dumpop_inodectx_t inodectx;
+ dumpop_fdctx_t fdctx;
+ dumpop_priv_to_dict_t priv_to_dict;
+ dumpop_inode_to_dict_t inode_to_dict;
+ dumpop_fd_to_dict_t fd_to_dict;
+ dumpop_inodectx_to_dict_t inodectx_to_dict;
+ dumpop_fdctx_to_dict_t fdctx_to_dict;
+ dumpop_eh_t history;
};
typedef struct xlator_list {
- xlator_t *xlator;
- struct xlator_list *next;
+ xlator_t *xlator;
+ struct xlator_list *next;
} xlator_list_t;
-/* Add possible new type of option you may need */
-typedef enum {
- GF_OPTION_TYPE_ANY = 0,
- GF_OPTION_TYPE_STR,
- GF_OPTION_TYPE_INT,
- GF_OPTION_TYPE_SIZET,
- GF_OPTION_TYPE_PERCENT,
- GF_OPTION_TYPE_PERCENT_OR_SIZET,
- GF_OPTION_TYPE_BOOL,
- GF_OPTION_TYPE_XLATOR,
- GF_OPTION_TYPE_PATH,
- GF_OPTION_TYPE_TIME,
- GF_OPTION_TYPE_DOUBLE,
- GF_OPTION_TYPE_INTERNET_ADDRESS,
-} volume_option_type_t;
-
-#define ZR_VOLUME_MAX_NUM_KEY 4
-#define ZR_OPTION_MAX_ARRAY_SIZE 64
-
-/* Each translator should define this structure */
-typedef struct volume_options {
- char *key[ZR_VOLUME_MAX_NUM_KEY];
- /* different key, same meaning */
- volume_option_type_t type;
- int64_t min; /* -1 means no range */
- int64_t max; /* -1 means no range */
- char *value[ZR_OPTION_MAX_ARRAY_SIZE];
- /* If specified, will check for one of
- the value from this array */
- char *description; /* about the key */
-} volume_option_t;
-
-typedef struct vol_opt_list {
- struct list_head list;
- volume_option_t *given_opt;
-} volume_opt_list_t;
struct _xlator {
- /* Built during parsing */
- char *name;
- char *type;
- xlator_t *next;
- xlator_t *prev;
- xlator_list_t *parents;
- xlator_list_t *children;
- dict_t *options;
-
- /* Set after doing dlopen() */
- struct xlator_fops *fops;
- struct xlator_mops *mops;
- struct xlator_cbks *cbks;
- struct xlator_dumpops *dumpops;
- struct list_head volume_options; /* list of volume_option_t */
-
- void (*fini) (xlator_t *this);
- int32_t (*init) (xlator_t *this);
+ /* Built during parsing */
+ char *name;
+ char *type;
+ xlator_t *next;
+ xlator_t *prev;
+ xlator_list_t *parents;
+ xlator_list_t *children;
+ dict_t *options;
+
+ /* Set after doing dlopen() */
+ void *dlhandle;
+ struct xlator_fops *fops;
+ struct xlator_cbks *cbks;
+ struct xlator_dumpops *dumpops;
+ struct list_head volume_options; /* list of volume_option_t */
+
+ void (*fini) (xlator_t *this);
+ int32_t (*init) (xlator_t *this);
+ int32_t (*reconfigure) (xlator_t *this, dict_t *options);
+ int32_t (*mem_acct_init) (xlator_t *this);
event_notify_fn_t notify;
- /* Misc */
- glusterfs_ctx_t *ctx;
- inode_table_t *itable;
- char ready;
- char init_succeeded;
- void *private;
+ gf_loglevel_t loglevel; /* Log level for translator */
+
+ /* for latency measurement */
+ fop_latency_t latencies[GF_FOP_MAXVALUE];
+
+ /* Misc */
+ eh_t *history; /* event history context */
+ glusterfs_ctx_t *ctx;
+ glusterfs_graph_t *graph; /* not set for fuse */
+ inode_table_t *itable;
+ char init_succeeded;
+ void *private;
+ struct mem_acct mem_acct;
+ uint64_t winds;
+ char switched;
+
+ /* for the memory pool of 'frame->local' */
+ struct mem_pool *local_pool;
+ gf_boolean_t is_autoloaded;
};
-int validate_xlator_volume_options (xlator_t *xl, volume_option_t *opt);
+typedef struct {
+ int32_t (*init) (xlator_t *this);
+ void (*fini) (xlator_t *this);
+ int32_t (*reconfigure) (xlator_t *this,
+ dict_t *options);
+ event_notify_fn_t notify;
+} class_methods_t;
+
+#define xlator_has_parent(xl) (xl->parents != NULL)
+
+#define XLATOR_NOTIFY(_xl, params ...) \
+ do { \
+ xlator_t *_old_THIS = NULL; \
+ \
+ _old_THIS = THIS; \
+ THIS = _xl; \
+ \
+ ret = _xl->notify (_xl, params);\
+ \
+ THIS = _old_THIS; \
+ } while (0);
+
+int32_t xlator_set_type_virtual (xlator_t *xl, const char *type);
int32_t xlator_set_type (xlator_t *xl, const char *type);
+int32_t xlator_dynload (xlator_t *xl);
+
xlator_t *file_to_xlator_tree (glusterfs_ctx_t *ctx,
- FILE *fp);
+ FILE *fp);
int xlator_notify (xlator_t *this, int32_t event, void *data, ...);
int xlator_init (xlator_t *this);
+int xlator_destroy (xlator_t *xl);
int32_t xlator_tree_init (xlator_t *xl);
int32_t xlator_tree_free (xlator_t *xl);
@@ -909,9 +923,14 @@ int32_t xlator_tree_free (xlator_t *xl);
void xlator_tree_fini (xlator_t *xl);
void xlator_foreach (xlator_t *this,
- void (*fn) (xlator_t *each,
- void *data),
- void *data);
+ void (*fn) (xlator_t *each,
+ void *data),
+ void *data);
+
+void xlator_foreach_depth_first (xlator_t *this,
+ void (*fn) (xlator_t *each,
+ void *data),
+ void *data);
xlator_t *xlator_search_by_name (xlator_t *any, const char *name);
@@ -920,14 +939,22 @@ void inode_destroy_notify (inode_t *inode, const char *xlname);
int loc_copy (loc_t *dst, loc_t *src);
#define loc_dup(src, dst) loc_copy(dst, src)
void loc_wipe (loc_t *loc);
-
-#define GF_STAT_PRINT_FMT_STR "%"PRIx64",%"PRIx64",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx64",%"PRIx64",%"PRIx32",%"PRIx64",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32",%"PRIx32"\n"
-
-#define GF_STAT_SCAN_FMT_STR "%"SCNx64",%"SCNx64",%"SCNx32",%"SCNx32",%"SCNx32",%"SCNx32",%"SCNx64",%"SCNx64",%"SCNx32",%"SCNx64",%"SCNx32",%"SCNx32",%"SCNx32",%"SCNx32",%"SCNx32",%"SCNx32"\n"
-
-#define GF_STATFS_PRINT_FMT_STR "%"PRIx32",%"PRIx32",%"PRIx64",%"PRIx64",%"PRIx64",%"PRIx64",%"PRIx64",%"PRIx64",%"PRIx32",%"PRIx32",%"PRIx32"\n"
-
-#define GF_STATFS_SCAN_FMT_STR "%"SCNx32",%"SCNx32",%"SCNx64",%"SCNx64",%"SCNx64",%"SCNx64",%"SCNx64",%"SCNx64",%"SCNx32",%"SCNx32",%"SCNx32"\n"
-
+int loc_path (loc_t *loc, const char *bname);
+void loc_gfid (loc_t *loc, uuid_t gfid);
+char* loc_gfid_utoa (loc_t *loc);
+gf_boolean_t loc_is_root (loc_t *loc);
+int xlator_mem_acct_init (xlator_t *xl, int num_types);
+int is_gf_log_command (xlator_t *trans, const char *name, char *value);
+int glusterd_check_log_level (const char *value);
+int xlator_volopt_dynload (char *xlator_type, void **dl_handle,
+ volume_opt_list_t *vol_opt_handle);
+enum gf_hdsk_event_notify_op {
+ GF_EN_DEFRAG_STATUS,
+ GF_EN_MAX,
+};
+gf_boolean_t
+is_graph_topology_equal (glusterfs_graph_t *graph1, glusterfs_graph_t *graph2);
+int
+glusterfs_volfile_reconfigure (int oldvollen, FILE *newvolfile_fp,
+ glusterfs_ctx_t *ctx, const char *oldvolfile);
#endif /* _XLATOR_H */
-
diff --git a/libglusterfsclient/src/Makefile.am b/libglusterfsclient/src/Makefile.am
deleted file mode 100644
index 531f1fbb9..000000000
--- a/libglusterfsclient/src/Makefile.am
+++ /dev/null
@@ -1,16 +0,0 @@
-lib_LTLIBRARIES = libglusterfsclient.la
-noinst_HEADERS = libglusterfsclient-internals.h
-libglusterfsclient_HEADERS = libglusterfsclient.h
-libglusterfsclientdir = $(includedir)
-
-libglusterfsclient_la_SOURCES = libglusterfsclient.c libglusterfsclient-dentry.c
-libglusterfsclient_la_CFLAGS = -fPIC -Wall -pthread
-libglusterfsclient_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-libglusterfsclient_la_CPPFLAGS = -D_FILE_OFFSET_BITS=64 -D$(GF_HOST_OS) -D__USE_FILE_OFFSET64 -D_GNU_SOURCE -I$(top_srcdir)/libglusterfs/src -DDATADIR=\"$(localstatedir)\" -DCONFDIR=\"$(sysconfdir)/glusterfs\" $(GF_CFLAGS)
-libglusterfsclient_la_LDFLAGS = -shared -nostartfiles
-
-CLEANFILES =
-
-$(top_builddir)/libglusterfs/src/libglusterfs.la:
- $(MAKE) -C $(top_builddir)/libglusterfs/src/ all
-
diff --git a/libglusterfsclient/src/libglusterfsclient-dentry.c b/libglusterfsclient/src/libglusterfsclient-dentry.c
deleted file mode 100644
index 1c8910159..000000000
--- a/libglusterfsclient/src/libglusterfsclient-dentry.c
+++ /dev/null
@@ -1,389 +0,0 @@
-/*
- Copyright (c) 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "libglusterfsclient.h"
-#include "libglusterfsclient-internals.h"
-#include <libgen.h>
-
-#define LIBGLUSTERFS_CLIENT_DENTRY_LOC_PREPARE(_new_loc, _loc, _parent, \
- _resolved) do { \
- size_t pathlen = 0; \
- size_t resolvedlen = 0; \
- char *path = NULL; \
- int pad = 0; \
- pathlen = strlen (_loc->path) + 1; \
- path = CALLOC (1, pathlen); \
- _new_loc.parent = _parent; \
- resolvedlen = strlen (_resolved); \
- strncpy (path, _resolved, resolvedlen); \
- if (resolvedlen == 1) /* only root resolved */ \
- pad = 0; \
- else { \
- pad = 1; \
- path[resolvedlen] = '/'; \
- } \
- strcpy_till (path + resolvedlen + pad, \
- loc->path + resolvedlen + pad, '/'); \
- _new_loc.path = path; \
- _new_loc.name = strrchr (path, '/'); \
- if (_new_loc.name) \
- _new_loc.name++; \
- }while (0);
-
-
-/* strcpy_till - copy @dname to @dest, until 'delim' is encountered in @dest
- * @dest - destination string
- * @dname - source string
- * @delim - delimiter character
- *
- * return - NULL is returned if '0' is encountered in @dname, otherwise returns
- * a pointer to remaining string begining in @dest.
- */
-static char *
-strcpy_till (char *dest, const char *dname, char delim)
-{
- char *src = NULL;
- int idx = 0;
- char *ret = NULL;
-
- src = (char *)dname;
- while (src[idx] && (src[idx] != delim)) {
- dest[idx] = src[idx];
- idx++;
- }
-
- dest[idx] = 0;
-
- if (src[idx] == 0)
- ret = NULL;
- else
- ret = &(src[idx]);
-
- return ret;
-}
-
-/* __libgf_client_path_to_parenti - derive parent inode for @path. if immediate
- * parent is not available in the dentry cache, return nearest
- * available parent inode and set @reslv to the path of
- * the returned directory.
- *
- * @itable - inode table
- * @path - path whose parent has to be looked up.
- * @reslv - if immediate parent is not available, reslv will be set to path of the
- * resolved parent.
- *
- * return - should never return NULL. should at least return '/' inode.
- */
-static inode_t *
-__libgf_client_path_to_parenti (libglusterfs_client_ctx_t *ctx,
- inode_table_t *itable, const char *path,
- char **reslv)
-{
- char *resolved_till = NULL;
- char *strtokptr = NULL;
- char *component = NULL;
- char *next_component = NULL;
- char *pathdup = NULL;
- inode_t *curr = NULL;
- inode_t *parent = NULL;
- size_t pathlen = 0;
-
- pathlen = STRLEN_0 (path);
- resolved_till = CALLOC (1, pathlen);
-
- GF_VALIDATE_OR_GOTO("libglusterfsclient-dentry", resolved_till, out);
- pathdup = strdup (path);
- GF_VALIDATE_OR_GOTO("libglusterfsclient-dentry", pathdup, out);
-
- parent = inode_ref (itable->root);
- curr = NULL;
-
- component = strtok_r (pathdup, "/", &strtokptr);
-
- while (component) {
- curr = inode_search (itable, parent->ino, component);
- if (!curr) {
- break;
- }
- if (!libgf_is_iattr_cache_valid (ctx, curr, NULL,
- LIBGF_VALIDATE_LOOKUP))
- break;
-
- /* It is OK to append the component even if it is the
- last component in the path, because, if 'next_component'
- returns NULL, @parent will remain the same and
- @resolved_till will not be sent back
- */
- strcat (resolved_till, "/");
- strcat (resolved_till, component);
-
- next_component = strtok_r (NULL, "/", &strtokptr);
-
- if (next_component) {
- inode_unref (parent);
- parent = curr;
- curr = NULL;
- } else {
- /* will break */
- inode_unref (curr);
- }
-
- component = next_component;
- }
-
- if (resolved_till[0] == '\0') {
- strcat (resolved_till, "/");
- }
-
- free (pathdup);
-
- if (reslv) {
- *reslv = resolved_till;
- } else {
- FREE (resolved_till);
- }
-
-out:
- return parent;
-}
-
-static inline void
-libgf_client_update_resolved (const char *path, char *resolved)
-{
- int32_t pathlen = 0;
- char *tmp = NULL, *dest = NULL, *dname = NULL;
- char append_slash = 0;
-
- pathlen = strlen (resolved);
- tmp = (char *)(resolved + pathlen);
- if (*((char *) (resolved + pathlen - 1)) != '/') {
- tmp[0] = '/';
- append_slash = 1;
- }
-
- if (append_slash) {
- dest = tmp + 1;
- } else {
- dest = tmp;
- }
-
- if (*((char *) path + pathlen) == '/') {
- dname = (char *) path + pathlen + 1;
- } else {
- dname = (char *) path + pathlen;
- }
-
- strcpy_till (dest, dname, '/');
-}
-
-/* __do_path_resolve - resolve @loc->path into @loc->inode and @loc->parent. also
- * update the dentry cache
- *
- * @loc - loc to resolve.
- * @ctx - libglusterfsclient context
- * @lookup_basename - flag whether to lookup basename(loc->path)
- *
- * return - 0 on success
- * -1 on failure
- *
- */
-static int32_t
-__do_path_resolve (loc_t *loc, libglusterfs_client_ctx_t *ctx,
- char lookup_basename)
-{
- int32_t op_ret = -1;
- char *resolved = NULL;
- inode_t *parent = NULL, *inode = NULL;
- dentry_t *dentry = NULL;
- loc_t new_loc = {0, };
- char *pathname = NULL, *directory = NULL;
- char *file = NULL;
-
- parent = loc->parent;
- if (parent) {
- inode_ref (parent);
- gf_log ("libglusterfsclient-dentry", GF_LOG_DEBUG,
- "loc->parent(%"PRId64") already present. sending "
- "lookup for %"PRId64"/%s", parent->ino, parent->ino,
- loc->path);
- resolved = strdup (loc->path);
- resolved = dirname (resolved);
- } else {
- parent = __libgf_client_path_to_parenti (ctx, ctx->itable,
- loc->path, &resolved);
- }
-
- if (parent == NULL) {
- /* fire in the bush.. run! run!! run!!! */
- gf_log ("libglusterfsclient-dentry",
- GF_LOG_CRITICAL,
- "failed to get parent inode number");
- op_ret = -1;
- goto out;
- }
-
- gf_log ("libglusterfsclient-dentry",
- GF_LOG_DEBUG,
- "resolved path(%s) till %"PRId64"(%s). "
- "sending lookup for remaining path",
- loc->path, parent->ino, resolved);
-
- pathname = strdup (loc->path);
- directory = dirname (pathname);
- pathname = NULL;
-
- while (strcmp (resolved, directory) != 0)
- {
- dentry = NULL;
-
- LIBGLUSTERFS_CLIENT_DENTRY_LOC_PREPARE (new_loc, loc, parent,
- resolved);
-
- if (pathname) {
- free (pathname);
- pathname = NULL;
- }
-
- pathname = strdup (new_loc.path);
- file = basename (pathname);
-
- new_loc.inode = inode_search (ctx->itable, parent->ino, file);
- if (new_loc.inode) {
- if (libgf_is_iattr_cache_valid (ctx, new_loc.inode,
- NULL,
- LIBGF_VALIDATE_LOOKUP))
- dentry = dentry_search_for_inode (new_loc.inode,
- parent->ino,
- file);
- }
-
- if (dentry == NULL) {
- op_ret = libgf_client_lookup (ctx, &new_loc, NULL, NULL,
- 0);
- if (op_ret == -1) {
- inode_ref (new_loc.parent);
- libgf_client_loc_wipe (&new_loc);
- goto out;
- }
- }
-
- parent = inode_ref (new_loc.inode);
- libgf_client_loc_wipe (&new_loc);
-
- libgf_client_update_resolved (loc->path, resolved);
- }
-
- if (pathname) {
- free (pathname);
- pathname = NULL;
- }
-
- if (lookup_basename) {
- pathname = strdup (loc->path);
- file = basename (pathname);
-
- inode = inode_search (ctx->itable, parent->ino, file);
- if (!inode) {
- libgf_client_loc_fill (&new_loc, ctx, 0, parent->ino,
- file);
-
- op_ret = libgf_client_lookup (ctx, &new_loc, NULL, NULL,
- 0);
- if (op_ret == -1) {
- libgf_client_loc_wipe (&new_loc);
- goto out;
- }
-
- inode = inode_ref (new_loc.inode);
- libgf_client_loc_wipe (&new_loc);
- }
- }
-
-out:
- loc->inode = inode;
- loc->parent = parent;
-
- FREE (resolved);
- if (pathname) {
- FREE (pathname);
- }
-
- if (directory) {
- FREE (directory);
- }
-
- return op_ret;
-}
-
-
-/* resolves loc->path to loc->parent and loc->inode */
-int32_t
-libgf_client_path_lookup (loc_t *loc,
- libglusterfs_client_ctx_t *ctx,
- char lookup_basename)
-{
- char *pathname = NULL;
- char *directory = NULL;
- inode_t *inode = NULL;
- inode_t *parent = NULL;
- int32_t op_ret = 0;
-
- pathname = strdup (loc->path);
- directory = dirname (pathname);
- parent = inode_from_path (ctx->itable, directory);
-
- if (parent != NULL) {
- loc->parent = parent;
-
- if (!lookup_basename) {
- gf_log ("libglusterfsclient",
- GF_LOG_DEBUG,
- "resolved dirname(%s) to %"PRId64,
- loc->path, parent->ino);
- goto out;
- } else {
- inode = inode_from_path (ctx->itable, loc->path);
- if (inode != NULL) {
- gf_log ("libglusterfsclient",
- GF_LOG_DEBUG,
- "resolved path(%s) to %"PRId64"/%"PRId64,
- loc->path, parent->ino, inode->ino);
- loc->inode = inode;
- goto out;
- }
- }
- }
-
- if (parent) {
- inode_unref (parent);
- } else if (inode) {
- inode_unref (inode);
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "undesired behaviour. inode(%"PRId64") for %s "
- "exists without parent (%s)",
- inode->ino, loc->path, directory);
- }
- op_ret = __do_path_resolve (loc, ctx, lookup_basename);
-out:
- if (pathname)
- free (pathname);
-
- return op_ret;
-}
diff --git a/libglusterfsclient/src/libglusterfsclient-internals.h b/libglusterfsclient/src/libglusterfsclient-internals.h
deleted file mode 100755
index 32d4156a9..000000000
--- a/libglusterfsclient/src/libglusterfsclient-internals.h
+++ /dev/null
@@ -1,298 +0,0 @@
-/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef __LIBGLUSTERFSCLIENT_INTERNALS_H
-#define __LIBGLUSTERFSCLIENT_INTERNALS_H
-
-#include <glusterfs.h>
-#include <logging.h>
-#include <inode.h>
-#include <pthread.h>
-#include <stack.h>
-#include <list.h>
-#include <signal.h>
-#include <call-stub.h>
-#include <sys/time.h>
-#include <sys/resource.h>
-#include <fd.h>
-#include <dirent.h>
-
-#define LIBGF_IOBUF_SIZE (128 *GF_UNIT_KB)
-typedef void (*sighandler_t) (int);
-typedef struct list_head list_head_t;
-
-typedef struct libglusterfs_client_ctx {
- glusterfs_ctx_t gf_ctx;
- inode_table_t *itable;
- pthread_t reply_thread;
- call_pool_t pool;
- uint32_t counter;
- time_t lookup_timeout;
- time_t stat_timeout;
- /* We generate a fake fsid for the subvolume being
- * accessed through this context.
- */
- dev_t fake_fsid;
- pid_t pid;
-}libglusterfs_client_ctx_t;
-
-typedef struct signal_handler {
- int signo;
- sighandler_t handler;
- list_head_t next;
-}libgf_client_signal_handler_t ;
-
-typedef struct {
- pthread_mutex_t lock;
- pthread_cond_t reply_cond;
- call_stub_t *reply_stub;
- char complete;
- union {
- struct {
- char is_revalidate;
- loc_t *loc;
- int32_t size;
- } lookup;
- }fop;
- fd_t *fd; /* Needed here because we need a ref to the dir
- fd in the libgf_client_readdir_cbk in order
- to process the dirents received, without
- having them added to the reply stub.
- Also used in updating iattr cache. See
- readv_cbk for eg.
- */
-}libgf_client_local_t;
-
-typedef struct {
- pthread_cond_t init_con_established;
- pthread_mutex_t lock;
- char complete;
-}libglusterfs_client_private_t;
-
-typedef struct {
- pthread_mutex_t lock;
- uint32_t previous_lookup_time;
- uint32_t previous_stat_time;
- struct stat stbuf;
-} libglusterfs_client_inode_ctx_t;
-
-/* Our dirent cache is very simplistic when it comes to directory
- * reading workloads. It assumes that all directory traversal operations happen
- * sequentially and that readdir callers dont go jumping around the directory
- * using seekdir, rewinddir. Thats why you'll notice that seekdir, rewinddir
- * API in libglusterfsclient only set the offset. The consequence is that when
- * libgf_dcache_readdir finds that the offset presented to it, is not
- * the same as the offset of the previous dirent returned by dcache (..stored
- * in struct direntcache->prev_off..), it realises that a non-sequential
- * directory read is in progress and returns 0 to signify that the cache is
- * not valid.
- * This could be made a bit more intelligent by using a data structure like
- * a hash-table or a balanced binary tree that allows us to search for the
- * existence of particular offsets in the cache without performing a list or
- * array traversal.
- * Dont use a simple binary search tree because
- * there is no guarantee that offsets in a sequential reading of the directory
- * will be just random integers. If for some reason they are sequential, a BST
- * will end up becoming a list.
- */
-struct direntcache {
- gf_dirent_t entries; /* Head of list of cached dirents. */
- gf_dirent_t *next; /* Pointer to the next entry that
- * should be sent by readdir */
- uint64_t prev_off; /* Offset where the next read will
- * happen.
- */
-};
-
-typedef struct {
- pthread_mutex_t lock;
- off_t offset;
- libglusterfs_client_ctx_t *ctx;
- /* `man readdir` says readdir is non-re-entrant
- * only if two readdirs are racing on the same
- * handle.
- */
- struct dirent dirp;
- struct direntcache *dcache;
-
-} libglusterfs_client_fd_ctx_t;
-
-typedef struct libglusterfs_client_async_local {
- void *cbk_data;
- union {
- struct {
- fd_t *fd;
- glusterfs_readv_cbk_t cbk;
- char update_offset;
- }readv_cbk;
-
- struct {
- fd_t *fd;
- glusterfs_write_cbk_t cbk;
- }write_cbk;
-
- struct {
- fd_t *fd;
- }close_cbk;
-
- struct {
- void *buf;
- size_t size;
- loc_t *loc;
- char is_revalidate;
- glusterfs_get_cbk_t cbk;
- }lookup_cbk;
- }fop;
-}libglusterfs_client_async_local_t;
-
-#define LIBGF_STACK_WIND_AND_WAIT(frame, rfn, obj, fn, params ...) \
- do { \
- STACK_WIND (frame, rfn, obj, fn, params); \
- pthread_mutex_lock (&local->lock); \
- { \
- while (!local->complete) { \
- pthread_cond_wait (&local->reply_cond, \
- &local->lock); \
- } \
- } \
- pthread_mutex_unlock (&local->lock); \
- } while (0)
-
-
-#define LIBGF_CLIENT_SIGNAL(signal_handler_list, signo, handler) \
- do { \
- libgf_client_signal_handler_t *libgf_handler = CALLOC (1, \
- sizeof (*libgf_handler)); \
- ERR_ABORT (libgf_handler); \
- libgf_handler->signo = signo; \
- libgf_handler->handler = signal (signo, handler); \
- list_add (&libgf_handler->next, signal_handler_list); \
- } while (0)
-
-#define LIBGF_INSTALL_SIGNAL_HANDLERS(signal_handlers) \
- do { \
- INIT_LIST_HEAD (&signal_handlers); \
- /* Handle SIGABORT and SIGSEGV */ \
- LIBGF_CLIENT_SIGNAL (&signal_handlers, SIGSEGV, gf_print_trace); \
- LIBGF_CLIENT_SIGNAL (&signal_handlers, SIGABRT, gf_print_trace); \
- LIBGF_CLIENT_SIGNAL (&signal_handlers, SIGHUP, gf_log_logrotate); \
- /* LIBGF_CLIENT_SIGNAL (SIGTERM, glusterfs_cleanup_and_exit); */ \
- } while (0)
-
-#define LIBGF_RESTORE_SIGNAL_HANDLERS(local) \
- do { \
- libgf_client_signal_handler_t *ptr = NULL, *tmp = NULL; \
- list_for_each_entry_safe (ptr, tmp, &local->signal_handlers,\
- next) { \
- signal (ptr->signo, ptr->handler); \
- FREE (ptr); \
- } \
- } while (0)
-
-#define LIBGF_CLIENT_FOP_ASYNC(ctx, local, ret_fn, op, args ...) \
- do { \
- call_frame_t *frame = get_call_frame_for_req (ctx, 1); \
- xlator_t *xl = frame->this->children ? \
- frame->this->children->xlator : NULL; \
- frame->root->state = ctx; \
- frame->local = local; \
- STACK_WIND (frame, ret_fn, xl, xl->fops->op, args); \
- } while (0)
-
-#define LIBGF_CLIENT_FOP(ctx, stub, op, local, args ...) \
- do { \
- call_frame_t *frame = get_call_frame_for_req (ctx, 1); \
- xlator_t *xl = frame->this->children ? \
- frame->this->children->xlator : NULL; \
- if (!local) { \
- local = CALLOC (1, sizeof (*local)); \
- } \
- ERR_ABORT (local); \
- frame->local = local; \
- frame->root->state = ctx; \
- pthread_cond_init (&local->reply_cond, NULL); \
- pthread_mutex_init (&local->lock, NULL); \
- LIBGF_STACK_WIND_AND_WAIT (frame, libgf_client_##op##_cbk, xl, \
- xl->fops->op, args); \
- stub = local->reply_stub; \
- FREE (frame->local); \
- frame->local = NULL; \
- STACK_DESTROY (frame->root); \
- } while (0)
-
-#define LIBGF_REPLY_NOTIFY(local) \
- do { \
- pthread_mutex_lock (&local->lock); \
- { \
- local->complete = 1; \
- pthread_cond_broadcast (&local->reply_cond); \
- } \
- pthread_mutex_unlock (&local->lock); \
- } while (0)
-
-
-void
-libgf_client_loc_wipe (loc_t *loc);
-
-int32_t
-libgf_client_loc_fill (loc_t *loc,
- libglusterfs_client_ctx_t *ctx,
- ino_t ino,
- ino_t par,
- const char *name);
-
-int32_t
-libgf_client_path_lookup (loc_t *loc,
- libglusterfs_client_ctx_t *ctx,
- char lookup_basename);
-
-int32_t
-libgf_client_lookup (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- struct stat *stbuf,
- dict_t **dict,
- dict_t *xattr_req);
-
-/* We're not expecting more than 10-15
- * VMPs per process so a list is acceptable.
- */
-struct vmp_entry {
- struct list_head list;
- char * vmp;
- int vmplen;
- glusterfs_handle_t handle;
-};
-
-#define LIBGF_UPDATE_LOOKUP 0x1
-#define LIBGF_UPDATE_STAT 0x2
-#define LIBGF_UPDATE_ALL (LIBGF_UPDATE_LOOKUP | LIBGF_UPDATE_STAT)
-
-#define LIBGF_VALIDATE_LOOKUP 0x1
-#define LIBGF_VALIDATE_STAT 0x2
-
-#define LIBGF_INVALIDATE_LOOKUP 0x1
-#define LIBGF_INVALIDATE_STAT 0x2
-int
-libgf_is_iattr_cache_valid (libglusterfs_client_ctx_t *ctx, inode_t *inode,
- struct stat *sbuf, int flags);
-
-int
-libgf_update_iattr_cache (inode_t *inode, int flags, struct stat *buf);
-
-#endif
diff --git a/libglusterfsclient/src/libglusterfsclient.c b/libglusterfsclient/src/libglusterfsclient.c
deleted file mode 100755
index d426bd3cf..000000000
--- a/libglusterfsclient/src/libglusterfsclient.c
+++ /dev/null
@@ -1,7677 +0,0 @@
-/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE
-#endif
-
-#include <stdio.h>
-#include <errno.h>
-#include <libgen.h>
-#include <stddef.h>
-
-#include <sys/time.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#ifdef GF_SOLARIS_HOST_OS
-#include <sys/statfs.h>
-#endif
-#include <unistd.h>
-#include <xlator.h>
-#include <timer.h>
-#include "defaults.h"
-#include <time.h>
-#include <poll.h>
-#include "transport.h"
-#include "event.h"
-#include "libglusterfsclient.h"
-#include "libglusterfsclient-internals.h"
-#include "compat.h"
-#include "compat-errno.h"
-#include <sys/vfs.h>
-#include <utime.h>
-#include <sys/param.h>
-#include <list.h>
-#include <stdarg.h>
-#include <sys/statvfs.h>
-#include "hashfn.h"
-#include <sys/select.h>
-
-#define LIBGF_XL_NAME "libglusterfsclient"
-#define LIBGLUSTERFS_INODE_TABLE_LRU_LIMIT 1000 //14057
-#define LIBGF_SENDFILE_BLOCK_SIZE 4096
-#define LIBGF_READDIR_BLOCK 4096
-
-static inline xlator_t *
-libglusterfs_graph (xlator_t *graph);
-int32_t libgf_client_readlink (libglusterfs_client_ctx_t *ctx, loc_t *loc,
- char *buf, size_t bufsize);
-
-int
-libgf_realpath_loc_fill (libglusterfs_client_ctx_t *ctx, char *link,
- loc_t *targetloc);
-static int first_init = 1;
-static int first_fini = 1;
-
-/* The global list of virtual mount points */
-struct {
- struct list_head list;
- int entries;
-}vmplist;
-
-
-/* Protects the VMP list above. */
-pthread_mutex_t vmplock = PTHREAD_MUTEX_INITIALIZER;
-
-/* Ensures only one thread is ever calling glusterfs_mount.
- * Since that function internally calls routines which
- * use the yacc parser code using global vars, this process
- * needs to be syncronised.
- */
-pthread_mutex_t mountlock = PTHREAD_MUTEX_INITIALIZER;
-
-char *
-libgf_vmp_virtual_path(struct vmp_entry *entry, const char *path)
-{
- char *vpath = NULL;
- char *tmp = NULL;
-
- vpath = calloc (strlen (path) + 1, sizeof (char));
- tmp = ((char *)(path + (entry->vmplen-1)));
- if (tmp[0] != '/') {
- vpath[0] = '/';
- strcat (&vpath[1], tmp);
- } else
- strcpy (vpath, tmp);
-
- return vpath;
-}
-
-char *
-zr_build_process_uuid ()
-{
- char tmp_str[1024] = {0,};
- char hostname[256] = {0,};
- struct timeval tv = {0,};
- struct tm now = {0, };
- char now_str[32];
-
- if (-1 == gettimeofday(&tv, NULL)) {
- gf_log ("", GF_LOG_ERROR,
- "gettimeofday: failed %s",
- strerror (errno));
- }
-
- if (-1 == gethostname (hostname, 256)) {
- gf_log ("", GF_LOG_ERROR,
- "gethostname: failed %s",
- strerror (errno));
- }
-
- localtime_r (&tv.tv_sec, &now);
- strftime (now_str, 32, "%Y/%m/%d-%H:%M:%S", &now);
- snprintf (tmp_str, 1024, "%s-%d-%s:%ld",
- hostname, getpid(), now_str, tv.tv_usec);
-
- return strdup (tmp_str);
-}
-
-
-int32_t
-libgf_client_forget (xlator_t *this,
- inode_t *inode)
-{
- uint64_t ptr = 0;
- libglusterfs_client_inode_ctx_t *ctx = NULL;
-
- inode_ctx_del (inode, this, &ptr);
- ctx = (libglusterfs_client_inode_ctx_t *)(long) ptr;
-
- FREE (ctx);
-
- return 0;
-}
-
-xlator_t *
-libgf_inode_to_xlator (inode_t *inode)
-{
- if (!inode)
- return NULL;
-
- if (!inode->table)
- return NULL;
-
- if (!inode->table->xl)
- return NULL;
-
- if (!inode->table->xl->ctx)
- return NULL;
-
- return inode->table->xl->ctx->top;
-}
-
-libglusterfs_client_fd_ctx_t *
-libgf_get_fd_ctx (fd_t *fd)
-{
- uint64_t ctxaddr = 0;
- libglusterfs_client_fd_ctx_t *ctx = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
-
- if (fd_ctx_get (fd, libgf_inode_to_xlator (fd->inode), &ctxaddr) == -1)
- goto out;
-
- ctx = (libglusterfs_client_fd_ctx_t *)(long)ctxaddr;
-
-out:
- return ctx;
-}
-
-libglusterfs_client_fd_ctx_t *
-libgf_alloc_fd_ctx (libglusterfs_client_ctx_t *ctx, fd_t *fd)
-{
- libglusterfs_client_fd_ctx_t *fdctx = NULL;
- uint64_t ctxaddr = 0;
-
- fdctx = CALLOC (1, sizeof (*fdctx));
- if (fdctx == NULL) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR,
- "memory allocation failure");
- fdctx = NULL;
- goto out;
- }
-
- pthread_mutex_init (&fdctx->lock, NULL);
- fdctx->ctx = ctx;
- ctxaddr = (uint64_t) (long)fdctx;
-
- if (fd->inode) {
- if (S_ISDIR (fd->inode->st_mode)) {
- fdctx->dcache = CALLOC (1, sizeof (struct direntcache));
- if (fdctx->dcache)
- INIT_LIST_HEAD (&fdctx->dcache->entries.list);
- /* If the calloc fails, we can still continue
- * working as the dcache is not required for correct
- * operation.
- */
- }
- }
- fd_ctx_set (fd, libgf_inode_to_xlator (fd->inode), ctxaddr);
-out:
- return fdctx;
-}
-
-libglusterfs_client_fd_ctx_t *
-libgf_del_fd_ctx (fd_t *fd)
-{
- uint64_t ctxaddr = 0;
- libglusterfs_client_fd_ctx_t *ctx = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
-
- if (fd_ctx_del (fd, libgf_inode_to_xlator (fd->inode) , &ctxaddr) == -1)
- goto out;
-
- ctx = (libglusterfs_client_fd_ctx_t *)(long)ctxaddr;
-
-out:
- return ctx;
-}
-
-void
-libgf_dcache_invalidate (fd_t *fd)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (!fd)
- return;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- return;
- }
-
- if (!fd_ctx->dcache) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No dcache present");
- return;
- }
-
- if (!list_empty (&fd_ctx->dcache->entries.list))
- gf_dirent_free (&fd_ctx->dcache->entries);
-
- INIT_LIST_HEAD (&fd_ctx->dcache->entries.list);
-
- fd_ctx->dcache->next = NULL;
- fd_ctx->dcache->prev_off = 0;
-
- return;
-}
-
-/* The first entry in the entries is always a placeholder
- * or the list head. The real entries begin from entries->next.
- */
-int
-libgf_dcache_update (libglusterfs_client_ctx_t *ctx, fd_t *fd,
- gf_dirent_t *entries)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- int op_ret = -1;
-
- if ((!ctx) || (!fd) || (!entries)) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- /* dcache is not enabled. */
- if (!fd_ctx->dcache) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No dcache present");
- op_ret = 0;
- goto out;
- }
-
- /* If we're updating, we must begin with invalidating any previous
- * entries.
- */
- libgf_dcache_invalidate (fd);
-
- fd_ctx->dcache->next = entries->next;
- /* We still need to store a pointer to the head
- * so we start free'ing from the head when invalidation
- * is required.
- *
- * Need to delink the entries from the list
- * given to us by an underlying translators. Most translators will
- * free this list after this call so we must preserve the dirents in
- * order to cache them.
- */
- list_splice_init (&entries->list, &fd_ctx->dcache->entries.list);
- op_ret = 0;
-out:
- return op_ret;
-}
-
-int
-libgf_dcache_readdir (libglusterfs_client_ctx_t *ctx, fd_t *fd,
- struct dirent *dirp, off_t *offset)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- int cachevalid = 0;
-
- if ((!ctx) || (!fd) || (!dirp) || (!offset))
- return 0;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- if (!fd_ctx->dcache) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No dcache present");
- goto out;
- }
-
- /* We've either run out of entries in the cache
- * or the cache is empty.
- */
- if (!fd_ctx->dcache->next) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "No entries present");
- goto out;
- }
-
- /* The dirent list is created as a circular linked list
- * so this check is needed to ensure, we dont start
- * reading old entries again.
- * If we're reached this situation, the cache is exhausted
- * and we'll need to pre-fetch more entries to continue serving.
- */
- if (fd_ctx->dcache->next == &fd_ctx->dcache->entries) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Entries exhausted");
- goto out;
- }
-
- /* During sequential reading we generally expect that the offset
- * requested is the same as the offset we served in the previous call
- * to readdir. But, seekdir, rewinddir and libgf_dcache_invalidate
- * require special handling because seekdir/rewinddir change the offset
- * in the fd_ctx and libgf_dcache_invalidate changes the prev_off.
- */
- if (*offset != fd_ctx->dcache->prev_off) {
- /* For all cases of the if branch above, we know that the
- * cache is now invalid except for the case below. It handles
- * the case where the two offset values above are different
- * but different because the previous readdir block was
- * exhausted, resulting in a prev_off being set to 0 in
- * libgf_dcache_invalidate, while the requested offset is non
- * zero because that is what we returned for the last dirent
- * of the previous readdir block.
- */
- if ((*offset != 0) && (fd_ctx->dcache->prev_off == 0)) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Entries"
- " exhausted");
- cachevalid = 1;
- } else
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Dcache"
- " invalidated previously");
- } else
- cachevalid = 1;
-
- if (!cachevalid)
- goto out;
-
- dirp->d_ino = fd_ctx->dcache->next->d_ino;
- strncpy (dirp->d_name, fd_ctx->dcache->next->d_name,
- fd_ctx->dcache->next->d_len);
-
- *offset = fd_ctx->dcache->next->d_off;
- dirp->d_off = *offset;
- fd_ctx->dcache->prev_off = fd_ctx->dcache->next->d_off;
- fd_ctx->dcache->next = fd_ctx->dcache->next->next;
-
-out:
- return cachevalid;
-}
-
-
-int32_t
-libgf_client_release (xlator_t *this,
- fd_t *fd)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- fd_ctx = libgf_get_fd_ctx (fd);
- if (S_ISDIR (fd->inode->st_mode)) {
- libgf_dcache_invalidate (fd);
- FREE (fd_ctx->dcache);
- }
-
- libgf_del_fd_ctx (fd);
- if (fd_ctx != NULL) {
- pthread_mutex_destroy (&fd_ctx->lock);
- FREE (fd_ctx);
- }
-
- return 0;
-}
-
-libglusterfs_client_inode_ctx_t *
-libgf_get_inode_ctx (inode_t *inode)
-{
- uint64_t ctxaddr = 0;
- libglusterfs_client_inode_ctx_t *ictx = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, inode, out);
- if (inode_ctx_get (inode, libgf_inode_to_xlator (inode), &ctxaddr) < 0)
- goto out;
-
- ictx = (libglusterfs_client_inode_ctx_t *)(long)ctxaddr;
-
-out:
- return ictx;
-}
-
-libglusterfs_client_inode_ctx_t *
-libgf_del_inode_ctx (inode_t *inode)
-{
- uint64_t ctxaddr = 0;
- libglusterfs_client_inode_ctx_t *ictx = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, inode, out);
- if (inode_ctx_del (inode, libgf_inode_to_xlator (inode), &ctxaddr) < 0)
- goto out;
-
- ictx = (libglusterfs_client_inode_ctx_t *)(long)ctxaddr;
-
-out:
- return ictx;
-}
-
-libglusterfs_client_inode_ctx_t *
-libgf_alloc_inode_ctx (libglusterfs_client_ctx_t *ctx, inode_t *inode)
-{
- uint64_t ctxaddr = 0;
- libglusterfs_client_inode_ctx_t *ictx = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, inode, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- ictx = CALLOC (1, sizeof (*ictx));
- if (ictx == NULL) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR,
- "memory allocation failure");
- goto out;
- }
-
- pthread_mutex_init (&ictx->lock, NULL);
- ctxaddr = (uint64_t) (long)ictx;
- if (inode_ctx_put (inode, libgf_inode_to_xlator (inode), ctxaddr) < 0){
- FREE (ictx);
- ictx = NULL;
- }
-
-out:
- return ictx;
-}
-
-int
-libgf_transform_devnum (libglusterfs_client_ctx_t *libctx, struct stat *buf)
-{
-
- if ((!libctx) || (!buf))
- return -1;
-
- buf->st_dev = libctx->fake_fsid;
- return 0;
-}
-
-int
-libgf_update_iattr_cache (inode_t *inode, int flags, struct stat *buf)
-{
- libglusterfs_client_inode_ctx_t *inode_ctx = NULL;
- time_t current = 0;
- int op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, inode, out);
-
- inode_ctx = libgf_get_inode_ctx (inode);
- if (!inode_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No inode context"
- " present");
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
-
- pthread_mutex_lock (&inode_ctx->lock);
- {
- /* Take a timestamp only after we've acquired the
- * lock.
- */
- current = time (NULL);
- if (flags & LIBGF_UPDATE_LOOKUP) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Updating lookup");
- inode_ctx->previous_lookup_time = current;
- }
-
- if (flags & LIBGF_UPDATE_STAT) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Updating stat");
-
- /* Update the cached stat struct only if a new
- * stat buf is given.
- */
- if (buf != NULL) {
- inode_ctx->previous_stat_time = current;
- memcpy (&inode_ctx->stbuf, buf,
- sizeof (inode_ctx->stbuf));
- }
- }
- }
- pthread_mutex_unlock (&inode_ctx->lock);
- op_ret = 0;
-
-out:
- return op_ret;
-}
-
-
-int
-libgf_invalidate_iattr_cache (inode_t *inode, int flags)
-{
- libglusterfs_client_inode_ctx_t *ictx = NULL;
-
- if (!inode)
- return -1;
-
- ictx = libgf_get_inode_ctx (inode);
- if (!ictx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No inode context"
- " present");
- return -1;
- }
-
- pthread_mutex_lock (&ictx->lock);
- {
- if (flags & LIBGF_INVALIDATE_LOOKUP) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Invalidating"
- " lookup");
- ictx->previous_lookup_time = 0;
- }
-
- if (flags & LIBGF_INVALIDATE_STAT) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Invalidating"
- " stat");
- ictx->previous_stat_time = 0;
- }
-
- }
- pthread_mutex_unlock (&ictx->lock);
-
- return 0;
-}
-
-
-int
-libgf_is_iattr_cache_valid (libglusterfs_client_ctx_t *ctx, inode_t *inode,
- struct stat *sbuf, int flags)
-{
- time_t current = 0;
- time_t prev = 0;
- libglusterfs_client_inode_ctx_t *inode_ctx = NULL;
- int cache_valid = 0;
- time_t timeout = 0;
-
- if (inode == NULL)
- return 0;
-
- inode_ctx = libgf_get_inode_ctx (inode);
- if (!inode_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No inode context"
- " present\n");
- return 0;
- }
-
- pthread_mutex_lock (&inode_ctx->lock);
- {
- current = time (NULL);
- if (flags & LIBGF_VALIDATE_LOOKUP) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Checking lookup");
- prev = inode_ctx->previous_lookup_time;
- timeout = ctx->lookup_timeout;
- } else {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Checking stat");
- prev = inode_ctx->previous_stat_time;
- timeout = ctx->stat_timeout;
- }
-
- /* Even if the timeout is set to -1 to cache
- * infinitely, fops like write must invalidate the
- * stat cache because writev_cbk cannot update
- * the cache using the stat returned to it. This is
- * because write-behind can return a stat bufs filled
- * with zeroes.
- */
- if (prev == 0) {
- cache_valid = 0;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Cache Invalid");
- goto iattr_unlock_out;
- }
-
- /* Cache infinitely */
- if (timeout == (time_t)-1) {
- cache_valid = 1;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Caching On and "
- "valid");
- goto iattr_unlock_out;
- }
-
- /* Disable caching completely */
- if (timeout == 0) {
- cache_valid = 0;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Cache disabled");
- goto iattr_unlock_out;
- }
-
- if ((prev > 0) && (timeout >= (current - prev))) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Cache valid");
- cache_valid = 1;
- }
-
- if (flags & LIBGF_VALIDATE_LOOKUP)
- goto iattr_unlock_out;
-
- if ((cache_valid) && (sbuf))
- *sbuf = inode_ctx->stbuf;
- }
-iattr_unlock_out:
- pthread_mutex_unlock (&inode_ctx->lock);
-
- return cache_valid;
-}
-
-int32_t
-libgf_client_releasedir (xlator_t *this,
- fd_t *fd)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- fd_ctx = libgf_get_fd_ctx (fd);
- if (S_ISDIR (fd->inode->st_mode)) {
- libgf_dcache_invalidate (fd);
- FREE (fd_ctx->dcache);
- }
-
- libgf_del_fd_ctx (fd);
- if (fd_ctx != NULL) {
- pthread_mutex_destroy (&fd_ctx->lock);
- FREE (fd_ctx);
- }
-
- return 0;
-}
-
-void *poll_proc (void *ptr)
-{
- glusterfs_ctx_t *ctx = ptr;
-
- event_dispatch (ctx->event_pool);
-
- return NULL;
-}
-
-
-int32_t
-xlator_graph_init (xlator_t *xl)
-{
- xlator_t *trav = xl;
- int32_t ret = -1;
-
- while (trav->prev)
- trav = trav->prev;
-
- while (trav) {
- if (!trav->ready) {
- ret = xlator_tree_init (trav);
- if (ret < 0)
- break;
- }
- trav = trav->next;
- }
-
- return ret;
-}
-
-
-void
-xlator_graph_fini (xlator_t *xl)
-{
- xlator_t *trav = xl;
- while (trav->prev)
- trav = trav->prev;
-
- while (trav) {
- if (!trav->init_succeeded) {
- break;
- }
-
- xlator_tree_fini (trav);
- trav = trav->next;
- }
-}
-
-/* Returns a pointer to the @n'th char matching
- * @c in string @str, starting the search from right or
- * end-of-string, rather than starting from left, as rindex
- * function does.
- */
-char *
-libgf_rrindex (char *str, int c, int n)
-{
- int len = 0;
- int occurrence = 0;
-
- if (str == NULL)
- return NULL;
-
- len = strlen (str);
- /* Point to last character of string. */
- str += (len - 1);
- while (len > 0) {
- if ((int)*str == c) {
- ++occurrence;
- if (occurrence == n)
- break;
- }
- --len;
- --str;
- }
-
- return str;
-}
-
-char *
-libgf_trim_to_prev_dir (char * path)
-{
- char *idx = NULL;
- int len = 0;
-
- if (!path)
- return NULL;
-
- /* Check if we're already at root, if yes
- * then there is no prev dir.
- */
- len = strlen (path);
- if (len == 1)
- return path;
-
- if (path[len - 1] == '/') {
- path[len - 1] = '\0';
- }
-
- idx = libgf_rrindex (path, '/', 1);
- /* Move to the char after the / */
- ++idx;
- *idx = '\0';
-
- return path;
-}
-
-/* Performs a lightweight path resolution that only
- * looks for . and .. and replaces those with the
- * proper names.
- *
- * FIXME: This is a stop-gap measure till we have full
- * fledge path resolution going in here.
- * Function returns path strdup'ed so remember to FREE the
- * string as required.
- */
-char *
-libgf_resolve_path_light (char *path)
-{
- char *respath = NULL;
- char *saveptr = NULL;
- char *tok = NULL;
- char *mypath = NULL;
- int len = 0;
- int addslash = 0;
-
- if (!path)
- goto out;
-
- /* We dont as yet support relative paths anywhere in
- * the lib.
- */
- if (path[0] != '/')
- goto out;
-
- mypath = strdup (path);
- len = strlen (mypath);
- respath = calloc (strlen(mypath) + 1, sizeof (char));
- if (respath == NULL) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR,"Memory allocation failed");
- goto out;
- }
-
- /* The path only contains a / or a //, so simply add a /
- * and return.
- * This needs special handling because the loop below does
- * not allow us to do so through strtok.
- */
- if (((mypath[0] == '/') && (len == 1))
- || (strcmp (mypath, "//") == 0)) {
- strcat (respath, "/");
- goto out;
- }
-
- tok = strtok_r (mypath, "/", &saveptr);
- addslash = 0;
- strcat (respath, "/");
- while (tok) {
- if (addslash) {
- if ((strcmp (tok, ".") != 0)
- && (strcmp (tok, "..") != 0)) {
- strcat (respath, "/");
- }
- }
-
- if ((strcmp (tok, ".") != 0) && (strcmp (tok, "..") != 0)) {
- strcat (respath, tok);
- addslash = 1;
- } else if ((strcmp (tok, "..") == 0)) {
- libgf_trim_to_prev_dir (respath);
- addslash = 0;
- }
-
- tok = strtok_r (NULL, "/", &saveptr);
- }
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Path: %s, Resolved Path: %s",
- path, respath);
-out:
- if (mypath)
- free (mypath);
- return respath;
-}
-
-void
-libgf_client_loc_wipe (loc_t *loc)
-{
- if (loc->path) {
- FREE (loc->path);
- }
-
- if (loc->parent) {
- inode_unref (loc->parent);
- loc->parent = NULL;
- }
-
- if (loc->inode) {
- inode_unref (loc->inode);
- loc->inode = NULL;
- }
-
- loc->path = loc->name = NULL;
- loc->ino = 0;
-}
-
-
-int32_t
-libgf_client_loc_fill (loc_t *loc,
- libglusterfs_client_ctx_t *ctx,
- ino_t ino,
- ino_t par,
- const char *name)
-{
- inode_t *inode = NULL, *parent = NULL;
- int32_t ret = -1;
- char *path = NULL;
-
- /* resistance against multiple invocation of loc_fill not to get
- reference leaks via inode_search() */
-
- inode = loc->inode;
-
- if (!inode) {
- if (ino)
- inode = inode_search (ctx->itable, ino, NULL);
-
- if (inode)
- goto inode_found;
-
- if (par && name)
- inode = inode_search (ctx->itable, par, name);
- }
-
-inode_found:
- if (inode) {
- loc->ino = inode->ino;
- loc->inode = inode;
- }
-
- parent = loc->parent;
- if (!parent) {
- if (inode)
- parent = inode_parent (inode, par, name);
- else
- parent = inode_search (ctx->itable, par, NULL);
- loc->parent = parent;
- }
-
- if (!loc->path) {
- if (name && parent) {
- ret = inode_path (parent, name, &path);
- if (ret <= 0) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "inode_path failed for %"PRId64"/%s",
- parent->ino, name);
- goto fail;
- } else {
- loc->path = path;
- }
- } else if (inode) {
- ret = inode_path (inode, NULL, &path);
- if (ret <= 0) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "inode_path failed for %"PRId64,
- inode->ino);
- goto fail;
- } else {
- loc->path = path;
- }
- }
- }
-
- if (loc->path) {
- loc->name = strrchr (loc->path, '/');
- if (loc->name)
- loc->name++;
- else loc->name = "";
- }
-
- if ((ino != 1) &&
- (parent == NULL)) {
- gf_log ("fuse-bridge", GF_LOG_ERROR,
- "failed to search parent for %"PRId64"/%s (%"PRId64")",
- (ino_t)par, name, (ino_t)ino);
- ret = -1;
- goto fail;
- }
- ret = 0;
-fail:
- return ret;
-}
-
-
-static call_frame_t *
-get_call_frame_for_req (libglusterfs_client_ctx_t *ctx, char d)
-{
- call_pool_t *pool = ctx->gf_ctx.pool;
- xlator_t *this = ctx->gf_ctx.graph;
- call_frame_t *frame = NULL;
-
-
- frame = create_frame (this, pool);
-
- frame->root->uid = geteuid ();
- frame->root->gid = getegid ();
- frame->root->pid = ctx->pid;
- frame->root->unique = ctx->counter++;
-
- return frame;
-}
-
-void
-libgf_client_fini (xlator_t *this)
-{
- FREE (this->private);
- return;
-}
-
-
-int32_t
-libgf_client_notify (xlator_t *this,
- int32_t event,
- void *data,
- ...)
-{
- libglusterfs_client_private_t *priv = this->private;
-
- switch (event)
- {
- case GF_EVENT_CHILD_UP:
- pthread_mutex_lock (&priv->lock);
- {
- priv->complete = 1;
- pthread_cond_broadcast (&priv->init_con_established);
- }
- pthread_mutex_unlock (&priv->lock);
- break;
-
- default:
- default_notify (this, event, data);
- }
-
- return 0;
-}
-
-int32_t
-libgf_client_init (xlator_t *this)
-{
- return 0;
-}
-
-glusterfs_handle_t
-glusterfs_init (glusterfs_init_params_t *init_ctx, uint32_t fakefsid)
-{
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_private_t *priv = NULL;
- FILE *specfp = NULL;
- xlator_t *graph = NULL, *trav = NULL;
- call_pool_t *pool = NULL;
- int32_t ret = 0;
- struct rlimit lim;
- uint32_t xl_count = 0;
- loc_t new_loc = {0, };
- struct timeval tv = {0, };
-
- if (!init_ctx || (!init_ctx->specfile && !init_ctx->specfp)) {
- errno = EINVAL;
- return NULL;
- }
-
- ctx = CALLOC (1, sizeof (*ctx));
- if (!ctx) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: out of memory\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__);
-
- errno = ENOMEM;
- return NULL;
- }
-
- ctx->lookup_timeout = init_ctx->lookup_timeout;
- ctx->stat_timeout = init_ctx->stat_timeout;
- ctx->fake_fsid = fakefsid;
- ctx->pid = getpid ();
- pthread_mutex_init (&ctx->gf_ctx.lock, NULL);
-
- pool = ctx->gf_ctx.pool = CALLOC (1, sizeof (call_pool_t));
- if (!pool) {
- errno = ENOMEM;
- FREE (ctx);
- return NULL;
- }
-
- LOCK_INIT (&pool->lock);
- INIT_LIST_HEAD (&pool->all_frames);
-
- /* FIXME: why is count hardcoded to 16384 */
- ctx->gf_ctx.event_pool = event_pool_new (16384);
- ctx->gf_ctx.page_size = LIBGF_IOBUF_SIZE;
- ctx->gf_ctx.iobuf_pool = iobuf_pool_new (8 * 1048576,
- ctx->gf_ctx.page_size);
-
- lim.rlim_cur = RLIM_INFINITY;
- lim.rlim_max = RLIM_INFINITY;
- setrlimit (RLIMIT_CORE, &lim);
- setrlimit (RLIMIT_NOFILE, &lim);
-
- ctx->gf_ctx.cmd_args.log_level = GF_LOG_WARNING;
-
- if (init_ctx->logfile)
- ctx->gf_ctx.cmd_args.log_file = strdup (init_ctx->logfile);
- else
- ctx->gf_ctx.cmd_args.log_file = strdup ("/dev/stderr");
-
- if (init_ctx->loglevel) {
- if (!strncasecmp (init_ctx->loglevel, "DEBUG",
- strlen ("DEBUG"))) {
- ctx->gf_ctx.cmd_args.log_level = GF_LOG_DEBUG;
- } else if (!strncasecmp (init_ctx->loglevel, "WARNING",
- strlen ("WARNING"))) {
- ctx->gf_ctx.cmd_args.log_level = GF_LOG_WARNING;
- } else if (!strncasecmp (init_ctx->loglevel, "CRITICAL",
- strlen ("CRITICAL"))) {
- ctx->gf_ctx.cmd_args.log_level = GF_LOG_CRITICAL;
- } else if (!strncasecmp (init_ctx->loglevel, "NONE",
- strlen ("NONE"))) {
- ctx->gf_ctx.cmd_args.log_level = GF_LOG_NONE;
- } else if (!strncasecmp (init_ctx->loglevel, "ERROR",
- strlen ("ERROR"))) {
- ctx->gf_ctx.cmd_args.log_level = GF_LOG_ERROR;
- } else {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: Unrecognized log-level \"%s\", possible values are \"DEBUG|WARNING|[ERROR]|CRITICAL|NONE\"\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- init_ctx->loglevel);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- FREE (ctx);
- errno = EINVAL;
- return NULL;
- }
- }
-
- if (first_init)
- {
- ret = gf_log_init (ctx->gf_ctx.cmd_args.log_file);
- if (ret == -1) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: failed to open logfile \"%s\"\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- FREE (ctx);
- return NULL;
- }
-
- gf_log_set_loglevel (ctx->gf_ctx.cmd_args.log_level);
- }
-
- if (init_ctx->specfp) {
- specfp = init_ctx->specfp;
- if (fseek (specfp, 0L, SEEK_SET)) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: fseek on volume file stream failed (%s)\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- strerror (errno));
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- FREE (ctx);
- return NULL;
- }
- } else if (init_ctx->specfile) {
- specfp = fopen (init_ctx->specfile, "r");
- ctx->gf_ctx.cmd_args.volume_file = strdup (init_ctx->specfile);
- }
-
- if (!specfp) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: could not open volfile: %s\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- strerror (errno));
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- FREE (ctx);
- return NULL;
- }
-
- if (init_ctx->volume_name) {
- ctx->gf_ctx.cmd_args.volume_name = strdup (init_ctx->volume_name);
- }
-
- graph = file_to_xlator_tree (&ctx->gf_ctx, specfp);
- if (!graph) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: cannot create configuration graph (%s)\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- strerror (errno));
-
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- FREE (ctx);
- return NULL;
- }
-
- if (init_ctx->volume_name) {
- trav = graph;
- while (trav) {
- if (strcmp (trav->name, init_ctx->volume_name) == 0) {
- graph = trav;
- break;
- }
- trav = trav->next;
- }
- }
-
- ctx->gf_ctx.graph = libglusterfs_graph (graph);
- if (!ctx->gf_ctx.graph) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: graph creation failed (%s)\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- strerror (errno));
-
- xlator_tree_free (graph);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- FREE (ctx);
- return NULL;
- }
- graph = ctx->gf_ctx.graph;
- ctx->gf_ctx.top = graph;
-
- trav = graph;
- while (trav) {
- xl_count++; /* Getting this value right is very important */
- trav = trav->next;
- }
-
- ctx->gf_ctx.xl_count = xl_count + 1;
-
- priv = CALLOC (1, sizeof (*priv));
- if (!priv) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: cannot allocate memory (%s)\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- strerror (errno));
-
- xlator_tree_free (graph);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- /* inode_table_destroy (ctx->itable); */
- FREE (ctx);
-
- return NULL;
- }
-
- pthread_cond_init (&priv->init_con_established, NULL);
- pthread_mutex_init (&priv->lock, NULL);
-
- graph->private = priv;
- ctx->itable = inode_table_new (LIBGLUSTERFS_INODE_TABLE_LRU_LIMIT,
- graph);
- if (!ctx->itable) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: cannot create inode table\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__);
- xlator_tree_free (graph);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
-
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- xlator_tree_free (graph);
- /* TODO: destroy graph */
- /* inode_table_destroy (ctx->itable); */
- FREE (ctx);
-
- return NULL;
- }
-
- set_global_ctx_ptr (&ctx->gf_ctx);
- ctx->gf_ctx.process_uuid = zr_build_process_uuid ();
-
- if (xlator_graph_init (graph) == -1) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: graph initialization failed\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__);
- xlator_tree_free (graph);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- /* TODO: destroy graph */
- /* inode_table_destroy (ctx->itable); */
- FREE (ctx);
- return NULL;
- }
-
- /* Send notify to all translator saying things are ready */
- graph->notify (graph, GF_EVENT_PARENT_UP, graph);
-
- if (gf_timer_registry_init (&ctx->gf_ctx) == NULL) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: timer init failed (%s)\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- strerror (errno));
-
- xlator_graph_fini (graph);
- xlator_tree_free (graph);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
-
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- /* TODO: destroy graph */
- /* inode_table_destroy (ctx->itable); */
- FREE (ctx);
- return NULL;
- }
-
- if ((ret = pthread_create (&ctx->reply_thread, NULL, poll_proc,
- (void *)&ctx->gf_ctx))) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: reply thread creation failed\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__);
- xlator_graph_fini (graph);
- xlator_tree_free (graph);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
-
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- /* TODO: destroy graph */
- /* inode_table_destroy (ctx->itable); */
- FREE (ctx);
- return NULL;
- }
-
- pthread_mutex_lock (&priv->lock);
- {
- while (!priv->complete) {
- pthread_cond_wait (&priv->init_con_established,
- &priv->lock);
- }
- }
- pthread_mutex_unlock (&priv->lock);
-
- /*
- * wait for some time to allow initialization of all children of
- * distribute before sending lookup on '/'
- */
-
- tv.tv_sec = 0;
- tv.tv_usec = (100 * 1000);
- select (0, NULL, NULL, NULL, &tv);
-
- /* workaround for xlators like dht which require lookup to be sent
- * on / */
- libgf_client_loc_fill (&new_loc, ctx, 1, 0, "/");
- ret = libgf_client_lookup (ctx, &new_loc, NULL, NULL, NULL);
- if (ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR, "lookup of /"
- " failed");
- return NULL;
- }
- libgf_client_loc_wipe (&new_loc);
-
- first_init = 0;
-
- return ctx;
-}
-
-struct vmp_entry *
-libgf_init_vmpentry (char *vmp, glusterfs_handle_t *vmphandle)
-{
- struct vmp_entry *entry = NULL;
- int vmplen = 0;
- int appendslash = 0;
- int ret = -1;
-
- entry = CALLOC (1, sizeof (struct vmp_entry));
- if (!entry) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR,"Memory allocation failed");
- return NULL;
- }
-
- vmplen = strlen (vmp);
- if (vmp[vmplen - 1] != '/') {
- vmplen++;
- appendslash = 1;
- }
-
- entry->vmp = CALLOC (vmplen + 1, sizeof (char));
- if (!entry->vmp) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Memory allocation "
- "failed");
- goto free_entry;
- }
-
- strcpy (entry->vmp, vmp);
- if (appendslash) {
- entry->vmp[vmplen-1] = '/';
- entry->vmp[vmplen] = '\0';
- }
-
- entry->vmplen = vmplen;
- entry->handle = vmphandle;
- INIT_LIST_HEAD (&entry->list);
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "New VMP entry: %s", vmp);
-
- ret = 0;
-
-free_entry:
- if (ret == -1) {
- if (entry->vmp)
- FREE (entry->vmp);
- if (entry)
- FREE (entry);
- entry = NULL;
- }
- return entry;
-}
-
-void
-libgf_free_vmp_entry (struct vmp_entry *entry)
-{
- FREE (entry->vmp);
- FREE (entry);
-}
-
-int
-libgf_count_path_components (char *path)
-{
- int compos = 0;
- char *pathdup = NULL;
- int len = 0;
-
- if (!path)
- return -1;
-
- pathdup = strdup (path);
- if (!pathdup)
- return -1;
-
- len = strlen (pathdup);
- if (pathdup[len - 1] == '/')
- pathdup[len - 1] = '\0';
-
- path = pathdup;
- while ((path = strchr (path, '/'))) {
- compos++;
- ++path;
- }
-
- free (pathdup);
- return compos;
-}
-
-/* Returns the number of components that match between
- * the VMP and the path. Assumes string1 is vmp entry.
- * Assumes both are absolute paths.
- */
-int
-libgf_strmatchcount (char *string1, char *string2)
-{
- int matchcount = 0;
- char *s1dup = NULL, *s2dup = NULL;
- char *tok1 = NULL, *saveptr1 = NULL;
- char *tok2 = NULL, *saveptr2 = NULL;
-
- if ((!string1) || (!string2))
- return 0;
-
- s1dup = strdup (string1);
- if (!s1dup)
- return 0;
-
- s2dup = strdup (string2);
- if (!s2dup)
- goto free_s1;
-
- string1 = s1dup;
- string2 = s2dup;
-
- tok1 = strtok_r(string1, "/", &saveptr1);
- tok2 = strtok_r (string2, "/", &saveptr2);
- while (tok1) {
- if (!tok2)
- break;
-
- if (strcmp (tok1, tok2) != 0)
- break;
-
- matchcount++;
- tok1 = strtok_r(NULL, "/", &saveptr1);
- tok2 = strtok_r (NULL, "/", &saveptr2);
- }
-
- free (s2dup);
-free_s1:
- free (s1dup);
- return matchcount;
-}
-
-int
-libgf_vmp_entry_match (struct vmp_entry *entry, char *path)
-{
- return libgf_strmatchcount (entry->vmp, path);
-}
-
-#define LIBGF_VMP_EXACT 1
-#define LIBGF_VMP_LONGESTPREFIX 0
-struct vmp_entry *
-_libgf_vmp_search_entry (char *path, int searchtype)
-{
- struct vmp_entry *entry = NULL;
- int matchcount = 0;
- struct vmp_entry *maxentry = NULL;
- int maxcount = 0;
- int vmpcompcount = 0;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "VMP Search: path %s, type: %s",
- path, (searchtype == LIBGF_VMP_EXACT)?"Exact":"LongestPrefix");
- if (vmplist.entries == 0) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Virtual Mount Point "
- "list is empty.");
- goto out;
- }
-
- list_for_each_entry(entry, &vmplist.list, list) {
- vmpcompcount = libgf_count_path_components (entry->vmp);
- matchcount = libgf_vmp_entry_match (entry, path);
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Candidate VMP: %s,"
- " Matchcount: %d", entry->vmp, matchcount);
- if ((matchcount > maxcount) && (matchcount == vmpcompcount)) {
- maxcount = matchcount;
- maxentry = entry;
- }
- }
-
- /* To ensure that the longest prefix matched entry is also an exact
- * match, this is used to check whether duplicate entries are present
- * in the vmplist.
- */
- vmpcompcount = 0;
- if ((searchtype == LIBGF_VMP_EXACT) && (maxentry)) {
- vmpcompcount = libgf_count_path_components (maxentry->vmp);
- matchcount = libgf_count_path_components (path);
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Exact Check: VMP: %s,"
- " CompCount: %d, Path: %s, CompCount: %d",
- maxentry->vmp, vmpcompcount, path, matchcount);
- if (vmpcompcount != matchcount) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "No Match");
- maxentry = NULL;
- } else
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Matches!");
- }
-
-out:
- return maxentry;
-}
-
-/* Used to search for a exactly matching VMP entry.
- */
-struct vmp_entry *
-libgf_vmp_search_exact_entry (char *path)
-{
- struct vmp_entry *entry = NULL;
-
- if (!path)
- goto out;
-
- pthread_mutex_lock (&vmplock);
- {
- entry = _libgf_vmp_search_entry (path, LIBGF_VMP_EXACT);
- }
- pthread_mutex_unlock (&vmplock);
-
-out:
- if (entry)
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "VMP Entry found: path :%s"
- " vmp: %s", path, entry->vmp);
- else
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "VMP Entry not found: path"
- ": %s", path);
-
- return entry;
-}
-
-
-/* Used to search for a longest prefix matching VMP entry.
- */
-struct vmp_entry *
-libgf_vmp_search_entry (char *path)
-{
- struct vmp_entry *entry = NULL;
-
- if (!path)
- goto out;
-
- pthread_mutex_lock (&vmplock);
- {
- entry = _libgf_vmp_search_entry (path, LIBGF_VMP_LONGESTPREFIX);
- }
- pthread_mutex_unlock (&vmplock);
-
-out:
- if (entry)
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "VMP Entry found: path :%s"
- " vmp: %s", path, entry->vmp);
- else
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "VMP Entry not found: path"
- ": %s", path);
-
- return entry;
-}
-
-int
-libgf_vmp_map_ghandle (char *vmp, glusterfs_handle_t *vmphandle)
-{
- int ret = -1;
- struct vmp_entry *vmpentry = NULL;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "New Entry: %s", vmp);
- vmpentry = libgf_init_vmpentry (vmp, vmphandle);
- if (!vmpentry) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Failed to create VMP"
- " entry");
- goto out;
- }
-
- pthread_mutex_lock (&vmplock);
- {
- if (vmplist.entries == 0) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Empty list");
- INIT_LIST_HEAD (&vmplist.list);
- }
-
- list_add_tail (&vmpentry->list, &vmplist.list);
- ++vmplist.entries;
- }
- pthread_mutex_unlock (&vmplock);
- ret = 0;
-
-out:
- return ret;
-}
-
-/* Path must be validated already. */
-glusterfs_handle_t
-libgf_vmp_get_ghandle (char * path)
-{
- struct vmp_entry *entry = NULL;
-
- entry = libgf_vmp_search_entry (path);
-
- if (entry == NULL)
- return NULL;
-
- return entry->handle;
-}
-
-int
-glusterfs_mount (char *vmp, glusterfs_init_params_t *ipars)
-{
- glusterfs_handle_t vmphandle = NULL;
- int ret = -1;
- char *vmp_resolved = NULL;
- struct vmp_entry *vmp_entry = NULL;
- uint32_t vmphash = 0;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, vmp, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ipars, out);
-
- vmp_resolved = libgf_resolve_path_light (vmp);
- if (!vmp_resolved) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- vmphash = (dev_t)ReallySimpleHash (vmp, strlen (vmp));
- pthread_mutex_lock (&mountlock);
- {
- vmp_entry = libgf_vmp_search_exact_entry (vmp);
- if (vmp_entry) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Entry exists");
- ret = 0;
- goto unlock;
- }
-
- vmphandle = glusterfs_init (ipars, vmphash);
- if (!vmphandle) {
- errno = EINVAL;
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "GlusterFS context"
- " init failed");
- goto unlock;
- }
-
- ret = libgf_vmp_map_ghandle (vmp_resolved, vmphandle);
- if (ret == -1) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Failed to map new"
- " handle: %s", vmp);
- glusterfs_fini (vmphandle);
- }
- }
-unlock:
- pthread_mutex_unlock (&mountlock);
-
-out:
- if (vmp_resolved)
- FREE (vmp_resolved);
-
- return ret;
-}
-
-inline int
-_libgf_umount (char *vmp)
-{
- struct vmp_entry *entry= NULL;
- int ret = -1;
-
- entry = libgf_vmp_search_exact_entry (vmp);
- if (entry == NULL) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path (%s) not mounted", vmp);
- goto out;
- }
-
- if (entry->handle == NULL) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path (%s) has no corresponding glusterfs handle",
- vmp);
- goto out;
- }
-
- ret = glusterfs_fini (entry->handle);
- libgf_free_vmp_entry (entry);
-
- list_del_init (&entry->list);
- vmplist.entries--;
-
-out:
- return ret;
-}
-
-inline int
-libgf_umount (char *vmp)
-{
- int ret = -1;
-
- pthread_mutex_lock (&vmplock);
- {
- ret = _libgf_umount (vmp);
- }
- pthread_mutex_unlock (&vmplock);
-
- return ret;
-}
-
-int
-glusterfs_umount (char *vmp)
-{
- int ret = -1;
- char *vmp_resolved = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, vmp, out);
-
- vmp_resolved = libgf_resolve_path_light (vmp);
- if (!vmp_resolved) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- ret = libgf_umount (vmp_resolved);
-
-out:
- if (vmp_resolved)
- FREE (vmp_resolved);
-
- return ret;
-}
-
-int
-glusterfs_umount_all (void)
-{
- struct vmp_entry *entry = NULL, *tmp = NULL;
-
- pthread_mutex_lock (&vmplock);
- {
- list_for_each_entry_safe (entry, tmp, &vmplist.list, list) {
- /* even if there are errors, continue with other
- mounts
- */
- _libgf_umount (entry->vmp);
- }
- }
- pthread_mutex_unlock (&vmplock);
-
- return 0;
-}
-
-void
-glusterfs_reset (void)
-{
- INIT_LIST_HEAD (&vmplist.list);
- vmplist.entries = 0;
-
- memset (&vmplock, 0, sizeof (vmplock));
- pthread_mutex_init (&vmplock, NULL);
-
- first_fini = first_init = 1;
-}
-
-void
-glusterfs_log_lock (void)
-{
- gf_log_lock ();
-}
-
-
-void glusterfs_log_unlock (void)
-{
- gf_log_unlock ();
-}
-
-
-int
-glusterfs_fini (glusterfs_handle_t handle)
-{
- libglusterfs_client_ctx_t *ctx = handle;
-
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- ((gf_timer_registry_t *)ctx->gf_ctx.timer)->fin = 1;
- /* inode_table_destroy (ctx->itable); */
-
- xlator_graph_fini (ctx->gf_ctx.graph);
- xlator_tree_free (ctx->gf_ctx.graph);
- ctx->gf_ctx.graph = NULL;
-
- /* FREE (ctx->gf_ctx.specfile); */
-
- /* TODO complete cleanup of timer */
- /*TODO
- * destroy the reply thread
- * destroy inode table
- * FREE (ctx)
- */
-
- FREE (ctx);
-
- if (first_fini) {
- ;
- //gf_log_cleanup ();
- }
-
- return 0;
-}
-
-
-int32_t
-libgf_client_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf,
- dict_t *dict)
-{
- libgf_client_local_t *local = frame->local;
- libglusterfs_client_ctx_t *ctx = frame->root->state;
- dict_t *xattr_req = NULL;
-
- if (op_ret == 0) {
- inode_t *parent = NULL;
-
- if (local->fop.lookup.loc->ino == 1) {
- buf->st_ino = 1;
- }
-
- parent = local->fop.lookup.loc->parent;
- libgf_transform_devnum (ctx, buf);
- inode_link (inode, parent, local->fop.lookup.loc->name, buf);
- inode_lookup (inode);
- } else {
- if ((local->fop.lookup.is_revalidate == 0)
- && (op_errno == ENOENT)) {
- gf_log ("libglusterfsclient", GF_LOG_DEBUG,
- "%"PRId64": (op_num=%d) %s => -1 (%s)",
- frame->root->unique, frame->root->op,
- local->fop.lookup.loc->path,
- strerror (op_errno));
- } else {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "%"PRId64": (op_num=%d) %s => -1 (%s)",
- frame->root->unique, frame->root->op,
- local->fop.lookup.loc->path,
- strerror (op_errno));
- }
-
- if (local->fop.lookup.is_revalidate == 1) {
- int32_t ret = 0;
- inode_unref (local->fop.lookup.loc->inode);
- local->fop.lookup.loc->inode = inode_new (ctx->itable);
- local->fop.lookup.is_revalidate = 2;
-
- if (local->fop.lookup.size > 0) {
- xattr_req = dict_new ();
- ret = dict_set (xattr_req, "glusterfs.content",
- data_from_uint64 (local->fop.lookup.size));
- if (ret == -1) {
- op_ret = -1;
- /* TODO: set proper error code */
- op_errno = errno;
- inode = NULL;
- buf = NULL;
- dict = NULL;
- dict_unref (xattr_req);
- goto out;
- }
- }
-
- STACK_WIND (frame, libgf_client_lookup_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->lookup,
- local->fop.lookup.loc, xattr_req);
-
- if (xattr_req) {
- dict_unref (xattr_req);
- xattr_req = NULL;
- }
-
- return 0;
- }
- }
-
-out:
- local->reply_stub = fop_lookup_cbk_stub (frame, NULL, op_ret, op_errno,
- inode, buf, dict);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int32_t
-libgf_client_lookup (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- struct stat *stbuf,
- dict_t **dict,
- dict_t *xattr_req)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret;
- libgf_client_local_t *local = NULL;
- inode_t *inode = NULL;
-
- local = CALLOC (1, sizeof (*local));
- if (!local) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Memory allocation"
- " failed");
- errno = ENOMEM;
- return -1;
- }
-
- if (loc->inode) {
- local->fop.lookup.is_revalidate = 1;
- loc->ino = loc->inode->ino;
- }
- else
- loc->inode = inode_new (ctx->itable);
-
- local->fop.lookup.loc = loc;
-
- LIBGF_CLIENT_FOP(ctx, stub, lookup, local, loc, xattr_req);
-
- op_ret = stub->args.lookup_cbk.op_ret;
- errno = stub->args.lookup_cbk.op_errno;
-
- if (op_ret == -1)
- goto out;
-
- inode = stub->args.lookup_cbk.inode;
- if (!(libgf_get_inode_ctx (inode)))
- libgf_alloc_inode_ctx (ctx, inode);
- libgf_transform_devnum (ctx, &stub->args.lookup_cbk.buf);
- libgf_update_iattr_cache (inode, LIBGF_UPDATE_ALL,
- &stub->args.lookup_cbk.buf);
- if (stbuf)
- *stbuf = stub->args.lookup_cbk.buf;
-
- if (dict)
- *dict = dict_ref (stub->args.lookup_cbk.dict);
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_glh_get (glusterfs_handle_t handle, const char *path, void *buf,
- size_t size, struct stat *stbuf)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- dict_t *dict = NULL;
- dict_t *xattr_req = NULL;
- char *name = NULL, *pathname = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, size %lu", path,
- (long unsigned)size);
- if (size < 0) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Invalid size");
- errno = EINVAL;
- goto out;
- }
-
- if (size == 0) {
- op_ret = 0;
- goto out;
- }
-
- loc.path = libgf_resolve_path_light ((char *)path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 0);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- pathname = strdup (loc.path);
- name = basename (pathname);
-
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino, name);
- if (op_ret < 0) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- if (size) {
- xattr_req = dict_new ();
- op_ret = dict_set (xattr_req, "glusterfs.content",
- data_from_uint64 (size));
- if (op_ret < 0) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "setting requested content size dictionary failed");
- goto out;
- }
- }
-
- op_ret = libgf_client_lookup (ctx, &loc, stbuf, &dict, xattr_req);
- if (!op_ret && stbuf && (stbuf->st_size <= size) && dict && buf) {
- data_t *mem_data = NULL;
- void *mem = NULL;
-
- mem_data = dict_get (dict, "glusterfs.content");
- if (mem_data) {
- mem = data_to_ptr (mem_data);
- }
-
- if (mem != NULL) {
- memcpy (buf, mem, stbuf->st_size);
- }
- }
-
-out:
- if (xattr_req) {
- dict_unref (xattr_req);
- }
-
- if (dict) {
- dict_unref (dict);
- }
-
- if (pathname) {
- FREE (pathname);
- }
- libgf_client_loc_wipe (&loc);
-
- return op_ret;
-}
-
-int
-glusterfs_get (const char *path, void *buf, size_t size, struct stat *stbuf)
-{
- struct vmp_entry *entry = NULL;
- int op_ret = -1;
- char *vpath = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, buf, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, stbuf, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, size %lu", path,
- (long unsigned)size);
- entry = libgf_vmp_search_entry ((char *)path);
- if (!entry) {
- errno = ENODEV;
- goto out;
- }
-
- vpath = libgf_vmp_virtual_path (entry, path);
- op_ret = glusterfs_glh_get (entry->handle, vpath, buf, size, stbuf);
-
-out:
- if (vpath)
- free (vpath);
- return op_ret;
-}
-
-int
-libgf_client_lookup_async_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *stbuf,
- dict_t *dict)
-{
- libglusterfs_client_async_local_t *local = frame->local;
- glusterfs_get_cbk_t lookup_cbk = local->fop.lookup_cbk.cbk;
- libglusterfs_client_ctx_t *ctx = frame->root->state;
- glusterfs_iobuf_t *iobuf = NULL;
- dict_t *xattr_req = NULL;
- inode_t *parent = NULL;
-
- if (op_ret == 0) {
- parent = local->fop.lookup_cbk.loc->parent;
- libgf_transform_devnum (ctx, stbuf);
- inode_link (inode, parent, local->fop.lookup_cbk.loc->name,
- stbuf);
-
- if (!(libgf_get_inode_ctx (inode)))
- libgf_alloc_inode_ctx (ctx, inode);
- libgf_update_iattr_cache (inode, LIBGF_UPDATE_ALL, stbuf);
- inode_lookup (inode);
- } else {
- if ((local->fop.lookup_cbk.is_revalidate == 0)
- && (op_errno == ENOENT)) {
- gf_log ("libglusterfsclient", GF_LOG_DEBUG,
- "%"PRId64": (op_num=%d) %s => -1 (%s)",
- frame->root->unique, frame->root->op,
- local->fop.lookup_cbk.loc->path,
- strerror (op_errno));
- } else {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "%"PRId64": (op_num=%d) %s => -1 (%s)",
- frame->root->unique, frame->root->op,
- local->fop.lookup_cbk.loc->path,
- strerror (op_errno));
- }
-
- if (local->fop.lookup_cbk.is_revalidate == 1) {
- int32_t ret = 0;
- inode_unref (local->fop.lookup_cbk.loc->inode);
- local->fop.lookup_cbk.loc->inode = inode_new (ctx->itable);
- local->fop.lookup_cbk.is_revalidate = 2;
-
- if (local->fop.lookup_cbk.size > 0) {
- xattr_req = dict_new ();
- ret = dict_set (xattr_req, "glusterfs.content",
- data_from_uint64 (local->fop.lookup_cbk.size));
- if (ret == -1) {
- op_ret = -1;
- /* TODO: set proper error code */
- op_errno = errno;
- inode = NULL;
- stbuf = NULL;
- dict = NULL;
- dict_unref (xattr_req);
- goto out;
- }
- }
-
-
- STACK_WIND (frame, libgf_client_lookup_async_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->lookup,
- local->fop.lookup_cbk.loc, xattr_req);
-
- if (xattr_req) {
- dict_unref (xattr_req);
- xattr_req = NULL;
- }
-
- return 0;
- }
- }
-
-out:
- if (!op_ret && local->fop.lookup_cbk.size && dict) {
- data_t *mem_data = NULL;
- void *mem = NULL;
- struct iovec *vector = NULL;
-
- mem_data = dict_get (dict, "glusterfs.content");
- if (mem_data) {
- mem = data_to_ptr (mem_data);
- }
-
- if (mem && stbuf->st_size <= local->fop.lookup_cbk.size) {
- iobuf = CALLOC (1, sizeof (*iobuf));
- ERR_ABORT (iobuf);
-
- vector = CALLOC (1, sizeof (*vector));
- ERR_ABORT (vector);
- vector->iov_base = mem;
- vector->iov_len = stbuf->st_size;
-
- iobuf->vector = vector;
- iobuf->count = 1;
- iobuf->dictref = dict_ref (dict);
- }
- }
-
- lookup_cbk (op_ret, op_errno, iobuf, stbuf, local->cbk_data);
-
- libgf_client_loc_wipe (local->fop.lookup_cbk.loc);
- free (local->fop.lookup_cbk.loc);
-
- free (local);
- frame->local = NULL;
- STACK_DESTROY (frame->root);
-
- return 0;
-}
-
-/* TODO: implement async dentry lookup */
-
-int
-glusterfs_get_async (glusterfs_handle_t handle,
- const char *path,
- size_t size,
- glusterfs_get_cbk_t cbk,
- void *cbk_data)
-{
- loc_t *loc = NULL;
- libglusterfs_client_ctx_t *ctx = handle;
- libglusterfs_client_async_local_t *local = NULL;
- int32_t op_ret = 0;
- dict_t *xattr_req = NULL;
- char *name = NULL, *pathname = NULL;
-
- if (!ctx || !path || path[0] != '/') {
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
-
- if (size < 0) {
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
-
- if (size == 0) {
- op_ret = 0;
- goto out;
- }
-
- local = CALLOC (1, sizeof (*local));
- local->fop.lookup_cbk.is_revalidate = 1;
-
- loc = CALLOC (1, sizeof (*loc));
- loc->path = strdup (path);
- op_ret = libgf_client_path_lookup (loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "path lookup failed for (%s)", path);
- goto out;
- }
-
- pathname = strdup (path);
- name = basename (pathname);
- op_ret = libgf_client_loc_fill (loc, ctx, 0, loc->parent->ino, name);
- if (op_ret < 0) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- if (!loc->inode) {
- loc->inode = inode_new (ctx->itable);
- local->fop.lookup_cbk.is_revalidate = 0;
- }
-
- local->fop.lookup_cbk.cbk = cbk;
- local->fop.lookup_cbk.size = size;
- local->fop.lookup_cbk.loc = loc;
- local->cbk_data = cbk_data;
-
- if (size > 0) {
- xattr_req = dict_new ();
- op_ret = dict_set (xattr_req, "glusterfs.content",
- data_from_uint64 (size));
- if (op_ret < 0) {
- dict_unref (xattr_req);
- xattr_req = NULL;
- goto out;
- }
- }
-
- LIBGF_CLIENT_FOP_ASYNC (ctx,
- local,
- libgf_client_lookup_async_cbk,
- lookup,
- loc,
- xattr_req);
- if (xattr_req) {
- dict_unref (xattr_req);
- xattr_req = NULL;
- }
-
-out:
- if (pathname) {
- FREE (pathname);
- }
-
- return op_ret;
-}
-
-int32_t
-libgf_client_getxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
-
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_getxattr_cbk_stub (frame, NULL, op_ret,
- op_errno, dict);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-size_t
-libgf_client_getxattr (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- const char *name,
- void *value,
- size_t size)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- libgf_client_local_t *local = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, getxattr, local, loc, name);
-
- op_ret = stub->args.getxattr_cbk.op_ret;
- errno = stub->args.getxattr_cbk.op_errno;
-
- if (op_ret >= 0) {
- /*
- gf_log ("LIBGF_CLIENT", GF_LOG_DEBUG,
- "%"PRId64": %s => %d", frame->root->unique,
- state->fuse_loc.loc.path, op_ret);
- */
-
- data_t *value_data = dict_get (stub->args.getxattr_cbk.dict,
- (char *)name);
-
- if (value_data) {
- int32_t copy_len = 0;
-
- /* Don't return the value for '\0' */
- op_ret = value_data->len;
- copy_len = size < value_data->len ?
- size : value_data->len;
- memcpy (value, value_data->data, copy_len);
- } else {
- errno = ENODATA;
- op_ret = -1;
- }
- }
-
- call_stub_destroy (stub);
- return op_ret;
-}
-
-#define LIBGF_DO_GETXATTR 1
-#define LIBGF_DO_LGETXATTR 2
-
-ssize_t
-__glusterfs_glh_getxattr (glusterfs_handle_t handle, const char *path,
- const char *name, void *value, size_t size,
- int whichop)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- dict_t *dict = NULL;
- libglusterfs_client_ctx_t *ctx = handle;
- char *file = NULL;
- dict_t *xattr_req = NULL;
- char *pathres = NULL, *tmp = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, name, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, name %s, size %lu,"
- " op %d", path, name, (long unsigned)size, whichop);
- if (name[0] == '\0') {
- errno = EINVAL;
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Invalid argument: Name"
- " not NULL terminated");
- goto out;
- }
-
- if (size == 0) {
- op_ret = 0;
- goto out;
- }
-
- if (size < 0) {
- errno = EINVAL;
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Invalid argument: size is"
- " less than zero");
- goto out;
- }
-
- pathres = libgf_resolve_path_light ((char *)path);
- if (!pathres) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- loc.path = strdup (pathres);
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- tmp = strdup (pathres);
- file = basename (tmp);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino, file);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- xattr_req = dict_new ();
- op_ret = dict_set (xattr_req, (char *)name,
- data_from_uint64 (size));
- if (op_ret == -1) {
- /* TODO: set proper error code */
- goto out;
- }
-
- if (whichop == LIBGF_DO_LGETXATTR)
- goto do_getx;
-
- if (!S_ISLNK (loc.inode->st_mode))
- goto do_getx;
-
- libgf_client_loc_wipe (&loc);
- op_ret = libgf_realpath_loc_fill (ctx, (char *)pathres, &loc);
- if (op_ret == -1) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "realpath failed");
- goto out;
- }
-
-do_getx:
- op_ret = libgf_client_lookup (ctx, &loc, NULL, &dict, xattr_req);
- if (op_ret == 0) {
- data_t *value_data = dict_get (dict, (char *)name);
-
- if (value_data) {
- int32_t copy_len = 0;
-
- /* Don't return the value for '\0' */
- op_ret = value_data->len;
-
- copy_len = size < value_data->len ?
- size : value_data->len;
- memcpy (value, value_data->data, copy_len);
- } else {
- errno = ENODATA;
- op_ret = -1;
- }
- }
-
-out:
- if (tmp) {
- FREE (tmp);
- }
-
- if (xattr_req) {
- dict_unref (xattr_req);
- }
-
- if (dict) {
- dict_unref (dict);
- }
-
- if (pathres)
- FREE (pathres);
-
- libgf_client_loc_wipe (&loc);
-
- return op_ret;
-}
-
-ssize_t
-glusterfs_glh_getxattr (glusterfs_handle_t handle, const char *path,
- const char *name, void *value, size_t size)
-{
- return __glusterfs_glh_getxattr (handle, path, name, value, size,
- LIBGF_DO_GETXATTR);
-}
-
-ssize_t
-glusterfs_glh_lgetxattr (glusterfs_handle_t handle, const char *path,
- const char *name, void *value, size_t size)
-{
- return __glusterfs_glh_getxattr (handle, path, name, value, size,
- LIBGF_DO_LGETXATTR);
-}
-
-ssize_t
-glusterfs_getxattr (const char *path, const char *name, void *value,
- size_t size)
-{
- int op_ret = -1;
- struct vmp_entry *entry = NULL;
- char *vpath = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, name, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, value, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, name %s, size %lu",
- path, name, (long unsigned)size);
- entry = libgf_vmp_search_entry ((char *)path);
- if (!entry) {
- errno = ENODEV;
- goto out;
- }
-
- vpath = libgf_vmp_virtual_path (entry, path);
- op_ret = __glusterfs_glh_getxattr (entry->handle, vpath, name, value,
- size, LIBGF_DO_GETXATTR);
-
-out:
- if (vpath)
- free (vpath);
- return op_ret;
-}
-
-ssize_t
-glusterfs_lgetxattr (const char *path, const char *name, void *value,
- size_t size)
-{
- int op_ret = -1;
- struct vmp_entry *entry = NULL;
- char *vpath = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, name, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, value, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, name %s, size %lu",
- path, name, (long unsigned)size);
- entry = libgf_vmp_search_entry ((char *)path);
- if (!entry) {
- errno = ENODEV;
- goto out;
- }
-
- vpath = libgf_vmp_virtual_path (entry, path);
- op_ret = __glusterfs_glh_getxattr (entry->handle, vpath, name, value,
- size, LIBGF_DO_LGETXATTR);
-
-out:
- if (vpath)
- free (vpath);
- return op_ret;
-}
-
-static int32_t
-libgf_client_open_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_open_cbk_stub (frame, NULL, op_ret, op_errno,
- fd);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-
-int
-libgf_client_open (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- fd_t *fd,
- int flags)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- libgf_client_local_t *local = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, open, local, loc, flags, fd);
-
- op_ret = stub->args.open_cbk.op_ret;
- errno = stub->args.open_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "open: path %s, status: %d, errno"
- " %d", loc->path, op_ret, errno);
- if (op_ret != -1)
- fd_bind (fd);
- call_stub_destroy (stub);
- return op_ret;
-}
-
-static int32_t
-libgf_client_create_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct stat *buf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_create_cbk_stub (frame, NULL, op_ret, op_errno,
- fd, inode, buf);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_creat (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- fd_t *fd,
- int flags,
- mode_t mode)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- libgf_client_local_t *local = NULL;
- inode_t *libgf_inode = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, create, local, loc, flags, mode, fd);
-
- op_ret = stub->args.create_cbk.op_ret;
- errno = stub->args.create_cbk.op_errno;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Create: path %s, status: %d,"
- " errno: %d", loc->path, op_ret, errno);
- if (op_ret == -1)
- goto out;
-
- libgf_inode = stub->args.create_cbk.inode;
- libgf_transform_devnum (ctx, &stub->args.create_cbk.buf);
- inode_link (libgf_inode, loc->parent, loc->name,
- &stub->args.create_cbk.buf);
-
- inode_lookup (libgf_inode);
-
- libgf_alloc_inode_ctx (ctx, libgf_inode);
- libgf_update_iattr_cache (libgf_inode, LIBGF_UPDATE_ALL,
- &stub->args.create_cbk.buf);
-
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int32_t
-libgf_client_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_opendir_cbk_stub (frame, NULL, op_ret, op_errno,
- fd);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_opendir (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- fd_t *fd)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = -1;
- libgf_client_local_t *local = NULL;
-
- if (((fd->flags & O_ACCMODE) == O_WRONLY)
- || ((fd->flags & O_ACCMODE) == O_RDWR)) {
- errno = EISDIR;
- goto out;
- }
- LIBGF_CLIENT_FOP (ctx, stub, opendir, local, loc, fd);
-
- op_ret = stub->args.opendir_cbk.op_ret;
- errno = stub->args.opendir_cbk.op_errno;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "opendir: path %s, status %d,"
- " errno %d", loc->path, op_ret, errno);
- if (op_ret != -1)
- fd_bind (fd);
-
- call_stub_destroy (stub);
-out:
- return op_ret;
-}
-
-glusterfs_file_t
-glusterfs_glh_open (glusterfs_handle_t handle, const char *path, int flags,...)
-{
- loc_t loc = {0, };
- long op_ret = -1;
- fd_t *fd = NULL;
- int32_t ret = -1;
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL, *pathname = NULL;
- libglusterfs_client_inode_ctx_t *inode_ctx = NULL;
- mode_t mode = 0;
- va_list ap;
- char *pathres = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- pathres = libgf_resolve_path_light ((char *)path);
- if (!pathres) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- loc.path = strdup (pathres);
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
-
- if ((op_ret == -1) && ((flags & O_CREAT) != O_CREAT)) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- if (!op_ret && ((flags & O_CREAT) == O_CREAT)
- && ((flags & O_EXCL) == O_EXCL)) {
- errno = EEXIST;
- op_ret = -1;
- goto out;
- }
-
- if ((op_ret == -1) && ((flags & O_CREAT) == O_CREAT)) {
- libgf_client_loc_wipe (&loc);
- loc.path = strdup (pathres);
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 0);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for parent while trying to"
- " create (%s)", pathres);
- goto out;
- }
-
- loc.inode = inode_new (ctx->itable);
- }
-
- pathname = strdup (pathres);
- name = basename (pathname);
-
- ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino, name);
- if (ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- fd = fd_create (loc.inode, ctx->pid);
- fd->flags = flags;
-
- if ((flags & O_CREAT) == O_CREAT) {
- /* If we have the st_mode for the basename, check if
- * it is a directory here itself, rather than sending
- * a network message through libgf_client_creat, and
- * then receiving a EISDIR.
- */
- if (S_ISDIR (loc.inode->st_mode)) {
- errno = EISDIR;
- op_ret = -1;
- goto op_over;
- }
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
- op_ret = libgf_client_creat (ctx, &loc, fd, flags, mode);
- } else {
- if (S_ISDIR (loc.inode->st_mode))
- op_ret = libgf_client_opendir (ctx, &loc, fd);
- else
- op_ret = libgf_client_open (ctx, &loc, fd, flags);
- }
-
-op_over:
- if (op_ret == -1) {
- fd_unref (fd);
- fd = NULL;
- goto out;
- }
-
- if (!libgf_get_fd_ctx (fd)) {
- if (!libgf_alloc_fd_ctx (ctx, fd)) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Failed to"
- " allocate fd context");
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
- }
-
- if ((flags & O_TRUNC) && (((flags & O_ACCMODE) == O_RDWR)
- || ((flags & O_ACCMODE) == O_WRONLY))) {
- inode_ctx = libgf_get_inode_ctx (fd->inode);
- if (S_ISREG (inode_ctx->stbuf.st_mode)) {
- inode_ctx->stbuf.st_size = 0;
- inode_ctx->stbuf.st_blocks = 0;
- }
- }
-
-out:
- libgf_client_loc_wipe (&loc);
-
- if (pathname) {
- FREE (pathname);
- }
-
- if (pathres)
- FREE (pathres);
-
- return fd;
-}
-
-glusterfs_file_t
-glusterfs_open (const char *path, int flags, ...)
-{
- struct vmp_entry *entry = NULL;
- char *vpath = NULL;
- glusterfs_file_t fh = NULL;
- mode_t mode = 0;
- va_list ap;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- entry = libgf_vmp_search_entry ((char *)path);
- if (!entry) {
- errno = ENODEV;
- goto out;
- }
-
- vpath = libgf_vmp_virtual_path (entry, path);
- if (flags & O_CREAT) {
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
- fh = glusterfs_glh_open (entry->handle, vpath, flags, mode);
- } else
- fh = glusterfs_glh_open (entry->handle, vpath, flags);
-out:
- if (vpath)
- free (vpath);
- return fh;
-}
-
-glusterfs_file_t
-glusterfs_glh_creat (glusterfs_handle_t handle, const char *path, mode_t mode)
-{
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- return glusterfs_glh_open (handle, path,
- (O_CREAT | O_WRONLY | O_TRUNC), mode);
-}
-
-glusterfs_file_t
-glusterfs_creat (const char *path, mode_t mode)
-{
- struct vmp_entry *entry = NULL;
- char *vpath = NULL;
- glusterfs_file_t fh = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- entry = libgf_vmp_search_entry ((char *)path);
- if (!entry) {
- errno = ENODEV;
- goto out;
- }
-
- vpath = libgf_vmp_virtual_path (entry, path);
- fh = glusterfs_glh_creat (entry->handle, vpath, mode);
-
-out:
- if (vpath)
- free (vpath);
- return fh;
-}
-
-int32_t
-libgf_client_flush_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_flush_cbk_stub (frame, NULL, op_ret, op_errno);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-
-int
-libgf_client_flush (libglusterfs_client_ctx_t *ctx, fd_t *fd)
-{
- call_stub_t *stub;
- int32_t op_ret;
- libgf_client_local_t *local = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, flush, local, fd);
-
- op_ret = stub->args.flush_cbk.op_ret;
- errno = stub->args.flush_cbk.op_errno;
-
- call_stub_destroy (stub);
- return op_ret;
-}
-
-
-int
-glusterfs_close (glusterfs_file_t fd)
-{
- int32_t op_ret = -1;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (!fd) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
- ctx = fd_ctx->ctx;
-
- op_ret = libgf_client_flush (ctx, (fd_t *)fd);
-
- fd_unref ((fd_t *)fd);
-
-out:
- return op_ret;
-}
-
-int32_t
-libgf_client_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_setxattr_cbk_stub (frame, NULL, op_ret,
- op_errno);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_setxattr (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- const char *name,
- const void *value,
- size_t size,
- int flags)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- dict_t *dict;
- libgf_client_local_t *local = NULL;
-
- dict = get_new_dict ();
-
- dict_set (dict, (char *)name,
- bin_to_data ((void *)value, size));
- dict_ref (dict);
-
-
- LIBGF_CLIENT_FOP (ctx, stub, setxattr, local, loc, dict, flags);
-
- op_ret = stub->args.setxattr_cbk.op_ret;
- errno = stub->args.setxattr_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "path %s, name %s, status %d,"
- "errno %d", loc->path, name, op_ret, errno);
- dict_unref (dict);
- call_stub_destroy (stub);
- return op_ret;
-}
-
-
-#define LIBGF_DO_SETXATTR 1
-#define LIBGF_DO_LSETXATTR 2
-
-int
-__glusterfs_glh_setxattr (glusterfs_handle_t handle, const char *path,
- const char *name, const void *value,
- size_t size, int flags, int whichop)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *tmppath = NULL;
- loc_t *realloc = NULL;
- char *pathres = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "path %s, name %s, op %d", path
- ,name, whichop);
- if (size <= 0) {
- errno = EINVAL;
- goto out;
- }
-
- pathres = libgf_resolve_path_light ((char *)path);
- if (!pathres) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- loc.path = strdup (pathres);
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", pathres);
- goto out;
- }
-
- tmppath = strdup (pathres);
-
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (tmppath));
- FREE (tmppath);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- realloc = &loc;
- if (whichop == LIBGF_DO_LSETXATTR)
- goto do_setx;
-
- if (!S_ISLNK (loc.inode->st_mode))
- goto do_setx;
-
- libgf_client_loc_wipe (&loc);
- realloc = &loc;
- libgf_realpath_loc_fill (ctx, (char *)pathres, realloc);
-
-do_setx:
- if (!op_ret)
- op_ret = libgf_client_setxattr (ctx, realloc, name, value,
- size, flags);
-
-out:
- if (pathres)
- FREE (pathres);
-
- libgf_client_loc_wipe (realloc);
- return op_ret;
-}
-
-int
-glusterfs_glh_setxattr (glusterfs_handle_t handle, const char *path,
- const char *name, const void *value, size_t size,
- int flags)
-{
- return __glusterfs_glh_setxattr (handle, path, name, value, size, flags
- , LIBGF_DO_SETXATTR);
-}
-
-int
-glusterfs_glh_lsetxattr (glusterfs_handle_t handle, const char *path,
- const char *name, const void *value, size_t size,
- int flags)
-{
- return __glusterfs_glh_setxattr (handle, path, name, value, size, flags
- , LIBGF_DO_LSETXATTR);
-}
-
-int
-glusterfs_setxattr (const char *path, const char *name, const void *value,
- size_t size, int flags)
-{
- struct vmp_entry *entry = NULL;
- int op_ret = -1;
- char *vpath = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, name, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, value, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "path %s, name %s", path, name);
- entry = libgf_vmp_search_entry ((char *)path);
- if (!entry) {
- errno = ENODEV;
- goto out;
- }
-
- vpath = libgf_vmp_virtual_path (entry, path);
- op_ret = __glusterfs_glh_setxattr (entry->handle, vpath, name, value,
- size, flags, LIBGF_DO_SETXATTR);
-out:
- if (vpath)
- free (vpath);
- return op_ret;
-}
-
-int
-glusterfs_lsetxattr (glusterfs_handle_t handle, const char *path,
- const char *name, const void *value, size_t size,
- int flags)
-{
- struct vmp_entry *entry = NULL;
- int op_ret = -1;
- char *vpath = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, name, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, value, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "path %s, name %s", path, name);
- entry = libgf_vmp_search_entry ((char *)path);
- if (!entry) {
- errno = ENODEV;
- goto out;
- }
-
- vpath = libgf_vmp_virtual_path (entry, path);
- op_ret = __glusterfs_glh_setxattr (entry->handle, vpath, name, value,
- size, flags, LIBGF_DO_LSETXATTR);
-out:
- if (vpath)
- free (vpath);
- return op_ret;
-}
-
-int32_t
-libgf_client_fsetxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_fsetxattr_cbk_stub (frame, NULL, op_ret,
- op_errno);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_fsetxattr (libglusterfs_client_ctx_t *ctx,
- fd_t *fd,
- const char *name,
- const void *value,
- size_t size,
- int flags)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- dict_t *dict;
- libgf_client_local_t *local = NULL;
-
- dict = get_new_dict ();
-
- dict_set (dict, (char *)name,
- bin_to_data ((void *)value, size));
- dict_ref (dict);
-
- LIBGF_CLIENT_FOP (ctx, stub, fsetxattr, local, fd, dict, flags);
-
- op_ret = stub->args.fsetxattr_cbk.op_ret;
- errno = stub->args.fsetxattr_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "name %s, status %d, errno %d",
- name, op_ret, errno);
- dict_unref (dict);
- call_stub_destroy (stub);
-
- return op_ret;
-}
-
-int
-glusterfs_fsetxattr (glusterfs_file_t fd,
- const char *name,
- const void *value,
- size_t size,
- int flags)
-{
- int32_t op_ret = 0;
- fd_t *__fd = fd;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- libglusterfs_client_ctx_t *ctx = NULL;
-
- if (!fd) {
- errno = EINVAL;
- op_ret = -1;
- gf_log("libglusterfsclient",
- GF_LOG_ERROR,
- "invalid fd");
- goto out;
- }
-
- if (size <= 0) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Invalid argument: size is"
- " less than or equal to zero");
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- op_ret = -1;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
- op_ret = libgf_client_fsetxattr (ctx, __fd, name, value, size,
- flags);
-
-out:
- return op_ret;
-}
-
-int32_t
-libgf_client_fgetxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
-
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_fgetxattr_cbk_stub (frame, NULL, op_ret,
- op_errno, dict);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-size_t
-libgf_client_fgetxattr (libglusterfs_client_ctx_t *ctx,
- fd_t *fd,
- const char *name,
- void *value,
- size_t size)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- libgf_client_local_t *local = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, fgetxattr, local, fd, name);
-
- op_ret = stub->args.fgetxattr_cbk.op_ret;
- errno = stub->args.fgetxattr_cbk.op_errno;
-
- if (op_ret >= 0) {
- /*
- gf_log ("LIBGF_CLIENT", GF_LOG_DEBUG,
- "%"PRId64": %s => %d", frame->root->unique,
- state->fuse_loc.loc.path, op_ret);
- */
-
- data_t *value_data = dict_get (stub->args.fgetxattr_cbk.dict,
- (char *)name);
-
- if (value_data) {
- int32_t copy_len = 0;
-
- /* Don't return the value for '\0' */
- op_ret = value_data->len;
- copy_len = size < value_data->len ?
- size : value_data->len;
- memcpy (value, value_data->data, copy_len);
- } else {
- errno = ENODATA;
- op_ret = -1;
- }
- }
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "name %s, status %d, errno %d",
- name, op_ret, errno);
- call_stub_destroy (stub);
- return op_ret;
-}
-
-ssize_t
-glusterfs_fgetxattr (glusterfs_file_t fd,
- const char *name,
- void *value,
- size_t size)
-{
- int32_t op_ret = 0;
- libglusterfs_client_ctx_t *ctx;
- fd_t *__fd = (fd_t *)fd;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "name %s", name);
- if (size < 0) {
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
-
- if (size == 0)
- goto out;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- op_ret = -1;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
- op_ret = libgf_client_fgetxattr (ctx, __fd, name, value, size);
-out:
- return op_ret;
-}
-
-ssize_t
-glusterfs_listxattr (glusterfs_handle_t handle,
- const char *path,
- char *list,
- size_t size)
-{
- return ENOSYS;
-}
-
-ssize_t
-glusterfs_llistxattr (glusterfs_handle_t handle,
- const char *path,
- char *list,
- size_t size)
-{
- return ENOSYS;
-}
-
-ssize_t
-glusterfs_flistxattr (glusterfs_file_t fd,
- char *list,
- size_t size)
-{
- return ENOSYS;
-}
-
-int
-glusterfs_removexattr (glusterfs_handle_t handle,
- const char *path,
- const char *name)
-{
- return ENOSYS;
-}
-
-int
-glusterfs_lremovexattr (glusterfs_handle_t handle,
- const char *path,
- const char *name)
-{
- return ENOSYS;
-}
-
-int
-glusterfs_fremovexattr (glusterfs_file_t fd,
- const char *name)
-{
- return ENOSYS;
-}
-
-int32_t
-libgf_client_readv_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct stat *stbuf,
- struct iobref *iobref)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_readv_cbk_stub (frame, NULL, op_ret, op_errno,
- vector, count, stbuf, iobref);
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_read (libglusterfs_client_ctx_t *ctx,
- fd_t *fd,
- void *buf,
- size_t size,
- off_t offset)
-{
- call_stub_t *stub;
- struct iovec *vector;
- int32_t op_ret = -1;
- int count = 0;
- libgf_client_local_t *local = NULL;
- struct stat *stbuf = NULL;
-
- local = CALLOC (1, sizeof (*local));
- ERR_ABORT (local);
- local->fd = fd;
- LIBGF_CLIENT_FOP (ctx, stub, readv, local, fd, size, offset);
-
- op_ret = stub->args.readv_cbk.op_ret;
- errno = stub->args.readv_cbk.op_errno;
- count = stub->args.readv_cbk.count;
- vector = stub->args.readv_cbk.vector;
- if (op_ret > 0) {
- int i = 0;
- op_ret = 0;
- while (size && (i < count)) {
- int len = (size < vector[i].iov_len) ?
- size : vector[i].iov_len;
- memcpy (buf, vector[i++].iov_base, len);
- buf += len;
- size -= len;
- op_ret += len;
- }
- stbuf = &stub->args.readv_cbk.stbuf;
- libgf_transform_devnum (ctx, stbuf);
- libgf_invalidate_iattr_cache (fd->inode, LIBGF_INVALIDATE_STAT);
- }
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "size %lu, offset %"PRIu64,
- (long unsigned)size, offset);
- call_stub_destroy (stub);
- return op_ret;
-}
-
-ssize_t
-glusterfs_read (glusterfs_file_t fd,
- void *buf,
- size_t nbytes)
-{
- int32_t op_ret = -1;
- off_t offset = 0;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (nbytes < 0) {
- errno = EINVAL;
- goto out;
- }
-
- if (nbytes == 0) {
- op_ret = 0;
- goto out;
- }
-
- if (fd == 0) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- ctx = fd_ctx->ctx;
- offset = fd_ctx->offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
- op_ret = libgf_client_read (ctx, (fd_t *)fd, buf, nbytes, offset);
-
- if (op_ret > 0) {
- offset += op_ret;
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
-out:
- return op_ret;
-}
-
-
-ssize_t
-libgf_client_readv (libglusterfs_client_ctx_t *ctx,
- fd_t *fd,
- const struct iovec *dst_vector,
- int dst_count,
- off_t offset)
-{
- call_stub_t *stub = NULL;
- struct iovec *src_vector;
- int src_count = 0;
- int32_t op_ret = -1;
- libgf_client_local_t *local = NULL;
- size_t size = 0;
- int32_t i = 0;
- struct stat *stbuf = NULL;
-
- for (i = 0; i < dst_count; i++)
- {
- size += dst_vector[i].iov_len;
- }
-
- local = CALLOC (1, sizeof (*local));
- ERR_ABORT (local);
- local->fd = fd;
- LIBGF_CLIENT_FOP (ctx, stub, readv, local, fd, size, offset);
-
- op_ret = stub->args.readv_cbk.op_ret;
- errno = stub->args.readv_cbk.op_errno;
- src_count = stub->args.readv_cbk.count;
- src_vector = stub->args.readv_cbk.vector;
- if (op_ret > 0) {
- int src = 0, dst = 0;
- off_t src_offset = 0, dst_offset = 0;
-
- while ((size != 0) && (dst < dst_count) && (src < src_count)) {
- int len = 0, src_len, dst_len;
-
- src_len = src_vector[src].iov_len - src_offset;
- dst_len = dst_vector[dst].iov_len - dst_offset;
-
- len = (src_len < dst_len) ? src_len : dst_len;
- if (len > size) {
- len = size;
- }
-
- memcpy (dst_vector[dst].iov_base + dst_offset,
- src_vector[src].iov_base + src_offset, len);
-
- size -= len;
- src_offset += len;
- dst_offset += len;
-
- if (src_offset == src_vector[src].iov_len) {
- src_offset = 0;
- src++;
- }
-
- if (dst_offset == dst_vector[dst].iov_len) {
- dst_offset = 0;
- dst++;
- }
- }
-
- stbuf = &stub->args.readv_cbk.stbuf;
- libgf_transform_devnum (ctx, stbuf);
- libgf_invalidate_iattr_cache (fd->inode, LIBGF_UPDATE_STAT);
- }
-
- call_stub_destroy (stub);
- return op_ret;
-}
-
-
-ssize_t
-glusterfs_readv (glusterfs_file_t fd, const struct iovec *vec, int count)
-{
- int32_t op_ret = -1;
- off_t offset = 0;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (count < 0) {
- errno = EINVAL;
- goto out;
- }
-
- if (count == 0) {
- op_ret = 0;
- goto out;
- }
-
- if (!fd) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- ctx = fd_ctx->ctx;
- offset = fd_ctx->offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
- op_ret = libgf_client_readv (ctx, (fd_t *)fd, vec, count, offset);
-
- if (op_ret > 0) {
- offset += op_ret;
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
-out:
- return op_ret;
-}
-
-
-ssize_t
-glusterfs_pread (glusterfs_file_t fd,
- void *buf,
- size_t count,
- off_t offset)
-{
- int32_t op_ret = -1;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (count < 0) {
- errno = EINVAL;
- goto out;
- }
-
- if (count == 0) {
- op_ret = 0;
- goto out;
- }
-
- if (!fd) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- op_ret = libgf_client_read (ctx, (fd_t *)fd, buf, count, offset);
-
-out:
- return op_ret;
-}
-
-
-int
-libgf_client_writev_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *stbuf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_writev_cbk_stub (frame, NULL, op_ret, op_errno,
- stbuf);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-
-int
-libgf_client_iobuf_write (libglusterfs_client_ctx_t *ctx, fd_t *fd, char *addr,
- size_t size, off_t offset)
-{
- struct iobref *ioref = NULL;
- struct iobuf *iob = NULL;
- int op_ret = -1;
- struct iovec iov = {0, };
- call_stub_t *stub = NULL;
- libgf_client_local_t *local = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, addr, out);
-
- ioref = iobref_new ();
- if (!ioref) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Out of memory");
- goto out;
- }
-
- iob = iobuf_get (ctx->gf_ctx.iobuf_pool);
- if (!iob) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Out of memory");
- goto out;
- }
-
- memcpy (iob->ptr, addr, size);
- iobref_add (ioref, iob);
-
- iov.iov_base = iob->ptr;
- iov.iov_len = size;
-
- LIBGF_CLIENT_FOP (ctx, stub, writev, local, fd, &iov,
- 1, offset, ioref);
-
- op_ret = stub->args.writev_cbk.op_ret;
- errno = stub->args.writev_cbk.op_errno;
-
- /* We need to invalidate because it is possible that write-behind
- * is a translator below us and returns a stat filled with zeroes.
- */
- libgf_invalidate_iattr_cache (fd->inode, LIBGF_INVALIDATE_STAT);
-
-out:
- if (iob) {
- iobuf_unref (iob);
- }
-
- if (ioref) {
- iobref_unref (ioref);
- }
-
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-libgf_client_writev (libglusterfs_client_ctx_t *ctx,
- fd_t *fd,
- struct iovec *vector,
- int count,
- off_t offset)
-{
- int op_ret = 0;
- int written = 0;
- int writesize = 0;
- int size = 0;
- char *base = NULL;
- int i = 0;
-
- for (i = 0; i < count; i++) {
- size = vector[i].iov_len;
- base = vector[i].iov_base;
-
- while (size > 0) {
- writesize = (size > LIBGF_IOBUF_SIZE) ?
- LIBGF_IOBUF_SIZE : size;
-
- written = libgf_client_iobuf_write (ctx, fd, base,
- writesize, offset);
-
- if (written == -1)
- goto out;
-
- op_ret += written;
- base += written;
- size -= written;
- offset += written;
- }
- }
-
-out:
- return op_ret;
-}
-
-
-ssize_t
-glusterfs_write (glusterfs_file_t fd,
- const void *buf,
- size_t n)
-{
- int32_t op_ret = -1;
- off_t offset = 0;
- struct iovec vector;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (n < 0) {
- errno = EINVAL;
- goto out;
- }
-
- if (n == 0) {
- op_ret = 0;
- goto out;
- }
-
- if (!fd) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- offset = fd_ctx->offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
- vector.iov_base = (void *)buf;
- vector.iov_len = n;
-
- op_ret = libgf_client_writev (ctx,
- (fd_t *)fd,
- &vector,
- 1,
- offset);
-
- if (op_ret >= 0) {
- offset += op_ret;
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
-out:
- return op_ret;
-}
-
-ssize_t
-glusterfs_writev (glusterfs_file_t fd,
- const struct iovec *vector,
- int count)
-{
- int32_t op_ret = -1;
- off_t offset = 0;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (count < 0) {
- errno = EINVAL;
- goto out;
- }
-
- if (count == 0) {
- op_ret = 0;
- goto out;
- }
-
- if (!fd) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- offset = fd_ctx->offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
-
- op_ret = libgf_client_writev (ctx,
- (fd_t *)fd,
- (struct iovec *)vector,
- count,
- offset);
-
- if (op_ret >= 0) {
- offset += op_ret;
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
-out:
- return op_ret;
-}
-
-
-ssize_t
-glusterfs_pwrite (glusterfs_file_t fd,
- const void *buf,
- size_t count,
- off_t offset)
-{
- int32_t op_ret = -1;
- struct iovec vector;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (count < 0) {
- errno = EINVAL;
- goto out;
- }
-
- if (count == 0) {
- op_ret = 0;
- goto out;
- }
-
- if (!fd) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- vector.iov_base = (void *)buf;
- vector.iov_len = count;
-
- op_ret = libgf_client_writev (ctx,
- (fd_t *)fd,
- &vector,
- 1,
- offset);
-
-out:
- return op_ret;
-}
-
-
-int32_t
-libgf_client_readdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *entries)
-{
- libgf_client_local_t *local = frame->local;
-
- /* Note, we dont let entries reach the stub because there it gets copied
- * while we can simply delink the entries here and link them into our
- * dcache, thereby avoiding the need to perform more allocations and
- * copies.
- */
- local->reply_stub = fop_readdir_cbk_stub (frame, NULL, op_ret, op_errno,
- NULL);
- if (op_ret > 0)
- libgf_dcache_update (frame->root->state, local->fd, entries);
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_readdir (libglusterfs_client_ctx_t *ctx, fd_t *fd,
- struct dirent *dirp, off_t *offset)
-{
- call_stub_t *stub = NULL;
- int op_ret = -1;
- libgf_client_local_t *local = NULL;
-
- if (libgf_dcache_readdir (ctx, fd, dirp, offset))
- return 1;
- local = CALLOC (1, sizeof (*local));
- ERR_ABORT (local);
- local->fd = fd;
- LIBGF_CLIENT_FOP (ctx, stub, readdir, local, fd,
- LIBGF_READDIR_BLOCK, *offset);
-
- errno = stub->args.readdir_cbk.op_errno;
-
- op_ret = libgf_dcache_readdir (ctx, fd, dirp, offset);
- call_stub_destroy (stub);
- return op_ret;
-}
-
-
-int
-glusterfs_readdir_r (glusterfs_dir_t dirfd, struct dirent *entry,
- struct dirent **result)
-{
- int op_ret = -1;
- libglusterfs_client_ctx_t *ctx = NULL;
- off_t offset = 0;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- struct dirent *dirp = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, entry, out);
-
- fd_ctx = libgf_get_fd_ctx (dirfd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "fd context not present");
- errno = EBADF;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- ctx = fd_ctx->ctx;
- offset = fd_ctx->offset;
- dirp = &fd_ctx->dirp;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "offset %"PRIu64, offset);
- memset (dirp, 0, sizeof (struct dirent));
- op_ret = libgf_client_readdir (ctx, (fd_t *)dirfd, dirp,
- &offset);
- if (op_ret <= 0) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "readdir failed:"
- " %s", strerror (errno));
- if (result && (op_ret == 0)) {
- *result = NULL;
- } else if (op_ret < 0){
- op_ret = errno;
- }
- goto unlock;
- }
-
- fd_ctx->offset = offset;
-
- if (result) {
- *result = memcpy (entry, dirp, sizeof (*entry));
- } else {
- memcpy (entry, dirp, sizeof (*entry));
- }
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "new offset %"PRIu64", "
- " entry %s", offset, entry->d_name);
- op_ret = 0;
- }
-unlock:
- pthread_mutex_unlock (&fd_ctx->lock);
-
-out:
- return op_ret;
-}
-
-
-struct dirent *
-glusterfs_readdir (glusterfs_dir_t dirfd)
-{
- int op_ret = -1;
- libglusterfs_client_ctx_t *ctx = NULL;
- off_t offset = 0;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- struct dirent *dirp = NULL;
-
- fd_ctx = libgf_get_fd_ctx (dirfd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "fd context not present");
- errno = EBADF;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- ctx = fd_ctx->ctx;
- offset = fd_ctx->offset;
- dirp = &fd_ctx->dirp;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "offset %"PRIu64, offset);
- memset (dirp, 0, sizeof (struct dirent));
- op_ret = libgf_client_readdir (ctx, (fd_t *)dirfd, dirp, &offset);
-
- if (op_ret <= 0) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "readdir failed: %s",
- strerror (errno));
- dirp = NULL;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "new offset %"PRIu64", entry %s",
- offset, dirp->d_name);
-out:
- return dirp;
-}
-
-
-int
-glusterfs_getdents (glusterfs_file_t fd, struct dirent *dirp,
- unsigned int count)
-{
- int op_ret = -1;
- libglusterfs_client_ctx_t *ctx = NULL;
- off_t offset = 0;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- ctx = fd_ctx->ctx;
- offset = fd_ctx->offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
- op_ret = libgf_client_readdir (ctx, (fd_t *)fd, dirp, &offset);
-
- if (op_ret > 0) {
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
-out:
- return op_ret;
-}
-
-
-static int32_t
-libglusterfs_readv_async_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct stat *stbuf,
- struct iobref *iobref)
-{
- glusterfs_iobuf_t *buf;
- libglusterfs_client_async_local_t *local = frame->local;
- fd_t *__fd = local->fop.readv_cbk.fd;
- glusterfs_readv_cbk_t readv_cbk = local->fop.readv_cbk.cbk;
-
- buf = CALLOC (1, sizeof (*buf));
- ERR_ABORT (buf);
-
- if (vector) {
- buf->vector = iov_dup (vector, count);
- }
-
- buf->count = count;
-
- if (iobref) {
- buf->iobref = iobref_ref (iobref);
- }
-
- if (op_ret > 0) {
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- fd_ctx = libgf_get_fd_ctx (__fd);
-
- /* update offset only if we have used offset stored in fd_ctx */
- if (local->fop.readv_cbk.update_offset) {
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset += op_ret;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
- }
-
- readv_cbk (op_ret, op_errno, buf, local->cbk_data);
-
- FREE (local);
- frame->local = NULL;
- STACK_DESTROY (frame->root);
-
- return 0;
-}
-
-void
-glusterfs_free (glusterfs_iobuf_t *buf)
-{
- //iov_free (buf->vector, buf->count);
- FREE (buf->vector);
- if (buf->iobref)
- iobref_unref ((struct iobref *) buf->iobref);
- if (buf->dictref)
- dict_unref ((dict_t *) buf->dictref);
- FREE (buf);
-}
-
-int
-glusterfs_read_async (glusterfs_file_t fd,
- size_t nbytes,
- off_t offset,
- glusterfs_readv_cbk_t readv_cbk,
- void *cbk_data)
-{
- libglusterfs_client_ctx_t *ctx;
- fd_t *__fd = (fd_t *)fd;
- libglusterfs_client_async_local_t *local = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- int32_t op_ret = 0;
-
- if (nbytes < 0) {
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
-
- if (nbytes == 0) {
- op_ret = 0;
- goto out;
- }
-
- local = CALLOC (1, sizeof (*local));
- ERR_ABORT (local);
- local->fop.readv_cbk.fd = __fd;
- local->fop.readv_cbk.cbk = readv_cbk;
- local->cbk_data = cbk_data;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- op_ret = -1;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- if (offset < 0) {
- pthread_mutex_lock (&fd_ctx->lock);
- {
- offset = fd_ctx->offset;
- local->fop.readv_cbk.update_offset = 1;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
- LIBGF_CLIENT_FOP_ASYNC (ctx,
- local,
- libglusterfs_readv_async_cbk,
- readv,
- __fd,
- nbytes,
- offset);
-
-out:
- return op_ret;
-}
-
-static int32_t
-libglusterfs_writev_async_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *stbuf)
-{
- libglusterfs_client_async_local_t *local = frame->local;
- fd_t *fd = NULL;
- glusterfs_write_cbk_t write_cbk;
-
- write_cbk = local->fop.write_cbk.cbk;
- fd = local->fop.write_cbk.fd;
-
- if (op_ret > 0) {
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- fd_ctx = libgf_get_fd_ctx (fd);
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset += op_ret;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
- write_cbk (op_ret, op_errno, local->cbk_data);
-
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-int32_t
-glusterfs_write_async (glusterfs_file_t fd,
- const void *buf,
- size_t nbytes,
- off_t offset,
- glusterfs_write_cbk_t write_cbk,
- void *cbk_data)
-{
- fd_t *__fd = (fd_t *)fd;
- struct iovec vector;
- off_t __offset = offset;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_async_local_t *local = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- int32_t op_ret = 0;
- struct iobref *iobref = NULL;
-
- if (nbytes == 0) {
- op_ret = 0;
- goto out;
- }
-
- if (nbytes < 0) {
- op_ret = -1;
- errno = EINVAL;
- goto out;
- }
-
- local = CALLOC (1, sizeof (*local));
- ERR_ABORT (local);
- local->fop.write_cbk.fd = __fd;
- local->fop.write_cbk.cbk = write_cbk;
- local->cbk_data = cbk_data;
-
- vector.iov_base = (void *)buf;
- vector.iov_len = nbytes;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- op_ret = -1;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- if (offset < 0) {
- pthread_mutex_lock (&fd_ctx->lock);
- {
- __offset = fd_ctx->offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
- iobref = iobref_new ();
- LIBGF_CLIENT_FOP_ASYNC (ctx,
- local,
- libglusterfs_writev_async_cbk,
- writev,
- __fd,
- &vector,
- 1,
- __offset,
- iobref);
- iobref_unref (iobref);
-
-out:
- return op_ret;
-}
-
-off_t
-glusterfs_lseek (glusterfs_file_t fd, off_t offset, int whence)
-{
- off_t __offset = 0;
- int32_t op_ret = -1;
- fd_t *__fd = (fd_t *)fd;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- libglusterfs_client_ctx_t *ctx = NULL;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- __offset = -1;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- switch (whence)
- {
- case SEEK_SET:
- __offset = offset;
- break;
-
- case SEEK_CUR:
- pthread_mutex_lock (&fd_ctx->lock);
- {
- __offset = fd_ctx->offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
- __offset += offset;
- break;
-
- case SEEK_END:
- {
- char cache_valid = 0;
- off_t end = 0;
- loc_t loc = {0, };
- struct stat stbuf = {0, };
-
- cache_valid = libgf_is_iattr_cache_valid (ctx, __fd->inode,
- &stbuf,
- LIBGF_VALIDATE_STAT);
- if (cache_valid) {
- end = stbuf.st_size;
- } else {
- op_ret = libgf_client_loc_fill (&loc, ctx,
- __fd->inode->ino, 0,
- NULL);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- libgf_client_loc_wipe (&loc);
- __offset = -1;
- goto out;
- }
-
- op_ret = libgf_client_lookup (ctx, &loc, &stbuf, NULL,
- NULL);
- if (op_ret < 0) {
- __offset = -1;
- libgf_client_loc_wipe (&loc);
- goto out;
- }
-
- end = stbuf.st_size;
- }
-
- __offset = end + offset;
- libgf_client_loc_wipe (&loc);
- }
- break;
-
- default:
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "invalid value for whence");
- __offset = -1;
- errno = EINVAL;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = __offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
-out:
- return __offset;
-}
-
-
-int32_t
-libgf_client_stat_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_stat_cbk_stub (frame,
- NULL,
- op_ret,
- op_errno,
- buf);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int32_t
-libgf_client_stat (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- struct stat *stbuf)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- libgf_client_local_t *local = NULL;
- struct stat cachedbuf = {0, };
-
- if (libgf_is_iattr_cache_valid (ctx, loc->inode, &cachedbuf,
- LIBGF_VALIDATE_STAT)) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Cache will be used");
- if (stbuf)
- memcpy (stbuf, &cachedbuf, sizeof (struct stat));
- goto out;
- }
-
- LIBGF_CLIENT_FOP (ctx, stub, stat, local, loc);
-
- op_ret = stub->args.stat_cbk.op_ret;
- errno = stub->args.stat_cbk.op_errno;
- libgf_transform_devnum (ctx, &stub->args.stat_cbk.buf);
- if (stbuf)
- *stbuf = stub->args.stat_cbk.buf;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, status %d, errno %d",
- loc->path, op_ret, errno);
- libgf_update_iattr_cache (loc->inode, LIBGF_UPDATE_STAT,
- &stub->args.stat_cbk.buf);
- call_stub_destroy (stub);
-
-out:
- return op_ret;
-}
-
-int
-libgf_realpath_loc_fill (libglusterfs_client_ctx_t *ctx, char *link,
- loc_t *targetloc)
-{
- int op_ret = -1;
- char *target = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, link, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, targetloc, out);
-
- targetloc->path = glusterfs_glh_realpath (ctx, link, NULL);
-
- if (targetloc->path == NULL)
- goto out;
-
- op_ret = libgf_client_path_lookup (targetloc, ctx, 1);
- if (op_ret == -1)
- goto out;
-
- target = strdup (targetloc->path);
- op_ret = libgf_client_loc_fill (targetloc, ctx, 0,
- targetloc->parent->ino,
- basename (target));
- if (op_ret == -1) {
- errno = EINVAL;
- goto out;
- }
-
-out:
- if (target)
- FREE (target);
-
- return op_ret;
-}
-
-#define LIBGF_DO_LSTAT 0x01
-#define LIBGF_DO_STAT 0x02
-
-int
-__glusterfs_stat (glusterfs_handle_t handle, const char *path,
- struct stat *buf, int whichstat)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL, *pathname = NULL;
- loc_t targetloc = {0, };
- loc_t *real_loc = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, op: %d", path,
- whichstat);
- loc.path = libgf_resolve_path_light ((char *)path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- pathname = strdup (loc.path);
- name = basename (pathname);
-
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino, name);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
- real_loc = &loc;
- /* The stat fop in glusterfs calls lstat. So we have to
- * provide the POSIX compatible stat fop. To do so, we need to ensure
- * that if the @path is a symlink, we must perform a stat on the
- * target of that symlink than the symlink itself(..because if
- * do a stat on the symlink, we're actually doing what lstat
- * should do. See posix_stat
- */
- if (whichstat & LIBGF_DO_LSTAT)
- goto lstat_fop;
-
- if (!S_ISLNK (loc.inode->st_mode))
- goto lstat_fop;
-
- op_ret = libgf_realpath_loc_fill (ctx, (char *)loc.path, &targetloc);
- if (op_ret == -1)
- goto out;
- real_loc = &targetloc;
-
-lstat_fop:
-
- if (!op_ret) {
- op_ret = libgf_client_stat (ctx, real_loc, buf);
- }
-
-out:
- if (pathname) {
- FREE (pathname);
- }
-
- libgf_client_loc_wipe (&loc);
- libgf_client_loc_wipe (&targetloc);
-
- return op_ret;
-}
-
-int
-glusterfs_glh_stat (glusterfs_handle_t handle, const char *path,
- struct stat *buf)
-{
- return __glusterfs_stat (handle, path, buf, LIBGF_DO_STAT);
-}
-
-int
-glusterfs_stat (const char *path, struct stat *buf)
-{
- struct vmp_entry *entry = NULL;
- int op_ret = -1;
- char *vpath = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, buf, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- entry = libgf_vmp_search_entry ((char *)path);
- if (!entry) {
- errno = ENODEV;
- goto out;
- }
-
- vpath = libgf_vmp_virtual_path (entry, path);
- op_ret = glusterfs_glh_stat (entry->handle, vpath, buf);
-out:
- if (vpath)
- free (vpath);
- return op_ret;
-}
-
-int
-glusterfs_glh_lstat (glusterfs_handle_t handle, const char *path, struct stat *buf)
-{
- return __glusterfs_stat (handle, path, buf, LIBGF_DO_LSTAT);
-}
-
-int
-glusterfs_lstat (const char *path, struct stat *buf)
-{
- struct vmp_entry *entry = NULL;
- int op_ret = -1;
- char *vpath = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, buf, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- entry = libgf_vmp_search_entry ((char *)path);
- if (!entry) {
- errno = ENODEV;
- goto out;
- }
-
- vpath = libgf_vmp_virtual_path (entry, path);
- op_ret = glusterfs_glh_lstat (entry->handle, vpath, buf);
-out:
- if (vpath)
- free (vpath);
- return op_ret;
-}
-
-static int32_t
-libgf_client_fstat_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_fstat_cbk_stub (frame,
- NULL,
- op_ret,
- op_errno,
- buf);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-
-}
-
-int32_t
-libgf_client_fstat (libglusterfs_client_ctx_t *ctx,
- fd_t *fd,
- struct stat *buf)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- libgf_client_local_t *local = NULL;
- struct stat cachedbuf = {0, };
-
- if (libgf_is_iattr_cache_valid (ctx, fd->inode, &cachedbuf,
- LIBGF_VALIDATE_STAT)) {
- if (buf)
- memcpy (buf, &cachedbuf, sizeof (struct stat));
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Cache will be used");
- goto out;
- }
-
- LIBGF_CLIENT_FOP (ctx, stub, fstat, local, fd);
-
- op_ret = stub->args.fstat_cbk.op_ret;
- errno = stub->args.fstat_cbk.op_errno;
- libgf_transform_devnum (ctx, &stub->args.fstat_cbk.buf);
- if (buf)
- *buf = stub->args.fstat_cbk.buf;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "status %d, errno %d", op_ret,
- errno);
- libgf_update_iattr_cache (fd->inode, LIBGF_UPDATE_STAT,
- &stub->args.fstat_cbk.buf);
- call_stub_destroy (stub);
-
-out:
- return op_ret;
-}
-
-int32_t
-glusterfs_fstat (glusterfs_file_t fd, struct stat *buf)
-{
- libglusterfs_client_ctx_t *ctx;
- fd_t *__fd = (fd_t *)fd;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- int32_t op_ret = -1;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- op_ret = -1;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- op_ret = libgf_client_fstat (ctx, __fd, buf);
-
-out:
- return op_ret;
-}
-
-
-static int32_t
-libgf_client_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_mkdir_cbk_stub (frame, NULL, op_ret, op_errno,
- inode, buf);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-
-static int32_t
-libgf_client_mkdir (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- mode_t mode)
-{
- int32_t op_ret = -1;
- call_stub_t *stub = NULL;
- libgf_client_local_t *local = NULL;
- inode_t *libgf_inode = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, mkdir, local, loc, mode);
- op_ret = stub->args.mkdir_cbk.op_ret;
- errno = stub->args.mkdir_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, status %d, errno %d",
- loc->path, op_ret, errno);
- if (op_ret == -1)
- goto out;
-
- libgf_inode = stub->args.mkdir_cbk.inode;
- libgf_transform_devnum (ctx, &stub->args.mkdir_cbk.buf);
- inode_link (libgf_inode, loc->parent, loc->name,
- &stub->args.mkdir_cbk.buf);
-
- inode_lookup (libgf_inode);
-
- libgf_alloc_inode_ctx (ctx, libgf_inode);
- libgf_update_iattr_cache (libgf_inode, LIBGF_UPDATE_ALL,
- &stub->args.mkdir_cbk.buf);
-
-out:
- call_stub_destroy (stub);
-
- return op_ret;
-}
-
-
-int32_t
-glusterfs_glh_mkdir (glusterfs_handle_t handle, const char *path, mode_t mode)
-{
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t loc = {0, };
- char *pathname = NULL, *name = NULL;
- int32_t op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- loc.path = libgf_resolve_path_light ((char *)path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == 0) {
- op_ret = -1;
- errno = EEXIST;
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 0);
- if (op_ret == -1) {
- errno = ENOENT;
- goto out;
- }
-
- pathname = strdup (loc.path);
- name = basename (pathname);
-
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino, name);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- loc.inode = inode_new (ctx->itable);
- op_ret = libgf_client_mkdir (ctx, &loc, mode);
- if (op_ret == -1) {
- goto out;
- }
-
-out:
- libgf_client_loc_wipe (&loc);
- if (pathname) {
- free (pathname);
- pathname = NULL;
- }
-
- return op_ret;
-}
-
-int32_t
-glusterfs_mkdir (const char *path, mode_t mode)
-{
- struct vmp_entry *entry = NULL;
- int op_ret = -1;
- char *vpath = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- entry = libgf_vmp_search_entry ((char *)path);
- if (!entry) {
- errno = ENODEV;
- goto out;
- }
-
- vpath = libgf_vmp_virtual_path (entry, path);
- op_ret = glusterfs_glh_mkdir (entry->handle, vpath, mode);
-out:
- if (vpath)
- free (vpath);
- return op_ret;
-}
-
-static int32_t
-libgf_client_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_rmdir_cbk_stub (frame, NULL, op_ret, op_errno);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-static int32_t
-libgf_client_rmdir (libglusterfs_client_ctx_t *ctx, loc_t *loc)
-{
- int32_t op_ret = -1;
- call_stub_t *stub = NULL;
- libgf_client_local_t *local = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, rmdir, local, loc);
-
- op_ret = stub->args.rmdir_cbk.op_ret;
- errno = stub->args.rmdir_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, status %d, errno %d",
- loc->path, op_ret, errno);
- if (stub->args.rmdir_cbk.op_ret != 0)
- goto out;
-
- inode_unlink (loc->inode, loc->parent, loc->name);
-
-out:
- call_stub_destroy (stub);
-
- return op_ret;
-}
-
-int32_t
-glusterfs_glh_rmdir (glusterfs_handle_t handle, const char *path)
-{
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t loc = {0, };
- char *pathname = NULL, *name = NULL;
- int32_t op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- loc.path = libgf_resolve_path_light ((char *)path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- pathname = strdup (loc.path);
- name = basename (pathname);
-
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino, name);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_rmdir (ctx, &loc);
- if (op_ret == -1) {
- goto out;
- }
-
-out:
- libgf_client_loc_wipe (&loc);
-
- if (pathname) {
- free (pathname);
- pathname = NULL;
- }
-
- return op_ret;
-}
-
-int32_t
-glusterfs_rmdir (const char *path)
-{
- struct vmp_entry *entry = NULL;
- int op_ret = -1;
- char *vpath = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- entry = libgf_vmp_search_entry ((char *)path);
- if (!entry) {
- errno = ENODEV;
- goto out;
- }
-
- vpath = libgf_vmp_virtual_path (entry, path);
- op_ret = glusterfs_glh_rmdir (entry->handle, vpath);
-out:
- if (vpath)
- free (vpath);
- return op_ret;
-}
-
-int
-libgf_client_chmod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_chmod_cbk_stub (frame, NULL, op_ret, op_errno,
- buf);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_chmod (libglusterfs_client_ctx_t *ctx, loc_t * loc, mode_t mode)
-{
- int op_ret = -1;
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, chmod, local, loc, mode);
-
- op_ret = stub->args.chmod_cbk.op_ret;
- errno = stub->args.chmod_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, status %d, errno %d",
- loc->path, op_ret, errno);
- if (op_ret == -1)
- goto out;
-
- libgf_transform_devnum (ctx, &stub->args.chmod_cbk.buf);
- libgf_update_iattr_cache (loc->inode, LIBGF_UPDATE_STAT,
- &stub->args.chmod_cbk.buf);
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_glh_chmod (glusterfs_handle_t handle, const char *path, mode_t mode)
-{
- int op_ret = -1;
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t loc = {0, };
- char *name = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- loc.path = libgf_resolve_path_light ((char *)path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1)
- goto out;
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_chmod (ctx, &loc, mode);
-
-out:
- if (name)
- FREE (name);
-
- libgf_client_loc_wipe (&loc);
- return op_ret;
-}
-
-int
-glusterfs_chmod (const char *path, mode_t mode)
-{
- struct vmp_entry *entry = NULL;
- int op_ret = -1;
- char *vpath = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- entry = libgf_vmp_search_entry ((char *)path);
- if (!entry) {
- errno = ENODEV;
- goto out;
- }
-
- vpath = libgf_vmp_virtual_path (entry, path);
- op_ret = glusterfs_glh_chmod (entry->handle, vpath, mode);
-out:
- if (vpath)
- free (vpath);
- return op_ret;
-}
-
-int
-libgf_client_chown_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *sbuf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_chown_cbk_stub (frame, NULL, op_ret, op_errno,
- sbuf);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_chown (libglusterfs_client_ctx_t *ctx, loc_t *loc, uid_t uid,
- gid_t gid)
-{
- call_stub_t *stub = NULL;
- libgf_client_local_t *local = NULL;
- int32_t op_ret = -1;
-
- LIBGF_CLIENT_FOP (ctx, stub, chown, local, loc, uid, gid);
-
- op_ret = stub->args.chown_cbk.op_ret;
- errno = stub->args.chown_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, status %d, errno %d",
- loc->path, op_ret, errno);
- if (op_ret == -1)
- goto out;
-
- libgf_transform_devnum (ctx, &stub->args.chown_cbk.buf);
- libgf_update_iattr_cache (loc->inode, LIBGF_UPDATE_STAT,
- &stub->args.chown_cbk.buf);
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-#define LIBGF_DO_CHOWN 1
-#define LIBGF_DO_LCHOWN 2
-
-int
-__glusterfs_chown (glusterfs_handle_t handle, const char *path, uid_t owner,
- gid_t group, int whichop)
-{
- int op_ret = -1;
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t loc = {0, };
- char *name = NULL;
- loc_t *oploc = NULL;
- loc_t targetloc = {0, };
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, op %d", path, whichop);
- loc.path = libgf_resolve_path_light ((char *)path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1)
- goto out;
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename ((char *)name));
- if (op_ret == -1) {
- errno = EINVAL;
- goto out;
- }
-
- oploc = &loc;
- if (whichop == LIBGF_DO_LCHOWN)
- goto do_lchown;
-
- if (!S_ISLNK (loc.inode->st_mode))
- goto do_lchown;
-
- op_ret = libgf_realpath_loc_fill (ctx, (char *)loc.path, &targetloc);
- if (op_ret == -1)
- goto out;
-
- oploc = &targetloc;
-do_lchown:
- op_ret = libgf_client_chown (ctx, oploc, owner, group);
-out:
- if (name)
- FREE (name);
- libgf_client_loc_wipe (&loc);
- libgf_client_loc_wipe (&targetloc);
- return op_ret;
-}
-
-int
-glusterfs_glh_chown (glusterfs_handle_t handle, const char *path, uid_t owner,
- gid_t group)
-{
- return __glusterfs_chown (handle, path, owner, group, LIBGF_DO_CHOWN);
-}
-
-int
-glusterfs_chown (const char *path, uid_t owner, gid_t group)
-{
- struct vmp_entry *entry = NULL;
- int op_ret = -1;
- char *vpath = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- entry = libgf_vmp_search_entry ((char *)path);
- if (!entry) {
- errno = ENODEV;
- goto out;
- }
-
- vpath = libgf_vmp_virtual_path (entry, path);
- op_ret = glusterfs_glh_chown (entry->handle, vpath, owner, group);
-out:
- if (vpath)
- free (vpath);
- return op_ret;
-}
-
-int
-glusterfs_glh_lchown (glusterfs_handle_t handle, const char *path, uid_t owner,
- gid_t group)
-{
- return __glusterfs_chown (handle, path, owner, group, LIBGF_DO_LCHOWN);
-}
-
-int
-glusterfs_lchown (const char *path, uid_t owner, gid_t group)
-{
- struct vmp_entry *entry = NULL;
- int op_ret = -1;
- char *vpath = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- entry = libgf_vmp_search_entry ((char *)path);
- if (!entry) {
- errno = ENODEV;
- goto out;
- }
-
- vpath = libgf_vmp_virtual_path (entry, path);
- op_ret = glusterfs_glh_lchown (entry->handle, vpath, owner, group);
-out:
- if (vpath)
- free (vpath);
- return op_ret;
-}
-
-glusterfs_dir_t
-glusterfs_glh_opendir (glusterfs_handle_t handle, const char *path)
-{
- int op_ret = -1;
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t loc = {0, };
- fd_t *dirfd = NULL;
- char *name = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- loc.path = libgf_resolve_path_light ((char *)path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
-
- if (op_ret == -1)
- goto out;
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- errno = EINVAL;
- goto out;
- }
-
- if (!S_ISDIR (loc.inode->st_mode) && !S_ISLNK (loc.inode->st_mode)) {
- errno = ENOTDIR;
- op_ret = -1;
- goto out;
- }
-
- dirfd = fd_create (loc.inode, ctx->pid);
- op_ret = libgf_client_opendir (ctx, &loc, dirfd);
-
- if (op_ret == -1) {
- fd_unref (dirfd);
- dirfd = NULL;
- goto out;
- }
-
- if (!libgf_get_fd_ctx (dirfd)) {
- if (!(libgf_alloc_fd_ctx (ctx, dirfd))) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Context "
- "allocation failed");
- op_ret = -1;
- errno = EINVAL;
- goto out;
- }
- }
-
-out:
- if (name)
- FREE (name);
-
- if (op_ret == -1) {
- fd_unref (dirfd);
- dirfd = NULL;
- }
-
- libgf_client_loc_wipe (&loc);
- return dirfd;
-}
-
-glusterfs_dir_t
-glusterfs_opendir (const char *path)
-{
- struct vmp_entry *entry = NULL;
- char *vpath = NULL;
- glusterfs_dir_t dir = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- entry = libgf_vmp_search_entry ((char *)path);
- if (!entry) {
- errno = ENODEV;
- goto out;
- }
-
- vpath = libgf_vmp_virtual_path (entry, path);
- dir = glusterfs_glh_opendir (entry->handle, vpath);
-out:
- if (vpath)
- free (vpath);
- return dir;
-}
-
-int
-glusterfs_closedir (glusterfs_dir_t dirfd)
-{
- int op_ret = -1;
- libglusterfs_client_fd_ctx_t *fdctx = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, dirfd, out);
- fdctx = libgf_get_fd_ctx (dirfd);
-
- if (fdctx == NULL) {
- errno = EBADF;
- op_ret = -1;
- goto out;
- }
-
- op_ret = libgf_client_flush (fdctx->ctx, (fd_t *)dirfd);
- fd_unref ((fd_t *)dirfd);
-
-out:
- return op_ret;
-}
-
-int
-libgf_client_fchmod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct stat *buf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_fchmod_cbk_stub (frame, NULL, op_ret, op_errno,
- buf);
- LIBGF_REPLY_NOTIFY (local);
-
- return 0;
-}
-
-int
-libgf_client_fchmod (libglusterfs_client_ctx_t *ctx, fd_t *fd, mode_t mode)
-{
- int op_ret = -1;
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, fchmod, local, fd, mode);
-
- op_ret = stub->args.fchmod_cbk.op_ret;
- errno = stub->args.fchmod_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "status %d, errno %d", op_ret,
- errno);
- if (op_ret == -1)
- goto out;
-
- libgf_transform_devnum (ctx, &stub->args.fchmod_cbk.buf);
- libgf_update_iattr_cache (fd->inode, LIBGF_UPDATE_STAT,
- &stub->args.fchmod_cbk.buf);
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_fchmod (glusterfs_file_t fd, mode_t mode)
-{
- libglusterfs_client_fd_ctx_t *fdctx = NULL;
- int op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
- fdctx = libgf_get_fd_ctx (fd);
-
- if (!fdctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- op_ret = libgf_client_fchmod (fdctx->ctx, fd, mode);
-
-out:
- return op_ret;
-}
-
-int
-libgf_client_fchown_cbk (call_frame_t *frame, void *cookie, xlator_t *xlator,
- int32_t op_ret, int32_t op_errno,
- struct stat *buf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_fchown_cbk_stub (frame, NULL, op_ret, op_errno,
- buf);
- LIBGF_REPLY_NOTIFY (local);
-
- return 0;
-}
-
-int
-libgf_client_fchown (libglusterfs_client_ctx_t *ctx, fd_t *fd, uid_t uid,
- gid_t gid)
-{
- call_stub_t *stub = NULL;
- libgf_client_local_t *local = NULL;
- int op_ret = -1;
-
- LIBGF_CLIENT_FOP (ctx, stub, fchown, local, fd, uid, gid);
-
- op_ret = stub->args.fchown_cbk.op_ret;
- errno = stub->args.fchown_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "status %d, errno %d", op_ret,
- errno);
- if (op_ret == -1)
- goto out;
-
- libgf_transform_devnum (ctx, &stub->args.fchown_cbk.buf);
- libgf_update_iattr_cache (fd->inode, LIBGF_UPDATE_STAT,
- &stub->args.fchown_cbk.buf);
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_fchown (glusterfs_file_t fd, uid_t uid, gid_t gid)
-{
- int op_ret = -1;
- libglusterfs_client_fd_ctx_t *fdctx = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
-
- fdctx = libgf_get_fd_ctx (fd);
- if (!fd) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- op_ret = libgf_client_fchown (fdctx->ctx, fd, uid, gid);
-
-out:
- return op_ret;
-}
-
-int
-libgf_client_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *xlator,
- int32_t op_ret, int32_t op_errno)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_fsync_cbk_stub (frame, NULL, op_ret, op_errno);
-
- LIBGF_REPLY_NOTIFY (local);
-
- return 0;
-}
-
-int
-libgf_client_fsync (libglusterfs_client_ctx_t *ctx, fd_t *fd)
-{
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
- int op_ret = -1;
-
- LIBGF_CLIENT_FOP (ctx, stub, fsync, local, fd, 0);
-
- op_ret = stub->args.fsync_cbk.op_ret;
- errno = stub->args.fsync_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "status %d, errno %d", op_ret,
- errno);
- call_stub_destroy (stub);
-
- return op_ret;
-}
-
-int
-glusterfs_fsync (glusterfs_file_t *fd)
-{
- libglusterfs_client_fd_ctx_t *fdctx = NULL;
- int op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
-
- fdctx = libgf_get_fd_ctx ((fd_t *)fd);
- if (!fdctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- op_ret = libgf_client_fsync (fdctx->ctx, (fd_t *)fd);
-
-out:
- return op_ret;
-}
-
-int
-libgf_client_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *xlator
- ,int32_t op_ret, int32_t op_errno,
- struct stat *buf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_ftruncate_cbk_stub (frame, NULL, op_ret,
- op_errno, buf);
-
- LIBGF_REPLY_NOTIFY (local);
-
- return 0;
-}
-
-int
-libgf_client_ftruncate (libglusterfs_client_ctx_t *ctx, fd_t *fd,
- off_t length)
-{
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
- int op_ret = -1;
- libglusterfs_client_fd_ctx_t *fdctx = NULL;
-
- if ((!(fd->flags & O_ACCMODE) == O_RDWR)
- && (!((fd->flags & O_ACCMODE) == O_WRONLY))) {
- errno = EBADF;
- goto out;
- }
-
- LIBGF_CLIENT_FOP (ctx, stub, ftruncate, local, fd, length);
-
- op_ret = stub->args.ftruncate_cbk.op_ret;
- errno = stub->args.ftruncate_cbk.op_errno;
-
- if (op_ret == -1)
- goto out;
-
- libgf_transform_devnum (ctx, &stub->args.ftruncate_cbk.buf);
- libgf_update_iattr_cache (fd->inode, LIBGF_UPDATE_STAT,
- &stub->args.ftruncate_cbk.buf);
-
- fdctx = libgf_get_fd_ctx (fd);
- if (!fd) {
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
-
- pthread_mutex_lock (&fdctx->lock);
- {
- fdctx->offset = stub->args.ftruncate_cbk.buf.st_size;
- }
- pthread_mutex_unlock (&fdctx->lock);
-
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_ftruncate (glusterfs_file_t fd, off_t length)
-{
- libglusterfs_client_fd_ctx_t *fdctx = NULL;
- int op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
-
- fdctx = libgf_get_fd_ctx (fd);
- if (!fdctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- op_ret = libgf_client_ftruncate (fdctx->ctx, fd, length);
-
-out:
- return op_ret;
-}
-
-int
-libgf_client_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_link_cbk_stub (frame, NULL, op_ret, op_errno,
- inode, buf);
-
- LIBGF_REPLY_NOTIFY (local);
-
- return 0;
-}
-
-int
-libgf_client_link (libglusterfs_client_ctx_t *ctx, loc_t *old, loc_t *new)
-{
- call_stub_t *stub = NULL;
- libgf_client_local_t *local = NULL;
- int op_ret = -1;
- inode_t *inode = NULL;
- struct stat *sbuf = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, link, local, old, new);
-
- op_ret = stub->args.link_cbk.op_ret;
- errno = stub->args.link_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "old %s, new %s, status %d,"
- " errno %d", old->path, new->path, op_ret, errno);
- if (op_ret == -1)
- goto out;
-
- inode = stub->args.link_cbk.inode;
- sbuf = &stub->args.link_cbk.buf;
- libgf_transform_devnum (ctx, sbuf);
- inode_link (inode, new->parent, basename ((char *)new->path), sbuf);
- inode_lookup (inode);
- libgf_update_iattr_cache (inode, LIBGF_UPDATE_STAT, sbuf);
-
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_glh_link (glusterfs_handle_t handle, const char *oldpath,
- const char *newpath)
-{
- libglusterfs_client_ctx_t *ctx = handle;
- int op_ret = -1;
- loc_t old = {0,};
- loc_t new = {0,};
- char *oldname = NULL;
- char *newname = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, oldpath, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, newpath, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "old %s, new %s", oldpath,
- newpath);
- old.path = libgf_resolve_path_light ((char *)oldpath);
- if (!old.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&old, ctx, 1);
- if (op_ret == -1) {
- errno = ENOENT;
- goto out;
- }
-
- oldname = strdup (old.path);
- op_ret = libgf_client_loc_fill (&old, ctx, 0, old.parent->ino,
- basename (oldname));
- if (op_ret == -1) {
- errno = EINVAL;
- goto out;
- }
-
- if (S_ISDIR (old.inode->st_mode)) {
- errno = EPERM;
- op_ret = -1;
- goto out;
- }
-
- new.path = libgf_resolve_path_light ((char *)newpath);
- if (!new.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&new, ctx, 1);
- if (op_ret == 0) {
- errno = EEXIST;
- op_ret = -1;
- goto out;
- }
-
- newname = strdup (new.path);
- new.inode = inode_ref (old.inode);
- libgf_client_loc_fill (&new, ctx, 0, new.parent->ino,
- basename (newname));
- op_ret = libgf_client_link (ctx, &old, &new);
-
-out:
- if (oldname)
- FREE (oldname);
- if (newname)
- FREE (newname);
- libgf_client_loc_wipe (&old);
- libgf_client_loc_wipe (&new);
-
- return op_ret;
-}
-
-int
-glusterfs_link (const char *oldpath, const char *newpath)
-{
- struct vmp_entry *oldentry = NULL;
- struct vmp_entry *newentry = NULL;
- char *oldvpath = NULL;
- char *newvpath = NULL;
- int op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, oldpath, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, newpath, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "old %s, new %s", oldpath,
- newpath);
- oldentry = libgf_vmp_search_entry ((char *)oldpath);
- if (!oldentry) {
- errno = ENODEV;
- goto out;
- }
-
- newentry = libgf_vmp_search_entry ((char *)newpath);
- if (!newentry) {
- errno = ENODEV;
- goto out;
- }
-
- /* Cannot hard link across glusterfs mounts. */
- if (newentry != oldentry) {
- errno = EXDEV;
- goto out;
- }
-
- newvpath = libgf_vmp_virtual_path (newentry, newpath);
- oldvpath = libgf_vmp_virtual_path (oldentry, oldpath);
- op_ret = glusterfs_glh_link (newentry->handle, oldvpath, newvpath);
-out:
- if (newvpath)
- free (newvpath);
- if (oldvpath)
- free (oldvpath);
- return op_ret;
-}
-
-int32_t
-libgf_client_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_statfs_cbk_stub (frame, NULL, op_ret, op_errno,
- buf);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int32_t
-libgf_client_statvfs (libglusterfs_client_ctx_t *ctx, loc_t *loc,
- struct statvfs *buf)
-{
- call_stub_t *stub = NULL;
- libgf_client_local_t *local = NULL;
- int32_t op_ret = -1;
-
- /* statfs fop receives struct statvfs as an argument */
-
- /* libgf_client_statfs_cbk will be the callback, not
- libgf_client_statvfs_cbk. see definition of LIBGF_CLIENT_FOP
- */
- LIBGF_CLIENT_FOP (ctx, stub, statfs, local, loc);
-
- op_ret = stub->args.statfs_cbk.op_ret;
- errno = stub->args.statfs_cbk.op_errno;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, status %d, errno %d",
- loc->path, op_ret, errno);
- if (op_ret == -1)
- goto out;
-
- if (buf)
- memcpy (buf, &stub->args.statfs_cbk.buf, sizeof (*buf));
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_glh_statfs (glusterfs_handle_t handle, const char *path,
- struct statfs *buf)
-{
- struct statvfs stvfs = {0, };
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- loc.path = libgf_resolve_path_light ((char *)path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, "
- "returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_statvfs (ctx, &loc, &stvfs);
- if (op_ret == 0) {
-#ifdef GF_SOLARIS_HOST_OS
- buf->f_fstyp = 0;
- buf->f_bsize = stvfs.f_bsize;
- buf->f_blocks = stvfs.f_blocks;
- buf->f_bfree = stvfs.f_bfree;
- buf->f_files = stvfs.f_bavail;
- buf->f_ffree = stvfs.f_ffree;
-#else
- buf->f_type = 0;
- buf->f_bsize = stvfs.f_bsize;
- buf->f_blocks = stvfs.f_blocks;
- buf->f_bfree = stvfs.f_bfree;
- buf->f_bavail = stvfs.f_bavail;
- buf->f_files = stvfs.f_bavail;
- buf->f_ffree = stvfs.f_ffree;
- /* FIXME: buf->f_fsid has either "val" or "__val" as member
- based on conditional macro expansion. see definition of
- fsid_t - Raghu
- It seems have different structure member names on
- different archs, so I am stepping down to doing a struct
- to struct copy. :Shehjar
- */
- memcpy (&buf->f_fsid, &stvfs.f_fsid, sizeof (stvfs.f_fsid));
- buf->f_namelen = stvfs.f_namemax;
-#endif
- }
-
-out:
- if (name)
- FREE (name);
- libgf_client_loc_wipe (&loc);
- return op_ret;
-}
-
-int
-glusterfs_statfs (const char *path, struct statfs *buf)
-{
- struct vmp_entry *entry = NULL;
- char *vpath = NULL;
- int op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, buf, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- entry = libgf_vmp_search_entry ((char *)path);
- if (!entry) {
- errno = ENODEV;
- goto out;
- }
-
- vpath = libgf_vmp_virtual_path (entry, path);
- op_ret = glusterfs_glh_statfs (entry->handle, vpath, buf);
-out:
- if (vpath)
- free (vpath);
- return op_ret;
-}
-
-int
-glusterfs_glh_statvfs (glusterfs_handle_t handle, const char *path,
- struct statvfs *buf)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- loc.path = libgf_resolve_path_light ((char *)path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning"
- " EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_statvfs (ctx, &loc, buf);
- if (op_ret != -1)
- /* Should've been a call to libgf_transform_devnum but
- * that only handles struct stat
- */
- buf->f_fsid = (unsigned long)ctx->fake_fsid;
-
-out:
- if (name)
- FREE (name);
- libgf_client_loc_wipe (&loc);
- return op_ret;
-}
-
-int
-glusterfs_statvfs (const char *path, struct statvfs *buf)
-{
- struct vmp_entry *entry = NULL;
- char *vpath = NULL;
- int op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, buf, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- entry = libgf_vmp_search_entry ((char *)path);
- if (!entry) {
- errno = ENODEV;
- goto out;
- }
-
- vpath = libgf_vmp_virtual_path (entry, path);
- op_ret = glusterfs_glh_statvfs (entry->handle, vpath, buf);
-out:
- if (vpath)
- free (vpath);
- return op_ret;
-}
-
-int32_t
-libgf_client_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct stat *buf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_rename_cbk_stub (frame, NULL, op_ret, op_errno,
- buf);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int32_t
-libgf_client_rename (libglusterfs_client_ctx_t *ctx, loc_t *oldloc,
- loc_t *newloc)
-{
- int op_ret = -1;
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, rename, local, oldloc, newloc);
-
- op_ret = stub->args.rename_cbk.op_ret;
- errno = stub->args.rename_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "old %s, new %s, status %d, errno"
- " %d", oldloc->path, newloc->path, op_ret, errno);
- if (op_ret == -1)
- goto out;
-
- if (!libgf_get_inode_ctx (newloc->inode))
- libgf_alloc_inode_ctx (ctx, newloc->inode);
-
- libgf_transform_devnum (ctx, &stub->args.rename_cbk.buf);
- libgf_update_iattr_cache (newloc->inode, LIBGF_UPDATE_STAT,
- &stub->args.rename_cbk.buf);
-
- inode_unlink (oldloc->inode, oldloc->parent, oldloc->name);
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_glh_rename (glusterfs_handle_t handle, const char *oldpath,
- const char *newpath)
-{
- int32_t op_ret = -1;
- loc_t oldloc = {0, }, newloc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *newname = NULL;
- char *oldname = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, oldpath, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, newpath, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "old %s, new %s", oldpath,
- newpath);
- oldloc.path = libgf_resolve_path_light ((char *)oldpath);
- if (!oldloc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&oldloc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", oldloc.path);
- goto out;
- }
-
- newloc.path = libgf_resolve_path_light ((char *)newpath);
- if (!newloc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Path compaction failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&newloc, ctx, 1);
-
- oldname = strdup (oldloc.path);
- op_ret = libgf_client_loc_fill (&oldloc, ctx, 0, oldloc.parent->ino,
- basename (oldname));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1,"
- " returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- newname = strdup (newloc.path);
- op_ret = libgf_client_loc_fill (&newloc, ctx, 0, newloc.parent->ino,
- basename (newname));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1,"
- " returning EINVAL");
- errno = EINVAL;
- goto out;
- }
- op_ret = libgf_client_rename (ctx, &oldloc, &newloc);
-
-out:
- if (oldname)
- FREE (oldname);
- if (newname)
- FREE (newname);
- libgf_client_loc_wipe (&newloc);
- libgf_client_loc_wipe (&oldloc);
-
- return op_ret;
-}
-
-
-int
-glusterfs_rename (const char *oldpath, const char *newpath)
-{
- int op_ret = -1;
- struct vmp_entry *new = NULL;
- struct vmp_entry *old = NULL;
- char *oldvpath = NULL;
- char *newvpath = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, oldpath, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, newpath, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Old %s, new %s", oldpath,
- newpath);
- old = libgf_vmp_search_entry ((char *)oldpath);
- if (!old) {
- errno = ENODEV;
- goto out;
- }
-
- new = libgf_vmp_search_entry ((char *)newpath);
- if (!new) {
- errno = ENODEV;
- goto out;
- }
-
- if (old != new) {
- errno = EXDEV;
- goto out;
- }
-
- oldvpath = libgf_vmp_virtual_path (old, oldpath);
- newvpath = libgf_vmp_virtual_path (new, newpath);
- op_ret = glusterfs_glh_rename (old->handle, oldvpath, newvpath);
-
-out:
- if (oldvpath)
- free (oldvpath);
- if (newvpath)
- free (newvpath);
- return op_ret;
-}
-
-int32_t
-libgf_client_utimens_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct stat *buf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_utimens_cbk_stub (frame, NULL, op_ret,
- op_errno, buf);
-
- LIBGF_REPLY_NOTIFY (local);
-
- return 0;
-}
-
-int32_t
-libgf_client_utimens (libglusterfs_client_ctx_t *ctx, loc_t *loc,
- struct timespec ts[2])
-{
- int op_ret = -1;
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
- struct stat *stbuf = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, utimens, local, loc, ts);
-
- op_ret = stub->args.utimens_cbk.op_ret;
- errno = stub->args.utimens_cbk.op_errno;
- stbuf = &stub->args.utimens_cbk.buf;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, status %d, errno %d",
- loc->path, op_ret, errno);
- if (op_ret == -1)
- goto out;
-
- libgf_transform_devnum (ctx, stbuf);
- libgf_update_iattr_cache (loc->inode, LIBGF_UPDATE_STAT, stbuf);
-
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_glh_utimes (glusterfs_handle_t handle, const char *path,
- const struct timeval times[2])
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- struct timespec ts[2] = {{0,},{0,}};
- char *name = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- loc.path = libgf_resolve_path_light ((char *)path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1"
- " returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- ts[0].tv_sec = times[0].tv_sec;
- ts[0].tv_nsec = times[0].tv_usec * 1000;
- ts[1].tv_sec = times[1].tv_sec;
- ts[1].tv_nsec = times[1].tv_usec * 1000;
-
- op_ret = libgf_client_utimens (ctx, &loc, ts);
-out:
- if (name)
- FREE (name);
- libgf_client_loc_wipe (&loc);
- return op_ret;
-}
-
-int
-glusterfs_utimes (const char *path, const struct timeval times[2])
-{
- struct vmp_entry *entry = NULL;
- char *vpath = NULL;
- int op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- entry = libgf_vmp_search_entry ((char *)path);
- if (!entry) {
- errno = ENODEV;
- goto out;
- }
-
- vpath = libgf_vmp_virtual_path (entry, path);
- op_ret = glusterfs_glh_utimes (entry->handle, vpath, times);
-out:
- if (vpath)
- free (vpath);
- return op_ret;
-}
-
-int
-glusterfs_glh_utime (glusterfs_handle_t handle, const char *path,
- const struct utimbuf *buf)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- struct timespec ts[2] = {{0,},{0,}};
- char *name = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- loc.path = libgf_resolve_path_light ((char *)path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1,"
- " returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- ts[0].tv_sec = buf->actime;
- ts[0].tv_nsec = 0;
-
- ts[1].tv_sec = buf->modtime;
- ts[1].tv_nsec = 0;
-
- op_ret = libgf_client_utimens (ctx, &loc, ts);
-out:
- if (name)
- FREE (name);
- libgf_client_loc_wipe (&loc);
- return op_ret;
-}
-
-int
-glusterfs_utime (const char *path, const struct utimbuf *buf)
-{
- struct vmp_entry *entry = NULL;
- char *vpath = NULL;
- int op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, buf, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- entry = libgf_vmp_search_entry ((char *)path);
- if (!entry) {
- errno = ENODEV;
- goto out;
- }
-
- vpath = libgf_vmp_virtual_path (entry, path);
- op_ret = glusterfs_glh_utime (entry->handle, vpath, buf);
-out:
- if (vpath)
- free (vpath);
- return op_ret;
-}
-
-static int32_t
-libgf_client_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_mknod_cbk_stub (frame, NULL, op_ret, op_errno,
- inode, buf);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-static int32_t
-libgf_client_mknod (libglusterfs_client_ctx_t *ctx, loc_t *loc, mode_t mode,
- dev_t rdev)
-{
- int32_t op_ret = -1;
- call_stub_t *stub = NULL;
- libgf_client_local_t *local = NULL;
- inode_t *inode = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, mknod, local, loc, mode, rdev);
-
- op_ret = stub->args.mknod_cbk.op_ret;
- errno = stub->args.mknod_cbk.op_errno;
- if (op_ret == -1)
- goto out;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, status %d, errno %d",
- loc->path, op_ret, errno);
- inode = stub->args.mknod_cbk.inode;
- libgf_transform_devnum (ctx, &stub->args.mknod_cbk.buf);
- inode_link (inode, loc->parent, loc->name, &stub->args.mknod_cbk.buf);
- inode_lookup (inode);
-
- if (!libgf_alloc_inode_ctx (ctx, inode))
- libgf_alloc_inode_ctx (ctx, inode);
-
- libgf_update_iattr_cache (inode, LIBGF_UPDATE_STAT,
- &stub->args.mknod_cbk.buf);
-
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_glh_mknod(glusterfs_handle_t handle, const char *path, mode_t mode,
- dev_t dev)
-{
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t loc = {0, };
- char *name = NULL;
- int32_t op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- loc.path = libgf_resolve_path_light ((char *)path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == 0) {
- op_ret = -1;
- errno = EEXIST;
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 0);
- if (op_ret == -1) {
- errno = ENOENT;
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, "
- " returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- loc.inode = inode_new (ctx->itable);
- op_ret = libgf_client_mknod (ctx, &loc, mode, dev);
-
-out:
- libgf_client_loc_wipe (&loc);
- if (name)
- FREE (name);
-
- return op_ret;
-}
-
-int
-glusterfs_mknod(const char *pathname, mode_t mode, dev_t dev)
-{
- struct vmp_entry *entry = NULL;
- char *vpath = NULL;
- int op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, pathname, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", pathname);
- entry = libgf_vmp_search_entry ((char *)pathname);
- if (!entry) {
- errno = ENODEV;
- goto out;
- }
-
- vpath = libgf_vmp_virtual_path (entry, pathname);
- op_ret = glusterfs_glh_mknod (entry->handle, vpath, mode, dev);
-out:
- if (vpath)
- free (vpath);
- return op_ret;
-}
-
-int
-glusterfs_glh_mkfifo (glusterfs_handle_t handle, const char *path, mode_t mode)
-{
-
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t loc = {0, };
- char *name = NULL;
- int32_t op_ret = -1;
- dev_t dev = 0;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- loc.path = libgf_resolve_path_light ((char *)path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Failed to resolve name");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == 0) {
- op_ret = -1;
- errno = EEXIST;
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 0);
- if (op_ret == -1) {
- errno = ENOENT;
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, "
- "returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- loc.inode = inode_new (ctx->itable);
- op_ret = libgf_client_mknod (ctx, &loc, mode | S_IFIFO, dev);
-
-out:
- libgf_client_loc_wipe (&loc);
- if (name)
- free (name);
-
- return op_ret;
-}
-
-int
-glusterfs_mkfifo (const char *path, mode_t mode)
-{
- struct vmp_entry *entry = NULL;
- char *vpath = NULL;
- int op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- entry = libgf_vmp_search_entry ((char *)path);
- if (!entry) {
- errno = ENODEV;
- goto out;
- }
-
- vpath = libgf_vmp_virtual_path (entry, path);
- op_ret = glusterfs_glh_mkfifo (entry->handle, vpath, mode);
-out:
- if (vpath)
- free (vpath);
- return op_ret;
-}
-
-int32_t
-libgf_client_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_unlink_cbk_stub (frame, NULL, op_ret,
- op_errno);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_unlink (libglusterfs_client_ctx_t *ctx, loc_t *loc)
-{
- int op_ret = -1;
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, unlink, local, loc);
-
- op_ret = stub->args.unlink_cbk.op_ret;
- errno = stub->args.unlink_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", loc->path);
- if (op_ret == -1)
- goto out;
-
- inode_unlink (loc->inode, loc->parent, loc->name);
-
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_glh_unlink (glusterfs_handle_t handle, const char *path)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- loc.path = libgf_resolve_path_light ((char *)path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, "
- " returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_unlink (ctx, &loc);
-
-out:
- if (name)
- FREE (name);
- libgf_client_loc_wipe (&loc);
- return op_ret;
-}
-
-int
-glusterfs_unlink (const char *path)
-{
- struct vmp_entry *entry = NULL;
- char *vpath = NULL;
- int op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- entry = libgf_vmp_search_entry ((char *)path);
- if (!entry) {
- errno = ENODEV;
- goto out;
- }
-
- vpath = libgf_vmp_virtual_path (entry, path);
- op_ret = glusterfs_glh_unlink (entry->handle, vpath);
-out:
- if (vpath)
- free (vpath);
- return op_ret;
-}
-
-static int32_t
-libgf_client_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_symlink_cbk_stub (frame, NULL, op_ret,
- op_errno, inode, buf);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int32_t
-libgf_client_symlink (libglusterfs_client_ctx_t *ctx, const char *linkpath,
- loc_t *loc)
-{
- int op_ret = -1;
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
- inode_t *inode = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, symlink, local, linkpath, loc);
-
- op_ret = stub->args.symlink_cbk.op_ret;
- errno = stub->args.symlink_cbk.op_errno;
- if (op_ret == -1)
- goto out;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "target: %s, link: %s, status %d"
- " errno %d", linkpath, loc->path, op_ret, errno);
- inode = stub->args.symlink_cbk.inode;
- libgf_transform_devnum (ctx, &stub->args.symlink_cbk.buf);
- inode_link (inode, loc->parent, loc->name,
- &stub->args.symlink_cbk.buf);
- inode_lookup (inode);
- if (!libgf_get_inode_ctx (inode))
- libgf_alloc_inode_ctx (ctx, inode);
-
- libgf_update_iattr_cache (inode, LIBGF_UPDATE_STAT,
- &stub->args.symlink_cbk.buf);
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_glh_symlink (glusterfs_handle_t handle, const char *oldpath,
- const char *newpath)
-{
- int32_t op_ret = -1;
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t oldloc = {0, };
- loc_t newloc = {0, };
- char *oldname = NULL;
- char *newname = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, newpath, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "target: %s, link: %s", oldpath,
- newpath);
- /* Old path does not need to be interpreted or looked up */
- oldloc.path = strdup (oldpath);
-
- newloc.path = libgf_resolve_path_light ((char *)newpath);
- if (!newloc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&newloc, ctx, 1);
- if (op_ret == 0) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "new path (%s) already exists, "
- " returning EEXIST", newloc.path);
- op_ret = -1;
- errno = EEXIST;
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&newloc, ctx, 0);
- if (op_ret == -1) {
- errno = ENOENT;
- goto out;
- }
-
- newloc.inode = inode_new (ctx->itable);
- newname = strdup (newloc.path);
- op_ret = libgf_client_loc_fill (&newloc, ctx, 0, newloc.parent->ino,
- basename (newname));
-
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, "
- "returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_symlink (ctx, oldpath, &newloc);
-
-out:
- if (newname)
- FREE (newname);
-
- if (oldname)
- FREE (oldname);
- libgf_client_loc_wipe (&oldloc);
- libgf_client_loc_wipe (&newloc);
- return op_ret;
-}
-
-int
-glusterfs_symlink (const char *oldpath, const char *newpath)
-{
- struct vmp_entry *entry = NULL;
- char *vpath = NULL;
- int op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, oldpath, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, newpath, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "target: %s, link: %s", oldpath,
- newpath);
- entry = libgf_vmp_search_entry ((char *)newpath);
- if (!entry) {
- errno = ENODEV;
- goto out;
- }
-
- vpath = libgf_vmp_virtual_path (entry, newpath);
- op_ret = glusterfs_glh_symlink (entry->handle, oldpath, vpath);
-out:
- if (vpath)
- free (vpath);
- return op_ret;
-}
-
-
-int32_t
-libgf_client_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- const char *path)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_readlink_cbk_stub (frame, NULL, op_ret,
- op_errno, path);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int32_t
-libgf_client_readlink (libglusterfs_client_ctx_t *ctx, loc_t *loc, char *buf,
- size_t bufsize)
-{
- int op_ret = -1;
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
- size_t cpy_size = 0;
-
- LIBGF_CLIENT_FOP (ctx, stub, readlink, local, loc, bufsize);
-
- op_ret = stub->args.readlink_cbk.op_ret;
- errno = stub->args.readlink_cbk.op_errno;
-
- if (op_ret != -1) {
- cpy_size = ((op_ret <= bufsize) ? op_ret : bufsize);
- memcpy (buf, stub->args.readlink_cbk.buf, cpy_size);
- op_ret = cpy_size;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "link: %s, target: %s,"
- " status %d, errno %d", loc->path, buf, op_ret, errno);
- } else
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "link: %s, status %d, "
- "errno %d", loc->path, op_ret, errno);
-
- call_stub_destroy (stub);
- return op_ret;
-}
-
-ssize_t
-glusterfs_glh_readlink (glusterfs_handle_t handle, const char *path, char *buf,
- size_t bufsize)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- if (bufsize < 0) {
- errno = EINVAL;
- goto out;
- }
-
- if (bufsize == 0) {
- op_ret = 0;
- goto out;
- }
-
- loc.path = libgf_resolve_path_light ((char *)path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, "
- "returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_readlink (ctx, &loc, buf, bufsize);
-
-out:
- if (name)
- FREE (name);
-
- libgf_client_loc_wipe (&loc);
- return op_ret;
-}
-
-ssize_t
-glusterfs_readlink (const char *path, char *buf, size_t bufsize)
-{
- struct vmp_entry *entry = NULL;
- char *vpath = NULL;
- int op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, buf, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- entry = libgf_vmp_search_entry ((char *)path);
- if (!entry) {
- errno = ENODEV;
- goto out;
- }
-
- vpath = libgf_vmp_virtual_path (entry, path);
- op_ret = glusterfs_glh_readlink (entry->handle, vpath, buf, bufsize);
-out:
- if (vpath)
- free (vpath);
- return op_ret;
-}
-
-char *
-glusterfs_glh_realpath (glusterfs_handle_t handle, const char *path,
- char *resolved_path)
-{
- char *buf = NULL;
- char *rpath = NULL;
- char *start = NULL, *end = NULL;
- char *dest = NULL;
- libglusterfs_client_ctx_t *ctx = handle;
- long int path_max = 0;
- char *ptr = NULL;
- struct stat stbuf = {0, };
- long int new_size = 0;
- char *new_rpath = NULL;
- int dest_offset = 0;
- char *rpath_limit = 0;
- int ret = 0, num_links = 0;
- char *vpath = NULL, *tmppath = NULL;
- char absolute_path[PATH_MAX];
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
-#ifdef PATH_MAX
- path_max = PATH_MAX;
-#else
- path_max = pathconf (path, _PC_PATH_MAX);
- if (path_max <= 0) {
- path_max = 1024;
- }
-#endif
-
- if (resolved_path == NULL) {
- rpath = CALLOC (1, path_max);
- if (rpath == NULL) {
- errno = ENOMEM;
- goto out;
- }
- } else {
- rpath = resolved_path;
- }
-
- rpath_limit = rpath + path_max;
-
- if (path[0] == '/') {
- rpath[0] = '/';
- dest = rpath + 1;
- } else {
- /*
- FIXME: can $CWD be a valid path on glusterfs server? hence is
- it better to handle this case or just return EINVAL for
- relative paths?
- */
- ptr = getcwd (rpath, path_max);
- if (ptr == NULL) {
- goto err;
- }
- dest = rpath + strlen (rpath);
- }
-
- for (start = end = (char *)path; *end; start = end) {
- if (dest[-1] != '/') {
- *dest++ = '/';
- }
-
- while (*start == '/') {
- start++;
- }
-
- for (end = start; *end && *end != '/'; end++);
-
- if ((end - start) == 0) {
- break;
- }
-
- if ((end - start == 1) && (start[0] == '.')) {
- /* do nothing */
- } else if (((end - start) == 2) && (start[0] == '.')
- && (start[1] == '.')) {
- if (dest > rpath + 1) {
- while (--dest[-1] != '/');
- }
- } else {
- if ((dest + (end - start + 1)) >= rpath_limit) {
- if (resolved_path == NULL) {
- errno = ENAMETOOLONG;
- if (dest > rpath + 1)
- dest--;
- *dest = '\0';
- goto err;
- }
-
- dest_offset = dest - rpath;
- new_size = rpath_limit - rpath;
- if ((end - start + 1) > path_max) {
- new_size = (end - start + 1);
- } else {
- new_size = path_max;
- }
-
- new_rpath = realloc (rpath, new_size);
- if (new_rpath == NULL) {
- goto err;
- }
-
-
- dest = new_rpath + dest_offset;
- rpath = new_rpath;
- rpath_limit = rpath + new_size;
- }
-
- memcpy (dest, start, end - start);
- dest += end - start;
- *dest = '\0';
-
- ret = glusterfs_glh_lstat (handle, rpath, &stbuf);
- if (ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "glusterfs_glh_stat returned -1 for"
- " path (%s):(%s)", rpath,
- strerror (errno));
- goto err;
- }
-
- if (S_ISLNK (stbuf.st_mode)) {
- buf = calloc (1, path_max);
- if (buf == NULL) {
- errno = ENOMEM;
- goto err;
- }
-
- if (++num_links > MAXSYMLINKS)
- {
- errno = ELOOP;
- FREE (buf);
- goto err;
- }
-
- ret = glusterfs_glh_readlink (handle, rpath,
- buf,
- path_max - 1);
- if (ret < 0) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "glusterfs_readlink returned %d"
- " for path (%s):(%s)",
- ret, rpath, strerror (errno));
- FREE (buf);
- goto err;
- }
- buf[ret] = '\0';
-
- if (buf[0] != '/') {
- tmppath = strdup (rpath);
- tmppath = dirname (tmppath);
- sprintf (absolute_path, "%s/%s",
- tmppath, buf);
- FREE (buf);
- buf = libgf_resolve_path_light ((char *)absolute_path);
- FREE (tmppath);
- }
-
- rpath = glusterfs_glh_realpath (handle, buf,
- rpath);
- FREE (buf);
- if (rpath == NULL) {
- goto out;
- }
- dest = rpath + strlen (rpath);
-
- } else if (!S_ISDIR (stbuf.st_mode) && *end != '\0') {
- errno = ENOTDIR;
- goto err;
- }
- }
- }
- if (dest > rpath + 1 && dest[-1] == '/')
- --dest;
- *dest = '\0';
-
-out:
- if (vpath)
- FREE (vpath);
- return rpath;
-
-err:
- if (vpath)
- FREE (vpath);
- if (resolved_path == NULL) {
- FREE (rpath);
- }
-
- return NULL;
-}
-
-char *
-glusterfs_realpath (const char *path, char *resolved_path)
-{
- struct vmp_entry *entry = NULL;
- char *vpath = NULL;
- char *res = NULL;
- char *realp = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- entry = libgf_vmp_search_entry ((char *)path);
- if (!entry) {
- errno = ENODEV;
- goto out;
- }
-
- realp = CALLOC (PATH_MAX, sizeof (char));
- if (!realp)
- goto out;
-
- vpath = libgf_vmp_virtual_path (entry, path);
- res = glusterfs_glh_realpath (entry->handle, vpath, resolved_path);
- if (!res)
- goto out;
-
- strncpy (realp, entry->vmp, entry->vmplen-1);
- strcat (realp, res);
- strcpy (resolved_path, realp);
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, resolved %s", path,
- resolved_path);
-out:
- if (vpath)
- free (vpath);
-
- return realp;
-}
-
-int
-glusterfs_glh_remove (glusterfs_handle_t handle, const char *path)
-{
- loc_t loc = {0, };
- int op_ret = -1;
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, handle, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- loc.path = libgf_resolve_path_light ((char *)path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1)
- goto out;
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1)
- goto out;
-
- if (S_ISDIR (loc.inode->st_mode))
- op_ret = libgf_client_rmdir (ctx, &loc);
- else
- op_ret = libgf_client_unlink (ctx, &loc);
-
-out:
- if (name)
- FREE (name);
- return op_ret;
-
-}
-
-int
-glusterfs_remove(const char *pathname)
-{
- struct vmp_entry *entry = NULL;
- int op_ret = -1;
- char *vpath = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, pathname, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", pathname);
- entry = libgf_vmp_search_entry ((char *)pathname);
- if (!entry) {
- errno = ENODEV;
- goto out;
- }
-
- vpath = libgf_vmp_virtual_path (entry, pathname);
- op_ret = glusterfs_glh_remove (entry->handle, vpath);
-
-out:
- if (vpath)
- FREE (vpath);
- return op_ret;
-}
-
-void
-glusterfs_rewinddir (glusterfs_dir_t dirfd)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- fd_ctx = libgf_get_fd_ctx ((fd_t *)dirfd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = 0;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Offset: %"PRIu64,
- fd_ctx->offset);
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
-out:
- return;
-}
-
-void
-glusterfs_seekdir (glusterfs_dir_t dirfd, off_t offset)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- fd_ctx = libgf_get_fd_ctx ((fd_t *)dirfd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = offset;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Offset: %"PRIu64,
- fd_ctx->offset);
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
-out:
- return;
-}
-
-off_t
-glusterfs_telldir (glusterfs_dir_t dirfd)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- off_t off = -1;
-
- fd_ctx = libgf_get_fd_ctx ((fd_t *)dirfd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- off = fd_ctx->offset;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Offset: %"PRIu64,
- fd_ctx->offset);
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
-out:
- return off;
-}
-
-
-struct libgf_client_sendfile_data {
- int reads_sent;
- int reads_completed;
- int out_fd;
- int32_t op_ret;
- int32_t op_errno;
- pthread_mutex_t lock;
- pthread_cond_t cond;
-};
-
-int
-libgf_client_sendfile_read_cbk (int op_ret, int op_errno,
- glusterfs_iobuf_t *buf, void *cbk_data)
-{
- struct libgf_client_sendfile_data *sendfile_data = cbk_data;
- int bytes = 0;
-
- if (op_ret > 0) {
- bytes = writev (sendfile_data->out_fd, buf->vector, buf->count);
- if (bytes != op_ret) {
- op_ret = -1;
- op_errno = errno;
- }
-
- glusterfs_free (buf);
- }
-
- pthread_mutex_lock (&sendfile_data->lock);
- {
- if (sendfile_data->op_ret != -1) {
- if (op_ret == -1) {
- sendfile_data->op_ret = -1;
- sendfile_data->op_errno = op_errno;
- } else {
- sendfile_data->op_ret += op_ret;
- }
- }
-
- sendfile_data->reads_completed++;
-
- if (sendfile_data->reads_completed
- == sendfile_data->reads_sent) {
- pthread_cond_broadcast (&sendfile_data->cond);
- }
- }
- pthread_mutex_unlock (&sendfile_data->lock);
-
- return 0;
-}
-
-
-ssize_t
-glusterfs_sendfile (int out_fd, glusterfs_file_t in_fd, off_t *offset,
- size_t count)
-{
- ssize_t ret = -1;
- struct libgf_client_sendfile_data cbk_data = {0, };
- off_t off = -1;
- size_t size = 0;
- int flags = 0;
- int non_block = 0;
-
-
- pthread_mutex_init (&cbk_data.lock, NULL);
- pthread_cond_init (&cbk_data.cond, NULL);
- cbk_data.out_fd = out_fd;
-
- if (offset) {
- off = *offset;
- }
-
- flags = fcntl (out_fd, F_GETFL);
-
- if (flags != -1) {
- non_block = flags & O_NONBLOCK;
-
- if (non_block) {
- ret = fcntl (out_fd, F_SETFL, flags & ~O_NONBLOCK);
- }
- }
-
- while (count != 0) {
- /*
- * FIXME: what's the optimal size for reads and writes?
- */
- size = (count > LIBGF_SENDFILE_BLOCK_SIZE) ?
- LIBGF_SENDFILE_BLOCK_SIZE : count;
-
- /*
- * we don't wait for reply to previous read, we just send all
- * reads in a single go.
- */
- ret = glusterfs_read_async (in_fd, size, off,
- libgf_client_sendfile_read_cbk,
- &cbk_data);
- if (ret == -1) {
- break;
- }
-
- pthread_mutex_lock (&cbk_data.lock);
- {
- cbk_data.reads_sent++;
- }
- pthread_mutex_unlock (&cbk_data.lock);
-
- if (offset) {
- off += size;
- }
-
- count -= size;
- }
-
- pthread_mutex_lock (&cbk_data.lock);
- {
- /*
- * if we've not received replies to all the reads we've sent,
- * wait for them
- */
- if (cbk_data.reads_sent > cbk_data.reads_completed) {
- pthread_cond_wait (&cbk_data.cond,
- &cbk_data.lock);
- }
- }
- pthread_mutex_unlock (&cbk_data.lock);
-
- if (offset != NULL) {
- *offset = off;
- }
-
- /* if we were able to stack_wind all the reads */
-
- if (ret == 0) {
- ret = cbk_data.op_ret;
- errno = cbk_data.op_errno;
- }
-
- if (non_block) {
- fcntl (out_fd, F_SETFL, flags);
- }
-
- return ret;
-}
-
-
-static int32_t
-libgf_client_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct flock *lock)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_lk_cbk_stub (frame, NULL, op_ret, op_errno,
- lock);
-
- LIBGF_REPLY_NOTIFY (local);
-
- return 0;
-}
-
-
-int
-libgf_client_lk (libglusterfs_client_ctx_t *ctx, fd_t *fd, int cmd,
- struct flock *lock)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret;
- libgf_client_local_t *local = NULL;
-
- LIBGF_CLIENT_FOP(ctx, stub, lk, local, fd, cmd, lock);
-
- op_ret = stub->args.lk_cbk.op_ret;
- errno = stub->args.lk_cbk.op_errno;
- if (op_ret == 0) {
- *lock = stub->args.lk_cbk.lock;
- }
-
- call_stub_destroy (stub);
- return op_ret;
-}
-
-
-int
-glusterfs_fcntl (glusterfs_file_t fd, int cmd, ...)
-{
- int ret = -1;
- struct flock *lock = NULL;
- va_list ap;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- switch (cmd) {
- case F_SETLK:
- case F_SETLKW:
- case F_GETLK:
-#if F_SETLK != F_SETLK64
- case F_SETLK64:
-#endif
-#if F_SETLKW != F_SETLKW64
- case F_SETLKW64:
-#endif
-#if F_GETLK != F_GETLK64
- case F_GETLK64:
-#endif
- va_start (ap, cmd);
- lock = va_arg (ap, struct flock *);
- va_end (ap);
-
- if (!lock) {
- errno = EINVAL;
- goto out;
- }
-
- ret = libgf_client_lk (ctx, fd, cmd, lock);
- break;
-
- default:
- errno = EINVAL;
- break;
- }
-
-out:
- return ret;
-}
-
-
-static struct xlator_fops libgf_client_fops = {
-};
-
-static struct xlator_mops libgf_client_mops = {
-};
-
-static struct xlator_cbks libgf_client_cbks = {
- .forget = libgf_client_forget,
- .release = libgf_client_release,
- .releasedir = libgf_client_releasedir,
-};
-
-static inline xlator_t *
-libglusterfs_graph (xlator_t *graph)
-{
- int ret = 0;
- xlator_t *top = NULL;
- xlator_list_t *xlchild, *xlparent;
-
- top = CALLOC (1, sizeof (*top));
- ERR_ABORT (top);
-
- xlchild = CALLOC (1, sizeof(*xlchild));
- ERR_ABORT (xlchild);
- xlchild->xlator = graph;
- top->children = xlchild;
- top->ctx = graph->ctx;
- top->next = graph;
- top->name = strdup (LIBGF_XL_NAME);
-
- xlparent = CALLOC (1, sizeof(*xlparent));
- xlparent->xlator = top;
- graph->parents = xlparent;
- ret = asprintf (&top->type, LIBGF_XL_NAME);
- if (-1 == ret) {
- fprintf (stderr, "failed to set the top xl's type");
- }
-
- top->init = libgf_client_init;
- top->fops = &libgf_client_fops;
- top->mops = &libgf_client_mops;
- top->cbks = &libgf_client_cbks;
- top->notify = libgf_client_notify;
- top->fini = libgf_client_fini;
- // fill_defaults (top);
-
- return top;
-}
diff --git a/libglusterfsclient/src/libglusterfsclient.h b/libglusterfsclient/src/libglusterfsclient.h
deleted file mode 100755
index 960284e25..000000000
--- a/libglusterfsclient/src/libglusterfsclient.h
+++ /dev/null
@@ -1,1327 +0,0 @@
-/*
- Copyright (c) 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _LIBGLUSTERFSCLIENT_H
-#define _LIBGLUSTERFSCLIENT_H
-
-#ifndef __BEGIN_DECLS
-#ifdef __cplusplus
-#define __BEGIN_DECLS extern "C" {
-#else
-#define __BEGIN_DECLS
-#endif
-#endif
-
-#ifndef __END_DECLS
-#ifdef __cplusplus
-#define __END_DECLS }
-#else
-#define __END_DECLS
-#endif
-#endif
-
-
-__BEGIN_DECLS
-
-#include <stdio.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <dirent.h>
-#include <sys/statfs.h>
-#include <sys/statvfs.h>
-#include <utime.h>
-#include <sys/time.h>
-#include <stdint.h>
-
-typedef struct {
- struct iovec *vector;
- int count;
- void *iobref;
- void *dictref;
-} glusterfs_iobuf_t;
-
-
-typedef
-int (*glusterfs_readv_cbk_t) (int op_ret, int op_errno, glusterfs_iobuf_t *buf,
- void *cbk_data);
-
-typedef
-int (*glusterfs_write_cbk_t) (int op_ret, int op_errno, void *cbk_data);
-
-typedef
-int (*glusterfs_get_cbk_t) (int op_ret, int op_errno, glusterfs_iobuf_t *buf,
- struct stat *stbuf, void *cbk_data);
-
-
-/* Data Interface
- * The first section describes the data structures required for
- * using libglusterfsclient.
- */
-
-/* This structure needs to be filled up and
- * passed to te glusterfs_init function which uses
- * the params passed herein to initialize a glusterfs
- * client context and then connect to a glusterfs server.
- */
-typedef struct {
- char *logfile; /* Path to the file which will store
- the log.
- */
- char *loglevel; /* The log level required for
- reporting various events within
- libglusterfsclient.
- */
- struct {
- char *specfile; /* Users can either open a volume or
- specfile and assign the pointer to
- specfp, or just refer to the volume
- /spec file path in specfile.
- */
- FILE *specfp;
- };
- char *volume_name; /* The volume file could describe many
- volumes but the specific volume
- within that file is chosen by
- specifying the volume name here.
- */
- unsigned long lookup_timeout; /* libglusterclient provides the inode
- numbers to be cached by the library.
- The duration for which these are
- cached are defined by lookup_timeout
- . In Seconds.
- */
- unsigned long stat_timeout; /* The file attributes received from
- a stat syscall can also be cached
- for the duration specified in this
- member. In Seconds.
- */
-} glusterfs_init_params_t;
-
-
-
-/* This is the handle returned by glusterfs_init
- * once the initialization is complete.
- * Users should treat this as an opaque handle.
- */
-typedef void * glusterfs_handle_t;
-
-
-
-/* These identifiers are used as handles for files and dirs.
- * Users of libglusterfsclient should not in anyway try to interpret
- * the actual structures these will point to.
- */
-typedef void * glusterfs_file_t;
-typedef void * glusterfs_dir_t;
-
-
-/* Function Call Interface */
-/* libglusterfsclient initialization function.
- * @ctx : the structure described above filled with required values.
- * @fakefsid: User generated fsid to be used to identify this
- * volume.
- *
- * Returns NULL on failure and the non-NULL pointer on success.
- * On failure, the error description might be present in the logfile
- * depending on the log level.
- */
-glusterfs_handle_t
-glusterfs_init (glusterfs_init_params_t *ctx, uint32_t fakefsid);
-
-
-
-/* Used to destroy a glusterfs client context and the
- * connection to the glusterfs server.
- *
- * @handle : The glusterfs handle returned by glusterfs_init.
- */
-int
-glusterfs_fini (glusterfs_handle_t handle);
-
-
-
-/* libglusterfs client provides two interfaces.
- * 1. handle-based interface
- * Functions that comprise the handle-based interface accept the
- * glusterfs_handle_t as the first argument. It specifies the
- * glusterfs client context over which to perform the operation.
- *
- * 2. Virtual Mount Point based interface:
- * Functions that do not require a handle to be given in order to
- * identify which client context to operate on. This interface
- * internally determines the corresponding client context for the
- * given path. The down-side is that a virtual mount point (VMP) needs to be
- * registered with the library. A VMP is just a string that maps to a
- * glusterfs_handle_t. The advantage of a VMP based interface is that
- * a user program using multiple client contexts does not need to
- * maintain its own mapping between paths and the corresponding
- * handles.
- */
-
-
-
-/* glusterfs_mount is the function that allows users to register a VMP
- * along with the parameters, which will be used to initialize a
- * context. Applications calling glusterfs_mount do not need to
- * initialized a context using the glusterfs_init interface.
- *
- * @vmp : The virtual mount point.
- * @ipars : Initialization parameters populated as described
- * earlier.
- *
- * Returns 0 on success, and -1 on failure.
- */
-int
-glusterfs_mount (char *vmp, glusterfs_init_params_t *ipars);
-
-
-
-/* glusterfs_umount is the VMP equivalent of glusterfs_fini.
- *
- * @vmp : The VMP which was initialized using glusterfs_mount.
- *
- * Returns 0 on sucess, and -1 on failure.
- */
-int
-glusterfs_umount (char *vmp);
-
-
-/* glusterfs_umount_all unmounts all the mounts */
-int
-glusterfs_umount_all (void);
-
-
-/* For smaller files, application can use just
- * glusterfs_get/glusterfs_get_async
- * to read the whole content. Limit of the file-sizes to be read in
- * glusterfs_get/glusterfs_get_async is passed in the size argument
- */
-
-/* glusterfs_glh_get:
- * @handle : glusterfs handle
- * @path : path to be looked upon
- * @size : upper limit of file-sizes to be read in lookup
- * @stbuf : attribute buffer
- */
-
-int
-glusterfs_glh_get (glusterfs_handle_t handle, const char *path, void *buf,
- size_t size, struct stat *stbuf);
-
-int
-glusterfs_get (const char *path, void *buf, size_t size, struct stat *stbuf);
-
-int
-glusterfs_get_async (glusterfs_handle_t handle, const char *path, size_t size,
- glusterfs_get_cbk_t cbk, void *cbk_data);
-
-
-
-/* Opens a file. Corresponds to the open syscall.
- *
- * @handle : Handle returned from glusterfs_init
- * @path : Path to the file or directory on the glusterfs
- * export. Must be absolute to the export on the server.
- * @flags : flags to control open behaviour.
- * @... : The mode_t argument that defines the mode for a new
- * file, in case a new file is being created using the
- * O_CREAT flag in @flags.
- *
- * Returns a non-NULL handle on success. NULL on failure and sets
- * errno accordingly.
- */
-glusterfs_file_t
-glusterfs_glh_open (glusterfs_handle_t handle, const char *path, int flags,
- ...);
-
-
-/* Opens a file without having to specify a handle.
- *
- * @path : Path to the file to open in the glusterfs export.
- * The path to the file in glusterfs export must be
- * pre-fixed with the VMP string registered with
- * glusterfs_mount.
- * @flags : flags to control open behaviour.
- * @... : The mode_t argument that defines the mode for a new
- * file, in case a new file is being created using the
- * O_CREAT flag in @flags.
- *
- * Returns 0 on success, -1 on failure with errno set accordingly.
- */
-glusterfs_file_t
-glusterfs_open (const char *path, int flags, ...);
-
-
-
-/* Creates a file. Corresponds to the creat syscall.
- *
- * @handle : Handle returned from glusterfs_init
- * @path : Path to the file that needs to be created in the
- * glusterfs export.
- * @mode : File creation mode.
- *
- * Returns the file handle on success. NULL on error with errno set as
- * required.
- */
-glusterfs_file_t
-glusterfs_glh_creat (glusterfs_handle_t handle, const char *path, mode_t mode);
-
-
-
-/* VMP-based creat.
- * @path : Path to the file to be created. Must be
- * pre-prepended with the VMP string registered with
- * glusterfs_mount.
- * @mode : File creation mode.
- *
- * Returns file handle on success. NULL handle on error with errno set
- * accordingly.
- */
-glusterfs_file_t
-glusterfs_creat (const char *path, mode_t mode);
-
-
-
-/* Close the file identified by the handle.
- *
- * @fd : Closes the file.
- *
- * Returns 0 on success, -1 on error with errno set accordingly.
- */
-int
-glusterfs_close (glusterfs_file_t fd);
-
-
-
-/* Get struct stat for the file in path.
- *
- * @handle : The handle that identifies a glusterfs client
- * context.
- * @path : The file for which we need to get struct stat.
- * @stbuf : The buffer into which the file's stat is copied.
- *
- * Returns 0 on success and -1 on error with errno set accordingly.
- */
-int
-glusterfs_glh_stat (glusterfs_handle_t handle, const char *path,
- struct stat *stbuf);
-
-
-/* Get struct stat for file in path.
- *
- * @path : The file for which struct stat is required.
- * @sbuf : The buffer into which the stat structure is copied.
- *
- * Returns 0 on success and -1 on error with errno set accordingly.
- */
-int
-glusterfs_stat (const char *path, struct stat *buf);
-
-
-
-/* Gets stat struct for the file.
- *
- * @handle : The handle identifying a glusterfs client context.
- * @path : Path to the file for which stat structure is
- * required. If path is a symlink, the symlink is
- * interpreted and the stat structure returned for the
- * target of the link.
- * @buf : The buffer into which the stat structure is copied.
- *
- * Returns 0 on success and -1 on error with errno set accordingly.
- */
-int
-glusterfs_glh_lstat (glusterfs_handle_t handle, const char *path,
- struct stat *buf);
-
-
-
-/* Gets stat struct for a file.
- *
- * @path : The file to get the struct stat for.
- * @buf : The receiving struct stat buffer.
- *
- * Returns 0 on success and -1 on error with errno set accordingly.
- */
-int
-glusterfs_lstat (const char *path, struct stat *buf);
-
-
-
-/* Get stat structure for a file.
- *
- * @fd : The file handle identifying a file on the glusterfs
- * server.
- * @stbuf : The buffer into which the stat data is copied.
- *
- * Returns 0 on success and -1 on error with errno set accordingly.
- */
-int
-glusterfs_fstat (glusterfs_file_t fd, struct stat *stbuf);
-
-int
-glusterfs_glh_setxattr (glusterfs_handle_t handle, const char *path,
- const char *name, const void *value,
- size_t size, int flags);
-
-int
-glusterfs_glh_lsetxattr (glusterfs_handle_t handle, const char *path,
- const char *name, const void *value, size_t size,
- int flags);
-
-int
-glusterfs_setxattr (const char *path, const char *name, const void *value,
- size_t size, int flags);
-
-int
-glusterfs_lsetxattr (glusterfs_handle_t handle, const char *path,
- const char *name, const void *value, size_t size,
- int flags);
-
-int
-glusterfs_fsetxattr (glusterfs_file_t fd, const char *name, const void *value,
- size_t size, int flags);
-
-ssize_t
-glusterfs_glh_getxattr (glusterfs_handle_t handle, const char *path,
- const char *name, void *value, size_t size);
-
-ssize_t
-glusterfs_glh_lgetxattr (glusterfs_handle_t handle, const char *path,
- const char *name, void *value, size_t size);
-
-ssize_t
-glusterfs_getxattr (const char *path, const char *name, void *value,
- size_t size);
-
-ssize_t
-glusterfs_lgetxattr (const char *path, const char *name, void *value,
- size_t size);
-
-ssize_t
-glusterfs_fgetxattr (glusterfs_file_t fd, const char *name, void *value,
- size_t size);
-
-ssize_t
-glusterfs_listxattr (glusterfs_handle_t handle, const char *path, char *list,
- size_t size);
-
-ssize_t
-glusterfs_llistxattr (glusterfs_handle_t handle, const char *path, char *list,
- size_t size);
-
-ssize_t
-glusterfs_flistxattr (glusterfs_file_t fd, char *list, size_t size);
-
-int
-glusterfs_removexattr (glusterfs_handle_t handle, const char *path,
- const char *name);
-
-int
-glusterfs_lremovexattr (glusterfs_handle_t handle, const char *path,
- const char *name);
-
-int
-glusterfs_fremovexattr (glusterfs_file_t fd, const char *name);
-
-
-
-/* Read data from a file.
- * @fd : Handle returned by glusterfs_open or
- * glusterfs_glh_open.
- * @buf : Buffer to read the data into.
- * @nbytes : Number of bytes to read.
- *
- * Returns number of bytes actually read on success or -1 on error
- * with errno set to the appropriate error number.
- */
-ssize_t
-glusterfs_read (glusterfs_file_t fd, void *buf, size_t nbytes);
-
-
-
-/* Read data into an array of buffers.
- *
- * @fd : File handle returned by glusterfs_open or
- * glusterfs_glh_open.
- * @vec : Array of buffers into which the data is read.
- * @count : Number of iovecs referred to by vec.
- *
- * Returns number of bytes read on success or -1 on error with errno
- * set appropriately.
- */
-ssize_t
-glusterfs_readv (glusterfs_file_t fd, const struct iovec *vec, int count);
-
-int
-glusterfs_read_async (glusterfs_file_t fd, size_t nbytes, off_t offset,
- glusterfs_readv_cbk_t readv_cbk, void *cbk_data);
-
-
-
-/* Write data into a file.
- *
- * @fd : File handle returned from glusterfs_open or
- * glusterfs_glh_open.
- * @buf : Buffer which is written to the file.
- * @nbytes : Number bytes of the @buf written to the file.
- *
- * On success, returns number of bytes written. On error, returns -1
- * with errno set appropriately.
- */
-ssize_t
-glusterfs_write (glusterfs_file_t fd, const void *buf, size_t nbytes);
-
-
-
-/* Writes an array of buffers into a file.
- *
- * @fd : The file handle returned from glusterfs_open or
- * glusterfs_glh_open.
- * @vector : Array of buffers to be written to the file.
- * @count : Number of separate buffers in the @vector array.
- *
- * Returns number of bytes written on success or -1 on error with
- * errno set approriately.
- */
-ssize_t
-glusterfs_writev (glusterfs_file_t fd, const struct iovec *vector, int count);
-
-int
-glusterfs_write_async (glusterfs_file_t fd, const void *buf, size_t nbytes,
- off_t offset, glusterfs_write_cbk_t write_cbk,
- void *cbk_data);
-
-int
-glusterfs_writev_async (glusterfs_file_t fd, const struct iovec *vector,
- int count, off_t offset,
- glusterfs_write_cbk_t write_cbk, void *cbk_data);
-
-
-
-/* Read from a file starting at a given offset.
- *
- * @fd : File handle returned from glusterfs_open or
- * glusterfs_glh_open.
- * @buf : Buffer to read the data into.
- * @nbytes : Number of bytes to read.
- * @offset : The offset to start reading @nbytes from.
- *
- * Returns number of bytes read on success or -1 on error with errno
- * set appropriately.
- */
-ssize_t
-glusterfs_pread (glusterfs_file_t fd, void *buf, size_t nbytes, off_t offset);
-
-
-
-/* Write to a file starting at a given offset.
- *
- * @fd : Flie handle returned from glusterfs_open or
- * glusterfs_glh_open.
- * @buf : Buffer that will be written to the file.
- * @nbytes : Number of bytes to write from @buf.
- * @offset : The starting offset from where @nbytes will be
- * written.
- *
- * Returns number of bytes written on success and -1 on error with
- * errno set appropriately.
- */
-ssize_t
-glusterfs_pwrite (glusterfs_file_t fd, const void *buf, size_t nbytes,
- off_t offset);
-
-
-
-/* Seek to an offset in the file.
- *
- * @fd : File handle in which to seek to. File handle
- * returned by glusterfs_open or glusterfs_glh_open.
- * @offset : Offset to seek to in the given file.
- * @whence : Determines how the offset is interpreted by this
- * syscall. The behaviour is similar to the options
- * provided by the POSIX lseek system call. See man lseek
- * for more details.
- *
- * On success, returns the resulting absolute offset in the file after the seek
- * operation is performed. ON error, returns -1 with errno set
- * appropriately.
- */
-off_t
-glusterfs_lseek (glusterfs_file_t fd, off_t offset, int whence);
-
-
-
-/* Create a directory.
- *
- * @handle : The handle of the glusterfs context in which the
- * directory needs to be created.
- * @path : The absolute path within the glusterfs context where
- * the directory needs to be created.
- * @mode : The mode bits for the newly created directory.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_mkdir (glusterfs_handle_t handle, const char *path, mode_t mode);
-
-
-
-/* Create a directory.
- *
- * @path : Path to the directory that needs to be created. This
- * path must be prefixed with the VMP of the particular glusterfs
- * context.
- * @mode : Mode flags for the newly created directory.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_mkdir (const char *path, mode_t mode);
-
-
-
-/* Remove a directory.
- *
- * @handle : Handle of the glusterfs context from which to remove
- * the directory.
- * @path : The path of the directory to be removed in the glusterfs
- * context.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_rmdir (glusterfs_handle_t handle, const char *path);
-
-
-
-/* Remove a directory.
- *
- * @path : The absolute path to the directory to be removed.
- * This path must be pre-fixed with the VMP of the
- * particular glusterfs context in which this directory
- * resides.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_rmdir (const char *path);
-
-
-
-/* Read directory entries.
- *
- * @fd : The handle of the directory to be read. This handle
- * is the one returned by opendir.
- *
- * Returns the directory entry on success and NULL pointer on error
- * with errno set appropriately.
- */
-struct dirent *
-glusterfs_readdir (glusterfs_dir_t dirfd);
-
-
-
-/* re-entrant version of glusterfs_readdir.
- *
- * @dirfd : The handle of directory to be read. This handle is the one
- * returned by opendir.
- * @entry : Pointer to storage to store a directory entry. The storage
- * pointed to by entry shall be large enough for a dirent with
- * an array of char d_name members containing at least
- * {NAME_MAX}+1 elements.
- * @result : Upon successful return, the pointer returned at *result shall
- * have the same value as the argument entry. Upon reaching the
- * end of the directory stream, this pointer shall have the
- * value NULL.
- */
-int
-glusterfs_readdir_r (glusterfs_dir_t dirfd, struct dirent *entry,
- struct dirent **result);
-
-/* Close a directory handle.
- *
- * @fd : The directory handle to be closed.
- *
- * Returns 0 on success and -1 on error with errno set to 0.
- */
-int
-glusterfs_closedir (glusterfs_dir_t dirfd);
-/* FIXME: remove getdents */
-int
-glusterfs_getdents (glusterfs_dir_t fd, struct dirent *dirp,
- unsigned int count);
-
-
-
-/* Create device node.
- *
- * @handle : glusterfs context in which to create the device
- * node.
- * @pathname : The absolute path of the device to be created in the
- * given glusterfs context.
- *
- * @mode : Mode flags to apply to the newly created node.
- * @dev : Device numbers that will apply to the node.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_mknod(glusterfs_handle_t handle, const char *pathname,
- mode_t mode, dev_t dev);
-
-
-
-/* Create a device node.
- *
- * @pathname : The full path of the node to be created. This path
- * should be pre-pended with the VMP of the glusterfs
- * context in which this node is to be created.
- * @mode : Mode flags that will be applied to the newly created
- * device file.
- * @dev : The device numbers that will be associated with the
- * device node.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_mknod(const char *pathname, mode_t mode, dev_t dev);
-
-
-
-/* Returns the real absolute path of the given path.
- *
- * @handle : The glusterfs context in which the path resides in.
- * @path : The path to be resolved.
- * @resolved_path : The resolved path is stored in this buffer
- * provided by the caller.
- *
- * Returns a pointer to resolved_path on success and NULL on error
- * with errno set appropriately.
- *
- * See man realpath for details.
- */
-char *
-glusterfs_glh_realpath (glusterfs_handle_t handle, const char *path,
- char *resolved_path);
-
-
-/* Returns the real absolute path of the given path.
- *
- * @path : The path to be resolved. This path must be
- * pre-fixed with the VMP of the glusterfs
- * context in which the file resides.
- *
- * @resolved_path : The resolved path is stored in this user
- * provided buffer.
- *
- * Returns a pointer to resolved_path on success, and NULL on error
- * with errno set appropriately.
- */
-char *
-glusterfs_realpath (const char *path, char *resolved_path);
-
-
-
-/* Change mode flags on a path.
- *
- * @handle : Handle of the glusterfs instance in which the path
- * resides.
- * @path : The path whose mode bits need to be changed.
- * @mode : The new mode bits.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_chmod (glusterfs_handle_t handle, const char *path, mode_t mode);
-
-
-
-/* Change mode flags on a path.
- *
- * @path : The path whose mode bits need to be changed. The
- * path should be pre-fixed with the VMP that identifies the
- * glusterfs context within which the path resides.
- * @mode : The new mode bits.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_chmod (const char *path, mode_t mode);
-
-
-
-/* Change the owner of a path.
- * If @path is a symlink, it is dereferenced and the ownership change
- * happens on the target.
- *
- * @handle : Handle of the glusterfs context in which the path
- * resides.
- * @path : The path whose owner needs to be changed.
- * @owner : ID of the new owner.
- * @group : ID of the new group.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_chown (glusterfs_handle_t handle, const char *path, uid_t owner,
- gid_t group);
-
-
-
-/* Change the owner of a path.
- *
- * If @path is a symlink, it is dereferenced and the ownership change
- * happens on the target.
- * @path : The path whose owner needs to be changed. Path must
- * be pre-fixed with the VMP that identifies the
- * glusterfs context in which the path resides.
- * @owner : ID of the new owner.
- * @group : ID of the new group.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_chown (const char *path, uid_t owner, gid_t group);
-
-
-
-/* Change the owner of the file.
- *
- * @fd : Handle of the file whose owner needs to be changed.
- * @owner : ID of the new owner.
- * @group : ID of the new group.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_fchown (glusterfs_file_t fd, uid_t owner, gid_t group);
-
-
-
-/* Open a directory.
- *
- * @handle : Handle that identifies a glusterfs context.
- * @path : Path to the directory in the glusterfs context.
- *
- * Returns a non-NULL handle on success and NULL on failure with errno
- * set appropriately.
- */
-glusterfs_dir_t
-glusterfs_glh_opendir (glusterfs_handle_t handle, const char *path);
-
-
-
-/* Open a directory.
- *
- * @path : Path to the directory. The path must be prepended
- * with the VMP in order to identify the glusterfs
- * context in which path resides.
- *
- * Returns a non-NULL handle on success and NULL on failure with errno
- * set appropriately.
- */
-glusterfs_dir_t
-glusterfs_opendir (const char *path);
-
-
-
-/* Change the mode bits on an open file.
- *
- * @fd : The file whose mode bits need to be changed.
- * @mode : The new mode bits.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_fchmod (glusterfs_file_t fd, mode_t mode);
-
-
-
-/* Sync the file contents to storage.
- *
- * @fd : The file whose contents need to be sync'ed to
- * storage.
- *
- * Return 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_fsync (glusterfs_file_t *fd);
-
-
-
-/* Truncate an open file.
- *
- * @fd : The file to truncate.
- * @length : The length to truncate to.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_ftruncate (glusterfs_file_t fd, off_t length);
-
-
-
-/* Create a hard link between two paths.
- *
- * @handle : glusterfs context in which both paths should reside.
- * @oldpath : The existing path to link to.
- * @newpath : The new path which will be linked to @oldpath.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_link (glusterfs_handle_t handle, const char *oldpath,
- const char *newpath);
-
-
-
-/* Create a hard link between two paths.
- *
- * @oldpath : The existing path to link to.
- * @newpath : The new path which will be linked to @oldpath.
- *
- * Both paths should exist on the same glusterfs context and should be
- * prefixed with the same VMP.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_link (const char *oldpath, const char *newpath);
-
-
-
-/* Get stats about the underlying file system.
- *
- * @handle : Identifies the glusterfs context in which resides
- * the given path.
- * @path : stats are returned for the file system on which file
- * is located.
- * @buf : The buffer into which the stats are copied.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_statfs (glusterfs_handle_t handle, const char *path,
- struct statfs *buf);
-
-
-
-/* Get stats about the underlying file system.
- *
- * @path : stats are returned for the file system on which file
- * is located. @path must start with the VMP of the
- * glusterfs context on which the file reside.
- * @buf : The buffer into which the stats are copied.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_statfs (const char *path, struct statfs *buf);
-
-
-
-/* Get stats about the underlying file system.
- *
- * @handle : Identifies the glusterfs context in which resides
- * the given path.
- * @path : stats are returned for the file system on which file
- * is located.
- * @buf : The buffer into which the stats are copied.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_statvfs (glusterfs_handle_t handle, const char *path,
- struct statvfs *buf);
-
-
-
-/* Get stats about the underlying file system.
- *
- * @path : stats are returned for the file system on which file
- * is located. @path must start with the VMP of the
- * glusterfs context on which the file reside.
- * @buf : The buffer into which the stats are copied.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_statvfs (const char *path, struct statvfs *buf);
-
-
-
-/* Set the atime and mtime values for a given path.
- *
- * @handle : The handle identifying the glusterfs context.
- * @path : The path for which the times need to be changed.
- * @times : The array containing new time stamps for the file.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_utimes (glusterfs_handle_t handle, const char *path,
- const struct timeval times[2]);
-
-
-
-/* Set the atime and mtime values for a given path.
- *
- * @path : The path for which the times need to be changed.
- * @times : The array containing new time stamps for the file.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_utimes (const char *path, const struct timeval times[2]);
-
-
-
-/* Set the atime and mtime values for a given path.
- *
- * @handle : The handle identifying the glusterfs context.
- * @path : The path for which the times need to be changed.
- * @buf : The structure containing new time stamps for the file.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_utime (glusterfs_handle_t handle, const char *path,
- const struct utimbuf *buf);
-
-
-
-/* Set the atime and mtime values for a given path.
- *
- * @path : The path for which the times need to be changed.
- * @buf : The structure containing new time stamps for the file.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_utime (const char *path, const struct utimbuf *buf);
-
-
-
-/* Create FIFO at the given path.
- *
- * @handle : The glusterfs context in which to create that FIFO.
- * @path : The path within the context where the FIFO is to be
- * created.
- * @mode : The mode bits for the newly create FIFO.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_mkfifo (glusterfs_handle_t handle, const char *path,
- mode_t mode);
-
-
-
-/* Create FIFO at the given path.
- *
- * @path : The path within the context where the FIFO is to be
- * created. @path should begin with the VMP of the
- * glusterfs context in which the FIFO needs to be
- * created.
- * @mode : The mode bits for the newly create FIFO.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_mkfifo (const char *path, mode_t mode);
-
-
-
-/* Unlink a file.
- *
- * @handle : Handle that identifies a glusterfs instance.
- * @path : Path in the glusterfs instance that needs to be
- * unlinked.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_unlink (glusterfs_handle_t handle, const char *path);
-
-
-
-/* Unlink a file.
- *
- * @path : Path in the glusterfs instance that needs to be
- * unlinked.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_unlink (const char *path);
-
-
-
-/* Create a symbolic link.
- *
- * @handle : The handle identifying the glusterfs context.
- * @oldpath : The existing path to which a symlink needs to be
- * created.
- * @newpath : The new path which will be symlinked to the
- * @oldpath.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_symlink (glusterfs_handle_t handle, const char *oldpath,
- const char *newpath);
-
-
-
-/* Create a symbolic link.
- *
- * @oldpath : The existing path to which a symlink needs to be
- * created.
- * @newpath : The new path which will be symlinked to the
- * @oldpath.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_symlink (const char *oldpath, const char *newpath);
-
-
-
-/* Read a symbolic link.
- *
- * @handle : Handle identifying the glusterfs context.
- * @path : The symlink that needs to be read.
- * @buf : The buffer into which the target of @path will be
- * stored.
- * @bufsize : Size of the buffer allocated to @buf.
- *
- * Returns number of bytes copied into @buf and -1 on error with errno
- * set appropriately.
- */
-ssize_t
-glusterfs_glh_readlink (glusterfs_handle_t handle, const char *path, char *buf,
- size_t bufsize);
-
-
-
-/* Read a symbolic link.
- *
- * @path : The symlink that needs to be read.
- * @buf : The buffer into which the target of @path will be
- * stored.
- * @bufsize : Size of the buffer allocated to @buf.
- *
- * Returns number of bytes copied into @buf and -1 on error with errno
- * set appropriately.
- */
-ssize_t
-glusterfs_readlink (const char *path, char *buf, size_t bufsize);
-
-
-
-/* Rename a file or directory.
- *
- * @handle : The identifier of a glusterfs context.
- * @oldpath : The path to be renamed.
- * @newpath : The new name for the @oldpath.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_rename (glusterfs_handle_t handle, const char *oldpath,
- const char *newpath);
-
-
-
-/* Rename a file or directory.
- * @oldpath : The path to be renamed.
- * @newpath : The new name for the @oldpath.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_rename (const char *oldpath, const char *newpath);
-
-
-
-/* Remove a file or directory in the given glusterfs context.
- *
- * @handle : Handle identifying the glusterfs context.
- * @path : Path of the file or directory to be removed.
- *
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_remove (glusterfs_handle_t handle, const char *path);
-
-
-
-/* Remove a file or directory.
- *
- * @path : Path of the file or directory to be removed. The
- * path must be pre-fixed with the VMP.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_remove (const char *path);
-
-
-
-/* Change the owner of the given path.
- *
- * If @path is a symlink, the ownership change happens on the symlink.
- *
- * @handle : Handle identifying the glusterfs client context.
- * @path : Path whose owner needs to be changed.
- * @owner : New owner ID
- * @group : New Group ID
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_lchown (glusterfs_handle_t handle, const char *path, uid_t owner,
- gid_t group);
-
-
-
-/* Change the owner of the given path.
- *
- * If @path is a symlink, the ownership change happens on the symlink.
- *
- * @path : Path whose owner needs to be changed.
- * @owner : New owner ID
- * @group : New Group ID
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-
-int
-glusterfs_lchown (const char *path, uid_t owner, gid_t group);
-
-
-
-/* Rewind directory stream pointer to beginning of the directory.
- *
- * @dirfd : Directory handle returned by glusterfs_open on
- * glusterfs_opendir.
- *
- * Returns no value.
- */
-void
-glusterfs_rewinddir (glusterfs_dir_t dirfd);
-
-
-
-/* Seek to the given offset in the directory handle.
- *
- * @dirfd : Directory handle returned by glusterfs_open on
- * glusterfs_opendir.
- * @offset : The offset to seek to.
- *
- * Returns no value.
- */
-void
-glusterfs_seekdir (glusterfs_dir_t dirfd, off_t offset);
-
-
-
-/* Return the current offset in a directory stream.
- *
- * @dirfd : Directory handle returned by glusterfs_open on
- * glusterfs_opendir.
- *
- * Returns the offset in the directory or -1 on error with errno set
- * appropriately.
- */
-off_t
-glusterfs_telldir (glusterfs_dir_t dirfd);
-
-
-/* Write count bytes from in_fd to out_fd, starting at *offset.
- * glusterfs_sendfile aims at eliminating memory copy at the end of
- * each read from in_fd, copying the file directly to out_fd from the buffer
- * provided by glusterfs.
- *
- * @out_fd: file descriptor opened for writing
- *
- * @in_fd: glusterfs file handle to the file to be read from.
- *
- * @offset: If offset is not NULL, then it points to a variable holding the file
- * offset from which glusterfs_sendfile() will start reading data
- * from in_fd. When glusterfs_sendfile() returns, this variable will
- * be set to the offset of the byte following the last byte that was
- * read. If offset is not NULL, then glusterfs_sendfile() does not
- * modify the current file offset of in_fd; otherwise the current file
- * offset is adjusted to reflect the number of bytes read from in_fd.
- *
- * @count: number of bytes to copy between the file descriptors.
- */
-
-ssize_t
-glusterfs_sendfile (int out_fd, glusterfs_file_t in_fd, off_t *offset,
- size_t count);
-
-/* manipulate file descriptor
- * This api can have 3 forms similar to fcntl(2).
- *
- * int
- * glusterfs_fcntl (glusterfs_file_t fd, int cmd)
- *
- * int
- * glusterfs_fcntl (glusterfs_file_t fd, int cmd, long arg)
- *
- * int
- * glusterfs_fcntl (glusterfs_file_t fd, int cmd, struct flock *lock)
- *
- * @fd : file handle returned by glusterfs_open or glusterfs_create.
- * @cmd : Though the aim is to implement all possible commands supported by
- * fcntl(2), currently following commands are supported.
- * F_SETLK, F_SETLKW, F_GETLK - used to acquire, release, and test for
- * the existence of record locks (also
- * known as file-segment or file-region
- * locks). More detailed explanation is
- * found in 'man 2 fcntl'
- */
-
-int
-glusterfs_fcntl (glusterfs_file_t fd, int cmd, ...);
-
-
-/* FIXME: review the need for these apis */
-/* added for log related initialization in booster fork implementation */
-void
-glusterfs_reset (void);
-
-void
-glusterfs_log_lock (void);
-
-void
-glusterfs_log_unlock (void);
-/* Used to free the glusterfs_read_buf passed to the application from
- glusterfs_read_async_cbk
-*/
-void
-glusterfs_free (glusterfs_iobuf_t *buf);
-
-__END_DECLS
-
-#endif /* !_LIBGLUSTERFSCLIENT_H */
diff --git a/mod_glusterfs/Makefile.am b/mod_glusterfs/Makefile.am
deleted file mode 100644
index 0abe8dcfc..000000000
--- a/mod_glusterfs/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = apache lighttpd
-
-CLEANFILES =
diff --git a/mod_glusterfs/apache/1.3/src/Makefile.am b/mod_glusterfs/apache/1.3/src/Makefile.am
deleted file mode 100644
index 6bb3075f5..000000000
--- a/mod_glusterfs/apache/1.3/src/Makefile.am
+++ /dev/null
@@ -1,30 +0,0 @@
-mod_glusterfs_PROGRAMS = mod_glusterfs.so
-mod_glusterfsdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/apache/1.3
-
-mod_glusterfs_so_SOURCES = mod_glusterfs.c
-
-all: mod_glusterfs.so
-
-mod_glusterfs.so: $(top_srcdir)/mod_glusterfs/apache/1.3/src/mod_glusterfs.c $(top_builddir)/libglusterfsclient/src/libglusterfsclient.la
- ln -sf $(top_srcdir)/mod_glusterfs/apache/1.3/src/mod_glusterfs.c $(top_builddir)/mod_glusterfs/apache/1.3/src/mod_glusterfs-build.c
- $(APXS) -c -Wc,-g3 -Wc,-O0 -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -D_GNU_SOURCE -I$(top_srcdir)/libglusterfsclient/src -Wl,-rpath,$(libdir) -Wl,-rpath,$(top_builddir)/libglusterfsclient/src/.libs/ $(top_builddir)/libglusterfsclient/src/.libs/libglusterfsclient.so mod_glusterfs-build.c -o $(top_builddir)/mod_glusterfs/apache/1.3/src/mod_glusterfs.so
-
-$(top_builddir)/libglusterfsclient/src/libglusterfsclient.la:
- $(MAKE) -C $(top_builddir)/libglusterfsclient/src/ all
-
-install-data-local:
- @echo ""
- @echo ""
- @echo "**********************************************************************************"
- @echo "* TO INSTALL MODGLUSTERFS, PLEASE USE, "
- @echo "* $(APXS) -n glusterfs -ia $(mod_glusterfsdir)/mod_glusterfs.so "
- @echo "**********************************************************************************"
- @echo ""
- @echo ""
-
-#install:
-# cp -fv mod_glusterfs.so $(HTTPD_LIBEXECDIR)
-# cp -fv httpd.conf $(HTTPD_CONF_DIR)
-
-clean:
- -rm -fv *.so *.o mod_glusterfs-build.c
diff --git a/mod_glusterfs/apache/1.3/src/README.txt b/mod_glusterfs/apache/1.3/src/README.txt
deleted file mode 100644
index 378a51d79..000000000
--- a/mod_glusterfs/apache/1.3/src/README.txt
+++ /dev/null
@@ -1,107 +0,0 @@
-What is mod_glusterfs?
-======================
-* mod_glusterfs is a module for apache written for efficient serving of files from glusterfs.
- mod_glusterfs interfaces with glusterfs using apis provided by libglusterfsclient.
-
-* this README speaks about installation of apache-1.3.x, where x is any minor version.
-
-Prerequisites for mod_glusterfs
-===============================
-Though mod_glusterfs has been written as a module, with an intent of making no changes to the way apache has
-been built, currently following points have to be taken care of:
-
-* module "so" has to be enabled, for apache to support modules.
-* since glusterfs is compiled with _FILE_OFFSET_BITS=64 and __USE_FILE_OFFSET64 flags, mod_glusterfs and apache
- in turn have to be compiled with the above two flags.
-
- $ tar xzvf apache-1.3.9.tar.gz
- $ cd apache-1.3.9/
- $ # add -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 to EXTRA_CFLAGS in src/Configuration.
- $ ./configure --prefix=/usr --enable-module=so
- $ cd src
- $ ./Configure
- $ cd ../
- $ make install
- $ httpd -l | grep -i mod_so
- mod_so.c
-
-* if multiple apache installations are present, make sure to pass --with-apxs=/path/to/apxs/of/proper/version to configure script while building glusterfs.
-
-Build/Install mod_glusterfs
-===========================
-* mod_glusterfs is provided with glusterfs--mainline--3.0 and all releases from the same branch.
-
-* building glusterfs also builds mod_glusterfs. But 'make install' of glusterfs installs mod_glusterfs.so to
- glusterfs install directory instead of the apache modules directory.
-
-* 'make install' of glusterfs will print a message similar to the one given below, which is self explanatory.
- Make sure to use apxs of proper apache version in case of multiple apache installations. This will copy
- mod_glusterfs.so to modules directory of proper apache version and modify the appropriate httpd.conf to enable
- mod_glusterfs.
-
-**********************************************************************************************
-* TO INSTALL MODGLUSTERFS, PLEASE USE,
-* apxs -n mod_glusterfs -ia /usr/lib/glusterfs/1.4.0pre2/apache-1.3/mod_glusterfs.so
-**********************************************************************************************
-
-Configuration
-=============
-* Following configuration has to be added to httpd.conf.
-
- <Location "/glusterfs">
- GlusterfsLogfile "/var/log/glusterfs/glusterfs.log"
- GlusterfsLoglevel "warning"
- GlusterfsVolumeSpecfile "/etc/glusterfs/glusterfs-client.spec"
- GlusterfsCacheTimeout "600"
- GlusterfsXattrFileSize "65536"
- SetHandler "glusterfs-handler"
- </Location>
-
-* GlusterfsVolumeSpecfile (COMPULSORY)
- Path to the the glusterfs volume specification file.
-
-* GlusterfsLogfile (COMPULSORY)
- Path to the glusterfs logfile.
-
-* GlusterfsLoglevel (OPTIONAL, default = warning)
- Severity of messages that are to be logged. Allowed values are critical, error, warning, debug, none
- in the decreasing order of severity.
-
-* GlusterfsCacheTimeOut (OPTIONAL, default = 0)
- Timeout values for glusterfs stat and lookup cache.
-
-* GlusterfsXattrFileSize (OPTIONAL, default = 0)
- Files with sizes upto and including this value are fetched through the extended attribute interface of
- glusterfs rather than the usual open-read-close set of operations. For files of small sizes, it is recommended
- to use extended attribute interface.
-
-* With the above configuration all the requests to httpd of the form www.example.org/glusterfs/path/to/file are
- served from glusterfs.
-
-Miscellaneous points
-====================
-* httpd by default runs with username "nobody" and group "nogroup". Permissions of logfile and specfile have to
- be set suitably.
-
-* Since mod_glusterfs runs with permissions of nobody.nogroup, glusterfs has to use only login based
- authentication. See docs/authentication.txt for more details.
-
-* To copy the data served by httpd into glusterfs mountpoint, glusterfs can be started with the
- volume-specification file provided to mod_glusterfs. Any tool like cp can then be used.
-
-* To run in gdb, apache has to be compiled with -lpthread, since libglusterfsclient is multithreaded.
- If not on Linux gdb runs into errors like:
- "Error while reading shared library symbols:
- Cannot find new threads: generic error"
-
-* when used with ib-verbs transport, ib_verbs initialization fails.
- reason for this is that apache runs as non-privileged user and the amount of memory that can be
- locked by default is not sufficient for ib-verbs. to fix this, as root run,
-
- # ulimit -l unlimited
-
- and then start apache.
-
-TODO
-====
-* directory listing for the directories accessed through mod_glusterfs.
diff --git a/mod_glusterfs/apache/1.3/src/mod_glusterfs.c b/mod_glusterfs/apache/1.3/src/mod_glusterfs.c
deleted file mode 100644
index 03026eac8..000000000
--- a/mod_glusterfs/apache/1.3/src/mod_glusterfs.c
+++ /dev/null
@@ -1,507 +0,0 @@
-/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef CORE_PRIVATE
-#define CORE_PRIVATE
-#endif
-
-#include <httpd.h>
-#include <http_config.h>
-#include <http_core.h>
-#include <http_request.h>
-#include <http_protocol.h>
-#include <http_log.h>
-#include <http_main.h>
-#include <util_script.h>
-#include <libglusterfsclient.h>
-#include <sys/uio.h>
-#include <pthread.h>
-
-#define GLUSTERFS_INVALID_LOGLEVEL "mod_glusterfs: Unrecognized log-level "\
- "\"%s\", possible values are \"DEBUG|"\
- "WARNING|ERROR|CRITICAL|NONE\"\n"
-
-#define GLUSTERFS_HANDLER "glusterfs-handler"
-#define GLUSTERFS_CHUNK_SIZE 131072
-
-module MODULE_VAR_EXPORT glusterfs_module;
-
-/*TODO: verify error returns to server core */
-
-typedef struct glusterfs_dir_config {
- char *logfile;
- char *loglevel;
- char *specfile;
- char *mount_dir;
- char *buf;
- size_t xattr_file_size;
- uint32_t cache_timeout;
-} glusterfs_dir_config_t;
-
-typedef struct glusterfs_async_local {
- int op_ret;
- int op_errno;
- char async_read_complete;
- off_t length;
- off_t read_bytes;
- glusterfs_iobuf_t *buf;
- request_rec *request;
- pthread_mutex_t lock;
- pthread_cond_t cond;
-}glusterfs_async_local_t;
-
-#define GLUSTERFS_CMD_PERMS ACCESS_CONF
-
-static glusterfs_dir_config_t *
-mod_glusterfs_dconfig(request_rec *r)
-{
- glusterfs_dir_config_t *dir_config = NULL;
- if (r->per_dir_config != NULL) {
- dir_config = ap_get_module_config (r->per_dir_config,
- &glusterfs_module);
- }
-
- return dir_config;
-}
-
-static
-const char *add_xattr_file_size(cmd_parms *cmd, void *dummy, char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- dir_config->xattr_file_size = atoi (arg);
- return NULL;
-}
-
-static
-const char *set_cache_timeout(cmd_parms *cmd, void *dummy, char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- dir_config->cache_timeout = atoi (arg);
- return NULL;
-}
-
-static
-const char *set_loglevel(cmd_parms *cmd, void *dummy, char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- char *error = NULL;
- if (strncasecmp (arg, "DEBUG", strlen ("DEBUG"))
- && strncasecmp (arg, "WARNING", strlen ("WARNING"))
- && strncasecmp (arg, "CRITICAL", strlen ("CRITICAL"))
- && strncasecmp (arg, "NONE", strlen ("NONE"))
- && strncasecmp (arg, "ERROR", strlen ("ERROR")))
- error = GLUSTERFS_INVALID_LOGLEVEL;
- else
- dir_config->loglevel = arg;
-
- return error;
-}
-
-static
-const char *add_logfile(cmd_parms *cmd, void *dummy, char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- dir_config->logfile = arg;
-
- return NULL;
-}
-
-static
-const char *add_specfile(cmd_parms *cmd, void *dummy, char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
-
- dir_config->specfile = arg;
-
- return NULL;
-}
-
-static void *
-mod_glusterfs_create_dir_config(pool *p, char *dirspec)
-{
- glusterfs_dir_config_t *dir_config = NULL;
-
- dir_config = (glusterfs_dir_config_t *) ap_pcalloc(p,
- sizeof(*dir_config));
-
- dir_config->mount_dir = dirspec;
- dir_config->logfile = dir_config->specfile = (char *)0;
- dir_config->loglevel = "warning";
- dir_config->cache_timeout = 0;
- dir_config->buf = NULL;
-
- return (void *) dir_config;
-}
-
-static void
-mod_glusterfs_child_init(server_rec *s, pool *p)
-{
- void **urls = NULL;
- int n, i;
- core_server_config *mod_core_config = ap_get_module_config (s->module_config,
- &core_module);
- glusterfs_dir_config_t *dir_config = NULL;
- glusterfs_init_params_t params = {0, };
-
- n = mod_core_config->sec_url->nelts;
- urls = (void **)mod_core_config->sec_url->elts;
- for (i = 0; i < n; i++) {
- dir_config = ap_get_module_config (urls[i], &glusterfs_module);
-
- if (dir_config) {
- memset (&params, 0, sizeof (params));
-
- params.logfile = dir_config->logfile;
- params.loglevel = dir_config->loglevel;
- params.lookup_timeout = dir_config->cache_timeout;
- params.stat_timeout = dir_config->cache_timeout;
- params.specfile = dir_config->specfile;
-
- glusterfs_mount (dir_config->mount_dir, &params);
- }
- dir_config = NULL;
- }
-}
-
-static void
-mod_glusterfs_child_exit(server_rec *s, pool *p)
-{
- void **urls = NULL;
- int n, i;
- core_server_config *mod_core_config = NULL;
- glusterfs_dir_config_t *dir_config = NULL;
-
- mod_core_config = ap_get_module_config (s->module_config, &core_module);
- n = mod_core_config->sec_url->nelts;
- urls = (void **)mod_core_config->sec_url->elts;
- for (i = 0; i < n; i++) {
- dir_config = ap_get_module_config (urls[i], &glusterfs_module);
- if (dir_config) {
- glusterfs_umount (dir_config->mount_dir);
- }
- }
-}
-
-static int mod_glusterfs_fixup(request_rec *r)
-{
- glusterfs_dir_config_t *dir_config = NULL;
- int access_status;
- int ret;
- char *path = NULL;
-
- dir_config = mod_glusterfs_dconfig(r);
-
- if (dir_config && dir_config->mount_dir
- && !(strncmp (ap_pstrcat (r->pool, dir_config->mount_dir, "/",
- NULL),
- r->uri, strlen (dir_config->mount_dir) + 1)
- && !r->handler))
- r->handler = ap_pstrdup (r->pool, GLUSTERFS_HANDLER);
-
- if (!r->handler || (r->handler && strcmp (r->handler,
- GLUSTERFS_HANDLER)))
- return DECLINED;
-
- path = r->uri;
-
- memset (&r->finfo, 0, sizeof (r->finfo));
-
- dir_config->buf = calloc (1, dir_config->xattr_file_size);
- if (!dir_config->buf) {
- return HTTP_INTERNAL_SERVER_ERROR;
- }
-
- ret = glusterfs_get (path, dir_config->buf,
- dir_config->xattr_file_size, &r->finfo);
-
- if (ret == -1 || r->finfo.st_size > dir_config->xattr_file_size
- || S_ISDIR (r->finfo.st_mode)) {
- free (dir_config->buf);
- dir_config->buf = NULL;
-
- if (ret == -1) {
- int error = HTTP_NOT_FOUND;
- char *emsg = NULL;
- if (r->path_info == NULL) {
- emsg = ap_pstrcat(r->pool, strerror (errno),
- r->filename, NULL);
- }
- else {
- emsg = ap_pstrcat(r->pool, strerror (errno),
- r->filename, r->path_info,
- NULL);
- }
- ap_log_rerror(APLOG_MARK, APLOG_ERR|APLOG_NOERRNO, r,
- "%s", emsg);
- if (errno != ENOENT) {
- error = HTTP_INTERNAL_SERVER_ERROR;
- }
- return error;
- }
- }
-
- if (r->uri && strlen (r->uri) && r->uri[strlen(r->uri) - 1] == '/')
- r->handler = NULL;
-
- r->filename = ap_pstrcat (r->pool, r->filename, r->path_info, NULL);
-
- if ((access_status = ap_find_types(r)) != 0) {
- return DECLINED;
- }
-
- return OK;
-}
-
-
-int
-mod_glusterfs_readv_async_cbk (int32_t op_ret, int32_t op_errno,
- glusterfs_iobuf_t *buf, void *cbk_data)
-{
- glusterfs_async_local_t *local = cbk_data;
-
- pthread_mutex_lock (&local->lock);
- {
- local->async_read_complete = 1;
- local->buf = buf;
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- pthread_cond_signal (&local->cond);
- }
- pthread_mutex_unlock (&local->lock);
-
- return 0;
-}
-
-/* use read_async just to avoid memcpy of read buffer in libglusterfsclient */
-static int
-mod_glusterfs_read_async (request_rec *r, glusterfs_file_t fd, off_t offset,
- off_t length)
-{
- glusterfs_async_local_t local;
- off_t end;
- int nbytes;
- int complete;
- pthread_cond_init (&local.cond, NULL);
- pthread_mutex_init (&local.lock, NULL);
-
- memset (&local, 0, sizeof (local));
- local.request = r;
-
- if (length > 0)
- end = offset + length;
-
- do {
- glusterfs_iobuf_t *buf;
- int i;
- if (length > 0) {
- nbytes = end - offset;
- if (nbytes > GLUSTERFS_CHUNK_SIZE)
- nbytes = GLUSTERFS_CHUNK_SIZE;
- } else
- nbytes = GLUSTERFS_CHUNK_SIZE;
-
- glusterfs_read_async(fd,
- nbytes,
- offset,
- mod_glusterfs_readv_async_cbk,
- (void *)&local);
-
- pthread_mutex_lock (&local.lock);
- {
- while (!local.async_read_complete) {
- pthread_cond_wait (&local.cond, &local.lock);
- }
-
- local.async_read_complete = 0;
- buf = local.buf;
-
- if (length < 0)
- complete = (local.op_ret <= 0);
- else {
- local.read_bytes += local.op_ret;
- complete = ((local.read_bytes == length)
- || (local.op_ret < 0));
- }
- }
- pthread_mutex_unlock (&local.lock);
-
- for (i = 0; i < buf->count; i++) {
- if (ap_rwrite (buf->vector[i].iov_base,
- buf->vector[i].iov_len, r) < 0) {
- local.op_ret = -1;
- complete = 1;
- break;
- }
- }
-
- glusterfs_free (buf);
-
- offset += nbytes;
- } while (!complete);
-
- return (local.op_ret < 0 ? SERVER_ERROR : OK);
-}
-
-static int
-mod_glusterfs_handler(request_rec *r)
-{
- glusterfs_dir_config_t *dir_config;
- char *path = NULL;
- int error = OK;
- int rangestatus = 0;
- int errstatus = OK;
- glusterfs_file_t fd;
-
- if (!r->handler || (r->handler && strcmp (r->handler,
- GLUSTERFS_HANDLER)))
- return DECLINED;
-
- if (r->uri[0] == '\0' || r->uri[strlen(r->uri) - 1] == '/') {
- return DECLINED;
- }
-
- dir_config = mod_glusterfs_dconfig (r);
-
- if (r->method_number != M_GET) {
- return METHOD_NOT_ALLOWED;
- }
-
- ap_update_mtime(r, r->finfo.st_mtime);
- ap_set_last_modified(r);
- ap_set_etag(r);
- ap_table_setn(r->headers_out, "Accept-Ranges", "bytes");
- if (((errstatus = ap_meets_conditions(r)) != OK)
- || (errstatus = ap_set_content_length(r, r->finfo.st_size))) {
- return errstatus;
- }
- rangestatus = ap_set_byterange(r);
- ap_send_http_header(r);
-
- if (r->finfo.st_size <= dir_config->xattr_file_size && dir_config->buf) {
- if (!r->header_only) {
- error = OK;
- ap_log_rerror (APLOG_MARK, APLOG_NOTICE, r,
- "fetching data from glusterfs through "
- "xattr interface\n");
-
- if (!rangestatus) {
- if (ap_rwrite (dir_config->buf,
- r->finfo.st_size, r) < 0) {
- error = HTTP_INTERNAL_SERVER_ERROR;
- }
- } else {
- long offset, length;
- while (ap_each_byterange (r, &offset, &length)) {
- if (ap_rwrite (dir_config->buf + offset,
- length, r) < 0) {
- error = HTTP_INTERNAL_SERVER_ERROR;
- break;
- }
- }
- }
- }
-
- free (dir_config->buf);
- dir_config->buf = NULL;
-
- return error;
- }
-
- path = r->uri;
- fd = glusterfs_open (path , O_RDONLY, 0);
-
- if (fd == 0) {
- ap_log_rerror(APLOG_MARK, APLOG_ERR, r,
- "file permissions deny server access: %s",
- r->filename);
- return FORBIDDEN;
- }
-
- if (!r->header_only) {
- if (!rangestatus) {
- mod_glusterfs_read_async (r, fd, 0, -1);
- } else {
- long offset, length;
- while (ap_each_byterange(r, &offset, &length)) {
- mod_glusterfs_read_async (r, fd, offset, length);
- }
- }
- }
-
- glusterfs_close (fd);
- return error;
-}
-
-static const command_rec mod_glusterfs_cmds[] =
-{
- {"GlusterfsLogfile", add_logfile, NULL,
- GLUSTERFS_CMD_PERMS, TAKE1,
- "Glusterfs Logfile"},
- {"GlusterfsLoglevel", set_loglevel, NULL,
- GLUSTERFS_CMD_PERMS, TAKE1,
- "Glusterfs Loglevel:anyone of none, critical, error, warning, debug"},
- {"GlusterfsCacheTimeout", set_cache_timeout, NULL,
- GLUSTERFS_CMD_PERMS, TAKE1,
- "Timeout value in seconds for caching lookups and stats"},
- {"GlusterfsVolumeSpecfile", add_specfile, NULL,
- GLUSTERFS_CMD_PERMS, TAKE1,
- "Glusterfs Specfile required to access contents of this directory"},
- {"GlusterfsXattrFileSize", add_xattr_file_size, NULL,
- GLUSTERFS_CMD_PERMS, TAKE1,
- "Maximum size of the file to be fetched using xattr interface of "
- "glusterfs"},
- {NULL}
-};
-
-static const handler_rec mod_glusterfs_handlers[] =
-{
- {GLUSTERFS_HANDLER, mod_glusterfs_handler},
- {NULL}
-};
-
-module glusterfs_module =
-{
- STANDARD_MODULE_STUFF,
- NULL,
- mod_glusterfs_create_dir_config, /* per-directory config creator */
- NULL,
- NULL, /* server config creator */
- NULL, /* server config merger */
- mod_glusterfs_cmds, /* command table */
- mod_glusterfs_handlers, /* [7] list of handlers */
- NULL, /* [2] filename-to-URI translation */
- NULL, /* [5] check/validate user_id */
- NULL, /* [6] check user_id is valid *here* */
- NULL, /* [4] check access by host address */
- NULL, /* [7] MIME type checker/setter */
- mod_glusterfs_fixup, /* [8] fixups */
- NULL, /* [10] logger */
-#if MODULE_MAGIC_NUMBER >= 19970103
- NULL, /* [3] header parser */
-#endif
-#if MODULE_MAGIC_NUMBER >= 19970719
- mod_glusterfs_child_init, /* process initializer */
-#endif
-#if MODULE_MAGIC_NUMBER >= 19970728
- mod_glusterfs_child_exit, /* process exit/cleanup */
-#endif
-#if MODULE_MAGIC_NUMBER >= 19970902
- NULL /* [1] post read_request handling */
-#endif
-};
diff --git a/mod_glusterfs/apache/2.2/src/Makefile.am b/mod_glusterfs/apache/2.2/src/Makefile.am
deleted file mode 100644
index 1e8f3a31e..000000000
--- a/mod_glusterfs/apache/2.2/src/Makefile.am
+++ /dev/null
@@ -1,31 +0,0 @@
-mod_glusterfs_PROGRAMS = mod_glusterfs.so
-mod_glusterfsdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/apache/2.2
-
-mod_glusterfs_so_SOURCES = mod_glusterfs.c
-
-all: mod_glusterfs.so
-
-mod_glusterfs.so: $(top_srcdir)/mod_glusterfs/apache/2.2/src/mod_glusterfs.c $(top_builddir)/libglusterfsclient/src/libglusterfsclient.la
- ln -sf $(top_srcdir)/mod_glusterfs/apache/2.2/src/mod_glusterfs.c $(top_builddir)/mod_glusterfs/apache/2.2/src/mod_glusterfs-build.c
- $(APXS) -c -o mod_glusterfs.la -Wc,-g3 -Wc,-O0 -DLINUX=2 -D_REENTRANT -D_GNU_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -I$(top_srcdir)/libglusterfsclient/src -L$(top_builddir)/libglusterfsclient/src/.libs/ -lglusterfsclient mod_glusterfs-build.c
- -ln -sf .libs/mod_glusterfs.so mod_glusterfs.so
-
-$(top_builddir)/libglusterfsclient/src/libglusterfsclient.la:
- $(MAKE) -C $(top_builddir)/libglusterfsclient/src/ all
-
-install-data-local:
- @echo ""
- @echo ""
- @echo "**********************************************************************************"
- @echo "* TO INSTALL MODGLUSTERFS, PLEASE USE, "
- @echo "* $(APXS) -n glusterfs -ia $(mod_glusterfsdir)/mod_glusterfs.so "
- @echo "**********************************************************************************"
- @echo ""
- @echo ""
-
-#install:
-# cp -fv mod_glusterfs.so $(HTTPD_LIBEXECDIR)
-# cp -fv httpd.conf $(HTTPD_CONF_DIR)
-
-clean:
- rm -fv *.so *.o
diff --git a/mod_glusterfs/apache/2.2/src/README.txt b/mod_glusterfs/apache/2.2/src/README.txt
deleted file mode 100644
index 214a2535b..000000000
--- a/mod_glusterfs/apache/2.2/src/README.txt
+++ /dev/null
@@ -1,105 +0,0 @@
-What is mod_glusterfs?
-======================
-* mod_glusterfs is a module for apache written for efficient serving of files from glusterfs.
- mod_glusterfs interfaces with glusterfs using apis provided by libglusterfsclient.
-
-* this README speaks about installing mod_glusterfs for httpd-2.2 and higher.
-
-Prerequisites for mod_glusterfs
-===============================
-Though mod_glusterfs has been written as a module, with an intent of making no changes to
-the way apache has been built, currently following points have to be taken care of:
-
-* since glusterfs is compiled with _FILE_OFFSET_BITS=64 and __USE_FILE_OFFSET64 flags, mod_glusterfs and apache
- in turn have to be compiled with the above two flags.
-
- $ tar xzf httpd-2.2.10.tar.gz
- $ cd httpd-2.2.10/
- $ export CFLAGS='-D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64'
- $ ./configure --prefix=/usr
- $ make
- $ make install
- $ httpd -l | grep -i mod_so
- mod_so.c
-
-* if multiple apache installations are present, make sure to pass --with-apxs=/path/to/apxs/of/proper/version
- to configure script while building glusterfs.
-
-Build/Install mod_glusterfs
-===========================
-* mod_glusterfs is provided with glusterfs--mainline--3.0 and all releases from the same branch.
-
-* building glusterfs also builds mod_glusterfs. But 'make install' of glusterfs installs mod_glusterfs.so to
- glusterfs install directory instead of the apache modules directory.
-
-* 'make install' of glusterfs will print a message similar to the one given below, which is self explanatory.
- Make sure to use apxs of proper apache version in case of multiple apache installations. This will copy
- mod_glusterfs.so to modules directory of proper apache version and modify the appropriate httpd.conf to enable
- mod_glusterfs.
-
-**********************************************************************************
-* TO INSTALL MODGLUSTERFS, PLEASE USE,
-* apxs -n glusterfs -ia /usr/lib/glusterfs/2.0.0rc4/apache/2.2/mod_glusterfs.so
-**********************************************************************************
-
-Configuration
-=============
-* Following configuration has to be added to httpd.conf.
-
- <Location "/glusterfs">
- GlusterfsLogfile "/var/log/glusterfs/glusterfs.log"
- GlusterfsLoglevel "warning"
- GlusterfsVolumeSpecfile "/etc/glusterfs/glusterfs-client.spec"
- GlusterfsCacheTimeout "600"
- GlusterfsXattrFileSize "65536"
- SetHandler "glusterfs-handler"
- </Location>
-
-* GlusterfsVolumeSpecfile (COMPULSORY)
- Path to the the glusterfs volume specification file.
-
-* GlusterfsLogfile (COMPULSORY)
- Path to the glusterfs logfile.
-
-* GlusterfsLoglevel (OPTIONAL, default = warning)
- Severity of messages that are to be logged. Allowed values are critical, error, warning, debug, none
- in the decreasing order of severity.
-
-* GlusterfsCacheTimeOut (OPTIONAL, default = 0)
- Timeout values for glusterfs stat and lookup cache.
-
-* GlusterfsXattrFileSize (OPTIONAL, default = 0)
- Files with sizes upto and including this value are fetched through the extended attribute interface of
- glusterfs rather than the usual open-read-close set of operations. For files of small sizes, it is recommended
- to use extended attribute interface.
-
-* With the above configuration all the requests to httpd of the form www.example.org/glusterfs/path/to/file are
- served from glusterfs.
-
-* mod_glusterfs also implements mod_dir and mod_autoindex behaviour for files under glusterfs mount.
- Hence it also takes the directives related to both of these modules. For more details, refer the
- documentation for both of these modules.
-
-Miscellaneous points
-====================
-* httpd by default runs with username "nobody" and group "nogroup". Permissions of logfile and specfile have to
- be set suitably.
-
-* Since mod_glusterfs runs with permissions of nobody.nogroup, glusterfs has to use only login based
- authentication. See docs/authentication.txt for more details.
-
-* To copy the data served by httpd into glusterfs mountpoint, glusterfs can be started with the
- volume-specification file provided to mod_glusterfs. Any tool like cp can then be used.
-
-* To run in gdb, apache has to be compiled with -lpthread, since libglusterfsclient is
- multithreaded. If not on Linux gdb runs into errors like:
- "Error while reading shared library symbols:
- Cannot find new threads: generic error"
-
-* when used with ib-verbs transport, ib_verbs initialization fails.
- reason for this is that apache runs as non-privileged user and the amount of memory that can be
- locked by default is not sufficient for ib-verbs. to fix this, as root run,
-
- # ulimit -l unlimited
-
- and then start apache.
diff --git a/mod_glusterfs/apache/2.2/src/mod_glusterfs.c b/mod_glusterfs/apache/2.2/src/mod_glusterfs.c
deleted file mode 100644
index ee17ebb65..000000000
--- a/mod_glusterfs/apache/2.2/src/mod_glusterfs.c
+++ /dev/null
@@ -1,3627 +0,0 @@
-/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef CORE_PRIVATE
-#define CORE_PRIVATE
-#endif
-
-#ifndef NO_CONTENT_TYPE
-#define NO_CONTENT_TYPE "none"
-#endif
-
-#define BYTERANGE_FMT "%" APR_OFF_T_FMT "-%" APR_OFF_T_FMT "/%" APR_OFF_T_FMT
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <unistd.h>
-
-#include <httpd.h>
-#include <http_config.h>
-#include <http_core.h>
-#include <http_request.h>
-#include <http_protocol.h>
-#include <http_log.h>
-#include <http_main.h>
-#include <util_script.h>
-#include <util_filter.h>
-#include <libglusterfsclient.h>
-#include <sys/uio.h>
-#include <pthread.h>
-#include <apr.h>
-#include <apr_strings.h>
-#include <apr_buckets.h>
-#include <apr_fnmatch.h>
-#include <apr_lib.h>
-
-#define GLUSTERFS_INVALID_LOGLEVEL "mod_glfs: Unrecognized log-level \"%s\", "\
- " possible values are \"DEBUG|WARNING|"\
- "ERROR|CRITICAL|NONE\"\n"
-
-#define GLUSTERFS_HANDLER "glusterfs-handler"
-#define GLUSTERFS_CHUNK_SIZE 131072
-
-static char c_by_encoding, c_by_type, c_by_path;
-
-#define BY_ENCODING &c_by_encoding
-#define BY_TYPE &c_by_type
-#define BY_PATH &c_by_path
-
-module AP_MODULE_DECLARE_DATA glusterfs_module;
-extern module core_module;
-
-#define NO_OPTIONS (1 << 0) /* Indexing options */
-#define ICONS_ARE_LINKS (1 << 1)
-#define SCAN_HTML_TITLES (1 << 2)
-#define SUPPRESS_ICON (1 << 3)
-#define SUPPRESS_LAST_MOD (1 << 4)
-#define SUPPRESS_SIZE (1 << 5)
-#define SUPPRESS_DESC (1 << 6)
-#define SUPPRESS_PREAMBLE (1 << 7)
-#define SUPPRESS_COLSORT (1 << 8)
-#define SUPPRESS_RULES (1 << 9)
-#define FOLDERS_FIRST (1 << 10)
-#define VERSION_SORT (1 << 11)
-#define TRACK_MODIFIED (1 << 12)
-#define FANCY_INDEXING (1 << 13)
-#define TABLE_INDEXING (1 << 14)
-#define IGNORE_CLIENT (1 << 15)
-#define IGNORE_CASE (1 << 16)
-#define EMIT_XHTML (1 << 17)
-#define SHOW_FORBIDDEN (1 << 18)
-
-#define K_NOADJUST 0
-#define K_ADJUST 1
-#define K_UNSET 2
-
-/*
- * Define keys for sorting.
- */
-#define K_NAME 'N' /* Sort by file name (default) */
-#define K_LAST_MOD 'M' /* Last modification date */
-#define K_SIZE 'S' /* Size (absolute, not as displayed) */
-#define K_DESC 'D' /* Description */
-#define K_VALID "NMSD" /* String containing _all_ valid K_ opts */
-
-#define D_ASCENDING 'A'
-#define D_DESCENDING 'D'
-#define D_VALID "AD" /* String containing _all_ valid D_ opts */
-
-/*
- * These are the dimensions of the default icons supplied with Apache.
- */
-#define DEFAULT_ICON_WIDTH 20
-#define DEFAULT_ICON_HEIGHT 22
-
-/*
- * Other default dimensions.
- */
-#define DEFAULT_NAME_WIDTH 23
-#define DEFAULT_DESC_WIDTH 23
-
-struct mod_glfs_ai_item {
- char *type;
- char *apply_to;
- char *apply_path;
- char *data;
-};
-
-typedef struct mod_glfs_ai_desc_t {
- char *pattern;
- char *description;
- int full_path;
- int wildcards;
-} mod_glfs_ai_desc_t;
-
-typedef enum {
- SLASH_OFF = 0,
- SLASH_ON,
- SLASH_UNSET
-} mod_glfs_dir_slash_cfg;
-
-/* static ap_filter_rec_t *mod_glfs_output_filter_handle; */
-
-/*TODO: verify error returns to server core */
-
-typedef struct glusterfs_dir_config {
- char *logfile;
- char *loglevel;
- char *specfile;
- char *mount_dir;
- char *buf;
-
- size_t xattr_file_size;
- uint32_t cache_timeout;
-
- /* mod_dir options */
- apr_array_header_t *index_names;
- mod_glfs_dir_slash_cfg do_slash;
-
- /* autoindex options */
- char *default_icon;
- char *style_sheet;
- apr_int32_t opts;
- apr_int32_t incremented_opts;
- apr_int32_t decremented_opts;
- int name_width;
- int name_adjust;
- int desc_width;
- int desc_adjust;
- int icon_width;
- int icon_height;
- char default_keyid;
- char default_direction;
-
- apr_array_header_t *icon_list;
- apr_array_header_t *alt_list;
- apr_array_header_t *desc_list;
- apr_array_header_t *ign_list;
- apr_array_header_t *hdr_list;
- apr_array_header_t *rdme_list;
-
- char *ctype;
- char *charset;
-} glusterfs_dir_config_t;
-
-typedef struct glusterfs_async_local {
- int op_ret;
- int op_errno;
- char async_read_complete;
- off_t length;
- off_t read_bytes;
- glusterfs_iobuf_t *buf;
- request_rec *request;
- pthread_mutex_t lock;
- pthread_cond_t cond;
-}glusterfs_async_local_t;
-
-#define GLUSTERFS_CMD_PERMS ACCESS_CONF
-
-
-static glusterfs_dir_config_t *
-mod_glfs_dconfig (request_rec *r)
-{
- glusterfs_dir_config_t *dir_config = NULL;
- if (r->per_dir_config != NULL) {
- dir_config = ap_get_module_config (r->per_dir_config,
- &glusterfs_module);
- }
-
- return dir_config;
-}
-
-
-static const char *
-cmd_add_xattr_file_size (cmd_parms *cmd, void *dummy, const char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- dir_config->xattr_file_size = atoi (arg);
- return NULL;
-}
-
-
-static const char *
-cmd_set_cache_timeout (cmd_parms *cmd, void *dummy, const char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- dir_config->cache_timeout = atoi (arg);
- return NULL;
-}
-
-
-static const char *
-cmd_set_loglevel (cmd_parms *cmd, void *dummy, const char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- char *error = NULL;
- if (strncasecmp (arg, "DEBUG", strlen ("DEBUG"))
- && strncasecmp (arg, "WARNING", strlen ("WARNING"))
- && strncasecmp (arg, "CRITICAL", strlen ("CRITICAL"))
- && strncasecmp (arg, "NONE", strlen ("NONE"))
- && strncasecmp (arg, "ERROR", strlen ("ERROR")))
- error = GLUSTERFS_INVALID_LOGLEVEL;
- else
- dir_config->loglevel = apr_pstrdup (cmd->pool, arg);
-
- return error;
-}
-
-static const char *
-cmd_add_logfile (cmd_parms *cmd, void *dummy, const char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- dir_config->logfile = apr_pstrdup (cmd->pool, arg);
-
- return NULL;
-}
-
-
-static const char *
-cmd_add_volume_specfile (cmd_parms *cmd, void *dummy, const char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
-
- dir_config->specfile = apr_pstrdup (cmd->pool, arg);
-
- return NULL;
-}
-
-#define WILDCARDS_REQUIRED 0
-
-static const char *
-cmd_add_desc (cmd_parms *cmd, void *d, const char *desc,
- const char *to)
-{
- glusterfs_dir_config_t *dcfg = NULL;
- mod_glfs_ai_desc_t *desc_entry = NULL;
- char *prefix = "";
-
- dcfg = (glusterfs_dir_config_t *) d;
- desc_entry = (mod_glfs_ai_desc_t *) apr_array_push(dcfg->desc_list);
- desc_entry->full_path = (ap_strchr_c(to, '/') == NULL) ? 0 : 1;
- desc_entry->wildcards = (WILDCARDS_REQUIRED
- || desc_entry->full_path
- || apr_fnmatch_test(to));
- if (desc_entry->wildcards) {
- prefix = desc_entry->full_path ? "*/" : "*";
- desc_entry->pattern = apr_pstrcat(dcfg->desc_list->pool,
- prefix, to, "*", NULL);
- }
- else {
- desc_entry->pattern = apr_pstrdup(dcfg->desc_list->pool, to);
- }
- desc_entry->description = apr_pstrdup(dcfg->desc_list->pool, desc);
- return NULL;
-}
-
-
-static void push_item(apr_array_header_t *arr, char *type, const char *to,
- const char *path, const char *data)
-{
- struct mod_glfs_ai_item *p = NULL;
-
- p = (struct mod_glfs_ai_item *) apr_array_push(arr);
-
- if (!to) {
- to = "";
- }
- if (!path) {
- path = "";
- }
-
- p->type = type;
- p->data = data ? apr_pstrdup(arr->pool, data) : NULL;
- p->apply_path = apr_pstrcat(arr->pool, path, "*", NULL);
-
- if ((type == BY_PATH) && (!ap_is_matchexp(to))) {
- p->apply_to = apr_pstrcat(arr->pool, "*", to, NULL);
- }
- else if (to) {
- p->apply_to = apr_pstrdup(arr->pool, to);
- }
- else {
- p->apply_to = NULL;
- }
-}
-
-
-static const char *
-cmd_add_ignore (cmd_parms *cmd, void *d, const char *ext)
-{
- push_item(((glusterfs_dir_config_t *) d)->ign_list, 0, ext, cmd->path,
- NULL);
- return NULL;
-}
-
-
-static const char *
-cmd_add_header (cmd_parms *cmd, void *d, const char *name)
-{
- push_item(((glusterfs_dir_config_t *) d)->hdr_list, 0, NULL, cmd->path,
- name);
- return NULL;
-}
-
-
-static const char *
-cmd_add_readme (cmd_parms *cmd, void *d, const char *name)
-{
- push_item(((glusterfs_dir_config_t *) d)->rdme_list, 0, NULL, cmd->path,
- name);
- return NULL;
-}
-
-
-static const char *
-cmd_add_opts (cmd_parms *cmd, void *d, int argc, char *const argv[])
-{
- int i = 0, option = 0;
- char *w = NULL;
- apr_int32_t opts;
- apr_int32_t opts_add;
- apr_int32_t opts_remove;
- char action = 0;
- glusterfs_dir_config_t *d_cfg = (glusterfs_dir_config_t *) d;
-
- opts = d_cfg->opts;
- opts_add = d_cfg->incremented_opts;
- opts_remove = d_cfg->decremented_opts;
-
- for (i = 0; i < argc; i++) {
- w = argv[i];
-
- if ((*w == '+') || (*w == '-')) {
- action = *(w++);
- }
- else {
- action = '\0';
- }
- if (!strcasecmp(w, "FancyIndexing")) {
- option = FANCY_INDEXING;
- }
- else if (!strcasecmp(w, "FoldersFirst")) {
- option = FOLDERS_FIRST;
- }
- else if (!strcasecmp(w, "HTMLTable")) {
- option = TABLE_INDEXING;
- }
- else if (!strcasecmp(w, "IconsAreLinks")) {
- option = ICONS_ARE_LINKS;
- }
- else if (!strcasecmp(w, "IgnoreCase")) {
- option = IGNORE_CASE;
- }
- else if (!strcasecmp(w, "IgnoreClient")) {
- option = IGNORE_CLIENT;
- }
- else if (!strcasecmp(w, "ScanHTMLTitles")) {
- option = SCAN_HTML_TITLES;
- }
- else if (!strcasecmp(w, "SuppressColumnSorting")) {
- option = SUPPRESS_COLSORT;
- }
- else if (!strcasecmp(w, "SuppressDescription")) {
- option = SUPPRESS_DESC;
- }
- else if (!strcasecmp(w, "SuppressHTMLPreamble")) {
- option = SUPPRESS_PREAMBLE;
- }
- else if (!strcasecmp(w, "SuppressIcon")) {
- option = SUPPRESS_ICON;
- }
- else if (!strcasecmp(w, "SuppressLastModified")) {
- option = SUPPRESS_LAST_MOD;
- }
- else if (!strcasecmp(w, "SuppressSize")) {
- option = SUPPRESS_SIZE;
- }
- else if (!strcasecmp(w, "SuppressRules")) {
- option = SUPPRESS_RULES;
- }
- else if (!strcasecmp(w, "TrackModified")) {
- option = TRACK_MODIFIED;
- }
- else if (!strcasecmp(w, "VersionSort")) {
- option = VERSION_SORT;
- }
- else if (!strcasecmp(w, "XHTML")) {
- option = EMIT_XHTML;
- }
- else if (!strcasecmp(w, "ShowForbidden")) {
- option = SHOW_FORBIDDEN;
- }
- else if (!strcasecmp(w, "None")) {
- if (action != '\0') {
- return "Cannot combine '+' or '-' with 'None' "
- "keyword";
- }
- opts = NO_OPTIONS;
- opts_add = 0;
- opts_remove = 0;
- }
- else if (!strcasecmp(w, "IconWidth")) {
- if (action != '-') {
- d_cfg->icon_width = DEFAULT_ICON_WIDTH;
- }
- else {
- d_cfg->icon_width = 0;
- }
- }
- else if (!strncasecmp(w, "IconWidth=", 10)) {
- if (action == '-') {
- return "Cannot combine '-' with IconWidth=n";
- }
- d_cfg->icon_width = atoi(&w[10]);
- }
- else if (!strcasecmp(w, "IconHeight")) {
- if (action != '-') {
- d_cfg->icon_height = DEFAULT_ICON_HEIGHT;
- }
- else {
- d_cfg->icon_height = 0;
- }
- }
- else if (!strncasecmp(w, "IconHeight=", 11)) {
- if (action == '-') {
- return "Cannot combine '-' with IconHeight=n";
- }
- d_cfg->icon_height = atoi(&w[11]);
- }
- else if (!strcasecmp(w, "NameWidth")) {
- if (action != '-') {
- return "NameWidth with no value may only appear"
- " as "
- "'-NameWidth'";
- }
- d_cfg->name_width = DEFAULT_NAME_WIDTH;
- d_cfg->name_adjust = K_NOADJUST;
- }
- else if (!strncasecmp(w, "NameWidth=", 10)) {
- if (action == '-') {
- return "Cannot combine '-' with NameWidth=n";
- }
- if (w[10] == '*') {
- d_cfg->name_adjust = K_ADJUST;
- }
- else {
- int width = atoi(&w[10]);
-
- if (width && (width < 5)) {
- return "NameWidth value must be greater"
- " than 5";
- }
- d_cfg->name_width = width;
- d_cfg->name_adjust = K_NOADJUST;
- }
- }
- else if (!strcasecmp(w, "DescriptionWidth")) {
- if (action != '-') {
- return "DescriptionWidth with no value may only"
- " appear as "
- "'-DescriptionWidth'";
- }
- d_cfg->desc_width = DEFAULT_DESC_WIDTH;
- d_cfg->desc_adjust = K_NOADJUST;
- }
- else if (!strncasecmp(w, "DescriptionWidth=", 17)) {
- if (action == '-') {
- return "Cannot combine '-' with "
- "DescriptionWidth=n";
- }
- if (w[17] == '*') {
- d_cfg->desc_adjust = K_ADJUST;
- }
- else {
- int width = atoi(&w[17]);
-
- if (width && (width < 12)) {
- return "DescriptionWidth value must be "
- "greater than 12";
- }
- d_cfg->desc_width = width;
- d_cfg->desc_adjust = K_NOADJUST;
- }
- }
- else if (!strncasecmp(w, "Type=", 5)) {
- d_cfg->ctype = apr_pstrdup(cmd->pool, &w[5]);
- }
- else if (!strncasecmp(w, "Charset=", 8)) {
- d_cfg->charset = apr_pstrdup(cmd->pool, &w[8]);
- }
- else {
- return "Invalid directory indexing option";
- }
- if (action == '\0') {
- opts |= option;
- opts_add = 0;
- opts_remove = 0;
- }
- else if (action == '+') {
- opts_add |= option;
- opts_remove &= ~option;
- }
- else {
- opts_remove |= option;
- opts_add &= ~option;
- }
- }
- if ((opts & NO_OPTIONS) && (opts & ~NO_OPTIONS)) {
- return "Cannot combine other IndexOptions keywords with 'None'";
- }
- d_cfg->incremented_opts = opts_add;
- d_cfg->decremented_opts = opts_remove;
- d_cfg->opts = opts;
- return NULL;
-}
-
-
-static const char *
-cmd_set_default_order(cmd_parms *cmd, void *m,
- const char *direction, const char *key)
-{
- glusterfs_dir_config_t *d_cfg = (glusterfs_dir_config_t *) m;
-
- if (!strcasecmp(direction, "Ascending")) {
- d_cfg->default_direction = D_ASCENDING;
- }
- else if (!strcasecmp(direction, "Descending")) {
- d_cfg->default_direction = D_DESCENDING;
- }
- else {
- return "First keyword must be 'Ascending' or 'Descending'";
- }
-
- if (!strcasecmp(key, "Name")) {
- d_cfg->default_keyid = K_NAME;
- }
- else if (!strcasecmp(key, "Date")) {
- d_cfg->default_keyid = K_LAST_MOD;
- }
- else if (!strcasecmp(key, "Size")) {
- d_cfg->default_keyid = K_SIZE;
- }
- else if (!strcasecmp(key, "Description")) {
- d_cfg->default_keyid = K_DESC;
- }
- else {
- return "Second keyword must be 'Name', 'Date', 'Size', or "
- "'Description'";
- }
-
- return NULL;
-}
-
-
-static char c_by_encoding, c_by_type, c_by_path;
-
-#define BY_ENCODING &c_by_encoding
-#define BY_TYPE &c_by_type
-#define BY_PATH &c_by_path
-
-/*
- * This routine puts the standard HTML header at the top of the index page.
- * We include the DOCTYPE because we may be using features therefrom (i.e.,
- * HEIGHT and WIDTH attributes on the icons if we're FancyIndexing).
- */
-static void emit_preamble(request_rec *r, int xhtml, const char *title)
-{
- glusterfs_dir_config_t *d;
-
- d = (glusterfs_dir_config_t *) ap_get_module_config(r->per_dir_config,
- &glusterfs_module);
-
- if (xhtml) {
- ap_rvputs(r, DOCTYPE_XHTML_1_0T,
- "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n"
- " <head>\n <title>Index of ", title,
- "</title>\n", NULL);
- } else {
- ap_rvputs(r, DOCTYPE_HTML_3_2,
- "<html>\n <head>\n"
- " <title>Index of ", title,
- "</title>\n", NULL);
- }
-
- if (d->style_sheet != NULL) {
- ap_rvputs(r, " <link rel=\"stylesheet\" href=\"",
- d->style_sheet, "\" type=\"text/css\"",
- xhtml ? " />\n" : ">\n", NULL);
- }
- ap_rvputs(r, " </head>\n <body>\n", NULL);
-}
-
-
-static const char *cmd_add_alt(cmd_parms *cmd, void *d, const char *alt,
- const char *to)
-{
- if (cmd->info == BY_PATH) {
- if (!strcmp(to, "**DIRECTORY**")) {
- to = "^^DIRECTORY^^";
- }
- }
- if (cmd->info == BY_ENCODING) {
- char *tmp = apr_pstrdup(cmd->pool, to);
- ap_str_tolower(tmp);
- to = tmp;
- }
-
- push_item(((glusterfs_dir_config_t *) d)->alt_list, cmd->info, to,
- cmd->path, alt);
- return NULL;
-}
-
-static const char *cmd_add_icon(cmd_parms *cmd, void *d, const char *icon,
- const char *to)
-{
- char *iconbak = apr_pstrdup(cmd->pool, icon);
- char *alt = NULL, *cl = NULL, *tmp = NULL;
-
- if (icon[0] == '(') {
- cl = strchr(iconbak, ')');
-
- if (cl == NULL) {
- return "missing closing paren";
- }
- alt = ap_getword_nc(cmd->pool, &iconbak, ',');
- *cl = '\0'; /* Lose closing paren */
- cmd_add_alt(cmd, d, &alt[1], to);
- }
- if (cmd->info == BY_PATH) {
- if (!strcmp(to, "**DIRECTORY**")) {
- to = "^^DIRECTORY^^";
- }
- }
- if (cmd->info == BY_ENCODING) {
- tmp = apr_pstrdup(cmd->pool, to);
- ap_str_tolower(tmp);
- to = tmp;
- }
-
- push_item(((glusterfs_dir_config_t *) d)->icon_list, cmd->info, to,
- cmd->path, iconbak);
- return NULL;
-}
-
-
-static void *
-mod_glfs_create_dir_config(apr_pool_t *p, char *dirspec)
-{
- glusterfs_dir_config_t *dir_config = NULL;
-
- dir_config = (glusterfs_dir_config_t *) apr_pcalloc(p,
- sizeof(*dir_config));
-
- dir_config->mount_dir = dirspec;
- dir_config->logfile = dir_config->specfile = (char *)0;
- dir_config->loglevel = "warning";
- dir_config->cache_timeout = 0;
- dir_config->buf = NULL;
-
- /* mod_dir options init */
- dir_config->index_names = NULL;
- dir_config->do_slash = SLASH_UNSET;
-
- /* autoindex options init */
- dir_config->icon_width = 0;
- dir_config->icon_height = 0;
- dir_config->name_width = DEFAULT_NAME_WIDTH;
- dir_config->name_adjust = K_UNSET;
- dir_config->desc_width = DEFAULT_DESC_WIDTH;
- dir_config->desc_adjust = K_UNSET;
- dir_config->icon_list = apr_array_make(p, 4,
- sizeof(struct mod_glfs_ai_item));
- dir_config->alt_list = apr_array_make(p, 4,
- sizeof(struct mod_glfs_ai_item));
- dir_config->desc_list = apr_array_make(p, 4,
- sizeof(mod_glfs_ai_desc_t));
- dir_config->ign_list = apr_array_make(p, 4,
- sizeof(struct mod_glfs_ai_item));
- dir_config->hdr_list = apr_array_make(p, 4,
- sizeof(struct mod_glfs_ai_item));
- dir_config->rdme_list = apr_array_make(p, 4,
- sizeof(struct mod_glfs_ai_item));
- dir_config->opts = 0;
- dir_config->incremented_opts = 0;
- dir_config->decremented_opts = 0;
- dir_config->default_keyid = '\0';
- dir_config->default_direction = '\0';
-
- return (void *) dir_config;
-}
-
-
-static void *
-mod_glfs_merge_dir_config(apr_pool_t *p, void *parent_conf,
- void *newloc_conf)
-{
- glusterfs_dir_config_t *new = NULL;
- glusterfs_dir_config_t *add = NULL;
- glusterfs_dir_config_t *base = NULL;
-
- new = (glusterfs_dir_config_t *)
- apr_pcalloc(p, sizeof(glusterfs_dir_config_t));
- add = newloc_conf;
- base = parent_conf;
-
- if (add->logfile)
- new->logfile = apr_pstrdup (p, add->logfile);
-
- if (add->loglevel)
- new->loglevel = apr_pstrdup (p, add->loglevel);
-
- if (add->specfile)
- new->specfile = apr_pstrdup (p, add->specfile);
-
- if (add->mount_dir)
- new->mount_dir = apr_pstrdup (p, add->mount_dir);
-
- new->xattr_file_size = add->xattr_file_size;
- new->cache_timeout = add->cache_timeout;
- new->buf = add->buf;
-
- /* mod_dir */
- new->index_names = add->index_names ?
- add->index_names : base->index_names;
- new->do_slash =
- (add->do_slash == SLASH_UNSET) ? base->do_slash : add->do_slash;
-
- /* auto index */
- new->default_icon = add->default_icon ? add->default_icon
- : base->default_icon;
- new->style_sheet = add->style_sheet ? add->style_sheet
- : base->style_sheet;
- new->icon_height = add->icon_height ?
- add->icon_height : base->icon_height;
- new->icon_width = add->icon_width ? add->icon_width : base->icon_width;
-
- new->ctype = add->ctype ? add->ctype : base->ctype;
- new->charset = add->charset ? add->charset : base->charset;
-
- new->alt_list = apr_array_append(p, add->alt_list, base->alt_list);
- new->ign_list = apr_array_append(p, add->ign_list, base->ign_list);
- new->hdr_list = apr_array_append(p, add->hdr_list, base->hdr_list);
- new->desc_list = apr_array_append(p, add->desc_list, base->desc_list);
- new->icon_list = apr_array_append(p, add->icon_list, base->icon_list);
- new->rdme_list = apr_array_append(p, add->rdme_list, base->rdme_list);
- if (add->opts & NO_OPTIONS) {
- /*
- * If the current directory says 'no options' then we also
- * clear any incremental mods from being inheritable further down.
- */
- new->opts = NO_OPTIONS;
- new->incremented_opts = 0;
- new->decremented_opts = 0;
- }
- else {
- /*
- * If there were any nonincremental options selected for
- * this directory, they dominate and we don't inherit *anything.*
- * Contrariwise, we *do* inherit if the only settings here are
- * incremental ones.
- */
- if (add->opts == 0) {
- new->incremented_opts = (base->incremented_opts
- | add->incremented_opts)
- & ~add->decremented_opts;
- new->decremented_opts = (base->decremented_opts
- | add->decremented_opts);
- /*
- * We may have incremental settings, so make sure we
- * don't inadvertently inherit an IndexOptions None
- * from above.
- */
- new->opts = (base->opts & ~NO_OPTIONS);
- }
- else {
- /*
- * There are local nonincremental settings, which clear
- * all inheritance from above. They *are* the new
- * base settings.
- */
- new->opts = add->opts;;
- }
- /*
- * We're guaranteed that there'll be no overlap between
- * the add-options and the remove-options.
- */
- new->opts |= new->incremented_opts;
- new->opts &= ~new->decremented_opts;
- }
- /*
- * Inherit the NameWidth settings if there aren't any specific to
- * the new location; otherwise we'll end up using the defaults set
- * in the config-rec creation routine.
- */
- if (add->name_adjust == K_UNSET) {
- new->name_width = base->name_width;
- new->name_adjust = base->name_adjust;
- }
- else {
- new->name_width = add->name_width;
- new->name_adjust = add->name_adjust;
- }
-
- /*
- * Likewise for DescriptionWidth.
- */
- if (add->desc_adjust == K_UNSET) {
- new->desc_width = base->desc_width;
- new->desc_adjust = base->desc_adjust;
- }
- else {
- new->desc_width = add->desc_width;
- new->desc_adjust = add->desc_adjust;
- }
-
- new->default_keyid = add->default_keyid ? add->default_keyid
- : base->default_keyid;
- new->default_direction = add->default_direction ? add->default_direction
- : base->default_direction;
-
- return (void *) new;
-}
-
-
-static void
-mod_glfs_child_init(apr_pool_t *p, server_rec *s)
-{
- int i = 0, num_sec = 0, ret = 0;
- core_server_config *sconf = NULL;
- ap_conf_vector_t **sec_ent = NULL;
- glusterfs_dir_config_t *dir_config = NULL;
- glusterfs_init_params_t ctx = {0, };
-
- sconf = (core_server_config *) ap_get_module_config (s->module_config,
- &core_module);
- sec_ent = (ap_conf_vector_t **) sconf->sec_url->elts;
- num_sec = sconf->sec_url->nelts;
-
- for (i = 0; i < num_sec; i++) {
- dir_config = ap_get_module_config (sec_ent[i],
- &glusterfs_module);
-
- if (dir_config) {
- memset (&ctx, 0, sizeof (ctx));
-
- ctx.logfile = dir_config->logfile;
- ctx.loglevel = dir_config->loglevel;
- ctx.lookup_timeout = dir_config->cache_timeout;
- ctx.stat_timeout = dir_config->cache_timeout;
- ctx.specfile = dir_config->specfile;
-
- ret = glusterfs_mount (dir_config->mount_dir, &ctx);
- if (ret != 0) {
- ap_log_error(APLOG_MARK, APLOG_ERR,
- APR_EGENERAL, s,
- "mod_glfs_child_init: "
- "glusterfs_init failed, check "
- "glusterfs logfile %s for more "
- "details",
- dir_config->logfile);
- }
- }
- dir_config = NULL;
- }
-}
-
-
-static void
-mod_glfs_child_exit(server_rec *s, apr_pool_t *p)
-{
- int i = 0, num_sec = 0;
- core_server_config *sconf = NULL;
- ap_conf_vector_t **sec_ent = NULL;
- glusterfs_dir_config_t *dir_config = NULL;
- glusterfs_init_params_t ctx = {0, };
-
- sconf = ap_get_module_config(s->module_config,
- &core_module);
- sec_ent = (ap_conf_vector_t **) sconf->sec_url->elts;
- num_sec = sconf->sec_url->nelts;
- for (i = 0; i < num_sec; i++) {
- dir_config = ap_get_module_config (sec_ent[i],
- &glusterfs_module);
- if (dir_config) {
- glusterfs_umount (dir_config->mount_dir);
- }
- }
-}
-
-static apr_filetype_e filetype_from_mode(mode_t mode)
-{
- apr_filetype_e type = APR_NOFILE;
-
- if (S_ISREG(mode))
- type = APR_REG;
- else if (S_ISDIR(mode))
- type = APR_DIR;
- else if (S_ISCHR(mode))
- type = APR_CHR;
- else if (S_ISBLK(mode))
- type = APR_BLK;
- else if (S_ISFIFO(mode))
- type = APR_PIPE;
- else if (S_ISLNK(mode))
- type = APR_LNK;
- else if (S_ISSOCK(mode))
- type = APR_SOCK;
- else
- type = APR_UNKFILE;
- return type;
-}
-
-
-static void fill_out_finfo(apr_finfo_t *finfo, struct stat *info,
- apr_int32_t wanted)
-{
- finfo->valid = APR_FINFO_MIN | APR_FINFO_IDENT | APR_FINFO_NLINK
- | APR_FINFO_OWNER | APR_FINFO_PROT;
- finfo->protection = apr_unix_mode2perms(info->st_mode);
- finfo->filetype = filetype_from_mode(info->st_mode);
- finfo->user = info->st_uid;
- finfo->group = info->st_gid;
- finfo->size = info->st_size;
- finfo->device = info->st_dev;
- finfo->nlink = info->st_nlink;
-
- /* Check for overflow if storing a 64-bit st_ino in a 32-bit
- * apr_ino_t for LFS builds: */
- if (sizeof(apr_ino_t) >= sizeof(info->st_ino)
- || (apr_ino_t)info->st_ino == info->st_ino) {
- finfo->inode = info->st_ino;
- } else {
- finfo->valid &= ~APR_FINFO_INODE;
- }
-
- apr_time_ansi_put(&finfo->atime, info->st_atime);
-#ifdef HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC
- finfo->atime += info->st_atim.tv_nsec / APR_TIME_C(1000);
-#elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC)
- finfo->atime += info->st_atimensec / APR_TIME_C(1000);
-#elif defined(HAVE_STRUCT_STAT_ST_ATIME_N)
- finfo->ctime += info->st_atime_n / APR_TIME_C(1000);
-#endif
-
- apr_time_ansi_put(&finfo->mtime, info->st_mtime);
-#ifdef HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC
- finfo->mtime += info->st_mtim.tv_nsec / APR_TIME_C(1000);
-#elif defined(HAVE_STRUCT_STAT_ST_MTIMENSEC)
- finfo->mtime += info->st_mtimensec / APR_TIME_C(1000);
-#elif defined(HAVE_STRUCT_STAT_ST_MTIME_N)
- finfo->ctime += info->st_mtime_n / APR_TIME_C(1000);
-#endif
-
- apr_time_ansi_put(&finfo->ctime, info->st_ctime);
-#ifdef HAVE_STRUCT_STAT_ST_CTIM_TV_NSEC
- finfo->ctime += info->st_ctim.tv_nsec / APR_TIME_C(1000);
-#elif defined(HAVE_STRUCT_STAT_ST_CTIMENSEC)
- finfo->ctime += info->st_ctimensec / APR_TIME_C(1000);
-#elif defined(HAVE_STRUCT_STAT_ST_CTIME_N)
- finfo->ctime += info->st_ctime_n / APR_TIME_C(1000);
-#endif
-
-#ifdef HAVE_STRUCT_STAT_ST_BLOCKS
-#ifdef DEV_BSIZE
- finfo->csize = (apr_off_t)info->st_blocks * (apr_off_t)DEV_BSIZE;
-#else
- finfo->csize = (apr_off_t)info->st_blocks * (apr_off_t)512;
-#endif
- finfo->valid |= APR_FINFO_CSIZE;
-#endif
-}
-
-
-static int
-mod_glfs_map_to_storage(request_rec *r)
-{
- glusterfs_dir_config_t *dir_config = NULL, *tmp = NULL;
- int access_status = 0, ret = 0;
- char *path = NULL;
- struct stat st = {0, };
- core_server_config *sconf = NULL;
- ap_conf_vector_t **sec_ent = NULL;
- int num_sec = 0, i = 0;
-
- sconf = (core_server_config *) ap_get_module_config (r->server->module_config,
- &core_module);
- sec_ent = (ap_conf_vector_t **) sconf->sec_url->elts;
- num_sec = sconf->sec_url->nelts;
-
- for (i = 0; i < num_sec; i++) {
- tmp = ap_get_module_config (sec_ent[i], &glusterfs_module);
-
- if (tmp && !strncmp (tmp->mount_dir, r->uri,
- strlen (tmp->mount_dir))) {
- if (!dir_config ||
- strlen (tmp->mount_dir)
- > strlen (dir_config->mount_dir)) {
- dir_config = tmp;
- }
- }
-
- }
-
- if (dir_config && dir_config->mount_dir
- && !(strncmp (apr_pstrcat (r->pool, dir_config->mount_dir, "/",
- NULL), r->uri,
- strlen (dir_config->mount_dir) + 1)
- && !r->handler))
- r->handler = GLUSTERFS_HANDLER;
-
- if (!r->handler || (r->handler && strcmp (r->handler,
- GLUSTERFS_HANDLER)))
- return DECLINED;
-
- path = r->uri;
-
- memset (&r->finfo, 0, sizeof (r->finfo));
-
- dir_config->buf = calloc (1, dir_config->xattr_file_size);
- if (!dir_config->buf) {
- return HTTP_INTERNAL_SERVER_ERROR;
- }
-
- ret = glusterfs_get (path, dir_config->buf,
- dir_config->xattr_file_size, &st);
-
- if (ret == -1 || st.st_size > dir_config->xattr_file_size
- || S_ISDIR (st.st_mode)) {
- free (dir_config->buf);
- dir_config->buf = NULL;
-
- if (ret == -1) {
- int error = HTTP_NOT_FOUND;
- char *emsg = NULL;
- if (r->path_info == NULL) {
- emsg = apr_pstrcat(r->pool, strerror (errno),
- r->filename, NULL);
- }
- else {
- emsg = apr_pstrcat(r->pool, strerror (errno),
- r->filename, r->path_info,
- NULL);
- }
- ap_log_rerror(APLOG_MARK, APLOG_ERR|APLOG_NOERRNO, 0,
- r, "%s", emsg);
- if (errno != ENOENT) {
- error = HTTP_INTERNAL_SERVER_ERROR;
- }
- return error;
- }
- }
-
- r->finfo.pool = r->pool;
- r->finfo.fname = r->filename;
- fill_out_finfo (&r->finfo, &st,
- APR_FINFO_MIN | APR_FINFO_IDENT | APR_FINFO_NLINK |
- APR_FINFO_OWNER | APR_FINFO_PROT);
-
- /* allow core module to run directory_walk() and location_walk() */
- return DECLINED;
-}
-
-
-static int
-mod_glfs_readv_async_cbk (int32_t op_ret, int32_t op_errno,
- glusterfs_iobuf_t *buf, void *cbk_data)
-{
- glusterfs_async_local_t *local = cbk_data;
-
- pthread_mutex_lock (&local->lock);
- {
- local->async_read_complete = 1;
- local->buf = buf;
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- pthread_cond_signal (&local->cond);
- }
- pthread_mutex_unlock (&local->lock);
-
- return 0;
-}
-
-/* use read_async just to avoid memcpy of read buffer in libglusterfsclient */
-static int
-mod_glfs_read_async (request_rec *r, apr_bucket_brigade *bb,
- glusterfs_file_t fd,
- apr_off_t offset, apr_off_t length)
-{
- glusterfs_async_local_t local = {0, };
- off_t end = 0;
- int nbytes = 0, complete = 0;
- conn_rec *c = r->connection;
- apr_bucket *e = NULL;
- apr_status_t status = APR_SUCCESS;
- glusterfs_iobuf_t *buf = NULL;
-
- if (length == 0) {
- return 0;
- }
-
- pthread_cond_init (&local.cond, NULL);
- pthread_mutex_init (&local.lock, NULL);
-
- memset (&local, 0, sizeof (local));
- local.request = r;
-
- if (length > 0)
- end = offset + length;
-
- do {
- if (length > 0) {
- nbytes = end - offset;
- if (nbytes > GLUSTERFS_CHUNK_SIZE)
- nbytes = GLUSTERFS_CHUNK_SIZE;
- } else
- nbytes = GLUSTERFS_CHUNK_SIZE;
-
- glusterfs_read_async(fd,
- nbytes,
- offset,
- mod_glfs_readv_async_cbk,
- (void *)&local);
-
- pthread_mutex_lock (&local.lock);
- {
- while (!local.async_read_complete) {
- pthread_cond_wait (&local.cond, &local.lock);
- }
-
- local.async_read_complete = 0;
- buf = local.buf;
-
- if (length < 0)
- complete = (local.op_ret <= 0);
- else {
- local.read_bytes += local.op_ret;
- complete = ((local.read_bytes == length) ||
- (local.op_ret < 0));
- }
- }
- pthread_mutex_unlock (&local.lock);
-
- if (!bb) {
- bb = apr_brigade_create (r->pool, c->bucket_alloc);
- }
- apr_brigade_writev (bb, NULL, NULL, buf->vector, buf->count);
-
- /*
- * make sure all the data is written out, since we call
- * glusterfs_free on buf once ap_pass_brigade returns
- */
- e = apr_bucket_flush_create (c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL (bb, e);
-
- status = ap_pass_brigade (r->output_filters, bb);
- if (status != APR_SUCCESS) {
- /* no way to know what type of error occurred */
- ap_log_rerror(APLOG_MARK, APLOG_DEBUG, status, r,
- "mod_glfs_handler: ap_pass_brigade "
- "returned %i",
- status);
- complete = 1;
- local.op_ret = -1;
- }
-
- glusterfs_free (buf);
-
- /*
- * bb has already been cleaned up by core_output_filter,
- * just being paranoid
- */
- apr_brigade_cleanup (bb);
-
- offset += nbytes;
- } while (!complete);
-
- return (local.op_ret < 0 ? HTTP_INTERNAL_SERVER_ERROR : OK);
-}
-
-static int
-parse_byterange(char *range, apr_off_t clength,
- apr_off_t *start, apr_off_t *end)
-{
- char *dash = NULL, *errp = NULL;
- apr_off_t number;
-
- dash = strchr(range, '-');
- if (!dash) {
- return 0;
- }
-
- if ((dash == range)) {
- /* In the form "-5" */
- if (apr_strtoff(&number, dash+1, &errp, 10) || *errp) {
- return 0;
- }
- *start = clength - number;
- *end = clength - 1;
- }
- else {
- *dash++ = '\0';
- if (apr_strtoff(&number, range, &errp, 10) || *errp) {
- return 0;
- }
- *start = number;
- if (*dash) {
- if (apr_strtoff(&number, dash, &errp, 10) || *errp) {
- return 0;
- }
- *end = number;
- }
- else { /* "5-" */
- *end = clength - 1;
- }
- }
-
- if (*start < 0) {
- *start = 0;
- }
-
- if (*end >= clength) {
- *end = clength - 1;
- }
-
- if (*start > *end) {
- return -1;
- }
-
- return (*start > 0 || *end < clength);
-}
-
-
-static int use_range_x(request_rec *r)
-{
- const char *ua = NULL;
- return (apr_table_get(r->headers_in, "Request-Range")
- || ((ua = apr_table_get(r->headers_in, "User-Agent"))
- && ap_strstr_c(ua, "MSIE 3")));
-}
-
-
-static int ap_set_byterange(request_rec *r)
-{
- const char *range = NULL, *if_range = NULL, *match = NULL, *ct = NULL;
- int num_ranges = 0;
-
- if (r->assbackwards) {
- return 0;
- }
-
- /* Check for Range request-header (HTTP/1.1) or Request-Range for
- * backwards-compatibility with second-draft Luotonen/Franks
- * byte-ranges (e.g. Netscape Navigator 2-3).
- *
- * We support this form, with Request-Range, and (farther down) we
- * send multipart/x-byteranges instead of multipart/byteranges for
- * Request-Range based requests to work around a bug in Netscape
- * Navigator 2-3 and MSIE 3.
- */
-
- if (!(range = apr_table_get(r->headers_in, "Range"))) {
- range = apr_table_get(r->headers_in, "Request-Range");
- }
-
- if (!range || strncasecmp(range, "bytes=", 6) || r->status != HTTP_OK) {
- return 0;
- }
-
- /* is content already a single range? */
- if (apr_table_get(r->headers_out, "Content-Range")) {
- return 0;
- }
-
- /* is content already a multiple range? */
- if ((ct = apr_table_get(r->headers_out, "Content-Type"))
- && (!strncasecmp(ct, "multipart/byteranges", 20)
- || !strncasecmp(ct, "multipart/x-byteranges", 22))) {
- return 0;
- }
-
- /* Check the If-Range header for Etag or Date.
- * Note that this check will return false (as required) if either
- * of the two etags are weak.
- */
- if ((if_range = apr_table_get(r->headers_in, "If-Range"))) {
- if (if_range[0] == '"') {
- if (!(match = apr_table_get(r->headers_out, "Etag"))
- || (strcmp(if_range, match) != 0)) {
- return 0;
- }
- }
- else if (!(match = apr_table_get(r->headers_out,
- "Last-Modified"))
- || (strcmp(if_range, match) != 0)) {
- return 0;
- }
- }
-
- if (!ap_strchr_c(range, ',')) {
- /* a single range */
- num_ranges = 1;
- }
- else {
- /* a multiple range */
- num_ranges = 2;
- }
-
- r->status = HTTP_PARTIAL_CONTENT;
- r->range = range + 6;
-
- return num_ranges;
-}
-
-
-static void
-mod_glfs_handle_byte_ranges (request_rec *r, glusterfs_file_t fd,
- int num_ranges)
-{
- conn_rec *c = r->connection;
- char *ts = NULL, *boundary = NULL, *bound_head = NULL;
- const char *orig_ct = NULL;
- char *current = NULL, *end = NULL;
- apr_bucket_brigade *bsend = NULL;
- apr_bucket *e = NULL;
- apr_off_t range_start, range_end;
- apr_status_t rv = APR_SUCCESS;
- char found = 0;
- apr_bucket *e2 = NULL, *ec = NULL;
-
- orig_ct = ap_make_content_type (r, r->content_type);
-
- if (num_ranges > 1) {
- boundary = apr_psprintf(r->pool, "%" APR_UINT64_T_HEX_FMT "%lx",
- (apr_uint64_t)r->request_time,
- (long) getpid());
-
- ap_set_content_type(r, apr_pstrcat(r->pool, "multipart",
- use_range_x(r) ? "/x-" : "/",
- "byteranges; boundary=",
- boundary, NULL));
-
- if (strcasecmp(orig_ct, NO_CONTENT_TYPE)) {
- bound_head = apr_pstrcat(r->pool,
- CRLF "--", boundary,
- CRLF "Content-type: ",
- orig_ct,
- CRLF "Content-range: bytes ",
- NULL);
- }
- else {
- /* if we have no type for the content, do our best */
- bound_head = apr_pstrcat(r->pool,
- CRLF "--", boundary,
- CRLF "Content-range: bytes ",
- NULL);
- }
- }
-
- while ((current = ap_getword(r->pool, &r->range, ','))
- && (rv = parse_byterange(current, r->finfo.size, &range_start,
- &range_end))) {
- bsend = NULL;
- if (rv == -1) {
- continue;
- }
-
- found = 1;
-
- /* For single range requests, we must produce Content-Range
- * header. Otherwise, we need to produce the multipart
- * boundaries.
- */
- if (num_ranges == 1) {
- apr_table_setn(r->headers_out, "Content-Range",
- apr_psprintf(r->pool,
- "bytes " BYTERANGE_FMT,
- range_start, range_end,
- r->finfo.size));
- }
- else {
- /* this brigade holds what we will be sending */
- bsend = apr_brigade_create(r->pool, c->bucket_alloc);
-
- e = apr_bucket_pool_create(bound_head,
- strlen(bound_head),
- r->pool, c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL(bsend, e);
-
- ts = apr_psprintf(r->pool, BYTERANGE_FMT CRLF CRLF,
- range_start, range_end,
- r->finfo.size);
- e = apr_bucket_pool_create(ts, strlen(ts), r->pool,
- c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL(bsend, e);
- }
- mod_glfs_read_async (r, bsend, fd, range_start,
- (range_end + 1 - range_start));
- }
-
- bsend = apr_brigade_create (r->pool, c->bucket_alloc);
-
- if (found == 0) {
- r->status = HTTP_OK;
- /* bsend is assumed to be empty if we get here. */
- e = ap_bucket_error_create(HTTP_RANGE_NOT_SATISFIABLE, NULL,
- r->pool, c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL(bsend, e);
- e = apr_bucket_eos_create(c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL(bsend, e);
- ap_pass_brigade (r->output_filters, bsend);
- return;
- }
-
- if (num_ranges > 1) {
- /* add the final boundary */
- end = apr_pstrcat(r->pool, CRLF "--", boundary, "--" CRLF,
- NULL);
-// ap_xlate_proto_to_ascii(end, strlen(end));
- e = apr_bucket_pool_create(end, strlen(end), r->pool,
- c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL(bsend, e);
- }
-
- ap_pass_brigade (r->output_filters, bsend);
-}
-
-
-
-/****************************************************************
- *
- * Looking things up in config entries...
- */
-
-/* Structure used to hold entries when we're actually building an index */
-
-struct ent {
- char *name;
- char *icon;
- char *alt;
- char *desc;
- apr_off_t size;
- apr_time_t lm;
- struct ent *next;
- int ascending, ignore_case, version_sort;
- char key;
- int isdir;
-};
-
-static char *find_item(request_rec *r, apr_array_header_t *list, int path_only)
-{
- const char *content_type = NULL;
- const char *content_encoding = NULL;
- char *path = NULL;
- int i = 0;
- struct mod_glfs_ai_item *items = NULL;
- struct mod_glfs_ai_item *p = NULL;
-
- content_type = ap_field_noparam(r->pool, r->content_type);
- content_encoding = r->content_encoding;
- path = r->filename;
- items = (struct mod_glfs_ai_item *) list->elts;
-
- for (i = 0; i < list->nelts; ++i) {
- p = &items[i];
- /* Special cased for ^^DIRECTORY^^ and ^^BLANKICON^^ */
- if ((path[0] == '^') || (!ap_strcmp_match(path,
- p->apply_path))) {
- if (!*(p->apply_to)) {
- return p->data;
- }
- else if (p->type == BY_PATH || path[0] == '^') {
- if (!ap_strcmp_match(path, p->apply_to)) {
- return p->data;
- }
- }
- else if (!path_only) {
- if (!content_encoding) {
- if (p->type == BY_TYPE) {
- if (content_type
- && !ap_strcasecmp_match(content_type,
- p->apply_to)) {
- return p->data;
- }
- }
- }
- else {
- if (p->type == BY_ENCODING) {
- if (!ap_strcasecmp_match(content_encoding,
- p->apply_to)) {
- return p->data;
- }
- }
- }
- }
- }
- }
- return NULL;
-}
-
-#define find_icon(d,p,t) find_item(p,d->icon_list,t)
-#define find_alt(d,p,t) find_item(p,d->alt_list,t)
-#define find_header(d,p) find_item(p,d->hdr_list,0)
-#define find_readme(d,p) find_item(p,d->rdme_list,0)
-
-static char *find_default_item(char *bogus_name, apr_array_header_t *list)
-{
- request_rec r;
- /* Bleah. I tried to clean up find_item, and it lead to this bit
- * of ugliness. Note that the fields initialized are precisely
- * those that find_item looks at...
- */
- r.filename = bogus_name;
- r.content_type = r.content_encoding = NULL;
- return find_item(&r, list, 1);
-}
-
-#define find_default_icon(d,n) find_default_item(n, d->icon_list)
-#define find_default_alt(d,n) find_default_item(n, d->alt_list)
-
-/*
- * Look through the list of pattern/description pairs and return the first one
- * if any) that matches the filename in the request. If multiple patterns
- * match, only the first one is used; since the order in the array is the
- * same as the order in which directives were processed, earlier matching
- * directives will dominate.
- */
-
-#ifdef CASE_BLIND_FILESYSTEM
-#define MATCH_FLAGS APR_FNM_CASE_BLIND
-#else
-#define MATCH_FLAGS 0
-#endif
-
-static char *find_desc(glusterfs_dir_config_t *dcfg, const char *filename_full)
-{
- int i = 0;
- mod_glfs_ai_desc_t *list = NULL;
- const char *filename_only = NULL, *filename = NULL;
- mod_glfs_ai_desc_t *tuple = &list[i];
- int found = 0;
-
- list = (mod_glfs_ai_desc_t *) dcfg->desc_list->elts;
- /*
- * If the filename includes a path, extract just the name itself
- * for the simple matches.
- */
- if ((filename_only = ap_strrchr_c(filename_full, '/')) == NULL) {
- filename_only = filename_full;
- }
- else {
- filename_only++;
- }
- for (i = 0; i < dcfg->desc_list->nelts; ++i) {
- /*
- * Only use the full-path filename if the pattern contains '/'s.
- */
- filename = (tuple->full_path) ? filename_full : filename_only;
- /*
- * Make the comparison using the cheapest method; only do
- * wildcard checking if we must.
- */
- if (tuple->wildcards) {
- found = (apr_fnmatch(tuple->pattern, filename,
- MATCH_FLAGS) == 0);
- }
- else {
- found = (ap_strstr_c(filename, tuple->pattern) != NULL);
- }
- if (found) {
- return tuple->description;
- }
- }
- return NULL;
-}
-
-static int ignore_entry(glusterfs_dir_config_t *d, char *path)
-{
- apr_array_header_t *list = d->ign_list;
- struct mod_glfs_ai_item *items = (struct mod_glfs_ai_item *) list->elts;
- char *tt = NULL, *ap = NULL;
- int i = 0;
- struct mod_glfs_ai_item *p = &items[i];
-
- if ((tt = strrchr(path, '/')) == NULL) {
- tt = path;
- }
- else {
- tt++;
- }
-
- for (i = 0; i < list->nelts; ++i) {
- p = &items[i];
- if ((ap = strrchr(p->apply_to, '/')) == NULL) {
- ap = p->apply_to;
- }
- else {
- ap++;
- }
-
-#ifndef CASE_BLIND_FILESYSTEM
- if (!ap_strcmp_match(path, p->apply_path)
- && !ap_strcmp_match(tt, ap)) {
- return 1;
- }
-#else /* !CASE_BLIND_FILESYSTEM */
- /*
- * On some platforms, the match must be case-blind. This is really
- * a factor of the filesystem involved, but we can't detect that
- * reliably - so we have to granularise at the OS level.
- */
- if (!ap_strcasecmp_match(path, p->apply_path)
- && !ap_strcasecmp_match(tt, ap)) {
- return 1;
- }
-#endif /* !CASE_BLIND_FILESYSTEM */
- }
- return 0;
-}
-
-/*****************************************************************
- *
- * Actually generating output
- */
-
-/*
- * Elements of the emitted document:
- * Preamble
- * Emitted unless SUPPRESS_PREAMBLE is set AND ap_run_sub_req
- * succeeds for the (content_type == text/html) header file.
- * Header file
- * Emitted if found (and able).
- * H1 tag line
- * Emitted if a header file is NOT emitted.
- * Directory stuff
- * Always emitted.
- * HR
- * Emitted if FANCY_INDEXING is set.
- * Readme file
- * Emitted if found (and able).
- * ServerSig
- * Emitted if ServerSignature is not Off AND a readme file
- * is NOT emitted.
- * Postamble
- * Emitted unless SUPPRESS_PREAMBLE is set AND ap_run_sub_req
- * succeeds for the (content_type == text/html) readme file.
- */
-
-
-/*
- * emit a plain text file
- */
-static void do_emit_plain(request_rec *r, apr_file_t *f)
-{
- char buf[AP_IOBUFSIZE + 1];
- int ch = 0;
- apr_size_t i = 0, c = 0, n = 0;
- apr_status_t rv = APR_SUCCESS;
-
- ap_rputs("<pre>\n", r);
- while (!apr_file_eof(f)) {
- do {
- n = sizeof(char) * AP_IOBUFSIZE;
- rv = apr_file_read(f, buf, &n);
- } while (APR_STATUS_IS_EINTR(rv));
- if (n == 0 || rv != APR_SUCCESS) {
- /* ###: better error here? */
- break;
- }
- buf[n] = '\0';
- c = 0;
- while (c < n) {
- for (i = c; i < n; i++) {
- if (buf[i] == '<' || buf[i] == '>'
- || buf[i] == '&') {
- break;
- }
- }
- ch = buf[i];
- buf[i] = '\0';
- ap_rputs(&buf[c], r);
- if (ch == '<') {
- ap_rputs("&lt;", r);
- }
- else if (ch == '>') {
- ap_rputs("&gt;", r);
- }
- else if (ch == '&') {
- ap_rputs("&amp;", r);
- }
- c = i + 1;
- }
- }
- ap_rputs("</pre>\n", r);
-}
-
-/*
- * Handle the preamble through the H1 tag line, inclusive. Locate
- * the file with a subrequests. Process text/html documents by actually
- * running the subrequest; text/xxx documents get copied verbatim,
- * and any other content type is ignored. This means that a non-text
- * document (such as HEADER.gif) might get multiviewed as the result
- * instead of a text document, meaning nothing will be displayed, but
- * oh well.
- */
-static void emit_head(request_rec *r, char *header_fname, int suppress_amble,
- int emit_xhtml, char *title)
-{
- apr_table_t *hdrs = r->headers_in;
- apr_file_t *f = NULL;
- request_rec *rr = NULL;
- int emit_amble = 1;
- int emit_H1 = 1;
- const char *r_accept = NULL;
- const char *r_accept_enc = NULL;
-
- /*
- * If there's a header file, send a subrequest to look for it. If it's
- * found and html do the subrequest, otherwise handle it
- */
- r_accept = apr_table_get(hdrs, "Accept");
- r_accept_enc = apr_table_get(hdrs, "Accept-Encoding");
- apr_table_setn(hdrs, "Accept", "text/html, text/plain");
- apr_table_unset(hdrs, "Accept-Encoding");
-
-
- if ((header_fname != NULL) && r->args) {
- header_fname = apr_pstrcat(r->pool, header_fname, "?", r->args,
- NULL);
- }
-
- if ((header_fname != NULL)
- && (rr = ap_sub_req_lookup_uri(header_fname, r, r->output_filters))
- && (rr->status == HTTP_OK)
- && (rr->filename != NULL)
- && (rr->finfo.filetype == APR_REG)) {
- /*
- * Check for the two specific cases we allow: text/html and
- * text/anything-else. The former is allowed to be processed for
- * SSIs.
- */
- if (rr->content_type != NULL) {
- if (!strcasecmp(ap_field_noparam(r->pool,
- rr->content_type),
- "text/html")) {
- ap_filter_t *f = NULL;
-
- /* Hope everything will work... */
- emit_amble = 0;
- emit_H1 = 0;
-
- if (! suppress_amble) {
- emit_preamble(r, emit_xhtml, title);
- }
- /* This is a hack, but I can't find any better
- * way to do this. The problem is that we have
- * already created the sub-request,
- * but we just inserted the OLD_WRITE filter,
- * and the sub-request needs to pass its data
- * through the OLD_WRITE filter, or things go
- * horribly wrong (missing data, data in
- * the wrong order, etc). To fix it, if you
- * create a sub-request and then insert the
- * OLD_WRITE filter before you run the request,
- * you need to make sure that the sub-request
- * data goes through the OLD_WRITE filter. Just
- * steal this code. The long-term solution is
- * to remove the ap_r* functions.
- */
- for (f=rr->output_filters;
- f->frec != ap_subreq_core_filter_handle;
- f = f->next);
- f->next = r->output_filters;
-
- /*
- * If there's a problem running the subrequest,
- * display the preamble if we didn't do it
- * before -- the header file didn't get displayed.
- */
- if (ap_run_sub_req(rr) != OK) {
- /* It didn't work */
- emit_amble = suppress_amble;
- emit_H1 = 1;
- }
- }
- else if (!strncasecmp("text/", rr->content_type, 5)) {
- /*
- * If we can open the file, prefix it with the
- * preamble regardless; since we'll be sending
- * a <pre> block around the file's contents,
- * any HTML header it had won't end up
- * where it belongs.
- */
- if (apr_file_open(&f, rr->filename, APR_READ,
- APR_OS_DEFAULT, r->pool)
- == APR_SUCCESS) {
- emit_preamble(r, emit_xhtml, title);
- emit_amble = 0;
- do_emit_plain(r, f);
- apr_file_close(f);
- emit_H1 = 0;
- }
- }
- }
- }
-
- if (r_accept) {
- apr_table_setn(hdrs, "Accept", r_accept);
- }
- else {
- apr_table_unset(hdrs, "Accept");
- }
-
- if (r_accept_enc) {
- apr_table_setn(hdrs, "Accept-Encoding", r_accept_enc);
- }
-
- if (emit_amble) {
- emit_preamble(r, emit_xhtml, title);
- }
- if (emit_H1) {
- ap_rvputs(r, "<h1>Index of ", title, "</h1>\n", NULL);
- }
- if (rr != NULL) {
- ap_destroy_sub_req(rr);
- }
-}
-
-
-/*
- * Handle the Readme file through the postamble, inclusive. Locate
- * the file with a subrequests. Process text/html documents by actually
- * running the subrequest; text/xxx documents get copied verbatim,
- * and any other content type is ignored. This means that a non-text
- * document (such as FOOTER.gif) might get multiviewed as the result
- * instead of a text document, meaning nothing will be displayed, but
- * oh well.
- */
-static void emit_tail(request_rec *r, char *readme_fname, int suppress_amble)
-{
- apr_file_t *f = NULL;
- request_rec *rr = NULL;
- int suppress_post = 0, suppress_sig = 0;
-
- /*
- * If there's a readme file, send a subrequest to look for it. If it's
- * found and a text file, handle it -- otherwise fall through and
- * pretend there's nothing there.
- */
- if ((readme_fname != NULL)
- && (rr = ap_sub_req_lookup_uri(readme_fname, r, r->output_filters))
- && (rr->status == HTTP_OK)
- && (rr->filename != NULL)
- && rr->finfo.filetype == APR_REG) {
- /*
- * Check for the two specific cases we allow: text/html and
- * text/anything-else. The former is allowed to be processed for
- * SSIs.
- */
- if (rr->content_type != NULL) {
- if (!strcasecmp(ap_field_noparam(r->pool,
- rr->content_type),
- "text/html")) {
- ap_filter_t *f;
- for (f=rr->output_filters;
- f->frec != ap_subreq_core_filter_handle;
- f = f->next);
- f->next = r->output_filters;
-
-
- if (ap_run_sub_req(rr) == OK) {
- /* worked... */
- suppress_sig = 1;
- suppress_post = suppress_amble;
- }
- }
- else if (!strncasecmp("text/", rr->content_type, 5)) {
- /*
- * If we can open the file, suppress the signature.
- */
- if (apr_file_open(&f, rr->filename, APR_READ,
- APR_OS_DEFAULT, r->pool)
- == APR_SUCCESS) {
- do_emit_plain(r, f);
- apr_file_close(f);
- suppress_sig = 1;
- }
- }
- }
- }
-
- if (!suppress_sig) {
- ap_rputs(ap_psignature("", r), r);
- }
- if (!suppress_post) {
- ap_rputs("</body></html>\n", r);
- }
- if (rr != NULL) {
- ap_destroy_sub_req(rr);
- }
-}
-
-
-static char *find_title(request_rec *r)
-{
- char titlebuf[MAX_STRING_LEN], *find = "<title>";
- apr_file_t *thefile = NULL;
- int x = 0, y = 0, p = 0;
- apr_size_t n;
-
- if (r->status != HTTP_OK) {
- return NULL;
- }
- if ((r->content_type != NULL)
- && (!strcasecmp(ap_field_noparam(r->pool, r->content_type),
- "text/html")
- || !strcmp(r->content_type, INCLUDES_MAGIC_TYPE))
- && !r->content_encoding) {
- if (apr_file_open(&thefile, r->filename, APR_READ,
- APR_OS_DEFAULT, r->pool) != APR_SUCCESS) {
- return NULL;
- }
- n = sizeof(char) * (MAX_STRING_LEN - 1);
- apr_file_read(thefile, titlebuf, &n);
- if (n <= 0) {
- apr_file_close(thefile);
- return NULL;
- }
- titlebuf[n] = '\0';
- for (x = 0, p = 0; titlebuf[x]; x++) {
- if (apr_tolower(titlebuf[x]) == find[p]) {
- if (!find[++p]) {
- if ((p = ap_ind(&titlebuf[++x], '<'))
- != -1) {
- titlebuf[x + p] = '\0';
- }
- /* Scan for line breaks for Tanmoy's
- secretary
- */
- for (y = x; titlebuf[y]; y++) {
- if ((titlebuf[y] == CR)
- || (titlebuf[y] == LF)) {
- if (y == x) {
- x++;
- }
- else {
- titlebuf[y] = ' ';
- }
- }
- }
- apr_file_close(thefile);
- return apr_pstrdup(r->pool,
- &titlebuf[x]);
- }
- }
- else {
- p = 0;
- }
- }
- apr_file_close(thefile);
- }
- return NULL;
-}
-
-static struct ent *make_parent_entry(apr_int32_t autoindex_opts,
- glusterfs_dir_config_t *d,
- request_rec *r, char keyid,
- char direction)
-{
- struct ent *p = NULL;
- char *testpath = NULL;
- /*
- * p->name is now the true parent URI.
- * testpath is a crafted lie, so that the syntax '/some/..'
- * (or simply '..')be used to describe 'up' from '/some/'
- * when processeing IndexIgnore, and Icon|Alt|Desc configs.
- */
-
- p = (struct ent *) apr_pcalloc(r->pool, sizeof(struct ent));
- /* The output has always been to the parent. Don't make ourself
- * our own parent (worthless cyclical reference).
- */
- if (!(p->name = ap_make_full_path(r->pool, r->uri, "../"))) {
- return (NULL);
- }
- ap_getparents(p->name);
- if (!*p->name) {
- return (NULL);
- }
-
- /* IndexIgnore has always compared "/thispath/.." */
- testpath = ap_make_full_path(r->pool, r->filename, "..");
- if (ignore_entry(d, testpath)) {
- return (NULL);
- }
-
- p->size = -1;
- p->lm = -1;
- p->key = apr_toupper(keyid);
- p->ascending = (apr_toupper(direction) == D_ASCENDING);
- p->version_sort = autoindex_opts & VERSION_SORT;
- if (autoindex_opts & FANCY_INDEXING) {
- if (!(p->icon = find_default_icon(d, testpath))) {
- p->icon = find_default_icon(d, "^^DIRECTORY^^");
- }
- if (!(p->alt = find_default_alt(d, testpath))) {
- if (!(p->alt = find_default_alt(d, "^^DIRECTORY^^"))) {
- p->alt = "DIR";
- }
- }
- p->desc = find_desc(d, testpath);
- }
- return p;
-}
-
-static struct ent *make_autoindex_entry(const apr_finfo_t *dirent,
- int autoindex_opts,
- glusterfs_dir_config_t *d,
- request_rec *r, char keyid,
- char direction,
- const char *pattern)
-{
- request_rec *rr = NULL;
- struct ent *p = NULL;
- int show_forbidden = 0;
-
- /* Dot is ignored, Parent is handled by make_parent_entry() */
- if ((dirent->name[0] == '.') && (!dirent->name[1]
- || ((dirent->name[1] == '.')
- && !dirent->name[2])))
- return (NULL);
-
- /*
- * On some platforms, the match must be case-blind. This is really
- * a factor of the filesystem involved, but we can't detect that
- * reliably - so we have to granularise at the OS level.
- */
- if (pattern && (apr_fnmatch(pattern, dirent->name,
- APR_FNM_NOESCAPE | APR_FNM_PERIOD
-#ifdef CASE_BLIND_FILESYSTEM
- | APR_FNM_CASE_BLIND
-#endif
- )
- != APR_SUCCESS)) {
- return (NULL);
- }
-
- if (ignore_entry(d, ap_make_full_path(r->pool,
- r->filename, dirent->name))) {
- return (NULL);
- }
-
- if (!(rr = ap_sub_req_lookup_dirent(dirent, r, AP_SUBREQ_NO_ARGS,
- NULL))) {
- return (NULL);
- }
-
- if((autoindex_opts & SHOW_FORBIDDEN)
- && (rr->status == HTTP_UNAUTHORIZED
- || rr->status == HTTP_FORBIDDEN)) {
- show_forbidden = 1;
- }
-
- if ((rr->finfo.filetype != APR_DIR && rr->finfo.filetype != APR_REG)
- || !(rr->status == OK || ap_is_HTTP_SUCCESS(rr->status)
- || ap_is_HTTP_REDIRECT(rr->status)
- || show_forbidden == 1)) {
- ap_destroy_sub_req(rr);
- return (NULL);
- }
-
- p = (struct ent *) apr_pcalloc(r->pool, sizeof(struct ent));
- if (dirent->filetype == APR_DIR) {
- p->name = apr_pstrcat(r->pool, dirent->name, "/", NULL);
- }
- else {
- p->name = apr_pstrdup(r->pool, dirent->name);
- }
- p->size = -1;
- p->icon = NULL;
- p->alt = NULL;
- p->desc = NULL;
- p->lm = -1;
- p->isdir = 0;
- p->key = apr_toupper(keyid);
- p->ascending = (apr_toupper(direction) == D_ASCENDING);
- p->version_sort = !!(autoindex_opts & VERSION_SORT);
- p->ignore_case = !!(autoindex_opts & IGNORE_CASE);
-
- if (autoindex_opts & (FANCY_INDEXING | TABLE_INDEXING)) {
- p->lm = rr->finfo.mtime;
- if (dirent->filetype == APR_DIR) {
- if (autoindex_opts & FOLDERS_FIRST) {
- p->isdir = 1;
- }
- rr->filename = ap_make_dirstr_parent (rr->pool,
- rr->filename);
-
- /* omit the trailing slash (1.3 compat) */
- rr->filename[strlen(rr->filename) - 1] = '\0';
-
- if (!(p->icon = find_icon(d, rr, 1))) {
- p->icon = find_default_icon(d, "^^DIRECTORY^^");
- }
- if (!(p->alt = find_alt(d, rr, 1))) {
- if (!(p->alt = find_default_alt(d,
- "^^DIRECTORY^^"))) {
- p->alt = "DIR";
- }
- }
- }
- else {
- p->icon = find_icon(d, rr, 0);
- p->alt = find_alt(d, rr, 0);
- p->size = rr->finfo.size;
- }
-
- p->desc = find_desc(d, rr->filename);
-
- if ((!p->desc) && (autoindex_opts & SCAN_HTML_TITLES)) {
- p->desc = apr_pstrdup(r->pool, find_title(rr));
- }
- }
- ap_destroy_sub_req(rr);
- /*
- * We don't need to take any special action for the file size key.
- * If we did, it would go here.
- */
- if (keyid == K_LAST_MOD) {
- if (p->lm < 0) {
- p->lm = 0;
- }
- }
- return (p);
-}
-
-static char *terminate_description(glusterfs_dir_config_t *d, char *desc,
- apr_int32_t autoindex_opts, int desc_width)
-{
- int maxsize = desc_width;
- register int x = 0;
-
- /*
- * If there's no DescriptionWidth in effect, default to the old
- * behaviour of adjusting the description size depending upon
- * what else is being displayed. Otherwise, stick with the
- * setting.
- */
- if (d->desc_adjust == K_UNSET) {
- if (autoindex_opts & SUPPRESS_ICON) {
- maxsize += 6;
- }
- if (autoindex_opts & SUPPRESS_LAST_MOD) {
- maxsize += 19;
- }
- if (autoindex_opts & SUPPRESS_SIZE) {
- maxsize += 7;
- }
- }
- for (x = 0; desc[x] && ((maxsize > 0) || (desc[x] == '<')); x++) {
- if (desc[x] == '<') {
- while (desc[x] != '>') {
- if (!desc[x]) {
- maxsize = 0;
- break;
- }
- ++x;
- }
- }
- else if (desc[x] == '&') {
- /* entities like &auml; count as one character */
- --maxsize;
- for ( ; desc[x] != ';'; ++x) {
- if (desc[x] == '\0') {
- maxsize = 0;
- break;
- }
- }
- }
- else {
- --maxsize;
- }
- }
- if (!maxsize && desc[x] != '\0') {
- desc[x - 1] = '>'; /* Grump. */
- desc[x] = '\0'; /* Double Grump! */
- }
- return desc;
-}
-
-/*
- * Emit the anchor for the specified field. If a field is the key for the
- * current request, the link changes its meaning to reverse the order when
- * selected again. Non-active fields always start in ascending order.
- */
-static void emit_link(request_rec *r, const char *anchor, char column,
- char curkey, char curdirection,
- const char *colargs, int nosort)
-{
- char qvalue[9];
-
- if (!nosort) {
-
- qvalue[0] = '?';
- qvalue[1] = 'C';
- qvalue[2] = '=';
- qvalue[3] = column;
- qvalue[4] = ';';
- qvalue[5] = 'O';
- qvalue[6] = '=';
- /* reverse? */
- qvalue[7] = ((curkey == column) && (curdirection == D_ASCENDING))
- ? D_DESCENDING : D_ASCENDING;
- qvalue[8] = '\0';
- ap_rvputs(r, "<a href=\"", qvalue, colargs ? colargs : "",
- "\">", anchor, "</a>", NULL);
- }
- else {
- ap_rputs(anchor, r);
- }
-}
-
-static void output_directories(struct ent **ar, int n,
- glusterfs_dir_config_t *d, request_rec *r,
- apr_int32_t autoindex_opts, char keyid,
- char direction, const char *colargs)
-{
- int x = 0;
- apr_size_t rv = APR_SUCCESS;
- char *name = NULL, *tp = NULL;
- int static_columns = 0;
- apr_pool_t *scratch = NULL;
- int name_width = 0, desc_width = 0, cols = 0;
- char *name_scratch = NULL, *pad_scratch = NULL, *breakrow = "";
- char *anchor = NULL, *t = NULL, *t2 = NULL;
- int nwidth = 0;
- char time_str[MAX_STRING_LEN];
- apr_time_exp_t ts = {0, };
- char buf[5];
-
- name = r->uri;
- static_columns = !!(autoindex_opts & SUPPRESS_COLSORT);
- apr_pool_create(&scratch, r->pool);
- if (name[0] == '\0') {
- name = "/";
- }
-
- name_width = d->name_width;
- desc_width = d->desc_width;
-
- if ((autoindex_opts & (FANCY_INDEXING | TABLE_INDEXING))
- == FANCY_INDEXING) {
- if (d->name_adjust == K_ADJUST) {
- for (x = 0; x < n; x++) {
- int t = 0;
- t = strlen(ar[x]->name);
- if (t > name_width) {
- name_width = t;
- }
- }
- }
-
- if (d->desc_adjust == K_ADJUST) {
- for (x = 0; x < n; x++) {
- if (ar[x]->desc != NULL) {
- int t = 0;
- t = strlen(ar[x]->desc);
- if (t > desc_width) {
- desc_width = t;
- }
- }
- }
- }
- }
- name_scratch = apr_palloc(r->pool, name_width + 1);
- pad_scratch = apr_palloc(r->pool, name_width + 1);
- memset(pad_scratch, ' ', name_width);
- pad_scratch[name_width] = '\0';
-
- if (autoindex_opts & TABLE_INDEXING) {
- cols = 1;
- ap_rputs("<table><tr>", r);
- if (!(autoindex_opts & SUPPRESS_ICON)) {
- ap_rputs("<th>", r);
- if ((tp = find_default_icon(d, "^^BLANKICON^^"))) {
- ap_rvputs(r, "<img src=\"",
- ap_escape_html(scratch, tp),
- "\" alt=\"[ICO]\"", NULL);
- if (d->icon_width) {
- ap_rprintf(r, " width=\"%d\"",
- d->icon_width);
- }
- if (d->icon_height) {
- ap_rprintf(r, " height=\"%d\"",
- d->icon_height);
- }
-
- if (autoindex_opts & EMIT_XHTML) {
- ap_rputs(" /", r);
- }
- ap_rputs("></th>", r);
- }
- else {
- ap_rputs("&nbsp;</th>", r);
- }
-
- ++cols;
- }
- ap_rputs("<th>", r);
- emit_link(r, "Name", K_NAME, keyid, direction,
- colargs, static_columns);
- if (!(autoindex_opts & SUPPRESS_LAST_MOD)) {
- ap_rputs("</th><th>", r);
- emit_link(r, "Last modified", K_LAST_MOD, keyid,
- direction, colargs, static_columns);
- ++cols;
- }
- if (!(autoindex_opts & SUPPRESS_SIZE)) {
- ap_rputs("</th><th>", r);
- emit_link(r, "Size", K_SIZE, keyid, direction,
- colargs, static_columns);
- ++cols;
- }
- if (!(autoindex_opts & SUPPRESS_DESC)) {
- ap_rputs("</th><th>", r);
- emit_link(r, "Description", K_DESC, keyid, direction,
- colargs, static_columns);
- ++cols;
- }
- if (!(autoindex_opts & SUPPRESS_RULES)) {
- breakrow = apr_psprintf(r->pool,
- "<tr><th colspan=\"%d\">"
- "<hr%s></th></tr>\n", cols,
- (autoindex_opts & EMIT_XHTML)
- ? " /" : "");
- }
- ap_rvputs(r, "</th></tr>", breakrow, NULL);
- }
- else if (autoindex_opts & FANCY_INDEXING) {
- ap_rputs("<pre>", r);
- if (!(autoindex_opts & SUPPRESS_ICON)) {
- if ((tp = find_default_icon(d, "^^BLANKICON^^"))) {
- ap_rvputs(r, "<img src=\"",
- ap_escape_html(scratch, tp),
- "\" alt=\"Icon \"", NULL);
- if (d->icon_width) {
- ap_rprintf(r, " width=\"%d\"",
- d->icon_width);
- }
- if (d->icon_height) {
- ap_rprintf(r, " height=\"%d\"",
- d->icon_height);
- }
-
- if (autoindex_opts & EMIT_XHTML) {
- ap_rputs(" /", r);
- }
- ap_rputs("> ", r);
- }
- else {
- ap_rputs(" ", r);
- }
- }
- emit_link(r, "Name", K_NAME, keyid, direction,
- colargs, static_columns);
- ap_rputs(pad_scratch + 4, r);
- /*
- * Emit the guaranteed-at-least-one-space-between-columns byte.
- */
- ap_rputs(" ", r);
- if (!(autoindex_opts & SUPPRESS_LAST_MOD)) {
- emit_link(r, "Last modified", K_LAST_MOD, keyid,
- direction, colargs, static_columns);
- ap_rputs(" ", r);
- }
- if (!(autoindex_opts & SUPPRESS_SIZE)) {
- emit_link(r, "Size", K_SIZE, keyid, direction,
- colargs, static_columns);
- ap_rputs(" ", r);
- }
- if (!(autoindex_opts & SUPPRESS_DESC)) {
- emit_link(r, "Description", K_DESC, keyid, direction,
- colargs, static_columns);
- }
- if (!(autoindex_opts & SUPPRESS_RULES)) {
- ap_rputs("<hr", r);
- if (autoindex_opts & EMIT_XHTML) {
- ap_rputs(" /", r);
- }
- ap_rputs(">", r);
- }
- else {
- ap_rputc('\n', r);
- }
- }
- else {
- ap_rputs("<ul>", r);
- }
-
- for (x = 0; x < n; x++) {
- apr_pool_clear(scratch);
-
- t = ar[x]->name;
- anchor = ap_escape_html(scratch, ap_os_escape_path(scratch, t,
- 0));
-
- if (!x && t[0] == '/') {
- t2 = "Parent Directory";
- }
- else {
- t2 = t;
- }
-
- if (autoindex_opts & TABLE_INDEXING) {
- ap_rputs("<tr>", r);
- if (!(autoindex_opts & SUPPRESS_ICON)) {
- ap_rputs("<td valign=\"top\">", r);
- if (autoindex_opts & ICONS_ARE_LINKS) {
- ap_rvputs(r, "<a href=\"", anchor,
- "\">", NULL);
- }
- if ((ar[x]->icon) || d->default_icon) {
- ap_rvputs(r, "<img src=\"",
- ap_escape_html(scratch,
- ar[x]->icon ?
- ar[x]->icon
- : d->default_icon),
- "\" alt=\"[",
- (ar[x]->alt ?
- ar[x]->alt : " "),
- "]\"", NULL);
- if (d->icon_width) {
- ap_rprintf(r, " width=\"%d\"",
- d->icon_width);
- }
- if (d->icon_height) {
- ap_rprintf(r, " height=\"%d\"",
- d->icon_height);
- }
-
- if (autoindex_opts & EMIT_XHTML) {
- ap_rputs(" /", r);
- }
- ap_rputs(">", r);
- }
- else {
- ap_rputs("&nbsp;", r);
- }
- if (autoindex_opts & ICONS_ARE_LINKS) {
- ap_rputs("</a></td>", r);
- }
- else {
- ap_rputs("</td>", r);
- }
- }
- if (d->name_adjust == K_ADJUST) {
- ap_rvputs(r, "<td><a href=\"", anchor, "\">",
- ap_escape_html(scratch, t2), "</a>",
- NULL);
- }
- else {
- nwidth = strlen(t2);
- if (nwidth > name_width) {
- memcpy(name_scratch, t2, name_width - 3);
- name_scratch[name_width - 3] = '.';
- name_scratch[name_width - 2] = '.';
- name_scratch[name_width - 1] = '>';
- name_scratch[name_width] = 0;
- t2 = name_scratch;
- nwidth = name_width;
- }
- ap_rvputs(r, "<td><a href=\"", anchor, "\">",
- ap_escape_html(scratch, t2),
- "</a>", pad_scratch + nwidth, NULL);
- }
- if (!(autoindex_opts & SUPPRESS_LAST_MOD)) {
- if (ar[x]->lm != -1) {
- char time_str[MAX_STRING_LEN];
- apr_time_exp_t ts;
- apr_time_exp_lt(&ts, ar[x]->lm);
- apr_strftime(time_str, &rv,
- MAX_STRING_LEN,
- "</td><td align=\"right\""
- ">%d-%b-%Y %H:%M ",
- &ts);
- ap_rputs(time_str, r);
- }
- else {
- ap_rputs("</td><td>&nbsp;", r);
- }
- }
- if (!(autoindex_opts & SUPPRESS_SIZE)) {
- ap_rvputs(r, "</td><td align=\"right\">",
- apr_strfsize(ar[x]->size, buf), NULL);
- }
- if (!(autoindex_opts & SUPPRESS_DESC)) {
- if (ar[x]->desc) {
- if (d->desc_adjust == K_ADJUST) {
- ap_rvputs(r, "</td><td>",
- ar[x]->desc, NULL);
- }
- else {
- ap_rvputs(r, "</td><td>",
- terminate_description(d, ar[x]->desc,
- autoindex_opts,
- desc_width), NULL);
- }
- }
- }
- else {
- ap_rputs("</td><td>&nbsp;", r);
- }
- ap_rputs("</td></tr>\n", r);
- }
- else if (autoindex_opts & FANCY_INDEXING) {
- if (!(autoindex_opts & SUPPRESS_ICON)) {
- if (autoindex_opts & ICONS_ARE_LINKS) {
- ap_rvputs(r, "<a href=\"", anchor,
- "\">", NULL);
- }
- if ((ar[x]->icon) || d->default_icon) {
- ap_rvputs(r, "<img src=\"",
- ap_escape_html(scratch,
- ar[x]->icon ?
- ar[x]->icon
- : d->default_icon),
- "\" alt=\"[",
- (ar[x]->alt ? ar[x]->alt
- : " "),
- "]\"", NULL);
- if (d->icon_width) {
- ap_rprintf(r, " width=\"%d\"",
- d->icon_width);
- }
- if (d->icon_height) {
- ap_rprintf(r, " height=\"%d\"",
- d->icon_height);
- }
-
- if (autoindex_opts & EMIT_XHTML) {
- ap_rputs(" /", r);
- }
- ap_rputs(">", r);
- }
- else {
- ap_rputs(" ", r);
- }
- if (autoindex_opts & ICONS_ARE_LINKS) {
- ap_rputs("</a> ", r);
- }
- else {
- ap_rputc(' ', r);
- }
- }
- nwidth = strlen(t2);
- if (nwidth > name_width) {
- memcpy(name_scratch, t2, name_width - 3);
- name_scratch[name_width - 3] = '.';
- name_scratch[name_width - 2] = '.';
- name_scratch[name_width - 1] = '>';
- name_scratch[name_width] = 0;
- t2 = name_scratch;
- nwidth = name_width;
- }
- ap_rvputs(r, "<a href=\"", anchor, "\">",
- ap_escape_html(scratch, t2),
- "</a>", pad_scratch + nwidth, NULL);
- /*
- * The blank before the storm.. er, before the next
- * field.
- */
- ap_rputs(" ", r);
- if (!(autoindex_opts & SUPPRESS_LAST_MOD)) {
- if (ar[x]->lm != -1) {
- apr_time_exp_lt(&ts, ar[x]->lm);
- apr_strftime(time_str, &rv,
- MAX_STRING_LEN,
- "%d-%b-%Y %H:%M ", &ts);
- ap_rputs(time_str, r);
- }
- else {
- /* Length="22-Feb-1998 23:42 "
- * (see 4 lines above)
- */
- ap_rputs(" ", r);
- }
- }
- if (!(autoindex_opts & SUPPRESS_SIZE)) {
- ap_rputs(apr_strfsize(ar[x]->size, buf), r);
- ap_rputs(" ", r);
- }
- if (!(autoindex_opts & SUPPRESS_DESC)) {
- if (ar[x]->desc) {
- ap_rputs(terminate_description(d,
- ar[x]->desc,
- autoindex_opts,
- desc_width), r);
- }
- }
- ap_rputc('\n', r);
- }
- else {
- ap_rvputs(r, "<li><a href=\"", anchor, "\"> ",
- ap_escape_html(scratch, t2),
- "</a></li>\n", NULL);
- }
- }
- if (autoindex_opts & TABLE_INDEXING) {
- ap_rvputs(r, breakrow, "</table>\n", NULL);
- }
- else if (autoindex_opts & FANCY_INDEXING) {
- if (!(autoindex_opts & SUPPRESS_RULES)) {
- ap_rputs("<hr", r);
- if (autoindex_opts & EMIT_XHTML) {
- ap_rputs(" /", r);
- }
- ap_rputs("></pre>\n", r);
- }
- else {
- ap_rputs("</pre>\n", r);
- }
- }
- else {
- ap_rputs("</ul>\n", r);
- }
-}
-
-/*
- * Compare two file entries according to the sort criteria. The return
- * is essentially a signum function value.
- */
-
-static int dsortf(struct ent **e1, struct ent **e2)
-{
- struct ent *c1 = NULL, *c2 = NULL;
- int result = 0;
-
- /*
- * First, see if either of the entries is for the parent directory.
- * If so, that *always* sorts lower than anything else.
- */
- if ((*e1)->name[0] == '/') {
- return -1;
- }
- if ((*e2)->name[0] == '/') {
- return 1;
- }
- /*
- * Now see if one's a directory and one isn't, if we're set
- * isdir for FOLDERS_FIRST.
- */
- if ((*e1)->isdir != (*e2)->isdir) {
- return (*e1)->isdir ? -1 : 1;
- }
- /*
- * All of our comparisons will be of the c1 entry against the c2 one,
- * so assign them appropriately to take care of the ordering.
- */
- if ((*e1)->ascending) {
- c1 = *e1;
- c2 = *e2;
- }
- else {
- c1 = *e2;
- c2 = *e1;
- }
-
- switch (c1->key) {
- case K_LAST_MOD:
- if (c1->lm > c2->lm) {
- return 1;
- }
- else if (c1->lm < c2->lm) {
- return -1;
- }
- break;
- case K_SIZE:
- if (c1->size > c2->size) {
- return 1;
- }
- else if (c1->size < c2->size) {
- return -1;
- }
- break;
- case K_DESC:
- if (c1->version_sort) {
- result = apr_strnatcmp(c1->desc ? c1->desc : "",
- c2->desc ? c2->desc : "");
- }
- else {
- result = strcmp(c1->desc ? c1->desc : "",
- c2->desc ? c2->desc : "");
- }
- if (result) {
- return result;
- }
- break;
- }
-
- /* names may identical when treated case-insensitively,
- * so always fall back on strcmp() flavors to put entries
- * in deterministic order. This means that 'ABC' and 'abc'
- * will always appear in the same order, rather than
- * variably between 'ABC abc' and 'abc ABC' order.
- */
-
- if (c1->version_sort) {
- if (c1->ignore_case) {
- result = apr_strnatcasecmp (c1->name, c2->name);
- }
- if (!result) {
- result = apr_strnatcmp(c1->name, c2->name);
- }
- }
-
- /* The names may be identical in respects other other than
- * filename case when strnatcmp is used above, so fall back
- * to strcmp on conflicts so that fn1.01.zzz and fn1.1.zzz
- * are also sorted in a deterministic order.
- */
-
- if (!result && c1->ignore_case) {
- result = strcasecmp (c1->name, c2->name);
- }
-
- if (!result) {
- result = strcmp (c1->name, c2->name);
- }
-
- return result;
-}
-
-
-static int
-mod_glfs_index_directory (request_rec *r,
- glusterfs_dir_config_t *autoindex_conf)
-{
- char *title_name = NULL, *title_endp = NULL;
- char *pstring = NULL, *colargs = NULL;
- char *path = NULL, *fname = NULL, *charset = NULL;
- char *fullpath = NULL, *name = NULL, *ctype = NULL;
- apr_finfo_t dirent;
- glusterfs_file_t fd = NULL;
- apr_status_t status = APR_SUCCESS;
- int num_ent = 0, x;
- struct ent *head = NULL, *p = NULL;
- struct ent **ar = NULL;
- const char *qstring = NULL;
- apr_int32_t autoindex_opts = autoindex_conf->opts;
- char keyid, direction;
- apr_size_t dirpathlen;
- glusterfs_dir_config_t *dir_config = NULL;
- int ret = -1;
- struct dirent *entry = NULL;
- struct stat st = {0, };
-
- name = r->filename;
- title_name = ap_escape_html(r->pool, r->uri);
- ctype = "text/html";
- dir_config = mod_glfs_dconfig (r);
- if (dir_config == NULL) {
- return HTTP_INTERNAL_SERVER_ERROR;
- }
-
- path = r->uri;
- fd = glusterfs_open (path, O_RDONLY, 0);
- if (fd == 0) {
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "file permissions deny server access: %s",
- r->filename);
- return HTTP_FORBIDDEN;
- }
-
- if (autoindex_conf->ctype) {
- ctype = autoindex_conf->ctype;
- }
- if (autoindex_conf->charset) {
- charset = autoindex_conf->charset;
- }
- else {
-#if APR_HAS_UNICODE_FS
- charset = "UTF-8";
-#else
- charset = "ISO-8859-1";
-#endif
- }
- if (*charset) {
- ap_set_content_type(r, apr_pstrcat(r->pool, ctype, ";charset=",
- charset, NULL));
- }
- else {
- ap_set_content_type(r, ctype);
- }
-
- if (autoindex_opts & TRACK_MODIFIED) {
- ap_update_mtime(r, r->finfo.mtime);
- ap_set_last_modified(r);
- ap_set_etag(r);
- }
- if (r->header_only) {
- glusterfs_close (fd);
- return 0;
- }
-
- /*
- * If there is no specific ordering defined for this directory,
- * default to ascending by filename.
- */
- keyid = autoindex_conf->default_keyid
- ? autoindex_conf->default_keyid : K_NAME;
- direction = autoindex_conf->default_direction
- ? autoindex_conf->default_direction : D_ASCENDING;
-
- /*
- * Figure out what sort of indexing (if any) we're supposed to use.
- *
- * If no QUERY_STRING was specified or client query strings have been
- * explicitly disabled.
- * If we are ignoring the client, suppress column sorting as well.
- */
- if (autoindex_opts & IGNORE_CLIENT) {
- qstring = NULL;
- autoindex_opts |= SUPPRESS_COLSORT;
- colargs = "";
- }
- else {
- char fval[5], vval[5], *ppre = "", *epattern = "";
- fval[0] = '\0'; vval[0] = '\0';
- qstring = r->args;
-
- while (qstring && *qstring) {
-
- /* C= First Sort key Column (N, M, S, D) */
- if ( qstring[0] == 'C' && qstring[1] == '='
- && qstring[2] && strchr(K_VALID, qstring[2])
- && ( qstring[3] == '&' || qstring[3] == ';'
- || !qstring[3])) {
- keyid = qstring[2];
- qstring += qstring[3] ? 4 : 3;
- }
-
- /* O= Sort order (A, D) */
- else if ( qstring[0] == 'O' && qstring[1] == '='
- && ( (qstring[2] == D_ASCENDING)
- || (qstring[2] == D_DESCENDING))
- && ( qstring[3] == '&'
- || qstring[3] == ';'
- || !qstring[3])) {
- direction = qstring[2];
- qstring += qstring[3] ? 4 : 3;
- }
-
- /* F= Output Format (0 plain, 1 fancy (pre), 2 table) */
- else if ( qstring[0] == 'F' && qstring[1] == '='
- && qstring[2] && strchr("012", qstring[2])
- && ( qstring[3] == '&' || qstring[3] == ';'
- || !qstring[3])) {
- if (qstring[2] == '0') {
- autoindex_opts &= ~(FANCY_INDEXING
- | TABLE_INDEXING);
- }
- else if (qstring[2] == '1') {
- autoindex_opts = (autoindex_opts
- | FANCY_INDEXING)
- & ~TABLE_INDEXING;
- }
- else if (qstring[2] == '2') {
- autoindex_opts |= FANCY_INDEXING
- | TABLE_INDEXING;
- }
- strcpy(fval, ";F= ");
- fval[3] = qstring[2];
- qstring += qstring[3] ? 4 : 3;
- }
-
- /* V= Version sort (0, 1) */
- else if ( qstring[0] == 'V' && qstring[1] == '='
- && (qstring[2] == '0' || qstring[2] == '1')
- && ( qstring[3] == '&' || qstring[3] == ';'
- || !qstring[3])) {
- if (qstring[2] == '0') {
- autoindex_opts &= ~VERSION_SORT;
- }
- else if (qstring[2] == '1') {
- autoindex_opts |= VERSION_SORT;
- }
- strcpy(vval, ";V= ");
- vval[3] = qstring[2];
- qstring += qstring[3] ? 4 : 3;
- }
-
- /* P= wildcard pattern (*.foo) */
- else if (qstring[0] == 'P' && qstring[1] == '=') {
- const char *eos = qstring += 2; /* for efficiency */
-
- while (*eos && *eos != '&' && *eos != ';') {
- ++eos;
- }
-
- if (eos == qstring) {
- pstring = NULL;
- }
- else {
- pstring = apr_pstrndup(r->pool, qstring,
- eos - qstring);
- if (ap_unescape_url(pstring) != OK) {
- /* ignore the pattern, if it's bad. */
- pstring = NULL;
- }
- else {
- ppre = ";P=";
- /* be correct */
- epattern = ap_escape_uri(r->pool,
- pstring);
- }
- }
-
- if (*eos && *++eos) {
- qstring = eos;
- }
- else {
- qstring = NULL;
- }
- }
-
- /* Syntax error? Ignore the remainder! */
- else {
- qstring = NULL;
- }
- }
- colargs = apr_pstrcat(r->pool, fval, vval, ppre, epattern,
- NULL);
- }
-
- /* Spew HTML preamble */
- title_endp = title_name + strlen(title_name) - 1;
-
- while (title_endp > title_name && *title_endp == '/') {
- *title_endp-- = '\0';
- }
-
- emit_head(r, find_header(autoindex_conf, r),
- autoindex_opts & SUPPRESS_PREAMBLE,
- autoindex_opts & EMIT_XHTML, title_name);
-
- /*
- * Since we don't know how many dir. entries there are, put them into a
- * linked list and then arrayificate them so qsort can use them.
- */
- head = NULL;
- p = make_parent_entry(autoindex_opts, autoindex_conf, r, keyid,
- direction);
- if (p != NULL) {
- p->next = head;
- head = p;
- num_ent++;
- }
- fullpath = apr_palloc(r->pool, APR_PATH_MAX);
- dirpathlen = strlen(name);
- memcpy(fullpath, name, dirpathlen);
-
- do {
- entry = glusterfs_readdir (fd);
- if (entry == NULL) {
- break;
- }
-
- fname = apr_pstrcat (r->pool, path, entry->d_name, NULL);
-
- ret = glusterfs_stat (fname, &st);
- if (ret != 0) {
- break;
- }
-
- dirent.fname = fname;
- dirent.name = apr_pstrdup (r->pool, entry->d_name);
- fill_out_finfo (&dirent, &st,
- APR_FINFO_MIN | APR_FINFO_IDENT
- | APR_FINFO_NLINK | APR_FINFO_OWNER
- | APR_FINFO_PROT);
-
- p = make_autoindex_entry(&dirent, autoindex_opts,
- autoindex_conf, r,
- keyid, direction, pstring);
- if (p != NULL) {
- p->next = head;
- head = p;
- num_ent++;
- }
- } while (1);
-
- if (num_ent > 0) {
- ar = (struct ent **) apr_palloc(r->pool,
- num_ent * sizeof(struct ent *));
- p = head;
- x = 0;
- while (p) {
- ar[x++] = p;
- p = p->next;
- }
-
- qsort((void *) ar, num_ent, sizeof(struct ent *),
- (int (*)(const void *, const void *)) dsortf);
- }
- output_directories(ar, num_ent, autoindex_conf, r, autoindex_opts,
- keyid, direction, colargs);
- glusterfs_close (fd);
-
- emit_tail(r, find_readme(autoindex_conf, r),
- autoindex_opts & SUPPRESS_PREAMBLE);
-
- return 0;
-}
-
-
-static int
-handle_autoindex(request_rec *r)
-{
- glusterfs_dir_config_t *dir_config = NULL;
- int allow_opts;
-
- allow_opts = ap_allow_options(r);
-
- r->allowed |= (AP_METHOD_BIT << M_GET);
- if (r->method_number != M_GET) {
- return DECLINED;
- }
-
- dir_config = mod_glfs_dconfig (r);
-
- /* OK, nothing easy. Trot out the heavy artillery... */
-
- if (allow_opts & OPT_INDEXES) {
- int errstatus;
-
- if ((errstatus = ap_discard_request_body(r)) != OK) {
- return errstatus;
- }
-
- /* KLUDGE --- make the sub_req lookups happen in the right
- * directory. Fixing this in the sub_req_lookup functions
- * themselves is difficult, and would probably break
- * virtual includes...
- */
-
- if (r->filename[strlen(r->filename) - 1] != '/') {
- r->filename = apr_pstrcat(r->pool, r->filename, "/",
- NULL);
- }
- return mod_glfs_index_directory(r, dir_config);
- } else {
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "Directory index forbidden by "
- "Options directive: %s", r->filename);
- return HTTP_FORBIDDEN;
- }
-}
-
-
-static int
-mod_glfs_handler (request_rec *r)
-{
- conn_rec *c = r->connection;
- apr_bucket_brigade *bb;
- apr_bucket *e;
- core_dir_config *d;
- int errstatus;
- glusterfs_file_t fd = NULL;
- apr_status_t status;
- glusterfs_dir_config_t *dir_config = NULL;
- char *path = NULL;
- int num_ranges = 0;
- apr_size_t size = 0;
- apr_off_t range_start = 0, range_end = 0;
- char *current = NULL;
- apr_status_t rv = 0;
- core_request_config *req_cfg = NULL;
-
- /* XXX if/when somebody writes a content-md5 filter we either need to
- * remove this support or coordinate when to use the filter vs.
- * when to use this code
- * The current choice of when to compute the md5 here matches the 1.3
- * support fairly closely (unlike 1.3, we don't handle computing md5
- * when the charset is translated).
- */
-
- int bld_content_md5;
- if (!r->handler || (r->handler
- && strcmp (r->handler, GLUSTERFS_HANDLER)))
- return DECLINED;
-
- if (r->uri[0] == '\0') {
- return DECLINED;
- }
-
- if (r->finfo.filetype == APR_DIR) {
- return handle_autoindex (r);
- }
-
- dir_config = mod_glfs_dconfig (r);
-
- ap_allow_standard_methods(r, MERGE_ALLOW, M_GET, -1);
-
- /* We understood the (non-GET) method, but it might not be legal for
- this particular resource. Check to see if the 'deliver_script'
- flag is set. If so, then we go ahead and deliver the file since
- it isn't really content (only GET normally returns content).
-
- Note: based on logic further above, the only possible non-GET
- method at this point is POST. In the future, we should enable
- script delivery for all methods. */
- if (r->method_number != M_GET) {
- req_cfg = ap_get_module_config(r->request_config, &core_module);
- if (!req_cfg->deliver_script) {
- /* The flag hasn't been set for this request. Punt. */
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "This resource does not accept the %s "
- "method.",
- r->method);
- return HTTP_METHOD_NOT_ALLOWED;
- }
- }
-
- d = (core_dir_config *)ap_get_module_config(r->per_dir_config,
- &core_module);
- bld_content_md5 = (d->content_md5 & 1)
- && r->output_filters->frec->ftype != AP_FTYPE_RESOURCE;
-
- if ((errstatus = ap_discard_request_body(r)) != OK) {
- return errstatus;
- }
-
- if (r->finfo.filetype == 0) {
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "File does not exist: %s", r->filename);
- return HTTP_NOT_FOUND;
- }
-
- if ((r->used_path_info != AP_REQ_ACCEPT_PATH_INFO) &&
- r->path_info && *r->path_info)
- {
- /* default to reject */
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "File does not exist: %s",
- apr_pstrcat(r->pool, r->filename, r->path_info,
- NULL));
- return HTTP_NOT_FOUND;
- }
-
- ap_update_mtime (r, r->finfo.mtime);
- ap_set_last_modified (r);
- ap_set_etag (r);
- apr_table_setn (r->headers_out, "Accept-Ranges", "bytes");
-
- num_ranges = ap_set_byterange(r);
- if (num_ranges == 0) {
- size = r->finfo.size;
- } else {
- char *tmp = apr_pstrdup (r->pool, r->range);
- while ((current = ap_getword(r->pool, (const char **)&tmp, ','))
- && (rv = parse_byterange(current, r->finfo.size,
- &range_start, &range_end))) {
- size += (range_end - range_start);
- }
- }
-
- ap_set_content_length (r, size);
-
- if ((errstatus = ap_meets_conditions(r)) != OK) {
- r->status = errstatus;
- }
-
- /*
- * file is small enough to have already got the content in
- * glusterfs_lookup
- */
- if (r->finfo.size <= dir_config->xattr_file_size && dir_config->buf) {
- if (bld_content_md5) {
- apr_table_setn (r->headers_out, "Content-MD5",
- (const char *)ap_md5_binary(r->pool,
- dir_config->buf
- , r->finfo.size));
- }
-
- ap_log_rerror (APLOG_MARK, APLOG_NOTICE, 0, r,
- "fetching data from glusterfs through xattr "
- "interface\n");
-
- bb = apr_brigade_create(r->pool, c->bucket_alloc);
-
- e = apr_bucket_heap_create (dir_config->buf, r->finfo.size,
- free, c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL (bb, e);
-
- e = apr_bucket_eos_create(c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL(bb, e);
-
- dir_config->buf = NULL;
-
- /* let the byterange_filter handle multipart requests */
- status = ap_pass_brigade(r->output_filters, bb);
- if (status == APR_SUCCESS
- || r->status != HTTP_OK
- || c->aborted) {
- return OK;
- }
- else {
- /* no way to know what type of error occurred */
- ap_log_rerror(APLOG_MARK, APLOG_DEBUG, status, r,
- "mod_glfs_handler: ap_pass_brigade "
- "returned %i",
- status);
- return HTTP_INTERNAL_SERVER_ERROR;
- }
- }
-
- /* do standard open/read/close to fetch content */
- path = r->uri;
-
- fd = glusterfs_open (path, O_RDONLY, 0);
- if (fd == 0) {
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "file permissions deny server access: %s",
- r->filename);
- return HTTP_FORBIDDEN;
- }
-
- /*
- * byterange_filter cannot handle range requests, since we are not
- * sending the whole data in a single brigade
- */
-
-
- if (num_ranges == 0) {
- mod_glfs_read_async (r, NULL, fd, 0, -1);
- } else {
- mod_glfs_handle_byte_ranges (r, fd, num_ranges);
- }
-
- glusterfs_close (fd);
-}
-
-
-#if 0
-static apr_status_t
-mod_glfs_output_filter (ap_filter_t *f,
- apr_bucket_brigade *b)
-{
- size_t size = 0;
- apr_bucket_t *e = NULL;
- size = atol (apr_table_get (r->notes, MOD_GLFS_SIZE));
-
- for (e = APR_BRIGADE_FIRST(b);
- e != APR_BRIGADE_SENTINEL(b);
- e = APR_BUCKET_NEXT(e))
- {
- /* FIXME: can there be more than one heap buckets? */
- if (e->type == &apr_bucket_type_heap) {
- break;
- }
- }
-
- if (e != APR_BRIGADE_SENTINEL(b)) {
- e->length = size;
- }
-
- return ap_pass_brigade (f->next, b);
-}
-#endif
-
-static int
-mod_glfs_fixup_dir(request_rec *r)
-{
- glusterfs_dir_config_t *d = NULL;
- char *dummy_ptr[1];
- char **names_ptr = NULL, *name_ptr = NULL;
- int num_names;
- int error_notfound = 0;
- char *ifile = NULL;
- request_rec *rr = NULL;
-
- /* only handle requests against directories */
- if (r->finfo.filetype != APR_DIR) {
- return DECLINED;
- }
-
- if (!r->handler || strcmp (r->handler, GLUSTERFS_HANDLER)) {
- return DECLINED;
- }
-
- /* Never tolerate path_info on dir requests */
- if (r->path_info && *r->path_info) {
- return DECLINED;
- }
-
- d = (glusterfs_dir_config_t *)ap_get_module_config(r->per_dir_config,
- &glusterfs_module);
-
- /* Redirect requests that are not '/' terminated */
- if (r->uri[0] == '\0' || r->uri[strlen(r->uri) - 1] != '/')
- {
- if (!d->do_slash) {
- return DECLINED;
- }
-
- /* Only redirect non-get requests if we have no note to warn
- * that this browser cannot handle redirs on non-GET requests
- * (such as Microsoft's WebFolders).
- */
- if ((r->method_number != M_GET)
- && apr_table_get(r->subprocess_env, "redirect-carefully")) {
- return DECLINED;
- }
-
- if (r->args != NULL) {
- ifile = apr_pstrcat(r->pool, ap_escape_uri(r->pool,
- r->uri),
- "/", "?", r->args, NULL);
- }
- else {
- ifile = apr_pstrcat(r->pool, ap_escape_uri(r->pool,
- r->uri),
- "/", NULL);
- }
-
- apr_table_setn(r->headers_out, "Location",
- ap_construct_url(r->pool, ifile, r));
- return HTTP_MOVED_PERMANENTLY;
- }
-
- if (d->index_names) {
- names_ptr = (char **)d->index_names->elts;
- num_names = d->index_names->nelts;
- }
- else {
- dummy_ptr[0] = AP_DEFAULT_INDEX;
- names_ptr = dummy_ptr;
- num_names = 1;
- }
-
- for (; num_names; ++names_ptr, --num_names) {
- /* XXX: Is this name_ptr considered escaped yet, or not??? */
- name_ptr = *names_ptr;
-
- /* Once upon a time args were handled _after_ the successful
- * redirect. But that redirect might then _refuse_ the
- * given r->args, creating a nasty tangle. It seems safer to
- * consider the r->args while we determine if name_ptr is our
- * viable index, and therefore set them up correctly on redirect.
- */
- if (r->args != NULL) {
- name_ptr = apr_pstrcat(r->pool, name_ptr, "?", r->args,
- NULL);
- }
-
- rr = ap_sub_req_lookup_uri(name_ptr, r, NULL);
-
- /* The sub request lookup is very liberal, and the core
- * map_to_storage handler will almost always result in HTTP_OK
- * as /foo/index.html may be /foo with PATH_INFO="/index.html",
- * or even / with PATH_INFO="/foo/index.html". To get around
- * this we insist that the the index be a regular filetype.
- *
- * Another reason is that the core handler also makes the
- * assumption that if r->finfo is still NULL by the time it
- * gets called, the file does not exist.
- */
- if (rr->status == HTTP_OK
- && ( (rr->handler && !strcmp(rr->handler, "proxy-server"))
- || rr->finfo.filetype == APR_REG)) {
- ap_internal_fast_redirect(rr, r);
- return OK;
- }
-
- /* If the request returned a redirect, propagate it to the
- * client
- */
-
- if (ap_is_HTTP_REDIRECT(rr->status)
- || (rr->status == HTTP_NOT_ACCEPTABLE && num_names == 1)
- || (rr->status == HTTP_UNAUTHORIZED && num_names == 1)) {
-
- apr_pool_join(r->pool, rr->pool);
- error_notfound = rr->status;
- r->notes = apr_table_overlay(r->pool, r->notes,
- rr->notes);
- r->headers_out = apr_table_overlay(r->pool,
- r->headers_out,
- rr->headers_out);
- r->err_headers_out = apr_table_overlay(r->pool,
- r->err_headers_out,
- rr->err_headers_out);
- return error_notfound;
- }
-
- /* If the request returned something other than 404 (or 200),
- * it means the module encountered some sort of problem. To be
- * secure, we should return the error, rather than allow
- * autoindex to create a (possibly unsafe) directory index.
- *
- * So we store the error, and if none of the listed files
- * exist, we return the last error response we got, instead
- * of a directory listing.
- */
- if (rr->status && rr->status != HTTP_NOT_FOUND
- && rr->status != HTTP_OK) {
- error_notfound = rr->status;
- }
-
- ap_destroy_sub_req(rr);
- }
-
- if (error_notfound) {
- return error_notfound;
- }
-
- /* nothing for us to do, pass on through */
- return DECLINED;
-}
-
-
-static void
-mod_glfs_register_hooks(apr_pool_t *p)
-{
- ap_hook_child_init (mod_glfs_child_init, NULL, NULL, APR_HOOK_MIDDLE);
- ap_hook_handler (mod_glfs_handler, NULL, NULL, APR_HOOK_REALLY_FIRST);
- ap_hook_map_to_storage (mod_glfs_map_to_storage, NULL, NULL,
- APR_HOOK_REALLY_FIRST);
- ap_hook_fixups(mod_glfs_fixup_dir,NULL,NULL,APR_HOOK_LAST);
-
-/* mod_glfs_output_filter_handle =
- ap_register_output_filter ("MODGLFS", mod_glfs_output_filter,
- NULL, AP_FTYPE_PROTOCOL); */
-}
-
-static const char *
-cmd_add_index (cmd_parms *cmd, void *dummy, const char *arg)
-{
- glusterfs_dir_config_t *d = dummy;
-
- if (!d->index_names) {
- d->index_names = apr_array_make(cmd->pool, 2, sizeof(char *));
- }
- *(const char **)apr_array_push(d->index_names) = arg;
- return NULL;
-}
-
-static const char *
-cmd_configure_slash (cmd_parms *cmd, void *d_, int arg)
-{
- glusterfs_dir_config_t *d = d_;
-
- d->do_slash = arg ? SLASH_ON : SLASH_OFF;
- return NULL;
-}
-
-#define DIR_CMD_PERMS OR_INDEXES
-
-static const
-command_rec mod_glfs_cmds[] =
-{
- AP_INIT_TAKE1(
- "GlusterfsLogfile",
- cmd_add_logfile,
- NULL,
- ACCESS_CONF, /*FIXME: allow overriding in .htaccess files */
- "Glusterfs logfile"
- ),
-
- AP_INIT_TAKE1(
- "GlusterfsLoglevel",
- cmd_set_loglevel,
- NULL,
- ACCESS_CONF,
- "Glusterfs loglevel:anyone of none, critical, error, warning, "
- "debug"
- ),
-
- AP_INIT_TAKE1(
- "GlusterfsCacheTimeout",
- cmd_set_cache_timeout,
- NULL,
- ACCESS_CONF,
- "Timeout value in seconds for lookup and stat cache of "
- "libglusterfsclient"
- ),
-
- AP_INIT_TAKE1(
- "GlusterfsVolumeSpecfile",
- cmd_add_volume_specfile,
- NULL,
- ACCESS_CONF,
- "Glusterfs Volume specfication file specifying filesystem "
- "under this directory"
- ),
-
- AP_INIT_TAKE1(
- "GlusterfsXattrFileSize",
- cmd_add_xattr_file_size,
- NULL,
- ACCESS_CONF,
- "Maximum size of the file that can be fetched through "
- "extended attribute interface of libglusterfsclient"
- ),
-
- /* mod_dir cmds */
- AP_INIT_ITERATE("DirectoryIndex", cmd_add_index,
- NULL, DIR_CMD_PERMS,
- "a list of file names"),
-
- AP_INIT_FLAG("DirectorySlash", cmd_configure_slash,
- NULL, DIR_CMD_PERMS,
- "On or Off"),
-
- /* autoindex cmds */
- AP_INIT_ITERATE2("AddIcon", cmd_add_icon,
- BY_PATH, DIR_CMD_PERMS,
- "an icon URL followed by one or more filenames"),
-
- AP_INIT_ITERATE2("AddIconByType", cmd_add_icon,
- BY_TYPE, DIR_CMD_PERMS,
- "an icon URL followed by one or more MIME types"),
-
- AP_INIT_ITERATE2("AddIconByEncoding", cmd_add_icon,
- BY_ENCODING, DIR_CMD_PERMS,
- "an icon URL followed by one or more content encodings"),
-
- AP_INIT_ITERATE2("AddAlt", cmd_add_alt, BY_PATH,
- DIR_CMD_PERMS,
- "alternate descriptive text followed by one or more "
- "filenames"),
-
- AP_INIT_ITERATE2("AddAltByType", cmd_add_alt,
- BY_TYPE, DIR_CMD_PERMS,
- "alternate descriptive text followed by one or more "
- "MIME types"),
-
- AP_INIT_ITERATE2("AddAltByEncoding", cmd_add_alt,
- BY_ENCODING, DIR_CMD_PERMS,
- "alternate descriptive text followed by one or more "
- "content encodings"),
-
- AP_INIT_TAKE_ARGV("IndexOptions", cmd_add_opts,
- NULL, DIR_CMD_PERMS,
- "one or more index options [+|-][]"),
-
- AP_INIT_TAKE2("IndexOrderDefault", cmd_set_default_order,
- NULL, DIR_CMD_PERMS,
- "{Ascending,Descending} {Name,Size,Description,Date}"),
-
- AP_INIT_ITERATE("IndexIgnore", cmd_add_ignore,
- NULL, DIR_CMD_PERMS,
- "one or more file extensions"),
-
- AP_INIT_ITERATE2("AddDescription", cmd_add_desc,
- BY_PATH, DIR_CMD_PERMS,
- "Descriptive text followed by one or more filenames"),
-
- AP_INIT_TAKE1("HeaderName", cmd_add_header,
- NULL, DIR_CMD_PERMS,
- "a filename"),
-
- AP_INIT_TAKE1("ReadmeName", cmd_add_readme,
- NULL, DIR_CMD_PERMS,
- "a filename"),
-
- AP_INIT_RAW_ARGS("FancyIndexing", ap_set_deprecated,
- NULL, OR_ALL,
- "The FancyIndexing directive is no longer supported. "
- "Use IndexOptions FancyIndexing."),
-
- AP_INIT_TAKE1("DefaultIcon", ap_set_string_slot,
- (void *)APR_OFFSETOF(glusterfs_dir_config_t,
- default_icon),
- DIR_CMD_PERMS, "an icon URL"),
-
- AP_INIT_TAKE1("IndexStyleSheet", ap_set_string_slot,
- (void *)APR_OFFSETOF(glusterfs_dir_config_t, style_sheet),
- DIR_CMD_PERMS, "URL to style sheet"),
-
- {NULL}
-};
-
-module AP_MODULE_DECLARE_DATA glusterfs_module =
-{
- STANDARD20_MODULE_STUFF,
- mod_glfs_create_dir_config,
- mod_glfs_merge_dir_config,
- NULL, //mod_glfs_create_server_config,
- NULL, //mod_glfs_merge_server_config,
- mod_glfs_cmds,
- mod_glfs_register_hooks,
-};
diff --git a/mod_glusterfs/apache/Makefile.am b/mod_glusterfs/apache/Makefile.am
deleted file mode 100644
index bda039310..000000000
--- a/mod_glusterfs/apache/Makefile.am
+++ /dev/null
@@ -1,10 +0,0 @@
-SUBDIRS = $(MOD_GLUSTERFS_HTTPD_VERSION)
-
-EXTRA_DIST = 1.3/Makefile.am 1.3/Makefile.in \
- 1.3/src/Makefile.am 1.3/src/Makefile.in \
- 1.3/src/mod_glusterfs.c \
- 1.3/src/README.txt \
- 2.2/Makefile.am 2.2/Makefile.in \
- 2.2/src/Makefile.am 2.2/src/Makefile.in \
- 2.2/src/mod_glusterfs.c
-CLEANFILES =
diff --git a/mod_glusterfs/lighttpd/1.4/Makefile.am b/mod_glusterfs/lighttpd/1.4/Makefile.am
deleted file mode 100644
index eda329111..000000000
--- a/mod_glusterfs/lighttpd/1.4/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-EXTRA_DIST = Makefile.am.diff mod_glusterfs.c mod_glusterfs.h README.txt
-
-CLEANFILES =
diff --git a/mod_glusterfs/lighttpd/1.4/Makefile.am.diff b/mod_glusterfs/lighttpd/1.4/Makefile.am.diff
deleted file mode 100644
index 375696b5d..000000000
--- a/mod_glusterfs/lighttpd/1.4/Makefile.am.diff
+++ /dev/null
@@ -1,29 +0,0 @@
---- lighttpd-1.4.19/src/Makefile.am 2008-04-16 18:42:18.000000000 +0400
-+++ lighttpd-1.4.19.mod/src/Makefile.am 2008-04-16 18:41:11.000000000 +0400
-@@ -1,4 +1,4 @@
--AM_CFLAGS = $(FAM_CFLAGS)
-+AM_CFLAGS = $(FAM_CFLAGS) -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64
-
- noinst_PROGRAMS=proc_open lemon # simple-fcgi #graphic evalo bench ajp ssl error_test adserver gen-license
- sbin_PROGRAMS=lighttpd lighttpd-angel
-@@ -241,6 +241,11 @@
- mod_accesslog_la_LDFLAGS = -module -export-dynamic -avoid-version -no-undefined
- mod_accesslog_la_LIBADD = $(common_libadd)
-
-+lib_LTLIBRARIES += mod_glusterfs.la
-+mod_glusterfs_la_SOURCES = mod_glusterfs.c
-+mod_glusterfs_la_CFLAGS = $(AM_CFLAGS)
-+mod_glusterfs_la_LDFLAGS = -module -export-dynamic -avoid-version -no-undefined -lglusterfsclient -lpthread
-+mod_glusterfs_la_LIBADD = $(common_libadd)
-
- hdr = server.h buffer.h network.h log.h keyvalue.h \
- response.h request.h fastcgi.h chunk.h \
-@@ -254,7 +259,7 @@
- configparser.h mod_ssi_exprparser.h \
- sys-mmap.h sys-socket.h mod_cml.h mod_cml_funcs.h \
- splaytree.h proc_open.h status_counter.h \
-- mod_magnet_cache.h
-+ mod_magnet_cache.h mod_glusterfs.h
-
- DEFS= @DEFS@ -DLIBRARY_DIR="\"$(libdir)\"" -DSBIN_DIR="\"$(sbindir)\""
-
diff --git a/mod_glusterfs/lighttpd/1.4/README.txt b/mod_glusterfs/lighttpd/1.4/README.txt
deleted file mode 100644
index 786a146e4..000000000
--- a/mod_glusterfs/lighttpd/1.4/README.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-Introduction
-============
-mod_glusterfs is a module written for lighttpd to speed up the access of files present on glusterfs. mod_glusterfs uses libglusterfsclient library provided for glusterfs and hence can be used without fuse (File System in User Space).
-
-Usage
-=====
-To use mod_glusterfs with lighttpd-1.4, copy mod_glusterfs.c and mod_glusterfs.h into src/ of lighttpd-1.4 source tree, and apply the Makefile.am.diff to src/Makefile.am. Re-run ./autogen.sh on the top level of the lighttpd-1.4 build tree and recompile.
-
-# cp mod_glusterfs.[ch] /home/glusterfs/lighttpd-1.4/src/
-# cp Makefile.am.diff /home/glusterfs/lighttpd-1.4/
-# cd /home/glusterfs/lighttpd-1.4
-# patch -p1 < Makefile.am.diff
-# ./autogen.sh
-# ./configure
-# make
-# make install
-
-Configuration
-=============
-* mod_glusterfs should be listed at the begining of the list server.modules in lighttpd.conf.
-
-Below is a snippet from lighttpd.conf concerning to mod_glusterfs.
-
-$HTTP["url"] =~ "^/glusterfs" {
- glusterfs.prefix = "/glusterfs"
- glusterfs.document-root = "/home/glusterfs/document-root"
- glusterfs.logfile = "/var/log/glusterfs-logfile"
- glusterfs.volume-specfile = "/etc/glusterfs/glusterfs.vol"
- glusterfs.loglevel = "error"
- glusterfs.cache-timeout = 300
- glusterfs.xattr-interface-size-limit = "65536"
-}
-
-* $HTTP["url"] =~ "^/glusterfs"
- A perl style regular expression used to match against the url. If regular expression matches the url, the url is handled by mod_glusterfs. Note that the pattern given here should match glusterfs.prefix.
-
-* glusterfs.prefix (COMPULSORY)
- A string to be present at the starting of the file path in the url so that the file would be handled by glusterfs.
- Eg., A GET request on the url http://www.example.com/glusterfs-prefix/some-dir/example-file will result in fetching of the file "/some-dir/example-file" from glusterfs mount if glusterfs.prefix is set to "/glusterfs-prefix".
-
-* glusterfs.volume-specfile (COMPULSORY)
- Path to the the glusterfs volume specification file.
-
-* glusterfs.logfile (COMPULSORY)
- Path to the glusterfs logfile.
-
-* glusterfs.loglevel (OPTIONAL, default = warning)
- Allowed values are critical, error, warning, debug, none in the decreasing order of severity of error conditions.
-
-* glusterfs.cache-timeout (OPTIONAL, default = 0)
- Timeout values for glusterfs stat and lookup cache.
-
-* glusterfs.document-root (COMPULSORY)
- An absolute path, relative to which all the files are fetched from glusterfs.
-
-* glusterfs.xattr-interface-size-limit (OPTIONAL, default = 0)
- Files with sizes upto and including this value are fetched through the extended attribute interface of glusterfs rather than the usual open-read-close set of operations. For files of small sizes, it is recommended to use extended attribute interface.
diff --git a/mod_glusterfs/lighttpd/1.4/mod_glusterfs.c b/mod_glusterfs/lighttpd/1.4/mod_glusterfs.c
deleted file mode 100644
index d565dce28..000000000
--- a/mod_glusterfs/lighttpd/1.4/mod_glusterfs.c
+++ /dev/null
@@ -1,1820 +0,0 @@
-/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <ctype.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <pthread.h>
-#include <sys/types.h>
-#include <fcntl.h>
-
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include <errno.h>
-#include <unistd.h>
-#include <assert.h>
-
-#include "base.h"
-#include "log.h"
-#include "buffer.h"
-
-#include "plugin.h"
-
-#include "stat_cache.h"
-#include "mod_glusterfs.h"
-#include "etag.h"
-#include "http_chunk.h"
-#include "response.h"
-
-#include "fdevent.h"
-#include <libglusterfsclient.h>
-
-#ifdef HAVE_ATTR_ATTRIBUTES_H
-#include <attr/attributes.h>
-#endif
-
-#ifdef HAVE_FAM_H
-# include <fam.h>
-#endif
-
-#include "sys-mmap.h"
-
-/* NetBSD 1.3.x needs it */
-#ifndef MAP_FAILED
-# define MAP_FAILED -1
-#endif
-
-#ifndef O_LARGEFILE
-# define O_LARGEFILE 0
-#endif
-
-#ifndef HAVE_LSTAT
-#define lstat stat
-#endif
-
-#if 0
-/*
- enables debug code for testing if all nodes in the stat-cache as accessable
-*/
-#define DEBUG_STAT_CACHE
-#endif
-
-#ifdef HAVE_LSTAT
-#undef HAVE_LSTAT
-#endif
-
-#define GLUSTERFS_FILE_CHUNK (FILE_CHUNK + 1)
-
-/*
- Keep this value large. Each glusterfs_async_read of GLUSTERFS_CHUNK_SIZE
- results in a network_backend_write of the read data
-*/
-
-#define GLUSTERFS_CHUNK_SIZE 8192
-
-/**
- * this is a staticfile for a lighttpd plugin
- *
- */
-
-typedef struct glusterfs_async_local {
- int op_ret;
- int op_errno;
- char async_read_complete;
- off_t length;
- size_t read_bytes;
- glusterfs_iobuf_t *buf;
- pthread_mutex_t lock;
- pthread_cond_t cond;
-} glusterfs_async_local_t;
-
-
-typedef struct {
- glusterfs_file_t fd;
- void *buf;
- buffer *glusterfs_path;
- /* off_t response_content_length; */
- int prefix;
-}mod_glusterfs_ctx_t;
-
-/* plugin config for all request/connections */
-typedef struct {
- buffer *logfile;
- buffer *loglevel;
- buffer *specfile;
- buffer *prefix;
- buffer *xattr_file_size;
- buffer *document_root;
- array *exclude_exts;
- unsigned short cache_timeout;
- char mounted;
-} plugin_config;
-
-static int (*network_backend_write)(struct server *srv, connection *con, int fd,
- chunkqueue *cq);
-
-typedef struct {
- PLUGIN_DATA;
- buffer *range_buf;
- plugin_config **config_storage;
-
- plugin_config conf;
-} plugin_data;
-
-typedef struct {
- chunkqueue *cq;
- glusterfs_iobuf_t *buf;
- size_t length;
-}mod_glusterfs_chunkqueue;
-
-#ifdef HAVE_FAM_H
-typedef struct {
- FAMRequest *req;
- FAMConnection *fc;
-
- buffer *name;
-
- int version;
-} fam_dir_entry;
-#endif
-
-/* the directory name is too long to always compare on it
- * - we need a hash
- * - the hash-key is used as sorting criteria for a tree
- * - a splay-tree is used as we can use the caching effect of it
- */
-
-/* we want to cleanup the stat-cache every few seconds, let's say 10
- *
- * - remove entries which are outdated since 30s
- * - remove entries which are fresh but havn't been used since 60s
- * - if we don't have a stat-cache entry for a directory, release it from the
- * monitor
- */
-
-#ifdef DEBUG_STAT_CACHE
-typedef struct {
- int *ptr;
-
- size_t used;
- size_t size;
-} fake_keys;
-
-static fake_keys ctrl;
-#endif
-
-int
-mod_glusterfs_readv_async_cbk (int op_ret, int op_errno,
- glusterfs_iobuf_t *buf,
- void *cbk_data)
-{
- glusterfs_async_local_t *local = cbk_data;
- pthread_mutex_lock (&local->lock);
- {
- local->async_read_complete = 1;
- local->buf = buf;
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- pthread_cond_signal (&local->cond);
- }
- pthread_mutex_unlock (&local->lock);
-
- return 0;
-}
-
-static int
-mod_glusterfs_read_async (server *srv, connection *con, chunk *glusterfs_chunk)
-{
- glusterfs_async_local_t local;
- off_t end = 0;
- int nbytes;
- int complete;
- chunkqueue *cq = NULL;
- chunk *c = NULL;
- off_t offset = glusterfs_chunk->file.start;
- size_t length = glusterfs_chunk->file.length;
- glusterfs_file_t fd = glusterfs_chunk->file.name;
-
- pthread_cond_init (&local.cond, NULL);
- pthread_mutex_init (&local.lock, NULL);
-
- //local.fd = fd;
- memset (&local, 0, sizeof (local));
-
- if (length > 0)
- end = offset + length;
-
- cq = chunkqueue_init ();
- if (!cq) {
- con->http_status = 500;
- return HANDLER_FINISHED;
- }
-
- do {
- glusterfs_iobuf_t *buf;
- int i;
- if (length > 0) {
- nbytes = end - offset;
- if (nbytes > GLUSTERFS_CHUNK_SIZE)
- nbytes = GLUSTERFS_CHUNK_SIZE;
- } else
- nbytes = GLUSTERFS_CHUNK_SIZE;
-
- glusterfs_read_async(fd,
- nbytes,
- offset,
- mod_glusterfs_readv_async_cbk,
- (void *)&local);
-
- pthread_mutex_lock (&local.lock);
- {
- while (!local.async_read_complete) {
- pthread_cond_wait (&local.cond, &local.lock);
- }
-
- local.async_read_complete = 0;
- buf = local.buf;
-
- if ((int)length < 0)
- complete = (local.op_ret <= 0);
- else {
- local.read_bytes += local.op_ret;
- complete = ((local.read_bytes == length)
- || (local.op_ret <= 0));
- }
- }
- pthread_mutex_unlock (&local.lock);
-
- if (local.op_ret > 0) {
- unsigned long check = 0;
- for (i = 0; i < buf->count; i++) {
- buffer *nw_write_buf = buffer_init ();
-
- check += buf->vector[i].iov_len;
-
- nw_write_buf->used = buf->vector[i].iov_len + 1;
- nw_write_buf->size = buf->vector[i].iov_len;
- nw_write_buf->ptr = buf->vector[i].iov_base;
-
- offset += local.op_ret;
- chunkqueue_append_buffer_weak(cq, nw_write_buf);
- }
-
- network_backend_write (srv, con, con->fd, cq);
-
- if (chunkqueue_written (cq) != local.op_ret) {
- mod_glusterfs_chunkqueue *gf_cq;
- glusterfs_chunk->file.start = offset;
- if ((int)glusterfs_chunk->file.length > 0)
- glusterfs_chunk->file.length -= local.read_bytes;
-
- gf_cq = calloc (1, sizeof (*gf_cq));
- /* ERR_ABORT (gf_cq); */
- gf_cq->cq = cq;
- gf_cq->buf = buf;
- gf_cq->length = local.op_ret;
- glusterfs_chunk->file.mmap.start =(char *)gf_cq;
- return local.read_bytes;
- }
-
- for (c = cq->first ; c; c = c->next)
- c->mem->ptr = NULL;
-
- chunkqueue_reset (cq);
- }
-
- glusterfs_free (buf);
- } while (!complete);
-
- chunkqueue_free (cq);
- glusterfs_close (fd);
-
- if (local.op_ret < 0)
- con->http_status = 500;
-
- return (local.op_ret < 0 ? HANDLER_FINISHED : HANDLER_GO_ON);
-}
-
-int mod_glusterfs_network_backend_write(struct server *srv, connection *con,
- int fd, chunkqueue *cq)
-{
- chunk *c, *prev, *first;
- int chunks_written = 0;
- int error = 0;
-
- for (first = prev = c = cq->first; c; c = c->next, chunks_written++) {
-
- if (c->type == MEM_CHUNK && c->mem->used && !c->mem->ptr) {
- if (cq->first != c) {
- prev->next = NULL;
-
- /* call stored network_backend_write */
- network_backend_write (srv, con, fd, cq);
-
- prev->next = c;
- }
- cq->first = c->next;
-
- if (c->file.fd < 0) {
- error = HANDLER_ERROR;
- break;
- }
-
- if (c->file.mmap.start) {
- chunk *tmp;
- mod_glusterfs_chunkqueue *gf_cq = NULL;
-
- gf_cq = (mod_glusterfs_chunkqueue *)c->file.mmap.start;
-
- network_backend_write (srv, con, fd, gf_cq->cq);
-
- if ((size_t)chunkqueue_written (gf_cq->cq)
- != gf_cq->length) {
- cq->first = first;
- return chunks_written;
- }
- for (tmp = gf_cq->cq->first ; tmp;
- tmp = tmp->next)
- tmp->mem->ptr = NULL;
-
- chunkqueue_free (gf_cq->cq);
- glusterfs_free (gf_cq->buf);
- free (gf_cq);
- c->file.mmap.start = NULL;
- }
-
- mod_glusterfs_read_async (srv, con, c);
- if (c->file.mmap.start) {
- /* pending chunkqueue from
- mod_glusterfs_read_async to be written to
- network */
- cq->first = first;
- return chunks_written;
- }
-
- buffer_free (c->mem);
- c->mem = NULL;
-
- c->type = FILE_CHUNK;
- c->offset = c->file.length = 0;
- c->file.name = NULL;
-
- if (first == c)
- first = c->next;
-
- if (cq->last == c)
- cq->last = NULL;
-
- prev->next = c->next;
-
- free(c);
- }
- prev = c;
- }
-
- network_backend_write (srv, con, fd, cq);
-
- cq->first = first;
-
- return chunks_written;
-}
-
-int chunkqueue_append_glusterfs_file (connection *con, glusterfs_file_t fd,
- off_t offset, size_t len, size_t buf_size)
-{
- chunk *c = NULL;
- c = chunkqueue_get_append_tempfile (con->write_queue);
-
- if (c->file.is_temp) {
- close (c->file.fd);
- unlink (c->file.name->ptr);
- }
-
- c->type = MEM_CHUNK;
-
- buffer_free (c->mem);
-
- c->mem = buffer_init ();
- c->mem->used = len + 1;
- c->mem->size = buf_size;
- c->mem->ptr = NULL;
- c->offset = 0;
-
- buffer_free (c->file.name);
-
- /* fd returned by libglusterfsclient is a pointer */
- c->file.name = (buffer *)fd;
- c->file.start = offset;
- c->file.length = len;
-
- //c->file.fd = fd;
- c->file.mmap.start = NULL;
- return 0;
-}
-
-/* init the plugin data */
-INIT_FUNC(mod_glusterfs_init) {
- plugin_data *p;
-
- p = calloc(1, sizeof(*p));
- network_backend_write = NULL;
-
- return p;
-}
-
-/* detroy the plugin data */
-FREE_FUNC(mod_glusterfs_free) {
- plugin_data *p = p_d;
-
- UNUSED (srv);
-
- if (!p) return HANDLER_GO_ON;
-
- if (p->config_storage) {
- size_t i;
- for (i = 0; i < srv->config_context->used; i++) {
- plugin_config *s = p->config_storage[i];
-
- buffer_free (s->logfile);
- buffer_free (s->loglevel);
- buffer_free (s->specfile);
- buffer_free (s->prefix);
- buffer_free (s->xattr_file_size);
- buffer_free (s->document_root);
- array_free (s->exclude_exts);
-
- free (s);
- }
- free (p->config_storage);
- }
- buffer_free (p->range_buf);
-
- free (p);
-
- return HANDLER_GO_ON;
-}
-
-SETDEFAULTS_FUNC(mod_glusterfs_set_defaults) {
- plugin_data *p = p_d;
- size_t i = 0;
-
- config_values_t cv[] = {
- { "glusterfs.logfile", NULL, T_CONFIG_STRING,
- T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.loglevel", NULL, T_CONFIG_STRING,
- T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.volume-specfile", NULL, T_CONFIG_STRING,
- T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.cache-timeout", NULL, T_CONFIG_SHORT,
- T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.exclude-extensions", NULL, T_CONFIG_ARRAY,
- T_CONFIG_SCOPE_CONNECTION },
-
- /*TODO: get the prefix from config_conext and
- remove glusterfs.prefix from conf file */
- { "glusterfs.prefix", NULL, T_CONFIG_STRING,
- T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.xattr-interface-size-limit", NULL, T_CONFIG_STRING,
- T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.document-root", NULL, T_CONFIG_STRING,
- T_CONFIG_SCOPE_CONNECTION },
-
- { NULL, NULL, T_CONFIG_UNSET,
- T_CONFIG_SCOPE_UNSET }
- };
-
- p->config_storage = calloc(1,
- srv->config_context->used
- * sizeof(specific_config *));
- /* ERR_ABORT (p->config_storage);*/
- p->range_buf = buffer_init ();
-
- for (i = 0; i < srv->config_context->used; i++) {
- plugin_config *s;
-
- s = calloc(1, sizeof(plugin_config));
- /* ERR_ABORT (s); */
- s->logfile = buffer_init ();
- s->loglevel = buffer_init ();
- s->specfile = buffer_init ();
- s->document_root = buffer_init ();
- s->exclude_exts = array_init ();
- s->prefix = buffer_init ();
- s->xattr_file_size = buffer_init ();
-
- cv[0].destination = s->logfile;
- cv[1].destination = s->loglevel;
- cv[2].destination = s->specfile;
- cv[3].destination = &s->cache_timeout;
- cv[4].destination = s->exclude_exts;
- cv[5].destination = s->prefix;
- cv[6].destination = s->xattr_file_size;
- cv[7].destination = s->document_root;
- p->config_storage[i] = s;
-
- if (0 != config_insert_values_global(srv,
- ((data_config *)srv->config_context->data[i])->value,
- cv)) {
- return HANDLER_FINISHED;
- }
- }
-
- return HANDLER_GO_ON;
-}
-
-#define PATCH(x) \
- p->conf.x = s->x;
-
-static int mod_glusterfs_patch_connection(server *srv, connection *con,
- plugin_data *p) {
- size_t i, j;
- plugin_config *s;
-
- /* skip the first, the global context */
- /*
- glusterfs related config can only occur inside
- $HTTP["url"] == "<glusterfs-prefix>"
- */
- p->conf.logfile = NULL;
- p->conf.loglevel = NULL;
- p->conf.specfile = NULL;
- p->conf.cache_timeout = 0;
- p->conf.exclude_exts = NULL;
- p->conf.prefix = NULL;
- p->conf.xattr_file_size = NULL;
- p->conf.document_root = NULL;
-
- for (i = 1; i < srv->config_context->used; i++) {
- data_config *dc = (data_config *)srv->config_context->data[i];
- s = p->config_storage[i];
-
- /* condition didn't match */
- if (!config_check_cond(srv, con, dc)) continue;
-
- /* merge config */
- for (j = 0; j < dc->value->used; j++) {
- data_unset *du = dc->value->data[j];
-
- if (buffer_is_equal_string (du->key,
- CONST_STR_LEN("glusterfs.logfile"))) {
- PATCH (logfile);
- } else if (buffer_is_equal_string (du->key,
- CONST_STR_LEN("glusterfs.loglevel"))) {
- PATCH (loglevel);
- } else if (buffer_is_equal_string (du->key,
- CONST_STR_LEN ("glusterfs.volume-specfile"))) {
- PATCH (specfile);
- } else if (buffer_is_equal_string (du->key,
- CONST_STR_LEN("glusterfs.cache-timeout"))) {
- PATCH (cache_timeout);
- } else if (buffer_is_equal_string (du->key,
- CONST_STR_LEN ("glusterfs.exclude-extensions"))) {
- PATCH (exclude_exts);
- } else if (buffer_is_equal_string (du->key,
- CONST_STR_LEN ("glusterfs.prefix"))) {
- PATCH (prefix);
- } else if (buffer_is_equal_string (du->key,
- CONST_STR_LEN ("glusterfs.xattr-interface-size-limit"))) {
- PATCH (xattr_file_size);
- } else if (buffer_is_equal_string (du->key,
- CONST_STR_LEN ("glusterfs.document-root"))) {
- PATCH (document_root);
- }
- }
- }
- return 0;
-}
-
-#undef PATCH
-
-static int http_response_parse_range(server *srv, connection *con,
- plugin_data *p) {
- int multipart = 0;
- int error;
- off_t start, end;
- const char *s, *minus;
- char *boundary = "fkj49sn38dcn3";
- data_string *ds;
- stat_cache_entry *sce = NULL;
- buffer *content_type = NULL;
- size_t size = 0;
- mod_glusterfs_ctx_t *ctx = con->plugin_ctx[p->id];
-
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr) {
- size = atoi (p->conf.xattr_file_size->ptr);
- }
-
- if (HANDLER_ERROR == stat_cache_get_entry(srv, con, con->physical.path,
- &sce)) {
- SEGFAULT();
- }
-
- start = 0;
- end = sce->st.st_size - 1;
-
- con->response.content_length = 0;
-
- if (NULL != (ds = (data_string *)array_get_element(con->response.headers,
- "Content-Type"))) {
- content_type = ds->value;
- }
-
- for (s = con->request.http_range, error = 0;
- !error && *s && NULL != (minus = strchr(s, '-')); ) {
- char *err;
- off_t la, le;
-
- if (s == minus) {
- /* -<stop> */
-
- le = strtoll(s, &err, 10);
-
- if (le == 0) {
- /* RFC 2616 - 14.35.1 */
-
- con->http_status = 416;
- error = 1;
- } else if (*err == '\0') {
- /* end */
- s = err;
-
- end = sce->st.st_size - 1;
- start = sce->st.st_size + le;
- } else if (*err == ',') {
- multipart = 1;
- s = err + 1;
-
- end = sce->st.st_size - 1;
- start = sce->st.st_size + le;
- } else {
- error = 1;
- }
-
- } else if (*(minus+1) == '\0' || *(minus+1) == ',') {
- /* <start>- */
-
- la = strtoll(s, &err, 10);
-
- if (err == minus) {
- /* ok */
-
- if (*(err + 1) == '\0') {
- s = err + 1;
-
- end = sce->st.st_size - 1;
- start = la;
-
- } else if (*(err + 1) == ',') {
- multipart = 1;
- s = err + 2;
-
- end = sce->st.st_size - 1;
- start = la;
- } else {
- error = 1;
- }
- } else {
- /* error */
- error = 1;
- }
- } else {
- /* <start>-<stop> */
-
- la = strtoll(s, &err, 10);
-
- if (err == minus) {
- le = strtoll(minus+1, &err, 10);
-
- /* RFC 2616 - 14.35.1 */
- if (la > le) {
- error = 1;
- }
-
- if (*err == '\0') {
- /* ok, end*/
- s = err;
-
- end = le;
- start = la;
- } else if (*err == ',') {
- multipart = 1;
- s = err + 1;
-
- end = le;
- start = la;
- } else {
- /* error */
-
- error = 1;
- }
- } else {
- /* error */
-
- error = 1;
- }
- }
-
- if (!error) {
- if (start < 0) start = 0;
-
- /* RFC 2616 - 14.35.1 */
- if (end > sce->st.st_size - 1) end = sce->st.st_size - 1;
-
- if (start > sce->st.st_size - 1) {
- error = 1;
-
- con->http_status = 416;
- }
- }
-
- if (!error) {
- if (multipart) {
- /* write boundary-header */
- buffer *b;
-
- b = chunkqueue_get_append_buffer(con->write_queue);
-
- buffer_copy_string(b, "\r\n--");
- buffer_append_string(b, boundary);
-
- /* write Content-Range */
- buffer_append_string(b, "\r\nContent-Range: "
- "bytes ");
- buffer_append_off_t(b, start);
- buffer_append_string(b, "-");
- buffer_append_off_t(b, end);
- buffer_append_string(b, "/");
- buffer_append_off_t(b, sce->st.st_size);
-
- buffer_append_string(b, "\r\nContent-Type: ");
- buffer_append_string_buffer(b, content_type);
-
- /* write END-OF-HEADER */
- buffer_append_string(b, "\r\n\r\n");
-
- con->response.content_length += b->used - 1;
-
- }
-
- if ((size_t)sce->st.st_size >= size) {
- chunkqueue_append_glusterfs_file(con, ctx->fd,
- start,
- end - start,
- size);
- } else {
- if (!start) {
- buffer *mem = buffer_init ();
- mem->ptr = ctx->buf;
- mem->used = mem->size = sce->st.st_size;
- http_chunk_append_buffer (srv, con, mem);
- ctx->buf = NULL;
- } else {
- chunkqueue_append_mem (con->write_queue,
- ((char *)ctx->buf)
- + start,
- end - start + 1);
- }
- }
-
- con->response.content_length += end - start + 1;
- }
- }
-
- if (ctx->buf) {
- free (ctx->buf);
- ctx->buf = NULL;
- }
-
- /* something went wrong */
- if (error) return -1;
-
- if (multipart) {
- /* add boundary end */
- buffer *b;
-
- b = chunkqueue_get_append_buffer(con->write_queue);
-
- buffer_copy_string_len(b, "\r\n--", 4);
- buffer_append_string(b, boundary);
- buffer_append_string_len(b, "--\r\n", 4);
-
- con->response.content_length += b->used - 1;
-
- /* set header-fields */
-
- buffer_copy_string(p->range_buf, "multipart/byteranges; boundary=");
- buffer_append_string(p->range_buf, boundary);
-
- /* overwrite content-type */
- response_header_overwrite(srv, con, CONST_STR_LEN("Content-Type"),
- CONST_BUF_LEN(p->range_buf));
- } else {
- /* add Content-Range-header */
-
- buffer_copy_string(p->range_buf, "bytes ");
- buffer_append_off_t(p->range_buf, start);
- buffer_append_string(p->range_buf, "-");
- buffer_append_off_t(p->range_buf, end);
- buffer_append_string(p->range_buf, "/");
- buffer_append_off_t(p->range_buf, sce->st.st_size);
-
- response_header_insert(srv, con, CONST_STR_LEN("Content-Range"),
- CONST_BUF_LEN(p->range_buf));
- }
-
- /* ok, the file is set-up */
- return 0;
-}
-
-PHYSICALPATH_FUNC(mod_glusterfs_handle_physical) {
- plugin_data *p = p_d;
- stat_cache_entry *sce;
- mod_glusterfs_ctx_t *plugin_ctx = NULL;
- size_t size = 0;
- int ret = 0;
-
- if (con->http_status != 0) return HANDLER_GO_ON;
- if (con->uri.path->used == 0) return HANDLER_GO_ON;
- if (con->physical.path->used == 0) return HANDLER_GO_ON;
-
- if (con->mode != DIRECT) return HANDLER_GO_ON;
-
- /*
- network_backend_write = srv->network_backend_write;
- srv->network_backend_write = mod_glusterfs_network_backend_write;
- */
-
- switch (con->request.http_method) {
- case HTTP_METHOD_GET:
- case HTTP_METHOD_POST:
- case HTTP_METHOD_HEAD:
- break;
-
- default:
- return HANDLER_GO_ON;
- }
-
- mod_glusterfs_patch_connection(srv, con, p);
- if (!p->conf.prefix || p->conf.prefix->used == 0) {
- return HANDLER_GO_ON;
- }
-
- if (!p->conf.document_root || p->conf.document_root->used == 0) {
- log_error_write(srv, __FILE__, __LINE__, "s",
- "glusterfs.document-root is not specified");
- con->http_status = 500;
- return HANDLER_FINISHED;
- }
-
- if (!p->conf.mounted) {
- glusterfs_init_params_t ctx;
-
- if (!p->conf.specfile || p->conf.specfile->used == 0) {
- return HANDLER_GO_ON;
- }
- memset (&ctx, 0, sizeof (ctx));
-
- ctx.specfile = p->conf.specfile->ptr;
- ctx.logfile = p->conf.logfile->ptr;
- ctx.loglevel = p->conf.loglevel->ptr;
- ctx.lookup_timeout = ctx.stat_timeout = p->conf.cache_timeout;
-
- ret = glusterfs_mount (p->conf.prefix->ptr, &ctx);
- if (ret != 0) {
- con->http_status = 500;
- log_error_write(srv, __FILE__, __LINE__, "sbs",
- "glusterfs initialization failed, "
- "please check your configuration. "
- "Glusterfs logfile ",
- p->conf.logfile,
- "might contain details");
- return HANDLER_FINISHED;
- }
- p->conf.mounted = 1;
- }
-
- size = 0;
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr)
- size = atoi (p->conf.xattr_file_size->ptr);
-
- if (!con->plugin_ctx[p->id]) {
-/* FIXME: what if multiple files are requested from a single connection? */
-/* TODO: check whether this works fine for HTTP protocol 1.1 */
-
- buffer *tmp_buf = buffer_init_buffer (con->physical.basedir);
-
- plugin_ctx = calloc (1, sizeof (*plugin_ctx));
- /* ERR_ABORT (plugin_ctx); */
- con->plugin_ctx[p->id] = plugin_ctx;
-
- buffer_append_string_buffer (tmp_buf, p->conf.prefix);
- buffer_path_simplify (tmp_buf, tmp_buf);
-
- plugin_ctx->prefix = tmp_buf->used - 1;
- if (tmp_buf->ptr[plugin_ctx->prefix - 1] == '/')
- plugin_ctx->prefix--;
-
- buffer_free (tmp_buf);
- } else
- /*FIXME: error!! error!! */
- plugin_ctx = con->plugin_ctx[p->id];
-
-
- if (size)
- {
- plugin_ctx->buf = malloc (size);
- /* ERR_ABORT (plugin_ctx->buf); */
- }
-
- plugin_ctx->glusterfs_path = buffer_init ();
- buffer_copy_string_buffer (plugin_ctx->glusterfs_path,
- p->conf.prefix);
- buffer_append_string (plugin_ctx->glusterfs_path,
- p->conf.document_root->ptr);
- buffer_append_string (plugin_ctx->glusterfs_path, "/");
- buffer_append_string (plugin_ctx->glusterfs_path,
- con->physical.path->ptr + plugin_ctx->prefix);
- buffer_path_simplify (plugin_ctx->glusterfs_path,
- plugin_ctx->glusterfs_path);
-
- if (glusterfs_stat_cache_get_entry (srv, con,
- plugin_ctx->glusterfs_path,
- con->physical.path, plugin_ctx->buf,
- size, &sce) == HANDLER_ERROR) {
- if (errno == ENOENT)
- con->http_status = 404;
- else
- con->http_status = 403;
-
- free (plugin_ctx->buf);
- buffer_free (plugin_ctx->glusterfs_path);
- plugin_ctx->glusterfs_path = NULL;
- plugin_ctx->buf = NULL;
-
- free (plugin_ctx);
- con->plugin_ctx[p->id] = NULL;
-
- return HANDLER_FINISHED;
- }
-
- if (!(S_ISREG (sce->st.st_mode) && (size_t)sce->st.st_size < size)) {
- free (plugin_ctx->buf);
- plugin_ctx->buf = NULL;
- }
-
- return HANDLER_GO_ON;
-}
-
-static int http_chunk_append_len(server *srv, connection *con, size_t len) {
- size_t i, olen = len, j;
- buffer *b;
-
- b = srv->tmp_chunk_len;
-
- if (len == 0) {
- buffer_copy_string(b, "0");
- } else {
- for (i = 0; i < 8 && len; i++) {
- len >>= 4;
- }
-
- /* i is the number of hex digits we have */
- buffer_prepare_copy(b, i + 1);
-
- for (j = i-1, len = olen; j+1 > 0; j--) {
- b->ptr[j] = (len & 0xf) + (((len & 0xf) <= 9) ?
- '0' : 'a' - 10);
- len >>= 4;
- }
- b->used = i;
- b->ptr[b->used++] = '\0';
- }
-
- buffer_append_string(b, "\r\n");
- chunkqueue_append_buffer(con->write_queue, b);
-
- return 0;
-}
-
-int http_chunk_append_glusterfs_file_chunk(server *srv, connection *con,
- glusterfs_file_t fd, off_t offset,
- off_t len, size_t buf_size) {
- chunkqueue *cq;
-
- if (!con) return -1;
-
- cq = con->write_queue;
-
- if (con->response.transfer_encoding & HTTP_TRANSFER_ENCODING_CHUNKED) {
- http_chunk_append_len(srv, con, len);
- }
-
- chunkqueue_append_glusterfs_file (con, fd, offset, len, buf_size);
-
- if ((con->response.transfer_encoding & HTTP_TRANSFER_ENCODING_CHUNKED)
- && (len > 0)) {
- chunkqueue_append_mem(cq, "\r\n", 2 + 1);
- }
-
- return 0;
-}
-
-int http_chunk_append_glusterfs_mem(server *srv, connection *con,
- char * mem, size_t len,
- size_t buf_size)
-{
- chunkqueue *cq = NULL;
- buffer *buf = NULL;
-
- if (!con) return -1;
-
- cq = con->write_queue;
-
- if (len == 0) {
- free (mem);
- if (con->response.transfer_encoding
- & HTTP_TRANSFER_ENCODING_CHUNKED) {
- chunkqueue_append_mem(cq, "0\r\n\r\n", 5 + 1);
- } else {
- chunkqueue_append_mem(cq, "", 1);
- }
- return 0;
- }
-
- if (con->response.transfer_encoding & HTTP_TRANSFER_ENCODING_CHUNKED) {
- http_chunk_append_len(srv, con, len - 1);
- }
-
- buf = buffer_init ();
-
- buf->used = len + 1;
- buf->size = buf_size;
- buf->ptr = (char *)mem;
- chunkqueue_append_buffer_weak (cq, buf);
-
- if (con->response.transfer_encoding & HTTP_TRANSFER_ENCODING_CHUNKED) {
- chunkqueue_append_mem(cq, "\r\n", 2 + 1);
- }
-
- return 0;
-}
-
-
-
-URIHANDLER_FUNC(mod_glusterfs_subrequest) {
- plugin_data *p = p_d;
- stat_cache_entry *sce = NULL;
- int s_len;
- char allow_caching = 1;
- size_t size = 0;
- mod_glusterfs_ctx_t *ctx = con->plugin_ctx[p->id];
-
- /* someone else has done a decision for us */
- if (con->http_status != 0) return HANDLER_GO_ON;
- if (con->uri.path->used == 0) return HANDLER_GO_ON;
- if (con->physical.path->used == 0) return HANDLER_GO_ON;
-
- /* someone else has handled this request */
- if (con->mode != DIRECT) return HANDLER_GO_ON;
-
- /* we only handle GET, POST and HEAD */
- switch(con->request.http_method) {
- case HTTP_METHOD_GET:
- case HTTP_METHOD_POST:
- case HTTP_METHOD_HEAD:
- break;
- default:
- return HANDLER_GO_ON;
- }
-
- mod_glusterfs_patch_connection(srv, con, p);
-
- if (!p->conf.prefix || !p->conf.prefix->used)
- return HANDLER_GO_ON;
-
- s_len = con->uri.path->used - 1;
- /* ignore certain extensions */
- /*
- for (k = 0; k < p->conf.exclude_exts->used; k++) {
- data_string *ds;
- ds = (data_string *)p->conf.exclude_exts->data[k];
-
- if (ds->value->used == 0) continue;
-
- if (!strncmp (ds->value->ptr, con->uri.path->ptr,
- strlen (ds->value->ptr)))
- break;
- }
-
- if (k == p->conf.exclude_exts->used) {
- return HANDLER_GO_ON;
- }
- */
-
- if (con->conf.log_request_handling) {
- log_error_write(srv, __FILE__, __LINE__, "s",
- "-- serving file from glusterfs");
- }
-
- if (HANDLER_ERROR == stat_cache_get_entry(srv, con, con->physical.path,
- &sce)) {
- con->http_status = 403;
-
- log_error_write(srv, __FILE__, __LINE__, "sbsb",
- "not a regular file:", con->uri.path,
- "->", con->physical.path);
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
-
- return HANDLER_FINISHED;
- }
-
- if (con->uri.path->ptr[s_len] == '/' || !S_ISREG(sce->st.st_mode)) {
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- return HANDLER_FINISHED;
- }
-
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr)
- size = atoi (p->conf.xattr_file_size->ptr);
-
- if ((size_t)sce->st.st_size > size) {
- ctx->fd = glusterfs_open (ctx->glusterfs_path->ptr, O_RDONLY,
- 0);
-
- if (((long)ctx->fd) == 0) {
- con->http_status = 403;
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- return HANDLER_FINISHED;
- }
- }
-
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
-
- /* we only handline regular files */
-#ifdef HAVE_LSTAT
- if ((sce->is_symlink == 1) && !con->conf.follow_symlink) {
- con->http_status = 403;
-
- if (con->conf.log_request_handling) {
- log_error_write(srv, __FILE__, __LINE__, "s",
- "-- access denied due symlink "
- "restriction");
- log_error_write(srv, __FILE__, __LINE__, "sb",
- "Path :", con->physical.path);
- }
-
- buffer_reset(con->physical.path);
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- return HANDLER_FINISHED;
- }
-#endif
- if (!S_ISREG(sce->st.st_mode)) {
- con->http_status = 404;
-
- if (con->conf.log_file_not_found) {
- log_error_write(srv, __FILE__, __LINE__, "sbsb",
- "not a regular file:", con->uri.path,
- "->", sce->name);
- }
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
-
- return HANDLER_FINISHED;
- }
-
- /* mod_compress might set several data directly, don't overwrite them */
-
- /* set response content-type, if not set already */
-
- if (NULL == array_get_element(con->response.headers, "Content-Type")) {
- if (buffer_is_empty(sce->content_type)) {
- /* we are setting application/octet-stream, but also "
- * announce that this header field might change in "
- * the seconds few requests. This should fix the
- * aggressive caching of FF and the script download
- * seen by the first installations
- */
- response_header_overwrite(srv, con,
- CONST_STR_LEN("Content-Type"),
- CONST_STR_LEN("application/"
- "octet-stream"));
-
- allow_caching = 0;
- } else {
- response_header_overwrite(srv, con,
- CONST_STR_LEN("Content-Type"),
- CONST_BUF_LEN(sce->content_type));
- }
- }
-
- if (con->conf.range_requests) {
- response_header_overwrite(srv, con,
- CONST_STR_LEN("Accept-Ranges"),
- CONST_STR_LEN("bytes"));
- }
-
- /* TODO: Allow Cachable requests */
-#if 0
- if (allow_caching) {
- if (p->conf.etags_used && con->etag_flags != 0
- && !buffer_is_empty(sce->etag)) {
- if (NULL == array_get_element(con->response.headers,
- "ETag")) {
- /* generate e-tag */
- etag_mutate(con->physical.etag, sce->etag);
-
- response_header_overwrite(srv, con,
- CONST_STR_LEN("ETag"),
- CONST_BUF_LEN(con->physical.etag));
- }
- }
-
- /* prepare header */
- if (NULL == (ds = (data_string *)array_get_element(con->response.headers,
- "Last-Modified"))) {
- mtime = strftime_cache_get(srv, sce->st.st_mtime);
- response_header_overwrite(srv, con, CONST_STR_LEN("Last-Modified"),
- CONST_BUF_LEN(mtime));
- } else {
- mtime = ds->value;
- }
-
- if (HANDLER_FINISHED == http_response_handle_cachable(srv, con,
- mtime)) {
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- return HANDLER_FINISHED;
- }
- }
-#endif
-
- /*TODO: Read about etags */
- if (con->request.http_range && con->conf.range_requests) {
- int do_range_request = 1;
- data_string *ds = NULL;
- buffer *mtime = NULL;
- /* check if we have a conditional GET */
-
- /* prepare header */
- if (NULL == (ds = (data_string *)array_get_element(con->response.headers,
- "Last-Modified"))) {
- mtime = strftime_cache_get(srv, sce->st.st_mtime);
- response_header_overwrite(srv, con, CONST_STR_LEN("Last-Modified"),
- CONST_BUF_LEN(mtime));
- } else {
- mtime = ds->value;
- }
-
- if (NULL != (ds = (data_string *)array_get_element(con->request.headers,
- "If-Range"))) {
- /* if the value is the same as our ETag, we do a Range-request,
- * otherwise a full 200 */
-
- if (ds->value->ptr[0] == '"') {
- /**
- * client wants a ETag
- */
- if (!con->physical.etag) {
- do_range_request = 0;
- } else if (!buffer_is_equal(ds->value,
- con->physical.etag)) {
- do_range_request = 0;
- }
- } else if (!mtime) {
- /**
- * we don't have a Last-Modified and can match
- * the If-Range:
- *
- * sending all
- */
- do_range_request = 0;
- } else if (!buffer_is_equal(ds->value, mtime)) {
- do_range_request = 0;
- }
- }
-
- if (do_range_request) {
- /* content prepared, I'm done */
- con->file_finished = 1;
-
- if (0 == http_response_parse_range(srv, con, p)) {
- con->http_status = 206;
- }
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- return HANDLER_FINISHED;
- }
- }
-
- /* if we are still here, prepare body */
-
- /* we add it here for all requests
- * the HEAD request will drop it afterwards again
- */
- /*TODO check whether 1 should be subtracted */
-
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr)
- size = atoi (p->conf.xattr_file_size->ptr);
-
- if (size <= (size_t)sce->st.st_size) {
- http_chunk_append_glusterfs_file_chunk (srv, con, ctx->fd, 0,
- sce->st.st_size, size);
- } else {
- http_chunk_append_glusterfs_mem (srv, con, ctx->buf,
- sce->st.st_size, size);
- }
-
- con->http_status = 200;
- con->file_finished = 1;
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
-
- return HANDLER_FINISHED;
-}
-
-#if 0
-URIHANDLER_FUNC(mod_glusterfs_request_done)
-{
- mod_glusterfs_iobuf_t *cur = first, *prev;
- while (cur) {
- prev = cur;
- glusterfs_free (cur->buf);
- cur = cur->next;
- free (prev);
- }
- first = NULL
- }
-#endif
-
-/* this function is called at dlopen() time and inits the callbacks */
-CONNECTION_FUNC(mod_glusterfs_connection_reset)
-{
- (void) p_d;
- (void) con;
- if (!network_backend_write)
- network_backend_write = srv->network_backend_write;
-
- srv->network_backend_write = mod_glusterfs_network_backend_write;
-
- return HANDLER_GO_ON;
-}
-
-int mod_glusterfs_plugin_init(plugin *p) {
- p->version = LIGHTTPD_VERSION_ID;
- p->name = buffer_init_string("glusterfs");
- p->init = mod_glusterfs_init;
- p->handle_physical = mod_glusterfs_handle_physical;
- p->handle_subrequest_start = mod_glusterfs_subrequest;
- // p->handle_request_done = mod_glusterfs_request_done;
- p->set_defaults = mod_glusterfs_set_defaults;
- p->connection_reset = mod_glusterfs_connection_reset;
- p->cleanup = mod_glusterfs_free;
-
- p->data = NULL;
-
- return 0;
-}
-
-
-/* mod_glusterfs_stat_cache */
-static stat_cache_entry * stat_cache_entry_init(void) {
- stat_cache_entry *sce = NULL;
-
- sce = calloc(1, sizeof(*sce));
- /* ERR_ABORT (sce); */
-
- sce->name = buffer_init();
- sce->etag = buffer_init();
- sce->content_type = buffer_init();
-
- return sce;
-}
-
-#ifdef HAVE_FAM_H
-static fam_dir_entry * fam_dir_entry_init(void) {
- fam_dir_entry *fam_dir = NULL;
-
- fam_dir = calloc(1, sizeof(*fam_dir));
- /* ERR_ABORT (fam_dir); */
-
- fam_dir->name = buffer_init();
-
- return fam_dir;
-}
-
-static void fam_dir_entry_free(void *data) {
- fam_dir_entry *fam_dir = data;
-
- if (!fam_dir) return;
-
- FAMCancelMonitor(fam_dir->fc, fam_dir->req);
-
- buffer_free(fam_dir->name);
- free(fam_dir->req);
-
- free(fam_dir);
-}
-#endif
-
-#ifdef HAVE_XATTR
-static int stat_cache_attr_get(buffer *buf, char *name) {
- int attrlen;
- int ret;
-
- attrlen = 1024;
- buffer_prepare_copy(buf, attrlen);
- attrlen--;
- if(0 == (ret = attr_get(name, "Content-Type", buf->ptr, &attrlen, 0))) {
- buf->used = attrlen + 1;
- buf->ptr[attrlen] = '\0';
- }
- return ret;
-}
-#endif
-
-/* the famous DJB hash function for strings */
-static uint32_t hashme(buffer *str) {
- uint32_t hash = 5381;
- const char *s;
- for (s = str->ptr; *s; s++) {
- hash = ((hash << 5) + hash) + *s;
- }
-
- hash &= ~(1 << 31); /* strip the highest bit */
-
- return hash;
-}
-
-
-#ifdef HAVE_LSTAT
-static int stat_cache_lstat(server *srv, buffer *dname, struct stat *lst) {
- if (lstat(dname->ptr, lst) == 0) {
- return S_ISLNK(lst->st_mode) ? 0 : 1;
- }
- else {
- log_error_write(srv, __FILE__, __LINE__, "sbs",
- "lstat failed for:",
- dname, strerror(errno));
- };
- return -1;
-}
-#endif
-
-/***
- *
- *
- *
- * returns:
- * - HANDLER_FINISHED on cache-miss (don't forget to reopen the file)
- * - HANDLER_ERROR on stat() failed -> see errno for problem
- */
-
-handler_t glusterfs_stat_cache_get_entry(server *srv,
- connection *con,
- buffer *glusterfs_path,
- buffer *name,
- void *buf,
- size_t size,
- stat_cache_entry **ret_sce)
-{
-#ifdef HAVE_FAM_H
- fam_dir_entry *fam_dir = NULL;
- int dir_ndx = -1;
- splay_tree *dir_node = NULL;
-#endif
- stat_cache_entry *sce = NULL;
- stat_cache *sc;
- struct stat st;
- size_t k;
-#ifdef DEBUG_STAT_CACHE
- size_t i;
-#endif
- int file_ndx;
- splay_tree *file_node = NULL;
-
- *ret_sce = NULL;
- memset (&st, 0, sizeof (st));
-
- /*
- * check if the directory for this file has changed
- */
-
- sc = srv->stat_cache;
-
- buffer_copy_string_buffer(sc->hash_key, name);
- buffer_append_long(sc->hash_key, con->conf.follow_symlink);
-
- file_ndx = hashme(sc->hash_key);
- sc->files = splaytree_splay(sc->files, file_ndx);
-
-#ifdef DEBUG_STAT_CACHE
- for (i = 0; i < ctrl.used; i++) {
- if (ctrl.ptr[i] == file_ndx) break;
- }
-#endif
-
- if (sc->files && (sc->files->key == file_ndx)) {
-#ifdef DEBUG_STAT_CACHE
- /* it was in the cache */
- assert(i < ctrl.used);
-#endif
-
- /* we have seen this file already and
- * don't stat() it again in the same second */
-
- file_node = sc->files;
-
- sce = file_node->data;
-
- /* check if the name is the same, we might have a collision */
-
- if (buffer_is_equal(name, sce->name)) {
- if (srv->srvconf.stat_cache_engine
- == STAT_CACHE_ENGINE_SIMPLE) {
- if (sce->stat_ts == srv->cur_ts && !buf) {
- *ret_sce = sce;
- return HANDLER_GO_ON;
- }
- }
- } else {
- /* oops, a collision,
- *
- * file_node is used by the FAM check below to see if
- * we know this file and if we can save a stat().
- *
- * BUT, the sce is not reset here as the entry into
- * the cache is ok, we it is just not pointing to
- * our requested file.
- */
-
- file_node = NULL;
- }
- } else {
-#ifdef DEBUG_STAT_CACHE
- if (i != ctrl.used) {
- fprintf(stderr, "%s.%d: %08x was already inserted "
- "but not found in cache, %s\n",
- __FILE__, __LINE__, file_ndx, name->ptr);
- }
- assert(i == ctrl.used);
-#endif
- }
- /*
- * *lol*
- * - open() + fstat() on a named-pipe results in a (intended) hang.
- * - stat() if regular file + open() to see if we can read from it is
- * better
- *
- * */
- if (-1 == glusterfs_get (glusterfs_path->ptr, buf, size, &st)) {
- return HANDLER_ERROR;
- }
-
- if (NULL == sce) {
- int osize = 0;
-
- if (sc->files) {
- osize = sc->files->size;
- }
-
- sce = stat_cache_entry_init();
- buffer_copy_string_buffer(sce->name, name);
-
- sc->files = splaytree_insert(sc->files, file_ndx, sce);
-#ifdef DEBUG_STAT_CACHE
- if (ctrl.size == 0) {
- ctrl.size = 16;
- ctrl.used = 0;
- ctrl.ptr = malloc(ctrl.size * sizeof(*ctrl.ptr));
- /* ERR_ABORT (ctrl.ptr); */
- } else if (ctrl.size == ctrl.used) {
- ctrl.size += 16;
- ctrl.ptr = realloc(ctrl.ptr,
- ctrl.size * sizeof(*ctrl.ptr));
- /* ERR_ABORT (ctrl.ptr); */
- }
-
- ctrl.ptr[ctrl.used++] = file_ndx;
-
- assert(sc->files);
- assert(sc->files->data == sce);
- assert(osize + 1 == splaytree_size(sc->files));
-#endif
- }
-
- sce->st = st;
- sce->stat_ts = srv->cur_ts;
-
- /* catch the obvious symlinks
- *
- * this is not a secure check as we still have a race-condition between
- * the stat() and the open. We can only solve this by
- * 1. open() the file
- * 2. fstat() the fd
- *
- * and keeping the file open for the rest of the time. But this can
- * only be done at network level.
- *
- * per default it is not a symlink
- * */
-#ifdef HAVE_LSTAT
- sce->is_symlink = 0;
-
- /* we want to only check for symlinks if we should block symlinks.
- */
- if (!con->conf.follow_symlink) {
- if (stat_cache_lstat(srv, name, &lst) == 0) {
-#ifdef DEBUG_STAT_CACHE
- log_error_write(srv, __FILE__, __LINE__, "sb",
- "found symlink", name);
-#endif
- sce->is_symlink = 1;
- }
-
- /*
- * we assume "/" can not be symlink, so
- * skip the symlink stuff if our path is /
- **/
- else if ((name->used > 2)) {
- buffer *dname;
- char *s_cur;
-
- dname = buffer_init();
- buffer_copy_string_buffer(dname, name);
-
- while ((s_cur = strrchr(dname->ptr,'/'))) {
- *s_cur = '\0';
- dname->used = s_cur - dname->ptr + 1;
- if (dname->ptr == s_cur) {
-#ifdef DEBUG_STAT_CACHE
- log_error_write(srv, __FILE__, __LINE__,
- "s", "reached /");
-#endif
- break;
- }
-#ifdef DEBUG_STAT_CACHE
- log_error_write(srv, __FILE__, __LINE__, "sbs",
- "checking if", dname, "is a "
- "symlink");
-#endif
- if (stat_cache_lstat(srv, dname, &lst) == 0) {
- sce->is_symlink = 1;
-#ifdef DEBUG_STAT_CACHE
- log_error_write(srv, __FILE__, __LINE__,
- "sb",
- "found symlink", dname);
-#endif
- break;
- };
- };
- buffer_free(dname);
- };
- };
-#endif
-
- if (S_ISREG(st.st_mode)) {
- /* determine mimetype */
- buffer_reset(sce->content_type);
-
- for (k = 0; k < con->conf.mimetypes->used; k++) {
- data_string *ds = NULL;
- buffer *type = NULL;
-
- ds = (data_string *)con->conf.mimetypes->data[k];
- type = ds->key;
- if (type->used == 0) continue;
-
- /* check if the right side is the same */
- if (type->used > name->used) continue;
-
- if (0 == strncasecmp(name->ptr + name->used - type->used,
- type->ptr, type->used - 1)) {
- buffer_copy_string_buffer(sce->content_type,
- ds->value);
- break;
- }
- }
- etag_create(sce->etag, &(sce->st), con->etag_flags);
-#ifdef HAVE_XATTR
- if (con->conf.use_xattr && buffer_is_empty(sce->content_type)) {
- stat_cache_attr_get(sce->content_type, name->ptr);
- }
-#endif
- } else if (S_ISDIR(st.st_mode)) {
- etag_create(sce->etag, &(sce->st), con->etag_flags);
- }
-
-#ifdef HAVE_FAM_H
- if (sc->fam &&
- (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_FAM)) {
- /* is this directory already registered ? */
- if (!dir_node) {
- fam_dir = fam_dir_entry_init();
- fam_dir->fc = sc->fam;
-
- buffer_copy_string_buffer(fam_dir->name, sc->dir_name);
-
- fam_dir->version = 1;
-
- fam_dir->req = calloc(1, sizeof(FAMRequest));
- /* ERR_ABORT (fam_dir->req); */
-
- if (0 != FAMMonitorDirectory(sc->fam, fam_dir->name->ptr,
- fam_dir->req, fam_dir)) {
-
- log_error_write(srv, __FILE__, __LINE__, "sbsbs",
- "monitoring dir failed:",
- fam_dir->name,
- "file:", name,
- FamErrlist[FAMErrno]);
-
- fam_dir_entry_free(fam_dir);
- } else {
- int osize = 0;
-
- if (sc->dirs) {
- osize = sc->dirs->size;
- }
-
- sc->dirs = splaytree_insert(sc->dirs, dir_ndx,
- fam_dir);
- assert(sc->dirs);
- assert(sc->dirs->data == fam_dir);
- assert(osize == (sc->dirs->size - 1));
- }
- } else {
- fam_dir = dir_node->data;
- }
-
- /* bind the fam_fc to the stat() cache entry */
-
- if (fam_dir) {
- sce->dir_version = fam_dir->version;
- sce->dir_ndx = dir_ndx;
- }
- }
-#endif
-
- *ret_sce = sce;
-
- return HANDLER_GO_ON;
-}
-
-/**
- * remove stat() from cache which havn't been stat()ed for
- * more than 10 seconds
- *
- *
- * walk though the stat-cache, collect the ids which are too old
- * and remove them in a second loop
- */
-
-static int stat_cache_tag_old_entries(server *srv, splay_tree *t, int *keys,
- size_t *ndx) {
- stat_cache_entry *sce;
-
- if (!t) return 0;
-
- stat_cache_tag_old_entries(srv, t->left, keys, ndx);
- stat_cache_tag_old_entries(srv, t->right, keys, ndx);
-
- sce = t->data;
-
- if (srv->cur_ts - sce->stat_ts > 2) {
- keys[(*ndx)++] = t->key;
- }
-
- return 0;
-}
diff --git a/mod_glusterfs/lighttpd/1.4/mod_glusterfs.h b/mod_glusterfs/lighttpd/1.4/mod_glusterfs.h
deleted file mode 100644
index 7d7742e8d..000000000
--- a/mod_glusterfs/lighttpd/1.4/mod_glusterfs.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _MOD_GLUSTERFS_FILE_CACHE_H_
-#define _MOD_GLUSTERFS_FILE_CACHE_H_
-
-#include "stat_cache.h"
-#include <libglusterfsclient.h>
-#include "base.h"
-
-handler_t glusterfs_stat_cache_get_entry(server *srv, connection *con,
- buffer *glusterfs_path, buffer *name,
- void *buf, size_t size,
- stat_cache_entry **fce);
-
-#endif
diff --git a/mod_glusterfs/lighttpd/1.5/Makefile.am b/mod_glusterfs/lighttpd/1.5/Makefile.am
deleted file mode 100644
index eda329111..000000000
--- a/mod_glusterfs/lighttpd/1.5/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-EXTRA_DIST = Makefile.am.diff mod_glusterfs.c mod_glusterfs.h README.txt
-
-CLEANFILES =
diff --git a/mod_glusterfs/lighttpd/1.5/Makefile.am.diff b/mod_glusterfs/lighttpd/1.5/Makefile.am.diff
deleted file mode 100644
index 375696b5d..000000000
--- a/mod_glusterfs/lighttpd/1.5/Makefile.am.diff
+++ /dev/null
@@ -1,29 +0,0 @@
---- lighttpd-1.4.19/src/Makefile.am 2008-04-16 18:42:18.000000000 +0400
-+++ lighttpd-1.4.19.mod/src/Makefile.am 2008-04-16 18:41:11.000000000 +0400
-@@ -1,4 +1,4 @@
--AM_CFLAGS = $(FAM_CFLAGS)
-+AM_CFLAGS = $(FAM_CFLAGS) -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64
-
- noinst_PROGRAMS=proc_open lemon # simple-fcgi #graphic evalo bench ajp ssl error_test adserver gen-license
- sbin_PROGRAMS=lighttpd lighttpd-angel
-@@ -241,6 +241,11 @@
- mod_accesslog_la_LDFLAGS = -module -export-dynamic -avoid-version -no-undefined
- mod_accesslog_la_LIBADD = $(common_libadd)
-
-+lib_LTLIBRARIES += mod_glusterfs.la
-+mod_glusterfs_la_SOURCES = mod_glusterfs.c
-+mod_glusterfs_la_CFLAGS = $(AM_CFLAGS)
-+mod_glusterfs_la_LDFLAGS = -module -export-dynamic -avoid-version -no-undefined -lglusterfsclient -lpthread
-+mod_glusterfs_la_LIBADD = $(common_libadd)
-
- hdr = server.h buffer.h network.h log.h keyvalue.h \
- response.h request.h fastcgi.h chunk.h \
-@@ -254,7 +259,7 @@
- configparser.h mod_ssi_exprparser.h \
- sys-mmap.h sys-socket.h mod_cml.h mod_cml_funcs.h \
- splaytree.h proc_open.h status_counter.h \
-- mod_magnet_cache.h
-+ mod_magnet_cache.h mod_glusterfs.h
-
- DEFS= @DEFS@ -DLIBRARY_DIR="\"$(libdir)\"" -DSBIN_DIR="\"$(sbindir)\""
-
diff --git a/mod_glusterfs/lighttpd/1.5/README.txt b/mod_glusterfs/lighttpd/1.5/README.txt
deleted file mode 100644
index bdbdfffbc..000000000
--- a/mod_glusterfs/lighttpd/1.5/README.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-Introduction
-============
-mod_glusterfs is a module written for lighttpd to speed up the access of files present on glusterfs. mod_glusterfs uses libglusterfsclient library provided for glusterfs and hence can be used without fuse (File System in User Space).
-
-Usage
-=====
-To use mod_glusterfs with lighttpd-1.5, copy mod_glusterfs.c and mod_glusterfs.h into src/ of lighttpd-1.5 source tree, and apply the Makefile.am.diff to src/Makefile.am. Re-run ./autogen.sh on the top level of the lighttpd-1.5 build tree and recompile.
-
-# cp mod_glusterfs.[ch] /home/glusterfs/lighttpd-1.5/src/
-# cp Makefile.am.diff /home/glusterfs/lighttpd-1.5/
-# cd /home/glusterfs/lighttpd-1.5
-# patch -p1 < Makefile.am.diff
-# ./autogen.sh
-# ./configure
-# make
-# make install
-
-Configuration
-=============
-* mod_glusterfs should be listed at the begining of the list server.modules in lighttpd.conf.
-
-Below is a snippet from lighttpd.conf concerning to mod_glusterfs.
-
-$HTTP["url"] =~ "^/glusterfs" {
- glusterfs.prefix = "/glusterfs"
- glusterfs.logfile = "/var/log/glusterfs-logfile"
- glusterfs.document-root = "/home/glusterfs/document-root"
- glusterfs.volume-specfile = "/etc/glusterfs/glusterfs.vol"
- glusterfs.loglevel = "error"
- glusterfs.cache-timeout = 300
- glusterfs.xattr-interface-size-limit = "65536"
-}
-
-* $HTTP["url"] =~ "^/glusterfs"
- A perl style regular expression used to match against the url. If regular expression matches the url, the url is handled by mod_glusterfs. Note that the pattern given here should match glusterfs.prefix.
-
-* glusterfs.prefix (COMPULSORY)
- A string to be present at the starting of the file path in the url so that the file would be handled by glusterfs.
- Eg., A GET request on the url http://www.example.com/glusterfs-prefix/some-dir/example-file will result in fetching of the file "/some-dir/example-file" from glusterfs mount if glusterfs.prefix is set to "/glusterfs-prefix".
-
-* glusterfs.volume-specfile (COMPULSORY)
- Path to the the glusterfs volume specification file.
-
-* glusterfs.logfile (COMPULSORY)
- Path to the glusterfs logfile.
-
-* glusterfs.loglevel (OPTIONAL, default = warning)
- Allowed values are critical, error, warning, debug, none in the decreasing order of severity of error conditions.
-
-* glusterfs.cache-timeout (OPTIONAL, default = 0)
- Timeout values for glusterfs stat and lookup cache.
-
-* glusterfs.document-root (COMPULSORY)
- An absolute path, relative to which all the files are fetched from glusterfs.
-
-* glusterfs.xattr-interface-size-limit (OPTIONAL, default = 0)
- Files with sizes upto and including this value are fetched through the extended attribute interface of glusterfs rather than the usual open-read-close set of operations. For files of small sizes, it is recommended to use extended attribute interface.
diff --git a/mod_glusterfs/lighttpd/1.5/mod_glusterfs.c b/mod_glusterfs/lighttpd/1.5/mod_glusterfs.c
deleted file mode 100644
index b9710967c..000000000
--- a/mod_glusterfs/lighttpd/1.5/mod_glusterfs.c
+++ /dev/null
@@ -1,1476 +0,0 @@
-/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <ctype.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <pthread.h>
-#include <sys/types.h>
-#include <fcntl.h>
-
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include <errno.h>
-#include <unistd.h>
-#include <assert.h>
-
-#include "base.h"
-#include "log.h"
-#include "buffer.h"
-
-#include "plugin.h"
-
-#include "stat_cache.h"
-#include "mod_glusterfs.h"
-#include "etag.h"
-#include "response.h"
-
-#include "fdevent.h"
-#include "joblist.h"
-#include "http_req_range.h"
-#include "connections.h"
-#include "configfile.h"
-
-#include <libglusterfsclient.h>
-
-#ifdef HAVE_ATTR_ATTRIBUTES_H
-#include <attr/attributes.h>
-#endif
-
-#ifdef HAVE_FAM_H
-# include <fam.h>
-#endif
-
-#include "sys-mmap.h"
-
-/* NetBSD 1.3.x needs it */
-#ifndef MAP_FAILED
-# define MAP_FAILED -1
-#endif
-
-#ifndef O_LARGEFILE
-# define O_LARGEFILE 0
-#endif
-
-#ifndef HAVE_LSTAT
-#define lstat stat
-#endif
-
-#if 0
-/* enables debug code for testing if all nodes in the stat-cache as accessable */
-#define DEBUG_STAT_CACHE
-#endif
-
-#ifdef HAVE_LSTAT
-#undef HAVE_LSTAT
-#endif
-
-#define GLUSTERFS_FILE_CHUNK (FILE_CHUNK + 1)
-
-/* Keep this value large. Each glusterfs_async_read of GLUSTERFS_CHUNK_SIZE results in a network_backend_write of the read data*/
-
-#define GLUSTERFS_CHUNK_SIZE 8192
-
-/**
- * this is a staticfile for a lighttpd plugin
- *
- */
-
-
-/* plugin config for all request/connections */
-
-typedef struct {
- buffer *logfile;
- buffer *loglevel;
- buffer *specfile;
- buffer *prefix;
- buffer *xattr_file_size;
- buffer *document_root;
- array *exclude_exts;
- unsigned short cache_timeout;
-
- /* FIXME: its a pointer, hence cant be short */
- unsigned long handle;
-} plugin_config;
-
-static network_status_t (*network_backend_write)(struct server *srv, connection *con, iosocket *sock, chunkqueue *cq);
-
-typedef struct {
- PLUGIN_DATA;
- buffer *range_buf;
- plugin_config **config_storage;
- http_req_range *ranges;
- plugin_config conf;
-} plugin_data;
-
-typedef struct glusterfs_async_local {
- int op_ret;
- int op_errno;
- pthread_mutex_t lock;
- pthread_cond_t cond;
- connection *con;
- server *srv;
- plugin_data *p;
-
- union {
- struct {
- char async_read_complete;
- off_t length;
- size_t read_bytes;
- glusterfs_read_buf_t *buf;
- }readv;
-
- struct {
- buffer *name;
- buffer *hash_key;
- size_t size;
- }lookup;
- }fop;
-} glusterfs_async_local_t;
-
-typedef struct {
- unsigned long fd;
- buffer *glusterfs_path;
- void *buf;
- off_t response_content_length;
- int prefix;
-}mod_glusterfs_ctx_t;
-
-typedef struct {
- chunkqueue *cq;
- glusterfs_read_buf_t *buf;
- size_t length;
-}mod_glusterfs_chunkqueue;
-
-#ifdef HAVE_FAM_H
-typedef struct {
- FAMRequest *req;
- FAMConnection *fc;
-
- buffer *name;
-
- int version;
-} fam_dir_entry;
-#endif
-
-/* the directory name is too long to always compare on it
- * - we need a hash
- * - the hash-key is used as sorting criteria for a tree
- * - a splay-tree is used as we can use the caching effect of it
- */
-
-/* we want to cleanup the stat-cache every few seconds, let's say 10
- *
- * - remove entries which are outdated since 30s
- * - remove entries which are fresh but havn't been used since 60s
- * - if we don't have a stat-cache entry for a directory, release it from the monitor
- */
-
-#ifdef DEBUG_STAT_CACHE
-typedef struct {
- int *ptr;
-
- size_t used;
- size_t size;
-} fake_keys;
-
-static fake_keys ctrl;
-#endif
-
-static stat_cache_entry *
-stat_cache_entry_init(void)
-{
- stat_cache_entry *sce = NULL;
-
- sce = calloc(1, sizeof(*sce));
- /* ERR_ABORT (sce); */
-
- sce->name = buffer_init();
- sce->etag = buffer_init();
- sce->content_type = buffer_init();
-
- return sce;
-}
-
-int chunkqueue_append_glusterfs_mem (chunkqueue *cq, const char * mem, size_t len) {
- buffer *buf = NULL;
-
- buf = chunkqueue_get_append_buffer (cq);
-
- if (buf->ptr)
- free (buf->ptr);
-
- buf->used = len + 1;
- buf->ptr = (char *)mem;
- buf->size = len;
-
- return 0;
-}
-
-static int
-glusterfs_lookup_async_cbk (int op_ret,
- int op_errno,
- void *buf,
- struct stat *st,
- void *cbk_data)
-{
- glusterfs_async_local_t *local = cbk_data;
-
- mod_glusterfs_ctx_t *ctx = NULL;
- ctx = local->con->plugin_ctx[local->p->id];
-
- assert (ctx->buf== buf);
-
- if (op_ret || !(S_ISREG (st->st_mode) && (size_t)st->st_size <= local->fop.lookup.size)) {
-
- free (ctx->buf);
- ctx->buf = NULL;
-
- if (op_ret) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- free (ctx);
- local->con->plugin_ctx[local->p->id] = NULL;
-
- if (op_errno == ENOENT)
- local->con->http_status = 404;
- else
- local->con->http_status = 403;
- }
- }
-
- if (!op_ret) {
- stat_cache_entry *sce = NULL;
- stat_cache *sc = local->srv->stat_cache;
-
- sce = (stat_cache_entry *)g_hash_table_lookup(sc->files, local->fop.lookup.hash_key);
-
- if (!sce) {
- sce = stat_cache_entry_init();
-
- buffer_copy_string_buffer(sce->name, local->fop.lookup.name);
- g_hash_table_insert(sc->files, buffer_init_string(BUF_STR(local->fop.lookup.hash_key)), sce);
- }
-
- sce->state = STAT_CACHE_ENTRY_STAT_FINISHED;
- sce->stat_ts = time (NULL);
- memcpy (&sce->st, st, sizeof (*st));
- }
-
- g_async_queue_push (local->srv->joblist_queue, local->con);
- /*
- joblist_append (local->srv, local->con);
- kill (getpid(), SIGUSR1);
- */
- free (local);
- return 0;
-}
-
-static handler_t
-glusterfs_stat_cache_get_entry_async (server *srv,
- connection *con,
- plugin_data *p,
- buffer *glusterfs_path,
- buffer *name,
- void *buf,
- size_t size,
- stat_cache_entry **ret_sce)
-{
- stat_cache_entry *sce = NULL;
- stat_cache *sc;
- glusterfs_async_local_t *local = NULL;
-
- *ret_sce = NULL;
-
- /*
- * check if the directory for this file has changed
- */
-
- sc = srv->stat_cache;
-
- buffer_copy_string_buffer(sc->hash_key, name);
- buffer_append_long(sc->hash_key, con->conf.follow_symlink);
-
- if ((sce = (stat_cache_entry *)g_hash_table_lookup(sc->files, sc->hash_key))) {
- /* know this entry already */
-
- if (sce->state == STAT_CACHE_ENTRY_STAT_FINISHED &&
- !buf) {
- /* verify that this entry is still fresh */
-
- *ret_sce = sce;
-
- return HANDLER_GO_ON;
- }
- }
-
-
- /*
- * *lol*
- * - open() + fstat() on a named-pipe results in a (intended) hang.
- * - stat() if regular file + open() to see if we can read from it is better
- *
- * */
-
- /* pass a job to the stat-queue */
-
- local = calloc (1, sizeof (*local));
- /* ERR_ABORT (local); */
- local->con = con;
- local->srv = srv;
- local->p = p;
- local->fop.lookup.name = buffer_init_buffer (name);
- local->fop.lookup.hash_key = buffer_init_buffer (sc->hash_key);
- local->fop.lookup.size = size;
-
- if (glusterfs_lookup_async ((libglusterfs_handle_t )p->conf.handle, glusterfs_path->ptr, buf, size, glusterfs_lookup_async_cbk, (void *) local)) {
- free (local);
- return HANDLER_ERROR;
- }
-
- return HANDLER_WAIT_FOR_EVENT;
-}
-
-int
-mod_glusterfs_readv_async_cbk (glusterfs_read_buf_t *buf,
- void *cbk_data)
-{
- glusterfs_async_local_t *local = cbk_data;
- pthread_mutex_lock (&local->lock);
- {
- local->fop.readv.async_read_complete = 1;
- local->fop.readv.buf = buf;
-
- pthread_cond_signal (&local->cond);
- }
- pthread_mutex_unlock (&local->lock);
-
- return 0;
-}
-
-network_status_t
-mod_glusterfs_read_async (server *srv, connection *con, chunk *glusterfs_chunk)
-{
- glusterfs_async_local_t local;
- off_t end = 0;
- int nbytes;
- int complete;
- chunkqueue *cq = NULL;
- chunk *c = NULL;
- off_t offset = glusterfs_chunk->file.start;
- size_t length = glusterfs_chunk->file.length;
- unsigned long fd = (unsigned long)glusterfs_chunk->file.name;
- network_status_t ret;
-
- pthread_cond_init (&local.cond, NULL);
- pthread_mutex_init (&local.lock, NULL);
-
- //local.fd = fd;
- memset (&local, 0, sizeof (local));
-
- if (length > 0)
- end = offset + length;
-
- cq = chunkqueue_init ();
- if (!cq) {
- con->http_status = 500;
- return NETWORK_STATUS_FATAL_ERROR;
- }
-
- do {
- glusterfs_read_buf_t *buf;
- int i;
- if (length > 0) {
- nbytes = end - offset;
- if (nbytes > GLUSTERFS_CHUNK_SIZE)
- nbytes = GLUSTERFS_CHUNK_SIZE;
- } else
- nbytes = GLUSTERFS_CHUNK_SIZE;
-
- glusterfs_read_async(fd,
- nbytes,
- offset,
- mod_glusterfs_readv_async_cbk,
- (void *)&local);
-
- pthread_mutex_lock (&local.lock);
- {
- while (!local.fop.readv.async_read_complete) {
- pthread_cond_wait (&local.cond, &local.lock);
- }
-
- local.op_ret = local.fop.readv.buf->op_ret;
- local.op_errno = local.fop.readv.buf->op_errno;
-
- local.fop.readv.async_read_complete = 0;
- buf = local.fop.readv.buf;
-
- if ((int)length < 0)
- complete = (local.fop.readv.buf->op_ret <= 0);
- else {
- local.fop.readv.read_bytes += local.fop.readv.buf->op_ret;
- complete = ((local.fop.readv.read_bytes == length) || (local.fop.readv.buf->op_ret <= 0));
- }
- }
- pthread_mutex_unlock (&local.lock);
-
- if (local.op_ret > 0) {
- for (i = 0; i < buf->count; i++) {
- buffer *nw_write_buf = chunkqueue_get_append_buffer (cq);
-
- nw_write_buf->used = nw_write_buf->size = buf->vector[i].iov_len + 1;
- nw_write_buf->ptr = buf->vector[i].iov_base;
-
- // buffer_copy_memory (nw_write_buf, buf->vector[i].iov_base, buf->vector[i].iov_len + 1);
- offset += local.op_ret;
- }
-
- ret = network_backend_write (srv, con, con->sock, cq);
-
- if (chunkqueue_written (cq) != local.op_ret) {
- mod_glusterfs_chunkqueue *gf_cq;
- glusterfs_chunk->file.start = offset;
- if ((int)glusterfs_chunk->file.length > 0)
- glusterfs_chunk->file.length -= local.fop.readv.read_bytes;
-
- gf_cq = calloc (1, sizeof (*gf_cq));
- /* ERR_ABORT (qf_cq); */
- gf_cq->cq = cq;
- gf_cq->buf = buf;
- gf_cq->length = local.op_ret;
- glusterfs_chunk->file.mmap.start = (char *)gf_cq;
- return ret;
- }
-
- for (c = cq->first ; c; c = c->next)
- c->mem->ptr = NULL;
-
- chunkqueue_reset (cq);
- }
-
- glusterfs_free (buf);
- } while (!complete);
-
- chunkqueue_free (cq);
- glusterfs_close (fd);
-
- if (local.op_ret < 0)
- con->http_status = 500;
-
- return (local.op_ret < 0 ? NETWORK_STATUS_FATAL_ERROR : NETWORK_STATUS_SUCCESS);
-}
-
-network_status_t mod_glusterfs_network_backend_write(struct server *srv, connection *con, iosocket *sock, chunkqueue *cq)
-{
- chunk *c, *prev, *first;
- int chunks_written = 0;
- int error = 0;
- network_status_t ret;
-
- for (first = prev = c = cq->first; c; c = c->next, chunks_written++) {
-
- if (c->type == MEM_CHUNK && c->mem->used && !c->mem->ptr) {
- if (cq->first != c) {
- prev->next = NULL;
-
- /* call stored network_backend_write */
- ret = network_backend_write (srv, con, sock, cq);
-
- prev->next = c;
- if (ret != NETWORK_STATUS_SUCCESS) {
- cq->first = first;
- return ret;
- }
- }
- cq->first = c->next;
-
- if (c->file.fd < 0) {
- error = HANDLER_ERROR;
- break;
- }
-
- if (c->file.mmap.start) {
- chunk *tmp;
- size_t len;
- mod_glusterfs_chunkqueue *gf_cq = (mod_glusterfs_chunkqueue *)c->file.mmap.start;
-
- ret = network_backend_write (srv, con, sock, gf_cq->cq);
-
- if ((len = (size_t)chunkqueue_written (gf_cq->cq)) != gf_cq->length) {
- gf_cq->length -= len;
- cq->first = first;
- chunkqueue_remove_finished_chunks (gf_cq->cq);
- return ret;
- }
-
- for (tmp = gf_cq->cq->first ; tmp; tmp = tmp->next)
- tmp->mem->ptr = NULL;
-
- chunkqueue_free (gf_cq->cq);
- glusterfs_free (gf_cq->buf);
- free (gf_cq);
- c->file.mmap.start = NULL;
- }
-
- ret = mod_glusterfs_read_async (srv, con, c); //c->file.fd, c->file.start, -1);//c->file.length);
- if (c->file.mmap.start) {
- /* pending chunkqueue from mod_glusterfs_read_async to be written to network */
- cq->first = first;
- return ret;
- }
-
- buffer_free (c->mem);
- c->mem = NULL;
-
- c->type = FILE_CHUNK;
- c->offset = c->file.length = 0;
- c->file.name = NULL;
-
- if (first == c)
- first = c->next;
-
- if (cq->last == c)
- cq->last = NULL;
-
- prev->next = c->next;
-
- free(c);
- }
- prev = c;
- }
-
- ret = network_backend_write (srv, con, sock, cq);
-
- cq->first = first;
-
- return ret;
-}
-
-#if 0
-int chunkqueue_append_glusterfs_file (chunkqueue *cq, unsigned long fd, off_t offset, off_t len)
-{
- chunk *c = NULL;
- c = chunkqueue_get_append_tempfile (cq);
-
- if (c->file.is_temp) {
- close (c->file.fd);
- unlink (c->file.name->ptr);
- }
-
- c->type = MEM_CHUNK;
-
- c->mem = buffer_init ();
- c->mem->used = len + 1;
- c->mem->ptr = NULL;
- c->offset = 0;
-
- /* buffer_copy_string_buffer (c->file.name, fn); */
- c->file.start = offset;
- c->file.length = len;
- /* buffer_free (c->file.name); */
-
- /* identify chunk as glusterfs related */
- c->file.mmap.start = MAP_FAILED;
- /* c->file.mmap.length = c->file.mmap.offset = len;*/
-
- return 0;
-}
-#endif
-
-int chunkqueue_append_dummy_mem_chunk (chunkqueue *cq, off_t len)
-{
- chunk *c = NULL;
- c = chunkqueue_get_append_tempfile (cq);
-
- if (c->file.is_temp) {
- close (c->file.fd);
- unlink (c->file.name->ptr);
- c->file.is_temp = 0;
- }
-
- c->type = MEM_CHUNK;
-
- c->mem->used = len + 1;
- c->offset = len;
- c->mem->ptr = NULL;
-
- return 0;
-}
-
-int chunkqueue_append_glusterfs_file (chunkqueue *cq, unsigned long fd, off_t offset, off_t len)
-{
- chunk *c = NULL;
- c = chunkqueue_get_append_tempfile (cq);
-
- if (c->file.is_temp) {
- close (c->file.fd);
- unlink (c->file.name->ptr);
- c->file.is_temp = 0;
- }
-
- c->type = MEM_CHUNK;
-
- c->mem = buffer_init ();
- c->mem->used = len + 1;
- c->mem->ptr = NULL;
- c->offset = 0;
-
- /* buffer_copy_string_buffer (c->file.name, fn); */
- buffer_free (c->file.name);
-
- /* fd returned by libglusterfsclient is a pointer */
- c->file.name = (buffer *)fd;
- c->file.start = offset;
- c->file.length = len;
-
- //c->file.fd = fd;
- c->file.mmap.start = NULL;
- return 0;
-}
-
-/* init the plugin data */
-INIT_FUNC(mod_glusterfs_init) {
- plugin_data *p;
-
- UNUSED (srv);
- p = calloc(1, sizeof(*p));
- /* ERR_ABORT (p); */
- network_backend_write = NULL;
- p->ranges = http_request_range_init();
- return p;
-}
-
-/* detroy the plugin data */
-FREE_FUNC(mod_glusterfs_free) {
- plugin_data *p = p_d;
-
- UNUSED (srv);
-
- if (!p) return HANDLER_GO_ON;
-
- if (p->config_storage) {
- size_t i;
- for (i = 0; i < srv->config_context->used; i++) {
- plugin_config *s = p->config_storage[i];
-
- buffer_free (s->logfile);
- buffer_free (s->loglevel);
- buffer_free (s->specfile);
- buffer_free (s->prefix);
- buffer_free (s->xattr_file_size);
- buffer_free (s->document_root);
- array_free (s->exclude_exts);
-
- free (s);
- }
- free (p->config_storage);
- }
- buffer_free (p->range_buf);
- http_request_range_free (p->ranges);
-
- free (p);
-
- return HANDLER_GO_ON;
-}
-
-SETDEFAULTS_FUNC(mod_glusterfs_set_defaults) {
- plugin_data *p = p_d;
- size_t i = 0;
-
- config_values_t cv[] = {
- { "glusterfs.logfile", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.loglevel", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_CONNECTION },
- { "glusterfs.volume-specfile", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_CONNECTION },
- { "glusterfs.cache-timeout", NULL, T_CONFIG_SHORT, T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.exclude-extensions", NULL, T_CONFIG_ARRAY, T_CONFIG_SCOPE_CONNECTION },
-
- /*TODO: get the prefix from config_conext and remove glusterfs.prefix from conf file */
- { "glusterfs.prefix", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.xattr-interface-size-limit", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.document-root", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_CONNECTION },
-
- { NULL, NULL, T_CONFIG_UNSET, T_CONFIG_SCOPE_UNSET }
- };
-
- p->config_storage = calloc(1, srv->config_context->used * sizeof(specific_config *));
- /* ERR_ABORT (p->config_storage); */
- p->range_buf = buffer_init ();
-
- for (i = 0; i < srv->config_context->used; i++) {
- plugin_config *s;
-
- s = calloc(1, sizeof(plugin_config));
- /* ERR_ABORT (s); */
- s->logfile = buffer_init ();
- s->loglevel = buffer_init ();
- s->specfile = buffer_init ();
- s->exclude_exts = array_init ();
- s->prefix = buffer_init ();
- s->xattr_file_size = buffer_init ();
- s->document_root = buffer_init ();
-
- cv[0].destination = s->logfile;
- cv[1].destination = s->loglevel;
- cv[2].destination = s->specfile;
- cv[3].destination = &s->cache_timeout;
- cv[4].destination = s->exclude_exts;
- cv[5].destination = s->prefix;
- cv[6].destination = s->xattr_file_size;
- cv[7].destination = s->document_root;
-
- p->config_storage[i] = s;
-
- if (0 != config_insert_values_global(srv, ((data_config *)srv->config_context->data[i])->value, cv)) {
- return HANDLER_FINISHED;
- }
- }
-
- return HANDLER_GO_ON;
-}
-
-#define PATCH(x) \
- p->conf.x = s->x;
-
-static int mod_glusterfs_patch_connection(server *srv, connection *con, plugin_data *p) {
- size_t i, j;
- plugin_config *s;
-
- p->conf.logfile = NULL;
- p->conf.loglevel = NULL;
- p->conf.specfile = NULL;
- p->conf.cache_timeout = 0;
- p->conf.exclude_exts = NULL;
- p->conf.prefix = NULL;
- p->conf.xattr_file_size = NULL;
- p->conf.exclude_exts = NULL;
-
- /* skip the first, the global context */
- /* glusterfs related config can only occur inside $HTTP["url"] == "<glusterfs-prefix>" */
- for (i = 1; i < srv->config_context->used; i++) {
- data_config *dc = (data_config *)srv->config_context->data[i];
- s = p->config_storage[i];
-
- /* condition didn't match */
- if (!config_check_cond(srv, con, dc)) continue;
-
- /* merge config */
- for (j = 0; j < dc->value->used; j++) {
- data_unset *du = dc->value->data[j];
-
- if (buffer_is_equal_string (du->key, CONST_STR_LEN("glusterfs.logfile"))) {
- PATCH (logfile);
- } else if (buffer_is_equal_string (du->key, CONST_STR_LEN("glusterfs.loglevel"))) {
- PATCH (loglevel);
- } else if (buffer_is_equal_string (du->key, CONST_STR_LEN ("glusterfs.volume-specfile"))) {
- PATCH (specfile);
- } else if (buffer_is_equal_string (du->key, CONST_STR_LEN("glusterfs.cache-timeout"))) {
- PATCH (cache_timeout);
- } else if (buffer_is_equal_string (du->key, CONST_STR_LEN ("glusterfs.exclude-extensions"))) {
- PATCH (exclude_exts);
- } else if (buffer_is_equal_string (du->key, CONST_STR_LEN ("glusterfs.prefix"))) {
- PATCH (prefix);
- } else if (buffer_is_equal_string (du->key, CONST_STR_LEN ("glusterfs.xattr-interface-size-limit"))) {
- PATCH (xattr_file_size);
- } else if (buffer_is_equal_string (du->key, CONST_STR_LEN ("glusterfs.document-root"))) {
- PATCH (document_root);
- }
- }
- }
- return 0;
-}
-
-#undef PATCH
-
-static int http_response_parse_range(server *srv, connection *con, plugin_data *p) {
- int multipart = 0;
- char *boundary = "fkj49sn38dcn3";
- data_string *ds;
- stat_cache_entry *sce = NULL;
- buffer *content_type = NULL;
- buffer *range = NULL;
- http_req_range *ranges, *r;
- mod_glusterfs_ctx_t *ctx = con->plugin_ctx[p->id];
- size_t size = 0;
-
- if (!ctx) {
- return -1;
- }
-
- if (NULL != (ds = (data_string *)array_get_element(con->request.headers, CONST_STR_LEN("Range")))) {
- range = ds->value;
- } else {
- /* we don't have a Range header */
-
- return -1;
- }
-
- if (HANDLER_ERROR == stat_cache_get_entry(srv, con, con->physical.path, &sce)) {
- SEGFAULT();
- }
-
- ctx->response_content_length = con->response.content_length = 0;
-
- if (NULL != (ds = (data_string *)array_get_element(con->response.headers, CONST_STR_LEN("Content-Type")))) {
- content_type = ds->value;
- }
-
- /* start the range-header parser
- * bytes=<num> */
-
- ranges = p->ranges;
- http_request_range_reset(ranges);
- switch (http_request_range_parse(range, ranges)) {
- case PARSE_ERROR:
- return -1; /* no range valid Range Header */
- case PARSE_SUCCESS:
- break;
- default:
- TRACE("%s", "foobar");
- return -1;
- }
-
- if (ranges->next) {
- multipart = 1;
- }
-
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr) {
- size = atoi (p->conf.xattr_file_size->ptr);
- }
-
- /* patch the '-1' */
- for (r = ranges; r; r = r->next) {
- if (r->start == -1) {
- /* -<end>
- *
- * the last <end> bytes */
- r->start = sce->st.st_size - r->end;
- r->end = sce->st.st_size - 1;
- }
- if (r->end == -1) {
- /* <start>-
- * all but the first <start> bytes */
-
- r->end = sce->st.st_size - 1;
- }
-
- if (r->end > sce->st.st_size - 1) {
- /* RFC 2616 - 14.35.1
- *
- * if last-byte-pos not present or > size-of-file
- * take the size-of-file
- *
- * */
- r->end = sce->st.st_size - 1;
- }
-
- if (r->start > sce->st.st_size - 1) {
- /* RFC 2616 - 14.35.1
- *
- * if first-byte-pos > file-size, 416
- */
-
- con->http_status = 416;
- return -1;
- }
-
- if (r->start > r->end) {
- /* RFC 2616 - 14.35.1
- *
- * if last-byte-pos is present, it has to be >= first-byte-pos
- *
- * invalid ranges have to be handle as no Range specified
- * */
-
- return -1;
- }
- }
-
- if (r) {
- /* we ran into an range violation */
- return -1;
- }
-
- if (multipart) {
- buffer *b;
- for (r = ranges; r; r = r->next) {
- /* write boundary-header */
-
- b = chunkqueue_get_append_buffer(con->send);
-
- buffer_copy_string(b, "\r\n--");
- buffer_append_string(b, boundary);
-
- /* write Content-Range */
- buffer_append_string(b, "\r\nContent-Range: bytes ");
- buffer_append_off_t(b, r->start);
- buffer_append_string(b, "-");
- buffer_append_off_t(b, r->end);
- buffer_append_string(b, "/");
- buffer_append_off_t(b, sce->st.st_size);
-
- buffer_append_string(b, "\r\nContent-Type: ");
- buffer_append_string_buffer(b, content_type);
-
- /* write END-OF-HEADER */
- buffer_append_string(b, "\r\n\r\n");
-
- con->response.content_length += b->used - 1;
- ctx->response_content_length += b->used - 1;
- con->send->bytes_in += b->used - 1;
-
- if ((size_t)sce->st.st_size > size) {
- chunkqueue_append_glusterfs_file(con->send_raw, ctx->fd, r->start, r->end - r->start + 1);
- con->send_raw->bytes_in += (r->end - r->start + 1);
- chunkqueue_append_dummy_mem_chunk (con->send, r->end - r->start + 1);
- } else {
- chunkqueue_append_mem (con->send, ((char *)ctx->buf) + r->start, r->end - r->start + 1);
- free (ctx->buf);
- ctx->buf = NULL;
- }
-
- con->response.content_length += r->end - r->start + 1;
- ctx->response_content_length += r->end - r->start + 1;
- con->send->bytes_in += r->end - r->start + 1;
- }
-
- /* add boundary end */
- b = chunkqueue_get_append_buffer(con->send);
-
- buffer_copy_string_len(b, "\r\n--", 4);
- buffer_append_string(b, boundary);
- buffer_append_string_len(b, "--\r\n", 4);
-
- con->response.content_length += b->used - 1;
- ctx->response_content_length += b->used - 1;
- con->send->bytes_in += b->used - 1;
-
- /* set header-fields */
-
- buffer_copy_string(p->range_buf, "multipart/byteranges; boundary=");
- buffer_append_string(p->range_buf, boundary);
-
- /* overwrite content-type */
- response_header_overwrite(srv, con, CONST_STR_LEN("Content-Type"), CONST_BUF_LEN(p->range_buf));
-
- } else {
- r = ranges;
-
- chunkqueue_append_glusterfs_file(con->send_raw, ctx->fd, r->start, r->end - r->start + 1);
- con->send_raw->bytes_in += (r->end - r->start + 1);
- chunkqueue_append_dummy_mem_chunk (con->send, r->end - r->start + 1);
- con->response.content_length += r->end - r->start + 1;
- ctx->response_content_length += r->end - r->start + 1;
- con->send->bytes_in += r->end - r->start + 1;
-
- buffer_copy_string(p->range_buf, "bytes ");
- buffer_append_off_t(p->range_buf, r->start);
- buffer_append_string(p->range_buf, "-");
- buffer_append_off_t(p->range_buf, r->end);
- buffer_append_string(p->range_buf, "/");
- buffer_append_off_t(p->range_buf, sce->st.st_size);
-
- response_header_insert(srv, con, CONST_STR_LEN("Content-Range"), CONST_BUF_LEN(p->range_buf));
- }
-
- /* ok, the file is set-up */
- return 0;
-}
-
-PHYSICALPATH_FUNC(mod_glusterfs_handle_physical) {
- plugin_data *p = p_d;
- stat_cache_entry *sce;
- size_t size = 0;
- handler_t ret = 0;
- mod_glusterfs_ctx_t *plugin_ctx = NULL;
-
- if (con->http_status != 0) return HANDLER_GO_ON;
- if (con->uri.path->used == 0) return HANDLER_GO_ON;
- if (con->physical.path->used == 0) return HANDLER_GO_ON;
-
- if (con->mode != DIRECT) return HANDLER_GO_ON;
-
- /*
- network_backend_write = srv->network_backend_write;
- srv->network_backend_write = mod_glusterfs_network_backend_write;
- */
-
- switch (con->request.http_method) {
- case HTTP_METHOD_GET:
- case HTTP_METHOD_POST:
- case HTTP_METHOD_HEAD:
- break;
-
- default:
- return HANDLER_GO_ON;
- }
-
- mod_glusterfs_patch_connection(srv, con, p);
-
- if (!p->conf.prefix || !p->conf.prefix->ptr) {
- return HANDLER_GO_ON;
- }
-
- if (!p->conf.document_root || p->conf.document_root->used == 0) {
- log_error_write(srv, __FILE__, __LINE__, "s", "glusterfs.document-root is not specified");
- con->http_status = 500;
- return HANDLER_FINISHED;
- }
-
- if (p->conf.handle <= 0) {
- glusterfs_init_ctx_t ctx;
-
- if (!p->conf.specfile || p->conf.specfile->used == 0) {
- return HANDLER_GO_ON;
- }
- memset (&ctx, 0, sizeof (ctx));
-
- ctx.specfile = p->conf.specfile->ptr;
- ctx.logfile = p->conf.logfile->ptr;
- ctx.loglevel = p->conf.loglevel->ptr;
- ctx.lookup_timeout = ctx.stat_timeout = p->conf.cache_timeout;
-
- p->conf.handle = (unsigned long)glusterfs_init (&ctx);
-
- if (p->conf.handle <= 0) {
- con->http_status = 500;
- log_error_write(srv, __FILE__, __LINE__, "sbs", "glusterfs initialization failed, please check your configuration. Glusterfs logfile ", p->conf.logfile, "might contain details");
- return HANDLER_FINISHED;
- }
- }
-
- size = 0;
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr)
- size = atoi (p->conf.xattr_file_size->ptr);
-
- if (!con->plugin_ctx[p->id]) {
- buffer *tmp_buf = buffer_init_buffer (con->physical.basedir);
-
- plugin_ctx = calloc (1, sizeof (*plugin_ctx));
- /* ERR_ABORT (plugin_ctx); */
- con->plugin_ctx[p->id] = plugin_ctx;
-
- buffer_append_string_buffer (tmp_buf, p->conf.prefix);
- buffer_path_simplify (tmp_buf, tmp_buf);
-
- plugin_ctx->prefix = tmp_buf->used - 1;
- if (tmp_buf->ptr[plugin_ctx->prefix - 1] == '/')
- plugin_ctx->prefix--;
-
- buffer_free (tmp_buf);
- } else
- /*FIXME: error!! error!! */
- plugin_ctx = con->plugin_ctx[p->id];
-
-
- if (size)
- {
- plugin_ctx->buf = malloc (size);
- /* ERR_ABORT (plugin_ctx->buf); */
- }
-
- plugin_ctx->glusterfs_path = buffer_init ();
- buffer_copy_string_buffer (plugin_ctx->glusterfs_path, p->conf.document_root);
- buffer_append_string (plugin_ctx->glusterfs_path, "/");
- buffer_append_string (plugin_ctx->glusterfs_path, con->physical.path->ptr + plugin_ctx->prefix);
- buffer_path_simplify (plugin_ctx->glusterfs_path, plugin_ctx->glusterfs_path);
-
- ret = glusterfs_stat_cache_get_entry_async (srv, con, p, plugin_ctx->glusterfs_path, con->physical.path, plugin_ctx->buf, size, &sce);
-
- if (ret == HANDLER_ERROR) {
- free (plugin_ctx->buf);
- plugin_ctx->buf = NULL;
-
- buffer_free (plugin_ctx->glusterfs_path);
- plugin_ctx->glusterfs_path = NULL;
-
- free (plugin_ctx);
- con->plugin_ctx[p->id] = NULL;
-
- con->http_status = 500;
- ret = HANDLER_FINISHED;
- }
-
- return ret;
-}
-
-URIHANDLER_FUNC(mod_glusterfs_subrequest) {
- plugin_data *p = p_d;
- stat_cache_entry *sce = NULL;
- int s_len;
- unsigned long fd;
- char allow_caching = 1;
- size_t size = 0;
- mod_glusterfs_ctx_t *ctx = con->plugin_ctx[p->id];
-
- /* someone else has done a decision for us */
- if (con->http_status != 0) return HANDLER_GO_ON;
- if (con->uri.path->used == 0) return HANDLER_GO_ON;
- if (con->physical.path->used == 0) return HANDLER_GO_ON;
-
- /* someone else has handled this request */
- if (con->mode != DIRECT) return HANDLER_GO_ON;
-
- /* we only handle GET, POST and HEAD */
- switch(con->request.http_method) {
- case HTTP_METHOD_GET:
- case HTTP_METHOD_POST:
- case HTTP_METHOD_HEAD:
- break;
- default:
- return HANDLER_GO_ON;
- }
-
- mod_glusterfs_patch_connection(srv, con, p);
-
- if (!p->conf.prefix || !p->conf.prefix->ptr)
- return HANDLER_GO_ON;
-
- if (!ctx) {
- con->http_status = 500;
- return HANDLER_FINISHED;
- }
-
- s_len = con->uri.path->used - 1;
- /* ignore certain extensions */
- /*
- for (k = 0; k < p->conf.exclude_exts->used; k++) {
- data_string *ds;
- ds = (data_string *)p->conf.exclude_exts->data[k];
-
- if (ds->value->used == 0) continue;
-
- if (!strncmp (ds->value->ptr, con->uri.path->ptr, strlen (ds->value->ptr)))
- break;
- }
-
- if (k == p->conf.exclude_exts->used) {
- return HANDLER_GO_ON;
- }
- */
-
- if (con->conf.log_request_handling) {
- log_error_write(srv, __FILE__, __LINE__, "s", "-- serving file from glusterfs");
- }
-
- if (HANDLER_ERROR == stat_cache_get_entry(srv, con, con->physical.path, &sce)) {
- con->http_status = 403;
-
- /* this might happen if the sce is removed from stat-cache after a successful glusterfs_lookup */
- if (ctx) {
- if (ctx->buf) {
- free (ctx->buf);
- ctx->buf = NULL;
- }
-
- if (ctx->glusterfs_path) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- }
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- }
-
- log_error_write(srv, __FILE__, __LINE__, "sbsb",
- "not a regular file:", con->uri.path,
- "->", con->physical.path);
-
- return HANDLER_FINISHED;
- }
-
- if (con->uri.path->ptr[s_len] == '/' || !S_ISREG(sce->st.st_mode)) {
- if (ctx) {
- if (ctx->glusterfs_path) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- }
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- }
-
- return HANDLER_FINISHED;
- }
-
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr)
- size = atoi (p->conf.xattr_file_size->ptr);
-
- if ((size_t)sce->st.st_size > size) {
-
- fd = glusterfs_open ((libglusterfs_handle_t ) ((unsigned long)p->conf.handle), ctx->glusterfs_path->ptr, O_RDONLY, 0);
-
- if (!fd) {
- if (ctx) {
- if (ctx->glusterfs_path) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- }
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- }
-
- con->http_status = 403;
- return HANDLER_FINISHED;
- }
- ctx->fd = fd;
- }
-
- /* we only handline regular files */
-#ifdef HAVE_LSTAT
- if ((sce->is_symlink == 1) && !con->conf.follow_symlink) {
- con->http_status = 403;
-
- if (con->conf.log_request_handling) {
- log_error_write(srv, __FILE__, __LINE__, "s", "-- access denied due symlink restriction");
- log_error_write(srv, __FILE__, __LINE__, "sb", "Path :", con->physical.path);
- }
-
- buffer_reset(con->physical.path);
- if (ctx) {
- if (ctx->glusterfs_path) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- }
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- }
-
- return HANDLER_FINISHED;
- }
-#endif
- if (!S_ISREG(sce->st.st_mode)) {
- con->http_status = 404;
-
- if (con->conf.log_file_not_found) {
- log_error_write(srv, __FILE__, __LINE__, "sbsb",
- "not a regular file:", con->uri.path,
- "->", sce->name);
- }
-
- if (ctx) {
- if (ctx->glusterfs_path) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- }
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- }
-
- return HANDLER_FINISHED;
- }
-
- /* mod_compress might set several data directly, don't overwrite them */
-
- /* set response content-type, if not set already */
-
- if (NULL == array_get_element(con->response.headers, CONST_STR_LEN("Content-Type"))) {
- if (buffer_is_empty(sce->content_type)) {
- /* we are setting application/octet-stream, but also announce that
- * this header field might change in the seconds few requests
- *
- * This should fix the aggressive caching of FF and the script download
- * seen by the first installations
- */
- response_header_overwrite(srv, con, CONST_STR_LEN("Content-Type"), CONST_STR_LEN("application/octet-stream"));
-
- allow_caching = 0;
- } else {
- response_header_overwrite(srv, con, CONST_STR_LEN("Content-Type"), CONST_BUF_LEN(sce->content_type));
- }
- }
-
- if (con->conf.range_requests) {
- response_header_overwrite(srv, con, CONST_STR_LEN("Accept-Ranges"), CONST_STR_LEN("bytes"));
- }
-
- /* TODO: Allow Cachable requests */
-#if 0
- if (allow_caching) {
- if (p->conf.etags_used && con->etag_flags != 0 && !buffer_is_empty(sce->etag)) {
- if (NULL == array_get_element(con->response.headers, "ETag")) {
- /* generate e-tag */
- etag_mutate(con->physical.etag, sce->etag);
-
- response_header_overwrite(srv, con, CONST_STR_LEN("ETag"), CONST_BUF_LEN(con->physical.etag));
- }
- }
-
- /* prepare header */
- if (NULL == (ds = (data_string *)array_get_element(con->response.headers, "Last-Modified"))) {
- mtime = strftime_cache_get(srv, sce->st.st_mtime);
- response_header_overwrite(srv, con, CONST_STR_LEN("Last-Modified"), CONST_BUF_LEN(mtime));
- } else {
- mtime = ds->value;
- }
-
- if (HANDLER_FINISHED == http_response_handle_cachable(srv, con, mtime)) {
- if (ctx) {
- if (ctx->glusterfs_path) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- }
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- }
-
- return HANDLER_FINISHED;
- }
- }
-#endif
-
- /*TODO: Read about etags */
- if (NULL != array_get_element(con->request.headers, CONST_STR_LEN("Range")) && con->conf.range_requests) {
- int do_range_request = 1;
- data_string *ds = NULL;
- buffer *mtime = NULL;
- /* check if we have a conditional GET */
-
- /* prepare header */
- if (NULL == (ds = (data_string *)array_get_element(con->response.headers, CONST_STR_LEN("Last-Modified")))) {
- mtime = strftime_cache_get(srv, sce->st.st_mtime);
- response_header_overwrite(srv, con, CONST_STR_LEN("Last-Modified"), CONST_BUF_LEN(mtime));
- } else {
- mtime = ds->value;
- }
-
- if (NULL != (ds = (data_string *)array_get_element(con->request.headers, CONST_STR_LEN("If-Range")))) {
- /* if the value is the same as our ETag, we do a Range-request,
- * otherwise a full 200 */
-
- if (ds->value->ptr[0] == '"') {
- /**
- * client wants a ETag
- */
- if (!con->physical.etag) {
- do_range_request = 0;
- } else if (!buffer_is_equal(ds->value, con->physical.etag)) {
- do_range_request = 0;
- }
- } else if (!mtime) {
- /**
- * we don't have a Last-Modified and can match the If-Range:
- *
- * sending all
- */
- do_range_request = 0;
- } else if (!buffer_is_equal(ds->value, mtime)) {
- do_range_request = 0;
- }
- }
-
- if (do_range_request) {
- /* content prepared, I'm done */
- con->send->is_closed = 1;
-
- if (0 == http_response_parse_range(srv, con, p)) {
- con->http_status = 206;
- }
- if (ctx) {
- if (ctx->glusterfs_path) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- }
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- }
-
- return HANDLER_FINISHED;
- }
- }
-
- /* if we are still here, prepare body */
-
- /* we add it here for all requests
- * the HEAD request will drop it afterwards again
- */
-
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr)
- size = atoi (p->conf.xattr_file_size->ptr);
-
- if (size < (size_t)sce->st.st_size) {
- chunkqueue_append_glusterfs_file (con->send_raw, fd, 0, sce->st.st_size);
- con->send_raw->bytes_in += sce->st.st_size;
- chunkqueue_append_dummy_mem_chunk (con->send, sce->st.st_size);
- } else {
- if (!ctx->buf) {
- con->http_status = 404;
- return HANDLER_ERROR;
- }
- chunkqueue_append_glusterfs_mem (con->send, ctx->buf, sce->st.st_size);
- ctx->buf = NULL;
- }
- ctx->response_content_length = con->response.content_length = sce->st.st_size;
-
- con->send->is_closed = 1;
- con->send->bytes_in = sce->st.st_size;
-
- return HANDLER_FINISHED;
-}
-
-/* this function is called at dlopen() time and inits the callbacks */
-CONNECTION_FUNC(mod_glusterfs_connection_reset)
-{
- (void) p_d;
- (void) con;
- if (!network_backend_write)
- network_backend_write = srv->network_backend_write;
-
- srv->network_backend_write = mod_glusterfs_network_backend_write;
-
- return HANDLER_GO_ON;
-}
-
-URIHANDLER_FUNC(mod_glusterfs_response_done) {
- plugin_data *p = p_d;
- UNUSED (srv);
- mod_glusterfs_ctx_t *ctx = con->plugin_ctx[p->id];
-
- con->plugin_ctx[p->id] = NULL;
- if (ctx->glusterfs_path) {
- free (ctx->glusterfs_path);
- }
-
- free (ctx);
- return HANDLER_GO_ON;
-}
-
-int mod_glusterfs_plugin_init(plugin *p) {
- p->version = LIGHTTPD_VERSION_ID;
- p->name = buffer_init_string("glusterfs");
- p->init = mod_glusterfs_init;
- p->handle_physical = mod_glusterfs_handle_physical;
- p->handle_start_backend = mod_glusterfs_subrequest;
- p->handle_response_done = mod_glusterfs_response_done;
- p->set_defaults = mod_glusterfs_set_defaults;
- p->connection_reset = mod_glusterfs_connection_reset;
- p->cleanup = mod_glusterfs_free;
-
- p->data = NULL;
-
- return 0;
-}
diff --git a/mod_glusterfs/lighttpd/1.5/mod_glusterfs.h b/mod_glusterfs/lighttpd/1.5/mod_glusterfs.h
deleted file mode 100644
index 07ea32395..000000000
--- a/mod_glusterfs/lighttpd/1.5/mod_glusterfs.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _MOD_GLUSTERFS_FILE_CACHE_H_
-#define _MOD_GLUSTERFS_FILE_CACHE_H_
-
-#include "stat_cache.h"
-#include <libglusterfsclient.h>
-#include "base.h"
-
-handler_t glusterfs_stat_cache_get_entry(server *srv, connection *con, libglusterfs_handle_t handle, buffer *glusterfs_path, buffer *name, void *buf, size_t size, stat_cache_entry **fce);
-
-#endif
diff --git a/mod_glusterfs/lighttpd/Makefile.am b/mod_glusterfs/lighttpd/Makefile.am
deleted file mode 100644
index c934412b3..000000000
--- a/mod_glusterfs/lighttpd/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = 1.4 1.5
-
-CLEANFILES =
diff --git a/rfc.sh b/rfc.sh
new file mode 100755
index 000000000..baf700495
--- /dev/null
+++ b/rfc.sh
@@ -0,0 +1,114 @@
+#!/bin/sh -e
+
+
+branch="development";
+
+
+set_hooks_commit_msg()
+{
+ f=".git/hooks/commit-msg";
+ u="http://review.gluster.com/tools/hooks/commit-msg";
+
+ if [ -x "$f" ]; then
+ return;
+ fi
+
+ curl -o $f $u || wget -O $f $u;
+
+ chmod +x .git/hooks/commit-msg;
+
+ # Let the 'Change-Id: ' header get assigned on first run of rfc.sh
+ GIT_EDITOR=true git commit --amend;
+}
+
+
+is_num()
+{
+ local num;
+
+ num="$1";
+
+ [ -z "$(echo $num | sed -e 's/[0-9]//g')" ]
+}
+
+
+rebase_changes()
+{
+ git fetch origin;
+
+ GIT_EDITOR=$0 git rebase -i origin/$branch;
+}
+
+
+editor_mode()
+{
+ if [ $(basename "$1") = "git-rebase-todo" ]; then
+ sed 's/^pick /reword /g' "$1" > $1.new && mv $1.new $1;
+ return;
+ fi
+
+ if [ $(basename "$1") = "COMMIT_EDITMSG" ]; then
+ if grep -qi '^BUG: ' $1; then
+ return;
+ fi
+ while true; do
+ echo Commit: "\"$(head -n 1 $1)\""
+ echo -n "Enter Bug ID: "
+ read bug
+ if [ -z "$bug" ]; then
+ return;
+ fi
+ if ! is_num "$bug"; then
+ echo "Invalid Bug ID ($bug)!!!";
+ continue;
+ fi
+
+ sed "/^Change-Id:/{p; s/^.*$/BUG: $bug/;}" $1 > $1.new && \
+ mv $1.new $1;
+ return;
+ done
+ fi
+
+ cat <<EOF
+$0 - editor_mode called on unrecognized file $1 with content:
+$(cat $1)
+EOF
+ return 1;
+}
+
+
+assert_diverge()
+{
+ git diff origin/$branch..HEAD | grep -q .;
+}
+
+
+main()
+{
+ set_hooks_commit_msg;
+
+ if [ -e "$1" ]; then
+ editor_mode "$@";
+ return;
+ fi
+
+ rebase_changes;
+
+ assert_diverge;
+
+ bug=$(git show --format='%b' | grep -i '^BUG: ' | awk '{print $2}');
+
+ if [ "$DRY_RUN" = 1 ]; then
+ drier='echo -e Please use the following command to send your commits to review:\n\n'
+ else
+ drier=
+ fi
+
+ if [ -z "$bug" ]; then
+ $drier git push origin HEAD:refs/for/$branch/rfc;
+ else
+ $drier git push origin HEAD:refs/for/$branch/bug-$bug;
+ fi
+}
+
+main "$@"
diff --git a/rpc/Makefile.am b/rpc/Makefile.am
new file mode 100644
index 000000000..ffb76e901
--- /dev/null
+++ b/rpc/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = rpc-lib rpc-transport xdr
diff --git a/xlators/performance/stat-prefetch/Makefile.am b/rpc/rpc-lib/Makefile.am
index af437a64d..af437a64d 100644
--- a/xlators/performance/stat-prefetch/Makefile.am
+++ b/rpc/rpc-lib/Makefile.am
diff --git a/rpc/rpc-lib/src/Makefile.am b/rpc/rpc-lib/src/Makefile.am
new file mode 100644
index 000000000..f19c3c8a4
--- /dev/null
+++ b/rpc/rpc-lib/src/Makefile.am
@@ -0,0 +1,19 @@
+lib_LTLIBRARIES = libgfrpc.la
+
+libgfrpc_la_SOURCES = auth-unix.c rpcsvc-auth.c rpcsvc.c auth-null.c \
+ rpc-transport.c xdr-rpc.c xdr-rpcclnt.c rpc-clnt.c auth-glusterfs.c \
+ rpc-drc.c
+
+libgfrpc_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = rpcsvc.h rpc-transport.h xdr-common.h xdr-rpc.h xdr-rpcclnt.h \
+ rpc-clnt.h rpcsvc-common.h protocol-common.h rpc-drc.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src \
+ -DRPC_TRANSPORTDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/rpc-transport\" \
+ -I$(top_srcdir)/contrib/rbtree
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES = *~
diff --git a/rpc/rpc-lib/src/auth-glusterfs.c b/rpc/rpc-lib/src/auth-glusterfs.c
new file mode 100644
index 000000000..db488434c
--- /dev/null
+++ b/rpc/rpc-lib/src/auth-glusterfs.c
@@ -0,0 +1,276 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "rpcsvc.h"
+#include "list.h"
+#include "dict.h"
+#include "xdr-rpc.h"
+#include "xdr-common.h"
+#include "rpc-common-xdr.h"
+
+/* V1 */
+
+ssize_t
+xdr_to_glusterfs_auth (char *buf, struct auth_glusterfs_parms *req)
+{
+ XDR xdr;
+ ssize_t ret = -1;
+
+ if ((!buf) || (!req))
+ return -1;
+
+ xdrmem_create (&xdr, buf, sizeof (struct auth_glusterfs_parms),
+ XDR_DECODE);
+ if (!xdr_auth_glusterfs_parms (&xdr, req)) {
+ gf_log ("", GF_LOG_WARNING,
+ "failed to decode glusterfs parameters");
+ ret = -1;
+ goto ret;
+ }
+
+ ret = (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base));
+ret:
+ return ret;
+
+}
+int
+auth_glusterfs_request_init (rpcsvc_request_t *req, void *priv)
+{
+ if (!req)
+ return -1;
+ memset (req->verf.authdata, 0, GF_MAX_AUTH_BYTES);
+ req->verf.datalen = 0;
+ req->verf.flavour = AUTH_NULL;
+
+ return 0;
+}
+
+int auth_glusterfs_authenticate (rpcsvc_request_t *req, void *priv)
+{
+ struct auth_glusterfs_parms au = {0,};
+
+ int ret = RPCSVC_AUTH_REJECT;
+ int j = 0;
+ int i = 0;
+ int gidcount = 0;
+
+ if (!req)
+ return ret;
+
+ ret = xdr_to_glusterfs_auth (req->cred.authdata, &au);
+ if (ret == -1) {
+ gf_log ("", GF_LOG_WARNING,
+ "failed to decode glusterfs credentials");
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ req->pid = au.pid;
+ req->uid = au.uid;
+ req->gid = au.gid;
+ req->lk_owner.len = 8;
+ {
+ for (i = 0; i < req->lk_owner.len; i++, j += 8)
+ req->lk_owner.data[i] = (char)((au.lk_owner >> j) & 0xff);
+ }
+ req->auxgidcount = au.ngrps;
+
+ if (req->auxgidcount > 16) {
+ gf_log ("", GF_LOG_WARNING,
+ "more than 16 aux gids found, failing authentication");
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ if (req->auxgidcount > SMALL_GROUP_COUNT) {
+ req->auxgidlarge = GF_CALLOC(req->auxgidcount,
+ sizeof(req->auxgids[0]),
+ gf_common_mt_auxgids);
+ req->auxgids = req->auxgidlarge;
+ } else {
+ req->auxgids = req->auxgidsmall;
+ }
+
+ if (!req->auxgids) {
+ gf_log ("auth-glusterfs", GF_LOG_WARNING,
+ "cannot allocate gid list");
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ for (gidcount = 0; gidcount < au.ngrps; ++gidcount)
+ req->auxgids[gidcount] = au.groups[gidcount];
+
+ RPC_AUTH_ROOT_SQUASH(req);
+
+ gf_log (GF_RPCSVC, GF_LOG_TRACE, "Auth Info: pid: %u, uid: %d"
+ ", gid: %d, owner: %s",
+ req->pid, req->uid, req->gid, lkowner_utoa (&req->lk_owner));
+ ret = RPCSVC_AUTH_ACCEPT;
+err:
+ return ret;
+}
+
+rpcsvc_auth_ops_t auth_glusterfs_ops = {
+ .transport_init = NULL,
+ .request_init = auth_glusterfs_request_init,
+ .authenticate = auth_glusterfs_authenticate
+};
+
+rpcsvc_auth_t rpcsvc_auth_glusterfs = {
+ .authname = "AUTH_GLUSTERFS",
+ .authnum = AUTH_GLUSTERFS,
+ .authops = &auth_glusterfs_ops,
+ .authprivate = NULL
+};
+
+
+rpcsvc_auth_t *
+rpcsvc_auth_glusterfs_init (rpcsvc_t *svc, dict_t *options)
+{
+ return &rpcsvc_auth_glusterfs;
+}
+
+/* V2 */
+
+ssize_t
+xdr_to_glusterfs_auth_v2 (char *buf, struct auth_glusterfs_parms_v2 *req)
+{
+ XDR xdr;
+ ssize_t ret = -1;
+
+ if ((!buf) || (!req))
+ return -1;
+
+ xdrmem_create (&xdr, buf, GF_MAX_AUTH_BYTES, XDR_DECODE);
+ if (!xdr_auth_glusterfs_parms_v2 (&xdr, req)) {
+ gf_log ("", GF_LOG_WARNING,
+ "failed to decode glusterfs v2 parameters");
+ ret = -1;
+ goto ret;
+ }
+
+ ret = (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base));
+ret:
+ return ret;
+
+}
+int
+auth_glusterfs_v2_request_init (rpcsvc_request_t *req, void *priv)
+{
+ if (!req)
+ return -1;
+ memset (req->verf.authdata, 0, GF_MAX_AUTH_BYTES);
+ req->verf.datalen = 0;
+ req->verf.flavour = AUTH_NULL;
+
+ return 0;
+}
+
+int auth_glusterfs_v2_authenticate (rpcsvc_request_t *req, void *priv)
+{
+ struct auth_glusterfs_parms_v2 au = {0,};
+ int ret = RPCSVC_AUTH_REJECT;
+ int i = 0;
+
+ if (!req)
+ return ret;
+
+ ret = xdr_to_glusterfs_auth_v2 (req->cred.authdata, &au);
+ if (ret == -1) {
+ gf_log ("", GF_LOG_WARNING,
+ "failed to decode glusterfs credentials");
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ req->pid = au.pid;
+ req->uid = au.uid;
+ req->gid = au.gid;
+ req->lk_owner.len = au.lk_owner.lk_owner_len;
+ req->auxgidcount = au.groups.groups_len;
+
+ if (req->auxgidcount > GF_MAX_AUX_GROUPS) {
+ gf_log ("", GF_LOG_WARNING,
+ "more than max aux gids found (%d) , truncating it "
+ "to %d and continuing", au.groups.groups_len,
+ GF_MAX_AUX_GROUPS);
+ req->auxgidcount = GF_MAX_AUX_GROUPS;
+ }
+
+ if (req->lk_owner.len > GF_MAX_LOCK_OWNER_LEN) {
+ gf_log ("", GF_LOG_WARNING,
+ "lkowner field > 1k, failing authentication");
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ if (req->auxgidcount > SMALL_GROUP_COUNT) {
+ req->auxgidlarge = GF_CALLOC(req->auxgidcount,
+ sizeof(req->auxgids[0]),
+ gf_common_mt_auxgids);
+ req->auxgids = req->auxgidlarge;
+ } else {
+ req->auxgids = req->auxgidsmall;
+ }
+
+ if (!req->auxgids) {
+ gf_log ("auth-glusterfs-v2", GF_LOG_WARNING,
+ "cannot allocate gid list");
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ for (i = 0; i < req->auxgidcount; ++i)
+ req->auxgids[i] = au.groups.groups_val[i];
+
+ for (i = 0; i < au.lk_owner.lk_owner_len; ++i)
+ req->lk_owner.data[i] = au.lk_owner.lk_owner_val[i];
+
+ RPC_AUTH_ROOT_SQUASH(req);
+
+ gf_log (GF_RPCSVC, GF_LOG_TRACE, "Auth Info: pid: %u, uid: %d"
+ ", gid: %d, owner: %s",
+ req->pid, req->uid, req->gid, lkowner_utoa (&req->lk_owner));
+ ret = RPCSVC_AUTH_ACCEPT;
+err:
+ /* TODO: instead use alloca() for these variables */
+ free (au.groups.groups_val);
+ free (au.lk_owner.lk_owner_val);
+
+ return ret;
+}
+
+rpcsvc_auth_ops_t auth_glusterfs_ops_v2 = {
+ .transport_init = NULL,
+ .request_init = auth_glusterfs_v2_request_init,
+ .authenticate = auth_glusterfs_v2_authenticate
+};
+
+rpcsvc_auth_t rpcsvc_auth_glusterfs_v2 = {
+ .authname = "AUTH_GLUSTERFS-v2",
+ .authnum = AUTH_GLUSTERFS_v2,
+ .authops = &auth_glusterfs_ops_v2,
+ .authprivate = NULL
+};
+
+
+rpcsvc_auth_t *
+rpcsvc_auth_glusterfs_v2_init (rpcsvc_t *svc, dict_t *options)
+{
+ return &rpcsvc_auth_glusterfs_v2;
+}
diff --git a/rpc/rpc-lib/src/auth-null.c b/rpc/rpc-lib/src/auth-null.c
new file mode 100644
index 000000000..ebdcc8ff8
--- /dev/null
+++ b/rpc/rpc-lib/src/auth-null.c
@@ -0,0 +1,61 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "rpcsvc.h"
+#include "list.h"
+#include "dict.h"
+
+
+int
+auth_null_request_init (rpcsvc_request_t *req, void *priv)
+{
+ if (!req)
+ return -1;
+
+ memset (req->cred.authdata, 0, GF_MAX_AUTH_BYTES);
+ req->cred.datalen = 0;
+
+ memset (req->verf.authdata, 0, GF_MAX_AUTH_BYTES);
+ req->verf.datalen = 0;
+
+ return 0;
+}
+
+int auth_null_authenticate (rpcsvc_request_t *req, void *priv)
+{
+ /* Always succeed. */
+ return RPCSVC_AUTH_ACCEPT;
+}
+
+rpcsvc_auth_ops_t auth_null_ops = {
+ .transport_init = NULL,
+ .request_init = auth_null_request_init,
+ .authenticate = auth_null_authenticate
+};
+
+rpcsvc_auth_t rpcsvc_auth_null = {
+ .authname = "AUTH_NULL",
+ .authnum = AUTH_NULL,
+ .authops = &auth_null_ops,
+ .authprivate = NULL
+};
+
+
+rpcsvc_auth_t *
+rpcsvc_auth_null_init (rpcsvc_t *svc, dict_t *options)
+{
+ return &rpcsvc_auth_null;
+}
diff --git a/rpc/rpc-lib/src/auth-unix.c b/rpc/rpc-lib/src/auth-unix.c
new file mode 100644
index 000000000..fa5f0576e
--- /dev/null
+++ b/rpc/rpc-lib/src/auth-unix.c
@@ -0,0 +1,83 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "rpcsvc.h"
+#include "list.h"
+#include "dict.h"
+#include "xdr-rpc.h"
+
+
+int
+auth_unix_request_init (rpcsvc_request_t *req, void *priv)
+{
+ if (!req)
+ return -1;
+ memset (req->verf.authdata, 0, GF_MAX_AUTH_BYTES);
+ req->verf.datalen = 0;
+ req->verf.flavour = AUTH_NULL;
+
+ return 0;
+}
+
+int auth_unix_authenticate (rpcsvc_request_t *req, void *priv)
+{
+ int ret = RPCSVC_AUTH_REJECT;
+ struct authunix_parms aup;
+ char machname[MAX_MACHINE_NAME];
+
+ if (!req)
+ return ret;
+
+ req->auxgids = req->auxgidsmall;
+ ret = xdr_to_auth_unix_cred (req->cred.authdata, req->cred.datalen,
+ &aup, machname, req->auxgids);
+ if (ret == -1) {
+ gf_log ("", GF_LOG_WARNING, "failed to decode unix credentials");
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ req->uid = aup.aup_uid;
+ req->gid = aup.aup_gid;
+ req->auxgidcount = aup.aup_len;
+
+ gf_log (GF_RPCSVC, GF_LOG_TRACE, "Auth Info: machine name: %s, uid: %d"
+ ", gid: %d", machname, req->uid, req->gid);
+ ret = RPCSVC_AUTH_ACCEPT;
+err:
+ return ret;
+}
+
+rpcsvc_auth_ops_t auth_unix_ops = {
+ .transport_init = NULL,
+ .request_init = auth_unix_request_init,
+ .authenticate = auth_unix_authenticate
+};
+
+rpcsvc_auth_t rpcsvc_auth_unix = {
+ .authname = "AUTH_UNIX",
+ .authnum = AUTH_UNIX,
+ .authops = &auth_unix_ops,
+ .authprivate = NULL
+};
+
+
+rpcsvc_auth_t *
+rpcsvc_auth_unix_init (rpcsvc_t *svc, dict_t *options)
+{
+ return &rpcsvc_auth_unix;
+}
diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h
new file mode 100644
index 000000000..8bef906cc
--- /dev/null
+++ b/rpc/rpc-lib/src/protocol-common.h
@@ -0,0 +1,264 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _PROTOCOL_COMMON_H
+#define _PROTOCOL_COMMON_H
+
+enum gf_fop_procnum {
+ GFS3_OP_NULL, /* 0 */
+ GFS3_OP_STAT,
+ GFS3_OP_READLINK,
+ GFS3_OP_MKNOD,
+ GFS3_OP_MKDIR,
+ GFS3_OP_UNLINK,
+ GFS3_OP_RMDIR,
+ GFS3_OP_SYMLINK,
+ GFS3_OP_RENAME,
+ GFS3_OP_LINK,
+ GFS3_OP_TRUNCATE,
+ GFS3_OP_OPEN,
+ GFS3_OP_READ,
+ GFS3_OP_WRITE,
+ GFS3_OP_STATFS,
+ GFS3_OP_FLUSH,
+ GFS3_OP_FSYNC,
+ GFS3_OP_SETXATTR,
+ GFS3_OP_GETXATTR,
+ GFS3_OP_REMOVEXATTR,
+ GFS3_OP_OPENDIR,
+ GFS3_OP_FSYNCDIR,
+ GFS3_OP_ACCESS,
+ GFS3_OP_CREATE,
+ GFS3_OP_FTRUNCATE,
+ GFS3_OP_FSTAT,
+ GFS3_OP_LK,
+ GFS3_OP_LOOKUP,
+ GFS3_OP_READDIR,
+ GFS3_OP_INODELK,
+ GFS3_OP_FINODELK,
+ GFS3_OP_ENTRYLK,
+ GFS3_OP_FENTRYLK,
+ GFS3_OP_XATTROP,
+ GFS3_OP_FXATTROP,
+ GFS3_OP_FGETXATTR,
+ GFS3_OP_FSETXATTR,
+ GFS3_OP_RCHECKSUM,
+ GFS3_OP_SETATTR,
+ GFS3_OP_FSETATTR,
+ GFS3_OP_READDIRP,
+ GFS3_OP_RELEASE,
+ GFS3_OP_RELEASEDIR,
+ GFS3_OP_FREMOVEXATTR,
+ GFS3_OP_FALLOCATE,
+ GFS3_OP_DISCARD,
+ GFS3_OP_ZEROFILL,
+ GFS3_OP_MAXVALUE,
+} ;
+
+enum gf_handshake_procnum {
+ GF_HNDSK_NULL,
+ GF_HNDSK_SETVOLUME,
+ GF_HNDSK_GETSPEC,
+ GF_HNDSK_PING,
+ GF_HNDSK_SET_LK_VER,
+ GF_HNDSK_EVENT_NOTIFY,
+ GF_HNDSK_MAXVALUE,
+};
+
+enum gf_pmap_procnum {
+ GF_PMAP_NULL = 0,
+ GF_PMAP_PORTBYBRICK,
+ GF_PMAP_BRICKBYPORT,
+ GF_PMAP_SIGNUP,
+ GF_PMAP_SIGNIN,
+ GF_PMAP_SIGNOUT,
+ GF_PMAP_MAXVALUE,
+};
+
+enum gf_pmap_port_type {
+ GF_PMAP_PORT_FREE = 0,
+ GF_PMAP_PORT_FOREIGN,
+ GF_PMAP_PORT_LEASED,
+ GF_PMAP_PORT_NONE,
+ GF_PMAP_PORT_BRICKSERVER,
+};
+typedef enum gf_pmap_port_type gf_pmap_port_type_t;
+
+enum gf_probe_resp {
+ GF_PROBE_SUCCESS,
+ GF_PROBE_LOCALHOST,
+ GF_PROBE_FRIEND,
+ GF_PROBE_ANOTHER_CLUSTER,
+ GF_PROBE_VOLUME_CONFLICT,
+ GF_PROBE_SAME_UUID,
+ GF_PROBE_UNKNOWN_PEER,
+ GF_PROBE_ADD_FAILED,
+ GF_PROBE_QUORUM_NOT_MET
+};
+
+enum gf_deprobe_resp {
+ GF_DEPROBE_SUCCESS,
+ GF_DEPROBE_LOCALHOST,
+ GF_DEPROBE_NOT_FRIEND,
+ GF_DEPROBE_BRICK_EXIST,
+ GF_DEPROBE_FRIEND_DOWN,
+ GF_DEPROBE_QUORUM_NOT_MET,
+};
+
+enum gf_cbk_procnum {
+ GF_CBK_NULL = 0,
+ GF_CBK_FETCHSPEC,
+ GF_CBK_INO_FLUSH,
+ GF_CBK_EVENT_NOTIFY,
+ GF_CBK_MAXVALUE,
+};
+
+enum gluster_cli_procnum {
+ GLUSTER_CLI_NULL, /* 0 */
+ GLUSTER_CLI_PROBE,
+ GLUSTER_CLI_DEPROBE,
+ GLUSTER_CLI_LIST_FRIENDS,
+ GLUSTER_CLI_CREATE_VOLUME,
+ GLUSTER_CLI_GET_VOLUME,
+ GLUSTER_CLI_GET_NEXT_VOLUME,
+ GLUSTER_CLI_DELETE_VOLUME,
+ GLUSTER_CLI_START_VOLUME,
+ GLUSTER_CLI_STOP_VOLUME,
+ GLUSTER_CLI_RENAME_VOLUME,
+ GLUSTER_CLI_DEFRAG_VOLUME,
+ GLUSTER_CLI_SET_VOLUME,
+ GLUSTER_CLI_ADD_BRICK,
+ GLUSTER_CLI_REMOVE_BRICK,
+ GLUSTER_CLI_REPLACE_BRICK,
+ GLUSTER_CLI_LOG_ROTATE,
+ GLUSTER_CLI_GETSPEC,
+ GLUSTER_CLI_PMAP_PORTBYBRICK,
+ GLUSTER_CLI_SYNC_VOLUME,
+ GLUSTER_CLI_RESET_VOLUME,
+ GLUSTER_CLI_FSM_LOG,
+ GLUSTER_CLI_GSYNC_SET,
+ GLUSTER_CLI_PROFILE_VOLUME,
+ GLUSTER_CLI_QUOTA,
+ GLUSTER_CLI_TOP_VOLUME,
+ GLUSTER_CLI_GETWD,
+ GLUSTER_CLI_STATUS_VOLUME,
+ GLUSTER_CLI_STATUS_ALL,
+ GLUSTER_CLI_MOUNT,
+ GLUSTER_CLI_UMOUNT,
+ GLUSTER_CLI_HEAL_VOLUME,
+ GLUSTER_CLI_STATEDUMP_VOLUME,
+ GLUSTER_CLI_LIST_VOLUME,
+ GLUSTER_CLI_CLRLOCKS_VOLUME,
+ GLUSTER_CLI_UUID_RESET,
+ GLUSTER_CLI_UUID_GET,
+ GLUSTER_CLI_COPY_FILE,
+ GLUSTER_CLI_SYS_EXEC,
+ GLUSTER_CLI_SNAP,
+ GLUSTER_CLI_MAXVALUE,
+};
+
+enum glusterd_mgmt_procnum {
+ GLUSTERD_MGMT_NULL, /* 0 */
+ GLUSTERD_MGMT_CLUSTER_LOCK,
+ GLUSTERD_MGMT_CLUSTER_UNLOCK,
+ GLUSTERD_MGMT_STAGE_OP,
+ GLUSTERD_MGMT_COMMIT_OP,
+ GLUSTERD_MGMT_MAXVALUE,
+};
+
+enum glusterd_friend_procnum {
+ GLUSTERD_FRIEND_NULL, /* 0 */
+ GLUSTERD_PROBE_QUERY,
+ GLUSTERD_FRIEND_ADD,
+ GLUSTERD_FRIEND_REMOVE,
+ GLUSTERD_FRIEND_UPDATE,
+ GLUSTERD_FRIEND_MAXVALUE,
+};
+
+enum glusterd_brick_procnum {
+ GLUSTERD_BRICK_NULL, /* 0 */
+ GLUSTERD_BRICK_TERMINATE,
+ GLUSTERD_BRICK_XLATOR_INFO,
+ GLUSTERD_BRICK_XLATOR_OP,
+ GLUSTERD_BRICK_STATUS,
+ GLUSTERD_BRICK_OP,
+ GLUSTERD_BRICK_XLATOR_DEFRAG,
+ GLUSTERD_NODE_PROFILE,
+ GLUSTERD_NODE_STATUS,
+ GLUSTERD_VOLUME_BARRIER_OP,
+ GLUSTERD_BRICK_MAXVALUE,
+};
+
+enum glusterd_mgmt_hndsk_procnum {
+ GD_MGMT_HNDSK_NULL,
+ GD_MGMT_HNDSK_VERSIONS,
+ GD_MGMT_HNDSK_VERSIONS_ACK,
+ GD_MGMT_HNDSK_MAXVALUE,
+};
+
+typedef enum {
+ GF_AFR_OP_INVALID,
+ GF_AFR_OP_HEAL_INDEX,
+ GF_AFR_OP_HEAL_FULL,
+ GF_AFR_OP_INDEX_SUMMARY,
+ GF_AFR_OP_HEALED_FILES,
+ GF_AFR_OP_HEAL_FAILED_FILES,
+ GF_AFR_OP_SPLIT_BRAIN_FILES,
+ GF_AFR_OP_STATISTICS,
+ GF_AFR_OP_STATISTICS_HEAL_COUNT,
+ GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA,
+} gf_xl_afr_op_t ;
+
+enum glusterd_mgmt_v3_procnum {
+ GLUSTERD_MGMT_V3_NULL, /* 0 */
+ GLUSTERD_MGMT_V3_LOCK,
+ GLUSTERD_MGMT_V3_PRE_VALIDATE,
+ GLUSTERD_MGMT_V3_BRICK_OP,
+ GLUSTERD_MGMT_V3_COMMIT,
+ GLUSTERD_MGMT_V3_POST_VALIDATE,
+ GLUSTERD_MGMT_V3_UNLOCK,
+ GLUSTERD_MGMT_V3_MAXVALUE,
+};
+
+#define GLUSTER_HNDSK_PROGRAM 14398633 /* Completely random */
+#define GLUSTER_HNDSK_VERSION 2 /* 0.0.2 */
+
+#define GLUSTER_PMAP_PROGRAM 34123456
+#define GLUSTER_PMAP_VERSION 1
+
+#define GLUSTER_CBK_PROGRAM 52743234 /* Completely random */
+#define GLUSTER_CBK_VERSION 1 /* 0.0.1 */
+
+#define GLUSTER_FOP_PROGRAM 1298437 /* Completely random */
+#define GLUSTER_FOP_VERSION 330 /* 3.3.0 */
+#define GLUSTER_FOP_PROCCNT GFS3_OP_MAXVALUE
+
+/* Second version */
+#define GD_MGMT_PROGRAM 1238433 /* Completely random */
+#define GD_MGMT_VERSION 2 /* 0.0.2 */
+
+#define GD_FRIEND_PROGRAM 1238437 /* Completely random */
+#define GD_FRIEND_VERSION 2 /* 0.0.2 */
+
+#define GLUSTER_CLI_PROGRAM 1238463 /* Completely random */
+#define GLUSTER_CLI_VERSION 2 /* 0.0.2 */
+
+#define GD_BRICK_PROGRAM 4867634 /*Completely random*/
+#define GD_BRICK_VERSION 2
+
+/* Third version */
+#define GD_MGMT_V3_PROGRAM 2210013 /* Completely random */
+#define GD_MGMT_V3_VERSION 3
+
+/* OP-VERSION handshake */
+#define GD_MGMT_HNDSK_PROGRAM 1239873 /* Completely random */
+#define GD_MGMT_HNDSK_VERSION 1
+
+#endif /* !_PROTOCOL_COMMON_H */
diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c
new file mode 100644
index 000000000..ac98a5c91
--- /dev/null
+++ b/rpc/rpc-lib/src/rpc-clnt.c
@@ -0,0 +1,1700 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#define RPC_CLNT_DEFAULT_REQUEST_COUNT 512
+
+#include "rpc-clnt.h"
+#include "byte-order.h"
+#include "xdr-rpcclnt.h"
+#include "rpc-transport.h"
+#include "protocol-common.h"
+#include "mem-pool.h"
+#include "xdr-rpc.h"
+#include "rpc-common-xdr.h"
+
+void
+rpc_clnt_reply_deinit (struct rpc_req *req, struct mem_pool *pool);
+
+uint64_t
+rpc_clnt_new_callid (struct rpc_clnt *clnt)
+{
+ uint64_t callid = 0;
+
+ pthread_mutex_lock (&clnt->lock);
+ {
+ callid = ++clnt->xid;
+ }
+ pthread_mutex_unlock (&clnt->lock);
+
+ return callid;
+}
+
+
+struct saved_frame *
+__saved_frames_get_timedout (struct saved_frames *frames, uint32_t timeout,
+ struct timeval *current)
+{
+ struct saved_frame *bailout_frame = NULL, *tmp = NULL;
+
+ if (!list_empty(&frames->sf.list)) {
+ tmp = list_entry (frames->sf.list.next, typeof (*tmp), list);
+ if ((tmp->saved_at.tv_sec + timeout) < current->tv_sec) {
+ bailout_frame = tmp;
+ list_del_init (&bailout_frame->list);
+ frames->count--;
+ }
+ }
+
+ return bailout_frame;
+}
+
+static int
+_is_lock_fop (struct saved_frame *sframe)
+{
+ int fop = 0;
+
+ if (SFRAME_GET_PROGNUM (sframe) == GLUSTER_FOP_PROGRAM &&
+ SFRAME_GET_PROGVER (sframe) == GLUSTER_FOP_VERSION)
+ fop = SFRAME_GET_PROCNUM (sframe);
+
+ return ((fop == GFS3_OP_LK) ||
+ (fop == GFS3_OP_INODELK) ||
+ (fop == GFS3_OP_FINODELK) ||
+ (fop == GFS3_OP_ENTRYLK) ||
+ (fop == GFS3_OP_FENTRYLK));
+}
+
+struct saved_frame *
+__saved_frames_put (struct saved_frames *frames, void *frame,
+ struct rpc_req *rpcreq)
+{
+ struct saved_frame *saved_frame = NULL;
+
+ saved_frame = mem_get (rpcreq->conn->rpc_clnt->saved_frames_pool);
+ if (!saved_frame) {
+ goto out;
+ }
+ /* THIS should be saved and set back */
+
+ memset (saved_frame, 0, sizeof (*saved_frame));
+ INIT_LIST_HEAD (&saved_frame->list);
+
+ saved_frame->capital_this = THIS;
+ saved_frame->frame = frame;
+ saved_frame->rpcreq = rpcreq;
+ gettimeofday (&saved_frame->saved_at, NULL);
+
+ if (_is_lock_fop (saved_frame))
+ list_add_tail (&saved_frame->list, &frames->lk_sf.list);
+ else
+ list_add_tail (&saved_frame->list, &frames->sf.list);
+
+ frames->count++;
+
+out:
+ return saved_frame;
+}
+
+
+void
+saved_frames_delete (struct saved_frame *saved_frame,
+ rpc_clnt_connection_t *conn)
+{
+ GF_VALIDATE_OR_GOTO ("rpc-clnt", saved_frame, out);
+ GF_VALIDATE_OR_GOTO ("rpc-clnt", conn, out);
+
+ pthread_mutex_lock (&conn->lock);
+ {
+ list_del_init (&saved_frame->list);
+ conn->saved_frames->count--;
+ }
+ pthread_mutex_unlock (&conn->lock);
+
+ if (saved_frame->rpcreq != NULL) {
+ rpc_clnt_reply_deinit (saved_frame->rpcreq,
+ conn->rpc_clnt->reqpool);
+ }
+
+ mem_put (saved_frame);
+out:
+ return;
+}
+
+
+static void
+call_bail (void *data)
+{
+ struct rpc_clnt *clnt = NULL;
+ rpc_clnt_connection_t *conn = NULL;
+ struct timeval current;
+ struct list_head list;
+ struct saved_frame *saved_frame = NULL;
+ struct saved_frame *trav = NULL;
+ struct saved_frame *tmp = NULL;
+ char frame_sent[256] = {0,};
+ struct timespec timeout = {0,};
+ struct iovec iov = {0,};
+
+ GF_VALIDATE_OR_GOTO ("client", data, out);
+
+ clnt = data;
+
+ conn = &clnt->conn;
+
+ gettimeofday (&current, NULL);
+ INIT_LIST_HEAD (&list);
+
+ pthread_mutex_lock (&conn->lock);
+ {
+ /* Chaining to get call-always functionality from
+ call-once timer */
+ if (conn->timer) {
+ timeout.tv_sec = 10;
+ timeout.tv_nsec = 0;
+
+ gf_timer_call_cancel (clnt->ctx, conn->timer);
+ conn->timer = gf_timer_call_after (clnt->ctx,
+ timeout,
+ call_bail,
+ (void *) clnt);
+
+ if (conn->timer == NULL) {
+ gf_log (conn->trans->name, GF_LOG_WARNING,
+ "Cannot create bailout timer for %s",
+ conn->trans->peerinfo.identifier);
+ }
+ }
+
+ do {
+ saved_frame =
+ __saved_frames_get_timedout (conn->saved_frames,
+ conn->frame_timeout,
+ &current);
+ if (saved_frame)
+ list_add (&saved_frame->list, &list);
+
+ } while (saved_frame);
+ }
+ pthread_mutex_unlock (&conn->lock);
+
+ list_for_each_entry_safe (trav, tmp, &list, list) {
+ gf_time_fmt (frame_sent, sizeof frame_sent,
+ trav->saved_at.tv_sec, gf_timefmt_FT);
+ snprintf (frame_sent + strlen (frame_sent),
+ 256 - strlen (frame_sent),
+ ".%"GF_PRI_SUSECONDS, trav->saved_at.tv_usec);
+
+ gf_log (conn->trans->name, GF_LOG_ERROR,
+ "bailing out frame type(%s) op(%s(%d)) xid = 0x%x "
+ "sent = %s. timeout = %d for %s",
+ trav->rpcreq->prog->progname,
+ (trav->rpcreq->prog->procnames) ?
+ trav->rpcreq->prog->procnames[trav->rpcreq->procnum] :
+ "--",
+ trav->rpcreq->procnum, trav->rpcreq->xid, frame_sent,
+ conn->frame_timeout, conn->trans->peerinfo.identifier);
+
+ clnt = rpc_clnt_ref (clnt);
+ trav->rpcreq->rpc_status = -1;
+ trav->rpcreq->cbkfn (trav->rpcreq, &iov, 1, trav->frame);
+
+ rpc_clnt_reply_deinit (trav->rpcreq, clnt->reqpool);
+ clnt = rpc_clnt_unref (clnt);
+ list_del_init (&trav->list);
+ mem_put (trav);
+ }
+out:
+ return;
+}
+
+
+/* to be called with conn->lock held */
+struct saved_frame *
+__save_frame (struct rpc_clnt *rpc_clnt, call_frame_t *frame,
+ struct rpc_req *rpcreq)
+{
+ rpc_clnt_connection_t *conn = NULL;
+ struct timespec timeout = {0, };
+ struct saved_frame *saved_frame = NULL;
+
+ conn = &rpc_clnt->conn;
+
+ saved_frame = __saved_frames_put (conn->saved_frames, frame, rpcreq);
+
+ if (saved_frame == NULL) {
+ goto out;
+ }
+
+ /* TODO: make timeout configurable */
+ if (conn->timer == NULL) {
+ timeout.tv_sec = 10;
+ timeout.tv_nsec = 0;
+ conn->timer = gf_timer_call_after (rpc_clnt->ctx,
+ timeout,
+ call_bail,
+ (void *) rpc_clnt);
+ }
+
+out:
+ return saved_frame;
+}
+
+
+struct saved_frames *
+saved_frames_new (void)
+{
+ struct saved_frames *saved_frames = NULL;
+
+ saved_frames = GF_CALLOC (1, sizeof (*saved_frames),
+ gf_common_mt_rpcclnt_savedframe_t);
+ if (!saved_frames) {
+ return NULL;
+ }
+
+ INIT_LIST_HEAD (&saved_frames->sf.list);
+ INIT_LIST_HEAD (&saved_frames->lk_sf.list);
+
+ return saved_frames;
+}
+
+
+int
+__saved_frame_copy (struct saved_frames *frames, int64_t callid,
+ struct saved_frame *saved_frame)
+{
+ struct saved_frame *tmp = NULL;
+ int ret = -1;
+
+ if (!saved_frame) {
+ ret = 0;
+ goto out;
+ }
+
+ list_for_each_entry (tmp, &frames->sf.list, list) {
+ if (tmp->rpcreq->xid == callid) {
+ *saved_frame = *tmp;
+ ret = 0;
+ goto out;
+ }
+ }
+
+ list_for_each_entry (tmp, &frames->lk_sf.list, list) {
+ if (tmp->rpcreq->xid == callid) {
+ *saved_frame = *tmp;
+ ret = 0;
+ goto out;
+ }
+ }
+
+out:
+ return ret;
+}
+
+
+struct saved_frame *
+__saved_frame_get (struct saved_frames *frames, int64_t callid)
+{
+ struct saved_frame *saved_frame = NULL;
+ struct saved_frame *tmp = NULL;
+
+ list_for_each_entry (tmp, &frames->sf.list, list) {
+ if (tmp->rpcreq->xid == callid) {
+ list_del_init (&tmp->list);
+ frames->count--;
+ saved_frame = tmp;
+ goto out;
+ }
+ }
+
+ list_for_each_entry (tmp, &frames->lk_sf.list, list) {
+ if (tmp->rpcreq->xid == callid) {
+ list_del_init (&tmp->list);
+ frames->count--;
+ saved_frame = tmp;
+ goto out;
+ }
+ }
+
+out:
+ if (saved_frame) {
+ THIS = saved_frame->capital_this;
+ }
+
+ return saved_frame;
+}
+
+
+void
+saved_frames_unwind (struct saved_frames *saved_frames)
+{
+ struct saved_frame *trav = NULL;
+ struct saved_frame *tmp = NULL;
+ char timestr[1024] = {0,};
+ struct iovec iov = {0,};
+
+ list_splice_init (&saved_frames->lk_sf.list, &saved_frames->sf.list);
+
+ list_for_each_entry_safe (trav, tmp, &saved_frames->sf.list, list) {
+ gf_time_fmt (timestr, sizeof timestr,
+ trav->saved_at.tv_sec, gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr),
+ sizeof(timestr) - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS, trav->saved_at.tv_usec);
+
+ if (!trav->rpcreq || !trav->rpcreq->prog)
+ continue;
+
+ gf_log_callingfn (trav->rpcreq->conn->trans->name,
+ GF_LOG_ERROR,
+ "forced unwinding frame type(%s) op(%s(%d)) "
+ "called at %s (xid=0x%x)",
+ trav->rpcreq->prog->progname,
+ ((trav->rpcreq->prog->procnames) ?
+ trav->rpcreq->prog->procnames[trav->rpcreq->procnum]
+ : "--"),
+ trav->rpcreq->procnum, timestr,
+ trav->rpcreq->xid);
+ saved_frames->count--;
+
+ trav->rpcreq->rpc_status = -1;
+ trav->rpcreq->cbkfn (trav->rpcreq, &iov, 1, trav->frame);
+
+ rpc_clnt_reply_deinit (trav->rpcreq,
+ trav->rpcreq->conn->rpc_clnt->reqpool);
+
+ list_del_init (&trav->list);
+ mem_put (trav);
+ }
+}
+
+
+void
+saved_frames_destroy (struct saved_frames *frames)
+{
+ if (!frames)
+ return;
+
+ saved_frames_unwind (frames);
+
+ GF_FREE (frames);
+}
+
+
+void
+rpc_clnt_reconnect (void *trans_ptr)
+{
+ rpc_transport_t *trans = NULL;
+ rpc_clnt_connection_t *conn = NULL;
+ struct timespec ts = {0, 0};
+ int32_t ret = 0;
+ struct rpc_clnt *clnt = NULL;
+
+ trans = trans_ptr;
+ if (!trans || !trans->mydata)
+ return;
+
+ conn = trans->mydata;
+ clnt = conn->rpc_clnt;
+
+ pthread_mutex_lock (&conn->lock);
+ {
+ if (conn->reconnect)
+ gf_timer_call_cancel (clnt->ctx,
+ conn->reconnect);
+ conn->reconnect = 0;
+
+ if (conn->connected == 0) {
+ ts.tv_sec = 3;
+ ts.tv_nsec = 0;
+
+ gf_log (trans->name, GF_LOG_TRACE,
+ "attempting reconnect");
+ ret = rpc_transport_connect (trans,
+ conn->config.remote_port);
+ conn->reconnect =
+ gf_timer_call_after (clnt->ctx, ts,
+ rpc_clnt_reconnect,
+ trans);
+ } else {
+ gf_log (trans->name, GF_LOG_TRACE,
+ "breaking reconnect chain");
+ }
+ }
+ pthread_mutex_unlock (&conn->lock);
+
+ if ((ret == -1) && (errno != EINPROGRESS) && (clnt->notifyfn)) {
+ clnt->notifyfn (clnt, clnt->mydata, RPC_CLNT_DISCONNECT, NULL);
+ }
+
+ return;
+}
+
+
+int
+rpc_clnt_fill_request_info (struct rpc_clnt *clnt, rpc_request_info_t *info)
+{
+ struct saved_frame saved_frame;
+ int ret = -1;
+
+ pthread_mutex_lock (&clnt->conn.lock);
+ {
+ ret = __saved_frame_copy (clnt->conn.saved_frames, info->xid,
+ &saved_frame);
+ }
+ pthread_mutex_unlock (&clnt->conn.lock);
+
+ if (ret == -1) {
+ gf_log (clnt->conn.trans->name, GF_LOG_CRITICAL,
+ "cannot lookup the saved "
+ "frame corresponding to xid (%d)", info->xid);
+ goto out;
+ }
+
+ info->prognum = saved_frame.rpcreq->prog->prognum;
+ info->procnum = saved_frame.rpcreq->procnum;
+ info->progver = saved_frame.rpcreq->prog->progver;
+ info->rpc_req = saved_frame.rpcreq;
+ info->rsp = saved_frame.rsp;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+rpc_clnt_reconnect_cleanup (rpc_clnt_connection_t *conn)
+{
+ struct rpc_clnt *clnt = NULL;
+
+ if (!conn) {
+ goto out;
+ }
+
+ clnt = conn->rpc_clnt;
+
+ pthread_mutex_lock (&conn->lock);
+ {
+
+ if (conn->reconnect) {
+ gf_timer_call_cancel (clnt->ctx, conn->reconnect);
+ conn->reconnect = NULL;
+ }
+
+ }
+ pthread_mutex_unlock (&conn->lock);
+
+out:
+ return 0;
+}
+
+/*
+ * client_protocol_cleanup - cleanup function
+ * @trans: transport object
+ *
+ */
+int
+rpc_clnt_connection_cleanup (rpc_clnt_connection_t *conn)
+{
+ struct saved_frames *saved_frames = NULL;
+ struct rpc_clnt *clnt = NULL;
+
+ if (!conn) {
+ goto out;
+ }
+
+ clnt = conn->rpc_clnt;
+
+ gf_log (conn->trans->name, GF_LOG_TRACE,
+ "cleaning up state in transport object %p", conn->trans);
+
+ pthread_mutex_lock (&conn->lock);
+ {
+ saved_frames = conn->saved_frames;
+ conn->saved_frames = saved_frames_new ();
+
+ /* bailout logic cleanup */
+ if (conn->timer) {
+ gf_timer_call_cancel (clnt->ctx, conn->timer);
+ conn->timer = NULL;
+ }
+
+ conn->connected = 0;
+
+ if (conn->ping_timer) {
+ gf_timer_call_cancel (clnt->ctx, conn->ping_timer);
+ conn->ping_timer = NULL;
+ conn->ping_started = 0;
+ }
+ }
+ pthread_mutex_unlock (&conn->lock);
+
+ saved_frames_destroy (saved_frames);
+
+out:
+ return 0;
+}
+
+/*
+ * lookup_frame - lookup call frame corresponding to a given callid
+ * @trans: transport object
+ * @callid: call id of the frame
+ *
+ * not for external reference
+ */
+
+static struct saved_frame *
+lookup_frame (rpc_clnt_connection_t *conn, int64_t callid)
+{
+ struct saved_frame *frame = NULL;
+
+ pthread_mutex_lock (&conn->lock);
+ {
+ frame = __saved_frame_get (conn->saved_frames, callid);
+ }
+ pthread_mutex_unlock (&conn->lock);
+
+ return frame;
+}
+
+
+int
+rpc_clnt_reply_fill (rpc_transport_pollin_t *msg,
+ rpc_clnt_connection_t *conn,
+ struct rpc_msg *replymsg, struct iovec progmsg,
+ struct rpc_req *req,
+ struct saved_frame *saved_frame)
+{
+ int ret = -1;
+
+ if ((!conn) || (!replymsg)|| (!req) || (!saved_frame) || (!msg)) {
+ goto out;
+ }
+
+ req->rpc_status = 0;
+ if ((rpc_reply_status (replymsg) == MSG_DENIED)
+ || (rpc_accepted_reply_status (replymsg) != SUCCESS)) {
+ req->rpc_status = -1;
+ }
+
+ req->rsp[0] = progmsg;
+ req->rsp_iobref = iobref_ref (msg->iobref);
+
+ if (msg->vectored) {
+ req->rsp[1] = msg->vector[1];
+ req->rspcnt = 2;
+ } else {
+ req->rspcnt = 1;
+ }
+
+ /* By this time, the data bytes for the auth scheme would have already
+ * been copied into the required sections of the req structure,
+ * we just need to fill in the meta-data about it now.
+ */
+ if (req->rpc_status == 0) {
+ /*
+ * req->verf.flavour = rpc_reply_verf_flavour (replymsg);
+ * req->verf.datalen = rpc_reply_verf_len (replymsg);
+ */
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+
+void
+rpc_clnt_reply_deinit (struct rpc_req *req, struct mem_pool *pool)
+{
+ if (!req) {
+ goto out;
+ }
+
+ if (req->rsp_iobref) {
+ iobref_unref (req->rsp_iobref);
+ }
+
+ mem_put (req);
+out:
+ return;
+}
+
+
+/* TODO: use mem-pool for allocating requests */
+int
+rpc_clnt_reply_init (rpc_clnt_connection_t *conn, rpc_transport_pollin_t *msg,
+ struct rpc_req *req, struct saved_frame *saved_frame)
+{
+ char *msgbuf = NULL;
+ struct rpc_msg rpcmsg;
+ struct iovec progmsg; /* RPC Program payload */
+ size_t msglen = 0;
+ int ret = -1;
+
+ msgbuf = msg->vector[0].iov_base;
+ msglen = msg->vector[0].iov_len;
+
+ ret = xdr_to_rpc_reply (msgbuf, msglen, &rpcmsg, &progmsg,
+ req->verf.authdata);
+ if (ret != 0) {
+ gf_log (conn->trans->name, GF_LOG_WARNING,
+ "RPC reply decoding failed");
+ goto out;
+ }
+
+ ret = rpc_clnt_reply_fill (msg, conn, &rpcmsg, progmsg, req,
+ saved_frame);
+ if (ret != 0) {
+ goto out;
+ }
+
+ gf_log (conn->trans->name, GF_LOG_TRACE,
+ "received rpc message (RPC XID: 0x%x"
+ " Program: %s, ProgVers: %d, Proc: %d) from rpc-transport (%s)",
+ saved_frame->rpcreq->xid,
+ saved_frame->rpcreq->prog->progname,
+ saved_frame->rpcreq->prog->progver,
+ saved_frame->rpcreq->procnum, conn->trans->name);
+
+out:
+ if (ret != 0) {
+ req->rpc_status = -1;
+ }
+
+ return ret;
+}
+
+int
+rpc_clnt_handle_cbk (struct rpc_clnt *clnt, rpc_transport_pollin_t *msg)
+{
+ char *msgbuf = NULL;
+ rpcclnt_cb_program_t *program = NULL;
+ struct rpc_msg rpcmsg;
+ struct iovec progmsg; /* RPC Program payload */
+ size_t msglen = 0;
+ int found = 0;
+ int ret = -1;
+ int procnum = 0;
+
+ msgbuf = msg->vector[0].iov_base;
+ msglen = msg->vector[0].iov_len;
+
+ clnt = rpc_clnt_ref (clnt);
+ ret = xdr_to_rpc_call (msgbuf, msglen, &rpcmsg, &progmsg, NULL,NULL);
+ if (ret == -1) {
+ gf_log (clnt->conn.trans->name, GF_LOG_WARNING,
+ "RPC call decoding failed");
+ goto out;
+ }
+
+ gf_log (clnt->conn.trans->name, GF_LOG_TRACE,
+ "received rpc message (XID: 0x%lx, "
+ "Ver: %ld, Program: %ld, ProgVers: %ld, Proc: %ld) "
+ "from rpc-transport (%s)", rpc_call_xid (&rpcmsg),
+ rpc_call_rpcvers (&rpcmsg), rpc_call_program (&rpcmsg),
+ rpc_call_progver (&rpcmsg), rpc_call_progproc (&rpcmsg),
+ clnt->conn.trans->name);
+
+ procnum = rpc_call_progproc (&rpcmsg);
+
+ pthread_mutex_lock (&clnt->lock);
+ {
+ list_for_each_entry (program, &clnt->programs, program) {
+ if ((program->prognum == rpc_call_program (&rpcmsg))
+ && (program->progver
+ == rpc_call_progver (&rpcmsg))) {
+ found = 1;
+ break;
+ }
+ }
+ }
+ pthread_mutex_unlock (&clnt->lock);
+
+ if (found && (procnum < program->numactors) &&
+ (program->actors[procnum].actor)) {
+ program->actors[procnum].actor (clnt, program->mydata,
+ &progmsg);
+ }
+
+out:
+ clnt = rpc_clnt_unref (clnt);
+ return ret;
+}
+
+int
+rpc_clnt_handle_reply (struct rpc_clnt *clnt, rpc_transport_pollin_t *pollin)
+{
+ rpc_clnt_connection_t *conn = NULL;
+ struct saved_frame *saved_frame = NULL;
+ int ret = -1;
+ struct rpc_req *req = NULL;
+ uint32_t xid = 0;
+
+ clnt = rpc_clnt_ref (clnt);
+ conn = &clnt->conn;
+
+ xid = ntoh32 (*((uint32_t *)pollin->vector[0].iov_base));
+ saved_frame = lookup_frame (conn, xid);
+ if (saved_frame == NULL) {
+ gf_log (conn->trans->name, GF_LOG_ERROR,
+ "cannot lookup the saved frame for reply with xid (%u)",
+ xid);
+ goto out;
+ }
+
+ req = saved_frame->rpcreq;
+ if (req == NULL) {
+ gf_log (conn->trans->name, GF_LOG_ERROR,
+ "no request with frame for xid (%u)", xid);
+ goto out;
+ }
+
+ ret = rpc_clnt_reply_init (conn, pollin, req, saved_frame);
+ if (ret != 0) {
+ req->rpc_status = -1;
+ gf_log (conn->trans->name, GF_LOG_WARNING,
+ "initialising rpc reply failed");
+ }
+
+ req->cbkfn (req, req->rsp, req->rspcnt, saved_frame->frame);
+
+ if (req) {
+ rpc_clnt_reply_deinit (req, conn->rpc_clnt->reqpool);
+ }
+out:
+
+ if (saved_frame) {
+ mem_put (saved_frame);
+ }
+
+ clnt = rpc_clnt_unref (clnt);
+ return ret;
+}
+
+
+inline void
+rpc_clnt_set_connected (rpc_clnt_connection_t *conn)
+{
+ if (!conn) {
+ goto out;
+ }
+
+ pthread_mutex_lock (&conn->lock);
+ {
+ conn->connected = 1;
+ }
+ pthread_mutex_unlock (&conn->lock);
+
+out:
+ return;
+}
+
+
+void
+rpc_clnt_unset_connected (rpc_clnt_connection_t *conn)
+{
+ if (!conn) {
+ goto out;
+ }
+
+ pthread_mutex_lock (&conn->lock);
+ {
+ conn->connected = 0;
+ }
+ pthread_mutex_unlock (&conn->lock);
+
+out:
+ return;
+}
+
+static void
+rpc_clnt_destroy (struct rpc_clnt *rpc);
+
+int
+rpc_clnt_notify (rpc_transport_t *trans, void *mydata,
+ rpc_transport_event_t event, void *data, ...)
+{
+ rpc_clnt_connection_t *conn = NULL;
+ struct rpc_clnt *clnt = NULL;
+ int ret = -1;
+ rpc_request_info_t *req_info = NULL;
+ rpc_transport_pollin_t *pollin = NULL;
+ struct timespec ts = {0, };
+
+ conn = mydata;
+ if (conn == NULL) {
+ goto out;
+ }
+ clnt = conn->rpc_clnt;
+ if (!clnt)
+ goto out;
+
+ switch (event) {
+ case RPC_TRANSPORT_DISCONNECT:
+ {
+ rpc_clnt_connection_cleanup (conn);
+
+ pthread_mutex_lock (&conn->lock);
+ {
+ if (!conn->rpc_clnt->disabled
+ && (conn->reconnect == NULL)) {
+ ts.tv_sec = 10;
+ ts.tv_nsec = 0;
+
+ conn->reconnect =
+ gf_timer_call_after (clnt->ctx, ts,
+ rpc_clnt_reconnect,
+ conn->trans);
+ }
+ }
+ pthread_mutex_unlock (&conn->lock);
+
+ if (clnt->notifyfn)
+ ret = clnt->notifyfn (clnt, clnt->mydata,
+ RPC_CLNT_DISCONNECT, NULL);
+ break;
+ }
+
+ case RPC_TRANSPORT_CLEANUP:
+ rpc_clnt_destroy (clnt);
+ ret = 0;
+ break;
+
+ case RPC_TRANSPORT_MAP_XID_REQUEST:
+ {
+ req_info = data;
+ ret = rpc_clnt_fill_request_info (clnt, req_info);
+ break;
+ }
+
+ case RPC_TRANSPORT_MSG_RECEIVED:
+ {
+ pthread_mutex_lock (&conn->lock);
+ {
+ gettimeofday (&conn->last_received, NULL);
+ }
+ pthread_mutex_unlock (&conn->lock);
+
+ pollin = data;
+ if (pollin->is_reply)
+ ret = rpc_clnt_handle_reply (clnt, pollin);
+ else
+ ret = rpc_clnt_handle_cbk (clnt, pollin);
+ /* ret = clnt->notifyfn (clnt, clnt->mydata, RPC_CLNT_MSG,
+ * data);
+ */
+ break;
+ }
+
+ case RPC_TRANSPORT_MSG_SENT:
+ {
+ pthread_mutex_lock (&conn->lock);
+ {
+ gettimeofday (&conn->last_sent, NULL);
+ }
+ pthread_mutex_unlock (&conn->lock);
+
+ ret = 0;
+ break;
+ }
+
+ case RPC_TRANSPORT_CONNECT:
+ {
+ /* Every time there is a disconnection, processes
+ should try to connect to 'glusterd' (ie, default
+ port) or whichever port given as 'option remote-port'
+ in volume file. */
+ /* Below code makes sure the (re-)configured port lasts
+ for just one successful attempt */
+ conn->config.remote_port = 0;
+
+ if (clnt->notifyfn)
+ ret = clnt->notifyfn (clnt, clnt->mydata,
+ RPC_CLNT_CONNECT, NULL);
+ break;
+ }
+
+ case RPC_TRANSPORT_ACCEPT:
+ /* only meaningful on a server, no need of handling this event
+ * in a client.
+ */
+ ret = 0;
+ break;
+ }
+
+out:
+ return ret;
+}
+
+
+void
+rpc_clnt_connection_deinit (rpc_clnt_connection_t *conn)
+{
+ return;
+}
+
+
+static inline int
+rpc_clnt_connection_init (struct rpc_clnt *clnt, glusterfs_ctx_t *ctx,
+ dict_t *options, char *name)
+{
+ int ret = -1;
+ rpc_clnt_connection_t *conn = NULL;
+
+ conn = &clnt->conn;
+ pthread_mutex_init (&clnt->conn.lock, NULL);
+
+ ret = dict_get_int32 (options, "frame-timeout",
+ &conn->frame_timeout);
+ if (ret >= 0) {
+ gf_log (name, GF_LOG_INFO,
+ "setting frame-timeout to %d", conn->frame_timeout);
+ } else {
+ gf_log (name, GF_LOG_DEBUG,
+ "defaulting frame-timeout to 30mins");
+ conn->frame_timeout = 1800;
+ }
+
+ conn->trans = rpc_transport_load (ctx, options, name);
+ if (!conn->trans) {
+ gf_log (name, GF_LOG_WARNING, "loading of new rpc-transport"
+ " failed");
+ ret = -1;
+ goto out;
+ }
+
+ rpc_transport_ref (conn->trans);
+
+ conn->rpc_clnt = clnt;
+
+ ret = rpc_transport_register_notify (conn->trans, rpc_clnt_notify,
+ conn);
+ if (ret == -1) {
+ gf_log (name, GF_LOG_WARNING, "registering notify failed");
+ rpc_clnt_connection_cleanup (conn);
+ conn = NULL;
+ goto out;
+ }
+
+ conn->saved_frames = saved_frames_new ();
+ if (!conn->saved_frames) {
+ gf_log (name, GF_LOG_WARNING, "creation of saved_frames "
+ "failed");
+ rpc_clnt_connection_cleanup (conn);
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+struct rpc_clnt *
+rpc_clnt_new (dict_t *options, glusterfs_ctx_t *ctx, char *name,
+ uint32_t reqpool_size)
+{
+ int ret = -1;
+ struct rpc_clnt *rpc = NULL;
+
+ rpc = GF_CALLOC (1, sizeof (*rpc), gf_common_mt_rpcclnt_t);
+ if (!rpc) {
+ goto out;
+ }
+
+ pthread_mutex_init (&rpc->lock, NULL);
+ rpc->ctx = ctx;
+
+ if (!reqpool_size)
+ reqpool_size = RPC_CLNT_DEFAULT_REQUEST_COUNT;
+
+ rpc->reqpool = mem_pool_new (struct rpc_req, reqpool_size);
+ if (rpc->reqpool == NULL) {
+ pthread_mutex_destroy (&rpc->lock);
+ GF_FREE (rpc);
+ rpc = NULL;
+ goto out;
+ }
+
+ rpc->saved_frames_pool = mem_pool_new (struct saved_frame,
+ reqpool_size);
+ if (rpc->saved_frames_pool == NULL) {
+ pthread_mutex_destroy (&rpc->lock);
+ mem_pool_destroy (rpc->reqpool);
+ GF_FREE (rpc);
+ rpc = NULL;
+ goto out;
+ }
+
+ ret = rpc_clnt_connection_init (rpc, ctx, options, name);
+ if (ret == -1) {
+ pthread_mutex_destroy (&rpc->lock);
+ mem_pool_destroy (rpc->reqpool);
+ mem_pool_destroy (rpc->saved_frames_pool);
+ GF_FREE (rpc);
+ rpc = NULL;
+ if (options)
+ dict_unref (options);
+ goto out;
+ }
+
+ rpc->auth_null = dict_get_str_boolean (options, "auth-null", 0);
+
+ rpc = rpc_clnt_ref (rpc);
+ INIT_LIST_HEAD (&rpc->programs);
+
+out:
+ return rpc;
+}
+
+
+int
+rpc_clnt_start (struct rpc_clnt *rpc)
+{
+ struct rpc_clnt_connection *conn = NULL;
+
+ if (!rpc)
+ return -1;
+
+ conn = &rpc->conn;
+
+ rpc_clnt_reconnect (conn->trans);
+
+ return 0;
+}
+
+
+int
+rpc_clnt_register_notify (struct rpc_clnt *rpc, rpc_clnt_notify_t fn,
+ void *mydata)
+{
+ rpc->mydata = mydata;
+ rpc->notifyfn = fn;
+
+ return 0;
+}
+
+ssize_t
+xdr_serialize_glusterfs_auth (char *dest, struct auth_glusterfs_parms_v2 *au)
+{
+ ssize_t ret = -1;
+ XDR xdr;
+
+ if ((!dest) || (!au))
+ return -1;
+
+ xdrmem_create (&xdr, dest, GF_MAX_AUTH_BYTES, XDR_ENCODE);
+
+ if (!xdr_auth_glusterfs_parms_v2 (&xdr, au)) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to encode auth glusterfs elements");
+ ret = -1;
+ goto ret;
+ }
+
+ ret = (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base));
+
+ret:
+ return ret;
+}
+
+
+int
+rpc_clnt_fill_request (int prognum, int progver, int procnum,
+ uint64_t xid, struct auth_glusterfs_parms_v2 *au,
+ struct rpc_msg *request, char *auth_data)
+{
+ int ret = -1;
+
+ if (!request) {
+ goto out;
+ }
+
+ memset (request, 0, sizeof (*request));
+
+ request->rm_xid = xid;
+ request->rm_direction = CALL;
+
+ request->rm_call.cb_rpcvers = 2;
+ request->rm_call.cb_prog = prognum;
+ request->rm_call.cb_vers = progver;
+ request->rm_call.cb_proc = procnum;
+
+ /* TODO: Using AUTH_(GLUSTERFS/NULL) in a kludgy way for time-being.
+ * Make it modular in future so it is easy to plug-in new
+ * authentication schemes.
+ */
+ if (auth_data) {
+ ret = xdr_serialize_glusterfs_auth (auth_data, au);
+ if (ret == -1) {
+ gf_log ("rpc-clnt", GF_LOG_DEBUG,
+ "cannot encode credentials");
+ goto out;
+ }
+
+ request->rm_call.cb_cred.oa_flavor = AUTH_GLUSTERFS_v2;
+ request->rm_call.cb_cred.oa_base = auth_data;
+ request->rm_call.cb_cred.oa_length = ret;
+ } else {
+ request->rm_call.cb_cred.oa_flavor = AUTH_NULL;
+ request->rm_call.cb_cred.oa_base = NULL;
+ request->rm_call.cb_cred.oa_length = 0;
+ }
+ request->rm_call.cb_verf.oa_flavor = AUTH_NONE;
+ request->rm_call.cb_verf.oa_base = NULL;
+ request->rm_call.cb_verf.oa_length = 0;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+struct iovec
+rpc_clnt_record_build_header (char *recordstart, size_t rlen,
+ struct rpc_msg *request, size_t payload)
+{
+ struct iovec requesthdr = {0, };
+ struct iovec txrecord = {0, 0};
+ int ret = -1;
+ size_t fraglen = 0;
+
+ ret = rpc_request_to_xdr (request, recordstart, rlen, &requesthdr);
+ if (ret == -1) {
+ gf_log ("rpc-clnt", GF_LOG_DEBUG,
+ "Failed to create RPC request");
+ goto out;
+ }
+
+ fraglen = payload + requesthdr.iov_len;
+ gf_log ("rpc-clnt", GF_LOG_TRACE, "Request fraglen %zu, payload: %zu, "
+ "rpc hdr: %zu", fraglen, payload, requesthdr.iov_len);
+
+
+ txrecord.iov_base = recordstart;
+
+ /* Remember, this is only the vec for the RPC header and does not
+ * include the payload above. We needed the payload only to calculate
+ * the size of the full fragment. This size is sent in the fragment
+ * header.
+ */
+ txrecord.iov_len = requesthdr.iov_len;
+
+out:
+ return txrecord;
+}
+
+
+struct iobuf *
+rpc_clnt_record_build_record (struct rpc_clnt *clnt, int prognum, int progver,
+ int procnum, size_t hdrsize, uint64_t xid,
+ struct auth_glusterfs_parms_v2 *au,
+ struct iovec *recbuf)
+{
+ struct rpc_msg request = {0, };
+ struct iobuf *request_iob = NULL;
+ char *record = NULL;
+ struct iovec recordhdr = {0, };
+ size_t pagesize = 0;
+ int ret = -1;
+ size_t xdr_size = 0;
+ char auth_data[GF_MAX_AUTH_BYTES] = {0, };
+
+ if ((!clnt) || (!recbuf) || (!au)) {
+ goto out;
+ }
+
+ /* Fill the rpc structure and XDR it into the buffer got above. */
+ if (clnt->auth_null)
+ ret = rpc_clnt_fill_request (prognum, progver, procnum,
+ xid, NULL, &request, NULL);
+ else
+ ret = rpc_clnt_fill_request (prognum, progver, procnum,
+ xid, au, &request, auth_data);
+
+ if (ret == -1) {
+ gf_log (clnt->conn.trans->name, GF_LOG_WARNING,
+ "cannot build a rpc-request xid (%"PRIu64")", xid);
+ goto out;
+ }
+
+ xdr_size = xdr_sizeof ((xdrproc_t)xdr_callmsg, &request);
+
+ /* First, try to get a pointer into the buffer which the RPC
+ * layer can use.
+ */
+ request_iob = iobuf_get2 (clnt->ctx->iobuf_pool, (xdr_size + hdrsize));
+ if (!request_iob) {
+ goto out;
+ }
+
+ pagesize = iobuf_pagesize (request_iob);
+
+ record = iobuf_ptr (request_iob); /* Now we have it. */
+
+ recordhdr = rpc_clnt_record_build_header (record, pagesize, &request,
+ hdrsize);
+
+ if (!recordhdr.iov_base) {
+ gf_log (clnt->conn.trans->name, GF_LOG_ERROR,
+ "Failed to build record header");
+ iobuf_unref (request_iob);
+ request_iob = NULL;
+ recbuf->iov_base = NULL;
+ goto out;
+ }
+
+ recbuf->iov_base = recordhdr.iov_base;
+ recbuf->iov_len = recordhdr.iov_len;
+
+out:
+ return request_iob;
+}
+
+
+struct iobuf *
+rpc_clnt_record (struct rpc_clnt *clnt, call_frame_t *call_frame,
+ rpc_clnt_prog_t *prog, int procnum, size_t hdrlen,
+ struct iovec *rpchdr, uint64_t callid)
+{
+ struct auth_glusterfs_parms_v2 au = {0, };
+ struct iobuf *request_iob = NULL;
+ char owner[4] = {0,};
+
+ if (!prog || !rpchdr || !call_frame) {
+ goto out;
+ }
+
+ au.pid = call_frame->root->pid;
+ au.uid = call_frame->root->uid;
+ au.gid = call_frame->root->gid;
+ au.groups.groups_len = call_frame->root->ngrps;
+ au.lk_owner.lk_owner_len = call_frame->root->lk_owner.len;
+
+ if (au.groups.groups_len)
+ au.groups.groups_val = call_frame->root->groups;
+
+ if (call_frame->root->lk_owner.len)
+ au.lk_owner.lk_owner_val = call_frame->root->lk_owner.data;
+ else {
+ owner[0] = (char)(au.pid & 0xff);
+ owner[1] = (char)((au.pid >> 8) & 0xff);
+ owner[2] = (char)((au.pid >> 16) & 0xff);
+ owner[3] = (char)((au.pid >> 24) & 0xff);
+
+ au.lk_owner.lk_owner_val = owner;
+ au.lk_owner.lk_owner_len = 4;
+ }
+
+ gf_log (clnt->conn.trans->name, GF_LOG_TRACE, "Auth Info: pid: %u, uid: %d"
+ ", gid: %d, owner: %s", au.pid, au.uid, au.gid,
+ lkowner_utoa (&call_frame->root->lk_owner));
+
+ request_iob = rpc_clnt_record_build_record (clnt, prog->prognum,
+ prog->progver,
+ procnum, hdrlen,
+ callid, &au,
+ rpchdr);
+ if (!request_iob) {
+ gf_log (clnt->conn.trans->name, GF_LOG_WARNING,
+ "cannot build rpc-record");
+ goto out;
+ }
+
+out:
+ return request_iob;
+}
+
+int
+rpcclnt_cbk_program_register (struct rpc_clnt *clnt,
+ rpcclnt_cb_program_t *program, void *mydata)
+{
+ int ret = -1;
+ char already_registered = 0;
+ rpcclnt_cb_program_t *tmp = NULL;
+
+ if (!clnt)
+ goto out;
+
+ if (program->actors == NULL)
+ goto out;
+
+ pthread_mutex_lock (&clnt->lock);
+ {
+ list_for_each_entry (tmp, &clnt->programs, program) {
+ if ((program->prognum == tmp->prognum)
+ && (program->progver == tmp->progver)) {
+ already_registered = 1;
+ break;
+ }
+ }
+ }
+ pthread_mutex_unlock (&clnt->lock);
+
+ if (already_registered) {
+ gf_log_callingfn (clnt->conn.trans->name, GF_LOG_DEBUG,
+ "already registered");
+ ret = 0;
+ goto out;
+ }
+
+ tmp = GF_CALLOC (1, sizeof (*tmp),
+ gf_common_mt_rpcclnt_cb_program_t);
+ if (tmp == NULL) {
+ goto out;
+ }
+
+ memcpy (tmp, program, sizeof (*tmp));
+ INIT_LIST_HEAD (&tmp->program);
+
+ tmp->mydata = mydata;
+
+ pthread_mutex_lock (&clnt->lock);
+ {
+ list_add_tail (&tmp->program, &clnt->programs);
+ }
+ pthread_mutex_unlock (&clnt->lock);
+
+ ret = 0;
+ gf_log (clnt->conn.trans->name, GF_LOG_DEBUG,
+ "New program registered: %s, Num: %d, Ver: %d",
+ program->progname, program->prognum,
+ program->progver);
+
+out:
+ if (ret == -1) {
+ gf_log (clnt->conn.trans->name, GF_LOG_ERROR,
+ "Program registration failed:"
+ " %s, Num: %d, Ver: %d", program->progname,
+ program->prognum, program->progver);
+ }
+
+ return ret;
+}
+
+
+int
+rpc_clnt_submit (struct rpc_clnt *rpc, rpc_clnt_prog_t *prog,
+ int procnum, fop_cbk_fn_t cbkfn,
+ struct iovec *proghdr, int proghdrcount,
+ struct iovec *progpayload, int progpayloadcount,
+ struct iobref *iobref, void *frame, struct iovec *rsphdr,
+ int rsphdr_count, struct iovec *rsp_payload,
+ int rsp_payload_count, struct iobref *rsp_iobref)
+{
+ rpc_clnt_connection_t *conn = NULL;
+ struct iobuf *request_iob = NULL;
+ struct iovec rpchdr = {0,};
+ struct rpc_req *rpcreq = NULL;
+ rpc_transport_req_t req;
+ int ret = -1;
+ int proglen = 0;
+ char new_iobref = 0;
+ uint64_t callid = 0;
+
+ if (!rpc || !prog || !frame) {
+ goto out;
+ }
+
+ conn = &rpc->conn;
+
+ if (conn->trans == NULL) {
+ goto out;
+ }
+
+ rpcreq = mem_get (rpc->reqpool);
+ if (rpcreq == NULL) {
+ goto out;
+ }
+
+ memset (rpcreq, 0, sizeof (*rpcreq));
+ memset (&req, 0, sizeof (req));
+
+ if (!iobref) {
+ iobref = iobref_new ();
+ if (!iobref) {
+ goto out;
+ }
+
+ new_iobref = 1;
+ }
+
+ callid = rpc_clnt_new_callid (rpc);
+
+ rpcreq->prog = prog;
+ rpcreq->procnum = procnum;
+ rpcreq->conn = conn;
+ rpcreq->xid = callid;
+ rpcreq->cbkfn = cbkfn;
+
+ ret = -1;
+
+ if (proghdr) {
+ proglen += iov_length (proghdr, proghdrcount);
+ }
+
+ request_iob = rpc_clnt_record (rpc, frame, prog,
+ procnum, proglen,
+ &rpchdr, callid);
+ if (!request_iob) {
+ gf_log (conn->trans->name, GF_LOG_WARNING,
+ "cannot build rpc-record");
+ goto out;
+ }
+
+ iobref_add (iobref, request_iob);
+
+ req.msg.rpchdr = &rpchdr;
+ req.msg.rpchdrcount = 1;
+ req.msg.proghdr = proghdr;
+ req.msg.proghdrcount = proghdrcount;
+ req.msg.progpayload = progpayload;
+ req.msg.progpayloadcount = progpayloadcount;
+ req.msg.iobref = iobref;
+
+ req.rsp.rsphdr = rsphdr;
+ req.rsp.rsphdr_count = rsphdr_count;
+ req.rsp.rsp_payload = rsp_payload;
+ req.rsp.rsp_payload_count = rsp_payload_count;
+ req.rsp.rsp_iobref = rsp_iobref;
+ req.rpc_req = rpcreq;
+
+ pthread_mutex_lock (&conn->lock);
+ {
+ if (conn->connected == 0) {
+ ret = rpc_transport_connect (conn->trans,
+ conn->config.remote_port);
+ }
+
+ ret = rpc_transport_submit_request (rpc->conn.trans,
+ &req);
+ if (ret == -1) {
+ gf_log (conn->trans->name, GF_LOG_WARNING,
+ "failed to submit rpc-request "
+ "(XID: 0x%x Program: %s, ProgVers: %d, "
+ "Proc: %d) to rpc-transport (%s)", rpcreq->xid,
+ rpcreq->prog->progname, rpcreq->prog->progver,
+ rpcreq->procnum, rpc->conn.trans->name);
+ }
+
+ if ((ret >= 0) && frame) {
+ /* Save the frame in queue */
+ __save_frame (rpc, frame, rpcreq);
+
+ gf_log ("rpc-clnt", GF_LOG_TRACE, "submitted request "
+ "(XID: 0x%x Program: %s, ProgVers: %d, "
+ "Proc: %d) to rpc-transport (%s)", rpcreq->xid,
+ rpcreq->prog->progname, rpcreq->prog->progver,
+ rpcreq->procnum, rpc->conn.trans->name);
+ }
+ }
+ pthread_mutex_unlock (&conn->lock);
+
+ if (ret == -1) {
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (request_iob) {
+ iobuf_unref (request_iob);
+ }
+
+ if (new_iobref && iobref) {
+ iobref_unref (iobref);
+ }
+
+ if (frame && (ret == -1)) {
+ if (rpcreq) {
+ rpcreq->rpc_status = -1;
+ cbkfn (rpcreq, NULL, 0, frame);
+ mem_put (rpcreq);
+ }
+ }
+ return ret;
+}
+
+
+struct rpc_clnt *
+rpc_clnt_ref (struct rpc_clnt *rpc)
+{
+ if (!rpc)
+ return NULL;
+ pthread_mutex_lock (&rpc->lock);
+ {
+ rpc->refcount++;
+ }
+ pthread_mutex_unlock (&rpc->lock);
+ return rpc;
+}
+
+
+static void
+rpc_clnt_trigger_destroy (struct rpc_clnt *rpc)
+{
+ if (!rpc)
+ return;
+
+ rpc_clnt_disable (rpc);
+ rpc_transport_unref (rpc->conn.trans);
+}
+
+static void
+rpc_clnt_destroy (struct rpc_clnt *rpc)
+{
+ if (!rpc)
+ return;
+
+ saved_frames_destroy (rpc->conn.saved_frames);
+ pthread_mutex_destroy (&rpc->lock);
+ pthread_mutex_destroy (&rpc->conn.lock);
+
+ /* mem-pool should be destroyed, otherwise,
+ it will cause huge memory leaks */
+ mem_pool_destroy (rpc->reqpool);
+ mem_pool_destroy (rpc->saved_frames_pool);
+
+ GF_FREE (rpc);
+ return;
+}
+
+struct rpc_clnt *
+rpc_clnt_unref (struct rpc_clnt *rpc)
+{
+ int count = 0;
+
+ if (!rpc)
+ return NULL;
+ pthread_mutex_lock (&rpc->lock);
+ {
+ count = --rpc->refcount;
+ }
+ pthread_mutex_unlock (&rpc->lock);
+ if (!count) {
+ rpc_clnt_trigger_destroy (rpc);
+ return NULL;
+ }
+ return rpc;
+}
+
+
+char
+rpc_clnt_is_disabled (struct rpc_clnt *rpc)
+{
+
+ rpc_clnt_connection_t *conn = NULL;
+ char disabled = 0;
+
+ if (!rpc) {
+ goto out;
+ }
+
+ conn = &rpc->conn;
+
+ pthread_mutex_lock (&conn->lock);
+ {
+ disabled = rpc->disabled;
+ }
+ pthread_mutex_unlock (&conn->lock);
+
+out:
+ return disabled;
+}
+
+void
+rpc_clnt_disable (struct rpc_clnt *rpc)
+{
+ rpc_clnt_connection_t *conn = NULL;
+
+ if (!rpc) {
+ goto out;
+ }
+
+ conn = &rpc->conn;
+
+ pthread_mutex_lock (&conn->lock);
+ {
+ rpc->disabled = 1;
+
+ if (conn->timer) {
+ gf_timer_call_cancel (rpc->ctx, conn->timer);
+ conn->timer = NULL;
+ }
+
+ if (conn->reconnect) {
+ gf_timer_call_cancel (rpc->ctx, conn->reconnect);
+ conn->reconnect = NULL;
+ }
+ conn->connected = 0;
+
+ if (conn->ping_timer) {
+ gf_timer_call_cancel (rpc->ctx, conn->ping_timer);
+ conn->ping_timer = NULL;
+ conn->ping_started = 0;
+ }
+
+ }
+ pthread_mutex_unlock (&conn->lock);
+
+ rpc_transport_disconnect (rpc->conn.trans);
+
+out:
+ return;
+}
+
+
+void
+rpc_clnt_reconfig (struct rpc_clnt *rpc, struct rpc_clnt_config *config)
+{
+ if (config->rpc_timeout) {
+ if (config->rpc_timeout != rpc->conn.config.rpc_timeout)
+ gf_log (rpc->conn.trans->name, GF_LOG_INFO,
+ "changing timeout to %d (from %d)",
+ config->rpc_timeout,
+ rpc->conn.config.rpc_timeout);
+ rpc->conn.config.rpc_timeout = config->rpc_timeout;
+ }
+
+ if (config->remote_port) {
+ if (config->remote_port != rpc->conn.config.remote_port)
+ gf_log (rpc->conn.trans->name, GF_LOG_INFO,
+ "changing port to %d (from %d)",
+ config->remote_port,
+ rpc->conn.config.remote_port);
+
+ rpc->conn.config.remote_port = config->remote_port;
+ }
+
+ if (config->remote_host) {
+ if (rpc->conn.config.remote_host) {
+ if (strcmp (rpc->conn.config.remote_host,
+ config->remote_host))
+ gf_log (rpc->conn.trans->name, GF_LOG_INFO,
+ "changing hostname to %s (from %s)",
+ config->remote_host,
+ rpc->conn.config.remote_host);
+ FREE (rpc->conn.config.remote_host);
+ } else {
+ gf_log (rpc->conn.trans->name, GF_LOG_INFO,
+ "setting hostname to %s",
+ config->remote_host);
+ }
+
+ rpc->conn.config.remote_host = gf_strdup (config->remote_host);
+ }
+}
diff --git a/rpc/rpc-lib/src/rpc-clnt.h b/rpc/rpc-lib/src/rpc-clnt.h
new file mode 100644
index 000000000..584963ad0
--- /dev/null
+++ b/rpc/rpc-lib/src/rpc-clnt.h
@@ -0,0 +1,246 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __RPC_CLNT_H
+#define __RPC_CLNT_H
+
+#include "stack.h"
+#include "rpc-transport.h"
+#include "timer.h"
+#include "xdr-common.h"
+
+typedef enum {
+ RPC_CLNT_CONNECT,
+ RPC_CLNT_DISCONNECT,
+ RPC_CLNT_MSG
+} rpc_clnt_event_t;
+
+
+#define SFRAME_GET_PROGNUM(sframe) (sframe->rpcreq->prog->prognum)
+#define SFRAME_GET_PROGVER(sframe) (sframe->rpcreq->prog->progver)
+#define SFRAME_GET_PROCNUM(sframe) (sframe->rpcreq->procnum)
+
+struct xptr_clnt;
+struct rpc_req;
+struct rpc_clnt;
+struct rpc_clnt_config;
+struct rpc_clnt_program;
+
+typedef int (*rpc_clnt_notify_t) (struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t fn, void *data);
+
+typedef int (*fop_cbk_fn_t) (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe);
+
+typedef int (*clnt_fn_t) (call_frame_t *fr, xlator_t *xl, void *args);
+
+struct saved_frame {
+ union {
+ struct list_head list;
+ struct {
+ struct saved_frame *frame_next;
+ struct saved_frame *frame_prev;
+ };
+ };
+ void *capital_this;
+ void *frame;
+ struct timeval saved_at;
+ struct rpc_req *rpcreq;
+ rpc_transport_rsp_t rsp;
+};
+
+struct saved_frames {
+ int64_t count;
+ struct saved_frame sf;
+ struct saved_frame lk_sf;
+};
+
+
+/* Initialized by procnum */
+typedef struct rpc_clnt_procedure {
+ char *procname;
+ clnt_fn_t fn;
+} rpc_clnt_procedure_t;
+
+typedef struct rpc_clnt_program {
+ char *progname;
+ int prognum;
+ int progver;
+ rpc_clnt_procedure_t *proctable;
+ char **procnames;
+ int numproc;
+} rpc_clnt_prog_t;
+
+typedef int (*rpcclnt_cb_fn) (struct rpc_clnt *rpc, void *mydata, void *data);
+
+/* The descriptor for each procedure/actor that runs
+ * over the RPC service.
+ */
+typedef struct rpcclnt_actor_desc {
+ char procname[32];
+ int procnum;
+ rpcclnt_cb_fn actor;
+} rpcclnt_cb_actor_t;
+
+/* Describes a program and its version along with the function pointers
+ * required to handle the procedures/actors of each program/version.
+ * Never changed ever by any thread so no need for a lock.
+ */
+typedef struct rpcclnt_cb_program {
+ char progname[32];
+ int prognum;
+ int progver;
+ rpcclnt_cb_actor_t *actors; /* All procedure handlers */
+ int numactors; /* Num actors in actor array */
+
+ /* Program specific state handed to actors */
+ void *private;
+
+
+ /* list member to link to list of registered services with rpc_clnt */
+ struct list_head program;
+
+ /* Needed for passing back in cb_actor */
+ void *mydata;
+} rpcclnt_cb_program_t;
+
+
+
+typedef struct rpc_auth_data {
+ int flavour;
+ int datalen;
+ char authdata[GF_MAX_AUTH_BYTES];
+} rpc_auth_data_t;
+
+
+struct rpc_clnt_config {
+ int rpc_timeout;
+ int remote_port;
+ char * remote_host;
+};
+
+
+#define rpc_auth_flavour(au) ((au).flavour)
+
+struct rpc_clnt_connection {
+ pthread_mutex_t lock;
+ rpc_transport_t *trans;
+ struct rpc_clnt_config config;
+ gf_timer_t *reconnect;
+ gf_timer_t *timer;
+ gf_timer_t *ping_timer;
+ struct rpc_clnt *rpc_clnt;
+ char connected;
+ struct saved_frames *saved_frames;
+ int32_t frame_timeout;
+ struct timeval last_sent;
+ struct timeval last_received;
+ int32_t ping_started;
+};
+typedef struct rpc_clnt_connection rpc_clnt_connection_t;
+
+struct rpc_req {
+ rpc_clnt_connection_t *conn;
+ uint32_t xid;
+ struct iovec req[2];
+ int reqcnt;
+ struct iobref *req_iobref;
+ struct iovec rsp[2];
+ int rspcnt;
+ struct iobref *rsp_iobref;
+ int rpc_status;
+ rpc_auth_data_t verf;
+ rpc_clnt_prog_t *prog;
+ int procnum;
+ fop_cbk_fn_t cbkfn;
+ void *conn_private;
+};
+
+typedef struct rpc_clnt {
+ pthread_mutex_t lock;
+ rpc_clnt_notify_t notifyfn;
+ rpc_clnt_connection_t conn;
+ void *mydata;
+ uint64_t xid;
+
+ /* list of cb programs registered with rpc-clnt */
+ struct list_head programs;
+
+ /* Memory pool for rpc_req_t */
+ struct mem_pool *reqpool;
+
+ struct mem_pool *saved_frames_pool;
+
+ glusterfs_ctx_t *ctx;
+ int refcount;
+ int auth_null;
+ char disabled;
+} rpc_clnt_t;
+
+
+struct rpc_clnt *rpc_clnt_new (dict_t *options, glusterfs_ctx_t *ctx,
+ char *name, uint32_t reqpool_size);
+
+int rpc_clnt_start (struct rpc_clnt *rpc);
+
+int rpc_clnt_register_notify (struct rpc_clnt *rpc, rpc_clnt_notify_t fn,
+ void *mydata);
+
+/* Some preconditions related to vectors holding responses.
+ * @rsphdr: should contain pointer to buffer which can hold response header
+ * and length of the program header. In case of procedures whose
+ * respnose size is not bounded (eg., glusterfs lookup), the length
+ * should be equal to size of buffer.
+ * @rsp_payload: should contain pointer and length of the bu
+ *
+ * 1. Both @rsp_hdr and @rsp_payload are optional.
+ * 2. The user of rpc_clnt_submit, if wants response hdr and payload in its own
+ * buffers, then it has to populate @rsphdr and @rsp_payload.
+ * 3. when @rsp_payload is not NULL, @rsphdr should
+ * also be filled with pointer to buffer to hold header and length
+ * of the header.
+ */
+
+int rpc_clnt_submit (struct rpc_clnt *rpc, rpc_clnt_prog_t *prog,
+ int procnum, fop_cbk_fn_t cbkfn,
+ struct iovec *proghdr, int proghdrcount,
+ struct iovec *progpayload, int progpayloadcount,
+ struct iobref *iobref, void *frame, struct iovec *rsphdr,
+ int rsphdr_count, struct iovec *rsp_payload,
+ int rsp_payload_count, struct iobref *rsp_iobref);
+
+struct rpc_clnt *
+rpc_clnt_ref (struct rpc_clnt *rpc);
+
+struct rpc_clnt *
+rpc_clnt_unref (struct rpc_clnt *rpc);
+
+int rpc_clnt_connection_cleanup (rpc_clnt_connection_t *conn);
+
+void rpc_clnt_set_connected (rpc_clnt_connection_t *conn);
+
+void rpc_clnt_unset_connected (rpc_clnt_connection_t *conn);
+void rpc_clnt_reconnect (void *trans_ptr);
+
+void rpc_clnt_reconfig (struct rpc_clnt *rpc, struct rpc_clnt_config *config);
+
+/* All users of RPC services should use this API to register their
+ * procedure handlers.
+ */
+int rpcclnt_cbk_program_register (struct rpc_clnt *svc,
+ rpcclnt_cb_program_t *program, void *mydata);
+
+void
+rpc_clnt_disable (struct rpc_clnt *rpc);
+
+char
+rpc_clnt_is_disabled (struct rpc_clnt *rpc);
+
+#endif /* !_RPC_CLNT_H */
diff --git a/rpc/rpc-lib/src/rpc-drc.c b/rpc/rpc-lib/src/rpc-drc.c
new file mode 100644
index 000000000..8181e6aee
--- /dev/null
+++ b/rpc/rpc-lib/src/rpc-drc.c
@@ -0,0 +1,872 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "rpcsvc.h"
+#ifndef RPC_DRC_H
+#include "rpc-drc.h"
+#endif
+#include "locking.h"
+#include "hashfn.h"
+#include "common-utils.h"
+#include "statedump.h"
+#include "mem-pool.h"
+
+#include <netinet/in.h>
+#include <unistd.h>
+
+/**
+ * rpcsvc_drc_op_destroy - Destroys the cached reply
+ *
+ * @param drc - the main drc structure
+ * @param reply - the cached reply to destroy
+ * @return NULL if reply is destroyed, reply otherwise
+ */
+static drc_cached_op_t *
+rpcsvc_drc_op_destroy (rpcsvc_drc_globals_t *drc, drc_cached_op_t *reply)
+{
+ GF_ASSERT (drc);
+ GF_ASSERT (reply);
+
+ if (reply->state == DRC_OP_IN_TRANSIT)
+ return reply;
+
+ iobref_unref (reply->msg.iobref);
+ if (reply->msg.rpchdr)
+ GF_FREE (reply->msg.rpchdr);
+ if (reply->msg.proghdr)
+ GF_FREE (reply->msg.proghdr);
+ if (reply->msg.progpayload)
+ GF_FREE (reply->msg.progpayload);
+
+ list_del (&reply->global_list);
+ reply->client->op_count--;
+ drc->op_count--;
+ mem_put (reply);
+ reply = NULL;
+
+ return reply;
+}
+
+/**
+ * rpcsvc_drc_op_rb_unref - This function is used in rb tree cleanup only
+ *
+ * @param reply - the cached reply to unref
+ * @param drc - the main drc structure
+ * @return void
+ */
+static void
+rpcsvc_drc_rb_op_destroy (void *reply, void *drc)
+{
+ rpcsvc_drc_op_destroy (drc, (drc_cached_op_t *)reply);
+}
+
+/**
+ * rpcsvc_remove_drc_client - Cleanup the drc client
+ *
+ * @param client - the drc client to be removed
+ * @return void
+ */
+static void
+rpcsvc_remove_drc_client (drc_client_t *client)
+{
+ rb_destroy (client->rbtree, rpcsvc_drc_rb_op_destroy);
+ list_del (&client->client_list);
+ GF_FREE (client);
+}
+
+/**
+ * rpcsvc_client_lookup - Given a sockaddr_storage, find the client if it exists
+ *
+ * @param drc - the main drc structure
+ * @param sockaddr - the network address of the client to be looked up
+ * @return drc client if it exists, NULL otherwise
+ */
+static drc_client_t *
+rpcsvc_client_lookup (rpcsvc_drc_globals_t *drc,
+ struct sockaddr_storage *sockaddr)
+{
+ drc_client_t *client = NULL;
+
+ GF_ASSERT (drc);
+ GF_ASSERT (sockaddr);
+
+ if (list_empty (&drc->clients_head))
+ return NULL;
+
+ list_for_each_entry (client, &drc->clients_head, client_list) {
+ if (gf_sock_union_equal_addr (&client->sock_union,
+ (union gf_sock_union *)sockaddr))
+ return client;
+ }
+
+ return NULL;
+}
+
+/**
+ * drc_compare_reqs - Used by rbtree to determine if incoming req matches with
+ * an existing node(cached reply) in rbtree
+ *
+ * @param item - pointer to the incoming req
+ * @param rb_node_data - pointer to an rbtree node (cached reply)
+ * @param param - drc pointer - unused here, but used in *op_destroy
+ * @return 0 if req matches reply, else (req->xid - reply->xid)
+ */
+int
+drc_compare_reqs (const void *item, const void *rb_node_data, void *param)
+{
+ int ret = -1;
+ rpcsvc_request_t *req = NULL;
+ drc_cached_op_t *reply = NULL;
+
+ GF_ASSERT (item);
+ GF_ASSERT (rb_node_data);
+ GF_ASSERT (param);
+
+ req = (rpcsvc_request_t *)item;
+ reply = (drc_cached_op_t *)rb_node_data;
+
+ ret = req->xid - reply->xid;
+ if (ret != 0)
+ return ret;
+
+ if (req->prognum == reply->prognum &&
+ req->procnum == reply->procnum &&
+ req->progver == reply->progversion)
+ return 0;
+
+ return 1;
+}
+
+/**
+ * drc_rb_calloc - used by rbtree api to allocate memory for nodes
+ *
+ * @param allocator - the libavl_allocator structure used by rbtree
+ * @param size - not needed by this function
+ * @return pointer to new cached reply (node in rbtree)
+ */
+static void *
+drc_rb_calloc (struct libavl_allocator *allocator, size_t size)
+{
+ rpcsvc_drc_globals_t *drc = NULL;
+
+ /* get the drc pointer by simple typecast, since allocator
+ * is the first member of rpcsvc_drc_globals_t
+ */
+ drc = (rpcsvc_drc_globals_t *)allocator;
+
+ return mem_get (drc->mempool);
+}
+
+/**
+ * drc_rb_free - used by rbtree api to free a node
+ *
+ * @param a - the libavl_allocator structure used by rbtree api
+ * @param block - node that needs to be freed
+ * @return void
+ */
+static void
+drc_rb_free (struct libavl_allocator *a, void *block)
+{
+ mem_put (block);
+}
+
+/**
+ * drc_init_client_cache - initialize a drc client and its rb tree
+ *
+ * @param drc - the main drc structure
+ * @param client - the drc client to be initialized
+ * @return 0 on success, -1 on failure
+ */
+static int
+drc_init_client_cache (rpcsvc_drc_globals_t *drc, drc_client_t *client)
+{
+ GF_ASSERT (drc);
+ GF_ASSERT (client);
+
+ drc->allocator.libavl_malloc = drc_rb_calloc;
+ drc->allocator.libavl_free = drc_rb_free;
+
+ client->rbtree = rb_create (drc_compare_reqs, drc,
+ (struct libavl_allocator *)drc);
+ if (!client->rbtree) {
+ gf_log (GF_RPCSVC, GF_LOG_DEBUG, "rb tree creation failed");
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * rpcsvc_get_drc_client - find the drc client with given sockaddr, else
+ * allocate and initialize a new drc client
+ *
+ * @param drc - the main drc structure
+ * @param sockaddr - network address of client
+ * @return drc client on success, NULL on failure
+ */
+static drc_client_t *
+rpcsvc_get_drc_client (rpcsvc_drc_globals_t *drc,
+ struct sockaddr_storage *sockaddr)
+{
+ drc_client_t *client = NULL;
+
+ GF_ASSERT (drc);
+ GF_ASSERT (sockaddr);
+
+ client = rpcsvc_client_lookup (drc, sockaddr);
+ if (client)
+ goto out;
+
+ /* if lookup fails, allocate cache for the new client */
+ client = GF_CALLOC (1, sizeof (drc_client_t),
+ gf_common_mt_drc_client_t);
+ if (!client)
+ goto out;
+
+ client->ref = 0;
+ client->sock_union = (union gf_sock_union)*sockaddr;
+ client->op_count = 0;
+
+ if (drc_init_client_cache (drc, client)) {
+ gf_log (GF_RPCSVC, GF_LOG_DEBUG,
+ "initialization of drc client failed");
+ GF_FREE (client);
+ client = NULL;
+ goto out;
+ }
+ drc->client_count++;
+
+ list_add (&client->client_list, &drc->clients_head);
+
+ out:
+ return client;
+}
+
+/**
+ * rpcsvc_need_drc - Determine if a request needs DRC service
+ *
+ * @param req - incoming request
+ * @return 1 if DRC is needed for req, 0 otherwise
+ */
+int
+rpcsvc_need_drc (rpcsvc_request_t *req)
+{
+ rpcsvc_actor_t *actor = NULL;
+ rpcsvc_drc_globals_t *drc = NULL;
+
+ GF_ASSERT (req);
+ GF_ASSERT (req->svc);
+
+ drc = req->svc->drc;
+
+ if (!drc || drc->status == DRC_UNINITIATED)
+ return 0;
+
+ actor = rpcsvc_program_actor (req);
+ if (!actor)
+ return 0;
+
+ return (actor->op_type == DRC_NON_IDEMPOTENT
+ && drc->type != DRC_TYPE_NONE);
+}
+
+/**
+ * rpcsvc_drc_client_ref - ref the drc client
+ *
+ * @param client - the drc client to ref
+ * @return client
+ */
+static drc_client_t *
+rpcsvc_drc_client_ref (drc_client_t *client)
+{
+ GF_ASSERT (client);
+ client->ref++;
+ return client;
+}
+
+/**
+ * rpcsvc_drc_client_unref - unref the drc client, and destroy
+ * the client on last unref
+ *
+ * @param drc - the main drc structure
+ * @param client - the drc client to unref
+ * @return NULL if it is the last unref, client otherwise
+ */
+static drc_client_t *
+rpcsvc_drc_client_unref (rpcsvc_drc_globals_t *drc, drc_client_t *client)
+{
+ GF_ASSERT (drc);
+ GF_ASSERT (client->ref);
+
+ client->ref--;
+ if (!client->ref) {
+ drc->client_count--;
+ rpcsvc_remove_drc_client (client);
+ client = NULL;
+ }
+
+ return client;
+}
+
+/**
+ * rpcsvc_drc_lookup - lookup a request to see if it is already cached
+ *
+ * @param req - incoming request
+ * @return cached reply of req if found, NULL otherwise
+ */
+drc_cached_op_t *
+rpcsvc_drc_lookup (rpcsvc_request_t *req)
+{
+ drc_client_t *client = NULL;
+ drc_cached_op_t *reply = NULL;
+
+ GF_ASSERT (req);
+
+ if (!req->trans->drc_client) {
+ client = rpcsvc_get_drc_client (req->svc->drc,
+ &req->trans->peerinfo.sockaddr);
+ if (!client)
+ goto out;
+ req->trans->drc_client = client;
+ }
+
+ client = rpcsvc_drc_client_ref (req->trans->drc_client);
+
+ if (client->op_count == 0)
+ goto out;
+
+ reply = rb_find (client->rbtree, req);
+
+ out:
+ if (client)
+ rpcsvc_drc_client_unref (req->svc->drc, client);
+
+ return reply;
+}
+
+/**
+ * rpcsvc_send_cached_reply - send the cached reply for the incoming request
+ *
+ * @param req - incoming request (which is a duplicate in this case)
+ * @param reply - the cached reply for req
+ * @return 0 on successful reply submission, -1 or other non-zero value otherwise
+ */
+int
+rpcsvc_send_cached_reply (rpcsvc_request_t *req, drc_cached_op_t *reply)
+{
+ int ret = 0;
+
+ GF_ASSERT (req);
+ GF_ASSERT (reply);
+
+ gf_log (GF_RPCSVC, GF_LOG_DEBUG, "sending cached reply: xid: %d, "
+ "client: %s", req->xid, req->trans->peerinfo.identifier);
+
+ rpcsvc_drc_client_ref (reply->client);
+ ret = rpcsvc_transport_submit (req->trans,
+ reply->msg.rpchdr, reply->msg.rpchdrcount,
+ reply->msg.proghdr, reply->msg.proghdrcount,
+ reply->msg.progpayload, reply->msg.progpayloadcount,
+ reply->msg.iobref, req->trans_private);
+ rpcsvc_drc_client_unref (req->svc->drc, reply->client);
+
+ return ret;
+}
+
+/**
+ * rpcsvc_cache_reply - cache the reply for the processed request 'req'
+ *
+ * @param req - processed request
+ * @param iobref - iobref structure of the reply
+ * @param rpchdr - rpc header of the reply
+ * @param rpchdrcount - size of rpchdr
+ * @param proghdr - program header of the reply
+ * @param proghdrcount - size of proghdr
+ * @param payload - payload of the reply if any
+ * @param payloadcount - size of payload
+ * @return 0 on success, -1 on failure
+ */
+int
+rpcsvc_cache_reply (rpcsvc_request_t *req, struct iobref *iobref,
+ struct iovec *rpchdr, int rpchdrcount,
+ struct iovec *proghdr, int proghdrcount,
+ struct iovec *payload, int payloadcount)
+{
+ int ret = -1;
+ drc_cached_op_t *reply = NULL;
+
+ GF_ASSERT (req);
+ GF_ASSERT (req->reply);
+
+ reply = req->reply;
+
+ reply->state = DRC_OP_CACHED;
+
+ reply->msg.iobref = iobref_ref (iobref);
+
+ reply->msg.rpchdrcount = rpchdrcount;
+ reply->msg.rpchdr = iov_dup (rpchdr, rpchdrcount);
+
+ reply->msg.proghdrcount = proghdrcount;
+ reply->msg.proghdr = iov_dup (proghdr, proghdrcount);
+
+ reply->msg.progpayloadcount = payloadcount;
+ if (payloadcount)
+ reply->msg.progpayload = iov_dup (payload, payloadcount);
+
+ // rpcsvc_drc_client_unref (req->svc->drc, req->trans->drc_client);
+ // rpcsvc_drc_op_unref (req->svc->drc, reply);
+ ret = 0;
+
+ return ret;
+}
+
+/**
+ * rpcsvc_vacate_drc_entries - free up some percentage of drc cache
+ * based on the lru factor
+ *
+ * @param drc - the main drc structure
+ * @return void
+ */
+static void
+rpcsvc_vacate_drc_entries (rpcsvc_drc_globals_t *drc)
+{
+ uint32_t i = 0;
+ uint32_t n = 0;
+ drc_cached_op_t *reply = NULL;
+ drc_cached_op_t *tmp = NULL;
+ drc_client_t *client = NULL;
+
+ GF_ASSERT (drc);
+
+ n = drc->global_cache_size / drc->lru_factor;
+
+ list_for_each_entry_safe_reverse (reply, tmp, &drc->cache_head, global_list) {
+ /* Don't delete ops that are in transit */
+ if (reply->state == DRC_OP_IN_TRANSIT)
+ continue;
+
+ client = reply->client;
+
+ (void *)rb_delete (client->rbtree, reply);
+
+ rpcsvc_drc_op_destroy (drc, reply);
+ rpcsvc_drc_client_unref (drc, client);
+ i++;
+ if (i >= n)
+ break;
+ }
+}
+
+/**
+ * rpcsvc_add_op_to_cache - insert the cached op into the client rbtree and drc list
+ *
+ * @param drc - the main drc structure
+ * @param reply - the op to be inserted
+ * @return 0 on success, -1 on failure
+ */
+static int
+rpcsvc_add_op_to_cache (rpcsvc_drc_globals_t *drc, drc_cached_op_t *reply)
+{
+ drc_client_t *client = NULL;
+ drc_cached_op_t **tmp_reply = NULL;
+
+ GF_ASSERT (drc);
+ GF_ASSERT (reply);
+
+ client = reply->client;
+
+ /* cache is full, free up some space */
+ if (drc->op_count >= drc->global_cache_size)
+ rpcsvc_vacate_drc_entries (drc);
+
+ tmp_reply = (drc_cached_op_t **)rb_probe (client->rbtree, reply);
+ if (*tmp_reply != reply) {
+ /* should never happen */
+ gf_log (GF_RPCSVC, GF_LOG_ERROR,
+ "DRC failed to detect duplicates");
+ return -1;
+ } else if (*tmp_reply == NULL) {
+ /* mem alloc failed */
+ return -1;
+ }
+
+ client->op_count++;
+ list_add (&reply->global_list, &drc->cache_head);
+ drc->op_count++;
+
+ return 0;
+}
+
+/**
+ * rpcsvc_cache_request - cache the in-transition incoming request
+ *
+ * @param req - incoming request
+ * @return 0 on success, -1 on failure
+ */
+int
+rpcsvc_cache_request (rpcsvc_request_t *req)
+{
+ int ret = -1;
+ drc_client_t *client = NULL;
+ drc_cached_op_t *reply = NULL;
+ rpcsvc_drc_globals_t *drc = NULL;
+
+ GF_ASSERT (req);
+
+ drc = req->svc->drc;
+
+ client = req->trans->drc_client;
+ if (!client) {
+ gf_log (GF_RPCSVC, GF_LOG_DEBUG, "drc client is NULL");
+ goto out;
+ }
+
+ reply = mem_get (drc->mempool);
+ if (!reply)
+ goto out;
+
+ reply->client = rpcsvc_drc_client_ref (client);
+ reply->xid = req->xid;
+ reply->prognum = req->prognum;
+ reply->progversion = req->progver;
+ reply->procnum = req->procnum;
+ reply->state = DRC_OP_IN_TRANSIT;
+ req->reply = reply;
+
+ ret = rpcsvc_add_op_to_cache (drc, reply);
+ if (ret) {
+ req->reply = NULL;
+ rpcsvc_drc_op_destroy (drc, reply);
+ rpcsvc_drc_client_unref (drc, client);
+ gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Failed to add op to drc cache");
+ }
+
+ out:
+ return ret;
+}
+
+/**
+ *
+ * rpcsvc_drc_priv - function which dumps the drc state
+ *
+ * @param drc - the main drc structure
+ * @return 0 on success, -1 on failure
+ */
+int32_t
+rpcsvc_drc_priv (rpcsvc_drc_globals_t *drc)
+{
+ int i = 0;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0};
+ drc_client_t *client = NULL;
+ char ip[INET6_ADDRSTRLEN] = {0};
+
+ if (!drc || drc->status == DRC_UNINITIATED) {
+ gf_log (GF_RPCSVC, GF_LOG_DEBUG, "DRC is "
+ "uninitialized, not dumping its state");
+ return 0;
+ }
+
+ gf_proc_dump_add_section("rpc.drc");
+
+ if (TRY_LOCK (&drc->lock))
+ return -1;
+
+ gf_proc_dump_build_key (key, "drc", "type");
+ gf_proc_dump_write (key, "%d", drc->type);
+
+ gf_proc_dump_build_key (key, "drc", "client_count");
+ gf_proc_dump_write (key, "%d", drc->client_count);
+
+ gf_proc_dump_build_key (key, "drc", "current_cache_size");
+ gf_proc_dump_write (key, "%d", drc->op_count);
+
+ gf_proc_dump_build_key (key, "drc", "max_cache_size");
+ gf_proc_dump_write (key, "%d", drc->global_cache_size);
+
+ gf_proc_dump_build_key (key, "drc", "lru_factor");
+ gf_proc_dump_write (key, "%d", drc->lru_factor);
+
+ gf_proc_dump_build_key (key, "drc", "duplicate_request_count");
+ gf_proc_dump_write (key, "%d", drc->cache_hits);
+
+ gf_proc_dump_build_key (key, "drc", "in_transit_duplicate_requests");
+ gf_proc_dump_write (key, "%d", drc->intransit_hits);
+
+ list_for_each_entry (client, &drc->clients_head, client_list) {
+ gf_proc_dump_build_key (key, "client", "%d.ip-address", i);
+ memset (ip, 0, INET6_ADDRSTRLEN);
+ switch (client->sock_union.storage.ss_family) {
+ case AF_INET:
+ gf_proc_dump_write (key, "%s", inet_ntop (AF_INET,
+ &client->sock_union.sin.sin_addr.s_addr,
+ ip, INET_ADDRSTRLEN));
+ break;
+ case AF_INET6:
+ gf_proc_dump_write (key, "%s", inet_ntop (AF_INET6,
+ &client->sock_union.sin6.sin6_addr,
+ ip, INET6_ADDRSTRLEN));
+ break;
+ default:
+ gf_proc_dump_write (key, "%s", "N/A");
+ }
+
+ gf_proc_dump_build_key (key, "client", "%d.ref_count", i);
+ gf_proc_dump_write (key, "%d", client->ref);
+ gf_proc_dump_build_key (key, "client", "%d.op_count", i);
+ gf_proc_dump_write (key, "%d", client->op_count);
+ i++;
+ }
+
+ UNLOCK (&drc->lock);
+ return 0;
+}
+
+/**
+ * rpcsvc_drc_notify - function which is notified of RPC transport events
+ *
+ * @param svc - pointer to rpcsvc_t structure of the rpc
+ * @param xl - pointer to the xlator
+ * @param event - the event which triggered this notify
+ * @param data - the transport structure
+ * @return 0 on success, -1 on failure
+ */
+int
+rpcsvc_drc_notify (rpcsvc_t *svc, void *xl,
+ rpcsvc_event_t event, void *data)
+{
+ int ret = -1;
+ rpc_transport_t *trans = NULL;
+ drc_client_t *client = NULL;
+ rpcsvc_drc_globals_t *drc = NULL;
+
+ GF_ASSERT (svc);
+ GF_ASSERT (svc->drc);
+ GF_ASSERT (data);
+
+ drc = svc->drc;
+
+ if (drc->status == DRC_UNINITIATED ||
+ drc->type == DRC_TYPE_NONE)
+ return 0;
+
+ LOCK (&drc->lock);
+
+ trans = (rpc_transport_t *)data;
+ client = rpcsvc_get_drc_client (drc, &trans->peerinfo.sockaddr);
+ if (!client)
+ goto out;
+
+ switch (event) {
+ case RPCSVC_EVENT_ACCEPT:
+ trans->drc_client = rpcsvc_drc_client_ref (client);
+ ret = 0;
+ break;
+
+ case RPCSVC_EVENT_DISCONNECT:
+ ret = 0;
+ if (list_empty (&drc->clients_head))
+ break;
+ /* should be the last unref */
+ rpcsvc_drc_client_unref (drc, client);
+ trans->drc_client = NULL;
+ break;
+
+ default:
+ break;
+ }
+
+ out:
+ UNLOCK (&drc->lock);
+ return ret;
+}
+
+/**
+ * rpcsvc_drc_init - Initialize the duplicate request cache service
+ *
+ * @param svc - pointer to rpcsvc_t structure of the rpc
+ * @param options - the options dictionary which configures drc
+ * @return 0 on success, non-zero integer on failure
+ */
+int
+rpcsvc_drc_init (rpcsvc_t *svc, dict_t *options)
+{
+ int ret = 0;
+ uint32_t drc_type = 0;
+ uint32_t drc_size = 0;
+ uint32_t drc_factor = 0;
+ rpcsvc_drc_globals_t *drc = NULL;
+ static gf_boolean_t drc_inited = _gf_false;
+
+ GF_ASSERT (svc);
+ GF_ASSERT (options);
+
+ /* Already inited */
+ if (drc_inited)
+ return 0;
+
+ if (!svc->drc) {
+ drc = GF_CALLOC (1, sizeof (rpcsvc_drc_globals_t),
+ gf_common_mt_drc_globals_t);
+ if (!drc)
+ return -1;
+
+ svc->drc = drc;
+ LOCK_INIT (&drc->lock);
+ } else {
+ drc = svc->drc;
+ }
+
+ LOCK (&drc->lock);
+ if (drc->type != DRC_TYPE_NONE) {
+ ret = 0;
+ goto out;
+ }
+
+ /* Toggle DRC on/off, when more drc types(persistent/cluster)
+ are added, we shouldn't treat this as boolean */
+ ret = dict_get_str_boolean (options, "nfs.drc", _gf_true);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_INFO, "drc user options need second look");
+ ret = _gf_true;
+ }
+ drc->enable_drc = ret;
+
+ if (ret == _gf_false) {
+ /* drc off */
+ gf_log (GF_RPCSVC, GF_LOG_DEBUG, "DRC is off");
+ ret = 0;
+ goto out;
+ }
+
+ /* Specify type of DRC to be used */
+ ret = dict_get_uint32 (options, "nfs.drc-type", &drc_type);
+ if (ret) {
+ gf_log (GF_RPCSVC, GF_LOG_DEBUG, "drc type not set."
+ " Continuing with default");
+ drc_type = DRC_DEFAULT_TYPE;
+ }
+
+ drc->type = drc_type;
+
+ /* Set the global cache size (no. of ops to cache) */
+ ret = dict_get_uint32 (options, "nfs.drc-size", &drc_size);
+ if (ret) {
+ gf_log (GF_RPCSVC, GF_LOG_DEBUG, "drc size not set."
+ " Continuing with default size");
+ drc_size = DRC_DEFAULT_CACHE_SIZE;
+ }
+
+ drc->global_cache_size = drc_size;
+
+ /* Mempool for cached ops */
+ drc->mempool = mem_pool_new (drc_cached_op_t, drc->global_cache_size);
+ if (!drc->mempool) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to get mempool for"
+ " DRC, drc-size: %d", drc->global_cache_size);
+ ret = -1;
+ goto out;
+ }
+
+ /* What percent of cache to be evicted whenever it fills up */
+ ret = dict_get_uint32 (options, "nfs.drc-lru-factor", &drc_factor);
+ if (ret) {
+ gf_log (GF_RPCSVC, GF_LOG_DEBUG, "drc lru factor not set."
+ " Continuing with policy default");
+ drc_factor = DRC_DEFAULT_LRU_FACTOR;
+ }
+
+ drc->lru_factor = (drc_lru_factor_t) drc_factor;
+
+ INIT_LIST_HEAD (&drc->clients_head);
+ INIT_LIST_HEAD (&drc->cache_head);
+
+ ret = rpcsvc_register_notify (svc, rpcsvc_drc_notify, THIS);
+ if (ret) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR,
+ "registration of drc_notify function failed");
+ goto out;
+ }
+
+ gf_log (GF_RPCSVC, GF_LOG_DEBUG, "drc init successful");
+ drc->status = DRC_INITIATED;
+ drc_inited = _gf_true;
+
+ out:
+ UNLOCK (&drc->lock);
+ if (ret == -1) {
+ if (drc->mempool) {
+ mem_pool_destroy (drc->mempool);
+ drc->mempool = NULL;
+ }
+ GF_FREE (drc);
+ svc->drc = NULL;
+ }
+ return ret;
+}
+
+int
+rpcsvc_drc_reconfigure (rpcsvc_t *svc, dict_t *options)
+{
+ int ret = -1;
+ gf_boolean_t enable_drc = _gf_false;
+ rpcsvc_drc_globals_t *drc = NULL;
+ uint32_t drc_size = 0;
+
+ if ((!svc) || (!options))
+ return (-1);
+
+ drc = svc->drc;
+ /* reconfig for drc-size */
+ if (dict_get_uint32 (options, "nfs.drc-size", &drc_size))
+ drc_size = DRC_DEFAULT_CACHE_SIZE;
+
+ if (drc->global_cache_size != drc_size) {
+ gf_log (GF_RPCSVC, GF_LOG_DEBUG, "nfs.drc-size size can not "
+ "be reconfigured without NFS server restart.");
+ return (-1);
+ }
+
+ /* reconfig for nfs.drc */
+ ret = dict_get_str_boolean (options, "nfs.drc", _gf_true);
+ if (ret < 0) {
+ ret = _gf_true;
+ }
+ enable_drc = ret;
+
+ if (drc->enable_drc == enable_drc)
+ return 0;
+
+ drc->enable_drc = enable_drc;
+ if (enable_drc) {
+ if (drc == NULL)
+ return rpcsvc_drc_init(svc, options);
+ } else {
+ if (drc == NULL)
+ return (0);
+
+ LOCK (&drc->lock);
+ (void) rpcsvc_unregister_notify (svc, rpcsvc_drc_notify, THIS);
+ if (drc->mempool) {
+ mem_pool_destroy (drc->mempool);
+ drc->mempool = NULL;
+ }
+ UNLOCK (&drc->lock);
+ GF_FREE (drc);
+ svc->drc = NULL;
+ }
+
+ return (0);
+}
diff --git a/rpc/rpc-lib/src/rpc-drc.h b/rpc/rpc-lib/src/rpc-drc.h
new file mode 100644
index 000000000..7dfaef978
--- /dev/null
+++ b/rpc/rpc-lib/src/rpc-drc.h
@@ -0,0 +1,104 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef RPC_DRC_H
+#define RPC_DRC_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "rpcsvc-common.h"
+#include "rpcsvc.h"
+#include "locking.h"
+#include "dict.h"
+#include "rb.h"
+
+/* per-client cache structure */
+struct drc_client {
+ uint32_t ref;
+ union gf_sock_union sock_union;
+ /* pointers to the cache */
+ struct rb_table *rbtree;
+ /* no. of ops currently cached */
+ uint32_t op_count;
+ struct list_head client_list;
+};
+
+struct drc_cached_op {
+ drc_op_state_t state;
+ uint32_t xid;
+ int prognum;
+ int progversion;
+ int procnum;
+ rpc_transport_msg_t msg;
+ drc_client_t *client;
+ struct list_head client_list;
+ struct list_head global_list;
+ int32_t ref;
+};
+
+/* global drc definitions */
+enum drc_status {
+ DRC_UNINITIATED,
+ DRC_INITIATED
+};
+typedef enum drc_status drc_status_t;
+
+struct drc_globals {
+ /* allocator must be the first member since
+ * it is used so in gf_libavl_allocator
+ */
+ struct libavl_allocator allocator;
+ drc_type_t type;
+ /* configurable size parameter */
+ uint32_t global_cache_size;
+ drc_lru_factor_t lru_factor;
+ gf_lock_t lock;
+ drc_status_t status;
+ uint32_t op_count;
+ uint64_t cache_hits;
+ uint64_t intransit_hits;
+ struct mem_pool *mempool;
+ struct list_head cache_head;
+ uint32_t client_count;
+ struct list_head clients_head;
+ gf_boolean_t enable_drc;
+};
+
+int
+rpcsvc_need_drc (rpcsvc_request_t *req);
+
+drc_cached_op_t *
+rpcsvc_drc_lookup (rpcsvc_request_t *req);
+
+int
+rpcsvc_send_cached_reply (rpcsvc_request_t *req, drc_cached_op_t *reply);
+
+int
+rpcsvc_cache_reply (rpcsvc_request_t *req, struct iobref *iobref,
+ struct iovec *rpchdr, int rpchdrcount,
+ struct iovec *proghdr, int proghdrcount,
+ struct iovec *payload, int payloadcount);
+
+int
+rpcsvc_cache_request (rpcsvc_request_t *req);
+
+int32_t
+rpcsvc_drc_priv (rpcsvc_drc_globals_t *drc);
+
+int
+rpcsvc_drc_init (rpcsvc_t *svc, dict_t *options);
+
+int
+rpcsvc_drc_reconfigure (rpcsvc_t *svc, dict_t *options);
+
+#endif /* RPC_DRC_H */
diff --git a/rpc/rpc-lib/src/rpc-transport.c b/rpc/rpc-lib/src/rpc-transport.c
new file mode 100644
index 000000000..c24d41084
--- /dev/null
+++ b/rpc/rpc-lib/src/rpc-transport.c
@@ -0,0 +1,679 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <dlfcn.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/poll.h>
+#include <fnmatch.h>
+#include <stdint.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "logging.h"
+#include "rpc-transport.h"
+#include "glusterfs.h"
+/* FIXME: xlator.h is needed for volume_option_t, need to define the datatype
+ * in some other header
+ */
+#include "xlator.h"
+#include "list.h"
+
+#ifndef GF_OPTION_LIST_EMPTY
+#define GF_OPTION_LIST_EMPTY(_opt) (_opt->value[0] == NULL)
+#endif
+
+
+int
+rpc_transport_get_myaddr (rpc_transport_t *this, char *peeraddr, int addrlen,
+ struct sockaddr_storage *sa, size_t salen)
+{
+ int32_t ret = -1;
+ GF_VALIDATE_OR_GOTO ("rpc", this, out);
+
+ ret = this->ops->get_myaddr (this, peeraddr, addrlen, sa, salen);
+
+out:
+ return ret;
+}
+
+int32_t
+rpc_transport_get_myname (rpc_transport_t *this, char *hostname, int hostlen)
+{
+ int32_t ret = -1;
+ GF_VALIDATE_OR_GOTO ("rpc", this, out);
+
+ ret = this->ops->get_myname (this, hostname, hostlen);
+out:
+ return ret;
+}
+
+int32_t
+rpc_transport_get_peername (rpc_transport_t *this, char *hostname, int hostlen)
+{
+ int32_t ret = -1;
+ GF_VALIDATE_OR_GOTO ("rpc", this, out);
+
+ ret = this->ops->get_peername (this, hostname, hostlen);
+out:
+ return ret;
+}
+
+int
+rpc_transport_throttle (rpc_transport_t *this, gf_boolean_t onoff)
+{
+ int ret = 0;
+
+ if (!this->ops->throttle)
+ return -ENOSYS;
+
+ ret = this->ops->throttle (this, onoff);
+
+ return ret;
+}
+
+int32_t
+rpc_transport_get_peeraddr (rpc_transport_t *this, char *peeraddr, int addrlen,
+ struct sockaddr_storage *sa, size_t salen)
+{
+ int32_t ret = -1;
+ GF_VALIDATE_OR_GOTO ("rpc", this, out);
+
+ ret = this->ops->get_peeraddr (this, peeraddr, addrlen, sa, salen);
+out:
+ return ret;
+}
+
+void
+rpc_transport_pollin_destroy (rpc_transport_pollin_t *pollin)
+{
+ GF_VALIDATE_OR_GOTO ("rpc", pollin, out);
+
+ if (pollin->iobref) {
+ iobref_unref (pollin->iobref);
+ }
+
+ if (pollin->hdr_iobuf) {
+ iobuf_unref (pollin->hdr_iobuf);
+ }
+
+ if (pollin->private) {
+ /* */
+ GF_FREE (pollin->private);
+ }
+
+ GF_FREE (pollin);
+out:
+ return;
+}
+
+
+rpc_transport_pollin_t *
+rpc_transport_pollin_alloc (rpc_transport_t *this, struct iovec *vector,
+ int count, struct iobuf *hdr_iobuf,
+ struct iobref *iobref, void *private)
+{
+ rpc_transport_pollin_t *msg = NULL;
+ msg = GF_CALLOC (1, sizeof (*msg), gf_common_mt_rpc_trans_pollin_t);
+ if (!msg) {
+ goto out;
+ }
+
+ if (count > 1) {
+ msg->vectored = 1;
+ }
+
+ memcpy (msg->vector, vector, count * sizeof (*vector));
+ msg->count = count;
+ msg->iobref = iobref_ref (iobref);
+ msg->private = private;
+ if (hdr_iobuf)
+ msg->hdr_iobuf = iobuf_ref (hdr_iobuf);
+
+out:
+ return msg;
+}
+
+
+
+rpc_transport_t *
+rpc_transport_load (glusterfs_ctx_t *ctx, dict_t *options, char *trans_name)
+{
+ struct rpc_transport *trans = NULL, *return_trans = NULL;
+ char *name = NULL;
+ void *handle = NULL;
+ char *type = NULL;
+ char str[] = "ERROR";
+ int32_t ret = -1;
+ int8_t is_tcp = 0, is_unix = 0, is_ibsdp = 0;
+ volume_opt_list_t *vol_opt = NULL;
+ gf_boolean_t bind_insecure = _gf_false;
+ xlator_t *this = NULL;
+
+ GF_VALIDATE_OR_GOTO("rpc-transport", options, fail);
+ GF_VALIDATE_OR_GOTO("rpc-transport", ctx, fail);
+ GF_VALIDATE_OR_GOTO("rpc-transport", trans_name, fail);
+
+ trans = GF_CALLOC (1, sizeof (struct rpc_transport), gf_common_mt_rpc_trans_t);
+ if (!trans)
+ goto fail;
+
+ trans->name = gf_strdup (trans_name);
+ if (!trans->name)
+ goto fail;
+
+ trans->ctx = ctx;
+ type = str;
+
+ /* Backward compatibility */
+ ret = dict_get_str (options, "transport-type", &type);
+ if (ret < 0) {
+ ret = dict_set_str (options, "transport-type", "socket");
+ if (ret < 0)
+ gf_log ("dict", GF_LOG_DEBUG,
+ "setting transport-type failed");
+ else
+ gf_log ("rpc-transport", GF_LOG_DEBUG,
+ "missing 'option transport-type'. defaulting to "
+ "\"socket\"");
+ } else {
+ {
+ /* Backword compatibility to handle * /client,
+ * * /server.
+ */
+ char *tmp = strchr (type, '/');
+ if (tmp)
+ *tmp = '\0';
+ }
+
+ is_tcp = strcmp (type, "tcp");
+ is_unix = strcmp (type, "unix");
+ is_ibsdp = strcmp (type, "ib-sdp");
+ if ((is_tcp == 0) ||
+ (is_unix == 0) ||
+ (is_ibsdp == 0)) {
+ if (is_unix == 0)
+ ret = dict_set_str (options,
+ "transport.address-family",
+ "unix");
+ if (is_ibsdp == 0)
+ ret = dict_set_str (options,
+ "transport.address-family",
+ "inet-sdp");
+
+ if (ret < 0)
+ gf_log ("dict", GF_LOG_DEBUG,
+ "setting address-family failed");
+
+ ret = dict_set_str (options,
+ "transport-type", "socket");
+ if (ret < 0)
+ gf_log ("dict", GF_LOG_DEBUG,
+ "setting transport-type failed");
+ }
+ }
+
+ /* client-bind-insecure is for clients protocol, and
+ * bind-insecure for glusterd. Both mutually exclusive
+ */
+ ret = dict_get_str (options, "client-bind-insecure", &type);
+ if (ret)
+ ret = dict_get_str (options, "bind-insecure", &type);
+ if (ret == 0) {
+ ret = gf_string2boolean (type, &bind_insecure);
+ if (ret < 0) {
+ gf_log ("rcp-transport", GF_LOG_WARNING,
+ "bind-insecure option %s is not a"
+ " valid bool option", type);
+ goto fail;
+ }
+ if (_gf_true == bind_insecure)
+ trans->bind_insecure = 1;
+ else
+ trans->bind_insecure = 0;
+ } else {
+ trans->bind_insecure = 0;
+ }
+
+ ret = dict_get_str (options, "transport-type", &type);
+ if (ret < 0) {
+ gf_log ("rpc-transport", GF_LOG_ERROR,
+ "'option transport-type <xx>' missing in volume '%s'",
+ trans_name);
+ goto fail;
+ }
+
+ ret = gf_asprintf (&name, "%s/%s.so", RPC_TRANSPORTDIR, type);
+ if (-1 == ret) {
+ goto fail;
+ }
+
+ gf_log ("rpc-transport", GF_LOG_DEBUG,
+ "attempt to load file %s", name);
+
+ handle = dlopen (name, RTLD_NOW|RTLD_GLOBAL);
+ if (handle == NULL) {
+ gf_log ("rpc-transport", GF_LOG_ERROR, "%s", dlerror ());
+ gf_log ("rpc-transport", GF_LOG_WARNING,
+ "volume '%s': transport-type '%s' is not valid or "
+ "not found on this machine",
+ trans_name, type);
+ goto fail;
+ }
+
+ trans->dl_handle = handle;
+
+ trans->ops = dlsym (handle, "tops");
+ if (trans->ops == NULL) {
+ gf_log ("rpc-transport", GF_LOG_ERROR,
+ "dlsym (rpc_transport_ops) on %s", dlerror ());
+ goto fail;
+ }
+
+ *VOID(&(trans->init)) = dlsym (handle, "init");
+ if (trans->init == NULL) {
+ gf_log ("rpc-transport", GF_LOG_ERROR,
+ "dlsym (gf_rpc_transport_init) on %s", dlerror ());
+ goto fail;
+ }
+
+ *VOID(&(trans->fini)) = dlsym (handle, "fini");
+ if (trans->fini == NULL) {
+ gf_log ("rpc-transport", GF_LOG_ERROR,
+ "dlsym (gf_rpc_transport_fini) on %s", dlerror ());
+ goto fail;
+ }
+
+ *VOID(&(trans->reconfigure)) = dlsym (handle, "reconfigure");
+ if (trans->reconfigure == NULL) {
+ gf_log ("rpc-transport", GF_LOG_DEBUG,
+ "dlsym (gf_rpc_transport_reconfigure) on %s", dlerror());
+ }
+
+ vol_opt = GF_CALLOC (1, sizeof (volume_opt_list_t),
+ gf_common_mt_volume_opt_list_t);
+ if (!vol_opt) {
+ goto fail;
+ }
+
+ this = THIS;
+ vol_opt->given_opt = dlsym (handle, "options");
+ if (vol_opt->given_opt == NULL) {
+ gf_log ("rpc-transport", GF_LOG_DEBUG,
+ "volume option validation not specified");
+ } else {
+ INIT_LIST_HEAD (&vol_opt->list);
+ list_add_tail (&vol_opt->list, &(this->volume_options));
+ if (xlator_options_validate_list (this, options, vol_opt,
+ NULL)) {
+ gf_log ("rpc-transport", GF_LOG_ERROR,
+ "volume option validation failed");
+ goto fail;
+ }
+ }
+
+ trans->options = options;
+
+ pthread_mutex_init (&trans->lock, NULL);
+ trans->xl = this;
+
+ ret = trans->init (trans);
+ if (ret != 0) {
+ gf_log ("rpc-transport", GF_LOG_ERROR,
+ "'%s' initialization failed", type);
+ goto fail;
+ }
+
+ return_trans = trans;
+
+ GF_FREE (name);
+
+ return return_trans;
+
+fail:
+ if (trans) {
+ GF_FREE (trans->name);
+
+ if (trans->dl_handle)
+ dlclose (trans->dl_handle);
+
+ GF_FREE (trans);
+ }
+
+ GF_FREE (name);
+
+ if (vol_opt && !list_empty (&vol_opt->list)) {
+ list_del_init (&vol_opt->list);
+ GF_FREE (vol_opt);
+ }
+
+ return NULL;
+}
+
+
+int32_t
+rpc_transport_submit_request (rpc_transport_t *this, rpc_transport_req_t *req)
+{
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
+ GF_VALIDATE_OR_GOTO("rpc_transport", this->ops, fail);
+
+ ret = this->ops->submit_request (this, req);
+fail:
+ return ret;
+}
+
+
+int32_t
+rpc_transport_submit_reply (rpc_transport_t *this, rpc_transport_reply_t *reply)
+{
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
+ GF_VALIDATE_OR_GOTO("rpc_transport", this->ops, fail);
+
+ ret = this->ops->submit_reply (this, reply);
+fail:
+ return ret;
+}
+
+
+int32_t
+rpc_transport_connect (rpc_transport_t *this, int port)
+{
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
+
+ ret = this->ops->connect (this, port);
+fail:
+ return ret;
+}
+
+
+int32_t
+rpc_transport_listen (rpc_transport_t *this)
+{
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
+
+ ret = this->ops->listen (this);
+fail:
+ return ret;
+}
+
+
+int32_t
+rpc_transport_disconnect (rpc_transport_t *this)
+{
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
+
+ ret = this->ops->disconnect (this);
+fail:
+ return ret;
+}
+
+
+int32_t
+rpc_transport_destroy (rpc_transport_t *this)
+{
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
+
+ if (this->options)
+ dict_unref (this->options);
+ if (this->fini)
+ this->fini (this);
+
+ pthread_mutex_destroy (&this->lock);
+
+ GF_FREE (this->name);
+
+ if (this->dl_handle)
+ dlclose (this->dl_handle);
+
+ GF_FREE (this);
+fail:
+ return ret;
+}
+
+
+rpc_transport_t *
+rpc_transport_ref (rpc_transport_t *this)
+{
+ rpc_transport_t *return_this = NULL;
+
+ GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
+
+ pthread_mutex_lock (&this->lock);
+ {
+ this->refcount ++;
+ }
+ pthread_mutex_unlock (&this->lock);
+
+ return_this = this;
+fail:
+ return return_this;
+}
+
+
+int32_t
+rpc_transport_unref (rpc_transport_t *this)
+{
+ int32_t refcount = 0;
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
+
+ pthread_mutex_lock (&this->lock);
+ {
+ refcount = --this->refcount;
+ }
+ pthread_mutex_unlock (&this->lock);
+
+ if (refcount == 0) {
+ if (this->mydata)
+ this->notify (this, this->mydata, RPC_TRANSPORT_CLEANUP,
+ NULL);
+ this->mydata = NULL;
+ this->notify = NULL;
+ rpc_transport_destroy (this);
+ }
+
+ ret = 0;
+fail:
+ return ret;
+}
+
+
+int32_t
+rpc_transport_notify (rpc_transport_t *this, rpc_transport_event_t event,
+ void *data, ...)
+{
+ int32_t ret = -1;
+ GF_VALIDATE_OR_GOTO ("rpc", this, out);
+
+ if (this->notify != NULL) {
+ ret = this->notify (this, this->mydata, event, data);
+ } else {
+ ret = 0;
+ }
+out:
+ return ret;
+}
+
+
+
+inline int
+rpc_transport_register_notify (rpc_transport_t *trans,
+ rpc_transport_notify_t notify, void *mydata)
+{
+ int32_t ret = -1;
+ GF_VALIDATE_OR_GOTO ("rpc", trans, out);
+
+ trans->notify = notify;
+ trans->mydata = mydata;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+
+//give negative values to skip setting that value
+//this function asserts if both the values are negative.
+//why call it if you dont set it.
+int
+rpc_transport_keepalive_options_set (dict_t *options, int32_t interval,
+ int32_t time)
+{
+ int ret = -1;
+
+ GF_ASSERT (options);
+ GF_ASSERT ((interval > 0) || (time > 0));
+
+ ret = dict_set_int32 (options,
+ "transport.socket.keepalive-interval", interval);
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32 (options,
+ "transport.socket.keepalive-time", time);
+ if (ret)
+ goto out;
+out:
+ return ret;
+}
+
+int
+rpc_transport_unix_options_build (dict_t **options, char *filepath,
+ int frame_timeout)
+{
+ dict_t *dict = NULL;
+ char *fpath = NULL;
+ int ret = -1;
+
+ GF_ASSERT (filepath);
+ GF_ASSERT (options);
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ fpath = gf_strdup (filepath);
+ if (!fpath) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr (dict, "transport.socket.connect-path", fpath);
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (dict, "transport.address-family", "unix");
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (dict, "transport.socket.nodelay", "off");
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (dict, "transport-type", "socket");
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (dict, "transport.socket.keepalive", "off");
+ if (ret)
+ goto out;
+
+ if (frame_timeout > 0) {
+ ret = dict_set_int32 (dict, "frame-timeout", frame_timeout);
+ if (ret)
+ goto out;
+ }
+
+ *options = dict;
+out:
+ if (ret) {
+ GF_FREE (fpath);
+ if (dict)
+ dict_unref (dict);
+ }
+ return ret;
+}
+
+int
+rpc_transport_inet_options_build (dict_t **options, const char *hostname,
+ int port)
+{
+ dict_t *dict = NULL;
+ char *host = NULL;
+ int ret = -1;
+
+ GF_ASSERT (options);
+ GF_ASSERT (hostname);
+ GF_ASSERT (port >= 1024);
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ host = gf_strdup ((char*)hostname);
+ if (!hostname) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr (dict, "remote-host", host);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set remote-host with %s", host);
+ goto out;
+ }
+
+ ret = dict_set_int32 (dict, "remote-port", port);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set remote-port with %d", port);
+ goto out;
+ }
+ ret = dict_set_str (dict, "transport.address-family", "inet");
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set addr-family with inet");
+ goto out;
+ }
+
+ ret = dict_set_str (dict, "transport-type", "socket");
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set trans-type with socket");
+ goto out;
+ }
+
+ *options = dict;
+out:
+ if (ret) {
+ GF_FREE (host);
+ if (dict)
+ dict_unref (dict);
+ }
+
+ return ret;
+}
diff --git a/rpc/rpc-lib/src/rpc-transport.h b/rpc/rpc-lib/src/rpc-transport.h
new file mode 100644
index 000000000..2db9072ae
--- /dev/null
+++ b/rpc/rpc-lib/src/rpc-transport.h
@@ -0,0 +1,314 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __RPC_TRANSPORT_H__
+#define __RPC_TRANSPORT_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+
+#include <inttypes.h>
+#ifdef GF_SOLARIS_HOST_OS
+#include <rpc/auth.h>
+#else
+#include <rpc/rpc.h>
+#endif
+
+#include <rpc/rpc_msg.h>
+
+
+#ifndef MAX_IOVEC
+#define MAX_IOVEC 16
+#endif
+
+#ifndef AI_ADDRCONFIG
+#define AI_ADDRCONFIG 0
+#endif /* AI_ADDRCONFIG */
+
+/* Given the 4-byte fragment header, returns non-zero if this fragment
+ * is the last fragment for the RPC record being assembled.
+ * RPC Record marking standard defines a 32 bit value as the fragment
+ * header with the MSB signifying whether the fragment is the last
+ * fragment for the record being assembled.
+ */
+#define RPC_LASTFRAG(fraghdr) ((uint32_t)(fraghdr & 0x80000000U))
+
+/* Given the 4-byte fragment header, extracts the bits that contain
+ * the fragment size.
+ */
+#define RPC_FRAGSIZE(fraghdr) ((uint32_t)(fraghdr & 0x7fffffffU))
+
+#define RPC_FRAGHDR_SIZE 4
+#define RPC_MSGTYPE_SIZE 8
+
+/* size of the msg from the start of call-body till and including credlen */
+#define RPC_CALL_BODY_SIZE 24
+
+#define RPC_REPLY_STATUS_SIZE 4
+
+#define RPC_AUTH_FLAVOUR_N_LENGTH_SIZE 8
+
+#define RPC_ACCEPT_STATUS_LEN 4
+
+struct rpc_transport_ops;
+typedef struct rpc_transport rpc_transport_t;
+
+#include "dict.h"
+#include "compat.h"
+#include "rpcsvc-common.h"
+
+struct peer_info {
+ struct sockaddr_storage sockaddr;
+ socklen_t sockaddr_len;
+ char identifier[UNIX_PATH_MAX];
+ // OP-VERSION of clients
+ uint32_t max_op_version;
+ uint32_t min_op_version;
+ //Volume mounted by client
+ char volname[1024];
+};
+typedef struct peer_info peer_info_t;
+
+typedef enum msg_type msg_type_t;
+
+typedef enum {
+ RPC_TRANSPORT_ACCEPT, /* New client has been accepted */
+ RPC_TRANSPORT_DISCONNECT, /* Connection is disconnected */
+ RPC_TRANSPORT_CLEANUP, /* connection is about to be freed */
+ /*RPC_TRANSPORT_READ,*/ /* An event used to enable rpcsvc to instruct
+ * transport the number of bytes to read.
+ * This helps in reading large msgs, wherein
+ * the rpc actors might decide to place the
+ * actor's payload in new iobufs separate
+ * from the rpc header, proghdr and
+ * authentication information. glusterfs/nfs
+ * read and write actors are few examples
+ * that might beniefit from this. While
+ * reading a single msg, this event may be
+ * delivered more than once.
+ */
+ RPC_TRANSPORT_MAP_XID_REQUEST, /* receiver of this event should send
+ * the prognum and procnum corresponding
+ * to xid.
+ */
+ RPC_TRANSPORT_MSG_RECEIVED, /* Complete rpc msg has been read */
+ RPC_TRANSPORT_CONNECT, /* client is connected to server */
+ RPC_TRANSPORT_MSG_SENT,
+} rpc_transport_event_t;
+
+struct rpc_transport_msg {
+ struct iovec *rpchdr;
+ int rpchdrcount;
+ struct iovec *proghdr;
+ int proghdrcount;
+ struct iovec *progpayload;
+ int progpayloadcount;
+ struct iobref *iobref;
+};
+typedef struct rpc_transport_msg rpc_transport_msg_t;
+
+struct rpc_transport_rsp {
+ struct iovec *rsphdr;
+ int rsphdr_count;
+ struct iovec *rsp_payload;
+ int rsp_payload_count;
+ struct iobref *rsp_iobref;
+};
+typedef struct rpc_transport_rsp rpc_transport_rsp_t;
+
+struct rpc_transport_req {
+ rpc_transport_msg_t msg;
+ rpc_transport_rsp_t rsp;
+ struct rpc_req *rpc_req;
+};
+typedef struct rpc_transport_req rpc_transport_req_t;
+
+struct rpc_transport_reply {
+ rpc_transport_msg_t msg;
+ void *private;
+};
+typedef struct rpc_transport_reply rpc_transport_reply_t;
+
+struct rpc_transport_data {
+ char is_request;
+ union {
+ rpc_transport_req_t req;
+ rpc_transport_reply_t reply;
+ } data;
+};
+typedef struct rpc_transport_data rpc_transport_data_t;
+
+/* FIXME: prognum, procnum and progver are already present in
+ * rpc_request, hence these should be removed from request_info
+ */
+struct rpc_request_info {
+ uint32_t xid;
+ int prognum;
+ int progver;
+ int procnum;
+ void *rpc_req; /* struct rpc_req */
+ rpc_transport_rsp_t rsp;
+};
+typedef struct rpc_request_info rpc_request_info_t;
+
+
+struct rpc_transport_pollin {
+ struct iovec vector[2];
+ int count;
+ char vectored;
+ void *private;
+ struct iobref *iobref;
+ struct iobuf *hdr_iobuf;
+ char is_reply;
+};
+typedef struct rpc_transport_pollin rpc_transport_pollin_t;
+
+typedef int (*rpc_transport_notify_t) (rpc_transport_t *, void *mydata,
+ rpc_transport_event_t, void *data, ...);
+
+
+struct rpc_transport {
+ struct rpc_transport_ops *ops;
+ rpc_transport_t *listener; /* listener transport to which
+ * request for creation of this
+ * transport came from. valid only
+ * on server process.
+ */
+
+ void *private;
+ struct _client_t *xl_private;
+ void *xl; /* Used for THIS */
+ void *mydata;
+ pthread_mutex_t lock;
+ int32_t refcount;
+
+ int32_t outstanding_rpc_count;
+
+ glusterfs_ctx_t *ctx;
+ dict_t *options;
+ char *name;
+ void *dnscache;
+ void *drc_client;
+ data_t *buf;
+ int32_t (*init) (rpc_transport_t *this);
+ void (*fini) (rpc_transport_t *this);
+ int (*reconfigure) (rpc_transport_t *this, dict_t *options);
+ rpc_transport_notify_t notify;
+ void *notify_data;
+ peer_info_t peerinfo;
+ peer_info_t myinfo;
+
+ uint64_t total_bytes_read;
+ uint64_t total_bytes_write;
+
+ struct list_head list;
+ int bind_insecure;
+ void *dl_handle; /* handle of dlopen() */
+};
+
+struct rpc_transport_ops {
+ /* no need of receive op, msg will be delivered through an event
+ * notification
+ */
+ int32_t (*submit_request) (rpc_transport_t *this,
+ rpc_transport_req_t *req);
+ int32_t (*submit_reply) (rpc_transport_t *this,
+ rpc_transport_reply_t *reply);
+ int32_t (*connect) (rpc_transport_t *this, int port);
+ int32_t (*listen) (rpc_transport_t *this);
+ int32_t (*disconnect) (rpc_transport_t *this);
+ int32_t (*get_peername) (rpc_transport_t *this, char *hostname,
+ int hostlen);
+ int32_t (*get_peeraddr) (rpc_transport_t *this, char *peeraddr,
+ int addrlen, struct sockaddr_storage *sa,
+ socklen_t sasize);
+ int32_t (*get_myname) (rpc_transport_t *this, char *hostname,
+ int hostlen);
+ int32_t (*get_myaddr) (rpc_transport_t *this, char *peeraddr,
+ int addrlen, struct sockaddr_storage *sa,
+ socklen_t sasize);
+ int32_t (*throttle) (rpc_transport_t *this, gf_boolean_t onoff);
+};
+
+
+int32_t
+rpc_transport_listen (rpc_transport_t *this);
+
+int32_t
+rpc_transport_connect (rpc_transport_t *this, int port);
+
+int32_t
+rpc_transport_disconnect (rpc_transport_t *this);
+
+int32_t
+rpc_transport_destroy (rpc_transport_t *this);
+
+int32_t
+rpc_transport_notify (rpc_transport_t *this, rpc_transport_event_t event,
+ void *data, ...);
+
+int32_t
+rpc_transport_submit_request (rpc_transport_t *this, rpc_transport_req_t *req);
+
+int32_t
+rpc_transport_submit_reply (rpc_transport_t *this,
+ rpc_transport_reply_t *reply);
+
+rpc_transport_t *
+rpc_transport_load (glusterfs_ctx_t *ctx, dict_t *options, char *name);
+
+rpc_transport_t *
+rpc_transport_ref (rpc_transport_t *trans);
+
+int32_t
+rpc_transport_unref (rpc_transport_t *trans);
+
+int
+rpc_transport_register_notify (rpc_transport_t *trans, rpc_transport_notify_t,
+ void *mydata);
+
+int32_t
+rpc_transport_get_peername (rpc_transport_t *this, char *hostname, int hostlen);
+
+int32_t
+rpc_transport_get_peeraddr (rpc_transport_t *this, char *peeraddr, int addrlen,
+ struct sockaddr_storage *sa, size_t salen);
+
+int32_t
+rpc_transport_get_myname (rpc_transport_t *this, char *hostname, int hostlen);
+
+int32_t
+rpc_transport_get_myaddr (rpc_transport_t *this, char *peeraddr, int addrlen,
+ struct sockaddr_storage *sa, size_t salen);
+
+int
+rpc_transport_throttle (rpc_transport_t *this, gf_boolean_t onoff);
+
+rpc_transport_pollin_t *
+rpc_transport_pollin_alloc (rpc_transport_t *this, struct iovec *vector,
+ int count, struct iobuf *hdr_iobuf,
+ struct iobref *iobref, void *private);
+void
+rpc_transport_pollin_destroy (rpc_transport_pollin_t *pollin);
+
+int
+rpc_transport_keepalive_options_set (dict_t *options, int32_t interval,
+ int32_t time);
+
+int
+rpc_transport_unix_options_build (dict_t **options, char *filepath,
+ int frame_timeout);
+
+int
+rpc_transport_inet_options_build (dict_t **options, const char *hostname, int port);
+#endif /* __RPC_TRANSPORT_H__ */
diff --git a/rpc/rpc-lib/src/rpcsvc-auth.c b/rpc/rpc-lib/src/rpcsvc-auth.c
new file mode 100644
index 000000000..4cb86a758
--- /dev/null
+++ b/rpc/rpc-lib/src/rpcsvc-auth.c
@@ -0,0 +1,486 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "rpcsvc.h"
+#include "logging.h"
+#include "dict.h"
+
+extern rpcsvc_auth_t *
+rpcsvc_auth_null_init (rpcsvc_t *svc, dict_t *options);
+
+extern rpcsvc_auth_t *
+rpcsvc_auth_unix_init (rpcsvc_t *svc, dict_t *options);
+
+extern rpcsvc_auth_t *
+rpcsvc_auth_glusterfs_init (rpcsvc_t *svc, dict_t *options);
+extern rpcsvc_auth_t *
+rpcsvc_auth_glusterfs_v2_init (rpcsvc_t *svc, dict_t *options);
+
+int
+rpcsvc_auth_add_initer (struct list_head *list, char *idfier,
+ rpcsvc_auth_initer_t init)
+{
+ struct rpcsvc_auth_list *new = NULL;
+
+ if ((!list) || (!init) || (!idfier))
+ return -1;
+
+ new = GF_CALLOC (1, sizeof (*new), gf_common_mt_rpcsvc_auth_list);
+ if (!new) {
+ return -1;
+ }
+
+ new->init = init;
+ strcpy (new->name, idfier);
+ INIT_LIST_HEAD (&new->authlist);
+ list_add_tail (&new->authlist, list);
+ return 0;
+}
+
+
+
+int
+rpcsvc_auth_add_initers (rpcsvc_t *svc)
+{
+ int ret = -1;
+
+ ret = rpcsvc_auth_add_initer (&svc->authschemes, "auth-glusterfs",
+ (rpcsvc_auth_initer_t)
+ rpcsvc_auth_glusterfs_init);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to add AUTH_GLUSTERFS");
+ goto err;
+ }
+
+
+ ret = rpcsvc_auth_add_initer (&svc->authschemes, "auth-glusterfs-v2",
+ (rpcsvc_auth_initer_t)
+ rpcsvc_auth_glusterfs_v2_init);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR,
+ "Failed to add AUTH_GLUSTERFS-v2");
+ goto err;
+ }
+
+ ret = rpcsvc_auth_add_initer (&svc->authschemes, "auth-unix",
+ (rpcsvc_auth_initer_t)
+ rpcsvc_auth_unix_init);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to add AUTH_UNIX");
+ goto err;
+ }
+
+ ret = rpcsvc_auth_add_initer (&svc->authschemes, "auth-null",
+ (rpcsvc_auth_initer_t)
+ rpcsvc_auth_null_init);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to add AUTH_NULL");
+ goto err;
+ }
+
+ ret = 0;
+err:
+ return ret;
+}
+
+
+int
+rpcsvc_auth_init_auth (rpcsvc_t *svc, dict_t *options,
+ struct rpcsvc_auth_list *authitem)
+{
+ int ret = -1;
+
+ if ((!svc) || (!options) || (!authitem))
+ return -1;
+
+ if (!authitem->init) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "No init function defined");
+ ret = -1;
+ goto err;
+ }
+
+ authitem->auth = authitem->init (svc, options);
+ if (!authitem->auth) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Registration of auth failed:"
+ " %s", authitem->name);
+ ret = -1;
+ goto err;
+ }
+
+ authitem->enable = 1;
+ gf_log (GF_RPCSVC, GF_LOG_TRACE, "Authentication enabled: %s",
+ authitem->auth->authname);
+
+ ret = 0;
+err:
+ return ret;
+}
+
+
+int
+rpcsvc_auth_init_auths (rpcsvc_t *svc, dict_t *options)
+{
+ int ret = -1;
+ struct rpcsvc_auth_list *auth = NULL;
+ struct rpcsvc_auth_list *tmp = NULL;
+
+ if (!svc)
+ return -1;
+
+ if (list_empty (&svc->authschemes)) {
+ gf_log (GF_RPCSVC, GF_LOG_WARNING, "No authentication!");
+ ret = 0;
+ goto err;
+ }
+
+ /* If auth null and sys are not disabled by the user, we must enable
+ * it by default. This is a globally default rule, the user is still
+ * allowed to disable the two for particular subvolumes.
+ */
+ if (!dict_get (options, "rpc-auth.auth-null")) {
+ ret = dict_set_str (options, "rpc-auth.auth-null", "on");
+ if (ret)
+ gf_log ("rpc-auth", GF_LOG_DEBUG,
+ "dict_set failed for 'auth-nill'");
+ }
+
+ if (!dict_get (options, "rpc-auth.auth-unix")) {
+ ret = dict_set_str (options, "rpc-auth.auth-unix", "on");
+ if (ret)
+ gf_log ("rpc-auth", GF_LOG_DEBUG,
+ "dict_set failed for 'auth-unix'");
+ }
+
+ if (!dict_get (options, "rpc-auth.auth-glusterfs")) {
+ ret = dict_set_str (options, "rpc-auth.auth-glusterfs", "on");
+ if (ret)
+ gf_log ("rpc-auth", GF_LOG_DEBUG,
+ "dict_set failed for 'auth-unix'");
+ }
+
+ list_for_each_entry_safe (auth, tmp, &svc->authschemes, authlist) {
+ ret = rpcsvc_auth_init_auth (svc, options, auth);
+ if (ret == -1)
+ goto err;
+ }
+
+ ret = 0;
+err:
+ return ret;
+
+}
+
+int
+rpcsvc_set_addr_namelookup (rpcsvc_t *svc, dict_t *options)
+{
+ int ret;
+ static char *addrlookup_key = "rpc-auth.addr.namelookup";
+
+ if (!svc || !options)
+ return (-1);
+
+ /* By default it's disabled */
+ ret = dict_get_str_boolean (options, addrlookup_key, _gf_false);
+ if (ret < 0) {
+ svc->addr_namelookup = _gf_false;
+ } else {
+ svc->addr_namelookup = ret;
+ }
+
+ if (svc->addr_namelookup)
+ gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Addr-Name lookup enabled");
+
+ return (0);
+}
+
+int
+rpcsvc_set_allow_insecure (rpcsvc_t *svc, dict_t *options)
+{
+ int ret = -1;
+ char *allow_insecure_str = NULL;
+ gf_boolean_t is_allow_insecure = _gf_false;
+
+ GF_ASSERT (svc);
+ GF_ASSERT (options);
+
+ ret = dict_get_str (options, "rpc-auth-allow-insecure",
+ &allow_insecure_str);
+ if (0 == ret) {
+ ret = gf_string2boolean (allow_insecure_str,
+ &is_allow_insecure);
+ if (0 == ret) {
+ if (_gf_true == is_allow_insecure)
+ svc->allow_insecure = 1;
+ else
+ svc->allow_insecure = 0;
+ }
+ }
+
+ return 0;
+}
+
+int
+rpcsvc_set_root_squash (rpcsvc_t *svc, dict_t *options)
+{
+ int ret = -1;
+
+ GF_ASSERT (svc);
+ GF_ASSERT (options);
+
+ ret = dict_get_str_boolean (options, "root-squash", 0);
+ if (ret != -1)
+ svc->root_squash = ret;
+ else
+ svc->root_squash = _gf_false;
+
+ if (svc->root_squash)
+ gf_log (GF_RPCSVC, GF_LOG_DEBUG, "root squashing enabled ");
+
+ return 0;
+}
+
+int
+rpcsvc_auth_init (rpcsvc_t *svc, dict_t *options)
+{
+ int ret = -1;
+
+ if ((!svc) || (!options))
+ return -1;
+
+ (void) rpcsvc_set_allow_insecure (svc, options);
+ (void) rpcsvc_set_root_squash (svc, options);
+ (void) rpcsvc_set_addr_namelookup (svc, options);
+ ret = rpcsvc_auth_add_initers (svc);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to add initers");
+ goto out;
+ }
+
+ ret = rpcsvc_auth_init_auths (svc, options);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to init auth schemes");
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+int
+rpcsvc_auth_reconf (rpcsvc_t *svc, dict_t *options)
+{
+ int ret = 0;
+
+ if ((!svc) || (!options))
+ return (-1);
+
+ ret = rpcsvc_set_allow_insecure (svc, options);
+ if (ret)
+ return (-1);
+
+ ret = rpcsvc_set_root_squash (svc, options);
+ if (ret)
+ return (-1);
+
+ return rpcsvc_set_addr_namelookup (svc, options);
+}
+
+
+rpcsvc_auth_t *
+__rpcsvc_auth_get_handler (rpcsvc_request_t *req)
+{
+ struct rpcsvc_auth_list *auth = NULL;
+ struct rpcsvc_auth_list *tmp = NULL;
+ rpcsvc_t *svc = NULL;
+
+ if (!req)
+ return NULL;
+
+ svc = req->svc;
+ if (!svc) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "!svc");
+ goto err;
+ }
+
+ if (list_empty (&svc->authschemes)) {
+ gf_log (GF_RPCSVC, GF_LOG_WARNING, "No authentication!");
+ goto err;
+ }
+
+ list_for_each_entry_safe (auth, tmp, &svc->authschemes, authlist) {
+ if (!auth->enable)
+ continue;
+ if (auth->auth->authnum == req->cred.flavour)
+ goto err;
+
+ }
+
+ auth = NULL;
+err:
+ if (auth)
+ return auth->auth;
+ else
+ return NULL;
+}
+
+rpcsvc_auth_t *
+rpcsvc_auth_get_handler (rpcsvc_request_t *req)
+{
+ rpcsvc_auth_t *auth = NULL;
+
+ auth = __rpcsvc_auth_get_handler (req);
+ if (auth)
+ goto ret;
+
+ gf_log (GF_RPCSVC, GF_LOG_TRACE, "No auth handler: %d",
+ req->cred.flavour);
+
+ /* The requested scheme was not available so fall back the to one
+ * scheme that will always be present.
+ */
+ req->cred.flavour = AUTH_NULL;
+ req->verf.flavour = AUTH_NULL;
+ auth = __rpcsvc_auth_get_handler (req);
+ret:
+ return auth;
+}
+
+
+int
+rpcsvc_auth_request_init (rpcsvc_request_t *req)
+{
+ int ret = -1;
+ rpcsvc_auth_t *auth = NULL;
+
+ if (!req)
+ return -1;
+
+ auth = rpcsvc_auth_get_handler (req);
+ if (!auth)
+ goto err;
+ ret = 0;
+ gf_log (GF_RPCSVC, GF_LOG_TRACE, "Auth handler: %s", auth->authname);
+ if (!auth->authops->request_init)
+ ret = auth->authops->request_init (req, auth->authprivate);
+
+ req->auxgids = req->auxgidsmall; /* reset to auxgidlarge during
+ unsersialize if necessary */
+ req->auxgidlarge = NULL;
+err:
+ return ret;
+}
+
+
+int
+rpcsvc_authenticate (rpcsvc_request_t *req)
+{
+ int ret = RPCSVC_AUTH_REJECT;
+ rpcsvc_auth_t *auth = NULL;
+ int minauth = 0;
+
+ if (!req)
+ return ret;
+
+ /* FIXME use rpcsvc_request_prog_minauth() */
+ minauth = 0;
+ if (minauth > rpcsvc_request_cred_flavour (req)) {
+ gf_log (GF_RPCSVC, GF_LOG_WARNING, "Auth too weak");
+ rpcsvc_request_set_autherr (req, AUTH_TOOWEAK);
+ goto err;
+ }
+
+ auth = rpcsvc_auth_get_handler (req);
+ if (!auth) {
+ gf_log (GF_RPCSVC, GF_LOG_WARNING, "No auth handler found");
+ goto err;
+ }
+
+ if (auth->authops->authenticate)
+ ret = auth->authops->authenticate (req, auth->authprivate);
+
+err:
+ return ret;
+}
+
+int
+rpcsvc_auth_array (rpcsvc_t *svc, char *volname, int *autharr, int arrlen)
+{
+ int count = 0;
+ int result = RPCSVC_AUTH_REJECT;
+ char *srchstr = NULL;
+ int ret = 0;
+
+ struct rpcsvc_auth_list *auth = NULL;
+ struct rpcsvc_auth_list *tmp = NULL;
+
+ if ((!svc) || (!autharr) || (!volname))
+ return -1;
+
+ memset (autharr, 0, arrlen * sizeof(int));
+ if (list_empty (&svc->authschemes)) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "No authentication!");
+ goto err;
+ }
+
+ list_for_each_entry_safe (auth, tmp, &svc->authschemes, authlist) {
+ if (count >= arrlen)
+ break;
+
+ result = gf_asprintf (&srchstr, "rpc-auth.%s.%s",
+ auth->name, volname);
+ if (result == -1) {
+ count = -1;
+ goto err;
+ }
+
+ ret = dict_get_str_boolean (svc->options, srchstr, 0xC00FFEE);
+ GF_FREE (srchstr);
+
+ switch (ret) {
+ case _gf_true:
+ result = RPCSVC_AUTH_ACCEPT;
+ autharr[count] = auth->auth->authnum;
+ ++count;
+ break;
+ case _gf_false:
+ result = RPCSVC_AUTH_REJECT;
+ break;
+ default:
+ result = RPCSVC_AUTH_DONTCARE;
+ }
+ }
+
+err:
+ return count;
+}
+
+gid_t *
+rpcsvc_auth_unix_auxgids (rpcsvc_request_t *req, int *arrlen)
+{
+ if ((!req) || (!arrlen))
+ return NULL;
+
+ /* In case of AUTH_NULL auxgids are not used */
+ switch (req->cred.flavour) {
+ case AUTH_UNIX:
+ case AUTH_GLUSTERFS:
+ case AUTH_GLUSTERFS_v2:
+ break;
+ default:
+ gf_log ("rpc", GF_LOG_DEBUG, "auth type not unix or glusterfs");
+ return NULL;
+ }
+
+ *arrlen = req->auxgidcount;
+ if (*arrlen == 0)
+ return NULL;
+
+ return &req->auxgids[0];
+}
diff --git a/rpc/rpc-lib/src/rpcsvc-common.h b/rpc/rpc-lib/src/rpcsvc-common.h
new file mode 100644
index 000000000..aed55e039
--- /dev/null
+++ b/rpc/rpc-lib/src/rpcsvc-common.h
@@ -0,0 +1,126 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _RPCSVC_COMMON_H
+#define _RPCSVC_COMMON_H
+
+#include <pthread.h>
+#include "list.h"
+#include "compat.h"
+#include "glusterfs.h"
+#include "dict.h"
+
+typedef enum {
+ RPCSVC_EVENT_ACCEPT,
+ RPCSVC_EVENT_DISCONNECT,
+ RPCSVC_EVENT_TRANSPORT_DESTROY,
+ RPCSVC_EVENT_LISTENER_DEAD,
+} rpcsvc_event_t;
+
+
+struct rpcsvc_state;
+
+typedef int (*rpcsvc_notify_t) (struct rpcsvc_state *, void *mydata,
+ rpcsvc_event_t, void *data);
+
+struct drc_globals;
+typedef struct drc_globals rpcsvc_drc_globals_t;
+
+/* Contains global state required for all the RPC services.
+ */
+typedef struct rpcsvc_state {
+
+ /* Contains list of (program, version) handlers.
+ * other options.
+ */
+
+ pthread_mutex_t rpclock;
+
+ unsigned int memfactor;
+
+ /* List of the authentication schemes available. */
+ struct list_head authschemes;
+
+ /* Reference to the options */
+ dict_t *options;
+
+ /* Allow insecure ports. */
+ gf_boolean_t allow_insecure;
+ gf_boolean_t register_portmap;
+ gf_boolean_t root_squash;
+ glusterfs_ctx_t *ctx;
+
+ /* list of connections which will listen for incoming connections */
+ struct list_head listeners;
+
+ /* list of programs registered with rpcsvc */
+ struct list_head programs;
+
+ /* list of notification callbacks */
+ struct list_head notify;
+ int notify_count;
+
+ void *mydata; /* This is xlator */
+ rpcsvc_notify_t notifyfn;
+ struct mem_pool *rxpool;
+ rpcsvc_drc_globals_t *drc;
+
+ /* per-client limit of outstanding rpc requests */
+ int outstanding_rpc_limit;
+ gf_boolean_t addr_namelookup;
+} rpcsvc_t;
+
+/* DRC START */
+enum drc_op_type {
+ DRC_NA = 0,
+ DRC_IDEMPOTENT = 1,
+ DRC_NON_IDEMPOTENT = 2
+};
+typedef enum drc_op_type drc_op_type_t;
+
+enum drc_type {
+ DRC_TYPE_NONE = 0,
+ DRC_TYPE_IN_MEMORY = 1
+};
+typedef enum drc_type drc_type_t;
+
+enum drc_lru_factor {
+ DRC_LRU_5_PC = 20,
+ DRC_LRU_10_PC = 10,
+ DRC_LRU_25_PC = 4,
+ DRC_LRU_50_PC = 2
+};
+typedef enum drc_lru_factor drc_lru_factor_t;
+
+enum drc_xid_state {
+ DRC_XID_MONOTONOUS = 0,
+ DRC_XID_WRAPPED = 1
+};
+typedef enum drc_xid_state drc_xid_state_t;
+
+enum drc_op_state {
+ DRC_OP_IN_TRANSIT = 0,
+ DRC_OP_CACHED = 1
+};
+typedef enum drc_op_state drc_op_state_t;
+
+enum drc_policy {
+ DRC_LRU = 0
+};
+typedef enum drc_policy drc_policy_t;
+
+/* Default policies for DRC */
+#define DRC_DEFAULT_TYPE DRC_TYPE_IN_MEMORY
+#define DRC_DEFAULT_CACHE_SIZE 0x20000
+#define DRC_DEFAULT_LRU_FACTOR DRC_LRU_25_PC
+
+/* DRC END */
+
+#endif /* #ifndef _RPCSVC_COMMON_H */
diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c
new file mode 100644
index 000000000..037c157f2
--- /dev/null
+++ b/rpc/rpc-lib/src/rpcsvc.c
@@ -0,0 +1,2480 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "rpcsvc.h"
+#include "rpc-transport.h"
+#include "dict.h"
+#include "logging.h"
+#include "byte-order.h"
+#include "common-utils.h"
+#include "compat-errno.h"
+#include "list.h"
+#include "xdr-rpc.h"
+#include "iobuf.h"
+#include "globals.h"
+#include "xdr-common.h"
+#include "xdr-generic.h"
+#include "rpc-common-xdr.h"
+#include "syncop.h"
+#include "rpc-drc.h"
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <rpc/rpc.h>
+#include <rpc/pmap_clnt.h>
+#include <arpa/inet.h>
+#include <rpc/xdr.h>
+#include <fnmatch.h>
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "xdr-rpcclnt.h"
+#include "glusterfs-acl.h"
+
+struct rpcsvc_program gluster_dump_prog;
+
+#define rpcsvc_alloc_request(svc, request) \
+ do { \
+ request = (rpcsvc_request_t *) mem_get ((svc)->rxpool); \
+ memset (request, 0, sizeof (rpcsvc_request_t)); \
+ } while (0)
+
+rpcsvc_listener_t *
+rpcsvc_get_listener (rpcsvc_t *svc, uint16_t port, rpc_transport_t *trans);
+
+int
+rpcsvc_notify (rpc_transport_t *trans, void *mydata,
+ rpc_transport_event_t event, void *data, ...);
+
+rpcsvc_notify_wrapper_t *
+rpcsvc_notify_wrapper_alloc (void)
+{
+ rpcsvc_notify_wrapper_t *wrapper = NULL;
+
+ wrapper = GF_CALLOC (1, sizeof (*wrapper), gf_common_mt_rpcsvc_wrapper_t);
+ if (!wrapper) {
+ goto out;
+ }
+
+ INIT_LIST_HEAD (&wrapper->list);
+out:
+ return wrapper;
+}
+
+
+void
+rpcsvc_listener_destroy (rpcsvc_listener_t *listener)
+{
+ rpcsvc_t *svc = NULL;
+
+ if (!listener) {
+ goto out;
+ }
+
+ svc = listener->svc;
+ if (!svc) {
+ goto listener_free;
+ }
+
+ pthread_mutex_lock (&svc->rpclock);
+ {
+ list_del_init (&listener->list);
+ }
+ pthread_mutex_unlock (&svc->rpclock);
+
+listener_free:
+ GF_FREE (listener);
+out:
+ return;
+}
+
+rpcsvc_vector_sizer
+rpcsvc_get_program_vector_sizer (rpcsvc_t *svc, uint32_t prognum,
+ uint32_t progver, uint32_t procnum)
+{
+ rpcsvc_program_t *program = NULL;
+ char found = 0;
+
+ if (!svc)
+ return NULL;
+
+ pthread_mutex_lock (&svc->rpclock);
+ {
+ list_for_each_entry (program, &svc->programs, program) {
+ if ((program->prognum == prognum)
+ && (program->progver == progver)) {
+ found = 1;
+ break;
+ }
+ }
+ }
+ pthread_mutex_unlock (&svc->rpclock);
+
+ if (found)
+ return program->actors[procnum].vector_sizer;
+ else
+ return NULL;
+}
+
+int
+rpcsvc_request_outstanding (rpcsvc_t *svc, rpc_transport_t *trans, int delta)
+{
+ int ret = 0;
+ int old_count = 0;
+ int new_count = 0;
+ int limit = 0;
+
+ pthread_mutex_lock (&trans->lock);
+ {
+ limit = svc->outstanding_rpc_limit;
+ if (!limit)
+ goto unlock;
+
+ old_count = trans->outstanding_rpc_count;
+ trans->outstanding_rpc_count += delta;
+ new_count = trans->outstanding_rpc_count;
+
+ if (old_count <= limit && new_count > limit)
+ ret = rpc_transport_throttle (trans, _gf_true);
+
+ if (old_count > limit && new_count <= limit)
+ ret = rpc_transport_throttle (trans, _gf_false);
+ }
+unlock:
+ pthread_mutex_unlock (&trans->lock);
+
+ return ret;
+}
+
+
+/* This needs to change to returning errors, since
+ * we need to return RPC specific error messages when some
+ * of the pointers below are NULL.
+ */
+rpcsvc_actor_t *
+rpcsvc_program_actor (rpcsvc_request_t *req)
+{
+ rpcsvc_program_t *program = NULL;
+ int err = SYSTEM_ERR;
+ rpcsvc_actor_t *actor = NULL;
+ rpcsvc_t *svc = NULL;
+ char found = 0;
+
+ if (!req)
+ goto err;
+
+ svc = req->svc;
+ pthread_mutex_lock (&svc->rpclock);
+ {
+ list_for_each_entry (program, &svc->programs, program) {
+ if (program->prognum == req->prognum) {
+ err = PROG_MISMATCH;
+ }
+
+ if ((program->prognum == req->prognum)
+ && (program->progver == req->progver)) {
+ found = 1;
+ break;
+ }
+ }
+ }
+ pthread_mutex_unlock (&svc->rpclock);
+
+ if (!found) {
+ if (err != PROG_MISMATCH) {
+ /* log in DEBUG when nfs clients try to see if
+ * ACL requests are accepted by nfs server
+ */
+ gf_log (GF_RPCSVC, (req->prognum == ACL_PROGRAM) ?
+ GF_LOG_DEBUG : GF_LOG_WARNING,
+ "RPC program not available (req %u %u)",
+ req->prognum, req->progver);
+ err = PROG_UNAVAIL;
+ goto err;
+ }
+
+ gf_log (GF_RPCSVC, GF_LOG_WARNING,
+ "RPC program version not available (req %u %u)",
+ req->prognum, req->progver);
+ goto err;
+ }
+ req->prog = program;
+ if (!program->actors) {
+ gf_log (GF_RPCSVC, GF_LOG_WARNING,
+ "RPC Actor not found for program %s %d",
+ program->progname, program->prognum);
+ err = SYSTEM_ERR;
+ goto err;
+ }
+
+ if ((req->procnum < 0) || (req->procnum >= program->numactors)) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "RPC Program procedure not"
+ " available for procedure %d in %s", req->procnum,
+ program->progname);
+ err = PROC_UNAVAIL;
+ goto err;
+ }
+
+ actor = &program->actors[req->procnum];
+ if (!actor->actor) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "RPC Program procedure not"
+ " available for procedure %d in %s", req->procnum,
+ program->progname);
+ err = PROC_UNAVAIL;
+ actor = NULL;
+ goto err;
+ }
+
+ req->synctask = program->synctask;
+
+ err = SUCCESS;
+ gf_log (GF_RPCSVC, GF_LOG_TRACE, "Actor found: %s - %s",
+ program->progname, actor->procname);
+err:
+ if (req)
+ req->rpc_err = err;
+
+ return actor;
+}
+
+
+/* this procedure can only pass 4 arguments to registered notifyfn. To send more
+ * arguments call wrapper->notify directly.
+ */
+static inline void
+rpcsvc_program_notify (rpcsvc_listener_t *listener, rpcsvc_event_t event,
+ void *data)
+{
+ rpcsvc_notify_wrapper_t *wrapper = NULL;
+
+ if (!listener) {
+ goto out;
+ }
+
+ list_for_each_entry (wrapper, &listener->svc->notify, list) {
+ if (wrapper->notify) {
+ wrapper->notify (listener->svc,
+ wrapper->data,
+ event, data);
+ }
+ }
+
+out:
+ return;
+}
+
+
+static inline int
+rpcsvc_accept (rpcsvc_t *svc, rpc_transport_t *listen_trans,
+ rpc_transport_t *new_trans)
+{
+ rpcsvc_listener_t *listener = NULL;
+ int32_t ret = -1;
+
+ listener = rpcsvc_get_listener (svc, -1, listen_trans);
+ if (listener == NULL) {
+ goto out;
+ }
+
+ rpcsvc_program_notify (listener, RPCSVC_EVENT_ACCEPT, new_trans);
+ ret = 0;
+out:
+ return ret;
+}
+
+
+void
+rpcsvc_request_destroy (rpcsvc_request_t *req)
+{
+ if (!req) {
+ goto out;
+ }
+
+ if (req->iobref) {
+ iobref_unref (req->iobref);
+ }
+
+ if (req->hdr_iobuf)
+ iobuf_unref (req->hdr_iobuf);
+
+ /* This marks the "end" of an RPC request. Reply is
+ completely written to the socket and is on the way
+ to the client. It is time to decrement the
+ outstanding request counter by 1.
+ */
+ rpcsvc_request_outstanding (req->svc, req->trans, -1);
+
+ rpc_transport_unref (req->trans);
+
+ GF_FREE (req->auxgidlarge);
+
+ mem_put (req);
+
+out:
+ return;
+}
+
+
+rpcsvc_request_t *
+rpcsvc_request_init (rpcsvc_t *svc, rpc_transport_t *trans,
+ struct rpc_msg *callmsg,
+ struct iovec progmsg, rpc_transport_pollin_t *msg,
+ rpcsvc_request_t *req)
+{
+ int i = 0;
+
+ if ((!trans) || (!callmsg)|| (!req) || (!msg))
+ return NULL;
+
+ /* We start a RPC request as always denied. */
+ req->rpc_status = MSG_DENIED;
+ req->xid = rpc_call_xid (callmsg);
+ req->prognum = rpc_call_program (callmsg);
+ req->progver = rpc_call_progver (callmsg);
+ req->procnum = rpc_call_progproc (callmsg);
+ req->trans = rpc_transport_ref (trans);
+ req->count = msg->count;
+ req->msg[0] = progmsg;
+ req->iobref = iobref_ref (msg->iobref);
+ if (msg->vectored) {
+ /* msg->vector[2] is defined in structure. prevent a
+ out of bound access */
+ for (i = 1; i < min (msg->count, 2); i++) {
+ req->msg[i] = msg->vector[i];
+ }
+ }
+
+ req->svc = svc;
+ req->trans_private = msg->private;
+
+ INIT_LIST_HEAD (&req->txlist);
+ req->payloadsize = 0;
+
+ /* By this time, the data bytes for the auth scheme would have already
+ * been copied into the required sections of the req structure,
+ * we just need to fill in the meta-data about it now.
+ */
+ req->cred.flavour = rpc_call_cred_flavour (callmsg);
+ req->cred.datalen = rpc_call_cred_len (callmsg);
+ req->verf.flavour = rpc_call_verf_flavour (callmsg);
+ req->verf.datalen = rpc_call_verf_len (callmsg);
+
+ /* AUTH */
+ rpcsvc_auth_request_init (req);
+ return req;
+}
+
+
+rpcsvc_request_t *
+rpcsvc_request_create (rpcsvc_t *svc, rpc_transport_t *trans,
+ rpc_transport_pollin_t *msg)
+{
+ char *msgbuf = NULL;
+ struct rpc_msg rpcmsg;
+ struct iovec progmsg; /* RPC Program payload */
+ rpcsvc_request_t *req = NULL;
+ size_t msglen = 0;
+ int ret = -1;
+
+ if (!svc || !trans)
+ return NULL;
+
+ /* We need to allocate the request before actually calling
+ * rpcsvc_request_init on the request so that we, can fill the auth
+ * data directly into the request structure from the message iobuf.
+ * This avoids a need to keep a temp buffer into which the auth data
+ * would've been copied otherwise.
+ */
+ rpcsvc_alloc_request (svc, req);
+ if (!req) {
+ goto err;
+ }
+
+ /* We just received a new request from the wire. Account for
+ it in the outsanding request counter to make sure we don't
+ ingest too many concurrent requests from the same client.
+ */
+ ret = rpcsvc_request_outstanding (svc, trans, +1);
+
+ msgbuf = msg->vector[0].iov_base;
+ msglen = msg->vector[0].iov_len;
+
+ ret = xdr_to_rpc_call (msgbuf, msglen, &rpcmsg, &progmsg,
+ req->cred.authdata,req->verf.authdata);
+
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_WARNING, "RPC call decoding failed");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ req->trans = rpc_transport_ref (trans);
+ req->svc = svc;
+ goto err;
+ }
+
+ ret = -1;
+ rpcsvc_request_init (svc, trans, &rpcmsg, progmsg, msg, req);
+
+ gf_log (GF_RPCSVC, GF_LOG_TRACE, "received rpc-message (XID: 0x%lx, "
+ "Ver: %ld, Program: %ld, ProgVers: %ld, Proc: %ld) from"
+ " rpc-transport (%s)", rpc_call_xid (&rpcmsg),
+ rpc_call_rpcvers (&rpcmsg), rpc_call_program (&rpcmsg),
+ rpc_call_progver (&rpcmsg), rpc_call_progproc (&rpcmsg),
+ trans->name);
+
+ if (rpc_call_rpcvers (&rpcmsg) != 2) {
+ /* LOG- TODO: print rpc version, also print the peerinfo
+ from transport */
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "RPC version not supported "
+ "(XID: 0x%lx, Ver: %ld, Prog: %ld, ProgVers: %ld, "
+ "Proc: %ld) from trans (%s)", rpc_call_xid (&rpcmsg),
+ rpc_call_rpcvers (&rpcmsg), rpc_call_program (&rpcmsg),
+ rpc_call_progver (&rpcmsg), rpc_call_progproc (&rpcmsg),
+ trans->name);
+ rpcsvc_request_seterr (req, RPC_MISMATCH);
+ goto err;
+ }
+
+ ret = rpcsvc_authenticate (req);
+ if (ret == RPCSVC_AUTH_REJECT) {
+ /* No need to set auth_err, that is the responsibility of
+ * the authentication handler since only that know what exact
+ * error happened.
+ */
+ rpcsvc_request_seterr (req, AUTH_ERROR);
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "auth failed on request. "
+ "(XID: 0x%lx, Ver: %ld, Prog: %ld, ProgVers: %ld, "
+ "Proc: %ld) from trans (%s)", rpc_call_xid (&rpcmsg),
+ rpc_call_rpcvers (&rpcmsg), rpc_call_program (&rpcmsg),
+ rpc_call_progver (&rpcmsg), rpc_call_progproc (&rpcmsg),
+ trans->name);
+ ret = -1;
+ goto err;
+ }
+
+
+ /* If the error is not RPC_MISMATCH, we consider the call as accepted
+ * since we are not handling authentication failures for now.
+ */
+ req->rpc_status = MSG_ACCEPTED;
+ req->reply = NULL;
+ ret = 0;
+err:
+ if (ret == -1) {
+ ret = rpcsvc_error_reply (req);
+ if (ret)
+ gf_log ("rpcsvc", GF_LOG_WARNING,
+ "failed to queue error reply");
+ req = NULL;
+ }
+
+ return req;
+}
+
+
+int
+rpcsvc_check_and_reply_error (int ret, call_frame_t *frame, void *opaque)
+{
+ rpcsvc_request_t *req = NULL;
+
+ req = opaque;
+
+ if (ret)
+ gf_log ("rpcsvc", GF_LOG_ERROR,
+ "rpc actor failed to complete successfully");
+
+ if (ret == RPCSVC_ACTOR_ERROR) {
+ ret = rpcsvc_error_reply (req);
+ if (ret)
+ gf_log ("rpcsvc", GF_LOG_WARNING,
+ "failed to queue error reply");
+ }
+
+ return 0;
+}
+
+int
+rpcsvc_handle_rpc_call (rpcsvc_t *svc, rpc_transport_t *trans,
+ rpc_transport_pollin_t *msg)
+{
+ rpcsvc_actor_t *actor = NULL;
+ rpcsvc_actor actor_fn = NULL;
+ rpcsvc_request_t *req = NULL;
+ int ret = -1;
+ uint16_t port = 0;
+ gf_boolean_t is_unix = _gf_false;
+ gf_boolean_t unprivileged = _gf_false;
+ drc_cached_op_t *reply = NULL;
+ rpcsvc_drc_globals_t *drc = NULL;
+
+ if (!trans || !svc)
+ return -1;
+
+ switch (trans->peerinfo.sockaddr.ss_family) {
+ case AF_INET:
+ port = ((struct sockaddr_in *)&trans->peerinfo.sockaddr)->sin_port;
+ break;
+
+ case AF_INET6:
+ port = ((struct sockaddr_in6 *)&trans->peerinfo.sockaddr)->sin6_port;
+ break;
+ case AF_UNIX:
+ is_unix = _gf_true;
+ break;
+ default:
+ gf_log (GF_RPCSVC, GF_LOG_ERROR,
+ "invalid address family (%d)",
+ trans->peerinfo.sockaddr.ss_family);
+ return -1;
+ }
+
+
+
+ if (is_unix == _gf_false) {
+ port = ntohs (port);
+
+ gf_log ("rpcsvc", GF_LOG_TRACE, "Client port: %d", (int)port);
+
+ if (port > 1024)
+ unprivileged = _gf_true;
+ }
+
+ req = rpcsvc_request_create (svc, trans, msg);
+ if (!req)
+ goto out;
+
+ if (!rpcsvc_request_accepted (req))
+ goto err_reply;
+
+ actor = rpcsvc_program_actor (req);
+ if (!actor)
+ goto err_reply;
+
+ if (0 == svc->allow_insecure && unprivileged && !actor->unprivileged) {
+ /* Non-privileged user, fail request */
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "Request received from non-"
+ "privileged port. Failing request");
+ rpcsvc_request_destroy (req);
+ return -1;
+ }
+
+ /* DRC */
+ if (rpcsvc_need_drc (req)) {
+ drc = req->svc->drc;
+
+ LOCK (&drc->lock);
+ reply = rpcsvc_drc_lookup (req);
+
+ /* retransmission of completed request, send cached reply */
+ if (reply && reply->state == DRC_OP_CACHED) {
+ gf_log (GF_RPCSVC, GF_LOG_INFO, "duplicate request:"
+ " XID: 0x%x", req->xid);
+ ret = rpcsvc_send_cached_reply (req, reply);
+ drc->cache_hits++;
+ UNLOCK (&drc->lock);
+ goto out;
+
+ } /* retransmitted request, original op in transit, drop it */
+ else if (reply && reply->state == DRC_OP_IN_TRANSIT) {
+ gf_log (GF_RPCSVC, GF_LOG_INFO, "op in transit,"
+ " discarding. XID: 0x%x", req->xid);
+ ret = 0;
+ drc->intransit_hits++;
+ rpcsvc_request_destroy (req);
+ UNLOCK (&drc->lock);
+ goto out;
+
+ } /* fresh request, cache it as in-transit and proceed */
+ else {
+ ret = rpcsvc_cache_request (req);
+ }
+ UNLOCK (&drc->lock);
+ }
+
+ if (req->rpc_err == SUCCESS) {
+ /* Before going to xlator code, set the THIS properly */
+ THIS = svc->mydata;
+
+ actor_fn = actor->actor;
+
+ if (!actor_fn) {
+ rpcsvc_request_seterr (req, PROC_UNAVAIL);
+ /* LOG TODO: print more info about procnum,
+ prognum etc, also print transport info */
+ gf_log (GF_RPCSVC, GF_LOG_ERROR,
+ "No vectored handler present");
+ ret = RPCSVC_ACTOR_ERROR;
+ goto err_reply;
+ }
+
+ if (req->synctask) {
+ if (msg->hdr_iobuf)
+ req->hdr_iobuf = iobuf_ref (msg->hdr_iobuf);
+
+ ret = synctask_new (THIS->ctx->env,
+ (synctask_fn_t) actor_fn,
+ rpcsvc_check_and_reply_error, NULL,
+ req);
+ } else {
+ ret = actor_fn (req);
+ }
+ }
+
+err_reply:
+
+ ret = rpcsvc_check_and_reply_error (ret, NULL, req);
+ /* No need to propagate error beyond this function since the reply
+ * has now been queued. */
+ ret = 0;
+
+out:
+ return ret;
+}
+
+
+int
+rpcsvc_handle_disconnect (rpcsvc_t *svc, rpc_transport_t *trans)
+{
+ rpcsvc_event_t event;
+ rpcsvc_notify_wrapper_t *wrappers = NULL, *wrapper;
+ int32_t ret = -1, i = 0, wrapper_count = 0;
+ rpcsvc_listener_t *listener = NULL;
+
+ event = (trans->listener == NULL) ? RPCSVC_EVENT_LISTENER_DEAD
+ : RPCSVC_EVENT_DISCONNECT;
+
+ pthread_mutex_lock (&svc->rpclock);
+ {
+ if (!svc->notify_count)
+ goto unlock;
+
+ wrappers = GF_CALLOC (svc->notify_count, sizeof (*wrapper),
+ gf_common_mt_rpcsvc_wrapper_t);
+ if (!wrappers) {
+ goto unlock;
+ }
+
+ list_for_each_entry (wrapper, &svc->notify, list) {
+ if (wrapper->notify) {
+ wrappers[i++] = *wrapper;
+ }
+ }
+
+ wrapper_count = i;
+ }
+unlock:
+ pthread_mutex_unlock (&svc->rpclock);
+
+ if (wrappers) {
+ for (i = 0; i < wrapper_count; i++) {
+ wrappers[i].notify (svc, wrappers[i].data,
+ event, trans);
+ }
+
+ GF_FREE (wrappers);
+ }
+
+ if (event == RPCSVC_EVENT_LISTENER_DEAD) {
+ listener = rpcsvc_get_listener (svc, -1, trans->listener);
+ rpcsvc_listener_destroy (listener);
+ }
+
+ return ret;
+}
+
+
+int
+rpcsvc_notify (rpc_transport_t *trans, void *mydata,
+ rpc_transport_event_t event, void *data, ...)
+{
+ int ret = -1;
+ rpc_transport_pollin_t *msg = NULL;
+ rpc_transport_t *new_trans = NULL;
+ rpcsvc_t *svc = NULL;
+ rpcsvc_listener_t *listener = NULL;
+
+ svc = mydata;
+ if (svc == NULL) {
+ goto out;
+ }
+
+ switch (event) {
+ case RPC_TRANSPORT_ACCEPT:
+ new_trans = data;
+ ret = rpcsvc_accept (svc, trans, new_trans);
+ break;
+
+ case RPC_TRANSPORT_DISCONNECT:
+ ret = rpcsvc_handle_disconnect (svc, trans);
+ break;
+
+ case RPC_TRANSPORT_MSG_RECEIVED:
+ msg = data;
+ ret = rpcsvc_handle_rpc_call (svc, trans, msg);
+ break;
+
+ case RPC_TRANSPORT_MSG_SENT:
+ ret = 0;
+ break;
+
+ case RPC_TRANSPORT_CONNECT:
+ /* do nothing, no need for rpcsvc to handle this, client should
+ * handle this event
+ */
+ /* print info about transport too : LOG TODO */
+ gf_log ("rpcsvc", GF_LOG_CRITICAL,
+ "got CONNECT event, which should have not come");
+ ret = 0;
+ break;
+
+ case RPC_TRANSPORT_CLEANUP:
+ listener = rpcsvc_get_listener (svc, -1, trans->listener);
+ if (listener == NULL) {
+ goto out;
+ }
+
+ rpcsvc_program_notify (listener, RPCSVC_EVENT_TRANSPORT_DESTROY,
+ trans);
+ ret = 0;
+ break;
+
+ case RPC_TRANSPORT_MAP_XID_REQUEST:
+ /* FIXME: think about this later */
+ gf_log ("rpcsvc", GF_LOG_CRITICAL,
+ "got MAP_XID event, which should have not come");
+ ret = 0;
+ break;
+ }
+
+out:
+ return ret;
+}
+
+
+/* Given the RPC reply structure and the payload handed by the RPC program,
+ * encode the RPC record header into the buffer pointed by recordstart.
+ */
+struct iovec
+rpcsvc_record_build_header (char *recordstart, size_t rlen,
+ struct rpc_msg reply, size_t payload)
+{
+ struct iovec replyhdr;
+ struct iovec txrecord = {0, 0};
+ size_t fraglen = 0;
+ int ret = -1;
+
+ /* After leaving aside the 4 bytes for the fragment header, lets
+ * encode the RPC reply structure into the buffer given to us.
+ */
+ ret = rpc_reply_to_xdr (&reply, recordstart, rlen, &replyhdr);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_WARNING, "Failed to create RPC reply");
+ goto err;
+ }
+
+ fraglen = payload + replyhdr.iov_len;
+ gf_log (GF_RPCSVC, GF_LOG_TRACE, "Reply fraglen %zu, payload: %zu, "
+ "rpc hdr: %zu", fraglen, payload, replyhdr.iov_len);
+
+ txrecord.iov_base = recordstart;
+
+ /* Remember, this is only the vec for the RPC header and does not
+ * include the payload above. We needed the payload only to calculate
+ * the size of the full fragment. This size is sent in the fragment
+ * header.
+ */
+ txrecord.iov_len = replyhdr.iov_len;
+err:
+ return txrecord;
+}
+
+static inline int
+rpcsvc_get_callid (rpcsvc_t *rpc)
+{
+ return GF_UNIVERSAL_ANSWER;
+}
+
+int
+rpcsvc_fill_callback (int prognum, int progver, int procnum, int payload,
+ uint64_t xid, struct rpc_msg *request)
+{
+ int ret = -1;
+
+ if (!request) {
+ goto out;
+ }
+
+ memset (request, 0, sizeof (*request));
+
+ request->rm_xid = xid;
+ request->rm_direction = CALL;
+
+ request->rm_call.cb_rpcvers = 2;
+ request->rm_call.cb_prog = prognum;
+ request->rm_call.cb_vers = progver;
+ request->rm_call.cb_proc = procnum;
+
+ request->rm_call.cb_cred.oa_flavor = AUTH_NONE;
+ request->rm_call.cb_cred.oa_base = NULL;
+ request->rm_call.cb_cred.oa_length = 0;
+
+ request->rm_call.cb_verf.oa_flavor = AUTH_NONE;
+ request->rm_call.cb_verf.oa_base = NULL;
+ request->rm_call.cb_verf.oa_length = 0;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+struct iovec
+rpcsvc_callback_build_header (char *recordstart, size_t rlen,
+ struct rpc_msg *request, size_t payload)
+{
+ struct iovec requesthdr = {0, };
+ struct iovec txrecord = {0, 0};
+ int ret = -1;
+ size_t fraglen = 0;
+
+ ret = rpc_request_to_xdr (request, recordstart, rlen, &requesthdr);
+ if (ret == -1) {
+ gf_log ("rpcsvc", GF_LOG_WARNING,
+ "Failed to create RPC request");
+ goto out;
+ }
+
+ fraglen = payload + requesthdr.iov_len;
+ gf_log ("rpcsvc", GF_LOG_TRACE, "Request fraglen %zu, payload: %zu, "
+ "rpc hdr: %zu", fraglen, payload, requesthdr.iov_len);
+
+ txrecord.iov_base = recordstart;
+
+ /* Remember, this is only the vec for the RPC header and does not
+ * include the payload above. We needed the payload only to calculate
+ * the size of the full fragment. This size is sent in the fragment
+ * header.
+ */
+ txrecord.iov_len = requesthdr.iov_len;
+
+out:
+ return txrecord;
+}
+
+struct iobuf *
+rpcsvc_callback_build_record (rpcsvc_t *rpc, int prognum, int progver,
+ int procnum, size_t payload, uint64_t xid,
+ struct iovec *recbuf)
+{
+ struct rpc_msg request = {0, };
+ struct iobuf *request_iob = NULL;
+ char *record = NULL;
+ struct iovec recordhdr = {0, };
+ size_t pagesize = 0;
+ size_t xdr_size = 0;
+ int ret = -1;
+
+ if ((!rpc) || (!recbuf)) {
+ goto out;
+ }
+
+ /* Fill the rpc structure and XDR it into the buffer got above. */
+ ret = rpcsvc_fill_callback (prognum, progver, procnum, payload, xid,
+ &request);
+ if (ret == -1) {
+ gf_log ("rpcsvc", GF_LOG_WARNING, "cannot build a rpc-request "
+ "xid (%"PRIu64")", xid);
+ goto out;
+ }
+
+ /* First, try to get a pointer into the buffer which the RPC
+ * layer can use.
+ */
+ xdr_size = xdr_sizeof ((xdrproc_t)xdr_callmsg, &request);
+
+ request_iob = iobuf_get2 (rpc->ctx->iobuf_pool, (xdr_size + payload));
+ if (!request_iob) {
+ goto out;
+ }
+
+ pagesize = iobuf_pagesize (request_iob);
+
+ record = iobuf_ptr (request_iob); /* Now we have it. */
+
+ recordhdr = rpcsvc_callback_build_header (record, pagesize, &request,
+ payload);
+
+ if (!recordhdr.iov_base) {
+ gf_log ("rpc-clnt", GF_LOG_ERROR, "Failed to build record "
+ " header");
+ iobuf_unref (request_iob);
+ request_iob = NULL;
+ recbuf->iov_base = NULL;
+ goto out;
+ }
+
+ recbuf->iov_base = recordhdr.iov_base;
+ recbuf->iov_len = recordhdr.iov_len;
+
+out:
+ return request_iob;
+}
+
+int
+rpcsvc_callback_submit (rpcsvc_t *rpc, rpc_transport_t *trans,
+ rpcsvc_cbk_program_t *prog, int procnum,
+ struct iovec *proghdr, int proghdrcount)
+{
+ struct iobuf *request_iob = NULL;
+ struct iovec rpchdr = {0,};
+ rpc_transport_req_t req;
+ int ret = -1;
+ int proglen = 0;
+ uint64_t callid = 0;
+
+ if (!rpc) {
+ goto out;
+ }
+
+ memset (&req, 0, sizeof (req));
+
+ callid = rpcsvc_get_callid (rpc);
+
+ if (proghdr) {
+ proglen += iov_length (proghdr, proghdrcount);
+ }
+
+ request_iob = rpcsvc_callback_build_record (rpc, prog->prognum,
+ prog->progver, procnum,
+ proglen, callid,
+ &rpchdr);
+ if (!request_iob) {
+ gf_log ("rpcsvc", GF_LOG_WARNING,
+ "cannot build rpc-record");
+ goto out;
+ }
+
+ req.msg.rpchdr = &rpchdr;
+ req.msg.rpchdrcount = 1;
+ req.msg.proghdr = proghdr;
+ req.msg.proghdrcount = proghdrcount;
+
+ ret = rpc_transport_submit_request (trans, &req);
+ if (ret == -1) {
+ gf_log ("rpcsvc", GF_LOG_WARNING,
+ "transmission of rpc-request failed");
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ iobuf_unref (request_iob);
+
+ return ret;
+}
+
+int
+rpcsvc_transport_submit (rpc_transport_t *trans, struct iovec *rpchdr,
+ int rpchdrcount, struct iovec *proghdr,
+ int proghdrcount, struct iovec *progpayload,
+ int progpayloadcount, struct iobref *iobref,
+ void *priv)
+{
+ int ret = -1;
+ rpc_transport_reply_t reply = {{0, }};
+
+ if ((!trans) || (!rpchdr) || (!rpchdr->iov_base)) {
+ goto out;
+ }
+
+ reply.msg.rpchdr = rpchdr;
+ reply.msg.rpchdrcount = rpchdrcount;
+ reply.msg.proghdr = proghdr;
+ reply.msg.proghdrcount = proghdrcount;
+ reply.msg.progpayload = progpayload;
+ reply.msg.progpayloadcount = progpayloadcount;
+ reply.msg.iobref = iobref;
+ reply.private = priv;
+
+ ret = rpc_transport_submit_reply (trans, &reply);
+
+out:
+ return ret;
+}
+
+
+int
+rpcsvc_fill_reply (rpcsvc_request_t *req, struct rpc_msg *reply)
+{
+ int ret = -1;
+ rpcsvc_program_t *prog = NULL;
+ if ((!req) || (!reply))
+ goto out;
+
+ ret = 0;
+ rpc_fill_empty_reply (reply, req->xid);
+ if (req->rpc_status == MSG_DENIED) {
+ rpc_fill_denied_reply (reply, req->rpc_err, req->auth_err);
+ goto out;
+ }
+
+ prog = rpcsvc_request_program (req);
+
+ if (req->rpc_status == MSG_ACCEPTED)
+ rpc_fill_accepted_reply (reply, req->rpc_err,
+ (prog) ? prog->proglowvers : 0,
+ (prog) ? prog->proghighvers: 0,
+ req->verf.flavour, req->verf.datalen,
+ req->verf.authdata);
+ else
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Invalid rpc_status value");
+
+out:
+ return ret;
+}
+
+
+/* Given a request and the reply payload, build a reply and encodes the reply
+ * into a record header. This record header is encoded into the vector pointed
+ * to be recbuf.
+ * msgvec is the buffer that points to the payload of the RPC program.
+ * This buffer can be NULL, if an RPC error reply is being constructed.
+ * The only reason it is needed here is that in case the buffer is provided,
+ * we should account for the length of that buffer in the RPC fragment header.
+ */
+struct iobuf *
+rpcsvc_record_build_record (rpcsvc_request_t *req, size_t payload,
+ size_t hdrlen, struct iovec *recbuf)
+{
+ struct rpc_msg reply;
+ struct iobuf *replyiob = NULL;
+ char *record = NULL;
+ struct iovec recordhdr = {0, };
+ size_t pagesize = 0;
+ size_t xdr_size = 0;
+ rpcsvc_t *svc = NULL;
+ int ret = -1;
+
+ if ((!req) || (!req->trans) || (!req->svc) || (!recbuf))
+ return NULL;
+
+ svc = req->svc;
+
+ /* Fill the rpc structure and XDR it into the buffer got above. */
+ ret = rpcsvc_fill_reply (req, &reply);
+ if (ret)
+ goto err_exit;
+
+ xdr_size = xdr_sizeof ((xdrproc_t)xdr_replymsg, &reply);
+
+ /* Payload would include 'readv' size etc too, where as
+ that comes as another payload iobuf */
+ replyiob = iobuf_get2 (svc->ctx->iobuf_pool, (xdr_size + hdrlen));
+ if (!replyiob) {
+ goto err_exit;
+ }
+
+ pagesize = iobuf_pagesize (replyiob);
+
+ record = iobuf_ptr (replyiob); /* Now we have it. */
+
+ recordhdr = rpcsvc_record_build_header (record, pagesize, reply,
+ payload);
+ if (!recordhdr.iov_base) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to build record "
+ " header");
+ iobuf_unref (replyiob);
+ replyiob = NULL;
+ recbuf->iov_base = NULL;
+ goto err_exit;
+ }
+
+ recbuf->iov_base = recordhdr.iov_base;
+ recbuf->iov_len = recordhdr.iov_len;
+err_exit:
+ return replyiob;
+}
+
+
+/*
+ * The function to submit a program message to the RPC service.
+ * This message is added to the transmission queue of the
+ * conn.
+ *
+ * Program callers are not expected to use the msgvec->iov_base
+ * address for anything else.
+ * Nor are they expected to free it once this function returns.
+ * Once the transmission of the buffer is completed by the RPC service,
+ * the memory area as referenced through @msg will be unrefed.
+ * If a higher layer does not want anything to do with this iobuf
+ * after this function returns, it should call unref on it. For keeping
+ * it around till the transmission is actually complete, rpcsvc also refs it.
+ * *
+ * If this function returns an error by returning -1, the
+ * higher layer programs should assume that a disconnection happened
+ * and should know that the conn memory area as well as the req structure
+ * has been freed internally.
+ *
+ * For now, this function assumes that a submit is always called
+ * to send a new record. Later, if there is a situation where different
+ * buffers for the same record come from different sources, then we'll
+ * need to change this code to account for multiple submit calls adding
+ * the buffers into a single record.
+ */
+
+int
+rpcsvc_submit_generic (rpcsvc_request_t *req, struct iovec *proghdr,
+ int hdrcount, struct iovec *payload, int payloadcount,
+ struct iobref *iobref)
+{
+ int ret = -1, i = 0;
+ struct iobuf *replyiob = NULL;
+ struct iovec recordhdr = {0, };
+ rpc_transport_t *trans = NULL;
+ size_t msglen = 0;
+ size_t hdrlen = 0;
+ char new_iobref = 0;
+ rpcsvc_drc_globals_t *drc = NULL;
+
+ if ((!req) || (!req->trans))
+ return -1;
+
+ trans = req->trans;
+
+ for (i = 0; i < hdrcount; i++) {
+ msglen += proghdr[i].iov_len;
+ }
+
+ for (i = 0; i < payloadcount; i++) {
+ msglen += payload[i].iov_len;
+ }
+
+ gf_log (GF_RPCSVC, GF_LOG_TRACE, "Tx message: %zu", msglen);
+
+ /* Build the buffer containing the encoded RPC reply. */
+ replyiob = rpcsvc_record_build_record (req, msglen, hdrlen, &recordhdr);
+ if (!replyiob) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR,"Reply record creation failed");
+ goto disconnect_exit;
+ }
+
+ if (!iobref) {
+ iobref = iobref_new ();
+ if (!iobref) {
+ goto disconnect_exit;
+ }
+
+ new_iobref = 1;
+ }
+
+ iobref_add (iobref, replyiob);
+
+ /* cache the request in the duplicate request cache for appropriate ops */
+ if (req->reply) {
+ drc = req->svc->drc;
+
+ LOCK (&drc->lock);
+ ret = rpcsvc_cache_reply (req, iobref, &recordhdr, 1,
+ proghdr, hdrcount,
+ payload, payloadcount);
+ UNLOCK (&drc->lock);
+ }
+
+ ret = rpcsvc_transport_submit (trans, &recordhdr, 1, proghdr, hdrcount,
+ payload, payloadcount, iobref,
+ req->trans_private);
+
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "failed to submit message "
+ "(XID: 0x%x, Program: %s, ProgVers: %d, Proc: %d) to "
+ "rpc-transport (%s)", req->xid,
+ req->prog ? req->prog->progname : "(not matched)",
+ req->prog ? req->prog->progver : 0,
+ req->procnum, trans->name);
+ } else {
+ gf_log (GF_RPCSVC, GF_LOG_TRACE,
+ "submitted reply for rpc-message (XID: 0x%x, "
+ "Program: %s, ProgVers: %d, Proc: %d) to rpc-transport "
+ "(%s)", req->xid, req->prog ? req->prog->progname: "-",
+ req->prog ? req->prog->progver : 0,
+ req->procnum, trans->name);
+ }
+
+disconnect_exit:
+ if (replyiob) {
+ iobuf_unref (replyiob);
+ }
+
+ if (new_iobref) {
+ iobref_unref (iobref);
+ }
+
+ rpcsvc_request_destroy (req);
+
+ return ret;
+}
+
+
+int
+rpcsvc_error_reply (rpcsvc_request_t *req)
+{
+ struct iovec dummyvec = {0, };
+
+ if (!req)
+ return -1;
+
+ gf_log_callingfn ("", GF_LOG_DEBUG, "sending a RPC error reply");
+
+ /* At this point the req should already have been filled with the
+ * appropriate RPC error numbers.
+ */
+ return rpcsvc_submit_generic (req, &dummyvec, 0, NULL, 0, NULL);
+}
+
+
+/* Register the program with the local portmapper service. */
+inline int
+rpcsvc_program_register_portmap (rpcsvc_program_t *newprog, uint32_t port)
+{
+ int ret = -1; /* FAIL */
+
+ if (!newprog) {
+ goto out;
+ }
+
+ /* pmap_set() returns 0 for FAIL and 1 for SUCCESS */
+ if (!(pmap_set (newprog->prognum, newprog->progver, IPPROTO_TCP,
+ port))) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Could not register with"
+ " portmap");
+ goto out;
+ }
+
+ ret = 0; /* SUCCESS */
+out:
+ return ret;
+}
+
+
+inline int
+rpcsvc_program_unregister_portmap (rpcsvc_program_t *prog)
+{
+ int ret = -1;
+
+ if (!prog)
+ goto out;
+
+ if (!(pmap_unset(prog->prognum, prog->progver))) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Could not unregister with"
+ " portmap");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+rpcsvc_register_portmap_enabled (rpcsvc_t *svc)
+{
+ return svc->register_portmap;
+}
+
+int32_t
+rpcsvc_get_listener_port (rpcsvc_listener_t *listener)
+{
+ int32_t listener_port = -1;
+
+ if ((listener == NULL) || (listener->trans == NULL)) {
+ goto out;
+ }
+
+ switch (listener->trans->myinfo.sockaddr.ss_family) {
+ case AF_INET:
+ listener_port = ((struct sockaddr_in *)&listener->trans->myinfo.sockaddr)->sin_port;
+ break;
+
+ case AF_INET6:
+ listener_port = ((struct sockaddr_in6 *)&listener->trans->myinfo.sockaddr)->sin6_port;
+ break;
+
+ default:
+ gf_log (GF_RPCSVC, GF_LOG_DEBUG,
+ "invalid address family (%d)",
+ listener->trans->myinfo.sockaddr.ss_family);
+ goto out;
+ }
+
+ listener_port = ntohs (listener_port);
+
+out:
+ return listener_port;
+}
+
+
+rpcsvc_listener_t *
+rpcsvc_get_listener (rpcsvc_t *svc, uint16_t port, rpc_transport_t *trans)
+{
+ rpcsvc_listener_t *listener = NULL;
+ char found = 0;
+ uint32_t listener_port = 0;
+
+ if (!svc) {
+ goto out;
+ }
+
+ pthread_mutex_lock (&svc->rpclock);
+ {
+ list_for_each_entry (listener, &svc->listeners, list) {
+ if (trans != NULL) {
+ if (listener->trans == trans) {
+ found = 1;
+ break;
+ }
+
+ continue;
+ }
+
+ listener_port = rpcsvc_get_listener_port (listener);
+ if (listener_port == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR,
+ "invalid port for listener %s",
+ listener->trans->name);
+ continue;
+ }
+
+ if (listener_port == port) {
+ found = 1;
+ break;
+ }
+ }
+ }
+ pthread_mutex_unlock (&svc->rpclock);
+
+ if (!found) {
+ listener = NULL;
+ }
+
+out:
+ return listener;
+}
+
+
+/* The only difference between the generic submit and this one is that the
+ * generic submit is also used for submitting RPC error replies in where there
+ * are no payloads so the msgvec and msgbuf can be NULL.
+ * Since RPC programs should be using this function along with their payloads
+ * we must perform NULL checks before calling the generic submit.
+ */
+int
+rpcsvc_submit_message (rpcsvc_request_t *req, struct iovec *proghdr,
+ int hdrcount, struct iovec *payload, int payloadcount,
+ struct iobref *iobref)
+{
+ if ((!req) || (!req->trans) || (!proghdr) || (!proghdr->iov_base))
+ return -1;
+
+ return rpcsvc_submit_generic (req, proghdr, hdrcount, payload,
+ payloadcount, iobref);
+}
+
+
+int
+rpcsvc_program_unregister (rpcsvc_t *svc, rpcsvc_program_t *program)
+{
+ int ret = -1;
+ rpcsvc_program_t *prog = NULL;
+ if (!svc || !program) {
+ goto out;
+ }
+
+ ret = rpcsvc_program_unregister_portmap (program);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "portmap unregistration of"
+ " program failed");
+ goto out;
+ }
+
+ pthread_mutex_lock (&svc->rpclock);
+ {
+ list_for_each_entry (prog, &svc->programs, program) {
+ if ((prog->prognum == program->prognum)
+ && (prog->progver == program->progver)) {
+ break;
+ }
+ }
+ }
+ pthread_mutex_unlock (&svc->rpclock);
+
+ if (prog == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Program unregistered: %s, Num: %d,"
+ " Ver: %d, Port: %d", prog->progname, prog->prognum,
+ prog->progver, prog->progport);
+
+ pthread_mutex_lock (&svc->rpclock);
+ {
+ list_del_init (&prog->program);
+ }
+ pthread_mutex_unlock (&svc->rpclock);
+
+ ret = 0;
+out:
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Program unregistration failed"
+ ": %s, Num: %d, Ver: %d, Port: %d", program->progname,
+ program->prognum, program->progver, program->progport);
+ }
+
+ return ret;
+}
+
+
+inline int
+rpcsvc_transport_peername (rpc_transport_t *trans, char *hostname, int hostlen)
+{
+ if (!trans) {
+ return -1;
+ }
+
+ return rpc_transport_get_peername (trans, hostname, hostlen);
+}
+
+
+inline int
+rpcsvc_transport_peeraddr (rpc_transport_t *trans, char *addrstr, int addrlen,
+ struct sockaddr_storage *sa, socklen_t sasize)
+{
+ if (!trans) {
+ return -1;
+ }
+
+ return rpc_transport_get_peeraddr(trans, addrstr, addrlen, sa,
+ sasize);
+}
+
+
+rpc_transport_t *
+rpcsvc_transport_create (rpcsvc_t *svc, dict_t *options, char *name)
+{
+ int ret = -1;
+ rpc_transport_t *trans = NULL;
+
+ trans = rpc_transport_load (svc->ctx, options, name);
+ if (!trans) {
+ gf_log (GF_RPCSVC, GF_LOG_WARNING, "cannot create listener, "
+ "initing the transport failed");
+ goto out;
+ }
+
+ ret = rpc_transport_listen (trans);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_WARNING,
+ "listening on transport failed");
+ goto out;
+ }
+
+ ret = rpc_transport_register_notify (trans, rpcsvc_notify, svc);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_WARNING, "registering notify failed");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if ((ret == -1) && (trans)) {
+ rpc_transport_disconnect (trans);
+ trans = NULL;
+ }
+
+ return trans;
+}
+
+rpcsvc_listener_t *
+rpcsvc_listener_alloc (rpcsvc_t *svc, rpc_transport_t *trans)
+{
+ rpcsvc_listener_t *listener = NULL;
+
+ listener = GF_CALLOC (1, sizeof (*listener),
+ gf_common_mt_rpcsvc_listener_t);
+ if (!listener) {
+ goto out;
+ }
+
+ listener->trans = trans;
+ listener->svc = svc;
+
+ INIT_LIST_HEAD (&listener->list);
+
+ pthread_mutex_lock (&svc->rpclock);
+ {
+ list_add_tail (&listener->list, &svc->listeners);
+ }
+ pthread_mutex_unlock (&svc->rpclock);
+out:
+ return listener;
+}
+
+
+int32_t
+rpcsvc_create_listener (rpcsvc_t *svc, dict_t *options, char *name)
+{
+ rpc_transport_t *trans = NULL;
+ rpcsvc_listener_t *listener = NULL;
+ int32_t ret = -1;
+
+ if (!svc || !options) {
+ goto out;
+ }
+
+ trans = rpcsvc_transport_create (svc, options, name);
+ if (!trans) {
+ /* LOG TODO */
+ goto out;
+ }
+
+ listener = rpcsvc_listener_alloc (svc, trans);
+ if (listener == NULL) {
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (!listener && trans) {
+ rpc_transport_disconnect (trans);
+ }
+
+ return ret;
+}
+
+
+int32_t
+rpcsvc_create_listeners (rpcsvc_t *svc, dict_t *options, char *name)
+{
+ int32_t ret = -1, count = 0;
+ data_t *data = NULL;
+ char *str = NULL, *ptr = NULL, *transport_name = NULL;
+ char *transport_type = NULL, *saveptr = NULL, *tmp = NULL;
+
+ if ((svc == NULL) || (options == NULL) || (name == NULL)) {
+ goto out;
+ }
+
+ data = dict_get (options, "transport-type");
+ if (data == NULL) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR,
+ "option transport-type not set");
+ goto out;
+ }
+
+ transport_type = data_to_str (data);
+ if (transport_type == NULL) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR,
+ "option transport-type not set");
+ goto out;
+ }
+
+ /* duplicate transport_type, since following dict_set will free it */
+ transport_type = gf_strdup (transport_type);
+ if (transport_type == NULL) {
+ goto out;
+ }
+
+ str = gf_strdup (transport_type);
+ if (str == NULL) {
+ goto out;
+ }
+
+ ptr = strtok_r (str, ",", &saveptr);
+
+ while (ptr != NULL) {
+ tmp = gf_strdup (ptr);
+ if (tmp == NULL) {
+ goto out;
+ }
+
+ ret = gf_asprintf (&transport_name, "%s.%s", tmp, name);
+ if (ret == -1) {
+ goto out;
+ }
+
+ ret = dict_set_dynstr (options, "transport-type", tmp);
+ if (ret == -1) {
+ goto out;
+ }
+
+ tmp = NULL;
+ ptr = strtok_r (NULL, ",", &saveptr);
+
+ ret = rpcsvc_create_listener (svc, options, transport_name);
+ if (ret != 0) {
+ goto out;
+ }
+
+ GF_FREE (transport_name);
+ transport_name = NULL;
+ count++;
+ }
+
+ ret = dict_set_dynstr (options, "transport-type", transport_type);
+ if (ret == -1) {
+ goto out;
+ }
+
+ transport_type = NULL;
+
+out:
+ GF_FREE (str);
+
+ GF_FREE (transport_type);
+
+ GF_FREE (tmp);
+
+ GF_FREE (transport_name);
+
+ return count;
+}
+
+
+int
+rpcsvc_unregister_notify (rpcsvc_t *svc, rpcsvc_notify_t notify, void *mydata)
+{
+ rpcsvc_notify_wrapper_t *wrapper = NULL, *tmp = NULL;
+ int ret = 0;
+
+ if (!svc || !notify) {
+ goto out;
+ }
+
+ pthread_mutex_lock (&svc->rpclock);
+ {
+ list_for_each_entry_safe (wrapper, tmp, &svc->notify, list) {
+ if ((wrapper->notify == notify)
+ && (mydata == wrapper->data)) {
+ list_del_init (&wrapper->list);
+ GF_FREE (wrapper);
+ ret++;
+ }
+ }
+ }
+ pthread_mutex_unlock (&svc->rpclock);
+
+out:
+ return ret;
+}
+
+int
+rpcsvc_register_notify (rpcsvc_t *svc, rpcsvc_notify_t notify, void *mydata)
+{
+ rpcsvc_notify_wrapper_t *wrapper = NULL;
+ int ret = -1;
+
+ wrapper = rpcsvc_notify_wrapper_alloc ();
+ if (!wrapper) {
+ goto out;
+ }
+ svc->mydata = mydata; /* this_xlator */
+ wrapper->data = mydata;
+ wrapper->notify = notify;
+
+ pthread_mutex_lock (&svc->rpclock);
+ {
+ list_add_tail (&wrapper->list, &svc->notify);
+ svc->notify_count++;
+ }
+ pthread_mutex_unlock (&svc->rpclock);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+inline int
+rpcsvc_program_register (rpcsvc_t *svc, rpcsvc_program_t *program)
+{
+ int ret = -1;
+ rpcsvc_program_t *newprog = NULL;
+ char already_registered = 0;
+
+ if (!svc) {
+ goto out;
+ }
+
+ if (program->actors == NULL) {
+ goto out;
+ }
+
+ pthread_mutex_lock (&svc->rpclock);
+ {
+ list_for_each_entry (newprog, &svc->programs, program) {
+ if ((newprog->prognum == program->prognum)
+ && (newprog->progver == program->progver)) {
+ already_registered = 1;
+ break;
+ }
+ }
+ }
+ pthread_mutex_unlock (&svc->rpclock);
+
+ if (already_registered) {
+ ret = 0;
+ goto out;
+ }
+
+ newprog = GF_CALLOC (1, sizeof(*newprog),gf_common_mt_rpcsvc_program_t);
+ if (newprog == NULL) {
+ goto out;
+ }
+
+ memcpy (newprog, program, sizeof (*program));
+
+ INIT_LIST_HEAD (&newprog->program);
+
+ pthread_mutex_lock (&svc->rpclock);
+ {
+ list_add_tail (&newprog->program, &svc->programs);
+ }
+ pthread_mutex_unlock (&svc->rpclock);
+
+ ret = 0;
+ gf_log (GF_RPCSVC, GF_LOG_DEBUG, "New program registered: %s, Num: %d,"
+ " Ver: %d, Port: %d", newprog->progname, newprog->prognum,
+ newprog->progver, newprog->progport);
+
+out:
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Program registration failed:"
+ " %s, Num: %d, Ver: %d, Port: %d", program->progname,
+ program->prognum, program->progver, program->progport);
+ }
+
+ return ret;
+}
+
+static void
+free_prog_details (gf_dump_rsp *rsp)
+{
+ gf_prog_detail *prev = NULL;
+ gf_prog_detail *trav = NULL;
+
+ trav = rsp->prog;
+ while (trav) {
+ prev = trav;
+ trav = trav->next;
+ GF_FREE (prev);
+ }
+}
+
+static int
+build_prog_details (rpcsvc_request_t *req, gf_dump_rsp *rsp)
+{
+ int ret = -1;
+ rpcsvc_program_t *program = NULL;
+ gf_prog_detail *prog = NULL;
+ gf_prog_detail *prev = NULL;
+
+ if (!req || !req->trans || !req->svc)
+ goto out;
+
+ list_for_each_entry (program, &req->svc->programs, program) {
+ prog = GF_CALLOC (1, sizeof (*prog), 0);
+ if (!prog)
+ goto out;
+ prog->progname = program->progname;
+ prog->prognum = program->prognum;
+ prog->progver = program->progver;
+ if (!rsp->prog)
+ rsp->prog = prog;
+ if (prev)
+ prev->next = prog;
+ prev = prog;
+ }
+ if (prev)
+ ret = 0;
+out:
+ return ret;
+}
+
+static int
+rpcsvc_dump (rpcsvc_request_t *req)
+{
+ char rsp_buf[8 * 1024] = {0,};
+ gf_dump_rsp rsp = {0,};
+ struct iovec iov = {0,};
+ int op_errno = EINVAL;
+ int ret = -1;
+ uint32_t dump_rsp_len = 0;
+
+ if (!req)
+ goto sendrsp;
+
+ ret = build_prog_details (req, &rsp);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto sendrsp;
+ }
+
+ op_errno = 0;
+
+sendrsp:
+ rsp.op_errno = gf_errno_to_error (op_errno);
+ rsp.op_ret = ret;
+
+ dump_rsp_len = xdr_sizeof ((xdrproc_t) xdr_gf_dump_rsp,
+ &rsp);
+
+ iov.iov_base = rsp_buf;
+ iov.iov_len = dump_rsp_len;
+
+ ret = xdr_serialize_generic (iov, &rsp, (xdrproc_t)xdr_gf_dump_rsp);
+ if (ret < 0) {
+ ret = RPCSVC_ACTOR_ERROR;
+ } else {
+ rpcsvc_submit_generic (req, &iov, 1, NULL, 0, NULL);
+ ret = 0;
+ }
+
+ free_prog_details (&rsp);
+
+ return ret;
+}
+
+int
+rpcsvc_init_options (rpcsvc_t *svc, dict_t *options)
+{
+ char *optstr = NULL;
+ int ret = -1;
+
+ if ((!svc) || (!options))
+ return -1;
+
+ svc->memfactor = RPCSVC_DEFAULT_MEMFACTOR;
+
+ svc->register_portmap = _gf_true;
+ if (dict_get (options, "rpc.register-with-portmap")) {
+ ret = dict_get_str (options, "rpc.register-with-portmap",
+ &optstr);
+ if (ret < 0) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to parse "
+ "dict");
+ goto out;
+ }
+
+ ret = gf_string2boolean (optstr, &svc->register_portmap);
+ if (ret < 0) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to parse bool "
+ "string");
+ goto out;
+ }
+ }
+
+ if (!svc->register_portmap)
+ gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Portmap registration "
+ "disabled");
+
+ ret = rpcsvc_set_outstanding_rpc_limit (svc, options);
+out:
+ return ret;
+}
+
+int
+rpcsvc_reconfigure_options (rpcsvc_t *svc, dict_t *options)
+{
+ xlator_t *xlator = NULL;
+ xlator_list_t *volentry = NULL;
+ char *srchkey = NULL;
+ char *keyval = NULL;
+ int ret = -1;
+
+ if ((!svc) || (!svc->options) || (!options))
+ return (-1);
+
+ /* Fetch the xlator from svc */
+ xlator = (xlator_t *) svc->mydata;
+ if (!xlator)
+ return (-1);
+
+ /* Reconfigure the volume specific rpc-auth.addr allow part */
+ volentry = xlator->children;
+ while (volentry) {
+ ret = gf_asprintf (&srchkey, "rpc-auth.addr.%s.allow",
+ volentry->xlator->name);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+ return (-1);
+ }
+
+ /* If found the srchkey, delete old key/val pair
+ * and set the key with new value.
+ */
+ if (!dict_get_str (options, srchkey, &keyval)) {
+ dict_del (svc->options, srchkey);
+ ret = dict_set_str (svc->options, srchkey, keyval);
+ if (ret < 0) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR,
+ "dict_set_str error");
+ GF_FREE (srchkey);
+ return (-1);
+ }
+ }
+
+ GF_FREE (srchkey);
+ volentry = volentry->next;
+ }
+
+ /* Reconfigure the volume specific rpc-auth.addr reject part */
+ volentry = xlator->children;
+ while (volentry) {
+ ret = gf_asprintf (&srchkey, "rpc-auth.addr.%s.reject",
+ volentry->xlator->name);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+ return (-1);
+ }
+
+ /* If found the srchkey, delete old key/val pair
+ * and set the key with new value.
+ */
+ if (!dict_get_str (options, srchkey, &keyval)) {
+ dict_del (svc->options, srchkey);
+ ret = dict_set_str (svc->options, srchkey, keyval);
+ if (ret < 0) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR,
+ "dict_set_str error");
+ GF_FREE (srchkey);
+ return (-1);
+ }
+ }
+
+ GF_FREE (srchkey);
+ volentry = volentry->next;
+ }
+
+ ret = rpcsvc_init_options (svc, options);
+ if (ret)
+ return (-1);
+
+ return rpcsvc_auth_reconf (svc, options);
+}
+
+int
+rpcsvc_transport_unix_options_build (dict_t **options, char *filepath)
+{
+ dict_t *dict = NULL;
+ char *fpath = NULL;
+ int ret = -1;
+
+ GF_ASSERT (filepath);
+ GF_ASSERT (options);
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ fpath = gf_strdup (filepath);
+ if (!fpath) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr (dict, "transport.socket.listen-path", fpath);
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (dict, "transport.address-family", "unix");
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (dict, "transport.socket.nodelay", "off");
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (dict, "transport-type", "socket");
+ if (ret)
+ goto out;
+
+ *options = dict;
+out:
+ if (ret) {
+ GF_FREE (fpath);
+ if (dict)
+ dict_unref (dict);
+ }
+ return ret;
+}
+
+/*
+ * Reconfigure() the rpc.outstanding-rpc-limit param.
+ */
+int
+rpcsvc_set_outstanding_rpc_limit (rpcsvc_t *svc, dict_t *options)
+{
+ int ret = -1; /* FAILURE */
+ int rpclim = 0;
+ static char *rpclimkey = "rpc.outstanding-rpc-limit";
+
+ if ((!svc) || (!options))
+ return (-1);
+
+ /* Reconfigure() the rpc.outstanding-rpc-limit param */
+ ret = dict_get_int32 (options, rpclimkey, &rpclim);
+ if (ret < 0) {
+ /* Fall back to default for FAILURE */
+ rpclim = RPCSVC_DEFAULT_OUTSTANDING_RPC_LIMIT;
+ } else {
+ /* SUCCESS: round off to multiple of 8.
+ * If the input value fails Boundary check, fall back to
+ * default i.e. RPCSVC_DEFAULT_OUTSTANDING_RPC_LIMIT.
+ * NB: value 0 is special, means its unset i.e. unlimited.
+ */
+ rpclim = ((rpclim + 8 - 1) >> 3) * 8;
+ if (rpclim < RPCSVC_MIN_OUTSTANDING_RPC_LIMIT) {
+ rpclim = RPCSVC_DEFAULT_OUTSTANDING_RPC_LIMIT;
+ } else if (rpclim > RPCSVC_MAX_OUTSTANDING_RPC_LIMIT) {
+ rpclim = RPCSVC_MAX_OUTSTANDING_RPC_LIMIT;
+ }
+ }
+
+ if (svc->outstanding_rpc_limit != rpclim) {
+ svc->outstanding_rpc_limit = rpclim;
+ gf_log (GF_RPCSVC, GF_LOG_INFO,
+ "Configured %s with value %d",
+ rpclimkey, rpclim);
+ }
+
+ return (0);
+}
+
+/* The global RPC service initializer.
+ */
+rpcsvc_t *
+rpcsvc_init (xlator_t *xl, glusterfs_ctx_t *ctx, dict_t *options,
+ uint32_t poolcount)
+{
+ rpcsvc_t *svc = NULL;
+ int ret = -1;
+
+ if ((!ctx) || (!options))
+ return NULL;
+
+ svc = GF_CALLOC (1, sizeof (*svc), gf_common_mt_rpcsvc_t);
+ if (!svc)
+ return NULL;
+
+ pthread_mutex_init (&svc->rpclock, NULL);
+ INIT_LIST_HEAD (&svc->authschemes);
+ INIT_LIST_HEAD (&svc->notify);
+ INIT_LIST_HEAD (&svc->listeners);
+ INIT_LIST_HEAD (&svc->programs);
+
+ ret = rpcsvc_init_options (svc, options);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to init options");
+ goto free_svc;
+ }
+
+ if (!poolcount)
+ poolcount = RPCSVC_POOLCOUNT_MULT * svc->memfactor;
+
+ gf_log (GF_RPCSVC, GF_LOG_TRACE, "rx pool: %d", poolcount);
+ svc->rxpool = mem_pool_new (rpcsvc_request_t, poolcount);
+ /* TODO: leak */
+ if (!svc->rxpool) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "mem pool allocation failed");
+ goto free_svc;
+ }
+
+ ret = rpcsvc_auth_init (svc, options);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to init "
+ "authentication");
+ goto free_svc;
+ }
+
+ ret = -1;
+ svc->options = options;
+ svc->ctx = ctx;
+ svc->mydata = xl;
+ gf_log (GF_RPCSVC, GF_LOG_DEBUG, "RPC service inited.");
+
+ gluster_dump_prog.options = options;
+
+ ret = rpcsvc_program_register (svc, &gluster_dump_prog);
+ if (ret) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR,
+ "failed to register DUMP program");
+ goto free_svc;
+ }
+
+ ret = 0;
+free_svc:
+ if (ret == -1) {
+ GF_FREE (svc);
+ svc = NULL;
+ }
+
+ return svc;
+}
+
+
+int
+rpcsvc_transport_peer_check_search (dict_t *options, char *pattern,
+ char *ip, char *hostname)
+{
+ int ret = -1;
+ char *addrtok = NULL;
+ char *addrstr = NULL;
+ char *dup_addrstr = NULL;
+ char *svptr = NULL;
+
+ if ((!options) || (!ip))
+ return -1;
+
+ ret = dict_get_str (options, pattern, &addrstr);
+ if (ret < 0) {
+ ret = -1;
+ goto err;
+ }
+
+ if (!addrstr) {
+ ret = -1;
+ goto err;
+ }
+
+ dup_addrstr = gf_strdup (addrstr);
+ addrtok = strtok_r (dup_addrstr, ",", &svptr);
+ while (addrtok) {
+
+ /* CASEFOLD not present on Solaris */
+#ifdef FNM_CASEFOLD
+ ret = fnmatch (addrtok, ip, FNM_CASEFOLD);
+#else
+ ret = fnmatch (addrtok, ip, 0);
+#endif
+ if (ret == 0)
+ goto err;
+
+ /* compare hostnames if applicable */
+ if (hostname) {
+#ifdef FNM_CASEFOLD
+ ret = fnmatch (addrtok, hostname, FNM_CASEFOLD);
+#else
+ ret = fnmatch (addrtok, hostname, 0);
+#endif
+ if (ret == 0)
+ goto err;
+ }
+
+ addrtok = strtok_r (NULL, ",", &svptr);
+ }
+
+ ret = -1;
+err:
+ GF_FREE (dup_addrstr);
+
+ return ret;
+}
+
+
+static int
+rpcsvc_transport_peer_check_allow (dict_t *options, char *volname,
+ char *ip, char *hostname)
+{
+ int ret = RPCSVC_AUTH_DONTCARE;
+ char *srchstr = NULL;
+
+ if ((!options) || (!ip) || (!volname))
+ return ret;
+
+ ret = gf_asprintf (&srchstr, "rpc-auth.addr.%s.allow", volname);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+ ret = RPCSVC_AUTH_DONTCARE;
+ goto out;
+ }
+
+ ret = rpcsvc_transport_peer_check_search (options, srchstr,
+ ip, hostname);
+ GF_FREE (srchstr);
+
+ if (ret == 0)
+ ret = RPCSVC_AUTH_ACCEPT;
+ else
+ ret = RPCSVC_AUTH_REJECT;
+out:
+ return ret;
+}
+
+static int
+rpcsvc_transport_peer_check_reject (dict_t *options, char *volname,
+ char *ip, char *hostname)
+{
+ int ret = RPCSVC_AUTH_DONTCARE;
+ char *srchstr = NULL;
+
+ if ((!options) || (!ip) || (!volname))
+ return ret;
+
+ ret = gf_asprintf (&srchstr, "rpc-auth.addr.%s.reject",
+ volname);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+ ret = RPCSVC_AUTH_REJECT;
+ goto out;
+ }
+
+ ret = rpcsvc_transport_peer_check_search (options, srchstr,
+ ip, hostname);
+ GF_FREE (srchstr);
+
+ if (ret == 0)
+ ret = RPCSVC_AUTH_REJECT;
+ else
+ ret = RPCSVC_AUTH_DONTCARE;
+out:
+ return ret;
+}
+
+
+/* Combines rpc auth's allow and reject options.
+ * Order of checks is important.
+ * First, REJECT if either rejects.
+ * If neither rejects, ACCEPT if either accepts.
+ * If neither accepts, DONTCARE
+ */
+int
+rpcsvc_combine_allow_reject_volume_check (int allow, int reject)
+{
+ if (allow == RPCSVC_AUTH_REJECT ||
+ reject == RPCSVC_AUTH_REJECT)
+ return RPCSVC_AUTH_REJECT;
+
+ if (allow == RPCSVC_AUTH_ACCEPT ||
+ reject == RPCSVC_AUTH_ACCEPT)
+ return RPCSVC_AUTH_ACCEPT;
+
+ return RPCSVC_AUTH_DONTCARE;
+}
+
+int
+rpcsvc_auth_check (rpcsvc_t *svc, char *volname,
+ rpc_transport_t *trans)
+{
+ int ret = RPCSVC_AUTH_REJECT;
+ int accept = RPCSVC_AUTH_REJECT;
+ int reject = RPCSVC_AUTH_REJECT;
+ char *hostname = NULL;
+ char *ip = NULL;
+ char client_ip[RPCSVC_PEER_STRLEN] = {0};
+ char *allow_str = NULL;
+ char *reject_str = NULL;
+ char *srchstr = NULL;
+ dict_t *options = NULL;
+
+ if (!svc || !volname || !trans)
+ return ret;
+
+ /* Fetch the options from svc struct and validate */
+ options = svc->options;
+ if (!options)
+ return ret;
+
+ ret = rpcsvc_transport_peername (trans, client_ip, RPCSVC_PEER_STRLEN);
+ if (ret != 0) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to get remote addr: "
+ "%s", gai_strerror (ret));
+ return RPCSVC_AUTH_REJECT;
+ }
+
+ /* Accept if its the default case: Allow all, Reject none
+ * The default volfile always contains a 'allow *' rule
+ * for each volume. If allow rule is missing (which implies
+ * there is some bad volfile generating code doing this), we
+ * assume no one is allowed mounts, and thus, we reject mounts.
+ */
+ ret = gf_asprintf (&srchstr, "rpc-auth.addr.%s.allow", volname);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+ return RPCSVC_AUTH_REJECT;
+ }
+
+ ret = dict_get_str (options, srchstr, &allow_str);
+ GF_FREE (srchstr);
+ if (ret < 0)
+ return RPCSVC_AUTH_REJECT;
+
+ ret = gf_asprintf (&srchstr, "rpc-auth.addr.%s.reject", volname);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+ return RPCSVC_AUTH_REJECT;
+ }
+
+ ret = dict_get_str (options, srchstr, &reject_str);
+ GF_FREE (srchstr);
+ if (reject_str == NULL && !strcmp ("*", allow_str))
+ return RPCSVC_AUTH_ACCEPT;
+
+ /* Non-default rule, authenticate */
+ if (!get_host_name (client_ip, &ip))
+ ip = client_ip;
+
+ /* addr-namelookup check */
+ if (svc->addr_namelookup == _gf_true) {
+ ret = gf_get_hostname_from_ip (ip, &hostname);
+ if (ret) {
+ if (hostname)
+ GF_FREE (hostname);
+ /* failed to get hostname, but hostname auth
+ * is enabled, so authentication will not be
+ * 100% correct. reject mounts
+ */
+ return RPCSVC_AUTH_REJECT;
+ }
+ }
+
+ accept = rpcsvc_transport_peer_check_allow (options, volname,
+ ip, hostname);
+
+ reject = rpcsvc_transport_peer_check_reject (options, volname,
+ ip, hostname);
+
+ if (hostname)
+ GF_FREE (hostname);
+ return rpcsvc_combine_allow_reject_volume_check (accept, reject);
+}
+
+int
+rpcsvc_transport_privport_check (rpcsvc_t *svc, char *volname,
+ rpc_transport_t *trans)
+{
+ union gf_sock_union sock_union;
+ int ret = RPCSVC_AUTH_REJECT;
+ socklen_t sinsize = sizeof (&sock_union.sin);
+ char *srchstr = NULL;
+ char *valstr = NULL;
+ uint16_t port = 0;
+ gf_boolean_t insecure = _gf_false;
+
+ memset (&sock_union, 0, sizeof (sock_union));
+
+ if ((!svc) || (!volname) || (!trans))
+ return ret;
+
+ ret = rpcsvc_transport_peeraddr (trans, NULL, 0, &sock_union.storage,
+ sinsize);
+ if (ret != 0) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to get peer addr: %s",
+ gai_strerror (ret));
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ port = ntohs (sock_union.sin.sin_port);
+ gf_log (GF_RPCSVC, GF_LOG_TRACE, "Client port: %d", (int)port);
+ /* If the port is already a privileged one, dont bother with checking
+ * options.
+ */
+ if (port <= 1024) {
+ ret = RPCSVC_AUTH_ACCEPT;
+ goto err;
+ }
+
+ /* Disabled by default */
+ ret = gf_asprintf (&srchstr, "rpc-auth.ports.%s.insecure", volname);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ ret = dict_get_str (svc->options, srchstr, &valstr);
+ if (ret) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to"
+ " read rpc-auth.ports.insecure value");
+ goto err;
+ }
+
+ ret = gf_string2boolean (valstr, &insecure);
+ if (ret) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to"
+ " convert rpc-auth.ports.insecure value");
+ goto err;
+ }
+
+ ret = insecure ? RPCSVC_AUTH_ACCEPT : RPCSVC_AUTH_REJECT;
+
+ if (ret == RPCSVC_AUTH_ACCEPT)
+ gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Unprivileged port allowed");
+ else
+ gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Unprivileged port not"
+ " allowed");
+
+err:
+ if (srchstr)
+ GF_FREE (srchstr);
+
+ return ret;
+}
+
+
+char *
+rpcsvc_volume_allowed (dict_t *options, char *volname)
+{
+ char globalrule[] = "rpc-auth.addr.allow";
+ char *srchstr = NULL;
+ char *addrstr = NULL;
+ int ret = -1;
+
+ if ((!options) || (!volname))
+ return NULL;
+
+ ret = gf_asprintf (&srchstr, "rpc-auth.addr.%s.allow", volname);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+ goto out;
+ }
+
+ if (!dict_get (options, srchstr))
+ ret = dict_get_str (options, globalrule, &addrstr);
+ else
+ ret = dict_get_str (options, srchstr, &addrstr);
+
+out:
+ GF_FREE (srchstr);
+
+ return addrstr;
+}
+
+
+rpcsvc_actor_t gluster_dump_actors[] = {
+ [GF_DUMP_NULL] = {"NULL", GF_DUMP_NULL, NULL, NULL, 0, DRC_NA},
+ [GF_DUMP_DUMP] = {"DUMP", GF_DUMP_DUMP, rpcsvc_dump, NULL, 0, DRC_NA},
+ [GF_DUMP_MAXVALUE] = {"MAXVALUE", GF_DUMP_MAXVALUE, NULL, NULL, 0, DRC_NA},
+};
+
+
+struct rpcsvc_program gluster_dump_prog = {
+ .progname = "GF-DUMP",
+ .prognum = GLUSTER_DUMP_PROGRAM,
+ .progver = GLUSTER_DUMP_VERSION,
+ .actors = gluster_dump_actors,
+ .numactors = 2,
+};
diff --git a/rpc/rpc-lib/src/rpcsvc.h b/rpc/rpc-lib/src/rpcsvc.h
new file mode 100644
index 000000000..cbc1f4226
--- /dev/null
+++ b/rpc/rpc-lib/src/rpcsvc.h
@@ -0,0 +1,606 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _RPCSVC_H
+#define _RPCSVC_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "event.h"
+#include "rpc-transport.h"
+#include "logging.h"
+#include "dict.h"
+#include "mem-pool.h"
+#include "list.h"
+#include "iobuf.h"
+#include "xdr-rpc.h"
+#include "glusterfs.h"
+#include "xlator.h"
+#include "rpcsvc-common.h"
+
+#include <pthread.h>
+#include <sys/uio.h>
+#include <inttypes.h>
+#include <rpc/rpc_msg.h>
+#include "compat.h"
+
+#ifndef MAX_IOVEC
+#define MAX_IOVEC 16
+#endif
+
+#define RPCSVC_DEFAULT_OUTSTANDING_RPC_LIMIT 64
+#define RPCSVC_MAX_OUTSTANDING_RPC_LIMIT 65536
+#define RPCSVC_MIN_OUTSTANDING_RPC_LIMIT 0 /* No limit i.e. Unlimited */
+
+#define GF_RPCSVC "rpc-service"
+#define RPCSVC_THREAD_STACK_SIZE ((size_t)(1024 * GF_UNIT_KB))
+
+#define RPCSVC_FRAGHDR_SIZE 4 /* 4-byte RPC fragment header size */
+#define RPCSVC_DEFAULT_LISTEN_PORT GF_DEFAULT_BASE_PORT
+#define RPCSVC_DEFAULT_MEMFACTOR 8
+#define RPCSVC_EVENTPOOL_SIZE_MULT 1024
+#define RPCSVC_POOLCOUNT_MULT 64
+#define RPCSVC_CONN_READ (128 * GF_UNIT_KB)
+#define RPCSVC_PAGE_SIZE (128 * GF_UNIT_KB)
+#define RPC_ROOT_UID 0
+#define RPC_ROOT_GID 0
+#define RPC_NOBODY_UID 65534
+#define RPC_NOBODY_GID 65534
+
+/* RPC Record States */
+#define RPCSVC_READ_FRAGHDR 1
+#define RPCSVC_READ_FRAG 2
+/* The size in bytes, if crossed by a fragment will be handed over to the
+ * vectored actor so that it can allocate its buffers the way it wants.
+ * In our RPC layer, we assume that vectored RPC requests/records are never
+ * spread over multiple RPC fragments since that prevents us from determining
+ * whether the record should be handled in RPC layer completely or handed to
+ * the vectored handler.
+ */
+#define RPCSVC_VECTORED_FRAGSZ 4096
+#define RPCSVC_VECTOR_READCRED 1003
+#define RPCSVC_VECTOR_READVERFSZ 1004
+#define RPCSVC_VECTOR_READVERF 1005
+#define RPCSVC_VECTOR_IGNORE 1006
+#define RPCSVC_VECTOR_READVEC 1007
+#define RPCSVC_VECTOR_READPROCHDR 1008
+
+#define rpcsvc_record_vectored_baremsg(rs) (((rs)->state == RPCSVC_READ_FRAG) && (rs)->vecstate == 0)
+#define rpcsvc_record_vectored_cred(rs) ((rs)->vecstate == RPCSVC_VECTOR_READCRED)
+#define rpcsvc_record_vectored_verfsz(rs) ((rs)->vecstate == RPCSVC_VECTOR_READVERFSZ)
+#define rpcsvc_record_vectored_verfread(rs) ((rs)->vecstate == RPCSVC_VECTOR_READVERF)
+#define rpcsvc_record_vectored_ignore(rs) ((rs)->vecstate == RPCSVC_VECTOR_IGNORE)
+#define rpcsvc_record_vectored_readvec(rs) ((rs)->vecstate == RPCSVC_VECTOR_READVEC)
+#define rpcsvc_record_vectored_readprochdr(rs) ((rs)->vecstate == RPCSVC_VECTOR_READPROCHDR)
+#define rpcsvc_record_vectored(rs) ((rs)->fragsize > RPCSVC_VECTORED_FRAGSZ)
+/* Includes bytes up to and including the credential length field. The credlen
+ * will be followed by @credlen bytes of credential data which will have to be
+ * read separately by the vectored reader. After the credentials comes the
+ * verifier which will also have to be read separately including the 8 bytes of
+ * verf flavour and verflen.
+ */
+#define RPCSVC_BARERPC_MSGSZ 32
+#define rpcsvc_record_readfraghdr(rs) ((rs)->state == RPCSVC_READ_FRAGHDR)
+#define rpcsvc_record_readfrag(rs) ((rs)->state == RPCSVC_READ_FRAG)
+
+#define RPCSVC_LOWVERS 2
+#define RPCSVC_HIGHVERS 2
+
+
+#if 0
+#error "defined in /usr/include/rpc/auth.h"
+
+#define AUTH_NONE 0 /* no authentication */
+#define AUTH_NULL 0 /* backward compatibility */
+#define AUTH_SYS 1 /* unix style (uid, gids) */
+#define AUTH_UNIX AUTH_SYS
+#define AUTH_SHORT 2 /* short hand unix style */
+#define AUTH_DES 3 /* des style (encrypted timestamps) */
+#define AUTH_DH AUTH_DES /* Diffie-Hellman (this is DES) */
+#define AUTH_KERB 4 /* kerberos style */
+#endif /* */
+
+typedef struct rpcsvc_program rpcsvc_program_t;
+
+struct rpcsvc_notify_wrapper {
+ struct list_head list;
+ void *data;
+ rpcsvc_notify_t notify;
+};
+typedef struct rpcsvc_notify_wrapper rpcsvc_notify_wrapper_t;
+
+
+typedef struct rpcsvc_request rpcsvc_request_t;
+
+typedef struct {
+ rpc_transport_t *trans;
+ rpcsvc_t *svc;
+ /* FIXME: remove address from this structure. Instead use get_myaddr
+ * interface implemented by individual transports.
+ */
+ struct sockaddr_storage sa;
+ struct list_head list;
+} rpcsvc_listener_t;
+
+struct rpcsvc_config {
+ int max_block_size;
+};
+
+typedef struct rpcsvc_auth_data {
+ int flavour;
+ int datalen;
+ char authdata[GF_MAX_AUTH_BYTES];
+} rpcsvc_auth_data_t;
+
+#define rpcsvc_auth_flavour(au) ((au).flavour)
+
+typedef struct drc_client drc_client_t;
+typedef struct drc_cached_op drc_cached_op_t;
+
+/* The container for the RPC call handed up to an actor.
+ * Dynamically allocated. Lives till the call reply is completely
+ * transmitted.
+ * */
+struct rpcsvc_request {
+ /* connection over which this request came. */
+ rpc_transport_t *trans;
+
+ rpcsvc_t *svc;
+
+ rpcsvc_program_t *prog;
+
+ /* The identifier for the call from client.
+ * Needed to pair the reply with the call.
+ */
+ uint32_t xid;
+
+ int prognum;
+
+ int progver;
+
+ int procnum;
+
+ int type;
+
+ /* Uid and gid filled by the rpc-auth module during the authentication
+ * phase.
+ */
+ uid_t uid;
+ gid_t gid;
+ pid_t pid;
+
+ gf_lkowner_t lk_owner;
+ uint64_t gfs_id;
+
+ /* Might want to move this to AUTH_UNIX specific state since this array
+ * is not available for every authentication scheme.
+ */
+ gid_t *auxgids;
+ gid_t auxgidsmall[SMALL_GROUP_COUNT];
+ gid_t *auxgidlarge;
+ int auxgidcount;
+
+
+ /* The RPC message payload, contains the data required
+ * by the program actors. This is the buffer that will need to
+ * be de-xdred by the actor.
+ */
+ struct iovec msg[MAX_IOVEC];
+ int count;
+
+ struct iobref *iobref;
+
+ /* Status of the RPC call, whether it was accepted or denied. */
+ int rpc_status;
+
+ /* In case, the call was denied, the RPC error is stored here
+ * till the reply is sent.
+ */
+ int rpc_err;
+
+ /* In case the failure happened because of an authentication problem
+ * , this value needs to be assigned the correct auth error number.
+ */
+ int auth_err;
+
+ /* There can be cases of RPC requests where the reply needs to
+ * be built from multiple sources. E.g. where even the NFS reply
+ * can contain a payload, as in the NFSv3 read reply. Here the RPC header
+ * ,NFS header and the read data are brought together separately from
+ * different buffers, so we need to stage the buffers temporarily here
+ * before all of them get added to the connection's transmission list.
+ */
+ struct list_head txlist;
+
+ /* While the reply record is being built, this variable keeps track
+ * of how many bytes have been added to the record.
+ */
+ size_t payloadsize;
+
+ /* The credentials extracted from the rpc request */
+ rpcsvc_auth_data_t cred;
+
+ /* The verified extracted from the rpc request. In request side
+ * processing this contains the verifier sent by the client, on reply
+ * side processing, it is filled with the verified that will be
+ * sent to the client.
+ */
+ rpcsvc_auth_data_t verf;
+
+ /* Execute this request's actor function as a synctask? */
+ gf_boolean_t synctask;
+
+ /* Container for a RPC program wanting to store a temp
+ * request-specific item.
+ */
+ void *private;
+
+ /* Container for transport to store request-specific item */
+ void *trans_private;
+
+ /* we need to ref the 'iobuf' in case of 'synctasking' it */
+ struct iobuf *hdr_iobuf;
+
+ /* pointer to cached reply for use in DRC */
+ drc_cached_op_t *reply;
+};
+
+#define rpcsvc_request_program(req) ((rpcsvc_program_t *)((req)->prog))
+#define rpcsvc_request_procnum(req) (((req)->procnum))
+#define rpcsvc_request_program_private(req) (((rpcsvc_program_t *)((req)->prog))->private)
+#define rpcsvc_request_accepted(req) ((req)->rpc_status == MSG_ACCEPTED)
+#define rpcsvc_request_accepted_success(req) ((req)->rpc_err == SUCCESS)
+#define rpcsvc_request_prog_minauth(req) (rpcsvc_request_program(req)->min_auth)
+#define rpcsvc_request_cred_flavour(req) (rpcsvc_auth_flavour(req->cred))
+#define rpcsvc_request_verf_flavour(req) (rpcsvc_auth_flavour(req->verf))
+#define rpcsvc_request_service(req) ((req)->svc)
+#define rpcsvc_request_uid(req) ((req)->uid)
+#define rpcsvc_request_gid(req) ((req)->gid)
+#define rpcsvc_request_private(req) ((req)->private)
+#define rpcsvc_request_xid(req) ((req)->xid)
+#define rpcsvc_request_set_private(req,prv) (req)->private = (void *)(prv)
+#define rpcsvc_request_iobref_ref(req) (iobref_ref ((req)->iobref))
+#define rpcsvc_request_record_ref(req) (iobuf_ref ((req)->recordiob))
+#define rpcsvc_request_record_unref(req) (iobuf_unref ((req)->recordiob))
+#define rpcsvc_request_record_iob(req) ((req)->recordiob)
+#define rpcsvc_request_set_vecstate(req, state) ((req)->vecstate = state)
+#define rpcsvc_request_vecstate(req) ((req)->vecstate)
+#define rpcsvc_request_transport(req) ((req)->trans)
+#define rpcsvc_request_transport_ref(req) (rpc_transport_ref((req)->trans))
+#define RPC_AUTH_ROOT_SQUASH(req) \
+ do { \
+ int gidcount = 0; \
+ if (req->svc->root_squash) { \
+ if (req->uid == RPC_ROOT_UID) \
+ req->uid = RPC_NOBODY_UID; \
+ if (req->gid == RPC_ROOT_GID) \
+ req->gid = RPC_NOBODY_GID; \
+ for (gidcount = 0; gidcount < req->auxgidcount; \
+ ++gidcount) { \
+ if (!req->auxgids[gidcount]) \
+ req->auxgids[gidcount] = \
+ RPC_NOBODY_GID; \
+ } \
+ } \
+ } while (0);
+
+#define RPCSVC_ACTOR_SUCCESS 0
+#define RPCSVC_ACTOR_ERROR (-1)
+#define RPCSVC_ACTOR_IGNORE (-2)
+
+/* Functor for every type of protocol actor
+ * must be defined like this.
+ *
+ * See the request structure for info on how to handle the request
+ * in the program actor.
+ *
+ * On successful santify checks inside the actor, it should return
+ * RPCSVC_ACTOR_SUCCESS.
+ * On an error, on which the RPC layer is expected to return a reply, the actor
+ * should return RPCSVC_ACTOR_ERROR.
+ *
+ */
+typedef int (*rpcsvc_actor) (rpcsvc_request_t *req);
+typedef int (*rpcsvc_vector_sizer) (int state, ssize_t *readsize,
+ char *base_addr, char *curr_addr);
+
+/* Every protocol actor will also need to specify the function the RPC layer
+ * will use to serialize or encode the message into XDR format just before
+ * transmitting on the connection.
+ */
+typedef void *(*rpcsvc_encode_reply) (void *msg);
+
+/* Once the reply has been transmitted, the message will have to be de-allocated
+ * , so every actor will need to provide a function that deallocates the message
+ * it had allocated as a response.
+ */
+typedef void (*rpcsvc_deallocate_reply) (void *msg);
+
+#define RPCSVC_NAME_MAX 32
+/* The descriptor for each procedure/actor that runs
+ * over the RPC service.
+ */
+typedef struct rpcsvc_actor_desc {
+ char procname[RPCSVC_NAME_MAX];
+ int procnum;
+ rpcsvc_actor actor;
+
+ /* Handler for cases where the RPC requests fragments are large enough
+ * to benefit from being decoded into aligned memory addresses. While
+ * decoding the request in a non-vectored manner, due to the nature of
+ * the XDR scheme, RPC cannot guarantee memory aligned addresses for
+ * the resulting message-specific structures. Allowing a specialized
+ * handler for letting the RPC program read the data from the network
+ * directly into its aligned buffers.
+ */
+ rpcsvc_vector_sizer vector_sizer;
+
+ /* Can actor be ran on behalf an unprivileged requestor? */
+ gf_boolean_t unprivileged;
+ drc_op_type_t op_type;
+} rpcsvc_actor_t;
+
+/* Describes a program and its version along with the function pointers
+ * required to handle the procedures/actors of each program/version.
+ * Never changed ever by any thread so no need for a lock.
+ */
+struct rpcsvc_program {
+ char progname[RPCSVC_NAME_MAX];
+ int prognum;
+ int progver;
+ /* FIXME */
+ dict_t *options; /* An opaque dictionary
+ * populated by the program
+ * (probably from xl->options)
+ * which contain enough
+ * information for transport to
+ * initialize. As a part of
+ * cleanup, the members of
+ * options which are of interest
+ * to transport should be put
+ * into a structure for better
+ * readability and structure
+ * should replace options member
+ * here.
+ */
+ uint16_t progport; /* Registered with portmap */
+#if 0
+ int progaddrfamily; /* AF_INET or AF_INET6 */
+ char *proghost; /* Bind host, can be NULL */
+#endif
+ rpcsvc_actor_t *actors; /* All procedure handlers */
+ int numactors; /* Num actors in actor array */
+ int proghighvers; /* Highest ver for program
+ supported by the system. */
+ int proglowvers; /* Lowest ver */
+
+ /* Program specific state handed to actors */
+ void *private;
+
+
+ /* This upcall is provided by the program during registration.
+ * It is used to notify the program about events like connection being
+ * destroyed etc. The rpc program may take appropriate actions, for eg.,
+ * in the case of connection being destroyed, it should cleanup its
+ * state stored in the connection.
+ */
+ rpcsvc_notify_t notify;
+
+ /* An integer that identifies the min auth strength that is required
+ * by this protocol, for eg. MOUNT3 needs AUTH_UNIX at least.
+ * See RFC 1813, Section 5.2.1.
+ */
+ int min_auth;
+
+ /* Execute actor function as a synctask? */
+ gf_boolean_t synctask;
+
+ /* list member to link to list of registered services with rpcsvc */
+ struct list_head program;
+};
+
+typedef struct rpcsvc_cbk_program {
+ char *progname;
+ int prognum;
+ int progver;
+} rpcsvc_cbk_program_t;
+/* All users of RPC services should use this API to register their
+ * procedure handlers.
+ */
+extern int
+rpcsvc_program_register (rpcsvc_t *svc, rpcsvc_program_t *program);
+
+extern int
+rpcsvc_program_unregister (rpcsvc_t *svc, rpcsvc_program_t *program);
+
+/* This will create and add a listener to listener pool. Programs can
+ * use any of the listener in this pool. A single listener can be used by
+ * multiple programs and vice versa. There can also be a one to one mapping
+ * between a program and a listener. After registering a program with rpcsvc,
+ * the program has to be associated with a listener using
+ * rpcsvc_program_register_portmap.
+ */
+/* FIXME: can multiple programs registered on same port? */
+extern int32_t
+rpcsvc_create_listeners (rpcsvc_t *svc, dict_t *options, char *name);
+
+void
+rpcsvc_listener_destroy (rpcsvc_listener_t *listener);
+
+extern int
+rpcsvc_program_register_portmap (rpcsvc_program_t *newprog, uint32_t port);
+
+extern int
+rpcsvc_program_unregister_portmap (rpcsvc_program_t *newprog);
+
+extern int
+rpcsvc_register_portmap_enabled (rpcsvc_t *svc);
+
+/* Inits the global RPC service data structures.
+ * Called in main.
+ */
+extern rpcsvc_t *
+rpcsvc_init (xlator_t *xl, glusterfs_ctx_t *ctx, dict_t *options,
+ uint32_t poolcount);
+
+extern int
+rpcsvc_reconfigure_options (rpcsvc_t *svc, dict_t *options);
+
+int
+rpcsvc_register_notify (rpcsvc_t *svc, rpcsvc_notify_t notify, void *mydata);
+
+/* unregister a notification callback @notify with data @mydata from svc.
+ * returns the number of notification callbacks unregistered.
+ */
+int
+rpcsvc_unregister_notify (rpcsvc_t *svc, rpcsvc_notify_t notify, void *mydata);
+
+int
+rpcsvc_transport_submit (rpc_transport_t *trans, struct iovec *rpchdr,
+ int rpchdrcount, struct iovec *proghdr,
+ int proghdrcount, struct iovec *progpayload,
+ int progpayloadcount, struct iobref *iobref,
+ void *priv);
+
+int
+rpcsvc_submit_message (rpcsvc_request_t *req, struct iovec *proghdr,
+ int hdrcount, struct iovec *payload, int payloadcount,
+ struct iobref *iobref);
+
+int
+rpcsvc_submit_generic (rpcsvc_request_t *req, struct iovec *proghdr,
+ int hdrcount, struct iovec *payload, int payloadcount,
+ struct iobref *iobref);
+
+extern int
+rpcsvc_error_reply (rpcsvc_request_t *req);
+
+#define RPCSVC_PEER_STRLEN 1024
+#define RPCSVC_AUTH_ACCEPT 1
+#define RPCSVC_AUTH_REJECT 2
+#define RPCSVC_AUTH_DONTCARE 3
+
+extern int
+rpcsvc_transport_peername (rpc_transport_t *trans, char *hostname, int hostlen);
+
+extern int
+rpcsvc_transport_peeraddr (rpc_transport_t *trans, char *addrstr, int addrlen,
+ struct sockaddr_storage *returnsa, socklen_t sasize);
+
+extern int
+rpcsvc_auth_check (rpcsvc_t *svc, char *volname, rpc_transport_t *trans);
+
+extern int
+rpcsvc_transport_privport_check (rpcsvc_t *svc, char *volname,
+ rpc_transport_t *trans);
+
+#define rpcsvc_request_seterr(req, err) (req)->rpc_err = err
+#define rpcsvc_request_set_autherr(req, err) (req)->auth_err = err
+
+extern int rpcsvc_submit_vectors (rpcsvc_request_t *req);
+
+extern int rpcsvc_request_attach_vector (rpcsvc_request_t *req,
+ struct iovec msgvec, struct iobuf *iob,
+ struct iobref *ioref, int finalvector);
+
+
+typedef int (*auth_init_trans) (rpc_transport_t *trans, void *priv);
+typedef int (*auth_init_request) (rpcsvc_request_t *req, void *priv);
+typedef int (*auth_request_authenticate) (rpcsvc_request_t *req, void *priv);
+
+/* This structure needs to be registered by every authentication scheme.
+ * Our authentication schemes are stored per connection because
+ * each connection will end up using a different authentication scheme.
+ */
+typedef struct rpcsvc_auth_ops {
+ auth_init_trans transport_init;
+ auth_init_request request_init;
+ auth_request_authenticate authenticate;
+} rpcsvc_auth_ops_t;
+
+typedef struct rpcsvc_auth_flavour_desc {
+ char authname[RPCSVC_NAME_MAX];
+ int authnum;
+ rpcsvc_auth_ops_t *authops;
+ void *authprivate;
+} rpcsvc_auth_t;
+
+typedef void * (*rpcsvc_auth_initer_t) (rpcsvc_t *svc, dict_t *options);
+
+struct rpcsvc_auth_list {
+ struct list_head authlist;
+ rpcsvc_auth_initer_t init;
+ /* Should be the name with which we identify the auth scheme given
+ * in the volfile options.
+ * This should be different from the authname in rpc_auth_t
+ * in way that makes it easier to specify this scheme in the volfile.
+ * This is because the technical names of the schemes can be a bit
+ * arcane.
+ */
+ char name[RPCSVC_NAME_MAX];
+ rpcsvc_auth_t *auth;
+ int enable;
+};
+
+extern int
+rpcsvc_auth_request_init (rpcsvc_request_t *req);
+
+extern int
+rpcsvc_auth_init (rpcsvc_t *svc, dict_t *options);
+
+extern int
+rpcsvc_auth_reconf (rpcsvc_t *svc, dict_t *options);
+
+extern int
+rpcsvc_auth_transport_init (rpc_transport_t *xprt);
+
+extern int
+rpcsvc_authenticate (rpcsvc_request_t *req);
+
+extern int
+rpcsvc_auth_array (rpcsvc_t *svc, char *volname, int *autharr, int arrlen);
+
+/* If the request has been sent using AUTH_UNIX, this function returns the
+ * auxiliary gids as an array, otherwise, it returns NULL.
+ * Move to auth-unix specific source file when we need to modularize the
+ * authentication code even further to support mode auth schemes.
+ */
+extern gid_t *
+rpcsvc_auth_unix_auxgids (rpcsvc_request_t *req, int *arrlen);
+
+extern char *
+rpcsvc_volume_allowed (dict_t *options, char *volname);
+
+int rpcsvc_callback_submit (rpcsvc_t *rpc, rpc_transport_t *trans,
+ rpcsvc_cbk_program_t *prog, int procnum,
+ struct iovec *proghdr, int proghdrcount);
+
+rpcsvc_actor_t *
+rpcsvc_program_actor (rpcsvc_request_t *req);
+
+int
+rpcsvc_transport_unix_options_build (dict_t **options, char *filepath);
+int
+rpcsvc_set_allow_insecure (rpcsvc_t *svc, dict_t *options);
+int
+rpcsvc_set_addr_namelookup (rpcsvc_t *svc, dict_t *options);
+int
+rpcsvc_set_root_squash (rpcsvc_t *svc, dict_t *options);
+int
+rpcsvc_set_outstanding_rpc_limit (rpcsvc_t *svc, dict_t *options);
+int
+rpcsvc_auth_array (rpcsvc_t *svc, char *volname, int *autharr, int arrlen);
+rpcsvc_vector_sizer
+rpcsvc_get_program_vector_sizer (rpcsvc_t *svc, uint32_t prognum,
+ uint32_t progver, uint32_t procnum);
+#endif
diff --git a/rpc/rpc-lib/src/xdr-common.h b/rpc/rpc-lib/src/xdr-common.h
new file mode 100644
index 000000000..34dc9c6a2
--- /dev/null
+++ b/rpc/rpc-lib/src/xdr-common.h
@@ -0,0 +1,79 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _XDR_COMMON_H_
+#define _XDR_COMMON_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <rpc/types.h>
+#include <sys/types.h>
+#include <rpc/xdr.h>
+#include <sys/uio.h>
+
+#ifdef __NetBSD__
+#include <dirent.h>
+#endif /* __NetBSD__ */
+
+enum gf_dump_procnum {
+ GF_DUMP_NULL,
+ GF_DUMP_DUMP,
+ GF_DUMP_MAXVALUE,
+};
+
+#define GLUSTER_DUMP_PROGRAM 123451501 /* Completely random */
+#define GLUSTER_DUMP_VERSION 1
+
+#define GF_MAX_AUTH_BYTES 2048
+
+#if GF_DARWIN_HOST_OS
+#define xdr_u_quad_t xdr_u_int64_t
+#define xdr_quad_t xdr_int64_t
+#define xdr_uint32_t xdr_u_int32_t
+#define uint64_t u_int64_t
+#endif
+
+#if defined(__NetBSD__)
+#define xdr_u_quad_t xdr_u_int64_t
+#define xdr_quad_t xdr_int64_t
+#define xdr_uint32_t xdr_u_int32_t
+#define xdr_uint64_t xdr_u_int64_t
+#endif
+
+
+#if GF_SOLARIS_HOST_OS
+#define u_quad_t uint64_t
+#define quad_t int64_t
+#define xdr_u_quad_t xdr_uint64_t
+#define xdr_quad_t xdr_int64_t
+#define xdr_uint32_t xdr_uint32_t
+#endif
+
+/* Returns the address of the byte that follows the
+ * last byte used for decoding the previous xdr component.
+ * E.g. once the RPC call for NFS has been decoded, the macro will return
+ * the address from which the NFS header starts.
+ */
+#define xdr_decoded_remaining_addr(xdr) ((&xdr)->x_private)
+
+/* Returns the length of the remaining record after the previous decode
+ * operation completed.
+ */
+#define xdr_decoded_remaining_len(xdr) ((&xdr)->x_handy)
+
+/* Returns the number of bytes used by the last encode operation. */
+#define xdr_encoded_length(xdr) (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
+
+#define xdr_decoded_length(xdr) (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
+
+#endif
diff --git a/rpc/rpc-lib/src/xdr-rpc.c b/rpc/rpc-lib/src/xdr-rpc.c
new file mode 100644
index 000000000..adb48a531
--- /dev/null
+++ b/rpc/rpc-lib/src/xdr-rpc.c
@@ -0,0 +1,212 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <rpc/rpc.h>
+#include <rpc/pmap_clnt.h>
+#include <arpa/inet.h>
+#include <rpc/xdr.h>
+#include <sys/uio.h>
+#include <rpc/auth_unix.h>
+
+#include "mem-pool.h"
+#include "xdr-rpc.h"
+#include "xdr-common.h"
+#include "logging.h"
+#include "common-utils.h"
+
+/* Decodes the XDR format in msgbuf into rpc_msg.
+ * The remaining payload is returned into payload.
+ */
+int
+xdr_to_rpc_call (char *msgbuf, size_t len, struct rpc_msg *call,
+ struct iovec *payload, char *credbytes, char *verfbytes)
+{
+ XDR xdr;
+ char opaquebytes[GF_MAX_AUTH_BYTES];
+ struct opaque_auth *oa = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("rpc", msgbuf, out);
+ GF_VALIDATE_OR_GOTO ("rpc", call, out);
+
+ memset (call, 0, sizeof (*call));
+
+ oa = &call->rm_call.cb_cred;
+ if (!credbytes)
+ oa->oa_base = opaquebytes;
+ else
+ oa->oa_base = credbytes;
+
+ oa = &call->rm_call.cb_verf;
+ if (!verfbytes)
+ oa->oa_base = opaquebytes;
+ else
+ oa->oa_base = verfbytes;
+
+ xdrmem_create (&xdr, msgbuf, len, XDR_DECODE);
+ if (!xdr_callmsg (&xdr, call)) {
+ gf_log ("rpc", GF_LOG_WARNING, "failed to decode call msg");
+ goto out;
+ }
+
+ if (payload) {
+ payload->iov_base = xdr_decoded_remaining_addr (xdr);
+ payload->iov_len = xdr_decoded_remaining_len (xdr);
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+bool_t
+true_func (XDR *s, caddr_t *a)
+{
+ return TRUE;
+}
+
+
+int
+rpc_fill_empty_reply (struct rpc_msg *reply, uint32_t xid)
+{
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("rpc", reply, out);
+
+ /* Setting to 0 also results in reply verifier flavor to be
+ * set to AUTH_NULL which is what we want right now.
+ */
+ memset (reply, 0, sizeof (*reply));
+ reply->rm_xid = xid;
+ reply->rm_direction = REPLY;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+rpc_fill_denied_reply (struct rpc_msg *reply, int rjstat, int auth_err)
+{
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("rpc", reply, out);
+
+ reply->rm_reply.rp_stat = MSG_DENIED;
+ reply->rjcted_rply.rj_stat = rjstat;
+ if (rjstat == RPC_MISMATCH) {
+ /* No problem with hardcoding
+ * RPC version numbers. We only support
+ * v2 anyway.
+ */
+ reply->rjcted_rply.rj_vers.low = 2;
+ reply->rjcted_rply.rj_vers.high = 2;
+ } else if (rjstat == AUTH_ERROR)
+ reply->rjcted_rply.rj_why = auth_err;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+int
+rpc_fill_accepted_reply (struct rpc_msg *reply, int arstat, int proglow,
+ int proghigh, int verf, int len, char *vdata)
+{
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("rpc", reply, out);
+
+ reply->rm_reply.rp_stat = MSG_ACCEPTED;
+ reply->acpted_rply.ar_stat = arstat;
+
+ reply->acpted_rply.ar_verf.oa_flavor = verf;
+ reply->acpted_rply.ar_verf.oa_length = len;
+ reply->acpted_rply.ar_verf.oa_base = vdata;
+ if (arstat == PROG_MISMATCH) {
+ reply->acpted_rply.ar_vers.low = proglow;
+ reply->acpted_rply.ar_vers.high = proghigh;
+ } else if (arstat == SUCCESS) {
+
+ /* This is a hack. I'd really like to build a custom
+ * XDR library because Sun RPC interface is not very flexible.
+ */
+ reply->acpted_rply.ar_results.proc = (xdrproc_t)true_func;
+ reply->acpted_rply.ar_results.where = NULL;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+rpc_reply_to_xdr (struct rpc_msg *reply, char *dest, size_t len,
+ struct iovec *dst)
+{
+ XDR xdr;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("rpc", reply, out);
+ GF_VALIDATE_OR_GOTO ("rpc", dest, out);
+ GF_VALIDATE_OR_GOTO ("rpc", dst, out);
+
+ xdrmem_create (&xdr, dest, len, XDR_ENCODE);
+ if (!xdr_replymsg(&xdr, reply)) {
+ gf_log ("rpc", GF_LOG_WARNING, "failed to encode reply msg");
+ goto out;
+ }
+
+ dst->iov_base = dest;
+ dst->iov_len = xdr_encoded_length (xdr);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+int
+xdr_to_auth_unix_cred (char *msgbuf, int msglen, struct authunix_parms *au,
+ char *machname, gid_t *gids)
+{
+ XDR xdr;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("rpc", msgbuf, out);
+ GF_VALIDATE_OR_GOTO ("rpc", machname, out);
+ GF_VALIDATE_OR_GOTO ("rpc", gids, out);
+ GF_VALIDATE_OR_GOTO ("rpc", au, out);
+
+ au->aup_machname = machname;
+#ifdef GF_DARWIN_HOST_OS
+ au->aup_gids = (int *)gids;
+#else
+ au->aup_gids = gids;
+#endif
+
+ xdrmem_create (&xdr, msgbuf, msglen, XDR_DECODE);
+
+ if (!xdr_authunix_parms (&xdr, au)) {
+ gf_log ("rpc", GF_LOG_WARNING, "failed to decode auth unix parms");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
diff --git a/rpc/rpc-lib/src/xdr-rpc.h b/rpc/rpc-lib/src/xdr-rpc.h
new file mode 100644
index 000000000..f5f4a941e
--- /dev/null
+++ b/rpc/rpc-lib/src/xdr-rpc.h
@@ -0,0 +1,81 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _XDR_RPC_H_
+#define _XDR_RPC_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef GF_SOLARIS_HOST_OS
+#include <rpc/rpc.h>
+#endif
+
+#ifdef GF_SOLARIS_HOST_OS
+#include <rpc/auth.h>
+#include <rpc/auth_sys.h>
+#endif
+
+//#include <rpc/pmap_clnt.h>
+#include <arpa/inet.h>
+#include <rpc/xdr.h>
+#include <sys/uio.h>
+
+#include "xdr-common.h"
+
+typedef enum {
+ AUTH_GLUSTERFS = 5,
+ AUTH_GLUSTERFS_v2 = 390039, /* using a number from 'unused' range,
+ from the list available in RFC5531 */
+} gf_rpc_authtype_t;
+
+/* Converts a given network buffer from its XDR format to a structure
+ * that contains everything an RPC call needs to work.
+ */
+extern int
+xdr_to_rpc_call (char *msgbuf, size_t len, struct rpc_msg *call,
+ struct iovec *payload, char *credbytes, char *verfbytes);
+
+extern int
+rpc_fill_empty_reply (struct rpc_msg *reply, uint32_t xid);
+
+extern int
+rpc_fill_denied_reply (struct rpc_msg *reply, int rjstat, int auth_err);
+
+extern int
+rpc_fill_accepted_reply (struct rpc_msg *reply, int arstat, int proglow,
+ int proghigh, int verf, int len, char *vdata);
+extern int
+rpc_reply_to_xdr (struct rpc_msg *reply, char *dest, size_t len,
+ struct iovec *dst);
+
+extern int
+xdr_to_auth_unix_cred (char *msgbuf, int msglen, struct authunix_parms *au,
+ char *machname, gid_t *gids);
+/* Macros that simplify accessing the members of an RPC call structure. */
+#define rpc_call_xid(call) ((call)->rm_xid)
+#define rpc_call_direction(call) ((call)->rm_direction)
+#define rpc_call_rpcvers(call) ((call)->ru.RM_cmb.cb_rpcvers)
+#define rpc_call_program(call) ((call)->ru.RM_cmb.cb_prog)
+#define rpc_call_progver(call) ((call)->ru.RM_cmb.cb_vers)
+#define rpc_call_progproc(call) ((call)->ru.RM_cmb.cb_proc)
+#define rpc_opaque_auth_flavour(oa) ((oa)->oa_flavor)
+#define rpc_opaque_auth_len(oa) ((oa)->oa_length)
+
+#define rpc_call_cred_flavour(call) (rpc_opaque_auth_flavour ((&(call)->ru.RM_cmb.cb_cred)))
+#define rpc_call_cred_len(call) (rpc_opaque_auth_len ((&(call)->ru.RM_cmb.cb_cred)))
+
+
+#define rpc_call_verf_flavour(call) (rpc_opaque_auth_flavour ((&(call)->ru.RM_cmb.cb_verf)))
+#define rpc_call_verf_len(call) (rpc_opaque_auth_len ((&(call)->ru.RM_cmb.cb_verf)))
+
+#endif
diff --git a/rpc/rpc-lib/src/xdr-rpcclnt.c b/rpc/rpc-lib/src/xdr-rpcclnt.c
new file mode 100644
index 000000000..810d1961b
--- /dev/null
+++ b/rpc/rpc-lib/src/xdr-rpcclnt.c
@@ -0,0 +1,117 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <rpc/rpc.h>
+#include <rpc/pmap_clnt.h>
+#include <arpa/inet.h>
+#include <rpc/xdr.h>
+#include <sys/uio.h>
+#include <rpc/auth_unix.h>
+#include <errno.h>
+
+#include "mem-pool.h"
+#include "xdr-rpc.h"
+#include "xdr-common.h"
+#include "logging.h"
+#include "common-utils.h"
+
+/* Decodes the XDR format in msgbuf into rpc_msg.
+ * The remaining payload is returned into payload.
+ */
+int
+xdr_to_rpc_reply (char *msgbuf, size_t len, struct rpc_msg *reply,
+ struct iovec *payload, char *verfbytes)
+{
+ XDR xdr;
+ int ret = -EINVAL;
+
+ GF_VALIDATE_OR_GOTO ("rpc", msgbuf, out);
+ GF_VALIDATE_OR_GOTO ("rpc", reply, out);
+
+ memset (reply, 0, sizeof (struct rpc_msg));
+
+ reply->acpted_rply.ar_verf = _null_auth;
+ reply->acpted_rply.ar_results.where = NULL;
+ reply->acpted_rply.ar_results.proc = (xdrproc_t)(xdr_void);
+
+ xdrmem_create (&xdr, msgbuf, len, XDR_DECODE);
+ if (!xdr_replymsg (&xdr, reply)) {
+ gf_log ("rpc", GF_LOG_WARNING, "failed to decode reply msg");
+ ret = -errno;
+ goto out;
+ }
+ if (payload) {
+ payload->iov_base = xdr_decoded_remaining_addr (xdr);
+ payload->iov_len = xdr_decoded_remaining_len (xdr);
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+int
+rpc_request_to_xdr (struct rpc_msg *request, char *dest, size_t len,
+ struct iovec *dst)
+{
+ XDR xdr;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("rpc", dest, out);
+ GF_VALIDATE_OR_GOTO ("rpc", request, out);
+ GF_VALIDATE_OR_GOTO ("rpc", dst, out);
+
+ xdrmem_create (&xdr, dest, len, XDR_ENCODE);
+ if (!xdr_callmsg (&xdr, request)) {
+ gf_log ("rpc", GF_LOG_WARNING, "failed to encode call msg");
+ goto out;
+ }
+
+ dst->iov_base = dest;
+ dst->iov_len = xdr_encoded_length (xdr);
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+
+int
+auth_unix_cred_to_xdr (struct authunix_parms *au, char *dest, size_t len,
+ struct iovec *iov)
+{
+ XDR xdr;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("rpc", au, out);
+ GF_VALIDATE_OR_GOTO ("rpc", dest, out);
+ GF_VALIDATE_OR_GOTO ("rpc", iov, out);
+
+ xdrmem_create (&xdr, dest, len, XDR_DECODE);
+
+ if (!xdr_authunix_parms (&xdr, au)) {
+ gf_log ("rpc", GF_LOG_WARNING, "failed to decode authunix parms");
+ goto out;
+ }
+
+ iov->iov_base = dest;
+ iov->iov_len = xdr_encoded_length (xdr);
+
+ ret = 0;
+out:
+ return ret;
+}
diff --git a/rpc/rpc-lib/src/xdr-rpcclnt.h b/rpc/rpc-lib/src/xdr-rpcclnt.h
new file mode 100644
index 000000000..c08d872f8
--- /dev/null
+++ b/rpc/rpc-lib/src/xdr-rpcclnt.h
@@ -0,0 +1,42 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _XDR_RPCCLNT_H
+#define _XDR_RPCCLNT_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+//#include <rpc/rpc.h>
+//#include <rpc/pmap_clnt.h>
+#include <arpa/inet.h>
+#include <rpc/xdr.h>
+#include <sys/uio.h>
+#include <rpc/rpc_msg.h>
+#include <rpc/auth_unix.h>
+
+/* Macros that simplify accessing the members of an RPC call structure. */
+#define rpc_reply_xid(reply) ((reply)->rm_xid)
+#define rpc_reply_status(reply) ((reply)->ru.RM_rmb.rp_stat)
+#define rpc_accepted_reply_status(reply) ((reply)->acpted_rply.ar_stat)
+#define rpc_reply_verf_flavour(reply) ((reply)->acpted_rply.ar_verf.oa_flavor)
+
+int xdr_to_rpc_reply (char *msgbuf, size_t len, struct rpc_msg *reply,
+ struct iovec *payload, char *verfbytes);
+int
+rpc_request_to_xdr (struct rpc_msg *request, char *dest, size_t len,
+ struct iovec *dst);
+int
+auth_unix_cred_to_xdr (struct authunix_parms *au, char *dest, size_t len,
+ struct iovec *iov);
+
+#endif
diff --git a/rpc/rpc-transport/Makefile.am b/rpc/rpc-transport/Makefile.am
new file mode 100644
index 000000000..221fd6405
--- /dev/null
+++ b/rpc/rpc-transport/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = socket $(RDMA_SUBDIR)
diff --git a/glusterfs-guts/Makefile.am b/rpc/rpc-transport/rdma/Makefile.am
index f963effea..f963effea 100644
--- a/glusterfs-guts/Makefile.am
+++ b/rpc/rpc-transport/rdma/Makefile.am
diff --git a/rpc/rpc-transport/rdma/src/Makefile.am b/rpc/rpc-transport/rdma/src/Makefile.am
new file mode 100644
index 000000000..2bf7cf238
--- /dev/null
+++ b/rpc/rpc-transport/rdma/src/Makefile.am
@@ -0,0 +1,22 @@
+# TODO : need to change transportdir
+
+transport_LTLIBRARIES = rdma.la
+transportdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/rpc-transport
+
+rdma_la_LDFLAGS = -module -avoid-version
+
+rdma_la_SOURCES = rdma.c name.c
+rdma_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ -libverbs -lrdmacm
+
+noinst_HEADERS = rdma.h name.h
+ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/rpc-lib/src/ \
+ -I$(top_srcdir)/xlators/protocol/lib/src/ -shared -nostartfiles $(GF_CFLAGS)
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/rpc-lib/src/ \
+ -I$(top_srcdir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES = *~
diff --git a/transport/ib-verbs/src/name.c b/rpc/rpc-transport/rdma/src/name.c
index c9d6e51d8..c57428ad6 100644
--- a/transport/ib-verbs/src/name.c
+++ b/rpc/rpc-transport/rdma/src/name.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <sys/types.h>
@@ -22,50 +13,62 @@
#include <errno.h>
#include <netdb.h>
#include <string.h>
-
-#ifdef CLIENT_PORT_CEILING
-#undef CLIENT_PORT_CEILING
-#endif
-
-#define CLIENT_PORT_CEILING 1024
+#include <rdma/rdma_cma.h>
#ifndef AF_INET_SDP
#define AF_INET_SDP 27
#endif
-#include "transport.h"
-#include "ib-verbs.h"
+#include "rpc-transport.h"
+#include "rdma.h"
+#include "common-utils.h"
+
int32_t
-gf_resolve_ip6 (const char *hostname,
- uint16_t port,
- int family,
- void **dnscache,
+gf_resolve_ip6 (const char *hostname,
+ uint16_t port,
+ int family,
+ void **dnscache,
struct addrinfo **addr_info);
static int32_t
-af_inet_bind_to_port_lt_ceiling (int fd, struct sockaddr *sockaddr,
+af_inet_bind_to_port_lt_ceiling (struct rdma_cm_id *cm_id,
+ struct sockaddr *sockaddr,
socklen_t sockaddr_len, int ceiling)
{
- int32_t ret = -1;
- /* struct sockaddr_in sin = {0, }; */
- uint16_t port = ceiling - 1;
+ int32_t ret = -1;
+ uint16_t port = ceiling - 1;
+ // by default assume none of the ports are blocked and all are available
+ gf_boolean_t ports[1024] = {_gf_false,};
+ int i = 0;
+
+ ret = gf_process_reserved_ports (ports);
+ if (ret != 0) {
+ for (i = 0; i < 1024; i++)
+ ports[i] = _gf_false;
+ }
while (port)
{
switch (sockaddr->sa_family)
{
case AF_INET6:
- ((struct sockaddr_in6 *)sockaddr)->sin6_port = htons (port);
+ ((struct sockaddr_in6 *)sockaddr)->sin6_port
+ = htons (port);
break;
case AF_INET_SDP:
case AF_INET:
- ((struct sockaddr_in *)sockaddr)->sin_port = htons (port);
+ ((struct sockaddr_in *)sockaddr)->sin_port
+ = htons (port);
break;
}
-
- ret = bind (fd, sockaddr, sockaddr_len);
+ // ignore the reserved ports
+ if (ports[port] == _gf_true) {
+ port--;
+ continue;
+ }
+ ret = rdma_bind_addr (cm_id, sockaddr);
if (ret == 0)
break;
@@ -79,23 +82,22 @@ af_inet_bind_to_port_lt_ceiling (int fd, struct sockaddr *sockaddr,
return ret;
}
+#if 0
static int32_t
-af_unix_client_bind (transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t sockaddr_len,
- int sock)
+af_unix_client_bind (rpc_transport_t *this, struct sockaddr *sockaddr,
+ socklen_t sockaddr_len, struct rdma_cm_id *cm_id)
{
data_t *path_data = NULL;
struct sockaddr_un *addr = NULL;
int32_t ret = -1;
- path_data = dict_get (this->xl->options,
- "transport.ib-verbs.bind-path");
+ path_data = dict_get (this->options,
+ "transport.rdma.bind-path");
if (path_data) {
char *path = data_to_str (path_data);
if (!path || strlen (path) > UNIX_PATH_MAX) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "transport.ib-verbs.bind-path not specfied "
+ gf_log (this->name, GF_LOG_DEBUG,
+ "transport.rdma.bind-path not specified "
"for unix socket, letting connect to assign "
"default value");
goto err;
@@ -105,8 +107,8 @@ af_unix_client_bind (transport_t *this,
strcpy (addr->sun_path, path);
ret = bind (sock, (struct sockaddr *)addr, sockaddr_len);
if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "cannot bind to unix-domain socket %d (%s)",
+ gf_log (this->name, GF_LOG_ERROR,
+ "cannot bind to unix-domain socket %d (%s)",
sock, strerror (errno));
goto err;
}
@@ -115,38 +117,39 @@ af_unix_client_bind (transport_t *this,
err:
return ret;
}
+#endif
static int32_t
-client_fill_address_family (transport_t *this, struct sockaddr *sockaddr)
+client_fill_address_family (rpc_transport_t *this, struct sockaddr *sockaddr)
{
data_t *address_family_data = NULL;
- address_family_data = dict_get (this->xl->options,
+ address_family_data = dict_get (this->options,
"transport.address-family");
if (!address_family_data) {
data_t *remote_host_data = NULL, *connect_path_data = NULL;
- remote_host_data = dict_get (this->xl->options, "remote-host");
- connect_path_data = dict_get (this->xl->options,
- "transport.ib-verbs.connect-path");
+ remote_host_data = dict_get (this->options, "remote-host");
+ connect_path_data = dict_get (this->options,
+ "transport.rdma.connect-path");
- if (!(remote_host_data || connect_path_data) ||
+ if (!(remote_host_data || connect_path_data) ||
(remote_host_data && connect_path_data)) {
- gf_log (this->xl->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"address-family not specified and not able to "
"determine the same from other options "
- "(remote-host:%s and connect-path:%s)",
- data_to_str (remote_host_data),
+ "(remote-host:%s and connect-path:%s)",
+ data_to_str (remote_host_data),
data_to_str (connect_path_data));
return -1;
- }
+ }
if (remote_host_data) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_DEBUG,
"address-family not specified, guessing it "
"to be inet/inet6");
sockaddr->sa_family = AF_UNSPEC;
} else {
- gf_log (this->xl->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_DEBUG,
"address-family not specified, guessing it "
"to be unix");
sockaddr->sa_family = AF_UNIX;
@@ -162,13 +165,11 @@ client_fill_address_family (transport_t *this, struct sockaddr *sockaddr)
sockaddr->sa_family = AF_INET6;
} else if (!strcasecmp (address_family, "inet-sdp")) {
sockaddr->sa_family = AF_INET_SDP;
- } else if (!strcasecmp (address_family, "inet/inet6")
- || !strcasecmp (address_family, "inet6/inet")) {
- sockaddr->sa_family = AF_UNSPEC;
} else {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "unknown address-family (%s) specified",
+ gf_log (this->name, GF_LOG_ERROR,
+ "unknown address-family (%s) specified",
address_family);
+ sockaddr->sa_family = AF_UNSPEC;
return -1;
}
}
@@ -177,24 +178,24 @@ client_fill_address_family (transport_t *this, struct sockaddr *sockaddr)
}
static int32_t
-af_inet_client_get_remote_sockaddr (transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len)
+af_inet_client_get_remote_sockaddr (rpc_transport_t *this,
+ struct sockaddr *sockaddr,
+ socklen_t *sockaddr_len,
+ int16_t remote_port)
{
- dict_t *options = this->xl->options;
+ dict_t *options = this->options;
data_t *remote_host_data = NULL;
data_t *remote_port_data = NULL;
char *remote_host = NULL;
- uint16_t remote_port = 0;
struct addrinfo *addr_info = NULL;
int32_t ret = 0;
remote_host_data = dict_get (options, "remote-host");
if (remote_host_data == NULL)
{
- gf_log (this->xl->name, GF_LOG_ERROR,
- "option remote-host missing in volume %s",
- this->xl->name);
+ gf_log (this->name, GF_LOG_ERROR,
+ "option remote-host missing in volume %s",
+ this->name);
ret = -1;
goto err;
}
@@ -202,33 +203,35 @@ af_inet_client_get_remote_sockaddr (transport_t *this,
remote_host = data_to_str (remote_host_data);
if (remote_host == NULL)
{
- gf_log (this->xl->name, GF_LOG_ERROR,
- "option remote-host has data NULL in volume %s",
- this->xl->name);
+ gf_log (this->name, GF_LOG_ERROR,
+ "option remote-host has data NULL in volume %s",
+ this->name);
ret = -1;
goto err;
}
- remote_port_data = dict_get (options, "remote-port");
- if (remote_port_data == NULL)
- {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "option remote-port missing in volume %s. "
- "Defaulting to %d",
- this->xl->name, GF_DEFAULT_IBVERBS_LISTEN_PORT);
+ if (remote_port == 0) {
+ remote_port_data = dict_get (options, "remote-port");
+ if (remote_port_data == NULL)
+ {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "option remote-port missing in volume %s. "
+ "Defaulting to %d",
+ this->name, GF_DEFAULT_RDMA_LISTEN_PORT);
- remote_port = GF_DEFAULT_IBVERBS_LISTEN_PORT;
- }
- else
- {
- remote_port = data_to_uint16 (remote_port_data);
+ remote_port = GF_DEFAULT_RDMA_LISTEN_PORT;
+ }
+ else
+ {
+ remote_port = data_to_uint16 (remote_port_data);
+ }
}
- if (remote_port == (uint16_t)-1)
+ if (remote_port == -1)
{
- gf_log (this->xl->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"option remote-port has invalid port in volume %s",
- this->xl->name);
+ this->name);
ret = -1;
goto err;
}
@@ -236,10 +239,10 @@ af_inet_client_get_remote_sockaddr (transport_t *this,
/* TODO: gf_resolve is a blocking call. kick in some
non blocking dns techniques */
ret = gf_resolve_ip6 (remote_host, remote_port,
- sockaddr->sa_family,
+ sockaddr->sa_family,
&this->dnscache, &addr_info);
if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"DNS resolution failed on host %s", remote_host);
goto err;
}
@@ -252,8 +255,8 @@ err:
}
static int32_t
-af_unix_client_get_remote_sockaddr (transport_t *this,
- struct sockaddr *sockaddr,
+af_unix_client_get_remote_sockaddr (rpc_transport_t *this,
+ struct sockaddr *sockaddr,
socklen_t *sockaddr_len)
{
struct sockaddr_un *sockaddr_un = NULL;
@@ -261,11 +264,11 @@ af_unix_client_get_remote_sockaddr (transport_t *this,
data_t *connect_path_data = NULL;
int32_t ret = 0;
- connect_path_data = dict_get (this->xl->options,
- "transport.ib-verbs.connect-path");
+ connect_path_data = dict_get (this->options,
+ "transport.rdma.connect-path");
if (!connect_path_data) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "option transport.ib-verbs.connect-path not "
+ gf_log (this->name, GF_LOG_ERROR,
+ "option transport.rdma.connect-path not "
"specified for address-family unix");
ret = -1;
goto err;
@@ -273,21 +276,21 @@ af_unix_client_get_remote_sockaddr (transport_t *this,
connect_path = data_to_str (connect_path_data);
if (!connect_path) {
- gf_log (this->xl->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"connect-path is null-string");
ret = -1;
goto err;
}
if (strlen (connect_path) > UNIX_PATH_MAX) {
- gf_log (this->xl->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"connect-path value length %"GF_PRI_SIZET" > "
"%d octets", strlen (connect_path), UNIX_PATH_MAX);
ret = -1;
goto err;
}
- gf_log (this->xl->name,
+ gf_log (this->name,
GF_LOG_DEBUG,
"using connect-path %s", connect_path);
sockaddr_un = (struct sockaddr_un *)sockaddr;
@@ -299,7 +302,7 @@ err:
}
static int32_t
-af_unix_server_get_local_sockaddr (transport_t *this,
+af_unix_server_get_local_sockaddr (rpc_transport_t *this,
struct sockaddr *addr,
socklen_t *addr_len)
{
@@ -309,10 +312,10 @@ af_unix_server_get_local_sockaddr (transport_t *this,
struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
- listen_path_data = dict_get (this->xl->options,
- "transport.ib-verbs.listen-path");
+ listen_path_data = dict_get (this->options,
+ "transport.rdma.listen-path");
if (!listen_path_data) {
- gf_log (this->xl->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"missing option listen-path");
ret = -1;
goto err;
@@ -325,7 +328,7 @@ af_unix_server_get_local_sockaddr (transport_t *this,
#endif
if (strlen (listen_path) > UNIX_PATH_MAX) {
- gf_log (this->xl->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"option listen-path has value length %"GF_PRI_SIZET" > %d",
strlen (listen_path), UNIX_PATH_MAX);
ret = -1;
@@ -340,9 +343,9 @@ err:
return ret;
}
-static int32_t
-af_inet_server_get_local_sockaddr (transport_t *this,
- struct sockaddr *addr,
+static int32_t
+af_inet_server_get_local_sockaddr (rpc_transport_t *this,
+ struct sockaddr *addr,
socklen_t *addr_len)
{
struct addrinfo hints, *res = 0;
@@ -352,22 +355,37 @@ af_inet_server_get_local_sockaddr (transport_t *this,
dict_t *options = NULL;
int32_t ret = 0;
- options = this->xl->options;
+ options = this->options;
- listen_port_data = dict_get (options, "transport.ib-verbs.listen-port");
- listen_host_data = dict_get (options, "transport.ib-verbs.bind-address");
+ listen_port_data = dict_get (options, "transport.rdma.listen-port");
+ listen_host_data = dict_get (options,
+ "transport.rdma.bind-address");
- if (listen_port_data)
- {
+ if (listen_port_data) {
listen_port = data_to_uint16 (listen_port_data);
+ } else {
+ listen_port = GF_DEFAULT_RDMA_LISTEN_PORT;
+
+ if (addr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *in = (struct sockaddr_in6 *) addr;
+ in->sin6_addr = in6addr_any;
+ in->sin6_port = htons(listen_port);
+ *addr_len = sizeof(struct sockaddr_in6);
+ goto out;
+ } else if (addr->sa_family == AF_INET) {
+ struct sockaddr_in *in = (struct sockaddr_in *) addr;
+ in->sin_addr.s_addr = htonl(INADDR_ANY);
+ in->sin_port = htons(listen_port);
+ *addr_len = sizeof(struct sockaddr_in);
+ goto out;
+ }
}
if (listen_port == (uint16_t) -1)
- listen_port = GF_DEFAULT_IBVERBS_LISTEN_PORT;
+ listen_port = GF_DEFAULT_RDMA_LISTEN_PORT;
- if (listen_host_data)
- {
+ if (listen_host_data) {
listen_host = data_to_str (listen_host_data);
}
@@ -381,12 +399,11 @@ af_inet_server_get_local_sockaddr (transport_t *this,
ret = getaddrinfo(listen_host, service, &hints, &res);
if (ret != 0) {
- gf_log (this->xl->name,
- GF_LOG_ERROR,
- "getaddrinfo failed for host %s, service %s (%s)",
+ gf_log (this->name, GF_LOG_ERROR,
+ "getaddrinfo failed for host %s, service %s (%s)",
listen_host, service, gai_strerror (ret));
ret = -1;
- goto err;
+ goto out;
}
memcpy (addr, res->ai_addr, res->ai_addrlen);
@@ -394,15 +411,13 @@ af_inet_server_get_local_sockaddr (transport_t *this,
freeaddrinfo (res);
-err:
+out:
return ret;
}
-int32_t
-client_bind (transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len,
- int sock)
+int32_t
+gf_rdma_client_bind (rpc_transport_t *this, struct sockaddr *sockaddr,
+ socklen_t *sockaddr_len, struct rdma_cm_id *cm_id)
{
int ret = 0;
@@ -414,26 +429,28 @@ client_bind (transport_t *this,
*sockaddr_len = sizeof (struct sockaddr_in);
case AF_INET6:
- ret = af_inet_bind_to_port_lt_ceiling (sock, sockaddr,
- *sockaddr_len,
- CLIENT_PORT_CEILING);
+ ret = af_inet_bind_to_port_lt_ceiling (cm_id, sockaddr,
+ *sockaddr_len,
+ GF_CLIENT_PORT_CEILING);
if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_WARNING,
- "cannot bind inet socket (%d) to port "
- "less than %d (%s)",
- sock, CLIENT_PORT_CEILING, strerror (errno));
+ gf_log (this->name, GF_LOG_WARNING,
+ "cannot bind rdma_cm_id to port "
+ "less than %d (%s)", GF_CLIENT_PORT_CEILING,
+ strerror (errno));
ret = 0;
}
break;
case AF_UNIX:
*sockaddr_len = sizeof (struct sockaddr_un);
- ret = af_unix_client_bind (this, (struct sockaddr *)sockaddr,
+#if 0
+ ret = af_unix_client_bind (this, (struct sockaddr *)sockaddr,
*sockaddr_len, sock);
+#endif
break;
default:
- gf_log (this->xl->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"unknown address family %d", sockaddr->sa_family);
ret = -1;
break;
@@ -443,9 +460,10 @@ client_bind (transport_t *this,
}
int32_t
-ibverbs_client_get_remote_sockaddr (transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len)
+gf_rdma_client_get_remote_sockaddr (rpc_transport_t *this,
+ struct sockaddr *sockaddr,
+ socklen_t *sockaddr_len,
+ int16_t remote_port)
{
int32_t ret = 0;
char is_inet_sdp = 0;
@@ -455,7 +473,7 @@ ibverbs_client_get_remote_sockaddr (transport_t *this,
ret = -1;
goto err;
}
-
+
switch (sockaddr->sa_family)
{
case AF_INET_SDP:
@@ -465,9 +483,10 @@ ibverbs_client_get_remote_sockaddr (transport_t *this,
case AF_INET:
case AF_INET6:
case AF_UNSPEC:
- ret = af_inet_client_get_remote_sockaddr (this,
- sockaddr,
- sockaddr_len);
+ ret = af_inet_client_get_remote_sockaddr (this,
+ sockaddr,
+ sockaddr_len,
+ remote_port);
if (is_inet_sdp) {
sockaddr->sa_family = AF_INET_SDP;
@@ -476,31 +495,31 @@ ibverbs_client_get_remote_sockaddr (transport_t *this,
break;
case AF_UNIX:
- ret = af_unix_client_get_remote_sockaddr (this,
- sockaddr,
+ ret = af_unix_client_get_remote_sockaddr (this,
+ sockaddr,
sockaddr_len);
break;
default:
- gf_log (this->xl->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"unknown address-family %d", sockaddr->sa_family);
ret = -1;
}
-
+
err:
return ret;
}
int32_t
-ibverbs_server_get_local_sockaddr (transport_t *this,
- struct sockaddr *addr,
+gf_rdma_server_get_local_sockaddr (rpc_transport_t *this,
+ struct sockaddr *addr,
socklen_t *addr_len)
{
data_t *address_family_data = NULL;
int32_t ret = 0;
char is_inet_sdp = 0;
- address_family_data = dict_get (this->xl->options,
+ address_family_data = dict_get (this->options,
"transport.address-family");
if (address_family_data) {
char *address_family = NULL;
@@ -514,21 +533,19 @@ ibverbs_server_get_local_sockaddr (transport_t *this,
addr->sa_family = AF_INET_SDP;
} else if (!strcasecmp (address_family, "unix")) {
addr->sa_family = AF_UNIX;
- } else if (!strcasecmp (address_family, "inet/inet6")
- || !strcasecmp (address_family, "inet6/inet")) {
- addr->sa_family = AF_UNSPEC;
} else {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "unknown address family (%s) specified",
+ gf_log (this->name, GF_LOG_ERROR,
+ "unknown address family (%s) specified",
address_family);
+ addr->sa_family = AF_UNSPEC;
ret = -1;
goto err;
}
} else {
- gf_log (this->xl->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_DEBUG,
"option address-family not specified, defaulting "
- "to inet/inet6");
- addr->sa_family = AF_UNSPEC;
+ "to inet");
+ addr->sa_family = AF_INET;
}
switch (addr->sa_family)
@@ -555,60 +572,53 @@ err:
return ret;
}
-int32_t
-fill_inet6_inet_identifiers (transport_t *this, struct sockaddr_storage *addr,
+int32_t
+fill_inet6_inet_identifiers (rpc_transport_t *this, struct sockaddr_storage *addr,
int32_t addr_len, char *identifier)
{
int32_t ret = 0, tmpaddr_len = 0;
char service[NI_MAXSERV], host[NI_MAXHOST];
- struct sockaddr_storage tmpaddr;
+ union gf_sock_union sock_union;
- memset (&tmpaddr, 0, sizeof (tmpaddr));
- tmpaddr = *addr;
+ memset (&sock_union, 0, sizeof (sock_union));
+ sock_union.storage = *addr;
tmpaddr_len = addr_len;
- if (((struct sockaddr *) &tmpaddr)->sa_family == AF_INET6) {
+ if (sock_union.sa.sa_family == AF_INET6) {
int32_t one_to_four, four_to_eight, twelve_to_sixteen;
int16_t eight_to_ten, ten_to_twelve;
-
+
one_to_four = four_to_eight = twelve_to_sixteen = 0;
eight_to_ten = ten_to_twelve = 0;
-
- one_to_four = ((struct sockaddr_in6 *)
- &tmpaddr)->sin6_addr.s6_addr32[0];
- four_to_eight = ((struct sockaddr_in6 *)
- &tmpaddr)->sin6_addr.s6_addr32[1];
+
+ one_to_four = sock_union.sin6.sin6_addr.s6_addr32[0];
+ four_to_eight = sock_union.sin6.sin6_addr.s6_addr32[1];
#ifdef GF_SOLARIS_HOST_OS
- eight_to_ten = S6_ADDR16(((struct sockaddr_in6 *)
- &tmpaddr)->sin6_addr)[4];
+ eight_to_ten = S6_ADDR16(sock_union.sin6.sin6_addr)[4];
#else
- eight_to_ten = ((struct sockaddr_in6 *)
- &tmpaddr)->sin6_addr.s6_addr16[4];
+ eight_to_ten = sock_union.sin6.sin6_addr.s6_addr16[4];
#endif
#ifdef GF_SOLARIS_HOST_OS
- ten_to_twelve = S6_ADDR16(((struct sockaddr_in6 *)
- &tmpaddr)->sin6_addr)[5];
+ ten_to_twelve = S6_ADDR16(sock_union.sin6.sin6_addr)[5];
#else
- ten_to_twelve = ((struct sockaddr_in6 *)
- &tmpaddr)->sin6_addr.s6_addr16[5];
+ ten_to_twelve = sock_union.sin6.sin6_addr.s6_addr16[5];
#endif
- twelve_to_sixteen = ((struct sockaddr_in6 *)
- &tmpaddr)->sin6_addr.s6_addr32[3];
+ twelve_to_sixteen = sock_union.sin6.sin6_addr.s6_addr32[3];
/* ipv4 mapped ipv6 address has
bits 0-80: 0
bits 80-96: 0xffff
- bits 96-128: ipv4 address
+ bits 96-128: ipv4 address
*/
-
+
if (one_to_four == 0 &&
four_to_eight == 0 &&
eight_to_ten == 0 &&
ten_to_twelve == -1) {
- struct sockaddr_in *in_ptr = (struct sockaddr_in *)&tmpaddr;
- memset (&tmpaddr, 0, sizeof (tmpaddr));
-
+ struct sockaddr_in *in_ptr = &sock_union.sin;
+ memset (&sock_union, 0, sizeof (sock_union));
+
in_ptr->sin_family = AF_INET;
in_ptr->sin_port = ((struct sockaddr_in6 *)addr)->sin6_port;
in_ptr->sin_addr.s_addr = twelve_to_sixteen;
@@ -616,13 +626,13 @@ fill_inet6_inet_identifiers (transport_t *this, struct sockaddr_storage *addr,
}
}
- ret = getnameinfo ((struct sockaddr *) &tmpaddr,
+ ret = getnameinfo (&sock_union.sa,
tmpaddr_len,
host, sizeof (host),
service, sizeof (service),
NI_NUMERICHOST | NI_NUMERICSERV);
if (ret != 0) {
- gf_log (this->xl->name,
+ gf_log (this->name,
GF_LOG_ERROR,
"getnameinfo failed (%s)", gai_strerror (ret));
}
@@ -633,7 +643,7 @@ fill_inet6_inet_identifiers (transport_t *this, struct sockaddr_storage *addr,
}
int32_t
-get_transport_identifiers (transport_t *this)
+gf_rdma_get_transport_identifiers (rpc_transport_t *this)
{
int32_t ret = 0;
char is_inet_sdp = 0;
@@ -647,12 +657,12 @@ get_transport_identifiers (transport_t *this)
case AF_INET:
case AF_INET6:
{
- ret = fill_inet6_inet_identifiers (this,
- &this->myinfo.sockaddr,
+ ret = fill_inet6_inet_identifiers (this,
+ &this->myinfo.sockaddr,
this->myinfo.sockaddr_len,
this->myinfo.identifier);
if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"can't fill inet/inet6 identifier for server");
goto err;
}
@@ -662,7 +672,7 @@ get_transport_identifiers (transport_t *this)
this->peerinfo.sockaddr_len,
this->peerinfo.identifier);
if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"can't fill inet/inet6 identifier for client");
goto err;
}
@@ -686,7 +696,7 @@ get_transport_identifiers (transport_t *this)
break;
default:
- gf_log (this->xl->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"unknown address family (%d)",
((struct sockaddr *) &this->myinfo.sockaddr)->sa_family);
ret = -1;
diff --git a/rpc/rpc-transport/rdma/src/name.h b/rpc/rpc-transport/rdma/src/name.h
new file mode 100644
index 000000000..742fc5fc3
--- /dev/null
+++ b/rpc/rpc-transport/rdma/src/name.h
@@ -0,0 +1,36 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _IB_VERBS_NAME_H
+#define _IB_VERBS_NAME_H
+
+#include <rdma/rdma_cma.h>
+
+#include "compat.h"
+
+int32_t
+gf_rdma_client_bind (rpc_transport_t *this, struct sockaddr *sockaddr,
+ socklen_t *sockaddr_len, struct rdma_cm_id *cm_id);
+
+int32_t
+gf_rdma_client_get_remote_sockaddr (rpc_transport_t *this,
+ struct sockaddr *sockaddr,
+ socklen_t *sockaddr_len,
+ int16_t remote_port);
+
+int32_t
+gf_rdma_server_get_local_sockaddr (rpc_transport_t *this,
+ struct sockaddr *addr,
+ socklen_t *addr_len);
+
+int32_t
+gf_rdma_get_transport_identifiers (rpc_transport_t *this);
+
+#endif /* _IB_VERBS_NAME_H */
diff --git a/rpc/rpc-transport/rdma/src/rdma.c b/rpc/rpc-transport/rdma/src/rdma.c
new file mode 100644
index 000000000..6e6099a98
--- /dev/null
+++ b/rpc/rpc-transport/rdma/src/rdma.c
@@ -0,0 +1,4575 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "dict.h"
+#include "glusterfs.h"
+#include "logging.h"
+#include "rdma.h"
+#include "name.h"
+#include "byte-order.h"
+#include "xlator.h"
+#include "xdr-rpc.h"
+#include <signal.h>
+
+#define GF_RDMA_LOG_NAME "rpc-transport/rdma"
+
+static int32_t
+__gf_rdma_ioq_churn (gf_rdma_peer_t *peer);
+
+gf_rdma_post_t *
+gf_rdma_post_ref (gf_rdma_post_t *post);
+
+int
+gf_rdma_post_unref (gf_rdma_post_t *post);
+
+static void *
+gf_rdma_send_completion_proc (void *data);
+
+static void *
+gf_rdma_recv_completion_proc (void *data);
+
+void *
+gf_rdma_async_event_thread (void *context);
+
+static int32_t
+gf_rdma_create_qp (rpc_transport_t *this);
+
+static int32_t
+__gf_rdma_teardown (rpc_transport_t *this);
+
+static int32_t
+gf_rdma_teardown (rpc_transport_t *this);
+
+static int32_t
+gf_rdma_disconnect (rpc_transport_t *this);
+
+static void
+gf_rdma_cm_handle_disconnect (rpc_transport_t *this);
+
+
+static void
+gf_rdma_put_post (gf_rdma_queue_t *queue, gf_rdma_post_t *post)
+{
+ post->ctx.is_request = 0;
+
+ pthread_mutex_lock (&queue->lock);
+ {
+ if (post->prev) {
+ queue->active_count--;
+ post->prev->next = post->next;
+ }
+
+ if (post->next) {
+ post->next->prev = post->prev;
+ }
+
+ post->prev = &queue->passive_posts;
+ post->next = post->prev->next;
+ post->prev->next = post;
+ post->next->prev = post;
+ queue->passive_count++;
+ }
+ pthread_mutex_unlock (&queue->lock);
+}
+
+
+static gf_rdma_post_t *
+gf_rdma_new_post (rpc_transport_t *this, gf_rdma_device_t *device, int32_t len,
+ gf_rdma_post_type_t type)
+{
+ gf_rdma_post_t *post = NULL;
+ int ret = -1;
+
+ post = (gf_rdma_post_t *) GF_CALLOC (1, sizeof (*post),
+ gf_common_mt_rdma_post_t);
+ if (post == NULL) {
+ goto out;
+ }
+
+ pthread_mutex_init (&post->lock, NULL);
+
+ post->buf_size = len;
+
+ post->buf = valloc (len);
+ if (!post->buf) {
+ gf_log_nomem (GF_RDMA_LOG_NAME, GF_LOG_ERROR, len);
+ goto out;
+ }
+
+ post->mr = ibv_reg_mr (device->pd,
+ post->buf,
+ post->buf_size,
+ IBV_ACCESS_LOCAL_WRITE);
+ if (!post->mr) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "memory registration failed (%s)",
+ strerror (errno));
+ goto out;
+ }
+
+ post->device = device;
+ post->type = type;
+
+ ret = 0;
+out:
+ if (ret != 0) {
+ free (post->buf);
+
+ GF_FREE (post);
+ post = NULL;
+ }
+
+ return post;
+}
+
+
+static gf_rdma_post_t *
+gf_rdma_get_post (gf_rdma_queue_t *queue)
+{
+ gf_rdma_post_t *post = NULL;
+
+ pthread_mutex_lock (&queue->lock);
+ {
+ post = queue->passive_posts.next;
+ if (post == &queue->passive_posts)
+ post = NULL;
+
+ if (post) {
+ if (post->prev)
+ post->prev->next = post->next;
+ if (post->next)
+ post->next->prev = post->prev;
+ post->prev = &queue->active_posts;
+ post->next = post->prev->next;
+ post->prev->next = post;
+ post->next->prev = post;
+ post->reused++;
+ queue->active_count++;
+ }
+ }
+ pthread_mutex_unlock (&queue->lock);
+
+ return post;
+}
+
+void
+gf_rdma_destroy_post (gf_rdma_post_t *post)
+{
+ ibv_dereg_mr (post->mr);
+ free (post->buf);
+ GF_FREE (post);
+}
+
+
+static int32_t
+__gf_rdma_quota_get (gf_rdma_peer_t *peer)
+{
+ int32_t ret = -1;
+ gf_rdma_private_t *priv = NULL;
+
+ priv = peer->trans->private;
+
+ if (priv->connected && peer->quota > 0) {
+ ret = peer->quota--;
+ }
+
+ return ret;
+}
+
+
+static void
+__gf_rdma_ioq_entry_free (gf_rdma_ioq_t *entry)
+{
+ list_del_init (&entry->list);
+
+ if (entry->iobref) {
+ iobref_unref (entry->iobref);
+ entry->iobref = NULL;
+ }
+
+ if (entry->msg.request.rsp_iobref) {
+ iobref_unref (entry->msg.request.rsp_iobref);
+ entry->msg.request.rsp_iobref = NULL;
+ }
+
+ mem_put (entry);
+}
+
+
+static void
+__gf_rdma_ioq_flush (gf_rdma_peer_t *peer)
+{
+ gf_rdma_ioq_t *entry = NULL, *dummy = NULL;
+
+ list_for_each_entry_safe (entry, dummy, &peer->ioq, list) {
+ __gf_rdma_ioq_entry_free (entry);
+ }
+}
+
+
+static int32_t
+__gf_rdma_disconnect (rpc_transport_t *this)
+{
+ gf_rdma_private_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->connected) {
+ rdma_disconnect (priv->peer.cm_id);
+ }
+
+ return 0;
+}
+
+
+static void
+gf_rdma_queue_init (gf_rdma_queue_t *queue)
+{
+ pthread_mutex_init (&queue->lock, NULL);
+
+ queue->active_posts.next = &queue->active_posts;
+ queue->active_posts.prev = &queue->active_posts;
+ queue->passive_posts.next = &queue->passive_posts;
+ queue->passive_posts.prev = &queue->passive_posts;
+}
+
+
+static void
+__gf_rdma_destroy_queue (gf_rdma_post_t *post)
+{
+ gf_rdma_post_t *tmp = NULL;
+
+ while (post->next != post) {
+ tmp = post->next;
+
+ post->next = post->next->next;
+ post->next->prev = post;
+
+ gf_rdma_destroy_post (tmp);
+ }
+}
+
+
+static void
+gf_rdma_destroy_queue (gf_rdma_queue_t *queue)
+{
+ if (queue == NULL) {
+ goto out;
+ }
+
+ pthread_mutex_lock (&queue->lock);
+ {
+ if (queue->passive_count > 0) {
+ __gf_rdma_destroy_queue (&queue->passive_posts);
+ queue->passive_count = 0;
+ }
+
+ if (queue->active_count > 0) {
+ __gf_rdma_destroy_queue (&queue->active_posts);
+ queue->active_count = 0;
+ }
+ }
+ pthread_mutex_unlock (&queue->lock);
+
+out:
+ return;
+}
+
+
+static void
+gf_rdma_destroy_posts (rpc_transport_t *this)
+{
+ gf_rdma_device_t *device = NULL;
+ gf_rdma_private_t *priv = NULL;
+
+ if (this == NULL) {
+ goto out;
+ }
+
+ priv = this->private;
+ device = priv->device;
+
+ gf_rdma_destroy_queue (&device->sendq);
+ gf_rdma_destroy_queue (&device->recvq);
+
+out:
+ return;
+}
+
+
+static int32_t
+__gf_rdma_create_posts (rpc_transport_t *this, int32_t count, int32_t size,
+ gf_rdma_queue_t *q, gf_rdma_post_type_t type)
+{
+ int32_t i = 0;
+ int32_t ret = 0;
+ gf_rdma_private_t *priv = NULL;
+ gf_rdma_device_t *device = NULL;
+
+ priv = this->private;
+ device = priv->device;
+
+ for (i=0 ; i<count ; i++) {
+ gf_rdma_post_t *post = NULL;
+
+ post = gf_rdma_new_post (this, device, size + 2048, type);
+ if (!post) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "post creation failed");
+ ret = -1;
+ break;
+ }
+
+ gf_rdma_put_post (q, post);
+ }
+ return ret;
+}
+
+
+static int32_t
+gf_rdma_post_recv (struct ibv_srq *srq,
+ gf_rdma_post_t *post)
+{
+ struct ibv_sge list = {
+ .addr = (unsigned long) post->buf,
+ .length = post->buf_size,
+ .lkey = post->mr->lkey
+ };
+
+ struct ibv_recv_wr wr = {
+ .wr_id = (unsigned long) post,
+ .sg_list = &list,
+ .num_sge = 1,
+ }, *bad_wr;
+
+ gf_rdma_post_ref (post);
+
+ return ibv_post_srq_recv (srq, &wr, &bad_wr);
+}
+
+
+static int32_t
+gf_rdma_create_posts (rpc_transport_t *this)
+{
+ int32_t i = 0, ret = 0;
+ gf_rdma_post_t *post = NULL;
+ gf_rdma_private_t *priv = NULL;
+ gf_rdma_options_t *options = NULL;
+ gf_rdma_device_t *device = NULL;
+
+ priv = this->private;
+ options = &priv->options;
+ device = priv->device;
+
+ ret = __gf_rdma_create_posts (this, options->send_count,
+ options->send_size,
+ &device->sendq, GF_RDMA_SEND_POST);
+ if (!ret)
+ ret = __gf_rdma_create_posts (this, options->recv_count,
+ options->recv_size,
+ &device->recvq,
+ GF_RDMA_RECV_POST);
+
+ if (!ret) {
+ for (i=0 ; i<options->recv_count ; i++) {
+ post = gf_rdma_get_post (&device->recvq);
+ if (gf_rdma_post_recv (device->srq, post) != 0) {
+ ret = -1;
+ break;
+ }
+ }
+ }
+
+ if (ret)
+ gf_rdma_destroy_posts (this);
+
+ return ret;
+}
+
+
+static void
+gf_rdma_destroy_cq (rpc_transport_t *this)
+{
+ gf_rdma_private_t *priv = NULL;
+ gf_rdma_device_t *device = NULL;
+
+ priv = this->private;
+ device = priv->device;
+
+ if (device->recv_cq)
+ ibv_destroy_cq (device->recv_cq);
+ device->recv_cq = NULL;
+
+ if (device->send_cq)
+ ibv_destroy_cq (device->send_cq);
+ device->send_cq = NULL;
+
+ return;
+}
+
+
+static int32_t
+gf_rdma_create_cq (rpc_transport_t *this)
+{
+ gf_rdma_private_t *priv = NULL;
+ gf_rdma_options_t *options = NULL;
+ gf_rdma_device_t *device = NULL;
+ uint64_t send_cqe = 0;
+ int32_t ret = 0;
+ struct ibv_device_attr device_attr = {{0}, };
+
+ priv = this->private;
+ options = &priv->options;
+ device = priv->device;
+
+ device->recv_cq = ibv_create_cq (priv->device->context,
+ options->recv_count * 2,
+ device,
+ device->recv_chan,
+ 0);
+ if (!device->recv_cq) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "creation of CQ for device %s failed",
+ device->device_name);
+ ret = -1;
+ goto out;
+ } else if (ibv_req_notify_cq (device->recv_cq, 0)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "ibv_req_notify_cq on recv CQ of device %s failed",
+ device->device_name);
+ ret = -1;
+ goto out;
+ }
+
+ do {
+ ret = ibv_query_device (priv->device->context, &device_attr);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "ibv_query_device on %s returned %d (%s)",
+ priv->device->device_name, ret,
+ (ret > 0) ? strerror (ret) : "");
+ ret = -1;
+ goto out;
+ }
+
+ send_cqe = options->send_count * 128;
+ send_cqe = (send_cqe > device_attr.max_cqe)
+ ? device_attr.max_cqe : send_cqe;
+
+ /* TODO: make send_cq size dynamically adaptive */
+ device->send_cq = ibv_create_cq (priv->device->context,
+ send_cqe, device,
+ device->send_chan, 0);
+ if (!device->send_cq) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "creation of send_cq for device %s failed",
+ device->device_name);
+ ret = -1;
+ goto out;
+ }
+
+ if (ibv_req_notify_cq (device->send_cq, 0)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "ibv_req_notify_cq on send_cq for device %s"
+ " failed", device->device_name);
+ ret = -1;
+ goto out;
+ }
+ } while (0);
+
+out:
+ if (ret != 0)
+ gf_rdma_destroy_cq (this);
+
+ return ret;
+}
+
+
+static gf_rdma_device_t *
+gf_rdma_get_device (rpc_transport_t *this, struct ibv_context *ibctx,
+ char *device_name)
+{
+ glusterfs_ctx_t *ctx = NULL;
+ gf_rdma_private_t *priv = NULL;
+ gf_rdma_options_t *options = NULL;
+ int32_t ret = 0;
+ int32_t i = 0;
+ gf_rdma_device_t *trav = NULL, *device = NULL;
+ gf_rdma_ctx_t *rdma_ctx = NULL;
+
+ priv = this->private;
+ options = &priv->options;
+ ctx = this->ctx;
+ rdma_ctx = ctx->ib;
+
+ trav = rdma_ctx->device;
+
+ while (trav) {
+ if (!strcmp (trav->device_name, device_name))
+ break;
+ trav = trav->next;
+ }
+
+ if (!trav) {
+ trav = GF_CALLOC (1, sizeof (*trav),
+ gf_common_mt_rdma_device_t);
+ if (trav == NULL) {
+ goto out;
+ }
+
+ priv->device = trav;
+ trav->context = ibctx;
+
+ trav->request_ctx_pool
+ = mem_pool_new (gf_rdma_request_context_t,
+ GF_RDMA_POOL_SIZE);
+ if (trav->request_ctx_pool == NULL) {
+ goto out;
+ }
+
+ trav->ioq_pool
+ = mem_pool_new (gf_rdma_ioq_t, GF_RDMA_POOL_SIZE);
+ if (trav->ioq_pool == NULL) {
+ goto out;
+ }
+
+ trav->reply_info_pool = mem_pool_new (gf_rdma_reply_info_t,
+ GF_RDMA_POOL_SIZE);
+ if (trav->reply_info_pool == NULL) {
+ goto out;
+ }
+
+ trav->device_name = gf_strdup (device_name);
+
+ trav->next = rdma_ctx->device;
+ rdma_ctx->device = trav;
+
+ trav->send_chan = ibv_create_comp_channel (trav->context);
+ if (!trav->send_chan) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not create send completion channel for "
+ "device (%s)", device_name);
+ goto out;
+ }
+
+ trav->recv_chan = ibv_create_comp_channel (trav->context);
+ if (!trav->recv_chan) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not create recv completion channel for "
+ "device (%s)", device_name);
+
+ /* TODO: cleanup current mess */
+ goto out;
+ }
+
+ if (gf_rdma_create_cq (this) < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not create CQ for device (%s)",
+ device_name);
+ goto out;
+ }
+
+ /* protection domain */
+ trav->pd = ibv_alloc_pd (trav->context);
+
+ if (!trav->pd) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not allocate protection domain for "
+ "device (%s)", device_name);
+ goto out;
+ }
+
+ struct ibv_srq_init_attr attr = {
+ .attr = {
+ .max_wr = options->recv_count,
+ .max_sge = 1,
+ .srq_limit = 10
+ }
+ };
+ trav->srq = ibv_create_srq (trav->pd, &attr);
+
+ if (!trav->srq) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not create SRQ for device (%s)",
+ device_name);
+ goto out;
+ }
+
+ /* queue init */
+ gf_rdma_queue_init (&trav->sendq);
+ gf_rdma_queue_init (&trav->recvq);
+
+ if (gf_rdma_create_posts (this) < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not allocate posts for device (%s)",
+ device_name);
+ goto out;
+ }
+
+ /* completion threads */
+ ret = gf_thread_create (&trav->send_thread, NULL,
+ gf_rdma_send_completion_proc,
+ trav->send_chan);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not create send completion thread for "
+ "device (%s)", device_name);
+ goto out;
+ }
+
+ ret = gf_thread_create (&trav->recv_thread, NULL,
+ gf_rdma_recv_completion_proc,
+ trav->recv_chan);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not create recv completion thread "
+ "for device (%s)", device_name);
+ return NULL;
+ }
+
+ ret = gf_thread_create (&trav->async_event_thread, NULL,
+ gf_rdma_async_event_thread,
+ ibctx);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not create async_event_thread");
+ return NULL;
+ }
+
+ /* qpreg */
+ pthread_mutex_init (&trav->qpreg.lock, NULL);
+ for (i=0; i<42; i++) {
+ trav->qpreg.ents[i].next = &trav->qpreg.ents[i];
+ trav->qpreg.ents[i].prev = &trav->qpreg.ents[i];
+ }
+ }
+
+ device = trav;
+ trav = NULL;
+out:
+
+ if (trav != NULL) {
+ gf_rdma_destroy_posts (this);
+ mem_pool_destroy (trav->ioq_pool);
+ mem_pool_destroy (trav->request_ctx_pool);
+ mem_pool_destroy (trav->reply_info_pool);
+ ibv_dealloc_pd (trav->pd);
+ gf_rdma_destroy_cq (this);
+ ibv_destroy_comp_channel (trav->recv_chan);
+ ibv_destroy_comp_channel (trav->send_chan);
+ GF_FREE ((char *)trav->device_name);
+ GF_FREE (trav);
+ }
+
+ return device;
+}
+
+
+static rpc_transport_t *
+gf_rdma_transport_new (rpc_transport_t *listener, struct rdma_cm_id *cm_id)
+{
+ gf_rdma_private_t *listener_priv = NULL, *priv = NULL;
+ rpc_transport_t *this = NULL, *new = NULL;
+ gf_rdma_options_t *options = NULL;
+ char *device_name = NULL;
+
+ listener_priv = listener->private;
+
+ this = GF_CALLOC (1, sizeof (rpc_transport_t),
+ gf_common_mt_rpc_transport_t);
+ if (this == NULL) {
+ goto out;
+ }
+
+ this->listener = listener;
+
+ priv = GF_CALLOC (1, sizeof (gf_rdma_private_t),
+ gf_common_mt_rdma_private_t);
+ if (priv == NULL) {
+ goto out;
+ }
+
+ this->private = priv;
+ priv->options = listener_priv->options;
+
+ priv->listener = listener;
+ priv->entity = GF_RDMA_SERVER;
+
+ options = &priv->options;
+
+ this->ops = listener->ops;
+ this->init = listener->init;
+ this->fini = listener->fini;
+ this->ctx = listener->ctx;
+ this->name = gf_strdup (listener->name);
+ this->notify = listener->notify;
+ this->mydata = listener->mydata;
+
+ this->myinfo.sockaddr_len = sizeof (cm_id->route.addr.src_addr);
+ memcpy (&this->myinfo.sockaddr, &cm_id->route.addr.src_addr,
+ this->myinfo.sockaddr_len);
+
+ this->peerinfo.sockaddr_len = sizeof (cm_id->route.addr.dst_addr);
+ memcpy (&this->peerinfo.sockaddr, &cm_id->route.addr.dst_addr,
+ this->peerinfo.sockaddr_len);
+
+ priv->peer.trans = this;
+ gf_rdma_get_transport_identifiers (this);
+
+ device_name = (char *)ibv_get_device_name (cm_id->verbs->device);
+ if (device_name == NULL) {
+ gf_log (listener->name, GF_LOG_WARNING,
+ "cannot get device name (peer:%s me:%s)",
+ this->peerinfo.identifier, this->myinfo.identifier);
+ goto out;
+ }
+
+ priv->device = gf_rdma_get_device (this, cm_id->verbs,
+ device_name);
+ if (priv->device == NULL) {
+ gf_log (listener->name, GF_LOG_WARNING,
+ "cannot get infiniband device %s (peer:%s me:%s)",
+ device_name, this->peerinfo.identifier,
+ this->myinfo.identifier);
+ goto out;
+ }
+
+ priv->peer.send_count = options->send_count;
+ priv->peer.recv_count = options->recv_count;
+ priv->peer.send_size = options->send_size;
+ priv->peer.recv_size = options->recv_size;
+ priv->peer.cm_id = cm_id;
+ INIT_LIST_HEAD (&priv->peer.ioq);
+
+ pthread_mutex_init (&priv->write_mutex, NULL);
+ pthread_mutex_init (&priv->recv_mutex, NULL);
+
+ cm_id->context = this;
+
+ new = rpc_transport_ref (this);
+ this = NULL;
+out:
+ if (this != NULL) {
+ if (this->private != NULL) {
+ GF_FREE (this->private);
+ }
+
+ if (this->name != NULL) {
+ GF_FREE (this->name);
+ }
+
+ GF_FREE (this);
+ }
+
+ return new;
+}
+
+
+static int
+gf_rdma_cm_handle_connect_request (struct rdma_cm_event *event)
+{
+ int ret = -1;
+ rpc_transport_t *this = NULL, *listener = NULL;
+ struct rdma_cm_id *child_cm_id = NULL, *listener_cm_id = NULL;
+ struct rdma_conn_param conn_param = {0, };
+ gf_rdma_private_t *priv = NULL;
+ gf_rdma_options_t *options = NULL;
+
+ child_cm_id = event->id;
+ listener_cm_id = event->listen_id;
+
+ listener = listener_cm_id->context;
+ priv = listener->private;
+ options = &priv->options;
+
+ this = gf_rdma_transport_new (listener, child_cm_id);
+ if (this == NULL) {
+ gf_log (listener->name, GF_LOG_WARNING,
+ "could not create a transport for incoming connection"
+ " (me.name:%s me.identifier:%s)", listener->name,
+ listener->myinfo.identifier);
+ rdma_destroy_id (child_cm_id);
+ goto out;
+ }
+
+ gf_log (listener->name, GF_LOG_TRACE,
+ "got a connect request (me:%s peer:%s)",
+ listener->myinfo.identifier, this->peerinfo.identifier);
+
+ ret = gf_rdma_create_qp (this);
+ if (ret < 0) {
+ gf_log (listener->name, GF_LOG_WARNING,
+ "could not create QP (peer:%s me:%s)",
+ this->peerinfo.identifier, this->myinfo.identifier);
+ gf_rdma_cm_handle_disconnect (this);
+ goto out;
+ }
+
+ conn_param.responder_resources = 1;
+ conn_param.initiator_depth = 1;
+ conn_param.retry_count = options->attr_retry_cnt;
+ conn_param.rnr_retry_count = options->attr_rnr_retry;
+
+ ret = rdma_accept(child_cm_id, &conn_param);
+ if (ret < 0) {
+ gf_log (listener->name, GF_LOG_WARNING, "rdma_accept failed "
+ "peer:%s me:%s (%s)", this->peerinfo.identifier,
+ this->myinfo.identifier, strerror (errno));
+ gf_rdma_cm_handle_disconnect (this);
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+
+static int
+gf_rdma_cm_handle_route_resolved (struct rdma_cm_event *event)
+{
+ struct rdma_conn_param conn_param = {0, };
+ int ret = 0;
+ rpc_transport_t *this = NULL;
+ gf_rdma_private_t *priv = NULL;
+ gf_rdma_peer_t *peer = NULL;
+ gf_rdma_options_t *options = NULL;
+
+ if (event == NULL) {
+ goto out;
+ }
+
+ this = event->id->context;
+
+ priv = this->private;
+ peer = &priv->peer;
+ options = &priv->options;
+
+ ret = gf_rdma_create_qp (this);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "could not create QP (peer:%s me:%s)",
+ this->peerinfo.identifier, this->myinfo.identifier);
+ gf_rdma_cm_handle_disconnect (this);
+ goto out;
+ }
+
+ memset(&conn_param, 0, sizeof conn_param);
+ conn_param.responder_resources = 1;
+ conn_param.initiator_depth = 1;
+ conn_param.retry_count = options->attr_retry_cnt;
+ conn_param.rnr_retry_count = options->attr_rnr_retry;
+
+ ret = rdma_connect(peer->cm_id, &conn_param);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "rdma_connect failed (%s)", strerror (errno));
+ gf_rdma_cm_handle_disconnect (this);
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE, "route resolved (me:%s peer:%s)",
+ this->myinfo.identifier, this->peerinfo.identifier);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+static int
+gf_rdma_cm_handle_addr_resolved (struct rdma_cm_event *event)
+{
+ rpc_transport_t *this = NULL;
+ gf_rdma_peer_t *peer = NULL;
+ gf_rdma_private_t *priv = NULL;
+ int ret = 0;
+
+ this = event->id->context;
+
+ priv = this->private;
+ peer = &priv->peer;
+
+ GF_ASSERT (peer->cm_id == event->id);
+
+ this->myinfo.sockaddr_len = sizeof (peer->cm_id->route.addr.src_addr);
+ memcpy (&this->myinfo.sockaddr, &peer->cm_id->route.addr.src_addr,
+ this->myinfo.sockaddr_len);
+
+ this->peerinfo.sockaddr_len = sizeof (peer->cm_id->route.addr.dst_addr);
+ memcpy (&this->peerinfo.sockaddr, &peer->cm_id->route.addr.dst_addr,
+ this->peerinfo.sockaddr_len);
+
+ gf_rdma_get_transport_identifiers (this);
+
+ ret = rdma_resolve_route(peer->cm_id, 2000);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "rdma_resolve_route failed (me:%s peer:%s) (%s)",
+ this->myinfo.identifier, this->peerinfo.identifier,
+ strerror (errno));
+ gf_rdma_cm_handle_disconnect (this);
+ }
+
+ gf_log (this->name, GF_LOG_TRACE, "Address resolved (me:%s peer:%s)",
+ this->myinfo.identifier, this->peerinfo.identifier);
+
+ return ret;
+}
+
+
+static void
+gf_rdma_cm_handle_disconnect (rpc_transport_t *this)
+{
+ gf_rdma_private_t *priv = NULL;
+ char need_unref = 0, connected = 0;
+
+ priv = this->private;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "peer disconnected, cleaning up");
+
+ pthread_mutex_lock (&priv->write_mutex);
+ {
+ if (priv->peer.cm_id != NULL) {
+ need_unref = 1;
+ connected = priv->connected;
+ priv->connected = 0;
+ }
+
+ __gf_rdma_teardown (this);
+ }
+ pthread_mutex_unlock (&priv->write_mutex);
+
+ if (connected) {
+ rpc_transport_notify (this, RPC_TRANSPORT_DISCONNECT, this);
+ }
+
+ if (need_unref)
+ rpc_transport_unref (this);
+
+}
+
+
+static int
+gf_rdma_cm_handle_event_established (struct rdma_cm_event *event)
+{
+ rpc_transport_t *this = NULL;
+ gf_rdma_private_t *priv = NULL;
+ struct rdma_cm_id *cm_id = NULL;
+ int ret = 0;
+
+ cm_id = event->id;
+ this = cm_id->context;
+ priv = this->private;
+
+ priv->connected = 1;
+
+ pthread_mutex_lock (&priv->write_mutex);
+ {
+ priv->peer.quota = 1;
+ priv->peer.quota_set = 0;
+ }
+ pthread_mutex_unlock (&priv->write_mutex);
+
+ if (priv->entity == GF_RDMA_CLIENT) {
+ ret = rpc_transport_notify (this, RPC_TRANSPORT_CONNECT, this);
+
+ } else if (priv->entity == GF_RDMA_SERVER) {
+ ret = rpc_transport_notify (priv->listener,
+ RPC_TRANSPORT_ACCEPT, this);
+ }
+
+ if (ret < 0) {
+ gf_rdma_disconnect (this);
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "recieved event RDMA_CM_EVENT_ESTABLISHED (me:%s peer:%s)",
+ this->myinfo.identifier, this->peerinfo.identifier);
+
+ return ret;
+}
+
+
+static int
+gf_rdma_cm_handle_event_error (rpc_transport_t *this)
+{
+ gf_rdma_private_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->entity != GF_RDMA_SERVER_LISTENER) {
+ gf_rdma_cm_handle_disconnect (this);
+ }
+
+ return 0;
+}
+
+
+static int
+gf_rdma_cm_handle_device_removal (struct rdma_cm_event *event)
+{
+ return 0;
+}
+
+
+static void *
+gf_rdma_cm_event_handler (void *data)
+{
+ struct rdma_cm_event *event = NULL;
+ int ret = 0;
+ rpc_transport_t *this = NULL;
+ struct rdma_event_channel *event_channel = NULL;
+
+ event_channel = data;
+
+ while (1) {
+ ret = rdma_get_cm_event (event_channel, &event);
+ if (ret != 0) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "rdma_cm_get_event failed (%s)",
+ strerror (errno));
+ break;
+ }
+
+ switch (event->event) {
+ case RDMA_CM_EVENT_ADDR_RESOLVED:
+ gf_rdma_cm_handle_addr_resolved (event);
+ break;
+
+ case RDMA_CM_EVENT_ROUTE_RESOLVED:
+ gf_rdma_cm_handle_route_resolved (event);
+ break;
+
+ case RDMA_CM_EVENT_CONNECT_REQUEST:
+ gf_rdma_cm_handle_connect_request (event);
+ break;
+
+ case RDMA_CM_EVENT_ESTABLISHED:
+ gf_rdma_cm_handle_event_established (event);
+ break;
+
+ case RDMA_CM_EVENT_ADDR_ERROR:
+ case RDMA_CM_EVENT_ROUTE_ERROR:
+ case RDMA_CM_EVENT_CONNECT_ERROR:
+ case RDMA_CM_EVENT_UNREACHABLE:
+ case RDMA_CM_EVENT_REJECTED:
+ this = event->id->context;
+
+ gf_log (this->name, GF_LOG_WARNING,
+ "cma event %s, error %d (me:%s peer:%s)\n",
+ rdma_event_str(event->event), event->status,
+ this->myinfo.identifier,
+ this->peerinfo.identifier);
+
+ rdma_ack_cm_event (event);
+ event = NULL;
+
+ gf_rdma_cm_handle_event_error (this);
+ continue;
+
+ case RDMA_CM_EVENT_DISCONNECTED:
+ this = event->id->context;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "recieved disconnect (me:%s peer:%s)\n",
+ this->myinfo.identifier,
+ this->peerinfo.identifier);
+
+ rdma_ack_cm_event (event);
+ event = NULL;
+
+ gf_rdma_cm_handle_disconnect (this);
+ continue;
+
+ case RDMA_CM_EVENT_DEVICE_REMOVAL:
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "device removed");
+ gf_rdma_cm_handle_device_removal (event);
+ break;
+
+ default:
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "unhandled event: %s, ignoring",
+ rdma_event_str(event->event));
+ break;
+ }
+
+ rdma_ack_cm_event (event);
+ }
+
+ return NULL;
+}
+
+
+static int32_t
+gf_rdma_post_send (struct ibv_qp *qp, gf_rdma_post_t *post, int32_t len)
+{
+ struct ibv_sge list = {
+ .addr = (unsigned long) post->buf,
+ .length = len,
+ .lkey = post->mr->lkey
+ };
+
+ struct ibv_send_wr wr = {
+ .wr_id = (unsigned long) post,
+ .sg_list = &list,
+ .num_sge = 1,
+ .opcode = IBV_WR_SEND,
+ .send_flags = IBV_SEND_SIGNALED,
+ }, *bad_wr;
+
+ if (!qp)
+ return EINVAL;
+
+ return ibv_post_send (qp, &wr, &bad_wr);
+}
+
+int
+__gf_rdma_encode_error(gf_rdma_peer_t *peer, gf_rdma_reply_info_t *reply_info,
+ struct iovec *rpchdr, gf_rdma_header_t *hdr,
+ gf_rdma_errcode_t err)
+{
+ struct rpc_msg *rpc_msg = NULL;
+
+ if (reply_info != NULL) {
+ hdr->rm_xid = hton32(reply_info->rm_xid);
+ } else {
+ rpc_msg = rpchdr[0].iov_base; /* assume rpchdr contains
+ * only one vector.
+ * (which is true)
+ */
+ hdr->rm_xid = rpc_msg->rm_xid;
+ }
+
+ hdr->rm_vers = hton32(GF_RDMA_VERSION);
+ hdr->rm_credit = hton32(peer->send_count);
+ hdr->rm_type = hton32(GF_RDMA_ERROR);
+ hdr->rm_body.rm_error.rm_type = hton32(err);
+ if (err == ERR_VERS) {
+ hdr->rm_body.rm_error.rm_version.gf_rdma_vers_low
+ = hton32(GF_RDMA_VERSION);
+ hdr->rm_body.rm_error.rm_version.gf_rdma_vers_high
+ = hton32(GF_RDMA_VERSION);
+ }
+
+ return sizeof (*hdr);
+}
+
+
+int32_t
+__gf_rdma_send_error (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
+ gf_rdma_post_t *post, gf_rdma_reply_info_t *reply_info,
+ gf_rdma_errcode_t err)
+{
+ int32_t ret = -1, len = 0;
+
+ len = __gf_rdma_encode_error (peer, reply_info, entry->rpchdr,
+ (gf_rdma_header_t *)post->buf, err);
+ if (len == -1) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
+ "encode error returned -1");
+ goto out;
+ }
+
+ gf_rdma_post_ref (post);
+
+ ret = gf_rdma_post_send (peer->qp, post, len);
+ if (!ret) {
+ ret = len;
+ } else {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "gf_rdma_post_send (to %s) failed with ret = %d (%s)",
+ peer->trans->peerinfo.identifier, ret,
+ (ret > 0) ? strerror (ret) : "");
+ gf_rdma_post_unref (post);
+ __gf_rdma_disconnect (peer->trans);
+ ret = -1;
+ }
+
+out:
+ return ret;
+}
+
+
+int32_t
+__gf_rdma_create_read_chunks_from_vector (gf_rdma_peer_t *peer,
+ gf_rdma_read_chunk_t **readch_ptr,
+ int32_t *pos, struct iovec *vector,
+ int count,
+ gf_rdma_request_context_t *request_ctx)
+{
+ int i = 0;
+ gf_rdma_private_t *priv = NULL;
+ gf_rdma_device_t *device = NULL;
+ struct ibv_mr *mr = NULL;
+ gf_rdma_read_chunk_t *readch = NULL;
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, peer, out);
+ GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, readch_ptr, out);
+ GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, *readch_ptr, out);
+ GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, request_ctx, out);
+ GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, vector, out);
+
+ priv = peer->trans->private;
+ device = priv->device;
+ readch = *readch_ptr;
+
+ for (i = 0; i < count; i++) {
+ readch->rc_discrim = hton32 (1);
+ readch->rc_position = hton32 (*pos);
+
+ mr = ibv_reg_mr (device->pd, vector[i].iov_base,
+ vector[i].iov_len,
+ IBV_ACCESS_REMOTE_READ);
+ if (!mr) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "memory registration failed (%s) (peer:%s)",
+ strerror (errno),
+ peer->trans->peerinfo.identifier);
+ goto out;
+ }
+
+ request_ctx->mr[request_ctx->mr_count++] = mr;
+
+ readch->rc_target.rs_handle = hton32 (mr->rkey);
+ readch->rc_target.rs_length
+ = hton32 (vector[i].iov_len);
+ readch->rc_target.rs_offset
+ = hton64 ((uint64_t)(unsigned long)vector[i].iov_base);
+
+ *pos = *pos + vector[i].iov_len;
+ readch++;
+ }
+
+ *readch_ptr = readch;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+int32_t
+__gf_rdma_create_read_chunks (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
+ gf_rdma_chunktype_t type, uint32_t **ptr,
+ gf_rdma_request_context_t *request_ctx)
+{
+ int32_t ret = -1;
+ int pos = 0;
+
+ GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, peer, out);
+ GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, entry, out);
+ GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, ptr, out);
+ GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, *ptr, out);
+ GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, request_ctx, out);
+
+ request_ctx->iobref = iobref_ref (entry->iobref);
+
+ if (type == gf_rdma_areadch) {
+ pos = 0;
+ ret = __gf_rdma_create_read_chunks_from_vector (peer,
+ (gf_rdma_read_chunk_t **)ptr,
+ &pos,
+ entry->rpchdr,
+ entry->rpchdr_count,
+ request_ctx);
+ if (ret == -1) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "cannot create read chunks from vector "
+ "entry->rpchdr");
+ goto out;
+ }
+
+ ret = __gf_rdma_create_read_chunks_from_vector (peer,
+ (gf_rdma_read_chunk_t **)ptr,
+ &pos,
+ entry->proghdr,
+ entry->proghdr_count,
+ request_ctx);
+ if (ret == -1) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "cannot create read chunks from vector "
+ "entry->proghdr");
+ }
+
+ if (entry->prog_payload_count != 0) {
+ ret = __gf_rdma_create_read_chunks_from_vector (peer,
+ (gf_rdma_read_chunk_t **)ptr,
+ &pos,
+ entry->prog_payload,
+ entry->prog_payload_count,
+ request_ctx);
+ if (ret == -1) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "cannot create read chunks from vector"
+ " entry->prog_payload");
+ }
+ }
+ } else {
+ pos = iov_length (entry->rpchdr, entry->rpchdr_count);
+ ret = __gf_rdma_create_read_chunks_from_vector (peer,
+ (gf_rdma_read_chunk_t **)ptr,
+ &pos,
+ entry->prog_payload,
+ entry->prog_payload_count,
+ request_ctx);
+ if (ret == -1) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "cannot create read chunks from vector "
+ "entry->prog_payload");
+ }
+ }
+
+ /* terminate read-chunk list*/
+ **ptr = 0;
+ *ptr = *ptr + 1;
+out:
+ return ret;
+}
+
+
+int32_t
+__gf_rdma_create_write_chunks_from_vector (gf_rdma_peer_t *peer,
+ gf_rdma_write_chunk_t **writech_ptr,
+ struct iovec *vector, int count,
+ gf_rdma_request_context_t *request_ctx)
+{
+ int i = 0;
+ gf_rdma_private_t *priv = NULL;
+ gf_rdma_device_t *device = NULL;
+ struct ibv_mr *mr = NULL;
+ gf_rdma_write_chunk_t *writech = NULL;
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, peer, out);
+ GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, writech_ptr, out);
+ GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, *writech_ptr, out);
+ GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, request_ctx, out);
+ GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, vector, out);
+
+ writech = *writech_ptr;
+
+ priv = peer->trans->private;
+ device = priv->device;
+
+ for (i = 0; i < count; i++) {
+ mr = ibv_reg_mr (device->pd, vector[i].iov_base,
+ vector[i].iov_len,
+ IBV_ACCESS_REMOTE_WRITE
+ | IBV_ACCESS_LOCAL_WRITE);
+ if (!mr) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "memory registration failed (%s) (peer:%s)",
+ strerror (errno),
+ peer->trans->peerinfo.identifier);
+ goto out;
+ }
+
+ request_ctx->mr[request_ctx->mr_count++] = mr;
+
+ writech->wc_target.rs_handle = hton32 (mr->rkey);
+ writech->wc_target.rs_length = hton32 (vector[i].iov_len);
+ writech->wc_target.rs_offset
+ = hton64 (((uint64_t)(unsigned long)vector[i].iov_base));
+
+ writech++;
+ }
+
+ *writech_ptr = writech;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+int32_t
+__gf_rdma_create_write_chunks (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
+ gf_rdma_chunktype_t chunk_type, uint32_t **ptr,
+ gf_rdma_request_context_t *request_ctx)
+{
+ int32_t ret = -1;
+ gf_rdma_write_array_t *warray = NULL;
+
+ GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, peer, out);
+ GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, ptr, out);
+ GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, *ptr, out);
+ GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, request_ctx, out);
+ GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, entry, out);
+
+ if ((chunk_type == gf_rdma_replych)
+ && ((entry->msg.request.rsphdr_count != 1) ||
+ (entry->msg.request.rsphdr_vec[0].iov_base == NULL))) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ (entry->msg.request.rsphdr_count == 1)
+ ? "chunktype specified as reply chunk but the vector "
+ "specifying the buffer to be used for holding reply"
+ " header is not correct" :
+ "chunktype specified as reply chunk, but more than one "
+ "buffer provided for holding reply");
+ goto out;
+ }
+
+/*
+ if ((chunk_type == gf_rdma_writech)
+ && ((entry->msg.request.rsphdr_count == 0)
+ || (entry->msg.request.rsphdr_vec[0].iov_base == NULL))) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
+ "vector specifying buffer to hold the program's reply "
+ "header should also be provided when buffers are "
+ "provided for holding the program's payload in reply");
+ goto out;
+ }
+*/
+
+ if (chunk_type == gf_rdma_writech) {
+ warray = (gf_rdma_write_array_t *)*ptr;
+ warray->wc_discrim = hton32 (1);
+ warray->wc_nchunks
+ = hton32 (entry->msg.request.rsp_payload_count);
+
+ *ptr = (uint32_t *)&warray->wc_array[0];
+
+ ret = __gf_rdma_create_write_chunks_from_vector (peer,
+ (gf_rdma_write_chunk_t **)ptr,
+ entry->msg.request.rsp_payload,
+ entry->msg.request.rsp_payload_count,
+ request_ctx);
+ if (ret == -1) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "cannot create write chunks from vector "
+ "entry->rpc_payload");
+ goto out;
+ }
+
+ /* terminate write chunklist */
+ **ptr = 0;
+ *ptr = *ptr + 1;
+
+ /* no reply chunklist */
+ **ptr = 0;
+ *ptr = *ptr + 1;
+ } else {
+ /* no write chunklist */
+ **ptr = 0;
+ *ptr = *ptr + 1;
+
+ warray = (gf_rdma_write_array_t *)*ptr;
+ warray->wc_discrim = hton32 (1);
+ warray->wc_nchunks = hton32 (entry->msg.request.rsphdr_count);
+
+ *ptr = (uint32_t *)&warray->wc_array[0];
+
+ ret = __gf_rdma_create_write_chunks_from_vector (peer,
+ (gf_rdma_write_chunk_t **)ptr,
+ entry->msg.request.rsphdr_vec,
+ entry->msg.request.rsphdr_count,
+ request_ctx);
+ if (ret == -1) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "cannot create write chunks from vector "
+ "entry->rpchdr");
+ goto out;
+ }
+
+ /* terminate reply chunklist */
+ **ptr = 0;
+ *ptr = *ptr + 1;
+ }
+
+out:
+ return ret;
+}
+
+
+static inline void
+__gf_rdma_deregister_mr (struct ibv_mr **mr, int count)
+{
+ int i = 0;
+
+ if (mr == NULL) {
+ goto out;
+ }
+
+ for (i = 0; i < count; i++) {
+ ibv_dereg_mr (mr[i]);
+ }
+
+out:
+ return;
+}
+
+
+static int32_t
+__gf_rdma_quota_put (gf_rdma_peer_t *peer)
+{
+ int32_t ret = 0;
+
+ peer->quota++;
+ ret = peer->quota;
+
+ if (!list_empty (&peer->ioq)) {
+ ret = __gf_rdma_ioq_churn (peer);
+ }
+
+ return ret;
+}
+
+
+static int32_t
+gf_rdma_quota_put (gf_rdma_peer_t *peer)
+{
+ int32_t ret = 0;
+ gf_rdma_private_t *priv = NULL;
+
+ priv = peer->trans->private;
+ pthread_mutex_lock (&priv->write_mutex);
+ {
+ ret = __gf_rdma_quota_put (peer);
+ }
+ pthread_mutex_unlock (&priv->write_mutex);
+
+ return ret;
+}
+
+
+/* to be called with priv->mutex held */
+void
+__gf_rdma_request_context_destroy (gf_rdma_request_context_t *context)
+{
+ gf_rdma_peer_t *peer = NULL;
+ gf_rdma_private_t *priv = NULL;
+ int32_t ret = 0;
+
+ if (context == NULL) {
+ goto out;
+ }
+
+ peer = context->peer;
+
+ __gf_rdma_deregister_mr (context->mr, context->mr_count);
+
+ priv = peer->trans->private;
+
+ if (priv->connected) {
+ ret = __gf_rdma_quota_put (peer);
+ if (ret < 0) {
+ gf_log ("rdma", GF_LOG_DEBUG,
+ "failed to send "
+ "message");
+ mem_put (context);
+ __gf_rdma_disconnect (peer->trans);
+ goto out;
+ }
+ }
+
+ if (context->iobref != NULL) {
+ iobref_unref (context->iobref);
+ context->iobref = NULL;
+ }
+
+ if (context->rsp_iobref != NULL) {
+ iobref_unref (context->rsp_iobref);
+ context->rsp_iobref = NULL;
+ }
+
+ mem_put (context);
+
+out:
+ return;
+}
+
+
+void
+gf_rdma_post_context_destroy (gf_rdma_post_context_t *ctx)
+{
+ if (ctx == NULL) {
+ goto out;
+ }
+
+ __gf_rdma_deregister_mr (ctx->mr, ctx->mr_count);
+
+ if (ctx->iobref != NULL) {
+ iobref_unref (ctx->iobref);
+ }
+
+ if (ctx->hdr_iobuf != NULL) {
+ iobuf_unref (ctx->hdr_iobuf);
+ }
+
+ memset (ctx, 0, sizeof (*ctx));
+out:
+ return;
+}
+
+
+int
+gf_rdma_post_unref (gf_rdma_post_t *post)
+{
+ int refcount = -1;
+
+ if (post == NULL) {
+ goto out;
+ }
+
+ pthread_mutex_lock (&post->lock);
+ {
+ refcount = --post->refcount;
+ }
+ pthread_mutex_unlock (&post->lock);
+
+ if (refcount == 0) {
+ gf_rdma_post_context_destroy (&post->ctx);
+ if (post->type == GF_RDMA_SEND_POST) {
+ gf_rdma_put_post (&post->device->sendq, post);
+ } else {
+ gf_rdma_post_recv (post->device->srq, post);
+ }
+ }
+out:
+ return refcount;
+}
+
+
+int
+gf_rdma_post_get_refcount (gf_rdma_post_t *post)
+{
+ int refcount = -1;
+
+ if (post == NULL) {
+ goto out;
+ }
+
+ pthread_mutex_lock (&post->lock);
+ {
+ refcount = post->refcount;
+ }
+ pthread_mutex_unlock (&post->lock);
+
+out:
+ return refcount;
+}
+
+gf_rdma_post_t *
+gf_rdma_post_ref (gf_rdma_post_t *post)
+{
+ if (post == NULL) {
+ goto out;
+ }
+
+ pthread_mutex_lock (&post->lock);
+ {
+ post->refcount++;
+ }
+ pthread_mutex_unlock (&post->lock);
+
+out:
+ return post;
+}
+
+
+int32_t
+__gf_rdma_ioq_churn_request (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
+ gf_rdma_post_t *post)
+{
+ gf_rdma_chunktype_t rtype = gf_rdma_noch;
+ gf_rdma_chunktype_t wtype = gf_rdma_noch;
+ uint64_t send_size = 0;
+ gf_rdma_header_t *hdr = NULL;
+ struct rpc_msg *rpc_msg = NULL;
+ uint32_t *chunkptr = NULL;
+ char *buf = NULL;
+ int32_t ret = 0;
+ gf_rdma_private_t *priv = NULL;
+ gf_rdma_device_t *device = NULL;
+ int chunk_count = 0;
+ gf_rdma_request_context_t *request_ctx = NULL;
+ uint32_t prog_payload_length = 0, len = 0;
+ struct rpc_req *rpc_req = NULL;
+
+ GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, peer, out);
+ GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, entry, out);
+ GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, post, out);
+
+ if ((entry->msg.request.rsphdr_count != 0)
+ && (entry->msg.request.rsp_payload_count != 0)) {
+ ret = -1;
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "both write-chunklist and reply-chunk cannot be "
+ "present");
+ goto out;
+ }
+
+ post->ctx.is_request = 1;
+ priv = peer->trans->private;
+ device = priv->device;
+
+ hdr = (gf_rdma_header_t *)post->buf;
+
+ send_size = iov_length (entry->rpchdr, entry->rpchdr_count)
+ + iov_length (entry->proghdr, entry->proghdr_count)
+ + GLUSTERFS_RDMA_MAX_HEADER_SIZE;
+
+ if (entry->prog_payload_count != 0) {
+ prog_payload_length
+ = iov_length (entry->prog_payload,
+ entry->prog_payload_count);
+ }
+
+ if (send_size > GLUSTERFS_RDMA_INLINE_THRESHOLD) {
+ rtype = gf_rdma_areadch;
+ } else if ((send_size + prog_payload_length)
+ < GLUSTERFS_RDMA_INLINE_THRESHOLD) {
+ rtype = gf_rdma_noch;
+ } else if (entry->prog_payload_count != 0) {
+ rtype = gf_rdma_readch;
+ }
+
+ if (entry->msg.request.rsphdr_count != 0) {
+ wtype = gf_rdma_replych;
+ } else if (entry->msg.request.rsp_payload_count != 0) {
+ wtype = gf_rdma_writech;
+ }
+
+ if (rtype == gf_rdma_readch) {
+ chunk_count += entry->prog_payload_count;
+ } else if (rtype == gf_rdma_areadch) {
+ chunk_count += entry->rpchdr_count;
+ chunk_count += entry->proghdr_count;
+ }
+
+ if (wtype == gf_rdma_writech) {
+ chunk_count += entry->msg.request.rsp_payload_count;
+ } else if (wtype == gf_rdma_replych) {
+ chunk_count += entry->msg.request.rsphdr_count;
+ }
+
+ if (chunk_count > GF_RDMA_MAX_SEGMENTS) {
+ ret = -1;
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "chunk count(%d) exceeding maximum allowed RDMA "
+ "segment count(%d)", chunk_count, GF_RDMA_MAX_SEGMENTS);
+ goto out;
+ }
+
+ if ((wtype != gf_rdma_noch) || (rtype != gf_rdma_noch)) {
+ request_ctx = mem_get (device->request_ctx_pool);
+ if (request_ctx == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ memset (request_ctx, 0, sizeof (*request_ctx));
+
+ request_ctx->pool = device->request_ctx_pool;
+ request_ctx->peer = peer;
+
+ entry->msg.request.rpc_req->conn_private = request_ctx;
+
+ if (entry->msg.request.rsp_iobref != NULL) {
+ request_ctx->rsp_iobref
+ = iobref_ref (entry->msg.request.rsp_iobref);
+ }
+ }
+
+ rpc_msg = (struct rpc_msg *) entry->rpchdr[0].iov_base;
+
+ hdr->rm_xid = rpc_msg->rm_xid; /* no need of hton32(rpc_msg->rm_xid),
+ * since rpc_msg->rm_xid is already
+ * hton32ed value of actual xid
+ */
+ hdr->rm_vers = hton32 (GF_RDMA_VERSION);
+ hdr->rm_credit = hton32 (peer->send_count);
+ if (rtype == gf_rdma_areadch) {
+ hdr->rm_type = hton32 (GF_RDMA_NOMSG);
+ } else {
+ hdr->rm_type = hton32 (GF_RDMA_MSG);
+ }
+
+ chunkptr = &hdr->rm_body.rm_chunks[0];
+ if (rtype != gf_rdma_noch) {
+ ret = __gf_rdma_create_read_chunks (peer, entry, rtype,
+ &chunkptr,
+ request_ctx);
+ if (ret != 0) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "creation of read chunks failed");
+ goto out;
+ }
+ } else {
+ *chunkptr++ = 0; /* no read chunks */
+ }
+
+ if (wtype != gf_rdma_noch) {
+ ret = __gf_rdma_create_write_chunks (peer, entry, wtype,
+ &chunkptr,
+ request_ctx);
+ if (ret != 0) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "creation of write/reply chunk failed");
+ goto out;
+ }
+ } else {
+ *chunkptr++ = 0; /* no write chunks */
+ *chunkptr++ = 0; /* no reply chunk */
+ }
+
+ buf = (char *)chunkptr;
+
+ if (rtype != gf_rdma_areadch) {
+ iov_unload (buf, entry->rpchdr, entry->rpchdr_count);
+ buf += iov_length (entry->rpchdr, entry->rpchdr_count);
+
+ iov_unload (buf, entry->proghdr, entry->proghdr_count);
+ buf += iov_length (entry->proghdr, entry->proghdr_count);
+
+ if (rtype != gf_rdma_readch) {
+ iov_unload (buf, entry->prog_payload,
+ entry->prog_payload_count);
+ buf += iov_length (entry->prog_payload,
+ entry->prog_payload_count);
+ }
+ }
+
+ len = buf - post->buf;
+
+ gf_rdma_post_ref (post);
+
+ ret = gf_rdma_post_send (peer->qp, post, len);
+ if (!ret) {
+ ret = len;
+ } else {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "gf_rdma_post_send (to %s) failed with ret = %d (%s)",
+ peer->trans->peerinfo.identifier, ret,
+ (ret > 0) ? strerror (ret) : "");
+ gf_rdma_post_unref (post);
+ __gf_rdma_disconnect (peer->trans);
+ ret = -1;
+ }
+
+out:
+ if (ret == -1) {
+ rpc_req = entry->msg.request.rpc_req;
+
+ if (request_ctx != NULL) {
+ __gf_rdma_request_context_destroy (rpc_req->conn_private);
+ }
+
+ rpc_req->conn_private = NULL;
+ }
+
+ return ret;
+}
+
+
+static inline void
+__gf_rdma_fill_reply_header (gf_rdma_header_t *header, struct iovec *rpchdr,
+ gf_rdma_reply_info_t *reply_info, int credits)
+{
+ struct rpc_msg *rpc_msg = NULL;
+
+ if (reply_info != NULL) {
+ header->rm_xid = hton32 (reply_info->rm_xid);
+ } else {
+ rpc_msg = rpchdr[0].iov_base; /* assume rpchdr contains
+ * only one vector.
+ * (which is true)
+ */
+ header->rm_xid = rpc_msg->rm_xid;
+ }
+
+ header->rm_type = hton32 (GF_RDMA_MSG);
+ header->rm_vers = hton32 (GF_RDMA_VERSION);
+ header->rm_credit = hton32 (credits);
+
+ header->rm_body.rm_chunks[0] = 0; /* no read chunks */
+ header->rm_body.rm_chunks[1] = 0; /* no write chunks */
+ header->rm_body.rm_chunks[2] = 0; /* no reply chunks */
+
+ return;
+}
+
+
+int32_t
+__gf_rdma_send_reply_inline (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
+ gf_rdma_post_t *post,
+ gf_rdma_reply_info_t *reply_info)
+{
+ gf_rdma_header_t *header = NULL;
+ int32_t send_size = 0, ret = 0;
+ char *buf = NULL;
+
+ send_size = iov_length (entry->rpchdr, entry->rpchdr_count)
+ + iov_length (entry->proghdr, entry->proghdr_count)
+ + iov_length (entry->prog_payload, entry->prog_payload_count)
+ + sizeof (gf_rdma_header_t); /*
+ * remember, no chunklists in the
+ * reply
+ */
+
+ if (send_size > GLUSTERFS_RDMA_INLINE_THRESHOLD) {
+ ret = __gf_rdma_send_error (peer, entry, post, reply_info,
+ ERR_CHUNK);
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "msg size (%d) is greater than maximum size "
+ "of msg that can be sent inlined (%d)",
+ send_size, GLUSTERFS_RDMA_INLINE_THRESHOLD);
+ goto out;
+ }
+
+ header = (gf_rdma_header_t *)post->buf;
+
+ __gf_rdma_fill_reply_header (header, entry->rpchdr, reply_info,
+ peer->send_count);
+
+ buf = (char *)&header->rm_body.rm_chunks[3];
+
+ if (entry->rpchdr_count != 0) {
+ iov_unload (buf, entry->rpchdr, entry->rpchdr_count);
+ buf += iov_length (entry->rpchdr, entry->rpchdr_count);
+ }
+
+ if (entry->proghdr_count != 0) {
+ iov_unload (buf, entry->proghdr, entry->proghdr_count);
+ buf += iov_length (entry->proghdr, entry->proghdr_count);
+ }
+
+ if (entry->prog_payload_count != 0) {
+ iov_unload (buf, entry->prog_payload,
+ entry->prog_payload_count);
+ buf += iov_length (entry->prog_payload,
+ entry->prog_payload_count);
+ }
+
+ gf_rdma_post_ref (post);
+
+ ret = gf_rdma_post_send (peer->qp, post, (buf - post->buf));
+ if (!ret) {
+ ret = send_size;
+ } else {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "posting send (to %s) failed with ret = %d (%s)",
+ peer->trans->peerinfo.identifier, ret,
+ (ret > 0) ? strerror (ret) : "");
+ gf_rdma_post_unref (post);
+ __gf_rdma_disconnect (peer->trans);
+ ret = -1;
+ }
+
+out:
+ return ret;
+}
+
+
+int32_t
+__gf_rdma_reply_encode_write_chunks (gf_rdma_peer_t *peer,
+ uint32_t payload_size,
+ gf_rdma_post_t *post,
+ gf_rdma_reply_info_t *reply_info,
+ uint32_t **ptr)
+{
+ uint32_t chunk_size = 0;
+ int32_t ret = -1;
+ gf_rdma_write_array_t *target_array = NULL;
+ int i = 0;
+
+ target_array = (gf_rdma_write_array_t *)*ptr;
+
+ for (i = 0; i < reply_info->wc_array->wc_nchunks; i++) {
+ chunk_size +=
+ reply_info->wc_array->wc_array[i].wc_target.rs_length;
+ }
+
+ if (chunk_size < payload_size) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
+ "length of payload (%d) is exceeding the total "
+ "write chunk length (%d)", payload_size, chunk_size);
+ goto out;
+ }
+
+ target_array->wc_discrim = hton32 (1);
+ for (i = 0; (i < reply_info->wc_array->wc_nchunks)
+ && (payload_size != 0);
+ i++) {
+ target_array->wc_array[i].wc_target.rs_offset
+ = hton64 (reply_info->wc_array->wc_array[i].wc_target.rs_offset);
+
+ target_array->wc_array[i].wc_target.rs_length
+ = hton32 (min (payload_size,
+ reply_info->wc_array->wc_array[i].wc_target.rs_length));
+ }
+
+ target_array->wc_nchunks = hton32 (i);
+ target_array->wc_array[i].wc_target.rs_handle = 0; /* terminate
+ chunklist */
+
+ ret = 0;
+
+ *ptr = &target_array->wc_array[i].wc_target.rs_length;
+out:
+ return ret;
+}
+
+
+inline int32_t
+__gf_rdma_register_local_mr_for_rdma (gf_rdma_peer_t *peer,
+ struct iovec *vector, int count,
+ gf_rdma_post_context_t *ctx)
+{
+ int i = 0;
+ int32_t ret = -1;
+ gf_rdma_private_t *priv = NULL;
+ gf_rdma_device_t *device = NULL;
+
+ GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, ctx, out);
+ GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, vector, out);
+
+ priv = peer->trans->private;
+ device = priv->device;
+
+ for (i = 0; i < count; i++) {
+ /* what if the memory is registered more than once?
+ * Assume that a single write buffer is passed to afr, which
+ * then passes it to its children. If more than one children
+ * happen to use rdma, then the buffer is registered more than
+ * once.
+ * Ib-verbs specification says that multiple registrations of
+ * same memory location is allowed. Refer to 10.6.3.8 of
+ * Infiniband Architecture Specification Volume 1
+ * (Release 1.2.1)
+ */
+ ctx->mr[ctx->mr_count] = ibv_reg_mr (device->pd,
+ vector[i].iov_base,
+ vector[i].iov_len,
+ IBV_ACCESS_LOCAL_WRITE);
+ if (ctx->mr[ctx->mr_count] == NULL) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "registering memory for IBV_ACCESS_LOCAL_WRITE "
+ "failed (%s)", strerror (errno));
+ goto out;
+ }
+
+ ctx->mr_count++;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/* 1. assumes xfer_len of data is pointed by vector(s) starting from vec[*idx]
+ * 2. modifies vec
+ */
+int32_t
+__gf_rdma_write (gf_rdma_peer_t *peer, gf_rdma_post_t *post, struct iovec *vec,
+ uint32_t xfer_len, int *idx, gf_rdma_write_chunk_t *writech)
+{
+ int size = 0, num_sge = 0, i = 0;
+ int32_t ret = -1;
+ struct ibv_sge *sg_list = NULL;
+ struct ibv_send_wr wr = {
+ .opcode = IBV_WR_RDMA_WRITE,
+ .send_flags = IBV_SEND_SIGNALED,
+ }, *bad_wr;
+
+ if ((peer == NULL) || (writech == NULL) || (idx == NULL)
+ || (post == NULL) || (vec == NULL) || (xfer_len == 0)) {
+ goto out;
+ }
+
+ for (i = *idx; size < xfer_len; i++) {
+ size += vec[i].iov_len;
+ }
+
+ num_sge = i - *idx;
+
+ sg_list = GF_CALLOC (num_sge, sizeof (struct ibv_sge),
+ gf_common_mt_sge);
+ if (sg_list == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ for ((i = *idx), (num_sge = 0); (xfer_len != 0); i++, num_sge++) {
+ size = min (xfer_len, vec[i].iov_len);
+
+ sg_list [num_sge].addr = (unsigned long)vec[i].iov_base;
+ sg_list [num_sge].length = size;
+ sg_list [num_sge].lkey = post->ctx.mr[i]->lkey;
+
+ xfer_len -= size;
+ }
+
+ *idx = i;
+
+ if (size < vec[i - 1].iov_len) {
+ vec[i - 1].iov_base += size;
+ vec[i - 1].iov_len -= size;
+ *idx = i - 1;
+ }
+
+ wr.sg_list = sg_list;
+ wr.num_sge = num_sge;
+ wr.wr_id = (unsigned long) gf_rdma_post_ref (post);
+ wr.wr.rdma.rkey = writech->wc_target.rs_handle;
+ wr.wr.rdma.remote_addr = writech->wc_target.rs_offset;
+
+ ret = ibv_post_send(peer->qp, &wr, &bad_wr);
+ if (ret) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "rdma write to "
+ "client (%s) failed with ret = %d (%s)",
+ peer->trans->peerinfo.identifier, ret,
+ (ret > 0) ? strerror (ret) : "");
+ ret = -1;
+ }
+
+ GF_FREE (sg_list);
+out:
+ return ret;
+}
+
+
+int32_t
+__gf_rdma_do_gf_rdma_write (gf_rdma_peer_t *peer, gf_rdma_post_t *post,
+ struct iovec *vector, int count,
+ struct iobref *iobref,
+ gf_rdma_reply_info_t *reply_info)
+{
+ int i = 0, payload_idx = 0;
+ uint32_t payload_size = 0, xfer_len = 0;
+ int32_t ret = -1;
+
+ if (count != 0) {
+ payload_size = iov_length (vector, count);
+ }
+
+ if (payload_size == 0) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = __gf_rdma_register_local_mr_for_rdma (peer, vector, count,
+ &post->ctx);
+ if (ret == -1) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "registering memory region for rdma failed");
+ goto out;
+ }
+
+ post->ctx.iobref = iobref_ref (iobref);
+
+ for (i = 0; (i < reply_info->wc_array->wc_nchunks)
+ && (payload_size != 0);
+ i++) {
+ xfer_len = min (payload_size,
+ reply_info->wc_array->wc_array[i].wc_target.rs_length);
+
+ ret = __gf_rdma_write (peer, post, vector, xfer_len,
+ &payload_idx,
+ &reply_info->wc_array->wc_array[i]);
+ if (ret == -1) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "rdma write to client (%s) failed",
+ peer->trans->peerinfo.identifier);
+ goto out;
+ }
+
+ payload_size -= xfer_len;
+ }
+
+ ret = 0;
+out:
+
+ return ret;
+}
+
+
+int32_t
+__gf_rdma_send_reply_type_nomsg (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
+ gf_rdma_post_t *post,
+ gf_rdma_reply_info_t *reply_info)
+{
+ gf_rdma_header_t *header = NULL;
+ char *buf = NULL;
+ uint32_t payload_size = 0;
+ int count = 0, i = 0;
+ int32_t ret = 0;
+ struct iovec vector[MAX_IOVEC];
+
+ header = (gf_rdma_header_t *)post->buf;
+
+ __gf_rdma_fill_reply_header (header, entry->rpchdr, reply_info,
+ peer->send_count);
+
+ header->rm_type = hton32 (GF_RDMA_NOMSG);
+
+ payload_size = iov_length (entry->rpchdr, entry->rpchdr_count) +
+ iov_length (entry->proghdr, entry->proghdr_count);
+
+ /* encode reply chunklist */
+ buf = (char *)&header->rm_body.rm_chunks[2];
+ ret = __gf_rdma_reply_encode_write_chunks (peer, payload_size, post,
+ reply_info,
+ (uint32_t **)&buf);
+ if (ret == -1) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "encoding write chunks failed");
+ ret = __gf_rdma_send_error (peer, entry, post, reply_info,
+ ERR_CHUNK);
+ goto out;
+ }
+
+ gf_rdma_post_ref (post);
+
+ for (i = 0; i < entry->rpchdr_count; i++) {
+ vector[count++] = entry->rpchdr[i];
+ }
+
+ for (i = 0; i < entry->proghdr_count; i++) {
+ vector[count++] = entry->proghdr[i];
+ }
+
+ ret = __gf_rdma_do_gf_rdma_write (peer, post, vector, count,
+ entry->iobref, reply_info);
+ if (ret == -1) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "rdma write to peer (%s) failed",
+ peer->trans->peerinfo.identifier);
+ gf_rdma_post_unref (post);
+ goto out;
+ }
+
+ ret = gf_rdma_post_send (peer->qp, post, (buf - post->buf));
+ if (ret) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "posting a send request to client (%s) failed with "
+ "ret = %d (%s)", peer->trans->peerinfo.identifier, ret,
+ (ret > 0) ? strerror (ret) : "");
+ ret = -1;
+ gf_rdma_post_unref (post);
+ } else {
+ ret = payload_size;
+ }
+
+out:
+ return ret;
+}
+
+
+int32_t
+__gf_rdma_send_reply_type_msg (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
+ gf_rdma_post_t *post,
+ gf_rdma_reply_info_t *reply_info)
+{
+ gf_rdma_header_t *header = NULL;
+ int32_t send_size = 0, ret = 0;
+ char *ptr = NULL;
+ uint32_t payload_size = 0;
+
+ send_size = iov_length (entry->rpchdr, entry->rpchdr_count)
+ + iov_length (entry->proghdr, entry->proghdr_count)
+ + GLUSTERFS_RDMA_MAX_HEADER_SIZE;
+
+ if (send_size > GLUSTERFS_RDMA_INLINE_THRESHOLD) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "client has provided only write chunks, but the "
+ "combined size of rpc and program header (%d) is "
+ "exceeding the size of msg that can be sent using "
+ "RDMA send (%d)", send_size,
+ GLUSTERFS_RDMA_INLINE_THRESHOLD);
+
+ ret = __gf_rdma_send_error (peer, entry, post, reply_info,
+ ERR_CHUNK);
+ goto out;
+ }
+
+ header = (gf_rdma_header_t *)post->buf;
+
+ __gf_rdma_fill_reply_header (header, entry->rpchdr, reply_info,
+ peer->send_count);
+
+ payload_size = iov_length (entry->prog_payload,
+ entry->prog_payload_count);
+ ptr = (char *)&header->rm_body.rm_chunks[1];
+
+ ret = __gf_rdma_reply_encode_write_chunks (peer, payload_size, post,
+ reply_info,
+ (uint32_t **)&ptr);
+ if (ret == -1) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "encoding write chunks failed");
+ ret = __gf_rdma_send_error (peer, entry, post, reply_info,
+ ERR_CHUNK);
+ goto out;
+ }
+
+ *(uint32_t *)ptr = 0; /* terminate reply chunklist */
+ ptr += sizeof (uint32_t);
+
+ gf_rdma_post_ref (post);
+
+ ret = __gf_rdma_do_gf_rdma_write (peer, post, entry->prog_payload,
+ entry->prog_payload_count,
+ entry->iobref, reply_info);
+ if (ret == -1) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, "rdma write to peer "
+ "(%s) failed", peer->trans->peerinfo.identifier);
+ gf_rdma_post_unref (post);
+ goto out;
+ }
+
+ iov_unload (ptr, entry->rpchdr, entry->rpchdr_count);
+ ptr += iov_length (entry->rpchdr, entry->rpchdr_count);
+
+ iov_unload (ptr, entry->proghdr, entry->proghdr_count);
+ ptr += iov_length (entry->proghdr, entry->proghdr_count);
+
+ ret = gf_rdma_post_send (peer->qp, post, (ptr - post->buf));
+ if (ret) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "rdma send to client (%s) failed with ret = %d (%s)",
+ peer->trans->peerinfo.identifier, ret,
+ (ret > 0) ? strerror (ret) : "");
+ gf_rdma_post_unref (post);
+ ret = -1;
+ } else {
+ ret = send_size + payload_size;
+ }
+
+out:
+ return ret;
+}
+
+
+void
+gf_rdma_reply_info_destroy (gf_rdma_reply_info_t *reply_info)
+{
+ if (reply_info == NULL) {
+ goto out;
+ }
+
+ if (reply_info->wc_array != NULL) {
+ GF_FREE (reply_info->wc_array);
+ reply_info->wc_array = NULL;
+ }
+
+ mem_put (reply_info);
+out:
+ return;
+}
+
+
+gf_rdma_reply_info_t *
+gf_rdma_reply_info_alloc (gf_rdma_peer_t *peer)
+{
+ gf_rdma_reply_info_t *reply_info = NULL;
+ gf_rdma_private_t *priv = NULL;
+
+ priv = peer->trans->private;
+
+ reply_info = mem_get (priv->device->reply_info_pool);
+ if (reply_info == NULL) {
+ goto out;
+ }
+
+ memset (reply_info, 0, sizeof (*reply_info));
+ reply_info->pool = priv->device->reply_info_pool;
+
+out:
+ return reply_info;
+}
+
+
+int32_t
+__gf_rdma_ioq_churn_reply (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
+ gf_rdma_post_t *post)
+{
+ gf_rdma_reply_info_t *reply_info = NULL;
+ int32_t ret = -1;
+ gf_rdma_chunktype_t type = gf_rdma_noch;
+
+ GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, peer, out);
+ GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, entry, out);
+ GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, post, out);
+
+ reply_info = entry->msg.reply_info;
+ if (reply_info != NULL) {
+ type = reply_info->type;
+ }
+
+ switch (type) {
+ case gf_rdma_noch:
+ ret = __gf_rdma_send_reply_inline (peer, entry, post,
+ reply_info);
+ if (ret < 0) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "failed to send reply to peer (%s) as an "
+ "inlined rdma msg",
+ peer->trans->peerinfo.identifier);
+ }
+ break;
+
+ case gf_rdma_replych:
+ ret = __gf_rdma_send_reply_type_nomsg (peer, entry, post,
+ reply_info);
+ if (ret < 0) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "failed to send reply to peer (%s) as "
+ "RDMA_NOMSG", peer->trans->peerinfo.identifier);
+ }
+ break;
+
+ case gf_rdma_writech:
+ ret = __gf_rdma_send_reply_type_msg (peer, entry, post,
+ reply_info);
+ if (ret < 0) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "failed to send reply with write chunks "
+ "to peer (%s)",
+ peer->trans->peerinfo.identifier);
+ }
+ break;
+
+ default:
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "invalid chunktype (%d) specified for sending reply "
+ " (peer:%s)", type, peer->trans->peerinfo.identifier);
+ break;
+ }
+
+ if (reply_info != NULL) {
+ gf_rdma_reply_info_destroy (reply_info);
+ }
+out:
+ return ret;
+}
+
+
+int32_t
+__gf_rdma_ioq_churn_entry (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry)
+{
+ int32_t ret = 0, quota = 0;
+ gf_rdma_private_t *priv = NULL;
+ gf_rdma_device_t *device = NULL;
+ gf_rdma_options_t *options = NULL;
+ gf_rdma_post_t *post = NULL;
+
+ priv = peer->trans->private;
+ options = &priv->options;
+ device = priv->device;
+
+ quota = __gf_rdma_quota_get (peer);
+ if (quota > 0) {
+ post = gf_rdma_get_post (&device->sendq);
+ if (post == NULL) {
+ post = gf_rdma_new_post (peer->trans, device,
+ (options->send_size + 2048),
+ GF_RDMA_SEND_POST);
+ }
+
+ if (post == NULL) {
+ ret = -1;
+ gf_log_callingfn (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "not able to get a post to send msg");
+ goto out;
+ }
+
+ if (entry->is_request) {
+ ret = __gf_rdma_ioq_churn_request (peer, entry, post);
+ if (ret < 0) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "failed to process request ioq entry "
+ "to peer(%s)",
+ peer->trans->peerinfo.identifier);
+ }
+ } else {
+ ret = __gf_rdma_ioq_churn_reply (peer, entry, post);
+ if (ret < 0) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "failed to process reply ioq entry "
+ "to peer (%s)",
+ peer->trans->peerinfo.identifier);
+ }
+ }
+
+ if (ret != 0) {
+ __gf_rdma_ioq_entry_free (entry);
+ }
+ } else {
+ ret = 0;
+ }
+
+out:
+ return ret;
+}
+
+
+static int32_t
+__gf_rdma_ioq_churn (gf_rdma_peer_t *peer)
+{
+ gf_rdma_ioq_t *entry = NULL;
+ int32_t ret = 0;
+
+ while (!list_empty (&peer->ioq))
+ {
+ /* pick next entry */
+ entry = peer->ioq_next;
+
+ ret = __gf_rdma_ioq_churn_entry (peer, entry);
+
+ if (ret <= 0)
+ break;
+ }
+
+ /*
+ list_for_each_entry_safe (entry, dummy, &peer->ioq, list) {
+ ret = __gf_rdma_ioq_churn_entry (peer, entry);
+ if (ret <= 0) {
+ break;
+ }
+ }
+ */
+
+ return ret;
+}
+
+
+static int32_t
+gf_rdma_writev (rpc_transport_t *this, gf_rdma_ioq_t *entry)
+{
+ int32_t ret = 0, need_append = 1;
+ gf_rdma_private_t *priv = NULL;
+ gf_rdma_peer_t *peer = NULL;
+
+ priv = this->private;
+ pthread_mutex_lock (&priv->write_mutex);
+ {
+ if (!priv->connected) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "rdma is not connected to peer (%s)",
+ this->peerinfo.identifier);
+ ret = -1;
+ goto unlock;
+ }
+
+ peer = &priv->peer;
+ if (list_empty (&peer->ioq)) {
+ ret = __gf_rdma_ioq_churn_entry (peer, entry);
+ if (ret != 0) {
+ need_append = 0;
+
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "processing ioq entry destined "
+ "to (%s) failed",
+ this->peerinfo.identifier);
+ }
+ }
+ }
+
+ if (need_append) {
+ list_add_tail (&entry->list, &peer->ioq);
+ }
+ }
+unlock:
+ pthread_mutex_unlock (&priv->write_mutex);
+ return ret;
+}
+
+
+gf_rdma_ioq_t *
+gf_rdma_ioq_new (rpc_transport_t *this, rpc_transport_data_t *data)
+{
+ gf_rdma_ioq_t *entry = NULL;
+ int count = 0, i = 0;
+ rpc_transport_msg_t *msg = NULL;
+ gf_rdma_private_t *priv = NULL;
+
+ if ((data == NULL) || (this == NULL)) {
+ goto out;
+ }
+
+ priv = this->private;
+
+ entry = mem_get (priv->device->ioq_pool);
+ if (entry == NULL) {
+ goto out;
+ }
+ memset (entry, 0, sizeof (*entry));
+ entry->pool = priv->device->ioq_pool;
+
+ if (data->is_request) {
+ msg = &data->data.req.msg;
+ if (data->data.req.rsp.rsphdr_count != 0) {
+ for (i = 0; i < data->data.req.rsp.rsphdr_count; i++) {
+ entry->msg.request.rsphdr_vec[i]
+ = data->data.req.rsp.rsphdr[i];
+ }
+
+ entry->msg.request.rsphdr_count =
+ data->data.req.rsp.rsphdr_count;
+ }
+
+ if (data->data.req.rsp.rsp_payload_count != 0) {
+ for (i = 0; i < data->data.req.rsp.rsp_payload_count;
+ i++) {
+ entry->msg.request.rsp_payload[i]
+ = data->data.req.rsp.rsp_payload[i];
+ }
+
+ entry->msg.request.rsp_payload_count =
+ data->data.req.rsp.rsp_payload_count;
+ }
+
+ entry->msg.request.rpc_req = data->data.req.rpc_req;
+
+ if (data->data.req.rsp.rsp_iobref != NULL) {
+ entry->msg.request.rsp_iobref
+ = iobref_ref (data->data.req.rsp.rsp_iobref);
+ }
+ } else {
+ msg = &data->data.reply.msg;
+ entry->msg.reply_info = data->data.reply.private;
+ }
+
+ entry->is_request = data->is_request;
+
+ count = msg->rpchdrcount + msg->proghdrcount + msg->progpayloadcount;
+
+ GF_ASSERT (count <= MAX_IOVEC);
+
+ if (msg->rpchdr != NULL) {
+ memcpy (&entry->rpchdr[0], msg->rpchdr,
+ sizeof (struct iovec) * msg->rpchdrcount);
+ entry->rpchdr_count = msg->rpchdrcount;
+ }
+
+ if (msg->proghdr != NULL) {
+ memcpy (&entry->proghdr[0], msg->proghdr,
+ sizeof (struct iovec) * msg->proghdrcount);
+ entry->proghdr_count = msg->proghdrcount;
+ }
+
+ if (msg->progpayload != NULL) {
+ memcpy (&entry->prog_payload[0], msg->progpayload,
+ sizeof (struct iovec) * msg->progpayloadcount);
+ entry->prog_payload_count = msg->progpayloadcount;
+ }
+
+ if (msg->iobref != NULL) {
+ entry->iobref = iobref_ref (msg->iobref);
+ }
+
+ INIT_LIST_HEAD (&entry->list);
+
+out:
+ return entry;
+}
+
+
+int32_t
+gf_rdma_submit_request (rpc_transport_t *this, rpc_transport_req_t *req)
+{
+ int32_t ret = 0;
+ gf_rdma_ioq_t *entry = NULL;
+ rpc_transport_data_t data = {0, };
+
+ if (req == NULL) {
+ goto out;
+ }
+
+ data.is_request = 1;
+ data.data.req = *req;
+
+ entry = gf_rdma_ioq_new (this, &data);
+ if (entry == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "getting a new ioq entry failed (peer:%s)",
+ this->peerinfo.identifier);
+ goto out;
+ }
+
+ ret = gf_rdma_writev (this, entry);
+
+ if (ret > 0) {
+ ret = 0;
+ } else if (ret < 0) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "sending request to peer (%s) failed",
+ this->peerinfo.identifier);
+ rpc_transport_disconnect (this);
+ }
+
+out:
+ return ret;
+}
+
+int32_t
+gf_rdma_submit_reply (rpc_transport_t *this, rpc_transport_reply_t *reply)
+{
+ int32_t ret = 0;
+ gf_rdma_ioq_t *entry = NULL;
+ rpc_transport_data_t data = {0, };
+
+ if (reply == NULL) {
+ goto out;
+ }
+
+ data.data.reply = *reply;
+
+ entry = gf_rdma_ioq_new (this, &data);
+ if (entry == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "getting a new ioq entry failed (peer:%s)",
+ this->peerinfo.identifier);
+ goto out;
+ }
+
+ ret = gf_rdma_writev (this, entry);
+ if (ret > 0) {
+ ret = 0;
+ } else if (ret < 0) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "sending request to peer (%s) failed",
+ this->peerinfo.identifier);
+ rpc_transport_disconnect (this);
+ }
+
+out:
+ return ret;
+}
+
+
+static int
+gf_rdma_register_peer (gf_rdma_device_t *device, int32_t qp_num,
+ gf_rdma_peer_t *peer)
+{
+ struct _qpent *ent = NULL;
+ gf_rdma_qpreg_t *qpreg = NULL;
+ int32_t hash = 0;
+ int ret = -1;
+
+ qpreg = &device->qpreg;
+ hash = qp_num % 42;
+
+ pthread_mutex_lock (&qpreg->lock);
+ {
+ ent = qpreg->ents[hash].next;
+ while ((ent != &qpreg->ents[hash]) && (ent->qp_num != qp_num)) {
+ ent = ent->next;
+ }
+
+ if (ent->qp_num == qp_num) {
+ ret = 0;
+ goto unlock;
+ }
+
+ ent = (struct _qpent *) GF_CALLOC (1, sizeof (*ent),
+ gf_common_mt_qpent);
+ if (ent == NULL) {
+ goto unlock;
+ }
+
+ /* TODO: ref reg->peer */
+ ent->peer = peer;
+ ent->next = &qpreg->ents[hash];
+ ent->prev = ent->next->prev;
+ ent->next->prev = ent;
+ ent->prev->next = ent;
+ ent->qp_num = qp_num;
+ qpreg->count++;
+ ret = 0;
+ }
+unlock:
+ pthread_mutex_unlock (&qpreg->lock);
+
+ return ret;
+}
+
+
+static void
+gf_rdma_unregister_peer (gf_rdma_device_t *device, int32_t qp_num)
+{
+ struct _qpent *ent = NULL;
+ gf_rdma_qpreg_t *qpreg = NULL;
+ int32_t hash = 0;
+
+ qpreg = &device->qpreg;
+ hash = qp_num % 42;
+
+ pthread_mutex_lock (&qpreg->lock);
+ {
+ ent = qpreg->ents[hash].next;
+ while ((ent != &qpreg->ents[hash]) && (ent->qp_num != qp_num))
+ ent = ent->next;
+ if (ent->qp_num != qp_num) {
+ pthread_mutex_unlock (&qpreg->lock);
+ return;
+ }
+ ent->prev->next = ent->next;
+ ent->next->prev = ent->prev;
+ /* TODO: unref reg->peer */
+ GF_FREE (ent);
+ qpreg->count--;
+ }
+ pthread_mutex_unlock (&qpreg->lock);
+}
+
+
+static gf_rdma_peer_t *
+__gf_rdma_lookup_peer (gf_rdma_device_t *device, int32_t qp_num)
+{
+ struct _qpent *ent = NULL;
+ gf_rdma_peer_t *peer = NULL;
+ gf_rdma_qpreg_t *qpreg = NULL;
+ int32_t hash = 0;
+
+ qpreg = &device->qpreg;
+ hash = qp_num % 42;
+ ent = qpreg->ents[hash].next;
+ while ((ent != &qpreg->ents[hash]) && (ent->qp_num != qp_num))
+ ent = ent->next;
+
+ if (ent != &qpreg->ents[hash]) {
+ peer = ent->peer;
+ }
+
+ return peer;
+}
+
+
+static void
+__gf_rdma_destroy_qp (rpc_transport_t *this)
+{
+ gf_rdma_private_t *priv = NULL;
+
+ priv = this->private;
+ if (priv->peer.qp) {
+ gf_rdma_unregister_peer (priv->device, priv->peer.qp->qp_num);
+ rdma_destroy_qp (priv->peer.cm_id);
+ }
+ priv->peer.qp = NULL;
+
+ return;
+}
+
+
+static int32_t
+gf_rdma_create_qp (rpc_transport_t *this)
+{
+ gf_rdma_private_t *priv = NULL;
+ gf_rdma_device_t *device = NULL;
+ int32_t ret = 0;
+ gf_rdma_peer_t *peer = NULL;
+ char *device_name = NULL;
+
+ priv = this->private;
+
+ peer = &priv->peer;
+
+ device_name = (char *)ibv_get_device_name (peer->cm_id->verbs->device);
+ if (device_name == NULL) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_WARNING, "cannot get device_name");
+ goto out;
+ }
+
+ device = gf_rdma_get_device (this, peer->cm_id->verbs,
+ device_name);
+ if (device == NULL) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_WARNING, "cannot get device for "
+ "device %s", device_name);
+ goto out;
+ }
+
+ if (priv->device == NULL) {
+ priv->device = device;
+ }
+
+ struct ibv_qp_init_attr init_attr = {
+ .send_cq = device->send_cq,
+ .recv_cq = device->recv_cq,
+ .srq = device->srq,
+ .cap = {
+ .max_send_wr = peer->send_count,
+ .max_recv_wr = peer->recv_count,
+ .max_send_sge = 2,
+ .max_recv_sge = 1
+ },
+ .qp_type = IBV_QPT_RC
+ };
+
+ ret = rdma_create_qp(peer->cm_id, device->pd, &init_attr);
+ if (ret != 0) {
+ gf_log (peer->trans->name, GF_LOG_CRITICAL,
+ "%s: could not create QP (%s)", this->name,
+ strerror (errno));
+ ret = -1;
+ goto out;
+ }
+
+ peer->qp = peer->cm_id->qp;
+
+ ret = gf_rdma_register_peer (device, peer->qp->qp_num, peer);
+
+out:
+ if (ret == -1)
+ __gf_rdma_destroy_qp (this);
+
+ return ret;
+}
+
+
+static int32_t
+__gf_rdma_teardown (rpc_transport_t *this)
+{
+ gf_rdma_private_t *priv = NULL;
+ gf_rdma_peer_t *peer = NULL;
+
+ priv = this->private;
+ peer = &priv->peer;
+
+ if (peer->cm_id->qp != NULL) {
+ __gf_rdma_destroy_qp (this);
+ }
+
+ if (!list_empty (&priv->peer.ioq)) {
+ __gf_rdma_ioq_flush (peer);
+ }
+
+ if (peer->cm_id != NULL) {
+ rdma_destroy_id (peer->cm_id);
+ peer->cm_id = NULL;
+ }
+
+ /* TODO: decrement cq size */
+ return 0;
+}
+
+
+static int32_t
+gf_rdma_teardown (rpc_transport_t *this)
+{
+ int32_t ret = 0;
+ gf_rdma_private_t *priv = NULL;
+
+ if (this == NULL) {
+ goto out;
+ }
+
+ priv = this->private;
+
+ pthread_mutex_lock (&priv->write_mutex);
+ {
+ ret = __gf_rdma_teardown (this);
+ }
+ pthread_mutex_unlock (&priv->write_mutex);
+
+out:
+ return ret;
+}
+
+
+/*
+ * allocates new memory to hold write-chunklist. New memory is needed since
+ * write-chunklist will be used while sending reply and the post holding initial
+ * write-chunklist sent from client will be put back to srq before a pollin
+ * event is sent to upper layers.
+ */
+int32_t
+gf_rdma_get_write_chunklist (char **ptr, gf_rdma_write_array_t **write_ary)
+{
+ gf_rdma_write_array_t *from = NULL, *to = NULL;
+ int32_t ret = -1, size = 0, i = 0;
+
+ from = (gf_rdma_write_array_t *) *ptr;
+ if (from->wc_discrim == 0) {
+ ret = 0;
+ goto out;
+ }
+
+ from->wc_nchunks = ntoh32 (from->wc_nchunks);
+
+ size = sizeof (*from)
+ + (sizeof (gf_rdma_write_chunk_t) * from->wc_nchunks);
+
+ to = GF_CALLOC (1, size, gf_common_mt_char);
+ if (to == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ to->wc_discrim = ntoh32 (from->wc_discrim);
+ to->wc_nchunks = from->wc_nchunks;
+
+ for (i = 0; i < to->wc_nchunks; i++) {
+ to->wc_array[i].wc_target.rs_handle
+ = ntoh32 (from->wc_array[i].wc_target.rs_handle);
+ to->wc_array[i].wc_target.rs_length
+ = ntoh32 (from->wc_array[i].wc_target.rs_length);
+ to->wc_array[i].wc_target.rs_offset
+ = ntoh64 (from->wc_array[i].wc_target.rs_offset);
+ }
+
+ *write_ary = to;
+ ret = 0;
+ *ptr = (char *)&from->wc_array[i].wc_target.rs_handle;
+out:
+ return ret;
+}
+
+
+/*
+ * does not allocate new memory to hold read-chunklist. New memory is not
+ * needed, since post is not put back to srq till we've completed all the
+ * rdma-reads and hence readchunk-list can point to memory held by post.
+ */
+int32_t
+gf_rdma_get_read_chunklist (char **ptr, gf_rdma_read_chunk_t **readch)
+{
+ int32_t ret = -1;
+ gf_rdma_read_chunk_t *chunk = NULL;
+ int i = 0;
+
+ chunk = (gf_rdma_read_chunk_t *)*ptr;
+ if (chunk[0].rc_discrim == 0) {
+ ret = 0;
+ goto out;
+ }
+
+ for (i = 0; chunk[i].rc_discrim != 0; i++) {
+ chunk[i].rc_discrim = ntoh32 (chunk[i].rc_discrim);
+ chunk[i].rc_position = ntoh32 (chunk[i].rc_position);
+ chunk[i].rc_target.rs_handle
+ = ntoh32 (chunk[i].rc_target.rs_handle);
+ chunk[i].rc_target.rs_length
+ = ntoh32 (chunk[i].rc_target.rs_length);
+ chunk[i].rc_target.rs_offset
+ = ntoh64 (chunk[i].rc_target.rs_offset);
+ }
+
+ *readch = &chunk[0];
+ ret = 0;
+ *ptr = (char *)&chunk[i].rc_discrim;
+out:
+ return ret;
+}
+
+
+inline int32_t
+gf_rdma_decode_error_msg (gf_rdma_peer_t *peer, gf_rdma_post_t *post,
+ size_t bytes_in_post)
+{
+ gf_rdma_header_t *header = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ int32_t ret = -1;
+ struct rpc_msg rpc_msg = {0, };
+
+ header = (gf_rdma_header_t *)post->buf;
+ header->rm_body.rm_error.rm_type
+ = ntoh32 (header->rm_body.rm_error.rm_type);
+ if (header->rm_body.rm_error.rm_type == ERR_VERS) {
+ header->rm_body.rm_error.rm_version.gf_rdma_vers_low =
+ ntoh32 (header->rm_body.rm_error.rm_version.gf_rdma_vers_low);
+ header->rm_body.rm_error.rm_version.gf_rdma_vers_high =
+ ntoh32 (header->rm_body.rm_error.rm_version.gf_rdma_vers_high);
+ }
+
+ rpc_msg.rm_xid = header->rm_xid;
+ rpc_msg.rm_direction = REPLY;
+ rpc_msg.rm_reply.rp_stat = MSG_DENIED;
+
+ iobuf = iobuf_get2 (peer->trans->ctx->iobuf_pool, bytes_in_post);
+ if (iobuf == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ post->ctx.iobref = iobref = iobref_new ();
+ if (iobref == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ iobref_add (iobref, iobuf);
+ iobuf_unref (iobuf);
+
+ ret = rpc_reply_to_xdr (&rpc_msg, iobuf_ptr (iobuf),
+ iobuf_pagesize (iobuf), &post->ctx.vector[0]);
+ if (ret == -1) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "Failed to create RPC reply");
+ goto out;
+ }
+
+ post->ctx.count = 1;
+
+ iobuf = NULL;
+ iobref = NULL;
+
+out:
+ if (ret == -1) {
+ if (iobuf != NULL) {
+ iobuf_unref (iobuf);
+ }
+
+ if (iobref != NULL) {
+ iobref_unref (iobref);
+ }
+ }
+
+ return 0;
+}
+
+
+int32_t
+gf_rdma_decode_msg (gf_rdma_peer_t *peer, gf_rdma_post_t *post,
+ gf_rdma_read_chunk_t **readch, size_t bytes_in_post)
+{
+ int32_t ret = -1;
+ gf_rdma_header_t *header = NULL;
+ gf_rdma_reply_info_t *reply_info = NULL;
+ char *ptr = NULL;
+ gf_rdma_write_array_t *write_ary = NULL;
+ size_t header_len = 0;
+
+ header = (gf_rdma_header_t *)post->buf;
+
+ ptr = (char *)&header->rm_body.rm_chunks[0];
+
+ ret = gf_rdma_get_read_chunklist (&ptr, readch);
+ if (ret == -1) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "cannot get read chunklist from msg");
+ goto out;
+ }
+
+ /* skip terminator of read-chunklist */
+ ptr = ptr + sizeof (uint32_t);
+
+ ret = gf_rdma_get_write_chunklist (&ptr, &write_ary);
+ if (ret == -1) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "cannot get write chunklist from msg");
+ goto out;
+ }
+
+ /* skip terminator of write-chunklist */
+ ptr = ptr + sizeof (uint32_t);
+
+ if (write_ary != NULL) {
+ reply_info = gf_rdma_reply_info_alloc (peer);
+ if (reply_info == NULL) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "reply_info_alloc failed");
+ ret = -1;
+ goto out;
+ }
+
+ reply_info->type = gf_rdma_writech;
+ reply_info->wc_array = write_ary;
+ reply_info->rm_xid = header->rm_xid;
+ } else {
+ ret = gf_rdma_get_write_chunklist (&ptr, &write_ary);
+ if (ret == -1) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "cannot get reply chunklist from msg");
+ goto out;
+ }
+
+ if (write_ary != NULL) {
+ reply_info = gf_rdma_reply_info_alloc (peer);
+ if (reply_info == NULL) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "reply_info_alloc_failed");
+ ret = -1;
+ goto out;
+ }
+
+ reply_info->type = gf_rdma_replych;
+ reply_info->wc_array = write_ary;
+ reply_info->rm_xid = header->rm_xid;
+ }
+ }
+
+ /* skip terminator of reply chunk */
+ ptr = ptr + sizeof (uint32_t);
+ if (header->rm_type != GF_RDMA_NOMSG) {
+ header_len = (long)ptr - (long)post->buf;
+ post->ctx.vector[0].iov_len = (bytes_in_post - header_len);
+
+ post->ctx.hdr_iobuf = iobuf_get2 (peer->trans->ctx->iobuf_pool,
+ (bytes_in_post - header_len));
+ if (post->ctx.hdr_iobuf == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ post->ctx.vector[0].iov_base = iobuf_ptr (post->ctx.hdr_iobuf);
+ memcpy (post->ctx.vector[0].iov_base, ptr,
+ post->ctx.vector[0].iov_len);
+ post->ctx.count = 1;
+ }
+
+ post->ctx.reply_info = reply_info;
+out:
+ if (ret == -1) {
+ if (*readch != NULL) {
+ GF_FREE (*readch);
+ *readch = NULL;
+ }
+
+ GF_FREE (write_ary);
+ }
+
+ return ret;
+}
+
+
+/* Assumes only one of either write-chunklist or a reply chunk is present */
+int32_t
+gf_rdma_decode_header (gf_rdma_peer_t *peer, gf_rdma_post_t *post,
+ gf_rdma_read_chunk_t **readch, size_t bytes_in_post)
+{
+ int32_t ret = -1;
+ gf_rdma_header_t *header = NULL;
+
+ header = (gf_rdma_header_t *)post->buf;
+
+ header->rm_xid = ntoh32 (header->rm_xid);
+ header->rm_vers = ntoh32 (header->rm_vers);
+ header->rm_credit = ntoh32 (header->rm_credit);
+ header->rm_type = ntoh32 (header->rm_type);
+
+ switch (header->rm_type) {
+ case GF_RDMA_MSG:
+ case GF_RDMA_NOMSG:
+ ret = gf_rdma_decode_msg (peer, post, readch, bytes_in_post);
+ if (ret < 0) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "cannot decode msg of type (%d)",
+ header->rm_type);
+ }
+
+ break;
+
+ case GF_RDMA_MSGP:
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "rdma msg of msg-type GF_RDMA_MSGP should not have "
+ "been received");
+ ret = -1;
+ break;
+
+ case GF_RDMA_DONE:
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "rdma msg of msg-type GF_RDMA_DONE should not have "
+ "been received");
+ ret = -1;
+ break;
+
+ case GF_RDMA_ERROR:
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "received a msg of type RDMA_ERROR");
+ ret = gf_rdma_decode_error_msg (peer, post, bytes_in_post);
+ break;
+
+ default:
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "unknown rdma msg-type (%d)", header->rm_type);
+ }
+
+ return ret;
+}
+
+
+int32_t
+__gf_rdma_read (gf_rdma_peer_t *peer, gf_rdma_post_t *post, struct iovec *to,
+ gf_rdma_read_chunk_t *readch)
+{
+ int32_t ret = -1;
+ struct ibv_sge list = {0, };
+ struct ibv_send_wr wr = {0, }, *bad_wr = NULL;
+
+ ret = __gf_rdma_register_local_mr_for_rdma (peer, to, 1, &post->ctx);
+ if (ret == -1) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "registering local memory for rdma read failed");
+ goto out;
+ }
+
+ list.addr = (unsigned long) to->iov_base;
+ list.length = to->iov_len;
+ list.lkey = post->ctx.mr[post->ctx.mr_count - 1]->lkey;
+
+ wr.wr_id = (unsigned long) gf_rdma_post_ref (post);
+ wr.sg_list = &list;
+ wr.num_sge = 1;
+ wr.opcode = IBV_WR_RDMA_READ;
+ wr.send_flags = IBV_SEND_SIGNALED;
+ wr.wr.rdma.remote_addr = readch->rc_target.rs_offset;
+ wr.wr.rdma.rkey = readch->rc_target.rs_handle;
+
+ ret = ibv_post_send (peer->qp, &wr, &bad_wr);
+ if (ret) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "rdma read from client "
+ "(%s) failed with ret = %d (%s)",
+ peer->trans->peerinfo.identifier,
+ ret, (ret > 0) ? strerror (ret) : "");
+ ret = -1;
+ gf_rdma_post_unref (post);
+ }
+out:
+ return ret;
+}
+
+
+int32_t
+gf_rdma_do_reads (gf_rdma_peer_t *peer, gf_rdma_post_t *post,
+ gf_rdma_read_chunk_t *readch)
+{
+ int32_t ret = -1, i = 0, count = 0;
+ size_t size = 0;
+ char *ptr = NULL;
+ struct iobuf *iobuf = NULL;
+ gf_rdma_private_t *priv = NULL;
+
+ priv = peer->trans->private;
+
+ for (i = 0; readch[i].rc_discrim != 0; i++) {
+ size += readch[i].rc_target.rs_length;
+ }
+
+ if (i == 0) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "message type specified as rdma-read but there are no "
+ "rdma read-chunks present");
+ goto out;
+ }
+
+ post->ctx.gf_rdma_reads = i;
+
+ iobuf = iobuf_get2 (peer->trans->ctx->iobuf_pool, size);
+ if (iobuf == NULL) {
+ goto out;
+ }
+
+ if (post->ctx.iobref == NULL) {
+ post->ctx.iobref = iobref_new ();
+ if (post->ctx.iobref == NULL) {
+ iobuf_unref (iobuf);
+ goto out;
+ }
+ }
+
+ iobref_add (post->ctx.iobref, iobuf);
+ iobuf_unref (iobuf);
+
+ ptr = iobuf_ptr (iobuf);
+ iobuf = NULL;
+
+ pthread_mutex_lock (&priv->write_mutex);
+ {
+ if (!priv->connected) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "transport not connected to peer (%s), "
+ "not doing rdma reads",
+ peer->trans->peerinfo.identifier);
+ goto unlock;
+ }
+
+ for (i = 0; readch[i].rc_discrim != 0; i++) {
+ count = post->ctx.count++;
+ post->ctx.vector[count].iov_base = ptr;
+ post->ctx.vector[count].iov_len
+ = readch[i].rc_target.rs_length;
+
+ ret = __gf_rdma_read (peer, post,
+ &post->ctx.vector[count],
+ &readch[i]);
+ if (ret == -1) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "rdma read from peer (%s) failed",
+ peer->trans->peerinfo.identifier);
+ goto unlock;
+ }
+
+ ptr += readch[i].rc_target.rs_length;
+ }
+
+ ret = 0;
+ }
+unlock:
+ pthread_mutex_unlock (&priv->write_mutex);
+out:
+
+ if (ret == -1) {
+ if (iobuf != NULL) {
+ iobuf_unref (iobuf);
+ }
+ }
+
+ return ret;
+}
+
+
+int32_t
+gf_rdma_pollin_notify (gf_rdma_peer_t *peer, gf_rdma_post_t *post)
+{
+ int32_t ret = -1;
+ enum msg_type msg_type = 0;
+ struct rpc_req *rpc_req = NULL;
+ gf_rdma_request_context_t *request_context = NULL;
+ rpc_request_info_t request_info = {0, };
+ gf_rdma_private_t *priv = NULL;
+ uint32_t *ptr = NULL;
+ rpc_transport_pollin_t *pollin = NULL;
+
+ if ((peer == NULL) || (post == NULL)) {
+ goto out;
+ }
+
+ if (post->ctx.iobref == NULL) {
+ post->ctx.iobref = iobref_new ();
+ if (post->ctx.iobref == NULL) {
+ goto out;
+ }
+
+ /* handling the case where both hdr and payload of
+ * GF_FOP_READ_CBK were received in a single iobuf
+ * because of server sending entire msg as inline without
+ * doing rdma writes.
+ */
+ if (post->ctx.hdr_iobuf)
+ iobref_add (post->ctx.iobref, post->ctx.hdr_iobuf);
+ }
+
+ pollin = rpc_transport_pollin_alloc (peer->trans,
+ post->ctx.vector,
+ post->ctx.count,
+ post->ctx.hdr_iobuf,
+ post->ctx.iobref,
+ post->ctx.reply_info);
+ if (pollin == NULL) {
+ goto out;
+ }
+
+ ptr = (uint32_t *)pollin->vector[0].iov_base;
+
+ request_info.xid = ntoh32 (*ptr);
+ msg_type = ntoh32 (*(ptr + 1));
+
+ if (msg_type == REPLY) {
+ ret = rpc_transport_notify (peer->trans,
+ RPC_TRANSPORT_MAP_XID_REQUEST,
+ &request_info);
+ if (ret == -1) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
+ "cannot get request information from rpc "
+ "layer");
+ goto out;
+ }
+
+ rpc_req = request_info.rpc_req;
+ if (rpc_req == NULL) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
+ "rpc request structure not found");
+ ret = -1;
+ goto out;
+ }
+
+ request_context = rpc_req->conn_private;
+ rpc_req->conn_private = NULL;
+
+ priv = peer->trans->private;
+ if (request_context != NULL) {
+ pthread_mutex_lock (&priv->write_mutex);
+ {
+ __gf_rdma_request_context_destroy (request_context);
+ }
+ pthread_mutex_unlock (&priv->write_mutex);
+ } else {
+ gf_rdma_quota_put (peer);
+ }
+
+ pollin->is_reply = 1;
+ }
+
+ ret = rpc_transport_notify (peer->trans, RPC_TRANSPORT_MSG_RECEIVED,
+ pollin);
+ if (ret < 0) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "transport_notify failed");
+ }
+
+out:
+ if (pollin != NULL) {
+ pollin->private = NULL;
+ rpc_transport_pollin_destroy (pollin);
+ }
+
+ return ret;
+}
+
+
+int32_t
+gf_rdma_recv_reply (gf_rdma_peer_t *peer, gf_rdma_post_t *post)
+{
+ int32_t ret = -1;
+ gf_rdma_header_t *header = NULL;
+ gf_rdma_reply_info_t *reply_info = NULL;
+ gf_rdma_write_array_t *wc_array = NULL;
+ int i = 0;
+ uint32_t *ptr = NULL;
+ gf_rdma_request_context_t *ctx = NULL;
+ rpc_request_info_t request_info = {0, };
+ struct rpc_req *rpc_req = NULL;
+
+ header = (gf_rdma_header_t *)post->buf;
+ reply_info = post->ctx.reply_info;
+
+ /* no write chunklist, just notify upper layers */
+ if (reply_info == NULL) {
+ ret = 0;
+ goto out;
+ }
+
+ wc_array = reply_info->wc_array;
+
+ if (header->rm_type == GF_RDMA_NOMSG) {
+ post->ctx.vector[0].iov_base
+ = (void *)(long)wc_array->wc_array[0].wc_target.rs_offset;
+ post->ctx.vector[0].iov_len
+ = wc_array->wc_array[0].wc_target.rs_length;
+
+ post->ctx.count = 1;
+ } else {
+ for (i = 0; i < wc_array->wc_nchunks; i++) {
+ post->ctx.vector[i + 1].iov_base
+ = (void *)(long)wc_array->wc_array[i].wc_target.rs_offset;
+ post->ctx.vector[i + 1].iov_len
+ = wc_array->wc_array[i].wc_target.rs_length;
+ }
+
+ post->ctx.count += wc_array->wc_nchunks;
+ }
+
+ ptr = (uint32_t *)post->ctx.vector[0].iov_base;
+ request_info.xid = ntoh32 (*ptr);
+
+ ret = rpc_transport_notify (peer->trans,
+ RPC_TRANSPORT_MAP_XID_REQUEST,
+ &request_info);
+ if (ret == -1) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "cannot get request information (peer:%s) from rpc "
+ "layer", peer->trans->peerinfo.identifier);
+ goto out;
+ }
+
+ rpc_req = request_info.rpc_req;
+ if (rpc_req == NULL) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "rpc request structure not found");
+ ret = -1;
+ goto out;
+ }
+
+ ctx = rpc_req->conn_private;
+ if ((post->ctx.iobref == NULL) && ctx->rsp_iobref) {
+ post->ctx.iobref = iobref_ref (ctx->rsp_iobref);
+ }
+
+ ret = 0;
+
+ gf_rdma_reply_info_destroy (reply_info);
+
+out:
+ if (ret == 0) {
+ ret = gf_rdma_pollin_notify (peer, post);
+ if (ret < 0) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "pollin notify failed");
+ }
+ }
+
+ return ret;
+}
+
+
+inline int32_t
+gf_rdma_recv_request (gf_rdma_peer_t *peer, gf_rdma_post_t *post,
+ gf_rdma_read_chunk_t *readch)
+{
+ int32_t ret = -1;
+
+ if (readch != NULL) {
+ ret = gf_rdma_do_reads (peer, post, readch);
+ if (ret < 0) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "rdma read from peer (%s) failed",
+ peer->trans->peerinfo.identifier);
+ }
+ } else {
+ ret = gf_rdma_pollin_notify (peer, post);
+ if (ret == -1) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "pollin notification failed");
+ }
+ }
+
+ return ret;
+}
+
+void
+gf_rdma_process_recv (gf_rdma_peer_t *peer, struct ibv_wc *wc)
+{
+ gf_rdma_post_t *post = NULL;
+ gf_rdma_read_chunk_t *readch = NULL;
+ int ret = -1;
+ uint32_t *ptr = NULL;
+ enum msg_type msg_type = 0;
+ gf_rdma_header_t *header = NULL;
+ gf_rdma_private_t *priv = NULL;
+
+ post = (gf_rdma_post_t *) (long) wc->wr_id;
+ if (post == NULL) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "no post found in successful work completion element");
+ goto out;
+ }
+
+ ret = gf_rdma_decode_header (peer, post, &readch, wc->byte_len);
+ if (ret == -1) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "decoding of header failed");
+ goto out;
+ }
+
+ header = (gf_rdma_header_t *)post->buf;
+
+ priv = peer->trans->private;
+
+ pthread_mutex_lock (&priv->write_mutex);
+ {
+ if (!priv->peer.quota_set) {
+ priv->peer.quota_set = 1;
+
+ /* Initially peer.quota is set to 1 as per RFC 5666. We
+ * have to account for the quota used while sending
+ * first msg (which may or may not be returned to pool
+ * at this point) while deriving peer.quota from
+ * header->rm_credit. Hence the arithmatic below,
+ * instead of directly setting it to header->rm_credit.
+ */
+ priv->peer.quota = header->rm_credit
+ - ( 1 - priv->peer.quota);
+ }
+ }
+ pthread_mutex_unlock (&priv->write_mutex);
+
+ switch (header->rm_type) {
+ case GF_RDMA_MSG:
+ ptr = (uint32_t *)post->ctx.vector[0].iov_base;
+ msg_type = ntoh32 (*(ptr + 1));
+ break;
+
+ case GF_RDMA_NOMSG:
+ if (readch != NULL) {
+ msg_type = CALL;
+ } else {
+ msg_type = REPLY;
+ }
+ break;
+
+ case GF_RDMA_ERROR:
+ if (header->rm_body.rm_error.rm_type == ERR_CHUNK) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "peer (%s), couldn't encode or decode the msg "
+ "properly or write chunks were not provided "
+ "for replies that were bigger than "
+ "RDMA_INLINE_THRESHOLD (%d)",
+ peer->trans->peerinfo.identifier,
+ GLUSTERFS_RDMA_INLINE_THRESHOLD);
+ ret = gf_rdma_pollin_notify (peer, post);
+ if (ret == -1) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
+ "pollin notification failed");
+ }
+ goto out;
+ } else {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
+ "an error has happened while transmission of "
+ "msg, disconnecting the transport");
+ ret = -1;
+ goto out;
+ }
+
+ default:
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "invalid rdma msg-type (%d)", header->rm_type);
+ goto out;
+ }
+
+ if (msg_type == CALL) {
+ ret = gf_rdma_recv_request (peer, post, readch);
+ if (ret < 0) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "receiving a request from peer (%s) failed",
+ peer->trans->peerinfo.identifier);
+ }
+ } else {
+ ret = gf_rdma_recv_reply (peer, post);
+ if (ret < 0) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "receiving a reply from peer (%s) failed",
+ peer->trans->peerinfo.identifier);
+ }
+ }
+
+out:
+ if (ret == -1) {
+ rpc_transport_disconnect (peer->trans);
+ }
+
+ return;
+}
+
+void *
+gf_rdma_async_event_thread (void *context)
+{
+ struct ibv_async_event event;
+ int ret;
+
+ while (1) {
+ do {
+ ret = ibv_get_async_event((struct ibv_context *)context,
+ &event);
+
+ if (ret && errno != EINTR) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "Error getting event (%s)",
+ strerror (errno));
+ }
+ } while(ret && errno == EINTR);
+
+ switch (event.event_type) {
+ case IBV_EVENT_SRQ_LIMIT_REACHED:
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "recieved srq_limit reached");
+ break;
+
+ default:
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
+ "event (%d) recieved", event.event_type);
+ break;
+ }
+
+ ibv_ack_async_event(&event);
+ }
+
+ return 0;
+}
+
+
+static void *
+gf_rdma_recv_completion_proc (void *data)
+{
+ struct ibv_comp_channel *chan = NULL;
+ gf_rdma_device_t *device = NULL;;
+ gf_rdma_post_t *post = NULL;
+ gf_rdma_peer_t *peer = NULL;
+ struct ibv_cq *event_cq = NULL;
+ struct ibv_wc wc = {0, };
+ void *event_ctx = NULL;
+ int32_t ret = 0;
+
+ chan = data;
+
+ while (1) {
+ ret = ibv_get_cq_event (chan, &event_cq, &event_ctx);
+ if (ret) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
+ "ibv_get_cq_event failed, terminating recv "
+ "thread %d (%d)", ret, errno);
+ continue;
+ }
+
+ device = event_ctx;
+
+ ret = ibv_req_notify_cq (event_cq, 0);
+ if (ret) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
+ "ibv_req_notify_cq on %s failed, terminating "
+ "recv thread: %d (%d)",
+ device->device_name, ret, errno);
+ continue;
+ }
+
+ device = (gf_rdma_device_t *) event_ctx;
+
+ while ((ret = ibv_poll_cq (event_cq, 1, &wc)) > 0) {
+ post = (gf_rdma_post_t *) (long) wc.wr_id;
+
+ pthread_mutex_lock (&device->qpreg.lock);
+ {
+ peer = __gf_rdma_lookup_peer (device,
+ wc.qp_num);
+
+ /*
+ * keep a refcount on transport so that it
+ * does not get freed because of some error
+ * indicated by wc.status till we are done
+ * with usage of peer and thereby that of trans.
+ */
+ if (peer != NULL) {
+ rpc_transport_ref (peer->trans);
+ }
+ }
+ pthread_mutex_unlock (&device->qpreg.lock);
+
+ if (wc.status != IBV_WC_SUCCESS) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
+ "recv work request on `%s' returned "
+ "error (%d)", device->device_name,
+ wc.status);
+ if (peer) {
+ rpc_transport_unref (peer->trans);
+ rpc_transport_disconnect (peer->trans);
+ }
+
+ if (post) {
+ gf_rdma_post_unref (post);
+ }
+ continue;
+ }
+
+ if (peer) {
+ gf_rdma_process_recv (peer, &wc);
+ rpc_transport_unref (peer->trans);
+ } else {
+ gf_log (GF_RDMA_LOG_NAME,
+ GF_LOG_DEBUG,
+ "could not lookup peer for qp_num: %d",
+ wc.qp_num);
+ }
+
+ gf_rdma_post_unref (post);
+ }
+
+ if (ret < 0) {
+ gf_log (GF_RDMA_LOG_NAME,
+ GF_LOG_ERROR,
+ "ibv_poll_cq on `%s' returned error "
+ "(ret = %d, errno = %d)",
+ device->device_name, ret, errno);
+ continue;
+ }
+ ibv_ack_cq_events (event_cq, 1);
+ }
+
+ return NULL;
+}
+
+
+void
+gf_rdma_handle_failed_send_completion (gf_rdma_peer_t *peer, struct ibv_wc *wc)
+{
+ gf_rdma_post_t *post = NULL;
+ gf_rdma_device_t *device = NULL;
+ gf_rdma_private_t *priv = NULL;
+
+ if (peer != NULL) {
+ priv = peer->trans->private;
+ if (priv != NULL) {
+ device = priv->device;
+ }
+ }
+
+
+ post = (gf_rdma_post_t *) (long) wc->wr_id;
+
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "send work request on `%s' returned error "
+ "wc.status = %d, wc.vendor_err = %d, post->buf = %p, "
+ "wc.byte_len = %d, post->reused = %d",
+ (device != NULL) ? device->device_name : NULL, wc->status,
+ wc->vendor_err, post->buf, wc->byte_len, post->reused);
+
+ if (wc->status == IBV_WC_RETRY_EXC_ERR) {
+ gf_log ("rdma", GF_LOG_ERROR, "connection between client and"
+ " server not working. check by running "
+ "'ibv_srq_pingpong'. also make sure subnet manager"
+ " is running (eg: 'opensm'), or check if rdma port is "
+ "valid (or active) by running 'ibv_devinfo'. contact "
+ "Gluster Support Team if the problem persists.");
+ }
+
+ if (peer) {
+ rpc_transport_disconnect (peer->trans);
+ }
+
+ return;
+}
+
+
+void
+gf_rdma_handle_successful_send_completion (gf_rdma_peer_t *peer,
+ struct ibv_wc *wc)
+{
+ gf_rdma_post_t *post = NULL;
+ int reads = 0, ret = 0;
+ gf_rdma_header_t *header = NULL;
+
+ if (wc->opcode != IBV_WC_RDMA_READ) {
+ goto out;
+ }
+
+ post = (gf_rdma_post_t *)(long) wc->wr_id;
+
+ pthread_mutex_lock (&post->lock);
+ {
+ reads = --post->ctx.gf_rdma_reads;
+ }
+ pthread_mutex_unlock (&post->lock);
+
+ if (reads != 0) {
+ /* if it is not the last rdma read, we've got nothing to do */
+ goto out;
+ }
+
+ header = (gf_rdma_header_t *)post->buf;
+
+ if (header->rm_type == GF_RDMA_NOMSG) {
+ post->ctx.count = 1;
+ post->ctx.vector[0].iov_len += post->ctx.vector[1].iov_len;
+ }
+
+ ret = gf_rdma_pollin_notify (peer, post);
+ if ((ret == -1) && (peer != NULL)) {
+ rpc_transport_disconnect (peer->trans);
+ }
+
+out:
+ return;
+}
+
+
+static void *
+gf_rdma_send_completion_proc (void *data)
+{
+ struct ibv_comp_channel *chan = NULL;
+ gf_rdma_post_t *post = NULL;
+ gf_rdma_peer_t *peer = NULL;
+ struct ibv_cq *event_cq = NULL;
+ void *event_ctx = NULL;
+ gf_rdma_device_t *device = NULL;
+ struct ibv_wc wc = {0, };
+ char is_request = 0;
+ int32_t ret = 0, quota_ret = 0;
+
+ chan = data;
+ while (1) {
+ ret = ibv_get_cq_event (chan, &event_cq, &event_ctx);
+ if (ret) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
+ "ibv_get_cq_event on failed, terminating "
+ "send thread: %d (%d)", ret, errno);
+ continue;
+ }
+
+ device = event_ctx;
+
+ ret = ibv_req_notify_cq (event_cq, 0);
+ if (ret) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
+ "ibv_req_notify_cq on %s failed, terminating "
+ "send thread: %d (%d)",
+ device->device_name, ret, errno);
+ continue;
+ }
+
+ while ((ret = ibv_poll_cq (event_cq, 1, &wc)) > 0) {
+ post = (gf_rdma_post_t *) (long) wc.wr_id;
+
+ pthread_mutex_lock (&device->qpreg.lock);
+ {
+ peer = __gf_rdma_lookup_peer (device, wc.qp_num);
+
+ /*
+ * keep a refcount on transport so that it
+ * does not get freed because of some error
+ * indicated by wc.status, till we are done
+ * with usage of peer and thereby that of trans.
+ */
+ if (peer != NULL) {
+ rpc_transport_ref (peer->trans);
+ }
+ }
+ pthread_mutex_unlock (&device->qpreg.lock);
+
+ if (wc.status != IBV_WC_SUCCESS) {
+ gf_rdma_handle_failed_send_completion (peer, &wc);
+ } else {
+ gf_rdma_handle_successful_send_completion (peer,
+ &wc);
+ }
+
+ if (post) {
+ is_request = post->ctx.is_request;
+
+ ret = gf_rdma_post_unref (post);
+ if ((ret == 0)
+ && (wc.status == IBV_WC_SUCCESS)
+ && !is_request
+ && (post->type == GF_RDMA_SEND_POST)
+ && (peer != NULL)) {
+ /* An GF_RDMA_RECV_POST can end up in
+ * gf_rdma_send_completion_proc for
+ * rdma-reads, and we do not take
+ * quota for getting an GF_RDMA_RECV_POST.
+ */
+
+ /*
+ * if it is request, quota is returned
+ * after reply has come.
+ */
+ quota_ret = gf_rdma_quota_put (peer);
+ if (quota_ret < 0) {
+ gf_log ("rdma", GF_LOG_DEBUG,
+ "failed to send "
+ "message");
+ }
+ }
+ }
+
+ if (peer) {
+ rpc_transport_unref (peer->trans);
+ } else {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
+ "could not lookup peer for qp_num: %d",
+ wc.qp_num);
+ }
+ }
+
+ if (ret < 0) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
+ "ibv_poll_cq on `%s' returned error (ret = %d,"
+ " errno = %d)",
+ device->device_name, ret, errno);
+ continue;
+ }
+
+ ibv_ack_cq_events (event_cq, 1);
+ }
+
+ return NULL;
+}
+
+
+static void
+gf_rdma_options_init (rpc_transport_t *this)
+{
+ gf_rdma_private_t *priv = NULL;
+ gf_rdma_options_t *options = NULL;
+ int32_t mtu = 0;
+ data_t *temp = NULL;
+
+ /* TODO: validate arguments from options below */
+
+ priv = this->private;
+ options = &priv->options;
+ options->send_size = GLUSTERFS_RDMA_INLINE_THRESHOLD;/*this->ctx->page_size * 4; 512 KB*/
+ options->recv_size = GLUSTERFS_RDMA_INLINE_THRESHOLD;/*this->ctx->page_size * 4; 512 KB*/
+ options->send_count = 4096;
+ options->recv_count = 4096;
+ options->attr_timeout = GF_RDMA_TIMEOUT;
+ options->attr_retry_cnt = GF_RDMA_RETRY_CNT;
+ options->attr_rnr_retry = GF_RDMA_RNR_RETRY;
+
+ temp = dict_get (this->options,
+ "transport.rdma.work-request-send-count");
+ if (temp)
+ options->send_count = data_to_int32 (temp);
+
+ temp = dict_get (this->options,
+ "transport.rdma.work-request-recv-count");
+ if (temp)
+ options->recv_count = data_to_int32 (temp);
+
+ temp = dict_get (this->options, "transport.rdma.attr-timeout");
+
+ if (temp)
+ options->attr_timeout = data_to_uint8 (temp);
+
+ temp = dict_get (this->options, "transport.rdma.attr-retry-cnt");
+
+ if (temp)
+ options->attr_retry_cnt = data_to_uint8 (temp);
+
+ temp = dict_get (this->options, "transport.rdma.attr-rnr-retry");
+
+ if (temp)
+ options->attr_rnr_retry = data_to_uint8 (temp);
+
+ options->port = 1;
+ temp = dict_get (this->options,
+ "transport.rdma.port");
+ if (temp)
+ options->port = data_to_uint64 (temp);
+
+ options->mtu = mtu = IBV_MTU_2048;
+ temp = dict_get (this->options,
+ "transport.rdma.mtu");
+ if (temp)
+ mtu = data_to_int32 (temp);
+ switch (mtu) {
+ case 256: options->mtu = IBV_MTU_256;
+ break;
+ case 512: options->mtu = IBV_MTU_512;
+ break;
+ case 1024: options->mtu = IBV_MTU_1024;
+ break;
+ case 2048: options->mtu = IBV_MTU_2048;
+ break;
+ case 4096: options->mtu = IBV_MTU_4096;
+ break;
+ default:
+ if (temp)
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "%s: unrecognized MTU value '%s', defaulting "
+ "to '2048'", this->name,
+ data_to_str (temp));
+ else
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_TRACE,
+ "%s: defaulting MTU to '2048'",
+ this->name);
+ options->mtu = IBV_MTU_2048;
+ break;
+ }
+
+ temp = dict_get (this->options,
+ "transport.rdma.device-name");
+ if (temp)
+ options->device_name = gf_strdup (temp->data);
+
+ return;
+}
+
+
+gf_rdma_ctx_t *
+__gf_rdma_ctx_create (void)
+{
+ gf_rdma_ctx_t *rdma_ctx = NULL;
+ int ret = -1;
+
+ rdma_ctx = GF_CALLOC (1, sizeof (*rdma_ctx), gf_common_mt_char);
+ if (rdma_ctx == NULL) {
+ goto out;
+ }
+
+ rdma_ctx->rdma_cm_event_channel = rdma_create_event_channel ();
+ if (rdma_ctx->rdma_cm_event_channel == NULL) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "rdma_cm event channel creation failed (%s)",
+ strerror (errno));
+ goto out;
+ }
+
+ ret = gf_thread_create (&rdma_ctx->rdma_cm_thread, NULL,
+ gf_rdma_cm_event_handler,
+ rdma_ctx->rdma_cm_event_channel);
+ if (ret != 0) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "creation of thread to handle rdma-cm events "
+ "failed (%s)", strerror (ret));
+ goto out;
+ }
+
+out:
+ if (ret < 0) {
+ if (rdma_ctx->rdma_cm_event_channel != NULL) {
+ rdma_destroy_event_channel (rdma_ctx->rdma_cm_event_channel);
+ }
+
+ GF_FREE (rdma_ctx);
+ rdma_ctx = NULL;
+ }
+
+ return rdma_ctx;
+}
+
+static int32_t
+gf_rdma_init (rpc_transport_t *this)
+{
+ gf_rdma_private_t *priv = NULL;
+ int32_t ret = 0;
+ glusterfs_ctx_t *ctx = NULL;
+ gf_rdma_options_t *options = NULL;
+
+ ctx= this->ctx;
+
+ priv = this->private;
+
+ ibv_fork_init ();
+ gf_rdma_options_init (this);
+
+ options = &priv->options;
+ priv->peer.send_count = options->send_count;
+ priv->peer.recv_count = options->recv_count;
+ priv->peer.send_size = options->send_size;
+ priv->peer.recv_size = options->recv_size;
+
+ priv->peer.trans = this;
+ INIT_LIST_HEAD (&priv->peer.ioq);
+
+ pthread_mutex_init (&priv->write_mutex, NULL);
+ pthread_mutex_init (&priv->recv_mutex, NULL);
+ pthread_cond_init (&priv->recv_cond, NULL);
+
+ pthread_mutex_lock (&ctx->lock);
+ {
+ if (ctx->ib == NULL) {
+ ctx->ib = __gf_rdma_ctx_create ();
+ if (ctx->ib == NULL) {
+ ret = -1;
+ }
+ }
+ }
+ pthread_mutex_unlock (&ctx->lock);
+
+ return ret;
+}
+
+
+static int32_t
+gf_rdma_disconnect (rpc_transport_t *this)
+{
+ gf_rdma_private_t *priv = NULL;
+ int32_t ret = 0;
+
+ priv = this->private;
+ gf_log_callingfn (this->name, GF_LOG_WARNING,
+ "disconnect called (peer:%s)",
+ this->peerinfo.identifier);
+
+ pthread_mutex_lock (&priv->write_mutex);
+ {
+ ret = __gf_rdma_disconnect (this);
+ }
+ pthread_mutex_unlock (&priv->write_mutex);
+
+ return ret;
+}
+
+
+static int32_t
+gf_rdma_connect (struct rpc_transport *this, int port)
+{
+ gf_rdma_private_t *priv = NULL;
+ int32_t ret = 0;
+ union gf_sock_union sock_union = {{0, }, };
+ socklen_t sockaddr_len = 0;
+ gf_rdma_peer_t *peer = NULL;
+ gf_rdma_ctx_t *rdma_ctx = NULL;
+ gf_boolean_t connected = _gf_false;
+
+ priv = this->private;
+
+ peer = &priv->peer;
+
+ rpc_transport_ref (this);
+
+ ret = gf_rdma_client_get_remote_sockaddr (this,
+ &sock_union.sa,
+ &sockaddr_len, port);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "cannot get remote address to connect");
+ goto out;
+ }
+
+ rdma_ctx = this->ctx->ib;
+
+ pthread_mutex_lock (&priv->write_mutex);
+ {
+ if (peer->cm_id != NULL) {
+ ret = -1;
+ errno = EINPROGRESS;
+ connected = _gf_true;
+ goto unlock;
+ }
+
+ priv->entity = GF_RDMA_CLIENT;
+
+ ret = rdma_create_id (rdma_ctx->rdma_cm_event_channel,
+ &peer->cm_id, this, RDMA_PS_TCP);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "creation of rdma_cm_id failed (%s)",
+ strerror (errno));
+ ret = -errno;
+ goto unlock;
+ }
+
+ memcpy (&this->peerinfo.sockaddr, &sock_union.storage,
+ sockaddr_len);
+ this->peerinfo.sockaddr_len = sockaddr_len;
+
+ if (port > 0)
+ sock_union.sin.sin_port = htons (port);
+
+ ((struct sockaddr *) &this->myinfo.sockaddr)->sa_family =
+ ((struct sockaddr *)&this->peerinfo.sockaddr)->sa_family;
+
+ ret = gf_rdma_client_bind (this,
+ (struct sockaddr *)&this->myinfo.sockaddr,
+ &this->myinfo.sockaddr_len,
+ peer->cm_id);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "client bind failed: %s", strerror (errno));
+ goto unlock;
+ }
+
+ ret = rdma_resolve_addr (peer->cm_id, NULL, &sock_union.sa,
+ 2000);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "rdma_resolve_addr failed (%s)",
+ strerror (errno));
+ goto unlock;
+ }
+
+ priv->connected = 0;
+ }
+unlock:
+ pthread_mutex_unlock (&priv->write_mutex);
+
+out:
+ if (ret != 0) {
+ if (!connected) {
+ gf_rdma_teardown (this);
+ }
+
+ rpc_transport_unref (this);
+ }
+
+ return ret;
+}
+
+
+static int32_t
+gf_rdma_listen (rpc_transport_t *this)
+{
+ union gf_sock_union sock_union = {{0, }, };
+ socklen_t sockaddr_len = 0;
+ gf_rdma_private_t *priv = NULL;
+ gf_rdma_peer_t *peer = NULL;
+ int ret = 0;
+ gf_rdma_ctx_t *rdma_ctx = NULL;
+ char service[NI_MAXSERV], host[NI_MAXHOST];
+
+ priv = this->private;
+ peer = &priv->peer;
+
+ priv->entity = GF_RDMA_SERVER_LISTENER;
+
+ rdma_ctx = this->ctx->ib;
+
+ ret = gf_rdma_server_get_local_sockaddr (this, &sock_union.sa,
+ &sockaddr_len);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "cannot find network address of server to bind to");
+ goto err;
+ }
+
+ ret = rdma_create_id (rdma_ctx->rdma_cm_event_channel,
+ &peer->cm_id, this, RDMA_PS_TCP);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "creation of rdma_cm_id failed (%s)",
+ strerror (errno));
+ goto err;
+ }
+
+ memcpy (&this->myinfo.sockaddr, &sock_union.storage,
+ sockaddr_len);
+ this->myinfo.sockaddr_len = sockaddr_len;
+
+ ret = getnameinfo ((struct sockaddr *)&this->myinfo.sockaddr,
+ this->myinfo.sockaddr_len, host, sizeof (host),
+ service, sizeof (service),
+ NI_NUMERICHOST);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "getnameinfo failed (%s)", gai_strerror (ret));
+ goto err;
+ }
+
+ sprintf (this->myinfo.identifier, "%s:%s", host, service);
+
+ ret = rdma_bind_addr (peer->cm_id, &sock_union.sa);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "rdma_bind_addr failed (%s)", strerror (errno));
+ goto err;
+ }
+
+ ret = rdma_listen (peer->cm_id, 10);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "rdma_listen failed (%s)", strerror (errno));
+ goto err;
+ }
+
+ rpc_transport_ref (this);
+
+ ret = 0;
+err:
+ if (ret < 0) {
+ if (peer->cm_id != NULL) {
+ rdma_destroy_id (peer->cm_id);
+ peer->cm_id = NULL;
+ }
+ }
+
+ return ret;
+}
+
+
+struct rpc_transport_ops tops = {
+ .submit_request = gf_rdma_submit_request,
+ .submit_reply = gf_rdma_submit_reply,
+ .connect = gf_rdma_connect,
+ .disconnect = gf_rdma_disconnect,
+ .listen = gf_rdma_listen,
+};
+
+int32_t
+init (rpc_transport_t *this)
+{
+ gf_rdma_private_t *priv = NULL;
+
+ priv = GF_CALLOC (1, sizeof (*priv), gf_common_mt_rdma_private_t);
+ if (!priv)
+ return -1;
+
+ this->private = priv;
+
+ if (gf_rdma_init (this)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to initialize IB Device");
+ return -1;
+ }
+
+ return 0;
+}
+
+void
+fini (struct rpc_transport *this)
+{
+ /* TODO: verify this function does graceful finish */
+ gf_rdma_private_t *priv = NULL;
+
+ priv = this->private;
+
+ this->private = NULL;
+
+ if (priv) {
+ pthread_mutex_destroy (&priv->recv_mutex);
+ pthread_mutex_destroy (&priv->write_mutex);
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "called fini on transport: %p", this);
+ GF_FREE (priv);
+ }
+ return;
+}
+
+/* TODO: expand each option */
+struct volume_options options[] = {
+ { .key = {"transport.rdma.port",
+ "rdma-port"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 4,
+ .description = "check the option by 'ibv_devinfo'"
+ },
+ { .key = {"transport.rdma.mtu",
+ "rdma-mtu"},
+ .type = GF_OPTION_TYPE_INT,
+ },
+ { .key = {"transport.rdma.device-name",
+ "rdma-device-name"},
+ .type = GF_OPTION_TYPE_ANY,
+ .description = "check by 'ibv_devinfo'"
+ },
+ { .key = {"transport.rdma.work-request-send-count",
+ "rdma-work-request-send-count"},
+ .type = GF_OPTION_TYPE_INT,
+ },
+ { .key = {"transport.rdma.work-request-recv-count",
+ "rdma-work-request-recv-count"},
+ .type = GF_OPTION_TYPE_INT,
+ },
+ { .key = {"remote-port",
+ "transport.remote-port",
+ "transport.rdma.remote-port"},
+ .type = GF_OPTION_TYPE_INT
+ },
+ { .key = {"transport.rdma.attr-timeout",
+ "rdma-attr-timeout"},
+ .type = GF_OPTION_TYPE_INT
+ },
+ { .key = {"transport.rdma.attr-retry-cnt",
+ "rdma-attr-retry-cnt"},
+ .type = GF_OPTION_TYPE_INT
+ },
+ { .key = {"transport.rdma.attr-rnr-retry",
+ "rdma-attr-rnr-retry"},
+ .type = GF_OPTION_TYPE_INT
+ },
+ { .key = {"transport.rdma.listen-port", "listen-port"},
+ .type = GF_OPTION_TYPE_INT
+ },
+ { .key = {"transport.rdma.connect-path", "connect-path"},
+ .type = GF_OPTION_TYPE_ANY
+ },
+ { .key = {"transport.rdma.bind-path", "bind-path"},
+ .type = GF_OPTION_TYPE_ANY
+ },
+ { .key = {"transport.rdma.listen-path", "listen-path"},
+ .type = GF_OPTION_TYPE_ANY
+ },
+ { .key = {"transport.address-family",
+ "address-family"},
+ .value = {"inet", "inet6", "inet/inet6", "inet6/inet",
+ "unix", "inet-sdp" },
+ .type = GF_OPTION_TYPE_STR
+ },
+ { .key = {"transport.socket.lowlat"},
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = {NULL} }
+};
diff --git a/rpc/rpc-transport/rdma/src/rdma.h b/rpc/rpc-transport/rdma/src/rdma.h
new file mode 100644
index 000000000..7f76244f0
--- /dev/null
+++ b/rpc/rpc-transport/rdma/src/rdma.h
@@ -0,0 +1,382 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _XPORT_RDMA_H
+#define _XPORT_RDMA_H
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef MAX_IOVEC
+#define MAX_IOVEC 16
+#endif /* MAX_IOVEC */
+
+#include "rpc-clnt.h"
+#include "rpc-transport.h"
+#include "xlator.h"
+#include "event.h"
+#include <stdio.h>
+#include <list.h>
+#include <arpa/inet.h>
+#include <infiniband/verbs.h>
+#include <rdma/rdma_cma.h>
+
+/* FIXME: give appropriate values to these macros */
+#define GF_DEFAULT_RDMA_LISTEN_PORT (GF_DEFAULT_BASE_PORT + 1)
+
+/* If you are changing GF_RDMA_MAX_SEGMENTS, please make sure to update
+ * GLUSTERFS_GF_RDMA_MAX_HEADER_SIZE defined in glusterfs.h .
+ */
+#define GF_RDMA_MAX_SEGMENTS 8
+
+#define GF_RDMA_VERSION 1
+#define GF_RDMA_POOL_SIZE 512
+
+/* Additional attributes */
+#define GF_RDMA_TIMEOUT 14
+#define GF_RDMA_RETRY_CNT 7
+#define GF_RDMA_RNR_RETRY 7
+
+typedef enum gf_rdma_errcode {
+ ERR_VERS = 1,
+ ERR_CHUNK = 2
+}gf_rdma_errcode_t;
+
+struct gf_rdma_err_vers {
+ uint32_t gf_rdma_vers_low; /* Version range supported by peer */
+ uint32_t gf_rdma_vers_high;
+}__attribute__ ((packed));
+typedef struct gf_rdma_err_vers gf_rdma_err_vers_t;
+
+typedef enum gf_rdma_proc {
+ GF_RDMA_MSG = 0, /* An RPC call or reply msg */
+ GF_RDMA_NOMSG = 1, /* An RPC call or reply msg - separate body */
+ GF_RDMA_MSGP = 2, /* An RPC call or reply msg with padding */
+ GF_RDMA_DONE = 3, /* Client signals reply completion */
+ GF_RDMA_ERROR = 4 /* An RPC RDMA encoding error */
+}gf_rdma_proc_t;
+
+typedef enum gf_rdma_chunktype {
+ gf_rdma_noch = 0, /* no chunk */
+ gf_rdma_readch, /* some argument through rdma read */
+ gf_rdma_areadch, /* entire request through rdma read */
+ gf_rdma_writech, /* some result through rdma write */
+ gf_rdma_replych /* entire reply through rdma write */
+}gf_rdma_chunktype_t;
+
+/* If you are modifying __gf_rdma_header, please make sure to change
+ * GLUSTERFS_GF_RDMA_MAX_HEADER_SIZE defined in glusterfs.h to reflect your changes
+ */
+struct __gf_rdma_header {
+ uint32_t rm_xid; /* Mirrors the RPC header xid */
+ uint32_t rm_vers; /* Version of this protocol */
+ uint32_t rm_credit; /* Buffers requested/granted */
+ uint32_t rm_type; /* Type of message (enum gf_rdma_proc) */
+ union {
+ struct { /* no chunks */
+ uint32_t rm_empty[3]; /* 3 empty chunk lists */
+ }__attribute__((packed)) rm_nochunks;
+
+ struct { /* no chunks and padded */
+ uint32_t rm_align; /* Padding alignment */
+ uint32_t rm_thresh; /* Padding threshold */
+ uint32_t rm_pempty[3]; /* 3 empty chunk lists */
+ }__attribute__((packed)) rm_padded;
+
+ struct {
+ uint32_t rm_type;
+ gf_rdma_err_vers_t rm_version;
+ }__attribute__ ((packed)) rm_error;
+
+ uint32_t rm_chunks[0]; /* read, write and reply chunks */
+ }__attribute__ ((packed)) rm_body;
+} __attribute__((packed));
+typedef struct __gf_rdma_header gf_rdma_header_t;
+
+/* If you are modifying __gf_rdma_segment or __gf_rdma_read_chunk, please make sure
+ * to change GLUSTERFS_GF_RDMA_MAX_HEADER_SIZE defined in glusterfs.h to reflect
+ * your changes.
+ */
+struct __gf_rdma_segment {
+ uint32_t rs_handle; /* Registered memory handle */
+ uint32_t rs_length; /* Length of the chunk in bytes */
+ uint64_t rs_offset; /* Chunk virtual address or offset */
+} __attribute__((packed));
+typedef struct __gf_rdma_segment gf_rdma_segment_t;
+
+/* read chunk(s), encoded as a linked list. */
+struct __gf_rdma_read_chunk {
+ uint32_t rc_discrim; /* 1 indicates presence */
+ uint32_t rc_position; /* Position in XDR stream */
+ gf_rdma_segment_t rc_target;
+} __attribute__((packed));
+typedef struct __gf_rdma_read_chunk gf_rdma_read_chunk_t;
+
+/* write chunk, and reply chunk. */
+struct __gf_rdma_write_chunk {
+ gf_rdma_segment_t wc_target;
+} __attribute__((packed));
+typedef struct __gf_rdma_write_chunk gf_rdma_write_chunk_t;
+
+/* write chunk(s), encoded as a counted array. */
+struct __gf_rdma_write_array {
+ uint32_t wc_discrim; /* 1 indicates presence */
+ uint32_t wc_nchunks; /* Array count */
+ struct __gf_rdma_write_chunk wc_array[0];
+} __attribute__((packed));
+typedef struct __gf_rdma_write_array gf_rdma_write_array_t;
+
+/* options per transport end point */
+struct __gf_rdma_options {
+ int32_t port;
+ char *device_name;
+ enum ibv_mtu mtu;
+ int32_t send_count;
+ int32_t recv_count;
+ uint64_t recv_size;
+ uint64_t send_size;
+ uint8_t attr_timeout;
+ uint8_t attr_retry_cnt;
+ uint8_t attr_rnr_retry;
+};
+typedef struct __gf_rdma_options gf_rdma_options_t;
+
+struct __gf_rdma_reply_info {
+ uint32_t rm_xid; /* xid in network endian */
+ gf_rdma_chunktype_t type; /*
+ * can be either gf_rdma_replych
+ * or gf_rdma_writech.
+ */
+ gf_rdma_write_array_t *wc_array;
+ struct mem_pool *pool;
+};
+typedef struct __gf_rdma_reply_info gf_rdma_reply_info_t;
+
+struct __gf_rdma_ioq {
+ union {
+ struct list_head list;
+ struct {
+ struct __gf_rdma_ioq *next;
+ struct __gf_rdma_ioq *prev;
+ };
+ };
+
+ char is_request;
+ struct iovec rpchdr[MAX_IOVEC];
+ int rpchdr_count;
+ struct iovec proghdr[MAX_IOVEC];
+ int proghdr_count;
+ struct iovec prog_payload[MAX_IOVEC];
+ int prog_payload_count;
+
+ struct iobref *iobref;
+
+ union {
+ struct __gf_rdma_ioq_request {
+ /* used to build reply_chunk for GF_RDMA_NOMSG type msgs */
+ struct iovec rsphdr_vec[MAX_IOVEC];
+ int rsphdr_count;
+
+ /*
+ * used to build write_array during operations like
+ * read.
+ */
+ struct iovec rsp_payload[MAX_IOVEC];
+ int rsp_payload_count;
+
+ struct rpc_req *rpc_req; /* FIXME: hack! hack! should be
+ * cleaned up later
+ */
+ struct iobref *rsp_iobref;
+ }request;
+
+ gf_rdma_reply_info_t *reply_info;
+ }msg;
+
+ struct mem_pool *pool;
+};
+typedef struct __gf_rdma_ioq gf_rdma_ioq_t;
+
+typedef enum __gf_rdma_send_post_type {
+ GF_RDMA_SEND_POST_NO_CHUNKLIST, /* post which is sent using rdma-send
+ * and the msg carries no
+ * chunklists.
+ */
+ GF_RDMA_SEND_POST_READ_CHUNKLIST, /* post which is sent using rdma-send
+ * and the msg carries only read
+ * chunklist.
+ */
+ GF_RDMA_SEND_POST_WRITE_CHUNKLIST, /* post which is sent using
+ * rdma-send and the msg carries
+ * only write chunklist.
+ */
+ GF_RDMA_SEND_POST_READ_WRITE_CHUNKLIST, /* post which is sent using
+ * rdma-send and the msg
+ * carries both read and
+ * write chunklists.
+ */
+ GF_RDMA_SEND_POST_GF_RDMA_READ, /* RDMA read */
+ GF_RDMA_SEND_POST_GF_RDMA_WRITE, /* RDMA write */
+}gf_rdma_send_post_type_t;
+
+/* represents one communication peer, two per transport_t */
+struct __gf_rdma_peer {
+ rpc_transport_t *trans;
+ struct rdma_cm_id *cm_id;
+ struct ibv_qp *qp;
+ pthread_t rdma_event_thread;
+ char quota_set;
+
+ int32_t recv_count;
+ int32_t send_count;
+ int32_t recv_size;
+ int32_t send_size;
+
+ int32_t quota;
+ union {
+ struct list_head ioq;
+ struct {
+ gf_rdma_ioq_t *ioq_next;
+ gf_rdma_ioq_t *ioq_prev;
+ };
+ };
+
+ /* QP attributes, needed to connect with remote QP */
+ int32_t local_lid;
+ int32_t local_psn;
+ int32_t local_qpn;
+ int32_t remote_lid;
+ int32_t remote_psn;
+ int32_t remote_qpn;
+};
+typedef struct __gf_rdma_peer gf_rdma_peer_t;
+
+struct __gf_rdma_post_context {
+ struct ibv_mr *mr[GF_RDMA_MAX_SEGMENTS];
+ int mr_count;
+ struct iovec vector[MAX_IOVEC];
+ int count;
+ struct iobref *iobref;
+ struct iobuf *hdr_iobuf;
+ char is_request;
+ int gf_rdma_reads;
+ gf_rdma_reply_info_t *reply_info;
+};
+typedef struct __gf_rdma_post_context gf_rdma_post_context_t;
+
+typedef enum {
+ GF_RDMA_SEND_POST,
+ GF_RDMA_RECV_POST
+} gf_rdma_post_type_t;
+
+struct __gf_rdma_post {
+ struct __gf_rdma_post *next, *prev;
+ struct ibv_mr *mr;
+ char *buf;
+ int32_t buf_size;
+ char aux;
+ int32_t reused;
+ struct __gf_rdma_device *device;
+ gf_rdma_post_type_t type;
+ gf_rdma_post_context_t ctx;
+ int refcount;
+ pthread_mutex_t lock;
+};
+typedef struct __gf_rdma_post gf_rdma_post_t;
+
+struct __gf_rdma_queue {
+ gf_rdma_post_t active_posts, passive_posts;
+ int32_t active_count, passive_count;
+ pthread_mutex_t lock;
+};
+typedef struct __gf_rdma_queue gf_rdma_queue_t;
+
+struct __gf_rdma_qpreg {
+ pthread_mutex_t lock;
+ int32_t count;
+ struct _qpent {
+ struct _qpent *next, *prev;
+ int32_t qp_num;
+ gf_rdma_peer_t *peer;
+ } ents[42];
+};
+typedef struct __gf_rdma_qpreg gf_rdma_qpreg_t;
+
+/* context per device, stored in global glusterfs_ctx_t->ib */
+struct __gf_rdma_device {
+ struct __gf_rdma_device *next;
+ const char *device_name;
+ struct ibv_context *context;
+ int32_t port;
+ struct ibv_pd *pd;
+ struct ibv_srq *srq;
+ gf_rdma_qpreg_t qpreg;
+ struct ibv_comp_channel *send_chan, *recv_chan;
+ struct ibv_cq *send_cq, *recv_cq;
+ gf_rdma_queue_t sendq, recvq;
+ pthread_t send_thread, recv_thread, async_event_thread;
+ struct mem_pool *request_ctx_pool;
+ struct mem_pool *ioq_pool;
+ struct mem_pool *reply_info_pool;
+};
+typedef struct __gf_rdma_device gf_rdma_device_t;
+
+struct __gf_rdma_ctx {
+ gf_rdma_device_t *device;
+ struct rdma_event_channel *rdma_cm_event_channel;
+ pthread_t rdma_cm_thread;
+};
+typedef struct __gf_rdma_ctx gf_rdma_ctx_t;
+
+struct __gf_rdma_request_context {
+ struct ibv_mr *mr[GF_RDMA_MAX_SEGMENTS];
+ int mr_count;
+ struct mem_pool *pool;
+ gf_rdma_peer_t *peer;
+ struct iobref *iobref;
+ struct iobref *rsp_iobref;
+};
+typedef struct __gf_rdma_request_context gf_rdma_request_context_t;
+
+typedef enum {
+ GF_RDMA_SERVER_LISTENER,
+ GF_RDMA_SERVER,
+ GF_RDMA_CLIENT,
+} gf_rdma_transport_entity_t;
+
+struct __gf_rdma_private {
+ int32_t idx;
+ unsigned char connected;
+ in_addr_t addr;
+ unsigned short port;
+
+ /* IB Verbs Driver specific variables, pointers */
+ gf_rdma_peer_t peer;
+ struct __gf_rdma_device *device;
+ gf_rdma_options_t options;
+
+ /* Used by trans->op->receive */
+ char *data_ptr;
+ int32_t data_offset;
+ int32_t data_len;
+
+ /* Mutex */
+ pthread_mutex_t write_mutex;
+ rpc_transport_t *listener;
+ pthread_mutex_t recv_mutex;
+ pthread_cond_t recv_cond;
+ gf_rdma_transport_entity_t entity;
+};
+typedef struct __gf_rdma_private gf_rdma_private_t;
+
+#endif /* _XPORT_GF_RDMA_H */
diff --git a/transport/ib-verbs/Makefile.am b/rpc/rpc-transport/socket/Makefile.am
index f963effea..f963effea 100644
--- a/transport/ib-verbs/Makefile.am
+++ b/rpc/rpc-transport/socket/Makefile.am
diff --git a/rpc/rpc-transport/socket/src/Makefile.am b/rpc/rpc-transport/socket/src/Makefile.am
new file mode 100644
index 000000000..71e6ed6ff
--- /dev/null
+++ b/rpc/rpc-transport/socket/src/Makefile.am
@@ -0,0 +1,17 @@
+noinst_HEADERS = socket.h name.h
+
+rpctransport_LTLIBRARIES = socket.la
+rpctransportdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/rpc-transport
+
+socket_la_LDFLAGS = -module -avoid-version
+
+socket_la_SOURCES = socket.c name.c
+socket_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -lssl
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/rpc-lib/src/ \
+ -I$(top_srcdir)/rpc/xdr/src/
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES = *~
diff --git a/transport/socket/src/name.c b/rpc/rpc-transport/socket/src/name.c
index 83a97ed04..1647d5b6b 100644
--- a/transport/socket/src/name.c
+++ b/rpc/rpc-transport/socket/src/name.c
@@ -1,55 +1,50 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <sys/types.h>
#include <sys/socket.h>
+#include <netinet/in.h>
#include <errno.h>
#include <netdb.h>
#include <string.h>
-#ifdef CLIENT_PORT_CEILING
-#undef CLIENT_PORT_CEILING
-#endif
-
-#define CLIENT_PORT_CEILING 1024
-
#ifndef AF_INET_SDP
#define AF_INET_SDP 27
#endif
-#include "transport.h"
+#include "rpc-transport.h"
#include "socket.h"
+#include "common-utils.h"
int32_t
-gf_resolve_ip6 (const char *hostname,
- uint16_t port,
- int family,
- void **dnscache,
+gf_resolve_ip6 (const char *hostname,
+ uint16_t port,
+ int family,
+ void **dnscache,
struct addrinfo **addr_info);
static int32_t
-af_inet_bind_to_port_lt_ceiling (int fd, struct sockaddr *sockaddr,
+af_inet_bind_to_port_lt_ceiling (int fd, struct sockaddr *sockaddr,
socklen_t sockaddr_len, int ceiling)
{
- int32_t ret = -1;
- /* struct sockaddr_in sin = {0, }; */
- uint16_t port = ceiling - 1;
+ int32_t ret = -1;
+ uint16_t port = ceiling - 1;
+ // by default assume none of the ports are blocked and all are available
+ gf_boolean_t ports[1024] = {_gf_false,};
+ int i = 0;
+
+ ret = gf_process_reserved_ports (ports);
+ if (ret != 0) {
+ for (i = 0; i < 1024; i++)
+ ports[i] = _gf_false;
+ }
while (port)
{
@@ -64,7 +59,11 @@ af_inet_bind_to_port_lt_ceiling (int fd, struct sockaddr *sockaddr,
((struct sockaddr_in *)sockaddr)->sin_port = htons (port);
break;
}
-
+ // ignore the reserved ports
+ if (ports[port] == _gf_true) {
+ port--;
+ continue;
+ }
ret = bind (fd, sockaddr, sockaddr_len);
if (ret == 0)
@@ -80,21 +79,21 @@ af_inet_bind_to_port_lt_ceiling (int fd, struct sockaddr *sockaddr,
}
static int32_t
-af_unix_client_bind (transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t sockaddr_len,
+af_unix_client_bind (rpc_transport_t *this,
+ struct sockaddr *sockaddr,
+ socklen_t sockaddr_len,
int sock)
{
data_t *path_data = NULL;
struct sockaddr_un *addr = NULL;
int32_t ret = 0;
- path_data = dict_get (this->xl->options, "transport.socket.bind-path");
+ path_data = dict_get (this->options, "transport.socket.bind-path");
if (path_data) {
char *path = data_to_str (path_data);
if (!path || strlen (path) > UNIX_PATH_MAX) {
- gf_log (this->xl->name, GF_LOG_TRACE,
- "bind-path not specfied for unix socket, "
+ gf_log (this->name, GF_LOG_TRACE,
+ "bind-path not specified for unix socket, "
"letting connect to assign default value");
goto err;
}
@@ -103,14 +102,14 @@ af_unix_client_bind (transport_t *this,
strcpy (addr->sun_path, path);
ret = bind (sock, (struct sockaddr *)addr, sockaddr_len);
if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "cannot bind to unix-domain socket %d (%s)",
+ gf_log (this->name, GF_LOG_ERROR,
+ "cannot bind to unix-domain socket %d (%s)",
sock, strerror (errno));
goto err;
}
} else {
- gf_log (this->xl->name, GF_LOG_TRACE,
- "bind-path not specfied for unix socket, "
+ gf_log (this->name, GF_LOG_TRACE,
+ "bind-path not specified for unix socket, "
"letting connect to assign default value");
}
@@ -118,73 +117,81 @@ err:
return ret;
}
-static int32_t
-client_fill_address_family (transport_t *this, struct sockaddr *sockaddr)
+int32_t
+client_fill_address_family (rpc_transport_t *this, sa_family_t *sa_family)
{
- data_t *address_family_data = NULL;
+ data_t *address_family_data = NULL;
+ int32_t ret = -1;
- address_family_data = dict_get (this->xl->options,
+ if (sa_family == NULL) {
+ gf_log_callingfn ("", GF_LOG_WARNING,
+ "sa_family argument is NULL");
+ goto out;
+ }
+
+ address_family_data = dict_get (this->options,
"transport.address-family");
if (!address_family_data) {
data_t *remote_host_data = NULL, *connect_path_data = NULL;
- remote_host_data = dict_get (this->xl->options, "remote-host");
- connect_path_data = dict_get (this->xl->options,
+ remote_host_data = dict_get (this->options, "remote-host");
+ connect_path_data = dict_get (this->options,
"transport.socket.connect-path");
- if (!(remote_host_data || connect_path_data) ||
+ if (!(remote_host_data || connect_path_data) ||
(remote_host_data && connect_path_data)) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "transport.address-family not specified and "
- "not able to determine the "
- "same from other options (remote-host:%s and "
- "transport.unix.connect-path:%s)",
- data_to_str (remote_host_data),
+ gf_log (this->name, GF_LOG_ERROR,
+ "transport.address-family not specified. "
+ "Could not guess default value from (remote-host:%s or "
+ "transport.unix.connect-path:%s) options",
+ data_to_str (remote_host_data),
data_to_str (connect_path_data));
- return -1;
- }
+ *sa_family = AF_UNSPEC;
+ goto out;
+ }
if (remote_host_data) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_DEBUG,
"address-family not specified, guessing it "
- "to be inet/inet6");
- sockaddr->sa_family = AF_UNSPEC;
+ "to be inet from (remote-host: %s)", data_to_str (remote_host_data));
+ *sa_family = AF_INET;
} else {
- gf_log (this->xl->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_DEBUG,
"address-family not specified, guessing it "
- "to be unix");
- sockaddr->sa_family = AF_UNIX;
+ "to be unix from (transport.unix.connect-path: %s)", data_to_str (connect_path_data));
+ *sa_family = AF_UNIX;
}
} else {
char *address_family = data_to_str (address_family_data);
if (!strcasecmp (address_family, "unix")) {
- sockaddr->sa_family = AF_UNIX;
+ *sa_family = AF_UNIX;
} else if (!strcasecmp (address_family, "inet")) {
- sockaddr->sa_family = AF_INET;
+ *sa_family = AF_INET;
} else if (!strcasecmp (address_family, "inet6")) {
- sockaddr->sa_family = AF_INET6;
+ *sa_family = AF_INET6;
} else if (!strcasecmp (address_family, "inet-sdp")) {
- sockaddr->sa_family = AF_INET_SDP;
- } else if (!strcasecmp (address_family, "inet/inet6")
- || !strcasecmp (address_family, "inet6/inet")) {
- sockaddr->sa_family = AF_UNSPEC;
+ *sa_family = AF_INET_SDP;
} else {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "unknown address-family (%s) specified",
+ gf_log (this->name, GF_LOG_ERROR,
+ "unknown address-family (%s) specified",
address_family);
- return -1;
+ *sa_family = AF_UNSPEC;
+ goto out;
}
}
- return 0;
+ ret = 0;
+
+out:
+ return ret;
}
static int32_t
-af_inet_client_get_remote_sockaddr (transport_t *this,
- struct sockaddr *sockaddr,
+af_inet_client_get_remote_sockaddr (rpc_transport_t *this,
+ struct sockaddr *sockaddr,
socklen_t *sockaddr_len)
{
- dict_t *options = this->xl->options;
+ dict_t *options = this->options;
data_t *remote_host_data = NULL;
data_t *remote_port_data = NULL;
char *remote_host = NULL;
@@ -195,8 +202,8 @@ af_inet_client_get_remote_sockaddr (transport_t *this,
remote_host_data = dict_get (options, "remote-host");
if (remote_host_data == NULL)
{
- gf_log (this->xl->name, GF_LOG_ERROR,
- "option remote-host missing in volume %s", this->xl->name);
+ gf_log (this->name, GF_LOG_ERROR,
+ "option remote-host missing in volume %s", this->name);
ret = -1;
goto err;
}
@@ -204,8 +211,8 @@ af_inet_client_get_remote_sockaddr (transport_t *this,
remote_host = data_to_str (remote_host_data);
if (remote_host == NULL)
{
- gf_log (this->xl->name, GF_LOG_ERROR,
- "option remote-host has data NULL in volume %s", this->xl->name);
+ gf_log (this->name, GF_LOG_ERROR,
+ "option remote-host has data NULL in volume %s", this->name);
ret = -1;
goto err;
}
@@ -213,9 +220,9 @@ af_inet_client_get_remote_sockaddr (transport_t *this,
remote_port_data = dict_get (options, "remote-port");
if (remote_port_data == NULL)
{
- gf_log (this->xl->name, GF_LOG_TRACE,
+ gf_log (this->name, GF_LOG_TRACE,
"option remote-port missing in volume %s. Defaulting to %d",
- this->xl->name, GF_DEFAULT_SOCKET_LISTEN_PORT);
+ this->name, GF_DEFAULT_SOCKET_LISTEN_PORT);
remote_port = GF_DEFAULT_SOCKET_LISTEN_PORT;
}
@@ -226,9 +233,9 @@ af_inet_client_get_remote_sockaddr (transport_t *this,
if (remote_port == (uint16_t)-1)
{
- gf_log (this->xl->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"option remote-port has invalid port in volume %s",
- this->xl->name);
+ this->name);
ret = -1;
goto err;
}
@@ -238,7 +245,7 @@ af_inet_client_get_remote_sockaddr (transport_t *this,
ret = gf_resolve_ip6 (remote_host, remote_port,
sockaddr->sa_family, &this->dnscache, &addr_info);
if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"DNS resolution failed on host %s", remote_host);
goto err;
}
@@ -251,8 +258,8 @@ err:
}
static int32_t
-af_unix_client_get_remote_sockaddr (transport_t *this,
- struct sockaddr *sockaddr,
+af_unix_client_get_remote_sockaddr (rpc_transport_t *this,
+ struct sockaddr *sockaddr,
socklen_t *sockaddr_len)
{
struct sockaddr_un *sockaddr_un = NULL;
@@ -260,10 +267,10 @@ af_unix_client_get_remote_sockaddr (transport_t *this,
data_t *connect_path_data = NULL;
int32_t ret = 0;
- connect_path_data = dict_get (this->xl->options,
+ connect_path_data = dict_get (this->options,
"transport.socket.connect-path");
if (!connect_path_data) {
- gf_log (this->xl->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"option transport.unix.connect-path not specified for "
"address-family unix");
ret = -1;
@@ -272,21 +279,21 @@ af_unix_client_get_remote_sockaddr (transport_t *this,
connect_path = data_to_str (connect_path_data);
if (!connect_path) {
- gf_log (this->xl->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"transport.unix.connect-path is null-string");
ret = -1;
goto err;
}
if (strlen (connect_path) > UNIX_PATH_MAX) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "connect-path value length %"GF_PRI_SIZET" > %d octets",
+ gf_log (this->name, GF_LOG_ERROR,
+ "connect-path value length %"GF_PRI_SIZET" > %d octets",
strlen (connect_path), UNIX_PATH_MAX);
ret = -1;
goto err;
}
- gf_log (this->xl->name, GF_LOG_TRACE,
+ gf_log (this->name, GF_LOG_TRACE,
"using connect-path %s", connect_path);
sockaddr_un = (struct sockaddr_un *)sockaddr;
strcpy (sockaddr_un->sun_path, connect_path);
@@ -297,7 +304,7 @@ err:
}
static int32_t
-af_unix_server_get_local_sockaddr (transport_t *this,
+af_unix_server_get_local_sockaddr (rpc_transport_t *this,
struct sockaddr *addr,
socklen_t *addr_len)
{
@@ -307,10 +314,10 @@ af_unix_server_get_local_sockaddr (transport_t *this,
struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
- listen_path_data = dict_get (this->xl->options,
+ listen_path_data = dict_get (this->options,
"transport.socket.listen-path");
if (!listen_path_data) {
- gf_log (this->xl->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"missing option transport.socket.listen-path");
ret = -1;
goto err;
@@ -323,7 +330,7 @@ af_unix_server_get_local_sockaddr (transport_t *this,
#endif
if (strlen (listen_path) > UNIX_PATH_MAX) {
- gf_log (this->xl->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"option transport.unix.listen-path has value length "
"%"GF_PRI_SIZET" > %d",
strlen (listen_path), UNIX_PATH_MAX);
@@ -339,19 +346,19 @@ err:
return ret;
}
-static int32_t
-af_inet_server_get_local_sockaddr (transport_t *this,
- struct sockaddr *addr,
+static int32_t
+af_inet_server_get_local_sockaddr (rpc_transport_t *this,
+ struct sockaddr *addr,
socklen_t *addr_len)
{
- struct addrinfo hints, *res = 0;
+ struct addrinfo hints, *res = 0, *rp = NULL;
data_t *listen_port_data = NULL, *listen_host_data = NULL;
uint16_t listen_port = -1;
char service[NI_MAXSERV], *listen_host = NULL;
dict_t *options = NULL;
int32_t ret = 0;
- options = this->xl->options;
+ options = this->options;
listen_port_data = dict_get (options, "transport.socket.listen-port");
listen_host_data = dict_get (options, "transport.socket.bind-address");
@@ -368,6 +375,20 @@ af_inet_server_get_local_sockaddr (transport_t *this,
if (listen_host_data)
{
listen_host = data_to_str (listen_host_data);
+ } else {
+ if (addr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *in = (struct sockaddr_in6 *) addr;
+ in->sin6_addr = in6addr_any;
+ in->sin6_port = htons(listen_port);
+ *addr_len = sizeof(struct sockaddr_in6);
+ goto out;
+ } else if (addr->sa_family == AF_INET) {
+ struct sockaddr_in *in = (struct sockaddr_in *) addr;
+ in->sin_addr.s_addr = htonl(INADDR_ANY);
+ in->sin_port = htons(listen_port);
+ *addr_len = sizeof(struct sockaddr_in);
+ goto out;
+ }
}
memset (service, 0, sizeof (service));
@@ -376,30 +397,41 @@ af_inet_server_get_local_sockaddr (transport_t *this,
memset (&hints, 0, sizeof (hints));
hints.ai_family = addr->sa_family;
hints.ai_socktype = SOCK_STREAM;
- hints.ai_flags = AI_ADDRCONFIG | AI_PASSIVE;
+ hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
ret = getaddrinfo(listen_host, service, &hints, &res);
if (ret != 0) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "getaddrinfo failed for host %s, service %s (%s)",
+ gf_log (this->name, GF_LOG_ERROR,
+ "getaddrinfo failed for host %s, service %s (%s)",
listen_host, service, gai_strerror (ret));
ret = -1;
- goto err;
+ goto out;
+ }
+ /* IPV6 server can handle both ipv4 and ipv6 clients */
+ for (rp = res; rp != NULL; rp = rp->ai_next) {
+ if (rp->ai_addr == NULL)
+ continue;
+ if (rp->ai_family == AF_INET6) {
+ memcpy (addr, rp->ai_addr, rp->ai_addrlen);
+ *addr_len = rp->ai_addrlen;
+ }
}
- memcpy (addr, res->ai_addr, res->ai_addrlen);
- *addr_len = res->ai_addrlen;
+ if (!(*addr_len)) {
+ memcpy (addr, res->ai_addr, res->ai_addrlen);
+ *addr_len = res->ai_addrlen;
+ }
freeaddrinfo (res);
-err:
+out:
return ret;
}
-int32_t
-client_bind (transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len,
+int32_t
+client_bind (rpc_transport_t *this,
+ struct sockaddr *sockaddr,
+ socklen_t *sockaddr_len,
int sock)
{
int ret = 0;
@@ -412,24 +444,26 @@ client_bind (transport_t *this,
*sockaddr_len = sizeof (struct sockaddr_in);
case AF_INET6:
- ret = af_inet_bind_to_port_lt_ceiling (sock, sockaddr,
- *sockaddr_len, CLIENT_PORT_CEILING);
+ if (!this->bind_insecure) {
+ ret = af_inet_bind_to_port_lt_ceiling (sock, sockaddr,
+ *sockaddr_len, GF_CLIENT_PORT_CEILING);
+ }
if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_WARNING,
- "cannot bind inet socket (%d) to port less than %d (%s)",
- sock, CLIENT_PORT_CEILING, strerror (errno));
+ gf_log (this->name, GF_LOG_DEBUG,
+ "cannot bind inet socket (%d) to port less than %d (%s)",
+ sock, GF_CLIENT_PORT_CEILING, strerror (errno));
ret = 0;
}
break;
case AF_UNIX:
*sockaddr_len = sizeof (struct sockaddr_un);
- ret = af_unix_client_bind (this, (struct sockaddr *)sockaddr,
+ ret = af_unix_client_bind (this, (struct sockaddr *)sockaddr,
*sockaddr_len, sock);
break;
default:
- gf_log (this->xl->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"unknown address family %d", sockaddr->sa_family);
ret = -1;
break;
@@ -439,101 +473,123 @@ client_bind (transport_t *this,
}
int32_t
-socket_client_get_remote_sockaddr (transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len)
+socket_client_get_remote_sockaddr (rpc_transport_t *this,
+ struct sockaddr *sockaddr,
+ socklen_t *sockaddr_len,
+ sa_family_t *sa_family)
{
int32_t ret = 0;
- char is_inet_sdp = 0;
- ret = client_fill_address_family (this, sockaddr);
+ GF_VALIDATE_OR_GOTO ("socket", sockaddr, err);
+ GF_VALIDATE_OR_GOTO ("socket", sockaddr_len, err);
+ GF_VALIDATE_OR_GOTO ("socket", sa_family, err);
+
+ ret = client_fill_address_family (this, &sockaddr->sa_family);
if (ret) {
ret = -1;
goto err;
}
-
+
+ *sa_family = sockaddr->sa_family;
+
switch (sockaddr->sa_family)
{
case AF_INET_SDP:
sockaddr->sa_family = AF_INET;
- is_inet_sdp = 1;
case AF_INET:
case AF_INET6:
case AF_UNSPEC:
- ret = af_inet_client_get_remote_sockaddr (this, sockaddr, sockaddr_len);
-
- if (is_inet_sdp) {
- sockaddr->sa_family = AF_INET_SDP;
- }
-
+ ret = af_inet_client_get_remote_sockaddr (this, sockaddr,
+ sockaddr_len);
break;
case AF_UNIX:
- ret = af_unix_client_get_remote_sockaddr (this, sockaddr, sockaddr_len);
+ ret = af_unix_client_get_remote_sockaddr (this, sockaddr,
+ sockaddr_len);
break;
default:
- gf_log (this->xl->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"unknown address-family %d", sockaddr->sa_family);
ret = -1;
}
-
+
+ if (*sa_family == AF_UNSPEC) {
+ *sa_family = sockaddr->sa_family;
+ }
+
err:
return ret;
}
+
int32_t
-socket_server_get_local_sockaddr (transport_t *this,
- struct sockaddr *addr,
- socklen_t *addr_len)
+server_fill_address_family (rpc_transport_t *this, sa_family_t *sa_family)
{
- data_t *address_family_data = NULL;
- int32_t ret = 0;
- char is_inet_sdp = 0;
+ data_t *address_family_data = NULL;
+ int32_t ret = -1;
- address_family_data = dict_get (this->xl->options,
+ GF_VALIDATE_OR_GOTO ("socket", sa_family, out);
+
+ address_family_data = dict_get (this->options,
"transport.address-family");
if (address_family_data) {
char *address_family = NULL;
address_family = data_to_str (address_family_data);
if (!strcasecmp (address_family, "inet")) {
- addr->sa_family = AF_INET;
+ *sa_family = AF_INET;
} else if (!strcasecmp (address_family, "inet6")) {
- addr->sa_family = AF_INET6;
+ *sa_family = AF_INET6;
} else if (!strcasecmp (address_family, "inet-sdp")) {
- addr->sa_family = AF_INET_SDP;
+ *sa_family = AF_INET_SDP;
} else if (!strcasecmp (address_family, "unix")) {
- addr->sa_family = AF_UNIX;
- } else if (!strcasecmp (address_family, "inet/inet6")
- || !strcasecmp (address_family, "inet6/inet")) {
- addr->sa_family = AF_UNSPEC;
+ *sa_family = AF_UNIX;
} else {
- gf_log (this->xl->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"unknown address family (%s) specified", address_family);
- ret = -1;
- goto err;
+ *sa_family = AF_UNSPEC;
+ goto out;
}
} else {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "option address-family not specified, defaulting to inet/inet6");
- addr->sa_family = AF_UNSPEC;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "option address-family not specified, defaulting to inet");
+ *sa_family = AF_INET;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+int32_t
+socket_server_get_local_sockaddr (rpc_transport_t *this, struct sockaddr *addr,
+ socklen_t *addr_len, sa_family_t *sa_family)
+{
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("socket", sa_family, err);
+ GF_VALIDATE_OR_GOTO ("socket", addr, err);
+ GF_VALIDATE_OR_GOTO ("socket", addr_len, err);
+
+ ret = server_fill_address_family (this, &addr->sa_family);
+ if (ret == -1) {
+ goto err;
}
+ *sa_family = addr->sa_family;
+
switch (addr->sa_family)
{
case AF_INET_SDP:
- is_inet_sdp = 1;
addr->sa_family = AF_INET;
case AF_INET:
case AF_INET6:
case AF_UNSPEC:
ret = af_inet_server_get_local_sockaddr (this, addr, addr_len);
- if (is_inet_sdp && !ret) {
- addr->sa_family = AF_INET_SDP;
- }
break;
case AF_UNIX:
@@ -541,58 +597,64 @@ socket_server_get_local_sockaddr (transport_t *this,
break;
}
+ if (*sa_family == AF_UNSPEC) {
+ *sa_family = addr->sa_family;
+ }
+
err:
return ret;
}
-int32_t
-fill_inet6_inet_identifiers (transport_t *this, struct sockaddr_storage *addr,
+int32_t
+fill_inet6_inet_identifiers (rpc_transport_t *this, struct sockaddr_storage *addr,
int32_t addr_len, char *identifier)
{
- int32_t ret = 0, tmpaddr_len = 0;
- char service[NI_MAXSERV], host[NI_MAXHOST];
- struct sockaddr_storage tmpaddr;
-
- memset (&tmpaddr, 0, sizeof (tmpaddr));
- tmpaddr = *addr;
+ union gf_sock_union sock_union;
+
+ char service[NI_MAXSERV] = {0,};
+ char host[NI_MAXHOST] = {0,};
+ int32_t ret = 0;
+ int32_t tmpaddr_len = 0;
+ int32_t one_to_four = 0;
+ int32_t four_to_eight = 0;
+ int32_t twelve_to_sixteen = 0;
+ int16_t eight_to_ten = 0;
+ int16_t ten_to_twelve = 0;
+
+ memset (&sock_union, 0, sizeof (sock_union));
+ sock_union.storage = *addr;
tmpaddr_len = addr_len;
- if (((struct sockaddr *) &tmpaddr)->sa_family == AF_INET6) {
- int32_t one_to_four, four_to_eight, twelve_to_sixteen;
- int16_t eight_to_ten, ten_to_twelve;
-
- one_to_four = four_to_eight = twelve_to_sixteen = 0;
- eight_to_ten = ten_to_twelve = 0;
-
- one_to_four = ((struct sockaddr_in6 *) &tmpaddr)->sin6_addr.s6_addr32[0];
- four_to_eight = ((struct sockaddr_in6 *) &tmpaddr)->sin6_addr.s6_addr32[1];
+ if (sock_union.sa.sa_family == AF_INET6) {
+ one_to_four = sock_union.sin6.sin6_addr.s6_addr32[0];
+ four_to_eight = sock_union.sin6.sin6_addr.s6_addr32[1];
#ifdef GF_SOLARIS_HOST_OS
- eight_to_ten = S6_ADDR16(((struct sockaddr_in6 *) &tmpaddr)->sin6_addr)[4];
+ eight_to_ten = S6_ADDR16(sock_union.sin6.sin6_addr)[4];
#else
- eight_to_ten = ((struct sockaddr_in6 *) &tmpaddr)->sin6_addr.s6_addr16[4];
+ eight_to_ten = sock_union.sin6.sin6_addr.s6_addr16[4];
#endif
#ifdef GF_SOLARIS_HOST_OS
- ten_to_twelve = S6_ADDR16(((struct sockaddr_in6 *) &tmpaddr)->sin6_addr)[5];
+ ten_to_twelve = S6_ADDR16(sock_union.sin6.sin6_addr)[5];
#else
- ten_to_twelve = ((struct sockaddr_in6 *) &tmpaddr)->sin6_addr.s6_addr16[5];
+ ten_to_twelve = sock_union.sin6.sin6_addr.s6_addr16[5];
#endif
- twelve_to_sixteen = ((struct sockaddr_in6 *) &tmpaddr)->sin6_addr.s6_addr32[3];
+ twelve_to_sixteen = sock_union.sin6.sin6_addr.s6_addr32[3];
/* ipv4 mapped ipv6 address has
bits 0-80: 0
bits 80-96: 0xffff
- bits 96-128: ipv4 address
+ bits 96-128: ipv4 address
*/
-
+
if (one_to_four == 0 &&
four_to_eight == 0 &&
eight_to_ten == 0 &&
ten_to_twelve == -1) {
- struct sockaddr_in *in_ptr = (struct sockaddr_in *)&tmpaddr;
- memset (&tmpaddr, 0, sizeof (tmpaddr));
-
+ struct sockaddr_in *in_ptr = &sock_union.sin;
+ memset (&sock_union, 0, sizeof (sock_union));
+
in_ptr->sin_family = AF_INET;
in_ptr->sin_port = ((struct sockaddr_in6 *)addr)->sin6_port;
in_ptr->sin_addr.s_addr = twelve_to_sixteen;
@@ -600,13 +662,13 @@ fill_inet6_inet_identifiers (transport_t *this, struct sockaddr_storage *addr,
}
}
- ret = getnameinfo ((struct sockaddr *) &tmpaddr,
+ ret = getnameinfo (&sock_union.sa,
tmpaddr_len,
host, sizeof (host),
service, sizeof (service),
NI_NUMERICHOST | NI_NUMERICSERV);
if (ret != 0) {
- gf_log (this->xl->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"getnameinfo failed (%s)", gai_strerror (ret));
}
@@ -616,7 +678,7 @@ fill_inet6_inet_identifiers (transport_t *this, struct sockaddr_storage *addr,
}
int32_t
-get_transport_identifiers (transport_t *this)
+get_transport_identifiers (rpc_transport_t *this)
{
int32_t ret = 0;
char is_inet_sdp = 0;
@@ -630,12 +692,12 @@ get_transport_identifiers (transport_t *this)
case AF_INET:
case AF_INET6:
{
- ret = fill_inet6_inet_identifiers (this,
- &this->myinfo.sockaddr,
+ ret = fill_inet6_inet_identifiers (this,
+ &this->myinfo.sockaddr,
this->myinfo.sockaddr_len,
this->myinfo.identifier);
if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"cannot fill inet/inet6 identifier for server");
goto err;
}
@@ -645,7 +707,7 @@ get_transport_identifiers (transport_t *this)
this->peerinfo.sockaddr_len,
this->peerinfo.identifier);
if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"cannot fill inet/inet6 identifier for client");
goto err;
}
@@ -669,8 +731,8 @@ get_transport_identifiers (transport_t *this)
break;
default:
- gf_log (this->xl->name, GF_LOG_ERROR,
- "unknown address family (%d)",
+ gf_log (this->name, GF_LOG_ERROR,
+ "unknown address family (%d)",
((struct sockaddr *) &this->myinfo.sockaddr)->sa_family);
ret = -1;
break;
diff --git a/rpc/rpc-transport/socket/src/name.h b/rpc/rpc-transport/socket/src/name.h
new file mode 100644
index 000000000..0a13d8a96
--- /dev/null
+++ b/rpc/rpc-transport/socket/src/name.h
@@ -0,0 +1,35 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _SOCKET_NAME_H
+#define _SOCKET_NAME_H
+
+#include "compat.h"
+
+int32_t
+client_bind (rpc_transport_t *this,
+ struct sockaddr *sockaddr,
+ socklen_t *sockaddr_len,
+ int sock);
+
+int32_t
+socket_client_get_remote_sockaddr (rpc_transport_t *this,
+ struct sockaddr *sockaddr,
+ socklen_t *sockaddr_len,
+ sa_family_t *sa_family);
+
+int32_t
+socket_server_get_local_sockaddr (rpc_transport_t *this, struct sockaddr *addr,
+ socklen_t *addr_len, sa_family_t *sa_family);
+
+int32_t
+get_transport_identifiers (rpc_transport_t *this);
+
+#endif /* _SOCKET_NAME_H */
diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c
new file mode 100644
index 000000000..93da3f296
--- /dev/null
+++ b/rpc/rpc-transport/socket/src/socket.c
@@ -0,0 +1,3744 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "socket.h"
+#include "name.h"
+#include "dict.h"
+#include "rpc-transport.h"
+#include "logging.h"
+#include "xlator.h"
+#include "byte-order.h"
+#include "common-utils.h"
+#include "compat-errno.h"
+
+
+/* ugly #includes below */
+#include "protocol-common.h"
+#include "glusterfs3-xdr.h"
+#include "xdr-nfs3.h"
+#include "rpcsvc.h"
+
+#include <fcntl.h>
+#include <errno.h>
+#include <netinet/tcp.h>
+#include <rpc/xdr.h>
+#include <sys/ioctl.h>
+#define GF_LOG_ERRNO(errno) ((errno == ENOTCONN) ? GF_LOG_DEBUG : GF_LOG_ERROR)
+#define SA(ptr) ((struct sockaddr *)ptr)
+
+#define SSL_ENABLED_OPT "transport.socket.ssl-enabled"
+#define SSL_OWN_CERT_OPT "transport.socket.ssl-own-cert"
+#define SSL_PRIVATE_KEY_OPT "transport.socket.ssl-private-key"
+#define SSL_CA_LIST_OPT "transport.socket.ssl-ca-list"
+#define OWN_THREAD_OPT "transport.socket.own-thread"
+
+/* TBD: do automake substitutions etc. (ick) to set these. */
+#if !defined(DEFAULT_CERT_PATH)
+#define DEFAULT_CERT_PATH "/etc/ssl/glusterfs.pem"
+#endif
+#if !defined(DEFAULT_KEY_PATH)
+#define DEFAULT_KEY_PATH "/etc/ssl/glusterfs.key"
+#endif
+#if !defined(DEFAULT_CA_PATH)
+#define DEFAULT_CA_PATH "/etc/ssl/glusterfs.ca"
+#endif
+
+#define POLL_MASK_INPUT (POLLIN | POLLPRI)
+#define POLL_MASK_OUTPUT (POLLOUT)
+#define POLL_MASK_ERROR (POLLERR | POLLHUP | POLLNVAL)
+
+typedef int SSL_unary_func (SSL *);
+typedef int SSL_trinary_func (SSL *, void *, int);
+
+#define __socket_proto_reset_pending(priv) do { \
+ struct gf_sock_incoming_frag *frag; \
+ frag = &priv->incoming.frag; \
+ \
+ memset (&frag->vector, 0, sizeof (frag->vector)); \
+ frag->pending_vector = &frag->vector; \
+ frag->pending_vector->iov_base = frag->fragcurrent; \
+ priv->incoming.pending_vector = frag->pending_vector; \
+ } while (0)
+
+
+#define __socket_proto_update_pending(priv) \
+ do { \
+ uint32_t remaining; \
+ struct gf_sock_incoming_frag *frag; \
+ frag = &priv->incoming.frag; \
+ if (frag->pending_vector->iov_len == 0) { \
+ remaining = (RPC_FRAGSIZE (priv->incoming.fraghdr) \
+ - frag->bytes_read); \
+ \
+ frag->pending_vector->iov_len = \
+ (remaining > frag->remaining_size) \
+ ? frag->remaining_size : remaining; \
+ \
+ frag->remaining_size -= \
+ frag->pending_vector->iov_len; \
+ } \
+ } while (0)
+
+#define __socket_proto_update_priv_after_read(priv, ret, bytes_read) \
+ { \
+ struct gf_sock_incoming_frag *frag; \
+ frag = &priv->incoming.frag; \
+ \
+ frag->fragcurrent += bytes_read; \
+ frag->bytes_read += bytes_read; \
+ \
+ if ((ret > 0) || (frag->remaining_size != 0)) { \
+ if (frag->remaining_size != 0 && ret == 0) { \
+ __socket_proto_reset_pending (priv); \
+ } \
+ \
+ gf_log (this->name, GF_LOG_TRACE, \
+ "partial read on non-blocking socket"); \
+ \
+ break; \
+ } \
+ }
+
+#define __socket_proto_init_pending(priv,size) \
+ do { \
+ uint32_t remaining = 0; \
+ struct gf_sock_incoming_frag *frag; \
+ frag = &priv->incoming.frag; \
+ \
+ remaining = (RPC_FRAGSIZE (priv->incoming.fraghdr) \
+ - frag->bytes_read); \
+ \
+ __socket_proto_reset_pending (priv); \
+ \
+ frag->pending_vector->iov_len = \
+ (remaining > size) ? size : remaining; \
+ \
+ frag->remaining_size = (size - frag->pending_vector->iov_len); \
+ \
+ } while(0)
+
+
+/* This will be used in a switch case and breaks from the switch case if all
+ * the pending data is not read.
+ */
+#define __socket_proto_read(priv, ret) \
+ { \
+ size_t bytes_read = 0; \
+ struct gf_sock_incoming *in; \
+ in = &priv->incoming; \
+ \
+ __socket_proto_update_pending (priv); \
+ \
+ ret = __socket_readv (this, \
+ in->pending_vector, 1, \
+ &in->pending_vector, \
+ &in->pending_count, \
+ &bytes_read); \
+ if (ret == -1) \
+ break; \
+ __socket_proto_update_priv_after_read (priv, ret, bytes_read); \
+ }
+
+static int socket_init (rpc_transport_t *this);
+
+static void
+ssl_dump_error_stack (const char *caller)
+{
+ unsigned long errnum = 0;
+ char errbuf[120] = {0,};
+
+ /* OpenSSL docs explicitly give 120 as the error-string length. */
+
+ while ((errnum = ERR_get_error())) {
+ ERR_error_string(errnum,errbuf);
+ gf_log(caller,GF_LOG_ERROR," %s",errbuf);
+ }
+}
+
+static int
+ssl_do (rpc_transport_t *this, void *buf, size_t len, SSL_trinary_func *func)
+{
+ int r = (-1);
+ struct pollfd pfd = {-1,};
+ socket_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO(this->name,this->private,out);
+ priv = this->private;
+
+ for (;;) {
+ if (buf) {
+ if (priv->connected == -1) {
+ /*
+ * Fields in the SSL structure (especially
+ * the BIO pointers) are not valid at this
+ * point, so we'll segfault if we pass them
+ * to SSL_read/SSL_write.
+ */
+ gf_log(this->name,GF_LOG_INFO,
+ "lost connection in %s", __func__);
+ break;
+ }
+ r = func(priv->ssl_ssl,buf,len);
+ }
+ else {
+ /*
+ * We actually need these functions to get to
+ * priv->connected == 1.
+ */
+ r = ((SSL_unary_func *)func)(priv->ssl_ssl);
+ }
+ switch (SSL_get_error(priv->ssl_ssl,r)) {
+ case SSL_ERROR_NONE:
+ return r;
+ case SSL_ERROR_WANT_READ:
+ pfd.fd = priv->sock;
+ pfd.events = POLLIN;
+ if (poll(&pfd,1,-1) < 0) {
+ gf_log(this->name,GF_LOG_ERROR,"poll error %d",
+ errno);
+ }
+ break;
+ case SSL_ERROR_WANT_WRITE:
+ pfd.fd = priv->sock;
+ pfd.events = POLLOUT;
+ if (poll(&pfd,1,-1) < 0) {
+ gf_log(this->name,GF_LOG_ERROR,"poll error %d",
+ errno);
+ }
+ break;
+ case SSL_ERROR_SYSCALL:
+ /* This is what we get when remote disconnects. */
+ gf_log(this->name,GF_LOG_DEBUG,
+ "syscall error (probably remote disconnect)");
+ errno = ENODATA;
+ goto out;
+ default:
+ errno = EIO;
+ goto out; /* "break" would just loop again */
+ }
+ }
+out:
+ return -1;
+}
+
+#define ssl_connect_one(t) ssl_do((t),NULL,0,(SSL_trinary_func *)SSL_connect)
+#define ssl_accept_one(t) ssl_do((t),NULL,0,(SSL_trinary_func *)SSL_accept)
+#define ssl_read_one(t,b,l) ssl_do((t),(b),(l),(SSL_trinary_func *)SSL_read)
+#define ssl_write_one(t,b,l) ssl_do((t),(b),(l),(SSL_trinary_func *)SSL_write)
+
+static int
+ssl_setup_connection (rpc_transport_t *this, int server)
+{
+ X509 *peer = NULL;
+ char peer_CN[256] = "";
+ int ret = -1;
+ socket_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO(this->name,this->private,done);
+ priv = this->private;
+
+ priv->ssl_ssl = SSL_new(priv->ssl_ctx);
+ if (!priv->ssl_ssl) {
+ gf_log(this->name,GF_LOG_ERROR,"SSL_new failed");
+ ssl_dump_error_stack(this->name);
+ goto done;
+ }
+ priv->ssl_sbio = BIO_new_socket(priv->sock,BIO_NOCLOSE);
+ if (!priv->ssl_sbio) {
+ gf_log(this->name,GF_LOG_ERROR,"BIO_new_socket failed");
+ ssl_dump_error_stack(this->name);
+ goto free_ssl;
+ }
+ SSL_set_bio(priv->ssl_ssl,priv->ssl_sbio,priv->ssl_sbio);
+
+ if (server) {
+ ret = ssl_accept_one(this);
+ }
+ else {
+ ret = ssl_connect_one(this);
+ }
+
+ /* Make sure _the call_ succeeded. */
+ if (ret < 0) {
+ goto ssl_error;
+ }
+
+ /* Make sure _SSL verification_ succeeded, yielding an identity. */
+ if (SSL_get_verify_result(priv->ssl_ssl) != X509_V_OK) {
+ goto ssl_error;
+ }
+ peer = SSL_get_peer_certificate(priv->ssl_ssl);
+ if (!peer) {
+ goto ssl_error;
+ }
+
+ /* Finally, everything seems OK. */
+ X509_NAME_get_text_by_NID(X509_get_subject_name(peer),
+ NID_commonName, peer_CN, sizeof(peer_CN)-1);
+ peer_CN[sizeof(peer_CN)-1] = '\0';
+ gf_log(this->name,GF_LOG_INFO,"peer CN = %s", peer_CN);
+ return 0;
+
+ /* Error paths. */
+ssl_error:
+ gf_log(this->name,GF_LOG_ERROR,"SSL connect error");
+ ssl_dump_error_stack(this->name);
+free_ssl:
+ SSL_free(priv->ssl_ssl);
+ priv->ssl_ssl = NULL;
+done:
+ return ret;
+}
+
+
+static void
+ssl_teardown_connection (socket_private_t *priv)
+{
+ SSL_shutdown(priv->ssl_ssl);
+ SSL_clear(priv->ssl_ssl);
+ SSL_free(priv->ssl_ssl);
+ priv->ssl_ssl = NULL;
+}
+
+
+static ssize_t
+__socket_ssl_readv (rpc_transport_t *this, struct iovec *opvector, int opcount)
+{
+ socket_private_t *priv = NULL;
+ int sock = -1;
+ int ret = -1;
+
+ priv = this->private;
+ sock = priv->sock;
+
+ if (priv->use_ssl) {
+ ret = ssl_read_one (this, opvector->iov_base, opvector->iov_len);
+ } else {
+ ret = readv (sock, opvector, opcount);
+ }
+
+ return ret;
+}
+
+
+static ssize_t
+__socket_ssl_read (rpc_transport_t *this, void *buf, size_t count)
+{
+ struct iovec iov = {0, };
+ int ret = -1;
+
+ iov.iov_base = buf;
+ iov.iov_len = count;
+
+ ret = __socket_ssl_readv (this, &iov, 1);
+
+ return ret;
+}
+
+
+static int
+__socket_cached_read (rpc_transport_t *this, struct iovec *opvector, int opcount)
+{
+ socket_private_t *priv = NULL;
+ int sock = -1;
+ struct gf_sock_incoming *in = NULL;
+ int req_len = -1;
+ int ret = -1;
+
+ priv = this->private;
+ sock = priv->sock;
+ in = &priv->incoming;
+ req_len = iov_length (opvector, opcount);
+
+ if (in->record_state == SP_STATE_READING_FRAGHDR) {
+ in->ra_read = 0;
+ in->ra_served = 0;
+ in->ra_max = 0;
+ in->ra_buf = NULL;
+ goto uncached;
+ }
+
+ if (!in->ra_max) {
+ /* first call after passing SP_STATE_READING_FRAGHDR */
+ in->ra_max = min (RPC_FRAGSIZE (in->fraghdr), GF_SOCKET_RA_MAX);
+ /* Note that the in->iobuf is the primary iobuf into which
+ headers are read into. By using this itself as our
+ read-ahead cache, we can avoid memory copies in iov_load
+ */
+ in->ra_buf = iobuf_ptr (in->iobuf);
+ }
+
+ /* fill read-ahead */
+ if (in->ra_read < in->ra_max) {
+ ret = __socket_ssl_read (this, &in->ra_buf[in->ra_read],
+ (in->ra_max - in->ra_read));
+ if (ret > 0)
+ in->ra_read += ret;
+
+ /* we proceed to test if there is still cached data to
+ be served even if readahead could not progress */
+ }
+
+ /* serve cached */
+ if (in->ra_served < in->ra_read) {
+ ret = iov_load (opvector, opcount, &in->ra_buf[in->ra_served],
+ min (req_len, (in->ra_read - in->ra_served)));
+
+ in->ra_served += ret;
+ /* Do not read uncached and cached in the same call */
+ goto out;
+ }
+
+ if (in->ra_read < in->ra_max)
+ /* If there was no cached data to be served, (and we are
+ guaranteed to have already performed an attempt to progress
+ readahead above), and we have not yet read out the full
+ readahead capacity, then bail out for now without doing
+ the uncached read below (as that will overtake future cached
+ read)
+ */
+ goto out;
+uncached:
+ ret = __socket_ssl_readv (this, opvector, opcount);
+out:
+ return ret;
+}
+
+static gf_boolean_t
+__does_socket_rwv_error_need_logging (socket_private_t *priv, int write)
+{
+ int read = !write;
+
+ if (priv->connected == -1) /* Didn't even connect, of course it fails */
+ return _gf_false;
+
+ if (read && (priv->read_fail_log == _gf_false))
+ return _gf_false;
+
+ return _gf_true;
+}
+
+/*
+ * return value:
+ * 0 = success (completed)
+ * -1 = error
+ * > 0 = incomplete
+ */
+
+static int
+__socket_rwv (rpc_transport_t *this, struct iovec *vector, int count,
+ struct iovec **pending_vector, int *pending_count, size_t *bytes,
+ int write)
+{
+ socket_private_t *priv = NULL;
+ int sock = -1;
+ int ret = -1;
+ struct iovec *opvector = NULL;
+ int opcount = 0;
+ int moved = 0;
+
+ GF_VALIDATE_OR_GOTO ("socket", this, out);
+ GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+
+ priv = this->private;
+ sock = priv->sock;
+
+ opvector = vector;
+ opcount = count;
+
+ if (bytes != NULL) {
+ *bytes = 0;
+ }
+
+ while (opcount > 0) {
+ if (opvector->iov_len == 0) {
+ gf_log(this->name,GF_LOG_DEBUG,
+ "would have passed zero length to read/write");
+ ++opvector;
+ --opcount;
+ continue;
+ }
+ if (write) {
+ if (priv->use_ssl) {
+ ret = ssl_write_one(this,
+ opvector->iov_base, opvector->iov_len);
+ }
+ else {
+ ret = writev (sock, opvector, opcount);
+ }
+
+ if (ret == 0 || (ret == -1 && errno == EAGAIN)) {
+ /* done for now */
+ break;
+ }
+ this->total_bytes_write += ret;
+ } else {
+ ret = __socket_cached_read (this, opvector, opcount);
+
+ if (ret == 0) {
+ gf_log(this->name,GF_LOG_DEBUG,"EOF on socket");
+ errno = ENODATA;
+ ret = -1;
+ }
+ if (ret == -1 && errno == EAGAIN) {
+ /* done for now */
+ break;
+ }
+ this->total_bytes_read += ret;
+ }
+
+ if (ret == 0) {
+ /* Mostly due to 'umount' in client */
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "EOF from peer %s", this->peerinfo.identifier);
+ opcount = -1;
+ errno = ENOTCONN;
+ break;
+ }
+ if (ret == -1) {
+ if (errno == EINTR)
+ continue;
+
+ if (__does_socket_rwv_error_need_logging (priv,
+ write)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s on %s failed (%s)",
+ write ? "writev":"readv",
+ this->peerinfo.identifier,
+ strerror (errno));
+ }
+
+ if (priv->use_ssl) {
+ ssl_dump_error_stack(this->name);
+ }
+ opcount = -1;
+ break;
+ }
+
+ if (bytes != NULL) {
+ *bytes += ret;
+ }
+
+ moved = 0;
+
+ while (moved < ret) {
+ if (!opcount) {
+ gf_log(this->name,GF_LOG_DEBUG,
+ "ran out of iov, moved %d/%d",
+ moved, ret);
+ goto ran_out;
+ }
+ if (!opvector[0].iov_len) {
+ opvector++;
+ opcount--;
+ continue;
+ }
+ if ((ret - moved) >= opvector[0].iov_len) {
+ moved += opvector[0].iov_len;
+ opvector++;
+ opcount--;
+ } else {
+ opvector[0].iov_len -= (ret - moved);
+ opvector[0].iov_base += (ret - moved);
+ moved += (ret - moved);
+ }
+ }
+ }
+
+ran_out:
+
+ if (pending_vector)
+ *pending_vector = opvector;
+
+ if (pending_count)
+ *pending_count = opcount;
+
+out:
+ return opcount;
+}
+
+
+static int
+__socket_readv (rpc_transport_t *this, struct iovec *vector, int count,
+ struct iovec **pending_vector, int *pending_count,
+ size_t *bytes)
+{
+ int ret = -1;
+
+ ret = __socket_rwv (this, vector, count,
+ pending_vector, pending_count, bytes, 0);
+
+ return ret;
+}
+
+
+static int
+__socket_writev (rpc_transport_t *this, struct iovec *vector, int count,
+ struct iovec **pending_vector, int *pending_count)
+{
+ int ret = -1;
+
+ ret = __socket_rwv (this, vector, count,
+ pending_vector, pending_count, NULL, 1);
+
+ return ret;
+}
+
+
+static int
+__socket_shutdown (rpc_transport_t *this)
+{
+ int ret = -1;
+ socket_private_t *priv = this->private;
+
+ priv->connected = -1;
+ ret = shutdown (priv->sock, SHUT_RDWR);
+ if (ret) {
+ /* its already disconnected.. no need to understand
+ why it failed to shutdown in normal cases */
+ gf_log (this->name, GF_LOG_DEBUG,
+ "shutdown() returned %d. %s",
+ ret, strerror (errno));
+ }
+
+ return ret;
+}
+
+static int
+__socket_disconnect (rpc_transport_t *this)
+{
+ int ret = -1;
+ socket_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO ("socket", this, out);
+ GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+
+ priv = this->private;
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "disconnecting %p, state=%u gen=%u sock=%d", this,
+ priv->ot_state, priv->ot_gen, priv->sock);
+
+ if (priv->sock != -1) {
+ ret = __socket_shutdown(this);
+ if (priv->own_thread) {
+ /*
+ * Without this, reconnect (= disconnect + connect)
+ * won't work except by accident.
+ */
+ close(priv->sock);
+ priv->sock = -1;
+ gf_log (this->name, GF_LOG_TRACE,
+ "OT_PLEASE_DIE on %p", this);
+ priv->ot_state = OT_PLEASE_DIE;
+ }
+ else if (priv->use_ssl) {
+ ssl_teardown_connection(priv);
+ }
+ }
+
+out:
+ return ret;
+}
+
+
+static int
+__socket_server_bind (rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ int ret = -1;
+ int opt = 1;
+ int reuse_check_sock = -1;
+ struct sockaddr_storage unix_addr = {0};
+
+ GF_VALIDATE_OR_GOTO ("socket", this, out);
+ GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+
+ priv = this->private;
+
+ ret = setsockopt (priv->sock, SOL_SOCKET, SO_REUSEADDR,
+ &opt, sizeof (opt));
+
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "setsockopt() for SO_REUSEADDR failed (%s)",
+ strerror (errno));
+ }
+
+ /* reuse-address doesn't work for unix type sockets */
+ if (AF_UNIX == SA (&this->myinfo.sockaddr)->sa_family) {
+ memcpy (&unix_addr, SA (&this->myinfo.sockaddr),
+ this->myinfo.sockaddr_len);
+ reuse_check_sock = socket (AF_UNIX, SOCK_STREAM, 0);
+ if (reuse_check_sock >= 0) {
+ ret = connect (reuse_check_sock, SA (&unix_addr),
+ this->myinfo.sockaddr_len);
+ if ((ret == -1) && (ECONNREFUSED == errno)) {
+ unlink (((struct sockaddr_un*)&unix_addr)->sun_path);
+ }
+ close (reuse_check_sock);
+ }
+ }
+
+ ret = bind (priv->sock, (struct sockaddr *)&this->myinfo.sockaddr,
+ this->myinfo.sockaddr_len);
+
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "binding to %s failed: %s",
+ this->myinfo.identifier, strerror (errno));
+ if (errno == EADDRINUSE) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Port is already in use");
+ }
+ }
+
+out:
+ return ret;
+}
+
+
+static int
+__socket_nonblock (int fd)
+{
+ int flags = 0;
+ int ret = -1;
+
+ flags = fcntl (fd, F_GETFL);
+
+ if (flags != -1)
+ ret = fcntl (fd, F_SETFL, flags | O_NONBLOCK);
+
+ return ret;
+}
+
+static int
+__socket_nodelay (int fd)
+{
+ int on = 1;
+ int ret = -1;
+
+ ret = setsockopt (fd, IPPROTO_TCP, TCP_NODELAY,
+ &on, sizeof (on));
+ if (!ret)
+ gf_log (THIS->name, GF_LOG_TRACE,
+ "NODELAY enabled for socket %d", fd);
+
+ return ret;
+}
+
+
+static int
+__socket_keepalive (int fd, int family, int keepalive_intvl, int keepalive_idle)
+{
+ int on = 1;
+ int ret = -1;
+
+ ret = setsockopt (fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof (on));
+ if (ret == -1) {
+ gf_log ("socket", GF_LOG_WARNING,
+ "failed to set keep alive option on socket %d", fd);
+ goto err;
+ }
+
+ if (keepalive_intvl == GF_USE_DEFAULT_KEEPALIVE)
+ goto done;
+
+#if !defined(GF_LINUX_HOST_OS) && !defined(__NetBSD__)
+#ifdef GF_SOLARIS_HOST_OS
+ ret = setsockopt (fd, SOL_SOCKET, SO_KEEPALIVE, &keepalive_intvl,
+ sizeof (keepalive_intvl));
+#else
+ ret = setsockopt (fd, IPPROTO_TCP, TCP_KEEPALIVE, &keepalive_intvl,
+ sizeof (keepalive_intvl));
+#endif
+ if (ret == -1) {
+ gf_log ("socket", GF_LOG_WARNING,
+ "failed to set keep alive interval on socket %d", fd);
+ goto err;
+ }
+#else
+ if (family != AF_INET)
+ goto done;
+
+ ret = setsockopt (fd, IPPROTO_TCP, TCP_KEEPIDLE, &keepalive_idle,
+ sizeof (keepalive_intvl));
+ if (ret == -1) {
+ gf_log ("socket", GF_LOG_WARNING,
+ "failed to set keep idle %d on socket %d, %s",
+ keepalive_idle, fd, strerror(errno));
+ goto err;
+ }
+ ret = setsockopt (fd, IPPROTO_TCP , TCP_KEEPINTVL, &keepalive_intvl,
+ sizeof (keepalive_intvl));
+ if (ret == -1) {
+ gf_log ("socket", GF_LOG_WARNING,
+ "failed to set keep interval %d on socket %d, %s",
+ keepalive_intvl, fd, strerror(errno));
+ goto err;
+ }
+#endif
+
+done:
+ gf_log (THIS->name, GF_LOG_TRACE, "Keep-alive enabled for socket %d, interval "
+ "%d, idle: %d", fd, keepalive_intvl, keepalive_idle);
+
+err:
+ return ret;
+}
+
+
+static int
+__socket_connect_finish (int fd)
+{
+ int ret = -1;
+ int optval = 0;
+ socklen_t optlen = sizeof (int);
+
+ ret = getsockopt (fd, SOL_SOCKET, SO_ERROR, (void *)&optval, &optlen);
+
+ if (ret == 0 && optval) {
+ errno = optval;
+ ret = -1;
+ }
+
+ return ret;
+}
+
+
+static void
+__socket_reset (rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO ("socket", this, out);
+ GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+
+ priv = this->private;
+
+ /* TODO: use mem-pool on incoming data */
+
+ if (priv->incoming.iobref) {
+ iobref_unref (priv->incoming.iobref);
+ priv->incoming.iobref = NULL;
+ }
+
+ if (priv->incoming.iobuf) {
+ iobuf_unref (priv->incoming.iobuf);
+ }
+
+ GF_FREE (priv->incoming.request_info);
+
+ memset (&priv->incoming, 0, sizeof (priv->incoming));
+
+ event_unregister (this->ctx->event_pool, priv->sock, priv->idx);
+
+ close (priv->sock);
+ priv->sock = -1;
+ priv->idx = -1;
+ priv->connected = -1;
+
+out:
+ return;
+}
+
+
+static void
+socket_set_lastfrag (uint32_t *fragsize) {
+ (*fragsize) |= 0x80000000U;
+}
+
+
+static void
+socket_set_frag_header_size (uint32_t size, char *haddr)
+{
+ size = htonl (size);
+ memcpy (haddr, &size, sizeof (size));
+}
+
+
+static void
+socket_set_last_frag_header_size (uint32_t size, char *haddr)
+{
+ socket_set_lastfrag (&size);
+ socket_set_frag_header_size (size, haddr);
+}
+
+static struct ioq *
+__socket_ioq_new (rpc_transport_t *this, rpc_transport_msg_t *msg)
+{
+ struct ioq *entry = NULL;
+ int count = 0;
+ uint32_t size = 0;
+
+ GF_VALIDATE_OR_GOTO ("socket", this, out);
+
+ /* TODO: use mem-pool */
+ entry = GF_CALLOC (1, sizeof (*entry), gf_common_mt_ioq);
+ if (!entry)
+ return NULL;
+
+ count = msg->rpchdrcount + msg->proghdrcount + msg->progpayloadcount;
+
+ GF_ASSERT (count <= (MAX_IOVEC - 1));
+
+ size = iov_length (msg->rpchdr, msg->rpchdrcount)
+ + iov_length (msg->proghdr, msg->proghdrcount)
+ + iov_length (msg->progpayload, msg->progpayloadcount);
+
+ if (size > RPC_MAX_FRAGMENT_SIZE) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "msg size (%u) bigger than the maximum allowed size on "
+ "sockets (%u)", size, RPC_MAX_FRAGMENT_SIZE);
+ GF_FREE (entry);
+ return NULL;
+ }
+
+ socket_set_last_frag_header_size (size, (char *)&entry->fraghdr);
+
+ entry->vector[0].iov_base = (char *)&entry->fraghdr;
+ entry->vector[0].iov_len = sizeof (entry->fraghdr);
+ entry->count = 1;
+
+ if (msg->rpchdr != NULL) {
+ memcpy (&entry->vector[1], msg->rpchdr,
+ sizeof (struct iovec) * msg->rpchdrcount);
+ entry->count += msg->rpchdrcount;
+ }
+
+ if (msg->proghdr != NULL) {
+ memcpy (&entry->vector[entry->count], msg->proghdr,
+ sizeof (struct iovec) * msg->proghdrcount);
+ entry->count += msg->proghdrcount;
+ }
+
+ if (msg->progpayload != NULL) {
+ memcpy (&entry->vector[entry->count], msg->progpayload,
+ sizeof (struct iovec) * msg->progpayloadcount);
+ entry->count += msg->progpayloadcount;
+ }
+
+ entry->pending_vector = entry->vector;
+ entry->pending_count = entry->count;
+
+ if (msg->iobref != NULL)
+ entry->iobref = iobref_ref (msg->iobref);
+
+ INIT_LIST_HEAD (&entry->list);
+
+out:
+ return entry;
+}
+
+
+static void
+__socket_ioq_entry_free (struct ioq *entry)
+{
+ GF_VALIDATE_OR_GOTO ("socket", entry, out);
+
+ list_del_init (&entry->list);
+ if (entry->iobref)
+ iobref_unref (entry->iobref);
+
+ /* TODO: use mem-pool */
+ GF_FREE (entry);
+
+out:
+ return;
+}
+
+
+static void
+__socket_ioq_flush (rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ struct ioq *entry = NULL;
+
+ GF_VALIDATE_OR_GOTO ("socket", this, out);
+ GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+
+ priv = this->private;
+
+ while (!list_empty (&priv->ioq)) {
+ entry = priv->ioq_next;
+ __socket_ioq_entry_free (entry);
+ }
+
+out:
+ return;
+}
+
+
+static int
+__socket_ioq_churn_entry (rpc_transport_t *this, struct ioq *entry, int direct)
+{
+ int ret = -1;
+ socket_private_t *priv = NULL;
+ char a_byte = 0;
+
+ ret = __socket_writev (this, entry->pending_vector,
+ entry->pending_count,
+ &entry->pending_vector,
+ &entry->pending_count);
+
+ if (ret == 0) {
+ /* current entry was completely written */
+ GF_ASSERT (entry->pending_count == 0);
+ __socket_ioq_entry_free (entry);
+ priv = this->private;
+ if (priv->own_thread) {
+ /*
+ * The pipe should only remain readable if there are
+ * more entries after this, so drain the byte
+ * representing this entry.
+ */
+ if (!direct && read(priv->pipe[0],&a_byte,1) < 1) {
+ gf_log(this->name,GF_LOG_WARNING,
+ "read error on pipe");
+ }
+ }
+ }
+
+ return ret;
+}
+
+
+static int
+__socket_ioq_churn (rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ int ret = 0;
+ struct ioq *entry = NULL;
+
+ GF_VALIDATE_OR_GOTO ("socket", this, out);
+ GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+
+ priv = this->private;
+
+ while (!list_empty (&priv->ioq)) {
+ /* pick next entry */
+ entry = priv->ioq_next;
+
+ ret = __socket_ioq_churn_entry (this, entry, 0);
+
+ if (ret != 0)
+ break;
+ }
+
+ if (!priv->own_thread && list_empty (&priv->ioq)) {
+ /* all pending writes done, not interested in POLLOUT */
+ priv->idx = event_select_on (this->ctx->event_pool,
+ priv->sock, priv->idx, -1, 0);
+ }
+
+out:
+ return ret;
+}
+
+
+static int
+socket_event_poll_err (rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("socket", this, out);
+ GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+
+ priv = this->private;
+
+ pthread_mutex_lock (&priv->lock);
+ {
+ __socket_ioq_flush (this);
+ __socket_reset (this);
+ }
+ pthread_mutex_unlock (&priv->lock);
+
+ rpc_transport_notify (this, RPC_TRANSPORT_DISCONNECT, this);
+
+out:
+ return ret;
+}
+
+
+static int
+socket_event_poll_out (rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("socket", this, out);
+ GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+
+ priv = this->private;
+
+ pthread_mutex_lock (&priv->lock);
+ {
+ if (priv->connected == 1) {
+ ret = __socket_ioq_churn (this);
+
+ if (ret == -1) {
+ __socket_disconnect (this);
+ }
+ }
+ }
+ pthread_mutex_unlock (&priv->lock);
+
+ ret = rpc_transport_notify (this, RPC_TRANSPORT_MSG_SENT, NULL);
+
+out:
+ return ret;
+}
+
+
+static inline int
+__socket_read_simple_msg (rpc_transport_t *this)
+{
+ int ret = 0;
+ uint32_t remaining_size = 0;
+ size_t bytes_read = 0;
+ socket_private_t *priv = NULL;
+ struct gf_sock_incoming *in = NULL;
+ struct gf_sock_incoming_frag *frag = NULL;
+
+ GF_VALIDATE_OR_GOTO ("socket", this, out);
+ GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+
+ priv = this->private;
+
+ in = &priv->incoming;
+ frag = &in->frag;
+
+ switch (frag->simple_state) {
+
+ case SP_STATE_SIMPLE_MSG_INIT:
+ remaining_size = RPC_FRAGSIZE (in->fraghdr) - frag->bytes_read;
+
+ __socket_proto_init_pending (priv, remaining_size);
+
+ frag->simple_state = SP_STATE_READING_SIMPLE_MSG;
+
+ /* fall through */
+
+ case SP_STATE_READING_SIMPLE_MSG:
+ ret = 0;
+
+ remaining_size = RPC_FRAGSIZE (in->fraghdr) - frag->bytes_read;
+
+ if (remaining_size > 0) {
+ ret = __socket_readv (this,
+ in->pending_vector, 1,
+ &in->pending_vector,
+ &in->pending_count,
+ &bytes_read);
+ }
+
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "reading from socket failed. Error (%s), "
+ "peer (%s)", strerror (errno),
+ this->peerinfo.identifier);
+ break;
+ }
+
+ frag->bytes_read += bytes_read;
+ frag->fragcurrent += bytes_read;
+
+ if (ret > 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "partial read on non-blocking socket.");
+ break;
+ }
+
+ if (ret == 0) {
+ frag->simple_state = SP_STATE_SIMPLE_MSG_INIT;
+ }
+ }
+
+out:
+ return ret;
+}
+
+
+static inline int
+__socket_read_simple_request (rpc_transport_t *this)
+{
+ return __socket_read_simple_msg (this);
+}
+
+
+#define rpc_cred_addr(buf) (buf + RPC_MSGTYPE_SIZE + RPC_CALL_BODY_SIZE - 4)
+
+#define rpc_verf_addr(fragcurrent) (fragcurrent - 4)
+
+#define rpc_msgtype_addr(buf) (buf + 4)
+
+#define rpc_prognum_addr(buf) (buf + RPC_MSGTYPE_SIZE + 4)
+#define rpc_progver_addr(buf) (buf + RPC_MSGTYPE_SIZE + 8)
+#define rpc_procnum_addr(buf) (buf + RPC_MSGTYPE_SIZE + 12)
+
+static inline int
+__socket_read_vectored_request (rpc_transport_t *this, rpcsvc_vector_sizer vector_sizer)
+{
+ socket_private_t *priv = NULL;
+ int ret = 0;
+ uint32_t credlen = 0, verflen = 0;
+ char *addr = NULL;
+ struct iobuf *iobuf = NULL;
+ uint32_t remaining_size = 0;
+ ssize_t readsize = 0;
+ size_t size = 0;
+ struct gf_sock_incoming *in = NULL;
+ struct gf_sock_incoming_frag *frag = NULL;
+ sp_rpcfrag_request_state_t *request = NULL;
+
+ GF_VALIDATE_OR_GOTO ("socket", this, out);
+ GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+
+ priv = this->private;
+
+ /* used to reduce the indirection */
+ in = &priv->incoming;
+ frag = &in->frag;
+ request = &frag->call_body.request;
+
+ switch (request->vector_state) {
+ case SP_STATE_VECTORED_REQUEST_INIT:
+ request->vector_sizer_state = 0;
+
+ addr = rpc_cred_addr (iobuf_ptr (in->iobuf));
+
+ /* also read verf flavour and verflen */
+ credlen = ntoh32 (*((uint32_t *)addr))
+ + RPC_AUTH_FLAVOUR_N_LENGTH_SIZE;
+
+ __socket_proto_init_pending (priv, credlen);
+
+ request->vector_state = SP_STATE_READING_CREDBYTES;
+
+ /* fall through */
+
+ case SP_STATE_READING_CREDBYTES:
+ __socket_proto_read (priv, ret);
+
+ request->vector_state = SP_STATE_READ_CREDBYTES;
+
+ /* fall through */
+
+ case SP_STATE_READ_CREDBYTES:
+ addr = rpc_verf_addr (frag->fragcurrent);
+ verflen = ntoh32 (*((uint32_t *)addr));
+
+ if (verflen == 0) {
+ request->vector_state = SP_STATE_READ_VERFBYTES;
+ goto sp_state_read_verfbytes;
+ }
+ __socket_proto_init_pending (priv, verflen);
+
+ request->vector_state = SP_STATE_READING_VERFBYTES;
+
+ /* fall through */
+
+ case SP_STATE_READING_VERFBYTES:
+ __socket_proto_read (priv, ret);
+
+ request->vector_state = SP_STATE_READ_VERFBYTES;
+
+ /* fall through */
+
+ case SP_STATE_READ_VERFBYTES:
+sp_state_read_verfbytes:
+ /* set the base_addr 'persistently' across multiple calls
+ into the state machine */
+ in->proghdr_base_addr = frag->fragcurrent;
+
+ request->vector_sizer_state =
+ vector_sizer (request->vector_sizer_state,
+ &readsize, in->proghdr_base_addr,
+ frag->fragcurrent);
+ __socket_proto_init_pending (priv, readsize);
+
+ request->vector_state = SP_STATE_READING_PROGHDR;
+
+ /* fall through */
+
+ case SP_STATE_READING_PROGHDR:
+ __socket_proto_read (priv, ret);
+
+ request->vector_state = SP_STATE_READ_PROGHDR;
+
+ /* fall through */
+
+ case SP_STATE_READ_PROGHDR:
+sp_state_read_proghdr:
+ request->vector_sizer_state =
+ vector_sizer (request->vector_sizer_state,
+ &readsize, in->proghdr_base_addr,
+ frag->fragcurrent);
+ if (readsize == 0) {
+ request->vector_state = SP_STATE_READ_PROGHDR_XDATA;
+ goto sp_state_read_proghdr_xdata;
+ }
+
+ __socket_proto_init_pending (priv, readsize);
+
+ request->vector_state = SP_STATE_READING_PROGHDR_XDATA;
+
+ /* fall through */
+
+ case SP_STATE_READING_PROGHDR_XDATA:
+ __socket_proto_read (priv, ret);
+
+ request->vector_state = SP_STATE_READ_PROGHDR;
+ /* check if the vector_sizer() has more to say */
+ goto sp_state_read_proghdr;
+
+ case SP_STATE_READ_PROGHDR_XDATA:
+sp_state_read_proghdr_xdata:
+ if (in->payload_vector.iov_base == NULL) {
+
+ size = RPC_FRAGSIZE (in->fraghdr) - frag->bytes_read;
+ iobuf = iobuf_get2 (this->ctx->iobuf_pool, size);
+ if (!iobuf) {
+ ret = -1;
+ break;
+ }
+
+ if (in->iobref == NULL) {
+ in->iobref = iobref_new ();
+ if (in->iobref == NULL) {
+ ret = -1;
+ iobuf_unref (iobuf);
+ break;
+ }
+ }
+
+ iobref_add (in->iobref, iobuf);
+ iobuf_unref (iobuf);
+
+ in->payload_vector.iov_base = iobuf_ptr (iobuf);
+
+ frag->fragcurrent = iobuf_ptr (iobuf);
+ }
+
+ request->vector_state = SP_STATE_READING_PROG;
+
+ /* fall through */
+
+ case SP_STATE_READING_PROG:
+ /* now read the remaining rpc msg into buffer pointed by
+ * fragcurrent
+ */
+
+ ret = __socket_read_simple_msg (this);
+
+ remaining_size = RPC_FRAGSIZE (in->fraghdr) - frag->bytes_read;
+
+ if ((ret == -1) ||
+ ((ret == 0) && (remaining_size == 0)
+ && RPC_LASTFRAG (in->fraghdr))) {
+ request->vector_state = SP_STATE_VECTORED_REQUEST_INIT;
+ in->payload_vector.iov_len
+ = ((unsigned long)frag->fragcurrent
+ - (unsigned long)in->payload_vector.iov_base);
+ }
+ break;
+ }
+
+out:
+ return ret;
+}
+
+static inline int
+__socket_read_request (rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ uint32_t prognum = 0, procnum = 0, progver = 0;
+ uint32_t remaining_size = 0;
+ int ret = -1;
+ char *buf = NULL;
+ rpcsvc_vector_sizer vector_sizer = NULL;
+ struct gf_sock_incoming *in = NULL;
+ struct gf_sock_incoming_frag *frag = NULL;
+ sp_rpcfrag_request_state_t *request = NULL;
+
+ GF_VALIDATE_OR_GOTO ("socket", this, out);
+ GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+
+ priv = this->private;
+
+ /* used to reduce the indirection */
+ in = &priv->incoming;
+ frag = &in->frag;
+ request = &frag->call_body.request;
+
+ switch (request->header_state) {
+
+ case SP_STATE_REQUEST_HEADER_INIT:
+
+ __socket_proto_init_pending (priv, RPC_CALL_BODY_SIZE);
+
+ request->header_state = SP_STATE_READING_RPCHDR1;
+
+ /* fall through */
+
+ case SP_STATE_READING_RPCHDR1:
+ __socket_proto_read (priv, ret);
+
+ request->header_state = SP_STATE_READ_RPCHDR1;
+
+ /* fall through */
+
+ case SP_STATE_READ_RPCHDR1:
+ buf = rpc_prognum_addr (iobuf_ptr (in->iobuf));
+ prognum = ntoh32 (*((uint32_t *)buf));
+
+ buf = rpc_progver_addr (iobuf_ptr (in->iobuf));
+ progver = ntoh32 (*((uint32_t *)buf));
+
+ buf = rpc_procnum_addr (iobuf_ptr (in->iobuf));
+ procnum = ntoh32 (*((uint32_t *)buf));
+
+ if (priv->is_server) {
+ /* this check is needed as rpcsvc and rpc-clnt
+ * actor structures are not same */
+ vector_sizer =
+ rpcsvc_get_program_vector_sizer ((rpcsvc_t *)this->mydata,
+ prognum, progver, procnum);
+ }
+
+ if (vector_sizer) {
+ ret = __socket_read_vectored_request (this, vector_sizer);
+ } else {
+ ret = __socket_read_simple_request (this);
+ }
+
+ remaining_size = RPC_FRAGSIZE (in->fraghdr) - frag->bytes_read;
+
+ if ((ret == -1)
+ || ((ret == 0)
+ && (remaining_size == 0)
+ && (RPC_LASTFRAG (in->fraghdr)))) {
+ request->header_state = SP_STATE_REQUEST_HEADER_INIT;
+ }
+
+ break;
+ }
+
+out:
+ return ret;
+}
+
+
+static inline int
+__socket_read_accepted_successful_reply (rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ int ret = 0;
+ struct iobuf *iobuf = NULL;
+ gfs3_read_rsp read_rsp = {0, };
+ ssize_t size = 0;
+ ssize_t default_read_size = 0;
+ char *proghdr_buf = NULL;
+ XDR xdr;
+ struct gf_sock_incoming *in = NULL;
+ struct gf_sock_incoming_frag *frag = NULL;
+
+ GF_VALIDATE_OR_GOTO ("socket", this, out);
+ GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+
+ priv = this->private;
+
+ /* used to reduce the indirection */
+ in = &priv->incoming;
+ frag = &in->frag;
+
+ switch (frag->call_body.reply.accepted_success_state) {
+
+ case SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT:
+ default_read_size = xdr_sizeof ((xdrproc_t) xdr_gfs3_read_rsp,
+ &read_rsp);
+
+ proghdr_buf = frag->fragcurrent;
+
+ __socket_proto_init_pending (priv, default_read_size);
+
+ frag->call_body.reply.accepted_success_state
+ = SP_STATE_READING_PROC_HEADER;
+
+ /* fall through */
+
+ case SP_STATE_READING_PROC_HEADER:
+ __socket_proto_read (priv, ret);
+
+ /* there can be 'xdata' in read response, figure it out */
+ xdrmem_create (&xdr, proghdr_buf, default_read_size,
+ XDR_DECODE);
+
+ /* This will fail if there is xdata sent from server, if not,
+ well and good, we don't need to worry about */
+ xdr_gfs3_read_rsp (&xdr, &read_rsp);
+
+ free (read_rsp.xdata.xdata_val);
+
+ /* need to round off to proper roof (%4), as XDR packing pads
+ the end of opaque object with '0' */
+ size = roof (read_rsp.xdata.xdata_len, 4);
+
+ if (!size) {
+ frag->call_body.reply.accepted_success_state
+ = SP_STATE_READ_PROC_OPAQUE;
+ goto read_proc_opaque;
+ }
+
+ __socket_proto_init_pending (priv, size);
+
+ frag->call_body.reply.accepted_success_state
+ = SP_STATE_READING_PROC_OPAQUE;
+
+ case SP_STATE_READING_PROC_OPAQUE:
+ __socket_proto_read (priv, ret);
+
+ frag->call_body.reply.accepted_success_state
+ = SP_STATE_READ_PROC_OPAQUE;
+
+ case SP_STATE_READ_PROC_OPAQUE:
+ read_proc_opaque:
+ if (in->payload_vector.iov_base == NULL) {
+
+ size = (RPC_FRAGSIZE (in->fraghdr) - frag->bytes_read);
+
+ iobuf = iobuf_get2 (this->ctx->iobuf_pool, size);
+ if (iobuf == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ if (in->iobref == NULL) {
+ in->iobref = iobref_new ();
+ if (in->iobref == NULL) {
+ ret = -1;
+ iobuf_unref (iobuf);
+ goto out;
+ }
+ }
+
+ iobref_add (in->iobref, iobuf);
+ iobuf_unref (iobuf);
+
+ in->payload_vector.iov_base = iobuf_ptr (iobuf);
+
+ in->payload_vector.iov_len = size;
+ }
+
+ frag->fragcurrent = in->payload_vector.iov_base;
+
+ frag->call_body.reply.accepted_success_state
+ = SP_STATE_READ_PROC_HEADER;
+
+ /* fall through */
+
+ case SP_STATE_READ_PROC_HEADER:
+ /* now read the entire remaining msg into new iobuf */
+ ret = __socket_read_simple_msg (this);
+ if ((ret == -1)
+ || ((ret == 0) && RPC_LASTFRAG (in->fraghdr))) {
+ frag->call_body.reply.accepted_success_state
+ = SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT;
+ }
+
+ break;
+ }
+
+out:
+ return ret;
+}
+
+#define rpc_reply_verflen_addr(fragcurrent) ((char *)fragcurrent - 4)
+#define rpc_reply_accept_status_addr(fragcurrent) ((char *)fragcurrent - 4)
+
+static inline int
+__socket_read_accepted_reply (rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ int ret = -1;
+ char *buf = NULL;
+ uint32_t verflen = 0, len = 0;
+ uint32_t remaining_size = 0;
+ struct gf_sock_incoming *in = NULL;
+ struct gf_sock_incoming_frag *frag = NULL;
+
+ GF_VALIDATE_OR_GOTO ("socket", this, out);
+ GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+
+ priv = this->private;
+ /* used to reduce the indirection */
+ in = &priv->incoming;
+ frag = &in->frag;
+
+ switch (frag->call_body.reply.accepted_state) {
+
+ case SP_STATE_ACCEPTED_REPLY_INIT:
+ __socket_proto_init_pending (priv,
+ RPC_AUTH_FLAVOUR_N_LENGTH_SIZE);
+
+ frag->call_body.reply.accepted_state
+ = SP_STATE_READING_REPLY_VERFLEN;
+
+ /* fall through */
+
+ case SP_STATE_READING_REPLY_VERFLEN:
+ __socket_proto_read (priv, ret);
+
+ frag->call_body.reply.accepted_state
+ = SP_STATE_READ_REPLY_VERFLEN;
+
+ /* fall through */
+
+ case SP_STATE_READ_REPLY_VERFLEN:
+ buf = rpc_reply_verflen_addr (frag->fragcurrent);
+
+ verflen = ntoh32 (*((uint32_t *) buf));
+
+ /* also read accept status along with verf data */
+ len = verflen + RPC_ACCEPT_STATUS_LEN;
+
+ __socket_proto_init_pending (priv, len);
+
+ frag->call_body.reply.accepted_state
+ = SP_STATE_READING_REPLY_VERFBYTES;
+
+ /* fall through */
+
+ case SP_STATE_READING_REPLY_VERFBYTES:
+ __socket_proto_read (priv, ret);
+
+ frag->call_body.reply.accepted_state
+ = SP_STATE_READ_REPLY_VERFBYTES;
+
+ buf = rpc_reply_accept_status_addr (frag->fragcurrent);
+
+ frag->call_body.reply.accept_status
+ = ntoh32 (*(uint32_t *) buf);
+
+ /* fall through */
+
+ case SP_STATE_READ_REPLY_VERFBYTES:
+
+ if (frag->call_body.reply.accept_status
+ == SUCCESS) {
+ ret = __socket_read_accepted_successful_reply (this);
+ } else {
+ /* read entire remaining msg into buffer pointed to by
+ * fragcurrent
+ */
+ ret = __socket_read_simple_msg (this);
+ }
+
+ remaining_size = RPC_FRAGSIZE (in->fraghdr)
+ - frag->bytes_read;
+
+ if ((ret == -1)
+ || ((ret == 0) && (remaining_size == 0)
+ && (RPC_LASTFRAG (in->fraghdr)))) {
+ frag->call_body.reply.accepted_state
+ = SP_STATE_ACCEPTED_REPLY_INIT;
+ }
+
+ break;
+ }
+
+out:
+ return ret;
+}
+
+
+static inline int
+__socket_read_denied_reply (rpc_transport_t *this)
+{
+ return __socket_read_simple_msg (this);
+}
+
+
+#define rpc_reply_status_addr(fragcurrent) ((char *)fragcurrent - 4)
+
+
+static inline int
+__socket_read_vectored_reply (rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ int ret = 0;
+ char *buf = NULL;
+ uint32_t remaining_size = 0;
+ struct gf_sock_incoming *in = NULL;
+ struct gf_sock_incoming_frag *frag = NULL;
+
+ GF_VALIDATE_OR_GOTO ("socket", this, out);
+ GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+
+ priv = this->private;
+ in = &priv->incoming;
+ frag = &in->frag;
+
+ switch (frag->call_body.reply.status_state) {
+
+ case SP_STATE_ACCEPTED_REPLY_INIT:
+ __socket_proto_init_pending (priv, RPC_REPLY_STATUS_SIZE);
+
+ frag->call_body.reply.status_state
+ = SP_STATE_READING_REPLY_STATUS;
+
+ /* fall through */
+
+ case SP_STATE_READING_REPLY_STATUS:
+ __socket_proto_read (priv, ret);
+
+ buf = rpc_reply_status_addr (frag->fragcurrent);
+
+ frag->call_body.reply.accept_status
+ = ntoh32 (*((uint32_t *) buf));
+
+ frag->call_body.reply.status_state
+ = SP_STATE_READ_REPLY_STATUS;
+
+ /* fall through */
+
+ case SP_STATE_READ_REPLY_STATUS:
+ if (frag->call_body.reply.accept_status == MSG_ACCEPTED) {
+ ret = __socket_read_accepted_reply (this);
+ } else {
+ ret = __socket_read_denied_reply (this);
+ }
+
+ remaining_size = RPC_FRAGSIZE (in->fraghdr) - frag->bytes_read;
+
+ if ((ret == -1)
+ || ((ret == 0) && (remaining_size == 0)
+ && (RPC_LASTFRAG (in->fraghdr)))) {
+ frag->call_body.reply.status_state
+ = SP_STATE_ACCEPTED_REPLY_INIT;
+ in->payload_vector.iov_len
+ = (unsigned long)frag->fragcurrent
+ - (unsigned long)in->payload_vector.iov_base;
+ }
+ break;
+ }
+
+out:
+ return ret;
+}
+
+
+static inline int
+__socket_read_simple_reply (rpc_transport_t *this)
+{
+ return __socket_read_simple_msg (this);
+}
+
+#define rpc_xid_addr(buf) (buf)
+
+static inline int
+__socket_read_reply (rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ char *buf = NULL;
+ int32_t ret = -1;
+ rpc_request_info_t *request_info = NULL;
+ char map_xid = 0;
+ struct gf_sock_incoming *in = NULL;
+ struct gf_sock_incoming_frag *frag = NULL;
+
+ GF_VALIDATE_OR_GOTO ("socket", this, out);
+ GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+
+ priv = this->private;
+ in = &priv->incoming;
+ frag = &in->frag;
+
+ buf = rpc_xid_addr (iobuf_ptr (in->iobuf));
+
+ if (in->request_info == NULL) {
+ in->request_info = GF_CALLOC (1, sizeof (*request_info),
+ gf_common_mt_rpc_trans_reqinfo_t);
+ if (in->request_info == NULL) {
+ goto out;
+ }
+
+ map_xid = 1;
+ }
+
+ request_info = in->request_info;
+
+ if (map_xid) {
+ request_info->xid = ntoh32 (*((uint32_t *) buf));
+
+ /* release priv->lock, so as to avoid deadlock b/w conn->lock
+ * and priv->lock, since we are doing an upcall here.
+ */
+ pthread_mutex_unlock (&priv->lock);
+ {
+ ret = rpc_transport_notify (this,
+ RPC_TRANSPORT_MAP_XID_REQUEST,
+ in->request_info);
+ }
+ pthread_mutex_lock (&priv->lock);
+
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "notify for event MAP_XID failed");
+ goto out;
+ }
+ }
+
+ if ((request_info->prognum == GLUSTER_FOP_PROGRAM)
+ && (request_info->procnum == GF_FOP_READ)) {
+ if (map_xid && request_info->rsp.rsp_payload_count != 0) {
+ in->iobref = iobref_ref (request_info->rsp.rsp_iobref);
+ in->payload_vector = *request_info->rsp.rsp_payload;
+ }
+
+ ret = __socket_read_vectored_reply (this);
+ } else {
+ ret = __socket_read_simple_reply (this);
+ }
+out:
+ return ret;
+}
+
+
+/* returns the number of bytes yet to be read in a fragment */
+static inline int
+__socket_read_frag (rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ int32_t ret = 0;
+ char *buf = NULL;
+ uint32_t remaining_size = 0;
+ struct gf_sock_incoming *in = NULL;
+ struct gf_sock_incoming_frag *frag = NULL;
+
+ GF_VALIDATE_OR_GOTO ("socket", this, out);
+ GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+
+ priv = this->private;
+ /* used to reduce the indirection */
+ in = &priv->incoming;
+ frag = &in->frag;
+
+ switch (frag->state) {
+ case SP_STATE_NADA:
+ __socket_proto_init_pending (priv, RPC_MSGTYPE_SIZE);
+
+ frag->state = SP_STATE_READING_MSGTYPE;
+
+ /* fall through */
+
+ case SP_STATE_READING_MSGTYPE:
+ __socket_proto_read (priv, ret);
+
+ frag->state = SP_STATE_READ_MSGTYPE;
+ /* fall through */
+
+ case SP_STATE_READ_MSGTYPE:
+ buf = rpc_msgtype_addr (iobuf_ptr (in->iobuf));
+ in->msg_type = ntoh32 (*((uint32_t *)buf));
+
+ if (in->msg_type == CALL) {
+ ret = __socket_read_request (this);
+ } else if (in->msg_type == REPLY) {
+ ret = __socket_read_reply (this);
+ } else if (in->msg_type == GF_UNIVERSAL_ANSWER) {
+ gf_log ("rpc", GF_LOG_ERROR,
+ "older version of protocol/process trying to "
+ "connect from %s. use newer version on that node",
+ this->peerinfo.identifier);
+ } else {
+ gf_log ("rpc", GF_LOG_ERROR,
+ "wrong MSG-TYPE (%d) received from %s",
+ in->msg_type,
+ this->peerinfo.identifier);
+ ret = -1;
+ }
+
+ remaining_size = RPC_FRAGSIZE (in->fraghdr) - frag->bytes_read;
+
+ if ((ret == -1)
+ || ((ret == 0) && (remaining_size == 0)
+ && (RPC_LASTFRAG (in->fraghdr)))) {
+ frag->state = SP_STATE_NADA;
+ }
+
+ break;
+ }
+
+out:
+ return ret;
+}
+
+
+static inline
+void __socket_reset_priv (socket_private_t *priv)
+{
+ struct gf_sock_incoming *in = NULL;
+
+ /* used to reduce the indirection */
+ in = &priv->incoming;
+
+ if (in->iobref) {
+ iobref_unref (in->iobref);
+ in->iobref = NULL;
+ }
+
+ if (in->iobuf) {
+ iobuf_unref (in->iobuf);
+ }
+
+ if (in->request_info != NULL) {
+ GF_FREE (in->request_info);
+ in->request_info = NULL;
+ }
+
+ memset (&in->payload_vector, 0,
+ sizeof (in->payload_vector));
+
+ in->iobuf = NULL;
+}
+
+
+static int
+__socket_proto_state_machine (rpc_transport_t *this,
+ rpc_transport_pollin_t **pollin)
+{
+ int ret = -1;
+ socket_private_t *priv = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ struct iovec vector[2];
+ struct gf_sock_incoming *in = NULL;
+ struct gf_sock_incoming_frag *frag = NULL;
+
+ GF_VALIDATE_OR_GOTO ("socket", this, out);
+ GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+
+ priv = this->private;
+ /* used to reduce the indirection */
+ in = &priv->incoming;
+ frag = &in->frag;
+
+ while (in->record_state != SP_STATE_COMPLETE) {
+ switch (in->record_state) {
+
+ case SP_STATE_NADA:
+ in->total_bytes_read = 0;
+ in->payload_vector.iov_len = 0;
+
+ in->pending_vector = in->vector;
+ in->pending_vector->iov_base = &in->fraghdr;
+
+ in->pending_vector->iov_len = sizeof (in->fraghdr);
+
+ in->record_state = SP_STATE_READING_FRAGHDR;
+
+ /* fall through */
+
+ case SP_STATE_READING_FRAGHDR:
+ ret = __socket_readv (this, in->pending_vector, 1,
+ &in->pending_vector,
+ &in->pending_count,
+ NULL);
+ if (ret == -1)
+ goto out;
+
+ if (ret > 0) {
+ gf_log (this->name, GF_LOG_TRACE, "partial "
+ "fragment header read");
+ goto out;
+ }
+
+ if (ret == 0) {
+ in->record_state = SP_STATE_READ_FRAGHDR;
+ }
+ /* fall through */
+
+ case SP_STATE_READ_FRAGHDR:
+
+ in->fraghdr = ntoh32 (in->fraghdr);
+ in->total_bytes_read += RPC_FRAGSIZE(in->fraghdr);
+ iobuf = iobuf_get2 (this->ctx->iobuf_pool,
+ (in->total_bytes_read +
+ sizeof (in->fraghdr)));
+ if (!iobuf) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ in->iobuf = iobuf;
+ in->iobuf_size = 0;
+ frag->fragcurrent = iobuf_ptr (iobuf);
+ in->record_state = SP_STATE_READING_FRAG;
+ /* fall through */
+
+ case SP_STATE_READING_FRAG:
+ ret = __socket_read_frag (this);
+
+ if ((ret == -1) ||
+ (frag->bytes_read != RPC_FRAGSIZE (in->fraghdr))) {
+ goto out;
+ }
+
+ frag->bytes_read = 0;
+
+ if (!RPC_LASTFRAG (in->fraghdr)) {
+ in->record_state = SP_STATE_READING_FRAGHDR;
+ break;
+ }
+
+ /* we've read the entire rpc record, notify the
+ * upper layers.
+ */
+ if (pollin != NULL) {
+ int count = 0;
+ in->iobuf_size = (in->total_bytes_read -
+ in->payload_vector.iov_len);
+
+ memset (vector, 0, sizeof (vector));
+
+ if (in->iobref == NULL) {
+ in->iobref = iobref_new ();
+ if (in->iobref == NULL) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ vector[count].iov_base = iobuf_ptr (in->iobuf);
+ vector[count].iov_len = in->iobuf_size;
+
+ iobref = in->iobref;
+
+ count++;
+
+ if (in->payload_vector.iov_base != NULL) {
+ vector[count] = in->payload_vector;
+ count++;
+ }
+
+ *pollin = rpc_transport_pollin_alloc (this,
+ vector,
+ count,
+ in->iobuf,
+ iobref,
+ in->request_info);
+ iobuf_unref (in->iobuf);
+ in->iobuf = NULL;
+
+ if (*pollin == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "transport pollin allocation failed");
+ ret = -1;
+ goto out;
+ }
+ if (in->msg_type == REPLY)
+ (*pollin)->is_reply = 1;
+
+ in->request_info = NULL;
+ }
+ in->record_state = SP_STATE_COMPLETE;
+ break;
+
+ case SP_STATE_COMPLETE:
+ /* control should not reach here */
+ gf_log (this->name, GF_LOG_WARNING, "control reached to "
+ "SP_STATE_COMPLETE, which should not have "
+ "happened");
+ break;
+ }
+ }
+
+ if (in->record_state == SP_STATE_COMPLETE) {
+ in->record_state = SP_STATE_NADA;
+ __socket_reset_priv (priv);
+ }
+
+out:
+ if ((ret == -1) && (errno == EAGAIN)) {
+ ret = 0;
+ }
+ return ret;
+}
+
+
+static int
+socket_proto_state_machine (rpc_transport_t *this,
+ rpc_transport_pollin_t **pollin)
+{
+ socket_private_t *priv = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO ("socket", this, out);
+ GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+
+ priv = this->private;
+
+ pthread_mutex_lock (&priv->lock);
+ {
+ ret = __socket_proto_state_machine (this, pollin);
+ }
+ pthread_mutex_unlock (&priv->lock);
+
+out:
+ return ret;
+}
+
+
+static int
+socket_event_poll_in (rpc_transport_t *this)
+{
+ int ret = -1;
+ rpc_transport_pollin_t *pollin = NULL;
+ socket_private_t *priv = this->private;
+
+ ret = socket_proto_state_machine (this, &pollin);
+
+ if (pollin != NULL) {
+ priv->ot_state = OT_CALLBACK;
+ ret = rpc_transport_notify (this, RPC_TRANSPORT_MSG_RECEIVED,
+ pollin);
+ if (priv->ot_state == OT_CALLBACK) {
+ priv->ot_state = OT_RUNNING;
+ }
+ rpc_transport_pollin_destroy (pollin);
+ }
+
+ return ret;
+}
+
+
+static int
+socket_connect_finish (rpc_transport_t *this)
+{
+ int ret = -1;
+ socket_private_t *priv = NULL;
+ rpc_transport_event_t event = 0;
+ char notify_rpc = 0;
+
+ GF_VALIDATE_OR_GOTO ("socket", this, out);
+ GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+
+ priv = this->private;
+
+ pthread_mutex_lock (&priv->lock);
+ {
+ if (priv->connected != 0)
+ goto unlock;
+
+ get_transport_identifiers (this);
+
+ ret = __socket_connect_finish (priv->sock);
+
+ if (ret == -1 && errno == EINPROGRESS)
+ ret = 1;
+
+ if (ret == -1 && errno != EINPROGRESS) {
+ if (!priv->connect_finish_log) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "connection to %s failed (%s)",
+ this->peerinfo.identifier,
+ strerror (errno));
+ priv->connect_finish_log = 1;
+ }
+ __socket_disconnect (this);
+ goto unlock;
+ }
+
+ if (ret == 0) {
+ notify_rpc = 1;
+
+ this->myinfo.sockaddr_len =
+ sizeof (this->myinfo.sockaddr);
+
+ ret = getsockname (priv->sock,
+ SA (&this->myinfo.sockaddr),
+ &this->myinfo.sockaddr_len);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "getsockname on (%d) failed (%s)",
+ priv->sock, strerror (errno));
+ __socket_disconnect (this);
+ event = GF_EVENT_POLLERR;
+ goto unlock;
+ }
+
+ priv->connected = 1;
+ priv->connect_finish_log = 0;
+ event = RPC_TRANSPORT_CONNECT;
+ }
+ }
+unlock:
+ pthread_mutex_unlock (&priv->lock);
+
+ if (notify_rpc) {
+ rpc_transport_notify (this, event, this);
+ }
+out:
+ return 0;
+}
+
+
+/* reads rpc_requests during pollin */
+static int
+socket_event_handler (int fd, int idx, void *data,
+ int poll_in, int poll_out, int poll_err)
+{
+ rpc_transport_t *this = NULL;
+ socket_private_t *priv = NULL;
+ int ret = -1;
+
+ this = data;
+ GF_VALIDATE_OR_GOTO ("socket", this, out);
+ GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+ GF_VALIDATE_OR_GOTO ("socket", this->xl, out);
+
+ THIS = this->xl;
+ priv = this->private;
+
+ pthread_mutex_lock (&priv->lock);
+ {
+ priv->idx = idx;
+ }
+ pthread_mutex_unlock (&priv->lock);
+
+ ret = (priv->connected == 1) ? 0 : socket_connect_finish(this);
+
+ if (!ret && poll_out) {
+ ret = socket_event_poll_out (this);
+ }
+
+ if (!ret && poll_in) {
+ ret = socket_event_poll_in (this);
+ }
+
+ if ((ret < 0) || poll_err) {
+ /* Logging has happened already in earlier cases */
+ gf_log ("transport", ((ret >= 0) ? GF_LOG_INFO : GF_LOG_DEBUG),
+ "disconnecting now");
+ socket_event_poll_err (this);
+ rpc_transport_unref (this);
+ }
+
+out:
+ return ret;
+}
+
+
+static void *
+socket_poller (void *ctx)
+{
+ rpc_transport_t *this = ctx;
+ socket_private_t *priv = this->private;
+ struct pollfd pfd[2] = {{0,},};
+ gf_boolean_t to_write = _gf_false;
+ int ret = 0;
+ uint32_t gen = 0;
+
+ priv->ot_state = OT_RUNNING;
+
+ if (priv->use_ssl) {
+ if (ssl_setup_connection(this,priv->connected) < 0) {
+ gf_log (this->name,GF_LOG_ERROR, "%s setup failed",
+ priv->connected ? "server" : "client");
+ goto err;
+ }
+ }
+
+ if (!priv->bio) {
+ ret = __socket_nonblock (priv->sock);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "NBIO on %d failed (%s)",
+ priv->sock, strerror (errno));
+ goto err;
+ }
+ }
+
+ if (priv->connected == 0) {
+ THIS = this->xl;
+ ret = socket_connect_finish (this);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "asynchronous socket_connect_finish failed");
+ }
+ }
+
+ ret = rpc_transport_notify (this->listener,
+ RPC_TRANSPORT_ACCEPT, this);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "asynchronous rpc_transport_notify failed");
+ }
+
+ gen = priv->ot_gen;
+ for (;;) {
+ pthread_mutex_lock(&priv->lock);
+ to_write = !list_empty(&priv->ioq);
+ pthread_mutex_unlock(&priv->lock);
+ pfd[0].fd = priv->pipe[0];
+ pfd[0].events = POLL_MASK_ERROR;
+ pfd[0].revents = 0;
+ pfd[1].fd = priv->sock;
+ pfd[1].events = POLL_MASK_INPUT | POLL_MASK_ERROR;
+ pfd[1].revents = 0;
+ if (to_write) {
+ pfd[1].events |= POLL_MASK_OUTPUT;
+ }
+ else {
+ pfd[0].events |= POLL_MASK_INPUT;
+ }
+ if (poll(pfd,2,-1) < 0) {
+ gf_log(this->name,GF_LOG_ERROR,"poll failed");
+ break;
+ }
+ if (pfd[0].revents & POLL_MASK_ERROR) {
+ gf_log(this->name,GF_LOG_ERROR,
+ "poll error on pipe");
+ break;
+ }
+ /* Only glusterd actually seems to need this. */
+ THIS = this->xl;
+ if (pfd[1].revents & POLL_MASK_INPUT) {
+ ret = socket_event_poll_in(this);
+ if (ret >= 0) {
+ /* Suppress errors while making progress. */
+ pfd[1].revents &= ~POLL_MASK_ERROR;
+ }
+ else if (errno == ENOTCONN) {
+ ret = 0;
+ }
+ if (priv->ot_state == OT_PLEASE_DIE) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "OT_IDLE on %p (input request)",
+ this);
+ priv->ot_state = OT_IDLE;
+ break;
+ }
+ }
+ else if (pfd[1].revents & POLL_MASK_OUTPUT) {
+ ret = socket_event_poll_out(this);
+ if (ret >= 0) {
+ /* Suppress errors while making progress. */
+ pfd[1].revents &= ~POLL_MASK_ERROR;
+ }
+ else if (errno == ENOTCONN) {
+ ret = 0;
+ }
+ if (priv->ot_state == OT_PLEASE_DIE) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "OT_IDLE on %p (output request)",
+ this);
+ priv->ot_state = OT_IDLE;
+ break;
+ }
+ }
+ else {
+ /*
+ * This usually means that we left poll() because
+ * somebody pushed a byte onto our pipe. That wakeup
+ * is why the pipe is there, but once awake we can do
+ * all the checking we need on the next iteration.
+ */
+ ret = 0;
+ }
+ if (pfd[1].revents & POLL_MASK_ERROR) {
+ gf_log(this->name,GF_LOG_ERROR,
+ "poll error on socket");
+ break;
+ }
+ if (ret < 0) {
+ gf_log(this->name,GF_LOG_ERROR,
+ "error in polling loop");
+ break;
+ }
+ if (priv->ot_gen != gen) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "generation mismatch, my %u != %u",
+ gen, priv->ot_gen);
+ return NULL;
+ }
+ }
+
+err:
+ /* All (and only) I/O errors should come here. */
+ pthread_mutex_lock(&priv->lock);
+ if (priv->ssl_ssl) {
+ /*
+ * We're always responsible for this part, but only actually
+ * have to do it if we got far enough for ssl_ssl to be valid
+ * (i.e. errors in ssl_setup_connection don't count).
+ */
+ ssl_teardown_connection(priv);
+ }
+ __socket_shutdown(this);
+ close(priv->sock);
+ priv->sock = -1;
+ priv->ot_state = OT_IDLE;
+ pthread_mutex_unlock(&priv->lock);
+ rpc_transport_notify (this->listener, RPC_TRANSPORT_DISCONNECT,
+ this);
+ rpc_transport_unref (this);
+ return NULL;
+}
+
+
+static void
+socket_spawn (rpc_transport_t *this)
+{
+ socket_private_t *priv = this->private;
+
+ switch (priv->ot_state) {
+ case OT_IDLE:
+ case OT_PLEASE_DIE:
+ break;
+ default:
+ gf_log (this->name, GF_LOG_WARNING,
+ "refusing to start redundant poller");
+ return;
+ }
+
+ priv->ot_gen += 7;
+ priv->ot_state = OT_SPAWNING;
+ gf_log (this->name, GF_LOG_TRACE,
+ "spawning %p with gen %u", this, priv->ot_gen);
+
+ if (gf_thread_create(&priv->thread,NULL,socket_poller,this) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not create poll thread");
+ }
+}
+
+static int
+socket_server_event_handler (int fd, int idx, void *data,
+ int poll_in, int poll_out, int poll_err)
+{
+ rpc_transport_t *this = NULL;
+ socket_private_t *priv = NULL;
+ int ret = 0;
+ int new_sock = -1;
+ rpc_transport_t *new_trans = NULL;
+ struct sockaddr_storage new_sockaddr = {0, };
+ socklen_t addrlen = sizeof (new_sockaddr);
+ socket_private_t *new_priv = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+
+ this = data;
+ GF_VALIDATE_OR_GOTO ("socket", this, out);
+ GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+ GF_VALIDATE_OR_GOTO ("socket", this->xl, out);
+
+ THIS = this->xl;
+ priv = this->private;
+ ctx = this->ctx;
+
+ pthread_mutex_lock (&priv->lock);
+ {
+ priv->idx = idx;
+
+ if (poll_in) {
+ new_sock = accept (priv->sock, SA (&new_sockaddr),
+ &addrlen);
+
+ if (new_sock == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "accept on %d failed (%s)",
+ priv->sock, strerror (errno));
+ goto unlock;
+ }
+
+ if (priv->nodelay && (new_sockaddr.ss_family != AF_UNIX)) {
+ ret = __socket_nodelay (new_sock);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "setsockopt() failed for "
+ "NODELAY (%s)",
+ strerror (errno));
+ }
+ }
+
+ if (priv->keepalive &&
+ new_sockaddr.ss_family != AF_UNIX) {
+ ret = __socket_keepalive (new_sock,
+ new_sockaddr.ss_family,
+ priv->keepaliveintvl,
+ priv->keepaliveidle);
+ if (ret == -1)
+ gf_log (this->name, GF_LOG_WARNING,
+ "Failed to set keep-alive: %s",
+ strerror (errno));
+ }
+
+ new_trans = GF_CALLOC (1, sizeof (*new_trans),
+ gf_common_mt_rpc_trans_t);
+ if (!new_trans)
+ goto unlock;
+
+ ret = pthread_mutex_init(&new_trans->lock, NULL);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "pthread_mutex_init() failed: %s",
+ strerror (errno));
+ close (new_sock);
+ goto unlock;
+ }
+
+ new_trans->name = gf_strdup (this->name);
+
+ memcpy (&new_trans->peerinfo.sockaddr, &new_sockaddr,
+ addrlen);
+ new_trans->peerinfo.sockaddr_len = addrlen;
+
+ new_trans->myinfo.sockaddr_len =
+ sizeof (new_trans->myinfo.sockaddr);
+
+ ret = getsockname (new_sock,
+ SA (&new_trans->myinfo.sockaddr),
+ &new_trans->myinfo.sockaddr_len);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "getsockname on %d failed (%s)",
+ new_sock, strerror (errno));
+ close (new_sock);
+ goto unlock;
+ }
+
+ get_transport_identifiers (new_trans);
+ ret = socket_init(new_trans);
+ if (ret != 0) {
+ close(new_sock);
+ goto unlock;
+ }
+ new_trans->ops = this->ops;
+ new_trans->init = this->init;
+ new_trans->fini = this->fini;
+ new_trans->ctx = ctx;
+ new_trans->xl = this->xl;
+ new_trans->mydata = this->mydata;
+ new_trans->notify = this->notify;
+ new_trans->listener = this;
+ new_priv = new_trans->private;
+
+ new_priv->use_ssl = priv->use_ssl;
+ new_priv->sock = new_sock;
+ new_priv->own_thread = priv->own_thread;
+
+ new_priv->ssl_ctx = priv->ssl_ctx;
+ if (priv->use_ssl && !priv->own_thread) {
+ if (ssl_setup_connection(new_trans,1) < 0) {
+ gf_log(this->name,GF_LOG_ERROR,
+ "server setup failed");
+ close(new_sock);
+ goto unlock;
+ }
+ }
+
+ if (!priv->bio && !priv->own_thread) {
+ ret = __socket_nonblock (new_sock);
+
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "NBIO on %d failed (%s)",
+ new_sock, strerror (errno));
+
+ close (new_sock);
+ goto unlock;
+ }
+ }
+
+ pthread_mutex_lock (&new_priv->lock);
+ {
+ /*
+ * In the own_thread case, this is used to
+ * indicate that we're initializing a server
+ * connection.
+ */
+ new_priv->connected = 1;
+ new_priv->is_server = _gf_true;
+ rpc_transport_ref (new_trans);
+
+ if (new_priv->own_thread) {
+ if (pipe(new_priv->pipe) < 0) {
+ gf_log(this->name,GF_LOG_ERROR,
+ "could not create pipe");
+ }
+ socket_spawn(new_trans);
+ }
+ else {
+ new_priv->idx =
+ event_register (ctx->event_pool,
+ new_sock,
+ socket_event_handler,
+ new_trans,
+ 1, 0);
+ if (new_priv->idx == -1)
+ ret = -1;
+ }
+
+ }
+ pthread_mutex_unlock (&new_priv->lock);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to register the socket with event");
+ goto unlock;
+ }
+
+ if (!priv->own_thread) {
+ ret = rpc_transport_notify (this,
+ RPC_TRANSPORT_ACCEPT, new_trans);
+ }
+ }
+ }
+unlock:
+ pthread_mutex_unlock (&priv->lock);
+
+out:
+ return ret;
+}
+
+
+static int
+socket_disconnect (rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("socket", this, out);
+ GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+
+ priv = this->private;
+
+ pthread_mutex_lock (&priv->lock);
+ {
+ ret = __socket_disconnect (this);
+ }
+ pthread_mutex_unlock (&priv->lock);
+
+out:
+ return ret;
+}
+
+
+static int
+socket_connect (rpc_transport_t *this, int port)
+{
+ int ret = -1;
+ int sock = -1;
+ socket_private_t *priv = NULL;
+ socklen_t sockaddr_len = 0;
+ glusterfs_ctx_t *ctx = NULL;
+ sa_family_t sa_family = {0, };
+ char *local_addr = NULL;
+ union gf_sock_union sock_union;
+ struct sockaddr_in *addr = NULL;
+
+ GF_VALIDATE_OR_GOTO ("socket", this, err);
+ GF_VALIDATE_OR_GOTO ("socket", this->private, err);
+
+ priv = this->private;
+ ctx = this->ctx;
+
+ if (!priv) {
+ gf_log_callingfn (this->name, GF_LOG_WARNING,
+ "connect() called on uninitialized transport");
+ goto err;
+ }
+
+ pthread_mutex_lock (&priv->lock);
+ {
+ sock = priv->sock;
+ }
+ pthread_mutex_unlock (&priv->lock);
+
+ if (sock != -1) {
+ gf_log_callingfn (this->name, GF_LOG_TRACE,
+ "connect () called on transport already connected");
+ errno = EINPROGRESS;
+ ret = -1;
+ goto err;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "connecting %p, state=%u gen=%u sock=%d", this,
+ priv->ot_state, priv->ot_gen, priv->sock);
+
+ ret = socket_client_get_remote_sockaddr (this, &sock_union.sa,
+ &sockaddr_len, &sa_family);
+ if (ret == -1) {
+ /* logged inside client_get_remote_sockaddr */
+ goto err;
+ }
+
+ if (port > 0) {
+ sock_union.sin.sin_port = htons (port);
+ }
+ if (ntohs(sock_union.sin.sin_port) == GF_DEFAULT_SOCKET_LISTEN_PORT) {
+ if (priv->use_ssl) {
+ gf_log(this->name,GF_LOG_DEBUG,
+ "disabling SSL for portmapper connection");
+ priv->use_ssl = _gf_false;
+ }
+ }
+ else {
+ if (priv->ssl_enabled && !priv->use_ssl) {
+ gf_log(this->name,GF_LOG_DEBUG,
+ "re-enabling SSL for I/O connection");
+ priv->use_ssl = _gf_true;
+ }
+ }
+ pthread_mutex_lock (&priv->lock);
+ {
+ if (priv->sock != -1) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "connect() -- already connected");
+ goto unlock;
+ }
+
+ memcpy (&this->peerinfo.sockaddr, &sock_union.storage,
+ sockaddr_len);
+ this->peerinfo.sockaddr_len = sockaddr_len;
+
+ priv->sock = socket (sa_family, SOCK_STREAM, 0);
+ if (priv->sock == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "socket creation failed (%s)",
+ strerror (errno));
+ goto unlock;
+ }
+
+ /* Cant help if setting socket options fails. We can continue
+ * working nonetheless.
+ */
+ if (priv->windowsize != 0) {
+ if (setsockopt (priv->sock, SOL_SOCKET, SO_RCVBUF,
+ &priv->windowsize,
+ sizeof (priv->windowsize)) < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "setting receive window "
+ "size failed: %d: %d: %s",
+ priv->sock, priv->windowsize,
+ strerror (errno));
+ }
+
+ if (setsockopt (priv->sock, SOL_SOCKET, SO_SNDBUF,
+ &priv->windowsize,
+ sizeof (priv->windowsize)) < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "setting send window size "
+ "failed: %d: %d: %s",
+ priv->sock, priv->windowsize,
+ strerror (errno));
+ }
+ }
+
+ if (priv->nodelay && (sa_family != AF_UNIX)) {
+ ret = __socket_nodelay (priv->sock);
+
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "NODELAY on %d failed (%s)",
+ priv->sock, strerror (errno));
+ }
+ }
+
+ if (priv->keepalive && sa_family != AF_UNIX) {
+ ret = __socket_keepalive (priv->sock,
+ sa_family,
+ priv->keepaliveintvl,
+ priv->keepaliveidle);
+ if (ret == -1)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set keep-alive: %s",
+ strerror (errno));
+ }
+
+ SA (&this->myinfo.sockaddr)->sa_family =
+ SA (&this->peerinfo.sockaddr)->sa_family;
+
+ /* If a source addr is explicitly specified, use it */
+ ret = dict_get_str (this->options,
+ "transport.socket.source-addr",
+ &local_addr);
+ if (!ret && SA (&this->myinfo.sockaddr)->sa_family == AF_INET) {
+ addr = (struct sockaddr_in *)(&this->myinfo.sockaddr);
+ ret = inet_pton (AF_INET, local_addr, &(addr->sin_addr.s_addr));
+ }
+
+ ret = client_bind (this, SA (&this->myinfo.sockaddr),
+ &this->myinfo.sockaddr_len, priv->sock);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "client bind failed: %s", strerror (errno));
+ close (priv->sock);
+ priv->sock = -1;
+ goto unlock;
+ }
+
+ if (!priv->use_ssl && !priv->bio && !priv->own_thread) {
+ ret = __socket_nonblock (priv->sock);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "NBIO on %d failed (%s)",
+ priv->sock, strerror (errno));
+ close (priv->sock);
+ priv->sock = -1;
+ goto unlock;
+ }
+ }
+
+ ret = connect (priv->sock, SA (&this->peerinfo.sockaddr),
+ this->peerinfo.sockaddr_len);
+
+ if (ret == -1 && ((errno != EINPROGRESS) && (errno != ENOENT))) {
+ /* For unix path based sockets, the socket path is
+ * cryptic (md5sum of path) and may not be useful for
+ * the user in debugging so log it in DEBUG
+ */
+ gf_log (this->name, ((sa_family == AF_UNIX) ?
+ GF_LOG_DEBUG : GF_LOG_ERROR),
+ "connection attempt on %s failed, (%s)",
+ this->peerinfo.identifier, strerror (errno));
+ close (priv->sock);
+ priv->sock = -1;
+ goto unlock;
+ }
+
+ if (priv->use_ssl && !priv->own_thread) {
+ ret = ssl_setup_connection(this,0);
+ if (ret < 0) {
+ gf_log(this->name,GF_LOG_ERROR,
+ "client setup failed");
+ close(priv->sock);
+ priv->sock = -1;
+ goto unlock;
+ }
+ }
+
+ if (!priv->bio && !priv->own_thread) {
+ ret = __socket_nonblock (priv->sock);
+
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "NBIO on %d failed (%s)",
+ priv->sock, strerror (errno));
+ close (priv->sock);
+ priv->sock = -1;
+ goto unlock;
+ }
+ }
+
+ /*
+ * In the own_thread case, this is used to indicate that we're
+ * initializing a client connection.
+ */
+ priv->connected = 0;
+ priv->is_server = _gf_false;
+ rpc_transport_ref (this);
+
+ if (priv->own_thread) {
+ if (pipe(priv->pipe) < 0) {
+ gf_log(this->name,GF_LOG_ERROR,
+ "could not create pipe");
+ }
+
+ this->listener = this;
+ socket_spawn(this);
+ }
+ else {
+ priv->idx = event_register (ctx->event_pool, priv->sock,
+ socket_event_handler,
+ this, 1, 1);
+ if (priv->idx == -1) {
+ gf_log ("", GF_LOG_WARNING,
+ "failed to register the event");
+ ret = -1;
+ }
+ }
+ }
+unlock:
+ pthread_mutex_unlock (&priv->lock);
+
+err:
+ return ret;
+}
+
+
+static int
+socket_listen (rpc_transport_t *this)
+{
+ socket_private_t * priv = NULL;
+ int ret = -1;
+ int sock = -1;
+ struct sockaddr_storage sockaddr;
+ socklen_t sockaddr_len = 0;
+ peer_info_t *myinfo = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ sa_family_t sa_family = {0, };
+
+ GF_VALIDATE_OR_GOTO ("socket", this, out);
+ GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+
+ priv = this->private;
+ myinfo = &this->myinfo;
+ ctx = this->ctx;
+
+ pthread_mutex_lock (&priv->lock);
+ {
+ sock = priv->sock;
+ }
+ pthread_mutex_unlock (&priv->lock);
+
+ if (sock != -1) {
+ gf_log_callingfn (this->name, GF_LOG_DEBUG,
+ "already listening");
+ return ret;
+ }
+
+ ret = socket_server_get_local_sockaddr (this, SA (&sockaddr),
+ &sockaddr_len, &sa_family);
+ if (ret == -1) {
+ return ret;
+ }
+
+ pthread_mutex_lock (&priv->lock);
+ {
+ if (priv->sock != -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "already listening");
+ goto unlock;
+ }
+
+ memcpy (&myinfo->sockaddr, &sockaddr, sockaddr_len);
+ myinfo->sockaddr_len = sockaddr_len;
+
+ priv->sock = socket (sa_family, SOCK_STREAM, 0);
+
+ if (priv->sock == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "socket creation failed (%s)",
+ strerror (errno));
+ goto unlock;
+ }
+
+ /* Cant help if setting socket options fails. We can continue
+ * working nonetheless.
+ */
+ if (priv->windowsize != 0) {
+ if (setsockopt (priv->sock, SOL_SOCKET, SO_RCVBUF,
+ &priv->windowsize,
+ sizeof (priv->windowsize)) < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "setting receive window size "
+ "failed: %d: %d: %s", priv->sock,
+ priv->windowsize,
+ strerror (errno));
+ }
+
+ if (setsockopt (priv->sock, SOL_SOCKET, SO_SNDBUF,
+ &priv->windowsize,
+ sizeof (priv->windowsize)) < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "setting send window size failed:"
+ " %d: %d: %s", priv->sock,
+ priv->windowsize,
+ strerror (errno));
+ }
+ }
+
+ if (priv->nodelay && (sa_family != AF_UNIX)) {
+ ret = __socket_nodelay (priv->sock);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "setsockopt() failed for NODELAY (%s)",
+ strerror (errno));
+ }
+ }
+
+ if (!priv->bio) {
+ ret = __socket_nonblock (priv->sock);
+
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "NBIO on %d failed (%s)",
+ priv->sock, strerror (errno));
+ close (priv->sock);
+ priv->sock = -1;
+ goto unlock;
+ }
+ }
+
+ ret = __socket_server_bind (this);
+
+ if (ret == -1) {
+ /* logged inside __socket_server_bind() */
+ close (priv->sock);
+ priv->sock = -1;
+ goto unlock;
+ }
+
+ if (priv->backlog)
+ ret = listen (priv->sock, priv->backlog);
+ else
+ ret = listen (priv->sock, 10);
+
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not set socket %d to listen mode (%s)",
+ priv->sock, strerror (errno));
+ close (priv->sock);
+ priv->sock = -1;
+ goto unlock;
+ }
+
+ rpc_transport_ref (this);
+
+ priv->idx = event_register (ctx->event_pool, priv->sock,
+ socket_server_event_handler,
+ this, 1, 0);
+
+ if (priv->idx == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "could not register socket %d with events",
+ priv->sock);
+ ret = -1;
+ close (priv->sock);
+ priv->sock = -1;
+ goto unlock;
+ }
+ }
+unlock:
+ pthread_mutex_unlock (&priv->lock);
+
+out:
+ return ret;
+}
+
+
+static int32_t
+socket_submit_request (rpc_transport_t *this, rpc_transport_req_t *req)
+{
+ socket_private_t *priv = NULL;
+ int ret = -1;
+ char need_poll_out = 0;
+ char need_append = 1;
+ struct ioq *entry = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ char a_byte = 'j';
+
+ GF_VALIDATE_OR_GOTO ("socket", this, out);
+ GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+
+ priv = this->private;
+ ctx = this->ctx;
+
+ pthread_mutex_lock (&priv->lock);
+ {
+ if (priv->connected != 1) {
+ if (!priv->submit_log && !priv->connect_finish_log) {
+ gf_log (this->name, GF_LOG_INFO,
+ "not connected (priv->connected = %d)",
+ priv->connected);
+ priv->submit_log = 1;
+ }
+ goto unlock;
+ }
+
+ priv->submit_log = 0;
+ entry = __socket_ioq_new (this, &req->msg);
+ if (!entry)
+ goto unlock;
+
+ if (list_empty (&priv->ioq)) {
+ ret = __socket_ioq_churn_entry (this, entry, 1);
+
+ if (ret == 0) {
+ need_append = 0;
+ }
+ if (ret > 0) {
+ need_poll_out = 1;
+ }
+ }
+
+ if (need_append) {
+ list_add_tail (&entry->list, &priv->ioq);
+ if (priv->own_thread) {
+ /*
+ * Make sure the polling thread wakes up, by
+ * writing a byte to represent this entry.
+ */
+ if (write(priv->pipe[1],&a_byte,1) < 1) {
+ gf_log(this->name,GF_LOG_WARNING,
+ "write error on pipe");
+ }
+ }
+ ret = 0;
+ }
+ if (!priv->own_thread && need_poll_out) {
+ /* first entry to wait. continue writing on POLLOUT */
+ priv->idx = event_select_on (ctx->event_pool,
+ priv->sock,
+ priv->idx, -1, 1);
+ }
+ }
+unlock:
+ pthread_mutex_unlock (&priv->lock);
+
+out:
+ return ret;
+}
+
+
+static int32_t
+socket_submit_reply (rpc_transport_t *this, rpc_transport_reply_t *reply)
+{
+ socket_private_t *priv = NULL;
+ int ret = -1;
+ char need_poll_out = 0;
+ char need_append = 1;
+ struct ioq *entry = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ char a_byte = 'd';
+
+ GF_VALIDATE_OR_GOTO ("socket", this, out);
+ GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+
+ priv = this->private;
+ ctx = this->ctx;
+
+ pthread_mutex_lock (&priv->lock);
+ {
+ if (priv->connected != 1) {
+ if (!priv->submit_log && !priv->connect_finish_log) {
+ gf_log (this->name, GF_LOG_INFO,
+ "not connected (priv->connected = %d)",
+ priv->connected);
+ priv->submit_log = 1;
+ }
+ goto unlock;
+ }
+
+ priv->submit_log = 0;
+ entry = __socket_ioq_new (this, &reply->msg);
+ if (!entry)
+ goto unlock;
+
+ if (list_empty (&priv->ioq)) {
+ ret = __socket_ioq_churn_entry (this, entry, 1);
+
+ if (ret == 0) {
+ need_append = 0;
+ }
+ if (ret > 0) {
+ need_poll_out = 1;
+ }
+ }
+
+ if (need_append) {
+ list_add_tail (&entry->list, &priv->ioq);
+ if (priv->own_thread) {
+ /*
+ * Make sure the polling thread wakes up, by
+ * writing a byte to represent this entry.
+ */
+ if (write(priv->pipe[1],&a_byte,1) < 1) {
+ gf_log(this->name,GF_LOG_WARNING,
+ "write error on pipe");
+ }
+ }
+ ret = 0;
+ }
+ if (!priv->own_thread && need_poll_out) {
+ /* first entry to wait. continue writing on POLLOUT */
+ priv->idx = event_select_on (ctx->event_pool,
+ priv->sock,
+ priv->idx, -1, 1);
+ }
+ }
+unlock:
+ pthread_mutex_unlock (&priv->lock);
+
+out:
+ return ret;
+}
+
+
+static int32_t
+socket_getpeername (rpc_transport_t *this, char *hostname, int hostlen)
+{
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("socket", this, out);
+ GF_VALIDATE_OR_GOTO ("socket", hostname, out);
+
+ if (hostlen < (strlen (this->peerinfo.identifier) + 1)) {
+ goto out;
+ }
+
+ strcpy (hostname, this->peerinfo.identifier);
+ ret = 0;
+out:
+ return ret;
+}
+
+
+static int32_t
+socket_getpeeraddr (rpc_transport_t *this, char *peeraddr, int addrlen,
+ struct sockaddr_storage *sa, socklen_t salen)
+{
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("socket", this, out);
+ GF_VALIDATE_OR_GOTO ("socket", sa, out);
+
+ *sa = this->peerinfo.sockaddr;
+
+ if (peeraddr != NULL) {
+ ret = socket_getpeername (this, peeraddr, addrlen);
+ }
+ ret = 0;
+
+out:
+ return ret;
+}
+
+
+static int32_t
+socket_getmyname (rpc_transport_t *this, char *hostname, int hostlen)
+{
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("socket", this, out);
+ GF_VALIDATE_OR_GOTO ("socket", hostname, out);
+
+ if (hostlen < (strlen (this->myinfo.identifier) + 1)) {
+ goto out;
+ }
+
+ strcpy (hostname, this->myinfo.identifier);
+ ret = 0;
+out:
+ return ret;
+}
+
+
+static int32_t
+socket_getmyaddr (rpc_transport_t *this, char *myaddr, int addrlen,
+ struct sockaddr_storage *sa, socklen_t salen)
+{
+ int32_t ret = 0;
+
+ GF_VALIDATE_OR_GOTO ("socket", this, out);
+ GF_VALIDATE_OR_GOTO ("socket", sa, out);
+
+ *sa = this->myinfo.sockaddr;
+
+ if (myaddr != NULL) {
+ ret = socket_getmyname (this, myaddr, addrlen);
+ }
+
+out:
+ return ret;
+}
+
+
+static int
+socket_throttle (rpc_transport_t *this, gf_boolean_t onoff)
+{
+ socket_private_t *priv = NULL;
+
+ priv = this->private;
+
+ /* The way we implement throttling is by taking off
+ POLLIN event from the polled flags. This way we
+ never get called with the POLLIN event and therefore
+ will never read() any more data until throttling
+ is turned off.
+ */
+ priv->idx = event_select_on (this->ctx->event_pool, priv->sock,
+ priv->idx, (int) !onoff, -1);
+ return 0;
+}
+
+
+struct rpc_transport_ops tops = {
+ .listen = socket_listen,
+ .connect = socket_connect,
+ .disconnect = socket_disconnect,
+ .submit_request = socket_submit_request,
+ .submit_reply = socket_submit_reply,
+ .get_peername = socket_getpeername,
+ .get_peeraddr = socket_getpeeraddr,
+ .get_myname = socket_getmyname,
+ .get_myaddr = socket_getmyaddr,
+ .throttle = socket_throttle,
+};
+
+int
+reconfigure (rpc_transport_t *this, dict_t *options)
+{
+ socket_private_t *priv = NULL;
+ gf_boolean_t tmp_bool = _gf_false;
+ char *optstr = NULL;
+ int ret = 0;
+ uint64_t windowsize = 0;
+
+ GF_VALIDATE_OR_GOTO ("socket", this, out);
+ GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+
+ if (!this || !this->private) {
+ ret =-1;
+ goto out;
+ }
+
+ priv = this->private;
+
+ if (dict_get_str (this->options, "transport.socket.keepalive",
+ &optstr) == 0) {
+ if (gf_string2boolean (optstr, &tmp_bool) == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "'transport.socket.keepalive' takes only "
+ "boolean options, not taking any action");
+ priv->keepalive = 1;
+ ret = -1;
+ goto out;
+ }
+ gf_log (this->name, GF_LOG_DEBUG, "Reconfigured transport.socket.keepalive");
+
+ priv->keepalive = tmp_bool;
+ }
+ else
+ priv->keepalive = 1;
+
+ optstr = NULL;
+ if (dict_get_str (this->options, "tcp-window-size",
+ &optstr) == 0) {
+ if (gf_string2bytesize (optstr, &windowsize) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "invalid number format: %s", optstr);
+ goto out;
+ }
+ }
+
+ priv->windowsize = (int)windowsize;
+
+ ret = 0;
+out:
+ return ret;
+
+}
+
+static int
+socket_init (rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ gf_boolean_t tmp_bool = 0;
+ uint64_t windowsize = GF_DEFAULT_SOCKET_WINDOW_SIZE;
+ char *optstr = NULL;
+ uint32_t keepalive = 0;
+ uint32_t backlog = 0;
+ int session_id = 0;
+
+ if (this->private) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR,
+ "double init attempted");
+ return -1;
+ }
+
+ priv = GF_CALLOC (1, sizeof (*priv), gf_common_mt_socket_private_t);
+ if (!priv) {
+ return -1;
+ }
+ memset(priv,0,sizeof(*priv));
+
+ pthread_mutex_init (&priv->lock, NULL);
+
+ priv->sock = -1;
+ priv->idx = -1;
+ priv->connected = -1;
+ priv->nodelay = 1;
+ priv->bio = 0;
+ priv->windowsize = GF_DEFAULT_SOCKET_WINDOW_SIZE;
+ INIT_LIST_HEAD (&priv->ioq);
+
+ /* All the below section needs 'this->options' to be present */
+ if (!this->options)
+ goto out;
+
+ if (dict_get (this->options, "non-blocking-io")) {
+ optstr = data_to_str (dict_get (this->options,
+ "non-blocking-io"));
+
+ if (gf_string2boolean (optstr, &tmp_bool) == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "'non-blocking-io' takes only boolean options,"
+ " not taking any action");
+ tmp_bool = 1;
+ }
+
+ if (!tmp_bool) {
+ priv->bio = 1;
+ gf_log (this->name, GF_LOG_WARNING,
+ "disabling non-blocking IO");
+ }
+ }
+
+ optstr = NULL;
+
+ // By default, we enable NODELAY
+ if (dict_get (this->options, "transport.socket.nodelay")) {
+ optstr = data_to_str (dict_get (this->options,
+ "transport.socket.nodelay"));
+
+ if (gf_string2boolean (optstr, &tmp_bool) == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "'transport.socket.nodelay' takes only "
+ "boolean options, not taking any action");
+ tmp_bool = 1;
+ }
+ if (!tmp_bool) {
+ priv->nodelay = 0;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "disabling nodelay");
+ }
+ }
+
+ optstr = NULL;
+ if (dict_get_str (this->options, "tcp-window-size",
+ &optstr) == 0) {
+ if (gf_string2bytesize (optstr, &windowsize) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "invalid number format: %s", optstr);
+ return -1;
+ }
+ }
+
+ priv->windowsize = (int)windowsize;
+
+ optstr = NULL;
+ /* Enable Keep-alive by default. */
+ priv->keepalive = 1;
+ priv->keepaliveintvl = 2;
+ priv->keepaliveidle = 20;
+ if (dict_get_str (this->options, "transport.socket.keepalive",
+ &optstr) == 0) {
+ if (gf_string2boolean (optstr, &tmp_bool) == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "'transport.socket.keepalive' takes only "
+ "boolean options, not taking any action");
+ tmp_bool = 1;
+ }
+
+ if (!tmp_bool)
+ priv->keepalive = 0;
+ }
+
+ if (dict_get_uint32 (this->options,
+ "transport.socket.keepalive-interval",
+ &keepalive) == 0) {
+ priv->keepaliveintvl = keepalive;
+ }
+
+ if (dict_get_uint32 (this->options,
+ "transport.socket.keepalive-time",
+ &keepalive) == 0) {
+ priv->keepaliveidle = keepalive;
+ }
+
+ if (dict_get_uint32 (this->options,
+ "transport.socket.listen-backlog",
+ &backlog) == 0) {
+ priv->backlog = backlog;
+ }
+
+ optstr = NULL;
+
+ /* Check if socket read failures are to be logged */
+ priv->read_fail_log = 1;
+ if (dict_get (this->options, "transport.socket.read-fail-log")) {
+ optstr = data_to_str (dict_get (this->options, "transport.socket.read-fail-log"));
+ if (gf_string2boolean (optstr, &tmp_bool) == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "'transport.socket.read-fail-log' takes only "
+ "boolean options; logging socket read fails");
+ }
+ else if (tmp_bool == _gf_false) {
+ priv->read_fail_log = 0;
+ }
+ }
+
+ priv->windowsize = (int)windowsize;
+
+ priv->ssl_enabled = _gf_false;
+ if (dict_get_str(this->options,SSL_ENABLED_OPT,&optstr) == 0) {
+ if (gf_string2boolean (optstr, &priv->ssl_enabled) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "invalid value given for ssl-enabled boolean");
+ }
+ }
+
+ priv->ssl_own_cert = DEFAULT_CERT_PATH;
+ if (dict_get_str(this->options,SSL_OWN_CERT_OPT,&optstr) == 0) {
+ if (!priv->ssl_enabled) {
+ gf_log(this->name,GF_LOG_WARNING,
+ "%s specified without %s (ignored)",
+ SSL_OWN_CERT_OPT, SSL_ENABLED_OPT);
+ }
+ priv->ssl_own_cert = optstr;
+ }
+ priv->ssl_own_cert = gf_strdup(priv->ssl_own_cert);
+
+ priv->ssl_private_key = DEFAULT_KEY_PATH;
+ if (dict_get_str(this->options,SSL_PRIVATE_KEY_OPT,&optstr) == 0) {
+ if (!priv->ssl_enabled) {
+ gf_log(this->name,GF_LOG_WARNING,
+ "%s specified without %s (ignored)",
+ SSL_PRIVATE_KEY_OPT, SSL_ENABLED_OPT);
+ }
+ priv->ssl_private_key = optstr;
+ }
+ priv->ssl_private_key = gf_strdup(priv->ssl_private_key);
+
+ priv->ssl_ca_list = DEFAULT_CA_PATH;
+ if (dict_get_str(this->options,SSL_CA_LIST_OPT,&optstr) == 0) {
+ if (!priv->ssl_enabled) {
+ gf_log(this->name,GF_LOG_WARNING,
+ "%s specified without %s (ignored)",
+ SSL_CA_LIST_OPT, SSL_ENABLED_OPT);
+ }
+ priv->ssl_ca_list = optstr;
+ }
+ priv->ssl_ca_list = gf_strdup(priv->ssl_ca_list);
+
+ gf_log(this->name,GF_LOG_INFO,"SSL support is %s",
+ priv->ssl_enabled ? "ENABLED" : "NOT enabled");
+ /*
+ * This might get overridden temporarily in socket_connect (q.v.)
+ * if we're using the glusterd portmapper.
+ */
+ priv->use_ssl = priv->ssl_enabled;
+
+ priv->own_thread = priv->use_ssl;
+ if (dict_get_str(this->options,OWN_THREAD_OPT,&optstr) == 0) {
+ if (gf_string2boolean (optstr, &priv->own_thread) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "invalid value given for own-thread boolean");
+ }
+ }
+ gf_log(this->name,GF_LOG_INFO,"using %s polling thread",
+ priv->own_thread ? "private" : "system");
+
+ if (priv->use_ssl) {
+ SSL_library_init();
+ SSL_load_error_strings();
+ priv->ssl_meth = (SSL_METHOD *)TLSv1_method();
+ priv->ssl_ctx = SSL_CTX_new(priv->ssl_meth);
+
+ if (SSL_CTX_set_cipher_list(priv->ssl_ctx,
+ "HIGH:-SSLv2") == 0) {
+ gf_log(this->name,GF_LOG_ERROR,
+ "failed to find any valid ciphers");
+ goto err;
+ }
+
+ if (!SSL_CTX_use_certificate_chain_file(priv->ssl_ctx,
+ priv->ssl_own_cert)) {
+ gf_log(this->name,GF_LOG_ERROR,
+ "could not load our cert");
+ goto err;
+ }
+
+ if (!SSL_CTX_use_PrivateKey_file(priv->ssl_ctx,
+ priv->ssl_private_key,
+ SSL_FILETYPE_PEM)) {
+ gf_log(this->name,GF_LOG_ERROR,
+ "could not load private key");
+ goto err;
+ }
+
+ if (!SSL_CTX_load_verify_locations(priv->ssl_ctx,
+ priv->ssl_ca_list,0)) {
+ gf_log(this->name,GF_LOG_ERROR,
+ "could not load CA list");
+ goto err;
+ }
+
+#if (OPENSSL_VERSION_NUMBER < 0x00905100L)
+ SSL_CTX_set_verify_depth(ctx,1);
+#endif
+
+ priv->ssl_session_id = ++session_id;
+ SSL_CTX_set_session_id_context(priv->ssl_ctx,
+ (void *)&priv->ssl_session_id,
+ sizeof(priv->ssl_session_id));
+
+ SSL_CTX_set_verify(priv->ssl_ctx,SSL_VERIFY_PEER,0);
+ }
+
+ if (priv->own_thread) {
+ priv->ot_state = OT_IDLE;
+ }
+
+out:
+ this->private = priv;
+ return 0;
+
+err:
+ if (priv->ssl_own_cert) {
+ GF_FREE(priv->ssl_own_cert);
+ }
+ if (priv->ssl_private_key) {
+ GF_FREE(priv->ssl_private_key);
+ }
+ if (priv->ssl_ca_list) {
+ GF_FREE(priv->ssl_ca_list);
+ }
+ GF_FREE(priv);
+ return -1;
+}
+
+
+void
+fini (rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+
+ if (!this)
+ return;
+
+ priv = this->private;
+ if (priv) {
+ if (priv->sock != -1) {
+ pthread_mutex_lock (&priv->lock);
+ {
+ __socket_ioq_flush (this);
+ __socket_reset (this);
+ }
+ pthread_mutex_unlock (&priv->lock);
+ }
+ gf_log (this->name, GF_LOG_TRACE,
+ "transport %p destroyed", this);
+
+ pthread_mutex_destroy (&priv->lock);
+ if (priv->ssl_private_key) {
+ GF_FREE(priv->ssl_private_key);
+ }
+ if (priv->ssl_own_cert) {
+ GF_FREE(priv->ssl_own_cert);
+ }
+ if (priv->ssl_ca_list) {
+ GF_FREE(priv->ssl_ca_list);
+ }
+ GF_FREE (priv);
+ }
+
+ this->private = NULL;
+}
+
+
+int32_t
+init (rpc_transport_t *this)
+{
+ int ret = -1;
+
+ ret = socket_init (this);
+
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG, "socket_init() failed");
+ }
+
+ return ret;
+}
+
+struct volume_options options[] = {
+ { .key = {"remote-port",
+ "transport.remote-port",
+ "transport.socket.remote-port"},
+ .type = GF_OPTION_TYPE_INT
+ },
+ { .key = {"transport.socket.listen-port", "listen-port"},
+ .type = GF_OPTION_TYPE_INT
+ },
+ { .key = {"transport.socket.bind-address", "bind-address" },
+ .type = GF_OPTION_TYPE_INTERNET_ADDRESS
+ },
+ { .key = {"transport.socket.connect-path", "connect-path"},
+ .type = GF_OPTION_TYPE_ANY
+ },
+ { .key = {"transport.socket.bind-path", "bind-path"},
+ .type = GF_OPTION_TYPE_ANY
+ },
+ { .key = {"transport.socket.listen-path", "listen-path"},
+ .type = GF_OPTION_TYPE_ANY
+ },
+ { .key = { "transport.address-family",
+ "address-family" },
+ .value = {"inet", "inet6", "unix", "inet-sdp" },
+ .type = GF_OPTION_TYPE_STR
+ },
+
+ { .key = {"non-blocking-io"},
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = {"tcp-window-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = GF_MIN_SOCKET_WINDOW_SIZE,
+ .max = GF_MAX_SOCKET_WINDOW_SIZE
+ },
+ { .key = {"transport.socket.nodelay"},
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = {"transport.socket.lowlat"},
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = {"transport.socket.keepalive"},
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = {"transport.socket.keepalive-interval"},
+ .type = GF_OPTION_TYPE_INT
+ },
+ { .key = {"transport.socket.keepalive-time"},
+ .type = GF_OPTION_TYPE_INT
+ },
+ { .key = {"transport.socket.listen-backlog"},
+ .type = GF_OPTION_TYPE_INT
+ },
+ { .key = {"transport.socket.read-fail-log"},
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = {SSL_ENABLED_OPT},
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = {SSL_OWN_CERT_OPT},
+ .type = GF_OPTION_TYPE_STR
+ },
+ { .key = {SSL_PRIVATE_KEY_OPT},
+ .type = GF_OPTION_TYPE_STR
+ },
+ { .key = {SSL_CA_LIST_OPT},
+ .type = GF_OPTION_TYPE_STR
+ },
+ { .key = {OWN_THREAD_OPT},
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = {NULL} }
+};
diff --git a/rpc/rpc-transport/socket/src/socket.h b/rpc/rpc-transport/socket/src/socket.h
new file mode 100644
index 000000000..e0b412fcc
--- /dev/null
+++ b/rpc/rpc-transport/socket/src/socket.h
@@ -0,0 +1,239 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _SOCKET_H
+#define _SOCKET_H
+
+#include <openssl/ssl.h>
+#include <openssl/err.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "event.h"
+#include "rpc-transport.h"
+#include "logging.h"
+#include "dict.h"
+#include "mem-pool.h"
+#include "globals.h"
+
+#ifndef MAX_IOVEC
+#define MAX_IOVEC 16
+#endif /* MAX_IOVEC */
+
+#define GF_DEFAULT_SOCKET_LISTEN_PORT GF_DEFAULT_BASE_PORT
+
+#define RPC_MAX_FRAGMENT_SIZE 0x7fffffff
+
+/* The default window size will be 0, indicating not to set
+ * it to any size. Default size of Linux is found to be
+ * performance friendly.
+ * Linux allows us to over-ride the max values for the system.
+ * Should we over-ride them? Because if we set a value larger than the default
+ * setsockopt will fail. Having larger values might be beneficial for
+ * IB links.
+ */
+#define GF_DEFAULT_SOCKET_WINDOW_SIZE (0)
+#define GF_MAX_SOCKET_WINDOW_SIZE (1 * GF_UNIT_MB)
+#define GF_MIN_SOCKET_WINDOW_SIZE (0)
+#define GF_USE_DEFAULT_KEEPALIVE (-1)
+
+typedef enum {
+ SP_STATE_NADA = 0,
+ SP_STATE_COMPLETE,
+ SP_STATE_READING_FRAGHDR,
+ SP_STATE_READ_FRAGHDR,
+ SP_STATE_READING_FRAG,
+} sp_rpcrecord_state_t;
+
+typedef enum {
+ SP_STATE_RPCFRAG_INIT,
+ SP_STATE_READING_MSGTYPE,
+ SP_STATE_READ_MSGTYPE,
+} sp_rpcfrag_state_t;
+
+typedef enum {
+ SP_STATE_SIMPLE_MSG_INIT,
+ SP_STATE_READING_SIMPLE_MSG,
+} sp_rpcfrag_simple_msg_state_t;
+
+typedef enum {
+ SP_STATE_VECTORED_REQUEST_INIT,
+ SP_STATE_READING_CREDBYTES,
+ SP_STATE_READ_CREDBYTES, /* read credential data. */
+ SP_STATE_READING_VERFBYTES,
+ SP_STATE_READ_VERFBYTES, /* read verifier data */
+ SP_STATE_READING_PROGHDR,
+ SP_STATE_READ_PROGHDR,
+ SP_STATE_READING_PROGHDR_XDATA,
+ SP_STATE_READ_PROGHDR_XDATA, /* It's a bad "name" in the generic
+ RPC state machine, but greatly
+ aids code review (and xdata is
+ the only "consumer" of this state)
+ */
+ SP_STATE_READING_PROG,
+} sp_rpcfrag_vectored_request_state_t;
+
+typedef enum {
+ SP_STATE_REQUEST_HEADER_INIT,
+ SP_STATE_READING_RPCHDR1,
+ SP_STATE_READ_RPCHDR1, /* read msg from beginning till and
+ * including credlen
+ */
+} sp_rpcfrag_request_header_state_t;
+
+struct ioq {
+ union {
+ struct list_head list;
+ struct {
+ struct ioq *next;
+ struct ioq *prev;
+ };
+ };
+
+ uint32_t fraghdr;
+ struct iovec vector[MAX_IOVEC];
+ int count;
+ struct iovec *pending_vector;
+ int pending_count;
+ struct iobref *iobref;
+};
+
+typedef struct {
+ sp_rpcfrag_request_header_state_t header_state;
+ sp_rpcfrag_vectored_request_state_t vector_state;
+ int vector_sizer_state;
+} sp_rpcfrag_request_state_t;
+
+typedef enum {
+ SP_STATE_VECTORED_REPLY_STATUS_INIT,
+ SP_STATE_READING_REPLY_STATUS,
+ SP_STATE_READ_REPLY_STATUS,
+} sp_rpcfrag_vectored_reply_status_state_t;
+
+typedef enum {
+ SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT,
+ SP_STATE_READING_PROC_HEADER,
+ SP_STATE_READING_PROC_OPAQUE,
+ SP_STATE_READ_PROC_OPAQUE,
+ SP_STATE_READ_PROC_HEADER,
+} sp_rpcfrag_vectored_reply_accepted_success_state_t;
+
+typedef enum {
+ SP_STATE_ACCEPTED_REPLY_INIT,
+ SP_STATE_READING_REPLY_VERFLEN,
+ SP_STATE_READ_REPLY_VERFLEN,
+ SP_STATE_READING_REPLY_VERFBYTES,
+ SP_STATE_READ_REPLY_VERFBYTES,
+} sp_rpcfrag_vectored_reply_accepted_state_t;
+
+typedef struct {
+ uint32_t accept_status;
+ sp_rpcfrag_vectored_reply_status_state_t status_state;
+ sp_rpcfrag_vectored_reply_accepted_state_t accepted_state;
+ sp_rpcfrag_vectored_reply_accepted_success_state_t accepted_success_state;
+} sp_rpcfrag_vectored_reply_state_t;
+
+struct gf_sock_incoming_frag {
+ char *fragcurrent;
+ uint32_t bytes_read;
+ uint32_t remaining_size;
+ struct iovec vector;
+ struct iovec *pending_vector;
+ union {
+ sp_rpcfrag_request_state_t request;
+ sp_rpcfrag_vectored_reply_state_t reply;
+ } call_body;
+
+ sp_rpcfrag_simple_msg_state_t simple_state;
+ sp_rpcfrag_state_t state;
+};
+
+#define GF_SOCKET_RA_MAX 1024
+
+struct gf_sock_incoming {
+ sp_rpcrecord_state_t record_state;
+ struct gf_sock_incoming_frag frag;
+ char *proghdr_base_addr;
+ struct iobuf *iobuf;
+ size_t iobuf_size;
+ struct iovec vector[2];
+ int count;
+ struct iovec payload_vector;
+ struct iobref *iobref;
+ rpc_request_info_t *request_info;
+ struct iovec *pending_vector;
+ int pending_count;
+ uint32_t fraghdr;
+ char complete_record;
+ msg_type_t msg_type;
+ size_t total_bytes_read;
+
+ size_t ra_read;
+ size_t ra_max;
+ size_t ra_served;
+ char *ra_buf;
+};
+
+typedef enum {
+ OT_IDLE, /* Uninitialized or termination complete. */
+ OT_SPAWNING, /* Past pthread_create but not in thread yet. */
+ OT_RUNNING, /* Poller thread running normally. */
+ OT_CALLBACK, /* Poller thread in the middle of a callback. */
+ OT_PLEASE_DIE, /* Poller termination requested. */
+} ot_state_t;
+
+typedef struct {
+ int32_t sock;
+ int32_t idx;
+ /* -1 = not connected. 0 = in progress. 1 = connected */
+ char connected;
+ char bio;
+ char connect_finish_log;
+ char submit_log;
+ union {
+ struct list_head ioq;
+ struct {
+ struct ioq *ioq_next;
+ struct ioq *ioq_prev;
+ };
+ };
+ struct gf_sock_incoming incoming;
+ pthread_mutex_t lock;
+ int windowsize;
+ char lowlat;
+ char nodelay;
+ int keepalive;
+ int keepaliveidle;
+ int keepaliveintvl;
+ uint32_t backlog;
+ gf_boolean_t read_fail_log;
+ gf_boolean_t ssl_enabled;
+ gf_boolean_t use_ssl;
+ SSL_METHOD *ssl_meth;
+ SSL_CTX *ssl_ctx;
+ int ssl_session_id;
+ BIO *ssl_sbio;
+ SSL *ssl_ssl;
+ char *ssl_own_cert;
+ char *ssl_private_key;
+ char *ssl_ca_list;
+ pthread_t thread;
+ int pipe[2];
+ gf_boolean_t own_thread;
+ ot_state_t ot_state;
+ uint32_t ot_gen;
+ gf_boolean_t is_server;
+} socket_private_t;
+
+
+#endif
diff --git a/rpc/xdr/Makefile.am b/rpc/xdr/Makefile.am
new file mode 100644
index 000000000..af437a64d
--- /dev/null
+++ b/rpc/xdr/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = src
diff --git a/rpc/xdr/src/Makefile.am b/rpc/xdr/src/Makefile.am
new file mode 100644
index 000000000..949e75e8d
--- /dev/null
+++ b/rpc/xdr/src/Makefile.am
@@ -0,0 +1,25 @@
+lib_LTLIBRARIES = libgfxdr.la
+
+libgfxdr_la_CFLAGS = -Wall $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS)
+
+libgfxdr_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 \
+ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/rpc-lib/src
+
+libgfxdr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la
+
+libgfxdr_la_SOURCES = xdr-generic.c rpc-common-xdr.c \
+ glusterfs3-xdr.c \
+ cli1-xdr.c \
+ glusterd1-xdr.c \
+ portmap-xdr.c \
+ nlm4-xdr.c xdr-nfs3.c msg-nfs3.c nsm-xdr.c \
+ nlmcbk-xdr.c acl3-xdr.c
+
+noinst_HEADERS = xdr-generic.h rpc-common-xdr.h \
+ glusterfs3-xdr.h glusterfs3.h \
+ cli1-xdr.h \
+ glusterd1-xdr.h \
+ portmap-xdr.h \
+ nlm4-xdr.h xdr-nfs3.h msg-nfs3.h nsm-xdr.h \
+ nlmcbk-xdr.h acl3-xdr.h
diff --git a/rpc/xdr/src/acl.x b/rpc/xdr/src/acl.x
new file mode 100644
index 000000000..6cf4f1d3b
--- /dev/null
+++ b/rpc/xdr/src/acl.x
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+
+struct aclentry {
+ int type;
+ int uid;
+ int perm;
+};
+
+struct getaclargs {
+ netobj fh;
+ int mask;
+};
+
+struct getaclreply {
+ int status;
+ int attr_follows;
+ struct fattr3 attr;
+ int mask;
+ int aclcount;
+ struct aclentry aclentry<>;
+ int daclcount;
+ struct aclentry daclentry<>;
+};
+
+struct setaclargs {
+ netobj fh;
+ int mask;
+ int aclcount;
+ struct aclentry aclentry<>;
+ int daclcount;
+ struct aclentry daclentry<>;
+};
+
+struct setaclreply {
+ int status;
+ int attr_follows;
+ struct fattr3 attr;
+};
+
diff --git a/rpc/xdr/src/acl3-xdr.c b/rpc/xdr/src/acl3-xdr.c
new file mode 100644
index 000000000..8fbaeff16
--- /dev/null
+++ b/rpc/xdr/src/acl3-xdr.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+/*
+ * Please do not edit this file.
+ * It was generated using rpcgen.
+ */
+
+#include "acl3-xdr.h"
+
+bool_t
+xdr_aclentry (XDR *xdrs, aclentry *objp)
+{
+ if (!xdr_int (xdrs, &objp->type))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->uid))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->perm))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_getaclargs (XDR *xdrs, getaclargs *objp)
+{
+ if (!xdr_netobj (xdrs, &objp->fh))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->mask))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_getaclreply (XDR *xdrs, getaclreply *objp)
+{
+ if (!xdr_int (xdrs, &objp->status))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->attr_follows))
+ return FALSE;
+ if (!xdr_fattr3 (xdrs, &objp->attr))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->mask))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->aclcount))
+ return FALSE;
+ if (!xdr_array (xdrs, (char **)&objp->aclentry.aclentry_val, (u_int *) &objp->aclentry.aclentry_len, ~0,
+ sizeof (aclentry), (xdrproc_t) xdr_aclentry))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->daclcount))
+ return FALSE;
+ if (!xdr_array (xdrs, (char **)&objp->daclentry.daclentry_val, (u_int *) &objp->daclentry.daclentry_len, ~0,
+ sizeof (aclentry), (xdrproc_t) xdr_aclentry))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_setaclargs (XDR *xdrs, setaclargs *objp)
+{
+ if (!xdr_netobj (xdrs, &objp->fh))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->mask))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->aclcount))
+ return FALSE;
+ if (!xdr_array (xdrs, (char **)&objp->aclentry.aclentry_val, (u_int *) &objp->aclentry.aclentry_len, ~0,
+ sizeof (aclentry), (xdrproc_t) xdr_aclentry))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->daclcount))
+ return FALSE;
+ if (!xdr_array (xdrs, (char **)&objp->daclentry.daclentry_val, (u_int *) &objp->daclentry.daclentry_len, ~0,
+ sizeof (aclentry), (xdrproc_t) xdr_aclentry))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_setaclreply (XDR *xdrs, setaclreply *objp)
+{
+ if (!xdr_int (xdrs, &objp->status))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->attr_follows))
+ return FALSE;
+ if (!xdr_fattr3 (xdrs, &objp->attr))
+ return FALSE;
+ return TRUE;
+}
diff --git a/rpc/xdr/src/acl3-xdr.h b/rpc/xdr/src/acl3-xdr.h
new file mode 100644
index 000000000..7cebaed69
--- /dev/null
+++ b/rpc/xdr/src/acl3-xdr.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+/*
+ * Please do not edit this file.
+ * It was generated using rpcgen.
+ */
+
+#ifndef _ACL_H_RPCGEN
+#define _ACL_H_RPCGEN
+
+#include <rpc/rpc.h>
+#include "xdr-nfs3.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+struct aclentry {
+ int type;
+ int uid;
+ int perm;
+};
+typedef struct aclentry aclentry;
+
+struct getaclargs {
+ netobj fh;
+ int mask;
+};
+typedef struct getaclargs getaclargs;
+
+struct getaclreply {
+ int status;
+ int attr_follows;
+ struct fattr3 attr;
+ int mask;
+ int aclcount;
+ struct {
+ u_int aclentry_len;
+ struct aclentry *aclentry_val;
+ } aclentry;
+ int daclcount;
+ struct {
+ u_int daclentry_len;
+ struct aclentry *daclentry_val;
+ } daclentry;
+};
+typedef struct getaclreply getaclreply;
+
+struct setaclargs {
+ netobj fh;
+ int mask;
+ int aclcount;
+ struct {
+ u_int aclentry_len;
+ struct aclentry *aclentry_val;
+ } aclentry;
+ int daclcount;
+ struct {
+ u_int daclentry_len;
+ struct aclentry *daclentry_val;
+ } daclentry;
+};
+typedef struct setaclargs setaclargs;
+
+struct setaclreply {
+ int status;
+ int attr_follows;
+ struct fattr3 attr;
+};
+typedef struct setaclreply setaclreply;
+
+#define ACL3_NULL 0
+#define ACL3_GETACL 1
+#define ACL3_SETACL 2
+#define ACL3_PROC_COUNT 3
+/* the xdr functions */
+
+#if defined(__STDC__) || defined(__cplusplus)
+extern bool_t xdr_aclentry (XDR *, aclentry*);
+extern bool_t xdr_getaclargs (XDR *, getaclargs*);
+extern bool_t xdr_getaclreply (XDR *, getaclreply*);
+extern bool_t xdr_setaclargs (XDR *, setaclargs*);
+extern bool_t xdr_setaclreply (XDR *, setaclreply*);
+
+#else /* K&R C */
+extern bool_t xdr_aclentry ();
+extern bool_t xdr_getaclargs ();
+extern bool_t xdr_getaclreply ();
+extern bool_t xdr_setaclargs ();
+extern bool_t xdr_setaclreply ();
+
+#endif /* K&R C */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !_ACL_H_RPCGEN */
diff --git a/rpc/xdr/src/cli1-xdr.c b/rpc/xdr/src/cli1-xdr.c
new file mode 100644
index 000000000..97b210e14
--- /dev/null
+++ b/rpc/xdr/src/cli1-xdr.c
@@ -0,0 +1,365 @@
+/*
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xdr-common.h"
+#include "compat.h"
+
+#if defined(__GNUC__)
+#if __GNUC__ >= 4
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#endif
+#endif
+
+/*
+ * Please do not edit this file.
+ * It was generated using rpcgen.
+ */
+
+#include "cli1-xdr.h"
+
+bool_t
+xdr_gf_cli_defrag_type (XDR *xdrs, gf_cli_defrag_type *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_enum (xdrs, (enum_t *) objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf_defrag_status_t (XDR *xdrs, gf_defrag_status_t *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_enum (xdrs, (enum_t *) objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf1_cluster_type (XDR *xdrs, gf1_cluster_type *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_enum (xdrs, (enum_t *) objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf1_cli_replace_op (XDR *xdrs, gf1_cli_replace_op *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_enum (xdrs, (enum_t *) objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf1_op_commands (XDR *xdrs, gf1_op_commands *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_enum (xdrs, (enum_t *) objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf_quota_type (XDR *xdrs, gf_quota_type *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_enum (xdrs, (enum_t *) objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf1_cli_friends_list (XDR *xdrs, gf1_cli_friends_list *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_enum (xdrs, (enum_t *) objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf1_cli_get_volume (XDR *xdrs, gf1_cli_get_volume *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_enum (xdrs, (enum_t *) objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf1_cli_sync_volume (XDR *xdrs, gf1_cli_sync_volume *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_enum (xdrs, (enum_t *) objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf1_cli_op_flags (XDR *xdrs, gf1_cli_op_flags *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_enum (xdrs, (enum_t *) objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf1_cli_gsync_set (XDR *xdrs, gf1_cli_gsync_set *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_enum (xdrs, (enum_t *) objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf1_cli_stats_op (XDR *xdrs, gf1_cli_stats_op *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_enum (xdrs, (enum_t *) objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf1_cli_top_op (XDR *xdrs, gf1_cli_top_op *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_enum (xdrs, (enum_t *) objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf_cli_status_type (XDR *xdrs, gf_cli_status_type *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_enum (xdrs, (enum_t *) objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf1_cli_snapshot (XDR *xdrs, gf1_cli_snapshot *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_enum (xdrs, (enum_t *) objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf1_cli_snapshot_config (XDR *xdrs, gf1_cli_snapshot_config *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_enum (xdrs, (enum_t *) objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf_cli_req (XDR *xdrs, gf_cli_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf_cli_rsp (XDR *xdrs, gf_cli_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->op_errstr, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf1_cli_peer_list_req (XDR *xdrs, gf1_cli_peer_list_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->flags))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf1_cli_peer_list_rsp (XDR *xdrs, gf1_cli_peer_list_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->friends.friends_val, (u_int *) &objp->friends.friends_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf1_cli_fsm_log_req (XDR *xdrs, gf1_cli_fsm_log_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_string (xdrs, &objp->name, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf1_cli_fsm_log_rsp (XDR *xdrs, gf1_cli_fsm_log_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->op_errstr, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->fsm_log.fsm_log_val, (u_int *) &objp->fsm_log.fsm_log_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf1_cli_getwd_req (XDR *xdrs, gf1_cli_getwd_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->unused))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf1_cli_getwd_rsp (XDR *xdrs, gf1_cli_getwd_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->wd, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf1_cli_mount_req (XDR *xdrs, gf1_cli_mount_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_string (xdrs, &objp->label, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf1_cli_mount_rsp (XDR *xdrs, gf1_cli_mount_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->path, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf1_cli_umount_req (XDR *xdrs, gf1_cli_umount_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->lazy))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->path, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf1_cli_umount_rsp (XDR *xdrs, gf1_cli_umount_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ return TRUE;
+}
diff --git a/rpc/xdr/src/cli1-xdr.h b/rpc/xdr/src/cli1-xdr.h
new file mode 100644
index 000000000..5e8c29fbb
--- /dev/null
+++ b/rpc/xdr/src/cli1-xdr.h
@@ -0,0 +1,369 @@
+/*
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xdr-common.h"
+#include "compat.h"
+
+#if defined(__GNUC__)
+#if __GNUC__ >= 4
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#endif
+#endif
+
+/*
+ * Please do not edit this file.
+ * It was generated using rpcgen.
+ */
+
+#ifndef _CLI1_XDR_H_RPCGEN
+#define _CLI1_XDR_H_RPCGEN
+
+#include <rpc/rpc.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+enum gf_cli_defrag_type {
+ GF_DEFRAG_CMD_START = 1,
+ GF_DEFRAG_CMD_STOP = 1 + 1,
+ GF_DEFRAG_CMD_STATUS = 1 + 2,
+ GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3,
+ GF_DEFRAG_CMD_START_FORCE = 1 + 4,
+};
+typedef enum gf_cli_defrag_type gf_cli_defrag_type;
+
+enum gf_defrag_status_t {
+ GF_DEFRAG_STATUS_NOT_STARTED = 0,
+ GF_DEFRAG_STATUS_STARTED = 1,
+ GF_DEFRAG_STATUS_STOPPED = 2,
+ GF_DEFRAG_STATUS_COMPLETE = 3,
+ GF_DEFRAG_STATUS_FAILED = 4,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED = 5,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_STOPPED = 6,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE = 7,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_FAILED = 8,
+};
+typedef enum gf_defrag_status_t gf_defrag_status_t;
+
+enum gf1_cluster_type {
+ GF_CLUSTER_TYPE_NONE = 0,
+ GF_CLUSTER_TYPE_STRIPE = 0 + 1,
+ GF_CLUSTER_TYPE_REPLICATE = 0 + 2,
+ GF_CLUSTER_TYPE_STRIPE_REPLICATE = 0 + 3,
+};
+typedef enum gf1_cluster_type gf1_cluster_type;
+
+enum gf1_cli_replace_op {
+ GF_REPLACE_OP_NONE = 0,
+ GF_REPLACE_OP_START = 0 + 1,
+ GF_REPLACE_OP_COMMIT = 0 + 2,
+ GF_REPLACE_OP_PAUSE = 0 + 3,
+ GF_REPLACE_OP_ABORT = 0 + 4,
+ GF_REPLACE_OP_STATUS = 0 + 5,
+ GF_REPLACE_OP_COMMIT_FORCE = 0 + 6,
+};
+typedef enum gf1_cli_replace_op gf1_cli_replace_op;
+
+enum gf1_op_commands {
+ GF_OP_CMD_NONE = 0,
+ GF_OP_CMD_START = 0 + 1,
+ GF_OP_CMD_COMMIT = 0 + 2,
+ GF_OP_CMD_STOP = 0 + 3,
+ GF_OP_CMD_STATUS = 0 + 4,
+ GF_OP_CMD_COMMIT_FORCE = 0 + 5,
+};
+typedef enum gf1_op_commands gf1_op_commands;
+
+enum gf_quota_type {
+ GF_QUOTA_OPTION_TYPE_NONE = 0,
+ GF_QUOTA_OPTION_TYPE_ENABLE = 0 + 1,
+ GF_QUOTA_OPTION_TYPE_DISABLE = 0 + 2,
+ GF_QUOTA_OPTION_TYPE_LIMIT_USAGE = 0 + 3,
+ GF_QUOTA_OPTION_TYPE_REMOVE = 0 + 4,
+ GF_QUOTA_OPTION_TYPE_LIST = 0 + 5,
+ GF_QUOTA_OPTION_TYPE_VERSION = 0 + 6,
+};
+typedef enum gf_quota_type gf_quota_type;
+
+enum gf1_cli_friends_list {
+ GF_CLI_LIST_PEERS = 1,
+ GF_CLI_LIST_POOL_NODES = 2,
+};
+typedef enum gf1_cli_friends_list gf1_cli_friends_list;
+
+enum gf1_cli_get_volume {
+ GF_CLI_GET_VOLUME_ALL = 1,
+ GF_CLI_GET_VOLUME = 1 + 1,
+ GF_CLI_GET_NEXT_VOLUME = 1 + 2,
+};
+typedef enum gf1_cli_get_volume gf1_cli_get_volume;
+
+enum gf1_cli_sync_volume {
+ GF_CLI_SYNC_ALL = 1,
+};
+typedef enum gf1_cli_sync_volume gf1_cli_sync_volume;
+
+enum gf1_cli_op_flags {
+ GF_CLI_FLAG_OP_FORCE = 1,
+};
+typedef enum gf1_cli_op_flags gf1_cli_op_flags;
+
+enum gf1_cli_gsync_set {
+ GF_GSYNC_OPTION_TYPE_NONE = 0,
+ GF_GSYNC_OPTION_TYPE_START = 1,
+ GF_GSYNC_OPTION_TYPE_STOP = 2,
+ GF_GSYNC_OPTION_TYPE_CONFIG = 3,
+ GF_GSYNC_OPTION_TYPE_STATUS = 4,
+ GF_GSYNC_OPTION_TYPE_ROTATE = 5,
+ GF_GSYNC_OPTION_TYPE_CREATE = 6,
+ GF_GSYNC_OPTION_TYPE_DELETE = 7,
+};
+typedef enum gf1_cli_gsync_set gf1_cli_gsync_set;
+
+enum gf1_cli_stats_op {
+ GF_CLI_STATS_NONE = 0,
+ GF_CLI_STATS_START = 1,
+ GF_CLI_STATS_STOP = 2,
+ GF_CLI_STATS_INFO = 3,
+ GF_CLI_STATS_TOP = 4,
+};
+typedef enum gf1_cli_stats_op gf1_cli_stats_op;
+
+enum gf1_cli_top_op {
+ GF_CLI_TOP_NONE = 0,
+ GF_CLI_TOP_OPEN = 0 + 1,
+ GF_CLI_TOP_READ = 0 + 2,
+ GF_CLI_TOP_WRITE = 0 + 3,
+ GF_CLI_TOP_OPENDIR = 0 + 4,
+ GF_CLI_TOP_READDIR = 0 + 5,
+ GF_CLI_TOP_READ_PERF = 0 + 6,
+ GF_CLI_TOP_WRITE_PERF = 0 + 7,
+};
+typedef enum gf1_cli_top_op gf1_cli_top_op;
+
+enum gf_cli_status_type {
+ GF_CLI_STATUS_NONE = 0x0000,
+ GF_CLI_STATUS_MEM = 0x0001,
+ GF_CLI_STATUS_CLIENTS = 0x0002,
+ GF_CLI_STATUS_INODE = 0x0004,
+ GF_CLI_STATUS_FD = 0x0008,
+ GF_CLI_STATUS_CALLPOOL = 0x0010,
+ GF_CLI_STATUS_DETAIL = 0x0020,
+ GF_CLI_STATUS_TASKS = 0x0040,
+ GF_CLI_STATUS_MASK = 0x00FF,
+ GF_CLI_STATUS_VOL = 0x0100,
+ GF_CLI_STATUS_ALL = 0x0200,
+ GF_CLI_STATUS_BRICK = 0x0400,
+ GF_CLI_STATUS_NFS = 0x0800,
+ GF_CLI_STATUS_SHD = 0x1000,
+};
+typedef enum gf_cli_status_type gf_cli_status_type;
+
+enum gf1_cli_snapshot {
+ GF_SNAP_OPTION_TYPE_NONE = 0,
+ GF_SNAP_OPTION_TYPE_CREATE = 1,
+ GF_SNAP_OPTION_TYPE_DELETE = 2,
+ GF_SNAP_OPTION_TYPE_RESTORE = 3,
+ GF_SNAP_OPTION_TYPE_START = 4,
+ GF_SNAP_OPTION_TYPE_STOP = 5,
+ GF_SNAP_OPTION_TYPE_LIST = 6,
+ GF_SNAP_OPTION_TYPE_STATUS = 7,
+ GF_SNAP_OPTION_TYPE_CONFIG = 8,
+ GF_SNAP_OPTION_TYPE_INFO = 9,
+};
+typedef enum gf1_cli_snapshot gf1_cli_snapshot;
+
+enum gf1_cli_snapshot_info {
+ GF_SNAP_INFO_TYPE_ALL = 0,
+ GF_SNAP_INFO_TYPE_SNAP = 1,
+ GF_SNAP_INFO_TYPE_VOL = 2,
+};
+typedef enum gf1_cli_snapshot_info gf1_cli_snapshot_info;
+
+enum gf1_cli_snapshot_config {
+ GF_SNAP_CONFIG_TYPE_NONE = 0,
+ GF_SNAP_CONFIG_TYPE_SET = 1,
+ GF_SNAP_CONFIG_DISPLAY = 2,
+};
+typedef enum gf1_cli_snapshot_config gf1_cli_snapshot_config;
+
+enum gf1_cli_snapshot_status {
+ GF_SNAP_STATUS_TYPE_ALL = 0,
+ GF_SNAP_STATUS_TYPE_SNAP = 1,
+ GF_SNAP_STATUS_TYPE_VOL = 2,
+};
+typedef enum gf1_cli_snapshot_status gf1_cli_snapshot_status;
+
+struct gf_cli_req {
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+};
+typedef struct gf_cli_req gf_cli_req;
+
+struct gf_cli_rsp {
+ int op_ret;
+ int op_errno;
+ char *op_errstr;
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+};
+typedef struct gf_cli_rsp gf_cli_rsp;
+
+struct gf1_cli_peer_list_req {
+ int flags;
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+};
+typedef struct gf1_cli_peer_list_req gf1_cli_peer_list_req;
+
+struct gf1_cli_peer_list_rsp {
+ int op_ret;
+ int op_errno;
+ struct {
+ u_int friends_len;
+ char *friends_val;
+ } friends;
+};
+typedef struct gf1_cli_peer_list_rsp gf1_cli_peer_list_rsp;
+
+struct gf1_cli_fsm_log_req {
+ char *name;
+};
+typedef struct gf1_cli_fsm_log_req gf1_cli_fsm_log_req;
+
+struct gf1_cli_fsm_log_rsp {
+ int op_ret;
+ int op_errno;
+ char *op_errstr;
+ struct {
+ u_int fsm_log_len;
+ char *fsm_log_val;
+ } fsm_log;
+};
+typedef struct gf1_cli_fsm_log_rsp gf1_cli_fsm_log_rsp;
+
+struct gf1_cli_getwd_req {
+ int unused;
+};
+typedef struct gf1_cli_getwd_req gf1_cli_getwd_req;
+
+struct gf1_cli_getwd_rsp {
+ int op_ret;
+ int op_errno;
+ char *wd;
+};
+typedef struct gf1_cli_getwd_rsp gf1_cli_getwd_rsp;
+
+struct gf1_cli_mount_req {
+ char *label;
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+};
+typedef struct gf1_cli_mount_req gf1_cli_mount_req;
+
+struct gf1_cli_mount_rsp {
+ int op_ret;
+ int op_errno;
+ char *path;
+};
+typedef struct gf1_cli_mount_rsp gf1_cli_mount_rsp;
+
+struct gf1_cli_umount_req {
+ int lazy;
+ char *path;
+};
+typedef struct gf1_cli_umount_req gf1_cli_umount_req;
+
+struct gf1_cli_umount_rsp {
+ int op_ret;
+ int op_errno;
+};
+typedef struct gf1_cli_umount_rsp gf1_cli_umount_rsp;
+
+/* the xdr functions */
+
+#if defined(__STDC__) || defined(__cplusplus)
+extern bool_t xdr_gf_cli_defrag_type (XDR *, gf_cli_defrag_type*);
+extern bool_t xdr_gf_defrag_status_t (XDR *, gf_defrag_status_t*);
+extern bool_t xdr_gf1_cluster_type (XDR *, gf1_cluster_type*);
+extern bool_t xdr_gf1_cli_replace_op (XDR *, gf1_cli_replace_op*);
+extern bool_t xdr_gf1_op_commands (XDR *, gf1_op_commands*);
+extern bool_t xdr_gf_quota_type (XDR *, gf_quota_type*);
+extern bool_t xdr_gf1_cli_friends_list (XDR *, gf1_cli_friends_list*);
+extern bool_t xdr_gf1_cli_get_volume (XDR *, gf1_cli_get_volume*);
+extern bool_t xdr_gf1_cli_sync_volume (XDR *, gf1_cli_sync_volume*);
+extern bool_t xdr_gf1_cli_op_flags (XDR *, gf1_cli_op_flags*);
+extern bool_t xdr_gf1_cli_gsync_set (XDR *, gf1_cli_gsync_set*);
+extern bool_t xdr_gf1_cli_stats_op (XDR *, gf1_cli_stats_op*);
+extern bool_t xdr_gf1_cli_top_op (XDR *, gf1_cli_top_op*);
+extern bool_t xdr_gf_cli_status_type (XDR *, gf_cli_status_type*);
+extern bool_t xdr_gf1_cli_snapshot (XDR *, gf1_cli_snapshot*);
+extern bool_t xdr_gf1_cli_snapshot_config (XDR *, gf1_cli_snapshot_config*);
+extern bool_t xdr_gf_cli_req (XDR *, gf_cli_req*);
+extern bool_t xdr_gf_cli_rsp (XDR *, gf_cli_rsp*);
+extern bool_t xdr_gf1_cli_peer_list_req (XDR *, gf1_cli_peer_list_req*);
+extern bool_t xdr_gf1_cli_peer_list_rsp (XDR *, gf1_cli_peer_list_rsp*);
+extern bool_t xdr_gf1_cli_fsm_log_req (XDR *, gf1_cli_fsm_log_req*);
+extern bool_t xdr_gf1_cli_fsm_log_rsp (XDR *, gf1_cli_fsm_log_rsp*);
+extern bool_t xdr_gf1_cli_getwd_req (XDR *, gf1_cli_getwd_req*);
+extern bool_t xdr_gf1_cli_getwd_rsp (XDR *, gf1_cli_getwd_rsp*);
+extern bool_t xdr_gf1_cli_mount_req (XDR *, gf1_cli_mount_req*);
+extern bool_t xdr_gf1_cli_mount_rsp (XDR *, gf1_cli_mount_rsp*);
+extern bool_t xdr_gf1_cli_umount_req (XDR *, gf1_cli_umount_req*);
+extern bool_t xdr_gf1_cli_umount_rsp (XDR *, gf1_cli_umount_rsp*);
+
+#else /* K&R C */
+extern bool_t xdr_gf_cli_defrag_type ();
+extern bool_t xdr_gf_defrag_status_t ();
+extern bool_t xdr_gf1_cluster_type ();
+extern bool_t xdr_gf1_cli_replace_op ();
+extern bool_t xdr_gf1_op_commands ();
+extern bool_t xdr_gf_quota_type ();
+extern bool_t xdr_gf1_cli_friends_list ();
+extern bool_t xdr_gf1_cli_get_volume ();
+extern bool_t xdr_gf1_cli_sync_volume ();
+extern bool_t xdr_gf1_cli_op_flags ();
+extern bool_t xdr_gf1_cli_gsync_set ();
+extern bool_t xdr_gf1_cli_stats_op ();
+extern bool_t xdr_gf1_cli_top_op ();
+extern bool_t xdr_gf_cli_status_type ();
+extern bool_t xdr_gf1_cli_snapshot ();
+extern bool_t xdr_gf1_cli_snapshot_config ();
+extern bool_t xdr_gf_cli_req ();
+extern bool_t xdr_gf_cli_rsp ();
+extern bool_t xdr_gf1_cli_peer_list_req ();
+extern bool_t xdr_gf1_cli_peer_list_rsp ();
+extern bool_t xdr_gf1_cli_fsm_log_req ();
+extern bool_t xdr_gf1_cli_fsm_log_rsp ();
+extern bool_t xdr_gf1_cli_getwd_req ();
+extern bool_t xdr_gf1_cli_getwd_rsp ();
+extern bool_t xdr_gf1_cli_mount_req ();
+extern bool_t xdr_gf1_cli_mount_rsp ();
+extern bool_t xdr_gf1_cli_umount_req ();
+extern bool_t xdr_gf1_cli_umount_rsp ();
+
+#endif /* K&R C */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !_CLI1_XDR_H_RPCGEN */
diff --git a/rpc/xdr/src/cli1-xdr.x b/rpc/xdr/src/cli1-xdr.x
new file mode 100644
index 000000000..f9d29b7e1
--- /dev/null
+++ b/rpc/xdr/src/cli1-xdr.x
@@ -0,0 +1,207 @@
+ enum gf_cli_defrag_type {
+ GF_DEFRAG_CMD_START = 1,
+ GF_DEFRAG_CMD_STOP,
+ GF_DEFRAG_CMD_STATUS,
+ GF_DEFRAG_CMD_START_LAYOUT_FIX,
+ GF_DEFRAG_CMD_START_FORCE /* used by remove-brick data migration */
+} ;
+
+ enum gf_defrag_status_t {
+ GF_DEFRAG_STATUS_NOT_STARTED,
+ GF_DEFRAG_STATUS_STARTED,
+ GF_DEFRAG_STATUS_STOPPED,
+ GF_DEFRAG_STATUS_COMPLETE,
+ GF_DEFRAG_STATUS_FAILED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_STOPPED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_FAILED
+} ;
+
+ enum gf1_cluster_type {
+ GF_CLUSTER_TYPE_NONE = 0,
+ GF_CLUSTER_TYPE_STRIPE,
+ GF_CLUSTER_TYPE_REPLICATE,
+ GF_CLUSTER_TYPE_STRIPE_REPLICATE
+} ;
+
+ enum gf1_cli_replace_op {
+ GF_REPLACE_OP_NONE = 0,
+ GF_REPLACE_OP_START,
+ GF_REPLACE_OP_COMMIT,
+ GF_REPLACE_OP_PAUSE,
+ GF_REPLACE_OP_ABORT,
+ GF_REPLACE_OP_STATUS,
+ GF_REPLACE_OP_COMMIT_FORCE
+} ;
+
+ enum gf1_op_commands {
+ GF_OP_CMD_NONE = 0,
+ GF_OP_CMD_START,
+ GF_OP_CMD_COMMIT,
+ GF_OP_CMD_STOP,
+ GF_OP_CMD_STATUS,
+ GF_OP_CMD_COMMIT_FORCE
+} ;
+
+enum gf_quota_type {
+ GF_QUOTA_OPTION_TYPE_NONE = 0,
+ GF_QUOTA_OPTION_TYPE_ENABLE,
+ GF_QUOTA_OPTION_TYPE_DISABLE,
+ GF_QUOTA_OPTION_TYPE_LIMIT_USAGE,
+ GF_QUOTA_OPTION_TYPE_REMOVE,
+ GF_QUOTA_OPTION_TYPE_LIST,
+ GF_QUOTA_OPTION_TYPE_VERSION
+};
+
+enum gf1_cli_friends_list {
+ GF_CLI_LIST_PEERS = 1,
+ GF_CLI_LIST_POOL_NODES = 2
+} ;
+
+enum gf1_cli_get_volume {
+ GF_CLI_GET_VOLUME_ALL = 1,
+ GF_CLI_GET_VOLUME,
+ GF_CLI_GET_NEXT_VOLUME
+} ;
+
+enum gf1_cli_sync_volume {
+ GF_CLI_SYNC_ALL = 1
+} ;
+
+enum gf1_cli_op_flags {
+ GF_CLI_FLAG_OP_FORCE = 1
+};
+
+enum gf1_cli_gsync_set {
+ GF_GSYNC_OPTION_TYPE_NONE,
+ GF_GSYNC_OPTION_TYPE_START,
+ GF_GSYNC_OPTION_TYPE_STOP,
+ GF_GSYNC_OPTION_TYPE_CONFIG,
+ GF_GSYNC_OPTION_TYPE_STATUS,
+ GF_GSYNC_OPTION_TYPE_ROTATE,
+ GF_GSYNC_OPTION_TYPE_CREATE,
+ GF_GSYNC_OPTION_TYPE_DELETE
+};
+
+enum gf1_cli_stats_op {
+ GF_CLI_STATS_NONE = 0,
+ GF_CLI_STATS_START = 1,
+ GF_CLI_STATS_STOP = 2,
+ GF_CLI_STATS_INFO = 3,
+ GF_CLI_STATS_TOP = 4
+};
+
+enum gf1_cli_top_op {
+ GF_CLI_TOP_NONE = 0,
+ GF_CLI_TOP_OPEN,
+ GF_CLI_TOP_READ,
+ GF_CLI_TOP_WRITE,
+ GF_CLI_TOP_OPENDIR,
+ GF_CLI_TOP_READDIR,
+ GF_CLI_TOP_READ_PERF,
+ GF_CLI_TOP_WRITE_PERF
+};
+
+/* The unconventional hex numbers help us perform
+ bit-wise operations which reduces complexity */
+enum gf_cli_status_type {
+ GF_CLI_STATUS_NONE = 0x0000,
+ GF_CLI_STATUS_MEM = 0x0001, /*0000000000001*/
+ GF_CLI_STATUS_CLIENTS = 0x0002, /*0000000000010*/
+ GF_CLI_STATUS_INODE = 0x0004, /*0000000000100*/
+ GF_CLI_STATUS_FD = 0x0008, /*0000000001000*/
+ GF_CLI_STATUS_CALLPOOL = 0x0010, /*0000000010000*/
+ GF_CLI_STATUS_DETAIL = 0x0020, /*0000000100000*/
+ GF_CLI_STATUS_TASKS = 0x0040, /*0000001000000*/
+ GF_CLI_STATUS_MASK = 0x00FF, /*0000011111111 Used to get the op*/
+ GF_CLI_STATUS_VOL = 0x0100, /*0000100000000*/
+ GF_CLI_STATUS_ALL = 0x0200, /*0001000000000*/
+ GF_CLI_STATUS_BRICK = 0x0400, /*0010000000000*/
+ GF_CLI_STATUS_NFS = 0x0800, /*0100000000000*/
+ GF_CLI_STATUS_SHD = 0x1000 /*1000000000000*/
+};
+
+/* Identifiers for snapshot clis */
+enum gf1_cli_snapshot {
+ GF_SNAP_OPTION_TYPE_NONE = 0,
+ GF_SNAP_OPTION_TYPE_CREATE,
+ GF_SNAP_OPTION_TYPE_DELETE,
+ GF_SNAP_OPTION_TYPE_RESTORE,
+ GF_SNAP_OPTION_TYPE_START,
+ GF_SNAP_OPTION_TYPE_STOP,
+ GF_SNAP_OPTION_TYPE_LIST,
+ GF_SNAP_OPTION_TYPE_STATUS,
+ GF_SNAP_OPTION_TYPE_CONFIG
+};
+
+enum gf1_cli_snapshot_config {
+ GF_SNAP_CONFIG_TYPE_NONE = 0,
+ GF_SNAP_CONFIG_TYPE_SET,
+ GF_SNAP_CONFIG_DISPLAY,
+
+};
+
+ struct gf_cli_req {
+ opaque dict<>;
+} ;
+
+ struct gf_cli_rsp {
+ int op_ret;
+ int op_errno;
+ string op_errstr<>;
+ opaque dict<>;
+} ;
+
+struct gf1_cli_peer_list_req {
+ int flags;
+ opaque dict<>;
+} ;
+
+struct gf1_cli_peer_list_rsp {
+ int op_ret;
+ int op_errno;
+ opaque friends<>;
+} ;
+
+struct gf1_cli_fsm_log_req {
+ string name<>;
+};
+
+struct gf1_cli_fsm_log_rsp {
+ int op_ret;
+ int op_errno;
+ string op_errstr<>;
+ opaque fsm_log<>;
+};
+
+struct gf1_cli_getwd_req {
+ int unused;
+} ;
+
+struct gf1_cli_getwd_rsp {
+ int op_ret;
+ int op_errno;
+ string wd<>;
+};
+
+struct gf1_cli_mount_req {
+ string label<>;
+ opaque dict<>;
+};
+
+struct gf1_cli_mount_rsp {
+ int op_ret;
+ int op_errno;
+ string path<>;
+};
+
+struct gf1_cli_umount_req {
+ int lazy;
+ string path<>;
+};
+
+struct gf1_cli_umount_rsp {
+ int op_ret;
+ int op_errno;
+};
diff --git a/rpc/xdr/src/glusterd1-xdr.c b/rpc/xdr/src/glusterd1-xdr.c
new file mode 100644
index 000000000..7fa98aaeb
--- /dev/null
+++ b/rpc/xdr/src/glusterd1-xdr.c
@@ -0,0 +1,923 @@
+/*
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xdr-common.h"
+#include "compat.h"
+
+#if defined(__GNUC__)
+#if __GNUC__ >= 4
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#endif
+#endif
+
+/*
+ * Please do not edit this file.
+ * It was generated using rpcgen.
+ */
+
+#include "glusterd1-xdr.h"
+
+bool_t
+xdr_glusterd_volume_status (XDR *xdrs, glusterd_volume_status *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_enum (xdrs, (enum_t *) objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gd1_mgmt_probe_req (XDR *xdrs, gd1_mgmt_probe_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->hostname, ~0))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->port))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gd1_mgmt_probe_rsp (XDR *xdrs, gd1_mgmt_probe_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+
+ if (xdrs->x_op == XDR_ENCODE) {
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->hostname, ~0))
+ return FALSE;
+ buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_int (xdrs, &objp->port))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+
+ } else {
+ IXDR_PUT_LONG(buf, objp->port);
+ IXDR_PUT_LONG(buf, objp->op_ret);
+ IXDR_PUT_LONG(buf, objp->op_errno);
+ }
+ if (!xdr_string (xdrs, &objp->op_errstr, ~0))
+ return FALSE;
+ return TRUE;
+ } else if (xdrs->x_op == XDR_DECODE) {
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->hostname, ~0))
+ return FALSE;
+ buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_int (xdrs, &objp->port))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+
+ } else {
+ objp->port = IXDR_GET_LONG(buf);
+ objp->op_ret = IXDR_GET_LONG(buf);
+ objp->op_errno = IXDR_GET_LONG(buf);
+ }
+ if (!xdr_string (xdrs, &objp->op_errstr, ~0))
+ return FALSE;
+ return TRUE;
+ }
+
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->hostname, ~0))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->port))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->op_errstr, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gd1_mgmt_friend_req (XDR *xdrs, gd1_mgmt_friend_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->hostname, ~0))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->port))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->vols.vols_val, (u_int *) &objp->vols.vols_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gd1_mgmt_friend_rsp (XDR *xdrs, gd1_mgmt_friend_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->hostname, ~0))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->port))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gd1_mgmt_unfriend_req (XDR *xdrs, gd1_mgmt_unfriend_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->hostname, ~0))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->port))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gd1_mgmt_unfriend_rsp (XDR *xdrs, gd1_mgmt_unfriend_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->hostname, ~0))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->port))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gd1_mgmt_cluster_lock_req (XDR *xdrs, gd1_mgmt_cluster_lock_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gd1_mgmt_cluster_lock_rsp (XDR *xdrs, gd1_mgmt_cluster_lock_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gd1_mgmt_cluster_unlock_req (XDR *xdrs, gd1_mgmt_cluster_unlock_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gd1_mgmt_cluster_unlock_rsp (XDR *xdrs, gd1_mgmt_cluster_unlock_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gd1_mgmt_stage_op_req (XDR *xdrs, gd1_mgmt_stage_op_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->buf.buf_val, (u_int *) &objp->buf.buf_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gd1_mgmt_stage_op_rsp (XDR *xdrs, gd1_mgmt_stage_op_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+
+ if (xdrs->x_op == XDR_ENCODE) {
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_int (xdrs, &objp->op))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+
+ } else {
+ IXDR_PUT_LONG(buf, objp->op);
+ IXDR_PUT_LONG(buf, objp->op_ret);
+ IXDR_PUT_LONG(buf, objp->op_errno);
+ }
+ if (!xdr_string (xdrs, &objp->op_errstr, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ return TRUE;
+ } else if (xdrs->x_op == XDR_DECODE) {
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_int (xdrs, &objp->op))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+
+ } else {
+ objp->op = IXDR_GET_LONG(buf);
+ objp->op_ret = IXDR_GET_LONG(buf);
+ objp->op_errno = IXDR_GET_LONG(buf);
+ }
+ if (!xdr_string (xdrs, &objp->op_errstr, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ return TRUE;
+ }
+
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->op_errstr, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gd1_mgmt_commit_op_req (XDR *xdrs, gd1_mgmt_commit_op_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->buf.buf_val, (u_int *) &objp->buf.buf_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gd1_mgmt_commit_op_rsp (XDR *xdrs, gd1_mgmt_commit_op_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+
+ if (xdrs->x_op == XDR_ENCODE) {
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_int (xdrs, &objp->op))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+
+ } else {
+ IXDR_PUT_LONG(buf, objp->op);
+ IXDR_PUT_LONG(buf, objp->op_ret);
+ IXDR_PUT_LONG(buf, objp->op_errno);
+ }
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->op_errstr, ~0))
+ return FALSE;
+ return TRUE;
+ } else if (xdrs->x_op == XDR_DECODE) {
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_int (xdrs, &objp->op))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+
+ } else {
+ objp->op = IXDR_GET_LONG(buf);
+ objp->op_ret = IXDR_GET_LONG(buf);
+ objp->op_errno = IXDR_GET_LONG(buf);
+ }
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->op_errstr, ~0))
+ return FALSE;
+ return TRUE;
+ }
+
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->op_errstr, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gd1_mgmt_friend_update (XDR *xdrs, gd1_mgmt_friend_update *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->friends.friends_val, (u_int *) &objp->friends.friends_len, ~0))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->port))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gd1_mgmt_friend_update_rsp (XDR *xdrs, gd1_mgmt_friend_update_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gd1_mgmt_brick_op_req (XDR *xdrs, gd1_mgmt_brick_op_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_string (xdrs, &objp->name, ~0))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->input.input_val, (u_int *) &objp->input.input_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gd1_mgmt_brick_op_rsp (XDR *xdrs, gd1_mgmt_brick_op_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->output.output_val, (u_int *) &objp->output.output_len, ~0))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->op_errstr, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gd1_mgmt_v3_lock_req (XDR *xdrs, gd1_mgmt_v3_lock_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_vector (xdrs, (char *)objp->txn_id, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gd1_mgmt_v3_lock_rsp (XDR *xdrs, gd1_mgmt_v3_lock_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_vector (xdrs, (char *)objp->txn_id, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gd1_mgmt_v3_pre_val_req (XDR *xdrs, gd1_mgmt_v3_pre_val_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gd1_mgmt_v3_pre_val_rsp (XDR *xdrs, gd1_mgmt_v3_pre_val_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+
+ if (xdrs->x_op == XDR_ENCODE) {
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_int (xdrs, &objp->op))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+
+ } else {
+ IXDR_PUT_LONG(buf, objp->op);
+ IXDR_PUT_LONG(buf, objp->op_ret);
+ IXDR_PUT_LONG(buf, objp->op_errno);
+ }
+ if (!xdr_string (xdrs, &objp->op_errstr, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ return TRUE;
+ } else if (xdrs->x_op == XDR_DECODE) {
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_int (xdrs, &objp->op))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+
+ } else {
+ objp->op = IXDR_GET_LONG(buf);
+ objp->op_ret = IXDR_GET_LONG(buf);
+ objp->op_errno = IXDR_GET_LONG(buf);
+ }
+ if (!xdr_string (xdrs, &objp->op_errstr, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ return TRUE;
+ }
+
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->op_errstr, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gd1_mgmt_v3_brick_op_req (XDR *xdrs, gd1_mgmt_v3_brick_op_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gd1_mgmt_v3_brick_op_rsp (XDR *xdrs, gd1_mgmt_v3_brick_op_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+
+ if (xdrs->x_op == XDR_ENCODE) {
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_int (xdrs, &objp->op))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+
+ } else {
+ IXDR_PUT_LONG(buf, objp->op);
+ IXDR_PUT_LONG(buf, objp->op_ret);
+ IXDR_PUT_LONG(buf, objp->op_errno);
+ }
+ if (!xdr_string (xdrs, &objp->op_errstr, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ return TRUE;
+ } else if (xdrs->x_op == XDR_DECODE) {
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_int (xdrs, &objp->op))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+
+ } else {
+ objp->op = IXDR_GET_LONG(buf);
+ objp->op_ret = IXDR_GET_LONG(buf);
+ objp->op_errno = IXDR_GET_LONG(buf);
+ }
+ if (!xdr_string (xdrs, &objp->op_errstr, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ return TRUE;
+ }
+
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->op_errstr, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gd1_mgmt_v3_commit_req (XDR *xdrs, gd1_mgmt_v3_commit_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gd1_mgmt_v3_commit_rsp (XDR *xdrs, gd1_mgmt_v3_commit_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+
+ if (xdrs->x_op == XDR_ENCODE) {
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_int (xdrs, &objp->op))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+
+ } else {
+ IXDR_PUT_LONG(buf, objp->op);
+ IXDR_PUT_LONG(buf, objp->op_ret);
+ IXDR_PUT_LONG(buf, objp->op_errno);
+ }
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->op_errstr, ~0))
+ return FALSE;
+ return TRUE;
+ } else if (xdrs->x_op == XDR_DECODE) {
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_int (xdrs, &objp->op))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+
+ } else {
+ objp->op = IXDR_GET_LONG(buf);
+ objp->op_ret = IXDR_GET_LONG(buf);
+ objp->op_errno = IXDR_GET_LONG(buf);
+ }
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->op_errstr, ~0))
+ return FALSE;
+ return TRUE;
+ }
+
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->op_errstr, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gd1_mgmt_v3_post_val_req (XDR *xdrs, gd1_mgmt_v3_post_val_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gd1_mgmt_v3_post_val_rsp (XDR *xdrs, gd1_mgmt_v3_post_val_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+
+ if (xdrs->x_op == XDR_ENCODE) {
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_int (xdrs, &objp->op))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+
+ } else {
+ IXDR_PUT_LONG(buf, objp->op);
+ IXDR_PUT_LONG(buf, objp->op_ret);
+ IXDR_PUT_LONG(buf, objp->op_errno);
+ }
+ if (!xdr_string (xdrs, &objp->op_errstr, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ return TRUE;
+ } else if (xdrs->x_op == XDR_DECODE) {
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_int (xdrs, &objp->op))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+
+ } else {
+ objp->op = IXDR_GET_LONG(buf);
+ objp->op_ret = IXDR_GET_LONG(buf);
+ objp->op_errno = IXDR_GET_LONG(buf);
+ }
+ if (!xdr_string (xdrs, &objp->op_errstr, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ return TRUE;
+ }
+
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->op_errstr, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gd1_mgmt_v3_unlock_req (XDR *xdrs, gd1_mgmt_v3_unlock_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_vector (xdrs, (char *)objp->txn_id, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gd1_mgmt_v3_unlock_rsp (XDR *xdrs, gd1_mgmt_v3_unlock_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_vector (xdrs, (char *)objp->txn_id, 16,
+ sizeof (u_char), (xdrproc_t) xdr_u_char))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ return TRUE;
+}
diff --git a/rpc/xdr/src/glusterd1-xdr.h b/rpc/xdr/src/glusterd1-xdr.h
new file mode 100644
index 000000000..b6be23d06
--- /dev/null
+++ b/rpc/xdr/src/glusterd1-xdr.h
@@ -0,0 +1,418 @@
+/*
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xdr-common.h"
+#include "compat.h"
+
+#if defined(__GNUC__)
+#if __GNUC__ >= 4
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#endif
+#endif
+
+/*
+ * Please do not edit this file.
+ * It was generated using rpcgen.
+ */
+
+#ifndef _GLUSTERD1_XDR_H_RPCGEN
+#define _GLUSTERD1_XDR_H_RPCGEN
+
+#include <rpc/rpc.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+enum glusterd_volume_status {
+ GLUSTERD_STATUS_NONE = 0,
+ GLUSTERD_STATUS_STARTED = 0 + 1,
+ GLUSTERD_STATUS_STOPPED = 0 + 2,
+};
+typedef enum glusterd_volume_status glusterd_volume_status;
+
+struct gd1_mgmt_probe_req {
+ u_char uuid[16];
+ char *hostname;
+ int port;
+};
+typedef struct gd1_mgmt_probe_req gd1_mgmt_probe_req;
+
+struct gd1_mgmt_probe_rsp {
+ u_char uuid[16];
+ char *hostname;
+ int port;
+ int op_ret;
+ int op_errno;
+ char *op_errstr;
+};
+typedef struct gd1_mgmt_probe_rsp gd1_mgmt_probe_rsp;
+
+struct gd1_mgmt_friend_req {
+ u_char uuid[16];
+ char *hostname;
+ int port;
+ struct {
+ u_int vols_len;
+ char *vols_val;
+ } vols;
+};
+typedef struct gd1_mgmt_friend_req gd1_mgmt_friend_req;
+
+struct gd1_mgmt_friend_rsp {
+ u_char uuid[16];
+ char *hostname;
+ int op_ret;
+ int op_errno;
+ int port;
+};
+typedef struct gd1_mgmt_friend_rsp gd1_mgmt_friend_rsp;
+
+struct gd1_mgmt_unfriend_req {
+ u_char uuid[16];
+ char *hostname;
+ int port;
+};
+typedef struct gd1_mgmt_unfriend_req gd1_mgmt_unfriend_req;
+
+struct gd1_mgmt_unfriend_rsp {
+ u_char uuid[16];
+ char *hostname;
+ int op_ret;
+ int op_errno;
+ int port;
+};
+typedef struct gd1_mgmt_unfriend_rsp gd1_mgmt_unfriend_rsp;
+
+struct gd1_mgmt_cluster_lock_req {
+ u_char uuid[16];
+};
+typedef struct gd1_mgmt_cluster_lock_req gd1_mgmt_cluster_lock_req;
+
+struct gd1_mgmt_cluster_lock_rsp {
+ u_char uuid[16];
+ int op_ret;
+ int op_errno;
+};
+typedef struct gd1_mgmt_cluster_lock_rsp gd1_mgmt_cluster_lock_rsp;
+
+struct gd1_mgmt_cluster_unlock_req {
+ u_char uuid[16];
+};
+typedef struct gd1_mgmt_cluster_unlock_req gd1_mgmt_cluster_unlock_req;
+
+struct gd1_mgmt_cluster_unlock_rsp {
+ u_char uuid[16];
+ int op_ret;
+ int op_errno;
+};
+typedef struct gd1_mgmt_cluster_unlock_rsp gd1_mgmt_cluster_unlock_rsp;
+
+struct gd1_mgmt_stage_op_req {
+ u_char uuid[16];
+ int op;
+ struct {
+ u_int buf_len;
+ char *buf_val;
+ } buf;
+};
+typedef struct gd1_mgmt_stage_op_req gd1_mgmt_stage_op_req;
+
+struct gd1_mgmt_stage_op_rsp {
+ u_char uuid[16];
+ int op;
+ int op_ret;
+ int op_errno;
+ char *op_errstr;
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+};
+typedef struct gd1_mgmt_stage_op_rsp gd1_mgmt_stage_op_rsp;
+
+struct gd1_mgmt_commit_op_req {
+ u_char uuid[16];
+ int op;
+ struct {
+ u_int buf_len;
+ char *buf_val;
+ } buf;
+};
+typedef struct gd1_mgmt_commit_op_req gd1_mgmt_commit_op_req;
+
+struct gd1_mgmt_commit_op_rsp {
+ u_char uuid[16];
+ int op;
+ int op_ret;
+ int op_errno;
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+ char *op_errstr;
+};
+typedef struct gd1_mgmt_commit_op_rsp gd1_mgmt_commit_op_rsp;
+
+struct gd1_mgmt_friend_update {
+ u_char uuid[16];
+ struct {
+ u_int friends_len;
+ char *friends_val;
+ } friends;
+ int port;
+};
+typedef struct gd1_mgmt_friend_update gd1_mgmt_friend_update;
+
+struct gd1_mgmt_friend_update_rsp {
+ u_char uuid[16];
+ int op;
+ int op_ret;
+ int op_errno;
+};
+typedef struct gd1_mgmt_friend_update_rsp gd1_mgmt_friend_update_rsp;
+
+struct gd1_mgmt_brick_op_req {
+ char *name;
+ int op;
+ struct {
+ u_int input_len;
+ char *input_val;
+ } input;
+};
+typedef struct gd1_mgmt_brick_op_req gd1_mgmt_brick_op_req;
+
+struct gd1_mgmt_brick_op_rsp {
+ int op_ret;
+ int op_errno;
+ struct {
+ u_int output_len;
+ char *output_val;
+ } output;
+ char *op_errstr;
+};
+typedef struct gd1_mgmt_brick_op_rsp gd1_mgmt_brick_op_rsp;
+
+struct gd1_mgmt_v3_lock_req {
+ u_char uuid[16];
+ u_char txn_id[16];
+ int op;
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+};
+typedef struct gd1_mgmt_v3_lock_req gd1_mgmt_v3_lock_req;
+
+struct gd1_mgmt_v3_lock_rsp {
+ u_char uuid[16];
+ u_char txn_id[16];
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+ int op_ret;
+ int op_errno;
+};
+typedef struct gd1_mgmt_v3_lock_rsp gd1_mgmt_v3_lock_rsp;
+
+struct gd1_mgmt_v3_pre_val_req {
+ u_char uuid[16];
+ int op;
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+};
+typedef struct gd1_mgmt_v3_pre_val_req gd1_mgmt_v3_pre_val_req;
+
+struct gd1_mgmt_v3_pre_val_rsp {
+ u_char uuid[16];
+ int op;
+ int op_ret;
+ int op_errno;
+ char *op_errstr;
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+};
+typedef struct gd1_mgmt_v3_pre_val_rsp gd1_mgmt_v3_pre_val_rsp;
+
+struct gd1_mgmt_v3_brick_op_req {
+ u_char uuid[16];
+ int op;
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+};
+typedef struct gd1_mgmt_v3_brick_op_req gd1_mgmt_v3_brick_op_req;
+
+struct gd1_mgmt_v3_brick_op_rsp {
+ u_char uuid[16];
+ int op;
+ int op_ret;
+ int op_errno;
+ char *op_errstr;
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+};
+typedef struct gd1_mgmt_v3_brick_op_rsp gd1_mgmt_v3_brick_op_rsp;
+
+struct gd1_mgmt_v3_commit_req {
+ u_char uuid[16];
+ int op;
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+};
+typedef struct gd1_mgmt_v3_commit_req gd1_mgmt_v3_commit_req;
+
+struct gd1_mgmt_v3_commit_rsp {
+ u_char uuid[16];
+ int op;
+ int op_ret;
+ int op_errno;
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+ char *op_errstr;
+};
+typedef struct gd1_mgmt_v3_commit_rsp gd1_mgmt_v3_commit_rsp;
+
+struct gd1_mgmt_v3_post_val_req {
+ u_char uuid[16];
+ int op;
+ int op_ret;
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+};
+typedef struct gd1_mgmt_v3_post_val_req gd1_mgmt_v3_post_val_req;
+
+struct gd1_mgmt_v3_post_val_rsp {
+ u_char uuid[16];
+ int op;
+ int op_ret;
+ int op_errno;
+ char *op_errstr;
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+};
+typedef struct gd1_mgmt_v3_post_val_rsp gd1_mgmt_v3_post_val_rsp;
+
+struct gd1_mgmt_v3_unlock_req {
+ u_char uuid[16];
+ u_char txn_id[16];
+ int op;
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+};
+typedef struct gd1_mgmt_v3_unlock_req gd1_mgmt_v3_unlock_req;
+
+struct gd1_mgmt_v3_unlock_rsp {
+ u_char uuid[16];
+ u_char txn_id[16];
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+ int op_ret;
+ int op_errno;
+};
+typedef struct gd1_mgmt_v3_unlock_rsp gd1_mgmt_v3_unlock_rsp;
+
+/* the xdr functions */
+
+#if defined(__STDC__) || defined(__cplusplus)
+extern bool_t xdr_glusterd_volume_status (XDR *, glusterd_volume_status*);
+extern bool_t xdr_gd1_mgmt_probe_req (XDR *, gd1_mgmt_probe_req*);
+extern bool_t xdr_gd1_mgmt_probe_rsp (XDR *, gd1_mgmt_probe_rsp*);
+extern bool_t xdr_gd1_mgmt_friend_req (XDR *, gd1_mgmt_friend_req*);
+extern bool_t xdr_gd1_mgmt_friend_rsp (XDR *, gd1_mgmt_friend_rsp*);
+extern bool_t xdr_gd1_mgmt_unfriend_req (XDR *, gd1_mgmt_unfriend_req*);
+extern bool_t xdr_gd1_mgmt_unfriend_rsp (XDR *, gd1_mgmt_unfriend_rsp*);
+extern bool_t xdr_gd1_mgmt_cluster_lock_req (XDR *, gd1_mgmt_cluster_lock_req*);
+extern bool_t xdr_gd1_mgmt_cluster_lock_rsp (XDR *, gd1_mgmt_cluster_lock_rsp*);
+extern bool_t xdr_gd1_mgmt_cluster_unlock_req (XDR *, gd1_mgmt_cluster_unlock_req*);
+extern bool_t xdr_gd1_mgmt_cluster_unlock_rsp (XDR *, gd1_mgmt_cluster_unlock_rsp*);
+extern bool_t xdr_gd1_mgmt_stage_op_req (XDR *, gd1_mgmt_stage_op_req*);
+extern bool_t xdr_gd1_mgmt_stage_op_rsp (XDR *, gd1_mgmt_stage_op_rsp*);
+extern bool_t xdr_gd1_mgmt_commit_op_req (XDR *, gd1_mgmt_commit_op_req*);
+extern bool_t xdr_gd1_mgmt_commit_op_rsp (XDR *, gd1_mgmt_commit_op_rsp*);
+extern bool_t xdr_gd1_mgmt_friend_update (XDR *, gd1_mgmt_friend_update*);
+extern bool_t xdr_gd1_mgmt_friend_update_rsp (XDR *, gd1_mgmt_friend_update_rsp*);
+extern bool_t xdr_gd1_mgmt_brick_op_req (XDR *, gd1_mgmt_brick_op_req*);
+extern bool_t xdr_gd1_mgmt_brick_op_rsp (XDR *, gd1_mgmt_brick_op_rsp*);
+extern bool_t xdr_gd1_mgmt_v3_lock_req (XDR *, gd1_mgmt_v3_lock_req*);
+extern bool_t xdr_gd1_mgmt_v3_lock_rsp (XDR *, gd1_mgmt_v3_lock_rsp*);
+extern bool_t xdr_gd1_mgmt_v3_pre_val_req (XDR *, gd1_mgmt_v3_pre_val_req*);
+extern bool_t xdr_gd1_mgmt_v3_pre_val_rsp (XDR *, gd1_mgmt_v3_pre_val_rsp*);
+extern bool_t xdr_gd1_mgmt_v3_brick_op_req (XDR *, gd1_mgmt_v3_brick_op_req*);
+extern bool_t xdr_gd1_mgmt_v3_brick_op_rsp (XDR *, gd1_mgmt_v3_brick_op_rsp*);
+extern bool_t xdr_gd1_mgmt_v3_commit_req (XDR *, gd1_mgmt_v3_commit_req*);
+extern bool_t xdr_gd1_mgmt_v3_commit_rsp (XDR *, gd1_mgmt_v3_commit_rsp*);
+extern bool_t xdr_gd1_mgmt_v3_post_val_req (XDR *, gd1_mgmt_v3_post_val_req*);
+extern bool_t xdr_gd1_mgmt_v3_post_val_rsp (XDR *, gd1_mgmt_v3_post_val_rsp*);
+extern bool_t xdr_gd1_mgmt_v3_unlock_req (XDR *, gd1_mgmt_v3_unlock_req*);
+extern bool_t xdr_gd1_mgmt_v3_unlock_rsp (XDR *, gd1_mgmt_v3_unlock_rsp*);
+
+#else /* K&R C */
+extern bool_t xdr_glusterd_volume_status ();
+extern bool_t xdr_gd1_mgmt_probe_req ();
+extern bool_t xdr_gd1_mgmt_probe_rsp ();
+extern bool_t xdr_gd1_mgmt_friend_req ();
+extern bool_t xdr_gd1_mgmt_friend_rsp ();
+extern bool_t xdr_gd1_mgmt_unfriend_req ();
+extern bool_t xdr_gd1_mgmt_unfriend_rsp ();
+extern bool_t xdr_gd1_mgmt_cluster_lock_req ();
+extern bool_t xdr_gd1_mgmt_cluster_lock_rsp ();
+extern bool_t xdr_gd1_mgmt_cluster_unlock_req ();
+extern bool_t xdr_gd1_mgmt_cluster_unlock_rsp ();
+extern bool_t xdr_gd1_mgmt_stage_op_req ();
+extern bool_t xdr_gd1_mgmt_stage_op_rsp ();
+extern bool_t xdr_gd1_mgmt_commit_op_req ();
+extern bool_t xdr_gd1_mgmt_commit_op_rsp ();
+extern bool_t xdr_gd1_mgmt_friend_update ();
+extern bool_t xdr_gd1_mgmt_friend_update_rsp ();
+extern bool_t xdr_gd1_mgmt_brick_op_req ();
+extern bool_t xdr_gd1_mgmt_brick_op_rsp ();
+extern bool_t xdr_gd1_mgmt_v3_lock_req ();
+extern bool_t xdr_gd1_mgmt_v3_lock_rsp ();
+extern bool_t xdr_gd1_mgmt_v3_pre_val_req ();
+extern bool_t xdr_gd1_mgmt_v3_pre_val_rsp ();
+extern bool_t xdr_gd1_mgmt_v3_brick_op_req ();
+extern bool_t xdr_gd1_mgmt_v3_brick_op_rsp ();
+extern bool_t xdr_gd1_mgmt_v3_commit_req ();
+extern bool_t xdr_gd1_mgmt_v3_commit_rsp ();
+extern bool_t xdr_gd1_mgmt_v3_post_val_req ();
+extern bool_t xdr_gd1_mgmt_v3_post_val_rsp ();
+extern bool_t xdr_gd1_mgmt_v3_unlock_req ();
+extern bool_t xdr_gd1_mgmt_v3_unlock_rsp ();
+
+#endif /* K&R C */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !_GLUSTERD1_XDR_H_RPCGEN */
diff --git a/rpc/xdr/src/glusterd1-xdr.x b/rpc/xdr/src/glusterd1-xdr.x
new file mode 100644
index 000000000..f5c45c9e4
--- /dev/null
+++ b/rpc/xdr/src/glusterd1-xdr.x
@@ -0,0 +1,218 @@
+ enum glusterd_volume_status {
+ GLUSTERD_STATUS_NONE = 0,
+ GLUSTERD_STATUS_STARTED,
+ GLUSTERD_STATUS_STOPPED
+} ;
+
+ struct gd1_mgmt_probe_req {
+ unsigned char uuid[16];
+ string hostname<>;
+ int port;
+} ;
+
+ struct gd1_mgmt_probe_rsp {
+ unsigned char uuid[16];
+ string hostname<>;
+ int port;
+ int op_ret;
+ int op_errno;
+ string op_errstr<>;
+} ;
+
+struct gd1_mgmt_friend_req {
+ unsigned char uuid[16];
+ string hostname<>;
+ int port;
+ opaque vols<>;
+} ;
+
+struct gd1_mgmt_friend_rsp {
+ unsigned char uuid[16];
+ string hostname<>;
+ int op_ret;
+ int op_errno;
+ int port;
+} ;
+
+struct gd1_mgmt_unfriend_req {
+ unsigned char uuid[16];
+ string hostname<>;
+ int port;
+} ;
+
+struct gd1_mgmt_unfriend_rsp {
+ unsigned char uuid[16];
+ string hostname<>;
+ int op_ret;
+ int op_errno;
+ int port;
+} ;
+
+struct gd1_mgmt_cluster_lock_req {
+ unsigned char uuid[16];
+} ;
+
+struct gd1_mgmt_cluster_lock_rsp {
+ unsigned char uuid[16];
+ int op_ret;
+ int op_errno;
+} ;
+
+struct gd1_mgmt_cluster_unlock_req {
+ unsigned char uuid[16];
+} ;
+
+struct gd1_mgmt_cluster_unlock_rsp {
+ unsigned char uuid[16];
+ int op_ret;
+ int op_errno;
+} ;
+
+struct gd1_mgmt_stage_op_req {
+ unsigned char uuid[16];
+ int op;
+ opaque buf<>;
+} ;
+
+
+struct gd1_mgmt_stage_op_rsp {
+ unsigned char uuid[16];
+ int op;
+ int op_ret;
+ int op_errno;
+ string op_errstr<>;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_commit_op_req {
+ unsigned char uuid[16];
+ int op;
+ opaque buf<>;
+} ;
+
+
+struct gd1_mgmt_commit_op_rsp {
+ unsigned char uuid[16];
+ int op;
+ int op_ret;
+ int op_errno;
+ opaque dict<>;
+ string op_errstr<>;
+} ;
+
+struct gd1_mgmt_friend_update {
+ unsigned char uuid[16];
+ opaque friends<>;
+ int port;
+} ;
+
+struct gd1_mgmt_friend_update_rsp {
+ unsigned char uuid[16];
+ int op;
+ int op_ret;
+ int op_errno;
+} ;
+
+struct gd1_mgmt_brick_op_req {
+ string name<>;
+ int op;
+ opaque input<>;
+} ;
+
+struct gd1_mgmt_brick_op_rsp {
+ int op_ret;
+ int op_errno;
+ opaque output<>;
+ string op_errstr<>;
+} ;
+
+struct gd1_mgmt_v3_lock_req {
+ unsigned char uuid[16];
+ unsigned char txn_id[16];
+ int op;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_v3_lock_rsp {
+ unsigned char uuid[16];
+ unsigned char txn_id[16];
+ opaque dict<>;
+ int op_ret;
+ int op_errno;
+} ;
+
+struct gd1_mgmt_v3_pre_val_req {
+ unsigned char uuid[16];
+ int op;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_v3_pre_val_rsp {
+ unsigned char uuid[16];
+ int op;
+ int op_ret;
+ int op_errno;
+ string op_errstr<>;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_v3_brick_op_req {
+ unsigned char uuid[16];
+ int op;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_v3_brick_op_rsp {
+ unsigned char uuid[16];
+ int op;
+ int op_ret;
+ int op_errno;
+ string op_errstr<>;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_v3_commit_req {
+ unsigned char uuid[16];
+ int op;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_v3_commit_rsp {
+ unsigned char uuid[16];
+ int op;
+ int op_ret;
+ int op_errno;
+ opaque dict<>;
+ string op_errstr<>;
+} ;
+
+struct gd1_mgmt_v3_post_val_req {
+ unsigned char uuid[16];
+ int op;
+ int op_ret;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_v3_post_val_rsp {
+ unsigned char uuid[16];
+ int op;
+ int op_ret;
+ int op_errno;
+ string op_errstr<>;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_v3_unlock_req {
+ unsigned char uuid[16];
+ unsigned char txn_id[16];
+ int op;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_v3_unlock_rsp {
+ unsigned char uuid[16];
+ unsigned char txn_id[16];
+ opaque dict<>;
+ int op_ret;
+ int op_errno;
+} ;
diff --git a/rpc/xdr/src/glusterfs3-xdr.c b/rpc/xdr/src/glusterfs3-xdr.c
new file mode 100644
index 000000000..3205c551e
--- /dev/null
+++ b/rpc/xdr/src/glusterfs3-xdr.c
@@ -0,0 +1,2058 @@
+/*
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xdr-common.h"
+#include "compat.h"
+
+#if defined(__GNUC__)
+#if __GNUC__ >= 4
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#endif
+#endif
+
+/*
+ * Please do not edit this file.
+ * It was generated using rpcgen.
+ */
+
+#include "glusterfs3-xdr.h"
+
+bool_t
+xdr_gf_statfs (XDR *xdrs, gf_statfs *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_u_quad_t (xdrs, &objp->bsize))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->frsize))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->blocks))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->bfree))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->bavail))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->files))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->ffree))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->favail))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->fsid))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->flag))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->namemax))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf_proto_flock (XDR *xdrs, gf_proto_flock *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_u_int (xdrs, &objp->type))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->whence))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->start))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->len))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->pid))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->lk_owner.lk_owner_val, (u_int *) &objp->lk_owner.lk_owner_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf_iatt (XDR *xdrs, gf_iatt *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+
+ if (xdrs->x_op == XDR_ENCODE) {
+ if (!xdr_opaque (xdrs, objp->ia_gfid, 16))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->ia_ino))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->ia_dev))
+ return FALSE;
+ buf = XDR_INLINE (xdrs, 4 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_u_int (xdrs, &objp->mode))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->ia_nlink))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->ia_uid))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->ia_gid))
+ return FALSE;
+
+ } else {
+ IXDR_PUT_U_LONG(buf, objp->mode);
+ IXDR_PUT_U_LONG(buf, objp->ia_nlink);
+ IXDR_PUT_U_LONG(buf, objp->ia_uid);
+ IXDR_PUT_U_LONG(buf, objp->ia_gid);
+ }
+ if (!xdr_u_quad_t (xdrs, &objp->ia_rdev))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->ia_size))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->ia_blksize))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->ia_blocks))
+ return FALSE;
+ buf = XDR_INLINE (xdrs, 6 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_u_int (xdrs, &objp->ia_atime))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->ia_atime_nsec))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->ia_mtime))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->ia_mtime_nsec))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->ia_ctime))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->ia_ctime_nsec))
+ return FALSE;
+ } else {
+ IXDR_PUT_U_LONG(buf, objp->ia_atime);
+ IXDR_PUT_U_LONG(buf, objp->ia_atime_nsec);
+ IXDR_PUT_U_LONG(buf, objp->ia_mtime);
+ IXDR_PUT_U_LONG(buf, objp->ia_mtime_nsec);
+ IXDR_PUT_U_LONG(buf, objp->ia_ctime);
+ IXDR_PUT_U_LONG(buf, objp->ia_ctime_nsec);
+ }
+ return TRUE;
+ } else if (xdrs->x_op == XDR_DECODE) {
+ if (!xdr_opaque (xdrs, objp->ia_gfid, 16))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->ia_ino))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->ia_dev))
+ return FALSE;
+ buf = XDR_INLINE (xdrs, 4 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_u_int (xdrs, &objp->mode))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->ia_nlink))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->ia_uid))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->ia_gid))
+ return FALSE;
+
+ } else {
+ objp->mode = IXDR_GET_U_LONG(buf);
+ objp->ia_nlink = IXDR_GET_U_LONG(buf);
+ objp->ia_uid = IXDR_GET_U_LONG(buf);
+ objp->ia_gid = IXDR_GET_U_LONG(buf);
+ }
+ if (!xdr_u_quad_t (xdrs, &objp->ia_rdev))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->ia_size))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->ia_blksize))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->ia_blocks))
+ return FALSE;
+ buf = XDR_INLINE (xdrs, 6 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_u_int (xdrs, &objp->ia_atime))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->ia_atime_nsec))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->ia_mtime))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->ia_mtime_nsec))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->ia_ctime))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->ia_ctime_nsec))
+ return FALSE;
+ } else {
+ objp->ia_atime = IXDR_GET_U_LONG(buf);
+ objp->ia_atime_nsec = IXDR_GET_U_LONG(buf);
+ objp->ia_mtime = IXDR_GET_U_LONG(buf);
+ objp->ia_mtime_nsec = IXDR_GET_U_LONG(buf);
+ objp->ia_ctime = IXDR_GET_U_LONG(buf);
+ objp->ia_ctime_nsec = IXDR_GET_U_LONG(buf);
+ }
+ return TRUE;
+ }
+
+ if (!xdr_opaque (xdrs, objp->ia_gfid, 16))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->ia_ino))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->ia_dev))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->mode))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->ia_nlink))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->ia_uid))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->ia_gid))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->ia_rdev))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->ia_size))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->ia_blksize))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->ia_blocks))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->ia_atime))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->ia_atime_nsec))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->ia_mtime))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->ia_mtime_nsec))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->ia_ctime))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->ia_ctime_nsec))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_stat_req (XDR *xdrs, gfs3_stat_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_stat_rsp (XDR *xdrs, gfs3_stat_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->stat))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_readlink_req (XDR *xdrs, gfs3_readlink_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->size))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_readlink_rsp (XDR *xdrs, gfs3_readlink_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->buf))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->path, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_mknod_req (XDR *xdrs, gfs3_mknod_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->pargfid, 16))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->dev))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->mode))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->umask))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->bname, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_mknod_rsp (XDR *xdrs, gfs3_mknod_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->stat))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->preparent))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->postparent))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_mkdir_req (XDR *xdrs, gfs3_mkdir_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->pargfid, 16))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->mode))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->umask))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->bname, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_mkdir_rsp (XDR *xdrs, gfs3_mkdir_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->stat))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->preparent))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->postparent))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_unlink_req (XDR *xdrs, gfs3_unlink_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->pargfid, 16))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->bname, ~0))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->xflags))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_unlink_rsp (XDR *xdrs, gfs3_unlink_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->preparent))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->postparent))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_rmdir_req (XDR *xdrs, gfs3_rmdir_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->pargfid, 16))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->xflags))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->bname, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_rmdir_rsp (XDR *xdrs, gfs3_rmdir_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->preparent))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->postparent))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_symlink_req (XDR *xdrs, gfs3_symlink_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->pargfid, 16))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->bname, ~0))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->umask))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->linkname, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_symlink_rsp (XDR *xdrs, gfs3_symlink_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->stat))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->preparent))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->postparent))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_rename_req (XDR *xdrs, gfs3_rename_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->oldgfid, 16))
+ return FALSE;
+ if (!xdr_opaque (xdrs, objp->newgfid, 16))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->oldbname, ~0))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->newbname, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_rename_rsp (XDR *xdrs, gfs3_rename_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->stat))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->preoldparent))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->postoldparent))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->prenewparent))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->postnewparent))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_link_req (XDR *xdrs, gfs3_link_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->oldgfid, 16))
+ return FALSE;
+ if (!xdr_opaque (xdrs, objp->newgfid, 16))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->newbname, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_link_rsp (XDR *xdrs, gfs3_link_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->stat))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->preparent))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->postparent))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_truncate_req (XDR *xdrs, gfs3_truncate_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->offset))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_truncate_rsp (XDR *xdrs, gfs3_truncate_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->prestat))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->poststat))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_open_req (XDR *xdrs, gfs3_open_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->flags))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_open_rsp (XDR *xdrs, gfs3_open_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_quad_t (xdrs, &objp->fd))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_read_req (XDR *xdrs, gfs3_read_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_quad_t (xdrs, &objp->fd))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->offset))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->size))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->flag))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_read_rsp (XDR *xdrs, gfs3_read_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->stat))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->size))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_lookup_req (XDR *xdrs, gfs3_lookup_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_opaque (xdrs, objp->pargfid, 16))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->flags))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->bname, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_lookup_rsp (XDR *xdrs, gfs3_lookup_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->stat))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->postparent))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_write_req (XDR *xdrs, gfs3_write_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_quad_t (xdrs, &objp->fd))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->offset))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->size))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->flag))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_write_rsp (XDR *xdrs, gfs3_write_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->prestat))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->poststat))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_statfs_req (XDR *xdrs, gfs3_statfs_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_statfs_rsp (XDR *xdrs, gfs3_statfs_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_gf_statfs (xdrs, &objp->statfs))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_lk_req (XDR *xdrs, gfs3_lk_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_quad_t (xdrs, &objp->fd))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->cmd))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->type))
+ return FALSE;
+ if (!xdr_gf_proto_flock (xdrs, &objp->flock))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_lk_rsp (XDR *xdrs, gfs3_lk_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_gf_proto_flock (xdrs, &objp->flock))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_inodelk_req (XDR *xdrs, gfs3_inodelk_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->cmd))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->type))
+ return FALSE;
+ if (!xdr_gf_proto_flock (xdrs, &objp->flock))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->volume, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_finodelk_req (XDR *xdrs, gfs3_finodelk_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_quad_t (xdrs, &objp->fd))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->cmd))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->type))
+ return FALSE;
+ if (!xdr_gf_proto_flock (xdrs, &objp->flock))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->volume, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_flush_req (XDR *xdrs, gfs3_flush_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_quad_t (xdrs, &objp->fd))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_fsync_req (XDR *xdrs, gfs3_fsync_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_quad_t (xdrs, &objp->fd))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->data))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_fsync_rsp (XDR *xdrs, gfs3_fsync_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->prestat))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->poststat))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_setxattr_req (XDR *xdrs, gfs3_setxattr_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->flags))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_fsetxattr_req (XDR *xdrs, gfs3_fsetxattr_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_quad_t (xdrs, &objp->fd))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->flags))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_xattrop_req (XDR *xdrs, gfs3_xattrop_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->flags))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_xattrop_rsp (XDR *xdrs, gfs3_xattrop_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_fxattrop_req (XDR *xdrs, gfs3_fxattrop_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_quad_t (xdrs, &objp->fd))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->flags))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_fxattrop_rsp (XDR *xdrs, gfs3_fxattrop_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_getxattr_req (XDR *xdrs, gfs3_getxattr_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->namelen))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->name, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_getxattr_rsp (XDR *xdrs, gfs3_getxattr_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_fgetxattr_req (XDR *xdrs, gfs3_fgetxattr_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_quad_t (xdrs, &objp->fd))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->namelen))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->name, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_fgetxattr_rsp (XDR *xdrs, gfs3_fgetxattr_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_removexattr_req (XDR *xdrs, gfs3_removexattr_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->name, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_fremovexattr_req (XDR *xdrs, gfs3_fremovexattr_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_quad_t (xdrs, &objp->fd))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->name, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_opendir_req (XDR *xdrs, gfs3_opendir_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_opendir_rsp (XDR *xdrs, gfs3_opendir_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_quad_t (xdrs, &objp->fd))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_fsyncdir_req (XDR *xdrs, gfs3_fsyncdir_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_quad_t (xdrs, &objp->fd))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->data))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_readdir_req (XDR *xdrs, gfs3_readdir_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_quad_t (xdrs, &objp->fd))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->offset))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->size))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_readdirp_req (XDR *xdrs, gfs3_readdirp_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_quad_t (xdrs, &objp->fd))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->offset))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->size))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_access_req (XDR *xdrs, gfs3_access_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->mask))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_create_req (XDR *xdrs, gfs3_create_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+
+ if (xdrs->x_op == XDR_ENCODE) {
+ if (!xdr_opaque (xdrs, objp->pargfid, 16))
+ return FALSE;
+ buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_u_int (xdrs, &objp->flags))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->mode))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->umask))
+ return FALSE;
+
+ } else {
+ IXDR_PUT_U_LONG(buf, objp->flags);
+ IXDR_PUT_U_LONG(buf, objp->mode);
+ IXDR_PUT_U_LONG(buf, objp->umask);
+ }
+ if (!xdr_string (xdrs, &objp->bname, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+ } else if (xdrs->x_op == XDR_DECODE) {
+ if (!xdr_opaque (xdrs, objp->pargfid, 16))
+ return FALSE;
+ buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_u_int (xdrs, &objp->flags))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->mode))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->umask))
+ return FALSE;
+
+ } else {
+ objp->flags = IXDR_GET_U_LONG(buf);
+ objp->mode = IXDR_GET_U_LONG(buf);
+ objp->umask = IXDR_GET_U_LONG(buf);
+ }
+ if (!xdr_string (xdrs, &objp->bname, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+ }
+
+ if (!xdr_opaque (xdrs, objp->pargfid, 16))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->flags))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->mode))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->umask))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->bname, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_create_rsp (XDR *xdrs, gfs3_create_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->stat))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->fd))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->preparent))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->postparent))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_ftruncate_req (XDR *xdrs, gfs3_ftruncate_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_quad_t (xdrs, &objp->fd))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->offset))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_ftruncate_rsp (XDR *xdrs, gfs3_ftruncate_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->prestat))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->poststat))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_fstat_req (XDR *xdrs, gfs3_fstat_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_quad_t (xdrs, &objp->fd))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_fstat_rsp (XDR *xdrs, gfs3_fstat_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->stat))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_entrylk_req (XDR *xdrs, gfs3_entrylk_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->cmd))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->type))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->namelen))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->name, ~0))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->volume, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_fentrylk_req (XDR *xdrs, gfs3_fentrylk_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_quad_t (xdrs, &objp->fd))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->cmd))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->type))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->namelen))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->name, ~0))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->volume, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_setattr_req (XDR *xdrs, gfs3_setattr_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->stbuf))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->valid))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_setattr_rsp (XDR *xdrs, gfs3_setattr_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->statpre))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->statpost))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_fsetattr_req (XDR *xdrs, gfs3_fsetattr_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_quad_t (xdrs, &objp->fd))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->stbuf))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->valid))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_fsetattr_rsp (XDR *xdrs, gfs3_fsetattr_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->statpre))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->statpost))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_fallocate_req (XDR *xdrs, gfs3_fallocate_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_quad_t (xdrs, &objp->fd))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->flags))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->offset))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->size))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_fallocate_rsp (XDR *xdrs, gfs3_fallocate_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->statpre))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->statpost))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_discard_req (XDR *xdrs, gfs3_discard_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_quad_t (xdrs, &objp->fd))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->offset))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->size))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_discard_rsp (XDR *xdrs, gfs3_discard_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->statpre))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->statpost))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_zerofill_req (XDR *xdrs, gfs3_zerofill_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_quad_t (xdrs, &objp->fd))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->offset))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->size))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val,
+ (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_zerofill_rsp (XDR *xdrs, gfs3_zerofill_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->statpre))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->statpost))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val,
+ (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+
+bool_t
+xdr_gfs3_rchecksum_req (XDR *xdrs, gfs3_rchecksum_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_quad_t (xdrs, &objp->fd))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->offset))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->len))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_rchecksum_rsp (XDR *xdrs, gfs3_rchecksum_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+
+ if (xdrs->x_op == XDR_ENCODE) {
+ buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->weak_checksum))
+ return FALSE;
+
+ } else {
+ IXDR_PUT_LONG(buf, objp->op_ret);
+ IXDR_PUT_LONG(buf, objp->op_errno);
+ IXDR_PUT_U_LONG(buf, objp->weak_checksum);
+ }
+ if (!xdr_bytes (xdrs, (char **)&objp->strong_checksum.strong_checksum_val, (u_int *) &objp->strong_checksum.strong_checksum_len, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+ } else if (xdrs->x_op == XDR_DECODE) {
+ buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->weak_checksum))
+ return FALSE;
+
+ } else {
+ objp->op_ret = IXDR_GET_LONG(buf);
+ objp->op_errno = IXDR_GET_LONG(buf);
+ objp->weak_checksum = IXDR_GET_U_LONG(buf);
+ }
+ if (!xdr_bytes (xdrs, (char **)&objp->strong_checksum.strong_checksum_val, (u_int *) &objp->strong_checksum.strong_checksum_len, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+ }
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->weak_checksum))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->strong_checksum.strong_checksum_val, (u_int *) &objp->strong_checksum.strong_checksum_len, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf_setvolume_req (XDR *xdrs, gf_setvolume_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf_setvolume_rsp (XDR *xdrs, gf_setvolume_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf_getspec_req (XDR *xdrs, gf_getspec_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_u_int (xdrs, &objp->flags))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->key, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf_getspec_rsp (XDR *xdrs, gf_getspec_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->spec, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf_mgmt_hndsk_req (XDR *xdrs, gf_mgmt_hndsk_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_bytes (xdrs, (char **)&objp->hndsk.hndsk_val, (u_int *) &objp->hndsk.hndsk_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf_mgmt_hndsk_rsp (XDR *xdrs, gf_mgmt_hndsk_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->hndsk.hndsk_val, (u_int *) &objp->hndsk.hndsk_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf_log_req (XDR *xdrs, gf_log_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_bytes (xdrs, (char **)&objp->msg.msg_val, (u_int *) &objp->msg.msg_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf_notify_req (XDR *xdrs, gf_notify_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_u_int (xdrs, &objp->flags))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->buf, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf_notify_rsp (XDR *xdrs, gf_notify_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+
+ if (xdrs->x_op == XDR_ENCODE) {
+ buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->flags))
+ return FALSE;
+
+ } else {
+ IXDR_PUT_LONG(buf, objp->op_ret);
+ IXDR_PUT_LONG(buf, objp->op_errno);
+ IXDR_PUT_U_LONG(buf, objp->flags);
+ }
+ if (!xdr_string (xdrs, &objp->buf, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+ } else if (xdrs->x_op == XDR_DECODE) {
+ buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->flags))
+ return FALSE;
+
+ } else {
+ objp->op_ret = IXDR_GET_LONG(buf);
+ objp->op_errno = IXDR_GET_LONG(buf);
+ objp->flags = IXDR_GET_U_LONG(buf);
+ }
+ if (!xdr_string (xdrs, &objp->buf, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+ }
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->flags))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->buf, ~0))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_releasedir_req (XDR *xdrs, gfs3_releasedir_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_quad_t (xdrs, &objp->fd))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_release_req (XDR *xdrs, gfs3_release_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_opaque (xdrs, objp->gfid, 16))
+ return FALSE;
+ if (!xdr_quad_t (xdrs, &objp->fd))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf_common_rsp (XDR *xdrs, gf_common_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_dirlist (XDR *xdrs, gfs3_dirlist *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_u_quad_t (xdrs, &objp->d_ino))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->d_off))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->d_len))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->d_type))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->name, ~0))
+ return FALSE;
+ if (!xdr_pointer (xdrs, (char **)&objp->nextentry, sizeof (gfs3_dirlist), (xdrproc_t) xdr_gfs3_dirlist))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_readdir_rsp (XDR *xdrs, gfs3_readdir_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_pointer (xdrs, (char **)&objp->reply, sizeof (gfs3_dirlist), (xdrproc_t) xdr_gfs3_dirlist))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_dirplist (XDR *xdrs, gfs3_dirplist *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_u_quad_t (xdrs, &objp->d_ino))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->d_off))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->d_len))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->d_type))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->name, ~0))
+ return FALSE;
+ if (!xdr_gf_iatt (xdrs, &objp->stat))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ if (!xdr_pointer (xdrs, (char **)&objp->nextentry, sizeof (gfs3_dirplist), (xdrproc_t) xdr_gfs3_dirplist))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gfs3_readdirp_rsp (XDR *xdrs, gfs3_readdirp_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_pointer (xdrs, (char **)&objp->reply, sizeof (gfs3_dirplist), (xdrproc_t) xdr_gfs3_dirplist))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf_set_lk_ver_rsp (XDR *xdrs, gf_set_lk_ver_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->lk_ver))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf_set_lk_ver_req (XDR *xdrs, gf_set_lk_ver_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_string (xdrs, &objp->uid, ~0))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->lk_ver))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf_event_notify_req (XDR *xdrs, gf_event_notify_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf_event_notify_rsp (XDR *xdrs, gf_event_notify_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
+ return FALSE;
+ return TRUE;
+}
diff --git a/rpc/xdr/src/glusterfs3-xdr.h b/rpc/xdr/src/glusterfs3-xdr.h
new file mode 100644
index 000000000..13566e694
--- /dev/null
+++ b/rpc/xdr/src/glusterfs3-xdr.h
@@ -0,0 +1,1394 @@
+/*
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xdr-common.h"
+#include "compat.h"
+
+#if defined(__GNUC__)
+#if __GNUC__ >= 4
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#endif
+#endif
+
+/*
+ * Please do not edit this file.
+ * It was generated using rpcgen.
+ */
+
+#ifndef _GLUSTERFS3_XDR_H_RPCGEN
+#define _GLUSTERFS3_XDR_H_RPCGEN
+
+#include <rpc/rpc.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+struct gf_statfs {
+ u_quad_t bsize;
+ u_quad_t frsize;
+ u_quad_t blocks;
+ u_quad_t bfree;
+ u_quad_t bavail;
+ u_quad_t files;
+ u_quad_t ffree;
+ u_quad_t favail;
+ u_quad_t fsid;
+ u_quad_t flag;
+ u_quad_t namemax;
+};
+typedef struct gf_statfs gf_statfs;
+
+struct gf_proto_flock {
+ u_int type;
+ u_int whence;
+ u_quad_t start;
+ u_quad_t len;
+ u_int pid;
+ struct {
+ u_int lk_owner_len;
+ char *lk_owner_val;
+ } lk_owner;
+};
+typedef struct gf_proto_flock gf_proto_flock;
+
+struct gf_iatt {
+ char ia_gfid[16];
+ u_quad_t ia_ino;
+ u_quad_t ia_dev;
+ u_int mode;
+ u_int ia_nlink;
+ u_int ia_uid;
+ u_int ia_gid;
+ u_quad_t ia_rdev;
+ u_quad_t ia_size;
+ u_int ia_blksize;
+ u_quad_t ia_blocks;
+ u_int ia_atime;
+ u_int ia_atime_nsec;
+ u_int ia_mtime;
+ u_int ia_mtime_nsec;
+ u_int ia_ctime;
+ u_int ia_ctime_nsec;
+};
+typedef struct gf_iatt gf_iatt;
+
+struct gfs3_stat_req {
+ char gfid[16];
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_stat_req gfs3_stat_req;
+
+struct gfs3_stat_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt stat;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_stat_rsp gfs3_stat_rsp;
+
+struct gfs3_readlink_req {
+ char gfid[16];
+ u_int size;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_readlink_req gfs3_readlink_req;
+
+struct gfs3_readlink_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt buf;
+ char *path;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_readlink_rsp gfs3_readlink_rsp;
+
+struct gfs3_mknod_req {
+ char pargfid[16];
+ u_quad_t dev;
+ u_int mode;
+ u_int umask;
+ char *bname;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_mknod_req gfs3_mknod_req;
+
+struct gfs3_mknod_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt stat;
+ struct gf_iatt preparent;
+ struct gf_iatt postparent;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_mknod_rsp gfs3_mknod_rsp;
+
+struct gfs3_mkdir_req {
+ char pargfid[16];
+ u_int mode;
+ u_int umask;
+ char *bname;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_mkdir_req gfs3_mkdir_req;
+
+struct gfs3_mkdir_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt stat;
+ struct gf_iatt preparent;
+ struct gf_iatt postparent;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_mkdir_rsp gfs3_mkdir_rsp;
+
+struct gfs3_unlink_req {
+ char pargfid[16];
+ char *bname;
+ u_int xflags;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_unlink_req gfs3_unlink_req;
+
+struct gfs3_unlink_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt preparent;
+ struct gf_iatt postparent;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_unlink_rsp gfs3_unlink_rsp;
+
+struct gfs3_rmdir_req {
+ char pargfid[16];
+ int xflags;
+ char *bname;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_rmdir_req gfs3_rmdir_req;
+
+struct gfs3_rmdir_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt preparent;
+ struct gf_iatt postparent;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_rmdir_rsp gfs3_rmdir_rsp;
+
+struct gfs3_symlink_req {
+ char pargfid[16];
+ char *bname;
+ u_int umask;
+ char *linkname;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_symlink_req gfs3_symlink_req;
+
+struct gfs3_symlink_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt stat;
+ struct gf_iatt preparent;
+ struct gf_iatt postparent;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_symlink_rsp gfs3_symlink_rsp;
+
+struct gfs3_rename_req {
+ char oldgfid[16];
+ char newgfid[16];
+ char *oldbname;
+ char *newbname;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_rename_req gfs3_rename_req;
+
+struct gfs3_rename_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt stat;
+ struct gf_iatt preoldparent;
+ struct gf_iatt postoldparent;
+ struct gf_iatt prenewparent;
+ struct gf_iatt postnewparent;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_rename_rsp gfs3_rename_rsp;
+
+struct gfs3_link_req {
+ char oldgfid[16];
+ char newgfid[16];
+ char *newbname;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_link_req gfs3_link_req;
+
+struct gfs3_link_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt stat;
+ struct gf_iatt preparent;
+ struct gf_iatt postparent;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_link_rsp gfs3_link_rsp;
+
+struct gfs3_truncate_req {
+ char gfid[16];
+ u_quad_t offset;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_truncate_req gfs3_truncate_req;
+
+struct gfs3_truncate_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt prestat;
+ struct gf_iatt poststat;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_truncate_rsp gfs3_truncate_rsp;
+
+struct gfs3_open_req {
+ char gfid[16];
+ u_int flags;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_open_req gfs3_open_req;
+
+struct gfs3_open_rsp {
+ int op_ret;
+ int op_errno;
+ quad_t fd;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_open_rsp gfs3_open_rsp;
+
+struct gfs3_read_req {
+ char gfid[16];
+ quad_t fd;
+ u_quad_t offset;
+ u_int size;
+ u_int flag;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_read_req gfs3_read_req;
+
+struct gfs3_read_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt stat;
+ u_int size;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_read_rsp gfs3_read_rsp;
+
+struct gfs3_lookup_req {
+ char gfid[16];
+ char pargfid[16];
+ u_int flags;
+ char *bname;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_lookup_req gfs3_lookup_req;
+
+struct gfs3_lookup_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt stat;
+ struct gf_iatt postparent;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_lookup_rsp gfs3_lookup_rsp;
+
+struct gfs3_write_req {
+ char gfid[16];
+ quad_t fd;
+ u_quad_t offset;
+ u_int size;
+ u_int flag;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_write_req gfs3_write_req;
+
+struct gfs3_write_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt prestat;
+ struct gf_iatt poststat;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_write_rsp gfs3_write_rsp;
+
+struct gfs3_statfs_req {
+ char gfid[16];
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_statfs_req gfs3_statfs_req;
+
+struct gfs3_statfs_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_statfs statfs;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_statfs_rsp gfs3_statfs_rsp;
+
+struct gfs3_lk_req {
+ char gfid[16];
+ quad_t fd;
+ u_int cmd;
+ u_int type;
+ struct gf_proto_flock flock;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_lk_req gfs3_lk_req;
+
+struct gfs3_lk_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_proto_flock flock;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_lk_rsp gfs3_lk_rsp;
+
+struct gfs3_inodelk_req {
+ char gfid[16];
+ u_int cmd;
+ u_int type;
+ struct gf_proto_flock flock;
+ char *volume;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_inodelk_req gfs3_inodelk_req;
+
+struct gfs3_finodelk_req {
+ char gfid[16];
+ quad_t fd;
+ u_int cmd;
+ u_int type;
+ struct gf_proto_flock flock;
+ char *volume;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_finodelk_req gfs3_finodelk_req;
+
+struct gfs3_flush_req {
+ char gfid[16];
+ quad_t fd;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_flush_req gfs3_flush_req;
+
+struct gfs3_fsync_req {
+ char gfid[16];
+ quad_t fd;
+ u_int data;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_fsync_req gfs3_fsync_req;
+
+struct gfs3_fsync_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt prestat;
+ struct gf_iatt poststat;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_fsync_rsp gfs3_fsync_rsp;
+
+struct gfs3_setxattr_req {
+ char gfid[16];
+ u_int flags;
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_setxattr_req gfs3_setxattr_req;
+
+struct gfs3_fsetxattr_req {
+ char gfid[16];
+ quad_t fd;
+ u_int flags;
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_fsetxattr_req gfs3_fsetxattr_req;
+
+struct gfs3_xattrop_req {
+ char gfid[16];
+ u_int flags;
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_xattrop_req gfs3_xattrop_req;
+
+struct gfs3_xattrop_rsp {
+ int op_ret;
+ int op_errno;
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_xattrop_rsp gfs3_xattrop_rsp;
+
+struct gfs3_fxattrop_req {
+ char gfid[16];
+ quad_t fd;
+ u_int flags;
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_fxattrop_req gfs3_fxattrop_req;
+
+struct gfs3_fxattrop_rsp {
+ int op_ret;
+ int op_errno;
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_fxattrop_rsp gfs3_fxattrop_rsp;
+
+struct gfs3_getxattr_req {
+ char gfid[16];
+ u_int namelen;
+ char *name;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_getxattr_req gfs3_getxattr_req;
+
+struct gfs3_getxattr_rsp {
+ int op_ret;
+ int op_errno;
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_getxattr_rsp gfs3_getxattr_rsp;
+
+struct gfs3_fgetxattr_req {
+ char gfid[16];
+ quad_t fd;
+ u_int namelen;
+ char *name;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_fgetxattr_req gfs3_fgetxattr_req;
+
+struct gfs3_fgetxattr_rsp {
+ int op_ret;
+ int op_errno;
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_fgetxattr_rsp gfs3_fgetxattr_rsp;
+
+struct gfs3_removexattr_req {
+ char gfid[16];
+ char *name;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_removexattr_req gfs3_removexattr_req;
+
+struct gfs3_fremovexattr_req {
+ char gfid[16];
+ quad_t fd;
+ char *name;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_fremovexattr_req gfs3_fremovexattr_req;
+
+struct gfs3_opendir_req {
+ char gfid[16];
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_opendir_req gfs3_opendir_req;
+
+struct gfs3_opendir_rsp {
+ int op_ret;
+ int op_errno;
+ quad_t fd;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_opendir_rsp gfs3_opendir_rsp;
+
+struct gfs3_fsyncdir_req {
+ char gfid[16];
+ quad_t fd;
+ int data;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_fsyncdir_req gfs3_fsyncdir_req;
+
+struct gfs3_readdir_req {
+ char gfid[16];
+ quad_t fd;
+ u_quad_t offset;
+ u_int size;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_readdir_req gfs3_readdir_req;
+
+struct gfs3_readdirp_req {
+ char gfid[16];
+ quad_t fd;
+ u_quad_t offset;
+ u_int size;
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+};
+typedef struct gfs3_readdirp_req gfs3_readdirp_req;
+
+struct gfs3_access_req {
+ char gfid[16];
+ u_int mask;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_access_req gfs3_access_req;
+
+struct gfs3_create_req {
+ char pargfid[16];
+ u_int flags;
+ u_int mode;
+ u_int umask;
+ char *bname;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_create_req gfs3_create_req;
+
+struct gfs3_create_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt stat;
+ u_quad_t fd;
+ struct gf_iatt preparent;
+ struct gf_iatt postparent;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_create_rsp gfs3_create_rsp;
+
+struct gfs3_ftruncate_req {
+ char gfid[16];
+ quad_t fd;
+ u_quad_t offset;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_ftruncate_req gfs3_ftruncate_req;
+
+struct gfs3_ftruncate_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt prestat;
+ struct gf_iatt poststat;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_ftruncate_rsp gfs3_ftruncate_rsp;
+
+struct gfs3_fstat_req {
+ char gfid[16];
+ quad_t fd;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_fstat_req gfs3_fstat_req;
+
+struct gfs3_fstat_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt stat;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_fstat_rsp gfs3_fstat_rsp;
+
+struct gfs3_entrylk_req {
+ char gfid[16];
+ u_int cmd;
+ u_int type;
+ u_quad_t namelen;
+ char *name;
+ char *volume;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_entrylk_req gfs3_entrylk_req;
+
+struct gfs3_fentrylk_req {
+ char gfid[16];
+ quad_t fd;
+ u_int cmd;
+ u_int type;
+ u_quad_t namelen;
+ char *name;
+ char *volume;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_fentrylk_req gfs3_fentrylk_req;
+
+struct gfs3_setattr_req {
+ char gfid[16];
+ struct gf_iatt stbuf;
+ int valid;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_setattr_req gfs3_setattr_req;
+
+struct gfs3_setattr_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt statpre;
+ struct gf_iatt statpost;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_setattr_rsp gfs3_setattr_rsp;
+
+struct gfs3_fsetattr_req {
+ quad_t fd;
+ struct gf_iatt stbuf;
+ int valid;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_fsetattr_req gfs3_fsetattr_req;
+
+struct gfs3_fsetattr_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt statpre;
+ struct gf_iatt statpost;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_fsetattr_rsp gfs3_fsetattr_rsp;
+
+struct gfs3_fallocate_req {
+ char gfid[16];
+ quad_t fd;
+ u_int flags;
+ u_quad_t offset;
+ u_quad_t size;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_fallocate_req gfs3_fallocate_req;
+
+struct gfs3_fallocate_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt statpre;
+ struct gf_iatt statpost;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_fallocate_rsp gfs3_fallocate_rsp;
+
+struct gfs3_discard_req {
+ char gfid[16];
+ quad_t fd;
+ u_quad_t offset;
+ u_quad_t size;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_discard_req gfs3_discard_req;
+
+struct gfs3_discard_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt statpre;
+ struct gf_iatt statpost;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_discard_rsp gfs3_discard_rsp;
+
+struct gfs3_zerofill_req {
+ char gfid[16];
+ quad_t fd;
+ u_quad_t offset;
+ u_quad_t size;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_zerofill_req gfs3_zerofill_req;
+
+struct gfs3_zerofill_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt statpre;
+ struct gf_iatt statpost;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_zerofill_rsp gfs3_zerofill_rsp;
+
+
+struct gfs3_rchecksum_req {
+ quad_t fd;
+ u_quad_t offset;
+ u_int len;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_rchecksum_req gfs3_rchecksum_req;
+
+struct gfs3_rchecksum_rsp {
+ int op_ret;
+ int op_errno;
+ u_int weak_checksum;
+ struct {
+ u_int strong_checksum_len;
+ char *strong_checksum_val;
+ } strong_checksum;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_rchecksum_rsp gfs3_rchecksum_rsp;
+
+struct gf_setvolume_req {
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+};
+typedef struct gf_setvolume_req gf_setvolume_req;
+
+struct gf_setvolume_rsp {
+ int op_ret;
+ int op_errno;
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+};
+typedef struct gf_setvolume_rsp gf_setvolume_rsp;
+
+struct gf_getspec_req {
+ u_int flags;
+ char *key;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gf_getspec_req gf_getspec_req;
+
+struct gf_getspec_rsp {
+ int op_ret;
+ int op_errno;
+ char *spec;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gf_getspec_rsp gf_getspec_rsp;
+
+struct gf_mgmt_hndsk_req {
+ struct {
+ u_int hndsk_len;
+ char *hndsk_val;
+ } hndsk;
+};
+typedef struct gf_mgmt_hndsk_req gf_mgmt_hndsk_req;
+
+struct gf_mgmt_hndsk_rsp {
+ int op_ret;
+ int op_errno;
+ struct {
+ u_int hndsk_len;
+ char *hndsk_val;
+ } hndsk;
+};
+typedef struct gf_mgmt_hndsk_rsp gf_mgmt_hndsk_rsp;
+
+struct gf_log_req {
+ struct {
+ u_int msg_len;
+ char *msg_val;
+ } msg;
+};
+typedef struct gf_log_req gf_log_req;
+
+struct gf_notify_req {
+ u_int flags;
+ char *buf;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gf_notify_req gf_notify_req;
+
+struct gf_notify_rsp {
+ int op_ret;
+ int op_errno;
+ u_int flags;
+ char *buf;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gf_notify_rsp gf_notify_rsp;
+
+struct gfs3_releasedir_req {
+ char gfid[16];
+ quad_t fd;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_releasedir_req gfs3_releasedir_req;
+
+struct gfs3_release_req {
+ char gfid[16];
+ quad_t fd;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_release_req gfs3_release_req;
+
+struct gf_common_rsp {
+ int op_ret;
+ int op_errno;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gf_common_rsp gf_common_rsp;
+
+struct gfs3_dirlist {
+ u_quad_t d_ino;
+ u_quad_t d_off;
+ u_int d_len;
+ u_int d_type;
+ char *name;
+ struct gfs3_dirlist *nextentry;
+};
+typedef struct gfs3_dirlist gfs3_dirlist;
+
+struct gfs3_readdir_rsp {
+ int op_ret;
+ int op_errno;
+ struct gfs3_dirlist *reply;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_readdir_rsp gfs3_readdir_rsp;
+
+struct gfs3_dirplist {
+ u_quad_t d_ino;
+ u_quad_t d_off;
+ u_int d_len;
+ u_int d_type;
+ char *name;
+ struct gf_iatt stat;
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+ struct gfs3_dirplist *nextentry;
+};
+typedef struct gfs3_dirplist gfs3_dirplist;
+
+struct gfs3_readdirp_rsp {
+ int op_ret;
+ int op_errno;
+ struct gfs3_dirplist *reply;
+ struct {
+ u_int xdata_len;
+ char *xdata_val;
+ } xdata;
+};
+typedef struct gfs3_readdirp_rsp gfs3_readdirp_rsp;
+
+struct gf_set_lk_ver_rsp {
+ int op_ret;
+ int op_errno;
+ int lk_ver;
+};
+typedef struct gf_set_lk_ver_rsp gf_set_lk_ver_rsp;
+
+struct gf_set_lk_ver_req {
+ char *uid;
+ int lk_ver;
+};
+typedef struct gf_set_lk_ver_req gf_set_lk_ver_req;
+
+struct gf_event_notify_req {
+ int op;
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+};
+typedef struct gf_event_notify_req gf_event_notify_req;
+
+struct gf_event_notify_rsp {
+ int op_ret;
+ int op_errno;
+ struct {
+ u_int dict_len;
+ char *dict_val;
+ } dict;
+};
+typedef struct gf_event_notify_rsp gf_event_notify_rsp;
+
+/* the xdr functions */
+
+#if defined(__STDC__) || defined(__cplusplus)
+extern bool_t xdr_gf_statfs (XDR *, gf_statfs*);
+extern bool_t xdr_gf_proto_flock (XDR *, gf_proto_flock*);
+extern bool_t xdr_gf_iatt (XDR *, gf_iatt*);
+extern bool_t xdr_gfs3_stat_req (XDR *, gfs3_stat_req*);
+extern bool_t xdr_gfs3_stat_rsp (XDR *, gfs3_stat_rsp*);
+extern bool_t xdr_gfs3_readlink_req (XDR *, gfs3_readlink_req*);
+extern bool_t xdr_gfs3_readlink_rsp (XDR *, gfs3_readlink_rsp*);
+extern bool_t xdr_gfs3_mknod_req (XDR *, gfs3_mknod_req*);
+extern bool_t xdr_gfs3_mknod_rsp (XDR *, gfs3_mknod_rsp*);
+extern bool_t xdr_gfs3_mkdir_req (XDR *, gfs3_mkdir_req*);
+extern bool_t xdr_gfs3_mkdir_rsp (XDR *, gfs3_mkdir_rsp*);
+extern bool_t xdr_gfs3_unlink_req (XDR *, gfs3_unlink_req*);
+extern bool_t xdr_gfs3_unlink_rsp (XDR *, gfs3_unlink_rsp*);
+extern bool_t xdr_gfs3_rmdir_req (XDR *, gfs3_rmdir_req*);
+extern bool_t xdr_gfs3_rmdir_rsp (XDR *, gfs3_rmdir_rsp*);
+extern bool_t xdr_gfs3_symlink_req (XDR *, gfs3_symlink_req*);
+extern bool_t xdr_gfs3_symlink_rsp (XDR *, gfs3_symlink_rsp*);
+extern bool_t xdr_gfs3_rename_req (XDR *, gfs3_rename_req*);
+extern bool_t xdr_gfs3_rename_rsp (XDR *, gfs3_rename_rsp*);
+extern bool_t xdr_gfs3_link_req (XDR *, gfs3_link_req*);
+extern bool_t xdr_gfs3_link_rsp (XDR *, gfs3_link_rsp*);
+extern bool_t xdr_gfs3_truncate_req (XDR *, gfs3_truncate_req*);
+extern bool_t xdr_gfs3_truncate_rsp (XDR *, gfs3_truncate_rsp*);
+extern bool_t xdr_gfs3_open_req (XDR *, gfs3_open_req*);
+extern bool_t xdr_gfs3_open_rsp (XDR *, gfs3_open_rsp*);
+extern bool_t xdr_gfs3_read_req (XDR *, gfs3_read_req*);
+extern bool_t xdr_gfs3_read_rsp (XDR *, gfs3_read_rsp*);
+extern bool_t xdr_gfs3_lookup_req (XDR *, gfs3_lookup_req*);
+extern bool_t xdr_gfs3_lookup_rsp (XDR *, gfs3_lookup_rsp*);
+extern bool_t xdr_gfs3_write_req (XDR *, gfs3_write_req*);
+extern bool_t xdr_gfs3_write_rsp (XDR *, gfs3_write_rsp*);
+extern bool_t xdr_gfs3_statfs_req (XDR *, gfs3_statfs_req*);
+extern bool_t xdr_gfs3_statfs_rsp (XDR *, gfs3_statfs_rsp*);
+extern bool_t xdr_gfs3_lk_req (XDR *, gfs3_lk_req*);
+extern bool_t xdr_gfs3_lk_rsp (XDR *, gfs3_lk_rsp*);
+extern bool_t xdr_gfs3_inodelk_req (XDR *, gfs3_inodelk_req*);
+extern bool_t xdr_gfs3_finodelk_req (XDR *, gfs3_finodelk_req*);
+extern bool_t xdr_gfs3_flush_req (XDR *, gfs3_flush_req*);
+extern bool_t xdr_gfs3_fsync_req (XDR *, gfs3_fsync_req*);
+extern bool_t xdr_gfs3_fsync_rsp (XDR *, gfs3_fsync_rsp*);
+extern bool_t xdr_gfs3_setxattr_req (XDR *, gfs3_setxattr_req*);
+extern bool_t xdr_gfs3_fsetxattr_req (XDR *, gfs3_fsetxattr_req*);
+extern bool_t xdr_gfs3_xattrop_req (XDR *, gfs3_xattrop_req*);
+extern bool_t xdr_gfs3_xattrop_rsp (XDR *, gfs3_xattrop_rsp*);
+extern bool_t xdr_gfs3_fxattrop_req (XDR *, gfs3_fxattrop_req*);
+extern bool_t xdr_gfs3_fxattrop_rsp (XDR *, gfs3_fxattrop_rsp*);
+extern bool_t xdr_gfs3_getxattr_req (XDR *, gfs3_getxattr_req*);
+extern bool_t xdr_gfs3_getxattr_rsp (XDR *, gfs3_getxattr_rsp*);
+extern bool_t xdr_gfs3_fgetxattr_req (XDR *, gfs3_fgetxattr_req*);
+extern bool_t xdr_gfs3_fgetxattr_rsp (XDR *, gfs3_fgetxattr_rsp*);
+extern bool_t xdr_gfs3_removexattr_req (XDR *, gfs3_removexattr_req*);
+extern bool_t xdr_gfs3_fremovexattr_req (XDR *, gfs3_fremovexattr_req*);
+extern bool_t xdr_gfs3_opendir_req (XDR *, gfs3_opendir_req*);
+extern bool_t xdr_gfs3_opendir_rsp (XDR *, gfs3_opendir_rsp*);
+extern bool_t xdr_gfs3_fsyncdir_req (XDR *, gfs3_fsyncdir_req*);
+extern bool_t xdr_gfs3_readdir_req (XDR *, gfs3_readdir_req*);
+extern bool_t xdr_gfs3_readdirp_req (XDR *, gfs3_readdirp_req*);
+extern bool_t xdr_gfs3_access_req (XDR *, gfs3_access_req*);
+extern bool_t xdr_gfs3_create_req (XDR *, gfs3_create_req*);
+extern bool_t xdr_gfs3_create_rsp (XDR *, gfs3_create_rsp*);
+extern bool_t xdr_gfs3_ftruncate_req (XDR *, gfs3_ftruncate_req*);
+extern bool_t xdr_gfs3_ftruncate_rsp (XDR *, gfs3_ftruncate_rsp*);
+extern bool_t xdr_gfs3_fstat_req (XDR *, gfs3_fstat_req*);
+extern bool_t xdr_gfs3_fstat_rsp (XDR *, gfs3_fstat_rsp*);
+extern bool_t xdr_gfs3_entrylk_req (XDR *, gfs3_entrylk_req*);
+extern bool_t xdr_gfs3_fentrylk_req (XDR *, gfs3_fentrylk_req*);
+extern bool_t xdr_gfs3_setattr_req (XDR *, gfs3_setattr_req*);
+extern bool_t xdr_gfs3_setattr_rsp (XDR *, gfs3_setattr_rsp*);
+extern bool_t xdr_gfs3_fsetattr_req (XDR *, gfs3_fsetattr_req*);
+extern bool_t xdr_gfs3_fsetattr_rsp (XDR *, gfs3_fsetattr_rsp*);
+extern bool_t xdr_gfs3_fallocate_req (XDR *, gfs3_fallocate_req*);
+extern bool_t xdr_gfs3_fallocate_rsp (XDR *, gfs3_fallocate_rsp*);
+extern bool_t xdr_gfs3_discard_req (XDR *, gfs3_discard_req*);
+extern bool_t xdr_gfs3_discard_rsp (XDR *, gfs3_discard_rsp*);
+extern bool_t xdr_gfs3_zerofill_req (XDR *, gfs3_zerofill_req*);
+extern bool_t xdr_gfs3_zerofill_rsp (XDR *, gfs3_zerofill_rsp*);
+extern bool_t xdr_gfs3_rchecksum_req (XDR *, gfs3_rchecksum_req*);
+extern bool_t xdr_gfs3_rchecksum_rsp (XDR *, gfs3_rchecksum_rsp*);
+extern bool_t xdr_gf_setvolume_req (XDR *, gf_setvolume_req*);
+extern bool_t xdr_gf_setvolume_rsp (XDR *, gf_setvolume_rsp*);
+extern bool_t xdr_gf_getspec_req (XDR *, gf_getspec_req*);
+extern bool_t xdr_gf_getspec_rsp (XDR *, gf_getspec_rsp*);
+extern bool_t xdr_gf_mgmt_hndsk_req (XDR *, gf_mgmt_hndsk_req*);
+extern bool_t xdr_gf_mgmt_hndsk_rsp (XDR *, gf_mgmt_hndsk_rsp*);
+extern bool_t xdr_gf_log_req (XDR *, gf_log_req*);
+extern bool_t xdr_gf_notify_req (XDR *, gf_notify_req*);
+extern bool_t xdr_gf_notify_rsp (XDR *, gf_notify_rsp*);
+extern bool_t xdr_gfs3_releasedir_req (XDR *, gfs3_releasedir_req*);
+extern bool_t xdr_gfs3_release_req (XDR *, gfs3_release_req*);
+extern bool_t xdr_gf_common_rsp (XDR *, gf_common_rsp*);
+extern bool_t xdr_gfs3_dirlist (XDR *, gfs3_dirlist*);
+extern bool_t xdr_gfs3_readdir_rsp (XDR *, gfs3_readdir_rsp*);
+extern bool_t xdr_gfs3_dirplist (XDR *, gfs3_dirplist*);
+extern bool_t xdr_gfs3_readdirp_rsp (XDR *, gfs3_readdirp_rsp*);
+extern bool_t xdr_gf_set_lk_ver_rsp (XDR *, gf_set_lk_ver_rsp*);
+extern bool_t xdr_gf_set_lk_ver_req (XDR *, gf_set_lk_ver_req*);
+extern bool_t xdr_gf_event_notify_req (XDR *, gf_event_notify_req*);
+extern bool_t xdr_gf_event_notify_rsp (XDR *, gf_event_notify_rsp*);
+
+#else /* K&R C */
+extern bool_t xdr_gf_statfs ();
+extern bool_t xdr_gf_proto_flock ();
+extern bool_t xdr_gf_iatt ();
+extern bool_t xdr_gfs3_stat_req ();
+extern bool_t xdr_gfs3_stat_rsp ();
+extern bool_t xdr_gfs3_readlink_req ();
+extern bool_t xdr_gfs3_readlink_rsp ();
+extern bool_t xdr_gfs3_mknod_req ();
+extern bool_t xdr_gfs3_mknod_rsp ();
+extern bool_t xdr_gfs3_mkdir_req ();
+extern bool_t xdr_gfs3_mkdir_rsp ();
+extern bool_t xdr_gfs3_unlink_req ();
+extern bool_t xdr_gfs3_unlink_rsp ();
+extern bool_t xdr_gfs3_rmdir_req ();
+extern bool_t xdr_gfs3_rmdir_rsp ();
+extern bool_t xdr_gfs3_symlink_req ();
+extern bool_t xdr_gfs3_symlink_rsp ();
+extern bool_t xdr_gfs3_rename_req ();
+extern bool_t xdr_gfs3_rename_rsp ();
+extern bool_t xdr_gfs3_link_req ();
+extern bool_t xdr_gfs3_link_rsp ();
+extern bool_t xdr_gfs3_truncate_req ();
+extern bool_t xdr_gfs3_truncate_rsp ();
+extern bool_t xdr_gfs3_open_req ();
+extern bool_t xdr_gfs3_open_rsp ();
+extern bool_t xdr_gfs3_read_req ();
+extern bool_t xdr_gfs3_read_rsp ();
+extern bool_t xdr_gfs3_lookup_req ();
+extern bool_t xdr_gfs3_lookup_rsp ();
+extern bool_t xdr_gfs3_write_req ();
+extern bool_t xdr_gfs3_write_rsp ();
+extern bool_t xdr_gfs3_statfs_req ();
+extern bool_t xdr_gfs3_statfs_rsp ();
+extern bool_t xdr_gfs3_lk_req ();
+extern bool_t xdr_gfs3_lk_rsp ();
+extern bool_t xdr_gfs3_inodelk_req ();
+extern bool_t xdr_gfs3_finodelk_req ();
+extern bool_t xdr_gfs3_flush_req ();
+extern bool_t xdr_gfs3_fsync_req ();
+extern bool_t xdr_gfs3_fsync_rsp ();
+extern bool_t xdr_gfs3_setxattr_req ();
+extern bool_t xdr_gfs3_fsetxattr_req ();
+extern bool_t xdr_gfs3_xattrop_req ();
+extern bool_t xdr_gfs3_xattrop_rsp ();
+extern bool_t xdr_gfs3_fxattrop_req ();
+extern bool_t xdr_gfs3_fxattrop_rsp ();
+extern bool_t xdr_gfs3_getxattr_req ();
+extern bool_t xdr_gfs3_getxattr_rsp ();
+extern bool_t xdr_gfs3_fgetxattr_req ();
+extern bool_t xdr_gfs3_fgetxattr_rsp ();
+extern bool_t xdr_gfs3_removexattr_req ();
+extern bool_t xdr_gfs3_fremovexattr_req ();
+extern bool_t xdr_gfs3_opendir_req ();
+extern bool_t xdr_gfs3_opendir_rsp ();
+extern bool_t xdr_gfs3_fsyncdir_req ();
+extern bool_t xdr_gfs3_readdir_req ();
+extern bool_t xdr_gfs3_readdirp_req ();
+extern bool_t xdr_gfs3_access_req ();
+extern bool_t xdr_gfs3_create_req ();
+extern bool_t xdr_gfs3_create_rsp ();
+extern bool_t xdr_gfs3_ftruncate_req ();
+extern bool_t xdr_gfs3_ftruncate_rsp ();
+extern bool_t xdr_gfs3_fstat_req ();
+extern bool_t xdr_gfs3_fstat_rsp ();
+extern bool_t xdr_gfs3_entrylk_req ();
+extern bool_t xdr_gfs3_fentrylk_req ();
+extern bool_t xdr_gfs3_setattr_req ();
+extern bool_t xdr_gfs3_setattr_rsp ();
+extern bool_t xdr_gfs3_fsetattr_req ();
+extern bool_t xdr_gfs3_fsetattr_rsp ();
+extern bool_t xdr_gfs3_fallocate_req ();
+extern bool_t xdr_gfs3_fallocate_rsp ();
+extern bool_t xdr_gfs3_discard_req ();
+extern bool_t xdr_gfs3_discard_rsp ();
+extern bool_t xdr_gfs3_zerofill_req ();
+extern bool_t xdr_gfs3_zerofill_rsp ();
+extern bool_t xdr_gfs3_rchecksum_req ();
+extern bool_t xdr_gfs3_rchecksum_rsp ();
+extern bool_t xdr_gf_setvolume_req ();
+extern bool_t xdr_gf_setvolume_rsp ();
+extern bool_t xdr_gf_getspec_req ();
+extern bool_t xdr_gf_getspec_rsp ();
+extern bool_t xdr_gf_mgmt_hndsk_req ();
+extern bool_t xdr_gf_mgmt_hndsk_rsp ();
+extern bool_t xdr_gf_log_req ();
+extern bool_t xdr_gf_notify_req ();
+extern bool_t xdr_gf_notify_rsp ();
+extern bool_t xdr_gfs3_releasedir_req ();
+extern bool_t xdr_gfs3_release_req ();
+extern bool_t xdr_gf_common_rsp ();
+extern bool_t xdr_gfs3_dirlist ();
+extern bool_t xdr_gfs3_readdir_rsp ();
+extern bool_t xdr_gfs3_dirplist ();
+extern bool_t xdr_gfs3_readdirp_rsp ();
+extern bool_t xdr_gf_set_lk_ver_rsp ();
+extern bool_t xdr_gf_set_lk_ver_req ();
+extern bool_t xdr_gf_event_notify_req ();
+extern bool_t xdr_gf_event_notify_rsp ();
+
+#endif /* K&R C */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !_GLUSTERFS3_XDR_H_RPCGEN */
diff --git a/rpc/xdr/src/glusterfs3-xdr.x b/rpc/xdr/src/glusterfs3-xdr.x
new file mode 100644
index 000000000..1edbda3ad
--- /dev/null
+++ b/rpc/xdr/src/glusterfs3-xdr.x
@@ -0,0 +1,756 @@
+#define GF_REQUEST_MAXGROUPS 16
+struct gf_statfs {
+ unsigned hyper bsize;
+ unsigned hyper frsize;
+ unsigned hyper blocks;
+ unsigned hyper bfree;
+ unsigned hyper bavail;
+ unsigned hyper files;
+ unsigned hyper ffree;
+ unsigned hyper favail;
+ unsigned hyper fsid;
+ unsigned hyper flag;
+ unsigned hyper namemax;
+};
+
+struct gf_proto_flock {
+ unsigned int type;
+ unsigned int whence;
+ unsigned hyper start;
+ unsigned hyper len;
+ unsigned int pid;
+ opaque lk_owner<>;
+} ;
+
+
+struct gf_iatt {
+ opaque ia_gfid[16];
+ unsigned hyper ia_ino; /* inode number */
+ unsigned hyper ia_dev; /* backing device ID */
+ unsigned int mode; /* mode (type + protection )*/
+ unsigned int ia_nlink; /* Link count */
+ unsigned int ia_uid; /* user ID of owner */
+ unsigned int ia_gid; /* group ID of owner */
+ unsigned hyper ia_rdev; /* device ID (if special file) */
+ unsigned hyper ia_size; /* file size in bytes */
+ unsigned int ia_blksize; /* blocksize for filesystem I/O */
+ unsigned hyper ia_blocks; /* number of 512B blocks allocated */
+ unsigned int ia_atime; /* last access time */
+ unsigned int ia_atime_nsec;
+ unsigned int ia_mtime; /* last modification time */
+ unsigned int ia_mtime_nsec;
+ unsigned int ia_ctime; /* last status change time */
+ unsigned int ia_ctime_nsec;
+};
+
+struct gfs3_stat_req {
+ opaque gfid[16];
+ opaque xdata<>; /* Extra data */
+};
+struct gfs3_stat_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt stat;
+ opaque xdata<>; /* Extra data */
+} ;
+
+
+struct gfs3_readlink_req {
+ opaque gfid[16];
+ unsigned int size;
+ opaque xdata<>; /* Extra data */
+} ;
+ struct gfs3_readlink_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt buf;
+ string path<>; /* NULL terminated */
+ opaque xdata<>; /* Extra data */
+} ;
+
+
+ struct gfs3_mknod_req {
+ opaque pargfid[16];
+ unsigned hyper dev;
+ unsigned int mode;
+ unsigned int umask;
+ string bname<>; /* NULL terminated */
+ opaque xdata<>; /* Extra data */
+} ;
+ struct gfs3_mknod_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt stat;
+ struct gf_iatt preparent;
+ struct gf_iatt postparent;
+ opaque xdata<>; /* Extra data */
+};
+
+
+ struct gfs3_mkdir_req {
+ opaque pargfid[16];
+ unsigned int mode;
+ unsigned int umask;
+ string bname<>; /* NULL terminated */
+ opaque xdata<>; /* Extra data */
+} ;
+ struct gfs3_mkdir_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt stat;
+ struct gf_iatt preparent;
+ struct gf_iatt postparent;
+ opaque xdata<>; /* Extra data */
+} ;
+
+
+ struct gfs3_unlink_req {
+ opaque pargfid[16];
+ string bname<>; /* NULL terminated */
+ unsigned int xflags;
+ opaque xdata<>; /* Extra data */
+};
+ struct gfs3_unlink_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt preparent;
+ struct gf_iatt postparent;
+ opaque xdata<>; /* Extra data */
+};
+
+
+ struct gfs3_rmdir_req {
+ opaque pargfid[16];
+ int xflags;
+ string bname<>; /* NULL terminated */
+ opaque xdata<>; /* Extra data */
+};
+ struct gfs3_rmdir_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt preparent;
+ struct gf_iatt postparent;
+ opaque xdata<>; /* Extra data */
+};
+
+
+ struct gfs3_symlink_req {
+ opaque pargfid[16];
+ string bname<>;
+ unsigned int umask;
+ string linkname<>;
+ opaque xdata<>; /* Extra data */
+};
+ struct gfs3_symlink_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt stat;
+ struct gf_iatt preparent;
+ struct gf_iatt postparent;
+ opaque xdata<>; /* Extra data */
+};
+
+
+ struct gfs3_rename_req {
+ opaque oldgfid[16];
+ opaque newgfid[16];
+ string oldbname<>; /* NULL terminated */
+ string newbname<>; /* NULL terminated */
+ opaque xdata<>; /* Extra data */
+};
+ struct gfs3_rename_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt stat;
+ struct gf_iatt preoldparent;
+ struct gf_iatt postoldparent;
+ struct gf_iatt prenewparent;
+ struct gf_iatt postnewparent;
+ opaque xdata<>; /* Extra data */
+};
+
+
+ struct gfs3_link_req {
+ opaque oldgfid[16];
+ opaque newgfid[16];
+ string newbname<>;
+ opaque xdata<>; /* Extra data */
+};
+ struct gfs3_link_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt stat;
+ struct gf_iatt preparent;
+ struct gf_iatt postparent;
+ opaque xdata<>; /* Extra data */
+};
+
+ struct gfs3_truncate_req {
+ opaque gfid[16];
+ unsigned hyper offset;
+ opaque xdata<>; /* Extra data */
+};
+ struct gfs3_truncate_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt prestat;
+ struct gf_iatt poststat;
+ opaque xdata<>; /* Extra data */
+};
+
+
+ struct gfs3_open_req {
+ opaque gfid[16];
+ unsigned int flags;
+ opaque xdata<>; /* Extra data */
+};
+ struct gfs3_open_rsp {
+ int op_ret;
+ int op_errno;
+ hyper fd;
+ opaque xdata<>; /* Extra data */
+};
+
+
+ struct gfs3_read_req {
+ opaque gfid[16];
+ hyper fd;
+ unsigned hyper offset;
+ unsigned int size;
+ unsigned int flag;
+ opaque xdata<>; /* Extra data */
+};
+ struct gfs3_read_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt stat;
+ unsigned int size;
+ opaque xdata<>; /* Extra data */
+} ;
+
+struct gfs3_lookup_req {
+ opaque gfid[16];
+ opaque pargfid[16];
+ unsigned int flags;
+ string bname<>;
+ opaque xdata<>; /* Extra data */
+};
+ struct gfs3_lookup_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt stat;
+ struct gf_iatt postparent;
+ opaque xdata<>; /* Extra data */
+} ;
+
+
+
+ struct gfs3_write_req {
+ opaque gfid[16];
+ hyper fd;
+ unsigned hyper offset;
+ unsigned int size;
+ unsigned int flag;
+ opaque xdata<>; /* Extra data */
+};
+ struct gfs3_write_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt prestat;
+ struct gf_iatt poststat;
+ opaque xdata<>; /* Extra data */
+} ;
+
+
+ struct gfs3_statfs_req {
+ opaque gfid[16];
+ opaque xdata<>; /* Extra data */
+} ;
+ struct gfs3_statfs_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_statfs statfs;
+ opaque xdata<>; /* Extra data */
+} ;
+
+ struct gfs3_lk_req {
+ opaque gfid[16];
+ hyper fd;
+ unsigned int cmd;
+ unsigned int type;
+ struct gf_proto_flock flock;
+ opaque xdata<>; /* Extra data */
+} ;
+ struct gfs3_lk_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_proto_flock flock;
+ opaque xdata<>; /* Extra data */
+} ;
+
+ struct gfs3_inodelk_req {
+ opaque gfid[16];
+ unsigned int cmd;
+ unsigned int type;
+ struct gf_proto_flock flock;
+ string volume<>;
+ opaque xdata<>; /* Extra data */
+} ;
+
+struct gfs3_finodelk_req {
+ opaque gfid[16];
+ hyper fd;
+ unsigned int cmd;
+ unsigned int type;
+ struct gf_proto_flock flock;
+ string volume<>;
+ opaque xdata<>; /* Extra data */
+} ;
+
+
+ struct gfs3_flush_req {
+ opaque gfid[16];
+ hyper fd;
+ opaque xdata<>; /* Extra data */
+} ;
+
+
+ struct gfs3_fsync_req {
+ opaque gfid[16];
+ hyper fd;
+ unsigned int data;
+ opaque xdata<>; /* Extra data */
+} ;
+ struct gfs3_fsync_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt prestat;
+ struct gf_iatt poststat;
+ opaque xdata<>; /* Extra data */
+} ;
+
+
+ struct gfs3_setxattr_req {
+ opaque gfid[16];
+ unsigned int flags;
+ opaque dict<>;
+ opaque xdata<>; /* Extra data */
+} ;
+
+
+
+ struct gfs3_fsetxattr_req {
+ opaque gfid[16];
+ hyper fd;
+ unsigned int flags;
+ opaque dict<>;
+ opaque xdata<>; /* Extra data */
+} ;
+
+
+
+ struct gfs3_xattrop_req {
+ opaque gfid[16];
+ unsigned int flags;
+ opaque dict<>;
+ opaque xdata<>; /* Extra data */
+} ;
+
+ struct gfs3_xattrop_rsp {
+ int op_ret;
+ int op_errno;
+ opaque dict<>;
+ opaque xdata<>; /* Extra data */
+} ;
+
+
+ struct gfs3_fxattrop_req {
+ opaque gfid[16];
+ hyper fd;
+ unsigned int flags;
+ opaque dict<>;
+ opaque xdata<>; /* Extra data */
+} ;
+
+ struct gfs3_fxattrop_rsp {
+ int op_ret;
+ int op_errno;
+ opaque dict<>;
+ opaque xdata<>; /* Extra data */
+} ;
+
+
+ struct gfs3_getxattr_req {
+ opaque gfid[16];
+ unsigned int namelen;
+ string name<>;
+ opaque xdata<>; /* Extra data */
+} ;
+ struct gfs3_getxattr_rsp {
+ int op_ret;
+ int op_errno;
+ opaque dict<>;
+ opaque xdata<>; /* Extra data */
+} ;
+
+
+ struct gfs3_fgetxattr_req {
+ opaque gfid[16];
+ hyper fd;
+ unsigned int namelen;
+ string name<>;
+ opaque xdata<>; /* Extra data */
+} ;
+ struct gfs3_fgetxattr_rsp {
+ int op_ret;
+ int op_errno;
+ opaque dict<>;
+ opaque xdata<>; /* Extra data */
+} ;
+
+
+ struct gfs3_removexattr_req {
+ opaque gfid[16];
+ string name<>;
+ opaque xdata<>; /* Extra data */
+} ;
+
+ struct gfs3_fremovexattr_req {
+ opaque gfid[16];
+ hyper fd;
+ string name<>;
+ opaque xdata<>; /* Extra data */
+} ;
+
+
+
+ struct gfs3_opendir_req {
+ opaque gfid[16];
+ opaque xdata<>; /* Extra data */
+} ;
+ struct gfs3_opendir_rsp {
+ int op_ret;
+ int op_errno;
+ hyper fd;
+ opaque xdata<>; /* Extra data */
+} ;
+
+
+ struct gfs3_fsyncdir_req {
+ opaque gfid[16];
+ hyper fd;
+ int data;
+ opaque xdata<>; /* Extra data */
+} ;
+
+ struct gfs3_readdir_req {
+ opaque gfid[16];
+ hyper fd;
+ unsigned hyper offset;
+ unsigned int size;
+ opaque xdata<>; /* Extra data */
+};
+
+ struct gfs3_readdirp_req {
+ opaque gfid[16];
+ hyper fd;
+ unsigned hyper offset;
+ unsigned int size;
+ opaque dict<>;
+} ;
+
+
+struct gfs3_access_req {
+ opaque gfid[16];
+ unsigned int mask;
+ opaque xdata<>; /* Extra data */
+} ;
+
+
+struct gfs3_create_req {
+ opaque pargfid[16];
+ unsigned int flags;
+ unsigned int mode;
+ unsigned int umask;
+ string bname<>;
+ opaque xdata<>; /* Extra data */
+} ;
+struct gfs3_create_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt stat;
+ unsigned hyper fd;
+ struct gf_iatt preparent;
+ struct gf_iatt postparent;
+ opaque xdata<>; /* Extra data */
+} ;
+
+
+
+struct gfs3_ftruncate_req {
+ opaque gfid[16];
+ hyper fd;
+ unsigned hyper offset;
+ opaque xdata<>; /* Extra data */
+} ;
+struct gfs3_ftruncate_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt prestat;
+ struct gf_iatt poststat;
+ opaque xdata<>; /* Extra data */
+} ;
+
+
+struct gfs3_fstat_req {
+ opaque gfid[16];
+ hyper fd;
+ opaque xdata<>; /* Extra data */
+} ;
+ struct gfs3_fstat_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt stat;
+ opaque xdata<>; /* Extra data */
+} ;
+
+
+
+ struct gfs3_entrylk_req {
+ opaque gfid[16];
+ unsigned int cmd;
+ unsigned int type;
+ unsigned hyper namelen;
+ string name<>;
+ string volume<>;
+ opaque xdata<>; /* Extra data */
+};
+
+ struct gfs3_fentrylk_req {
+ opaque gfid[16];
+ hyper fd;
+ unsigned int cmd;
+ unsigned int type;
+ unsigned hyper namelen;
+ string name<>;
+ string volume<>;
+ opaque xdata<>; /* Extra data */
+};
+
+
+ struct gfs3_setattr_req {
+ opaque gfid[16];
+ struct gf_iatt stbuf;
+ int valid;
+ opaque xdata<>; /* Extra data */
+} ;
+ struct gfs3_setattr_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt statpre;
+ struct gf_iatt statpost;
+ opaque xdata<>; /* Extra data */
+} ;
+
+ struct gfs3_fsetattr_req {
+ hyper fd;
+ struct gf_iatt stbuf;
+ int valid;
+ opaque xdata<>; /* Extra data */
+} ;
+ struct gfs3_fsetattr_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt statpre;
+ struct gf_iatt statpost;
+ opaque xdata<>; /* Extra data */
+} ;
+
+ struct gfs3_fallocate_req {
+ opaque gfid[16];
+ hyper fd;
+ unsigned int flags;
+ unsigned hyper offset;
+ unsigned hyper size;
+ opaque xdata<>; /* Extra data */
+} ;
+
+ struct gfs3_fallocate_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt statpre;
+ struct gf_iatt statpost;
+ opaque xdata<>; /* Extra data */
+} ;
+
+ struct gfs3_discard_req {
+ opaque gfid[16];
+ hyper fd;
+ unsigned hyper offset;
+ unsigned hyper size;
+ opaque xdata<>; /* Extra data */
+} ;
+
+ struct gfs3_discard_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt statpre;
+ struct gf_iatt statpost;
+ opaque xdata<>; /* Extra data */
+} ;
+
+ struct gfs3_zerofill_req {
+ opaque gfid[16];
+ hyper fd;
+ unsigned hyper offset;
+ unsigned hyper size;
+ opaque xdata<>;
+} ;
+
+ struct gfs3_zerofill_rsp {
+ int op_ret;
+ int op_errno;
+ struct gf_iatt statpre;
+ struct gf_iatt statpost;
+ opaque xdata<>;
+} ;
+
+
+ struct gfs3_rchecksum_req {
+ hyper fd;
+ unsigned hyper offset;
+ unsigned int len;
+ opaque xdata<>; /* Extra data */
+} ;
+ struct gfs3_rchecksum_rsp {
+ int op_ret;
+ int op_errno;
+ unsigned int weak_checksum;
+ opaque strong_checksum<>;
+ opaque xdata<>; /* Extra data */
+} ;
+
+
+ struct gf_setvolume_req {
+ opaque dict<>;
+} ;
+ struct gf_setvolume_rsp {
+ int op_ret;
+ int op_errno;
+ opaque dict<>;
+} ;
+
+
+ struct gf_getspec_req {
+ unsigned int flags;
+ string key<>;
+ opaque xdata<>; /* Extra data */
+} ;
+ struct gf_getspec_rsp {
+ int op_ret;
+ int op_errno;
+ string spec<>;
+ opaque xdata<>; /* Extra data */
+} ;
+
+ struct gf_mgmt_hndsk_req {
+ opaque hndsk<>;
+} ;
+
+ struct gf_mgmt_hndsk_rsp {
+ int op_ret;
+ int op_errno;
+ opaque hndsk<>;
+} ;
+
+ struct gf_log_req {
+ opaque msg<>;
+} ;
+
+ struct gf_notify_req {
+ unsigned int flags;
+ string buf<>;
+ opaque xdata<>; /* Extra data */
+} ;
+ struct gf_notify_rsp {
+ int op_ret;
+ int op_errno;
+ unsigned int flags;
+ string buf<>;
+ opaque xdata<>; /* Extra data */
+} ;
+
+struct gfs3_releasedir_req {
+ opaque gfid[16];
+ hyper fd;
+ opaque xdata<>; /* Extra data */
+} ;
+
+struct gfs3_release_req {
+ opaque gfid[16];
+ hyper fd;
+ opaque xdata<>; /* Extra data */
+} ;
+
+struct gf_common_rsp {
+ int op_ret;
+ int op_errno;
+ opaque xdata<>; /* Extra data */
+} ;
+
+struct gfs3_dirlist {
+ unsigned hyper d_ino;
+ unsigned hyper d_off;
+ unsigned int d_len;
+ unsigned int d_type;
+ string name<>;
+ struct gfs3_dirlist *nextentry;
+};
+
+
+struct gfs3_readdir_rsp {
+ int op_ret;
+ int op_errno;
+ struct gfs3_dirlist *reply;
+ opaque xdata<>; /* Extra data */
+};
+
+struct gfs3_dirplist {
+ unsigned hyper d_ino;
+ unsigned hyper d_off;
+ unsigned int d_len;
+ unsigned int d_type;
+ string name<>;
+ struct gf_iatt stat;
+ opaque dict<>;
+ struct gfs3_dirplist *nextentry;
+};
+
+struct gfs3_readdirp_rsp {
+ int op_ret;
+ int op_errno;
+ struct gfs3_dirplist *reply;
+ opaque xdata<>; /* Extra data */
+};
+
+struct gf_set_lk_ver_rsp {
+ int op_ret;
+ int op_errno;
+ int lk_ver;
+};
+
+struct gf_set_lk_ver_req {
+ string uid<>;
+ int lk_ver;
+};
+
+struct gf_event_notify_req {
+ int op;
+ opaque dict<>;
+};
+
+struct gf_event_notify_rsp {
+ int op_ret;
+ int op_errno;
+ opaque dict<>;
+};
diff --git a/rpc/xdr/src/glusterfs3.h b/rpc/xdr/src/glusterfs3.h
new file mode 100644
index 000000000..798413e31
--- /dev/null
+++ b/rpc/xdr/src/glusterfs3.h
@@ -0,0 +1,270 @@
+/*
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERFS3_H
+#define _GLUSTERFS3_H
+
+#include <sys/uio.h>
+
+#include "xdr-generic.h"
+#include "glusterfs3-xdr.h"
+#include "iatt.h"
+
+#define xdr_decoded_remaining_addr(xdr) ((&xdr)->x_private)
+#define xdr_decoded_remaining_len(xdr) ((&xdr)->x_handy)
+#define xdr_encoded_length(xdr) (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
+#define xdr_decoded_length(xdr) (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
+
+
+#define GF_O_ACCMODE 003
+#define GF_O_RDONLY 00
+#define GF_O_WRONLY 01
+#define GF_O_RDWR 02
+#define GF_O_CREAT 0100
+#define GF_O_EXCL 0200
+#define GF_O_NOCTTY 0400
+#define GF_O_TRUNC 01000
+#define GF_O_APPEND 02000
+#define GF_O_NONBLOCK 04000
+#define GF_O_SYNC 010000
+#define GF_O_ASYNC 020000
+
+#define GF_O_DIRECT 040000
+#define GF_O_DIRECTORY 0200000
+#define GF_O_NOFOLLOW 0400000
+#define GF_O_NOATIME 01000000
+#define GF_O_CLOEXEC 02000000
+
+#define GF_O_LARGEFILE 0100000
+
+#define GF_O_FMODE_EXEC 040
+
+#define XLATE_BIT(from, to, bit) do { \
+ if (from & bit) \
+ to = to | GF_##bit; \
+ } while (0)
+
+#define UNXLATE_BIT(from, to, bit) do { \
+ if (from & GF_##bit) \
+ to = to | bit; \
+ } while (0)
+
+#define XLATE_ACCESSMODE(from, to) do { \
+ switch (from & O_ACCMODE) { \
+ case O_RDONLY: to |= GF_O_RDONLY; \
+ break; \
+ case O_WRONLY: to |= GF_O_WRONLY; \
+ break; \
+ case O_RDWR: to |= GF_O_RDWR; \
+ break; \
+ } \
+ } while (0)
+
+#define UNXLATE_ACCESSMODE(from, to) do { \
+ switch (from & GF_O_ACCMODE) { \
+ case GF_O_RDONLY: to |= O_RDONLY; \
+ break; \
+ case GF_O_WRONLY: to |= O_WRONLY; \
+ break; \
+ case GF_O_RDWR: to |= O_RDWR; \
+ break; \
+ } \
+ } while (0)
+
+static inline uint32_t
+gf_flags_from_flags (uint32_t flags)
+{
+ uint32_t gf_flags = 0;
+
+ XLATE_ACCESSMODE (flags, gf_flags);
+
+ XLATE_BIT (flags, gf_flags, O_CREAT);
+ XLATE_BIT (flags, gf_flags, O_EXCL);
+ XLATE_BIT (flags, gf_flags, O_NOCTTY);
+ XLATE_BIT (flags, gf_flags, O_TRUNC);
+ XLATE_BIT (flags, gf_flags, O_APPEND);
+ XLATE_BIT (flags, gf_flags, O_NONBLOCK);
+ XLATE_BIT (flags, gf_flags, O_SYNC);
+ XLATE_BIT (flags, gf_flags, O_ASYNC);
+
+ XLATE_BIT (flags, gf_flags, O_DIRECT);
+ XLATE_BIT (flags, gf_flags, O_DIRECTORY);
+ XLATE_BIT (flags, gf_flags, O_NOFOLLOW);
+#ifdef O_NOATIME
+ XLATE_BIT (flags, gf_flags, O_NOATIME);
+#endif
+#ifdef O_CLOEXEC
+ XLATE_BIT (flags, gf_flags, O_CLOEXEC);
+#endif
+ XLATE_BIT (flags, gf_flags, O_LARGEFILE);
+ XLATE_BIT (flags, gf_flags, O_FMODE_EXEC);
+
+ return gf_flags;
+}
+
+static inline uint32_t
+gf_flags_to_flags (uint32_t gf_flags)
+{
+ uint32_t flags = 0;
+
+ UNXLATE_ACCESSMODE (gf_flags, flags);
+
+ UNXLATE_BIT (gf_flags, flags, O_CREAT);
+ UNXLATE_BIT (gf_flags, flags, O_EXCL);
+ UNXLATE_BIT (gf_flags, flags, O_NOCTTY);
+ UNXLATE_BIT (gf_flags, flags, O_TRUNC);
+ UNXLATE_BIT (gf_flags, flags, O_APPEND);
+ UNXLATE_BIT (gf_flags, flags, O_NONBLOCK);
+ UNXLATE_BIT (gf_flags, flags, O_SYNC);
+ UNXLATE_BIT (gf_flags, flags, O_ASYNC);
+
+ UNXLATE_BIT (gf_flags, flags, O_DIRECT);
+ UNXLATE_BIT (gf_flags, flags, O_DIRECTORY);
+ UNXLATE_BIT (gf_flags, flags, O_NOFOLLOW);
+#ifdef O_NOATIME
+ UNXLATE_BIT (gf_flags, flags, O_NOATIME);
+#endif
+#ifdef O_CLOEXEC
+ UNXLATE_BIT (gf_flags, flags, O_CLOEXEC);
+#endif
+ UNXLATE_BIT (gf_flags, flags, O_LARGEFILE);
+ UNXLATE_BIT (gf_flags, flags, O_FMODE_EXEC);
+
+ return flags;
+}
+
+
+static inline void
+gf_statfs_to_statfs (struct gf_statfs *gf_stat, struct statvfs *stat)
+{
+ if (!stat || !gf_stat)
+ return;
+
+ stat->f_bsize = (gf_stat->bsize);
+ stat->f_frsize = (gf_stat->frsize);
+ stat->f_blocks = (gf_stat->blocks);
+ stat->f_bfree = (gf_stat->bfree);
+ stat->f_bavail = (gf_stat->bavail);
+ stat->f_files = (gf_stat->files);
+ stat->f_ffree = (gf_stat->ffree);
+ stat->f_favail = (gf_stat->favail);
+ stat->f_fsid = (gf_stat->fsid);
+ stat->f_flag = (gf_stat->flag);
+ stat->f_namemax = (gf_stat->namemax);
+}
+
+
+static inline void
+gf_statfs_from_statfs (struct gf_statfs *gf_stat, struct statvfs *stat)
+{
+ if (!stat || !gf_stat)
+ return;
+
+ gf_stat->bsize = stat->f_bsize;
+ gf_stat->frsize = stat->f_frsize;
+ gf_stat->blocks = stat->f_blocks;
+ gf_stat->bfree = stat->f_bfree;
+ gf_stat->bavail = stat->f_bavail;
+ gf_stat->files = stat->f_files;
+ gf_stat->ffree = stat->f_ffree;
+ gf_stat->favail = stat->f_favail;
+ gf_stat->fsid = stat->f_fsid;
+ gf_stat->flag = stat->f_flag;
+ gf_stat->namemax = stat->f_namemax;
+}
+
+static inline void
+gf_proto_flock_to_flock (struct gf_proto_flock *gf_proto_flock, struct gf_flock *gf_flock)
+{
+ if (!gf_flock || !gf_proto_flock)
+ return;
+
+ gf_flock->l_type = gf_proto_flock->type;
+ gf_flock->l_whence = gf_proto_flock->whence;
+ gf_flock->l_start = gf_proto_flock->start;
+ gf_flock->l_len = gf_proto_flock->len;
+ gf_flock->l_pid = gf_proto_flock->pid;
+ gf_flock->l_owner.len = gf_proto_flock->lk_owner.lk_owner_len;
+ if (gf_flock->l_owner.len &&
+ (gf_flock->l_owner.len < GF_MAX_LOCK_OWNER_LEN))
+ memcpy (gf_flock->l_owner.data, gf_proto_flock->lk_owner.lk_owner_val,
+ gf_flock->l_owner.len);
+}
+
+
+static inline void
+gf_proto_flock_from_flock (struct gf_proto_flock *gf_proto_flock, struct gf_flock *gf_flock)
+{
+ if (!gf_flock || !gf_proto_flock)
+ return;
+
+ gf_proto_flock->type = (gf_flock->l_type);
+ gf_proto_flock->whence = (gf_flock->l_whence);
+ gf_proto_flock->start = (gf_flock->l_start);
+ gf_proto_flock->len = (gf_flock->l_len);
+ gf_proto_flock->pid = (gf_flock->l_pid);
+ gf_proto_flock->lk_owner.lk_owner_len = gf_flock->l_owner.len;
+ if (gf_flock->l_owner.len)
+ gf_proto_flock->lk_owner.lk_owner_val = gf_flock->l_owner.data;
+}
+
+static inline void
+gf_stat_to_iatt (struct gf_iatt *gf_stat, struct iatt *iatt)
+{
+ if (!iatt || !gf_stat)
+ return;
+
+ memcpy (iatt->ia_gfid, gf_stat->ia_gfid, 16);
+ iatt->ia_ino = gf_stat->ia_ino ;
+ iatt->ia_dev = gf_stat->ia_dev ;
+ iatt->ia_type = ia_type_from_st_mode (gf_stat->mode) ;
+ iatt->ia_prot = ia_prot_from_st_mode (gf_stat->mode) ;
+ iatt->ia_nlink = gf_stat->ia_nlink ;
+ iatt->ia_uid = gf_stat->ia_uid ;
+ iatt->ia_gid = gf_stat->ia_gid ;
+ iatt->ia_rdev = gf_stat->ia_rdev ;
+ iatt->ia_size = gf_stat->ia_size ;
+ iatt->ia_blksize = gf_stat->ia_blksize ;
+ iatt->ia_blocks = gf_stat->ia_blocks ;
+ iatt->ia_atime = gf_stat->ia_atime ;
+ iatt->ia_atime_nsec = gf_stat->ia_atime_nsec ;
+ iatt->ia_mtime = gf_stat->ia_mtime ;
+ iatt->ia_mtime_nsec = gf_stat->ia_mtime_nsec ;
+ iatt->ia_ctime = gf_stat->ia_ctime ;
+ iatt->ia_ctime_nsec = gf_stat->ia_ctime_nsec ;
+}
+
+
+static inline void
+gf_stat_from_iatt (struct gf_iatt *gf_stat, struct iatt *iatt)
+{
+ if (!iatt || !gf_stat)
+ return;
+
+ memcpy (gf_stat->ia_gfid, iatt->ia_gfid, 16);
+ gf_stat->ia_ino = iatt->ia_ino ;
+ gf_stat->ia_dev = iatt->ia_dev ;
+ gf_stat->mode = st_mode_from_ia (iatt->ia_prot, iatt->ia_type);
+ gf_stat->ia_nlink = iatt->ia_nlink ;
+ gf_stat->ia_uid = iatt->ia_uid ;
+ gf_stat->ia_gid = iatt->ia_gid ;
+ gf_stat->ia_rdev = iatt->ia_rdev ;
+ gf_stat->ia_size = iatt->ia_size ;
+ gf_stat->ia_blksize = iatt->ia_blksize ;
+ gf_stat->ia_blocks = iatt->ia_blocks ;
+ gf_stat->ia_atime = iatt->ia_atime ;
+ gf_stat->ia_atime_nsec = iatt->ia_atime_nsec ;
+ gf_stat->ia_mtime = iatt->ia_mtime ;
+ gf_stat->ia_mtime_nsec = iatt->ia_mtime_nsec ;
+ gf_stat->ia_ctime = iatt->ia_ctime ;
+ gf_stat->ia_ctime_nsec = iatt->ia_ctime_nsec ;
+}
+
+#endif /* !_GLUSTERFS3_H */
diff --git a/rpc/xdr/src/mount3udp.x b/rpc/xdr/src/mount3udp.x
new file mode 100644
index 000000000..888c53120
--- /dev/null
+++ b/rpc/xdr/src/mount3udp.x
@@ -0,0 +1,25 @@
+/*
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/* This is used by rpcgen to auto generate the rpc stubs.
+ * mount3udp_svc.c is heavily modified though
+ */
+
+const MNTUDPPATHLEN = 1024;
+
+typedef string mntudpdirpath<MNTPATHLEN>;
+
+program MOUNTUDP_PROGRAM {
+ version MOUNTUDP_V3 {
+ void MOUNTUDPPROC3_NULL(void) = 0;
+ mountres3 MOUNTUDPPROC3_MNT (mntudpdirpath) = 1;
+ mountstat3 MOUNTUDPPROC3_UMNT (mntudpdirpath) = 3;
+ } = 3;
+} = 100005;
diff --git a/rpc/xdr/src/msg-nfs3.c b/rpc/xdr/src/msg-nfs3.c
new file mode 100644
index 000000000..6cdb5d37e
--- /dev/null
+++ b/rpc/xdr/src/msg-nfs3.c
@@ -0,0 +1,572 @@
+/*
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/uio.h>
+#include <rpc/rpc.h>
+#include <rpc/xdr.h>
+#include <sys/types.h>
+
+#include "xdr-nfs3.h"
+#include "msg-nfs3.h"
+#include "xdr-generic.h"
+#include "xdr-common.h"
+
+
+/* Decode the mount path from the network message in inmsg
+ * into the memory referenced by outpath.iov_base.
+ * The size allocated for outpath.iov_base is outpath.iov_len.
+ * The size of the path extracted from the message is returned.
+ */
+ssize_t
+xdr_to_mountpath (struct iovec outpath, struct iovec inmsg)
+{
+ XDR xdr;
+ ssize_t ret = -1;
+ char *mntpath = NULL;
+
+ if ((!outpath.iov_base) || (!inmsg.iov_base))
+ return -1;
+
+ xdrmem_create (&xdr, inmsg.iov_base, (unsigned int)inmsg.iov_len,
+ XDR_DECODE);
+
+ mntpath = outpath.iov_base;
+ if (!xdr_dirpath (&xdr, (dirpath *)&mntpath)) {
+ ret = -1;
+ goto ret;
+ }
+
+ ret = xdr_decoded_length (xdr);
+
+ret:
+ return ret;
+}
+
+/* Translate the mountres3 structure in res into XDR format into memory
+ * referenced by outmsg.iov_base.
+ * Returns the number of bytes used in encoding into XDR format.
+ */
+ssize_t
+xdr_serialize_mountres3 (struct iovec outmsg, mountres3 *res)
+{
+ return xdr_serialize_generic (outmsg, (void *)res,
+ (xdrproc_t)xdr_mountres3);
+}
+
+
+ssize_t
+xdr_serialize_mountbody (struct iovec outmsg, mountbody *mb)
+{
+ return xdr_serialize_generic (outmsg, (void *)mb,
+ (xdrproc_t)xdr_mountbody);
+}
+
+ssize_t
+xdr_serialize_mountlist (struct iovec outmsg, mountlist *ml)
+{
+ return xdr_serialize_generic (outmsg, (void *)ml,
+ (xdrproc_t)xdr_mountlist);
+}
+
+
+ssize_t
+xdr_serialize_mountstat3 (struct iovec outmsg, mountstat3 *m)
+{
+ return xdr_serialize_generic (outmsg, (void *)m,
+ (xdrproc_t)xdr_mountstat3);
+}
+
+
+ssize_t
+xdr_to_getattr3args (struct iovec inmsg, getattr3args *ga)
+{
+ return xdr_to_generic (inmsg, (void *)ga,
+ (xdrproc_t)xdr_getattr3args);
+}
+
+
+ssize_t
+xdr_serialize_getattr3res (struct iovec outmsg, getattr3res *res)
+{
+ return xdr_serialize_generic (outmsg, (void *)res,
+ (xdrproc_t)xdr_getattr3res);
+}
+
+
+ssize_t
+xdr_serialize_setattr3res (struct iovec outmsg, setattr3res *res)
+{
+ return xdr_serialize_generic (outmsg, (void *)res,
+ (xdrproc_t)xdr_setattr3res);
+}
+
+
+ssize_t
+xdr_to_setattr3args (struct iovec inmsg, setattr3args *sa)
+{
+ return xdr_to_generic (inmsg, (void *)sa,
+ (xdrproc_t)xdr_setattr3args);
+}
+
+
+ssize_t
+xdr_serialize_lookup3res (struct iovec outmsg, lookup3res *res)
+{
+ return xdr_serialize_generic (outmsg, (void *)res,
+ (xdrproc_t)xdr_lookup3res);
+}
+
+
+ssize_t
+xdr_to_lookup3args (struct iovec inmsg, lookup3args *la)
+{
+ return xdr_to_generic (inmsg, (void *)la,
+ (xdrproc_t)xdr_lookup3args);
+}
+
+
+ssize_t
+xdr_to_access3args (struct iovec inmsg, access3args *ac)
+{
+ return xdr_to_generic (inmsg,(void *)ac,
+ (xdrproc_t)xdr_access3args);
+}
+
+
+ssize_t
+xdr_serialize_access3res (struct iovec outmsg, access3res *res)
+{
+ return xdr_serialize_generic (outmsg, (void *)res,
+ (xdrproc_t)xdr_access3res);
+}
+
+
+ssize_t
+xdr_to_readlink3args (struct iovec inmsg, readlink3args *ra)
+{
+ return xdr_to_generic (inmsg, (void *)ra,
+ (xdrproc_t)xdr_readlink3args);
+}
+
+
+ssize_t
+xdr_serialize_readlink3res (struct iovec outmsg, readlink3res *res)
+{
+ return xdr_serialize_generic (outmsg, (void *)res,
+ (xdrproc_t)xdr_readlink3res);
+}
+
+
+ssize_t
+xdr_to_read3args (struct iovec inmsg, read3args *ra)
+{
+ return xdr_to_generic (inmsg, (void *)ra, (xdrproc_t)xdr_read3args);
+}
+
+
+ssize_t
+xdr_serialize_read3res (struct iovec outmsg, read3res *res)
+{
+ return xdr_serialize_generic (outmsg, (void *)res,
+ (xdrproc_t)xdr_read3res);
+}
+
+ssize_t
+xdr_serialize_read3res_nocopy (struct iovec outmsg, read3res *res)
+{
+ return xdr_serialize_generic (outmsg, (void *)res,
+ (xdrproc_t)xdr_read3res_nocopy);
+}
+
+
+ssize_t
+xdr_to_write3args (struct iovec inmsg, write3args *wa)
+{
+ return xdr_to_generic (inmsg, (void *)wa,(xdrproc_t)xdr_write3args);
+}
+
+
+ssize_t
+xdr_to_write3args_nocopy (struct iovec inmsg, write3args *wa,
+ struct iovec *payload)
+{
+ return xdr_to_generic_payload (inmsg, (void *)wa,
+ (xdrproc_t)xdr_write3args, payload);
+}
+
+
+ssize_t
+xdr_serialize_write3res (struct iovec outmsg, write3res *res)
+{
+ return xdr_serialize_generic (outmsg, (void *)res,
+ (xdrproc_t)xdr_write3res);
+}
+
+
+ssize_t
+xdr_to_create3args (struct iovec inmsg, create3args *ca)
+{
+ return xdr_to_generic (inmsg, (void *)ca,
+ (xdrproc_t)xdr_create3args);
+}
+
+
+ssize_t
+xdr_serialize_create3res (struct iovec outmsg, create3res *res)
+{
+ return xdr_serialize_generic (outmsg, (void *)res,
+ (xdrproc_t)xdr_create3res);
+}
+
+
+ssize_t
+xdr_serialize_mkdir3res (struct iovec outmsg, mkdir3res *res)
+{
+ return xdr_serialize_generic (outmsg, (void *)res,
+ (xdrproc_t)xdr_mkdir3res);
+}
+
+
+ssize_t
+xdr_to_mkdir3args (struct iovec inmsg, mkdir3args *ma)
+{
+ return xdr_to_generic (inmsg, (void *)ma,
+ (xdrproc_t)xdr_mkdir3args);
+}
+
+
+ssize_t
+xdr_to_symlink3args (struct iovec inmsg, symlink3args *sa)
+{
+ return xdr_to_generic (inmsg, (void *)sa,
+ (xdrproc_t)xdr_symlink3args);
+}
+
+
+ssize_t
+xdr_serialize_symlink3res (struct iovec outmsg, symlink3res *res)
+{
+ return xdr_serialize_generic (outmsg, (void *)res,
+ (xdrproc_t)xdr_symlink3res);
+}
+
+
+ssize_t
+xdr_to_mknod3args (struct iovec inmsg, mknod3args *ma)
+{
+ return xdr_to_generic (inmsg, (void *)ma,
+ (xdrproc_t)xdr_mknod3args);
+}
+
+
+ssize_t
+xdr_serialize_mknod3res (struct iovec outmsg, mknod3res *res)
+{
+ return xdr_serialize_generic (outmsg, (void *)res,
+ (xdrproc_t)xdr_mknod3res);
+}
+
+
+ssize_t
+xdr_to_remove3args (struct iovec inmsg, remove3args *ra)
+{
+ return xdr_to_generic (inmsg, (void *)ra,
+ (xdrproc_t)xdr_remove3args);
+}
+
+
+ssize_t
+xdr_serialize_remove3res (struct iovec outmsg, remove3res *res)
+{
+ return xdr_serialize_generic (outmsg, (void *)res,
+ (xdrproc_t)xdr_remove3res);
+}
+
+
+ssize_t
+xdr_to_rmdir3args (struct iovec inmsg, rmdir3args *ra)
+{
+ return xdr_to_generic (inmsg, (void *)ra,
+ (xdrproc_t)xdr_rmdir3args);
+}
+
+
+ssize_t
+xdr_serialize_rmdir3res (struct iovec outmsg, rmdir3res *res)
+{
+ return xdr_serialize_generic (outmsg, (void *)res,
+ (xdrproc_t)xdr_rmdir3res);
+}
+
+
+ssize_t
+xdr_serialize_rename3res (struct iovec outmsg, rename3res *res)
+{
+ return xdr_serialize_generic (outmsg, (void *)res,
+ (xdrproc_t)xdr_rename3res);
+}
+
+
+ssize_t
+xdr_to_rename3args (struct iovec inmsg, rename3args *ra)
+{
+ return xdr_to_generic (inmsg, (void *)ra,
+ (xdrproc_t)xdr_rename3args);
+}
+
+
+ssize_t
+xdr_serialize_link3res (struct iovec outmsg, link3res *li)
+{
+ return xdr_serialize_generic (outmsg, (void *)li,
+ (xdrproc_t)xdr_link3res);
+}
+
+
+ssize_t
+xdr_to_link3args (struct iovec inmsg, link3args *la)
+{
+ return xdr_to_generic (inmsg, (void *)la, (xdrproc_t)xdr_link3args);
+}
+
+
+ssize_t
+xdr_to_readdir3args (struct iovec inmsg, readdir3args *rd)
+{
+ return xdr_to_generic (inmsg, (void *)rd,
+ (xdrproc_t)xdr_readdir3args);
+}
+
+
+ssize_t
+xdr_serialize_readdir3res (struct iovec outmsg, readdir3res *res)
+{
+ return xdr_serialize_generic (outmsg, (void *)res,
+ (xdrproc_t)xdr_readdir3res);
+}
+
+
+ssize_t
+xdr_to_readdirp3args (struct iovec inmsg, readdirp3args *rp)
+{
+ return xdr_to_generic (inmsg, (void *)rp,
+ (xdrproc_t)xdr_readdirp3args);
+}
+
+
+ssize_t
+xdr_serialize_readdirp3res (struct iovec outmsg, readdirp3res *res)
+{
+ return xdr_serialize_generic (outmsg, (void *)res,
+ (xdrproc_t)xdr_readdirp3res);
+}
+
+
+ssize_t
+xdr_to_fsstat3args (struct iovec inmsg, fsstat3args *fa)
+{
+ return xdr_to_generic (inmsg, (void *)fa,
+ (xdrproc_t)xdr_fsstat3args);
+}
+
+
+ssize_t
+xdr_serialize_fsstat3res (struct iovec outmsg, fsstat3res *res)
+{
+ return xdr_serialize_generic (outmsg, (void *)res,
+ (xdrproc_t)xdr_fsstat3res);
+}
+
+ssize_t
+xdr_to_fsinfo3args (struct iovec inmsg, fsinfo3args *fi)
+{
+ return xdr_to_generic (inmsg, (void *)fi,
+ (xdrproc_t)xdr_fsinfo3args);
+}
+
+
+ssize_t
+xdr_serialize_fsinfo3res (struct iovec outmsg, fsinfo3res *res)
+{
+ return xdr_serialize_generic (outmsg, (void *)res,
+ (xdrproc_t)xdr_fsinfo3res);
+}
+
+
+ssize_t
+xdr_to_pathconf3args (struct iovec inmsg, pathconf3args *pc)
+{
+ return xdr_to_generic (inmsg, (void *)pc,
+ (xdrproc_t)xdr_pathconf3args);}
+
+
+ssize_t
+xdr_serialize_pathconf3res (struct iovec outmsg, pathconf3res *res)
+{
+ return xdr_serialize_generic (outmsg, (void *)res,
+ (xdrproc_t)xdr_pathconf3res);
+}
+
+
+ssize_t
+xdr_to_commit3args (struct iovec inmsg, commit3args *ca)
+{
+ return xdr_to_generic (inmsg, (void *)ca,
+ (xdrproc_t)xdr_commit3args);
+}
+
+
+ssize_t
+xdr_serialize_commit3res (struct iovec outmsg, commit3res *res)
+{
+ return xdr_serialize_generic (outmsg, (void *)res,
+ (xdrproc_t)xdr_commit3res);
+}
+
+
+ssize_t
+xdr_serialize_exports (struct iovec outmsg, exports *elist)
+{
+ XDR xdr;
+ ssize_t ret = -1;
+
+ if ((!outmsg.iov_base) || (!elist))
+ return -1;
+
+ xdrmem_create (&xdr, outmsg.iov_base, (unsigned int)outmsg.iov_len,
+ XDR_ENCODE);
+
+ if (!xdr_exports (&xdr, elist))
+ goto ret;
+
+ ret = xdr_decoded_length (xdr);
+
+ret:
+ return ret;
+}
+
+
+ssize_t
+xdr_serialize_nfsstat3 (struct iovec outmsg, nfsstat3 *s)
+{
+ return xdr_serialize_generic (outmsg, (void *)s,
+ (xdrproc_t)xdr_nfsstat3);
+}
+
+ssize_t
+xdr_to_nlm4_testargs (struct iovec inmsg, nlm4_testargs *args)
+{
+ return xdr_to_generic (inmsg, (void*)args,
+ (xdrproc_t)xdr_nlm4_testargs);
+}
+
+ssize_t
+xdr_serialize_nlm4_testres (struct iovec outmsg, nlm4_testres *res)
+{
+ return xdr_serialize_generic (outmsg, (void*)res,
+ (xdrproc_t)xdr_nlm4_testres);
+}
+
+ssize_t
+xdr_to_nlm4_lockargs (struct iovec inmsg, nlm4_lockargs *args)
+{
+ return xdr_to_generic (inmsg, (void*)args,
+ (xdrproc_t)xdr_nlm4_lockargs);
+}
+
+ssize_t
+xdr_serialize_nlm4_res (struct iovec outmsg, nlm4_res *res)
+{
+ return xdr_serialize_generic (outmsg, (void*)res,
+ (xdrproc_t)xdr_nlm4_res);
+}
+
+ssize_t
+xdr_to_nlm4_cancelargs (struct iovec inmsg, nlm4_cancargs *args)
+{
+ return xdr_to_generic (inmsg, (void*)args,
+ (xdrproc_t)xdr_nlm4_cancargs);
+}
+
+ssize_t
+xdr_to_nlm4_unlockargs (struct iovec inmsg, nlm4_unlockargs *args)
+{
+ return xdr_to_generic (inmsg, (void*)args,
+ (xdrproc_t)xdr_nlm4_unlockargs);
+}
+
+ssize_t
+xdr_to_nlm4_shareargs (struct iovec inmsg, nlm4_shareargs *args)
+{
+ return xdr_to_generic (inmsg, (void*)args,
+ (xdrproc_t)xdr_nlm4_shareargs);
+}
+
+ssize_t
+xdr_serialize_nlm4_shareres (struct iovec outmsg, nlm4_shareres *res)
+{
+ return xdr_serialize_generic (outmsg, (void *)res,
+ (xdrproc_t)xdr_nlm4_shareres);
+}
+
+ssize_t
+xdr_serialize_nlm4_testargs (struct iovec outmsg, nlm4_testargs *args)
+{
+ return xdr_serialize_generic (outmsg, (void*)args,
+ (xdrproc_t)xdr_nlm4_testargs);
+}
+
+ssize_t
+xdr_to_nlm4_res (struct iovec inmsg, nlm4_res *args)
+{
+ return xdr_to_generic (inmsg, (void*)args,
+ (xdrproc_t)xdr_nlm4_res);
+}
+
+ssize_t
+xdr_to_nlm4_freeallargs (struct iovec inmsg, nlm4_freeallargs *args)
+{
+ return xdr_to_generic (inmsg, (void*)args,
+ (xdrproc_t)xdr_nlm4_freeallargs);
+}
+
+ssize_t
+xdr_to_getaclargs (struct iovec inmsg, getaclargs *args)
+{
+ return xdr_to_generic (inmsg, (void *) args,
+ (xdrproc_t)xdr_getaclargs);
+}
+
+ssize_t
+xdr_to_setaclargs (struct iovec inmsg, setaclargs *args)
+{
+ return xdr_to_generic (inmsg, (void *) args,
+ (xdrproc_t)xdr_setaclargs);
+}
+
+ssize_t
+xdr_serialize_getaclreply (struct iovec inmsg, getaclreply *res)
+{
+ return xdr_serialize_generic (inmsg, (void *) res,
+ (xdrproc_t)xdr_getaclreply);
+}
+
+ssize_t
+xdr_serialize_setaclreply (struct iovec inmsg, setaclreply *res)
+{
+ return xdr_serialize_generic (inmsg, (void *) res,
+ (xdrproc_t)xdr_setaclreply);
+}
+
diff --git a/rpc/xdr/src/msg-nfs3.h b/rpc/xdr/src/msg-nfs3.h
new file mode 100644
index 000000000..b8e2c9694
--- /dev/null
+++ b/rpc/xdr/src/msg-nfs3.h
@@ -0,0 +1,224 @@
+/*
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _MSG_NFS3_H_
+#define _MSG_NFS3_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xdr-nfs3.h"
+#include "nlm4-xdr.h"
+#include "acl3-xdr.h"
+#include <sys/types.h>
+#include <sys/uio.h>
+
+extern ssize_t
+xdr_to_mountpath (struct iovec outpath, struct iovec inmsg);
+
+extern ssize_t
+xdr_serialize_mountres3 (struct iovec outmsg, mountres3 *res);
+
+extern ssize_t
+xdr_serialize_mountbody (struct iovec outmsg, mountbody *mb);
+
+extern ssize_t
+xdr_to_getattr3args (struct iovec inmsg, getattr3args *ga);
+
+extern ssize_t
+xdr_serialize_getattr3res (struct iovec outmsg, getattr3res *res);
+
+extern ssize_t
+xdr_serialize_setattr3res (struct iovec outmsg, setattr3res *res);
+
+extern ssize_t
+xdr_to_setattr3args (struct iovec inmsg, setattr3args *sa);
+
+extern ssize_t
+xdr_serialize_lookup3res (struct iovec outmsg, lookup3res *res);
+
+extern ssize_t
+xdr_to_lookup3args (struct iovec inmsg, lookup3args *la);
+
+extern ssize_t
+xdr_to_access3args (struct iovec inmsg, access3args *ac);
+
+extern ssize_t
+xdr_serialize_access3res (struct iovec outmsg, access3res *res);
+
+extern ssize_t
+xdr_to_readlink3args (struct iovec inmsg, readlink3args *ra);
+
+extern ssize_t
+xdr_serialize_readlink3res (struct iovec outmsg, readlink3res *res);
+
+extern ssize_t
+xdr_to_read3args (struct iovec inmsg, read3args *ra);
+
+extern ssize_t
+xdr_serialize_read3res (struct iovec outmsg, read3res *res);
+
+extern ssize_t
+xdr_serialize_read3res_nocopy (struct iovec outmsg, read3res *res);
+
+extern ssize_t
+xdr_to_write3args (struct iovec inmsg, write3args *wa);
+
+extern ssize_t
+xdr_to_write3args_nocopy (struct iovec inmsg, write3args *wa,
+ struct iovec *payload);
+
+extern ssize_t
+xdr_serialize_write3res (struct iovec outmsg, write3res *res);
+
+extern ssize_t
+xdr_to_create3args (struct iovec inmsg, create3args *ca);
+
+extern ssize_t
+xdr_serialize_create3res (struct iovec outmsg, create3res *res);
+
+extern ssize_t
+xdr_serialize_mkdir3res (struct iovec outmsg, mkdir3res *res);
+
+extern ssize_t
+xdr_to_mkdir3args (struct iovec inmsg, mkdir3args *ma);
+
+extern ssize_t
+xdr_to_symlink3args (struct iovec inmsg, symlink3args *sa);
+
+extern ssize_t
+xdr_serialize_symlink3res (struct iovec outmsg, symlink3res *res);
+
+extern ssize_t
+xdr_to_mknod3args (struct iovec inmsg, mknod3args *ma);
+
+extern ssize_t
+xdr_serialize_mknod3res (struct iovec outmsg, mknod3res *res);
+
+extern ssize_t
+xdr_to_remove3args (struct iovec inmsg, remove3args *ra);
+
+extern ssize_t
+xdr_serialize_remove3res (struct iovec outmsg, remove3res *res);
+
+extern ssize_t
+xdr_to_rmdir3args (struct iovec inmsg, rmdir3args *ra);
+
+extern ssize_t
+xdr_serialize_rmdir3res (struct iovec outmsg, rmdir3res *res);
+
+extern ssize_t
+xdr_serialize_rename3res (struct iovec outmsg, rename3res *res);
+
+extern ssize_t
+xdr_to_rename3args (struct iovec inmsg, rename3args *ra);
+
+extern ssize_t
+xdr_serialize_link3res (struct iovec outmsg, link3res *li);
+
+extern ssize_t
+xdr_to_link3args (struct iovec inmsg, link3args *la);
+
+extern ssize_t
+xdr_to_readdir3args (struct iovec inmsg, readdir3args *rd);
+
+extern ssize_t
+xdr_serialize_readdir3res (struct iovec outmsg, readdir3res *res);
+
+extern ssize_t
+xdr_to_readdirp3args (struct iovec inmsg, readdirp3args *rp);
+
+extern ssize_t
+xdr_serialize_readdirp3res (struct iovec outmsg, readdirp3res *res);
+
+extern ssize_t
+xdr_to_fsstat3args (struct iovec inmsg, fsstat3args *fa);
+
+extern ssize_t
+xdr_serialize_fsstat3res (struct iovec outmsg, fsstat3res *res);
+
+extern ssize_t
+xdr_to_fsinfo3args (struct iovec inmsg, fsinfo3args *fi);
+
+extern ssize_t
+xdr_serialize_fsinfo3res (struct iovec outmsg, fsinfo3res *res);
+
+extern ssize_t
+xdr_to_pathconf3args (struct iovec inmsg, pathconf3args *pc);
+
+extern ssize_t
+xdr_serialize_pathconf3res (struct iovec outmsg, pathconf3res *res);
+
+extern ssize_t
+xdr_to_commit3args (struct iovec inmsg, commit3args *ca);
+
+extern ssize_t
+xdr_serialize_commit3res (struct iovec outmsg, commit3res *res);
+
+extern ssize_t
+xdr_serialize_exports (struct iovec outmsg, exports *elist);
+
+extern ssize_t
+xdr_serialize_mountlist (struct iovec outmsg, mountlist *ml);
+
+extern ssize_t
+xdr_serialize_mountstat3 (struct iovec outmsg, mountstat3 *m);
+
+extern ssize_t
+xdr_serialize_nfsstat3 (struct iovec outmsg, nfsstat3 *s);
+
+extern ssize_t
+xdr_to_nlm4_testargs (struct iovec inmsg, nlm4_testargs *args);
+
+extern ssize_t
+xdr_serialize_nlm4_testres (struct iovec outmsg, nlm4_testres *res);
+
+extern ssize_t
+xdr_to_nlm4_lockargs (struct iovec inmsg, nlm4_lockargs *args);
+
+extern ssize_t
+xdr_serialize_nlm4_res (struct iovec outmsg, nlm4_res *res);
+
+extern ssize_t
+xdr_to_nlm4_cancelargs (struct iovec inmsg, nlm4_cancargs *args);
+
+extern ssize_t
+xdr_to_nlm4_unlockargs (struct iovec inmsg, nlm4_unlockargs *args);
+
+extern ssize_t
+xdr_to_nlm4_shareargs (struct iovec inmsg, nlm4_shareargs *args);
+
+extern ssize_t
+xdr_serialize_nlm4_shareres (struct iovec outmsg, nlm4_shareres *res);
+
+extern ssize_t
+xdr_serialize_nlm4_testargs (struct iovec outmsg, nlm4_testargs *args);
+
+extern ssize_t
+xdr_to_nlm4_res (struct iovec inmsg, nlm4_res *args);
+
+extern ssize_t
+xdr_to_nlm4_freeallargs (struct iovec inmsg, nlm4_freeallargs *args);
+
+extern ssize_t
+xdr_to_getaclargs (struct iovec inmsg, getaclargs *args);
+
+extern ssize_t
+xdr_to_setaclargs (struct iovec inmsg, setaclargs *args);
+
+extern ssize_t
+xdr_serialize_getaclreply (struct iovec inmsg, getaclreply *res);
+
+extern ssize_t
+xdr_serialize_setaclreply (struct iovec inmsg, setaclreply *res);
+
+#endif
diff --git a/rpc/xdr/src/nlm4-xdr.c b/rpc/xdr/src/nlm4-xdr.c
new file mode 100644
index 000000000..caba05f58
--- /dev/null
+++ b/rpc/xdr/src/nlm4-xdr.c
@@ -0,0 +1,245 @@
+/*
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/*
+ * Please do not edit this file.
+ * It was generated using rpcgen.
+ */
+
+#include "nlm4-xdr.h"
+
+bool_t
+xdr_netobj (XDR *xdrs, netobj *objp)
+{
+ if (!xdr_bytes (xdrs, (char **)&objp->n_bytes, (u_int *) &objp->n_len, MAXNETOBJ_SZ))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_fsh_mode (XDR *xdrs, fsh_mode *objp)
+{
+ if (!xdr_enum (xdrs, (enum_t *) objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_fsh_access (XDR *xdrs, fsh_access *objp)
+{
+ if (!xdr_enum (xdrs, (enum_t *) objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_nlm4_stats (XDR *xdrs, nlm4_stats *objp)
+{
+ if (!xdr_enum (xdrs, (enum_t *) objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_nlm4_stat (XDR *xdrs, nlm4_stat *objp)
+{
+ if (!xdr_nlm4_stats (xdrs, &objp->stat))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_nlm4_holder (XDR *xdrs, nlm4_holder *objp)
+{
+ if (!xdr_bool (xdrs, &objp->exclusive))
+ return FALSE;
+ if (!xdr_uint32_t (xdrs, &objp->svid))
+ return FALSE;
+ if (!xdr_netobj (xdrs, &objp->oh))
+ return FALSE;
+ if (!xdr_uint64_t (xdrs, &objp->l_offset))
+ return FALSE;
+ if (!xdr_uint64_t (xdrs, &objp->l_len))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_nlm4_lock (XDR *xdrs, nlm4_lock *objp)
+{
+ if (!xdr_string (xdrs, &objp->caller_name, MAXNAMELEN))
+ return FALSE;
+ if (!xdr_netobj (xdrs, &objp->fh))
+ return FALSE;
+ if (!xdr_netobj (xdrs, &objp->oh))
+ return FALSE;
+ if (!xdr_uint32_t (xdrs, &objp->svid))
+ return FALSE;
+ if (!xdr_uint64_t (xdrs, &objp->l_offset))
+ return FALSE;
+ if (!xdr_uint64_t (xdrs, &objp->l_len))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_nlm4_share (XDR *xdrs, nlm4_share *objp)
+{
+ if (!xdr_string (xdrs, &objp->caller_name, MAXNAMELEN))
+ return FALSE;
+ if (!xdr_netobj (xdrs, &objp->fh))
+ return FALSE;
+ if (!xdr_netobj (xdrs, &objp->oh))
+ return FALSE;
+ if (!xdr_fsh_mode (xdrs, &objp->mode))
+ return FALSE;
+ if (!xdr_fsh_access (xdrs, &objp->access))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_nlm4_testrply (XDR *xdrs, nlm4_testrply *objp)
+{
+ if (!xdr_nlm4_stats (xdrs, &objp->stat))
+ return FALSE;
+ switch (objp->stat) {
+ case nlm4_denied:
+ if (!xdr_nlm4_holder (xdrs, &objp->nlm4_testrply_u.holder))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_nlm4_testres (XDR *xdrs, nlm4_testres *objp)
+{
+ if (!xdr_netobj (xdrs, &objp->cookie))
+ return FALSE;
+ if (!xdr_nlm4_testrply (xdrs, &objp->stat))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_nlm4_testargs (XDR *xdrs, nlm4_testargs *objp)
+{
+ if (!xdr_netobj (xdrs, &objp->cookie))
+ return FALSE;
+ if (!xdr_bool (xdrs, &objp->exclusive))
+ return FALSE;
+ if (!xdr_nlm4_lock (xdrs, &objp->alock))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_nlm4_res (XDR *xdrs, nlm4_res *objp)
+{
+ if (!xdr_netobj (xdrs, &objp->cookie))
+ return FALSE;
+ if (!xdr_nlm4_stat (xdrs, &objp->stat))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_nlm4_lockargs (XDR *xdrs, nlm4_lockargs *objp)
+{
+ if (!xdr_netobj (xdrs, &objp->cookie))
+ return FALSE;
+ if (!xdr_bool (xdrs, &objp->block))
+ return FALSE;
+ if (!xdr_bool (xdrs, &objp->exclusive))
+ return FALSE;
+ if (!xdr_nlm4_lock (xdrs, &objp->alock))
+ return FALSE;
+ if (!xdr_bool (xdrs, &objp->reclaim))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->state))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_nlm4_cancargs (XDR *xdrs, nlm4_cancargs *objp)
+{
+ if (!xdr_netobj (xdrs, &objp->cookie))
+ return FALSE;
+ if (!xdr_bool (xdrs, &objp->block))
+ return FALSE;
+ if (!xdr_bool (xdrs, &objp->exclusive))
+ return FALSE;
+ if (!xdr_nlm4_lock (xdrs, &objp->alock))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_nlm4_unlockargs (XDR *xdrs, nlm4_unlockargs *objp)
+{
+ if (!xdr_netobj (xdrs, &objp->cookie))
+ return FALSE;
+ if (!xdr_nlm4_lock (xdrs, &objp->alock))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_nlm4_shareargs (XDR *xdrs, nlm4_shareargs *objp)
+{
+ if (!xdr_netobj (xdrs, &objp->cookie))
+ return FALSE;
+ if (!xdr_nlm4_share (xdrs, &objp->share))
+ return FALSE;
+ if (!xdr_bool (xdrs, &objp->reclaim))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_nlm4_shareres (XDR *xdrs, nlm4_shareres *objp)
+{
+ if (!xdr_netobj (xdrs, &objp->cookie))
+ return FALSE;
+ if (!xdr_nlm4_stats (xdrs, &objp->stat))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->sequence))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_nlm4_freeallargs (XDR *xdrs, nlm4_freeallargs *objp)
+{
+ if (!xdr_string (xdrs, &objp->name, LM_MAXSTRLEN))
+ return FALSE;
+ if (!xdr_uint32_t (xdrs, &objp->state))
+ return FALSE;
+ return TRUE;
+}
+
+
+/*
+bool_t
+xdr_nlm_sm_status (XDR *xdrs, nlm_sm_status *objp)
+{
+ if (!xdr_string (xdrs, &objp->mon_name, LM_MAXSTRLEN))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->state))
+ return FALSE;
+ if (!xdr_opaque (xdrs, objp->priv, 16))
+ return FALSE;
+ return TRUE;
+}
+*/
diff --git a/rpc/xdr/src/nlm4-xdr.h b/rpc/xdr/src/nlm4-xdr.h
new file mode 100644
index 000000000..4391a4790
--- /dev/null
+++ b/rpc/xdr/src/nlm4-xdr.h
@@ -0,0 +1,258 @@
+/*
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/*
+ * Please do not edit this file.
+ * It was generated using rpcgen.
+ */
+
+#ifndef _NLM_H_RPCGEN
+#define _NLM_H_RPCGEN
+
+#include <rpc/rpc.h>
+
+#if defined(__NetBSD__)
+#define xdr_u_quad_t xdr_u_int64_t
+#define xdr_quad_t xdr_int64_t
+#define xdr_uint32_t xdr_u_int32_t
+#define xdr_uint64_t xdr_u_int64_t
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MAXNETOBJ_SZ 1024
+#define LM_MAXSTRLEN 1024
+#define MAXNAMELEN 1025
+
+#if defined(__NetBSD__)
+#define xdr_u_quad_t xdr_u_int64_t
+#define xdr_quad_t xdr_int64_t
+#define xdr_uint32_t xdr_u_int32_t
+#define xdr_uint64_t xdr_u_int64_t
+#endif
+
+/*
+ * The following enums are actually bit encoded for efficient
+ * boolean algebra.... DON'T change them.....
+ */
+
+enum fsh_mode {
+ fsm_DN = 0,
+ fsm_DR = 1,
+ fsm_DW = 2,
+ fsm_DRW = 3,
+};
+typedef enum fsh_mode fsh_mode;
+
+enum fsh_access {
+ fsa_NONE = 0,
+ fsa_R = 1,
+ fsa_W = 2,
+ fsa_RW = 3,
+};
+typedef enum fsh_access fsh_access;
+/* definitions for NLM version 4 */
+
+enum nlm4_stats {
+ nlm4_granted = 0,
+ nlm4_denied = 1,
+ nlm4_denied_nolock = 2,
+ nlm4_blocked = 3,
+ nlm4_denied_grace_period = 4,
+ nlm4_deadlck = 5,
+ nlm4_rofs = 6,
+ nlm4_stale_fh = 7,
+ nlm4_fbig = 8,
+ nlm4_failed = 9,
+};
+typedef enum nlm4_stats nlm4_stats;
+
+struct nlm4_stat {
+ nlm4_stats stat;
+};
+typedef struct nlm4_stat nlm4_stat;
+
+struct nlm4_holder {
+ bool_t exclusive;
+ u_int32_t svid;
+ netobj oh;
+ u_int64_t l_offset;
+ u_int64_t l_len;
+};
+typedef struct nlm4_holder nlm4_holder;
+
+struct nlm4_lock {
+ char *caller_name;
+ netobj fh;
+ netobj oh;
+ u_int32_t svid;
+ u_int64_t l_offset;
+ u_int64_t l_len;
+};
+typedef struct nlm4_lock nlm4_lock;
+
+struct nlm4_share {
+ char *caller_name;
+ netobj fh;
+ netobj oh;
+ fsh_mode mode;
+ fsh_access access;
+};
+typedef struct nlm4_share nlm4_share;
+
+struct nlm4_testrply {
+ nlm4_stats stat;
+ union {
+ struct nlm4_holder holder;
+ } nlm4_testrply_u;
+};
+typedef struct nlm4_testrply nlm4_testrply;
+
+struct nlm4_testres {
+ netobj cookie;
+ nlm4_testrply stat;
+};
+typedef struct nlm4_testres nlm4_testres;
+
+struct nlm4_testargs {
+ netobj cookie;
+ bool_t exclusive;
+ struct nlm4_lock alock;
+};
+typedef struct nlm4_testargs nlm4_testargs;
+
+struct nlm4_res {
+ netobj cookie;
+ nlm4_stat stat;
+};
+typedef struct nlm4_res nlm4_res;
+
+struct nlm4_lockargs {
+ netobj cookie;
+ bool_t block;
+ bool_t exclusive;
+ struct nlm4_lock alock;
+ bool_t reclaim;
+ int state;
+};
+typedef struct nlm4_lockargs nlm4_lockargs;
+
+struct nlm4_cancargs {
+ netobj cookie;
+ bool_t block;
+ bool_t exclusive;
+ struct nlm4_lock alock;
+};
+typedef struct nlm4_cancargs nlm4_cancargs;
+
+struct nlm4_unlockargs {
+ netobj cookie;
+ struct nlm4_lock alock;
+};
+typedef struct nlm4_unlockargs nlm4_unlockargs;
+
+struct nlm4_shareargs {
+ netobj cookie;
+ nlm4_share share;
+ bool_t reclaim;
+};
+typedef struct nlm4_shareargs nlm4_shareargs;
+
+struct nlm4_shareres {
+ netobj cookie;
+ nlm4_stats stat;
+ int sequence;
+};
+typedef struct nlm4_shareres nlm4_shareres;
+
+struct nlm4_freeallargs {
+ char *name;
+ u_int32_t state;
+};
+typedef struct nlm4_freeallargs nlm4_freeallargs;
+
+
+#define NLM4_NULL 0
+#define NLM4_TEST 1
+#define NLM4_LOCK 2
+#define NLM4_CANCEL 3
+#define NLM4_UNLOCK 4
+#define NLM4_GRANTED 5
+#define NLM4_TEST_MSG 6
+#define NLM4_LOCK_MSG 7
+#define NLM4_CANCEL_MSG 8
+#define NLM4_UNLOCK_MSG 9
+#define NLM4_GRANTED_MSG 10
+#define NLM4_TEST_RES 11
+#define NLM4_LOCK_RES 12
+#define NLM4_CANCEL_RES 13
+#define NLM4_UNLOCK_RES 14
+#define NLM4_GRANTED_RES 15
+#define NLM4_SM_NOTIFY 16
+#define NLM4_SEVENTEEN 17
+#define NLM4_EIGHTEEN 18
+#define NLM4_NINETEEN 19
+#define NLM4_SHARE 20
+#define NLM4_UNSHARE 21
+#define NLM4_NM_LOCK 22
+#define NLM4_FREE_ALL 23
+#define NLM4_PROC_COUNT 24
+
+/* the xdr functions */
+
+#if defined(__STDC__) || defined(__cplusplus)
+extern bool_t xdr_netobj (XDR *, netobj*);
+extern bool_t xdr_fsh_mode (XDR *, fsh_mode*);
+extern bool_t xdr_fsh_access (XDR *, fsh_access*);
+extern bool_t xdr_nlm4_stats (XDR *, nlm4_stats*);
+extern bool_t xdr_nlm4_stat (XDR *, nlm4_stat*);
+extern bool_t xdr_nlm4_holder (XDR *, nlm4_holder*);
+extern bool_t xdr_nlm4_lock (XDR *, nlm4_lock*);
+extern bool_t xdr_nlm4_share (XDR *, nlm4_share*);
+extern bool_t xdr_nlm4_testrply (XDR *, nlm4_testrply*);
+extern bool_t xdr_nlm4_testres (XDR *, nlm4_testres*);
+extern bool_t xdr_nlm4_testargs (XDR *, nlm4_testargs*);
+extern bool_t xdr_nlm4_res (XDR *, nlm4_res*);
+extern bool_t xdr_nlm4_lockargs (XDR *, nlm4_lockargs*);
+extern bool_t xdr_nlm4_cancargs (XDR *, nlm4_cancargs*);
+extern bool_t xdr_nlm4_unlockargs (XDR *, nlm4_unlockargs*);
+extern bool_t xdr_nlm4_shareargs (XDR *, nlm4_shareargs*);
+extern bool_t xdr_nlm4_shareres (XDR *, nlm4_shareres*);
+extern bool_t xdr_nlm4_freeallargs (XDR *, nlm4_freeallargs*);
+
+#else /* K&R C */
+extern bool_t xdr_netobj ();
+extern bool_t xdr_fsh_mode ();
+extern bool_t xdr_fsh_access ();
+extern bool_t xdr_nlm4_stats ();
+extern bool_t xdr_nlm4_stat ();
+extern bool_t xdr_nlm4_holder ();
+extern bool_t xdr_nlm4_lock ();
+extern bool_t xdr_nlm4_share ();
+extern bool_t xdr_nlm4_testrply ();
+extern bool_t xdr_nlm4_testres ();
+extern bool_t xdr_nlm4_testargs ();
+extern bool_t xdr_nlm4_res ();
+extern bool_t xdr_nlm4_lockargs ();
+extern bool_t xdr_nlm4_cancargs ();
+extern bool_t xdr_nlm4_unlockargs ();
+extern bool_t xdr_nlm4_shareargs ();
+extern bool_t xdr_nlm4_shareres ();
+extern bool_t xdr_nlm4_freeallargs ();
+
+#endif /* K&R C */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !_NLM_H_RPCGEN */
diff --git a/rpc/xdr/src/nlm4.x b/rpc/xdr/src/nlm4.x
new file mode 100644
index 000000000..e22ac99f2
--- /dev/null
+++ b/rpc/xdr/src/nlm4.x
@@ -0,0 +1,154 @@
+/*
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/* .x file defined as according to the RFC */
+
+const MAXNETOBJ_SZ = 1024;
+const LM_MAXSTRLEN = 1024;
+const MAXNAMELEN = 1025;
+
+typedef opaque netobj<MAXNETOBJ_SZ>;
+
+#ifdef RPC_HDR
+%/*
+% * The following enums are actually bit encoded for efficient
+% * boolean algebra.... DON'T change them.....
+% */
+#endif
+enum fsh_mode {
+ fsm_DN = 0, /* deny none */
+ fsm_DR = 1, /* deny read */
+ fsm_DW = 2, /* deny write */
+ fsm_DRW = 3 /* deny read/write */
+};
+
+enum fsh_access {
+ fsa_NONE = 0, /* for completeness */
+ fsa_R = 1, /* read only */
+ fsa_W = 2, /* write only */
+ fsa_RW = 3 /* read/write */
+};
+
+#ifdef RPC_HDR
+%/* definitions for NLM version 4 */
+#endif
+enum nlm4_stats {
+ nlm4_granted = 0,
+ nlm4_denied = 1,
+ nlm4_denied_nolock = 2,
+ nlm4_blocked = 3,
+ nlm4_denied_grace_period = 4,
+ nlm4_deadlck = 5,
+ nlm4_rofs = 6,
+ nlm4_stale_fh = 7,
+ nlm4_fbig = 8,
+ nlm4_failed = 9
+};
+
+struct nlm4_stat {
+ nlm4_stats stat;
+};
+
+struct nlm4_holder {
+ bool exclusive;
+ u_int32_t svid;
+ netobj oh;
+ u_int64_t l_offset;
+ u_int64_t l_len;
+};
+
+struct nlm4_lock {
+ string caller_name<LM_MAXSTRLEN>;
+ netobj fh;
+ netobj oh;
+ u_int32_t svid;
+ u_int64_t l_offset;
+ u_int64_t l_len;
+};
+
+struct nlm4_share {
+ string caller_name<LM_MAXSTRLEN>;
+ netobj fh;
+ netobj oh;
+ fsh_mode mode;
+ fsh_access access;
+};
+
+union nlm4_testrply switch (nlm4_stats stat) {
+ case nlm_denied:
+ struct nlm4_holder holder;
+ default:
+ void;
+};
+
+struct nlm4_testres {
+ netobj cookie;
+ nlm4_testrply stat;
+};
+
+struct nlm4_testargs {
+ netobj cookie;
+ bool exclusive;
+ struct nlm4_lock alock;
+};
+
+struct nlm4_res {
+ netobj cookie;
+ nlm4_stat stat;
+};
+
+struct nlm4_lockargs {
+ netobj cookie;
+ bool block;
+ bool exclusive;
+ struct nlm4_lock alock;
+ bool reclaim; /* used for recovering locks */
+ int state; /* specify local status monitor state */
+};
+
+struct nlm4_cancargs {
+ netobj cookie;
+ bool block;
+ bool exclusive;
+ struct nlm4_lock alock;
+};
+
+struct nlm4_unlockargs {
+ netobj cookie;
+ struct nlm4_lock alock;
+};
+
+struct nlm4_shareargs {
+ netobj cookie;
+ nlm4_share share;
+ bool reclaim;
+};
+
+struct nlm4_shareres {
+ netobj cookie;
+ nlm4_stats stat;
+ int sequence;
+};
+
+struct nlm4_freeallargs {
+ string name<LM_MAXSTRLEN>; /* client hostname */
+ uint32 state; /* unused */
+};
+
+/*
+ * argument for the procedure called by rpc.statd when a monitored host
+ * status change.
+ * XXX assumes LM_MAXSTRLEN == SM_MAXSTRLEN
+ */
+struct nlm_sm_status {
+ string mon_name<LM_MAXSTRLEN>; /* name of host */
+ int state; /* new state */
+ opaque priv[16]; /* private data */
+};
diff --git a/rpc/xdr/src/nlmcbk-xdr.c b/rpc/xdr/src/nlmcbk-xdr.c
new file mode 100644
index 000000000..3d75acc55
--- /dev/null
+++ b/rpc/xdr/src/nlmcbk-xdr.c
@@ -0,0 +1,28 @@
+/*
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/*
+ * Please do not edit this file.
+ * It was generated using rpcgen.
+ */
+
+#include "nlmcbk-xdr.h"
+
+bool_t
+xdr_nlm_sm_status (XDR *xdrs, nlm_sm_status *objp)
+{
+ if (!xdr_string (xdrs, &objp->mon_name, LM_MAXSTRLEN))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->state))
+ return FALSE;
+ if (!xdr_opaque (xdrs, objp->priv, 16))
+ return FALSE;
+ return TRUE;
+}
diff --git a/rpc/xdr/src/nlmcbk-xdr.h b/rpc/xdr/src/nlmcbk-xdr.h
new file mode 100644
index 000000000..ad8421857
--- /dev/null
+++ b/rpc/xdr/src/nlmcbk-xdr.h
@@ -0,0 +1,65 @@
+/*
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/*
+ * Please do not edit this file.
+ * It was generated using rpcgen.
+ */
+
+#ifndef _NLMCBK_H_RPCGEN
+#define _NLMCBK_H_RPCGEN
+
+#include <rpc/rpc.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LM_MAXSTRLEN 1024
+
+struct nlm_sm_status {
+ char *mon_name;
+ int state;
+ char priv[16];
+};
+typedef struct nlm_sm_status nlm_sm_status;
+
+#define NLMCBK_PROGRAM 100021
+#define NLMCBK_V1 1
+
+#if defined(__STDC__) || defined(__cplusplus)
+#define NLMCBK_SM_NOTIFY 16
+extern void * nlmcbk_sm_notify_0(struct nlm_sm_status *, CLIENT *);
+extern void * nlmcbk_sm_notify_0_svc(struct nlm_sm_status *, struct svc_req *);
+extern int nlmcbk_program_0_freeresult (SVCXPRT *, xdrproc_t, caddr_t);
+
+#else /* K&R C */
+#define NLMCBK_SM_NOTIFY 16
+extern void * nlmcbk_sm_notify_0();
+extern void * nlmcbk_sm_notify_0_svc();
+extern int nlmcbk_program_0_freeresult ();
+#endif /* K&R C */
+
+/* the xdr functions */
+
+#if defined(__STDC__) || defined(__cplusplus)
+extern bool_t xdr_nlm_sm_status (XDR *, nlm_sm_status*);
+
+#else /* K&R C */
+extern bool_t xdr_nlm_sm_status ();
+
+#endif /* K&R C */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !_NLMCBK_H_RPCGEN */
diff --git a/rpc/xdr/src/nlmcbk.x b/rpc/xdr/src/nlmcbk.x
new file mode 100644
index 000000000..1d3746c9a
--- /dev/null
+++ b/rpc/xdr/src/nlmcbk.x
@@ -0,0 +1,24 @@
+/*
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+const LM_MAXSTRLEN = 1024;
+
+struct nlm_sm_status {
+ string mon_name<LM_MAXSTRLEN>; /* name of host */
+ int state; /* new state */
+ opaque priv[16]; /* private data */
+};
+
+program NLMCBK_PROGRAM {
+ version NLMCBK_V0 {
+ void NLMCBK_SM_NOTIFY(struct nlm_sm_status) = 1;
+ } = 0;
+} = 1238477;
+
diff --git a/rpc/xdr/src/nsm-xdr.c b/rpc/xdr/src/nsm-xdr.c
new file mode 100644
index 000000000..58712737b
--- /dev/null
+++ b/rpc/xdr/src/nsm-xdr.c
@@ -0,0 +1,96 @@
+/*
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/*
+ * Please do not edit this file.
+ * It was generated using rpcgen.
+ */
+
+#include "nsm-xdr.h"
+
+bool_t
+xdr_sm_name (XDR *xdrs, sm_name *objp)
+{
+ if (!xdr_string (xdrs, &objp->mon_name, SM_MAXSTRLEN))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_res (XDR *xdrs, res *objp)
+{
+ if (!xdr_enum (xdrs, (enum_t *) objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_sm_stat_res (XDR *xdrs, sm_stat_res *objp)
+{
+ if (!xdr_res (xdrs, &objp->res_stat))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->state))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_sm_stat (XDR *xdrs, sm_stat *objp)
+{
+ if (!xdr_int (xdrs, &objp->state))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_my_id (XDR *xdrs, my_id *objp)
+{
+ if (!xdr_string (xdrs, &objp->my_name, SM_MAXSTRLEN))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->my_prog))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->my_vers))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->my_proc))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_mon_id (XDR *xdrs, mon_id *objp)
+{
+ if (!xdr_string (xdrs, &objp->mon_name, SM_MAXSTRLEN))
+ return FALSE;
+ if (!xdr_my_id (xdrs, &objp->my_id))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_mon (XDR *xdrs, mon *objp)
+{
+ if (!xdr_mon_id (xdrs, &objp->mon_id))
+ return FALSE;
+ if (!xdr_opaque (xdrs, objp->priv, 16))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_nsm_callback_status (XDR *xdrs, nsm_callback_status *objp)
+{
+ if (!xdr_string (xdrs, &objp->mon_name, SM_MAXSTRLEN))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->state))
+ return FALSE;
+ if (!xdr_opaque (xdrs, objp->priv, 16))
+ return FALSE;
+ return TRUE;
+}
diff --git a/rpc/xdr/src/nsm-xdr.h b/rpc/xdr/src/nsm-xdr.h
new file mode 100644
index 000000000..9de642c15
--- /dev/null
+++ b/rpc/xdr/src/nsm-xdr.h
@@ -0,0 +1,95 @@
+/*
+ * Please do not edit this file.
+ * It was generated using rpcgen.
+ */
+
+#ifndef _NSM_H_RPCGEN
+#define _NSM_H_RPCGEN
+
+#include <rpc/rpc.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define SM_MAXSTRLEN 1024
+
+struct sm_name {
+ char *mon_name;
+};
+typedef struct sm_name sm_name;
+
+enum res {
+ STAT_SUCC = 0,
+ STAT_FAIL = 1,
+};
+typedef enum res res;
+
+struct sm_stat_res {
+ res res_stat;
+ int state;
+};
+typedef struct sm_stat_res sm_stat_res;
+
+struct sm_stat {
+ int state;
+};
+typedef struct sm_stat sm_stat;
+
+struct my_id {
+ char *my_name;
+ int my_prog;
+ int my_vers;
+ int my_proc;
+};
+typedef struct my_id my_id;
+
+struct mon_id {
+ char *mon_name;
+ struct my_id my_id;
+};
+typedef struct mon_id mon_id;
+
+struct mon {
+ struct mon_id mon_id;
+ char priv[16];
+};
+typedef struct mon mon;
+
+struct nsm_callback_status {
+ char *mon_name;
+ int state;
+ char priv[16];
+};
+typedef struct nsm_callback_status nsm_callback_status;
+
+/* the xdr functions */
+
+#if defined(__STDC__) || defined(__cplusplus)
+extern bool_t xdr_sm_name (XDR *, sm_name*);
+extern bool_t xdr_res (XDR *, res*);
+extern bool_t xdr_sm_stat_res (XDR *, sm_stat_res*);
+extern bool_t xdr_sm_stat (XDR *, sm_stat*);
+extern bool_t xdr_my_id (XDR *, my_id*);
+extern bool_t xdr_mon_id (XDR *, mon_id*);
+extern bool_t xdr_mon (XDR *, mon*);
+extern bool_t xdr_nsm_callback_status (XDR *, nsm_callback_status*);
+
+#else /* K&R C */
+extern bool_t xdr_sm_name ();
+extern bool_t xdr_res ();
+extern bool_t xdr_sm_stat_res ();
+extern bool_t xdr_sm_stat ();
+extern bool_t xdr_my_id ();
+extern bool_t xdr_mon_id ();
+extern bool_t xdr_mon ();
+extern bool_t xdr_nsm_callback_status ();
+
+#endif /* K&R C */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !_NSM_H_RPCGEN */
diff --git a/rpc/xdr/src/nsm.x b/rpc/xdr/src/nsm.x
new file mode 100644
index 000000000..8f97b1aaa
--- /dev/null
+++ b/rpc/xdr/src/nsm.x
@@ -0,0 +1,47 @@
+/*
+ * This defines the maximum length of the string
+ * identifying the caller.
+ */
+const SM_MAXSTRLEN = 1024;
+
+struct sm_name {
+ string mon_name<SM_MAXSTRLEN>;
+};
+
+enum res {
+ STAT_SUCC = 0, /* NSM agrees to monitor. */
+ STAT_FAIL = 1 /* NSM cannot monitor. */
+};
+
+struct sm_stat_res {
+ res res_stat;
+ int state;
+};
+
+struct sm_stat {
+ int state; /* state number of NSM */
+};
+
+struct my_id {
+ string my_name<SM_MAXSTRLEN>; /* hostname */
+ int my_prog; /* RPC program number */
+ int my_vers; /* program version number */
+ int my_proc; /* procedure number */
+};
+
+struct mon_id {
+ string mon_name<SM_MAXSTRLEN>; /* name of the host to be monitored */
+ struct my_id my_id;
+};
+
+struct mon {
+ struct mon_id mon_id;
+ opaque priv[16]; /* private information */
+};
+
+struct nsm_callback_status {
+ string mon_name<SM_MAXSTRLEN>;
+ int state;
+ opaque priv[16]; /* for private information */
+};
+
diff --git a/rpc/xdr/src/portmap-xdr.c b/rpc/xdr/src/portmap-xdr.c
new file mode 100644
index 000000000..4766122e6
--- /dev/null
+++ b/rpc/xdr/src/portmap-xdr.c
@@ -0,0 +1,237 @@
+/*
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xdr-common.h"
+#include "compat.h"
+
+#if defined(__GNUC__)
+#if __GNUC__ >= 4
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#endif
+#endif
+
+/*
+ * Please do not edit this file.
+ * It was generated using rpcgen.
+ */
+
+#include "portmap-xdr.h"
+
+bool_t
+xdr_pmap_port_by_brick_req (XDR *xdrs, pmap_port_by_brick_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_string (xdrs, &objp->brick, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_pmap_port_by_brick_rsp (XDR *xdrs, pmap_port_by_brick_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+
+ if (xdrs->x_op == XDR_ENCODE) {
+ buf = XDR_INLINE (xdrs, 4 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->status))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->port))
+ return FALSE;
+ } else {
+ IXDR_PUT_LONG(buf, objp->op_ret);
+ IXDR_PUT_LONG(buf, objp->op_errno);
+ IXDR_PUT_LONG(buf, objp->status);
+ IXDR_PUT_LONG(buf, objp->port);
+ }
+ return TRUE;
+ } else if (xdrs->x_op == XDR_DECODE) {
+ buf = XDR_INLINE (xdrs, 4 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->status))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->port))
+ return FALSE;
+ } else {
+ objp->op_ret = IXDR_GET_LONG(buf);
+ objp->op_errno = IXDR_GET_LONG(buf);
+ objp->status = IXDR_GET_LONG(buf);
+ objp->port = IXDR_GET_LONG(buf);
+ }
+ return TRUE;
+ }
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->status))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->port))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_pmap_brick_by_port_req (XDR *xdrs, pmap_brick_by_port_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->port))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_pmap_brick_by_port_rsp (XDR *xdrs, pmap_brick_by_port_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+
+ if (xdrs->x_op == XDR_ENCODE) {
+ buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->status))
+ return FALSE;
+
+ } else {
+ IXDR_PUT_LONG(buf, objp->op_ret);
+ IXDR_PUT_LONG(buf, objp->op_errno);
+ IXDR_PUT_LONG(buf, objp->status);
+ }
+ if (!xdr_string (xdrs, &objp->brick, ~0))
+ return FALSE;
+ return TRUE;
+ } else if (xdrs->x_op == XDR_DECODE) {
+ buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->status))
+ return FALSE;
+
+ } else {
+ objp->op_ret = IXDR_GET_LONG(buf);
+ objp->op_errno = IXDR_GET_LONG(buf);
+ objp->status = IXDR_GET_LONG(buf);
+ }
+ if (!xdr_string (xdrs, &objp->brick, ~0))
+ return FALSE;
+ return TRUE;
+ }
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->status))
+ return FALSE;
+ if (!xdr_string (xdrs, &objp->brick, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_pmap_signup_req (XDR *xdrs, pmap_signup_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_string (xdrs, &objp->brick, ~0))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->port))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_pmap_signup_rsp (XDR *xdrs, pmap_signup_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_pmap_signin_req (XDR *xdrs, pmap_signin_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_string (xdrs, &objp->brick, ~0))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->port))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_pmap_signin_rsp (XDR *xdrs, pmap_signin_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_pmap_signout_req (XDR *xdrs, pmap_signout_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_string (xdrs, &objp->brick, ~0))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->port))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_pmap_signout_rsp (XDR *xdrs, pmap_signout_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ return TRUE;
+}
diff --git a/rpc/xdr/src/portmap-xdr.h b/rpc/xdr/src/portmap-xdr.h
new file mode 100644
index 000000000..8e4ff4f45
--- /dev/null
+++ b/rpc/xdr/src/portmap-xdr.h
@@ -0,0 +1,130 @@
+/*
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xdr-common.h"
+#include "compat.h"
+
+#if defined(__GNUC__)
+#if __GNUC__ >= 4
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#endif
+#endif
+
+/*
+ * Please do not edit this file.
+ * It was generated using rpcgen.
+ */
+
+#ifndef _PORTMAP_XDR_H_RPCGEN
+#define _PORTMAP_XDR_H_RPCGEN
+
+#include <rpc/rpc.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+struct pmap_port_by_brick_req {
+ char *brick;
+};
+typedef struct pmap_port_by_brick_req pmap_port_by_brick_req;
+
+struct pmap_port_by_brick_rsp {
+ int op_ret;
+ int op_errno;
+ int status;
+ int port;
+};
+typedef struct pmap_port_by_brick_rsp pmap_port_by_brick_rsp;
+
+struct pmap_brick_by_port_req {
+ int port;
+};
+typedef struct pmap_brick_by_port_req pmap_brick_by_port_req;
+
+struct pmap_brick_by_port_rsp {
+ int op_ret;
+ int op_errno;
+ int status;
+ char *brick;
+};
+typedef struct pmap_brick_by_port_rsp pmap_brick_by_port_rsp;
+
+struct pmap_signup_req {
+ char *brick;
+ int port;
+};
+typedef struct pmap_signup_req pmap_signup_req;
+
+struct pmap_signup_rsp {
+ int op_ret;
+ int op_errno;
+};
+typedef struct pmap_signup_rsp pmap_signup_rsp;
+
+struct pmap_signin_req {
+ char *brick;
+ int port;
+};
+typedef struct pmap_signin_req pmap_signin_req;
+
+struct pmap_signin_rsp {
+ int op_ret;
+ int op_errno;
+};
+typedef struct pmap_signin_rsp pmap_signin_rsp;
+
+struct pmap_signout_req {
+ char *brick;
+ int port;
+};
+typedef struct pmap_signout_req pmap_signout_req;
+
+struct pmap_signout_rsp {
+ int op_ret;
+ int op_errno;
+};
+typedef struct pmap_signout_rsp pmap_signout_rsp;
+
+/* the xdr functions */
+
+#if defined(__STDC__) || defined(__cplusplus)
+extern bool_t xdr_pmap_port_by_brick_req (XDR *, pmap_port_by_brick_req*);
+extern bool_t xdr_pmap_port_by_brick_rsp (XDR *, pmap_port_by_brick_rsp*);
+extern bool_t xdr_pmap_brick_by_port_req (XDR *, pmap_brick_by_port_req*);
+extern bool_t xdr_pmap_brick_by_port_rsp (XDR *, pmap_brick_by_port_rsp*);
+extern bool_t xdr_pmap_signup_req (XDR *, pmap_signup_req*);
+extern bool_t xdr_pmap_signup_rsp (XDR *, pmap_signup_rsp*);
+extern bool_t xdr_pmap_signin_req (XDR *, pmap_signin_req*);
+extern bool_t xdr_pmap_signin_rsp (XDR *, pmap_signin_rsp*);
+extern bool_t xdr_pmap_signout_req (XDR *, pmap_signout_req*);
+extern bool_t xdr_pmap_signout_rsp (XDR *, pmap_signout_rsp*);
+
+#else /* K&R C */
+extern bool_t xdr_pmap_port_by_brick_req ();
+extern bool_t xdr_pmap_port_by_brick_rsp ();
+extern bool_t xdr_pmap_brick_by_port_req ();
+extern bool_t xdr_pmap_brick_by_port_rsp ();
+extern bool_t xdr_pmap_signup_req ();
+extern bool_t xdr_pmap_signup_rsp ();
+extern bool_t xdr_pmap_signin_req ();
+extern bool_t xdr_pmap_signin_rsp ();
+extern bool_t xdr_pmap_signout_req ();
+extern bool_t xdr_pmap_signout_rsp ();
+
+#endif /* K&R C */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !_PORTMAP_XDR_H_RPCGEN */
diff --git a/rpc/xdr/src/portmap-xdr.x b/rpc/xdr/src/portmap-xdr.x
new file mode 100644
index 000000000..f60dcc76c
--- /dev/null
+++ b/rpc/xdr/src/portmap-xdr.x
@@ -0,0 +1,55 @@
+
+struct pmap_port_by_brick_req {
+ string brick<>;
+};
+
+struct pmap_port_by_brick_rsp {
+ int op_ret;
+ int op_errno;
+ int status;
+ int port;
+};
+
+
+struct pmap_brick_by_port_req {
+ int port;
+};
+
+struct pmap_brick_by_port_rsp {
+ int op_ret;
+ int op_errno;
+ int status;
+ string brick<>;
+};
+
+
+struct pmap_signup_req {
+ string brick<>;
+ int port;
+};
+
+struct pmap_signup_rsp {
+ int op_ret;
+ int op_errno;
+};
+
+
+struct pmap_signin_req {
+ string brick<>;
+ int port;
+};
+
+struct pmap_signin_rsp {
+ int op_ret;
+ int op_errno;
+};
+
+struct pmap_signout_req {
+ string brick<>;
+ int port;
+};
+
+struct pmap_signout_rsp {
+ int op_ret;
+ int op_errno;
+};
diff --git a/rpc/xdr/src/rpc-common-xdr.c b/rpc/xdr/src/rpc-common-xdr.c
new file mode 100644
index 000000000..6cb48a923
--- /dev/null
+++ b/rpc/xdr/src/rpc-common-xdr.c
@@ -0,0 +1,223 @@
+/*
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xdr-common.h"
+#include "compat.h"
+
+#if defined(__GNUC__)
+#if __GNUC__ >= 4
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#endif
+#endif
+
+/*
+ * Please do not edit this file.
+ * It was generated using rpcgen.
+ */
+
+#include "rpc-common-xdr.h"
+
+bool_t
+xdr_auth_glusterfs_parms_v2 (XDR *xdrs, auth_glusterfs_parms_v2 *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+
+ if (xdrs->x_op == XDR_ENCODE) {
+ buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_int (xdrs, &objp->pid))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->uid))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->gid))
+ return FALSE;
+
+ } else {
+ IXDR_PUT_LONG(buf, objp->pid);
+ IXDR_PUT_U_LONG(buf, objp->uid);
+ IXDR_PUT_U_LONG(buf, objp->gid);
+ }
+ if (!xdr_array (xdrs, (char **)&objp->groups.groups_val, (u_int *) &objp->groups.groups_len, ~0,
+ sizeof (u_int), (xdrproc_t) xdr_u_int))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->lk_owner.lk_owner_val, (u_int *) &objp->lk_owner.lk_owner_len, ~0))
+ return FALSE;
+ return TRUE;
+ } else if (xdrs->x_op == XDR_DECODE) {
+ buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_int (xdrs, &objp->pid))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->uid))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->gid))
+ return FALSE;
+
+ } else {
+ objp->pid = IXDR_GET_LONG(buf);
+ objp->uid = IXDR_GET_U_LONG(buf);
+ objp->gid = IXDR_GET_U_LONG(buf);
+ }
+ if (!xdr_array (xdrs, (char **)&objp->groups.groups_val, (u_int *) &objp->groups.groups_len, ~0,
+ sizeof (u_int), (xdrproc_t) xdr_u_int))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->lk_owner.lk_owner_val, (u_int *) &objp->lk_owner.lk_owner_len, ~0))
+ return FALSE;
+ return TRUE;
+ }
+
+ if (!xdr_int (xdrs, &objp->pid))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->uid))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->gid))
+ return FALSE;
+ if (!xdr_array (xdrs, (char **)&objp->groups.groups_val, (u_int *) &objp->groups.groups_len, ~0,
+ sizeof (u_int), (xdrproc_t) xdr_u_int))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->lk_owner.lk_owner_val, (u_int *) &objp->lk_owner.lk_owner_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_auth_glusterfs_parms (XDR *xdrs, auth_glusterfs_parms *objp)
+{
+ register int32_t *buf;
+ int i;
+ buf = NULL;
+
+
+ if (xdrs->x_op == XDR_ENCODE) {
+ if (!xdr_u_quad_t (xdrs, &objp->lk_owner))
+ return FALSE;
+ buf = XDR_INLINE (xdrs, (4 + 16 )* BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_u_int (xdrs, &objp->pid))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->uid))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->gid))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->ngrps))
+ return FALSE;
+ if (!xdr_vector (xdrs, (char *)objp->groups, 16,
+ sizeof (u_int), (xdrproc_t) xdr_u_int))
+ return FALSE;
+ } else {
+ IXDR_PUT_U_LONG(buf, objp->pid);
+ IXDR_PUT_U_LONG(buf, objp->uid);
+ IXDR_PUT_U_LONG(buf, objp->gid);
+ IXDR_PUT_U_LONG(buf, objp->ngrps);
+ {
+ register u_int *genp;
+
+ for (i = 0, genp = objp->groups;
+ i < 16; ++i) {
+ IXDR_PUT_U_LONG(buf, *genp++);
+ }
+ }
+ }
+ return TRUE;
+ } else if (xdrs->x_op == XDR_DECODE) {
+ if (!xdr_u_quad_t (xdrs, &objp->lk_owner))
+ return FALSE;
+ buf = XDR_INLINE (xdrs, (4 + 16 )* BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_u_int (xdrs, &objp->pid))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->uid))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->gid))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->ngrps))
+ return FALSE;
+ if (!xdr_vector (xdrs, (char *)objp->groups, 16,
+ sizeof (u_int), (xdrproc_t) xdr_u_int))
+ return FALSE;
+ } else {
+ objp->pid = IXDR_GET_U_LONG(buf);
+ objp->uid = IXDR_GET_U_LONG(buf);
+ objp->gid = IXDR_GET_U_LONG(buf);
+ objp->ngrps = IXDR_GET_U_LONG(buf);
+ {
+ register u_int *genp;
+
+ for (i = 0, genp = objp->groups;
+ i < 16; ++i) {
+ *genp++ = IXDR_GET_U_LONG(buf);
+ }
+ }
+ }
+ return TRUE;
+ }
+
+ if (!xdr_u_quad_t (xdrs, &objp->lk_owner))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->pid))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->uid))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->gid))
+ return FALSE;
+ if (!xdr_u_int (xdrs, &objp->ngrps))
+ return FALSE;
+ if (!xdr_vector (xdrs, (char *)objp->groups, 16,
+ sizeof (u_int), (xdrproc_t) xdr_u_int))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf_dump_req (XDR *xdrs, gf_dump_req *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf_prog_detail (XDR *xdrs, gf_prog_detail *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_string (xdrs, &objp->progname, ~0))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->prognum))
+ return FALSE;
+ if (!xdr_u_quad_t (xdrs, &objp->progver))
+ return FALSE;
+ if (!xdr_pointer (xdrs, (char **)&objp->next, sizeof (gf_prog_detail), (xdrproc_t) xdr_gf_prog_detail))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gf_dump_rsp (XDR *xdrs, gf_dump_rsp *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_ret))
+ return FALSE;
+ if (!xdr_int (xdrs, &objp->op_errno))
+ return FALSE;
+ if (!xdr_pointer (xdrs, (char **)&objp->prog, sizeof (gf_prog_detail), (xdrproc_t) xdr_gf_prog_detail))
+ return FALSE;
+ return TRUE;
+}
diff --git a/rpc/xdr/src/rpc-common-xdr.h b/rpc/xdr/src/rpc-common-xdr.h
new file mode 100644
index 000000000..c43eab315
--- /dev/null
+++ b/rpc/xdr/src/rpc-common-xdr.h
@@ -0,0 +1,104 @@
+/*
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xdr-common.h"
+#include "compat.h"
+
+#if defined(__GNUC__)
+#if __GNUC__ >= 4
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#endif
+#endif
+
+/*
+ * Please do not edit this file.
+ * It was generated using rpcgen.
+ */
+
+#ifndef _RPC_COMMON_XDR_H_RPCGEN
+#define _RPC_COMMON_XDR_H_RPCGEN
+
+#include <rpc/rpc.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+struct auth_glusterfs_parms_v2 {
+ int pid;
+ u_int uid;
+ u_int gid;
+ struct {
+ u_int groups_len;
+ u_int *groups_val;
+ } groups;
+ struct {
+ u_int lk_owner_len;
+ char *lk_owner_val;
+ } lk_owner;
+};
+typedef struct auth_glusterfs_parms_v2 auth_glusterfs_parms_v2;
+
+struct auth_glusterfs_parms {
+ u_quad_t lk_owner;
+ u_int pid;
+ u_int uid;
+ u_int gid;
+ u_int ngrps;
+ u_int groups[16];
+};
+typedef struct auth_glusterfs_parms auth_glusterfs_parms;
+
+struct gf_dump_req {
+ u_quad_t gfs_id;
+};
+typedef struct gf_dump_req gf_dump_req;
+
+struct gf_prog_detail {
+ char *progname;
+ u_quad_t prognum;
+ u_quad_t progver;
+ struct gf_prog_detail *next;
+};
+typedef struct gf_prog_detail gf_prog_detail;
+
+struct gf_dump_rsp {
+ u_quad_t gfs_id;
+ int op_ret;
+ int op_errno;
+ struct gf_prog_detail *prog;
+};
+typedef struct gf_dump_rsp gf_dump_rsp;
+
+/* the xdr functions */
+
+#if defined(__STDC__) || defined(__cplusplus)
+extern bool_t xdr_auth_glusterfs_parms_v2 (XDR *, auth_glusterfs_parms_v2*);
+extern bool_t xdr_auth_glusterfs_parms (XDR *, auth_glusterfs_parms*);
+extern bool_t xdr_gf_dump_req (XDR *, gf_dump_req*);
+extern bool_t xdr_gf_prog_detail (XDR *, gf_prog_detail*);
+extern bool_t xdr_gf_dump_rsp (XDR *, gf_dump_rsp*);
+
+#else /* K&R C */
+extern bool_t xdr_auth_glusterfs_parms_v2 ();
+extern bool_t xdr_auth_glusterfs_parms ();
+extern bool_t xdr_gf_dump_req ();
+extern bool_t xdr_gf_prog_detail ();
+extern bool_t xdr_gf_dump_rsp ();
+
+#endif /* K&R C */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !_RPC_COMMON_XDR_H_RPCGEN */
diff --git a/rpc/xdr/src/rpc-common-xdr.x b/rpc/xdr/src/rpc-common-xdr.x
new file mode 100644
index 000000000..ca28f38b5
--- /dev/null
+++ b/rpc/xdr/src/rpc-common-xdr.x
@@ -0,0 +1,39 @@
+/* This file has definition of few XDR structures which are
+ * not captured in any section specific file */
+
+struct auth_glusterfs_parms_v2 {
+ int pid;
+ unsigned int uid;
+ unsigned int gid;
+ unsigned int groups<>;
+ opaque lk_owner<>;
+};
+
+struct auth_glusterfs_parms {
+ unsigned hyper lk_owner;
+ unsigned int pid;
+ unsigned int uid;
+ unsigned int gid;
+ unsigned int ngrps;
+ unsigned groups[16];
+};
+
+struct gf_dump_req {
+ unsigned hyper gfs_id;
+};
+
+
+struct gf_prog_detail {
+ string progname<>;
+ unsigned hyper prognum;
+ unsigned hyper progver;
+ struct gf_prog_detail *next;
+};
+
+
+struct gf_dump_rsp {
+ unsigned hyper gfs_id;
+ int op_ret;
+ int op_errno;
+ struct gf_prog_detail *prog;
+};
diff --git a/rpc/xdr/src/xdr-generic.c b/rpc/xdr/src/xdr-generic.c
new file mode 100644
index 000000000..58d1ee77e
--- /dev/null
+++ b/rpc/xdr/src/xdr-generic.c
@@ -0,0 +1,125 @@
+/*
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#include "xdr-generic.h"
+
+
+ssize_t
+xdr_serialize_generic (struct iovec outmsg, void *res, xdrproc_t proc)
+{
+ ssize_t ret = -1;
+ XDR xdr;
+
+ if ((!outmsg.iov_base) || (!res) || (!proc))
+ return -1;
+
+ xdrmem_create (&xdr, outmsg.iov_base, (unsigned int)outmsg.iov_len,
+ XDR_ENCODE);
+
+ if (!proc (&xdr, res)) {
+ ret = -1;
+ goto ret;
+ }
+
+ ret = xdr_encoded_length (xdr);
+
+ret:
+ return ret;
+}
+
+
+ssize_t
+xdr_to_generic (struct iovec inmsg, void *args, xdrproc_t proc)
+{
+ XDR xdr;
+ ssize_t ret = -1;
+
+ if ((!inmsg.iov_base) || (!args) || (!proc))
+ return -1;
+
+ xdrmem_create (&xdr, inmsg.iov_base, (unsigned int)inmsg.iov_len,
+ XDR_DECODE);
+
+ if (!proc (&xdr, args)) {
+ ret = -1;
+ goto ret;
+ }
+
+ ret = xdr_decoded_length (xdr);
+ret:
+ return ret;
+}
+
+
+ssize_t
+xdr_to_generic_payload (struct iovec inmsg, void *args, xdrproc_t proc,
+ struct iovec *pendingpayload)
+{
+ XDR xdr;
+ ssize_t ret = -1;
+
+ if ((!inmsg.iov_base) || (!args) || (!proc))
+ return -1;
+
+ xdrmem_create (&xdr, inmsg.iov_base, (unsigned int)inmsg.iov_len,
+ XDR_DECODE);
+
+ if (!proc (&xdr, args)) {
+ ret = -1;
+ goto ret;
+ }
+
+ ret = xdr_decoded_length (xdr);
+
+ if (pendingpayload) {
+ pendingpayload->iov_base = xdr_decoded_remaining_addr (xdr);
+ pendingpayload->iov_len = xdr_decoded_remaining_len (xdr);
+ }
+
+ret:
+ return ret;
+}
+
+ssize_t
+xdr_length_round_up (size_t len, size_t bufsize)
+{
+ int roundup = 0;
+
+ roundup = len % XDR_BYTES_PER_UNIT;
+ if (roundup > 0)
+ roundup = XDR_BYTES_PER_UNIT - roundup;
+
+ if ((roundup > 0) && ((roundup + len) <= bufsize))
+ len += roundup;
+
+ return len;
+}
+
+int
+xdr_bytes_round_up (struct iovec *vec, size_t bufsize)
+{
+ vec->iov_len = xdr_length_round_up (vec->iov_len, bufsize);
+ return 0;
+}
+
+
+void
+xdr_vector_round_up (struct iovec *vec, int vcount, uint32_t count)
+{
+ uint32_t round_count = 0;
+
+ round_count = xdr_length_round_up (count, 1048576);
+ round_count -= count;
+ if (round_count == 0 || vcount <= 0)
+ return;
+
+ vec[vcount-1].iov_len += round_count;
+}
diff --git a/rpc/xdr/src/xdr-generic.h b/rpc/xdr/src/xdr-generic.h
new file mode 100644
index 000000000..bb3759bbe
--- /dev/null
+++ b/rpc/xdr/src/xdr-generic.h
@@ -0,0 +1,48 @@
+/*
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _XDR_GENERIC_H
+#define _XDR_GENERIC_H
+
+#include <sys/uio.h>
+//#include <rpc/rpc.h>
+#include <rpc/types.h>
+#include <rpc/xdr.h>
+
+#include "compat.h"
+
+#define xdr_decoded_remaining_addr(xdr) ((&xdr)->x_private)
+#define xdr_decoded_remaining_len(xdr) ((&xdr)->x_handy)
+#define xdr_encoded_length(xdr) (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
+#define xdr_decoded_length(xdr) (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
+
+#define XDR_BYTES_PER_UNIT 4
+
+ssize_t
+xdr_serialize_generic (struct iovec outmsg, void *res, xdrproc_t proc);
+
+ssize_t
+xdr_to_generic (struct iovec inmsg, void *args, xdrproc_t proc);
+
+ssize_t
+xdr_to_generic_payload (struct iovec inmsg, void *args, xdrproc_t proc,
+ struct iovec *pendingpayload);
+
+
+extern int
+xdr_bytes_round_up (struct iovec *vec, size_t bufsize);
+
+extern ssize_t
+xdr_length_round_up (size_t len, size_t bufsize);
+
+void
+xdr_vector_round_up (struct iovec *vec, int vcount, uint32_t count);
+
+#endif /* !_XDR_GENERIC_H */
diff --git a/rpc/xdr/src/xdr-nfs3.c b/rpc/xdr/src/xdr-nfs3.c
new file mode 100644
index 000000000..a497e9f54
--- /dev/null
+++ b/rpc/xdr/src/xdr-nfs3.c
@@ -0,0 +1,1888 @@
+/*
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xdr-nfs3.h"
+#include "mem-pool.h"
+#include "xdr-common.h"
+
+#if GF_DARWIN_HOST_OS
+#define xdr_u_quad_t xdr_u_int64_t
+#define xdr_quad_t xdr_int64_t
+#define xdr_uint32_t xdr_u_int32_t
+#define xdr_uint64_t xdr_u_int64_t
+#endif
+
+bool_t
+xdr_uint64 (XDR *xdrs, uint64 *objp)
+{
+ if (!xdr_uint64_t (xdrs, objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_int64 (XDR *xdrs, int64 *objp)
+{
+ if (!xdr_int64_t (xdrs, objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_uint32 (XDR *xdrs, uint32 *objp)
+{
+ if (!xdr_uint32_t (xdrs, objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_int32 (XDR *xdrs, int32 *objp)
+{
+ if (!xdr_int32_t (xdrs, objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_filename3 (XDR *xdrs, filename3 *objp)
+{
+ if (!xdr_string (xdrs, objp, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_nfspath3 (XDR *xdrs, nfspath3 *objp)
+{
+ if (!xdr_string (xdrs, objp, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_fileid3 (XDR *xdrs, fileid3 *objp)
+{
+ if (!xdr_uint64 (xdrs, objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_cookie3 (XDR *xdrs, cookie3 *objp)
+{
+ if (!xdr_uint64 (xdrs, objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_cookieverf3 (XDR *xdrs, cookieverf3 objp)
+{
+ if (!xdr_opaque (xdrs, objp, NFS3_COOKIEVERFSIZE))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_createverf3 (XDR *xdrs, createverf3 objp)
+{
+ if (!xdr_opaque (xdrs, objp, NFS3_CREATEVERFSIZE))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_writeverf3 (XDR *xdrs, writeverf3 objp)
+{
+ if (!xdr_opaque (xdrs, objp, NFS3_WRITEVERFSIZE))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_uid3 (XDR *xdrs, uid3 *objp)
+{
+ if (!xdr_uint32 (xdrs, objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_gid3 (XDR *xdrs, gid3 *objp)
+{
+ if (!xdr_uint32 (xdrs, objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_size3 (XDR *xdrs, size3 *objp)
+{
+ if (!xdr_uint64 (xdrs, objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_offset3 (XDR *xdrs, offset3 *objp)
+{
+ if (!xdr_uint64 (xdrs, objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_mode3 (XDR *xdrs, mode3 *objp)
+{
+ if (!xdr_uint32 (xdrs, objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_count3 (XDR *xdrs, count3 *objp)
+{
+ if (!xdr_uint32 (xdrs, objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_nfsstat3 (XDR *xdrs, nfsstat3 *objp)
+{
+ if (!xdr_enum (xdrs, (enum_t *) objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_ftype3 (XDR *xdrs, ftype3 *objp)
+{
+ if (!xdr_enum (xdrs, (enum_t *) objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_specdata3 (XDR *xdrs, specdata3 *objp)
+{
+ if (!xdr_uint32 (xdrs, &objp->specdata1))
+ return FALSE;
+ if (!xdr_uint32 (xdrs, &objp->specdata2))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_nfs_fh3 (XDR *xdrs, nfs_fh3 *objp)
+{
+ if (!xdr_bytes (xdrs, (char **)&objp->data.data_val, (u_int *) &objp->data.data_len, NFS3_FHSIZE))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_nfstime3 (XDR *xdrs, nfstime3 *objp)
+{
+ if (!xdr_uint32 (xdrs, &objp->seconds))
+ return FALSE;
+ if (!xdr_uint32 (xdrs, &objp->nseconds))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_fattr3 (XDR *xdrs, fattr3 *objp)
+{
+ if (!xdr_ftype3 (xdrs, &objp->type))
+ return FALSE;
+ if (!xdr_mode3 (xdrs, &objp->mode))
+ return FALSE;
+ if (!xdr_uint32 (xdrs, &objp->nlink))
+ return FALSE;
+ if (!xdr_uid3 (xdrs, &objp->uid))
+ return FALSE;
+ if (!xdr_gid3 (xdrs, &objp->gid))
+ return FALSE;
+ if (!xdr_size3 (xdrs, &objp->size))
+ return FALSE;
+ if (!xdr_size3 (xdrs, &objp->used))
+ return FALSE;
+ if (!xdr_specdata3 (xdrs, &objp->rdev))
+ return FALSE;
+ if (!xdr_uint64 (xdrs, &objp->fsid))
+ return FALSE;
+ if (!xdr_fileid3 (xdrs, &objp->fileid))
+ return FALSE;
+ if (!xdr_nfstime3 (xdrs, &objp->atime))
+ return FALSE;
+ if (!xdr_nfstime3 (xdrs, &objp->mtime))
+ return FALSE;
+ if (!xdr_nfstime3 (xdrs, &objp->ctime))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_post_op_attr (XDR *xdrs, post_op_attr *objp)
+{
+ if (!xdr_bool (xdrs, &objp->attributes_follow))
+ return FALSE;
+ switch (objp->attributes_follow) {
+ case TRUE:
+ if (!xdr_fattr3 (xdrs, &objp->post_op_attr_u.attributes))
+ return FALSE;
+ break;
+ case FALSE:
+ break;
+ default:
+ return FALSE;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_wcc_attr (XDR *xdrs, wcc_attr *objp)
+{
+ if (!xdr_size3 (xdrs, &objp->size))
+ return FALSE;
+ if (!xdr_nfstime3 (xdrs, &objp->mtime))
+ return FALSE;
+ if (!xdr_nfstime3 (xdrs, &objp->ctime))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_pre_op_attr (XDR *xdrs, pre_op_attr *objp)
+{
+ if (!xdr_bool (xdrs, &objp->attributes_follow))
+ return FALSE;
+ switch (objp->attributes_follow) {
+ case TRUE:
+ if (!xdr_wcc_attr (xdrs, &objp->pre_op_attr_u.attributes))
+ return FALSE;
+ break;
+ case FALSE:
+ break;
+ default:
+ return FALSE;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_wcc_data (XDR *xdrs, wcc_data *objp)
+{
+ if (!xdr_pre_op_attr (xdrs, &objp->before))
+ return FALSE;
+ if (!xdr_post_op_attr (xdrs, &objp->after))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_post_op_fh3 (XDR *xdrs, post_op_fh3 *objp)
+{
+ if (!xdr_bool (xdrs, &objp->handle_follows))
+ return FALSE;
+ switch (objp->handle_follows) {
+ case TRUE:
+ if (!xdr_nfs_fh3 (xdrs, &objp->post_op_fh3_u.handle))
+ return FALSE;
+ break;
+ case FALSE:
+ break;
+ default:
+ return FALSE;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_time_how (XDR *xdrs, time_how *objp)
+{
+ if (!xdr_enum (xdrs, (enum_t *) objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_set_mode3 (XDR *xdrs, set_mode3 *objp)
+{
+ if (!xdr_bool (xdrs, &objp->set_it))
+ return FALSE;
+ switch (objp->set_it) {
+ case TRUE:
+ if (!xdr_mode3 (xdrs, &objp->set_mode3_u.mode))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_set_uid3 (XDR *xdrs, set_uid3 *objp)
+{
+ if (!xdr_bool (xdrs, &objp->set_it))
+ return FALSE;
+ switch (objp->set_it) {
+ case TRUE:
+ if (!xdr_uid3 (xdrs, &objp->set_uid3_u.uid))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_set_gid3 (XDR *xdrs, set_gid3 *objp)
+{
+ if (!xdr_bool (xdrs, &objp->set_it))
+ return FALSE;
+ switch (objp->set_it) {
+ case TRUE:
+ if (!xdr_gid3 (xdrs, &objp->set_gid3_u.gid))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_set_size3 (XDR *xdrs, set_size3 *objp)
+{
+ if (!xdr_bool (xdrs, &objp->set_it))
+ return FALSE;
+ switch (objp->set_it) {
+ case TRUE:
+ if (!xdr_size3 (xdrs, &objp->set_size3_u.size))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_set_atime (XDR *xdrs, set_atime *objp)
+{
+ if (!xdr_time_how (xdrs, &objp->set_it))
+ return FALSE;
+ switch (objp->set_it) {
+ case SET_TO_CLIENT_TIME:
+ if (!xdr_nfstime3 (xdrs, &objp->set_atime_u.atime))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_set_mtime (XDR *xdrs, set_mtime *objp)
+{
+ if (!xdr_time_how (xdrs, &objp->set_it))
+ return FALSE;
+ switch (objp->set_it) {
+ case SET_TO_CLIENT_TIME:
+ if (!xdr_nfstime3 (xdrs, &objp->set_mtime_u.mtime))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_sattr3 (XDR *xdrs, sattr3 *objp)
+{
+ if (!xdr_set_mode3 (xdrs, &objp->mode))
+ return FALSE;
+ if (!xdr_set_uid3 (xdrs, &objp->uid))
+ return FALSE;
+ if (!xdr_set_gid3 (xdrs, &objp->gid))
+ return FALSE;
+ if (!xdr_set_size3 (xdrs, &objp->size))
+ return FALSE;
+ if (!xdr_set_atime (xdrs, &objp->atime))
+ return FALSE;
+ if (!xdr_set_mtime (xdrs, &objp->mtime))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_diropargs3 (XDR *xdrs, diropargs3 *objp)
+{
+ if (!xdr_nfs_fh3 (xdrs, &objp->dir))
+ return FALSE;
+ if (!xdr_filename3 (xdrs, &objp->name))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_getattr3args (XDR *xdrs, getattr3args *objp)
+{
+ if (!xdr_nfs_fh3 (xdrs, &objp->object))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_getattr3resok (XDR *xdrs, getattr3resok *objp)
+{
+ if (!xdr_fattr3 (xdrs, &objp->obj_attributes))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_getattr3res (XDR *xdrs, getattr3res *objp)
+{
+ if (!xdr_nfsstat3 (xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_getattr3resok (xdrs, &objp->getattr3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_sattrguard3 (XDR *xdrs, sattrguard3 *objp)
+{
+ if (!xdr_bool (xdrs, &objp->check))
+ return FALSE;
+ switch (objp->check) {
+ case TRUE:
+ if (!xdr_nfstime3 (xdrs, &objp->sattrguard3_u.obj_ctime))
+ return FALSE;
+ break;
+ case FALSE:
+ break;
+ default:
+ return FALSE;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_setattr3args (XDR *xdrs, setattr3args *objp)
+{
+ if (!xdr_nfs_fh3 (xdrs, &objp->object))
+ return FALSE;
+ if (!xdr_sattr3 (xdrs, &objp->new_attributes))
+ return FALSE;
+ if (!xdr_sattrguard3 (xdrs, &objp->guard))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_setattr3resok (XDR *xdrs, setattr3resok *objp)
+{
+ if (!xdr_wcc_data (xdrs, &objp->obj_wcc))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_setattr3resfail (XDR *xdrs, setattr3resfail *objp)
+{
+ if (!xdr_wcc_data (xdrs, &objp->obj_wcc))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_setattr3res (XDR *xdrs, setattr3res *objp)
+{
+ if (!xdr_nfsstat3 (xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_setattr3resok (xdrs, &objp->setattr3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_setattr3resfail (xdrs, &objp->setattr3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_lookup3args (XDR *xdrs, lookup3args *objp)
+{
+ if (!xdr_diropargs3 (xdrs, &objp->what))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_lookup3resok (XDR *xdrs, lookup3resok *objp)
+{
+ if (!xdr_nfs_fh3 (xdrs, &objp->object))
+ return FALSE;
+ if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_post_op_attr (xdrs, &objp->dir_attributes))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_lookup3resfail (XDR *xdrs, lookup3resfail *objp)
+{
+ if (!xdr_post_op_attr (xdrs, &objp->dir_attributes))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_lookup3res (XDR *xdrs, lookup3res *objp)
+{
+ if (!xdr_nfsstat3 (xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_lookup3resok (xdrs, &objp->lookup3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_lookup3resfail (xdrs, &objp->lookup3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_access3args (XDR *xdrs, access3args *objp)
+{
+ if (!xdr_nfs_fh3 (xdrs, &objp->object))
+ return FALSE;
+ if (!xdr_uint32 (xdrs, &objp->access))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_access3resok (XDR *xdrs, access3resok *objp)
+{
+ if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_uint32 (xdrs, &objp->access))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_access3resfail (XDR *xdrs, access3resfail *objp)
+{
+ if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_access3res (XDR *xdrs, access3res *objp)
+{
+ if (!xdr_nfsstat3 (xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_access3resok (xdrs, &objp->access3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_access3resfail (xdrs, &objp->access3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_readlink3args (XDR *xdrs, readlink3args *objp)
+{
+ if (!xdr_nfs_fh3 (xdrs, &objp->symlink))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_readlink3resok (XDR *xdrs, readlink3resok *objp)
+{
+ if (!xdr_post_op_attr (xdrs, &objp->symlink_attributes))
+ return FALSE;
+ if (!xdr_nfspath3 (xdrs, &objp->data))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_readlink3resfail (XDR *xdrs, readlink3resfail *objp)
+{
+ if (!xdr_post_op_attr (xdrs, &objp->symlink_attributes))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_readlink3res (XDR *xdrs, readlink3res *objp)
+{
+ if (!xdr_nfsstat3 (xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_readlink3resok (xdrs, &objp->readlink3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_readlink3resfail (xdrs, &objp->readlink3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_read3args (XDR *xdrs, read3args *objp)
+{
+ if (!xdr_nfs_fh3 (xdrs, &objp->file))
+ return FALSE;
+ if (!xdr_offset3 (xdrs, &objp->offset))
+ return FALSE;
+ if (!xdr_count3 (xdrs, &objp->count))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_read3resok_nocopy (XDR *xdrs, read3resok *objp)
+{
+ if (!xdr_post_op_attr (xdrs, &objp->file_attributes))
+ return FALSE;
+ if (!xdr_count3 (xdrs, &objp->count))
+ return FALSE;
+ if (!xdr_bool (xdrs, &objp->eof))
+ return FALSE;
+ if (!xdr_u_int (xdrs, (u_int *) &objp->data.data_len))
+ return FALSE;
+ return TRUE;
+}
+
+
+bool_t
+xdr_read3resok (XDR *xdrs, read3resok *objp)
+{
+ if (!xdr_post_op_attr (xdrs, &objp->file_attributes))
+ return FALSE;
+ if (!xdr_count3 (xdrs, &objp->count))
+ return FALSE;
+ if (!xdr_bool (xdrs, &objp->eof))
+ return FALSE;
+ if (!xdr_bytes (xdrs, (char **)&objp->data.data_val, (u_int *) &objp->data.data_len, ~0))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_read3resfail (XDR *xdrs, read3resfail *objp)
+{
+ if (!xdr_post_op_attr (xdrs, &objp->file_attributes))
+ return FALSE;
+ return TRUE;
+}
+
+
+bool_t
+xdr_read3res_nocopy (XDR *xdrs, read3res *objp)
+{
+ if (!xdr_nfsstat3 (xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_read3resok_nocopy (xdrs, &objp->read3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_read3resfail (xdrs, &objp->read3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
+}
+
+
+bool_t
+xdr_read3res (XDR *xdrs, read3res *objp)
+{
+ if (!xdr_nfsstat3 (xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_read3resok (xdrs, &objp->read3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_read3resfail (xdrs, &objp->read3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_stable_how (XDR *xdrs, stable_how *objp)
+{
+ if (!xdr_enum (xdrs, (enum_t *) objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_write3args (XDR *xdrs, write3args *objp)
+{
+ if (!xdr_nfs_fh3 (xdrs, &objp->file))
+ return FALSE;
+ if (!xdr_offset3 (xdrs, &objp->offset))
+ return FALSE;
+ if (!xdr_count3 (xdrs, &objp->count))
+ return FALSE;
+ if (!xdr_stable_how (xdrs, &objp->stable))
+ return FALSE;
+
+ /* Added specifically to avoid copies from the xdr buffer into
+ * the write3args structure, which will also require an already
+ * allocated buffer. That is not optimal.
+ */
+ if (!xdr_u_int (xdrs, (u_int *) &objp->data.data_len))
+ return FALSE;
+
+ /* The remaining bytes in the xdr buffer are the bytes that need to be
+ * written. See how these bytes are extracted in the xdr_to_write3args
+ * code path. Be careful, while using the write3args structure, since
+ * only the data.data_len has been filled. The actual data is
+ * extracted in xdr_to_write3args path.
+ */
+
+ /* if (!xdr_bytes (xdrs, (char **)&objp->data.data_val, (u_int *) &objp->data.data_len, ~0))
+ return FALSE;
+ */
+ return TRUE;
+}
+
+bool_t
+xdr_write3resok (XDR *xdrs, write3resok *objp)
+{
+ if (!xdr_wcc_data (xdrs, &objp->file_wcc))
+ return FALSE;
+ if (!xdr_count3 (xdrs, &objp->count))
+ return FALSE;
+ if (!xdr_stable_how (xdrs, &objp->committed))
+ return FALSE;
+ if (!xdr_writeverf3 (xdrs, objp->verf))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_write3resfail (XDR *xdrs, write3resfail *objp)
+{
+ if (!xdr_wcc_data (xdrs, &objp->file_wcc))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_write3res (XDR *xdrs, write3res *objp)
+{
+ if (!xdr_nfsstat3 (xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_write3resok (xdrs, &objp->write3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_write3resfail (xdrs, &objp->write3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_createmode3 (XDR *xdrs, createmode3 *objp)
+{
+ if (!xdr_enum (xdrs, (enum_t *) objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_createhow3 (XDR *xdrs, createhow3 *objp)
+{
+ if (!xdr_createmode3 (xdrs, &objp->mode))
+ return FALSE;
+ switch (objp->mode) {
+ case UNCHECKED:
+ case GUARDED:
+ if (!xdr_sattr3 (xdrs, &objp->createhow3_u.obj_attributes))
+ return FALSE;
+ break;
+ case EXCLUSIVE:
+ if (!xdr_createverf3 (xdrs, objp->createhow3_u.verf))
+ return FALSE;
+ break;
+ default:
+ return FALSE;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_create3args (XDR *xdrs, create3args *objp)
+{
+ if (!xdr_diropargs3 (xdrs, &objp->where))
+ return FALSE;
+ if (!xdr_createhow3 (xdrs, &objp->how))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_create3resok (XDR *xdrs, create3resok *objp)
+{
+ if (!xdr_post_op_fh3 (xdrs, &objp->obj))
+ return FALSE;
+ if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_create3resfail (XDR *xdrs, create3resfail *objp)
+{
+ if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_create3res (XDR *xdrs, create3res *objp)
+{
+ if (!xdr_nfsstat3 (xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_create3resok (xdrs, &objp->create3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_create3resfail (xdrs, &objp->create3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_mkdir3args (XDR *xdrs, mkdir3args *objp)
+{
+ if (!xdr_diropargs3 (xdrs, &objp->where))
+ return FALSE;
+ if (!xdr_sattr3 (xdrs, &objp->attributes))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_mkdir3resok (XDR *xdrs, mkdir3resok *objp)
+{
+ if (!xdr_post_op_fh3 (xdrs, &objp->obj))
+ return FALSE;
+ if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_mkdir3resfail (XDR *xdrs, mkdir3resfail *objp)
+{
+ if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_mkdir3res (XDR *xdrs, mkdir3res *objp)
+{
+ if (!xdr_nfsstat3 (xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_mkdir3resok (xdrs, &objp->mkdir3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_mkdir3resfail (xdrs, &objp->mkdir3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_symlinkdata3 (XDR *xdrs, symlinkdata3 *objp)
+{
+ if (!xdr_sattr3 (xdrs, &objp->symlink_attributes))
+ return FALSE;
+ if (!xdr_nfspath3 (xdrs, &objp->symlink_data))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_symlink3args (XDR *xdrs, symlink3args *objp)
+{
+ if (!xdr_diropargs3 (xdrs, &objp->where))
+ return FALSE;
+ if (!xdr_symlinkdata3 (xdrs, &objp->symlink))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_symlink3resok (XDR *xdrs, symlink3resok *objp)
+{
+ if (!xdr_post_op_fh3 (xdrs, &objp->obj))
+ return FALSE;
+ if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_symlink3resfail (XDR *xdrs, symlink3resfail *objp)
+{
+ if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_symlink3res (XDR *xdrs, symlink3res *objp)
+{
+ if (!xdr_nfsstat3 (xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_symlink3resok (xdrs, &objp->symlink3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_symlink3resfail (xdrs, &objp->symlink3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_devicedata3 (XDR *xdrs, devicedata3 *objp)
+{
+ if (!xdr_sattr3 (xdrs, &objp->dev_attributes))
+ return FALSE;
+ if (!xdr_specdata3 (xdrs, &objp->spec))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_mknoddata3 (XDR *xdrs, mknoddata3 *objp)
+{
+ if (!xdr_ftype3 (xdrs, &objp->type))
+ return FALSE;
+ switch (objp->type) {
+ case NF3CHR:
+ case NF3BLK:
+ if (!xdr_devicedata3 (xdrs, &objp->mknoddata3_u.device))
+ return FALSE;
+ break;
+ case NF3SOCK:
+ case NF3FIFO:
+ if (!xdr_sattr3 (xdrs, &objp->mknoddata3_u.pipe_attributes))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_mknod3args (XDR *xdrs, mknod3args *objp)
+{
+ if (!xdr_diropargs3 (xdrs, &objp->where))
+ return FALSE;
+ if (!xdr_mknoddata3 (xdrs, &objp->what))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_mknod3resok (XDR *xdrs, mknod3resok *objp)
+{
+ if (!xdr_post_op_fh3 (xdrs, &objp->obj))
+ return FALSE;
+ if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_mknod3resfail (XDR *xdrs, mknod3resfail *objp)
+{
+ if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_mknod3res (XDR *xdrs, mknod3res *objp)
+{
+ if (!xdr_nfsstat3 (xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_mknod3resok (xdrs, &objp->mknod3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_mknod3resfail (xdrs, &objp->mknod3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_remove3args (XDR *xdrs, remove3args *objp)
+{
+ if (!xdr_diropargs3 (xdrs, &objp->object))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_remove3resok (XDR *xdrs, remove3resok *objp)
+{
+ if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_remove3resfail (XDR *xdrs, remove3resfail *objp)
+{
+ if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_remove3res (XDR *xdrs, remove3res *objp)
+{
+ if (!xdr_nfsstat3 (xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_remove3resok (xdrs, &objp->remove3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_remove3resfail (xdrs, &objp->remove3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_rmdir3args (XDR *xdrs, rmdir3args *objp)
+{
+ if (!xdr_diropargs3 (xdrs, &objp->object))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_rmdir3resok (XDR *xdrs, rmdir3resok *objp)
+{
+ if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_rmdir3resfail (XDR *xdrs, rmdir3resfail *objp)
+{
+ if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_rmdir3res (XDR *xdrs, rmdir3res *objp)
+{
+ if (!xdr_nfsstat3 (xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_rmdir3resok (xdrs, &objp->rmdir3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_rmdir3resfail (xdrs, &objp->rmdir3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_rename3args (XDR *xdrs, rename3args *objp)
+{
+ if (!xdr_diropargs3 (xdrs, &objp->from))
+ return FALSE;
+ if (!xdr_diropargs3 (xdrs, &objp->to))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_rename3resok (XDR *xdrs, rename3resok *objp)
+{
+ if (!xdr_wcc_data (xdrs, &objp->fromdir_wcc))
+ return FALSE;
+ if (!xdr_wcc_data (xdrs, &objp->todir_wcc))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_rename3resfail (XDR *xdrs, rename3resfail *objp)
+{
+ if (!xdr_wcc_data (xdrs, &objp->fromdir_wcc))
+ return FALSE;
+ if (!xdr_wcc_data (xdrs, &objp->todir_wcc))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_rename3res (XDR *xdrs, rename3res *objp)
+{
+ if (!xdr_nfsstat3 (xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_rename3resok (xdrs, &objp->rename3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_rename3resfail (xdrs, &objp->rename3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_link3args (XDR *xdrs, link3args *objp)
+{
+ if (!xdr_nfs_fh3 (xdrs, &objp->file))
+ return FALSE;
+ if (!xdr_diropargs3 (xdrs, &objp->link))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_link3resok (XDR *xdrs, link3resok *objp)
+{
+ if (!xdr_post_op_attr (xdrs, &objp->file_attributes))
+ return FALSE;
+ if (!xdr_wcc_data (xdrs, &objp->linkdir_wcc))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_link3resfail (XDR *xdrs, link3resfail *objp)
+{
+ if (!xdr_post_op_attr (xdrs, &objp->file_attributes))
+ return FALSE;
+ if (!xdr_wcc_data (xdrs, &objp->linkdir_wcc))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_link3res (XDR *xdrs, link3res *objp)
+{
+ if (!xdr_nfsstat3 (xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_link3resok (xdrs, &objp->link3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_link3resfail (xdrs, &objp->link3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_readdir3args (XDR *xdrs, readdir3args *objp)
+{
+ if (!xdr_nfs_fh3 (xdrs, &objp->dir))
+ return FALSE;
+ if (!xdr_cookie3 (xdrs, &objp->cookie))
+ return FALSE;
+ if (!xdr_cookieverf3 (xdrs, objp->cookieverf))
+ return FALSE;
+ if (!xdr_count3 (xdrs, &objp->count))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_entry3 (XDR *xdrs, entry3 *objp)
+{
+ if (!xdr_fileid3 (xdrs, &objp->fileid))
+ return FALSE;
+ if (!xdr_filename3 (xdrs, &objp->name))
+ return FALSE;
+ if (!xdr_cookie3 (xdrs, &objp->cookie))
+ return FALSE;
+ if (!xdr_pointer (xdrs, (char **)&objp->nextentry, sizeof (entry3), (xdrproc_t) xdr_entry3))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_dirlist3 (XDR *xdrs, dirlist3 *objp)
+{
+ if (!xdr_pointer (xdrs, (char **)&objp->entries, sizeof (entry3), (xdrproc_t) xdr_entry3))
+ return FALSE;
+ if (!xdr_bool (xdrs, &objp->eof))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_readdir3resok (XDR *xdrs, readdir3resok *objp)
+{
+ if (!xdr_post_op_attr (xdrs, &objp->dir_attributes))
+ return FALSE;
+ if (!xdr_cookieverf3 (xdrs, objp->cookieverf))
+ return FALSE;
+ if (!xdr_dirlist3 (xdrs, &objp->reply))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_readdir3resfail (XDR *xdrs, readdir3resfail *objp)
+{
+ if (!xdr_post_op_attr (xdrs, &objp->dir_attributes))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_readdir3res (XDR *xdrs, readdir3res *objp)
+{
+ if (!xdr_nfsstat3 (xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_readdir3resok (xdrs, &objp->readdir3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_readdir3resfail (xdrs, &objp->readdir3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_readdirp3args (XDR *xdrs, readdirp3args *objp)
+{
+ if (!xdr_nfs_fh3 (xdrs, &objp->dir))
+ return FALSE;
+ if (!xdr_cookie3 (xdrs, &objp->cookie))
+ return FALSE;
+ if (!xdr_cookieverf3 (xdrs, objp->cookieverf))
+ return FALSE;
+ if (!xdr_count3 (xdrs, &objp->dircount))
+ return FALSE;
+ if (!xdr_count3 (xdrs, &objp->maxcount))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_entryp3 (XDR *xdrs, entryp3 *objp)
+{
+ if (!xdr_fileid3 (xdrs, &objp->fileid))
+ return FALSE;
+ if (!xdr_filename3 (xdrs, &objp->name))
+ return FALSE;
+ if (!xdr_cookie3 (xdrs, &objp->cookie))
+ return FALSE;
+ if (!xdr_post_op_attr (xdrs, &objp->name_attributes))
+ return FALSE;
+ if (!xdr_post_op_fh3 (xdrs, &objp->name_handle))
+ return FALSE;
+ if (!xdr_pointer (xdrs, (char **)&objp->nextentry, sizeof (entryp3), (xdrproc_t) xdr_entryp3))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_dirlistp3 (XDR *xdrs, dirlistp3 *objp)
+{
+ if (!xdr_pointer (xdrs, (char **)&objp->entries, sizeof (entryp3), (xdrproc_t) xdr_entryp3))
+ return FALSE;
+ if (!xdr_bool (xdrs, &objp->eof))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_readdirp3resok (XDR *xdrs, readdirp3resok *objp)
+{
+ if (!xdr_post_op_attr (xdrs, &objp->dir_attributes))
+ return FALSE;
+ if (!xdr_cookieverf3 (xdrs, objp->cookieverf))
+ return FALSE;
+ if (!xdr_dirlistp3 (xdrs, &objp->reply))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_readdirp3resfail (XDR *xdrs, readdirp3resfail *objp)
+{
+ if (!xdr_post_op_attr (xdrs, &objp->dir_attributes))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_readdirp3res (XDR *xdrs, readdirp3res *objp)
+{
+ if (!xdr_nfsstat3 (xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_readdirp3resok (xdrs, &objp->readdirp3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_readdirp3resfail (xdrs, &objp->readdirp3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_fsstat3args (XDR *xdrs, fsstat3args *objp)
+{
+ if (!xdr_nfs_fh3 (xdrs, &objp->fsroot))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_fsstat3resok (XDR *xdrs, fsstat3resok *objp)
+{
+ if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_size3 (xdrs, &objp->tbytes))
+ return FALSE;
+ if (!xdr_size3 (xdrs, &objp->fbytes))
+ return FALSE;
+ if (!xdr_size3 (xdrs, &objp->abytes))
+ return FALSE;
+ if (!xdr_size3 (xdrs, &objp->tfiles))
+ return FALSE;
+ if (!xdr_size3 (xdrs, &objp->ffiles))
+ return FALSE;
+ if (!xdr_size3 (xdrs, &objp->afiles))
+ return FALSE;
+ if (!xdr_uint32 (xdrs, &objp->invarsec))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_fsstat3resfail (XDR *xdrs, fsstat3resfail *objp)
+{
+ if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_fsstat3res (XDR *xdrs, fsstat3res *objp)
+{
+ if (!xdr_nfsstat3 (xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_fsstat3resok (xdrs, &objp->fsstat3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_fsstat3resfail (xdrs, &objp->fsstat3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_fsinfo3args (XDR *xdrs, fsinfo3args *objp)
+{
+ if (!xdr_nfs_fh3 (xdrs, &objp->fsroot))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_fsinfo3resok (XDR *xdrs, fsinfo3resok *objp)
+{
+ if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_uint32 (xdrs, &objp->rtmax))
+ return FALSE;
+ if (!xdr_uint32 (xdrs, &objp->rtpref))
+ return FALSE;
+ if (!xdr_uint32 (xdrs, &objp->rtmult))
+ return FALSE;
+ if (!xdr_uint32 (xdrs, &objp->wtmax))
+ return FALSE;
+ if (!xdr_uint32 (xdrs, &objp->wtpref))
+ return FALSE;
+ if (!xdr_uint32 (xdrs, &objp->wtmult))
+ return FALSE;
+ if (!xdr_uint32 (xdrs, &objp->dtpref))
+ return FALSE;
+ if (!xdr_size3 (xdrs, &objp->maxfilesize))
+ return FALSE;
+ if (!xdr_nfstime3 (xdrs, &objp->time_delta))
+ return FALSE;
+ if (!xdr_uint32 (xdrs, &objp->properties))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_fsinfo3resfail (XDR *xdrs, fsinfo3resfail *objp)
+{
+ if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_fsinfo3res (XDR *xdrs, fsinfo3res *objp)
+{
+ if (!xdr_nfsstat3 (xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_fsinfo3resok (xdrs, &objp->fsinfo3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_fsinfo3resfail (xdrs, &objp->fsinfo3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_pathconf3args (XDR *xdrs, pathconf3args *objp)
+{
+ if (!xdr_nfs_fh3 (xdrs, &objp->object))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_pathconf3resok (XDR *xdrs, pathconf3resok *objp)
+{
+ register int32_t *buf;
+
+
+ if (xdrs->x_op == XDR_ENCODE) {
+ if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_uint32 (xdrs, &objp->linkmax))
+ return FALSE;
+ if (!xdr_uint32 (xdrs, &objp->name_max))
+ return FALSE;
+ buf = XDR_INLINE (xdrs, 4 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_bool (xdrs, &objp->no_trunc))
+ return FALSE;
+ if (!xdr_bool (xdrs, &objp->chown_restricted))
+ return FALSE;
+ if (!xdr_bool (xdrs, &objp->case_insensitive))
+ return FALSE;
+ if (!xdr_bool (xdrs, &objp->case_preserving))
+ return FALSE;
+ } else {
+ IXDR_PUT_BOOL(buf, objp->no_trunc);
+ IXDR_PUT_BOOL(buf, objp->chown_restricted);
+ IXDR_PUT_BOOL(buf, objp->case_insensitive);
+ IXDR_PUT_BOOL(buf, objp->case_preserving);
+ }
+ return TRUE;
+ } else if (xdrs->x_op == XDR_DECODE) {
+ if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_uint32 (xdrs, &objp->linkmax))
+ return FALSE;
+ if (!xdr_uint32 (xdrs, &objp->name_max))
+ return FALSE;
+ buf = XDR_INLINE (xdrs, 4 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_bool (xdrs, &objp->no_trunc))
+ return FALSE;
+ if (!xdr_bool (xdrs, &objp->chown_restricted))
+ return FALSE;
+ if (!xdr_bool (xdrs, &objp->case_insensitive))
+ return FALSE;
+ if (!xdr_bool (xdrs, &objp->case_preserving))
+ return FALSE;
+ } else {
+ objp->no_trunc = IXDR_GET_BOOL(buf);
+ objp->chown_restricted = IXDR_GET_BOOL(buf);
+ objp->case_insensitive = IXDR_GET_BOOL(buf);
+ objp->case_preserving = IXDR_GET_BOOL(buf);
+ }
+ return TRUE;
+ }
+
+ if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_uint32 (xdrs, &objp->linkmax))
+ return FALSE;
+ if (!xdr_uint32 (xdrs, &objp->name_max))
+ return FALSE;
+ if (!xdr_bool (xdrs, &objp->no_trunc))
+ return FALSE;
+ if (!xdr_bool (xdrs, &objp->chown_restricted))
+ return FALSE;
+ if (!xdr_bool (xdrs, &objp->case_insensitive))
+ return FALSE;
+ if (!xdr_bool (xdrs, &objp->case_preserving))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_pathconf3resfail (XDR *xdrs, pathconf3resfail *objp)
+{
+ if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_pathconf3res (XDR *xdrs, pathconf3res *objp)
+{
+ if (!xdr_nfsstat3 (xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_pathconf3resok (xdrs, &objp->pathconf3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_pathconf3resfail (xdrs, &objp->pathconf3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_commit3args (XDR *xdrs, commit3args *objp)
+{
+ if (!xdr_nfs_fh3 (xdrs, &objp->file))
+ return FALSE;
+ if (!xdr_offset3 (xdrs, &objp->offset))
+ return FALSE;
+ if (!xdr_count3 (xdrs, &objp->count))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_commit3resok (XDR *xdrs, commit3resok *objp)
+{
+ if (!xdr_wcc_data (xdrs, &objp->file_wcc))
+ return FALSE;
+ if (!xdr_writeverf3 (xdrs, objp->verf))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_commit3resfail (XDR *xdrs, commit3resfail *objp)
+{
+ if (!xdr_wcc_data (xdrs, &objp->file_wcc))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_commit3res (XDR *xdrs, commit3res *objp)
+{
+ if (!xdr_nfsstat3 (xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_commit3resok (xdrs, &objp->commit3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_commit3resfail (xdrs, &objp->commit3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_fhandle3 (XDR *xdrs, fhandle3 *objp)
+{
+ if (!xdr_bytes (xdrs, (char **)&objp->fhandle3_val, (u_int *) &objp->fhandle3_len, FHSIZE3))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_dirpath (XDR *xdrs, dirpath *objp)
+{
+ if (!xdr_string (xdrs, objp, MNTPATHLEN))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_name (XDR *xdrs, name *objp)
+{
+ if (!xdr_string (xdrs, objp, MNTNAMLEN))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_mountstat3 (XDR *xdrs, mountstat3 *objp)
+{
+ if (!xdr_enum (xdrs, (enum_t *) objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_mountres3_ok (XDR *xdrs, mountres3_ok *objp)
+{
+ if (!xdr_fhandle3 (xdrs, &objp->fhandle))
+ return FALSE;
+ if (!xdr_array (xdrs, (char **)&objp->auth_flavors.auth_flavors_val, (u_int *) &objp->auth_flavors.auth_flavors_len, ~0,
+ sizeof (int), (xdrproc_t) xdr_int))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_mountres3 (XDR *xdrs, mountres3 *objp)
+{
+ if (!xdr_mountstat3 (xdrs, &objp->fhs_status))
+ return FALSE;
+ switch (objp->fhs_status) {
+ case MNT3_OK:
+ if (!xdr_mountres3_ok (xdrs, &objp->mountres3_u.mountinfo))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_mountlist (XDR *xdrs, mountlist *objp)
+{
+ if (!xdr_pointer (xdrs, (char **)objp, sizeof (struct mountbody), (xdrproc_t) xdr_mountbody))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_mountbody (XDR *xdrs, mountbody *objp)
+{
+ if (!xdr_name (xdrs, &objp->ml_hostname))
+ return FALSE;
+ if (!xdr_dirpath (xdrs, &objp->ml_directory))
+ return FALSE;
+ if (!xdr_mountlist (xdrs, &objp->ml_next))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_groups (XDR *xdrs, groups *objp)
+{
+ if (!xdr_pointer (xdrs, (char **)objp, sizeof (struct groupnode), (xdrproc_t) xdr_groupnode))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_groupnode (XDR *xdrs, groupnode *objp)
+{
+ if (!xdr_name (xdrs, &objp->gr_name))
+ return FALSE;
+ if (!xdr_groups (xdrs, &objp->gr_next))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_exports (XDR *xdrs, exports *objp)
+{
+ if (!xdr_pointer (xdrs, (char **)objp, sizeof (struct exportnode), (xdrproc_t) xdr_exportnode))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_exportnode (XDR *xdrs, exportnode *objp)
+{
+ if (!xdr_dirpath (xdrs, &objp->ex_dir))
+ return FALSE;
+ if (!xdr_groups (xdrs, &objp->ex_groups))
+ return FALSE;
+ if (!xdr_exports (xdrs, &objp->ex_next))
+ return FALSE;
+ return TRUE;
+}
+
+void
+xdr_free_exports_list (struct exportnode *first)
+{
+ struct exportnode *elist = NULL;
+
+ if (!first)
+ return;
+
+ while (first) {
+ elist = first->ex_next;
+ GF_FREE (first->ex_dir);
+
+ if (first->ex_groups) {
+ GF_FREE (first->ex_groups->gr_name);
+ GF_FREE (first->ex_groups);
+ }
+
+ GF_FREE (first);
+ first = elist;
+ }
+
+}
+
+
+void
+xdr_free_mountlist (mountlist ml)
+{
+ struct mountbody *next = NULL;
+
+ if (!ml)
+ return;
+
+ while (ml) {
+ GF_FREE (ml->ml_hostname);
+ GF_FREE (ml->ml_directory);
+ next = ml->ml_next;
+ GF_FREE (ml);
+ ml = next;
+ }
+
+ return;
+}
+
+
+/* Free statements are based on the way sunrpc xdr decoding
+ * code performs memory allocations.
+ */
+void
+xdr_free_write3args_nocopy (write3args *wa)
+{
+ if (!wa)
+ return;
+
+ FREE (wa->file.data.data_val);
+}
+
+
diff --git a/rpc/xdr/src/xdr-nfs3.h b/rpc/xdr/src/xdr-nfs3.h
new file mode 100644
index 000000000..6f6b0e1f9
--- /dev/null
+++ b/rpc/xdr/src/xdr-nfs3.h
@@ -0,0 +1,1199 @@
+/*
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _XDR_NFS3_H
+#define _XDR_NFS3_H
+
+#include <rpc/rpc.h>
+#include <sys/types.h>
+
+#define NFS3_FHSIZE 64
+#define NFS3_COOKIEVERFSIZE 8
+#define NFS3_CREATEVERFSIZE 8
+#define NFS3_WRITEVERFSIZE 8
+
+#define NFS3_ENTRY3_FIXED_SIZE 24
+#define NFS3_POSTOPATTR_SIZE 88
+#define NFS3_READDIR_RESOK_SIZE (NFS3_POSTOPATTR_SIZE + sizeof (bool_t) + NFS3_COOKIEVERFSIZE)
+
+/* In size of post_op_fh3, the length of the file handle will have to be
+ * included separately since we have variable length fh. Here we only account
+ * for the field for handle_follows and for the file handle length field.
+ */
+#define NFS3_POSTOPFH3_FIXED_SIZE (sizeof (bool_t) + sizeof (uint32_t))
+
+/* Similarly, the size of the entry will have to include the variable length
+ * file handle and the length of the entry name.
+ */
+#define NFS3_ENTRYP3_FIXED_SIZE (NFS3_ENTRY3_FIXED_SIZE + NFS3_POSTOPATTR_SIZE + NFS3_POSTOPFH3_FIXED_SIZE)
+
+typedef uint64_t uint64;
+typedef int64_t int64;
+typedef uint32_t uint32;
+typedef int32_t int32;
+typedef char *filename3;
+typedef char *nfspath3;
+typedef uint64 fileid3;
+typedef uint64 cookie3;
+typedef char cookieverf3[NFS3_COOKIEVERFSIZE];
+typedef char createverf3[NFS3_CREATEVERFSIZE];
+typedef char writeverf3[NFS3_WRITEVERFSIZE];
+typedef uint32 uid3;
+typedef uint32 gid3;
+typedef uint64 size3;
+typedef uint64 offset3;
+typedef uint32 mode3;
+typedef uint32 count3;
+
+#define NFS3MODE_SETXUID 0x00800
+#define NFS3MODE_SETXGID 0x00400
+#define NFS3MODE_SAVESWAPTXT 0x00200
+#define NFS3MODE_ROWNER 0x00100
+#define NFS3MODE_WOWNER 0x00080
+#define NFS3MODE_XOWNER 0x00040
+#define NFS3MODE_RGROUP 0x00020
+#define NFS3MODE_WGROUP 0x00010
+#define NFS3MODE_XGROUP 0x00008
+#define NFS3MODE_ROTHER 0x00004
+#define NFS3MODE_WOTHER 0x00002
+#define NFS3MODE_XOTHER 0x00001
+
+enum nfsstat3 {
+ NFS3_OK = 0,
+ NFS3ERR_PERM = 1,
+ NFS3ERR_NOENT = 2,
+ NFS3ERR_IO = 5,
+ NFS3ERR_NXIO = 6,
+ NFS3ERR_ACCES = 13,
+ NFS3ERR_EXIST = 17,
+ NFS3ERR_XDEV = 18,
+ NFS3ERR_NODEV = 19,
+ NFS3ERR_NOTDIR = 20,
+ NFS3ERR_ISDIR = 21,
+ NFS3ERR_INVAL = 22,
+ NFS3ERR_FBIG = 27,
+ NFS3ERR_NOSPC = 28,
+ NFS3ERR_ROFS = 30,
+ NFS3ERR_MLINK = 31,
+ NFS3ERR_NAMETOOLONG = 63,
+ NFS3ERR_NOTEMPTY = 66,
+ NFS3ERR_DQUOT = 69,
+ NFS3ERR_STALE = 70,
+ NFS3ERR_REMOTE = 71,
+ NFS3ERR_BADHANDLE = 10001,
+ NFS3ERR_NOT_SYNC = 10002,
+ NFS3ERR_BAD_COOKIE = 10003,
+ NFS3ERR_NOTSUPP = 10004,
+ NFS3ERR_TOOSMALL = 10005,
+ NFS3ERR_SERVERFAULT = 10006,
+ NFS3ERR_BADTYPE = 10007,
+ NFS3ERR_JUKEBOX = 10008,
+};
+typedef enum nfsstat3 nfsstat3;
+
+enum ftype3 {
+ NF3REG = 1,
+ NF3DIR = 2,
+ NF3BLK = 3,
+ NF3CHR = 4,
+ NF3LNK = 5,
+ NF3SOCK = 6,
+ NF3FIFO = 7,
+};
+typedef enum ftype3 ftype3;
+
+struct specdata3 {
+ uint32 specdata1;
+ uint32 specdata2;
+};
+typedef struct specdata3 specdata3;
+
+struct nfs_fh3 {
+ struct {
+ u_int data_len;
+ char *data_val;
+ } data;
+};
+typedef struct nfs_fh3 nfs_fh3;
+
+struct nfstime3 {
+ uint32 seconds;
+ uint32 nseconds;
+};
+typedef struct nfstime3 nfstime3;
+
+struct fattr3 {
+ ftype3 type;
+ mode3 mode;
+ uint32 nlink;
+ uid3 uid;
+ gid3 gid;
+ size3 size;
+ size3 used;
+ specdata3 rdev;
+ uint64 fsid;
+ fileid3 fileid;
+ nfstime3 atime;
+ nfstime3 mtime;
+ nfstime3 ctime;
+};
+typedef struct fattr3 fattr3;
+
+struct post_op_attr {
+ bool_t attributes_follow;
+ union {
+ fattr3 attributes;
+ } post_op_attr_u;
+};
+typedef struct post_op_attr post_op_attr;
+
+struct wcc_attr {
+ size3 size;
+ nfstime3 mtime;
+ nfstime3 ctime;
+};
+typedef struct wcc_attr wcc_attr;
+
+struct pre_op_attr {
+ bool_t attributes_follow;
+ union {
+ wcc_attr attributes;
+ } pre_op_attr_u;
+};
+typedef struct pre_op_attr pre_op_attr;
+
+struct wcc_data {
+ pre_op_attr before;
+ post_op_attr after;
+};
+typedef struct wcc_data wcc_data;
+
+struct post_op_fh3 {
+ bool_t handle_follows;
+ union {
+ nfs_fh3 handle;
+ } post_op_fh3_u;
+};
+typedef struct post_op_fh3 post_op_fh3;
+
+enum time_how {
+ DONT_CHANGE = 0,
+ SET_TO_SERVER_TIME = 1,
+ SET_TO_CLIENT_TIME = 2,
+};
+typedef enum time_how time_how;
+
+struct set_mode3 {
+ bool_t set_it;
+ union {
+ mode3 mode;
+ } set_mode3_u;
+};
+typedef struct set_mode3 set_mode3;
+
+struct set_uid3 {
+ bool_t set_it;
+ union {
+ uid3 uid;
+ } set_uid3_u;
+};
+typedef struct set_uid3 set_uid3;
+
+struct set_gid3 {
+ bool_t set_it;
+ union {
+ gid3 gid;
+ } set_gid3_u;
+};
+typedef struct set_gid3 set_gid3;
+
+struct set_size3 {
+ bool_t set_it;
+ union {
+ size3 size;
+ } set_size3_u;
+};
+typedef struct set_size3 set_size3;
+
+struct set_atime {
+ time_how set_it;
+ union {
+ nfstime3 atime;
+ } set_atime_u;
+};
+typedef struct set_atime set_atime;
+
+struct set_mtime {
+ time_how set_it;
+ union {
+ nfstime3 mtime;
+ } set_mtime_u;
+};
+typedef struct set_mtime set_mtime;
+
+struct sattr3 {
+ set_mode3 mode;
+ set_uid3 uid;
+ set_gid3 gid;
+ set_size3 size;
+ set_atime atime;
+ set_mtime mtime;
+};
+typedef struct sattr3 sattr3;
+
+struct diropargs3 {
+ nfs_fh3 dir;
+ filename3 name;
+};
+typedef struct diropargs3 diropargs3;
+
+struct getattr3args {
+ nfs_fh3 object;
+};
+typedef struct getattr3args getattr3args;
+
+struct getattr3resok {
+ fattr3 obj_attributes;
+};
+typedef struct getattr3resok getattr3resok;
+
+struct getattr3res {
+ nfsstat3 status;
+ union {
+ getattr3resok resok;
+ } getattr3res_u;
+};
+typedef struct getattr3res getattr3res;
+
+struct sattrguard3 {
+ bool_t check;
+ union {
+ nfstime3 obj_ctime;
+ } sattrguard3_u;
+};
+typedef struct sattrguard3 sattrguard3;
+
+struct setattr3args {
+ nfs_fh3 object;
+ sattr3 new_attributes;
+ sattrguard3 guard;
+};
+typedef struct setattr3args setattr3args;
+
+struct setattr3resok {
+ wcc_data obj_wcc;
+};
+typedef struct setattr3resok setattr3resok;
+
+struct setattr3resfail {
+ wcc_data obj_wcc;
+};
+typedef struct setattr3resfail setattr3resfail;
+
+struct setattr3res {
+ nfsstat3 status;
+ union {
+ setattr3resok resok;
+ setattr3resfail resfail;
+ } setattr3res_u;
+};
+typedef struct setattr3res setattr3res;
+
+struct lookup3args {
+ diropargs3 what;
+};
+typedef struct lookup3args lookup3args;
+
+struct lookup3resok {
+ nfs_fh3 object;
+ post_op_attr obj_attributes;
+ post_op_attr dir_attributes;
+};
+typedef struct lookup3resok lookup3resok;
+
+struct lookup3resfail {
+ post_op_attr dir_attributes;
+};
+typedef struct lookup3resfail lookup3resfail;
+
+struct lookup3res {
+ nfsstat3 status;
+ union {
+ lookup3resok resok;
+ lookup3resfail resfail;
+ } lookup3res_u;
+};
+typedef struct lookup3res lookup3res;
+#define ACCESS3_READ 0x0001
+#define ACCESS3_LOOKUP 0x0002
+#define ACCESS3_MODIFY 0x0004
+#define ACCESS3_EXTEND 0x0008
+#define ACCESS3_DELETE 0x0010
+#define ACCESS3_EXECUTE 0x0020
+
+struct access3args {
+ nfs_fh3 object;
+ uint32 access;
+};
+typedef struct access3args access3args;
+
+struct access3resok {
+ post_op_attr obj_attributes;
+ uint32 access;
+};
+typedef struct access3resok access3resok;
+
+struct access3resfail {
+ post_op_attr obj_attributes;
+};
+typedef struct access3resfail access3resfail;
+
+struct access3res {
+ nfsstat3 status;
+ union {
+ access3resok resok;
+ access3resfail resfail;
+ } access3res_u;
+};
+typedef struct access3res access3res;
+
+struct readlink3args {
+ nfs_fh3 symlink;
+};
+typedef struct readlink3args readlink3args;
+
+struct readlink3resok {
+ post_op_attr symlink_attributes;
+ nfspath3 data;
+};
+typedef struct readlink3resok readlink3resok;
+
+struct readlink3resfail {
+ post_op_attr symlink_attributes;
+};
+typedef struct readlink3resfail readlink3resfail;
+
+struct readlink3res {
+ nfsstat3 status;
+ union {
+ readlink3resok resok;
+ readlink3resfail resfail;
+ } readlink3res_u;
+};
+typedef struct readlink3res readlink3res;
+
+struct read3args {
+ nfs_fh3 file;
+ offset3 offset;
+ count3 count;
+};
+typedef struct read3args read3args;
+
+struct read3resok {
+ post_op_attr file_attributes;
+ count3 count;
+ bool_t eof;
+ struct {
+ u_int data_len;
+ char *data_val;
+ } data;
+};
+typedef struct read3resok read3resok;
+
+struct read3resfail {
+ post_op_attr file_attributes;
+};
+typedef struct read3resfail read3resfail;
+
+struct read3res {
+ nfsstat3 status;
+ union {
+ read3resok resok;
+ read3resfail resfail;
+ } read3res_u;
+};
+typedef struct read3res read3res;
+
+enum stable_how {
+ UNSTABLE = 0,
+ DATA_SYNC = 1,
+ FILE_SYNC = 2,
+};
+typedef enum stable_how stable_how;
+
+struct write3args {
+ nfs_fh3 file;
+ offset3 offset;
+ count3 count;
+ stable_how stable;
+ struct {
+ u_int data_len;
+ char *data_val;
+ } data;
+};
+typedef struct write3args write3args;
+
+/* Generally, the protocol allows the file handle to be less than 64 bytes but
+ * our server does not return file handles less than 64b so we can safely say
+ * sizeof (nfs_fh3) rather than first trying to extract the fh size of the
+ * network followed by a sized-read of the file handle.
+ */
+#define NFS3_WRITE3ARGS_SIZE (sizeof (uint32_t) + NFS3_FHSIZE + sizeof (offset3) + sizeof (count3) + sizeof (uint32_t))
+struct write3resok {
+ wcc_data file_wcc;
+ count3 count;
+ stable_how committed;
+ writeverf3 verf;
+};
+typedef struct write3resok write3resok;
+
+struct write3resfail {
+ wcc_data file_wcc;
+};
+typedef struct write3resfail write3resfail;
+
+struct write3res {
+ nfsstat3 status;
+ union {
+ write3resok resok;
+ write3resfail resfail;
+ } write3res_u;
+};
+typedef struct write3res write3res;
+
+enum createmode3 {
+ UNCHECKED = 0,
+ GUARDED = 1,
+ EXCLUSIVE = 2,
+};
+typedef enum createmode3 createmode3;
+
+struct createhow3 {
+ createmode3 mode;
+ union {
+ sattr3 obj_attributes;
+ createverf3 verf;
+ } createhow3_u;
+};
+typedef struct createhow3 createhow3;
+
+struct create3args {
+ diropargs3 where;
+ createhow3 how;
+};
+typedef struct create3args create3args;
+
+struct create3resok {
+ post_op_fh3 obj;
+ post_op_attr obj_attributes;
+ wcc_data dir_wcc;
+};
+typedef struct create3resok create3resok;
+
+struct create3resfail {
+ wcc_data dir_wcc;
+};
+typedef struct create3resfail create3resfail;
+
+struct create3res {
+ nfsstat3 status;
+ union {
+ create3resok resok;
+ create3resfail resfail;
+ } create3res_u;
+};
+typedef struct create3res create3res;
+
+struct mkdir3args {
+ diropargs3 where;
+ sattr3 attributes;
+};
+typedef struct mkdir3args mkdir3args;
+
+struct mkdir3resok {
+ post_op_fh3 obj;
+ post_op_attr obj_attributes;
+ wcc_data dir_wcc;
+};
+typedef struct mkdir3resok mkdir3resok;
+
+struct mkdir3resfail {
+ wcc_data dir_wcc;
+};
+typedef struct mkdir3resfail mkdir3resfail;
+
+struct mkdir3res {
+ nfsstat3 status;
+ union {
+ mkdir3resok resok;
+ mkdir3resfail resfail;
+ } mkdir3res_u;
+};
+typedef struct mkdir3res mkdir3res;
+
+struct symlinkdata3 {
+ sattr3 symlink_attributes;
+ nfspath3 symlink_data;
+};
+typedef struct symlinkdata3 symlinkdata3;
+
+struct symlink3args {
+ diropargs3 where;
+ symlinkdata3 symlink;
+};
+typedef struct symlink3args symlink3args;
+
+struct symlink3resok {
+ post_op_fh3 obj;
+ post_op_attr obj_attributes;
+ wcc_data dir_wcc;
+};
+typedef struct symlink3resok symlink3resok;
+
+struct symlink3resfail {
+ wcc_data dir_wcc;
+};
+typedef struct symlink3resfail symlink3resfail;
+
+struct symlink3res {
+ nfsstat3 status;
+ union {
+ symlink3resok resok;
+ symlink3resfail resfail;
+ } symlink3res_u;
+};
+typedef struct symlink3res symlink3res;
+
+struct devicedata3 {
+ sattr3 dev_attributes;
+ specdata3 spec;
+};
+typedef struct devicedata3 devicedata3;
+
+struct mknoddata3 {
+ ftype3 type;
+ union {
+ devicedata3 device;
+ sattr3 pipe_attributes;
+ } mknoddata3_u;
+};
+typedef struct mknoddata3 mknoddata3;
+
+struct mknod3args {
+ diropargs3 where;
+ mknoddata3 what;
+};
+typedef struct mknod3args mknod3args;
+
+struct mknod3resok {
+ post_op_fh3 obj;
+ post_op_attr obj_attributes;
+ wcc_data dir_wcc;
+};
+typedef struct mknod3resok mknod3resok;
+
+struct mknod3resfail {
+ wcc_data dir_wcc;
+};
+typedef struct mknod3resfail mknod3resfail;
+
+struct mknod3res {
+ nfsstat3 status;
+ union {
+ mknod3resok resok;
+ mknod3resfail resfail;
+ } mknod3res_u;
+};
+typedef struct mknod3res mknod3res;
+
+struct remove3args {
+ diropargs3 object;
+};
+typedef struct remove3args remove3args;
+
+struct remove3resok {
+ wcc_data dir_wcc;
+};
+typedef struct remove3resok remove3resok;
+
+struct remove3resfail {
+ wcc_data dir_wcc;
+};
+typedef struct remove3resfail remove3resfail;
+
+struct remove3res {
+ nfsstat3 status;
+ union {
+ remove3resok resok;
+ remove3resfail resfail;
+ } remove3res_u;
+};
+typedef struct remove3res remove3res;
+
+struct rmdir3args {
+ diropargs3 object;
+};
+typedef struct rmdir3args rmdir3args;
+
+struct rmdir3resok {
+ wcc_data dir_wcc;
+};
+typedef struct rmdir3resok rmdir3resok;
+
+struct rmdir3resfail {
+ wcc_data dir_wcc;
+};
+typedef struct rmdir3resfail rmdir3resfail;
+
+struct rmdir3res {
+ nfsstat3 status;
+ union {
+ rmdir3resok resok;
+ rmdir3resfail resfail;
+ } rmdir3res_u;
+};
+typedef struct rmdir3res rmdir3res;
+
+struct rename3args {
+ diropargs3 from;
+ diropargs3 to;
+};
+typedef struct rename3args rename3args;
+
+struct rename3resok {
+ wcc_data fromdir_wcc;
+ wcc_data todir_wcc;
+};
+typedef struct rename3resok rename3resok;
+
+struct rename3resfail {
+ wcc_data fromdir_wcc;
+ wcc_data todir_wcc;
+};
+typedef struct rename3resfail rename3resfail;
+
+struct rename3res {
+ nfsstat3 status;
+ union {
+ rename3resok resok;
+ rename3resfail resfail;
+ } rename3res_u;
+};
+typedef struct rename3res rename3res;
+
+struct link3args {
+ nfs_fh3 file;
+ diropargs3 link;
+};
+typedef struct link3args link3args;
+
+struct link3resok {
+ post_op_attr file_attributes;
+ wcc_data linkdir_wcc;
+};
+typedef struct link3resok link3resok;
+
+struct link3resfail {
+ post_op_attr file_attributes;
+ wcc_data linkdir_wcc;
+};
+typedef struct link3resfail link3resfail;
+
+struct link3res {
+ nfsstat3 status;
+ union {
+ link3resok resok;
+ link3resfail resfail;
+ } link3res_u;
+};
+typedef struct link3res link3res;
+
+struct readdir3args {
+ nfs_fh3 dir;
+ cookie3 cookie;
+ cookieverf3 cookieverf;
+ count3 count;
+};
+typedef struct readdir3args readdir3args;
+
+struct entry3 {
+ fileid3 fileid;
+ filename3 name;
+ cookie3 cookie;
+ struct entry3 *nextentry;
+};
+typedef struct entry3 entry3;
+
+struct dirlist3 {
+ entry3 *entries;
+ bool_t eof;
+};
+typedef struct dirlist3 dirlist3;
+
+struct readdir3resok {
+ post_op_attr dir_attributes;
+ cookieverf3 cookieverf;
+ dirlist3 reply;
+};
+typedef struct readdir3resok readdir3resok;
+
+struct readdir3resfail {
+ post_op_attr dir_attributes;
+};
+typedef struct readdir3resfail readdir3resfail;
+
+struct readdir3res {
+ nfsstat3 status;
+ union {
+ readdir3resok resok;
+ readdir3resfail resfail;
+ } readdir3res_u;
+};
+typedef struct readdir3res readdir3res;
+
+struct readdirp3args {
+ nfs_fh3 dir;
+ cookie3 cookie;
+ cookieverf3 cookieverf;
+ count3 dircount;
+ count3 maxcount;
+};
+typedef struct readdirp3args readdirp3args;
+
+struct entryp3 {
+ fileid3 fileid;
+ filename3 name;
+ cookie3 cookie;
+ post_op_attr name_attributes;
+ post_op_fh3 name_handle;
+ struct entryp3 *nextentry;
+};
+typedef struct entryp3 entryp3;
+
+struct dirlistp3 {
+ entryp3 *entries;
+ bool_t eof;
+};
+typedef struct dirlistp3 dirlistp3;
+
+struct readdirp3resok {
+ post_op_attr dir_attributes;
+ cookieverf3 cookieverf;
+ dirlistp3 reply;
+};
+typedef struct readdirp3resok readdirp3resok;
+
+struct readdirp3resfail {
+ post_op_attr dir_attributes;
+};
+typedef struct readdirp3resfail readdirp3resfail;
+
+struct readdirp3res {
+ nfsstat3 status;
+ union {
+ readdirp3resok resok;
+ readdirp3resfail resfail;
+ } readdirp3res_u;
+};
+typedef struct readdirp3res readdirp3res;
+
+struct fsstat3args {
+ nfs_fh3 fsroot;
+};
+typedef struct fsstat3args fsstat3args;
+
+struct fsstat3resok {
+ post_op_attr obj_attributes;
+ size3 tbytes;
+ size3 fbytes;
+ size3 abytes;
+ size3 tfiles;
+ size3 ffiles;
+ size3 afiles;
+ uint32 invarsec;
+};
+typedef struct fsstat3resok fsstat3resok;
+
+struct fsstat3resfail {
+ post_op_attr obj_attributes;
+};
+typedef struct fsstat3resfail fsstat3resfail;
+
+struct fsstat3res {
+ nfsstat3 status;
+ union {
+ fsstat3resok resok;
+ fsstat3resfail resfail;
+ } fsstat3res_u;
+};
+typedef struct fsstat3res fsstat3res;
+#define FSF3_LINK 0x0001
+#define FSF3_SYMLINK 0x0002
+#define FSF3_HOMOGENEOUS 0x0008
+#define FSF3_CANSETTIME 0x0010
+
+struct fsinfo3args {
+ nfs_fh3 fsroot;
+};
+typedef struct fsinfo3args fsinfo3args;
+
+struct fsinfo3resok {
+ post_op_attr obj_attributes;
+ uint32 rtmax;
+ uint32 rtpref;
+ uint32 rtmult;
+ uint32 wtmax;
+ uint32 wtpref;
+ uint32 wtmult;
+ uint32 dtpref;
+ size3 maxfilesize;
+ nfstime3 time_delta;
+ uint32 properties;
+};
+typedef struct fsinfo3resok fsinfo3resok;
+
+struct fsinfo3resfail {
+ post_op_attr obj_attributes;
+};
+typedef struct fsinfo3resfail fsinfo3resfail;
+
+struct fsinfo3res {
+ nfsstat3 status;
+ union {
+ fsinfo3resok resok;
+ fsinfo3resfail resfail;
+ } fsinfo3res_u;
+};
+typedef struct fsinfo3res fsinfo3res;
+
+struct pathconf3args {
+ nfs_fh3 object;
+};
+typedef struct pathconf3args pathconf3args;
+
+struct pathconf3resok {
+ post_op_attr obj_attributes;
+ uint32 linkmax;
+ uint32 name_max;
+ bool_t no_trunc;
+ bool_t chown_restricted;
+ bool_t case_insensitive;
+ bool_t case_preserving;
+};
+typedef struct pathconf3resok pathconf3resok;
+
+struct pathconf3resfail {
+ post_op_attr obj_attributes;
+};
+typedef struct pathconf3resfail pathconf3resfail;
+
+struct pathconf3res {
+ nfsstat3 status;
+ union {
+ pathconf3resok resok;
+ pathconf3resfail resfail;
+ } pathconf3res_u;
+};
+typedef struct pathconf3res pathconf3res;
+
+struct commit3args {
+ nfs_fh3 file;
+ offset3 offset;
+ count3 count;
+};
+typedef struct commit3args commit3args;
+
+struct commit3resok {
+ wcc_data file_wcc;
+ writeverf3 verf;
+};
+typedef struct commit3resok commit3resok;
+
+struct commit3resfail {
+ wcc_data file_wcc;
+};
+typedef struct commit3resfail commit3resfail;
+
+struct commit3res {
+ nfsstat3 status;
+ union {
+ commit3resok resok;
+ commit3resfail resfail;
+ } commit3res_u;
+};
+typedef struct commit3res commit3res;
+#define MNTPATHLEN 1024
+#define MNTNAMLEN 255
+#define FHSIZE3 NFS3_FHSIZE
+
+typedef struct {
+ u_int fhandle3_len;
+ char *fhandle3_val;
+} fhandle3;
+
+typedef char *dirpath;
+
+typedef char *name;
+
+enum mountstat3 {
+ MNT3_OK = 0,
+ MNT3ERR_PERM = 1,
+ MNT3ERR_NOENT = 2,
+ MNT3ERR_IO = 5,
+ MNT3ERR_ACCES = 13,
+ MNT3ERR_NOTDIR = 20,
+ MNT3ERR_INVAL = 22,
+ MNT3ERR_NAMETOOLONG = 63,
+ MNT3ERR_NOTSUPP = 10004,
+ MNT3ERR_SERVERFAULT = 10006,
+};
+typedef enum mountstat3 mountstat3;
+
+struct mountres3_ok {
+ fhandle3 fhandle;
+ struct {
+ u_int auth_flavors_len;
+ int *auth_flavors_val;
+ } auth_flavors;
+};
+typedef struct mountres3_ok mountres3_ok;
+
+struct mountres3 {
+ mountstat3 fhs_status;
+ union {
+ mountres3_ok mountinfo;
+ } mountres3_u;
+};
+typedef struct mountres3 mountres3;
+
+typedef struct mountbody *mountlist;
+
+struct mountbody {
+ name ml_hostname;
+ dirpath ml_directory;
+ mountlist ml_next;
+};
+typedef struct mountbody mountbody;
+
+typedef struct groupnode *groups;
+
+struct groupnode {
+ name gr_name;
+ groups gr_next;
+};
+typedef struct groupnode groupnode;
+
+typedef struct exportnode *exports;
+
+struct exportnode {
+ dirpath ex_dir;
+ groups ex_groups;
+ exports ex_next;
+};
+typedef struct exportnode exportnode;
+
+#define NFS_PROGRAM 100003
+#define NFS_V3 3
+
+#define NFS3_NULL 0
+#define NFS3_GETATTR 1
+#define NFS3_SETATTR 2
+#define NFS3_LOOKUP 3
+#define NFS3_ACCESS 4
+#define NFS3_READLINK 5
+#define NFS3_READ 6
+#define NFS3_WRITE 7
+#define NFS3_CREATE 8
+#define NFS3_MKDIR 9
+#define NFS3_SYMLINK 10
+#define NFS3_MKNOD 11
+#define NFS3_REMOVE 12
+#define NFS3_RMDIR 13
+#define NFS3_RENAME 14
+#define NFS3_LINK 15
+#define NFS3_READDIR 16
+#define NFS3_READDIRP 17
+#define NFS3_FSSTAT 18
+#define NFS3_FSINFO 19
+#define NFS3_PATHCONF 20
+#define NFS3_COMMIT 21
+#define NFS3_PROC_COUNT 22
+
+#define MOUNT_PROGRAM 100005
+#define MOUNT_V3 3
+#define MOUNT_V1 1
+
+#define MOUNT3_NULL 0
+#define MOUNT3_MNT 1
+#define MOUNT3_DUMP 2
+#define MOUNT3_UMNT 3
+#define MOUNT3_UMNTALL 4
+#define MOUNT3_EXPORT 5
+#define MOUNT3_PROC_COUNT 6
+
+#define MOUNT1_NULL 0
+#define MOUNT1_MNT 1
+#define MOUNT1_DUMP 2
+#define MOUNT1_UMNT 3
+#define MOUNT1_UMNTALL 4
+#define MOUNT1_EXPORT 5
+#define MOUNT1_PROC_COUNT 6
+/* the xdr functions */
+
+extern bool_t xdr_uint64 (XDR *, uint64*);
+extern bool_t xdr_int64 (XDR *, int64*);
+extern bool_t xdr_uint32 (XDR *, uint32*);
+extern bool_t xdr_int32 (XDR *, int32*);
+extern bool_t xdr_filename3 (XDR *, filename3*);
+extern bool_t xdr_nfspath3 (XDR *, nfspath3*);
+extern bool_t xdr_fileid3 (XDR *, fileid3*);
+extern bool_t xdr_cookie3 (XDR *, cookie3*);
+extern bool_t xdr_cookieverf3 (XDR *, cookieverf3);
+extern bool_t xdr_createverf3 (XDR *, createverf3);
+extern bool_t xdr_writeverf3 (XDR *, writeverf3);
+extern bool_t xdr_uid3 (XDR *, uid3*);
+extern bool_t xdr_gid3 (XDR *, gid3*);
+extern bool_t xdr_size3 (XDR *, size3*);
+extern bool_t xdr_offset3 (XDR *, offset3*);
+extern bool_t xdr_mode3 (XDR *, mode3*);
+extern bool_t xdr_count3 (XDR *, count3*);
+extern bool_t xdr_nfsstat3 (XDR *, nfsstat3*);
+extern bool_t xdr_ftype3 (XDR *, ftype3*);
+extern bool_t xdr_specdata3 (XDR *, specdata3*);
+extern bool_t xdr_nfs_fh3 (XDR *, nfs_fh3*);
+extern bool_t xdr_nfstime3 (XDR *, nfstime3*);
+extern bool_t xdr_fattr3 (XDR *, fattr3*);
+extern bool_t xdr_post_op_attr (XDR *, post_op_attr*);
+extern bool_t xdr_wcc_attr (XDR *, wcc_attr*);
+extern bool_t xdr_pre_op_attr (XDR *, pre_op_attr*);
+extern bool_t xdr_wcc_data (XDR *, wcc_data*);
+extern bool_t xdr_post_op_fh3 (XDR *, post_op_fh3*);
+extern bool_t xdr_time_how (XDR *, time_how*);
+extern bool_t xdr_set_mode3 (XDR *, set_mode3*);
+extern bool_t xdr_set_uid3 (XDR *, set_uid3*);
+extern bool_t xdr_set_gid3 (XDR *, set_gid3*);
+extern bool_t xdr_set_size3 (XDR *, set_size3*);
+extern bool_t xdr_set_atime (XDR *, set_atime*);
+extern bool_t xdr_set_mtime (XDR *, set_mtime*);
+extern bool_t xdr_sattr3 (XDR *, sattr3*);
+extern bool_t xdr_diropargs3 (XDR *, diropargs3*);
+extern bool_t xdr_getattr3args (XDR *, getattr3args*);
+extern bool_t xdr_getattr3resok (XDR *, getattr3resok*);
+extern bool_t xdr_getattr3res (XDR *, getattr3res*);
+extern bool_t xdr_sattrguard3 (XDR *, sattrguard3*);
+extern bool_t xdr_setattr3args (XDR *, setattr3args*);
+extern bool_t xdr_setattr3resok (XDR *, setattr3resok*);
+extern bool_t xdr_setattr3resfail (XDR *, setattr3resfail*);
+extern bool_t xdr_setattr3res (XDR *, setattr3res*);
+extern bool_t xdr_lookup3args (XDR *, lookup3args*);
+extern bool_t xdr_lookup3resok (XDR *, lookup3resok*);
+extern bool_t xdr_lookup3resfail (XDR *, lookup3resfail*);
+extern bool_t xdr_lookup3res (XDR *, lookup3res*);
+extern bool_t xdr_access3args (XDR *, access3args*);
+extern bool_t xdr_access3resok (XDR *, access3resok*);
+extern bool_t xdr_access3resfail (XDR *, access3resfail*);
+extern bool_t xdr_access3res (XDR *, access3res*);
+extern bool_t xdr_readlink3args (XDR *, readlink3args*);
+extern bool_t xdr_readlink3resok (XDR *, readlink3resok*);
+extern bool_t xdr_readlink3resfail (XDR *, readlink3resfail*);
+extern bool_t xdr_readlink3res (XDR *, readlink3res*);
+extern bool_t xdr_read3args (XDR *, read3args*);
+extern bool_t xdr_read3resok (XDR *, read3resok*);
+extern bool_t xdr_read3resfail (XDR *, read3resfail*);
+extern bool_t xdr_read3res (XDR *, read3res*);
+extern bool_t xdr_read3res_nocopy (XDR *xdrs, read3res *objp);
+extern bool_t xdr_stable_how (XDR *, stable_how*);
+extern bool_t xdr_write3args (XDR *, write3args*);
+extern bool_t xdr_write3resok (XDR *, write3resok*);
+extern bool_t xdr_write3resfail (XDR *, write3resfail*);
+extern bool_t xdr_write3res (XDR *, write3res*);
+extern bool_t xdr_createmode3 (XDR *, createmode3*);
+extern bool_t xdr_createhow3 (XDR *, createhow3*);
+extern bool_t xdr_create3args (XDR *, create3args*);
+extern bool_t xdr_create3resok (XDR *, create3resok*);
+extern bool_t xdr_create3resfail (XDR *, create3resfail*);
+extern bool_t xdr_create3res (XDR *, create3res*);
+extern bool_t xdr_mkdir3args (XDR *, mkdir3args*);
+extern bool_t xdr_mkdir3resok (XDR *, mkdir3resok*);
+extern bool_t xdr_mkdir3resfail (XDR *, mkdir3resfail*);
+extern bool_t xdr_mkdir3res (XDR *, mkdir3res*);
+extern bool_t xdr_symlinkdata3 (XDR *, symlinkdata3*);
+extern bool_t xdr_symlink3args (XDR *, symlink3args*);
+extern bool_t xdr_symlink3resok (XDR *, symlink3resok*);
+extern bool_t xdr_symlink3resfail (XDR *, symlink3resfail*);
+extern bool_t xdr_symlink3res (XDR *, symlink3res*);
+extern bool_t xdr_devicedata3 (XDR *, devicedata3*);
+extern bool_t xdr_mknoddata3 (XDR *, mknoddata3*);
+extern bool_t xdr_mknod3args (XDR *, mknod3args*);
+extern bool_t xdr_mknod3resok (XDR *, mknod3resok*);
+extern bool_t xdr_mknod3resfail (XDR *, mknod3resfail*);
+extern bool_t xdr_mknod3res (XDR *, mknod3res*);
+extern bool_t xdr_remove3args (XDR *, remove3args*);
+extern bool_t xdr_remove3resok (XDR *, remove3resok*);
+extern bool_t xdr_remove3resfail (XDR *, remove3resfail*);
+extern bool_t xdr_remove3res (XDR *, remove3res*);
+extern bool_t xdr_rmdir3args (XDR *, rmdir3args*);
+extern bool_t xdr_rmdir3resok (XDR *, rmdir3resok*);
+extern bool_t xdr_rmdir3resfail (XDR *, rmdir3resfail*);
+extern bool_t xdr_rmdir3res (XDR *, rmdir3res*);
+extern bool_t xdr_rename3args (XDR *, rename3args*);
+extern bool_t xdr_rename3resok (XDR *, rename3resok*);
+extern bool_t xdr_rename3resfail (XDR *, rename3resfail*);
+extern bool_t xdr_rename3res (XDR *, rename3res*);
+extern bool_t xdr_link3args (XDR *, link3args*);
+extern bool_t xdr_link3resok (XDR *, link3resok*);
+extern bool_t xdr_link3resfail (XDR *, link3resfail*);
+extern bool_t xdr_link3res (XDR *, link3res*);
+extern bool_t xdr_readdir3args (XDR *, readdir3args*);
+extern bool_t xdr_entry3 (XDR *, entry3*);
+extern bool_t xdr_dirlist3 (XDR *, dirlist3*);
+extern bool_t xdr_readdir3resok (XDR *, readdir3resok*);
+extern bool_t xdr_readdir3resfail (XDR *, readdir3resfail*);
+extern bool_t xdr_readdir3res (XDR *, readdir3res*);
+extern bool_t xdr_readdirp3args (XDR *, readdirp3args*);
+extern bool_t xdr_entryp3 (XDR *, entryp3*);
+extern bool_t xdr_dirlistp3 (XDR *, dirlistp3*);
+extern bool_t xdr_readdirp3resok (XDR *, readdirp3resok*);
+extern bool_t xdr_readdirp3resfail (XDR *, readdirp3resfail*);
+extern bool_t xdr_readdirp3res (XDR *, readdirp3res*);
+extern bool_t xdr_fsstat3args (XDR *, fsstat3args*);
+extern bool_t xdr_fsstat3resok (XDR *, fsstat3resok*);
+extern bool_t xdr_fsstat3resfail (XDR *, fsstat3resfail*);
+extern bool_t xdr_fsstat3res (XDR *, fsstat3res*);
+extern bool_t xdr_fsinfo3args (XDR *, fsinfo3args*);
+extern bool_t xdr_fsinfo3resok (XDR *, fsinfo3resok*);
+extern bool_t xdr_fsinfo3resfail (XDR *, fsinfo3resfail*);
+extern bool_t xdr_fsinfo3res (XDR *, fsinfo3res*);
+extern bool_t xdr_pathconf3args (XDR *, pathconf3args*);
+extern bool_t xdr_pathconf3resok (XDR *, pathconf3resok*);
+extern bool_t xdr_pathconf3resfail (XDR *, pathconf3resfail*);
+extern bool_t xdr_pathconf3res (XDR *, pathconf3res*);
+extern bool_t xdr_commit3args (XDR *, commit3args*);
+extern bool_t xdr_commit3resok (XDR *, commit3resok*);
+extern bool_t xdr_commit3resfail (XDR *, commit3resfail*);
+extern bool_t xdr_commit3res (XDR *, commit3res*);
+extern bool_t xdr_fhandle3 (XDR *, fhandle3*);
+extern bool_t xdr_dirpath (XDR *, dirpath*);
+extern bool_t xdr_name (XDR *, name*);
+extern bool_t xdr_mountstat3 (XDR *, mountstat3*);
+extern bool_t xdr_mountres3_ok (XDR *, mountres3_ok*);
+extern bool_t xdr_mountres3 (XDR *, mountres3*);
+extern bool_t xdr_mountlist (XDR *, mountlist*);
+extern bool_t xdr_mountbody (XDR *, mountbody*);
+extern bool_t xdr_groups (XDR *, groups*);
+extern bool_t xdr_groupnode (XDR *, groupnode*);
+extern bool_t xdr_exports (XDR *, exports*);
+extern bool_t xdr_exportnode (XDR *, exportnode*);
+
+extern void xdr_free_exports_list (struct exportnode *first);
+extern void xdr_free_mountlist (mountlist ml);
+
+extern void xdr_free_write3args_nocopy (write3args *wa);
+#endif
diff --git a/run-tests.sh b/run-tests.sh
new file mode 100755
index 000000000..128404ed5
--- /dev/null
+++ b/run-tests.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+#
+
+function _init()
+{
+ regression_testsdir=$(dirname $0);
+
+ if [ ! -f ${regression_testsdir}/tests/include.rc ]; then
+ echo "Seems like GlusterFS quality tests are corrupted..aborting!!"
+ exit 1
+ fi
+}
+
+function main()
+{
+ if [ $# -lt 1 ]; then
+ echo "Running all the regression test cases"
+ prove -rf --timer ${regression_testsdir}/tests;
+ else
+ ## TODO
+ echo "Running single regression test.."
+ echo "WARNING: yet to be implemented.. exiting safely"
+ exit 0
+ #export DEBUG=1;
+ #echo "Automatically setting up DEBUG=1 for this test $1";
+ fi
+}
+
+_init "$@" && main "$@"
diff --git a/scheduler/Makefile.am b/scheduler/Makefile.am
deleted file mode 100644
index 618fa7dd8..000000000
--- a/scheduler/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = alu random nufa rr switch
-
-CLEANFILES =
diff --git a/scheduler/alu/src/Makefile.am b/scheduler/alu/src/Makefile.am
deleted file mode 100644
index eb7d0db07..000000000
--- a/scheduler/alu/src/Makefile.am
+++ /dev/null
@@ -1,14 +0,0 @@
-sched_LTLIBRARIES = alu.la
-scheddir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/scheduler
-
-alu_la_LDFLAGS = -module -avoidversion
-
-alu_la_SOURCES = alu.c
-alu_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-noinst_HEADERS = alu.h
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
diff --git a/scheduler/alu/src/alu.c b/scheduler/alu/src/alu.c
deleted file mode 100644
index 571d06ee7..000000000
--- a/scheduler/alu/src/alu.c
+++ /dev/null
@@ -1,993 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-
-/* ALU code needs a complete re-write. This is one of the most important
- * part of GlusterFS and so needs more and more reviews and testing
- */
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <sys/time.h>
-#include <stdint.h>
-#include "stack.h"
-#include "alu.h"
-
-#define ALU_DISK_USAGE_ENTRY_THRESHOLD_DEFAULT (1 * GF_UNIT_GB)
-#define ALU_DISK_USAGE_EXIT_THRESHOLD_DEFAULT (512 * GF_UNIT_MB)
-
-#define ALU_WRITE_USAGE_ENTRY_THRESHOLD_DEFAULT 25
-#define ALU_WRITE_USAGE_EXIT_THRESHOLD_DEFAULT 5
-
-#define ALU_READ_USAGE_ENTRY_THRESHOLD_DEFAULT 25
-#define ALU_READ_USAGE_EXIT_THRESHOLD_DEFAULT 5
-
-#define ALU_OPEN_FILES_USAGE_ENTRY_THRESHOLD_DEFAULT 1000
-#define ALU_OPEN_FILES_USAGE_EXIT_THRESHOLD_DEFAULT 100
-
-#define ALU_LIMITS_TOTAL_DISK_SIZE_DEFAULT 100
-
-#define ALU_REFRESH_INTERVAL_DEFAULT 5
-#define ALU_REFRESH_CREATE_COUNT_DEFAULT 5
-
-
-static int64_t
-get_stats_disk_usage (struct xlator_stats *this)
-{
- return this->disk_usage;
-}
-
-static int64_t
-get_stats_write_usage (struct xlator_stats *this)
-{
- return this->write_usage;
-}
-
-static int64_t
-get_stats_read_usage (struct xlator_stats *this)
-{
- return this->read_usage;
-}
-
-static int64_t
-get_stats_disk_speed (struct xlator_stats *this)
-{
- return this->disk_speed;
-}
-
-static int64_t
-get_stats_file_usage (struct xlator_stats *this)
-{
- /* Avoid warning "defined but not used" */
- (void) &get_stats_file_usage;
-
- return this->nr_files;
-}
-
-static int64_t
-get_stats_free_disk (struct xlator_stats *this)
-{
- if (this->total_disk_size > 0)
- return (this->free_disk * 100) / this->total_disk_size;
- return 0;
-}
-
-static int64_t
-get_max_diff_write_usage (struct xlator_stats *max, struct xlator_stats *min)
-{
- return (max->write_usage - min->write_usage);
-}
-
-static int64_t
-get_max_diff_read_usage (struct xlator_stats *max, struct xlator_stats *min)
-{
- return (max->read_usage - min->read_usage);
-}
-
-static int64_t
-get_max_diff_disk_usage (struct xlator_stats *max, struct xlator_stats *min)
-{
- return (max->disk_usage - min->disk_usage);
-}
-
-static int64_t
-get_max_diff_disk_speed (struct xlator_stats *max, struct xlator_stats *min)
-{
- return (max->disk_speed - min->disk_speed);
-}
-
-static int64_t
-get_max_diff_file_usage (struct xlator_stats *max, struct xlator_stats *min)
-{
- return (max->nr_files - min->nr_files);
-}
-
-
-int
-alu_parse_options (xlator_t *xl, struct alu_sched *alu_sched)
-{
- data_t *order = dict_get (xl->options, "scheduler.alu.order");
- if (!order) {
- gf_log (xl->name, GF_LOG_ERROR,
- "option 'scheduler.alu.order' not specified");
- return -1;
- }
- struct alu_threshold *_threshold_fn;
- struct alu_threshold *tmp_threshold;
- data_t *entry_fn = NULL;
- data_t *exit_fn = NULL;
- char *tmp_str = NULL;
- char *order_str = strtok_r (order->data, ":", &tmp_str);
- /* Get the scheduling priority order, specified by the user. */
- while (order_str) {
- gf_log ("alu", GF_LOG_DEBUG,
- "alu_init: order string: %s",
- order_str);
- if (strcmp (order_str, "disk-usage") == 0) {
- /* Disk usage */
- _threshold_fn =
- CALLOC (1, sizeof (struct alu_threshold));
- ERR_ABORT (_threshold_fn);
- _threshold_fn->diff_value = get_max_diff_disk_usage;
- _threshold_fn->sched_value = get_stats_disk_usage;
- entry_fn =
- dict_get (xl->options,
- "scheduler.alu.disk-usage.entry-threshold");
- if (entry_fn) {
- if (gf_string2bytesize (entry_fn->data,
- &alu_sched->entry_limit.disk_usage) != 0) {
- gf_log (xl->name, GF_LOG_ERROR,
- "invalid number format \"%s\" "
- "of \"option scheduler.alu."
- "disk-usage.entry-threshold\"",
- entry_fn->data);
- return -1;
- }
- } else {
- alu_sched->entry_limit.disk_usage = ALU_DISK_USAGE_ENTRY_THRESHOLD_DEFAULT;
- }
- _threshold_fn->entry_value = get_stats_disk_usage;
- exit_fn = dict_get (xl->options,
- "scheduler.alu.disk-usage.exit-threshold");
- if (exit_fn) {
- if (gf_string2bytesize (exit_fn->data, &alu_sched->exit_limit.disk_usage) != 0) {
- gf_log (xl->name, GF_LOG_ERROR,
- "invalid number format \"%s\" "
- "of \"option scheduler.alu."
- "disk-usage.exit-threshold\"",
- exit_fn->data);
- return -1;
- }
- } else {
- alu_sched->exit_limit.disk_usage = ALU_DISK_USAGE_EXIT_THRESHOLD_DEFAULT;
- }
- _threshold_fn->exit_value = get_stats_disk_usage;
- tmp_threshold = alu_sched->threshold_fn;
- if (!tmp_threshold) {
- alu_sched->threshold_fn = _threshold_fn;
- } else {
- while (tmp_threshold->next) {
- tmp_threshold = tmp_threshold->next;
- }
- tmp_threshold->next = _threshold_fn;
- }
- gf_log ("alu",
- GF_LOG_DEBUG, "alu_init: = %"PRId64",%"PRId64"",
- alu_sched->entry_limit.disk_usage,
- alu_sched->exit_limit.disk_usage);
-
- } else if (strcmp (order_str, "write-usage") == 0) {
- /* Handle "write-usage" */
-
- _threshold_fn = CALLOC (1, sizeof (struct alu_threshold));
- ERR_ABORT (_threshold_fn);
- _threshold_fn->diff_value = get_max_diff_write_usage;
- _threshold_fn->sched_value = get_stats_write_usage;
- entry_fn = dict_get (xl->options,
- "scheduler.alu.write-usage.entry-threshold");
- if (entry_fn) {
- if (gf_string2bytesize (entry_fn->data,
- &alu_sched->entry_limit.write_usage) != 0) {
- gf_log (xl->name, GF_LOG_ERROR,
- "invalid number format \"%s\" "
- "of option scheduler.alu."
- "write-usage.entry-threshold",
- entry_fn->data);
- return -1;
- }
- } else {
- alu_sched->entry_limit.write_usage = ALU_WRITE_USAGE_ENTRY_THRESHOLD_DEFAULT;
- }
- _threshold_fn->entry_value = get_stats_write_usage;
- exit_fn = dict_get (xl->options,
- "scheduler.alu.write-usage.exit-threshold");
- if (exit_fn) {
- if (gf_string2bytesize (exit_fn->data,
- &alu_sched->exit_limit.write_usage) != 0) {
- gf_log (xl->name, GF_LOG_ERROR,
- "invalid number format \"%s\""
- " of \"option scheduler.alu."
- "write-usage.exit-threshold\"",
- exit_fn->data);
- return -1;
- }
- } else {
- alu_sched->exit_limit.write_usage = ALU_WRITE_USAGE_EXIT_THRESHOLD_DEFAULT;
- }
- _threshold_fn->exit_value = get_stats_write_usage;
- tmp_threshold = alu_sched->threshold_fn;
- if (!tmp_threshold) {
- alu_sched->threshold_fn = _threshold_fn;
- } else {
- while (tmp_threshold->next) {
- tmp_threshold = tmp_threshold->next;
- }
- tmp_threshold->next = _threshold_fn;
- }
- gf_log (xl->name, GF_LOG_DEBUG,
- "alu_init: = %"PRId64",%"PRId64"",
- alu_sched->entry_limit.write_usage,
- alu_sched->exit_limit.write_usage);
-
- } else if (strcmp (order_str, "read-usage") == 0) {
- /* Read usage */
-
- _threshold_fn = CALLOC (1, sizeof (struct alu_threshold));
- ERR_ABORT (_threshold_fn);
- _threshold_fn->diff_value = get_max_diff_read_usage;
- _threshold_fn->sched_value = get_stats_read_usage;
- entry_fn = dict_get (xl->options,
- "scheduler.alu.read-usage.entry-threshold");
- if (entry_fn) {
- if (gf_string2bytesize (entry_fn->data,
- &alu_sched->entry_limit.read_usage) != 0) {
- gf_log (xl->name,
- GF_LOG_ERROR,
- "invalid number format \"%s\" "
- "of \"option scheduler.alu."
- "read-usage.entry-threshold\"",
- entry_fn->data);
- return -1;
- }
- } else {
- alu_sched->entry_limit.read_usage = ALU_READ_USAGE_ENTRY_THRESHOLD_DEFAULT;
- }
- _threshold_fn->entry_value = get_stats_read_usage;
- exit_fn = dict_get (xl->options,
- "scheduler.alu.read-usage.exit-threshold");
- if (exit_fn)
- {
- if (gf_string2bytesize (exit_fn->data,
- &alu_sched->exit_limit.read_usage) != 0)
- {
- gf_log ("alu", GF_LOG_ERROR,
- "invalid number format \"%s\" "
- "of \"option scheduler.alu."
- "read-usage.exit-threshold\"",
- exit_fn->data);
- return -1;
- }
- }
- else
- {
- alu_sched->exit_limit.read_usage = ALU_READ_USAGE_EXIT_THRESHOLD_DEFAULT;
- }
- _threshold_fn->exit_value = get_stats_read_usage;
- tmp_threshold = alu_sched->threshold_fn;
- if (!tmp_threshold) {
- alu_sched->threshold_fn = _threshold_fn;
- }
- else {
- while (tmp_threshold->next) {
- tmp_threshold = tmp_threshold->next;
- }
- tmp_threshold->next = _threshold_fn;
- }
- gf_log ("alu", GF_LOG_DEBUG,
- "alu_init: = %"PRId64",%"PRId64"",
- alu_sched->entry_limit.read_usage,
- alu_sched->exit_limit.read_usage);
-
- } else if (strcmp (order_str, "open-files-usage") == 0) {
- /* Open files counter */
-
- _threshold_fn = CALLOC (1, sizeof (struct alu_threshold));
- ERR_ABORT (_threshold_fn);
- _threshold_fn->diff_value = get_max_diff_file_usage;
- _threshold_fn->sched_value = get_stats_file_usage;
- entry_fn = dict_get (xl->options,
- "scheduler.alu.open-files-usage.entry-threshold");
- if (entry_fn) {
- if (gf_string2uint64 (entry_fn->data,
- &alu_sched->entry_limit.nr_files) != 0)
- {
- gf_log ("alu", GF_LOG_ERROR,
- "invalid number format \"%s\" "
- "of \"option scheduler.alu."
- "open-files-usage.entry-"
- "threshold\"", entry_fn->data);
- return -1;
- }
- }
- else
- {
- alu_sched->entry_limit.nr_files = ALU_OPEN_FILES_USAGE_ENTRY_THRESHOLD_DEFAULT;
- }
- _threshold_fn->entry_value = get_stats_file_usage;
- exit_fn = dict_get (xl->options,
- "scheduler.alu.open-files-usage.exit-threshold");
- if (exit_fn)
- {
- if (gf_string2uint64 (exit_fn->data,
- &alu_sched->exit_limit.nr_files) != 0)
- {
- gf_log ("alu", GF_LOG_ERROR,
- "invalid number format \"%s\" "
- "of \"option scheduler.alu."
- "open-files-usage.exit-"
- "threshold\"", exit_fn->data);
- return -1;
- }
- }
- else
- {
- alu_sched->exit_limit.nr_files = ALU_OPEN_FILES_USAGE_EXIT_THRESHOLD_DEFAULT;
- }
- _threshold_fn->exit_value = get_stats_file_usage;
- tmp_threshold = alu_sched->threshold_fn;
- if (!tmp_threshold) {
- alu_sched->threshold_fn = _threshold_fn;
- }
- else {
- while (tmp_threshold->next) {
- tmp_threshold = tmp_threshold->next;
- }
- tmp_threshold->next = _threshold_fn;
- }
- gf_log ("alu", GF_LOG_DEBUG,
- "alu.c->alu_init: = %"PRIu64",%"PRIu64"",
- alu_sched->entry_limit.nr_files,
- alu_sched->exit_limit.nr_files);
-
- } else if (strcmp (order_str, "disk-speed-usage") == 0) {
- /* Disk speed */
-
- _threshold_fn = CALLOC (1, sizeof (struct alu_threshold));
- ERR_ABORT (_threshold_fn);
- _threshold_fn->diff_value = get_max_diff_disk_speed;
- _threshold_fn->sched_value = get_stats_disk_speed;
- entry_fn = dict_get (xl->options,
- "scheduler.alu.disk-speed-usage.entry-threshold");
- if (entry_fn) {
- gf_log ("alu", GF_LOG_DEBUG,
- "entry-threshold is given, "
- "value is constant");
- }
- _threshold_fn->entry_value = NULL;
- exit_fn = dict_get (xl->options,
- "scheduler.alu.disk-speed-usage.exit-threshold");
- if (exit_fn) {
- gf_log ("alu", GF_LOG_DEBUG,
- "exit-threshold is given, "
- "value is constant");
- }
- _threshold_fn->exit_value = NULL;
- tmp_threshold = alu_sched->threshold_fn;
- if (!tmp_threshold) {
- alu_sched->threshold_fn = _threshold_fn;
- }
- else {
- while (tmp_threshold->next) {
- tmp_threshold = tmp_threshold->next;
- }
- tmp_threshold->next = _threshold_fn;
- }
-
- } else {
- gf_log ("alu", GF_LOG_DEBUG,
- "%s, unknown option provided to scheduler",
- order_str);
- }
- order_str = strtok_r (NULL, ":", &tmp_str);
- }
-
- return 0;
-}
-
-static int32_t
-alu_init (xlator_t *xl)
-{
- struct alu_sched *alu_sched = NULL;
- struct alu_limits *_limit_fn = NULL;
- struct alu_limits *tmp_limits = NULL;
- uint32_t min_free_disk = 0;
- data_t *limits = NULL;
-
- alu_sched = CALLOC (1, sizeof (struct alu_sched));
- ERR_ABORT (alu_sched);
-
- {
- alu_parse_options (xl, alu_sched);
- }
-
- /* Get the limits */
-
- limits = dict_get (xl->options,
- "scheduler.limits.min-free-disk");
- if (limits) {
- _limit_fn = CALLOC (1, sizeof (struct alu_limits));
- ERR_ABORT (_limit_fn);
- _limit_fn->min_value = get_stats_free_disk;
- _limit_fn->cur_value = get_stats_free_disk;
- tmp_limits = alu_sched->limits_fn ;
- _limit_fn->next = tmp_limits;
- alu_sched->limits_fn = _limit_fn;
-
- if (gf_string2percent (limits->data,
- &min_free_disk) != 0) {
- gf_log ("alu", GF_LOG_ERROR,
- "invalid number format \"%s\" "
- "of \"option scheduler.limits."
- "min-free-disk\"", limits->data);
- return -1;
- }
- alu_sched->spec_limit.free_disk = min_free_disk;
-
- if (alu_sched->spec_limit.free_disk >= 100) {
- gf_log ("alu", GF_LOG_ERROR,
- "check the \"option scheduler."
- "limits.min-free-disk\", it should "
- "be percentage value");
- return -1;
- }
- alu_sched->spec_limit.total_disk_size = ALU_LIMITS_TOTAL_DISK_SIZE_DEFAULT; /* Its in % */
- gf_log ("alu", GF_LOG_DEBUG,
- "alu.limit.min-disk-free = %"PRId64"",
- _limit_fn->cur_value (&(alu_sched->spec_limit)));
- }
-
- limits = dict_get (xl->options,
- "scheduler.limits.max-open-files");
- if (limits) {
- // Update alu_sched->priority properly
- _limit_fn = CALLOC (1, sizeof (struct alu_limits));
- ERR_ABORT (_limit_fn);
- _limit_fn->max_value = get_stats_file_usage;
- _limit_fn->cur_value = get_stats_file_usage;
- tmp_limits = alu_sched->limits_fn ;
- _limit_fn->next = tmp_limits;
- alu_sched->limits_fn = _limit_fn;
- if (gf_string2uint64_base10 (limits->data,
- &alu_sched->spec_limit.nr_files) != 0)
- {
- gf_log ("alu", GF_LOG_ERROR,
- "invalid number format '%s' of option "
- "scheduler.limits.max-open-files",
- limits->data);
- return -1;
- }
-
- gf_log ("alu", GF_LOG_DEBUG,
- "alu_init: limit.max-open-files = %"PRId64"",
- _limit_fn->cur_value (&(alu_sched->spec_limit)));
- }
-
-
- /* Stats refresh options */
- limits = dict_get (xl->options,
- "scheduler.refresh-interval");
- if (limits) {
- if (gf_string2time (limits->data,
- &alu_sched->refresh_interval) != 0) {
- gf_log ("alu", GF_LOG_ERROR,
- "invalid number format \"%s\" of "
- "option scheduler.refresh-interval",
- limits->data);
- return -1;
- }
- } else {
- alu_sched->refresh_interval = ALU_REFRESH_INTERVAL_DEFAULT;
- }
- gettimeofday (&(alu_sched->last_stat_fetch), NULL);
-
-
- limits = dict_get (xl->options,
- "scheduler.alu.stat-refresh.num-file-create");
- if (limits) {
- if (gf_string2uint32 (limits->data,
- &alu_sched->refresh_create_count) != 0)
- {
- gf_log ("alu", GF_LOG_ERROR,
- "invalid number format \"%s\" of \"option "
- "alu.stat-refresh.num-file-create\"",
- limits->data);
- return -1;
- }
- } else {
- alu_sched->refresh_create_count = ALU_REFRESH_CREATE_COUNT_DEFAULT;
- }
-
- {
- /* Build an array of child_nodes */
- struct alu_sched_struct *sched_array = NULL;
- xlator_list_t *trav_xl = xl->children;
- data_t *data = NULL;
- int32_t index = 0;
-
- while (trav_xl) {
- index++;
- trav_xl = trav_xl->next;
- }
- alu_sched->child_count = index;
- sched_array = CALLOC (index, sizeof (struct alu_sched_struct));
- ERR_ABORT (sched_array);
- trav_xl = xl->children;
- index = 0;
- while (trav_xl) {
- sched_array[index].xl = trav_xl->xlator;
- sched_array[index].eligible = 1;
- index++;
- trav_xl = trav_xl->next;
- }
- alu_sched->array = sched_array;
-
- data = dict_get (xl->options,
- "scheduler.read-only-subvolumes");
- if (data) {
- char *child = NULL;
- char *tmp = NULL;
- char *childs_data = strdup (data->data);
-
- child = strtok_r (childs_data, ",", &tmp);
- while (child) {
- for (index = 1; index < alu_sched->child_count; index++) {
- if (strcmp (alu_sched->array[index -1].xl->name, child) == 0) {
- memcpy (&(alu_sched->array[index -1]),
- &(alu_sched->array[alu_sched->child_count -1]),
- sizeof (struct alu_sched_struct));
- alu_sched->child_count--;
- break;
- }
- }
- child = strtok_r (NULL, ",", &tmp);
- }
- }
- }
-
- *((long *)xl->private) = (long)alu_sched;
-
- /* Initialize all the alu_sched structure's elements */
- {
- alu_sched->sched_nodes_pending = 0;
-
- alu_sched->min_limit.free_disk = 0x00FFFFFF;
- alu_sched->min_limit.disk_usage = 0xFFFFFFFF;
- alu_sched->min_limit.total_disk_size = 0xFFFFFFFF;
- alu_sched->min_limit.disk_speed = 0xFFFFFFFF;
- alu_sched->min_limit.write_usage = 0xFFFFFFFF;
- alu_sched->min_limit.read_usage = 0xFFFFFFFF;
- alu_sched->min_limit.nr_files = 0xFFFFFFFF;
- alu_sched->min_limit.nr_clients = 0xFFFFFFFF;
- }
-
- pthread_mutex_init (&alu_sched->alu_mutex, NULL);
- return 0;
-}
-
-static void
-alu_fini (xlator_t *xl)
-{
- if (!xl)
- return;
- struct alu_sched *alu_sched = (struct alu_sched *)*((long *)xl->private);
- struct alu_limits *limit = alu_sched->limits_fn;
- struct alu_threshold *threshold = alu_sched->threshold_fn;
- void *tmp = NULL;
- pthread_mutex_destroy (&alu_sched->alu_mutex);
- free (alu_sched->array);
- while (limit) {
- tmp = limit;
- limit = limit->next;
- free (tmp);
- }
- while (threshold) {
- tmp = threshold;
- threshold = threshold->next;
- free (tmp);
- }
- free (alu_sched);
-}
-
-static int32_t
-update_stat_array_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *xl,
- int32_t op_ret,
- int32_t op_errno,
- struct xlator_stats *trav_stats)
-{
- struct alu_sched *alu_sched = (struct alu_sched *)*((long *)xl->private);
- struct alu_limits *limits_fn = alu_sched->limits_fn;
- int32_t idx = 0;
-
- pthread_mutex_lock (&alu_sched->alu_mutex);
- for (idx = 0; idx < alu_sched->child_count; idx++) {
- if (alu_sched->array[idx].xl == (xlator_t *)cookie)
- break;
- }
- pthread_mutex_unlock (&alu_sched->alu_mutex);
-
- if (op_ret == -1) {
- alu_sched->array[idx].eligible = 0;
- } else {
- memcpy (&(alu_sched->array[idx].stats), trav_stats, sizeof (struct xlator_stats));
-
- /* Get stats from all the child node */
- /* Here check the limits specified by the user to
- consider the nodes to be used by scheduler */
- alu_sched->array[idx].eligible = 1;
- limits_fn = alu_sched->limits_fn;
- while (limits_fn){
- if (limits_fn->max_value &&
- (limits_fn->cur_value (trav_stats) >
- limits_fn->max_value (&(alu_sched->spec_limit)))) {
- alu_sched->array[idx].eligible = 0;
- }
- if (limits_fn->min_value &&
- (limits_fn->cur_value (trav_stats) <
- limits_fn->min_value (&(alu_sched->spec_limit)))) {
- alu_sched->array[idx].eligible = 0;
- }
- limits_fn = limits_fn->next;
- }
-
- /* Select minimum and maximum disk_usage */
- if (trav_stats->disk_usage > alu_sched->max_limit.disk_usage) {
- alu_sched->max_limit.disk_usage = trav_stats->disk_usage;
- }
- if (trav_stats->disk_usage < alu_sched->min_limit.disk_usage) {
- alu_sched->min_limit.disk_usage = trav_stats->disk_usage;
- }
-
- /* Select minimum and maximum disk_speed */
- if (trav_stats->disk_speed > alu_sched->max_limit.disk_speed) {
- alu_sched->max_limit.disk_speed = trav_stats->disk_speed;
- }
- if (trav_stats->disk_speed < alu_sched->min_limit.disk_speed) {
- alu_sched->min_limit.disk_speed = trav_stats->disk_speed;
- }
-
- /* Select minimum and maximum number of open files */
- if (trav_stats->nr_files > alu_sched->max_limit.nr_files) {
- alu_sched->max_limit.nr_files = trav_stats->nr_files;
- }
- if (trav_stats->nr_files < alu_sched->min_limit.nr_files) {
- alu_sched->min_limit.nr_files = trav_stats->nr_files;
- }
-
- /* Select minimum and maximum write-usage */
- if (trav_stats->write_usage > alu_sched->max_limit.write_usage) {
- alu_sched->max_limit.write_usage = trav_stats->write_usage;
- }
- if (trav_stats->write_usage < alu_sched->min_limit.write_usage) {
- alu_sched->min_limit.write_usage = trav_stats->write_usage;
- }
-
- /* Select minimum and maximum read-usage */
- if (trav_stats->read_usage > alu_sched->max_limit.read_usage) {
- alu_sched->max_limit.read_usage = trav_stats->read_usage;
- }
- if (trav_stats->read_usage < alu_sched->min_limit.read_usage) {
- alu_sched->min_limit.read_usage = trav_stats->read_usage;
- }
-
- /* Select minimum and maximum free-disk */
- if (trav_stats->free_disk > alu_sched->max_limit.free_disk) {
- alu_sched->max_limit.free_disk = trav_stats->free_disk;
- }
- if (trav_stats->free_disk < alu_sched->min_limit.free_disk) {
- alu_sched->min_limit.free_disk = trav_stats->free_disk;
- }
- }
-
- STACK_DESTROY (frame->root);
-
- return 0;
-}
-
-static void
-update_stat_array (xlator_t *xl)
-{
- /* This function schedules the file in one of the child nodes */
- struct alu_sched *alu_sched = (struct alu_sched *)*((long *)xl->private);
- int32_t idx = 0;
- call_frame_t *frame = NULL;
- call_pool_t *pool = xl->ctx->pool;
-
- for (idx = 0 ; idx < alu_sched->child_count; idx++) {
- frame = create_frame (xl, pool);
-
- STACK_WIND_COOKIE (frame,
- update_stat_array_cbk,
- alu_sched->array[idx].xl, //cookie
- alu_sched->array[idx].xl,
- (alu_sched->array[idx].xl)->mops->stats,
- 0); //flag
- }
- return;
-}
-
-static void
-alu_update (xlator_t *xl)
-{
- struct timeval tv;
- struct alu_sched *alu_sched = (struct alu_sched *)*((long *)xl->private);
-
- gettimeofday (&tv, NULL);
- if (tv.tv_sec > (alu_sched->refresh_interval + alu_sched->last_stat_fetch.tv_sec)) {
- /* Update the stats from all the server */
- update_stat_array (xl);
- alu_sched->last_stat_fetch.tv_sec = tv.tv_sec;
- }
-}
-
-static xlator_t *
-alu_scheduler (xlator_t *xl, const void *path)
-{
- /* This function schedules the file in one of the child nodes */
- struct alu_sched *alu_sched = (struct alu_sched *)*((long *)xl->private);
- int32_t sched_index = 0;
- int32_t sched_index_orig = 0;
- int32_t idx = 0;
-
- alu_update (xl);
-
- /* Now check each threshold one by one if some nodes are classified */
- {
- struct alu_threshold *trav_threshold = alu_sched->threshold_fn;
- struct alu_threshold *tmp_threshold = alu_sched->sched_method;
- struct alu_sched_node *tmp_sched_node;
-
- /* This pointer 'trav_threshold' contains function pointers according to spec file
- give by user, */
- while (trav_threshold) {
- /* This check is needed for seeing if already there are nodes in this criteria
- to be scheduled */
- if (!alu_sched->sched_nodes_pending) {
- for (idx = 0; idx < alu_sched->child_count; idx++) {
- if (!alu_sched->array[idx].eligible) {
- continue;
- }
- if (trav_threshold->entry_value) {
- if (trav_threshold->diff_value (&(alu_sched->max_limit),
- &(alu_sched->array[idx].stats)) <
- trav_threshold->entry_value (&(alu_sched->entry_limit))) {
- continue;
- }
- }
- tmp_sched_node = CALLOC (1, sizeof (struct alu_sched_node));
- ERR_ABORT (tmp_sched_node);
- tmp_sched_node->index = idx;
- if (!alu_sched->sched_node) {
- alu_sched->sched_node = tmp_sched_node;
- } else {
- pthread_mutex_lock (&alu_sched->alu_mutex);
- tmp_sched_node->next = alu_sched->sched_node;
- alu_sched->sched_node = tmp_sched_node;
- pthread_mutex_unlock (&alu_sched->alu_mutex);
- }
- alu_sched->sched_nodes_pending++;
- }
- } /* end of if (sched_nodes_pending) */
-
- /* This loop is required to check the eligible nodes */
- struct alu_sched_node *trav_sched_node;
- while (alu_sched->sched_nodes_pending) {
- trav_sched_node = alu_sched->sched_node;
- sched_index = trav_sched_node->index;
- if (alu_sched->array[sched_index].eligible)
- break;
- alu_sched->sched_node = trav_sched_node->next;
- free (trav_sched_node);
- alu_sched->sched_nodes_pending--;
- }
- if (alu_sched->sched_nodes_pending) {
- /* There are some node in this criteria to be scheduled, no need
- * to sort and check other methods
- */
- if (tmp_threshold && tmp_threshold->exit_value) {
- /* verify the exit value && whether node is eligible or not */
- if (tmp_threshold->diff_value (&(alu_sched->max_limit),
- &(alu_sched->array[sched_index].stats)) >
- tmp_threshold->exit_value (&(alu_sched->exit_limit))) {
- /* Free the allocated info for the node :) */
- pthread_mutex_lock (&alu_sched->alu_mutex);
- alu_sched->sched_node = trav_sched_node->next;
- free (trav_sched_node);
- trav_sched_node = alu_sched->sched_node;
- alu_sched->sched_nodes_pending--;
- pthread_mutex_unlock (&alu_sched->alu_mutex);
- }
- } else {
- /* if there is no exit value, then exit after scheduling once */
- pthread_mutex_lock (&alu_sched->alu_mutex);
- alu_sched->sched_node = trav_sched_node->next;
- free (trav_sched_node);
- trav_sched_node = alu_sched->sched_node;
- alu_sched->sched_nodes_pending--;
- pthread_mutex_unlock (&alu_sched->alu_mutex);
- }
-
- alu_sched->sched_method = tmp_threshold; /* this is the method used for selecting */
-
- /* */
- if (trav_sched_node) {
- tmp_sched_node = trav_sched_node;
- while (trav_sched_node->next) {
- trav_sched_node = trav_sched_node->next;
- }
- if (tmp_sched_node->next) {
- pthread_mutex_lock (&alu_sched->alu_mutex);
- alu_sched->sched_node = tmp_sched_node->next;
- tmp_sched_node->next = NULL;
- trav_sched_node->next = tmp_sched_node;
- pthread_mutex_unlock (&alu_sched->alu_mutex);
- }
- }
- /* return the scheduled node */
- return alu_sched->array[sched_index].xl;
- } /* end of if (pending_nodes) */
-
- tmp_threshold = trav_threshold;
- trav_threshold = trav_threshold->next;
- }
- }
-
- /* This is used only when there is everything seems ok, or no eligible nodes */
- sched_index_orig = alu_sched->sched_index;
- alu_sched->sched_method = NULL;
- while (1) {
- //lock
- pthread_mutex_lock (&alu_sched->alu_mutex);
- sched_index = alu_sched->sched_index++;
- alu_sched->sched_index = alu_sched->sched_index % alu_sched->child_count;
- pthread_mutex_unlock (&alu_sched->alu_mutex);
- //unlock
- if (alu_sched->array[sched_index].eligible)
- break;
- if (sched_index_orig == (sched_index + 1) % alu_sched->child_count) {
- gf_log ("alu", GF_LOG_WARNING, "No node is eligible to schedule");
- //lock
- pthread_mutex_lock (&alu_sched->alu_mutex);
- alu_sched->sched_index++;
- alu_sched->sched_index = alu_sched->sched_index % alu_sched->child_count;
- pthread_mutex_unlock (&alu_sched->alu_mutex);
- //unlock
- break;
- }
- }
- return alu_sched->array[sched_index].xl;
-}
-
-/**
- * notify
- */
-void
-alu_notify (xlator_t *xl, int32_t event, void *data)
-{
- struct alu_sched *alu_sched = NULL;
- int32_t idx = 0;
-
- alu_sched = (struct alu_sched *)*((long *)xl->private);
- if (!alu_sched)
- return;
-
- for (idx = 0; idx < alu_sched->child_count; idx++) {
- if (alu_sched->array[idx].xl == (xlator_t *)data)
- break;
- }
-
- switch (event)
- {
- case GF_EVENT_CHILD_UP:
- {
- //alu_sched->array[idx].eligible = 1;
- }
- break;
- case GF_EVENT_CHILD_DOWN:
- {
- alu_sched->array[idx].eligible = 0;
- }
- break;
- default:
- {
- ;
- }
- break;
- }
-
-}
-
-struct sched_ops sched = {
- .init = alu_init,
- .fini = alu_fini,
- .update = alu_update,
- .schedule = alu_scheduler,
- .notify = alu_notify
-};
-
-struct volume_options options[] = {
- { .key = { "scheduler.alu.order", "alu.order" },
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = { "scheduler.alu.disk-usage.entry-threshold",
- "alu.disk-usage.entry-threshold" },
- .type = GF_OPTION_TYPE_SIZET
- },
- { .key = { "scheduler.alu.disk-usage.exit-threshold",
- "alu.disk-usage.exit-threshold" },
- .type = GF_OPTION_TYPE_SIZET
- },
- { .key = { "scheduler.alu.write-usage.entry-threshold",
- "alu.write-usage.entry-threshold" },
- .type = GF_OPTION_TYPE_SIZET
- },
- { .key = { "scheduler.alu.write-usage.exit-threshold",
- "alu.write-usage.exit-threshold" },
- .type = GF_OPTION_TYPE_SIZET
- },
- { .key = { "scheduler.alu.read-usage.entry-threshold",
- "alu.read-usage.entry-threshold" },
- .type = GF_OPTION_TYPE_SIZET
- },
- { .key = { "scheduler.alu.read-usage.exit-threshold",
- "alu.read-usage.exit-threshold" },
- .type = GF_OPTION_TYPE_SIZET
- },
- { .key = { "scheduler.alu.open-files-usage.entry-threshold",
- "alu.open-files-usage.entry-threshold" },
- .type = GF_OPTION_TYPE_INT
- },
- { .key = { "scheduler.alu.open-files-usage.exit-threshold",
- "alu.open-files-usage.exit-threshold" },
- .type = GF_OPTION_TYPE_INT
- },
- { .key = { "scheduler.read-only-subvolumes",
- "alu.read-only-subvolumes" },
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = { "scheduler.refresh-interval",
- "alu.refresh-interval",
- "alu.stat-refresh.interval" },
- .type = GF_OPTION_TYPE_TIME
- },
- { .key = { "scheduler.limits.min-free-disk",
- "alu.limits.min-free-disk" },
- .type = GF_OPTION_TYPE_PERCENT
- },
- { .key = { "scheduler.alu.stat-refresh.num-file-create"
- "alu.stat-refresh.num-file-create"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {NULL}, }
-};
diff --git a/scheduler/alu/src/alu.h b/scheduler/alu/src/alu.h
deleted file mode 100644
index 1051cd2e0..000000000
--- a/scheduler/alu/src/alu.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _ALU_H
-#define _ALU_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "scheduler.h"
-
-struct alu_sched;
-
-struct alu_sched_struct {
- xlator_t *xl;
- struct xlator_stats stats;
- unsigned char eligible;
-};
-
-// Write better name for these functions
-struct alu_limits {
- struct alu_limits *next;
- int64_t (*max_value) (struct xlator_stats *); /* Max limit, specified by the user */
- int64_t (*min_value) (struct xlator_stats *); /* Min limit, specified by the user */
- int64_t (*cur_value) (struct xlator_stats *); /* Current values of variables got from stats call */
-};
-
-struct alu_threshold {
- struct alu_threshold *next;
- int64_t (*diff_value) (struct xlator_stats *max, struct xlator_stats *min); /* Diff b/w max and min */
- int64_t (*entry_value) (struct xlator_stats *); /* Limit specified user */
- int64_t (*exit_value) (struct xlator_stats *); /* Exit point for the limit */
- int64_t (*sched_value) (struct xlator_stats *); /* This will return the index of the child area */
-};
-
-struct alu_sched_node {
- struct alu_sched_node *next;
- int32_t index;
-};
-
-struct alu_sched {
- struct alu_limits *limits_fn;
- struct alu_threshold *threshold_fn;
- struct alu_sched_struct *array;
- struct alu_sched_node *sched_node;
- struct alu_threshold *sched_method;
- struct xlator_stats max_limit;
- struct xlator_stats min_limit;
- struct xlator_stats entry_limit;
- struct xlator_stats exit_limit;
- struct xlator_stats spec_limit; /* User given limit */
-
- pthread_mutex_t alu_mutex;
- struct timeval last_stat_fetch;
- uint32_t refresh_interval; /* in seconds */
- uint32_t refresh_create_count; /* num-file-create */
-
- int32_t sched_nodes_pending;
-
- int32_t sched_index; /* used for round robin scheduling in case of many nodes getting the criteria match. */
- int32_t child_count;
-
-};
-
-struct _alu_local_t {
- int32_t call_count;
-};
-
-typedef struct _alu_local_t alu_local_t;
-
-#endif /* _ALU_H */
diff --git a/scheduler/nufa/src/Makefile.am b/scheduler/nufa/src/Makefile.am
deleted file mode 100644
index 6eb3d39f1..000000000
--- a/scheduler/nufa/src/Makefile.am
+++ /dev/null
@@ -1,12 +0,0 @@
-sched_LTLIBRARIES = nufa.la
-scheddir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/scheduler
-
-nufa_la_LDFLAGS = -module -avoidversion
-
-nufa_la_SOURCES = nufa.c
-nufa_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
diff --git a/scheduler/nufa/src/nufa.c b/scheduler/nufa/src/nufa.c
deleted file mode 100644
index 790704a8f..000000000
--- a/scheduler/nufa/src/nufa.c
+++ /dev/null
@@ -1,405 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <sys/time.h>
-
-#include "scheduler.h"
-#include "common-utils.h"
-
-struct nufa_sched_struct {
- xlator_t *xl;
- struct timeval last_stat_fetch;
- int64_t free_disk;
- int32_t refresh_interval;
- unsigned char eligible;
-};
-
-struct nufa_struct {
- struct nufa_sched_struct *array;
- struct timeval last_stat_fetch;
-
- int32_t *local_array; /* Used to keep the index of the local xlators */
- int32_t local_xl_index; /* index in the above array */
- int32_t local_xl_count; /* Count of the local subvolumes */
-
- uint32_t refresh_interval;
- uint32_t min_free_disk;
-
- gf_lock_t nufa_lock;
- int32_t child_count;
- int32_t sched_index;
-};
-
-#define NUFA_LIMITS_MIN_FREE_DISK_DEFAULT 15
-#define NUFA_REFRESH_INTERVAL_DEFAULT 30
-
-static int32_t
-nufa_init (xlator_t *xl)
-{
- int32_t index = 0;
- data_t *local_name = NULL;
- data_t *data = NULL;
- xlator_list_t *trav_xl = xl->children;
- struct nufa_struct *nufa_buf = NULL;
-
- nufa_buf = CALLOC (1, sizeof (struct nufa_struct));
- ERR_ABORT (nufa_buf);
-
- data = dict_get (xl->options, "scheduler.limits.min-free-disk");
- if (data) {
- if (gf_string2percent (data->data,
- &nufa_buf->min_free_disk) != 0) {
- gf_log ("nufa", GF_LOG_ERROR,
- "invalid number format \"%s\" of "
- "\"option scheduler.limits.min-free-disk\"",
- data->data);
- return -1;
- }
- if (nufa_buf->min_free_disk >= 100) {
- gf_log ("nufa", GF_LOG_ERROR,
- "check \"option scheduler.limits.min-free-disk"
- "\", it should be percentage value");
- return -1;
- }
- } else {
- gf_log ("nufa", GF_LOG_WARNING,
- "No option for limit min-free-disk given, "
- "defaulting it to 15%%");
- nufa_buf->min_free_disk = NUFA_LIMITS_MIN_FREE_DISK_DEFAULT;
- }
- data = dict_get (xl->options, "scheduler.refresh-interval");
- if (data != NULL) {
- if (gf_string2time (data->data,
- &nufa_buf->refresh_interval) != 0) {
- gf_log ("nufa", GF_LOG_ERROR,
- "invalid number format \"%s\" of "
- "\"option scheduler.refresh-interval\"",
- data->data);
- return -1;
- }
- } else {
- gf_log ("nufa", GF_LOG_WARNING,
- "No option for scheduler.refresh-interval given, "
- "defaulting it to 30");
- nufa_buf->refresh_interval = NUFA_REFRESH_INTERVAL_DEFAULT;
- }
-
- /* Get the array built */
- while (trav_xl) {
- index++;
- trav_xl = trav_xl->next;
- }
- nufa_buf->child_count = index;
- nufa_buf->sched_index = 0;
- nufa_buf->array = CALLOC (index, sizeof (struct nufa_sched_struct));
- ERR_ABORT (nufa_buf->array);
- nufa_buf->local_array = CALLOC (index, sizeof (int32_t));
- ERR_ABORT (nufa_buf->array);
- trav_xl = xl->children;
-
- local_name = dict_get (xl->options, "scheduler.local-volume-name");
- if (!local_name) {
- /* Error */
- gf_log ("nufa", GF_LOG_ERROR,
- "No 'local-volume-name' option given in volume file");
- FREE (nufa_buf->array);
- FREE (nufa_buf->local_array);
- FREE (nufa_buf);
- return -1;
- }
-
- /* Get the array properly */
- index = 0;
- trav_xl = xl->children;
- while (trav_xl) {
- nufa_buf->array[index].xl = trav_xl->xlator;
- nufa_buf->array[index].eligible = 1;
- nufa_buf->array[index].free_disk = nufa_buf->min_free_disk;
- nufa_buf->array[index].refresh_interval =
- nufa_buf->refresh_interval;
-
- trav_xl = trav_xl->next;
- index++;
- }
-
- {
- int32_t array_index = 0;
- char *child = NULL;
- char *tmp = NULL;
- char *childs_data = strdup (local_name->data);
-
- child = strtok_r (childs_data, ",", &tmp);
- while (child) {
- /* Check if the local_volume specified is proper
- subvolume of unify */
- trav_xl = xl->children;
- index=0;
- while (trav_xl) {
- if (strcmp (child, trav_xl->xlator->name) == 0)
- break;
- trav_xl = trav_xl->next;
- index++;
- }
-
- if (!trav_xl) {
- /* entry for 'local-volume-name' is wrong,
- not present in subvolumes */
- gf_log ("nufa", GF_LOG_ERROR,
- "option 'scheduler.local-volume-name' "
- "%s is wrong", child);
- FREE (nufa_buf->array);
- FREE (nufa_buf->local_array);
- FREE (nufa_buf);
- return -1;
- } else {
- nufa_buf->local_array[array_index++] = index;
- nufa_buf->local_xl_count++;
- }
- child = strtok_r (NULL, ",", &tmp);
- }
- free (childs_data);
- }
-
- LOCK_INIT (&nufa_buf->nufa_lock);
- *((long *)xl->private) = (long)nufa_buf; // put it at the proper place
- return 0;
-}
-
-static void
-nufa_fini (xlator_t *xl)
-{
- struct nufa_struct *nufa_buf =
- (struct nufa_struct *)*((long *)xl->private);
-
- LOCK_DESTROY (&nufa_buf->nufa_lock);
- FREE (nufa_buf->local_array);
- FREE (nufa_buf->array);
- FREE (nufa_buf);
-}
-
-static int32_t
-update_stat_array_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *xl,
- int32_t op_ret,
- int32_t op_errno,
- struct xlator_stats *trav_stats)
-{
- struct nufa_struct *nufa_struct = NULL;
- int32_t idx = 0;
- int32_t percent = 0;
-
- nufa_struct = (struct nufa_struct *)*((long *)xl->private);
- LOCK (&nufa_struct->nufa_lock);
- for (idx = 0; idx < nufa_struct->child_count; idx++) {
- if (nufa_struct->array[idx].xl->name == (char *)cookie)
- break;
- }
- UNLOCK (&nufa_struct->nufa_lock);
-
- if (op_ret == 0) {
- percent = ((trav_stats->free_disk * 100) /
- trav_stats->total_disk_size);
- if (nufa_struct->array[idx].free_disk > percent) {
- if (nufa_struct->array[idx].eligible)
- gf_log ("nufa", GF_LOG_CRITICAL,
- "node \"%s\" is _almost_ (%d %%) full",
- nufa_struct->array[idx].xl->name,
- 100 - percent);
- nufa_struct->array[idx].eligible = 0;
- } else {
- nufa_struct->array[idx].eligible = 1;
- }
- } else {
- nufa_struct->array[idx].eligible = 0;
- }
-
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-static void
-update_stat_array (xlator_t *xl)
-{
- /* This function schedules the file in one of the child nodes */
- int32_t idx;
- struct nufa_struct *nufa_buf =
- (struct nufa_struct *)*((long *)xl->private);
- call_frame_t *frame = NULL;
- call_pool_t *pool = xl->ctx->pool;
-
- for (idx = 0; idx < nufa_buf->child_count; idx++) {
- frame = create_frame (xl, pool);
-
- STACK_WIND_COOKIE (frame,
- update_stat_array_cbk,
- nufa_buf->array[idx].xl->name,
- nufa_buf->array[idx].xl,
- (nufa_buf->array[idx].xl)->mops->stats,
- 0); //flag
- }
-
- return;
-}
-
-static void
-nufa_update (xlator_t *xl)
-{
- struct nufa_struct *nufa_buf =
- (struct nufa_struct *)*((long *)xl->private);
- struct timeval tv;
- gettimeofday (&tv, NULL);
- if (tv.tv_sec > (nufa_buf->refresh_interval
- + nufa_buf->last_stat_fetch.tv_sec)) {
- /* Update the stats from all the server */
- update_stat_array (xl);
- nufa_buf->last_stat_fetch.tv_sec = tv.tv_sec;
- }
-}
-
-static xlator_t *
-nufa_schedule (xlator_t *xl, const void *path)
-{
- struct nufa_struct *nufa_buf =
- (struct nufa_struct *)*((long *)xl->private);
- int32_t nufa_orig = nufa_buf->local_xl_index;
- int32_t rr;
-
- nufa_update (xl);
-
- while (1) {
- LOCK (&nufa_buf->nufa_lock);
- rr = nufa_buf->local_xl_index++;
- nufa_buf->local_xl_index %= nufa_buf->local_xl_count;
- UNLOCK (&nufa_buf->nufa_lock);
-
- /* if 'eligible' or there are _no_ eligible nodes */
- if (nufa_buf->array[nufa_buf->local_array[rr]].eligible) {
- /* Return the local node */
- return nufa_buf->array[nufa_buf->local_array[rr]].xl;
- }
- if ((rr + 1) % nufa_buf->local_xl_count == nufa_orig) {
- gf_log ("nufa", GF_LOG_CRITICAL,
- "No free space available on any local "
- "volumes, using RR scheduler");
- LOCK (&nufa_buf->nufa_lock);
- nufa_buf->local_xl_index++;
- nufa_buf->local_xl_index %= nufa_buf->local_xl_count;
- UNLOCK (&nufa_buf->nufa_lock);
- break;
- }
- }
-
- nufa_orig = nufa_buf->sched_index;
- while (1) {
- LOCK (&nufa_buf->nufa_lock);
- rr = nufa_buf->sched_index++;
- nufa_buf->sched_index = (nufa_buf->sched_index %
- nufa_buf->child_count);
- UNLOCK (&nufa_buf->nufa_lock);
-
- /* if 'eligible' or there are _no_ eligible nodes */
- if (nufa_buf->array[rr].eligible) {
- break;
- }
- if ((rr + 1) % nufa_buf->child_count == nufa_orig) {
- gf_log ("nufa", GF_LOG_CRITICAL,
- "No free space available on any server, "
- "using RR scheduler.");
- LOCK (&nufa_buf->nufa_lock);
- nufa_buf->sched_index++;
- nufa_buf->sched_index = (nufa_buf->sched_index %
- nufa_buf->child_count);
- UNLOCK (&nufa_buf->nufa_lock);
- break;
- }
- }
- return nufa_buf->array[rr].xl;
-}
-
-
-/**
- * notify
- */
-void
-nufa_notify (xlator_t *xl, int32_t event, void *data)
-{
- struct nufa_struct *nufa_buf =
- (struct nufa_struct *)*((long *)xl->private);
- int32_t idx = 0;
-
- if (!nufa_buf)
- return;
-
- for (idx = 0; idx < nufa_buf->child_count; idx++) {
- if (strcmp (nufa_buf->array[idx].xl->name,
- ((xlator_t *)data)->name) == 0)
- break;
- }
-
- switch (event)
- {
- case GF_EVENT_CHILD_UP:
- {
- //nufa_buf->array[idx].eligible = 1;
- }
- break;
- case GF_EVENT_CHILD_DOWN:
- {
- nufa_buf->array[idx].eligible = 0;
- }
- break;
- default:
- {
- ;
- }
- break;
- }
-
-}
-
-struct sched_ops sched = {
- .init = nufa_init,
- .fini = nufa_fini,
- .update = nufa_update,
- .schedule = nufa_schedule,
- .notify = nufa_notify
-};
-
-struct volume_options options[] = {
- { .key = { "scheduler.refresh-interval",
- "nufa.refresh-interval" },
- .type = GF_OPTION_TYPE_TIME
- },
- { .key = { "scheduler.limits.min-free-disk",
- "nufa.limits.min-free-disk" },
- .type = GF_OPTION_TYPE_PERCENT
- },
- { .key = { "scheduler.local-volume-name",
- "nufa.local-volume-name" },
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = {NULL} }
-};
-
diff --git a/scheduler/random/src/Makefile.am b/scheduler/random/src/Makefile.am
deleted file mode 100644
index 572181336..000000000
--- a/scheduler/random/src/Makefile.am
+++ /dev/null
@@ -1,14 +0,0 @@
-sched_LTLIBRARIES = random.la
-scheddir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/scheduler
-
-random_la_LDFLAGS = -module -avoidversion
-
-random_la_SOURCES = random.c
-random_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-noinst_HEADERS = random.h
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
diff --git a/scheduler/random/src/random.c b/scheduler/random/src/random.c
deleted file mode 100644
index 14bca3301..000000000
--- a/scheduler/random/src/random.c
+++ /dev/null
@@ -1,283 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <stdlib.h>
-#include <sys/time.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "random.h"
-
-#define RANDOM_LIMITS_MIN_FREE_DISK_DEFAULT 15
-#define RANDOM_REFRESH_INTERVAL_DEFAULT 10
-
-
-static int32_t
-random_init (xlator_t *xl)
-{
- struct random_struct *random_buf = NULL;
- xlator_list_t *trav_xl = xl->children;
- data_t *limit = NULL;
- int32_t index = 0;
-
- random_buf = CALLOC (1, sizeof (struct random_struct));
- ERR_ABORT (random_buf);
-
- /* Set the seed for the 'random' function */
- srandom ((uint32_t) time (NULL));
-
- limit = dict_get (xl->options, "scheduler.limits.min-free-disk");
- if (limit) {
- if (gf_string2percent (data_to_str (limit),
- &random_buf->min_free_disk) != 0) {
- gf_log ("random", GF_LOG_ERROR,
- "invalid number format \"%s\" of \"option "
- "scheduler.limits.min-free-disk\"",
- limit->data);
- return -1;
- }
- if (random_buf->min_free_disk >= 100) {
- gf_log ("random", GF_LOG_ERROR,
- "check the \"option random.limits.min-free"
- "-disk\", it should be percentage value");
- return -1;
- }
-
- } else {
- gf_log ("random", GF_LOG_WARNING,
- "No option for limit min-free-disk given, "
- "defaulting it to 5%%");
- random_buf->min_free_disk =
- RANDOM_LIMITS_MIN_FREE_DISK_DEFAULT;
- }
-
- limit = dict_get (xl->options, "scheduler.refresh-interval");
- if (limit) {
- if (gf_string2time (data_to_str (limit),
- &random_buf->refresh_interval) != 0) {
- gf_log ("random", GF_LOG_ERROR,
- "invalid number format \"%s\" of "
- "\"option random.refresh-interval\"",
- limit->data);
- return -1;
- }
- } else {
- random_buf->refresh_interval = RANDOM_REFRESH_INTERVAL_DEFAULT;
- }
-
- while (trav_xl) {
- index++;
- trav_xl = trav_xl->next;
- }
- random_buf->child_count = index;
- random_buf->array = CALLOC (index,
- sizeof (struct random_sched_struct));
- ERR_ABORT (random_buf->array);
- trav_xl = xl->children;
- index = 0;
-
- while (trav_xl) {
- random_buf->array[index].xl = trav_xl->xlator;
- random_buf->array[index].eligible = 1;
- trav_xl = trav_xl->next;
- index++;
- }
- pthread_mutex_init (&random_buf->random_mutex, NULL);
-
- // put it at the proper place
- *((long *)xl->private) = (long)random_buf;
- return 0;
-}
-
-static void
-random_fini (xlator_t *xl)
-{
- struct random_struct *random_buf = NULL;
-
- random_buf = (struct random_struct *)*((long *)xl->private);
- pthread_mutex_destroy (&random_buf->random_mutex);
- free (random_buf->array);
- free (random_buf);
-}
-
-
-static int32_t
-update_stat_array_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *xl,
- int32_t op_ret,
- int32_t op_errno,
- struct xlator_stats *trav_stats)
-{
- int32_t idx = 0;
- int32_t percent = 0;
- struct random_struct *random_buf = NULL;
-
- random_buf = (struct random_struct *)*((long *)xl->private);
-
- pthread_mutex_lock (&random_buf->random_mutex);
- for (idx = 0; idx < random_buf->child_count; idx++) {
- if (strcmp (random_buf->array[idx].xl->name,
- (char *)cookie) == 0)
- break;
- }
- pthread_mutex_unlock (&random_buf->random_mutex);
-
- if (op_ret == 0) {
- percent = ((trav_stats->free_disk *100) /
- trav_stats->total_disk_size);
- if (random_buf->min_free_disk > percent) {
- random_buf->array[idx].eligible = 0;
- } else {
- random_buf->array[idx].eligible = 1;
- }
- } else {
- random_buf->array[idx].eligible = 0;
- }
-
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-static void
-update_stat_array (xlator_t *xl)
-{
- int32_t idx;
- struct random_struct *random_buf = NULL;
- call_frame_t *frame = NULL;
- call_pool_t *pool = xl->ctx->pool;
-
- random_buf = (struct random_struct *)*((long *)xl->private);
- for (idx = 0; idx < random_buf->child_count; idx++) {
- frame = create_frame (xl, pool);
-
- STACK_WIND_COOKIE (frame,
- update_stat_array_cbk,
- random_buf->array[idx].xl->name,
- random_buf->array[idx].xl,
- (random_buf->array[idx].xl)->mops->stats,
- 0);
- }
- return ;
-}
-
-static void
-random_update (xlator_t *xl)
-{
- struct timeval tv;
- struct random_struct *random_buf = NULL;
-
- random_buf = (struct random_struct *)*((long *)xl->private);
-
- gettimeofday(&tv, NULL);
- if (tv.tv_sec > (random_buf->refresh_interval +
- random_buf->last_stat_entry.tv_sec)) {
- update_stat_array (xl);
- random_buf->last_stat_entry.tv_sec = tv.tv_sec;
- }
-}
-
-static xlator_t *
-random_schedule (xlator_t *xl, const void *path)
-{
- struct random_struct *random_buf = NULL;
- int32_t rand = 0;
- int32_t try = 0;
-
- random_buf = (struct random_struct *)*((long *)xl->private);
-
- rand = (int32_t) (1.0*random_buf->child_count *
- (random() / (RAND_MAX + 1.0)));
-
- random_update (xl);
-
- while (!random_buf->array[rand].eligible) {
- if (try++ > 100) {
- /* there is a chance of this becoming a
- infinite loop otherwise. */
- break;
- }
- rand = (int32_t) (1.0*random_buf->child_count *
- (random() / (RAND_MAX + 1.0)));
- }
- return random_buf->array[rand].xl;
-}
-
-
-/**
- * notify
- */
-void
-random_notify (xlator_t *xl, int32_t event, void *data)
-{
- struct random_struct *random_buf = NULL;
- int32_t idx = 0;
-
- random_buf = (struct random_struct *)*((long *)xl->private);
- if (!random_buf)
- return;
-
- for (idx = 0; idx < random_buf->child_count; idx++) {
- if (random_buf->array[idx].xl == (xlator_t *)data)
- break;
- }
-
- switch (event)
- {
- case GF_EVENT_CHILD_UP:
- {
- //random_buf->array[idx].eligible = 1;
- }
- break;
- case GF_EVENT_CHILD_DOWN:
- {
- random_buf->array[idx].eligible = 0;
- }
- break;
- default:
- {
- ;
- }
- break;
- }
-
-}
-
-struct sched_ops sched = {
- .init = random_init,
- .fini = random_fini,
- .update = random_update,
- .schedule = random_schedule,
- .notify = random_notify
-};
-
-struct volume_options options[] = {
- { .key = { "scheduler.refresh-interval",
- "random.refresh-interval" },
- .type = GF_OPTION_TYPE_TIME
- },
- { .key = { "scheduler.limits.min-free-disk",
- "random.limits.min-free-disk" },
- .type = GF_OPTION_TYPE_PERCENT
- },
- { .key = {NULL} }
-};
diff --git a/scheduler/random/src/random.h b/scheduler/random/src/random.h
deleted file mode 100644
index b723559f1..000000000
--- a/scheduler/random/src/random.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _RANDOM_H
-#define _RANDOM_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-
-#include <sys/time.h>
-#include "scheduler.h"
-
-struct random_sched_struct {
- xlator_t *xl;
- unsigned char eligible;
-};
-
-struct random_struct {
- int32_t child_count;
- uint32_t refresh_interval;
- uint32_t min_free_disk;
- struct timeval last_stat_entry;
- struct random_sched_struct *array;
- pthread_mutex_t random_mutex;
-};
-
-#endif /* _RANDOM_H */
diff --git a/scheduler/rr/src/Makefile.am b/scheduler/rr/src/Makefile.am
deleted file mode 100644
index 7e911c0ed..000000000
--- a/scheduler/rr/src/Makefile.am
+++ /dev/null
@@ -1,13 +0,0 @@
-sched_LTLIBRARIES = rr.la
-scheddir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/scheduler
-
-rr_la_LDFLAGS = -module -avoidversion
-
-rr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-rr_la_SOURCES = rr.c rr-options.c
-noinst_HEADERS = rr.h rr-options.h
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
diff --git a/scheduler/rr/src/rr-options.c b/scheduler/rr/src/rr-options.c
deleted file mode 100644
index 2fd32d63d..000000000
--- a/scheduler/rr/src/rr-options.c
+++ /dev/null
@@ -1,256 +0,0 @@
-/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "scheduler.h"
-#include "rr-options.h"
-
-#define RR_LIMITS_MIN_FREE_DISK_OPTION_STRING "scheduler.limits.min-free-disk"
-#define RR_LIMITS_MIN_FREE_DISK_VALUE_DEFAULT 15
-#define RR_LIMITS_MIN_FREE_DISK_VALUE_MIN 0
-#define RR_LIMITS_MIN_FREE_DISK_VALUE_MAX 100
-
-#define RR_REFRESH_INTERVAL_OPTION_STRING "scheduler.refresh-interval"
-#define RR_REFRESH_INTERVAL_VALUE_DEFAULT 10
-
-#define RR_READ_ONLY_SUBVOLUMES_OPTION_STRING "scheduler.read-only-subvolumes"
-
-#define LOG_ERROR(args...) gf_log ("rr-options", GF_LOG_ERROR, ##args)
-#define LOG_WARNING(args...) gf_log ("rr-options", GF_LOG_WARNING, ##args)
-
-static int
-_rr_options_min_free_disk_validate (const char *value_string, uint32_t *n)
-{
- uint32_t value = 0;
-
- if (value_string == NULL)
- {
- return -1;
- }
-
- if (gf_string2percent (value_string, &value) != 0)
- {
- gf_log ("rr",
- GF_LOG_ERROR,
- "invalid number format [%s] of option [%s]",
- value_string,
- RR_LIMITS_MIN_FREE_DISK_OPTION_STRING);
- return -1;
- }
-
- if ((value <= RR_LIMITS_MIN_FREE_DISK_VALUE_MIN) ||
- (value >= RR_LIMITS_MIN_FREE_DISK_VALUE_MAX))
- {
- gf_log ("rr",
- GF_LOG_ERROR,
- "out of range [%d] of option [%s]. Allowed range is 0 to 100.",
- value,
- RR_LIMITS_MIN_FREE_DISK_OPTION_STRING);
- return -1;
- }
-
- *n = value;
-
- return 0;
-}
-
-static int
-_rr_options_refresh_interval_validate (const char *value_string, uint32_t *n)
-{
- uint32_t value = 0;
-
- if (value_string == NULL)
- {
- return -1;
- }
-
- if (gf_string2time (value_string, &value) != 0)
- {
- gf_log ("rr",
- GF_LOG_ERROR,
- "invalid number format [%s] of option [%s]",
- value_string,
- RR_REFRESH_INTERVAL_OPTION_STRING);
- return -1;
- }
-
- *n = value;
-
- return 0;
-}
-
-static int
-_rr_options_read_only_subvolumes_validate (const char *value_string,
- char ***volume_list,
- uint64_t *volume_count)
-{
- char **vlist = NULL;
- int vcount = 0;
- int i = 0;
-
- if (value_string == NULL || volume_list == NULL || volume_count)
- {
- return -1;
- }
-
- if (gf_strsplit (value_string,
- ", ",
- &vlist,
- &vcount) != 0)
- {
- gf_log ("rr",
- GF_LOG_ERROR,
- "invalid subvolume list [%s] of option [%s]",
- value_string,
- RR_READ_ONLY_SUBVOLUMES_OPTION_STRING);
- return -1;
- }
-
- for (i = 0; i < vcount; i++)
- {
- if (gf_volume_name_validate (vlist[i]) != 0)
- {
- gf_log ("rr",
- GF_LOG_ERROR,
- "invalid subvolume name [%s] in [%s] of option [%s]",
- vlist[i],
- value_string,
- RR_READ_ONLY_SUBVOLUMES_OPTION_STRING);
- goto free_exit;
- }
- }
-
- *volume_list = vlist;
- *volume_count = vcount;
-
- return 0;
-
- free_exit:
- for (i = 0; i < vcount; i++)
- {
- free (vlist[i]);
- }
- free (vlist);
-
- return -1;
-}
-
-int
-rr_options_validate (dict_t *options, rr_options_t *rr_options)
-{
- char *value_string = NULL;
-
- if (options == NULL || rr_options == NULL)
- {
- return -1;
- }
-
- if (dict_get (options, RR_LIMITS_MIN_FREE_DISK_OPTION_STRING))
- if (data_to_str (dict_get (options, RR_LIMITS_MIN_FREE_DISK_OPTION_STRING)))
- value_string = data_to_str (dict_get (options,
- RR_LIMITS_MIN_FREE_DISK_OPTION_STRING));
- if (value_string != NULL)
- {
- if (_rr_options_min_free_disk_validate (value_string,
- &rr_options->min_free_disk) != 0)
- {
- return -1;
- }
-
- gf_log ("rr",
- GF_LOG_WARNING,
- "using %s = %d",
- RR_LIMITS_MIN_FREE_DISK_OPTION_STRING,
- rr_options->min_free_disk);
- }
- else
- {
- rr_options->min_free_disk = RR_LIMITS_MIN_FREE_DISK_VALUE_DEFAULT;
-
- gf_log ("rr", GF_LOG_DEBUG,
- "using %s = %d [default]",
- RR_LIMITS_MIN_FREE_DISK_OPTION_STRING,
- rr_options->min_free_disk);
- }
-
- value_string = NULL;
- if (dict_get (options, RR_REFRESH_INTERVAL_OPTION_STRING))
- value_string = data_to_str (dict_get (options,
- RR_REFRESH_INTERVAL_OPTION_STRING));
- if (value_string != NULL)
- {
- if (_rr_options_refresh_interval_validate (value_string,
- &rr_options->refresh_interval) != 0)
- {
- return -1;
- }
-
- gf_log ("rr",
- GF_LOG_WARNING,
- "using %s = %d",
- RR_REFRESH_INTERVAL_OPTION_STRING,
- rr_options->refresh_interval);
- }
- else
- {
- rr_options->refresh_interval = RR_REFRESH_INTERVAL_VALUE_DEFAULT;
-
- gf_log ("rr", GF_LOG_DEBUG,
- "using %s = %d [default]",
- RR_REFRESH_INTERVAL_OPTION_STRING,
- rr_options->refresh_interval);
- }
-
- value_string = NULL;
- if (dict_get (options, RR_READ_ONLY_SUBVOLUMES_OPTION_STRING))
- value_string = data_to_str (dict_get (options,
- RR_READ_ONLY_SUBVOLUMES_OPTION_STRING));
- if (value_string != NULL)
- {
- if (_rr_options_read_only_subvolumes_validate (value_string,
- &rr_options->read_only_subvolume_list,
- &rr_options->read_only_subvolume_count) != 0)
- {
- return -1;
- }
-
- gf_log ("rr",
- GF_LOG_WARNING,
- "using %s = [%s]",
- RR_READ_ONLY_SUBVOLUMES_OPTION_STRING,
- value_string);
- }
-
- return 0;
-}
-
-struct volume_options options[] = {
- { .key = { "scheduler.refresh-interval",
- "rr.refresh-interval" },
- .type = GF_OPTION_TYPE_TIME
- },
- { .key = { "scheduler.limits.min-free-disk",
- "rr.limits.min-free-disk" },
- .type = GF_OPTION_TYPE_PERCENT
- },
- { .key = { "scheduler.read-only-subvolumes",
- "rr.read-only-subvolumes" },
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {NULL} }
-};
diff --git a/scheduler/rr/src/rr-options.h b/scheduler/rr/src/rr-options.h
deleted file mode 100644
index ef62f7f0a..000000000
--- a/scheduler/rr/src/rr-options.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _RR_OPTIONS_H
-#define _RR_OPTIONS_H
-
-struct rr_options
-{
- uint32_t min_free_disk;
- uint32_t refresh_interval;
- char **read_only_subvolume_list;
- uint64_t read_only_subvolume_count;
-};
-typedef struct rr_options rr_options_t;
-
-int rr_options_validate (dict_t *options, rr_options_t *rr_options);
-
-#endif
diff --git a/scheduler/rr/src/rr.c b/scheduler/rr/src/rr.c
deleted file mode 100644
index 7c57307f2..000000000
--- a/scheduler/rr/src/rr.c
+++ /dev/null
@@ -1,565 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <sys/time.h>
-#include <stdlib.h>
-
-#include <stdint.h>
-
-#include "scheduler.h"
-
-#include "rr-options.h"
-#include "rr.h"
-
-#define RR_MIN_FREE_DISK_NOT_REACHED 0
-#define RR_MIN_FREE_DISK_REACHED 1
-
-#define RR_SUBVOLUME_OFFLINE 0
-#define RR_SUBVOLUME_ONLINE 1
-
-#define LOG_ERROR(args...) gf_log ("rr", GF_LOG_ERROR, ##args)
-#define LOG_WARNING(args...) gf_log ("rr", GF_LOG_WARNING, ##args)
-#define LOG_CRITICAL(args...) gf_log ("rr", GF_LOG_CRITICAL, ##args)
-
-#define ROUND_ROBIN(index, count) ((index + 1) % count)
-
-static int
-_cleanup_rr (rr_t *rr)
-{
- int i;
-
- if (rr == NULL)
- {
- return -1;
- }
-
- if (rr->options.read_only_subvolume_list != NULL)
- {
- for (i = 0; i < rr->options.read_only_subvolume_count; i++)
- {
- free (rr->options.read_only_subvolume_list[i]);
- }
- free (rr->options.read_only_subvolume_list);
- }
-
- free (rr->subvolume_list);
-
- free (rr);
-
- return 0;
-}
-
-int
-rr_init (xlator_t *this_xl)
-{
- rr_t *rr = NULL;
- dict_t *options = NULL;
- xlator_list_t *children = NULL;
- uint64_t children_count = 0;
- int i = 0;
- int j = 0;
-
- if (this_xl == NULL)
- {
- return -1;
- }
-
- if ((options = this_xl->options) == NULL)
- {
- return -1;
- }
-
- if ((children = this_xl->children) == NULL)
- {
- return -1;
- }
-
- if ((rr = CALLOC (1, sizeof (rr_t))) == NULL)
- {
- return -1;
- }
-
- if (rr_options_validate (options, &rr->options) != 0)
- {
- free (rr);
- return -1;
- }
-
- for (i = 0; i < rr->options.read_only_subvolume_count; i++)
- {
- char found = 0;
-
- for (children = this_xl->children;
- children != NULL;
- children = children->next)
- {
- if (strcmp (rr->options.read_only_subvolume_list[i],
- children->xlator->name) == 0)
- {
- found = 1;
- break;
- }
- }
-
- if (!found)
- {
- LOG_ERROR ("read-only subvolume [%s] not found in volume list",
- rr->options.read_only_subvolume_list[i]);
- _cleanup_rr (rr);
- return -1;
- }
- }
-
- for (children = this_xl->children;
- children != NULL;
- children = children->next)
- {
- children_count++;
- }
-
- /* bala: excluding read_only_subvolumes */
- if ((rr->subvolume_count = children_count -
- rr->options.read_only_subvolume_count) == 0)
- {
- LOG_ERROR ("no writable volumes found for scheduling");
- _cleanup_rr (rr);
- return -1;
- }
-
- if ((rr->subvolume_list = CALLOC (rr->subvolume_count,
- sizeof (rr_subvolume_t))) == NULL)
- {
- _cleanup_rr (rr);
- return -1;
- }
-
- i = 0;
- j = 0;
- for (children = this_xl->children;
- children != NULL;
- children = children->next)
- {
- char found = 0;
-
- for (j = 0; j < rr->options.read_only_subvolume_count; j++)
- {
- if (strcmp (rr->options.read_only_subvolume_list[i],
- children->xlator->name) == 0)
- {
- found = 1;
- break;
- }
- }
-
- if (!found)
- {
- rr_subvolume_t *subvolume = NULL;
-
- subvolume = &rr->subvolume_list[i];
-
- subvolume->xl = children->xlator;
- subvolume->free_disk_status = RR_MIN_FREE_DISK_NOT_REACHED;
- subvolume->status = RR_SUBVOLUME_ONLINE;
-
- i++;
- }
- }
-
- rr->schedule_index = UINT64_MAX;
- rr->last_stat_fetched_time.tv_sec = 0;
- rr->last_stat_fetched_time.tv_usec = 0;
- pthread_mutex_init (&rr->mutex, NULL);
-
- *((long *)this_xl->private) = (long)rr;
-
- return 0;
-}
-
-void
-rr_fini (xlator_t *this_xl)
-{
- rr_t *rr = NULL;
-
- if (this_xl == NULL)
- {
- return;
- }
-
- if ((rr = (rr_t *) *((long *)this_xl->private)) != NULL)
- {
- pthread_mutex_destroy (&rr->mutex);
- _cleanup_rr (rr);
- this_xl->private = NULL;
- }
-
- return;
-}
-
-xlator_t *
-rr_schedule (xlator_t *this_xl, const void *path)
-{
- rr_t *rr = NULL;
- uint64_t next_schedule_index = 0;
- int i = 0;
-
- if (this_xl == NULL || path == NULL)
- {
- return NULL;
- }
-
- rr = (rr_t *) *((long *)this_xl->private);
- next_schedule_index = ROUND_ROBIN (rr->schedule_index,
- rr->subvolume_count);
-
- rr_update (this_xl);
-
- for (i = next_schedule_index; i < rr->subvolume_count; i++)
- {
- if (rr->subvolume_list[i].status == RR_SUBVOLUME_ONLINE &&
- rr->subvolume_list[i].status == RR_MIN_FREE_DISK_NOT_REACHED)
- {
- pthread_mutex_lock (&rr->mutex);
- rr->schedule_index = i;
- pthread_mutex_unlock (&rr->mutex);
- return rr->subvolume_list[i].xl;
- }
- }
-
- for (i = 0; i < next_schedule_index; i++)
- {
- if (rr->subvolume_list[i].status == RR_SUBVOLUME_ONLINE &&
- rr->subvolume_list[i].status == RR_MIN_FREE_DISK_NOT_REACHED)
- {
- pthread_mutex_lock (&rr->mutex);
- rr->schedule_index = i;
- pthread_mutex_unlock (&rr->mutex);
- return rr->subvolume_list[i].xl;
- }
- }
-
- for (i = next_schedule_index; i < rr->subvolume_count; i++)
- {
- if (rr->subvolume_list[i].status == RR_SUBVOLUME_ONLINE)
- {
- pthread_mutex_lock (&rr->mutex);
- rr->schedule_index = i;
- pthread_mutex_unlock (&rr->mutex);
- return rr->subvolume_list[i].xl;
- }
- }
-
- for (i = 0; i < next_schedule_index; i++)
- {
- if (rr->subvolume_list[i].status == RR_SUBVOLUME_ONLINE)
- {
- pthread_mutex_lock (&rr->mutex);
- rr->schedule_index = i;
- pthread_mutex_unlock (&rr->mutex);
- return rr->subvolume_list[i].xl;
- }
- }
-
- return NULL;
-}
-
-void
-rr_update (xlator_t *this_xl)
-{
- rr_t *rr = NULL;
- struct timeval ctime = {0, 0};
- int i = 0;
-
- if (this_xl == NULL)
- {
- return ;
- }
-
- if ((rr = (rr_t *) *((long *)this_xl->private)) == NULL)
- {
- return ;
- }
-
- if (gettimeofday (&ctime, NULL) != 0)
- {
- return ;
- }
-
- if (ctime.tv_sec > (rr->options.refresh_interval +
- rr->last_stat_fetched_time.tv_sec))
- {
- pthread_mutex_lock (&rr->mutex);
- rr->last_stat_fetched_time = ctime;
- pthread_mutex_unlock (&rr->mutex);
-
- for (i = 0; i < rr->subvolume_count; i++)
- {
- xlator_t *subvolume_xl = NULL;
- call_frame_t *frame = NULL;
- call_pool_t *pool = NULL;
-
- subvolume_xl = rr->subvolume_list[i].xl;
-
- pool = this_xl->ctx->pool;
-
- frame = create_frame (this_xl, pool);
-
- STACK_WIND_COOKIE (frame,
- rr_update_cbk,
- subvolume_xl->name,
- subvolume_xl,
- subvolume_xl->mops->stats,
- 0);
- }
- }
-
- return ;
-}
-
-int
-rr_update_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this_xl,
- int32_t op_ret,
- int32_t op_errno,
- struct xlator_stats *stats)
-{
- rr_t *rr = NULL;
- rr_subvolume_t *subvolume = NULL;
- uint8_t free_disk_percent = 0;
- int i = 0;
-
- if (frame == NULL)
- {
- return -1;
- }
-
- if (cookie == NULL || this_xl == NULL)
- {
- STACK_DESTROY (frame->root);
- return -1;
- }
-
- if (op_ret == 0 && stats == NULL)
- {
- LOG_CRITICAL ("fatal! op_ret is 0 and stats is NULL. "
- "Please report this to <gluster-devel@nongnu.org>");
- STACK_DESTROY (frame->root);
- return -1;
- }
-
- if ((rr = (rr_t *) *((long *)this_xl->private)) == NULL)
- {
- STACK_DESTROY (frame->root);
- return -1;
- }
-
- for (i = 0; i < rr->subvolume_count; i++)
- {
- if (rr->subvolume_list[i].xl->name == (char *) cookie)
- {
- subvolume = &rr->subvolume_list[i];
- break;
- }
- }
-
- if (subvolume == NULL)
- {
- LOG_ERROR ("unknown cookie [%s]", (char *) cookie);
- STACK_DESTROY (frame->root);
- return -1;
- }
-
- if (op_ret == 0)
- {
- free_disk_percent = (stats->free_disk * 100) / stats->total_disk_size;
- if (free_disk_percent > rr->options.min_free_disk)
- {
- if (subvolume->free_disk_status != RR_MIN_FREE_DISK_NOT_REACHED)
- {
- pthread_mutex_lock (&rr->mutex);
- subvolume->free_disk_status = RR_MIN_FREE_DISK_NOT_REACHED;
- pthread_mutex_unlock (&rr->mutex);
- LOG_WARNING ("subvolume [%s] is available with free space for scheduling",
- subvolume->xl->name);
- }
- }
- else
- {
- if (subvolume->free_disk_status != RR_MIN_FREE_DISK_REACHED)
- {
- pthread_mutex_lock (&rr->mutex);
- subvolume->free_disk_status = RR_MIN_FREE_DISK_REACHED;
- pthread_mutex_unlock (&rr->mutex);
- LOG_WARNING ("subvolume [%s] reached minimum disk space requirement",
- subvolume->xl->name);
- }
- }
- }
- else
- {
- pthread_mutex_lock (&rr->mutex);
- subvolume->status = RR_SUBVOLUME_OFFLINE;
- pthread_mutex_unlock (&rr->mutex);
- LOG_ERROR ("unable to get subvolume [%s] status information and "
- "scheduling is disabled",
- subvolume->xl->name);
- }
-
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-void
-rr_notify (xlator_t *this_xl, int32_t event, void *data)
-{
- rr_t *rr = NULL;
- rr_subvolume_t *subvolume = NULL;
- xlator_t *subvolume_xl = NULL;
- int i = 0, ret = 0;
- call_frame_t *frame = NULL;
- call_pool_t *pool = NULL;
- dict_t *xattr = get_new_dict ();
- int32_t version[1] = {1};
-
- if (this_xl == NULL || data == NULL) {
- return ;
- }
-
- if ((rr = (rr_t *) *((long *)this_xl->private)) == NULL) {
- return ;
- }
-
- subvolume_xl = (xlator_t *) data;
-
- for (i = 0; i < rr->subvolume_count; i++) {
- if (rr->subvolume_list[i].xl == subvolume_xl) {
- subvolume = &rr->subvolume_list[i];
- break;
- }
- }
-
- switch (event) {
- case GF_EVENT_CHILD_UP:
- /* Seeding, to be done only once */
- if (rr->first_time && (i == rr->subvolume_count)) {
- loc_t loc = {0,};
- xlator_t *trav = NULL;
-
- pool = this_xl->ctx->pool;
- frame = create_frame (this_xl, pool);
- ret = dict_set_bin (xattr, "trusted.glusterfs.scheduler.rr",
- version, sizeof (int32_t));
- if (-1 == ret) {
- gf_log (this_xl->name, GF_LOG_ERROR, "rr seed setting failed");
- }
- if (xattr)
- dict_ref (xattr);
-
- loc.path = strdup ("/");
- for (trav = this_xl->parents->xlator; trav; trav = trav->parents->xlator) {
- if (trav->itable) {
- loc.inode = trav->itable->root;
- break;
- }
- }
- STACK_WIND (frame,
- rr_notify_cbk,
- (xlator_t *)data,
- ((xlator_t *)data)->fops->xattrop,
- &loc,
- GF_XATTROP_ADD_ARRAY,
- xattr);
-
- if (xattr)
- dict_unref (xattr);
-
- rr->first_time = 0;
- }
- if (subvolume) {
- pthread_mutex_lock (&rr->mutex);
- subvolume->status = RR_SUBVOLUME_ONLINE;
- pthread_mutex_unlock (&rr->mutex);
- }
- break;
- case GF_EVENT_CHILD_DOWN:
- if (subvolume) {
- pthread_mutex_lock (&rr->mutex);
- subvolume->status = RR_SUBVOLUME_OFFLINE;
- pthread_mutex_unlock (&rr->mutex);
- }
- break;
- }
-
- return ;
-}
-
-int
-rr_notify_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this_xl,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *xattr)
-{
- rr_t *rr = NULL;
- int32_t *index = NULL;
- int32_t ret = -1;
- void *tmp_index_ptr = NULL;
-
- if (frame == NULL)
- {
- return -1;
- }
-
- if ((this_xl == NULL) || (op_ret == -1))
- {
- STACK_DESTROY (frame->root);
- return -1;
- }
-
- if ((rr = (rr_t *) *((long *)this_xl->private)) == NULL)
- {
- STACK_DESTROY (frame->root);
- return -1;
- }
-
- ret = dict_get_bin (xattr, "trusted.glusterfs.scheduler.rr", &tmp_index_ptr);
- index = tmp_index_ptr;
- if (ret == 0)
- rr->schedule_index = (index[0] % rr->subvolume_count);
- else
- rr->schedule_index = 0;
-
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-struct sched_ops sched = {
- .init = rr_init,
- .fini = rr_fini,
- .update = rr_update,
- .schedule = rr_schedule,
- .notify = rr_notify
-};
-
diff --git a/scheduler/rr/src/rr.h b/scheduler/rr/src/rr.h
deleted file mode 100644
index 067652d41..000000000
--- a/scheduler/rr/src/rr.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _RR_H
-#define _RR_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "scheduler.h"
-#include <stdint.h>
-#include <sys/time.h>
-
-struct rr_subvolume
-{
- xlator_t *xl;
- uint8_t free_disk_status;
- uint8_t status;
-};
-typedef struct rr_subvolume rr_subvolume_t;
-
-struct rr
-{
- rr_options_t options;
- rr_subvolume_t *subvolume_list;
- uint64_t subvolume_count;
- uint64_t schedule_index;
- struct timeval last_stat_fetched_time;
- pthread_mutex_t mutex;
- char first_time;
-};
-typedef struct rr rr_t;
-
-int rr_init (xlator_t *this_xl);
-void rr_fini (xlator_t *this_xl);
-xlator_t *rr_schedule (xlator_t *this_xl, const void *path);
-void rr_update (xlator_t *this_xl);
-int rr_update_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this_xl,
- int32_t op_ret,
- int32_t op_errno,
- struct xlator_stats *stats);
-void rr_notify (xlator_t *this_xl, int32_t event, void *data);
-int rr_notify_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this_xl,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *xattr);
-
-#endif /* _RR_H */
diff --git a/scheduler/switch/Makefile.am b/scheduler/switch/Makefile.am
deleted file mode 100644
index d471a3f92..000000000
--- a/scheduler/switch/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/scheduler/switch/src/Makefile.am b/scheduler/switch/src/Makefile.am
deleted file mode 100644
index dc7d16d40..000000000
--- a/scheduler/switch/src/Makefile.am
+++ /dev/null
@@ -1,12 +0,0 @@
-sched_LTLIBRARIES = switch.la
-scheddir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/scheduler
-
-switch_la_LDFLAGS = -module -avoidversion
-
-switch_la_SOURCES = switch.c
-switch_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
diff --git a/scheduler/switch/src/switch.c b/scheduler/switch/src/switch.c
deleted file mode 100644
index 791b06550..000000000
--- a/scheduler/switch/src/switch.c
+++ /dev/null
@@ -1,429 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <sys/time.h>
-#include <stdlib.h>
-#include <fnmatch.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-#include "scheduler.h"
-
-struct switch_sched_array {
- xlator_t *xl;
- int32_t eligible;
- int32_t considered;
-};
-
-/* Select one of this struct based on the path's pattern match */
-struct switch_sched_struct {
- struct switch_sched_struct *next;
- struct switch_sched_array *array;
- char path_pattern[256];
- int32_t node_index; /* Index of the node in
- this pattern. */
- int32_t num_child; /* Total num of child nodes
- with this pattern. */
-};
-
-struct switch_struct {
- struct switch_sched_struct *cond;
- struct switch_sched_array *array;
- pthread_mutex_t switch_mutex;
- int32_t child_count;
-};
-
-/* This function should return child node as '*:subvolumes' is inserterd */
-static xlator_t *
-switch_get_matching_xl (const char *path, struct switch_sched_struct *cond)
-{
- struct switch_sched_struct *trav = cond;
- char *pathname = strdup (path);
- int index = 0;
-
- while (trav) {
- if (fnmatch (trav->path_pattern,
- pathname, FNM_NOESCAPE) == 0) {
- free (pathname);
- trav->node_index %= trav->num_child;
- index = (trav->node_index++) % trav->num_child;
- return trav->array[index].xl;
- }
- trav = trav->next;
- }
- free (pathname);
- return NULL;
-}
-
-static int32_t
-gf_unify_valid_child (const char *child,
- xlator_t *xl)
-{
- xlator_list_t *children = NULL, *prev = NULL;
- int32_t ret = 0;
-
- children = xl->children;
- do {
- if (!strcmp (child, children->xlator->name)) {
- ret = 1;
- break;
- }
-
- prev = children;
- children = prev->next;
- } while (children);
-
- return ret;
-}
-
-static int32_t
-switch_init (xlator_t *xl)
-{
- int32_t index = 0;
- data_t *data = NULL;
- char *child = NULL;
- char *tmp = NULL;
- char *childs_data = NULL;
- xlator_list_t *trav_xl = xl->children;
- struct switch_struct *switch_buf = NULL;
-
- switch_buf = CALLOC (1, sizeof (struct switch_struct));
- ERR_ABORT (switch_buf);
-
- while (trav_xl) {
- index++;
- trav_xl = trav_xl->next;
- }
- switch_buf->child_count = index;
- switch_buf->array = CALLOC (index + 1,
- sizeof (struct switch_sched_struct));
- ERR_ABORT (switch_buf->array);
- trav_xl = xl->children;
- index = 0;
-
- while (trav_xl) {
- switch_buf->array[index].xl = trav_xl->xlator;
- switch_buf->array[index].eligible = 1;
- trav_xl = trav_xl->next;
- index++;
- }
-
- data = dict_get (xl->options, "scheduler.read-only-subvolumes");
- if (data) {
- childs_data = strdup (data->data);
- child = strtok_r (childs_data, ",", &tmp);
- while (child) {
- for (index = 1;
- index < switch_buf->child_count; index++) {
- if (strcmp (switch_buf->array[index - 1].xl->name, child) == 0) {
- gf_log ("switch", GF_LOG_DEBUG,
- "Child '%s' is read-only",
- child);
- memcpy (&(switch_buf->array[index-1]),
- &(switch_buf->array[switch_buf->child_count - 1]),
- sizeof (struct switch_sched_array));
- switch_buf->child_count--;
- break;
- }
- }
- child = strtok_r (NULL, ",", &tmp);
- }
- free (childs_data);
- }
-
- data = dict_get (xl->options, "scheduler.local-volume-name");
- if (data) {
- /* Means, give preference to that node first */
- gf_log ("switch", GF_LOG_DEBUG,
- "local volume defined as %s", data->data);
-
- /* TODO: parse it properly, have an extra index to
- specify that first */
- }
-
- /* *jpg:child1,child2;*mpg:child3;*:child4,child5,child6 */
- data = dict_get (xl->options, "scheduler.switch.case");
- if (data) {
- char *tmp_str = NULL;
- char *tmp_str1 = NULL;
- char *dup_str = NULL;
- char *switch_str = NULL;
- char *pattern = NULL;
- char *childs = NULL;
- struct switch_sched_struct *switch_opt = NULL;
- struct switch_sched_struct *trav = NULL;
- /* Get the pattern for considering switch case.
- "option block-size *avi:10MB" etc */
- switch_str = strtok_r (data->data, ";", &tmp_str);
- while (switch_str) {
- dup_str = strdup (switch_str);
- switch_opt =
- CALLOC (1,
- sizeof (struct switch_sched_struct));
- ERR_ABORT (switch_opt);
-
- /* Link it to the main structure */
- if (switch_buf->cond) {
- /* there are already few entries */
- trav = switch_buf->cond;
- while (trav->next)
- trav = trav->next;
- trav->next = switch_opt;
- } else {
- /* First entry */
- switch_buf->cond = switch_opt;
- }
- pattern = strtok_r (dup_str, ":", &tmp_str1);
- childs = strtok_r (NULL, ":", &tmp_str1);
- if (strncmp (pattern, "*", 2) == 0) {
- gf_log ("switch", GF_LOG_WARNING,
- "'*' pattern will be taken by default "
- "for all the unconfigured child nodes,"
- " hence neglecting current option");
- switch_str = strtok_r (NULL, ";", &tmp_str);
- free (dup_str);
- continue;
- }
- memcpy (switch_opt->path_pattern,
- pattern, strlen (pattern));
- if (childs) {
- int32_t idx = 0;
- char *tmp1 = NULL;
- char *dup_childs = NULL;
- /* TODO: get the list of child nodes for
- the given pattern */
- dup_childs = strdup (childs);
- child = strtok_r (dup_childs, ",", &tmp);
- while (child) {
- if (gf_unify_valid_child (child, xl)) {
- idx++;
- child = strtok_r (NULL, ",", &tmp);
- } else {
- gf_log ("switch", GF_LOG_ERROR,
- "%s is not a subvolume "
- "of %s. pattern can "
- "only be scheduled only"
- " to a subvolume of %s",
- child, xl->name,
- xl->name);
- return -1;
- }
- }
- free (dup_childs);
- child = strtok_r (childs, ",", &tmp1);
- switch_opt->num_child = idx;
- switch_opt->array =
- CALLOC (1, idx * sizeof (struct switch_sched_array));
- ERR_ABORT (switch_opt->array);
- idx = 0;
- child = strtok_r (childs, ",", &tmp);
- while (child) {
- for (index = 1;
- index < switch_buf->child_count;
- index++) {
- if (strcmp (switch_buf->array[index - 1].xl->name,
- child) == 0) {
- gf_log ("switch",
- GF_LOG_DEBUG,
- "'%s' pattern will be scheduled to \"%s\"",
- switch_opt->path_pattern, child);
- /*
- if (switch_buf->array[index-1].considered) {
- gf_log ("switch", GF_LOG_DEBUG,
- "ambiguity found, exiting");
- return -1;
- }
- */
- switch_opt->array[idx].xl = switch_buf->array[index-1].xl;
- switch_buf->array[index-1].considered = 1;
- idx++;
- break;
- }
- }
- child = strtok_r (NULL, ",", &tmp1);
- }
- } else {
- /* error */
- gf_log ("switch", GF_LOG_ERROR,
- "Check \"scheduler.switch.case\" "
- "option in unify volume. Exiting");
- free (switch_buf->array);
- free (switch_buf);
- return -1;
- }
- free (dup_str);
- switch_str = strtok_r (NULL, ";", &tmp_str);
- }
- }
- /* Now, all the pattern based considerations done, so for all the
- * remaining pattern, '*' to all the remaining child nodes
- */
- {
- struct switch_sched_struct *switch_opt = NULL;
- int32_t flag = 0;
- int32_t index = 0;
- for (index=0; index < switch_buf->child_count; index++) {
- /* check for considered flag */
- if (switch_buf->array[index].considered)
- continue;
- flag++;
- }
- if (!flag) {
- gf_log ("switch", GF_LOG_ERROR,
- "No nodes left for pattern '*'. Exiting.");
- return -1;
- }
- switch_opt = CALLOC (1, sizeof (struct switch_sched_struct));
- ERR_ABORT (switch_opt);
- if (switch_buf->cond) {
- /* there are already few entries */
- struct switch_sched_struct *trav = switch_buf->cond;
- while (trav->next)
- trav = trav->next;
- trav->next = switch_opt;
- } else {
- /* First entry */
- switch_buf->cond = switch_opt;
- }
- /* Add the '*' pattern to the array */
- memcpy (switch_opt->path_pattern, "*", 2);
- switch_opt->num_child = flag;
- switch_opt->array =
- CALLOC (1, flag * sizeof (struct switch_sched_array));
- ERR_ABORT (switch_opt->array);
- flag = 0;
- for (index=0; index < switch_buf->child_count; index++) {
- /* check for considered flag */
- if (switch_buf->array[index].considered)
- continue;
- gf_log ("switch", GF_LOG_DEBUG,
- "'%s' pattern will be scheduled to \"%s\"",
- switch_opt->path_pattern,
- switch_buf->array[index].xl->name);
- switch_opt->array[flag].xl =
- switch_buf->array[index].xl;
- switch_buf->array[index].considered = 1;
- flag++;
- }
- }
-
- pthread_mutex_init (&switch_buf->switch_mutex, NULL);
-
- // put it at the proper place
- *((long *)xl->private) = (long)switch_buf;
-
- return 0;
-}
-
-static void
-switch_fini (xlator_t *xl)
-{
- /* TODO: free all the allocated entries */
- struct switch_struct *switch_buf = NULL;
- switch_buf = (struct switch_struct *)*((long *)xl->private);
-
- pthread_mutex_destroy (&switch_buf->switch_mutex);
- free (switch_buf->array);
- free (switch_buf);
-}
-
-static xlator_t *
-switch_schedule (xlator_t *xl, const void *path)
-{
- struct switch_struct *switch_buf = NULL;
- switch_buf = (struct switch_struct *)*((long *)xl->private);
-
- return switch_get_matching_xl (path, switch_buf->cond);
-}
-
-
-/**
- * notify
- */
-void
-switch_notify (xlator_t *xl, int32_t event, void *data)
-{
- /* TODO: This should be checking in switch_sched_struct */
-#if 0
- struct switch_struct *switch_buf = NULL;
- int32_t idx = 0;
-
- switch_buf = (struct switch_struct *)*((long *)xl->private);
- if (!switch_buf)
- return;
-
- for (idx = 0; idx < switch_buf->child_count; idx++) {
- if (switch_buf->array[idx].xl == (xlator_t *)data)
- break;
- }
-
- switch (event)
- {
- case GF_EVENT_CHILD_UP:
- {
- switch_buf->array[idx].eligible = 1;
- }
- break;
- case GF_EVENT_CHILD_DOWN:
- {
- switch_buf->array[idx].eligible = 0;
- }
- break;
- default:
- {
- ;
- }
- break;
- }
-#endif
-}
-
-static void
-switch_update (xlator_t *xl)
-{
- return;
-}
-
-struct sched_ops sched = {
- .init = switch_init,
- .fini = switch_fini,
- .update = switch_update,
- .schedule = switch_schedule,
- .notify = switch_notify
-};
-
-struct volume_options options[] = {
- { .key = { "scheduler.read-only-subvolumes" ,
- "switch.read-only-subvolumes"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = { "scheduler.local-volume-name",
- "switch.nufa.local-volume-name" },
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = { "scheduler.switch.case",
- "switch.case" },
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {NULL} }
-};
diff --git a/tests/README.md b/tests/README.md
new file mode 100644
index 000000000..2b5ed8dcd
--- /dev/null
+++ b/tests/README.md
@@ -0,0 +1,27 @@
+Regression tests framework for GlusterFS
+========================================
+
+## Prereq
+- Build and install the version of glusterfs with your changes. Make
+ sure the installed version is accessible from $PATH.
+
+## How-To
+- To mount glusterfs, NEVER use 'mount -t glusterfs', instead use
+ 'glusterfs -s ' method. This is because with the patch build setup
+ doesnot install the /sbin/mount.glusterfs necessary, where as the
+ glusterfs binary will be accessible with $PATH, and will pick the
+ right version.
+- (optional) Set environment variables to specify location of
+ export directories and mount points. Unless you have special
+ requirements, the defaults should just work. The variables
+ themselves can be found at the top of tests/include.rc. All
+ of them can be overriden with environment variables.
+
+## Usage
+- Execute `/usr/share/glusterfs/run-tests.sh` as root.
+
+- If some test cases fail, report to GlusterFS community at
+ `gluster-devel@nongnu.org`.
+
+## Reminder
+- BE WARNED THAT THE TEST CASES DELETE /var/lib/glusterd/* !!! \ No newline at end of file
diff --git a/tests/afr.rc b/tests/afr.rc
new file mode 100644
index 000000000..60ea1b7f1
--- /dev/null
+++ b/tests/afr.rc
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+#count the number of entries marked for self-heal
+#in brick $1's index
+
+function count_sh_entries()
+{
+ val1=0
+ for g in `ls $1/.glusterfs/indices/xattrop`
+ do
+ val1=$(( val1 + 1 ))
+ done
+
+ echo $val1;
+}
diff --git a/tests/basic/bd.t b/tests/basic/bd.t
new file mode 100755
index 000000000..eb6305414
--- /dev/null
+++ b/tests/basic/bd.t
@@ -0,0 +1,131 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+function execute()
+{
+ cmd=$1
+ shift
+ ${cmd} $@ >/dev/null 2>&1
+}
+
+function bd_cleanup()
+{
+ execute vgremove -f ${V0}
+ execute pvremove ${ld}
+ execute losetup -d ${ld}
+ execute rm ${BD_DISK}
+ cleanup
+}
+
+function check()
+{
+ if [ $? -ne 0 ]; then
+ echo prerequsite $@ failed
+ bd_cleanup
+ exit
+ fi
+}
+
+SIZE=256 #in MB
+
+bd_cleanup;
+
+## Configure environment needed for BD backend volumes
+## Create a file with configured size and
+## set it as a temporary loop device to create
+## physical volume & VG. These are basic things needed
+## for testing BD xlator if anyone of these steps fail,
+## test script exits
+function configure()
+{
+ GLDIR=`$CLI system:: getwd`
+ BD_DISK=${GLDIR}/bd_disk
+
+ execute truncate -s${SIZE}M ${BD_DISK}
+ check ${BD_DISK} creation
+
+ execute losetup -f
+ check losetup
+ ld=`losetup -f`
+
+ execute losetup ${ld} ${BD_DISK}
+ check losetup ${BD_DISK}
+ execute pvcreate -f ${ld}
+ check pvcreate ${ld}
+ execute vgcreate ${V0} ${ld}
+ check vgcreate ${V0}
+ execute lvcreate --thin ${V0}/pool --size 128M
+}
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+function volume_type()
+{
+ getfattr -n volume.type $M0/. --only-values --absolute-names -e text
+}
+
+TEST glusterd
+TEST pidof glusterd
+configure
+
+TEST $CLI volume create $V0 ${H0}:/$B0/$V0?${V0}
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT '1' volume_type
+
+## Create posix file
+TEST touch $M0/posix
+
+TEST touch $M0/lv
+gfid=`getfattr -n glusterfs.gfid.string $M0/lv --only-values --absolute-names`
+TEST setfattr -n user.glusterfs.bd -v "lv:4MB" $M0/lv
+# Check if LV is created
+TEST stat /dev/$V0/${gfid}
+
+## Create filesystem
+sleep 1
+TEST mkfs.ext4 -qF $M0/lv
+# Cloning
+TEST touch $M0/lv_clone
+gfid=`getfattr -n glusterfs.gfid.string $M0/lv_clone --only-values --absolute-names`
+TEST setfattr -n clone -v ${gfid} $M0/lv
+TEST stat /dev/$V0/${gfid}
+
+sleep 1
+## Check mounting
+TEST mount -o loop $M0/lv $M1
+umount $M1
+
+# Snapshot
+TEST touch $M0/lv_sn
+gfid=`getfattr -n glusterfs.gfid.string $M0/lv_sn --only-values --absolute-names`
+TEST setfattr -n snapshot -v ${gfid} $M0/lv
+TEST stat /dev/$V0/${gfid}
+
+# Merge
+sleep 1
+TEST setfattr -n merge -v "$M0/lv_sn" $M0/lv_sn
+TEST ! stat $M0/lv_sn
+TEST ! stat /dev/$V0/${gfid}
+
+
+rm $M0/* -f
+
+TEST umount $M0
+TEST $CLI volume stop ${V0}
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+TEST $CLI volume delete ${V0}
+
+bd_cleanup
diff --git a/tests/basic/cdc.t b/tests/basic/cdc.t
new file mode 100755
index 000000000..4cd915aa9
--- /dev/null
+++ b/tests/basic/cdc.t
@@ -0,0 +1,135 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+## Create a volume with one brick
+TEST $CLI volume create $V0 $H0:$B0/${V0}1;
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '1' brick_count $V0
+
+## Turn off performance translators
+## This is required for testing readv calls
+TEST $CLI volume set $V0 performance.io-cache off
+EXPECT 'off' volinfo_field $V0 'performance.io-cache'
+TEST $CLI volume set $V0 performance.quick-read off
+EXPECT 'off' volinfo_field $V0 'performance.quick-read'
+
+TEST $CLI volume set $V0 strict-write-ordering on
+EXPECT 'on' volinfo_field $V0 'performance.strict-write-ordering'
+
+## Turn on cdc xlator by setting features.compress to on
+TEST $CLI volume set $V0 compress on
+EXPECT 'on' volinfo_field $V0 'features.compress'
+EXPECT 'server' volinfo_field $V0 'compress.mode'
+
+## Make sure that user cannot change compress.mode
+## This would break the cdc xlator if allowed!
+TEST $CLI volume set $V0 compress.mode client
+EXPECT 'server' volinfo_field $V0 'compress.mode'
+
+## Turn on compress.debug option
+## This will dump compressed data onto disk as gzip file
+## This is used to check if compression actually happened
+TEST $CLI volume set $V0 compress.debug on
+EXPECT 'on' volinfo_field $V0 'compress.debug'
+
+## Start the volume
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount FUSE with caching disabled
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+####################
+## Testing writev ##
+####################
+
+## Create a 1K file locally and find the md5sum
+TEST dd if=/dev/zero of=/tmp/cdc-orig count=1 bs=1K 2>/dev/null
+checksum[original-file]=`md5sum /tmp/cdc-orig | cut -d' ' -f1`
+
+## Copy the file to mountpoint and find its md5sum on brick
+TEST dd if=/tmp/cdc-orig of=$M0/cdc-server count=1 bs=1K 2>/dev/null
+checksum[brick-file]=`md5sum $B0/${V0}1/cdc-server | cut -d' ' -f1`
+
+## Uncompress the gzip dump file and find its md5sum
+EXPECT '/tmp/cdcdump.gz: application/x-gzip; charset=binary' file -i /tmp/cdcdump.gz
+TEST gunzip -f /tmp/cdcdump.gz
+checksum[dump-file-writev]=`md5sum /tmp/cdcdump | cut -d' ' -f1`
+
+## Check if all 3 checksums are same
+TEST test ${checksum[original-file]} = ${checksum[brick-file]}
+TEST test ${checksum[brick-file]} = ${checksum[dump-file-writev]}
+
+## Cleanup files
+TEST rm -f /tmp/cdcdump.gz
+
+###################
+## Testing readv ##
+###################
+
+## Copy file from mount point to client and find checksum
+TEST dd if=$M0/cdc-server of=/tmp/cdc-client count=1 bs=1K 2>/dev/null
+checksum[client-file]=`md5sum /tmp/cdc-client | cut -d' ' -f1`
+
+## Uncompress the gzip dump file and find its md5sum
+EXPECT '/tmp/cdcdump.gz: application/x-gzip; charset=binary' file -i /tmp/cdcdump.gz
+TEST gunzip -f /tmp/cdcdump.gz
+checksum[dump-file-readv]=`md5sum /tmp/cdcdump | cut -d' ' -f1`
+
+## Check if all 3 checksums are same
+TEST test ${checksum[brick-file]} = ${checksum[client-file]}
+TEST test ${checksum[client-file]} = ${checksum[dump-file-readv]}
+
+## Cleanup files and unmount
+TEST rm -f /tmp/cdc* $M0/cdc*
+TEST umount $M0
+
+## Stop the volume
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+## Turn on compress.min-size and set it to 100 bytes
+## Compression should not take place if file size
+## is less than 100 bytes
+TEST $CLI volume set $V0 compress.min-size 100
+EXPECT '100' volinfo_field $V0 'compress.min-size'
+
+## Start the volume
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount FUSE with caching disabled
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+## Create a file of size 99 bytes on mountpoint
+## This is should not be compressed
+TEST dd if=/dev/zero of=$M0/cdc-small count=1 bs=99 2>/dev/null
+TEST ! test -e /tmp/cdcdump.gz
+
+## Cleanup files and unmount
+TEST rm -f /tmp/cdc* $M0/cdc*
+TEST umount $M0
+
+## Reset the compress options
+TEST $CLI volume reset $V0 compress.debug
+TEST $CLI volume reset $V0 compress.min-size
+TEST $CLI volume reset $V0 compress.mode
+TEST $CLI volume reset $V0 features.compress
+
+## Stop the volume
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+## Delete the volume
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/basic/file-snapshot.t b/tests/basic/file-snapshot.t
new file mode 100755
index 000000000..36908192b
--- /dev/null
+++ b/tests/basic/file-snapshot.t
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/$V0;
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $CLI volume set $V0 features.file-snapshot on;
+
+TEST $CLI volume set $V0 performance.quick-read off;
+TEST $CLI volume set $V0 performance.io-cache off;
+TEST glusterfs -s $H0 --volfile-id $V0 $M0 --attribute-timeout=0;
+
+TEST touch $M0/big-file;
+
+TEST setfattr -n trusted.glusterfs.block-format -v qcow2:10GB $M0/big-file;
+
+TEST ls -al $M0 # test readdirplus
+TEST [ `stat -c '%s' $M0/big-file` = 10737418240 ]
+
+echo 'ABCDEFGHIJ' > $M0/data-file1
+TEST dd if=$M0/data-file1 of=$M0/big-file conv=notrunc;
+TEST setfattr -n trusted.glusterfs.block-snapshot-create -v image1 $M0/big-file;
+
+echo '1234567890' > $M0/data-file2
+TEST dd if=$M0/data-file2 of=$M0/big-file conv=notrunc;
+TEST setfattr -n trusted.glusterfs.block-snapshot-create -v image2 $M0/big-file;
+
+TEST setfattr -n trusted.glusterfs.block-snapshot-goto -v image1 $M0/big-file;
+TEST dd if=$M0/big-file of=$M0/out-file1 bs=11 count=1;
+
+TEST setfattr -n trusted.glusterfs.block-snapshot-goto -v image2 $M0/big-file;
+TEST dd if=$M0/big-file of=$M0/out-file2 bs=11 count=1;
+
+TEST cmp $M0/data-file1 $M0/out-file1;
+TEST cmp $M0/data-file2 $M0/out-file2;
+
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/basic/mgmt_v3-locks.t b/tests/basic/mgmt_v3-locks.t
new file mode 100755
index 000000000..22ca27b9f
--- /dev/null
+++ b/tests/basic/mgmt_v3-locks.t
@@ -0,0 +1,121 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../cluster.rc
+
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+function volume_count {
+ local cli=$1;
+ if [ $cli -eq '1' ] ; then
+ $CLI_1 volume info | grep 'Volume Name' | wc -l;
+ else
+ $CLI_2 volume info | grep 'Volume Name' | wc -l;
+ fi
+}
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI_1 volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+function two_diff_vols_create {
+ # Both volume creates should be successful
+ $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0 $H3:$B3/$V0 &
+ PID_1=$!
+
+ $CLI_2 volume create $V1 $H1:$B1/$V1 $H2:$B2/$V1 $H3:$B3/$V1 &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+
+function two_diff_vols_start {
+ # Both volume starts should be successful
+ $CLI_1 volume start $V0 &
+ PID_1=$!
+
+ $CLI_2 volume start $V1 &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+
+function two_diff_vols_stop_force {
+ # Force stop, so that if rebalance from the
+ # remove bricks is in progress, stop can
+ # still go ahead. Both volume stops should
+ # be successful
+ $CLI_1 volume stop $V0 force &
+ PID_1=$!
+
+ $CLI_2 volume stop $V1 force &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+
+function same_vol_remove_brick {
+
+ # Running two same vol commands at the same time can result in
+ # two success', two failures, or one success and one failure, all
+ # of which are valid. The only thing that shouldn't happen is a
+ # glusterd crash.
+
+ local vol=$1
+ local brick=$2
+ $CLI_1 volume remove-brick $1 $2 start &
+ $CLI_2 volume remove-brick $1 $2 start
+}
+
+cleanup;
+
+TEST launch_cluster 3;
+TEST $CLI_1 peer probe $H2;
+TEST $CLI_1 peer probe $H3;
+
+EXPECT_WITHIN 20 2 check_peers
+
+two_diff_vols_create
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT 'Created' volinfo_field $V1 'Status';
+
+two_diff_vols_start
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT 'Started' volinfo_field $V1 'Status';
+
+same_vol_remove_brick $V0 $H2:$B2/$V0
+# Checking glusterd crashed or not after same volume remove brick
+# on both nodes.
+EXPECT_WITHIN 20 2 check_peers
+
+same_vol_remove_brick $V1 $H2:$B2/$V1
+# Checking glusterd crashed or not after same volume remove brick
+# on both nodes.
+EXPECT_WITHIN 20 2 check_peers
+
+$CLI_1 volume set $V0 diagnostics.client-log-level DEBUG &
+$CLI_1 volume set $V1 diagnostics.client-log-level DEBUG
+kill_glusterd 3
+$CLI_1 volume status $V0
+$CLI_2 volume status $V1
+$CLI_1 peer status
+EXPECT_WITHIN 20 1 check_peers
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT 'Started' volinfo_field $V1 'Status';
+
+TEST $glusterd_3
+$CLI_1 volume status $V0
+$CLI_2 volume status $V1
+$CLI_1 peer status
+#EXPECT_WITHIN 20 2 check_peers
+#EXPECT 'Started' volinfo_field $V0 'Status';
+#EXPECT 'Started' volinfo_field $V1 'Status';
+#two_diff_vols_stop_force
+#EXPECT_WITHIN 20 2 check_peers
+cleanup;
diff --git a/tests/basic/mount.t b/tests/basic/mount.t
new file mode 100755
index 000000000..90e522c5e
--- /dev/null
+++ b/tests/basic/mount.t
@@ -0,0 +1,78 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+
+## Start and create a volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+
+## Make volume tightly consistent for metdata
+TEST $CLI volume set $V0 performance.stat-prefetch off;
+
+## Mount FUSE with caching disabled (read-write)
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+## Check consistent "rw" option
+TEST 'mount -t fuse.glusterfs | grep -E "^$H0:$V0 .+ \(rw,"';
+TEST 'grep -E "^$H0:$V0 .+ ,?rw," /proc/mounts';
+
+## Mount FUSE with caching disabled (read-only)
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --read-only -s $H0 --volfile-id $V0 $M1;
+
+## Check consistent "ro" option
+TEST 'mount -t fuse.glusterfs | grep -E "^$H0:$V0 .+ \(ro,"';
+TEST 'grep -E "^$H0:$V0 .+ ,?ro,.+" /proc/mounts';
+
+## Wait for volume to register with rpc.mountd
+sleep 5;
+
+## Mount NFS
+TEST mount -t nfs -o nolock,soft,intr $H0:/$V0 $N0;
+
+
+## Test for consistent views between NFS and FUSE mounts
+## write access to $M1 should fail
+TEST ! stat $M0/newfile;
+TEST ! touch $M1/newfile;
+TEST touch $M0/newfile;
+TEST stat $M1/newfile;
+TEST stat $N0/newfile;
+TEST ! rm $M1/newfile;
+TEST rm $N0/newfile;
+TEST ! stat $M0/newfile;
+TEST ! stat $M1/newfile;
+
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/basic/nufa.t b/tests/basic/nufa.t
new file mode 100644
index 000000000..0d4c229a0
--- /dev/null
+++ b/tests/basic/nufa.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '8' brick_count $V0
+
+TEST $CLI volume set $V0 nufa on;
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount FUSE with caching disabled (read-only)
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --read-only -s $H0 --volfile-id $V0 $M1;
+
+## Wait for volume to register with rpc.mountd
+sleep 5;
+
+## Mount NFS
+TEST mount -t nfs -o nolock,soft,intr $H0:/$V0 $N0;
+
+cleanup;
diff --git a/tests/basic/posixonly.t b/tests/basic/posixonly.t
new file mode 100755
index 000000000..b9de317a4
--- /dev/null
+++ b/tests/basic/posixonly.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST mkdir -p $B0/posixonly
+cat > $B0/posixonly.vol <<EOF
+volume poisxonly
+ type storage/posix
+ option directory $B0/posixonly
+end-volume
+EOF
+
+TEST glusterfs -f $B0/posixonly.vol $M0;
+
+TEST touch $M0/filename;
+TEST stat $M0/filename;
+TEST mkdir $M0/dirname;
+TEST stat $M0/dirname;
+TEST touch $M0/dirname/filename;
+TEST stat $M0/dirname/filename;
+TEST ln $M0/dirname/filename $M0/dirname/linkname;
+TEST chown 100:100 $M0/dirname/filename;
+TEST chown 100:100 $M0/dirname;
+TEST rm -rf $M0/filename $M0/dirname;
+
+TEST umount $M0;
+
+cleanup;
diff --git a/tests/basic/pump.t b/tests/basic/pump.t
new file mode 100644
index 000000000..3faf06f05
--- /dev/null
+++ b/tests/basic/pump.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+cd $M0
+for i in {1..3}
+do
+ for j in {1..10}
+ do
+ dd if=/dev/urandom of=file$j bs=128K count=10 2>/dev/null 1>/dev/null
+ done
+ mkdir dir$i && cd dir$i
+done
+cd
+TEST umount $M0
+TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 start
+EXPECT_WITHIN 60 "Y" gd_is_replace_brick_completed $H0 $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 commit
+TEST $CLI volume stop $V0
+TEST diff -r --exclude=.glusterfs $B0/${V0}0 $B0/${V0}1
+
+files=""
+
+cd $B0/${V0}0
+for f in `find . -path ./.glusterfs -prune -o -print`;
+do
+ if [ -d $f ]; then continue; fi
+ cmp $f $B0/${V0}1/$f
+ if [ $? -ne 0 ]; then
+ files="$files $f"
+ fi
+done
+
+EXPECT "" echo $files
+
+cleanup
diff --git a/tests/basic/quota.t b/tests/basic/quota.t
new file mode 100755
index 000000000..ef015a30d
--- /dev/null
+++ b/tests/basic/quota.t
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+function limit_on()
+{
+ local QUOTA_PATH=$1;
+ $CLI volume quota $V0 list | grep "$QUOTA_PATH" | awk '{print $2}'
+}
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '8' brick_count $V0
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## ------------------------------
+## Verify quota commands
+## ------------------------------
+TEST $CLI volume quota $V0 enable
+
+TEST $CLI volume quota $V0 limit-usage /test_dir 100MB
+
+TEST $CLI volume quota $V0 limit-usage /test_dir/in_test_dir 150MB
+
+EXPECT "150MB" limit_on "/test_dir/in_test_dir";
+
+TEST $CLI volume quota $V0 remove /test_dir/in_test_dir
+
+EXPECT "100MB" limit_on "/test_dir";
+
+TEST $CLI volume quota $V0 disable
+## ------------------------------
+
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/basic/rpm.t b/tests/basic/rpm.t
new file mode 100755
index 000000000..a577726a8
--- /dev/null
+++ b/tests/basic/rpm.t
@@ -0,0 +1,109 @@
+#!/bin/bash
+#
+# This test will run mock and rebuild the srpm for the latest two EPEL version.
+# By default, the results and the chroots are deleted.
+#
+# When debugging is needed, make sure to set DEBUG=1 in the environment or this
+# script. When debugging is enabled, the resulting log files and chroots are
+# kept. With debugging enabled, this test will fail the regression test, and
+# all output is saved to rpmbuild-mock.log. Tests are run in parallel, so the
+# logfile may be difficult to read.
+#
+# chroots are configured in /etc/mock/*.cfg, with site-defaults.cfg as main
+# configuration file. The default for chroots is /var/lib/mock, but this
+# depends on the 'basedir' configuration option set in the mentioned files.
+#
+
+. $(dirname $0)/../include.rc
+
+# enable some extra debugging
+if [ -n "${DEBUG}" -a "${DEBUG}" != "0" ]
+then
+ exec &> rpmbuild-mock.log
+ set -x
+ MOCK_CLEANUP='--no-cleanup-after'
+else
+ MOCK_CLEANUP='--cleanup-after'
+fi
+
+# detect the branch we're based off
+if [ -n "${BRANCH}" ] ; then
+ # $BRANCH is set in the environment (by Jenkins or other)
+ GIT_PARENT="origin/${BRANCH}"
+else
+ # get a reference to the latest clean tree
+ GIT_PARENT=$(git describe --abbrev=0)
+fi
+
+# Filter out everything and what remains needs to be built
+BUILD_FILES=$(git diff --name-status ${GIT_PARENT} | grep -Ev '^M.*\.(c|h|py)' | awk {'print $2'})
+SELFTEST=$(grep -e 'tests/basic/rpm.t' <<< "${BUILD_FILES}")
+BUILD_FILES=$(grep -Ev '^tests/' <<< "${BUILD_FILES}")
+if [ -z "${BUILD_FILES}" -a -z "${SELFTEST}" ]
+then
+ # nothing affecting packaging changed, no need to retest rpmbuild
+ SKIP_TESTS
+ cleanup
+ exit 0
+fi
+
+# checkout the sources to a new directory to execute ./configure and all
+REPO=${PWD}
+COMMIT=$(git describe)
+mkdir rpmbuild-mock.d
+pushd rpmbuild-mock.d 2>/dev/null
+git clone -q -s file://${REPO} .
+git checkout -q -b rpm-test ${COMMIT}
+
+# build the glusterfs-*.tar.gz
+[ -e configure ] || ./autogen.sh 2>&1 > /dev/null
+TEST ./configure --enable-fusermount
+TEST make dist
+
+# build the glusterfs src.rpm
+ls extras
+TEST make -C extras/LinuxRPM testsrpm
+
+# build for the last two Fedora EPEL releases (x86_64 only)
+for MOCK_CONF in $(ls -x1 /etc/mock/*.cfg | egrep -e 'epel-[0-9]+-x86_64.cfg$' | tail -n2)
+do
+ EPEL_RELEASE=$(basename ${MOCK_CONF} .cfg)
+ # expand the mock command line
+ MOCK_CMD="/usr/bin/mock ${MOCK_CLEANUP} \
+ -r ${EPEL_RELEASE} --rebuild ${PWD}/*.src.rpm"
+
+ # write the mock command to a file, so that its easier to execute
+ cat << EOF > mock-${EPEL_RELEASE}.sh
+#!/bin/sh
+${MOCK_CMD}
+EOF
+ chmod +x mock-${EPEL_RELEASE}.sh
+
+ # root can not run 'mock', it needs to drop priviledges
+ if (groups | grep -q mock)
+ then
+ # the current user is in group 'mock'
+ ${PWD}/mock-${EPEL_RELEASE}.sh &
+ else
+ # "su" might not work, using sudo instead
+ sudo -u mock -E ${PWD}/mock-${EPEL_RELEASE}.sh &
+ fi
+ sleep 5
+done
+
+# TAP and Prove aren't smart about loops
+TESTS_EXPECTED_IN_LOOP=2
+for mockjob in $(jobs -p)
+do
+ TEST_IN_LOOP wait ${mockjob}
+done
+
+# we could build for the last two Fedora releases too, but that is not
+# possible on EPEL-5/6 installations, Fedora 17 and newer have unmet
+# dependencies on the build-server :-/
+
+popd 2>/dev/null
+# only remove rpmbuild-mock.d if we're not debugging
+[ "${DEBUG}" = "0" ] && rm -rf rpmbuild-mock.d
+
+cleanup
diff --git a/tests/basic/self-heald.t b/tests/basic/self-heald.t
new file mode 100644
index 000000000..4468c881b
--- /dev/null
+++ b/tests/basic/self-heald.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill_brick $V0 $H0 $B0/${V0}4
+cd $M0
+HEAL_FILES=0
+for i in {1..10}
+do
+ dd if=/dev/urandom of=f bs=1M count=10 2>/dev/null
+ HEAL_FILES=$(($HEAL_FILES+1))
+ mkdir a; cd a;
+ HEAL_FILES=$(($HEAL_FILES+3)) #As many times as distribute subvols
+done
+HEAL_FILES=$(($HEAL_FILES + 3)) #Count the brick root dir
+
+cd ~
+EXPECT "$HEAL_FILES" afr_get_pending_heal_count $V0
+TEST ! $CLI volume heal $V0
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST ! $CLI volume heal $V0 info
+TEST ! $CLI volume heal $V0
+TEST $CLI volume start $V0 force
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN 20 "Y" glustershd_up_status
+EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 4
+TEST $CLI volume heal $V0
+sleep 5 #Until the heal-statistics command implementation
+#check that this heals the contents partially
+TEST [ $HEAL_FILES -gt $(afr_get_pending_heal_count $V0) ]
+
+TEST $CLI volume heal $V0 full
+EXPECT_WITHIN 30 "0" afr_get_pending_heal_count $V0
+cleanup
diff --git a/tests/basic/volume-snapshot.t b/tests/basic/volume-snapshot.t
new file mode 100755
index 000000000..35c748372
--- /dev/null
+++ b/tests/basic/volume-snapshot.t
@@ -0,0 +1,83 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../cluster.rc
+. $(dirname $0)/../snapshot.rc
+
+V1="patchy2"
+
+function create_volumes() {
+ $CLI_1 volume create $V0 $H1:$L1 &
+ PID_1=$!
+
+ $CLI_2 volume create $V1 $H2:$L2 $H3:$L3 &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+
+function create_snapshots() {
+ $CLI_1 snapshot create ${V0}_snap ${V0}&
+ PID_1=$!
+
+ $CLI_1 snapshot create ${V1}_snap ${V1}&
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+
+function delete_snapshots() {
+ $CLI_1 snapshot delete ${V0}_snap &
+ PID_1=$!
+
+ $CLI_1 snapshot delete ${V1}_snap &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+cleanup;
+
+#Create cluster with 3 nodes
+TEST launch_cluster 3;
+TEST setup_lvm 3
+
+TEST $CLI_1 peer probe $H2;
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN 20 2 peer_count;
+
+create_volumes
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT 'Created' volinfo_field $V1 'Status';
+
+start_volumes 2
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT 'Started' volinfo_field $V1 'Status';
+
+#Snapshot Operations
+create_snapshots
+TEST snapshot_exists $V0 ${V0}_snap
+TEST snapshot_exists $V1 ${V1}_snap
+
+TEST $CLI_1 snapshot config $V0 snap-max-hard-limit 100
+TEST $CLI_1 snapshot config $V1 snap-max-hard-limit 100
+
+TEST mount -t glusterfs $H1:/snaps/${V0}_snap/${V0} $M0
+TEST umount -f $M0
+TEST mount -t glusterfs $H2:/snaps/${V1}_snap/${V1} $M0
+TEST umount -f $M0
+
+#Clean up
+delete_snapshots
+TEST ! snapshot_exists $V0 ${V0}_snap
+TEST ! snapshot_exists $V1 ${V1}_snap
+
+stop_force_volumes 2
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+EXPECT 'Stopped' volinfo_field $V1 'Status';
+
+delete_volumes 2
+TEST ! volume_exists $V0
+TEST ! volume_exists $V1
+
+cleanup;
diff --git a/tests/basic/volume-status.t b/tests/basic/volume-status.t
new file mode 100644
index 000000000..f4196ac30
--- /dev/null
+++ b/tests/basic/volume-status.t
@@ -0,0 +1,66 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+TEST $CLI volume start $V0;
+
+sleep 2
+
+## Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+## Mount NFS
+TEST mount -t nfs -o vers=3,nolock,soft,intr $H0:/$V0 $N0;
+
+TEST $CLI volume status all
+TEST $CLI volume status $V0
+
+EXPECT_WITHIN 10 'Y' nfs_up_status
+EXPECT_WITHIN 10 'Y' glustershd_up_status
+function test_nfs_cmds () {
+ local ret=0
+ declare -a nfs_cmds=("clients" "mem" "inode" "callpool")
+ for cmd in ${nfs_cmds[@]}; do
+ $CLI volume status $V0 nfs $cmd
+ (( ret += $? ))
+ done
+ return $ret
+}
+
+function test_shd_cmds () {
+ local ret=0
+ declare -a shd_cmds=("mem" "inode" "callpool")
+ for cmd in ${shd_cmds[@]}; do
+ $CLI volume status $V0 shd $cmd
+ (( ret += $? ))
+ done
+ return $ret
+}
+
+function test_brick_cmds () {
+ local ret=0
+ declare -a cmds=("detail" "clients" "mem" "inode" "fd" "callpool")
+ for cmd in ${cmds[@]}; do
+ for i in {1..2}; do
+ $CLI volume status $V0 $H0:$B0/${V0}$i $cmd
+ (( ret += $? ))
+ done
+ done
+ return $ret
+}
+
+TEST test_shd_cmds;
+TEST test_nfs_cmds;
+TEST test_brick_cmds;
+
+cleanup;
+
diff --git a/tests/basic/volume.t b/tests/basic/volume.t
new file mode 100755
index 000000000..2f9096055
--- /dev/null
+++ b/tests/basic/volume.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '8' brick_count $V0
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{9,10,11,12};
+EXPECT '12' brick_count $V0
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}{1,2,3,4};
+EXPECT '8' brick_count $V0
+
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/859927/repl.t b/tests/bugs/859927/repl.t
new file mode 100755
index 000000000..73c86e7be
--- /dev/null
+++ b/tests/bugs/859927/repl.t
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd
+
+#Tests for data-self-heal-algorithm option
+function create_setup_for_self_heal {
+ file=$1
+ kill_brick $V0 $H0 $B0/${V0}1
+ dd of=$file if=/dev/urandom bs=1M count=1 2>&1 > /dev/null
+ $CLI volume start $V0 force
+}
+
+function test_write {
+ dd of=$M0/a if=/dev/urandom bs=1k count=1 2>&1 > /dev/null
+}
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 client-log-level DEBUG
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0;
+
+touch $M0/a
+
+TEST $CLI volume set $V0 cluster.data-self-heal-algorithm full
+EXPECT full volume_option $V0 cluster.data-self-heal-algorithm
+create_setup_for_self_heal $M0/a
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0
+ls -l $file 2>&1 > /dev/null
+TEST cmp $B0/${V0}1/a $B0/${V0}2/a
+
+TEST $CLI volume set $V0 cluster.data-self-heal-algorithm diff
+EXPECT diff volume_option $V0 cluster.data-self-heal-algorithm
+create_setup_for_self_heal $M0/a
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0
+ls -l $file 2>&1 > /dev/null
+TEST cmp $B0/${V0}1/a $B0/${V0}2/a
+
+TEST $CLI volume reset $V0 cluster.data-self-heal-algorithm
+create_setup_for_self_heal $M0/a
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0
+ls -l $file 2>&1 > /dev/null
+TEST cmp $B0/${V0}1/a $B0/${V0}2/a
+
+TEST ! $CLI volume set $V0 cluster.data-self-heal-algorithm ""
+
+#Tests for quorum-type option
+TEST ! $CLI volume set $V0 cluster.quorum-type ""
+TEST $CLI volume set $V0 cluster.quorum-type fixed
+EXPECT fixed volume_option $V0 cluster.quorum-type
+TEST $CLI volume set $V0 cluster.quorum-count 2
+kill_brick $V0 $H0 $B0/${V0}1
+TEST ! test_write
+TEST $CLI volume set $V0 cluster.quorum-type auto
+EXPECT auto volume_option $V0 cluster.quorum-type
+TEST ! test_write
+TEST $CLI volume set $V0 cluster.quorum-type none
+EXPECT none volume_option $V0 cluster.quorum-type
+TEST test_write
+TEST $CLI volume reset $V0 cluster.quorum-type
+TEST test_write
+cleanup;
diff --git a/tests/bugs/886998/strict-readdir.t b/tests/bugs/886998/strict-readdir.t
new file mode 100644
index 000000000..0de953e8a
--- /dev/null
+++ b/tests/bugs/886998/strict-readdir.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function num_files_in_dir {
+ d=$1
+ ls $d | sort | uniq | wc -l
+}
+
+#Basic sanity tests for readdir functionality
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/r2d2_0 $H0:$B0/r2d2_1 $H0:$B0/r2d2_2 $H0:$B0/r2d2_3
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-server=$H0 --volfile-id=/$V0 $M0
+
+TEST touch $M0/{1..100}
+EXPECT "100" num_files_in_dir $M0
+
+TEST kill_brick $V0 $H0 $B0/r2d2_0
+TEST kill_brick $V0 $H0 $B0/r2d2_2
+EXPECT "100" num_files_in_dir $M0
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 2
+
+TEST kill_brick $V0 $H0 $B0/r2d2_1
+TEST kill_brick $V0 $H0 $B0/r2d2_3
+EXPECT "100" num_files_in_dir $M0
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 1
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 3
+
+TEST $CLI volume set $V0 cluster.strict-readdir on
+EXPECT "on" volinfo_field $V0 cluster.strict-readdir
+TEST kill_brick $V0 $H0 $B0/r2d2_0
+TEST kill_brick $V0 $H0 $B0/r2d2_2
+EXPECT "100" num_files_in_dir $M0
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 2
+
+TEST kill_brick $V0 $H0 $B0/r2d2_1
+TEST kill_brick $V0 $H0 $B0/r2d2_3
+EXPECT "100" num_files_in_dir $M0
+cleanup;
diff --git a/tests/bugs/949327.t b/tests/bugs/949327.t
new file mode 100644
index 000000000..7b0c5c51a
--- /dev/null
+++ b/tests/bugs/949327.t
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+function tmp_file_count()
+{
+echo $(ls -lh /tmp/tmp.* | wc -l)
+}
+
+
+old_count=$(tmp_file_count);
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+new_count=$(tmp_file_count);
+
+TEST [ "$old_count" -eq "$new_count" ]
+
+cleanup
diff --git a/tests/bugs/bug-000000.t b/tests/bugs/bug-000000.t
new file mode 100755
index 000000000..7f3d15c9d
--- /dev/null
+++ b/tests/bugs/bug-000000.t
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+
+cleanup;
diff --git a/tests/bugs/bug-1002207.t b/tests/bugs/bug-1002207.t
new file mode 100644
index 000000000..50b8c7d31
--- /dev/null
+++ b/tests/bugs/bug-1002207.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+dd if=/dev/zero of=$M0/file$i.data bs=1024 count=1024 1>/dev/null 2>&1
+
+function xattr_query_check()
+{
+ local path=$1
+ local xa_name=$2
+
+ local ret=`getfattr -m . -n $xa_name $path 2>&1 | grep -o "$xa_name: No such attribute" | wc -l`
+ echo $ret
+}
+
+function set_xattr()
+{
+ local path=$1
+ local xa_name=$2
+ local xa_val=$3
+
+ setfattr -n $xa_name -v $xa_val $path
+ echo $?
+}
+
+EXPECT 0 set_xattr $M0/file$i.data "trusted.name" "testofafairlylongxattrstringthatbutnotlongenoughtofailmemoryallocation"
+
+EXPECT 0 xattr_query_check $M0/file$i.data "trusted.name"
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
+
diff --git a/tests/bugs/bug-1002556.t b/tests/bugs/bug-1002556.t
new file mode 100755
index 000000000..a57f455d4
--- /dev/null
+++ b/tests/bugs/bug-1002556.t
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+EXPECT '1 x 2 = 2' volinfo_field $V0 'Number of Bricks';
+
+TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}2
+EXPECT '1 x 3 = 3' volinfo_field $V0 'Number of Bricks';
+
+TEST $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}1 force
+EXPECT '1 x 2 = 2' volinfo_field $V0 'Number of Bricks';
+
+TEST killall glusterd
+TEST glusterd
+
+EXPECT '1 x 2 = 2' volinfo_field $V0 'Number of Bricks';
+cleanup
diff --git a/tests/bugs/bug-1004218.t b/tests/bugs/bug-1004218.t
new file mode 100644
index 000000000..17eb3c65b
--- /dev/null
+++ b/tests/bugs/bug-1004218.t
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# Test if only a single xml document is generated by 'status all'
+# when a volume is not started
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create ${V0}1 $H0:$B0/${V0}1{1,2}
+TEST $CLI volume create ${V0}2 $H0:$B0/${V0}2{1,2}
+
+TEST $CLI volume start ${V0}1
+
+function test_status_all ()
+{
+ $CLI volume status all --xml | xmllint -format -
+}
+
+TEST test_status_all
+
+TEST $CLI volume stop ${V0}1
+
+cleanup
diff --git a/tests/bugs/bug-1004744.t b/tests/bugs/bug-1004744.t
new file mode 100644
index 000000000..0290119ef
--- /dev/null
+++ b/tests/bugs/bug-1004744.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+#Test case: After a rebalance fix-layout, check if the rebalance status command
+#displays the appropriate message at the CLI.
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+#Create a 2x1 distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+TEST $CLI volume start $V0
+
+# Mount FUSE and create file/directory
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+for i in `seq 1 10`;
+do
+ mkdir $M0/dir_$i
+ echo file>$M0/dir_$i/file_$i
+ for j in `seq 1 100`;
+ do
+ mkdir $M0/dir_$i/dir_$j
+ echo file>$M0/dir_$i/dir_$j/file_$j
+ done
+done
+
+#add 2 bricks
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{3,4};
+
+#perform rebalance fix-layout
+TEST $CLI volume rebalance $V0 fix-layout start
+
+EXPECT_WITHIN 1 "fix-layout in progress" rebalance_status_field $V0;
+
+EXPECT_WITHIN 20 "fix-layout completed" rebalance_status_field $V0;
+
+TEST umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-1015990-rep.t b/tests/bugs/bug-1015990-rep.t
new file mode 100755
index 000000000..f59bb2f75
--- /dev/null
+++ b/tests/bugs/bug-1015990-rep.t
@@ -0,0 +1,81 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../afr.rc
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+
+
+TEST kill_brick $V0 $H0 $B0/$V0"1"
+sleep 5
+TEST kill_brick $V0 $H0 $B0/$V0"3"
+sleep 5
+
+for i in {1..100}; do echo "STRING" > $M0/File$i; done
+
+brick_2_sh_entries=$(count_sh_entries $B0/$V0"2")
+brick_4_sh_entries=$(count_sh_entries $B0/$V0"4")
+
+
+command_output=$(gluster volume heal $V0 statistics heal-count replica $H0:$B0/$V0"1")
+
+
+substring="Number of entries:"
+count=0
+while read -r line;
+do
+ if [[ "$line" == *$substring* ]]
+ then
+ value=$(echo $line | cut -f 2 -d :)
+ count=$(($count + $value))
+ fi
+
+done <<< "$command_output"
+
+brick_2_entries_count=$(($count-$value))
+
+EXPECT "0" echo $brick_2_entries_count
+
+brick_2_entries_count=$count
+
+
+xattrop_count_brick_2=$(count_sh_entries $B0/$V0"2")
+##Remove the count of the xattrop-gfid entry count as it does not contribute
+##to the number of files to be healed
+
+sub_val=1
+xattrop_count_brick_2=$(($xattrop_count_brick_2-$sub_val))
+
+ret=0
+if [ "$xattrop_count_brick_2" -eq "$brick_2_entries_count" ]
+ then
+ ret=$(($ret + $sub_val))
+fi
+
+EXPECT "1" echo $ret
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0
+
+cleanup;
diff --git a/tests/bugs/bug-1015990.t b/tests/bugs/bug-1015990.t
new file mode 100755
index 000000000..165af5168
--- /dev/null
+++ b/tests/bugs/bug-1015990.t
@@ -0,0 +1,95 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../afr.rc
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+
+
+TEST kill_brick $V0 $H0 $B0/$V0"1"
+sleep 5
+TEST kill_brick $V0 $H0 $B0/$V0"3"
+sleep 5
+
+for i in {1..100}; do echo "STRING" > $M0/File$i; done
+
+brick_2_sh_entries=$(count_sh_entries $B0/$V0"2")
+brick_4_sh_entries=$(count_sh_entries $B0/$V0"4")
+
+
+command_output=$(gluster volume heal $V0 statistics heal-count)
+
+
+substring="Number of entries:"
+count=0
+while read -r line;
+do
+ if [[ "$line" == *$substring* ]]
+ then
+ value=$(echo $line | cut -f 2 -d :)
+ count=$(($count + $value))
+ fi
+
+done <<< "$command_output"
+
+brick_2_entries_count=$(($count-$value))
+brick_4_entries_count=$value
+
+
+xattrop_count_brick_2=$(count_sh_entries $B0/$V0"2")
+##Remove the count of the xattrop-gfid entry count as it does not contribute
+##to the number of files to be healed
+
+sub_val=1
+xattrop_count_brick_2=$(($xattrop_count_brick_2-$sub_val))
+
+xattrop_count_brick_4=$(count_sh_entries $B0/$V0"4")
+##Remove xattrop-gfid entry count
+
+xattrop_count_brick_4=$(($xattrop_count_brick_4-$sub_val))
+
+
+ret=0
+if [ "$xattrop_count_brick_2" -eq "$brick_2_entries_count" ]
+ then
+ ret=$(($ret + $sub_val))
+fi
+
+EXPECT "1" echo $ret
+
+
+ret=0
+if [ "$xattrop_count_brick_4" -eq "$brick_4_entries_count" ]
+ then
+ ret=$(($ret + $sub_val))
+fi
+
+EXPECT "1" echo $ret
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0
+
+cleanup;
+
diff --git a/tests/bugs/bug-1022055.t b/tests/bugs/bug-1022055.t
new file mode 100755
index 000000000..c2f4218bb
--- /dev/null
+++ b/tests/bugs/bug-1022055.t
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../cluster.rc
+
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup;
+
+TEST launch_cluster 2;
+
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN 20 1 check_peers;
+
+TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0;
+
+TEST $CLI_1 volume start $V0;
+
+TEST $CLI_1 volume log rotate $V0;
+
+TEST $CLI_1 volume status;
+
+cleanup;
diff --git a/tests/bugs/bug-1022905.t b/tests/bugs/bug-1022905.t
new file mode 100644
index 000000000..aef3395dd
--- /dev/null
+++ b/tests/bugs/bug-1022905.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+## Create a volume
+TEST glusterd;
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1};
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Volume start
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Enable a protected and a resettable/unprotected option
+TEST $CLI volume quota $V0 enable
+TEST $CLI volume set $V0 diagnostics.client-log-level DEBUG
+
+## Reset cmd resets only unprotected option(s), succeeds.
+TEST $CLI volume reset $V0;
+
+## Reset should fail
+TEST ! $CLI volume reset $V0;
+
+## Set an unprotected option
+TEST $CLI volume set $V0 diagnostics.client-log-level DEBUG
+
+## Now 1 protected and 1 unprotected options are set
+## Reset force should succeed
+TEST $CLI volume reset $V0 force;
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/bug-1030208.t b/tests/bugs/bug-1030208.t
new file mode 100644
index 000000000..866999692
--- /dev/null
+++ b/tests/bugs/bug-1030208.t
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+#Test case: Hardlink test
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+#Create a distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2};
+TEST $CLI volume start $V0
+
+# Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+#Create a file and perform fop on a DIR
+TEST touch $M0/foo
+TEST ls $M0/
+
+#Create hardlink
+TEST ln $M0/foo $M0/bar
+
+
+TEST umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-1040934.t b/tests/bugs/bug-1040934.t
new file mode 100644
index 000000000..3089d7ce1
--- /dev/null
+++ b/tests/bugs/bug-1040934.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../cluster.rc
+. $(dirname $0)/../snapshot.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST launch_cluster 2
+TEST setup_lvm 2
+
+TEST $CLI_1 peer probe $H2
+EXPECT_WITHIN 20 1 peer_count
+
+TEST $CLI_1 volume create $V0 replica 2 $H1:$L1 $H2:$L2
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+TEST $CLI_1 snapshot create ${V0}_snap ${V0}
+PID_1=$!
+wait $PID_1
+
+TEST snapshot_exists ${V0}_snap
+TEST mount -t glusterfs $H1:/snaps/${V0}_snap/$V0 $M0
+cd $M0
+TEST ! touch a
+
+TEST $CLI_1 snapshot delete ${V0}_snap
+PID_1=$!
+wait $PID_1
+
+TEST ! snapshot_exists ${V0}_snap
+
+cleanup;
diff --git a/tests/bugs/bug-1045333.t b/tests/bugs/bug-1045333.t
new file mode 100644
index 000000000..d1f8069e8
--- /dev/null
+++ b/tests/bugs/bug-1045333.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../snapshot.rc
+
+cleanup;
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST setup_lvm 1
+
+TEST $CLI volume create $V0 $H0:$L1
+TEST $CLI volume start $V0
+
+
+S1="${V0}-snap1" #Create snapshot with name contains hyphen(-)
+S2="-${V0}-snap2" #Create snapshot with name starts with hyphen(-)
+#Create snapshot with a long name
+S3="${V0}_single_gluster_volume_is_accessible_by_multiple_clients_offline_snapshot_is_a_long_name"
+
+TEST $CLI snapshot create $S1 $V0
+TEST snapshot_exists $S1
+
+TEST $CLI snapshot create $S2 $V0
+TEST snapshot_exists $S2
+
+TEST $CLI snapshot create $S3 $V0
+TEST snapshot_exists $S3
+
+
+TEST mount -t glusterfs $H0:/snaps/$S1/$V0 $M0
+TEST umount -f $M0
+
+TEST mount -t glusterfs $H0:/snaps/$S2/$V0 $M0
+TEST umount -f $M0
+
+TEST mount -t glusterfs $H0:/snaps/$S3/$V0 $M0
+TEST umount -f $M0
+
+#Clean up
+TEST $CLI snapshot delete $S1
+TEST $CLI snapshot delete $S2
+TEST $CLI snapshot delete $S3
+
+TEST $CLI volume stop $V0 force
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/bug-1049834.t b/tests/bugs/bug-1049834.t
new file mode 100755
index 000000000..6019a561c
--- /dev/null
+++ b/tests/bugs/bug-1049834.t
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../cluster.rc
+. $(dirname $0)/../snapshot.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST launch_cluster 2
+TEST setup_lvm 2
+
+TEST $CLI_1 peer probe $H2
+EXPECT_WITHIN 20 1 peer_count
+
+TEST $CLI_1 volume create $V0 $H1:$L1 $H2:$L2
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+#Setting the snap-max-hard-limit to 4
+TEST $CLI_1 snapshot config $V0 snap-max-hard-limit 4
+PID_1=$!
+wait $PID_1
+
+#Creating 4 snapshots on the volume
+TEST create_n_snapshots $V0 4 $V0_snap
+TEST snapshot_n_exists $V0 4 $V0_snap
+
+#Creating the 5th snapshots on the volume and expecting it not to be created.
+TEST ! $CLI_1 snapshot create ${V0}_snap5 ${V0}
+TEST ! snapshot_exists ${V0}_snap5
+TEST ! $CLI_1 snapshot delete ${V0}_snap5
+
+#Deleting the 4 snaps
+TEST delete_n_snapshots $V0 4 $V0_snap
+TEST ! snapshot_n_exists $V0 4 $V0_snap
+
+cleanup;
diff --git a/tests/bugs/bug-1064768.t b/tests/bugs/bug-1064768.t
new file mode 100755
index 000000000..b87168150
--- /dev/null
+++ b/tests/bugs/bug-1064768.t
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick0 $H0:$B0/brick1
+TEST $CLI volume start $V0
+EXPECT_WITHIN 15 'Started' volinfo_field $V0 'Status';
+
+TEST $CLI volume profile $V0 start
+TEST $CLI volume profile $V0 info
+TEST $CLI volume profile $V0 stop
+
+TEST $CLI volume status
+TEST $CLI volume stop $V0
+EXPECT_WITHIN 15 'Stopped' volinfo_field $V0 'Status';
+cleanup;
diff --git a/tests/bugs/bug-762989.t b/tests/bugs/bug-762989.t
new file mode 100755
index 000000000..1794693cc
--- /dev/null
+++ b/tests/bugs/bug-762989.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+## reserve port 1023
+older_ports=$(cat /proc/sys/net/ipv4/ip_local_reserved_ports);
+echo "1023" > /proc/sys/net/ipv4/ip_local_reserved_ports;
+
+## Start and create a volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+TEST $CLI volume start $V0;
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 \
+$M0;
+
+## Wait for volume to register with rpc.mountd
+sleep 6;
+## check if port 1023 (which has been reserved) is used by the gluster processes
+op=$(netstat -ntp | grep gluster | grep -w 1023);
+EXPECT "" echo $op;
+
+#set the reserved ports to the older values
+echo $older_ports > /proc/sys/net/ipv4/ip_local_reserved_ports
+
+cleanup;
diff --git a/tests/bugs/bug-764638.t b/tests/bugs/bug-764638.t
new file mode 100644
index 000000000..816546524
--- /dev/null
+++ b/tests/bugs/bug-764638.t
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI pool list;
+TEST $CLI pool list --xml;
+
+cleanup;
diff --git a/tests/bugs/bug-765230.t b/tests/bugs/bug-765230.t
new file mode 100755
index 000000000..2012be5ad
--- /dev/null
+++ b/tests/bugs/bug-765230.t
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 2 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Setting quota-timeout as 20
+TEST ! $CLI volume set $V0 features.quota-timeout 20
+EXPECT '' volinfo_field $V0 'features.quota-timeout';
+
+## Enabling features.quota-deem-statfs
+TEST ! $CLI volume set $V0 features.quota-deem-statfs on
+EXPECT '' volinfo_field $V0 'features.quota-deem-statfs'
+
+## Enabling quota
+TEST $CLI volume quota $V0 enable
+EXPECT 'on' volinfo_field $V0 'features.quota'
+
+## Setting quota-timeout as 20
+TEST $CLI volume set $V0 features.quota-timeout 20
+EXPECT '20' volinfo_field $V0 'features.quota-timeout';
+
+## Enabling features.quota-deem-statfs
+TEST $CLI volume set $V0 features.quota-deem-statfs on
+EXPECT 'on' volinfo_field $V0 'features.quota-deem-statfs'
+
+## Disabling quota
+TEST $CLI volume quota $V0 disable
+EXPECT 'off' volinfo_field $V0 'features.quota'
+
+## Setting quota-timeout as 30
+TEST ! $CLI volume set $V0 features.quota-timeout 30
+EXPECT '20' volinfo_field $V0 'features.quota-timeout';
+
+## Disabling features.quota-deem-statfs
+TEST ! $CLI volume set $V0 features.quota-deem-statfs off
+EXPECT 'on' volinfo_field $V0 'features.quota-deem-statfs'
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-765380.t b/tests/bugs/bug-765380.t
new file mode 100644
index 000000000..a9784b93d
--- /dev/null
+++ b/tests/bugs/bug-765380.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+REPLICA=2
+
+TEST $CLI volume create $V0 replica $REPLICA $H0:$B0/${V0}00 $H0:$B0/${V0}01 $H0:$B0/${V0}10 $H0:$B0/${V0}11
+TEST $CLI volume start $V0
+
+## Mount FUSE with caching disabled
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+function count_hostname_or_uuid_from_pathinfo()
+{
+ pathinfo=`getfattr -m . -n trusted.glusterfs.pathinfo $M0/f00f`
+ echo $pathinfo | grep -o $1 | wc -l
+}
+
+touch $M0/f00f
+
+EXPECT $REPLICA count_hostname_or_uuid_from_pathinfo $H0
+
+# turn on node-uuid-pathinfo option
+TEST $CLI volume set $V0 node-uuid-pathinfo on
+
+# do not expext hostname as part of the pathinfo string
+EXPECT 0 count_hostname_or_uuid_from_pathinfo $H0
+
+uuid=`grep UUID /var/lib/glusterd/glusterd.info | cut -f2 -d=`
+
+# ... but expect the uuid $REPLICA times
+EXPECT $REPLICA count_hostname_or_uuid_from_pathinfo $uuid
+
+cleanup;
diff --git a/tests/bugs/bug-765473.t b/tests/bugs/bug-765473.t
new file mode 100755
index 000000000..5fc0ec9d7
--- /dev/null
+++ b/tests/bugs/bug-765473.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../fileio.rc
+
+cleanup;
+
+function clients_connected()
+{
+ volname=$1
+ gluster volume status $volname clients | grep -i 'Clients connected' | sed -e 's/[^0-9]*\(.*\)/\1/g'
+}
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1}
+TEST $CLI volume start $V0;
+
+TEST glusterfs --direct-io-mode=yes --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+TEST fd=`fd_available`
+TEST fd_open $fd 'w' "$M0/testfile"
+TEST fd_write $fd "content"
+TEST $CLI volume stop $V0
+# write some content which will result in marking fd bad
+fd_write $fd "more content"
+TEST $CLI volume start $V0
+EXPECT_WITHIN 30 2 clients_connected $V0
+TEST ! fd_write $fd "still more content"
+
+cleanup
diff --git a/tests/bugs/bug-765564.t b/tests/bugs/bug-765564.t
new file mode 100644
index 000000000..0b8b8cd4f
--- /dev/null
+++ b/tests/bugs/bug-765564.t
@@ -0,0 +1,83 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+TEST glusterd
+TEST pidof glusterd
+
+## Start and create a volume
+mkdir -p ${B0}/${V0}-0
+mkdir -p ${B0}/${V0}-1
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}-{0,1}
+
+TEST $CLI volume set $V0 performance.io-cache off;
+TEST $CLI volume set $V0 performance.write-behind off;
+TEST $CLI volume set $V0 performance.stat-prefetch off
+
+TEST $CLI volume start $V0;
+
+## Mount native
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+
+#returns success if 'olddir' is absent
+#'olddir' must be absent in both replicas
+function rm_succeeded () {
+ local dir1=$1
+ [[ -d $H0:$B0/${V0}-0/$dir1 || -d $H0:$B0/${V0}-1/$dir1 ]] && return 0
+ return 1
+}
+
+# returns successes if 'newdir' is present
+#'newdir' must be present in both replicas
+function mv_succeeded () {
+ local dir1=$1
+ [[ -d $H0:$B0/${V0}-0/$dir1 && -d $H0:$B0/${V0}-1/$dir1 ]] && return 1
+ return 0
+}
+
+# returns zero on success
+# Only one of rm and mv can succeed. This is captured by the XOR below
+
+function chk_backend_consistency(){
+ local dir1=$1
+ local dir2=$2
+ local rm_status=rm_succeeded $dir1
+ local mv_status=mv_succeeded $dir2
+ [[ ( $rm_status && ! $mv_status ) || ( ! $rm_status && $mv_status ) ]] && return 0
+ return 1
+}
+
+#concurrent removal/rename of dirs
+function rm_mv_correctness () {
+ ret=0
+ for i in {1..100}; do
+ mkdir $M0/"dir"$i
+ rmdir $M0/"dir"$i &
+ mv $M0/"dir"$i $M0/"adir"$i &
+ wait
+ tmp_ret=$(chk_backend_consistency "dir"$i "adir"$i)
+ (( ret += tmp_ret ))
+ rm -rf $M0/"dir"$i
+ rm -rf $M0/"adir"$i
+ done
+ return $ret
+}
+
+TEST touch $M0/a;
+TEST mv $M0/a $M0/b;
+
+#test rename fop when one of the bricks is down
+kill_brick ${V0} ${H0} ${B0}/${V0}-1;
+TEST touch $M0/h;
+TEST mv $M0/h $M0/1;
+
+TEST $CLI volume start $V0 force;
+
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 1;
+find $M0 | xargs stat 2>/dev/null 1>/dev/null;
+
+TEST rm_mv_correctness;
+TEST umount $M0;
+cleanup;
+
diff --git a/tests/bugs/bug-767095.t b/tests/bugs/bug-767095.t
new file mode 100755
index 000000000..a8842bd54
--- /dev/null
+++ b/tests/bugs/bug-767095.t
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+dump_dir='/tmp/gerrit_glusterfs'
+TEST mkdir -p $dump_dir;
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 error-gen posix;
+TEST $CLI volume set $V0 server.statedump-path $dump_dir;
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST PID=`gluster volume status $V0 | grep patchy1 | awk {'print $5'}`;
+TEST kill -USR1 $PID;
+sleep 2;
+for file_name in $(ls $dump_dir)
+do
+ TEST grep "error-gen.priv" $dump_dir/$file_name;
+done
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+TEST rm -rf $dump_dir;
+
+cleanup;
diff --git a/tests/bugs/bug-767585-gfid.t b/tests/bugs/bug-767585-gfid.t
new file mode 100755
index 000000000..49cf7423f
--- /dev/null
+++ b/tests/bugs/bug-767585-gfid.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+#Test cases to perform gfid-self-heal
+#file 'a' should be assigned a fresh gfid
+#file 'b' should be healed with gfid1 from brick1
+#file 'c' should be healed with gfid2 from brick2
+
+gfid1="0x8428b7193a764bf8be8046fb860b8993"
+gfid2="0x85ad91afa2f74694bf52c3326d048209"
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 --direct-io-mode=enable
+touch $B0/${V0}0/a $B0/${V0}1/a
+touch $B0/${V0}0/b $B0/${V0}1/b
+touch $B0/${V0}0/c $B0/${V0}1/c
+
+TEST setfattr -n trusted.gfid -v $gfid1 $B0/${V0}0/b
+TEST setfattr -n trusted.gfid -v $gfid2 $B0/${V0}1/c
+
+sleep 2
+
+cd $M0
+TEST ls -l a
+TEST ls -l b
+TEST ls -l c
+
+TEST gf_get_gfid_xattr $B0/${V0}0/a
+TEST gf_get_gfid_xattr $B0/${V0}1/a
+
+EXPECT "$gfid1" gf_get_gfid_xattr $B0/${V0}0/b
+EXPECT "$gfid1" gf_get_gfid_xattr $B0/${V0}1/b
+
+EXPECT "$gfid2" gf_get_gfid_xattr $B0/${V0}0/c
+EXPECT "$gfid2" gf_get_gfid_xattr $B0/${V0}1/c
+
+cleanup;
diff --git a/tests/bugs/bug-770655.t b/tests/bugs/bug-770655.t
new file mode 100755
index 000000000..945e323bb
--- /dev/null
+++ b/tests/bugs/bug-770655.t
@@ -0,0 +1,168 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+## Start and create a distribute-replicate volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT 'Distributed-Replicate' volinfo_field $V0 'Type';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Setting stripe-block-size as 10MB
+TEST ! $CLI volume set $V0 stripe-block-size 10MB
+EXPECT '' volinfo_field $V0 'cluster.stripe-block-size';
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
+
+## Start and create a replicate volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 8 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT 'Replicate' volinfo_field $V0 'Type';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Setting stripe-block-size as 10MB
+TEST ! $CLI volume set $V0 stripe-block-size 10MB
+EXPECT '' volinfo_field $V0 'cluster.stripe-block-size';
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
+
+## Start and create a distribute volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT 'Distribute' volinfo_field $V0 'Type';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Setting stripe-block-size as 10MB
+TEST ! $CLI volume set $V0 stripe-block-size 10MB
+EXPECT '' volinfo_field $V0 'cluster.stripe-block-size';
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
+
+## Start and create a stripe volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 stripe 8 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT 'Stripe' volinfo_field $V0 'Type';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Setting stripe-block-size as 10MB
+TEST $CLI volume set $V0 stripe-block-size 10MB
+EXPECT '10MB' volinfo_field $V0 'cluster.stripe-block-size';
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
+
+## Start and create a distributed stripe volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 stripe 4 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT 'Distributed-Stripe' volinfo_field $V0 'Type';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Setting stripe-block-size as 10MB
+TEST $CLI volume set $V0 stripe-block-size 10MB
+EXPECT '10MB' volinfo_field $V0 'cluster.stripe-block-size';
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
+
+## Start and create a distributed stripe replicate volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 stripe 2 replica 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT 'Distributed-Striped-Replicate' volinfo_field $V0 'Type';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Setting stripe-block-size as 10MB
+TEST $CLI volume set $V0 stripe-block-size 10MB
+EXPECT '10MB' volinfo_field $V0 'cluster.stripe-block-size';
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-782095.t b/tests/bugs/bug-782095.t
new file mode 100755
index 000000000..a0cea14ee
--- /dev/null
+++ b/tests/bugs/bug-782095.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 2 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Setting performance cache min size as 2MB
+TEST $CLI volume set $V0 performance.cache-min-file-size 2MB
+EXPECT '2MB' volinfo_field $V0 'performance.cache-min-file-size';
+
+## Setting performance cache max size as 20MB
+TEST $CLI volume set $V0 performance.cache-max-file-size 20MB
+EXPECT '20MB' volinfo_field $V0 'performance.cache-max-file-size';
+
+## Trying to set performance cache min size as 25MB
+TEST ! $CLI volume set $V0 performance.cache-min-file-size 25MB
+EXPECT '2MB' volinfo_field $V0 'performance.cache-min-file-size';
+
+## Able to set performance cache min size as long as its lesser than max size
+TEST $CLI volume set $V0 performance.cache-min-file-size 15MB
+EXPECT '15MB' volinfo_field $V0 'performance.cache-min-file-size';
+
+## Trying it out with only cache-max-file-size in CLI as 10MB
+TEST ! $CLI volume set $V0 cache-max-file-size 10MB
+EXPECT '20MB' volinfo_field $V0 'performance.cache-max-file-size';
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-797171.t b/tests/bugs/bug-797171.t
new file mode 100755
index 000000000..a1b28d9ff
--- /dev/null
+++ b/tests/bugs/bug-797171.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+TEST $CLI volume set $V0 debug.trace marker;
+TEST $CLI volume set $V0 debug.log-history on
+
+TEST $CLI volume start $V0;
+
+sleep 1;
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 \
+$M0;
+
+sleep 5;
+
+touch $M0/{1..22};
+rm -f $M0/*;
+
+pid_file=$(ls /var/lib/glusterd/vols/$V0/run);
+brick_pid=$(cat /var/lib/glusterd/vols/$V0/run/$pid_file);
+
+mkdir $statedumpdir/statedump_tmp/;
+echo "path=$statedumpdir/statedump_tmp" > $statedumpdir/glusterdump.options;
+echo "all=yes" >> $statedumpdir/glusterdump.options;
+
+TEST $CLI volume statedump $V0 history;
+
+file_name=$(ls $statedumpdir/statedump_tmp);
+TEST grep "xlator.debug.trace.history" $statedumpdir/statedump_tmp/$file_name;
+
+TEST umount $M0
+
+rm -rf $statedumpdir/statedump_tmp;
+rm -f $statedumpdir/glusterdump.options;
+
+cleanup;
diff --git a/tests/bugs/bug-802417.t b/tests/bugs/bug-802417.t
new file mode 100755
index 000000000..314141f6b
--- /dev/null
+++ b/tests/bugs/bug-802417.t
@@ -0,0 +1,108 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+function write_file()
+{
+ path="$1"; shift
+ echo "$*" > "$path"
+}
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+## Start and create a volume
+mkdir -p ${B0}/${V0}-0
+mkdir -p ${B0}/${V0}-1
+mkdir -p ${B0}/${V0}-2
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}-{0,1,2}
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Make sure io-cache and write-behind don't interfere.
+TEST $CLI volume set $V0 performance.io-cache off;
+TEST $CLI volume set $V0 performance.write-behind off;
+TEST $CLI volume set $V0 performance.stat-prefetch off
+
+## Make sure automatic self-heal doesn't perturb our results.
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount native
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+
+## Create a file with some recognizably stale data.
+TEST write_file $M0/a_file "old_data"
+
+## Kill two of the bricks and write some newer data.
+TEST kill_brick ${V0} ${H0} ${B0}/${V0}-1
+TEST kill_brick ${V0} ${H0} ${B0}/${V0}-2
+TEST write_file $M0/a_file "new_data"
+
+## Bring all the bricks up and kill one so we do a partial self-heal.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 1
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 2
+TEST kill_brick ${V0} ${H0} ${B0}/${V0}-2
+TEST ls -l ${M0}/a_file
+
+
+obs_path_0=${B0}/${V0}-0/a_file
+obs_path_1=${B0}/${V0}-1/a_file
+obs_path_2=${B0}/${V0}-2/a_file
+
+tgt_xattr_0="trusted.afr.${V0}-client-0"
+tgt_xattr_1="trusted.afr.${V0}-client-1"
+tgt_xattr_2="trusted.afr.${V0}-client-2"
+
+actual=$(afr_get_changelog_xattr $obs_path_0 $tgt_xattr_0)
+EXPECT "0x000000000000000000000000" echo $actual
+
+actual=$(afr_get_changelog_xattr $obs_path_0 $tgt_xattr_1)
+EXPECT "0x000000000000000000000000" echo $actual
+
+actual=$(afr_get_changelog_xattr $obs_path_0 $tgt_xattr_2)
+EXPECT "0x000000020000000000000000" echo $actual
+
+actual=$(afr_get_changelog_xattr $obs_path_1 $tgt_xattr_0)
+EXPECT "0x000000000000000000000000" echo $actual
+
+actual=$(afr_get_changelog_xattr $obs_path_1 $tgt_xattr_1)
+EXPECT "0x000000000000000000000000" echo $actual
+
+actual=$(afr_get_changelog_xattr $obs_path_1 $tgt_xattr_2)
+EXPECT "0x000000020000000000000000" echo $actual
+
+actual=$(afr_get_changelog_xattr $obs_path_2 $tgt_xattr_0)
+EXPECT "0x000000000000000000000000" echo $actual
+
+actual=$(afr_get_changelog_xattr $obs_path_2 $tgt_xattr_1)
+EXPECT "0x000000000000000000000000" echo $actual
+
+actual=$(afr_get_changelog_xattr $obs_path_2 $tgt_xattr_2)
+EXPECT "0x000000000000000000000000" echo $actual
+
+if [ "$EXIT_EARLY" = "1" ]; then
+ exit 0;
+fi
+
+## Finish up
+TEST umount $M0;
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-808400-dist.t b/tests/bugs/bug-808400-dist.t
new file mode 100755
index 000000000..6a29eb626
--- /dev/null
+++ b/tests/bugs/bug-808400-dist.t
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1 $H0:$B0/brick2;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+MOUNTDIR=$M0;
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --volfile-server=$H0 --volfile-id=$V0 $MOUNTDIR;
+
+build_tester $(dirname $0)/bug-808400-flock.c
+build_tester $(dirname $0)/bug-808400-fcntl.c
+
+TEST $(dirname $0)/bug-808400-flock $MOUNTDIR/testfile \'gluster volume set $V0 performance.write-behind off\'
+TEST $(dirname $0)/bug-808400-fcntl $MOUNTDIR/testfile \'gluster volume set $V0 performance.write-behind on\'
+
+TEST rm -rf $MOUNTDIR/*
+TEST rm -rf $(dirname $0)/bug-808400-flock $(dirname $0)/bug-808400-fcntl $(dirname $0)/glusterfs.log
+
+TEST umount $MOUNTDIR -l
+
+cleanup; \ No newline at end of file
diff --git a/tests/bugs/bug-808400-fcntl.c b/tests/bugs/bug-808400-fcntl.c
new file mode 100644
index 000000000..4deef34a5
--- /dev/null
+++ b/tests/bugs/bug-808400-fcntl.c
@@ -0,0 +1,113 @@
+#include <sys/file.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/wait.h>
+
+int
+run_child (char *filename)
+{
+ int fd = -1, ret = -1;
+ struct flock lock = {0, };
+ int ppid = 0;
+
+ fd = open (filename, O_RDWR);
+ if (fd < 0) {
+ fprintf (stderr, "open failed (%s)\n", strerror (errno));
+ goto out;
+ }
+
+ ppid = getppid ();
+
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+
+ ret = fcntl (fd, F_GETLK, &lock);
+ if (ret < 0) {
+ fprintf (stderr, "GETLK failed (%s)\n", strerror (errno));
+ goto out;
+ }
+
+ if ((lock.l_type == F_UNLCK) ||
+ (ppid != lock.l_pid)) {
+ fprintf (stderr, "no locks present, though parent has held "
+ "one\n");
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+main (int argc, char *argv[])
+{
+ int fd = -1, ret = -1, status = 0;
+ char *filename = NULL, *cmd = NULL;
+ struct stat stbuf = {0, };
+ struct flock lock = {0, };
+
+ if (argc != 3) {
+ fprintf (stderr, "Usage: %s <filename> "
+ "<gluster-cmd-to-trigger-graph-switch>\n", argv[0]);
+ goto out;
+ }
+
+ filename = argv[1];
+ cmd = argv[2];
+
+ fd = open (filename, O_RDWR | O_CREAT, 0);
+ if (fd < 0) {
+ fprintf (stderr, "open (%s) failed (%s)\n", filename,
+ strerror (errno));
+ goto out;
+ }
+
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+
+ ret = fcntl (fd, F_SETLK, &lock);
+ if (ret < 0) {
+ fprintf (stderr, "fcntl failed (%s)\n", strerror (errno));
+ goto out;
+ }
+
+ system (cmd);
+
+ /* wait till graph switch completes */
+ ret = fstat64 (fd, &stbuf);
+ if (ret < 0) {
+ fprintf (stderr, "fstat64 failure (%s)\n", strerror (errno));
+ goto out;
+ }
+
+ sleep (10);
+
+ /* By now old-graph would be disconnected and locks should be cleaned
+ * up if they are not migrated. Check that by trying to acquire a lock
+ * on a new fd opened by another process on same file.
+ */
+ ret = fork ();
+ if (ret == 0) {
+ ret = run_child (filename);
+ } else {
+ wait (&status);
+ if (WIFEXITED(status)) {
+ ret = WEXITSTATUS(status);
+ } else {
+ ret = 0;
+ }
+ }
+
+out:
+ return ret;
+}
diff --git a/tests/bugs/bug-808400-flock.c b/tests/bugs/bug-808400-flock.c
new file mode 100644
index 000000000..4770c81dc
--- /dev/null
+++ b/tests/bugs/bug-808400-flock.c
@@ -0,0 +1,92 @@
+#include <sys/file.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/wait.h>
+
+int
+run_child (char *filename)
+{
+ int fd = -1, ret = -1;
+
+ fd = open (filename, O_RDWR);
+ if (fd < 0) {
+ fprintf (stderr, "open failed (%s)\n", strerror (errno));
+ goto out;
+ }
+
+ ret = flock (fd, LOCK_EX | LOCK_NB);
+ if ((ret == 0) || (errno != EWOULDBLOCK)) {
+ fprintf (stderr, "no locks present, though parent has held "
+ "one\n");
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+main (int argc, char *argv[])
+{
+ int fd = -1, ret = -1, status = 0;
+ char *filename = NULL, *cmd = NULL;
+ struct stat stbuf = {0, };
+
+ if (argc != 3) {
+ fprintf (stderr, "Usage: %s <filename> "
+ "<gluster-cmd-to-trigger-graph-switch>\n", argv[0]);
+ goto out;
+ }
+
+ filename = argv[1];
+ cmd = argv[2];
+
+ fd = open (filename, O_RDWR | O_CREAT, 0);
+ if (fd < 0) {
+ fprintf (stderr, "open (%s) failed (%s)\n", filename,
+ strerror (errno));
+ goto out;
+ }
+
+ ret = flock (fd, LOCK_EX);
+ if (ret < 0) {
+ fprintf (stderr, "flock failed (%s)\n", strerror (errno));
+ goto out;
+ }
+
+ system (cmd);
+
+ /* wait till graph switch completes */
+ ret = fstat64 (fd, &stbuf);
+ if (ret < 0) {
+ fprintf (stderr, "fstat64 failure (%s)\n", strerror (errno));
+ goto out;
+ }
+
+ sleep (10);
+
+ /* By now old-graph would be disconnected and locks should be cleaned
+ * up if they are not migrated. Check that by trying to acquire a lock
+ * on a new fd opened by another process on same file
+ */
+ ret = fork ();
+ if (ret == 0) {
+ ret = run_child (filename);
+ } else {
+ wait (&status);
+ if (WIFEXITED(status)) {
+ ret = WEXITSTATUS(status);
+ } else {
+ ret = 0;
+ }
+ }
+
+out:
+ return ret;
+}
diff --git a/tests/bugs/bug-808400-repl.t b/tests/bugs/bug-808400-repl.t
new file mode 100755
index 000000000..69cd9379b
--- /dev/null
+++ b/tests/bugs/bug-808400-repl.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick1 $H0:$B0/brick2;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+MOUNTDIR=$M0;
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --volfile-server=$H0 --volfile-id=$V0 $MOUNTDIR;
+
+build_tester $(dirname $0)/bug-808400-flock.c
+build_tester $(dirname $0)/bug-808400-fcntl.c
+
+TEST $(dirname $0)/bug-808400-flock $MOUNTDIR/testfile \'gluster volume set $V0 performance.write-behind off\'
+TEST $(dirname $0)/bug-808400-fcntl $MOUNTDIR/testfile \'gluster volume set $V0 performance.write-behind on\'
+
+TEST rm -rf $MOUNTDIR/*
+TEST rm -rf $(dirname $0)/bug-808400-flock $(dirname $0)/bug-808400-fcntl $(dirname $0)/glusterfs.log
+
+TEST umount $MOUNTDIR -l
+
+cleanup; \ No newline at end of file
diff --git a/tests/bugs/bug-808400-stripe.t b/tests/bugs/bug-808400-stripe.t
new file mode 100755
index 000000000..3ab6f738e
--- /dev/null
+++ b/tests/bugs/bug-808400-stripe.t
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 stripe 2 $H0:$B0/brick1 $H0:$B0/brick2;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+MOUNTDIR=$M0;
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --volfile-server=$H0 --volfile-id=$V0 $MOUNTDIR;
+
+build_tester $(dirname $0)/bug-808400-flock.c
+build_tester $(dirname $0)/bug-808400-fcntl.c
+
+TEST $(dirname $0)/bug-808400-flock $MOUNTDIR/testfile \'gluster volume set $V0 performance.write-behind off\'
+TEST $(dirname $0)/bug-808400-fcntl $MOUNTDIR/testfile \'gluster volume set $V0 performance.write-behind on\'
+
+TEST rm -rf $MOUNTDIR/*
+TEST rm -rf $(dirname $0)/bug-808400-flock $(dirname $0)/bug-808400-fcntl $(dirname $0)/glusterfs.log
+
+TEST umount $MOUNTDIR -l
+
+cleanup; \ No newline at end of file
diff --git a/tests/bugs/bug-808400.t b/tests/bugs/bug-808400.t
new file mode 100755
index 000000000..49d88afd6
--- /dev/null
+++ b/tests/bugs/bug-808400.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+#mount on a random dir
+TEST MOUNTDIR="/tmp/$RANDOM"
+TEST mkdir $MOUNTDIR
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --volfile-server=$H0 --volfile-id=$V0 $MOUNTDIR;
+
+build_tester $(dirname $0)/bug-808400-flock.c
+build_tester $(dirname $0)/bug-808400-fcntl.c
+
+TEST $(dirname $0)/bug-808400-flock $MOUNTDIR/testfile \'gluster volume set $V0 performance.write-behind off\'
+TEST $(dirname $0)/bug-808400-fcntl $MOUNTDIR/testfile \'gluster volume set $V0 performance.write-behind on\'
+
+TEST rm -rf $MOUNTDIR/*
+TEST rm -rf $(dirname $0)/bug-808400-flock $(dirname $0)/bug-808400-fcntl $(dirname $0)/glusterfs.log
+
+TEST umount $MOUNTDIR -l
+TEST rm -rf $MOUNTDIR
+
+cleanup; \ No newline at end of file
diff --git a/tests/bugs/bug-811493.t b/tests/bugs/bug-811493.t
new file mode 100755
index 000000000..13e99af57
--- /dev/null
+++ b/tests/bugs/bug-811493.t
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI system uuid reset;
+
+uuid1=$(grep UUID /var/lib/glusterd/glusterd.info | cut -f 2 -d "=");
+
+TEST $CLI system uuid reset;
+uuid2=$(grep UUID /var/lib/glusterd/glusterd.info | cut -f 2 -d "=");
+
+TEST [ $uuid1 != $uuid2 ]
+
+cleanup
diff --git a/tests/bugs/bug-821056.t b/tests/bugs/bug-821056.t
new file mode 100644
index 000000000..5e81541ac
--- /dev/null
+++ b/tests/bugs/bug-821056.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 eager-lock off
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind on
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable
+touch $M0/a
+
+#Open file with fd as 5
+exec 5>$M0/a
+realpath=$(gf_get_gfid_backend_file_path $B0/${V0}0 "a")
+
+kill_brick $V0 $H0 $B0/${V0}0
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0
+
+EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 "$realpath"
+
+kill_brick $V0 $H0 $B0/${V0}0
+TEST gf_rm_file_and_gfid_link $B0/${V0}0 "a"
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0
+ls -l $M0/a 2>&1 > /dev/null #Make sure the file is re-created
+EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 "$realpath"
+EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $B0/${V0}0/a
+
+for i in {1..1024}; do
+ echo "open sesame" >&5
+done
+
+EXPECT_WITHIN 20 "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $B0/${V0}0/a
+#close the fd
+exec 5>&-
+
+#Check that anon-fd based file is not leaking.
+EXPECT_WITHIN 20 "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 "$realpath"
+cleanup;
diff --git a/tests/bugs/bug-822830.t b/tests/bugs/bug-822830.t
new file mode 100755
index 000000000..000d99f03
--- /dev/null
+++ b/tests/bugs/bug-822830.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 2 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Setting nfs.rpc-auth-reject as 192.*..*
+TEST ! $CLI volume set $V0 nfs.rpc-auth-reject 192.*..*
+EXPECT '' volinfo_field $V0 'nfs.rpc-auth-reject';
+
+# Setting nfs.rpc-auth-allow as a.a.
+TEST ! $CLI volume set $V0 nfs.rpc-auth-allow a.a.
+EXPECT '' volinfo_field $V0 'nfs.rpc-auth-allow';
+
+# Setting nfs.rpc-auth-allow as a.a
+TEST $CLI volume set $V0 nfs.rpc-auth-allow a.a
+EXPECT 'a.a' volinfo_field $V0 'nfs.rpc-auth-allow';
+
+## Setting nfs.rpc-auth-reject as 192.*.*
+TEST $CLI volume set $V0 nfs.rpc-auth-reject 192.*.*
+EXPECT '192.*.*' volinfo_field $V0 'nfs.rpc-auth-reject';
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-823081.t b/tests/bugs/bug-823081.t
new file mode 100755
index 000000000..760d9e2b6
--- /dev/null
+++ b/tests/bugs/bug-823081.t
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+V1=patchy2
+
+TEST glusterd
+TEST pidof glusterd
+
+logdir=`gluster --print-logdir`
+function set_tail ()
+{
+ vol=$1;
+ tail_success="volume create $vol $H0:$B0/${vol}1 $H0:$B0/${vol}2 : SUCCESS"
+ tail_failure="volume create $vol $H0:$B0/${vol}1 $H0:$B0/${vol}2 : FAILED : Volume $vol already exists"
+ tail_success_force="volume create $vol $H0:$B0/${vol}1 $H0:$B0/${vol}2 force : SUCCESS"
+ tail_failure_force="volume create $vol $H0:$B0/${vol}1 $H0:$B0/${vol}2 force : FAILED : Volume $vol already exists"
+}
+
+set_tail $V0;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+tail=`tail --lines=1 $logdir/.cmd_log_history | cut -d " " -f 5-`
+TEST [[ \"$tail\" == \"$tail_success\" ]]
+
+TEST ! $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+tail=`tail --lines=1 $logdir/.cmd_log_history | cut -d " " -f 5-`
+TEST [[ \"$tail\" == \"$tail_failure\" ]]
+
+set_tail $V1;
+TEST gluster volume create $V1 $H0:$B0/${V1}{1,2} force;
+tail=`tail --lines=1 $logdir/.cmd_log_history | cut -d " " -f 5-`
+TEST [[ \"$tail\" == \"$tail_success_force\" ]]
+
+TEST ! gluster volume create $V1 $H0:$B0/${V1}{1,2} force;
+tail=`tail --lines=1 $logdir/.cmd_log_history | cut -d " " -f 5-`
+TEST [[ \"$tail\" == \"$tail_failure_force\" ]]
+
+cleanup;
diff --git a/tests/bugs/bug-824753-file-locker.c b/tests/bugs/bug-824753-file-locker.c
new file mode 100644
index 000000000..903e23e0a
--- /dev/null
+++ b/tests/bugs/bug-824753-file-locker.c
@@ -0,0 +1,42 @@
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+
+int main (int argc, char *argv[])
+{
+ int fd = -1;
+ int ret = -1;
+ char command[2048] = "";
+ char filepath[255] = "";
+ struct flock fl;
+
+ fl.l_type = F_WRLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 7;
+ fl.l_len = 1;
+ fl.l_pid = getpid();
+
+ snprintf(filepath, 255, "%s/%s", argv[4], argv[5]);
+
+ fd = open(filepath, O_RDWR);
+
+ if (fd == -1)
+ return -1;
+
+ if (fcntl(fd, F_SETLKW, &fl) == -1) {
+ return -1;
+ }
+
+ snprintf(command, sizeof(command),
+ "gluster volume clear-locks %s /%s kind all posix 0,7-1 |"
+ " grep %s | awk -F'..: ' '{print $1}' | grep %s:%s/%s",
+ argv[1], argv[5], argv[2], argv[2], argv[3], argv[1]);
+
+ ret = system (command);
+ close(fd);
+
+ if (ret)
+ return -1;
+ else
+ return 0;
+}
diff --git a/tests/bugs/bug-824753.t b/tests/bugs/bug-824753.t
new file mode 100755
index 000000000..38f6bf696
--- /dev/null
+++ b/tests/bugs/bug-824753.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0
+touch $M0/file1;
+
+TEST gcc -g $(dirname $0)/bug-824753-file-locker.c -o $(dirname $0)/file-locker
+
+TEST $(dirname $0)/file-locker $V0 $H0 $B0 $M0 file1
+
+## Finish up
+TEST rm -f $(dirname $0)/file-locker
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-830665.t b/tests/bugs/bug-830665.t
new file mode 100755
index 000000000..0073ff1d9
--- /dev/null
+++ b/tests/bugs/bug-830665.t
@@ -0,0 +1,106 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+function recreate {
+ rm -rf $1 && mkdir -p $1
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+## Start and create a volume
+recreate ${B0}/${V0}-0
+recreate ${B0}/${V0}-1
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}-{0,1}
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Make sure stat-prefetch doesn't prevent self-heal checks.
+TEST $CLI volume set $V0 performance.stat-prefetch off;
+
+## Make sure automatic self-heal doesn't perturb our results.
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+
+## Wait for volume to register with rpc.mountd
+sleep 5;
+
+## Mount NFS
+TEST mount -t nfs -o vers=3,nolock,soft,intr $H0:/$V0 $N0;
+
+## Create some files and directories
+echo "test_data" > $N0/a_file;
+mkdir $N0/a_dir;
+echo "more_test_data" > $N0/a_dir/another_file;
+
+## Unmount and stop the volume.
+TEST umount $N0;
+TEST $CLI volume stop $V0;
+
+# Recreate the brick. Note that because of http://review.gluster.org/#change,4202
+# we need to preserve and restore the volume ID or else the brick (and thus the
+# entire not-very-HA-any-more volume) won't start. When that bug is fixed, we can
+# remove the [gs]etxattr calls.
+volid=$(getfattr -e hex -n trusted.glusterfs.volume-id $B0/${V0}-0 2> /dev/null \
+ | grep = | cut -d= -f2)
+rm -rf $B0/${V0}-0;
+mkdir $B0/${V0}-0;
+setfattr -n trusted.glusterfs.volume-id -v $volid $B0/${V0}-0
+
+## Restart and remount. Note that we use actimeo=0 so that the stat calls
+## we need for self-heal don't get blocked by the NFS client.
+TEST $CLI volume start $V0;
+sleep 5
+TEST mount -t nfs -o vers=3,nolock,soft,intr,actimeo=0 $H0:/$V0 $N0;
+
+## The Linux NFS client has a really charming habit of caching stuff right
+## after mount, even though we set actimeo=0 above. Life would be much easier
+## if NFS developers cared as much about correctness as they do about shaving
+## a few seconds off of benchmarks.
+ls -l $N0 &> /dev/null;
+sleep 5;
+
+## Force entry self-heal.
+find $N0 | xargs stat > /dev/null;
+#ls -lR $N0 > /dev/null;
+
+## Do NOT check through the NFS mount here. That will force a new self-heal
+## check, but we want to test whether self-heal already happened.
+
+## Make sure everything's in order on the recreated brick.
+EXPECT 'test_data' cat $B0/${V0}-0/a_file;
+EXPECT 'more_test_data' cat $B0/${V0}-0/a_dir/another_file;
+
+if [ "$EXIT_EARLY" = "1" ]; then
+ exit 0;
+fi
+
+## Finish up
+TEST umount $N0;
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-834465.c b/tests/bugs/bug-834465.c
new file mode 100644
index 000000000..61d3deac0
--- /dev/null
+++ b/tests/bugs/bug-834465.c
@@ -0,0 +1,61 @@
+#include <sys/file.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+int
+main (int argc, char *argv[])
+{
+ int fd = -1;
+ char *filename = NULL;
+ struct flock lock = {0, };
+ int i = 0;
+ int ret = -1;
+
+ if (argc != 2) {
+ fprintf (stderr, "Usage: %s <filename> ", argv[0]);
+ goto out;
+ }
+
+ filename = argv[1];
+
+ fd = open (filename, O_RDWR | O_CREAT, 0);
+ if (fd < 0) {
+ fprintf (stderr, "open (%s) failed (%s)\n", filename,
+ strerror (errno));
+ goto out;
+ }
+
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 1;
+ lock.l_len = 1;
+
+ while (i < 100) {
+ lock.l_type = F_WRLCK;
+ ret = fcntl (fd, F_SETLK, &lock);
+ if (ret < 0) {
+ fprintf (stderr, "fcntl setlk failed (%s)\n",
+ strerror (errno));
+ goto out;
+ }
+
+ lock.l_type = F_UNLCK;
+ ret = fcntl (fd, F_SETLK, &lock);
+ if (ret < 0) {
+ fprintf (stderr, "fcntl setlk failed (%s)\n",
+ strerror (errno));
+ goto out;
+ }
+
+ i++;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
diff --git a/tests/bugs/bug-834465.t b/tests/bugs/bug-834465.t
new file mode 100755
index 000000000..af7f4bd12
--- /dev/null
+++ b/tests/bugs/bug-834465.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1 $H0:$B0/brick2;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+MOUNTDIR=$M0;
+TEST glusterfs --mem-accounting --volfile-server=$H0 --volfile-id=$V0 $MOUNTDIR;
+
+sdump1=$(generate_mount_statedump $V0);
+nalloc1=0
+grep -A2 "fuse - usage-type 85" $sdump1
+if [ $? -eq '0' ]
+then
+ nalloc1=`grep -A2 "fuse - usage-type 85" $sdump1 | grep num_allocs | cut -d '=' -f2`
+fi
+
+build_tester $(dirname $0)/bug-834465.c
+
+TEST $(dirname $0)/bug-834465 $M0/testfile
+
+sdump2=$(generate_mount_statedump $V0);
+nalloc2=`grep -A2 "fuse - usage-type 85" $sdump2 | grep num_allocs | cut -d '=' -f2`
+
+TEST [ $nalloc1 -eq $nalloc2 ];
+
+TEST rm -rf $MOUNTDIR/*
+TEST rm -rf $(dirname $0)/bug-834465
+cleanup_mount_statedump $V0
+
+TEST umount $MOUNTDIR -l
+
+cleanup;
diff --git a/tests/bugs/bug-839595.t b/tests/bugs/bug-839595.t
new file mode 100644
index 000000000..979827fa7
--- /dev/null
+++ b/tests/bugs/bug-839595.t
@@ -0,0 +1,31 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+TEST $CLI volume set $V0 cluster.server-quorum-type server
+EXPECT "server" volume_option $V0 cluster.server-quorum-type
+TEST $CLI volume set $V0 cluster.server-quorum-type none
+EXPECT "none" volume_option $V0 cluster.server-quorum-type
+TEST $CLI volume reset $V0 cluster.server-quorum-type
+TEST ! $CLI volume set $V0 cluster.server-quorum-type abc
+TEST ! $CLI volume set all cluster.server-quorum-type none
+TEST ! $CLI volume set $V0 cluster.server-quorum-ratio 100
+
+TEST ! $CLI volume set all cluster.server-quorum-ratio abc
+TEST ! $CLI volume set all cluster.server-quorum-ratio -1
+TEST ! $CLI volume set all cluster.server-quorum-ratio 100.0000005
+TEST $CLI volume set all cluster.server-quorum-ratio 0
+EXPECT "0" volume_option $V0 cluster.server-quorum-ratio
+TEST $CLI volume set all cluster.server-quorum-ratio 100
+EXPECT "100" volume_option $V0 cluster.server-quorum-ratio
+TEST $CLI volume set all cluster.server-quorum-ratio 0.0000005
+EXPECT "0.0000005" volume_option $V0 cluster.server-quorum-ratio
+TEST $CLI volume set all cluster.server-quorum-ratio 100%
+EXPECT "100%" volume_option $V0 cluster.server-quorum-ratio
+cleanup;
diff --git a/tests/bugs/bug-844688.t b/tests/bugs/bug-844688.t
new file mode 100755
index 000000000..154d35e48
--- /dev/null
+++ b/tests/bugs/bug-844688.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick0
+TEST $CLI volume start $V0
+
+sleep 5
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+mount_pid=$(get_mount_process_pid $V0);
+# enable dumping of call stack creation and frame creation times in statedump
+kill -USR2 $mount_pid;
+
+TEST touch $M0/touchfile;
+(dd if=/dev/urandom of=$M0/file bs=5K 2>/dev/null 1>/dev/null)&
+back_pid=$!;
+statedump_file=$(generate_mount_statedump $V0);
+grep "callstack-creation-time" $statedump_file 2>/dev/null 1>/dev/null;
+TEST [ $? -eq 0 ];
+grep "frame-creation-time" $statedump_file 2>/dev/null 1>/dev/null;
+TEST [ $? -eq 0 ];
+
+kill -SIGTERM $back_pid;
+wait >/dev/null 2>&1;
+
+TEST rm -f $M0/touchfile $M0/file;
+TEST umount $M0;
+
+rm -f $statedumpdir/glusterdump.$mount_pid.*;
+cleanup
diff --git a/tests/bugs/bug-845213.t b/tests/bugs/bug-845213.t
new file mode 100644
index 000000000..e79b37109
--- /dev/null
+++ b/tests/bugs/bug-845213.t
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+## Create and start a volume with aio enabled
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+TEST $CLI volume set $V0 remote-dio enable;
+TEST $CLI volume set $V0 network.remote-dio disable;
+
+cleanup;
+
diff --git a/tests/bugs/bug-846240.t b/tests/bugs/bug-846240.t
new file mode 100644
index 000000000..12e4949ef
--- /dev/null
+++ b/tests/bugs/bug-846240.t
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../fileio.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+TEST $CLI volume create $V0 $H0:$B0/brick1 $H0:$B0/brick2;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+MOUNTDIR=$M0;
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $MOUNTDIR;
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M1;
+
+TEST touch $M0/testfile;
+
+# open the file with the fd as 4
+TEST fd=`fd_available`;
+TEST fd_open $fd 'w' "$M0/testfile";
+
+# remove the file from the other mount point. If unlink is sent from
+# $M0 itself, then the file will be actually opened by open-behind which
+# we dont want for this testcase
+TEST rm -f $M1/testfile;
+
+# below command opens the file and writes to the file.
+# upon open, open-behind unwinds the open call with success.
+# now when write comes, open-behind actually opens the file
+# and then sends write on the fd. But before sending open itself,
+# the file would have been removed from the mount $M1. open() gets error
+# and the write call which is put into a stub (open had to be sent first)
+# should unwind with the error received in the open call.
+echo "data" >> $M0/testfile 2>/dev/null 1>/dev/null;
+TEST [ $? -ne 0 ]
+
+TEST fd_close $fd;
+
+TEST rm -rf $MOUNTDIR/*
+
+TEST umount $MOUNTDIR -l
+
+cleanup;
diff --git a/tests/bugs/bug-847622.t b/tests/bugs/bug-847622.t
new file mode 100755
index 000000000..138499527
--- /dev/null
+++ b/tests/bugs/bug-847622.t
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick0
+TEST $CLI volume start $V0
+
+sleep 5
+
+TEST mount -t nfs -o vers=3,nolock $H0:/$V0 $N0
+cd $N0
+
+# simple getfacl setfacl commands
+TEST touch testfile
+TEST setfacl -m u:14:r testfile
+TEST getfacl testfile
+
+cd
+TEST umount $N0
+cleanup
+
diff --git a/tests/bugs/bug-847624.t b/tests/bugs/bug-847624.t
new file mode 100755
index 000000000..f4e9942e9
--- /dev/null
+++ b/tests/bugs/bug-847624.t
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+cleanup
+
+#1
+TEST glusterd
+TEST pidof glusterd
+#3
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume set $V0 nfs.drc on
+TEST $CLI volume start $V0
+sleep 5
+TEST mount -t nfs -o vers=3,nolock,soft,intr $H0:/$V0 $N0
+cd $N0
+#7
+TEST dbench -t 10 10
+TEST rm -rf $N0/*
+cd
+TEST umount $N0
+#10
+TEST $CLI volume set $V0 nfs.drc-size 10000
+cleanup
diff --git a/tests/bugs/bug-848251.t b/tests/bugs/bug-848251.t
new file mode 100644
index 000000000..dda393272
--- /dev/null
+++ b/tests/bugs/bug-848251.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+
+TEST $CLI volume start $V0;
+
+#enable quota
+TEST $CLI volume quota $V0 enable;
+
+#mount on a random dir
+TEST MOUNTDIR="/tmp/$RANDOM"
+TEST mkdir $MOUNTDIR
+TEST glusterfs -s $H0 --volfile-id=$V0 $MOUNTDIR
+
+function set_quota(){
+ mkdir "$MOUNTDIR/$name"
+ $CLI volume quota $V0 limit-usage /$name 50KB
+}
+
+function quota_list(){
+ $CLI volume quota $V0 list | grep -- /$name | awk '{print $3}'
+}
+
+TEST name=":d1"
+#file name containing ':' in the start
+TEST set_quota
+EXPECT "0Bytes" quota_list
+
+TEST name=":d1/d:1"
+#file name containing ':' in between
+TEST set_quota
+EXPECT "0Bytes" quota_list
+
+TEST name=":d1/d:1/d1:"
+#file name containing ':' in the end
+TEST set_quota
+EXPECT "0Bytes" quota_list
+
+TEST umount $MOUNTDIR
+TEST rm -rf $MOUNTDIR
+
+cleanup;
diff --git a/tests/bugs/bug-852147.t b/tests/bugs/bug-852147.t
new file mode 100755
index 000000000..0e7923086
--- /dev/null
+++ b/tests/bugs/bug-852147.t
@@ -0,0 +1,85 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+logdir=`gluster --print-logdir`"/bricks"
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0
+touch $M0/file1;
+
+TEST $CLI volume set $V0 performance.cache-max-file-size 20MB
+TEST $CLI volume set $V0 performance.cache-min-file-size 10MB
+
+EXPECT "20MB" volinfo_field $V0 'performance.cache-max-file-size';
+EXPECT "10MB" volinfo_field $V0 'performance.cache-min-file-size';
+
+#Performing volume reset and verifying.
+TEST $CLI volume reset $V0
+EXPECT "" volinfo_field $V0 'performance.cache-max-file-size';
+EXPECT "" volinfo_field $V0 'performance.cache-min-file-size';
+
+#Verifying vlolume-profile start, info and stop
+EXPECT "Starting volume profile on $V0 has been successful " $CLI volume profile $V0 start
+
+function vol_prof_info()
+{
+ $CLI volume profile $V0 info | grep Brick | wc -l
+}
+EXPECT "8" vol_prof_info
+
+EXPECT "Stopping volume profile on $V0 has been successful " $CLI volume profile $V0 stop
+
+function log-file-name()
+{
+ logfilename=$B0"/"$V0"1.log"
+ echo ${logfilename:1} | tr / -
+}
+
+function file-size()
+{
+ ls -lrt $1 | awk '{print $5}'
+}
+
+#Finding the current log file's size
+log_file=$logdir"/"`log-file-name`
+log_file_size=`file-size $log_file`
+
+#Removing the old backup log files
+ren_file=$log_file".*"
+rm -rf $ren_file
+
+#Initiating log rotate
+TEST $CLI volume log rotate $V0
+
+#Capturing new log file's size
+new_file_size=`file-size $log_file`
+
+#Verifying the size of the new log file and the creation of the backup log file
+TEST ! [ $new_file_size -eq $log_file_size ]
+TEST ls -lrt $ren_file
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-853258.t b/tests/bugs/bug-853258.t
new file mode 100755
index 000000000..faa9d4465
--- /dev/null
+++ b/tests/bugs/bug-853258.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+mkdir -p $H0:$B0/${V0}0
+mkdir -p $H0:$B0/${V0}1
+mkdir -p $H0:$B0/${V0}2
+mkdir -p $H0:$B0/${V0}3
+
+# Create and start a volume.
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2
+TEST $CLI volume start $V0
+EXPECT_WITHIN 15 'Started' volinfo_field $V0 'Status';
+
+# Force assignment of initial ranges.
+TEST $CLI volume rebalance $V0 fix-layout start
+EXPECT_WITHIN 15 "fix-layout completed" rebalance_status_field $V0
+
+# Get the original values.
+xattrs=""
+for i in $(seq 0 2); do
+ xattrs="$xattrs $(dht_get_layout $B0/${V0}$i)"
+done
+
+# Expand the volume and force assignment of new ranges.
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3
+# Force assignment of initial ranges.
+TEST $CLI volume rebalance $V0 fix-layout start
+EXPECT_WITHIN 15 "fix-layout completed" rebalance_status_field $V0
+
+for i in $(seq 0 3); do
+ xattrs="$xattrs $(dht_get_layout $B0/${V0}$i)"
+done
+
+overlap=$(python2 $(dirname $0)/overlap.py $xattrs)
+# 2863311531 = 0xaaaaaaab = 2/3 overlap
+TEST [ "$overlap" -ge 2863311531 ]
+
+cleanup
diff --git a/tests/bugs/bug-853680.t b/tests/bugs/bug-853680.t
new file mode 100755
index 000000000..72d53ae6c
--- /dev/null
+++ b/tests/bugs/bug-853680.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+#
+# Bug 853680
+#
+# Test that io-threads least-rate-limit throttling functions as expected. Set
+# a limit, perform a few operations with a least-priority mount and verify
+# said operations take a minimum amount of time according to the limit.
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+
+#Accept min val
+TEST $CLI volume set $V0 performance.least-rate-limit 0
+#Accept some value in between
+TEST $CLI volume set $V0 performance.least-rate-limit 1035
+#Accept max val INT_MAX
+TEST $CLI volume set $V0 performance.least-rate-limit 2147483647
+
+#Reject other values
+TEST ! $CLI volume set $V0 performance.least-rate-limit 2147483648
+TEST ! $CLI volume set $V0 performace.least-rate-limit -8
+TEST ! $CLI volume set $V0 performance.least-rate-limit abc
+TEST ! $CLI volume set $V0 performance.least-rate-limit 0.0
+TEST ! $CLI volume set $V0 performance.least-rate-limit -10.0
+TEST ! $CLI volume set $V0 performance.least-rate-limit 1%
+
+# set rate limit to 1 operation/sec
+TEST $CLI volume set $V0 performance.least-rate-limit 1
+
+# use client-pid=-1 for least priority mount
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --client-pid=-1
+
+# create a few files and verify this takes more than a few seconds
+date1=`date +%s`
+TEST touch $M0/file{0..2}
+date2=`date +%s`
+
+optime=$(($date2 - $date1))
+TEST [ $optime -ge 3 ]
+
+TEST umount $M0
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/bug-853690.t b/tests/bugs/bug-853690.t
new file mode 100755
index 000000000..77a581f54
--- /dev/null
+++ b/tests/bugs/bug-853690.t
@@ -0,0 +1,94 @@
+#!/bin/bash
+#
+# Bug 853690 - Test that short writes do not lead to corruption.
+#
+# Mismanagement of short writes in AFR leads to corruption and immediately
+# detectable split-brain. Write a file to a replica volume using error-gen
+# to cause short writes on one replica.
+#
+# Short writes are also possible during heal. If ignored, the files are marked
+# consistent and silently differ. After reading the file, cause a lookup, wait
+# for self-heal and verify that the afr xattrs do not match.
+#
+########
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST mkdir -p $B0/test{1,2}
+
+# Our graph is a two brick replica with 100% frequency of short writes on one
+# side of the replica. This guarantees a single write fop leads to an out-of-sync
+# situation.
+cat > $B0/test.vol <<EOF
+volume test-posix-0
+ type storage/posix
+ option directory $B0/test1
+end-volume
+
+volume test-error-0
+ type debug/error-gen
+ option failure 100
+ option enable writev
+ option error-no GF_ERROR_SHORT_WRITE
+ subvolumes test-posix-0
+end-volume
+
+volume test-locks-0
+ type features/locks
+ subvolumes test-error-0
+end-volume
+
+volume test-posix-1
+ type storage/posix
+ option directory $B0/test2
+end-volume
+
+volume test-locks-1
+ type features/locks
+ subvolumes test-posix-1
+end-volume
+
+volume test-replicate-0
+ type cluster/replicate
+ option background-self-heal-count 0
+ subvolumes test-locks-0 test-locks-1
+end-volume
+EOF
+
+TEST glusterd
+
+TEST glusterfs --volfile=$B0/test.vol --attribute-timeout=0 --entry-timeout=0 $M0
+
+# Send a single write, guaranteed to be short on one replica, and attempt to
+# read the data back. Failure to detect the short write results in different
+# file sizes and immediate split-brain (EIO).
+TEST dd if=/dev/zero of=$M0/file bs=128k count=1
+TEST dd if=$M0/file of=/dev/null bs=128k count=1
+
+########
+#
+# Test self-heal with short writes...
+#
+########
+
+# Cause a lookup and wait a few seconds for posterity. This self-heal also fails
+# due to a short write.
+TEST ls $M0/file
+
+# Verify the attributes on the healthy replica do not reflect consistency with
+# the other replica.
+TEST "getfattr -n trusted.afr.test-locks-0 $B0/test2/file --only-values > $B0/out1 2> /dev/null"
+TEST "getfattr -n trusted.afr.test-locks-1 $B0/test2/file --only-values > $B0/out2 2> /dev/null"
+TEST ! cmp $B0/out1 $B0/out2
+
+TEST rm -f $B0/out1 $B0/out2
+TEST rm -f $M0/file
+TEST umount $M0
+
+rm -f $B0/test.vol
+rm -rf $B0/test1 $B0/test2
+
+cleanup;
+
diff --git a/tests/bugs/bug-856455.t b/tests/bugs/bug-856455.t
new file mode 100644
index 000000000..becb20222
--- /dev/null
+++ b/tests/bugs/bug-856455.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+BRICK_COUNT=3
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2
+TEST $CLI volume start $V0
+
+## Mount FUSE with caching disabled
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+function query_pathinfo()
+{
+ local path=$1;
+ local retval;
+
+ local pathinfo=`getfattr -m . -n trusted.glusterfs.pathinfo $path`;
+ retval=`echo $pathinfo | grep -o 'POSIX' | wc -l`;
+ echo $retval
+}
+
+touch $M0/f00f;
+mkdir $M0/f00d;
+
+# verify pathinfo for a file and directory
+EXPECT 1 query_pathinfo $M0/f00f;
+EXPECT $BRICK_COUNT query_pathinfo $M0/f00d;
+
+# Kill a brick process and then query for pathinfo
+# for directories pathinfo should list backend patch from available (up) subvolumes
+
+kill -9 `cat /var/lib/glusterd/vols/$V0/run/$H0-d-backends-${V0}1.pid`;
+
+EXPECT `expr $BRICK_COUNT - 1` query_pathinfo $M0/f00d;
+
+cleanup;
diff --git a/tests/bugs/bug-857330/common.rc b/tests/bugs/bug-857330/common.rc
new file mode 100644
index 000000000..e5a7cd79a
--- /dev/null
+++ b/tests/bugs/bug-857330/common.rc
@@ -0,0 +1,55 @@
+. $(dirname $0)/../../include.rc
+
+UUID_REGEX='[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}'
+
+TASK_ID=""
+COMMAND=""
+PATTERN=""
+
+function check-and-store-task-id()
+{
+ TASK_ID=""
+
+ local task_id=$($CLI $COMMAND | grep $PATTERN | grep -o -E "$UUID_REGEX")
+
+ if [ -z "$task_id" ] && [ "${task_id+asdf}" = "asdf" ]; then
+ return 1
+ fi
+
+ TASK_ID=$task_id
+ return 0;
+}
+
+function get-task-id()
+{
+ $CLI $COMMAND | grep $PATTERN | grep -o -E "$UUID_REGEX" | tail -n1
+
+}
+
+function check-and-store-task-id-xml()
+{
+ TASK_ID=""
+
+ local task_id=$($CLI $COMMAND --xml | xmllint --format - | grep $PATTERN | grep -o -E "$UUID_REGEX")
+
+ if [ -z "$task_id" ] && [ "${task_id+asdf}" = "asdf" ]; then
+ return 1
+ fi
+
+ TASK_ID=$task_id
+ return 0;
+}
+
+function get-task-id-xml()
+{
+ $CLI $COMMAND --xml | xmllint --format - | grep $PATTERN | grep -o -E "$UUID_REGEX"
+}
+
+function get-task-status()
+{
+ $CLI $COMMAND | grep -o $PATTERN
+ if [ ${PIPESTATUS[0]} -ne 0 ]; then
+ return 1
+ fi
+ return 0
+}
diff --git a/tests/bugs/bug-857330/normal.t b/tests/bugs/bug-857330/normal.t
new file mode 100755
index 000000000..24dfe52c4
--- /dev/null
+++ b/tests/bugs/bug-857330/normal.t
@@ -0,0 +1,78 @@
+#!/bin/bash
+
+. $(dirname $0)/common.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1;
+TEST $CLI volume info $V0;
+TEST $CLI volume start $V0;
+
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0;
+
+TEST python2 $(dirname $0)/../../utils/create-files.py --multi -b 10 -d 10 -n 10 $M0;
+
+TEST umount $M0;
+
+###############
+## Rebalance ##
+###############
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}2;
+
+COMMAND="volume rebalance $V0 start"
+PATTERN="ID:"
+TEST check-and-store-task-id
+
+COMMAND="volume status $V0"
+PATTERN="ID"
+EXPECT $TASK_ID get-task-id
+
+COMMAND="volume rebalance $V0 status"
+PATTERN="completed"
+EXPECT_WITHIN 300 $PATTERN get-task-status
+
+###################
+## Replace-brick ##
+###################
+REP_BRICK_PAIR="$H0:$B0/${V0}2 $H0:$B0/${V0}3"
+
+COMMAND="volume replace-brick $V0 $REP_BRICK_PAIR start"
+PATTERN="ID:"
+TEST check-and-store-task-id
+
+COMMAND="volume status $V0"
+PATTERN="ID"
+EXPECT $TASK_ID get-task-id
+
+COMMAND="volume replace-brick $V0 $REP_BRICK_PAIR status"
+PATTERN="complete"
+EXPECT_WITHIN 300 $PATTERN get-task-status
+
+TEST $CLI volume replace-brick $V0 $REP_BRICK_PAIR commit;
+
+##################
+## Remove-brick ##
+##################
+COMMAND="volume remove-brick $V0 $H0:$B0/${V0}3 start"
+PATTERN="ID:"
+TEST check-and-store-task-id
+
+COMMAND="volume status $V0"
+PATTERN="ID"
+EXPECT $TASK_ID get-task-id
+
+COMMAND="volume remove-brick $V0 $H0:$B0/${V0}3 status"
+PATTERN="completed"
+EXPECT_WITHIN 300 $PATTERN get-task-status
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}3 commit
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-857330/xml.t b/tests/bugs/bug-857330/xml.t
new file mode 100755
index 000000000..688f46619
--- /dev/null
+++ b/tests/bugs/bug-857330/xml.t
@@ -0,0 +1,101 @@
+#!/bin/bash
+
+. $(dirname $0)/common.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1;
+TEST $CLI volume info $V0;
+TEST $CLI volume start $V0;
+
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0;
+
+TEST python2 $(dirname $0)/../../utils/create-files.py --multi -b 10 -d 10 -n 10 $M0;
+
+TEST umount $M0;
+
+
+###############
+## Rebalance ##
+###############
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}2;
+
+COMMAND="volume rebalance $V0 start"
+PATTERN="task-id"
+TEST check-and-store-task-id-xml
+
+COMMAND="volume status $V0"
+PATTERN="id"
+EXPECT $TASK_ID get-task-id-xml
+
+COMMAND="volume rebalance $V0 status"
+PATTERN="task-id"
+EXPECT $TASK_ID get-task-id-xml
+
+## TODO: Add tests for rebalance stop
+
+COMMAND="volume rebalance $V0 status"
+PATTERN="completed"
+EXPECT_WITHIN 300 $PATTERN get-task-status
+
+###################
+## Replace-brick ##
+###################
+REP_BRICK_PAIR="$H0:$B0/${V0}2 $H0:$B0/${V0}3"
+
+COMMAND="volume replace-brick $V0 $REP_BRICK_PAIR start"
+PATTERN="task-id"
+TEST check-and-store-task-id-xml
+
+COMMAND="volume status $V0"
+PATTERN="id"
+EXPECT $TASK_ID get-task-id-xml
+
+COMMAND="volume replace-brick $V0 $REP_BRICK_PAIR status"
+PATTERN="task-id"
+EXPECT $TASK_ID get-task-id-xml
+
+## TODO: Add more tests for replace-brick pause|abort
+
+COMMAND="volume replace-brick $V0 $REP_BRICK_PAIR status"
+PATTERN="complete"
+EXPECT_WITHIN 300 $PATTERN get-task-status
+
+COMMAND="volume replace-brick $V0 $REP_BRICK_PAIR commit"
+PATTERN="task-id"
+EXPECT $TASK_ID get-task-id-xml
+
+##################
+## Remove-brick ##
+##################
+COMMAND="volume remove-brick $V0 $H0:$B0/${V0}3 start"
+PATTERN="task-id"
+TEST check-and-store-task-id-xml
+
+COMMAND="volume status $V0"
+PATTERN="id"
+EXPECT $TASK_ID get-task-id-xml
+
+COMMAND="volume remove-brick $V0 $H0:$B0/${V0}3 status"
+PATTERN="task-id"
+EXPECT $TASK_ID get-task-id-xml
+
+COMMAND="volume remove-brick $V0 $H0:$B0/${V0}3 status"
+PATTERN="completed"
+EXPECT_WITHIN 300 $PATTERN get-task-status
+
+## TODO: Add tests for remove-brick stop
+
+COMMAND="volume remove-brick $V0 $H0:$B0/${V0}3 commit"
+PATTERN="task-id"
+EXPECT $TASK_ID get-task-id-xml
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-858215.t b/tests/bugs/bug-858215.t
new file mode 100755
index 000000000..aee7d5fcb
--- /dev/null
+++ b/tests/bugs/bug-858215.t
@@ -0,0 +1,81 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount FUSE with caching disabled
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+## Wait for volume to register with rpc.mountd
+sleep 5;
+
+## Test for checking whether the fops have been saved in the event-history
+TEST ! stat $M0/newfile;
+TEST touch $M0/newfile;
+TEST stat $M0/newfile;
+TEST rm $M0/newfile;
+
+nfs_pid=$(cat /var/lib/glusterd/nfs/run/nfs.pid);
+glustershd_pid=$(cat /var/lib/glusterd/glustershd/run/glustershd.pid);
+
+pids=$(pidof glusterfs);
+for i in $pids
+do
+ if [ $i -ne $nfs_pid ] && [ $i -ne $glustershd_pid ]; then
+ mount_pid=$i;
+ break;
+ fi
+done
+
+dump_dir='/tmp/gerrit_glusterfs'
+cat >$statedumpdir/glusterdump.options <<EOF
+all=yes
+path=$dump_dir
+EOF
+
+TEST mkdir -p $dump_dir;
+TEST kill -USR1 $mount_pid;
+sleep 2;
+for file_name in $(ls $dump_dir)
+do
+ TEST grep "xlator.mount.fuse.history" $dump_dir/$file_name;
+done
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+TEST rm -rf $dump_dir;
+TEST rm $statedumpdir/glusterdump.options;
+
+cleanup;
diff --git a/tests/bugs/bug-858242.c b/tests/bugs/bug-858242.c
new file mode 100644
index 000000000..00a3a2d5f
--- /dev/null
+++ b/tests/bugs/bug-858242.c
@@ -0,0 +1,77 @@
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+int
+main (int argc, char *argv[])
+{
+ char *filename = NULL, *volname = NULL, *cmd = NULL;
+ char buffer[1024] = {0, };
+ int fd = -1;
+ int ret = -1;
+ struct stat statbuf = {0, };
+
+ if (argc != 3) {
+ fprintf (stderr, "usage: %s <file-name> <volname>\n", argv[0]);
+ goto out;
+ }
+
+ filename = argv[1];
+ volname = argv[2];
+
+ fd = open (filename, O_RDWR | O_CREAT, 0);
+ if (fd < 0) {
+ fprintf (stderr, "open (%s) failed (%s)\n", filename,
+ strerror (errno));
+ goto out;
+ }
+
+ ret = write (fd, "test-content", 12);
+ if (ret < 0) {
+ fprintf (stderr, "write failed (%s)", strerror (errno));
+ goto out;
+ }
+
+ ret = fsync (fd);
+ if (ret < 0) {
+ fprintf (stderr, "fsync failed (%s)", strerror (errno));
+ goto out;
+ }
+
+ ret = fstat64 (fd, &statbuf);
+ if (ret < 0) {
+ fprintf (stderr, "fstat64 failed (%s)", strerror (errno));
+ goto out;
+ }
+
+ ret = asprintf (&cmd, "gluster --mode=script volume stop %s force",
+ volname);
+ if (ret < 0) {
+ fprintf (stderr, "cannot construct cli command string (%s)",
+ strerror (errno));
+ goto out;
+ }
+
+ ret = system (cmd);
+ if (ret < 0) {
+ fprintf (stderr, "stopping volume (%s) failed", volname);
+ goto out;
+ }
+
+ ret = read (fd, buffer, 1024);
+ if (ret >= 0) {
+ fprintf (stderr, "read should've returned error, "
+ "but is successful\n");
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
diff --git a/tests/bugs/bug-858242.t b/tests/bugs/bug-858242.t
new file mode 100755
index 000000000..e93c2d244
--- /dev/null
+++ b/tests/bugs/bug-858242.t
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $CLI volume set $V0 performance.quick-read off
+
+#mount on a random dir
+TEST glusterfs --entry-timeout=3600 --attribute-timeout=3600 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=yes
+
+build_tester $(dirname $0)/bug-858242.c
+
+TEST $(dirname $0)/bug-858242 $M0/testfile $V0
+
+TEST rm -rf $(dirname $0)/858242
+cleanup;
+
diff --git a/tests/bugs/bug-858488-min-free-disk.t b/tests/bugs/bug-858488-min-free-disk.t
new file mode 100644
index 000000000..ae5ac3bde
--- /dev/null
+++ b/tests/bugs/bug-858488-min-free-disk.t
@@ -0,0 +1,114 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+function pidgrep()
+{
+ ps ax | grep "$1" | grep -v grep | awk '{print $1}' | head -1
+}
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+## Lets create partitions for bricks
+TEST truncate -s 100M $B0/brick1
+TEST truncate -s 200M $B0/brick2
+TEST LO1=`losetup --find --show $B0/brick1`
+TEST mkfs.xfs $LO1
+TEST LO2=`losetup --find --show $B0/brick2`
+TEST mkfs.xfs $LO2
+TEST mkdir -p $B0/${V0}1 $B0/${V0}2
+TEST mount -t xfs $LO1 $B0/${V0}1
+TEST mount -t xfs $LO2 $B0/${V0}2
+
+
+## Lets create volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs -s $H0 --volfile-id=$V0 --acl $M0
+MOUNT_PID=`ps ax |grep "glusterfs -s $H0 --volfile-id=$V0 --acl $M0" | awk '{print $1}' | head -1`
+## Real test starts here
+## ----------------------------------------------------------------------------
+
+MINFREEDISKVALUE=90
+
+## Set min free disk to MINFREEDISKVALUE percent
+TEST $CLI volume set $V0 cluster.min-free-disk $MINFREEDISKVALUE
+
+## We need to have file name to brick map based on hash.
+## We will use this info in test case 0.
+i=1
+CONTINUE=2
+BRICK1FILE=0
+BRICK2FILE=0
+while [[ $CONTINUE -ne 0 ]]
+do
+ dd if=/dev/zero of=$M0/file$i.data bs=1024 count=1024 1>/dev/null 2>&1
+
+ if [[ -e $B0/${V0}1/file$i.data && $BRICK1FILE = "0" ]]
+ then
+ BRICK1FILE=file$i.data
+ CONTINUE=$CONTINUE-1
+ fi
+
+ if [[ -e $B0/${V0}2/file$i.data && $BRICK2FILE = "0" ]]
+ then
+ BRICK2FILE=file$i.data
+ CONTINUE=$CONTINUE-1
+ fi
+
+ rm $M0/file$i.data
+ let i++
+done
+
+
+## Bring free space on one of the bricks to less than minfree value by
+## creating one big file.
+dd if=/dev/zero of=$M0/fillonebrick.data bs=1024 count=25600 1>/dev/null 2>&1
+
+#Lets find out where it was created
+if [ -f $B0/${V0}1/fillonebrick.data ]
+then
+ FILETOCREATE=$BRICK1FILE
+ OTHERBRICK=$B0/${V0}2
+else
+ FILETOCREATE=$BRICK2FILE
+ OTHERBRICK=$B0/${V0}1
+fi
+
+##--------------------------------TEST CASE 0-----------------------------------
+## If we try to create a file which should go into full brick as per hash, it
+## should go into the other brick instead.
+
+## Before that let us create files just to make gluster refresh the stat
+## Using touch so it should not change the disk usage stats
+for k in {1..20};
+do
+ touch $M0/dummyfile$k
+done
+
+dd if=/dev/zero of=$M0/$FILETOCREATE bs=1024 count=2048 1>/dev/null 2>&1
+TEST [ -e $OTHERBRICK/$FILETOCREATE ]
+
+## Done testing, lets clean up
+EXPECT "$MOUNT_PID" pidgrep $MOUNT_PID
+TEST rm -rf $M0/*
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+$CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-859927.t b/tests/bugs/bug-859927.t
new file mode 100755
index 000000000..ed74d3eb8
--- /dev/null
+++ b/tests/bugs/bug-859927.t
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+cleanup;
+
+glusterd;
+
+TEST $CLI volume create $V0 replica 2 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+TEST ! $CLI volume set $V0 statedump-path ""
+TEST ! $CLI volume set $V0 statedump-path " "
+TEST $CLI volume set $V0 statedump-path "/home/"
+EXPECT "/home/" volume_option $V0 server.statedump-path
+
+TEST ! $CLI volume set $V0 background-self-heal-count ""
+TEST ! $CLI volume set $V0 background-self-heal-count " "
+TEST $CLI volume set $V0 background-self-heal-count 10
+EXPECT "10" volume_option $V0 cluster.background-self-heal-count
+
+TEST ! $CLI volume set $V0 cache-size ""
+TEST ! $CLI volume set $V0 cache-size " "
+TEST $CLI volume set $V0 cache-size 512MB
+EXPECT "512MB" volume_option $V0 performance.cache-size
+
+TEST ! $CLI volume set $V0 self-heal-daemon ""
+TEST ! $CLI volume set $V0 self-heal-daemon " "
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT "on" volume_option $V0 cluster.self-heal-daemon
+
+TEST ! $CLI volume set $V0 read-subvolume ""
+TEST ! $CLI volume set $V0 read-subvolume " "
+TEST $CLI volume set $V0 read-subvolume $V0-client-0
+EXPECT "$V0-client-0" volume_option $V0 cluster.read-subvolume
+
+TEST ! $CLI volume set $V0 data-self-heal-algorithm ""
+TEST ! $CLI volume set $V0 data-self-heal-algorithm " "
+TEST ! $CLI volume set $V0 data-self-heal-algorithm on
+TEST $CLI volume set $V0 data-self-heal-algorithm full
+EXPECT "full" volume_option $V0 cluster.data-self-heal-algorithm
+
+TEST ! $CLI volume set $V0 min-free-inodes ""
+TEST ! $CLI volume set $V0 min-free-inodes " "
+TEST $CLI volume set $V0 min-free-inodes 60%
+EXPECT "60%" volume_option $V0 cluster.min-free-inodes
+
+TEST ! $CLI volume set $V0 min-free-disk ""
+TEST ! $CLI volume set $V0 min-free-disk " "
+TEST $CLI volume set $V0 min-free-disk 60%
+EXPECT "60%" volume_option $V0 cluster.min-free-disk
+
+TEST $CLI volume set $V0 min-free-disk 120
+EXPECT "120" volume_option $V0 cluster.min-free-disk
+
+TEST ! $CLI volume set $V0 frame-timeout ""
+TEST ! $CLI volume set $V0 frame-timeout " "
+TEST $CLI volume set $V0 frame-timeout 0
+EXPECT "0" volume_option $V0 network.frame-timeout
+
+TEST ! $CLI volume set $V0 auth.allow ""
+TEST ! $CLI volume set $V0 auth.allow " "
+TEST $CLI volume set $V0 auth.allow 192.168.122.1
+EXPECT "192.168.122.1" volume_option $V0 auth.allow
+
+TEST ! $CLI volume set $V0 stripe-block-size ""
+TEST ! $CLI volume set $V0 stripe-block-size " "
+TEST $CLI volume set $V0 stripe-block-size 512MB
+EXPECT "512MB" volume_option $V0 cluster.stripe-block-size
+
+cleanup;
diff --git a/tests/bugs/bug-860297.t b/tests/bugs/bug-860297.t
new file mode 100644
index 000000000..2a3ca7a7a
--- /dev/null
+++ b/tests/bugs/bug-860297.t
@@ -0,0 +1,13 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+TEST $CLI volume create $V0 $H0:$B0/brick1
+setfattr -x trusted.glusterfs.volume-id $B0/brick1
+## If Extended attribute trusted.glusterfs.volume-id is not present
+## then volume should not be able to start
+TEST ! $CLI volume start $V0;
+cleanup;
diff --git a/tests/bugs/bug-860663.t b/tests/bugs/bug-860663.t
new file mode 100644
index 000000000..05dea5fbc
--- /dev/null
+++ b/tests/bugs/bug-860663.t
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+function file_count()
+{
+ val=1
+
+ if [ "$1" == "$2" ]
+ then
+ val=0
+ fi
+ echo $val
+}
+
+BRICK_COUNT=3
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2
+TEST $CLI volume start $V0
+
+## Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+sleep 5;
+
+TEST touch $M0/files{1..10000};
+
+ORIG_FILE_COUNT=`ls -l $M0 | wc -l`;
+
+# Kill a brick process
+kill -9 `cat /var/lib/glusterd/vols/$V0/run/$H0-d-backends-${V0}1.pid`;
+
+TEST $CLI volume rebalance $V0 fix-layout start
+
+sleep 30;
+
+TEST ! touch $M0/files{1..10000};
+
+TEST $CLI volume start $V0 force
+
+sleep 5;
+
+NEW_FILE_COUNT=`ls -l $M0 | wc -l`;
+
+EXPECT "0" file_count $ORIG_FILE_COUNT $NEW_FILE_COUNT
+
+cleanup;
diff --git a/tests/bugs/bug-861015-index.t b/tests/bugs/bug-861015-index.t
new file mode 100644
index 000000000..4b148e6cc
--- /dev/null
+++ b/tests/bugs/bug-861015-index.t
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume set $V0 ensure-durability off
+TEST $CLI volume start $V0
+EXPECT_WITHIN 20 "Y" glustershd_up_status
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill_brick $V0 $H0 $B0/${V0}4
+cd $M0
+HEAL_FILES=0
+for i in {1..10}
+do
+ echo "abc" > $i
+ HEAL_FILES=$(($HEAL_FILES+1))
+done
+HEAL_FILES=$(($HEAL_FILES+3)) #count brick root distribute-subvol num of times
+
+cd ~
+EXPECT "$HEAL_FILES" afr_get_pending_heal_count $V0
+TEST rm -f $M0/*
+TEST umount $M0
+TEST $CLI volume heal $V0 info
+#Only root dir should be present now in the indices
+EXPECT "1" afr_get_num_indices_in_brick $B0/${V0}1
+EXPECT "1" afr_get_num_indices_in_brick $B0/${V0}3
+EXPECT "1" afr_get_num_indices_in_brick $B0/${V0}5
+cleanup
diff --git a/tests/bugs/bug-861015-log.t b/tests/bugs/bug-861015-log.t
new file mode 100644
index 000000000..032032470
--- /dev/null
+++ b/tests/bugs/bug-861015-log.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+log_wd=$(gluster --print-logdir)
+TEST glusterd
+TEST pidof glusterd
+rm -f $log_wd/glustershd.log
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST kill_brick $V0 $H0 $B0/${V0}0
+cd $M0
+for i in {1..10}
+do
+ dd if=/dev/urandom of=f bs=1M count=10 2>/dev/null
+done
+
+cd ~
+TEST $CLI volume heal $V0 info
+function count_inode_link_failures {
+ logfile=$1
+ grep "inode link failed on the inode" $logfile | wc -l
+}
+EXPECT "0" count_inode_link_failures $log_wd/glustershd.log
+cleanup
diff --git a/tests/bugs/bug-861542.t b/tests/bugs/bug-861542.t
new file mode 100755
index 000000000..5fd08f12d
--- /dev/null
+++ b/tests/bugs/bug-861542.t
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+# Distributed volume with a single brick was chosen solely for the ease of
+#implementing the test case (to be precise, for the ease of extracting the port number).
+TEST $CLI volume create $V0 $H0:$B0/brick0;
+
+TEST $CLI volume start $V0;
+
+function port_field()
+{
+ local vol=$1;
+ local opt=$2;
+ if [ $opt -eq '0' ]; then
+ $CLI volume status $vol | grep "brick0" | awk '{print $3}';
+ else
+ $CLI volume status $vol detail | grep "^Port " | awk '{print $3}';
+ fi
+}
+
+function xml_port_field()
+{
+ local vol=$1;
+ local opt=$2;
+ $CLI --xml volume status $vol $opt | tr -d '\n' |\
+#Find the first occurrence of the string between <port> and </port>
+ sed -r 's/<port>/&\n/;s/<\/port>/\n&/;s/^.*\n(.*)\n.*$/\1/'| \
+ grep -v xml | tr -d '\n';
+}
+
+TEST $CLI volume status $V0;
+TEST $CLI volume status $V0 detail;
+TEST $CLI --xml volume status $V0;
+TEST $CLI --xml volume status $V0 detail;
+
+# Kill the brick process. After this, port number for the killed (in this case brick) process must be "N/A".
+kill `cat /var/lib/glusterd/vols/$V0/run/$H0-d-backends-brick0.pid`
+
+EXPECT "N/A" port_field $V0 '0'; # volume status
+EXPECT "N/A" port_field $V0 '1'; # volume status detail
+
+EXPECT "N/A" xml_port_field $V0 '';
+EXPECT "N/A" xml_port_field $V0 'detail';
+
+cleanup;
diff --git a/tests/bugs/bug-862834.t b/tests/bugs/bug-862834.t
new file mode 100755
index 000000000..33aaea1a8
--- /dev/null
+++ b/tests/bugs/bug-862834.t
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+V1="patchy2"
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+function check_brick()
+{
+ vol=$1;
+ num=$2
+ $CLI volume info $V0 | grep "Brick$num" | awk '{print $2}';
+}
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+function brick_count()
+{
+ local vol=$1;
+
+ $CLI volume info $vol | egrep "^Brick[0-9]+: " | wc -l;
+}
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '2' brick_count $V0
+
+
+EXPECT "$H0:$B0/${V0}1" check_brick $V0 '1';
+EXPECT "$H0:$B0/${V0}2" check_brick $V0 '2';
+
+TEST ! $CLI volume create $V1 $H0:$B0/${V1}0 $H0:$B0/${V0}1;
+
+cleanup;
diff --git a/tests/bugs/bug-862967.t b/tests/bugs/bug-862967.t
new file mode 100644
index 000000000..00fa88440
--- /dev/null
+++ b/tests/bugs/bug-862967.t
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+function uid_gid_compare()
+{
+ val=1
+
+ if [ "$1" == "$3" ]
+ then
+ if [ "$2" == "$4" ]
+ then
+ val=0
+ fi
+ fi
+ echo "$val"
+}
+
+BRICK_COUNT=3
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2
+TEST $CLI volume set $V0 stat-prefetch off
+TEST $CLI volume start $V0
+
+## Mount FUSE
+TEST glusterfs --attribute-timeout=0 --entry-timeout=0 --gid-timeout=-1 -s $H0 --volfile-id $V0 $M0;
+
+# change dir permissions
+mkdir $M0/dir;
+chown 1:1 $M0/dir;
+
+# Kill a brick process
+
+kill -9 `cat /var/lib/glusterd/vols/$V0/run/$H0-d-backends-${V0}1.pid`;
+# change dir ownership
+NEW_UID=36;
+NEW_GID=36;
+chown $NEW_UID:$NEW_GID $M0/dir;
+
+# bring the brick back up
+TEST $CLI volume start $V0 force
+
+sleep 10;
+
+ls -l $M0/dir;
+
+# check if uid/gid is healed on backend brick which was taken down
+BACKEND_UID=`stat --printf=%u $B0/${V0}1/dir`;
+BACKEND_GID=`stat --printf=%g $B0/${V0}1/dir`;
+
+
+EXPECT "0" uid_gid_compare $NEW_UID $NEW_GID $BACKEND_UID $BACKEND_GID
+
+cleanup;
diff --git a/tests/bugs/bug-863068.t b/tests/bugs/bug-863068.t
new file mode 100644
index 000000000..931aad623
--- /dev/null
+++ b/tests/bugs/bug-863068.t
@@ -0,0 +1,76 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+## This function get the No. of entries for
+## gluster volume heal volnmae info healed command for brick1 and brick2
+## and compare the initial value (Before volume heal full) and final value
+## (After gluster volume heal vol full) and compare.
+
+function getdiff()
+{
+ val=10
+ if [ "$1" == "$3" ]
+ then
+ if [ "$2" == "$4" ]
+ then
+ val=0
+ else
+ val=20
+ fi
+ fi
+
+ echo $val
+}
+
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick1 $H0:$B0/brick2;
+TEST $CLI volume start $V0;
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0;
+B0_hiphenated=`echo $B0 | tr '/' '-'`
+kill -9 `cat /var/lib/glusterd/vols/$V0/run/$H0$B0_hiphenated-brick1.pid` ;
+
+mkdir $M0/{a,b,c};
+echo "GLUSTERFS" >> $M0/a/file;
+
+TEST $CLI volume start $V0 force;
+sleep 5
+TEST $CLI volume heal $V0 full;
+sleep 5
+
+##First Brick Initial(Before full type self heal) value
+FBI=`gluster volume heal $V0 info healed | grep entries | awk '{print $4}' | head -n 1`
+
+##Second Brick Initial Value
+SBI=`gluster volume heal $V0 info healed | grep entries | awk '{print $4}' | tail -n 1`
+TEST $CLI volume heal $V0 full;
+
+sleep 5
+
+##First Brick Final value
+##Number of entries from output of <gluster volume heal volname info healed>
+
+FBF=`gluster volume heal $V0 info healed | grep entries | awk '{print $4}' | head -n 1`
+
+##Second Brick Final Value
+SBF=`gluster volume heal $V0 info healed | grep entries | awk '{print $4}' | tail -n 1`
+
+##get the difference of values
+EXPECT "0" getdiff $FBI $SBI $FBF $SBF;
+
+## Tests after this comment checks for the background self heal
+
+TEST mkdir $M0/d
+kill -9 `cat /var/lib/glusterd/vols/$V0/run/$H0$B0_hiphenated-brick1.pid` ;
+TEST $CLI volume set $V0 self-heal-daemon off
+dd if=/dev/random of=$M0/d/file1 bs=100M count=1 2>/dev/null;
+TEST $CLI volume start $V0 force
+sleep 3
+TEST ls -l $M0/d
+
+cleanup;
diff --git a/tests/bugs/bug-864222.t b/tests/bugs/bug-864222.t
new file mode 100755
index 000000000..6e02ab60b
--- /dev/null
+++ b/tests/bugs/bug-864222.t
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick0
+TEST $CLI volume start $V0
+
+sleep 5
+
+TEST mount -t nfs -o vers=3,nolock $H0:/$V0 $N0
+cd $N0
+
+TEST ls
+
+TEST $CLI volume set $V0 nfs.enable-ino32 on
+# Main test. This should pass.
+TEST ls
+
+cd
+TEST umount $N0
+cleanup
+
diff --git a/tests/bugs/bug-865825.t b/tests/bugs/bug-865825.t
new file mode 100755
index 000000000..6bb1c2348
--- /dev/null
+++ b/tests/bugs/bug-865825.t
@@ -0,0 +1,76 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+## Start and create a volume
+mkdir -p ${B0}/${V0}-0
+mkdir -p ${B0}/${V0}-1
+mkdir -p ${B0}/${V0}-2
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}-{0,1,2}
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Make sure io-cache and write-behind don't interfere.
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 performance.io-cache off;
+TEST $CLI volume set $V0 performance.write-behind off;
+TEST $CLI volume set $V0 performance.stat-prefetch off
+
+## Make sure automatic self-heal doesn't perturb our results.
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount native
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+
+## Create a file with some recognizable contents.
+echo "test_data" > $M0/a_file;
+
+## Unmount.
+TEST umount $M0;
+
+## Mess with the flags as though brick-0 accuses brick-2 while brick-1 is
+## missing its brick-2 changelog altogether.
+value=0x000000010000000000000000
+setfattr -n trusted.afr.${V0}-client-2 -v $value $B0/${V0}-0/a_file
+setfattr -x trusted.afr.${V0}-client-2 $B0/${V0}-1/a_file
+echo "wrong_data" > $B0/${V0}-2/a_file
+
+## Remount and force a self-heal.
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+stat ${M0}/a_file > /dev/null
+
+## Make sure brick 2 now has the correct contents.
+EXPECT "test_data" cat $B0/${V0}-2/a_file
+
+if [ "$EXIT_EARLY" = "1" ]; then
+ exit 0;
+fi
+
+## Finish up
+TEST umount $M0;
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-866459.t b/tests/bugs/bug-866459.t
new file mode 100644
index 000000000..d66f70c69
--- /dev/null
+++ b/tests/bugs/bug-866459.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+
+## Start and create a volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+## Create and start a volume with aio enabled
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
+TEST $CLI volume set $V0 linux-aio on
+TEST $CLI volume set $V0 background-self-heal-count 0
+TEST $CLI volume set $V0 performance.stat-prefetch off;
+TEST $CLI volume start $V0
+
+## Mount FUSE with caching disabled
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+dd of=$M0/a if=/dev/urandom bs=1M count=1 2>&1 > /dev/null
+B0_hiphenated=`echo $B0 | tr '/' '-'`
+## Bring a brick down
+kill -9 `cat /var/lib/glusterd/vols/$V0/run/$H0$B0_hiphenated-${V0}1.pid`
+EXPECT '1' echo `pgrep glusterfsd | wc -l`
+## Rewrite the file
+dd of=$M0/a if=/dev/urandom bs=1M count=1 2>&1 > /dev/null
+TEST $CLI volume start $V0 force
+## Wait for the brick to give CHILD_UP in client protocol
+sleep 5
+md5offile2=`md5sum $B0/${V0}2/a | awk '{print $1}'`
+
+##trigger self-heal
+ls -l $M0/a
+
+EXPECT "$md5offile2" echo `md5sum $B0/${V0}1/a | awk '{print $1}'`
+
+## Finish up
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-867252.t b/tests/bugs/bug-867252.t
new file mode 100644
index 000000000..8309ed9b9
--- /dev/null
+++ b/tests/bugs/bug-867252.t
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1;
+
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+
+function brick_count()
+{
+ local vol=$1;
+
+ $CLI volume info $vol | egrep "^Brick[0-9]+: " | wc -l;
+}
+
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '1' brick_count $V0
+
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}2;
+EXPECT '2' brick_count $V0
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2;
+EXPECT '1' brick_count $V0
+
+cleanup;
diff --git a/tests/bugs/bug-867253.t b/tests/bugs/bug-867253.t
new file mode 100644
index 000000000..ae4e243af
--- /dev/null
+++ b/tests/bugs/bug-867253.t
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+function file_count()
+{
+ val=1
+
+ if [ "$1" == "0" ]
+ then
+ if [ "$2" == "0" ]
+ then
+ val=0
+ fi
+ fi
+ echo $val
+}
+
+BRICK_COUNT=2
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+
+sleep 5;
+## Mount nfs, with nocache option
+TEST mount -o vers=3,nolock,noac -t nfs $H0:/$V0 $M0;
+
+touch $M0/files{1..1000};
+
+# Kill a brick process
+kill -9 `cat /var/lib/glusterd/vols/$V0/run/$H0-d-backends-${V0}0.pid`;
+
+echo 3 >/proc/sys/vm/drop_caches;
+
+ls -l $M0 >/dev/null;
+
+NEW_FILE_COUNT=`echo $?`;
+
+TEST $CLI volume start $V0 force
+
+# Kill a brick process
+kill -9 `cat /var/lib/glusterd/vols/$V0/run/$H0-d-backends-${V0}1.pid`;
+
+echo 3 >/proc/sys/vm/drop_caches;
+
+ls -l $M0 >/dev/null;
+
+NEW_FILE_COUNT1=`echo $?`;
+
+EXPECT "0" file_count $NEW_FILE_COUNT $NEW_FILE_COUNT1
+
+TEST umount -l $M0
+
+cleanup
diff --git a/tests/bugs/bug-869724.t b/tests/bugs/bug-869724.t
new file mode 100644
index 000000000..eec5d344c
--- /dev/null
+++ b/tests/bugs/bug-869724.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+
+## Start and create a volume
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1;
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+
+## Make volume tightly consistent for metdata
+TEST $CLI volume set $V0 performance.stat-prefetch off;
+
+## Mount FUSE with caching disabled
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+touch $M0/test;
+build_tester $(dirname $0)/getlk_owner.c
+
+TEST $(dirname $0)/getlk_owner $M0/test;
+
+rm -f $(dirname $0)/getlk_owner
+cleanup;
+
diff --git a/tests/bugs/bug-872923.t b/tests/bugs/bug-872923.t
new file mode 100755
index 000000000..6757846dc
--- /dev/null
+++ b/tests/bugs/bug-872923.t
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick0 $H0:$B0/brick1
+TEST $CLI volume start $V0
+sleep 5
+
+mount -t nfs -o vers=3,nolock `hostname`:/$V0 $N0
+
+cd $N0
+mkdir test_hardlink_self_heal;
+cd test_hardlink_self_heal;
+
+for i in `seq 1 5`;
+do
+ mkdir dir.$i;
+ for j in `seq 1 10`;
+ do
+ dd if=/dev/zero of=dir.$i/file.$j bs=1k count=$j > /dev/null 2>&1;
+ done;
+done;
+
+cd ..
+kill `cat /var/lib/glusterd/vols/$V0/run/$H0-d-backends-brick0.pid`
+sleep 2
+
+
+cd test_hardlink_self_heal;
+
+RET=0
+for i in `seq 1 5`;
+do
+ for j in `seq 1 10`;
+ do
+ ln dir.$i/file.$j dir.$i/link_file.$j > /dev/null 2>&1;
+ RET=$?
+ if [ $RET -ne 0 ]; then
+ break;
+ fi
+ done ;
+ if [ $RET -ne 0 ]; then
+ break;
+ fi
+done;
+
+cd
+umount $N0
+
+EXPECT "0" echo $RET;
+
+cleanup;
diff --git a/tests/bugs/bug-873367.t b/tests/bugs/bug-873367.t
new file mode 100755
index 000000000..cfbbc98d0
--- /dev/null
+++ b/tests/bugs/bug-873367.t
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+SSL_BASE=/etc/ssl
+SSL_KEY=$SSL_BASE/glusterfs.key
+SSL_CERT=$SSL_BASE/glusterfs.pem
+SSL_CA=$SSL_BASE/glusterfs.ca
+
+cleanup;
+rm -f $SSL_BASE/glusterfs.*
+mkdir -p $B0/1
+mkdir -p $M0
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST openssl genrsa -out $SSL_KEY 1024
+TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT
+ln $SSL_CERT $SSL_CA
+
+TEST $CLI volume create $V0 $H0:$B0/1
+TEST $CLI volume set $V0 server.ssl on
+TEST $CLI volume set $V0 client.ssl on
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+echo some_data > $M0/data_file
+TEST umount $M0
+
+# If the bug is not fixed, the next mount will fail.
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+EXPECT some_data cat $M0/data_file
+
+TEST umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/bug-873549.t b/tests/bugs/bug-873549.t
new file mode 100644
index 000000000..5b541de6c
--- /dev/null
+++ b/tests/bugs/bug-873549.t
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd -LDEBUG;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+TEST $CLI volume set $V0 performance.cache-size 512MB
+TEST $CLI volume start $V0
+TEST $CLI volume statedump $V0 all
+
+cleanup;
diff --git a/tests/bugs/bug-873962-spb.t b/tests/bugs/bug-873962-spb.t
new file mode 100644
index 000000000..62a8318ed
--- /dev/null
+++ b/tests/bugs/bug-873962-spb.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable
+touch $M0/a
+
+exec 5<$M0/a
+
+kill_brick $V0 $H0 $B0/${V0}0
+echo "hi" > $M0/a
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0
+
+kill_brick $V0 $H0 $B0/${V0}1
+echo "bye" > $M0/a
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 1
+
+TEST ! cat $M0/a #To mark split-brain
+
+TEST ! read -u 5 line
+exec 5<&-
+
+cleanup;
diff --git a/tests/bugs/bug-873962.t b/tests/bugs/bug-873962.t
new file mode 100755
index 000000000..b245cc3da
--- /dev/null
+++ b/tests/bugs/bug-873962.t
@@ -0,0 +1,108 @@
+#!/bin/bash
+
+#AFR TEST-IDENTIFIER SPLIT-BRAIN
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+B0_hiphenated=`echo $B0 | tr '/' '-'`
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2}
+
+# If we allow self-heal to happen in the background, we'll get spurious
+# failures - especially at the point labeled "FAIL HERE" but
+# occasionally elsewhere. This behavior is very timing-dependent. It
+# doesn't show up in Jenkins, but it does on JD's and KP's machines, and
+# it got sharply worse because of an unrelated fsync change (6ae6f3d)
+# which changed timing. Putting anything at the FAIL HERE marker tends
+# to make it go away most of the time on affected machines, even if the
+# "anything" is unrelated.
+#
+# What's going on is that the I/O on the first mountpoint is allowed to
+# complete even though self-heal is still in progress and the state on
+# disk does not reflect its result. In fact, the state changes during
+# self-heal create the appearance of split brain when the second I/O
+# comes in, so that fails even though we haven't actually been in split
+# brain since the manual xattr operations. By disallowing background
+# self-heal, we ensure that the second I/O can't happen before self-heal
+# is complete, because it has to follow the first I/O which now has to
+# follow self-heal.
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+
+#Make sure self-heal is not triggered when the bricks are re-started
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable
+TEST touch $M0/a
+TEST touch $M0/b
+TEST touch $M0/c
+TEST touch $M0/d
+echo "1" > $M0/b
+echo "1" > $M0/d
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo "1" > $M0/a
+echo "1" > $M0/c
+TEST setfattr -n trusted.mdata -v abc $M0/b
+TEST setfattr -n trusted.mdata -v abc $M0/d
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/${V0}1
+echo "2" > $M0/a
+echo "2" > $M0/c
+TEST setfattr -n trusted.mdata -v def $M0/b
+TEST setfattr -n trusted.mdata -v def $M0/d
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 1
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M1 --direct-io-mode=enable
+#Files are in split-brain, so open should fail
+TEST ! cat $M0/a;
+TEST ! cat $M1/a;
+TEST ! cat $M0/b;
+TEST ! cat $M1/b;
+
+#Reset split-brain status
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/a;
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/b;
+
+#The operations should do self-heal and give correct output
+EXPECT "2" cat $M0/a;
+# FAIL HERE - see comment about cluster.self-heal-background-count above.
+EXPECT "2" cat $M1/a;
+EXPECT "def" getfattr -n trusted.mdata --only-values $M0/b 2>/dev/null
+EXPECT "def" getfattr -n trusted.mdata --only-values $M1/b 2>/dev/null
+
+TEST umount $M0
+TEST umount $M1
+
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M1 --direct-io-mode=enable
+
+#Files are in split-brain, so open should fail
+TEST ! cat $M0/c
+TEST ! cat $M1/c
+TEST ! cat $M0/d
+TEST ! cat $M1/d
+
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/c
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/d
+
+#The operations should NOT do self-heal but give correct output
+EXPECT "2" cat $M0/c
+EXPECT "2" cat $M1/c
+EXPECT "1" cat $M0/d
+EXPECT "1" cat $M1/d
+
+#Check that the self-heal is not triggered.
+EXPECT "1" cat $B0/${V0}1/c
+EXPECT "abc" getfattr -n trusted.mdata --only-values $B0/${V0}1/d 2>/dev/null
+cleanup;
diff --git a/tests/bugs/bug-874498.t b/tests/bugs/bug-874498.t
new file mode 100644
index 000000000..0b5991011
--- /dev/null
+++ b/tests/bugs/bug-874498.t
@@ -0,0 +1,61 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick1 $H0:$B0/brick2;
+TEST $CLI volume start $V0;
+
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0;
+B0_hiphenated=`echo $B0 | tr '/' '-'`
+kill -9 `cat /var/lib/glusterd/vols/$V0/run/$H0$B0_hiphenated-brick1.pid` ;
+
+echo "GLUSTER FILE SYSTEM" > $M0/FILE1
+echo "GLUSTER FILE SYSTEM" > $M0/FILE2
+
+FILEN=$B0"/brick2"
+XATTROP=$FILEN/.glusterfs/indices/xattrop
+
+function get_gfid()
+{
+path_of_file=$1
+
+gfid_value=`getfattr -d -m . $path_of_file -e hex 2>/dev/null | grep trusted.gfid | cut --complement -c -15 | sed 's/\([a-f0-9]\{8\}\)\([a-f0-9]\{4\}\)\([a-f0-9]\{4\}\)\([a-f0-9]\{4\}\)/\1-\2-\3-\4-/'`
+
+echo $gfid_value
+}
+
+GFID_ROOT=`get_gfid $B0/brick2`
+GFID_FILE1=`get_gfid $B0/brick2/FILE1`
+GFID_FILE2=`get_gfid $B0/brick2/FILE2`
+
+
+count=0
+for i in `ls $XATTROP`
+do
+ if [ "$i" == "$GFID_ROOT" ] || [ "$i" == "$GFID_FILE1" ] || [ "$i" == "$GFID_FILE2" ]
+ then
+ count=$(( count + 1 ))
+ fi
+done
+
+EXPECT "3" echo $count
+
+
+TEST $CLI volume start $V0 force
+sleep 5
+TEST $CLI volume heal $V0
+
+
+##Expected number of entries are 0 in the .glusterfs/indices/xattrop directory
+EXPECT_WITHIN 60 '0' count_sh_entries $FILEN;
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-877293.t b/tests/bugs/bug-877293.t
new file mode 100755
index 000000000..774c2a0cc
--- /dev/null
+++ b/tests/bugs/bug-877293.t
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+TEST glusterd
+TEST pidof glusterd
+
+## Start and create a replicated volume
+mkdir -p ${B0}/${V0}-0
+mkdir -p ${B0}/${V0}-1
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}-{0,1}
+
+TEST $CLI volume set $V0 indexing on
+
+TEST $CLI volume start $V0;
+
+## Mount native
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+
+## Mount client-pid=-1
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 --client-pid=-1 $M1
+
+TEST touch $M0
+
+vol_uuid=`getfattr -n trusted.glusterfs.volume-mark -ehex $M1 | sed -n 's/^trusted.glusterfs.volume-mark=0x//p' | cut -b5-36 | sed 's/\([a-f0-9]\{8\}\)\([a-f0-9]\{4\}\)\([a-f0-9]\{4\}\)\([a-f0-9]\{4\}\)/\1-\2-\3-\4-/'`
+xtime=trusted.glusterfs.$vol_uuid.xtime
+
+TEST "getfattr -n $xtime $M1 | grep -q ${xtime}="
+
+TEST kill_brick $V0 $H0 $B0/${V0}-0
+
+TEST "getfattr -n $xtime $M1 | grep -q ${xtime}="
+
+TEST umount $M0
+TEST umount $M1
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup
diff --git a/tests/bugs/bug-877885.t b/tests/bugs/bug-877885.t
new file mode 100755
index 000000000..0d4620b00
--- /dev/null
+++ b/tests/bugs/bug-877885.t
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick0 $H0:$B0/brick1
+TEST $CLI volume start $V0
+
+sleep 5
+
+## Mount FUSE with caching disabled
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 \
+$M0;
+
+TEST touch $M0/file
+TEST mkdir $M0/dir
+
+TEST mount -t nfs -o vers=3,nolock $H0:/$V0 $N0
+cd $N0
+
+rm -rf * &
+
+TEST mount -t nfs -o retry=0,nolock,vers=3 $H0:/$V0 $N1;
+
+cd;
+
+kill %1;
+
+TEST umount $N0
+TEST umount $N1;
+
+cleanup
diff --git a/tests/bugs/bug-877992.t b/tests/bugs/bug-877992.t
new file mode 100755
index 000000000..932ecc77b
--- /dev/null
+++ b/tests/bugs/bug-877992.t
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+
+## Start and create a volume
+TEST glusterd -LDEBUG
+TEST pidof glusterd
+
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+
+function hooks_prep ()
+{
+ local event=$1
+ touch /tmp/pre.out /tmp/post.out
+ touch /var/lib/glusterd/hooks/1/"$event"/pre/Spre.sh
+ touch /var/lib/glusterd/hooks/1/"$event"/post/Spost.sh
+
+ printf "#! /bin/bash\necho "$event"Pre > /tmp/pre.out\n" > /var/lib/glusterd/hooks/1/"$event"/pre/Spre.sh
+ printf "#! /bin/bash\necho "$event"Post > /tmp/post.out\n" > /var/lib/glusterd/hooks/1/"$event"/post/Spost.sh
+ chmod a+x /var/lib/glusterd/hooks/1/"$event"/pre/Spre.sh
+ chmod a+x /var/lib/glusterd/hooks/1/"$event"/post/Spost.sh
+}
+
+function hooks_cleanup ()
+{
+ local event=$1
+ rm /tmp/pre.out /tmp/post.out
+ rm /var/lib/glusterd/hooks/1/"$event"/pre/Spre.sh
+ rm /var/lib/glusterd/hooks/1/"$event"/post/Spost.sh
+}
+
+## Verify volume is created and its hooks script ran
+hooks_prep 'create'
+TEST $CLI volume create $V0 $H0:$B0/${V0}1;
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT 'createPre' cat /tmp/pre.out;
+EXPECT 'createPost' cat /tmp/post.out;
+hooks_cleanup 'create'
+
+
+## Start volume and verify that its hooks script ran
+hooks_prep 'start'
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT 'startPre' cat /tmp/pre.out;
+EXPECT 'startPost' cat /tmp/post.out;
+hooks_cleanup 'start'
+
+cleanup;
diff --git a/tests/bugs/bug-878004.t b/tests/bugs/bug-878004.t
new file mode 100644
index 000000000..5bee4c62f
--- /dev/null
+++ b/tests/bugs/bug-878004.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}2 $H0:$B0/${V0}3;
+
+function brick_count()
+{
+ local vol=$1;
+
+ $CLI volume info $vol | egrep "^Brick[0-9]+: " | wc -l;
+}
+
+
+TEST $CLI volume start $V0
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2;
+EXPECT '2' brick_count $V0
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}3;
+EXPECT '1' brick_count $V0
+
+cleanup;
+
diff --git a/tests/bugs/bug-879490.t b/tests/bugs/bug-879490.t
new file mode 100755
index 000000000..5b9ae7bb9
--- /dev/null
+++ b/tests/bugs/bug-879490.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+function peer_probe()
+{
+ $CLI peer probe a.b.c.d --xml | xmllint --format - | grep "<opErrstr>"
+}
+
+EXPECT " <opErrstr>Probe returned with unknown errno 107</opErrstr>" peer_probe
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-879494.t b/tests/bugs/bug-879494.t
new file mode 100755
index 000000000..5caca7922
--- /dev/null
+++ b/tests/bugs/bug-879494.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+function peer_probe()
+{
+ $CLI peer detach a.b.c.d --xml | xmllint --format - | grep "<opErrstr>"
+}
+
+EXPECT " <opErrstr>a.b.c.d is not part of cluster</opErrstr>" peer_probe
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-880898.t b/tests/bugs/bug-880898.t
new file mode 100644
index 000000000..a069d4a8a
--- /dev/null
+++ b/tests/bugs/bug-880898.t
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick1 $H0:$B0/brick2
+TEST $CLI volume start $V0
+pkill glusterfs
+uuid=""
+for line in $(cat /var/lib/glusterd/glusterd.info)
+do
+ if [[ $line == UUID* ]]
+ then
+ uuid=`echo $line | sed -r 's/^.{5}//'`
+ fi
+done
+
+gluster volume heal $V0 info | grep "Status: self-heal-daemon is not running on $uuid";
+EXPECT "0" echo $?
+
+cleanup;
diff --git a/tests/bugs/bug-882278.t b/tests/bugs/bug-882278.t
new file mode 100755
index 000000000..7933e1863
--- /dev/null
+++ b/tests/bugs/bug-882278.t
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+cleanup
+
+# Is there a good reason to require --fqdn elsewhere? It's worse than useless
+# here.
+H0=$(hostname -s)
+
+function recreate {
+ # The rm is necessary so we don't get fooled by leftovers from old runs.
+ rm -rf $1 && mkdir -p $1
+}
+
+function count_lines {
+ grep "$1" $2/* | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+## Start and create a volume
+TEST recreate ${B0}/${V0}-0
+TEST recreate ${B0}/${V0}-1
+TEST $CLI volume create $V0 $H0:$B0/${V0}-{0,1}
+TEST $CLI volume set $V0 cluster.nufa on
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount native
+special_option="--xlator-option ${V0}-dht.local-volume-name=${V0}-client-1"
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $special_option $M0
+
+## Create a bunch of test files.
+for i in $(seq 0 99); do
+ echo hello > $(printf $M0/file%02d $i)
+done
+
+## Make sure the files went to the right place. There might be link files in
+## the other brick, but they won't have any contents.
+EXPECT "0" count_lines hello ${B0}/${V0}-0
+EXPECT "100" count_lines hello ${B0}/${V0}-1
+
+if [ "$EXIT_EARLY" = "1" ]; then
+ exit 0;
+fi
+
+## Finish up
+TEST umount $M0;
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-884328.t b/tests/bugs/bug-884328.t
new file mode 100644
index 000000000..ee5509bbc
--- /dev/null
+++ b/tests/bugs/bug-884328.t
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST check_option_help_presence "cluster.quorum-type"
+TEST check_option_help_presence "cluster.quorum-count"
+cleanup;
diff --git a/tests/bugs/bug-884452.t b/tests/bugs/bug-884452.t
new file mode 100644
index 000000000..d07651e46
--- /dev/null
+++ b/tests/bugs/bug-884452.t
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume start $V0
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+TEST touch $M0/{1..10000}
+
+RUN_LS_LOOP_FILE="$M0/run-ls-loop"
+function ls-loop
+{
+ while [ -f $RUN_LS_LOOP_FILE ]; do
+ ls -lR $M0 1>/dev/null 2>&1
+ done;
+}
+
+touch $RUN_LS_LOOP_FILE
+ls-loop &
+
+function vol-status-loop
+{
+ for i in {1..1000}; do
+ $CLI volume status $V0 clients >/dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ return 1
+ fi
+ done;
+
+ return 0
+}
+
+TEST vol-status-loop
+
+rm -f $RUN_LS_LOOP_FILE
+wait
+
+TEST umount $M0
+
+cleanup;
diff --git a/tests/bugs/bug-884455.t b/tests/bugs/bug-884455.t
new file mode 100755
index 000000000..3b3a2241e
--- /dev/null
+++ b/tests/bugs/bug-884455.t
@@ -0,0 +1,84 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../dht.rc
+
+cleanup;
+
+function layout_compare()
+{
+ res=0
+
+ if [ "$1" == "$2" ]
+ then
+ res=1
+ fi
+ if [ "$1" == "$3" ]
+ then
+ res=1
+ fi
+ if [ "$2" == "$3" ]
+ then
+ res=1
+ fi
+
+ echo $res
+}
+
+function get_layout()
+{
+ layout1=`getfattr -n trusted.glusterfs.dht -e hex $1 2>&1|grep dht |cut -d = -f2`
+ layout2=`getfattr -n trusted.glusterfs.dht -e hex $2 2>&1|grep dht |cut -d = -f2`
+ layout3=`getfattr -n trusted.glusterfs.dht -e hex $3 2>&1|grep dht |cut -d = -f2`
+
+ ret=$(layout_compare $layout1 $layout2 $layout3)
+
+ if [ $ret -ne 0 ]
+ then
+ echo 1
+ else
+ echo 0
+ fi
+
+}
+
+BRICK_COUNT=3
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+## set subvols-per-dir option
+TEST $CLI volume set $V0 subvols-per-directory 2
+TEST $CLI volume start $V0
+
+## Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+TEST mkdir $M0/dir{1..10} 2>/dev/null;
+
+## Add-brick n run rebalance to force re-write of layout
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}2
+sleep 5;
+
+## trigger dir self heal on client
+TEST ls -l $M0 2>/dev/null;
+
+TEST $CLI volume rebalance $V0 start force
+
+EXPECT_WITHIN 30 "0" rebalance_completed
+
+## check for layout overlaps.
+EXPECT "0" get_layout $B0/${V0}0 $B0/${V0}1 $B0/${V0}2
+EXPECT "0" get_layout $B0/${V0}0/dir1 $B0/${V0}1/dir1 $B0/${V0}2/dir1
+EXPECT "0" get_layout $B0/${V0}0/dir2 $B0/${V0}1/dir2 $B0/${V0}2/dir2
+EXPECT "0" get_layout $B0/${V0}0/dir3 $B0/${V0}1/dir3 $B0/${V0}2/dir3
+EXPECT "0" get_layout $B0/${V0}0/dir4 $B0/${V0}1/dir4 $B0/${V0}2/dir4
+EXPECT "0" get_layout $B0/${V0}0/dir5 $B0/${V0}1/dir5 $B0/${V0}2/dir5
+EXPECT "0" get_layout $B0/${V0}0/dir6 $B0/${V0}1/dir6 $B0/${V0}2/dir6
+EXPECT "0" get_layout $B0/${V0}0/dir7 $B0/${V0}1/dir7 $B0/${V0}2/dir7
+EXPECT "0" get_layout $B0/${V0}0/dir8 $B0/${V0}1/dir8 $B0/${V0}2/dir8
+EXPECT "0" get_layout $B0/${V0}0/dir9 $B0/${V0}1/dir9 $B0/${V0}2/dir9
+EXPECT "0" get_layout $B0/${V0}0/dir10 $B0/${V0}1/dir10 $B0/${V0}2/dir10
+
+cleanup;
diff --git a/tests/bugs/bug-884597.t b/tests/bugs/bug-884597.t
new file mode 100755
index 000000000..8eb1f330b
--- /dev/null
+++ b/tests/bugs/bug-884597.t
@@ -0,0 +1,152 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../dht.rc
+
+cleanup;
+BRICK_COUNT=3
+function uid_gid_compare()
+{
+ val=1
+
+ if [ "$1" == "$3" ]
+ then
+ if [ "$2" == "$4" ]
+ then
+ val=0
+ fi
+ fi
+ echo "$val"
+}
+
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2
+TEST $CLI volume start $V0
+
+## Mount FUSE
+TEST glusterfs --attribute-timeout=0 --entry-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+i=1
+NEW_UID=36
+NEW_GID=36
+
+TEST touch $M0/$i
+
+chown $NEW_UID:$NEW_GID $M0/$i
+## rename till file gets a linkfile
+
+while [ $i -ne 0 ]
+do
+ TEST mv $M0/$i $M0/$(( $i+1 ))
+ let i++
+ file_has_linkfile $i
+ has_link=$?
+ if [ $has_link -eq 2 ]
+ then
+ break;
+ fi
+done
+
+get_hashed_brick $i
+cached=$?
+
+# check if uid/gid on linkfile is created with correct uid/gid
+BACKEND_UID=`stat --printf=%u $B0/${V0}$cached/$i`;
+BACKEND_GID=`stat --printf=%g $B0/${V0}$cached/$i`;
+
+EXPECT "0" uid_gid_compare $NEW_UID $NEW_GID $BACKEND_UID $BACKEND_GID
+
+# remove linkfile from backend, and trigger a lookup heal. uid/gid should match
+rm -rf $B0/${V0}$cached/$i
+
+# without a unmount, we are not able to trigger a lookup based heal
+
+TEST umount $M0
+
+## Mount FUSE
+TEST glusterfs --attribute-timeout=0 --entry-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+lookup=`ls -l $M0/$i 2>/dev/null`
+
+# check if uid/gid on linkfile is created with correct uid/gid
+BACKEND_UID=`stat --printf=%u $B0/${V0}$cached/$i`;
+BACKEND_GID=`stat --printf=%g $B0/${V0}$cached/$i`;
+
+EXPECT "0" uid_gid_compare $NEW_UID $NEW_GID $BACKEND_UID $BACKEND_GID
+# create hardlinks. Make sure a linkfile gets created
+
+i=1
+NEW_UID=36
+NEW_GID=36
+
+TEST touch $M0/file
+chown $NEW_UID:$NEW_GID $M0/file;
+
+## ln till file gets a linkfile
+
+while [ $i -ne 0 ]
+do
+ TEST ln $M0/file $M0/link$i
+
+ file_has_linkfile link$i
+ has_link=$?
+ if [ $has_link -eq 2 ]
+ then
+ break;
+ fi
+ let i++
+done
+
+get_hashed_brick link$i
+cached=$?
+
+# check if uid/gid on linkfile is created with correct uid/gid
+BACKEND_UID=`stat --printf=%u $B0/${V0}$cached/link$i`;
+BACKEND_GID=`stat --printf=%g $B0/${V0}$cached/link$i`;
+
+EXPECT "0" uid_gid_compare $NEW_UID $NEW_GID $BACKEND_UID $BACKEND_GID
+
+## UID/GID creation as different user
+i=1
+NEW_UID=36
+NEW_GID=36
+
+TEST touch $M0/user_file1
+TEST chown $NEW_UID:$NEW_GID $M0/user_file1;
+
+## Give permission on volume, so that different users can perform rename
+
+TEST chmod 0777 $M0
+
+## Add a user known as ABC and perform renames
+TEST `useradd -M ABC 2>/dev/null`
+
+TEST cd $M0
+## rename as different user till file gets a linkfile
+
+while [ $i -ne 0 ]
+do
+ su -c "mv $M0/user_file$i $M0/user_file$(( $i+1 ))" ABC
+ let i++
+ file_has_linkfile user_file$i
+ has_link=$?
+ if [ $has_link -eq 2 ]
+ then
+ break;
+ fi
+done
+
+## del user ABC
+TEST userdel ABC
+
+get_hashed_brick user_file$i
+cached=$?
+
+# check if uid/gid on linkfile is created with correct uid/gid
+BACKEND_UID=`stat --printf=%u $B0/${V0}$cached/user_file$i`;
+BACKEND_GID=`stat --printf=%g $B0/${V0}$cached/user_file$i`;
+
+EXPECT "0" uid_gid_compare $NEW_UID $NEW_GID $BACKEND_UID $BACKEND_GID
+cleanup;
diff --git a/tests/bugs/bug-886998.t b/tests/bugs/bug-886998.t
new file mode 100644
index 000000000..7a905a113
--- /dev/null
+++ b/tests/bugs/bug-886998.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+# This tests that the replicate trash directory(.landfill) has following
+# properties.
+# Note: This is to have backward compatibility with 3.3 glusterfs
+# In the latest releases this dir is present inside .glusterfs of brick.
+# 1) lookup of trash dir fails
+# 2) readdir does not show this directory
+# 3) Self-heal does not do any self-heal of these directories.
+gfid1="0xc2e75dde97f346e7842d1076a8e699f8"
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 --direct-io-mode=enable
+
+TEST mkdir $B0/${V0}1/.landfill
+TEST setfattr -n trusted.gfid -v $gfid1 $B0/${V0}1/.landfill
+TEST mkdir $B0/${V0}0/.landfill
+TEST setfattr -n trusted.gfid -v $gfid1 $B0/${V0}0/.landfill
+
+TEST ! stat $M0/.landfill
+EXPECT "" echo $(ls -a $M0 | grep ".landfill")
+
+TEST rmdir $B0/${V0}0/.landfill
+#Force a conservative merge and it should not create .landfill
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000000 $B0/${V0}0/
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/
+
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1/
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/
+
+EXPECT "" echo $(ls -a $M0 | grep ".landfill")
+TEST ! stat $B0/${V0}0/.landfill
+TEST stat $B0/${V0}1/.landfill
+
+#TEST that the dir is not deleted even when xattrs suggest to delete
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000000 $B0/${V0}0/
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/
+
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000000 $B0/${V0}1/
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/
+
+EXPECT "" echo $(ls -a $M0 | grep ".landfill")
+TEST ! stat $B0/${V0}0/.landfill
+TEST stat $B0/${V0}1/.landfill
+cleanup;
diff --git a/tests/bugs/bug-887098-gmount-crash.t b/tests/bugs/bug-887098-gmount-crash.t
new file mode 100644
index 000000000..1998b4062
--- /dev/null
+++ b/tests/bugs/bug-887098-gmount-crash.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+function pidgrep()
+{
+ ps ax | grep "$1" | grep -v grep | awk '{print $1}' | head -1
+}
+
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs -s $H0 --volfile-id=$V0 --acl $M0
+MOUNT_PID=`ps ax |grep "glusterfs -s $H0 --volfile-id=$V0 --acl $M0" | grep -v grep | awk '{print $1}' | head -1`
+
+for i in {1..25};
+do
+ mkdir $M0/tmp_$i && cat /etc/hosts > $M0/tmp_$i/file
+ cp -RPp $M0/tmp_$i $M0/newtmp_$i && cat /etc/hosts > $M0/newtmp_$i/newfile
+done
+
+EXPECT "$MOUNT_PID" pidgrep $MOUNT_PID
+TEST rm -rf $M0/*
+umount $M0
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-887145.t b/tests/bugs/bug-887145.t
new file mode 100755
index 000000000..e2013e50b
--- /dev/null
+++ b/tests/bugs/bug-887145.t
@@ -0,0 +1,89 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../nfs.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
+TEST $CLI volume set $V0 performance.open-behind off;
+TEST $CLI volume start $V0
+
+sleep 2;
+## Mount FUSE with caching disabled
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+EXPECT_WITHIN 20 "1" is_nfs_export_available;
+
+
+useradd tmp_user 2>/dev/null 1>/dev/null;
+mkdir $M0/dir;
+mkdir $M0/other;
+cp /etc/passwd $M0/;
+cp $M0/passwd $M0/file;
+chmod 600 $M0/file;
+
+TEST mount -t nfs -o vers=3,nolock $H0:/$V0 $N0;
+
+chown -R nfsnobody:nfsnobody $M0/dir;
+chown -R tmp_user:tmp_user $M0/other;
+
+TEST $CLI volume set $V0 server.root-squash on;
+
+sleep 2;
+
+EXPECT_WITHIN 20 "1" is_nfs_export_available;
+
+# create files and directories in the root of the glusterfs and nfs mount
+# which is owned by root and hence the right behavior is getting EACCESS
+# as the fops are executed as nfsnobody.
+touch $M0/foo 2>/dev/null;
+TEST [ $? -ne 0 ]
+touch $N0/foo 2>/dev/null;
+TEST [ $? -ne 0 ]
+mkdir $M0/new 2>/dev/null;
+TEST [ $? -ne 0 ]
+mkdir $N0/new 2>/dev/null;
+TEST [ $? -ne 0 ]
+cp $M0/file $M0/tmp_file 2>/dev/null;
+TEST [ $? -ne 0 ]
+cp $N0/file $N0/tmp_file 2>/dev/null;
+TEST [ $? -ne 0 ]
+cat $M0/file 2>/dev/null;
+TEST [ $? -ne 0 ]
+# here read should be allowed because eventhough file "passwd" is owned
+# by root, the permissions if the file allow other users to read it.
+cat $M0/passwd 1>/dev/null;
+TEST [ $? -eq 0 ]
+cat $N0/passwd 1>/dev/null;
+TEST [ $? -eq 0 ]
+
+# create files and directories should succeed as the fops are being executed
+# inside the directory owned by nfsnobody
+TEST touch $M0/dir/file;
+TEST touch $N0/dir/foo;
+TEST mkdir $M0/dir/new;
+TEST mkdir $N0/dir/other;
+TEST rm -f $M0/dir/file $M0/dir/foo;
+TEST rmdir $N0/dir/*;
+
+# create files and directories here should fail as other directory is owned
+# by tmp_user.
+touch $M0/other/foo 2>/dev/null;
+TEST [ $? -ne 0 ]
+touch $N0/other/foo 2>/dev/null;
+TEST [ $? -ne 0 ]
+mkdir $M0/other/new 2>/dev/null;
+TEST [ $? -ne 0 ]
+mkdir $N0/other/new 2>/dev/null;
+TEST [ $? -ne 0 ]
+
+userdel tmp_user;
+rm -rf /home/tmp_user;
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-888174.t b/tests/bugs/bug-888174.t
new file mode 100644
index 000000000..4ea34645b
--- /dev/null
+++ b/tests/bugs/bug-888174.t
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+#This tests if flush, fsync wakes up the delayed post-op or not.
+#If it is not woken up, INODELK from the next command waits
+#for post-op-delay secs. There would be pending changelog even after the command
+#completes.
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/r2_0 $H0:$B0/r2_1
+
+TEST $CLI volume set $V0 cluster.eager-lock on
+
+TEST $CLI volume set $V0 performance.flush-behind off
+EXPECT "off" volume_option $V0 performance.flush-behind
+
+TEST $CLI volume set $V0 cluster.post-op-delay-secs 3
+EXPECT "3" volume_option $V0 cluster.post-op-delay-secs
+
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0
+
+#Check that INODELK MAX latency is not in the order of seconds
+TEST gluster volume profile $V0 start
+for i in {1..5}
+do
+ echo hi > $M0/a
+done
+#Test if the MAX INODELK fop latency is of the order of seconds.
+inodelk_max_latency=$($CLI volume profile $V0 info | grep INODELK | awk 'BEGIN {max = 0} {if ($6 > max) max=$6;} END {print max}' | cut -d. -f 1 | egrep "[0-9]{7,}")
+
+TEST [ -z $inodelk_max_latency ]
+
+TEST dd of=$M0/a if=/dev/urandom bs=1M count=10 conv=fsync
+#Check for no trace of pending changelog. Flush should make sure of it.
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/r2_0/a trusted.afr.$V0-client-0
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/r2_0/a trusted.afr.$V0-client-1
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/r2_1/a trusted.afr.$V0-client-0
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/r2_1/a trusted.afr.$V0-client-1
+
+dd of=$M0/a if=/dev/urandom bs=1M count=1024 2>/dev/null &
+p=$!
+#trigger graph switches, tests for fsync not leaving any pending flags
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+
+kill -SIGTERM $p
+#wait for dd to exit
+wait > /dev/null 2>&1
+
+#Goal is to check if there is permanent FOOL changelog
+sleep 5
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/r2_0/a trusted.afr.$V0-client-0
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/r2_0/a trusted.afr.$V0-client-1
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/r2_1/a trusted.afr.$V0-client-0
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/r2_1/a trusted.afr.$V0-client-1
+
+cleanup;
diff --git a/tests/bugs/bug-888752.t b/tests/bugs/bug-888752.t
new file mode 100644
index 000000000..56d3f9ffb
--- /dev/null
+++ b/tests/bugs/bug-888752.t
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../cluster.rc
+
+# Check if xml output is generated correctly for volume status for a single brick
+# present on another peer and no async tasks are running.
+
+function get_peer_count {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+cleanup
+
+TEST launch_cluster 2;
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN 5 1 get_peer_count
+TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
+TEST $CLI_1 volume start $V0
+
+TEST $CLI_1 volume status $V0 $H2:$B2/$V0 --xml
+
+TEST $CLI_1 volume stop $V0
+
+cleanup
diff --git a/tests/bugs/bug-889630.t b/tests/bugs/bug-889630.t
new file mode 100755
index 000000000..b04eb3407
--- /dev/null
+++ b/tests/bugs/bug-889630.t
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../cluster.rc
+
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+function volume_count {
+ local cli=$1;
+ if [ $cli -eq '1' ] ; then
+ $CLI_1 volume info | grep 'Volume Name' | wc -l;
+ else
+ $CLI_2 volume info | grep 'Volume Name' | wc -l;
+ fi
+}
+
+cleanup;
+
+TEST launch_cluster 2;
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN 20 1 check_peers
+
+TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
+TEST $CLI_1 volume start $V0
+
+b="B1";
+
+#Create an extra file in the originator's volume store
+touch ${!b}/glusterd/vols/$V0/run/file
+
+TEST $CLI_1 volume stop $V0
+#Test for self-commit failure
+TEST $CLI_1 volume delete $V0
+
+#Check whether delete succeeded on both the nodes
+EXPECT "0" volume_count '1'
+EXPECT "0" volume_count '2'
+
+#Check whether the volume name can be reused after deletion
+TEST $CLI_1 volume create $V0 $H1:$B1/${V0}1 $H2:$B2/${V0}1
+TEST $CLI_1 volume start $V0
+
+#Create an extra file in the peer's volume store
+touch ${!b}/glusterd/vols/$V0/run/file
+
+TEST $CLI_1 volume stop $V0
+#Test for commit failure on the other node
+TEST $CLI_2 volume delete $V0
+
+EXPECT "0" volume_count '1';
+EXPECT "0" volume_count '2';
+
+cleanup;
diff --git a/tests/bugs/bug-889996.t b/tests/bugs/bug-889996.t
new file mode 100644
index 000000000..6b07d8918
--- /dev/null
+++ b/tests/bugs/bug-889996.t
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+rm -rf $B0/${V0}1;
+
+TEST ! $CLI volume start $V0;
+EXPECT 0 online_brick_count;
+
+cleanup;
diff --git a/tests/bugs/bug-892730.t b/tests/bugs/bug-892730.t
new file mode 100755
index 000000000..0a677069e
--- /dev/null
+++ b/tests/bugs/bug-892730.t
@@ -0,0 +1,76 @@
+#!/bin/bash
+#
+# Bug 892730 - Verify that afr handles EIO errors from the brick properly.
+#
+# The associated bug describes a problem where EIO errors returned from the
+# local filesystem of a brick that is part of a replica volume are exposed to
+# the user. This test simulates such failures and verifies that the volume
+# operates as expected.
+#
+########
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST mkdir -p $B0/test{1,2}
+
+# The graph is a two brick replica with error-gen enabled on the second brick
+# and configured to return EIO lookup errors 100% of the time. This simulates
+# a brick with a crashed or shut down local filesystem. Note that the order in
+# which errors occur is a factor in reproducing the original bug (error-gen
+# must be enabled in the second brick for this test to be effective).
+
+cat > $B0/test.vol <<EOF
+volume test-posix-0
+ type storage/posix
+ option directory $B0/test1
+end-volume
+
+volume test-locks-0
+ type features/locks
+ subvolumes test-posix-0
+end-volume
+
+volume test-posix-1
+ type storage/posix
+ option directory $B0/test2
+end-volume
+
+volume test-error-1
+ type debug/error-gen
+ option failure 100
+ option enable lookup
+ option error-no EIO
+ subvolumes test-posix-1
+end-volume
+
+volume test-locks-1
+ type features/locks
+ subvolumes test-error-1
+end-volume
+
+volume test-replicate-0
+ type cluster/replicate
+ option background-self-heal-count 0
+ subvolumes test-locks-0 test-locks-1
+end-volume
+EOF
+
+TEST glusterd
+
+TEST glusterfs --volfile=$B0/test.vol --attribute-timeout=0 --entry-timeout=0 $M0
+
+# We should be able to create and remove a file without interference from the
+# "broken" brick.
+
+TEST touch $M0/file
+TEST rm $M0/file
+
+TEST umount $M0
+
+rm -f $B0/test.vol
+rm -rf $B0/test1 $B0/test2
+
+cleanup;
+
diff --git a/tests/bugs/bug-893338.t b/tests/bugs/bug-893338.t
new file mode 100644
index 000000000..cc39f28e3
--- /dev/null
+++ b/tests/bugs/bug-893338.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 stripe 2 $H0:$B0/${V0}{1,2,3,4};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0
+
+## Test for symlink success
+TEST touch $M0/reg_file
+TEST ln -s $M0/reg_file $M0/symlink
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-893378.t b/tests/bugs/bug-893378.t
new file mode 100755
index 000000000..fd8b9a7ce
--- /dev/null
+++ b/tests/bugs/bug-893378.t
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+cleanup;
+BRICK_COUNT=3
+
+function file_has_linkfile()
+{
+ i=0
+ j=0
+ while [ $i -lt $BRICK_COUNT ]
+ do
+ stat=`stat $B0/${V0}$i/$1 2>/dev/null`
+ if [ $? -eq 0 ]
+ then
+ let j++
+ let "BRICK${j}=$i"
+
+ fi
+ let i++
+ done
+ return $j
+}
+
+function get_cached_brick()
+{
+ i=1
+ while [ $i -lt 3 ]
+ do
+ test=`getfattr -n trusted.glusterfs.dht.linkto -e text $B0/${V0}$BRICK$i 2>&1`
+ if [ $? -eq 1 ]
+ then
+ cached=$BRICK"$i"
+ i=$(( $i+3 ))
+ fi
+ let i++
+ done
+
+ return $cached
+}
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2
+TEST $CLI volume start $V0
+
+## Mount FUSE
+TEST glusterfs --attribute-timeout=0 --entry-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+## create a linkfile on subvolume 0
+TEST touch $M0/1
+TEST mv $M0/1 $M0/2
+
+file_has_linkfile 2
+has_link=$?
+if [ $has_link -eq 2 ]
+then
+ get_cached_brick
+ CACHED=$?
+ # Kill a brick process
+ kill -9 `cat /var/lib/glusterd/vols/$V0/run/$H0-d-backends-${V0}$CACHED.pid`;
+fi
+
+## trigger a lookup
+ls -l $M0/2 2>/dev/null
+
+## fail dd if file exists.
+
+dd if=/dev/zero of=$M0/2 bs=1 count=1 conv=excl 2>/dev/null
+EXPECT "1" echo $?
+
+cleanup;
diff --git a/tests/bugs/bug-895235.t b/tests/bugs/bug-895235.t
new file mode 100644
index 000000000..0764b50d4
--- /dev/null
+++ b/tests/bugs/bug-895235.t
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 ensure-durability off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable
+
+TEST gluster volume profile $V0 start
+TEST dd of=$M0/a if=/dev/zero bs=1M count=1 oflag=append
+finodelk_max_latency=$($CLI volume profile $V0 info | grep FINODELK | awk 'BEGIN {max = 0} {if ($6 > max) max=$6;} END {print max}' | cut -d. -f 1 | egrep "[0-9]{7,}")
+
+TEST [ -z $finodelk_max_latency ]
+
+cleanup;
diff --git a/tests/bugs/bug-896431.t b/tests/bugs/bug-896431.t
new file mode 100755
index 000000000..f968e59c1
--- /dev/null
+++ b/tests/bugs/bug-896431.t
@@ -0,0 +1,124 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 2 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Setting cluster.subvols-per-directory as -5
+TEST ! $CLI volume set $V0 cluster.subvols-per-directory -5
+EXPECT '' volinfo_field $V0 'cluster.subvols-per-directory';
+TEST ! $CLI volume set $V0 subvols-per-directory -5
+EXPECT '' volinfo_field $V0 'cluster.subvols-per-directory';
+
+## Setting cluster.subvols-per-directory as 0
+TEST ! $CLI volume set $V0 cluster.subvols-per-directory 0
+EXPECT '' volinfo_field $V0 'cluster.subvols-per-directory';
+TEST ! $CLI volume set $V0 subvols-per-directory 0
+EXPECT '' volinfo_field $V0 'cluster.subvols-per-directory';
+
+## Setting cluster.subvols-per-directory as 4 (the total number of bricks)
+TEST ! $CLI volume set $V0 cluster.subvols-per-directory 4
+EXPECT '' volinfo_field $V0 'cluster.subvols-per-directory';
+TEST ! $CLI volume set $V0 subvols-per-directory 4
+EXPECT '' volinfo_field $V0 'cluster.subvols-per-directory';
+
+## Setting cluster.subvols-per-directory as 2 (the total number of subvolumes)
+TEST $CLI volume set $V0 cluster.subvols-per-directory 2
+EXPECT '2' volinfo_field $V0 'cluster.subvols-per-directory';
+
+## Setting cluster.subvols-per-directory as 1
+TEST $CLI volume set $V0 subvols-per-directory 1
+EXPECT '1' volinfo_field $V0 'cluster.subvols-per-directory';
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
+
+## Start and create a pure replicate volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 8 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT 'Replicate' volinfo_field $V0 'Type';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Setting cluster.subvols-per-directory as 8 for a replicate volume
+TEST ! $CLI volume set $V0 cluster.subvols-per-directory 8
+EXPECT '' volinfo_field $V0 'cluster.subvols-per-directory';
+TEST ! $CLI volume set $V0 subvols-per-directory 8
+EXPECT '' volinfo_field $V0 'cluster.subvols-per-directory';
+
+## Setting cluster.subvols-per-directory as 1 for a replicate volume
+TEST $CLI volume set $V0 cluster.subvols-per-directory 1
+EXPECT '1' volinfo_field $V0 'cluster.subvols-per-directory';
+TEST $CLI volume set $V0 subvols-per-directory 1
+EXPECT '1' volinfo_field $V0 'cluster.subvols-per-directory';
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
+
+## Start and create a pure stripe volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 stripe 8 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT 'Stripe' volinfo_field $V0 'Type';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Setting cluster.subvols-per-directory as 8 for a stripe volume
+TEST ! $CLI volume set $V0 cluster.subvols-per-directory 8
+EXPECT '' volinfo_field $V0 'cluster.subvols-per-directory';
+TEST ! $CLI volume set $V0 subvols-per-directory 8
+EXPECT '' volinfo_field $V0 'cluster.subvols-per-directory';
+
+## Setting cluster.subvols-per-directory as 1 for a stripe volume
+TEST $CLI volume set $V0 cluster.subvols-per-directory 1
+EXPECT '1' volinfo_field $V0 'cluster.subvols-per-directory';
+TEST $CLI volume set $V0 subvols-per-directory 1
+EXPECT '1' volinfo_field $V0 'cluster.subvols-per-directory';
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-902610.t b/tests/bugs/bug-902610.t
new file mode 100755
index 000000000..00ba03adf
--- /dev/null
+++ b/tests/bugs/bug-902610.t
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+## Layout-spread set to 3, but subvols up are 2. So layout should split 50-50
+function get_layout()
+{
+ layout1=`getfattr -n trusted.glusterfs.dht -e hex $1 2>&1|grep dht |cut -d = -f2`
+ layout2=`getfattr -n trusted.glusterfs.dht -e hex $2 2>&1|grep dht |cut -d = -f2`
+
+ if [ $layout1 == "0x0000000100000000000000007ffffffe" ]
+ then
+ if [ $layout2 == "0x00000001000000007fffffffffffffff" ]
+ then
+ return 0
+ else
+ return 1
+ fi
+ fi
+
+ if [ $layout2 == "0x0000000100000000000000007ffffffe" ]
+ then
+ if [ $layout1 == "0x00000001000000007fffffffffffffff" ]
+ then
+ return 0
+ else
+ return 1
+ fi
+ fi
+ return 1
+}
+
+BRICK_COUNT=4
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2 $H0:$B0/${V0}3
+## set subvols-per-dir option
+TEST $CLI volume set $V0 subvols-per-directory 3
+TEST $CLI volume start $V0
+
+## Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0 --entry-timeout=0 --attribute-timeout=0;
+
+TEST ls -l $M0
+
+## kill 2 bricks to bring down available subvol < spread count
+kill -9 `cat /var/lib/glusterd/vols/$V0/run/$H0-d-backends-${V0}2.pid`;
+kill -9 `cat /var/lib/glusterd/vols/$V0/run/$H0-d-backends-${V0}3.pid`;
+
+mkdir $M0/dir1 2>/dev/null
+
+get_layout $B0/${V0}0/dir1 $B0/${V0}1/dir1
+EXPECT "0" echo $?
+
+cleanup;
diff --git a/tests/bugs/bug-903336.t b/tests/bugs/bug-903336.t
new file mode 100644
index 000000000..c1f91312a
--- /dev/null
+++ b/tests/bugs/bug-903336.t
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST setfattr -n trusted.io-stats-dump -v /tmp $M0
+cleanup
diff --git a/tests/bugs/bug-904065.t b/tests/bugs/bug-904065.t
new file mode 100755
index 000000000..505854d9b
--- /dev/null
+++ b/tests/bugs/bug-904065.t
@@ -0,0 +1,90 @@
+#!/bin/bash
+#
+# This test does not use 'showmount' from the nfs-utils package, it would
+# require setting up a portmapper (either rpcbind or portmap, depending on the
+# Linux distribution used for testing). The persistancy of the rmtab should not
+# affect the current showmount outputs, so existing regression tests should be
+# sufficient.
+#
+
+# count the lines of a file, return 0 if the file does not exist
+function count_lines()
+{
+ if [ -e "$1" ]
+ then
+ wc -l < $1
+ else
+ echo 0
+ fi
+}
+
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../nfs.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+# glusterfs/nfs needs some time to start up in the background
+EXPECT_WITHIN 20 1 is_nfs_export_available
+
+# before mounting the rmtab should be empty
+EXPECT '0' count_lines /var/lib/glusterd/nfs/rmtab
+
+TEST mount -t nfs -o vers=3,nolock $H0:/$V0 $N0
+# the output would looks similar to:
+#
+# hostname-0=172.31.122.104
+# mountpoint-0=/ufo
+#
+EXPECT '2' count_lines /var/lib/glusterd/nfs/rmtab
+
+# duplicate mounts should not be recorded (client could have crashed)
+TEST mount -t nfs -o vers=3,nolock $H0:/$V0 $N1
+EXPECT '2' count_lines /var/lib/glusterd/nfs/rmtab
+
+# removing a mount should (even if there are two) should remove the entry
+TEST umount $N1
+EXPECT '0' count_lines /var/lib/glusterd/nfs/rmtab
+
+# unmounting the other mount should work flawlessly
+TEST umount $N0
+EXPECT '0' count_lines /var/lib/glusterd/nfs/rmtab
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --volfile-server=$H0 --volfile-id=$V0 $M0
+
+# we'll create a fake rmtab here, similar to how an other storage server would do
+# using an invalid IP address to prevent (unlikely) collisions on the test-machine
+cat << EOF > $M0/rmtab
+hostname-0=127.0.0.256
+mountpoint-0=/ufo
+EOF
+EXPECT '2' count_lines $M0/rmtab
+
+# reconfigure merges the rmtab with the one on the volume
+TEST gluster volume set $V0 nfs.mount-rmtab $M0/rmtab
+
+# glusterfs/nfs needs some time to restart
+EXPECT_WITHIN 20 1 is_nfs_export_available
+
+# a new mount should be added to the rmtab, not overwrite exiting ones
+TEST mount -t nfs -o vers=3,nolock $H0:/$V0 $N0
+EXPECT '4' count_lines $M0/rmtab
+
+TEST umount $N0
+EXPECT '2' count_lines $M0/rmtab
+
+# TODO: nfs/reconfigure() is never called and is therefor disabled. When the
+# NFS-server supports reloading and does not get restarted anymore, we should
+# add a test that includes the merging of entries in the old rmtab with the new
+# rmtab.
+
+cleanup
diff --git a/tests/bugs/bug-904300.t b/tests/bugs/bug-904300.t
new file mode 100755
index 000000000..4276ee229
--- /dev/null
+++ b/tests/bugs/bug-904300.t
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../nfs.rc
+
+cleanup;
+
+# 1-8
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0;
+TEST $CLI volume start $V0
+EXPECT_WITHIN 20 1 is_nfs_export_available
+
+TEST mount -t nfs -o vers=3,nolock,soft,intr $H0:/$V0 $N0
+TEST mkdir $N0/dir1
+TEST umount $N0
+
+#
+# Case 1: Allow "dir1" to be mounted only from 127.0.0.1
+# 9-12
+TEST $CLI volume set $V0 export-dir \""/dir1(127.0.0.1)"\"
+EXPECT_WITHIN 20 2 is_nfs_export_available
+
+TEST mount -t nfs -o vers=3,nolock,soft,intr localhost:/$V0/dir1 $N0
+TEST umount $N0
+
+#
+# Case 2: Allow "dir1" to be mounted only from 8.8.8.8. This is
+# a negative test case therefore the mount should fail.
+# 13-16
+TEST $CLI volume set $V0 export-dir \""/dir1(8.8.8.8)"\"
+EXPECT_WITHIN 20 2 is_nfs_export_available
+
+TEST ! mount -t nfs -o vers=3,nolock,soft,intr $H0:/$V0/dir1 $N0
+TEST ! umount $N0
+
+
+# Case 3: Variation of test case1. Here we are checking with hostname
+# instead of ip address.
+# 17-20
+TEST $CLI volume set $V0 export-dir \""/dir1($H0)"\"
+EXPECT_WITHIN 20 2 is_nfs_export_available
+
+TEST mount -t nfs -o vers=3,nolock,soft,intr $H0:/$V0/dir1 $N0
+TEST umount $N0
+
+# Case 4: Variation of test case1. Here we are checking with IP range
+# 21-24
+TEST $CLI volume set $V0 export-dir \""/dir1(127.0.0.0/24)"\"
+EXPECT_WITHIN 20 2 is_nfs_export_available
+
+TEST mount -t nfs -o vers=3,nolock,soft,intr localhost:/$V0/dir1 $N0
+TEST umount $N0
+
+## Finish up
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-905307.t b/tests/bugs/bug-905307.t
new file mode 100644
index 000000000..d81d81c9f
--- /dev/null
+++ b/tests/bugs/bug-905307.t
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+#test functionality of post-op-delay-secs
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+
+#Strings should not be accepted.
+TEST ! $CLI volume set $V0 cluster.post-op-delay-secs abc
+
+#-ve ints should not be accepted.
+TEST ! $CLI volume set $V0 cluster.post-op-delay-secs -1
+
+#INT_MAX+1 should not be accepted.
+TEST ! $CLI volume set $V0 cluster.post-op-delay-secs 2147483648
+
+#floats should not be accepted.
+TEST ! $CLI volume set $V0 cluster.post-op-delay-secs 1.25
+
+#min val 0 should be accepted
+TEST $CLI volume set $V0 cluster.post-op-delay-secs 0
+EXPECT "0" volume_option $V0 cluster.post-op-delay-secs
+
+#max val 2147483647 should be accepted
+TEST $CLI volume set $V0 cluster.post-op-delay-secs 2147483647
+EXPECT "2147483647" volume_option $V0 cluster.post-op-delay-secs
+
+#some middle val in range 2147 should be accepted
+TEST $CLI volume set $V0 cluster.post-op-delay-secs 2147
+EXPECT "2147" volume_option $V0 cluster.post-op-delay-secs
+cleanup;
diff --git a/tests/bugs/bug-905864.c b/tests/bugs/bug-905864.c
new file mode 100644
index 000000000..ed09b6e2b
--- /dev/null
+++ b/tests/bugs/bug-905864.c
@@ -0,0 +1,82 @@
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <pthread.h>
+
+
+pthread_t th[5] = {0};
+void
+flock_init (struct flock *f, short int type, off_t start, off_t len)
+{
+ f->l_type = type;
+ f->l_start = start;
+ f->l_len = len;
+}
+
+int
+flock_range_in_steps (int fd, int is_set, short l_type,
+ int start, int end, int step)
+{
+ int ret = 0;
+ int i = 0;
+ struct flock f = {0,};
+
+ for (i = start; i+step < end; i += step) {
+ flock_init (&f, l_type, i, step);
+ ret = fcntl (fd, (is_set)? F_SETLKW:F_GETLK, &f);
+ if (ret) {
+ perror ("fcntl");
+ goto out;
+ }
+ }
+out:
+ return ret;
+}
+
+void *
+random_locker (void *arg)
+{
+ int fd = *(int *)arg;
+ int i = 0;
+ int is_set = 0;
+
+ /* use thread id to choose GETLK or SETLK operation*/
+ is_set = pthread_self () % 2;
+ (void)flock_range_in_steps (fd, is_set, F_WRLCK, 0, 400, 1);
+
+ return NULL;
+}
+
+
+int main (int argc, char **argv)
+{
+ int fd = -1;
+ int ret = 1;
+ int i = 0;
+ char *fname = NULL;
+
+ if (argc < 2)
+ goto out;
+
+ fname = argv[1];
+ fd = open (fname, O_RDWR);
+ if (fd == -1) {
+ perror ("open");
+ goto out;
+ }
+
+ ret = flock_range_in_steps (fd, 1, F_WRLCK, 0, 2000, 2);
+ for (i = 0; i < 5; i++) {
+ pthread_create (&th[i], NULL, random_locker, (void *) &fd);
+ }
+ ret = flock_range_in_steps (fd, 1, F_WRLCK, 0, 2000, 2);
+ for (i = 0; i < 5; i++) {
+ pthread_join (th[i], NULL);
+ }
+out:
+ if (fd != -1)
+ close (fd);
+
+ return ret;
+}
diff --git a/tests/bugs/bug-905864.t b/tests/bugs/bug-905864.t
new file mode 100644
index 000000000..44bb469f2
--- /dev/null
+++ b/tests/bugs/bug-905864.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4};
+TEST $CLI volume start $V0;
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M1;
+
+TEST touch $M0/file1;
+
+#following C program tries open up race(s) if any, in F_GETLK/F_SETLKW codepaths
+#of locks xlator
+gcc -lpthread -g3 $(dirname $0)/bug-905864.c -o $(dirname $0)/bug-905864
+$(dirname $0)/bug-905864 $M0/file1 &
+$(dirname $0)/bug-905864 $M1/file1;
+wait
+rm -f $(dirname $0)/bug-905864
+
+EXPECT $(brick_count $V0) online_brick_count
+
+cleanup
+
diff --git a/tests/bugs/bug-906646.t b/tests/bugs/bug-906646.t
new file mode 100644
index 000000000..0e6a3bcb6
--- /dev/null
+++ b/tests/bugs/bug-906646.t
@@ -0,0 +1,93 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+REPLICA=2
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica $REPLICA $H0:$B0/${V0}-00 $H0:$B0/${V0}-01 $H0:$B0/${V0}-10 $H0:$B0/${V0}-11
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+
+## Mount FUSE with caching disabled
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+function xattr_query_check()
+{
+ local path=$1
+ local xa_name=$2
+
+ local ret=`getfattr -m . -n $xa_name $path 2>&1 | grep -o "$xa_name: No such attribute" | wc -l`
+ echo $ret
+}
+
+function set_xattr()
+{
+ local path=$1
+ local xa_name=$2
+ local xa_val=$3
+
+ setfattr -n $xa_name -v $xa_val $path
+ echo $?
+}
+
+function remove_xattr()
+{
+ local path=$1
+ local xa_name=$2
+
+ setfattr -x $xa_name $path
+ echo $?
+}
+
+f=f00f
+pth=$M0/$f
+
+touch $pth
+
+# fetch backend paths
+backend_paths=`get_backend_paths $pth`
+
+# convert it into and array
+backend_paths_array=($backend_paths)
+
+# setxattr xattr for this file
+EXPECT 0 set_xattr $pth "trusted.name" "test"
+
+# confirm the set on backend
+EXPECT 0 xattr_query_check ${backend_paths_array[0]} "trusted.name"
+EXPECT 0 xattr_query_check ${backend_paths_array[1]} "trusted.name"
+
+brick_path=`echo ${backend_paths_array[0]} | sed -n 's/\(.*\)\/'$f'/\1/p'`
+brick_id=`$CLI volume info $V0 | grep "Brick[[:digit:]]" | grep -n $brick_path | cut -f1 -d:`
+
+# Kill a brick process
+TEST kill_brick $V0 $H0 $brick_path
+
+# remove the xattr from the mount point
+EXPECT 0 remove_xattr $pth "trusted.name"
+
+# we killed ${backend_paths[0]} - so expect the xattr to be there
+# on the backend there
+EXPECT 0 xattr_query_check ${backend_paths_array[0]} "trusted.name"
+EXPECT 1 xattr_query_check ${backend_paths_array[1]} "trusted.name"
+
+# restart the brick process
+TEST $CLI volume start $V0 force
+
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 `expr $brick_id - 1`
+
+stat $pth
+
+# check backends - xattr should not be present anywhere
+EXPECT 1 xattr_query_check ${backend_paths_array[0]} "trusted.name"
+EXPECT 1 xattr_query_check ${backend_paths_array[1]} "trusted.name"
+
+cleanup;
diff --git a/tests/bugs/bug-907072.t b/tests/bugs/bug-907072.t
new file mode 100755
index 000000000..49b477767
--- /dev/null
+++ b/tests/bugs/bug-907072.t
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../fileio.rc
+. $(dirname $0)/../dht.rc
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1,2,3};
+TEST $CLI volume start $V0;
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+TEST mkdir $M0/test;
+
+OLD_LAYOUT0=`get_layout $B0/${V0}0/test`;
+OLD_LAYOUT1=`get_layout $B0/${V0}1/test`;
+OLD_LAYOUT2=`get_layout $B0/${V0}2/test`;
+OLD_LAYOUT3=`get_layout $B0/${V0}3/test`;
+
+TEST killall glusterfsd;
+
+# Delete directory on one brick
+TEST rm -rf $B0/${V}1/test;
+
+# And only layout xattr on another brick
+TEST setfattr -x trusted.glusterfs.dht $B0/${V0}2/test;
+
+TEST $CLI volume start $V0 force;
+
+TEST umount $M0;
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+TEST stat $M0/test;
+
+NEW_LAYOUT0=`get_layout $B0/${V0}0/test`;
+NEW_LAYOUT1=`get_layout $B0/${V0}1/test`;
+NEW_LAYOUT2=`get_layout $B0/${V0}2/test`;
+NEW_LAYOUT3=`get_layout $B0/${V0}3/test`;
+
+EXPECT $OLD_LAYOUT0 echo $NEW_LAYOUT0;
+EXPECT $OLD_LAYOUT1 echo $NEW_LAYOUT1;
+EXPECT $OLD_LAYOUT2 echo $NEW_LAYOUT2;
+EXPECT $OLD_LAYOUT3 echo $NEW_LAYOUT3;
diff --git a/tests/bugs/bug-908146.t b/tests/bugs/bug-908146.t
new file mode 100755
index 000000000..87b456e6e
--- /dev/null
+++ b/tests/bugs/bug-908146.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+function get_fd_count {
+ local vol=$1
+ local host=$2
+ local brick=$3
+ local fname=$4
+ local gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brick/$fname))
+ local statedump=$(generate_brick_statedump $vol $host $brick)
+ local count=$(grep "gfid=$gfid_str" $statedump -A2 | grep fd-count | cut -f2 -d'=' | tail -1)
+ rm -f $statedump
+ echo $count
+}
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 --direct-io-mode=enable
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M1 --attribute-timeout=0 --entry-timeout=0 --direct-io-mode=enable
+
+TEST touch $M0/a
+
+exec 4>"$M0/a"
+exec 5>"$M1/a"
+EXPECT "2" get_fd_count $V0 $H0 $B0/${V0}0 a
+
+exec 4>&-
+EXPECT "1" get_fd_count $V0 $H0 $B0/${V0}0 a
+
+exec 5>&-
+EXPECT "0" get_fd_count $V0 $H0 $B0/${V0}0 a
+
+cleanup
diff --git a/tests/bugs/bug-912297.t b/tests/bugs/bug-912297.t
new file mode 100755
index 000000000..f5a5babf5
--- /dev/null
+++ b/tests/bugs/bug-912297.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 2 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Setting owner-uid as -12
+TEST ! $CLI volume set $V0 owner-uid -12
+EXPECT '' volinfo_field $V0 'storage.owner-uid'
+
+## Setting owner-gid as -5
+TEST ! $CLI volume set $V0 owner-gid -5
+EXPECT '' volinfo_field $V0 'storage.owner-gid'
+
+## Setting owner-uid as 36
+TEST $CLI volume set $V0 owner-uid 36
+EXPECT '36' volinfo_field $V0 'storage.owner-uid'
+
+## Setting owner-gid as 36
+TEST $CLI volume set $V0 owner-gid 36
+EXPECT '36' volinfo_field $V0 'storage.owner-gid'
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-912564.t b/tests/bugs/bug-912564.t
new file mode 100755
index 000000000..b24268fbc
--- /dev/null
+++ b/tests/bugs/bug-912564.t
@@ -0,0 +1,92 @@
+#!/bin/bash
+
+# Test that the rsync and "extra" regexes cause rename-in-place without
+# creating linkfiles, when they're supposed to. Without the regex we'd have a
+# 1/4 chance of each file being assigned to the right place, so with 16 files
+# we have a 1/2^32 chance of getting the correct result by accident.
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+function count_linkfiles {
+ local i
+ local count=0
+ for i in $(seq $2 $3); do
+ x=$(find $1$i -perm -1000 | wc -l)
+ # Divide by two because of the .glusterfs links.
+ count=$((count+x/2))
+ done
+ echo $count
+}
+
+# This function only exists to get around quoting difficulties in TEST.
+function set_regex {
+ $CLI volume set $1 cluster.extra-hash-regex '^foo(.+)bar$'
+}
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+mkdir -p $H0:$B0/${V0}0
+mkdir -p $H0:$B0/${V0}1
+mkdir -p $H0:$B0/${V0}2
+mkdir -p $H0:$B0/${V0}3
+
+# Create and start a volume.
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 \
+ $H0:$B0/${V0}2 $H0:$B0/${V0}3
+TEST $CLI volume start $V0
+EXPECT_WITHIN 15 'Started' volinfo_field $V0 'Status';
+
+# Mount it.
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+
+# Make sure the rsync regex works, by verifying that no linkfiles are
+# created.
+rm -f $M0/file*
+for i in $(seq 0 15); do
+ fn=$(printf file%x $i)
+ tmp_fn=$(printf .%s.%d $fn $RANDOM)
+ echo testing > $M0/$tmp_fn
+ mv $M0/$tmp_fn $M0/$fn
+done
+lf=$(count_linkfiles $B0/$V0 0 3)
+TEST [ "$lf" -eq "0" ]
+
+# Make sure that linkfiles *are* created for normal files.
+rm -f $M0/file*
+for i in $(seq 0 15); do
+ fn=$(printf file%x $i)
+ tmp_fn=$(printf foo%sbar $fn)
+ echo testing > $M0/$tmp_fn
+ mv $M0/$tmp_fn $M0/$fn
+done
+lf=$(count_linkfiles $B0/$V0 0 3)
+TEST [ "$lf" -ne "0" ]
+
+# Make sure that setting an extra regex suppresses the linkfiles.
+TEST set_regex $V0
+rm -f $M0/file*
+for i in $(seq 0 15); do
+ fn=$(printf file%x $i)
+ tmp_fn=$(printf foo%sbar $fn)
+ echo testing > $M0/$tmp_fn
+ mv $M0/$tmp_fn $M0/$fn
+done
+lf=$(count_linkfiles $B0/$V0 0 3)
+TEST [ "$lf" -eq "0" ]
+
+# Re-test the rsync regex, to make sure the extra one didn't break it.
+rm -f $M0/file*
+for i in $(seq 0 15); do
+ fn=$(printf file%x $i)
+ tmp_fn=$(printf .%s.%d $fn $RANDOM)
+ echo testing > $M0/$tmp_fn
+ mv $M0/$tmp_fn $M0/$fn
+done
+lf=$(count_linkfiles $B0/$V0 0 3)
+TEST [ "$lf" -eq "0" ]
+
+cleanup
diff --git a/tests/bugs/bug-913051.t b/tests/bugs/bug-913051.t
new file mode 100644
index 000000000..69e90cf66
--- /dev/null
+++ b/tests/bugs/bug-913051.t
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../fileio.rc
+
+cleanup;
+
+#Test that afr opens the file on the bricks that were offline at the time of
+# open after the brick comes online. This tests for writev, readv triggering
+# open-fd-fix in afr.
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+TEST mkdir $M0/dir
+TEST touch $M0/dir/a
+TEST touch $M0/dir/b
+echo abc > $M0/dir/b
+
+TEST wfd=`fd_available`
+TEST fd_open $wfd "w" $M0/dir/a
+TEST rfd=`fd_available`
+TEST fd_open $rfd "r" $M0/dir/b
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0
+
+#check that the files are not opned on brick-0
+realpatha=$(gf_get_gfid_backend_file_path $B0/${V0}0 "dir/a")
+EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 "$realpatha"
+EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $B0/${V0}0/dir/a
+
+realpathb=$(gf_get_gfid_backend_file_path $B0/${V0}0 "dir/b")
+EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 "$realpathb"
+EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $B0/${V0}0/dir/b
+
+#attempt self-heal so that the files are created on brick-0
+
+TEST ls -l $M0/dir/a
+TEST ls -l $M0/dir/b
+
+#trigger writev for attempting open-fd-fix in afr
+TEST fd_write $wfd "open sesame"
+
+#trigger readv for attempting open-fd-fix in afr
+TEST fd_cat $rfd
+
+EXPECT_WITHIN 20 "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $B0/${V0}0/dir/a
+EXPECT_WITHIN 20 "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $B0/${V0}0/dir/b
+
+TEST fd_close $wfd
+TEST fd_close $rfd
+cleanup;
diff --git a/tests/bugs/bug-913487.t b/tests/bugs/bug-913487.t
new file mode 100644
index 000000000..2095903d9
--- /dev/null
+++ b/tests/bugs/bug-913487.t
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST ! $CLI volume set $V0 performance.open-behind off;
+
+TEST pidof glusterd;
+
+cleanup;
diff --git a/tests/bugs/bug-913544.t b/tests/bugs/bug-913544.t
new file mode 100644
index 000000000..790bc0898
--- /dev/null
+++ b/tests/bugs/bug-913544.t
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+#simulate a split-brain of a file and do truncate. This should not crash the mount point
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume set $V0 stat-prefetch off
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+cd $M0
+TEST touch a
+#simulate no-changelog data split-brain
+echo "abc" > $B0/${V0}1/a
+echo "abcd" > $B0/${V0}0/a
+TEST ! truncate -s 0 a
+TEST ls
+cd
+
+cleanup
diff --git a/tests/bugs/bug-913555.t b/tests/bugs/bug-913555.t
new file mode 100755
index 000000000..f58d7bd6d
--- /dev/null
+++ b/tests/bugs/bug-913555.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+# Test that a volume becomes unwritable when the cluster loses quorum.
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../cluster.rc
+
+
+function check_fs {
+ df $1 &> /dev/null
+ echo $?
+}
+
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+function glusterfsd_count {
+ pidof glusterfsd | wc -w;
+}
+
+cleanup;
+
+TEST launch_cluster 3; # start 3-node virtual cluster
+TEST $CLI_1 peer probe $H2; # peer probe server 2 from server 1 cli
+TEST $CLI_1 peer probe $H3; # peer probe server 3 from server 1 cli
+
+EXPECT_WITHIN 20 2 check_peers
+
+TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0 $H3:$B3/$V0
+TEST $CLI_1 volume set $V0 cluster.server-quorum-type server
+TEST $CLI_1 volume start $V0
+TEST glusterfs --volfile-server=$H1 --volfile-id=$V0 $M0
+
+# Kill one pseudo-node, make sure the others survive and volume stays up.
+TEST kill_node 3;
+EXPECT_WITHIN 20 1 check_peers;
+EXPECT 0 check_fs $M0;
+EXPECT 2 glusterfsd_count;
+
+# Kill another pseudo-node, make sure the last one dies and volume goes down.
+TEST kill_node 2;
+EXPECT_WITHIN 20 0 check_peers
+EXPECT 1 check_fs $M0;
+EXPECT 0 glusterfsd_count; # the two glusterfsds of the other two glusterds
+ # must be dead
+
+TEST $glusterd_2;
+TEST $glusterd_3;
+EXPECT_WITHIN 20 3 glusterfsd_count; # restore quorum, all ok
+EXPECT_WITHIN 5 0 check_fs $M0;
+
+cleanup
diff --git a/tests/bugs/bug-915280.t b/tests/bugs/bug-915280.t
new file mode 100755
index 000000000..a1f92f201
--- /dev/null
+++ b/tests/bugs/bug-915280.t
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+TEST $CLI volume create $V0 $H0:$B0/brick1 $H0:$B0/brick2;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+sleep 3
+
+MOUNTDIR=$N0;
+TEST mount -t nfs -o vers=3,nolock,soft,timeo=30,retrans=1 $H0:/$V0 $N0
+TEST touch $N0/testfile
+
+TEST $CLI volume set $V0 debug.error-gen client
+TEST $CLI volume set $V0 debug.error-fops stat
+TEST $CLI volume set $V0 debug.error-failure 100
+
+sleep 1
+
+pid_file=$(read_nfs_pidfile);
+
+getfacl $N0/testfile 2>/dev/null
+
+nfs_pid=$(get_nfs_pid);
+if [ ! $nfs_pid ]
+then
+ nfs_pid=0;
+fi
+
+TEST [ $nfs_pid -eq $pid_file ]
+
+TEST umount $MOUNTDIR -l
+
+cleanup;
diff --git a/tests/bugs/bug-915554.t b/tests/bugs/bug-915554.t
new file mode 100755
index 000000000..beb669f8c
--- /dev/null
+++ b/tests/bugs/bug-915554.t
@@ -0,0 +1,75 @@
+#!/bin/bash
+#
+# Bug <915554>
+#
+# This test checks for a condition where a rebalance migrates a file and does
+# not preserve the original file size. This can occur due to hole preservation
+# logic in the file migration code. If a file size is aligned to a disk sector
+# boundary (512b) and the tail portion of the file is zero-filled, the file
+# may end up truncated to the end of the last data region in the file.
+#
+###
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../dht.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+BRICK_COUNT=3
+# create, start and mount a two brick DHT volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2
+TEST $CLI volume start $V0
+
+TEST glusterfs --attribute-timeout=0 --entry-timeout=0 --gid-timeout=-1 -s $H0 --volfile-id $V0 $M0;
+
+i=1
+# Write some data to a file and extend such that the file is sparse to a sector
+# aligned boundary.
+echo test > $M0/$i
+TEST truncate --size=1m $M0/$i
+
+# cache the original size
+SIZE1=`stat -c %s $M0/$i`
+
+# rename till file gets a linkfile
+
+while [ $i -ne 0 ]
+do
+ test=`mv $M0/$i $M0/$(( $i+1 )) 2>/dev/null`
+ if [ $? -ne 0 ]
+ then
+ echo "rename failed"
+ break
+ fi
+ let i++
+ file_has_linkfile $i
+ has_link=$?
+ if [ $has_link -eq 2 ]
+ then
+ break;
+ fi
+done
+
+# start a rebalance (force option to overide checks) to trigger migration of
+# file
+
+TEST $CLI volume rebalance $V0 start force
+
+# check if rebalance has completed for upto 15 secs
+
+EXPECT_WITHIN 30 "0" rebalance_completed
+
+# validate the file size after the migration
+SIZE2=`stat -c %s $M0/$i`
+
+TEST [ $SIZE1 -eq $SIZE2 ]
+
+TEST rm -f $M0/$i
+TEST umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/bug-916226.t b/tests/bugs/bug-916226.t
new file mode 100644
index 000000000..2abfa1fc6
--- /dev/null
+++ b/tests/bugs/bug-916226.t
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2 $H0:$B0/${V0}3
+TEST $CLI volume set $V0 cluster.eager-lock on
+TEST $CLI volume start $V0
+
+## Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+TEST mkdir $M0/dir{1..10};
+TEST touch $M0/dir{1..10}/files{1..10};
+
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}4 $H0:/$B0/${V0}5
+
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN 60 "completed" rebalance_status_field $V0
+
+cleanup;
diff --git a/tests/bugs/bug-916549.t b/tests/bugs/bug-916549.t
new file mode 100755
index 000000000..d6a45b827
--- /dev/null
+++ b/tests/bugs/bug-916549.t
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd;
+TEST $CLI volume create $V0 $H0:$B0/${V0}1;
+TEST $CLI volume start $V0;
+
+pid_file=$(ls /var/lib/glusterd/vols/$V0/run);
+brick_pid=$(cat /var/lib/glusterd/vols/$V0/run/$pid_file);
+
+
+kill -SIGKILL $brick_pid;
+TEST $CLI volume start $V0 force;
+TEST process_leak_count $(pidof glusterd);
+
+cleanup;
diff --git a/tests/bugs/bug-918437-sh-mtime.t b/tests/bugs/bug-918437-sh-mtime.t
new file mode 100644
index 000000000..080956f51
--- /dev/null
+++ b/tests/bugs/bug-918437-sh-mtime.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+function get_mtime {
+ local f=$1
+ stat $f | grep Modify | awk '{print $2 $3}' | cut -f1 -d'.'
+}
+cleanup;
+
+## Tests if mtime is correct after self-heal.
+TEST glusterd
+TEST pidof glusterd
+TEST mkdir -p $B0/gfs0/brick0{1,2}
+TEST $CLI volume create $V0 replica 2 transport tcp $H0:$B0/gfs0/brick01 $H0:$B0/gfs0/brick02
+TEST $CLI volume set $V0 nfs.disable on
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --direct-io-mode=enable
+# file 'a' is healed from brick02 to brick01 where as file 'b' is healed from
+# brick01 to brick02
+
+TEST cp -p /etc/passwd $M0/a
+TEST cp -p /etc/passwd $M0/b
+
+#Store mtimes before self-heals
+TEST modify_atstamp=$(get_mtime $B0/gfs0/brick02/a)
+TEST modify_btstamp=$(get_mtime $B0/gfs0/brick02/b)
+
+TEST $CLI volume stop $V0
+TEST gf_rm_file_and_gfid_link $B0/gfs0/brick01 a
+TEST gf_rm_file_and_gfid_link $B0/gfs0/brick02 b
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 1
+
+find $M0 | xargs stat 1>/dev/null
+
+TEST modify_atstamp1=$(get_mtime $B0/gfs0/brick01/a)
+TEST modify_atstamp2=$(get_mtime $B0/gfs0/brick02/a)
+EXPECT $modify_atstamp echo $modify_atstamp1
+EXPECT $modify_atstamp echo $modify_atstamp2
+
+TEST modify_btstamp1=$(get_mtime $B0/gfs0/brick01/b)
+TEST modify_btstamp2=$(get_mtime $B0/gfs0/brick02/b)
+EXPECT $modify_btstamp echo $modify_btstamp1
+EXPECT $modify_btstamp echo $modify_btstamp2
+cleanup;
diff --git a/tests/bugs/bug-921072.t b/tests/bugs/bug-921072.t
new file mode 100755
index 000000000..e101d5b46
--- /dev/null
+++ b/tests/bugs/bug-921072.t
@@ -0,0 +1,118 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../nfs.rc
+
+cleanup;
+
+#1
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume start $V0
+EXPECT_WITHIN 20 1 is_nfs_export_available
+TEST mount -t nfs -o vers=3,nolock,soft,intr $H0:/$V0 $N0
+TEST umount $N0
+
+# based on ip addresses (1-4)
+# case 1: allow only localhost ip
+TEST $CLI volume set $V0 nfs.rpc-auth-allow 127.0.0.1
+EXPECT_WITHIN 20 1 is_nfs_export_available
+
+TEST mount -t nfs -o vers=3,nolock,soft,intr localhost:/$V0 $N0
+TEST umount $N0
+
+# case 2: allow only non-localhost ip
+TEST $CLI volume set $V0 nfs.rpc-auth-allow 192.168.1.1
+EXPECT_WITHIN 20 1 is_nfs_export_available
+#11
+TEST ! mount -t nfs -o vers=3,nolock,soft,intr localhost:/$V0 $N0
+TEST $CLI volume reset --mode=script $V0
+# case 3: reject only localhost ip
+TEST $CLI volume set $V0 nfs.rpc-auth-reject 127.0.0.1
+EXPECT_WITHIN 20 1 is_nfs_export_available
+
+TEST ! mount -t nfs -o vers=3,nolock,soft,intr localhost:/$V0 $N0
+
+# case 4: reject only non-localhost ip
+TEST $CLI volume set $V0 nfs.rpc-auth-reject 192.168.1.1
+EXPECT_WITHIN 20 1 is_nfs_export_available
+
+TEST mount -t nfs -o vers=3,nolock,soft,intr localhost:/$V0 $N0
+TEST umount $N0
+
+
+
+# NEED TO CHECK BOTH IP AND NAME BASED AUTH.
+# CASES WITH NFS.ADDR-NAMELOOKUP ON (5-12)
+TEST $CLI volume reset --mode=script $V0
+TEST $CLI volume set $V0 nfs.addr-namelookup on
+EXPECT_WITHIN 20 1 is_nfs_export_available
+#20
+TEST mount -t nfs -o vers=3,nolock,soft,intr localhost:/$V0 $N0
+TEST umount $N0
+
+# case 5: allow only localhost
+TEST $CLI volume set $V0 nfs.rpc-auth-allow localhost
+EXPECT_WITHIN 20 1 is_nfs_export_available
+
+TEST mount -t nfs -o vers=3,nolock,soft,intr localhost:/$V0 $N0
+TEST umount $N0
+
+# case 6: allow only somehost
+TEST $CLI volume set $V0 nfs.rpc-auth-allow somehost
+EXPECT_WITHIN 20 1 is_nfs_export_available
+
+TEST ! mount -t nfs -o vers=3,nolock,soft,intr localhost:/$V0 $N0
+
+# case 7: reject only localhost
+TEST $CLI volume reset --mode=script $V0
+TEST $CLI volume set $V0 nfs.addr-namelookup on
+TEST $CLI volume set $V0 nfs.rpc-auth-reject localhost
+EXPECT_WITHIN 20 1 is_nfs_export_available
+#30
+TEST ! mount -t nfs -o vers=3,nolock,soft,intr localhost:/$V0 $N0
+
+# case 8: reject only somehost
+TEST $CLI volume set $V0 nfs.rpc-auth-reject somehost
+EXPECT_WITHIN 20 1 is_nfs_export_available
+
+TEST mount -t nfs -o vers=3,nolock,soft,intr localhost:/$V0 $N0
+TEST umount $N0
+
+# based on ip addresses: repeat of cases 1-4
+# case 9: allow only localhost ip
+TEST $CLI volume reset --mode=script $V0
+TEST $CLI volume set $V0 nfs.addr-namelookup on
+TEST $CLI volume set $V0 nfs.rpc-auth-allow 127.0.0.1
+EXPECT_WITHIN 20 1 is_nfs_export_available
+
+TEST mount -t nfs -o vers=3,nolock,soft,intr localhost:/$V0 $N0
+TEST umount $N0
+
+# case 10: allow a non-localhost ip
+TEST $CLI volume set $V0 nfs.rpc-auth-allow 192.168.1.1
+EXPECT_WITHIN 20 1 is_nfs_export_available
+#40
+TEST ! mount -t nfs -o vers=3,nolock,soft,intr localhost:/$V0 $N0
+
+# case 11: reject only localhost ip
+TEST $CLI volume reset --mode=script $V0
+TEST $CLI volume set $V0 nfs.addr-namelookup on
+TEST $CLI volume set $V0 nfs.rpc-auth-reject 127.0.0.1
+EXPECT_WITHIN 20 1 is_nfs_export_available
+
+TEST ! mount -t nfs -o vers=3,nolock,soft,intr localhost:/$V0 $N0
+
+# case 12: reject only non-localhost ip
+TEST $CLI volume set $V0 nfs.rpc-auth-reject 192.168.1.1
+EXPECT_WITHIN 20 1 is_nfs_export_available
+
+TEST mount -t nfs -o vers=3,nolock,soft,intr localhost:/$V0 $N0
+TEST umount $N0
+
+TEST $CLI volume stop --mode=script $V0
+#49
+TEST $CLI volume delete --mode=script $V0
+cleanup
diff --git a/tests/bugs/bug-921231.t b/tests/bugs/bug-921231.t
new file mode 100644
index 000000000..db9cf3b6f
--- /dev/null
+++ b/tests/bugs/bug-921231.t
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+# This test writes to same file with 2 fds and tests that eager-lock is not
+# causing extra delay because of post-op-delay-secs
+cleanup;
+
+function write_to_file {
+ dd of=$M0/1 if=/dev/zero bs=1M count=128 oflag=append 2>&1 >/dev/null
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume set $V0 eager-lock on
+TEST $CLI volume set $V0 post-op-delay-secs 3
+TEST $CLI volume set $V0 client-log-level DEBUG
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+TEST $CLI volume set $V0 ensure-durability off
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+write_to_file &
+write_to_file &
+wait
+#Test if the MAX [F]INODELK fop latency is of the order of seconds.
+inodelk_max_latency=$($CLI volume profile $V0 info | grep INODELK | awk 'BEGIN {max = 0} {if ($6 > max) max=$6;} END {print max}' | cut -d. -f 1 | egrep "[0-9]{7,}")
+TEST [ -z $inodelk_max_latency ]
+
+cleanup;
diff --git a/tests/bugs/bug-921408.t b/tests/bugs/bug-921408.t
new file mode 100755
index 000000000..ef2b4fb21
--- /dev/null
+++ b/tests/bugs/bug-921408.t
@@ -0,0 +1,89 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../fileio.rc
+. $(dirname $0)/../dht.rc
+
+cleanup;
+wait_check_status ()
+{
+ n=0
+ while [ $n -lt $1 ]
+ do
+ ret=$(rebalance_completed)
+ if [ $ret == "0" ]
+ then
+ return 0;
+ else
+ sleep 1
+ n=`expr $n + 1`;
+ fi
+ done
+ return 1;
+}
+
+addbr_rebal_till_layout_change()
+{
+ val=1
+ l=$1
+ i=1
+ while [ $i -lt 5 ]
+ do
+ $CLI volume add-brick $V0 $H0:$B0/${V0}$l &>/dev/null
+ $CLI volume rebalance $V0 fix-layout start &>/dev/null
+ wait_check_status 15
+ if [ $? -eq 1 ]
+ then
+ break
+ fi
+ NEW_LAYOUT=`get_layout $B0/${V0}0`
+ if [ $OLD_LAYOUT == $NEW_LAYOUT ]
+ then
+ i=`expr $i + 1`;
+ l=`expr $l + 1`;
+ else
+ val=0
+ break
+ fi
+ done
+ return $val
+}
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 subvols-per-directory 1
+TEST $CLI volume start $V0
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+TEST mkdir $M0/test
+TEST touch $M0/test/test
+
+fd=`fd_available`
+TEST fd_open $fd "rw" $M0/test/test
+
+OLD_LAYOUT=`get_layout $B0/${V0}0`
+
+addbr_rebal_till_layout_change 1
+
+TEST [ $? -eq 0 ]
+
+for i in $(seq 1 1000)
+do
+ ls -l $M0/ >/dev/null
+ ret=$?
+ if [ $ret != 0 ]
+ then
+ break
+ fi
+done
+
+TEST [ $ret == 0 ];
+TEST fd_close $fd;
+
+TEST umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/bug-924075.t b/tests/bugs/bug-924075.t
new file mode 100755
index 000000000..f4e03e33a
--- /dev/null
+++ b/tests/bugs/bug-924075.t
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+#FIXME: there is another patch which moves the following function into
+#include.rc
+function process_leak_count ()
+{
+ local pid=$1;
+ return $(ls -lh /proc/$pid/fd | grep "(deleted)" | wc -l)
+}
+
+TEST glusterd;
+TEST $CLI volume create $V0 $H0:$B0/${V0}1;
+TEST $CLI volume start $V0;
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+mount_pid=$(get_mount_process_pid $V0);
+TEST process_leak_count $mount_pid;
+
+cleanup;
diff --git a/tests/bugs/bug-924265.t b/tests/bugs/bug-924265.t
new file mode 100755
index 000000000..13491356d
--- /dev/null
+++ b/tests/bugs/bug-924265.t
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+# Test that setting cluster.dht-xattr-name works, and that DHT consistently
+# uses the specified name instead of the default.
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+# We only care about the exit code, so keep it quiet.
+function silent_getfattr {
+ getfattr $* &> /dev/null
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+mkdir -p $H0:$B0/${V0}0
+
+# Create a volume and set the option.
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 cluster.dht-xattr-name trusted.foo.bar
+
+# Start and mount the volume.
+TEST $CLI volume start $V0
+EXPECT_WITHIN 15 'Started' volinfo_field $V0 'Status';
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+
+# Create a directory and make sure it has the right xattr.
+mkdir $M0/test
+TEST ! silent_getfattr -n trusted.glusterfs.dht $B0/${V0}0/test
+TEST silent_getfattr -n trusted.foo.bar $B0/${V0}0/test
+
+cleanup
diff --git a/tests/bugs/bug-927616.t b/tests/bugs/bug-927616.t
new file mode 100755
index 000000000..22b20aff2
--- /dev/null
+++ b/tests/bugs/bug-927616.t
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
+TEST $CLI volume set $V0 performance.open-behind off;
+TEST $CLI volume start $V0
+
+sleep 1;
+## Mount FUSE with caching disabled
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+sleep 1;
+
+TEST mount -t nfs -o vers=3,nolock $H0:/$V0 $N0;
+
+TEST mkdir $M0/dir;
+
+mkdir $M0/other;
+cp /etc/passwd $M0/;
+cp $M0/passwd $M0/file;
+chmod 600 $M0/file;
+
+chown -R nfsnobody:nfsnobody $M0/dir;
+
+TEST $CLI volume set $V0 server.root-squash on;
+
+sleep 1;
+
+# tests should fail.
+touch $M0/foo 2>/dev/null;
+TEST [ $? -ne 0 ]
+touch $N0/foo 2>/dev/null;
+TEST [ $? -ne 0 ]
+mkdir $M0/new 2>/dev/null;
+TEST [ $? -ne 0 ]
+mkdir $N0/new 2>/dev/null;
+TEST [ $? -ne 0 ]
+
+TEST $CLI volume set $V0 server.root-squash off;
+
+sleep 1;
+
+# tests should pass.
+touch $M0/foo 2>/dev/null;
+TEST [ $? -eq 0 ]
+touch $N0/bar 2>/dev/null;
+TEST [ $? -eq 0 ]
+mkdir $M0/new 2>/dev/null;
+TEST [ $? -eq 0 ]
+mkdir $N0/old 2>/dev/null;
+TEST [ $? -eq 0 ]
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-948686.t b/tests/bugs/bug-948686.t
new file mode 100644
index 000000000..db9c198a9
--- /dev/null
+++ b/tests/bugs/bug-948686.t
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../cluster.rc
+
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+cleanup;
+#setup cluster and test volume
+TEST launch_cluster 3; # start 3-node virtual cluster
+TEST $CLI_1 peer probe $H2; # peer probe server 2 from server 1 cli
+TEST $CLI_1 peer probe $H3; # peer probe server 3 from server 1 cli
+
+EXPECT_WITHIN 20 2 check_peers;
+
+TEST $CLI_1 volume create $V0 replica 2 $H1:$B1/$V0 $H1:$B1/${V0}_1 $H2:$B2/$V0 $H3:$B3/$V0
+TEST $CLI_1 volume start $V0
+TEST glusterfs --volfile-server=$H1 --volfile-id=$V0 $M0
+
+#kill a node
+TEST kill_node 3
+
+#modify volume config to see change in volume-sync
+TEST $CLI_1 volume set $V0 write-behind off
+#add some files to the volume to see effect of volume-heal cmd
+TEST touch $M0/{1..100};
+TEST $CLI_1 volume stop $V0;
+TEST $glusterd_3;
+sleep 3;
+TEST $CLI_3 volume start $V0;
+TEST $CLI_2 volume stop $V0;
+TEST $CLI_2 volume delete $V0;
+
+cleanup;
+
+TEST glusterd;
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume start $V0
+pkill glusterd;
+pkill glusterfsd;
+TEST glusterd
+TEST $CLI volume status $V0
+
+cleanup;
diff --git a/tests/bugs/bug-948729/bug-948729-force.t b/tests/bugs/bug-948729/bug-948729-force.t
new file mode 100644
index 000000000..d14e94061
--- /dev/null
+++ b/tests/bugs/bug-948729/bug-948729-force.t
@@ -0,0 +1,84 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup;
+uuid1=`uuidgen`;
+uuid2=`uuidgen`;
+uuid3=`uuidgen`;
+
+V1=patchy1
+V2=patchy2
+
+TEST launch_cluster 2;
+
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN 20 1 check_peers;
+
+B3=/d/backends/3
+B4=/d/backends/4
+B5=/d/backends/5
+B6=/d/backends/6
+
+mkdir -p $B3 $B4 $B5 $B6
+
+TEST truncate -s 16M $B1/brick1
+TEST truncate -s 16M $B2/brick2
+TEST truncate -s 16M $B3/brick3
+TEST truncate -s 16M $B4/brick4
+TEST truncate -s 16M $B5/brick5
+TEST truncate -s 16M $B6/brick6
+
+TEST LD1=`losetup --find --show $B1/brick1`
+TEST mkfs.xfs $LD1
+TEST LD2=`losetup --find --show $B2/brick2`
+TEST mkfs.xfs $LD2
+TEST LD3=`losetup --find --show $B3/brick3`
+TEST mkfs.xfs $LD3
+TEST LD4=`losetup --find --show $B4/brick4`
+TEST mkfs.xfs $LD4
+TEST LD5=`losetup --find --show $B5/brick5`
+TEST mkfs.xfs $LD5
+TEST LD6=`losetup --find --show $B6/brick6`
+TEST mkfs.xfs $LD6
+
+mkdir -p $B1/$V0 $B2/$V0 $B3/$V0 $B4/$V0 $B5/$V0 $B6/$V0
+
+TEST mount -t xfs $LD1 $B1/$V0
+TEST mount -t xfs $LD2 $B2/$V0
+TEST mount -t xfs $LD3 $B3/$V0
+TEST mount -t xfs $LD4 $B4/$V0
+TEST mount -t xfs $LD5 $B5/$V0
+TEST mount -t xfs $LD6 $B6/$V0
+
+#Case 0: Parent directory of the brick is absent
+TEST ! $CLI1 volume create $V0 $H1:$B1/$V0/nonexistent/b1 $H2:$B2/$V0/nonexistent/b2 force
+
+#Case 1: File system root is being used as brick directory
+TEST $CLI1 volume create $V0 $H1:$B5/$V0 $H2:$B6/$V0 force
+
+#Case 2: Brick directory contains only one component
+TEST $CLI1 volume create $V1 $H1:/$uuid1 $H2:/$uuid2 force
+
+#Case 3: Sub-directories of the backend FS being used as brick directory
+TEST $CLI1 volume create $V2 $H1:$B1/$V0/brick1 $H2:$B2/$V0/brick2 force
+
+#add-brick tests
+TEST ! $CLI1 volume add-brick $V0 $H1:$B3/$V0/nonexistent/brick3 force
+TEST $CLI1 volume add-brick $V0 $H1:$B3/$V0 force
+TEST $CLI1 volume add-brick $V1 $H1:/$uuid3 force
+TEST $CLI1 volume add-brick $V2 $H1:$B4/$V0/brick3 force
+
+#####replace-brick tests
+#FIX-ME: replace-brick does not work with the newly introduced cluster test
+#####framework
+
+rmdir /$uuid1 /$uuid2 /$uuid3;
+
+cleanup;
diff --git a/tests/bugs/bug-948729/bug-948729-mode-script.t b/tests/bugs/bug-948729/bug-948729-mode-script.t
new file mode 100644
index 000000000..541ca897d
--- /dev/null
+++ b/tests/bugs/bug-948729/bug-948729-mode-script.t
@@ -0,0 +1,85 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup;
+
+uuid1=`uuidgen`;
+uuid2=`uuidgen`;
+uuid3=`uuidgen`;
+
+V1=patchy1
+V2=patchy2
+V3=patchy3
+
+TEST launch_cluster 2;
+
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN 20 1 check_peers;
+
+B3=/d/backends/3
+B4=/d/backends/4
+B5=/d/backends/5
+B6=/d/backends/6
+mkdir -p $B3 $B4 $B5 $B6
+
+TEST truncate -s 16M $B1/brick1
+TEST truncate -s 16M $B2/brick2
+TEST truncate -s 16M $B3/brick3
+TEST truncate -s 16M $B4/brick4
+TEST truncate -s 16M $B5/brick5
+TEST truncate -s 16M $B6/brick6
+
+TEST LD1=`losetup --find --show $B1/brick1`
+TEST mkfs.xfs $LD1
+TEST LD2=`losetup --find --show $B2/brick2`
+TEST mkfs.xfs $LD2
+TEST LD3=`losetup --find --show $B3/brick3`
+TEST mkfs.xfs $LD3
+TEST LD4=`losetup --find --show $B4/brick4`
+TEST mkfs.xfs $LD4
+TEST LD5=`losetup --find --show $B5/brick5`
+TEST mkfs.xfs $LD5
+TEST LD6=`losetup --find --show $B6/brick6`
+TEST mkfs.xfs $LD6
+
+mkdir -p $B1/$V0 $B2/$V0 $B3/$V0 $B4/$V0 $B5/$V0 $B6/$V0
+
+TEST mount -t xfs $LD1 $B1/$V0
+TEST mount -t xfs $LD2 $B2/$V0
+TEST mount -t xfs $LD3 $B3/$V0
+TEST mount -t xfs $LD4 $B4/$V0
+TEST mount -t xfs $LD5 $B5/$V0
+TEST mount -t xfs $LD6 $B6/$V0
+
+#Case 0: Parent directory of the brick is absent
+TEST ! $CLI_1 volume create $V0 $H1:$B1/$V0/nonexistent/b1 $H2:$B2/$V0/nonexistent/b2
+
+#Case 1: File system root being used as brick directory
+TEST $CLI_1 volume create $V0 $H1:$B5/$V0 $H2:$B6/$V0
+
+#Case 2: Brick directory contains only one component
+TEST $CLI_1 volume create $V1 $H1:/$uuid1 $H2:/$uuid2
+
+#Case 3: Sub-directories of the backend FS being used as brick directory
+TEST $CLI_1 volume create $V2 $H1:$B1/$V0/brick1 $H2:$B2/$V0/brick2
+
+#add-brick tests
+TEST ! $CLI_1 volume add-brick $V0 $H1:$B3/$V0/nonexistent/brick3
+TEST $CLI_1 volume add-brick $V0 $H1:$B3/$V0
+TEST $CLI_1 volume add-brick $V1 $H1:/$uuid3
+TEST $CLI_1 volume add-brick $V2 $H1:$B4/$V0/brick3
+
+#####replace-brick tests
+#FIX-ME : replace-brick does not currently work in the newly introduced
+#####cluster test framework
+
+rmdir /$uuid1 /$uuid2 /$uuid3
+
+cleanup;
diff --git a/tests/bugs/bug-948729/bug-948729.t b/tests/bugs/bug-948729/bug-948729.t
new file mode 100644
index 000000000..f94db1ea0
--- /dev/null
+++ b/tests/bugs/bug-948729/bug-948729.t
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup;
+
+uuid1=`uuidgen`;
+uuid2=`uuidgen`;
+uuid3=`uuidgen`;
+
+TEST launch_cluster 2;
+
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN 20 1 check_peers;
+
+B3=/d/backends/3
+
+mkdir -p $B3
+
+TEST truncate -s 16M $B1/brick1
+TEST truncate -s 16M $B2/brick2
+TEST truncate -s 16M $B3/brick3
+
+TEST LD1=`losetup --find --show $B1/brick1`
+TEST mkfs.xfs $LD1
+TEST LD2=`losetup --find --show $B2/brick2`
+TEST mkfs.xfs $LD2
+TEST LD3=`losetup --find --show $B3/brick3`
+TEST mkfs.xfs $LD3
+
+mkdir -p $B1/$V0 $B2/$V0 $B3/$V0
+
+TEST mount -t xfs $LD1 $B1/$V0
+TEST mount -t xfs $LD2 $B2/$V0
+TEST mount -t xfs $LD3 $B3/$V0
+
+#Tests without --mode=script option
+cli1=$(echo $CLI1 | sed 's/ --mode=script//')
+#Case 0: Parent directory of the brick is absent
+TEST ! $cli1 volume create $V0 $H1:$B1/$V0/nonexistent/b1 $H2:$B2/$V0/nonexistent/b2
+
+#Case 1: File system root being used as brick directory
+TEST ! $cli1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
+
+#Case 2: Brick directory contains only one component
+TEST ! $cli1 volume create $V0 $H1:/$uuid1 $H2:/$uuid2
+
+#Case 3: Sub-directories of the backend FS being used as brick directory
+TEST $cli1 volume create $V0 $H1:$B1/$V0/brick1 $H2:$B2/$V0/brick2
+
+#add-brick tests
+TEST ! $cli1 volume add-brick $V0 $H1:$B3/$V0/nonexistent/b3
+TEST ! $cli1 volume add-brick $V0 $H1:$B3/$V0
+TEST ! $cli1 volume add-brick $V0 $H1:/$uuid3
+TEST $cli1 volume add-brick $V0 $H1:$B3/$V0/brick3
+
+#####replace-brick tests
+#FIX-ME: Replace-brick does not work currently in the newly introduced cluster
+#####test framework.
+
+cleanup;
diff --git a/tests/bugs/bug-949242.t b/tests/bugs/bug-949242.t
new file mode 100644
index 000000000..717084673
--- /dev/null
+++ b/tests/bugs/bug-949242.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+#
+# Bug 949242 - Test basic fallocate functionality.
+#
+# Run several commands to verify basic fallocate functionality. We verify that
+# fallocate creates and allocates blocks to a file. We also verify that the keep
+# size option does not modify the file size.
+###
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../fallocate.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4}
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+# check for fallocate support before continuing the test
+require_fallocate -l 1m -n $M0/file && rm -f $M0/file
+
+# fallocate a file and verify blocks are allocated
+TEST fallocate -l 1m $M0/file
+blksz=`stat --printf=%b $M0/file`
+nblks=`stat --printf=%B $M0/file`
+TEST [ $(($blksz * $nblks)) -eq 1048576 ]
+
+TEST unlink $M0/file
+
+# truncate a file to a fixed size, fallocate and verify that the size does not
+# change
+TEST truncate --size=1m $M0/file
+TEST fallocate -l 2m -n $M0/file
+blksz=`stat --printf=%b $M0/file`
+nblks=`stat --printf=%B $M0/file`
+sz=`stat --printf=%s $M0/file`
+TEST [ $sz -eq 1048576 ]
+# Note that gluster currently incorporates a hack to limit the number of blocks
+# reported as allocated to the file by the file size. We have allocated beyond the
+# file size here. Just check for non-zero allocation to avoid setting a land mine
+# for if/when that behavior might change.
+TEST [ ! $(($blksz * $nblks)) -eq 0 ]
+
+TEST unlink $M0/file
+
+TEST umount $M0
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/bug-949298.t b/tests/bugs/bug-949298.t
new file mode 100644
index 000000000..1394127ec
--- /dev/null
+++ b/tests/bugs/bug-949298.t
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI --xml volume info $V0
+
+cleanup;
diff --git a/tests/bugs/bug-949930.t b/tests/bugs/bug-949930.t
new file mode 100644
index 000000000..4a738befa
--- /dev/null
+++ b/tests/bugs/bug-949930.t
@@ -0,0 +1,27 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+V1=patchy2
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+TEST $CLI volume start $V0;
+
+TEST $CLI volume create $V1 $H0:$B0/${V1}{1,2};
+TEST $CLI volume start $V1;
+
+TEST ! $CLI volume set $V0 performance.nfs.read-ahead blah
+EXPECT '' volume_option $V0 performance.nfs.read-ahead
+
+TEST $CLI volume set $V0 performance.nfs.read-ahead on
+EXPECT "on" volume_option $V0 performance.nfs.read-ahead
+
+EXPECT '' volume_option $V1 performance.nfs.read-ahead
+
+cleanup;
+
diff --git a/tests/bugs/bug-955588.t b/tests/bugs/bug-955588.t
new file mode 100755
index 000000000..3f0361167
--- /dev/null
+++ b/tests/bugs/bug-955588.t
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+function get_brick_host_uuid()
+{
+ local vol=$1;
+ local uuid_regex='[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}'
+ local host_uuid_list=$($CLI volume info $vol --xml | grep "brick.uuid" | grep -o -E "$uuid_regex");
+
+ echo $host_uuid_list | awk '{print $1}'
+}
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+
+uuid=`grep UUID /var/lib/glusterd/glusterd.info | cut -f2 -d=`
+EXPECT $uuid get_brick_host_uuid $V0
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-957877.t b/tests/bugs/bug-957877.t
new file mode 100644
index 000000000..23aefea25
--- /dev/null
+++ b/tests/bugs/bug-957877.t
@@ -0,0 +1,31 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../afr.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0;
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0;
+kill_brick $V0 $H0 $B0/${V0}0
+TEST touch $M0/f1
+TEST setfattr -n "user.foo" -v "test" $M0/f1
+
+BRICK=$B0"/${V0}1"
+
+TEST $CLI volume start $V0 force
+sleep 5
+TEST $CLI volume heal $V0
+
+# Wait for self-heal to complete
+EXPECT_WITHIN 30 '0' count_sh_entries $BRICK;
+
+TEST getfattr -n "user.foo" $B0/${V0}0/f1;
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-958691.t b/tests/bugs/bug-958691.t
new file mode 100644
index 000000000..a5ac406c9
--- /dev/null
+++ b/tests/bugs/bug-958691.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0;
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0;
+sleep 1;
+TEST mount -t nfs -o vers=3,nolock $H0:/$V0 $N0;
+
+sleep 2;
+
+# Tests for the fuse mount
+TEST mkdir $M0/dir;
+TEST chmod 1777 $M0/dir;
+TEST touch $M0/dir/file{1,2};
+
+TEST $CLI volume set $V0 server.root-squash enable;
+
+mv $M0/dir/file1 $M0/dir/file11 2>/dev/null;
+TEST [ $? -ne 0 ];
+
+TEST $CLI volume set $V0 server.root-squash disable;
+TEST rm -rf $M0/dir;
+
+sleep 1;
+
+# tests for nfs mount
+TEST mkdir $N0/dir;
+TEST chmod 1777 $N0/dir;
+TEST touch $N0/dir/file{1,2};
+
+TEST $CLI volume set $V0 server.root-squash enable;
+
+mv $N0/dir/file1 $N0/dir/file11 2>/dev/null;
+TEST [ $? -ne 0 ];
+
+TEST $CLI volume set $V0 server.root-squash disable;
+TEST rm -rf $N0/dir;
+TEST umount $N0;
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-958790.t b/tests/bugs/bug-958790.t
new file mode 100644
index 000000000..6cc799c25
--- /dev/null
+++ b/tests/bugs/bug-958790.t
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+touch /var/lib/glusterd/groups/test
+echo "read-ahead=off" > /var/lib/glusterd/groups/test
+echo "open-behind=off" >> /var/lib/glusterd/groups/test
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+TEST $CLI volume set $V0 group test
+EXPECT "off" volume_option $V0 performance.read-ahead
+EXPECT "off" volume_option $V0 performance.open-behind
+
+cleanup;
diff --git a/tests/bugs/bug-961307.t b/tests/bugs/bug-961307.t
new file mode 100644
index 000000000..96e93a56f
--- /dev/null
+++ b/tests/bugs/bug-961307.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+REPLICA=2
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica $REPLICA $H0:$B0/${V0}-00 $H0:$B0/${V0}-01 $H0:$B0/${V0}-10 $H0:$B0/${V0}-11
+TEST $CLI volume start $V0
+
+var1=$(gluster volume remove-brick $H0:$B0/${V0}-00 $H0:$B0/${V0}-01 start 2>&1)
+var2="volume remove-brick start: failed: Volume $H0:$B0/${V0}-00 does not exist"
+
+
+function compare_string()
+{
+ val="-1"
+ if [ "$1" == "$2" ]; then
+ val="0"
+ else
+ val="1"
+ fi
+ echo $val
+}
+
+EXPECT 0 compare_string "$var1" "$var2"
+cleanup;
diff --git a/tests/bugs/bug-961615.t b/tests/bugs/bug-961615.t
new file mode 100644
index 000000000..d183e6c52
--- /dev/null
+++ b/tests/bugs/bug-961615.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+#This test tests that an extra fd_unref does not happen in rebalance
+#migration completion check code path in dht
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST touch $M0/1
+#This rename creates a link file for 10 in the other volume.
+TEST mv $M0/1 $M0/10
+#Lets keep writing to the file which will trigger rebalance completion check
+dd if=/dev/zero of=$M0/10 bs=1k &
+bg_pid=$!
+#Now rebalance force will migrate file '10'
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN 60 "completed" rebalance_status_field $V0
+#If the bug exists mount would have crashed by now
+TEST ls $M0
+kill -9 $bg_pid > /dev/null 2>&1
+wait > /dev/null 2>&1
+cleanup
diff --git a/tests/bugs/bug-961669.t b/tests/bugs/bug-961669.t
new file mode 100644
index 000000000..751a63df2
--- /dev/null
+++ b/tests/bugs/bug-961669.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+#Test case: Fail remove-brick 'start' variant when reducing the replica count of a volume.
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+#Create a 3x3 dist-rep volume
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5,6,7,8};
+TEST $CLI volume start $V0
+
+# Mount FUSE and create file/directory
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+TEST touch $M0/zerobytefile.txt
+TEST mkdir $M0/test_dir
+TEST dd if=/dev/zero of=$M0/file bs=1024 count=1024
+
+function remove_brick_start {
+ $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}{1,4,7} start 2>&1|grep -oE 'success|failed'
+}
+
+function remove_brick {
+ $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}{1,4,7} 2>&1|grep -oE 'success|failed'
+}
+
+#remove-brick start variant
+#Actual message displayed at cli is:
+#"volume remove-brick start: failed: Rebalancing not needed when reducing replica count. Try without the 'start' option"
+EXPECT "failed" remove_brick_start;
+
+#remove-brick commit-force
+#Actual message displayed at cli is:
+#"volume remove-brick commit force: success"
+EXPECT "success" remove_brick
+
+TEST umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-963541.t b/tests/bugs/bug-963541.t
new file mode 100755
index 000000000..950c7db54
--- /dev/null
+++ b/tests/bugs/bug-963541.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..3};
+TEST $CLI volume start $V0;
+
+# Start a remove-brick and try to start a rebalance/remove-brick without committing
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}1 start
+
+TEST ! $CLI volume rebalance $V0 start
+TEST ! $CLI volume remove-brick $V0 $H0:$B0/${V0}2 start
+
+#Try to start rebalance/remove-brick again after commit
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}1 commit
+
+gluster volume status
+
+TEST $CLI volume rebalance $V0 start
+TEST $CLI volume rebalance $V0 stop
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 start
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 stop
+
+TEST $CLI volume stop $V0
+
+cleanup;
+
diff --git a/tests/bugs/bug-963678.t b/tests/bugs/bug-963678.t
new file mode 100644
index 000000000..14d566579
--- /dev/null
+++ b/tests/bugs/bug-963678.t
@@ -0,0 +1,56 @@
+#!/bin/bash
+#
+# Bug 963678 - Test discard functionality
+#
+# Test that basic discard (hole punch) functionality works via the fallocate
+# command line tool. Hole punch deallocates a region of a file, creating a hole
+# and a zero-filled data region. We verify that hole punch works, frees blocks
+# and that subsequent reads do not read stale data (caches are invalidated).
+#
+# NOTE: fuse fallocate is known to be broken with regard to cache invalidation
+# up to 3.9.0 kernels. Therefore, FOPEN_KEEP_CACHE is not used in this
+# test (opens will invalidate the fuse cache).
+###
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../fallocate.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2}
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+# check for fallocate and hole punch support
+require_fallocate -l 1m $M0/file
+require_fallocate -p -l 512k $M0/file && rm -f $M0/file
+
+# allocate some blocks, punch a hole and verify block allocation
+TEST fallocate -l 1m $M0/file
+blksz=`stat --printf=%B $M0/file`
+nblks=`stat --printf=%b $M0/file`
+TEST [ $(($blksz * $nblks)) -ge 1048576 ]
+TEST fallocate -p -o 512k -l 128k $M0/file
+
+nblks=`stat --printf=%b $M0/file`
+# allow some room for xattr blocks
+TEST [ $(($blksz * $nblks)) -lt $((917504 + 16384)) ]
+TEST unlink $M0/file
+
+# write some data, punch a hole and verify the file content changes
+TEST dd if=/dev/urandom of=$M0/file bs=1M count=1
+TEST cp $M0/file $M0/file.copy.pre
+TEST fallocate -p -o 512k -l 128k $M0/file
+TEST cp $M0/file $M0/file.copy.post
+TEST ! cmp $M0/file.copy.pre $M0/file.copy.post
+TEST unlink $M0/file
+
+TEST umount $M0
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/bug-964059.t b/tests/bugs/bug-964059.t
new file mode 100755
index 000000000..df07f95ee
--- /dev/null
+++ b/tests/bugs/bug-964059.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../cluster.rc
+
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+function volume_count {
+ local cli=$1;
+ if [ $cli -eq '1' ] ; then
+ $CLI_1 volume info | grep 'Volume Name' | wc -l;
+ else
+ $CLI_2 volume info | grep 'Volume Name' | wc -l;
+ fi
+}
+
+cleanup;
+
+TEST launch_cluster 2;
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN 20 1 check_peers
+
+TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
+TEST $CLI_1 volume start $V0
+TEST $CLI_1 volume remove-brick $V0 $H2:$B2/$V0 start
+TEST $CLI_1 volume status
+cleanup;
diff --git a/tests/bugs/bug-966018.t b/tests/bugs/bug-966018.t
new file mode 100644
index 000000000..2a4697241
--- /dev/null
+++ b/tests/bugs/bug-966018.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+#This tests if eager-lock blocks metadata operations on nfs/fuse mounts.
+#If it is not woken up, INODELK from the next command waits
+#for post-op-delay secs.
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/r2_0 $H0:$B0/r2_1
+TEST $CLI volume set $V0 ensure-durability off
+TEST $CLI volume set $V0 cluster.eager-lock on
+TEST $CLI volume set $V0 cluster.post-op-delay-secs 3
+
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+sleep 5
+TEST mount -t nfs -o vers=3,nolock $H0:/$V0 $N0;
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0
+echo 1 > $N0/1 && chmod +x $N0/1
+echo 1 > $M0/1 && chmod +x $M0/1
+
+#Check that INODELK MAX latency is not in the order of seconds
+#Test if the MAX INODELK fop latency is of the order of seconds.
+inodelk_max_latency=$($CLI volume profile $V0 info | grep INODELK | awk 'BEGIN {max = 0} {if ($6 > max) max=$6;} END {print max}' | cut -d. -f 1 | egrep "[0-9]{7,}")
+
+TEST [ -z $inodelk_max_latency ]
+TEST umount $N0
+
+cleanup;
diff --git a/tests/bugs/bug-969193.t b/tests/bugs/bug-969193.t
new file mode 100755
index 000000000..e78a2980e
--- /dev/null
+++ b/tests/bugs/bug-969193.t
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+# Test that "system getspec" works without op_version problems.
+
+. $(dirname $0)/../include.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+TEST $CLI volume create $V0 $H0:$B0/brick1
+TEST $CLI system getspec $V0
+cleanup;
diff --git a/tests/bugs/bug-970070.t b/tests/bugs/bug-970070.t
new file mode 100755
index 000000000..da28b1ed7
--- /dev/null
+++ b/tests/bugs/bug-970070.t
@@ -0,0 +1,14 @@
+#!/bin/bash
+# TEST the nfs.acl option
+. $(dirname $0)/../include.rc
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume start $V0
+sleep 5
+TEST $CLI volume set $V0 nfs.acl off
+TEST $CLI volume set $V0 nfs.acl on
+cleanup
diff --git a/tests/bugs/bug-973073.t b/tests/bugs/bug-973073.t
new file mode 100755
index 000000000..83e2839c6
--- /dev/null
+++ b/tests/bugs/bug-973073.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../dht.rc
+
+## Steps followed are one descibed in bugzilla
+
+cleanup;
+
+function get_layout()
+{
+ layout1=`getfattr -n trusted.glusterfs.dht -e hex $1 2>&1`
+
+ if [ $? -ne 0 ]
+ then
+ echo 1
+ else
+ echo 0
+ fi
+
+}
+
+BRICK_COUNT=3
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2
+TEST $CLI volume start $V0
+
+## Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 start
+
+## remove-brick status == rebalance_status
+EXPECT_WITHIN 30 "0" rebalance_completed
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 stop
+
+TEST $CLI volume rebalance $V0 fix-layout start
+
+EXPECT_WITHIN 30 "0" rebalance_completed
+
+TEST mkdir $M0/dir 2>/dev/null;
+
+EXPECT "0" get_layout $B0/${V0}2/dir
+cleanup;
diff --git a/tests/bugs/bug-974007.t b/tests/bugs/bug-974007.t
new file mode 100644
index 000000000..c8c1c862b
--- /dev/null
+++ b/tests/bugs/bug-974007.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+#Test case: Create a distributed replicate volume, and remove multiple
+#replica pairs in a single remove-brick command.
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+#Create a 3X2 distributed-replicate volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1..6};
+TEST $CLI volume start $V0
+
+# Mount FUSE and create files
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+TEST touch $M0/file{1..10}
+
+# Remove bricks from two sub-volumes to make it a 1x2 vol.
+# Bricks in question are given in a random order but from the same subvols.
+function remove_brick_start_status {
+ $CLI volume remove-brick $V0 \
+ $H0:$B0/${V0}6 $H0:$B0/${V0}1 \
+ $H0:$B0/${V0}2 $H0:$B0/${V0}5 start 2>&1 |grep -oE "success|failed"
+}
+EXPECT "success" remove_brick_start_status;
+
+# Wait for rebalance to complete
+EXPECT_WITHIN 10 "completed" remove_brick_status_completed_field "$V0" "$H0:$B0/${V0}6 $H0:$B0/${V0}1 $H0:$B0/${V0}2 $H0:$B0/${V0}5"
+
+# Check commit status
+function remove_brick_commit_status {
+ $CLI volume remove-brick $V0 \
+ $H0:$B0/${V0}6 $H0:$B0/${V0}1 \
+ $H0:$B0/${V0}2 $H0:$B0/${V0}5 commit 2>&1 |grep -oE "success|failed"
+}
+EXPECT "success" remove_brick_commit_status;
+
+# Check the volume type
+EXPECT "Replicate" echo `$CLI volume info |grep Type |awk '{print $2}'`
+
+TEST umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-974972.t b/tests/bugs/bug-974972.t
new file mode 100755
index 000000000..15deac090
--- /dev/null
+++ b/tests/bugs/bug-974972.t
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+#This script checks that nfs mount does not fail lookup on files with split-brain
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume start $V0
+sleep 5
+TEST mount -t nfs -o vers=3 $H0:/$V0 $N0
+TEST touch $N0/1
+TEST kill_brick ${V0} ${H0} ${B0}/${V0}1
+echo abc > $N0/1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "Y" nfs_up_status
+EXPECT_WITHIN 20 "1" afr_child_up_status_in_nfs $V0 0
+EXPECT_WITHIN 20 "1" afr_child_up_status_in_nfs $V0 1
+
+TEST kill_brick ${V0} ${H0} ${B0}/${V0}0
+echo def > $N0/1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "Y" nfs_up_status
+EXPECT_WITHIN 20 "1" afr_child_up_status_in_nfs $V0 0
+EXPECT_WITHIN 20 "1" afr_child_up_status_in_nfs $V0 1
+
+#Lookup should not fail
+TEST ls $N0/1
+TEST ! cat $N0/1
+
+TEST umount $N0
+cleanup
diff --git a/tests/bugs/bug-976800.t b/tests/bugs/bug-976800.t
new file mode 100644
index 000000000..2aee8cc11
--- /dev/null
+++ b/tests/bugs/bug-976800.t
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+# This test checks if there are any open fds on the brick
+# even after the file is closed on the mount. This particular
+# test tests dd with "fsync" to check afr's fsync codepath
+cleanup;
+
+function is_fd_open {
+ local v=$1
+ local h=$2
+ local b=$3
+ local bpid=$(get_brick_pid $v $h $b)
+ ls -l /proc/$bpid/fd | grep -w "\-> $b/1"
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 ensure-durability off
+TEST $CLI volume set $V0 eager-lock off
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST dd of=$M0/1 if=/dev/zero bs=1k count=1 conv=fsync
+TEST ! is_fd_open $V0 $H0 $B0/${V0}0
+cleanup;
diff --git a/tests/bugs/bug-977246.t b/tests/bugs/bug-977246.t
new file mode 100644
index 000000000..e07ee1919
--- /dev/null
+++ b/tests/bugs/bug-977246.t
@@ -0,0 +1,21 @@
+#! /bin/bash
+
+# This test checks if address validation, correctly catches hostnames
+# with consective dots, such as 'example..org', as invalid
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+TEST $CLI volume info $V0
+TEST $CLI volume start $V0
+
+TEST ! $CLI volume set $V0 auth.allow example..org
+
+TEST $CLI volume stop $V0
+
+cleanup;
diff --git a/tests/bugs/bug-977797.t b/tests/bugs/bug-977797.t
new file mode 100755
index 000000000..08cdbe8f1
--- /dev/null
+++ b/tests/bugs/bug-977797.t
@@ -0,0 +1,114 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 open-behind off
+TEST $CLI volume set $V0 quick-read off
+TEST $CLI volume set $V0 read-ahead off
+TEST $CLI volume set $V0 write-behind off
+TEST $CLI volume set $V0 io-cache off
+TEST $CLI volume set $V0 background-self-heal-count 0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+
+TEST mkdir -p $M0/a
+TEST `echo "GLUSTERFS" > $M0/a/file`
+
+TEST kill_brick $V0 $H0 $B0/$V0"1"
+
+TEST chown root $M0/a
+TEST chown root $M0/a/file
+TEST `echo "GLUSTER-FILE-SYSTEM" > $M0/a/file`
+TEST mkdir $M0/a/b
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0;
+
+
+
+TEST kill_brick $V0 $H0 $B0/$V0"2"
+
+TEST chmod 757 $M0/a
+TEST chmod 757 $M0/a/file
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 1;
+
+TEST ls -l $M0/a/file
+
+b1c0dir=$(afr_get_specific_changelog_xattr $B0/$V0"1"/a \
+ trusted.afr.$V0-client-0 "entry")
+b1c1dir=$(afr_get_specific_changelog_xattr $B0/$V0"1"/a \
+ trusted.afr.$V0-client-1 "entry")
+b2c0dir=$(afr_get_specific_changelog_xattr \
+ $B0/$V0"2"/a trusted.afr.$V0-client-0 "entry")
+b2c1dir=$(afr_get_specific_changelog_xattr \
+ $B0/$V0"2"/a trusted.afr.$V0-client-1 "entry")
+
+
+b1c0f=$(afr_get_specific_changelog_xattr $B0/$V0"1"/a/file \
+ trusted.afr.$V0-client-0 "data")
+b1c1f=$(afr_get_specific_changelog_xattr $B0/$V0"1"/a/file \
+ trusted.afr.$V0-client-1 "data")
+b2c0f=$(afr_get_specific_changelog_xattr $B0/$V0"2"/a/file \
+ trusted.afr.$V0-client-0 "data")
+b2c1f=$(afr_get_specific_changelog_xattr $B0/$V0"2"/a/file \
+ trusted.afr.$V0-client-1 "data")
+
+EXPECT "00000000" echo $b1c0f
+EXPECT "00000000" echo $b1c1f
+EXPECT "00000000" echo $b2c0f
+EXPECT "00000000" echo $b2c1f
+
+EXPECT "00000000" echo $b1c0dir
+EXPECT "00000000" echo $b1c1dir
+EXPECT "00000000" echo $b2c0dir
+EXPECT "00000000" echo $b2c1dir
+
+contains() {
+ string="$1"
+ substring="$2"
+ var="-1"
+ if test "${string#*$substring}" != "$string"
+ then
+ var="0" # $substring is in $string
+ else
+ var="1" # $substring is not in $string
+ fi
+ echo $var
+}
+
+var1=$(cat $M0/a/file 2>&1)
+var2="Input/output error"
+
+
+EXPECT "0" contains "$var1" "$var2"
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-978794.t b/tests/bugs/bug-978794.t
new file mode 100644
index 000000000..d22d3cde3
--- /dev/null
+++ b/tests/bugs/bug-978794.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../fileio.rc
+
+
+# This test opens 100 fds and triggers graph switches to check if fsync
+# as part of graph-switch causes crash or not.
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST touch $M0/{1..100}
+for i in {1..100}; do fd[$i]=`fd_available`; fd_open ${fd[$i]} 'w' $M0/$i; done
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{2,3}
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN 120 "completed" rebalance_status_field $V0
+TEST cat $M0/{1..100}
+for i in {1..100}; do fd_write ${fd[$i]} 'abc'; done
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{4,5}
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN 120 "completed" rebalance_status_field $V0
+for i in {1..100}; do fd_write ${fd[$i]} 'abc'; done
+TEST cat $M0/{1..100}
+cleanup
diff --git a/tests/bugs/bug-979365.t b/tests/bugs/bug-979365.t
new file mode 100755
index 000000000..e94dc9aa8
--- /dev/null
+++ b/tests/bugs/bug-979365.t
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+#This script checks that ensure-durability option enables/disables afr
+#sending fsyncs
+cleanup;
+
+function num_fsyncs {
+ $CLI volume profile $V0 info | grep -w FSYNC | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 ensure-durability on
+TEST $CLI volume set $V0 eager-lock off
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST dd of=$M0/a if=/dev/zero bs=1M count=10
+#fsyncs take a while to complete.
+sleep 5
+
+# There can be zero or more fsyncs, depending on the order
+# in which the writes reached the server, in turn deciding
+# whether they were treated as "appending" writes or not.
+
+TEST [[ $(num_fsyncs) -ge 0 ]]
+#Stop the volume to erase the profile info of old operations
+TEST $CLI volume profile $V0 stop
+TEST $CLI volume stop $V0
+umount $M0
+#Disable ensure-durability now to disable fsyncs in afr.
+TEST $CLI volume set $V0 ensure-durability off
+TEST $CLI volume start $V0
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST $CLI volume profile $V0 start
+TEST dd of=$M0/a if=/dev/zero bs=1M count=10
+#fsyncs take a while to complete.
+sleep 5
+TEST [[ $(num_fsyncs) -eq 0 ]]
+
+cleanup
diff --git a/tests/bugs/bug-982174.t b/tests/bugs/bug-982174.t
new file mode 100644
index 000000000..460af7511
--- /dev/null
+++ b/tests/bugs/bug-982174.t
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Test to check
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+#Check if incorrect log-level keywords does not crash the CLI
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick1 $H0:$B0/brick2
+TEST $CLI volume start $V0
+
+function set_log_level_status {
+ local level=$1
+ $CLI volume set $V0 diagnostics.client-log-level $level 2>&1 |grep -oE 'success|failed'
+}
+
+
+LOG_LEVEL="trace"
+EXPECT "failed" set_log_level_status $LOG_LEVEL
+
+
+LOG_LEVEL="error-gen"
+EXPECT "failed" set_log_level_status $LOG_LEVEL
+
+
+LOG_LEVEL="TRACE"
+EXPECT "success" set_log_level_status $LOG_LEVEL
+
+EXPECT "$LOG_LEVEL" echo `$CLI volume info | grep diagnostics | awk '{print $2}'`
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/bug-983477.t b/tests/bugs/bug-983477.t
new file mode 100755
index 000000000..c19fa96c8
--- /dev/null
+++ b/tests/bugs/bug-983477.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+#This script checks if use-readdirp option works as accepted in mount options
+
+function get_use_readdirp_value {
+ local vol=$1
+ local statedump=$(generate_mount_statedump $vol)
+ local val=$(grep "use_readdirp=" $statedump | cut -f2 -d'=' | tail -1)
+ rm -f $statedump
+ echo $val
+}
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}
+TEST $CLI volume start $V0
+#If readdirp is enabled statedump should reflect it
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 --use-readdirp=yes
+TEST cd $M0
+EXPECT_WITHIN 20 "1" get_use_readdirp_value $V0
+TEST cd -
+TEST umount $M0
+
+#If readdirp is enabled statedump should reflect it
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 --use-readdirp=no
+TEST cd $M0
+EXPECT_WITHIN 20 "0" get_use_readdirp_value $V0
+TEST cd -
+TEST umount $M0
+
+#Since args are optional on this argument just specifying "--use-readdirp" should also turn it `on` not `off`
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 --use-readdirp
+TEST cd $M0
+EXPECT_WITHIN 20 "1" get_use_readdirp_value $V0
+TEST cd -
+TEST umount $M0
+
+#By default it is enabled.
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST cd $M0
+EXPECT_WITHIN 20 "1" get_use_readdirp_value $V0
+TEST cd -
+TEST umount $M0
+
+#Invalid values for use-readdirp should not be accepted
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 --use-readdirp=please-fail
+
+cleanup
diff --git a/tests/bugs/bug-985074.t b/tests/bugs/bug-985074.t
new file mode 100644
index 000000000..80052129e
--- /dev/null
+++ b/tests/bugs/bug-985074.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+#
+# Bug 985074 - Verify stale inode/dentry mappings are cleaned out.
+#
+# This test verifies that an inode/dentry mapping for a file removed via a
+# separate mount point is cleaned up appropriately. We create a file and hard
+# link from client 1. Next we remove the link via client 2. Finally, from client
+# 1 we attempt to rename the original filename to the name of the just removed
+# hard link.
+#
+# If the inode is not unlinked properly, the removed directory entry can resolve
+# to an inode (on the client that never saw the rm) that ends up passed down
+# through the lookup call. If md-cache holds valid metadata on the inode (due to
+# a large timeout value or recent lookup on the valid name), it is tricked into
+# returning a successful lookup that should have returned ENOENT. This manifests
+# as an error from the mv command in the following test sequence because file
+# and file.link resolve to the same file:
+#
+# # mv /mnt/glusterfs/0/file /mnt/glusterfs/0/file.link
+# mv: `/mnt/glusterfs/0/file' and `/mnt/glusterfs/0/file.link' are the same file
+#
+###
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 md-cache-timeout 3
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --entry-timeout=0 --attribute-timeout=0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M1 --entry-timeout=0 --attribute-timeout=0
+
+TEST touch $M0/file
+TEST ln $M0/file $M0/file.link
+TEST ls -ali $M0 $M1
+TEST rm -f $M1/file.link
+TEST ls -ali $M0 $M1
+# expire the md-cache timeout
+sleep 3
+TEST mv $M0/file $M0/file.link
+TEST stat $M0/file.link
+TEST ! stat $M0/file
+
+TEST umount $M1
+TEST umount $M0
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/bug-986429.t b/tests/bugs/bug-986429.t
new file mode 100644
index 000000000..6e43f72b7
--- /dev/null
+++ b/tests/bugs/bug-986429.t
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+## This tests failover achieved by providing multiple
+## servers from the trusted pool for fetching volume
+## specification
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s non-existent -s $H0 --volfile-id=/$V0 $M0
+
+cleanup;
diff --git a/tests/bugs/bug-986905.t b/tests/bugs/bug-986905.t
new file mode 100755
index 000000000..0fac40fb4
--- /dev/null
+++ b/tests/bugs/bug-986905.t
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+#This script checks if hardlinks that are created while a brick is down are
+#healed properly.
+
+cleanup;
+function get_inum {
+ ls -i $1 | awk '{print $1}'
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST touch $M0/a
+TEST ln $M0/a $M0/link_a
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0
+TEST ls -l $M0
+inum=$(get_inum $B0/${V0}0/a)
+EXPECT "$inum" get_inum $B0/${V0}0/link_a
+cleanup
diff --git a/tests/bugs/bug-991622.t b/tests/bugs/bug-991622.t
new file mode 100644
index 000000000..5c3243465
--- /dev/null
+++ b/tests/bugs/bug-991622.t
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../fileio.rc
+
+#This tests that no fd leaks are observed in unlink/rename in open-behind
+function leaked_fds {
+ ls -l /proc/$(get_brick_pid $V0 $H0 $B0/$V0)/fd | grep deleted
+}
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume set $V0 open-behind on
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable
+
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' "$M0/testfile1"
+TEST fd_write $fd1 "content"
+
+TEST fd2=`fd_available`
+TEST fd_open $fd2 'w' "$M0/testfile2"
+TEST fd_write $fd2 "content"
+
+TEST touch $M0/a
+TEST rm $M0/testfile1
+TEST mv $M0/a $M0/testfile2
+TEST fd_close $fd1
+TEST fd_close $fd2
+TEST ! leaked_fds
+cleanup;
diff --git a/tests/bugs/getlk_owner.c b/tests/bugs/getlk_owner.c
new file mode 100644
index 000000000..619c2e32d
--- /dev/null
+++ b/tests/bugs/getlk_owner.c
@@ -0,0 +1,120 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+
+#define GETLK_OWNER_CHECK(f, cp, label) \
+ do { \
+ switch (f.l_type) { \
+ case F_RDLCK: \
+ case F_WRLCK: \
+ ret = 1; \
+ goto label; \
+ case F_UNLCK: \
+ if (!are_flocks_sane (&f, &cp)) { \
+ ret = 1; \
+ goto label; \
+ } \
+ break; \
+ } \
+ } while (0)
+
+void
+flock_init (struct flock *f, short int type, off_t start, off_t len)
+{
+ f->l_type = type;
+ f->l_start = start;
+ f->l_len = len;
+}
+
+int
+flock_cp (struct flock *dst, struct flock *src)
+{
+ memcpy ((void *) dst, (void *) src, sizeof (struct flock));
+}
+
+int
+are_flocks_sane (struct flock *src, struct flock *cpy)
+{
+ return ((src->l_whence == cpy->l_whence) &&
+ (src->l_start == cpy->l_start) &&
+ (src->l_len == cpy->l_len));
+}
+
+/*
+ * Test description:
+ * SETLK (0,3), F_WRLCK
+ * SETLK (3,3), F_WRLCK
+ *
+ * the following GETLK requests must return flock struct unmodified
+ * except for l_type to F_UNLCK
+ * GETLK (3,3), F_WRLCK
+ * GETLK (3,3), F_RDLCK
+ *
+ * */
+
+int main (int argc, char **argv)
+{
+ int fd = -1;
+ int ret = 1;
+ char *fname = NULL;
+ struct flock f = {0,};
+ struct flock cp = {0,};
+
+ if (argc < 2)
+ goto out;
+
+ fname = argv[1];
+ fd = open (fname, O_RDWR);
+ if (fd == -1) {
+ perror ("open");
+ goto out;
+ }
+
+ flock_init (&f, F_WRLCK, 0, 3);
+ flock_cp (&cp, &f);
+ ret = fcntl (fd, F_SETLK, &f);
+ if (ret) {
+ perror ("fcntl");
+ goto out;
+ }
+ if (!are_flocks_sane (&f, &cp)) {
+ ret = 1;
+ goto out;
+ }
+
+ flock_init (&f, F_WRLCK, 3, 3);
+ flock_cp (&cp, &f);
+ ret = fcntl (fd, F_SETLK, &f);
+ if (ret) {
+ perror ("fcntl");
+ goto out;
+ }
+ if (!are_flocks_sane (&f, &cp)) {
+ ret = 1;
+ goto out;
+ }
+
+ flock_init (&f, F_WRLCK, 3, 3);
+ flock_cp (&cp, &f);
+ ret = fcntl (fd, F_GETLK, &f);
+ if (ret) {
+ perror ("fcntl");
+ return 1;
+ }
+ GETLK_OWNER_CHECK (f, cp, out);
+
+ flock_init (&f, F_RDLCK, 3, 3);
+ flock_cp (&cp, &f);
+ ret = fcntl (fd, F_GETLK, &f);
+ if (ret) {
+ perror ("fcntl");
+ return 1;
+ }
+ GETLK_OWNER_CHECK (f, cp, out);
+
+out:
+ if (fd != -1)
+ close (fd);
+ return ret;
+}
diff --git a/tests/bugs/overlap.py b/tests/bugs/overlap.py
new file mode 100755
index 000000000..15f2da473
--- /dev/null
+++ b/tests/bugs/overlap.py
@@ -0,0 +1,59 @@
+#!/usr/bin/python
+
+import sys
+
+def calculate_one (ov, nv):
+ old_start = int(ov[18:26],16)
+ old_end = int(ov[26:34],16)
+ new_start = int(nv[18:26],16)
+ new_end = int(nv[26:34],16)
+ if (new_end < old_start) or (new_start > old_end):
+ #print '%s, %s -> ZERO' % (ov, nv)
+ return 0
+ all_start = max(old_start,new_start)
+ all_end = min(old_end,new_end)
+ #print '%s, %s -> %08x' % (ov, nv, all_end - all_start + 1)
+ return all_end - all_start + 1
+
+def calculate_all (values):
+ total = 0
+ nv_index = len(values) / 2
+ for old_val in values[:nv_index]:
+ new_val = values[nv_index]
+ nv_index += 1
+ total += calculate_one(old_val,new_val)
+ return total
+
+"""
+test1_vals = [
+ '0x0000000000000000000000003fffffff', # first quarter
+ '0x0000000000000000400000007fffffff', # second quarter
+ '0x000000000000000080000000ffffffff', # second half
+ '0x00000000000000000000000055555554', # first third
+ '0x000000000000000055555555aaaaaaa9', # second third
+ '0x0000000000000000aaaaaaaaffffffff', # last third
+]
+
+test2_vals = [
+ '0x0000000000000000000000003fffffff', # first quarter
+ '0x0000000000000000400000007fffffff', # second quarter
+ '0x000000000000000080000000ffffffff', # second half
+ '0x00000000000000000000000055555554', # first third
+ # Next two are (incorrectly) swapped.
+ '0x0000000000000000aaaaaaaaffffffff', # last third
+ '0x000000000000000055555555aaaaaaa9', # second third
+]
+
+print '%08x' % calculate_one(test1_vals[0],test1_vals[3])
+print '%08x' % calculate_one(test1_vals[1],test1_vals[4])
+print '%08x' % calculate_one(test1_vals[2],test1_vals[5])
+print '= %08x' % calculate_all(test1_vals)
+print '%08x' % calculate_one(test2_vals[0],test2_vals[3])
+print '%08x' % calculate_one(test2_vals[1],test2_vals[4])
+print '%08x' % calculate_one(test2_vals[2],test2_vals[5])
+print '= %08x' % calculate_all(test2_vals)
+"""
+
+if __name__ == '__main__':
+ # Return decimal so bash can reason about it.
+ print '%d' % calculate_all(sys.argv[1:])
diff --git a/tests/cluster.rc b/tests/cluster.rc
new file mode 100755
index 000000000..3b10d19f7
--- /dev/null
+++ b/tests/cluster.rc
@@ -0,0 +1,112 @@
+#!/bin/bash
+
+CLUSTER_PFX="127.1.1"; # ".x" for each glusterd
+CLUSTER_COUNT=1; # Just initial definition
+
+function launch_cluster() {
+ local count=$1;
+
+ CLUSTER_COUNT=$count;
+
+ define_backends $count;
+ define_hosts $count;
+ define_glusterds $count;
+ define_clis $count;
+
+ start_glusterds;
+}
+
+
+function define_backends() {
+ local b;
+
+ for i in `seq 1 $count`; do
+ eval "B$i=$B0/$i";
+ done
+
+ for i in `seq 1 $count`; do
+ b="B$i";
+ mkdir -pv ${!b}/glusterd;
+ done
+}
+
+
+function define_glusterds() {
+ local count=$1;
+ local h;
+ local b;
+ local wopt;
+ local bopt;
+ local popt;
+
+ for i in `seq 1 $count`; do
+ b="B$i";
+ h="H$i";
+ wopt="management.working-directory=${!b}/glusterd";
+ bopt="management.transport.socket.bind-address=${!h}";
+ popt="--pid-file=${!b}/glusterd.pid";
+ sopt="management.glusterd-sockfile=${!b}/glusterd/gd.sock"
+ lopt="--log-file=${!b}/glusterd.log"
+ eval "glusterd_$i='glusterd --xlator-option $wopt --xlator-option $bopt --xlator-option $sopt $lopt $popt'";
+ eval "glusterd$i='glusterd --xlator-option $wopt --xlator-option $bopt --xlator-option $sopt $lopt $popt'";
+ done
+}
+
+
+function start_glusterds() {
+ local g;
+
+ for i in `seq 1 $CLUSTER_COUNT`; do
+ g="glusterd_$i";
+ ${!g};
+ done
+}
+
+
+function kill_glusterd() {
+ local index=$1;
+ local b;
+ local pidfile;
+
+ b="B$index";
+ pidfile="${!b}/glusterd.pid";
+
+ kill `cat $pidfile`;
+}
+
+
+function kill_node() {
+ local index=$1;
+ local h;
+
+ h="H$index";
+
+ kill -9 $(ps -ef | grep gluster | grep ${!h} | awk '{print $2}');
+}
+
+
+function define_hosts() {
+ local count=$1;
+
+ for i in `seq 1 $count`; do
+ eval "H_$i=${CLUSTER_PFX}.$i"
+ eval "H$i=${CLUSTER_PFX}.$i";
+ done
+}
+
+
+function define_clis() {
+ local count=$1;
+ local h;
+
+ for i in `seq 1 $count`; do
+ b="B$i";
+ eval "CLI_$i='$CLI --glusterd-sock=${!b}/glusterd/gd.sock'";
+ eval "CLI$i='$CLI --glusterd-sock=${!b}/glusterd/gd.sock'";
+ done
+ CLI="$CLI_1"
+}
+
+function peer_count() {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
diff --git a/tests/dht.rc b/tests/dht.rc
new file mode 100644
index 000000000..663ea5431
--- /dev/null
+++ b/tests/dht.rc
@@ -0,0 +1,79 @@
+#!/bin/bash
+
+function get_layout()
+{
+ getfattr -n trusted.glusterfs.dht -e hex $1 2>&1|grep dht |cut -d = -f2
+
+}
+
+## populates $BRICK1 and $BRICK2 with hashed/cached subvolume. These will be
+## used by get_cached_brick and get_hashed_brick
+
+function file_has_linkfile()
+{
+ k=0
+ l=0
+ while [ $k -lt $BRICK_COUNT ]
+ do
+ stat=`stat $B0/${V0}$k/$1 2>/dev/null`
+ if [ $? -eq 0 ]
+ then
+ let l++
+ let "BRICK${l}=$k"
+
+ fi
+ let k++
+ done
+ return $l
+}
+
+function get_cached_brick()
+{
+ i=1
+ brick=$BRICK1
+ while [ $i -lt 3 ]
+ do
+ test=`getfattr -n trusted.glusterfs.dht.linkto -e text $B0/${V0}$brick/$1 2>&1`
+ if [ $? -eq 1 ]
+ then
+ cached=$brick
+ i=$(( $i+3 ))
+ fi
+ brick=$BRICK1
+ let i++
+ done
+
+ return $cached
+}
+
+function get_hashed_brick()
+{
+ j=1
+ brick=$BRICK1
+ while [ $j -lt 3 ]
+ do
+ test=`getfattr -n trusted.glusterfs.dht.linkto -e text $B0/${V0}$brick/$1 2>&1`
+ if [ $? -eq 0 ]
+ then
+ hashed=$brick
+ j=$(( $j+3 ))
+ fi
+ brick=$BRICK2
+ let j++
+ done
+
+ return $hashed
+}
+
+
+function rebalance_completed()
+{
+ val=1
+ test=`gluster volume rebalance $V0 status |grep localhost|grep -v "in progress" 2>&1`
+ if [ $? -eq 0 ]
+ then
+ val=0
+ fi
+
+ echo $val
+}
diff --git a/tests/fallocate.rc b/tests/fallocate.rc
new file mode 100644
index 000000000..3756bb949
--- /dev/null
+++ b/tests/fallocate.rc
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+# Helper to verify a given fallocate command is supported and skip a test
+# otherwise. Older versions of the fallocate utility might not support all modes
+# (i.e., discard) and older versions of fuse might not support the associated
+# fallocate requests.
+
+function require_fallocate()
+{
+ output=`fallocate $* 2>&1`
+ ret=$?
+ if [ ! $ret -eq 0 ] && ([[ $output == *unsupported* ]] ||
+ [[ $output == *invalid* ]] ||
+ [[ $output == *"not supported"* ]])
+ then
+ SKIP_TESTS
+ exit
+ fi
+}
diff --git a/tests/features/glupy.t b/tests/features/glupy.t
new file mode 100755
index 000000000..49bf11df5
--- /dev/null
+++ b/tests/features/glupy.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST mkdir -p $B0/glupytest
+cat > $B0/glupytest.vol <<EOF
+volume vol-posix
+ type storage/posix
+ option directory $B0/glupytest
+end-volume
+
+volume vol-glupy
+ type features/glupy
+ option module-name helloworld
+ subvolumes vol-posix
+end-volume
+EOF
+
+TEST glusterfs -f $B0/glupytest.vol $M0;
+
+TEST touch $M0/filename;
+EXPECT "filename" ls $M0
+TEST rm -f $M0/filename;
+
+TEST umount -l $M0;
+
+cleanup;
diff --git a/tests/features/readdir-ahead.t b/tests/features/readdir-ahead.t
new file mode 100755
index 000000000..c7ee637f0
--- /dev/null
+++ b/tests/features/readdir-ahead.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+#
+# Test basic readdir-ahead functionality. Verify that readdir-ahead can be
+# enabled, create a set of files and run some ls tests.
+#
+###
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 readdir-ahead on
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST mkdir $M0/test
+for i in $(seq 0 99)
+do
+ touch $M0/test/$i
+done
+
+count=`ls -1 $M0/test | wc -l`
+TEST [ $count -eq 100 ]
+
+count=`ls -1 $M0/test | wc -l`
+TEST [ $count -eq 100 ]
+
+TEST rm -rf $M0/test/*
+
+count=`ls -1 $M0/test | wc -l`
+TEST [ $count -eq 0 ]
+
+TEST rmdir $M0/test
+
+TEST umount -l $M0;
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/fileio.rc b/tests/fileio.rc
new file mode 100644
index 000000000..58871b3b9
--- /dev/null
+++ b/tests/fileio.rc
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+function fd_available() {
+ for i in {1..65536}; do
+ if [ ! -e /proc/$$/fd/$i ]; then
+ echo $i;
+ return 0;
+ fi
+ done
+
+ return 1;
+}
+
+function fd_open() {
+ local fd=$1;
+ local mode=$2
+ local path=$3;
+
+ case $mode in
+ r)
+ eval "exec $fd<$path";;
+ w)
+ eval "exec $fd>$path";;
+ rw)
+ eval "exec $fd<>$path";;
+ *)
+ false;;
+ esac
+}
+
+
+function fd_cat() {
+ local fd=$1;
+
+ eval "cat <&$fd";
+}
+
+
+function fd_write() {
+ local fd=$1;
+ shift;
+ local msg="$@";
+
+ eval "echo $@ >&$fd";
+}
+
+
+function fd_close() {
+ local fd=$1;
+
+ eval "exec $fd>&-";
+}
+
+
+function fd_based_example() {
+ TEST fd=`fd_available`;
+ TEST fd_open $fd "rw" $M0/filename;
+ TEST fd_cat $fd; # print existing stuff
+ TEST fd_write $fd "new stuff"; # append
+ TEST fd_close $fd;
+}
diff --git a/tests/include.rc b/tests/include.rc
new file mode 100644
index 000000000..80457f124
--- /dev/null
+++ b/tests/include.rc
@@ -0,0 +1,248 @@
+M0=${M0:=/mnt/glusterfs/0}; # 0th mount point for FUSE
+M1=${M1:=/mnt/glusterfs/1}; # 1st mount point for FUSE
+N0=${N0:=/mnt/nfs/0}; # 0th mount point for NFS
+N1=${N1:=/mnt/nfs/1}; # 1st mount point for NFS
+V0=${V0:=patchy}; # volume name to use in tests
+V1=${V1:=patchy1}; # volume name to use in tests
+B0=${B0:=/d/backends}; # top level of brick directories
+H0=${H0:=`hostname --fqdn`}; # hostname
+DEBUG=${DEBUG:=0} # turn on debugging?
+statedumpdir=`gluster --print-statedumpdir`; # Default directory for statedump
+
+CLI="gluster --mode=script";
+
+mkdir -p $B0;
+mkdir -p $M0 $M1;
+mkdir -p $N0 $N1;
+
+testcnt=`egrep '^[[:space:]]*(EXPECT|TEST|EXPECT_WITHIN|EXPECT_KEYWORD)[[:space:]]' $0 | wc -l`
+expect_tests=`egrep '^[[:space:]]*TESTS_EXPECTED_IN_LOOP[[:space:]]*' $0`
+
+x_ifs=$IFS
+IFS=$'\n'
+for line in $expect_tests; do
+ expect_tests=`echo $line | cut -f 2 -d =`
+ testcnt=`expr $testcnt + $expect_tests`
+done
+IFS=$x_ifs
+
+echo 1..$testcnt
+
+t=1
+
+function dbg()
+{
+ [ "x$DEBUG" = "x0" ] || echo "$*" >&2;
+}
+
+
+function test_header()
+{
+ dbg "=========================";
+ dbg "TEST $t (line $TESTLINE): $*";
+}
+
+
+function test_footer()
+{
+ RET=$?
+ local err=$1
+
+ if [ $RET -eq 0 ]; then
+ echo "ok $t";
+ else
+ echo "not ok $t $err";
+ if [ "$EXIT_EARLY" = "1" ]; then
+ exit $RET
+ fi
+ fi
+
+ dbg "RESULT $t: $RET";
+
+ t=`expr $t + 1`;
+}
+
+function test_expect_footer()
+{
+ local e=$1
+ local a=$2
+ local err=""
+
+ if [ "x${e}" != "x${a}" ]; then
+ err="Got \"$a\" instead of \"$e\""
+ fi
+ [[ "x${e}" == "x${a}" ]];
+ test_footer "$err";
+}
+
+function _EXPECT()
+{
+ TESTLINE=$1;
+ shift;
+ local a=""
+
+ test_header "$@";
+
+ e="$1";
+ shift;
+ a=$("$@" | tail -1)
+
+ test_expect_footer "$e" "$a";
+}
+
+function _EXPECT_KEYWORD()
+{
+ TESTLINE=$1;
+ shift;
+
+ test_header "$@";
+
+ e="$1";
+ shift;
+ "$@" | tail -1 | grep -q "$e"
+
+ test_footer;
+}
+
+
+function _TEST()
+{
+ TESTLINE=$1;
+ shift;
+ local redirect=""
+
+ test_header "$@";
+
+ if [ "$1" = "!" ]; then
+ redirect="2>&1"
+ fi
+
+ eval "$@" >/dev/null $redirect
+
+ test_footer;
+}
+
+function _EXPECT_WITHIN()
+{
+ TESTLINE=$1
+ shift;
+
+ local timeout=$1
+ shift;
+
+ test_header "$@"
+
+ e=$1;
+ shift;
+
+ local endtime=$(( ${timeout}+`date +%s` ))
+
+ local success=0
+ while [ `date +%s` -lt $endtime ]; do
+ ("$@") | tail -1 | egrep -q "^${e}\$"
+
+ local pipestatus=(${PIPESTATUS[@]})
+
+ ## Check command success
+ if [ ${pipestatus[0]} -ne 0 ]; then
+ break;
+ fi
+
+ ## Check match success
+ if [ ${pipestatus[2]} -eq 0 ]; then
+ success=1;
+ break;
+ fi
+ sleep 1;
+ done
+
+ if [ $success -eq 1 ]; then
+ true;
+ else
+ false;
+ fi
+
+ test_footer;
+}
+
+
+function SKIP_TESTS()
+{
+ dbg "Skipping tests $t-$testcnt";
+ while [ $t -le $testcnt ]; do
+ true ; test_footer;
+ done
+}
+
+
+function _TEST_IN_LOOP()
+{
+ testcnt=`expr $testcnt + 1`;
+ _TEST $@
+}
+
+
+function cleanup()
+{
+ killall -15 glusterfs glusterfsd glusterd 2>/dev/null || true;
+ killall -9 glusterfs glusterfsd glusterd 2>/dev/null || true;
+
+ type cleanup_lvm &>/dev/null && cleanup_lvm
+
+ MOUNTPOINTS=`mount | grep "$B0/" | awk '{print $3}'`
+ for m in $MOUNTPOINTS;
+ do
+ umount $m
+ done
+
+ LOOPDEVICES=`losetup -a | grep "$B0/" | awk '{print $1}' | tr -d :`
+ for l in $LOOPDEVICES;
+ do
+ losetup -d $l
+ done
+
+
+ rm -rf /var/lib/glusterd/* $B0/* /etc/glusterd/*;
+
+ umount -l $M0 2>/dev/null || true;
+ umount -l $M1 2>/dev/null || true;
+ umount -l $N0 2>/dev/null || true;
+ umount -l $N1 2>/dev/null || true;
+
+}
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+function cleanup_tester ()
+{
+ local exe=$1
+ rm -f $exe
+}
+
+function build_tester ()
+{
+ local cfile=$1
+ local fname=$(basename "$cfile")
+ local ext="${fname##*.}"
+ local execname="${fname%.*}"
+ gcc -g -o $(dirname $cfile)/$execname $cfile
+}
+
+function process_leak_count ()
+{
+ local pid=$1;
+ return $(ls -lh /proc/$pid/fd | grep "(deleted)"| wc -l)
+}
+
+alias EXPECT='_EXPECT $LINENO'
+alias TEST='_TEST $LINENO'
+alias EXPECT_WITHIN='_EXPECT_WITHIN $LINENO'
+alias EXPECT_KEYWORD='_EXPECT_KEYWORD $LINENO'
+alias TEST_IN_LOOP='_TEST_IN_LOOP $LINENO'
+shopt -s expand_aliases
diff --git a/tests/nfs.rc b/tests/nfs.rc
new file mode 100644
index 000000000..f3abee842
--- /dev/null
+++ b/tests/nfs.rc
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+
+# Due to portmap registration NFS takes some time to
+# export all volumes. Therefore tests should start only
+# after exports are visible by showmount command. This
+# routine will check if showmount shows the exports or not
+#
+function is_nfs_export_available ()
+{
+ vol=$1
+
+ if [ "$vol" == "" ]; then
+ vol=$V0
+ fi
+
+ exp=$(showmount -e 2> /dev/null | grep $vol | wc -l)
+ echo "$exp"
+}
+
+
diff --git a/tests/performance/open-behind.t b/tests/performance/open-behind.t
new file mode 100755
index 000000000..032154a20
--- /dev/null
+++ b/tests/performance/open-behind.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+TEST $CLI volume start $V0;
+
+## Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M1;
+
+D0="hello-this-is-a-test-message0";
+F0="test-file0";
+
+function write_to()
+{
+ local file="$1";
+ local data="$2";
+
+ echo "$data" > "$file";
+}
+
+
+TEST write_to "$M0/$F0" "$D0";
+EXPECT "$D0" cat $M1/$F0;
+
+# open-behind delays open and uses anonymous fds for fops like
+# fstat and readv. So after creating the file, if volume is restarted
+# then later when the file is read, because of the use of anonymous fds
+# volume top open will show number of files opened as 0.
+TEST $CLI volume stop $V0;
+sleep 1;
+TEST $CLI volume start $V0;
+
+sleep 2;
+cat $M1/$F0 >/dev/null;
+
+string=$(gluster volume top $V0 open | grep -w "$F0");
+
+EXPECT "" echo $string;
+
+TEST $CLI volume set $V0 performance.open-behind off;
+
+D1="hello-this-is-a-test-message1";
+F1="test-file1";
+
+TEST write_to "$M0/$F1" "$D1";
+EXPECT "$D1" cat $M1/$F1;
+
+EXPECT "$D0" cat $M1/$F0;
+
+gluster volume top $V0 open | grep -w "$F0" >/dev/null 2>&1
+TEST [ $? -eq 0 ];
+
+cleanup;
diff --git a/tests/performance/quick-read.t b/tests/performance/quick-read.t
new file mode 100644
index 000000000..082998e43
--- /dev/null
+++ b/tests/performance/quick-read.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+TEST $CLI volume start $V0;
+
+## Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M1;
+
+D0="hello-this-is-a-test-message0";
+F0="test-file0";
+
+function write_to()
+{
+ local file="$1";
+ local data="$2";
+
+ echo "$data" > "$file";
+}
+
+
+TEST write_to "$M0/$F0" "$D0";
+EXPECT "$D0" cat $M1/$F0;
+EXPECT "$D0" cat $M0/$F0;
+EXPECT "$D0" cat $M1/$F0;
+EXPECT "$D0" cat $M0/$F0;
+
+sleep 1;
+
+EXPECT "$D0" cat $M1/$F0;
+EXPECT "$D0" cat $M0/$F0;
+EXPECT "$D0" cat $M1/$F0;
+EXPECT "$D0" cat $M0/$F0;
+
+TEST $CLI volume set $V0 performance.quick-read off;
+
+D1="hello-this-is-a-test-message1";
+F1="test-file1";
+
+TEST write_to "$M0/$F1" "$D1";
+EXPECT "$D1" cat $M0/$F1;
+
+EXPECT "$D0" cat $M1/$F0;
+
+cleanup;
diff --git a/tests/snapshot.rc b/tests/snapshot.rc
new file mode 100755
index 000000000..a5b86f674
--- /dev/null
+++ b/tests/snapshot.rc
@@ -0,0 +1,251 @@
+#!/bin/bash
+
+LVM_DEFINED=0
+LVM_PREFIX="patchy_snap"
+LVM_COUNT=0
+VHD_SIZE="1G"
+
+function init_lvm() {
+ if [ "$1" == "" ]; then
+ echo "Error: Invalid argument supplied"
+ return 1
+ fi
+ LVM_COUNT=$1
+
+ if [ "$2" != "" ]; then
+ VHD_SIZE=$2
+ fi
+
+ local b
+ local i
+
+ if [ "$B1" = "" ]; then
+ B1=$B0
+ fi
+
+ for i in `seq 1 $LVM_COUNT`; do
+ b="B$i"
+ if [ "${!b}" = "" ]; then
+ echo "Error: $b not defined."
+ echo "Please run launch_cluster with atleast $LVM_COUNT nodes"
+ return 1
+ fi
+
+ eval "L$i=${!b}/${LVM_PREFIX}_mnt"
+ l="L$i"
+ mkdir -p ${!l}
+ if [ $? -ne 0 ]; then
+ echo "Error: failed to create dir ${!l}"
+ return 1
+ fi
+
+ eval "VG$i=${LVM_PREFIX}_vg_${i}"
+ done
+
+ LVM_DEFINED=1
+ return 0
+}
+
+function setup_lvm() {
+ init_lvm $@ || return 1
+ _setup_lvm
+ return 0
+}
+
+function cleanup_lvm() {
+ pkill gluster
+ sleep 2
+
+ if [ "$LVM_DEFINED" = "1" ]; then
+ _cleanup_lvm
+ fi
+
+ _cleanup_lvm_again >/dev/null 2>&1
+ return 0
+}
+
+########################################################
+# Private Functions
+########################################################
+function _setup_lvm() {
+ local count=$LVM_COUNT
+ local b
+ local i
+
+ for i in `seq 1 $count`; do
+ b="B$i"
+
+ _create_vhd ${!b} $i
+ _create_lv ${!b} $i
+ _mount_lv $i
+ done
+}
+
+function _cleanup_lvm() {
+ local count=$LVM_COUNT
+ local b
+ local i
+
+ for i in `seq 1 $count`; do
+ b="B$i"
+ _umount_lv $i
+ _remove_lv $i
+ _remove_vhd ${!b}
+ done
+}
+
+function _cleanup_lvm_again() {
+ local file
+
+ mount | grep $LVM_PREFIX | awk '{print $3}' | xargs -r umount -f
+
+ /sbin/vgs | grep $LVM_PREFIX | awk '{print $1}' | xargs -r vgremove -f
+
+ find $B0 -name "${LVM_PREFIX}_loop" | xargs -r losetup -d
+
+ find $B0 -name "${LVM_PREFIX}*" | xargs -r rm -rf
+
+ find /run/gluster/snaps -name "*${LVM_PREFIX}*" | xargs -r rm -rf
+
+ for file in `ls /run/gluster/snaps`; do
+ find /run/gluster/snaps/$file -mmin -2 | xargs -r rm -rf
+ done
+}
+
+########################################################
+########################################################
+function _create_vhd() {
+ local dir=$1
+ local num=$2
+ local loop_num=`expr $2 + 8`
+
+ fallocate -l${VHD_SIZE} $dir/${LVM_PREFIX}_vhd
+ mknod -m660 $dir/${LVM_PREFIX}_loop b 7 $loop_num
+ /sbin/losetup $dir/${LVM_PREFIX}_loop $dir/${LVM_PREFIX}_vhd
+}
+
+function _create_lv() {
+ local dir=$1
+ local num=$2
+ local vg="VG$num"
+
+ /sbin/pvcreate $dir/${LVM_PREFIX}_loop
+ /sbin/vgcreate ${!vg} $dir/${LVM_PREFIX}_loop
+
+ /sbin/lvcreate -l 100%FREE -T /dev/${!vg}/thinpool
+ /sbin/lvcreate -V $VHD_SIZE -T /dev/${!vg}/thinpool -n brick_lvm
+
+ mkfs.xfs -f /dev/${!vg}/brick_lvm
+}
+
+function _mount_lv() {
+ local num=$1
+ local vg="VG$num"
+ local l="L$num"
+
+ mount -t xfs -o nouuid /dev/${!vg}/brick_lvm ${!l}
+}
+
+function _umount_lv() {
+ local num=$1
+ local l="L$num"
+
+ umount -f ${!l} 2>/dev/null || true
+ rmdir ${!l} 2>/dev/null || true
+}
+
+function _remove_lv() {
+ local num=$1
+ local vg="VG$num"
+
+ vgremove -f ${!vg}
+}
+
+function _remove_vhd() {
+ local dir=$1
+
+ losetup -d $dir/${LVM_PREFIX}_loop
+ rm -f $dir/${LVM_PREFIX}_loop
+ rm -f $dir/${LVM_PREFIX}_vhd
+}
+
+########################################################
+# Utility Functions
+########################################################
+function snapshot_exists() {
+ local volname=$1
+ local snapname=$2
+ $CLI snapshot list $volname | egrep -q "^$snapname\$"
+ return $?
+}
+
+#Create N number of snaps in a given volume
+#Arg1 : <Volume Name>
+#Arg2 : <Count of snaps to be created>
+#Arg3 : <Snap Name Pattern>
+#Return: Returns 0 if all snaps are created ,
+# if not will return exit code of last failed
+# snap create command.
+function create_n_snapshots() {
+ local vol=$1
+ local snap_count=$2
+ local snap_name=$3
+ local ret=0
+ for i in `seq 1 $snap_count`; do
+ $CLI_1 snapshot create $snap_name$i ${vol}&
+ PID_1=$!
+ wait $PID_1
+ ret=$?
+ if [ "$ret" != "0" ]; then
+ break
+ fi
+ done
+ return $ret
+}
+
+
+#Delete N number of snaps in a given volume
+#Arg1 : <Volume Name>
+#Arg2 : <Count of snaps to be deleted>
+#Arg3 : <Snap Name Pattern>
+#Return: Returns 0 if all snaps are Delete,
+# if not will return exit code of last failed
+# snap delete command.
+function delete_n_snapshots() {
+ local vol=$1
+ local snap_count=$2
+ local snap_name=$3
+ local ret=0
+ for i in `seq 1 $snap_count`; do
+ $CLI_1 snapshot delete $snap_name$i &
+ PID_1=$!
+ wait $PID_1
+ temp=$?
+ if [ "$temp" != "0" ]; then
+ ret=$temp
+ fi
+ done
+ return $ret
+}
+
+#Check for the existance of N number of snaps in a given volume
+#Arg1 : <Volume Name>
+#Arg2 : <Count of snaps to be checked>
+#Arg3 : <Snap Name Pattern>
+#Return: Returns 0 if all snaps exists,
+# if not will return exit code of last failed
+# snapshot_exists().
+function snapshot_n_exists() {
+ local vol=$1
+ local snap_count=$2
+ local snap_name=$3
+ local ret=0
+ for i in `seq 1 $snap_count`; do
+ snapshot_exists $snap_name$i
+ ret=$?
+ if [ "$ret" != "0" ]; then
+ break
+ fi
+ done
+ return $ret
+}
diff --git a/tests/utils/create-files.py b/tests/utils/create-files.py
new file mode 100755
index 000000000..0d937eff9
--- /dev/null
+++ b/tests/utils/create-files.py
@@ -0,0 +1,207 @@
+#!/usr/bin/python
+
+# This script was developed by Vijaykumar Koppad (vkoppad@redhat.com)
+# The latest version of this script can found at
+# http://github.com/vijaykumar-koppad/crefi
+
+from __future__ import with_statement
+import sys
+import os
+import re
+import random
+from optparse import OptionParser
+import time
+import string
+import errno
+
+def os_rd(src, size):
+ fd = os.open(src,os.O_RDONLY)
+ data = os.read(fd, size)
+ os.close(fd)
+ return data
+
+def os_wr(dest, data):
+ fd = os.open(dest,os.O_WRONLY|os.O_CREAT|os.O_EXCL, 0644)
+ os.write(fd, data)
+ os.close(fd)
+ return
+
+def create_sparse_file(fil):
+ if option.size:
+ option.random = False
+ size = option.size
+ else:
+ size = random.randint(option.min, option.max)
+ data = os_rd("/dev/zero", size)
+ os_wr(fil, data)
+ return
+
+def create_binary_file(fil):
+ if option.size:
+ option.random = False
+ size = option.size
+ else:
+ size = random.randint(option.min, option.max)
+ data = os_rd("/dev/urandom", size)
+ os_wr(fil, data)
+ return
+
+def create_txt_file(fil):
+ if option.size:
+ option.random = False
+ size = option.size
+ else:
+ size = random.randint(option.min, option.max)
+ if size < 500*1024:
+ data = os_rd("/etc/services", size)
+ os_wr(fil, data)
+ else:
+ data = os_rd("/etc/services", 500*1024)
+ file_size = 0
+ fd = os.open(fil,os.O_WRONLY|os.O_CREAT|os.O_EXCL, 0644)
+ while file_size < size:
+ os.write(fd, data)
+ file_size += 500*1024
+ os.close(fd)
+ return
+
+def get_filename():
+ size = option.flen
+ char = string.uppercase+string.digits
+ st = ''.join(random.choice(char) for i in range(size))
+ ti = str((hex(int(str(time.time()).split('.')[0])))[2:])
+ return ti+"~~"+st
+
+def text_files(files, file_count):
+ for k in range(files):
+ if not file_count%option.inter:
+ print file_count
+ fil = get_filename()
+ create_txt_file(fil)
+ file_count += 1
+ return file_count
+
+def sparse_files(files, file_count):
+ for k in range(files):
+ if not file_count%option.inter:
+ print file_count
+ fil = get_filename()
+ create_sparse_file(fil)
+ file_count += 1
+ return file_count
+
+def binary_files(files, file_count):
+ for k in range(files):
+ if not file_count%option.inter:
+ print file_count
+ fil = get_filename()
+ create_binary_file(fil)
+ file_count += 1
+ return file_count
+
+def human2bytes(size):
+ size_short = {
+ 1024 : ['K','KB','KiB','k','kB','kiB'],
+ 1024*1024 : ['M','MB','MiB'],
+ 1024*1024*1024 : ['G','GB','GiB']
+}
+ num = re.search('(\d+)',size).group()
+ ext = size[len(num):]
+ num = int(num)
+ if ext == '':
+ return num
+ for value, keys in size_short.items():
+ if ext in keys:
+ size = num*value
+ return size
+
+def multipledir(mnt_pnt,brdth,depth,files):
+ files_count = 1
+ for i in range(brdth):
+ breadth = mnt_pnt+"/"+str(i)
+ try:
+ os.makedirs(breadth)
+ except OSError as ex:
+ if not ex.errno is errno.EEXIST:
+ raise
+ os.chdir(breadth)
+ dir_depth = breadth
+ print breadth
+ for j in range(depth):
+ dir_depth = dir_depth+"/"+str(j)
+ try:
+ os.makedirs(dir_depth)
+ except OSError as ex:
+ if not ex.errno is errno.EEXIST:
+ raise
+ os.chdir(dir_depth)
+ if option.file_type == "text":
+ files_count = text_files(files, files_count)
+ elif option.file_type == "sparse":
+ files_count = sparse_files(files, files_count)
+ elif option.file_type == "binary":
+ files_count = binary_files(files, files_count)
+ else:
+ print "Not a valid file type"
+ sys.exit(1)
+
+def singledir(mnt_pnt, files):
+ files_count = 1
+ os.chdir(mnt_pnt)
+ if option.file_type == "text":
+ files_count = text_files(files, files_count)
+ elif option.file_type == "sparse":
+ files_count = sparse_files(files, files_count)
+ elif option.file_type == "binary":
+ files_count = binary_files(files, files_count)
+ else:
+ print "Not a valid file type"
+ sys.exit(1)
+
+if __name__ == '__main__':
+ usage = "usage: %prog [option] <MNT_PT>"
+ parser = OptionParser(usage=usage)
+ parser.add_option("-n", dest="files",type="int" ,default=100,
+ help="number of files in each level [default: %default]")
+ parser.add_option("--size", action = "store",type="string",
+ help="size of the files to be used")
+ parser.add_option("--random", action="store_true", default=True,
+ help="random size of the file between --min and --max "
+ "[default: %default]")
+ parser.add_option("--max", action = "store",type="string", default="500K",
+ help="maximum size of the files, if random is True "
+ "[default: %default]")
+ parser.add_option("--min", action = "store",type="string", default="10K",
+ help="minimum size of the files, if random is True "
+ "[default: %default]" )
+ parser.add_option("--single", action="store_true", dest="dir",default=True,
+ help="create files in single directory [default: %default]" )
+ parser.add_option("--multi", action="store_false", dest="dir",
+ help="create files in multiple directories")
+ parser.add_option("-b", dest="brdth",type="int",default=5,
+ help="number of directories in one level(works with --multi)[default: %default]")
+ parser.add_option("-d", dest="depth",type="int",default=5,
+ help="number of levels of directories(works with --multi)[default: %default]")
+ parser.add_option("-l", dest="flen",type="int" ,default=10,
+ help="number of bytes for filename "
+ "[default: %default]")
+ parser.add_option("-t","--type", action="store", type="string" , dest="file_type",default="text",
+ help="type of the file to be created (text, sparse, binary) [default: %default]" )
+ parser.add_option("-I", dest="inter", type="int", default=100,
+ help="print number files created of interval [defailt: %dafault]")
+ (option,args) = parser.parse_args()
+ if not args:
+ print "usage: <script> [option] <MNT_PT>"
+ print ""
+ sys.exit(1)
+ args[0] = os.path.abspath(args[0])
+ if option.size:
+ option.size = human2bytes(option.size)
+ else:
+ option.max = human2bytes(option.max)
+ option.min = human2bytes(option.min)
+ if option.dir:
+ singledir(args[0], option.files)
+ else:
+ multipledir(args[0], option.brdth, option.depth, option.files)
+ print "creation of files completed.\n"
diff --git a/tests/volume.rc b/tests/volume.rc
new file mode 100644
index 000000000..171f8d709
--- /dev/null
+++ b/tests/volume.rc
@@ -0,0 +1,325 @@
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+
+function brick_count()
+{
+ local vol=$1;
+
+ $CLI volume info $vol | egrep "^Brick[0-9]+: " | wc -l;
+}
+
+function online_brick_count ()
+{
+ pidof glusterfsd | wc -w
+}
+
+function volume_option()
+{
+ local vol=$1
+ local key=$2
+ $CLI volume info $vol | egrep "^$key: " | cut -f2 -d' ';
+}
+
+function rebalance_status_field {
+ #The rebalance status can be upto 3 words, (ex:'fix-layout in progress'), hence the awk-print $7 thru $9.
+ #But if the status is less than 3 words, it also prints the next field i.e the run_time_in_secs.(ex:'completed 3.00').
+ #So we trim the numbers out with `tr`. Finally remove the trailing white spaces with sed. What we get is one of the
+ #strings in the 'cli_vol_task_status_str' char array of cli-rpc-ops.c
+
+ $CLI volume rebalance $1 status | awk '{print $7,$8,$9}' |sed -n 3p |tr -d '[^0-9+\.]'|sed 's/ *$//g'
+}
+
+function remove_brick_status_completed_field {
+ local vol=$1
+ local brick_list=$2
+ $CLI volume remove-brick $vol $brick_list status | awk '{print $7}' | sed -n 3p
+}
+
+function get_mount_process_pid {
+ local vol=$1
+ ps aux | grep glusterfs | grep -E "volfile-id[ =]/?$vol " | awk '{print $2}' | head -1
+}
+
+function get_nfs_pid ()
+{
+ ps aux | grep "volfile-id\ gluster\/nfs" | awk '{print $2}' | head -1
+}
+
+function read_nfs_pidfile ()
+{
+ echo `cat /var/lib/glusterd/nfs/run/nfs.pid`
+}
+
+function cleanup_statedump {
+ pid=$1
+ rm -f $statedumpdir/*$pid.dump.*
+ #.vimrc friendly comment */
+}
+
+function generate_statedump {
+ local fpath=""
+ pid=$1
+ #remove old stale statedumps
+ cleanup_statedump $pid
+ kill -USR1 $pid
+ #Wait till the statedump is generated
+ sleep 1
+ fname=$(ls $statedumpdir | grep -E "\.$pid\.dump\.")
+ echo $statedumpdir/$fname
+}
+
+function generate_mount_statedump {
+ local vol=$1
+ generate_statedump $(get_mount_process_pid $vol)
+}
+
+function cleanup_mount_statedump {
+ local vol=$1
+ cleanup_statedump $(get_mount_process_pid $vol)
+}
+
+function _afr_child_up_status {
+ local vol=$1
+ #brick_id is (brick-num in volume info - 1)
+ local brick_id=$2
+ local gen_state_dump=$3
+ local fpath=$($gen_state_dump $vol)
+ up=$(grep -B1 trusted.afr.$vol-client-$brick_id $fpath | head -1 | cut -f2 -d'=')
+ rm -f $fpath
+ echo "$up"
+}
+
+function afr_child_up_status {
+ local vol=$1
+ #brick_id is (brick-num in volume info - 1)
+ local brick_id=$2
+ _afr_child_up_status $vol $brick_id generate_mount_statedump
+}
+
+function get_shd_process_pid {
+ local vol=$1
+ ps aux | grep glusterfs | grep -E "glustershd/run/glustershd.pid" | awk '{print $2}' | head -1
+}
+
+function generate_shd_statedump {
+ local vol=$1
+ generate_statedump $(get_shd_process_pid $vol)
+}
+
+function generate_nfs_statedump {
+ local vol=$1
+ generate_statedump $(get_nfs_pid $vol)
+}
+
+function generate_brick_statedump {
+ local vol=$1
+ local host=$2
+ local brick=$3
+ generate_statedump $(get_brick_pid $vol $host $brick)
+}
+
+function afr_child_up_status_in_shd {
+ local vol=$1
+ #brick_id is (brick-num in volume info - 1)
+ local brick_id=$2
+ _afr_child_up_status $vol $brick_id generate_shd_statedump
+}
+
+function afr_child_up_status_in_nfs {
+ local vol=$1
+ #brick_id is (brick-num in volume info - 1)
+ local brick_id=$2
+ _afr_child_up_status $vol $brick_id generate_nfs_statedump
+}
+
+function nfs_up_status {
+ gluster volume status | grep "NFS Server" | awk '{print $6}'
+}
+
+function glustershd_up_status {
+ gluster volume status | grep "Self-heal Daemon" | awk '{print $6}'
+}
+
+function get_brick_pid {
+ local vol=$1
+ local host=$2
+ local brick=$3
+ local brick_hiphenated=$(echo $brick | tr '/' '-')
+ echo `cat /var/lib/glusterd/vols/$vol/run/${host}${brick_hiphenated}.pid`
+}
+
+function kill_brick {
+ local vol=$1
+ local host=$2
+ local brick=$3
+ kill -9 $(get_brick_pid $vol $host $brick)
+}
+
+function check_option_help_presence {
+ local option=$1
+ $CLI volume set help | grep "^Option:" | grep -w $option
+}
+
+function afr_get_changelog_xattr {
+ local file=$1
+ local xkey=$2
+ getfattr -n $xkey -e hex $file 2>/dev/null | grep "client-" | cut -f2 -d'='
+}
+
+function afr_get_pending_heal_count {
+ local vol=$1
+ gluster volume heal $vol info | grep "Number of entries" | awk '{ sum+=$4} END {print sum}'
+}
+
+function afr_get_index_path {
+ local brick_path=$1
+ echo "$brick_path/.glusterfs/indices/xattrop"
+}
+
+function afr_get_num_indices_in_brick {
+ local brick_path=$1
+ echo $(ls $(afr_get_index_path $brick_path) | grep -v xattrop | wc -l)
+}
+
+function gf_get_gfid_xattr {
+ file=$1
+ getfattr -n trusted.gfid -e hex $file 2>/dev/null | grep "trusted.gfid" | cut -f2 -d'='
+}
+
+function gf_gfid_xattr_to_str {
+ xval=$1
+ echo "${xval:2:8}-${xval:10:4}-${xval:14:4}-${xval:18:4}-${xval:22:12}"
+}
+
+function gf_check_file_opened_in_brick {
+ vol=$1
+ host=$2
+ brick=$3
+ realpath=$4
+ ls -l /proc/$(get_brick_pid $vol $host $brick)/fd | grep "${realpath}$" 2>&1 > /dev/null
+ if [ $? -eq 0 ]; then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function gf_get_gfid_backend_file_path {
+ brickpath=$1
+ filepath_in_brick=$2
+ gfid=$(gf_get_gfid_xattr "$brickpath/$filepath_in_brick")
+ gfidstr=$(gf_gfid_xattr_to_str $gfid)
+ echo "$brickpath/.glusterfs/${gfidstr:0:2}/${gfidstr:2:2}/$gfidstr"
+}
+
+function gf_rm_file_and_gfid_link {
+ brickpath=$1
+ filepath_in_brick=$2
+ rm -f $(gf_get_gfid_backend_file_path $brickpath $filepath_in_brick)
+ rm -f "$brickpath/$filepath_in_brick"
+}
+
+
+function gd_is_replace_brick_completed {
+ local host=$1
+ local vol=$2
+ local src_brick=$3
+ local dst_brick=$4
+ $CLI volume replace-brick $vol $src_brick $dst_brick status | grep -i "Migration complete"
+ if [ $? -eq 0 ]; then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function dht_get_layout {
+ local my_xa=trusted.glusterfs.dht
+ getfattr -d -e hex -n $my_xa $1 2> /dev/null | grep "$my_xa=" | cut -d= -f2
+}
+
+function afr_get_specific_changelog_xattr ()
+{
+ local path=$1
+ local key=$2
+ local type=$3
+ local specific_changelog=""
+
+ changelog_xattr=$(afr_get_changelog_xattr "$path" "$key")
+ if [ "$type" == "data" ]; then
+ specific_changelog=${changelog_xattr:2:8}
+ elif [ "$type" == "metadata" ]; then
+ specific_changelog=${changelog_xattr:10:8}
+ elif [ "$type" == "entry" ]; then
+ specific_changelog=${changelog_xattr:18:8}
+ else
+ specific_changlog="error"
+ fi
+
+ echo $specific_changelog
+}
+##
+ # query pathinfo xattr and extract POSIX pathname(s)
+ ##
+function get_backend_paths {
+ local path=$1
+
+ getfattr -m . -n trusted.glusterfs.pathinfo $path | tr ' ' '\n' | sed -n 's/<POSIX.*:.*:\(.*\)>.*/\1/p'
+}
+
+function do_volume_operations() {
+ local operation=$1
+ local count=$2
+ local force=$3
+
+ local pids=()
+ local cli
+ local v
+
+ for i in `seq 1 $count`; do
+ cli="CLI_$i"
+ v="V`expr $i - 1`"
+ ${!cli} volume $operation ${!v} $force &
+ pids[$i]=$!
+ done
+
+ for i in `seq 1 $count`; do
+ wait ${pids[$i]}
+ done
+}
+
+function start_volumes() {
+ do_volume_operations start $1
+}
+
+function stop_volumes() {
+ do_volume_operations stop $1
+}
+
+function start_force_volumes() {
+ do_volume_operations start $1 force
+}
+
+function stop_force_volumes() {
+ do_volume_operations stop $1 force
+}
+
+function delete_volumes() {
+ do_volume_operations delete $1
+}
+
+function volume_exists() {
+ local volname=$1
+ $CLI volume info $volname 2>&1 | grep -q 'does not exist'
+ if [ $? -eq 0 ]; then
+ return 1
+ else
+ return 0
+ fi
+}
diff --git a/transport/Makefile.am b/transport/Makefile.am
deleted file mode 100644
index e2f97437c..000000000
--- a/transport/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = socket $(IBVERBS_SUBDIR)
-
-CLEANFILES =
diff --git a/transport/ib-verbs/src/Makefile.am b/transport/ib-verbs/src/Makefile.am
deleted file mode 100644
index 1baf080f2..000000000
--- a/transport/ib-verbs/src/Makefile.am
+++ /dev/null
@@ -1,15 +0,0 @@
-noinst_HEADERS = ib-verbs.h name.h
-
-transport_LTLIBRARIES = ib-verbs.la
-transportdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/transport
-
-ib_verbs_la_LDFLAGS = -module -avoidversion
-
-ib_verbs_la_SOURCES = ib-verbs.c name.c
-ib_verbs_la_LIBADD = -libverbs $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/transport/ib-verbs \
- -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES = *~
diff --git a/transport/ib-verbs/src/ib-verbs.c b/transport/ib-verbs/src/ib-verbs.c
deleted file mode 100644
index 8938b58e1..000000000
--- a/transport/ib-verbs/src/ib-verbs.c
+++ /dev/null
@@ -1,2406 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "dict.h"
-#include "glusterfs.h"
-#include "transport.h"
-#include "protocol.h"
-#include "logging.h"
-#include "xlator.h"
-#include "name.h"
-#include "ib-verbs.h"
-#include <signal.h>
-
-int32_t
-gf_resolve_ip6 (const char *hostname,
- uint16_t port,
- int family,
- void **dnscache,
- struct addrinfo **addr_info);
-
-static uint16_t
-ib_verbs_get_local_lid (struct ibv_context *context,
- int32_t port)
-{
- struct ibv_port_attr attr;
-
- if (ibv_query_port (context, port, &attr))
- return 0;
-
- return attr.lid;
-}
-
-
-static void
-ib_verbs_put_post (ib_verbs_queue_t *queue,
- ib_verbs_post_t *post)
-{
- pthread_mutex_lock (&queue->lock);
- if (post->prev) {
- queue->active_count--;
- post->prev->next = post->next;
- }
- if (post->next)
- post->next->prev = post->prev;
- post->prev = &queue->passive_posts;
- post->next = post->prev->next;
- post->prev->next = post;
- post->next->prev = post;
- queue->passive_count++;
- pthread_mutex_unlock (&queue->lock);
-}
-
-
-static ib_verbs_post_t *
-ib_verbs_new_post (ib_verbs_device_t *device, int32_t len)
-{
- ib_verbs_post_t *post;
-
- post = (ib_verbs_post_t *) CALLOC (1, sizeof (*post));
- if (!post)
- return NULL;
-
- post->buf_size = len;
-
- post->buf = valloc (len);
- if (!post->buf) {
- free (post);
- return NULL;
- }
-
- post->mr = ibv_reg_mr (device->pd,
- post->buf,
- post->buf_size,
- IBV_ACCESS_LOCAL_WRITE);
- if (!post->mr) {
- free (post->buf);
- free (post);
- return NULL;
- }
-
- return post;
-}
-
-
-static ib_verbs_post_t *
-ib_verbs_get_post (ib_verbs_queue_t *queue)
-{
- ib_verbs_post_t *post;
-
- pthread_mutex_lock (&queue->lock);
- {
- post = queue->passive_posts.next;
- if (post == &queue->passive_posts)
- post = NULL;
-
- if (post) {
- if (post->prev)
- post->prev->next = post->next;
- if (post->next)
- post->next->prev = post->prev;
- post->prev = &queue->active_posts;
- post->next = post->prev->next;
- post->prev->next = post;
- post->next->prev = post;
- post->reused++;
- queue->active_count++;
- }
- }
- pthread_mutex_unlock (&queue->lock);
-
- return post;
-}
-
-void
-ib_verbs_destroy_post (ib_verbs_post_t *post)
-{
- ibv_dereg_mr (post->mr);
- free (post->buf);
- free (post);
-}
-
-
-static int32_t
-__ib_verbs_quota_get (ib_verbs_peer_t *peer)
-{
- int32_t ret = -1;
- ib_verbs_private_t *priv = peer->trans->private;
-
- if (priv->connected && peer->quota > 0) {
- ret = peer->quota--;
- }
-
- return ret;
-}
-
-/*
- static int32_t
- ib_verbs_quota_get (ib_verbs_peer_t *peer)
- {
- int32_t ret = -1;
- ib_verbs_private_t *priv = peer->trans->private;
-
- pthread_mutex_lock (&priv->write_mutex);
- {
- ret = __ib_verbs_quota_get (peer);
- }
- pthread_mutex_unlock (&priv->write_mutex);
-
- return ret;
- }
-*/
-
-static void
-__ib_verbs_ioq_entry_free (ib_verbs_ioq_t *entry)
-{
- list_del_init (&entry->list);
- if (entry->iobref)
- iobref_unref (entry->iobref);
-
- /* TODO: use mem-pool */
- free (entry->buf);
-
- /* TODO: use mem-pool */
- free (entry);
-}
-
-
-static void
-__ib_verbs_ioq_flush (ib_verbs_peer_t *peer)
-{
- ib_verbs_ioq_t *entry = NULL, *dummy = NULL;
-
- list_for_each_entry_safe (entry, dummy, &peer->ioq, list) {
- __ib_verbs_ioq_entry_free (entry);
- }
-}
-
-
-static int32_t
-__ib_verbs_disconnect (transport_t *this)
-{
- ib_verbs_private_t *priv = this->private;
- int32_t ret = 0;
-
- if (priv->connected || priv->tcp_connected) {
- fcntl (priv->sock, F_SETFL, O_NONBLOCK);
- if (shutdown (priv->sock, SHUT_RDWR) != 0) {
- gf_log ("transport/ib-verbs",
- GF_LOG_DEBUG,
- "shutdown () - error: %s",
- strerror (errno));
- ret = -errno;
- priv->tcp_connected = 0;
- }
- }
-
- return ret;
-}
-
-
-static int32_t
-ib_verbs_post_send (struct ibv_qp *qp,
- ib_verbs_post_t *post,
- int32_t len)
-{
- struct ibv_sge list = {
- .addr = (unsigned long) post->buf,
- .length = len,
- .lkey = post->mr->lkey
- };
-
- struct ibv_send_wr wr = {
- .wr_id = (unsigned long) post,
- .sg_list = &list,
- .num_sge = 1,
- .opcode = IBV_WR_SEND,
- .send_flags = IBV_SEND_SIGNALED,
- }, *bad_wr;
-
- if (!qp)
- return -1;
-
- return ibv_post_send (qp, &wr, &bad_wr);
-}
-
-
-static int32_t
-__ib_verbs_ioq_churn_entry (ib_verbs_peer_t *peer, ib_verbs_ioq_t *entry)
-{
- int32_t ret = 0, quota = 0;
- ib_verbs_private_t *priv = peer->trans->private;
- ib_verbs_device_t *device = priv->device;
- ib_verbs_options_t *options = &priv->options;
- ib_verbs_post_t *post = NULL;
- int32_t len = 0;
-
- quota = __ib_verbs_quota_get (peer);
- if (quota > 0) {
- post = ib_verbs_get_post (&device->sendq);
- if (!post)
- post = ib_verbs_new_post (device,
- (options->send_size + 2048));
-
- len = iov_length ((const struct iovec *)&entry->vector,
- entry->count);
- if (len >= (options->send_size + 2048)) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "increase value of option 'transport.ib-verbs."
- "work-request-send-size' (given=> %"PRId64") "
- "to send bigger (%d) messages",
- (options->send_size + 2048), len);
- return -1;
- }
-
- iov_unload (post->buf,
- (const struct iovec *)&entry->vector,
- entry->count);
-
- ret = ib_verbs_post_send (peer->qp, post, len);
- if (!ret) {
- __ib_verbs_ioq_entry_free (entry);
- ret = len;
- } else {
- gf_log ("transport/ib-verbs", GF_LOG_DEBUG,
- "ibv_post_send failed with ret = %d", ret);
- ib_verbs_put_post (&device->sendq, post);
- __ib_verbs_disconnect (peer->trans);
- ret = -1;
- }
- }
-
- return ret;
-}
-
-
-static int32_t
-__ib_verbs_ioq_churn (ib_verbs_peer_t *peer)
-{
- ib_verbs_ioq_t *entry = NULL;
- int32_t ret = 0;
-
- while (!list_empty (&peer->ioq))
- {
- /* pick next entry */
- entry = peer->ioq_next;
-
- ret = __ib_verbs_ioq_churn_entry (peer, entry);
-
- if (ret <= 0)
- break;
- }
-
- /*
- list_for_each_entry_safe (entry, dummy, &peer->ioq, list) {
- ret = __ib_verbs_ioq_churn_entry (peer, entry);
- if (ret <= 0) {
- break;
- }
- }
- */
-
- return ret;
-}
-
-static int32_t
-__ib_verbs_quota_put (ib_verbs_peer_t *peer)
-{
- int32_t ret;
-
- peer->quota++;
- ret = peer->quota;
-
- if (!list_empty (&peer->ioq)) {
- ret = __ib_verbs_ioq_churn (peer);
- }
-
- return ret;
-}
-
-
-static int32_t
-ib_verbs_quota_put (ib_verbs_peer_t *peer)
-{
- int32_t ret;
- ib_verbs_private_t *priv = peer->trans->private;
-
- pthread_mutex_lock (&priv->write_mutex);
- {
- ret = __ib_verbs_quota_put (peer);
- }
- pthread_mutex_unlock (&priv->write_mutex);
-
- return ret;
-}
-
-
-static int32_t
-ib_verbs_post_recv (struct ibv_srq *srq,
- ib_verbs_post_t *post)
-{
- struct ibv_sge list = {
- .addr = (unsigned long) post->buf,
- .length = post->buf_size,
- .lkey = post->mr->lkey
- };
-
- struct ibv_recv_wr wr = {
- .wr_id = (unsigned long) post,
- .sg_list = &list,
- .num_sge = 1,
- }, *bad_wr;
-
- return ibv_post_srq_recv (srq, &wr, &bad_wr);
-}
-
-
-static int32_t
-ib_verbs_writev (transport_t *this,
- ib_verbs_ioq_t *entry)
-{
- int32_t ret = 0, need_append = 1;
- ib_verbs_private_t *priv = this->private;
- ib_verbs_peer_t *peer = NULL;
-
- pthread_mutex_lock (&priv->write_mutex);
- {
- if (!priv->connected) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "ib-verbs is not connected to post a "
- "send request");
- ret = -1;
- goto unlock;
- }
-
- peer = &priv->peer;
- if (list_empty (&peer->ioq)) {
- ret = __ib_verbs_ioq_churn_entry (peer, entry);
- if (ret != 0) {
- need_append = 0;
- }
- }
-
- if (need_append) {
- list_add_tail (&entry->list, &peer->ioq);
- }
- }
-unlock:
- pthread_mutex_unlock (&priv->write_mutex);
- return ret;
-}
-
-
-static ib_verbs_ioq_t *
-ib_verbs_ioq_new (char *buf, int len, struct iovec *vector,
- int count, struct iobref *iobref)
-{
- ib_verbs_ioq_t *entry = NULL;
-
- /* TODO: use mem-pool */
- entry = CALLOC (1, sizeof (*entry));
-
- assert (count <= (MAX_IOVEC-2));
-
- entry->header.colonO[0] = ':';
- entry->header.colonO[1] = 'O';
- entry->header.colonO[2] = '\0';
- entry->header.version = 42;
- entry->header.size1 = hton32 (len);
- entry->header.size2 = hton32 (iov_length (vector, count));
-
- entry->vector[0].iov_base = &entry->header;
- entry->vector[0].iov_len = sizeof (entry->header);
- entry->count++;
-
- entry->vector[1].iov_base = buf;
- entry->vector[1].iov_len = len;
- entry->count++;
-
- if (vector && count)
- {
- memcpy (&entry->vector[2], vector, sizeof (*vector) * count);
- entry->count += count;
- }
-
- if (iobref)
- entry->iobref = iobref_ref (iobref);
-
- entry->buf = buf;
-
- INIT_LIST_HEAD (&entry->list);
-
- return entry;
-}
-
-
-static int32_t
-ib_verbs_submit (transport_t *this, char *buf, int32_t len,
- struct iovec *vector, int count, struct iobref *iobref)
-{
- int32_t ret = 0;
- ib_verbs_ioq_t *entry = NULL;
-
- entry = ib_verbs_ioq_new (buf, len, vector, count, iobref);
- ret = ib_verbs_writev (this, entry);
-
- if (ret > 0) {
- ret = 0;
- }
-
- return ret;
-}
-
-static int
-ib_verbs_receive (transport_t *this, char **hdr_p, size_t *hdrlen_p,
- struct iobuf **iobuf_p)
-{
- ib_verbs_private_t *priv = this->private;
- /* TODO: return error if !priv->connected, check with locks */
- /* TODO: boundry checks for data_ptr/offset */
- char *copy_from = NULL;
- ib_verbs_header_t *header = NULL;
- uint32_t size1, size2, data_len = 0;
- char *hdr = NULL;
- struct iobuf *iobuf = NULL;
- int32_t ret = 0;
-
- pthread_mutex_lock (&priv->recv_mutex);
- {
-/*
- while (!priv->data_ptr)
- pthread_cond_wait (&priv->recv_cond, &priv->recv_mutex);
-*/
-
- copy_from = priv->data_ptr + priv->data_offset;
-
- priv->data_ptr = NULL;
- data_len = priv->data_len;
- /* pthread_cond_broadcast (&priv->recv_cond); */
- }
- pthread_mutex_unlock (&priv->recv_mutex);
-
- header = (ib_verbs_header_t *)copy_from;
- if (strcmp (header->colonO, ":O")) {
- gf_log ("transport/ib-verbs", GF_LOG_DEBUG,
- "%s: corrupt header received", this->xl->name);
- ret = -1;
- goto err;
- }
-
- size1 = ntoh32 (header->size1);
- size2 = ntoh32 (header->size2);
-
- if (data_len != (size1 + size2 + sizeof (*header))) {
- gf_log ("transport/ib-verbs", GF_LOG_DEBUG,
- "%s: sizeof data read from transport is not equal "
- "to the size specified in the header",
- this->xl->name);
- ret = -1;
- goto err;
- }
-
- copy_from += sizeof (*header);
-
- if (size1) {
- hdr = CALLOC (1, size1);
- if (!hdr) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "unable to allocate header for peer %s",
- this->peerinfo.identifier);
- ret = -ENOMEM;
- goto err;
- }
- memcpy (hdr, copy_from, size1);
- copy_from += size1;
- *hdr_p = hdr;
- }
- *hdrlen_p = size1;
-
- if (size2) {
- iobuf = iobuf_get (this->xl->ctx->iobuf_pool);
- if (!iobuf) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "unable to allocate IO buffer for peer %s",
- this->peerinfo.identifier);
- ret = -ENOMEM;
- goto err;
- }
- memcpy (iobuf->ptr, copy_from, size2);
- *iobuf_p = iobuf;
- }
-
-err:
- return ret;
-}
-
-
-static void
-ib_verbs_destroy_cq (transport_t *this)
-{
- ib_verbs_private_t *priv = this->private;
- ib_verbs_device_t *device = priv->device;
-
- if (device->recv_cq)
- ibv_destroy_cq (device->recv_cq);
- device->recv_cq = NULL;
-
- if (device->send_cq)
- ibv_destroy_cq (device->send_cq);
- device->send_cq = NULL;
-
- return;
-}
-
-
-static int32_t
-ib_verbs_create_cq (transport_t *this)
-{
- ib_verbs_private_t *priv = this->private;
- ib_verbs_options_t *options = &priv->options;
- ib_verbs_device_t *device = priv->device;
- int32_t ret = 0;
-
- device->recv_cq = ibv_create_cq (priv->device->context,
- options->recv_count * 2,
- device,
- device->recv_chan,
- 0);
- if (!device->recv_cq) {
- gf_log ("transport/ib-verbs",
- GF_LOG_ERROR,
- "%s: creation of CQ failed",
- this->xl->name);
- ret = -1;
- } else if (ibv_req_notify_cq (device->recv_cq, 0)) {
- gf_log ("transport/ib-verbs",
- GF_LOG_ERROR,
- "%s: ibv_req_notify_cq on CQ failed",
- this->xl->name);
- ret = -1;
- }
-
- do {
- /* TODO: make send_cq size dynamically adaptive */
- device->send_cq = ibv_create_cq (priv->device->context,
- options->send_count * 1024,
- device,
- device->send_chan,
- 0);
- if (!device->send_cq) {
- gf_log ("transport/ib-verbs",
- GF_LOG_ERROR,
- "%s: creation of send_cq failed",
- this->xl->name);
- ret = -1;
- break;
- }
-
- if (ibv_req_notify_cq (device->send_cq, 0)) {
- gf_log ("transport/ib-verbs",
- GF_LOG_ERROR,
- "%s: ibv_req_notify_cq on send_cq failed",
- this->xl->name);
- ret = -1;
- break;
- }
- } while (0);
-
- if (ret != 0)
- ib_verbs_destroy_cq (this);
-
- return ret;
-}
-
-
-static void
-ib_verbs_register_peer (ib_verbs_device_t *device,
- int32_t qp_num,
- ib_verbs_peer_t *peer)
-{
- struct _qpent *ent;
- ib_verbs_qpreg_t *qpreg = &device->qpreg;
- int32_t hash = qp_num % 42;
-
- pthread_mutex_lock (&qpreg->lock);
- ent = qpreg->ents[hash].next;
- while ((ent != &qpreg->ents[hash]) && (ent->qp_num != qp_num))
- ent = ent->next;
- if (ent->qp_num == qp_num) {
- pthread_mutex_unlock (&qpreg->lock);
- return;
- }
- ent = (struct _qpent *) CALLOC (1, sizeof (*ent));
- ERR_ABORT (ent);
- /* TODO: ref reg->peer */
- ent->peer = peer;
- ent->next = &qpreg->ents[hash];
- ent->prev = ent->next->prev;
- ent->next->prev = ent;
- ent->prev->next = ent;
- ent->qp_num = qp_num;
- qpreg->count++;
- pthread_mutex_unlock (&qpreg->lock);
-}
-
-
-static void
-ib_verbs_unregister_peer (ib_verbs_device_t *device,
- int32_t qp_num)
-{
- struct _qpent *ent;
- ib_verbs_qpreg_t *qpreg = &device->qpreg;
- int32_t hash = qp_num % 42;
-
- pthread_mutex_lock (&qpreg->lock);
- ent = qpreg->ents[hash].next;
- while ((ent != &qpreg->ents[hash]) && (ent->qp_num != qp_num))
- ent = ent->next;
- if (ent->qp_num != qp_num) {
- pthread_mutex_unlock (&qpreg->lock);
- return;
- }
- ent->prev->next = ent->next;
- ent->next->prev = ent->prev;
- /* TODO: unref reg->peer */
- free (ent);
- qpreg->count--;
- pthread_mutex_unlock (&qpreg->lock);
-}
-
-
-static ib_verbs_peer_t *
-ib_verbs_lookup_peer (ib_verbs_device_t *device,
- int32_t qp_num)
-{
- struct _qpent *ent;
- ib_verbs_qpreg_t *qpreg = &device->qpreg;
- ib_verbs_peer_t *peer;
- int32_t hash = qp_num % 42;
-
- pthread_mutex_lock (&qpreg->lock);
- ent = qpreg->ents[hash].next;
- while ((ent != &qpreg->ents[hash]) && (ent->qp_num != qp_num))
- ent = ent->next;
- peer = ent->peer;
- pthread_mutex_unlock (&qpreg->lock);
- return peer;
-}
-
-
-static void
-__ib_verbs_destroy_qp (transport_t *this)
-{
- ib_verbs_private_t *priv = this->private;
-
- if (priv->peer.qp) {
- ib_verbs_unregister_peer (priv->device, priv->peer.qp->qp_num);
- ibv_destroy_qp (priv->peer.qp);
- }
- priv->peer.qp = NULL;
-
- return;
-}
-
-
-static int32_t
-ib_verbs_create_qp (transport_t *this)
-{
- ib_verbs_private_t *priv = this->private;
- ib_verbs_options_t *options = &priv->options;
- ib_verbs_device_t *device = priv->device;
- int32_t ret = 0;
- ib_verbs_peer_t *peer;
-
- peer = &priv->peer;
- struct ibv_qp_init_attr init_attr = {
- .send_cq = device->send_cq,
- .recv_cq = device->recv_cq,
- .srq = device->srq,
- .cap = {
- .max_send_wr = peer->send_count,
- .max_recv_wr = peer->recv_count,
- .max_send_sge = 1,
- .max_recv_sge = 1
- },
- .qp_type = IBV_QPT_RC
- };
-
- struct ibv_qp_attr attr = {
- .qp_state = IBV_QPS_INIT,
- .pkey_index = 0,
- .port_num = options->port,
- .qp_access_flags = 0
- };
-
- peer->qp = ibv_create_qp (device->pd, &init_attr);
- if (!peer->qp) {
- gf_log ("transport/ib-verbs",
- GF_LOG_CRITICAL,
- "%s: could not create QP",
- this->xl->name);
- ret = -1;
- goto out;
- } else if (ibv_modify_qp (peer->qp, &attr,
- IBV_QP_STATE |
- IBV_QP_PKEY_INDEX |
- IBV_QP_PORT |
- IBV_QP_ACCESS_FLAGS)) {
- gf_log ("transport/ib-verbs",
- GF_LOG_ERROR,
- "%s: failed to modify QP to INIT state",
- this->xl->name);
- ret = -1;
- goto out;
- }
-
- peer->local_lid = ib_verbs_get_local_lid (device->context,
- options->port);
- peer->local_qpn = peer->qp->qp_num;
- peer->local_psn = lrand48 () & 0xffffff;
-
- ib_verbs_register_peer (device, peer->qp->qp_num, peer);
-
-out:
- if (ret == -1)
- __ib_verbs_destroy_qp (this);
-
- return ret;
-}
-
-
-static void
-ib_verbs_destroy_posts (transport_t *this)
-{
-
-}
-
-
-static int32_t
-__ib_verbs_create_posts (transport_t *this,
- int32_t count,
- int32_t size,
- ib_verbs_queue_t *q)
-{
- int32_t i;
- int32_t ret = 0;
- ib_verbs_private_t *priv = this->private;
- ib_verbs_device_t *device = priv->device;
-
- for (i=0 ; i<count ; i++) {
- ib_verbs_post_t *post;
-
- post = ib_verbs_new_post (device, size + 2048);
- if (!post) {
- gf_log ("transport/ib-verbs",
- GF_LOG_ERROR,
- "%s: post creation failed",
- this->xl->name);
- ret = -1;
- break;
- }
-
- ib_verbs_put_post (q, post);
- }
- return ret;
-}
-
-
-static int32_t
-ib_verbs_create_posts (transport_t *this)
-{
- int32_t i, ret;
- ib_verbs_post_t *post = NULL;
- ib_verbs_private_t *priv = this->private;
- ib_verbs_options_t *options = &priv->options;
- ib_verbs_device_t *device = priv->device;
-
- ret = __ib_verbs_create_posts (this, options->send_count,
- options->send_size,
- &device->sendq);
- if (!ret)
- ret = __ib_verbs_create_posts (this, options->recv_count,
- options->recv_size,
- &device->recvq);
-
- if (!ret) {
- for (i=0 ; i<options->recv_count ; i++) {
- post = ib_verbs_get_post (&device->recvq);
- if (ib_verbs_post_recv (device->srq, post) != 0) {
- ret = -1;
- break;
- }
- }
- }
-
- if (ret)
- ib_verbs_destroy_posts (this);
-
- return ret;
-}
-
-
-static int32_t
-ib_verbs_connect_qp (transport_t *this)
-{
- ib_verbs_private_t *priv = this->private;
- ib_verbs_options_t *options = &priv->options;
- struct ibv_qp_attr attr = {
- .qp_state = IBV_QPS_RTR,
- .path_mtu = options->mtu,
- .dest_qp_num = priv->peer.remote_qpn,
- .rq_psn = priv->peer.remote_psn,
- .max_dest_rd_atomic = 1,
- .min_rnr_timer = 12,
- .ah_attr = {
- .is_global = 0,
- .dlid = priv->peer.remote_lid,
- .sl = 0,
- .src_path_bits = 0,
- .port_num = options->port
- }
- };
- if (ibv_modify_qp (priv->peer.qp, &attr,
- IBV_QP_STATE |
- IBV_QP_AV |
- IBV_QP_PATH_MTU |
- IBV_QP_DEST_QPN |
- IBV_QP_RQ_PSN |
- IBV_QP_MAX_DEST_RD_ATOMIC |
- IBV_QP_MIN_RNR_TIMER)) {
- gf_log ("transport/ib-verbs",
- GF_LOG_CRITICAL,
- "Failed to modify QP to RTR\n");
- return -1;
- }
-
- /* TODO: make timeout and retry_cnt configurable from options */
- attr.qp_state = IBV_QPS_RTS;
- attr.timeout = 14;
- attr.retry_cnt = 7;
- attr.rnr_retry = 7;
- attr.sq_psn = priv->peer.local_psn;
- attr.max_rd_atomic = 1;
- if (ibv_modify_qp (priv->peer.qp, &attr,
- IBV_QP_STATE |
- IBV_QP_TIMEOUT |
- IBV_QP_RETRY_CNT |
- IBV_QP_RNR_RETRY |
- IBV_QP_SQ_PSN |
- IBV_QP_MAX_QP_RD_ATOMIC)) {
- gf_log ("transport/ib-verbs",
- GF_LOG_CRITICAL,
- "Failed to modify QP to RTS\n");
- return -1;
- }
-
- return 0;
-}
-
-static int32_t
-__ib_verbs_teardown (transport_t *this)
-{
- ib_verbs_private_t *priv = this->private;
-
- __ib_verbs_destroy_qp (this);
-
- if (!list_empty (&priv->peer.ioq)) {
- __ib_verbs_ioq_flush (&priv->peer);
- }
-
- /* TODO: decrement cq size */
- return 0;
-}
-
-/*
- * return value:
- * 0 = success (completed)
- * -1 = error
- * > 0 = incomplete
- */
-
-static int
-__tcp_rwv (transport_t *this, struct iovec *vector, int count,
- struct iovec **pending_vector, int *pending_count,
- int write)
-{
- ib_verbs_private_t *priv = NULL;
- int sock = -1;
- int ret = -1;
- struct iovec *opvector = vector;
- int opcount = count;
- int moved = 0;
-
- priv = this->private;
- sock = priv->sock;
-
- while (opcount)
- {
- if (write)
- {
- ret = writev (sock, opvector, opcount);
-
- if (ret == 0 || (ret == -1 && errno == EAGAIN))
- {
- /* done for now */
- break;
- }
- }
- else
- {
- ret = readv (sock, opvector, opcount);
-
- if (ret == -1 && errno == EAGAIN)
- {
- /* done for now */
- break;
- }
- }
-
- if (ret == 0)
- {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "EOF from peer %s", this->peerinfo.identifier);
- opcount = -1;
- errno = ENOTCONN;
- break;
- }
-
- if (ret == -1)
- {
- if (errno == EINTR)
- continue;
-
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "%s failed (%s)", write ? "writev" : "readv",
- strerror (errno));
- if (write && !priv->connected &&
- (errno == ECONNREFUSED))
- gf_log (this->xl->name, GF_LOG_ERROR,
- "possible mismatch of 'transport-type'"
- " in protocol server and client. "
- "check volume file");
- opcount = -1;
- break;
- }
-
- moved = 0;
-
- while (moved < ret)
- {
- if ((ret - moved) >= opvector[0].iov_len)
- {
- moved += opvector[0].iov_len;
- opvector++;
- opcount--;
- }
- else
- {
- opvector[0].iov_len -= (ret - moved);
- opvector[0].iov_base += (ret - moved);
- moved += (ret - moved);
- }
- while (opcount && !opvector[0].iov_len)
- {
- opvector++;
- opcount--;
- }
- }
- }
-
- if (pending_vector)
- *pending_vector = opvector;
-
- if (pending_count)
- *pending_count = opcount;
-
- return opcount;
-}
-
-
-static int
-__tcp_readv (transport_t *this, struct iovec *vector, int count,
- struct iovec **pending_vector, int *pending_count)
-{
- int ret = -1;
-
- ret = __tcp_rwv (this, vector, count,
- pending_vector, pending_count, 0);
-
- return ret;
-}
-
-
-static int
-__tcp_writev (transport_t *this, struct iovec *vector, int count,
- struct iovec **pending_vector, int *pending_count)
-{
- int ret = -1;
- ib_verbs_private_t *priv = this->private;
-
- ret = __tcp_rwv (this, vector, count, pending_vector,
- pending_count, 1);
-
- if (ret > 0) {
- /* TODO: Avoid multiple calls when socket is already
- registered for POLLOUT */
- priv->idx = event_select_on (this->xl->ctx->event_pool,
- priv->sock, priv->idx, -1, 1);
- } else if (ret == 0) {
- priv->idx = event_select_on (this->xl->ctx->event_pool,
- priv->sock,
- priv->idx, -1, 0);
- }
-
- return ret;
-}
-
-
-static void *
-ib_verbs_recv_completion_proc (void *data)
-{
- struct ibv_comp_channel *chan = data;
- ib_verbs_private_t *priv = NULL;
- ib_verbs_device_t *device;
- ib_verbs_post_t *post;
- ib_verbs_peer_t *peer;
- struct ibv_cq *event_cq;
- struct ibv_wc wc;
- void *event_ctx;
- int32_t ret = 0;
-
-
- while (1) {
- ret = ibv_get_cq_event (chan, &event_cq, &event_ctx);
- if (ret) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "ibv_get_cq_event failed, terminating recv "
- "thread %d (%d)", ret, errno);
- continue;
- }
-
- device = event_ctx;
-
- ret = ibv_req_notify_cq (event_cq, 0);
- if (ret) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "ibv_req_notify_cq on %s failed, terminating "
- "recv thread: %d (%d)",
- device->device_name, ret, errno);
- continue;
- }
-
- device = (ib_verbs_device_t *) event_ctx;
-
- while ((ret = ibv_poll_cq (event_cq, 1, &wc)) > 0) {
- post = (ib_verbs_post_t *) (long) wc.wr_id;
- peer = ib_verbs_lookup_peer (device, wc.qp_num);
-
- if (wc.status != IBV_WC_SUCCESS) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "recv work request on `%s' returned "
- "error (%d)",
- device->device_name,
- wc.status);
- if (peer)
- transport_disconnect (peer->trans);
-
- if (post) {
- ib_verbs_post_recv (device->srq, post);
- }
- continue;
- }
-
- if (peer) {
- priv = peer->trans->private;
-
- pthread_mutex_lock (&priv->recv_mutex);
- {
-/* while (priv->data_ptr)
- pthread_cond_wait (&priv->recv_cond, &priv->recv_mutex);
-*/
-
- priv->data_ptr = post->buf;
- priv->data_offset = 0;
- priv->data_len = wc.byte_len;
-
- /*pthread_cond_broadcast (&priv->recv_cond);*/
- }
- pthread_mutex_unlock (&priv->recv_mutex);
-
- if ((ret = xlator_notify (peer->trans->xl, GF_EVENT_POLLIN,
- peer->trans, NULL)) == -1) {
- gf_log ("transport/ib-verbs",
- GF_LOG_DEBUG,
- "pollin notification to %s "
- "failed, disconnecting "
- "transport",
- peer->trans->xl->name);
- transport_disconnect (peer->trans);
- }
- } else {
- gf_log ("transport/ib-verbs",
- GF_LOG_DEBUG,
- "could not lookup peer for qp_num: %d",
- wc.qp_num);
- }
- ib_verbs_post_recv (device->srq, post);
- }
-
- if (ret < 0) {
- gf_log ("transport/ib-verbs",
- GF_LOG_ERROR,
- "ibv_poll_cq on `%s' returned error "
- "(ret = %d, errno = %d)",
- device->device_name, ret, errno);
- continue;
- }
- ibv_ack_cq_events (event_cq, 1);
- }
- return NULL;
-}
-
-
-static void *
-ib_verbs_send_completion_proc (void *data)
-{
- struct ibv_comp_channel *chan = data;
- ib_verbs_post_t *post;
- ib_verbs_peer_t *peer;
- struct ibv_cq *event_cq;
- void *event_ctx;
- ib_verbs_device_t *device;
- struct ibv_wc wc;
- int32_t ret;
-
- while (1) {
- ret = ibv_get_cq_event (chan, &event_cq, &event_ctx);
- if (ret) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "ibv_get_cq_event on failed, terminating "
- "send thread: %d (%d)", ret, errno);
- continue;
- }
-
- device = event_ctx;
-
- ret = ibv_req_notify_cq (event_cq, 0);
- if (ret) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "ibv_req_notify_cq on %s failed, terminating "
- "send thread: %d (%d)",
- device->device_name, ret, errno);
- continue;
- }
-
- while ((ret = ibv_poll_cq (event_cq, 1, &wc)) > 0) {
- post = (ib_verbs_post_t *) (long) wc.wr_id;
- peer = ib_verbs_lookup_peer (device, wc.qp_num);
-
- if (wc.status != IBV_WC_SUCCESS) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "send work request on `%s' returned "
- "error wc.status = %d, wc.vendor_err "
- "= %d, post->buf = %p, wc.byte_len = "
- "%d, post->reused = %d",
- device->device_name, wc.status,
- wc.vendor_err,
- post->buf, wc.byte_len, post->reused);
- if (wc.status == IBV_WC_RETRY_EXC_ERR)
- gf_log ("ib-verbs", GF_LOG_ERROR,
- "connection between client and"
- " server not working. check by"
- " running 'ibv_srq_pingpong'. "
- "also make sure subnet manager"
- " is running (eg: 'opensm'), "
- "or check if ib-verbs port is "
- "valid (or active) by running "
- " 'ibv_devinfo'. contact "
- "Gluster Support Team if "
- "the problem persists.");
- if (peer)
- transport_disconnect (peer->trans);
- }
-
- if (post) {
- ib_verbs_put_post (&device->sendq, post);
- }
-
- if (peer) {
- int quota_ret = ib_verbs_quota_put (peer);
- if (quota_ret < 0) {
- gf_log ("ib-verbs", GF_LOG_DEBUG,
- "failed to send message");
-
- }
- } else {
- gf_log ("transport/ib-verbs", GF_LOG_DEBUG,
- "could not lookup peer for qp_num: %d",
- wc.qp_num);
- }
- }
-
- if (ret < 0) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "ibv_poll_cq on `%s' returned error (ret = %d,"
- " errno = %d)",
- device->device_name, ret, errno);
- continue;
- }
- ibv_ack_cq_events (event_cq, 1);
- }
-
- return NULL;
-}
-
-static void
-ib_verbs_options_init (transport_t *this)
-{
- ib_verbs_private_t *priv = this->private;
- ib_verbs_options_t *options = &priv->options;
- int32_t mtu;
- data_t *temp;
-
- /* TODO: validate arguments from options below */
-
- options->send_size = this->xl->ctx->page_size * 4; /* 512 KB */
- options->recv_size = this->xl->ctx->page_size * 4; /* 512 KB */
- options->send_count = 32;
- options->recv_count = 32;
-
- temp = dict_get (this->xl->options,
- "transport.ib-verbs.work-request-send-count");
- if (temp)
- options->send_count = data_to_int32 (temp);
-
- temp = dict_get (this->xl->options,
- "transport.ib-verbs.work-request-recv-count");
- if (temp)
- options->recv_count = data_to_int32 (temp);
-
- options->port = 1;
- temp = dict_get (this->xl->options,
- "transport.ib-verbs.port");
- if (temp)
- options->port = data_to_uint64 (temp);
-
- options->mtu = mtu = IBV_MTU_2048;
- temp = dict_get (this->xl->options,
- "transport.ib-verbs.mtu");
- if (temp)
- mtu = data_to_int32 (temp);
- switch (mtu) {
- case 256: options->mtu = IBV_MTU_256;
- break;
- case 512: options->mtu = IBV_MTU_512;
- break;
- case 1024: options->mtu = IBV_MTU_1024;
- break;
- case 2048: options->mtu = IBV_MTU_2048;
- break;
- case 4096: options->mtu = IBV_MTU_4096;
- break;
- default:
- if (temp)
- gf_log ("transport/ib-verbs", GF_LOG_WARNING,
- "%s: unrecognized MTU value '%s', defaulting "
- "to '2048'", this->xl->name,
- data_to_str (temp));
- else
- gf_log ("transport/ib-verbs", GF_LOG_TRACE,
- "%s: defaulting MTU to '2048'",
- this->xl->name);
- options->mtu = IBV_MTU_2048;
- break;
- }
-
- temp = dict_get (this->xl->options,
- "transport.ib-verbs.device-name");
- if (temp)
- options->device_name = strdup (temp->data);
-
- return;
-}
-
-static void
-ib_verbs_queue_init (ib_verbs_queue_t *queue)
-{
- pthread_mutex_init (&queue->lock, NULL);
-
- queue->active_posts.next = &queue->active_posts;
- queue->active_posts.prev = &queue->active_posts;
- queue->passive_posts.next = &queue->passive_posts;
- queue->passive_posts.prev = &queue->passive_posts;
-}
-
-
-static ib_verbs_device_t *
-ib_verbs_get_device (transport_t *this,
- struct ibv_device *ib_dev,
- int32_t port)
-{
- glusterfs_ctx_t *ctx = this->xl->ctx;
- ib_verbs_private_t *priv = this->private;
- ib_verbs_options_t *options = &priv->options;
- char *device_name = priv->options.device_name;
- int32_t ret = 0, i = 0;
-
- ib_verbs_device_t *trav;
-
- trav = ctx->ib;
- while (trav) {
- if ((!strcmp (trav->device_name, device_name)) &&
- (trav->port == port))
- break;
- trav = trav->next;
- }
-
- if (!trav) {
- struct ibv_context *ibctx = ibv_open_device (ib_dev);
-
- if (!ibctx) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "cannot open device `%s'",
- device_name);
- return NULL;
- }
-
- trav = CALLOC (1, sizeof (*trav));
- ERR_ABORT (trav);
- priv->device = trav;
-
- trav->context = ibctx;
- trav->device_name = strdup (device_name);
- trav->port = port;
-
- trav->next = ctx->ib;
- ctx->ib = trav;
-
- trav->send_chan = ibv_create_comp_channel (trav->context);
- if (!trav->send_chan) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "%s: could not create send completion channel",
- device_name);
- /* TODO: cleanup current mess */
- return NULL;
- }
-
- trav->recv_chan = ibv_create_comp_channel (trav->context);
- if (!trav->recv_chan) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "could not create recv completion channel");
- /* TODO: cleanup current mess */
- return NULL;
- }
-
- if (ib_verbs_create_cq (this) < 0) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "%s: could not create CQ",
- this->xl->name);
- return NULL;
- }
-
- /* protection domain */
- trav->pd = ibv_alloc_pd (trav->context);
-
- if (!trav->pd) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "%s: could not allocate protection domain",
- this->xl->name);
- return NULL;
- }
-
- struct ibv_srq_init_attr attr = {
- .attr = {
- .max_wr = options->recv_count,
- .max_sge = 1
- }
- };
- trav->srq = ibv_create_srq (trav->pd, &attr);
-
- if (!trav->srq) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "%s: could not create SRQ",
- this->xl->name);
- return NULL;
- }
-
- /* queue init */
- ib_verbs_queue_init (&trav->sendq);
- ib_verbs_queue_init (&trav->recvq);
-
- if (ib_verbs_create_posts (this) < 0) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "%s: could not allocate posts",
- this->xl->name);
- return NULL;
- }
-
- /* completion threads */
- ret = pthread_create (&trav->send_thread,
- NULL,
- ib_verbs_send_completion_proc,
- trav->send_chan);
- if (ret) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "could not create send completion thread");
- return NULL;
- }
- ret = pthread_create (&trav->recv_thread,
- NULL,
- ib_verbs_recv_completion_proc,
- trav->recv_chan);
- if (ret) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "could not create recv completion thread");
- return NULL;
- }
-
- /* qpreg */
- pthread_mutex_init (&trav->qpreg.lock, NULL);
- for (i=0; i<42; i++) {
- trav->qpreg.ents[i].next = &trav->qpreg.ents[i];
- trav->qpreg.ents[i].prev = &trav->qpreg.ents[i];
- }
- }
- return trav;
-}
-
-static int32_t
-ib_verbs_init (transport_t *this)
-{
- ib_verbs_private_t *priv = this->private;
- ib_verbs_options_t *options = &priv->options;
- struct ibv_device **dev_list;
- struct ibv_device *ib_dev = NULL;
- int32_t i;
-
- ib_verbs_options_init (this);
-
- {
- dev_list = ibv_get_device_list (NULL);
-
- if (!dev_list) {
- gf_log ("transport/ib-verbs",
- GF_LOG_CRITICAL,
- "No IB devices found");
- return -1;
- }
-
- if (!options->device_name) {
- if (*dev_list) {
- options->device_name =
- strdup (ibv_get_device_name (*dev_list));
- } else {
- gf_log ("transport/ib-verbs", GF_LOG_CRITICAL,
- "IB device list is empty. Check for "
- "'ib_uverbs' module");
- return -1;
- }
- }
-
- for (i = 0; dev_list[i]; i++) {
- if (!strcmp (ibv_get_device_name (dev_list[i]),
- options->device_name)) {
- ib_dev = dev_list[i];
- break;
- }
- }
-
- if (!ib_dev) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "could not open device `%s' (does not exist)",
- options->device_name);
- ibv_free_device_list (dev_list);
- return -1;
- }
-
- priv->device = ib_verbs_get_device (this, ib_dev,
- options->port);
-
- if (!priv->device) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "could not create ib_verbs device for %s",
- options->device_name);
- ibv_free_device_list (dev_list);
- return -1;
- }
- ibv_free_device_list (dev_list);
- }
-
- priv->peer.trans = this;
- INIT_LIST_HEAD (&priv->peer.ioq);
-
- pthread_mutex_init (&priv->read_mutex, NULL);
- pthread_mutex_init (&priv->write_mutex, NULL);
- pthread_mutex_init (&priv->recv_mutex, NULL);
- /* pthread_cond_init (&priv->recv_cond, NULL); */
-
- return 0;
-}
-
-
-static int32_t
-ib_verbs_disconnect (transport_t *this)
-{
- ib_verbs_private_t *priv = this->private;
- int32_t ret = 0;
-
- pthread_mutex_lock (&priv->write_mutex);
- {
- ret = __ib_verbs_disconnect (this);
- }
- pthread_mutex_unlock (&priv->write_mutex);
-
- return ret;
-}
-
-
-static int32_t
-__tcp_connect_finish (int fd)
-{
- int ret = -1;
- int optval = 0;
- socklen_t optlen = sizeof (int);
-
- ret = getsockopt (fd, SOL_SOCKET, SO_ERROR,
- (void *)&optval, &optlen);
-
- if (ret == 0 && optval)
- {
- errno = optval;
- ret = -1;
- }
-
- return ret;
-}
-
-static inline void
-ib_verbs_fill_handshake_data (char *buf, struct ib_verbs_nbio *nbio,
- ib_verbs_private_t *priv)
-{
- sprintf (buf,
- "QP1:RECV_BLKSIZE=%08x:SEND_BLKSIZE=%08x\n"
- "QP1:LID=%04x:QPN=%06x:PSN=%06x\n",
- priv->peer.recv_size,
- priv->peer.send_size,
- priv->peer.local_lid,
- priv->peer.local_qpn,
- priv->peer.local_psn);
-
- nbio->vector.iov_base = buf;
- nbio->vector.iov_len = strlen (buf) + 1;
- nbio->count = 1;
- return;
-}
-
-static inline void
-ib_verbs_fill_handshake_ack (char *buf, struct ib_verbs_nbio *nbio)
-{
- sprintf (buf, "DONE\n");
- nbio->vector.iov_base = buf;
- nbio->vector.iov_len = strlen (buf) + 1;
- nbio->count = 1;
- return;
-}
-
-static int
-ib_verbs_handshake_pollin (transport_t *this)
-{
- int ret = 0;
- ib_verbs_private_t *priv = this->private;
- char *buf = priv->handshake.incoming.buf;
- int32_t recv_buf_size, send_buf_size;
- socklen_t sock_len;
-
- if (priv->handshake.incoming.state == IB_VERBS_HANDSHAKE_COMPLETE) {
- return -1;
- }
-
- pthread_mutex_lock (&priv->write_mutex);
- {
- while (priv->handshake.incoming.state != IB_VERBS_HANDSHAKE_COMPLETE)
- {
- switch (priv->handshake.incoming.state)
- {
- case IB_VERBS_HANDSHAKE_START:
- buf = priv->handshake.incoming.buf = CALLOC (1, 256);
- ib_verbs_fill_handshake_data (buf, &priv->handshake.incoming, priv);
- buf[0] = 0;
- priv->handshake.incoming.state = IB_VERBS_HANDSHAKE_RECEIVING_DATA;
- break;
-
- case IB_VERBS_HANDSHAKE_RECEIVING_DATA:
- ret = __tcp_readv (this,
- &priv->handshake.incoming.vector,
- priv->handshake.incoming.count,
- &priv->handshake.incoming.pending_vector,
- &priv->handshake.incoming.pending_count);
- if (ret == -1) {
- goto unlock;
- }
-
- if (ret > 0) {
- gf_log (this->xl->name, GF_LOG_TRACE,
- "partial header read on NB socket. continue later");
- goto unlock;
- }
-
- if (!ret) {
- priv->handshake.incoming.state = IB_VERBS_HANDSHAKE_RECEIVED_DATA;
- }
- break;
-
- case IB_VERBS_HANDSHAKE_RECEIVED_DATA:
- ret = sscanf (buf,
- "QP1:RECV_BLKSIZE=%08x:SEND_BLKSIZE=%08x\n"
- "QP1:LID=%04x:QPN=%06x:PSN=%06x\n",
- &recv_buf_size,
- &send_buf_size,
- &priv->peer.remote_lid,
- &priv->peer.remote_qpn,
- &priv->peer.remote_psn);
-
- if ((ret != 5) && (strncmp (buf, "QP1:", 4))) {
- gf_log ("transport/ib-verbs",
- GF_LOG_CRITICAL,
- "%s: remote-host(%s)'s "
- "transport type is different",
- this->xl->name,
- this->peerinfo.identifier);
- ret = -1;
- goto unlock;
- }
-
- if (recv_buf_size < priv->peer.recv_size)
- priv->peer.recv_size = recv_buf_size;
- if (send_buf_size < priv->peer.send_size)
- priv->peer.send_size = send_buf_size;
-
- gf_log ("transport/ib-verbs", GF_LOG_TRACE,
- "%s: transacted recv_size=%d "
- "send_size=%d",
- this->xl->name, priv->peer.recv_size,
- priv->peer.send_size);
-
- priv->peer.quota = priv->peer.send_count;
-
- if (ib_verbs_connect_qp (this)) {
- gf_log ("transport/ib-verbs",
- GF_LOG_ERROR,
- "%s: failed to connect with "
- "remote QP", this->xl->name);
- ret = -1;
- goto unlock;
- }
- ib_verbs_fill_handshake_ack (buf, &priv->handshake.incoming);
- buf[0] = 0;
- priv->handshake.incoming.state = IB_VERBS_HANDSHAKE_RECEIVING_ACK;
- break;
-
- case IB_VERBS_HANDSHAKE_RECEIVING_ACK:
- ret = __tcp_readv (this,
- &priv->handshake.incoming.vector,
- priv->handshake.incoming.count,
- &priv->handshake.incoming.pending_vector,
- &priv->handshake.incoming.pending_count);
- if (ret == -1) {
- goto unlock;
- }
-
- if (ret > 0) {
- gf_log (this->xl->name, GF_LOG_TRACE,
- "partial header read on NB "
- "socket. continue later");
- goto unlock;
- }
-
- if (!ret) {
- priv->handshake.incoming.state = IB_VERBS_HANDSHAKE_RECEIVED_ACK;
- }
- break;
-
- case IB_VERBS_HANDSHAKE_RECEIVED_ACK:
- if (strncmp (buf, "DONE", 4)) {
- gf_log ("transport/ib-verbs",
- GF_LOG_DEBUG,
- "%s: handshake-3 did not "
- "return 'DONE' (%s)",
- this->xl->name, buf);
- ret = -1;
- goto unlock;
- }
- ret = 0;
- priv->connected = 1;
- sock_len = sizeof (struct sockaddr_storage);
- getpeername (priv->sock,
- (struct sockaddr *) &this->peerinfo.sockaddr,
- &sock_len);
-
- FREE (priv->handshake.incoming.buf);
- priv->handshake.incoming.buf = NULL;
- priv->handshake.incoming.state = IB_VERBS_HANDSHAKE_COMPLETE;
- }
- }
- }
-unlock:
- pthread_mutex_unlock (&priv->write_mutex);
-
- if (ret == -1) {
- transport_disconnect (this);
- } else {
- ret = 0;
- }
-
- if (!ret && priv->connected) {
- ret = xlator_notify (this->xl, GF_EVENT_CHILD_UP, this);
- }
-
- return ret;
-}
-
-static int
-ib_verbs_handshake_pollout (transport_t *this)
-{
- ib_verbs_private_t *priv = this->private;
- char *buf = priv->handshake.outgoing.buf;
- int32_t ret = 0;
-
- if (priv->handshake.outgoing.state == IB_VERBS_HANDSHAKE_COMPLETE) {
- return 0;
- }
-
- pthread_mutex_unlock (&priv->write_mutex);
- {
- while (priv->handshake.outgoing.state != IB_VERBS_HANDSHAKE_COMPLETE)
- {
- switch (priv->handshake.outgoing.state)
- {
- case IB_VERBS_HANDSHAKE_START:
- buf = priv->handshake.outgoing.buf = CALLOC (1, 256);
- ib_verbs_fill_handshake_data (buf, &priv->handshake.outgoing, priv);
- priv->handshake.outgoing.state = IB_VERBS_HANDSHAKE_SENDING_DATA;
- break;
-
- case IB_VERBS_HANDSHAKE_SENDING_DATA:
- ret = __tcp_writev (this,
- &priv->handshake.outgoing.vector,
- priv->handshake.outgoing.count,
- &priv->handshake.outgoing.pending_vector,
- &priv->handshake.outgoing.pending_count);
- if (ret == -1) {
- goto unlock;
- }
-
- if (ret > 0) {
- gf_log (this->xl->name, GF_LOG_TRACE,
- "partial header read on NB socket. continue later");
- goto unlock;
- }
-
- if (!ret) {
- priv->handshake.outgoing.state = IB_VERBS_HANDSHAKE_SENT_DATA;
- }
- break;
-
- case IB_VERBS_HANDSHAKE_SENT_DATA:
- ib_verbs_fill_handshake_ack (buf, &priv->handshake.outgoing);
- priv->handshake.outgoing.state = IB_VERBS_HANDSHAKE_SENDING_ACK;
- break;
-
- case IB_VERBS_HANDSHAKE_SENDING_ACK:
- ret = __tcp_writev (this,
- &priv->handshake.outgoing.vector,
- priv->handshake.outgoing.count,
- &priv->handshake.outgoing.pending_vector,
- &priv->handshake.outgoing.pending_count);
-
- if (ret == -1) {
- goto unlock;
- }
-
- if (ret > 0) {
- gf_log (this->xl->name, GF_LOG_TRACE,
- "partial header read on NB "
- "socket. continue later");
- goto unlock;
- }
-
- if (!ret) {
- FREE (priv->handshake.outgoing.buf);
- priv->handshake.outgoing.buf = NULL;
- priv->handshake.outgoing.state = IB_VERBS_HANDSHAKE_COMPLETE;
- }
- break;
- }
- }
- }
-unlock:
- pthread_mutex_unlock (&priv->write_mutex);
-
- if (ret == -1) {
- transport_disconnect (this);
- } else {
- ret = 0;
- }
-
- return ret;
-}
-
-static int
-ib_verbs_handshake_pollerr (transport_t *this)
-{
- ib_verbs_private_t *priv = this->private;
- int32_t ret = 0;
- char need_unref = 0;
-
- gf_log ("transport/ib-verbs", GF_LOG_DEBUG,
- "%s: peer disconnected, cleaning up",
- this->xl->name);
-
- pthread_mutex_lock (&priv->write_mutex);
- {
- __ib_verbs_teardown (this);
-
- if (priv->sock != -1) {
- event_unregister (this->xl->ctx->event_pool,
- priv->sock, priv->idx);
- need_unref = 1;
-
- if (close (priv->sock) != 0) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "close () - error: %s",
- strerror (errno));
- ret = -errno;
- }
- priv->tcp_connected = priv->connected = 0;
- priv->sock = -1;
- }
-
- if (priv->handshake.incoming.buf) {
- FREE (priv->handshake.incoming.buf);
- priv->handshake.incoming.buf = NULL;
- }
-
- priv->handshake.incoming.state = IB_VERBS_HANDSHAKE_START;
-
- if (priv->handshake.outgoing.buf) {
- FREE (priv->handshake.outgoing.buf);
- priv->handshake.outgoing.buf = NULL;
- }
-
- priv->handshake.outgoing.state = IB_VERBS_HANDSHAKE_START;
- }
- pthread_mutex_unlock (&priv->write_mutex);
-
- xlator_notify (this->xl, GF_EVENT_POLLERR, this, NULL);
-
- if (need_unref)
- transport_unref (this);
-
- return 0;
-}
-
-
-static int
-tcp_connect_finish (transport_t *this)
-{
- ib_verbs_private_t *priv = this->private;
- int error = 0, ret = 0;
-
- pthread_mutex_lock (&priv->write_mutex);
- {
- ret = __tcp_connect_finish (priv->sock);
-
- if (!ret) {
- this->myinfo.sockaddr_len =
- sizeof (this->myinfo.sockaddr);
- ret = getsockname (priv->sock,
- (struct sockaddr *)&this->myinfo.sockaddr,
- &this->myinfo.sockaddr_len);
- if (ret == -1)
- {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "getsockname on new client-socket %d "
- "failed (%s)",
- priv->sock, strerror (errno));
- close (priv->sock);
- error = 1;
- goto unlock;
- }
-
- get_transport_identifiers (this);
- priv->tcp_connected = 1;
- }
-
- if (ret == -1 && errno != EINPROGRESS) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "tcp connect to %s failed (%s)",
- this->peerinfo.identifier, strerror (errno));
- error = 1;
- }
- }
-unlock:
- pthread_mutex_unlock (&priv->write_mutex);
-
- if (error) {
- transport_disconnect (this);
- }
-
- return ret;
-}
-
-static int
-ib_verbs_event_handler (int fd, int idx, void *data,
- int poll_in, int poll_out, int poll_err)
-{
- transport_t *this = data;
- ib_verbs_private_t *priv = this->private;
- ib_verbs_options_t *options = NULL;
- int ret = 0;
-
- if (!priv->tcp_connected) {
- ret = tcp_connect_finish (this);
- if (priv->tcp_connected) {
- options = &priv->options;
-
- priv->peer.send_count = options->send_count;
- priv->peer.recv_count = options->recv_count;
- priv->peer.send_size = options->send_size;
- priv->peer.recv_size = options->recv_size;
-
- if ((ret = ib_verbs_create_qp (this)) < 0) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "%s: could not create QP",
- this->xl->name);
- transport_disconnect (this);
- }
- }
- }
-
- if (!ret && poll_out && priv->tcp_connected) {
- ret = ib_verbs_handshake_pollout (this);
- }
-
- if (!ret && poll_in && priv->tcp_connected) {
- if (priv->handshake.incoming.state == IB_VERBS_HANDSHAKE_COMPLETE) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "%s: pollin received on tcp socket (peer: %s) "
- "after handshake is complete",
- this->xl->name, this->peerinfo.identifier);
- ib_verbs_handshake_pollerr (this);
- return 0;
- }
- ret = ib_verbs_handshake_pollin (this);
- }
-
- if (ret < 0 || poll_err) {
- ret = ib_verbs_handshake_pollerr (this);
- }
-
- return 0;
-}
-
-static int
-__tcp_nonblock (int fd)
-{
- int flags = 0;
- int ret = -1;
-
- flags = fcntl (fd, F_GETFL);
-
- if (flags != -1)
- ret = fcntl (fd, F_SETFL, flags | O_NONBLOCK);
-
- return ret;
-}
-
-static int32_t
-ib_verbs_connect (struct transport *this)
-{
- dict_t *options = this->xl->options;
-
- ib_verbs_private_t *priv = this->private;
-
- int32_t ret = 0;
- gf_boolean_t non_blocking = 1;
- struct sockaddr_storage sockaddr;
- socklen_t sockaddr_len = 0;
-
- if (priv->connected) {
- return 0;
- }
-
- if (dict_get (options, "non-blocking-io")) {
- char *nb_connect = data_to_str (dict_get (this->xl->options,
- "non-blocking-io"));
-
- if (gf_string2boolean (nb_connect, &non_blocking) == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "'non-blocking-io' takes only boolean "
- "options, not taking any action");
- non_blocking = 1;
- }
- }
-
- ret = ibverbs_client_get_remote_sockaddr (this, (struct sockaddr *)&sockaddr,
- &sockaddr_len);
- if (ret != 0) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "cannot get remote address to connect");
- return ret;
- }
-
- pthread_mutex_lock (&priv->write_mutex);
- {
- if (priv->sock != -1) {
- ret = 0;
- goto unlock;
- }
-
- priv->sock = socket (((struct sockaddr *)&sockaddr)->sa_family,
- SOCK_STREAM, 0);
-
- if (priv->sock == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "socket () - error: %s", strerror (errno));
- ret = -errno;
- goto unlock;
- }
-
- gf_log (this->xl->name, GF_LOG_TRACE,
- "socket fd = %d", priv->sock);
-
- memcpy (&this->peerinfo.sockaddr, &sockaddr, sockaddr_len);
- this->peerinfo.sockaddr_len = sockaddr_len;
-
- ((struct sockaddr *) &this->myinfo.sockaddr)->sa_family =
- ((struct sockaddr *)&this->peerinfo.sockaddr)->sa_family;
-
- if (non_blocking)
- {
- ret = __tcp_nonblock (priv->sock);
-
- if (ret == -1)
- {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "could not set socket %d to non "
- "blocking mode (%s)",
- priv->sock, strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
- }
-
- ret = client_bind (this,
- (struct sockaddr *)&this->myinfo.sockaddr,
- &this->myinfo.sockaddr_len, priv->sock);
- if (ret == -1)
- {
- gf_log (this->xl->name, GF_LOG_WARNING,
- "client bind failed: %s", strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
-
- ret = connect (priv->sock,
- (struct sockaddr *)&this->peerinfo.sockaddr,
- this->peerinfo.sockaddr_len);
- if (ret == -1 && errno != EINPROGRESS)
- {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "connection attempt failed (%s)",
- strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
-
- priv->tcp_connected = priv->connected = 0;
-
- transport_ref (this);
-
- priv->handshake.incoming.state = IB_VERBS_HANDSHAKE_START;
- priv->handshake.outgoing.state = IB_VERBS_HANDSHAKE_START;
-
- priv->idx = event_register (this->xl->ctx->event_pool,
- priv->sock, ib_verbs_event_handler,
- this, 1, 1);
- }
-unlock:
- pthread_mutex_unlock (&priv->write_mutex);
-
- return ret;
-}
-
-static int
-ib_verbs_server_event_handler (int fd, int idx, void *data,
- int poll_in, int poll_out, int poll_err)
-{
- int32_t main_sock = -1;
- transport_t *this, *trans = data;
- ib_verbs_private_t *priv = NULL;
- ib_verbs_private_t *trans_priv = (ib_verbs_private_t *) trans->private;
- ib_verbs_options_t *options = NULL;
-
- if (!poll_in)
- return 0;
-
- this = CALLOC (1, sizeof (transport_t));
- ERR_ABORT (this);
- priv = CALLOC (1, sizeof (ib_verbs_private_t));
- ERR_ABORT (priv);
- this->private = priv;
- /* Copy all the ib_verbs related values in priv, from trans_priv
- as other than QP, all the values remain same */
- priv->device = trans_priv->device;
- priv->options = trans_priv->options;
- options = &priv->options;
-
- this->ops = trans->ops;
- this->xl = trans->xl;
- this->init = trans->init;
- this->fini = trans->fini;
-
- memcpy (&this->myinfo.sockaddr, &trans->myinfo.sockaddr,
- trans->myinfo.sockaddr_len);
- this->myinfo.sockaddr_len = trans->myinfo.sockaddr_len;
-
- main_sock = (trans_priv)->sock;
- this->peerinfo.sockaddr_len = sizeof (this->peerinfo.sockaddr);
- priv->sock = accept (main_sock,
- (struct sockaddr *)&this->peerinfo.sockaddr,
- &this->peerinfo.sockaddr_len);
- if (priv->sock == -1) {
- gf_log ("ib-verbs/server", GF_LOG_ERROR,
- "accept() failed: %s",
- strerror (errno));
- free (this->private);
- free (this);
- return -1;
- }
-
- priv->peer.trans = this;
- transport_ref (this);
-
- get_transport_identifiers (this);
-
- priv->tcp_connected = 1;
- priv->handshake.incoming.state = IB_VERBS_HANDSHAKE_START;
- priv->handshake.outgoing.state = IB_VERBS_HANDSHAKE_START;
-
- priv->peer.send_count = options->send_count;
- priv->peer.recv_count = options->recv_count;
- priv->peer.send_size = options->send_size;
- priv->peer.recv_size = options->recv_size;
- INIT_LIST_HEAD (&priv->peer.ioq);
-
- if (ib_verbs_create_qp (this) < 0) {
- gf_log ("transport/ib-verbs", GF_LOG_ERROR,
- "%s: could not create QP",
- this->xl->name);
- transport_disconnect (this);
- return -1;
- }
-
- priv->idx = event_register (this->xl->ctx->event_pool, priv->sock,
- ib_verbs_event_handler, this, 1, 1);
-
- pthread_mutex_init (&priv->read_mutex, NULL);
- pthread_mutex_init (&priv->write_mutex, NULL);
- pthread_mutex_init (&priv->recv_mutex, NULL);
- /* pthread_cond_init (&priv->recv_cond, NULL); */
-
- return 0;
-}
-
-static int32_t
-ib_verbs_listen (transport_t *this)
-{
- struct sockaddr_storage sockaddr;
- socklen_t sockaddr_len;
- ib_verbs_private_t *priv = this->private;
- int opt = 1, ret = 0;
- char service[NI_MAXSERV], host[NI_MAXHOST];
-
- memset (&sockaddr, 0, sizeof (sockaddr));
- ret = ibverbs_server_get_local_sockaddr (this,
- (struct sockaddr *)&sockaddr,
- &sockaddr_len);
- if (ret != 0) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "cannot find network address of server to bind to");
- goto err;
- }
-
- priv->sock = socket (((struct sockaddr *)&sockaddr)->sa_family,
- SOCK_STREAM, 0);
- if (priv->sock == -1) {
- gf_log ("ib-verbs/server", GF_LOG_CRITICAL,
- "init: failed to create socket, error: %s",
- strerror (errno));
- free (this->private);
- ret = -1;
- goto err;
- }
-
- memcpy (&this->myinfo.sockaddr, &sockaddr, sockaddr_len);
- this->myinfo.sockaddr_len = sockaddr_len;
-
- ret = getnameinfo ((struct sockaddr *)&this->myinfo.sockaddr,
- this->myinfo.sockaddr_len,
- host, sizeof (host),
- service, sizeof (service),
- NI_NUMERICHOST);
- if (ret != 0) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "getnameinfo failed (%s)", gai_strerror (ret));
- goto err;
- }
- sprintf (this->myinfo.identifier, "%s:%s", host, service);
-
- setsockopt (priv->sock, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof (opt));
- if (bind (priv->sock,
- (struct sockaddr *)&sockaddr,
- sockaddr_len) != 0) {
- ret = -1;
- gf_log ("ib-verbs/server", GF_LOG_ERROR,
- "init: failed to bind to socket for %s (%s)",
- this->myinfo.identifier, strerror (errno));
- goto err;
- }
-
- if (listen (priv->sock, 10) != 0) {
- gf_log ("ib-verbs/server", GF_LOG_ERROR,
- "init: listen () failed on socket for %s (%s)",
- this->myinfo.identifier, strerror (errno));
- ret = -1;
- goto err;
- }
-
- /* Register the main socket */
- priv->idx = event_register (this->xl->ctx->event_pool, priv->sock,
- ib_verbs_server_event_handler,
- transport_ref (this), 1, 0);
-
-err:
- return ret;
-}
-
-struct transport_ops tops = {
- .receive = ib_verbs_receive,
- .submit = ib_verbs_submit,
- .connect = ib_verbs_connect,
- .disconnect = ib_verbs_disconnect,
- .listen = ib_verbs_listen,
-};
-
-int32_t
-init (transport_t *this)
-{
- ib_verbs_private_t *priv = CALLOC (1, sizeof (*priv));
- this->private = priv;
- priv->sock = -1;
-
- if (ib_verbs_init (this)) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "Failed to initialize IB Device");
- return -1;
- }
-
- return 0;
-}
-
-void
-fini (struct transport *this)
-{
- /* TODO: verify this function does graceful finish */
- ib_verbs_private_t *priv = this->private;
- this->private = NULL;
-
- pthread_mutex_destroy (&priv->recv_mutex);
- pthread_mutex_destroy (&priv->write_mutex);
- pthread_mutex_destroy (&priv->read_mutex);
- /* pthread_cond_destroy (&priv->recv_cond); */
-
- gf_log (this->xl->name, GF_LOG_TRACE,
- "called fini on transport: %p",
- this);
- free (priv);
- return;
-}
-
-/* TODO: expand each option */
-struct volume_options options[] = {
- { .key = {"transport.ib-verbs.port",
- "ib-verbs-port"},
- .type = GF_OPTION_TYPE_INT,
- .min = 1,
- .max = 4,
- .description = "check the option by 'ibv_devinfo'"
- },
- { .key = {"transport.ib-verbs.mtu",
- "ib-verbs-mtu"},
- .type = GF_OPTION_TYPE_INT,
- },
- { .key = {"transport.ib-verbs.device-name",
- "ib-verbs-device-name"},
- .type = GF_OPTION_TYPE_ANY,
- .description = "check by 'ibv_devinfo'"
- },
- { .key = {"transport.ib-verbs.work-request-send-count",
- "ib-verbs-work-request-send-count"},
- .type = GF_OPTION_TYPE_INT,
- },
- { .key = {"transport.ib-verbs.work-request-recv-count",
- "ib-verbs-work-request-recv-count"},
- .type = GF_OPTION_TYPE_INT,
- },
- { .key = {"remote-port",
- "transport.remote-port",
- "transport.ib-verbs.remote-port"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {"transport.ib-verbs.listen-port", "listen-port"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {"transport.ib-verbs.connect-path", "connect-path"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"transport.ib-verbs.bind-path", "bind-path"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"transport.ib-verbs.listen-path", "listen-path"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"transport.address-family",
- "address-family"},
- .value = {"inet", "inet6", "inet/inet6", "inet6/inet",
- "unix", "inet-sdp" },
- .type = GF_OPTION_TYPE_STR
- },
- { .key = {NULL} }
-};
diff --git a/transport/ib-verbs/src/ib-verbs.h b/transport/ib-verbs/src/ib-verbs.h
deleted file mode 100644
index a5513f882..000000000
--- a/transport/ib-verbs/src/ib-verbs.h
+++ /dev/null
@@ -1,218 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _XPORT_IB_VERBS_H
-#define _XPORT_IB_VERBS_H
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#ifndef MAX_IOVEC
-#define MAX_IOVEC 16
-#endif /* MAX_IOVEC */
-
-#include "xlator.h"
-#include "event.h"
-
-#include <stdio.h>
-#include <list.h>
-#include <arpa/inet.h>
-#include <infiniband/verbs.h>
-
-#define GF_DEFAULT_IBVERBS_LISTEN_PORT 6997
-
-/* options per transport end point */
-struct _ib_verbs_options {
- int32_t port;
- char *device_name;
- enum ibv_mtu mtu;
- int32_t send_count;
- int32_t recv_count;
- uint64_t recv_size;
- uint64_t send_size;
-};
-typedef struct _ib_verbs_options ib_verbs_options_t;
-
-
-struct _ib_verbs_header {
- char colonO[3];
- uint32_t size1;
- uint32_t size2;
- char version;
-} __attribute__((packed));
-typedef struct _ib_verbs_header ib_verbs_header_t;
-
-struct _ib_verbs_ioq {
- union {
- struct list_head list;
- struct {
- struct _ib_verbs_ioq *next;
- struct _ib_verbs_ioq *prev;
- };
- };
- ib_verbs_header_t header;
- struct iovec vector[MAX_IOVEC];
- int count;
- char *buf;
- struct iobref *iobref;
-};
-typedef struct _ib_verbs_ioq ib_verbs_ioq_t;
-
-/* represents one communication peer, two per transport_t */
-struct _ib_verbs_peer {
- transport_t *trans;
- struct ibv_qp *qp;
-
- int32_t recv_count;
- int32_t send_count;
- int32_t recv_size;
- int32_t send_size;
-
- int32_t quota;
- union {
- struct list_head ioq;
- struct {
- ib_verbs_ioq_t *ioq_next;
- ib_verbs_ioq_t *ioq_prev;
- };
- };
-
- /* QP attributes, needed to connect with remote QP */
- int32_t local_lid;
- int32_t local_psn;
- int32_t local_qpn;
- int32_t remote_lid;
- int32_t remote_psn;
- int32_t remote_qpn;
-};
-typedef struct _ib_verbs_peer ib_verbs_peer_t;
-
-
-struct _ib_verbs_post {
- struct _ib_verbs_post *next, *prev;
- struct ibv_mr *mr;
- char *buf;
- int32_t buf_size;
- char aux;
- int32_t reused;
- pthread_barrier_t wait;
-};
-typedef struct _ib_verbs_post ib_verbs_post_t;
-
-
-struct _ib_verbs_queue {
- ib_verbs_post_t active_posts, passive_posts;
- int32_t active_count, passive_count;
- pthread_mutex_t lock;
-};
-typedef struct _ib_verbs_queue ib_verbs_queue_t;
-
-
-struct _ib_verbs_qpreg {
- pthread_mutex_t lock;
- int32_t count;
- struct _qpent {
- struct _qpent *next, *prev;
- int32_t qp_num;
- ib_verbs_peer_t *peer;
- } ents[42];
-};
-typedef struct _ib_verbs_qpreg ib_verbs_qpreg_t;
-
-/* context per device, stored in global glusterfs_ctx_t->ib */
-struct _ib_verbs_device {
- struct _ib_verbs_device *next;
- const char *device_name;
- struct ibv_context *context;
- int32_t port;
- struct ibv_pd *pd;
- struct ibv_srq *srq;
- ib_verbs_qpreg_t qpreg;
- struct ibv_comp_channel *send_chan, *recv_chan;
- struct ibv_cq *send_cq, *recv_cq;
- ib_verbs_queue_t sendq, recvq;
- pthread_t send_thread, recv_thread;
-};
-typedef struct _ib_verbs_device ib_verbs_device_t;
-
-typedef enum {
- IB_VERBS_HANDSHAKE_START = 0,
- IB_VERBS_HANDSHAKE_SENDING_DATA,
- IB_VERBS_HANDSHAKE_RECEIVING_DATA,
- IB_VERBS_HANDSHAKE_SENT_DATA,
- IB_VERBS_HANDSHAKE_RECEIVED_DATA,
- IB_VERBS_HANDSHAKE_SENDING_ACK,
- IB_VERBS_HANDSHAKE_RECEIVING_ACK,
- IB_VERBS_HANDSHAKE_RECEIVED_ACK,
- IB_VERBS_HANDSHAKE_COMPLETE,
-} ib_verbs_handshake_state_t;
-
-struct ib_verbs_nbio {
- int state;
- char *buf;
- int count;
- struct iovec vector;
- struct iovec *pending_vector;
- int pending_count;
-};
-
-
-struct _ib_verbs_private {
- int32_t sock;
- int32_t idx;
- unsigned char connected;
- unsigned char tcp_connected;
- unsigned char ib_connected;
- in_addr_t addr;
- unsigned short port;
-
- /* IB Verbs Driver specific variables, pointers */
- ib_verbs_peer_t peer;
- ib_verbs_device_t *device;
- ib_verbs_options_t options;
-
- /* Used by trans->op->receive */
- char *data_ptr;
- int32_t data_offset;
- int32_t data_len;
-
- /* Mutex */
- pthread_mutex_t read_mutex;
- pthread_mutex_t write_mutex;
- pthread_barrier_t handshake_barrier;
- char handshake_ret;
-
- pthread_mutex_t recv_mutex;
-
- /* used during ib_verbs_handshake */
- struct {
- struct ib_verbs_nbio incoming;
- struct ib_verbs_nbio outgoing;
- int state;
- ib_verbs_header_t header;
- char *buf;
- size_t size;
- } handshake;
-};
-typedef struct _ib_verbs_private ib_verbs_private_t;
-
-#endif /* _XPORT_IB_VERBS_H */
diff --git a/transport/ib-verbs/src/name.h b/transport/ib-verbs/src/name.h
deleted file mode 100644
index 5f516cb82..000000000
--- a/transport/ib-verbs/src/name.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _IB_VERBS_NAME_H
-#define _IB_VERBS_NAME_H
-
-#include <sys/socket.h>
-#include <sys/un.h>
-
-#include "compat.h"
-
-int32_t
-client_bind (transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len,
- int sock);
-
-int32_t
-ibverbs_client_get_remote_sockaddr (transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len);
-
-int32_t
-ibverbs_server_get_local_sockaddr (transport_t *this,
- struct sockaddr *addr,
- socklen_t *addr_len);
-
-int32_t
-get_transport_identifiers (transport_t *this);
-
-#endif /* _IB_VERBS_NAME_H */
diff --git a/transport/socket/Makefile.am b/transport/socket/Makefile.am
deleted file mode 100644
index f963effea..000000000
--- a/transport/socket/Makefile.am
+++ /dev/null
@@ -1 +0,0 @@
-SUBDIRS = src \ No newline at end of file
diff --git a/transport/socket/src/Makefile.am b/transport/socket/src/Makefile.am
deleted file mode 100644
index 1832587a6..000000000
--- a/transport/socket/src/Makefile.am
+++ /dev/null
@@ -1,14 +0,0 @@
-noinst_HEADERS = socket.h name.h
-
-transport_LTLIBRARIES = socket.la
-transportdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/transport
-
-socket_la_LDFLAGS = -module -avoidversion
-
-socket_la_SOURCES = socket.c name.c
-socket_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES = *~
diff --git a/transport/socket/src/name.h b/transport/socket/src/name.h
deleted file mode 100644
index cefc1f476..000000000
--- a/transport/socket/src/name.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _SOCKET_NAME_H
-#define _SOCKET_NAME_H
-
-#include "compat.h"
-
-int32_t
-client_bind (transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len,
- int sock);
-
-int32_t
-socket_client_get_remote_sockaddr (transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len);
-
-int32_t
-socket_server_get_local_sockaddr (transport_t *this,
- struct sockaddr *addr,
- socklen_t *addr_len);
-
-int32_t
-get_transport_identifiers (transport_t *this);
-
-#endif /* _SOCKET_NAME_H */
diff --git a/transport/socket/src/socket.c b/transport/socket/src/socket.c
deleted file mode 100644
index 9035eafc6..000000000
--- a/transport/socket/src/socket.c
+++ /dev/null
@@ -1,1447 +0,0 @@
-/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "socket.h"
-#include "name.h"
-#include "dict.h"
-#include "transport.h"
-#include "logging.h"
-#include "xlator.h"
-#include "byte-order.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-
-#include <fcntl.h>
-#include <errno.h>
-
-
-#define GF_LOG_ERRNO(errno) ((errno == ENOTCONN) ? GF_LOG_DEBUG : GF_LOG_ERROR)
-#define SA(ptr) ((struct sockaddr *)ptr)
-
-int socket_init (transport_t *this);
-
-/*
- * return value:
- * 0 = success (completed)
- * -1 = error
- * > 0 = incomplete
- */
-
-int
-__socket_rwv (transport_t *this, struct iovec *vector, int count,
- struct iovec **pending_vector, int *pending_count,
- int write)
-{
- socket_private_t *priv = NULL;
- int sock = -1;
- int ret = -1;
- struct iovec *opvector = NULL;
- int opcount = 0;
- int moved = 0;
-
- priv = this->private;
- sock = priv->sock;
-
- opvector = vector;
- opcount = count;
-
- while (opcount) {
- if (write) {
- ret = writev (sock, opvector, opcount);
-
- if (ret == 0 || (ret == -1 && errno == EAGAIN)) {
- /* done for now */
- break;
- }
- } else {
- ret = readv (sock, opvector, opcount);
-
- if (ret == -1 && errno == EAGAIN) {
- /* done for now */
- break;
- }
- }
-
- if (ret == 0) {
- /* Mostly due to 'umount' in client */
- gf_log (this->xl->name, GF_LOG_TRACE,
- "EOF from peer %s", this->peerinfo.identifier);
- opcount = -1;
- errno = ENOTCONN;
- break;
- }
-
- if (ret == -1) {
- if (errno == EINTR)
- continue;
-
- gf_log (this->xl->name, GF_LOG_TRACE,
- "%s failed (%s)", write ? "writev" : "readv",
- strerror (errno));
- opcount = -1;
- break;
- }
-
- moved = 0;
-
- while (moved < ret) {
- if ((ret - moved) >= opvector[0].iov_len) {
- moved += opvector[0].iov_len;
- opvector++;
- opcount--;
- } else {
- opvector[0].iov_len -= (ret - moved);
- opvector[0].iov_base += (ret - moved);
- moved += (ret - moved);
- }
- while (opcount && !opvector[0].iov_len) {
- opvector++;
- opcount--;
- }
- }
- }
-
- if (pending_vector)
- *pending_vector = opvector;
-
- if (pending_count)
- *pending_count = opcount;
-
- return opcount;
-}
-
-
-int
-__socket_readv (transport_t *this, struct iovec *vector, int count,
- struct iovec **pending_vector, int *pending_count)
-{
- int ret = -1;
-
- ret = __socket_rwv (this, vector, count,
- pending_vector, pending_count, 0);
-
- return ret;
-}
-
-
-int
-__socket_writev (transport_t *this, struct iovec *vector, int count,
- struct iovec **pending_vector, int *pending_count)
-{
- int ret = -1;
-
- ret = __socket_rwv (this, vector, count,
- pending_vector, pending_count, 1);
-
- return ret;
-}
-
-
-int
-__socket_disconnect (transport_t *this)
-{
- socket_private_t *priv = NULL;
- int ret = -1;
-
- priv = this->private;
-
- if (priv->sock != -1) {
- ret = shutdown (priv->sock, SHUT_RDWR);
- priv->connected = -1;
- gf_log (this->xl->name, GF_LOG_TRACE,
- "shutdown() returned %d. set connection state to -1",
- ret);
- }
-
- return ret;
-}
-
-
-int
-__socket_server_bind (transport_t *this)
-{
- socket_private_t *priv = NULL;
- int ret = -1;
- int opt = 1;
-
- priv = this->private;
-
- ret = setsockopt (priv->sock, SOL_SOCKET, SO_REUSEADDR,
- &opt, sizeof (opt));
-
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "setsockopt() for SO_REUSEADDR failed (%s)",
- strerror (errno));
- }
-
- ret = bind (priv->sock, (struct sockaddr *)&this->myinfo.sockaddr,
- this->myinfo.sockaddr_len);
-
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "binding to %s failed: %s",
- this->myinfo.identifier, strerror (errno));
- if (errno == EADDRINUSE) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "Port is already in use");
- }
- }
-
- return ret;
-}
-
-
-int
-__socket_nonblock (int fd)
-{
- int flags = 0;
- int ret = -1;
-
- flags = fcntl (fd, F_GETFL);
-
- if (flags != -1)
- ret = fcntl (fd, F_SETFL, flags | O_NONBLOCK);
-
- return ret;
-}
-
-
-int
-__socket_connect_finish (int fd)
-{
- int ret = -1;
- int optval = 0;
- socklen_t optlen = sizeof (int);
-
- ret = getsockopt (fd, SOL_SOCKET, SO_ERROR, (void *)&optval, &optlen);
-
- if (ret == 0 && optval) {
- errno = optval;
- ret = -1;
- }
-
- return ret;
-}
-
-
-void
-__socket_reset (transport_t *this)
-{
- socket_private_t *priv = NULL;
-
- priv = this->private;
-
- /* TODO: use mem-pool on incoming data */
-
- if (priv->incoming.hdr_p)
- free (priv->incoming.hdr_p);
-
- if (priv->incoming.iobuf)
- iobuf_unref (priv->incoming.iobuf);
-
- memset (&priv->incoming, 0, sizeof (priv->incoming));
-
- event_unregister (this->xl->ctx->event_pool, priv->sock, priv->idx);
- close (priv->sock);
- priv->sock = -1;
- priv->idx = -1;
- priv->connected = -1;
-}
-
-
-struct ioq *
-__socket_ioq_new (transport_t *this, char *buf, int len,
- struct iovec *vector, int count, struct iobref *iobref)
-{
- socket_private_t *priv = NULL;
- struct ioq *entry = NULL;
-
- priv = this->private;
-
- /* TODO: use mem-pool */
- entry = CALLOC (1, sizeof (*entry));
-
- assert (count <= (MAX_IOVEC-2));
-
- entry->header.colonO[0] = ':';
- entry->header.colonO[1] = 'O';
- entry->header.colonO[2] = '\0';
- entry->header.version = 42;
- entry->header.size1 = hton32 (len);
- entry->header.size2 = hton32 (iov_length (vector, count));
-
- entry->vector[0].iov_base = &entry->header;
- entry->vector[0].iov_len = sizeof (entry->header);
- entry->count++;
-
- entry->vector[1].iov_base = buf;
- entry->vector[1].iov_len = len;
- entry->count++;
-
- if (vector && count) {
- memcpy (&entry->vector[2], vector, sizeof (*vector) * count);
- entry->count += count;
- }
-
- entry->pending_vector = entry->vector;
- entry->pending_count = entry->count;
-
- if (iobref)
- entry->iobref = iobref_ref (iobref);
-
- entry->buf = buf;
-
- INIT_LIST_HEAD (&entry->list);
-
- return entry;
-}
-
-
-void
-__socket_ioq_entry_free (struct ioq *entry)
-{
- list_del_init (&entry->list);
- if (entry->iobref)
- iobref_unref (entry->iobref);
-
- /* TODO: use mem-pool */
- free (entry->buf);
-
- /* TODO: use mem-pool */
- free (entry);
-}
-
-
-void
-__socket_ioq_flush (transport_t *this)
-{
- socket_private_t *priv = NULL;
- struct ioq *entry = NULL;
-
- priv = this->private;
-
- while (!list_empty (&priv->ioq)) {
- entry = priv->ioq_next;
- __socket_ioq_entry_free (entry);
- }
-
- return;
-}
-
-
-int
-__socket_ioq_churn_entry (transport_t *this, struct ioq *entry)
-{
- int ret = -1;
-
- ret = __socket_writev (this, entry->pending_vector,
- entry->pending_count,
- &entry->pending_vector,
- &entry->pending_count);
-
- if (ret == 0) {
- /* current entry was completely written */
- assert (entry->pending_count == 0);
- __socket_ioq_entry_free (entry);
- }
-
- return ret;
-}
-
-
-int
-__socket_ioq_churn (transport_t *this)
-{
- socket_private_t *priv = NULL;
- int ret = 0;
- struct ioq *entry = NULL;
-
- priv = this->private;
-
- while (!list_empty (&priv->ioq)) {
- /* pick next entry */
- entry = priv->ioq_next;
-
- ret = __socket_ioq_churn_entry (this, entry);
-
- if (ret != 0)
- break;
- }
-
- if (list_empty (&priv->ioq)) {
- /* all pending writes done, not interested in POLLOUT */
- priv->idx = event_select_on (this->xl->ctx->event_pool,
- priv->sock, priv->idx, -1, 0);
- }
-
- return ret;
-}
-
-
-int
-socket_event_poll_err (transport_t *this)
-{
- socket_private_t *priv = NULL;
- int ret = -1;
-
- priv = this->private;
-
- pthread_mutex_lock (&priv->lock);
- {
- __socket_ioq_flush (this);
- __socket_reset (this);
- }
- pthread_mutex_unlock (&priv->lock);
-
- xlator_notify (this->xl, GF_EVENT_POLLERR, this);
-
- return ret;
-}
-
-
-int
-socket_event_poll_out (transport_t *this)
-{
- socket_private_t *priv = NULL;
- int ret = -1;
-
- priv = this->private;
-
- pthread_mutex_lock (&priv->lock);
- {
- if (priv->connected == 1) {
- ret = __socket_ioq_churn (this);
-
- if (ret == -1) {
- __socket_disconnect (this);
- }
- }
- }
- pthread_mutex_unlock (&priv->lock);
-
- xlator_notify (this->xl, GF_EVENT_POLLOUT, this);
-
- return ret;
-}
-
-
-int
-__socket_proto_validate_header (transport_t *this,
- struct socket_header *header,
- size_t *size1_p, size_t *size2_p)
-{
- size_t size1 = 0;
- size_t size2 = 0;
-
- if (strcmp (header->colonO, ":O")) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "socket header signature does not match :O (%x.%x.%x)",
- header->colonO[0], header->colonO[1],
- header->colonO[2]);
- return -1;
- }
-
- if (header->version != 42) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "socket header version does not match 42 != %d",
- header->version);
- return -1;
- }
-
- size1 = ntoh32 (header->size1);
- size2 = ntoh32 (header->size2);
-
- if (size1 <= 0 || size1 > 1048576) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "socket header has incorrect size1=%"GF_PRI_SIZET,
- size1);
- return -1;
- }
-
- if (size2 > (131072)) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "socket header has incorrect size2=%"GF_PRI_SIZET,
- size2);
- return -1;
- }
-
- if (size1_p)
- *size1_p = size1;
-
- if (size2_p)
- *size2_p = size2;
-
- return 0;
-}
-
-
-
-/* socket protocol state machine */
-
-int
-__socket_proto_state_machine (transport_t *this)
-{
- int ret = -1;
- socket_private_t *priv = NULL;
- size_t size1 = 0;
- size_t size2 = 0;
- int previous_state = -1;
- struct socket_header *hdr = NULL;
- struct iobuf *iobuf = NULL;
-
-
- priv = this->private;
-
- while (priv->incoming.state != SOCKET_PROTO_STATE_COMPLETE) {
- /* debug check against infinite loops */
- if (previous_state == priv->incoming.state) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "state did not change! (%d) breaking",
- previous_state);
- ret = -1;
- goto unlock;
- }
- previous_state = priv->incoming.state;
-
- switch (priv->incoming.state) {
-
- case SOCKET_PROTO_STATE_NADA:
- priv->incoming.pending_vector =
- priv->incoming.vector;
-
- priv->incoming.pending_vector->iov_base =
- &priv->incoming.header;
-
- priv->incoming.pending_vector->iov_len =
- sizeof (struct socket_header);
-
- priv->incoming.state =
- SOCKET_PROTO_STATE_HEADER_COMING;
- break;
-
- case SOCKET_PROTO_STATE_HEADER_COMING:
-
- ret = __socket_readv (this,
- priv->incoming.pending_vector, 1,
- &priv->incoming.pending_vector,
- NULL);
- if (ret == 0) {
- priv->incoming.state =
- SOCKET_PROTO_STATE_HEADER_CAME;
- break;
- }
-
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_TRACE,
- "read (%s) in state %d (%s)",
- strerror (errno),
- SOCKET_PROTO_STATE_HEADER_COMING,
- this->peerinfo.identifier);
- goto unlock;
- }
-
- if (ret > 0) {
- gf_log (this->xl->name, GF_LOG_TRACE,
- "partial header read on NB socket.");
- goto unlock;
- }
- break;
-
- case SOCKET_PROTO_STATE_HEADER_CAME:
- hdr = &priv->incoming.header;
- ret = __socket_proto_validate_header (this, hdr,
- &size1, &size2);
-
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "socket header validate failed (%s). "
- "possible mismatch of transport-type "
- "between server and client volumes, "
- "or version mismatch",
- this->peerinfo.identifier);
- goto unlock;
- }
-
- priv->incoming.hdrlen = size1;
- priv->incoming.buflen = size2;
-
- /* TODO: use mem-pool */
- priv->incoming.hdr_p = MALLOC (size1);
- if (size2) {
- /* TODO: sanity check size2 < page size
- */
- iobuf = iobuf_get (this->xl->ctx->iobuf_pool);
- if (!iobuf) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "unable to allocate IO buffer "
- "for peer %s",
- this->peerinfo.identifier);
- ret = -ENOMEM;
- goto unlock;
- }
- priv->incoming.iobuf = iobuf;
- priv->incoming.buf_p = iobuf->ptr;
- }
-
- priv->incoming.vector[0].iov_base =
- priv->incoming.hdr_p;
-
- priv->incoming.vector[0].iov_len = size1;
-
- priv->incoming.vector[1].iov_base =
- priv->incoming.buf_p;
-
- priv->incoming.vector[1].iov_len = size2;
- priv->incoming.count = size2 ? 2 : 1;
-
- priv->incoming.pending_vector =
- priv->incoming.vector;
-
- priv->incoming.pending_count =
- priv->incoming.count;
-
- priv->incoming.state =
- SOCKET_PROTO_STATE_DATA_COMING;
- break;
-
- case SOCKET_PROTO_STATE_DATA_COMING:
-
- ret = __socket_readv (this,
- priv->incoming.pending_vector,
- priv->incoming.pending_count,
- &priv->incoming.pending_vector,
- &priv->incoming.pending_count);
- if (ret == 0) {
- priv->incoming.state =
- SOCKET_PROTO_STATE_DATA_CAME;
- break;
- }
-
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "read (%s) in state %d (%s)",
- strerror (errno),
- SOCKET_PROTO_STATE_DATA_COMING,
- this->peerinfo.identifier);
- goto unlock;
- }
-
- if (ret > 0) {
- gf_log (this->xl->name, GF_LOG_TRACE,
- "partial data read on NB socket");
- goto unlock;
- }
- break;
-
- case SOCKET_PROTO_STATE_DATA_CAME:
- memset (&priv->incoming.vector, 0,
- sizeof (priv->incoming.vector));
- priv->incoming.pending_vector = NULL;
- priv->incoming.pending_count = 0;
- priv->incoming.state = SOCKET_PROTO_STATE_COMPLETE;
- break;
-
- case SOCKET_PROTO_STATE_COMPLETE:
- /* not reached */
- break;
-
- default:
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "undefined state reached: %d",
- priv->incoming.state);
- goto unlock;
- }
- }
-unlock:
-
- return ret;
-}
-
-
-int
-socket_proto_state_machine (transport_t *this)
-{
- socket_private_t *priv = NULL;
- int ret = 0;
-
- priv = this->private;
-
- pthread_mutex_lock (&priv->lock);
- {
- ret = __socket_proto_state_machine (this);
- }
- pthread_mutex_unlock (&priv->lock);
-
- return ret;
-}
-
-
-int
-socket_event_poll_in (transport_t *this)
-{
- int ret = -1;
-
- ret = socket_proto_state_machine (this);
-
- /* call POLLIN on xlator even if complete block is not received,
- just to keep the last_received timestamp ticking */
-
- if (ret == 0)
- ret = xlator_notify (this->xl, GF_EVENT_POLLIN, this);
-
- return ret;
-}
-
-
-int
-socket_connect_finish (transport_t *this)
-{
- int ret = -1;
- socket_private_t *priv = NULL;
- int event = -1;
- char notify_xlator = 0;
-
- priv = this->private;
-
- pthread_mutex_lock (&priv->lock);
- {
- if (priv->connected)
- goto unlock;
-
- ret = __socket_connect_finish (priv->sock);
-
- if (ret == -1 && errno == EINPROGRESS)
- ret = 1;
-
- if (ret == -1 && errno != EINPROGRESS) {
- if (!priv->connect_finish_log) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "connection to %s failed (%s)",
- this->peerinfo.identifier,
- strerror (errno));
- priv->connect_finish_log = 1;
- }
- __socket_disconnect (this);
- notify_xlator = 1;
- event = GF_EVENT_POLLERR;
- goto unlock;
- }
-
- if (ret == 0) {
- notify_xlator = 1;
-
- this->myinfo.sockaddr_len =
- sizeof (this->myinfo.sockaddr);
-
- ret = getsockname (priv->sock,
- SA (&this->myinfo.sockaddr),
- &this->myinfo.sockaddr_len);
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "getsockname on (%d) failed (%s)",
- priv->sock, strerror (errno));
- __socket_disconnect (this);
- event = GF_EVENT_POLLERR;
- goto unlock;
- }
-
- priv->connected = 1;
- priv->connect_finish_log = 0;
- event = GF_EVENT_CHILD_UP;
- get_transport_identifiers (this);
- }
- }
-unlock:
- pthread_mutex_unlock (&priv->lock);
-
- if (notify_xlator)
- xlator_notify (this->xl, event, this);
-
- return 0;
-}
-
-
-int
-socket_event_handler (int fd, int idx, void *data,
- int poll_in, int poll_out, int poll_err)
-{
- transport_t *this = NULL;
- socket_private_t *priv = NULL;
- int ret = 0;
-
- this = data;
- priv = this->private;
-
- pthread_mutex_lock (&priv->lock);
- {
- priv->idx = idx;
- }
- pthread_mutex_unlock (&priv->lock);
-
- if (!priv->connected) {
- ret = socket_connect_finish (this);
- }
-
- if (!ret && poll_out) {
- ret = socket_event_poll_out (this);
- }
-
- if (!ret && poll_in) {
- ret = socket_event_poll_in (this);
- }
-
- if (ret < 0 || poll_err) {
- socket_event_poll_err (this);
- transport_unref (this);
- }
-
- return 0;
-}
-
-
-int
-socket_server_event_handler (int fd, int idx, void *data,
- int poll_in, int poll_out, int poll_err)
-{
- transport_t *this = NULL;
- socket_private_t *priv = NULL;
- int ret = 0;
- int new_sock = -1;
- transport_t *new_trans = NULL;
- struct sockaddr_storage new_sockaddr = {0, };
- socklen_t addrlen = sizeof (new_sockaddr);
- socket_private_t *new_priv = NULL;
- glusterfs_ctx_t *ctx = NULL;
-
- this = data;
- priv = this->private;
- ctx = this->xl->ctx;
-
- pthread_mutex_lock (&priv->lock);
- {
- priv->idx = idx;
-
- if (poll_in) {
- new_sock = accept (priv->sock, SA (&new_sockaddr),
- &addrlen);
-
- if (new_sock == -1)
- goto unlock;
-
- if (!priv->bio) {
- ret = __socket_nonblock (new_sock);
-
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "NBIO on %d failed (%s)",
- new_sock, strerror (errno));
- close (new_sock);
- goto unlock;
- }
- }
-
- new_trans = CALLOC (1, sizeof (*new_trans));
- new_trans->xl = this->xl;
- new_trans->fini = this->fini;
-
- memcpy (&new_trans->peerinfo.sockaddr, &new_sockaddr,
- addrlen);
- new_trans->peerinfo.sockaddr_len = addrlen;
-
- new_trans->myinfo.sockaddr_len =
- sizeof (new_trans->myinfo.sockaddr);
-
- ret = getsockname (new_sock,
- SA (&new_trans->myinfo.sockaddr),
- &new_trans->myinfo.sockaddr_len);
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "getsockname on %d failed (%s)",
- new_sock, strerror (errno));
- close (new_sock);
- goto unlock;
- }
-
- get_transport_identifiers (new_trans);
- socket_init (new_trans);
- new_trans->ops = this->ops;
- new_trans->init = this->init;
- new_trans->fini = this->fini;
-
- new_priv = new_trans->private;
-
- pthread_mutex_lock (&new_priv->lock);
- {
- new_priv->sock = new_sock;
- new_priv->connected = 1;
-
- transport_ref (new_trans);
- new_priv->idx =
- event_register (ctx->event_pool,
- new_sock,
- socket_event_handler,
- new_trans, 1, 0);
-
- if (new_priv->idx == -1)
- ret = -1;
- }
- pthread_mutex_unlock (&new_priv->lock);
- }
- }
-unlock:
- pthread_mutex_unlock (&priv->lock);
-
- return ret;
-}
-
-
-int
-socket_disconnect (transport_t *this)
-{
- socket_private_t *priv = NULL;
- int ret = -1;
-
- priv = this->private;
-
- pthread_mutex_lock (&priv->lock);
- {
- ret = __socket_disconnect (this);
- }
- pthread_mutex_unlock (&priv->lock);
-
- return ret;
-}
-
-
-int
-socket_connect (transport_t *this)
-{
- int ret = -1;
- int sock = -1;
- socket_private_t *priv = NULL;
- struct sockaddr_storage sockaddr = {0, };
- socklen_t sockaddr_len = 0;
- glusterfs_ctx_t *ctx = NULL;
-
- priv = this->private;
- ctx = this->xl->ctx;
-
- if (!priv) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "connect() called on uninitialized transport");
- goto err;
- }
-
- pthread_mutex_lock (&priv->lock);
- {
- sock = priv->sock;
- }
- pthread_mutex_unlock (&priv->lock);
-
- if (sock != -1) {
- gf_log (this->xl->name, GF_LOG_TRACE,
- "connect () called on transport already connected");
- ret = 0;
- goto err;
- }
-
- ret = socket_client_get_remote_sockaddr (this, SA (&sockaddr),
- &sockaddr_len);
- if (ret == -1) {
- /* logged inside client_get_remote_sockaddr */
- goto err;
- }
-
- pthread_mutex_lock (&priv->lock);
- {
- if (priv->sock != -1) {
- gf_log (this->xl->name, GF_LOG_TRACE,
- "connect() -- already connected");
- goto unlock;
- }
-
- memcpy (&this->peerinfo.sockaddr, &sockaddr, sockaddr_len);
- this->peerinfo.sockaddr_len = sockaddr_len;
-
- priv->sock = socket (SA (&sockaddr)->sa_family,
- SOCK_STREAM, 0);
-
- if (priv->sock == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "socket creation failed (%s)",
- strerror (errno));
- goto unlock;
- }
-
- /* Cant help if setting socket options fails. We can continue
- * working nonetheless.
- */
- if (setsockopt (priv->sock, SOL_SOCKET, SO_RCVBUF,
- &priv->windowsize,
- sizeof (priv->windowsize)) < 0) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "setting receive window size failed: %d: %d: "
- "%s", priv->sock, priv->windowsize,
- strerror (errno));
- }
-
- if (setsockopt (priv->sock, SOL_SOCKET, SO_SNDBUF,
- &priv->windowsize,
- sizeof (priv->windowsize)) < 0) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "setting send window size failed: %d: %d: "
- "%s", priv->sock, priv->windowsize,
- strerror (errno));
- }
-
- if (!priv->bio) {
- ret = __socket_nonblock (priv->sock);
-
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "NBIO on %d failed (%s)",
- priv->sock, strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
- }
-
- SA (&this->myinfo.sockaddr)->sa_family =
- SA (&this->peerinfo.sockaddr)->sa_family;
-
- ret = client_bind (this, SA (&this->myinfo.sockaddr),
- &this->myinfo.sockaddr_len, priv->sock);
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_WARNING,
- "client bind failed: %s", strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
-
- ret = connect (priv->sock, SA (&this->peerinfo.sockaddr),
- this->peerinfo.sockaddr_len);
-
- if (ret == -1 && errno != EINPROGRESS) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "connection attempt failed (%s)",
- strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
-
- priv->connected = 0;
-
- transport_ref (this);
-
- priv->idx = event_register (ctx->event_pool, priv->sock,
- socket_event_handler, this, 1, 1);
- if (priv->idx == -1)
- ret = -1;
- }
-unlock:
- pthread_mutex_unlock (&priv->lock);
-
-err:
- return ret;
-}
-
-
-int
-socket_listen (transport_t *this)
-{
- socket_private_t * priv = NULL;
- int ret = -1;
- int sock = -1;
- struct sockaddr_storage sockaddr;
- socklen_t sockaddr_len;
- peer_info_t *myinfo = NULL;
- glusterfs_ctx_t *ctx = NULL;
-
- priv = this->private;
- myinfo = &this->myinfo;
- ctx = this->xl->ctx;
-
- pthread_mutex_lock (&priv->lock);
- {
- sock = priv->sock;
- }
- pthread_mutex_unlock (&priv->lock);
-
- if (sock != -1) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "alreading listening");
- return ret;
- }
-
- ret = socket_server_get_local_sockaddr (this, SA (&sockaddr),
- &sockaddr_len);
-
- if (ret == -1) {
- return ret;
- }
-
- pthread_mutex_lock (&priv->lock);
- {
- if (priv->sock != -1) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "already listening");
- goto unlock;
- }
-
- memcpy (&myinfo->sockaddr, &sockaddr, sockaddr_len);
- myinfo->sockaddr_len = sockaddr_len;
-
- priv->sock = socket (SA (&sockaddr)->sa_family,
- SOCK_STREAM, 0);
-
- if (priv->sock == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "socket creation failed (%s)",
- strerror (errno));
- goto unlock;
- }
-
- /* Cant help if setting socket options fails. We can continue
- * working nonetheless.
- */
- if (setsockopt (priv->sock, SOL_SOCKET, SO_RCVBUF,
- &priv->windowsize,
- sizeof (priv->windowsize)) < 0) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "setting receive window size failed: %d: %d: "
- "%s", priv->sock, priv->windowsize,
- strerror (errno));
- }
-
- if (setsockopt (priv->sock, SOL_SOCKET, SO_SNDBUF,
- &priv->windowsize,
- sizeof (priv->windowsize)) < 0) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "setting send window size failed: %d: %d: "
- "%s", priv->sock, priv->windowsize,
- strerror (errno));
- }
-
- if (!priv->bio) {
- ret = __socket_nonblock (priv->sock);
-
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "NBIO on %d failed (%s)",
- priv->sock, strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
- }
-
- ret = __socket_server_bind (this);
-
- if (ret == -1) {
- /* logged inside __socket_server_bind() */
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
-
- ret = listen (priv->sock, 10);
-
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "could not set socket %d to listen mode (%s)",
- priv->sock, strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
-
- transport_ref (this);
-
- priv->idx = event_register (ctx->event_pool, priv->sock,
- socket_server_event_handler,
- this, 1, 0);
-
- if (priv->idx == -1) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "could not register socket %d with events",
- priv->sock);
- ret = -1;
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
- }
-unlock:
- pthread_mutex_unlock (&priv->lock);
-
- return ret;
-}
-
-
-int
-socket_receive (transport_t *this, char **hdr_p, size_t *hdrlen_p,
- struct iobuf **iobuf_p)
-{
- socket_private_t *priv = NULL;
- int ret = -1;
-
- priv = this->private;
-
- pthread_mutex_lock (&priv->lock);
- {
- if (priv->connected != 1) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "socket not connected to receive");
- goto unlock;
- }
-
- if (!hdr_p || !hdrlen_p || !iobuf_p) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "bad parameters %p %p %p",
- hdr_p, hdrlen_p, iobuf_p);
- goto unlock;
- }
-
- if (priv->incoming.state == SOCKET_PROTO_STATE_COMPLETE) {
- *hdr_p = priv->incoming.hdr_p;
- *hdrlen_p = priv->incoming.hdrlen;
- *iobuf_p = priv->incoming.iobuf;
-
- memset (&priv->incoming, 0, sizeof (priv->incoming));
- priv->incoming.state = SOCKET_PROTO_STATE_NADA;
-
- ret = 0;
- }
- }
-unlock:
- pthread_mutex_unlock (&priv->lock);
-
- return ret;
-}
-
-
-/* TODO: implement per transfer limit */
-int
-socket_submit (transport_t *this, char *buf, int len,
- struct iovec *vector, int count,
- struct iobref *iobref)
-{
- socket_private_t *priv = NULL;
- int ret = -1;
- char need_poll_out = 0;
- char need_append = 1;
- struct ioq *entry = NULL;
- glusterfs_ctx_t *ctx = NULL;
-
- priv = this->private;
- ctx = this->xl->ctx;
-
- pthread_mutex_lock (&priv->lock);
- {
- if (priv->connected != 1) {
- if (!priv->submit_log && !priv->connect_finish_log) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "not connected (priv->connected = %d)",
- priv->connected);
- priv->submit_log = 1;
- }
- goto unlock;
- }
-
- priv->submit_log = 0;
- entry = __socket_ioq_new (this, buf, len, vector, count, iobref);
-
- if (list_empty (&priv->ioq)) {
- ret = __socket_ioq_churn_entry (this, entry);
-
- if (ret == 0)
- need_append = 0;
-
- if (ret > 0)
- need_poll_out = 1;
- }
-
- if (need_append) {
- list_add_tail (&entry->list, &priv->ioq);
- ret = 0;
- }
-
- if (need_poll_out) {
- /* first entry to wait. continue writing on POLLOUT */
- priv->idx = event_select_on (ctx->event_pool,
- priv->sock,
- priv->idx, -1, 1);
- }
- }
-unlock:
- pthread_mutex_unlock (&priv->lock);
-
- return ret;
-}
-
-
-struct transport_ops tops = {
- .listen = socket_listen,
- .connect = socket_connect,
- .disconnect = socket_disconnect,
- .submit = socket_submit,
- .receive = socket_receive
-};
-
-
-int
-socket_init (transport_t *this)
-{
- socket_private_t *priv = NULL;
- gf_boolean_t tmp_bool = 0;
- uint64_t windowsize = GF_DEFAULT_SOCKET_WINDOW_SIZE;
- char *optstr = NULL;
-
- if (this->private) {
- gf_log (this->xl->name, GF_LOG_DEBUG,
- "double init attempted");
- return -1;
- }
-
- priv = CALLOC (1, sizeof (*priv));
- if (!priv) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "calloc (1, %"GF_PRI_SIZET") returned NULL",
- sizeof (*priv));
- return -1;
- }
-
- pthread_mutex_init (&priv->lock, NULL);
-
- priv->sock = -1;
- priv->idx = -1;
- priv->connected = -1;
-
- INIT_LIST_HEAD (&priv->ioq);
-
- if (dict_get (this->xl->options, "non-blocking-io")) {
- optstr = data_to_str (dict_get (this->xl->options,
- "non-blocking-io"));
-
- if (gf_string2boolean (optstr, &tmp_bool) == -1) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "'non-blocking-io' takes only boolean options,"
- " not taking any action");
- tmp_bool = 1;
- }
- priv->bio = 0;
- if (!tmp_bool) {
- priv->bio = 1;
- gf_log (this->xl->name, GF_LOG_WARNING,
- "disabling non-blocking IO");
- }
- }
-
- optstr = NULL;
- if (dict_get_str (this->xl->options, "transport.window-size",
- &optstr) == 0) {
- if (gf_string2bytesize (optstr, &windowsize) != 0) {
- gf_log (this->xl->name, GF_LOG_ERROR,
- "invalid number format: %s", optstr);
- return -1;
- }
- }
- priv->windowsize = (int)windowsize;
- this->private = priv;
-
- return 0;
-}
-
-
-void
-fini (transport_t *this)
-{
- socket_private_t *priv = this->private;
-
- gf_log (this->xl->name, GF_LOG_TRACE,
- "transport %p destroyed", this);
-
- pthread_mutex_destroy (&priv->lock);
- FREE (priv);
-}
-
-
-int32_t
-init (transport_t *this)
-{
- int ret = -1;
-
- ret = socket_init (this);
-
- if (ret == -1) {
- gf_log (this->xl->name, GF_LOG_DEBUG, "socket_init() failed");
- }
-
- return ret;
-}
-
-struct volume_options options[] = {
- { .key = {"remote-port",
- "transport.remote-port",
- "transport.socket.remote-port"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {"transport.socket.listen-port", "listen-port"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {"transport.socket.bind-address", "bind-address" },
- .type = GF_OPTION_TYPE_INTERNET_ADDRESS
- },
- { .key = {"transport.socket.connect-path", "connect-path"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"transport.socket.bind-path", "bind-path"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"transport.socket.listen-path", "listen-path"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = { "transport.address-family",
- "address-family" },
- .value = {"inet", "inet6", "inet/inet6", "inet6/inet",
- "unix", "inet-sdp" },
- .type = GF_OPTION_TYPE_STR
- },
-
- { .key = {"non-blocking-io"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"transport.window-size"},
- .type = GF_OPTION_TYPE_SIZET,
- .min = GF_MIN_SOCKET_WINDOW_SIZE,
- .max = GF_MAX_SOCKET_WINDOW_SIZE,
- },
- { .key = {NULL} }
-};
-
diff --git a/transport/socket/src/socket.h b/transport/socket/src/socket.h
deleted file mode 100644
index 50d7d0303..000000000
--- a/transport/socket/src/socket.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _SOCKET_H
-#define _SOCKET_H
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "event.h"
-#include "transport.h"
-#include "logging.h"
-#include "dict.h"
-#include "mem-pool.h"
-
-#ifndef MAX_IOVEC
-#define MAX_IOVEC 16
-#endif /* MAX_IOVEC */
-
-#define GF_DEFAULT_SOCKET_LISTEN_PORT 6996
-
-/* This is the size set through setsockopt for
- * both the TCP receive window size and the
- * send buffer size.
- * Till the time iobuf size becomes configurable, this size is set to include
- * two iobufs + the GlusterFS protocol headers.
- * Linux allows us to over-ride the max values for the system.
- * Should we over-ride them? Because if we set a value larger than the default
- * setsockopt will fail. Having larger values might be beneficial for
- * IB links.
- */
-#define GF_DEFAULT_SOCKET_WINDOW_SIZE (512 * GF_UNIT_KB)
-#define GF_MAX_SOCKET_WINDOW_SIZE (1 * GF_UNIT_MB)
-#define GF_MIN_SOCKET_WINDOW_SIZE (128 * GF_UNIT_KB)
-
-typedef enum {
- SOCKET_PROTO_STATE_NADA = 0,
- SOCKET_PROTO_STATE_HEADER_COMING,
- SOCKET_PROTO_STATE_HEADER_CAME,
- SOCKET_PROTO_STATE_DATA_COMING,
- SOCKET_PROTO_STATE_DATA_CAME,
- SOCKET_PROTO_STATE_COMPLETE,
-} socket_proto_state_t;
-
-struct socket_header {
- char colonO[3];
- uint32_t size1;
- uint32_t size2;
- char version;
-} __attribute__((packed));
-
-
-struct ioq {
- union {
- struct list_head list;
- struct {
- struct ioq *next;
- struct ioq *prev;
- };
- };
- struct socket_header header;
- struct iovec vector[MAX_IOVEC];
- int count;
- struct iovec *pending_vector;
- int pending_count;
- char *buf;
- struct iobref *iobref;
-};
-
-
-typedef struct {
- int32_t sock;
- int32_t idx;
- unsigned char connected; // -1 = not connected. 0 = in progress. 1 = connected
- char bio;
- char connect_finish_log;
- char submit_log;
- union {
- struct list_head ioq;
- struct {
- struct ioq *ioq_next;
- struct ioq *ioq_prev;
- };
- };
- struct {
- int state;
- struct socket_header header;
- char *hdr_p;
- size_t hdrlen;
- struct iobuf *iobuf;
- char *buf_p;
- size_t buflen;
- struct iovec vector[2];
- int count;
- struct iovec *pending_vector;
- int pending_count;
- } incoming;
- pthread_mutex_t lock;
- int windowsize;
-} socket_private_t;
-
-
-#endif
diff --git a/xlators/Makefile.am b/xlators/Makefile.am
index 2abb52194..f60fa85ce 100644
--- a/xlators/Makefile.am
+++ b/xlators/Makefile.am
@@ -1,3 +1,4 @@
-SUBDIRS = cluster storage protocol performance debug features encryption mount
+SUBDIRS = cluster storage protocol performance debug features encryption mount nfs mgmt system \
+ playground
-CLEANFILES =
+CLEANFILES =
diff --git a/xlators/bindings/python/src/Makefile.am b/xlators/bindings/python/src/Makefile.am
index c0b9141c6..90370d861 100644
--- a/xlators/bindings/python/src/Makefile.am
+++ b/xlators/bindings/python/src/Makefile.am
@@ -9,7 +9,7 @@ pythondir = $(xlatordir)/python
python_so_SOURCES = python.c
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall \
+AM_CFLAGS = -fPIC $(GF_CPPFLAGS) -Wall \
-I$(top_srcdir)/libglusterfs/src -shared -nostartfiles \
$(PYTHON_CPPLAGS) -DGLUSTER_PYTHON_PATH=\"$(pythondir)\"
diff --git a/xlators/bindings/python/src/gluster.py b/xlators/bindings/python/src/gluster.py
index ee0eb1310..337c983ec 100644
--- a/xlators/bindings/python/src/gluster.py
+++ b/xlators/bindings/python/src/gluster.py
@@ -1,19 +1,12 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
+
+# Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
from ctypes import *
from glustertypes import *
from glusterstack import *
diff --git a/xlators/bindings/python/src/glusterstack.py b/xlators/bindings/python/src/glusterstack.py
index ba24c8165..0c071ae98 100644
--- a/xlators/bindings/python/src/glusterstack.py
+++ b/xlators/bindings/python/src/glusterstack.py
@@ -1,19 +1,12 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
+
+# Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
from ctypes import *
from glustertypes import *
diff --git a/xlators/bindings/python/src/glustertypes.py b/xlators/bindings/python/src/glustertypes.py
index e9069d07c..98437d22e 100644
--- a/xlators/bindings/python/src/glustertypes.py
+++ b/xlators/bindings/python/src/glustertypes.py
@@ -1,19 +1,12 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
+
+# Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
from ctypes import *
import collections
diff --git a/xlators/bindings/python/src/python.c b/xlators/bindings/python/src/python.c
index 7f32d7f29..9b96790de 100644
--- a/xlators/bindings/python/src/python.c
+++ b/xlators/bindings/python/src/python.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2007-2009 Chris AtLee <chris@atlee.ca>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#include <Python.h>
#ifndef _CONFIG_H
@@ -45,7 +35,7 @@ python_writev (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
struct iovec *vector,
- int32_t count,
+ int32_t count,
off_t offset)
{
python_private_t *priv = (python_private_t *)this->private;
@@ -77,9 +67,6 @@ struct xlator_fops fops = {
.writev = python_writev
};
-struct xlator_mops mops = {
-};
-
static PyObject *
AnonModule_FromFile (const char* fname)
{
@@ -151,7 +138,7 @@ init (xlator_t *this)
Py_InitializeEx(0);
if (!this->children) {
- gf_log ("python", GF_LOG_ERROR,
+ gf_log ("python", GF_LOG_ERROR,
"FATAL: python should have exactly one child");
return -1;
}
@@ -169,7 +156,7 @@ init (xlator_t *this)
}
priv->pInterp = Py_NewInterpreter();
-
+
// Adjust python's path
PyObject *syspath = PySys_GetObject("path");
PyObject *path = PyString_FromString(GLUSTER_PYTHON_PATH);
@@ -191,7 +178,7 @@ init (xlator_t *this)
priv->pVectorType = PyObject_GetAttrString(priv->pGlusterModule, "iovec");
gf_log("python", GF_LOG_DEBUG, "Loading script...%s", priv->scriptname);
-
+
priv->pScriptModule = AnonModule_FromFile(priv->scriptname);
if (!priv->pScriptModule || PyErr_Occurred())
{
@@ -220,7 +207,7 @@ init (xlator_t *this)
return 0;
}
-void
+void
fini (xlator_t *this)
{
python_private_t *priv = (python_private_t*)(this->private);
diff --git a/xlators/bindings/python/src/testxlator.py b/xlators/bindings/python/src/testxlator.py
index 507455c85..59a991dca 100644
--- a/xlators/bindings/python/src/testxlator.py
+++ b/xlators/bindings/python/src/testxlator.py
@@ -1,19 +1,12 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+"""
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+"""
"""
This is a test translator written in python.
diff --git a/xlators/cluster/Makefile.am b/xlators/cluster/Makefile.am
index a6ddb3564..0990822a7 100644
--- a/xlators/cluster/Makefile.am
+++ b/xlators/cluster/Makefile.am
@@ -1,3 +1,3 @@
-SUBDIRS = unify stripe afr dht ha map
+SUBDIRS = stripe afr dht
CLEANFILES =
diff --git a/xlators/cluster/afr/src/Makefile.am b/xlators/cluster/afr/src/Makefile.am
index 1bde9e5ba..35d18a6c0 100644
--- a/xlators/cluster/afr/src/Makefile.am
+++ b/xlators/cluster/afr/src/Makefile.am
@@ -1,20 +1,37 @@
-xlator_LTLIBRARIES = afr.la
+xlator_LTLIBRARIES = afr.la pump.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
-afr_la_LDFLAGS = -module -avoidversion
+afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c \
+ afr-inode-write.c afr-open.c afr-transaction.c afr-self-heal-data.c \
+ afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c \
+ afr-self-heal-algorithm.c afr-lk-common.c afr-self-heald.c \
+ $(top_builddir)/xlators/lib/src/libxlator.c
-afr_la_SOURCES = afr.c afr-dir-read.c afr-dir-write.c afr-inode-read.c afr-inode-write.c afr-transaction.c afr-self-heal-data.c afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c
+afr_la_LDFLAGS = -module -avoid-version
+afr_la_SOURCES = $(afr_common_source) afr.c
afr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h
+pump_la_LDFLAGS = -module -avoid-version
+pump_la_SOURCES = $(afr_common_source) pump.c
+pump_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h \
+ afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h \
+ afr-self-heal-algorithm.h pump.h afr-mem-types.h afr-common.c \
+ afr-self-heald.h $(top_builddir)/xlators/lib/src/libxlator.h \
+ $(top_builddir)/glusterfsd/src/glusterfsd.h
-CLEANFILES =
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/xlators/lib/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
uninstall-local:
rm -f $(DESTDIR)$(xlatordir)/replicate.so
+ rm -f $(DESTDIR)$(xlatordir)/pump.so
install-data-hook:
- ln -sf afr.so $(DESTDIR)$(xlatordir)/replicate.so \ No newline at end of file
+ ln -sf afr.so $(DESTDIR)$(xlatordir)/replicate.so
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
new file mode 100644
index 000000000..af01f2ef2
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -0,0 +1,4591 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <libgen.h>
+#include <unistd.h>
+#include <fnmatch.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "afr.h"
+#include "dict.h"
+#include "xlator.h"
+#include "hashfn.h"
+#include "logging.h"
+#include "stack.h"
+#include "list.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "common-utils.h"
+#include "compat-errno.h"
+#include "compat.h"
+#include "byte-order.h"
+#include "statedump.h"
+#include "inode.h"
+
+#include "fd.h"
+
+#include "afr-inode-read.h"
+#include "afr-inode-write.h"
+#include "afr-dir-read.h"
+#include "afr-dir-write.h"
+#include "afr-transaction.h"
+#include "afr-self-heal.h"
+#include "afr-self-heal-common.h"
+#include "afr-self-heald.h"
+#include "pump.h"
+
+#define AFR_ICTX_OPENDIR_DONE_MASK 0x0000000100000000ULL
+#define AFR_ICTX_READ_CHILD_MASK 0x00000000FFFFFFFFULL
+#define AFR_STATISTICS_HISTORY_SIZE 50
+int
+afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this,
+ gf_boolean_t fail_conflict);
+void
+afr_children_copy (int32_t *dst, int32_t *src, unsigned int child_count)
+{
+ int i = 0;
+
+ for (i = 0; i < child_count; i++)
+ dst[i] = src[i];
+}
+
+void
+afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req, const char *path)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_set_uint64 (xattr_req, priv->pending_key[i],
+ 3 * sizeof(int32_t));
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to set dict value for %s",
+ path, priv->pending_key[i]);
+ /* 3 = data+metadata+entry */
+ }
+ ret = dict_set_int32 (xattr_req, GF_GFIDLESS_LOOKUP, 1);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s: failed to set gfidless "
+ "lookup", path);
+ }
+}
+
+int
+afr_lookup_xattr_req_prepare (afr_local_t *local, xlator_t *this,
+ dict_t *xattr_req, loc_t *loc, void **gfid_req)
+{
+ int ret = -ENOMEM;
+
+ GF_ASSERT (gfid_req);
+
+ *gfid_req = NULL;
+ local->xattr_req = dict_new ();
+ if (!local->xattr_req)
+ goto out;
+ if (xattr_req)
+ dict_copy (xattr_req, local->xattr_req);
+
+ afr_xattr_req_prepare (this, local->xattr_req, loc->path);
+ ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_INODELK_COUNT, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to set dict value for %s",
+ loc->path, GLUSTERFS_INODELK_COUNT);
+ }
+ ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to set dict value for %s",
+ loc->path, GLUSTERFS_ENTRYLK_COUNT);
+ }
+
+ ret = dict_set_uint32 (local->xattr_req, GLUSTERFS_PARENT_ENTRYLK, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: Unable to set dict value for %s",
+ loc->path, GLUSTERFS_PARENT_ENTRYLK);
+ }
+
+ ret = dict_get_ptr (local->xattr_req, "gfid-req", gfid_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s: failed to get the gfid from dict", loc->path);
+ *gfid_req = NULL;
+ } else {
+ if (loc->parent != NULL)
+ dict_del (local->xattr_req, "gfid-req");
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+void
+afr_lookup_save_gfid (uuid_t dst, void* new, const loc_t *loc)
+{
+ inode_t *inode = NULL;
+
+ inode = loc->inode;
+ if (inode && !uuid_is_null (inode->gfid))
+ uuid_copy (dst, inode->gfid);
+ else if (!uuid_is_null (loc->gfid))
+ uuid_copy (dst, loc->gfid);
+ else if (new && !uuid_is_null (new))
+ uuid_copy (dst, new);
+}
+
+int
+afr_errno_count (int32_t *children, int *child_errno,
+ unsigned int child_count, int32_t op_errno)
+{
+ int i = 0;
+ int errno_count = 0;
+ int child = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (children) {
+ child = children[i];
+ if (child == -1)
+ break;
+ } else {
+ child = i;
+ }
+ if (child_errno[child] == op_errno)
+ errno_count++;
+ }
+ return errno_count;
+}
+
+int32_t
+afr_set_dict_gfid (dict_t *dict, uuid_t gfid)
+{
+ int ret = 0;
+ uuid_t *pgfid = NULL;
+
+ GF_ASSERT (gfid);
+
+ pgfid = GF_CALLOC (1, sizeof (uuid_t), gf_common_mt_char);
+ if (!pgfid) {
+ ret = -1;
+ goto out;
+ }
+
+ uuid_copy (*pgfid, gfid);
+
+ ret = dict_set_dynptr (dict, "gfid-req", pgfid, sizeof (uuid_t));
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR, "gfid set failed");
+
+out:
+ if (ret && pgfid)
+ GF_FREE (pgfid);
+
+ return ret;
+}
+
+void
+afr_inode_ctx_destroy (afr_inode_ctx_t *ctx)
+{
+ if (!ctx)
+ return;
+ GF_FREE (ctx->fresh_children);
+ GF_FREE (ctx);
+}
+
+afr_inode_ctx_t*
+__afr_inode_ctx_get (inode_t *inode, xlator_t *this)
+{
+ int ret = 0;
+ uint64_t ctx_addr = 0;
+ afr_inode_ctx_t *ctx = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ ret = __inode_ctx_get (inode, this, &ctx_addr);
+ if (ret < 0)
+ ctx_addr = 0;
+ if (ctx_addr != 0) {
+ ctx = (afr_inode_ctx_t*) (long) ctx_addr;
+ goto out;
+ }
+ ctx = GF_CALLOC (1, sizeof (*ctx),
+ gf_afr_mt_inode_ctx_t);
+ if (!ctx)
+ goto fail;
+ ctx->fresh_children = GF_CALLOC (priv->child_count,
+ sizeof (*ctx->fresh_children),
+ gf_afr_mt_int32_t);
+ if (!ctx->fresh_children)
+ goto fail;
+ ret = __inode_ctx_put (inode, this, (uint64_t)ctx);
+ if (ret) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "failed to "
+ "set the inode ctx (%s)",
+ uuid_utoa (inode->gfid));
+ goto fail;
+ }
+
+out:
+ return ctx;
+
+fail:
+ afr_inode_ctx_destroy (ctx);
+ return NULL;
+}
+
+afr_inode_ctx_t*
+afr_inode_ctx_get (inode_t *inode, xlator_t *this)
+{
+ afr_inode_ctx_t *ctx = NULL;
+
+ LOCK (&inode->lock);
+ {
+ ctx = __afr_inode_ctx_get (inode, this);
+ }
+ UNLOCK (&inode->lock);
+ return ctx;
+}
+
+void
+afr_inode_get_ctx_params (xlator_t *this, inode_t *inode,
+ afr_inode_params_t *params)
+{
+ GF_ASSERT (inode);
+ GF_ASSERT (params);
+
+ afr_inode_ctx_t *ctx = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
+
+ priv = this->private;
+ LOCK (&inode->lock);
+ {
+ ctx = __afr_inode_ctx_get (inode, this);
+ if (!ctx)
+ goto unlock;
+ switch (params->op) {
+ case AFR_INODE_GET_READ_CTX:
+ fresh_children = params->u.read_ctx.children;
+ read_child = (int32_t)(ctx->masks &
+ AFR_ICTX_READ_CHILD_MASK);
+ params->u.read_ctx.read_child = read_child;
+ if (!fresh_children)
+ goto unlock;
+ for (i = 0; i < priv->child_count; i++)
+ fresh_children[i] = ctx->fresh_children[i];
+ break;
+ case AFR_INODE_GET_OPENDIR_DONE:
+ params->u.value = _gf_false;
+ if (ctx->masks & AFR_ICTX_OPENDIR_DONE_MASK)
+ params->u.value = _gf_true;
+ break;
+ default:
+ GF_ASSERT (0);
+ break;
+ }
+ }
+unlock:
+ UNLOCK (&inode->lock);
+}
+
+gf_boolean_t
+afr_is_split_brain (xlator_t *this, inode_t *inode)
+{
+ afr_inode_ctx_t *ctx = NULL;
+ gf_boolean_t spb = _gf_false;
+
+ ctx = afr_inode_ctx_get (inode, this);
+ if (!ctx)
+ goto out;
+ if ((ctx->mdata_spb == SPB) || (ctx->data_spb == SPB))
+ spb = _gf_true;
+out:
+ return spb;
+}
+
+gf_boolean_t
+afr_is_opendir_done (xlator_t *this, inode_t *inode)
+{
+ afr_inode_params_t params = {0};
+
+ params.op = AFR_INODE_GET_OPENDIR_DONE;
+ afr_inode_get_ctx_params (this, inode, &params);
+ return params.u.value;
+}
+
+int32_t
+afr_inode_get_read_ctx (xlator_t *this, inode_t *inode, int32_t *fresh_children)
+{
+ afr_inode_params_t params = {0};
+
+ params.op = AFR_INODE_GET_READ_CTX;
+ params.u.read_ctx.children = fresh_children;
+ afr_inode_get_ctx_params (this, inode, &params);
+ return params.u.read_ctx.read_child;
+}
+
+void
+afr_inode_ctx_set_read_child (afr_inode_ctx_t *ctx, int32_t read_child)
+{
+ uint64_t remaining_mask = 0;
+ uint64_t mask = 0;
+
+ remaining_mask = (~AFR_ICTX_READ_CHILD_MASK & ctx->masks);
+ mask = (AFR_ICTX_READ_CHILD_MASK & read_child);
+ ctx->masks = remaining_mask | mask;
+}
+
+void
+afr_inode_ctx_set_read_ctx (afr_inode_ctx_t *ctx, int32_t read_child,
+ int32_t *fresh_children, int32_t child_count)
+{
+ int i = 0;
+
+ afr_inode_ctx_set_read_child (ctx, read_child);
+ for (i = 0; i < child_count; i++) {
+ if (fresh_children)
+ ctx->fresh_children[i] = fresh_children[i];
+ else
+ ctx->fresh_children[i] = -1;
+ }
+}
+
+void
+afr_inode_ctx_rm_stale_children (afr_inode_ctx_t *ctx, int32_t *stale_children,
+ int32_t child_count)
+{
+ int i = 0;
+ int32_t read_child = -1;
+
+ GF_ASSERT (stale_children);
+ for (i = 0; i < child_count; i++) {
+ if (stale_children[i] == -1)
+ break;
+ afr_children_rm_child (ctx->fresh_children,
+ stale_children[i], child_count);
+ }
+ read_child = (int32_t)(ctx->masks & AFR_ICTX_READ_CHILD_MASK);
+ if (!afr_is_child_present (ctx->fresh_children, child_count,
+ read_child))
+ afr_inode_ctx_set_read_child (ctx, ctx->fresh_children[0]);
+}
+
+void
+afr_inode_ctx_set_opendir_done (afr_inode_ctx_t *ctx)
+{
+ uint64_t remaining_mask = 0;
+ uint64_t mask = 0;
+
+ remaining_mask = (~AFR_ICTX_OPENDIR_DONE_MASK & ctx->masks);
+ mask = (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_OPENDIR_DONE_MASK);
+ ctx->masks = remaining_mask | mask;
+}
+
+void
+afr_inode_set_ctx_params (xlator_t *this, inode_t *inode,
+ afr_inode_params_t *params)
+{
+ GF_ASSERT (inode);
+ GF_ASSERT (params);
+
+ afr_inode_ctx_t *ctx = NULL;
+ afr_private_t *priv = NULL;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
+ int32_t *stale_children = NULL;
+
+ priv = this->private;
+ LOCK (&inode->lock);
+ {
+ ctx = __afr_inode_ctx_get (inode, this);
+ if (!ctx)
+ goto unlock;
+ switch (params->op) {
+ case AFR_INODE_SET_READ_CTX:
+ read_child = params->u.read_ctx.read_child;
+ fresh_children = params->u.read_ctx.children;
+ afr_inode_ctx_set_read_ctx (ctx, read_child,
+ fresh_children,
+ priv->child_count);
+ break;
+ case AFR_INODE_RM_STALE_CHILDREN:
+ stale_children = params->u.read_ctx.children;
+ afr_inode_ctx_rm_stale_children (ctx,
+ stale_children,
+ priv->child_count);
+ break;
+ case AFR_INODE_SET_OPENDIR_DONE:
+ afr_inode_ctx_set_opendir_done (ctx);
+ break;
+ default:
+ GF_ASSERT (0);
+ break;
+ }
+ }
+unlock:
+ UNLOCK (&inode->lock);
+}
+
+void
+afr_set_split_brain (xlator_t *this, inode_t *inode, afr_spb_state_t mdata_spb,
+ afr_spb_state_t data_spb)
+{
+ afr_inode_ctx_t *ctx = NULL;
+
+ ctx = afr_inode_ctx_get (inode, this);
+ if (mdata_spb != DONT_KNOW)
+ ctx->mdata_spb = mdata_spb;
+ if (data_spb != DONT_KNOW)
+ ctx->data_spb = data_spb;
+}
+
+void
+afr_set_opendir_done (xlator_t *this, inode_t *inode)
+{
+ afr_inode_params_t params = {0};
+
+ params.op = AFR_INODE_SET_OPENDIR_DONE;
+ afr_inode_set_ctx_params (this, inode, &params);
+}
+
+void
+afr_inode_set_read_ctx (xlator_t *this, inode_t *inode, int32_t read_child,
+ int32_t *fresh_children)
+{
+ afr_inode_params_t params = {0};
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ GF_ASSERT (read_child >= 0);
+ GF_ASSERT (fresh_children);
+ GF_ASSERT (afr_is_child_present (fresh_children, priv->child_count,
+ read_child));
+
+ params.op = AFR_INODE_SET_READ_CTX;
+ params.u.read_ctx.read_child = read_child;
+ params.u.read_ctx.children = fresh_children;
+ afr_inode_set_ctx_params (this, inode, &params);
+}
+
+void
+afr_inode_rm_stale_children (xlator_t *this, inode_t *inode,
+ int32_t *stale_children)
+{
+ afr_inode_params_t params = {0};
+
+ GF_ASSERT (stale_children);
+
+ params.op = AFR_INODE_RM_STALE_CHILDREN;
+ params.u.read_ctx.children = stale_children;
+ afr_inode_set_ctx_params (this, inode, &params);
+}
+
+gf_boolean_t
+afr_is_source_child (int32_t *sources, int32_t child_count, int32_t child)
+{
+ gf_boolean_t source_xattrs = _gf_false;
+
+ GF_ASSERT (child < child_count);
+
+ if ((child >= 0) && (child < child_count) &&
+ sources[child]) {
+ source_xattrs = _gf_true;
+ }
+ return source_xattrs;
+}
+
+gf_boolean_t
+afr_is_child_present (int32_t *success_children, int32_t child_count,
+ int32_t child)
+{
+ gf_boolean_t success_child = _gf_false;
+ int i = 0;
+
+ GF_ASSERT (child < child_count);
+
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+ if (child == success_children[i]) {
+ success_child = _gf_true;
+ break;
+ }
+ }
+ return success_child;
+}
+
+gf_boolean_t
+afr_is_read_child (int32_t *success_children, int32_t *sources,
+ int32_t child_count, int32_t child)
+{
+ gf_boolean_t success_child = _gf_false;
+ gf_boolean_t source = _gf_false;
+
+ if (child < 0) {
+ return _gf_false;
+ }
+
+ GF_ASSERT (success_children);
+ GF_ASSERT (child_count > 0);
+
+ success_child = afr_is_child_present (success_children, child_count,
+ child);
+ if (!success_child)
+ goto out;
+ if (NULL == sources) {
+ source = _gf_true;
+ goto out;
+ }
+ source = afr_is_source_child (sources, child_count, child);
+out:
+ return (success_child && source);
+}
+
+int32_t
+afr_hash_child (int32_t *success_children, int32_t child_count,
+ unsigned int hmode, uuid_t gfid)
+{
+ uuid_t gfid_copy = {0,};
+ pid_t pid;
+
+ if (!hmode) {
+ return -1;
+ }
+
+ if (gfid) {
+ uuid_copy(gfid_copy,gfid);
+ }
+ if (hmode > 1) {
+ /*
+ * Why getpid? Because it's one of the cheapest calls
+ * available - faster than gethostname etc. - and returns a
+ * constant-length value that's sure to be shorter than a UUID.
+ * It's still very unlikely to be the same across clients, so
+ * it still provides good mixing. We're not trying for
+ * perfection here. All we need is a low probability that
+ * multiple clients won't converge on the same subvolume.
+ */
+ pid = getpid();
+ memcpy (gfid_copy, &pid, sizeof(pid));
+ }
+
+ return SuperFastHash((char *)gfid_copy,
+ sizeof(gfid_copy)) % child_count;
+}
+
+/* If sources is NULL the xattrs are assumed to be of source for all
+ * success_children.
+ */
+int
+afr_select_read_child_from_policy (int32_t *success_children,
+ int32_t child_count, int32_t prev_read_child,
+ int32_t config_read_child, int32_t *sources,
+ unsigned int hmode, uuid_t gfid)
+{
+ int32_t read_child = -1;
+ int i = 0;
+
+ GF_ASSERT (success_children);
+
+ read_child = config_read_child;
+ if (afr_is_read_child (success_children, sources, child_count,
+ read_child))
+ goto out;
+
+ read_child = prev_read_child;
+ if (afr_is_read_child (success_children, sources, child_count,
+ read_child))
+ goto out;
+
+ read_child = afr_hash_child (success_children, child_count,
+ hmode, gfid);
+ if (afr_is_read_child (success_children, sources, child_count,
+ read_child)) {
+ goto out;
+ }
+
+ for (i = 0; i < child_count; i++) {
+ read_child = success_children[i];
+ if (read_child < 0)
+ break;
+ if (afr_is_read_child (success_children, sources, child_count,
+ read_child))
+ goto out;
+ }
+ read_child = -1;
+
+out:
+ return read_child;
+}
+
+/* This function should be used when all the success_children are sources
+ */
+void
+afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode,
+ int32_t *fresh_children, int32_t prev_read_child,
+ int32_t config_read_child, uuid_t gfid)
+{
+ int read_child = -1;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ read_child = afr_select_read_child_from_policy (fresh_children,
+ priv->child_count,
+ prev_read_child,
+ config_read_child,
+ NULL,
+ priv->hash_mode, gfid);
+ if (read_child >= 0)
+ afr_inode_set_read_ctx (this, inode, read_child,
+ fresh_children);
+}
+
+/* afr_next_call_child ()
+ * This is a common function used by all the read-type fops
+ * This function should not be called with the inode's read_children array.
+ * The fop's handler should make a copy of the inode's read_children,
+ * preferred read_child into the local vars, because while this function is
+ * in execution there is a chance for inode's read_ctx to change.
+ */
+int32_t
+afr_next_call_child (int32_t *fresh_children, unsigned char *child_up,
+ size_t child_count, int32_t *last_index,
+ int32_t read_child)
+{
+ int next_index = 0;
+ int32_t next_call_child = -1;
+
+ GF_ASSERT (last_index);
+
+ next_index = *last_index;
+retry:
+ next_index++;
+ if ((next_index >= child_count) ||
+ (fresh_children[next_index] == -1))
+ goto out;
+ if ((fresh_children[next_index] == read_child) ||
+ (!child_up[fresh_children[next_index]]))
+ goto retry;
+ *last_index = next_index;
+ next_call_child = fresh_children[next_index];
+out:
+ return next_call_child;
+}
+
+ /* This function should not be called with the inode's read_children array.
+ * The fop's handler should make a copy of the inode's read_children,
+ * preferred read_child into the local vars, because while this function is
+ * in execution there is a chance for inode's read_ctx to change.
+ */
+int32_t
+afr_get_call_child (xlator_t *this, unsigned char *child_up, int32_t read_child,
+ int32_t *fresh_children,
+ int32_t *call_child, int32_t *last_index)
+{
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ GF_ASSERT (child_up);
+ GF_ASSERT (call_child);
+ GF_ASSERT (last_index);
+ GF_ASSERT (fresh_children);
+
+ if (read_child < 0) {
+ ret = -EIO;
+ goto out;
+ }
+ priv = this->private;
+ *call_child = -1;
+ *last_index = -1;
+
+ if (child_up[read_child]) {
+ *call_child = read_child;
+ } else {
+ for (i = 0; i < priv->child_count; i++) {
+ if (fresh_children[i] == -1)
+ break;
+ if (child_up[fresh_children[i]]) {
+ *call_child = fresh_children[i];
+ ret = 0;
+ break;
+ }
+ }
+
+ if (*call_child == -1) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+ *last_index = i;
+ }
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d, call_child: %d, "
+ "last_index: %d", ret, *call_child, *last_index);
+ return ret;
+}
+
+void
+afr_reset_xattr (dict_t **xattr, unsigned int child_count)
+{
+ unsigned int i = 0;
+
+ if (!xattr)
+ goto out;
+ for (i = 0; i < child_count; i++) {
+ if (xattr[i]) {
+ dict_unref (xattr[i]);
+ xattr[i] = NULL;
+ }
+ }
+out:
+ return;
+}
+
+void
+afr_xattr_array_destroy (dict_t **xattr, unsigned int child_count)
+{
+ afr_reset_xattr (xattr, child_count);
+ GF_FREE (xattr);
+}
+
+void
+afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
+{
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+
+ sh = &local->self_heal;
+ priv = this->private;
+
+ if (sh->data_sh_info && strcmp (sh->data_sh_info, ""))
+ GF_FREE (sh->data_sh_info);
+
+ if (sh->metadata_sh_info && strcmp (sh->metadata_sh_info, ""))
+ GF_FREE (sh->metadata_sh_info);
+
+ GF_FREE (sh->buf);
+
+ GF_FREE (sh->parentbufs);
+
+ if (sh->inode)
+ inode_unref (sh->inode);
+
+ afr_xattr_array_destroy (sh->xattr, priv->child_count);
+
+ GF_FREE (sh->child_errno);
+
+ afr_matrix_cleanup (sh->pending_matrix, priv->child_count);
+ afr_matrix_cleanup (sh->delta_matrix, priv->child_count);
+
+ GF_FREE (sh->sources);
+
+ GF_FREE (sh->success);
+
+ GF_FREE (sh->locked_nodes);
+
+ if (sh->healing_fd) {
+ fd_unref (sh->healing_fd);
+ sh->healing_fd = NULL;
+ }
+
+ GF_FREE ((char *)sh->linkname);
+
+ GF_FREE (sh->success_children);
+
+ GF_FREE (sh->fresh_children);
+
+ GF_FREE (sh->fresh_parent_dirs);
+
+ loc_wipe (&sh->parent_loc);
+ loc_wipe (&sh->lookup_loc);
+
+ GF_FREE (sh->checksum);
+
+ GF_FREE (sh->write_needed);
+ if (sh->healing_fd)
+ fd_unref (sh->healing_fd);
+}
+
+
+void
+afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ priv = this->private;
+
+ afr_matrix_cleanup (local->pending, priv->child_count);
+ afr_matrix_cleanup (local->transaction.txn_changelog,
+ priv->child_count);
+
+ GF_FREE (local->internal_lock.locked_nodes);
+
+ for (i = 0; local->internal_lock.inodelk[i].domain; i++) {
+ GF_FREE (local->internal_lock.inodelk[i].locked_nodes);
+ }
+
+ GF_FREE (local->internal_lock.lower_locked_nodes);
+
+ afr_entry_lockee_cleanup (&local->internal_lock);
+
+ GF_FREE (local->transaction.pre_op);
+ GF_FREE (local->transaction.eager_lock);
+
+ GF_FREE (local->transaction.basename);
+ GF_FREE (local->transaction.new_basename);
+
+ loc_wipe (&local->transaction.parent_loc);
+ loc_wipe (&local->transaction.new_parent_loc);
+
+ GF_FREE (local->transaction.postop_piggybacked);
+}
+
+
+void
+afr_local_cleanup (afr_local_t *local, xlator_t *this)
+{
+ afr_private_t * priv = NULL;
+
+ if (!local)
+ return;
+
+ afr_local_sh_cleanup (local, this);
+
+ afr_local_transaction_cleanup (local, this);
+
+ priv = this->private;
+
+ loc_wipe (&local->loc);
+ loc_wipe (&local->newloc);
+
+ if (local->fd)
+ fd_unref (local->fd);
+
+ if (local->xattr_req)
+ dict_unref (local->xattr_req);
+
+ if (local->dict)
+ dict_unref (local->dict);
+
+ GF_FREE(local->replies);
+
+ GF_FREE (local->child_up);
+
+ GF_FREE (local->child_errno);
+
+ GF_FREE (local->fresh_children);
+
+ { /* lookup */
+ if (local->cont.lookup.xattrs) {
+ afr_reset_xattr (local->cont.lookup.xattrs,
+ priv->child_count);
+ GF_FREE (local->cont.lookup.xattrs);
+ local->cont.lookup.xattrs = NULL;
+ }
+
+ if (local->cont.lookup.xattr) {
+ dict_unref (local->cont.lookup.xattr);
+ }
+
+ if (local->cont.lookup.inode) {
+ inode_unref (local->cont.lookup.inode);
+ }
+
+ GF_FREE (local->cont.lookup.postparents);
+
+ GF_FREE (local->cont.lookup.bufs);
+
+ GF_FREE (local->cont.lookup.success_children);
+
+ GF_FREE (local->cont.lookup.sources);
+ afr_matrix_cleanup (local->cont.lookup.pending_matrix,
+ priv->child_count);
+ }
+
+ { /* getxattr */
+ GF_FREE (local->cont.getxattr.name);
+ }
+
+ { /* lk */
+ GF_FREE (local->cont.lk.locked_nodes);
+ }
+
+ { /* create */
+ if (local->cont.create.fd)
+ fd_unref (local->cont.create.fd);
+ if (local->cont.create.params)
+ dict_unref (local->cont.create.params);
+ }
+
+ { /* mknod */
+ if (local->cont.mknod.params)
+ dict_unref (local->cont.mknod.params);
+ }
+
+ { /* mkdir */
+ if (local->cont.mkdir.params)
+ dict_unref (local->cont.mkdir.params);
+ }
+
+ { /* symlink */
+ if (local->cont.symlink.params)
+ dict_unref (local->cont.symlink.params);
+ }
+
+ { /* writev */
+ GF_FREE (local->cont.writev.vector);
+ }
+
+ { /* setxattr */
+ if (local->cont.setxattr.dict)
+ dict_unref (local->cont.setxattr.dict);
+ }
+
+ { /* fsetxattr */
+ if (local->cont.fsetxattr.dict)
+ dict_unref (local->cont.fsetxattr.dict);
+ }
+
+ { /* removexattr */
+ GF_FREE (local->cont.removexattr.name);
+ }
+ { /* xattrop */
+ if (local->cont.xattrop.xattr)
+ dict_unref (local->cont.xattrop.xattr);
+ }
+ { /* fxattrop */
+ if (local->cont.fxattrop.xattr)
+ dict_unref (local->cont.fxattrop.xattr);
+ }
+ { /* symlink */
+ GF_FREE (local->cont.symlink.linkpath);
+ }
+
+ { /* opendir */
+ GF_FREE (local->cont.opendir.checksum);
+ }
+
+ { /* readdirp */
+ if (local->cont.readdir.dict)
+ dict_unref (local->cont.readdir.dict);
+ }
+
+ if (local->xdata_req)
+ dict_unref (local->xdata_req);
+
+ if (local->xdata_rsp)
+ dict_unref (local->xdata_rsp);
+}
+
+
+int
+afr_frame_return (call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
+ int call_count = 0;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ call_count = --local->call_count;
+ }
+ UNLOCK (&frame->lock);
+
+ return call_count;
+}
+
+int
+afr_set_elem_count_get (unsigned char *elems, int child_count)
+{
+ int i = 0;
+ int ret = 0;
+
+ for (i = 0; i < child_count; i++)
+ if (elems[i])
+ ret++;
+ return ret;
+}
+
+/**
+ * up_children_count - return the number of children that are up
+ */
+
+unsigned int
+afr_up_children_count (unsigned char *child_up, unsigned int child_count)
+{
+ return afr_set_elem_count_get (child_up, child_count);
+}
+
+unsigned int
+afr_locked_children_count (unsigned char *children, unsigned int child_count)
+{
+ return afr_set_elem_count_get (children, child_count);
+}
+
+unsigned int
+afr_pre_op_done_children_count (unsigned char *pre_op,
+ unsigned int child_count)
+{
+ return afr_set_elem_count_get (pre_op, child_count);
+}
+
+gf_boolean_t
+afr_is_fresh_lookup (loc_t *loc, xlator_t *this)
+{
+ uint64_t ctx = 0;
+ int32_t ret = 0;
+
+ GF_ASSERT (loc);
+ GF_ASSERT (this);
+ GF_ASSERT (loc->inode);
+
+ ret = inode_ctx_get (loc->inode, this, &ctx);
+ if (0 == ret)
+ return _gf_false;
+ return _gf_true;
+}
+
+void
+afr_update_loc_gfids (loc_t *loc, struct iatt *buf, struct iatt *postparent)
+{
+ GF_ASSERT (loc);
+ GF_ASSERT (buf);
+
+ uuid_copy (loc->gfid, buf->ia_gfid);
+ if (postparent)
+ uuid_copy (loc->pargfid, postparent->ia_gfid);
+}
+
+/*
+ * Quota size xattrs are not maintained by afr. There is a
+ * possibility that they differ even when both the directory changelog xattrs
+ * suggest everything is fine. So if there is at least one 'source' check among
+ * the sources which has the maximum quota size. Otherwise check among all the
+ * available ones for maximum quota size. This way if there is a source and
+ * stale copies it always votes for the 'source'.
+ * */
+
+static void
+afr_handle_quota_size (afr_local_t *local, xlator_t *this,
+ dict_t *rsp_dict)
+{
+ int32_t *sources = NULL;
+ dict_t *xattr = NULL;
+ data_t *max_data = NULL;
+ int64_t max_quota_size = -1;
+ data_t *data = NULL;
+ int64_t *size = NULL;
+ int64_t quota_size = -1;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int ret = -1;
+ gf_boolean_t source_present = _gf_false;
+
+ priv = this->private;
+ sources = local->cont.lookup.sources;
+
+ if (rsp_dict == NULL) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "%s: Invalid "
+ "response dictionary", local->loc.path);
+ return;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i]) {
+ source_present = _gf_true;
+ break;
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ /*
+ * If there is at least one source lets check
+ * for maximum quota sizes among sources, otherwise take the
+ * maximum of the ones present to be on the safer side.
+ */
+ if (source_present && !sources[i])
+ continue;
+
+ xattr = local->cont.lookup.xattrs[i];
+ if (!xattr)
+ continue;
+
+ data = dict_get (xattr, QUOTA_SIZE_KEY);
+ if (!data)
+ continue;
+
+ size = (int64_t*)data->data;
+ quota_size = ntoh64(*size);
+ gf_log (this->name, GF_LOG_DEBUG, "%s: %d, size: %"PRId64,
+ local->loc.path, i, quota_size);
+ if (quota_size > max_quota_size) {
+ if (max_data)
+ data_unref (max_data);
+
+ max_quota_size = quota_size;
+ max_data = data_ref (data);
+ }
+ }
+
+ if (max_data) {
+ ret = dict_set (rsp_dict, QUOTA_SIZE_KEY, max_data);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set "
+ "quota size", local->loc.path);
+ }
+
+ data_unref (max_data);
+ }
+}
+
+int
+afr_lookup_build_response_params (afr_local_t *local, xlator_t *this)
+{
+ struct iatt *buf = NULL;
+ struct iatt *postparent = NULL;
+ dict_t **xattr = NULL;
+ int32_t *success_children = NULL;
+ int32_t *sources = NULL;
+ afr_private_t *priv = NULL;
+ int32_t read_child = -1;
+ int ret = 0;
+ int i = 0;
+
+ GF_ASSERT (local);
+
+ buf = &local->cont.lookup.buf;
+ postparent = &local->cont.lookup.postparent;
+ xattr = &local->cont.lookup.xattr;
+ priv = this->private;
+
+ read_child = afr_inode_get_read_ctx (this, local->cont.lookup.inode,
+ local->fresh_children);
+ if (read_child < 0) {
+ ret = -1;
+ goto out;
+ }
+ success_children = local->cont.lookup.success_children;
+ sources = local->cont.lookup.sources;
+ memset (sources, 0, sizeof (*sources) * priv->child_count);
+ afr_children_intersection_get (local->fresh_children, success_children,
+ sources, priv->child_count);
+ if (!sources[read_child]) {
+ read_child = -1;
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i]) {
+ read_child = i;
+ break;
+ }
+ }
+ }
+ if (read_child < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "Building lookup response from %d",
+ read_child);
+ if (!*xattr)
+ *xattr = dict_ref (local->cont.lookup.xattrs[read_child]);
+
+ *buf = local->cont.lookup.bufs[read_child];
+ *postparent = local->cont.lookup.postparents[read_child];
+
+ if (dict_get (local->xattr_req, QUOTA_SIZE_KEY))
+ afr_handle_quota_size (local, this, *xattr);
+
+ if (IA_INVAL == local->cont.lookup.inode->ia_type) {
+ /* fix for RT #602 */
+ local->cont.lookup.inode->ia_type = buf->ia_type;
+ }
+out:
+ return ret;
+}
+
+static void
+afr_lookup_update_lk_counts (afr_local_t *local, xlator_t *this,
+ int child_index, dict_t *xattr)
+{
+ uint32_t inodelk_count = 0;
+ uint32_t entrylk_count = 0;
+ int ret = -1;
+ uint32_t parent_entrylk = 0;
+
+ GF_ASSERT (local);
+ GF_ASSERT (this);
+ GF_ASSERT (xattr);
+ GF_ASSERT (child_index >= 0);
+
+ ret = dict_get_uint32 (xattr, GLUSTERFS_INODELK_COUNT,
+ &inodelk_count);
+ if (ret == 0)
+ local->inodelk_count += inodelk_count;
+
+ ret = dict_get_uint32 (xattr, GLUSTERFS_ENTRYLK_COUNT,
+ &entrylk_count);
+ if (ret == 0)
+ local->entrylk_count += entrylk_count;
+ ret = dict_get_uint32 (xattr, GLUSTERFS_PARENT_ENTRYLK,
+ &parent_entrylk);
+ if (!ret)
+ local->cont.lookup.parent_entrylk += parent_entrylk;
+}
+
+/*
+ * It's important to maintain a commutative property on do_*_self_heal and
+ * found*; once set, they must not be cleared by a subsequent iteration or
+ * call, so that they represent a logical OR of all iterations and calls
+ * regardless of child/key order. That allows the caller to call us multiple
+ * times without having to use a separate variable as a "reduce" accumulator.
+ */
+static void
+afr_lookup_set_self_heal_params_by_xattr (afr_local_t *local, xlator_t *this,
+ dict_t *xattr)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int ret = -1;
+ void *pending_raw = NULL;
+ int32_t *pending = NULL;
+
+ GF_ASSERT (local);
+ GF_ASSERT (this);
+ GF_ASSERT (xattr);
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_get_ptr (xattr, priv->pending_key[i],
+ &pending_raw);
+ if (ret != 0) {
+ continue;
+ }
+ pending = pending_raw;
+
+ if (pending[AFR_METADATA_TRANSACTION]) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "metadata self-heal is pending for %s.",
+ local->loc.path);
+ local->self_heal.do_metadata_self_heal = _gf_true;
+ }
+
+ if (pending[AFR_ENTRY_TRANSACTION]) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "entry self-heal is pending for %s.",
+ local->loc.path);
+ local->self_heal.do_entry_self_heal = _gf_true;
+ }
+
+ if (pending[AFR_DATA_TRANSACTION]) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "data self-heal is pending for %s.",
+ local->loc.path);
+ local->self_heal.do_data_self_heal = _gf_true;
+ }
+ }
+}
+
+void
+afr_lookup_check_set_metadata_split_brain (afr_local_t *local, xlator_t *this)
+{
+ int32_t *sources = NULL;
+ afr_private_t *priv = NULL;
+ int32_t subvol_status = 0;
+ int32_t *success_children = NULL;
+ dict_t **xattrs = NULL;
+ struct iatt *bufs = NULL;
+ int32_t **pending_matrix = NULL;
+
+ priv = this->private;
+
+ sources = GF_CALLOC (priv->child_count, sizeof (*sources),
+ gf_afr_mt_int32_t);
+ if (NULL == sources)
+ goto out;
+ success_children = local->cont.lookup.success_children;
+ xattrs = local->cont.lookup.xattrs;
+ bufs = local->cont.lookup.bufs;
+ pending_matrix = local->cont.lookup.pending_matrix;
+ afr_build_sources (this, xattrs, bufs, pending_matrix,
+ sources, success_children, AFR_METADATA_TRANSACTION,
+ &subvol_status, _gf_false);
+ if (subvol_status & SPLIT_BRAIN)
+ local->cont.lookup.possible_spb = _gf_true;
+out:
+ GF_FREE (sources);
+}
+
+static void
+afr_detect_self_heal_by_iatt (afr_local_t *local, xlator_t *this,
+ struct iatt *buf, struct iatt *lookup_buf)
+{
+ if (PERMISSION_DIFFERS (buf, lookup_buf)) {
+ /* mismatching permissions */
+ gf_log (this->name, GF_LOG_DEBUG,
+ "permissions differ for %s ", local->loc.path);
+ local->self_heal.do_metadata_self_heal = _gf_true;
+ }
+
+ if (OWNERSHIP_DIFFERS (buf, lookup_buf)) {
+ /* mismatching permissions */
+ local->self_heal.do_metadata_self_heal = _gf_true;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "ownership differs for %s ", local->loc.path);
+ }
+
+ if (SIZE_DIFFERS (buf, lookup_buf)
+ && IA_ISREG (buf->ia_type)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "size differs for %s ", local->loc.path);
+ local->self_heal.do_data_self_heal = _gf_true;
+ }
+
+ if (uuid_compare (buf->ia_gfid, lookup_buf->ia_gfid)) {
+ /* mismatching gfid */
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s: gfid different on subvolume", local->loc.path);
+ }
+}
+
+static void
+afr_detect_self_heal_by_split_brain_status (afr_local_t *local, xlator_t *this)
+{
+ gf_boolean_t split_brain = _gf_false;
+ afr_self_heal_t *sh = NULL;
+
+ sh = &local->self_heal;
+
+ split_brain = afr_is_split_brain (this, local->cont.lookup.inode);
+ split_brain = split_brain || local->cont.lookup.possible_spb;
+ if ((local->success_count > 0) && split_brain &&
+ IA_ISREG (local->cont.lookup.inode->ia_type)) {
+ sh->force_confirm_spb = _gf_true;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "split brain detected during lookup of %s.",
+ local->loc.path);
+ }
+}
+
+static void
+afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this)
+{
+ GF_ASSERT (local);
+ GF_ASSERT (this);
+
+ if ((local->success_count > 0) && (local->enoent_count > 0)) {
+ local->self_heal.do_metadata_self_heal = _gf_true;
+ local->self_heal.do_data_self_heal = _gf_true;
+ local->self_heal.do_entry_self_heal = _gf_true;
+ local->self_heal.do_gfid_self_heal = _gf_true;
+ local->self_heal.do_missing_entry_self_heal = _gf_true;
+ gf_log(this->name, GF_LOG_DEBUG,
+ "entries are missing in lookup of %s.",
+ local->loc.path);
+ }
+
+ return;
+}
+
+gf_boolean_t
+afr_can_self_heal_proceed (afr_self_heal_t *sh, afr_private_t *priv)
+{
+ GF_ASSERT (sh);
+ GF_ASSERT (priv);
+
+ if (sh->force_confirm_spb)
+ return _gf_true;
+ return (sh->do_gfid_self_heal
+ || sh->do_missing_entry_self_heal
+ || (afr_data_self_heal_enabled (priv->data_self_heal) &&
+ sh->do_data_self_heal)
+ || (priv->metadata_self_heal && sh->do_metadata_self_heal)
+ || (priv->entry_self_heal && sh->do_entry_self_heal));
+}
+
+afr_transaction_type
+afr_transaction_type_get (ia_type_t ia_type)
+{
+ afr_transaction_type type = AFR_METADATA_TRANSACTION;
+
+ GF_ASSERT (ia_type != IA_INVAL);
+
+ if (IA_ISDIR (ia_type)) {
+ type = AFR_ENTRY_TRANSACTION;
+ } else if (IA_ISREG (ia_type)) {
+ type = AFR_DATA_TRANSACTION;
+ }
+ return type;
+}
+
+int
+afr_lookup_select_read_child (afr_local_t *local, xlator_t *this,
+ int32_t *read_child)
+{
+ ia_type_t ia_type = IA_INVAL;
+ int32_t source = -1;
+ int ret = -1;
+ dict_t **xattrs = NULL;
+ int32_t *success_children = NULL;
+ afr_transaction_type type = AFR_METADATA_TRANSACTION;
+ uuid_t *gfid = NULL;
+
+ GF_ASSERT (local);
+ GF_ASSERT (this);
+ GF_ASSERT (local->success_count > 0);
+
+ success_children = local->cont.lookup.success_children;
+ /*We can take the success_children[0] only because we already
+ *handle the conflicting children other wise, we could select the
+ *read_child based on wrong file type
+ */
+ ia_type = local->cont.lookup.bufs[success_children[0]].ia_type;
+ type = afr_transaction_type_get (ia_type);
+ xattrs = local->cont.lookup.xattrs;
+ gfid = &local->cont.lookup.buf.ia_gfid;
+ source = afr_lookup_select_read_child_by_txn_type (this, local, xattrs,
+ type, *gfid);
+ if (source < 0) {
+ gf_log (this->name, GF_LOG_DEBUG, "failed to select source "
+ "for %s", local->loc.path);
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "Source selected as %d for %s",
+ source, local->loc.path);
+ *read_child = source;
+ ret = 0;
+out:
+ return ret;
+}
+
+static inline gf_boolean_t
+afr_is_transaction_running (afr_local_t *local)
+{
+ GF_ASSERT (local->fop == GF_FOP_LOOKUP);
+ return ((local->inodelk_count > 0) || (local->entrylk_count > 0));
+}
+
+void
+afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ gf_boolean_t background, ia_type_t ia_type, char *reason,
+ void (*gfid_sh_success_cbk) (call_frame_t *sh_frame,
+ xlator_t *this),
+ int (*unwind) (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ int32_t sh_failed))
+{
+ afr_local_t *local = NULL;
+ char sh_type_str[256] = {0,};
+ char *bg = "";
+
+ GF_ASSERT (frame);
+ GF_ASSERT (this);
+ GF_ASSERT (inode);
+ GF_ASSERT (ia_type != IA_INVAL);
+
+ local = frame->local;
+ local->self_heal.background = background;
+ local->self_heal.type = ia_type;
+ local->self_heal.unwind = unwind;
+ local->self_heal.gfid_sh_success_cbk = gfid_sh_success_cbk;
+
+ afr_self_heal_type_str_get (&local->self_heal,
+ sh_type_str,
+ sizeof (sh_type_str));
+
+ if (background)
+ bg = "background";
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s %s self-heal triggered. path: %s, reason: %s", bg,
+ sh_type_str, local->loc.path, reason);
+
+ afr_self_heal (frame, this, inode);
+}
+
+unsigned int
+afr_gfid_missing_count (const char *xlator_name, int32_t *success_children,
+ struct iatt *bufs, unsigned int child_count,
+ const char *path)
+{
+ unsigned int gfid_miss_count = 0;
+ int i = 0;
+ struct iatt *child1 = NULL;
+
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+ child1 = &bufs[success_children[i]];
+ if (uuid_is_null (child1->ia_gfid)) {
+ gf_log (xlator_name, GF_LOG_DEBUG, "%s: gfid is null"
+ " on subvolume %d", path, success_children[i]);
+ gfid_miss_count++;
+ }
+ }
+
+ return gfid_miss_count;
+}
+
+static int
+afr_lookup_gfid_missing_count (afr_local_t *local, xlator_t *this)
+{
+ int32_t *success_children = NULL;
+ afr_private_t *priv = NULL;
+ struct iatt *bufs = NULL;
+ int miss_count = 0;
+
+ priv = this->private;
+ bufs = local->cont.lookup.bufs;
+ success_children = local->cont.lookup.success_children;
+
+ miss_count = afr_gfid_missing_count (this->name, success_children,
+ bufs, priv->child_count,
+ local->loc.path);
+ return miss_count;
+}
+
+gf_boolean_t
+afr_conflicting_iattrs (struct iatt *bufs, int32_t *success_children,
+ unsigned int child_count, const char *path,
+ const char *xlator_name)
+{
+ gf_boolean_t conflicting = _gf_false;
+ int i = 0;
+ struct iatt *child1 = NULL;
+ struct iatt *child2 = NULL;
+ uuid_t *gfid = NULL;
+
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+ child1 = &bufs[success_children[i]];
+ if ((!gfid) && (!uuid_is_null (child1->ia_gfid)))
+ gfid = &child1->ia_gfid;
+
+ if (i == 0)
+ continue;
+
+ child2 = &bufs[success_children[i-1]];
+ if (FILETYPE_DIFFERS (child1, child2)) {
+ gf_log (xlator_name, GF_LOG_DEBUG, "%s: filetype "
+ "differs on subvolumes (%d, %d)", path,
+ success_children[i-1], success_children[i]);
+ conflicting = _gf_true;
+ goto out;
+ }
+ if (!gfid || uuid_is_null (child1->ia_gfid))
+ continue;
+ if (uuid_compare (*gfid, child1->ia_gfid)) {
+ gf_log (xlator_name, GF_LOG_DEBUG, "%s: gfid differs"
+ " on subvolume %d", path, success_children[i]);
+ conflicting = _gf_true;
+ goto out;
+ }
+ }
+out:
+ return conflicting;
+}
+
+/* afr_update_gfid_from_iatts: This function should be called only if the
+ * iatts are not conflicting.
+ */
+void
+afr_update_gfid_from_iatts (uuid_t uuid, struct iatt *bufs,
+ int32_t *success_children, unsigned int child_count)
+{
+ uuid_t *gfid = NULL;
+ int i = 0;
+ int child = 0;
+
+ for (i = 0; i < child_count; i++) {
+ child = success_children[i];
+ if (child == -1)
+ break;
+ if ((!gfid) && (!uuid_is_null (bufs[child].ia_gfid))) {
+ gfid = &bufs[child].ia_gfid;
+ } else if (gfid && (!uuid_is_null (bufs[child].ia_gfid))) {
+ if (uuid_compare (*gfid, bufs[child].ia_gfid)) {
+ GF_ASSERT (0);
+ goto out;
+ }
+ }
+ }
+ if (gfid && (!uuid_is_null (*gfid)))
+ uuid_copy (uuid, *gfid);
+out:
+ return;
+}
+
+static gf_boolean_t
+afr_lookup_conflicting_entries (afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ gf_boolean_t conflict = _gf_false;
+
+ priv = this->private;
+ conflict = afr_conflicting_iattrs (local->cont.lookup.bufs,
+ local->cont.lookup.success_children,
+ priv->child_count, local->loc.path,
+ this->name);
+ return conflict;
+}
+
+gf_boolean_t
+afr_open_only_data_self_heal (char *data_self_heal)
+{
+ return !strcmp (data_self_heal, "open");
+}
+
+gf_boolean_t
+afr_data_self_heal_enabled (char *data_self_heal)
+{
+ gf_boolean_t enabled = _gf_false;
+
+ if (gf_string2boolean (data_self_heal, &enabled) == -1) {
+ enabled = !strcmp (data_self_heal, "open");
+ GF_ASSERT (enabled);
+ }
+
+ return enabled;
+}
+
+static void
+afr_lookup_set_self_heal_params (afr_local_t *local, xlator_t *this)
+{
+ int i = 0;
+ struct iatt *bufs = NULL;
+ dict_t **xattr = NULL;
+ afr_private_t *priv = NULL;
+ int32_t child1 = -1;
+ int32_t child2 = -1;
+ afr_self_heal_t *sh = NULL;
+
+ priv = this->private;
+ sh = &local->self_heal;
+
+ afr_detect_self_heal_by_lookup_status (local, this);
+
+ if (afr_lookup_gfid_missing_count (local, this))
+ local->self_heal.do_gfid_self_heal = _gf_true;
+
+ if (_gf_true == afr_lookup_conflicting_entries (local, this))
+ local->self_heal.do_missing_entry_self_heal = _gf_true;
+ else
+ afr_update_gfid_from_iatts (local->self_heal.sh_gfid_req,
+ local->cont.lookup.bufs,
+ local->cont.lookup.success_children,
+ priv->child_count);
+
+ bufs = local->cont.lookup.bufs;
+ for (i = 1; i < local->success_count; i++) {
+ child1 = local->cont.lookup.success_children[i-1];
+ child2 = local->cont.lookup.success_children[i];
+ afr_detect_self_heal_by_iatt (local, this,
+ &bufs[child1], &bufs[child2]);
+ }
+
+ xattr = local->cont.lookup.xattrs;
+ for (i = 0; i < local->success_count; i++) {
+ child1 = local->cont.lookup.success_children[i];
+ afr_lookup_set_self_heal_params_by_xattr (local, this,
+ xattr[child1]);
+ }
+ if (afr_open_only_data_self_heal (priv->data_self_heal))
+ sh->do_data_self_heal = _gf_false;
+ if (sh->do_metadata_self_heal)
+ afr_lookup_check_set_metadata_split_brain (local, this);
+ afr_detect_self_heal_by_split_brain_status (local, this);
+}
+
+int
+afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ int32_t sh_failed)
+{
+ afr_local_t *local = NULL;
+ int ret = -1;
+ dict_t *xattr = NULL;
+
+ local = frame->local;
+
+ if (op_ret == -1) {
+ local->op_ret = -1;
+ local->op_errno = afr_most_important_error(local->op_errno,
+ op_errno, _gf_true);
+
+ goto out;
+ } else {
+ local->op_ret = 0;
+ }
+
+ afr_lookup_done_success_action (frame, this, _gf_true);
+ xattr = local->cont.lookup.xattr;
+ if (xattr) {
+ ret = dict_set_int32 (xattr, "sh-failed", sh_failed);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set "
+ "sh-failed to %d", local->loc.path, sh_failed);
+
+ if (local->self_heal.actual_sh_started == _gf_true &&
+ sh_failed == 0) {
+ ret = dict_set_int32 (xattr, "actual-sh-done", 1);
+ if (ret)
+ gf_log(this->name, GF_LOG_ERROR, "%s: Failed to"
+ " set actual-sh-done to %d",
+ local->loc.path,
+ local->self_heal.actual_sh_started);
+ }
+ }
+out:
+ AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
+ local->cont.lookup.inode, &local->cont.lookup.buf,
+ local->cont.lookup.xattr,
+ &local->cont.lookup.postparent);
+
+ return 0;
+}
+
+//TODO: At the moment only lookup needs this, so not doing any checks, in the
+// future we will have to do fop specific operations
+void
+afr_post_gfid_sh_success (call_frame_t *sh_frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_local_t *sh_local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+ int i = 0;
+ struct iatt *lookup_bufs = NULL;
+ struct iatt *lookup_parentbufs = NULL;
+
+ sh_local = sh_frame->local;
+ sh = &sh_local->self_heal;
+ local = sh->orig_frame->local;
+ lookup_bufs = local->cont.lookup.bufs;
+ lookup_parentbufs = local->cont.lookup.postparents;
+ priv = this->private;
+
+ memcpy (lookup_bufs, sh->buf, priv->child_count * sizeof (*sh->buf));
+ memcpy (lookup_parentbufs, sh->parentbufs,
+ priv->child_count * sizeof (*sh->parentbufs));
+
+ afr_reset_xattr (local->cont.lookup.xattrs, priv->child_count);
+ if (local->cont.lookup.xattr) {
+ dict_unref (local->cont.lookup.xattr);
+ local->cont.lookup.xattr = NULL;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sh->xattr[i])
+ local->cont.lookup.xattrs[i] = dict_ref (sh->xattr[i]);
+ }
+
+ afr_reset_children (local->cont.lookup.success_children,
+ priv->child_count);
+ afr_children_copy (local->cont.lookup.success_children,
+ sh->fresh_children, priv->child_count);
+}
+
+static void
+afr_lookup_perform_self_heal (call_frame_t *frame, xlator_t *this,
+ gf_boolean_t *sh_launched)
+{
+ unsigned int up_count = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ char *reason = NULL;
+
+ GF_ASSERT (sh_launched);
+ *sh_launched = _gf_false;
+ priv = this->private;
+ local = frame->local;
+
+ up_count = afr_up_children_count (local->child_up, priv->child_count);
+ if (up_count == 1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Only 1 child up - do not attempt to detect self heal");
+ goto out;
+ }
+
+ afr_lookup_set_self_heal_params (local, this);
+ if (afr_can_self_heal_proceed (&local->self_heal, priv)) {
+ if (afr_is_transaction_running (local) &&
+ (!local->allow_sh_for_running_transaction))
+ goto out;
+
+ reason = "lookup detected pending operations";
+ afr_launch_self_heal (frame, this, local->cont.lookup.inode,
+ _gf_true, local->cont.lookup.buf.ia_type,
+ reason, afr_post_gfid_sh_success,
+ afr_self_heal_lookup_unwind);
+ *sh_launched = _gf_true;
+ }
+out:
+ return;
+}
+
+void
+afr_get_fresh_children (int32_t *success_children, int32_t *sources,
+ int32_t *fresh_children, unsigned int child_count)
+{
+ unsigned int i = 0;
+ unsigned int j = 0;
+
+ GF_ASSERT (success_children);
+ GF_ASSERT (sources);
+ GF_ASSERT (fresh_children);
+
+ afr_reset_children (fresh_children, child_count);
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+ if (afr_is_read_child (success_children, sources, child_count,
+ success_children[i])) {
+ fresh_children[j] = success_children[i];
+ j++;
+ }
+ }
+}
+
+static int
+afr_lookup_set_read_ctx (afr_local_t *local, xlator_t *this, int32_t read_child)
+{
+ afr_private_t *priv = NULL;
+
+ GF_ASSERT (read_child >= 0);
+
+ priv = this->private;
+ afr_get_fresh_children (local->cont.lookup.success_children,
+ local->cont.lookup.sources,
+ local->fresh_children, priv->child_count);
+ afr_inode_set_read_ctx (this, local->cont.lookup.inode, read_child,
+ local->fresh_children);
+
+ return 0;
+}
+
+int
+afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this,
+ gf_boolean_t fail_conflict)
+{
+ int32_t read_child = -1;
+ int32_t ret = -1;
+ afr_local_t *local = NULL;
+ gf_boolean_t fresh_lookup = _gf_false;
+
+ local = frame->local;
+ fresh_lookup = local->cont.lookup.fresh_lookup;
+
+ if (local->loc.parent == NULL)
+ fail_conflict = _gf_true;
+
+ if (afr_lookup_conflicting_entries (local, this)) {
+ if (fail_conflict == _gf_false)
+ ret = 0;
+ goto out;
+ }
+
+ ret = afr_lookup_select_read_child (local, this, &read_child);
+ if (!afr_is_transaction_running (local) || fresh_lookup) {
+ if (read_child < 0)
+ goto out;
+
+ ret = afr_lookup_set_read_ctx (local, this, read_child);
+ if (ret)
+ goto out;
+ }
+
+ ret = afr_lookup_build_response_params (local, this);
+ if (ret)
+ goto out;
+ afr_update_loc_gfids (&local->loc,
+ &local->cont.lookup.buf,
+ &local->cont.lookup.postparent);
+
+ ret = 0;
+out:
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ }
+ return ret;
+}
+
+int
+afr_lookup_get_latest_subvol (afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int32_t *success_children = NULL;
+ struct iatt *bufs = NULL;
+ int i = 0;
+ int child = 0;
+ int lsubvol = -1;
+
+ priv = this->private;
+ success_children = local->cont.lookup.success_children;
+ bufs = local->cont.lookup.bufs;
+ for (i = 0; i < priv->child_count; i++) {
+ child = success_children[i];
+ if (child == -1)
+ break;
+ if (uuid_is_null (bufs[child].ia_gfid))
+ continue;
+ if (lsubvol < 0) {
+ lsubvol = child;
+ } else if (bufs[lsubvol].ia_ctime < bufs[child].ia_ctime) {
+ lsubvol = child;
+ } else if ((bufs[lsubvol].ia_ctime == bufs[child].ia_ctime) &&
+ (bufs[lsubvol].ia_ctime_nsec < bufs[child].ia_ctime_nsec)) {
+ lsubvol = child;
+ }
+ }
+ return lsubvol;
+}
+
+void
+afr_lookup_mark_other_entries_stale (afr_local_t *local, xlator_t *this,
+ int subvol)
+{
+ afr_private_t *priv = NULL;
+ int32_t *success_children = NULL;
+ struct iatt *bufs = NULL;
+ int i = 0;
+ int child = 0;
+
+ priv = this->private;
+ success_children = local->cont.lookup.success_children;
+ bufs = local->cont.lookup.bufs;
+ memcpy (local->fresh_children, success_children,
+ sizeof (*success_children) * priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ child = local->fresh_children[i];
+ if (child == -1)
+ break;
+ if (child == subvol)
+ continue;
+ if (uuid_is_null (bufs[child].ia_gfid) &&
+ (bufs[child].ia_type == bufs[subvol].ia_type))
+ continue;
+ afr_children_rm_child (success_children, child,
+ priv->child_count);
+ local->success_count--;
+ }
+ afr_reset_children (local->fresh_children, priv->child_count);
+}
+
+void
+afr_succeed_lookup_on_latest_iatt (afr_local_t *local, xlator_t *this)
+{
+ int lsubvol = 0;
+
+ if (!afr_lookup_conflicting_entries (local, this))
+ goto out;
+
+ lsubvol = afr_lookup_get_latest_subvol (local, this);
+ if (lsubvol < 0)
+ goto out;
+ afr_lookup_mark_other_entries_stale (local, this, lsubvol);
+out:
+ return;
+}
+
+gf_boolean_t
+afr_is_entry_possibly_under_creation (afr_local_t *local, xlator_t *this)
+{
+ /*
+ * We need to perform this test in lookup done and treat on going
+ * create/DELETE as ENOENT.
+ * Reason:
+ Multiple clients A, B and C are attempting 'mkdir -p /mnt/a/b/c'
+
+ 1 Client A is in the middle of mkdir(/a). It has acquired lock.
+ It has performed mkdir(/a) on one subvol, and second one is still
+ in progress
+ 2 Client B performs a lookup, sees directory /a on one,
+ ENOENT on the other, succeeds lookup.
+ 3 Client B performs lookup on /a/b on both subvols, both return ENOENT
+ (one subvol because /a/b does not exist, another because /a
+ itself does not exist)
+ 4 Client B proceeds to mkdir /a/b. It obtains entrylk on inode=/a with
+ basename=b on one subvol, but fails on other subvol as /a is yet to
+ be created by Client A.
+ 5 Client A finishes mkdir of /a on other subvol
+ 6 Client C also attempts to create /a/b, lookup returns ENOENT on
+ both subvols.
+ 7 Client C tries to obtain entrylk on on inode=/a with basename=b,
+ obtains on one subvol (where B had failed), and waits for B to unlock
+ on other subvol.
+ 8 Client B finishes mkdir() on one subvol with GFID-1 and completes
+ transaction and unlocks
+ 9 Client C gets the lock on the second subvol, At this stage second
+ subvol already has /a/b created from Client B, but Client C does not
+ check that in the middle of mkdir transaction
+ 10 Client C attempts mkdir /a/b on both subvols. It succeeds on
+ ONLY ONE (where Client B could not get lock because of
+ missing parent /a dir) with GFID-2, and gets EEXIST from ONE subvol.
+ This way we have /a/b in GFID mismatch. One subvol got GFID-1 because
+ Client B performed transaction on only one subvol (because entrylk()
+ could not be obtained on second subvol because of missing parent dir --
+ caused by premature/speculative succeeding of lookup() on /a when locks
+ are detected). Other subvol gets GFID-2 from Client C because while
+ it was waiting for entrylk() on both subvols, Client B was in the
+ middle of creating mkdir() on only one subvol, and Client C does not
+ "expect" this when it is between lock() and pre-op()/op() phase of the
+ transaction.
+ */
+ if (local->cont.lookup.parent_entrylk && local->enoent_count)
+ return _gf_true;
+
+ return _gf_false;
+}
+
+
+static void
+afr_lookup_done (call_frame_t *frame, xlator_t *this)
+{
+ int unwind = 1;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ gf_boolean_t sh_launched = _gf_false;
+ gf_boolean_t fail_conflict = _gf_false;
+ int gfid_miss_count = 0;
+ int enotconn_count = 0;
+ int up_children_count = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (afr_is_entry_possibly_under_creation (local, this)) {
+ local->op_ret = -1;
+ local->op_errno = ENOENT;
+ goto unwind;
+ }
+
+ if (local->op_ret < 0)
+ goto unwind;
+
+ if (local->cont.lookup.parent_entrylk && local->success_count > 1)
+ afr_succeed_lookup_on_latest_iatt (local, this);
+
+ gfid_miss_count = afr_lookup_gfid_missing_count (local, this);
+ up_children_count = afr_up_children_count (local->child_up,
+ priv->child_count);
+ enotconn_count = priv->child_count - up_children_count;
+ if ((gfid_miss_count == local->success_count) &&
+ (enotconn_count > 0)) {
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ gf_log (this->name, GF_LOG_ERROR, "Failing lookup for %s, "
+ "LOOKUP on a file without gfid is not allowed when "
+ "some of the children are down", local->loc.path);
+ goto unwind;
+ }
+
+ if ((gfid_miss_count == local->success_count) &&
+ uuid_is_null (local->cont.lookup.gfid_req)) {
+ local->op_ret = -1;
+ local->op_errno = ENODATA;
+ gf_log (this->name, GF_LOG_ERROR, "%s: No gfid present",
+ local->loc.path);
+ goto unwind;
+ }
+
+ if (gfid_miss_count && uuid_is_null (local->cont.lookup.gfid_req))
+ fail_conflict = _gf_true;
+ ret = afr_lookup_done_success_action (frame, this, fail_conflict);
+ if (ret)
+ goto unwind;
+ uuid_copy (local->self_heal.sh_gfid_req, local->cont.lookup.gfid_req);
+
+ afr_lookup_perform_self_heal (frame, this, &sh_launched);
+ if (sh_launched) {
+ unwind = 0;
+ goto unwind;
+ }
+
+ unwind:
+ if (unwind) {
+ AFR_STACK_UNWIND (lookup, frame, local->op_ret,
+ local->op_errno, local->cont.lookup.inode,
+ &local->cont.lookup.buf,
+ local->cont.lookup.xattr,
+ &local->cont.lookup.postparent);
+ }
+}
+
+/*
+ * During a lookup, some errors are more "important" than
+ * others in that they must be given higher priority while
+ * returning to the user.
+ *
+ * The hierarchy is ESTALE > EIO > ENOENT > others
+ */
+int32_t
+afr_most_important_error(int32_t old_errno, int32_t new_errno,
+ gf_boolean_t eio)
+{
+ if (old_errno == ESTALE || new_errno == ESTALE)
+ return ESTALE;
+ if (eio && (old_errno == EIO || new_errno == EIO))
+ return EIO;
+ if (old_errno == ENOENT || new_errno == ENOENT)
+ return ENOENT;
+
+ return new_errno;
+}
+
+int32_t
+afr_resultant_errno_get (int32_t *children,
+ int *child_errno, unsigned int child_count)
+{
+ int i = 0;
+ int32_t op_errno = 0;
+ int child = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (children) {
+ child = children[i];
+ if (child == -1)
+ break;
+ } else {
+ child = i;
+ }
+ op_errno = afr_most_important_error(op_errno,
+ child_errno[child],
+ _gf_false);
+ }
+ return op_errno;
+}
+
+static void
+afr_lookup_handle_error (afr_local_t *local, int32_t op_ret, int32_t op_errno)
+{
+ GF_ASSERT (local);
+ if (op_errno == ENOENT)
+ local->enoent_count++;
+
+ local->op_errno = afr_most_important_error(local->op_errno, op_errno,
+ _gf_false);
+
+ if (local->op_errno == ESTALE) {
+ local->op_ret = -1;
+ }
+}
+
+static void
+afr_set_root_inode_on_first_lookup (afr_local_t *local, xlator_t *this,
+ inode_t *inode)
+{
+ afr_private_t *priv = NULL;
+ GF_ASSERT (inode);
+
+ if (!__is_root_gfid (inode->gfid))
+ goto out;
+ if (!afr_is_fresh_lookup (&local->loc, this))
+ goto out;
+ priv = this->private;
+ if ((priv->first_lookup)) {
+ gf_log (this->name, GF_LOG_INFO, "added root inode");
+ priv->root_inode = inode_ref (inode);
+ priv->first_lookup = 0;
+ }
+out:
+ return;
+}
+
+static void
+afr_lookup_cache_args (afr_local_t *local, int child_index, dict_t *xattr,
+ struct iatt *buf, struct iatt *postparent)
+{
+ GF_ASSERT (child_index >= 0);
+ local->cont.lookup.xattrs[child_index] = dict_ref (xattr);
+ local->cont.lookup.postparents[child_index] = *postparent;
+ local->cont.lookup.bufs[child_index] = *buf;
+}
+
+static void
+afr_lookup_handle_first_success (afr_local_t *local, xlator_t *this,
+ inode_t *inode, struct iatt *buf)
+{
+ local->cont.lookup.inode = inode_ref (inode);
+ local->cont.lookup.buf = *buf;
+ afr_set_root_inode_on_first_lookup (local, this, inode);
+}
+
+static int32_t
+afr_discovery_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ int ret = 0;
+ char *pathinfo = NULL;
+ gf_boolean_t is_local = _gf_false;
+ afr_private_t *priv = NULL;
+ int32_t child_index = -1;
+
+ if (op_ret != 0) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, GF_XATTR_PATHINFO_KEY, &pathinfo);
+ if (ret != 0) {
+ goto out;
+ }
+
+ ret = afr_local_pathinfo (pathinfo, &is_local);
+ if (ret) {
+ goto out;
+ }
+
+ priv = this->private;
+ /*
+ * Note that one local subvolume will override another here. The only
+ * way to avoid that would be to retain extra information about whether
+ * the previous read_child is local, and it's just not worth it. Even
+ * the slowest local subvolume is far preferable to a remote one.
+ */
+ if (is_local) {
+ child_index = (int32_t)(long)cookie;
+ gf_log (this->name, GF_LOG_INFO,
+ "selecting local read_child %s",
+ priv->children[child_index]->name);
+ priv->read_child = child_index;
+ }
+
+out:
+ STACK_DESTROY(frame->root);
+ return 0;
+}
+
+static void
+afr_attempt_local_discovery (xlator_t *this, int32_t child_index)
+{
+ call_frame_t *newframe = NULL;
+ loc_t tmploc = {0,};
+ afr_private_t *priv = this->private;
+
+ newframe = create_frame(this,this->ctx->pool);
+ if (!newframe) {
+ return;
+ }
+
+ tmploc.gfid[sizeof(tmploc.gfid)-1] = 1;
+ STACK_WIND_COOKIE (newframe, afr_discovery_cbk,
+ (void *)(long)child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->getxattr,
+ &tmploc, GF_XATTR_PATHINFO_KEY, NULL);
+}
+
+static void
+afr_lookup_handle_success (afr_local_t *local, xlator_t *this, int32_t child_index,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ afr_private_t *priv = this->private;
+
+ if (local->success_count == 0) {
+ if (local->op_errno != ESTALE) {
+ local->op_ret = op_ret;
+ local->op_errno = 0;
+ }
+ afr_lookup_handle_first_success (local, this, inode, buf);
+ }
+ afr_lookup_update_lk_counts (local, this,
+ child_index, xattr);
+
+ afr_lookup_cache_args (local, child_index, xattr,
+ buf, postparent);
+
+ if (local->do_discovery && (priv->read_child == (-1))) {
+ afr_attempt_local_discovery(this,child_index);
+ }
+
+ local->cont.lookup.success_children[local->success_count] = child_index;
+ local->success_count++;
+}
+
+int
+afr_lookup_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ afr_local_t * local = NULL;
+ int call_count = -1;
+ int child_index = -1;
+
+ child_index = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ local = frame->local;
+
+ if (op_ret == -1) {
+ afr_lookup_handle_error (local, op_ret, op_errno);
+ goto unlock;
+ }
+ afr_lookup_handle_success (local, this, child_index, op_ret,
+ op_errno, inode, buf, xattr,
+ postparent);
+
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ afr_lookup_done (frame, this);
+ }
+
+ return 0;
+}
+
+int
+afr_lookup_cont_init (afr_local_t *local, unsigned int child_count)
+{
+ int ret = -ENOMEM;
+ struct iatt *iatts = NULL;
+ int32_t *success_children = NULL;
+ int32_t *sources = NULL;
+ int32_t **pending_matrix = NULL;
+
+ GF_ASSERT (local);
+ local->cont.lookup.xattrs = GF_CALLOC (child_count,
+ sizeof (*local->cont.lookup.xattr),
+ gf_afr_mt_dict_t);
+ if (NULL == local->cont.lookup.xattrs)
+ goto out;
+
+ iatts = GF_CALLOC (child_count, sizeof (*iatts), gf_afr_mt_iatt);
+ if (NULL == iatts)
+ goto out;
+ local->cont.lookup.postparents = iatts;
+
+ iatts = GF_CALLOC (child_count, sizeof (*iatts), gf_afr_mt_iatt);
+ if (NULL == iatts)
+ goto out;
+ local->cont.lookup.bufs = iatts;
+
+ success_children = afr_children_create (child_count);
+ if (NULL == success_children)
+ goto out;
+ local->cont.lookup.success_children = success_children;
+
+ local->fresh_children = afr_children_create (child_count);
+ if (NULL == local->fresh_children)
+ goto out;
+
+ sources = GF_CALLOC (sizeof (*sources), child_count, gf_afr_mt_int32_t);
+ if (NULL == sources)
+ goto out;
+ local->cont.lookup.sources = sources;
+
+ pending_matrix = afr_matrix_create (child_count, child_count);
+ if (NULL == pending_matrix)
+ goto out;
+ local->cont.lookup.pending_matrix = pending_matrix;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+afr_lookup (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xattr_req)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ void *gfid_req = NULL;
+ int ret = -1;
+ int i = 0;
+ int call_count = 0;
+ uint64_t ctx = 0;
+ int32_t op_errno = 0;
+ int allow_sh = 0;
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (local, out);
+
+ local->op_ret = -1;
+
+ frame->local = local;
+ local->fop = GF_FOP_LOOKUP;
+
+ loc_copy (&local->loc, loc);
+ ret = loc_path (&local->loc, NULL);
+ if (ret < 0) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ if (local->loc.path &&
+ (strcmp (local->loc.path, "/" GF_REPLICATE_TRASH_DIR) == 0)) {
+ op_errno = EPERM;
+ ret = -1;
+ goto out;
+ }
+
+ ret = inode_ctx_get (local->loc.inode, this, &ctx);
+ if (ret == 0) {
+ /* lookup is a revalidate */
+
+ local->read_child_index = afr_inode_get_read_ctx (this,
+ local->loc.inode,
+ NULL);
+ } else {
+ LOCK (&priv->read_child_lock);
+ {
+ if (priv->hash_mode) {
+ local->read_child_index = -1;
+ }
+ else {
+ local->read_child_index =
+ (++priv->read_child_rr) %
+ (priv->child_count);
+ }
+ }
+ UNLOCK (&priv->read_child_lock);
+ local->cont.lookup.fresh_lookup = _gf_true;
+ }
+
+ local->child_up = memdup (priv->child_up,
+ sizeof (*local->child_up) * priv->child_count);
+ if (NULL == local->child_up) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ ret = afr_lookup_cont_init (local, priv->child_count);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ local->call_count = afr_up_children_count (local->child_up,
+ priv->child_count);
+ call_count = local->call_count;
+ if (local->call_count == 0) {
+ ret = -1;
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ /* By default assume ENOTCONN. On success it will be set to 0. */
+ local->op_errno = ENOTCONN;
+
+ ret = dict_get_int32 (xattr_req, "allow-sh-for-running-transaction",
+ &allow_sh);
+ dict_del (xattr_req, "allow-sh-for-running-transaction");
+ local->allow_sh_for_running_transaction = allow_sh;
+
+ ret = afr_lookup_xattr_req_prepare (local, this, xattr_req, &local->loc,
+ &gfid_req);
+ if (ret) {
+ local->op_errno = -ret;
+ goto out;
+ }
+ afr_lookup_save_gfid (local->cont.lookup.gfid_req, gfid_req,
+ &local->loc);
+ local->fop = GF_FOP_LOOKUP;
+ if (priv->choose_local && !priv->did_discovery) {
+ if (gfid_req && __is_root_gfid(gfid_req)) {
+ local->do_discovery = _gf_true;
+ priv->did_discovery = _gf_true;
+ }
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_lookup_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->lookup,
+ &local->loc, local->xattr_req);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret)
+ AFR_STACK_UNWIND (lookup, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+/* {{{ open */
+
+int
+__afr_fd_ctx_set (xlator_t *this, fd_t *fd)
+{
+ afr_private_t * priv = NULL;
+ int ret = -1;
+ uint64_t ctx = 0;
+ afr_fd_ctx_t * fd_ctx = NULL;
+
+ VALIDATE_OR_GOTO (this->private, out);
+ VALIDATE_OR_GOTO (fd, out);
+
+ priv = this->private;
+
+ ret = __fd_ctx_get (fd, this, &ctx);
+
+ if (ret == 0)
+ goto out;
+
+ fd_ctx = GF_CALLOC (1, sizeof (afr_fd_ctx_t),
+ gf_afr_mt_afr_fd_ctx_t);
+ if (!fd_ctx) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ fd_ctx->pre_op_done = GF_CALLOC (sizeof (*fd_ctx->pre_op_done),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->pre_op_done) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ fd_ctx->pre_op_piggyback = GF_CALLOC (sizeof (*fd_ctx->pre_op_piggyback),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->pre_op_piggyback) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ fd_ctx->opened_on = GF_CALLOC (sizeof (*fd_ctx->opened_on),
+ priv->child_count,
+ gf_afr_mt_int32_t);
+ if (!fd_ctx->opened_on) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ fd_ctx->lock_piggyback = GF_CALLOC (sizeof (*fd_ctx->lock_piggyback),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->lock_piggyback) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ fd_ctx->lock_acquired = GF_CALLOC (sizeof (*fd_ctx->lock_acquired),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->lock_acquired) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ fd_ctx->up_count = priv->up_count;
+ fd_ctx->down_count = priv->down_count;
+
+ fd_ctx->locked_on = GF_CALLOC (sizeof (*fd_ctx->locked_on),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!fd_ctx->locked_on) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ pthread_mutex_init (&fd_ctx->delay_lock, NULL);
+ INIT_LIST_HEAD (&fd_ctx->entries);
+ fd_ctx->call_child = -1;
+
+ INIT_LIST_HEAD (&fd_ctx->eager_locked);
+
+ ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx);
+ if (ret)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to set fd ctx (%p)", fd);
+out:
+ return ret;
+}
+
+
+int
+afr_fd_ctx_set (xlator_t *this, fd_t *fd)
+{
+ int ret = -1;
+
+ LOCK (&fd->lock);
+ {
+ ret = __afr_fd_ctx_set (this, fd);
+ }
+ UNLOCK (&fd->lock);
+
+ return ret;
+}
+
+/* {{{ flush */
+
+int
+afr_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t * local = NULL;
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret != -1) {
+ if (local->success_count == 0) {
+ local->op_ret = op_ret;
+ }
+ local->success_count++;
+ }
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND(flush, frame, local->op_ret,
+ local->op_errno, NULL);
+
+ return 0;
+}
+
+static int
+afr_flush_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+
+ priv = this->private;
+ local = frame->local;
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_flush_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->flush,
+ local->fd, NULL);
+ if (!--call_count)
+ break;
+
+ }
+ }
+
+ return 0;
+}
+
+int
+afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ int ret = -1;
+ int op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init(local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ local->fd = fd_ref(fd);
+ stub = fop_flush_stub (frame, afr_flush_wrapper, fd, xdata);
+ if (!stub) {
+ ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ afr_delayed_changelog_wake_resume (this, fd, stub);
+ ret = 0;
+
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND(flush, frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+/* }}} */
+
+
+int
+afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)
+{
+ uint64_t ctx = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = 0;
+
+ ret = fd_ctx_get (fd, this, &ctx);
+ if (ret < 0)
+ goto out;
+
+ fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+
+ if (fd_ctx) {
+ GF_FREE (fd_ctx->pre_op_done);
+
+ GF_FREE (fd_ctx->opened_on);
+
+ GF_FREE (fd_ctx->locked_on);
+
+ GF_FREE (fd_ctx->pre_op_piggyback);
+ GF_FREE (fd_ctx->lock_piggyback);
+
+ GF_FREE (fd_ctx->lock_acquired);
+
+ pthread_mutex_destroy (&fd_ctx->delay_lock);
+
+ GF_FREE (fd_ctx);
+ }
+
+out:
+ return 0;
+}
+
+
+int
+afr_release (xlator_t *this, fd_t *fd)
+{
+ afr_locked_fd_t *locked_fd = NULL;
+ afr_locked_fd_t *tmp = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ afr_cleanup_fd_ctx (this, fd);
+
+ list_for_each_entry_safe (locked_fd, tmp, &priv->saved_fds,
+ list) {
+
+ if (locked_fd->fd == fd) {
+ list_del_init (&locked_fd->list);
+ GF_FREE (locked_fd);
+ }
+
+ }
+
+ return 0;
+}
+
+
+/* {{{ fsync */
+
+int
+afr_fsync_unwind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ AFR_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int
+afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int call_count = -1;
+ int child_index = (long) cookie;
+ int read_child = 0;
+ call_stub_t *stub = NULL;
+
+ local = frame->local;
+
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
+
+ LOCK (&frame->lock);
+ {
+ if (child_index == read_child) {
+ local->read_child_returned = _gf_true;
+ }
+
+ if (op_ret == 0) {
+ local->op_ret = 0;
+
+ if (local->success_count == 0) {
+ local->cont.inode_wfop.prebuf = *prebuf;
+ local->cont.inode_wfop.postbuf = *postbuf;
+ }
+
+ if (child_index == read_child) {
+ local->cont.inode_wfop.prebuf = *prebuf;
+ local->cont.inode_wfop.postbuf = *postbuf;
+ }
+
+ local->success_count++;
+ }
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ /* Make a stub out of the frame, and register it
+ with the waking up post-op. When the call-stub resumes,
+ we are guaranteed that there was no post-op pending
+ (i.e changelogs were unset in the server). This is an
+ essential "guarantee", that fsync() returns only after
+ completely finishing EVERYTHING, including the delayed
+ post-op. This guarantee is expected by FUSE graph switching
+ for example.
+ */
+ stub = fop_fsync_cbk_stub (frame, afr_fsync_unwind_cbk,
+ local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ xdata);
+ if (!stub) {
+ AFR_STACK_UNWIND (fsync, frame, -1, ENOMEM, 0, 0, 0);
+ return 0;
+ }
+
+ /* If no new unstable writes happened between the
+ time we cleared the unstable write witness flag in afr_fsync
+ and now, calling afr_delayed_changelog_wake_up() should
+ wake up and skip over the fsync phase and go straight to
+ afr_changelog_post_op_now()
+ */
+ afr_delayed_changelog_wake_resume (this, local->fd, stub);
+ }
+
+ return 0;
+}
+
+
+int
+afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t datasync, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ call_count = local->call_count;
+
+ local->fd = fd_ref (fd);
+
+ if (afr_fd_has_witnessed_unstable_write (this, fd)) {
+ /* don't care. we only wanted to CLEAR the bit */
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_fsync_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fsync,
+ fd, datasync, xdata);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+/* }}} */
+
+/* {{{ fsync */
+
+int32_t
+afr_fsyncdir_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0)
+ local->op_ret = 0;
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (fsyncdir, frame, local->op_ret,
+ local->op_errno, xdata);
+
+ return 0;
+}
+
+
+int32_t
+afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t datasync, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_fsyncdir_cbk,
+ priv->children[i],
+ priv->children[i]->fops->fsyncdir,
+ fd, datasync, xdata);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+/* }}} */
+
+/* {{{ xattrop */
+
+int32_t
+afr_xattrop_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xattr, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0) {
+ if (!local->cont.xattrop.xattr)
+ local->cont.xattrop.xattr = dict_ref (xattr);
+ local->op_ret = 0;
+ }
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (xattrop, frame, local->op_ret, local->op_errno,
+ local->cont.xattrop.xattr, xdata);
+
+ return 0;
+}
+
+
+int32_t
+afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_xattrop_cbk,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ loc, optype, xattr, xdata);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+/* }}} */
+
+/* {{{ fxattrop */
+
+int32_t
+afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xattr, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0) {
+ if (!local->cont.fxattrop.xattr)
+ local->cont.fxattrop.xattr = dict_ref (xattr);
+
+ local->op_ret = 0;
+ }
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (fxattrop, frame, local->op_ret, local->op_errno,
+ local->cont.fxattrop.xattr, xdata);
+
+ return 0;
+}
+
+
+int32_t
+afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_fxattrop_cbk,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ fd, optype, xattr, xdata);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+/* }}} */
+
+
+int32_t
+afr_inodelk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+
+{
+ afr_local_t *local = NULL;
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0)
+ local->op_ret = 0;
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (inodelk, frame, local->op_ret,
+ local->op_errno, xdata);
+
+ return 0;
+}
+
+
+int32_t
+afr_inodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_inodelk_cbk,
+ priv->children[i],
+ priv->children[i]->fops->inodelk,
+ volume, loc, cmd, flock, xdata);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (inodelk, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+
+int32_t
+afr_finodelk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
+
+{
+ afr_local_t *local = NULL;
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0)
+ local->op_ret = 0;
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (finodelk, frame, local->op_ret,
+ local->op_errno, xdata);
+
+ return 0;
+}
+
+
+int32_t
+afr_finodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_finodelk_cbk,
+ priv->children[i],
+ priv->children[i]->fops->finodelk,
+ volume, fd, cmd, flock, xdata);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+
+int32_t
+afr_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0)
+ local->op_ret = 0;
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (entrylk, frame, local->op_ret,
+ local->op_errno, xdata);
+
+ return 0;
+}
+
+
+int32_t
+afr_entrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_entrylk_cbk,
+ priv->children[i],
+ priv->children[i]->fops->entrylk,
+ volume, loc, basename, cmd, type, xdata);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (entrylk, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+
+
+int32_t
+afr_fentrylk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+
+{
+ afr_local_t *local = NULL;
+ int call_count = -1;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0)
+ local->op_ret = 0;
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (fentrylk, frame, local->op_ret,
+ local->op_errno, xdata);
+
+ return 0;
+}
+
+
+int32_t
+afr_fentrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_fentrylk_cbk,
+ priv->children[i],
+ priv->children[i]->fops->fentrylk,
+ volume, fd, basename, cmd, type, xdata);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+int32_t
+afr_statfs_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ struct statvfs *statvfs, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int call_count = 0;
+
+ LOCK (&frame->lock);
+ {
+ local = frame->local;
+
+ if (op_ret == 0) {
+ local->op_ret = op_ret;
+
+ if (local->cont.statfs.buf_set) {
+ if (statvfs->f_bavail < local->cont.statfs.buf.f_bavail)
+ local->cont.statfs.buf = *statvfs;
+ } else {
+ local->cont.statfs.buf = *statvfs;
+ local->cont.statfs.buf_set = 1;
+ }
+ }
+
+ if (op_ret == -1)
+ local->op_errno = op_errno;
+
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno,
+ &local->cont.statfs.buf, xdata);
+
+ return 0;
+}
+
+
+int32_t
+afr_statfs (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ int child_count = 0;
+ afr_local_t * local = NULL;
+ int i = 0;
+ int ret = -1;
+ int call_count = 0;
+ int32_t op_errno = 0;
+
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+ VALIDATE_OR_GOTO (loc, out);
+
+ priv = this->private;
+ child_count = priv->child_count;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ call_count = local->call_count;
+
+ for (i = 0; i < child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND (frame, afr_statfs_cbk,
+ priv->children[i],
+ priv->children[i]->fops->statfs,
+ loc, xdata);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+
+int32_t
+afr_lk_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ afr_local_t * local = NULL;
+ int call_count = -1;
+
+ local = frame->local;
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
+ lock, xdata);
+
+ return 0;
+}
+
+
+int32_t
+afr_lk_unlock (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ int i = 0;
+ int call_count = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_locked_nodes_count (local->cont.lk.locked_nodes,
+ priv->child_count);
+
+ if (call_count == 0) {
+ AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
+ &local->cont.lk.ret_flock, NULL);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ local->cont.lk.user_flock.l_type = F_UNLCK;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->cont.lk.locked_nodes[i]) {
+ STACK_WIND (frame, afr_lk_unlock_cbk,
+ priv->children[i],
+ priv->children[i]->fops->lk,
+ local->fd, F_SETLK,
+ &local->cont.lk.user_flock, NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
+
+
+int32_t
+afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int child_index = -1;
+/* int ret = 0; */
+
+
+ local = frame->local;
+ priv = this->private;
+
+ child_index = (long) cookie;
+
+ if (!child_went_down (op_ret, op_errno) && (op_ret == -1)) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+
+ afr_lk_unlock (frame, this);
+ return 0;
+ }
+
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ local->op_errno = 0;
+ local->cont.lk.locked_nodes[child_index] = 1;
+ local->cont.lk.ret_flock = *lock;
+ }
+
+ child_index++;
+
+ if (child_index < priv->child_count) {
+ STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->lk,
+ local->fd, local->cont.lk.cmd,
+ &local->cont.lk.user_flock, xdata);
+ } else if (local->op_ret == -1) {
+ /* all nodes have gone down */
+
+ AFR_STACK_UNWIND (lk, frame, -1, ENOTCONN,
+ &local->cont.lk.ret_flock, NULL);
+ } else {
+ /* locking has succeeded on all nodes that are up */
+
+ /* temporarily
+ ret = afr_mark_locked_nodes (this, local->fd,
+ local->cont.lk.locked_nodes);
+ if (ret)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Could not save locked nodes info in fdctx");
+
+ ret = afr_save_locked_fd (this, local->fd);
+ if (ret)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Could not save locked fd");
+
+ */
+ AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
+ &local->cont.lk.ret_flock, NULL);
+ }
+
+ return 0;
+}
+
+
+int
+afr_lk (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+ int32_t op_errno = 0;
+ int ret = -1;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ local->cont.lk.locked_nodes = GF_CALLOC (priv->child_count,
+ sizeof (*local->cont.lk.locked_nodes),
+ gf_afr_mt_char);
+
+ if (!local->cont.lk.locked_nodes) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->fd = fd_ref (fd);
+ local->cont.lk.cmd = cmd;
+ local->cont.lk.user_flock = *flock;
+ local->cont.lk.ret_flock = *flock;
+
+ STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) 0,
+ priv->children[i],
+ priv->children[i]->fops->lk,
+ fd, cmd, flock, xdata);
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+int
+afr_forget (xlator_t *this, inode_t *inode)
+{
+ uint64_t ctx_addr = 0;
+ afr_inode_ctx_t *ctx = NULL;
+
+ inode_ctx_get (inode, this, &ctx_addr);
+
+ if (!ctx_addr)
+ goto out;
+
+ ctx = (afr_inode_ctx_t *)(long)ctx_addr;
+ GF_FREE (ctx->fresh_children);
+ GF_FREE (ctx);
+out:
+ return 0;
+}
+
+int
+afr_priv_dump (xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 0;
+
+
+ GF_ASSERT (this);
+ priv = this->private;
+
+ GF_ASSERT (priv);
+ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
+ gf_proc_dump_add_section(key_prefix);
+ gf_proc_dump_write("child_count", "%u", priv->child_count);
+ gf_proc_dump_write("read_child_rr", "%u", priv->read_child_rr);
+ for (i = 0; i < priv->child_count; i++) {
+ sprintf (key, "child_up[%d]", i);
+ gf_proc_dump_write(key, "%d", priv->child_up[i]);
+ sprintf (key, "pending_key[%d]", i);
+ gf_proc_dump_write(key, "%s", priv->pending_key[i]);
+ }
+ gf_proc_dump_write("data_self_heal", "%s", priv->data_self_heal);
+ gf_proc_dump_write("metadata_self_heal", "%d", priv->metadata_self_heal);
+ gf_proc_dump_write("entry_self_heal", "%d", priv->entry_self_heal);
+ gf_proc_dump_write("data_change_log", "%d", priv->data_change_log);
+ gf_proc_dump_write("metadata_change_log", "%d", priv->metadata_change_log);
+ gf_proc_dump_write("entry-change_log", "%d", priv->entry_change_log);
+ gf_proc_dump_write("read_child", "%d", priv->read_child);
+ gf_proc_dump_write("favorite_child", "%d", priv->favorite_child);
+ gf_proc_dump_write("wait_count", "%u", priv->wait_count);
+
+ return 0;
+}
+
+
+/**
+ * find_child_index - find the child's index in the array of subvolumes
+ * @this: AFR
+ * @child: child
+ */
+
+static int
+find_child_index (xlator_t *this, xlator_t *child)
+{
+ afr_private_t *priv = NULL;
+ int i = -1;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if ((xlator_t *) child == priv->children[i])
+ break;
+ }
+
+ return i;
+}
+
+int32_t
+afr_notify (xlator_t *this, int32_t event,
+ void *data, void *data2)
+{
+ afr_private_t *priv = NULL;
+ int i = -1;
+ int up_children = 0;
+ int down_children = 0;
+ int propagate = 0;
+ int had_heard_from_all = 0;
+ int have_heard_from_all = 0;
+ int idx = -1;
+ int ret = -1;
+ int call_psh = 0;
+ int up_child = AFR_ALL_CHILDREN;
+ dict_t *input = NULL;
+ dict_t *output = NULL;
+
+ priv = this->private;
+
+ if (!priv)
+ return 0;
+
+ /*
+ * We need to reset this in case children come up in "staggered"
+ * fashion, so that we discover a late-arriving local subvolume. Note
+ * that we could end up issuing N lookups to the first subvolume, and
+ * O(N^2) overall, but N is small for AFR so it shouldn't be an issue.
+ */
+ priv->did_discovery = _gf_false;
+
+ had_heard_from_all = 1;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!priv->last_event[i]) {
+ had_heard_from_all = 0;
+ }
+ }
+
+ /* parent xlators dont need to know about every child_up, child_down
+ * because of afr ha. If all subvolumes go down, child_down has
+ * to be triggered. In that state when 1 subvolume comes up child_up
+ * needs to be triggered. dht optimizes revalidate lookup by sending
+ * it only to one of its subvolumes. When child up/down happens
+ * for afr's subvolumes dht should be notified by child_modified. The
+ * subsequent revalidate lookup happens on all the dht's subvolumes
+ * which triggers afr self-heals if any.
+ */
+ idx = find_child_index (this, data);
+ if (idx < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Received child_up "
+ "from invalid subvolume");
+ goto out;
+ }
+
+ switch (event) {
+ case GF_EVENT_CHILD_UP:
+ LOCK (&priv->lock);
+ {
+ /*
+ * This only really counts if the child was never up
+ * (value = -1) or had been down (value = 0). See
+ * comment at GF_EVENT_CHILD_DOWN for a more detailed
+ * explanation.
+ */
+ if (priv->child_up[idx] != 1) {
+ priv->up_count++;
+ }
+ priv->child_up[idx] = 1;
+
+ call_psh = 1;
+ up_child = idx;
+ for (i = 0; i < priv->child_count; i++)
+ if (priv->child_up[i] == 1)
+ up_children++;
+ if (up_children == 1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Subvolume '%s' came back up; "
+ "going online.", ((xlator_t *)data)->name);
+ } else {
+ event = GF_EVENT_CHILD_MODIFIED;
+ }
+
+ priv->last_event[idx] = event;
+ }
+ UNLOCK (&priv->lock);
+
+ break;
+
+ case GF_EVENT_CHILD_DOWN:
+ LOCK (&priv->lock);
+ {
+ /*
+ * If a brick is down when we start, we'll get a
+ * CHILD_DOWN to indicate its initial state. There
+ * was never a CHILD_UP in this case, so if we
+ * increment "down_count" the difference between than
+ * and "up_count" will no longer be the number of
+ * children that are currently up. This has serious
+ * implications e.g. for quorum enforcement, so we
+ * don't increment these values unless the event
+ * represents an actual state transition between "up"
+ * (value = 1) and anything else.
+ */
+ if (priv->child_up[idx] == 1) {
+ priv->down_count++;
+ }
+ priv->child_up[idx] = 0;
+
+ for (i = 0; i < priv->child_count; i++)
+ if (priv->child_up[i] == 0)
+ down_children++;
+ if (down_children == priv->child_count) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "All subvolumes are down. Going offline "
+ "until atleast one of them comes back up.");
+ } else {
+ event = GF_EVENT_CHILD_MODIFIED;
+ }
+
+ priv->last_event[idx] = event;
+ }
+ UNLOCK (&priv->lock);
+
+ break;
+
+ case GF_EVENT_CHILD_CONNECTING:
+ LOCK (&priv->lock);
+ {
+ priv->last_event[idx] = event;
+ }
+ UNLOCK (&priv->lock);
+
+ break;
+
+ case GF_EVENT_TRANSLATOR_OP:
+ input = data;
+ output = data2;
+ ret = afr_xl_op (this, input, output);
+ goto out;
+ break;
+
+ default:
+ propagate = 1;
+ break;
+ }
+
+ /* have all subvolumes reported status once by now? */
+ have_heard_from_all = 1;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!priv->last_event[i])
+ have_heard_from_all = 0;
+ }
+
+ /* if all subvols have reported status, no need to hide anything
+ or wait for anything else. Just propagate blindly */
+ if (have_heard_from_all)
+ propagate = 1;
+
+ if (!had_heard_from_all && have_heard_from_all) {
+ /* This is the first event which completes aggregation
+ of events from all subvolumes. If at least one subvol
+ had come up, propagate CHILD_UP, but only this time
+ */
+ event = GF_EVENT_CHILD_DOWN;
+
+ LOCK (&priv->lock);
+ {
+ up_children = afr_up_children_count (priv->child_up,
+ priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->last_event[i] == GF_EVENT_CHILD_UP) {
+ event = GF_EVENT_CHILD_UP;
+ break;
+ }
+
+ if (priv->last_event[i] ==
+ GF_EVENT_CHILD_CONNECTING) {
+ event = GF_EVENT_CHILD_CONNECTING;
+ /* continue to check other events for CHILD_UP */
+ }
+ }
+ }
+ UNLOCK (&priv->lock);
+ }
+
+ ret = 0;
+ if (propagate)
+ ret = default_notify (this, event, data);
+ if (call_psh && priv->shd.iamshd)
+ afr_proactive_self_heal ((void*) (long) up_child);
+
+out:
+ return ret;
+}
+
+int
+afr_first_up_child (unsigned char *child_up, size_t child_count)
+{
+ int ret = -1;
+ int i = 0;
+
+ GF_ASSERT (child_up);
+
+ for (i = 0; i < child_count; i++) {
+ if (child_up[i]) {
+ ret = i;
+ break;
+ }
+ }
+
+ return ret;
+}
+
+int
+afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
+{
+ int ret = -1;
+
+ local->op_ret = -1;
+ local->op_errno = EUCLEAN;
+
+ local->child_up = GF_CALLOC (priv->child_count,
+ sizeof (*local->child_up),
+ gf_afr_mt_char);
+ if (!local->child_up) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ memcpy (local->child_up, priv->child_up,
+ sizeof (*local->child_up) * priv->child_count);
+ local->call_count = afr_up_children_count (local->child_up,
+ priv->child_count);
+ if (local->call_count == 0) {
+ gf_log (THIS->name, GF_LOG_INFO, "no subvolumes up");
+ if (op_errno)
+ *op_errno = ENOTCONN;
+ goto out;
+ }
+
+ local->child_errno = GF_CALLOC (priv->child_count,
+ sizeof (*local->child_errno),
+ gf_afr_mt_int32_t);
+ if (!local->child_errno) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->transaction.postop_piggybacked = GF_CALLOC (priv->child_count,
+ sizeof (int),
+ gf_afr_mt_int32_t);
+ if (!local->transaction.postop_piggybacked) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->append_write = _gf_false;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+afr_internal_lock_init (afr_internal_lock_t *lk, size_t child_count,
+ transaction_lk_type_t lk_type)
+{
+ int ret = -ENOMEM;
+
+ lk->locked_nodes = GF_CALLOC (sizeof (*lk->locked_nodes),
+ child_count, gf_afr_mt_char);
+ if (NULL == lk->locked_nodes)
+ goto out;
+
+ lk->lower_locked_nodes = GF_CALLOC (sizeof (*lk->lower_locked_nodes),
+ child_count, gf_afr_mt_char);
+ if (NULL == lk->lower_locked_nodes)
+ goto out;
+
+ lk->lock_op_ret = -1;
+ lk->lock_op_errno = EUCLEAN;
+ lk->transaction_lk_type = lk_type;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+void
+afr_matrix_cleanup (int32_t **matrix, unsigned int m)
+{
+ int i = 0;
+
+ if (!matrix)
+ goto out;
+ for (i = 0; i < m; i++) {
+ GF_FREE (matrix[i]);
+ }
+
+ GF_FREE (matrix);
+out:
+ return;
+}
+
+int32_t**
+afr_matrix_create (unsigned int m, unsigned int n)
+{
+ int32_t **matrix = NULL;
+ int i = 0;
+
+ matrix = GF_CALLOC (sizeof (*matrix), m, gf_afr_mt_int32_t);
+ if (!matrix)
+ goto out;
+
+ for (i = 0; i < m; i++) {
+ matrix[i] = GF_CALLOC (sizeof (*matrix[i]), n,
+ gf_afr_mt_int32_t);
+ if (!matrix[i])
+ goto out;
+ }
+ return matrix;
+out:
+ afr_matrix_cleanup (matrix, m);
+ return NULL;
+}
+
+int
+afr_inodelk_init (afr_inodelk_t *lk, char *dom, size_t child_count)
+{
+ int ret = -ENOMEM;
+
+ lk->domain = dom;
+ lk->locked_nodes = GF_CALLOC (sizeof (*lk->locked_nodes),
+ child_count, gf_afr_mt_char);
+ if (NULL == lk->locked_nodes)
+ goto out;
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+afr_transaction_local_init (afr_local_t *local, xlator_t *this)
+{
+ int child_up_count = 0;
+ int ret = -ENOMEM;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ ret = afr_internal_lock_init (&local->internal_lock, priv->child_count,
+ AFR_TRANSACTION_LK);
+ if (ret < 0)
+ goto out;
+
+ if ((local->transaction.type == AFR_DATA_TRANSACTION) ||
+ (local->transaction.type == AFR_METADATA_TRANSACTION)) {
+ ret = afr_inodelk_init (&local->internal_lock.inodelk[0],
+ this->name, priv->child_count);
+ if (ret < 0)
+ goto out;
+ }
+
+ ret = -ENOMEM;
+ child_up_count = afr_up_children_count (local->child_up,
+ priv->child_count);
+ if (priv->optimistic_change_log && child_up_count == priv->child_count)
+ local->optimistic_change_log = 1;
+
+ local->first_up_child = afr_first_up_child (local->child_up,
+ priv->child_count);
+
+ local->transaction.eager_lock =
+ GF_CALLOC (sizeof (*local->transaction.eager_lock),
+ priv->child_count,
+ gf_afr_mt_int32_t);
+
+ if (!local->transaction.eager_lock)
+ goto out;
+
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children)
+ goto out;
+
+ local->transaction.pre_op = GF_CALLOC (sizeof (*local->transaction.pre_op),
+ priv->child_count,
+ gf_afr_mt_char);
+ if (!local->transaction.pre_op)
+ goto out;
+
+ local->pending = afr_matrix_create (priv->child_count,
+ AFR_NUM_CHANGE_LOGS);
+ if (!local->pending)
+ goto out;
+
+ local->transaction.txn_changelog = afr_matrix_create (priv->child_count,
+ AFR_NUM_CHANGE_LOGS);
+ if (!local->transaction.txn_changelog)
+ goto out;
+
+ INIT_LIST_HEAD (&local->transaction.eager_locked);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+void
+afr_reset_children (int32_t *fresh_children, int32_t child_count)
+{
+ unsigned int i = 0;
+ for (i = 0; i < child_count; i++)
+ fresh_children[i] = -1;
+}
+
+int32_t*
+afr_children_create (int32_t child_count)
+{
+ int32_t *children = NULL;
+ int i = 0;
+
+ GF_ASSERT (child_count > 0);
+
+ children = GF_CALLOC (child_count, sizeof (*children),
+ gf_afr_mt_int32_t);
+ if (NULL == children)
+ goto out;
+ for (i = 0; i < child_count; i++)
+ children[i] = -1;
+out:
+ return children;
+}
+
+void
+afr_children_add_child (int32_t *children, int32_t child,
+ int32_t child_count)
+{
+ gf_boolean_t child_found = _gf_false;
+ int i = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (children[i] == -1)
+ break;
+ if (children[i] == child) {
+ child_found = _gf_true;
+ break;
+ }
+ }
+
+ if (!child_found) {
+ GF_ASSERT (i < child_count);
+ children[i] = child;
+ }
+}
+
+void
+afr_children_rm_child (int32_t *children, int32_t child, int32_t child_count)
+{
+ int i = 0;
+
+ GF_ASSERT ((child >= 0) && (child < child_count));
+ for (i = 0; i < child_count; i++) {
+ if (children[i] == -1)
+ break;
+ if (children[i] == child) {
+ if (i != (child_count - 1))
+ memmove (children + i, children + i + 1,
+ sizeof (*children)*(child_count - i - 1));
+ children[child_count - 1] = -1;
+ break;
+ }
+ }
+}
+
+int
+afr_get_children_count (int32_t *children, unsigned int child_count)
+{
+ int count = 0;
+ int i = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (children[i] == -1)
+ break;
+ count++;
+ }
+ return count;
+}
+
+void
+afr_set_low_priority (call_frame_t *frame)
+{
+ frame->root->pid = LOW_PRIO_PROC_PID;
+}
+
+int
+afr_child_fd_ctx_set (xlator_t *this, fd_t *fd, int32_t child,
+ int flags)
+{
+ int ret = 0;
+ uint64_t ctx = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ GF_ASSERT (fd && fd->inode);
+ ret = afr_fd_ctx_set (this, fd);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not set fd ctx for fd=%p", fd);
+ goto out;
+ }
+
+ ret = fd_ctx_get (fd, this, &ctx);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not get fd ctx for fd=%p", fd);
+ goto out;
+ }
+
+ fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ fd_ctx->opened_on[child] = AFR_FD_OPENED;
+ if (!IA_ISDIR (fd->inode->ia_type)) {
+ fd_ctx->flags = flags;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+gf_boolean_t
+afr_have_quorum (char *logname, afr_private_t *priv)
+{
+ unsigned int quorum = 0;
+
+ GF_VALIDATE_OR_GOTO(logname,priv,out);
+
+ quorum = priv->quorum_count;
+ if (quorum != AFR_QUORUM_AUTO) {
+ return (priv->up_count >= (priv->down_count + quorum));
+ }
+
+ quorum = priv->child_count / 2 + 1;
+ if (priv->up_count >= (priv->down_count + quorum)) {
+ return _gf_true;
+ }
+
+ /*
+ * Special case for even numbers of nodes: if we have exactly half
+ * and that includes the first ("senior-most") node, then that counts
+ * as quorum even if it wouldn't otherwise. This supports e.g. N=2
+ * while preserving the critical property that there can only be one
+ * such group.
+ */
+ if ((priv->child_count % 2) == 0) {
+ quorum = priv->child_count / 2;
+ if (priv->up_count >= (priv->down_count + quorum)) {
+ if (priv->child_up[0]) {
+ return _gf_true;
+ }
+ }
+ }
+
+out:
+ return _gf_false;
+}
+
+void
+afr_priv_destroy (afr_private_t *priv)
+{
+ int i = 0;
+
+ if (!priv)
+ goto out;
+ inode_unref (priv->root_inode);
+ GF_FREE (priv->shd.pos);
+ GF_FREE (priv->shd.pending);
+ GF_FREE (priv->shd.inprogress);
+// for (i = 0; i < priv->child_count; i++)
+// if (priv->shd.timer && priv->shd.timer[i])
+// gf_timer_call_cancel (this->ctx, priv->shd.timer[i]);
+ GF_FREE (priv->shd.timer);
+
+ if (priv->shd.healed)
+ eh_destroy (priv->shd.healed);
+
+ if (priv->shd.heal_failed)
+ eh_destroy (priv->shd.heal_failed);
+
+ if (priv->shd.split_brain)
+ eh_destroy (priv->shd.split_brain);
+
+ for (i = 0; i < priv->child_count; i++)
+ {
+ if (priv->shd.statistics[i])
+ eh_destroy (priv->shd.statistics[i]);
+ }
+
+ GF_FREE (priv->shd.statistics);
+
+ GF_FREE (priv->shd.crawl_events);
+
+ GF_FREE (priv->last_event);
+ if (priv->pending_key) {
+ for (i = 0; i < priv->child_count; i++)
+ GF_FREE (priv->pending_key[i]);
+ }
+ GF_FREE (priv->pending_key);
+ GF_FREE (priv->children);
+ GF_FREE (priv->child_up);
+ LOCK_DESTROY (&priv->lock);
+ LOCK_DESTROY (&priv->read_child_lock);
+ pthread_mutex_destroy (&priv->mutex);
+ GF_FREE (priv);
+out:
+ return;
+}
+
+int
+xlator_subvolume_count (xlator_t *this)
+{
+ int i = 0;
+ xlator_list_t *list = NULL;
+
+ for (list = this->children; list; list = list->next)
+ i++;
+ return i;
+}
+
+inline gf_boolean_t
+afr_is_errno_set (int *child_errno, int child)
+{
+ return child_errno[child];
+}
+
+inline gf_boolean_t
+afr_is_errno_unset (int *child_errno, int child)
+{
+ return !afr_is_errno_set (child_errno, child);
+}
+
+void
+afr_prepare_new_entry_pending_matrix (int32_t **pending,
+ gf_boolean_t (*is_pending) (int *, int),
+ int *ctx, struct iatt *buf,
+ unsigned int child_count)
+{
+ int midx = 0;
+ int idx = 0;
+ int i = 0;
+
+ midx = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
+ if (IA_ISDIR (buf->ia_type))
+ idx = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
+ else if (IA_ISREG (buf->ia_type))
+ idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
+ else
+ idx = -1;
+ for (i = 0; i < child_count; i++) {
+ if (is_pending (ctx, i)) {
+ pending[i][midx] = hton32 (1);
+ if (idx == -1)
+ continue;
+ pending[i][idx] = hton32 (1);
+ }
+ }
+}
+
+gf_boolean_t
+afr_is_fd_fixable (fd_t *fd)
+{
+ if (!fd || !fd->inode)
+ return _gf_false;
+ else if (fd_is_anonymous (fd))
+ return _gf_false;
+ else if (uuid_is_null (fd->inode->gfid))
+ return _gf_false;
+
+ return _gf_true;
+}
+
+void
+afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ inode_t *inode = NULL;
+ afr_inode_ctx_t *ctx = NULL;
+
+ local = frame->local;
+
+ if (local->fd)
+ inode = local->fd->inode;
+ else
+ inode = local->loc.inode;
+
+ if (!inode)
+ return;
+
+ LOCK (&inode->lock);
+ {
+ ctx = __afr_inode_ctx_get (inode, this);
+ ctx->open_fd_count = local->open_fd_count;
+ }
+ UNLOCK (&inode->lock);
+}
+
+int
+afr_initialise_statistics (xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ int i = 0;
+ int child_count = 0;
+ eh_t *stats_per_brick = NULL;
+ shd_crawl_event_t ***shd_crawl_events = NULL;
+ priv = this->private;
+
+ priv->shd.statistics = GF_CALLOC (sizeof(eh_t *), priv->child_count,
+ gf_common_mt_eh_t);
+ if (!priv->shd.statistics) {
+ ret = -1;
+ goto out;
+ }
+ child_count = priv->child_count;
+ for (i=0; i < child_count ; i++) {
+ stats_per_brick = eh_new (AFR_STATISTICS_HISTORY_SIZE,
+ _gf_false,
+ _destroy_crawl_event_data);
+ if (!stats_per_brick) {
+ ret = -1;
+ goto out;
+ }
+ priv->shd.statistics[i] = stats_per_brick;
+
+ }
+
+ shd_crawl_events = (shd_crawl_event_t***)(&priv->shd.crawl_events);
+ *shd_crawl_events = GF_CALLOC (sizeof(shd_crawl_event_t*),
+ priv->child_count,
+ gf_afr_mt_shd_crawl_event_t);
+
+ if (!priv->shd.crawl_events) {
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+
+}
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index f1638c740..689dd84e6 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -24,6 +15,7 @@
#include <sys/time.h>
#include <stdlib.h>
#include <signal.h>
+#include <string.h>
#ifndef _CONFIG_H
#define _CONFIG_H
@@ -42,293 +34,512 @@
#include "common-utils.h"
#include "compat-errno.h"
#include "compat.h"
+#include "checksum.h"
#include "afr.h"
+#include "afr-self-heal.h"
+#include "afr-self-heal-common.h"
-
-int32_t
-afr_opendir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- fd_t *fd)
+int
+afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, int32_t sh_failed)
{
- afr_local_t * local = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ afr_set_opendir_done (this, local->fd->inode);
- int call_count = -1;
+ AFR_STACK_UNWIND (opendir, frame, local->op_ret,
+ local->op_errno, local->fd, NULL);
- LOCK (&frame->lock);
- {
- local = frame->local;
+ return 0;
+}
- if (op_ret == 0)
- local->op_ret = 0;
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+gf_boolean_t
+__checksums_differ (uint32_t *checksum, int child_count,
+ unsigned char *child_up)
+{
+ int ret = _gf_false;
+ int i = 0;
+ uint32_t cksum = 0;
+ gf_boolean_t activate_check = _gf_false;
+
+ for (i = 0; i < child_count; i++) {
+ if (!child_up[i])
+ continue;
+ if (_gf_false == activate_check) {
+ cksum = checksum[i];
+ activate_check = _gf_true;
+ continue;
+ }
- call_count = afr_frame_return (frame);
+ if (cksum != checksum[i]) {
+ ret = _gf_true;
+ break;
+ }
- if (call_count == 0) {
- AFR_STACK_UNWIND (frame, local->op_ret,
- local->op_errno, local->fd);
- }
+ cksum = checksum[i];
+ }
- return 0;
+ return ret;
}
-int32_t
-afr_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd)
+int32_t
+afr_examine_dir_readdir_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ afr_self_heal_t * sh = NULL;
+ gf_dirent_t * entry = NULL;
+ gf_dirent_t * tmp = NULL;
+ char *reason = NULL;
+ int child_index = 0;
+ uint32_t entry_cksum = 0;
+ int call_count = 0;
+ off_t last_offset = 0;
+ inode_t *inode = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+ inode = local->fd->inode;
- int child_count = 0;
- int i = 0;
+ child_index = (long) cookie;
- int ret = -1;
- int call_count = -1;
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: failed to do opendir on %s",
+ local->loc.path, priv->children[child_index]->name);
+ local->op_ret = -1;
+ local->op_ret = op_errno;
+ goto out;
+ }
+
+ if (op_ret == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s: no entries found in %s",
+ local->loc.path, priv->children[child_index]->name);
+ goto out;
+ }
+
+ list_for_each_entry_safe (entry, tmp, &entries->list, list) {
+ entry_cksum = gf_rsync_weak_checksum ((unsigned char *)entry->d_name,
+ strlen (entry->d_name));
+ local->cont.opendir.checksum[child_index] ^= entry_cksum;
+ }
+
+ list_for_each_entry (entry, &entries->list, list) {
+ last_offset = entry->d_off;
+ }
+
+ /* read more entries */
+
+ STACK_WIND_COOKIE (frame, afr_examine_dir_readdir_cbk,
+ (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->readdir,
+ local->fd, 131072, last_offset, NULL);
+
+ return 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+out:
+ call_count = afr_frame_return (frame);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ if (call_count == 0) {
+ if (__checksums_differ (local->cont.opendir.checksum,
+ priv->child_count,
+ local->child_up)) {
- priv = this->private;
+ sh->do_entry_self_heal = _gf_true;
+ sh->forced_merge = _gf_true;
- child_count = priv->child_count;
+ reason = "checksums of directory differ";
+ afr_launch_self_heal (frame, this, inode, _gf_false,
+ inode->ia_type, reason, NULL,
+ afr_examine_dir_sh_unwind);
+ } else {
+ afr_set_opendir_done (this, inode);
- ALLOC_OR_GOTO (local, afr_local_t, out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ AFR_STACK_UNWIND (opendir, frame, local->op_ret,
+ local->op_errno, local->fd, NULL);
+ }
+ }
- frame->local = local;
- local->fd = fd_ref (fd);
+ return 0;
+}
- call_count = local->call_count;
-
- for (i = 0; i < child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_opendir_cbk,
- priv->children[i],
- priv->children[i]->fops->opendir,
- loc, fd);
- if (!--call_count)
- break;
- }
- }
+int
+afr_examine_dir (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ int i = 0;
+ int call_count = 0;
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno, fd);
- }
+ local = frame->local;
+ priv = this->private;
- return 0;
-}
+ local->cont.opendir.checksum = GF_CALLOC (priv->child_count,
+ sizeof (*local->cont.opendir.checksum),
+ gf_afr_mt_int32_t);
+ call_count = afr_up_children_count (local->child_up, priv->child_count);
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_examine_dir_readdir_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->readdir,
+ local->fd, 131072, 0, NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
-/**
- * Common algorithm for directory read calls:
- *
- * - Try the fop on the first child that is up
- * - if we have failed due to ENOTCONN:
- * try the next child
- *
- * Applicable to: readdir
- */
int32_t
-afr_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
+afr_opendir_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ fd_t *fd, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int32_t up_children_count = 0;
+ int ret = -1;
+ int call_count = -1;
+ int32_t child_index = 0;
+
+ priv = this->private;
+ local = frame->local;
+ child_index = (long) cookie;
- gf_dirent_t * entry = NULL;
+ up_children_count = afr_up_children_count (local->child_up,
+ priv->child_count);
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret >= 0) {
+ local->op_ret = op_ret;
+ ret = afr_child_fd_ctx_set (this, fd, child_index, 0);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ goto unlock;
+ }
+ }
- int child_index = -1;
+ local->op_errno = op_errno;
+ }
+unlock:
+ UNLOCK (&frame->lock);
- priv = this->private;
- children = priv->children;
+ call_count = afr_frame_return (frame);
- local = frame->local;
+ if (call_count == 0) {
+ if (local->op_ret != 0)
+ goto out;
- child_index = (long) cookie;
+ if (!afr_is_opendir_done (this, local->fd->inode) &&
+ up_children_count > 1 && priv->entry_self_heal) {
- if (op_ret != -1) {
- list_for_each_entry (entry, &entries->list, list) {
- entry->d_ino = afr_itransform (entry->d_ino,
- priv->child_count,
- child_index);
- }
- }
+ /*
+ * This is the first opendir on this inode. We need
+ * to check if the directory's entries are the same
+ * on all subvolumes. This is needed in addition
+ * to regular entry self-heal because the readdir
+ * call is sent only to the first subvolume, and
+ * thus files that exist only there will never be healed
+ * otherwise (assuming changelog shows no anomalies).
+ */
- AFR_STACK_UNWIND (frame, op_ret, op_errno, entries);
+ gf_log (this->name, GF_LOG_TRACE,
+ "reading contents of directory %s looking for mismatch",
+ local->loc.path);
- return 0;
-}
+ afr_examine_dir (frame, this);
+ } else {
+ /* do the unwind */
+ goto out;
+ }
+ }
+
+ return 0;
-int32_t
-afr_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
-{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
- children = priv->children;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- frame->local = local;
-
- call_child = afr_first_up_child (priv);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up");
- goto out;
- }
-
- local->fd = fd_ref (fd);
- local->cont.readdir.size = size;
- local->cont.readdir.offset = offset;
-
- STACK_WIND_COOKIE (frame, afr_readdir_cbk, (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readdir,
- fd, size, offset);
-
- op_ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
- return 0;
+ AFR_STACK_UNWIND (opendir, frame, local->op_ret,
+ local->op_errno, local->fd, NULL);
+
+ return 0;
}
int32_t
-afr_getdents_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dir_entry_t *entry, int32_t count)
+afr_opendir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, fd_t *fd)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ int child_count = 0;
+ int i = 0;
+ int ret = -1;
+ int call_count = -1;
+ int32_t op_errno = 0;
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- priv = this->private;
- children = priv->children;
+ priv = this->private;
- local = frame->local;
+ child_count = priv->child_count;
- if (op_ret == -1) {
- last_tried = local->cont.getdents.last_tried;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- if (all_tried (last_tried, priv->child_count)) {
- goto out;
- }
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- this_try = ++local->cont.getdents.last_tried;
- unwind = 0;
+ loc_copy (&local->loc, loc);
- STACK_WIND (frame, afr_getdents_cbk,
- children[this_try],
- children[this_try]->fops->getdents,
- local->fd, local->cont.getdents.size,
- local->cont.getdents.offset, local->cont.getdents.flag);
- }
+ local->fd = fd_ref (fd);
+ call_count = local->call_count;
+
+ for (i = 0; i < child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_opendir_cbk,
+ (void*) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->opendir,
+ loc, fd, NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
out:
- if (unwind) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno, entry, count);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (opendir, frame, -1, op_errno, fd, NULL);
- return 0;
+ return 0;
}
-int32_t
-afr_getdents (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, int32_t flag)
+/**
+ * Common algorithm for directory read calls:
+ *
+ * - Try the fop on the first child that is up
+ * - if we have failed due to ENOTCONN:
+ * try the next child
+ *
+ * Applicable to: readdir
+ */
+
+
+struct entry_name {
+ char *name;
+ struct list_head list;
+};
+
+static void
+afr_forget_entries (fd_t *fd)
{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
+ struct entry_name *entry = NULL;
+ struct entry_name *tmp = NULL;
+ int ret = 0;
+ uint64_t ctx = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ ret = fd_ctx_get (fd, THIS, &ctx);
+ if (ret < 0) {
+ gf_log (THIS->name, GF_LOG_INFO,
+ "could not get fd ctx for fd=%p", fd);
+ return;
+ }
+
+ fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+
+ list_for_each_entry_safe (entry, tmp, &fd_ctx->entries, list) {
+ GF_FREE (entry->name);
+ list_del (&entry->list);
+ GF_FREE (entry);
+ }
+}
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+static void
+afr_readdir_filter_trash_dir (gf_dirent_t *entries, fd_t *fd)
+{
+ gf_dirent_t * entry = NULL;
+ gf_dirent_t * tmp = NULL;
+
+ list_for_each_entry_safe (entry, tmp, &entries->list, list) {
+ if (__is_root_gfid (fd->inode->gfid) &&
+ !strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR)) {
+ list_del_init (&entry->list);
+ GF_FREE (entry);
+ }
+ }
+}
+
+int32_t
+afr_readdir_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ if (op_ret == -1)
+ goto out;
- priv = this->private;
- children = priv->children;
+ local = frame->local;
+ afr_readdir_filter_trash_dir (entries, local->fd);
- ALLOC_OR_GOTO (local, afr_local_t, out);
+out:
+ AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, entries, NULL);
+ return 0;
+}
- call_child = afr_first_up_child (priv);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up.");
- goto out;
- }
- local->cont.getdents.last_tried = call_child;
+int32_t
+afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
- local->fd = fd_ref (fd);
+ if (op_ret == -1)
+ goto out;
- local->cont.getdents.size = size;
- local->cont.getdents.offset = offset;
- local->cont.getdents.flag = flag;
-
- frame->local = local;
+ local = frame->local;
+ afr_readdir_filter_trash_dir (entries, local->fd);
- STACK_WIND (frame, afr_getdents_cbk,
- children[call_child], children[call_child]->fops->getdents,
- fd, size, offset, flag);
+out:
+ AFR_STACK_UNWIND (readdirp, frame, op_ret, op_errno, entries, NULL);
+ return 0;
+}
- op_ret = 0;
+int32_t
+afr_do_readdir (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t offset, int whichop, dict_t *dict)
+{
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = -1;
+ int32_t op_errno = 0;
+ uint64_t read_child = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+ children = priv->children;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ read_child = afr_inode_get_read_ctx (this, fd->inode,
+ local->fresh_children);
+ ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.readdir.last_index);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx) {
+ op_errno = EBADF;
+ goto out;
+ }
+
+ if ((offset == 0) || (fd_ctx->call_child == -1)) {
+ fd_ctx->call_child = call_child;
+ } else if ((priv->readdir_failover == _gf_false) &&
+ (call_child != fd_ctx->call_child)) {
+ op_errno = EBADF;
+ goto out;
+ }
+
+ local->fd = fd_ref (fd);
+ local->cont.readdir.size = size;
+ local->cont.readdir.dict = (dict)? dict_ref (dict) : NULL;
+
+ if (whichop == GF_FOP_READDIR)
+ STACK_WIND_COOKIE (frame, afr_readdir_cbk,
+ (void *) (long) call_child,
+ children[call_child],
+ children[call_child]->fops->readdir, fd,
+ size, offset, dict);
+ else
+ STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
+ (void *) (long) call_child,
+ children[call_child],
+ children[call_child]->fops->readdirp, fd,
+ size, offset, dict);
+
+ return 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ AFR_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
- return 0;
+int32_t
+afr_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
+{
+ afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIR, xdata);
+ return 0;
}
+int32_t
+afr_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict)
+{
+ afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIRP, dict);
+ return 0;
+}
+
+
+int32_t
+afr_releasedir (xlator_t *this, fd_t *fd)
+{
+ afr_forget_entries (fd);
+ afr_cleanup_fd_ctx (this, fd);
+
+ return 0;
+}
diff --git a/xlators/cluster/afr/src/afr-dir-read.h b/xlators/cluster/afr/src/afr-dir-read.h
index 6d981fdfd..09456d159 100644
--- a/xlators/cluster/afr/src/afr-dir-read.h
+++ b/xlators/cluster/afr/src/afr-dir-read.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __DIR_READ_H__
@@ -23,25 +14,23 @@
int32_t
afr_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd);
+ loc_t *loc, fd_t *fd, dict_t *xdata);
int32_t
-afr_closedir (call_frame_t *frame, xlator_t *this,
- fd_t *fd);
+afr_releasedir (xlator_t *this, fd_t *fd);
int32_t
afr_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset);
+ fd_t *fd, size_t size, off_t offset, dict_t *xdata);
int32_t
-afr_getdents (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, int32_t flag);
-
+afr_readdirp (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t offset, dict_t *dict);
int32_t
afr_checksum (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags);
+ loc_t *loc, int32_t flags, dict_t *xdata);
#endif /* __DIR_READ_H__ */
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index 7be32ce4c..1943b719b 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -47,279 +38,470 @@
#include "afr.h"
#include "afr-transaction.h"
+int
+afr_build_parent_loc (loc_t *parent, loc_t *child, int32_t *op_errno)
+{
+ int ret = -1;
+ char *child_path = NULL;
+
+ if (!child->parent) {
+ if (op_errno)
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ child_path = gf_strdup (child->path);
+ if (!child_path) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+ parent->path = gf_strdup( dirname (child_path) );
+ if (!parent->path) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+ parent->inode = inode_ref (child->parent);
+ uuid_copy (parent->gfid, child->pargfid);
+
+ ret = 0;
+out:
+ GF_FREE(child_path);
+
+ return ret;
+}
void
-afr_build_parent_loc (loc_t *parent, loc_t *child)
+__dir_entry_fop_common_cbk (call_frame_t *frame, int child_index,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, struct iatt *prenewparent,
+ struct iatt *postnewparent)
{
- char *tmp = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (afr_fop_failed (op_ret, op_errno))
+ afr_transaction_fop_failed (frame, this, child_index);
+
+ if (op_ret > -1) {
+ local->op_ret = op_ret;
+
+ if ((local->success_count == 0) ||
+ (child_index == local->read_child_index)) {
+ local->cont.dir_fop.preparent = *preparent;
+ local->cont.dir_fop.postparent = *postparent;
+ if (buf)
+ local->cont.dir_fop.buf = *buf;
+ if (prenewparent)
+ local->cont.dir_fop.prenewparent = *prenewparent;
+ if (postnewparent)
+ local->cont.dir_fop.postnewparent = *postnewparent;
+ }
- if (!child->parent) {
- loc_copy (parent, child);
- return;
- }
+ local->cont.dir_fop.inode = inode;
- tmp = strdup (child->path);
- parent->path = strdup (dirname (tmp));
- FREE (tmp);
+ local->fresh_children[local->success_count] = child_index;
+ local->success_count++;
+ local->child_errno[child_index] = 0;
+ } else {
+ local->child_errno[child_index] = op_errno;
+ }
- parent->name = strrchr (parent->path, '/');
- if (parent->name)
- parent->name++;
+ local->op_errno = op_errno;
+}
- parent->inode = inode_ref (child->parent);
- parent->parent = inode_parent (parent->inode, 0, NULL);
- parent->ino = parent->inode->ino;
+int
+afr_mark_new_entry_changelog_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xattr, dict_t *xdata)
+{
+ int call_count = 0;
+
+ call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ AFR_STACK_DESTROY (frame);
+ }
+ return 0;
}
-/* {{{ create */
+void
+afr_mark_new_entry_changelog (call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t *new_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_local_t *new_local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t **xattr = NULL;
+ int32_t **changelog = NULL;
+ int i = 0;
+ GF_UNUSED int op_errno = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ new_frame = copy_frame (frame);
+ if (!new_frame) {
+ goto out;
+ }
-int
-afr_create_unwind (call_frame_t *frame, xlator_t *this)
+ AFR_LOCAL_ALLOC_OR_GOTO (new_frame->local, out);
+ new_local = new_frame->local;
+ changelog = afr_matrix_create (priv->child_count, AFR_NUM_CHANGE_LOGS);
+ if (!changelog)
+ goto out;
+
+ xattr = GF_CALLOC (priv->child_count, sizeof (*xattr),
+ gf_afr_mt_dict_t);
+ if (!xattr)
+ goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_errno[i])
+ continue;
+ xattr[i] = dict_new ();
+ if (!xattr[i])
+ goto out;
+ }
+
+ afr_prepare_new_entry_pending_matrix (changelog,
+ afr_is_errno_set,
+ local->child_errno,
+ &local->cont.dir_fop.buf,
+ priv->child_count);
+
+ new_local->pending = changelog;
+ uuid_copy (new_local->loc.gfid, local->cont.dir_fop.buf.ia_gfid);
+ new_local->loc.inode = inode_ref (local->cont.dir_fop.inode);
+ new_local->call_count = local->success_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_errno[i])
+ continue;
+
+ afr_set_pending_dict (priv, xattr[i], changelog, i, LOCAL_LAST);
+ STACK_WIND_COOKIE (new_frame, afr_mark_new_entry_changelog_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &new_local->loc, GF_XATTROP_ADD_ARRAY,
+ xattr[i], NULL);
+ }
+ new_frame = NULL;
+out:
+ if (new_frame)
+ AFR_STACK_DESTROY (new_frame);
+ afr_xattr_array_destroy (xattr, priv->child_count);
+ return;
+}
+
+gf_boolean_t
+afr_is_new_entry_changelog_needed (glusterfs_fop_t fop)
+{
+ glusterfs_fop_t fops[] = {GF_FOP_CREATE, GF_FOP_MKNOD, GF_FOP_NULL};
+ int i = 0;
+
+ for (i = 0; fops[i] != GF_FOP_NULL; i++) {
+ if (fop == fops[i])
+ return _gf_true;
+ }
+ return _gf_false;
+}
+
+void
+afr_dir_fop_mark_entry_pending_changelog (call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_private_t * priv = NULL;
- afr_local_t *local = NULL;
- struct stat *unwind_buf = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ local = frame->local;
priv = this->private;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.create.read_child_buf.st_ino) {
- unwind_buf = &local->cont.create.read_child_buf;
- } else {
- unwind_buf = &local->cont.create.buf;
- }
- unwind_buf->st_ino = local->cont.create.ino;
+ if (local->op_ret < 0)
+ goto out;
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
- local->cont.create.fd,
- local->cont.create.inode,
- unwind_buf);
- }
-
- return 0;
+ if (local->success_count == priv->child_count)
+ goto out;
+
+ if (!afr_is_new_entry_changelog_needed (local->op))
+ goto out;
+
+ afr_mark_new_entry_changelog (frame, this);
+
+out:
+ return;
+}
+
+void
+afr_dir_fop_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (local->cont.dir_fop.inode == NULL)
+ goto done;
+ afr_set_read_ctx_from_policy (this, local->cont.dir_fop.inode,
+ local->fresh_children,
+ local->read_child_index,
+ priv->read_child,
+ local->cont.dir_fop.buf.ia_gfid);
+done:
+ local->transaction.unwind (frame, this);
+ afr_dir_fop_mark_entry_pending_changelog (frame, this);
+ local->transaction.resume (frame, this);
}
+/* {{{ create */
int
-afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- fd_t *fd, inode_t *inode, struct stat *buf)
+afr_create_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- int ret = 0;
+ local = frame->local;
- int call_count = -1;
- int child_index = -1;
- int first_up_child = 0;
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame) {
+ main_frame = local->transaction.main_frame;
+ }
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (create, main_frame,
+ local->op_ret, local->op_errno,
+ local->cont.create.fd,
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ local->xdata_rsp);
+ }
- local = frame->local;
- priv = this->private;
+ return 0;
+}
- child_index = (long) cookie;
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- ret = afr_fd_ctx_set (this, fd);
+int
+afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ fd_t *fd, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ uint64_t ctx = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = 0;
+ int call_count = -1;
+ int child_index = -1;
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not set ctx on fd=%p", fd);
+ local = frame->local;
- local->op_ret = -1;
- local->op_errno = -ret;
- }
+ child_index = (long) cookie;
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
+ LOCK (&frame->lock);
+ {
+ if (op_ret > -1) {
+ ret = afr_fd_ctx_set (this, fd);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not set ctx on fd=%p", fd);
+
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ goto unlock;
+ }
- if (local->success_count == 0) {
- local->cont.create.buf = *buf;
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
-
- first_up_child = afr_first_up_child (priv);
-
- if (child_index == first_up_child) {
- local->cont.create.ino =
- afr_itransform (buf->st_ino,
- priv->child_count,
- first_up_child);
- }
-
- if (child_index == local->read_child_index) {
- local->cont.create.read_child_buf = *buf;
+ ret = fd_ctx_get (fd, this, &ctx);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not get fd ctx for fd=%p", fd);
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ goto unlock;
}
- local->cont.create.inode = inode;
+ fd_ctx = (afr_fd_ctx_t *)(long) ctx;
- local->success_count++;
- }
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ fd_ctx->flags = local->cont.create.flags;
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ if (local->success_count == 0) {
+ if (xdata)
+ local->xdata_rsp = dict_ref(xdata);
+ }
+ }
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
+ }
+
+unlock:
+ UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ call_count = afr_frame_return (frame);
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return 0;
}
int
afr_create_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_create_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->create,
- &local->loc,
- local->cont.create.flags,
- local->cont.create.mode,
- local->cont.create.fd);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_create_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->create,
+ &local->loc,
+ local->cont.create.flags,
+ local->cont.create.mode,
+ local->umask,
+ local->cont.create.fd,
+ local->xdata_req);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
}
int
afr_create_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
+ afr_local_t * local = NULL;
- local = frame->local;
+ local = frame->local;
- local->transaction.unwind (frame, this);
+ local->transaction.unwind (frame, this);
- AFR_STACK_DESTROY (frame);
+ AFR_STACK_DESTROY (frame);
- return 0;
+ return 0;
}
int
afr_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode, fd_t *fd)
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *params)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
- int ret = -1;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- int op_ret = -1;
- int op_errno = 0;
+ priv = this->private;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ QUORUM_CHECK(create,out);
- priv = this->private;
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- transaction_frame->local = local;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- loc_copy (&local->loc, loc);
+ loc_copy (&local->loc, loc);
LOCK (&priv->read_child_lock);
{
- local->read_child_index = (++priv->read_child_rr)
+ local->read_child_index = (++priv->read_child_rr)
% (priv->child_count);
}
UNLOCK (&priv->read_child_lock);
- local->cont.create.flags = flags;
- local->cont.create.mode = mode;
- local->cont.create.fd = fd_ref (fd);
-
- local->transaction.fop = afr_create_wind;
- local->transaction.done = afr_create_done;
- local->transaction.unwind = afr_create_unwind;
-
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ local->op = GF_FOP_CREATE;
+ local->cont.create.flags = flags;
+ local->cont.create.mode = mode;
+ local->cont.create.fd = fd_ref (fd);
+ local->umask = umask;
+ if (params)
+ local->xdata_req = dict_ref (params);
+
+ local->transaction.fop = afr_create_wind;
+ local->transaction.done = afr_create_done;
+ local->transaction.unwind = afr_create_unwind;
+
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (create, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL, NULL);
+ }
- return 0;
+ return 0;
}
/* }}} */
@@ -329,222 +511,201 @@ out:
int
afr_mknod_unwind (call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
-
- struct stat *unwind_buf = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.mknod.read_child_buf.st_ino) {
- unwind_buf = &local->cont.mknod.read_child_buf;
- } else {
- unwind_buf = &local->cont.mknod.buf;
- }
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
- local->cont.mknod.inode,
- unwind_buf);
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame) {
+ main_frame = local->transaction.main_frame;
+ }
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (mknod, main_frame,
+ local->op_ret, local->op_errno,
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
}
- return 0;
+ return 0;
}
int
-afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf)
+afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0){
- local->cont.mknod.buf = *buf;
- local->cont.mknod.buf.st_ino =
- afr_itransform (buf->st_ino,
- priv->child_count,
- child_index);
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
-
- if (child_index == local->read_child_index) {
- local->cont.mknod.read_child_buf = *buf;
- }
-
- local->cont.mknod.inode = inode;
+ int call_count = -1;
+ int child_index = -1;
- local->success_count++;
- }
+ child_index = (long) cookie;
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ LOCK (&frame->lock);
+ {
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
+ }
+ UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ call_count = afr_frame_return (frame);
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return 0;
}
int32_t
afr_mknod_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_mknod_wind_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->mknod,
- &local->loc, local->cont.mknod.mode,
- local->cont.mknod.dev);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_mknod_wind_cbk, (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->mknod,
+ &local->loc, local->cont.mknod.mode,
+ local->cont.mknod.dev,
+ local->umask,
+ local->xdata_req);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
}
int
afr_mknod_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
+ afr_local_t * local = NULL;
- local = frame->local;
+ local = frame->local;
- local->transaction.unwind (frame, this);
- AFR_STACK_DESTROY (frame);
+ local->transaction.unwind (frame, this);
+ AFR_STACK_DESTROY (frame);
- return 0;
+ return 0;
}
int
-afr_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t dev)
+afr_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t dev, mode_t umask, dict_t *params)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
-
- int ret = -1;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
- int op_ret = -1;
- int op_errno = 0;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ priv = this->private;
- priv = this->private;
+ QUORUM_CHECK(mknod,out);
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- transaction_frame->local = local;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- loc_copy (&local->loc, loc);
+ loc_copy (&local->loc, loc);
LOCK (&priv->read_child_lock);
{
- local->read_child_index = (++priv->read_child_rr)
+ local->read_child_index = (++priv->read_child_rr)
% (priv->child_count);
}
UNLOCK (&priv->read_child_lock);
- local->cont.mknod.mode = mode;
- local->cont.mknod.dev = dev;
-
- local->transaction.fop = afr_mknod_wind;
- local->transaction.done = afr_mknod_done;
- local->transaction.unwind = afr_mknod_unwind;
-
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ local->op = GF_FOP_MKNOD;
+ local->cont.mknod.mode = mode;
+ local->cont.mknod.dev = dev;
+ local->umask = umask;
+ if (params)
+ local->xdata_req = dict_ref (params);
+
+ local->transaction.fop = afr_mknod_wind;
+ local->transaction.done = afr_mknod_done;
+ local->transaction.unwind = afr_mknod_unwind;
+
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (mknod, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
+ }
- return 0;
+ return 0;
}
/* }}} */
@@ -555,231 +716,201 @@ out:
int
afr_mkdir_unwind (call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
-
- struct stat *unwind_buf = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.mkdir.read_child_buf.st_ino) {
- unwind_buf = &local->cont.mkdir.read_child_buf;
- } else {
- unwind_buf = &local->cont.mkdir.buf;
- }
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- unwind_buf->st_ino = local->cont.mkdir.ino;
+ local = frame->local;
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
- local->cont.mkdir.inode,
- unwind_buf);
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame) {
+ main_frame = local->transaction.main_frame;
+ }
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (mkdir, main_frame,
+ local->op_ret, local->op_errno,
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
}
- return 0;
+ return 0;
}
int
-afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf)
+afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = -1;
- int first_up_child = 0;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+ int call_count = -1;
+ int child_index = -1;
- if (op_ret != -1) {
- local->op_ret = op_ret;
+ child_index = (long) cookie;
- if (local->success_count == 0) {
- local->cont.mkdir.buf = *buf;
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
+ LOCK (&frame->lock);
+ {
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
+ }
+ UNLOCK (&frame->lock);
- first_up_child = afr_first_up_child (priv);
+ call_count = afr_frame_return (frame);
- if (child_index == first_up_child) {
- local->cont.mkdir.ino =
- afr_itransform (buf->st_ino,
- priv->child_count,
- first_up_child);
- }
-
- if (child_index == local->read_child_index) {
- local->cont.mkdir.read_child_buf = *buf;
- }
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
- local->cont.mkdir.inode = inode;
+ return 0;
+}
- local->success_count++;
- }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+int
+afr_mkdir_wind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
- call_count = afr_frame_return (frame);
+ local = frame->local;
+ priv = this->private;
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
- local->transaction.resume (frame, this);
- }
-
- return 0;
-}
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_mkdir_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->mkdir,
+ &local->loc, local->cont.mkdir.mode,
+ local->umask,
+ local->xdata_req);
+ if (!--call_count)
+ break;
+ }
+ }
-int
-afr_mkdir_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_mkdir_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->mkdir,
- &local->loc, local->cont.mkdir.mode);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ return 0;
}
int
afr_mkdir_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
+ afr_local_t * local = NULL;
- local = frame->local;
+ local = frame->local;
- local->transaction.unwind (frame, this);
+ local->transaction.unwind (frame, this);
- AFR_STACK_DESTROY (frame);
+ AFR_STACK_DESTROY (frame);
- return 0;
+ return 0;
}
-
int
afr_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode)
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *params)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
-
- int ret = -1;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
- int op_ret = -1;
- int op_errno = 0;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ priv = this->private;
- priv = this->private;
+ QUORUM_CHECK(mkdir,out);
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- transaction_frame->local = local;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- loc_copy (&local->loc, loc);
+ loc_copy (&local->loc, loc);
LOCK (&priv->read_child_lock);
{
- local->read_child_index = (++priv->read_child_rr)
+ local->read_child_index = (++priv->read_child_rr)
% (priv->child_count);
}
UNLOCK (&priv->read_child_lock);
- local->cont.mkdir.mode = mode;
-
- local->transaction.fop = afr_mkdir_wind;
- local->transaction.done = afr_mkdir_done;
- local->transaction.unwind = afr_mkdir_unwind;
-
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ local->cont.mkdir.mode = mode;
+ local->umask = umask;
+ if (params)
+ local->xdata_req = dict_ref (params);
+
+ local->op = GF_FOP_MKDIR;
+ local->transaction.fop = afr_mkdir_wind;
+ local->transaction.done = afr_mkdir_done;
+ local->transaction.unwind = afr_mkdir_unwind;
+
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ AFR_STACK_UNWIND (mkdir, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
+ }
- return 0;
+ return 0;
}
/* }}} */
@@ -790,224 +921,196 @@ out:
int
afr_link_unwind (call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
-
- struct stat *unwind_buf = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.link.read_child_buf.st_ino) {
- unwind_buf = &local->cont.link.read_child_buf;
- } else {
- unwind_buf = &local->cont.link.buf;
- }
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- unwind_buf->st_ino = local->cont.link.ino;
+ local = frame->local;
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
- local->cont.link.inode,
- unwind_buf);
- }
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame) {
+ main_frame = local->transaction.main_frame;
+ }
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (link, main_frame,
+ local->op_ret, local->op_errno,
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
+ }
- return 0;
+ return 0;
}
int
-afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *buf)
+afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0) {
- local->cont.link.buf = *buf;
- local->cont.link.buf.st_ino =
- afr_itransform (buf->st_ino, priv->child_count,
- child_index);
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
-
- if (child_index == local->read_child_index) {
- local->cont.link.read_child_buf = *buf;
- }
-
- local->cont.link.inode = inode;
+ int call_count = -1;
+ int child_index = -1;
- local->success_count++;
- }
+ child_index = (long) cookie;
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ LOCK (&frame->lock);
+ {
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
+ }
+ UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ call_count = afr_frame_return (frame);
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return 0;
}
int
afr_link_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_link_wind_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->link,
- &local->loc,
- &local->newloc);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_link_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->link,
+ &local->loc,
+ &local->newloc, local->xdata_req);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
}
int
afr_link_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = frame->local;
+ afr_local_t * local = frame->local;
- local->transaction.unwind (frame, this);
+ local->transaction.unwind (frame, this);
- AFR_STACK_DESTROY (frame);
+ AFR_STACK_DESTROY (frame);
- return 0;
+ return 0;
}
int
afr_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
-
- int ret = -1;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
- int op_ret = -1;
- int op_errno = 0;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ priv = this->private;
- priv = this->private;
+ QUORUM_CHECK(link,out);
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- transaction_frame->local = local;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- loc_copy (&local->loc, oldloc);
- loc_copy (&local->newloc, newloc);
+ loc_copy (&local->loc, oldloc);
+ loc_copy (&local->newloc, newloc);
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
LOCK (&priv->read_child_lock);
{
- local->read_child_index = (++priv->read_child_rr)
+ local->read_child_index = (++priv->read_child_rr)
% (priv->child_count);
}
UNLOCK (&priv->read_child_lock);
- local->cont.link.ino = oldloc->inode->ino;
-
- local->transaction.fop = afr_link_wind;
- local->transaction.done = afr_link_done;
- local->transaction.unwind = afr_link_unwind;
-
- afr_build_parent_loc (&local->transaction.parent_loc, oldloc);
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (oldloc->path);
- local->transaction.new_basename = AFR_BASENAME (newloc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
-
- op_ret = 0;
+ local->op = GF_FOP_LINK;
+ local->transaction.fop = afr_link_wind;
+ local->transaction.done = afr_link_done;
+ local->transaction.unwind = afr_link_unwind;
+
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, newloc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME (newloc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (link, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
+ }
- return 0;
+ return 0;
}
/* }}} */
@@ -1018,223 +1121,202 @@ out:
int
afr_symlink_unwind (call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
-
- struct stat *unwind_buf = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.symlink.read_child_buf.st_ino) {
- unwind_buf = &local->cont.symlink.read_child_buf;
- } else {
- unwind_buf = &local->cont.symlink.buf;
- }
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
- local->cont.symlink.inode,
- unwind_buf);
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame) {
+ main_frame = local->transaction.main_frame;
+ }
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (symlink, main_frame,
+ local->op_ret, local->op_errno,
+ local->cont.dir_fop.inode,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
}
- return 0;
+ return 0;
}
int
-afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *buf)
+afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0) {
- local->cont.symlink.buf = *buf;
- local->cont.symlink.buf.st_ino =
- afr_itransform (buf->st_ino, priv->child_count,
- child_index);
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
-
- if (child_index == local->read_child_index) {
- local->cont.symlink.read_child_buf = *buf;
- }
+ int call_count = -1;
+ int child_index = -1;
- local->cont.symlink.inode = inode;
+ child_index = (long) cookie;
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ LOCK (&frame->lock);
+ {
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, inode, buf,
+ preparent, postparent, NULL, NULL);
+ }
+ UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ call_count = afr_frame_return (frame);
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return 0;
}
int
afr_symlink_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
- int call_count = -1;
- int i = 0;
+ local = frame->local;
+ priv = this->private;
- local = frame->local;
- priv = this->private;
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ local->call_count = call_count;
- local->call_count = call_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_symlink_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->symlink,
+ local->cont.symlink.linkpath,
+ &local->loc,
+ local->umask,
+ local->xdata_req);
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_symlink_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->symlink,
- local->cont.symlink.linkpath,
- &local->loc);
+ if (!--call_count)
+ break;
- if (!--call_count)
- break;
+ }
+ }
- }
- }
-
- return 0;
+ return 0;
}
int
afr_symlink_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = frame->local;
+ afr_local_t * local = frame->local;
- local->transaction.unwind (frame, this);
+ local->transaction.unwind (frame, this);
- AFR_STACK_DESTROY (frame);
-
- return 0;
+ AFR_STACK_DESTROY (frame);
+
+ return 0;
}
int
afr_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc)
+ const char *linkpath, loc_t *loc, mode_t umask, dict_t *params)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
-
- int ret = -1;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
- int op_ret = -1;
- int op_errno = 0;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ priv = this->private;
- priv = this->private;
+ QUORUM_CHECK(symlink,out);
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- transaction_frame->local = local;
-
- loc_copy (&local->loc, loc);
+ loc_copy (&local->loc, loc);
LOCK (&priv->read_child_lock);
{
- local->read_child_index = (++priv->read_child_rr)
+ local->read_child_index = (++priv->read_child_rr)
% (priv->child_count);
}
UNLOCK (&priv->read_child_lock);
- local->cont.symlink.ino = loc->inode->ino;
- local->cont.symlink.linkpath = strdup (linkpath);
-
- local->transaction.fop = afr_symlink_wind;
- local->transaction.done = afr_symlink_done;
- local->transaction.unwind = afr_symlink_unwind;
-
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ local->cont.symlink.linkpath = gf_strdup (linkpath);
+ local->umask = umask;
+ if (params)
+ local->xdata_req = dict_ref (params);
+
+ local->op = GF_FOP_SYMLINK;
+ local->transaction.fop = afr_symlink_wind;
+ local->transaction.done = afr_symlink_done;
+ local->transaction.unwind = afr_symlink_unwind;
+
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL, NULL);
- }
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (symlink, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
+ }
- return 0;
+ return 0;
}
/* }}} */
@@ -1244,210 +1326,231 @@ out:
int
afr_rename_unwind (call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
-
- struct stat *unwind_buf = NULL;
-
- local = frame->local;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ local = frame->local;
- if (main_frame) {
- local->cont.rename.buf.st_ino = local->cont.rename.ino;
-
- if (local->cont.rename.read_child_buf.st_ino) {
- unwind_buf = &local->cont.rename.read_child_buf;
- } else {
- unwind_buf = &local->cont.rename.buf;
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame) {
+ main_frame = local->transaction.main_frame;
}
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (rename, main_frame,
+ local->op_ret, local->op_errno,
+ &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ &local->cont.dir_fop.prenewparent,
+ &local->cont.dir_fop.postnewparent,
+ NULL);
+ }
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
- unwind_buf);
- }
-
- return 0;
+ return 0;
}
int
-afr_rename_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+afr_rename_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ int call_count = -1;
+ int child_index = -1;
- int call_count = -1;
- int child_index = -1;
+ local = frame->local;
- local = frame->local;
- priv = this->private;
+ child_index = (long) cookie;
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+ LOCK (&frame->lock);
+ {
+ if (afr_fop_failed (op_ret, op_errno) && op_errno != ENOTEMPTY)
+ afr_transaction_fop_failed (frame, this, child_index);
+ local->op_errno = op_errno;
+ local->child_errno[child_index] = op_errno;
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
-
- if (buf) {
- local->cont.rename.buf = *buf;
- local->cont.rename.buf.st_ino =
- afr_itransform (buf->st_ino, priv->child_count,
- child_index);
- }
- local->success_count++;
- }
+ if (op_ret > -1)
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, NULL, buf,
+ preoldparent, postoldparent,
+ prenewparent, postnewparent);
- if (child_index == local->read_child_index) {
- local->cont.rename.read_child_buf = *buf;
- }
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ }
+ UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ call_count = afr_frame_return (frame);
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return 0;
}
int32_t
afr_rename_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_rename_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->rename,
- &local->loc,
- &local->newloc);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+ local = frame->local;
+ priv = this->private;
-int
-afr_rename_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = frame->local;
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
- local->transaction.unwind (frame, this);
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_rename_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->rename,
+ &local->loc,
+ &local->newloc, NULL);
+ if (!--call_count)
+ break;
+ }
+ }
- AFR_STACK_DESTROY (frame);
-
- return 0;
+ return 0;
}
int
-afr_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+afr_rename_done (call_frame_t *frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
-
- int ret = -1;
-
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
+ afr_local_t * local = frame->local;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ local->transaction.unwind (frame, this);
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_STACK_DESTROY (frame);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- transaction_frame->local = local;
-
- loc_copy (&local->loc, oldloc);
- loc_copy (&local->newloc, newloc);
-
- local->read_child_index = afr_read_child (this, oldloc->inode);
-
- local->cont.rename.ino = oldloc->inode->ino;
-
- local->transaction.fop = afr_rename_wind;
- local->transaction.done = afr_rename_done;
- local->transaction.unwind = afr_rename_unwind;
+ return 0;
+}
- afr_build_parent_loc (&local->transaction.parent_loc, oldloc);
- afr_build_parent_loc (&local->transaction.new_parent_loc, newloc);
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (oldloc->path);
- local->transaction.new_basename = AFR_BASENAME (newloc->path);
+int
+afr_rename (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
+ int nlockee = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ QUORUM_CHECK(rename,out);
+
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- afr_transaction (transaction_frame, this, AFR_ENTRY_RENAME_TRANSACTION);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ loc_copy (&local->loc, oldloc);
+ loc_copy (&local->newloc, newloc);
+
+ local->read_child_index = afr_inode_get_read_ctx (this, oldloc->inode, NULL);
+
+ local->op = GF_FOP_RENAME;
+ local->transaction.fop = afr_rename_wind;
+ local->transaction.done = afr_rename_done;
+ local->transaction.unwind = afr_rename_unwind;
+
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, oldloc,
+ &op_errno);
+ if (ret)
+ goto out;
+ ret = afr_build_parent_loc (&local->transaction.new_parent_loc, newloc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME (oldloc->path);
+ local->transaction.new_basename = AFR_BASENAME (newloc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = nlockee = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->transaction.new_parent_loc,
+ local->transaction.new_basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ nlockee++;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ nlockee++;
+ if (local->newloc.inode && IA_ISDIR (local->newloc.inode->ia_type)) {
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->newloc,
+ NULL,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ nlockee++;
+ }
+ qsort (int_lock->lockee, nlockee, sizeof (*int_lock->lockee),
+ afr_entry_lockee_cmp);
+ int_lock->lockee_count = nlockee;
+
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_RENAME_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ AFR_STACK_UNWIND (rename, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL, NULL);
+ }
- return 0;
+ return 0;
}
/* }}} */
@@ -1457,182 +1560,190 @@ out:
int
afr_unlink_unwind (call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ local = frame->local;
- if (main_frame)
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno);
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame) {
+ main_frame = local->transaction.main_frame;
+ }
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (unlink, main_frame,
+ local->op_ret, local->op_errno,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
+ }
- return 0;
+ return 0;
}
int
-afr_unlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_unlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = (long) cookie;
- int need_unwind = 0;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
+ afr_local_t * local = NULL;
+ int call_count = -1;
+ int child_index = (long) cookie;
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
- }
+ local = frame->local;
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
+ LOCK (&frame->lock);
+ {
+ if (child_index == local->read_child_index) {
+ local->read_child_returned = _gf_true;
+ }
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, NULL, NULL,
+ preparent, postparent, NULL, NULL);
+ }
+ UNLOCK (&frame->lock);
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
+ call_count = afr_frame_return (frame);
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return 0;
}
int32_t
afr_unlink_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_unlink_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->unlink,
- &local->loc);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
+ local = frame->local;
+ priv = this->private;
-int32_t
-afr_unlink_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = frame->local;
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
- local->transaction.unwind (frame, this);
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
+ local->call_count = call_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_unlink_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->unlink,
+ &local->loc, local->xflag,
+ local->xdata_req);
-int32_t
-afr_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
+ if (!--call_count)
+ break;
+ }
+ }
- int ret = -1;
+ return 0;
+}
- int op_ret = -1;
- int op_errno = 0;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+int32_t
+afr_unlink_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = frame->local;
- priv = this->private;
+ local->transaction.unwind (frame, this);
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ AFR_STACK_DESTROY (frame);
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ return 0;
+}
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
- transaction_frame->local = local;
+int32_t
+afr_unlink (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int xflag, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
- loc_copy (&local->loc, loc);
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- local->transaction.fop = afr_unlink_wind;
- local->transaction.done = afr_unlink_done;
- local->transaction.unwind = afr_unlink_unwind;
+ priv = this->private;
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
+ QUORUM_CHECK(unlink,out);
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ loc_copy (&local->loc, loc);
+ local->xflag = xflag;
+ if (xdata)
+ local->xdata_req = dict_ref (xdata);
+
+ local->op = GF_FOP_UNLINK;
+ local->transaction.fop = afr_unlink_wind;
+ local->transaction.done = afr_unlink_done;
+ local->transaction.unwind = afr_unlink_unwind;
+
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[0], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ int_lock->lockee_count++;
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (unlink, frame, -1, op_errno,
+ NULL, NULL, NULL);
+ }
- return 0;
+ return 0;
}
/* }}} */
@@ -1644,323 +1755,208 @@ out:
int
afr_rmdir_unwind (call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ local = frame->local;
- if (main_frame)
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno);
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame) {
+ main_frame = local->transaction.main_frame;
+ }
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (rmdir, main_frame,
+ local->op_ret, local->op_errno,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ NULL);
+ }
- return 0;
+ return 0;
}
int
-afr_rmdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_rmdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = (long) cookie;
- int need_unwind = 0;
-
- local = frame->local;
- priv = this->private;
+ afr_local_t * local = NULL;
+ int call_count = -1;
+ int child_index = (long) cookie;
+ int read_child = 0;
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
+ local = frame->local;
- if (local->success_count == priv->wait_count)
- need_unwind = 1;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ LOCK (&frame->lock);
+ {
+ if (child_index == read_child) {
+ local->read_child_returned = _gf_true;
+ }
+ if (afr_fop_failed (op_ret, op_errno) && (op_errno != ENOTEMPTY))
+ afr_transaction_fop_failed (frame, this, child_index);
+ local->op_errno = op_errno;
+ local->child_errno[child_index] = op_errno;
+ if (op_ret > -1)
+ __dir_entry_fop_common_cbk (frame, child_index, this,
+ op_ret, op_errno, NULL, NULL,
+ preparent, postparent, NULL,
+ NULL);
- call_count = afr_frame_return (frame);
+ }
+ UNLOCK (&frame->lock);
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
+ call_count = afr_frame_return (frame);
+ if (call_count == 0)
+ afr_dir_fop_done (frame, this);
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return 0;
}
int
afr_rmdir_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_rmdir_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->rmdir,
- &local->loc);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-
-int
-afr_rmdir_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-
-int
-afr_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
-
- int ret = -1;
-
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- transaction_frame->local = local;
-
- loc_copy (&local->loc, loc);
-
- local->transaction.fop = afr_rmdir_wind;
- local->transaction.done = afr_rmdir_done;
- local->transaction.unwind = afr_rmdir_unwind;
-
- afr_build_parent_loc (&local->transaction.parent_loc, loc);
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
-
- return 0;
-}
-
-/* }}} */
-
-/* {{{ setdents */
-
-int32_t
-afr_setdents_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = (long) cookie;
+ local = frame->local;
+ priv = this->private;
- local = frame->local;
- priv = this->private;
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
- if ((op_ret != -1) && (local->success_count == 0)) {
- local->op_ret = op_ret;
- local->success_count++;
- }
+ local->call_count = call_count;
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_rmdir_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->rmdir,
+ &local->loc, local->cont.rmdir.flags,
+ NULL);
- call_count = afr_frame_return (frame);
+ if (!--call_count)
+ break;
+ }
+ }
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return 0;
}
-int32_t
-afr_setdents_wind (call_frame_t *frame, xlator_t *this)
+int
+afr_rmdir_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_setdents_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setdents,
- local->fd, local->cont.setdents.flags,
- local->cont.setdents.entries,
- local->cont.setdents.count);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
+ afr_local_t * local = frame->local;
+ local->transaction.unwind (frame, this);
-int32_t
-afr_setdents_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = frame->local;
+ AFR_STACK_DESTROY (frame);
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno);
-
- return 0;
+ return 0;
}
-int32_t
-afr_setdents (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t flags, dir_entry_t *entries, int32_t count)
+int
+afr_rmdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int flags, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
-
- int ret = -1;
-
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- frame->local = local;
-
- local->fd = fd_ref (fd);
-
- local->cont.setdents.flags = flags;
- local->cont.setdents.entries = entries;
- local->cont.setdents.count = count;
-
- local->transaction.fop = afr_setdents_wind;
- local->transaction.done = afr_setdents_done;
-
- local->transaction.basename = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
+ int nlockee = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ QUORUM_CHECK(rmdir,out);
+
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- afr_transaction (frame, this, AFR_ENTRY_TRANSACTION);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ local->cont.rmdir.flags = flags;
+ loc_copy (&local->loc, loc);
+
+ local->op = GF_FOP_RMDIR;
+ local->transaction.fop = afr_rmdir_wind;
+ local->transaction.done = afr_rmdir_done;
+ local->transaction.unwind = afr_rmdir_unwind;
+
+ ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME (loc->path);
+ int_lock = &local->internal_lock;
+
+ int_lock->lockee_count = nlockee = 0;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ nlockee++;
+ ret = afr_init_entry_lockee (&int_lock->lockee[nlockee], local,
+ &local->loc,
+ NULL,
+ priv->child_count);
+ if (ret)
+ goto out;
+
+ nlockee++;
+ qsort (int_lock->lockee, nlockee, sizeof (*int_lock->lockee),
+ afr_entry_lockee_cmp);
+ int_lock->lockee_count = nlockee;
+
+ ret = afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL);
+ }
- return 0;
+ return 0;
}
/* }}} */
diff --git a/xlators/cluster/afr/src/afr-dir-write.h b/xlators/cluster/afr/src/afr-dir-write.h
index 76b4b60fa..02f0a3682 100644
--- a/xlators/cluster/afr/src/afr-dir-write.h
+++ b/xlators/cluster/afr/src/afr-dir-write.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __DIR_WRITE_H__
@@ -22,38 +13,35 @@
int32_t
afr_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode, fd_t *fd);
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *xdata);
int32_t
afr_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t dev);
+ loc_t *loc, mode_t mode, dev_t dev, mode_t umask, dict_t *xdata);
int32_t
afr_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode);
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata);
int32_t
afr_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc);
+ loc_t *loc, int xflag, dict_t *xdata);
int32_t
afr_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc);
+ loc_t *loc, int flags, dict_t *xdata);
int32_t
afr_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc);
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata);
int32_t
afr_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc);
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata);
-int32_t
+int
afr_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *oldloc);
-
-int32_t
-afr_setdents (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t flags, dir_entry_t *entries, int32_t count);
+ const char *linkpath, loc_t *oldloc, mode_t umask, dict_t *params);
#endif /* __DIR_WRITE_H__ */
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
index 203206511..e06e3b2f2 100644
--- a/xlators/cluster/afr/src/afr-inode-read.c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -44,9 +35,6 @@
#include "compat-errno.h"
#include "compat.h"
-#include "afr.h"
-
-
/**
* Common algorithm for inode read calls:
*
@@ -61,114 +49,115 @@
int32_t
afr_access_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
-
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
- int read_child = -1;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+ int unwind = 1;
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
- priv = this->private;
- children = priv->children;
+ priv = this->private;
+ children = priv->children;
- local = frame->local;
+ local = frame->local;
read_child = (long) cookie;
- if (op_ret == -1) {
- retry:
- last_tried = local->cont.access.last_tried;
-
- if (all_tried (last_tried, priv->child_count)) {
- goto out;
- }
- this_try = ++local->cont.access.last_tried;
-
- if (this_try == read_child) {
- goto retry;
- }
+ if (op_ret == -1) {
+ last_index = &local->cont.access.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0)
+ goto out;
- unwind = 0;
+ unwind = 0;
- STACK_WIND_COOKIE (frame, afr_access_cbk,
- (void *) (long) read_child,
- children[this_try],
- children[this_try]->fops->access,
- &local->loc, local->cont.access.mask);
- }
+ STACK_WIND_COOKIE (frame, afr_access_cbk,
+ (void *) (long) read_child,
+ children[next_call_child],
+ children[next_call_child]->fops->access,
+ &local->loc, local->cont.access.mask,
+ NULL);
+ }
out:
- if (unwind) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
+ if (unwind) {
+ AFR_STACK_UNWIND (access, frame, op_ret, op_errno, xdata);
+ }
- return 0;
+ return 0;
}
int32_t
-afr_access (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t mask)
+afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
-
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
int32_t read_child = -1;
+ int ret = -1;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ priv = this->private;
+ VALIDATE_OR_GOTO (priv->children, out);
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
+ children = priv->children;
- children = priv->children;
+ AFR_SBRAIN_CHECK_LOC (loc, out);
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- read_child = afr_read_child (this, loc->inode);
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- if (read_child >= 0) {
- call_child = read_child;
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- local->cont.access.last_tried = -1;
- } else {
- call_child = afr_first_up_child (priv);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up");
- goto out;
- }
-
- local->cont.access.last_tried = call_child;
+ read_child = afr_inode_get_read_ctx (this, loc->inode,
+ local->fresh_children);
+ ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.access.last_index);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
- loc_copy (&local->loc, loc);
- local->cont.access.mask = mask;
+ loc_copy (&local->loc, loc);
+ local->cont.access.mask = mask;
- STACK_WIND_COOKIE (frame, afr_access_cbk,
- (void *) (long) call_child,
- children[call_child], children[call_child]->fops->access,
- loc, mask);
+ STACK_WIND_COOKIE (frame, afr_access_cbk,
+ (void *) (long) call_child,
+ children[call_child],
+ children[call_child]->fops->access,
+ loc, mask, xdata);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
- return 0;
+ if (ret < 0)
+ AFR_STACK_UNWIND (access, frame, -1, op_errno, NULL);
+ return 0;
}
@@ -178,121 +167,111 @@ out:
int32_t
afr_stat_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct stat *buf)
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ struct iatt *buf, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
-
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
- int read_child = -1;
-
- priv = this->private;
- children = priv->children;
-
- read_child = (long) cookie;
-
- local = frame->local;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+ int unwind = 1;
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
+
+ priv = this->private;
+ children = priv->children;
- if (op_ret == -1) {
- retry:
- last_tried = local->cont.stat.last_tried;
+ read_child = (long) cookie;
- if (all_tried (last_tried, priv->child_count)) {
- goto out;
- }
- this_try = ++local->cont.stat.last_tried;
+ local = frame->local;
- if (this_try == read_child) {
- goto retry;
- }
+ if (op_ret == -1) {
+ last_index = &local->cont.stat.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0)
+ goto out;
- unwind = 0;
+ unwind = 0;
- STACK_WIND_COOKIE (frame, afr_stat_cbk,
- (void *) (long) read_child,
- children[this_try],
- children[this_try]->fops->stat,
- &local->loc);
- }
+ STACK_WIND_COOKIE (frame, afr_stat_cbk,
+ (void *) (long) read_child,
+ children[next_call_child],
+ children[next_call_child]->fops->stat,
+ &local->loc, NULL);
+ }
out:
- if (unwind) {
- if (buf)
- buf->st_ino = local->cont.stat.ino;
-
- AFR_STACK_UNWIND (frame, op_ret, op_errno, buf);
- }
+ if (unwind) {
+ AFR_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata);
+ }
- return 0;
+ return 0;
}
int32_t
-afr_stat (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
-
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ int32_t op_errno = 0;
int32_t read_child = -1;
- int call_child = 0;
-
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
-
- children = priv->children;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ int ret = -1;
- frame->local = local;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- read_child = afr_read_child (this, loc->inode);
+ priv = this->private;
+ VALIDATE_OR_GOTO (priv->children, out);
- if (read_child >= 0) {
- call_child = read_child;
+ children = priv->children;
- local->cont.stat.last_tried = -1;
+ AFR_SBRAIN_CHECK_LOC (loc, out);
- } else {
- call_child = afr_first_up_child (priv);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up");
- goto out;
- }
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- local->cont.stat.last_tried = call_child;
- }
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- loc_copy (&local->loc, loc);
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- local->cont.stat.ino = loc->inode->ino;
+ read_child = afr_inode_get_read_ctx (this, loc->inode,
+ local->fresh_children);
+ ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.stat.last_index);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+ loc_copy (&local->loc, loc);
- STACK_WIND_COOKIE (frame, afr_stat_cbk, (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->stat,
- loc);
+ STACK_WIND_COOKIE (frame, afr_stat_cbk, (void *) (long) call_child,
+ children[call_child],
+ children[call_child]->fops->stat,
+ loc, xdata);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
@@ -301,124 +280,122 @@ out:
/* {{{ fstat */
int32_t
-afr_fstat_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct stat *buf)
+afr_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
-
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
- int read_child = -1;
-
- priv = this->private;
- children = priv->children;
-
- local = frame->local;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int unwind = 1;
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
- read_child = (long) cookie;
+ priv = this->private;
+ children = priv->children;
- if (op_ret == -1) {
- retry:
- last_tried = local->cont.fstat.last_tried;
+ local = frame->local;
- if (all_tried (last_tried, priv->child_count)) {
- goto out;
- }
- this_try = ++local->cont.fstat.last_tried;
+ read_child = (long) cookie;
- if (this_try == read_child) {
- goto retry;
- }
+ if (op_ret == -1) {
+ last_index = &local->cont.fstat.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0)
+ goto out;
- unwind = 0;
+ unwind = 0;
- STACK_WIND_COOKIE (frame, afr_fstat_cbk,
- (void *) (long) read_child,
- children[this_try],
- children[this_try]->fops->fstat,
- local->fd);
- }
+ STACK_WIND_COOKIE (frame, afr_fstat_cbk,
+ (void *) (long) read_child,
+ children[next_call_child],
+ children[next_call_child]->fops->fstat,
+ local->fd, NULL);
+ }
out:
- if (unwind) {
- if (buf)
- buf->st_ino = local->cont.fstat.ino;
-
- AFR_STACK_UNWIND (frame, op_ret, op_errno, buf);
- }
+ if (unwind) {
+ AFR_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata);
+ }
- return 0;
+ return 0;
}
int32_t
afr_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ int32_t op_errno = 0;
+ int32_t read_child = 0;
+ int ret = -1;
- int call_child = 0;
- int32_t read_child = -1;
-
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (fd, out);
+ VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
- VALIDATE_OR_GOTO (this->private, out);
+ priv = this->private;
+ VALIDATE_OR_GOTO (priv->children, out);
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
+ children = priv->children;
- children = priv->children;
+ VALIDATE_OR_GOTO (fd->inode, out);
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_SBRAIN_CHECK_FD (fd, out);
- frame->local = local;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- VALIDATE_OR_GOTO (fd->inode, out);
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- read_child = afr_read_child (this, fd->inode);
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- if (read_child >= 0) {
- call_child = read_child;
+ read_child = afr_inode_get_read_ctx (this, fd->inode,
+ local->fresh_children);
- local->cont.fstat.last_tried = -1;
- } else {
- call_child = afr_first_up_child (priv);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up");
- goto out;
- }
- local->cont.fstat.last_tried = call_child;
+ ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.fstat.last_index);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
- local->cont.fstat.ino = fd->inode->ino;
- local->fd = fd_ref (fd);
+ local->fd = fd_ref (fd);
+
+ afr_open_fd_fix (fd, this);
- STACK_WIND_COOKIE (frame, afr_fstat_cbk, (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->fstat,
- fd);
+ STACK_WIND_COOKIE (frame, afr_fstat_cbk, (void *) (long) call_child,
+ children[call_child],
+ children[call_child]->fops->fstat,
+ fd, xdata);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
/* }}} */
@@ -427,117 +404,115 @@ out:
int32_t
afr_readlink_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- const char *buf)
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ const char *buf, struct iatt *sbuf, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
-
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
- int read_child = -1;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+ int unwind = 1;
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
- priv = this->private;
- children = priv->children;
+ priv = this->private;
+ children = priv->children;
- local = frame->local;
+ local = frame->local;
read_child = (long) cookie;
- if (op_ret == -1) {
- retry:
- last_tried = local->cont.readlink.last_tried;
-
- if (all_tried (last_tried, priv->child_count)) {
- goto out;
- }
- this_try = ++local->cont.readlink.last_tried;
-
- if (this_try == read_child) {
- goto retry;
- }
+ if (op_ret == -1) {
+ last_index = &local->cont.readlink.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0)
+ goto out;
- unwind = 0;
- STACK_WIND_COOKIE (frame, afr_readlink_cbk,
- (void *) (long) read_child,
- children[this_try],
- children[this_try]->fops->readlink,
- &local->loc,
- local->cont.readlink.size);
- }
+ unwind = 0;
+ STACK_WIND_COOKIE (frame, afr_readlink_cbk,
+ (void *) (long) read_child,
+ children[next_call_child],
+ children[next_call_child]->fops->readlink,
+ &local->loc,
+ local->cont.readlink.size, NULL);
+ }
out:
- if (unwind) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno, buf);
- }
+ if (unwind) {
+ AFR_STACK_UNWIND (readlink, frame, op_ret, op_errno, buf, sbuf,
+ xdata);
+ }
- return 0;
+ return 0;
}
int32_t
afr_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size)
+ loc_t *loc, size_t size, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
-
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
int32_t read_child = -1;
+ int ret = -1;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
+ priv = this->private;
+ VALIDATE_OR_GOTO (priv->children, out);
- children = priv->children;
+ children = priv->children;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_SBRAIN_CHECK_LOC (loc, out);
- frame->local = local;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- read_child = afr_read_child (this, loc->inode);
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- if (read_child >= 0) {
- call_child = read_child;
-
- local->cont.readlink.last_tried = -1;
-
- } else {
- call_child = afr_first_up_child (priv);
-
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up");
- goto out;
- }
-
- local->cont.readlink.last_tried = call_child;
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ read_child = afr_inode_get_read_ctx (this, loc->inode,
+ local->fresh_children);
+ ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.readlink.last_index);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
}
- loc_copy (&local->loc, loc);
- local->cont.readlink.size = size;
+ loc_copy (&local->loc, loc);
- STACK_WIND_COOKIE (frame, afr_readlink_cbk,
- (void *) (long) call_child,
- children[call_child], children[call_child]->fops->readlink,
- loc, size);
+ local->cont.readlink.size = size;
- op_ret = 0;
+ STACK_WIND_COOKIE (frame, afr_readlink_cbk,
+ (void *) (long) call_child,
+ children[call_child],
+ children[call_child]->fops->readlink,
+ loc, size, xdata);
+
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
- return 0;
+ if (ret < 0)
+ AFR_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
@@ -551,7 +526,7 @@ struct _xattr_key {
};
-void
+int
__gather_xattr_keys (dict_t *dict, char *key, data_t *value,
void *data)
{
@@ -561,25 +536,25 @@ __gather_xattr_keys (dict_t *dict, char *key, data_t *value,
if (!strncmp (key, AFR_XATTR_PREFIX,
strlen (AFR_XATTR_PREFIX))) {
- xkey = CALLOC (1, sizeof (*xkey));
+ xkey = GF_CALLOC (1, sizeof (*xkey), gf_afr_mt_xattr_key);
if (!xkey)
- return;
+ return -1;
xkey->key = key;
INIT_LIST_HEAD (&xkey->list);
list_add_tail (&xkey->list, list);
}
+ return 0;
}
void
__filter_xattrs (dict_t *dict)
{
- struct list_head keys;
-
- struct _xattr_key *key;
- struct _xattr_key *tmp;
+ struct list_head keys = {0,};
+ struct _xattr_key *key = NULL;
+ struct _xattr_key *tmp = NULL;
INIT_LIST_HEAD (&keys);
@@ -591,7 +566,7 @@ __filter_xattrs (dict_t *dict)
list_del_init (&key->list);
- FREE (key);
+ GF_FREE (key);
}
}
@@ -599,128 +574,1222 @@ __filter_xattrs (dict_t *dict)
int32_t
afr_getxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict)
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+ int unwind = 1;
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
- int read_child = -1;
+ priv = this->private;
+ children = priv->children;
- priv = this->private;
- children = priv->children;
-
- local = frame->local;
+ local = frame->local;
read_child = (long) cookie;
- if (op_ret == -1) {
- retry:
- last_tried = local->cont.getxattr.last_tried;
-
- if (all_tried (last_tried, priv->child_count)) {
- goto out;
- }
- this_try = ++local->cont.getxattr.last_tried;
-
- if (this_try == read_child) {
- goto retry;
- }
+ if (op_ret == -1) {
+ last_index = &local->cont.getxattr.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0)
+ goto out;
- unwind = 0;
- STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
- (void *) (long) read_child,
- children[this_try],
- children[this_try]->fops->getxattr,
- &local->loc,
- local->cont.getxattr.name);
- }
+ unwind = 0;
+ STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
+ (void *) (long) read_child,
+ children[next_call_child],
+ children[next_call_child]->fops->getxattr,
+ &local->loc,
+ local->cont.getxattr.name,
+ NULL);
+ }
out:
- if (unwind) {
+ if (unwind) {
if (op_ret >= 0 && dict)
__filter_xattrs (dict);
- AFR_STACK_UNWIND (frame, op_ret, op_errno, dict);
- }
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
+ }
- return 0;
+ return 0;
}
+int32_t
+afr_getxattr_unwind (call_frame_t *frame, int op_ret, int op_errno,
+ dict_t *dict, dict_t *xdata)
+
+{
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
int32_t
-afr_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+afr_fgetxattr_clrlk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ dict_t *xattr = NULL;
+ char *tmp_report = NULL;
+ char lk_summary[1024] = {0,};
+ int serz_len = 0;
+ int32_t callcnt = 0;
+ long int cky = 0;
+ int ret = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+ cky = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+ if (op_ret == -1)
+ local->child_errno[cky] = op_errno;
+
+ if (!local->dict)
+ local->dict = dict_new ();
+ if (local->dict) {
+ ret = dict_get_str (dict, local->cont.getxattr.name,
+ &tmp_report);
+ if (ret)
+ goto unlock;
+ ret = dict_set_dynstr (local->dict,
+ children[cky]->name,
+ gf_strdup (tmp_report));
+ if (ret)
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ xattr = dict_new ();
+ if (!xattr) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ ret = dict_serialize_value_with_delim (local->dict,
+ lk_summary,
+ &serz_len, '\n');
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error serializing dictionary");
+ goto unwind;
+ }
+ if (serz_len == -1)
+ snprintf (lk_summary, sizeof (lk_summary),
+ "No locks cleared.");
+ ret = dict_set_dynstr (xattr, local->cont.getxattr.name,
+ gf_strdup (lk_summary));
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error setting dictionary");
+ goto unwind;
+ }
+
+ unwind:
+ // Updating child_errno with more recent 'events'
+ local->child_errno[cky] = op_errno;
+ op_errno = afr_resultant_errno_get (NULL, local->child_errno,
+ priv->child_count);
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, xattr,
+ xdata);
+
+ if (xattr)
+ dict_unref (xattr);
+ }
+
+ return ret;
+}
+
+int32_t
+afr_getxattr_clrlk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ dict_t *xattr = NULL;
+ char *tmp_report = NULL;
+ char lk_summary[1024] = {0,};
+ int serz_len = 0;
+ int32_t callcnt = 0;
+ long int cky = 0;
+ int ret = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+ cky = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+ if (op_ret == -1)
+ local->child_errno[cky] = op_errno;
+
+ if (!local->dict)
+ local->dict = dict_new ();
+ if (local->dict) {
+ ret = dict_get_str (dict, local->cont.getxattr.name,
+ &tmp_report);
+ if (ret)
+ goto unlock;
+ ret = dict_set_dynstr (local->dict,
+ children[cky]->name,
+ gf_strdup (tmp_report));
+ if (ret)
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ xattr = dict_new ();
+ if (!xattr) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ ret = dict_serialize_value_with_delim (local->dict,
+ lk_summary,
+ &serz_len, '\n');
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error serializing dictionary");
+ goto unwind;
+ }
+ if (serz_len == -1)
+ snprintf (lk_summary, sizeof (lk_summary),
+ "No locks cleared.");
+ ret = dict_set_dynstr (xattr, local->cont.getxattr.name,
+ gf_strdup (lk_summary));
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error setting dictionary");
+ goto unwind;
+ }
+
+ unwind:
+ // Updating child_errno with more recent 'events'
+ local->child_errno[cky] = op_errno;
+ op_errno = afr_resultant_errno_get (NULL, local->child_errno,
+ priv->child_count);
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata);
+
+ if (xattr)
+ dict_unref (xattr);
+ }
+
+ return ret;
+}
+
+/**
+ * node-uuid cbk uses next child querying mechanism
+ */
+int32_t
+afr_getxattr_node_uuid_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int unwind = 1;
+ int curr_call_child = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+
+ if (op_ret == -1) { /** query the _next_ child */
+
+ /**
+ * _current_ becomes _next_
+ * If done with all childs and yet no success; give up !
+ */
+ curr_call_child = (int) ((long)cookie);
+ if (++curr_call_child == priv->child_count)
+ goto unwind;
+
+ gf_log (this->name, GF_LOG_WARNING,
+ "op_ret (-1): Re-querying afr-child (%d/%d)",
+ curr_call_child, priv->child_count);
+
+ unwind = 0;
+ STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk,
+ (void *) (long) curr_call_child,
+ children[curr_call_child],
+ children[curr_call_child]->fops->getxattr,
+ &local->loc,
+ local->cont.getxattr.name,
+ NULL);
+ }
+
+ unwind:
+ if (unwind)
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict,
+ NULL);
+
+ return 0;
+}
+
+int32_t
+afr_getxattr_lockinfo_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ int call_cnt = 0, len = 0;
+ char *lockinfo_buf = NULL;
+ dict_t *lockinfo = NULL, *newdict = NULL;
+ afr_local_t *local = NULL;
+
+ LOCK (&frame->lock);
+ {
+ local = frame->local;
+
+ call_cnt = --local->call_count;
+
+ if ((op_ret < 0) || (!dict && !xdata)) {
+ goto unlock;
+ }
+
+ if (xdata) {
+ if (!local->xdata_rsp) {
+ local->xdata_rsp = dict_new ();
+ if (!local->xdata_rsp) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
+
+ if (!dict) {
+ goto unlock;
+ }
+
+ op_ret = dict_get_ptr_and_len (dict, GF_XATTR_LOCKINFO_KEY,
+ (void **)&lockinfo_buf, &len);
+
+ if (!lockinfo_buf) {
+ goto unlock;
+ }
+
+ if (!local->dict) {
+ local->dict = dict_new ();
+ if (!local->dict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ if (lockinfo_buf != NULL) {
+ lockinfo = dict_new ();
+ if (lockinfo == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ } else {
+ op_ret = dict_unserialize (lockinfo_buf, len,
+ &lockinfo);
+
+ if (lockinfo && local->dict) {
+ dict_copy (lockinfo, local->dict);
+ }
+ }
+ }
+
+ if (xdata && local->xdata_rsp) {
+ dict_copy (xdata, local->xdata_rsp);
+ }
+
+ if (!call_cnt) {
+ newdict = dict_new ();
+ if (!newdict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ len = dict_serialized_length (local->dict);
+ if (len == 0) {
+ goto unwind;
+ }
+
+ lockinfo_buf = GF_CALLOC (1, len, gf_common_mt_char);
+ if (!lockinfo_buf) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ op_ret = dict_serialize (local->dict, lockinfo_buf);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ }
+
+ op_ret = dict_set_dynptr (newdict, GF_XATTR_LOCKINFO_KEY,
+ (void *)lockinfo_buf, len);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ goto unwind;
+ }
+
+ unwind:
+ AFR_STACK_UNWIND (getxattr, frame, op_ret,
+ op_errno, newdict,
+ local->xdata_rsp);
+ }
+
+ dict_unref (lockinfo);
+
+ return 0;
+}
+
+int32_t
+afr_fgetxattr_lockinfo_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t * local = NULL;
+ int call_cnt = 0, len = 0;
+ char *lockinfo_buf = NULL;
+ dict_t *lockinfo = NULL, *newdict = NULL;
+ afr_local_t *local = NULL;
+
+ LOCK (&frame->lock);
+ {
+ local = frame->local;
- int read_child = -1;
+ call_cnt = --local->call_count;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ if ((op_ret < 0) || (!dict && !xdata)) {
+ goto unlock;
+ }
+
+ if (xdata) {
+ if (!local->xdata_rsp) {
+ local->xdata_rsp = dict_new ();
+ if (!local->xdata_rsp) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
+
+ if (!dict) {
+ goto unlock;
+ }
+
+ op_ret = dict_get_ptr_and_len (dict, GF_XATTR_LOCKINFO_KEY,
+ (void **)&lockinfo_buf, &len);
+
+ if (!lockinfo_buf) {
+ goto unlock;
+ }
+
+ if (!local->dict) {
+ local->dict = dict_new ();
+ if (!local->dict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ if (lockinfo_buf != NULL) {
+ lockinfo = dict_new ();
+ if (lockinfo == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ } else {
+ op_ret = dict_unserialize (lockinfo_buf, len,
+ &lockinfo);
+
+ if (lockinfo && local->dict) {
+ dict_copy (lockinfo, local->dict);
+ }
+ }
+ }
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ if (xdata && local->xdata_rsp) {
+ dict_copy (xdata, local->xdata_rsp);
+ }
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
+ if (!call_cnt) {
+ newdict = dict_new ();
+ if (!newdict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
- children = priv->children;
+ len = dict_serialized_length (local->dict);
+ if (len <= 0) {
+ goto unwind;
+ }
- ALLOC_OR_GOTO (local, afr_local_t, out);
- frame->local = local;
+ lockinfo_buf = GF_CALLOC (1, len, gf_common_mt_char);
+ if (!lockinfo_buf) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
- if (name) {
- if (!strncmp (name, AFR_XATTR_PREFIX,
- strlen (AFR_XATTR_PREFIX))) {
+ op_ret = dict_serialize (local->dict, lockinfo_buf);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ }
+
+ op_ret = dict_set_dynptr (newdict, GF_XATTR_LOCKINFO_KEY,
+ (void *)lockinfo_buf, len);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ goto unwind;
+ }
+
+ unwind:
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret,
+ op_errno, newdict,
+ local->xdata_rsp);
+ }
- op_errno = ENODATA;
+ dict_unref (lockinfo);
+
+ return 0;
+}
+
+int32_t
+afr_fgetxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t callcnt = 0;
+ int ret = 0;
+ char *xattr = NULL;
+ char *xattr_serz = NULL;
+ char xattr_cky[1024] = {0,};
+ dict_t *nxattr = NULL;
+ long cky = 0;
+ int32_t padding = 0;
+ int32_t tlen = 0;
+
+ if (!frame || !frame->local || !this) {
+ gf_log ("", GF_LOG_ERROR, "possible NULL deref");
+ goto out;
+ }
+
+ local = frame->local;
+ cky = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (!dict || (op_ret < 0))
goto out;
+
+ if (!local->dict)
+ local->dict = dict_new ();
+
+ if (local->dict) {
+ ret = dict_get_str (dict,
+ local->cont.getxattr.name,
+ &xattr);
+ if (ret)
+ goto out;
+
+ xattr = gf_strdup (xattr);
+
+ (void)snprintf (xattr_cky, 1024, "%s-%ld",
+ local->cont.getxattr.name, cky);
+ ret = dict_set_dynstr (local->dict,
+ xattr_cky, xattr);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Cannot set xattr cookie key");
+ goto out;
+ }
+
+ local->cont.getxattr.xattr_len
+ += strlen (xattr) + 1;
+ }
+ }
+out:
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ if (!local->cont.getxattr.xattr_len)
+ goto unwind;
+
+ nxattr = dict_new ();
+ if (!nxattr)
+ goto unwind;
+
+ /* extra bytes for decorations (brackets and <>'s) */
+ padding += strlen (this->name)
+ + strlen (AFR_PATHINFO_HEADER) + 4;
+ local->cont.getxattr.xattr_len += (padding + 2);
+
+ xattr_serz = GF_CALLOC (local->cont.getxattr.xattr_len,
+ sizeof (char), gf_common_mt_char);
+
+ if (!xattr_serz)
+ goto unwind;
+
+ /* the xlator info */
+ (void) sprintf (xattr_serz, "(<"AFR_PATHINFO_HEADER"%s> ",
+ this->name);
+
+ /* actual series of pathinfo */
+ ret = dict_serialize_value_with_delim (local->dict,
+ xattr_serz
+ + strlen (xattr_serz),
+ &tlen, ' ');
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Error serializing"
+ " dictionary");
+ goto unwind;
+ }
+
+ /* closing part */
+ *(xattr_serz + padding + tlen) = ')';
+ *(xattr_serz + padding + tlen + 1) = '\0';
+
+ ret = dict_set_dynstr (nxattr, local->cont.getxattr.name,
+ xattr_serz);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Cannot set pathinfo"
+ " key in dict");
+
+ unwind:
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, nxattr,
+ xdata);
+
+ if (nxattr)
+ dict_unref (nxattr);
+ }
+
+ return ret;
+}
+
+int32_t
+afr_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t callcnt = 0;
+ int ret = 0;
+ char *xattr = NULL;
+ char *xattr_serz = NULL;
+ char xattr_cky[1024] = {0,};
+ dict_t *nxattr = NULL;
+ long cky = 0;
+ int32_t padding = 0;
+ int32_t tlen = 0;
+
+ if (!frame || !frame->local || !this) {
+ gf_log ("", GF_LOG_ERROR, "possible NULL deref");
+ goto out;
+ }
+
+ local = frame->local;
+ cky = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (!dict || (op_ret < 0))
+ goto out;
+
+ if (!local->dict)
+ local->dict = dict_new ();
+
+ if (local->dict) {
+ ret = dict_get_str (dict,
+ local->cont.getxattr.name,
+ &xattr);
+ if (ret)
+ goto out;
+
+ xattr = gf_strdup (xattr);
+
+ (void)snprintf (xattr_cky, 1024, "%s-%ld",
+ local->cont.getxattr.name, cky);
+ ret = dict_set_dynstr (local->dict,
+ xattr_cky, xattr);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Cannot set xattr cookie key");
+ goto out;
+ }
+
+ local->cont.getxattr.xattr_len += strlen (xattr) + 1;
+ }
+ }
+ out:
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ if (!local->cont.getxattr.xattr_len)
+ goto unwind;
+
+ nxattr = dict_new ();
+ if (!nxattr)
+ goto unwind;
+
+ /* extra bytes for decorations (brackets and <>'s) */
+ padding += strlen (this->name) + strlen (AFR_PATHINFO_HEADER) + 4;
+ local->cont.getxattr.xattr_len += (padding + 2);
+
+ xattr_serz = GF_CALLOC (local->cont.getxattr.xattr_len,
+ sizeof (char), gf_common_mt_char);
+
+ if (!xattr_serz)
+ goto unwind;
+
+ /* the xlator info */
+ (void) sprintf (xattr_serz, "(<"AFR_PATHINFO_HEADER"%s> ",
+ this->name);
+
+ /* actual series of pathinfo */
+ ret = dict_serialize_value_with_delim (local->dict,
+ xattr_serz + strlen (xattr_serz),
+ &tlen, ' ');
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Error serializing"
+ " dictionary");
+ goto unwind;
+ }
+
+ /* closing part */
+ *(xattr_serz + padding + tlen) = ')';
+ *(xattr_serz + padding + tlen + 1) = '\0';
+
+ ret = dict_set_dynstr (nxattr, local->cont.getxattr.name,
+ xattr_serz);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Cannot set pathinfo"
+ " key in dict");
+
+ unwind:
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, nxattr,
+ xdata);
+
+ if (nxattr)
+ dict_unref (nxattr);
+ }
+
+ return ret;
+}
+
+static int
+afr_aggregate_stime_xattr (dict_t *this, char *key, data_t *value, void *data)
+{
+ int ret = 0;
+
+ if (fnmatch (GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0)
+ ret = gf_get_min_stime (THIS, data, key, value);
+
+ return ret;
+}
+
+int32_t
+afr_common_getxattr_stime_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t callcnt = 0;
+
+ if (!frame || !frame->local || !this) {
+ gf_log ("", GF_LOG_ERROR, "possible NULL deref");
+ goto out;
+ }
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (!dict || (op_ret < 0)) {
+ local->op_errno = op_errno;
+ goto cleanup;
}
+
+ if (!local->dict)
+ local->dict = dict_copy_with_ref (dict, NULL);
+ else
+ dict_foreach (dict, afr_aggregate_stime_xattr,
+ local->dict);
+ local->op_ret = 0;
+ }
+
+cleanup:
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ AFR_STACK_UNWIND (getxattr, frame, local->op_ret,
+ local->op_errno, local->dict, xdata);
}
- read_child = afr_read_child (this, loc->inode);
+out:
+ return 0;
+}
- if (read_child >= 0) {
- call_child = read_child;
- local->cont.getxattr.last_tried = -1;
+static gf_boolean_t
+afr_is_special_xattr (const char *name, fop_getxattr_cbk_t *cbk,
+ gf_boolean_t is_fgetxattr)
+{
+ gf_boolean_t is_spl = _gf_true;
+
+ GF_ASSERT (cbk);
+ if (!cbk) {
+ is_spl = _gf_false;
+ goto out;
+ }
+
+ if (!strcmp (name, GF_XATTR_PATHINFO_KEY)) {
+ if (is_fgetxattr) {
+ *cbk = afr_fgetxattr_pathinfo_cbk;
+ } else {
+ *cbk = afr_getxattr_pathinfo_cbk;
+ }
+ } else if (!strncmp (name, GF_XATTR_CLRLK_CMD,
+ strlen (GF_XATTR_CLRLK_CMD))) {
+ if (is_fgetxattr) {
+ *cbk = afr_fgetxattr_clrlk_cbk;
+ } else {
+ *cbk = afr_getxattr_clrlk_cbk;
+ }
+ } else if (!strncmp (name, GF_XATTR_LOCKINFO_KEY,
+ strlen (GF_XATTR_LOCKINFO_KEY))) {
+ if (is_fgetxattr) {
+ *cbk = afr_fgetxattr_lockinfo_cbk;
+ } else {
+ *cbk = afr_getxattr_lockinfo_cbk;
+ }
+ } else if (fnmatch (GF_XATTR_STIME_PATTERN, name, FNM_NOESCAPE) == 0) {
+ *cbk = afr_common_getxattr_stime_cbk;
} else {
- call_child = afr_first_up_child (priv);
+ is_spl = _gf_false;
+ }
+
+out:
+ return is_spl;
+}
+
+static void
+afr_getxattr_frm_all_children (xlator_t *this, call_frame_t *frame,
+ const char *name, loc_t *loc,
+ fop_getxattr_cbk_t cbk)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int i = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+ local->call_count = priv->child_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ STACK_WIND_COOKIE (frame, cbk,
+ (void *) (long) i,
+ children[i], children[i]->fops->getxattr,
+ loc, name, NULL);
+ }
+ return;
+}
+
+int32_t
+afr_getxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+ xlator_list_t *trav = NULL;
+ xlator_t **sub_volumes = NULL;
+ int i = 0;
+ int32_t op_errno = 0;
+ int32_t read_child = -1;
+ int ret = -1;
+ fop_getxattr_cbk_t cbk = NULL;
+ int afr_xtime_gauge[MCNT_MAX] = {0,};
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO (priv->children, out);
+
+ children = priv->children;
+
+ AFR_SBRAIN_CHECK_LOC (loc, out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ loc_copy (&local->loc, loc);
+ if (!name)
+ goto no_name;
+
+ local->cont.getxattr.name = gf_strdup (name);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up");
+ if (!strncmp (name, AFR_XATTR_PREFIX,
+ strlen (AFR_XATTR_PREFIX))) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: no data present for key %s",
+ loc->path, name);
+ op_errno = ENODATA;
+ goto out;
+ }
+ if ((strcmp (GF_XATTR_MARKER_KEY, name) == 0)
+ && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
+
+ local->marker.call_count = priv->child_count;
+
+ sub_volumes = alloca ( priv->child_count * sizeof (xlator_t *));
+ for (i = 0, trav = this->children; trav ;
+ trav = trav->next, i++) {
+
+ *(sub_volumes + i) = trav->xlator;
+ }
+
+ if (cluster_getmarkerattr (frame, this, loc, name,
+ local, afr_getxattr_unwind,
+ sub_volumes,
+ priv->child_count,
+ MARKER_UUID_TYPE,
+ marker_uuid_default_gauge,
+ priv->vol_uuid)) {
+
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: failed to get marker attr (%s)",
+ loc->path, name);
+ op_errno = EINVAL;
goto out;
}
- local->cont.getxattr.last_tried = call_child;
+ return 0;
+ }
+
+ /*
+ * if we are doing getxattr with pathinfo as the key then we
+ * collect information from all childs
+ */
+ if (afr_is_special_xattr (name, &cbk, 0)) {
+ afr_getxattr_frm_all_children (this, frame, name,
+ loc, cbk);
+ return 0;
+ }
+
+ if (XATTR_IS_NODE_UUID (name)) {
+ i = 0;
+ STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk,
+ (void *) (long) i,
+ children[i],
+ children[i]->fops->getxattr,
+ loc, name, xdata);
+ return 0;
+ }
+
+ if (*priv->vol_uuid) {
+ if ((match_uuid_local (name, priv->vol_uuid) == 0)
+ && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
+ local->marker.call_count = priv->child_count;
+
+ sub_volumes = alloca ( priv->child_count
+ * sizeof (xlator_t *));
+ for (i = 0, trav = this->children; trav ;
+ trav = trav->next, i++) {
+
+ *(sub_volumes + i) = trav->xlator;
+
+ }
+
+ /* don't err out on getting ENOTCONN (brick down)
+ * from a subset of the bricks
+ */
+ memcpy (afr_xtime_gauge, marker_xtime_default_gauge,
+ sizeof (afr_xtime_gauge));
+ afr_xtime_gauge[MCNT_NOTFOUND] = 0;
+ afr_xtime_gauge[MCNT_ENOTCONN] = 0;
+ if (cluster_getmarkerattr (frame, this, loc,
+ name, local,
+ afr_getxattr_unwind,
+ sub_volumes,
+ priv->child_count,
+ MARKER_XTIME_TYPE,
+ afr_xtime_gauge,
+ priv->vol_uuid)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: failed to get marker attr (%s)",
+ loc->path, name);
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ return 0;
+ }
+ }
+
+no_name:
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children) {
+ op_errno = ENOMEM;
+ goto out;
}
- loc_copy (&local->loc, loc);
- if (name)
- local->cont.getxattr.name = strdup (name);
+ read_child = afr_inode_get_read_ctx (this, loc->inode,
+ local->fresh_children);
+ ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.getxattr.last_index);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
+ (void *) (long) call_child,
+ children[call_child],
+ children[call_child]->fops->getxattr,
+ loc, name, xdata);
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+/* {{{ fgetxattr */
- STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
- (void *) (long) call_child,
- children[call_child], children[call_child]->fops->getxattr,
- loc, name);
- op_ret = 0;
+int32_t
+afr_fgetxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+ int unwind = 1;
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+
+ read_child = (long) cookie;
+
+ if (op_ret == -1) {
+ last_index = &local->cont.getxattr.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0)
+ goto out;
+
+ unwind = 0;
+ STACK_WIND_COOKIE (frame, afr_fgetxattr_cbk,
+ (void *) (long) read_child,
+ children[next_call_child],
+ children[next_call_child]->fops->fgetxattr,
+ local->fd,
+ local->cont.getxattr.name,
+ NULL);
+ }
+
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
- return 0;
+ if (unwind) {
+ if (op_ret >= 0 && dict)
+ __filter_xattrs (dict);
+
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict,
+ xdata);
+ }
+
+ return 0;
+}
+
+int32_t
+afr_fgetxattr_unwind (call_frame_t *frame,
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
+
+{
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+static void
+afr_fgetxattr_frm_all_children (xlator_t *this, call_frame_t *frame,
+ const char *name, fd_t *fd,
+ fop_fgetxattr_cbk_t cbk)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int i = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+ local->call_count = priv->child_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ STACK_WIND_COOKIE (frame, cbk,
+ (void *) (long) i,
+ children[i], children[i]->fops->fgetxattr,
+ fd, name, NULL);
+ }
+
+ return;
+}
+
+int32_t
+afr_fgetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int32_t read_child = -1;
+ fop_fgetxattr_cbk_t cbk = NULL;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO (priv->children, out);
+
+ children = priv->children;
+
+ AFR_SBRAIN_CHECK_FD (fd, out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (local, out);
+ frame->local = local;
+
+ op_ret = afr_local_init (local, priv, &op_errno);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
+
+ local->fd = fd_ref (fd);
+ if (name)
+ local->cont.getxattr.name = gf_strdup (name);
+
+ /* pathinfo gets handled only in getxattr(), but we need to handle
+ * lockinfo.
+ * If we are doing fgetxattr with lockinfo as the key then we
+ * collect information from all children.
+ */
+ if (afr_is_special_xattr (name, &cbk, 1)) {
+ afr_fgetxattr_frm_all_children (this, frame, name,
+ fd, cbk);
+ return 0;
+ }
+
+
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ read_child = afr_inode_get_read_ctx (this, fd->inode,
+ local->fresh_children);
+ op_ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.getxattr.last_index);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ op_ret = -1;
+ goto out;
+ }
+
+ STACK_WIND_COOKIE (frame, afr_fgetxattr_cbk,
+ (void *) (long) call_child,
+ children[call_child],
+ children[call_child]->fops->fgetxattr,
+ fd, name, xdata);
+
+ op_ret = 0;
+out:
+ if (op_ret == -1) {
+ AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, NULL,
+ NULL);
+ }
+ return 0;
}
@@ -742,140 +1811,130 @@ out:
int32_t
afr_readv_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count, struct stat *buf,
- struct iobref *iobref)
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ struct iovec *vector, int32_t count, struct iatt *buf,
+ struct iobref *iobref, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
-
- int unwind = 1;
- int last_tried = -1;
- int this_try = -1;
- int read_child = -1;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+ int unwind = 1;
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t *fresh_children = NULL;
+ int32_t read_child = -1;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
+ priv = this->private;
+ VALIDATE_OR_GOTO (priv->children, out);
- children = priv->children;
+ children = priv->children;
- local = frame->local;
+ local = frame->local;
read_child = (long) cookie;
- if (op_ret == -1) {
- retry:
- last_tried = local->cont.readv.last_tried;
-
- if (all_tried (last_tried, priv->child_count)) {
- goto out;
- }
- this_try = ++local->cont.readv.last_tried;
-
- if (this_try == read_child) {
- /*
- skip the read child since if we are here
- we must have already tried that child
- */
- goto retry;
- }
-
- unwind = 0;
-
- STACK_WIND_COOKIE (frame, afr_readv_cbk,
- (void *) (long) read_child,
- children[this_try],
- children[this_try]->fops->readv,
- local->fd, local->cont.readv.size,
- local->cont.readv.offset);
- }
+ if (op_ret == -1) {
+ last_index = &local->cont.readv.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0)
+ goto out;
-out:
- if (unwind) {
- if (buf)
- buf->st_ino = local->cont.readv.ino;
+ unwind = 0;
- AFR_STACK_UNWIND (frame, op_ret, op_errno, vector, count, buf,
- iobref);
- }
+ STACK_WIND_COOKIE (frame, afr_readv_cbk,
+ (void *) (long) read_child,
+ children[next_call_child],
+ children[next_call_child]->fops->readv,
+ local->fd, local->cont.readv.size,
+ local->cont.readv.offset,
+ local->cont.readv.flags,
+ NULL);
+ }
- return 0;
+out:
+ if (unwind) {
+ AFR_STACK_UNWIND (readv, frame, op_ret, op_errno,
+ vector, count, buf, iobref, xdata);
+ }
+
+ return 0;
}
int32_t
afr_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
-
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ xlator_t ** children = NULL;
+ int call_child = 0;
+ int32_t op_errno = 0;
int32_t read_child = -1;
- int call_child = 0;
+ int ret = -1;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+ VALIDATE_OR_GOTO (fd, out);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (fd, out);
+ priv = this->private;
+ children = priv->children;
- priv = this->private;
- children = priv->children;
+ AFR_SBRAIN_CHECK_FD (fd, out);
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- frame->local = local;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- read_child = afr_read_child (this, fd->inode);
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- if (read_child >= 0) {
- call_child = read_child;
+ read_child = afr_inode_get_read_ctx (this, fd->inode, local->fresh_children);
+ ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.readv.last_index);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- /*
- if read fails from the read child, we try
- all children starting with the first one
- */
- local->cont.readv.last_tried = -1;
+ local->fd = fd_ref (fd);
- } else {
- call_child = afr_first_up_child (priv);
- if (call_child == -1) {
- op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_DEBUG,
- "no child is up");
- goto out;
- }
-
- local->cont.readv.last_tried = call_child;
- }
-
- local->fd = fd_ref (fd);
-
- local->cont.readv.ino = fd->inode->ino;
- local->cont.readv.size = size;
- local->cont.readv.offset = offset;
-
- STACK_WIND_COOKIE (frame, afr_readv_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readv,
- fd, size, offset);
-
- op_ret = 0;
+ local->cont.readv.size = size;
+ local->cont.readv.offset = offset;
+ local->cont.readv.flags = flags;
+
+ afr_open_fd_fix (fd, this);
+
+ STACK_WIND_COOKIE (frame, afr_readv_cbk,
+ (void *) (long) call_child,
+ children[call_child],
+ children[call_child]->fops->readv,
+ fd, size, offset, flags, xdata);
+
+ ret = 0;
out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL, 0, NULL,
- NULL);
- }
- return 0;
+ if (ret < 0) {
+ AFR_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL,
+ NULL, NULL);
+ }
+ return 0;
}
/* }}} */
diff --git a/xlators/cluster/afr/src/afr-inode-read.h b/xlators/cluster/afr/src/afr-inode-read.h
index 1127cb652..e4091a793 100644
--- a/xlators/cluster/afr/src/afr-inode-read.h
+++ b/xlators/cluster/afr/src/afr-inode-read.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __INODE_READ_H__
@@ -22,26 +13,30 @@
int32_t
afr_access (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t mask);
+ loc_t *loc, int32_t mask, dict_t *xdata);
int32_t
afr_stat (call_frame_t *frame, xlator_t *this,
- loc_t *loc);
+ loc_t *loc, dict_t *xdata);
int32_t
afr_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd);
+ fd_t *fd, dict_t *xdata);
int32_t
afr_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size);
+ loc_t *loc, size_t size, dict_t *xdata);
int32_t
afr_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset);
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata);
int32_t
afr_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name);
+ loc_t *loc, const char *name, dict_t *xdata);
+
+int32_t
+afr_fgetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata);
#endif /* __INODE_READ_H__ */
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index 6a2774bee..c1ec69a55 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -46,2144 +37,2825 @@
#include "afr.h"
#include "afr-transaction.h"
+#include "afr-self-heal-common.h"
+void
+__inode_write_fop_cbk (call_frame_t *frame, int child_index, int read_child,
+ xlator_t *this, int32_t *op_ret, int32_t *op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (afr_fop_failed (*op_ret, *op_errno)) {
+ local->child_errno[child_index] = *op_errno;
+
+ switch (local->op) {
+ case GF_FOP_TRUNCATE:
+ case GF_FOP_FTRUNCATE:
+ if (*op_errno != EFBIG)
+ afr_transaction_fop_failed (frame, this,
+ child_index);
+ break;
+ default:
+ afr_transaction_fop_failed (frame, this, child_index);
+ break;
+ }
+ local->op_errno = *op_errno;
+ goto out;
+ }
-/* {{{ chmod */
+ if ((local->success_count == 0) || (read_child == child_index)) {
+ local->op_ret = *op_ret;
+ if (prebuf)
+ local->cont.inode_wfop.prebuf = *prebuf;
+ if (postbuf)
+ local->cont.inode_wfop.postbuf = *postbuf;
+ }
+ local->success_count++;
+out:
+ return;
+}
-int
-afr_chmod_unwind (call_frame_t *frame, xlator_t *this)
+/* {{{ writev */
+
+void
+afr_writev_copy_outvars (call_frame_t *src_frame, call_frame_t *dst_frame)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t *src_local = NULL;
+ afr_local_t *dst_local = NULL;
+
+ src_local = src_frame->local;
+ dst_local = dst_frame->local;
- struct stat * unwind_buf = NULL;
+ dst_local->op_ret = src_local->op_ret;
+ dst_local->op_errno = src_local->op_errno;
+ dst_local->cont.inode_wfop.prebuf = src_local->cont.inode_wfop.prebuf;
+ dst_local->cont.inode_wfop.postbuf = src_local->cont.inode_wfop.postbuf;
+}
- local = frame->local;
- priv = this->private;
+void
+afr_writev_unwind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = NULL;
+ local = frame->local;
+
+ AFR_STACK_UNWIND (writev, frame,
+ local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
+}
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+call_frame_t*
+afr_transaction_detach_fop_frame (call_frame_t *frame)
+{
+ afr_local_t * local = NULL;
+ call_frame_t *fop_frame = NULL;
- if (main_frame) {
- if (local->cont.chmod.read_child_buf.st_ino) {
- unwind_buf = &local->cont.chmod.read_child_buf;
- } else {
- unwind_buf = &local->cont.chmod.buf;
- }
+ local = frame->local;
- unwind_buf->st_ino = local->cont.chmod.ino;
+ LOCK (&frame->lock);
+ {
+ fop_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
- unwind_buf);
- }
- return 0;
+ return fop_frame;
}
-
int
-afr_chmod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+afr_transaction_writev_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ call_frame_t *fop_frame = NULL;
- int call_count = -1;
- int child_index = (long) cookie;
- int need_unwind = 0;
+ fop_frame = afr_transaction_detach_fop_frame (frame);
+
+ if (fop_frame) {
+ afr_writev_copy_outvars (frame, fop_frame);
+ afr_writev_unwind (fop_frame, this);
+ }
+ return 0;
+}
+
+static void
+afr_writev_handle_short_writes (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+ /*
+ * We already have the best case result of the writev calls staged
+ * as the return value. Any writev that returns some value less
+ * than the best case is now out of sync, so mark the fop as
+ * failed. Note that fops that have returned with errors have
+ * already been marked as failed.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if ((!local->replies[i].valid) ||
+ (local->replies[i].op_ret == -1))
+ continue;
+
+ if (local->replies[i].op_ret < local->op_ret)
+ afr_transaction_fop_failed(frame, this, i);
+ }
+}
+
+int
+afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t * local = NULL;
+ afr_private_t *priv = NULL;
+ call_frame_t *fop_frame = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
int read_child = 0;
+ int ret = 0;
+ uint32_t open_fd_count = 0;
+ uint32_t write_is_append = 0;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- read_child = afr_read_child (this, local->loc.inode);
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
- LOCK (&frame->lock);
- {
+ LOCK (&frame->lock);
+ {
if (child_index == read_child) {
local->read_child_returned = _gf_true;
}
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, prebuf, postbuf,
+ xdata);
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.chmod.buf = *buf;
- }
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
- if (child_index == read_child) {
- local->cont.chmod.read_child_buf = *buf;
- }
- local->success_count++;
+ /* stage the best case return value for unwind */
+ if ((local->success_count == 0) || (op_ret > local->op_ret)) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+
+ if (op_ret != -1) {
+ if (xdata) {
+ ret = dict_get_uint32 (xdata,
+ GLUSTERFS_OPEN_FD_COUNT,
+ &open_fd_count);
+ if ((ret == 0) &&
+ (open_fd_count > local->open_fd_count)) {
+ local->open_fd_count = open_fd_count;
+ local->update_open_fd_count = _gf_true;
+ }
+
+ write_is_append = 0;
+ ret = dict_get_uint32 (xdata,
+ GLUSTERFS_WRITE_IS_APPEND,
+ &write_is_append);
+ if (ret || !write_is_append)
+ local->append_write = _gf_false;
+ }
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
}
+ }
+ UNLOCK (&frame->lock);
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ call_count = afr_frame_return (frame);
- if (need_unwind)
- afr_chmod_unwind (frame, this);
+ if (call_count == 0) {
- call_count = afr_frame_return (frame);
+ if (local->update_open_fd_count)
+ afr_handle_open_fd_count (frame, this);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
+ if (!local->stable_write && !local->append_write)
+ /* An appended write removes the necessity to
+ fsync() the file. This is because self-heal
+ has the logic to check for larger file when
+ the xattrs are not reliably pointing at
+ a stale file.
+ */
+ afr_fd_report_unstable_write (this, local->fd);
+
+ afr_writev_handle_short_writes (frame, this);
+ if (afr_any_fops_failed (local, priv)) {
+ //Don't unwind until post-op is complete
+ local->transaction.resume (frame, this);
+ } else {
+ /*
+ * Generally inode-write fops do transaction.unwind then
+ * transaction.resume, but writev needs to make sure that
+ * delayed post-op frame is placed in fdctx before unwind
+ * happens. This prevents the race of flush doing the
+ * changelog wakeup first in fuse thread and then this
+ * writev placing its delayed post-op frame in fdctx.
+ * This helps flush make sure all the delayed post-ops are
+ * completed.
+ */
+
+ fop_frame = afr_transaction_detach_fop_frame (frame);
+ afr_writev_copy_outvars (frame, fop_frame);
+ local->transaction.resume (frame, this);
+ afr_writev_unwind (fop_frame, this);
+ }
+ }
+ return 0;
+}
+
+int
+afr_writev_wind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int call_count = -1;
+ dict_t *xdata = NULL;
+ GF_UNUSED int ret = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+ local->replies = GF_CALLOC(priv->child_count, sizeof(*local->replies),
+ gf_afr_mt_reply_t);
+ if (!local->replies) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ local->transaction.unwind(frame, this);
+ local->transaction.resume(frame, this);
+ return 0;
}
-
- return 0;
+
+ xdata = dict_new ();
+ if (xdata) {
+ ret = dict_set_uint32 (xdata, GLUSTERFS_OPEN_FD_COUNT,
+ sizeof (uint32_t));
+ ret = dict_set_uint32 (xdata, GLUSTERFS_WRITE_IS_APPEND,
+ 0);
+ /* Set append_write to be true speculatively. If on any
+ server it turns not be true, we unset it in the
+ callback.
+ */
+ local->append_write = _gf_true;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_writev_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->writev,
+ local->fd,
+ local->cont.writev.vector,
+ local->cont.writev.count,
+ local->cont.writev.offset,
+ local->cont.writev.flags,
+ local->cont.writev.iobref,
+ xdata);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
}
int
-afr_chmod_wind (call_frame_t *frame, xlator_t *this)
+afr_writev_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int i = 0;
- int call_count = -1;
+ afr_local_t *local = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ iobref_unref (local->cont.writev.iobref);
+ local->cont.writev.iobref = NULL;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ local->transaction.unwind (frame, this);
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_chmod_wind_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->chmod,
- &local->loc,
- local->cont.chmod.mode);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ AFR_STACK_DESTROY (frame);
+
+ return 0;
}
int
-afr_chmod_done (call_frame_t *frame, xlator_t *this)
+afr_do_writev (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ afr_local_t *local = NULL;
+ int op_ret = -1;
+ int op_errno = 0;
+
+ local = frame->local;
- local = frame->local;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- local->transaction.unwind (frame, this);
+ transaction_frame->local = local;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+
+ local->op = GF_FOP_WRITE;
+
+ local->success_count = 0;
+
+ local->transaction.fop = afr_writev_wind;
+ local->transaction.done = afr_writev_done;
+ local->transaction.unwind = afr_transaction_writev_unwind;
+
+ local->transaction.main_frame = frame;
+ if (local->fd->flags & O_APPEND) {
+ /*
+ * Backend vfs ignores the 'offset' for append mode fd so
+ * locking just the region provided for the writev does not
+ * give consistency gurantee. The actual write may happen at a
+ * completely different range than the one provided by the
+ * offset, len in the fop. So lock the entire file.
+ */
+ local->transaction.start = 0;
+ local->transaction.len = 0;
+ } else {
+ local->transaction.start = local->cont.writev.offset;
+ local->transaction.len = iov_length (local->cont.writev.vector,
+ local->cont.writev.count);
+ }
- AFR_STACK_DESTROY (frame);
-
- return 0;
+ op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
+
+ op_ret = 0;
+out:
+ if (op_ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL, NULL, NULL);
+ }
+
+ return 0;
}
+static void
+afr_trigger_open_fd_self_heal (fd_t *fd, xlator_t *this)
+{
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ char *reason = NULL;
+ int32_t op_errno = 0;
+ int ret = 0;
+
+ if (!fd || !fd->inode || uuid_is_null (fd->inode->gfid)) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid args: "
+ "fd: %p, inode: %p", fd,
+ fd ? fd->inode : NULL);
+ goto out;
+ }
-int32_t
-afr_chmod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode)
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ goto out;
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+ ret = afr_local_init (local, this->private, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ local->loc.inode = inode_ref (fd->inode);
+ ret = loc_path (&local->loc, NULL);
+ if (ret < 0)
+ goto out;
+
+ sh = &local->self_heal;
+ sh->do_metadata_self_heal = _gf_true;
+ if (fd->inode->ia_type == IA_IFREG)
+ sh->do_data_self_heal = _gf_true;
+ else if (fd->inode->ia_type == IA_IFDIR)
+ sh->do_entry_self_heal = _gf_true;
+
+ reason = "subvolume came online";
+ afr_launch_self_heal (frame, this, fd->inode, _gf_true,
+ fd->inode->ia_type, reason, NULL, NULL);
+ return;
+out:
+ AFR_STACK_DESTROY (frame);
+}
+
+void
+afr_open_fd_fix (fd_t *fd, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
+ int ret = 0;
+ int i = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ gf_boolean_t need_self_heal = _gf_false;
+ int *need_open = NULL;
+ size_t need_open_count = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ if (!afr_is_fd_fixable (fd))
+ goto out;
+
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx)
+ goto out;
+
+ LOCK (&fd->lock);
+ {
+ if (fd_ctx->up_count < priv->up_count) {
+ need_self_heal = _gf_true;
+ fd_ctx->up_count = priv->up_count;
+ fd_ctx->down_count = priv->down_count;
+ }
- int ret = -1;
+ need_open = alloca (priv->child_count * sizeof (*need_open));
+ for (i = 0; i < priv->child_count; i++) {
+ need_open[i] = 0;
+ if (fd_ctx->opened_on[i] != AFR_FD_NOT_OPENED)
+ continue;
- int op_ret = -1;
- int op_errno = 0;
+ if (!priv->child_up[i])
+ continue;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ fd_ctx->opened_on[i] = AFR_FD_OPENING;
- priv = this->private;
+ need_open[i] = 1;
+ need_open_count++;
+ }
+ }
+ UNLOCK (&fd->lock);
+ if (ret)
+ goto out;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ if (need_self_heal)
+ afr_trigger_open_fd_self_heal (fd, this);
- ALLOC_OR_GOTO (local, afr_local_t, out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ if (!need_open_count)
+ goto out;
- transaction_frame->local = local;
+ afr_fix_open (this, fd, need_open_count, need_open);
+out:
+ return;
+}
+
+int
+afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ int ret = -1;
+ int op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
+
+ QUORUM_CHECK(writev,out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- local->cont.chmod.mode = mode;
- local->cont.chmod.ino = loc->inode->ino;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- local->transaction.fop = afr_chmod_wind;
- local->transaction.done = afr_chmod_done;
- local->transaction.unwind = afr_chmod_unwind;
+ local->cont.writev.vector = iov_dup (vector, count);
+ local->cont.writev.count = count;
+ local->cont.writev.offset = offset;
+ local->cont.writev.flags = flags;
+ local->cont.writev.iobref = iobref_ref (iobref);
- loc_copy (&local->loc, loc);
-
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ local->fd = fd_ref (fd);
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ /* detect here, but set it in writev_wind_cbk *after* the unstable
+ write is performed
+ */
+ local->stable_write = !!((fd->flags|flags)&(O_SYNC|O_DSYNC));
- op_ret = 0;
+ afr_open_fd_fix (fd, this);
+
+ afr_do_writev (frame, this);
+
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0)
+ AFR_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-/* }}} */
+/* }}} */
-/* {{{ fchmod */
+/* {{{ truncate */
int
-afr_fchmod_unwind (call_frame_t *frame, xlator_t *this)
+afr_truncate_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- call_frame_t *main_frame = NULL;
- struct stat *unwind_buf = NULL;
-
- local = frame->local;
- priv = this->private;
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ local = frame->local;
- if (main_frame) {
- if (local->cont.fchmod.read_child_buf.st_ino) {
- unwind_buf = &local->cont.fchmod.read_child_buf;
- } else {
- unwind_buf = &local->cont.fchmod.buf;
- }
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
- unwind_buf->st_ino = local->cont.fchmod.ino;
+ if (main_frame) {
+ AFR_STACK_UNWIND (truncate, main_frame, local->op_ret,
+ local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
+ }
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
- unwind_buf);
- }
- return 0;
+ return 0;
}
int
-afr_fchmod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = (long) cookie;
- int need_unwind = 0;
+ afr_local_t * local = NULL;
+ int child_index = (long) cookie;
int read_child = 0;
+ int call_count = -1;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
- read_child = afr_read_child (this, local->fd->inode);
+ read_child = afr_inode_get_read_ctx (this, local->loc.inode, NULL);
- LOCK (&frame->lock);
- {
+ LOCK (&frame->lock);
+ {
if (child_index == read_child) {
local->read_child_returned = _gf_true;
}
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.fchmod.buf = *buf;
- }
-
- if (child_index == read_child) {
- local->cont.fchmod.read_child_buf = *buf;
- }
-
- local->success_count++;
-
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
+ if (op_ret != -1) {
+ if (prebuf->ia_size != postbuf->ia_size)
+ local->stable_write = _gf_false;
+ }
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, prebuf, postbuf,
+ xdata);
+ }
+ UNLOCK (&frame->lock);
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ call_count = afr_frame_return (frame);
- if (need_unwind)
- afr_fchmod_unwind (frame, this);
+ if (call_count == 0) {
+ if (local->stable_write && afr_txn_nothing_failed (frame, this))
+ local->transaction.unwind (frame, this);
- call_count = afr_frame_return (frame);
+ local->transaction.resume (frame, this);
+ }
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return 0;
}
-int
-afr_fchmod_wind (call_frame_t *frame, xlator_t *this)
+int32_t
+afr_truncate_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int i = 0;
- int call_count = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_fchmod_wind_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fchmod,
- local->fd,
- local->cont.fchmod.mode);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ local->call_count = call_count;
+ local->stable_write = _gf_true;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_truncate_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->truncate,
+ &local->loc,
+ local->cont.truncate.offset,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
}
int
-afr_fchmod_done (call_frame_t *frame, xlator_t *this)
+afr_truncate_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- local->transaction.unwind (frame, this);
+ local->transaction.unwind (frame, this);
- AFR_STACK_DESTROY (frame);
-
- return 0;
+ AFR_STACK_DESTROY (frame);
+
+ return 0;
}
-int32_t
-afr_fchmod (call_frame_t *frame, xlator_t *this,
- fd_t *fd, mode_t mode)
+int
+afr_truncate (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, off_t offset, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
- int ret = -1;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- int op_ret = -1;
- int op_errno = 0;
+ priv = this->private;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ QUORUM_CHECK(truncate,out);
- priv = this->private;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- ALLOC_OR_GOTO (local, afr_local_t, out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- transaction_frame->local = local;
+ local->cont.truncate.offset = offset;
- local->cont.fchmod.mode = mode;
- local->cont.fchmod.ino = fd->inode->ino;
+ local->transaction.fop = afr_truncate_wind;
+ local->transaction.done = afr_truncate_done;
+ local->transaction.unwind = afr_truncate_unwind;
- local->transaction.fop = afr_fchmod_wind;
- local->transaction.done = afr_fchmod_done;
- local->transaction.unwind = afr_fchmod_unwind;
+ loc_copy (&local->loc, loc);
- local->fd = fd_ref (fd);
-
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ local->transaction.main_frame = frame;
+ local->transaction.start = offset;
+ local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
+ }
- return 0;
+ return 0;
}
+
/* }}} */
-/* {{{ chown */
+/* {{{ ftruncate */
+
int
-afr_chown_unwind (call_frame_t *frame, xlator_t *this)
+afr_ftruncate_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- call_frame_t *main_frame = NULL;
-
- struct stat * unwind_buf = NULL;
-
- local = frame->local;
- priv = this->private;
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ local = frame->local;
- if (main_frame) {
- if (local->cont.chown.read_child_buf.st_ino) {
- unwind_buf = &local->cont.chown.read_child_buf;
- } else {
- unwind_buf = &local->cont.chown.buf;
- }
-
- unwind_buf->st_ino = local->cont.chown.ino;
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
- unwind_buf);
- }
- return 0;
+ if (main_frame) {
+ AFR_STACK_UNWIND (ftruncate, main_frame, local->op_ret,
+ local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
+ }
+ return 0;
}
int
-afr_chown_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = (long) cookie;
- int need_unwind = 0;
+afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t * local = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
int read_child = 0;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
- read_child = afr_read_child (this, local->loc.inode);
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
- LOCK (&frame->lock);
- {
+ LOCK (&frame->lock);
+ {
if (child_index == read_child) {
local->read_child_returned = _gf_true;
}
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.chown.buf = *buf;
- }
-
- if (child_index == read_child) {
- local->cont.chown.read_child_buf = *buf;
- }
-
- local->success_count++;
-
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
+ if (op_ret != -1) {
+ if (prebuf->ia_size != postbuf->ia_size)
+ local->stable_write = _gf_false;
+ }
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, prebuf, postbuf,
+ xdata);
+ }
+ UNLOCK (&frame->lock);
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ call_count = afr_frame_return (frame);
- if (need_unwind) {
- local->transaction.unwind (frame, this);
- }
+ if (call_count == 0) {
+ if (local->stable_write && afr_txn_nothing_failed (frame, this))
+ local->transaction.unwind (frame, this);
- call_count = afr_frame_return (frame);
+ local->transaction.resume (frame, this);
+ }
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return 0;
}
int
-afr_chown_wind (call_frame_t *frame, xlator_t *this)
+afr_ftruncate_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ local = frame->local;
+ priv = this->private;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
- local->call_count = call_count;
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_chown_wind_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->chown,
- &local->loc, local->cont.chown.uid,
- local->cont.chown.gid);
+ local->call_count = call_count;
+ local->stable_write = _gf_true;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_ftruncate_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->ftruncate,
+ local->fd,
+ local->cont.ftruncate.offset,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ return 0;
}
int
-afr_chown_done (call_frame_t *frame, xlator_t *this)
+afr_ftruncate_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- local->transaction.unwind (frame, this);
+ local->transaction.unwind (frame, this);
- AFR_STACK_DESTROY (frame);
+ AFR_STACK_DESTROY (frame);
- return 0;
+ return 0;
}
int
-afr_chown (call_frame_t *frame, xlator_t *this,
- loc_t *loc, uid_t uid, gid_t gid)
+afr_do_ftruncate (call_frame_t *frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
+ call_frame_t * transaction_frame = NULL;
+ afr_local_t * local = NULL;
+ int op_ret = -1;
+ int op_errno = 0;
- int ret = -1;
+ local = frame->local;
- int op_ret = -1;
- int op_errno = 0;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ goto out;
+ }
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ transaction_frame->local = local;
+ frame->local = NULL;
- priv = this->private;
+ local->op = GF_FOP_FTRUNCATE;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ local->transaction.fop = afr_ftruncate_wind;
+ local->transaction.done = afr_ftruncate_done;
+ local->transaction.unwind = afr_ftruncate_unwind;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ local->transaction.main_frame = frame;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local->transaction.start = local->cont.ftruncate.offset;
+ local->transaction.len = 0;
+
+ op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
- transaction_frame->local = local;
+ op_ret = 0;
+out:
+ if (op_ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL,
+ NULL, NULL);
+ }
+
+ return 0;
+}
+
+
+int
+afr_ftruncate (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, off_t offset, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
- local->cont.chown.uid = uid;
- local->cont.chown.gid = gid;
- local->cont.chown.ino = loc->inode->ino;
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
+ QUORUM_CHECK(ftruncate,out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- local->transaction.fop = afr_chown_wind;
- local->transaction.done = afr_chown_done;
- local->transaction.unwind = afr_chown_unwind;
+ local->cont.ftruncate.offset = offset;
- loc_copy (&local->loc, loc);
+ local->fd = fd_ref (fd);
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ afr_open_fd_fix (fd, this);
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ afr_do_ftruncate (frame, this);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
+ }
- return 0;
+ return 0;
}
-
/* }}} */
-/* {{{ chown */
+/* {{{ setattr */
int
-afr_fchown_unwind (call_frame_t *frame, xlator_t *this)
+afr_setattr_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
- struct stat * unwind_buf = NULL;
+ local = frame->local;
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.fchown.read_child_buf.st_ino) {
- unwind_buf = &local->cont.fchown.read_child_buf;
- } else {
- unwind_buf = &local->cont.fchown.buf;
- }
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
- unwind_buf->st_ino = local->cont.fchown.ino;
+ if (main_frame) {
+ AFR_STACK_UNWIND (setattr, main_frame, local->op_ret,
+ local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
+ }
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
- unwind_buf);
- }
- return 0;
+ return 0;
}
int
-afr_fchown_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = (long) cookie;
- int need_unwind = 0;
+afr_setattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
+{
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ int child_index = (long) cookie;
int read_child = 0;
+ int call_count = -1;
+ int need_unwind = 0;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- read_child = afr_read_child (this, local->fd->inode);
+ read_child = afr_inode_get_read_ctx (this, local->loc.inode, NULL);
- LOCK (&frame->lock);
- {
+ LOCK (&frame->lock);
+ {
if (child_index == read_child) {
local->read_child_returned = _gf_true;
}
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.fchown.buf = *buf;
- }
-
- if (child_index == read_child) {
- local->cont.fchown.read_child_buf = *buf;
- }
-
- local->success_count++;
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, preop, postop,
+ xdata);
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
+ if ((local->success_count >= priv->wait_count)
+ && local->read_child_returned) {
+ need_unwind = 1;
+ }
+ }
+ UNLOCK (&frame->lock);
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ if (need_unwind)
+ local->transaction.unwind (frame, this);
- if (need_unwind) {
- local->transaction.unwind (frame, this);
- }
+ call_count = afr_frame_return (frame);
- call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return 0;
}
-int
-afr_fchown_wind (call_frame_t *frame, xlator_t *this)
+int32_t
+afr_setattr_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_fchown_wind_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fchown,
- local->fd, local->cont.fchown.uid,
- local->cont.fchown.gid);
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_setattr_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->setattr,
+ &local->loc,
+ &local->cont.setattr.in_buf,
+ local->cont.setattr.valid,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ return 0;
}
int
-afr_fchown_done (call_frame_t *frame, xlator_t *this)
+afr_setattr_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- local->transaction.unwind (frame, this);
+ local->transaction.unwind (frame, this);
- AFR_STACK_DESTROY (frame);
+ AFR_STACK_DESTROY (frame);
- return 0;
+ return 0;
}
int
-afr_fchown (call_frame_t *frame, xlator_t *this,
- fd_t *fd, uid_t uid, gid_t gid)
+afr_setattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, struct iatt *buf, int32_t valid, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
-
- int ret = -1;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
- int op_ret = -1;
- int op_errno = 0;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ priv = this->private;
- priv = this->private;
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ QUORUM_CHECK(setattr,out);
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- transaction_frame->local = local;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- local->cont.fchown.uid = uid;
- local->cont.fchown.gid = gid;
- local->cont.fchown.ino = fd->inode->ino;
+ local->cont.setattr.in_buf = *buf;
+ local->cont.setattr.valid = valid;
- local->transaction.fop = afr_fchown_wind;
- local->transaction.done = afr_fchown_done;
- local->transaction.unwind = afr_fchown_unwind;
+ local->transaction.fop = afr_setattr_wind;
+ local->transaction.done = afr_setattr_done;
+ local->transaction.unwind = afr_setattr_unwind;
- local->fd = fd_ref (fd);
+ loc_copy (&local->loc, loc);
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
+ }
- return 0;
+ return 0;
}
-/* }}} */
-
-/* {{{ writev */
+/* {{{ fsetattr */
int
-afr_writev_unwind (call_frame_t *frame, xlator_t *this)
+afr_fsetattr_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- call_frame_t *main_frame = NULL;
-
- struct stat * unwind_buf = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.writev.read_child_buf.st_ino) {
- unwind_buf = &local->cont.writev.read_child_buf;
- } else {
- unwind_buf = &local->cont.writev.buf;
- }
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
- unwind_buf->st_ino = local->cont.writev.ino;
+ if (main_frame) {
+ AFR_STACK_UNWIND (fsetattr, main_frame, local->op_ret,
+ local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
+ }
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
- unwind_buf);
- }
- return 0;
+ return 0;
}
int
-afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+afr_fsetattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int child_index = (long) cookie;
- int call_count = -1;
- int need_unwind = 0;
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ int child_index = (long) cookie;
int read_child = 0;
+ int call_count = -1;
+ int need_unwind = 0;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- read_child = afr_read_child (this, local->fd->inode);
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
- LOCK (&frame->lock);
- {
+ LOCK (&frame->lock);
+ {
if (child_index == read_child) {
local->read_child_returned = _gf_true;
}
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.writev.buf = *buf;
- }
-
- if (child_index == read_child) {
- local->cont.writev.read_child_buf = *buf;
- }
-
- local->success_count++;
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, preop, postop,
+ xdata);
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
+ if ((local->success_count >= priv->wait_count)
+ && local->read_child_returned) {
+ need_unwind = 1;
+ }
+ }
+ UNLOCK (&frame->lock);
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ if (need_unwind)
+ local->transaction.unwind (frame, this);
- call_count = afr_frame_return (frame);
+ call_count = afr_frame_return (frame);
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return 0;
}
-int
-afr_writev_wind (call_frame_t *frame, xlator_t *this)
+int32_t
+afr_fsetattr_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int i = 0;
- int call_count = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_writev_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->writev,
- local->fd,
- local->cont.writev.vector,
- local->cont.writev.count,
- local->cont.writev.offset,
- local->cont.writev.iobref);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_fsetattr_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fsetattr,
+ local->fd,
+ &local->cont.fsetattr.in_buf,
+ local->cont.fsetattr.valid,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
}
int
-afr_writev_done (call_frame_t *frame, xlator_t *this)
+afr_fsetattr_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
-
- local = frame->local;
+ afr_local_t *local = NULL;
- iobref_unref (local->cont.writev.iobref);
- local->cont.writev.iobref = NULL;
+ local = frame->local;
- local->transaction.unwind (frame, this);
+ local->transaction.unwind (frame, this);
- AFR_STACK_DESTROY (frame);
+ AFR_STACK_DESTROY (frame);
- return 0;
+ return 0;
}
-
int
-afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref)
+afr_fsetattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, struct iatt *buf, int32_t valid, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
- int ret = -1;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- int op_ret = -1;
- int op_errno = 0;
+ priv = this->private;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
- priv = this->private;
+ QUORUM_CHECK(fsetattr,out);
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- transaction_frame->local = local;
+ local->cont.fsetattr.in_buf = *buf;
+ local->cont.fsetattr.valid = valid;
- local->op = GF_FOP_WRITE;
- local->cont.writev.vector = iov_dup (vector, count);
- local->cont.writev.count = count;
- local->cont.writev.offset = offset;
- local->cont.writev.ino = fd->inode->ino;
- local->cont.writev.iobref = iobref_ref (iobref);
+ local->transaction.fop = afr_fsetattr_wind;
+ local->transaction.done = afr_fsetattr_done;
+ local->transaction.unwind = afr_fsetattr_unwind;
- local->transaction.fop = afr_writev_wind;
- local->transaction.done = afr_writev_done;
- local->transaction.unwind = afr_writev_unwind;
+ local->fd = fd_ref (fd);
- local->fd = fd_ref (fd);
+ afr_open_fd_fix (fd, this);
- local->transaction.main_frame = frame;
- if (fd->flags & O_APPEND) {
- local->transaction.start = 0;
- local->transaction.len = 0;
- } else {
- local->transaction.start = offset;
- local->transaction.len = iov_length (vector, count);
- }
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
+ }
- return 0;
+ return 0;
}
-/* }}} */
+/* {{{ setxattr */
-/* {{{ truncate */
int
-afr_truncate_unwind (call_frame_t *frame, xlator_t *this)
+afr_setxattr_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
- struct stat * unwind_buf = NULL;
+ local = frame->local;
- local = frame->local;
- priv = this->private;
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ if (main_frame) {
+ AFR_STACK_UNWIND (setxattr, main_frame,
+ local->op_ret, local->op_errno,
+ NULL);
+ }
+ return 0;
+}
- if (main_frame) {
- if (local->cont.truncate.read_child_buf.st_ino) {
- unwind_buf = &local->cont.truncate.read_child_buf;
- } else {
- unwind_buf = &local->cont.truncate.buf;
+
+int
+afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int need_unwind = 0;
+ int child_index = (long) cookie;
+
+ local = frame->local;
+ priv = this->private;
+
+ LOCK (&frame->lock);
+ {
+ __inode_write_fop_cbk (frame, child_index, -1, this,
+ &op_ret, &op_errno, NULL, NULL,
+ xdata);
+ if (local->success_count == priv->child_count) {
+ need_unwind = 1;
}
+ }
+ UNLOCK (&frame->lock);
- unwind_buf->st_ino = local->cont.truncate.ino;
+ if (need_unwind)
+ local->transaction.unwind (frame, this);
- AFR_STACK_UNWIND (main_frame, local->op_ret,
- local->op_errno,
- unwind_buf);
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
}
- return 0;
+ return 0;
}
int
-afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
- int child_index = (long) cookie;
- int read_child = 0;
- int call_count = -1;
- int need_unwind = 0;
+ local = frame->local;
+ priv = this->private;
- local = frame->local;
- priv = this->private;
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
- read_child = afr_read_child (this, local->loc.inode);
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_setxattr_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->setxattr,
+ &local->loc,
+ local->cont.setxattr.dict,
+ local->cont.setxattr.flags,
+ NULL);
+
+ if (!--call_count)
+ break;
}
+ }
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+ return 0;
+}
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.truncate.buf = *buf;
- }
- if (child_index == read_child) {
- local->cont.truncate.read_child_buf = *buf;
- }
+int
+afr_setxattr_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = frame->local;
- local->success_count++;
+ local->transaction.unwind (frame, this);
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ AFR_STACK_DESTROY (frame);
- if (need_unwind)
- local->transaction.unwind (frame, this);
+ return 0;
+}
- call_count = afr_frame_return (frame);
+int
+afr_setxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = EINVAL;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ VALIDATE_OR_GOTO (this, out);
+
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict,
+ op_errno, out);
+
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.afr.*", dict,
+ op_errno, out);
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+
+ QUORUM_CHECK(setxattr,out);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ local->cont.setxattr.dict = dict_ref (dict);
+ local->cont.setxattr.flags = flags;
+
+ local->transaction.fop = afr_setxattr_wind;
+ local->transaction.done = afr_setxattr_done;
+ local->transaction.unwind = afr_setxattr_unwind;
+
+ loc_copy (&local->loc, loc);
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+ }
+
+ return 0;
}
+/* {{{ fsetxattr */
-int32_t
-afr_truncate_wind (call_frame_t *frame, xlator_t *this)
+
+int
+afr_fsetxattr_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ if (main_frame) {
+ AFR_STACK_UNWIND (fsetxattr, main_frame,
+ local->op_ret, local->op_errno,
+ NULL);
+ }
+ return 0;
+}
- local->call_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_truncate_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->truncate,
- &local->loc,
- local->cont.truncate.offset);
+int
+afr_fsetxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int need_unwind = 0;
+ int child_index = (long) cookie;
+
+ local = frame->local;
+ priv = this->private;
+
+ LOCK (&frame->lock);
+ {
+
+ __inode_write_fop_cbk (frame, child_index, -1, this,
+ &op_ret, &op_errno, NULL, NULL,
+ xdata);
+ if (local->success_count == priv->child_count) {
+ need_unwind = 1;
+ }
+ }
+ UNLOCK (&frame->lock);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ if (need_unwind)
+ local->transaction.unwind (frame, this);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
+
+ return 0;
}
int
-afr_truncate_done (call_frame_t *frame, xlator_t *this)
+afr_fsetxattr_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
- local = frame->local;
+ local = frame->local;
+ priv = this->private;
- local->transaction.unwind (frame, this);
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
- AFR_STACK_DESTROY (frame);
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_fsetxattr_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fsetxattr,
+ local->fd,
+ local->cont.fsetxattr.dict,
+ local->cont.fsetxattr.flags,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
- return 0;
+ return 0;
}
int
-afr_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset)
+afr_fsetxattr_done (call_frame_t *frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
+ afr_local_t *local = frame->local;
- int ret = -1;
+ local->transaction.unwind (frame, this);
- int op_ret = -1;
- int op_errno = 0;
+ AFR_STACK_DESTROY (frame);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ return 0;
+}
- priv = this->private;
+int
+afr_fsetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = EINVAL;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict,
+ op_errno, out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.afr.*", dict,
+ op_errno, out);
- transaction_frame->local = local;
+ priv = this->private;
- local->op_ret = -1;
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
- local->cont.truncate.offset = offset;
- local->cont.truncate.ino = loc->inode->ino;
+ QUORUM_CHECK(fsetxattr,out);
- local->transaction.fop = afr_truncate_wind;
- local->transaction.done = afr_truncate_done;
- local->transaction.unwind = afr_truncate_unwind;
+ AFR_LOCAL_ALLOC_OR_GOTO (local, out);
- loc_copy (&local->loc, loc);
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- local->transaction.main_frame = frame;
- local->transaction.start = 0;
- local->transaction.len = offset;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ goto out;
+ }
+
+ transaction_frame->local = local;
+
+ local->op_ret = -1;
+
+ local->cont.fsetxattr.dict = dict_ref (dict);
+ local->cont.fsetxattr.flags = flags;
+
+ local->transaction.fop = afr_fsetxattr_wind;
+ local->transaction.done = afr_fsetxattr_done;
+ local->transaction.unwind = afr_fsetxattr_unwind;
+
+ local->fd = fd_ref (fd);
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
- op_ret = 0;
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
+ }
- return 0;
+ return 0;
}
-
/* }}} */
-/* {{{ ftruncate */
+
+/* {{{ removexattr */
int
-afr_ftruncate_unwind (call_frame_t *frame, xlator_t *this)
+afr_removexattr_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
- struct stat * unwind_buf = NULL;
+ local = frame->local;
- local = frame->local;
- priv = this->private;
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ if (main_frame) {
+ AFR_STACK_UNWIND (removexattr, main_frame,
+ local->op_ret, local->op_errno,
+ NULL);
+ }
+ return 0;
+}
- if (main_frame) {
- if (local->cont.ftruncate.read_child_buf.st_ino) {
- unwind_buf = &local->cont.ftruncate.read_child_buf;
- } else {
- unwind_buf = &local->cont.ftruncate.buf;
+
+int
+afr_removexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int need_unwind = 0;
+ int child_index = (long) cookie;
+
+ local = frame->local;
+ priv = this->private;
+
+ LOCK (&frame->lock);
+ {
+ __inode_write_fop_cbk (frame, child_index, -1, this,
+ &op_ret, &op_errno, NULL, NULL,
+ xdata);
+ if (local->success_count == priv->wait_count) {
+ need_unwind = 1;
}
+ }
+ UNLOCK (&frame->lock);
- unwind_buf->st_ino = local->cont.ftruncate.ino;
+ if (need_unwind)
+ local->transaction.unwind (frame, this);
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
- unwind_buf);
- }
- return 0;
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
+
+ return 0;
}
-int
-afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+int32_t
+afr_removexattr_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
- int child_index = (long) cookie;
- int call_count = -1;
- int need_unwind = 0;
- int read_child = 0;
+ local = frame->local;
+ priv = this->private;
- local = frame->local;
- priv = this->private;
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
- read_child = afr_read_child (this, local->fd->inode);
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
+ local->call_count = call_count;
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_removexattr_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->removexattr,
+ &local->loc,
+ local->cont.removexattr.name,
+ NULL);
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.ftruncate.buf = *buf;
- }
+ if (!--call_count)
+ break;
+ }
+ }
- if (child_index == read_child) {
- local->cont.ftruncate.read_child_buf = *buf;
- }
+ return 0;
+}
- local->success_count++;
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+int
+afr_removexattr_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = frame->local;
- if (need_unwind)
- local->transaction.unwind (frame, this);
+ local->transaction.unwind (frame, this);
- call_count = afr_frame_return (frame);
+ AFR_STACK_DESTROY (frame);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return 0;
}
int
-afr_ftruncate_wind (call_frame_t *frame, xlator_t *this)
+afr_removexattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
- local = frame->local;
- priv = this->private;
+ VALIDATE_OR_GOTO (this, out);
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.afr.*",
+ name, op_errno, out);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.afr.*",
+ name, op_errno, out);
- local->call_count = call_count;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this->private, out);
+ VALIDATE_OR_GOTO (loc, out);
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_ftruncate_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->ftruncate,
- local->fd, local->cont.ftruncate.offset);
+ priv = this->private;
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
+ QUORUM_CHECK(removexattr,out);
+
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
+ local = transaction_frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ local->cont.removexattr.name = gf_strdup (name);
+
+ local->transaction.fop = afr_removexattr_wind;
+ local->transaction.done = afr_removexattr_done;
+ local->transaction.unwind = afr_removexattr_unwind;
+
+ loc_copy (&local->loc, loc);
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+ ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
+ }
+
+ return 0;
+}
+/* ffremovexattr */
int
-afr_ftruncate_done (call_frame_t *frame, xlator_t *this)
+afr_fremovexattr_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
-
- local = frame->local;
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
- local->transaction.unwind (frame, this);
+ local = frame->local;
- AFR_STACK_DESTROY (frame);
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
- return 0;
+ if (main_frame) {
+ AFR_STACK_UNWIND (fremovexattr, main_frame,
+ local->op_ret, local->op_errno,
+ NULL);
+ }
+ return 0;
}
int
-afr_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset)
+afr_fremovexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ int call_count = -1;
+ int need_unwind = 0;
+ int child_index = (long) cookie;
+
+ local = frame->local;
+ priv = this->private;
+
+ LOCK (&frame->lock);
+ {
+ __inode_write_fop_cbk (frame, child_index, -1, this,
+ &op_ret, &op_errno, NULL, NULL,
+ xdata);
+
+ if (local->success_count == priv->wait_count) {
+ need_unwind = 1;
+ }
+ }
+ UNLOCK (&frame->lock);
- int ret = -1;
+ if (need_unwind)
+ local->transaction.unwind (frame, this);
- int op_ret = -1;
- int op_errno = 0;
+ call_count = afr_frame_return (frame);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
- priv = this->private;
+ return 0;
+}
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
- ALLOC_OR_GOTO (local, afr_local_t, out);
+int32_t
+afr_fremovexattr_wind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local = frame->local;
+ priv = this->private;
- transaction_frame->local = local;
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
- local->op = GF_FOP_FTRUNCATE;
- local->op_ret = -1;
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
- local->cont.ftruncate.offset = offset;
- local->cont.ftruncate.ino = fd->inode->ino;
+ local->call_count = call_count;
- local->transaction.fop = afr_ftruncate_wind;
- local->transaction.done = afr_ftruncate_done;
- local->transaction.unwind = afr_ftruncate_unwind;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_fremovexattr_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fremovexattr,
+ local->fd,
+ local->cont.removexattr.name,
+ NULL);
- local->fd = fd_ref (fd);
+ if (!--call_count)
+ break;
+ }
+ }
- local->transaction.main_frame = frame;
- local->transaction.start = 0;
- local->transaction.len = offset;
+ return 0;
+}
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+int
+afr_fremovexattr_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = frame->local;
- return 0;
-}
+ local->transaction.unwind (frame, this);
-/* }}} */
+ AFR_STACK_DESTROY (frame);
-/* {{{ utimens */
+ return 0;
+}
int
-afr_utimens_unwind (call_frame_t *frame, xlator_t *this)
+afr_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- call_frame_t *main_frame = NULL;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_ret = -1;
+ int op_errno = 0;
- struct stat * unwind_buf = NULL;
+ VALIDATE_OR_GOTO (this, out);
- local = frame->local;
- priv = this->private;
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.afr.*",
+ name, op_errno, out);
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.afr.*",
+ name, op_errno, out);
- if (main_frame) {
- if (local->cont.utimens.read_child_buf.st_ino) {
- unwind_buf = &local->cont.utimens.read_child_buf;
- } else {
- unwind_buf = &local->cont.utimens.buf;
- }
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this->private, out);
- unwind_buf->st_ino = local->cont.utimens.ino;
+ priv = this->private;
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno,
- unwind_buf);
- }
- return 0;
-}
+ QUORUM_CHECK(fremovexattr, out);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ goto out;
+ }
-int
-afr_utimens_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+ AFR_LOCAL_ALLOC_OR_GOTO (local, out);
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ transaction_frame->local = local;
+
+ local->op_ret = -1;
+
+ local->cont.removexattr.name = gf_strdup (name);
+
+ local->transaction.fop = afr_fremovexattr_wind;
+ local->transaction.done = afr_fremovexattr_done;
+ local->transaction.unwind = afr_fremovexattr_unwind;
+
+ local->fd = fd_ref (fd);
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+
+ op_ret = afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
+
+ op_ret = 0;
+out:
+ if (op_ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, NULL);
+ }
+
+ return 0;
+}
+
+static int
+afr_fallocate_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
- int child_index = (long) cookie;
- int call_count = -1;
- int need_unwind = 1;
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (fallocate, main_frame, local->op_ret,
+ local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
+ }
+ return 0;
+}
+
+static int
+afr_fallocate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+ int need_unwind = 0;
int read_child = 0;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- read_child = afr_read_child (this, local->loc.inode);
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
- LOCK (&frame->lock);
- {
+ LOCK (&frame->lock);
+ {
if (child_index == read_child) {
local->read_child_returned = _gf_true;
}
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, prebuf, postbuf,
+ xdata);
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.utimens.buf = *buf;
- }
+ if ((local->success_count >= priv->wait_count)
+ && local->read_child_returned) {
+ need_unwind = 1;
+ }
+ }
+ UNLOCK (&frame->lock);
- if (child_index == read_child) {
- local->cont.utimens.read_child_buf = *buf;
- }
+ if (need_unwind)
+ local->transaction.unwind (frame, this);
- local->success_count++;
+ call_count = afr_frame_return (frame);
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ return 0;
+}
- if (need_unwind)
- local->transaction.unwind (frame, this);
+static int
+afr_fallocate_wind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
- call_count = afr_frame_return (frame);
+ local = frame->local;
+ priv = this->private;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
-}
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
-int
-afr_utimens_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_fallocate_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fallocate,
+ local->fd,
+ local->cont.fallocate.mode,
+ local->cont.fallocate.offset,
+ local->cont.fallocate.len,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
- local = frame->local;
- priv = this->private;
+ return 0;
+}
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+static int
+afr_fallocate_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ local = frame->local;
- local->call_count = call_count;
+ local->transaction.unwind (frame, this);
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_utimens_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->utimens,
- &local->loc,
- local->cont.utimens.tv);
+ AFR_STACK_DESTROY (frame);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ return 0;
}
-
-int
-afr_utimens_done (call_frame_t *frame, xlator_t *this)
+static int
+afr_do_fallocate (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
+ call_frame_t * transaction_frame = NULL;
+ afr_local_t * local = NULL;
+ int op_ret = -1;
+ int op_errno = 0;
- local = frame->local;
+ local = frame->local;
- local->transaction.unwind (frame, this);
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ goto out;
+ }
- AFR_STACK_DESTROY (frame);
+ transaction_frame->local = local;
+ frame->local = NULL;
- return 0;
-}
+ local->op = GF_FOP_FALLOCATE;
+ local->transaction.fop = afr_fallocate_wind;
+ local->transaction.done = afr_fallocate_done;
+ local->transaction.unwind = afr_fallocate_unwind;
-int
-afr_utimens (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct timespec tv[2])
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
+ local->transaction.main_frame = frame;
- int ret = -1;
+ local->transaction.start = local->cont.fallocate.offset;
+ local->transaction.len = 0;
- int op_ret = -1;
- int op_errno = 0;
+ /* fallocate can modify the file size */
+ op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ op_ret = 0;
+out:
+ if (op_ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (fallocate, frame, op_ret, op_errno, NULL,
+ NULL, NULL);
+ }
- priv = this->private;
+ return 0;
+}
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+int
+afr_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ priv = this->private;
- transaction_frame->local = local;
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
+ QUORUM_CHECK(fallocate,out);
- local->op_ret = -1;
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
- local->cont.utimens.tv[0] = tv[0];
- local->cont.utimens.tv[1] = tv[1];
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- local->cont.utimens.ino = loc->inode->ino;
+ local->cont.fallocate.mode = mode;
+ local->cont.fallocate.offset = offset;
+ local->cont.fallocate.len = len;
- local->transaction.fop = afr_utimens_wind;
- local->transaction.done = afr_utimens_done;
- local->transaction.unwind = afr_utimens_unwind;
+ local->fd = fd_ref (fd);
- loc_copy (&local->loc, loc);
-
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ afr_open_fd_fix (fd, this);
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ afr_do_fallocate (frame, this);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ }
- return 0;
+ return 0;
}
/* }}} */
-/* {{{ setxattr */
+/* {{{ discard */
-
-int
-afr_setxattr_unwind (call_frame_t *frame, xlator_t *this)
+static int
+afr_discard_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
- if (main_frame) {
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno)
- }
- return 0;
+ if (main_frame) {
+ AFR_STACK_UNWIND (discard, main_frame, local->op_ret,
+ local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf,
+ NULL);
+ }
+ return 0;
}
-
-int
-afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+static int
+afr_discard_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+ int need_unwind = 0;
+ int read_child = 0;
- int call_count = -1;
- int need_unwind = 0;
+ local = frame->local;
+ priv = this->private;
- local = frame->local;
- priv = this->private;
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
- }
+ LOCK (&frame->lock);
+ {
+ if (child_index == read_child) {
+ local->read_child_returned = _gf_true;
+ }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ __inode_write_fop_cbk (frame, child_index, read_child, this,
+ &op_ret, &op_errno, prebuf, postbuf,
+ xdata);
- if (need_unwind)
- local->transaction.unwind (frame, this);
+ if ((local->success_count >= priv->wait_count)
+ && local->read_child_returned) {
+ need_unwind = 1;
+ }
+ }
+ UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ if (need_unwind)
+ local->transaction.unwind (frame, this);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
-}
+ call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
-int
-afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
+ return 0;
+}
+
+static int
+afr_discard_wind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
- int call_count = -1;
- int i = 0;
+ local = frame->local;
+ priv = this->private;
- local = frame->local;
- priv = this->private;
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_discard_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->discard,
+ local->fd,
+ local->cont.discard.offset,
+ local->cont.discard.len,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
- local->call_count = call_count;
+ return 0;
+}
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_setxattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setxattr,
- &local->loc,
- local->cont.setxattr.dict,
- local->cont.setxattr.flags);
+static int
+afr_discard_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
+ local = frame->local;
+ local->transaction.unwind (frame, this);
-int
-afr_setxattr_done (call_frame_t *frame, xlator_t *this)
+ AFR_STACK_DESTROY (frame);
+
+ return 0;
+}
+
+static int
+afr_do_discard (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = frame->local;
+ call_frame_t * transaction_frame = NULL;
+ afr_local_t * local = NULL;
+ int op_ret = -1;
+ int op_errno = 0;
- local->transaction.unwind (frame, this);
+ local = frame->local;
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ goto out;
+ }
+ transaction_frame->local = local;
+ frame->local = NULL;
-int
-afr_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
+ local->op = GF_FOP_DISCARD;
- int ret = -1;
+ local->transaction.fop = afr_discard_wind;
+ local->transaction.done = afr_discard_done;
+ local->transaction.unwind = afr_discard_unwind;
- int op_ret = -1;
- int op_errno = 0;
+ local->transaction.main_frame = frame;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ local->transaction.start = local->cont.discard.offset;
+ local->transaction.len = 0;
- priv = this->private;
+ op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ op_ret = 0;
+out:
+ if (op_ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (discard, frame, op_ret, op_errno, NULL,
+ NULL, NULL);
+ }
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ return 0;
+}
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+int
+afr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
- transaction_frame->local = local;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- local->op_ret = -1;
+ priv = this->private;
- local->cont.setxattr.dict = dict_ref (dict);
- local->cont.setxattr.flags = flags;
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
+ QUORUM_CHECK(discard, out);
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
- local->transaction.fop = afr_setxattr_wind;
- local->transaction.done = afr_setxattr_done;
- local->transaction.unwind = afr_setxattr_unwind;
+ local->cont.discard.offset = offset;
+ local->cont.discard.len = len;
- loc_copy (&local->loc, loc);
+ local->fd = fd_ref (fd);
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ afr_open_fd_fix (fd, this);
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ afr_do_discard(frame, this);
- op_ret = 0;
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL);
+ }
- return 0;
+ return 0;
}
-/* }}} */
-
-/* {{{ removexattr */
+/* {{{ zerofill */
-int
-afr_removexattr_unwind (call_frame_t *frame, xlator_t *this)
+static int
+afr_zerofill_unwind (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame) {
+ main_frame = local->transaction.main_frame;
+ }
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
- if (main_frame) {
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno)
- }
- return 0;
+ if (main_frame) {
+ AFR_STACK_UNWIND (zerofill, main_frame, local->op_ret,
+ local->op_errno,
+ &local->cont.zerofill.prebuf,
+ &local->cont.zerofill.postbuf,
+ NULL);
+ }
+ return 0;
}
-
-int
-afr_removexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+static int
+afr_zerofill_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+ int need_unwind = 0;
+ int read_child = 0;
- int call_count = -1;
- int need_unwind = 0;
+ local = frame->local;
+ priv = this->private;
- local = frame->local;
- priv = this->private;
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
- }
+ LOCK (&frame->lock);
+ {
+ if (child_index == read_child) {
+ local->read_child_returned = _gf_true;
+ }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ if (afr_fop_failed (op_ret, op_errno)) {
+ afr_transaction_fop_failed (frame, this, child_index);
+ }
- if (need_unwind)
- local->transaction.unwind (frame, this);
+ if (op_ret != -1) {
+ if (local->success_count == 0) {
+ local->op_ret = op_ret;
+ local->cont.zerofill.prebuf = *prebuf;
+ local->cont.zerofill.postbuf = *postbuf;
+ }
- call_count = afr_frame_return (frame);
+ if (child_index == read_child) {
+ local->cont.zerofill.prebuf = *prebuf;
+ local->cont.zerofill.postbuf = *postbuf;
+ }
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
-}
+ local->success_count++;
+ if ((local->success_count >= priv->wait_count)
+ && local->read_child_returned) {
+ need_unwind = 1;
+ }
+ }
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
-int32_t
-afr_removexattr_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ if (need_unwind) {
+ local->transaction.unwind (frame, this);
+ }
+ call_count = afr_frame_return (frame);
- int call_count = -1;
- int i = 0;
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
- local = frame->local;
- priv = this->private;
+ return 0;
+}
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+static int
+afr_zerofill_wind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int i = 0;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ local = frame->local;
+ priv = this->private;
- local->call_count = call_count;
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_removexattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->removexattr,
- &local->loc,
- local->cont.removexattr.name);
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i]) {
+ STACK_WIND_COOKIE (frame, afr_zerofill_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->zerofill,
+ local->fd,
+ local->cont.zerofill.offset,
+ local->cont.zerofill.len,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+ return 0;
+}
-int
-afr_removexattr_done (call_frame_t *frame, xlator_t *this)
+static int
+afr_zerofill_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = frame->local;
+ afr_local_t *local = NULL;
- local->transaction.unwind (frame, this);
+ local = frame->local;
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
+ local->transaction.unwind (frame, this);
+ AFR_STACK_DESTROY (frame);
-int
-afr_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+ return 0;
+}
+
+static int
+afr_do_zerofill(call_frame_t *frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
+ call_frame_t *transaction_frame = NULL;
+ afr_local_t *local = NULL;
+ int op_ret = -1;
+ int op_errno = 0;
- int ret = -1;
+ local = frame->local;
- int op_ret = -1;
- int op_errno = 0;
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ goto out;
+ }
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
+ transaction_frame->local = local;
+ frame->local = NULL;
- priv = this->private;
+ local->op = GF_FOP_ZEROFILL;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ local->transaction.fop = afr_zerofill_wind;
+ local->transaction.done = afr_zerofill_done;
+ local->transaction.unwind = afr_zerofill_unwind;
- ALLOC_OR_GOTO (local, afr_local_t, out);
+ local->transaction.main_frame = frame;
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local->transaction.start = local->cont.zerofill.offset;
+ local->transaction.len = 0;
- transaction_frame->local = local;
+ op_ret = afr_transaction (transaction_frame, this,
+ AFR_DATA_TRANSACTION);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
- local->op_ret = -1;
+ op_ret = 0;
+out:
+ if (op_ret < 0) {
+ if (transaction_frame) {
+ AFR_STACK_DESTROY (transaction_frame);
+ }
+ AFR_STACK_UNWIND (zerofill, frame, op_ret, op_errno, NULL,
+ NULL, NULL);
+ }
- local->cont.removexattr.name = strdup (name);
+ return 0;
+}
- local->transaction.fop = afr_removexattr_wind;
- local->transaction.done = afr_removexattr_done;
- local->transaction.unwind = afr_removexattr_unwind;
+int
+afr_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
- loc_copy (&local->loc, loc);
+ priv = this->private;
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ if (afr_is_split_brain (this, fd->inode)) {
+ op_errno = EIO;
+ goto out;
+ }
+ QUORUM_CHECK(zerofill, out);
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0) {
+ goto out;
+ }
+ local->cont.zerofill.offset = offset;
+ local->cont.zerofill.len = len;
- op_ret = 0;
+ local->fd = fd_ref (fd);
+
+ afr_open_fd_fix (fd, this);
+
+ afr_do_zerofill(frame, this);
+
+ ret = 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
+ if (ret < 0) {
+ if (transaction_frame) {
+ AFR_STACK_DESTROY (transaction_frame);
+ }
+ AFR_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL,
+ NULL, NULL);
+ }
- return 0;
+ return 0;
}
+
+/* }}} */
+
+
diff --git a/xlators/cluster/afr/src/afr-inode-write.h b/xlators/cluster/afr/src/afr-inode-write.h
index 358d25b52..8e93ca44a 100644
--- a/xlators/cluster/afr/src/afr-inode-write.h
+++ b/xlators/cluster/afr/src/afr-inode-write.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __INODE_WRITE_H__
@@ -22,43 +13,70 @@
int32_t
afr_chmod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode);
+ loc_t *loc, mode_t mode, dict_t *xdata);
int32_t
afr_chown (call_frame_t *frame, xlator_t *this,
- loc_t *loc, uid_t uid, gid_t gid);
+ loc_t *loc, uid_t uid, gid_t gid, dict_t *xdata);
int
afr_fchown (call_frame_t *frame, xlator_t *this,
- fd_t *fd, uid_t uid, gid_t gid);
+ fd_t *fd, uid_t uid, gid_t gid, dict_t *xdata);
int32_t
afr_fchmod (call_frame_t *frame, xlator_t *this,
- fd_t *fd, mode_t mode);
+ fd_t *fd, mode_t mode, dict_t *xdata);
int32_t
-afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref);
+ uint32_t flags, struct iobref *iobref, dict_t *xdata);
int32_t
afr_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset);
+ loc_t *loc, off_t offset, dict_t *xdata);
int32_t
afr_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset);
+ fd_t *fd, off_t offset, dict_t *xdata);
int32_t
afr_utimens (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct timespec tv[2]);
+ loc_t *loc, struct timespec tv[2], dict_t *xdata);
+
+int
+afr_setattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, struct iatt *buf, int32_t valid, dict_t *xdata);
+
+int
+afr_fsetattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, struct iatt *buf, int32_t valid, dict_t *xdata);
int32_t
afr_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags);
+ loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata);
+
+int32_t
+afr_fsetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata);
int32_t
afr_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name);
+ loc_t *loc, const char *name, dict_t *xdata);
+int32_t
+afr_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata);
+
+int
+afr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata);
+
+int
+afr_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata);
+
+int
+afr_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata);
#endif /* __INODE_WRITE_H__ */
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
new file mode 100644
index 000000000..060d78f35
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -0,0 +1,2174 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "dict.h"
+#include "byte-order.h"
+#include "common-utils.h"
+
+#include "afr.h"
+#include "afr-transaction.h"
+
+#include <signal.h>
+
+
+#define LOCKED_NO 0x0 /* no lock held */
+#define LOCKED_YES 0x1 /* for DATA, METADATA, ENTRY and higher_path */
+#define LOCKED_LOWER 0x2 /* for lower path */
+
+#define AFR_TRACE_INODELK_IN(frame, this, params ...) \
+ do { \
+ afr_private_t *_priv = this->private; \
+ if (!_priv->inodelk_trace) \
+ break; \
+ afr_trace_inodelk_in (frame, this, params); \
+ } while (0);
+
+#define AFR_TRACE_INODELK_OUT(frame, this, params ...) \
+ do { \
+ afr_private_t *_priv = this->private; \
+ if (!_priv->inodelk_trace) \
+ break; \
+ afr_trace_inodelk_out (frame, this, params); \
+ } while (0);
+
+#define AFR_TRACE_ENTRYLK_IN(frame, this, params ...) \
+ do { \
+ afr_private_t *_priv = this->private; \
+ if (!_priv->entrylk_trace) \
+ break; \
+ afr_trace_entrylk_in (frame, this, params); \
+ } while (0);
+
+#define AFR_TRACE_ENTRYLK_OUT(frame, this, params ...) \
+ do { \
+ afr_private_t *_priv = this->private; \
+ if (!_priv->entrylk_trace) \
+ break; \
+ afr_trace_entrylk_out (frame, this, params); \
+ } while (0);
+
+int
+afr_entry_lockee_cmp (const void *l1, const void *l2)
+{
+ const afr_entry_lockee_t *r1 = l1;
+ const afr_entry_lockee_t *r2 = l2;
+ int ret = 0;
+ uuid_t gfid1 = {0};
+ uuid_t gfid2 = {0};
+
+ loc_gfid ((loc_t*)&r1->loc, gfid1);
+ loc_gfid ((loc_t*)&r2->loc, gfid2);
+ ret = uuid_compare (gfid1, gfid2);
+ /*Entrylks with NULL basename are the 'smallest'*/
+ if (ret == 0) {
+ if (!r1->basename)
+ return -1;
+ if (!r2->basename)
+ return 1;
+ ret = strcmp (r1->basename, r2->basename);
+ }
+
+ if (ret <= 0)
+ return -1;
+ else
+ return 1;
+}
+
+int afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index);
+
+static int
+afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this);
+
+static uint64_t afr_lock_number = 1;
+
+static uint64_t
+get_afr_lock_number ()
+{
+ return (++afr_lock_number);
+}
+
+int
+afr_set_lock_number (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ int_lock->lock_number = get_afr_lock_number ();
+
+ return 0;
+}
+
+void
+afr_set_lk_owner (call_frame_t *frame, xlator_t *this, void *lk_owner)
+{
+ gf_log (this->name, GF_LOG_TRACE,
+ "Setting lk-owner=%llu",
+ (unsigned long long) (unsigned long)lk_owner);
+
+ set_lk_owner_from_ptr (&frame->root->lk_owner, lk_owner);
+}
+
+static int
+is_afr_lock_selfheal (afr_local_t *local)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ int ret = -1;
+
+ int_lock = &local->internal_lock;
+
+ switch (int_lock->selfheal_lk_type) {
+ case AFR_DATA_SELF_HEAL_LK:
+ case AFR_METADATA_SELF_HEAL_LK:
+ ret = 1;
+ break;
+ case AFR_ENTRY_SELF_HEAL_LK:
+ ret = 0;
+ break;
+ }
+
+ return ret;
+
+}
+
+int32_t
+internal_lock_count (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int32_t call_count = 0;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i])
+ ++call_count;
+ }
+
+ return call_count;
+}
+
+static void
+afr_print_inodelk (char *str, int size, int cmd,
+ struct gf_flock *flock, gf_lkowner_t *owner)
+{
+ char *cmd_str = NULL;
+ char *type_str = NULL;
+
+ switch (cmd) {
+#if F_GETLK != F_GETLK64
+ case F_GETLK64:
+#endif
+ case F_GETLK:
+ cmd_str = "GETLK";
+ break;
+
+#if F_SETLK != F_SETLK64
+ case F_SETLK64:
+#endif
+ case F_SETLK:
+ cmd_str = "SETLK";
+ break;
+
+#if F_SETLKW != F_SETLKW64
+ case F_SETLKW64:
+#endif
+ case F_SETLKW:
+ cmd_str = "SETLKW";
+ break;
+
+ default:
+ cmd_str = "<null>";
+ break;
+ }
+
+ switch (flock->l_type) {
+ case F_RDLCK:
+ type_str = "READ";
+ break;
+ case F_WRLCK:
+ type_str = "WRITE";
+ break;
+ case F_UNLCK:
+ type_str = "UNLOCK";
+ break;
+ default:
+ type_str = "UNKNOWN";
+ break;
+ }
+
+ snprintf (str, size, "lock=INODELK, cmd=%s, type=%s, "
+ "start=%llu, len=%llu, pid=%llu, lk-owner=%s",
+ cmd_str, type_str, (unsigned long long) flock->l_start,
+ (unsigned long long) flock->l_len,
+ (unsigned long long) flock->l_pid,
+ lkowner_utoa (owner));
+
+}
+
+static void
+afr_print_lockee (char *str, int size, loc_t *loc, fd_t *fd,
+ int child_index)
+{
+ snprintf (str, size, "path=%s, fd=%p, child=%d",
+ loc->path ? loc->path : "<nul>",
+ fd ? fd : NULL,
+ child_index);
+}
+
+void
+afr_print_entrylk (char *str, int size, const char *basename,
+ gf_lkowner_t *owner)
+{
+ snprintf (str, size, "Basename=%s, lk-owner=%s",
+ basename ? basename : "<nul>",
+ lkowner_utoa (owner));
+}
+
+static void
+afr_print_verdict (int op_ret, int op_errno, char *str)
+{
+ if (op_ret < 0) {
+ if (op_errno == EAGAIN)
+ strcpy (str, "EAGAIN");
+ else
+ strcpy (str, "FAILED");
+ }
+ else
+ strcpy (str, "GRANTED");
+}
+
+static void
+afr_set_lock_call_type (afr_lock_call_type_t lock_call_type,
+ char *lock_call_type_str,
+ afr_internal_lock_t *int_lock)
+{
+ switch (lock_call_type) {
+ case AFR_INODELK_TRANSACTION:
+ if (int_lock->transaction_lk_type == AFR_TRANSACTION_LK)
+ strcpy (lock_call_type_str, "AFR_INODELK_TRANSACTION");
+ else
+ strcpy (lock_call_type_str, "AFR_INODELK_SELFHEAL");
+ break;
+ case AFR_INODELK_NB_TRANSACTION:
+ if (int_lock->transaction_lk_type == AFR_TRANSACTION_LK)
+ strcpy (lock_call_type_str, "AFR_INODELK_NB_TRANSACTION");
+ else
+ strcpy (lock_call_type_str, "AFR_INODELK_NB_SELFHEAL");
+ break;
+ case AFR_ENTRYLK_TRANSACTION:
+ if (int_lock->transaction_lk_type == AFR_TRANSACTION_LK)
+ strcpy (lock_call_type_str, "AFR_ENTRYLK_TRANSACTION");
+ else
+ strcpy (lock_call_type_str, "AFR_ENTRYLK_SELFHEAL");
+ break;
+ case AFR_ENTRYLK_NB_TRANSACTION:
+ if (int_lock->transaction_lk_type == AFR_TRANSACTION_LK)
+ strcpy (lock_call_type_str, "AFR_ENTRYLK_NB_TRANSACTION");
+ else
+ strcpy (lock_call_type_str, "AFR_ENTRYLK_NB_SELFHEAL");
+ break;
+ default:
+ strcpy (lock_call_type_str, "UNKNOWN");
+ break;
+ }
+
+}
+
+static void
+afr_trace_inodelk_out (call_frame_t *frame, xlator_t *this,
+ afr_lock_call_type_t lock_call_type,
+ afr_lock_op_type_t lk_op_type, struct gf_flock *flock,
+ int op_ret, int op_errno, int32_t child_index)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+
+ char lockee[256];
+ char lock_call_type_str[256];
+ char verdict[16];
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
+
+ afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
+
+ afr_print_verdict (op_ret, op_errno, verdict);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "[%s %s] [%s] lk-owner=%s Lockee={%s} Number={%llu}",
+ lock_call_type_str,
+ lk_op_type == AFR_LOCK_OP ? "LOCK REPLY" : "UNLOCK REPLY",
+ verdict, lkowner_utoa (&frame->root->lk_owner), lockee,
+ (unsigned long long) int_lock->lock_number);
+
+}
+
+static void
+afr_trace_inodelk_in (call_frame_t *frame, xlator_t *this,
+ afr_lock_call_type_t lock_call_type,
+ afr_lock_op_type_t lk_op_type, struct gf_flock *flock,
+ int32_t cmd, int32_t child_index)
+{
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+
+ char lock[256];
+ char lockee[256];
+ char lock_call_type_str[256];
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ afr_print_inodelk (lock, 256, cmd, flock, &frame->root->lk_owner);
+ afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
+
+ afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "[%s %s] Lock={%s} Lockee={%s} Number={%llu}",
+ lock_call_type_str,
+ lk_op_type == AFR_LOCK_OP ? "LOCK REQUEST" : "UNLOCK REQUEST",
+ lock, lockee,
+ (unsigned long long) int_lock->lock_number);
+
+}
+
+static void
+afr_trace_entrylk_in (call_frame_t *frame, xlator_t *this,
+ afr_lock_call_type_t lock_call_type,
+ afr_lock_op_type_t lk_op_type, const char *basename,
+ int32_t cookie)
+{
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_private_t *priv = NULL;
+ int child_index = 0;
+ int lockee_no = 0;
+
+ char lock[256];
+ char lockee[256];
+ char lock_call_type_str[256];
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+
+ if (!priv->entrylk_trace) {
+ return;
+ }
+ lockee_no = cookie / priv->child_count;
+ child_index = cookie % priv->child_count;
+
+ afr_print_entrylk (lock, 256, basename, &frame->root->lk_owner);
+ afr_print_lockee (lockee, 256, &int_lock->lockee[lockee_no].loc, local->fd,
+ child_index);
+
+ afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "[%s %s] Lock={%s} Lockee={%s} Number={%llu}, Cookie={%d}",
+ lock_call_type_str,
+ lk_op_type == AFR_LOCK_OP ? "LOCK REQUEST" : "UNLOCK REQUEST",
+ lock, lockee,
+ (unsigned long long) int_lock->lock_number,
+ cookie);
+}
+
+static void
+afr_trace_entrylk_out (call_frame_t *frame, xlator_t *this,
+ afr_lock_call_type_t lock_call_type,
+ afr_lock_op_type_t lk_op_type, const char *basename,
+ int op_ret, int op_errno, int32_t cookie)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int lockee_no = 0;
+ int child_index = 0;
+
+ char lock[256];
+ char lockee[256];
+ char lock_call_type_str[256];
+ char verdict[16];
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+
+ if (!priv->entrylk_trace) {
+ return;
+ }
+ lockee_no = cookie / priv->child_count;
+ child_index = cookie % priv->child_count;
+
+ afr_print_entrylk (lock, 256, basename, &frame->root->lk_owner);
+ afr_print_lockee (lockee, 256, &int_lock->lockee[lockee_no].loc, local->fd,
+ child_index);
+
+ afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
+
+ afr_print_verdict (op_ret, op_errno, verdict);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "[%s %s] [%s] Lock={%s} Lockee={%s} Number={%llu} Cookie={%d}",
+ lock_call_type_str,
+ lk_op_type == AFR_LOCK_OP ? "LOCK REPLY" : "UNLOCK REPLY",
+ verdict,
+ lock, lockee,
+ (unsigned long long) int_lock->lock_number,
+ cookie);
+
+}
+
+static int
+transaction_lk_op (afr_local_t *local)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ int ret = -1;
+
+ int_lock = &local->internal_lock;
+
+ if (int_lock->transaction_lk_type == AFR_TRANSACTION_LK) {
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "lk op is for a transaction");
+ ret = 1;
+ }
+ else if (int_lock->transaction_lk_type == AFR_SELFHEAL_LK) {
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "lk op is for a self heal");
+
+ ret = 0;
+ }
+
+ if (ret == -1)
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "lk op is not set");
+
+ return ret;
+
+}
+
+static int
+is_afr_lock_transaction (afr_local_t *local)
+{
+ int ret = 0;
+
+ switch (local->transaction.type) {
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ ret = 1;
+ break;
+
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ case AFR_ENTRY_TRANSACTION:
+ ret = 0;
+ break;
+
+ }
+
+ return ret;
+}
+
+int
+afr_init_entry_lockee (afr_entry_lockee_t *lockee, afr_local_t *local,
+ loc_t *loc, char *basename, int child_count)
+{
+ int ret = -1;
+
+ loc_copy (&lockee->loc, loc);
+ lockee->basename = (basename)? gf_strdup (basename): NULL;
+ if (basename && !lockee->basename)
+ goto out;
+
+ lockee->locked_count = 0;
+ lockee->locked_nodes = GF_CALLOC (child_count,
+ sizeof (*lockee->locked_nodes),
+ gf_afr_mt_afr_node_character);
+
+ if (!lockee->locked_nodes)
+ goto out;
+
+ ret = 0;
+out:
+ return ret;
+
+}
+
+void
+afr_entry_lockee_cleanup (afr_internal_lock_t *int_lock)
+{
+ int i = 0;
+
+ for (i = 0; i < int_lock->lockee_count; i++) {
+ loc_wipe (&int_lock->lockee[i].loc);
+ if (int_lock->lockee[i].basename)
+ GF_FREE (int_lock->lockee[i].basename);
+ if (int_lock->lockee[i].locked_nodes)
+ GF_FREE (int_lock->lockee[i].locked_nodes);
+ }
+
+ return;
+}
+
+static int
+initialize_entrylk_variables (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_private_t *priv = NULL;
+
+ int i = 0;
+
+ priv = this->private;
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ int_lock->entrylk_lock_count = 0;
+ int_lock->lock_op_ret = -1;
+ int_lock->lock_op_errno = 0;
+
+ for (i = 0; i < AFR_LOCKEE_COUNT_MAX; i++) {
+ if (!int_lock->lockee[i].locked_nodes)
+ break;
+ int_lock->lockee[i].locked_count = 0;
+ memset (int_lock->lockee[i].locked_nodes, 0,
+ sizeof (*int_lock->lockee[i].locked_nodes) *
+ priv->child_count);
+ }
+
+ return 0;
+}
+
+static int
+initialize_inodelk_variables (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_private_t *priv = NULL;
+ afr_inodelk_t *inodelk = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+
+ inodelk->lock_count = 0;
+ int_lock->lk_attempted_count = 0;
+ int_lock->lock_op_ret = -1;
+ int_lock->lock_op_errno = 0;
+
+ memset (inodelk->locked_nodes, 0,
+ sizeof (*inodelk->locked_nodes) * priv->child_count);
+ memset (int_lock->locked_nodes, 0,
+ sizeof (*int_lock->locked_nodes) * priv->child_count);
+
+ return 0;
+}
+
+loc_t *
+lower_path (loc_t *l1, const char *b1, loc_t *l2, const char *b2)
+{
+ int ret = 0;
+
+ ret = uuid_compare (l1->inode->gfid, l2->inode->gfid);
+
+ if (ret == 0)
+ ret = strcmp (b1, b2);
+
+ if (ret <= 0)
+ return l1;
+ else
+ return l2;
+}
+
+int
+afr_lockee_locked_nodes_count (afr_internal_lock_t *int_lock)
+{
+ int call_count = 0;
+ int i = 0;
+
+ for (i = 0; i < int_lock->lockee_count; i++)
+ call_count += int_lock->lockee[i].locked_count;
+
+ return call_count;
+}
+
+int
+afr_locked_nodes_count (unsigned char *locked_nodes, int child_count)
+
+{
+ int i = 0;
+ int call_count = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (locked_nodes[i] & LOCKED_YES)
+ call_count++;
+ }
+
+ return call_count;
+}
+
+/* FIXME: What if UNLOCK fails */
+static int32_t
+afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ int call_count = 0;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ LOCK (&frame->lock);
+ {
+ call_count = --int_lock->lk_call_count;
+ }
+ UNLOCK (&frame->lock);
+
+ if (call_count == 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "All internal locks unlocked");
+ int_lock->lock_cbk (frame, this);
+ }
+
+ return 0;
+}
+
+static int32_t
+afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
+ int32_t child_index = (long)cookie;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_TRANSACTION,
+ AFR_UNLOCK_OP, NULL, op_ret,
+ op_errno, child_index);
+
+ priv = this->private;
+
+ if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
+ gf_log (this->name, GF_LOG_INFO, "%s: unlock failed on subvolume %s "
+ "with lock owner %s", local->loc.path,
+ priv->children[child_index]->name,
+ lkowner_utoa (&frame->root->lk_owner));
+ }
+
+
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ inodelk->locked_nodes[child_index] &= LOCKED_NO;
+ if (local->transaction.eager_lock)
+ local->transaction.eager_lock[child_index] = 0;
+
+ afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, xdata);
+
+ return 0;
+
+}
+
+static int
+afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct gf_flock flock = {0,};
+ struct gf_flock full_flock = {0,};
+ struct gf_flock *flock_use = NULL;
+ int call_count = 0;
+ int i = 0;
+ int piggyback = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+
+ flock.l_start = inodelk->flock.l_start;
+ flock.l_len = inodelk->flock.l_len;
+ flock.l_type = F_UNLCK;
+
+ full_flock.l_type = F_UNLCK;
+ call_count = afr_locked_nodes_count (inodelk->locked_nodes,
+ priv->child_count);
+
+ int_lock->lk_call_count = call_count;
+
+ if (!call_count) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "No internal locks unlocked");
+ int_lock->lock_cbk (frame, this);
+ goto out;
+ }
+
+ if (local->fd)
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if ((inodelk->locked_nodes[i] & LOCKED_YES) != LOCKED_YES)
+ continue;
+
+ if (local->fd) {
+ flock_use = &flock;
+ if (!local->transaction.eager_lock[i]) {
+ goto wind;
+ }
+
+ piggyback = 0;
+
+ LOCK (&local->fd->lock);
+ {
+ if (fd_ctx->lock_piggyback[i]) {
+ fd_ctx->lock_piggyback[i]--;
+ piggyback = 1;
+ } else {
+ fd_ctx->lock_acquired[i]--;
+ }
+ }
+ UNLOCK (&local->fd->lock);
+
+ if (piggyback) {
+ afr_unlock_inodelk_cbk (frame, (void *) (long) i,
+ this, 1, 0, NULL);
+ if (!--call_count)
+ break;
+ continue;
+ }
+
+ flock_use = &full_flock;
+ wind:
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_TRANSACTION,
+ AFR_UNLOCK_OP, flock_use, F_SETLK,
+ i);
+
+ STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk,
+ (void *) (long)i,
+ priv->children[i],
+ priv->children[i]->fops->finodelk,
+ int_lock->domain, local->fd,
+ F_SETLK, flock_use, NULL);
+
+ if (!--call_count)
+ break;
+
+ } else {
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_TRANSACTION,
+ AFR_UNLOCK_OP, &flock, F_SETLK, i);
+
+ STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk,
+ (void *) (long)i,
+ priv->children[i],
+ priv->children[i]->fops->inodelk,
+ int_lock->domain, &local->loc,
+ F_SETLK, &flock, NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+out:
+ return 0;
+}
+
+static int32_t
+afr_unlock_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ int32_t child_index = 0;
+ int lockee_no = 0;
+
+ priv = this->private;
+ lockee_no = (int)((long) cookie) / priv->child_count;
+ child_index = (int) ((long) cookie) % priv->child_count;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
+ AFR_UNLOCK_OP,
+ int_lock->lockee[lockee_no].basename, op_ret,
+ op_errno, (int) ((long)cookie));
+
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: unlock failed on %d, reason: %s",
+ local->loc.path, child_index, strerror (op_errno));
+ }
+
+ int_lock->lockee[lockee_no].locked_nodes[child_index] &= LOCKED_NO;
+ afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, NULL);
+
+ return 0;
+}
+
+static int
+afr_unlock_entrylk (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int index = 0;
+ int lockee_no = 0;
+ int copies = 0;
+ int i = -1;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+ copies = priv->child_count;
+
+ call_count = afr_lockee_locked_nodes_count (int_lock);
+
+ int_lock->lk_call_count = call_count;
+
+ if (!call_count){
+ gf_log (this->name, GF_LOG_TRACE,
+ "No internal locks unlocked");
+ int_lock->lock_cbk (frame, this);
+ goto out;
+ }
+
+ for (i = 0; i < int_lock->lockee_count * priv->child_count; i++) {
+ lockee_no = i / copies;
+ index = i % copies;
+ if (int_lock->lockee[lockee_no].locked_nodes[index] & LOCKED_YES) {
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION,
+ AFR_UNLOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ i);
+
+ STACK_WIND_COOKIE (frame, afr_unlock_entrylk_cbk,
+ (void *) (long) i,
+ priv->children[index],
+ priv->children[index]->fops->entrylk,
+ int_lock->domain,
+ &int_lock->lockee[lockee_no].loc,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+out:
+ return 0;
+
+}
+
+static int32_t
+afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int cky = (long) cookie;
+ int child_index = 0;
+ int lockee_no = 0;
+
+ priv = this->private;
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ child_index = ((int)cky) % priv->child_count;
+ lockee_no = ((int)cky) / priv->child_count;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ if (op_errno == ENOSYS) {
+ /* return ENOTSUP */
+ gf_log (this->name, GF_LOG_ERROR,
+ "subvolume does not support locking. "
+ "please load features/locks xlator on server");
+ local->op_ret = op_ret;
+ int_lock->lock_op_ret = op_ret;
+ }
+
+ local->op_errno = op_errno;
+ int_lock->lock_op_errno = op_errno;
+ }
+
+ int_lock->lk_attempted_count++;
+ }
+ UNLOCK (&frame->lock);
+
+ if ((op_ret == -1) &&
+ (op_errno == ENOSYS)) {
+ afr_unlock (frame, this);
+ } else {
+ if (op_ret == 0) {
+ if (local->transaction.type == AFR_ENTRY_TRANSACTION ||
+ local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
+ int_lock->lockee[lockee_no].locked_nodes[child_index] |= LOCKED_YES;
+ int_lock->lockee[lockee_no].locked_count++;
+ int_lock->entrylk_lock_count++;
+ } else {
+ int_lock->locked_nodes[child_index] |= LOCKED_YES;
+ int_lock->lock_count++;
+ }
+ }
+ afr_lock_blocking (frame, this, cky + 1);
+ }
+
+ return 0;
+}
+
+static int32_t
+afr_blocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_TRANSACTION,
+ AFR_LOCK_OP, NULL, op_ret,
+ op_errno, (long) cookie);
+
+ afr_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata);
+ return 0;
+
+}
+
+static int32_t
+afr_blocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
+ AFR_LOCK_OP, NULL, op_ret,
+ op_errno, (long)cookie);
+
+ afr_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata);
+ return 0;
+}
+
+static int
+afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ switch (local->transaction.type) {
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ memcpy (inodelk->locked_nodes, int_lock->locked_nodes,
+ sizeof (*inodelk->locked_nodes) * priv->child_count);
+ inodelk->lock_count = int_lock->lock_count;
+ break;
+
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ case AFR_ENTRY_TRANSACTION:
+ /*entrylk_count is being used in both non-blocking and blocking
+ * modes */
+ break;
+ }
+
+ return 0;
+
+}
+
+static inline gf_boolean_t
+afr_is_entrylk (afr_internal_lock_t *int_lock,
+ afr_transaction_type trans_type)
+{
+ gf_boolean_t is_entrylk = _gf_false;
+
+ if ((int_lock->transaction_lk_type == AFR_SELFHEAL_LK) &&
+ int_lock->selfheal_lk_type == AFR_ENTRY_SELF_HEAL_LK) {
+
+ is_entrylk = _gf_true;
+
+ } else if ((int_lock->transaction_lk_type == AFR_TRANSACTION_LK) &&
+ (trans_type == AFR_ENTRY_TRANSACTION ||
+ trans_type == AFR_ENTRY_RENAME_TRANSACTION)) {
+
+ is_entrylk = _gf_true;
+
+ } else {
+ is_entrylk = _gf_false;
+ }
+
+ return is_entrylk;
+}
+
+static gf_boolean_t
+_is_lock_wind_needed (afr_local_t *local, int child_index)
+{
+ if (!local->child_up[child_index])
+ return _gf_false;
+
+ return _gf_true;
+}
+
+int
+afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct gf_flock flock = {0,};
+ uint64_t ctx = 0;
+ int ret = 0;
+ int child_index = 0;
+ int lockee_no = 0;
+ gf_boolean_t is_entrylk = _gf_false;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+ child_index = cookie % priv->child_count;
+ lockee_no = cookie / priv->child_count;
+ is_entrylk = afr_is_entrylk (int_lock, local->transaction.type);
+
+
+ if (!is_entrylk) {
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ flock.l_start = inodelk->flock.l_start;
+ flock.l_len = inodelk->flock.l_len;
+ flock.l_type = inodelk->flock.l_type;
+ }
+
+ if (local->fd) {
+ ret = fd_ctx_get (local->fd, this, &ctx);
+
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "unable to get fd ctx for fd=%p",
+ local->fd);
+
+ local->op_ret = -1;
+ int_lock->lock_op_ret = -1;
+
+ afr_copy_locked_nodes (frame, this);
+
+ afr_unlock (frame, this);
+
+ return 0;
+ }
+ }
+
+ if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
+ if ((is_entrylk && int_lock->entrylk_lock_count == 0) ||
+ (!is_entrylk && int_lock->lock_count == 0)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "unable to lock on even one child");
+
+ local->op_ret = -1;
+ int_lock->lock_op_ret = -1;
+
+ afr_copy_locked_nodes (frame, this);
+
+ afr_unlock(frame, this);
+
+ return 0;
+ }
+ }
+
+ if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
+ /* we're done locking */
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "we're done locking");
+
+ afr_copy_locked_nodes (frame, this);
+
+ int_lock->lock_op_ret = 0;
+ int_lock->lock_cbk (frame, this);
+ return 0;
+ }
+
+ if (!_is_lock_wind_needed (local, child_index)) {
+ afr_lock_blocking (frame, this, cookie + 1);
+ return 0;
+ }
+
+ switch (local->transaction.type) {
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+
+ if (local->fd) {
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_TRANSACTION,
+ AFR_LOCK_OP, &flock, F_SETLKW,
+ child_index);
+
+ STACK_WIND_COOKIE (frame, afr_blocking_inodelk_cbk,
+ (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->finodelk,
+ int_lock->domain, local->fd,
+ F_SETLKW, &flock, NULL);
+
+ } else {
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_TRANSACTION,
+ AFR_LOCK_OP, &flock, F_SETLKW,
+ child_index);
+
+ STACK_WIND_COOKIE (frame, afr_blocking_inodelk_cbk,
+ (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->inodelk,
+ int_lock->domain, &local->loc,
+ F_SETLKW, &flock, NULL);
+ }
+
+ break;
+
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ case AFR_ENTRY_TRANSACTION:
+ /*Accounting for child_index increments on 'down'
+ *and 'fd-less' children */
+
+ if (local->fd) {
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ cookie);
+
+ STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk,
+ (void *) (long) cookie,
+ priv->children[child_index],
+ priv->children[child_index]->fops->fentrylk,
+ int_lock->domain, local->fd,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
+ } else {
+ AFR_TRACE_ENTRYLK_IN (frame, this,
+ AFR_ENTRYLK_TRANSACTION,
+ AFR_LOCK_OP, local->transaction.basename,
+ child_index);
+
+ STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk,
+ (void *) (long) cookie,
+ priv->children[child_index],
+ priv->children[child_index]->fops->entrylk,
+ int_lock->domain,
+ &int_lock->lockee[lockee_no].loc,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
+ }
+
+ break;
+ }
+
+ return 0;
+}
+
+int32_t
+afr_blocking_lock (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int up_count = 0;
+
+ priv = this->private;
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ switch (local->transaction.type) {
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ initialize_inodelk_variables (frame, this);
+ break;
+
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ case AFR_ENTRY_TRANSACTION:
+ up_count = afr_up_children_count (local->child_up,
+ priv->child_count);
+ int_lock->lk_call_count = int_lock->lk_expected_count
+ = (int_lock->lockee_count *
+ up_count);
+ initialize_entrylk_variables (frame, this);
+ break;
+ }
+
+ afr_lock_blocking (frame, this, 0);
+
+ return 0;
+}
+
+static int32_t
+afr_nonblocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ int call_count = 0;
+ int child_index = (long) cookie;
+ int copies = 0;
+ int index = 0;
+ int lockee_no = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ copies = priv->child_count;
+ index = child_index % copies;
+ lockee_no = child_index / copies;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename, op_ret,
+ op_errno, (long) cookie);
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret < 0 ) {
+ if (op_errno == ENOSYS) {
+ /* return ENOTSUP */
+ gf_log (this->name, GF_LOG_ERROR,
+ "subvolume does not support locking. "
+ "please load features/locks xlator on server");
+ local->op_ret = op_ret;
+ int_lock->lock_op_ret = op_ret;
+
+ int_lock->lock_op_errno = op_errno;
+ local->op_errno = op_errno;
+ }
+ } else if (op_ret == 0) {
+ int_lock->lockee[lockee_no].locked_nodes[index] |= \
+ LOCKED_YES;
+ int_lock->lockee[lockee_no].locked_count++;
+ int_lock->entrylk_lock_count++;
+ }
+
+ call_count = --int_lock->lk_call_count;
+ }
+ UNLOCK (&frame->lock);
+
+ if (call_count == 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "Last locking reply received");
+ /* all locks successful. Proceed to call FOP */
+ if (int_lock->entrylk_lock_count ==
+ int_lock->lk_expected_count) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "All servers locked. Calling the cbk");
+ int_lock->lock_op_ret = 0;
+ int_lock->lock_cbk (frame, this);
+ }
+ /* Not all locks were successful. Unlock and try locking
+ again, this time with serially blocking locks */
+ else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "%d servers locked. Trying again with blocking calls",
+ int_lock->lock_count);
+
+ afr_unlock(frame, this);
+ }
+ }
+
+ return 0;
+}
+
+int
+afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int copies = 0;
+ int index = 0;
+ int lockee_no = 0;
+ int32_t call_count = 0;
+ int i = 0;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+
+ copies = priv->child_count;
+ initialize_entrylk_variables (frame, this);
+
+ if (local->fd) {
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+ if (!fd_ctx) {
+ gf_log (this->name, GF_LOG_INFO,
+ "unable to get fd ctx for fd=%p",
+ local->fd);
+
+ local->op_ret = -1;
+ int_lock->lock_op_ret = -1;
+ local->op_errno = EINVAL;
+ int_lock->lock_op_errno = EINVAL;
+
+ afr_unlock (frame, this);
+ return -1;
+ }
+
+ call_count = int_lock->lockee_count * internal_lock_count (frame, this);
+ int_lock->lk_call_count = call_count;
+ int_lock->lk_expected_count = call_count;
+
+ if (!call_count) {
+ gf_log (this->name, GF_LOG_INFO,
+ "fd not open on any subvolumes. aborting.");
+ afr_unlock (frame, this);
+ goto out;
+ }
+
+ /* Send non-blocking entrylk calls only on up children
+ and where the fd has been opened */
+ for (i = 0; i < int_lock->lockee_count*priv->child_count; i++) {
+ index = i%copies;
+ lockee_no = i/copies;
+ if (local->child_up[index]) {
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ i);
+
+ STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk,
+ (void *) (long) i,
+ priv->children[index],
+ priv->children[index]->fops->fentrylk,
+ this->name, local->fd,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK,
+ NULL);
+ if (!--call_count)
+ break;
+ }
+ }
+ } else {
+ call_count = int_lock->lockee_count * internal_lock_count (frame, this);
+ int_lock->lk_call_count = call_count;
+ int_lock->lk_expected_count = call_count;
+
+ for (i = 0; i < int_lock->lockee_count*priv->child_count; i++) {
+ index = i%copies;
+ lockee_no = i/copies;
+ if (local->child_up[index]) {
+ AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_NB_TRANSACTION,
+ AFR_LOCK_OP,
+ int_lock->lockee[lockee_no].basename,
+ i);
+
+ STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk,
+ (void *) (long) i,
+ priv->children[index],
+ priv->children[index]->fops->entrylk,
+ this->name, &int_lock->lockee[lockee_no].loc,
+ int_lock->lockee[lockee_no].basename,
+ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK,
+ NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+ }
+out:
+ return 0;
+}
+
+int32_t
+afr_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
+ afr_local_t *local = NULL;
+ int call_count = 0;
+ int child_index = (long) cookie;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+
+ AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_NB_TRANSACTION,
+ AFR_LOCK_OP, NULL, op_ret,
+ op_errno, (long) cookie);
+
+ if (local->fd)
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret < 0) {
+ if (op_errno == ENOSYS) {
+ /* return ENOTSUP */
+ gf_log (this->name, GF_LOG_ERROR,
+ "subvolume does not support locking. "
+ "please load features/locks xlator on "
+ "server");
+ local->op_ret = op_ret;
+ int_lock->lock_op_ret = op_ret;
+ int_lock->lock_op_errno = op_errno;
+ local->op_errno = op_errno;
+ }
+ if (local->transaction.eager_lock)
+ local->transaction.eager_lock[child_index] = 0;
+ } else {
+ inodelk->locked_nodes[child_index] |= LOCKED_YES;
+ inodelk->lock_count++;
+
+ if (local->transaction.eager_lock &&
+ local->transaction.eager_lock[child_index] &&
+ local->fd) {
+ /* piggybacked */
+ if (op_ret == 1) {
+ /* piggybacked */
+ } else if (op_ret == 0) {
+ /* lock acquired from server */
+ fd_ctx->lock_acquired[child_index]++;
+ }
+ }
+ }
+
+ call_count = --int_lock->lk_call_count;
+ }
+ UNLOCK (&frame->lock);
+
+ if (call_count == 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "Last inode locking reply received");
+ /* all locks successful. Proceed to call FOP */
+ if (inodelk->lock_count == int_lock->lk_expected_count) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "All servers locked. Calling the cbk");
+ int_lock->lock_op_ret = 0;
+ int_lock->lock_cbk (frame, this);
+ }
+ /* Not all locks were successful. Unlock and try locking
+ again, this time with serially blocking locks */
+ else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "%d servers locked. Trying again with blocking calls",
+ int_lock->lock_count);
+
+ afr_unlock(frame, this);
+ }
+ }
+
+ return 0;
+}
+
+int
+afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int32_t call_count = 0;
+ int i = 0;
+ int ret = 0;
+ struct gf_flock flock = {0,};
+ struct gf_flock full_flock = {0,};
+ struct gf_flock *flock_use = NULL;
+ int piggyback = 0;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+
+ flock.l_start = inodelk->flock.l_start;
+ flock.l_len = inodelk->flock.l_len;
+ flock.l_type = inodelk->flock.l_type;
+
+ full_flock.l_type = inodelk->flock.l_type;
+
+ initialize_inodelk_variables (frame, this);
+
+ if (local->fd) {
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+ if (!fd_ctx) {
+ gf_log (this->name, GF_LOG_INFO,
+ "unable to get fd ctx for fd=%p",
+ local->fd);
+
+ local->op_ret = -1;
+ int_lock->lock_op_ret = -1;
+ local->op_errno = EINVAL;
+ int_lock->lock_op_errno = EINVAL;
+
+ afr_unlock (frame, this);
+ ret = -1;
+ goto out;
+ }
+
+ call_count = internal_lock_count (frame, this);
+ int_lock->lk_call_count = call_count;
+ int_lock->lk_expected_count = call_count;
+
+ if (!call_count) {
+ gf_log (this->name, GF_LOG_INFO,
+ "fd not open on any subvolumes. aborting.");
+ afr_unlock (frame, this);
+ goto out;
+ }
+
+ /* Send non-blocking inodelk calls only on up children
+ and where the fd has been opened */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
+
+ flock_use = &flock;
+ if (!local->transaction.eager_lock_on) {
+ goto wind;
+ }
+
+ piggyback = 0;
+ local->transaction.eager_lock[i] = 1;
+
+ afr_set_delayed_post_op (frame, this);
+
+ LOCK (&local->fd->lock);
+ {
+ if (fd_ctx->lock_acquired[i]) {
+ fd_ctx->lock_piggyback[i]++;
+ piggyback = 1;
+ }
+ }
+ UNLOCK (&local->fd->lock);
+
+ if (piggyback) {
+ /* (op_ret == 1) => indicate piggybacked lock */
+ afr_nonblocking_inodelk_cbk (frame, (void *) (long) i,
+ this, 1, 0, NULL);
+ if (!--call_count)
+ break;
+ continue;
+ }
+ flock_use = &full_flock;
+ wind:
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_NB_TRANSACTION,
+ AFR_LOCK_OP, flock_use, F_SETLK, i);
+
+ STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->finodelk,
+ int_lock->domain, local->fd,
+ F_SETLK, flock_use, NULL);
+
+ if (!--call_count)
+ break;
+ }
+ } else {
+ call_count = internal_lock_count (frame, this);
+ int_lock->lk_call_count = call_count;
+ int_lock->lk_expected_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
+ AFR_TRACE_INODELK_IN (frame, this,
+ AFR_INODELK_NB_TRANSACTION,
+ AFR_LOCK_OP, &flock, F_SETLK, i);
+
+ STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->inodelk,
+ int_lock->domain, &local->loc,
+ F_SETLK, &flock, NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+out:
+ return ret;
+}
+
+int32_t
+afr_unlock (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (transaction_lk_op (local)) {
+ if (is_afr_lock_transaction (local))
+ afr_unlock_inodelk (frame, this);
+ else
+ afr_unlock_entrylk (frame, this);
+
+ } else {
+ if (is_afr_lock_selfheal (local))
+ afr_unlock_inodelk (frame, this);
+ else
+ afr_unlock_entrylk (frame, this);
+ }
+
+ return 0;
+}
+
+int
+afr_mark_locked_nodes (xlator_t *this, fd_t *fd,
+ unsigned char *locked_nodes)
+{
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fdctx = NULL;
+ uint64_t tmp = 0;
+ int ret = 0;
+
+ priv = this->private;
+
+ ret = afr_fd_ctx_set (this, fd);
+ if (ret)
+ goto out;
+
+ ret = fd_ctx_get (fd, this, &tmp);
+ if (ret) {
+ gf_log (this->name, GF_LOG_INFO,
+ "failed to get the fd ctx");
+ goto out;
+ }
+ fdctx = (afr_fd_ctx_t *) (long) tmp;
+
+ GF_ASSERT (fdctx->locked_on);
+
+ memcpy (fdctx->locked_on, locked_nodes,
+ priv->child_count);
+
+out:
+ return ret;
+}
+
+static int
+__is_fd_saved (xlator_t *this, fd_t *fd)
+{
+ afr_locked_fd_t *locked_fd = NULL;
+ afr_private_t *priv = NULL;
+ int found = 0;
+
+ priv = this->private;
+
+ list_for_each_entry (locked_fd, &priv->saved_fds, list) {
+ if (locked_fd->fd == fd) {
+ found = 1;
+ break;
+ }
+ }
+
+ return found;
+}
+
+static int
+__afr_save_locked_fd (xlator_t *this, fd_t *fd)
+{
+ afr_private_t *priv = NULL;
+ afr_locked_fd_t *locked_fd = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ locked_fd = GF_CALLOC (1, sizeof (*locked_fd),
+ gf_afr_mt_locked_fd);
+ if (!locked_fd) {
+ ret = -1;
+ goto out;
+ }
+
+ locked_fd->fd = fd;
+ INIT_LIST_HEAD (&locked_fd->list);
+
+ list_add_tail (&locked_fd->list, &priv->saved_fds);
+
+out:
+ return ret;
+}
+
+int
+afr_save_locked_fd (xlator_t *this, fd_t *fd)
+{
+ afr_private_t *priv = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ pthread_mutex_lock (&priv->mutex);
+ {
+ if (__is_fd_saved (this, fd)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "fd=%p already saved", fd);
+ goto unlock;
+ }
+
+ ret = __afr_save_locked_fd (this, fd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_INFO,
+ "fd=%p could not be saved", fd);
+ goto unlock;
+ }
+ }
+unlock:
+ pthread_mutex_unlock (&priv->mutex);
+
+ return ret;
+}
+
+static int
+afr_lock_recovery_cleanup (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_locked_fd_t *locked_fd = NULL;
+
+ local = frame->local;
+
+ locked_fd = local->locked_fd;
+
+ STACK_DESTROY (frame->root);
+ afr_local_cleanup (local, this);
+
+ afr_save_locked_fd (this, locked_fd->fd);
+
+ return 0;
+
+}
+
+static int
+afr_get_source_lock_recovery (xlator_t *this, fd_t *fd)
+{
+ afr_fd_ctx_t *fdctx = NULL;
+ afr_private_t *priv = NULL;
+ uint64_t tmp = 0;
+ int i = 0;
+ int source_child = -1;
+ int ret = 0;
+
+ priv = this->private;
+
+ ret = fd_ctx_get (fd, this, &tmp);
+ if (ret)
+ goto out;
+
+ fdctx = (afr_fd_ctx_t *) (long) tmp;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (fdctx->locked_on[i]) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Found lock recovery source=%d", i);
+ source_child = i;
+ break;
+ }
+ }
+
+out:
+ return source_child;
+
+}
+
+int32_t
+afr_get_locks_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata);
+int32_t
+afr_recover_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int32_t source_child = 0;
+ struct gf_flock flock = {0,};
+
+ local = frame->local;
+ priv = this->private;
+
+ if (op_ret) {
+ gf_log (this->name, GF_LOG_INFO,
+ "lock recovery failed");
+ goto cleanup;
+ }
+
+ source_child = local->source_child;
+
+ memcpy (&flock, lock, sizeof (*lock));
+
+ STACK_WIND_COOKIE (frame, afr_get_locks_fd_cbk,
+ (void *) (long) source_child,
+ priv->children[source_child],
+ priv->children[source_child]->fops->lk,
+ local->fd, F_GETLK_FD, &flock, NULL);
+
+ return 0;
+
+cleanup:
+ afr_lock_recovery_cleanup (frame, this);
+ return 0;
+}
+
+int
+afr_recover_lock (call_frame_t *frame, xlator_t *this,
+ struct gf_flock *flock)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int32_t lock_recovery_child = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ lock_recovery_child = local->lock_recovery_child;
+
+ frame->root->lk_owner = flock->l_owner;
+
+ STACK_WIND_COOKIE (frame, afr_recover_lock_cbk,
+ (void *) (long) lock_recovery_child,
+ priv->children[lock_recovery_child],
+ priv->children[lock_recovery_child]->fops->lk,
+ local->fd, F_SETLK, flock, NULL);
+
+ return 0;
+}
+
+static int
+is_afr_lock_eol (struct gf_flock *lock)
+{
+ int ret = 0;
+
+ if ((lock->l_type == GF_LK_EOL))
+ ret = 1;
+
+ return ret;
+}
+
+int32_t
+afr_get_locks_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ if (op_ret) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Failed to get locks on fd");
+ goto cleanup;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Got a lock on fd");
+
+ if (is_afr_lock_eol (lock)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Reached EOL on locks on fd");
+ goto cleanup;
+ }
+
+ afr_recover_lock (frame, this, lock);
+
+ return 0;
+
+cleanup:
+ afr_lock_recovery_cleanup (frame, this);
+
+ return 0;
+}
+
+static int
+afr_lock_recovery (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ fd_t *fd = NULL;
+ int ret = 0;
+ int32_t source_child = 0;
+ struct gf_flock flock = {0,};
+
+ priv = this->private;
+ local = frame->local;
+
+ fd = local->fd;
+
+ source_child = afr_get_source_lock_recovery (this, fd);
+ if (source_child < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not recover locks due to lock "
+ "split brain");
+ ret = -1;
+ goto out;
+ }
+
+ local->source_child = source_child;
+
+ /* the flock can be zero filled as we're querying incrementally
+ the locks held on the fd.
+ */
+ STACK_WIND_COOKIE (frame, afr_get_locks_fd_cbk,
+ (void *) (long) source_child,
+ priv->children[source_child],
+ priv->children[source_child]->fops->lk,
+ local->fd, F_GETLK_FD, &flock, NULL);
+
+out:
+ return ret;
+}
+
+
+static int
+afr_mark_fd_opened (xlator_t *this, fd_t *fd, int32_t child_index)
+{
+ afr_fd_ctx_t *fdctx = NULL;
+ uint64_t tmp = 0;
+ int ret = 0;
+
+ ret = fd_ctx_get (fd, this, &tmp);
+ if (ret)
+ goto out;
+
+ fdctx = (afr_fd_ctx_t *) (long) tmp;
+
+ fdctx->opened_on[child_index] = AFR_FD_OPENED;
+
+out:
+ return ret;
+}
+
+int32_t
+afr_lock_recovery_preopen_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata)
+{
+ int32_t child_index = (long )cookie;
+ int ret = 0;
+
+ if (op_ret) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Reopen during lock-recovery failed");
+ goto cleanup;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Open succeeded => proceed to recover locks");
+
+ ret = afr_lock_recovery (frame, this);
+ if (ret) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Lock recovery failed");
+ goto cleanup;
+ }
+
+ ret = afr_mark_fd_opened (this, fd, child_index);
+ if (ret) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Marking fd open failed");
+ goto cleanup;
+ }
+
+ return 0;
+
+cleanup:
+ afr_lock_recovery_cleanup (frame, this);
+ return 0;
+}
+
+static int
+afr_lock_recovery_preopen (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ uint64_t tmp = 0;
+ afr_fd_ctx_t *fdctx = NULL;
+ loc_t loc = {0,};
+ int32_t child_index = 0;
+ int ret = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ GF_ASSERT (local && local->fd);
+
+ ret = fd_ctx_get (local->fd, this, &tmp);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to get the context of fd",
+ uuid_utoa (local->fd->inode->gfid));
+ fdctx = (afr_fd_ctx_t *) (long) tmp;
+ /* TODO: instead we should return from the function */
+ GF_ASSERT (fdctx);
+
+ child_index = local->lock_recovery_child;
+
+ inode_path (local->fd->inode, NULL, (char **)&loc.path);
+ loc.name = strrchr (loc.path, '/');
+ loc.inode = inode_ref (local->fd->inode);
+ loc.parent = inode_parent (local->fd->inode, 0, NULL);
+
+
+ STACK_WIND_COOKIE (frame, afr_lock_recovery_preopen_cbk,
+ (void *)(long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->open,
+ &loc, fdctx->flags, local->fd, NULL);
+
+ return 0;
+}
+
+static int
+is_fd_opened (fd_t *fd, int32_t child_index)
+{
+ afr_fd_ctx_t *fdctx = NULL;
+ uint64_t tmp = 0;
+ int ret = 0;
+
+ ret = fd_ctx_get (fd, THIS, &tmp);
+ if (ret)
+ goto out;
+
+ fdctx = (afr_fd_ctx_t *) (long) tmp;
+
+ if (fdctx->opened_on[child_index] == AFR_FD_OPENED)
+ ret = 1;
+
+out:
+ return ret;
+}
+
+int
+afr_attempt_lock_recovery (xlator_t *this, int32_t child_index)
+{
+ call_frame_t *frame = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_locked_fd_t *locked_fd = NULL;
+ afr_locked_fd_t *tmp = NULL;
+ int ret = -1;
+ struct list_head locks_list = {0,};
+ int32_t op_errno = 0;
+
+
+ priv = this->private;
+
+ if (list_empty (&priv->saved_fds))
+ goto out;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ frame->local = local;
+
+ INIT_LIST_HEAD (&locks_list);
+
+ pthread_mutex_lock (&priv->mutex);
+ {
+ list_splice_init (&priv->saved_fds, &locks_list);
+ }
+ pthread_mutex_unlock (&priv->mutex);
+
+ list_for_each_entry_safe (locked_fd, tmp,
+ &locks_list, list) {
+
+ list_del_init (&locked_fd->list);
+
+ local->fd = fd_ref (locked_fd->fd);
+ local->lock_recovery_child = child_index;
+ local->locked_fd = locked_fd;
+
+ if (!is_fd_opened (locked_fd->fd, child_index)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "attempting open before lock "
+ "recovery");
+ afr_lock_recovery_preopen (frame, this);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "attempting lock recovery "
+ "without a preopen");
+ afr_lock_recovery (frame, this);
+ }
+ }
+
+out:
+ if ((ret < 0) && frame)
+ AFR_STACK_DESTROY (frame);
+ return ret;
+}
+
+int
+afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src, char *dom,
+ unsigned int child_count)
+{
+ afr_local_t *dst_local = NULL;
+ afr_local_t *src_local = NULL;
+ afr_internal_lock_t *dst_lock = NULL;
+ afr_internal_lock_t *src_lock = NULL;
+ afr_inodelk_t *dst_inodelk = NULL;
+ afr_inodelk_t *src_inodelk = NULL;
+ int ret = -1;
+
+ src_local = src->local;
+ src_lock = &src_local->internal_lock;
+ src_inodelk = afr_get_inodelk (src_lock, dom);
+ dst_local = dst->local;
+ dst_lock = &dst_local->internal_lock;
+ dst_inodelk = afr_get_inodelk (dst_lock, dom);
+ if (!dst_inodelk || !src_inodelk)
+ goto out;
+ if (src_inodelk->locked_nodes) {
+ memcpy (dst_inodelk->locked_nodes, src_inodelk->locked_nodes,
+ sizeof (*dst_inodelk->locked_nodes) * child_count);
+ memset (src_inodelk->locked_nodes, 0,
+ sizeof (*src_inodelk->locked_nodes) * child_count);
+ }
+
+ dst_lock->transaction_lk_type = src_lock->transaction_lk_type;
+ dst_lock->selfheal_lk_type = src_lock->selfheal_lk_type;
+ dst_inodelk->lock_count = src_inodelk->lock_count;
+ src_inodelk->lock_count = 0;
+ ret = 0;
+out:
+ return ret;
+}
diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h
new file mode 100644
index 000000000..73594f265
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-mem-types.h
@@ -0,0 +1,51 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __AFR_MEM_TYPES_H__
+#define __AFR_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_afr_mem_types_ {
+ gf_afr_mt_iovec = gf_common_mt_end + 1,
+ gf_afr_mt_afr_fd_ctx_t,
+ gf_afr_mt_afr_private_t,
+ gf_afr_mt_int32_t,
+ gf_afr_mt_char,
+ gf_afr_mt_xattr_key,
+ gf_afr_mt_dict_t,
+ gf_afr_mt_xlator_t,
+ gf_afr_mt_iatt,
+ gf_afr_mt_int,
+ gf_afr_mt_afr_node_character,
+ gf_afr_mt_sh_diff_loop_state,
+ gf_afr_mt_uint8_t,
+ gf_afr_mt_loc_t,
+ gf_afr_mt_entry_name,
+ gf_afr_mt_pump_priv,
+ gf_afr_mt_locked_fd,
+ gf_afr_mt_inode_ctx_t,
+ gf_afr_fd_paused_call_t,
+ gf_afr_mt_crawl_data_t,
+ gf_afr_mt_brick_pos_t,
+ gf_afr_mt_shd_bool_t,
+ gf_afr_mt_shd_timer_t,
+ gf_afr_mt_shd_event_t,
+ gf_afr_mt_time_t,
+ gf_afr_mt_pos_data_t,
+ gf_afr_mt_reply_t,
+ gf_afr_mt_stats_t,
+ gf_afr_mt_shd_crawl_event_t,
+ gf_afr_mt_uint64_t,
+ gf_afr_mt_end
+};
+#endif
+
diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c
new file mode 100644
index 000000000..643a5d692
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-open.c
@@ -0,0 +1,382 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <libgen.h>
+#include <unistd.h>
+#include <fnmatch.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "afr.h"
+#include "dict.h"
+#include "xlator.h"
+#include "hashfn.h"
+#include "logging.h"
+#include "stack.h"
+#include "list.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "common-utils.h"
+#include "compat-errno.h"
+#include "compat.h"
+#include "byte-order.h"
+#include "statedump.h"
+
+#include "fd.h"
+
+#include "afr-inode-read.h"
+#include "afr-inode-write.h"
+#include "afr-dir-read.h"
+#include "afr-dir-write.h"
+#include "afr-transaction.h"
+#include "afr-self-heal.h"
+#include "afr-self-heal-common.h"
+
+int
+afr_stale_child_up (afr_local_t *local, xlator_t *this)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int up = -1;
+
+ priv = this->private;
+
+ if (!local->fresh_children)
+ local->fresh_children = afr_children_create (priv->child_count);
+ if (!local->fresh_children)
+ goto out;
+
+ afr_inode_get_read_ctx (this, local->fd->inode, local->fresh_children);
+ if (priv->child_count == afr_get_children_count (local->fresh_children,
+ priv->child_count))
+ goto out;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
+ if (afr_is_child_present (local->fresh_children,
+ priv->child_count, i))
+ continue;
+ up = i;
+ break;
+ }
+out:
+ return up;
+}
+
+void
+afr_perform_data_self_heal (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ inode_t *inode = NULL;
+ int st_child = -1;
+ char reason[64] = {0};
+
+ local = frame->local;
+ sh = &local->self_heal;
+ inode = local->fd->inode;
+
+ if (!IA_ISREG (inode->ia_type))
+ goto out;
+
+ st_child = afr_stale_child_up (local, this);
+ if (st_child < 0)
+ goto out;
+
+ sh->do_data_self_heal = _gf_true;
+ sh->do_metadata_self_heal = _gf_true;
+ sh->do_gfid_self_heal = _gf_true;
+ sh->do_missing_entry_self_heal = _gf_true;
+
+ snprintf (reason, sizeof (reason), "stale subvolume %d detected",
+ st_child);
+ afr_launch_self_heal (frame, this, inode, _gf_true, inode->ia_type,
+ reason, NULL, NULL);
+out:
+ return;
+}
+
+int
+afr_open_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t * local = frame->local;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ if (afr_open_only_data_self_heal (priv->data_self_heal))
+ afr_perform_data_self_heal (frame, this);
+ AFR_STACK_UNWIND (open, frame, local->op_ret, local->op_errno,
+ local->fd, xdata);
+ return 0;
+}
+
+
+int
+afr_open_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ fd_t *fd, dict_t *xdata)
+{
+ afr_local_t * local = NULL;
+ int ret = 0;
+ int call_count = -1;
+ int child_index = (long) cookie;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ }
+
+ if (op_ret >= 0) {
+ local->op_ret = op_ret;
+ local->success_count++;
+
+ ret = afr_child_fd_ctx_set (this, fd, child_index,
+ local->cont.open.flags);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ goto unlock;
+ }
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ if ((local->cont.open.flags & O_TRUNC)
+ && (local->op_ret >= 0)) {
+ STACK_WIND (frame, afr_open_ftruncate_cbk,
+ this, this->fops->ftruncate,
+ fd, 0, NULL);
+ } else {
+ if (afr_open_only_data_self_heal (priv->data_self_heal))
+ afr_perform_data_self_heal (frame, this);
+ AFR_STACK_UNWIND (open, frame, local->op_ret,
+ local->op_errno, local->fd, xdata);
+ }
+ }
+
+ return 0;
+}
+
+int
+afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ int i = 0;
+ int ret = -1;
+ int32_t call_count = 0;
+ int32_t op_errno = 0;
+ int32_t wind_flags = flags & (~O_TRUNC);
+ //We can't let truncation to happen outside transaction.
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+ VALIDATE_OR_GOTO (loc, out);
+
+ priv = this->private;
+
+ if (flags & (O_CREAT|O_TRUNC)) {
+ QUORUM_CHECK(open,out);
+ }
+
+ if (afr_is_split_brain (this, loc->inode)) {
+ /* self-heal failed */
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to open as split brain seen, returning EIO");
+ op_errno = EIO;
+ goto out;
+ }
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ call_count = local->call_count;
+ loc_copy (&local->loc, loc);
+
+ local->cont.open.flags = flags;
+
+ local->fd = fd_ref (fd);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_open_cbk, (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->open,
+ loc, wind_flags, fd, xdata);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (open, frame, -1, op_errno, fd, xdata);
+
+ return 0;
+}
+
+int
+afr_openfd_fix_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int call_count = 0;
+ int child_index = (long) cookie;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (op_ret >= 0) {
+ gf_log (this->name, GF_LOG_DEBUG, "fd for %s opened "
+ "successfully on subvolume %s", local->loc.path,
+ priv->children[child_index]->name);
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to open %s "
+ "on subvolume %s", local->loc.path,
+ priv->children[child_index]->name);
+ }
+
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
+ if (!fd_ctx) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to get fd context, %p", local->fd);
+ goto out;
+ }
+
+ LOCK (&local->fd->lock);
+ {
+ if (op_ret >= 0) {
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ } else {
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
+ }
+ }
+ UNLOCK (&local->fd->lock);
+out:
+ call_count = afr_frame_return (frame);
+ if (call_count == 0)
+ AFR_STACK_DESTROY (frame);
+
+ return 0;
+}
+
+void
+afr_fix_open (xlator_t *this, fd_t *fd, size_t need_open_count, int *need_open)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int32_t op_errno = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ priv = this->private;
+
+ if (!afr_is_fd_fixable (fd) || !need_open || !need_open_count)
+ goto out;
+
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx) {
+ ret = -1;
+ goto out;
+ }
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ local->loc.inode = inode_ref (fd->inode);
+ ret = loc_path (&local->loc, NULL);
+ if (ret < 0)
+ goto out;
+
+ local->fd = fd_ref (fd);
+ local->call_count = need_open_count;
+
+ gf_log (this->name, GF_LOG_DEBUG, "need open count: %zd",
+ need_open_count);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!need_open[i])
+ continue;
+
+ if (IA_IFDIR == fd->inode->ia_type) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "opening fd for dir %s on subvolume %s",
+ local->loc.path, priv->children[i]->name);
+
+ STACK_WIND_COOKIE (frame, afr_openfd_fix_open_cbk,
+ (void*) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->opendir,
+ &local->loc, local->fd,
+ NULL);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "opening fd for file %s on subvolume %s",
+ local->loc.path, priv->children[i]->name);
+
+ STACK_WIND_COOKIE (frame, afr_openfd_fix_open_cbk,
+ (void *)(long) i,
+ priv->children[i],
+ priv->children[i]->fops->open,
+ &local->loc,
+ fd_ctx->flags & (~O_TRUNC),
+ local->fd, NULL);
+ }
+
+ }
+ op_errno = 0;
+ ret = 0;
+out:
+ if (op_errno)
+ ret = -1; //For handling ALLOC_OR_GOTO
+ if (ret && frame)
+ AFR_STACK_DESTROY (frame);
+}
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.c b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
new file mode 100644
index 000000000..83846f152
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
@@ -0,0 +1,837 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#include <openssl/md5.h>
+#include "glusterfs.h"
+#include "afr.h"
+#include "xlator.h"
+#include "dict.h"
+#include "xlator.h"
+#include "hashfn.h"
+#include "logging.h"
+#include "stack.h"
+#include "list.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "common-utils.h"
+#include "compat-errno.h"
+#include "compat.h"
+#include "byte-order.h"
+
+#include "afr-transaction.h"
+#include "afr-self-heal.h"
+#include "afr-self-heal-common.h"
+#include "afr-self-heal-algorithm.h"
+
+/*
+ This file contains the various self-heal algorithms
+*/
+
+static int
+sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
+ gf_boolean_t is_first_call, call_frame_t *old_loop_frame);
+static int
+sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame,
+ int32_t op_ret, int32_t op_errno);
+static int
+sh_destroy_frame (call_frame_t *frame, xlator_t *this)
+{
+ if (!frame)
+ goto out;
+
+ AFR_STACK_DESTROY (frame);
+out:
+ return 0;
+}
+
+static void
+sh_private_cleanup (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_sh_algo_private_t *sh_priv = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ sh_priv = sh->private;
+ GF_FREE (sh_priv);
+}
+
+static int
+sh_number_of_writes_needed (unsigned char *write_needed, int child_count)
+{
+ int writes = 0;
+ int i = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (write_needed[i])
+ writes++;
+ }
+
+ return writes;
+}
+
+
+static int
+sh_loop_driver_done (call_frame_t *sh_frame, xlator_t *this,
+ call_frame_t *last_loop_frame)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_sh_algo_private_t *sh_priv = NULL;
+ int32_t total_blocks = 0;
+ int32_t diff_blocks = 0;
+
+ local = sh_frame->local;
+ sh = &local->self_heal;
+ sh_priv = sh->private;
+ if (sh_priv) {
+ total_blocks = sh_priv->total_blocks;
+ diff_blocks = sh_priv->diff_blocks;
+ }
+
+ sh_private_cleanup (sh_frame, this);
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
+ GF_ASSERT (!last_loop_frame);
+ //loop_finish should have happened and the old_loop should be NULL
+ gf_log (this->name, GF_LOG_DEBUG,
+ "self-heal aborting on %s",
+ local->loc.path);
+
+ local->self_heal.algo_abort_cbk (sh_frame, this);
+ } else {
+ GF_ASSERT (last_loop_frame);
+ if (diff_blocks == total_blocks) {
+ gf_log (this->name, GF_LOG_DEBUG, "full self-heal "
+ "completed on %s",local->loc.path);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "diff self-heal on %s: completed. "
+ "(%d blocks of %d were different (%.2f%%))",
+ local->loc.path, diff_blocks, total_blocks,
+ ((diff_blocks * 1.0)/total_blocks) * 100);
+ }
+
+ sh->old_loop_frame = last_loop_frame;
+ local->self_heal.algo_completion_cbk (sh_frame, this);
+ }
+
+ return 0;
+}
+
+int
+sh_loop_finish (call_frame_t *loop_frame, xlator_t *this)
+{
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+
+ if (!loop_frame)
+ goto out;
+
+ loop_local = loop_frame->local;
+ if (loop_local) {
+ loop_sh = &loop_local->self_heal;
+ }
+
+ if (loop_sh && loop_sh->data_lock_held) {
+ afr_sh_data_unlock (loop_frame, this, this->name,
+ sh_destroy_frame);
+ } else {
+ sh_destroy_frame (loop_frame, this);
+ }
+out:
+ return 0;
+}
+
+static int
+sh_loop_lock_success (call_frame_t *loop_frame, xlator_t *this)
+{
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
+
+ sh_loop_finish (loop_sh->old_loop_frame, this);
+ loop_sh->old_loop_frame = NULL;
+
+ gf_log (this->name, GF_LOG_DEBUG, "Acquired lock for range %"PRIu64
+ " %"PRIu64, loop_sh->offset, loop_sh->block_size);
+ loop_sh->data_lock_held = _gf_true;
+ loop_sh->sh_data_algo_start (loop_frame, this);
+ return 0;
+}
+
+static int
+sh_loop_lock_failure (call_frame_t *loop_frame, xlator_t *this)
+{
+ call_frame_t *sh_frame = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
+ sh_frame = loop_sh->sh_frame;
+
+ gf_log (this->name, GF_LOG_ERROR, "failed lock for range %"PRIu64
+ " %"PRIu64, loop_sh->offset, loop_sh->block_size);
+ sh_loop_finish (loop_sh->old_loop_frame, this);
+ loop_sh->old_loop_frame = NULL;
+ sh_loop_return (sh_frame, this, loop_frame, -1, ENOTCONN);
+ return 0;
+}
+
+static int
+sh_loop_frame_create (call_frame_t *sh_frame, xlator_t *this,
+ call_frame_t *old_loop_frame, call_frame_t **loop_frame)
+{
+ call_frame_t *new_loop_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *new_loop_local = NULL;
+ afr_self_heal_t *new_loop_sh = NULL;
+ afr_private_t *priv = NULL;
+
+ GF_ASSERT (sh_frame);
+ GF_ASSERT (loop_frame);
+
+ *loop_frame = NULL;
+ local = sh_frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ new_loop_frame = copy_frame (sh_frame);
+ if (!new_loop_frame)
+ goto out;
+ //We want the frame to have same lk_owner as sh_frame
+ //so that locks translator allows conflicting locks
+ new_loop_local = afr_self_heal_local_init (local, this);
+ if (!new_loop_local)
+ goto out;
+ new_loop_frame->local = new_loop_local;
+
+ new_loop_sh = &new_loop_local->self_heal;
+ new_loop_sh->sources = memdup (sh->sources,
+ priv->child_count * sizeof (*sh->sources));
+ if (!new_loop_sh->sources)
+ goto out;
+ new_loop_sh->write_needed = GF_CALLOC (priv->child_count,
+ sizeof (*new_loop_sh->write_needed),
+ gf_afr_mt_char);
+ if (!new_loop_sh->write_needed)
+ goto out;
+ new_loop_sh->checksum = GF_CALLOC (priv->child_count, MD5_DIGEST_LENGTH,
+ gf_afr_mt_uint8_t);
+ if (!new_loop_sh->checksum)
+ goto out;
+ new_loop_sh->inode = inode_ref (sh->inode);
+ new_loop_sh->sh_data_algo_start = sh->sh_data_algo_start;
+ new_loop_sh->source = sh->source;
+ new_loop_sh->active_sinks = sh->active_sinks;
+ new_loop_sh->healing_fd = fd_ref (sh->healing_fd);
+ new_loop_sh->file_has_holes = sh->file_has_holes;
+ new_loop_sh->old_loop_frame = old_loop_frame;
+ new_loop_sh->sh_frame = sh_frame;
+ *loop_frame = new_loop_frame;
+ return 0;
+out:
+ sh_destroy_frame (new_loop_frame, this);
+ return -ENOMEM;
+}
+
+static int
+sh_loop_start (call_frame_t *sh_frame, xlator_t *this, off_t offset,
+ call_frame_t *old_loop_frame)
+{
+ call_frame_t *new_loop_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *new_loop_local = NULL;
+ afr_self_heal_t *new_loop_sh = NULL;
+ int ret = 0;
+
+ GF_ASSERT (sh_frame);
+
+ local = sh_frame->local;
+ sh = &local->self_heal;
+
+ ret = sh_loop_frame_create (sh_frame, this, old_loop_frame,
+ &new_loop_frame);
+ if (ret)
+ goto out;
+ new_loop_local = new_loop_frame->local;
+ new_loop_sh = &new_loop_local->self_heal;
+ new_loop_sh->offset = offset;
+ new_loop_sh->block_size = sh->block_size;
+ afr_sh_data_lock (new_loop_frame, this, offset, new_loop_sh->block_size,
+ _gf_true, this->name, sh_loop_lock_success, sh_loop_lock_failure);
+ return 0;
+out:
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ if (old_loop_frame)
+ sh_loop_finish (old_loop_frame, this);
+ sh_loop_return (sh_frame, this, new_loop_frame, -1, ENOMEM);
+ return 0;
+}
+
+static int
+sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
+ gf_boolean_t is_first_call, call_frame_t *old_loop_frame)
+{
+ afr_local_t * local = NULL;
+ afr_self_heal_t * sh = NULL;
+ afr_sh_algo_private_t *sh_priv = NULL;
+ gf_boolean_t is_driver_done = _gf_false;
+ blksize_t block_size = 0;
+ int loop = 0;
+ off_t offset = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ local = sh_frame->local;
+ sh = &local->self_heal;
+ sh_priv = sh->private;
+
+ LOCK (&sh_priv->lock);
+ {
+ if (!is_first_call)
+ sh_priv->loops_running--;
+ offset = sh_priv->offset;
+ block_size = sh->block_size;
+ while ((!sh->eof_reached) &&
+ (!is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) &&
+ (sh_priv->loops_running < priv->data_self_heal_window_size)
+ && (sh_priv->offset < sh->file_size)) {
+
+ loop++;
+ sh_priv->offset += block_size;
+ sh_priv->loops_running++;
+
+ if (!is_first_call)
+ break;
+ }
+ if (0 == sh_priv->loops_running) {
+ is_driver_done = _gf_true;
+ }
+ }
+ UNLOCK (&sh_priv->lock);
+
+ if (0 == loop) {
+ //loop finish does unlock, but the erasing of the pending
+ //xattrs needs to happen before that so do not finish the loop
+ if (is_driver_done &&
+ !is_self_heal_failed (sh, AFR_CHECK_SPECIFIC))
+ goto driver_done;
+ if (old_loop_frame) {
+ sh_loop_finish (old_loop_frame, this);
+ old_loop_frame = NULL;
+ }
+ }
+
+ //If we have more loops to form we should finish previous loop after
+ //the next loop lock
+ while (loop--) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
+ // op failed in other loop, stop spawning more loops
+ if (old_loop_frame) {
+ sh_loop_finish (old_loop_frame, this);
+ old_loop_frame = NULL;
+ }
+ sh_loop_driver (sh_frame, this, _gf_false, NULL);
+ } else {
+ gf_log (this->name, GF_LOG_TRACE, "spawning a loop "
+ "for offset %"PRId64, offset);
+
+ sh_loop_start (sh_frame, this, offset, old_loop_frame);
+ old_loop_frame = NULL;
+ offset += block_size;
+ }
+ }
+
+driver_done:
+ if (is_driver_done) {
+ sh_loop_driver_done (sh_frame, this, old_loop_frame);
+ }
+ return 0;
+}
+
+static int
+sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame,
+ int32_t op_ret, int32_t op_errno)
+{
+ afr_local_t * loop_local = NULL;
+ afr_self_heal_t * loop_sh = NULL;
+ afr_local_t * sh_local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ sh_local = sh_frame->local;
+ sh = &sh_local->self_heal;
+
+ if (loop_frame) {
+ loop_local = loop_frame->local;
+ if (loop_local)
+ loop_sh = &loop_local->self_heal;
+ if (loop_sh)
+ gf_log (this->name, GF_LOG_TRACE, "loop for offset "
+ "%"PRId64" returned", loop_sh->offset);
+ }
+
+ if (op_ret == -1) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_set_error (sh, op_errno);
+ if (loop_frame) {
+ sh_loop_finish (loop_frame, this);
+ loop_frame = NULL;
+ }
+ }
+
+ sh_loop_driver (sh_frame, this, _gf_false, loop_frame);
+
+ return 0;
+}
+
+static int
+sh_loop_write_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * loop_local = NULL;
+ afr_self_heal_t * loop_sh = NULL;
+ call_frame_t *sh_frame = NULL;
+ afr_local_t * sh_local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int call_count = 0;
+ int child_index = 0;
+
+ priv = this->private;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
+
+ sh_frame = loop_sh->sh_frame;
+ sh_local = sh_frame->local;
+ sh = &sh_local->self_heal;
+
+ child_index = (long) cookie;
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "wrote %d bytes of data from %s to child %d, offset %"PRId64"",
+ op_ret, sh_local->loc.path, child_index, loop_sh->offset);
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "write to %s failed on subvolume %s (%s)",
+ sh_local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_set_error (loop_sh, op_errno);
+ } else if (op_ret < loop_local->cont.writev.vector->iov_len) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "incomplete write to %s on subvolume %s "
+ "(expected %lu, returned %d)", sh_local->loc.path,
+ priv->children[child_index]->name,
+ loop_local->cont.writev.vector->iov_len, op_ret);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ }
+
+ call_count = afr_frame_return (loop_frame);
+
+ if (call_count == 0) {
+ iobref_unref(loop_local->cont.writev.iobref);
+
+ sh_loop_return (sh_frame, this, loop_frame,
+ loop_sh->op_ret, loop_sh->op_errno);
+ }
+
+ return 0;
+}
+
+static void
+sh_prune_writes_needed (call_frame_t *sh_frame, call_frame_t *loop_frame,
+ afr_private_t *priv)
+{
+ afr_local_t *sh_local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+ int i = 0;
+
+ sh_local = sh_frame->local;
+ sh = &sh_local->self_heal;
+
+ if (!strcmp (sh->algo->name, "diff"))
+ return;
+
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
+
+ /* full self-heal guarantees there exists atleast 1 file with size 0
+ * That means for other files we can preserve holes that come after
+ * its size before 'trim'
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (loop_sh->write_needed[i] &&
+ ((loop_sh->offset + 1) > sh->buf[i].ia_size))
+ loop_sh->write_needed[i] = 0;
+ }
+}
+
+static int
+sh_loop_read_cbk (call_frame_t *loop_frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ struct iovec *vector, int32_t count, struct iatt *buf,
+ struct iobref *iobref, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * loop_local = NULL;
+ afr_self_heal_t * loop_sh = NULL;
+ call_frame_t *sh_frame = NULL;
+ int i = 0;
+ int call_count = 0;
+ afr_local_t * sh_local = NULL;
+ afr_self_heal_t * sh = NULL;
+
+ priv = this->private;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
+
+ sh_frame = loop_sh->sh_frame;
+ sh_local = sh_frame->local;
+ sh = &sh_local->self_heal;
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "read %d bytes of data from %s, offset %"PRId64"",
+ op_ret, loop_local->loc.path, loop_sh->offset);
+
+ if (op_ret <= 0) {
+ if (op_ret < 0) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ gf_log (this->name, GF_LOG_ERROR, "read failed on %d "
+ "for %s reason :%s", sh->source,
+ sh_local->loc.path, strerror (errno));
+ } else {
+ sh->eof_reached = _gf_true;
+ gf_log (this->name, GF_LOG_DEBUG, "Eof reached for %s",
+ sh_local->loc.path);
+ }
+ sh_loop_return (sh_frame, this, loop_frame, op_ret, op_errno);
+ goto out;
+ }
+
+ if (loop_sh->file_has_holes && iov_0filled (vector, count) == 0)
+ sh_prune_writes_needed (sh_frame, loop_frame, priv);
+
+ call_count = sh_number_of_writes_needed (loop_sh->write_needed,
+ priv->child_count);
+ if (call_count == 0) {
+ sh_loop_return (sh_frame, this, loop_frame, 0, 0);
+ goto out;
+ }
+
+ loop_local->call_count = call_count;
+
+ /*
+ * We only really need the request size at the moment, but the buffer
+ * is required if we want to issue a retry in the event of a short write.
+ * Therefore, we duplicate the vector and ref the iobref here...
+ */
+ loop_local->cont.writev.vector = iov_dup(vector, count);
+ loop_local->cont.writev.iobref = iobref_ref(iobref);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!loop_sh->write_needed[i])
+ continue;
+ STACK_WIND_COOKIE (loop_frame, sh_loop_write_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->writev,
+ loop_sh->healing_fd, vector, count,
+ loop_sh->offset, 0, iobref, NULL);
+
+ if (!--call_count)
+ break;
+ }
+
+out:
+ return 0;
+}
+
+
+static int
+sh_loop_read (call_frame_t *loop_frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+
+ priv = this->private;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
+
+ STACK_WIND_COOKIE (loop_frame, sh_loop_read_cbk,
+ (void *) (long) loop_sh->source,
+ priv->children[loop_sh->source],
+ priv->children[loop_sh->source]->fops->readv,
+ loop_sh->healing_fd, loop_sh->block_size,
+ loop_sh->offset, 0, NULL);
+
+ return 0;
+}
+
+
+static int
+sh_diff_checksum_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ uint32_t weak_checksum, uint8_t *strong_checksum,
+ dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+ call_frame_t *sh_frame = NULL;
+ afr_local_t *sh_local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_sh_algo_private_t *sh_priv = NULL;
+ int child_index = 0;
+ int call_count = 0;
+ int i = 0;
+ int write_needed = 0;
+
+ priv = this->private;
+
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
+
+ sh_frame = loop_sh->sh_frame;
+ sh_local = sh_frame->local;
+ sh = &sh_local->self_heal;
+
+ sh_priv = sh->private;
+
+ child_index = (long) cookie;
+
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "checksum on %s failed on subvolume %s (%s)",
+ sh_local->loc.path, priv->children[child_index]->name,
+ strerror (op_errno));
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ } else {
+ memcpy (loop_sh->checksum + child_index * MD5_DIGEST_LENGTH,
+ strong_checksum, MD5_DIGEST_LENGTH);
+ }
+
+ call_count = afr_frame_return (loop_frame);
+
+ if (call_count == 0) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (sh->sources[i] || !sh_local->child_up[i])
+ continue;
+
+ if (memcmp (loop_sh->checksum + (i * MD5_DIGEST_LENGTH),
+ loop_sh->checksum + (sh->source * MD5_DIGEST_LENGTH),
+ MD5_DIGEST_LENGTH)) {
+ /*
+ Checksums differ, so this block
+ must be written to this sink
+ */
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "checksum on subvolume %s at offset %"
+ PRId64" differs from that on source",
+ priv->children[i]->name, loop_sh->offset);
+
+ write_needed = loop_sh->write_needed[i] = 1;
+ }
+ }
+
+ LOCK (&sh_priv->lock);
+ {
+ sh_priv->total_blocks++;
+ if (write_needed)
+ sh_priv->diff_blocks++;
+ }
+ UNLOCK (&sh_priv->lock);
+
+ if (write_needed &&
+ !is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
+ sh_loop_read (loop_frame, this);
+ } else {
+ sh_loop_return (sh_frame, this, loop_frame,
+ op_ret, op_errno);
+ }
+ }
+
+ return 0;
+}
+
+static int
+sh_diff_checksum (call_frame_t *loop_frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+ int call_count = 0;
+ int i = 0;
+
+ priv = this->private;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
+
+ call_count = loop_sh->active_sinks + 1; /* sinks and source */
+
+ loop_local->call_count = call_count;
+
+ STACK_WIND_COOKIE (loop_frame, sh_diff_checksum_cbk,
+ (void *) (long) loop_sh->source,
+ priv->children[loop_sh->source],
+ priv->children[loop_sh->source]->fops->rchecksum,
+ loop_sh->healing_fd,
+ loop_sh->offset, loop_sh->block_size, NULL);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (loop_sh->sources[i] || !loop_local->child_up[i])
+ continue;
+
+ STACK_WIND_COOKIE (loop_frame, sh_diff_checksum_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->rchecksum,
+ loop_sh->healing_fd,
+ loop_sh->offset, loop_sh->block_size, NULL);
+
+ if (!--call_count)
+ break;
+ }
+
+ return 0;
+}
+
+static int
+sh_full_read_write_to_sinks (call_frame_t *loop_frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *loop_local = NULL;
+ afr_self_heal_t *loop_sh = NULL;
+ int i = 0;
+
+ priv = this->private;
+ loop_local = loop_frame->local;
+ loop_sh = &loop_local->self_heal;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (loop_sh->sources[i] || !loop_local->child_up[i])
+ continue;
+ loop_sh->write_needed[i] = 1;
+ }
+ sh_loop_read (loop_frame, this);
+ return 0;
+}
+
+afr_sh_algo_private_t*
+afr_sh_priv_init ()
+{
+ afr_sh_algo_private_t *sh_priv = NULL;
+
+ sh_priv = GF_CALLOC (1, sizeof (*sh_priv),
+ gf_afr_mt_afr_private_t);
+ if (!sh_priv)
+ goto out;
+
+ LOCK_INIT (&sh_priv->lock);
+out:
+ return sh_priv;
+}
+
+int
+afr_sh_transfer_lock (call_frame_t *dst, call_frame_t *src, char *dom,
+ unsigned int child_count)
+{
+ afr_local_t *dst_local = NULL;
+ afr_self_heal_t *dst_sh = NULL;
+ afr_local_t *src_local = NULL;
+ afr_self_heal_t *src_sh = NULL;
+ int ret = -1;
+
+ dst_local = dst->local;
+ dst_sh = &dst_local->self_heal;
+ src_local = src->local;
+ src_sh = &src_local->self_heal;
+ GF_ASSERT (src_sh->data_lock_held);
+ GF_ASSERT (!dst_sh->data_lock_held);
+ ret = afr_lk_transfer_datalock (dst, src, dom, child_count);
+ if (ret)
+ return ret;
+ src_sh->data_lock_held = _gf_false;
+ dst_sh->data_lock_held = _gf_true;
+ return 0;
+}
+
+int
+afr_sh_start_loops (call_frame_t *sh_frame, xlator_t *this,
+ afr_sh_algo_fn sh_data_algo_start)
+{
+ call_frame_t *first_loop_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int ret = 0;
+ afr_private_t *priv = NULL;
+
+ local = sh_frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ sh->sh_data_algo_start = sh_data_algo_start;
+ local->call_count = 0;
+ ret = sh_loop_frame_create (sh_frame, this, NULL, &first_loop_frame);
+ if (ret)
+ goto out;
+ ret = afr_sh_transfer_lock (first_loop_frame, sh_frame, this->name,
+ priv->child_count);
+ if (ret)
+ goto out;
+ sh->private = afr_sh_priv_init ();
+ if (!sh->private) {
+ ret = -1;
+ goto out;
+ }
+ sh_loop_driver (sh_frame, this, _gf_true, first_loop_frame);
+ ret = 0;
+out:
+ if (ret) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ sh_loop_driver_done (sh_frame, this, NULL);
+ }
+ return 0;
+}
+
+int
+afr_sh_algo_diff (call_frame_t *sh_frame, xlator_t *this)
+{
+ afr_sh_start_loops (sh_frame, this, sh_diff_checksum);
+ return 0;
+}
+
+int
+afr_sh_algo_full (call_frame_t *sh_frame, xlator_t *this)
+{
+ afr_sh_start_loops (sh_frame, this, sh_full_read_write_to_sinks);
+ return 0;
+}
+
+struct afr_sh_algorithm afr_self_heal_algorithms[] = {
+ {.name = "full", .fn = afr_sh_algo_full},
+ {.name = "diff", .fn = afr_sh_algo_diff},
+ {0, 0},
+};
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.h b/xlators/cluster/afr/src/afr-self-heal-algorithm.h
new file mode 100644
index 000000000..6b20789b1
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-self-heal-algorithm.h
@@ -0,0 +1,32 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __AFR_SELF_HEAL_ALGORITHM_H__
+#define __AFR_SELF_HEAL_ALGORITHM_H__
+
+typedef int (*afr_sh_algo_fn) (call_frame_t *frame,
+ xlator_t *this);
+
+struct afr_sh_algorithm {
+ const char *name;
+ afr_sh_algo_fn fn;
+};
+
+extern struct afr_sh_algorithm afr_self_heal_algorithms[3];
+typedef struct {
+ gf_lock_t lock;
+ unsigned int loops_running;
+ off_t offset;
+
+ int32_t total_blocks;
+ int32_t diff_blocks;
+} afr_sh_algo_private_t;
+
+#endif /* __AFR_SELF_HEAL_ALGORITHM_H__ */
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 38fe1e6e7..ef92b4205 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include "glusterfs.h"
@@ -25,7 +16,68 @@
#include "afr-transaction.h"
#include "afr-self-heal-common.h"
#include "afr-self-heal.h"
+#include "pump.h"
+
+#define ADD_FMT_STRING(msg, off, sh_str, status, print_log) \
+ do { \
+ if (AFR_SELF_HEAL_NOT_ATTEMPTED != status) { \
+ off += snprintf (msg + off, sizeof (msg) - off, \
+ " "sh_str" self heal %s,", \
+ get_sh_completion_status (status));\
+ print_log = 1; \
+ } \
+ } while (0)
+
+#define ADD_FMT_STRING_SYNC(msg, off, sh_str, status, print_log) \
+ do { \
+ if (AFR_SELF_HEAL_SYNC_BEGIN == status || \
+ AFR_SELF_HEAL_FAILED == status) { \
+ off += snprintf (msg + off, sizeof (msg) - off, \
+ " "sh_str" self heal %s,", \
+ get_sh_completion_status (status));\
+ print_log = 1; \
+ } \
+ } while (0)
+
+
+void
+afr_sh_reset (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ memset (sh->child_errno, 0,
+ sizeof (*sh->child_errno) * priv->child_count);
+ memset (sh->buf, 0, sizeof (*sh->buf) * priv->child_count);
+ memset (sh->parentbufs, 0,
+ sizeof (*sh->parentbufs) * priv->child_count);
+ memset (sh->success, 0, sizeof (*sh->success) * priv->child_count);
+ memset (sh->locked_nodes, 0,
+ sizeof (*sh->locked_nodes) * priv->child_count);
+ sh->active_sinks = 0;
+
+ afr_reset_xattr (sh->xattr, priv->child_count);
+}
+
+//Intersection[child]=1 if child is part of intersection
+void
+afr_children_intersection_get (int32_t *set1, int32_t *set2,
+ int *intersection, unsigned int child_count)
+{
+ int i = 0;
+ memset (intersection, 0, sizeof (*intersection) * child_count);
+ for (i = 0; i < child_count; i++) {
+ intersection[i] = afr_is_child_present (set1, child_count, i)
+ && afr_is_child_present (set2, child_count,
+ i);
+ }
+}
/**
* select_source - select a source and return it
@@ -34,115 +86,215 @@
int
afr_sh_select_source (int sources[], int child_count)
{
- int i;
- for (i = 0; i < child_count; i++)
- if (sources[i])
- return i;
+ int i = 0;
+ for (i = 0; i < child_count; i++)
+ if (sources[i])
+ return i;
- return -1;
+ return -1;
}
+void
+afr_sh_mark_source_sinks (call_frame_t *frame, xlator_t *this)
+{
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int active_sinks = 0;
-/**
- * sink_count - return number of sinks in sources array
- */
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
-int
-afr_sh_sink_count (int sources[], int child_count)
-{
- int i;
- int sinks = 0;
- for (i = 0; i < child_count; i++)
- if (!sources[i])
- sinks++;
- return sinks;
+ for (i = 0; i < priv->child_count; i++) {
+ if (sh->sources[i] == 0 && local->child_up[i] == 1) {
+ active_sinks++;
+ sh->success[i] = 1;
+ } else if (sh->sources[i] == 1 && local->child_up[i] == 1) {
+ sh->success[i] = 1;
+ }
+ }
+ sh->active_sinks = active_sinks;
}
int
afr_sh_source_count (int sources[], int child_count)
{
- int i;
- int nsource = 0;
+ int i = 0;
+ int nsource = 0;
- for (i = 0; i < child_count; i++)
- if (sources[i])
- nsource++;
- return nsource;
+ for (i = 0; i < child_count; i++)
+ if (sources[i])
+ nsource++;
+ return nsource;
}
+void
+afr_sh_set_error (afr_self_heal_t *sh, int32_t op_errno)
+{
+ sh->op_ret = -1;
+ sh->op_errno = afr_most_important_error(sh->op_errno, op_errno,
+ _gf_false);
+}
-int
-afr_sh_supress_errenous_children (int sources[], int child_errno[],
- int child_count)
+void
+afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this)
{
- int i = 0;
+ afr_private_t * priv = this->private;
+ char *buf = NULL;
+ char *ptr = NULL;
+ int i = 0;
+ int j = 0;
- for (i = 0; i < child_count; i++) {
- if (child_errno[i] && sources[i]) {
- sources[i] = 0;
- }
- }
+ /* 10 digits per entry + 1 space + '[' and ']' */
+ buf = GF_MALLOC (priv->child_count * 11 + 8, gf_afr_mt_char);
- return 0;
+ for (i = 0; i < priv->child_count; i++) {
+ ptr = buf;
+ ptr += sprintf (ptr, "[ ");
+ for (j = 0; j < priv->child_count; j++) {
+ ptr += sprintf (ptr, "%d ", pending_matrix[i][j]);
+ }
+ sprintf (ptr, "]");
+ gf_log (this->name, GF_LOG_DEBUG, "pending_matrix: %s", buf);
+ }
+
+ GF_FREE (buf);
}
+char*
+afr_get_pending_matrix_str (int32_t *pending_matrix[], xlator_t *this)
+{
+ afr_private_t * priv = this->private;
+ char *buf = NULL;
+ char *ptr = NULL;
+ int i = 0;
+ int j = 0;
+ int child_count = priv->child_count;
+ char *matrix_begin = "[ [ ";
+ char *matrix_end = "] ]";
+ char *seperator = "] [ ";
+ int pending_entry_strlen = 12; //Including space after entry
+ int matrix_begin_strlen = 0;
+ int matrix_end_strlen = 0;
+ int seperator_strlen = 0;
+ int string_length = 0;
+ char *msg = "- Pending matrix: ";
+
+ /*
+ * for a list of lists of [ [ a b ] [ c d ] ]
+ * */
+
+ matrix_begin_strlen = strlen (matrix_begin);
+ matrix_end_strlen = strlen (matrix_end);
+ seperator_strlen = strlen (seperator);
+ string_length = matrix_begin_strlen + matrix_end_strlen
+ + (child_count -1) * seperator_strlen
+ + (child_count * child_count * pending_entry_strlen);
+
+ buf = GF_CALLOC (1, 1 + strlen (msg) + string_length , gf_afr_mt_char);
+ if (!buf)
+ goto out;
+
+ ptr = buf;
+ ptr += sprintf (ptr, "%s", msg);
+ ptr += sprintf (ptr, "%s", matrix_begin);
+ for (i = 0; i < priv->child_count; i++) {
+ for (j = 0; j < priv->child_count; j++) {
+ ptr += sprintf (ptr, "%d ", pending_matrix[i][j]);
+ }
+ if (i < priv->child_count -1)
+ ptr += sprintf (ptr, "%s", seperator);
+ }
+
+ ptr += sprintf (ptr, "%s", matrix_end);
+
+out:
+ return buf;
+}
void
-afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this)
+afr_sh_print_split_brain_log (int32_t *pending_matrix[], xlator_t *this,
+ const char *loc)
{
- afr_private_t * priv = this->private;
+ char *buf = NULL;
+ char *free_ptr = NULL;
- char *buf = NULL;
- char *ptr = NULL;
+ buf = afr_get_pending_matrix_str (pending_matrix, this);
+ if (buf)
+ free_ptr = buf;
+ else
+ buf = "";
- int i, j;
- /* 10 digits per entry + 1 space + '[' and ']' */
- buf = MALLOC (priv->child_count * 11 + 8);
+ gf_log (this->name, GF_LOG_ERROR, "Unable to self-heal contents of '%s'"
+ " (possible split-brain). Please delete the file from all but "
+ "the preferred subvolume.%s", loc, buf);
+ GF_FREE (free_ptr);
+ return;
+}
- for (i = 0; i < priv->child_count; i++) {
- ptr = buf;
- ptr += sprintf (ptr, "[ ");
- for (j = 0; j < priv->child_count; j++) {
- ptr += sprintf (ptr, "%d ", pending_matrix[i][j]);
- }
- ptr += sprintf (ptr, "]");
- gf_log (this->name, GF_LOG_TRACE,
- "pending_matrix: %s", buf);
- }
- FREE (buf);
-}
+void
+afr_init_pending_matrix (int32_t **pending_matrix, size_t child_count)
+{
+ int i = 0;
+ int j = 0;
+ GF_ASSERT (pending_matrix);
+
+ for (i = 0; i < child_count; i++) {
+ for (j = 0; j < child_count; j++) {
+ pending_matrix[i][j] = 0;
+ }
+ }
+}
void
-afr_sh_build_pending_matrix (afr_private_t *priv,
- int32_t *pending_matrix[], dict_t *xattr[],
- int child_count, afr_transaction_type type)
+afr_mark_ignorant_subvols_as_pending (int32_t **pending_matrix,
+ unsigned char *ignorant_subvols,
+ size_t child_count)
{
- int i, j, k;
+ int i = 0;
+ int j = 0;
- int32_t *pending = NULL;
- int ret = -1;
+ GF_ASSERT (pending_matrix);
+ GF_ASSERT (ignorant_subvols);
- unsigned char *ignorant_subvols = NULL;
+ for (i = 0; i < child_count; i++) {
+ if (ignorant_subvols[i]) {
+ for (j = 0; j < child_count; j++) {
+ if (!ignorant_subvols[j])
+ pending_matrix[j][i] += 1;
+ }
+ }
+ }
+}
- ignorant_subvols = CALLOC (sizeof (*ignorant_subvols), child_count);
+int
+afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
+ unsigned char *ignorant_subvols,
+ dict_t *xattr[], afr_transaction_type type,
+ size_t child_count)
+{
+ /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
+ int32_t pending[3] = {0,};
+ void *pending_raw = NULL;
+ int ret = -1;
+ int i = 0;
+ int j = 0;
+ int k = 0;
- /* start clean */
- for (i = 0; i < child_count; i++) {
- for (j = 0; j < child_count; j++) {
- pending_matrix[i][j] = 0;
- }
- }
+ afr_init_pending_matrix (pending_matrix, child_count);
- for (i = 0; i < child_count; i++) {
- pending = NULL;
+ for (i = 0; i < child_count; i++) {
+ pending_raw = NULL;
for (j = 0; j < child_count; j++) {
- ret = dict_get_ptr (xattr[i], priv->pending_key[j],
- VOID(&pending));
-
+ ret = dict_get_ptr (xattr[i], pending_key[j],
+ &pending_raw);
+
if (ret != 0) {
/*
* There is no xattr present. This means this
@@ -150,62 +302,26 @@ afr_sh_build_pending_matrix (afr_private_t *priv,
* subvolume.
*/
- ignorant_subvols[i] = 1;
+ if (ignorant_subvols)
+ ignorant_subvols[i] = 1;
continue;
}
+ memcpy (pending, pending_raw, sizeof(pending));
k = afr_index_for_transaction_type (type);
-
- pending_matrix[i][j] = ntoh32 (pending[k]);
- }
- }
-
- /*
- * Make all non-ignorant subvols point towards the ignorant
- * subvolumes.
- */
- for (i = 0; i < child_count; i++) {
- if (ignorant_subvols[i]) {
- for (j = 0; j < child_count; j++) {
- if (!ignorant_subvols[j])
- pending_matrix[j][i] += 1;
- }
+ pending_matrix[i][j] = ntoh32 (pending[k]);
}
}
- FREE (ignorant_subvols);
+ return ret;
}
-
-/**
- * mark_sources: Mark all 'source' nodes and return number of source
- * nodes found
- *
- * A node (a row in the pending matrix) belongs to one of
- * three categories:
- *
- * M is the pending matrix.
- *
- * 'innocent' - M[i] is all zeroes
- * 'fool' - M[i] has i'th element = 1 (self-reference)
- * 'wise' - M[i] has i'th element = 0, others are 1 or 0.
- *
- * All 'innocent' nodes are sinks. If all nodes are innocent, no self-heal is
- * needed.
- *
- * A 'wise' node can be a source. If two 'wise' nodes conflict, it is
- * a split-brain. If one wise node refers to the other but the other doesn't
- * refer back, the referrer is a source.
- *
- * All fools are sinks, unless there are no 'wise' nodes. In that case,
- * one of the fools is made a source.
- */
-
typedef enum {
+ AFR_NODE_INVALID,
AFR_NODE_INNOCENT,
AFR_NODE_FOOL,
- AFR_NODE_WISE
+ AFR_NODE_WISE,
} afr_node_type;
typedef struct {
@@ -246,7 +362,7 @@ afr_sh_is_wise (int32_t *array, int i, int child_count)
static int
-afr_sh_all_nodes_innocent (afr_node_character *characters,
+afr_sh_all_nodes_innocent (afr_node_character *characters,
int child_count)
{
int i = 0;
@@ -282,10 +398,10 @@ afr_sh_wise_nodes_exist (afr_node_character *characters, int child_count)
/*
* The 'wisdom' of a wise node is 0 if any other wise node accuses it.
- * It is 1 if no other wise node accuses it.
+ * It is 1 if no other wise node accuses it.
* Only wise nodes with wisdom 1 are sources.
*
- * If no nodes with wisdom 1 exist, a split-brain has occured.
+ * If no nodes with wisdom 1 exist, a split-brain has occurred.
*/
static void
@@ -302,7 +418,7 @@ afr_sh_compute_wisdom (int32_t *pending_matrix[],
for (j = 0; j < child_count; j++) {
if ((characters[j].type == AFR_NODE_WISE)
&& pending_matrix[j][i]) {
-
+
characters[i].wisdom = 0;
}
}
@@ -312,7 +428,7 @@ afr_sh_compute_wisdom (int32_t *pending_matrix[],
static int
-afr_sh_wise_nodes_conflict (afr_node_character *characters,
+afr_sh_wise_nodes_conflict (afr_node_character *characters,
int child_count)
{
int i = 0;
@@ -333,13 +449,12 @@ afr_sh_wise_nodes_conflict (afr_node_character *characters,
static int
-afr_sh_mark_wisest_as_sources (int sources[],
- afr_node_character *characters,
+afr_sh_mark_wisest_as_sources (int sources[],
+ afr_node_character *characters,
int child_count)
{
int nsources = 0;
-
- int i = 0;
+ int i = 0;
for (i = 0; i < child_count; i++) {
if (characters[i].wisdom == 1) {
@@ -351,1060 +466,2347 @@ afr_sh_mark_wisest_as_sources (int sources[],
return nsources;
}
-
-static int
-afr_sh_mark_if_size_differs (afr_self_heal_t *sh, int child_count)
+static void
+afr_compute_witness_of_fools (int32_t *witnesses, int32_t **pending_matrix,
+ afr_node_character *characters,
+ int32_t child_count)
{
- int32_t ** pending_matrix;
- int i, j;
-
- int size_differs = 0;
+ int i = 0;
+ int j = 0;
+ int witness = 0;
- pending_matrix = sh->pending_matrix;
+ GF_ASSERT (witnesses);
+ GF_ASSERT (pending_matrix);
+ GF_ASSERT (characters);
+ GF_ASSERT (child_count > 0);
for (i = 0; i < child_count; i++) {
+ if (characters[i].type != AFR_NODE_FOOL)
+ continue;
+
+ witness = 0;
for (j = 0; j < child_count; j++) {
- if (SIZE_DIFFERS (&sh->buf[i], &sh->buf[j])
- && (pending_matrix[i][j] == 0)
- && (pending_matrix[j][i] == 0)) {
-
- pending_matrix[i][j] = 1;
- pending_matrix[j][i] = 1;
-
- size_differs = 1;
- }
+ if (i == j)
+ continue;
+ witness += pending_matrix[i][j];
}
+ witnesses[i] = witness;
}
-
- return size_differs;
}
-
-static int
-afr_sh_mark_biggest_fool_as_source (afr_self_heal_t *sh,
- afr_node_character *characters,
- int child_count)
+static int32_t
+afr_find_biggest_witness_among_fools (int32_t *witnesses,
+ afr_node_character *characters,
+ int32_t child_count)
{
- int i = 0;
- int biggest = 0;
-
+ int i = 0;
+ int biggest_witness = -1;
+ int biggest_witness_idx = -1;
+ int biggest_witness_cnt = -1;
+
+ GF_ASSERT (witnesses);
+ GF_ASSERT (characters);
+ GF_ASSERT (child_count > 0);
+
for (i = 0; i < child_count; i++) {
- if (characters[i].type == AFR_NODE_FOOL) {
- biggest = i;
- break;
- }
+ if (characters[i].type != AFR_NODE_FOOL)
+ continue;
+
+ if (biggest_witness < witnesses[i]) {
+ biggest_witness = witnesses[i];
+ biggest_witness_idx = i;
+ biggest_witness_cnt = 1;
+ continue;
+ }
+
+ if (biggest_witness == witnesses[i])
+ biggest_witness_cnt++;
}
+ if (biggest_witness_cnt != 1)
+ return -1;
+
+ return biggest_witness_idx;
+}
+
+int
+afr_mark_fool_as_source_by_witness (int32_t *sources, int32_t *witnesses,
+ afr_node_character *characters,
+ int32_t child_count, int32_t witness)
+{
+ int i = 0;
+ int nsources = 0;
+
+ GF_ASSERT (sources);
+ GF_ASSERT (witnesses);
+ GF_ASSERT (characters);
+ GF_ASSERT (child_count > 0);
+
for (i = 0; i < child_count; i++) {
if (characters[i].type != AFR_NODE_FOOL)
continue;
-
- if (SIZE_GREATER (&sh->buf[i], &sh->buf[biggest])) {
- biggest = i;
+
+ if (witness == witnesses[i]) {
+ sources[i] = 1;
+ nsources++;
}
}
+ return nsources;
+}
- sh->sources[biggest] = 1;
- return 1;
+int
+afr_mark_fool_as_source_by_idx (int32_t *sources, int child_count, int idx)
+{
+ if (idx >= 0 && idx < child_count) {
+ sources[idx] = 1;
+ return 1;
+ }
+ return 0;
}
static int
-afr_sh_mark_biggest_as_source (afr_self_heal_t *sh, int child_count)
+afr_find_largest_file_size (struct iatt *bufs, int32_t *success_children,
+ int child_count)
{
- int biggest = 0;
- int i;
+ int idx = -1;
+ int i = -1;
+ int child = -1;
+ uint64_t max_size = 0;
+ uint64_t min_size = 0;
+ int num_children = 0;
- for (i = 0; i < child_count; i++) {
- if (SIZE_GREATER (&sh->buf[i], &sh->buf[biggest])) {
- biggest = i;
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+
+ child = success_children[i];
+ if (bufs[child].ia_size > max_size) {
+ max_size = bufs[child].ia_size;
+ idx = child;
+ }
+
+ if ((num_children == 0) || (bufs[child].ia_size < min_size)) {
+ min_size = bufs[child].ia_size;
}
+
+ num_children++;
+ }
+
+ /* If sizes are same for all of them, finding sources will have to
+ * happen with pending changelog. So return -1
+ */
+ if ((num_children > 1) && (min_size == max_size))
+ return -1;
+ return idx;
+}
+
+
+static int
+afr_find_newest_file (struct iatt *bufs, int32_t *success_children,
+ int child_count)
+{
+ int idx = -1;
+ int i = -1;
+ int child = -1;
+ uint64_t max_ctime = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+
+ child = success_children[i];
+ if (bufs[child].ia_ctime > max_ctime) {
+ max_ctime = bufs[child].ia_ctime;
+ idx = child;
+ }
+ }
+
+ return idx;
+}
+
+
+static int
+afr_mark_biggest_of_fools_as_source (int32_t *sources, int32_t **pending_matrix,
+ afr_node_character *characters,
+ int32_t *success_children,
+ int child_count, struct iatt *bufs)
+{
+ int32_t biggest_witness = 0;
+ int nsources = 0;
+ int32_t *witnesses = NULL;
+
+ GF_ASSERT (child_count > 0);
+
+ biggest_witness = afr_find_largest_file_size (bufs, success_children,
+ child_count);
+ if (biggest_witness != -1)
+ goto found;
+
+ witnesses = GF_CALLOC (child_count, sizeof (*witnesses),
+ gf_afr_mt_int32_t);
+ if (NULL == witnesses) {
+ nsources = -1;
+ goto out;
}
- sh->sources[biggest] = 1;
+ afr_compute_witness_of_fools (witnesses, pending_matrix, characters,
+ child_count);
+ biggest_witness = afr_find_biggest_witness_among_fools (witnesses,
+ characters,
+ child_count);
+ if (biggest_witness != -1)
+ goto found;
+
+ biggest_witness = afr_find_newest_file (bufs, success_children,
+ child_count);
- return 1;
+found:
+ nsources = afr_mark_fool_as_source_by_idx (sources, child_count,
+ biggest_witness);
+out:
+ GF_FREE (witnesses);
+ return nsources;
}
+int
+afr_mark_child_as_source_by_uid (int32_t *sources, struct iatt *bufs,
+ int32_t *success_children,
+ unsigned int child_count, uint32_t uid)
+{
+ int i = 0;
+ int nsources = 0;
+ int child = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (-1 == success_children[i])
+ break;
+
+ child = success_children[i];
+ if (uid == bufs[child].ia_uid) {
+ sources[child] = 1;
+ nsources++;
+ }
+ }
+ return nsources;
+}
int
-afr_sh_mark_sources (afr_self_heal_t *sh, int child_count,
- afr_self_heal_type type)
+afr_get_child_with_lowest_uid (struct iatt *bufs, int32_t *success_children,
+ unsigned int child_count)
{
- int i = 0;
+ int i = 0;
+ int smallest = -1;
+ int child = 0;
- int32_t ** pending_matrix;
- int * sources;
+ for (i = 0; i < child_count; i++) {
+ if (-1 == success_children[i])
+ break;
+ child = success_children[i];
+ if ((smallest == -1) ||
+ (bufs[child].ia_uid < bufs[smallest].ia_uid)) {
+ smallest = child;
+ }
+ }
+ return smallest;
+}
- int size_differs = 0;
+static int
+afr_sh_mark_lowest_uid_as_source (struct iatt *bufs, int32_t *success_children,
+ int child_count, int32_t *sources)
+{
+ int nsources = 0;
+ int smallest = 0;
+
+ smallest = afr_get_child_with_lowest_uid (bufs, success_children,
+ child_count);
+ if (smallest < 0) {
+ nsources = -1;
+ goto out;
+ }
+ nsources = afr_mark_child_as_source_by_uid (sources, bufs,
+ success_children, child_count,
+ bufs[smallest].ia_uid);
+out:
+ return nsources;
+}
- pending_matrix = sh->pending_matrix;
- sources = sh->sources;
+int
+afr_get_no_xattr_dir_read_child (xlator_t *this, int32_t *success_children,
+ struct iatt *bufs)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int child = -1;
+ int read_child = -1;
- int nsources = 0;
+ priv = this->private;
+ for (i = 0; i < priv->child_count; i++) {
+ child = success_children[i];
+ if (child < 0)
+ break;
+ if (read_child < 0)
+ read_child = child;
+ else if (bufs[read_child].ia_size < bufs[child].ia_size)
+ read_child = child;
+ }
+ return read_child;
+}
- /* stores the 'characters' (innocent, fool, wise) of the nodes */
- afr_node_character *
- characters = CALLOC (sizeof (afr_node_character),
- child_count);
+int
+afr_sh_mark_zero_size_file_as_sink (struct iatt *bufs, int32_t *success_children,
+ int child_count, int32_t *sources)
+{
+ int nsources = 0;
+ int i = 0;
+ int child = 0;
+ gf_boolean_t sink_exists = _gf_false;
+ gf_boolean_t source_exists = _gf_false;
+ int source = -1;
- /* start clean */
- for (i = 0; i < child_count; i++) {
- sources[i] = 0;
- }
-
for (i = 0; i < child_count; i++) {
- if (afr_sh_is_innocent (pending_matrix[i], child_count)) {
- characters[i].type = AFR_NODE_INNOCENT;
-
- } else if (afr_sh_is_fool (pending_matrix[i], i, child_count)) {
- characters[i].type = AFR_NODE_FOOL;
+ child = success_children[i];
+ if (child < 0)
+ break;
+ if (!bufs[child].ia_size) {
+ sink_exists = _gf_true;
+ continue;
+ }
+ if (!source_exists) {
+ source_exists = _gf_true;
+ source = child;
+ continue;
+ }
+ if (bufs[source].ia_size != bufs[child].ia_size) {
+ nsources = -1;
+ goto out;
+ }
+ }
+ if (!source_exists && !sink_exists) {
+ nsources = -1;
+ goto out;
+ }
- } else if (afr_sh_is_wise (pending_matrix[i], i, child_count)) {
- characters[i].type = AFR_NODE_WISE;
+ if (!source_exists || !sink_exists)
+ goto out;
- } else {
- gf_log ("[module:replicate]", GF_LOG_ERROR,
- "Could not determine the state of subvolume %d!"
- " (This message should never appear."
- " Please file a bug report to "
- "<gluster-devel@nongnu.org>.)", i);
+ for (i = 0; i < child_count; i++) {
+ child = success_children[i];
+ if (child < 0)
+ break;
+ if (bufs[child].ia_size) {
+ sources[child] = 1;
+ nsources++;
}
}
+out:
+ return nsources;
+}
+
+char *
+afr_get_character_str (afr_node_type type)
+{
+ char *character = NULL;
+
+ switch (type) {
+ case AFR_NODE_INNOCENT:
+ character = "innocent";
+ break;
+ case AFR_NODE_FOOL:
+ character = "fool";
+ break;
+ case AFR_NODE_WISE:
+ character = "wise";
+ break;
+ default:
+ character = "invalid";
+ break;
+ }
+ return character;
+}
+
+afr_node_type
+afr_find_child_character_type (int32_t *pending_row, int32_t child,
+ unsigned int child_count)
+{
+ afr_node_type type = AFR_NODE_INVALID;
+
+ GF_ASSERT ((child >= 0) && (child < child_count));
+
+ if (afr_sh_is_innocent (pending_row, child_count))
+ type = AFR_NODE_INNOCENT;
+ else if (afr_sh_is_fool (pending_row, child, child_count))
+ type = AFR_NODE_FOOL;
+ else if (afr_sh_is_wise (pending_row, child, child_count))
+ type = AFR_NODE_WISE;
+ return type;
+}
- if (type == AFR_SELF_HEAL_DATA) {
- size_differs = afr_sh_mark_if_size_differs (sh, child_count);
+int
+afr_build_sources (xlator_t *this, dict_t **xattr, struct iatt *bufs,
+ int32_t **pending_matrix, int32_t *sources,
+ int32_t *success_children, afr_transaction_type type,
+ int32_t *subvol_status, gf_boolean_t ignore_ignorant)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heal_type sh_type = AFR_SELF_HEAL_INVALID;
+ int nsources = -1;
+ unsigned char *ignorant_subvols = NULL;
+ unsigned int child_count = 0;
+
+ priv = this->private;
+ child_count = priv->child_count;
+
+ if (afr_get_children_count (success_children, priv->child_count) == 0)
+ goto out;
+
+ if (!ignore_ignorant) {
+ ignorant_subvols = GF_CALLOC (sizeof (*ignorant_subvols),
+ child_count, gf_afr_mt_char);
+ if (NULL == ignorant_subvols)
+ goto out;
}
+ afr_build_pending_matrix (priv->pending_key, pending_matrix,
+ ignorant_subvols, xattr, type,
+ priv->child_count);
+
+ if (!ignore_ignorant)
+ afr_mark_ignorant_subvols_as_pending (pending_matrix,
+ ignorant_subvols,
+ priv->child_count);
+ sh_type = afr_self_heal_type_for_transaction (type);
+ if (AFR_SELF_HEAL_INVALID == sh_type)
+ goto out;
+
+ afr_sh_print_pending_matrix (pending_matrix, this);
+
+ nsources = afr_mark_sources (this, sources, pending_matrix, bufs,
+ sh_type, success_children, subvol_status);
+out:
+ GF_FREE (ignorant_subvols);
+ return nsources;
+}
+
+void
+afr_find_character_types (afr_node_character *characters,
+ int32_t **pending_matrix, int32_t *success_children,
+ unsigned int child_count)
+{
+ afr_node_type type = AFR_NODE_INVALID;
+ int child = 0;
+ int i = 0;
+
+ for (i = 0; i < child_count; i++) {
+ child = success_children[i];
+ if (child == -1)
+ break;
+ type = afr_find_child_character_type (pending_matrix[child],
+ child, child_count);
+ characters[child].type = type;
+ }
+}
+
+void
+afr_mark_success_children_sources (int32_t *sources, int32_t *success_children,
+ unsigned int child_count)
+{
+ int i = 0;
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+ sources[success_children[i]] = 1;
+ }
+}
+/**
+ * mark_sources: Mark all 'source' nodes and return number of source
+ * nodes found
+ *
+ * A node (a row in the pending matrix) belongs to one of
+ * three categories:
+ *
+ * M is the pending matrix.
+ *
+ * 'innocent' - M[i] is all zeroes
+ * 'fool' - M[i] has i'th element = 1 (self-reference)
+ * 'wise' - M[i] has i'th element = 0, others are 1 or 0.
+ *
+ * All 'innocent' nodes are sinks. If all nodes are innocent, no self-heal is
+ * needed.
+ *
+ * A 'wise' node can be a source. If two 'wise' nodes conflict, it is
+ * a split-brain. If one wise node refers to the other but the other doesn't
+ * refer back, the referrer is a source.
+ *
+ * All fools are sinks, unless there are no 'wise' nodes. In that case,
+ * one of the fools is made a source.
+ */
+
+int
+afr_mark_sources (xlator_t *this, int32_t *sources, int32_t **pending_matrix,
+ struct iatt *bufs, afr_self_heal_type type,
+ int32_t *success_children, int32_t *subvol_status)
+{
+ /* stores the 'characters' (innocent, fool, wise) of the nodes */
+ afr_node_character *characters = NULL;
+ int nsources = -1;
+ unsigned int child_count = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ child_count = priv->child_count;
+ characters = GF_CALLOC (sizeof (afr_node_character),
+ child_count, gf_afr_mt_afr_node_character);
+ if (!characters)
+ goto out;
+
+ this = THIS;
+
+ /* start clean */
+ memset (sources, 0, sizeof (*sources) * child_count);
+ nsources = 0;
+ afr_find_character_types (characters, pending_matrix, success_children,
+ child_count);
if (afr_sh_all_nodes_innocent (characters, child_count)) {
- if (size_differs) {
- nsources = afr_sh_mark_biggest_as_source (sh,
- child_count);
+ switch (type) {
+ case AFR_SELF_HEAL_METADATA:
+ nsources = afr_sh_mark_lowest_uid_as_source (bufs,
+ success_children,
+ child_count,
+ sources);
+ break;
+ case AFR_SELF_HEAL_DATA:
+ nsources = afr_sh_mark_zero_size_file_as_sink (bufs,
+ success_children,
+ child_count,
+ sources);
+ if ((nsources < 0) && subvol_status)
+ *subvol_status |= SPLIT_BRAIN;
+ break;
+ default:
+ break;
}
+ goto out;
+ }
- } else if (afr_sh_wise_nodes_exist (characters, child_count)) {
+ if (afr_sh_wise_nodes_exist (characters, child_count)) {
afr_sh_compute_wisdom (pending_matrix, characters, child_count);
if (afr_sh_wise_nodes_conflict (characters, child_count)) {
- /* split-brain */
-
+ if (subvol_status)
+ *subvol_status |= SPLIT_BRAIN;
nsources = -1;
- goto out;
-
} else {
- nsources = afr_sh_mark_wisest_as_sources (sources,
+ nsources = afr_sh_mark_wisest_as_sources (sources,
characters,
child_count);
}
} else {
- nsources = afr_sh_mark_biggest_fool_as_source (sh, characters,
- child_count);
+ if (subvol_status)
+ *subvol_status |= ALL_FOOLS;
+ nsources = afr_mark_biggest_of_fools_as_source (sources,
+ pending_matrix,
+ characters,
+ success_children,
+ child_count, bufs);
}
out:
- FREE (characters);
+ if (nsources == 0)
+ afr_mark_success_children_sources (sources, success_children,
+ child_count);
+ GF_FREE (characters);
- return nsources;
+ gf_log (this->name, GF_LOG_DEBUG, "Number of sources: %d", nsources);
+ return nsources;
}
-
void
afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr,
- int32_t *delta_matrix[], int success[],
+ int32_t *delta_matrix[], unsigned char success[],
int child_count, afr_transaction_type type)
{
- int i = 0;
- int j = 0;
- int k = 0;
+ int tgt = 0;
+ int src = 0;
+ int value = 0;
- int32_t * pending = NULL;
- int ret = 0;
+ afr_build_pending_matrix (priv->pending_key, delta_matrix, NULL,
+ xattr, type, priv->child_count);
- /* start clean */
- for (i = 0; i < child_count; i++) {
- for (j = 0; j < child_count; j++) {
- delta_matrix[i][j] = 0;
- }
- }
+ /*
+ * The algorithm here has two parts. First, for each subvol indexed
+ * as tgt, we try to figure out what count everyone should have for it.
+ * If the self-heal succeeded, that's easy; the value is zero.
+ * Otherwise, the value is the maximum of the succeeding nodes' counts.
+ * Once we know the value, we loop through (possibly for a second time)
+ * setting each count to the difference so that when we're done all
+ * succeeding nodes will have the same count for tgt.
+ */
+ for (tgt = 0; tgt < priv->child_count; ++tgt) {
+ value = 0;
+ if (!success[tgt]) {
+ /* Find the maximum. */
+ for (src = 0; src < priv->child_count; ++src) {
+ if (!success[src]) {
+ continue;
+ }
+ if (delta_matrix[src][tgt] > value) {
+ value = delta_matrix[src][tgt];
+ }
+ }
+ }
+ /* Force everyone who succeeded to the chosen value. */
+ for (src = 0; src < priv->child_count; ++src) {
+ if (success[src]) {
+ delta_matrix[src][tgt] = value
+ - delta_matrix[src][tgt];
+ }
+ else {
+ delta_matrix[src][tgt] = 0;
+ }
+ }
+ }
+}
- for (i = 0; i < child_count; i++) {
- pending = NULL;
+int
+afr_sh_delta_to_xattr (xlator_t *this,
+ int32_t *delta_matrix[], dict_t *xattr[],
+ int child_count, afr_transaction_type type)
+{
+ int i = 0;
+ int j = 0;
+ int k = 0;
+ int ret = 0;
+ int32_t *pending = NULL;
+ int32_t *local_pending = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ for (i = 0; i < child_count; i++) {
+ if (!xattr[i])
+ continue;
+
+ local_pending = NULL;
for (j = 0; j < child_count; j++) {
- ret = dict_get_ptr (xattr[i], priv->pending_key[j],
- VOID(&pending));
-
- if (!success[j])
+ pending = GF_CALLOC (sizeof (int32_t), 3,
+ gf_afr_mt_int32_t);
+
+ if (!pending) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to allocate pending entry "
+ "for %s[%d] on %s",
+ priv->pending_key[j], type,
+ priv->children[i]->name);
continue;
+ }
+ /* 3 = data+metadata+entry */
k = afr_index_for_transaction_type (type);
-
- if (pending) {
- delta_matrix[i][j] = -(ntoh32 (pending[k]));
- } else {
- delta_matrix[i][j] = 0;
- }
+ pending[k] = hton32 (delta_matrix[i][j]);
+
+ if (j == i) {
+ local_pending = pending;
+ continue;
+ }
+ ret = dict_set_bin (xattr[i], priv->pending_key[j],
+ pending,
+ AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unable to set dict value.");
+ GF_FREE (pending);
+ }
}
- }
+ if (local_pending) {
+ ret = dict_set_bin (xattr[i], priv->pending_key[i],
+ local_pending,
+ AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unable to set dict value.");
+ GF_FREE (local_pending);
+ }
+ }
+ }
+ return 0;
}
int
-afr_sh_delta_to_xattr (afr_private_t *priv,
- int32_t *delta_matrix[], dict_t *xattr[],
- int child_count, afr_transaction_type type)
+afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this)
{
- int i = 0;
- int j = 0;
- int k = 0;
-
- int ret = 0;
-
- int32_t *pending = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- for (i = 0; i < child_count; i++) {
- if (!xattr[i])
- continue;
-
- for (j = 0; j < child_count; j++) {
- pending = CALLOC (sizeof (int32_t), 3);
- /* 3 = data+metadata+entry */
+ local = frame->local;
+ sh = &local->self_heal;
- k = afr_index_for_transaction_type (type);
+ afr_sh_reset (frame, this);
- pending[k] = hton32 (delta_matrix[i][j]);
+ if (local->unhealable) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "split brain found, aborting selfheal of %s",
+ local->loc.path);
+ }
- ret = dict_set_bin (xattr[i], priv->pending_key[j],
- pending,
- 3 * sizeof (int32_t));
- }
- }
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
+ sh->completion_cbk (frame, this);
+ } else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "proceeding to metadata check on %s",
+ local->loc.path);
+ afr_self_heal_metadata (frame, this);
+ }
- return 0;
+ return 0;
}
-int
-afr_sh_has_metadata_pending (dict_t *xattr, int child_count, xlator_t *this)
+static int
+afr_sh_missing_entries_finish (call_frame_t *frame, xlator_t *this)
{
- afr_private_t *priv = NULL;
- int32_t *pending = NULL;
- void *tmp_pending = NULL; /* This is required to remove 'type-punned' warnings from gcc */
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
- int j = 0;
+ local = frame->local;
+ int_lock = &local->internal_lock;
- priv = this->private;
+ int_lock->lock_cbk = afr_sh_missing_entries_done;
+ afr_unlock (frame, this);
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xattr, priv->pending_key[i],
- &tmp_pending);
+ return 0;
+}
- if (ret != 0)
- return 0;
-
- pending = tmp_pending;
+int
+afr_sh_common_create (afr_self_heal_t *sh, unsigned int child_count)
+{
+ int ret = -ENOMEM;
+ sh->buf = GF_CALLOC (child_count, sizeof (*sh->buf),
+ gf_afr_mt_iatt);
+ if (!sh->buf)
+ goto out;
+ sh->parentbufs = GF_CALLOC (child_count, sizeof (*sh->parentbufs),
+ gf_afr_mt_iatt);
+ if (!sh->parentbufs)
+ goto out;
+ sh->child_errno = GF_CALLOC (child_count, sizeof (*sh->child_errno),
+ gf_afr_mt_int);
+ if (!sh->child_errno)
+ goto out;
+ sh->success_children = afr_children_create (child_count);
+ if (!sh->success_children)
+ goto out;
+ sh->fresh_children = afr_children_create (child_count);
+ if (!sh->fresh_children)
+ goto out;
+ sh->xattr = GF_CALLOC (child_count, sizeof (*sh->xattr),
+ gf_afr_mt_dict_t);
+ if (!sh->xattr)
+ goto out;
+ ret = 0;
+out:
+ return ret;
+}
- j = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
+void
+afr_sh_common_lookup_resp_handler (call_frame_t *frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ dict_t *xattr, struct iatt *postparent,
+ loc_t *loc)
+{
+ int child_index = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
- if (pending[j])
- return 1;
+ local = frame->local;
+ priv = this->private;
+ sh = &local->self_heal;
+ child_index = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == 0) {
+ sh->buf[child_index] = *buf;
+ sh->parentbufs[child_index] = *postparent;
+ sh->success_children[sh->success_count] = child_index;
+ sh->success_count++;
+ sh->xattr[child_index] = dict_ref (xattr);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG, "path %s on subvolume"
+ " %s => -1 (%s)", loc->path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ local->self_heal.child_errno[child_index] = op_errno;
+ }
}
-
- return 0;
+ UNLOCK (&frame->lock);
+ return;
}
+gf_boolean_t
+afr_valid_ia_type (ia_type_t ia_type)
+{
+ switch (ia_type) {
+ case IA_IFSOCK:
+ case IA_IFREG:
+ case IA_IFBLK:
+ case IA_IFCHR:
+ case IA_IFIFO:
+ case IA_IFLNK:
+ case IA_IFDIR:
+ return _gf_true;
+ default:
+ return _gf_false;
+ }
+ return _gf_false;
+}
int
-afr_sh_has_data_pending (dict_t *xattr, int child_count, xlator_t *this)
+afr_impunge_frame_create (call_frame_t *frame, xlator_t *this,
+ int active_source, call_frame_t **impunge_frame)
{
- afr_private_t *priv = NULL;
- int32_t *pending = NULL;
- void *tmp_pending = NULL; /* This is required to remove 'type-punned' warnings from gcc */
+ afr_local_t *local = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ int32_t op_errno = 0;
+ afr_private_t *priv = NULL;
+ int ret = 0;
+ call_frame_t *new_frame = NULL;
+
+ op_errno = ENOMEM;
+ priv = this->private;
+ new_frame = copy_frame (frame);
+ if (!new_frame) {
+ goto out;
+ }
+
+ AFR_LOCAL_ALLOC_OR_GOTO (impunge_local, out);
- int ret = -1;
- int i = 0;
- int j = 0;
+ local = frame->local;
+ new_frame->local = impunge_local;
+ impunge_sh = &impunge_local->self_heal;
+ impunge_sh->sh_frame = frame;
+ impunge_sh->active_source = active_source;
+ impunge_local->child_up = memdup (local->child_up,
+ sizeof (*local->child_up) *
+ priv->child_count);
+ if (!impunge_local->child_up)
+ goto out;
+
+ impunge_local->pending = afr_matrix_create (priv->child_count,
+ AFR_NUM_CHANGE_LOGS);
+ if (!impunge_local->pending)
+ goto out;
+
+ ret = afr_sh_common_create (impunge_sh, priv->child_count);
+ if (ret) {
+ op_errno = -ret;
+ goto out;
+ }
+ op_errno = 0;
+ *impunge_frame = new_frame;
+out:
+ if (op_errno && new_frame)
+ AFR_STACK_DESTROY (new_frame);
+ return -op_errno;
+}
- priv = this->private;
+void
+afr_sh_missing_entry_call_impunge_recreate (call_frame_t *frame, xlator_t *this,
+ struct iatt *buf,
+ struct iatt *postparent,
+ afr_impunge_done_cbk_t impunge_done)
+{
+ call_frame_t *impunge_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ int ret = 0;
+ unsigned int enoent_count = 0;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int32_t op_errno = 0;
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ enoent_count = afr_errno_count (NULL, sh->child_errno,
+ priv->child_count, ENOENT);
+ if (!enoent_count) {
+ gf_log (this->name, GF_LOG_INFO,
+ "no missing files - %s. proceeding to metadata check",
+ local->loc.path);
+ goto out;
+ }
+ sh->impunge_done = impunge_done;
+ ret = afr_impunge_frame_create (frame, this, sh->source, &impunge_frame);
+ if (ret)
+ goto out;
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+ loc_copy (&impunge_local->loc, &local->loc);
+ ret = afr_build_parent_loc (&impunge_sh->parent_loc,
+ &impunge_local->loc, &op_errno);
+ if (ret) {
+ ret = -op_errno;
+ goto out;
+ }
+ impunge_local->call_count = enoent_count;
+ impunge_sh->entrybuf = sh->buf[sh->source];
+ impunge_sh->parentbuf = sh->parentbufs[sh->source];
+ for (i = 0; i < priv->child_count; i++) {
+ if (!impunge_local->child_up[i]) {
+ impunge_sh->child_errno[i] = ENOTCONN;
+ continue;
+ }
+ if (sh->child_errno[i] != ENOENT) {
+ impunge_sh->child_errno[i] = EEXIST;
+ continue;
+ }
+ }
for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xattr, priv->pending_key[i],
- &tmp_pending);
+ if (sh->child_errno[i] != ENOENT)
+ continue;
+ afr_sh_entry_impunge_create (impunge_frame, this, i);
+ enoent_count--;
+ }
+ GF_ASSERT (!enoent_count);
+ return;
+out:
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "impunge of %s failed, "
+ "reason: %s", local->loc.path, strerror (-ret));
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ }
+ afr_sh_missing_entries_finish (frame, this);
+}
+
+int
+afr_sh_create_entry_cbk (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- if (ret != 0)
- return 0;
-
- pending = tmp_pending;
+ local = frame->local;
+ sh = &local->self_heal;
+ if (op_ret < 0)
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_missing_entries_finish (frame, this);
+ return 0;
+}
- j = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
+static int
+sh_missing_entries_create (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int type = 0;
+ struct iatt *buf = NULL;
+ struct iatt *postparent = NULL;
- if (pending[j])
- return 1;
+ local = frame->local;
+ sh = &local->self_heal;
+
+ buf = &sh->buf[sh->source];
+ postparent = &sh->parentbufs[sh->source];
+
+ type = buf->ia_type;
+ if (!afr_valid_ia_type (type)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: unknown file type: 0%o", local->loc.path, type);
+ afr_set_local_for_unhealable (local);
+ afr_sh_missing_entries_finish (frame, this);
+ goto out;
}
- return 0;
+ afr_sh_missing_entry_call_impunge_recreate (frame, this,
+ buf, postparent,
+ afr_sh_create_entry_cbk);
+out:
+ return 0;
}
-
-int
-afr_sh_has_entry_pending (dict_t *xattr, int child_count, xlator_t *this)
+void
+afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
{
- afr_private_t *priv = NULL;
- int32_t *pending = NULL;
- void *tmp_pending = NULL; /* This is required to remove 'type-punned' warnings from gcc */
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ ia_type_t ia_type = IA_INVAL;
+ int32_t nsources = 0;
+ loc_t *loc = NULL;
+ int32_t subvol_status = 0;
+ afr_transaction_type txn_type = AFR_DATA_TRANSACTION;
+ gf_boolean_t split_brain = _gf_false;
+ int read_child = -1;
- int ret = -1;
- int i = 0;
- int j = 0;
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+ loc = &local->loc;
- priv = this->private;
+ if (op_ret < 0) {
+ if (op_errno == EIO) {
+ afr_set_local_for_unhealable (local);
+ }
+ // EIO can happen if finding the fresh parent dir failed
+ goto out;
+ }
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xattr, priv->pending_key[i],
- &tmp_pending);
+ //now No chance for the ia_type to conflict
+ ia_type = sh->buf[sh->success_children[0]].ia_type;
+ txn_type = afr_transaction_type_get (ia_type);
+ nsources = afr_build_sources (this, sh->xattr, sh->buf,
+ sh->pending_matrix, sh->sources,
+ sh->success_children, txn_type,
+ &subvol_status, _gf_false);
+ if (nsources < 0) {
+ gf_log (this->name, GF_LOG_INFO, "No sources for dir of %s,"
+ " in missing entry self-heal, continuing with the rest"
+ " of the self-heals", local->loc.path);
+ if (subvol_status & SPLIT_BRAIN) {
+ split_brain = _gf_true;
+ switch (txn_type) {
+ case AFR_DATA_TRANSACTION:
+ nsources = 1;
+ sh->sources[sh->success_children[0]] = 1;
+ break;
+ case AFR_ENTRY_TRANSACTION:
+ read_child = afr_get_no_xattr_dir_read_child
+ (this,
+ sh->success_children,
+ sh->buf);
+ sh->sources[read_child] = 1;
+ nsources = 1;
+ break;
+ default:
+ op_errno = EIO;
+ goto out;
+ }
+ } else {
+ op_errno = EIO;
+ goto out;
+ }
+ }
+
+ afr_get_fresh_children (sh->success_children, sh->sources,
+ sh->fresh_children, priv->child_count);
+ sh->source = sh->fresh_children[0];
+ if (sh->source == -1) {
+ gf_log (this->name, GF_LOG_DEBUG, "No active sources found.");
+ op_errno = EIO;
+ goto out;
+ }
+
+ if (sh->gfid_sh_success_cbk)
+ sh->gfid_sh_success_cbk (frame, this);
+ sh->type = sh->buf[sh->source].ia_type;
+ if (uuid_is_null (loc->inode->gfid))
+ uuid_copy (loc->gfid, sh->buf[sh->source].ia_gfid);
+ if (split_brain) {
+ afr_sh_missing_entries_finish (frame, this);
+ } else {
+ sh_missing_entries_create (frame, this);
+ }
+ return;
+out:
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_set_error (sh, op_errno);
+ afr_sh_missing_entries_finish (frame, this);
+ return;
+}
+
+static int
+afr_sh_common_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
- if (ret != 0)
- return 0;
-
- pending = tmp_pending;
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
- j = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
+ afr_sh_common_lookup_resp_handler (frame, cookie, this, op_ret,
+ op_errno, inode, buf, xattr,
+ postparent, &sh->lookup_loc);
+ call_count = afr_frame_return (frame);
- if (pending[j])
- return 1;
+ if (call_count)
+ goto out;
+ op_ret = -1;
+ if (!sh->success_count) {
+ op_errno = afr_resultant_errno_get (NULL, sh->child_errno,
+ priv->child_count);
+ gf_log (this->name, GF_LOG_ERROR, "Failed to lookup %s, "
+ "reason %s", sh->lookup_loc.path,
+ strerror (op_errno));
+ goto done;
}
- return 0;
-}
+ if ((sh->lookup_flags & AFR_LOOKUP_FAIL_CONFLICTS) &&
+ (afr_conflicting_iattrs (sh->buf, sh->success_children,
+ priv->child_count,
+ sh->lookup_loc.path, this->name))) {
+ op_errno = EIO;
+ gf_log (this->name, GF_LOG_ERROR, "Conflicting entries "
+ "for %s", sh->lookup_loc.path);
+ goto done;
+ }
+ if ((sh->lookup_flags & AFR_LOOKUP_FAIL_MISSING_GFIDS) &&
+ (afr_gfid_missing_count (this->name, sh->success_children,
+ sh->buf, priv->child_count,
+ sh->lookup_loc.path))) {
+ op_errno = ENODATA;
+ gf_log (this->name, GF_LOG_ERROR, "Missing Gfids "
+ "for %s", sh->lookup_loc.path);
+ goto done;
+ }
+ op_ret = 0;
-/**
- * is_matrix_zero - return true if pending matrix is all zeroes
- */
+done:
+ sh->lookup_done (frame, this, op_ret, op_errno);
+out:
+ return 0;
+}
int
-afr_sh_is_matrix_zero (int32_t *pending_matrix[], int child_count)
+afr_sh_remove_entry_cbk (call_frame_t *frame, xlator_t *this, int child,
+ int32_t op_ret, int32_t op_errno)
{
- int i, j;
+ int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- for (i = 0; i < child_count; i++)
- for (j = 0; j < child_count; j++)
- if (pending_matrix[i][j])
- return 0;
- return 1;
+ local = frame->local;
+ sh = &local->self_heal;
+
+ GF_ASSERT (sh->post_remove_call);
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "purge entry %s failed, on child %d reason, %s",
+ local->loc.path, child, strerror (op_errno));
+ LOCK (&frame->lock);
+ {
+ afr_sh_set_error (sh, EIO);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ }
+ UNLOCK (&frame->lock);
+ }
+ call_count = afr_frame_return (frame);
+ if (call_count == 0)
+ sh->post_remove_call (frame, this);
+ return 0;
}
-
-int
-afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this)
+void
+afr_sh_call_entry_expunge_remove (call_frame_t *frame, xlator_t *this,
+ int child_index, struct iatt *buf,
+ struct iatt *parentbuf,
+ afr_expunge_done_cbk_t expunge_done)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
-// memset (sh->child_errno, 0, sizeof (int) * priv->child_count);
- memset (sh->buf, 0, sizeof (struct stat) * priv->child_count);
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i])
- dict_unref (sh->xattr[i]);
- sh->xattr[i] = NULL;
- }
+ call_frame_t *expunge_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_local_t *expunge_local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_self_heal_t *expunge_sh = NULL;
+ int32_t op_errno = 0;
+ int ret = 0;
+
+ expunge_frame = copy_frame (frame);
+ if (!expunge_frame) {
+ goto out;
+ }
- if (local->govinda_gOvinda) {
- gf_log (this->name, GF_LOG_TRACE,
- "aborting selfheal of %s",
- local->loc.path);
- sh->completion_cbk (frame, this);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "proceeding to metadata check on %s",
- local->loc.path);
- afr_self_heal_metadata (frame, this);
- }
+ AFR_LOCAL_ALLOC_OR_GOTO (expunge_local, out);
- return 0;
+ local = frame->local;
+ sh = &local->self_heal;
+ expunge_frame->local = expunge_local;
+ expunge_sh = &expunge_local->self_heal;
+ expunge_sh->sh_frame = frame;
+ loc_copy (&expunge_local->loc, &local->loc);
+ ret = afr_build_parent_loc (&expunge_sh->parent_loc,
+ &expunge_local->loc, &op_errno);
+ if (ret) {
+ ret = -op_errno;
+ goto out;
+ }
+ sh->expunge_done = expunge_done;
+ afr_sh_entry_expunge_remove (expunge_frame, this, child_index, buf,
+ parentbuf);
+ return;
+out:
+ gf_log (this->name, GF_LOG_ERROR, "Expunge of %s failed, reason: %s",
+ local->loc.path, strerror (op_errno));
+ expunge_done (frame, this, child_index, -1, op_errno);
}
+void
+afr_sh_remove_stale_lookup_info (afr_self_heal_t *sh, int32_t *success_children,
+ int32_t *fresh_children,
+ unsigned int child_count)
+{
+ int i = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (afr_is_child_present (success_children, child_count, i) &&
+ !afr_is_child_present (fresh_children, child_count, i)) {
+ sh->child_errno[i] = ENOENT;
+ GF_ASSERT (sh->xattr[i]);
+ dict_unref (sh->xattr[i]);
+ sh->xattr[i] = NULL;
+ }
+ }
+}
int
-sh_missing_entries_unlck_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_sh_purge_stale_entries_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
+ afr_sh_missing_entries_finish (frame, this);
+ } else {
+ if (afr_gfid_missing_count (this->name, sh->fresh_children,
+ sh->buf, priv->child_count,
+ local->loc.path)) {
+ afr_sh_common_lookup (frame, this, &local->loc,
+ afr_sh_missing_entries_lookup_done,
+ sh->sh_gfid_req,
+ AFR_LOOKUP_FAIL_CONFLICTS|
+ AFR_LOOKUP_FAIL_MISSING_GFIDS,
+ NULL);
+ } else {
+ //No need to set gfid so goto missing entries lookup done
+ //Behave as if you have done the lookup
+ afr_sh_remove_stale_lookup_info (sh,
+ sh->success_children,
+ sh->fresh_children,
+ priv->child_count);
+ afr_children_copy (sh->success_children,
+ sh->fresh_children,
+ priv->child_count);
+ afr_sh_missing_entries_lookup_done (frame, this, 0, 0);
+ }
+ }
+ return 0;
+}
- LOCK (&frame->lock);
- {
- }
- UNLOCK (&frame->lock);
+gf_boolean_t
+afr_sh_purge_entry_condition (afr_local_t *local, afr_private_t *priv,
+ int child)
+{
+ afr_self_heal_t *sh = NULL;
- call_count = afr_frame_return (frame);
+ sh = &local->self_heal;
- if (call_count == 0) {
- afr_sh_missing_entries_done (frame, this);
- }
+ if (local->child_up[child] &&
+ (!afr_is_child_present (sh->fresh_parent_dirs, priv->child_count,
+ child))
+ && (sh->child_errno[child] != ENOENT))
+ return _gf_true;
- return 0;
+ return _gf_false;
}
-
-static int
-sh_missing_entries_finish (call_frame_t *frame, xlator_t *this)
+gf_boolean_t
+afr_sh_purge_stale_entry_condition (afr_local_t *local, afr_private_t *priv,
+ int child)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int i = 0;
- int call_count = 0;
- afr_self_heal_t *sh = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ sh = &local->self_heal;
+ if (local->child_up[child] &&
+ (!afr_is_child_present (sh->fresh_children, priv->child_count,
+ child))
+ && (sh->child_errno[child] != ENOENT))
+ return _gf_true;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ return _gf_false;
+}
- call_count = local->child_count;
+void
+afr_sh_purge_entry_common (call_frame_t *frame, xlator_t *this,
+ gf_boolean_t purge_condition (afr_local_t *local,
+ afr_private_t *priv,
+ int child))
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+ int i = 0;
+ int call_count = 0;
- local->call_count = call_count;
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "unlocking %"PRId64"/%s on subvolume %s",
- sh->parent_loc.inode->ino, local->loc.name,
- priv->children[i]->name);
+ for (i = 0; i < priv->child_count; i++) {
+ if (purge_condition (local, priv, i))
+ call_count++;
+ }
- STACK_WIND (frame, sh_missing_entries_unlck_cbk,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- &sh->parent_loc, local->loc.name,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
+ if (call_count == 0) {
+ sh->post_remove_call (frame, this);
+ goto out;
+ }
- if (!--call_count)
- break;
- }
- }
- return 0;
+ local->call_count = call_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!purge_condition (local, priv, i))
+ continue;
+ gf_log (this->name, GF_LOG_INFO, "purging the stale entry %s "
+ "on %s", local->loc.path, priv->children[i]->name);
+ afr_sh_call_entry_expunge_remove (frame, this,
+ (long) i, &sh->buf[i],
+ &sh->parentbufs[i],
+ afr_sh_remove_entry_cbk);
+ }
+out:
+ return;
}
-
-static int
-sh_destroy_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int op_errno, struct stat *stbuf)
+void
+afr_sh_purge_entry (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ sh->post_remove_call = afr_sh_missing_entries_finish;
+
+ afr_sh_purge_entry_common (frame, this, afr_sh_purge_entry_condition);
+}
- int call_count = 0;
+void
+afr_sh_purge_stale_entry (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ sh->post_remove_call = afr_sh_purge_stale_entries_done;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (afr_is_child_present (sh->fresh_children,
+ priv->child_count, i))
+ continue;
+
+ if ((!local->child_up[i]) || sh->child_errno[i] != 0)
+ continue;
+
+ GF_ASSERT (!uuid_is_null (sh->entrybuf.ia_gfid) ||
+ uuid_is_null (sh->buf[i].ia_gfid));
+
+ if ((sh->entrybuf.ia_type != sh->buf[i].ia_type) ||
+ (uuid_compare (sh->buf[i].ia_gfid,
+ sh->entrybuf.ia_gfid)))
+ continue;
+
+ afr_children_add_child (sh->fresh_children, i,
+ priv->child_count);
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- STACK_DESTROY (frame->root);
}
-
- return 0;
+ afr_sh_purge_entry_common (frame, this,
+ afr_sh_purge_stale_entry_condition);
}
+void
+afr_sh_save_child_iatts_from_policy (int32_t *children, struct iatt *bufs,
+ struct iatt *save,
+ unsigned int child_count)
+{
+ int i = 0;
+ int child = 0;
+ gf_boolean_t saved = _gf_false;
+
+ GF_ASSERT (save);
+ //if iatt buf with gfid exists sets it
+ for (i = 0; i < child_count; i++) {
+ child = children[i];
+ if (child == -1)
+ break;
+ *save = bufs[child];
+ saved = _gf_true;
+ if (!uuid_is_null (save->ia_gfid))
+ break;
+ }
+ GF_ASSERT (saved);
+}
-static int
-sh_missing_entries_newentry_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *stbuf)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- call_frame_t *chown_frame = NULL;
- int call_count = 0;
- int child_index = 0;
- struct stat *buf = NULL;
-
- struct timespec ts[2];
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- buf = &sh->buf[sh->source];
- child_index = (long) cookie;
-
-#ifdef HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC
- ts[0] = sh->buf[sh->source].st_atim;
- ts[1] = sh->buf[sh->source].st_mtim;
-
-#elif HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC
- ts[0] = sh->buf[sh->source].st_atimespec;
- ts[1] = sh->buf[sh->source].st_mtimespec;
-#else
- ts[0].tv_sec = sh->buf[sh->source].st_atime;
- ts[1].tv_sec = sh->buf[sh->source].st_mtime;
-#endif
-
- if (op_ret == 0) {
- chown_frame = copy_frame (frame);
-
- chown_frame->local = CALLOC (1, sizeof (afr_local_t));
-
- ((afr_local_t *)chown_frame->local)->call_count = 2;
-
- gf_log (this->name, GF_LOG_TRACE,
- "chown %s to %d %d on subvolume %s",
- local->loc.path, buf->st_uid, buf->st_gid,
- priv->children[child_index]->name);
-
- STACK_WIND (chown_frame, sh_destroy_cbk,
- priv->children[child_index],
- priv->children[child_index]->fops->chown,
- &local->loc,
- buf->st_uid, buf->st_gid);
-
- STACK_WIND (chown_frame, sh_destroy_cbk,
- priv->children[child_index],
- priv->children[child_index]->fops->utimens,
- &local->loc, ts);
-
- }
+void
+afr_get_children_of_fresh_parent_dirs (afr_self_heal_t *sh,
+ unsigned int child_count)
+{
+ afr_children_intersection_get (sh->success_children,
+ sh->fresh_parent_dirs,
+ sh->sources, child_count);
+ afr_get_fresh_children (sh->success_children, sh->sources,
+ sh->fresh_children, child_count);
+ memset (sh->sources, 0, sizeof (*sh->sources) * child_count);
+}
- LOCK (&frame->lock);
- {
- }
- UNLOCK (&frame->lock);
+void
+afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int32_t fresh_child_enoents = 0;
+ int32_t fresh_parent_count = 0;
- call_count = afr_frame_return (frame);
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ if (op_ret < 0)
+ goto fail;
+ afr_get_children_of_fresh_parent_dirs (sh, priv->child_count);
+ fresh_parent_count = afr_get_children_count (sh->fresh_parent_dirs,
+ priv->child_count);
+ //we need the enoent count of the subvols present in fresh_parent_dirs
+ fresh_child_enoents = afr_errno_count (sh->fresh_parent_dirs,
+ sh->child_errno,
+ priv->child_count, ENOENT);
+ if (fresh_child_enoents == fresh_parent_count) {
+ afr_sh_set_error (sh, ENOENT);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_purge_entry (frame, this);
+ } else if (!afr_conflicting_iattrs (sh->buf, sh->fresh_children,
+ priv->child_count, local->loc.path,
+ this->name)) {
+ afr_sh_save_child_iatts_from_policy (sh->fresh_children,
+ sh->buf, &sh->entrybuf,
+ priv->child_count);
+ afr_update_gfid_from_iatts (sh->sh_gfid_req, sh->buf,
+ sh->fresh_children,
+ priv->child_count);
+ afr_sh_purge_stale_entry (frame, this);
+ } else {
+ op_errno = EIO;
+ afr_set_local_for_unhealable (local);
+ goto fail;
+ }
- if (call_count == 0) {
- sh_missing_entries_finish (frame, this);
- }
+ return;
- return 0;
+fail:
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_set_error (sh, op_errno);
+ afr_sh_missing_entries_finish (frame, this);
+ return;
}
+static void
+afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
+{
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int enoent_count = 0;
+ int nsources = 0;
+ int source = -1;
+ int32_t subvol_status = 0;
-static int
-sh_missing_entries_mknod (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int enoent_count = 0;
- int call_count = 0;
- mode_t st_mode = 0;
- dev_t st_dev = 0;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++)
- if (sh->child_errno[i] == ENOENT)
- enoent_count++;
-
- call_count = enoent_count;
- local->call_count = call_count;
-
- st_mode = sh->buf[sh->source].st_mode;
- st_dev = sh->buf[sh->source].st_dev;
-
- gf_log (this->name, GF_LOG_TRACE,
- "mknod %s mode 0%o on %d subvolumes",
- local->loc.path, st_mode, enoent_count);
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->child_errno[i] == ENOENT) {
- STACK_WIND_COOKIE (frame,
- sh_missing_entries_newentry_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->mknod,
- &local->loc, st_mode, st_dev);
- if (!--call_count)
- break;
- }
- }
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ if (op_ret < 0)
+ goto out;
+ enoent_count = afr_errno_count (NULL, sh->child_errno,
+ priv->child_count, ENOENT);
+ if (enoent_count > 0) {
+ gf_log (this->name, GF_LOG_INFO, "Parent dir missing for %s,"
+ " in missing entry self-heal, aborting missing-entry "
+ "self-heal",
+ local->loc.path);
+ afr_sh_missing_entries_finish (frame, this);
+ return;
+ }
- return 0;
-}
+ nsources = afr_build_sources (this, sh->xattr, sh->buf,
+ sh->pending_matrix, sh->sources,
+ sh->success_children,
+ AFR_ENTRY_TRANSACTION, &subvol_status,
+ _gf_true);
+ if ((subvol_status & ALL_FOOLS) ||
+ (subvol_status & SPLIT_BRAIN)) {
+ gf_log (this->name, GF_LOG_INFO, "%s: Performing conservative "
+ "merge", sh->parent_loc.path);
+ afr_mark_success_children_sources (sh->sources,
+ sh->success_children,
+ priv->child_count);
+ } else if (nsources < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "No sources for dir "
+ "of %s, in missing entry self-heal, aborting "
+ "self-heal", local->loc.path);
+ op_errno = EIO;
+ goto out;
+ }
+
+ source = afr_sh_select_source (sh->sources, priv->child_count);
+ if (source == -1) {
+ GF_ASSERT (0);
+ gf_log (this->name, GF_LOG_DEBUG, "No active sources found.");
+ op_errno = EIO;
+ goto out;
+ }
+ afr_get_fresh_children (sh->success_children, sh->sources,
+ sh->fresh_parent_dirs, priv->child_count);
+ afr_sh_common_lookup (frame, this, &local->loc,
+ afr_sh_children_lookup_done, NULL, 0,
+ NULL);
+ return;
+out:
+ afr_sh_set_error (sh, op_errno);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_missing_entries_finish (frame, this);
+ return;
+}
-static int
-sh_missing_entries_mkdir (call_frame_t *frame, xlator_t *this)
+void
+afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int enoent_count = 0;
- int call_count = 0;
- mode_t st_mode = 0;
+ int i = 0;
+ for (i = 0; i < child_count; i++) {
+ memset (&sh->buf[i], 0, sizeof (sh->buf[i]));
+ memset (&sh->parentbufs[i], 0, sizeof (sh->parentbufs[i]));
+ sh->child_errno[i] = 0;
+ }
+ memset (&sh->parentbuf, 0, sizeof (sh->parentbuf));
+ sh->success_count = 0;
+ afr_reset_children (sh->success_children, child_count);
+ afr_reset_children (sh->fresh_children, child_count);
+ afr_reset_xattr (sh->xattr, child_count);
+ loc_wipe (&sh->lookup_loc);
+}
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+/* afr self-heal state will be lost if this call is made
+ * please check the afr_sh_common_reset that is called in this function
+ */
+int
+afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ afr_lookup_done_cbk_t lookup_done , uuid_t gfid,
+ int32_t flags, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int i = 0;
+ int call_count = 0;
+ afr_private_t *priv = NULL;
+ dict_t *xattr_req = NULL;
+ afr_self_heal_t *sh = NULL;
- for (i = 0; i < priv->child_count; i++)
- if (sh->child_errno[i] == ENOENT)
- enoent_count++;
+ local = frame->local;
+ priv = this->private;
+ sh = &local->self_heal;
- call_count = enoent_count;
- local->call_count = call_count;
+ call_count = afr_up_children_count (local->child_up, priv->child_count);
- st_mode = sh->buf[sh->source].st_mode;
+ local->call_count = call_count;
- gf_log (this->name, GF_LOG_TRACE,
- "mkdir %s mode 0%o on %d subvolumes",
- local->loc.path, st_mode, enoent_count);
+ xattr_req = dict_new();
- for (i = 0; i < priv->child_count; i++) {
- if (sh->child_errno[i] == ENOENT) {
- if (!strcmp (local->loc.path, "/")) {
- /* We shouldn't try to create "/" */
+ if (xattr_req) {
+ afr_xattr_req_prepare (this, xattr_req, loc->path);
+ if (gfid) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "looking up %s with gfid: %s",
+ loc->path, uuid_utoa (gfid));
+ GF_ASSERT (!uuid_is_null (gfid));
+ afr_set_dict_gfid (xattr_req, gfid);
+ }
+ }
- sh_missing_entries_finish (frame, this);
+ afr_sh_common_reset (sh, priv->child_count);
+ sh->lookup_done = lookup_done;
+ loc_copy (&sh->lookup_loc, loc);
+ sh->lookup_flags = flags;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "looking up %s on subvolume %s",
+ loc->path, priv->children[i]->name);
+
+ STACK_WIND_COOKIE (frame,
+ afr_sh_common_lookup_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->lookup,
+ loc, xattr_req);
+
+ if (!--call_count)
+ break;
+ }
+ }
- return 0;
- } else {
- STACK_WIND_COOKIE (frame,
- sh_missing_entries_newentry_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->mkdir,
- &local->loc, st_mode);
- if (!--call_count)
- break;
- }
- }
- }
+ if (xattr_req)
+ dict_unref (xattr_req);
- return 0;
+ return 0;
}
-static int
-sh_missing_entries_symlink (call_frame_t *frame, xlator_t *this,
- const char *link)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int enoent_count = 0;
- int call_count = 0;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++)
- if (sh->child_errno[i] == ENOENT)
- enoent_count++;
-
- call_count = enoent_count;
- local->call_count = call_count;
-
- gf_log (this->name, GF_LOG_TRACE,
- "symlink %s -> %s on %d subvolumes",
- local->loc.path, link, enoent_count);
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->child_errno[i] == ENOENT) {
- STACK_WIND_COOKIE (frame,
- sh_missing_entries_newentry_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->symlink,
- link, &local->loc);
- if (!--call_count)
- break;
- }
- }
- return 0;
-}
+int
+afr_sh_post_nb_entrylk_missing_entry_sh_cbk (call_frame_t *frame,
+ xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ sh = &local->self_heal;
+
+ if (int_lock->lock_op_ret < 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Non blocking entrylks failed.");
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_missing_entries_done (frame, this);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Non blocking entrylks done. Proceeding to FOP");
+ afr_sh_common_lookup (frame, this, &sh->parent_loc,
+ afr_sh_find_fresh_parents,
+ NULL, AFR_LOOKUP_FAIL_CONFLICTS,
+ NULL);
+ }
-static int
-sh_missing_entries_readlink_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- const char *link)
+ return 0;
+}
+
+int
+afr_sh_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ char *base_name, afr_lock_cbk_t lock_cbk)
{
- if (op_ret > 0)
- sh_missing_entries_symlink (frame, this, link);
- else
- sh_missing_entries_finish (frame, this);
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- return 0;
+ priv = this->private;
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ int_lock->transaction_lk_type = AFR_SELFHEAL_LK;
+ int_lock->selfheal_lk_type = AFR_ENTRY_SELF_HEAL_LK;
+
+ afr_set_lock_number (frame, this);
+
+ int_lock->lk_basename = base_name;
+ int_lock->lk_loc = loc;
+ int_lock->lock_cbk = lock_cbk;
+ int_lock->domain = this->name;
+
+ int_lock->lockee_count = 0;
+ afr_init_entry_lockee (&int_lock->lockee[0], local, loc,
+ base_name, priv->child_count);
+ int_lock->lockee_count++;
+ afr_nonblocking_entrylk (frame, this);
+
+ return 0;
}
+static int
+afr_self_heal_parent_entrylk (call_frame_t *frame, xlator_t *this,
+ afr_lock_cbk_t lock_cbk)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ int ret = -1;
+ int32_t op_errno = 0;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "attempting to recreate missing entries for path=%s",
+ local->loc.path);
+
+ ret = afr_build_parent_loc (&sh->parent_loc, &local->loc, &op_errno);
+ if (ret)
+ goto out;
+
+ afr_sh_entrylk (frame, this, &sh->parent_loc, NULL,
+ lock_cbk);
+ return 0;
+out:
+ int_lock = &local->internal_lock;
+ int_lock->lock_op_ret = -1;
+ lock_cbk (frame, this);
+ return 0;
+}
static int
-sh_missing_entries_readlink (call_frame_t *frame, xlator_t *this)
+afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ local = frame->local;
+ sh = &local->self_heal;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ sh->sh_type_in_action = AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY;
- STACK_WIND (frame, sh_missing_entries_readlink_cbk,
- priv->children[sh->source],
- priv->children[sh->source]->fops->readlink,
- &local->loc, 4096);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
- return 0;
+ afr_self_heal_parent_entrylk (frame, this,
+ afr_sh_post_nb_entrylk_missing_entry_sh_cbk);
+ return 0;
}
-
-static int
-sh_missing_entries_create (call_frame_t *frame, xlator_t *this)
+afr_local_t*
+afr_self_heal_local_init (afr_local_t *l, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int type = 0;
- int i = 0;
- afr_private_t *priv = NULL;
- int enoent_count = 0;
- int govinda_gOvinda = 0;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->child_errno[i]) {
- if (sh->child_errno[i] == ENOENT)
- enoent_count++;
- } else {
- if (type) {
- if (type != (sh->buf[i].st_mode & S_IFMT))
- govinda_gOvinda = 1;
- } else {
- sh->source = i;
- type = sh->buf[i].st_mode & S_IFMT;
- }
- }
- }
+ afr_private_t *priv = NULL;
+ afr_local_t *lc = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_self_heal_t *shc = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ sh = &l->self_heal;
+
+ lc = mem_get0 (this->local_pool);
+ if (!lc)
+ goto out;
+
+ shc = &lc->self_heal;
+
+ shc->unwind = sh->unwind;
+ shc->gfid_sh_success_cbk = sh->gfid_sh_success_cbk;
+ shc->do_missing_entry_self_heal = sh->do_missing_entry_self_heal;
+ shc->do_gfid_self_heal = sh->do_gfid_self_heal;
+ shc->do_data_self_heal = sh->do_data_self_heal;
+ shc->do_metadata_self_heal = sh->do_metadata_self_heal;
+ shc->do_entry_self_heal = sh->do_entry_self_heal;
+ shc->force_confirm_spb = sh->force_confirm_spb;
+ shc->forced_merge = sh->forced_merge;
+ shc->background = sh->background;
+ shc->type = sh->type;
+ shc->data_sh_info = "";
+ shc->metadata_sh_info = "";
+
+ uuid_copy (shc->sh_gfid_req, sh->sh_gfid_req);
+ if (l->loc.path) {
+ ret = loc_copy (&lc->loc, &l->loc);
+ if (ret < 0)
+ goto out;
+ }
- if (govinda_gOvinda) {
- gf_log (this->name, GF_LOG_ERROR,
- "conflicing filetypes exist for path %s. returning.",
- local->loc.path);
+ lc->child_up = memdup (l->child_up,
+ sizeof (*lc->child_up) * priv->child_count);
+ if (!lc->child_up) {
+ ret = -1;
+ goto out;
+ }
- local->govinda_gOvinda = 1;
- sh_missing_entries_finish (frame, this);
- return 0;
- }
+ if (l->xattr_req)
+ lc->xattr_req = dict_ref (l->xattr_req);
- if (!type) {
- gf_log (this->name, GF_LOG_ERROR,
- "no source found for %s. all nodes down?. returning.",
- local->loc.path);
- /* subvolumes down and/or file does not exist */
- sh_missing_entries_finish (frame, this);
- return 0;
- }
+ if (l->cont.lookup.inode)
+ lc->cont.lookup.inode = inode_ref (l->cont.lookup.inode);
+ if (l->cont.lookup.xattr)
+ lc->cont.lookup.xattr = dict_ref (l->cont.lookup.xattr);
- if (enoent_count == 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "no missing files - %s. proceeding to metadata check",
- local->loc.path);
- /* proceed to next step - metadata self-heal */
- sh_missing_entries_finish (frame, this);
- return 0;
- }
+ lc->internal_lock.locked_nodes =
+ GF_CALLOC (sizeof (*l->internal_lock.locked_nodes),
+ priv->child_count, gf_afr_mt_char);
+ if (!lc->internal_lock.locked_nodes) {
+ ret = -1;
+ goto out;
+ }
- switch (type) {
- case S_IFSOCK:
- case S_IFREG:
- case S_IFBLK:
- case S_IFCHR:
- case S_IFIFO:
- sh_missing_entries_mknod (frame, this);
- break;
- case S_IFLNK:
- sh_missing_entries_readlink (frame, this);
- break;
- case S_IFDIR:
- sh_missing_entries_mkdir (frame, this);
- break;
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "unknown file type: 0%o", type);
- local->govinda_gOvinda = 1;
- sh_missing_entries_finish (frame, this);
- }
+ ret = afr_inodelk_init (&lc->internal_lock.inodelk[0],
+ this->name, priv->child_count);
+ if (ret)
+ goto out;
- return 0;
+out:
+ if (ret) {
+ afr_local_cleanup (lc, this);
+ lc = NULL;
+ }
+ return lc;
}
+int
+afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ afr_self_heal_t * sh = NULL;
+ afr_local_t * orig_frame_local = NULL;
+ afr_self_heal_t * orig_frame_sh = NULL;
+ char sh_type_str[256] = {0,};
+ gf_loglevel_t loglevel = 0;
+
+ priv = this->private;
+ local = bgsh_frame->local;
+ sh = &local->self_heal;
+
+ if (local->unhealable) {
+ afr_set_split_brain (this, sh->inode, SPB, SPB);
+ }
-static int
-sh_missing_entries_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf, dict_t *xattr)
-{
- int child_index = 0;
- afr_local_t *local = NULL;
- int call_count = 0;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "path %s on subvolume %s is of mode 0%o",
- local->loc.path,
- priv->children[child_index]->name,
- buf->st_mode);
-
- local->self_heal.buf[child_index] = *buf;
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "path %s on subvolume %s => -1 (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
-
- local->self_heal.child_errno[child_index] = op_errno;
- }
+ afr_self_heal_type_str_get (sh, sh_type_str,
+ sizeof(sh_type_str));
+ if (is_self_heal_failed (sh, AFR_CHECK_ALL) && !priv->shd.iamshd) {
+ loglevel = GF_LOG_ERROR;
+ } else if (!is_self_heal_failed (sh, AFR_CHECK_ALL)) {
+ loglevel = GF_LOG_INFO;
+ } else {
+ loglevel = GF_LOG_DEBUG;
+ }
- }
- UNLOCK (&frame->lock);
+ afr_log_self_heal_completion_status (local, loglevel);
- call_count = afr_frame_return (frame);
+ FRAME_SU_UNDO (bgsh_frame, afr_local_t);
- if (call_count == 0) {
- sh_missing_entries_create (frame, this);
- }
+ if (!sh->unwound && sh->unwind) {
+ orig_frame_local = sh->orig_frame->local;
+ orig_frame_sh = &orig_frame_local->self_heal;
+ orig_frame_sh->actual_sh_started = _gf_true;
+ sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno,
+ is_self_heal_failed (sh, AFR_CHECK_ALL));
+ }
- return 0;
+ if (sh->background) {
+ LOCK (&priv->lock);
+ {
+ priv->background_self_heals_started--;
+ }
+ UNLOCK (&priv->lock);
+ }
+
+ AFR_STACK_DESTROY (bgsh_frame);
+
+ return 0;
}
+int
+afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int32_t op_errno = 0;
+ int ret = 0;
+ afr_self_heal_t *orig_sh = NULL;
+ call_frame_t *sh_frame = NULL;
+ afr_local_t *sh_local = NULL;
+ loc_t *loc = NULL;
-static int
-sh_missing_entries_lookup (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- int i = 0;
- int call_count = 0;
- afr_private_t *priv = NULL;
- dict_t *xattr_req = NULL;
- int ret = -1;
-
- local = frame->local;
- call_count = local->child_count;
- priv = this->private;
-
- local->call_count = call_count;
-
- xattr_req = dict_new();
-
- if (xattr_req) {
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_uint64 (xattr_req,
- priv->pending_key[i],
- 3 * sizeof(int32_t));
+ local = frame->local;
+ orig_sh = &local->self_heal;
+ priv = this->private;
+
+ GF_ASSERT (local->loc.path);
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "performing self heal on %s (metadata=%d data=%d entry=%d)",
+ local->loc.path,
+ local->self_heal.do_metadata_self_heal,
+ local->self_heal.do_data_self_heal,
+ local->self_heal.do_entry_self_heal);
+
+ op_errno = ENOMEM;
+ sh_frame = copy_frame (frame);
+ if (!sh_frame)
+ goto out;
+ afr_set_lk_owner (sh_frame, this, sh_frame->root);
+ afr_set_low_priority (sh_frame);
+
+ sh_local = afr_self_heal_local_init (local, this);
+ if (!sh_local)
+ goto out;
+ sh_frame->local = sh_local;
+ sh = &sh_local->self_heal;
+
+ sh->inode = inode_ref (inode);
+ sh->orig_frame = frame;
+
+ sh->completion_cbk = afr_self_heal_completion_cbk;
+
+ sh->success = GF_CALLOC (priv->child_count, sizeof (*sh->success),
+ gf_afr_mt_char);
+ if (!sh->success)
+ goto out;
+ sh->sources = GF_CALLOC (sizeof (*sh->sources), priv->child_count,
+ gf_afr_mt_int);
+ if (!sh->sources)
+ goto out;
+ sh->locked_nodes = GF_CALLOC (sizeof (*sh->locked_nodes),
+ priv->child_count,
+ gf_afr_mt_int);
+ if (!sh->locked_nodes)
+ goto out;
+
+ sh->pending_matrix = afr_matrix_create (priv->child_count,
+ priv->child_count);
+ if (!sh->pending_matrix)
+ goto out;
+
+ sh->delta_matrix = afr_matrix_create (priv->child_count,
+ priv->child_count);
+ if (!sh->delta_matrix)
+ goto out;
+
+ sh->fresh_parent_dirs = afr_children_create (priv->child_count);
+ if (!sh->fresh_parent_dirs)
+ goto out;
+ ret = afr_sh_common_create (sh, priv->child_count);
+ if (ret) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ if (local->self_heal.background) {
+ LOCK (&priv->lock);
+ {
+ if (priv->background_self_heals_started
+ < priv->background_self_heal_count) {
+ priv->background_self_heals_started++;
+
+
+ } else {
+ local->self_heal.background = _gf_false;
+ sh->background = _gf_false;
+ }
}
+ UNLOCK (&priv->lock);
}
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
+ if (!local->loc.parent) {
+ sh->do_missing_entry_self_heal = _gf_false;
+ sh->do_gfid_self_heal = _gf_false;
+ }
- STACK_WIND_COOKIE (frame,
- sh_missing_entries_lookup_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- &local->loc, xattr_req);
+ sh->sh_type_in_action = AFR_SELF_HEAL_INVALID;
- if (!--call_count)
- break;
- }
- }
-
- if (xattr_req)
- dict_unref (xattr_req);
+ FRAME_SU_DO (sh_frame, afr_local_t);
+ if (sh->do_missing_entry_self_heal || sh->do_gfid_self_heal) {
+ afr_self_heal_missing_entries (sh_frame, this);
+ } else {
+ loc = &sh_local->loc;
+ if (uuid_is_null (loc->inode->gfid) && uuid_is_null (loc->gfid)) {
+ if (!uuid_is_null (inode->gfid))
+ GF_ASSERT (!uuid_compare (inode->gfid,
+ sh->sh_gfid_req));
+ uuid_copy (loc->gfid, sh->sh_gfid_req);
+ }
+ gf_log (this->name, GF_LOG_TRACE,
+ "proceeding to metadata check on %s",
+ local->loc.path);
- return 0;
+ afr_sh_missing_entries_done (sh_frame, this);
+ }
+ op_errno = 0;
+
+out:
+ if (op_errno) {
+ orig_sh->unwind (frame, this, -1, op_errno, 1);
+ if (sh_frame)
+ AFR_STACK_DESTROY (sh_frame);
+ }
+ return 0;
}
+void
+afr_self_heal_type_str_get (afr_self_heal_t *self_heal_p, char *str,
+ size_t size)
+{
+ GF_ASSERT (str && (size > strlen (" missing-entry gfid "
+ "meta-data data entry")));
-static int
-sh_missing_entries_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
-
-
- local = frame->local;
- sh = &local->self_heal;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- sh->op_failed = 1;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "locking inode of %s on child %d failed: %s",
- local->loc.path, child_index,
- strerror (op_errno));
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "inode of %s on child %d locked",
- local->loc.path, child_index);
- }
- }
- UNLOCK (&frame->lock);
+ if (self_heal_p->do_metadata_self_heal) {
+ snprintf (str, size, " meta-data");
+ }
- call_count = afr_frame_return (frame);
+ if (self_heal_p->do_data_self_heal) {
+ snprintf (str + strlen(str), size - strlen(str), " data");
+ }
- if (call_count == 0) {
- if (sh->op_failed == 1) {
- sh_missing_entries_finish (frame, this);
- return 0;
- }
+ if (self_heal_p->do_entry_self_heal) {
+ snprintf (str + strlen(str), size - strlen(str), " entry");
+ }
- sh_missing_entries_lookup (frame, this);
- }
+ if (self_heal_p->do_missing_entry_self_heal) {
+ snprintf (str + strlen(str), size - strlen(str),
+ " missing-entry");
+ }
- return 0;
+ if (self_heal_p->do_gfid_self_heal) {
+ snprintf (str + strlen(str), size - strlen(str), " gfid");
+ }
}
+afr_self_heal_type
+afr_self_heal_type_for_transaction (afr_transaction_type type)
+{
+ afr_self_heal_type sh_type = AFR_SELF_HEAL_INVALID;
+
+ switch (type) {
+ case AFR_DATA_TRANSACTION:
+ sh_type = AFR_SELF_HEAL_DATA;
+ break;
+ case AFR_METADATA_TRANSACTION:
+ sh_type = AFR_SELF_HEAL_METADATA;
+ break;
+ case AFR_ENTRY_TRANSACTION:
+ sh_type = AFR_SELF_HEAL_ENTRY;
+ break;
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ GF_ASSERT (0);
+ break;
+ }
+ return sh_type;
+}
-static int
-afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this)
+int
+afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int call_count = 0;
+ int ret = -1;
+ uuid_t pargfid = {0};
+ if (!child)
+ goto out;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ if (!uuid_is_null (parent->inode->gfid))
+ uuid_copy (pargfid, parent->inode->gfid);
+ else if (!uuid_is_null (parent->gfid))
+ uuid_copy (pargfid, parent->gfid);
- gf_log (this->name, GF_LOG_TRACE,
- "attempting to recreate missing entries for path=%s",
- local->loc.path);
+ if (uuid_is_null (pargfid))
+ goto out;
- afr_build_parent_loc (&sh->parent_loc, &local->loc);
+ if (strcmp (parent->path, "/") == 0)
+ ret = gf_asprintf ((char **)&child->path, "/%s", name);
+ else
+ ret = gf_asprintf ((char **)&child->path, "%s/%s", parent->path,
+ name);
- call_count = local->child_count;
+ if (-1 == ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "asprintf failed while setting child path");
+ }
- local->call_count = call_count;
+ child->name = strrchr (child->path, '/');
+ if (child->name)
+ child->name++;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, sh_missing_entries_lk_cbk,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- &sh->parent_loc, local->loc.name,
- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK);
- if (!--call_count)
- break;
- }
- }
+ child->parent = inode_ref (parent->inode);
+ child->inode = inode_new (parent->inode->table);
+ uuid_copy (child->pargfid, pargfid);
- return 0;
+ if (!child->inode) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if ((ret == -1) && child)
+ loc_wipe (child);
+
+ return ret;
+}
+
+int
+afr_sh_erase_pending (call_frame_t *frame, xlator_t *this,
+ afr_transaction_type type, afr_fxattrop_cbk_t cbk,
+ int (*finish)(call_frame_t *frame, xlator_t *this))
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int i = 0;
+ dict_t **erase_xattr = NULL;
+ int ret = -1;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix,
+ sh->success, priv->child_count, type);
+
+ erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count,
+ gf_afr_mt_dict_t);
+ if (!erase_xattr)
+ goto out;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sh->xattr[i]) {
+ call_count++;
+ erase_xattr[i] = dict_new ();
+ if (!erase_xattr[i])
+ goto out;
+ }
+ }
+
+ afr_sh_delta_to_xattr (this, sh->delta_matrix, erase_xattr,
+ priv->child_count, type);
+
+ gf_log (this->name, GF_LOG_DEBUG, "Delta matrix for: %s",
+ lkowner_utoa (&frame->root->lk_owner));
+ afr_sh_print_pending_matrix (sh->delta_matrix, this);
+ local->call_count = call_count;
+ if (call_count == 0) {
+ ret = 0;
+ finish (frame, this);
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!erase_xattr[i])
+ continue;
+
+ if (sh->healing_fd) {//true for ENTRY, reg file DATA transaction
+ STACK_WIND_COOKIE (frame, cbk, (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ sh->healing_fd,
+ GF_XATTROP_ADD_ARRAY, erase_xattr[i],
+ NULL);
+ } else {
+ STACK_WIND_COOKIE (frame, cbk, (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->loc,
+ GF_XATTROP_ADD_ARRAY, erase_xattr[i],
+ NULL);
+ }
+ }
+
+ ret = 0;
+out:
+ if (erase_xattr) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (erase_xattr[i]) {
+ dict_unref (erase_xattr[i]);
+ }
+ }
+ }
+
+ GF_FREE (erase_xattr);
+
+ if (ret < 0) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ finish (frame, this);
+ }
+
+ return 0;
+}
+
+void
+afr_set_self_heal_status(afr_self_heal_t *sh, afr_self_heal_status status)
+{
+ xlator_t *this = NULL;
+ afr_sh_status_for_all_type *sh_status = &(sh->afr_all_sh_status);
+ afr_self_heal_type sh_type_in_action = sh->sh_type_in_action;
+ this = THIS;
+
+ if (!sh) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "Null self heal"
+ "Structure");
+ goto out;
+ }
+
+ switch (sh_type_in_action) {
+ case AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY:
+ sh_status->gfid_or_missing_entry_self_heal = status;
+ break;
+ case AFR_SELF_HEAL_METADATA:
+ sh_status->metadata_self_heal = status;
+ break;
+ case AFR_SELF_HEAL_DATA:
+ sh_status->data_self_heal = status;
+ break;
+ case AFR_SELF_HEAL_ENTRY:
+ sh_status->entry_self_heal = status;
+ break;
+ case AFR_SELF_HEAL_INVALID:
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid"
+ "self heal type in action");
+ break;
+ }
+out:
+ return;
}
+void
+afr_set_local_for_unhealable (afr_local_t *local)
+{
+ afr_self_heal_t *sh = NULL;
+
+ sh = &local->self_heal;
+
+ local->unhealable = 1;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+}
int
-afr_self_heal (call_frame_t *frame, xlator_t *this,
- int (*completion_cbk) (call_frame_t *, xlator_t *))
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- gf_log (this->name, GF_LOG_TRACE,
- "performing self heal on %s (metadata=%d data=%d entry=%d)",
- local->loc.path,
- local->need_metadata_self_heal,
- local->need_data_self_heal,
- local->need_entry_self_heal);
-
- sh->completion_cbk = completion_cbk;
-
- sh->buf = CALLOC (priv->child_count, sizeof (struct stat));
- sh->child_errno = CALLOC (priv->child_count, sizeof (int));
- sh->success = CALLOC (priv->child_count, sizeof (int));
- sh->xattr = CALLOC (priv->child_count, sizeof (dict_t *));
- sh->sources = CALLOC (sizeof (*sh->sources), priv->child_count);
-
- sh->pending_matrix = CALLOC (sizeof (int32_t *), priv->child_count);
- for (i = 0; i < priv->child_count; i++) {
- sh->pending_matrix[i] = CALLOC (sizeof (int32_t),
- priv->child_count);
- }
+is_self_heal_failed (afr_self_heal_t *sh, afr_sh_fail_check_type type)
+{
+ afr_sh_status_for_all_type sh_status = sh->afr_all_sh_status;
+ afr_self_heal_type sh_type_in_action = AFR_SELF_HEAL_INVALID;
+ afr_self_heal_status status = AFR_SELF_HEAL_FAILED;
+ xlator_t *this = NULL;
+ int sh_failed = 0;
+
+ this = THIS;
+
+ if (!sh) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "Null self heal "
+ "structure");
+ sh_failed = 1;
+ goto out;
+ }
- sh->delta_matrix = CALLOC (sizeof (int32_t *), priv->child_count);
- for (i = 0; i < priv->child_count; i++) {
- sh->delta_matrix[i] = CALLOC (sizeof (int32_t),
- priv->child_count);
- }
+ if (type == AFR_CHECK_ALL) {
+ if ((sh_status.gfid_or_missing_entry_self_heal == AFR_SELF_HEAL_FAILED)
+ || (sh_status.metadata_self_heal == AFR_SELF_HEAL_FAILED)
+ || (sh_status.data_self_heal == AFR_SELF_HEAL_FAILED)
+ || (sh_status.entry_self_heal == AFR_SELF_HEAL_FAILED))
+ sh_failed = 1;
+ } else if (type == AFR_CHECK_SPECIFIC) {
+ sh_type_in_action = sh->sh_type_in_action;
+ switch (sh_type_in_action) {
+ case AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY:
+ status = sh_status.gfid_or_missing_entry_self_heal;
+ break;
+ case AFR_SELF_HEAL_METADATA:
+ status = sh_status.metadata_self_heal;
+ break;
+ case AFR_SELF_HEAL_ENTRY:
+ status = sh_status.entry_self_heal;
+ break;
+ case AFR_SELF_HEAL_DATA:
+ status = sh_status.data_self_heal;
+ break;
+ case AFR_SELF_HEAL_INVALID:
+ status = AFR_SELF_HEAL_NOT_ATTEMPTED;
+ break;
+ }
+ if (status == AFR_SELF_HEAL_FAILED)
+ sh_failed = 1;
- if (local->success_count && local->enoent_count) {
- afr_self_heal_missing_entries (frame, this);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "proceeding to metadata check on %s",
- local->loc.path);
- afr_sh_missing_entries_done (frame, this);
- }
+ }
- return 0;
+out:
+ return sh_failed;
+}
+
+char *
+get_sh_completion_status (afr_self_heal_status status)
+{
+
+ char *not_attempted = " is not attempted";
+ char *failed = " failed";
+ char *started = " is started";
+ char *sync_begin = " is successfully completed";
+ char *result = " has unknown status";
+
+ switch (status)
+ {
+ case AFR_SELF_HEAL_NOT_ATTEMPTED:
+ result = not_attempted;
+ break;
+ case AFR_SELF_HEAL_FAILED:
+ result = failed;
+ break;
+ case AFR_SELF_HEAL_STARTED:
+ result = started;
+ break;
+ case AFR_SELF_HEAL_SYNC_BEGIN:
+ result = sync_begin;
+ break;
+ }
+
+ return result;
+
+}
+
+void
+afr_log_self_heal_completion_status (afr_local_t *local, gf_loglevel_t loglvl)
+{
+
+ char sh_log[4096] = {0};
+ afr_self_heal_t *sh = &local->self_heal;
+ afr_sh_status_for_all_type all_status = sh->afr_all_sh_status;
+ xlator_t *this = NULL;
+ size_t off = 0;
+ int data_sh = 0;
+ int metadata_sh = 0;
+ int print_log = 0;
+
+ this = THIS;
+
+ ADD_FMT_STRING (sh_log, off, "gfid or missing entry",
+ all_status.gfid_or_missing_entry_self_heal, print_log);
+ ADD_FMT_STRING_SYNC (sh_log, off, "metadata",
+ all_status.metadata_self_heal, print_log);
+ if (sh->background) {
+ ADD_FMT_STRING_SYNC (sh_log, off, "backgroung data",
+ all_status.data_self_heal, print_log);
+ } else {
+ ADD_FMT_STRING_SYNC (sh_log, off, "foreground data",
+ all_status.data_self_heal, print_log);
+ }
+ ADD_FMT_STRING_SYNC (sh_log, off, "entry", all_status.entry_self_heal,
+ print_log);
+
+ if (AFR_SELF_HEAL_SYNC_BEGIN == all_status.data_self_heal &&
+ strcmp (sh->data_sh_info, "") && sh->data_sh_info )
+ data_sh = 1;
+ if (AFR_SELF_HEAL_SYNC_BEGIN == all_status.metadata_self_heal &&
+ strcmp (sh->metadata_sh_info, "") && sh->metadata_sh_info)
+ metadata_sh = 1;
+
+ if (!print_log)
+ return;
+
+ gf_log (this->name, loglvl, "%s %s %s on %s", sh_log,
+ ((data_sh == 1) ? sh->data_sh_info : ""),
+ ((metadata_sh == 1) ? sh->metadata_sh_info : ""),
+ local->loc.path);
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h
index a311cdf5e..473264776 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.h
+++ b/xlators/cluster/afr/src/afr-self-heal-common.h
@@ -1,70 +1,144 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __AFR_SELF_HEAL_COMMON_H__
#define __AFR_SELF_HEAL_COMMON_H__
-#define FILE_HAS_HOLES(buf) (((buf)->st_size) > ((buf)->st_blocks * 512))
+#define FILE_HAS_HOLES(buf) (((buf)->ia_size) > ((buf)->ia_blocks * 512))
+#define AFR_SH_MIN_PARTICIPANTS 2
typedef enum {
- AFR_SELF_HEAL_ENTRY,
- AFR_SELF_HEAL_METADATA,
- AFR_SELF_HEAL_DATA,
-} afr_self_heal_type;
+ AFR_LOOKUP_FAIL_CONFLICTS = 1,
+ AFR_LOOKUP_FAIL_MISSING_GFIDS = 2,
+} afr_lookup_flags_t;
int
afr_sh_select_source (int sources[], int child_count);
int
-afr_sh_sink_count (int sources[], int child_count);
-
-int
afr_sh_source_count (int sources[], int child_count);
-int
-afr_sh_supress_errenous_children (int sources[], int child_errno[],
- int child_count);
-
void
afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this);
void
-afr_sh_build_pending_matrix (afr_private_t *priv,
- int32_t *pending_matrix[], dict_t *xattr[],
- int child_count, afr_transaction_type type);
+afr_sh_print_split_brain_log (int32_t *pending_matrix[], xlator_t *this,
+ const char *loc);
+
+int
+afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
+ unsigned char *ignorant_subvols,
+ dict_t *xattr[], afr_transaction_type type,
+ size_t child_count);
void
afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr,
- int32_t *delta_matrix[], int success[],
+ int32_t *delta_matrix[], unsigned char success[],
int child_count, afr_transaction_type type);
int
-afr_sh_mark_sources (afr_self_heal_t *sh, int child_count,
- afr_self_heal_type type);
+afr_mark_sources (xlator_t *this, int32_t *sources, int32_t **pending_matrix,
+ struct iatt *bufs, afr_self_heal_type type,
+ int32_t *success_children, int32_t *subvol_status);
int
-afr_sh_delta_to_xattr (afr_private_t *priv,
+afr_sh_delta_to_xattr (xlator_t *this,
int32_t *delta_matrix[], dict_t *xattr[],
int child_count, afr_transaction_type type);
+void
+afr_self_heal_type_str_get (afr_self_heal_t *self_heal_p, char *str,
+ size_t size);
+
+afr_self_heal_type
+afr_self_heal_type_for_transaction (afr_transaction_type type);
+
int
-afr_sh_is_matrix_zero (int32_t *pending_matrix[], int child_count);
+afr_build_sources (xlator_t *this, dict_t **xattr, struct iatt *bufs,
+ int32_t **pending_matrix, int32_t *sources,
+ int32_t *success_children, afr_transaction_type type,
+ int32_t *subvol_status, gf_boolean_t ignore_ignorant);
+void
+afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count);
+void
+afr_sh_common_lookup_resp_handler (call_frame_t *frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ dict_t *xattr, struct iatt *postparent,
+ loc_t *loc);
+
+int
+afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ afr_lookup_done_cbk_t lookup_cbk, uuid_t uuid,
+ int32_t flags, dict_t *xdata);
+int
+afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this,
+ int active_src, struct iatt *buf,
+ struct iatt *parentbuf);
+int
+afr_sh_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ char *base_name, afr_lock_cbk_t lock_cbk);
+int
+afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this,
+ int child_index);
+int
+afr_sh_data_unlock (call_frame_t *frame, xlator_t *this, char *dom,
+ afr_lock_cbk_t lock_cbk);
+afr_local_t *
+afr_self_heal_local_init (afr_local_t *l, xlator_t *this);
+int
+afr_sh_data_lock (call_frame_t *frame, xlator_t *this,
+ off_t start, off_t len, gf_boolean_t block, char *dom,
+ afr_lock_cbk_t success_handler,
+ afr_lock_cbk_t failure_handler);
+void
+afr_sh_set_error (afr_self_heal_t *sh, int32_t op_errno);
+void
+afr_sh_mark_source_sinks (call_frame_t *frame, xlator_t *this);
+typedef int
+(*afr_fxattrop_cbk_t) (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xattr, dict_t *xdata);
+int
+afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name);
+int
+afr_impunge_frame_create (call_frame_t *frame, xlator_t *this,
+ int active_source, call_frame_t **impunge_frame);
+void
+afr_sh_reset (call_frame_t *frame, xlator_t *this);
+
+void
+afr_children_intersection_get (int32_t *set1, int32_t *set2,
+ int *intersection, unsigned int child_count);
+int
+afr_get_no_xattr_dir_read_child (xlator_t *this, int32_t *success_children,
+ struct iatt *bufs);
+int
+afr_sh_erase_pending (call_frame_t *frame, xlator_t *this,
+ afr_transaction_type type, afr_fxattrop_cbk_t cbk,
+ int (*finish)(call_frame_t *frame, xlator_t *this));
+
+void
+afr_set_local_for_unhealable (afr_local_t *local);
+
+int
+is_self_heal_failed (afr_self_heal_t *sh, afr_sh_fail_check_type type);
+
+void
+afr_set_self_heal_status (afr_self_heal_t *sh, afr_self_heal_status status);
+
+void
+afr_log_self_heal_completion_status (afr_local_t *local, gf_loglevel_t logl);
+char*
+afr_get_pending_matrix_str (int32_t *pending_matrix[], xlator_t *this);
#endif /* __AFR_SELF_HEAL_COMMON_H__ */
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 06372d47d..9de26ee56 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -47,1044 +38,1717 @@
#include "afr-transaction.h"
#include "afr-self-heal.h"
#include "afr-self-heal-common.h"
+#include "afr-self-heal-algorithm.h"
+
+int
+afr_sh_data_fail (call_frame_t *frame, xlator_t *this);
+static inline gf_boolean_t
+afr_sh_data_proceed (unsigned int success_count)
+{
+ return (success_count >= AFR_SH_MIN_PARTICIPANTS);
+}
+extern int
+sh_loop_finish (call_frame_t *loop_frame, xlator_t *this);
int
-afr_sh_data_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
+afr_post_sh_big_lock_success (call_frame_t *frame, xlator_t *this);
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+int
+afr_post_sh_big_lock_failure (call_frame_t *frame, xlator_t *this);
- /*
- TODO: cleanup sh->*
- */
+int
+afr_sh_data_finish (call_frame_t *frame, xlator_t *this);
- gf_log (this->name, GF_LOG_TRACE,
- "self heal of %s completed",
- local->loc.path);
+int
+afr_sh_data_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- sh->completion_cbk (frame, this);
+ local = frame->local;
+ sh = &local->self_heal;
+
+ sh->completion_cbk (frame, this);
- return 0;
+ return 0;
}
int
afr_sh_data_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int child_index = (long) cookie;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- }
- UNLOCK (&frame->lock);
+ local = frame->local;
+ priv = this->private;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "flush failed on %s on subvolume %s: %s",
+ local->loc.path, priv->children[child_index]->name,
+ strerror (op_errno));
+ }
+ }
+ UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ call_count = afr_frame_return (frame);
- if (call_count == 0) {
- fd_unref (sh->healing_fd);
- sh->healing_fd = NULL;
- afr_sh_data_done (frame, this);
- }
+ if (call_count == 0) {
+ afr_sh_data_done (frame, this);
+ }
- return 0;
+ return 0;
}
-
int
-afr_sh_data_utimes_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+afr_sh_data_close (call_frame_t *frame, xlator_t *this)
{
- afr_sh_data_flush_cbk (frame, cookie, this, op_ret, op_errno);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+ int i = 0;
+ int call_count = 0;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ if (!sh->healing_fd) {
+ //This happens when file is non-reg
+ afr_sh_data_done (frame, this);
+ return 0;
+ }
+ call_count = afr_set_elem_count_get (sh->success,
+ priv->child_count);
+ local->call_count = call_count;
+
+ if (call_count == 0) {
+ afr_sh_data_done (frame, this);
+ return 0;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sh->success[i])
+ continue;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "closing fd of %s on %s",
+ local->loc.path, priv->children[i]->name);
+
+ STACK_WIND_COOKIE (frame, afr_sh_data_flush_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->flush,
+ sh->healing_fd, NULL);
+
+ if (!--call_count)
+ break;
+ }
return 0;
}
-
int
-afr_sh_data_close (call_frame_t *frame, xlator_t *this)
+afr_sh_dom_unlock (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
-
- int i = 0;
- int call_count = 0;
- int source = 0;
- int active_sinks = 0;
-
- struct timespec ts[2];
-
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+
local = frame->local;
sh = &local->self_heal;
priv = this->private;
- source = sh->source;
- active_sinks = sh->active_sinks;
-
-#ifdef HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC
- ts[0] = sh->buf[source].st_atim;
- ts[1] = sh->buf[source].st_mtim;
-
-#elif HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC
- ts[0] = sh->buf[source].st_atimespec;
- ts[1] = sh->buf[source].st_mtimespec;
-#else
- ts[0].tv_sec = sh->buf[source].st_atime;
- ts[1].tv_sec = sh->buf[source].st_mtime;
-#endif
+ if (sh->sh_dom_lock_held)
+ afr_sh_data_unlock (frame, this, priv->sh_domain,
+ afr_sh_data_close);
+ else
+ afr_sh_data_close (frame, this);
+ return 0;
+}
- if (!sh->healing_fd) {
- afr_sh_data_done (frame, this);
- return 0;
- }
+int
+afr_sh_data_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
- call_count = (sh->active_sinks + 1) * 2;
- local->call_count = call_count;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int child_index = (long) cookie;
- /* closed source */
- gf_log (this->name, GF_LOG_TRACE,
- "closing fd of %s on %s",
- local->loc.path, priv->children[sh->source]->name);
+ local = frame->local;
+ priv = this->private;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "setattr failed on %s on subvolume %s: %s",
+ local->loc.path, priv->children[child_index]->name,
+ strerror (op_errno));
+ }
+ }
+ UNLOCK (&frame->lock);
- STACK_WIND_COOKIE (frame, afr_sh_data_flush_cbk,
- (void *) (long) sh->source,
- priv->children[sh->source],
- priv->children[sh->source]->fops->flush,
- sh->healing_fd);
- call_count--;
+ call_count = afr_frame_return (frame);
- STACK_WIND_COOKIE (frame, afr_sh_data_utimes_cbk,
- (void *) (long) sh->source,
- priv->children[sh->source],
- priv->children[sh->source]->fops->utimens,
- &local->loc, ts);
-
- call_count--;
+ if (call_count == 0) {
+ afr_sh_data_finish (frame, this);
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] || !local->child_up[i])
- continue;
+ return 0;
+}
- gf_log (this->name, GF_LOG_TRACE,
- "closing fd of %s on %s",
- local->loc.path, priv->children[i]->name);
+int
+afr_sh_data_setattr (call_frame_t *frame, xlator_t *this, struct iatt* stbuf)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+ int i = 0;
+ int call_count = 0;
+ int32_t valid = 0;
- STACK_WIND_COOKIE (frame, afr_sh_data_flush_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->flush,
- sh->healing_fd);
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
- call_count--;
+ valid = (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME);
- STACK_WIND_COOKIE (frame, afr_sh_data_utimes_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->utimens,
- &local->loc, ts);
+ call_count = afr_set_elem_count_get (sh->success,
+ priv->child_count);
+ local->call_count = call_count;
- if (!--call_count)
- break;
- }
+ if (call_count == 0) {
+ GF_ASSERT (0);
+ afr_sh_data_finish (frame, this);
+ return 0;
+ }
- return 0;
-}
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sh->success[i])
+ continue;
+ STACK_WIND_COOKIE (frame, afr_sh_data_setattr_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->setattr,
+ &local->loc, stbuf, valid, NULL);
+
+ if (!--call_count)
+ break;
+ }
+
+ return 0;
+}
int
-afr_sh_data_unlck_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_sh_data_setattr_fstat_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ struct iatt *buf, dict_t *xdata)
{
- afr_local_t * local = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
-
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "locking inode of %s on child %d failed: %s",
- local->loc.path, child_index,
- strerror (op_errno));
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "inode of %s on child %d locked",
- local->loc.path, child_index);
- }
- }
- UNLOCK (&frame->lock);
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int child_index = (long) cookie;
- call_count = afr_frame_return (frame);
+ local = frame->local;
+ sh = &local->self_heal;
+
+ GF_ASSERT (sh->source == child_index);
+ if (op_ret != -1) {
+ sh->buf[child_index] = *buf;
+ afr_sh_data_setattr (frame, this, buf);
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set "
+ "time-stamps after self-heal", local->loc.path);
+ afr_sh_data_fail (frame, this);
+ }
- if (call_count == 0) {
- afr_sh_data_close (frame, this);
- }
+ return 0;
+}
- return 0;
+/*
+ * If there are any writes after the self-heal is triggered then the
+ * stbuf stored in local->self_heal.buf[] will be invalid so we do one more
+ * stat on the source and then set the [am]times
+ */
+int
+afr_sh_set_timestamps (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ STACK_WIND_COOKIE (frame, afr_sh_data_setattr_fstat_cbk,
+ (void *) (long) sh->source,
+ priv->children[sh->source],
+ priv->children[sh->source]->fops->fstat,
+ sh->healing_fd, NULL);
+ return 0;
}
+//Fun fact, lock_cbk is being used for both lock & unlock
+int
+afr_sh_data_unlock (call_frame_t *frame, xlator_t *this, char *dom,
+ afr_lock_cbk_t lock_cbk)
+{
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ if (strcmp (dom, this->name) == 0) {
+ sh->data_lock_held = _gf_false;
+ } else if (strcmp (dom, priv->sh_domain) == 0) {
+ sh->sh_dom_lock_held = _gf_false;
+ } else {
+ ret = -1;
+ goto out;
+ }
+ int_lock->lock_cbk = lock_cbk;
+ int_lock->domain = dom;
+ afr_unlock (frame, this);
+
+out:
+ if (ret) {
+ int_lock->lock_op_ret = -1;
+ int_lock->lock_cbk (frame, this);
+ }
+ return 0;
+}
int
-afr_sh_data_unlock (call_frame_t *frame, xlator_t *this)
+afr_sh_data_finish (call_frame_t *frame, xlator_t *this)
{
- struct flock flock;
- int i = 0;
- int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- afr_self_heal_t * sh = NULL;
+ local = frame->local;
+ sh = &local->self_heal;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "finishing data selfheal of %s", local->loc.path);
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ if (sh->data_lock_held)
+ afr_sh_data_unlock (frame, this, this->name, afr_sh_dom_unlock);
+ else
+ afr_sh_dom_unlock (frame, this);
- call_count = local->child_count;
+ return 0;
+}
- local->call_count = call_count;
+int
+afr_sh_data_fail (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- flock.l_start = 0;
- flock.l_len = 0;
- flock.l_type = F_UNLCK;
+ local = frame->local;
+ sh = &local->self_heal;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "unlocking %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_data_unlck_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- this->name,
- &local->loc, F_SETLK, &flock);
- if (!--call_count)
- break;
- }
- }
+ gf_log (this->name, GF_LOG_DEBUG,
+ "finishing failed data selfheal of %s", local->loc.path);
- return 0;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_data_finish (frame, this);
+ return 0;
}
-
int
-afr_sh_data_finish (call_frame_t *frame, xlator_t *this)
+afr_sh_data_erase_pending_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xattr, dict_t *xdata)
{
- afr_local_t *local = NULL;
+ int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int32_t child_index = (long) cookie;
- local = frame->local;
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Erasing of pending change "
+ "log failed on %s for subvol %s, reason: %s",
+ local->loc.path, priv->children[child_index]->name,
+ strerror (op_errno));
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "finishing data selfheal of %s", local->loc.path);
+ call_count = afr_frame_return (frame);
- afr_sh_data_unlock (frame, this);
+ if (call_count == 0) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
+ if (sh->old_loop_frame)
+ sh_loop_finish (sh->old_loop_frame, this);
+ sh->old_loop_frame = NULL;
+ afr_sh_data_fail (frame, this);
+ goto out;
+ }
+ if (!IA_ISREG (sh->type)) {
+ afr_sh_data_finish (frame, this);
+ goto out;
+ }
+ GF_ASSERT (sh->old_loop_frame);
+ afr_sh_data_lock (frame, this, 0, 0, _gf_true, this->name,
+ afr_post_sh_big_lock_success,
+ afr_post_sh_big_lock_failure);
+ }
+out:
+ return 0;
+}
- return 0;
+int
+afr_sh_data_erase_pending (call_frame_t *frame, xlator_t *this)
+{
+ afr_sh_erase_pending (frame, this, AFR_DATA_TRANSACTION,
+ afr_sh_data_erase_pending_cbk,
+ afr_sh_data_finish);
+ return 0;
}
+int
+afr_sh_data_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+ int call_count = 0;
+ int child_index = (long) cookie;
+
+ local = frame->local;
+ priv = this->private;
+ sh = &local->self_heal;
+
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: Failed to fsync on "
+ "%s - %s", local->loc.path,
+ priv->children[child_index]->name, strerror (op_errno));
+ LOCK (&frame->lock);
+ {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ }
+ UNLOCK (&frame->lock);
+ if (sh->old_loop_frame)
+ sh_loop_finish (sh->old_loop_frame, this);
+ sh->old_loop_frame = NULL;
+ }
+
+ call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC))
+ afr_sh_data_fail (frame, this);
+ else
+ afr_sh_data_erase_pending (frame, this);
+ }
+ return 0;
+}
+/*
+ * Before erasing xattrs, make sure the data is written to disk
+ */
int
-afr_sh_data_erase_pending_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr)
+afr_sh_data_fsync (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+ int i = 0;
+ int call_count = 0;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
+ sh = &local->self_heal;
- LOCK (&frame->lock);
- {
- }
- UNLOCK (&frame->lock);
+ call_count = sh->active_sinks;
+ if (call_count == 0) {
+ afr_sh_data_erase_pending (frame, this);
+ return 0;
+ }
- call_count = afr_frame_return (frame);
+ local->call_count = call_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sh->success[i] || sh->sources[i])
+ continue;
- if (call_count == 0)
- afr_sh_data_finish (frame, this);
+ STACK_WIND_COOKIE (frame, afr_sh_data_fsync_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->fsync,
+ sh->healing_fd, 1, NULL);
+ }
- return 0;
+ return 0;
}
-
-int
-afr_sh_data_erase_pending (call_frame_t *frame, xlator_t *this)
+static struct afr_sh_algorithm *
+sh_algo_from_name (xlator_t *this, char *name)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- dict_t **erase_xattr = NULL;
+ int i = 0;
+ if (name == NULL)
+ goto out;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ while (afr_self_heal_algorithms[i].name) {
+ if (!strcmp (name, afr_self_heal_algorithms[i].name)) {
+ return &afr_self_heal_algorithms[i];
+ }
- afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix, sh->success,
- priv->child_count, AFR_DATA_TRANSACTION);
+ i++;
+ }
- erase_xattr = CALLOC (sizeof (*erase_xattr), priv->child_count);
+out:
+ return NULL;
+}
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- call_count++;
- erase_xattr[i] = get_new_dict();
- dict_ref (erase_xattr[i]);
- }
- }
+static int
+sh_zero_byte_files_exist (afr_local_t *local, int child_count)
+{
+ int i = 0;
+ int ret = 0;
+ afr_self_heal_t *sh = NULL;
+
+ sh = &local->self_heal;
+ for (i = 0; i < child_count; i++) {
+ if (!local->child_up[i] || sh->child_errno[i])
+ continue;
+ if (sh->buf[i].ia_size == 0) {
+ ret = 1;
+ break;
+ }
+ }
- afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr,
- priv->child_count, AFR_DATA_TRANSACTION);
+ return ret;
+}
- local->call_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (!erase_xattr[i])
- continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "erasing pending flags from %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_data_erase_pending_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, erase_xattr[i]);
- if (!--call_count)
- break;
- }
- for (i = 0; i < priv->child_count; i++) {
- if (erase_xattr[i]) {
- dict_unref (erase_xattr[i]);
- }
- }
- FREE (erase_xattr);
+struct afr_sh_algorithm *
+afr_sh_data_pick_algo (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t * priv = NULL;
+ struct afr_sh_algorithm * algo = NULL;
+ afr_local_t * local = NULL;
+ afr_self_heal_t * sh = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+ algo = sh_algo_from_name (this, priv->data_self_heal_algorithm);
+
+ if (algo == NULL) {
+ /* option not set, so fall back on heuristics */
+
+ if (sh_zero_byte_files_exist (local, priv->child_count)
+ || (sh->file_size <= (priv->data_self_heal_window_size *
+ this->ctx->page_size))) {
+
+ /*
+ * If the file does not exist on one of the subvolumes,
+ * or a zero-byte file exists (created by entry self-heal)
+ * the entire content has to be copied anyway, so there
+ * is no benefit from using the "diff" algorithm.
+ *
+ * If the file size is about the same as page size,
+ * the entire file can be read and written with a few
+ * (pipelined) STACK_WINDs, which will be faster
+ * than "diff" which has to read checksums and then
+ * read and write.
+ */
+
+ algo = sh_algo_from_name (this, "full");
+
+ } else {
+ algo = sh_algo_from_name (this, "diff");
+ }
+ }
- return 0;
+ return algo;
}
int
+afr_sh_data_sync_prepare (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ struct afr_sh_algorithm *sh_algo = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ sh->algo_completion_cbk = afr_sh_data_fsync;
+ sh->algo_abort_cbk = afr_sh_data_fail;
+
+ sh_algo = afr_sh_data_pick_algo (frame, this);
+
+ sh->algo = sh_algo;
+ sh_algo->fn (frame, this);
+
+ return 0;
+}
+
+int
afr_sh_data_trim_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = 0;
- int child_index = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1)
- gf_log (this->name, GF_LOG_DEBUG,
- "ftruncate of %s on subvolume %s failed (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- else
- gf_log (this->name, GF_LOG_TRACE,
- "ftruncate of %s on subvolume %s completed",
- local->loc.path,
- priv->children[child_index]->name);
- }
- UNLOCK (&frame->lock);
+ int call_count = 0;
+ int child_index = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- call_count = afr_frame_return (frame);
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
- if (call_count == 0) {
- afr_sh_data_erase_pending (frame, this);
- }
+ child_index = (long) cookie;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "ftruncate of %s on subvolume %s failed (%s)",
+ local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "ftruncate of %s on subvolume %s completed",
+ local->loc.path,
+ priv->children[child_index]->name);
+ }
+ }
+ UNLOCK (&frame->lock);
- return 0;
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC))
+ afr_sh_data_fail (frame, this);
+ else
+ afr_sh_data_sync_prepare (frame, this);
+ }
+
+ return 0;
}
int
afr_sh_data_trim_sinks (call_frame_t *frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
- int *sources = NULL;
- int call_count = 0;
- int i = 0;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int *sources = NULL;
+ int call_count = 0;
+ int i = 0;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
- sources = sh->sources;
- call_count = sh->active_sinks;
+ sources = sh->sources;
+ call_count = sh->active_sinks;
- local->call_count = call_count;
+ local->call_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (sources[i] || !local->child_up[i])
- continue;
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i] || !local->child_up[i])
+ continue;
- STACK_WIND_COOKIE (frame, afr_sh_data_trim_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->ftruncate,
- sh->healing_fd, sh->file_size);
+ STACK_WIND_COOKIE (frame, afr_sh_data_trim_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->ftruncate,
+ sh->healing_fd, sh->file_size,
+ NULL);
- if (!--call_count)
- break;
- }
+ if (!--call_count)
+ break;
+ }
- return 0;
+ return 0;
}
-
int
-afr_sh_data_read_write_iter (call_frame_t *frame, xlator_t *this);
-
-int
-afr_sh_data_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+afr_sh_inode_set_read_ctx (afr_self_heal_t *sh, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
-
- int child_index = (long) cookie;
- int call_count = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- gf_log (this->name, GF_LOG_TRACE,
- "wrote %d bytes of data from %s to child %d, offset %"PRId64"",
- op_ret, local->loc.path, child_index, sh->offset - op_ret);
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "write to %s failed on subvolume %s (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- sh->op_failed = 1;
- }
- }
- UNLOCK (&frame->lock);
+ afr_private_t *priv = NULL;
+ int ret = 0;
+ int i = 0;
+
+ priv = this->private;
+ sh->source = afr_sh_select_source (sh->sources, priv->child_count);
+ if (sh->source < 0) {
+ ret = -1;
+ goto out;
+ }
- call_count = afr_frame_return (frame);
+ /* detect changes not visible through pending flags -- JIC */
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == sh->source || sh->child_errno[i])
+ continue;
- if (call_count == 0) {
- afr_sh_data_read_write_iter (frame, this);
- }
+ if (SIZE_DIFFERS (&sh->buf[i], &sh->buf[sh->source]))
+ sh->sources[i] = 0;
+ }
- return 0;
+ afr_reset_children (sh->fresh_children, priv->child_count);
+ afr_get_fresh_children (sh->success_children, sh->sources,
+ sh->fresh_children, priv->child_count);
+ afr_inode_set_read_ctx (this, sh->inode, sh->source,
+ sh->fresh_children);
+out:
+ return ret;
}
-
-int
-afr_sh_data_read_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count, struct stat *buf,
- struct iobref *iobref)
+char*
+afr_get_sizes_str (afr_local_t *local, struct iatt *bufs, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ char num[1024] = {0};
+ size_t len = 0;
+ char *sizes_str = NULL;
+ size_t off = 0;
+ char *fmt_str = "%llu bytes on %s, ";
+ char *child_down = " %s,";
+ char *child_unknown = " %s,";
+ int down_child_present = 0;
+ int down_count = 0;
+ int unknown_count = 0;
+ int unknown_child_present = 0;
+ char *down_subvol_1 = " down subvolume is ";
+ char *unknown_subvol_1 = " unknown subvolume is ";
+ char *down_subvol_2 = " down subvolumes are ";
+ char *unknown_subvol_2 = " unknown subvolumes are ";
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i] == 1) {
+ len += snprintf (num, sizeof (num), fmt_str,
+ (unsigned long long) bufs[i].ia_size,
+ priv->children[i]->name);
+ } else if (local->child_up[i] == 0) {
+ len += snprintf (num, sizeof (num), child_down,
+ priv->children[i]->name);
+ if (!down_child_present)
+ down_child_present = 1;
+ down_count ++;
+ } else if (local->child_up[i] == -1) {
+ len += snprintf (num, sizeof (num), child_unknown,
+ priv->children[i]->name);
+ if (!unknown_child_present)
+ unknown_child_present = 1;
+ unknown_count++;
+ }
- int child_index = (long) cookie;
- int i = 0;
- int call_count = 0;
+ }
- off_t offset;
+ if (down_child_present) {
+ if (down_count > 1)
+ len += snprintf (num, sizeof (num), "%s",
+ down_subvol_2);
+ else
+ len += snprintf (num, sizeof (num), "%s",
+ down_subvol_1);
+ }
+ if (unknown_child_present) {
+ if (unknown_count > 1)
+ len += snprintf (num, sizeof (num), "%s",
+ unknown_subvol_2);
+ else
+ len += snprintf (num, sizeof (num), "%s",
+ unknown_subvol_1);
+ }
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ len++;//for '\0'
- call_count = sh->active_sinks;
+ sizes_str = GF_CALLOC (len, sizeof (char), gf_common_mt_char);
- local->call_count = call_count;
+ if (!sizes_str)
+ return NULL;
- gf_log (this->name, GF_LOG_TRACE,
- "read %d bytes of data from %s on child %d, offset %"PRId64"",
- op_ret, local->loc.path, child_index, sh->offset);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i] == 1) {
+ off += snprintf (sizes_str + off, len - off, fmt_str,
+ (unsigned long long) bufs[i].ia_size,
+ priv->children[i]->name);
+ }
+ }
- if (op_ret <= 0) {
- afr_sh_data_trim_sinks (frame, this);
- return 0;
- }
+ if (down_child_present) {
+ if (down_count > 1) {
+ off += snprintf (sizes_str + off, len - off, "%s",
+ down_subvol_2);
+ } else {
+ off += snprintf (sizes_str + off, len - off, "%s",
+ down_subvol_1);
+ }
+ }
- /* what if we read less than block size? */
- offset = sh->offset;
- sh->offset += op_ret;
-
- if (sh->file_has_holes) {
- if (iov_0filled (vector, count) == 0) {
- /* the iter function depends on the
- sh->offset already being updated
- above
- */
- afr_sh_data_read_write_iter (frame, this);
- goto out;
- }
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i] == 0) {
+ off += snprintf (sizes_str + off, len - off, child_down,
+ priv->children[i]->name);
+ }
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] || !local->child_up[i])
- continue;
-
- /* this is a sink, so write to it */
- STACK_WIND_COOKIE (frame, afr_sh_data_write_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->writev,
- sh->healing_fd, vector, count, offset,
- iobref);
-
- if (!--call_count)
- break;
- }
+ if (unknown_child_present) {
+ if (unknown_count > 1) {
+ off += snprintf (sizes_str + off, len - off, "%s",
+ unknown_subvol_2);
+ } else {
+ off += snprintf (sizes_str + off, len - off, "%s",
+ unknown_subvol_1);
+ }
+ }
-out:
- return 0;
-}
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i] == -1) {
+ off += snprintf (sizes_str + off, len - off,
+ child_unknown,
+ priv->children[i]->name);
+ }
+ }
-int
-afr_sh_data_read_write (call_frame_t *frame, xlator_t *this)
+ return sizes_str;
+}
+
+char*
+afr_get_sinks_str (xlator_t *this, afr_local_t *local, afr_self_heal_t *sh)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- STACK_WIND_COOKIE (frame, afr_sh_data_read_cbk,
- (void *) (long) sh->source,
- priv->children[sh->source],
- priv->children[sh->source]->fops->readv,
- sh->healing_fd, sh->block_size,
- sh->offset);
-
- return 0;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ char num[1024] = {0};
+ size_t len = 0;
+ char *sinks_str = NULL;
+ char *temp_str = " to sinks ";
+ char *str_format = " %s,";
+ char off = 0;
+
+ priv = this->private;
+
+ len += snprintf (num, sizeof (num), "%s", temp_str);
+ for (i = 0; i < priv->child_count; i++) {
+ if ((sh->sources[i] == 0) && (local->child_up[i] == 1)) {
+ len += snprintf (num, sizeof (num), str_format,
+ priv->children[i]->name);
+ }
+ }
+
+ len ++;
+
+ sinks_str = GF_CALLOC (len, sizeof (char), gf_common_mt_char);
+
+ if (!sinks_str)
+ return NULL;
+
+ off += snprintf (sinks_str + off, len - off, "%s", temp_str);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if ((sh->sources[i] == 0) && (local->child_up[i] == 1)) {
+ off += snprintf (sinks_str + off, len - off,
+ str_format,
+ priv->children[i]->name);
+ }
+ }
+
+ return sinks_str;
+
}
-int
-afr_sh_data_read_write_iter (call_frame_t *frame, xlator_t *this)
+void
+afr_set_data_sh_info_str (afr_local_t *local, afr_self_heal_t *sh, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
+ char *pending_matrix_str = NULL;
+ char *sizes_str = NULL;
+ char *sinks_str = NULL;
+ afr_private_t *priv = NULL;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ priv = this->private;
- if (sh->op_failed) {
- afr_sh_data_finish (frame, this);
- goto out;
- }
+ pending_matrix_str = afr_get_pending_matrix_str (sh->pending_matrix,
+ this);
+ if (!pending_matrix_str)
+ pending_matrix_str = "";
- if (sh->offset >= sh->file_size) {
- gf_log (this->name, GF_LOG_TRACE,
- "closing fd's of %s",
- local->loc.path);
- afr_sh_data_trim_sinks (frame, this);
+ sizes_str = afr_get_sizes_str (local, sh->buf, this);
+ if (!sizes_str)
+ sizes_str = "";
- goto out;
- }
+ sinks_str = afr_get_sinks_str (this, local, sh);
+ if (!sinks_str)
+ sinks_str = "";
- afr_sh_data_read_write (frame, this);
+ gf_asprintf (&sh->data_sh_info, " data self heal from %s %s with "
+ "%s data %s", priv->children[sh->source]->name, sinks_str,
+ sizes_str, pending_matrix_str);
-out:
- return 0;
+ if (pending_matrix_str && strcmp (pending_matrix_str, ""))
+ GF_FREE (pending_matrix_str);
+
+ if (sizes_str && strcmp (sizes_str, ""))
+ GF_FREE (sizes_str);
}
+void
+afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
+{
+ int source = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ source = sh->source;
+ sh->block_size = this->ctx->page_size;
+ sh->file_size = sh->buf[source].ia_size;
+
+ if (FILE_HAS_HOLES (&sh->buf[source]))
+ sh->file_has_holes = 1;
+
+ if (sh->background && sh->unwind && !sh->unwound) {
+ sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno,
+ is_self_heal_failed (sh, AFR_CHECK_SPECIFIC));
+ sh->unwound = _gf_true;
+ }
+
+ afr_sh_mark_source_sinks (frame, this);
+ if (sh->active_sinks == 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "no active sinks for performing self-heal on file %s",
+ local->loc.path);
+ afr_sh_data_finish (frame, this);
+ return;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "self-healing file %s from subvolume %s to %d other",
+ local->loc.path, priv->children[sh->source]->name,
+ sh->active_sinks);
+
+ sh->actual_sh_started = _gf_true;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_SYNC_BEGIN);
+ afr_sh_data_trim_sinks (frame, this);
+}
int
-afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+afr_sh_data_fxattrop_fstat_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- child_index = (long) cookie;
-
- /* TODO: some of the open's might fail.
- In that case, modify cleanup fn to send flush on those
- fd's which are already open */
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "open of %s failed on child %s (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- sh->op_failed = 1;
- }
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int nsources = 0;
+ int ret = 0;
+ int *old_sources = NULL;
+ int tstamp_source = 0;
+ int i = 0;
- }
- UNLOCK (&frame->lock);
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ gf_log (this->name, GF_LOG_DEBUG, "Pending matrix for: %s",
+ lkowner_utoa (&frame->root->lk_owner));
+ if (sh->sync_done) {
+ //store sources before sync so that mtime can be set using the
+ //iatt buf from one of them.
+ old_sources = alloca (priv->child_count*sizeof (*old_sources));
+ memcpy (old_sources, sh->sources,
+ priv->child_count * sizeof (*old_sources));
+ }
- call_count = afr_frame_return (frame);
+ nsources = afr_build_sources (this, sh->xattr, sh->buf, sh->pending_matrix,
+ sh->sources, sh->success_children,
+ AFR_DATA_TRANSACTION, NULL, _gf_true);
+ if ((nsources == -1)
+ && (priv->favorite_child != -1)
+ && (sh->child_errno[priv->favorite_child] == 0)) {
- if (call_count == 0) {
- if (sh->op_failed) {
- afr_sh_data_finish (frame, this);
- return 0;
- }
- gf_log (this->name, GF_LOG_TRACE,
- "fd for %s opened, commencing sync",
- local->loc.path);
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Picking favorite child %s as authentic source to "
+ "resolve conflicting data of %s",
+ priv->children[priv->favorite_child]->name,
+ local->loc.path);
- gf_log (this->name, GF_LOG_TRACE,
- "sourcing file %s from %s to other sinks",
- local->loc.path, priv->children[sh->source]->name);
+ sh->sources[priv->favorite_child] = 1;
- afr_sh_data_read_write (frame, this);
- }
+ nsources = afr_sh_source_count (sh->sources,
+ priv->child_count);
+ }
- return 0;
-}
+ if (nsources == -1) {
+ afr_sh_print_split_brain_log (sh->pending_matrix, this,
+ local->loc.path);
+ afr_set_split_brain (this, sh->inode, DONT_KNOW, SPB);
+
+ afr_sh_data_fail (frame, this);
+ return 0;
+ }
+
+ afr_set_split_brain (this, sh->inode, DONT_KNOW, NO_SPB);
+
+ ret = afr_sh_inode_set_read_ctx (sh, this);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "No active sources found.");
+
+ afr_sh_data_fail (frame, this);
+ return 0;
+ }
+ if (sh->sync_done) {
+ /* Perform setattr from one of the old_sources if possible
+ * Because only they have the correct mtime, the new sources
+ * (i.e. old sinks) have mtime from last writev in sync.
+ */
+ tstamp_source = sh->source;
+ for (i = 0; i < priv->child_count; i++) {
+ if (old_sources[i] && sh->sources[i])
+ tstamp_source = i;
+ }
+ afr_sh_data_setattr (frame, this, &sh->buf[tstamp_source]);
+ } else {
+ afr_set_data_sh_info_str (local, sh, this);
+ if (nsources == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "No self-heal needed for %s",
+ local->loc.path);
+
+ afr_sh_data_finish (frame, this);
+ return 0;
+ }
+
+ if (sh->do_data_self_heal &&
+ afr_data_self_heal_enabled (priv->data_self_heal))
+ afr_sh_data_fix (frame, this);
+ else
+ afr_sh_data_finish (frame, this);
+ }
+ return 0;
+}
int
-afr_sh_data_open (call_frame_t *frame, xlator_t *this)
+afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
+ dict_t **xattr,
+ afr_transaction_type txn_type,
+ uuid_t gfid)
{
- int i = 0;
- int call_count = 0;
-
- int source = -1;
- int *sources = NULL;
+ afr_private_t *priv = NULL;
+ int read_child = -1;
+ int32_t **pending_matrix = NULL;
+ int32_t *sources = NULL;
+ int32_t *success_children = NULL;
+ struct iatt *bufs = NULL;
+ int32_t nsources = 0;
+ int32_t prev_read_child = -1;
+ int32_t config_read_child = -1;
+ int32_t subvol_status = 0;
+
+ priv = this->private;
+ bufs = local->cont.lookup.bufs;
+ success_children = local->cont.lookup.success_children;
+
+ pending_matrix = local->cont.lookup.pending_matrix;
+ sources = local->cont.lookup.sources;
+ memset (sources, 0, sizeof (*sources) * priv->child_count);
+
+ nsources = afr_build_sources (this, xattr, bufs, pending_matrix,
+ sources, success_children, txn_type,
+ &subvol_status, _gf_false);
+ if (subvol_status & SPLIT_BRAIN) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s: Possible split-brain",
+ local->loc.path);
+ switch (txn_type) {
+ case AFR_DATA_TRANSACTION:
+ local->cont.lookup.possible_spb = _gf_true;
+ nsources = 1;
+ sources[success_children[0]] = 1;
+ break;
+ case AFR_ENTRY_TRANSACTION:
+ read_child = afr_get_no_xattr_dir_read_child (this,
+ success_children,
+ bufs);
+ sources[read_child] = 1;
+ nsources = 1;
+ break;
+ default:
+ break;
+ }
+ }
+ if (nsources < 0)
+ goto out;
+
+ prev_read_child = local->read_child_index;
+ config_read_child = priv->read_child;
+ read_child = afr_select_read_child_from_policy (success_children,
+ priv->child_count,
+ prev_read_child,
+ config_read_child,
+ sources,
+ priv->hash_mode, gfid);
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "returning read_child: %d",
+ read_child);
+ return read_child;
+}
- fd_t *fd = NULL;
+int
+afr_sh_data_fstat_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ struct iatt *buf, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int call_count = -1;
+ int child_index = (long) cookie;
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- afr_self_heal_t *sh = NULL;
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret != -1) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "fstat of %s on %s succeeded",
+ local->loc.path,
+ priv->children[child_index]->name);
+
+ sh->buf[child_index] = *buf;
+ sh->success_children[sh->success_count] = child_index;
+ sh->success_count++;
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "%s: fstat failed "
+ "on %s, reason %s", local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ sh->child_errno[child_index] = op_errno;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ /* Previous versions of glusterfs might have set
+ * the pending data xattrs which need to be erased
+ */
+ if (!afr_sh_data_proceed (sh->success_count)) {
+ gf_log (this->name, GF_LOG_ERROR, "inspecting metadata "
+ "succeeded on < %d children, aborting "
+ "self-heal for %s", AFR_SH_MIN_PARTICIPANTS,
+ local->loc.path);
+ afr_sh_data_fail (frame, this);
+ goto out;
+ }
+ afr_sh_data_fxattrop_fstat_done (frame, this);
+ }
+out:
+ return 0;
+}
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
- call_count = sh->active_sinks + 1;
- local->call_count = call_count;
+int
+afr_sh_data_fstat (call_frame_t *frame, xlator_t *this)
+{
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int i = 0;
+ int child = 0;
+ int32_t *fstat_children = NULL;
- fd = fd_create (local->loc.inode, frame->root->pid);
- sh->healing_fd = fd;
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
- source = local->self_heal.source;
- sources = local->self_heal.sources;
+ fstat_children = memdup (sh->success_children,
+ sizeof (*fstat_children) * priv->child_count);
+ if (!fstat_children) {
+ afr_sh_data_fail (frame, this);
+ goto out;
+ }
+ call_count = sh->success_count;
+ local->call_count = call_count;
+
+ memset (sh->buf, 0, sizeof (*sh->buf) * priv->child_count);
+ afr_reset_children (sh->success_children, priv->child_count);
+ sh->success_count = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ child = fstat_children[i];
+ if (child == -1)
+ break;
+ STACK_WIND_COOKIE (frame, afr_sh_data_fstat_cbk,
+ (void *) (long) child,
+ priv->children[child],
+ priv->children[child]->fops->fstat,
+ sh->healing_fd, NULL);
+ --call_count;
+ }
+ GF_ASSERT (!call_count);
+out:
+ GF_FREE (fstat_children);
+ return 0;
+}
- sh->block_size = 65536;
- sh->file_size = sh->buf[source].st_size;
+void
+afr_sh_common_fxattrop_resp_handler (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xattr)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int child_index = (long) cookie;
- if (FILE_HAS_HOLES (&sh->buf[source]))
- sh->file_has_holes = 1;
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret != -1) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "fxattrop of %s on %s succeeded",
+ local->loc.path,
+ priv->children[child_index]->name);
+
+ sh->xattr[child_index] = dict_ref (xattr);
+ sh->success_children[sh->success_count] = child_index;
+ sh->success_count++;
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "fxattrop of %s "
+ "failed on %s, reason %s", local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ sh->child_errno[child_index] = op_errno;
+ }
+ }
+ UNLOCK (&frame->lock);
+}
- /* open source */
- STACK_WIND_COOKIE (frame, afr_sh_data_open_cbk,
- (void *) (long) source,
- priv->children[source],
- priv->children[source]->fops->open,
- &local->loc, O_RDONLY|O_LARGEFILE, fd);
- call_count--;
+int
+afr_sh_data_fxattrop_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xattr, dict_t *xdata)
+{
+ int call_count = -1;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- /* open sinks */
- for (i = 0; i < priv->child_count; i++) {
- if(sources[i] || !local->child_up[i])
- continue;
-
- STACK_WIND_COOKIE (frame, afr_sh_data_open_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->open,
- &local->loc,
- O_WRONLY|O_LARGEFILE, fd);
-
- if (!--call_count)
- break;
- }
+ local = frame->local;
+ sh = &local->self_heal;
- return 0;
+ afr_sh_common_fxattrop_resp_handler (frame, cookie, this, op_ret,
+ op_errno, xattr);
+
+ call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ if (!afr_sh_data_proceed (sh->success_count)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s, inspecting "
+ "change log succeeded on < %d children",
+ local->loc.path, AFR_SH_MIN_PARTICIPANTS);
+ afr_sh_data_fail (frame, this);
+ goto out;
+ }
+ afr_sh_data_fstat (frame, this);
+ }
+out:
+ return 0;
}
int
-afr_sh_data_sync_prepare (call_frame_t *frame, xlator_t *this)
+afr_sh_data_fxattrop (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int active_sinks = 0;
- int source = 0;
- int i = 0;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t **xattr_req;
+ int32_t *zero_pending = NULL;
+ int call_count = 0;
+ int i = 0;
+ int ret = 0;
+ int j;
+
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ call_count = afr_up_children_count (local->child_up,
+ priv->child_count);
- source = sh->source;
+ local->call_count = call_count;
+
+ xattr_req = GF_CALLOC(priv->child_count, sizeof(struct dict_t *),
+ gf_afr_mt_dict_t);
+ if (!xattr_req)
+ goto out;
for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] == 0 && local->child_up[i] == 1) {
- active_sinks++;
- sh->success[i] = 1;
+ xattr_req[i] = dict_new();
+ if (!xattr_req[i]) {
+ ret = -1;
+ goto out;
}
}
- sh->success[source] = 1;
-
- if (active_sinks == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "no active sinks for performing self-heal on file %s",
- local->loc.path);
- afr_sh_data_finish (frame, this);
- return 0;
+
+ for (i = 0; i < priv->child_count; i++) {
+ for (j = 0; j < priv->child_count; j++) {
+ zero_pending = GF_CALLOC (3, sizeof (*zero_pending),
+ gf_afr_mt_int32_t);
+ if (!zero_pending) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_dynptr (xattr_req[i], priv->pending_key[j],
+ zero_pending,
+ 3 * sizeof (*zero_pending));
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unable to set dict value");
+ goto out;
+ } else {
+ zero_pending = NULL;
+ }
+ }
}
- sh->active_sinks = active_sinks;
- gf_log (this->name, GF_LOG_DEBUG,
- "self-healing file %s from subvolume %s to %d other",
- local->loc.path, priv->children[source]->name, active_sinks);
+ afr_reset_xattr (sh->xattr, priv->child_count);
+ afr_reset_children (sh->success_children, priv->child_count);
+ memset (sh->child_errno, 0,
+ sizeof (*sh->child_errno) * priv->child_count);
+ sh->success_count = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_sh_data_fxattrop_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ sh->healing_fd, GF_XATTROP_ADD_ARRAY,
+ xattr_req[i], NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+out:
+ if (xattr_req) {
+ for (i = 0; i < priv->child_count; i++)
+ if (xattr_req[i])
+ dict_unref(xattr_req[i]);
+ GF_FREE(xattr_req);
+ }
- afr_sh_data_open (frame, this);
+ if (ret) {
+ GF_FREE (zero_pending);
+ afr_sh_data_fail (frame, this);
+ }
- return 0;
+ return 0;
}
+int
+afr_sh_data_big_lock_success (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ sh->data_lock_held = _gf_true;
+ afr_sh_data_fxattrop (frame, this);
+ return 0;
+}
int
-afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
+afr_sh_dom_lock_success (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int nsources = 0;
- int source = 0;
- int i = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
- afr_sh_build_pending_matrix (priv, sh->pending_matrix, sh->xattr,
- priv->child_count, AFR_DATA_TRANSACTION);
+ sh->sh_dom_lock_held = _gf_true;
+ afr_sh_data_lock (frame, this, 0, 0, _gf_true, this->name,
+ afr_sh_data_big_lock_success,
+ afr_sh_data_fail);
+ return 0;
+}
- afr_sh_print_pending_matrix (sh->pending_matrix, this);
+int
+afr_sh_data_post_blocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- nsources = afr_sh_mark_sources (sh, priv->child_count,
- AFR_SELF_HEAL_DATA);
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ sh = &local->self_heal;
- afr_sh_supress_errenous_children (sh->sources, sh->child_errno,
- priv->child_count);
+ if (int_lock->lock_op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Blocking data inodelks "
+ "failed for %s. by %s",
+ local->loc.path, lkowner_utoa (&frame->root->lk_owner));
- if (nsources == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "No self-heal needed for %s",
- local->loc.path);
+ sh->data_lock_failure_handler (frame, this);
+ } else {
- afr_sh_data_finish (frame, this);
- return 0;
+ gf_log (this->name, GF_LOG_DEBUG, "Blocking data inodelks "
+ "done for %s by %s. Proceding to self-heal",
+ local->loc.path, lkowner_utoa (&frame->root->lk_owner));
+
+ sh->data_lock_success_handler (frame, this);
}
- if ((nsources == -1)
- && (priv->favorite_child != -1)
- && (sh->child_errno[priv->favorite_child] == 0)) {
+ return 0;
+}
- gf_log (this->name, GF_LOG_DEBUG,
- "Picking favorite child %s as authentic source to resolve conflicting data of %s",
- priv->children[priv->favorite_child]->name,
- local->loc.path);
+int
+afr_sh_data_post_nonblocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- sh->sources[priv->favorite_child] = 1;
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ sh = &local->self_heal;
- nsources = afr_sh_source_count (sh->sources,
- priv->child_count);
- }
+ if (int_lock->lock_op_ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG, "Non Blocking data inodelks "
+ "failed for %s. by %s",
+ local->loc.path, lkowner_utoa (&frame->root->lk_owner));
- if (nsources == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to self-heal contents of '%s' (possible split-brain). "
- "Please delete the file from all but the preferred "
- "subvolume.", local->loc.path);
+ if (!sh->data_lock_block) {
+ sh->data_lock_failure_handler(frame, this);
+ } else {
+ int_lock->lock_cbk =
+ afr_sh_data_post_blocking_inodelk_cbk;
+ afr_blocking_lock (frame, this);
+ }
+ } else {
- local->govinda_gOvinda = 1;
+ gf_log (this->name, GF_LOG_DEBUG, "Non Blocking data inodelks "
+ "done for %s by %s. Proceeding to self-heal",
+ local->loc.path, lkowner_utoa (&frame->root->lk_owner));
+ sh->data_lock_success_handler (frame, this);
+ }
- afr_sh_data_finish (frame, this);
- return 0;
- }
+ return 0;
+}
- source = afr_sh_select_source (sh->sources, priv->child_count);
-
- if (source == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "No active sources found.");
+int
+afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this, char *dom,
+ off_t start, off_t len)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
+ afr_local_t *local = NULL;
- afr_sh_data_finish (frame, this);
- return 0;
- }
+ local = frame->local;
+ int_lock = &local->internal_lock;
- sh->source = source;
- local->cont.lookup.buf.st_size = sh->buf[source].st_size;
+ int_lock->transaction_lk_type = AFR_SELFHEAL_LK;
+ int_lock->selfheal_lk_type = AFR_DATA_SELF_HEAL_LK;
- /* detect changes not visible through pending flags -- JIC */
- for (i = 0; i < priv->child_count; i++) {
- if (i == source || sh->child_errno[i])
- continue;
+ afr_set_lock_number (frame, this);
- if (SIZE_DIFFERS (&sh->buf[i], &sh->buf[source]))
- sh->sources[i] = 0;
- }
+ int_lock->lock_cbk = afr_sh_data_post_nonblocking_inodelk_cbk;
- afr_sh_data_sync_prepare (frame, this);
+ int_lock->domain = dom;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ inodelk->flock.l_start = start;
+ inodelk->flock.l_len = len;
+ inodelk->flock.l_type = F_WRLCK;
- return 0;
-}
+ afr_nonblocking_inodelk (frame, this);
+ return 0;
+}
int
-afr_sh_data_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf, dict_t *xattr)
+afr_post_sh_big_lock_success (call_frame_t *frame, xlator_t *this)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- int call_count = -1;
- int child_index = (long) cookie;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- sh->xattr[child_index] = dict_ref (xattr);
- sh->buf[child_index] = *buf;
- }
- }
- UNLOCK (&frame->lock);
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+
+ GF_ASSERT (sh->old_loop_frame);
+ sh_loop_finish (sh->old_loop_frame, this);
+ sh->old_loop_frame = NULL;
+ sh->data_lock_held = _gf_true;
+ sh->sync_done = _gf_true;
+ afr_sh_data_fxattrop (frame, this);
+ return 0;
+}
- call_count = afr_frame_return (frame);
+int
+afr_post_sh_big_lock_failure (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- if (call_count == 0) {
- afr_sh_data_fix (frame, this);
- }
+ local = frame->local;
+ sh = &local->self_heal;
- return 0;
+ GF_ASSERT (sh->old_loop_frame);
+ sh_loop_finish (sh->old_loop_frame, this);
+ sh->old_loop_frame = NULL;
+ afr_sh_set_timestamps (frame, this);
+ return 0;
}
int
-afr_sh_data_lookup (call_frame_t *frame, xlator_t *this)
+afr_sh_data_lock (call_frame_t *frame, xlator_t *this,
+ off_t start, off_t len, gf_boolean_t block,
+ char *dom, afr_lock_cbk_t success_handler,
+ afr_lock_cbk_t failure_handler)
{
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- dict_t *xattr_req = NULL;
+ afr_local_t * local = NULL;
+ afr_self_heal_t * sh = NULL;
- int call_count = 0;
- int i = 0;
- int ret = 0;
+ local = frame->local;
+ sh = &local->self_heal;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ sh->data_lock_success_handler = success_handler;
+ sh->data_lock_failure_handler = failure_handler;
+ sh->data_lock_block = block;
+ return afr_sh_data_lock_rec (frame, this, dom, start, len);
+}
- call_count = local->child_count;
+int
+afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int child_index = 0;
- local->call_count = call_count;
-
- xattr_req = dict_new();
- if (xattr_req) {
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_uint64 (xattr_req, priv->pending_key[i],
- 3 * sizeof(int32_t));
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ child_index = (long) cookie;
+
+ /* TODO: some of the open's might fail.
+ In that case, modify cleanup fn to send flush on those
+ fd's which are already open */
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "open of %s failed on child %s (%s)",
+ local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ } else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "open of %s succeeded on child %s",
+ local->loc.path,
+ priv->children[child_index]->name);
}
}
+ UNLOCK (&frame->lock);
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_sh_data_lookup_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- &local->loc, xattr_req);
- if (!--call_count)
- break;
- }
- }
-
- if (xattr_req)
- dict_unref (xattr_req);
+ call_count = afr_frame_return (frame);
- return 0;
+ if (call_count == 0) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
+ afr_sh_data_fail (frame, this);
+ return 0;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "fd for %s opened, commencing sync",
+ local->loc.path);
+
+ afr_sh_data_lock (frame, this, 0, 0, _gf_true, priv->sh_domain,
+ afr_sh_dom_lock_success, afr_sh_data_fail);
+ }
+
+ return 0;
}
int
-afr_sh_data_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_sh_data_open (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
-
- /* TODO: what if lock fails? */
-
- local = frame->local;
- sh = &local->self_heal;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- sh->op_failed = 1;
-
- gf_log (this->name,
- GF_LOG_DEBUG,
- "locking of %s on child %d failed: %s",
- local->loc.path, child_index,
- strerror (op_errno));
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "inode of %s on child %d locked",
- local->loc.path, child_index);
- }
- }
- UNLOCK (&frame->lock);
+ int i = 0;
+ int call_count = 0;
+ fd_t *fd = NULL;
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ afr_self_heal_t *sh = NULL;
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (sh->op_failed) {
- afr_sh_data_finish (frame, this);
- return 0;
- }
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
- afr_sh_data_lookup (frame, this);
- }
+ call_count = afr_up_children_count (local->child_up, priv->child_count);
+ local->call_count = call_count;
- return 0;
-}
+ fd = fd_create (local->loc.inode, frame->root->pid);
+ sh->healing_fd = fd;
+ /* open sinks */
+ for (i = 0; i < priv->child_count; i++) {
+ if(!local->child_up[i])
+ continue;
-int
-afr_sh_data_lock (call_frame_t *frame, xlator_t *this)
-{
- struct flock flock;
- int i = 0;
- int call_count = 0;
+ STACK_WIND_COOKIE (frame, afr_sh_data_open_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->open,
+ &local->loc,
+ O_RDWR|O_LARGEFILE, fd, NULL);
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- afr_self_heal_t * sh = NULL;
+ if (!--call_count)
+ break;
+ }
+ return 0;
+}
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+void
+afr_sh_non_reg_fix (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+
+ if (op_ret < 0) {
+ afr_sh_data_fail (frame, this);
+ return;
+ }
- call_count = local->child_count;
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
- local->call_count = call_count;
+ for (i = 0; i < priv->child_count ; i++) {
+ if (1 == local->child_up[i])
+ sh->success[i] = 1;
+ }
- flock.l_start = 0;
- flock.l_len = 0;
- flock.l_type = F_WRLCK;
+ afr_sh_erase_pending (frame, this, AFR_DATA_TRANSACTION,
+ afr_sh_data_erase_pending_cbk,
+ afr_sh_data_finish);
+}
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "locking %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_data_lock_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- this->name,
- &local->loc, F_SETLK, &flock);
- if (!--call_count)
- break;
- }
- }
+int
+afr_sh_non_reg_lock_success (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- return 0;
+ local = frame->local;
+ sh = &local->self_heal;
+ sh->data_lock_held = _gf_true;
+ afr_sh_common_lookup (frame, this, &local->loc,
+ afr_sh_non_reg_fix, NULL,
+ AFR_LOOKUP_FAIL_CONFLICTS |
+ AFR_LOOKUP_FAIL_MISSING_GFIDS,
+ NULL);
+ return 0;
}
+gf_boolean_t
+afr_can_start_data_self_heal (afr_self_heal_t *sh, afr_private_t *priv)
+{
+ if (sh->force_confirm_spb)
+ return _gf_true;
+ if (sh->do_data_self_heal &&
+ afr_data_self_heal_enabled (priv->data_self_heal))
+ return _gf_true;
+ return _gf_false;
+}
int
afr_self_heal_data (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = this->private;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = this->private;
+ int ret = -1;
+ local = frame->local;
+ sh = &local->self_heal;
+
+ sh->sh_type_in_action = AFR_SELF_HEAL_DATA;
+
+ if (afr_can_start_data_self_heal (sh, priv)) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
+ ret = afr_inodelk_init (&local->internal_lock.inodelk[1],
+ priv->sh_domain, priv->child_count);
+ if (ret < 0) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_data_done (frame, this);
+ return 0;
+ }
- local = frame->local;
- sh = &local->self_heal;
-
- if (local->need_data_self_heal && priv->data_self_heal) {
- afr_sh_data_lock (frame, this);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "not doing data self heal on %s",
- local->loc.path);
- afr_sh_data_done (frame, this);
- }
+ if (IA_ISREG (sh->type)) {
+ afr_sh_data_open (frame, this);
+ } else {
+ afr_sh_data_lock (frame, this, 0, 0, _gf_true,
+ this->name,
+ afr_sh_non_reg_lock_success,
+ afr_sh_data_fail);
+ }
+ } else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "not doing data self heal on %s",
+ local->loc.path);
+ afr_sh_data_done (frame, this);
+ }
- return 0;
+ return 0;
}
-
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 80608dbd2..53491a1d7 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -30,6 +21,7 @@
#endif
#include "glusterfs.h"
+#include "inode.h"
#include "afr.h"
#include "dict.h"
#include "xlator.h"
@@ -48,2059 +40,2331 @@
#include "afr-self-heal.h"
#include "afr-self-heal-common.h"
+#define AFR_INIT_SH_FRAME_VALS(_frame, _local, _sh, _sh_frame, _sh_local, _sh_sh)\
+ do {\
+ _local = _frame->local;\
+ _sh = &_local->self_heal;\
+ _sh_frame = _sh->sh_frame;\
+ _sh_local = _sh_frame->local;\
+ _sh_sh = &_sh_local->self_heal;\
+ } while (0);
-
+int
+afr_sh_entry_impunge_create_file (call_frame_t *impunge_frame, xlator_t *this,
+ int child_index);
int
afr_sh_entry_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- /*
- TODO: cleanup sh->*
- */
-
- gf_log (this->name, GF_LOG_TRACE,
- "self heal of %s completed",
- local->loc.path);
-
- sh->completion_cbk (frame, this);
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- return 0;
-}
+ local = frame->local;
+ sh = &local->self_heal;
+ sh->completion_cbk (frame, this);
-int
-afr_sh_entry_unlck_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
-
- /* TODO: what if lock fails? */
-
- local = frame->local;
- sh = &local->self_heal;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unlocking inode of %s on child %d failed: %s",
- local->loc.path, child_index,
- strerror (op_errno));
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "unlocked inode of %s on child %d",
- local->loc.path, child_index);
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (sh->healing_fd)
- fd_unref (sh->healing_fd);
- sh->healing_fd = NULL;
- afr_sh_entry_done (frame, this);
- }
-
- return 0;
+ return 0;
}
int
afr_sh_entry_unlock (call_frame_t *frame, xlator_t *this)
{
- int i = 0;
- int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- afr_self_heal_t * sh = NULL;
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ int_lock->lock_cbk = afr_sh_entry_done;
+ afr_unlock (frame, this);
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- call_count = local->child_count;
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "unlocking %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_entry_unlck_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- &local->loc, NULL,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ return 0;
}
int
afr_sh_entry_finish (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- gf_log (this->name, GF_LOG_TRACE,
- "finishing entry selfheal of %s", local->loc.path);
+ gf_log (this->name, GF_LOG_TRACE,
+ "finishing entry selfheal of %s", local->loc.path);
- afr_sh_entry_unlock (frame, this);
+ afr_sh_entry_unlock (frame, this);
- return 0;
+ return 0;
}
int
afr_sh_entry_erase_pending_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr)
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xattr, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
+ long i = 0;
+ int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *orig_local = NULL;
+ call_frame_t *orig_frame = NULL;
+ afr_private_t *priv = NULL;
+ int32_t read_child = -1;
+
+ local = frame->local;
+ priv = this->private;
+ sh = &local->self_heal;
+ i = (long)cookie;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
- LOCK (&frame->lock);
- {
- }
- UNLOCK (&frame->lock);
+ afr_children_add_child (sh->fresh_children, i, priv->child_count);
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: failed to erase pending xattrs on %s (%s)",
+ local->loc.path, priv->children[i]->name,
+ strerror (op_errno));
+ }
- call_count = afr_frame_return (frame);
+ call_count = afr_frame_return (frame);
- if (call_count == 0)
- afr_sh_entry_finish (frame, this);
+ if (call_count == 0) {
+ if (sh->source == -1) {
+ //this happens if the forced merge option is set
+ read_child = sh->fresh_children[0];
+ } else {
+ read_child = sh->source;
+ }
+ afr_inode_set_read_ctx (this, sh->inode, read_child,
+ sh->fresh_children);
+ orig_frame = sh->orig_frame;
+ orig_local = orig_frame->local;
+
+ if (sh->source != -1) {
+ orig_local->cont.lookup.buf.ia_nlink = sh->buf[sh->source].ia_nlink;
+ }
- return 0;
+ afr_sh_entry_finish (frame, this);
+ }
+
+ return 0;
}
int
afr_sh_entry_erase_pending (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- dict_t **erase_xattr = NULL;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix, sh->success,
- priv->child_count, AFR_ENTRY_TRANSACTION);
-
- erase_xattr = CALLOC (sizeof (*erase_xattr), priv->child_count);
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- call_count++;
-
- erase_xattr[i] = get_new_dict();
- dict_ref (erase_xattr[i]);
- }
- }
-
- afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr,
- priv->child_count, AFR_ENTRY_TRANSACTION);
-
- local->call_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (!erase_xattr[i])
- continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "erasing pending flags from %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_entry_erase_pending_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, erase_xattr[i]);
- if (!--call_count)
- break;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (erase_xattr[i]) {
- dict_unref (erase_xattr[i]);
- }
- }
- FREE (erase_xattr);
-
- return 0;
-}
-
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ local = frame->local;
+ sh = &local->self_heal;
-static int
-next_active_source (call_frame_t *frame, xlator_t *this,
- int current_active_source)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int source = -1;
- int next_active_source = -1;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- source = sh->source;
-
- if (source != -1) {
- if (current_active_source != source)
- next_active_source = source;
- goto out;
- }
-
- /*
- the next active sink becomes the source for the
- 'conservative decision' of merging all entries
- */
-
- for (i = 0; i < priv->child_count; i++) {
- if ((sh->sources[i] == 0)
- && (local->child_up[i] == 1)
- && (i > current_active_source)) {
-
- next_active_source = i;
- break;
- }
- }
+ if (sh->entries_skipped) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ goto out;
+ }
+ afr_sh_erase_pending (frame, this, AFR_ENTRY_TRANSACTION,
+ afr_sh_entry_erase_pending_cbk,
+ afr_sh_entry_finish);
+ return 0;
out:
- return next_active_source;
+ afr_sh_entry_finish (frame, this);
+ return 0;
}
static int
-next_active_sink (call_frame_t *frame, xlator_t *this,
- int current_active_sink)
+next_active_source (call_frame_t *frame, xlator_t *this,
+ int current_active_source)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int next_active_sink = -1;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- /*
- the next active sink becomes the source for the
- 'conservative decision' of merging all entries
- */
-
- for (i = 0; i < priv->child_count; i++) {
- if ((sh->sources[i] == 0)
- && (local->child_up[i] == 1)
- && (i > current_active_sink)) {
-
- next_active_sink = i;
- break;
- }
- }
-
- return next_active_sink;
-}
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int source = -1;
+ int next_active_source = -1;
+ int i = 0;
+
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+ source = sh->source;
-int
-build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name)
-{
- int ret = -1;
+ if (source != -1) {
+ if (current_active_source != source)
+ next_active_source = source;
+ goto out;
+ }
- if (!child) {
- goto out;
- }
+ /*
+ the next active sink becomes the source for the
+ 'conservative decision' of merging all entries
+ */
- if (strcmp (parent->path, "/") == 0)
- ret = asprintf ((char **)&child->path, "/%s", name);
- else
- ret = asprintf ((char **)&child->path, "%s/%s", parent->path,
- name);
+ for (i = 0; i < priv->child_count; i++) {
+ if ((sh->sources[i] == 0)
+ && (local->child_up[i] == 1)
+ && (i > current_active_source)) {
- if (-1 == ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "asprintf failed while setting child path");
+ next_active_source = i;
+ break;
+ }
}
+out:
+ return next_active_source;
+}
- if (!child->path) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
- child->name = strrchr (child->path, '/');
- if (child->name)
- child->name++;
- child->parent = inode_ref (parent->inode);
- child->inode = inode_new (parent->inode->table);
+static int
+next_active_sink (call_frame_t *frame, xlator_t *this,
+ int current_active_sink)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int next_active_sink = -1;
+ int i = 0;
+
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+
+ /*
+ the next active sink becomes the source for the
+ 'conservative decision' of merging all entries
+ */
- if (!child->inode) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if ((sh->sources[i] == 0)
+ && (local->child_up[i] == 1)
+ && (i > current_active_sink)) {
- ret = 0;
-out:
- if (ret == -1)
- loc_wipe (child);
+ next_active_sink = i;
+ break;
+ }
+ }
- return ret;
+ return next_active_sink;
}
+int
+afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this);
+
+int
+afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this);
int
afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this);
int
afr_sh_entry_expunge_subvol (call_frame_t *frame, xlator_t *this,
- int active_src);
+ int active_src);
int
afr_sh_entry_expunge_entry_done (call_frame_t *frame, xlator_t *this,
- int active_src)
+ int active_src, int32_t op_ret,
+ int32_t op_errno)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = 0;
+ int call_count = 0;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ call_count = afr_frame_return (frame);
- LOCK (&frame->lock);
- {
- }
- UNLOCK (&frame->lock);
+ if (call_count == 0)
+ afr_sh_entry_expunge_subvol (frame, this, active_src);
- call_count = afr_frame_return (frame);
+ return 0;
+}
- if (call_count == 0)
- afr_sh_entry_expunge_subvol (frame, this, active_src);
+int
+afr_sh_entry_expunge_parent_setattr_cbk (call_frame_t *expunge_frame,
+ void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop,
+ dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *expunge_local = NULL;
+ afr_self_heal_t *expunge_sh = NULL;
+ call_frame_t *frame = NULL;
+ int active_src = (long) cookie;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ expunge_local = expunge_frame->local;
+ expunge_sh = &expunge_local->self_heal;
+ frame = expunge_sh->sh_frame;
+ local = frame->local;
+ sh = &local->self_heal;
+
+ if (op_ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "setattr on parent directory of %s on subvolume %s failed: %s",
+ expunge_local->loc.path,
+ priv->children[active_src]->name, strerror (op_errno));
+ }
+
+ AFR_STACK_DESTROY (expunge_frame);
+ sh->expunge_done (frame, this, active_src, op_ret, op_errno);
- return 0;
+ return 0;
}
int
afr_sh_entry_expunge_remove_cbk (call_frame_t *expunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- int active_src = 0;
- call_frame_t *frame = NULL;
-
-
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
-
- active_src = (long) cookie;
-
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "removed %s on %s",
- expunge_local->loc.path,
- priv->children[active_src]->name);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "removing %s on %s failed (%s)",
- expunge_local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- }
-
- AFR_STACK_DESTROY (expunge_frame);
- afr_sh_entry_expunge_entry_done (frame, this, active_src);
-
- return 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *expunge_local = NULL;
+ afr_self_heal_t *expunge_sh = NULL;
+ int active_src = 0;
+ int32_t valid = 0;
+
+ priv = this->private;
+ expunge_local = expunge_frame->local;
+ expunge_sh = &expunge_local->self_heal;
+
+ active_src = (long) cookie;
+
+ if (op_ret == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "removed %s on %s",
+ expunge_local->loc.path,
+ priv->children[active_src]->name);
+ } else {
+ gf_log (this->name, GF_LOG_INFO,
+ "removing %s on %s failed (%s)",
+ expunge_local->loc.path,
+ priv->children[active_src]->name,
+ strerror (op_errno));
+ }
+
+ valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
+
+ STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_parent_setattr_cbk,
+ (void *) (long) active_src,
+ priv->children[active_src],
+ priv->children[active_src]->fops->setattr,
+ &expunge_sh->parent_loc,
+ &expunge_sh->parentbuf,
+ valid, NULL);
+
+ return 0;
}
int
-afr_sh_entry_expunge_rmdir (call_frame_t *expunge_frame, xlator_t *this,
- int active_src)
+afr_sh_entry_expunge_unlink (call_frame_t *expunge_frame, xlator_t *this,
+ int active_src)
{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *expunge_local = NULL;
- priv = this->private;
- expunge_local = expunge_frame->local;
+ priv = this->private;
+ expunge_local = expunge_frame->local;
- gf_log (this->name, GF_LOG_TRACE,
- "removing directory %s on %s",
- expunge_local->loc.path, priv->children[active_src]->name);
+ gf_log (this->name, GF_LOG_TRACE,
+ "expunging file %s on %s",
+ expunge_local->loc.path, priv->children[active_src]->name);
- STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_remove_cbk,
- (void *) (long) active_src,
- priv->children[active_src],
- priv->children[active_src]->fops->rmdir,
- &expunge_local->loc);
+ STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_remove_cbk,
+ (void *) (long) active_src,
+ priv->children[active_src],
+ priv->children[active_src]->fops->unlink,
+ &expunge_local->loc, 0, NULL);
- return 0;
+ return 0;
}
+
int
-afr_sh_entry_expunge_unlink (call_frame_t *expunge_frame, xlator_t *this,
- int active_src)
+afr_sh_entry_expunge_rmdir (call_frame_t *expunge_frame, xlator_t *this,
+ int active_src)
{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
-
- gf_log (this->name, GF_LOG_TRACE,
- "unlinking file %s on %s",
- expunge_local->loc.path, priv->children[active_src]->name);
-
- STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_remove_cbk,
- (void *) (long) active_src,
- priv->children[active_src],
- priv->children[active_src]->fops->unlink,
- &expunge_local->loc);
-
- return 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *expunge_local = NULL;
+
+ priv = this->private;
+ expunge_local = expunge_frame->local;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "expunging directory %s on %s",
+ expunge_local->loc.path, priv->children[active_src]->name);
+
+ STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_remove_cbk,
+ (void *) (long) active_src,
+ priv->children[active_src],
+ priv->children[active_src]->fops->rmdir,
+ &expunge_local->loc, 1, NULL);
+
+ return 0;
}
int
afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this,
- int active_src, struct stat *buf)
+ int active_src, struct iatt *buf,
+ struct iatt *parentbuf)
{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- int source = 0;
- call_frame_t *frame = NULL;
- int type = 0;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
- source = expunge_sh->source;
-
- type = (buf->st_mode & S_IFMT);
-
- switch (type) {
- case S_IFSOCK:
- case S_IFREG:
- case S_IFBLK:
- case S_IFCHR:
- case S_IFIFO:
- case S_IFLNK:
- afr_sh_entry_expunge_unlink (expunge_frame, this, active_src);
-
- break;
- case S_IFDIR:
- afr_sh_entry_expunge_rmdir (expunge_frame, this, active_src);
- break;
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "%s has unknown file type on %s: 0%o",
- expunge_local->loc.path,
- priv->children[source]->name, type);
- goto out;
- break;
- }
-
- return 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *expunge_local = NULL;
+ afr_self_heal_t *expunge_sh = NULL;
+ call_frame_t *frame = NULL;
+ int type = 0;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *local = NULL;
+ loc_t *loc = NULL;
+
+ priv = this->private;
+ expunge_local = expunge_frame->local;
+ expunge_sh = &expunge_local->self_heal;
+ frame = expunge_sh->sh_frame;
+ local = frame->local;
+ sh = &local->self_heal;
+ loc = &expunge_local->loc;
+
+ type = buf->ia_type;
+ if (loc->parent && uuid_is_null (loc->parent->gfid))
+ uuid_copy (loc->pargfid, parentbuf->ia_gfid);
+
+ switch (type) {
+ case IA_IFSOCK:
+ case IA_IFREG:
+ case IA_IFBLK:
+ case IA_IFCHR:
+ case IA_IFIFO:
+ case IA_IFLNK:
+ afr_sh_entry_expunge_unlink (expunge_frame, this, active_src);
+ break;
+ case IA_IFDIR:
+ afr_sh_entry_expunge_rmdir (expunge_frame, this, active_src);
+ break;
+ default:
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s has unknown file type on %s: 0%o",
+ expunge_local->loc.path,
+ priv->children[active_src]->name, type);
+ goto out;
+ break;
+ }
+
+ return 0;
out:
- AFR_STACK_DESTROY (expunge_frame);
- afr_sh_entry_expunge_entry_done (frame, this, active_src);
+ AFR_STACK_DESTROY (expunge_frame);
+ sh->expunge_done (frame, this, active_src, -1, EINVAL);
- return 0;
+ return 0;
}
int
afr_sh_entry_expunge_lookup_cbk (call_frame_t *expunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf, dict_t *x)
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, dict_t *x,
+ struct iatt *postparent)
{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- call_frame_t *frame = NULL;
- int active_src = 0;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
- active_src = (long) cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "lookup of %s on %s failed (%s)",
- expunge_local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- goto out;
- }
-
- afr_sh_entry_expunge_remove (expunge_frame, this, active_src, buf);
-
- return 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *expunge_local = NULL;
+ afr_self_heal_t *expunge_sh = NULL;
+ call_frame_t *frame = NULL;
+ int active_src = 0;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ expunge_local = expunge_frame->local;
+ expunge_sh = &expunge_local->self_heal;
+ frame = expunge_sh->sh_frame;
+ active_src = (long) cookie;
+ local = frame->local;
+ sh = &local->self_heal;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "lookup of %s on %s failed (%s)",
+ expunge_local->loc.path,
+ priv->children[active_src]->name,
+ strerror (op_errno));
+ goto out;
+ }
+
+ afr_sh_entry_expunge_remove (expunge_frame, this, active_src, buf,
+ postparent);
+
+ return 0;
out:
- AFR_STACK_DESTROY (expunge_frame);
- afr_sh_entry_expunge_entry_done (frame, this, active_src);
+ AFR_STACK_DESTROY (expunge_frame);
+ sh->expunge_done (frame, this, active_src, op_ret, op_errno);
- return 0;
+ return 0;
}
int
afr_sh_entry_expunge_purge (call_frame_t *expunge_frame, xlator_t *this,
- int active_src)
+ int active_src)
{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
-
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on %s",
- expunge_local->loc.path, priv->children[active_src]->name);
-
- STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_lookup_cbk,
- (void *) (long) active_src,
- priv->children[active_src],
- priv->children[active_src]->fops->lookup,
- &expunge_local->loc, 0);
-
- return 0;
-}
+ afr_private_t *priv = NULL;
+ afr_local_t *expunge_local = NULL;
+
+ priv = this->private;
+ expunge_local = expunge_frame->local;
+ gf_log (this->name, GF_LOG_TRACE,
+ "looking up %s on %s",
+ expunge_local->loc.path, priv->children[active_src]->name);
+
+ STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_lookup_cbk,
+ (void *) (long) active_src,
+ priv->children[active_src],
+ priv->children[active_src]->fops->lookup,
+ &expunge_local->loc, NULL);
+
+ return 0;
+}
int
afr_sh_entry_expunge_entry_cbk (call_frame_t *expunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf, dict_t *x)
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, dict_t *x,
+ struct iatt *postparent)
{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- int source = 0;
- call_frame_t *frame = NULL;
- int active_src = 0;
-
-
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
- active_src = expunge_sh->active_source;
- source = (long) cookie;
-
- if (op_ret == -1 && op_errno == ENOENT) {
-
- gf_log (this->name, GF_LOG_TRACE,
- "missing entry %s on %s",
- expunge_local->loc.path,
- priv->children[source]->name);
-
- afr_sh_entry_expunge_purge (expunge_frame, this, active_src);
-
- return 0;
- }
-
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "%s exists under %s",
- expunge_local->loc.path,
- priv->children[source]->name);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s under %s failed (%s)",
- expunge_local->loc.path,
- priv->children[source]->name,
- strerror (op_errno));
- }
-
- AFR_STACK_DESTROY (expunge_frame);
- afr_sh_entry_expunge_entry_done (frame, this, active_src);
-
- return 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *expunge_local = NULL;
+ afr_self_heal_t *expunge_sh = NULL;
+ int source = 0;
+ call_frame_t *frame = NULL;
+ int active_src = 0;
+ int need_expunge = 0;
+ afr_self_heal_t *sh = NULL;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ expunge_local = expunge_frame->local;
+ expunge_sh = &expunge_local->self_heal;
+ frame = expunge_sh->sh_frame;
+ active_src = expunge_sh->active_source;
+ source = (long) cookie;
+ local = frame->local;
+ sh = &local->self_heal;
+
+ if (op_ret == -1 && op_errno == ENOENT)
+ need_expunge = 1;
+ else if (op_ret == -1)
+ goto out;
+
+ if (!uuid_is_null (expunge_sh->entrybuf.ia_gfid) &&
+ !uuid_is_null (buf->ia_gfid) &&
+ (uuid_compare (expunge_sh->entrybuf.ia_gfid, buf->ia_gfid) != 0)) {
+ char uuidbuf1[64];
+ char uuidbuf2[64];
+ gf_log (this->name, GF_LOG_DEBUG,
+ "entry %s found on %s with mismatching gfid (%s/%s)",
+ expunge_local->loc.path,
+ priv->children[source]->name,
+ uuid_utoa_r (expunge_sh->entrybuf.ia_gfid, uuidbuf1),
+ uuid_utoa_r (buf->ia_gfid, uuidbuf2));
+ need_expunge = 1;
+ }
+
+ if (need_expunge) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Entry %s is missing on %s and deleting from "
+ "replica's other bricks",
+ expunge_local->loc.path,
+ priv->children[source]->name);
+
+ if (postparent)
+ expunge_sh->parentbuf = *postparent;
+
+ afr_sh_entry_expunge_purge (expunge_frame, this, active_src);
+
+ return 0;
+ }
+
+out:
+ if (op_ret == 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s exists under %s",
+ expunge_local->loc.path,
+ priv->children[source]->name);
+ } else {
+ gf_log (this->name, GF_LOG_INFO,
+ "looking up %s under %s failed (%s)",
+ expunge_local->loc.path,
+ priv->children[source]->name,
+ strerror (op_errno));
+ }
+
+ AFR_STACK_DESTROY (expunge_frame);
+ sh->expunge_done (frame, this, active_src, op_ret, op_errno);
+
+ return 0;
}
+static gf_boolean_t
+can_skip_entry_self_heal (char *name, loc_t *parent_loc)
+{
+ if (strcmp (name, ".") == 0) {
+ return _gf_true;
+ } else if (strcmp (name, "..") == 0) {
+ return _gf_true;
+ } else if (loc_is_root (parent_loc) &&
+ (strcmp (name, GF_REPLICATE_TRASH_DIR) == 0)) {
+ return _gf_true;
+ }
+ return _gf_false;
+}
int
afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this,
- char *name)
+ gf_dirent_t *entry)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int ret = -1;
- call_frame_t *expunge_frame = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- int active_src = 0;
- int source = 0;
- int op_errno = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- active_src = sh->active_source;
- source = sh->source;
-
- if ((strcmp (name, ".") == 0)
- || (strcmp (name, "..") == 0)) {
- gf_log (this->name, GF_LOG_TRACE,
- "skipping inspection of %s under %s",
- name, local->loc.path);
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "inspecting existance of %s under %s",
- name, local->loc.path);
-
- expunge_frame = copy_frame (frame);
- if (!expunge_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
-
- ALLOC_OR_GOTO (expunge_local, afr_local_t, out);
-
- expunge_frame->local = expunge_local;
- expunge_sh = &expunge_local->self_heal;
- expunge_sh->sh_frame = frame;
- expunge_sh->active_source = active_src;
-
- ret = build_child_loc (this, &expunge_local->loc, &local->loc, name);
- if (ret != 0) {
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on %s", expunge_local->loc.path,
- priv->children[source]->name);
-
- STACK_WIND_COOKIE (expunge_frame,
- afr_sh_entry_expunge_entry_cbk,
- (void *) (long) source,
- priv->children[source],
- priv->children[source]->fops->lookup,
- &expunge_local->loc, 0);
-
- ret = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int ret = -1;
+ call_frame_t *expunge_frame = NULL;
+ afr_local_t *expunge_local = NULL;
+ afr_self_heal_t *expunge_sh = NULL;
+ int active_src = 0;
+ int source = 0;
+ int op_errno = 0;
+ char *name = NULL;
+ int op_ret = -1;
+
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+
+ active_src = sh->active_source;
+ source = sh->source;
+ sh->expunge_done = afr_sh_entry_expunge_entry_done;
+
+ name = entry->d_name;
+ if (can_skip_entry_self_heal (name, &local->loc)) {
+ op_ret = 0;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "inspecting existence of %s under %s",
+ name, local->loc.path);
+
+ expunge_frame = copy_frame (frame);
+ if (!expunge_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ AFR_LOCAL_ALLOC_OR_GOTO (expunge_local, out);
+
+ expunge_frame->local = expunge_local;
+ expunge_sh = &expunge_local->self_heal;
+ expunge_sh->sh_frame = frame;
+ expunge_sh->active_source = active_src;
+ expunge_sh->entrybuf = entry->d_stat;
+ loc_copy (&expunge_sh->parent_loc, &local->loc);
+
+ ret = afr_build_child_loc (this, &expunge_local->loc, &local->loc,
+ name);
+ if (ret != 0) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "looking up %s on %s", expunge_local->loc.path,
+ priv->children[source]->name);
+
+ STACK_WIND_COOKIE (expunge_frame,
+ afr_sh_entry_expunge_entry_cbk,
+ (void *) (long) source,
+ priv->children[source],
+ priv->children[source]->fops->lookup,
+ &expunge_local->loc, NULL);
+
+ ret = 0;
out:
- if (ret == -1)
- afr_sh_entry_expunge_entry_done (frame, this, active_src);
+ if (ret == -1)
+ sh->expunge_done (frame, this, active_src, op_ret, op_errno);
- return 0;
+ return 0;
}
int
afr_sh_entry_expunge_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- gf_dirent_t *entry = NULL;
- off_t last_offset = 0;
- int active_src = 0;
- int entry_count = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- active_src = sh->active_source;
-
- if (op_ret <= 0) {
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir of %s on subvolume %s failed (%s)",
- local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "readdir of %s on subvolume %s complete",
- local->loc.path,
- priv->children[active_src]->name);
- }
-
- afr_sh_entry_expunge_all (frame, this);
- return 0;
- }
-
- list_for_each_entry (entry, &entries->list, list) {
- last_offset = entry->d_off;
- entry_count++;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "readdir'ed %d entries from %s",
- entry_count, priv->children[active_src]->name);
-
- sh->offset = last_offset;
- local->call_count = entry_count;
-
- list_for_each_entry (entry, &entries->list, list) {
- afr_sh_entry_expunge_entry (frame, this, entry->d_name);
- }
-
- return 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ gf_dirent_t *entry = NULL;
+ off_t last_offset = 0;
+ int active_src = 0;
+ int entry_count = 0;
+
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+
+ active_src = sh->active_source;
+
+ if (op_ret <= 0) {
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "readdir of %s on subvolume %s failed (%s)",
+ local->loc.path,
+ priv->children[active_src]->name,
+ strerror (op_errno));
+ } else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "readdir of %s on subvolume %s complete",
+ local->loc.path,
+ priv->children[active_src]->name);
+ }
+
+ afr_sh_entry_expunge_all (frame, this);
+ return 0;
+ }
+
+ list_for_each_entry (entry, &entries->list, list) {
+ last_offset = entry->d_off;
+ entry_count++;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "readdir'ed %d entries from %s",
+ entry_count, priv->children[active_src]->name);
+
+ sh->offset = last_offset;
+ local->call_count = entry_count;
+
+ list_for_each_entry (entry, &entries->list, list) {
+ afr_sh_entry_expunge_entry (frame, this, entry);
+ }
+
+ return 0;
}
int
afr_sh_entry_expunge_subvol (call_frame_t *frame, xlator_t *this,
- int active_src)
+ int active_src)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
- STACK_WIND (frame, afr_sh_entry_expunge_readdir_cbk,
- priv->children[active_src],
- priv->children[active_src]->fops->readdir,
- sh->healing_fd, sh->block_size, sh->offset);
+ STACK_WIND (frame, afr_sh_entry_expunge_readdir_cbk,
+ priv->children[active_src],
+ priv->children[active_src]->fops->readdirp,
+ sh->healing_fd, sh->block_size, sh->offset, NULL);
- return 0;
+ return 0;
}
int
afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int active_src = -1;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int active_src = -1;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
- sh->offset = 0;
+ sh->offset = 0;
- if (sh->source == -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "no active sources for %s to expunge entries",
- local->loc.path);
- goto out;
- }
+ if (sh->source == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no active sources for %s to expunge entries",
+ local->loc.path);
+ goto out;
+ }
- active_src = next_active_sink (frame, this, sh->active_source);
- sh->active_source = active_src;
+ active_src = next_active_sink (frame, this, sh->active_source);
+ sh->active_source = active_src;
- if (sh->op_failed) {
- goto out;
- }
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
+ goto out;
+ }
- if (active_src == -1) {
- /* completed creating missing files on all subvolumes */
- goto out;
- }
+ if (active_src == -1) {
+ /* completed creating missing files on all subvolumes */
+ goto out;
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "expunging entries of %s on %s to other sinks",
- local->loc.path, priv->children[active_src]->name);
+ gf_log (this->name, GF_LOG_TRACE,
+ "expunging entries of %s on %s to other sinks",
+ local->loc.path, priv->children[active_src]->name);
- afr_sh_entry_expunge_subvol (frame, this, active_src);
+ afr_sh_entry_expunge_subvol (frame, this, active_src);
- return 0;
+ return 0;
out:
- afr_sh_entry_erase_pending (frame, this);
- return 0;
+ afr_sh_entry_impunge_all (frame, this);
+ return 0;
}
int
-afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this);
-
-int
-afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this,
- int active_src);
-
-int
afr_sh_entry_impunge_entry_done (call_frame_t *frame, xlator_t *this,
- int active_src)
+ int32_t op_ret, int32_t op_errno)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = 0;
+ int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ local = frame->local;
+ sh = &local->self_heal;
+ if (op_ret < 0)
+ sh->entries_skipped = _gf_true;
+ call_count = afr_frame_return (frame);
+ if (call_count == 0)
+ afr_sh_entry_impunge_subvol (frame, this);
- LOCK (&frame->lock);
- {
- }
- UNLOCK (&frame->lock);
+ return 0;
+}
- call_count = afr_frame_return (frame);
+void
+afr_sh_entry_call_impunge_done (call_frame_t *impunge_frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
+{
+ afr_local_t *impunge_local = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ call_frame_t *frame = NULL;
- if (call_count == 0)
- afr_sh_entry_impunge_subvol (frame, this, active_src);
+ AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
+ frame, local, sh);
- return 0;
+ AFR_STACK_DESTROY (impunge_frame);
+ sh->impunge_done (frame, this, op_ret, op_errno);
}
-
int
-afr_sh_entry_impunge_utimens_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct stat *stbuf)
+afr_sh_entry_impunge_setattr_cbk (call_frame_t *impunge_frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop,
+ dict_t *xdata)
{
- int call_count = 0;
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- call_frame_t *frame = NULL;
- int active_src = 0;
- int child_index = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
- local = frame->local;
- sh = &local->self_heal;
- active_src = sh->active_source;
- child_index = (long) cookie;
-
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "utimes set for %s on %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "setting utimes of %s on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- }
-
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
-
- if (call_count == 0) {
- AFR_STACK_DESTROY (impunge_frame);
- afr_sh_entry_impunge_entry_done (frame, this, active_src);
- }
-
- return 0;
-}
+ int call_count = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ int child_index = 0;
+
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+ child_index = (long) cookie;
+
+ if (op_ret == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "setattr done for %s on %s",
+ impunge_local->loc.path,
+ priv->children[child_index]->name);
+ } else {
+ gf_log (this->name, GF_LOG_INFO,
+ "setattr (%s) on %s failed (%s)",
+ impunge_local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ }
+ call_count = afr_frame_return (impunge_frame);
+ if (call_count == 0) {
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ 0, op_errno);
+ }
+
+ return 0;
+}
int
-afr_sh_entry_impunge_chown_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct stat *stbuf)
+afr_sh_entry_impunge_parent_setattr_cbk (call_frame_t *setattr_frame,
+ void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop,
+ dict_t *xdata)
{
- int call_count = 0;
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- call_frame_t *frame = NULL;
- int active_src = 0;
- int child_index = 0;
- struct timespec ts[2];
-
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
- child_index = (long) cookie;
-
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "ownership of %s on %s changed",
- impunge_local->loc.path,
- priv->children[child_index]->name);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "setting ownership of %s on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- goto out;
- }
-
-#ifdef HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC
- ts[0] = impunge_local->cont.lookup.buf.st_atim;
- ts[1] = impunge_local->cont.lookup.buf.st_mtim;
-#elif HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC
- ts[0] = impunge_local->cont.lookup.buf.st_atimespec;
- ts[1] = impunge_local->cont.lookup.buf.st_mtimespec;
-#else
- ts[0].tv_sec = impunge_local->cont.lookup.buf.st_atime;
- ts[1].tv_sec = impunge_local->cont.lookup.buf.st_mtime;
-#endif
- STACK_WIND_COOKIE (impunge_frame,
- afr_sh_entry_impunge_utimens_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->utimens,
- &impunge_local->loc, ts);
+ int call_count = 0;
+ afr_local_t *setattr_local = NULL;
+
+ setattr_local = setattr_frame->local;
+ if (op_ret != 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "setattr on parent directory (%s) failed: %s",
+ setattr_local->loc.path, strerror (op_errno));
+ }
- return 0;
+ call_count = afr_frame_return (setattr_frame);
+ if (call_count == 0)
+ AFR_STACK_DESTROY (setattr_frame);
+ return 0;
+}
+int
+afr_sh_entry_impunge_setattr (call_frame_t *impunge_frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_local_t *setattr_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ call_frame_t *setattr_frame = NULL;
+ int32_t valid = 0;
+ int32_t op_errno = 0;
+ int child_index = 0;
+ int call_count = 0;
+ int i = 0;
+
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "setting ownership of %s on %s to %d/%d",
+ impunge_local->loc.path,
+ priv->children[child_index]->name,
+ impunge_sh->entrybuf.ia_uid,
+ impunge_sh->entrybuf.ia_gid);
+
+ setattr_frame = copy_frame (impunge_frame);
+ if (!setattr_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ AFR_LOCAL_ALLOC_OR_GOTO (setattr_frame->local, out);
+ setattr_local = setattr_frame->local;
+ call_count = afr_errno_count (NULL, impunge_sh->child_errno,
+ priv->child_count, 0);
+ loc_copy (&setattr_local->loc, &impunge_sh->parent_loc);
+ impunge_local->call_count = call_count;
+ setattr_local->call_count = call_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (impunge_sh->child_errno[i])
+ continue;
+ valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
+ STACK_WIND_COOKIE (setattr_frame,
+ afr_sh_entry_impunge_parent_setattr_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->setattr,
+ &setattr_local->loc,
+ &impunge_sh->parentbuf, valid, NULL);
+
+ valid = GF_SET_ATTR_UID | GF_SET_ATTR_GID |
+ GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
+ STACK_WIND_COOKIE (impunge_frame,
+ afr_sh_entry_impunge_setattr_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->setattr,
+ &impunge_local->loc,
+ &impunge_sh->entrybuf, valid, NULL);
+ call_count--;
+ }
+ GF_ASSERT (!call_count);
+ return 0;
out:
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
-
- if (call_count == 0) {
- AFR_STACK_DESTROY (impunge_frame);
- afr_sh_entry_impunge_entry_done (frame, this, active_src);
- }
-
- return 0;
+ if (setattr_frame)
+ AFR_STACK_DESTROY (setattr_frame);
+ afr_sh_entry_call_impunge_done (impunge_frame, this, 0, op_errno);
+ return 0;
}
-
int
afr_sh_entry_impunge_xattrop_cbk (call_frame_t *impunge_frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
+ dict_t *xattr, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- call_frame_t *frame = NULL;
- int child_index = 0;
-
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
-
- child_index = (long) cookie;
-
- gf_log (this->name, GF_LOG_TRACE,
- "setting ownership of %s on %s to %d/%d",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- impunge_local->cont.lookup.buf.st_uid,
- impunge_local->cont.lookup.buf.st_gid);
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_chown_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->chown,
- &impunge_local->loc,
- impunge_local->cont.lookup.buf.st_uid,
- impunge_local->cont.lookup.buf.st_gid);
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ int child_index = 0;
+
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+
+ child_index = (long) cookie;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: failed to perform xattrop on %s (%s)",
+ impunge_local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ goto out;
+ }
+
+ afr_sh_entry_impunge_setattr (impunge_frame, this);
+ return 0;
+out:
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ -1, op_errno);
return 0;
}
-
int
-afr_sh_entry_impunge_newfile_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *stbuf)
+afr_sh_entry_impunge_perform_xattrop (call_frame_t *impunge_frame,
+ xlator_t *this)
{
- int call_count = 0;
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- call_frame_t *frame = NULL;
- int active_src = 0;
- int child_index = 0;
- int pending_array[3] = {0, };
- dict_t *xattr = NULL;
- int ret = 0;
- int idx = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ int active_src = 0;
+ dict_t *xattr = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ int32_t op_errno = 0;
+
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+ active_src = impunge_sh->active_source;
+
+ afr_prepare_new_entry_pending_matrix (impunge_local->pending,
+ afr_is_errno_unset,
+ impunge_sh->child_errno,
+ &impunge_sh->entrybuf,
+ priv->child_count);
+ xattr = dict_new ();
+ if (!xattr) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
- local = frame->local;
- sh = &local->self_heal;
- active_src = sh->active_source;
+ afr_set_pending_dict (priv, xattr, impunge_local->pending, active_src,
+ LOCAL_LAST);
- child_index = (long) cookie;
+ STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_xattrop_cbk,
+ (void *) (long) active_src,
+ priv->children[active_src],
+ priv->children[active_src]->fops->xattrop,
+ &impunge_local->loc, GF_XATTROP_ADD_ARRAY, xattr, NULL);
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "creation of %s on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- goto out;
- }
+ if (xattr)
+ dict_unref (xattr);
+ return 0;
+out:
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ -1, op_errno);
+ return 0;
+}
- inode->st_mode = stbuf->st_mode;
+int
+afr_sh_entry_impunge_newfile_cbk (call_frame_t *impunge_frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ int call_count = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ int child_index = 0;
- xattr = get_new_dict ();
- dict_ref (xattr);
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
- idx = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
- pending_array[idx] = hton32 (1);
- if (S_ISDIR (stbuf->st_mode))
- idx = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
- else
- idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
- pending_array[idx] = hton32 (1);
+ child_index = (long) cookie;
- ret = dict_set_static_bin (xattr, priv->pending_key[child_index],
- pending_array, sizeof (pending_array));
+ if (op_ret == -1) {
+ impunge_sh->child_errno[child_index] = op_errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "creation of %s on %s failed (%s)",
+ impunge_local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ } else {
+ impunge_sh->child_errno[child_index] = 0;
+ }
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_xattrop_cbk,
- (void *) (long) child_index,
- priv->children[active_src],
- priv->children[active_src]->fops->xattrop,
- &impunge_local->loc, GF_XATTROP_ADD_ARRAY, xattr);
+ call_count = afr_frame_return (impunge_frame);
+ if (call_count == 0) {
+ if (!afr_errno_count (NULL, impunge_sh->child_errno,
+ priv->child_count, 0)) {
+ // new_file creation failed every where
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ -1, op_errno);
+ goto out;
+ }
+ afr_sh_entry_impunge_perform_xattrop (impunge_frame, this);
+ }
+out:
+ return 0;
+}
- dict_unref (xattr);
+int
+afr_sh_entry_impunge_hardlink_cbk (call_frame_t *impunge_frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ int call_count = 0;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+
+ if (IA_IFLNK == impunge_sh->entrybuf.ia_type) {
+ //For symlinks impunge is attempted un-conditionally
+ //So the file can already exist.
+ if ((op_ret < 0) && (op_errno == EEXIST))
+ op_ret = 0;
+ }
- return 0;
+ call_count = afr_frame_return (impunge_frame);
+ if (call_count == 0)
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ op_ret, op_errno);
-out:
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
-
- if (call_count == 0) {
- AFR_STACK_DESTROY (impunge_frame);
- afr_sh_entry_impunge_entry_done (frame, this, active_src);
- }
-
- return 0;
+ return 0;
}
-
int
-afr_sh_entry_impunge_mknod (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, struct stat *stbuf)
+afr_sh_entry_impunge_hardlink (call_frame_t *impunge_frame, xlator_t *this,
+ int child_index)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ loc_t *loc = NULL;
+ struct iatt *buf = NULL;
+ loc_t oldloc = {0};
+
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+ loc = &impunge_local->loc;
+ buf = &impunge_sh->entrybuf;
+
+ oldloc.inode = inode_ref (loc->inode);
+ uuid_copy (oldloc.gfid, buf->ia_gfid);
+ gf_log (this->name, GF_LOG_DEBUG, "linking missing file %s on %s",
+ loc->path, priv->children[child_index]->name);
+
+ STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_hardlink_cbk,
+ (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->link,
+ &oldloc, loc, NULL);
+ loc_wipe (&oldloc);
+ return 0;
+}
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
+int
+afr_sh_nameless_lookup_cbk (call_frame_t *impunge_frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ if (op_ret < 0) {
+ afr_sh_entry_impunge_create_file (impunge_frame, this,
+ (long)cookie);
+ } else {
+ afr_sh_entry_impunge_hardlink (impunge_frame, this,
+ (long)cookie);
+ }
+ return 0;
+}
- gf_log (this->name, GF_LOG_DEBUG,
- "creating missing file %s on %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
+int
+afr_sh_entry_impunge_check_hardlink (call_frame_t *impunge_frame,
+ xlator_t *this,
+ int child_index, struct iatt *stbuf)
+{
+ afr_private_t *priv = NULL;
+ call_frame_t *frame = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ afr_self_heal_t *sh = NULL;
+ loc_t *loc = NULL;
+ dict_t *xattr_req = NULL;
+ loc_t oldloc = {0};
+ int ret = -1;
+
+ priv = this->private;
+ AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
+ frame, local, sh);
+ loc = &impunge_local->loc;
+
+ xattr_req = dict_new ();
+ if (!xattr_req)
+ goto out;
+ oldloc.inode = inode_ref (loc->inode);
+ uuid_copy (oldloc.gfid, stbuf->ia_gfid);
+
+ STACK_WIND_COOKIE (impunge_frame, afr_sh_nameless_lookup_cbk,
+ (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->lookup,
+ &oldloc, xattr_req);
+ ret = 0;
+out:
+ if (xattr_req)
+ dict_unref (xattr_req);
+ loc_wipe (&oldloc);
+ if (ret)
+ sh->impunge_done (frame, this, -1, ENOMEM);
+ return 0;
+}
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->mknod,
- &impunge_local->loc,
- stbuf->st_mode, stbuf->st_rdev);
+int
+afr_sh_entry_impunge_mknod (call_frame_t *impunge_frame, xlator_t *this,
+ int child_index, struct iatt *stbuf)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ dict_t *dict = NULL;
+ int ret = 0;
+
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "creating missing file %s on %s",
+ impunge_local->loc.path,
+ priv->children[child_index]->name);
+
+ dict = dict_new ();
+ if (!dict)
+ gf_log (this->name, GF_LOG_ERROR, "Out of memory");
+
+ GF_ASSERT (!uuid_is_null (stbuf->ia_gfid));
+ ret = afr_set_dict_gfid (dict, stbuf->ia_gfid);
+ if (ret)
+ gf_log (this->name, GF_LOG_INFO, "%s: gfid set failed",
+ impunge_local->loc.path);
+
+ /*
+ * Reason for adding GLUSTERFS_INTERNAL_FOP_KEY :
+ *
+ * Problem:
+ * While a brick is down in a replica pair, lets say the user creates
+ * one file(file-A) and a hard link to that file(h-file-A). After the
+ * brick comes back up, entry self-heal is attempted on parent dir of
+ * these two files. As part of readdir in self-heal it reads both the
+ * entries file-A and h-file-A for both of them it does name less lookup
+ * to check if there are any hardlinks already present in the
+ * destination brick. It finds that there are no hard links already
+ * present for files file-A, h-file-A. Self-heal does mknods for both
+ * file-A and h-file-A. This leads to file-A and h-file-A not being
+ * hardlinks anymore.
+ *
+ * Fix: (More like shrinking of race-window, the race itself is still
+ * present in posix-mknod).
+ * If mknod comes with the presence of GLUSTERFS_INTERNAL_FOP_KEY then
+ * posix_mknod checks if there are already any gfid-links and does
+ * link() instead of mknod. There still can be a race where two
+ * posix_mknods same gfid see that
+ * gfid-link file is not present and proceeds with mknods and result in
+ * two different files with same gfid.
+ */
+ ret = dict_set_str (dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
+ if (ret)
+ gf_log (this->name, GF_LOG_INFO, "%s: %s set failed",
+ impunge_local->loc.path, GLUSTERFS_INTERNAL_FOP_KEY);
+
+ STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
+ (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->mknod,
+ &impunge_local->loc,
+ st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type),
+ makedev (ia_major (stbuf->ia_rdev),
+ ia_minor (stbuf->ia_rdev)), 0, dict);
+
+ if (dict)
+ dict_unref (dict);
- return 0;
+ return 0;
}
int
afr_sh_entry_impunge_mkdir (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, struct stat *stbuf)
+ int child_index, struct iatt *stbuf)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ dict_t *dict = NULL;
+ int ret = 0;
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
+ priv = this->private;
+ impunge_local = impunge_frame->local;
- gf_log (this->name, GF_LOG_DEBUG,
- "creating missing directory %s on %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
+ dict = dict_new ();
+ if (!dict) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+ return 0;
+ }
+
+ GF_ASSERT (!uuid_is_null (stbuf->ia_gfid));
+ ret = afr_set_dict_gfid (dict, stbuf->ia_gfid);
+ if (ret)
+ gf_log (this->name, GF_LOG_INFO, "%s: gfid set failed",
+ impunge_local->loc.path);
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "creating missing directory %s on %s",
+ impunge_local->loc.path,
+ priv->children[child_index]->name);
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->mkdir,
- &impunge_local->loc, stbuf->st_mode);
+ STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
+ (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->mkdir,
+ &impunge_local->loc,
+ st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type),
+ 0, dict);
- return 0;
+ if (dict)
+ dict_unref (dict);
+
+ return 0;
}
int
afr_sh_entry_impunge_symlink (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, const char *linkname)
+ int child_index, const char *linkname)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
-
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "creating missing symlink %s -> %s on %s",
- impunge_local->loc.path, linkname,
- priv->children[child_index]->name);
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->symlink,
- linkname, &impunge_local->loc);
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ dict_t *dict = NULL;
+ struct iatt *buf = NULL;
+ int ret = 0;
+
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+
+ buf = &impunge_local->cont.dir_fop.buf;
+
+ dict = dict_new ();
+ if (!dict) {
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ -1, ENOMEM);
+ goto out;
+ }
- return 0;
+ GF_ASSERT (!uuid_is_null (buf->ia_gfid));
+ ret = afr_set_dict_gfid (dict, buf->ia_gfid);
+ if (ret)
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: dict set gfid failed",
+ impunge_local->loc.path);
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "creating missing symlink %s -> %s on %s",
+ impunge_local->loc.path, linkname,
+ priv->children[child_index]->name);
+
+ STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
+ (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->symlink,
+ linkname, &impunge_local->loc, 0, dict);
+
+ if (dict)
+ dict_unref (dict);
+out:
+ return 0;
}
int
-afr_sh_entry_impunge_readlink_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- const char *linkname)
+afr_sh_entry_impunge_symlink_unlink_cbk (call_frame_t *impunge_frame,
+ void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int child_index = -1;
- call_frame_t *frame = NULL;
- int call_count = -1;
- int active_src = -1;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
- active_src = impunge_sh->active_source;
-
- child_index = (long) cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "readlink of %s on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- goto out;
- }
-
- afr_sh_entry_impunge_symlink (impunge_frame, this, child_index,
- linkname);
- return 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ int child_index = -1;
+ int call_count = -1;
+
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+
+ child_index = (long) cookie;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "unlink of %s on %s failed (%s)",
+ impunge_local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ goto out;
+ }
+
+ afr_sh_entry_impunge_symlink (impunge_frame, this, child_index,
+ impunge_sh->linkname);
+ return 0;
out:
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
-
- if (call_count == 0) {
- AFR_STACK_DESTROY (impunge_frame);
- afr_sh_entry_impunge_entry_done (frame, this, active_src);
- }
-
- return 0;
-}
+ LOCK (&impunge_frame->lock);
+ {
+ call_count = --impunge_local->call_count;
+ }
+ UNLOCK (&impunge_frame->lock);
+ if (call_count == 0)
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ op_ret, op_errno);
-int
-afr_sh_entry_impunge_readlink (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, struct stat *stbuf)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int active_src = -1;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- active_src = impunge_sh->active_source;
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_readlink_cbk,
- (void *) (long) child_index,
- priv->children[active_src],
- priv->children[active_src]->fops->readlink,
- &impunge_local->loc, 4096);
-
- return 0;
+ return 0;
}
int
-afr_sh_entry_impunge_recreate_lookup_cbk (call_frame_t *impunge_frame,
- void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf,
- dict_t *xattr)
+afr_sh_entry_impunge_symlink_unlink (call_frame_t *impunge_frame, xlator_t *this,
+ int child_index)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int active_src = 0;
- int type = 0;
- int child_index = 0;
- call_frame_t *frame = NULL;
- int call_count = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
-
- child_index = (long) cookie;
-
- active_src = impunge_sh->active_source;
-
- if (op_ret != 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on %s (for %s) failed (%s)",
- impunge_local->loc.path,
- priv->children[active_src]->name,
- priv->children[child_index]->name,
- strerror (op_errno));
- goto out;
- }
-
- impunge_local->cont.lookup.buf = *buf;
- type = (buf->st_mode & S_IFMT);
-
- switch (type) {
- case S_IFSOCK:
- case S_IFREG:
- case S_IFBLK:
- case S_IFCHR:
- case S_IFIFO:
- afr_sh_entry_impunge_mknod (impunge_frame, this,
- child_index, buf);
- break;
- case S_IFLNK:
- afr_sh_entry_impunge_readlink (impunge_frame, this,
- child_index, buf);
- break;
- case S_IFDIR:
- afr_sh_entry_impunge_mkdir (impunge_frame, this,
- child_index, buf);
- break;
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "%s has unknown file type on %s: 0%o",
- impunge_local->loc.path,
- priv->children[active_src]->name, type);
- goto out;
- break;
- }
-
- return 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
-out:
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
-
- if (call_count == 0) {
- AFR_STACK_DESTROY (impunge_frame);
- afr_sh_entry_impunge_entry_done (frame, this, active_src);
- }
-
- return 0;
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "unlinking symlink %s with wrong target on %s",
+ impunge_local->loc.path,
+ priv->children[child_index]->name);
+
+ STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_symlink_unlink_cbk,
+ (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->unlink,
+ &impunge_local->loc, 0, NULL);
+
+ return 0;
}
int
-afr_sh_entry_impunge_recreate (call_frame_t *impunge_frame, xlator_t *this,
- int child_index)
+afr_sh_entry_impunge_readlink_sink_cbk (call_frame_t *impunge_frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ const char *linkname, struct iatt *sbuf, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int active_src = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ int child_index = -1;
+ int call_count = -1;
+ int active_src = -1;
+
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+ active_src = impunge_sh->active_source;
+
+ child_index = (long) cookie;
+
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "readlink of %s on %s failed (%s)",
+ impunge_local->loc.path,
+ priv->children[active_src]->name,
+ strerror (op_errno));
+ goto out;
+ }
+
+ /* symlink doesn't exist on the sink */
+
+ if ((op_ret == -1) && (op_errno == ENOENT)) {
+ afr_sh_entry_impunge_symlink (impunge_frame, this,
+ child_index, impunge_sh->linkname);
+ return 0;
+ }
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
+ /* symlink exists on the sink, so check if targets match */
+
+ if (strcmp (linkname, impunge_sh->linkname) == 0) {
+ /* targets match, nothing to do */
+
+ goto out;
+ } else {
+ /*
+ * Hah! Sneaky wolf in sheep's clothing!
+ */
+ afr_sh_entry_impunge_symlink_unlink (impunge_frame, this,
+ child_index);
+ return 0;
+ }
- active_src = impunge_sh->active_source;
+out:
+ LOCK (&impunge_frame->lock);
+ {
+ call_count = --impunge_local->call_count;
+ }
+ UNLOCK (&impunge_frame->lock);
- STACK_WIND_COOKIE (impunge_frame,
- afr_sh_entry_impunge_recreate_lookup_cbk,
- (void *) (long) child_index,
- priv->children[active_src],
- priv->children[active_src]->fops->lookup,
- &impunge_local->loc, 0);
+ if (call_count == 0)
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ op_ret, op_errno);
- return 0;
+ return 0;
}
int
-afr_sh_entry_impunge_entry_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf, dict_t *x)
+afr_sh_entry_impunge_readlink_sink (call_frame_t *impunge_frame, xlator_t *this,
+ int child_index)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int call_count = 0;
- int child_index = 0;
- call_frame_t *frame = NULL;
- int active_src = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- frame = impunge_sh->sh_frame;
- child_index = (long) cookie;
- active_src = impunge_sh->active_source;
-
- if (op_ret == -1 && op_errno == ENOENT) {
- /* decrease call_count in recreate-callback */
- gf_log (this->name, GF_LOG_TRACE,
- "missing entry %s on %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
-
- afr_sh_entry_impunge_recreate (impunge_frame, this,
- child_index);
- return 0;
- }
-
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "%s exists under %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s under %s failed (%s)",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- }
-
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
-
- if (call_count == 0) {
- AFR_STACK_DESTROY (impunge_frame);
- afr_sh_entry_impunge_entry_done (frame, this, active_src);
- }
-
- return 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "checking symlink target of %s on %s",
+ impunge_local->loc.path, priv->children[child_index]->name);
+
+ STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_readlink_sink_cbk,
+ (void *) (long) child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->readlink,
+ &impunge_local->loc, 4096, NULL);
+
+ return 0;
}
int
-afr_sh_entry_impunge_entry (call_frame_t *frame, xlator_t *this,
- char *name)
+afr_sh_entry_impunge_readlink_cbk (call_frame_t *impunge_frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ const char *linkname, struct iatt *sbuf, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int ret = -1;
- call_frame_t *impunge_frame = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int active_src = 0;
- int i = 0;
- int call_count = 0;
- int op_errno = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- active_src = sh->active_source;
-
- if ((strcmp (name, ".") == 0)
- || (strcmp (name, "..") == 0)) {
- gf_log (this->name, GF_LOG_TRACE,
- "skipping inspection of %s under %s",
- name, local->loc.path);
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "inspecting existance of %s under %s",
- name, local->loc.path);
-
- impunge_frame = copy_frame (frame);
- if (!impunge_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
-
- ALLOC_OR_GOTO (impunge_local, afr_local_t, out);
-
- impunge_frame->local = impunge_local;
- impunge_sh = &impunge_local->self_heal;
- impunge_sh->sh_frame = frame;
- impunge_sh->active_source = active_src;
-
- ret = build_child_loc (this, &impunge_local->loc, &local->loc, name);
- if (ret != 0) {
- goto out;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (i == active_src)
- continue;
- if (local->child_up[i] == 0)
- continue;
- if (sh->sources[i] == 1)
- continue;
- call_count++;
- }
-
- impunge_local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (i == active_src)
- continue;
- if (local->child_up[i] == 0)
- continue;
- if (sh->sources[i] == 1)
- continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on %s", impunge_local->loc.path,
- priv->children[i]->name);
-
- STACK_WIND_COOKIE (impunge_frame,
- afr_sh_entry_impunge_entry_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- &impunge_local->loc, 0);
-
- if (!--call_count)
- break;
- }
-
- ret = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ int child_index = -1;
+ int call_count = -1;
+ int active_src = -1;
+
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+ active_src = impunge_sh->active_source;
+
+ child_index = (long) cookie;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "readlink of %s on %s failed (%s)",
+ impunge_local->loc.path,
+ priv->children[active_src]->name,
+ strerror (op_errno));
+ goto out;
+ }
+
+ impunge_sh->linkname = gf_strdup (linkname);
+ afr_sh_entry_impunge_readlink_sink (impunge_frame, this, child_index);
+
+ return 0;
+
out:
- if (ret == -1)
- afr_sh_entry_impunge_entry_done (frame, this, active_src);
-
- return 0;
+ LOCK (&impunge_frame->lock);
+ {
+ call_count = --impunge_local->call_count;
+ }
+ UNLOCK (&impunge_frame->lock);
+
+ if (call_count == 0)
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ op_ret, op_errno);
+
+ return 0;
}
int
-afr_sh_entry_impunge_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
+afr_sh_entry_impunge_readlink (call_frame_t *impunge_frame, xlator_t *this,
+ int child_index, struct iatt *stbuf)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- gf_dirent_t *entry = NULL;
- off_t last_offset = 0;
- int active_src = 0;
- int entry_count = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- active_src = sh->active_source;
-
- if (op_ret <= 0) {
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir of %s on subvolume %s failed (%s)",
- local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "readdir of %s on subvolume %s complete",
- local->loc.path,
- priv->children[active_src]->name);
- }
-
- afr_sh_entry_impunge_all (frame, this);
- return 0;
- }
-
- list_for_each_entry (entry, &entries->list, list) {
- last_offset = entry->d_off;
- entry_count++;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "readdir'ed %d entries from %s",
- entry_count, priv->children[active_src]->name);
-
- sh->offset = last_offset;
- local->call_count = entry_count;
-
- list_for_each_entry (entry, &entries->list, list) {
- afr_sh_entry_impunge_entry (frame, this, entry->d_name);
- }
-
- return 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ int active_src = -1;
+
+ priv = this->private;
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+ active_src = impunge_sh->active_source;
+ impunge_local->cont.dir_fop.buf = *stbuf;
+
+ STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_readlink_cbk,
+ (void *) (long) child_index,
+ priv->children[active_src],
+ priv->children[active_src]->fops->readlink,
+ &impunge_local->loc, 4096, NULL);
+
+ return 0;
}
-
int
-afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this,
- int active_src)
+afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this,
+ int child_index)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- STACK_WIND (frame, afr_sh_entry_impunge_readdir_cbk,
- priv->children[active_src],
- priv->children[active_src]->fops->readdir,
- sh->healing_fd, sh->block_size, sh->offset);
+ call_frame_t *frame = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ ia_type_t type = IA_INVAL;
+ int active_src = 0;
+ struct iatt *buf = NULL;
+
+ AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
+ frame, local, sh);
+ active_src = impunge_sh->active_source;
+ afr_update_loc_gfids (&impunge_local->loc, &impunge_sh->entrybuf,
+ &impunge_sh->parentbuf);
+
+ buf = &impunge_sh->entrybuf;
+ type = buf->ia_type;
+
+ switch (type) {
+ case IA_IFSOCK:
+ case IA_IFREG:
+ case IA_IFBLK:
+ case IA_IFCHR:
+ case IA_IFIFO:
+ case IA_IFLNK:
+ afr_sh_entry_impunge_check_hardlink (impunge_frame, this,
+ child_index, buf);
+ break;
+ case IA_IFDIR:
+ afr_sh_entry_impunge_mkdir (impunge_frame, this,
+ child_index, buf);
+ break;
+ default:
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s has unknown file type on %s: 0%o",
+ impunge_local->loc.path,
+ priv->children[active_src]->name, type);
+ sh->impunge_done (frame, this, -1, EINVAL);
+ break;
+ }
- return 0;
+ return 0;
}
-
int
-afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this)
+afr_sh_entry_impunge_create_file (call_frame_t *impunge_frame, xlator_t *this,
+ int child_index)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int active_src = -1;
+ call_frame_t *frame = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ ia_type_t type = IA_INVAL;
+ int active_src = 0;
+ struct iatt *buf = NULL;
+
+ AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
+ frame, local, sh);
+ active_src = impunge_sh->active_source;
+ buf = &impunge_sh->entrybuf;
+ type = buf->ia_type;
+
+ switch (type) {
+ case IA_IFSOCK:
+ case IA_IFREG:
+ case IA_IFBLK:
+ case IA_IFCHR:
+ case IA_IFIFO:
+ afr_sh_entry_impunge_mknod (impunge_frame, this,
+ child_index, buf);
+ break;
+ case IA_IFLNK:
+ afr_sh_entry_impunge_readlink (impunge_frame, this,
+ child_index, buf);
+ break;
+ default:
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s has unknown file type on %s: 0%o",
+ impunge_local->loc.path,
+ priv->children[active_src]->name, type);
+ sh->impunge_done (frame, this, -1, EINVAL);
+ break;
+ }
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ return 0;
+}
+
+gf_boolean_t
+afr_sh_need_recreate (afr_self_heal_t *impunge_sh, unsigned int child,
+ unsigned int child_count)
+{
+ gf_boolean_t recreate = _gf_false;
- sh->offset = 0;
+ GF_ASSERT (impunge_sh->child_errno);
- active_src = next_active_source (frame, this, sh->active_source);
- sh->active_source = active_src;
+ if (child == impunge_sh->active_source)
+ goto out;
- if (sh->op_failed) {
- afr_sh_entry_finish (frame, this);
- return 0;
- }
+ if (IA_IFLNK == impunge_sh->entrybuf.ia_type) {
+ recreate = _gf_true;
+ goto out;
+ }
- if (active_src == -1) {
- /* completed creating missing files on all subvolumes */
- afr_sh_entry_expunge_all (frame, this);
- return 0;
- }
+ if (impunge_sh->child_errno[child] == ENOENT)
+ recreate = _gf_true;
+out:
+ return recreate;
+}
- gf_log (this->name, GF_LOG_TRACE,
- "impunging entries of %s on %s to other sinks",
- local->loc.path, priv->children[active_src]->name);
+unsigned int
+afr_sh_recreate_count (afr_self_heal_t *impunge_sh, int *sources,
+ unsigned int child_count)
+{
+ int count = 0;
+ int i = 0;
- afr_sh_entry_impunge_subvol (frame, this, active_src);
+ for (i = 0; i < child_count; i++) {
+ if (afr_sh_need_recreate (impunge_sh, i, child_count))
+ count++;
+ }
- return 0;
+ return count;
}
-
int
-afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+afr_sh_entry_call_impunge_recreate (call_frame_t *impunge_frame,
+ xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- child_index = (long) cookie;
-
- /* TODO: some of the open's might fail.
- In that case, modify cleanup fn to send flush on those
- fd's which are already open */
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "opendir of %s failed on child %s (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- sh->op_failed = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (sh->op_failed) {
- afr_sh_entry_finish (frame, this);
- return 0;
- }
- gf_log (this->name, GF_LOG_TRACE,
- "fd for %s opened, commencing sync",
- local->loc.path);
-
- sh->active_source = -1;
- afr_sh_entry_impunge_all (frame, this);
- }
-
- return 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ unsigned int recreate_count = 0;
+ int i = 0;
+ int active_src = 0;
+
+ priv = this->private;
+ AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
+ frame, local, sh);
+ active_src = impunge_sh->active_source;
+ impunge_sh->entrybuf = impunge_sh->buf[active_src];
+ impunge_sh->parentbuf = impunge_sh->parentbufs[active_src];
+ recreate_count = afr_sh_recreate_count (impunge_sh, sh->sources,
+ priv->child_count);
+ if (!recreate_count) {
+ afr_sh_entry_call_impunge_done (impunge_frame, this, 0, 0);
+ goto out;
+ }
+ impunge_local->call_count = recreate_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!impunge_local->child_up[i]) {
+ impunge_sh->child_errno[i] = ENOTCONN;
+ continue;
+ }
+ if (!afr_sh_need_recreate (impunge_sh, i, priv->child_count)) {
+ impunge_sh->child_errno[i] = EEXIST;
+ continue;
+ }
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!afr_sh_need_recreate (impunge_sh, i, priv->child_count))
+ continue;
+ (void)afr_sh_entry_impunge_create (impunge_frame, this, i);
+ recreate_count--;
+ }
+ GF_ASSERT (!recreate_count);
+out:
+ return 0;
}
+void
+afr_sh_entry_common_lookup_done (call_frame_t *impunge_frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *impunge_local = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ unsigned int gfid_miss_count = 0;
+ unsigned int children_up_count = 0;
+ uuid_t gfid = {0};
+ int active_src = 0;
+
+ priv = this->private;
+ AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
+ frame, local, sh);
+ active_src = impunge_sh->active_source;
+
+ if (op_ret < 0)
+ goto done;
+ if (impunge_sh->child_errno[active_src]) {
+ op_ret = -1;
+ op_errno = impunge_sh->child_errno[active_src];
+ goto done;
+ }
+
+ gfid_miss_count = afr_gfid_missing_count (this->name,
+ impunge_sh->success_children,
+ impunge_sh->buf, priv->child_count,
+ impunge_local->loc.path);
+ children_up_count = afr_up_children_count (impunge_local->child_up,
+ priv->child_count);
+ if ((gfid_miss_count == children_up_count) &&
+ (children_up_count < priv->child_count)) {
+ op_ret = -1;
+ op_errno = ENODATA;
+ gf_log (this->name, GF_LOG_ERROR, "Not all children are up, "
+ "gfid should not be assigned in this state for %s",
+ impunge_local->loc.path);
+ goto done;
+ }
+
+ if (gfid_miss_count) {
+ afr_update_gfid_from_iatts (gfid, impunge_sh->buf,
+ impunge_sh->success_children,
+ priv->child_count);
+ if (uuid_is_null (gfid)) {
+ sh->entries_skipped = _gf_true;
+ gf_log (this->name, GF_LOG_INFO, "%s: Skipping entry "
+ "self-heal because of gfid absence",
+ impunge_local->loc.path);
+ goto done;
+ }
+ afr_sh_common_lookup (impunge_frame, this, &impunge_local->loc,
+ afr_sh_entry_common_lookup_done, gfid,
+ AFR_LOOKUP_FAIL_CONFLICTS |
+ AFR_LOOKUP_FAIL_MISSING_GFIDS,
+ NULL);
+ } else {
+ afr_sh_entry_call_impunge_recreate (impunge_frame, this);
+ }
+ return;
+done:
+ afr_sh_entry_call_impunge_done (impunge_frame, this,
+ op_ret, op_errno);
+ return;
+}
int
-afr_sh_entry_open (call_frame_t *frame, xlator_t *this)
+afr_sh_entry_impunge_entry (call_frame_t *frame, xlator_t *this,
+ gf_dirent_t *entry)
{
- int i = 0;
- int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_self_heal_t *impunge_sh = NULL;
+ int ret = -1;
+ call_frame_t *impunge_frame = NULL;
+ afr_local_t *impunge_local = NULL;
+ int active_src = 0;
+ int op_errno = 0;
+ int op_ret = -1;
- int source = -1;
- int *sources = NULL;
+ local = frame->local;
+ sh = &local->self_heal;
- fd_t *fd = NULL;
+ active_src = sh->active_source;
+ sh->impunge_done = afr_sh_entry_impunge_entry_done;
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- afr_self_heal_t *sh = NULL;
+ if (can_skip_entry_self_heal (entry->d_name, &local->loc)) {
+ op_ret = 0;
+ goto out;
+ }
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ gf_log (this->name, GF_LOG_TRACE,
+ "inspecting existence of %s under %s",
+ entry->d_name, local->loc.path);
- source = local->self_heal.source;
- sources = local->self_heal.sources;
+ ret = afr_impunge_frame_create (frame, this, active_src,
+ &impunge_frame);
+ if (ret) {
+ op_errno = -ret;
+ goto out;
+ }
- sh->block_size = 131072;
- sh->offset = 0;
+ impunge_local = impunge_frame->local;
+ impunge_sh = &impunge_local->self_heal;
+ ret = afr_build_child_loc (this, &impunge_local->loc, &local->loc,
+ entry->d_name);
+ loc_copy (&impunge_sh->parent_loc, &local->loc);
+ if (ret != 0) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- call_count = sh->active_sinks;
- if (source != -1)
- call_count++;
+ afr_sh_common_lookup (impunge_frame, this, &impunge_local->loc,
+ afr_sh_entry_common_lookup_done, NULL,
+ AFR_LOOKUP_FAIL_CONFLICTS, NULL);
- local->call_count = call_count;
+ op_ret = 0;
+out:
+ if (ret) {
+ if (impunge_frame)
+ AFR_STACK_DESTROY (impunge_frame);
+ sh->impunge_done (frame, this, op_ret, op_errno);
+ }
- fd = fd_create (local->loc.inode, frame->root->pid);
- sh->healing_fd = fd;
+ return 0;
+}
+
+
+int
+afr_sh_entry_impunge_readdir_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ gf_dirent_t *entry = NULL;
+ off_t last_offset = 0;
+ int active_src = 0;
+ int entry_count = 0;
+
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
- if (source != -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "opening directory %s on subvolume %s (source)",
- local->loc.path, priv->children[source]->name);
+ active_src = sh->active_source;
- /* open source */
- STACK_WIND_COOKIE (frame, afr_sh_entry_opendir_cbk,
- (void *) (long) source,
- priv->children[source],
- priv->children[source]->fops->opendir,
- &local->loc, fd);
- call_count--;
- }
+ if (op_ret <= 0) {
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "readdir of %s on subvolume %s failed (%s)",
+ local->loc.path,
+ priv->children[active_src]->name,
+ strerror (op_errno));
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ } else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "readdir of %s on subvolume %s complete",
+ local->loc.path,
+ priv->children[active_src]->name);
+ }
- /* open sinks */
- for (i = 0; i < priv->child_count; i++) {
- if (sources[i] || !local->child_up[i])
- continue;
+ afr_sh_entry_impunge_all (frame, this);
+ return 0;
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "opening directory %s on subvolume %s (sink)",
- local->loc.path, priv->children[i]->name);
+ list_for_each_entry (entry, &entries->list, list) {
+ last_offset = entry->d_off;
+ entry_count++;
+ }
- STACK_WIND_COOKIE (frame, afr_sh_entry_opendir_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->opendir,
- &local->loc, fd);
+ gf_log (this->name, GF_LOG_DEBUG,
+ "readdir'ed %d entries from %s",
+ entry_count, priv->children[active_src]->name);
- if (!--call_count)
- break;
- }
+ sh->offset = last_offset;
+ local->call_count = entry_count;
- return 0;
+ list_for_each_entry (entry, &entries->list, list) {
+ afr_sh_entry_impunge_entry (frame, this, entry);
+ }
+
+ return 0;
}
int
-afr_sh_entry_sync_prepare (call_frame_t *frame, xlator_t *this)
+afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int active_sinks = 0;
- int source = 0;
- int i = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = sh->source;
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] == 0 && local->child_up[i] == 1) {
- active_sinks++;
- sh->success[i] = 1;
- }
- }
- if (source != -1)
- sh->success[source] = 1;
-
- if (active_sinks == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "no active sinks for self-heal on dir %s",
- local->loc.path);
- afr_sh_entry_finish (frame, this);
- return 0;
- }
- if (source == -1 && active_sinks < 2) {
- gf_log (this->name, GF_LOG_TRACE,
- "cannot sync with 0 sources and 1 sink on dir %s",
- local->loc.path);
- afr_sh_entry_finish (frame, this);
- return 0;
- }
- sh->active_sinks = active_sinks;
-
- if (source != -1)
- gf_log (this->name, GF_LOG_DEBUG,
- "self-healing directory %s from subvolume %s to "
- "%d other",
- local->loc.path, priv->children[source]->name,
- active_sinks);
- else
- gf_log (this->name, GF_LOG_DEBUG,
- "no active sources for %s found. "
- "merging all entries as a conservative decision",
- local->loc.path);
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int32_t active_src = 0;
+
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+ active_src = sh->active_source;
+ gf_log (this->name, GF_LOG_DEBUG, "%s: readdir from offset %zd",
+ local->loc.path, sh->offset);
- afr_sh_entry_open (frame, this);
+ STACK_WIND (frame, afr_sh_entry_impunge_readdir_cbk,
+ priv->children[active_src],
+ priv->children[active_src]->fops->readdirp,
+ sh->healing_fd, sh->block_size, sh->offset, NULL);
- return 0;
+ return 0;
}
int
-afr_sh_entry_fix (call_frame_t *frame, xlator_t *this)
+afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int source = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ int active_src = -1;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
- afr_sh_build_pending_matrix (priv, sh->pending_matrix, sh->xattr,
- priv->child_count, AFR_ENTRY_TRANSACTION);
+ sh->offset = 0;
- afr_sh_print_pending_matrix (sh->pending_matrix, this);
+ active_src = next_active_source (frame, this, sh->active_source);
+ sh->active_source = active_src;
- afr_sh_mark_sources (sh, priv->child_count,
- AFR_SELF_HEAL_ENTRY);
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
+ afr_sh_entry_finish (frame, this);
+ return 0;
+ }
- afr_sh_supress_errenous_children (sh->sources, sh->child_errno,
- priv->child_count);
+ if (active_src == -1) {
+ /* completed creating missing files on all subvolumes */
+ afr_sh_entry_erase_pending (frame, this);
+ return 0;
+ }
- source = afr_sh_select_source (sh->sources, priv->child_count);
- sh->source = source;
+ gf_log (this->name, GF_LOG_TRACE,
+ "impunging entries of %s on %s to other sinks",
+ local->loc.path, priv->children[active_src]->name);
- afr_sh_entry_sync_prepare (frame, this);
+ afr_sh_entry_impunge_subvol (frame, this);
- return 0;
+ return 0;
}
-
int
-afr_sh_entry_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf, dict_t *xattr)
+afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- int call_count = -1;
- int child_index = (long) cookie;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int child_index = 0;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ child_index = (long) cookie;
+
+ /* TODO: some of the open's might fail.
+ In that case, modify cleanup fn to send flush on those
+ fd's which are already open */
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "opendir of %s failed on child %s (%s)",
+ local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ }
+ }
+ UNLOCK (&frame->lock);
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- sh->xattr[child_index] = dict_ref (xattr);
- sh->buf[child_index] = *buf;
- }
- }
- UNLOCK (&frame->lock);
+ call_count = afr_frame_return (frame);
- call_count = afr_frame_return (frame);
+ if (call_count == 0) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
+ afr_sh_entry_finish (frame, this);
+ return 0;
+ }
+ gf_log (this->name, GF_LOG_TRACE,
+ "fd for %s opened, commencing sync",
+ local->loc.path);
- if (call_count == 0) {
- afr_sh_entry_fix (frame, this);
- }
+ sh->active_source = -1;
+ afr_sh_entry_expunge_all (frame, this);
+ }
- return 0;
+ return 0;
}
-
int
-afr_sh_entry_lookup (call_frame_t *frame, xlator_t *this)
+afr_sh_entry_open (call_frame_t *frame, xlator_t *this)
{
- afr_self_heal_t * sh = NULL;
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- dict_t *xattr_req = NULL;
- int ret = 0;
- int call_count = 0;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- call_count = local->child_count;
-
- local->call_count = call_count;
-
- xattr_req = dict_new();
- if (xattr_req) {
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_uint64 (xattr_req,
- priv->pending_key[i],
- 3 * sizeof(int32_t));
- }
+ int i = 0;
+ int call_count = 0;
+
+ int source = -1;
+ int *sources = NULL;
+
+ fd_t *fd = NULL;
+
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ source = local->self_heal.source;
+ sources = local->self_heal.sources;
+
+ sh->block_size = priv->sh_readdir_size;
+ sh->offset = 0;
+
+ call_count = sh->active_sinks;
+ if (source != -1)
+ call_count++;
+
+ local->call_count = call_count;
+
+ fd = fd_create (local->loc.inode, frame->root->pid);
+ sh->healing_fd = fd;
+
+ if (source != -1) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "opening directory %s on subvolume %s (source)",
+ local->loc.path, priv->children[source]->name);
+
+ /* open source */
+ STACK_WIND_COOKIE (frame, afr_sh_entry_opendir_cbk,
+ (void *) (long) source,
+ priv->children[source],
+ priv->children[source]->fops->opendir,
+ &local->loc, fd, NULL);
+ call_count--;
}
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame,
- afr_sh_entry_lookup_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- &local->loc, xattr_req);
- if (!--call_count)
- break;
- }
- }
-
- if (xattr_req)
- dict_unref (xattr_req);
-
- return 0;
-}
+ /* open sinks */
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i] || !local->child_up[i])
+ continue;
+ gf_log (this->name, GF_LOG_TRACE,
+ "opening directory %s on subvolume %s (sink)",
+ local->loc.path, priv->children[i]->name);
+ STACK_WIND_COOKIE (frame, afr_sh_entry_opendir_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->opendir,
+ &local->loc, fd, NULL);
-int
-afr_sh_entry_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
-
- /* TODO: what if lock fails? */
-
- local = frame->local;
- sh = &local->self_heal;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- sh->op_failed = 1;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "locking inode of %s on child %d failed: %s",
- local->loc.path, child_index,
- strerror (op_errno));
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "inode of %s on child %d locked",
- local->loc.path, child_index);
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (sh->op_failed == 1) {
- afr_sh_entry_finish (frame, this);
- return 0;
- }
-
- afr_sh_entry_lookup (frame, this);
- }
-
- return 0;
+ if (!--call_count)
+ break;
+ }
+
+ return 0;
}
int
-afr_sh_entry_lock (call_frame_t *frame, xlator_t *this)
+afr_sh_entry_sync_prepare (call_frame_t *frame, xlator_t *this)
{
- int i = 0;
- int call_count = 0;
-
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- afr_self_heal_t * sh = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int source = 0;
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ source = sh->source;
- call_count = local->child_count;
+ afr_sh_mark_source_sinks (frame, this);
+ if (source != -1)
+ sh->success[source] = 1;
- local->call_count = call_count;
+ if (sh->active_sinks == 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "no active sinks for self-heal on dir %s",
+ local->loc.path);
+ afr_sh_entry_finish (frame, this);
+ return 0;
+ }
+ if (source == -1 && sh->active_sinks < 2) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "cannot sync with 0 sources and 1 sink on dir %s",
+ local->loc.path);
+ afr_sh_entry_finish (frame, this);
+ return 0;
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "locking %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
+ if (source != -1)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "self-healing directory %s from subvolume %s to "
+ "%d other",
+ local->loc.path, priv->children[source]->name,
+ sh->active_sinks);
+ else
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no active sources for %s found. "
+ "merging all entries as a conservative decision",
+ local->loc.path);
- STACK_WIND_COOKIE (frame, afr_sh_entry_lock_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- &local->loc, NULL,
- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK);
- if (!--call_count)
- break;
- }
- }
+ sh->actual_sh_started = _gf_true;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_SYNC_BEGIN);
+ afr_sh_entry_open (frame, this);
- return 0;
+ return 0;
}
-int
-afr_self_heal_entry (call_frame_t *frame, xlator_t *this)
+void
+afr_sh_entry_fix (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int source = 0;
+ int nsources = 0;
+ int32_t subvol_status = 0;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ if (op_ret < 0) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_set_error (sh, op_errno);
+ afr_sh_entry_finish (frame, this);
+ goto out;
+ }
+
+ if (sh->forced_merge) {
+ sh->source = -1;
+ goto heal;
+ }
+
+ nsources = afr_build_sources (this, sh->xattr, sh->buf,
+ sh->pending_matrix, sh->sources,
+ sh->success_children,
+ AFR_ENTRY_TRANSACTION, &subvol_status,
+ _gf_true);
+ if ((subvol_status & ALL_FOOLS) ||
+ (subvol_status & SPLIT_BRAIN)) {
+ gf_log (this->name, GF_LOG_INFO, "%s: Performing conservative "
+ "merge", local->loc.path);
+ source = -1;
+ memset (sh->sources, 0,
+ sizeof (*sh->sources) * priv->child_count);
+ } else if (nsources == 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "No self-heal needed for %s",
+ local->loc.path);
+
+ afr_sh_entry_finish (frame, this);
+ return;
+ } else {
+ source = afr_sh_select_source (sh->sources, priv->child_count);
+ }
+ sh->source = source;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ afr_reset_children (sh->fresh_children, priv->child_count);
+ afr_get_fresh_children (sh->success_children, sh->sources,
+ sh->fresh_children, priv->child_count);
+ if (sh->source >= 0)
+ afr_inode_set_read_ctx (this, sh->inode, sh->source,
+ sh->fresh_children);
- if (local->need_entry_self_heal && priv->entry_self_heal) {
- afr_sh_entry_lock (frame, this);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "proceeding to completion on %s",
- local->loc.path);
- afr_sh_entry_done (frame, this);
- }
+heal:
+ afr_sh_entry_sync_prepare (frame, this);
+out:
+ return;
+}
+
+int
+afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ sh = &local->self_heal;
+
+ if (int_lock->lock_op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Non Blocking entrylks "
+ "failed for %s.", local->loc.path);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_entry_done (frame, this);
+ } else {
+
+ gf_log (this->name, GF_LOG_DEBUG, "Non Blocking entrylks done "
+ "for %s. Proceeding to FOP", local->loc.path);
+ afr_sh_common_lookup (frame, this, &local->loc,
+ afr_sh_entry_fix, NULL,
+ AFR_LOOKUP_FAIL_CONFLICTS |
+ AFR_LOOKUP_FAIL_MISSING_GFIDS,
+ NULL);
+ }
- return 0;
+ return 0;
}
+int
+afr_self_heal_entry (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heal_t *sh = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+
+ sh->sh_type_in_action = AFR_SELF_HEAL_ENTRY;
+
+ if (local->self_heal.do_entry_self_heal && priv->entry_self_heal) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
+ afr_sh_entrylk (frame, this, &local->loc, NULL,
+ afr_sh_post_nonblocking_entry_cbk);
+ } else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "proceeding to completion on %s",
+ local->loc.path);
+ afr_sh_entry_done (frame, this);
+ }
+
+ return 0;
+}
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index 966b754b3..fd5da6cfd 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -52,755 +43,728 @@
int
afr_sh_metadata_done (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
-// memset (sh->child_errno, 0, sizeof (int) * priv->child_count);
- memset (sh->buf, 0, sizeof (struct stat) * priv->child_count);
- memset (sh->success, 0, sizeof (int) * priv->child_count);
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i])
- dict_unref (sh->xattr[i]);
- sh->xattr[i] = NULL;
- }
-
- if (local->govinda_gOvinda) {
- gf_log (this->name, GF_LOG_DEBUG,
- "aborting selfheal of %s",
- local->loc.path);
- sh->completion_cbk (frame, this);
- } else {
- if (S_ISREG (local->cont.lookup.buf.st_mode)) {
- gf_log (this->name, GF_LOG_TRACE,
- "proceeding to data check on %s",
- local->loc.path);
- afr_self_heal_data (frame, this);
- return 0;
- }
-
- if (S_ISDIR (local->cont.lookup.buf.st_mode)) {
- gf_log (this->name, GF_LOG_TRACE,
- "proceeding to entry check on %s",
- local->loc.path);
- afr_self_heal_entry (frame, this);
- return 0;
- }
- gf_log (this->name, GF_LOG_DEBUG,
- "completed self heal of %s",
- local->loc.path);
-
- sh->completion_cbk (frame, this);
- }
-
- return 0;
-}
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ local = frame->local;
+ sh = &local->self_heal;
-int
-afr_sh_metadata_unlck_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
- int call_count = 0;
+ afr_sh_reset (frame, this);
+ if (IA_ISDIR (sh->type)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "proceeding to entry check on %s",
+ local->loc.path);
+ afr_self_heal_entry (frame, this);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "proceeding to data check on %s",
+ local->loc.path);
+ afr_self_heal_data (frame, this);
+ }
+ return 0;
+}
- local = frame->local;
+int
+afr_sh_inode_unlock (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
- call_count = afr_frame_return (frame);
+ local = frame->local;
+ int_lock = &local->internal_lock;
- if (call_count == 0)
- afr_sh_metadata_done (frame, this);
+ int_lock->lock_cbk = afr_sh_metadata_done;
+ afr_unlock (frame, this);
- return 0;
+ return 0;
}
-
int
afr_sh_metadata_finish (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int call_count = 0;
- struct flock flock = {0, };
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- call_count = local->child_count;
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- flock.l_start = 0;
- flock.l_len = 0;
- flock.l_type = F_UNLCK;
-
- if (local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "unlocking %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND (frame, afr_sh_metadata_unlck_cbk,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- this->name,
- &local->loc, F_SETLK, &flock);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
+ afr_sh_inode_unlock (frame, this);
+ return 0;
+}
int
-afr_sh_metadata_erase_pending_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr)
+afr_sh_metadata_fail (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
- LOCK (&frame->lock);
- {
- }
- UNLOCK (&frame->lock);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_metadata_finish (frame, this);
+ return 0;
+}
- call_count = afr_frame_return (frame);
+int
+afr_sh_metadata_erase_pending_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xattr, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int call_count = 0;
+ long i = 0;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+ sh = &local->self_heal;
+ i = (long)cookie;
+
+ if ((!IA_ISREG (sh->buf[sh->source].ia_type)) &&
+ (!IA_ISDIR (sh->buf[sh->source].ia_type))) {
+ afr_children_add_child (sh->fresh_children, i,
+ priv->child_count);
+ }
+ call_count = afr_frame_return (frame);
- if (call_count == 0)
- afr_sh_metadata_finish (frame, this);
+ if (call_count == 0) {
+ if ((!IA_ISREG (sh->buf[sh->source].ia_type)) &&
+ (!IA_ISDIR (sh->buf[sh->source].ia_type))) {
+ afr_inode_set_read_ctx (this, sh->inode, sh->source,
+ sh->fresh_children);
+ }
+ afr_sh_metadata_finish (frame, this);
+ }
- return 0;
+ return 0;
}
-
int
afr_sh_metadata_erase_pending (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- dict_t **erase_xattr = NULL;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix,
- sh->success, priv->child_count,
- AFR_METADATA_TRANSACTION);
-
- erase_xattr = CALLOC (sizeof (*erase_xattr), priv->child_count);
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- call_count++;
-
- erase_xattr[i] = get_new_dict();
- dict_ref (erase_xattr[i]);
- }
- }
-
- afr_sh_delta_to_xattr (priv, sh->delta_matrix, erase_xattr,
- priv->child_count, AFR_METADATA_TRANSACTION);
-
- local->call_count = call_count;
-
- if (call_count == 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "metadata of %s not healed on any subvolume",
- local->loc.path);
-
- afr_sh_metadata_finish (frame, this);
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (!erase_xattr[i])
- continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "erasing pending flags from %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_erase_pending_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, erase_xattr[i]);
- if (!--call_count)
- break;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (erase_xattr[i]) {
- dict_unref (erase_xattr[i]);
- }
- }
- FREE (erase_xattr);
-
- return 0;
+ afr_sh_erase_pending (frame, this, AFR_METADATA_TRANSACTION,
+ afr_sh_metadata_erase_pending_cbk,
+ afr_sh_metadata_finish);
+ return 0;
}
int
afr_sh_metadata_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int child_index = 0;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
- child_index = (long) cookie;
+ child_index = (long) cookie;
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "setting attributes failed for %s on %s (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "setting attributes failed for %s on %s (%s)",
+ local->loc.path,
+ priv->children[child_index]->name,
+ strerror (op_errno));
- sh->success[child_index] = 0;
- }
- }
- UNLOCK (&frame->lock);
+ sh->success[child_index] = 0;
+ }
+ }
+ UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ call_count = afr_frame_return (frame);
- if (call_count == 0)
- afr_sh_metadata_erase_pending (frame, this);
+ if (call_count == 0) {
+ if (local->xattr_req) {
+ dict_unref (local->xattr_req);
+ local->xattr_req = NULL;
+ }
+ afr_sh_metadata_erase_pending (frame, this);
+ }
- return 0;
+ return 0;
}
int
-afr_sh_metadata_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+afr_sh_metadata_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
- afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno);
+ afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno, xdata);
- return 0;
+ return 0;
}
int
afr_sh_metadata_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno);
+ afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno, xdata);
- return 0;
+ return 0;
}
-
int
-afr_sh_metadata_sync (call_frame_t *frame, xlator_t *this, dict_t *xattr)
+afr_sh_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int source = 0;
- int active_sinks = 0;
- int call_count = 0;
- int i = 0;
- struct timespec ts[2];
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = sh->source;
- active_sinks = sh->active_sinks;
-
- /*
- * 4 calls per sink - chown, chmod, utimes, setxattr
- */
- if (xattr)
- call_count = active_sinks * 4;
- else
- call_count = active_sinks * 3;
-
- local->call_count = call_count;
-
-#ifdef HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC
- ts[0] = sh->buf[source].st_atim;
- ts[1] = sh->buf[source].st_mtim;
-#elif HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC
- ts[0] = sh->buf[source].st_atimespec;
- ts[1] = sh->buf[source].st_mtimespec;
-#else
- ts[0].tv_sec = sh->buf[source].st_atime;
- ts[1].tv_sec = sh->buf[source].st_mtime;
-#endif
+ int i = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (op_ret < 0) {
+ afr_sh_metadata_sync_cbk (frame, cookie,
+ this, -1, op_errno, xdata);
+ goto out;
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (call_count == 0) {
- break;
- }
- if (sh->sources[i] || !local->child_up[i])
- continue;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "self-healing metadata of %s from %s to %s",
- local->loc.path, priv->children[source]->name,
- priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_attr_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->chown,
- &local->loc,
- sh->buf[source].st_uid,
- sh->buf[source].st_gid);
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_attr_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->chmod,
- &local->loc, sh->buf[source].st_mode);
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_attr_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->utimens,
- &local->loc, ts);
-
- call_count = call_count - 3;
-
- if (!xattr)
- continue;
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_xattr_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setxattr,
- &local->loc, xattr, 0);
- call_count--;
- }
-
- return 0;
+ i = (long) cookie;
+
+ STACK_WIND_COOKIE (frame, afr_sh_metadata_xattr_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->setxattr,
+ &local->loc, local->xattr_req, 0, NULL);
+
+ out:
+ return 0;
+}
+
+inline void
+afr_prune_special_keys (dict_t *xattr_dict)
+{
+ dict_del (xattr_dict, GF_SELINUX_XATTR_KEY);
}
+inline void
+afr_prune_pending_keys (dict_t *xattr_dict, afr_private_t *priv)
+{
+ int i = 0;
+
+ for (; i < priv->child_count; i++) {
+ dict_del (xattr_dict, priv->pending_key[i]);
+ }
+}
int
-afr_sh_metadata_getxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
+afr_sh_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int source = 0;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
- int i;
+ priv = this->private;
+ local = frame->local;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ if (op_ret < 0) {
+ afr_sh_metadata_sync_cbk (frame, cookie,
+ this, -1, op_errno, xdata);
+ goto out;
+ }
- source = sh->source;
+ afr_prune_pending_keys (xattr, priv);
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "getxattr of %s failed on subvolume %s (%s). proceeding without xattr",
- local->loc.path, priv->children[source]->name,
- strerror (op_errno));
+ afr_prune_special_keys (xattr);
- afr_sh_metadata_sync (frame, this, NULL);
- } else {
- for (i = 0; i < priv->child_count; i++) {
- dict_del (xattr, priv->pending_key[i]);
- }
+ i = (long) cookie;
- afr_sh_metadata_sync (frame, this, xattr);
- }
+ /* send removexattr in bulk via xdata */
+ STACK_WIND_COOKIE (frame, afr_sh_removexattr_cbk,
+ cookie,
+ priv->children[i],
+ priv->children[i]->fops->removexattr,
+ &local->loc, "", xattr);
- return 0;
+ out:
+ return 0;
}
-
int
-afr_sh_metadata_sync_prepare (call_frame_t *frame, xlator_t *this)
+afr_sh_metadata_sync (call_frame_t *frame, xlator_t *this, dict_t *xattr)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int active_sinks = 0;
- int source = 0;
- int i = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = sh->source;
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] == 0 && local->child_up[i] == 1) {
- active_sinks++;
- sh->success[i] = 1;
- }
- }
- sh->success[source] = 1;
-
- if (active_sinks == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no active sinks for performing self-heal on file %s",
- local->loc.path);
- afr_sh_metadata_finish (frame, this);
- return 0;
- }
- sh->active_sinks = active_sinks;
-
- gf_log (this->name, GF_LOG_TRACE,
- "syncing metadata of %s from subvolume %s to %d active sinks",
- local->loc.path, priv->children[source]->name, active_sinks);
-
- STACK_WIND (frame, afr_sh_metadata_getxattr_cbk,
- priv->children[source],
- priv->children[source]->fops->getxattr,
- &local->loc, NULL);
-
- return 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int source = 0;
+ int active_sinks = 0;
+ int call_count = 0;
+ int i = 0;
+
+ struct iatt stbuf = {0,};
+ int32_t valid = 0;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ source = sh->source;
+ active_sinks = sh->active_sinks;
+
+ /*
+ * 2 calls per sink - setattr, setxattr
+ */
+ if (xattr) {
+ call_count = active_sinks * 2;
+ local->xattr_req = dict_ref (xattr);
+ } else
+ call_count = active_sinks;
+
+ local->call_count = call_count;
+
+ stbuf.ia_atime = sh->buf[source].ia_atime;
+ stbuf.ia_atime_nsec = sh->buf[source].ia_atime_nsec;
+ stbuf.ia_mtime = sh->buf[source].ia_mtime;
+ stbuf.ia_mtime_nsec = sh->buf[source].ia_mtime_nsec;
+
+ stbuf.ia_uid = sh->buf[source].ia_uid;
+ stbuf.ia_gid = sh->buf[source].ia_gid;
+
+ stbuf.ia_type = sh->buf[source].ia_type;
+ stbuf.ia_prot = sh->buf[source].ia_prot;
+
+ valid = GF_SET_ATTR_MODE |
+ GF_SET_ATTR_UID | GF_SET_ATTR_GID |
+ GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (call_count == 0) {
+ break;
+ }
+ if (sh->sources[i] || !local->child_up[i])
+ continue;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "self-healing metadata of %s from %s to %s",
+ local->loc.path, priv->children[source]->name,
+ priv->children[i]->name);
+
+ STACK_WIND_COOKIE (frame, afr_sh_metadata_setattr_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->setattr,
+ &local->loc, &stbuf, valid, NULL);
+
+ call_count--;
+
+ if (!xattr)
+ continue;
+
+ STACK_WIND_COOKIE (frame, afr_sh_getxattr_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->getxattr,
+ &local->loc, NULL, NULL);
+ call_count--;
+ }
+
+ return 0;
}
int
-afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this)
+afr_sh_metadata_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int nsources = 0;
- int source = 0;
- int i = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int source = 0;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
- afr_sh_build_pending_matrix (priv, sh->pending_matrix, sh->xattr,
- priv->child_count,
- AFR_METADATA_TRANSACTION);
+ source = sh->source;
- afr_sh_print_pending_matrix (sh->pending_matrix, this);
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "getxattr of %s failed on subvolume %s (%s). proceeding without xattr",
+ local->loc.path, priv->children[source]->name,
+ strerror (op_errno));
+
+ afr_sh_metadata_sync (frame, this, NULL);
+ } else {
+ afr_prune_pending_keys (xattr, priv);
+ afr_sh_metadata_sync (frame, this, xattr);
+ }
- nsources = afr_sh_mark_sources (sh, priv->child_count,
- AFR_SELF_HEAL_METADATA);
+ return 0;
+}
- afr_sh_supress_errenous_children (sh->sources, sh->child_errno,
- priv->child_count);
+static void
+afr_set_metadata_sh_info_str (afr_local_t *local, afr_self_heal_t *sh,
+ xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ char num[1024] = {0};
+ size_t len = 0;
+ char *string = NULL;
+ size_t off = 0;
+ char *source_child = " from source %s to";
+ char *format = " %s, ";
+ char *string_msg = " metadata self heal";
+ char *pending_matrix_str = NULL;
+ int down_child_present = 0;
+ int unknown_child_present = 0;
+ char *down_subvol_1 = " down subvolume is ";
+ char *unknown_subvol_1 = " unknown subvolume is";
+ char *down_subvol_2 = " down subvolumes are ";
+ char *unknown_subvol_2 = " unknown subvolumes are ";
+ int down_count = 0;
+ int unknown_count = 0;
+
+ priv = this->private;
+
+ pending_matrix_str = afr_get_pending_matrix_str (sh->pending_matrix,
+ this);
+
+ if (!pending_matrix_str)
+ pending_matrix_str = "";
+
+ len += snprintf (num, sizeof (num), "%s", string_msg);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if ((sh->source == i) && (local->child_up[i] == 1)) {
+ len += snprintf (num, sizeof (num), source_child,
+ priv->children[i]->name);
+ } else if ((local->child_up[i] == 1) && (sh->sources[i] == 0)) {
+ len += snprintf (num, sizeof (num), format,
+ priv->children[i]->name);
+ } else if (local->child_up[i] == 0) {
+ len += snprintf (num, sizeof (num), format,
+ priv->children[i]->name);
+ if (!down_child_present)
+ down_child_present = 1;
+ down_count++;
+ } else if (local->child_up[i] == -1) {
+ len += snprintf (num, sizeof (num), format,
+ priv->children[i]->name);
+ if (!unknown_child_present)
+ unknown_child_present = 1;
+ unknown_count++;
+ }
+ }
- if (nsources == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "No self-heal needed for %s",
- local->loc.path);
+ if (down_child_present) {
+ if (down_count > 1) {
+ len += snprintf (num, sizeof (num), "%s",
+ down_subvol_2);
+ } else {
+ len += snprintf (num, sizeof (num), "%s",
+ down_subvol_1);
+ }
+ }
+ if (unknown_child_present) {
+ if (unknown_count > 1) {
+ len += snprintf (num, sizeof (num), "%s",
+ unknown_subvol_2);
+ } else {
+ len += snprintf (num, sizeof (num), "%s",
+ unknown_subvol_1);
+ }
+ }
+
+ len ++;
+
+ string = GF_CALLOC (len, sizeof (char), gf_common_mt_char);
+ if (!string)
+ return;
+ off += snprintf (string + off, len - off, "%s", string_msg);
+ for (i=0; i < priv->child_count; i++) {
+ if ((sh->source == i) && (local->child_up[i] == 1))
+ off += snprintf (string + off, len - off, source_child,
+ priv->children[i]->name);
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if ((local->child_up[i] == 1)&& (sh->sources[i] == 0))
+ off += snprintf (string + off, len - off, format,
+ priv->children[i]->name);
+ }
+
+ if (down_child_present) {
+ if (down_count > 1) {
+ off += snprintf (string + off, len - off, "%s",
+ down_subvol_2);
+ } else {
+ off += snprintf (string + off, len - off, "%s",
+ down_subvol_1);
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i] == 0)
+ off += snprintf (string + off, len - off, format,
+ priv->children[i]->name);
+ }
+
+ if (unknown_child_present) {
+ if (unknown_count > 1) {
+ off += snprintf (string + off, len - off, "%s",
+ unknown_subvol_2);
+ } else {
+ off += snprintf (string + off, len - off, "%s",
+ unknown_subvol_1);
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i] == -1)
+ off += snprintf (string + off, len - off, format,
+ priv->children[i]->name);
+ }
+
+ gf_asprintf (&sh->metadata_sh_info, "%s metadata %s,", string,
+ pending_matrix_str);
+
+ if (pending_matrix_str && strcmp (pending_matrix_str, ""))
+ GF_FREE (pending_matrix_str);
+
+ if (string && strcmp (string, ""))
+ GF_FREE (string);
+}
+
+int
+afr_sh_metadata_sync_prepare (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int source = 0;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ source = sh->source;
+
+ afr_sh_mark_source_sinks (frame, this);
+ if (sh->active_sinks == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no active sinks for performing self-heal on file %s",
+ local->loc.path);
afr_sh_metadata_finish (frame, this);
return 0;
}
- if ((nsources == -1)
- && (priv->favorite_child != -1)
- && (sh->child_errno[priv->favorite_child] == 0)) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "syncing metadata of %s from subvolume %s to %d active sinks",
+ local->loc.path, priv->children[source]->name,
+ sh->active_sinks);
- gf_log (this->name, GF_LOG_WARNING,
- "Picking favorite child %s as authentic source to resolve conflicting metadata of %s",
- priv->children[priv->favorite_child]->name,
- local->loc.path);
+ sh->actual_sh_started = _gf_true;
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_SYNC_BEGIN);
+ afr_set_metadata_sh_info_str (local, sh, this);
+ STACK_WIND (frame, afr_sh_metadata_getxattr_cbk,
+ priv->children[source],
+ priv->children[source]->fops->getxattr,
+ &local->loc, NULL, NULL);
- sh->sources[priv->favorite_child] = 1;
+ return 0;
+}
- nsources = afr_sh_source_count (sh->sources,
- priv->child_count);
- }
- if (nsources == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to self-heal permissions/ownership of '%s' "
- "(possible split-brain). Please fix the file on "
- "all backend volumes", local->loc.path);
+void
+afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno)
+{
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int nsources = 0;
+ int source = 0;
+ int i = 0;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ priv = this->private;
+
+ if (op_ret < 0) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_sh_set_error (sh, op_errno);
+ afr_sh_metadata_finish (frame, this);
+ goto out;
+ }
+ nsources = afr_build_sources (this, sh->xattr, sh->buf,
+ sh->pending_matrix, sh->sources,
+ sh->success_children,
+ AFR_METADATA_TRANSACTION, NULL, _gf_false);
+ if ((nsources == -1)
+ && (priv->favorite_child != -1)
+ && (sh->child_errno[priv->favorite_child] == 0)) {
+
+ gf_log (this->name, GF_LOG_WARNING,
+ "Picking favorite child %s as authentic source to resolve conflicting metadata of %s",
+ priv->children[priv->favorite_child]->name,
+ local->loc.path);
- local->govinda_gOvinda = 1;
+ sh->sources[priv->favorite_child] = 1;
- afr_sh_metadata_finish (frame, this);
- return 0;
- }
+ nsources = afr_sh_source_count (sh->sources,
+ priv->child_count);
+ }
- source = afr_sh_select_source (sh->sources, priv->child_count);
+ if (nsources == -1) {
+ afr_sh_print_split_brain_log (sh->pending_matrix, this,
+ local->loc.path);
+ afr_set_split_brain (this, sh->inode, SPB, DONT_KNOW);
+ afr_sh_metadata_fail (frame, this);
+ goto out;
+ }
+
+ afr_set_split_brain (this, sh->inode, NO_SPB, DONT_KNOW);
+ if (nsources == 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "No self-heal needed for %s",
+ local->loc.path);
+
+ afr_sh_metadata_finish (frame, this);
+ goto out;
+ }
+
+ source = afr_sh_select_source (sh->sources, priv->child_count);
if (source == -1) {
gf_log (this->name, GF_LOG_DEBUG,
"No active sources found.");
afr_sh_metadata_finish (frame, this);
- return 0;
+ goto out;
}
- sh->source = source;
+ sh->source = source;
- /* detect changes not visible through pending flags -- JIC */
- for (i = 0; i < priv->child_count; i++) {
- if (i == source || sh->child_errno[i])
- continue;
+ /* detect changes not visible through pending flags -- JIC */
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == source || sh->child_errno[i])
+ continue;
- if (PERMISSION_DIFFERS (&sh->buf[i], &sh->buf[source]))
- sh->sources[i] = 0;
+ if (PERMISSION_DIFFERS (&sh->buf[i], &sh->buf[source]))
+ sh->sources[i] = 0;
- if (OWNERSHIP_DIFFERS (&sh->buf[i], &sh->buf[source]))
- sh->sources[i] = 0;
- }
+ if (OWNERSHIP_DIFFERS (&sh->buf[i], &sh->buf[source]))
+ sh->sources[i] = 0;
+ }
- afr_sh_metadata_sync_prepare (frame, this);
+ if ((!IA_ISREG (sh->buf[source].ia_type)) &&
+ (!IA_ISDIR (sh->buf[source].ia_type))) {
+ afr_reset_children (sh->fresh_children, priv->child_count);
+ afr_get_fresh_children (sh->success_children, sh->sources,
+ sh->fresh_children, priv->child_count);
+ afr_inode_set_read_ctx (this, sh->inode, sh->source,
+ sh->fresh_children);
+ }
- return 0;
+ if (sh->do_metadata_self_heal && priv->metadata_self_heal)
+ afr_sh_metadata_sync_prepare (frame, this);
+ else
+ afr_sh_metadata_finish (frame, this);
+out:
+ return;
}
-
int
-afr_sh_metadata_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf, dict_t *xattr)
+afr_sh_metadata_post_nonblocking_inodelk_cbk (call_frame_t *frame,
+ xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = 0;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "path %s on subvolume %s is of mode 0%o",
- local->loc.path,
- priv->children[child_index]->name,
- buf->st_mode);
-
- sh->buf[child_index] = *buf;
- if (xattr)
- sh->xattr[child_index] = dict_ref (xattr);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "path %s on subvolume %s => -1 (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
-
- sh->child_errno[child_index] = op_errno;
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- afr_sh_metadata_fix (frame, this);
-
- return 0;
-}
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ if (int_lock->lock_op_ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG, "Non Blocking metadata "
+ "inodelks failed for %s.", local->loc.path);
+ gf_log (this->name, GF_LOG_DEBUG, "Metadata self-heal "
+ "failed for %s.", local->loc.path);
+ afr_sh_metadata_done (frame, this);
+ } else {
+
+ gf_log (this->name, GF_LOG_DEBUG, "Non Blocking metadata "
+ "inodelks done for %s. Proceeding to FOP",
+ local->loc.path);
+ afr_sh_common_lookup (frame, this, &local->loc,
+ afr_sh_metadata_fix, NULL,
+ AFR_LOOKUP_FAIL_CONFLICTS |
+ AFR_LOOKUP_FAIL_MISSING_GFIDS,
+ NULL);
+ }
+ return 0;
+}
int
-afr_sh_metadata_lookup (call_frame_t *frame, xlator_t *this)
+afr_sh_metadata_lock (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int call_count = 0;
- dict_t *xattr_req = NULL;
- int ret = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- call_count = local->child_count;
- local->call_count = call_count;
-
- xattr_req = dict_new();
-
- if (xattr_req) {
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_uint64 (xattr_req,
- priv->pending_key[i],
- 3 * sizeof(int32_t));
- }
- }
+ afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
+ afr_local_t *local = NULL;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_lookup_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- &local->loc, xattr_req);
- if (!--call_count)
- break;
- }
- }
-
- if (xattr_req)
- dict_unref (xattr_req);
-
- return 0;
-}
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ int_lock->domain = this->name;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ int_lock->transaction_lk_type = AFR_SELFHEAL_LK;
+ int_lock->selfheal_lk_type = AFR_METADATA_SELF_HEAL_LK;
-int
-afr_sh_metadata_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
-
- /* TODO: what if lock fails? */
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- sh->op_failed = 1;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "locking of %s on child %d failed: %s",
- local->loc.path, child_index,
- strerror (op_errno));
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "inode of %s on child %d locked",
- local->loc.path, child_index);
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (sh->op_failed) {
- afr_sh_metadata_finish (frame, this);
- return 0;
- }
-
- afr_sh_metadata_lookup (frame, this);
- }
-
- return 0;
-}
+ afr_set_lock_number (frame, this);
+ inodelk->flock.l_start = LLONG_MAX - 1;
+ inodelk->flock.l_len = 0;
+ inodelk->flock.l_type = F_WRLCK;
+ int_lock->lock_cbk = afr_sh_metadata_post_nonblocking_inodelk_cbk;
-int
-afr_sh_metadata_lock (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int call_count = 0;
- struct flock flock = {0, };
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- call_count = local->child_count;
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- flock.l_start = 0;
- flock.l_len = 0;
- flock.l_type = F_WRLCK;
-
- if (local->child_up[i]) {
- gf_log (this->name, GF_LOG_TRACE,
- "locking %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_lk_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- this->name,
- &local->loc, F_SETLK, &flock);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ afr_nonblocking_inodelk (frame, this);
+
+ return 0;
}
+gf_boolean_t
+afr_can_start_metadata_self_heal (afr_self_heal_t *sh, afr_private_t *priv)
+{
+ if (sh->force_confirm_spb)
+ return _gf_true;
+ if (sh->do_metadata_self_heal && priv->metadata_self_heal)
+ return _gf_true;
+ return _gf_false;
+}
int
afr_self_heal_metadata (call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = this->private;
-
-
- local = frame->local;
- sh = &local->self_heal;
-
- if (local->need_metadata_self_heal && priv->metadata_self_heal) {
- afr_sh_metadata_lock (frame, this);
- } else {
- afr_sh_metadata_done (frame, this);
- }
+ afr_local_t *local = NULL;
+ afr_private_t *priv = this->private;
+ afr_self_heal_t *sh = &local->self_heal;
+
+ local = frame->local;
+ sh = &local->self_heal;
+ sh->sh_type_in_action = AFR_SELF_HEAL_METADATA;
+
+ if (afr_can_start_metadata_self_heal (sh, priv)) {
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
+ afr_sh_metadata_lock (frame, this);
+ } else {
+ afr_sh_metadata_done (frame, this);
+ }
- return 0;
+ return 0;
}
-
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index 73abd8fb9..7c9bc8111 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __AFR_SELF_HEAL_H__
@@ -22,19 +13,12 @@
#include <sys/stat.h>
-#define FILETYPE_DIFFERS(buf1,buf2) ((S_IFMT & ((struct stat *)buf1)->st_mode) != (S_IFMT & ((struct stat *)buf2)->st_mode))
-#define PERMISSION_DIFFERS(buf1,buf2) ((((struct stat *)buf1)->st_mode) != (((struct stat *)buf2)->st_mode))
-#define OWNERSHIP_DIFFERS(buf1,buf2) ((((struct stat *)buf1)->st_uid) != (((struct stat *)buf2)->st_uid) || (((struct stat *)buf1)->st_gid != (((struct stat *)buf2)->st_gid)))
-#define SIZE_DIFFERS(buf1,buf2) ((((struct stat *)buf1)->st_size) != (((struct stat *)buf2)->st_size))
-
-#define SIZE_GREATER(buf1,buf2) ((((struct stat *)buf1)->st_size > (((struct stat *)buf2)->st_size)))
+#define FILETYPE_DIFFERS(buf1,buf2) ((buf1)->ia_type != (buf2)->ia_type)
+#define PERMISSION_DIFFERS(buf1,buf2) (st_mode_from_ia ((buf1)->ia_prot, (buf1)->ia_type) != st_mode_from_ia ((buf2)->ia_prot, (buf2)->ia_type))
+#define OWNERSHIP_DIFFERS(buf1,buf2) (((buf1)->ia_uid != (buf2)->ia_uid) || ((buf1)->ia_gid != (buf2)->ia_gid))
+#define SIZE_DIFFERS(buf1,buf2) ((buf1)->ia_size != (buf2)->ia_size)
-int
-afr_sh_has_metadata_pending (dict_t *xattr, int child_count, xlator_t *this);
-int
-afr_sh_has_entry_pending (dict_t *xattr, int child_count, xlator_t *this);
-int
-afr_sh_has_data_pending (dict_t *xattr, int child_count, xlator_t *this);
+#define SIZE_GREATER(buf1,buf2) ((buf1)->ia_size > (buf2)->ia_size)
int
afr_self_heal_entry (call_frame_t *frame, xlator_t *this);
@@ -46,7 +30,14 @@ int
afr_self_heal_metadata (call_frame_t *frame, xlator_t *this);
int
-afr_self_heal (call_frame_t *frame, xlator_t *this,
- int (*completion_cbk) (call_frame_t *, xlator_t *));
+afr_self_heal_get_source (xlator_t *this, afr_local_t *local, dict_t **xattr);
+
+int
+afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode);
+int
+afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
+ dict_t **xattr,
+ afr_transaction_type txn_type,
+ uuid_t gfid);
#endif /* __AFR_SELF_HEAL_H__ */
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
new file mode 100644
index 000000000..1b48a1bca
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -0,0 +1,1787 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+#include "afr.h"
+#include "syncop.h"
+#include "afr-self-heald.h"
+#include "afr-self-heal-common.h"
+#include "protocol-common.h"
+#include "event-history.h"
+
+typedef enum {
+ STOP_CRAWL_ON_SINGLE_SUBVOL = 1
+} afr_crawl_flags_t;
+
+typedef enum {
+ HEAL = 1,
+ INFO,
+ STATISTICS_TO_BE_HEALED,
+} shd_crawl_op;
+
+typedef struct shd_dump {
+ dict_t *dict;
+ xlator_t *this;
+ int child;
+} shd_dump_t;
+
+typedef struct shd_event_ {
+ int child;
+ char *path;
+} shd_event_t;
+
+typedef struct shd_pos_ {
+ int child;
+ xlator_t *this;
+ afr_child_pos_t pos;
+} shd_pos_t;
+
+typedef int
+(*afr_crawl_done_cbk_t) (int ret, call_frame_t *sync_frame, void *crawl_data);
+
+void
+afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl,
+ process_entry_cbk_t process_entry, void *op_data,
+ gf_boolean_t exclusive, int crawl_flags,
+ afr_crawl_done_cbk_t crawl_done);
+
+static int
+_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data);
+
+/* For calling straight through (e.g. already in a synctask). */
+int
+afr_find_child_position (xlator_t *this, int child, afr_child_pos_t *pos);
+
+/* For deferring through a new synctask. */
+int
+afr_syncop_find_child_position (void *data);
+
+static int
+_loc_assign_gfid_path (loc_t *loc)
+{
+ int ret = -1;
+ char gfid_path[64] = {0};
+
+ if (loc->inode && !uuid_is_null (loc->inode->gfid)) {
+ ret = inode_path (loc->inode, NULL, (char**)&loc->path);
+ } else if (!uuid_is_null (loc->gfid)) {
+ snprintf (gfid_path, sizeof (gfid_path), "<gfid:%s>",
+ uuid_utoa (loc->gfid));
+ loc->path = gf_strdup (gfid_path);
+ if (loc->path)
+ ret = 0;
+ }
+ return ret;
+}
+
+void
+_destroy_crawl_event_data (void *data)
+{
+ shd_crawl_event_t *crawl_event = NULL;
+
+ if (!data)
+ goto out;
+
+ crawl_event = (shd_crawl_event_t *)data;
+ GF_FREE (crawl_event->start_time_str);
+ GF_FREE (crawl_event->end_time_str);
+
+out:
+ return;
+}
+
+void
+_destroy_shd_event_data (void *data)
+{
+ shd_event_t *event = NULL;
+ if (!data)
+ goto out;
+ event = (shd_event_t*)data;
+ GF_FREE (event->path);
+out:
+ return;
+}
+void
+shd_cleanup_event (void *event)
+{
+ shd_event_t *shd_event = event;
+
+ if (!shd_event)
+ goto out;
+ GF_FREE (shd_event->path);
+ GF_FREE (shd_event);
+out:
+ return;
+}
+
+int
+afr_get_local_child (afr_self_heald_t *shd, unsigned int child_count)
+{
+ int i = 0;
+ int ret = -1;
+ for (i = 0; i < child_count; i++) {
+ if (shd->pos[i] == AFR_POS_LOCAL) {
+ ret = i;
+ break;
+ }
+ }
+ return ret;
+}
+
+static int
+_build_index_loc (xlator_t *this, loc_t *loc, char *name, loc_t *parent)
+{
+ int ret = 0;
+
+ uuid_copy (loc->pargfid, parent->inode->gfid);
+ loc->path = "";
+ loc->name = name;
+ loc->parent = inode_ref (parent->inode);
+ if (!loc->parent) {
+ loc->path = NULL;
+ loc_wipe (loc);
+ ret = -1;
+ }
+ return ret;
+}
+
+int
+_add_crawl_stats_to_dict (xlator_t *this, dict_t *output, int child,
+ shd_crawl_event_t *shd_event, struct timeval *tv)
+{
+ int ret = 0;
+ uint64_t count = 0;
+ char key[256] = {0};
+ int xl_id = 0;
+ uint64_t healed_count = 0;
+ uint64_t split_brain_count = 0;
+ uint64_t heal_failed_count = 0;
+ char *start_time_str = NULL;
+ char *end_time_str = NULL;
+ char *crawl_type = NULL;
+ int progress = -1;
+
+ healed_count = shd_event->healed_count;
+ split_brain_count = shd_event->split_brain_count;
+ heal_failed_count = shd_event->heal_failed_count;
+ start_time_str = shd_event->start_time_str;
+ end_time_str = shd_event->end_time_str;
+ crawl_type = shd_event->crawl_type;
+
+ if (!start_time_str) {
+ ret = -1;
+ goto out;
+ }
+
+
+ ret = dict_get_int32 (output, this->name, &xl_id);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "xl does not have id");
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "statistics-%d-%d-count", xl_id, child);
+ ret = dict_get_uint64 (output, key, &count);
+
+ snprintf (key, sizeof (key), "statistics_healed_cnt-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ ret = dict_set_uint64(output, key, healed_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+ "healed_count to outout");
+ goto out;
+ }
+ snprintf (key, sizeof (key), "statistics_sb_cnt-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ ret = dict_set_uint64 (output, key, split_brain_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+ "split_brain_count to outout");
+ goto out;
+ }
+ snprintf (key, sizeof (key), "statistics_crawl_type-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ ret = dict_set_dynstr (output, key, gf_strdup (crawl_type));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+ "crawl_type to output");
+ goto out;
+ }
+ snprintf (key, sizeof (key), "statistics_heal_failed_cnt-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ ret = dict_set_uint64 (output, key, heal_failed_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+ "healed_failed_count to outout");
+ goto out;
+ }
+ snprintf (key, sizeof (key), "statistics_strt_time-%d-%d-%"PRIu64,
+ xl_id, child, count);
+ ret = dict_set_dynstr (output, key, gf_strdup(start_time_str));
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+ "crawl_start_time to outout");
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "statistics_end_time-%d-%d-%"PRIu64,
+ xl_id, child, count);
+
+ if (!end_time_str)
+ end_time_str = "Could not determine the end time";
+ ret = dict_set_dynstr (output, key, gf_strdup(end_time_str));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+ "crawl_end_time to outout");
+ goto out;
+ }
+ snprintf (key, sizeof (key), "statistics_inprogress-%d-%d-%"PRIu64,
+ xl_id, child, count);
+
+ if (shd_event->crawl_inprogress == _gf_true)
+ progress = 1;
+ else
+ progress = 0;
+
+ ret = dict_set_int32 (output, key, progress);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not add statistics_"
+ "inprogress to outout");
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "statistics-%d-%d-count",xl_id, child);
+ ret = dict_set_uint64 (output, key, count + 1);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not increment the "
+ "counter.");
+ goto out;
+ }
+out:
+ return ret;
+}
+
+int
+_add_path_to_dict (xlator_t *this, dict_t *output, int child, char *path,
+ struct timeval *tv, gf_boolean_t dyn)
+{
+ //subkey not used for now
+ int ret = -1;
+ uint64_t count = 0;
+ char key[256] = {0};
+ int xl_id = 0;
+
+ ret = dict_get_int32 (output, this->name, &xl_id);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "xl does not have id");
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "%d-%d-count", xl_id, child);
+ ret = dict_get_uint64 (output, key, &count);
+
+ snprintf (key, sizeof (key), "%d-%d-%"PRIu64, xl_id, child, count);
+ if (dyn)
+ ret = dict_set_dynstr (output, key, path);
+ else
+ ret = dict_set_str (output, key, path);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: Could not add to output",
+ path);
+ goto out;
+ }
+
+ if (!tv)
+ goto inc_count;
+ snprintf (key, sizeof (key), "%d-%d-%"PRIu64"-time", xl_id,
+ child, count);
+ ret = dict_set_uint32 (output, key, tv->tv_sec);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: Could not set time",
+ path);
+ goto out;
+ }
+
+inc_count:
+ snprintf (key, sizeof (key), "%d-%d-count", xl_id, child);
+ ret = dict_set_uint64 (output, key, count + 1);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not increment count");
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+_get_path_from_gfid_loc (xlator_t *this, xlator_t *readdir_xl, loc_t *child,
+ char **fpath, gf_boolean_t *missing)
+{
+ dict_t *xattr = NULL;
+ char *path = NULL;
+ int ret = -1;
+
+ ret = syncop_getxattr (readdir_xl, child, &xattr, GFID_TO_PATH_KEY);
+ if (ret < 0) {
+ if ((errno == ENOENT) && missing)
+ *missing = _gf_true;
+ goto out;
+ }
+ ret = dict_get_str (xattr, GFID_TO_PATH_KEY, &path);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get path for "
+ "gfid %s", uuid_utoa (child->gfid));
+ goto out;
+ }
+ path = gf_strdup (path);
+ if (!path) {
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+out:
+ if (!ret)
+ *fpath = path;
+ if (xattr)
+ dict_unref (xattr);
+ return ret;
+}
+
+int
+_add_event_to_dict (circular_buffer_t *cb, void *data)
+{
+ int ret = 0;
+ shd_dump_t *dump_data = NULL;
+ shd_event_t *shd_event = NULL;
+
+ dump_data = data;
+ shd_event = cb->data;
+ if (shd_event->child != dump_data->child)
+ goto out;
+ ret = _add_path_to_dict (dump_data->this, dump_data->dict,
+ dump_data->child, shd_event->path, &cb->tv,
+ _gf_false);
+out:
+ return ret;
+}
+
+int
+_add_crawl_event_statistics_to_dict (circular_buffer_t *cb, void *data)
+{
+ int ret = 0;
+ shd_dump_t *dump_data = NULL;
+ shd_crawl_event_t *shd_event = NULL;
+
+ dump_data = data;
+ shd_event = cb->data;
+ ret = _add_crawl_stats_to_dict (dump_data->this, dump_data->dict,
+ dump_data->child, shd_event, &cb->tv);
+ return ret;
+}
+
+int
+_add_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict, int child)
+{
+ shd_dump_t dump_data = {0};
+
+ dump_data.this = this;
+ dump_data.dict = dict;
+ dump_data.child = child;
+ eh_dump (eh, &dump_data, _add_event_to_dict);
+ return 0;
+}
+
+
+int
+_add_statistics_to_dict (xlator_t *this, dict_t *dict, int child)
+{
+ shd_dump_t dump_data = {0};
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+
+ priv = this->private;
+ shd = &priv->shd;
+
+ dump_data.this = this;
+ dump_data.dict = dict;
+ dump_data.child = child;
+ eh_dump (shd->statistics[child], &dump_data,
+ _add_crawl_event_statistics_to_dict);
+ return 0;
+
+}
+
+void
+_remove_stale_index (xlator_t *this, xlator_t *readdir_xl,
+ loc_t *parent, char *fname)
+{
+ int ret = 0;
+ loc_t index_loc = {0};
+
+ ret = _build_index_loc (this, &index_loc, fname, parent);
+ if (ret)
+ goto out;
+ gf_log (this->name, GF_LOG_DEBUG, "Removing stale index "
+ "for %s on %s", index_loc.name, readdir_xl->name);
+ ret = syncop_unlink (readdir_xl, &index_loc);
+ if(ret && (errno != ENOENT)) {
+ gf_log(this->name, GF_LOG_ERROR, "%s: Failed to remove index "
+ "on %s - %s",index_loc.name, readdir_xl->name,
+ strerror (errno));
+ }
+ index_loc.path = NULL;
+ loc_wipe (&index_loc);
+out:
+ return;
+}
+
+int
+_count_hard_links_under_base_indices_dir (xlator_t *this,
+ afr_crawl_data_t *crawl_data,
+ gf_dirent_t *entry, loc_t *childloc,
+ loc_t *parentloc, struct iatt *iattr)
+{
+ xlator_t *readdir_xl = crawl_data->readdir_xl;
+ struct iatt parent = {0};
+ int ret = 0;
+ dict_t *output = NULL;
+ int xl_id = 0;
+ char key[256] = {0};
+ int child = -1;
+ uint64_t hardlinks = 0;
+
+ output = crawl_data->op_data;
+ child = crawl_data->child;
+
+ ret = syncop_lookup (readdir_xl, childloc, NULL, iattr, NULL, &parent);
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32 (output, this->name, &xl_id);
+ if (ret)
+ goto out;
+
+ snprintf (key, sizeof (key), "%d-%d-hardlinks", xl_id, child);
+ ret = dict_get_uint64 (output, key, &hardlinks);
+
+ /*Removing the count of base_entry under indices/base_indicies and
+ * entry under indices/xattrop */
+ hardlinks = hardlinks + iattr->ia_nlink - 2;
+ ret = dict_set_uint64 (output, key, hardlinks);
+ if (ret)
+ goto out;
+
+out:
+ return ret;
+}
+
+int
+_add_summary_to_dict (xlator_t *this, afr_crawl_data_t *crawl_data,
+ gf_dirent_t *entry,
+ loc_t *childloc, loc_t *parentloc, struct iatt *iattr)
+{
+ dict_t *output = NULL;
+ xlator_t *readdir_xl = NULL;
+ int ret = -1;
+ char *path = NULL;
+ gf_boolean_t missing = _gf_false;
+ char gfid_str[64] = {0};
+
+ if (uuid_is_null (childloc->gfid))
+ goto out;
+
+ output = crawl_data->op_data;
+ readdir_xl = crawl_data->readdir_xl;
+
+ ret = _get_path_from_gfid_loc (this, readdir_xl, childloc, &path,
+ &missing);
+ if (ret == 0) {
+ ret = _add_path_to_dict (this, output, crawl_data->child, path,
+ NULL, _gf_true);
+ } else if (missing) {
+ _remove_stale_index (this, readdir_xl, parentloc,
+ uuid_utoa_r (childloc->gfid, gfid_str));
+ }
+
+out:
+ if (ret && path)
+ GF_FREE (path);
+ return ret;
+}
+
+void
+_crawl_post_sh_action (xlator_t *this, loc_t *parent, loc_t *child,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr_rsp,
+ afr_crawl_data_t *crawl_data)
+{
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ eh_t *eh = NULL;
+ char *path = NULL;
+ char gfid_str[64] = {0};
+ shd_event_t *event = NULL;
+ int32_t sh_failed = 0;
+ gf_boolean_t split_brain = 0;
+ int32_t actual_sh_done = 0;
+ shd_crawl_event_t **shd_crawl_event = NULL;
+
+ priv = this->private;
+ shd = &priv->shd;
+ if (crawl_data->crawl == INDEX) {
+ if ((op_ret < 0) && (op_errno == ENOENT)) {
+ _remove_stale_index (this, crawl_data->readdir_xl,
+ parent, uuid_utoa_r (child->gfid,
+ gfid_str));
+ goto out;
+ }
+ ret = _get_path_from_gfid_loc (this, crawl_data->readdir_xl,
+ child, &path, NULL);
+ if (ret)
+ goto out;
+ } else {
+ path = gf_strdup (child->path);
+ if (!path) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (xattr_rsp) {
+ ret = dict_get_int32 (xattr_rsp, "sh-failed", &sh_failed);
+ ret = dict_get_int32 (xattr_rsp, "actual-sh-done", &actual_sh_done);
+ }
+
+ shd_crawl_event = (shd_crawl_event_t**)(shd->crawl_events);
+
+ split_brain = afr_is_split_brain (this, child->inode);
+ if ((op_ret < 0 && op_errno == EIO) || split_brain) {
+ eh = shd->split_brain;
+ shd_crawl_event[crawl_data->child]->split_brain_count += 1;
+ } else if ((op_ret < 0) || sh_failed) {
+ eh = shd->heal_failed;
+ shd_crawl_event[crawl_data->child]->heal_failed_count += 1;
+ } else if (actual_sh_done == 1) {
+ eh = shd->healed;
+ shd_crawl_event[crawl_data->child]->healed_count += 1;
+ }
+ ret = -1;
+
+ if (eh != NULL) {
+ event = GF_CALLOC (1, sizeof (*event), gf_afr_mt_shd_event_t);
+ if (!event)
+ goto out;
+ event->child = crawl_data->child;
+ event->path = path;
+
+ ret = eh_save_history (eh, event);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "%s:Failed to save "
+ "to event history, (%d, %s)", path, op_ret,
+ strerror (op_errno));
+
+ goto out;
+ }
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG, "%s:Self heal already done ",
+ path);
+
+ }
+ ret = 0;
+out:
+ if (ret && path)
+ GF_FREE (path);
+ return;
+}
+
+int
+_link_inode_update_loc (xlator_t *this, loc_t *loc, struct iatt *iattr)
+{
+ inode_t *link_inode = NULL;
+ int ret = -1;
+
+ link_inode = inode_link (loc->inode, NULL, NULL, iattr);
+ if (link_inode == NULL) {
+ gf_log (this->name, GF_LOG_ERROR, "inode link failed "
+ "on the inode (%s)", uuid_utoa (iattr->ia_gfid));
+ goto out;
+ }
+ inode_unref (loc->inode);
+ loc->inode = link_inode;
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+_self_heal_entry (xlator_t *this, afr_crawl_data_t *crawl_data, gf_dirent_t *entry,
+ loc_t *child, loc_t *parent, struct iatt *iattr)
+{
+ struct iatt parentbuf = {0};
+ int ret = 0;
+ dict_t *xattr_rsp = NULL;
+ dict_t *xattr_req = NULL;
+
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_int32 (xattr_req, "allow-sh-for-running-transaction", 1);
+
+ gf_log (this->name, GF_LOG_DEBUG, "lookup %s", child->path);
+
+ ret = syncop_lookup (this, child, xattr_req,
+ iattr, &xattr_rsp, &parentbuf);
+ _crawl_post_sh_action (this, parent, child, ret, errno, xattr_rsp,
+ crawl_data);
+ if (xattr_rsp)
+ dict_unref (xattr_rsp);
+ if (ret == 0)
+ ret = _link_inode_update_loc (this, child, iattr);
+
+out:
+ if (xattr_req)
+ dict_unref(xattr_req);
+ return ret;
+}
+
+static int
+afr_crawl_done (int ret, call_frame_t *sync_frame, void *data)
+{
+ GF_FREE (data);
+ STACK_DESTROY (sync_frame->root);
+ return 0;
+}
+
+void
+_do_self_heal_on_subvol (xlator_t *this, int child, afr_crawl_type_t crawl)
+{
+ afr_start_crawl (this, child, crawl, _self_heal_entry,
+ NULL, _gf_true, STOP_CRAWL_ON_SINGLE_SUBVOL,
+ afr_crawl_done);
+}
+
+gf_boolean_t
+_crawl_proceed (xlator_t *this, int child, int crawl_flags, char **reason)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ gf_boolean_t proceed = _gf_false;
+ char *msg = NULL;
+
+ priv = this->private;
+ shd = &priv->shd;
+ if (!shd->enabled) {
+ msg = "Self-heal daemon is not enabled";
+ gf_log (this->name, GF_LOG_DEBUG, "%s", msg);
+ goto out;
+ }
+ if (!priv->child_up[child]) {
+ gf_log (this->name, GF_LOG_DEBUG, "Stopping crawl for %s , "
+ "subvol went down", priv->children[child]->name);
+ msg = "Brick is Not connected";
+ goto out;
+ }
+
+ if (crawl_flags & STOP_CRAWL_ON_SINGLE_SUBVOL) {
+ if (afr_up_children_count (priv->child_up,
+ priv->child_count) < 2) {
+ gf_log (this->name, GF_LOG_DEBUG, "Stopping crawl as "
+ "< 2 children are up");
+ msg = "< 2 bricks in replica are running";
+ goto out;
+ }
+ }
+ proceed = _gf_true;
+out:
+ if (reason)
+ *reason = msg;
+ return proceed;
+}
+
+int
+_do_crawl_op_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl,
+ shd_crawl_op op, dict_t *output)
+{
+ afr_private_t *priv = NULL;
+ char *status = NULL;
+ char *subkey = NULL;
+ char key[256] = {0};
+ shd_pos_t pos_data = {0};
+ int op_ret = -1;
+ int xl_id = -1;
+ int i = 0;
+ int ret = 0;
+ int crawl_flags = 0;
+
+ priv = this->private;
+ if (op == HEAL)
+ crawl_flags |= STOP_CRAWL_ON_SINGLE_SUBVOL;
+
+ if (output) {
+ ret = dict_get_int32 (output, this->name, &xl_id);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Invalid input, "
+ "translator-id is not available");
+ goto out;
+ }
+ }
+ pos_data.this = this;
+ subkey = "status";
+ for (i = 0; i < priv->child_count; i++) {
+ if (_crawl_proceed (this, i, crawl_flags, &status)) {
+ pos_data.child = i;
+ /*
+ * We're already in a synctask in this case, so we
+ * don't need to defer through a second (and in fact
+ * that can cause deadlock). Just call straight
+ * through instead.
+ */
+ ret = afr_find_child_position(pos_data.this,
+ pos_data.child,
+ &pos_data.pos);
+ if (ret) {
+ status = "Not able to find brick location";
+ } else if (pos_data.pos == AFR_POS_REMOTE) {
+ status = "brick is remote";
+ } else {
+ op_ret = 0;
+ if (op == HEAL) {
+ status = "Started self-heal";
+ _do_self_heal_on_subvol (this, i,
+ crawl);
+ } else if (output && (op == INFO)) {
+ status = "";
+ afr_start_crawl (this, i, INDEX,
+ _add_summary_to_dict,
+ output, _gf_false, 0,
+ NULL);
+ } else if (output &&
+ (op == STATISTICS_TO_BE_HEALED)) {
+ status = "";
+ afr_start_crawl (this, i,
+ INDEX_TO_BE_HEALED,
+ _count_hard_links_under_base_indices_dir,
+ output, _gf_false,
+ 0, NULL);
+ }
+ }
+ if (output) {
+ snprintf (key, sizeof (key), "%d-%d-%s", xl_id,
+ i, subkey);
+ ret = dict_set_str (output, key, status);
+ }
+ if (!op_ret && (crawl == FULL))
+ break;
+ }
+ if (output) {
+ snprintf (key, sizeof (key), "%d-%d-%s", xl_id, i,
+ subkey);
+ ret = dict_set_str (output, key, status);
+ }
+ }
+out:
+ return op_ret;
+}
+
+int
+_do_self_heal_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl,
+ dict_t *output)
+{
+ return _do_crawl_op_on_local_subvols (this, crawl, HEAL, output);
+}
+
+int
+_get_index_summary_on_local_subvols (xlator_t *this, dict_t *output)
+{
+ return _do_crawl_op_on_local_subvols (this, INDEX, INFO, output);
+}
+
+void
+afr_fill_completed_crawl_statistics_to_dict (xlator_t *this, dict_t *dict)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ int i = 0;
+ priv = this->private;
+ shd= &priv->shd;
+ for (i = 0; i < priv->child_count; i++) {
+ if (shd->pos[i] != AFR_POS_LOCAL)
+ continue;
+ _add_statistics_to_dict (this, dict, i);
+ }
+
+ return ;
+}
+
+static void
+reset_crawl_event (shd_crawl_event_t *crawl_event)
+{
+ crawl_event->healed_count = 0;
+ crawl_event->split_brain_count = 0;
+ crawl_event->heal_failed_count = 0;
+ GF_FREE (crawl_event->start_time_str);
+ crawl_event->start_time_str = NULL;
+ crawl_event->end_time_str = NULL;
+ crawl_event->crawl_type = NULL;
+ crawl_event->crawl_inprogress = _gf_false;
+ return;
+}
+
+static void
+afr_copy_crawl_event_struct (shd_crawl_event_t *src, shd_crawl_event_t *dst)
+{
+ dst->healed_count = src->healed_count;
+ dst->split_brain_count = src->split_brain_count;
+ dst->heal_failed_count = src->heal_failed_count;
+ dst->start_time_str = gf_strdup (src->start_time_str);
+ dst->end_time_str = "Crawl is already in progress";
+ dst->crawl_type = src->crawl_type;
+ dst->crawl_inprogress = _gf_true;
+ return;
+}
+
+static int
+afr_fill_crawl_statistics_of_running_crawl(xlator_t *this, dict_t *dict)
+{
+ shd_crawl_event_t *evnt = NULL;
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ int i = 0;
+ priv = this->private;
+ shd = &priv->shd;
+
+ evnt = GF_CALLOC (1, sizeof (shd_crawl_event_t),
+ gf_afr_mt_shd_crawl_event_t);
+ if (!evnt) {
+ ret = -1;
+ goto out;
+ }
+ LOCK (&priv->lock);
+ {
+ for (i = 0; i < priv->child_count; i++) {
+ if (shd->pos[i] != AFR_POS_LOCAL)
+ continue;
+
+ reset_crawl_event (evnt);
+
+ if (!shd->crawl_events[i]) {
+ continue;
+ }
+
+ afr_copy_crawl_event_struct (shd->crawl_events[i],
+ evnt);
+ _add_crawl_stats_to_dict (this, dict, i, evnt, NULL);
+
+ }
+ }
+ UNLOCK (&priv->lock);
+ reset_crawl_event (evnt);
+ GF_FREE (evnt);
+
+out:
+ return ret;
+}
+
+static int
+_add_local_subvols_crawl_statistics_to_dict (xlator_t *this, dict_t *dict)
+{
+ int ret = 0;
+ afr_fill_completed_crawl_statistics_to_dict (this, dict);
+ ret = afr_fill_crawl_statistics_of_running_crawl (this, dict);
+ return ret;
+}
+int
+_add_local_subvols_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ int i = 0;
+
+ priv = this->private;
+ shd = &priv->shd;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (shd->pos[i] != AFR_POS_LOCAL)
+ continue;
+ _add_eh_to_dict (this, eh, dict, i);
+ }
+ return 0;
+}
+
+int
+afr_xl_op (xlator_t *this, dict_t *input, dict_t *output)
+{
+ gf_xl_afr_op_t op = GF_AFR_OP_INVALID;
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ int xl_id = 0;
+
+ priv = this->private;
+ shd = &priv->shd;
+
+ ret = dict_get_int32 (input, "xl-op", (int32_t*)&op);
+ if (ret)
+ goto out;
+ ret = dict_get_int32 (input, this->name, &xl_id);
+ if (ret)
+ goto out;
+ ret = dict_set_int32 (output, this->name, xl_id);
+ if (ret)
+ goto out;
+ switch (op) {
+ case GF_AFR_OP_HEAL_INDEX:
+ ret = _do_self_heal_on_local_subvols (this, INDEX, output);
+ break;
+ case GF_AFR_OP_HEAL_FULL:
+ ret = _do_self_heal_on_local_subvols (this, FULL, output);
+ break;
+ case GF_AFR_OP_INDEX_SUMMARY:
+ (void)_get_index_summary_on_local_subvols (this, output);
+ ret = 0;
+ break;
+ case GF_AFR_OP_HEALED_FILES:
+ ret = _add_local_subvols_eh_to_dict (this, shd->healed, output);
+ break;
+ case GF_AFR_OP_HEAL_FAILED_FILES:
+ ret = _add_local_subvols_eh_to_dict (this, shd->heal_failed,
+ output);
+ break;
+ case GF_AFR_OP_SPLIT_BRAIN_FILES:
+ ret = _add_local_subvols_eh_to_dict (this, shd->split_brain,
+ output);
+ break;
+ case GF_AFR_OP_STATISTICS:
+ ret = _add_local_subvols_crawl_statistics_to_dict (this, output);
+ break;
+ case GF_AFR_OP_STATISTICS_HEAL_COUNT:
+ case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+ ret = _do_crawl_op_on_local_subvols (this, INDEX_TO_BE_HEALED,
+ STATISTICS_TO_BE_HEALED,
+ output);
+ break;
+ default:
+ gf_log (this->name, GF_LOG_ERROR, "Unknown set op %d", op);
+ break;
+ }
+out:
+ dict_del (output, this->name);
+ return ret;
+}
+
+void
+afr_poll_self_heal (void *data)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ struct timespec timeout = {0};
+ xlator_t *this = NULL;
+ long child = (long)data;
+ gf_timer_t *old_timer = NULL;
+ gf_timer_t *new_timer = NULL;
+ shd_pos_t pos_data = {0};
+ int ret = 0;
+
+ this = THIS;
+ priv = this->private;
+ shd = &priv->shd;
+
+ if (shd->pos[child] == AFR_POS_UNKNOWN) {
+ pos_data.this = this;
+ pos_data.child = child;
+ ret = synctask_new (this->ctx->env,
+ afr_syncop_find_child_position,
+ NULL, NULL, &pos_data);
+ if (!ret)
+ shd->pos[child] = pos_data.pos;
+ }
+ if (shd->enabled && (shd->pos[child] == AFR_POS_LOCAL))
+ _do_self_heal_on_subvol (this, child, INDEX);
+ timeout.tv_sec = shd->timeout;
+ timeout.tv_nsec = 0;
+ //notify and previous timer should be synchronized.
+ LOCK (&priv->lock);
+ {
+ old_timer = shd->timer[child];
+ if (shd->pos[child] == AFR_POS_REMOTE)
+ goto unlock;
+ shd->timer[child] = gf_timer_call_after (this->ctx, timeout,
+ afr_poll_self_heal,
+ data);
+ new_timer = shd->timer[child];
+ }
+unlock:
+ UNLOCK (&priv->lock);
+
+ if (old_timer)
+ gf_timer_call_cancel (this->ctx, old_timer);
+ if (!new_timer && (shd->pos[child] != AFR_POS_REMOTE)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Could not create self-heal polling timer for %s",
+ priv->children[child]->name);
+ }
+ return;
+}
+
+static int
+afr_handle_child_up (int ret, call_frame_t *sync_frame, void *data)
+{
+ afr_self_heald_t *shd = NULL;
+ shd_pos_t *pos_data = data;
+ afr_private_t *priv = NULL;
+
+ if (ret)
+ goto out;
+
+ priv = pos_data->this->private;
+ shd = &priv->shd;
+ shd->pos[pos_data->child] = pos_data->pos;
+ if (pos_data->pos != AFR_POS_REMOTE)
+ afr_poll_self_heal ((void*)(long)pos_data->child);
+ _do_self_heal_on_local_subvols (THIS, INDEX, NULL);
+out:
+ GF_FREE (data);
+ return 0;
+}
+
+void
+afr_proactive_self_heal (void *data)
+{
+ xlator_t *this = NULL;
+ long child = (long)data;
+ shd_pos_t *pos_data = NULL;
+ int ret = 0;
+
+ this = THIS;
+
+ //Position of brick could have changed and it could be local now.
+ //Compute the position again
+ pos_data = GF_CALLOC (1, sizeof (*pos_data), gf_afr_mt_pos_data_t);
+ if (!pos_data)
+ goto out;
+ pos_data->this = this;
+ pos_data->child = child;
+ ret = synctask_new (this->ctx->env, afr_syncop_find_child_position,
+ afr_handle_child_up, NULL, pos_data);
+ if (ret)
+ goto out;
+out:
+ return;
+}
+
+static int
+get_pathinfo_host (char *pathinfo, char *hostname, size_t size)
+{
+ char *start = NULL;
+ char *end = NULL;
+ int ret = -1;
+ int i = 0;
+
+ if (!pathinfo)
+ goto out;
+
+ start = strchr (pathinfo, ':');
+ if (!start)
+ goto out;
+ end = strrchr (pathinfo, ':');
+ if (start == end)
+ goto out;
+
+ memset (hostname, 0, size);
+ i = 0;
+ while (++start != end)
+ hostname[i++] = *start;
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+afr_local_pathinfo (char *pathinfo, gf_boolean_t *local)
+{
+ int ret = 0;
+ char pathinfohost[1024] = {0};
+ char localhost[1024] = {0};
+ xlator_t *this = THIS;
+
+ *local = _gf_false;
+ ret = get_pathinfo_host (pathinfo, pathinfohost, sizeof (pathinfohost));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Invalid pathinfo: %s",
+ pathinfo);
+ goto out;
+ }
+
+ ret = gethostname (localhost, sizeof (localhost));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "gethostname() failed, "
+ "reason: %s", strerror (errno));
+ goto out;
+ }
+
+ if (!strcmp (localhost, pathinfohost))
+ *local = _gf_true;
+out:
+ return ret;
+}
+
+int
+afr_crawl_build_start_loc (xlator_t *this, afr_crawl_data_t *crawl_data,
+ loc_t *dirloc)
+{
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+ void *index_gfid = NULL;
+ void *base_indices_holder_vgfid = NULL;
+ loc_t rootloc = {0};
+ struct iatt iattr = {0};
+ struct iatt parent = {0};
+ int ret = 0;
+ xlator_t *readdir_xl = crawl_data->readdir_xl;
+
+ priv = this->private;
+ if (crawl_data->crawl == FULL) {
+ afr_build_root_loc (this, dirloc);
+ } else if (crawl_data->crawl == INDEX) {
+ afr_build_root_loc (this, &rootloc);
+ ret = syncop_getxattr (readdir_xl, &rootloc, &xattr,
+ GF_XATTROP_INDEX_GFID);
+ if (ret < 0)
+ goto out;
+ ret = dict_get_ptr (xattr, GF_XATTROP_INDEX_GFID, &index_gfid);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to get index "
+ "dir gfid on %s", readdir_xl->name);
+ goto out;
+ }
+ if (!index_gfid) {
+ gf_log (this->name, GF_LOG_ERROR, "index gfid empty "
+ "on %s", readdir_xl->name);
+ ret = -1;
+ goto out;
+ }
+ uuid_copy (dirloc->gfid, index_gfid);
+ dirloc->path = "";
+ dirloc->inode = inode_new (priv->root_inode->table);
+ ret = syncop_lookup (readdir_xl, dirloc, NULL,
+ &iattr, NULL, &parent);
+ if (ret < 0) {
+ if (errno != ENOENT) {
+ gf_log (this->name, GF_LOG_ERROR, "lookup "
+ "failed on index dir on %s - (%s)",
+ readdir_xl->name, strerror (errno));
+ }
+ goto out;
+ }
+ ret = _link_inode_update_loc (this, dirloc, &iattr);
+ if (ret)
+ goto out;
+ } else if (crawl_data->crawl == INDEX_TO_BE_HEALED) {
+ afr_build_root_loc (this, &rootloc);
+ ret = syncop_getxattr (readdir_xl, &rootloc, &xattr,
+ GF_BASE_INDICES_HOLDER_GFID);
+ if (ret < 0)
+ goto out;
+ ret = dict_get_ptr (xattr, GF_BASE_INDICES_HOLDER_GFID,
+ &base_indices_holder_vgfid);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "index gfid empty "
+ "on %s", readdir_xl->name);
+ ret = -1;
+ goto out;
+ }
+ if (!base_indices_holder_vgfid) {
+ gf_log (this->name, GF_LOG_ERROR, "Base indices holder"
+ "virtual gfid is null on %s", readdir_xl->name);
+ ret = -1;
+ goto out;
+ }
+ uuid_copy (dirloc->gfid, base_indices_holder_vgfid);
+ dirloc->path = "";
+ dirloc->inode = inode_new (priv->root_inode->table);
+ ret = syncop_lookup (readdir_xl, dirloc, NULL, &iattr, NULL,
+ &parent);
+ if (ret < 0) {
+ if (errno != ENOENT) {
+ gf_log (this->name, GF_LOG_ERROR, "lookup "
+ "failed for base_indices_holder dir"
+ " on %s - (%s)", readdir_xl->name,
+ strerror (errno));
+
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "base_indices"
+ "_holder is not yet created.");
+ }
+ goto out;
+ }
+ ret = _link_inode_update_loc (this, dirloc, &iattr);
+ if (ret)
+ goto out;
+ }
+ ret = 0;
+out:
+ if (xattr)
+ dict_unref (xattr);
+ loc_wipe (&rootloc);
+ return ret;
+}
+
+int
+afr_crawl_opendir (xlator_t *this, afr_crawl_data_t *crawl_data, fd_t **dirfd,
+ loc_t *dirloc)
+{
+ fd_t *fd = NULL;
+ int ret = 0;
+
+ if (crawl_data->crawl == FULL) {
+ fd = fd_create (dirloc->inode, crawl_data->pid);
+ if (!fd) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to create fd for %s", dirloc->path);
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_opendir (crawl_data->readdir_xl, dirloc, fd);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "opendir failed on %s", dirloc->path);
+ goto out;
+ }
+ } else {
+ fd = fd_anonymous (dirloc->inode);
+ }
+ ret = 0;
+out:
+ if (!ret)
+ *dirfd = fd;
+ return ret;
+}
+
+xlator_t*
+afr_crawl_readdir_xl_get (xlator_t *this, afr_crawl_data_t *crawl_data)
+{
+ afr_private_t *priv = this->private;
+
+ if (crawl_data->crawl == FULL) {
+ return this;
+ } else {
+ return priv->children[crawl_data->child];
+ }
+ return NULL;
+}
+
+int
+afr_crawl_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent,
+ gf_dirent_t *entry, afr_crawl_data_t *crawl_data)
+{
+ int ret = -1;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ if (crawl_data->crawl == FULL) {
+ ret = afr_build_child_loc (this, child, parent, entry->d_name);
+ } else if (crawl_data->crawl == INDEX_TO_BE_HEALED) {
+ ret = _build_index_loc (this, child, entry->d_name, parent);
+ if (ret)
+ goto out;
+ child->inode = inode_new (priv->root_inode->table);
+ if (!child->inode) {
+ ret = -1;
+ goto out;
+ }
+ child->path = NULL;
+ } else {
+ child->inode = inode_new (priv->root_inode->table);
+ if (!child->inode)
+ goto out;
+ uuid_parse (entry->d_name, child->gfid);
+ ret = _loc_assign_gfid_path (child);
+ }
+out:
+ return ret;
+}
+
+static int
+_process_entries (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries,
+ off_t *offset, afr_crawl_data_t *crawl_data)
+{
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+ int ret = 0;
+ loc_t entry_loc = {0};
+ fd_t *fd = NULL;
+ struct iatt iattr = {0};
+
+ list_for_each_entry_safe (entry, tmp, &entries->list, list) {
+ if (!_crawl_proceed (this, crawl_data->child,
+ crawl_data->crawl_flags, NULL)) {
+ ret = -1;
+ goto out;
+ }
+ *offset = entry->d_off;
+ if (IS_ENTRY_CWD (entry->d_name) ||
+ IS_ENTRY_PARENT (entry->d_name))
+ continue;
+ if ((crawl_data->crawl == FULL) &&
+ uuid_is_null (entry->d_stat.ia_gfid)) {
+ gf_log (this->name, GF_LOG_WARNING, "%s/%s: No "
+ "gfid present skipping",
+ parentloc->path, entry->d_name);
+ continue;
+ }
+
+ loc_wipe (&entry_loc);
+ ret = afr_crawl_build_child_loc (this, &entry_loc, parentloc,
+ entry, crawl_data);
+ if (ret)
+ goto out;
+
+ ret = crawl_data->process_entry (this, crawl_data, entry,
+ &entry_loc, parentloc, &iattr);
+
+ if (crawl_data->crawl == INDEX_TO_BE_HEALED && ret) {
+ goto out;
+ } else if (ret) {
+ continue;
+ }
+
+ if ((crawl_data->crawl == INDEX) ||
+ (crawl_data->crawl == INDEX_TO_BE_HEALED))
+ continue;
+
+ if (!IA_ISDIR (iattr.ia_type))
+ continue;
+ fd = NULL;
+ ret = afr_crawl_opendir (this, crawl_data, &fd, &entry_loc);
+ if (ret)
+ continue;
+ ret = _crawl_directory (fd, &entry_loc, crawl_data);
+ if (fd)
+ fd_unref (fd);
+ }
+ ret = 0;
+out:
+ if ((crawl_data->crawl == INDEX_TO_BE_HEALED) && ret) {
+ gf_log (this->name, GF_LOG_ERROR,"Failed to get the hardlink "
+ "count");
+ }
+ loc_wipe (&entry_loc);
+ return ret;
+}
+
+static int
+_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data)
+{
+ xlator_t *this = NULL;
+ off_t offset = 0;
+ gf_dirent_t entries;
+ int ret = 0;
+ gf_boolean_t free_entries = _gf_false;
+ xlator_t *readdir_xl = crawl_data->readdir_xl;
+
+ INIT_LIST_HEAD (&entries.list);
+ this = THIS;
+
+ GF_ASSERT (loc->inode);
+
+ if (crawl_data->crawl == FULL)
+ gf_log (this->name, GF_LOG_DEBUG, "crawling %s", loc->path);
+ else
+ gf_log (this->name, GF_LOG_DEBUG, "crawling INDEX %s",
+ uuid_utoa (loc->gfid));
+
+ while (1) {
+ if (crawl_data->crawl == FULL)
+ ret = syncop_readdirp (readdir_xl, fd, 131072, offset,
+ NULL, &entries);
+ else
+ ret = syncop_readdir (readdir_xl, fd, 131072, offset,
+ &entries);
+ if (ret <= 0)
+ break;
+ ret = 0;
+ free_entries = _gf_true;
+
+ if (!_crawl_proceed (this, crawl_data->child,
+ crawl_data->crawl_flags, NULL)) {
+ ret = -1;
+ goto out;
+ }
+ if (list_empty (&entries.list))
+ goto out;
+
+ ret = _process_entries (this, loc, &entries, &offset,
+ crawl_data);
+ if ((ret < 0) && (crawl_data->crawl == INDEX_TO_BE_HEALED)) {
+ goto out;
+ }
+ gf_dirent_free (&entries);
+ free_entries = _gf_false;
+ }
+ ret = 0;
+out:
+ if (free_entries)
+ gf_dirent_free (&entries);
+ return ret;
+}
+
+static char*
+position_str_get (afr_child_pos_t pos)
+{
+ switch (pos) {
+ case AFR_POS_UNKNOWN:
+ return "unknown";
+ case AFR_POS_LOCAL:
+ return "local";
+ case AFR_POS_REMOTE:
+ return "remote";
+ }
+ return NULL;
+}
+
+int
+afr_find_child_position (xlator_t *this, int child, afr_child_pos_t *pos)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ dict_t *xattr_rsp = NULL;
+ loc_t loc = {0};
+ int ret = 0;
+ char *node_uuid = NULL;
+
+ priv = this->private;
+ shd = &priv->shd;
+
+ afr_build_root_loc (this, &loc);
+
+ ret = syncop_getxattr (priv->children[child], &loc, &xattr_rsp,
+ GF_XATTR_NODE_UUID_KEY);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "getxattr failed on %s - "
+ "(%s)", priv->children[child]->name, strerror (errno));
+ goto out;
+ }
+
+ ret = dict_get_str (xattr_rsp, GF_XATTR_NODE_UUID_KEY, &node_uuid);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "node-uuid key not found on "
+ "child %s", priv->children[child]->name);
+ goto out;
+ }
+
+ if (!strcmp (node_uuid, shd->node_uuid))
+ *pos = AFR_POS_LOCAL;
+ else
+ *pos = AFR_POS_REMOTE;
+
+ gf_log (this->name, GF_LOG_DEBUG, "child %s is %s",
+ priv->children[child]->name, position_str_get (*pos));
+out:
+ if (ret)
+ *pos = AFR_POS_UNKNOWN;
+ loc_wipe (&loc);
+ return ret;
+}
+
+int
+afr_syncop_find_child_position (void *data)
+{
+ shd_pos_t *pos_data = data;
+ int ret = 0;
+
+ ret = afr_find_child_position (pos_data->this, pos_data->child,
+ &pos_data->pos);
+ return ret;
+}
+
+static int
+afr_dir_crawl (void *data)
+{
+ xlator_t *this = NULL;
+ int ret = -1;
+ xlator_t *readdir_xl = NULL;
+ fd_t *fd = NULL;
+ loc_t dirloc = {0};
+ afr_crawl_data_t *crawl_data = data;
+
+ this = THIS;
+
+ if (!_crawl_proceed (this, crawl_data->child, crawl_data->crawl_flags,
+ NULL))
+ goto out;
+
+ readdir_xl = afr_crawl_readdir_xl_get (this, crawl_data);
+ if (!readdir_xl)
+ goto out;
+ crawl_data->readdir_xl = readdir_xl;
+
+ ret = afr_crawl_build_start_loc (this, crawl_data, &dirloc);
+ if (ret)
+ goto out;
+
+ ret = afr_crawl_opendir (this, crawl_data, &fd, &dirloc);
+ if (ret) {
+ if (crawl_data->crawl == INDEX_TO_BE_HEALED) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to open base_"
+ "indices_holder");
+ }
+ goto out;
+ }
+
+ ret = _crawl_directory (fd, &dirloc, crawl_data);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Crawl failed on %s",
+ readdir_xl->name);
+ else
+ gf_log (this->name, GF_LOG_DEBUG, "Crawl completed "
+ "on %s", readdir_xl->name);
+ if (crawl_data->crawl == INDEX)
+ dirloc.path = NULL;
+out:
+ if (fd)
+ fd_unref (fd);
+ if ((crawl_data->crawl == INDEX) ||
+ (crawl_data->crawl == INDEX_TO_BE_HEALED ))
+ dirloc.path = NULL;
+ loc_wipe (&dirloc);
+ return ret;
+}
+
+char *
+get_crawl_type_in_string (afr_crawl_type_t crawl)
+{
+ char *index = "INDEX";
+ char *full = "FULL";
+ char *crawl_type = NULL;
+
+ if (crawl == INDEX){
+ crawl_type = index;
+ } else if (crawl == FULL) {
+ crawl_type = full;
+ }
+
+ return crawl_type;
+}
+
+static int
+afr_allocate_crawl_event (xlator_t *this, int child, afr_crawl_type_t crawl)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ int ret = 0;
+ shd_crawl_event_t *crawl_event = NULL;
+ time_t get_time = 0;
+
+ priv = this->private;
+ shd = &priv->shd;
+
+ crawl_event = GF_CALLOC (sizeof (shd_crawl_event_t), 1,
+ gf_afr_mt_shd_crawl_event_t);
+ if (!crawl_event) {
+ ret = -1;
+ goto out;
+ }
+
+ get_time = time(NULL);
+ if (get_time == ((time_t)-1)) {
+ ret = -1;
+ goto out;
+ }
+
+ crawl_event->start_time_str = gf_strdup (ctime(&get_time));
+
+ crawl_event->crawl_type = get_crawl_type_in_string (crawl);
+ if (!crawl_event->crawl_type) {
+ ret = -1;
+ goto out;
+ }
+ LOCK (&priv->lock);
+ {
+ shd->crawl_events[child] = crawl_event;
+ }
+ UNLOCK (&priv->lock);
+ ret = 0;
+out:
+ return ret;
+
+}
+
+static int
+afr_put_crawl_event_in_eh (xlator_t *this, int child)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ int ret = 0;
+ time_t get_time = 0;
+ shd_crawl_event_t **crawl_event = NULL;
+
+ priv = this->private;
+ shd = &priv->shd;
+
+ get_time = time(NULL);
+ if (get_time == ((time_t)-1)) {
+ ret = -1;
+ goto out;
+ }
+ crawl_event = (shd_crawl_event_t**)shd->crawl_events;
+ LOCK (&priv->lock);
+ {
+ crawl_event[child]->end_time_str = gf_strdup (ctime(&get_time));
+ ret = eh_save_history (shd->statistics[child],
+ crawl_event[child]);
+ crawl_event[child] = NULL;
+ }
+ UNLOCK (&priv->lock);
+out:
+ return ret;
+}
+
+static int
+afr_dir_exclusive_crawl (void *data)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ gf_boolean_t crawl = _gf_false;
+ int ret = 0;
+ int child = -1;
+ xlator_t *this = NULL;
+ afr_crawl_data_t *crawl_data = data;
+
+ this = THIS;
+ priv = this->private;
+ shd = &priv->shd;
+ child = crawl_data->child;
+
+ LOCK (&priv->lock);
+ {
+ if (shd->inprogress[child]) {
+ if (shd->pending[child] != FULL)
+ shd->pending[child] = crawl_data->crawl;
+ } else {
+ shd->inprogress[child] = _gf_true;
+ crawl = _gf_true;
+ }
+ }
+ UNLOCK (&priv->lock);
+
+ if (!crawl) {
+ gf_log (this->name, GF_LOG_INFO, "Another crawl is in progress "
+ "for %s", priv->children[child]->name);
+ goto out;
+ }
+
+ do {
+ ret = afr_allocate_crawl_event (this, child, crawl_data->crawl);
+ if (ret)
+ goto out;
+ afr_dir_crawl (data);
+
+ ret = afr_put_crawl_event_in_eh (this, child);
+ if (ret < 0)
+ goto out;
+
+ LOCK (&priv->lock);
+ {
+ if (shd->pending[child] != NONE) {
+ crawl_data->crawl = shd->pending[child];
+ shd->pending[child] = NONE;
+ } else {
+ shd->inprogress[child] = _gf_false;
+ crawl = _gf_false;
+ }
+ }
+ UNLOCK (&priv->lock);
+ } while (crawl);
+out:
+ return ret;
+}
+
+void
+afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl,
+ process_entry_cbk_t process_entry, void *op_data,
+ gf_boolean_t exclusive, int crawl_flags,
+ afr_crawl_done_cbk_t crawl_done)
+{
+ afr_private_t *priv = NULL;
+ call_frame_t *frame = NULL;
+ afr_crawl_data_t *crawl_data = NULL;
+ int ret = 0;
+ int (*crawler) (void*) = NULL;
+
+ priv = this->private;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ goto out;
+
+ afr_set_lk_owner (frame, this, frame->root);
+ afr_set_low_priority (frame);
+ crawl_data = GF_CALLOC (1, sizeof (*crawl_data),
+ gf_afr_mt_crawl_data_t);
+ if (!crawl_data)
+ goto out;
+ crawl_data->process_entry = process_entry;
+ crawl_data->child = idx;
+ crawl_data->pid = frame->root->pid;
+ crawl_data->crawl = crawl;
+ crawl_data->op_data = op_data;
+ crawl_data->crawl_flags = crawl_flags;
+ gf_log (this->name, GF_LOG_DEBUG, "starting crawl %d for %s",
+ crawl_data->crawl, priv->children[idx]->name);
+
+ if (exclusive)
+ crawler = afr_dir_exclusive_crawl;
+ else
+ crawler = afr_dir_crawl;
+ ret = synctask_new (this->ctx->env, crawler,
+ crawl_done, frame, crawl_data);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "afr crawl failed for child"
+ " %d with ret %d", idx, ret);
+out:
+ return;
+}
+
+void
+afr_build_root_loc (xlator_t *this, loc_t *loc)
+{
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ loc->path = gf_strdup ("/");
+ loc->name = "";
+ loc->inode = inode_ref (priv->root_inode);
+ uuid_copy (loc->gfid, loc->inode->gfid);
+}
+
+int
+afr_set_root_gfid (dict_t *dict)
+{
+ uuid_t gfid;
+ int ret = 0;
+
+ memset (gfid, 0, 16);
+ gfid[15] = 1;
+
+ ret = afr_set_dict_gfid (dict, gfid);
+
+ return ret;
+}
diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
new file mode 100644
index 000000000..e0c083754
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-self-heald.h
@@ -0,0 +1,65 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __AFR_SELF_HEALD_H__
+#define __AFR_SELF_HEALD_H__
+#include "xlator.h"
+
+#define IS_ROOT_PATH(path) (!strcmp (path, "/"))
+#define IS_ENTRY_CWD(entry) (!strcmp (entry, "."))
+#define IS_ENTRY_PARENT(entry) (!strcmp (entry, ".."))
+#define AFR_ALL_CHILDREN -1
+
+typedef struct afr_crawl_data_ {
+ int child;
+ pid_t pid;
+ afr_crawl_type_t crawl;
+ xlator_t *readdir_xl;
+ void *op_data;
+ int crawl_flags;
+ int (*process_entry) (xlator_t *this, struct afr_crawl_data_ *crawl_data,
+ gf_dirent_t *entry, loc_t *child, loc_t *parent,
+ struct iatt *iattr);
+} afr_crawl_data_t;
+
+typedef struct crawl_event_stats_ {
+ uint64_t healed_count;
+ uint64_t split_brain_count;
+ uint64_t heal_failed_count;
+ char *start_time_str;
+ char *end_time_str;
+ char *crawl_type;
+ gf_boolean_t crawl_inprogress;
+} shd_crawl_event_t;
+
+void _destroy_crawl_event_data (void *data);
+void _destroy_shd_event_data (void *data);
+
+typedef int (*process_entry_cbk_t) (xlator_t *this, afr_crawl_data_t *crawl_data,
+ gf_dirent_t *entry, loc_t *child, loc_t *parent,
+ struct iatt *iattr);
+
+void afr_build_root_loc (xlator_t *this, loc_t *loc);
+
+int afr_set_root_gfid (dict_t *dict);
+
+void
+afr_proactive_self_heal (void *data);
+
+int
+afr_xl_op (xlator_t *this, dict_t *input, dict_t *output);
+
+/*
+ * In addition to its self-heal use, this is used to find a local default
+ * read_child.
+ */
+int
+afr_local_pathinfo (char *pathinfo, gf_boolean_t *local);
+#endif /* __AFR_SELF_HEALD_H__ */
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 80c96e887..20306e469 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -1,64 +1,108 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include "dict.h"
#include "byte-order.h"
+#include "common-utils.h"
+#include "timer.h"
#include "afr.h"
#include "afr-transaction.h"
#include <signal.h>
-#include <alloca.h>
+
+
+#define LOCKED_NO 0x0 /* no lock held */
+#define LOCKED_YES 0x1 /* for DATA, METADATA, ENTRY and higher_path
+ of RENAME */
+#define LOCKED_LOWER 0x2 /* for lower_path of RENAME */
+
+afr_fd_ctx_t *
+__afr_fd_ctx_get (fd_t *fd, xlator_t *this)
+{
+ uint64_t ctx = 0;
+ int ret = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int i = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ ret = __fd_ctx_get (fd, this, &ctx);
+
+ if (ret < 0 && fd_is_anonymous (fd)) {
+ ret = __afr_fd_ctx_set (this, fd);
+ if (ret < 0)
+ goto out;
+
+ ret = __fd_ctx_get (fd, this, &ctx);
+ if (ret < 0)
+ goto out;
+
+ fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ for (i = 0; i < priv->child_count; i++)
+ fd_ctx->opened_on[i] = AFR_FD_OPENED;
+ }
+
+ fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+out:
+ return fd_ctx;
+}
+
+
+afr_fd_ctx_t *
+afr_fd_ctx_get (fd_t *fd, xlator_t *this)
+{
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ LOCK(&fd->lock);
+ {
+ fd_ctx = __afr_fd_ctx_get (fd, this);
+ }
+ UNLOCK(&fd->lock);
+
+ return fd_ctx;
+}
static void
-afr_pid_save (call_frame_t *frame)
+afr_save_lk_owner (call_frame_t *frame)
{
afr_local_t * local = NULL;
local = frame->local;
- local->saved_pid = frame->root->pid;
+ local->saved_lk_owner = frame->root->lk_owner;
}
static void
-afr_pid_restore (call_frame_t *frame)
+afr_restore_lk_owner (call_frame_t *frame)
{
afr_local_t * local = NULL;
local = frame->local;
- frame->root->pid = local->saved_pid;
+ frame->root->lk_owner = local->saved_lk_owner;
}
-
static void
__mark_all_pending (int32_t *pending[], int child_count,
afr_transaction_type type)
-{
- int i;
- int j;
+{
+ int i = 0;
+ int j = 0;
for (i = 0; i < child_count; i++) {
j = afr_index_for_transaction_type (type);
- pending[i][j] = hton32 (1);
+ pending[i][j] = hton32 (1);
}
}
@@ -67,260 +111,170 @@ static void
__mark_child_dead (int32_t *pending[], int child_count, int child,
afr_transaction_type type)
{
- int j;
+ int j = 0;
j = afr_index_for_transaction_type (type);
-
- pending[child][j] = 0;
-}
-
-
-static void
-__mark_fop_failed_on_fd (fd_t *fd, xlator_t *this,
- int child_index)
-{
- uint64_t ctx;
- afr_fd_ctx_t * fd_ctx = NULL;
-
- int ret = 0;
-
- ret = fd_ctx_get (fd, this, &ctx);
-
- if (ret < 0)
- goto out;
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- fd_ctx->child_failed[child_index] = 1;
-out:
- return;
+ pending[child][j] = 0;
}
static void
-__mark_failed_children (int32_t *pending[], int child_count,
- xlator_t *this, fd_t *fd, afr_transaction_type type)
+__mark_pre_op_done_on_fd (call_frame_t *frame, xlator_t *this, int child_index)
{
- uint64_t ctx;
- afr_fd_ctx_t * fd_ctx = NULL;
+ afr_local_t *local = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
- int ret = 0;
- int i = 0;
- int j = 0;
+ local = frame->local;
- ret = fd_ctx_get (fd, this, &ctx);
+ if (!local->fd)
+ return;
- if (ret < 0)
- goto out;
+ fd_ctx = afr_fd_ctx_get (local->fd, this);
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- for (i = 0; i < child_count; i++) {
- j = afr_index_for_transaction_type (type);
+ if (!fd_ctx)
+ goto out;
- if (fd_ctx->child_failed[i])
- pending[i][j] = 0;
+ LOCK (&local->fd->lock);
+ {
+ if (local->transaction.type == AFR_DATA_TRANSACTION)
+ fd_ctx->pre_op_done[child_index]++;
}
-
+ UNLOCK (&local->fd->lock);
out:
return;
}
-
static void
-__mark_down_children (int32_t *pending[], int child_count,
- unsigned char *child_up, afr_transaction_type type)
+__mark_non_participant_children (int32_t *pending[], int child_count,
+ unsigned char *participants,
+ afr_transaction_type type)
{
- int i;
- int j;
+ int i = 0;
+ int j = 0;
- for (i = 0; i < child_count; i++) {
- j = afr_index_for_transaction_type (type);
-
- if (!child_up[i])
- pending[i][j] = 0;
+ j = afr_index_for_transaction_type (type);
+ for (i = 0; i < child_count; i++) {
+ if (!participants[i])
+ pending[i][j] = 0;
}
}
-static void
+void
__mark_all_success (int32_t *pending[], int child_count,
afr_transaction_type type)
{
- int i;
+ int i;
int j;
- for (i = 0; i < child_count; i++) {
+ for (i = 0; i < child_count; i++) {
j = afr_index_for_transaction_type (type);
- pending[i][j] = hton32 (-1);
+ pending[i][j] = hton32 (-1);
}
}
-
-static int
-__is_first_write_on_fd (xlator_t *this, fd_t *fd)
+void
+_set_all_child_errno (int *child_errno, unsigned int child_count)
{
- int op_ret = 0;
- int _ret = -1;
+ int i = 0;
- uint64_t ctx;
- afr_fd_ctx_t * fd_ctx = NULL;
-
- LOCK (&fd->lock);
- {
- _ret = __fd_ctx_get (fd, this, &ctx);
-
- if (_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not get fd ctx on fd=%p",
- fd);
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- if (fd_ctx->pre_op_done == 0) {
- fd_ctx->pre_op_done = 1;
- op_ret = 1;
- }
- }
-out:
- UNLOCK (&fd->lock);
-
- return op_ret;
+ for (i = 0; i < child_count; i++)
+ if (child_errno[i] == 0)
+ child_errno[i] = ENOTCONN;
}
-
-static int
-__if_fd_pre_op_done (xlator_t *this, fd_t *fd)
+void
+afr_transaction_perform_fop (call_frame_t *frame, xlator_t *this)
{
- int op_ret = 0;
- int _ret = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ fd_t *fd = NULL;
- uint64_t ctx;
- afr_fd_ctx_t * fd_ctx = NULL;
+ local = frame->local;
+ priv = this->private;
+ fd = local->fd;
- LOCK (&fd->lock);
- {
- _ret = __fd_ctx_get (fd, this, &ctx);
+ __mark_all_success (local->pending, priv->child_count,
+ local->transaction.type);
- if (_ret < 0) {
- goto out;
- }
+ _set_all_child_errno (local->child_errno, priv->child_count);
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ /* Perform fops with the lk-owner from top xlator.
+ * Eg: lk-owner of posix-lk and flush should be same,
+ * flush cant clear the posix-lks without that lk-owner.
+ */
+ afr_save_lk_owner (frame);
+ frame->root->lk_owner =
+ local->transaction.main_frame->root->lk_owner;
- if (fd_ctx->pre_op_done) {
- fd_ctx->pre_op_done = 0;
- op_ret = 1;
- }
- }
-out:
- UNLOCK (&fd->lock);
- return op_ret;
+ /* The wake up needs to happen independent of
+ what type of fop arrives here. If it was
+ a write, then it has already inherited the
+ lock and changelog. If it was not a write,
+ then the presumption of the optimization (of
+ optimizing for successive write operations)
+ fails.
+ */
+ if (fd)
+ afr_delayed_changelog_wake_up (this, fd);
+ local->transaction.fop (frame, this);
}
static int
__changelog_enabled (afr_private_t *priv, afr_transaction_type type)
{
- int ret = 0;
-
- switch (type) {
- case AFR_DATA_TRANSACTION:
- if (priv->data_change_log)
- ret = 1;
-
- break;
-
- case AFR_METADATA_TRANSACTION:
- if (priv->metadata_change_log)
- ret = 1;
-
- break;
-
- case AFR_ENTRY_TRANSACTION:
- case AFR_ENTRY_RENAME_TRANSACTION:
- if (priv->entry_change_log)
- ret = 1;
-
- break;
-
- case AFR_FLUSH_TRANSACTION:
- ret = 1;
- }
+ int ret = 0;
- return ret;
-}
+ switch (type) {
+ case AFR_DATA_TRANSACTION:
+ if (priv->data_change_log)
+ ret = 1;
+ break;
-static int
-__changelog_needed_pre_op (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- fd_t * fd = NULL;
+ case AFR_METADATA_TRANSACTION:
+ if (priv->metadata_change_log)
+ ret = 1;
- int op_ret = 0;
+ break;
- priv = this->private;
- local = frame->local;
-
- if (__changelog_enabled (priv, local->transaction.type)) {
- switch (local->op) {
-
- case GF_FOP_WRITE:
- case GF_FOP_FTRUNCATE:
- /*
- if it's a data transaction, we write the changelog
- only on the first write on an fd
- */
-
- fd = local->fd;
- if (!fd || __is_first_write_on_fd (this, fd))
- op_ret = 1;
-
- break;
-
- case GF_FOP_FLUSH:
- /* only do post-op on flush() */
-
- op_ret = 0;
- break;
-
- default:
- op_ret = 1;
- }
- }
+ case AFR_ENTRY_TRANSACTION:
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ if (priv->entry_change_log)
+ ret = 1;
- return op_ret;
+ break;
+ }
+
+ return ret;
}
static int
-__changelog_needed_post_op (call_frame_t *frame, xlator_t *this)
+__fop_changelog_needed (call_frame_t *frame, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
-
- int op_ret = 0;
- afr_transaction_type type = -1;
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ int op_ret = 0;
+ afr_transaction_type type = -1;
- priv = this->private;
- local = frame->local;
- type = local->transaction.type;
+ priv = this->private;
+ local = frame->local;
+ type = local->transaction.type;
- if (__changelog_enabled (priv, type)) {
+ if (__changelog_enabled (priv, type)) {
switch (local->op) {
case GF_FOP_WRITE:
case GF_FOP_FTRUNCATE:
- op_ret = 0;
+ op_ret = 1;
break;
case GF_FOP_FLUSH:
- op_ret = __if_fd_pre_op_done (this, local->fd);
+ op_ret = 0;
break;
default:
@@ -328,817 +282,1536 @@ __changelog_needed_post_op (call_frame_t *frame, xlator_t *this)
}
}
- return op_ret;
+ return op_ret;
}
-
-static int
-afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending)
+int
+afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending,
+ int child, afr_xattrop_type_t op)
{
- int i;
+ int i = 0;
int ret = 0;
+ if (op == LOCAL_FIRST) {
+ ret = dict_set_static_bin (xattr, priv->pending_key[child],
+ pending[child],
+ AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
+ if (ret)
+ goto out;
+ }
for (i = 0; i < priv->child_count; i++) {
+ if (i == child)
+ continue;
ret = dict_set_static_bin (xattr, priv->pending_key[i],
- pending[i], 3 * sizeof (int32_t));
+ pending[i],
+ AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
/* 3 = data+metadata+entry */
-
+
if (ret < 0)
goto out;
}
-
+ if (op == LOCAL_LAST) {
+ ret = dict_set_static_bin (xattr, priv->pending_key[child],
+ pending[child],
+ AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
+ if (ret)
+ goto out;
+ }
out:
return ret;
}
-
-static int
+int
afr_lock_server_count (afr_private_t *priv, afr_transaction_type type)
{
- int ret = 0;
-
- switch (type) {
- case AFR_FLUSH_TRANSACTION:
- case AFR_DATA_TRANSACTION:
- ret = priv->data_lock_server_count;
- break;
-
- case AFR_METADATA_TRANSACTION:
- ret = priv->metadata_lock_server_count;
- break;
-
- case AFR_ENTRY_TRANSACTION:
- case AFR_ENTRY_RENAME_TRANSACTION:
- ret = priv->entry_lock_server_count;
- break;
- }
+ int ret = 0;
- return ret;
-}
+ switch (type) {
+ case AFR_DATA_TRANSACTION:
+ ret = priv->child_count;
+ break;
+
+ case AFR_METADATA_TRANSACTION:
+ ret = priv->child_count;
+ break;
+ case AFR_ENTRY_TRANSACTION:
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ ret = priv->child_count;
+ break;
+ }
+
+ return ret;
+}
-/* {{{ unlock */
+/* {{{ pending */
int32_t
-afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_changelog_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
{
- afr_local_t *local;
- int call_count = 0;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int call_count = -1;
- local = frame->local;
+ priv = this->private;
+ local = frame->local;
+ int_lock = &local->internal_lock;
- LOCK (&frame->lock);
- {
- call_count = --local->call_count;
- }
- UNLOCK (&frame->lock);
+ LOCK (&frame->lock);
+ {
+ call_count = --local->call_count;
+ }
+ UNLOCK (&frame->lock);
- if (call_count == 0) {
- local->transaction.done (frame, this);
- }
-
- return 0;
+ if (call_count == 0) {
+ if (local->transaction.resume_stub) {
+ call_resume (local->transaction.resume_stub);
+ local->transaction.resume_stub = NULL;
+ }
+
+ if (afr_lock_server_count (priv, local->transaction.type) == 0) {
+ local->transaction.done (frame, this);
+ } else {
+ int_lock->lock_cbk = local->transaction.done;
+ afr_unlock (frame, this);
+ }
+ }
+
+ return 0;
}
-int
-afr_unlock (call_frame_t *frame, xlator_t *this)
+void
+afr_transaction_rm_stale_children (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, afr_transaction_type type)
{
- struct flock flock;
+ int i = -1;
+ int count = 0;
+ int read_child = -1;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int **pending = NULL;
+ int idx = 0;
+ int32_t *stale_children = NULL;
+ int32_t *fresh_children = NULL;
+ gf_boolean_t rm_stale_children = _gf_false;
+
+ idx = afr_index_for_transaction_type (type);
+
+ priv = this->private;
+ local = frame->local;
+ pending = local->pending;
- int i = 0;
- int call_count = 0;
+ if (local->op_ret < 0)
+ goto out;
+ fresh_children = local->fresh_children;
+ read_child = afr_inode_get_read_ctx (this, inode, fresh_children);
+ if (read_child < 0) {
+ gf_log (this->name, GF_LOG_DEBUG, "Possible split-brain "
+ "for %s", uuid_utoa (inode->gfid));
+ goto out;
+ }
- afr_local_t *local = NULL;
- afr_private_t * priv = this->private;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!afr_is_child_present (fresh_children,
+ priv->child_count, i))
+ continue;
+ if (pending[i][idx])
+ continue;
+ /* child is down or op failed on it */
+ if (!stale_children)
+ stale_children = afr_children_create (priv->child_count);
+ if (!stale_children)
+ goto out;
- local = frame->local;
+ rm_stale_children = _gf_true;
+ stale_children[count++] = i;
+ gf_log (this->name, GF_LOG_DEBUG, "Removing stale child "
+ "%d for %s", i, uuid_utoa (inode->gfid));
+ }
- /*
- pid has been restored to saved_pid in the fop,
- so set it back to frame->root
- */
+ if (!rm_stale_children)
+ goto out;
+
+ afr_inode_rm_stale_children (this, inode, stale_children);
+out:
+ GF_FREE (stale_children);
+ return;
+}
- frame->root->pid = (long) frame->root;
+afr_inodelk_t*
+afr_get_inodelk (afr_internal_lock_t *int_lock, char *dom)
+{
+ afr_inodelk_t *inodelk = NULL;
+ int i = 0;
- call_count = afr_locked_nodes_count (local->transaction.locked_nodes,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.done (frame, this);
- return 0;
- }
+ for (i = 0; int_lock->inodelk[i].domain; i++) {
+ inodelk = &int_lock->inodelk[i];
+ if (strcmp (dom, inodelk->domain) == 0)
+ return inodelk;
+ }
+ return NULL;
+}
- if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION)
- call_count *= 2;
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- flock.l_start = local->transaction.start;
- flock.l_len = local->transaction.len;
- flock.l_type = F_UNLCK;
-
- if (local->transaction.locked_nodes[i]) {
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
- case AFR_FLUSH_TRANSACTION:
-
- if (local->fd) {
- STACK_WIND (frame, afr_unlock_common_cbk,
- priv->children[i],
- priv->children[i]->fops->finodelk,
- this->name, local->fd,
- F_SETLK, &flock);
- } else {
- STACK_WIND (frame, afr_unlock_common_cbk,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- this->name, &local->loc,
- F_SETLK, &flock);
- }
-
- break;
-
- case AFR_ENTRY_RENAME_TRANSACTION:
-
- STACK_WIND (frame, afr_unlock_common_cbk,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
-
- call_count--;
-
- /* fall through */
-
- case AFR_ENTRY_TRANSACTION:
- if (local->fd) {
- STACK_WIND (frame, afr_unlock_common_cbk,
- priv->children[i],
- priv->children[i]->fops->fentrylk,
- this->name, local->fd,
- local->transaction.basename,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
- } else {
- STACK_WIND (frame, afr_unlock_common_cbk,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- &local->transaction.parent_loc,
- local->transaction.basename,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
-
- }
- break;
- }
-
- if (!--call_count)
- break;
- }
- }
+unsigned char*
+afr_locked_nodes_get (afr_transaction_type type, afr_internal_lock_t *int_lock)
+{
+ unsigned char *locked_nodes = NULL;
+ afr_inodelk_t *inodelk = NULL;
+ switch (type) {
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+ locked_nodes = inodelk->locked_nodes;
+ break;
+
+ case AFR_ENTRY_TRANSACTION:
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ /*Because same set of subvols participate in all lockee
+ * entities*/
+ locked_nodes = int_lock->lockee[0].locked_nodes;
+ break;
+ }
+ return locked_nodes;
+}
+
+int
+afr_changelog_pre_op_call_count (afr_transaction_type type,
+ afr_internal_lock_t *int_lock,
+ unsigned int child_count)
+{
+ int call_count = 0;
+ unsigned char *locked_nodes = NULL;
- return 0;
+ locked_nodes = afr_locked_nodes_get (type, int_lock);
+ GF_ASSERT (locked_nodes);
+
+ call_count = afr_locked_children_count (locked_nodes, child_count);
+ if (type == AFR_ENTRY_RENAME_TRANSACTION)
+ call_count *= 2;
+
+ return call_count;
}
-/* }}} */
+int
+afr_changelog_post_op_call_count (afr_transaction_type type,
+ unsigned char *pre_op,
+ unsigned int child_count)
+{
+ int call_count = 0;
+ call_count = afr_pre_op_done_children_count (pre_op, child_count);
+ if (type == AFR_ENTRY_RENAME_TRANSACTION)
+ call_count *= 2;
-/* {{{ pending */
+ return call_count;
+}
-int32_t
-afr_changelog_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
+void
+afr_compute_txn_changelog (afr_local_t *local, afr_private_t *priv)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
-
- int call_count = -1;
+ int i = 0;
+ int index = 0;
+ int32_t postop = 0;
+ int32_t preop = 1;
+ int32_t **txn_changelog = NULL;
+
+ txn_changelog = local->transaction.txn_changelog;
+ index = afr_index_for_transaction_type (local->transaction.type);
+ for (i = 0; i < priv->child_count; i++) {
+ postop = ntoh32 (local->pending[i][index]);
+ txn_changelog[i][index] = hton32 (postop + preop);
+ }
+}
- priv = this->private;
- local = frame->local;
+afr_xattrop_type_t
+afr_get_postop_xattrop_type (int32_t **pending, int optimized, int child,
+ afr_transaction_type type)
+{
+ int index = 0;
+ afr_xattrop_type_t op = LOCAL_LAST;
- LOCK (&frame->lock);
- {
- call_count = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (call_count == 0) {
- if (afr_lock_server_count (priv, local->transaction.type) == 0) {
- local->transaction.done (frame, this);
- } else {
- afr_unlock (frame, this);
- }
- }
+ index = afr_index_for_transaction_type (type);
+ if (optimized && !pending[child][index])
+ op = LOCAL_FIRST;
+ return op;
+}
- return 0;
+void
+afr_set_postop_dict (afr_local_t *local, xlator_t *this, dict_t *xattr,
+ int optimized, int child)
+{
+ int32_t **txn_changelog = NULL;
+ int32_t **changelog = NULL;
+ afr_private_t *priv = NULL;
+ int ret = 0;
+ afr_xattrop_type_t op = LOCAL_LAST;
+
+ priv = this->private;
+ txn_changelog = local->transaction.txn_changelog;
+ op = afr_get_postop_xattrop_type (local->pending, optimized, child,
+ local->transaction.type);
+ if (optimized)
+ changelog = txn_changelog;
+ else
+ changelog = local->pending;
+ ret = afr_set_pending_dict (priv, xattr, changelog, child, op);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_INFO,
+ "failed to set pending entry");
}
-int
-afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
+gf_boolean_t
+afr_txn_nothing_failed (call_frame_t *frame, xlator_t *this)
{
- afr_private_t * priv = this->private;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int index = -1;
+ int i = 0;
- int ret = 0;
- int i = 0;
- int call_count = 0;
-
- afr_local_t * local = NULL;
- dict_t **xattr = NULL;
+ local = frame->local;
+ priv = this->private;
- local = frame->local;
+ index = afr_index_for_transaction_type (local->transaction.type);
- __mark_down_children (local->pending, priv->child_count,
- local->child_up, local->transaction.type);
-
- if (local->op == GF_FOP_FLUSH) {
- __mark_failed_children (local->pending, priv->child_count,
- this, local->fd,
- local->transaction.type);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->pending[i][index] == 0)
+ return _gf_false;
}
+ return _gf_true;
+}
+
+static void
+afr_dir_fop_handle_all_fop_failures (call_frame_t *frame)
+{
+ xlator_t *this = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ this = frame->this;
+ local = frame->local;
+ priv = this->private;
+
+ if ((local->transaction.type != AFR_ENTRY_TRANSACTION) &&
+ (local->transaction.type != AFR_ENTRY_RENAME_TRANSACTION))
+ return;
+
+ if (local->op_ret >= 0)
+ goto out;
+
+ __mark_all_success (local->pending, priv->child_count,
+ local->transaction.type);
+out:
+ return;
+}
+
+static void
+afr_data_handle_quota_errors (call_frame_t *frame, xlator_t *this)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ gf_boolean_t all_quota_failures = _gf_false;
+
+ local = frame->local;
+ priv = this->private;
+ if (local->transaction.type != AFR_DATA_TRANSACTION)
+ return;
+ /*
+ * Idea is to not leave the file in FOOL-FOOL scenario in case on
+ * all the bricks data transaction failed with EDQUOT to avoid
+ * increasing un-necessary load of self-heals in the system.
+ */
+ all_quota_failures = _gf_true;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i] &&
+ (local->child_errno[i] != EDQUOT)) {
+ all_quota_failures = _gf_false;
+ break;
+ }
+ }
+ if (all_quota_failures)
+ __mark_all_success (local->pending, priv->child_count,
+ local->transaction.type);
+}
+
+int
+afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t * priv = this->private;
+ afr_internal_lock_t *int_lock = NULL;
+ int i = 0;
+ int call_count = 0;
+
+ afr_local_t * local = NULL;
+ afr_fd_ctx_t *fdctx = NULL;
+ dict_t **xattr = NULL;
+ int piggyback = 0;
+ int nothing_failed = 1;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ __mark_non_participant_children (local->pending, priv->child_count,
+ local->transaction.pre_op,
+ local->transaction.type);
+
+ afr_data_handle_quota_errors (frame, this);
+ afr_dir_fop_handle_all_fop_failures (frame);
+
+ if (local->fd)
+ afr_transaction_rm_stale_children (frame, this,
+ local->fd->inode,
+ local->transaction.type);
+
xattr = alloca (priv->child_count * sizeof (*xattr));
memset (xattr, 0, (priv->child_count * sizeof (*xattr)));
- for (i = 0; i < priv->child_count; i++) {
- xattr[i] = get_new_dict ();
- dict_ref (xattr[i]);
+ for (i = 0; i < priv->child_count; i++) {
+ xattr[i] = dict_new ();
}
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ call_count = afr_changelog_post_op_call_count (local->transaction.type,
+ local->transaction.pre_op,
+ priv->child_count);
+ local->call_count = call_count;
- if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
- call_count *= 2;
- }
+ if (local->fd)
+ fdctx = afr_fd_ctx_get (local->fd, this);
- local->call_count = call_count;
+ if (call_count == 0) {
+ /* no child is up */
+ int_lock->lock_cbk = local->transaction.done;
+ afr_unlock (frame, this);
+ goto out;
+ }
- if (call_count == 0) {
- /* no child is up */
- for (i = 0; i < priv->child_count; i++) {
- dict_unref (xattr[i]);
+ nothing_failed = afr_txn_nothing_failed (frame, this);
+
+ afr_compute_txn_changelog (local , priv);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.pre_op[i])
+ continue;
+
+ if (local->transaction.type != AFR_DATA_TRANSACTION)
+ afr_set_postop_dict (local, this, xattr[i],
+ local->optimistic_change_log, i);
+ switch (local->transaction.type) {
+ case AFR_DATA_TRANSACTION:
+ {
+ if (!fdctx) {
+ afr_set_postop_dict (local, this, xattr[i],
+ 0, i);
+ STACK_WIND (frame, afr_changelog_post_op_cbk,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->loc,
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
+ break;
+ }
+
+ /* local->transaction.postop_piggybacked[] was
+ precomputed in is_piggyback_postop() when called from
+ afr_changelog_post_op_safe()
+ */
+
+ piggyback = 0;
+ if (local->transaction.postop_piggybacked[i])
+ piggyback = 1;
+
+ afr_set_postop_dict (local, this, xattr[i],
+ piggyback, i);
+
+ if (nothing_failed && piggyback) {
+ afr_changelog_post_op_cbk (frame, (void *)(long)i,
+ this, 1, 0, xattr[i], NULL);
+ } else {
+ STACK_WIND_COOKIE (frame,
+ afr_changelog_post_op_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ local->fd,
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
+ }
}
-
- afr_unlock (frame, this);
- return 0;
- }
+ break;
+ case AFR_METADATA_TRANSACTION:
+ {
+ if (nothing_failed && local->optimistic_change_log) {
+ afr_changelog_post_op_cbk (frame, (void *)(long)i,
+ this, 1, 0, xattr[i],
+ NULL);
+ break;
+ }
+
+ if (local->fd)
+ STACK_WIND (frame, afr_changelog_post_op_cbk,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ local->fd,
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
+ else
+ STACK_WIND (frame, afr_changelog_post_op_cbk,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->loc,
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
+ }
+ break;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- ret = afr_set_pending_dict (priv, xattr[i],
- local->pending);
-
- if (ret < 0)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set pending entry");
-
-
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
- case AFR_FLUSH_TRANSACTION:
- {
- if (local->fd)
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- else
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- }
- break;
-
- case AFR_ENTRY_RENAME_TRANSACTION:
- {
- STACK_WIND_COOKIE (frame, afr_changelog_post_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->transaction.new_parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
-
- call_count--;
- }
-
- /*
- set it again because previous stack_wind
- might have already returned (think of case
- where subvolume is posix) and would have
- used the dict as placeholder for return
- value
- */
-
- ret = afr_set_pending_dict (priv, xattr[i],
- local->pending);
-
- if (ret < 0)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set pending entry");
-
- /* fall through */
-
- case AFR_ENTRY_TRANSACTION:
- {
- if (local->fd)
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- else
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->transaction.parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- }
- break;
- }
-
- if (!--call_count)
- break;
- }
- }
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ {
+ if (nothing_failed && local->optimistic_change_log) {
+ afr_changelog_post_op_cbk (frame, (void *)(long)i,
+ this, 1, 0, xattr[i],
+ NULL);
+ } else {
+ STACK_WIND_COOKIE (frame, afr_changelog_post_op_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->transaction.new_parent_loc,
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
+ }
+ call_count--;
+ }
+
+ /*
+ set it again because previous stack_wind
+ might have already returned (think of case
+ where subvolume is posix) and would have
+ used the dict as placeholder for return
+ value
+ */
+
+ afr_set_postop_dict (local, this, xattr[i],
+ local->optimistic_change_log, i);
+
+ /* fall through */
+
+ case AFR_ENTRY_TRANSACTION:
+ {
+ if (nothing_failed && local->optimistic_change_log) {
+ afr_changelog_post_op_cbk (frame, (void *)(long)i,
+ this, 1, 0, xattr[i],
+ NULL);
+ break;
+ }
+
+ if (local->fd)
+ STACK_WIND (frame, afr_changelog_post_op_cbk,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ local->fd,
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
+ else
+ STACK_WIND (frame, afr_changelog_post_op_cbk,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->transaction.parent_loc,
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
+ }
+ break;
+ }
+ if (!--call_count)
+ break;
+ }
+
+out:
for (i = 0; i < priv->child_count; i++) {
dict_unref (xattr[i]);
}
- return 0;
+ return 0;
}
int32_t
afr_changelog_pre_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = this->private;
- loc_t * loc = NULL;
-
- int call_count = -1;
- int child_index = (long) cookie;
-
- local = frame->local;
- loc = &local->loc;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->child_up[child_index] = 0;
-
- if (op_errno == ENOTSUP) {
- gf_log (this->name, GF_LOG_ERROR,
- "xattrop not supported by %s",
- priv->children[child_index]->name);
- local->op_ret = -1;
-
- } else if (!child_went_down (op_ret, op_errno)) {
- gf_log (this->name, GF_LOG_ERROR,
- "xattrop failed on child %s: %s",
- priv->children[child_index]->name,
- strerror (op_errno));
- }
- local->op_errno = op_errno;
- }
-
- call_count = --local->call_count;
- }
- UNLOCK (&frame->lock);
+ afr_local_t * local = NULL;
+ afr_private_t * priv = this->private;
+ int call_count = -1;
+ int child_index = (long) cookie;
- if (call_count == 0) {
- if ((local->op_ret == -1) &&
- (local->op_errno == ENOTSUP)) {
- local->transaction.resume (frame, this);
- } else {
- __mark_all_success (local->pending, priv->child_count,
- local->transaction.type);
+ local = frame->local;
- afr_pid_restore (frame);
+ LOCK (&frame->lock);
+ {
+ switch (op_ret) {
+ case 0:
+ __mark_pre_op_done_on_fd (frame, this, child_index);
+ //fallthrough we need to mark the pre_op
+ case 1:
+ local->transaction.pre_op[child_index] = 1;
+ /* special op_ret for piggyback */
+ break;
+ case -1:
+ if (op_errno == ENOTSUP) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "xattrop not supported by %s",
+ priv->children[child_index]->name);
+ local->op_ret = -1;
+
+ } else if (!child_went_down (op_ret, op_errno)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "xattrop failed on child %s: %s",
+ priv->children[child_index]->name,
+ strerror (op_errno));
+ }
+ local->op_errno = op_errno;
+ break;
+ }
- local->transaction.fop (frame, this);
- }
- }
+ call_count = --local->call_count;
+ }
+ UNLOCK (&frame->lock);
+
+ if (call_count == 0) {
+ if ((local->op_ret == -1) &&
+ (local->op_errno == ENOTSUP)) {
+ local->transaction.resume (frame, this);
+ } else {
+ afr_transaction_perform_fop (frame, this);
+ }
+ }
- return 0;
+ return 0;
}
-
-int
+int
afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
{
- afr_private_t * priv = this->private;
-
- int i = 0;
- int ret = 0;
- int call_count = 0;
- dict_t **xattr = NULL;
+ afr_private_t * priv = this->private;
+ int i = 0;
+ int ret = 0;
+ int call_count = 0;
+ dict_t **xattr = NULL;
+ afr_fd_ctx_t *fdctx = NULL;
+ afr_local_t *local = NULL;
+ int piggyback = 0;
+ afr_internal_lock_t *int_lock = NULL;
+ unsigned char *locked_nodes = NULL;
- afr_local_t *local = NULL;
+ local = frame->local;
+ int_lock = &local->internal_lock;
- local = frame->local;
-
xattr = alloca (priv->child_count * sizeof (*xattr));
memset (xattr, 0, (priv->child_count * sizeof (*xattr)));
- for (i = 0; i < priv->child_count; i++) {
- xattr[i] = get_new_dict ();
- dict_ref (xattr[i]);
+ for (i = 0; i < priv->child_count; i++) {
+ xattr[i] = dict_new ();
}
- call_count = afr_up_children_count (priv->child_count,
- local->child_up);
+ call_count = afr_changelog_pre_op_call_count (local->transaction.type,
+ int_lock,
+ priv->child_count);
+ if (call_count == 0) {
+ local->internal_lock.lock_cbk =
+ local->transaction.done;
+ afr_unlock (frame, this);
+ goto out;
+ }
- if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
- call_count *= 2;
- }
+ local->call_count = call_count;
- if (call_count == 0) {
- /* no child is up */
- for (i = 0; i < priv->child_count; i++) {
- dict_unref (xattr[i]);
+ __mark_all_pending (local->pending, priv->child_count,
+ local->transaction.type);
+
+ if (local->fd)
+ fdctx = afr_fd_ctx_get (local->fd, this);
+
+ locked_nodes = afr_locked_nodes_get (local->transaction.type, int_lock);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!locked_nodes[i])
+ continue;
+ ret = afr_set_pending_dict (priv, xattr[i], local->pending,
+ i, LOCAL_FIRST);
+
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_INFO,
+ "failed to set pending entry");
+
+
+ switch (local->transaction.type) {
+ case AFR_DATA_TRANSACTION:
+ {
+ if (!fdctx) {
+ STACK_WIND_COOKIE (frame,
+ afr_changelog_pre_op_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &(local->loc),
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
+ break;
+ }
+
+ LOCK (&local->fd->lock);
+ {
+ piggyback = 0;
+ if (fdctx->pre_op_done[i]) {
+ fdctx->pre_op_piggyback[i]++;
+ piggyback = 1;
+ fdctx->hit++;
+ } else {
+ fdctx->miss++;
+ }
+ }
+ UNLOCK (&local->fd->lock);
+
+ afr_set_delayed_post_op (frame, this);
+
+ if (piggyback)
+ afr_changelog_pre_op_cbk (frame, (void *)(long)i,
+ this, 1, 0, xattr[i],
+ NULL);
+ else
+ STACK_WIND_COOKIE (frame,
+ afr_changelog_pre_op_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ local->fd,
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
}
-
- afr_unlock (frame, this);
- return 0;
- }
+ break;
+ case AFR_METADATA_TRANSACTION:
+ {
+ if (local->optimistic_change_log) {
+ afr_changelog_pre_op_cbk (frame, (void *)(long)i,
+ this, 1, 0, xattr[i],
+ NULL);
+ break;
+ }
+
+ if (local->fd)
+ STACK_WIND_COOKIE (frame,
+ afr_changelog_pre_op_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ local->fd,
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
+ else
+ STACK_WIND_COOKIE (frame,
+ afr_changelog_pre_op_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &(local->loc),
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
+ }
+ break;
- local->call_count = call_count;
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ {
+ if (local->optimistic_change_log) {
+ afr_changelog_pre_op_cbk (frame, (void *)(long)i,
+ this, 1, 0, xattr[i],
+ NULL);
+ } else {
+ STACK_WIND_COOKIE (frame,
+ afr_changelog_pre_op_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->transaction.new_parent_loc,
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
+ }
+
+ call_count--;
+ }
- __mark_all_pending (local->pending, priv->child_count,
- local->transaction.type);
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- ret = afr_set_pending_dict (priv, xattr[i],
- local->pending);
-
- if (ret < 0)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set pending entry");
-
-
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
- case AFR_FLUSH_TRANSACTION:
- {
- if (local->fd)
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- else
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &(local->loc),
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- }
- break;
-
- case AFR_ENTRY_RENAME_TRANSACTION:
- {
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->transaction.new_parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
-
- call_count--;
- }
-
-
- /*
- set it again because previous stack_wind
- might have already returned (think of case
- where subvolume is posix) and would have
- used the dict as placeholder for return
- value
- */
-
- ret = afr_set_pending_dict (priv, xattr[i],
- local->pending);
-
- if (ret < 0)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set pending entry");
-
- /* fall through */
-
- case AFR_ENTRY_TRANSACTION:
- {
- if (local->fd)
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- else
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->transaction.parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i]);
- }
-
- break;
- }
-
- if (!--call_count)
- break;
- }
- }
-
+ /*
+ set it again because previous stack_wind
+ might have already returned (think of case
+ where subvolume is posix) and would have
+ used the dict as placeholder for return
+ value
+ */
+
+ ret = afr_set_pending_dict (priv, xattr[i], local->pending,
+ i, LOCAL_FIRST);
+
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_INFO,
+ "failed to set pending entry");
+
+ /* fall through */
+
+ case AFR_ENTRY_TRANSACTION:
+ {
+ if (local->optimistic_change_log) {
+ afr_changelog_pre_op_cbk (frame, (void *)(long)i,
+ this, 1, 0, xattr[i],
+ NULL);
+ break;
+ }
+
+ if (local->fd)
+ STACK_WIND_COOKIE (frame,
+ afr_changelog_pre_op_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ local->fd,
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
+ else
+ STACK_WIND_COOKIE (frame,
+ afr_changelog_pre_op_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->transaction.parent_loc,
+ GF_XATTROP_ADD_ARRAY, xattr[i],
+ NULL);
+ }
+ break;
+ }
+
+ if (!--call_count)
+ break;
+ }
+out:
for (i = 0; i < priv->child_count; i++) {
dict_unref (xattr[i]);
}
-
- return 0;
+
+ return 0;
+}
+
+
+int
+afr_post_blocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ if (int_lock->lock_op_ret < 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Blocking inodelks failed.");
+ local->transaction.done (frame, this);
+ } else {
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Blocking inodelks done. Proceeding to FOP");
+ afr_internal_lock_finish (frame, this);
+ }
+
+ return 0;
+}
+
+
+int
+afr_post_nonblocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ /* Initiate blocking locks if non-blocking has failed */
+ if (int_lock->lock_op_ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Non blocking inodelks failed. Proceeding to blocking");
+ int_lock->lock_cbk = afr_post_blocking_inodelk_cbk;
+ afr_blocking_lock (frame, this);
+ } else {
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Non blocking inodelks done. Proceeding to FOP");
+ afr_internal_lock_finish (frame, this);
+ }
+
+ return 0;
}
+
+int
+afr_post_blocking_entrylk_cbk (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ if (int_lock->lock_op_ret < 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Blocking entrylks failed.");
+ local->transaction.done (frame, this);
+ } else {
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Blocking entrylks done. Proceeding to FOP");
+ afr_internal_lock_finish (frame, this);
+ }
+
+ return 0;
+}
+
+
+int
+afr_post_nonblocking_entrylk_cbk (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ /* Initiate blocking locks if non-blocking has failed */
+ if (int_lock->lock_op_ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Non blocking entrylks failed. Proceeding to blocking");
+ int_lock->lock_cbk = afr_post_blocking_entrylk_cbk;
+ afr_blocking_lock (frame, this);
+ } else {
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Non blocking entrylks done. Proceeding to FOP");
+ afr_internal_lock_finish (frame, this);
+ }
+
+ return 0;
+}
+
+
+int
+afr_post_blocking_rename_cbk (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ if (int_lock->lock_op_ret < 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Blocking entrylks failed.");
+ local->transaction.done (frame, this);
+ } else {
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Blocking entrylks done. Proceeding to FOP");
+ afr_internal_lock_finish (frame, this);
+ }
+ return 0;
+}
+
+
+int
+afr_post_lower_unlock_cbk (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ GF_ASSERT (!int_lock->higher_locked);
+
+ int_lock->lock_cbk = afr_post_blocking_rename_cbk;
+ afr_blocking_lock (frame, this);
+
+ return 0;
+}
+
+
+int
+afr_set_transaction_flock (afr_local_t *local)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_inodelk_t *inodelk = NULL;
+
+ int_lock = &local->internal_lock;
+ inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+
+ inodelk->flock.l_len = local->transaction.len;
+ inodelk->flock.l_start = local->transaction.start;
+ inodelk->flock.l_type = F_WRLCK;
+
+ return 0;
+}
+
+int
+afr_lock_rec (call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ int_lock->transaction_lk_type = AFR_TRANSACTION_LK;
+ int_lock->domain = this->name;
+
+ switch (local->transaction.type) {
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ afr_set_transaction_flock (local);
+
+ int_lock->lock_cbk = afr_post_nonblocking_inodelk_cbk;
+
+ afr_nonblocking_inodelk (frame, this);
+ break;
+
+ case AFR_ENTRY_RENAME_TRANSACTION:
+
+ int_lock->lock_cbk = afr_post_nonblocking_entrylk_cbk;
+ afr_nonblocking_entrylk (frame, this);
+ break;
+
+ case AFR_ENTRY_TRANSACTION:
+ int_lock->lk_basename = local->transaction.basename;
+ if (&local->transaction.parent_loc)
+ int_lock->lk_loc = &local->transaction.parent_loc;
+ else
+ GF_ASSERT (local->fd);
+
+ int_lock->lock_cbk = afr_post_nonblocking_entrylk_cbk;
+ afr_nonblocking_entrylk (frame, this);
+ break;
+ }
+
+ return 0;
+}
+
+
+int
+afr_lock (call_frame_t *frame, xlator_t *this)
+{
+ afr_set_lock_number (frame, this);
+
+ return afr_lock_rec (frame, this);
+}
+
+
/* }}} */
-/* {{{ lock */
+int
+afr_internal_lock_finish (call_frame_t *frame, xlator_t *this)
+{
+ if (__fop_changelog_needed (frame, this)) {
+ afr_changelog_pre_op (frame, this);
+ } else {
+ afr_transaction_perform_fop (frame, this);
+ }
+
+ return 0;
+}
-static
-int afr_lock_rec (call_frame_t *frame, xlator_t *this, int child_index);
-int32_t
-afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+void
+afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int done = 0;
- int child_index = (long) cookie;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- int call_count = 0;
+ /* call this function from any of the related optimizations
+ which benefit from delaying post op are enabled, namely:
- local = frame->local;
- priv = this->private;
+ - changelog piggybacking
+ - eager locking
+ */
- LOCK (&frame->lock);
- {
- if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
- /* wait for the other lock to return */
- call_count = --local->call_count;
- }
-
- if (op_ret == -1) {
- if (op_errno == ENOSYS) {
- /* return ENOTSUP */
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/posix-locks xlator on server");
- local->op_ret = op_ret;
- done = 1;
- }
-
- local->child_up[child_index] = 0;
- local->op_errno = op_errno;
- }
- }
- UNLOCK (&frame->lock);
-
- if (call_count == 0) {
- if ((local->op_ret == -1) &&
- (local->op_errno == ENOSYS)) {
- afr_unlock (frame, this);
- } else {
- local->transaction.locked_nodes[child_index] = 1;
- local->transaction.lock_count++;
- afr_lock_rec (frame, this, child_index + 1);
- }
- }
+ priv = this->private;
+ if (!priv)
+ return;
+
+ if (!priv->post_op_delay_secs)
+ return;
+
+ local = frame->local;
+ if (!local->transaction.eager_lock_on)
+ return;
- return 0;
+ if (!local)
+ return;
+
+ if (!local->fd)
+ return;
+
+ if (local->op == GF_FOP_WRITE)
+ local->delayed_post_op = _gf_true;
+}
+
+gf_boolean_t
+afr_are_multiple_fds_opened (inode_t *inode, xlator_t *this)
+{
+ afr_inode_ctx_t *ictx = NULL;
+
+ if (!inode) {
+ /* If false is returned, it may keep on taking eager-lock
+ * which may lead to starvation, so return true to avoid that.
+ */
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid inode");
+ return _gf_true;
+ }
+ /* Lets say mount1 has eager-lock(full-lock) and after the eager-lock
+ * is taken mount2 opened the same file, it won't be able to
+ * perform any data operations until mount1 releases eager-lock.
+ * To avoid such scenario do not enable eager-lock for this transaction
+ * if open-fd-count is > 1
+ */
+
+ ictx = afr_inode_ctx_get (inode, this);
+ if (!ictx)
+ return _gf_true;
+
+ if (ictx->open_fd_count > 1)
+ return _gf_true;
+
+ return _gf_false;
}
+gf_boolean_t
+afr_any_fops_failed (afr_local_t *local, afr_private_t *priv)
+{
+ if (local->success_count != priv->child_count)
+ return _gf_true;
+ return _gf_false;
+}
-static loc_t *
-lower_path (loc_t *l1, const char *b1, loc_t *l2, const char *b2)
+gf_boolean_t
+is_afr_delayed_changelog_post_op_needed (call_frame_t *frame, xlator_t *this)
{
- int ret = 0;
+ afr_local_t *local = NULL;
+ gf_boolean_t res = _gf_false;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
- ret = strcmp (l1->path, l2->path);
-
- if (ret == 0)
- ret = strcmp (b1, b2);
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (!local->delayed_post_op)
+ goto out;
+
+ //Mark pending changelog ASAP
+ if (afr_any_fops_failed (local, priv))
+ goto out;
- if (ret <= 0)
- return l1;
- else
- return l2;
+ if (local->fd && afr_are_multiple_fds_opened (local->fd->inode, this))
+ goto out;
+
+ res = _gf_true;
+out:
+ return res;
}
-static
-int afr_lock_rec (call_frame_t *frame, xlator_t *this, int child_index)
+void
+afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd,
+ call_stub_t *stub);
+
+void
+afr_delayed_changelog_wake_up_cbk (void *data)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ fd_t *fd = NULL;
- struct flock flock;
+ fd = data;
- loc_t * lower = NULL;
- loc_t * higher = NULL;
+ afr_delayed_changelog_wake_up (THIS, fd);
+}
- const char *lower_name = NULL;
- const char *higher_name = NULL;
- local = frame->local;
- priv = this->private;
+/*
+ Check if the frame is destined to get optimized away
+ with changelog piggybacking
+*/
+static gf_boolean_t
+is_piggyback_post_op (call_frame_t *frame, fd_t *fd)
+{
+ afr_fd_ctx_t *fdctx = NULL;
+ afr_local_t *local = NULL;
+ gf_boolean_t piggyback = _gf_true;
+ afr_private_t *priv = NULL;
+ int i = 0;
- flock.l_start = local->transaction.start;
- flock.l_len = local->transaction.len;
- flock.l_type = F_WRLCK;
+ priv = frame->this->private;
+ local = frame->local;
+ fdctx = afr_fd_ctx_get (fd, frame->this);
- /* skip over children that are down */
- while ((child_index < priv->child_count)
- && !local->child_up[child_index])
- child_index++;
+ LOCK(&fd->lock);
+ {
+ piggyback = _gf_true;
- if ((child_index == priv->child_count) &&
- local->transaction.lock_count == 0) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.pre_op[i])
+ continue;
+ if (fdctx->pre_op_piggyback[i]) {
+ fdctx->pre_op_piggyback[i]--;
+ local->transaction.postop_piggybacked[i] = 1;
+ } else {
+ /* For at least _one_ subvolume we cannot
+ piggyback on the changelog, and have to
+ perform a hard POST-OP and therefore fsync
+ if necesssary
+ */
+ piggyback = _gf_false;
+ GF_ASSERT (fdctx->pre_op_done[i]);
+ fdctx->pre_op_done[i]--;
+ }
+ }
+ }
+ UNLOCK(&fd->lock);
- gf_log (this->name, GF_LOG_DEBUG,
- "unable to lock on even one child");
+ if (!afr_txn_nothing_failed (frame, frame->this)) {
+ /* something failed in this transaction,
+ we will be performing a hard post-op
+ */
+ return _gf_false;
+ }
- local->op_ret = -1;
- local->op_errno = EAGAIN;
+ return piggyback;
+}
- local->transaction.done (frame, this);
-
- return 0;
- }
+/* SET operation */
+int
+afr_fd_report_unstable_write (xlator_t *this, fd_t *fd)
+{
+ afr_fd_ctx_t *fdctx = NULL;
- if ((child_index == priv->child_count)
- || (local->transaction.lock_count ==
- afr_lock_server_count (priv, local->transaction.type))) {
+ fdctx = afr_fd_ctx_get (fd, this);
- /* we're done locking */
+ LOCK(&fd->lock);
+ {
+ fdctx->witnessed_unstable_write = _gf_true;
+ }
+ UNLOCK(&fd->lock);
- if (__changelog_needed_pre_op (frame, this)) {
- afr_changelog_pre_op (frame, this);
- } else {
- __mark_all_success (local->pending, priv->child_count,
- local->transaction.type);
+ return 0;
+}
- afr_pid_restore (frame);
+/* TEST and CLEAR operation */
+gf_boolean_t
+afr_fd_has_witnessed_unstable_write (xlator_t *this, fd_t *fd)
+{
+ afr_fd_ctx_t *fdctx = NULL;
+ gf_boolean_t witness = _gf_false;
- local->transaction.fop (frame, this);
- }
+ fdctx = afr_fd_ctx_get (fd, this);
+ if (!fdctx)
+ return _gf_true;
- return 0;
- }
+ LOCK(&fd->lock);
+ {
+ if (fdctx->witnessed_unstable_write) {
+ witness = _gf_true;
+ fdctx->witnessed_unstable_write = _gf_false;
+ }
+ }
+ UNLOCK (&fd->lock);
+
+ return witness;
+}
+
+
+int
+afr_changelog_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (afr_fop_failed (op_ret, op_errno)) {
+ /* Failure of fsync() is as good as failure of previous
+ write(). So treat it like one.
+ */
+ gf_log (this->name, GF_LOG_WARNING,
+ "fsync(%s) failed on subvolume %s. Transaction was %s",
+ uuid_utoa (local->fd->inode->gfid),
+ priv->children[child_index]->name,
+ gf_fop_list[local->op]);
+
+ afr_transaction_fop_failed (frame, this, child_index);
+ }
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0)
+ afr_changelog_post_op_now (frame, this);
+
+ return 0;
+}
+
+
+int
+afr_changelog_fsync (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ int i = 0;
+ int call_count = 0;
+ afr_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+ GF_UNUSED int ret = -1;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
+ priv->child_count);
+
+ if (!call_count) {
+ /* will go straight to unlock */
+ afr_changelog_post_op_now (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ xdata = dict_new();
+ if (xdata)
+ ret = dict_set_int32 (xdata, "batch-fsync", 1);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.pre_op[i])
+ continue;
+
+ STACK_WIND_COOKIE (frame, afr_changelog_fsync_cbk,
+ (void *) (long) i, priv->children[i],
+ priv->children[i]->fops->fsync, local->fd,
+ 1, xdata);
+ if (!--call_count)
+ break;
+ }
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+
+ int
+afr_changelog_post_op_safe (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (!local->fd || local->transaction.type != AFR_DATA_TRANSACTION) {
+ afr_changelog_post_op_now (frame, this);
+ return 0;
+ }
+
+ if (is_piggyback_post_op (frame, local->fd)) {
+ /* just detected that this post-op is about to
+ be optimized away as a new write() has
+ already piggybacked on this frame's changelog.
+ */
+ afr_changelog_post_op_now (frame, this);
+ return 0;
+ }
+
+ /* Calling afr_changelog_post_op_now() now will result in
+ issuing ->[f]xattrop().
+
+ Performing a hard POST-OP (->[f]xattrop() FOP) is a more
+ responsible operation that what it might appear on the surface.
+
+ The changelog of a file (in the xattr of the file on the server)
+ stores information (pending count) about the state of the file
+ on the OTHER server. This changelog is blindly trusted, and must
+ therefore be updated in such a way it remains trustworthy. This
+ implies that decrementing the pending count (essentially "clearing
+ the dirty flag") must be done STRICTLY after we are sure that the
+ operation on the other server has reached stable storage.
+
+ While the backend filesystem on that server will eventually flush
+ it to stable storage, we (being in userspace) have no mechanism
+ to get notified when the write became "stable".
+
+ This means we need take matter into our own hands and issue an
+ fsync() EVEN IF THE APPLICATION WAS PERFORMING UNSTABLE WRITES,
+ and get an acknowledgement for it. And we need to wait for the
+ fsync() acknowledgement before initiating the hard POST-OP.
+
+ However if the FD itself was opened in O_SYNC or O_DSYNC then
+ we are already guaranteed that the writes were made stable as
+ part of the FOP itself. The same holds true for NFS stable
+ writes which happen on an anonymous FD with O_DSYNC or O_SYNC
+ flag set in the writev() @flags param. For all other write types,
+ mark a flag in the fdctx whenever an unstable write is witnessed.
+ */
+
+ if (!afr_fd_has_witnessed_unstable_write (this, local->fd)) {
+ afr_changelog_post_op_now (frame, this);
+ return 0;
+ }
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
- case AFR_FLUSH_TRANSACTION:
-
- if (local->fd) {
- STACK_WIND_COOKIE (frame, afr_lock_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->finodelk,
- this->name, local->fd,
- F_SETLKW, &flock);
-
- } else {
- STACK_WIND_COOKIE (frame, afr_lock_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->inodelk,
- this->name, &local->loc,
- F_SETLKW, &flock);
- }
-
- break;
-
- case AFR_ENTRY_RENAME_TRANSACTION:
+ /* Check whether users want durability and perform fsync/post-op
+ * accordingly.
+ */
+ if (priv->ensure_durability) {
+ /* Time to fsync() */
+ afr_changelog_fsync (frame, this);
+ } else {
+ afr_changelog_post_op_now (frame, this);
+ }
+
+ return 0;
+}
+
+
+void
+afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd,
+ call_stub_t *stub)
+{
+ afr_fd_ctx_t *fd_ctx = NULL;
+ call_frame_t *prev_frame = NULL;
+ struct timespec delta = {0, };
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx)
+ goto out;
+
+ delta.tv_sec = priv->post_op_delay_secs;
+ delta.tv_nsec = 0;
+
+ pthread_mutex_lock (&fd_ctx->delay_lock);
{
- local->call_count = 2;
-
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- lower_name = (lower == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- higher = (lower == &local->transaction.parent_loc ?
- &local->transaction.new_parent_loc :
- &local->transaction.parent_loc);
-
- higher_name = (higher == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
-
- /* TODO: these locks should be blocking */
-
- STACK_WIND_COOKIE (frame, afr_lock_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->entrylk,
- this->name, lower, lower_name,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
-
- STACK_WIND_COOKIE (frame, afr_lock_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->entrylk,
- this->name, higher, higher_name,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
-
- break;
- }
-
- case AFR_ENTRY_TRANSACTION:
- if (local->fd) {
- STACK_WIND_COOKIE (frame, afr_lock_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->fentrylk,
- this->name, local->fd,
- local->transaction.basename,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
- } else {
- STACK_WIND_COOKIE (frame, afr_lock_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->entrylk,
- this->name,
- &local->transaction.parent_loc,
- local->transaction.basename,
- ENTRYLK_LOCK, ENTRYLK_WRLCK);
- }
-
- break;
+ prev_frame = fd_ctx->delay_frame;
+ fd_ctx->delay_frame = NULL;
+ if (fd_ctx->delay_timer)
+ gf_timer_call_cancel (this->ctx, fd_ctx->delay_timer);
+ fd_ctx->delay_timer = NULL;
+ if (!frame)
+ goto unlock;
+ fd_ctx->delay_timer = gf_timer_call_after (this->ctx, delta,
+ afr_delayed_changelog_wake_up_cbk,
+ fd);
+ fd_ctx->delay_frame = frame;
}
+unlock:
+ pthread_mutex_unlock (&fd_ctx->delay_lock);
- return 0;
+out:
+ if (prev_frame) {
+ local = prev_frame->local;
+ local->transaction.resume_stub = stub;
+ afr_changelog_post_op_safe (prev_frame, this);
+ } else if (stub) {
+ call_resume (stub);
+ }
}
-int32_t afr_lock (call_frame_t *frame, xlator_t *this)
+void
+afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
{
- afr_pid_save (frame);
+ afr_local_t *local = NULL;
- frame->root->pid = (long) frame->root;
+ local = frame->local;
- return afr_lock_rec (frame, this, 0);
+ if (is_afr_delayed_changelog_post_op_needed (frame, this))
+ afr_delayed_changelog_post_op (this, frame, local->fd, NULL);
+ else
+ afr_changelog_post_op_safe (frame, this);
}
-/* }}} */
-int32_t
+/* Wake up the sleeping/delayed post-op, and also register
+ a stub to have it resumed after this transaction
+ completely finishes.
+
+ The @stub gets saved in @local and gets resumed in
+ afr_local_cleanup()
+ */
+void
+afr_delayed_changelog_wake_resume (xlator_t *this, fd_t *fd, call_stub_t *stub)
+{
+ afr_delayed_changelog_post_op (this, NULL, fd, stub);
+}
+
+
+void
+afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd)
+{
+ afr_delayed_changelog_post_op (this, NULL, fd, NULL);
+}
+
+
+ int
afr_transaction_resume (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+
+ if (local->transaction.eager_lock_on) {
+ /* We don't need to retain "local" in the
+ fd list anymore, writes to all subvols
+ are finished by now */
+ LOCK (&local->fd->lock);
+ {
+ list_del_init (&local->transaction.eager_locked);
+ }
+ UNLOCK (&local->fd->lock);
+ }
- local = frame->local;
- priv = this->private;
-
- if (__changelog_needed_post_op (frame, this)) {
- afr_changelog_post_op (frame, this);
- } else {
- if (afr_lock_server_count (priv, local->transaction.type) == 0) {
- local->transaction.done (frame, this);
- } else {
- afr_unlock (frame, this);
- }
- }
+ afr_restore_lk_owner (frame);
- return 0;
+ if (__fop_changelog_needed (frame, this)) {
+ afr_changelog_post_op (frame, this);
+ } else {
+ if (afr_lock_server_count (priv, local->transaction.type) == 0) {
+ local->transaction.done (frame, this);
+ } else {
+ int_lock->lock_cbk = local->transaction.done;
+ afr_unlock (frame, this);
+ }
+ }
+
+ return 0;
}
@@ -1147,54 +1820,144 @@ afr_transaction_resume (call_frame_t *frame, xlator_t *this)
*/
void
-afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this, int child_index)
+afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this,
+ int child_index)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- switch (local->op) {
- case GF_FOP_WRITE:
- __mark_fop_failed_on_fd (local->fd, this, child_index);
- break;
- default:
- __mark_child_dead (local->pending, priv->child_count,
- child_index, local->transaction.type);
- break;
+ __mark_child_dead (local->pending, priv->child_count,
+ child_index, local->transaction.type);
+}
+
+
+
+ static gf_boolean_t
+afr_locals_overlap (afr_local_t *local1, afr_local_t *local2)
+{
+ uint64_t start1 = local1->transaction.start;
+ uint64_t start2 = local2->transaction.start;
+ uint64_t end1 = 0;
+ uint64_t end2 = 0;
+
+ if (local1->transaction.len)
+ end1 = start1 + local1->transaction.len - 1;
+ else
+ end1 = ULLONG_MAX;
+
+ if (local2->transaction.len)
+ end2 = start2 + local2->transaction.len - 1;
+ else
+ end2 = ULLONG_MAX;
+
+ return ((end1 >= start2) && (end2 >= start1));
+}
+
+void
+afr_transaction_eager_lock_init (afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fdctx = NULL;
+ afr_local_t *each = NULL;
+
+ priv = this->private;
+
+ if (!local->fd)
+ return;
+
+ if (local->transaction.type != AFR_DATA_TRANSACTION)
+ return;
+
+ if (!priv->eager_lock)
+ return;
+
+ fdctx = afr_fd_ctx_get (local->fd, this);
+ if (!fdctx)
+ return;
+
+ if (afr_are_multiple_fds_opened (local->fd->inode, this))
+ return;
+ /*
+ * Once full file lock is acquired in eager-lock phase, overlapping
+ * writes do not compete for inode-locks, instead are transferred to the
+ * next writes. Because of this overlapping writes are not ordered.
+ * This can cause inconsistencies in replication.
+ * Example:
+ * Two overlapping writes w1, w2 are sent in parallel on same fd
+ * in two threads t1, t2.
+ * Both threads can execute afr_writev_wind in the following manner.
+ * t1 winds w1 on brick-0
+ * t2 winds w2 on brick-0
+ * t2 winds w2 on brick-1
+ * t1 winds w1 on brick-1
+ *
+ * This check makes sure the locks are not transferred for
+ * overlapping writes.
+ */
+ LOCK (&local->fd->lock);
+ {
+ list_for_each_entry (each, &fdctx->eager_locked,
+ transaction.eager_locked) {
+ if (afr_locals_overlap (each, local)) {
+ local->transaction.eager_lock_on = _gf_false;
+ goto unlock;
+ }
+ }
+
+ local->transaction.eager_lock_on = _gf_true;
+ list_add_tail (&local->transaction.eager_locked,
+ &fdctx->eager_locked);
}
+unlock:
+ UNLOCK (&local->fd->lock);
}
-int32_t
+int
afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+ fd_t *fd = NULL;
+ int ret = -1;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- afr_transaction_local_init (local, priv);
+ local->transaction.resume = afr_transaction_resume;
+ local->transaction.type = type;
- local->transaction.resume = afr_transaction_resume;
- local->transaction.type = type;
+ ret = afr_transaction_local_init (local, this);
+ if (ret < 0)
+ goto out;
- if (afr_lock_server_count (priv, local->transaction.type) == 0) {
- if (__changelog_needed_pre_op (frame, this)) {
- afr_changelog_pre_op (frame, this);
- } else {
- __mark_all_success (local->pending, priv->child_count,
- local->transaction.type);
+ afr_transaction_eager_lock_init (local, this);
- afr_pid_restore (frame);
+ if (local->fd && local->transaction.eager_lock_on)
+ afr_set_lk_owner (frame, this, local->fd);
+ else
+ afr_set_lk_owner (frame, this, frame->root);
- local->transaction.fop (frame, this);
- }
- } else {
- afr_lock (frame, this);
- }
+ if (!local->transaction.eager_lock_on && local->loc.inode) {
+ fd = fd_lookup (local->loc.inode, frame->root->pid);
+ if (fd == NULL)
+ fd = fd_lookup_anonymous (local->loc.inode);
- return 0;
+ if (fd) {
+ afr_delayed_changelog_wake_up (this, fd);
+ fd_unref (fd);
+ }
+ }
+
+ if (afr_lock_server_count (priv, local->transaction.type) == 0) {
+ afr_internal_lock_finish (frame, this);
+ } else {
+ afr_lock (frame, this);
+ }
+ ret = 0;
+out:
+ return ret;
}
diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h
index c7a6490e7..fa626fd0d 100644
--- a/xlators/cluster/afr/src/afr-transaction.h
+++ b/xlators/cluster/afr/src/afr-transaction.h
@@ -1,30 +1,51 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __TRANSACTION_H__
#define __TRANSACTION_H__
+typedef enum {
+ LOCAL_FIRST = 1,
+ LOCAL_LAST = 2
+} afr_xattrop_type_t;
+
void
afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this,
int child_index);
+int
+afr_lock_server_count (afr_private_t *priv, afr_transaction_type type);
+
+afr_inodelk_t*
+afr_get_inodelk (afr_internal_lock_t *int_lock, char *dom);
+
int32_t
afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type);
+afr_fd_ctx_t *
+afr_fd_ctx_get (fd_t *fd, xlator_t *this);
+int
+afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending,
+ int child, afr_xattrop_type_t op);
+void
+afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this);
+
+void
+afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd);
+
+void
+__mark_all_success (int32_t *pending[], int child_count,
+ afr_transaction_type type);
+gf_boolean_t
+afr_any_fops_failed (afr_local_t *local, afr_private_t *priv);
+
+gf_boolean_t
+afr_txn_nothing_failed (call_frame_t *frame, xlator_t *this);
#endif /* __TRANSACTION_H__ */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 6b7b007ae..c724eb2ae 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include <libgen.h>
@@ -28,2620 +19,525 @@
#define _CONFIG_H
#include "config.h"
#endif
+#include "afr-common.c"
-#include "glusterfs.h"
-#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-#include "statedump.h"
-
-#include "fd.h"
-
-#include "afr-inode-read.h"
-#include "afr-inode-write.h"
-#include "afr-dir-read.h"
-#include "afr-dir-write.h"
-#include "afr-transaction.h"
-
-#include "afr-self-heal.h"
-
-
-uint64_t
-afr_is_split_brain (xlator_t *this, inode_t *inode)
-{
- int ret = 0;
-
- uint64_t ctx = 0;
- uint64_t split_brain = 0;
-
- VALIDATE_OR_GOTO (inode, out);
-
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
-
- if (ret < 0)
- goto unlock;
-
- split_brain = ctx & 0xFFFFFFFF00000000ULL;
- }
-unlock:
- UNLOCK (&inode->lock);
-
-out:
- return split_brain;
-}
+#define SHD_INODE_LRU_LIMIT 2048
+#define AFR_EH_HEALED_LIMIT 1024
+#define AFR_EH_HEAL_FAIL_LIMIT 1024
+#define AFR_EH_SPLIT_BRAIN_LIMIT 1024
+struct volume_options options[];
-void
-afr_set_split_brain (xlator_t *this, inode_t *inode, int32_t split_brain)
+int32_t
+notify (xlator_t *this, int32_t event,
+ void *data, ...)
{
- uint64_t ctx = 0;
- int ret = 0;
+ int ret = -1;
+ va_list ap;
+ void *data2 = NULL;
- VALIDATE_OR_GOTO (inode, out);
+ va_start (ap, data);
+ data2 = va_arg (ap, dict_t*);
+ va_end (ap);
+ ret = afr_notify (this, event, data, data2);
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
-
- if (ret < 0) {
- ctx = 0;
- }
-
- ctx = (0x00000000FFFFFFFFULL & ctx)
- | (split_brain & 0xFFFFFFFF00000000ULL);
-
- __inode_ctx_put (inode, this, ctx);
- }
- UNLOCK (&inode->lock);
-out:
- return;
+ return ret;
}
-
-uint64_t
-afr_read_child (xlator_t *this, inode_t *inode)
+int32_t
+mem_acct_init (xlator_t *this)
{
- int ret = 0;
+ int ret = -1;
- uint64_t ctx = 0;
- uint64_t read_child = 0;
+ if (!this)
+ return ret;
- VALIDATE_OR_GOTO (inode, out);
+ ret = xlator_mem_acct_init (this, gf_afr_mt_end + 1);
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
-
- if (ret < 0)
- goto unlock;
-
- read_child = ctx & 0x00000000FFFFFFFFULL;
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ return ret;
}
-unlock:
- UNLOCK (&inode->lock);
-out:
- return read_child;
+ return ret;
}
-void
-afr_set_read_child (xlator_t *this, inode_t *inode, int32_t read_child)
+int
+xlator_subvolume_index (xlator_t *this, xlator_t *subvol)
{
- uint64_t ctx = 0;
- int ret = 0;
-
- VALIDATE_OR_GOTO (inode, out);
+ int index = -1;
+ int i = 0;
+ xlator_list_t *list = NULL;
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
+ list = this->children;
- if (ret < 0) {
- ctx = 0;
+ while (list) {
+ if (subvol == list->xlator ||
+ strcmp (subvol->name, list->xlator->name) == 0) {
+ index = i;
+ break;
}
-
- ctx = (0xFFFFFFFF00000000ULL & ctx)
- | (0x00000000FFFFFFFFULL & read_child);
-
- __inode_ctx_put (inode, this, ctx);
+ list = list->next;
+ i++;
}
- UNLOCK (&inode->lock);
-
-out:
- return;
-}
-
-/**
- * afr_local_cleanup - cleanup everything in frame->local
- */
-
-void
-afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
-{
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
-
-
- sh = &local->self_heal;
- priv = this->private;
-
- if (sh->buf)
- FREE (sh->buf);
-
- if (sh->xattr) {
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- dict_unref (sh->xattr[i]);
- sh->xattr[i] = NULL;
- }
- }
- FREE (sh->xattr);
- }
-
- if (sh->child_errno)
- FREE (sh->child_errno);
-
- if (sh->pending_matrix) {
- for (i = 0; i < priv->child_count; i++) {
- FREE (sh->pending_matrix[i]);
- }
- FREE (sh->pending_matrix);
- }
-
- if (sh->delta_matrix) {
- for (i = 0; i < priv->child_count; i++) {
- FREE (sh->delta_matrix[i]);
- }
- FREE (sh->delta_matrix);
- }
-
- if (sh->sources)
- FREE (sh->sources);
-
- if (sh->success)
- FREE (sh->success);
-
- if (sh->healing_fd) {
- fd_unref (sh->healing_fd);
- sh->healing_fd = NULL;
- }
-
- loc_wipe (&sh->parent_loc);
+ return index;
}
-
-void
-afr_local_cleanup (afr_local_t *local, xlator_t *this)
+void
+fix_quorum_options (xlator_t *this, afr_private_t *priv, char *qtype)
{
- int i;
- afr_private_t * priv = NULL;
-
- if (!local)
- return;
-
- afr_local_sh_cleanup (local, this);
-
- FREE (local->child_errno);
-
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->pending && local->pending[i])
- FREE (local->pending[i]);
+ if (priv->quorum_count && strcmp(qtype,"fixed")) {
+ gf_log(this->name,GF_LOG_WARNING,
+ "quorum-type %s overriding quorum-count %u",
+ qtype, priv->quorum_count);
+ }
+ if (!strcmp(qtype,"none")) {
+ priv->quorum_count = 0;
+ }
+ else if (!strcmp(qtype,"auto")) {
+ priv->quorum_count = AFR_QUORUM_AUTO;
}
-
- FREE (local->pending);
-
- loc_wipe (&local->loc);
- loc_wipe (&local->newloc);
-
- FREE (local->transaction.locked_nodes);
- FREE (local->transaction.child_errno);
-
- FREE (local->transaction.basename);
- FREE (local->transaction.new_basename);
-
- loc_wipe (&local->transaction.parent_loc);
- loc_wipe (&local->transaction.new_parent_loc);
-
- if (local->fd)
- fd_unref (local->fd);
-
- if (local->xattr_req)
- dict_unref (local->xattr_req);
-
- FREE (local->child_up);
-
- { /* lookup */
- if (local->cont.lookup.xattr)
- dict_unref (local->cont.lookup.xattr);
- }
-
- { /* getxattr */
- if (local->cont.getxattr.name)
- FREE (local->cont.getxattr.name);
- }
-
- { /* lk */
- if (local->cont.lk.locked_nodes)
- FREE (local->cont.lk.locked_nodes);
- }
-
- { /* checksum */
- if (local->cont.checksum.file_checksum)
- FREE (local->cont.checksum.file_checksum);
- if (local->cont.checksum.dir_checksum)
- FREE (local->cont.checksum.dir_checksum);
- }
-
- { /* create */
- if (local->cont.create.fd)
- fd_unref (local->cont.create.fd);
- }
-
- { /* writev */
- FREE (local->cont.writev.vector);
- }
-
- { /* setxattr */
- if (local->cont.setxattr.dict)
- dict_unref (local->cont.setxattr.dict);
- }
-
- { /* removexattr */
- FREE (local->cont.removexattr.name);
- }
-
- { /* symlink */
- FREE (local->cont.symlink.linkpath);
- }
-}
-
-
-int
-afr_frame_return (call_frame_t *frame)
-{
- afr_local_t *local = NULL;
- int call_count = 0;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- call_count = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- return call_count;
-}
-
-/**
- * first_up_child - return the index of the first child that is up
- */
-
-int
-afr_first_up_child (afr_private_t *priv)
-{
- xlator_t ** children = NULL;
- int ret = -1;
- int i = 0;
-
- LOCK (&priv->lock);
- {
- children = priv->children;
- for (i = 0; i < priv->child_count; i++) {
- if (priv->child_up[i]) {
- ret = i;
- break;
- }
- }
- }
- UNLOCK (&priv->lock);
-
- return ret;
-}
-
-
-/**
- * up_children_count - return the number of children that are up
- */
-
-int
-afr_up_children_count (int child_count, unsigned char *child_up)
-{
- int i = 0;
- int ret = 0;
-
- for (i = 0; i < child_count; i++)
- if (child_up[i])
- ret++;
- return ret;
}
-
int
-afr_locked_nodes_count (unsigned char *locked_nodes, int child_count)
+reconfigure (xlator_t *this, dict_t *options)
{
- int ret = 0;
- int i;
-
- for (i = 0; i < child_count; i++)
- if (locked_nodes[i])
- ret++;
+ afr_private_t *priv = NULL;
+ xlator_t *read_subvol = NULL;
+ int read_subvol_index = -1;
+ int ret = -1;
+ int index = -1;
+ char *qtype = NULL;
- return ret;
-}
-
-
-ino64_t
-afr_itransform (ino64_t ino, int child_count, int child_index)
-{
- ino64_t scaled_ino = -1;
-
- if (ino == ((uint64_t) -1)) {
- scaled_ino = ((uint64_t) -1);
- goto out;
- }
-
- scaled_ino = (ino * child_count) + child_index;
-
-out:
- return scaled_ino;
-}
+ priv = this->private;
+ GF_OPTION_RECONF ("background-self-heal-count",
+ priv->background_self_heal_count, options, uint32,
+ out);
-int
-afr_deitransform_orig (ino64_t ino, int child_count)
-{
- int index = -1;
+ GF_OPTION_RECONF ("metadata-self-heal",
+ priv->metadata_self_heal, options, bool, out);
- index = ino % child_count;
+ GF_OPTION_RECONF ("data-self-heal", priv->data_self_heal, options, str,
+ out);
- return index;
-}
+ GF_OPTION_RECONF ("entry-self-heal", priv->entry_self_heal, options,
+ bool, out);
+ GF_OPTION_RECONF ("strict-readdir", priv->strict_readdir, options, bool,
+ out);
-int
-afr_deitransform (ino64_t ino, int child_count)
-{
- return 0;
-}
+ GF_OPTION_RECONF ("data-self-heal-window-size",
+ priv->data_self_heal_window_size, options,
+ uint32, out);
+ GF_OPTION_RECONF ("data-change-log", priv->data_change_log, options,
+ bool, out);
-int
-afr_self_heal_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
+ GF_OPTION_RECONF ("metadata-change-log",
+ priv->metadata_change_log, options, bool, out);
- if (local->govinda_gOvinda) {
- afr_set_split_brain (this, local->cont.lookup.inode, 1);
- } else {
- afr_set_split_brain (this, local->cont.lookup.inode, 0);
- }
+ GF_OPTION_RECONF ("entry-change-log", priv->entry_change_log, options,
+ bool, out);
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->cont.lookup.inode,
- &local->cont.lookup.buf,
- local->cont.lookup.xattr);
+ GF_OPTION_RECONF ("data-self-heal-algorithm",
+ priv->data_self_heal_algorithm, options, str, out);
- return 0;
-}
+ GF_OPTION_RECONF ("self-heal-daemon", priv->shd.enabled, options, bool, out);
+ GF_OPTION_RECONF ("read-subvolume", read_subvol, options, xlator, out);
-int
-afr_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf, dict_t *xattr)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- struct stat * lookup_buf = NULL;
-
- int call_count = -1;
- int child_index = -1;
- int first_up_child = -1;
-
- uint32_t open_fd_count = 0;
- int ret = 0;
-
- child_index = (long) cookie;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- local = frame->local;
-
- lookup_buf = &local->cont.lookup.buf;
-
- if (op_ret == -1) {
- if (op_errno == ENOENT)
- local->enoent_count++;
-
- if (op_errno != ENOTCONN) {
- if (local->op_errno != ESTALE)
- local->op_errno = op_errno;
- }
-
- if (op_errno == ESTALE) {
- /* no matter what other subvolumes return for
- * this call, ESTALE _must_ be sent to parent
- */
- local->op_ret = -1;
- local->op_errno = ESTALE;
- }
- goto unlock;
- }
-
- if (afr_sh_has_metadata_pending (xattr, child_index, this))
- local->need_metadata_self_heal = 1;
-
- if (afr_sh_has_entry_pending (xattr, child_index, this))
- local->need_entry_self_heal = 1;
-
- if (afr_sh_has_data_pending (xattr, child_index, this))
- local->need_data_self_heal = 1;
-
- ret = dict_get_uint32 (xattr, GLUSTERFS_OPEN_FD_COUNT,
- &open_fd_count);
- local->open_fd_count += open_fd_count;
-
-
- first_up_child = afr_first_up_child (priv);
-
- if (child_index == first_up_child) {
- local->cont.lookup.ino =
- afr_itransform (buf->st_ino,
- priv->child_count,
- first_up_child);
- }
+ GF_OPTION_RECONF ("read-hash-mode", priv->hash_mode,
+ options, uint32, out);
- /* in case of revalidate, we need to send stat of the
- * child whose stat was sent during the first lookup.
- * (so that time stamp does not vary with revalidate.
- * in case it is down, stat of the fist success will
- * be replied */
-
- /* inode number should be preserved across revalidates */
-
- if (local->success_count == 0) {
- if (local->op_errno != ESTALE)
- local->op_ret = op_ret;
-
- local->cont.lookup.inode = inode;
- local->cont.lookup.xattr = dict_ref (xattr);
-
- *lookup_buf = *buf;
-
- lookup_buf->st_ino = afr_itransform (buf->st_ino,
- priv->child_count,
- child_index);
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- priv->read_child);
- } else {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- child_index);
- }
-
- } else {
- if (FILETYPE_DIFFERS (buf, lookup_buf)) {
- /* mismatching filetypes with same name
- -- Govinda !! GOvinda !!!
- */
- local->govinda_gOvinda = 1;
- }
-
- if (PERMISSION_DIFFERS (buf, lookup_buf)) {
- /* mismatching permissions */
- local->need_metadata_self_heal = 1;
- }
-
- if (OWNERSHIP_DIFFERS (buf, lookup_buf)) {
- /* mismatching permissions */
- local->need_metadata_self_heal = 1;
- }
-
- if (SIZE_DIFFERS (buf, lookup_buf)
- && S_ISREG (buf->st_mode)) {
- local->need_data_self_heal = 1;
- }
-
- if (child_index == local->read_child_index) {
-
- /*
- lookup has succeeded on the read child.
- So use its inode number
- */
- if (local->op_errno != ESTALE)
- local->op_ret = op_ret;
-
- if (local->cont.lookup.xattr)
- dict_unref (local->cont.lookup.xattr);
-
- local->cont.lookup.inode = inode;
- local->cont.lookup.xattr = dict_ref (xattr);
-
- *lookup_buf = *buf;
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- priv->read_child);
- } else {
- afr_set_read_child (this,
- local->cont.lookup.inode,
- local->read_child_index);
- }
- }
-
- }
-
- local->success_count++;
- }
-unlock:
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (local->cont.lookup.ino) {
- local->cont.lookup.buf.st_ino = local->cont.lookup.ino;
+ if (read_subvol) {
+ index = xlator_subvolume_index (this, read_subvol);
+ if (index == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "%s not a subvolume",
+ read_subvol->name);
+ goto out;
}
+ priv->read_child = index;
+ }
- if (local->op_ret == 0) {
- /* KLUDGE: assuming DHT will not itransform in
- revalidate */
- if (local->cont.lookup.inode->ino)
- local->cont.lookup.buf.st_ino =
- local->cont.lookup.inode->ino;
- }
-
- if (local->success_count && local->enoent_count) {
- local->need_metadata_self_heal = 1;
- local->need_data_self_heal = 1;
- local->need_entry_self_heal = 1;
- }
-
- if (local->success_count) {
- /* check for split-brain case in previous lookup */
- if (afr_is_split_brain (this,
- local->cont.lookup.inode))
- local->need_data_self_heal = 1;
- }
-
- if ((local->need_metadata_self_heal
- || local->need_data_self_heal
- || local->need_entry_self_heal)
- && (!local->open_fd_count)) {
-
- if (!local->cont.lookup.inode->st_mode) {
- /* fix for RT #602 */
- local->cont.lookup.inode->st_mode =
- lookup_buf->st_mode;
- }
-
- afr_self_heal (frame, this, afr_self_heal_cbk);
- } else {
- AFR_STACK_UNWIND (frame, local->op_ret,
- local->op_errno,
- local->cont.lookup.inode,
- &local->cont.lookup.buf,
- local->cont.lookup.xattr);
- }
- }
-
- return 0;
-}
-
-
-int
-afr_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
-
- uint64_t ctx;
-
- int32_t op_errno = 0;
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- local->op_ret = -1;
-
- frame->local = local;
-
- loc_copy (&local->loc, loc);
-
- ret = inode_ctx_get (loc->inode, this, &ctx);
- if (ret == 0) {
- /* lookup is a revalidate */
+ GF_OPTION_RECONF ("read-subvolume-index",read_subvol_index, options,int32,out);
- local->read_child_index = afr_read_child (this, loc->inode);
- } else {
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
+ if (read_subvol_index >-1) {
+ index=read_subvol_index;
+ if (index >= priv->child_count) {
+ gf_log (this->name, GF_LOG_ERROR, "%d not a subvolume-index",
+ index);
+ goto out;
}
- UNLOCK (&priv->read_child_lock);
+ priv->read_child = index;
}
- local->call_count = priv->child_count;
-
- local->child_up = memdup (priv->child_up, priv->child_count);
- local->child_count = afr_up_children_count (priv->child_count,
- local->child_up);
-
- /* By default assume ENOTCONN. On success it will be set to 0. */
- local->op_errno = ENOTCONN;
-
- if (xattr_req == NULL)
- local->xattr_req = dict_new ();
- else
- local->xattr_req = dict_ref (xattr_req);
-
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_uint64 (local->xattr_req, priv->pending_key[i],
- 3 * sizeof(int32_t));
-
- /* 3 = data+metadata+entry */
- }
-
- ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_OPEN_FD_COUNT, 0);
-
- for (i = 0; i < priv->child_count; i++) {
- STACK_WIND_COOKIE (frame, afr_lookup_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- loc, local->xattr_req);
- }
-
- ret = 0;
+ GF_OPTION_RECONF ("eager-lock", priv->eager_lock, options, bool, out);
+ GF_OPTION_RECONF ("quorum-type", qtype, options, str, out);
+ GF_OPTION_RECONF ("quorum-count", priv->quorum_count, options,
+ uint32, out);
+ fix_quorum_options(this,priv,qtype);
+ GF_OPTION_RECONF ("heal-timeout", priv->shd.timeout, options,
+ int32, out);
+
+ GF_OPTION_RECONF ("post-op-delay-secs", priv->post_op_delay_secs, options,
+ uint32, out);
+
+ GF_OPTION_RECONF (AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size,
+ options, size, out);
+ /* Reset this so we re-discover in case the topology changed. */
+ GF_OPTION_RECONF ("readdir-failover", priv->readdir_failover, options,
+ bool, out);
+ GF_OPTION_RECONF ("ensure-durability", priv->ensure_durability, options,
+ bool, out);
+ priv->did_discovery = _gf_false;
+
+ ret = 0;
out:
- if (ret == -1)
- AFR_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
+ return ret;
- return 0;
}
-/* {{{ open */
+static const char *favorite_child_warning_str = "You have specified subvolume '%s' "
+ "as the 'favorite child'. This means that if a discrepancy in the content "
+ "or attributes (ownership, permission, etc.) of a file is detected among "
+ "the subvolumes, the file on '%s' will be considered the definitive "
+ "version and its contents will OVERWRITE the contents of the file on other "
+ "subvolumes. All versions of the file except that on '%s' "
+ "WILL BE LOST.";
-int
-afr_fd_ctx_set (xlator_t *this, fd_t *fd)
-{
- afr_private_t * priv = NULL;
- int op_ret = 0;
- int ret = 0;
+int32_t
+init (xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int child_count = 0;
+ xlator_list_t *trav = NULL;
+ int i = 0;
+ int ret = -1;
+ GF_UNUSED int op_errno = 0;
+ xlator_t *read_subvol = NULL;
+ int read_subvol_index = -1;
+ xlator_t *fav_child = NULL;
+ char *qtype = NULL;
+
+ if (!this->children) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "replicate translator needs more than one "
+ "subvolume defined.");
+ return -1;
+ }
- uint64_t ctx;
- afr_fd_ctx_t * fd_ctx = NULL;
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Volume is dangling.");
+ }
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (fd, out);
+ this->private = GF_CALLOC (1, sizeof (afr_private_t),
+ gf_afr_mt_afr_private_t);
+ if (!this->private)
+ goto out;
priv = this->private;
+ LOCK_INIT (&priv->lock);
+ LOCK_INIT (&priv->read_child_lock);
+ //lock recovery is not done in afr
+ pthread_mutex_init (&priv->mutex, NULL);
+ INIT_LIST_HEAD (&priv->saved_fds);
- LOCK (&fd->lock);
- {
- ret = __fd_ctx_get (fd, this, &ctx);
-
- if (ret == 0)
- goto unlock;
-
- fd_ctx = CALLOC (1, sizeof (afr_fd_ctx_t));
- if (!fd_ctx) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
-
- op_ret = -ENOMEM;
- goto unlock;
- }
+ child_count = xlator_subvolume_count (this);
- fd_ctx->child_failed = CALLOC (sizeof (*fd_ctx->child_failed),
- priv->child_count);
-
- if (!fd_ctx->child_failed) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
+ priv->child_count = child_count;
- op_ret = -ENOMEM;
- goto unlock;
- }
+ priv->read_child = -1;
- ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx);
- if (ret < 0) {
- op_ret = ret;
+ GF_OPTION_INIT ("read-subvolume", read_subvol, xlator, out);
+ if (read_subvol) {
+ priv->read_child = xlator_subvolume_index (this, read_subvol);
+ if (priv->read_child == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "%s not a subvolume",
+ read_subvol->name);
+ goto out;
}
}
-unlock:
- UNLOCK (&fd->lock);
-out:
- return ret;
-}
-
-
-int
-afr_open_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
-{
- afr_local_t * local = frame->local;
- int ret = 0;
-
- ret = afr_fd_ctx_set (this, local->fd);
-
- if (ret < 0) {
- local->op_ret = -1;
- local->op_errno = -ret;
+ GF_OPTION_INIT ("read-subvolume-index",read_subvol_index,int32,out);
+ if (read_subvol_index > -1) {
+ if (read_subvol_index >= priv->child_count) {
+ gf_log (this->name, GF_LOG_ERROR, "%d not a subvolume-index",
+ read_subvol_index);
+ goto out;
+ }
+ priv->read_child = read_subvol_index;
}
+ GF_OPTION_INIT ("choose-local", priv->choose_local, bool, out);
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->fd);
- return 0;
-}
-
-
-int
-afr_open_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- fd_t *fd)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int ret = 0;
-
- int call_count = -1;
-
- priv = this->private;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if ((local->cont.open.flags & O_TRUNC)
- && (local->op_ret >= 0)) {
- STACK_WIND (frame, afr_open_ftruncate_cbk,
- this, this->fops->ftruncate,
- fd, 0);
- } else {
- ret = afr_fd_ctx_set (this, fd);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not set fd ctx for fd=%p",
- fd);
-
- local->op_ret = -1;
- local->op_errno = -ret;
- }
-
- AFR_STACK_UNWIND (frame, local->op_ret,
- local->op_errno, local->fd);
- }
- }
-
- return 0;
-}
-
-
-int
-afr_open (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, fd_t *fd)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
-
- int i = 0;
- int ret = -1;
-
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int32_t wind_flags = flags & (~O_TRUNC);
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
-
- priv = this->private;
-
- if (afr_is_split_brain (this, loc->inode)) {
- /* self-heal failed */
- op_errno = EIO;
- goto out;
- }
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- frame->local = local;
- call_count = local->call_count;
-
- local->cont.open.flags = flags;
- local->fd = fd_ref (fd);
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_open_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->open,
- loc, wind_flags, fd);
-
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno, fd);
- }
-
- return 0;
-}
+ GF_OPTION_INIT ("read-hash-mode", priv->hash_mode, uint32, out);
-/* }}} */
+ priv->favorite_child = -1;
+ GF_OPTION_INIT ("favorite-child", fav_child, xlator, out);
+ if (fav_child) {
+ priv->favorite_child = xlator_subvolume_index (this, fav_child);
+ if (priv->favorite_child == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "%s not a subvolume",
+ fav_child->name);
+ goto out;
+ }
+ gf_log (this->name, GF_LOG_WARNING,
+ favorite_child_warning_str, fav_child->name,
+ fav_child->name, fav_child->name);
+ }
-/* {{{ flush */
-int
-afr_flush_unwind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (main_frame, local->op_ret, local->op_errno);
- }
-
- return 0;
-}
+ GF_OPTION_INIT ("background-self-heal-count",
+ priv->background_self_heal_count, uint32, out);
+ GF_OPTION_INIT ("data-self-heal", priv->data_self_heal, str, out);
-int
-afr_flush_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int child_index = (long) cookie;
- int need_unwind = 0;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- if (need_unwind)
- afr_flush_unwind (frame, this);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
-}
+ GF_OPTION_INIT ("data-self-heal-algorithm",
+ priv->data_self_heal_algorithm, str, out);
+ GF_OPTION_INIT ("data-self-heal-window-size",
+ priv->data_self_heal_window_size, uint32, out);
-int
-afr_flush_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int i = 0;
- int call_count = -1;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (priv->child_count, local->child_up);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_flush_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->flush,
- local->fd);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
+ GF_OPTION_INIT ("metadata-self-heal", priv->metadata_self_heal, bool,
+ out);
+ GF_OPTION_INIT ("entry-self-heal", priv->entry_self_heal, bool, out);
-int
-afr_flush_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
+ GF_OPTION_INIT ("self-heal-daemon", priv->shd.enabled, bool, out);
- local = frame->local;
+ GF_OPTION_INIT ("iam-self-heal-daemon", priv->shd.iamshd, bool, out);
- local->transaction.unwind (frame, this);
+ GF_OPTION_INIT ("data-change-log", priv->data_change_log, bool, out);
- AFR_STACK_DESTROY (frame);
+ GF_OPTION_INIT ("metadata-change-log", priv->metadata_change_log, bool,
+ out);
- return 0;
-}
+ GF_OPTION_INIT ("entry-change-log", priv->entry_change_log, bool, out);
+ GF_OPTION_INIT ("optimistic-change-log", priv->optimistic_change_log,
+ bool, out);
-int
-afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
+ GF_OPTION_INIT ("inodelk-trace", priv->inodelk_trace, bool, out);
- call_frame_t * transaction_frame = NULL;
+ GF_OPTION_INIT ("entrylk-trace", priv->entrylk_trace, bool, out);
- int ret = -1;
+ GF_OPTION_INIT ("strict-readdir", priv->strict_readdir, bool, out);
- int op_ret = -1;
- int op_errno = 0;
+ GF_OPTION_INIT ("eager-lock", priv->eager_lock, bool, out);
+ GF_OPTION_INIT ("quorum-type", qtype, str, out);
+ GF_OPTION_INIT ("quorum-count", priv->quorum_count, uint32, out);
+ GF_OPTION_INIT (AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size, size,
+ out);
+ fix_quorum_options(this,priv,qtype);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ GF_OPTION_INIT ("post-op-delay-secs", priv->post_op_delay_secs, uint32, out);
+ GF_OPTION_INIT ("readdir-failover", priv->readdir_failover, bool, out);
+ GF_OPTION_INIT ("ensure-durability", priv->ensure_durability, bool,
+ out);
- priv = this->private;
+ priv->wait_count = 1;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
+ priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count,
+ gf_afr_mt_char);
+ if (!priv->child_up) {
+ ret = -ENOMEM;
+ goto out;
}
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- transaction_frame->local = local;
-
- local->op = GF_FOP_FLUSH;
-
- local->transaction.fop = afr_flush_wind;
- local->transaction.done = afr_flush_done;
- local->transaction.unwind = afr_flush_unwind;
-
- local->fd = fd_ref (fd);
-
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
-
- afr_transaction (transaction_frame, this, AFR_FLUSH_TRANSACTION);
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
-
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
-
- return 0;
-}
-
-/* }}} */
-
-
-int
-afr_release (xlator_t *this, fd_t *fd)
-{
- uint64_t ctx;
- afr_fd_ctx_t * fd_ctx;
-
- int ret = 0;
-
- ret = fd_ctx_get (fd, this, &ctx);
-
- if (ret < 0)
+ for (i = 0; i < child_count; i++)
+ priv->child_up[i] = -1; /* start with unknown state.
+ this initialization needed
+ for afr_notify() to work
+ reliably
+ */
+
+ priv->children = GF_CALLOC (sizeof (xlator_t *), child_count,
+ gf_afr_mt_xlator_t);
+ if (!priv->children) {
+ ret = -ENOMEM;
goto out;
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- if (fd_ctx) {
- if (fd_ctx->child_failed)
- FREE (fd_ctx->child_failed);
-
- FREE (fd_ctx);
}
-
-out:
- return 0;
-}
-
-
-/* {{{ fsync */
-
-int
-afr_fsync_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno);
-
- return 0;
-}
-
-
-int
-afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_fsync_cbk,
- priv->children[i],
- priv->children[i]->fops->fsync,
- fd, datasync);
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
- return 0;
-}
-
-/* }}} */
-
-/* {{{ fsync */
-
-int32_t
-afr_fsyncdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno);
-
- return 0;
-}
-
-
-int32_t
-afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_fsync_cbk,
- priv->children[i],
- priv->children[i]->fops->fsyncdir,
- fd, datasync);
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
- return 0;
-}
-
-/* }}} */
-
-/* {{{ xattrop */
-
-int32_t
-afr_xattrop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno, xattr);
-
- return 0;
-}
-
-
-int32_t
-afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t optype, dict_t *xattr)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_xattrop_cbk,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- loc, optype, xattr);
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
- return 0;
-}
-
-/* }}} */
-
-/* {{{ fxattrop */
-
-int32_t
-afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr)
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno, xattr);
-
- return 0;
-}
-
-
-int32_t
-afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t optype, dict_t *xattr)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_fxattrop_cbk,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- fd, optype, xattr);
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
- return 0;
-}
-
-/* }}} */
-
-
-int32_t
-afr_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno);
-
- return 0;
-}
-
-
-int32_t
-afr_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct flock *flock)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_inodelk_cbk,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- volume, loc, cmd, flock);
-
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
- return 0;
-}
-
-
-int32_t
-afr_finodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno);
-
- return 0;
-}
-
-
-int32_t
-afr_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct flock *flock)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_finodelk_cbk,
- priv->children[i],
- priv->children[i]->fops->finodelk,
- volume, fd, cmd, flock);
-
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
- return 0;
-}
-
-
-int32_t
-afr_entrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno);
-
- return 0;
-}
-
-
-int32_t
-afr_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc,
- const char *basename, entrylk_cmd cmd, entrylk_type type)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_entrylk_cbk,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- volume, loc, basename, cmd, type);
-
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
- return 0;
-}
-
-
-
-int32_t
-afr_fentrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno);
-
- return 0;
-}
-
-
-int32_t
-afr_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd,
- const char *basename, entrylk_cmd cmd, entrylk_type type)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_fentrylk_cbk,
- priv->children[i],
- priv->children[i]->fops->fentrylk,
- volume, fd, basename, cmd, type);
-
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
- return 0;
-}
-
-
-int32_t
-afr_checksum_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- uint8_t *file_checksum, uint8_t *dir_checksum)
-
-{
- afr_local_t *local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0 && (local->op_ret != 0)) {
- local->op_ret = 0;
-
- local->cont.checksum.file_checksum = MALLOC (NAME_MAX);
- memcpy (local->cont.checksum.file_checksum, file_checksum,
- NAME_MAX);
-
- local->cont.checksum.dir_checksum = MALLOC (NAME_MAX);
- memcpy (local->cont.checksum.dir_checksum, dir_checksum,
- NAME_MAX);
-
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->cont.checksum.file_checksum,
- local->cont.checksum.dir_checksum);
-
- return 0;
-}
-
-int32_t
-afr_checksum (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flag)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = -1;
-
- int i = 0;
- int32_t call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- call_count = local->call_count;
- frame->local = local;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_checksum_cbk,
- priv->children[i],
- priv->children[i]->fops->checksum,
- loc, flag);
-
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno);
- }
- return 0;
-}
-
-
-int32_t
-afr_statfs_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct statvfs *statvfs)
-{
- afr_local_t *local = NULL;
-
- int call_count = 0;
-
- LOCK (&frame->lock);
- {
- local = frame->local;
-
- if (op_ret == 0) {
- local->op_ret = op_ret;
-
- if (local->cont.statfs.buf_set) {
- if (statvfs->f_bavail < local->cont.statfs.buf.f_bavail)
- local->cont.statfs.buf = *statvfs;
- } else {
- local->cont.statfs.buf = *statvfs;
- local->cont.statfs.buf_set = 1;
- }
- }
-
- if (op_ret == -1)
- local->op_errno = op_errno;
-
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->cont.statfs.buf);
-
- return 0;
-}
-
-
-int32_t
-afr_statfs (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
-{
- afr_private_t * priv = NULL;
- int child_count = 0;
- afr_local_t * local = NULL;
- int i = 0;
-
- int ret = -1;
- int call_count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
-
- priv = this->private;
- child_count = priv->child_count;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
-
- ret = AFR_LOCAL_INIT (local, priv);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- frame->local = local;
- call_count = local->call_count;
-
- for (i = 0; i < child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_statfs_cbk,
- priv->children[i],
- priv->children[i]->fops->statfs,
- loc);
- if (!--call_count)
- break;
- }
- }
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
- return 0;
-}
-
-
-int32_t
-afr_lk_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct flock *lock)
-{
- afr_local_t * local = NULL;
-
- int call_count = -1;
-
- local = frame->local;
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- lock);
-
- return 0;
-}
-
-
-int32_t
-afr_lk_unlock (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int i;
- int call_count = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_locked_nodes_count (local->cont.lk.locked_nodes,
- priv->child_count);
-
- if (call_count == 0) {
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->cont.lk.flock);
- return 0;
- }
-
- local->call_count = call_count;
-
- local->cont.lk.flock.l_type = F_UNLCK;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->cont.lk.locked_nodes[i]) {
- STACK_WIND (frame, afr_lk_unlock_cbk,
- priv->children[i],
- priv->children[i]->fops->lk,
- local->fd, F_SETLK,
- &local->cont.lk.flock);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-
-int32_t
-afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct flock *lock)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- call_count = --local->call_count;
-
- if (!child_went_down (op_ret, op_errno) && (op_ret == -1)) {
- local->op_ret = -1;
- local->op_errno = op_errno;
-
- afr_lk_unlock (frame, this);
- return 0;
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
- local->op_errno = 0;
- local->cont.lk.locked_nodes[child_index] = 1;
- }
-
- child_index++;
-
- if (child_index < priv->child_count) {
- STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->lk,
- local->fd, local->cont.lk.cmd,
- &local->cont.lk.flock);
- } else if (local->op_ret == -1) {
- /* all nodes have gone down */
-
- AFR_STACK_UNWIND (frame, -1, ENOTCONN, &local->cont.lk.flock);
- } else {
- /* locking has succeeded on all nodes that are up */
-
- AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->cont.lk.flock);
- }
-
- return 0;
-}
-
-
-int
-afr_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t cmd,
- struct flock *flock)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int i = 0;
-
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- ALLOC_OR_GOTO (local, afr_local_t, out);
- AFR_LOCAL_INIT (local, priv);
-
- frame->local = local;
-
- local->cont.lk.locked_nodes = CALLOC (priv->child_count,
- sizeof (*local->cont.lk.locked_nodes));
-
- if (!local->cont.lk.locked_nodes) {
- gf_log (this->name, GF_LOG_ERROR, "Out of memory");
- op_errno = ENOMEM;
- goto out;
- }
-
- local->fd = fd_ref (fd);
- local->cont.lk.cmd = cmd;
- local->cont.lk.flock = *flock;
-
- STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) 0,
- priv->children[i],
- priv->children[i]->fops->lk,
- fd, cmd, flock);
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (frame, op_ret, op_errno, NULL);
- }
- return 0;
-}
-
-int
-afr_priv_dump (xlator_t *this)
-{
- afr_private_t *priv = NULL;
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 0;
-
-
- assert(this);
- priv = this->private;
-
- assert(priv);
- snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
- gf_proc_dump_add_section(key_prefix);
- gf_proc_dump_build_key(key, key_prefix, "child_count");
- gf_proc_dump_write(key, "%u", priv->child_count);
- gf_proc_dump_build_key(key, key_prefix, "read_child_rr");
- gf_proc_dump_write(key, "%u", priv->read_child_rr);
- for (i = 0; i < priv->child_count; i++) {
- gf_proc_dump_build_key(key, key_prefix, "child_up[%d]", i);
- gf_proc_dump_write(key, "%d", priv->child_up[i]);
- gf_proc_dump_build_key(key, key_prefix,
- "pending_key[%d]", i);
- gf_proc_dump_write(key, "%s", priv->pending_key[i]);
+ priv->pending_key = GF_CALLOC (sizeof (*priv->pending_key),
+ child_count,
+ gf_afr_mt_char);
+ if (!priv->pending_key) {
+ ret = -ENOMEM;
+ goto out;
}
- gf_proc_dump_build_key(key, key_prefix, "data_self_heal");
- gf_proc_dump_write(key, "%d", priv->data_self_heal);
- gf_proc_dump_build_key(key, key_prefix, "metadata_self_heal");
- gf_proc_dump_write(key, "%d", priv->metadata_self_heal);
- gf_proc_dump_build_key(key, key_prefix, "entry_self_heal");
- gf_proc_dump_write(key, "%d", priv->entry_self_heal);
- gf_proc_dump_build_key(key, key_prefix, "data_change_log");
- gf_proc_dump_write(key, "%d", priv->data_change_log);
- gf_proc_dump_build_key(key, key_prefix, "metadata_change_log");
- gf_proc_dump_write(key, "%d", priv->metadata_change_log);
- gf_proc_dump_build_key(key, key_prefix, "entry_change_log");
- gf_proc_dump_write(key, "%d", priv->entry_change_log);
- gf_proc_dump_build_key(key, key_prefix, "read_child");
- gf_proc_dump_write(key, "%d", priv->read_child);
- gf_proc_dump_build_key(key, key_prefix, "favorite_child");
- gf_proc_dump_write(key, "%u", priv->favorite_child);
- gf_proc_dump_build_key(key, key_prefix, "data_lock_server_count");
- gf_proc_dump_write(key, "%u", priv->data_lock_server_count);
- gf_proc_dump_build_key(key, key_prefix, "metadata_lock_server_count");
- gf_proc_dump_write(key, "%u", priv->metadata_lock_server_count);
- gf_proc_dump_build_key(key, key_prefix, "entry_lock_server_count");
- gf_proc_dump_write(key, "%u", priv->entry_lock_server_count);
- gf_proc_dump_build_key(key, key_prefix, "wait_count");
- gf_proc_dump_write(key, "%u", priv->wait_count);
-
- return 0;
-}
-
-
-/**
- * find_child_index - find the child's index in the array of subvolumes
- * @this: AFR
- * @child: child
- */
-
-static int
-find_child_index (xlator_t *this, xlator_t *child)
-{
- afr_private_t *priv = NULL;
-
- int i = -1;
-
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if ((xlator_t *) child == priv->children[i])
- break;
- }
-
- return i;
-}
+ trav = this->children;
+ i = 0;
+ while (i < child_count) {
+ priv->children[i] = trav->xlator;
-int32_t
-notify (xlator_t *this, int32_t event,
- void *data, ...)
-{
- afr_private_t * priv = NULL;
- unsigned char * child_up = NULL;
-
- int i = -1;
- int up_children = 0;
-
- priv = this->private;
-
- if (!priv)
- return 0;
-
- child_up = priv->child_up;
-
- switch (event) {
- case GF_EVENT_CHILD_UP:
- i = find_child_index (this, data);
-
- child_up[i] = 1;
-
- /*
- if all the children were down, and one child came up,
- send notify to parent
- */
-
- for (i = 0; i < priv->child_count; i++)
- if (child_up[i])
- up_children++;
-
- if (up_children == 1) {
- gf_log (this->name, GF_LOG_NORMAL,
- "Subvolume '%s' came back up; "
- "going online.", ((xlator_t *)data)->name);
-
- default_notify (this, event, data);
+ ret = gf_asprintf (&priv->pending_key[i], "%s.%s",
+ AFR_XATTR_PREFIX,
+ trav->xlator->name);
+ if (-1 == ret) {
+ ret = -ENOMEM;
+ goto out;
}
- break;
-
- case GF_EVENT_CHILD_DOWN:
- i = find_child_index (this, data);
-
- child_up[i] = 0;
-
- /*
- if all children are down, and this was the last to go down,
- send notify to parent
- */
-
- for (i = 0; i < priv->child_count; i++)
- if (child_up[i])
- up_children++;
+ trav = trav->next;
+ i++;
+ }
- if (up_children == 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "All subvolumes are down. Going offline "
- "until atleast one of them comes back up.");
+ ret = gf_asprintf (&priv->sh_domain, AFR_SH_DATA_DOMAIN_FMT,
+ this->name);
+ if (-1 == ret) {
+ ret = -ENOMEM;
+ goto out;
+ }
- default_notify (this, event, data);
- }
+ priv->last_event = GF_CALLOC (child_count, sizeof (*priv->last_event),
+ gf_afr_mt_int32_t);
+ if (!priv->last_event) {
+ ret = -ENOMEM;
+ goto out;
+ }
- break;
+ /* keep more local here as we may need them for self-heal etc */
+ this->local_pool = mem_pool_new (afr_local_t, 512);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto out;
+ }
- default:
- default_notify (this, event, data);
- }
+ priv->first_lookup = 1;
+ priv->root_inode = NULL;
- return 0;
-}
+ if (!priv->shd.iamshd) {
+ ret = 0;
+ goto out;
+ }
+ ret = -ENOMEM;
+ priv->shd.pos = GF_CALLOC (sizeof (*priv->shd.pos), child_count,
+ gf_afr_mt_brick_pos_t);
+ if (!priv->shd.pos)
+ goto out;
-static const char *favorite_child_warning_str = "You have specified subvolume '%s' "
- "as the 'favorite child'. This means that if a discrepancy in the content "
- "or attributes (ownership, permission, etc.) of a file is detected among "
- "the subvolumes, the file on '%s' will be considered the definitive "
- "version and its contents will OVERWRITE the contents of the file on other "
- "subvolumes. All versions of the file except that on '%s' "
- "WILL BE LOST.";
-
-static const char *no_lock_servers_warning_str = "You have set lock-server-count = 0. "
- "This means correctness is NO LONGER GUARANTEED in all cases. If two or more "
- "applications write to the same region of a file, there is a possibility that "
- "its copies will be INCONSISTENT. Set it to a value greater than 0 unless you "
- "are ABSOLUTELY SURE of what you are doing and WILL NOT HOLD GlusterFS "
- "RESPONSIBLE for inconsistent data. If you are in doubt, set it to a value "
- "greater than 0.";
-
-int32_t
-init (xlator_t *this)
-{
- afr_private_t * priv = NULL;
- int child_count = 0;
- xlator_list_t * trav = NULL;
- int i = 0;
- int ret = -1;
- int op_errno = 0;
-
- char * read_subvol = NULL;
- char * fav_child = NULL;
- char * self_heal = NULL;
- char * change_log = NULL;
-
- int32_t lock_server_count = 1;
-
- int fav_ret = -1;
- int read_ret = -1;
- int dict_ret = -1;
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_ERROR,
- "replicate translator needs more than one "
- "subvolume defined.");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "Volume is dangling.");
- }
-
- ALLOC_OR_GOTO (this->private, afr_private_t, out);
-
- priv = this->private;
-
- read_ret = dict_get_str (this->options, "read-subvolume", &read_subvol);
- priv->read_child = -1;
-
- fav_ret = dict_get_str (this->options, "favorite-child", &fav_child);
- priv->favorite_child = -1;
-
- /* Default values */
-
- priv->data_self_heal = 1;
- priv->metadata_self_heal = 1;
- priv->entry_self_heal = 1;
-
- dict_ret = dict_get_str (this->options, "data-self-heal", &self_heal);
- if (dict_ret == 0) {
- ret = gf_string2boolean (self_heal, &priv->data_self_heal);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option data-self-heal %s'. "
- "Defaulting to data-self-heal as 'on'",
- self_heal);
- priv->data_self_heal = 1;
- }
- }
-
- dict_ret = dict_get_str (this->options, "metadata-self-heal",
- &self_heal);
- if (dict_ret == 0) {
- ret = gf_string2boolean (self_heal, &priv->metadata_self_heal);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option metadata-self-heal %s'. "
- "Defaulting to metadata-self-heal as 'on'.",
- self_heal);
- priv->metadata_self_heal = 1;
- }
- }
-
- dict_ret = dict_get_str (this->options, "entry-self-heal", &self_heal);
- if (dict_ret == 0) {
- ret = gf_string2boolean (self_heal, &priv->entry_self_heal);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option entry-self-heal %s'. "
- "Defaulting to entry-self-heal as 'on'.",
- self_heal);
- priv->entry_self_heal = 1;
- }
- }
-
- /* Change log options */
-
- priv->data_change_log = 1;
- priv->metadata_change_log = 0;
- priv->entry_change_log = 1;
-
- dict_ret = dict_get_str (this->options, "data-change-log",
- &change_log);
- if (dict_ret == 0) {
- ret = gf_string2boolean (change_log, &priv->data_change_log);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option data-change-log %s'. "
- "Defaulting to data-change-log as 'on'.",
- change_log);
- priv->data_change_log = 1;
- }
- }
-
- dict_ret = dict_get_str (this->options, "metadata-change-log",
- &change_log);
- if (dict_ret == 0) {
- ret = gf_string2boolean (change_log,
- &priv->metadata_change_log);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option metadata-change-log %s'. "
- "Defaulting to metadata-change-log as 'off'.",
- change_log);
- priv->metadata_change_log = 0;
- }
- }
-
- dict_ret = dict_get_str (this->options, "entry-change-log",
- &change_log);
- if (dict_ret == 0) {
- ret = gf_string2boolean (change_log, &priv->entry_change_log);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Invalid 'option entry-change-log %s'. "
- "Defaulting to entry-change-log as 'on'.",
- change_log);
- priv->entry_change_log = 1;
- }
- }
-
- /* Locking options */
-
- priv->data_lock_server_count = 1;
- priv->metadata_lock_server_count = 0;
- priv->entry_lock_server_count = 1;
-
- dict_ret = dict_get_int32 (this->options, "data-lock-server-count",
- &lock_server_count);
- if (dict_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Setting data lock server count to %d.",
- lock_server_count);
-
- if (lock_server_count == 0)
- gf_log (this->name, GF_LOG_WARNING, "%s",
- no_lock_servers_warning_str);
-
- priv->data_lock_server_count = lock_server_count;
- }
-
-
- dict_ret = dict_get_int32 (this->options,
- "metadata-lock-server-count",
- &lock_server_count);
- if (dict_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Setting metadata lock server count to %d.",
- lock_server_count);
- priv->metadata_lock_server_count = lock_server_count;
- }
-
-
- dict_ret = dict_get_int32 (this->options, "entry-lock-server-count",
- &lock_server_count);
- if (dict_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Setting entry lock server count to %d.",
- lock_server_count);
-
- priv->entry_lock_server_count = lock_server_count;
- }
-
- trav = this->children;
- while (trav) {
- if (!read_ret && !strcmp (read_subvol, trav->xlator->name)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Subvolume '%s' specified as read child.",
- trav->xlator->name);
-
- priv->read_child = child_count;
- }
-
- if (fav_ret == 0 && !strcmp (fav_child, trav->xlator->name)) {
- gf_log (this->name, GF_LOG_WARNING,
- favorite_child_warning_str, trav->xlator->name,
- trav->xlator->name, trav->xlator->name);
- priv->favorite_child = child_count;
- }
-
- child_count++;
- trav = trav->next;
- }
-
- priv->wait_count = 1;
-
- priv->child_count = child_count;
-
- LOCK_INIT (&priv->lock);
- LOCK_INIT (&priv->read_child_lock);
+ priv->shd.pending = GF_CALLOC (sizeof (*priv->shd.pending), child_count,
+ gf_afr_mt_int32_t);
+ if (!priv->shd.pending)
+ goto out;
- priv->child_up = CALLOC (sizeof (unsigned char), child_count);
- if (!priv->child_up) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- goto out;
- }
-
- priv->children = CALLOC (sizeof (xlator_t *), child_count);
- if (!priv->children) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- goto out;
- }
-
- priv->pending_key = CALLOC (sizeof (*priv->pending_key), child_count);
- if (!priv->pending_key) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
+ priv->shd.inprogress = GF_CALLOC (sizeof (*priv->shd.inprogress),
+ child_count, gf_afr_mt_shd_bool_t);
+ if (!priv->shd.inprogress)
+ goto out;
+ priv->shd.timer = GF_CALLOC (sizeof (*priv->shd.timer), child_count,
+ gf_afr_mt_shd_timer_t);
+ if (!priv->shd.timer)
goto out;
- }
- trav = this->children;
- i = 0;
- while (i < child_count) {
- priv->children[i] = trav->xlator;
+ priv->shd.healed = eh_new (AFR_EH_HEALED_LIMIT, _gf_false,
+ _destroy_shd_event_data);
+ if (!priv->shd.healed)
+ goto out;
- ret = asprintf (&priv->pending_key[i], "%s.%s", AFR_XATTR_PREFIX,
- trav->xlator->name);
- if (-1 == ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "asprintf failed to set pending key");
- op_errno = ENOMEM;
- goto out;
- }
+ priv->shd.heal_failed = eh_new (AFR_EH_HEAL_FAIL_LIMIT, _gf_false,
+ _destroy_shd_event_data);
+ if (!priv->shd.heal_failed)
+ goto out;
- trav = trav->next;
- i++;
- }
+ priv->shd.split_brain = eh_new (AFR_EH_SPLIT_BRAIN_LIMIT, _gf_false,
+ _destroy_shd_event_data);
+ if (!priv->shd.split_brain)
+ goto out;
- ret = 0;
+ this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this);
+ if (!this->itable)
+ goto out;
+ priv->root_inode = inode_ref (this->itable->root);
+ GF_OPTION_INIT ("node-uuid", priv->shd.node_uuid, str, out);
+ GF_OPTION_INIT ("heal-timeout", priv->shd.timeout, int32, out);
+ ret = afr_initialise_statistics (this);
+ if (ret)
+ goto out;
+ ret = 0;
out:
- return ret;
+ return ret;
}
int
fini (xlator_t *this)
{
- return 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ this->private = NULL;
+ afr_priv_destroy (priv);
+ if (this->itable);//I dont see any destroy func
+
+ return 0;
}
struct xlator_fops fops = {
- .lookup = afr_lookup,
- .open = afr_open,
- .lk = afr_lk,
- .flush = afr_flush,
- .statfs = afr_statfs,
- .fsync = afr_fsync,
- .fsyncdir = afr_fsyncdir,
- .xattrop = afr_xattrop,
- .fxattrop = afr_fxattrop,
- .inodelk = afr_inodelk,
- .finodelk = afr_finodelk,
- .entrylk = afr_entrylk,
- .fentrylk = afr_fentrylk,
- .checksum = afr_checksum,
-
- /* inode read */
- .access = afr_access,
- .stat = afr_stat,
- .fstat = afr_fstat,
- .readlink = afr_readlink,
- .getxattr = afr_getxattr,
- .readv = afr_readv,
-
- /* inode write */
- .chmod = afr_chmod,
- .chown = afr_chown,
- .fchmod = afr_fchmod,
- .fchown = afr_fchown,
- .writev = afr_writev,
- .truncate = afr_truncate,
- .ftruncate = afr_ftruncate,
- .utimens = afr_utimens,
- .setxattr = afr_setxattr,
- .removexattr = afr_removexattr,
-
- /* dir read */
- .opendir = afr_opendir,
- .readdir = afr_readdir,
- .getdents = afr_getdents,
-
- /* dir write */
- .create = afr_create,
- .mknod = afr_mknod,
- .mkdir = afr_mkdir,
- .unlink = afr_unlink,
- .rmdir = afr_rmdir,
- .link = afr_link,
- .symlink = afr_symlink,
- .rename = afr_rename,
- .setdents = afr_setdents,
+ .lookup = afr_lookup,
+ .open = afr_open,
+ .lk = afr_lk,
+ .flush = afr_flush,
+ .statfs = afr_statfs,
+ .fsync = afr_fsync,
+ .fsyncdir = afr_fsyncdir,
+ .xattrop = afr_xattrop,
+ .fxattrop = afr_fxattrop,
+ .inodelk = afr_inodelk,
+ .finodelk = afr_finodelk,
+ .entrylk = afr_entrylk,
+ .fentrylk = afr_fentrylk,
+ .fallocate = afr_fallocate,
+ .discard = afr_discard,
+ .zerofill = afr_zerofill,
+
+ /* inode read */
+ .access = afr_access,
+ .stat = afr_stat,
+ .fstat = afr_fstat,
+ .readlink = afr_readlink,
+ .getxattr = afr_getxattr,
+ .fgetxattr = afr_fgetxattr,
+ .readv = afr_readv,
+
+ /* inode write */
+ .writev = afr_writev,
+ .truncate = afr_truncate,
+ .ftruncate = afr_ftruncate,
+ .setxattr = afr_setxattr,
+ .fsetxattr = afr_fsetxattr,
+ .setattr = afr_setattr,
+ .fsetattr = afr_fsetattr,
+ .removexattr = afr_removexattr,
+ .fremovexattr = afr_fremovexattr,
+
+ /* dir read */
+ .opendir = afr_opendir,
+ .readdir = afr_readdir,
+ .readdirp = afr_readdirp,
+
+ /* dir write */
+ .create = afr_create,
+ .mknod = afr_mknod,
+ .mkdir = afr_mkdir,
+ .unlink = afr_unlink,
+ .rmdir = afr_rmdir,
+ .link = afr_link,
+ .symlink = afr_symlink,
+ .rename = afr_rename,
};
-struct xlator_mops mops = {
-};
-
struct xlator_dumpops dumpops = {
.priv = afr_priv_dump,
};
@@ -2649,45 +545,249 @@ struct xlator_dumpops dumpops = {
struct xlator_cbks cbks = {
.release = afr_release,
+ .releasedir = afr_releasedir,
+ .forget = afr_forget,
};
struct volume_options options[] = {
- { .key = {"read-subvolume" },
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = {"favorite-child"},
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = {"data-self-heal"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"metadata-self-heal"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"entry-self-heal"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"data-change-log"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"metadata-change-log"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"entry-change-log"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"data-lock-server-count"},
- .type = GF_OPTION_TYPE_INT,
- .min = 0
- },
- { .key = {"metadata-lock-server-count"},
- .type = GF_OPTION_TYPE_INT,
- .min = 0
- },
- { .key = {"entry-lock-server-count"},
- .type = GF_OPTION_TYPE_INT,
- .min = 0
- },
- { .key = {NULL} },
+ { .key = {"read-subvolume" },
+ .type = GF_OPTION_TYPE_XLATOR,
+ .description = "inode-read fops happen only on one of the bricks in "
+ "replicate. Afr will prefer the one specified using "
+ "this option if it is not stale. Option value must be "
+ "one of the xlator names of the children. "
+ "Ex: <volname>-client-0 till "
+ "<volname>-client-<number-of-bricks - 1>"
+ },
+ { .key = {"read-subvolume-index" },
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "-1",
+ .description = "inode-read fops happen only on one of the bricks in "
+ "replicate. AFR will prefer the one specified using "
+ "this option if it is not stale. allowed options"
+ " include -1 till replica-count - 1"
+ },
+ { .key = {"read-hash-mode" },
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 2,
+ .default_value = "0",
+ .description = "inode-read fops happen only on one of the bricks in "
+ "replicate. AFR will prefer the one computed using "
+ "the method specified using this option"
+ "0 = first responder, "
+ "1 = hash by GFID of file (all clients use "
+ "same subvolume), "
+ "2 = hash by GFID of file and client PID",
+ },
+ { .key = {"choose-local" },
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .description = "Choose a local subvolume(i.e. Brick) to read from if "
+ "read-subvolume is not explicitly set.",
+ },
+ { .key = {"favorite-child"},
+ .type = GF_OPTION_TYPE_XLATOR,
+ .description = "If a split-brain happens choose subvol/brick set by "
+ "this option as source."
+ },
+ { .key = {"background-self-heal-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .default_value = "16",
+ .validate = GF_OPT_VALIDATE_MIN,
+ .description = "This specifies the number of self-heals that can be "
+ " performed in background without blocking the fop"
+ },
+ { .key = {"data-self-heal"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {"1", "on", "yes", "true", "enable",
+ "0", "off", "no", "false", "disable",
+ "open"},
+ .default_value = "on",
+ .description = "Using this option we can enable/disable data "
+ "self-heal on the file. \"open\" means data "
+ "self-heal action will only be triggered by file "
+ "open operations."
+ },
+ { .key = {"data-self-heal-algorithm"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Select between \"full\", \"diff\". The "
+ "\"full\" algorithm copies the entire file from "
+ "source to sink. The \"diff\" algorithm copies to "
+ "sink only those blocks whose checksums don't match "
+ "with those of source. If no option is configured "
+ "the option is chosen dynamically as follows: "
+ "If the file does not exist on one of the sinks "
+ "or empty file exists or if the source file size is "
+ "about the same as page size the entire file will "
+ "be read and written i.e \"full\" algo, "
+ "otherwise \"diff\" algo is chosen.",
+ .value = { "diff", "full"}
+ },
+ { .key = {"data-self-heal-window-size"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 1024,
+ .default_value = "1",
+ .description = "Maximum number blocks per file for which self-heal "
+ "process would be applied simultaneously."
+ },
+ { .key = {"metadata-self-heal"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Using this option we can enable/disable metadata "
+ "i.e. Permissions, ownerships, xattrs self-heal on "
+ "the file/directory."
+ },
+ { .key = {"entry-self-heal"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Using this option we can enable/disable entry "
+ "self-heal on the directory."
+ },
+ { .key = {"data-change-log"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Data fops like write/truncate will not perform "
+ "pre/post fop changelog operations in afr transaction "
+ "if this option is disabled"
+ },
+ { .key = {"metadata-change-log"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Metadata fops like setattr/setxattr will not perform "
+ "pre/post fop changelog operations in afr transaction "
+ "if this option is disabled"
+ },
+ { .key = {"entry-change-log"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Entry fops like create/unlink will not perform "
+ "pre/post fop changelog operations in afr transaction "
+ "if this option is disabled"
+ },
+ { .key = {"optimistic-change-log"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Entry/Metadata fops will not perform "
+ "pre fop changelog operations in afr transaction "
+ "if this option is enabled."
+ },
+ { .key = {"strict-readdir"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ },
+ { .key = {"inodelk-trace"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Enabling this option logs inode lock/unlocks"
+ },
+ { .key = {"entrylk-trace"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Enabling this option logs entry lock/unlocks"
+ },
+ { .key = {"eager-lock"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Lock phase of a transaction has two sub-phases. "
+ "First is an attempt to acquire locks in parallel by "
+ "broadcasting non-blocking lock requests. If lock "
+ "aquistion fails on any server, then the held locks "
+ "are unlocked and revert to a blocking locked mode "
+ "sequentially on one server after another. If this "
+ "option is enabled the initial broadcasting lock "
+ "request attempt to acquire lock on the entire file. "
+ "If this fails, we revert back to the sequential "
+ "\"regional\" blocking lock as before. In the case "
+ "where such an \"eager\" lock is granted in the "
+ "non-blocking phase, it gives rise to an opportunity "
+ "for optimization. i.e, if the next write transaction "
+ "on the same FD arrives before the unlock phase of "
+ "the first transaction, it \"takes over\" the full "
+ "file lock. Similarly if yet another data transaction "
+ "arrives before the unlock phase of the \"optimized\" "
+ "transaction, that in turn \"takes over\" the lock as "
+ "well. The actual unlock now happens at the end of "
+ "the last \"optimzed\" transaction."
+
+ },
+ { .key = {"self-heal-daemon"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option applies to only self-heal-daemon. "
+ "Index directory crawl and automatic healing of files"
+ "will not be performed if this option is turned off."
+ },
+ { .key = {"iam-self-heal-daemon"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option differentiates if the replicate "
+ "translator is running as part of self-heal-daemon "
+ "or not."
+ },
+ { .key = {"quorum-type"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = { "none", "auto", "fixed"},
+ .default_value = "none",
+ .description = "If value is \"fixed\" only allow writes if "
+ "quorum-count bricks are present. If value is "
+ "\"auto\" only allow writes if more than half of "
+ "bricks, or exactly half including the first, are "
+ "present.",
+ },
+ { .key = {"quorum-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = INT_MAX,
+ .default_value = 0,
+ .description = "If quorum-type is \"fixed\" only allow writes if "
+ "this many bricks or present. Other quorum types "
+ "will OVERWRITE this value.",
+ },
+ { .key = {"node-uuid"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Local glusterd uuid string, used in starting "
+ "self-heal-daemon so that it can crawl only on "
+ "local index directories.",
+ },
+ { .key = {"heal-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 60,
+ .max = INT_MAX,
+ .default_value = "600",
+ .description = "time interval for checking the need to self-heal "
+ "in self-heal-daemon"
+ },
+ { .key = {"post-op-delay-secs"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = INT_MAX,
+ .default_value = "1",
+ .description = "Time interval induced artificially before "
+ "post-operation phase of the transaction to "
+ "enhance overlap of adjacent write operations.",
+ },
+ { .key = {AFR_SH_READDIR_SIZE_KEY},
+ .type = GF_OPTION_TYPE_SIZET,
+ .description = "readdirp size for performing entry self-heal",
+ .min = 1024,
+ .max = 131072,
+ .default_value = "1KB",
+ },
+ { .key = {"readdir-failover"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .description = "readdir(p) will not failover if this option is off",
+ .default_value = "on",
+ },
+ { .key = {"ensure-durability"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .description = "Afr performs fsyncs for transactions if this "
+ "option is on to make sure the changelogs/data is "
+ "written to the disk",
+ .default_value = "on",
+ },
+ { .key = {NULL} },
};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index d9a3435c3..21064db58 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -26,85 +17,347 @@
#include "config.h"
#endif
-#include "scheduler.h"
#include "call-stub.h"
#include "compat-errno.h"
+#include "afr-mem-types.h"
+#include "afr-self-heal-algorithm.h"
+
+#include "libxlator.h"
+#include "timer.h"
#define AFR_XATTR_PREFIX "trusted.afr"
+#define AFR_PATHINFO_HEADER "REPLICATE:"
+#define AFR_SH_READDIR_SIZE_KEY "self-heal-readdir-size"
+#define AFR_SH_DATA_DOMAIN_FMT "%s:self-heal"
-typedef struct _afr_private {
- gf_lock_t lock; /* to guard access to child_count, etc */
- unsigned int child_count; /* total number of children */
+#define AFR_LOCKEE_COUNT_MAX 3
+#define AFR_DOM_COUNT_MAX 3
- unsigned int read_child_rr; /* round-robin index of the read_child */
- gf_lock_t read_child_lock; /* lock to protect above */
-
- xlator_t **children;
+struct _pump_private;
- unsigned char *child_up;
+typedef int (*afr_expunge_done_cbk_t) (call_frame_t *frame, xlator_t *this,
+ int child, int32_t op_error,
+ int32_t op_errno);
- char **pending_key;
+typedef int (*afr_impunge_done_cbk_t) (call_frame_t *frame, xlator_t *this,
+ int32_t op_error, int32_t op_errno);
+typedef int (*afr_post_remove_call_t) (call_frame_t *frame, xlator_t *this);
- gf_boolean_t data_self_heal; /* on/off */
- gf_boolean_t metadata_self_heal; /* on/off */
- gf_boolean_t entry_self_heal; /* on/off */
+typedef int (*afr_lock_cbk_t) (call_frame_t *frame, xlator_t *this);
+typedef void (*afr_lookup_done_cbk_t) (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno);
- gf_boolean_t data_change_log; /* on/off */
- gf_boolean_t metadata_change_log; /* on/off */
- gf_boolean_t entry_change_log; /* on/off */
+typedef enum {
+ AFR_POS_UNKNOWN,
+ AFR_POS_LOCAL,
+ AFR_POS_REMOTE
+} afr_child_pos_t;
- int read_child; /* read-subvolume */
- unsigned int favorite_child; /* subvolume to be preferred in resolving
- split-brain cases */
+typedef enum {
+ SPLIT_BRAIN = 1,
+ ALL_FOOLS = 2
+} afr_subvol_status_t;
- unsigned int data_lock_server_count;
- unsigned int metadata_lock_server_count;
- unsigned int entry_lock_server_count;
+typedef enum {
+ AFR_INODE_SET_READ_CTX = 1,
+ AFR_INODE_RM_STALE_CHILDREN,
+ AFR_INODE_SET_OPENDIR_DONE,
+ AFR_INODE_GET_READ_CTX,
+ AFR_INODE_GET_OPENDIR_DONE,
+} afr_inode_op_t;
+
+typedef struct afr_inode_params_ {
+ afr_inode_op_t op;
+ union {
+ gf_boolean_t value;
+ struct {
+ int32_t read_child;
+ int32_t *children;
+ } read_ctx;
+ } u;
+} afr_inode_params_t;
+
+typedef enum afr_spb_state {
+ DONT_KNOW,
+ SPB,
+ NO_SPB
+} afr_spb_state_t;
+
+typedef struct afr_inode_ctx_ {
+ uint64_t masks;
+ int32_t *fresh_children;//increasing order of latency
+ afr_spb_state_t mdata_spb;
+ afr_spb_state_t data_spb;
+ uint32_t open_fd_count;
+} afr_inode_ctx_t;
- unsigned int wait_count; /* # of servers to wait for success */
-} afr_private_t;
+typedef enum {
+ NONE,
+ INDEX,
+ INDEX_TO_BE_HEALED,
+ FULL,
+} afr_crawl_type_t;
+
+typedef struct afr_self_heald_ {
+ gf_boolean_t enabled;
+ gf_boolean_t iamshd;
+ afr_crawl_type_t *pending;
+ gf_boolean_t *inprogress;
+ afr_child_pos_t *pos;
+ gf_timer_t **timer;
+ eh_t *healed;
+ eh_t *heal_failed;
+ eh_t *split_brain;
+ eh_t **statistics;
+ void **crawl_events;
+ char *node_uuid;
+ int timeout;
+} afr_self_heald_t;
-typedef struct {
- /* array of stat's, one for each child */
- struct stat *buf;
+typedef struct _afr_private {
+ gf_lock_t lock; /* to guard access to child_count, etc */
+ unsigned int child_count; /* total number of children */
- /* array of xattr's, one for each child */
- dict_t **xattr;
+ unsigned int read_child_rr; /* round-robin index of the read_child */
+ gf_lock_t read_child_lock; /* lock to protect above */
+
+ xlator_t **children;
- /* array of errno's, one for each child */
- int *child_errno;
+ int first_lookup;
+ inode_t *root_inode;
- int32_t **pending_matrix;
- int32_t **delta_matrix;
+ unsigned char *child_up;
- int *sources;
- int source;
- int active_source;
- int active_sinks;
- int *success;
+ char **pending_key;
+
+ char *data_self_heal; /* on/off/open */
+ char * data_self_heal_algorithm; /* name of algorithm */
+ unsigned int data_self_heal_window_size; /* max number of pipelined
+ read/writes */
+
+ unsigned int background_self_heal_count;
+ unsigned int background_self_heals_started;
+ gf_boolean_t metadata_self_heal; /* on/off */
+ gf_boolean_t entry_self_heal; /* on/off */
+
+ gf_boolean_t data_change_log; /* on/off */
+ gf_boolean_t metadata_change_log; /* on/off */
+ gf_boolean_t entry_change_log; /* on/off */
+
+ int read_child; /* read-subvolume */
+ unsigned int hash_mode; /* for when read_child is not set */
+ int favorite_child; /* subvolume to be preferred in resolving
+ split-brain cases */
+
+ gf_boolean_t inodelk_trace;
+ gf_boolean_t entrylk_trace;
+
+ gf_boolean_t strict_readdir;
+
+ unsigned int wait_count; /* # of servers to wait for success */
+
+ uint64_t up_count; /* number of CHILD_UPs we have seen */
+ uint64_t down_count; /* number of CHILD_DOWNs we have seen */
+
+ struct _pump_private *pump_private; /* Set if we are loaded as pump */
+ int use_afr_in_pump;
+
+ pthread_mutex_t mutex;
+ struct list_head saved_fds; /* list of fds on which locks have succeeded */
+ gf_boolean_t optimistic_change_log;
+ gf_boolean_t eager_lock;
+ uint32_t post_op_delay_secs;
+ unsigned int quorum_count;
+
+ char vol_uuid[UUID_SIZE + 1];
+ int32_t *last_event;
+ afr_self_heald_t shd;
+ gf_boolean_t choose_local;
+ gf_boolean_t did_discovery;
+ gf_boolean_t readdir_failover;
+ uint64_t sh_readdir_size;
+ gf_boolean_t ensure_durability;
+ char *sh_domain;
+} afr_private_t;
- fd_t *healing_fd;
- int op_failed;
+typedef enum {
+ AFR_SELF_HEAL_NOT_ATTEMPTED,
+ AFR_SELF_HEAL_STARTED,
+ AFR_SELF_HEAL_FAILED,
+ AFR_SELF_HEAL_SYNC_BEGIN,
+} afr_self_heal_status;
- int file_has_holes;
- blksize_t block_size;
- off_t file_size;
- off_t offset;
+typedef struct {
+ afr_self_heal_status gfid_or_missing_entry_self_heal;
+ afr_self_heal_status metadata_self_heal;
+ afr_self_heal_status data_self_heal;
+ afr_self_heal_status entry_self_heal;
+} afr_sh_status_for_all_type;
- loc_t parent_loc;
- int (*completion_cbk) (call_frame_t *frame, xlator_t *this);
- call_frame_t *sh_frame;
-} afr_self_heal_t;
+typedef enum {
+ AFR_SELF_HEAL_ENTRY,
+ AFR_SELF_HEAL_METADATA,
+ AFR_SELF_HEAL_DATA,
+ AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY,
+ AFR_SELF_HEAL_INVALID = -1,
+} afr_self_heal_type;
+typedef enum {
+ AFR_CHECK_ALL,
+ AFR_CHECK_SPECIFIC,
+} afr_sh_fail_check_type;
+
+struct afr_self_heal_ {
+ /* External interface: These are variables (some optional) that
+ are set by whoever has triggered self-heal */
+
+ gf_boolean_t do_data_self_heal;
+ gf_boolean_t do_metadata_self_heal;
+ gf_boolean_t do_entry_self_heal;
+ gf_boolean_t do_gfid_self_heal;
+ gf_boolean_t do_missing_entry_self_heal;
+ gf_boolean_t force_confirm_spb; /* Check for split-brains even when
+ self-heal is turned off */
+
+ gf_boolean_t forced_merge; /* Is this a self-heal triggered to
+ forcibly merge the directories? */
+
+ gf_boolean_t background; /* do self-heal in background
+ if possible */
+ ia_type_t type; /* st_mode of the entry we're doing
+ self-heal on */
+ inode_t *inode; /* inode on which the self-heal is
+ performed on */
+ uuid_t sh_gfid_req; /* gfid self-heal needs to be done
+ with this gfid if it is not null */
+
+ /* Function to call to unwind. If self-heal is being done in the
+ background, this function will be called as soon as possible. */
+
+ int (*unwind) (call_frame_t *frame, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, int32_t sh_failed);
+
+ /* End of external interface members */
+
+
+ /* array of stat's, one for each child */
+ struct iatt *buf;
+ struct iatt *parentbufs;
+ struct iatt parentbuf;
+ struct iatt entrybuf;
+
+ afr_expunge_done_cbk_t expunge_done;
+ afr_impunge_done_cbk_t impunge_done;
+
+ /* array of xattr's, one for each child */
+ dict_t **xattr;
+
+ /* array containing if the lookups succeeded in the order of response
+ */
+ int32_t *success_children;
+ int success_count;
+ /* array containing the fresh children found in the self-heal process */
+ int32_t *fresh_children;
+ /* array containing the fresh children found in the parent lookup */
+ int32_t *fresh_parent_dirs;
+ /* array of errno's, one for each child */
+ int *child_errno;
+ /*loc used for lookup*/
+ loc_t lookup_loc;
+ int32_t lookup_flags;
+ afr_lookup_done_cbk_t lookup_done;
+
+ int32_t **pending_matrix;
+ int32_t **delta_matrix;
+
+ int32_t op_ret;
+ int32_t op_errno;
+
+ int *sources;
+ int source;
+ int active_source;
+ int active_sinks;
+ unsigned char *success;
+ unsigned char *locked_nodes;
+ int lock_count;
+
+ const char *linkname;
+ gf_boolean_t entries_skipped;
+
+ gf_boolean_t actual_sh_started;
+ gf_boolean_t sync_done;
+ gf_boolean_t data_lock_held;
+ gf_boolean_t sh_dom_lock_held;
+ gf_boolean_t eof_reached;
+ fd_t *healing_fd;
+ int file_has_holes;
+ blksize_t block_size;
+ off_t file_size;
+ off_t offset;
+ unsigned char *write_needed;
+ uint8_t *checksum;
+ afr_post_remove_call_t post_remove_call;
+
+ char *data_sh_info;
+ char *metadata_sh_info;
+
+ loc_t parent_loc;
+ call_frame_t *orig_frame;
+ call_frame_t *old_loop_frame;
+ gf_boolean_t unwound;
+
+ afr_sh_algo_private_t *private;
+ afr_sh_status_for_all_type afr_all_sh_status;
+ afr_self_heal_type sh_type_in_action;
+
+ struct afr_sh_algorithm *algo;
+ afr_lock_cbk_t data_lock_success_handler;
+ afr_lock_cbk_t data_lock_failure_handler;
+ gf_boolean_t data_lock_block;
+ int (*completion_cbk) (call_frame_t *frame, xlator_t *this);
+ int (*sh_data_algo_start) (call_frame_t *frame, xlator_t *this);
+ int (*algo_completion_cbk) (call_frame_t *frame, xlator_t *this);
+ int (*algo_abort_cbk) (call_frame_t *frame, xlator_t *this);
+ void (*gfid_sh_success_cbk) (call_frame_t *sh_frame, xlator_t *this);
+
+ call_frame_t *sh_frame;
+};
+
+typedef struct afr_self_heal_ afr_self_heal_t;
typedef enum {
- AFR_DATA_TRANSACTION, /* truncate, write, ... */
- AFR_METADATA_TRANSACTION, /* chmod, chown, ... */
- AFR_ENTRY_TRANSACTION, /* create, rmdir, ... */
- AFR_ENTRY_RENAME_TRANSACTION, /* rename */
- AFR_FLUSH_TRANSACTION, /* flush */
+ AFR_DATA_TRANSACTION, /* truncate, write, ... */
+ AFR_METADATA_TRANSACTION, /* chmod, chown, ... */
+ AFR_ENTRY_TRANSACTION, /* create, rmdir, ... */
+ AFR_ENTRY_RENAME_TRANSACTION, /* rename */
} afr_transaction_type;
+typedef enum {
+ AFR_TRANSACTION_LK,
+ AFR_SELFHEAL_LK,
+} transaction_lk_type_t;
+
+typedef enum {
+ AFR_LOCK_OP,
+ AFR_UNLOCK_OP,
+} afr_lock_op_type_t;
+
+typedef enum {
+ AFR_DATA_SELF_HEAL_LK,
+ AFR_METADATA_SELF_HEAL_LK,
+ AFR_ENTRY_SELF_HEAL_LK,
+}selfheal_lk_type_t;
+
+typedef enum {
+ AFR_INODELK_TRANSACTION,
+ AFR_INODELK_NB_TRANSACTION,
+ AFR_ENTRYLK_TRANSACTION,
+ AFR_ENTRYLK_NB_TRANSACTION,
+ AFR_INODELK_SELFHEAL,
+ AFR_INODELK_NB_SELFHEAL,
+ AFR_ENTRYLK_SELFHEAL,
+ AFR_ENTRYLK_NB_SELFHEAL,
+} afr_lock_call_type_t;
/*
xattr format: trusted.afr.volume = [x y z]
@@ -117,9 +370,8 @@ static inline int
afr_index_for_transaction_type (afr_transaction_type type)
{
switch (type) {
-
+
case AFR_DATA_TRANSACTION:
- case AFR_FLUSH_TRANSACTION:
return 0;
case AFR_METADATA_TRANSACTION:
@@ -133,370 +385,551 @@ afr_index_for_transaction_type (afr_transaction_type type)
return -1; /* make gcc happy */
}
+typedef struct {
+ loc_t loc;
+ char *basename;
+ unsigned char *locked_nodes;
+ int locked_count;
-typedef struct _afr_local {
- unsigned int call_count;
- unsigned int success_count;
- unsigned int enoent_count;
+} afr_entry_lockee_t;
- unsigned int need_metadata_self_heal;
- unsigned int need_entry_self_heal;
- unsigned int need_data_self_heal;
- unsigned int govinda_gOvinda;
+int
+afr_entry_lockee_cmp (const void *l1, const void *l2);
- unsigned int read_child_index;
- unsigned char read_child_returned;
+typedef struct {
+ char *domain; /* Domain on which inodelk is taken */
+ struct gf_flock flock;
+ unsigned char *locked_nodes;
+ int32_t lock_count;
+} afr_inodelk_t;
- pid_t saved_pid;
+typedef struct {
+ loc_t *lk_loc;
- int32_t op_ret;
- int32_t op_errno;
+ int lockee_count;
+ afr_entry_lockee_t lockee[AFR_LOCKEE_COUNT_MAX];
- int32_t **pending;
+ afr_inodelk_t inodelk[AFR_DOM_COUNT_MAX];
+ const char *lk_basename;
+ const char *lower_basename;
+ const char *higher_basename;
+ char lower_locked;
+ char higher_locked;
- loc_t loc;
- loc_t newloc;
+ unsigned char *locked_nodes;
+ unsigned char *lower_locked_nodes;
- fd_t *fd;
+ selfheal_lk_type_t selfheal_lk_type;
+ transaction_lk_type_t transaction_lk_type;
- glusterfs_fop_t fop;
+ int32_t lock_count;
+ int32_t entrylk_lock_count;
- unsigned char *child_up;
- int child_count;
+ uint64_t lock_number;
+ int32_t lk_call_count;
+ int32_t lk_expected_count;
+ int32_t lk_attempted_count;
- int32_t *child_errno;
-
- dict_t *xattr_req;
- int open_fd_count;
- /*
- This struct contains the arguments for the "continuation"
- (scheme-like) of fops
- */
+ int32_t lock_op_ret;
+ int32_t lock_op_errno;
+ afr_lock_cbk_t lock_cbk;
+ char *domain; /* Domain on which inode/entry lock/unlock in progress.*/
+} afr_internal_lock_t;
- int op;
- struct {
- struct {
- unsigned char buf_set;
- struct statvfs buf;
- } statfs;
+typedef struct _afr_locked_fd {
+ fd_t *fd;
+ struct list_head list;
+} afr_locked_fd_t;
- struct {
- inode_t *inode;
- struct stat buf;
- struct stat read_child_buf;
- ino_t ino;
- dict_t *xattr;
- } lookup;
+struct afr_reply {
+ int valid;
+ int32_t op_ret;
+ int32_t op_errno;
+};
- struct {
- int32_t flags;
- } open;
+typedef struct _afr_local {
+ int uid;
+ int gid;
+ unsigned int call_count;
+ unsigned int success_count;
+ unsigned int enoent_count;
+ uint32_t open_fd_count;
+ gf_boolean_t update_open_fd_count;
- struct {
- int32_t cmd;
- struct flock flock;
- unsigned char *locked_nodes;
- } lk;
- struct {
- uint8_t *file_checksum;
- uint8_t *dir_checksum;
- } checksum;
+ unsigned int unhealable;
- /* inode read */
+ unsigned int read_child_index;
+ unsigned char read_child_returned;
+ unsigned int first_up_child;
- struct {
- int32_t mask;
- int last_tried; /* index of the child we tried previously */
- } access;
+ gf_lkowner_t saved_lk_owner;
- struct {
- int last_tried;
- ino_t ino;
- } stat;
+ int32_t op_ret;
+ int32_t op_errno;
- struct {
- int last_tried;
- ino_t ino;
- } fstat;
+ int32_t **pending;
- struct {
- size_t size;
- int last_tried;
- } readlink;
+ loc_t loc;
+ loc_t newloc;
- struct {
- const char *name;
- int last_tried;
- } getxattr;
+ fd_t *fd;
- struct {
- ino_t ino;
- size_t size;
- off_t offset;
- int last_tried;
- } readv;
+ glusterfs_fop_t fop;
- /* dir read */
+ unsigned char *child_up;
+ int32_t *fresh_children; //in the order of response
- struct {
- int success_count;
- int32_t op_ret;
- int32_t op_errno;
- } opendir;
+ int32_t *child_errno;
- struct {
- int32_t op_ret;
- int32_t op_errno;
- size_t size;
- off_t offset;
+ dict_t *xattr_req;
- int last_tried;
- } readdir;
+ int32_t inodelk_count;
+ int32_t entrylk_count;
- struct {
- int32_t op_ret;
- int32_t op_errno;
+ afr_internal_lock_t internal_lock;
- size_t size;
- off_t offset;
- int32_t flag;
+ afr_locked_fd_t *locked_fd;
+ int32_t source_child;
+ int32_t lock_recovery_child;
- int last_tried;
- } getdents;
+ dict_t *dict;
+ int optimistic_change_log;
+ gf_boolean_t delayed_post_op;
- /* inode write */
- struct {
- ino_t ino;
- mode_t mode;
- struct stat buf;
- struct stat read_child_buf;
- } chmod;
+ /* Is the current writev() going to perform a stable write?
+ i.e, is fd->flags or @flags writev param have O_SYNC or
+ O_DSYNC?
+ */
+ gf_boolean_t stable_write;
+
+ /* This write appended to the file. Nnot necessarily O_APPEND,
+ just means the offset of write was at the end of file.
+ */
+ gf_boolean_t append_write;
+
+ int allow_sh_for_running_transaction;
+
+
+ /* This struct contains the arguments for the "continuation"
+ (scheme-like) of fops
+ */
+
+ int op;
+ struct {
+ struct {
+ unsigned char buf_set;
+ struct statvfs buf;
+ } statfs;
+
+ struct {
+ uint32_t parent_entrylk;
+ uuid_t gfid_req;
+ inode_t *inode;
+ struct iatt buf;
+ struct iatt postparent;
+ dict_t **xattrs;
+ dict_t *xattr;
+ struct iatt *postparents;
+ struct iatt *bufs;
+ int32_t read_child;
+ int32_t *sources;
+ int32_t *success_children;
+ int32_t **pending_matrix;
+ gf_boolean_t fresh_lookup;
+ gf_boolean_t possible_spb;
+ } lookup;
+
+ struct {
+ int32_t flags;
+ } open;
+
+ struct {
+ int32_t cmd;
+ struct gf_flock user_flock;
+ struct gf_flock ret_flock;
+ unsigned char *locked_nodes;
+ } lk;
+
+ /* inode read */
+
+ struct {
+ int32_t mask;
+ int last_index; /* index of the child we tried previously */
+ } access;
+
+ struct {
+ int last_index;
+ } stat;
+
+ struct {
+ int last_index;
+ } fstat;
+
+ struct {
+ size_t size;
+ int last_index;
+ } readlink;
+
+ struct {
+ char *name;
+ int last_index;
+ long xattr_len;
+ } getxattr;
+
+ struct {
+ size_t size;
+ off_t offset;
+ int last_index;
+ uint32_t flags;
+ } readv;
+
+ /* dir read */
+
+ struct {
+ int success_count;
+ int32_t op_ret;
+ int32_t op_errno;
+
+ uint32_t *checksum;
+ } opendir;
+
+ struct {
+ int32_t op_ret;
+ int32_t op_errno;
+ size_t size;
+ off_t offset;
+ dict_t *dict;
+ gf_boolean_t failed;
+ int last_index;
+ } readdir;
+ /* inode write */
+
+ struct {
+ struct iatt prebuf;
+ struct iatt postbuf;
+ } inode_wfop; //common structure for all inode-write-fops
+
+ struct {
+ int32_t op_ret;
+
+ struct iovec *vector;
+ struct iobref *iobref;
+ int32_t count;
+ off_t offset;
+ uint32_t flags;
+ } writev;
+
+ struct {
+ off_t offset;
+ } truncate;
+
+ struct {
+ off_t offset;
+ } ftruncate;
+
+ struct {
+ struct iatt in_buf;
+ int32_t valid;
+ } setattr;
+
+ struct {
+ struct iatt in_buf;
+ int32_t valid;
+ } fsetattr;
+
+ struct {
+ dict_t *dict;
+ int32_t flags;
+ } setxattr;
+
+ struct {
+ dict_t *dict;
+ int32_t flags;
+ } fsetxattr;
+
+ struct {
+ char *name;
+ } removexattr;
+
+ struct {
+ dict_t *xattr;
+ } xattrop;
+
+ struct {
+ dict_t *xattr;
+ } fxattrop;
+
+ /* dir write */
+
+ struct {
+ inode_t *inode;
+ struct iatt buf;
+ struct iatt preparent;
+ struct iatt postparent;
+ struct iatt prenewparent;
+ struct iatt postnewparent;
+ } dir_fop; //common structure for all dir fops
+
+ struct {
+ fd_t *fd;
+ dict_t *params;
+ int32_t flags;
+ mode_t mode;
+ } create;
+
+ struct {
+ dev_t dev;
+ mode_t mode;
+ dict_t *params;
+ } mknod;
+
+ struct {
+ int32_t mode;
+ dict_t *params;
+ } mkdir;
+
+ struct {
+ int flags;
+ } rmdir;
+
+ struct {
+ dict_t *params;
+ char *linkpath;
+ } symlink;
struct {
- ino_t ino;
- mode_t mode;
- struct stat buf;
- struct stat read_child_buf;
- } fchmod;
+ int32_t mode;
+ off_t offset;
+ size_t len;
+ } fallocate;
struct {
- ino_t ino;
- uid_t uid;
- gid_t gid;
- struct stat buf;
- struct stat read_child_buf;
- } chown;
+ off_t offset;
+ size_t len;
+ } discard;
- struct {
- ino_t ino;
- uid_t uid;
- gid_t gid;
- struct stat buf;
- struct stat read_child_buf;
- } fchown;
-
- struct {
- ino_t ino;
- struct stat buf;
- struct stat read_child_buf;
+ struct {
+ off_t offset;
+ size_t len;
+ struct iatt prebuf;
+ struct iatt postbuf;
+ } zerofill;
- int32_t op_ret;
- struct iovec *vector;
- struct iobref *iobref;
- int32_t count;
- off_t offset;
- } writev;
+ } cont;
- struct {
- ino_t ino;
- off_t offset;
- struct stat buf;
- struct stat read_child_buf;
- } truncate;
+ struct {
+ off_t start, len;
- struct {
- ino_t ino;
- off_t offset;
- struct stat buf;
- struct stat read_child_buf;
- } ftruncate;
+ gf_boolean_t eager_lock_on;
+ int *eager_lock;
- struct {
- ino_t ino;
- struct timespec tv[2];
- struct stat buf;
- struct stat read_child_buf;
- } utimens;
+ char *basename;
+ char *new_basename;
- struct {
- dict_t *dict;
- int32_t flags;
- } setxattr;
+ loc_t parent_loc;
+ loc_t new_parent_loc;
- struct {
- const char *name;
- } removexattr;
+ afr_transaction_type type;
- /* dir write */
-
- struct {
- ino_t ino;
- fd_t *fd;
- int32_t flags;
- mode_t mode;
- inode_t *inode;
- struct stat buf;
- struct stat read_child_buf;
- } create;
+ /* pre-compute the post piggyback status before
+ entering POST-OP phase
+ */
+ int *postop_piggybacked;
- struct {
- ino_t ino;
- dev_t dev;
- mode_t mode;
- inode_t *inode;
- struct stat buf;
- struct stat read_child_buf;
- } mknod;
+ /* stub to resume on destruction
+ of the transaction frame */
+ call_stub_t *resume_stub;
- struct {
- ino_t ino;
- int32_t mode;
- inode_t *inode;
- struct stat buf;
- struct stat read_child_buf;
- } mkdir;
+ struct list_head eager_locked;
- struct {
- int32_t op_ret;
- int32_t op_errno;
- } unlink;
+ int32_t **txn_changelog;//changelog after pre+post ops
+ unsigned char *pre_op;
- struct {
- int32_t op_ret;
- int32_t op_errno;
- } rmdir;
+ call_frame_t *main_frame;
- struct {
- ino_t ino;
- struct stat buf;
- struct stat read_child_buf;
- } rename;
+ int (*fop) (call_frame_t *frame, xlator_t *this);
- struct {
- ino_t ino;
- inode_t *inode;
- struct stat buf;
- struct stat read_child_buf;
- } link;
+ int (*done) (call_frame_t *frame, xlator_t *this);
- struct {
- ino_t ino;
- inode_t *inode;
- struct stat buf;
- struct stat read_child_buf;
- char *linkpath;
- } symlink;
+ int (*resume) (call_frame_t *frame, xlator_t *this);
- struct {
- int32_t flags;
- dir_entry_t *entries;
- int32_t count;
- } setdents;
- } cont;
-
- struct {
- off_t start, len;
+ int (*unwind) (call_frame_t *frame, xlator_t *this);
- unsigned char *locked_nodes;
- int lock_count;
+ /* post-op hook */
+ } transaction;
- const char *basename;
- const char *new_basename;
+ afr_self_heal_t self_heal;
- loc_t parent_loc;
- loc_t new_parent_loc;
+ struct marker_str marker;
- afr_transaction_type type;
+ /* extra data for fops */
+ dict_t *xdata_req;
+ dict_t *xdata_rsp;
- int success_count;
- int erase_pending;
- int failure_count;
+ mode_t umask;
+ int xflag;
+ gf_boolean_t do_discovery;
+ struct afr_reply *replies;
+} afr_local_t;
+
+typedef enum {
+ AFR_FD_NOT_OPENED,
+ AFR_FD_OPENED,
+ AFR_FD_OPENING
+} afr_fd_open_status_t;
- int last_tried;
- int32_t *child_errno;
+typedef struct {
+ unsigned int *pre_op_done;
+ afr_fd_open_status_t *opened_on; /* which subvolumes the fd is open on */
+ unsigned int *pre_op_piggyback;
- call_frame_t *main_frame;
+ unsigned int *lock_piggyback;
+ unsigned int *lock_acquired;
- int (*fop) (call_frame_t *frame, xlator_t *this);
+ int flags;
+ uint64_t up_count; /* number of CHILD_UPs this fd has seen */
+ uint64_t down_count; /* number of CHILD_DOWNs this fd has seen */
- int (*done) (call_frame_t *frame, xlator_t *this);
+ int32_t last_tried;
- int (*resume) (call_frame_t *frame, xlator_t *this);
+ int hit, miss;
+ gf_boolean_t failed_over;
+ struct list_head entries; /* needed for readdir failover */
- int (*unwind) (call_frame_t *frame, xlator_t *this);
- } transaction;
+ unsigned char *locked_on; /* which subvolumes locks have been successful */
- afr_self_heal_t self_heal;
-} afr_local_t;
+ /* used for delayed-post-op optimization */
+ pthread_mutex_t delay_lock;
+ gf_timer_t *delay_timer;
+ call_frame_t *delay_frame;
+ int call_child;
+ /* set if any write on this fd was a non stable write
+ (i.e, without O_SYNC or O_DSYNC)
+ */
+ gf_boolean_t witnessed_unstable_write;
-typedef struct {
- unsigned char pre_op_done;
- unsigned char *child_failed;
+ /* list of frames currently in progress */
+ struct list_head eager_locked;
} afr_fd_ctx_t;
/* try alloc and if it fails, goto label */
-#define ALLOC_OR_GOTO(var, type, label) do { \
- var = CALLOC (sizeof (type), 1); \
- if (!var) { \
- gf_log (this->name, GF_LOG_ERROR, \
- "out of memory :("); \
- op_errno = ENOMEM; \
- goto label; \
- } \
- } while (0);
+#define AFR_LOCAL_ALLOC_OR_GOTO(var, label) do { \
+ var = mem_get0 (THIS->local_pool); \
+ if (!var) { \
+ gf_log (this->name, GF_LOG_ERROR, \
+ "out of memory :("); \
+ op_errno = ENOMEM; \
+ goto label; \
+ } \
+ } while (0);
/* did a call fail due to a child failing? */
-#define child_went_down(op_ret, op_errno) (((op_ret) < 0) && \
- ((op_errno == ENOTCONN) || \
- (op_errno == EBADFD)))
+#define child_went_down(op_ret, op_errno) (((op_ret) < 0) && \
+ ((op_errno == ENOTCONN) || \
+ (op_errno == EBADFD)))
#define afr_fop_failed(op_ret, op_errno) ((op_ret) == -1)
/* have we tried all children? */
#define all_tried(i, count) ((i) == (count) - 1)
+int32_t
+afr_set_dict_gfid (dict_t *dict, uuid_t gfid);
+
int
-afr_fd_ctx_set (xlator_t *this, fd_t *fd);
+pump_command_reply (call_frame_t *frame, xlator_t *this);
-uint64_t
-afr_read_child (xlator_t *this, inode_t *inode);
+int32_t
+afr_notify (xlator_t *this, int32_t event, void *data, void *data2);
+
+int
+afr_init_entry_lockee (afr_entry_lockee_t *lockee, afr_local_t *local,
+ loc_t *loc, char *basename, int child_count);
void
-afr_set_read_child (xlator_t *this, inode_t *inode, int32_t read_child);
+afr_entry_lockee_cleanup (afr_internal_lock_t *int_lock);
+
+int
+afr_attempt_lock_recovery (xlator_t *this, int32_t child_index);
+
+int
+afr_save_locked_fd (xlator_t *this, fd_t *fd);
+
+int
+afr_mark_locked_nodes (xlator_t *this, fd_t *fd,
+ unsigned char *locked_nodes);
void
-afr_build_parent_loc (loc_t *parent, loc_t *child);
+afr_set_lk_owner (call_frame_t *frame, xlator_t *this, void *lk_owner);
int
-afr_up_children_count (int child_count, unsigned char *child_up);
+afr_set_lock_number (call_frame_t *frame, xlator_t *this);
+
+
+loc_t *
+lower_path (loc_t *l1, const char *b1, loc_t *l2, const char *b2);
+
+int32_t
+afr_unlock (call_frame_t *frame, xlator_t *this);
int
-afr_locked_nodes_count (unsigned char *locked_nodes, int child_count);
+afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this);
+
+int
+afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this);
+
+int
+afr_blocking_lock (call_frame_t *frame, xlator_t *this);
+
+int
+afr_internal_lock_finish (call_frame_t *frame, xlator_t *this);
int
-afr_first_up_child (afr_private_t *priv);
+afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src, char *dom,
+ unsigned int child_count);
+
+int pump_start (call_frame_t *frame, xlator_t *this);
+
+int
+__afr_fd_ctx_set (xlator_t *this, fd_t *fd);
+
+int
+afr_fd_ctx_set (xlator_t *this, fd_t *fd);
+
+int32_t
+afr_inode_get_read_ctx (xlator_t *this, inode_t *inode, int32_t *fresh_children);
+
+void
+afr_inode_set_read_ctx (xlator_t *this, inode_t *inode, int32_t read_child,
+ int32_t *fresh_children);
+
+int
+afr_build_parent_loc (loc_t *parent, loc_t *child, int32_t *op_errno);
+
+unsigned int
+afr_up_children_count (unsigned char *child_up, unsigned int child_count);
-ino64_t
-afr_itransform (ino64_t ino, int child_count, int child_index);
+unsigned int
+afr_locked_children_count (unsigned char *children, unsigned int child_count);
+
+unsigned int
+afr_pre_op_done_children_count (unsigned char *pre_op,
+ unsigned int child_count);
+
+gf_boolean_t
+afr_is_fresh_lookup (loc_t *loc, xlator_t *this);
+
+void
+afr_update_loc_gfids (loc_t *loc, struct iatt *buf, struct iatt *postparent);
int
-afr_deitransform (ino64_t ino, int child_count);
+afr_locked_nodes_count (unsigned char *locked_nodes, int child_count);
void
afr_local_cleanup (afr_local_t *local, xlator_t *this);
@@ -504,101 +937,276 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this);
int
afr_frame_return (call_frame_t *frame);
-#define AFR_STACK_UNWIND(frame, params ...) \
- do { \
- afr_local_t *__local = NULL; \
- xlator_t *__this = NULL; \
- __local = frame->local; \
- __this = frame->this; \
- frame->local = NULL; \
- STACK_UNWIND (frame, params); \
- afr_local_cleanup (__local, __this); \
- free (__local); \
-} while (0);
-
-#define AFR_STACK_DESTROY(frame) \
- do { \
- afr_local_t *__local = NULL; \
- xlator_t *__this = NULL; \
- __local = frame->local; \
- __this = frame->this; \
- frame->local = NULL; \
- STACK_DESTROY (frame->root); \
- afr_local_cleanup (__local, __this); \
- free (__local); \
-} while (0);
+gf_boolean_t
+afr_is_split_brain (xlator_t *this, inode_t *inode);
+
+void
+afr_set_split_brain (xlator_t *this, inode_t *inode, afr_spb_state_t mdata_spb,
+ afr_spb_state_t data_spb);
+
+int
+afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata);
+
+void
+afr_set_opendir_done (xlator_t *this, inode_t *inode);
+
+gf_boolean_t
+afr_is_opendir_done (xlator_t *this, inode_t *inode);
+void
+afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this);
+
+int
+afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd);
+
+int
+afr_launch_openfd_self_heal (call_frame_t *frame, xlator_t *this, fd_t *fd);
+
+#define AFR_STACK_UNWIND(fop, frame, params ...) \
+ do { \
+ afr_local_t *__local = NULL; \
+ xlator_t *__this = NULL; \
+ if (frame) { \
+ __local = frame->local; \
+ __this = frame->this; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT (fop, frame, params); \
+ if (__local) { \
+ afr_local_cleanup (__local, __this); \
+ mem_put (__local); \
+ } \
+ } while (0)
+
+#define AFR_STACK_DESTROY(frame) \
+ do { \
+ afr_local_t *__local = NULL; \
+ xlator_t *__this = NULL; \
+ __local = frame->local; \
+ __this = frame->this; \
+ frame->local = NULL; \
+ STACK_DESTROY (frame->root); \
+ if (__local) { \
+ afr_local_cleanup (__local, __this); \
+ mem_put (__local); \
+ } \
+ } while (0);
+
+#define AFR_NUM_CHANGE_LOGS 3 /*data + metadata + entry*/
/* allocate and return a string that is the basename of argument */
-static inline char *
-AFR_BASENAME (const char *str)
+static inline char *
+AFR_BASENAME (const char *str)
{
- char *__tmp_str = NULL;
- char *__basename_str = NULL;
- __tmp_str = strdup (str);
- __basename_str = strdup (basename (__tmp_str));
- FREE (__tmp_str);
- return __basename_str;
+ char *__tmp_str = NULL;
+ char *__basename_str = NULL;
+ __tmp_str = gf_strdup (str);
+ __basename_str = gf_strdup (basename (__tmp_str));
+ GF_FREE (__tmp_str);
+ return __basename_str;
}
-/* initialize local_t */
-static inline int
-AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv)
-{
- local->child_up = CALLOC (sizeof (*local->child_up),
- priv->child_count);
- if (!local->child_up) {
- return -ENOMEM;
- }
+int
+afr_transaction_local_init (afr_local_t *local, xlator_t *this);
- memcpy (local->child_up, priv->child_up,
- sizeof (*local->child_up) * priv->child_count);
+int32_t
+afr_marker_getxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name,afr_local_t *local, afr_private_t *priv );
+int32_t *
+afr_children_create (int32_t child_count);
- local->call_count = afr_up_children_count (priv->child_count, local->child_up);
- if (local->call_count == 0)
- return -ENOTCONN;
+int
+afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno);
- local->transaction.erase_pending = 1;
+int
+afr_internal_lock_init (afr_internal_lock_t *lk, size_t child_count,
+ transaction_lk_type_t lk_type);
- local->op_ret = -1;
- local->op_errno = EUCLEAN;
+int
+afr_first_up_child (unsigned char *child_up, size_t child_count);
- return 0;
-}
+int
+afr_select_read_child_from_policy (int32_t *fresh_children, int32_t child_count,
+ int32_t prev_read_child,
+ int32_t config_read_child, int32_t *sources,
+ unsigned int hmode, uuid_t gfid);
+
+void
+afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode,
+ int32_t *fresh_children, int32_t prev_read_child,
+ int32_t config_read_child, uuid_t gfid);
+
+int32_t
+afr_get_call_child (xlator_t *this, unsigned char *child_up, int32_t read_child,
+ int32_t *fresh_children,
+ int32_t *call_child, int32_t *last_index);
+
+int32_t
+afr_next_call_child (int32_t *fresh_children, unsigned char *child_up,
+ size_t child_count, int32_t *last_index,
+ int32_t read_child);
+void
+afr_get_fresh_children (int32_t *success_children, int32_t *sources,
+ int32_t *children, unsigned int child_count);
+void
+afr_children_add_child (int32_t *children, int32_t child,
+ int32_t child_count);
+void
+afr_children_rm_child (int32_t *children, int32_t child,
+ int32_t child_count);
+void
+afr_reset_children (int32_t *children, int32_t child_count);
+int32_t
+afr_most_important_error(int32_t old_errno, int32_t new_errno,
+ gf_boolean_t eio);
+int
+afr_errno_count (int32_t *children, int *child_errno,
+ unsigned int child_count, int32_t op_errno);
+int
+afr_get_children_count (int32_t *children, unsigned int child_count);
+gf_boolean_t
+afr_is_child_present (int32_t *success_children, int32_t child_count,
+ int32_t child);
+void
+afr_update_gfid_from_iatts (uuid_t uuid, struct iatt *bufs,
+ int32_t *success_children,
+ unsigned int child_count);
+void
+afr_reset_xattr (dict_t **xattr, unsigned int child_count);
+gf_boolean_t
+afr_conflicting_iattrs (struct iatt *bufs, int32_t *success_children,
+ unsigned int child_count, const char *path,
+ const char *xlator_name);
+unsigned int
+afr_gfid_missing_count (const char *xlator_name, int32_t *children,
+ struct iatt *bufs, unsigned int child_count,
+ const char *path);
+void
+afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req, const char *path);
+void
+afr_children_copy (int32_t *dst, int32_t *src, unsigned int child_count);
+afr_transaction_type
+afr_transaction_type_get (ia_type_t ia_type);
+int32_t
+afr_resultant_errno_get (int32_t *children,
+ int *child_errno, unsigned int child_count);
+void
+afr_inode_rm_stale_children (xlator_t *this, inode_t *inode,
+ int32_t *stale_children);
+void
+afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ gf_boolean_t background, ia_type_t ia_type, char *reason,
+ void (*gfid_sh_success_cbk) (call_frame_t *sh_frame,
+ xlator_t *this),
+ int (*unwind) (call_frame_t *frame, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ int32_t sh_failed));
+void
+afr_fix_open (xlator_t *this, fd_t *fd, size_t need_open_count, int *need_open);
+
+void
+afr_open_fd_fix (fd_t *fd, xlator_t *this);
+int
+afr_set_elem_count_get (unsigned char *elems, int child_count);
+afr_fd_ctx_t *
+afr_fd_ctx_get (fd_t *fd, xlator_t *this);
-static inline int
-afr_transaction_local_init (afr_local_t *local, afr_private_t *priv)
-{
- int i;
-
- local->child_errno = CALLOC (sizeof (*local->child_errno),
- priv->child_count);
- if (!local->child_errno) {
- return -ENOMEM;
- }
-
- local->pending = CALLOC (sizeof (*local->pending),
- priv->child_count);
-
- if (!local->pending) {
- return -ENOMEM;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- local->pending[i] = CALLOC (sizeof (*local->pending[i]),
- 3); /* data + metadata + entry */
- if (!local->pending[i])
- return -ENOMEM;
- }
-
- local->transaction.locked_nodes = CALLOC (sizeof (*local->transaction.locked_nodes),
- priv->child_count);
+gf_boolean_t
+afr_open_only_data_self_heal (char *data_self_heal);
- local->transaction.child_errno = CALLOC (sizeof (*local->transaction.child_errno),
- priv->child_count);
+gf_boolean_t
+afr_data_self_heal_enabled (char *data_self_heal);
- return 0;
-}
+void
+afr_set_low_priority (call_frame_t *frame);
+int
+afr_child_fd_ctx_set (xlator_t *this, fd_t *fd, int32_t child,
+ int flags);
+
+gf_boolean_t
+afr_have_quorum (char *logname, afr_private_t *priv);
+
+void
+afr_matrix_cleanup (int32_t **pending, unsigned int m);
+
+int32_t**
+afr_matrix_create (unsigned int m, unsigned int n);
+
+gf_boolean_t
+afr_is_errno_set (int *child_errno, int child);
+
+gf_boolean_t
+afr_is_errno_unset (int *child_errno, int child);
+
+gf_boolean_t
+afr_is_fd_fixable (fd_t *fd);
+
+void
+afr_prepare_new_entry_pending_matrix (int32_t **pending,
+ gf_boolean_t (*is_pending) (int *, int),
+ int *ctx, struct iatt *buf,
+ unsigned int child_count);
+void
+afr_xattr_array_destroy (dict_t **xattr, unsigned int child_count);
+/*
+ * Special value indicating we should use the "auto" quorum method instead of
+ * a fixed value (including zero to turn off quorum enforcement).
+ */
+#define AFR_QUORUM_AUTO INT_MAX
+
+/*
+ * Having this as a macro will make debugging a bit weirder, but does reduce
+ * the probability of functions handling this check inconsistently.
+ */
+#define QUORUM_CHECK(_func,_label) do { \
+ if (priv->quorum_count && !afr_have_quorum(this->name,priv)) { \
+ gf_log(this->name,GF_LOG_WARNING, \
+ "failing "#_func" due to lack of quorum"); \
+ op_errno = EROFS; \
+ goto _label; \
+ } \
+} while (0);
+
+
+#define AFR_SBRAIN_MSG "Failed on %s as split-brain is seen. Returning EIO."
+
+#define AFR_SBRAIN_CHECK_FD(fd, label) do { \
+ if (fd->inode && afr_is_split_brain (this, fd->inode)) { \
+ op_errno = EIO; \
+ gf_log (this->name, GF_LOG_WARNING, \
+ AFR_SBRAIN_MSG ,uuid_utoa (fd->inode->gfid)); \
+ goto label; \
+ } \
+} while (0)
+
+#define AFR_SBRAIN_CHECK_LOC(loc, label) do { \
+ if (loc->inode && afr_is_split_brain (this, loc->inode)) { \
+ op_errno = EIO; \
+ loc_path (loc, NULL); \
+ gf_log (this->name, GF_LOG_WARNING, \
+ AFR_SBRAIN_MSG , loc->path); \
+ goto label; \
+ } \
+} while (0)
+
+int
+afr_fd_report_unstable_write (xlator_t *this, fd_t *fd);
+
+gf_boolean_t
+afr_fd_has_witnessed_unstable_write (xlator_t *this, fd_t *fd);
+
+void
+afr_delayed_changelog_wake_resume (xlator_t *this, fd_t *fd, call_stub_t *stub);
+
+int
+afr_inodelk_init (afr_inodelk_t *lk, char *dom, size_t child_count);
+
+void
+afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this);
+
+afr_inode_ctx_t*
+afr_inode_ctx_get (inode_t *inode, xlator_t *this);
#endif /* __AFR_H__ */
diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c
new file mode 100644
index 000000000..a7f72fb30
--- /dev/null
+++ b/xlators/cluster/afr/src/pump.c
@@ -0,0 +1,2641 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <unistd.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <fnmatch.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "afr-common.c"
+#include "defaults.c"
+#include "glusterfs.h"
+
+static uint64_t pump_pid = 0;
+static inline void
+pump_fill_loc_info (loc_t *loc, struct iatt *iatt, struct iatt *parent)
+{
+ afr_update_loc_gfids (loc, iatt, parent);
+ uuid_copy (loc->inode->gfid, iatt->ia_gfid);
+}
+
+static int
+pump_mark_start_pending (xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ pump_priv->pump_start_pending = 1;
+
+ return 0;
+}
+
+static int
+is_pump_start_pending (xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ return (pump_priv->pump_start_pending);
+}
+
+static int
+pump_remove_start_pending (xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ pump_priv->pump_start_pending = 0;
+
+ return 0;
+}
+
+static pump_state_t
+pump_get_state ()
+{
+ xlator_t *this = NULL;
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ pump_state_t ret;
+
+ this = THIS;
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ LOCK (&pump_priv->pump_state_lock);
+ {
+ ret = pump_priv->pump_state;
+ }
+ UNLOCK (&pump_priv->pump_state_lock);
+
+ return ret;
+}
+
+int
+pump_change_state (xlator_t *this, pump_state_t state)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ pump_state_t state_old;
+ pump_state_t state_new;
+
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ GF_ASSERT (pump_priv);
+
+ LOCK (&pump_priv->pump_state_lock);
+ {
+ state_old = pump_priv->pump_state;
+ state_new = state;
+
+ pump_priv->pump_state = state;
+
+ }
+ UNLOCK (&pump_priv->pump_state_lock);
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Pump changing state from %d to %d",
+ state_old,
+ state_new);
+
+ return 0;
+}
+
+static int
+pump_set_resume_path (xlator_t *this, const char *path)
+{
+ int ret = 0;
+
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ GF_ASSERT (pump_priv);
+
+ LOCK (&pump_priv->resume_path_lock);
+ {
+ strncpy (pump_priv->resume_path, path, strlen (path) + 1);
+ }
+ UNLOCK (&pump_priv->resume_path_lock);
+
+ return ret;
+}
+
+static int
+pump_save_path (xlator_t *this, const char *path)
+{
+ afr_private_t *priv = NULL;
+ pump_state_t state;
+ dict_t *dict = NULL;
+ loc_t loc = {0};
+ int dict_ret = 0;
+ int ret = -1;
+
+ state = pump_get_state ();
+ if (state == PUMP_STATE_RESUME)
+ return 0;
+
+ priv = this->private;
+
+ GF_ASSERT (priv->root_inode);
+
+ afr_build_root_loc (this, &loc);
+
+ dict = dict_new ();
+ dict_ret = dict_set_str (dict, PUMP_PATH, (char *)path);
+ if (dict_ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set the key %s", path, PUMP_PATH);
+
+ ret = syncop_setxattr (PUMP_SOURCE_CHILD (this), &loc, dict, 0);
+
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "setxattr failed - could not save path=%s", path);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "setxattr succeeded - saved path=%s", path);
+ }
+
+ dict_unref (dict);
+
+ loc_wipe (&loc);
+ return 0;
+}
+
+static int
+pump_check_and_update_status (xlator_t *this)
+{
+ pump_state_t state;
+ int ret = -1;
+
+ state = pump_get_state ();
+
+ switch (state) {
+
+ case PUMP_STATE_RESUME:
+ case PUMP_STATE_RUNNING:
+ {
+ ret = 0;
+ break;
+ }
+ case PUMP_STATE_PAUSE:
+ {
+ ret = -1;
+ break;
+ }
+ case PUMP_STATE_ABORT:
+ {
+ pump_save_path (this, "/");
+ ret = -1;
+ break;
+ }
+ default:
+ {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Unknown pump state");
+ ret = -1;
+ break;
+ }
+
+ }
+
+ return ret;
+}
+
+static const char *
+pump_get_resume_path (xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ const char *resume_path = NULL;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ resume_path = pump_priv->resume_path;
+
+ return resume_path;
+}
+
+static int
+pump_update_resume_state (xlator_t *this, const char *path)
+{
+ pump_state_t state;
+ const char *resume_path = NULL;
+
+ state = pump_get_state ();
+
+ if (state == PUMP_STATE_RESUME) {
+ resume_path = pump_get_resume_path (this);
+ if (strcmp (resume_path, "/") == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Reached the resume path (/). Proceeding to change state"
+ " to running");
+ pump_change_state (this, PUMP_STATE_RUNNING);
+ } else if (strcmp (resume_path, path) == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Reached the resume path. Proceeding to change state"
+ " to running");
+ pump_change_state (this, PUMP_STATE_RUNNING);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Not yet hit the resume path:res-path=%s,path=%s",
+ resume_path, path);
+ }
+ }
+
+ return 0;
+}
+
+static gf_boolean_t
+is_pump_traversal_allowed (xlator_t *this, const char *path)
+{
+ pump_state_t state;
+ const char *resume_path = NULL;
+ gf_boolean_t ret = _gf_true;
+
+ state = pump_get_state ();
+
+ if (state == PUMP_STATE_RESUME) {
+ resume_path = pump_get_resume_path (this);
+ if (strstr (resume_path, path)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "On the right path to resumption path");
+ ret = _gf_true;
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Not the right path to resuming=> ignoring traverse");
+ ret = _gf_false;
+ }
+ }
+
+ return ret;
+}
+
+static int
+pump_save_file_stats (xlator_t *this, const char *path)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ LOCK (&pump_priv->resume_path_lock);
+ {
+ pump_priv->number_files_pumped++;
+
+ strncpy (pump_priv->current_file, path,
+ PATH_MAX);
+ }
+ UNLOCK (&pump_priv->resume_path_lock);
+
+ return 0;
+}
+
+static int
+gf_pump_traverse_directory (loc_t *loc)
+{
+ xlator_t *this = NULL;
+ fd_t *fd = NULL;
+ off_t offset = 0;
+ loc_t entry_loc = {0};
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+ gf_dirent_t entries;
+ struct iatt iatt = {0};
+ struct iatt parent = {0};
+ dict_t *xattr_rsp = NULL;
+ int ret = 0;
+ gf_boolean_t is_directory_empty = _gf_true;
+ gf_boolean_t free_entries = _gf_false;
+
+ INIT_LIST_HEAD (&entries.list);
+ this = THIS;
+
+ GF_ASSERT (loc->inode);
+
+ fd = fd_create (loc->inode, pump_pid);
+ if (!fd) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to create fd for %s", loc->path);
+ goto out;
+ }
+
+ ret = syncop_opendir (this, loc, fd);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "opendir failed on %s", loc->path);
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "pump opendir on %s returned=%d",
+ loc->path, ret);
+
+ while (syncop_readdirp (this, fd, 131072, offset, NULL, &entries)) {
+ free_entries = _gf_true;
+
+ if (list_empty (&entries.list)) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "no more entries in directory");
+ goto out;
+ }
+
+ list_for_each_entry_safe (entry, tmp, &entries.list, list) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "found readdir entry=%s", entry->d_name);
+
+ offset = entry->d_off;
+ if (uuid_is_null (entry->d_stat.ia_gfid)) {
+ gf_log (this->name, GF_LOG_WARNING, "%s/%s: No "
+ "gfid present skipping",
+ loc->path, entry->d_name);
+ continue;
+ }
+ loc_wipe (&entry_loc);
+ ret = afr_build_child_loc (this, &entry_loc, loc,
+ entry->d_name);
+ if (ret)
+ goto out;
+
+ if (!IS_ENTRY_CWD (entry->d_name) &&
+ !IS_ENTRY_PARENT (entry->d_name)) {
+
+ is_directory_empty = _gf_false;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "lookup %s => %"PRId64,
+ entry_loc.path,
+ iatt.ia_ino);
+
+ ret = syncop_lookup (this, &entry_loc, NULL,
+ &iatt, &xattr_rsp, &parent);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: lookup failed",
+ entry_loc.path);
+ continue;
+ }
+ pump_fill_loc_info (&entry_loc, &iatt,
+ &parent);
+
+ pump_update_resume_state (this, entry_loc.path);
+
+ pump_save_path (this, entry_loc.path);
+ pump_save_file_stats (this, entry_loc.path);
+
+ ret = pump_check_and_update_status (this);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Pump beginning to exit out");
+ goto out;
+ }
+
+ if (IA_ISDIR (iatt.ia_type)) {
+ if (is_pump_traversal_allowed (this, entry_loc.path)) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "entering dir=%s",
+ entry->d_name);
+ gf_pump_traverse_directory (&entry_loc);
+ }
+ }
+ }
+ }
+
+ gf_dirent_free (&entries);
+ free_entries = _gf_false;
+ gf_log (this->name, GF_LOG_TRACE,
+ "offset incremented to %d",
+ (int32_t ) offset);
+
+ }
+
+ ret = syncop_close (fd);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_DEBUG, "closing the fd failed");
+
+ if (is_directory_empty && IS_ROOT_PATH (loc->path)) {
+ pump_change_state (this, PUMP_STATE_RUNNING);
+ gf_log (this->name, GF_LOG_INFO, "Empty source brick. "
+ "Nothing to be done.");
+ }
+
+out:
+ if (entry_loc.path)
+ loc_wipe (&entry_loc);
+ if (free_entries)
+ gf_dirent_free (&entries);
+ return 0;
+}
+
+static int
+pump_update_resume_path (xlator_t *this)
+{
+ const char *resume_path = NULL;
+
+ resume_path = pump_get_resume_path (this);
+
+ if (resume_path) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Found a path to resume from: %s",
+ resume_path);
+
+ }else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Did not find a path=> setting to '/'");
+ pump_set_resume_path (this, "/");
+ }
+
+ pump_change_state (this, PUMP_STATE_RESUME);
+
+ return 0;
+}
+
+static int32_t
+pump_xattr_cleaner (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ loc_t loc = {0};
+ int i = 0;
+ int ret = 0;
+ int source = 0;
+ int sink = 1;
+
+ priv = this->private;
+
+ afr_build_root_loc (this, &loc);
+
+ ret = syncop_removexattr (priv->children[source], &loc,
+ PUMP_PATH);
+
+ ret = syncop_removexattr (priv->children[sink], &loc,
+ PUMP_SINK_COMPLETE);
+
+ for (i = 0; i < priv->child_count; i++) {
+ ret = syncop_removexattr (priv->children[i], &loc,
+ PUMP_SOURCE_COMPLETE);
+ if (ret)
+ gf_log (this->name, GF_LOG_DEBUG, "removexattr "
+ "failed with %s", strerror (errno));
+ }
+
+ loc_wipe (&loc);
+ return pump_command_reply (frame, this);
+}
+
+static int
+pump_complete_migration (xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+ dict_t *dict = NULL;
+ pump_state_t state;
+ loc_t loc = {0};
+ int dict_ret = 0;
+ int ret = -1;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ GF_ASSERT (priv->root_inode);
+
+ afr_build_root_loc (this, &loc);
+
+ dict = dict_new ();
+
+ state = pump_get_state ();
+ if (state == PUMP_STATE_RUNNING) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Pump finished pumping");
+
+ pump_priv->pump_finished = _gf_true;
+
+ dict_ret = dict_set_str (dict, PUMP_SOURCE_COMPLETE, "jargon");
+ if (dict_ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set the key %s",
+ loc.path, PUMP_SOURCE_COMPLETE);
+
+ ret = syncop_setxattr (PUMP_SOURCE_CHILD (this), &loc, dict, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "setxattr failed - while notifying source complete");
+ }
+ dict_ret = dict_set_str (dict, PUMP_SINK_COMPLETE, "jargon");
+ if (dict_ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set the key %s",
+ loc.path, PUMP_SINK_COMPLETE);
+
+ ret = syncop_setxattr (PUMP_SINK_CHILD (this), &loc, dict, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "setxattr failed - while notifying sink complete");
+ }
+
+ pump_save_path (this, "/");
+
+ } else if (state == PUMP_STATE_ABORT) {
+ gf_log (this->name, GF_LOG_DEBUG, "Starting cleanup "
+ "of pump internal xattrs");
+ call_resume (pump_priv->cleaner);
+ }
+
+ loc_wipe (&loc);
+ return 0;
+}
+
+static int
+pump_lookup_sink (loc_t *loc)
+{
+ xlator_t *this = NULL;
+ struct iatt iatt, parent;
+ dict_t *xattr_rsp;
+ dict_t *xattr_req = NULL;
+ int ret = 0;
+
+ this = THIS;
+
+ xattr_req = dict_new ();
+
+ ret = afr_set_root_gfid (xattr_req);
+ if (ret)
+ goto out;
+
+ ret = syncop_lookup (PUMP_SINK_CHILD (this), loc,
+ xattr_req, &iatt, &xattr_rsp, &parent);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Lookup on sink child failed");
+ goto out;
+ }
+
+out:
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ return ret;
+}
+
+static int
+pump_task (void *data)
+{
+ xlator_t *this = NULL;
+ afr_private_t *priv = NULL;
+
+
+ loc_t loc = {0};
+ struct iatt iatt, parent;
+ dict_t *xattr_rsp = NULL;
+ dict_t *xattr_req = NULL;
+
+ int ret = -1;
+
+ this = THIS;
+ priv = this->private;
+
+ GF_ASSERT (priv->root_inode);
+
+ afr_build_root_loc (this, &loc);
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Out of memory");
+ ret = -1;
+ goto out;
+ }
+
+ afr_set_root_gfid (xattr_req);
+ ret = syncop_lookup (this, &loc, xattr_req,
+ &iatt, &xattr_rsp, &parent);
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "lookup: path=%s gfid=%s",
+ loc.path, uuid_utoa (loc.inode->gfid));
+
+ ret = pump_check_and_update_status (this);
+ if (ret < 0) {
+ goto out;
+ }
+
+ pump_update_resume_path (this);
+
+ afr_set_root_gfid (xattr_req);
+ ret = pump_lookup_sink (&loc);
+ if (ret) {
+ pump_update_resume_path (this);
+ goto out;
+ }
+
+ gf_pump_traverse_directory (&loc);
+
+ pump_complete_migration (this);
+out:
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ loc_wipe (&loc);
+ return 0;
+}
+
+
+static int
+pump_task_completion (int ret, call_frame_t *sync_frame, void *data)
+{
+ xlator_t *this = NULL;
+ afr_private_t *priv = NULL;
+
+ this = THIS;
+
+ priv = this->private;
+
+ inode_unref (priv->root_inode);
+ STACK_DESTROY (sync_frame->root);
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Pump xlator exiting");
+ return 0;
+}
+
+int
+pump_start (call_frame_t *pump_frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ int ret = -1;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ afr_set_lk_owner (pump_frame, this, pump_frame->root);
+ pump_pid = (uint64_t) (unsigned long)pump_frame->root;
+
+ ret = synctask_new (pump_priv->env, pump_task,
+ pump_task_completion,
+ pump_frame, NULL);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "starting pump failed");
+ pump_change_state (this, PUMP_STATE_ABORT);
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "setting pump as started lk_owner: %s %"PRIu64,
+ lkowner_utoa (&pump_frame->root->lk_owner), pump_pid);
+
+ priv->use_afr_in_pump = 1;
+out:
+ return ret;
+}
+
+static int
+pump_start_synctask (xlator_t *this)
+{
+ call_frame_t *frame = NULL;
+ int ret = 0;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+ ret = -1;
+ goto out;
+ }
+
+ pump_change_state (this, PUMP_STATE_RUNNING);
+
+ ret = pump_start (frame, this);
+
+out:
+ return ret;
+}
+
+int32_t
+pump_cmd_start_setxattr_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+
+{
+ call_frame_t *prev = NULL;
+ afr_local_t *local = NULL;
+ int ret = 0;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not initiate destination "
+ "brick connect");
+ ret = op_ret;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Successfully initiated destination "
+ "brick connect");
+
+ pump_mark_start_pending (this);
+
+ /* send the PARENT_UP as pump is ready now */
+ prev = cookie;
+ if (prev && prev->this)
+ prev->this->notify (prev->this, GF_EVENT_PARENT_UP, this);
+
+out:
+ local->op_ret = ret;
+ pump_command_reply (frame, this);
+
+ return 0;
+}
+
+static int
+pump_initiate_sink_connect (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *dict = NULL;
+ data_t *data = NULL;
+ char *clnt_cmd = NULL;
+ loc_t loc = {0};
+
+ int ret = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ GF_ASSERT (priv->root_inode);
+
+ afr_build_root_loc (this, &loc);
+
+ data = data_ref (dict_get (local->dict, RB_PUMP_CMD_START));
+ if (!data) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not get destination brick value");
+ goto out;
+ }
+
+ dict = dict_new ();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ clnt_cmd = GF_CALLOC (1, data->len+1, gf_common_mt_char);
+ if (!clnt_cmd) {
+ ret = -1;
+ goto out;
+ }
+
+ memcpy (clnt_cmd, data->data, data->len);
+ clnt_cmd[data->len] = '\0';
+ gf_log (this->name, GF_LOG_DEBUG, "Got destination brick %s\n",
+ clnt_cmd);
+
+ ret = dict_set_dynstr (dict, CLIENT_CMD_CONNECT, clnt_cmd);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not inititiate destination brick "
+ "connect");
+ goto out;
+ }
+
+ STACK_WIND (frame,
+ pump_cmd_start_setxattr_cbk,
+ PUMP_SINK_CHILD(this),
+ PUMP_SINK_CHILD(this)->fops->setxattr,
+ &loc,
+ dict,
+ 0, NULL);
+
+ ret = 0;
+
+out:
+ if (dict)
+ dict_unref (dict);
+
+ if (data)
+ data_unref (data);
+
+ if (ret && clnt_cmd)
+ GF_FREE (clnt_cmd);
+
+ loc_wipe (&loc);
+ return ret;
+}
+
+static int
+is_pump_aborted (xlator_t *this)
+{
+ pump_state_t state;
+
+ state = pump_get_state ();
+
+ return ((state == PUMP_STATE_ABORT));
+}
+
+int32_t
+pump_cmd_start_getxattr_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ char *path = NULL;
+
+ pump_state_t state;
+ int ret = 0;
+ int need_unwind = 0;
+ int dict_ret = -1;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "getxattr failed - changing pump "
+ "state to RUNNING with '/'");
+ path = "/";
+ ret = op_ret;
+ } else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "getxattr succeeded");
+
+ dict_ret = dict_get_str (dict, PUMP_PATH, &path);
+ if (dict_ret < 0)
+ path = "/";
+ }
+
+ state = pump_get_state ();
+ if ((state == PUMP_STATE_RUNNING) ||
+ (state == PUMP_STATE_RESUME)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Pump is already started");
+ ret = -1;
+ goto out;
+ }
+
+ pump_set_resume_path (this, path);
+
+ if (is_pump_aborted (this))
+ /* We're re-starting pump afresh */
+ ret = pump_initiate_sink_connect (frame, this);
+ else {
+ /* We're re-starting pump from a previous
+ pause */
+ gf_log (this->name, GF_LOG_DEBUG,
+ "about to start synctask");
+ ret = pump_start_synctask (this);
+ need_unwind = 1;
+ }
+
+out:
+ if ((ret < 0) || (need_unwind == 1)) {
+ local->op_ret = ret;
+ pump_command_reply (frame, this);
+ }
+ return 0;
+}
+
+int
+pump_execute_status (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ uint64_t number_files = 0;
+
+ char filename[PATH_MAX];
+ char summary[PATH_MAX+256];
+ char *dict_str = NULL;
+
+ int32_t op_ret = 0;
+ int32_t op_errno = 0;
+
+ dict_t *dict = NULL;
+ int ret = -1;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+
+ LOCK (&pump_priv->resume_path_lock);
+ {
+ number_files = pump_priv->number_files_pumped;
+ strncpy (filename, pump_priv->current_file, PATH_MAX);
+ }
+ UNLOCK (&pump_priv->resume_path_lock);
+
+ dict_str = GF_CALLOC (1, PATH_MAX + 256, gf_afr_mt_char);
+ if (!dict_str) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ if (pump_priv->pump_finished) {
+ snprintf (summary, PATH_MAX+256,
+ "no_of_files=%"PRIu64, number_files);
+ } else {
+ snprintf (summary, PATH_MAX+256,
+ "no_of_files=%"PRIu64":current_file=%s",
+ number_files, filename);
+ }
+ snprintf (dict_str, PATH_MAX+256, "status=%d:%s",
+ (pump_priv->pump_finished)?1:0, summary);
+
+ dict = dict_new ();
+
+ ret = dict_set_dynstr (dict, RB_PUMP_CMD_STATUS, dict_str);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "dict_set_dynstr returned negative value");
+ } else {
+ dict_str = NULL;
+ }
+
+ op_ret = 0;
+
+out:
+
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, NULL);
+
+ if (dict)
+ dict_unref (dict);
+
+ GF_FREE (dict_str);
+
+ return 0;
+}
+
+int
+pump_execute_pause (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ pump_change_state (this, PUMP_STATE_PAUSE);
+
+ local->op_ret = 0;
+ pump_command_reply (frame, this);
+
+ return 0;
+}
+
+int
+pump_execute_start (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ int ret = 0;
+ loc_t loc = {0};
+
+ priv = this->private;
+ local = frame->local;
+
+ if (!priv->root_inode) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Pump xlator cannot be started without an initial "
+ "lookup");
+ ret = -1;
+ goto out;
+ }
+
+ GF_ASSERT (priv->root_inode);
+
+ afr_build_root_loc (this, &loc);
+
+ STACK_WIND (frame,
+ pump_cmd_start_getxattr_cbk,
+ PUMP_SOURCE_CHILD(this),
+ PUMP_SOURCE_CHILD(this)->fops->getxattr,
+ &loc,
+ PUMP_PATH, NULL);
+
+ ret = 0;
+
+out:
+ if (ret < 0) {
+ local->op_ret = ret;
+ pump_command_reply (frame, this);
+ }
+
+ loc_wipe (&loc);
+ return 0;
+}
+
+static int
+pump_cleanup_helper (void *data) {
+ call_frame_t *frame = data;
+
+ pump_xattr_cleaner (frame, 0, frame->this, 0, 0, NULL);
+
+ return 0;
+}
+
+static int
+pump_cleanup_done (int ret, call_frame_t *sync_frame, void *data)
+{
+ STACK_DESTROY (sync_frame->root);
+
+ return 0;
+}
+
+int
+pump_execute_commit (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *sync_frame = NULL;
+ int ret = 0;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+ local = frame->local;
+
+ local->op_ret = 0;
+ if (pump_priv->pump_finished) {
+ pump_change_state (this, PUMP_STATE_COMMIT);
+ sync_frame = create_frame (this, this->ctx->pool);
+ ret = synctask_new (pump_priv->env, pump_cleanup_helper,
+ pump_cleanup_done, sync_frame, frame);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "Couldn't create "
+ "synctask for cleaning up xattrs.");
+ }
+
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "Commit can't proceed. "
+ "Migration in progress");
+ local->op_ret = -1;
+ local->op_errno = EINPROGRESS;
+ pump_command_reply (frame, this);
+ }
+
+ return 0;
+}
+int
+pump_execute_abort (call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ pump_private_t *pump_priv = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *sync_frame = NULL;
+ int ret = 0;
+
+ priv = this->private;
+ pump_priv = priv->pump_private;
+ local = frame->local;
+
+ pump_change_state (this, PUMP_STATE_ABORT);
+
+ LOCK (&pump_priv->resume_path_lock);
+ {
+ pump_priv->number_files_pumped = 0;
+ pump_priv->current_file[0] = '\0';
+ }
+ UNLOCK (&pump_priv->resume_path_lock);
+
+ local->op_ret = 0;
+ if (pump_priv->pump_finished) {
+ sync_frame = create_frame (this, this->ctx->pool);
+ ret = synctask_new (pump_priv->env, pump_cleanup_helper,
+ pump_cleanup_done, sync_frame, frame);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "Couldn't create "
+ "synctask for cleaning up xattrs.");
+ }
+
+ } else {
+ pump_priv->cleaner = fop_setxattr_cbk_stub (frame,
+ pump_xattr_cleaner,
+ 0, 0, NULL);
+ }
+
+ return 0;
+}
+
+gf_boolean_t
+pump_command_status (xlator_t *this, dict_t *dict)
+{
+ char *cmd = NULL;
+ int dict_ret = -1;
+ int ret = _gf_true;
+
+ dict_ret = dict_get_str (dict, RB_PUMP_CMD_STATUS, &cmd);
+ if (dict_ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Not a pump status command");
+ ret = _gf_false;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Hit a pump command - status");
+ ret = _gf_true;
+
+out:
+ return ret;
+
+}
+
+gf_boolean_t
+pump_command_pause (xlator_t *this, dict_t *dict)
+{
+ char *cmd = NULL;
+ int dict_ret = -1;
+ int ret = _gf_true;
+
+ dict_ret = dict_get_str (dict, RB_PUMP_CMD_PAUSE, &cmd);
+ if (dict_ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Not a pump pause command");
+ ret = _gf_false;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Hit a pump command - pause");
+ ret = _gf_true;
+
+out:
+ return ret;
+
+}
+
+gf_boolean_t
+pump_command_commit (xlator_t *this, dict_t *dict)
+{
+ char *cmd = NULL;
+ int dict_ret = -1;
+ int ret = _gf_true;
+
+ dict_ret = dict_get_str (dict, RB_PUMP_CMD_COMMIT, &cmd);
+ if (dict_ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Not a pump commit command");
+ ret = _gf_false;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Hit a pump command - commit");
+ ret = _gf_true;
+
+out:
+ return ret;
+
+}
+
+gf_boolean_t
+pump_command_abort (xlator_t *this, dict_t *dict)
+{
+ char *cmd = NULL;
+ int dict_ret = -1;
+ int ret = _gf_true;
+
+ dict_ret = dict_get_str (dict, RB_PUMP_CMD_ABORT, &cmd);
+ if (dict_ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Not a pump abort command");
+ ret = _gf_false;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Hit a pump command - abort");
+ ret = _gf_true;
+
+out:
+ return ret;
+
+}
+
+gf_boolean_t
+pump_command_start (xlator_t *this, dict_t *dict)
+{
+ char *cmd = NULL;
+ int dict_ret = -1;
+ int ret = _gf_true;
+
+ dict_ret = dict_get_str (dict, RB_PUMP_CMD_START, &cmd);
+ if (dict_ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Not a pump start command");
+ ret = _gf_false;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Hit a pump command - start");
+ ret = _gf_true;
+
+out:
+ return ret;
+
+}
+
+struct _xattr_key {
+ char *key;
+ struct list_head list;
+};
+
+static int
+__gather_xattr_keys (dict_t *dict, char *key, data_t *value,
+ void *data)
+{
+ struct list_head * list = data;
+ struct _xattr_key * xkey = NULL;
+
+ if (!strncmp (key, AFR_XATTR_PREFIX,
+ strlen (AFR_XATTR_PREFIX))) {
+
+ xkey = GF_CALLOC (1, sizeof (*xkey), gf_afr_mt_xattr_key);
+ if (!xkey)
+ return -1;
+
+ xkey->key = key;
+ INIT_LIST_HEAD (&xkey->list);
+
+ list_add_tail (&xkey->list, list);
+ }
+ return 0;
+}
+
+static void
+__filter_xattrs (dict_t *dict)
+{
+ struct list_head keys;
+
+ struct _xattr_key *key;
+ struct _xattr_key *tmp;
+
+ INIT_LIST_HEAD (&keys);
+
+ dict_foreach (dict, __gather_xattr_keys,
+ (void *) &keys);
+
+ list_for_each_entry_safe (key, tmp, &keys, list) {
+ dict_del (dict, key->key);
+
+ list_del_init (&key->list);
+
+ GF_FREE (key);
+ }
+}
+
+int32_t
+pump_getxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int unwind = 1;
+ int32_t *last_index = NULL;
+ int32_t next_call_child = -1;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
+
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+
+ read_child = (long) cookie;
+
+ if (op_ret == -1) {
+ last_index = &local->cont.getxattr.last_index;
+ fresh_children = local->fresh_children;
+ next_call_child = afr_next_call_child (fresh_children,
+ local->child_up,
+ priv->child_count,
+ last_index, read_child);
+ if (next_call_child < 0)
+ goto out;
+
+ unwind = 0;
+ STACK_WIND_COOKIE (frame, pump_getxattr_cbk,
+ (void *) (long) read_child,
+ children[next_call_child],
+ children[next_call_child]->fops->getxattr,
+ &local->loc,
+ local->cont.getxattr.name, NULL);
+ }
+
+out:
+ if (unwind) {
+ if (op_ret >= 0 && dict)
+ __filter_xattrs (dict);
+
+ AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, NULL);
+ }
+
+ return 0;
+}
+
+int32_t
+pump_getxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ xlator_t ** children = NULL;
+ int call_child = 0;
+ afr_local_t *local = NULL;
+ int32_t ret = -1;
+ int32_t op_errno = 0;
+ uint64_t read_child = 0;
+
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO (priv->children, out);
+
+ children = priv->children;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame, default_getxattr_cbk,
+ FIRST_CHILD (this),
+ (FIRST_CHILD (this))->fops->getxattr,
+ loc, name, xdata);
+ return 0;
+ }
+
+
+ AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
+ local = frame->local;
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0)
+ goto out;
+
+ if (name) {
+ if (!strncmp (name, AFR_XATTR_PREFIX,
+ strlen (AFR_XATTR_PREFIX))) {
+
+ op_errno = ENODATA;
+ goto out;
+ }
+
+ if (!strcmp (name, RB_PUMP_CMD_STATUS)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Hit pump command - status");
+ pump_execute_status (frame, this);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ local->fresh_children = GF_CALLOC (priv->child_count,
+ sizeof (*local->fresh_children),
+ gf_afr_mt_int32_t);
+ if (!local->fresh_children) {
+ ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children);
+ ret = afr_get_call_child (this, local->child_up, read_child,
+ local->fresh_children,
+ &call_child,
+ &local->cont.getxattr.last_index);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+ loc_copy (&local->loc, loc);
+ if (name)
+ local->cont.getxattr.name = gf_strdup (name);
+
+ STACK_WIND_COOKIE (frame, pump_getxattr_cbk,
+ (void *) (long) call_child,
+ children[call_child], children[call_child]->fops->getxattr,
+ loc, name, xdata);
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+static int
+afr_setxattr_unwind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ if (local->transaction.main_frame)
+ main_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK (&frame->lock);
+
+ if (main_frame) {
+ AFR_STACK_UNWIND (setxattr, main_frame,
+ local->op_ret, local->op_errno, NULL);
+ }
+ return 0;
+}
+
+static int
+afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t * local = NULL;
+ afr_private_t * priv = NULL;
+
+ int call_count = -1;
+ int need_unwind = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret != -1) {
+ if (local->success_count == 0) {
+ local->op_ret = op_ret;
+ }
+ local->success_count++;
+
+ if (local->success_count == priv->child_count) {
+ need_unwind = 1;
+ }
+ }
+
+ local->op_errno = op_errno;
+ }
+ UNLOCK (&frame->lock);
+
+ if (need_unwind)
+ local->transaction.unwind (frame, this);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ }
+
+ return 0;
+}
+
+static int
+afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ int call_count = -1;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ call_count = afr_up_children_count (local->child_up, priv->child_count);
+
+ if (call_count == 0) {
+ local->transaction.resume (frame, this);
+ return 0;
+ }
+
+ local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, afr_setxattr_wind_cbk,
+ (void *) (long) i,
+ priv->children[i],
+ priv->children[i]->fops->setxattr,
+ &local->loc,
+ local->cont.setxattr.dict,
+ local->cont.setxattr.flags, NULL);
+
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+}
+
+
+static int
+afr_setxattr_done (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t * local = frame->local;
+
+ local->transaction.unwind (frame, this);
+
+ AFR_STACK_DESTROY (frame);
+
+ return 0;
+}
+
+int32_t
+pump_setxattr_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ AFR_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+pump_command_reply (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->op_ret < 0)
+ gf_log (this->name, GF_LOG_INFO,
+ "Command failed");
+ else
+ gf_log (this->name, GF_LOG_INFO,
+ "Command succeeded");
+
+ AFR_STACK_UNWIND (setxattr,
+ frame,
+ local->op_ret,
+ local->op_errno, NULL);
+
+ return 0;
+}
+
+int
+pump_parse_command (call_frame_t *frame, xlator_t *this,
+ afr_local_t *local, dict_t *dict)
+{
+
+ int ret = -1;
+
+ if (pump_command_start (this, dict)) {
+ frame->local = local;
+ local->dict = dict_ref (dict);
+ ret = pump_execute_start (frame, this);
+
+ } else if (pump_command_pause (this, dict)) {
+ frame->local = local;
+ local->dict = dict_ref (dict);
+ ret = pump_execute_pause (frame, this);
+
+ } else if (pump_command_abort (this, dict)) {
+ frame->local = local;
+ local->dict = dict_ref (dict);
+ ret = pump_execute_abort (frame, this);
+
+ } else if (pump_command_commit (this, dict)) {
+ frame->local = local;
+ local->dict = dict_ref (dict);
+ ret = pump_execute_commit (frame, this);
+ }
+ return ret;
+}
+
+int
+pump_setxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ afr_private_t * priv = NULL;
+ afr_local_t * local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (this->private, out);
+
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.pump*", dict,
+ op_errno, out);
+
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame, default_setxattr_cbk,
+ FIRST_CHILD (this),
+ (FIRST_CHILD (this))->fops->setxattr,
+ loc, dict, flags, xdata);
+ return 0;
+ }
+
+
+ AFR_LOCAL_ALLOC_OR_GOTO (local, out);
+
+ ret = afr_local_init (local, priv, &op_errno);
+ if (ret < 0) {
+ afr_local_cleanup (local, this);
+ goto out;
+ }
+
+ ret = pump_parse_command (frame, this,
+ local, dict);
+ if (ret >= 0) {
+ ret = 0;
+ goto out;
+ }
+
+ transaction_frame = copy_frame (frame);
+ if (!transaction_frame) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory.");
+ op_errno = ENOMEM;
+ ret = -1;
+ afr_local_cleanup (local, this);
+ goto out;
+ }
+
+ transaction_frame->local = local;
+
+ local->op_ret = -1;
+
+ local->cont.setxattr.dict = dict_ref (dict);
+ local->cont.setxattr.flags = flags;
+
+ local->transaction.fop = afr_setxattr_wind;
+ local->transaction.done = afr_setxattr_done;
+ local->transaction.unwind = afr_setxattr_unwind;
+
+ loc_copy (&local->loc, loc);
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+
+ afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+
+ ret = 0;
+out:
+ if (ret < 0) {
+ if (transaction_frame)
+ AFR_STACK_DESTROY (transaction_frame);
+ AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+ }
+
+ return 0;
+}
+
+/* Defaults */
+static int32_t
+pump_lookup (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *xattr_req)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_lookup_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup,
+ loc,
+ xattr_req);
+ return 0;
+ }
+
+ afr_lookup (frame, this, loc, xattr_req);
+ return 0;
+}
+
+
+static int32_t
+pump_truncate (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ off_t offset, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_truncate_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate,
+ loc,
+ offset, xdata);
+ return 0;
+ }
+
+ afr_truncate (frame, this, loc, offset, xdata);
+ return 0;
+}
+
+
+static int32_t
+pump_ftruncate (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ off_t offset, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_ftruncate_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate,
+ fd,
+ offset, xdata);
+ return 0;
+ }
+
+ afr_ftruncate (frame, this, fd, offset, xdata);
+ return 0;
+}
+
+
+
+
+int
+pump_mknod (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame, default_mknod_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod,
+ loc, mode, rdev, umask, xdata);
+ return 0;
+ }
+ afr_mknod (frame, this, loc, mode, rdev, umask, xdata);
+ return 0;
+
+}
+
+
+
+int
+pump_mkdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame, default_mkdir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir,
+ loc, mode, umask, xdata);
+ return 0;
+ }
+ afr_mkdir (frame, this, loc, mode, umask, xdata);
+ return 0;
+
+}
+
+
+static int32_t
+pump_unlink (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, int xflag, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_unlink_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink,
+ loc, xflag, xdata);
+ return 0;
+ }
+ afr_unlink (frame, this, loc, xflag, xdata);
+ return 0;
+
+}
+
+
+static int
+pump_rmdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int flags, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame, default_rmdir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rmdir,
+ loc, flags, xdata);
+ return 0;
+ }
+
+ afr_rmdir (frame, this, loc, flags, xdata);
+ return 0;
+
+}
+
+
+
+int
+pump_symlink (call_frame_t *frame, xlator_t *this,
+ const char *linkpath, loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame, default_symlink_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->symlink,
+ linkpath, loc, umask, xdata);
+ return 0;
+ }
+ afr_symlink (frame, this, linkpath, loc, umask, xdata);
+ return 0;
+
+}
+
+
+static int32_t
+pump_rename (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_rename_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename,
+ oldloc, newloc, xdata);
+ return 0;
+ }
+ afr_rename (frame, this, oldloc, newloc, xdata);
+ return 0;
+
+}
+
+
+static int32_t
+pump_link (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_link_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link,
+ oldloc, newloc, xdata);
+ return 0;
+ }
+ afr_link (frame, this, oldloc, newloc, xdata);
+ return 0;
+
+}
+
+
+static int32_t
+pump_create (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame, default_create_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create,
+ loc, flags, mode, umask, fd, xdata);
+ return 0;
+ }
+ afr_create (frame, this, loc, flags, mode, umask, fd, xdata);
+ return 0;
+
+}
+
+
+static int32_t
+pump_open (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_open_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open,
+ loc, flags, fd, xdata);
+ return 0;
+ }
+ afr_open (frame, this, loc, flags, fd, xdata);
+ return 0;
+
+}
+
+
+static int32_t
+pump_writev (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ struct iovec *vector,
+ int32_t count,
+ off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_writev_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev,
+ fd,
+ vector,
+ count,
+ off, flags,
+ iobref, xdata);
+ return 0;
+ }
+
+ afr_writev (frame, this, fd, vector, count, off, flags, iobref, xdata);
+ return 0;
+}
+
+
+static int32_t
+pump_flush (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_flush_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush,
+ fd, xdata);
+ return 0;
+ }
+ afr_flush (frame, this, fd, xdata);
+ return 0;
+
+}
+
+
+static int32_t
+pump_fsync (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t flags, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_fsync_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync,
+ fd,
+ flags, xdata);
+ return 0;
+ }
+ afr_fsync (frame, this, fd, flags, xdata);
+ return 0;
+
+}
+
+
+static int32_t
+pump_opendir (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, fd_t *fd, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_opendir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir,
+ loc, fd, xdata);
+ return 0;
+ }
+ afr_opendir (frame, this, loc, fd, xdata);
+ return 0;
+
+}
+
+
+static int32_t
+pump_fsyncdir (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t flags, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_fsyncdir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsyncdir,
+ fd,
+ flags, xdata);
+ return 0;
+ }
+ afr_fsyncdir (frame, this, fd, flags, xdata);
+ return 0;
+
+}
+
+
+static int32_t
+pump_xattrop (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ gf_xattrop_flags_t flags,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_xattrop_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->xattrop,
+ loc,
+ flags,
+ dict, xdata);
+ return 0;
+ }
+ afr_xattrop (frame, this, loc, flags, dict, xdata);
+ return 0;
+
+}
+
+static int32_t
+pump_fxattrop (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ gf_xattrop_flags_t flags,
+ dict_t *dict, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_fxattrop_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fxattrop,
+ fd,
+ flags,
+ dict, xdata);
+ return 0;
+ }
+ afr_fxattrop (frame, this, fd, flags, dict, xdata);
+ return 0;
+
+}
+
+
+static int32_t
+pump_removexattr (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO (this, out);
+
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.pump*",
+ name, op_errno, out);
+
+ op_errno = 0;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_removexattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr,
+ loc,
+ name, xdata);
+ return 0;
+ }
+ afr_removexattr (frame, this, loc, name, xdata);
+
+ out:
+ if (op_errno)
+ AFR_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
+ return 0;
+
+}
+
+
+
+static int32_t
+pump_readdir (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size,
+ off_t off, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_readdir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdir,
+ fd, size, off, xdata);
+ return 0;
+ }
+ afr_readdir (frame, this, fd, size, off, xdata);
+ return 0;
+
+}
+
+
+static int32_t
+pump_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t off, dict_t *dict)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_readdirp_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp,
+ fd, size, off, dict);
+ return 0;
+ }
+ afr_readdirp (frame, this, fd, size, off, dict);
+ return 0;
+
+}
+
+
+
+static int32_t
+pump_releasedir (xlator_t *this,
+ fd_t *fd)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (priv->use_afr_in_pump)
+ afr_releasedir (this, fd);
+ return 0;
+
+}
+
+static int32_t
+pump_release (xlator_t *this,
+ fd_t *fd)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (priv->use_afr_in_pump)
+ afr_release (this, fd);
+ return 0;
+
+}
+
+static int32_t
+pump_forget (xlator_t *this, inode_t *inode)
+{
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ if (priv->use_afr_in_pump)
+ afr_forget (this, inode);
+
+ return 0;
+}
+
+static int32_t
+pump_setattr (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_setattr_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->setattr,
+ loc, stbuf, valid, xdata);
+ return 0;
+ }
+ afr_setattr (frame, this, loc, stbuf, valid, xdata);
+ return 0;
+
+}
+
+
+static int32_t
+pump_fsetattr (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ if (!priv->use_afr_in_pump) {
+ STACK_WIND (frame,
+ default_fsetattr_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fsetattr,
+ fd, stbuf, valid, xdata);
+ return 0;
+ }
+ afr_fsetattr (frame, this, fd, stbuf, valid, xdata);
+ return 0;
+
+}
+
+
+/* End of defaults */
+
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_afr_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+static int
+is_xlator_pump_sink (xlator_t *child)
+{
+ return (child == PUMP_SINK_CHILD(THIS));
+}
+
+static int
+is_xlator_pump_source (xlator_t *child)
+{
+ return (child == PUMP_SOURCE_CHILD(THIS));
+}
+
+int32_t
+notify (xlator_t *this, int32_t event,
+ void *data, ...)
+{
+ int ret = -1;
+ xlator_t *child_xl = NULL;
+
+ child_xl = (xlator_t *) data;
+
+ ret = afr_notify (this, event, data, NULL);
+
+ switch (event) {
+ case GF_EVENT_CHILD_DOWN:
+ if (is_xlator_pump_source (child_xl))
+ pump_change_state (this, PUMP_STATE_ABORT);
+ break;
+
+ case GF_EVENT_CHILD_UP:
+ if (is_xlator_pump_sink (child_xl))
+ if (is_pump_start_pending (this)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "about to start synctask");
+ ret = pump_start_synctask (this);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Could not start pump "
+ "synctask");
+ else
+ pump_remove_start_pending (this);
+ }
+ }
+
+ return ret;
+}
+
+int32_t
+init (xlator_t *this)
+{
+ afr_private_t * priv = NULL;
+ pump_private_t *pump_priv = NULL;
+ int child_count = 0;
+ xlator_list_t * trav = NULL;
+ int i = 0;
+ int ret = -1;
+ GF_UNUSED int op_errno = 0;
+
+ int source_child = 0;
+
+ if (!this->children) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "pump translator needs a source and sink"
+ "subvolumes defined.");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Volume is dangling.");
+ }
+
+ this->private = GF_CALLOC (1, sizeof (afr_private_t),
+ gf_afr_mt_afr_private_t);
+ if (!this->private)
+ goto out;
+
+ priv = this->private;
+ LOCK_INIT (&priv->lock);
+ LOCK_INIT (&priv->read_child_lock);
+ //lock recovery is not done in afr
+ pthread_mutex_init (&priv->mutex, NULL);
+ INIT_LIST_HEAD (&priv->saved_fds);
+
+ child_count = xlator_subvolume_count (this);
+ if (child_count != 2) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "There should be exactly 2 children - one source "
+ "and one sink");
+ return -1;
+ }
+ priv->child_count = child_count;
+
+ priv->read_child = source_child;
+ priv->favorite_child = source_child;
+ priv->background_self_heal_count = 0;
+
+ priv->data_self_heal = "on";
+ priv->metadata_self_heal = 1;
+ priv->entry_self_heal = 1;
+
+ priv->data_self_heal_window_size = 16;
+
+ priv->data_change_log = 1;
+ priv->metadata_change_log = 1;
+ priv->entry_change_log = 1;
+ priv->use_afr_in_pump = 1;
+ priv->sh_readdir_size = 65536;
+
+ /* Locking options */
+
+ /* Lock server count infact does not matter. Locks are held
+ on all subvolumes, in this case being the source
+ and the sink.
+ */
+
+ priv->strict_readdir = _gf_false;
+
+ priv->wait_count = 1;
+ priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count,
+ gf_afr_mt_char);
+ if (!priv->child_up) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory.");
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ priv->children = GF_CALLOC (sizeof (xlator_t *), child_count,
+ gf_afr_mt_xlator_t);
+ if (!priv->children) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory.");
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ priv->pending_key = GF_CALLOC (sizeof (*priv->pending_key),
+ child_count,
+ gf_afr_mt_char);
+ if (!priv->pending_key) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory.");
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ trav = this->children;
+ i = 0;
+ while (i < child_count) {
+ priv->children[i] = trav->xlator;
+
+ ret = gf_asprintf (&priv->pending_key[i], "%s.%s", AFR_XATTR_PREFIX,
+ trav->xlator->name);
+ if (-1 == ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "asprintf failed to set pending key");
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ trav = trav->next;
+ i++;
+ }
+
+ ret = gf_asprintf (&priv->sh_domain, "%s-self-heal", this->name);
+ if (-1 == ret) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ priv->first_lookup = 1;
+ priv->root_inode = NULL;
+
+ priv->last_event = GF_CALLOC (child_count, sizeof (*priv->last_event),
+ gf_afr_mt_int32_t);
+ if (!priv->last_event) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ pump_priv = GF_CALLOC (1, sizeof (*pump_priv),
+ gf_afr_mt_pump_priv);
+ if (!pump_priv) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ LOCK_INIT (&pump_priv->resume_path_lock);
+ LOCK_INIT (&pump_priv->pump_state_lock);
+
+ pump_priv->resume_path = GF_CALLOC (1, PATH_MAX,
+ gf_afr_mt_char);
+ if (!pump_priv->resume_path) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+ ret = -1;
+ goto out;
+ }
+
+ pump_priv->env = this->ctx->env;
+ if (!pump_priv->env) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not create new sync-environment");
+ ret = -1;
+ goto out;
+ }
+
+ /* keep more local here as we may need them for self-heal etc */
+ this->local_pool = mem_pool_new (afr_local_t, 128);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto out;
+ }
+
+ priv->pump_private = pump_priv;
+
+ pump_change_state (this, PUMP_STATE_ABORT);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+fini (xlator_t *this)
+{
+ afr_private_t * priv = NULL;
+ pump_private_t *pump_priv = NULL;
+
+ priv = this->private;
+ this->private = NULL;
+ if (!priv)
+ goto out;
+
+ pump_priv = priv->pump_private;
+ if (!pump_priv)
+ goto afr_priv;
+
+ GF_FREE (pump_priv->resume_path);
+ LOCK_DESTROY (&pump_priv->resume_path_lock);
+ LOCK_DESTROY (&pump_priv->pump_state_lock);
+ GF_FREE (pump_priv);
+afr_priv:
+ afr_priv_destroy (priv);
+out:
+ return 0;
+}
+
+
+struct xlator_fops fops = {
+ .lookup = pump_lookup,
+ .open = pump_open,
+ .flush = pump_flush,
+ .fsync = pump_fsync,
+ .fsyncdir = pump_fsyncdir,
+ .xattrop = pump_xattrop,
+ .fxattrop = pump_fxattrop,
+ .getxattr = pump_getxattr,
+
+ /* inode write */
+ .writev = pump_writev,
+ .truncate = pump_truncate,
+ .ftruncate = pump_ftruncate,
+ .setxattr = pump_setxattr,
+ .setattr = pump_setattr,
+ .fsetattr = pump_fsetattr,
+ .removexattr = pump_removexattr,
+
+ /* dir read */
+ .opendir = pump_opendir,
+ .readdir = pump_readdir,
+ .readdirp = pump_readdirp,
+
+ /* dir write */
+ .create = pump_create,
+ .mknod = pump_mknod,
+ .mkdir = pump_mkdir,
+ .unlink = pump_unlink,
+ .rmdir = pump_rmdir,
+ .link = pump_link,
+ .symlink = pump_symlink,
+ .rename = pump_rename,
+};
+
+struct xlator_dumpops dumpops = {
+ .priv = afr_priv_dump,
+};
+
+
+struct xlator_cbks cbks = {
+ .release = pump_release,
+ .releasedir = pump_releasedir,
+ .forget = pump_forget,
+};
+
+struct volume_options options[] = {
+ { .key = {NULL} },
+};
diff --git a/xlators/cluster/afr/src/pump.h b/xlators/cluster/afr/src/pump.h
new file mode 100644
index 000000000..bc4c31a78
--- /dev/null
+++ b/xlators/cluster/afr/src/pump.h
@@ -0,0 +1,78 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __PUMP_H__
+#define __PUMP_H__
+
+#include "syncop.h"
+
+/* FIXME: Needs to be defined in a common file */
+#define CLIENT_CMD_CONNECT "trusted.glusterfs.client-connect"
+#define CLIENT_CMD_DISCONNECT "trusted.glusterfs.client-disconnect"
+
+#define PUMP_SOURCE_COMPLETE "trusted.glusterfs.pump-source-complete"
+#define PUMP_SINK_COMPLETE "trusted.glusterfs.pump-sink-complete"
+
+#define PUMP_PATH "trusted.glusterfs.pump-path"
+
+#define PUMP_SOURCE_CHILD(xl) (xl->children->xlator)
+#define PUMP_SINK_CHILD(xl) (xl->children->next->xlator)
+
+typedef enum {
+ PUMP_STATE_RUNNING, /* Pump is running and migrating files */
+ PUMP_STATE_RESUME, /* Pump is resuming from a previous pause */
+ PUMP_STATE_PAUSE, /* Pump is paused */
+ PUMP_STATE_ABORT, /* Pump is aborted */
+ PUMP_STATE_COMMIT, /* Pump is commited */
+} pump_state_t;
+
+typedef struct _pump_private {
+ struct syncenv *env; /* The env pointer to the pump synctask */
+ char *resume_path; /* path to resume from the last pause */
+ gf_lock_t resume_path_lock; /* Synchronize resume_path changes */
+ gf_lock_t pump_state_lock; /* Synchronize pump_state changes */
+ pump_state_t pump_state; /* State of pump */
+ char current_file[PATH_MAX]; /* Current file being pumped */
+ uint64_t number_files_pumped; /* Number of files pumped */
+ gf_boolean_t pump_finished; /* Boolean to indicate pump termination */
+ char pump_start_pending; /* Boolean to mark start pending until
+ CHILD_UP */
+ call_stub_t *cleaner;
+} pump_private_t;
+
+void
+build_root_loc (inode_t *inode, loc_t *loc);
+int pump_start (call_frame_t *frame, xlator_t *this);
+
+gf_boolean_t
+pump_command_start (xlator_t *this, dict_t *dict);
+
+int
+pump_execute_start (call_frame_t *frame, xlator_t *this);
+
+gf_boolean_t
+pump_command_pause (xlator_t *this, dict_t *dict);
+
+int
+pump_execute_pause (call_frame_t *frame, xlator_t *this);
+
+gf_boolean_t
+pump_command_abort (xlator_t *this, dict_t *dict);
+
+int
+pump_execute_abort (call_frame_t *frame, xlator_t *this);
+
+gf_boolean_t
+pump_command_status (xlator_t *this, dict_t *dict);
+
+int
+pump_execute_status (call_frame_t *frame, xlator_t *this);
+
+#endif /* __PUMP_H__ */
diff --git a/xlators/cluster/dht/src/Makefile.am b/xlators/cluster/dht/src/Makefile.am
index f87212699..174bea841 100644
--- a/xlators/cluster/dht/src/Makefile.am
+++ b/xlators/cluster/dht/src/Makefile.am
@@ -1,30 +1,38 @@
-xlator_LTLIBRARIES = dht.la nufa.la
+xlator_LTLIBRARIES = dht.la nufa.la switch.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
+dht_common_source = dht-layout.c dht-helper.c dht-linkfile.c dht-rebalance.c \
+ dht-selfheal.c dht-rename.c dht-hashfn.c dht-diskusage.c \
+ dht-common.c dht-inode-write.c dht-inode-read.c dht-shared.c \
+ $(top_builddir)/xlators/lib/src/libxlator.c
-dht_common_source = dht-layout.c dht-helper.c dht-linkfile.c \
- dht-selfheal.c dht-rename.c dht-hashfn.c dht-diskusage.c
-
-dht_la_SOURCES = $(dht_common_source) dht.c
+dht_la_SOURCES = $(dht_common_source) dht.c
nufa_la_SOURCES = $(dht_common_source) nufa.c
+switch_la_SOURCES = $(dht_common_source) switch.c
-dht_la_LDFLAGS = -module -avoidversion
+dht_la_LDFLAGS = -module -avoid-version
dht_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-nufa_la_LDFLAGS = -module -avoidversion
+nufa_la_LDFLAGS = -module -avoid-version
nufa_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = dht-common.h dht-common.c
+switch_la_LDFLAGS = -module -avoid-version
+switch_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = dht-common.h dht-mem-types.h \
+ $(top_builddir)/xlators/lib/src/libxlator.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/xlators/lib/src
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CFLAGS = -Wall $(GF_CFLAGS)
-CLEANFILES =
+CLEANFILES =
uninstall-local:
rm -f $(DESTDIR)$(xlatordir)/distribute.so
install-data-hook:
- ln -sf dht.so $(DESTDIR)$(xlatordir)/distribute.so \ No newline at end of file
+ ln -sf dht.so $(DESTDIR)$(xlatordir)/distribute.so
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 62f3822a5..8f61339e6 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2009-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -27,10 +18,87 @@
#include "glusterfs.h"
#include "xlator.h"
+#include "libxlator.h"
#include "dht-common.h"
#include "defaults.h"
+#include "byte-order.h"
+#include "glusterfs-acl.h"
#include <sys/time.h>
+#include <libgen.h>
+
+int
+dht_aggregate (dict_t *this, char *key, data_t *value, void *data)
+{
+ dict_t *dst = NULL;
+ int64_t *ptr = 0, *size = NULL;
+ int32_t ret = -1;
+ data_t *dict_data = NULL;
+
+ dst = data;
+
+ if (strcmp (key, GF_XATTR_QUOTA_SIZE_KEY) == 0) {
+ ret = dict_get_bin (dst, key, (void **)&size);
+ if (ret < 0) {
+ size = GF_CALLOC (1, sizeof (int64_t),
+ gf_common_mt_char);
+ if (size == NULL) {
+ gf_log ("dht", GF_LOG_WARNING,
+ "memory allocation failed");
+ return -1;
+ }
+ ret = dict_set_bin (dst, key, size, sizeof (int64_t));
+ if (ret < 0) {
+ gf_log ("dht", GF_LOG_WARNING,
+ "dht aggregate dict set failed");
+ GF_FREE (size);
+ return -1;
+ }
+ }
+
+ ptr = data_to_bin (value);
+ if (ptr == NULL) {
+ gf_log ("dht", GF_LOG_WARNING, "data to bin failed");
+ return -1;
+ }
+
+ *size = hton64 (ntoh64 (*size) + ntoh64 (*ptr));
+
+ } else if (fnmatch (GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0) {
+ ret = gf_get_min_stime (THIS, dst, key, value);
+ if (ret < 0)
+ return ret;
+ } else {
+ /* compare user xattrs only */
+ if (!strncmp (key, "user.", strlen ("user."))) {
+ ret = dict_lookup (dst, key, &dict_data);
+ if (!ret && dict_data && value) {
+ ret = is_data_equal (dict_data, value);
+ if (!ret)
+ gf_log ("dht", GF_LOG_DEBUG,
+ "xattr mismatch for %s", key);
+ }
+ }
+ ret = dict_set (dst, key, value);
+ if (ret)
+ gf_log ("dht", GF_LOG_WARNING, "xattr dict set failed");
+ }
+
+ return 0;
+}
+
+
+void
+dht_aggregate_xattr (dict_t *dst, dict_t *src)
+{
+ if ((dst == NULL) || (src == NULL)) {
+ goto out;
+ }
+
+ dict_foreach (src, dht_aggregate, dst);
+out:
+ return;
+}
/* TODO:
- use volumename in xattr instead of "dht"
@@ -42,100 +110,369 @@
int
dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int op_ret, int op_errno)
+ xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int ret = 0;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int ret = -1;
- local = frame->local;
- ret = op_ret;
-
- if (ret == 0) {
- layout = local->selfheal.layout;
- ret = inode_ctx_put (local->inode, this,
- (uint64_t)(long)layout);
-
- if (ret == 0)
- local->selfheal.layout = NULL;
-
- if (local->st_ino) {
- local->stbuf.st_ino = local->st_ino;
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not find hashed subvolume for %s",
- local->loc.path);
- }
- }
+ GF_VALIDATE_OR_GOTO ("dht", frame, out);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- DHT_STACK_UNWIND (frame, ret, local->op_errno, local->inode,
- &local->stbuf, local->xattr);
+ local = frame->local;
+ ret = op_ret;
- return 0;
+ FRAME_SU_UNDO (frame, dht_local_t);
+
+ if (ret == 0) {
+ layout = local->selfheal.layout;
+ ret = dht_layout_set (this, local->inode, layout);
+ }
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
+
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+
+ DHT_STACK_UNWIND (lookup, frame, ret, local->op_errno, local->inode,
+ &local->stbuf, local->xattr, &local->postparent);
+
+out:
+ return ret;
+}
+
+
+int
+dht_discover_complete (xlator_t *this, call_frame_t *discover_frame)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+ int op_errno = 0;
+ int ret = -1;
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+
+ local = discover_frame->local;
+ layout = local->layout;
+ conf = this->private;
+
+ LOCK(&discover_frame->lock);
+ {
+ main_frame = local->main_frame;
+ local->main_frame = NULL;
+ }
+ UNLOCK(&discover_frame->lock);
+
+ if (!main_frame)
+ return 0;
+
+ if (local->file_count && local->dir_count) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "path %s exists as a file on one subvolume "
+ "and directory on another. "
+ "Please fix it manually",
+ local->loc.path);
+ op_errno = EIO;
+ goto out;
+ }
+
+ if (local->cached_subvol) {
+ ret = dht_layout_preset (this, local->cached_subvol,
+ local->inode);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set layout for subvolume %s",
+ local->cached_subvol ? local->cached_subvol->name : "<nil>");
+ op_errno = EINVAL;
+ goto out;
+ }
+ } else {
+ ret = dht_layout_normalize (this, &local->loc, layout);
+ if ((ret < 0) || ((ret > 0) && (local->op_ret != 0))) {
+ /* either the layout is incorrect or the directory is
+ * not found even in one subvolume.
+ */
+ gf_log (this->name, GF_LOG_DEBUG,
+ "normalizing failed on %s "
+ "(overlaps/holes present: %s, "
+ "ENOENT errors: %d)", local->loc.path,
+ (ret < 0) ? "yes" : "no", (ret > 0) ? ret : 0);
+ if ((ret > 0) && (ret == conf->subvolume_cnt)) {
+ op_errno = ESTALE;
+ goto out;
+ }
+ }
+
+ if (local->inode)
+ dht_layout_set (this, local->inode, layout);
+ }
+
+ DHT_STACK_UNWIND (lookup, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
+ return 0;
+out:
+ DHT_STACK_UNWIND (lookup, main_frame, -1, op_errno, NULL, NULL, NULL,
+ NULL);
+
+ return ret;
+}
+
+
+int
+dht_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ int ret = -1;
+ int is_dir = 0;
+ int is_linkfile = 0;
+ int attempt_unwind = 0;
+ dht_conf_t *conf = 0;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, out);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", this->private, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ layout = local->layout;
+
+ /* Check if the gfid is different for file from other node */
+ if (!op_ret && uuid_compare (local->gfid, stbuf->ia_gfid)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: gfid different on %s",
+ local->loc.path, prev->this->name);
+ }
+
+
+ LOCK (&frame->lock);
+ {
+ /* TODO: assert equal mode on stbuf->st_mode and
+ local->stbuf->st_mode
+
+ else mkdir/chmod/chown and fix
+ */
+ ret = dht_layout_merge (this, layout, prev->this,
+ op_ret, op_errno, xattr);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to merge layouts", local->loc.path);
+
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "lookup of %s on %s returned error (%s)",
+ local->loc.path, prev->this->name,
+ strerror (op_errno));
+
+ goto unlock;
+ }
+
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
+ is_dir = check_is_dir (inode, stbuf, xattr);
+
+ if (is_dir) {
+ local->dir_count ++;
+ } else {
+ local->file_count ++;
+
+ if (!is_linkfile) {
+ /* real file */
+ local->cached_subvol = prev->this;
+ attempt_unwind = 1;
+ } else {
+ goto unlock;
+ }
+ }
+
+ local->op_ret = 0;
+
+ if (local->xattr == NULL) {
+ local->xattr = dict_ref (xattr);
+ } else {
+ dht_aggregate_xattr (local->xattr, xattr);
+ }
+
+ if (local->inode == NULL)
+ local->inode = inode_ref (inode);
+
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ dht_iatt_merge (this, &local->postparent, postparent,
+ prev->this);
+ }
+unlock:
+ UNLOCK (&frame->lock);
+out:
+ this_call_cnt = dht_frame_return (frame);
+
+ if (is_last_call (this_call_cnt) || attempt_unwind) {
+ dht_discover_complete (this, frame);
+ }
+
+ if (is_last_call (this_call_cnt))
+ DHT_STACK_DESTROY (frame);
+
+ return 0;
+}
+
+
+int
+dht_discover (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+ int ret;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int call_cnt = 0;
+ int op_errno = EINVAL;
+ int i = 0;
+ call_frame_t *discover_frame = NULL;
+
+ conf = this->private;
+ local = frame->local;
+
+ ret = dict_set_uint32 (local->xattr_req, conf->xattr_name, 4 * 4);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set '%s' key",
+ loc->path, conf->xattr_name);
+
+ ret = dict_set_uint32 (local->xattr_req, conf->link_xattr_name, 256);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set '%s' key",
+ loc->path, conf->link_xattr_name);
+
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ local->layout = dht_layout_new (this, conf->subvolume_cnt);
+
+ if (!local->layout) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ uuid_copy (local->gfid, loc->gfid);
+
+ discover_frame = copy_frame (frame);
+ if (!discover_frame) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ discover_frame->local = local;
+ frame->local = NULL;
+ local->main_frame = frame;
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (discover_frame, dht_discover_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup,
+ &local->loc, local->xattr_req);
+ }
+
+ return 0;
+
+err:
+ DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL);
+
+ return 0;
}
int
dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
- inode_t *inode, struct stat *stbuf, dict_t *xattr)
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
{
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
- int ret = 0;
- int is_dir = 0;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ int ret = -1;
+ int is_dir = 0;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, out);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", this->private, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
- conf = this->private;
local = frame->local;
prev = cookie;
- layout = local->layout;
+ layout = local->layout;
+
+ if (!op_ret && uuid_is_null (local->gfid))
+ memcpy (local->gfid, stbuf->ia_gfid, 16);
+
+ /* Check if the gfid is different for file from other node */
+ if (!op_ret && uuid_compare (local->gfid, stbuf->ia_gfid)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: gfid different on %s",
+ local->loc.path, prev->this->name);
+ }
LOCK (&frame->lock);
{
/* TODO: assert equal mode on stbuf->st_mode and
- local->stbuf->st_mode
+ local->stbuf->st_mode
- else mkdir/chmod/chown and fix
- */
- ret = dht_layout_merge (this, layout, prev->this,
- op_ret, op_errno, xattr);
+ else mkdir/chmod/chown and fix
+ */
+ ret = dht_layout_merge (this, layout, prev->this,
+ op_ret, op_errno, xattr);
- if (op_ret == -1) {
- local->op_errno = ENOENT;
- gf_log (this->name, GF_LOG_DEBUG,
- "lookup of %s on %s returned error (%s)",
- local->loc.path, prev->this->name,
- strerror (op_errno));
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "lookup of %s on %s returned error (%s)",
+ local->loc.path, prev->this->name,
+ strerror (op_errno));
- goto unlock;
- }
+ goto unlock;
+ }
- is_dir = check_is_dir (inode, stbuf, xattr);
- if (!is_dir) {
+ is_dir = check_is_dir (inode, stbuf, xattr);
+ if (!is_dir) {
gf_log (this->name, GF_LOG_DEBUG,
"lookup of %s on %s returned non dir 0%o",
local->loc.path, prev->this->name,
- stbuf->st_mode);
+ stbuf->ia_type);
local->need_selfheal = 1;
- goto unlock;
+ goto unlock;
}
- local->op_ret = 0;
- if (local->xattr == NULL)
- local->xattr = dict_ref (xattr);
- if (local->inode == NULL)
- local->inode = inode_ref (inode);
-
- dht_stat_merge (this, &local->stbuf, stbuf, prev->this);
+ local->op_ret = 0;
+ if (local->xattr == NULL) {
+ local->xattr = dict_ref (xattr);
+ } else {
+ dht_aggregate_xattr (local->xattr, xattr);
+ }
- if (prev->this == local->hashed_subvol)
- local->st_ino = local->stbuf.st_ino;
+ if (local->inode == NULL)
+ local->inode = inode_ref (inode);
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ dht_iatt_merge (this, &local->postparent, postparent,
+ prev->this);
}
unlock:
UNLOCK (&frame->lock);
@@ -150,176 +487,270 @@ unlock:
return 0;
}
- if (local->op_ret == 0) {
- ret = dht_layout_normalize (this, &local->loc, layout);
+ if (local->op_ret == 0) {
+ ret = dht_layout_normalize (this, &local->loc, layout);
- local->layout = NULL;
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "fixing assignment on %s",
+ local->loc.path);
+ goto selfheal;
+ }
- if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "fixing assignment on %s",
- local->loc.path);
- goto selfheal;
- }
-
- inode_ctx_put (local->inode, this,
- (uint64_t)(long)layout);
-
- if (local->st_ino) {
- local->stbuf.st_ino = local->st_ino;
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not find hashed subvol for %s",
- local->loc.path);
- }
- }
+ dht_layout_set (this, local->inode, layout);
+ }
- DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, local->xattr);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
+
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
}
- return 0;
+ return 0;
selfheal:
- ret = dht_selfheal_directory (frame, dht_lookup_selfheal_cbk,
- &local->loc, layout);
-
- return 0;
+ FRAME_SU_DO (frame, dht_local_t);
+ uuid_copy (local->loc.gfid, local->gfid);
+ ret = dht_selfheal_directory (frame, dht_lookup_selfheal_cbk,
+ &local->loc, layout);
+out:
+ return ret;
}
int
dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
- inode_t *inode, struct stat *stbuf, dict_t *xattr)
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
{
dht_local_t *local = NULL;
int this_call_cnt = 0;
call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
- int is_dir = 0;
- int is_linkfile = 0;
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int is_dir = 0;
+ int is_linkfile = 0;
+ call_frame_t *copy = NULL;
+ dht_local_t *copy_local = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, err);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, err);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, err);
local = frame->local;
prev = cookie;
- conf = this->private;
+ conf = this->private;
+ if (!conf)
+ goto out;
LOCK (&frame->lock);
{
- if (op_ret == -1) {
- local->op_errno = op_errno;
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
- if ((op_errno != ENOTCONN)
+ if ((op_errno != ENOTCONN)
&& (op_errno != ENOENT)
&& (op_errno != ESTALE)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
+ gf_log (this->name, GF_LOG_INFO,
+ "subvolume %s for %s returned -1 (%s)",
+ prev->this->name, local->loc.path,
+ strerror (op_errno));
}
-
if (op_errno == ESTALE) {
- /* propogate the ESTALE to parent.
- * setting local->layout_mismatch would send
+ /* propagate the ESTALE to parent.
+ * setting local->return_estale would send
* ESTALE to parent. */
- local->layout_mismatch = 1;
+ local->return_estale = 1;
}
- goto unlock;
- }
+ /* if it is ENOENT, we may have to do a
+ * 'lookup_everywhere()' to make sure
+ * the file is not migrated */
+ if (op_errno == ENOENT) {
+ if (IA_ISREG (local->loc.inode->ia_type)) {
+ local->need_lookup_everywhere = 1;
+ }
+ }
+ goto unlock;
+ }
- if (S_IFMT & (stbuf->st_mode ^ local->inode->st_mode)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "mismatching filetypes 0%o v/s 0%o for %s",
- (stbuf->st_mode & S_IFMT),
- (local->inode->st_mode & S_IFMT),
- local->loc.path);
+ if (stbuf->ia_type != local->inode->ia_type) {
+ gf_log (this->name, GF_LOG_INFO,
+ "mismatching filetypes 0%o v/s 0%o for %s",
+ (stbuf->ia_type), (local->inode->ia_type),
+ local->loc.path);
- local->op_ret = -1;
- local->op_errno = EINVAL;
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
- goto unlock;
- }
+ goto unlock;
+ }
- layout = dht_layout_get (this, inode);
-
- is_dir = check_is_dir (inode, stbuf, xattr);
- is_linkfile = check_is_linkfile (inode, stbuf, xattr);
-
- if (is_linkfile) {
- gf_log (this->name, GF_LOG_DEBUG,
- "linkfile found in revalidate for %s",
- local->loc.path);
- local->layout_mismatch = 1;
-
- goto unlock;
- }
+ layout = local->layout;
- if (is_dir) {
- ret = dht_layout_dir_mismatch (this, layout,
- prev->this, &local->loc,
- xattr);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "mismatching layouts for %s",
- local->loc.path);
-
- local->layout_mismatch = 1;
-
- goto unlock;
- }
- }
-
- dht_stat_merge (this, &local->stbuf, stbuf, prev->this);
-
- local->op_ret = 0;
- local->stbuf.st_ino = local->st_ino;
-
- if (!local->xattr)
- local->xattr = dict_ref (xattr);
- }
-unlock:
- UNLOCK (&frame->lock);
+ is_dir = check_is_dir (inode, stbuf, xattr);
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
+
+ if (is_linkfile) {
+ gf_log (this->name, GF_LOG_INFO,
+ "linkfile found in revalidate for %s",
+ local->loc.path);
+ local->return_estale = 1;
+
+ goto unlock;
+ }
+
+ if (is_dir) {
+ ret = dht_dir_has_layout (xattr, conf->xattr_name);
+ if (ret >= 0) {
+ if (is_greater_time(local->stbuf.ia_ctime,
+ local->stbuf.ia_ctime_nsec,
+ stbuf->ia_ctime,
+ stbuf->ia_ctime_nsec)) {
+ local->prebuf.ia_gid = stbuf->ia_gid;
+ local->prebuf.ia_uid = stbuf->ia_uid;
+ }
+ }
+ if (local->stbuf.ia_type != IA_INVAL)
+ {
+ if ((local->stbuf.ia_gid != stbuf->ia_gid) ||
+ (local->stbuf.ia_uid != stbuf->ia_uid)) {
+ local->need_selfheal = 1;
+ }
+ }
+ ret = dht_layout_dir_mismatch (this, layout,
+ prev->this, &local->loc,
+ xattr);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "mismatching layouts for %s",
+ local->loc.path);
+
+ local->layout_mismatch = 1;
+
+ goto unlock;
+ }
+ }
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ dht_iatt_merge (this, &local->postparent, postparent,
+ prev->this);
+
+ local->op_ret = 0;
+
+ if (!local->xattr) {
+ local->xattr = dict_ref (xattr);
+ } else if (is_dir) {
+ dht_aggregate_xattr (local->xattr, xattr);
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+out:
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt)) {
- if (!S_ISDIR (local->stbuf.st_mode)
- && (local->hashed_subvol != local->cached_subvol)
- && (local->stbuf.st_nlink == 1)
- && (conf->unhashed_sticky_bit)) {
- local->stbuf.st_mode |= S_ISVTX;
- }
-
+ if (!IA_ISDIR (local->stbuf.ia_type)
+ && (local->hashed_subvol != local->cached_subvol)
+ && (local->stbuf.ia_nlink == 1)
+ && (conf && conf->unhashed_sticky_bit)) {
+ local->stbuf.ia_prot.sticky = 1;
+ }
+ if (local->need_selfheal) {
+ local->need_selfheal = 0;
+ uuid_copy (local->gfid, local->stbuf.ia_gfid);
+ local->stbuf.ia_gid = local->prebuf.ia_gid;
+ local->stbuf.ia_uid = local->prebuf.ia_uid;
+ copy = create_frame (this, this->ctx->pool);
+ if (copy) {
+ copy_local = dht_local_init (copy, &local->loc,
+ NULL, 0);
+ if (!copy_local)
+ goto cont;
+ copy_local->stbuf = local->stbuf;
+ copy->local = copy_local;
+ FRAME_SU_DO (copy, dht_local_t);
+ ret = synctask_new (this->ctx->env,
+ dht_dir_attr_heal,
+ dht_dir_attr_heal_done,
+ copy, copy);
+ }
+ }
+cont:
if (local->layout_mismatch) {
- local->op_ret = -1;
- local->op_errno = ESTALE;
- }
-
- DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, local->xattr);
- }
+ /* Found layout mismatch in the directory, need to
+ fix this in the inode context */
+ dht_layout_unref (this, local->layout);
+ local->layout = NULL;
+ dht_lookup_directory (frame, this, &local->loc);
+ return 0;
+ }
- return 0;
+ if (local->need_lookup_everywhere) {
+ /* As the current layout gave ENOENT error, we would
+ need a new layout */
+ dht_layout_unref (this, local->layout);
+ local->layout = NULL;
+
+ /* We know that current cached subvol is no more
+ valid, get the new one */
+ local->cached_subvol = NULL;
+ dht_lookup_everywhere (frame, this, &local->loc);
+ return 0;
+ }
+ if (local->return_estale) {
+ local->op_ret = -1;
+ local->op_errno = ESTALE;
+ }
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
+
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
+ }
+
+err:
+ return ret;
}
int
dht_lookup_linkfile_create_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *stbuf)
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- xlator_t *cached_subvol = NULL;
- dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *cached_subvol = NULL;
+ dht_conf_t *conf = NULL;
int ret = -1;
- local = frame->local;
- cached_subvol = local->cached_subvol;
- conf = this->private;
+ GF_VALIDATE_OR_GOTO ("dht", frame, out);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", this->private, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
- ret = dht_layout_inode_set (this, local->cached_subvol, inode);
+ local = frame->local;
+ cached_subvol = local->cached_subvol;
+ conf = this->private;
+
+ ret = dht_layout_preset (this, local->cached_subvol, inode);
if (ret < 0) {
gf_log (this->name, GF_LOG_DEBUG,
"failed to set layout for subvolume %s",
@@ -329,65 +760,232 @@ dht_lookup_linkfile_create_cbk (call_frame_t *frame, void *cookie,
goto unwind;
}
- local->op_ret = 0;
- if ((local->stbuf.st_nlink == 1)
- && (conf->unhashed_sticky_bit)) {
- local->stbuf.st_mode |= S_ISVTX;
- }
+ local->op_ret = 0;
+ if ((local->stbuf.ia_nlink == 1)
+ && (conf && conf->unhashed_sticky_bit)) {
+ local->stbuf.ia_prot.sticky = 1;
+ }
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
+ }
unwind:
- DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, local->xattr);
- return 0;
+ if (local->linked == _gf_true)
+ dht_linkfile_attr_heal (frame, this);
+
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
+out:
+ return ret;
+}
+
+
+int
+dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this)
+{
+ int ret = 0;
+ dht_local_t *local = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *cached_subvol = NULL;
+ dht_layout_t *layout = NULL;
+
+ local = frame->local;
+ hashed_subvol = local->hashed_subvol;
+ cached_subvol = local->cached_subvol;
+
+ if (local->file_count && local->dir_count) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "path %s exists as a file on one subvolume "
+ "and directory on another. "
+ "Please fix it manually",
+ local->loc.path);
+ DHT_STACK_UNWIND (lookup, frame, -1, EIO, NULL, NULL, NULL,
+ NULL);
+ return 0;
+ }
+
+ if (local->dir_count) {
+ dht_lookup_directory (frame, this, &local->loc);
+ return 0;
+ }
+
+ if (!cached_subvol) {
+ DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, NULL, NULL, NULL,
+ NULL);
+ return 0;
+ }
+
+ if (local->need_lookup_everywhere) {
+ if (uuid_compare (local->gfid, local->inode->gfid)) {
+ /* GFID different, return error */
+ DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, NULL,
+ NULL, NULL, NULL);
+ return 0;
+ }
+ local->op_ret = 0;
+ local->op_errno = 0;
+ layout = dht_layout_for_subvol (this, cached_subvol);
+ if (!layout) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: no pre-set layout for subvolume %s",
+ local->loc.path, (cached_subvol ?
+ cached_subvol->name :
+ "<nil>"));
+ }
+
+ ret = dht_layout_set (this, local->inode, layout);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: failed to set layout for subvol %s",
+ local->loc.path, (cached_subvol ?
+ cached_subvol->name :
+ "<nil>"));
+ }
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
+
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ DHT_STACK_UNWIND (lookup, frame, local->op_ret,
+ local->op_errno, local->inode,
+ &local->stbuf, local->xattr,
+ &local->postparent);
+ return 0;
+ }
+
+ if (!hashed_subvol) {
+ gf_log (this->name, GF_LOG_INFO,
+ "cannot create linkfile file for %s on %s: "
+ "hashed subvolume cannot be found.",
+ local->loc.path, cached_subvol->name);
+
+ local->op_ret = 0;
+ local->op_errno = 0;
+
+ ret = dht_layout_preset (frame->this, cached_subvol,
+ local->inode);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "failed to set layout for subvol %s",
+ cached_subvol ? cached_subvol->name :
+ "<nil>");
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ }
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
+
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ DHT_STACK_UNWIND (lookup, frame, local->op_ret,
+ local->op_errno, local->inode,
+ &local->stbuf, local->xattr,
+ &local->postparent);
+ return 0;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "linking file %s existing on %s to %s (hash)",
+ local->loc.path, cached_subvol->name,
+ hashed_subvol->name);
+
+ ret = dht_linkfile_create (frame,
+ dht_lookup_linkfile_create_cbk, this,
+ cached_subvol, hashed_subvol, &local->loc);
+
+ return ret;
+}
+
+
+int
+dht_lookup_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ int this_call_cnt = 0;
+
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ dht_lookup_everywhere_done (frame, this);
+ }
+
+ return 0;
}
int
dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf, dict_t *xattr)
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
{
- dht_conf_t *conf = NULL;
dht_local_t *local = NULL;
int this_call_cnt = 0;
call_frame_t *prev = NULL;
- int is_linkfile = 0;
- int is_dir = 0;
- xlator_t *subvol = NULL;
- loc_t *loc = NULL;
- xlator_t *link_subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- xlator_t *cached_subvol = NULL;
- int ret = -1;
+ int is_linkfile = 0;
+ int is_dir = 0;
+ xlator_t *subvol = NULL;
+ loc_t *loc = NULL;
+ xlator_t *link_subvol = NULL;
+ int ret = -1;
+ int32_t fd_count = 0;
+ dht_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, out);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+ GF_VALIDATE_OR_GOTO ("dht", this->private, out);
+
+ local = frame->local;
+ loc = &local->loc;
+ conf = this->private;
- conf = this->private;
+ prev = cookie;
+ subvol = prev->this;
- local = frame->local;
- loc = &local->loc;
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ if (op_errno != ENOENT)
+ local->op_errno = op_errno;
+ goto unlock;
+ }
- prev = cookie;
- subvol = prev->this;
+ if (uuid_is_null (local->gfid))
+ uuid_copy (local->gfid, buf->ia_gfid);
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- if (op_errno != ENOENT)
- local->op_errno = op_errno;
- goto unlock;
- }
+ if (uuid_compare (local->gfid, buf->ia_gfid)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: gfid differs on subvolume %s",
+ loc->path, prev->this->name);
+ }
- is_linkfile = check_is_linkfile (inode, buf, xattr);
- is_dir = check_is_dir (inode, buf, xattr);
-
- if (is_linkfile) {
- link_subvol = dht_linkfile_subvol (this, inode, buf,
- xattr);
- gf_log (this->name, GF_LOG_DEBUG,
- "found on %s linkfile %s (-> %s)",
- subvol->name, loc->path,
- link_subvol ? link_subvol->name : "''");
- goto unlock;
- }
+ is_linkfile = check_is_linkfile (inode, buf, xattr,
+ conf->link_xattr_name);
+ is_dir = check_is_dir (inode, buf, xattr);
+
+ if (is_linkfile) {
+ link_subvol = dht_linkfile_subvol (this, inode, buf,
+ xattr);
+ gf_log (this->name, GF_LOG_DEBUG,
+ "found on %s linkfile %s (-> %s)",
+ subvol->name, loc->path,
+ link_subvol ? link_subvol->name : "''");
+ goto unlock;
+ }
+
+ /* non linkfile GFID takes precedence */
+ uuid_copy (local->gfid, buf->ia_gfid);
if (is_dir) {
local->dir_count++;
@@ -400,189 +998,183 @@ dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!local->cached_subvol) {
/* found one file */
- dht_stat_merge (this, &local->stbuf, buf,
+ dht_iatt_merge (this, &local->stbuf, buf,
subvol);
local->xattr = dict_ref (xattr);
local->cached_subvol = subvol;
gf_log (this->name, GF_LOG_DEBUG,
"found on %s file %s",
subvol->name, loc->path);
+
+ dht_iatt_merge (this, &local->postparent,
+ postparent, subvol);
} else {
- gf_log (this->name, GF_LOG_DEBUG,
+ /* This is where we need 'rename' both entries logic */
+ gf_log (this->name, GF_LOG_WARNING,
"multiple subvolumes (%s and %s) have "
- "file %s", local->cached_subvol->name,
+ "file %s (preferably rename the file "
+ "in the backend, and do a fresh lookup)",
+ local->cached_subvol->name,
subvol->name, local->loc.path);
}
}
- }
+ }
unlock:
- UNLOCK (&frame->lock);
-
- if (is_linkfile) {
- gf_log (this->name, GF_LOG_DEBUG,
- "deleting stale linkfile %s on %s",
- loc->path, subvol->name);
- dht_linkfile_unlink (frame, this, subvol, loc);
- }
-
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- hashed_subvol = local->hashed_subvol;
- cached_subvol = local->cached_subvol;
-
- if (local->file_count && local->dir_count) {
- gf_log (this->name, GF_LOG_ERROR,
- "path %s exists as a file on one subvolume "
- "and directory on another. "
- "Please fix it manually",
- loc->path);
- DHT_STACK_UNWIND (frame, -1, EIO, NULL, NULL, NULL);
- return 0;
- }
-
- if (local->dir_count) {
- dht_lookup_directory (frame, this, &local->loc);
- return 0;
- }
-
- if (!cached_subvol) {
- DHT_STACK_UNWIND (frame, -1, ENOENT, NULL, NULL, NULL);
- return 0;
- }
-
- if (!hashed_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "cannot create linkfile file for %s on %s: "
- "hashed subvolume cannot be found.",
- loc->path, cached_subvol->name);
-
- local->op_ret = 0;
- local->op_errno = 0;
-
- ret = dht_layout_inode_set (frame->this, cached_subvol,
- local->inode);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set layout for subvol %s",
- cached_subvol ? cached_subvol->name :
- "<nil>");
- local->op_ret = -1;
- local->op_errno = EINVAL;
- }
+ UNLOCK (&frame->lock);
- DHT_STACK_UNWIND (frame, local->op_ret,
- local->op_errno, local->inode,
- &local->stbuf, local->xattr);
+ if (is_linkfile) {
+ ret = dict_get_int32 (xattr, GLUSTERFS_OPEN_FD_COUNT, &fd_count);
+ /* Delete the linkfile only if there are no open fds on it.
+ if there is a open-fd, it may be in migration */
+ if (!ret && (fd_count == 0)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "deleting stale linkfile %s on %s",
+ loc->path, subvol->name);
+ STACK_WIND (frame, dht_lookup_unlink_cbk,
+ subvol, subvol->fops->unlink, loc, 0, NULL);
return 0;
}
+ }
- gf_log (this->name, GF_LOG_DEBUG,
- "linking file %s existing on %s to %s (hash)",
- loc->path, cached_subvol->name,
- hashed_subvol->name);
-
- dht_linkfile_create (frame,
- dht_lookup_linkfile_create_cbk,
- cached_subvol, hashed_subvol, loc);
- }
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ dht_lookup_everywhere_done (frame, this);
+ }
- return 0;
+out:
+ return ret;
}
int
dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc)
{
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- int i = 0;
- int call_cnt = 0;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int i = 0;
+ int call_cnt = 0;
- conf = this->private;
- local = frame->local;
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", this->private, out);
+ GF_VALIDATE_OR_GOTO ("dht", loc, out);
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
+ conf = this->private;
+ local = frame->local;
- if (!local->inode)
- local->inode = inode_ref (loc->inode);
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_lookup_everywhere_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup,
- loc, local->xattr_req);
- }
+ if (!local->inode)
+ local->inode = inode_ref (loc->inode);
- return 0;
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_lookup_everywhere_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup,
+ loc, local->xattr_req);
+ }
+
+ return 0;
+out:
+ DHT_STACK_UNWIND (lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL);
+err:
+ return -1;
}
int
dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
- inode_t *inode, struct stat *stbuf, dict_t *xattr)
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
{
- call_frame_t *prev = NULL;
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- xlator_t *subvol = NULL;
- loc_t *loc = NULL;
- dht_conf_t *conf = NULL;
+ call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ loc_t *loc = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, out);
+ GF_VALIDATE_OR_GOTO ("dht", this, unwind);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, unwind);
+ GF_VALIDATE_OR_GOTO ("dht", this->private, unwind);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, unwind);
prev = cookie;
- subvol = prev->this;
- conf = this->private;
- local = frame->local;
- loc = &local->loc;
+ subvol = prev->this;
+ conf = this->private;
+ local = frame->local;
+ loc = &local->loc;
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "lookup of %s on %s (following linkfile) failed (%s)",
- local->loc.path, subvol->name, strerror (op_errno));
- goto err;
- }
+ gf_log (this->name, GF_LOG_INFO,
+ "lookup of %s on %s (following linkfile) failed (%s)",
+ local->loc.path, subvol->name, strerror (op_errno));
+
+ /* If cached subvol returned ENOTCONN, do not do
+ lookup_everywhere. We need to make sure linkfile does not get
+ removed, which can take away the namespace, and subvol is
+ anyways down. */
+
+ if (op_errno != ENOTCONN)
+ goto err;
+ else
+ goto unwind;
+ }
if (check_is_dir (inode, stbuf, xattr)) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_INFO,
"lookup of %s on %s (following linkfile) reached dir",
local->loc.path, subvol->name);
goto err;
}
- if (check_is_linkfile (inode, stbuf, xattr)) {
- gf_log (this->name, GF_LOG_DEBUG,
+ if (check_is_linkfile (inode, stbuf, xattr, conf->link_xattr_name)) {
+ gf_log (this->name, GF_LOG_INFO,
"lookup of %s on %s (following linkfile) reached link",
local->loc.path, subvol->name);
goto err;
}
- if ((stbuf->st_nlink == 1)
- && (conf->unhashed_sticky_bit)) {
- stbuf->st_mode |= S_ISVTX;
- }
- dht_itransform (this, prev->this, stbuf->st_ino, &stbuf->st_ino);
-
- layout = dht_layout_for_subvol (this, prev->this);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no pre-set layout for subvolume %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ if (uuid_compare (local->gfid, stbuf->ia_gfid)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: gfid different on data file on %s",
+ local->loc.path, subvol->name);
+ goto err;
+ }
- inode_ctx_put (inode, this, (uint64_t)(long)layout);
+ if ((stbuf->ia_nlink == 1)
+ && (conf && conf->unhashed_sticky_bit)) {
+ stbuf->ia_prot.sticky = 1;
+ }
-out:
- DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf, xattr);
+ ret = dht_layout_preset (this, prev->this, inode);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "failed to set layout for subvolume %s",
+ prev->this->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ }
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
+ }
+
+unwind:
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
+ DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
+ postparent);
return 0;
err:
dht_lookup_everywhere (frame, this, loc);
-
+out:
return 0;
}
@@ -594,21 +1186,38 @@ dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc)
int i = 0;
dht_conf_t *conf = NULL;
dht_local_t *local = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, out);
+ GF_VALIDATE_OR_GOTO ("dht", this, unwind);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, unwind);
+ GF_VALIDATE_OR_GOTO ("dht", this->private, unwind);
+ GF_VALIDATE_OR_GOTO ("dht", loc, unwind);
conf = this->private;
local = frame->local;
call_cnt = conf->subvolume_cnt;
local->call_cnt = call_cnt;
-
+
local->layout = dht_layout_new (this, conf->subvolume_cnt);
if (!local->layout) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- DHT_STACK_UNWIND (frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
+ goto unwind;
+ }
+
+ if (local->xattr != NULL) {
+ dict_unref (local->xattr);
+ local->xattr = NULL;
}
-
+
+ if (!uuid_is_null (local->gfid)) {
+ ret = dict_set_static_bin (local->xattr_req, "gfid-req",
+ local->gfid, 16);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set gfid", local->loc.path);
+ }
+
for (i = 0; i < call_cnt; i++) {
STACK_WIND (frame, dht_lookup_dir_cbk,
conf->subvolumes[i],
@@ -616,23 +1225,35 @@ dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc)
&local->loc, local->xattr_req);
}
return 0;
+unwind:
+ DHT_STACK_UNWIND (lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL);
+out:
+ return 0;
+
}
int
dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno,
- inode_t *inode, struct stat *stbuf, dict_t *xattr)
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
{
- dht_layout_t *layout = NULL;
- char is_linkfile = 0;
- char is_dir = 0;
- xlator_t *subvol = NULL;
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- loc_t *loc = NULL;
- call_frame_t *prev = NULL;
+ char is_linkfile = 0;
+ char is_dir = 0;
+ xlator_t *subvol = NULL;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ loc_t *loc = NULL;
+ call_frame_t *prev = NULL;
+ int ret = 0;
+ dht_layout_t *parent_layout = NULL;
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+ GF_VALIDATE_OR_GOTO ("dht", this->private, out);
conf = this->private;
@@ -640,76 +1261,140 @@ dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
loc = &local->loc;
- if (ENTRY_MISSING (op_ret, op_errno)) {
- if (conf->search_unhashed) {
- local->op_errno = ENOENT;
- dht_lookup_everywhere (frame, this, loc);
- return 0;
- }
- }
+ /* This is required for handling stale linkfile deletion,
+ * or any more call which happens from this 'loc'.
+ */
+ if (!op_ret && uuid_is_null (local->gfid))
+ memcpy (local->gfid, stbuf->ia_gfid, 16);
+
+ if (ENTRY_MISSING (op_ret, op_errno)) {
+ gf_log (this->name, GF_LOG_TRACE, "Entry %s missing on subvol"
+ " %s", loc->path, prev->this->name);
+ if (conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_ON) {
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere (frame, this, loc);
+ return 0;
+ }
+ if ((conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_AUTO) &&
+ (loc->parent)) {
+ ret = dht_inode_ctx_layout_get (loc->parent, this,
+ &parent_layout);
+ if (ret || !parent_layout)
+ goto out;
+ if (parent_layout->search_unhashed) {
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere (frame, this, loc);
+ return 0;
+ }
+ }
+ }
- if (op_ret == 0) {
- is_dir = check_is_dir (inode, stbuf, xattr);
- if (is_dir) {
- local->inode = inode_ref (inode);
- local->xattr = dict_ref (xattr);
- }
- }
+ if (op_ret == 0) {
+ is_dir = check_is_dir (inode, stbuf, xattr);
+ if (is_dir) {
+ local->inode = inode_ref (inode);
+ local->xattr = dict_ref (xattr);
+ }
+ }
- if (is_dir || (op_ret == -1 && op_errno == ENOTCONN)) {
+ if (is_dir || (op_ret == -1 && op_errno == ENOTCONN)) {
dht_lookup_directory (frame, this, &local->loc);
return 0;
- }
-
- if (op_ret == -1)
+ }
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG, "Lookup of %s for subvolume"
+ " %s failed with error %s", loc->path, prev->this->name,
+ strerror (op_errno));
goto out;
+ }
- is_linkfile = check_is_linkfile (inode, stbuf, xattr);
- is_dir = check_is_dir (inode, stbuf, xattr);
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
- if (!is_dir && !is_linkfile) {
+ if (!is_linkfile) {
/* non-directory and not a linkfile */
- dht_itransform (this, prev->this, stbuf->st_ino,
- &stbuf->st_ino);
-
- layout = dht_layout_for_subvol (this, prev->this);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no pre-set layout for subvolume %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
-
- inode_ctx_put (inode, this, (uint64_t)(long)layout);
- goto out;
- }
-
- if (is_linkfile) {
- subvol = dht_linkfile_subvol (this, inode, stbuf, xattr);
-
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "linkfile not having link subvolume. path=%s",
- loc->path);
- dht_lookup_everywhere (frame, this, loc);
- return 0;
+ ret = dht_layout_preset (this, prev->this, inode);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "could not set pre-set layout for subvolume %s",
+ prev->this->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
}
+ goto out;
+ }
- STACK_WIND (frame, dht_lookup_linkfile_cbk,
- subvol, subvol->fops->lookup,
- &local->loc, local->xattr_req);
+ subvol = dht_linkfile_subvol (this, inode, stbuf, xattr);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "linkfile not having link subvolume. path=%s",
+ loc->path);
+ dht_lookup_everywhere (frame, this, loc);
+ return 0;
}
+ STACK_WIND (frame, dht_lookup_linkfile_cbk,
+ subvol, subvol->fops->lookup,
+ &local->loc, local->xattr_req);
+
return 0;
out:
- DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf, xattr);
+ /*
+ * FIXME: postparent->ia_size and postparent->st_blocks do not have
+ * correct values. since, postparent corresponds to a directory these
+ * two members should have values equal to sum of corresponding values
+ * from each of the subvolume. See dht_iatt_merge for reference.
+ */
+
+ if (!op_ret && local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
+ }
+
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
+ DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
+ postparent);
+err:
return 0;
}
+/* For directories, check if acl xattrs have been requested (by the acl xlator),
+ * if not, request for them. These xattrs are needed for dht dir self-heal to
+ * perform proper self-healing of dirs
+ */
+void
+dht_check_and_set_acl_xattr_req (inode_t *inode, dict_t *xattr_req)
+{
+ int ret = 0;
+
+ GF_ASSERT (inode);
+ GF_ASSERT (xattr_req);
+
+ if (inode->ia_type != IA_IFDIR)
+ return;
+
+ if (!dict_get (xattr_req, POSIX_ACL_ACCESS_XATTR)) {
+ ret = dict_set_int8 (xattr_req, POSIX_ACL_ACCESS_XATTR, 0);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set key %s",
+ POSIX_ACL_ACCESS_XATTR);
+ }
+
+ if (!dict_get (xattr_req, POSIX_ACL_DEFAULT_XATTR)) {
+ ret = dict_set_int8 (xattr_req, POSIX_ACL_DEFAULT_XATTR, 0);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set key %s",
+ POSIX_ACL_DEFAULT_XATTR);
+ }
+
+ return;
+}
int
dht_lookup (call_frame_t *frame, xlator_t *this,
@@ -717,56 +1402,68 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
{
xlator_t *subvol = NULL;
xlator_t *hashed_subvol = NULL;
- xlator_t *cached_subvol = NULL;
dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
+ dht_conf_t *conf = NULL;
int ret = -1;
int op_errno = -1;
- dht_layout_t *layout = NULL;
- int i = 0;
- int call_cnt = 0;
-
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
+ loc_t new_loc = {0,};
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
- conf = this->private;
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ conf = this->private;
+ if (!conf)
+ goto err;
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "copying location failed for path=%s",
- loc->path);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP);
+ if (!local) {
+ op_errno = ENOMEM;
goto err;
}
-
- if (xattr_req) {
- local->xattr_req = dict_ref (xattr_req);
- } else {
- local->xattr_req = dict_new ();
- }
- hashed_subvol = dht_subvol_get_hashed (this, loc);
- cached_subvol = dht_subvol_get_cached (this, loc->inode);
+ ret = dht_filter_loc_subvol_key (this, loc, &new_loc,
+ &hashed_subvol);
+ if (ret) {
+ loc_wipe (&local->loc);
+ ret = loc_dup (&new_loc, &local->loc);
- local->cached_subvol = cached_subvol;
- local->hashed_subvol = hashed_subvol;
+ /* we no more need 'new_loc' entries */
+ loc_wipe (&new_loc);
- if (is_revalidate (loc)) {
- layout = dht_layout_get (this, loc->inode);
+ /* check if loc_dup() is successful */
+ if (ret == -1) {
+ op_errno = errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "copying location failed for path=%s",
+ loc->path);
+ goto err;
+ }
+ }
+
+ if (xattr_req) {
+ local->xattr_req = dict_ref (xattr_req);
+ } else {
+ local->xattr_req = dict_new ();
+ }
+
+ if (uuid_is_null (loc->pargfid) && !uuid_is_null (loc->gfid) &&
+ !__is_root_gfid (loc->inode->gfid)) {
+ local->cached_subvol = NULL;
+ dht_discover (frame, this, loc);
+ return 0;
+ }
+
+ if (!hashed_subvol)
+ hashed_subvol = dht_subvol_get_hashed (this, loc);
+ local->hashed_subvol = hashed_subvol;
+ if (is_revalidate (loc)) {
+ layout = local->layout;
if (!layout) {
gf_log (this->name, GF_LOG_DEBUG,
"revalidate without cache. path=%s",
@@ -775,67 +1472,93 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
goto err;
}
- if (layout->gen && (layout->gen < conf->gen)) {
- gf_log (this->name, GF_LOG_TRACE,
- "incomplete layout failure for path=%s",
- loc->path);
- op_errno = ESTALE;
- goto err;
- }
+ if (layout->gen && (layout->gen < conf->gen)) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "incomplete layout failure for path=%s",
+ loc->path);
- local->inode = inode_ref (loc->inode);
- local->st_ino = loc->inode->ino;
-
- local->call_cnt = layout->cnt;
- call_cnt = local->call_cnt;
-
- /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute,
- * revalidates directly go to the cached-subvolume.
- */
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
-
- for (i = 0; i < layout->cnt; i++) {
+ dht_layout_unref (this, local->layout);
+ local->layout = NULL;
+ local->cached_subvol = NULL;
+ goto do_fresh_lookup;
+ }
+
+ local->inode = inode_ref (loc->inode);
+
+ /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute,
+ * revalidates directly go to the cached-subvolume.
+ */
+ ret = dict_set_uint32 (local->xattr_req,
+ conf->xattr_name, 4 * 4);
+
+ if (IA_ISDIR (local->inode->ia_type)) {
+ local->call_cnt = call_cnt = conf->subvolume_cnt;
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_revalidate_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup,
+ loc, local->xattr_req);
+ }
+ return 0;
+ }
+
+ call_cnt = local->call_cnt = layout->cnt;
+
+ /* need it for self-healing linkfiles which is
+ 'in-migration' state */
+ ret = dict_set_uint32 (local->xattr_req,
+ GLUSTERFS_OPEN_FD_COUNT, 4);
+
+ /* need it for dir self-heal */
+ dht_check_and_set_acl_xattr_req (loc->inode, local->xattr_req);
+
+ for (i = 0; i < call_cnt; i++) {
subvol = layout->list[i].xlator;
-
+
STACK_WIND (frame, dht_revalidate_cbk,
subvol, subvol->fops->lookup,
- loc, local->xattr_req);
+ &local->loc, local->xattr_req);
- if (!--call_cnt)
- break;
}
} else {
- /* TODO: remove the hard-coding */
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
+ do_fresh_lookup:
+ /* TODO: remove the hard-coding */
+ ret = dict_set_uint32 (local->xattr_req,
+ conf->xattr_name, 4 * 4);
+
+ ret = dict_set_uint32 (local->xattr_req,
+ conf->link_xattr_name, 256);
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht.linkto", 256);
+ /* need it for self-healing linkfiles which is
+ 'in-migration' state */
+ ret = dict_set_uint32 (local->xattr_req,
+ GLUSTERFS_OPEN_FD_COUNT, 4);
+
+ /* need it for dir self-heal */
+ dht_check_and_set_acl_xattr_req (loc->inode, local->xattr_req);
if (!hashed_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s, "
- "checking on all the subvols to see if "
- "it is a directory", loc->path);
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
-
- local->layout = dht_layout_new (this, conf->subvolume_cnt);
- if (!local->layout) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_lookup_dir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup,
- &local->loc, local->xattr_req);
- }
- return 0;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no subvolume in layout for path=%s, "
+ "checking on all the subvols to see if "
+ "it is a directory", loc->path);
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ local->layout = dht_layout_new (this,
+ conf->subvolume_cnt);
+ if (!local->layout) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_lookup_dir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup,
+ &local->loc, local->xattr_req);
+ }
+ return 0;
}
STACK_WIND (frame, dht_lookup_cbk,
@@ -846,1313 +1569,1660 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL);
return 0;
}
int
-dht_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct stat *stbuf)
+dht_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ local = frame->local;
+ prev = cookie;
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto unlock;
+ }
- dht_stat_merge (this, &local->stbuf, stbuf, prev->this);
-
- if (local->inode)
- local->stbuf.st_ino = local->inode->ino;
- local->op_ret = 0;
- }
+ local->op_ret = 0;
+
+ local->postparent = *postparent;
+ local->preparent = *preparent;
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->preparent, 0);
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
+ }
unlock:
- UNLOCK (&frame->lock);
+ UNLOCK (&frame->lock);
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf);
+ DHT_STACK_UNWIND (unlink, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, NULL);
return 0;
}
int
-dht_stat (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+dht_unlink_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int i = 0;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *cached_subvol = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ local = frame->local;
+ prev = cookie;
- layout = dht_layout_get (this, loc->inode);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ LOCK (&frame->lock);
+ {
+ if ((op_ret == -1) && !((op_errno == ENOENT) ||
+ (op_errno == ENOTCONN))) {
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto unlock;
+ }
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ local->op_ret = 0;
+ }
+unlock:
+ UNLOCK (&frame->lock);
- local->inode = inode_ref (loc->inode);
- local->call_cnt = layout->cnt;
+ if (local->op_ret == -1)
+ goto err;
- for (i = 0; i < layout->cnt; i++) {
- subvol = layout->list[i].xlator;
+ cached_subvol = dht_subvol_get_cached (this, local->loc.inode);
+ if (!cached_subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s",
+ local->loc.path);
+ local->op_errno = EINVAL;
+ goto err;
+ }
- STACK_WIND (frame, dht_attr_cbk,
- subvol, subvol->fops->stat,
- loc);
- }
+ STACK_WIND (frame, dht_unlink_cbk,
+ cached_subvol, cached_subvol->fops->unlink,
+ &local->loc, local->flags, NULL);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
-
- return 0;
+ DHT_STACK_UNWIND (unlink, frame, -1, local->op_errno,
+ NULL, NULL, NULL);
+ return 0;
}
-
int
-dht_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd)
+dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int i = 0;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- layout = dht_layout_get (this, fd->inode);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ local = frame->local;
+ prev = cookie;
- local->inode = inode_ref (fd->inode);
- local->call_cnt = layout->cnt;;
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto unlock;
+ }
- for (i = 0; i < layout->cnt; i++) {
- subvol = layout->list[i].xlator;
- STACK_WIND (frame, dht_attr_cbk,
- subvol, subvol->fops->fstat,
- fd);
- }
+ local->op_ret = 0;
+ }
+unlock:
+ UNLOCK (&frame->lock);
- return 0;
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ DHT_STACK_UNWIND (setxattr, frame, local->op_ret,
+ local->op_errno, NULL);
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+ return 0;
+}
- return 0;
+static void
+fill_layout_info (dht_layout_t *layout, char *buf)
+{
+ int i = 0;
+ char tmp_buf[128] = {0,};
+
+ for (i = 0; i < layout->cnt; i++) {
+ snprintf (tmp_buf, 128, "(%s %u %u)",
+ layout->list[i].xlator->name,
+ layout->list[i].start,
+ layout->list[i].stop);
+ if (i)
+ strcat (buf, " ");
+ strcat (buf, tmp_buf);
+ }
}
+void
+dht_fill_pathinfo_xattr (xlator_t *this, dht_local_t *local,
+ char *xattr_buf, int32_t alloc_len,
+ int flag, char *layout_buf)
+{
+ if (flag && local->xattr_val)
+ snprintf (xattr_buf, alloc_len,
+ "((<"DHT_PATHINFO_HEADER"%s> %s) (%s-layout %s))",
+ this->name, local->xattr_val, this->name,
+ layout_buf);
+ else if (local->xattr_val)
+ snprintf (xattr_buf, alloc_len,
+ "(<"DHT_PATHINFO_HEADER"%s> %s)",
+ this->name, local->xattr_val);
+ else if (flag)
+ snprintf (xattr_buf, alloc_len, "(%s-layout %s)",
+ this->name, layout_buf);
+}
int
-dht_chmod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode)
+dht_vgetxattr_alloc_and_fill (dht_local_t *local, dict_t *xattr, xlator_t *this,
+ int op_errno)
{
- dht_layout_t *layout = NULL;
- dht_local_t *local = NULL;
- int op_errno = -1;
- int i = -1;
+ int ret = -1;
+ char *value = NULL;
+ int32_t plen = 0;
+ ret = dict_get_str (xattr, local->xsel, &value);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Subvolume %s returned -1 (%s)", this->name,
+ strerror (op_errno));
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto out;
+ }
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ local->alloc_len += strlen(value);
- layout = dht_layout_get (this, loc->inode);
+ if (!local->xattr_val) {
+ local->alloc_len += (strlen (DHT_PATHINFO_HEADER) + 10);
+ local->xattr_val = GF_CALLOC (local->alloc_len, sizeof (char),
+ gf_common_mt_char);
+ if (!local->xattr_val) {
+ ret = -1;
+ goto out;
+ }
+ }
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ if (local->xattr_val) {
+ plen = strlen (local->xattr_val);
+ if (plen) {
+ /* extra byte(s) for \0 to be safe */
+ local->alloc_len += (plen + 2);
+ local->xattr_val = GF_REALLOC (local->xattr_val,
+ local->alloc_len);
+ if (!local->xattr_val) {
+ ret = -1;
+ goto out;
+ }
+ }
- if (!layout_is_sane (layout)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "layout is not sane for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ (void) strcat (local->xattr_val, value);
+ (void) strcat (local->xattr_val, " ");
+ local->op_ret = 0;
+ }
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_DEBUG,
- "memory allocation failed :(");
- goto err;
- }
+ ret = 0;
- local->inode = inode_ref (loc->inode);
- local->call_cnt = layout->cnt;
+ out:
+ return ret;
+}
- for (i = 0; i < layout->cnt; i++) {
- STACK_WIND (frame, dht_attr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->chmod,
- loc, mode);
- }
+int
+dht_vgetxattr_fill_and_set (dht_local_t *local, dict_t **dict, xlator_t *this,
+ gf_boolean_t flag)
+{
+ int ret = -1;
+ char *xattr_buf = NULL;
+ char layout_buf[8192] = {0,};
- return 0;
+ if (flag)
+ fill_layout_info (local->layout, layout_buf);
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+ *dict = dict_new ();
+ if (!*dict)
+ goto out;
- return 0;
-}
+ local->xattr_val[strlen (local->xattr_val) - 1] = '\0';
+
+ /* we would need max this many bytes to create xattr string
+ * extra 40 bytes is just an estimated amount of additional
+ * space required as we include translator name and some
+ * spaces, brackets etc. when forming the pathinfo string.
+ *
+ * For node-uuid we just don't have all the pretty formatting,
+ * but since this is a generic routine for pathinfo & node-uuid
+ * we dont have conditional space allocation and try to be
+ * generic
+ */
+ local->alloc_len += (2 * strlen (this->name))
+ + strlen (layout_buf)
+ + 40;
+ xattr_buf = GF_CALLOC (local->alloc_len, sizeof (char),
+ gf_common_mt_char);
+ if (!xattr_buf)
+ goto out;
+ if (XATTR_IS_PATHINFO (local->xsel)) {
+ (void) dht_fill_pathinfo_xattr (this, local, xattr_buf,
+ local->alloc_len, flag,
+ layout_buf);
+ } else if (XATTR_IS_NODE_UUID (local->xsel)) {
+ (void) snprintf (xattr_buf, local->alloc_len, "%s",
+ local->xattr_val);
+ } else {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unknown local->xsel (%s)", local->xsel);
+ goto out;
+ }
+
+ ret = dict_set_dynstr (*dict, local->xsel, xattr_buf);
+ GF_FREE (local->xattr_val);
+
+ out:
+ return ret;
+}
int
-dht_chown (call_frame_t *frame, xlator_t *this,
- loc_t *loc, uid_t uid, gid_t gid)
+dht_vgetxattr_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
{
- dht_layout_t *layout = NULL;
- dht_local_t *local = NULL;
- int op_errno = -1;
- int i = -1;
+ int ret = 0;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ dict_t *dict = NULL;
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (frame->local, out);
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ local = frame->local;
- layout = dht_layout_get (this, loc->inode);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ LOCK (&frame->lock);
+ {
+ this_call_cnt = --local->call_cnt;
+ if (op_ret < 0) {
+ if (op_errno != ENOTCONN) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "getxattr err (%s) for dir",
+ strerror (op_errno));
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ }
- if (!layout_is_sane (layout)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "layout is not sane for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ goto unlock;
+ }
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ ret = dht_vgetxattr_alloc_and_fill (local, xattr, this,
+ op_errno);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "alloc or fill failure");
+ }
+ unlock:
+ UNLOCK (&frame->lock);
- local->inode = inode_ref (loc->inode);
- local->call_cnt = layout->cnt;
+ if (!is_last_call (this_call_cnt))
+ goto out;
- for (i = 0; i < layout->cnt; i++) {
- STACK_WIND (frame, dht_attr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->chown,
- loc, uid, gid);
- }
+ /* -- last call: do patch ups -- */
- return 0;
+ if (local->op_ret == -1) {
+ goto unwind;
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+ ret = dht_vgetxattr_fill_and_set (local, &dict, this, _gf_true);
+ if (ret)
+ goto unwind;
- return 0;
-}
+ DHT_STACK_UNWIND (getxattr, frame, 0, 0, dict, xdata);
+ goto cleanup;
+ unwind:
+ DHT_STACK_UNWIND (getxattr, frame, -1, local->op_errno, NULL, NULL);
+ cleanup:
+ if (dict)
+ dict_unref (dict);
+ out:
+ return 0;
+}
int
-dht_fchmod (call_frame_t *frame, xlator_t *this,
- fd_t *fd, mode_t mode)
+dht_vgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
{
- dht_layout_t *layout = NULL;
- dht_local_t *local = NULL;
- int op_errno = -1;
- int i = -1;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
+ dht_local_t *local = NULL;
+ int ret = 0;
+ dict_t *dict = NULL;
+ call_frame_t *prev = NULL;
+ gf_boolean_t flag = _gf_true;
- layout = dht_layout_get (this, fd->inode);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ local = frame->local;
+ prev = cookie;
- if (!layout_is_sane (layout)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "layout is not sane for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_ERROR, "Subvolume %s returned -1 "
+ "(%s)", prev->this->name, strerror (op_errno));
+ goto unwind;
+ }
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ ret = dht_vgetxattr_alloc_and_fill (local, xattr, this,
+ op_errno);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "alloc or fill failure");
+ goto unwind;
+ }
- local->inode = inode_ref (fd->inode);
- local->call_cnt = layout->cnt;
+ flag = (local->layout->cnt > 1) ? _gf_true : _gf_false;
- for (i = 0; i < layout->cnt; i++) {
- STACK_WIND (frame, dht_attr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->fchmod,
- fd, mode);
- }
+ ret = dht_vgetxattr_fill_and_set (local, &dict, this, flag);
+ if (ret)
+ goto unwind;
- return 0;
+ DHT_STACK_UNWIND (getxattr, frame, 0, 0, dict, xdata);
+ goto cleanup;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+ unwind:
+ DHT_STACK_UNWIND (getxattr, frame, -1, local->op_errno,
+ NULL, NULL);
+ cleanup:
+ if (dict)
+ dict_unref (dict);
- return 0;
+ return 0;
}
-
int
-dht_fchown (call_frame_t *frame, xlator_t *this,
- fd_t *fd, uid_t uid, gid_t gid)
+dht_linkinfo_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr,
+ dict_t *xdata)
{
- dht_layout_t *layout = NULL;
- dht_local_t *local = NULL;
- int op_errno = -1;
- int i = -1;
+ int ret = 0;
+ char *value = NULL;
+
+ if (op_ret != -1) {
+ ret = dict_get_str (xattr, GF_XATTR_PATHINFO_KEY, &value);
+ if (!ret) {
+ ret = dict_set_str (xattr, GF_XATTR_LINKINFO_KEY, value);
+ if (!ret)
+ gf_log (this->name, GF_LOG_TRACE,
+ "failed to set linkinfo");
+ }
+ }
+ DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata);
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ return 0;
+}
- layout = dht_layout_get (this, fd->inode);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+int
+dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+{
+ int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
- if (!layout_is_sane (layout)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "layout is not sane for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (frame->local, out);
+ VALIDATE_OR_GOTO (this->private, out);
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ conf = this->private;
+ local = frame->local;
- local->inode = inode_ref (fd->inode);
- local->call_cnt = layout->cnt;
+ this_call_cnt = dht_frame_return (frame);
- for (i = 0; i < layout->cnt; i++) {
- STACK_WIND (frame, dht_attr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->fchown,
- fd, uid, gid);
- }
+ if (!xattr || (op_ret == -1))
+ goto out;
- return 0;
+ if (dict_get (xattr, conf->xattr_name)) {
+ dict_del (xattr, conf->xattr_name);
+ }
+ local->op_ret = 0;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+ if (!local->xattr) {
+ local->xattr = dict_copy_with_ref (xattr, NULL);
+ } else {
+ /* first aggregate everything into xattr and then copy into
+ * local->xattr. This is required as we want to have
+ * 'local->xattr' as the proper final dictionary passed above
+ * distribute xlator.
+ */
+ dht_aggregate_xattr (xattr, local->xattr);
+ local->xattr = dict_copy (xattr, local->xattr);
+ }
+out:
+ if (is_last_call (this_call_cnt)) {
+ DHT_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno,
+ local->xattr, NULL);
+ }
+ return 0;
+}
- return 0;
+int32_t
+dht_getxattr_unwind (call_frame_t *frame,
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
+{
+ DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
int
-dht_utimens (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct timespec tv[2])
+dht_getxattr_get_real_filename_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ dict_t *xattr, dict_t *xdata)
{
- dht_layout_t *layout = NULL;
- dht_local_t *local = NULL;
- int op_errno = -1;
- int i = -1;
+ int this_call_cnt = 0;
+ dht_local_t *local = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ local = frame->local;
- layout = dht_layout_get (this, loc->inode);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ if (op_ret != -1) {
+ if (local->xattr)
+ dict_unref (local->xattr);
+ local->xattr = dict_ref (xattr);
- if (!layout_is_sane (layout)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "layout is not sane for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
+ if (local->xattr_req)
+ dict_unref (local->xattr_req);
+ local->xattr_req = dict_ref (xdata);
}
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ DHT_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno,
+ local->xattr, local->xattr_req);
}
- local->inode = inode_ref (loc->inode);
- local->call_cnt = layout->cnt;
+ return 0;
+}
- for (i = 0; i < layout->cnt; i++) {
- STACK_WIND (frame, dht_attr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->utimens,
- loc, tv);
- }
- return 0;
+int
+dht_getxattr_get_real_filename (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *key, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int i = 0;
+ dht_layout_t *layout = NULL;
+ int cnt = 0;
+ xlator_t *subvol = NULL;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+
+ local = frame->local;
+ layout = local->layout;
+
+ cnt = local->call_cnt = layout->cnt;
+
+ local->op_ret = -1;
+ local->op_errno = ENODATA;
+
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND (frame, dht_getxattr_get_real_filename_cbk,
+ subvol, subvol->fops->getxattr,
+ loc, key, xdata);
+ }
return 0;
}
int
-dht_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset)
+dht_getxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *key, dict_t *xdata)
+#define DHT_IS_DIR(layout) (layout->cnt > 1)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *cached_subvol = NULL;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ xlator_t **sub_volumes = NULL;
+ int op_errno = -1;
+ int i = 0;
+ int cnt = 0;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ VALIDATE_OR_GOTO (this->private, err);
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ conf = this->private;
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ local = dht_local_init (frame, loc, NULL, GF_FOP_GETXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
- local->inode = inode_ref (loc->inode);
- local->call_cnt = 1;
-
- STACK_WIND (frame, dht_attr_cbk,
- subvol, subvol->fops->truncate,
- loc, offset);
-
- return 0;
+ goto err;
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+ layout = local->layout;
+ if (!layout) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "layout is NULL");
+ op_errno = ENOENT;
+ goto err;
+ }
- return 0;
-}
+ if (key) {
+ local->key = gf_strdup (key);
+ if (!local->key) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ }
+ if (key &&
+ (strncmp (key, GF_XATTR_GET_REAL_FILENAME_KEY,
+ strlen (GF_XATTR_GET_REAL_FILENAME_KEY)) == 0)
+ && DHT_IS_DIR(layout)) {
+ dht_getxattr_get_real_filename (frame, this, loc, key, xdata);
+ return 0;
+ }
-int
-dht_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ /* for file use cached subvolume (obviously!): see if {}
+ * below
+ * for directory:
+ * wind to all subvolumes and exclude subvolumes which
+ * return ENOTCONN (in callback)
+ *
+ * NOTE: Don't trust inode here, as that may not be valid
+ * (until inode_link() happens)
+ */
+ if (key && DHT_IS_DIR(layout) &&
+ ((strcmp (key, GF_XATTR_PATHINFO_KEY) == 0)
+ || (strcmp (key, GF_XATTR_NODE_UUID_KEY) == 0))) {
+ (void) strncpy (local->xsel, key, 256);
+ cnt = local->call_cnt = layout->cnt;
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND (frame, dht_vgetxattr_dir_cbk,
+ subvol, subvol->fops->getxattr,
+ loc, key, NULL);
+ }
+ return 0;
+ }
+ /* node-uuid or pathinfo for files */
+ if (key && ((strcmp (key, GF_XATTR_NODE_UUID_KEY) == 0)
+ || (strcmp (key, GF_XATTR_PATHINFO_KEY) == 0))) {
+ cached_subvol = local->cached_subvol;
+ (void) strncpy (local->xsel, key, 256);
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ local->call_cnt = 1;
+ STACK_WIND (frame, dht_vgetxattr_cbk, cached_subvol,
+ cached_subvol->fops->getxattr, loc, key, NULL);
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ return 0;
+ }
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ if (key && (strcmp (key, GF_XATTR_LINKINFO_KEY) == 0)) {
+ hashed_subvol = dht_subvol_get_hashed (this, loc);
+ if (!hashed_subvol) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get"
+ "hashed subvol for %s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
- local->inode = inode_ref (fd->inode);
- local->call_cnt = 1;
+ cached_subvol = dht_subvol_get_cached (this, loc->inode);
+ if (!cached_subvol) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get"
+ "cached subvol for %s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
- STACK_WIND (frame, dht_attr_cbk,
- subvol, subvol->fops->ftruncate,
- fd, offset);
+ if (hashed_subvol == cached_subvol) {
+ op_errno = ENODATA;
+ goto err;
+ }
+ if (hashed_subvol) {
+ STACK_WIND (frame, dht_linkinfo_getxattr_cbk, hashed_subvol,
+ hashed_subvol->fops->getxattr, loc,
+ GF_XATTR_PATHINFO_KEY, NULL);
+ return 0;
+ }
+ op_errno = ENODATA;
+ goto err;
+ }
- return 0;
+ if (key && (!strcmp (GF_XATTR_MARKER_KEY, key))
+ && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
+ if (DHT_IS_DIR(layout)) {
+ cnt = layout->cnt;
+ } else {
+ cnt = 1;
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+ sub_volumes = alloca ( cnt * sizeof (xlator_t *));
+ for (i = 0; i < cnt; i++)
+ *(sub_volumes + i) = layout->list[i].xlator;
- return 0;
-}
+ if (cluster_getmarkerattr (frame, this, loc, key,
+ local, dht_getxattr_unwind,
+ sub_volumes, cnt,
+ MARKER_UUID_TYPE, marker_uuid_default_gauge,
+ conf->vol_uuid)) {
+ op_errno = EINVAL;
+ goto err;
+ }
+ return 0;
+ }
-int
-dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
+ if (key && *conf->vol_uuid) {
+ if ((match_uuid_local (key, conf->vol_uuid) == 0) &&
+ (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
+ if (DHT_IS_DIR(layout)) {
+ cnt = layout->cnt;
+ } else {
+ cnt = 1;
+ }
+ sub_volumes = alloca ( cnt * sizeof (xlator_t *));
+ for (i = 0; i < cnt; i++)
+ sub_volumes[i] = layout->list[i].xlator;
+
+ if (cluster_getmarkerattr (frame, this, loc, key,
+ local, dht_getxattr_unwind,
+ sub_volumes, cnt,
+ MARKER_XTIME_TYPE,
+ marker_xtime_default_gauge,
+ conf->vol_uuid)) {
+ op_errno = EINVAL;
+ goto err;
+ }
+ return 0;
+ }
+ }
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
+ if (DHT_IS_DIR(layout)) {
+ cnt = local->call_cnt = layout->cnt;
+ } else {
+ cnt = local->call_cnt = 1;
+ }
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND (frame, dht_getxattr_cbk,
+ subvol, subvol->fops->getxattr,
+ loc, key, NULL);
+ }
+ return 0;
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno);
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
return 0;
}
-
+#undef DHT_IS_DIR
int
-dht_access (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t mask)
+dht_fgetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *key, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int op_errno = -1;
+ int i = 0;
+ int cnt = 0;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
+ VALIDATE_OR_GOTO (this->private, err);
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FGETXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "layout is NULL");
+ op_errno = ENOENT;
+ goto err;
+ }
- local->call_cnt = 1;
+ if (key) {
+ local->key = gf_strdup (key);
+ if (!local->key) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ }
- STACK_WIND (frame, dht_err_cbk,
- subvol, subvol->fops->access,
- loc, mask);
+ if ((fd->inode->ia_type == IA_IFDIR)
+ && (strncmp (key, GF_XATTR_LOCKINFO_KEY,
+ strlen (GF_XATTR_LOCKINFO_KEY) != 0))) {
+ cnt = local->call_cnt = layout->cnt;
+ } else {
+ cnt = local->call_cnt = 1;
+ }
- return 0;
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND (frame, dht_getxattr_cbk,
+ subvol, subvol->fops->fgetxattr,
+ fd, key, NULL);
+ }
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fgetxattr, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
-
int
-dht_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, const char *path)
+dht_fsetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *xattr, int flags, dict_t *xdata)
{
- DHT_STACK_UNWIND (frame, op_ret, op_errno, path);
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+ dht_conf_t *conf = NULL;
- return 0;
-}
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
+ VALIDATE_OR_GOTO (this->private, err);
+ conf = this->private;
-int
-dht_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
+ GF_IF_INTERNAL_XATTR_GOTO (conf->wild_xattr_name, xattr,
+ op_errno, err);
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FSETXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ local->call_cnt = 1;
- STACK_WIND (frame, dht_readlink_cbk,
- subvol, subvol->fops->readlink,
- loc, size);
+ STACK_WIND (frame, dht_err_cbk, subvol, subvol->fops->fsetxattr,
+ fd, xattr, flags, NULL);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
-int
-dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr)
+static int
+dht_common_setxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
{
- DHT_STACK_UNWIND (frame, op_ret, op_errno, xattr);
+ DHT_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
return 0;
}
-
int
-dht_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *key)
+dht_checking_pathinfo_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
+ int i = -1;
+ int ret = -1;
+ char *value = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ call_frame_t *prev = NULL;
+ int this_call_cnt = 0;
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ if (op_ret == -1)
+ goto out;
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
- STACK_WIND (frame, dht_getxattr_cbk,
- subvol, subvol->fops->getxattr,
- loc, key);
+ ret = dict_get_str (xattr, GF_XATTR_PATHINFO_KEY, &value);
+ if (ret)
+ goto out;
- return 0;
+ if (!strcmp (value, local->key)) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == prev->this)
+ conf->decommissioned_bricks[i] = prev->this;
+ }
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+out:
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ DHT_STACK_UNWIND (setxattr, frame, local->op_ret, ENOTSUP, NULL);
+ }
+ return 0;
- return 0;
}
-
int
dht_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr, int flags)
+ loc_t *loc, dict_t *xattr, int flags, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int op_errno = EINVAL;
+ int ret = -1;
+ data_t *tmp = NULL;
+ uint32_t dir_spread = 0;
+ char value[4096] = {0,};
+ gf_dht_migrate_data_type_t forced_rebalance = GF_DHT_MIGRATE_DATA;
+ int call_cnt = 0;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ conf = this->private;
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ GF_IF_INTERNAL_XATTR_GOTO (conf->wild_xattr_name, xattr,
+ op_errno, err);
- local->call_cnt = 1;
+ local = dht_local_init (frame, loc, NULL, GF_FOP_SETXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- STACK_WIND (frame, dht_err_cbk,
- subvol, subvol->fops->setxattr,
- loc, xattr, flags);
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
- return 0;
+ layout = local->layout;
+ if (!layout) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no layout for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+ local->call_cnt = call_cnt = layout->cnt;
- return 0;
-}
+ tmp = dict_get (xattr, "distribute.migrate-data");
+ if (tmp) {
+ if (IA_ISDIR (loc->inode->ia_type)) {
+ op_errno = ENOTSUP;
+ goto err;
+ }
+ /* TODO: need to interpret the 'value' for more meaning
+ (ie, 'target' subvolume given there, etc) */
+ memcpy (value, tmp->data, tmp->len);
+ if (strcmp (value, "force") == 0)
+ forced_rebalance =
+ GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS;
-int
-dht_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *key)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ if (conf->decommission_in_progress)
+ forced_rebalance = GF_DHT_MIGRATE_HARDLINK;
+ local->rebalance.target_node = dht_subvol_get_hashed (this, loc);
+ if (!local->rebalance.target_node) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get "
+ "hashed subvol for %s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ local->rebalance.from_subvol = local->cached_subvol;
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ if (local->rebalance.target_node == local->rebalance.from_subvol) {
+ op_errno = EEXIST;
+ goto err;
+ }
+ if (local->rebalance.target_node) {
+ local->flags = forced_rebalance;
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ ret = dht_start_rebalance_task (this, frame);
+ if (!ret)
+ return 0;
- local->call_cnt = 1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to create a new synctask",
+ loc->path);
+ }
+ op_errno = EINVAL;
+ goto err;
- STACK_WIND (frame, dht_err_cbk,
- subvol, subvol->fops->removexattr,
- loc, key);
+ }
- return 0;
+ tmp = dict_get (xattr, "decommission-brick");
+ if (tmp) {
+ /* This operation should happen only on '/' */
+ if (!__is_root_gfid (loc->inode->gfid)) {
+ op_errno = ENOTSUP;
+ goto err;
+ }
+
+ memcpy (value, tmp->data, ((tmp->len < 4095) ? tmp->len : 4095));
+ local->key = gf_strdup (value);
+ local->call_cnt = conf->subvolume_cnt;
+
+ for (i = 0 ; i < conf->subvolume_cnt; i++) {
+ /* Get the pathinfo, and then compare */
+ STACK_WIND (frame, dht_checking_pathinfo_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->getxattr,
+ loc, GF_XATTR_PATHINFO_KEY, NULL);
+ }
+ return 0;
+ }
+
+ tmp = dict_get (xattr, GF_XATTR_FIX_LAYOUT_KEY);
+ if (tmp) {
+ gf_log (this->name, GF_LOG_INFO,
+ "fixing the layout of %s", loc->path);
+
+ ret = dht_fix_directory_layout (frame, dht_common_setxattr_cbk,
+ layout);
+ if (ret) {
+ op_errno = ENOTCONN;
+ goto err;
+ }
+ return ret;
+ }
+
+ tmp = dict_get (xattr, "distribute.directory-spread-count");
+ if (tmp) {
+ /* Setxattr value is packed as 'binary', not string */
+ memcpy (value, tmp->data, ((tmp->len < 4095)?tmp->len:4095));
+ ret = gf_string2uint32 (value, &dir_spread);
+ if (!ret && ((dir_spread <= conf->subvolume_cnt) &&
+ (dir_spread > 0))) {
+ layout->spread_cnt = dir_spread;
+
+ ret = dht_fix_directory_layout (frame,
+ dht_common_setxattr_cbk,
+ layout);
+ if (ret) {
+ op_errno = ENOTCONN;
+ goto err;
+ }
+ return ret;
+ }
+ gf_log (this->name, GF_LOG_ERROR,
+ "wrong 'directory-spread-count' value (%s)", value);
+ op_errno = ENOTSUP;
+ goto err;
+ }
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_err_cbk,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->setxattr,
+ loc, xattr, flags, xdata);
+ }
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
int
-dht_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd)
+dht_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+ local = frame->local;
+ prev = cookie;
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto unlock;
+ }
- local->op_ret = 0;
- }
+ local->op_ret = 0;
+ }
unlock:
- UNLOCK (&frame->lock);
+ UNLOCK (&frame->lock);
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->fd);
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ DHT_STACK_UNWIND (removexattr, frame, local->op_ret,
+ local->op_errno, NULL);
+ }
return 0;
}
int
-dht_open (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags, fd_t *fd)
+dht_removexattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *key, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int ret = -1;
+ xlator_t *subvol = NULL;
int op_errno = -1;
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int call_cnt = 0;
+ dht_conf_t *conf = NULL;
+ int i;
- VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (this->private, err);
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ conf = this->private;
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ GF_IF_NATIVE_XATTR_GOTO (conf->wild_xattr_name, key, op_errno, err);
- local->fd = fd_ref (fd);
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
- local->call_cnt = 1;
+ local = dht_local_init (frame, loc, NULL, GF_FOP_REMOVEXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- STACK_WIND (frame, dht_fd_cbk,
- subvol, subvol->fops->open,
- loc, flags, fd);
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
- return 0;
+ layout = local->layout;
+ if (!local->layout) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no layout for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+ local->call_cnt = call_cnt = layout->cnt;
+ local->key = gf_strdup (key);
- return 0;
-}
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_removexattr_cbk,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->removexattr,
+ loc, key, NULL);
+ }
+ return 0;
-int
-dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- struct iovec *vector, int count, struct stat *stbuf,
- struct iobref *iobref)
-{
- DHT_STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf,
- iobref);
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
return 0;
}
-
int
-dht_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t off)
+dht_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *key, dict_t *xdata)
{
- xlator_t *subvol = NULL;
+ xlator_t *subvol = NULL;
int op_errno = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int call_cnt = 0;
+ dht_conf_t *conf = 0;
+ int i;
- VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (this->private, err);
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ conf = this->private;
- STACK_WIND (frame, dht_readv_cbk,
- subvol, subvol->fops->readv,
- fd, size, off);
+ GF_IF_NATIVE_XATTR_GOTO (conf->wild_xattr_name, key, op_errno, err);
- return 0;
+ VALIDATE_OR_GOTO (frame, err);
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, 0, NULL, NULL);
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FREMOVEXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- return 0;
-}
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for inode=%s",
+ uuid_utoa (fd->inode->gfid));
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!local->layout) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no layout for inode=%s", uuid_utoa (fd->inode->gfid));
+ op_errno = EINVAL;
+ goto err;
+ }
+ local->call_cnt = call_cnt = layout->cnt;
+ local->key = gf_strdup (key);
-int
-dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct stat *stbuf)
-{
- DHT_STACK_UNWIND (frame, op_ret, op_errno, stbuf);
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_removexattr_cbk,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->fremovexattr,
+ fd, key, NULL);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fremovexattr, frame, -1, op_errno, NULL);
return 0;
}
int
-dht_writev (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iovec *vector, int count, off_t off,
- struct iobref *iobref)
+dht_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+ local = frame->local;
+ prev = cookie;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto unlock;
+ }
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ local->op_ret = 0;
+ }
+unlock:
+ UNLOCK (&frame->lock);
- STACK_WIND (frame, dht_writev_cbk,
- subvol, subvol->fops->writev,
- fd, vector, count, off, iobref);
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt))
+ DHT_STACK_UNWIND (open, frame, local->op_ret, local->op_errno,
+ local->fd, NULL);
- return 0;
+ return 0;
+}
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, 0);
+/*
+ * dht_normalize_stats -
+ */
+static void
+dht_normalize_stats (struct statvfs *buf, unsigned long bsize,
+ unsigned long frsize)
+{
+ double factor = 0;
- return 0;
-}
+ if (buf->f_bsize != bsize) {
+ buf->f_bsize = bsize;
+ }
+
+ if (buf->f_frsize != frsize) {
+ factor = ((double) buf->f_frsize) / frsize;
+ buf->f_frsize = frsize;
+ buf->f_blocks = (fsblkcnt_t) (factor * buf->f_blocks);
+ buf->f_bfree = (fsblkcnt_t) (factor * buf->f_bfree);
+ buf->f_bavail = (fsblkcnt_t) (factor * buf->f_bavail);
+ }
+}
int
-dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+dht_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct statvfs *statvfs, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ int bsize = 0;
+ int frsize = 0;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ local = frame->local;
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ goto unlock;
+ }
+ local->op_ret = 0;
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ if (local->statvfs.f_bsize != 0) {
+ bsize = max(local->statvfs.f_bsize, statvfs->f_bsize);
+ frsize = max(local->statvfs.f_frsize, statvfs->f_frsize);
+ dht_normalize_stats(&local->statvfs, bsize, frsize);
+ dht_normalize_stats(statvfs, bsize, frsize);
+ } else {
+ local->statvfs.f_bsize = statvfs->f_bsize;
+ local->statvfs.f_frsize = statvfs->f_frsize;
+ }
- local->fd = fd_ref (fd);
- local->call_cnt = 1;
+ local->statvfs.f_blocks += statvfs->f_blocks;
+ local->statvfs.f_bfree += statvfs->f_bfree;
+ local->statvfs.f_bavail += statvfs->f_bavail;
+ local->statvfs.f_files += statvfs->f_files;
+ local->statvfs.f_ffree += statvfs->f_ffree;
+ local->statvfs.f_favail += statvfs->f_favail;
+ local->statvfs.f_fsid = statvfs->f_fsid;
+ local->statvfs.f_flag = statvfs->f_flag;
+ local->statvfs.f_namemax = statvfs->f_namemax;
- STACK_WIND (frame, dht_err_cbk,
- subvol, subvol->fops->flush, fd);
+ }
+unlock:
+ UNLOCK (&frame->lock);
- return 0;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno);
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt))
+ DHT_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno,
+ &local->statvfs, xdata);
- return 0;
+ return 0;
}
int
-dht_fsync (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int datasync)
+dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- xlator_t *subvol = NULL;
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
int op_errno = -1;
- dht_local_t *local = NULL;
-
+ int i = -1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (this->private, err);
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ conf = this->private;
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
- local->call_cnt = 1;
+ local = dht_local_init (frame, NULL, NULL, GF_FOP_STATFS);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- STACK_WIND (frame, dht_err_cbk,
- subvol, subvol->fops->fsync,
- fd, datasync);
+ if (IA_ISDIR (loc->inode->ia_type)) {
+ local->call_cnt = conf->subvolume_cnt;
- return 0;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND (frame, dht_statfs_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->statfs, loc,
+ xdata);
+ }
+ return 0;
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno);
+ subvol = dht_subvol_get_cached (this, loc->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
- return 0;
-}
+ local->call_cnt = 1;
+ STACK_WIND (frame, dht_statfs_cbk,
+ subvol, subvol->fops->statfs, loc, xdata);
-int
-dht_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct flock *flock)
-{
- DHT_STACK_UNWIND (frame, op_ret, op_errno, flock);
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
return 0;
}
int
-dht_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int cmd, struct flock *flock)
+dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
int op_errno = -1;
-
+ int i = -1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (this->private, err);
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, dht_lk_cbk,
- subvol, subvol->fops->lk,
- fd, cmd, flock);
-
- return 0;
-
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
-
- return 0;
-}
-
-
-int
-dht_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct statvfs *statvfs)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
+ conf = this->private;
+ local = dht_local_init (frame, loc, fd, GF_FOP_OPENDIR);
+ if (!local) {
+ op_errno = ENOMEM;
- local = frame->local;
+ goto err;
+ }
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- goto unlock;
- }
- local->op_ret = 0;
-
- /* TODO: normalize sizes */
- local->statvfs.f_bsize = statvfs->f_bsize;
- local->statvfs.f_frsize = statvfs->f_frsize;
-
- local->statvfs.f_blocks += statvfs->f_blocks;
- local->statvfs.f_bfree += statvfs->f_bfree;
- local->statvfs.f_bavail += statvfs->f_bavail;
- local->statvfs.f_files += statvfs->f_files;
- local->statvfs.f_ffree += statvfs->f_ffree;
- local->statvfs.f_favail += statvfs->f_favail;
- local->statvfs.f_fsid = statvfs->f_fsid;
- local->statvfs.f_flag = statvfs->f_flag;
- local->statvfs.f_namemax = statvfs->f_namemax;
+ local->call_cnt = conf->subvolume_cnt;
- }
-unlock:
- UNLOCK (&frame->lock);
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND (frame, dht_fd_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->opendir,
+ loc, fd, xdata);
+ }
+ return 0;
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->statvfs);
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (opendir, frame, -1, op_errno, NULL, NULL);
return 0;
}
int
-dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int op_errno = -1;
- int i = -1;
+ dht_local_t *local = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *orig_entry = NULL;
+ gf_dirent_t *entry = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *next_subvol = NULL;
+ off_t next_offset = 0;
+ int count = 0;
+ dht_layout_t *layout = 0;
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = 0;
+ int ret = 0;
+ INIT_LIST_HEAD (&entries.list);
+ prev = cookie;
+ local = frame->local;
+ conf = this->private;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ if (op_ret < 0)
+ goto done;
- conf = this->private;
+ if (!local->layout)
+ local->layout = dht_layout_get (this, local->fd->inode);
- local = dht_local_init (frame);
- local->call_cnt = conf->subvolume_cnt;
+ layout = local->layout;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_statfs_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->statfs, loc);
- }
+ list_for_each_entry (orig_entry, (&orig_entries->list), list) {
+ next_offset = orig_entry->d_off;
+ if (check_is_dir (NULL, (&orig_entry->d_stat), NULL) &&
+ (prev->this != local->first_up_subvol)) {
+ continue;
+ }
+ if (check_is_linkfile (NULL, (&orig_entry->d_stat),
+ orig_entry->dict,
+ conf->link_xattr_name)) {
+ continue;
+ }
- return 0;
+ entry = gf_dirent_for_name (orig_entry->d_name);
+ if (!entry) {
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+ goto unwind;
+ }
- return 0;
-}
+ /* Do this if conf->search_unhashed is set to "auto" */
+ if (conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_AUTO) {
+ subvol = dht_layout_search (this, layout,
+ orig_entry->d_name);
+ if (!subvol || (subvol != prev->this)) {
+ /* TODO: Count the number of entries which need
+ linkfile to prove its existence in fs */
+ layout->search_unhashed++;
+ }
+ }
+ dht_itransform (this, prev->this, orig_entry->d_off,
+ &entry->d_off);
-int
-dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
-{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
- int op_errno = -1;
- int i = -1;
+ entry->d_stat = orig_entry->d_stat;
+ entry->d_ino = orig_entry->d_ino;
+ entry->d_type = orig_entry->d_type;
+ entry->d_len = orig_entry->d_len;
+ if (orig_entry->dict)
+ entry->dict = dict_ref (orig_entry->dict);
+
+ /* making sure we set the inode ctx right with layout,
+ currently possible only for non-directories, so for
+ directories don't set entry inodes */
+ if (!IA_ISDIR(entry->d_stat.ia_type)) {
+ ret = dht_layout_preset (this, prev->this,
+ orig_entry->inode);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to link the layout in inode");
+ entry->inode = inode_ref (orig_entry->inode);
+ } else if (orig_entry->inode) {
+ dht_inode_ctx_time_update (orig_entry->inode, this,
+ &entry->d_stat, 1);
+ }
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ list_add_tail (&entry->list, &entries.list);
+ count++;
+ }
+ op_ret = count;
+ /* We need to ensure that only the last subvolume's end-of-directory
+ * notification is respected so that directory reading does not stop
+ * before all subvolumes have been read. That could happen because the
+ * posix for each subvolume sends a ENOENT on end-of-directory but in
+ * distribute we're not concerned only with a posix's view of the
+ * directory but the aggregated namespace' view of the directory.
+ */
+ if (prev->this != dht_last_up_subvol (this))
+ op_errno = 0;
- conf = this->private;
+done:
+ if (count == 0) {
+ /* non-zero next_offset means that
+ EOF is not yet hit on the current subvol
+ */
+ if (next_offset == 0) {
+ next_subvol = dht_subvol_next (this, prev->this);
+ } else {
+ next_subvol = prev->this;
+ }
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ if (!next_subvol) {
+ goto unwind;
+ }
- local->fd = fd_ref (fd);
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ if (conf->readdir_optimize == _gf_true) {
+ if (next_subvol != local->first_up_subvol) {
+ ret = dict_set_int32 (local->xattr,
+ GF_READDIR_SKIP_DIRS, 1);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "dict set failed");
+ } else {
+ dict_del (local->xattr,
+ GF_READDIR_SKIP_DIRS);
+ }
+ }
- local->call_cnt = conf->subvolume_cnt;
+ STACK_WIND (frame, dht_readdirp_cbk,
+ next_subvol, next_subvol->fops->readdirp,
+ local->fd, local->size, next_offset,
+ local->xattr);
+ return 0;
+ }
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_fd_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->opendir,
- loc, fd);
- }
+unwind:
+ if (op_ret < 0)
+ op_ret = 0;
- return 0;
+ DHT_STACK_UNWIND (readdirp, frame, op_ret, op_errno, &entries, NULL);
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+ gf_dirent_free (&entries);
- return 0;
+ return 0;
}
+
int
dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *orig_entries)
-{
- dht_local_t *local = NULL;
- gf_dirent_t entries;
- gf_dirent_t *orig_entry = NULL;
- gf_dirent_t *entry = NULL;
- call_frame_t *prev = NULL;
- xlator_t *next_subvol = NULL;
+ int op_ret, int op_errno, gf_dirent_t *orig_entries,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *orig_entry = NULL;
+ gf_dirent_t *entry = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *next_subvol = NULL;
off_t next_offset = 0;
- int count = 0;
+ int count = 0;
+ dht_layout_t *layout = 0;
+ xlator_t *subvol = 0;
+ INIT_LIST_HEAD (&entries.list);
+ prev = cookie;
+ local = frame->local;
- INIT_LIST_HEAD (&entries.list);
- prev = cookie;
- local = frame->local;
+ if (op_ret < 0)
+ goto done;
- if (op_ret < 0)
- goto done;
+ if (!local->layout)
+ local->layout = dht_layout_get (this, local->fd->inode);
- list_for_each_entry (orig_entry, (&orig_entries->list), list) {
+ layout = local->layout;
+
+ list_for_each_entry (orig_entry, (&orig_entries->list), list) {
next_offset = orig_entry->d_off;
- if (check_is_linkfile (NULL, (&orig_entry->d_stat), NULL)
- || (check_is_dir (NULL, (&orig_entry->d_stat), NULL)
- && (prev->this != dht_first_up_subvol (this)))) {
- continue;
- }
+ subvol = dht_layout_search (this, layout, orig_entry->d_name);
- entry = gf_dirent_for_name (orig_entry->d_name);
- if (!entry) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto unwind;
- }
+ if (!subvol || (subvol == prev->this)) {
+ entry = gf_dirent_for_name (orig_entry->d_name);
+ if (!entry) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto unwind;
+ }
- dht_itransform (this, prev->this, orig_entry->d_ino,
- &entry->d_ino);
- dht_itransform (this, prev->this, orig_entry->d_off,
- &entry->d_off);
+ dht_itransform (this, prev->this, orig_entry->d_off,
+ &entry->d_off);
- entry->d_type = orig_entry->d_type;
- entry->d_len = orig_entry->d_len;
+ entry->d_ino = orig_entry->d_ino;
+ entry->d_type = orig_entry->d_type;
+ entry->d_len = orig_entry->d_len;
- list_add_tail (&entry->list, &entries.list);
- count++;
- }
- op_ret = count;
+ list_add_tail (&entry->list, &entries.list);
+ count++;
+ }
+ }
+ op_ret = count;
+ /* We need to ensure that only the last subvolume's end-of-directory
+ * notification is respected so that directory reading does not stop
+ * before all subvolumes have been read. That could happen because the
+ * posix for each subvolume sends a ENOENT on end-of-directory but in
+ * distribute we're not concerned only with a posix's view of the
+ * directory but the aggregated namespace' view of the directory.
+ */
+ if (prev->this != dht_last_up_subvol (this))
+ op_errno = 0;
done:
- if (count == 0) {
+ if (count == 0) {
/* non-zero next_offset means that
EOF is not yet hit on the current subvol
*/
@@ -2162,771 +3232,911 @@ done:
next_subvol = prev->this;
}
- if (!next_subvol) {
- goto unwind;
- }
+ if (!next_subvol) {
+ goto unwind;
+ }
- STACK_WIND (frame, dht_readdir_cbk,
- next_subvol, next_subvol->fops->readdir,
- local->fd, local->size, next_offset);
- return 0;
- }
+ STACK_WIND (frame, dht_readdir_cbk,
+ next_subvol, next_subvol->fops->readdir,
+ local->fd, local->size, next_offset, NULL);
+ return 0;
+ }
unwind:
- if (op_ret < 0)
- op_ret = 0;
+ if (op_ret < 0)
+ op_ret = 0;
- DHT_STACK_UNWIND (frame, op_ret, op_errno, &entries);
+ DHT_STACK_UNWIND (readdir, frame, op_ret, op_errno, &entries, NULL);
- gf_dirent_free (&entries);
+ gf_dirent_free (&entries);
return 0;
}
int
-dht_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t yoff)
+dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t yoff, int whichop, dict_t *dict)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
int op_errno = -1;
- xlator_t *xvol = NULL;
- off_t xoff = 0;
-
+ xlator_t *xvol = NULL;
+ off_t xoff = 0;
+ int ret = 0;
+ dht_conf_t *conf = NULL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (this->private, err);
- conf = this->private;
-
- local = dht_local_init (frame);
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
+ conf = this->private;
- local->fd = fd_ref (fd);
- local->size = size;
+ local = dht_local_init (frame, NULL, NULL, whichop);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- dht_deitransform (this, yoff, &xvol, (uint64_t *)&xoff);
+ local->fd = fd_ref (fd);
+ local->size = size;
+ local->xattr_req = (dict)? dict_ref (dict) : NULL;
+ local->first_up_subvol = dht_first_up_subvol (this);
+
+ dht_deitransform (this, yoff, &xvol, (uint64_t *)&xoff);
+
+ /* TODO: do proper readdir */
+ if (whichop == GF_FOP_READDIRP) {
+ if (dict)
+ local->xattr = dict_ref (dict);
+ else
+ local->xattr = dict_new ();
+
+ if (local->xattr) {
+ ret = dict_set_uint32 (local->xattr,
+ conf->link_xattr_name, 256);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set '%s' key",
+ conf->link_xattr_name);
+ if (conf->readdir_optimize == _gf_true) {
+ if (xvol != local->first_up_subvol) {
+ ret = dict_set_int32 (local->xattr,
+ GF_READDIR_SKIP_DIRS, 1);
+ if (ret)
+ gf_log (this->name,
+ GF_LOG_ERROR,
+ "Dict set failed");
+ } else {
+ dict_del (local->xattr,
+ GF_READDIR_SKIP_DIRS);
+ }
+ }
+ }
- /* TODO: do proper readdir */
- STACK_WIND (frame, dht_readdir_cbk,
- xvol, xvol->fops->readdir,
- fd, size, xoff);
+ STACK_WIND (frame, dht_readdirp_cbk, xvol, xvol->fops->readdirp,
+ fd, size, xoff, local->xattr);
+ } else {
+ STACK_WIND (frame, dht_readdir_cbk, xvol, xvol->fops->readdir,
+ fd, size, xoff, local->xattr);
+ }
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
int
+dht_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t yoff, dict_t *xdata)
+{
+ int op = GF_FOP_READDIR;
+ dht_conf_t *conf = NULL;
+ int i = 0;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->subvolume_status[i]) {
+ op = GF_FOP_READDIRP;
+ break;
+ }
+ }
+
+ if (conf->use_readdirp)
+ op = GF_FOP_READDIRP;
+
+out:
+ dht_do_readdir (frame, this, fd, size, yoff, op, 0);
+ return 0;
+}
+
+int
+dht_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t yoff, dict_t *dict)
+{
+ dht_do_readdir (frame, this, fd, size, yoff, GF_FOP_READDIRP, dict);
+ return 0;
+}
+
+
+
+int
dht_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+ int op_ret, int op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
- local = frame->local;
+ local = frame->local;
- LOCK (&frame->lock);
- {
- if (op_ret == -1)
- local->op_errno = op_errno;
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1)
+ local->op_errno = op_errno;
- if (op_ret == 0)
- local->op_ret = 0;
- }
- UNLOCK (&frame->lock);
+ if (op_ret == 0)
+ local->op_ret = 0;
+ }
+ UNLOCK (&frame->lock);
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno);
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt))
+ DHT_STACK_UNWIND (fsyncdir, frame, local->op_ret,
+ local->op_errno, xdata);
return 0;
}
int
-dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync)
+dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int datasync, dict_t *xdata)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
int op_errno = -1;
- int i = -1;
-
+ int i = -1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (this->private, err);
- conf = this->private;
+ conf = this->private;
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ local = dht_local_init (frame, NULL, NULL, GF_FOP_FSYNCDIR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- local->fd = fd_ref (fd);
- local->call_cnt = conf->subvolume_cnt;
+ local->fd = fd_ref (fd);
+ local->call_cnt = conf->subvolume_cnt;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_fsyncdir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->fsyncdir,
- fd, datasync);
- }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND (frame, dht_fsyncdir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->fsyncdir,
+ fd, datasync, xdata);
+ }
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
int
dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct stat *stbuf)
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
- int ret = -1;
+ xlator_t *prev = NULL;
+ int ret = -1;
+ dht_local_t *local = NULL;
- if (op_ret == -1)
- goto out;
+ if (op_ret == -1)
+ goto out;
- prev = cookie;
+ local = frame->local;
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
- dht_itransform (this, prev->this, stbuf->st_ino, &stbuf->st_ino);
- layout = dht_layout_for_subvol (this, prev->this);
+ prev = cookie;
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no pre-set layout for subvolume %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ if (local->loc.parent) {
- ret = inode_ctx_put (inode, this, (uint64_t)(long)layout);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not set inode context");
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ preparent, 0);
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
+ }
+ ret = dht_layout_preset (this, prev, inode);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "could not set pre-set layout for subvolume %s",
+ prev? prev->name: NULL);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+ if (local->linked == _gf_true)
+ dht_linkfile_attr_heal (frame, this);
out:
- DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf);
- return 0;
+ /*
+ * FIXME: ia_size and st_blocks of preparent and postparent do not have
+ * correct values. since, preparent and postparent buffers correspond
+ * to a directory these two members should have values equal to sum of
+ * corresponding values from each of the subvolume.
+ * See dht_iatt_merge for reference.
+ */
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
+ DHT_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, stbuf,
+ preparent, postparent, xdata);
+ return 0;
}
int
dht_mknod_linkfile_create_cbk (call_frame_t *frame, void *cookie,
xlator_t *this,
int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *stbuf)
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- xlator_t *cached_subvol = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *cached_subvol = NULL;
if (op_ret == -1)
goto err;
- local = frame->local;
- cached_subvol = local->cached_subvol;
+ local = frame->local;
+ if (!local || !local->cached_subvol) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ cached_subvol = local->cached_subvol;
- STACK_WIND (frame, dht_newfile_cbk,
- cached_subvol, cached_subvol->fops->mknod,
- &local->loc, local->mode, local->rdev);
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)cached_subvol,
+ cached_subvol, cached_subvol->fops->mknod,
+ &local->loc, local->mode, local->rdev, local->umask,
+ local->params);
return 0;
- err:
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
- return 0;
+err:
+ DHT_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
int
dht_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev)
+ loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *params)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- int ret = -1;
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
xlator_t *avail_subvol = NULL;
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
+ dht_local_t *local = NULL;
- conf = this->private;
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
dht_get_du_info (frame, this, loc);
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
+ local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = dht_subvol_get_hashed (this, loc);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
if (!dht_is_subvol_filled (this, subvol)) {
gf_log (this->name, GF_LOG_TRACE,
"creating %s on %s", loc->path, subvol->name);
-
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->mknod,
- loc, mode, rdev);
+
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol,
+ subvol, subvol->fops->mknod, loc, mode,
+ rdev, umask, params);
} else {
- avail_subvol = dht_free_disk_available_subvol (this, subvol);
+
+ avail_subvol = dht_free_disk_available_subvol (this, subvol,
+ local);
if (avail_subvol != subvol) {
- /* Choose the minimum filled volume, and create the
+ /* Choose the minimum filled volume, and create the
files there */
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ local->params = dict_ref (params);
local->cached_subvol = avail_subvol;
- local->mode = mode;
+ local->mode = mode;
local->rdev = rdev;
-
- dht_linkfile_create (frame,
+ local->umask = umask;
+ dht_linkfile_create (frame,
dht_mknod_linkfile_create_cbk,
- avail_subvol, subvol, loc);
+ this, avail_subvol, subvol, loc);
} else {
gf_log (this->name, GF_LOG_TRACE,
"creating %s on %s", loc->path, subvol->name);
-
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->mknod,
- loc, mode, rdev);
+
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk,
+ (void *)subvol, subvol,
+ subvol->fops->mknod, loc, mode,
+ rdev, umask, params);
}
}
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (mknod, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
int
dht_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkname, loc_t *loc)
+ const char *linkname, loc_t *loc, mode_t umask, dict_t *params)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_SYMLINK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
+ subvol = dht_subvol_get_hashed (this, loc);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s", loc->path, subvol->name);
+ gf_log (this->name, GF_LOG_TRACE,
+ "creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->symlink,
- linkname, loc);
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->symlink, linkname, loc, umask,
+ params);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (link, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
int
-dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
- xlator_t *cached_subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
-
- cached_subvol = dht_subvol_get_cached (this, loc->inode);
- if (!cached_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ xlator_t *cached_subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
- hashed_subvol = dht_subvol_get_hashed (this, loc);
- if (!hashed_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ if (dht_filter_loc_subvol_key (this, loc, &local->loc,
+ &cached_subvol)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "unlinking %s on %s (given path %s)",
+ local->loc.path, cached_subvol->name, loc->path);
+ STACK_WIND (frame, dht_unlink_cbk,
+ cached_subvol, cached_subvol->fops->unlink,
+ &local->loc, xflag, xdata);
+ goto done;
+ }
- local->call_cnt = 1;
- if (hashed_subvol != cached_subvol)
- local->call_cnt++;
+ local = dht_local_init (frame, loc, NULL, GF_FOP_UNLINK);
+ if (!local) {
+ op_errno = ENOMEM;
- STACK_WIND (frame, dht_err_cbk,
- cached_subvol, cached_subvol->fops->unlink, loc);
+ goto err;
+ }
- if (hashed_subvol != cached_subvol)
- STACK_WIND (frame, dht_err_cbk,
- hashed_subvol, hashed_subvol->fops->unlink, loc);
+ hashed_subvol = dht_subvol_get_hashed (this, loc);
+ if (!hashed_subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
- return 0;
+ cached_subvol = local->cached_subvol;
+ if (!cached_subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+ local->flags = xflag;
+ if (hashed_subvol != cached_subvol) {
+ STACK_WIND (frame, dht_unlink_linkfile_cbk,
+ hashed_subvol, hashed_subvol->fops->unlink, loc,
+ xflag, xdata);
+ } else {
+ STACK_WIND (frame, dht_unlink_cbk,
+ cached_subvol, cached_subvol->fops->unlink, loc,
+ xflag, xdata);
+ }
+done:
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ return 0;
}
int
dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct stat *stbuf)
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
- dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
prev = cookie;
- local = frame->local;
+
+ local = frame->local;
if (op_ret == -1)
goto out;
- layout = dht_layout_for_subvol (this, prev->this);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no pre-set layout for subvolume %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
-
- stbuf->st_ino = local->loc.inode->ino;
+ layout = dht_layout_for_subvol (this, prev->this);
+ if (!layout) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no pre-set layout for subvolume %s",
+ prev->this->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ preparent, 0);
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
+ }
+ if (local->linked == _gf_true) {
+ local->stbuf = *stbuf;
+ dht_linkfile_attr_heal (frame, this);
+ }
out:
- DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf);
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
+ DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, preparent,
+ postparent, NULL);
- return 0;
+ return 0;
}
int
dht_link_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct stat *stbuf)
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- xlator_t *srcvol = NULL;
-
+ dht_local_t *local = NULL;
+ xlator_t *srcvol = NULL;
- if (op_ret == -1)
- goto err;
+ if (op_ret == -1)
+ goto err;
- local = frame->local;
- srcvol = local->linkfile.srcvol;
+ local = frame->local;
+ srcvol = local->linkfile.srcvol;
- STACK_WIND (frame, dht_link_cbk,
- srcvol, srcvol->fops->link,
- &local->loc, &local->loc2);
+ STACK_WIND (frame, dht_link_cbk, srcvol, srcvol->fops->link,
+ &local->loc, &local->loc2, xdata);
- return 0;
+ return 0;
err:
- DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf);
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
+ DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, preparent,
+ postparent, NULL);
- return 0;
+ return 0;
}
int
dht_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
-{
- xlator_t *cached_subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- int op_errno = -1;
- int ret = -1;
- dht_local_t *local = NULL;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (oldloc, err);
- VALIDATE_OR_GOTO (newloc, err);
-
- cached_subvol = dht_subvol_get_cached (this, oldloc->inode);
- if (!cached_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", oldloc->path);
- op_errno = EINVAL;
- goto err;
- }
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ xlator_t *cached_subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ int op_errno = -1;
+ int ret = -1;
+ dht_local_t *local = NULL;
- hashed_subvol = dht_subvol_get_hashed (this, newloc);
- if (!hashed_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- newloc->path);
- op_errno = EINVAL;
- goto err;
- }
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (oldloc, err);
+ VALIDATE_OR_GOTO (newloc, err);
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ local = dht_local_init (frame, oldloc, NULL, GF_FOP_LINK);
+ if (!local) {
+ op_errno = ENOMEM;
- ret = loc_copy (&local->loc, oldloc);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ goto err;
+ }
- ret = loc_copy (&local->loc2, newloc);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ cached_subvol = local->cached_subvol;
+ if (!cached_subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s", oldloc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
- if (hashed_subvol != cached_subvol) {
- dht_linkfile_create (frame, dht_link_linkfile_cbk,
- cached_subvol, hashed_subvol, newloc);
- } else {
- STACK_WIND (frame, dht_link_cbk,
- cached_subvol, cached_subvol->fops->link,
- oldloc, newloc);
- }
+ hashed_subvol = dht_subvol_get_hashed (this, newloc);
+ if (!hashed_subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no subvolume in layout for path=%s",
+ newloc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
- return 0;
+ ret = loc_copy (&local->loc2, newloc);
+ if (ret == -1) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (hashed_subvol != cached_subvol) {
+ uuid_copy (local->gfid, oldloc->inode->gfid);
+ dht_linkfile_create (frame, dht_link_linkfile_cbk, this,
+ cached_subvol, hashed_subvol, newloc);
+ } else {
+ STACK_WIND (frame, dht_link_cbk,
+ cached_subvol, cached_subvol->fops->link,
+ oldloc, newloc, xdata);
+ }
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
int
dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- fd_t *fd, inode_t *inode, struct stat *stbuf)
+ int op_ret, int op_errno,
+ fd_t *fd, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
- int ret = -1;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+ dht_local_t *local = NULL;
- if (op_ret == -1)
- goto out;
+ if (op_ret == -1)
+ goto out;
- prev = cookie;
+ local = frame->local;
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
- dht_itransform (this, prev->this, stbuf->st_ino, &stbuf->st_ino);
- layout = dht_layout_for_subvol (this, prev->this);
+ prev = cookie;
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no pre-set layout for subvolume %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ preparent, 0);
- ret = inode_ctx_put (inode, this, (uint64_t)(long)layout);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not set inode context");
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ postparent, 1);
+ }
+ ret = dht_layout_preset (this, prev->this, inode);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "could not set preset layout for subvol %s",
+ prev->this->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+ if (local->linked == _gf_true) {
+ local->stbuf = *stbuf;
+ dht_linkfile_attr_heal (frame, this);
+ }
out:
- DHT_STACK_UNWIND (frame, op_ret, op_errno, fd, inode, stbuf);
- return 0;
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
+ DHT_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, stbuf, preparent,
+ postparent, NULL);
+ return 0;
}
int
dht_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *stbuf)
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- xlator_t *cached_subvol = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *cached_subvol = NULL;
if (op_ret == -1)
goto err;
- local = frame->local;
- cached_subvol = local->cached_subvol;
+ local = frame->local;
+ cached_subvol = local->cached_subvol;
STACK_WIND (frame, dht_create_cbk,
cached_subvol, cached_subvol->fops->create,
- &local->loc, local->flags, local->mode, local->fd);
+ &local->loc, local->flags, local->mode,
+ local->umask, local->fd, local->params);
return 0;
- err:
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+err:
+ DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
}
int
dht_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode, fd_t *fd)
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *params)
{
- int op_errno = -1;
- int ret = -1;
- xlator_t *subvol = NULL;
- dht_conf_t *conf = NULL;
+ int op_errno = -1;
+ xlator_t *subvol = NULL;
dht_local_t *local = NULL;
xlator_t *avail_subvol = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
-
- conf = this->private;
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
dht_get_du_info (frame, this, loc);
- local = dht_local_init (frame);
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
+ local = dht_local_init (frame, loc, fd, GF_FOP_CREATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
+ if (dht_filter_loc_subvol_key (this, loc, &local->loc,
+ &subvol)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "creating %s on %s (got create on %s)",
+ local->loc.path, subvol->name, loc->path);
+ STACK_WIND (frame, dht_create_cbk,
+ subvol, subvol->fops->create,
+ &local->loc, flags, mode, umask, fd, params);
+ goto done;
+ }
+
+ subvol = dht_subvol_get_hashed (this, loc);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
if (!dht_is_subvol_filled (this, subvol)) {
gf_log (this->name, GF_LOG_TRACE,
"creating %s on %s", loc->path, subvol->name);
STACK_WIND (frame, dht_create_cbk,
subvol, subvol->fops->create,
- loc, flags, mode, fd);
- } else {
- /* Choose the minimum filled volume, and create the
- files there */
- /* TODO */
- avail_subvol = dht_free_disk_available_subvol (this, subvol);
- if (avail_subvol != subvol) {
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- local->fd = fd_ref (fd);
- local->flags = flags;
- local->mode = mode;
-
- local->cached_subvol = avail_subvol;
- local->hashed_subvol = subvol;
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s (link at %s)", loc->path,
- avail_subvol->name, subvol->name);
- dht_linkfile_create (frame,
- dht_create_linkfile_create_cbk,
- avail_subvol, subvol, loc);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_create_cbk,
- subvol, subvol->fops->create,
- loc, flags, mode, fd);
-
- }
+ loc, flags, mode, umask, fd, params);
+ goto done;
}
-
- return 0;
+ /* Choose the minimum filled volume, and create the
+ files there */
+ avail_subvol = dht_free_disk_available_subvol (this, subvol, local);
+ if (avail_subvol != subvol) {
+ local->params = dict_ref (params);
+ local->flags = flags;
+ local->mode = mode;
+ local->umask = umask;
+ local->cached_subvol = avail_subvol;
+ local->hashed_subvol = subvol;
+ gf_log (this->name, GF_LOG_TRACE,
+ "creating %s on %s (link at %s)", loc->path,
+ avail_subvol->name, subvol->name);
+ dht_linkfile_create (frame, dht_create_linkfile_create_cbk,
+ this, avail_subvol, subvol, loc);
+ goto done;
+ }
+ gf_log (this->name, GF_LOG_TRACE,
+ "creating %s on %s", loc->path, subvol->name);
+ STACK_WIND (frame, dht_create_cbk,
+ subvol, subvol->fops->create,
+ loc, flags, mode, umask, fd, params);
+done:
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL, NULL);
- return 0;
+ return 0;
}
int
dht_mkdir_selfheal_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ local = frame->local;
+ layout = local->selfheal.layout;
- local = frame->local;
- layout = local->selfheal.layout;
+ if (op_ret == 0) {
+ dht_layout_set (this, local->inode, layout);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->preparent, 0);
- if (op_ret == 0) {
- inode_ctx_put (local->inode, this, (uint64_t)(long)layout);
- local->selfheal.layout = NULL;
- local->stbuf.st_ino = local->st_ino;
- }
+ dht_inode_ctx_time_update (local->loc.parent, this,
+ &local->postparent, 1);
+ }
+ }
- DHT_STACK_UNWIND (frame, op_ret, op_errno,
- local->inode, &local->stbuf);
+ DHT_STACK_UNWIND (mkdir, frame, op_ret, op_errno,
+ local->inode, &local->stbuf, &local->preparent,
+ &local->postparent, NULL);
- return 0;
+ return 0;
}
int
dht_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode, struct stat *stbuf)
+ int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- int ret = -1;
- int subvol_filled = 0;
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
- dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ int ret = -1;
+ gf_boolean_t subvol_filled = _gf_false;
+ call_frame_t *prev = NULL;
+ dht_layout_t *layout = NULL;
- conf = this->private;
- local = frame->local;
- prev = cookie;
- layout = local->layout;
+ local = frame->local;
+ prev = cookie;
+ layout = local->layout;
subvol_filled = dht_is_subvol_filled (this, prev->this);
- LOCK (&frame->lock);
- {
+ LOCK (&frame->lock);
+ {
if (subvol_filled && (op_ret != -1)) {
ret = dht_layout_merge (this, layout, prev->this,
-1, ENOSPC, NULL);
} else {
+ if (op_ret == -1 && op_errno == EEXIST)
+ /* Very likely just a race between mkdir and
+ self-heal (from lookup of a concurrent mkdir
+ attempt).
+ Ignore error for now. layout setting will
+ anyways fail if this was a different (old)
+ pre-existing different directory.
+ */
+ op_ret = 0;
ret = dht_layout_merge (this, layout, prev->this,
op_ret, op_errno, NULL);
}
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to merge layouts", local->loc.path);
- if (op_ret == -1) {
- local->op_errno = op_errno;
- goto unlock;
- }
- dht_stat_merge (this, &local->stbuf, stbuf, prev->this);
- }
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ goto unlock;
+ }
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ dht_iatt_merge (this, &local->preparent, preparent, prev->this);
+ dht_iatt_merge (this, &local->postparent, postparent,
+ prev->this);
+ }
unlock:
- UNLOCK (&frame->lock);
+ UNLOCK (&frame->lock);
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- local->layout = NULL;
- dht_selfheal_new_directory (frame, dht_mkdir_selfheal_cbk,
- layout);
- }
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ dht_selfheal_new_directory (frame, dht_mkdir_selfheal_cbk,
+ layout);
+ }
return 0;
}
int
-dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- inode_t *inode, struct stat *stbuf)
+dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- int ret = -1;
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
- dht_conf_t *conf = NULL;
- int i = 0;
- xlator_t *hashed_subvol = NULL;
+ dht_local_t *local = NULL;
+ int ret = -1;
+ call_frame_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ xlator_t *hashed_subvol = NULL;
- local = frame->local;
- prev = cookie;
- layout = local->layout;
- conf = this->private;
- hashed_subvol = local->hashed_subvol;
+ VALIDATE_OR_GOTO (this->private, err);
+
+ local = frame->local;
+ prev = cookie;
+ layout = local->layout;
+ conf = this->private;
+ hashed_subvol = local->hashed_subvol;
+
+ if (uuid_is_null (local->loc.gfid) && !op_ret)
+ uuid_copy (local->loc.gfid, stbuf->ia_gfid);
if (dht_is_subvol_filled (this, hashed_subvol))
ret = dht_layout_merge (this, layout, prev->this,
@@ -2934,47 +4144,55 @@ dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
else
ret = dht_layout_merge (this, layout, prev->this,
op_ret, op_errno, NULL);
-
- if (op_ret == -1) {
- local->op_errno = op_errno;
- goto err;
- }
- local->op_ret = 0;
- dht_stat_merge (this, &local->stbuf, stbuf, prev->this);
+ /* TODO: we may have to return from the function
+ if layout merge fails. For now, lets just log an error */
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to merge layouts", local->loc.path);
- local->st_ino = local->stbuf.st_ino;
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ goto err;
+ }
+ local->op_ret = 0;
- local->call_cnt = conf->subvolume_cnt - 1;
-
- if (local->call_cnt == 0) {
- local->layout = NULL;
- dht_selfheal_directory (frame, dht_mkdir_selfheal_cbk,
- &local->loc, layout);
- }
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->subvolumes[i] == hashed_subvol)
- continue;
- STACK_WIND (frame, dht_mkdir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->mkdir,
- &local->loc, local->mode);
- }
- return 0;
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ dht_iatt_merge (this, &local->preparent, preparent, prev->this);
+ dht_iatt_merge (this, &local->postparent, postparent, prev->this);
+
+ local->call_cnt = conf->subvolume_cnt - 1;
+
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, stbuf->ia_gfid);
+ if (local->call_cnt == 0) {
+ dht_selfheal_directory (frame, dht_mkdir_selfheal_cbk,
+ &local->loc, layout);
+ }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == hashed_subvol)
+ continue;
+ STACK_WIND (frame, dht_mkdir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->mkdir, &local->loc,
+ local->mode, local->umask, local->params);
+ }
+ return 0;
err:
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+ DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
return 0;
}
-int
+
+ int
dht_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode)
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *params)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
int op_errno = -1;
- int ret = -1;
- xlator_t *hashed_subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
VALIDATE_OR_GOTO (frame, err);
@@ -2982,722 +4200,1013 @@ dht_mkdir (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
VALIDATE_OR_GOTO (loc->path, err);
+ VALIDATE_OR_GOTO (this->private, err);
- conf = this->private;
+ conf = this->private;
dht_get_du_info (frame, this, loc);
- local = dht_local_init (frame);
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- hashed_subvol = dht_subvol_get_hashed (this, loc);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_MKDIR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- if (hashed_subvol == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "hashed subvol not found for %s",
+ hashed_subvol = dht_subvol_get_hashed (this, loc);
+ if (hashed_subvol == NULL) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "hashed subvol not found for %s",
loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ op_errno = EINVAL;
+ goto err;
+ }
- local->hashed_subvol = hashed_subvol;
- local->inode = inode_ref (loc->inode);
- ret = loc_copy (&local->loc, loc);
- local->mode = mode;
+ local->hashed_subvol = hashed_subvol;
+ local->mode = mode;
+ local->umask = umask;
+ local->params = dict_ref (params);
+ local->inode = inode_ref (loc->inode);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- local->layout = dht_layout_new (this, conf->subvolume_cnt);
- if (!local->layout) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
+ local->layout = dht_layout_new (this, conf->subvolume_cnt);
+ if (!local->layout) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- STACK_WIND (frame, dht_mkdir_hashed_cbk,
- hashed_subvol,
- hashed_subvol->fops->mkdir,
- loc, mode);
+ STACK_WIND (frame, dht_mkdir_hashed_cbk,
+ hashed_subvol,
+ hashed_subvol->fops->mkdir,
+ loc, mode, umask, params);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
- return 0;
+ return 0;
}
int
dht_rmdir_selfheal_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+ int op_ret, int op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
- local = frame->local;
- local->layout = NULL;
+ local = frame->local;
- DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno);
+ DHT_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, NULL);
- return 0;
+ return 0;
}
int
-dht_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+dht_rmdir_hashed_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- uint64_t tmp_layout = 0;
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- local->op_ret = -1;
-
- if (op_errno != ENOENT)
- local->need_selfheal = 1;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "rmdir on %s for %s failed (%s)",
- prev->this->name, local->loc.path,
- strerror (op_errno));
- goto unlock;
- }
- }
+ local = frame->local;
+ prev = cookie;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ if (op_errno != ENOENT && op_errno != EACCES) {
+ local->need_selfheal = 1;
+ }
+
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "rmdir on %s for %s failed (%s)",
+ prev->this->name, local->loc.path,
+ strerror (op_errno));
+ goto unlock;
+ }
+
+ dht_iatt_merge (this, &local->preparent, preparent, prev->this);
+ dht_iatt_merge (this, &local->postparent, postparent,
+ prev->this);
+
+ }
unlock:
- UNLOCK (&frame->lock);
+ UNLOCK (&frame->lock);
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ if (local->need_selfheal) {
+ local->layout =
+ dht_layout_get (this, local->loc.inode);
+
+ /* TODO: neater interface needed below */
+ local->stbuf.ia_type = local->loc.inode->ia_type;
+
+ uuid_copy (local->gfid, local->loc.inode->gfid);
+ dht_selfheal_restore (frame, dht_rmdir_selfheal_cbk,
+ &local->loc, local->layout);
+ } else {
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update (local->loc.parent,
+ this,
+ &local->preparent,
+ 0);
+
+ dht_inode_ctx_time_update (local->loc.parent,
+ this,
+ &local->postparent,
+ 1);
+ }
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- if (local->need_selfheal) {
- inode_ctx_get (local->loc.inode, this,
- &tmp_layout);
- layout = (dht_layout_t *)(long)tmp_layout;
-
- /* TODO: neater interface needed below */
- local->stbuf.st_mode = local->loc.inode->st_mode;
-
- dht_selfheal_restore (frame, dht_rmdir_selfheal_cbk,
- &local->loc, layout);
- } else {
- DHT_STACK_UNWIND (frame, local->op_ret,
- local->op_errno);
- }
- }
+ DHT_STACK_UNWIND (rmdir, frame, local->op_ret,
+ local->op_errno, &local->preparent,
+ &local->postparent, NULL);
+ }
+ }
return 0;
}
int
-dht_rmdir_do (call_frame_t *frame, xlator_t *this)
+dht_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int i = 0;
-
- conf = this->private;
- local = frame->local;
-
- if (local->op_ret == -1)
- goto err;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+ int done = 0;
- local->call_cnt = conf->subvolume_cnt;
+ local = frame->local;
+ prev = cookie;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_rmdir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->rmdir,
- &local->loc);
- }
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
- return 0;
+ if (op_errno != ENOENT && op_errno != EACCES) {
+ local->need_selfheal = 1;
+ }
-err:
- DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno);
- return 0;
-}
+ gf_log (this->name, GF_LOG_DEBUG,
+ "rmdir on %s for %s failed (%s)",
+ prev->this->name, local->loc.path,
+ strerror (op_errno));
+ goto unlock;
+ }
+ /* Track if rmdir succeeded on atleast one subvol*/
+ local->fop_succeeded = 1;
+ dht_iatt_merge (this, &local->preparent, preparent, prev->this);
+ dht_iatt_merge (this, &local->postparent, postparent,
+ prev->this);
+ }
+unlock:
+ UNLOCK (&frame->lock);
-int
-dht_rmdir_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *entries)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = -1;
- call_frame_t *prev = NULL;
- local = frame->local;
- prev = cookie;
-
- if (op_ret > 2) {
- gf_log (this->name, GF_LOG_TRACE,
- "readdir on %s for %s returned %d entries",
- prev->this->name, local->loc.path, op_ret);
- local->op_ret = -1;
- local->op_errno = ENOTEMPTY;
- }
+ this_call_cnt = dht_frame_return (frame);
- this_call_cnt = dht_frame_return (frame);
+ /* if local->hashed_subvol, we are yet to wind to hashed_subvol. */
+ if (local->hashed_subvol && (this_call_cnt == 1)) {
+ done = 1;
+ } else if (!local->hashed_subvol && !this_call_cnt) {
+ done = 1;
+ }
- if (is_last_call (this_call_cnt)) {
- dht_rmdir_do (frame, this);
- }
- return 0;
-}
+ if (done) {
+ if (local->need_selfheal && local->fop_succeeded) {
+ local->layout =
+ dht_layout_get (this, local->loc.inode);
+ /* TODO: neater interface needed below */
+ local->stbuf.ia_type = local->loc.inode->ia_type;
-int
-dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = -1;
- call_frame_t *prev = NULL;
+ uuid_copy (local->gfid, local->loc.inode->gfid);
+ dht_selfheal_restore (frame, dht_rmdir_selfheal_cbk,
+ &local->loc, local->layout);
+ } else if (this_call_cnt) {
+ /* If non-hashed subvol's have responded, proceed */
+ local->need_selfheal = 0;
+ STACK_WIND (frame, dht_rmdir_hashed_subvol_cbk,
+ local->hashed_subvol,
+ local->hashed_subvol->fops->rmdir,
+ &local->loc, local->flags, NULL);
+ } else if (!this_call_cnt) {
+ /* All subvol's have responded, proceed */
- local = frame->local;
- prev = cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "opendir on %s for %s failed (%s)",
- prev->this->name, local->loc.path,
- strerror (op_errno));
- goto err;
- }
+ if (local->loc.parent) {
- STACK_WIND (frame, dht_rmdir_readdir_cbk,
- prev->this, prev->this->fops->readdir,
- local->fd, 4096, 0);
+ dht_inode_ctx_time_update (local->loc.parent,
+ this,
+ &local->preparent,
+ 0);
- return 0;
+ dht_inode_ctx_time_update (local->loc.parent,
+ this,
+ &local->postparent,
+ 1);
-err:
- this_call_cnt = dht_frame_return (frame);
+ }
- if (is_last_call (this_call_cnt)) {
- dht_rmdir_do (frame, this);
- }
+ DHT_STACK_UNWIND (rmdir, frame, local->op_ret,
+ local->op_errno, &local->preparent,
+ &local->postparent, NULL);
+ }
+ }
- return 0;
+ return 0;
}
int
-dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
+dht_rmdir_do (call_frame_t *frame, xlator_t *this)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int op_errno = -1;
- int i = -1;
- int ret = -1;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ xlator_t *hashed_subvol = NULL;
+ VALIDATE_OR_GOTO (this->private, err);
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ conf = this->private;
+ local = frame->local;
- conf = this->private;
+ if (local->op_ret == -1)
+ goto err;
- local = dht_local_init (frame);
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
+ local->call_cnt = conf->subvolume_cnt;
- local->call_cnt = conf->subvolume_cnt;
- local->op_ret = 0;
+ /* first remove from non-hashed_subvol */
+ hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
+ if (!hashed_subvol) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to get hashed "
+ "subvol for %s",local->loc.path);
+ } else {
+ local->hashed_subvol = hashed_subvol;
+ }
- local->fd = fd_create (local->loc.inode, frame->root->pid);
- if (!local->fd) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
+ /* When DHT has only 1 child */
+ if (conf->subvolume_cnt == 1) {
+ STACK_WIND (frame, dht_rmdir_hashed_subvol_cbk,
+ conf->subvolumes[0],
+ conf->subvolumes[0]->fops->rmdir,
+ &local->loc, local->flags, NULL);
+ return 0;
+ }
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_rmdir_opendir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->opendir,
- loc, local->fd);
- }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (hashed_subvol &&
+ (hashed_subvol == conf->subvolumes[i]))
+ continue;
- return 0;
+ STACK_WIND (frame, dht_rmdir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->rmdir,
+ &local->loc, local->flags, NULL);
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno);
+ return 0;
- return 0;
+err:
+ DHT_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, NULL);
+ return 0;
}
int
-dht_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+dht_rmdir_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- DHT_STACK_UNWIND (frame, op_ret, op_errno, dict);
- return 0;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *src = NULL;
+ call_frame_t *main_frame = NULL;
+ dht_local_t *main_local = NULL;
+ int this_call_cnt = 0;
+
+ local = frame->local;
+ prev = cookie;
+ src = prev->this;
+
+ main_frame = local->main_frame;
+ main_local = main_frame->local;
+
+ if (op_ret == 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "unlinked linkfile %s on %s",
+ local->loc.path, src->name);
+ } else {
+ main_local->op_ret = -1;
+ main_local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "unlink of %s on %s failed (%s)",
+ local->loc.path, src->name, strerror (op_errno));
+ }
+
+ this_call_cnt = dht_frame_return (main_frame);
+ if (is_last_call (this_call_cnt))
+ dht_rmdir_do (main_frame, this);
+
+ DHT_STACK_DESTROY (frame);
+ return 0;
}
int
-dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict)
+dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr, struct iatt *parent)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *src = NULL;
+ call_frame_t *main_frame = NULL;
+ dht_local_t *main_local = NULL;
+ int this_call_cnt = 0;
+ dht_conf_t *conf = this->private;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ local = frame->local;
+ prev = cookie;
+ src = prev->this;
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ main_frame = local->main_frame;
+ main_local = main_frame->local;
- local->inode = inode_ref (loc->inode);
- local->call_cnt = 1;
+ if (op_ret != 0)
+ goto err;
- STACK_WIND (frame,
- dht_xattrop_cbk,
- subvol, subvol->fops->xattrop,
- loc, flags, dict);
+ if (!check_is_linkfile (inode, stbuf, xattr, conf->link_xattr_name)) {
+ main_local->op_ret = -1;
+ main_local->op_errno = ENOTEMPTY;
- return 0;
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s on %s found to be not a linkfile (type=0%o)",
+ local->loc.path, src->name, stbuf->ia_type);
+ goto err;
+ }
+ STACK_WIND (frame, dht_rmdir_linkfile_unlink_cbk,
+ src, src->fops->unlink, &local->loc, 0, NULL);
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ this_call_cnt = dht_frame_return (main_frame);
+ if (is_last_call (this_call_cnt))
+ dht_rmdir_do (main_frame, this);
+
+ DHT_STACK_DESTROY (frame);
+ return 0;
}
int
-dht_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this,
+ gf_dirent_t *entries, xlator_t *src)
{
- DHT_STACK_UNWIND (frame, op_ret, op_errno, dict);
- return 0;
-}
+ int ret = 0;
+ int build_ret = 0;
+ gf_dirent_t *trav = NULL;
+ call_frame_t *lookup_frame = NULL;
+ dht_local_t *lookup_local = NULL;
+ dht_local_t *local = NULL;
+ dict_t *xattrs = NULL;
+ dht_conf_t *conf = this->private;
+ local = frame->local;
-int
-dht_fxattrop (call_frame_t *frame, xlator_t *this,
- fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
+ list_for_each_entry (trav, &entries->list, list) {
+ if (strcmp (trav->d_name, ".") == 0)
+ continue;
+ if (strcmp (trav->d_name, "..") == 0)
+ continue;
+ if (check_is_linkfile (NULL, (&trav->d_stat), trav->dict,
+ conf->link_xattr_name)) {
+ ret++;
+ continue;
+ }
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ /* this entry is either a directory which is neither "." nor "..",
+ or a non directory which is not a linkfile. the directory is to
+ be treated as non-empty
+ */
+ return 0;
+ }
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ xattrs = dict_new ();
+ if (!xattrs) {
+ gf_log (this->name, GF_LOG_ERROR, "dict_new failed");
+ return -1;
+ }
+
+ ret = dict_set_uint32 (xattrs, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to set linkto key"
+ " in dict");
+ if (xattrs)
+ dict_unref (xattrs);
+ return -1;
+ }
- STACK_WIND (frame,
- dht_fxattrop_cbk,
- subvol, subvol->fops->fxattrop,
- fd, flags, dict);
+ list_for_each_entry (trav, &entries->list, list) {
+ if (strcmp (trav->d_name, ".") == 0)
+ continue;
+ if (strcmp (trav->d_name, "..") == 0)
+ continue;
- return 0;
+ lookup_frame = NULL;
+ lookup_local = NULL;
+ lookup_frame = copy_frame (frame);
+ if (!lookup_frame) {
+ /* out of memory, let the rmdir fail
+ (as non-empty, unfortunately) */
+ goto err;
+ }
+
+ lookup_local = mem_get0 (this->local_pool);
+ if (!lookup_local) {
+ goto err;
+ }
+
+ lookup_frame->local = lookup_local;
+ lookup_local->main_frame = frame;
+
+ build_ret = dht_build_child_loc (this, &lookup_local->loc,
+ &local->loc, trav->d_name);
+ if (build_ret != 0)
+ goto err;
+
+ uuid_copy (lookup_local->loc.gfid, trav->d_stat.ia_gfid);
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "looking up %s on %s",
+ lookup_local->loc.path, src->name);
+
+ LOCK (&frame->lock);
+ {
+ local->call_cnt++;
+ }
+ UNLOCK (&frame->lock);
+
+ STACK_WIND (lookup_frame, dht_rmdir_lookup_cbk,
+ src, src->fops->lookup,
+ &lookup_local->loc, xattrs);
+ ret++;
+ }
+
+ if (xattrs)
+ dict_unref (xattrs);
+
+ return ret;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
+ if (xattrs)
+ dict_unref (xattrs);
- return 0;
+ DHT_STACK_DESTROY (lookup_frame);
+ return 0;
}
int
-dht_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
+dht_rmdir_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
- DHT_STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ dht_local_t *local = NULL;
+ int this_call_cnt = -1;
+ call_frame_t *prev = NULL;
+ xlator_t *src = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ prev = cookie;
+ src = prev->this;
+
+ if (op_ret > 2) {
+ ret = dht_rmdir_is_subvol_empty (frame, this, entries, src);
+
+ switch (ret) {
+ case 0: /* non linkfiles exist */
+ gf_log (this->name, GF_LOG_TRACE,
+ "readdir on %s for %s returned %d entries",
+ prev->this->name, local->loc.path, op_ret);
+ local->op_ret = -1;
+ local->op_errno = ENOTEMPTY;
+ break;
+ default:
+ /* @ret number of linkfiles are getting unlinked */
+ gf_log (this->name, GF_LOG_TRACE,
+ "readdir on %s for %s found %d linkfiles",
+ prev->this->name, local->loc.path, ret);
+ break;
+ }
+ }
+
+ this_call_cnt = dht_frame_return (frame);
+
+ if (is_last_call (this_call_cnt)) {
+ dht_rmdir_do (frame, this);
+ }
+
+ return 0;
}
-int32_t
-dht_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct flock *lock)
+int
+dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = -1;
+ call_frame_t *prev = NULL;
+ dict_t *dict = NULL;
+ int ret = 0;
+ dht_conf_t *conf = this->private;
+ local = frame->local;
+ prev = cookie;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "opendir on %s for %s failed (%s)",
+ prev->this->name, local->loc.path,
+ strerror (op_errno));
+ if (op_errno != ENOENT) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ }
+ goto err;
+ }
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ dict = dict_new ();
+ if (!dict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
+ }
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ ret = dict_set_uint32 (dict, conf->link_xattr_name, 256);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set '%s' key",
+ local->loc.path, conf->link_xattr_name);
- local->inode = inode_ref (loc->inode);
- local->call_cnt = 1;
+ STACK_WIND (frame, dht_rmdir_readdirp_cbk,
+ prev->this, prev->this->fops->readdirp,
+ local->fd, 4096, 0, dict);
- STACK_WIND (frame,
- dht_inodelk_cbk,
- subvol, subvol->fops->inodelk,
- volume, loc, cmd, lock);
+ if (dict)
+ dict_unref (dict);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno);
-
- return 0;
-}
-
+ this_call_cnt = dht_frame_return (frame);
-int
-dht_finodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ if (is_last_call (this_call_cnt)) {
+ dht_rmdir_do (frame, this);
+ }
-{
- DHT_STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ return 0;
}
int
-dht_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct flock *lock)
+dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
int op_errno = -1;
+ int i = -1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+ VALIDATE_OR_GOTO (this->private, err);
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ conf = this->private;
+ local = dht_local_init (frame, loc, NULL, GF_FOP_RMDIR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- STACK_WIND (frame,
- dht_finodelk_cbk,
- subvol, subvol->fops->finodelk,
- volume, fd, cmd, lock);
+ local->call_cnt = conf->subvolume_cnt;
+ local->op_ret = 0;
+ local->fop_succeeded = 0;
- return 0;
+ local->flags = flags;
+
+ local->fd = fd_create (local->loc.inode, frame->root->pid);
+ if (!local->fd) {
+
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND (frame, dht_rmdir_opendir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->opendir,
+ loc, local->fd, NULL);
+ }
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (rmdir, frame, -1, op_errno,
+ NULL, NULL, NULL);
- return 0;
+ return 0;
}
-
int
dht_entrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- DHT_STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ DHT_STACK_UNWIND (entrylk, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
dht_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
- xlator_t *subvol = NULL;
+ xlator_t *subvol = NULL;
int op_errno = -1;
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ local = dht_local_init (frame, loc, NULL, GF_FOP_ENTRYLK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
- local->inode = inode_ref (loc->inode);
- local->call_cnt = 1;
+ local->call_cnt = 1;
- STACK_WIND (frame, dht_entrylk_cbk,
- subvol, subvol->fops->entrylk,
- volume, loc, basename, cmd, type);
+ STACK_WIND (frame, dht_entrylk_cbk,
+ subvol, subvol->fops->entrylk,
+ volume, loc, basename, cmd, type, xdata);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (entrylk, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
int
dht_fentrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- DHT_STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ DHT_STACK_UNWIND (fentrylk, frame, op_ret, op_errno, NULL);
+ return 0;
}
int
dht_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ const char *volume, fd_t *fd, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
- xlator_t *subvol = NULL;
+ xlator_t *subvol = NULL;
int op_errno = -1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ subvol = dht_subvol_get_cached (this, fd->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
- STACK_WIND (frame, dht_fentrylk_cbk,
- subvol, subvol->fops->fentrylk,
- volume, fd, basename, cmd, type);
+ STACK_WIND (frame, dht_fentrylk_cbk,
+ subvol, subvol->fops->fentrylk,
+ volume, fd, basename, cmd, type, xdata);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
int
dht_forget (xlator_t *this, inode_t *inode)
{
- uint64_t tmp_layout = 0;
- dht_layout_t *layout = NULL;
+ uint64_t ctx_int = 0;
+ dht_inode_ctx_t *ctx = NULL;
+ dht_layout_t *layout = NULL;
- inode_ctx_get (inode, this, &tmp_layout);
+ inode_ctx_del (inode, this, &ctx_int);
- if (!tmp_layout)
- return 0;
+ if (!ctx_int)
+ return 0;
- layout = (dht_layout_t *)(long)tmp_layout;
- if (!layout->preset)
- FREE (layout);
+ ctx = (dht_inode_ctx_t *) (long) ctx_int;
- return 0;
-}
+ layout = ctx->layout;
+ ctx->layout = NULL;
+ dht_layout_unref (this, layout);
+ GF_FREE (ctx);
+ return 0;
+}
int
-dht_init_subvolumes (xlator_t *this, dht_conf_t *conf)
+dht_notify (xlator_t *this, int event, void *data, ...)
{
- xlator_list_t *subvols = NULL;
- int cnt = 0;
+ xlator_t *subvol = NULL;
+ int cnt = -1;
+ int i = -1;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int propagate = 0;
+
+ int had_heard_from_all = 0;
+ int have_heard_from_all = 0;
+ struct timeval time = {0,};
+ gf_defrag_info_t *defrag = NULL;
+ dict_t *dict = NULL;
+ gf_defrag_type cmd = 0;
+ dict_t *output = NULL;
+ va_list ap;
- for (subvols = this->children; subvols; subvols = subvols->next)
- cnt++;
-
- conf->subvolumes = CALLOC (cnt, sizeof (xlator_t *));
- if (!conf->subvolumes) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- return -1;
+ conf = this->private;
+ if (!conf)
+ return ret;
+
+ /* had all subvolumes reported status once till now? */
+ had_heard_from_all = 1;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->last_event[i]) {
+ had_heard_from_all = 0;
+ }
}
- conf->subvolume_cnt = cnt;
- cnt = 0;
- for (subvols = this->children; subvols; subvols = subvols->next)
- conf->subvolumes[cnt++] = subvols->xlator;
+ switch (event) {
+ case GF_EVENT_CHILD_UP:
+ subvol = data;
- conf->subvolume_status = CALLOC (cnt, sizeof (char));
- if (!conf->subvolume_status) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- return -1;
- }
+ conf->gen++;
- return 0;
-}
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ cnt = i;
+ break;
+ }
+ }
+ if (cnt == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "got GF_EVENT_CHILD_UP bad subvolume %s",
+ subvol->name);
+ break;
+ }
-int
-dht_notify (xlator_t *this, int event, void *data, ...)
-{
- xlator_t *subvol = NULL;
- int cnt = -1;
- int i = -1;
- dht_conf_t *conf = NULL;
- int ret = -1;
+ gettimeofday (&time, NULL);
+ LOCK (&conf->subvolume_lock);
+ {
+ conf->subvolume_status[cnt] = 1;
+ conf->last_event[cnt] = event;
+ conf->subvol_up_time[cnt] = time.tv_sec;
+ }
+ UNLOCK (&conf->subvolume_lock);
+ /* one of the node came back up, do a stat update */
+ dht_get_du_info_for_subvol (this, cnt);
- conf = this->private;
+ break;
- switch (event) {
- case GF_EVENT_CHILD_UP:
- subvol = data;
+ case GF_EVENT_CHILD_MODIFIED:
+ subvol = data;
- conf->gen++;
+ conf->gen++;
+ propagate = 1;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (subvol == conf->subvolumes[i]) {
- cnt = i;
- break;
- }
- }
+ break;
- if (cnt == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "got GF_EVENT_CHILD_UP bad subvolume %s",
- subvol->name);
- break;
- }
+ case GF_EVENT_CHILD_DOWN:
+ subvol = data;
- LOCK (&conf->subvolume_lock);
- {
- conf->subvolume_status[cnt] = 1;
- }
- UNLOCK (&conf->subvolume_lock);
+ if (conf->assert_no_child_down) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Received CHILD_DOWN. Exiting");
+ if (conf->defrag) {
+ gf_defrag_stop (conf->defrag, NULL);
+ } else {
+ kill (getpid(), SIGTERM);
+ }
+ }
- /* one of the node came back up, do a stat update */
- dht_get_du_info_for_subvol (this, cnt);
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ cnt = i;
+ break;
+ }
+ }
- break;
+ if (cnt == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "got GF_EVENT_CHILD_DOWN bad subvolume %s",
+ subvol->name);
+ break;
+ }
- case GF_EVENT_CHILD_DOWN:
- subvol = data;
+ LOCK (&conf->subvolume_lock);
+ {
+ conf->subvolume_status[cnt] = 0;
+ conf->last_event[cnt] = event;
+ conf->subvol_up_time[cnt] = 0;
+ }
+ UNLOCK (&conf->subvolume_lock);
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (subvol == conf->subvolumes[i]) {
- cnt = i;
- break;
- }
- }
+ break;
- if (cnt == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "got GF_EVENT_CHILD_DOWN bad subvolume %s",
- subvol->name);
- break;
- }
+ case GF_EVENT_CHILD_CONNECTING:
+ subvol = data;
- LOCK (&conf->subvolume_lock);
- {
- conf->subvolume_status[cnt] = 0;
- }
- UNLOCK (&conf->subvolume_lock);
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ cnt = i;
+ break;
+ }
+ }
- break;
- }
+ if (cnt == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "got GF_EVENT_CHILD_CONNECTING bad subvolume %s",
+ subvol->name);
+ break;
+ }
+
+ LOCK (&conf->subvolume_lock);
+ {
+ conf->last_event[cnt] = event;
+ }
+ UNLOCK (&conf->subvolume_lock);
+
+ break;
+ case GF_EVENT_VOLUME_DEFRAG:
+ {
+ if (!conf->defrag) {
+ return ret;
+ }
+ defrag = conf->defrag;
+
+ dict = data;
+ va_start (ap, data);
+ output = va_arg (ap, dict_t*);
+
+ ret = dict_get_int32 (dict, "rebalance-command",
+ (int32_t*)&cmd);
+ if (ret)
+ return ret;
+ LOCK (&defrag->lock);
+ {
+ if (defrag->is_exiting)
+ goto unlock;
+ if (cmd == GF_DEFRAG_CMD_STATUS)
+ gf_defrag_status_get (defrag, output);
+ else if (cmd == GF_DEFRAG_CMD_STOP)
+ gf_defrag_stop (defrag, output);
+ }
+unlock:
+ UNLOCK (&defrag->lock);
+ return 0;
+ break;
+ }
+
+ default:
+ propagate = 1;
+ break;
+ }
+
+
+ /* have all subvolumes reported status once by now? */
+ have_heard_from_all = 1;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->last_event[i])
+ have_heard_from_all = 0;
+ }
+
+ /* if all subvols have reported status, no need to hide anything
+ or wait for anything else. Just propagate blindly */
+ if (have_heard_from_all) {
+ propagate = 1;
- ret = default_notify (this, event, data);
+ }
+
+
+ if (!had_heard_from_all && have_heard_from_all) {
+ /* This is the first event which completes aggregation
+ of events from all subvolumes. If at least one subvol
+ had come up, propagate CHILD_UP, but only this time
+ */
+ event = GF_EVENT_CHILD_DOWN;
- return ret;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->last_event[i] == GF_EVENT_CHILD_UP) {
+ event = GF_EVENT_CHILD_UP;
+ break;
+ }
+
+ if (conf->last_event[i] == GF_EVENT_CHILD_CONNECTING) {
+ event = GF_EVENT_CHILD_CONNECTING;
+ /* continue to check other events for CHILD_UP */
+ }
+ }
+
+ /* rebalance is started with assert_no_child_down. So we do
+ * not need to handle CHILD_DOWN event here.
+ */
+ if (conf->defrag) {
+ ret = gf_thread_create (&conf->defrag->th, NULL,
+ gf_defrag_start, this);
+ if (ret) {
+ conf->defrag = NULL;
+ GF_FREE (conf->defrag);
+ kill (getpid(), SIGTERM);
+ }
+ }
+ }
+
+ ret = 0;
+ if (propagate)
+ ret = default_notify (this, event, data);
+
+ return ret;
}
+int
+dht_inode_ctx_layout_get (inode_t *inode, xlator_t *this, dht_layout_t **layout)
+{
+ dht_inode_ctx_t *ctx = NULL;
+ int ret = -1;
+
+ ret = dht_inode_ctx_get (inode, this, &ctx);
+
+ if (!ret && ctx) {
+ if (ctx->layout) {
+ if (layout)
+ *layout = ctx->layout;
+ ret = 0;
+ } else {
+ ret = -1;
+ }
+ }
+
+ return ret;
+}
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index f5c95b4cb..5ccd66799 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -22,223 +13,775 @@
#include "config.h"
#endif
+#include <regex.h>
+
+#include "dht-mem-types.h"
+#include "libxlator.h"
+#include "syncop.h"
+
#ifndef _DHT_H
#define _DHT_H
+#define GF_XATTR_FIX_LAYOUT_KEY "distribute.fix.layout"
+#define GF_DHT_LOOKUP_UNHASHED_ON 1
+#define GF_DHT_LOOKUP_UNHASHED_AUTO 2
+#define DHT_PATHINFO_HEADER "DISTRIBUTE:"
+
+#include <fnmatch.h>
typedef int (*dht_selfheal_dir_cbk_t) (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno);
+ xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xdata);
+typedef int (*dht_defrag_cbk_fn_t) (xlator_t *this, call_frame_t *frame,
+ int ret);
struct dht_layout {
- int cnt;
- int preset;
- int gen;
- int type;
+ int spread_cnt; /* layout spread count per directory,
+ is controlled by 'setxattr()' with
+ special key */
+ int cnt;
+ int preset;
+ int gen;
+ int type;
+ int ref; /* use with dht_conf_t->layout_lock */
+ int search_unhashed;
struct {
- int err; /* 0 = normal
- -1 = dir exists and no xattr
- >0 = dir lookup failed with errno
- */
- uint32_t start;
- uint32_t stop;
- xlator_t *xlator;
- } list[0];
+ int err; /* 0 = normal
+ -1 = dir exists and no xattr
+ >0 = dir lookup failed with errno
+ */
+ uint32_t start;
+ uint32_t stop;
+ xlator_t *xlator;
+ } list[];
};
-typedef struct dht_layout dht_layout_t;
+typedef struct dht_layout dht_layout_t;
+
+struct dht_stat_time {
+ uint32_t atime;
+ uint32_t atime_nsec;
+ uint32_t ctime;
+ uint32_t ctime_nsec;
+ uint32_t mtime;
+ uint32_t mtime_nsec;
+};
+
+typedef struct dht_stat_time dht_stat_time_t;
+struct dht_inode_ctx {
+ dht_layout_t *layout;
+ dht_stat_time_t time;
+};
+
+typedef struct dht_inode_ctx dht_inode_ctx_t;
+
+
+typedef enum {
+ DHT_HASH_TYPE_DM,
+ DHT_HASH_TYPE_DM_USER,
+} dht_hashfn_type_t;
+
+/* rebalance related */
+struct dht_rebalance_ {
+ xlator_t *from_subvol;
+ xlator_t *target_node;
+ off_t offset;
+ size_t size;
+ int32_t flags;
+ int count;
+ struct iobref *iobref;
+ struct iovec *vector;
+ struct iatt stbuf;
+ dht_defrag_cbk_fn_t target_op_fn;
+ dict_t *xdata;
+};
struct dht_local {
- int call_cnt;
- loc_t loc;
- loc_t loc2;
- int op_ret;
- int op_errno;
- int layout_mismatch;
- struct stat stbuf;
- struct statvfs statvfs;
- fd_t *fd;
- inode_t *inode;
- dict_t *xattr;
- dict_t *xattr_req;
- dht_layout_t *layout;
- size_t size;
- ino_t st_ino;
- xlator_t *src_hashed, *src_cached;
- xlator_t *dst_hashed, *dst_cached;
- xlator_t *cached_subvol;
- xlator_t *hashed_subvol;
- char need_selfheal;
+ int call_cnt;
+ loc_t loc;
+ loc_t loc2;
+ int op_ret;
+ int op_errno;
+ int layout_mismatch;
+ /* Use stbuf as the postbuf, when we require both
+ * pre and post attrs */
+ struct iatt stbuf;
+ struct iatt prebuf;
+ struct iatt preoldparent;
+ struct iatt postoldparent;
+ struct iatt preparent;
+ struct iatt postparent;
+ struct statvfs statvfs;
+ fd_t *fd;
+ inode_t *inode;
+ dict_t *params;
+ dict_t *xattr;
+ dict_t *xattr_req;
+ dht_layout_t *layout;
+ size_t size;
+ ino_t ia_ino;
+ xlator_t *src_hashed, *src_cached;
+ xlator_t *dst_hashed, *dst_cached;
+ xlator_t *cached_subvol;
+ xlator_t *hashed_subvol;
+ char need_selfheal;
int file_count;
int dir_count;
- struct {
- fop_mknod_cbk_t linkfile_cbk;
- struct stat stbuf;
- loc_t loc;
- inode_t *inode;
- dict_t *xattr;
- xlator_t *srcvol;
- } linkfile;
- struct {
- uint32_t hole_cnt;
- uint32_t overlaps_cnt;
- uint32_t missing;
- uint32_t down;
- uint32_t misc;
- dht_selfheal_dir_cbk_t dir_cbk;
- dht_layout_t *layout;
- } selfheal;
-
- /* needed by nufa */
- int32_t flags;
- mode_t mode;
- dev_t rdev;
+ call_frame_t *main_frame;
+ int fop_succeeded;
+ struct {
+ fop_mknod_cbk_t linkfile_cbk;
+ struct iatt stbuf;
+ loc_t loc;
+ inode_t *inode;
+ dict_t *xattr;
+ xlator_t *srcvol;
+ } linkfile;
+ struct {
+ uint32_t hole_cnt;
+ uint32_t overlaps_cnt;
+ uint32_t down;
+ uint32_t misc;
+ dht_selfheal_dir_cbk_t dir_cbk;
+ dht_layout_t *layout;
+ } selfheal;
+ uint32_t uid;
+ uint32_t gid;
+
+ /* needed by nufa */
+ int32_t flags;
+ mode_t mode;
+ dev_t rdev;
+ mode_t umask;
+
+ /* need for file-info */
+ char *xattr_val;
+ char *key;
+
+ /* which xattr request? */
+ char xsel[256];
+ int32_t alloc_len;
+
+ char *newpath;
+
+ /* gfid related */
+ uuid_t gfid;
+
+ /*Marker Related*/
+ struct marker_str marker;
+
+ /* flag used to make sure we need to return estale in
+ {lookup,revalidate}_cbk */
+ char return_estale;
+ char need_lookup_everywhere;
+
+ glusterfs_fop_t fop;
+
+ gf_boolean_t linked;
+ xlator_t *link_subvol;
+
+ struct dht_rebalance_ rebalance;
+ xlator_t *first_up_subvol;
+
};
typedef struct dht_local dht_local_t;
/* du - disk-usage */
struct dht_du {
double avail_percent;
+ double avail_inodes;
uint64_t avail_space;
uint32_t log;
};
typedef struct dht_du dht_du_t;
+enum gf_defrag_type {
+ GF_DEFRAG_CMD_START = 1,
+ GF_DEFRAG_CMD_STOP = 1 + 1,
+ GF_DEFRAG_CMD_STATUS = 1 + 2,
+ GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3,
+ GF_DEFRAG_CMD_START_FORCE = 1 + 4,
+};
+typedef enum gf_defrag_type gf_defrag_type;
+
+enum gf_defrag_status_t {
+ GF_DEFRAG_STATUS_NOT_STARTED,
+ GF_DEFRAG_STATUS_STARTED,
+ GF_DEFRAG_STATUS_STOPPED,
+ GF_DEFRAG_STATUS_COMPLETE,
+ GF_DEFRAG_STATUS_FAILED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_STOPPED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_FAILED,
+};
+typedef enum gf_defrag_status_t gf_defrag_status_t;
+
+typedef struct gf_defrag_pattern_list gf_defrag_pattern_list_t;
+
+struct gf_defrag_pattern_list {
+ char path_pattern[256];
+ uint64_t size;
+ gf_defrag_pattern_list_t *next;
+};
+
+struct gf_defrag_info_ {
+ uint64_t total_files;
+ uint64_t total_data;
+ uint64_t num_files_lookedup;
+ uint64_t total_failures;
+ uint64_t skipped;
+ gf_lock_t lock;
+ int cmd;
+ pthread_t th;
+ gf_defrag_status_t defrag_status;
+ struct rpc_clnt *rpc;
+ uint32_t connected;
+ uint32_t is_exiting;
+ pid_t pid;
+ inode_t *root_inode;
+ uuid_t node_uuid;
+ struct timeval start_time;
+ gf_boolean_t stats;
+ gf_defrag_pattern_list_t *defrag_pattern;
+};
+
+typedef struct gf_defrag_info_ gf_defrag_info_t;
+
struct dht_conf {
- gf_lock_t subvolume_lock;
+ gf_lock_t subvolume_lock;
int subvolume_cnt;
xlator_t **subvolumes;
- xlator_t *local_volume; /* Needed by NUFA */
- char *subvolume_status;
- dht_layout_t **file_layouts;
- dht_layout_t **dir_layouts;
- dht_layout_t *default_dir_layout;
- gf_boolean_t search_unhashed;
- int gen;
+ char *subvolume_status;
+ int *last_event;
+ dht_layout_t **file_layouts;
+ dht_layout_t **dir_layouts;
+ gf_boolean_t search_unhashed;
+ int gen;
dht_du_t *du_stats;
- uint64_t min_free_disk;
+ double min_free_disk;
+ double min_free_inodes;
char disk_unit;
int32_t refresh_interval;
gf_boolean_t unhashed_sticky_bit;
- struct timeval last_stat_fetch;
+ struct timeval last_stat_fetch;
+ gf_lock_t layout_lock;
+ void *private; /* Can be used by wrapper xlators over
+ dht */
+ gf_boolean_t use_readdirp;
+ char vol_uuid[UUID_SIZE + 1];
+ gf_boolean_t assert_no_child_down;
+ time_t *subvol_up_time;
+
+ /* This is the count used as the distribute layout for a directory */
+ /* Will be a global flag to control the layout spread count */
+ uint32_t dir_spread_cnt;
+
+ /* to keep track of nodes which are decomissioned */
+ xlator_t **decommissioned_bricks;
+ int decommission_in_progress;
+ int decommission_subvols_cnt;
+
+ /* defrag related */
+ gf_defrag_info_t *defrag;
+
+ /* Request to filter directory entries in readdir request */
+
+ gf_boolean_t readdir_optimize;
+
+ /* Support regex-based name reinterpretation. */
+ regex_t rsync_regex;
+ gf_boolean_t rsync_regex_valid;
+ regex_t extra_regex;
+ gf_boolean_t extra_regex_valid;
+
+ /* Support variable xattr names. */
+ char *xattr_name;
+ char *link_xattr_name;
+ char *wild_xattr_name;
};
typedef struct dht_conf dht_conf_t;
struct dht_disk_layout {
- uint32_t cnt;
- uint32_t type;
- struct {
- uint32_t start;
- uint32_t stop;
- } list[1];
+ uint32_t cnt;
+ uint32_t type;
+ struct {
+ uint32_t start;
+ uint32_t stop;
+ } list[1];
};
typedef struct dht_disk_layout dht_disk_layout_t;
-
-#define ENTRY_MISSING(op_ret, op_errno) (op_ret == -1 && op_errno == ENOENT)
-#define is_fs_root(loc) (strcmp (loc->path, "/") == 0)
+typedef enum {
+ GF_DHT_MIGRATE_DATA,
+ GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS,
+ GF_DHT_MIGRATE_HARDLINK,
+ GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS
+} gf_dht_migrate_data_type_t;
+
+#define ENTRY_MISSING(op_ret, op_errno) (op_ret == -1 && op_errno == ENOENT)
-#define is_revalidate(loc) (inode_ctx_get (loc->inode, this, NULL) == 0)
+#define is_revalidate(loc) (dht_inode_ctx_layout_get (loc->inode, this, NULL) == 0)
#define is_last_call(cnt) (cnt == 0)
-#define DHT_LINKFILE_MODE (S_ISVTX)
-#define check_is_linkfile(i,s,x) ((s->st_mode & ~S_IFMT) == DHT_LINKFILE_MODE)
+#define DHT_MIGRATION_IN_PROGRESS 1
+#define DHT_MIGRATION_COMPLETED 2
+
+#define DHT_LINKFILE_MODE (S_ISVTX)
+
+#define check_is_linkfile(i,s,x,n) ( \
+ ((st_mode_from_ia ((s)->ia_prot, (s)->ia_type) & ~S_IFMT) \
+ == DHT_LINKFILE_MODE) && \
+ dict_get (x, n))
+
+#define IS_DHT_MIGRATION_PHASE2(buf) ( \
+ IA_ISREG ((buf)->ia_type) && \
+ ((st_mode_from_ia ((buf)->ia_prot, (buf)->ia_type) & \
+ ~S_IFMT) == DHT_LINKFILE_MODE))
-#define check_is_dir(i,s,x) (S_ISDIR(s->st_mode))
+#define IS_DHT_MIGRATION_PHASE1(buf) ( \
+ IA_ISREG ((buf)->ia_type) && \
+ ((buf)->ia_prot.sticky == 1) && \
+ ((buf)->ia_prot.sgid == 1))
+
+#define DHT_STRIP_PHASE1_FLAGS(buf) do { \
+ if ((buf) && IS_DHT_MIGRATION_PHASE1(buf)) { \
+ (buf)->ia_prot.sticky = 0; \
+ (buf)->ia_prot.sgid = 0; \
+ } \
+ } while (0)
+
+#define check_is_dir(i,s,x) (IA_ISDIR(s->ia_type))
#define layout_is_sane(layout) ((layout) && (layout->cnt > 0))
-#define DHT_STACK_UNWIND(frame, params ...) do { \
- dht_local_t *__local = NULL; \
- __local = frame->local; \
- frame->local = NULL; \
- STACK_UNWIND (frame, params); \
- dht_local_wipe (__local); \
- } while (0)
-
-#define DHT_STACK_DESTROY(frame) do { \
- dht_local_t *__local = NULL; \
- __local = frame->local; \
- frame->local = NULL; \
- STACK_DESTROY (frame->root); \
- dht_local_wipe (__local); \
- } while (0)
-
-dht_layout_t *dht_layout_new (xlator_t *this, int cnt);
-dht_layout_t *dht_layout_get (xlator_t *this, inode_t *inode);
-dht_layout_t *dht_layout_for_subvol (xlator_t *this, xlator_t *subvol);
-xlator_t *dht_layout_search (xlator_t *this, dht_layout_t *layout,
- const char *name);
-int dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout);
-int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
- uint32_t *holes_p, uint32_t *overlaps_p,
- uint32_t *missing_p, uint32_t *down_p,
- uint32_t *misc_p);
-int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout,
- xlator_t *subvol, loc_t *loc, dict_t *xattr);
+#define DHT_STACK_UNWIND(fop, frame, params ...) do { \
+ dht_local_t *__local = NULL; \
+ xlator_t *__xl = NULL; \
+ if (frame) { \
+ __xl = frame->this; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT (fop, frame, params); \
+ dht_local_wipe (__xl, __local); \
+ } while (0)
+
+#define DHT_STACK_DESTROY(frame) do { \
+ dht_local_t *__local = NULL; \
+ xlator_t *__xl = NULL; \
+ __xl = frame->this; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ STACK_DESTROY (frame->root); \
+ dht_local_wipe (__xl, __local); \
+ } while (0)
+
+#define DHT_UPDATE_TIME(ctx_sec, ctx_nsec, new_sec, new_nsec, inode, post) do {\
+ int32_t sec = 0; \
+ sec = new_sec; \
+ LOCK (&inode->lock); \
+ { \
+ new_sec = max(new_sec, ctx_sec); \
+ if (sec < new_sec) \
+ new_nsec = ctx_nsec; \
+ if (sec == new_sec) \
+ new_nsec = max (new_nsec, ctx_nsec); \
+ if (post) { \
+ ctx_sec = new_sec; \
+ ctx_nsec = new_nsec; \
+ } \
+ } \
+ UNLOCK (&inode->lock); \
+ } while (0)
+
+#define is_greater_time(a, an, b, bn) (((a) < (b)) || (((a) == (b)) && ((an) < (bn))))
+dht_layout_t *dht_layout_new (xlator_t *this, int cnt);
+dht_layout_t *dht_layout_get (xlator_t *this, inode_t *inode);
+dht_layout_t *dht_layout_for_subvol (xlator_t *this, xlator_t *subvol);
+xlator_t *dht_layout_search (xlator_t *this, dht_layout_t *layout,
+ const char *name);
+int dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout);
+int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
+ uint32_t *holes_p, uint32_t *overlaps_p,
+ uint32_t *missing_p, uint32_t *down_p,
+ uint32_t *misc_p, uint32_t *no_space_p);
+int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout,
+ xlator_t *subvol, loc_t *loc, dict_t *xattr);
xlator_t *dht_linkfile_subvol (xlator_t *this, inode_t *inode,
- struct stat *buf, dict_t *xattr);
-int dht_linkfile_unlink (call_frame_t *frame, xlator_t *this,
- xlator_t *subvol, loc_t *loc);
+ struct iatt *buf, dict_t *xattr);
+int dht_linkfile_unlink (call_frame_t *frame, xlator_t *this,
+ xlator_t *subvol, loc_t *loc);
int dht_layouts_init (xlator_t *this, dht_conf_t *conf);
int dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
- int op_ret, int op_errno, dict_t *xattr);
+ int op_ret, int op_errno, dict_t *xattr);
int dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout,
- int pos, int32_t **disk_layout_p);
-int dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
- int pos, int32_t *disk_layout);
+ int pos, int32_t **disk_layout_p);
+int dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
+ int pos, void *disk_layout_raw, int disk_layout_len);
int dht_frame_return (call_frame_t *frame);
-int dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y);
+int dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y);
int dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol,
- uint64_t *x);
+ uint64_t *x);
-void dht_local_wipe (dht_local_t *local);
-dht_local_t *dht_local_init (call_frame_t *frame);
-int dht_stat_merge (xlator_t *this, struct stat *to, struct stat *from,
- xlator_t *subvol);
+void dht_local_wipe (xlator_t *this, dht_local_t *local);
+dht_local_t *dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd,
+ glusterfs_fop_t fop);
+int dht_iatt_merge (xlator_t *this, struct iatt *to, struct iatt *from,
+ xlator_t *subvol);
xlator_t *dht_subvol_get_hashed (xlator_t *this, loc_t *loc);
xlator_t *dht_subvol_get_cached (xlator_t *this, inode_t *inode);
xlator_t *dht_subvol_next (xlator_t *this, xlator_t *prev);
-int dht_subvol_cnt (xlator_t *this, xlator_t *subvol);
+xlator_t *dht_subvol_next_available (xlator_t *this, xlator_t *prev);
+int dht_subvol_cnt (xlator_t *this, xlator_t *subvol);
-int dht_hash_compute (int type, const char *name, uint32_t *hash_p);
+int dht_hash_compute (xlator_t *this, int type, const char *name, uint32_t *hash_p);
-int dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
- xlator_t *tovol, xlator_t *fromvol, loc_t *loc);
-int dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc);
-int dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc);
+int dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
+ xlator_t *this, xlator_t *tovol,
+ xlator_t *fromvol, loc_t *loc);
+int dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc);
+int dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc);
int
-dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
- loc_t *loc, dht_layout_t *layout);
+dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
+ loc_t *loc, dht_layout_t *layout);
int
dht_selfheal_new_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
- dht_layout_t *layout);
+ dht_layout_t *layout);
int
-dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
- loc_t *loc, dht_layout_t *layout);
+dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
+ loc_t *loc, dht_layout_t *layout);
int
dht_layout_sort_volname (dht_layout_t *layout);
-int dht_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc);
-
int dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc);
-int dht_is_subvol_filled (xlator_t *this, xlator_t *subvol);
-xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol);
-int dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx);
+gf_boolean_t dht_is_subvol_filled (xlator_t *this, xlator_t *subvol);
+xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol,
+ dht_local_t *layout);
+int dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx);
+
+int dht_layout_preset (xlator_t *this, xlator_t *subvol, inode_t *inode);
+int dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout);;
+void dht_layout_unref (xlator_t *this, dht_layout_t *layout);
+dht_layout_t *dht_layout_ref (xlator_t *this, dht_layout_t *layout);
+xlator_t *dht_first_up_subvol (xlator_t *this);
+xlator_t *dht_last_up_subvol (xlator_t *this);
+
+int dht_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name);
+
+int dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc,
+ xlator_t **subvol);
+
+int dht_rename_cleanup (call_frame_t *frame);
+int dht_rename_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+
+int dht_fix_directory_layout (call_frame_t *frame,
+ dht_selfheal_dir_cbk_t dir_cbk,
+ dht_layout_t *layout);
+
+int dht_init_subvolumes (xlator_t *this, dht_conf_t *conf);
+
+/* migration/rebalance */
+int dht_start_rebalance_task (xlator_t *this, call_frame_t *frame);
+
+int dht_rebalance_in_progress_check (xlator_t *this, call_frame_t *frame);
+int dht_rebalance_complete_check (xlator_t *this, call_frame_t *frame);
+
+
+/* FOPS */
+int32_t dht_lookup (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *xattr_req);
+
+int32_t dht_stat (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, dict_t *xdata);
+
+int32_t dht_fstat (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd, dict_t *xdata);
+
+int32_t dht_truncate (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ off_t offset, dict_t *xdata);
+
+int32_t dht_ftruncate (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ off_t offset, dict_t *xdata);
+
+int32_t dht_access (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t mask, dict_t *xdata);
+
+int32_t dht_readlink (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ size_t size, dict_t *xdata);
+
+int32_t dht_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata);
+
+int32_t dht_mkdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata);
+
+int32_t dht_unlink (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, int xflag, dict_t *xdata);
+
+int32_t dht_rmdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int flags, dict_t *xdata);
+
+int32_t dht_symlink (call_frame_t *frame, xlator_t *this,
+ const char *linkpath, loc_t *loc, mode_t umask,
+ dict_t *xdata);
+
+int32_t dht_rename (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata);
+
+int32_t dht_link (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata);
+
+int32_t dht_create (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *params);
+
+int32_t dht_open (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata);
+
+int32_t dht_readv (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata);
+
+int32_t dht_writev (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ struct iovec *vector,
+ int32_t count,
+ off_t offset,
+ uint32_t flags,
+ struct iobref *iobref, dict_t *xdata);
+
+int32_t dht_flush (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd, dict_t *xdata);
+
+int32_t dht_fsync (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t datasync, dict_t *xdata);
+
+int32_t dht_opendir (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, fd_t *fd, dict_t *xdata);
+
+int32_t dht_fsyncdir (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t datasync, dict_t *xdata);
+
+int32_t dht_statfs (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, dict_t *xdata);
+
+int32_t dht_setxattr (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *dict,
+ int32_t flags, dict_t *xdata);
+
+int32_t dht_getxattr (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ const char *name, dict_t *xdata);
+
+int32_t dht_fsetxattr (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ dict_t *dict,
+ int32_t flags, dict_t *xdata);
+
+int32_t dht_fgetxattr (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ const char *name, dict_t *xdata);
+
+int32_t dht_removexattr (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ const char *name, dict_t *xdata);
+int32_t dht_fremovexattr (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ const char *name, dict_t *xdata);
+
+int32_t dht_lk (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+int32_t dht_inodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+int32_t dht_finodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+int32_t dht_entrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
+
+int32_t dht_fentrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
+
+int32_t dht_readdir (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size, off_t off, dict_t *xdata);
+
+int32_t dht_readdirp (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size, off_t off, dict_t *dict);
+
+int32_t dht_xattrop (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ gf_xattrop_flags_t flags,
+ dict_t *dict, dict_t *xdata);
+
+int32_t dht_fxattrop (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ gf_xattrop_flags_t flags,
+ dict_t *dict, dict_t *xdata);
+
+int32_t dht_forget (xlator_t *this, inode_t *inode);
+int32_t dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+int32_t dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+int32_t dht_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t mode, off_t offset, size_t len, dict_t *xdata);
+int32_t dht_discard(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata);
+int32_t dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata);
+
+int32_t dht_init (xlator_t *this);
+void dht_fini (xlator_t *this);
+int dht_reconfigure (xlator_t *this, dict_t *options);
+int32_t dht_notify (xlator_t *this, int32_t event, void *data, ...);
+
+/* definitions for nufa/switch */
+int dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent);
+int dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent);
+int dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent);
+int dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent);
+int dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ fd_t *fd, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+int dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int
+gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict);
+
+int
+gf_defrag_stop (gf_defrag_info_t *defrag, dict_t *output);
+
+void*
+gf_defrag_start (void *this);
+
+int32_t
+gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs,
+ struct iatt *stbuf);
+int
+dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
+ int flag);
+int
+dht_inode_ctx_layout_get (inode_t *inode, xlator_t *this,
+ dht_layout_t **layout_int);
+int
+dht_inode_ctx_layout_set (inode_t *inode, xlator_t *this,
+ dht_layout_t* layout_int);
+int
+dht_inode_ctx_time_update (inode_t *inode, xlator_t *this, struct iatt *stat,
+ int32_t update_ctx);
-int dht_layout_inode_set (xlator_t *this, xlator_t *subvol, inode_t *inode);
-xlator_t *dht_first_up_subvol (xlator_t *this);
+int dht_inode_ctx_get (inode_t *inode, xlator_t *this, dht_inode_ctx_t **ctx);
+int dht_inode_ctx_set (inode_t *inode, xlator_t *this, dht_inode_ctx_t *ctx);
+int
+dht_dir_attr_heal (void *data);
+int
+dht_dir_attr_heal_done (int ret, call_frame_t *sync_frame, void *data);
+int
+dht_dir_has_layout (dict_t *xattr, char *name);
+gf_boolean_t
+dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator);
+xlator_t *
+dht_subvol_with_free_space_inodes (xlator_t *this, xlator_t *subvol,
+ dht_layout_t *layout);
+xlator_t *
+dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol,
+ dht_layout_t *layout);
+int
+dht_linkfile_attr_heal (call_frame_t *frame, xlator_t *this);
+
+void
+dht_layout_dump (dht_layout_t *layout, const char *prefix);
+int32_t
+dht_priv_dump (xlator_t *this);
+int32_t
+dht_inodectx_dump (xlator_t *this, inode_t *inode);
+
+int
+dht_inode_ctx_get1 (xlator_t *this, inode_t *inode, xlator_t **subvol);
-#endif /* _DHT_H */
+#endif/* _DHT_H */
diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c
index 468e88798..fe3955ecb 100644
--- a/xlators/cluster/dht/src/dht-diskusage.c
+++ b/xlators/cluster/dht/src/dht-diskusage.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -33,52 +24,70 @@
#include <sys/time.h>
-int
+int
dht_du_info_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct statvfs *statvfs)
+ int op_ret, int op_errno, struct statvfs *statvfs,
+ dict_t *xdata)
{
dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
+ call_frame_t *prev = NULL;
int this_call_cnt = 0;
- int i = 0;
- double percent = 0;
- uint64_t bytes = 0;
-
- local = frame->local;
- conf = this->private;
- prev = cookie;
-
- if (op_ret == -1)
- goto out;
-
- if (statvfs && statvfs->f_blocks) {
- percent = (statvfs->f_bfree * 100) / statvfs->f_blocks;
- bytes = (statvfs->f_bfree * statvfs->f_bsize);
- }
-
- LOCK (&conf->subvolume_lock);
- {
- for (i = 0; i < conf->subvolume_cnt; i++)
- if (prev->this == conf->subvolumes[i]) {
- conf->du_stats[i].avail_percent = percent;
- conf->du_stats[i].avail_space = bytes;
- gf_log (this->name, GF_LOG_DEBUG,
- "on subvolume '%s': avail_percent is: "
- "%.2f and avail_space is: %"PRIu64"",
- prev->this->name,
- conf->du_stats[i].avail_percent,
- conf->du_stats[i].avail_space);
- }
- }
- UNLOCK (&conf->subvolume_lock);
+ int i = 0;
+ double percent = 0;
+ double percent_inodes = 0;
+ uint64_t bytes = 0;
- out:
+ conf = this->private;
+ prev = cookie;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to get disk info from %s", prev->this->name);
+ goto out;
+ }
+
+ if (statvfs && statvfs->f_blocks) {
+ percent = (statvfs->f_bavail * 100) / statvfs->f_blocks;
+ bytes = (statvfs->f_bavail * statvfs->f_frsize);
+ }
+
+ if (statvfs && statvfs->f_files) {
+ percent_inodes = (statvfs->f_ffree * 100) / statvfs->f_files;
+ } else {
+ /* set percent inodes to 100 for dynamically allocated inode filesystems
+ this logic holds good so that, distribute has nothing to worry about
+ total inodes rather let the 'create()' to be scheduled on the hashed
+ subvol regardless of the total inodes. since we have no awareness on
+ loosing inodes this logic fits well
+ */
+ percent_inodes = 100;
+ }
+
+ LOCK (&conf->subvolume_lock);
+ {
+ for (i = 0; i < conf->subvolume_cnt; i++)
+ if (prev->this == conf->subvolumes[i]) {
+ conf->du_stats[i].avail_percent = percent;
+ conf->du_stats[i].avail_space = bytes;
+ conf->du_stats[i].avail_inodes = percent_inodes;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "on subvolume '%s': avail_percent is: "
+ "%.2f and avail_space is: %"PRIu64" "
+ "and avail_inodes is: %.2f",
+ prev->this->name,
+ conf->du_stats[i].avail_percent,
+ conf->du_stats[i].avail_space,
+ conf->du_stats[i].avail_inodes);
+ }
+ }
+ UNLOCK (&conf->subvolume_lock);
+
+out:
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt))
DHT_STACK_DESTROY (frame);
- return 0;
+ return 0;
}
int
@@ -87,177 +96,319 @@ dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx)
dht_conf_t *conf = NULL;
call_frame_t *statfs_frame = NULL;
dht_local_t *statfs_local = NULL;
- call_pool_t *pool = NULL;
+ call_pool_t *pool = NULL;
+ loc_t tmp_loc = {0,};
conf = this->private;
- pool = this->ctx->pool;
-
- statfs_frame = create_frame (this, pool);
- if (!statfs_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- statfs_local = dht_local_init (statfs_frame);
- if (!statfs_local) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- loc_t tmp_loc = { .inode = NULL,
- .path = "/",
- };
-
- statfs_local->call_cnt = 1;
- STACK_WIND (statfs_frame, dht_du_info_cbk,
- conf->subvolumes[subvol_idx],
- conf->subvolumes[subvol_idx]->fops->statfs,
- &tmp_loc);
-
- return 0;
- err:
+ pool = this->ctx->pool;
+
+ statfs_frame = create_frame (this, pool);
+ if (!statfs_frame) {
+ goto err;
+ }
+
+ /* local->fop value is not used in this case */
+ statfs_local = dht_local_init (statfs_frame, NULL, NULL,
+ GF_FOP_MAXVALUE);
+ if (!statfs_local) {
+ goto err;
+ }
+
+ /* make it root gfid, should be enough to get the proper info back */
+ tmp_loc.gfid[15] = 1;
+
+ statfs_local->call_cnt = 1;
+ STACK_WIND (statfs_frame, dht_du_info_cbk,
+ conf->subvolumes[subvol_idx],
+ conf->subvolumes[subvol_idx]->fops->statfs,
+ &tmp_loc, NULL);
+
+ return 0;
+err:
if (statfs_frame)
DHT_STACK_DESTROY (statfs_frame);
-
- return -1;
+
+ return -1;
}
int
dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc)
{
- int i = 0;
+ int i = 0;
dht_conf_t *conf = NULL;
call_frame_t *statfs_frame = NULL;
dht_local_t *statfs_local = NULL;
- struct timeval tv = {0,};
+ struct timeval tv = {0,};
+ loc_t tmp_loc = {0,};
conf = this->private;
gettimeofday (&tv, NULL);
- if (tv.tv_sec > (conf->refresh_interval
- + conf->last_stat_fetch.tv_sec)) {
- statfs_frame = copy_frame (frame);
- if (!statfs_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- statfs_local = dht_local_init (statfs_frame);
- if (!statfs_local) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ /* make it root gfid, should be enough to get the proper
+ info back */
+ tmp_loc.gfid[15] = 1;
- loc_copy (&statfs_local->loc, loc);
- loc_t tmp_loc = { .inode = NULL,
- .path = "/",
- };
-
- statfs_local->call_cnt = conf->subvolume_cnt;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (statfs_frame, dht_du_info_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->statfs,
- &tmp_loc);
- }
+ if (tv.tv_sec > (conf->refresh_interval
+ + conf->last_stat_fetch.tv_sec)) {
- conf->last_stat_fetch.tv_sec = tv.tv_sec;
- }
- return 0;
+ statfs_frame = copy_frame (frame);
+ if (!statfs_frame) {
+ goto err;
+ }
+
+ /* In this case, 'local->fop' is not used */
+ statfs_local = dht_local_init (statfs_frame, loc, NULL,
+ GF_FOP_MAXVALUE);
+ if (!statfs_local) {
+ goto err;
+ }
+
+ statfs_local->call_cnt = conf->subvolume_cnt;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND (statfs_frame, dht_du_info_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->statfs,
+ &tmp_loc, NULL);
+ }
+
+ conf->last_stat_fetch.tv_sec = tv.tv_sec;
+ }
+ return 0;
err:
if (statfs_frame)
DHT_STACK_DESTROY (statfs_frame);
- return -1;
+ return -1;
}
-int
+gf_boolean_t
dht_is_subvol_filled (xlator_t *this, xlator_t *subvol)
{
- int i = 0;
- int subvol_filled = 0;
+ int i = 0;
dht_conf_t *conf = NULL;
+ gf_boolean_t subvol_filled_inodes = _gf_false;
+ gf_boolean_t subvol_filled_space = _gf_false;
+ gf_boolean_t is_subvol_filled = _gf_false;
- conf = this->private;
+ conf = this->private;
+
+ /* Check for values above specified percent or free disk */
+ LOCK (&conf->subvolume_lock);
+ {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ if (conf->disk_unit == 'p') {
+ if (conf->du_stats[i].avail_percent <
+ conf->min_free_disk) {
+ subvol_filled_space = _gf_true;
+ break;
+ }
+
+ } else {
+ if (conf->du_stats[i].avail_space <
+ conf->min_free_disk) {
+ subvol_filled_space = _gf_true;
+ break;
+ }
+ }
+ if (conf->du_stats[i].avail_inodes <
+ conf->min_free_inodes) {
+ subvol_filled_inodes = _gf_true;
+ break;
+ }
+ }
+ }
+ }
+ UNLOCK (&conf->subvolume_lock);
+
+ if (subvol_filled_space && conf->subvolume_status[i]) {
+ if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "disk space on subvolume '%s' is getting "
+ "full (%.2f %%), consider adding more nodes",
+ subvol->name,
+ (100 - conf->du_stats[i].avail_percent));
+ }
+ }
+
+ if (subvol_filled_inodes && conf->subvolume_status[i]) {
+ if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "inodes on subvolume '%s' are at "
+ "(%.2f %%), consider adding more nodes",
+ subvol->name,
+ (100 - conf->du_stats[i].avail_inodes));
+ }
+ }
+
+ is_subvol_filled = (subvol_filled_space || subvol_filled_inodes);
+
+ return is_subvol_filled;
+}
+
+
+/*Get the best subvolume to create the file in*/
+xlator_t *
+dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol,
+ dht_local_t *local)
+{
+ xlator_t *avail_subvol = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
+ loc_t *loc = NULL;
+
+ conf = this->private;
+ if (!local)
+ goto out;
+ loc = &local->loc;
+ if (!local->layout) {
+ layout = dht_layout_get (this, loc->parent);
+
+ if (!layout) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "layout missing path=%s parent=%s",
+ loc->path, uuid_utoa (loc->parent->gfid));
+ goto out;
+ }
+ } else {
+ layout = dht_layout_ref (this, local->layout);
+ }
- /* Check for values above specified percent or free disk */
LOCK (&conf->subvolume_lock);
- {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (subvol == conf->subvolumes[i]) {
- if (conf->disk_unit == 'p') {
- if (conf->du_stats[i].avail_percent <
- conf->min_free_disk) {
- subvol_filled = 1;
- break;
- }
- } else {
- if (conf->du_stats[i].avail_space <
- conf->min_free_disk) {
- subvol_filled = 1;
- break;
- }
- }
- }
+ {
+ avail_subvol = dht_subvol_with_free_space_inodes(this, subvol,
+ layout);
+ if(!avail_subvol)
+ {
+ avail_subvol = dht_subvol_maxspace_nonzeroinode(this,
+ subvol,
+ layout);
+ }
+
+ }
+ UNLOCK (&conf->subvolume_lock);
+out:
+ if (!avail_subvol) {
+ gf_log (this->name,
+ GF_LOG_DEBUG,
+ "no subvolume has enough free space and/or inodes\
+ to create");
+ avail_subvol = subvol;
+ }
+
+ if (layout)
+ dht_layout_unref (this, layout);
+ return avail_subvol;
+}
+
+static inline
+int32_t dht_subvol_has_err (xlator_t *this, dht_layout_t *layout)
+{
+ int ret = -1;
+ int i = 0;
+
+ if (!this || !layout)
+ goto out;
+
+ /* check if subvol has layout errors, before selecting it */
+ for (i = 0; i < layout->cnt; i++) {
+ if (!strcmp (layout->list[i].xlator->name, this->name) &&
+ (layout->list[i].err != 0)) {
+ ret = -1;
+ goto out;
}
}
- UNLOCK (&conf->subvolume_lock);
-
- if (subvol_filled) {
- if (!(conf->du_stats[i].log++ % GF_UNIVERSAL_ANSWER)) {
- gf_log (this->name, GF_LOG_WARNING,
- "disk space on subvolume '%s' is getting "
- "full (%.2f %%), consider adding more nodes",
- subvol->name,
- (100 - conf->du_stats[i].avail_percent));
+ ret = 0;
+out:
+ return ret;
+}
+
+/*Get subvolume which has both space and inodes more than the min criteria*/
+xlator_t *
+dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol,
+ dht_layout_t *layout)
+{
+ int i = 0;
+ double max = 0;
+ double max_inodes = 0;
+ int ignore_subvol = 0;
+
+ xlator_t *avail_subvol = NULL;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ for(i=0; i < conf->subvolume_cnt; i++) {
+ /* check if subvol has layout errors, before selecting it */
+ ignore_subvol = dht_subvol_has_err (conf->subvolumes[i],
+ layout);
+ if (ignore_subvol)
+ continue;
+
+ if ((conf->disk_unit == 'p') &&
+ (conf->du_stats[i].avail_percent > conf->min_free_disk) &&
+ (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) {
+ if ((conf->du_stats[i].avail_inodes > max_inodes) ||
+ (conf->du_stats[i].avail_percent > max)) {
+ max = conf->du_stats[i].avail_percent;
+ max_inodes = conf->du_stats[i].avail_inodes;
+ avail_subvol = conf->subvolumes[i];
+ }
+ }
+
+ if ((conf->disk_unit != 'p') &&
+ (conf->du_stats[i].avail_space > conf->min_free_disk) &&
+ (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) {
+ if ((conf->du_stats[i].avail_inodes > max_inodes) ||
+ (conf->du_stats[i].avail_space > max)) {
+ max = conf->du_stats[i].avail_space;
+ max_inodes = conf->du_stats[i].avail_inodes;
+ avail_subvol = conf->subvolumes[i];
+ }
}
}
- return subvol_filled;
+ return avail_subvol;
}
+
+/* Get subvol which has atleast one inode and maximum space */
xlator_t *
-dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol)
+dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol,
+ dht_layout_t *layout)
{
int i = 0;
- double max= 0;
+ double max = 0;
+ int ignore_subvol = 0;
+
xlator_t *avail_subvol = NULL;
- dht_conf_t *conf = NULL;
+ dht_conf_t *conf = NULL;
conf = this->private;
- avail_subvol = subvol;
- LOCK (&conf->subvolume_lock);
- {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->disk_unit == 'p') {
- if (conf->du_stats[i].avail_percent > max) {
- max = conf->du_stats[i].avail_percent;
- avail_subvol = conf->subvolumes[i];
- }
- } else {
- if (conf->du_stats[i].avail_space > max) {
- max = conf->du_stats[i].avail_space;
- avail_subvol = conf->subvolumes[i];
- }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ /* check if subvol has layout errors, before selecting it */
+ ignore_subvol = dht_subvol_has_err (conf->subvolumes[i],
+ layout);
+ if (ignore_subvol)
+ continue;
+
+ if (conf->disk_unit == 'p') {
+ if ((conf->du_stats[i].avail_percent > max)
+ && (conf->du_stats[i].avail_inodes > 0 )) {
+ max = conf->du_stats[i].avail_percent;
+ avail_subvol = conf->subvolumes[i];
}
- }
+ } else {
+ if ((conf->du_stats[i].avail_space > max)
+ && (conf->du_stats[i].avail_inodes > 0)) {
+ max = conf->du_stats[i].avail_space;
+ avail_subvol = conf->subvolumes[i];
+ }
+ }
}
- UNLOCK (&conf->subvolume_lock);
-
- if (max < conf->min_free_disk)
- avail_subvol = subvol;
- if (avail_subvol == subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume has enough free space to create");
- }
-
return avail_subvol;
}
diff --git a/xlators/cluster/dht/src/dht-hashfn.c b/xlators/cluster/dht/src/dht-hashfn.c
index 31cb828a4..656cf23a0 100644
--- a/xlators/cluster/dht/src/dht-hashfn.c
+++ b/xlators/cluster/dht/src/dht-hashfn.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -29,58 +20,92 @@
#include "hashfn.h"
-typedef enum {
- DHT_HASH_TYPE_DM,
-} dht_hashfn_type_t;
-
-
int
dht_hash_compute_internal (int type, const char *name, uint32_t *hash_p)
{
- int ret = 0;
- uint32_t hash = 0;
-
- switch (type) {
- case DHT_HASH_TYPE_DM:
- hash = gf_dm_hashfn (name, strlen (name));
- break;
- default:
- ret = -1;
- break;
- }
-
- if (ret == 0) {
- *hash_p = hash;
- }
-
- return ret;
+ int ret = 0;
+ uint32_t hash = 0;
+
+ switch (type) {
+ case DHT_HASH_TYPE_DM:
+ case DHT_HASH_TYPE_DM_USER:
+ hash = gf_dm_hashfn (name, strlen (name));
+ break;
+ default:
+ ret = -1;
+ break;
+ }
+
+ if (ret == 0) {
+ *hash_p = hash;
+ }
+
+ return ret;
}
-#define MAKE_RSYNC_FRIENDLY_NAME(rsync_frndly_name, name) do { \
- rsync_frndly_name = (char *) name; \
- if (name[0] == '.') { \
- char *dot = 0; \
- int namelen = 0; \
- \
- dot = strrchr (name, '.'); \
- if (dot && dot > (name + 1) && *(dot + 1)) { \
- namelen = (dot - name); \
- rsync_frndly_name = alloca (namelen); \
- strncpy (rsync_frndly_name, name + 1, \
- namelen); \
- rsync_frndly_name[namelen - 1] = 0; \
- } \
- } \
- } while (0);
-
+static inline
+gf_boolean_t
+dht_munge_name (const char *original, char *modified, size_t len, regex_t *re)
+{
+ regmatch_t matches[2];
+ size_t new_len;
+
+ if (regexec(re,original,2,matches,0) != REG_NOMATCH) {
+ if (matches[1].rm_so != -1) {
+ new_len = matches[1].rm_eo - matches[1].rm_so;
+ /* Equal would fail due to the NUL at the end. */
+ if (new_len < len) {
+ memcpy (modified,original+matches[1].rm_so,
+ new_len);
+ modified[new_len] = '\0';
+ return _gf_true;
+ }
+ }
+ }
+
+ /* This is guaranteed safe because of how the dest was allocated. */
+ strcpy(modified,original);
+ return _gf_false;
+}
int
-dht_hash_compute (int type, const char *name, uint32_t *hash_p)
+dht_hash_compute (xlator_t *this, int type, const char *name, uint32_t *hash_p)
{
- char *rsync_friendly_name = NULL;
-
- MAKE_RSYNC_FRIENDLY_NAME (rsync_friendly_name, name);
-
- return dht_hash_compute_internal (type, rsync_friendly_name, hash_p);
+ char *rsync_friendly_name = NULL;
+ dht_conf_t *priv = this->private;
+ size_t len = 0;
+ gf_boolean_t munged = _gf_false;
+
+ /*
+ * It wouldn't be safe to use alloca in an inline function that doesn't
+ * actually get inlined, and it wouldn't be efficient to do a real
+ * allocation, so we use alloca here (if needed) and pass that to the
+ * inline.
+ */
+
+ if (priv->extra_regex_valid) {
+ len = strlen(name) + 1;
+ rsync_friendly_name = alloca(len);
+ munged = dht_munge_name (name, rsync_friendly_name, len,
+ &priv->extra_regex);
+ }
+
+ if (!munged && priv->rsync_regex_valid) {
+ len = strlen(name) + 1;
+ rsync_friendly_name = alloca(len);
+ gf_log (this->name, GF_LOG_TRACE, "trying regex for %s", name);
+ munged = dht_munge_name (name, rsync_friendly_name, len,
+ &priv->rsync_regex);
+ if (munged) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "munged down to %s", rsync_friendly_name);
+ }
+ }
+
+ if (!munged) {
+ rsync_friendly_name = (char *)name;
+ }
+
+ return dht_hash_compute_internal (type, rsync_friendly_name, hash_p);
}
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
index 0ef88a59b..311a48112 100644
--- a/xlators/cluster/dht/src/dht-helper.c
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -27,177 +18,406 @@
#include "xlator.h"
#include "dht-common.h"
+static inline int
+dht_inode_ctx_set1 (xlator_t *this, inode_t *inode, xlator_t *subvol)
+{
+ uint64_t tmp_subvol = 0;
+
+ tmp_subvol = (long)subvol;
+ return inode_ctx_set1 (inode, this, &tmp_subvol);
+}
+
+int
+dht_inode_ctx_get1 (xlator_t *this, inode_t *inode, xlator_t **subvol)
+{
+ int ret = -1;
+ uint64_t tmp_subvol = 0;
+
+ ret = inode_ctx_get1 (inode, this, &tmp_subvol);
+ if (tmp_subvol && subvol)
+ *subvol = (xlator_t *)tmp_subvol;
+
+ return ret;
+}
+
int
dht_frame_return (call_frame_t *frame)
{
- dht_local_t *local = NULL;
- int this_call_cnt = -1;
+ dht_local_t *local = NULL;
+ int this_call_cnt = -1;
+
+ if (!frame)
+ return -1;
+
+ local = frame->local;
- if (!frame)
- return -1;
+ LOCK (&frame->lock);
+ {
+ this_call_cnt = --local->call_cnt;
+ }
+ UNLOCK (&frame->lock);
+
+ return this_call_cnt;
+}
- local = frame->local;
- LOCK (&frame->lock);
- {
- this_call_cnt = --local->call_cnt;
+static uint64_t
+dht_bits_for (uint64_t num)
+{
+ uint64_t bits = 0, ctrl = 1;
+
+ while (ctrl < num) {
+ ctrl *= 2;
+ bits ++;
}
- UNLOCK (&frame->lock);
- return this_call_cnt;
+ return bits;
}
+/*
+ * A slightly "updated" version of the algorithm described in the commit log
+ * is used here.
+ *
+ * The only enhancement is that:
+ *
+ * - The number of bits used by the backend filesystem for HUGE d_off which
+ * is described as 63, and
+ * - The number of bits used by the d_off presented by the transformation
+ * upwards which is described as 64, are both made "configurable."
+ */
+
+
+#define BACKEND_D_OFF_BITS 63
+#define PRESENT_D_OFF_BITS 63
+
+#define ONE 1ULL
+#define MASK (~0ULL)
+#define PRESENT_MASK (MASK >> (64 - PRESENT_D_OFF_BITS))
+#define BACKEND_MASK (MASK >> (64 - BACKEND_D_OFF_BITS))
+
+#define TOP_BIT (ONE << (PRESENT_D_OFF_BITS - 1))
+#define SHIFT_BITS (max (0, (BACKEND_D_OFF_BITS - PRESENT_D_OFF_BITS + 1)))
int
dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y_p)
{
- dht_conf_t *conf = NULL;
- int cnt = 0;
- int max = 0;
- uint64_t y = 0;
+ dht_conf_t *conf = NULL;
+ int cnt = 0;
+ int max = 0;
+ uint64_t y = 0;
+ uint64_t hi_mask = 0;
+ uint64_t off_mask = 0;
+ int max_bits = 0;
+
+ if (x == ((uint64_t) -1)) {
+ y = (uint64_t) -1;
+ goto out;
+ }
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ max = conf->subvolume_cnt;
+ cnt = dht_subvol_cnt (this, subvol);
- if (x == ((uint64_t) -1)) {
- y = (uint64_t) -1;
+ if (max == 1) {
+ y = x;
goto out;
}
- conf = this->private;
+ max_bits = dht_bits_for (max);
- max = conf->subvolume_cnt;
- cnt = dht_subvol_cnt (this, subvol);
+ hi_mask = ~(PRESENT_MASK >> (max_bits + 1));
- y = ((x * max) + cnt);
+ if (x & hi_mask) {
+ /* HUGE d_off */
+ off_mask = MASK << max_bits;
+ y = TOP_BIT | ((x >> SHIFT_BITS) & off_mask) | cnt;
+ } else {
+ /* small d_off */
+ y = ((x * max) + cnt);
+ }
out:
- if (y_p)
- *y_p = y;
+ if (y_p)
+ *y_p = y;
- return 0;
+ return 0;
}
+int
+dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc,
+ xlator_t **subvol)
+{
+ char *new_name = NULL;
+ char *new_path = NULL;
+ xlator_list_t *trav = NULL;
+ char key[1024] = {0,};
+ int ret = 0; /* not found */
+
+ /* Why do other tasks if first required 'char' itself is not there */
+ if (!new_loc || !loc || !loc->name || !strchr (loc->name, '@'))
+ goto out;
+
+ trav = this->children;
+ while (trav) {
+ snprintf (key, 1024, "*@%s:%s", this->name, trav->xlator->name);
+ if (fnmatch (key, loc->name, FNM_NOESCAPE) == 0) {
+ new_name = GF_CALLOC(strlen (loc->name),
+ sizeof (char),
+ gf_common_mt_char);
+ if (!new_name)
+ goto out;
+ if (fnmatch (key, loc->path, FNM_NOESCAPE) == 0) {
+ new_path = GF_CALLOC(strlen (loc->path),
+ sizeof (char),
+ gf_common_mt_char);
+ if (!new_path)
+ goto out;
+ strncpy (new_path, loc->path, (strlen (loc->path) -
+ strlen (key) + 1));
+ }
+ strncpy (new_name, loc->name, (strlen (loc->name) -
+ strlen (key) + 1));
+
+ if (new_loc) {
+ new_loc->path = ((new_path) ? new_path:
+ gf_strdup (loc->path));
+ new_loc->name = new_name;
+ new_loc->inode = inode_ref (loc->inode);
+ new_loc->parent = inode_ref (loc->parent);
+ }
+ *subvol = trav->xlator;
+ ret = 1; /* success */
+ goto out;
+ }
+ trav = trav->next;
+ }
+out:
+ if (!ret) {
+ /* !success */
+ GF_FREE (new_path);
+ GF_FREE (new_name);
+ }
+ return ret;
+}
int
dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p,
- uint64_t *x_p)
+ uint64_t *x_p)
{
- dht_conf_t *conf = NULL;
- int cnt = 0;
- int max = 0;
- uint64_t x = 0;
- xlator_t *subvol = 0;
+ dht_conf_t *conf = NULL;
+ int cnt = 0;
+ int max = 0;
+ uint64_t x = 0;
+ xlator_t *subvol = 0;
+ int max_bits = 0;
+ uint64_t off_mask = 0;
+ uint64_t host_mask = 0;
+
+ if (!this->private)
+ return -1;
+
+ conf = this->private;
+ max = conf->subvolume_cnt;
+
+ if (max == 1) {
+ x = y;
+ cnt = 0;
+ goto out;
+ }
+ if (y & TOP_BIT) {
+ /* HUGE d_off */
+ max_bits = dht_bits_for (max);
+ off_mask = (MASK << max_bits);
+ host_mask = ~(off_mask);
- conf = this->private;
- max = conf->subvolume_cnt;
+ x = ((y & ~TOP_BIT) & off_mask) << SHIFT_BITS;
- cnt = y % max;
- x = y / max;
+ cnt = y & host_mask;
+ } else {
+ /* small d_off */
+ cnt = y % max;
+ x = y / max;
+ }
- subvol = conf->subvolumes[cnt];
+out:
+ subvol = conf->subvolumes[cnt];
- if (subvol_p)
- *subvol_p = subvol;
+ if (subvol_p)
+ *subvol_p = subvol;
- if (x_p)
- *x_p = x;
+ if (x_p)
+ *x_p = x;
- return 0;
+ return 0;
}
void
-dht_local_wipe (dht_local_t *local)
+dht_local_wipe (xlator_t *this, dht_local_t *local)
{
- if (!local)
- return;
+ if (!local)
+ return;
- loc_wipe (&local->loc);
- loc_wipe (&local->loc2);
+ loc_wipe (&local->loc);
+ loc_wipe (&local->loc2);
- if (local->xattr)
- dict_unref (local->xattr);
+ if (local->xattr)
+ dict_unref (local->xattr);
- if (local->inode)
- inode_unref (local->inode);
+ if (local->inode)
+ inode_unref (local->inode);
- if (local->layout)
- FREE (local->layout);
+ if (local->layout) {
+ dht_layout_unref (this, local->layout);
+ local->layout = NULL;
+ }
- loc_wipe (&local->linkfile.loc);
+ loc_wipe (&local->linkfile.loc);
- if (local->linkfile.xattr)
- dict_unref (local->linkfile.xattr);
+ if (local->linkfile.xattr)
+ dict_unref (local->linkfile.xattr);
- if (local->linkfile.inode)
- inode_unref (local->linkfile.inode);
+ if (local->linkfile.inode)
+ inode_unref (local->linkfile.inode);
- if (local->fd) {
- fd_unref (local->fd);
- local->fd = NULL;
- }
-
- if (local->xattr_req)
- dict_unref (local->xattr_req);
+ if (local->fd) {
+ fd_unref (local->fd);
+ local->fd = NULL;
+ }
+
+ if (local->params) {
+ dict_unref (local->params);
+ local->params = NULL;
+ }
+
+ if (local->xattr_req)
+ dict_unref (local->xattr_req);
+
+ if (local->selfheal.layout) {
+ dht_layout_unref (this, local->selfheal.layout);
+ local->selfheal.layout = NULL;
+ }
+
+ GF_FREE (local->newpath);
+
+ GF_FREE (local->key);
- FREE (local);
+ GF_FREE (local->rebalance.vector);
+
+ if (local->rebalance.iobref)
+ iobref_unref (local->rebalance.iobref);
+
+ mem_put (local);
}
dht_local_t *
-dht_local_init (call_frame_t *frame)
+dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd, glusterfs_fop_t fop)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ inode_t *inode = NULL;
+ int ret = 0;
+
+ local = mem_get0 (THIS->local_pool);
+ if (!local)
+ goto out;
- /* TODO: use mem-pool */
- local = CALLOC (1, sizeof (*local));
+ if (loc) {
+ ret = loc_copy (&local->loc, loc);
+ if (ret)
+ goto out;
- if (!local)
- return NULL;
+ inode = loc->inode;
+ }
- local->op_ret = -1;
- local->op_errno = EUCLEAN;
+ if (fd) {
+ local->fd = fd_ref (fd);
+ if (!inode)
+ inode = fd->inode;
+ }
- frame->local = local;
+ local->op_ret = -1;
+ local->op_errno = EUCLEAN;
+ local->fop = fop;
- return local;
-}
+ if (inode) {
+ local->layout = dht_layout_get (frame->this, inode);
+ local->cached_subvol = dht_subvol_get_cached (frame->this,
+ inode);
+ }
+ frame->local = local;
-char *
-basestr (const char *str)
+out:
+ if (ret) {
+ if (local)
+ mem_put (local);
+ local = NULL;
+ }
+ return local;
+}
+
+xlator_t *
+dht_first_up_subvol (xlator_t *this)
{
- char *basestr = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *child = NULL;
+ int i = 0;
+ time_t time = 0;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
- basestr = strrchr (str, '/');
- if (basestr)
- basestr ++;
+ LOCK (&conf->subvolume_lock);
+ {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvol_up_time[i]) {
+ if (!time) {
+ time = conf->subvol_up_time[i];
+ child = conf->subvolumes[i];
+ } else if (time > conf->subvol_up_time[i]) {
+ time = conf->subvol_up_time[i];
+ child = conf->subvolumes[i];
+ }
+ }
+ }
+ }
+ UNLOCK (&conf->subvolume_lock);
- return basestr;
+out:
+ return child;
}
xlator_t *
-dht_first_up_subvol (xlator_t *this)
+dht_last_up_subvol (xlator_t *this)
{
- dht_conf_t *conf = NULL;
- xlator_t *child = NULL;
- int i = 0;
-
- conf = this->private;
-
- LOCK (&conf->subvolume_lock);
- {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->subvolume_status[i]) {
- child = conf->subvolumes[i];
- break;
- }
- }
- }
- UNLOCK (&conf->subvolume_lock);
-
- return child;
+ dht_conf_t *conf = NULL;
+ xlator_t *child = NULL;
+ int i = 0;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ LOCK (&conf->subvolume_lock);
+ {
+ for (i = conf->subvolume_cnt-1; i >= 0; i--) {
+ if (conf->subvolume_status[i]) {
+ child = conf->subvolumes[i];
+ break;
+ }
+ }
+ }
+ UNLOCK (&conf->subvolume_lock);
+
+out:
+ return child;
}
xlator_t *
@@ -206,17 +426,23 @@ dht_subvol_get_hashed (xlator_t *this, loc_t *loc)
dht_layout_t *layout = NULL;
xlator_t *subvol = NULL;
- if (is_fs_root (loc)) {
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc, out);
+
+ if (__is_root_gfid (loc->gfid)) {
subvol = dht_first_up_subvol (this);
goto out;
}
+ GF_VALIDATE_OR_GOTO (this->name, loc->parent, out);
+ GF_VALIDATE_OR_GOTO (this->name, loc->name, out);
+
layout = dht_layout_get (this, loc->parent);
if (!layout) {
gf_log (this->name, GF_LOG_DEBUG,
- "layout missing path=%s parent=%"PRId64,
- loc->path, loc->parent->ino);
+ "layout missing path=%s parent=%s",
+ loc->path, uuid_utoa (loc->parent->gfid));
goto out;
}
@@ -230,6 +456,10 @@ dht_subvol_get_hashed (xlator_t *this, loc_t *loc)
}
out:
+ if (layout) {
+ dht_layout_unref (this, layout);
+ }
+
return subvol;
}
@@ -240,6 +470,8 @@ dht_subvol_get_cached (xlator_t *this, inode_t *inode)
dht_layout_t *layout = NULL;
xlator_t *subvol = NULL;
+ GF_VALIDATE_OR_GOTO (this->name, this, out);
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
layout = dht_layout_get (this, inode);
@@ -247,9 +479,13 @@ dht_subvol_get_cached (xlator_t *this, inode_t *inode)
goto out;
}
- subvol = layout->list[0].xlator;
+ subvol = layout->list[0].xlator;
out:
+ if (layout) {
+ dht_layout_unref (this, layout);
+ }
+
return subvol;
}
@@ -257,71 +493,655 @@ out:
xlator_t *
dht_subvol_next (xlator_t *this, xlator_t *prev)
{
- dht_conf_t *conf = NULL;
- int i = 0;
- xlator_t *next = NULL;
-
- conf = this->private;
-
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->subvolumes[i] == prev) {
- if ((i + 1) < conf->subvolume_cnt)
- next = conf->subvolumes[i + 1];
- break;
- }
- }
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ xlator_t *next = NULL;
- return next;
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == prev) {
+ if ((i + 1) < conf->subvolume_cnt)
+ next = conf->subvolumes[i + 1];
+ break;
+ }
+ }
+
+out:
+ return next;
}
+/* This func wraps around, if prev is actually the last subvol.
+ */
+xlator_t *
+dht_subvol_next_available (xlator_t *this, xlator_t *prev)
+{
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ xlator_t *next = NULL;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == prev) {
+ /* if prev is last in conf->subvolumes, then wrap
+ * around.
+ */
+ if ((i + 1) < conf->subvolume_cnt) {
+ next = conf->subvolumes[i + 1];
+ } else {
+ next = conf->subvolumes[0];
+ }
+ break;
+ }
+ }
+out:
+ return next;
+}
int
dht_subvol_cnt (xlator_t *this, xlator_t *subvol)
{
- int i = 0;
- int ret = -1;
- dht_conf_t *conf = NULL;
+ int i = 0;
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+ conf = this->private;
+ if (!conf)
+ goto out;
- conf = this->private;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ ret = i;
+ break;
+ }
+ }
+
+out:
+ return ret;
+}
+
+
+#define set_if_greater(a, b) do { \
+ if ((a) < (b)) \
+ (a) = (b); \
+ } while (0)
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (subvol == conf->subvolumes[i]) {
- ret = i;
- break;
- }
- }
- return ret;
+#define set_if_greater_time(a, an, b, bn) do { \
+ if (((a) < (b)) || (((a) == (b)) && ((an) < (bn)))){ \
+ (a) = (b); \
+ (an) = (bn); \
+ } \
+ } while (0) \
+
+
+int
+dht_iatt_merge (xlator_t *this, struct iatt *to,
+ struct iatt *from, xlator_t *subvol)
+{
+ if (!from || !to)
+ return 0;
+
+ to->ia_dev = from->ia_dev;
+
+ uuid_copy (to->ia_gfid, from->ia_gfid);
+
+ to->ia_ino = from->ia_ino;
+ to->ia_prot = from->ia_prot;
+ to->ia_type = from->ia_type;
+ to->ia_nlink = from->ia_nlink;
+ to->ia_rdev = from->ia_rdev;
+ to->ia_size += from->ia_size;
+ to->ia_blksize = from->ia_blksize;
+ to->ia_blocks += from->ia_blocks;
+
+ set_if_greater (to->ia_uid, from->ia_uid);
+ set_if_greater (to->ia_gid, from->ia_gid);
+
+ set_if_greater_time(to->ia_atime, to->ia_atime_nsec,
+ from->ia_atime, from->ia_atime_nsec);
+ set_if_greater_time (to->ia_mtime, to->ia_mtime_nsec,
+ from->ia_mtime, from->ia_mtime_nsec);
+ set_if_greater_time (to->ia_ctime, to->ia_ctime_nsec,
+ from->ia_ctime, from->ia_ctime_nsec);
+
+ return 0;
+}
+
+int
+dht_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name)
+{
+ if (!child) {
+ goto err;
+ }
+
+ if (strcmp (parent->path, "/") == 0)
+ gf_asprintf ((char **)&child->path, "/%s", name);
+ else
+ gf_asprintf ((char **)&child->path, "%s/%s", parent->path, name);
+
+ if (!child->path) {
+ goto err;
+ }
+
+ child->name = strrchr (child->path, '/');
+ if (child->name)
+ child->name++;
+
+ child->parent = inode_ref (parent->inode);
+ child->inode = inode_new (parent->inode->table);
+
+ if (!child->inode) {
+ goto err;
+ }
+
+ return 0;
+err:
+ loc_wipe (child);
+ return -1;
}
-#define set_if_greater(a, b) do { \
- if ((a) < (b)) \
- (a) = (b); \
- } while (0)
int
-dht_stat_merge (xlator_t *this, struct stat *to,
- struct stat *from, xlator_t *subvol)
+dht_init_subvolumes (xlator_t *this, dht_conf_t *conf)
{
- to->st_dev = from->st_dev;
+ xlator_list_t *subvols = NULL;
+ int cnt = 0;
+
+ if (!conf)
+ return -1;
- dht_itransform (this, subvol, from->st_ino, &to->st_ino);
+ for (subvols = this->children; subvols; subvols = subvols->next)
+ cnt++;
- to->st_mode = from->st_mode;
- to->st_nlink = from->st_nlink;
- to->st_rdev = from->st_rdev;
- to->st_size += from->st_size;
- to->st_blksize = from->st_blksize;
- to->st_blocks += from->st_blocks;
+ conf->subvolumes = GF_CALLOC (cnt, sizeof (xlator_t *),
+ gf_dht_mt_xlator_t);
+ if (!conf->subvolumes) {
+ return -1;
+ }
+ conf->subvolume_cnt = cnt;
+
+ cnt = 0;
+ for (subvols = this->children; subvols; subvols = subvols->next)
+ conf->subvolumes[cnt++] = subvols->xlator;
- set_if_greater (to->st_uid, from->st_uid);
- set_if_greater (to->st_gid, from->st_gid);
+ conf->subvolume_status = GF_CALLOC (cnt, sizeof (char),
+ gf_dht_mt_char);
+ if (!conf->subvolume_status) {
+ return -1;
+ }
- set_if_greater (to->st_atime, from->st_atime);
- set_if_greater (to->st_mtime, from->st_mtime);
- set_if_greater (to->st_ctime, from->st_ctime);
+ conf->last_event = GF_CALLOC (cnt, sizeof (int),
+ gf_dht_mt_char);
+ if (!conf->last_event) {
+ return -1;
+ }
- return 0;
+ conf->subvol_up_time = GF_CALLOC (cnt, sizeof (time_t),
+ gf_dht_mt_subvol_time);
+ if (!conf->subvol_up_time) {
+ return -1;
+ }
+
+ conf->du_stats = GF_CALLOC (conf->subvolume_cnt, sizeof (dht_du_t),
+ gf_dht_mt_dht_du_t);
+ if (!conf->du_stats) {
+ return -1;
+ }
+
+ conf->decommissioned_bricks = GF_CALLOC (cnt, sizeof (xlator_t *),
+ gf_dht_mt_xlator_t);
+ if (!conf->decommissioned_bricks) {
+ return -1;
+ }
+
+ return 0;
+}
+
+
+
+
+static int
+dht_migration_complete_check_done (int op_ret, call_frame_t *frame, void *data)
+{
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+
+ local->rebalance.target_op_fn (THIS, frame, op_ret);
+
+ return 0;
+}
+
+
+int
+dht_migration_complete_check_task (void *data)
+{
+ int ret = -1;
+ xlator_t *src_node = NULL;
+ xlator_t *dst_node = NULL;
+ dht_local_t *local = NULL;
+ dict_t *dict = NULL;
+ dht_layout_t *layout = NULL;
+ struct iatt stbuf = {0,};
+ xlator_t *this = NULL;
+ call_frame_t *frame = NULL;
+ loc_t tmp_loc = {0,};
+ char *path = NULL;
+ dht_conf_t *conf = NULL;
+ inode_t *inode = NULL;
+ fd_t *iter_fd = NULL;
+ uint64_t tmp_subvol = 0;
+ int open_failed = 0;
+
+ this = THIS;
+ frame = data;
+ local = frame->local;
+ conf = this->private;
+
+ src_node = local->cached_subvol;
+
+ if (!local->loc.inode && !local->fd)
+ goto out;
+
+ inode = (!local->fd) ? local->loc.inode : local->fd->inode;
+
+ /* getxattr on cached_subvol for 'linkto' value. Do path based getxattr
+ * as root:root. If a fd is already open, access check wont be done*/
+
+ if (!local->loc.inode) {
+ ret = syncop_fgetxattr (src_node, local->fd, &dict,
+ conf->link_xattr_name);
+ } else {
+ SYNCTASK_SETID (0, 0);
+ ret = syncop_getxattr (src_node, &local->loc, &dict,
+ conf->link_xattr_name);
+ SYNCTASK_SETID (frame->root->uid, frame->root->gid);
+ }
+
+ if (!ret)
+ dst_node = dht_linkfile_subvol (this, NULL, NULL, dict);
+
+ if (ret) {
+ if ((errno != ENOENT) || (!local->loc.inode)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to get the 'linkto' xattr %s",
+ local->loc.path, strerror (errno));
+ goto out;
+ }
+ /* Need to do lookup on hashed subvol, then get the file */
+ ret = syncop_lookup (this, &local->loc, NULL, &stbuf, NULL,
+ NULL);
+ if (ret)
+ goto out;
+ dst_node = dht_subvol_get_cached (this, local->loc.inode);
+ }
+
+ if (!dst_node) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to get the destination node",
+ local->loc.path);
+ ret = -1;
+ goto out;
+ }
+
+ /* lookup on dst */
+ if (local->loc.inode) {
+ ret = syncop_lookup (dst_node, &local->loc, NULL, &stbuf, NULL, NULL);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to lookup the file on %s",
+ local->loc.path, dst_node->name);
+ goto out;
+ }
+
+ if (uuid_compare (stbuf.ia_gfid, local->loc.inode->gfid)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: gfid different on the target file on %s",
+ local->loc.path, dst_node->name);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ /* update inode ctx (the layout) */
+ dht_layout_unref (this, local->layout);
+
+ ret = dht_layout_preset (this, dst_node, inode);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s: could not set preset layout for subvol %s",
+ local->loc.path, dst_node->name);
+ ret = -1;
+ goto out;
+ }
+
+ layout = dht_layout_for_subvol (this, dst_node);
+ if (!layout) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%s: no pre-set layout for subvolume %s",
+ local->loc.path, dst_node ? dst_node->name : "<nil>");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dht_layout_set (this, inode, layout);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to set the new layout",
+ local->loc.path);
+ goto out;
+ }
+
+ local->cached_subvol = dst_node;
+ ret = 0;
+
+ /* once we detect the migration complete, the inode-ctx2 is no more
+ required.. delete the ctx and also, it means, open() already
+ done on all the fd of inode */
+ ret = inode_ctx_reset1 (inode, this, &tmp_subvol);
+ if (tmp_subvol)
+ goto out;
+
+ if (list_empty (&inode->fd_list))
+ goto out;
+
+ /* perform open as root:root. There is window between linkfile
+ * creation(root:root) and setattr with the correct uid/gid
+ */
+ SYNCTASK_SETID(0, 0);
+
+ /* perform 'open()' on all the fd's present on the inode */
+ tmp_loc.inode = inode;
+ inode_path (inode, NULL, &path);
+ if (path)
+ tmp_loc.path = path;
+ list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
+ if (fd_is_anonymous (iter_fd))
+ continue;
+
+ ret = syncop_open (dst_node, &tmp_loc,
+ iter_fd->flags, iter_fd);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to open "
+ "the fd (%p, flags=0%o) on file %s @ %s",
+ iter_fd, iter_fd->flags, path, dst_node->name);
+ open_failed = 1;
+ }
+ }
+ GF_FREE (path);
+
+ SYNCTASK_SETID (frame->root->uid, frame->root->gid);
+ if (open_failed) {
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+out:
+
+ return ret;
+}
+
+int
+dht_rebalance_complete_check (xlator_t *this, call_frame_t *frame)
+{
+ int ret = -1;
+
+ ret = synctask_new (this->ctx->env, dht_migration_complete_check_task,
+ dht_migration_complete_check_done,
+ frame, frame);
+ return ret;
+}
+
+/* During 'in-progress' state, both nodes should have the file */
+static int
+dht_inprogress_check_done (int op_ret, call_frame_t *sync_frame, void *data)
+{
+ dht_local_t *local = NULL;
+
+ local = sync_frame->local;
+
+ local->rebalance.target_op_fn (THIS, sync_frame, op_ret);
+
+ return 0;
+}
+
+static int
+dht_rebalance_inprogress_task (void *data)
+{
+ int ret = -1;
+ xlator_t *src_node = NULL;
+ xlator_t *dst_node = NULL;
+ dht_local_t *local = NULL;
+ dict_t *dict = NULL;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ char *path = NULL;
+ struct iatt stbuf = {0,};
+ loc_t tmp_loc = {0,};
+ dht_conf_t *conf = NULL;
+ inode_t *inode = NULL;
+ fd_t *iter_fd = NULL;
+ int open_failed = 0;
+
+ this = THIS;
+ frame = data;
+ local = frame->local;
+ conf = this->private;
+
+ src_node = local->cached_subvol;
+
+ if (!local->loc.inode && !local->fd)
+ goto out;
+
+ inode = (!local->fd) ? local->loc.inode : local->fd->inode;
+
+ /* getxattr on cached_subvol for 'linkto' value. Do path based getxattr
+ * as root:root. If a fd is already open, access check wont be done*/
+ if (local->loc.inode) {
+ SYNCTASK_SETID (0, 0);
+ ret = syncop_getxattr (src_node, &local->loc, &dict,
+ conf->link_xattr_name);
+ SYNCTASK_SETID (frame->root->uid, frame->root->gid);
+ } else {
+ ret = syncop_fgetxattr (src_node, local->fd, &dict,
+ conf->link_xattr_name);
+ }
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to get the 'linkto' xattr %s",
+ local->loc.path, strerror (errno));
+ goto out;
+ }
+
+ dst_node = dht_linkfile_subvol (this, NULL, NULL, dict);
+ if (!dst_node) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to get the 'linkto' xattr from dict",
+ local->loc.path);
+ ret = -1;
+ goto out;
+ }
+
+ local->rebalance.target_node = dst_node;
+
+ if (local->loc.inode) {
+ /* lookup on dst */
+ ret = syncop_lookup (dst_node, &local->loc, NULL,
+ &stbuf, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to lookup the file on %s",
+ local->loc.path, dst_node->name);
+ goto out;
+ }
+
+ if (uuid_compare (stbuf.ia_gfid, local->loc.inode->gfid)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: gfid different on the target file on %s",
+ local->loc.path, dst_node->name);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = 0;
+
+ if (list_empty (&inode->fd_list))
+ goto done;
+
+ /* perform open as root:root. There is window between linkfile
+ * creation(root:root) and setattr with the correct uid/gid
+ */
+ SYNCTASK_SETID (0, 0);
+
+ tmp_loc.inode = inode;
+ inode_path (inode, NULL, &path);
+ if (path)
+ tmp_loc.path = path;
+
+ list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
+ if (fd_is_anonymous (iter_fd))
+ continue;
+
+ ret = syncop_open (dst_node, &tmp_loc,
+ iter_fd->flags, iter_fd);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to send open "
+ "the fd (%p, flags=0%o) on file %s @ %s",
+ iter_fd, iter_fd->flags, path, dst_node->name);
+ open_failed = 1;
+ }
+ }
+ GF_FREE (path);
+
+ SYNCTASK_SETID (frame->root->uid, frame->root->gid);
+
+ if (open_failed) {
+ ret = -1;
+ goto out;
+ }
+
+done:
+ ret = dht_inode_ctx_set1 (this, inode, dst_node);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to set inode-ctx target file at %s",
+ local->loc.path, dst_node->name);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+dht_rebalance_in_progress_check (xlator_t *this, call_frame_t *frame)
+{
+
+ int ret = -1;
+
+ ret = synctask_new (this->ctx->env, dht_rebalance_inprogress_task,
+ dht_inprogress_check_done,
+ frame, frame);
+ return ret;
+}
+
+int
+dht_inode_ctx_layout_set (inode_t *inode, xlator_t *this,
+ dht_layout_t *layout_int)
+{
+ dht_inode_ctx_t *ctx = NULL;
+ int ret = -1;
+
+ ret = dht_inode_ctx_get (inode, this, &ctx);
+ if (!ret && ctx) {
+ ctx->layout = layout_int;
+ } else {
+ ctx = GF_CALLOC (1, sizeof (*ctx), gf_dht_mt_inode_ctx_t);
+ if (!ctx)
+ return ret;
+ ctx->layout = layout_int;
+ }
+
+ ret = dht_inode_ctx_set (inode, this, ctx);
+
+ return ret;
+}
+
+int
+dht_inode_ctx_time_update (inode_t *inode, xlator_t *this, struct iatt *stat,
+ int32_t post)
+{
+ dht_inode_ctx_t *ctx = NULL;
+ dht_stat_time_t *time = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO (this->name, stat, out);
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+ ret = dht_inode_ctx_get (inode, this, &ctx);
+
+ if (ret) {
+ ctx = GF_CALLOC (1, sizeof (*ctx), gf_dht_mt_inode_ctx_t);
+ if (!ctx)
+ return -1;
+ }
+
+ time = &ctx->time;
+
+ DHT_UPDATE_TIME(time->mtime, time->mtime_nsec,
+ stat->ia_mtime, stat->ia_mtime_nsec, inode, post);
+ DHT_UPDATE_TIME(time->ctime, time->ctime_nsec,
+ stat->ia_ctime, stat->ia_ctime_nsec, inode, post);
+ DHT_UPDATE_TIME(time->atime, time->atime_nsec,
+ stat->ia_atime, stat->ia_atime_nsec, inode, post);
+
+ ret = dht_inode_ctx_set (inode, this, ctx);
+out:
+ return 0;
+}
+
+int
+dht_inode_ctx_get (inode_t *inode, xlator_t *this, dht_inode_ctx_t **ctx)
+{
+ int ret = -1;
+ uint64_t ctx_int = 0;
+
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+ ret = inode_ctx_get (inode, this, &ctx_int);
+
+ if (ret)
+ return ret;
+
+ if (ctx)
+ *ctx = (dht_inode_ctx_t *) ctx_int;
+out:
+ return ret;
+}
+
+int dht_inode_ctx_set (inode_t *inode, xlator_t *this, dht_inode_ctx_t *ctx)
+{
+ int ret = -1;
+ uint64_t ctx_int = 0;
+
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
+ GF_VALIDATE_OR_GOTO (this->name, ctx, out);
+
+ ctx_int = (long)ctx;
+ ret = inode_ctx_set (inode, this, &ctx_int);
+out:
+ return ret;
}
diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c
new file mode 100644
index 000000000..ece84151a
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-inode-read.c
@@ -0,0 +1,1139 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "dht-common.h"
+
+int dht_access2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_readv2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_attr2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_open2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_flush2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_lk2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_fsync2 (xlator_t *this, call_frame_t *frame, int ret);
+
+int
+dht_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ prev = cookie;
+
+ local->op_errno = op_errno;
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto out;
+ }
+
+ if (!op_ret || (local->call_cnt != 1))
+ goto out;
+
+ /* rebalance would have happened */
+ local->rebalance.target_op_fn = dht_open2;
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+
+out:
+ DHT_STACK_UNWIND (open, frame, op_ret, op_errno, local->fd, xdata);
+
+ return 0;
+}
+
+int
+dht_open2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ int op_errno = EINVAL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ op_errno = ENOENT;
+ if (op_ret)
+ goto out;
+
+ local->call_cnt = 2;
+ subvol = local->cached_subvol;
+
+ STACK_WIND (frame, dht_open_cbk, subvol, subvol->fops->open,
+ &local->loc, local->rebalance.flags, local->fd,
+ NULL);
+ return 0;
+
+out:
+ DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+int
+dht_open (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int flags, fd_t *fd, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, loc, fd, GF_FOP_OPEN);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->rebalance.flags = flags;
+ local->call_cnt = 1;
+
+ STACK_WIND (frame, dht_open_cbk, subvol, subvol->fops->open,
+ loc, flags, fd, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (open, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+int
+dht_file_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *stbuf, dict_t *xdata)
+{
+ xlator_t *subvol = 0;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+ inode_t *inode = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto out;
+ }
+
+ if (local->call_cnt != 1)
+ goto out;
+
+ local->op_errno = op_errno;
+ /* Check if the rebalance phase2 is true */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) {
+ inode = (local->fd) ? local->fd->inode : local->loc.inode;
+ ret = dht_inode_ctx_get1 (this, inode, &subvol);
+ if (!subvol) {
+ /* Phase 2 of migration */
+ local->rebalance.target_op_fn = dht_attr2;
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ } else {
+ /* value is already set in fd_ctx, that means no need
+ to check for whether its complete or not. */
+ dht_attr2 (this, frame, 0);
+ return 0;
+ }
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
+ DHT_STACK_UNWIND (stat, frame, op_ret, op_errno, stbuf, xdata);
+err:
+ return 0;
+}
+
+int
+dht_attr2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ int op_errno = EINVAL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ op_errno = local->op_errno;
+ if (op_ret == -1)
+ goto out;
+
+ subvol = local->cached_subvol;
+ local->call_cnt = 2;
+
+ if (local->fop == GF_FOP_FSTAT) {
+ STACK_WIND (frame, dht_file_attr_cbk, subvol,
+ subvol->fops->fstat, local->fd, NULL);
+ } else {
+ STACK_WIND (frame, dht_file_attr_cbk, subvol,
+ subvol->fops->stat, &local->loc, NULL);
+ }
+ return 0;
+
+out:
+ DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+int
+dht_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *stbuf, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+
+ goto unlock;
+ }
+
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+
+ local->op_ret = 0;
+ }
+unlock:
+ UNLOCK (&frame->lock);
+out:
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ DHT_STACK_UNWIND (stat, frame, local->op_ret, local->op_errno,
+ &local->stbuf, xdata);
+ }
+err:
+ return 0;
+}
+
+int
+dht_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_STAT);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no layout for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (IA_ISREG (loc->inode->ia_type)) {
+ local->call_cnt = 1;
+
+ subvol = local->cached_subvol;
+
+ STACK_WIND (frame, dht_file_attr_cbk, subvol,
+ subvol->fops->stat, loc, xdata);
+
+ return 0;
+ }
+
+ local->call_cnt = call_cnt = layout->cnt;
+
+ for (i = 0; i < call_cnt; i++) {
+ subvol = layout->list[i].xlator;
+
+ STACK_WIND (frame, dht_attr_cbk,
+ subvol, subvol->fops->stat,
+ loc, xdata);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+
+int
+dht_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FSTAT);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no layout for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (IA_ISREG (fd->inode->ia_type)) {
+ local->call_cnt = 1;
+
+ subvol = local->cached_subvol;
+
+ STACK_WIND (frame, dht_file_attr_cbk, subvol,
+ subvol->fops->fstat, fd, xdata);
+
+ return 0;
+ }
+
+ local->call_cnt = call_cnt = layout->cnt;
+
+ for (i = 0; i < call_cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND (frame, dht_attr_cbk,
+ subvol, subvol->fops->fstat,
+ fd, xdata);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+int
+dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ struct iovec *vector, int count, struct iatt *stbuf,
+ struct iobref *iobref, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int ret = 0;
+ inode_t *inode = NULL;
+ xlator_t *subvol = 0;
+
+ local = frame->local;
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ /* This is already second try, no need for re-check */
+ if (local->call_cnt != 1)
+ goto out;
+
+ if ((op_ret == -1) && (op_errno != ENOENT))
+ goto out;
+
+ local->op_errno = op_errno;
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) {
+ /* File would be migrated to other node */
+ ret = dht_inode_ctx_get1 (this, inode, &subvol);
+ if (!subvol) {
+ local->rebalance.target_op_fn = dht_readv2;
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ } else {
+ /* value is already set in fd_ctx, that means no need
+ to check for whether its complete or not. */
+ dht_readv2 (this, frame, 0);
+ return 0;
+ }
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
+ DHT_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count, stbuf,
+ iobref, xdata);
+
+ return 0;
+}
+
+int
+dht_readv2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ int op_errno = EINVAL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ op_errno = local->op_errno;
+ if (op_ret == -1)
+ goto out;
+
+ local->call_cnt = 2;
+ subvol = local->cached_subvol;
+
+ STACK_WIND (frame, dht_readv_cbk, subvol, subvol->fops->readv,
+ local->fd, local->rebalance.size, local->rebalance.offset,
+ local->rebalance.flags, NULL);
+
+ return 0;
+
+out:
+ DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
+ return 0;
+}
+
+int
+dht_readv (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t off, uint32_t flags, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_READ);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->rebalance.offset = off;
+ local->rebalance.size = size;
+ local->rebalance.flags = flags;
+ local->call_cnt = 1;
+
+ STACK_WIND (frame, dht_readv_cbk,
+ subvol, subvol->fops->readv,
+ fd, size, off, flags, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
+
+ return 0;
+}
+
+int
+dht_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ int ret = -1;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ call_frame_t *prev = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ if (!prev || !prev->this)
+ goto out;
+ if (local->call_cnt != 1)
+ goto out;
+ if ((op_ret == -1) && (op_errno == ENOTCONN) &&
+ IA_ISDIR(local->loc.inode->ia_type)) {
+
+ subvol = dht_subvol_next_available (this, prev->this);
+ if (!subvol)
+ goto out;
+
+ /* check if we are done with visiting every node */
+ if (subvol == local->cached_subvol) {
+ goto out;
+ }
+
+ STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access,
+ &local->loc, local->rebalance.flags, NULL);
+ return 0;
+ }
+ if ((op_ret == -1) && (op_errno == ENOENT)) {
+ /* File would be migrated to other node */
+ local->op_errno = op_errno;
+ local->rebalance.target_op_fn = dht_access2;
+ ret = dht_rebalance_complete_check (frame->this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STACK_UNWIND (access, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+dht_access2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ int op_errno = EINVAL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ op_errno = local->op_errno;
+ if (op_ret == -1)
+ goto out;
+
+ local->call_cnt = 2;
+ subvol = local->cached_subvol;
+
+ STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access,
+ &local->loc, local->rebalance.flags, NULL);
+
+ return 0;
+
+out:
+ DHT_STACK_UNWIND (access, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+
+int
+dht_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_ACCESS);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.flags = mask;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access,
+ loc, mask, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (access, frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+
+int
+dht_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ inode_t *inode = NULL;
+ xlator_t *subvol = 0;
+
+ local = frame->local;
+
+ local->op_errno = op_errno;
+
+ if (local->call_cnt != 1)
+ goto out;
+
+ /* If context is set, then send flush() it to the destination */
+ dht_inode_ctx_get1 (this, inode, &subvol);
+ if (subvol) {
+ dht_flush2 (this, frame, 0);
+ return 0;
+ }
+
+out:
+ DHT_STACK_UNWIND (flush, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+int
+dht_flush2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
+
+ dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+
+ if (!subvol)
+ subvol = local->cached_subvol;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND (frame, dht_flush_cbk,
+ subvol, subvol->fops->flush, local->fd, NULL);
+
+ return 0;
+}
+
+
+int
+dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FLUSH);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->call_cnt = 1;
+
+ STACK_WIND (frame, dht_flush_cbk,
+ subvol, subvol->fops->flush, fd, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (flush, frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+
+int
+dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+ inode_t *inode = NULL;
+ xlator_t *subvol = 0;
+
+ local = frame->local;
+ prev = cookie;
+
+ local->op_errno = op_errno;
+ if (op_ret == -1 && (op_errno != ENOENT)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
+ dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
+ }
+ goto out;
+ }
+
+ local->op_errno = op_errno;
+ dht_inode_ctx_get1 (this, inode, &subvol);
+ if (!subvol) {
+ local->rebalance.target_op_fn = dht_fsync2;
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+ dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
+ dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+
+ ret = dht_rebalance_in_progress_check (this, frame);
+ }
+
+ /* Check if the rebalance phase2 is true */
+ if (IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ }
+ if (!ret)
+ return 0;
+ } else {
+ dht_fsync2 (this, frame, 0);
+ return 0;
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS (postbuf);
+ DHT_STRIP_PHASE1_FLAGS (prebuf);
+ DHT_STACK_UNWIND (fsync, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+
+ return 0;
+}
+
+int
+dht_fsync2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
+
+ dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+ if (!subvol)
+ subvol = local->cached_subvol;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND (frame, dht_fsync_cbk, subvol, subvol->fops->fsync,
+ local->fd, local->rebalance.flags, NULL);
+
+ return 0;
+}
+
+int
+dht_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
+ dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FSYNC);
+ if (!local) {
+ op_errno = ENOMEM;
+
+ goto err;
+ }
+
+ local->call_cnt = 1;
+ local->rebalance.flags = datasync;
+
+ subvol = local->cached_subvol;
+
+ STACK_WIND (frame, dht_fsync_cbk, subvol, subvol->fops->fsync,
+ fd, datasync, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+/* TODO: for 'lk()' call, we need some other special error, may be ESTALE to
+ indicate that lock migration happened on the fd, so we can consider it as
+ phase 2 of migration */
+int
+dht_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct gf_flock *flock, dict_t *xdata)
+{
+ DHT_STACK_UNWIND (lk, frame, op_ret, op_errno, flock, xdata);
+
+ return 0;
+}
+
+
+int
+dht_lk (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int cmd, struct gf_flock *flock, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ subvol = dht_subvol_get_cached (this, fd->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ /* TODO: for rebalance, we need to preserve the fop arguments */
+ STACK_WIND (frame, dht_lk_cbk, subvol, subvol->fops->lk, fd,
+ cmd, flock, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+/* Symlinks are currently not migrated, so no need for any check here */
+int
+dht_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, const char *path,
+ struct iatt *stbuf, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+ if (op_ret == -1)
+ goto err;
+
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ }
+
+err:
+ DHT_STRIP_PHASE1_FLAGS (stbuf);
+ DHT_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, stbuf, xdata);
+
+ return 0;
+}
+
+
+int
+dht_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_READLINK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_readlink_cbk,
+ subvol, subvol->fops->readlink,
+ loc, size, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+/* Currently no translators on top of 'distribute' will be using
+ * below fops, hence not implementing 'migration' related checks
+ */
+
+int
+dht_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+
+int
+dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_XATTROP);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->call_cnt = 1;
+
+ STACK_WIND (frame,
+ dht_xattrop_cbk,
+ subvol, subvol->fops->xattrop,
+ loc, flags, dict, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+
+int
+dht_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ DHT_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+
+int
+dht_fxattrop (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ subvol = dht_subvol_get_cached (this, fd->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame,
+ dht_fxattrop_cbk,
+ subvol, subvol->fops->fxattrop,
+ fd, flags, dict, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+
+int
+dht_inodelk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+
+{
+ DHT_STACK_UNWIND (inodelk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int32_t
+dht_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_INODELK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->call_cnt = 1;
+
+ STACK_WIND (frame,
+ dht_inodelk_cbk,
+ subvol, subvol->fops->inodelk,
+ volume, loc, cmd, lock, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (inodelk, frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+
+int
+dht_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+
+{
+ DHT_STACK_UNWIND (finodelk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int
+dht_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ subvol = dht_subvol_get_cached (this, fd->inode);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+
+ STACK_WIND (frame, dht_finodelk_cbk, subvol, subvol->fops->finodelk,
+ volume, fd, cmd, lock, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL);
+
+ return 0;
+}
diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c
new file mode 100644
index 000000000..4b3f3a049
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-inode-write.c
@@ -0,0 +1,1013 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "dht-common.h"
+
+int dht_writev2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_truncate2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_setattr2 (xlator_t *this, call_frame_t *frame, int ret);
+int dht_fallocate2(xlator_t *this, call_frame_t *frame, int op_ret);
+int dht_discard2(xlator_t *this, call_frame_t *frame, int op_ret);
+int dht_zerofill2(xlator_t *this, call_frame_t *frame, int op_ret);
+
+int
+dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int ret = -1;
+ xlator_t *subvol = NULL;
+
+ if (op_ret == -1 && (op_errno != ENOENT)) {
+ goto out;
+ }
+
+ local = frame->local;
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ /* preserve the modes of source */
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
+ dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
+ }
+ goto out;
+ }
+
+ local->rebalance.target_op_fn = dht_writev2;
+
+ local->op_errno = op_errno;
+ /* Phase 2 of migration */
+ if (IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+ dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
+ dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+
+ ret = dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+ if (subvol) {
+ dht_writev2 (this, frame, 0);
+ return 0;
+ }
+ ret = dht_rebalance_in_progress_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS (postbuf);
+ DHT_STRIP_PHASE1_FLAGS (prebuf);
+
+ DHT_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+
+ return 0;
+}
+
+int
+dht_writev2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
+
+ dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+
+ if (!subvol)
+ subvol = local->cached_subvol;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND (frame, dht_writev_cbk,
+ subvol, subvol->fops->writev,
+ local->fd, local->rebalance.vector, local->rebalance.count,
+ local->rebalance.offset, local->rebalance.flags,
+ local->rebalance.iobref, NULL);
+
+ return 0;
+}
+
+int
+dht_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int count, off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_WRITE);
+ if (!local) {
+
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+
+ local->rebalance.vector = iov_dup (vector, count);
+ local->rebalance.offset = off;
+ local->rebalance.count = count;
+ local->rebalance.flags = flags;
+ local->rebalance.iobref = iobref_ref (iobref);
+ local->call_cnt = 1;
+
+ STACK_WIND (frame, dht_writev_cbk,
+ subvol, subvol->fops->writev,
+ fd, vector, count, off, flags, iobref, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+
+int
+dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
+ dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
+ }
+ goto out;
+ }
+
+ local->rebalance.target_op_fn = dht_truncate2;
+
+ local->op_errno = op_errno;
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+ dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
+ dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+ inode = (local->fd) ? local->fd->inode : local->loc.inode;
+ dht_inode_ctx_get1 (this, inode, &subvol);
+ if (subvol) {
+ dht_truncate2 (this, frame, 0);
+ return 0;
+ }
+ ret = dht_rebalance_in_progress_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS (postbuf);
+ DHT_STRIP_PHASE1_FLAGS (prebuf);
+ DHT_STACK_UNWIND (truncate, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+err:
+ return 0;
+}
+
+
+int
+dht_truncate2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+
+ local = frame->local;
+
+ inode = local->fd ? local->fd->inode : local->loc.inode;
+
+ dht_inode_ctx_get1 (this, inode, &subvol);
+ if (!subvol)
+ subvol = local->cached_subvol;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ if (local->fop == GF_FOP_TRUNCATE) {
+ STACK_WIND (frame, dht_truncate_cbk, subvol,
+ subvol->fops->truncate, &local->loc,
+ local->rebalance.offset, NULL);
+ } else {
+ STACK_WIND (frame, dht_truncate_cbk, subvol,
+ subvol->fops->ftruncate, local->fd,
+ local->rebalance.offset, NULL);
+ }
+
+ return 0;
+}
+
+int
+dht_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_TRUNCATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.offset = offset;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_truncate_cbk,
+ subvol, subvol->fops->truncate,
+ loc, offset, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+int
+dht_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FTRUNCATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.offset = offset;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_truncate_cbk,
+ subvol, subvol->fops->ftruncate,
+ fd, offset, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+int
+dht_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+ xlator_t *subvol = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
+ dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
+ }
+ goto out;
+ }
+ local->rebalance.target_op_fn = dht_fallocate2;
+
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+ dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
+ dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+ dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+ if (subvol) {
+ dht_fallocate2 (this, frame, 0);
+ return 0;
+ }
+ ret = dht_rebalance_in_progress_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS (postbuf);
+ DHT_STRIP_PHASE1_FLAGS (prebuf);
+ DHT_STACK_UNWIND (fallocate, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+err:
+ return 0;
+}
+
+int
+dht_fallocate2(xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
+
+ dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+
+ if (!subvol)
+ subvol = local->cached_subvol;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND(frame, dht_fallocate_cbk, subvol, subvol->fops->fallocate,
+ local->fd, local->rebalance.flags, local->rebalance.offset,
+ local->rebalance.size, NULL);
+
+ return 0;
+}
+
+int
+dht_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FALLOCATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.flags = mode;
+ local->rebalance.offset = offset;
+ local->rebalance.size = len;
+
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_fallocate_cbk,
+ subvol, subvol->fops->fallocate,
+ fd, mode, offset, len, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+int
+dht_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+ xlator_t *subvol = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
+ dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
+ }
+ goto out;
+ }
+ local->rebalance.target_op_fn = dht_discard2;
+
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+ dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
+ dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+ dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+ if (subvol) {
+ dht_discard2 (this, frame, 0);
+ return 0;
+ }
+ ret = dht_rebalance_in_progress_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS (postbuf);
+ DHT_STRIP_PHASE1_FLAGS (prebuf);
+ DHT_STACK_UNWIND (discard, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+err:
+ return 0;
+}
+
+int
+dht_discard2(xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
+
+ dht_inode_ctx_get1 (this, local->fd->inode, &subvol);
+
+ if (!subvol)
+ subvol = local->cached_subvol;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND(frame, dht_discard_cbk, subvol, subvol->fops->discard,
+ local->fd, local->rebalance.offset, local->rebalance.size,
+ NULL);
+
+ return 0;
+}
+
+int
+dht_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_DISCARD);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.offset = offset;
+ local->rebalance.size = len;
+
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_discard_cbk, subvol, subvol->fops->discard,
+ fd, offset, len, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+int
+dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
+ dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
+ }
+ goto out;
+ }
+ local->rebalance.target_op_fn = dht_zerofill2;
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
+ dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
+ dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+ ret = fd_ctx_get (local->fd, this, NULL);
+ if (!ret) {
+ dht_zerofill2 (this, frame, 0);
+ return 0;
+ }
+ ret = dht_rebalance_in_progress_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS (postbuf);
+ DHT_STRIP_PHASE1_FLAGS (prebuf);
+ DHT_STACK_UNWIND (zerofill, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+err:
+ return 0;
+}
+
+int
+dht_zerofill2(xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ uint64_t tmp_subvol = 0;
+ int ret = -1;
+
+ local = frame->local;
+
+ if (local->fd)
+ ret = fd_ctx_get (local->fd, this, &tmp_subvol);
+ if (!ret)
+ subvol = (xlator_t *)(long)tmp_subvol;
+
+ if (!subvol)
+ subvol = local->cached_subvol;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND(frame, dht_zerofill_cbk, subvol, subvol->fops->zerofill,
+ local->fd, local->rebalance.offset, local->rebalance.size,
+ NULL);
+
+ return 0;
+}
+
+int
+dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_ZEROFILL);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.offset = offset;
+ local->rebalance.size = len;
+
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND (frame, dht_zerofill_cbk, subvol, subvol->fops->zerofill,
+ fd, offset, len, xdata);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+
+/* handle cases of migration here for 'setattr()' calls */
+int
+dht_file_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = -1;
+
+ local = frame->local;
+ prev = cookie;
+
+ local->op_errno = op_errno;
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto out;
+ }
+
+ if (local->call_cnt != 1)
+ goto out;
+
+ local->rebalance.target_op_fn = dht_setattr2;
+
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
+ ret = dht_rebalance_complete_check (this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* At the end of the migration process, whatever 'attr' we
+ have on source file will be migrated to destination file
+ in one shot, hence we don't need to check for in progress
+ state here (ie, PHASE1) */
+out:
+ DHT_STRIP_PHASE1_FLAGS (postbuf);
+ DHT_STRIP_PHASE1_FLAGS (prebuf);
+ DHT_STACK_UNWIND (setattr, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+
+ return 0;
+}
+
+int
+dht_setattr2 (xlator_t *this, call_frame_t *frame, int op_ret)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+
+ local = frame->local;
+
+ inode = (local->fd) ? local->fd->inode : local->loc.inode;
+
+ dht_inode_ctx_get1 (this, inode, &subvol);
+
+ if (!subvol)
+ subvol = local->cached_subvol;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ if (local->fop == GF_FOP_SETATTR) {
+ STACK_WIND (frame, dht_file_setattr_cbk, subvol,
+ subvol->fops->setattr, &local->loc,
+ &local->rebalance.stbuf, local->rebalance.flags,
+ NULL);
+ } else {
+ STACK_WIND (frame, dht_file_setattr_cbk, subvol,
+ subvol->fops->fsetattr, local->fd,
+ &local->rebalance.stbuf, local->rebalance.flags,
+ NULL);
+ }
+
+ return 0;
+}
+
+
+/* Keep the existing code same for all the cases other than regular file */
+int
+dht_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+
+
+ local = frame->local;
+ prev = cookie;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "subvolume %s returned -1 (%s)",
+ prev->this->name, strerror (op_errno));
+ goto unlock;
+ }
+
+ dht_iatt_merge (this, &local->prebuf, statpre, prev->this);
+ dht_iatt_merge (this, &local->stbuf, statpost, prev->this);
+
+ local->op_ret = 0;
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt))
+ DHT_STACK_UNWIND (setattr, frame, local->op_ret, local->op_errno,
+ &local->prebuf, &local->stbuf, xdata);
+
+ return 0;
+}
+
+
+int
+dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = -1;
+ int i = -1;
+ int call_cnt = 0;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_SETATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no layout for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (!layout_is_sane (layout)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "layout is not sane for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (IA_ISREG (loc->inode->ia_type)) {
+ /* in the regular file _cbk(), we need to check for
+ migration possibilities */
+ local->rebalance.stbuf = *stbuf;
+ local->rebalance.flags = valid;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+
+ STACK_WIND (frame, dht_file_setattr_cbk, subvol,
+ subvol->fops->setattr,
+ loc, stbuf, valid, xdata);
+
+ return 0;
+ }
+
+ local->call_cnt = call_cnt = layout->cnt;
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_setattr_cbk,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->setattr,
+ loc, stbuf, valid, xdata);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+int
+dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = -1;
+ int i = -1;
+ int call_cnt = 0;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = dht_local_init (frame, NULL, fd, GF_FOP_FSETATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no layout for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (!layout_is_sane (layout)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "layout is not sane for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (IA_ISREG (fd->inode->ia_type)) {
+ /* in the regular file _cbk(), we need to check for
+ migration possibilities */
+ local->rebalance.stbuf = *stbuf;
+ local->rebalance.flags = valid;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+
+ STACK_WIND (frame, dht_file_setattr_cbk, subvol,
+ subvol->fops->fsetattr,
+ fd, stbuf, valid, xdata);
+
+ return 0;
+ }
+
+ local->call_cnt = call_cnt = layout->cnt;
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_setattr_cbk,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->fsetattr,
+ fd, stbuf, valid, xdata);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c
index f2e3de14b..38e9970a7 100644
--- a/xlators/cluster/dht/src/dht-layout.c
+++ b/xlators/cluster/dht/src/dht-layout.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -38,583 +29,763 @@
dht_layout_t *
dht_layout_new (xlator_t *this, int cnt)
{
- dht_layout_t *layout = NULL;
+ dht_layout_t *layout = NULL;
dht_conf_t *conf = NULL;
conf = this->private;
- layout = CALLOC (1, layout_size (cnt));
- if (!layout) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto out;
- }
+ layout = GF_CALLOC (1, layout_size (cnt),
+ gf_dht_mt_dht_layout_t);
+ if (!layout) {
+ goto out;
+ }
- layout->cnt = cnt;
- if (conf)
+ layout->type = DHT_HASH_TYPE_DM;
+ layout->cnt = cnt;
+
+ if (conf) {
+ layout->spread_cnt = conf->dir_spread_cnt;
layout->gen = conf->gen;
+ }
+
+ layout->ref = 1;
out:
- return layout;
+ return layout;
}
dht_layout_t *
dht_layout_get (xlator_t *this, inode_t *inode)
{
- uint64_t layout = 0;
- int ret = -1;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
- ret = inode_ctx_get (inode, this, &layout);
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ LOCK (&conf->layout_lock);
+ {
+ dht_inode_ctx_layout_get (inode, this, &layout);
+ if (layout) {
+ layout->ref++;
+ }
+ }
+ UNLOCK (&conf->layout_lock);
- return (dht_layout_t *)(long)layout;
+out:
+ return layout;
}
-xlator_t *
-dht_layout_search (xlator_t *this, dht_layout_t *layout, const char *name)
+int
+dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout)
{
- uint32_t hash = 0;
- xlator_t *subvol = NULL;
- int i = 0;
- int ret = 0;
+ dht_conf_t *conf = NULL;
+ int oldret = -1;
+ int ret = 0;
+ dht_layout_t *old_layout;
+ conf = this->private;
+ if (!conf)
+ goto out;
- ret = dht_hash_compute (layout->type, name, &hash);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "hash computation failed for type=%d name=%s",
- layout->type, name);
- goto out;
- }
+ LOCK (&conf->layout_lock);
+ {
+ oldret = dht_inode_ctx_layout_get (inode, this, &old_layout);
+ layout->ref++;
+ dht_inode_ctx_layout_set (inode, this, layout);
+ }
+ UNLOCK (&conf->layout_lock);
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].start <= hash
- && layout->list[i].stop >= hash) {
- subvol = layout->list[i].xlator;
- break;
- }
- }
-
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume for hash (value) = %u", hash);
- }
+ if (!oldret) {
+ dht_layout_unref (this, old_layout);
+ }
out:
- return subvol;
+ return ret;
}
-dht_layout_t *
-dht_layout_for_subvol (xlator_t *this, xlator_t *subvol)
+void
+dht_layout_unref (xlator_t *this, dht_layout_t *layout)
{
- dht_conf_t *conf = NULL;
- dht_layout_t *layout = NULL;
- int i = 0;
+ dht_conf_t *conf = NULL;
+ int ref = 0;
+ if (!layout || layout->preset || !this->private)
+ return;
- conf = this->private;
+ conf = this->private;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->subvolumes[i] == subvol) {
- layout = conf->file_layouts[i];
- break;
- }
- }
+ LOCK (&conf->layout_lock);
+ {
+ ref = --layout->ref;
+ }
+ UNLOCK (&conf->layout_lock);
- return layout;
+ if (!ref)
+ GF_FREE (layout);
}
-int
-dht_layouts_init (xlator_t *this, dht_conf_t *conf)
+dht_layout_t *
+dht_layout_ref (xlator_t *this, dht_layout_t *layout)
{
- dht_layout_t *layout = NULL;
- int i = 0;
- int ret = -1;
-
-
- conf->file_layouts = CALLOC (conf->subvolume_cnt,
- sizeof (dht_layout_t *));
- if (!conf->file_layouts) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto out;
- }
+ dht_conf_t *conf = NULL;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- layout = dht_layout_new (this, 1);
+ if (layout->preset || !this->private)
+ return layout;
- if (!layout) {
- goto out;
- }
+ conf = this->private;
+ LOCK (&conf->layout_lock);
+ {
+ layout->ref++;
+ }
+ UNLOCK (&conf->layout_lock);
- layout->preset = 1;
+ return layout;
+}
- layout->list[0].xlator = conf->subvolumes[i];
- conf->file_layouts[i] = layout;
- }
+xlator_t *
+dht_layout_search (xlator_t *this, dht_layout_t *layout, const char *name)
+{
+ uint32_t hash = 0;
+ xlator_t *subvol = NULL;
+ int i = 0;
+ int ret = 0;
+
+
+ ret = dht_hash_compute (this, layout->type, name, &hash);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "hash computation failed for type=%d name=%s",
+ layout->type, name);
+ goto out;
+ }
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].start <= hash
+ && layout->list[i].stop >= hash) {
+ subvol = layout->list[i].xlator;
+ break;
+ }
+ }
+
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "no subvolume for hash (value) = %u", hash);
+ }
- ret = 0;
out:
- return ret;
+ return subvol;
}
-int
-dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout,
- int pos, int32_t **disk_layout_p)
+dht_layout_t *
+dht_layout_for_subvol (xlator_t *this, xlator_t *subvol)
{
- int ret = -1;
- int32_t *disk_layout = NULL;
-
- disk_layout = CALLOC (5, sizeof (int));
- if (!disk_layout) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto out;
- }
+ dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
+ int i = 0;
- disk_layout[0] = hton32 (1);
- disk_layout[1] = hton32 (layout->type);
- disk_layout[2] = hton32 (layout->list[pos].start);
- disk_layout[3] = hton32 (layout->list[pos].stop);
+ conf = this->private;
+ if (!conf)
+ goto out;
- if (disk_layout_p)
- *disk_layout_p = disk_layout;
- ret = 0;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == subvol) {
+ layout = conf->file_layouts[i];
+ break;
+ }
+ }
out:
- return ret;
+ return layout;
}
int
-dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
- int pos, int32_t *disk_layout)
+dht_layouts_init (xlator_t *this, dht_conf_t *conf)
{
- int cnt = 0;
- int type = 0;
- int start_off = 0;
- int stop_off = 0;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int ret = -1;
- /* TODO: assert disk_layout_ptr is of required length */
+ if (!conf)
+ goto out;
- cnt = ntoh32 (disk_layout[0]);
- if (cnt != 1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "disk layout has invalid count %d", cnt);
- return -1;
- }
+ conf->file_layouts = GF_CALLOC (conf->subvolume_cnt,
+ sizeof (dht_layout_t *),
+ gf_dht_mt_dht_layout_t);
+ if (!conf->file_layouts) {
+ goto out;
+ }
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ layout = dht_layout_new (this, 1);
+
+ if (!layout) {
+ goto out;
+ }
- /* TODO: assert type is compatible */
- type = ntoh32 (disk_layout[1]);
- start_off = ntoh32 (disk_layout[2]);
- stop_off = ntoh32 (disk_layout[3]);
+ layout->preset = 1;
- layout->list[pos].start = start_off;
- layout->list[pos].stop = stop_off;
+ layout->list[0].xlator = conf->subvolumes[i];
- gf_log (this->name, GF_LOG_TRACE,
- "merged to layout: %u - %u (type %d) from %s",
- start_off, stop_off, type,
- layout->list[pos].xlator->name);
+ conf->file_layouts[i] = layout;
+ }
- return 0;
+ ret = 0;
+out:
+ return ret;
}
int
-dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
- int op_ret, int op_errno, dict_t *xattr)
+dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout,
+ int pos, int32_t **disk_layout_p)
{
- int i = 0;
- int ret = -1;
- int err = -1;
- int32_t *disk_layout = NULL;
+ int ret = -1;
+ int32_t *disk_layout = NULL;
+ disk_layout = GF_CALLOC (5, sizeof (int),
+ gf_dht_mt_int32_t);
+ if (!disk_layout) {
+ goto out;
+ }
- if (op_ret != 0) {
- err = op_errno;
- }
+ disk_layout[0] = hton32 (1);
+ disk_layout[1] = hton32 (layout->type);
+ disk_layout[2] = hton32 (layout->list[pos].start);
+ disk_layout[3] = hton32 (layout->list[pos].stop);
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].xlator == NULL) {
- layout->list[i].err = err;
- layout->list[i].xlator = subvol;
- break;
- }
- }
+ if (disk_layout_p)
+ *disk_layout_p = disk_layout;
+ else
+ GF_FREE (disk_layout);
- if (op_ret != 0) {
- ret = 0;
- goto out;
- }
+ ret = 0;
+
+out:
+ return ret;
+}
- if (xattr) {
- /* during lookup and not mkdir */
- ret = dict_get_ptr (xattr, "trusted.glusterfs.dht",
- VOID(&disk_layout));
- }
- if (ret != 0) {
- layout->list[i].err = -1;
- gf_log (this->name, GF_LOG_TRACE,
- "missing disk layout on %s. err = %d",
- subvol->name, err);
- ret = 0;
- goto out;
+int
+dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
+ int pos, void *disk_layout_raw, int disk_layout_len)
+{
+ int cnt = 0;
+ int type = 0;
+ int start_off = 0;
+ int stop_off = 0;
+ int disk_layout[4];
+
+ if (!disk_layout_raw) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "error no layout on disk for merge");
+ return -1;
}
- ret = dht_disk_layout_merge (this, layout, i, disk_layout);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "layout merge from subvolume %s failed",
- subvol->name);
- goto out;
+ GF_ASSERT (disk_layout_len == sizeof (disk_layout));
+
+ memcpy (disk_layout, disk_layout_raw, disk_layout_len);
+
+ cnt = ntoh32 (disk_layout[0]);
+ if (cnt != 1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "disk layout has invalid count %d", cnt);
+ return -1;
+ }
+
+ type = ntoh32 (disk_layout[1]);
+ switch (type) {
+ case DHT_HASH_TYPE_DM_USER:
+ gf_log (this->name, GF_LOG_DEBUG, "found user-set layout");
+ layout->type = type;
+ /* Fall through. */
+ case DHT_HASH_TYPE_DM:
+ break;
+ default:
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Catastrophic error layout with unknown type found %d",
+ disk_layout[1]);
+ return -1;
}
- layout->list[i].err = 0;
+
+ start_off = ntoh32 (disk_layout[2]);
+ stop_off = ntoh32 (disk_layout[3]);
+
+ layout->list[pos].start = start_off;
+ layout->list[pos].stop = stop_off;
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "merged to layout: %u - %u (type %d) from %s",
+ start_off, stop_off, type,
+ layout->list[pos].xlator->name);
+
+ return 0;
+}
+
+
+int
+dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
+ int op_ret, int op_errno, dict_t *xattr)
+{
+ int i = 0;
+ int ret = -1;
+ int err = -1;
+ void *disk_layout_raw = NULL;
+ int disk_layout_len = 0;
+ dht_conf_t *conf = this->private;
+
+ if (op_ret != 0) {
+ err = op_errno;
+ }
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == NULL) {
+ layout->list[i].err = err;
+ layout->list[i].xlator = subvol;
+ break;
+ }
+ }
+
+ if (op_ret != 0) {
+ ret = 0;
+ goto out;
+ }
+
+ if (xattr) {
+ /* during lookup and not mkdir */
+ ret = dict_get_ptr_and_len (xattr, conf->xattr_name,
+ &disk_layout_raw, &disk_layout_len);
+ }
+
+ if (ret != 0) {
+ layout->list[i].err = 0;
+ gf_log (this->name, GF_LOG_TRACE,
+ "missing disk layout on %s. err = %d",
+ subvol->name, err);
+ ret = 0;
+ goto out;
+ }
+
+ ret = dht_disk_layout_merge (this, layout, i, disk_layout_raw,
+ disk_layout_len);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "layout merge from subvolume %s failed",
+ subvol->name);
+ goto out;
+ }
+ layout->list[i].err = 0;
out:
- return ret;
+ return ret;
}
void
dht_layout_entry_swap (dht_layout_t *layout, int i, int j)
{
- uint32_t start_swap = 0;
- uint32_t stop_swap = 0;
- xlator_t *xlator_swap = 0;
- int err_swap = 0;
-
-
- start_swap = layout->list[i].start;
- stop_swap = layout->list[i].stop;
- xlator_swap = layout->list[i].xlator;
- err_swap = layout->list[i].err;
-
- layout->list[i].start = layout->list[j].start;
- layout->list[i].stop = layout->list[j].stop;
- layout->list[i].xlator = layout->list[j].xlator;
- layout->list[i].err = layout->list[j].err;
-
- layout->list[j].start = start_swap;
- layout->list[j].stop = stop_swap;
- layout->list[j].xlator = xlator_swap;
- layout->list[j].err = err_swap;
+ uint32_t start_swap = 0;
+ uint32_t stop_swap = 0;
+ xlator_t *xlator_swap = 0;
+ int err_swap = 0;
+
+ start_swap = layout->list[i].start;
+ stop_swap = layout->list[i].stop;
+ xlator_swap = layout->list[i].xlator;
+ err_swap = layout->list[i].err;
+
+ layout->list[i].start = layout->list[j].start;
+ layout->list[i].stop = layout->list[j].stop;
+ layout->list[i].xlator = layout->list[j].xlator;
+ layout->list[i].err = layout->list[j].err;
+
+ layout->list[j].start = start_swap;
+ layout->list[j].stop = stop_swap;
+ layout->list[j].xlator = xlator_swap;
+ layout->list[j].err = err_swap;
+}
+
+void
+dht_layout_range_swap (dht_layout_t *layout, int i, int j)
+{
+ uint32_t start_swap = 0;
+ uint32_t stop_swap = 0;
+
+ start_swap = layout->list[i].start;
+ stop_swap = layout->list[i].stop;
+
+ layout->list[i].start = layout->list[j].start;
+ layout->list[i].stop = layout->list[j].stop;
+
+ layout->list[j].start = start_swap;
+ layout->list[j].stop = stop_swap;
}
int64_t
dht_layout_entry_cmp_volname (dht_layout_t *layout, int i, int j)
{
- return (strcmp (layout->list[i].xlator->name,
- layout->list[j].xlator->name));
+ return (strcmp (layout->list[i].xlator->name,
+ layout->list[j].xlator->name));
+}
+
+
+gf_boolean_t
+dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator)
+{
+ int i = 0;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (!strcmp (layout->list[i].xlator->name, xlator->name))
+ return _gf_true;
+ }
+ return _gf_false;
}
int64_t
dht_layout_entry_cmp (dht_layout_t *layout, int i, int j)
{
- int64_t diff = 0;
-
- if (layout->list[i].err || layout->list[j].err)
- diff = layout->list[i].err - layout->list[j].err;
- else
- diff = (int64_t) layout->list[i].start
- - (int64_t) layout->list[j].start;
+ int64_t diff = 0;
+
+ /* swap zero'ed out layouts to front, if needed */
+ if (!layout->list[j].start && !layout->list[j].stop) {
+ diff = (int64_t) layout->list[i].stop
+ - (int64_t) layout->list[j].stop;
+ goto out;
+ }
+ if (layout->list[i].err || layout->list[j].err)
+ diff = layout->list[i].err - layout->list[j].err;
+ else
+ diff = (int64_t) layout->list[i].start
+ - (int64_t) layout->list[j].start;
- return diff;
+out:
+ return diff;
}
int
dht_layout_sort (dht_layout_t *layout)
{
- int i = 0;
- int j = 0;
- int64_t ret = 0;
-
- /* TODO: O(n^2) -- bad bad */
-
- for (i = 0; i < layout->cnt - 1; i++) {
- for (j = i + 1; j < layout->cnt; j++) {
- ret = dht_layout_entry_cmp (layout, i, j);
- if (ret > 0)
- dht_layout_entry_swap (layout, i, j);
- }
- }
+ int i = 0;
+ int j = 0;
+ int64_t ret = 0;
- return 0;
+ /* TODO: O(n^2) -- bad bad */
+
+ for (i = 0; i < layout->cnt - 1; i++) {
+ for (j = i + 1; j < layout->cnt; j++) {
+ ret = dht_layout_entry_cmp (layout, i, j);
+ if (ret > 0)
+ dht_layout_entry_swap (layout, i, j);
+ }
+ }
+
+ return 0;
}
int
dht_layout_sort_volname (dht_layout_t *layout)
{
- int i = 0;
- int j = 0;
- int64_t ret = 0;
-
- /* TODO: O(n^2) -- bad bad */
-
- for (i = 0; i < layout->cnt - 1; i++) {
- for (j = i + 1; j < layout->cnt; j++) {
- ret = dht_layout_entry_cmp_volname (layout, i, j);
- if (ret > 0)
- dht_layout_entry_swap (layout, i, j);
- }
- }
+ int i = 0;
+ int j = 0;
+ int64_t ret = 0;
+
+ /* TODO: O(n^2) -- bad bad */
- return 0;
+ for (i = 0; i < layout->cnt - 1; i++) {
+ for (j = i + 1; j < layout->cnt; j++) {
+ ret = dht_layout_entry_cmp_volname (layout, i, j);
+ if (ret > 0)
+ dht_layout_entry_swap (layout, i, j);
+ }
+ }
+
+ return 0;
}
int
dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
- uint32_t *holes_p, uint32_t *overlaps_p,
- uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p)
+ uint32_t *holes_p, uint32_t *overlaps_p,
+ uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p,
+ uint32_t *no_space_p)
{
- dht_conf_t *conf = NULL;
- uint32_t holes = 0;
- uint32_t overlaps = 0;
- uint32_t missing = 0;
- uint32_t down = 0;
- uint32_t misc = 0;
- uint32_t hole_cnt = 0;
- uint32_t overlap_cnt = 0;
- int i = 0;
- int ret = 0;
- uint32_t prev_stop = 0;
- uint32_t last_stop = 0;
- char is_virgin = 1;
-
-
- conf = this->private;
-
- /* TODO: explain WTF is happening */
-
- last_stop = layout->list[0].start - 1;
- prev_stop = last_stop;
-
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err) {
- switch (layout->list[i].err) {
- case -1:
- case ENOENT:
- missing++;
- break;
- case ENOTCONN:
- down++;
- break;
- case ENOSPC:
- down++;
- break;
- default:
- misc++;
- }
- continue;
- }
-
- is_virgin = 0;
-
- if ((prev_stop + 1) < layout->list[i].start) {
- hole_cnt++;
- holes += (layout->list[i].start - (prev_stop + 1));
- }
-
- if ((prev_stop + 1) > layout->list[i].start) {
- overlap_cnt++;
- overlaps += ((prev_stop + 1) - layout->list[i].start);
- }
- prev_stop = layout->list[i].stop;
- }
+ uint32_t overlaps = 0;
+ uint32_t missing = 0;
+ uint32_t down = 0;
+ uint32_t misc = 0;
+ uint32_t hole_cnt = 0;
+ uint32_t overlap_cnt = 0;
+ int i = 0;
+ int ret = 0;
+ uint32_t prev_stop = 0;
+ uint32_t last_stop = 0;
+ char is_virgin = 1;
+ uint32_t no_space = 0;
+
+ /* TODO: explain what is happening */
+
+ last_stop = layout->list[0].start - 1;
+ prev_stop = last_stop;
+
+ for (i = 0; i < layout->cnt; i++) {
+ switch (layout->list[i].err) {
+ case -1:
+ case ENOENT:
+ missing++;
+ continue;
+ case ENOTCONN:
+ down++;
+ continue;
+ case ENOSPC:
+ no_space++;
+ continue;
+ case 0:
+ /* if err == 0 and start == stop, then it is a non misc++;
+ * participating subvolume(spread-cnt). Then, do not
+ * check for anomalies. If start != stop, then treat it
+ * as misc err */
+ if (layout->list[i].start == layout->list[i].stop) {
+ continue;
+ }
+ break;
+ default:
+ misc++;
+ continue;
+ }
+
+ is_virgin = 0;
+
+ if ((prev_stop + 1) < layout->list[i].start) {
+ hole_cnt++;
+ }
- if ((last_stop - prev_stop) || is_virgin)
- hole_cnt++;
- holes += (last_stop - prev_stop);
+ if ((prev_stop + 1) > layout->list[i].start) {
+ overlap_cnt++;
+ overlaps += ((prev_stop + 1) - layout->list[i].start);
+ }
+ prev_stop = layout->list[i].stop;
+ }
+
+ if ((last_stop - prev_stop) || is_virgin)
+ hole_cnt++;
+
+ if (holes_p)
+ *holes_p = hole_cnt;
- if (holes_p)
- *holes_p = hole_cnt;
+ if (overlaps_p)
+ *overlaps_p = overlap_cnt;
- if (overlaps_p)
- *overlaps_p = overlap_cnt;
+ if (missing_p)
+ *missing_p = missing;
- if (missing_p)
- *missing_p = missing;
+ if (down_p)
+ *down_p = down;
- if (down_p)
- *down_p = down;
+ if (misc_p)
+ *misc_p = misc;
- if (misc_p)
- *misc_p = misc;
+ if (no_space_p)
+ *no_space_p = no_space;
- return ret;
+ return ret;
}
int
dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout)
{
- int ret = 0;
- int i = 0;
- uint32_t holes = 0;
- uint32_t overlaps = 0;
- uint32_t missing = 0;
- uint32_t down = 0;
- uint32_t misc = 0;
-
-
- ret = dht_layout_sort (layout);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "sort failed?! how the ....");
- goto out;
- }
-
- ret = dht_layout_anomalies (this, loc, layout,
- &holes, &overlaps,
- &missing, &down, &misc);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "error while finding anomalies in %s -- not good news",
- loc->path);
- goto out;
- }
-
- if (holes || overlaps) {
- if (missing == layout->cnt) {
- gf_log (this->name, GF_LOG_DEBUG,
- "directory %s looked up first time",
- loc->path);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "found anomalies in %s. holes=%d overlaps=%d",
- loc->path, holes, overlaps);
- }
- ret = 1;
- }
+ int ret = 0;
+ int i = 0;
+ uint32_t holes = 0;
+ uint32_t overlaps = 0;
+ uint32_t missing = 0;
+ uint32_t down = 0;
+ uint32_t misc = 0;
+
+ ret = dht_layout_sort (layout);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "sort failed?! how the ....");
+ goto out;
+ }
+
+ ret = dht_layout_anomalies (this, loc, layout,
+ &holes, &overlaps,
+ &missing, &down, &misc, NULL);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "error while finding anomalies in %s -- not good news",
+ loc->path);
+ goto out;
+ }
+
+ if (holes || overlaps) {
+ if (missing == layout->cnt) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "directory %s looked up first time",
+ loc->path);
+ } else {
+ gf_log (this->name, GF_LOG_INFO,
+ "found anomalies in %s. holes=%d overlaps=%d",
+ loc->path, holes, overlaps);
+ }
+ ret = -1;
+ }
+
+ for (i = 0; i < layout->cnt; i++) {
+ /* TODO During DHT selfheal rewrite (almost) find a better place
+ * to detect this - probably in dht_layout_anomalies()
+ */
+ if (layout->list[i].err > 0) {
+ gf_log_callingfn (this->name, GF_LOG_DEBUG,
+ "path=%s err=%s on subvol=%s",
+ loc->path,
+ strerror (layout->list[i].err),
+ (layout->list[i].xlator ?
+ layout->list[i].xlator->name
+ : "<>"));
+ if ((layout->list[i].err == ENOENT) && (ret >= 0)) {
+ ret++;
+ }
+ }
+ }
- for (i = 0; i < layout->cnt; i++) {
- /* TODO During DHT selfheal rewrite (almost) find a better place to
- * detect this - probably in dht_layout_anomalies()
- */
- if (layout->list[i].err > 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "path=%s err=%s on subvol=%s",
- loc->path, strerror (layout->list[i].err),
- (layout->list[i].xlator ?
- layout->list[i].xlator->name : "<>"));
- if (layout->list[i].err == ENOENT)
- ret = 1;
- }
- }
out:
- return ret;
+ return ret;
}
+int
+dht_dir_has_layout (dict_t *xattr, char *name)
+{
+
+ void *disk_layout_raw = NULL;
+
+ return dict_get_ptr (xattr, name, &disk_layout_raw);
+}
int
dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
- loc_t *loc, dict_t *xattr)
+ loc_t *loc, dict_t *xattr)
{
- int idx = 0;
- int pos = -1;
- int ret = 0;
- int err = 0;
- int dict_ret = 0;
- int32_t *disk_layout = NULL;
- int32_t count = -1;
- uint32_t start_off = -1;
- uint32_t stop_off = -1;
-
-
- for (idx = 0; idx < layout->cnt; idx++) {
- if (layout->list[idx].xlator == subvol) {
- pos = idx;
- break;
- }
- }
-
- if (pos == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s - no layout info for subvolume %s",
- loc->path, subvol->name);
- ret = 1;
- goto out;
- }
+ int idx = 0;
+ int pos = -1;
+ int ret = 0;
+ int err = 0;
+ int dict_ret = 0;
+ int32_t disk_layout[4];
+ void *disk_layout_raw = NULL;
+ int32_t count = -1;
+ uint32_t start_off = -1;
+ uint32_t stop_off = -1;
+ dht_conf_t *conf = this->private;
+
+
+ for (idx = 0; idx < layout->cnt; idx++) {
+ if (layout->list[idx].xlator == subvol) {
+ pos = idx;
+ break;
+ }
+ }
+
+ if (pos == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s - no layout info for subvolume %s",
+ loc->path, subvol->name);
+ ret = 1;
+ goto out;
+ }
err = layout->list[pos].err;
- if (!xattr) {
+ if (!xattr) {
if (err == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_INFO,
"%s - xattr dictionary is NULL",
loc->path);
ret = -1;
}
- goto out;
- }
+ goto out;
+ }
+
+ dict_ret = dict_get_ptr (xattr, conf->xattr_name,
+ &disk_layout_raw);
- dict_ret = dict_get_ptr (xattr, "trusted.glusterfs.dht",
- VOID(&disk_layout));
-
- if (dict_ret < 0) {
+ if (dict_ret < 0) {
if (err == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_INFO,
"%s - disk layout missing", loc->path);
ret = -1;
}
- goto out;
- }
-
- count = ntoh32 (disk_layout[0]);
- if (count != 1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s - disk layout has invalid count %d",
- loc->path, count);
- ret = -1;
- goto out;
- }
-
- start_off = ntoh32 (disk_layout[2]);
- stop_off = ntoh32 (disk_layout[3]);
-
- if ((layout->list[pos].start != start_off)
- || (layout->list[pos].stop != stop_off)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "subvol: %s; inode layout - %"PRId32" - %"PRId32"; "
- "disk layout - %"PRId32" - %"PRId32,
- layout->list[pos].xlator->name,
- layout->list[pos].start, layout->list[pos].stop,
- start_off, stop_off);
- ret = 1;
- } else {
- ret = 0;
- }
+ goto out;
+ }
+
+ memcpy (disk_layout, disk_layout_raw, sizeof (disk_layout));
+
+ count = ntoh32 (disk_layout[0]);
+ if (count != 1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s - disk layout has invalid count %d",
+ loc->path, count);
+ ret = -1;
+ goto out;
+ }
+
+ start_off = ntoh32 (disk_layout[2]);
+ stop_off = ntoh32 (disk_layout[3]);
+
+ if ((layout->list[pos].start != start_off)
+ || (layout->list[pos].stop != stop_off)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "subvol: %s; inode layout - %"PRIu32" - %"PRIu32"; "
+ "disk layout - %"PRIu32" - %"PRIu32,
+ layout->list[pos].xlator->name,
+ layout->list[pos].start, layout->list[pos].stop,
+ start_off, stop_off);
+ ret = 1;
+ } else {
+ ret = 0;
+ }
out:
- return ret;
+ return ret;
}
int
-dht_layout_inode_set (xlator_t *this, xlator_t *subvol, inode_t *inode)
+dht_layout_preset (xlator_t *this, xlator_t *subvol, inode_t *inode)
{
dht_layout_t *layout = NULL;
- int ret = -1;
-
- layout = dht_layout_for_subvol (this, subvol);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no pre-set layout for subvolume %s",
- subvol ? subvol->name : "<nil>");
- ret = -1;
- goto out;
- }
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ layout = dht_layout_for_subvol (this, subvol);
+ if (!layout) {
+ gf_log (this->name, GF_LOG_INFO,
+ "no pre-set layout for subvolume %s",
+ subvol ? subvol->name : "<nil>");
+ ret = -1;
+ goto out;
+ }
+
+ LOCK (&conf->layout_lock);
+ {
+ dht_inode_ctx_layout_set (inode, this, layout);
+ }
+ UNLOCK (&conf->layout_lock);
- inode_ctx_put (inode, this, (uint64_t)(long)layout);
-
ret = 0;
out:
return ret;
diff --git a/xlators/cluster/dht/src/dht-linkfile.c b/xlators/cluster/dht/src/dht-linkfile.c
index 213d3f2bf..dbc9d0b3c 100644
--- a/xlators/cluster/dht/src/dht-linkfile.c
+++ b/xlators/cluster/dht/src/dht-linkfile.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -28,197 +19,310 @@
#include "compat.h"
#include "dht-common.h"
-
-
int
-dht_linkfile_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+dht_linkfile_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
{
- dht_local_t *local = NULL;
-
-
- local = frame->local;
- local->linkfile.linkfile_cbk (frame, cookie, this, op_ret, op_errno,
- local->linkfile.inode,
- &local->linkfile.stbuf);
-
- return 0;
+ char is_linkfile = 0;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ if (op_ret)
+ goto out;
+
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
+ if (!is_linkfile)
+ gf_log (this->name, GF_LOG_WARNING, "got non-linkfile %s:%s",
+ prev->this->name, local->loc.path);
+out:
+ local->linkfile.linkfile_cbk (frame, cookie, this, op_ret, op_errno,
+ inode, stbuf, postparent, postparent,
+ xattr);
+ return 0;
}
-
+#define is_equal(a, b) (a == b)
int
dht_linkfile_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct stat *stbuf)
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- dict_t *xattr = NULL;
- data_t *str_data = NULL;
- int ret = -1;
-
- local = frame->local;
- prev = cookie;
-
- if (op_ret == -1)
- goto err;
-
- xattr = get_new_dict ();
- if (!xattr) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- local->linkfile.xattr = dict_ref (xattr);
- local->linkfile.inode = inode_ref (inode);
-
- str_data = str_to_data (local->linkfile.srcvol->name);
- if (!str_data) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- ret = dict_set (xattr, "trusted.glusterfs.dht.linkto", str_data);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to initialize linkfile data");
- op_errno = EINVAL;
- }
- str_data = NULL;
-
- local->linkfile.stbuf = *stbuf;
-
- STACK_WIND (frame, dht_linkfile_xattr_cbk,
- prev->this, prev->this->fops->setxattr,
- &local->linkfile.loc, local->linkfile.xattr, 0);
-
- return 0;
-
-err:
- if (str_data) {
- data_destroy (str_data);
- str_data = NULL;
- }
-
- local->linkfile.linkfile_cbk (frame, cookie, this,
- op_ret, op_errno, inode, stbuf);
- return 0;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ call_frame_t *prev = NULL;
+ dict_t *xattrs = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+
+ local = frame->local;
+
+ if (!op_ret)
+ local->linked = _gf_true;
+
+ FRAME_SU_UNDO (frame, dht_local_t);
+
+ if (op_ret && (op_errno == EEXIST)) {
+ conf = this->private;
+ prev = cookie;
+ subvol = prev->this;
+ if (!subvol)
+ goto out;
+ xattrs = dict_new ();
+ if (!xattrs)
+ goto out;
+ ret = dict_set_uint32 (xattrs, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set linkto key");
+ goto out;
+ }
+
+ STACK_WIND (frame, dht_linkfile_lookup_cbk, subvol,
+ subvol->fops->lookup, &local->loc, xattrs);
+ if (xattrs)
+ dict_unref (xattrs);
+ return 0;
+ }
+out:
+ local->linkfile.linkfile_cbk (frame, cookie, this, op_ret, op_errno,
+ inode, stbuf, preparent, postparent,
+ xdata);
+ if (xattrs)
+ dict_unref (xattrs);
+ return 0;
}
int
dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
- xlator_t *tovol, xlator_t *fromvol, loc_t *loc)
+ xlator_t *this,
+ xlator_t *tovol, xlator_t *fromvol, loc_t *loc)
{
- dht_local_t *local = NULL;
-
-
- local = frame->local;
- local->linkfile.linkfile_cbk = linkfile_cbk;
- local->linkfile.srcvol = tovol;
- loc_copy (&local->linkfile.loc, loc);
+ dht_local_t *local = NULL;
+ dict_t *dict = NULL;
+ int need_unref = 0;
+ int ret = 0;
+ dht_conf_t *conf = this->private;
+
+ local = frame->local;
+ local->linkfile.linkfile_cbk = linkfile_cbk;
+ local->linkfile.srcvol = tovol;
+
+ local->linked = _gf_false;
+
+ dict = local->params;
+ if (!dict) {
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+ need_unref = 1;
+ }
+
+ if (!uuid_is_null (local->gfid)) {
+ ret = dict_set_static_bin (dict, "gfid-req", local->gfid, 16);
+ if (ret)
+ gf_log ("dht-linkfile", GF_LOG_INFO,
+ "%s: gfid set failed", loc->path);
+ }
+
+ ret = dict_set_str (dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
+ if (ret)
+ gf_log ("dht-linkfile", GF_LOG_INFO,
+ "%s: internal-fop set failed", loc->path);
+
+ ret = dict_set_str (dict, conf->link_xattr_name, tovol->name);
+
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_INFO,
+ "%s: failed to initialize linkfile data",
+ loc->path);
+ goto out;
+ }
+
+ local->link_subvol = fromvol;
+ /* Always create as root:root. dht_linkfile_attr_heal fixes the
+ * ownsership */
+ FRAME_SU_DO (frame, dht_local_t);
+ STACK_WIND (frame, dht_linkfile_create_cbk,
+ fromvol, fromvol->fops->mknod, loc,
+ S_IFREG | DHT_LINKFILE_MODE, 0, 0, dict);
+
+ if (need_unref && dict)
+ dict_unref (dict);
+
+ return 0;
+out:
+ local->linkfile.linkfile_cbk (frame, NULL, frame->this, -1, ENOMEM,
+ loc->inode, NULL, NULL, NULL, NULL);
- STACK_WIND (frame, dht_linkfile_create_cbk,
- fromvol, fromvol->fops->mknod, loc,
- S_IFREG | DHT_LINKFILE_MODE, 0);
+ if (need_unref && dict)
+ dict_unref (dict);
- return 0;
+ return 0;
}
int
dht_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *subvol = NULL;
- local = frame->local;
- prev = cookie;
- subvol = prev->this;
+ local = frame->local;
+ prev = cookie;
+ subvol = prev->this;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unlinking linkfile %s on %s failed (%s)",
- local->loc.path, subvol->name, strerror (op_errno));
- }
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "unlinking linkfile %s on %s failed (%s)",
+ local->loc.path, subvol->name, strerror (op_errno));
+ }
- DHT_STACK_DESTROY (frame);
+ DHT_STACK_DESTROY (frame);
- return 0;
+ return 0;
}
int
dht_linkfile_unlink (call_frame_t *frame, xlator_t *this,
- xlator_t *subvol, loc_t *loc)
+ xlator_t *subvol, loc_t *loc)
{
- call_frame_t *unlink_frame = NULL;
- dht_local_t *unlink_local = NULL;
-
- unlink_frame = copy_frame (frame);
- if (!unlink_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- unlink_local = dht_local_init (unlink_frame);
- if (!unlink_local) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- loc_copy (&unlink_local->loc, loc);
-
- STACK_WIND (unlink_frame, dht_linkfile_unlink_cbk,
- subvol, subvol->fops->unlink,
- &unlink_local->loc);
-
- return 0;
+ call_frame_t *unlink_frame = NULL;
+ dht_local_t *unlink_local = NULL;
+
+ unlink_frame = copy_frame (frame);
+ if (!unlink_frame) {
+ goto err;
+ }
+
+ /* Using non-fop value here, as anyways, 'local->fop' is not used in
+ this particular case */
+ unlink_local = dht_local_init (unlink_frame, loc, NULL,
+ GF_FOP_MAXVALUE);
+ if (!unlink_local) {
+ goto err;
+ }
+
+ STACK_WIND (unlink_frame, dht_linkfile_unlink_cbk,
+ subvol, subvol->fops->unlink,
+ &unlink_local->loc, 0, NULL);
+
+ return 0;
err:
- if (unlink_frame)
- DHT_STACK_DESTROY (unlink_frame);
+ if (unlink_frame)
+ DHT_STACK_DESTROY (unlink_frame);
- return -1;
+ return -1;
}
xlator_t *
-dht_linkfile_subvol (xlator_t *this, inode_t *inode, struct stat *stbuf,
- dict_t *xattr)
+dht_linkfile_subvol (xlator_t *this, inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr)
{
- dht_conf_t *conf = NULL;
- xlator_t *subvol = NULL;
- void *volname = NULL;
- int i = 0, ret = 0;
-
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
+ void *volname = NULL;
+ int i = 0, ret = 0;
- conf = this->private;
+ conf = this->private;
- if (!xattr)
- goto out;
+ if (!xattr)
+ goto out;
- ret = dict_get_ptr (xattr, "trusted.glusterfs.dht.linkto", &volname);
+ ret = dict_get_ptr (xattr, conf->link_xattr_name, &volname);
- if ((-1 == ret) || !volname)
- goto out;
+ if ((-1 == ret) || !volname)
+ goto out;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (strcmp (conf->subvolumes[i]->name, (char *)volname) == 0) {
- subvol = conf->subvolumes[i];
- break;
- }
- }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (strcmp (conf->subvolumes[i]->name, (char *)volname) == 0) {
+ subvol = conf->subvolumes[i];
+ break;
+ }
+ }
out:
- return subvol;
+ return subvol;
+}
+
+int
+dht_linkfile_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ loc_t *loc = NULL;
+
+ local = frame->local;
+ loc = &local->loc;
+
+ if (op_ret)
+ gf_log (this->name, GF_LOG_ERROR, "setattr of uid/gid on %s"
+ " :<gfid:%s> failed (%s)",
+ (loc->path? loc->path: "NULL"),
+ uuid_utoa(local->gfid), strerror(op_errno));
+
+ DHT_STACK_DESTROY (frame);
+
+ return 0;
}
+int
+dht_linkfile_attr_heal (call_frame_t *frame, xlator_t *this)
+{
+ int ret = -1;
+ call_frame_t *copy = NULL;
+ dht_local_t *local = NULL;
+ dht_local_t *copy_local = NULL;
+ xlator_t *subvol = NULL;
+ struct iatt stbuf = {0,};
+
+ local = frame->local;
+
+ GF_VALIDATE_OR_GOTO ("dht", local, out);
+ GF_VALIDATE_OR_GOTO ("dht", local->link_subvol, out);
+
+ if (local->stbuf.ia_type == IA_INVAL)
+ return 0;
+
+ uuid_copy (local->loc.gfid, local->stbuf.ia_gfid);
+ copy = copy_frame (frame);
+
+ if (!copy)
+ goto out;
+
+ copy_local = dht_local_init (copy, &local->loc, NULL, 0);
+
+ if (!copy_local)
+ goto out;
+
+ stbuf = local->stbuf;
+ subvol = local->link_subvol;
+
+ copy->local = copy_local;
+
+ FRAME_SU_DO (copy, dht_local_t);
+
+ STACK_WIND (copy, dht_linkfile_setattr_cbk, subvol,
+ subvol->fops->setattr, &copy_local->loc,
+ &stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID), NULL);
+ ret = 0;
+out:
+ return ret;
+}
diff --git a/xlators/cluster/dht/src/dht-mem-types.h b/xlators/cluster/dht/src/dht-mem-types.h
new file mode 100644
index 000000000..e893eb48f
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-mem-types.h
@@ -0,0 +1,35 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __DHT_MEM_TYPES_H__
+#define __DHT_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_dht_mem_types_ {
+ gf_dht_mt_dht_du_t = gf_common_mt_end + 1,
+ gf_dht_mt_dht_conf_t,
+ gf_dht_mt_char,
+ gf_dht_mt_int32_t,
+ gf_dht_mt_xlator_t,
+ gf_dht_mt_dht_layout_t,
+ gf_switch_mt_dht_conf_t,
+ gf_switch_mt_dht_du_t,
+ gf_switch_mt_switch_sched_array,
+ gf_switch_mt_switch_struct,
+ gf_dht_mt_subvol_time,
+ gf_dht_mt_loc_t,
+ gf_defrag_info_mt,
+ gf_dht_mt_inode_ctx_t,
+ gf_dht_mt_ctx_stat_time_t,
+ gf_dht_mt_end
+};
+#endif
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
new file mode 100644
index 000000000..bcb19f23e
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -0,0 +1,1815 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "dht-common.h"
+#include "xlator.h"
+#include <fnmatch.h>
+
+#define GF_DISK_SECTOR_SIZE 512
+#define DHT_REBALANCE_PID 4242 /* Change it if required */
+#define DHT_REBALANCE_BLKSIZE (128 * 1024)
+
+static int
+dht_write_with_holes (xlator_t *to, fd_t *fd, struct iovec *vec, int count,
+ int32_t size, off_t offset, struct iobref *iobref)
+{
+ int i = 0;
+ int ret = -1;
+ int start_idx = 0;
+ int tmp_offset = 0;
+ int write_needed = 0;
+ int buf_len = 0;
+ int size_pending = 0;
+ char *buf = NULL;
+
+ /* loop through each vector */
+ for (i = 0; i < count; i++) {
+ buf = vec[i].iov_base;
+ buf_len = vec[i].iov_len;
+
+ for (start_idx = 0; (start_idx + GF_DISK_SECTOR_SIZE) <= buf_len;
+ start_idx += GF_DISK_SECTOR_SIZE) {
+
+ if (mem_0filled (buf + start_idx, GF_DISK_SECTOR_SIZE) != 0) {
+ write_needed = 1;
+ continue;
+ }
+
+ if (write_needed) {
+ ret = syncop_write (to, fd, (buf + tmp_offset),
+ (start_idx - tmp_offset),
+ (offset + tmp_offset),
+ iobref, 0);
+ /* 'path' will be logged in calling function */
+ if (ret < 0) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to write (%s)",
+ strerror (errno));
+ goto out;
+ }
+
+ write_needed = 0;
+ }
+ tmp_offset = start_idx + GF_DISK_SECTOR_SIZE;
+ }
+
+ if ((start_idx < buf_len) || write_needed) {
+ /* This means, last chunk is not yet written.. write it */
+ ret = syncop_write (to, fd, (buf + tmp_offset),
+ (buf_len - tmp_offset),
+ (offset + tmp_offset), iobref, 0);
+ if (ret < 0) {
+ /* 'path' will be logged in calling function */
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to write (%s)",
+ strerror (errno));
+ goto out;
+ }
+ }
+
+ size_pending = (size - buf_len);
+ if (!size_pending)
+ break;
+ }
+
+ ret = size;
+out:
+ return ret;
+
+}
+
+int32_t
+gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs,
+ struct iatt *stbuf)
+{
+ int32_t ret = -1;
+ xlator_t *cached_subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *linkto_subvol = NULL;
+ data_t *data = NULL;
+ struct iatt iatt = {0,};
+ int32_t op_errno = 0;
+ dht_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO ("defrag", loc, out);
+ GF_VALIDATE_OR_GOTO ("defrag", loc->name, out);
+ GF_VALIDATE_OR_GOTO ("defrag", stbuf, out);
+ GF_VALIDATE_OR_GOTO ("defrag", this, out);
+ GF_VALIDATE_OR_GOTO ("defrag", xattrs, out);
+ GF_VALIDATE_OR_GOTO ("defrag", this->private, out);
+
+ conf = this->private;
+
+ if (uuid_is_null (loc->pargfid)) {
+ gf_log ("", GF_LOG_ERROR, "loc->pargfid is NULL for "
+ "%s", loc->path);
+ goto out;
+ }
+
+ if (uuid_is_null (loc->gfid)) {
+ gf_log ("", GF_LOG_ERROR, "loc->gfid is NULL for "
+ "%s", loc->path);
+ goto out;
+ }
+
+ cached_subvol = dht_subvol_get_cached (this, loc->inode);
+ if (!cached_subvol) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get cached subvol"
+ " for %s on %s", loc->name, this->name);
+ goto out;
+ }
+
+ hashed_subvol = dht_subvol_get_hashed (this, loc);
+ if (!hashed_subvol) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get hashed subvol"
+ " for %s on %s", loc->name, this->name);
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_INFO, "Attempting to migrate hardlink %s "
+ "with gfid %s from %s -> %s", loc->name, uuid_utoa (loc->gfid),
+ cached_subvol->name, hashed_subvol->name);
+ data = dict_get (xattrs, conf->link_xattr_name);
+ /* set linkto on cached -> hashed if not present, else link it */
+ if (!data) {
+ ret = dict_set_str (xattrs, conf->link_xattr_name,
+ hashed_subvol->name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set "
+ "linkto xattr in dict for %s", loc->name);
+ goto out;
+ }
+
+ ret = syncop_setxattr (cached_subvol, loc, xattrs, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Linkto setxattr "
+ "failed %s -> %s (%s)", cached_subvol->name,
+ loc->name, strerror (errno));
+ goto out;
+ }
+ goto out;
+ } else {
+ linkto_subvol = dht_linkfile_subvol (this, NULL, NULL, xattrs);
+ if (!linkto_subvol) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get "
+ "linkto subvol for %s", loc->name);
+ } else {
+ hashed_subvol = linkto_subvol;
+ }
+
+ ret = syncop_link (hashed_subvol, loc, loc);
+ if (ret) {
+ op_errno = errno;
+ gf_log (this->name, GF_LOG_ERROR, "link of %s -> %s"
+ " failed on subvol %s (%s)", loc->name,
+ uuid_utoa(loc->gfid),
+ hashed_subvol->name, strerror (op_errno));
+ if (op_errno != EEXIST)
+ goto out;
+ }
+ }
+ ret = syncop_lookup (hashed_subvol, loc, NULL, &iatt, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed lookup %s on %s (%s)"
+ , loc->name, hashed_subvol->name, strerror (errno));
+ goto out;
+ }
+
+ if (iatt.ia_nlink == stbuf->ia_nlink) {
+ ret = dht_migrate_file (this, loc, cached_subvol, hashed_subvol,
+ GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS);
+ if (ret)
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+
+static inline int
+__is_file_migratable (xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, dict_t *xattrs, int flags)
+{
+ int ret = -1;
+
+ if (IA_ISDIR (stbuf->ia_type)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: migrate-file called on directory", loc->path);
+ ret = -1;
+ goto out;
+ }
+
+ if (flags == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS) {
+ ret = 0;
+ goto out;
+ }
+ if (stbuf->ia_nlink > 1) {
+ /* support for decomission */
+ if (flags == GF_DHT_MIGRATE_HARDLINK) {
+ ret = gf_defrag_handle_hardlink (this, loc,
+ xattrs, stbuf);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to migrate file with link",
+ loc->path);
+ }
+ } else {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: file has hardlinks", loc->path);
+ }
+ ret = ENOTSUP;
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+static inline int
+__dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struct iatt *stbuf,
+ dict_t *dict, fd_t **dst_fd, dict_t *xattr)
+{
+ xlator_t *this = NULL;
+ int ret = -1;
+ fd_t *fd = NULL;
+ struct iatt new_stbuf = {0,};
+ dht_conf_t *conf = NULL;
+
+ this = THIS;
+ conf = this->private;
+
+ ret = dict_set_static_bin (dict, "gfid-req", stbuf->ia_gfid, 16);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to set gfid in dict for create", loc->path);
+ goto out;
+ }
+
+ ret = dict_set_str (dict, conf->link_xattr_name, from->name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to set gfid in dict for create", loc->path);
+ goto out;
+ }
+
+ fd = fd_create (loc->inode, DHT_REBALANCE_PID);
+ if (!fd) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: fd create failed (destination) (%s)",
+ loc->path, strerror (errno));
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_lookup (to, loc, NULL, &new_stbuf, NULL, NULL);
+ if (!ret) {
+ /* File exits in the destination, check if gfid matches */
+ if (uuid_compare (stbuf->ia_gfid, new_stbuf.ia_gfid) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "file %s exits in %s with different gfid",
+ loc->path, to->name);
+ fd_unref (fd);
+ goto out;
+ }
+ }
+ if ((ret == -1) && (errno != ENOENT)) {
+ /* File exists in destination, but not accessible */
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "%s: failed to lookup file (%s)",
+ loc->path, strerror (errno));
+ goto out;
+ }
+
+ /* Create the destination with LINKFILE mode, and linkto xattr,
+ if the linkfile already exists, it will just open the file */
+ ret = syncop_create (to, loc, O_RDWR, DHT_LINKFILE_MODE, fd,
+ dict, &new_stbuf);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create %s on %s (%s)",
+ loc->path, to->name, strerror (errno));
+ goto out;
+ }
+
+ ret = syncop_fsetxattr (to, fd, xattr, 0);
+ if (ret == -1)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set xattr on %s (%s)",
+ loc->path, to->name, strerror (errno));
+
+ ret = syncop_ftruncate (to, fd, stbuf->ia_size);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_ERROR,
+ "ftruncate failed for %s on %s (%s)",
+ loc->path, to->name, strerror (errno));
+
+ ret = syncop_fsetattr (to, fd, stbuf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID),
+ NULL, NULL);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_ERROR,
+ "chown failed for %s on %s (%s)",
+ loc->path, to->name, strerror (errno));
+
+ if (dst_fd)
+ *dst_fd = fd;
+
+ /* success */
+ ret = 0;
+
+out:
+ return ret;
+}
+
+static inline int
+__dht_check_free_space (xlator_t *to, xlator_t *from, loc_t *loc,
+ struct iatt *stbuf, int flag)
+{
+ struct statvfs src_statfs = {0,};
+ struct statvfs dst_statfs = {0,};
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ uint64_t src_statfs_blocks = 1;
+ uint64_t dst_statfs_blocks = 1;
+
+ this = THIS;
+
+ ret = syncop_statfs (from, loc, &src_statfs);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to get statfs of %s on %s (%s)",
+ loc->path, from->name, strerror (errno));
+ goto out;
+ }
+
+ ret = syncop_statfs (to, loc, &dst_statfs);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to get statfs of %s on %s (%s)",
+ loc->path, to->name, strerror (errno));
+ goto out;
+ }
+
+ /* if force option is given, do not check for space @ dst.
+ * Check only if space is avail for the file */
+ if (flag != GF_DHT_MIGRATE_DATA)
+ goto check_avail_space;
+
+ /* Check:
+ During rebalance `migrate-data` - Destination subvol experiences
+ a `reduction` in 'blocks' of free space, at the same time source
+ subvol gains certain 'blocks' of free space. A valid check is
+ necessary here to avoid errorneous move to destination where
+ the space could be scantily available.
+ */
+ if (stbuf) {
+ dst_statfs_blocks = ((dst_statfs.f_bavail *
+ dst_statfs.f_bsize) /
+ GF_DISK_SECTOR_SIZE);
+ src_statfs_blocks = ((src_statfs.f_bavail *
+ src_statfs.f_bsize) /
+ GF_DISK_SECTOR_SIZE);
+ if ((dst_statfs_blocks - stbuf->ia_blocks) <
+ (src_statfs_blocks + stbuf->ia_blocks)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "data movement attempted from node (%s) with"
+ " higher disk space to a node (%s) with "
+ "lesser disk space (%s)", from->name,
+ to->name, loc->path);
+
+ /* this is not a 'failure', but we don't want to
+ consider this as 'success' too :-/ */
+ ret = 1;
+ goto out;
+ }
+ }
+check_avail_space:
+ if (((dst_statfs.f_bavail * dst_statfs.f_bsize) /
+ GF_DISK_SECTOR_SIZE) < stbuf->ia_blocks) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "data movement attempted from node (%s) with "
+ "to node (%s) which does not have required free space"
+ " for %s", from->name, to->name, loc->path);
+ ret = 1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static inline int
+__dht_rebalance_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst,
+ uint64_t ia_size, int hole_exists)
+{
+ int ret = 0;
+ int count = 0;
+ off_t offset = 0;
+ struct iovec *vector = NULL;
+ struct iobref *iobref = NULL;
+ uint64_t total = 0;
+ size_t read_size = 0;
+
+ /* if file size is '0', no need to enter this loop */
+ while (total < ia_size) {
+ read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE) ?
+ DHT_REBALANCE_BLKSIZE : (ia_size - total));
+ ret = syncop_readv (from, src, read_size,
+ offset, 0, &vector, &count, &iobref);
+ if (!ret || (ret < 0)) {
+ break;
+ }
+
+ if (hole_exists)
+ ret = dht_write_with_holes (to, dst, vector, count,
+ ret, offset, iobref);
+ else
+ ret = syncop_writev (to, dst, vector, count,
+ offset, iobref, 0);
+ if (ret < 0) {
+ break;
+ }
+ offset += ret;
+ total += ret;
+
+ GF_FREE (vector);
+ if (iobref)
+ iobref_unref (iobref);
+ iobref = NULL;
+ vector = NULL;
+ }
+ if (iobref)
+ iobref_unref (iobref);
+ GF_FREE (vector);
+
+ if (ret >= 0)
+ ret = 0;
+
+ return ret;
+}
+
+
+static inline int
+__dht_rebalance_open_src_file (xlator_t *from, xlator_t *to, loc_t *loc,
+ struct iatt *stbuf, fd_t **src_fd)
+{
+ int ret = 0;
+ fd_t *fd = NULL;
+ dict_t *dict = NULL;
+ xlator_t *this = NULL;
+ struct iatt iatt = {0,};
+ dht_conf_t *conf = NULL;
+
+ this = THIS;
+ conf = this->private;
+
+ fd = fd_create (loc->inode, DHT_REBALANCE_PID);
+ if (!fd) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: fd create failed (source)", loc->path);
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_open (from, loc, O_RDWR, fd);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to open file %s on %s (%s)",
+ loc->path, from->name, strerror (errno));
+ goto out;
+ }
+
+ ret = -1;
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ ret = dict_set_str (dict, conf->link_xattr_name, to->name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set xattr in dict for %s (linkto:%s)",
+ loc->path, to->name);
+ goto out;
+ }
+
+ /* Once the migration starts, the source should have 'linkto' key set
+ to show which is the target, so other clients can work around it */
+ ret = syncop_setxattr (from, loc, dict, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set xattr on %s in %s (%s)",
+ loc->path, from->name, strerror (errno));
+ goto out;
+ }
+
+ /* mode should be (+S+T) to indicate migration is in progress */
+ iatt.ia_prot = stbuf->ia_prot;
+ iatt.ia_type = stbuf->ia_type;
+ iatt.ia_prot.sticky = 1;
+ iatt.ia_prot.sgid = 1;
+
+ ret = syncop_setattr (from, loc, &iatt, GF_SET_ATTR_MODE, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set mode on %s in %s (%s)",
+ loc->path, from->name, strerror (errno));
+ goto out;
+ }
+
+ if (src_fd)
+ *src_fd = fd;
+
+ /* success */
+ ret = 0;
+out:
+ if (dict)
+ dict_unref (dict);
+
+ return ret;
+}
+
+int
+migrate_special_files (xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc,
+ struct iatt *buf)
+{
+ int ret = -1;
+ dict_t *rsp_dict = NULL;
+ dict_t *dict = NULL;
+ char *link = NULL;
+ struct iatt stbuf = {0,};
+ dht_conf_t *conf = this->private;
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ ret = dict_set_int32 (dict, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to set 'linkto' key in dict", loc->path);
+ goto out;
+ }
+
+ /* check in the destination if the file is link file */
+ ret = syncop_lookup (to, loc, dict, &stbuf, &rsp_dict, NULL);
+ if ((ret == -1) && (errno != ENOENT)) {
+ gf_log (this->name, GF_LOG_WARNING, "%s: lookup failed (%s)",
+ loc->path, strerror (errno));
+ goto out;
+ }
+
+ /* we no more require this key */
+ dict_del (dict, conf->link_xattr_name);
+
+ /* file exists in target node, only if it is 'linkfile' its valid,
+ otherwise, error out */
+ if (!ret) {
+ if (!check_is_linkfile (loc->inode, &stbuf, rsp_dict,
+ conf->link_xattr_name)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: file exists in destination", loc->path);
+ ret = -1;
+ goto out;
+ }
+
+ /* as file is linkfile, delete it */
+ ret = syncop_unlink (to, loc);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to delete the linkfile (%s)",
+ loc->path, strerror (errno));
+ goto out;
+ }
+ }
+
+ /* Set the gfid of the source file in dict */
+ ret = dict_set_static_bin (dict, "gfid-req", buf->ia_gfid, 16);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to set gfid in dict for create", loc->path);
+ goto out;
+ }
+
+ /* Create the file in target */
+ if (IA_ISLNK (buf->ia_type)) {
+ /* Handle symlinks separately */
+ ret = syncop_readlink (from, loc, &link, buf->ia_size);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: readlink on symlink failed (%s)",
+ loc->path, strerror (errno));
+ goto out;
+ }
+
+ ret = syncop_symlink (to, loc, link, dict, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: creating symlink failed (%s)",
+ loc->path, strerror (errno));
+ goto out;
+ }
+
+ goto done;
+ }
+
+ ret = syncop_mknod (to, loc, st_mode_from_ia (buf->ia_prot,
+ buf->ia_type),
+ makedev (ia_major (buf->ia_rdev),
+ ia_minor (buf->ia_rdev)), dict, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "%s: mknod failed (%s)",
+ loc->path, strerror (errno));
+ goto out;
+ }
+
+done:
+ ret = syncop_setattr (to, loc, buf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID |
+ GF_SET_ATTR_MODE), NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to perform setattr on %s (%s)",
+ loc->path, to->name, strerror (errno));
+ }
+
+ ret = syncop_unlink (from, loc);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING, "%s: unlink failed (%s)",
+ loc->path, strerror (errno));
+
+out:
+ if (dict)
+ dict_unref (dict);
+
+ if (rsp_dict)
+ dict_unref (rsp_dict);
+
+ return ret;
+}
+
+/*
+ return values:
+
+ -1 : failure
+ 0 : successfully migrated data
+ 1 : not a failure, but we can't migrate data as of now
+*/
+int
+dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
+ int flag)
+{
+ int ret = -1;
+ struct iatt new_stbuf = {0,};
+ struct iatt stbuf = {0,};
+ struct iatt empty_iatt = {0,};
+ ia_prot_t src_ia_prot = {0,};
+ fd_t *src_fd = NULL;
+ fd_t *dst_fd = NULL;
+ dict_t *dict = NULL;
+ dict_t *xattr = NULL;
+ dict_t *xattr_rsp = NULL;
+ int file_has_holes = 0;
+ dht_conf_t *conf = this->private;
+
+ gf_log (this->name, GF_LOG_INFO, "%s: attempting to move from %s to %s",
+ loc->path, from->name, to->name);
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ ret = dict_set_int32 (dict, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to set 'linkto' key in dict", loc->path);
+ goto out;
+ }
+
+ /* Phase 1 - Data migration is in progress from now on */
+ ret = syncop_lookup (from, loc, dict, &stbuf, &xattr_rsp, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: lookup failed on %s (%s)",
+ loc->path, from->name, strerror (errno));
+ goto out;
+ }
+
+ /* we no more require this key */
+ dict_del (dict, conf->link_xattr_name);
+
+ /* preserve source mode, so set the same to the destination */
+ src_ia_prot = stbuf.ia_prot;
+
+ /* Check if file can be migrated */
+ ret = __is_file_migratable (this, loc, &stbuf, xattr_rsp, flag);
+ if (ret)
+ goto out;
+
+ /* Take care of the special files */
+ if (!IA_ISREG (stbuf.ia_type)) {
+ /* Special files */
+ ret = migrate_special_files (this, from, to, loc, &stbuf);
+ goto out;
+ }
+
+ /* TODO: move all xattr related operations to fd based operations */
+ ret = syncop_listxattr (from, loc, &xattr);
+ if (ret == -1)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to get xattr from %s (%s)",
+ loc->path, from->name, strerror (errno));
+
+ /* create the destination, with required modes/xattr */
+ ret = __dht_rebalance_create_dst_file (to, from, loc, &stbuf,
+ dict, &dst_fd, xattr);
+ if (ret)
+ goto out;
+
+ ret = __dht_check_free_space (to, from, loc, &stbuf, flag);
+ if (ret) {
+ goto out;
+ }
+
+ /* Open the source, and also update mode/xattr */
+ ret = __dht_rebalance_open_src_file (from, to, loc, &stbuf, &src_fd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to open %s on %s",
+ loc->path, from->name);
+ goto out;
+ }
+
+
+ ret = syncop_fstat (from, src_fd, &stbuf);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to lookup %s on %s (%s)",
+ loc->path, from->name, strerror (errno));
+ goto out;
+ }
+
+ /* Try to preserve 'holes' while migrating data */
+ if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE))
+ file_has_holes = 1;
+
+ /* All I/O happens in this function */
+ ret = __dht_rebalance_migrate_data (from, to, src_fd, dst_fd,
+ stbuf.ia_size, file_has_holes);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: failed to migrate data",
+ loc->path);
+ /* reset the destination back to 0 */
+ ret = syncop_ftruncate (to, dst_fd, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to reset target size back to 0 (%s)",
+ loc->path, strerror (errno));
+ }
+
+ ret = -1;
+ goto out;
+ }
+
+ /* TODO: Sync the locks */
+
+ ret = syncop_fsync (to, dst_fd, 0);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to fsync on %s (%s)",
+ loc->path, to->name, strerror (errno));
+
+
+ /* Phase 2 - Data-Migration Complete, Housekeeping updates pending */
+
+ ret = syncop_fstat (from, src_fd, &new_stbuf);
+ if (ret < 0) {
+ /* Failed to get the stat info */
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to fstat file %s on %s (%s)",
+ loc->path, from->name, strerror (errno));
+ goto out;
+ }
+
+ /* source would have both sticky bit and sgid bit set, reset it to 0,
+ and set the source permission on destination, if it was not set
+ prior to setting rebalance-modes in source */
+ if (!src_ia_prot.sticky)
+ new_stbuf.ia_prot.sticky = 0;
+
+ if (!src_ia_prot.sgid)
+ new_stbuf.ia_prot.sgid = 0;
+
+ /* TODO: if the source actually had sticky bit, or sgid bit set,
+ we are not handling it */
+
+ ret = syncop_fsetattr (to, dst_fd, &new_stbuf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID |
+ GF_SET_ATTR_MODE), NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to perform setattr on %s (%s)",
+ loc->path, to->name, strerror (errno));
+ goto out;
+ }
+
+ /* Because 'futimes' is not portable */
+ ret = syncop_setattr (to, loc, &new_stbuf,
+ (GF_SET_ATTR_MTIME | GF_SET_ATTR_ATIME),
+ NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to perform setattr on %s (%s)",
+ loc->path, to->name, strerror (errno));
+ }
+
+ /* Make the source as a linkfile first before deleting it */
+ empty_iatt.ia_prot.sticky = 1;
+ ret = syncop_fsetattr (from, src_fd, &empty_iatt,
+ GF_SET_ATTR_MODE, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, \
+ "%s: failed to perform setattr on %s (%s)",
+ loc->path, from->name, strerror (errno));
+ goto out;
+ }
+
+ /* Free up the data blocks on the source node, as the whole
+ file is migrated */
+ ret = syncop_ftruncate (from, src_fd, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to perform truncate on %s (%s)",
+ loc->path, from->name, strerror (errno));
+ }
+
+ /* remove the 'linkto' xattr from the destination */
+ ret = syncop_fremovexattr (to, dst_fd, conf->link_xattr_name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to perform removexattr on %s (%s)",
+ loc->path, to->name, strerror (errno));
+ }
+
+ /* Do a stat and check the gfid before unlink */
+ ret = syncop_stat (from, loc, &empty_iatt);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to do a stat on %s (%s)",
+ loc->path, from->name, strerror (errno));
+ goto out;
+ }
+
+ if (uuid_compare (empty_iatt.ia_gfid, loc->gfid) == 0) {
+ /* take out the source from namespace */
+ ret = syncop_unlink (from, loc);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to perform unlink on %s (%s)",
+ loc->path, from->name, strerror (errno));
+ goto out;
+ }
+ }
+
+ ret = syncop_lookup (this, loc, NULL, NULL, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s: failed to lookup the file on subvolumes (%s)",
+ loc->path, strerror (errno));
+ }
+
+ gf_log (this->name, GF_LOG_INFO,
+ "completed migration of %s from subvolume %s to %s",
+ loc->path, from->name, to->name);
+
+ ret = 0;
+out:
+ if (dict)
+ dict_unref (dict);
+
+ if (xattr)
+ dict_unref (xattr);
+ if (xattr_rsp)
+ dict_unref (xattr_rsp);
+
+ if (dst_fd)
+ syncop_close (dst_fd);
+ if (src_fd)
+ syncop_close (src_fd);
+
+ return ret;
+}
+
+static int
+rebalance_task (void *data)
+{
+ int ret = -1;
+ dht_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+
+ frame = data;
+
+ local = frame->local;
+
+ /* This function is 'synchrounous', hence if it returns,
+ we are done with the task */
+ ret = dht_migrate_file (THIS, &local->loc, local->rebalance.from_subvol,
+ local->rebalance.target_node, local->flags);
+
+ return ret;
+}
+
+static int
+rebalance_task_completion (int op_ret, call_frame_t *sync_frame, void *data)
+{
+ int ret = -1;
+ uint64_t layout_int = 0;
+ dht_layout_t *layout = 0;
+ xlator_t *this = NULL;
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ this = THIS;
+ local = sync_frame->local;
+
+ if (!op_ret) {
+ /* Make sure we have valid 'layout' in inode ctx
+ after the operation */
+ ret = inode_ctx_del (local->loc.inode, this, &layout_int);
+ if (!ret && layout_int) {
+ layout = (dht_layout_t *)(long)layout_int;
+ dht_layout_unref (this, layout);
+ }
+
+ ret = dht_layout_preset (this, local->rebalance.target_node,
+ local->loc.inode);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set inode ctx", local->loc.path);
+ }
+
+ if (op_ret == -1) {
+ /* Failure of migration process, mostly due to write process.
+ as we can't preserve the exact errno, lets say there was
+ no space to migrate-data
+ */
+ op_errno = ENOSPC;
+ }
+
+ if (op_ret == 1) {
+ /* migration didn't happen, but is not a failure, let the user
+ understand that he doesn't have permission to migrate the
+ file.
+ */
+ op_ret = -1;
+ op_errno = EPERM;
+ }
+
+ DHT_STACK_UNWIND (setxattr, sync_frame, op_ret, op_errno, NULL);
+ return 0;
+}
+
+int
+dht_start_rebalance_task (xlator_t *this, call_frame_t *frame)
+{
+ int ret = -1;
+
+ ret = synctask_new (this->ctx->env, rebalance_task,
+ rebalance_task_completion,
+ frame, frame);
+ return ret;
+}
+
+int
+gf_listener_stop (xlator_t *this)
+{
+ glusterfs_ctx_t *ctx = NULL;
+ cmd_args_t *cmd_args = NULL;
+ int ret = 0;
+
+ ctx = this->ctx;
+ GF_ASSERT (ctx);
+ cmd_args = &ctx->cmd_args;
+ if (cmd_args->sock_file) {
+ ret = unlink (cmd_args->sock_file);
+ if (ret && (ENOENT == errno)) {
+ ret = 0;
+ }
+ }
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to unlink listener "
+ "socket %s, error: %s", cmd_args->sock_file,
+ strerror (errno));
+ }
+ return ret;
+}
+
+void
+dht_build_root_inode (xlator_t *this, inode_t **inode)
+{
+ inode_table_t *itable = NULL;
+ uuid_t root_gfid = {0, };
+
+ itable = inode_table_new (0, this);
+ if (!itable)
+ return;
+
+ root_gfid[15] = 1;
+ *inode = inode_find (itable, root_gfid);
+}
+
+void
+dht_build_root_loc (inode_t *inode, loc_t *loc)
+{
+ loc->path = "/";
+ loc->inode = inode;
+ loc->inode->ia_type = IA_IFDIR;
+ memset (loc->gfid, 0, 16);
+ loc->gfid[15] = 1;
+}
+
+
+/* return values: 1 -> error, bug ignore and continue
+ 0 -> proceed
+ -1 -> error, handle it */
+int32_t
+gf_defrag_handle_migrate_error (int32_t op_errno, gf_defrag_info_t *defrag)
+{
+ /* if errno is not ENOSPC or ENOTCONN, we can still continue
+ with rebalance process */
+ if ((errno != ENOSPC) || (errno != ENOTCONN))
+ return 1;
+
+ if (errno == ENOTCONN) {
+ /* Most probably mount point went missing (mostly due
+ to a brick down), say rebalance failure to user,
+ let him restart it if everything is fine */
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+ return -1;
+ }
+
+ if (errno == ENOSPC) {
+ /* rebalance process itself failed, may be
+ remote brick went down, or write failed due to
+ disk full etc etc.. */
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+ return -1;
+ }
+
+ return 0;
+}
+
+static gf_boolean_t
+gf_defrag_pattern_match (gf_defrag_info_t *defrag, char *name, uint64_t size)
+{
+ gf_defrag_pattern_list_t *trav = NULL;
+ gf_boolean_t match = _gf_false;
+ gf_boolean_t ret = _gf_false;
+
+ GF_VALIDATE_OR_GOTO ("dht", defrag, out);
+
+ trav = defrag->defrag_pattern;
+ while (trav) {
+ if (!fnmatch (trav->path_pattern, name, FNM_NOESCAPE)) {
+ match = _gf_true;
+ break;
+ }
+ trav = trav->next;
+ }
+
+ if ((match == _gf_true) && (size >= trav->size))
+ ret = _gf_true;
+
+ out:
+ return ret;
+}
+
+/* We do a depth first traversal of directories. But before we move into
+ * subdirs, we complete the data migration of those directories whose layouts
+ * have been fixed
+ */
+
+int
+gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ dict_t *migrate_data)
+{
+ int ret = -1;
+ loc_t entry_loc = {0,};
+ fd_t *fd = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *tmp = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_boolean_t free_entries = _gf_false;
+ off_t offset = 0;
+ dict_t *dict = NULL;
+ struct iatt iatt = {0,};
+ int32_t op_errno = 0;
+ char *uuid_str = NULL;
+ uuid_t node_uuid = {0,};
+ int readdir_operrno = 0;
+ struct timeval dir_start = {0,};
+ struct timeval end = {0,};
+ double elapsed = {0,};
+ struct timeval start = {0,};
+ int32_t err = 0;
+
+ gf_log (this->name, GF_LOG_INFO, "migrate data called on %s",
+ loc->path);
+ gettimeofday (&dir_start, NULL);
+
+ fd = fd_create (loc->inode, defrag->pid);
+ if (!fd) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to create fd");
+ goto out;
+ }
+
+ ret = syncop_opendir (this, loc, fd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to open dir %s",
+ loc->path);
+ goto out;
+ }
+
+ INIT_LIST_HEAD (&entries.list);
+
+ while ((ret = syncop_readdirp (this, fd, 131072, offset, NULL,
+ &entries)) != 0) {
+
+ if (ret < 0) {
+
+ gf_log (this->name, GF_LOG_ERROR, "Readdir returned %s."
+ " Aborting migrate-data",
+ strerror(readdir_operrno));
+ goto out;
+ }
+
+ /* Need to keep track of ENOENT errno, that means, there is no
+ need to send more readdirp() */
+ readdir_operrno = errno;
+
+ if (list_empty (&entries.list))
+ break;
+
+ free_entries = _gf_true;
+
+ list_for_each_entry_safe (entry, tmp, &entries.list, list) {
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ ret = 1;
+ goto out;
+ }
+
+ offset = entry->d_off;
+
+ if (!strcmp (entry->d_name, ".") ||
+ !strcmp (entry->d_name, ".."))
+ continue;
+
+ if (IA_ISDIR (entry->d_stat.ia_type))
+ continue;
+
+ defrag->num_files_lookedup++;
+ if (defrag->stats == _gf_true) {
+ gettimeofday (&start, NULL);
+ }
+ if (defrag->defrag_pattern &&
+ (gf_defrag_pattern_match (defrag, entry->d_name,
+ entry->d_stat.ia_size)
+ == _gf_false)) {
+ continue;
+ }
+ loc_wipe (&entry_loc);
+ ret =dht_build_child_loc (this, &entry_loc, loc,
+ entry->d_name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Child loc"
+ " build failed");
+ goto out;
+ }
+
+ if (uuid_is_null (entry->d_stat.ia_gfid)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s"
+ " gfid not present", loc->path,
+ entry->d_name);
+ continue;
+ }
+
+ uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid);
+
+ if (uuid_is_null (loc->gfid)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s"
+ " gfid not present", loc->path,
+ entry->d_name);
+ continue;
+ }
+
+ uuid_copy (entry_loc.pargfid, loc->gfid);
+
+ entry_loc.inode->ia_type = entry->d_stat.ia_type;
+
+ ret = syncop_lookup (this, &entry_loc, NULL, &iatt,
+ NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s"
+ " lookup failed", entry_loc.path);
+ continue;
+ }
+
+ ret = syncop_getxattr (this, &entry_loc, &dict,
+ GF_XATTR_NODE_UUID_KEY);
+ if(ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "get node-uuid for %s", entry_loc.path);
+ continue;
+ }
+
+ ret = dict_get_str (dict, GF_XATTR_NODE_UUID_KEY,
+ &uuid_str);
+ if(ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "get node-uuid from dict for %s",
+ entry_loc.path);
+ continue;
+ }
+
+ if (uuid_parse (uuid_str, node_uuid)) {
+ gf_log (this->name, GF_LOG_ERROR, "uuid_parse "
+ "failed for %s", entry_loc.path);
+ continue;
+ }
+
+ /* if file belongs to different node, skip migration
+ * the other node will take responsibility of migration
+ */
+ if (uuid_compare (node_uuid, defrag->node_uuid)) {
+ gf_log (this->name, GF_LOG_TRACE, "%s does not"
+ "belong to this node", entry_loc.path);
+ continue;
+ }
+
+ uuid_str = NULL;
+
+ dict_del (dict, GF_XATTR_NODE_UUID_KEY);
+
+
+ /* if distribute is present, it will honor this key.
+ * -1 is returned if distribute is not present or file
+ * doesn't have a link-file. If file has link-file, the
+ * path of link-file will be the value, and also that
+ * guarantees that file has to be mostly migrated */
+
+ ret = syncop_getxattr (this, &entry_loc, &dict,
+ GF_XATTR_LINKINFO_KEY);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_TRACE, "failed to "
+ "get link-to key for %s",
+ entry_loc.path);
+ continue;
+ }
+
+ ret = syncop_setxattr (this, &entry_loc, migrate_data,
+ 0);
+ if (ret) {
+ err = op_errno;
+ /* errno is overloaded. See
+ * rebalance_task_completion () */
+ if (err != ENOSPC) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "migrate-data skipped for %s"
+ " due to space constraints",
+ entry_loc.path);
+ defrag->skipped +=1;
+ } else{
+ gf_log (this->name, GF_LOG_ERROR,
+ "migrate-data failed for %s",
+ entry_loc.path);
+ defrag->total_failures +=1;
+ }
+ }
+
+ if (ret == -1) {
+ op_errno = errno;
+ ret = gf_defrag_handle_migrate_error (op_errno,
+ defrag);
+
+ if (!ret)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "migrate-data on %s failed: %s",
+ entry_loc.path,
+ strerror (op_errno));
+ else if (ret == 1)
+ continue;
+ else if (ret == -1)
+ goto out;
+ }
+
+ LOCK (&defrag->lock);
+ {
+ defrag->total_files += 1;
+ defrag->total_data += iatt.ia_size;
+ }
+ UNLOCK (&defrag->lock);
+ if (defrag->stats == _gf_true) {
+ gettimeofday (&end, NULL);
+ elapsed = (end.tv_sec - start.tv_sec) * 1e6 +
+ (end.tv_usec - start.tv_usec);
+ gf_log (this->name, GF_LOG_INFO, "Migration of "
+ "file:%s size:%"PRIu64" bytes took %.2f"
+ "secs", entry_loc.path, iatt.ia_size,
+ elapsed/1e6);
+ }
+ }
+
+ gf_dirent_free (&entries);
+ free_entries = _gf_false;
+ INIT_LIST_HEAD (&entries.list);
+
+ if (readdir_operrno == ENOENT)
+ break;
+ }
+
+ gettimeofday (&end, NULL);
+ elapsed = (end.tv_sec - dir_start.tv_sec) * 1e6 +
+ (end.tv_usec - dir_start.tv_usec);
+ gf_log (this->name, GF_LOG_INFO, "Migration operation on dir %s took "
+ "%.2f secs", loc->path, elapsed/1e6);
+ ret = 0;
+out:
+ if (free_entries)
+ gf_dirent_free (&entries);
+
+ loc_wipe (&entry_loc);
+
+ if (dict)
+ dict_unref(dict);
+
+ if (fd)
+ fd_unref (fd);
+ return ret;
+
+}
+
+
+int
+gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ dict_t *fix_layout, dict_t *migrate_data)
+{
+ int ret = -1;
+ loc_t entry_loc = {0,};
+ fd_t *fd = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *tmp = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_boolean_t free_entries = _gf_false;
+ dict_t *dict = NULL;
+ off_t offset = 0;
+ struct iatt iatt = {0,};
+ int readdirp_errno = 0;
+
+ ret = syncop_lookup (this, loc, NULL, &iatt, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Lookup failed on %s",
+ loc->path);
+ goto out;
+ }
+
+ if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
+ ret = gf_defrag_migrate_data (this, defrag, loc, migrate_data);
+ if (ret)
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE, "fix layout called on %s", loc->path);
+
+ fd = fd_create (loc->inode, defrag->pid);
+ if (!fd) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to create fd");
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_opendir (this, loc, fd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to open dir %s",
+ loc->path);
+ ret = -1;
+ goto out;
+ }
+
+ INIT_LIST_HEAD (&entries.list);
+ while ((ret = syncop_readdirp (this, fd, 131072, offset, NULL,
+ &entries)) != 0)
+ {
+
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Readdir returned %s"
+ ". Aborting fix-layout",strerror(errno));
+ goto out;
+ }
+
+ /* Need to keep track of ENOENT errno, that means, there is no
+ need to send more readdirp() */
+ readdirp_errno = errno;
+
+ if (list_empty (&entries.list))
+ break;
+
+ free_entries = _gf_true;
+
+ list_for_each_entry_safe (entry, tmp, &entries.list, list) {
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ ret = 1;
+ goto out;
+ }
+
+ offset = entry->d_off;
+
+ if (!strcmp (entry->d_name, ".") ||
+ !strcmp (entry->d_name, ".."))
+ continue;
+
+ if (!IA_ISDIR (entry->d_stat.ia_type))
+ continue;
+
+ loc_wipe (&entry_loc);
+ ret =dht_build_child_loc (this, &entry_loc, loc,
+ entry->d_name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Child loc"
+ " build failed");
+ goto out;
+ }
+
+ if (uuid_is_null (entry->d_stat.ia_gfid)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s"
+ " gfid not present", loc->path,
+ entry->d_name);
+ continue;
+ }
+
+ entry_loc.inode->ia_type = entry->d_stat.ia_type;
+
+ uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid);
+ if (uuid_is_null (loc->gfid)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s"
+ " gfid not present", loc->path,
+ entry->d_name);
+ continue;
+ }
+
+ uuid_copy (entry_loc.pargfid, loc->gfid);
+
+ ret = syncop_lookup (this, &entry_loc, NULL, &iatt,
+ NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s"
+ " lookup failed", entry_loc.path);
+ continue;
+ }
+
+ ret = syncop_setxattr (this, &entry_loc, fix_layout,
+ 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Setxattr "
+ "failed for %s", entry_loc.path);
+ defrag->defrag_status =
+ GF_DEFRAG_STATUS_FAILED;
+ defrag->total_failures ++;
+ goto out;
+ }
+ ret = gf_defrag_fix_layout (this, defrag, &entry_loc,
+ fix_layout, migrate_data);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Fix layout "
+ "failed for %s", entry_loc.path);
+ defrag->total_failures++;
+ goto out;
+ }
+
+ }
+ gf_dirent_free (&entries);
+ free_entries = _gf_false;
+ INIT_LIST_HEAD (&entries.list);
+ if (readdirp_errno == ENOENT)
+ break;
+ }
+
+ ret = 0;
+out:
+ if (free_entries)
+ gf_dirent_free (&entries);
+
+ loc_wipe (&entry_loc);
+
+ if (dict)
+ dict_unref(dict);
+
+ if (fd)
+ fd_unref (fd);
+
+ return ret;
+
+}
+
+
+int
+gf_defrag_start_crawl (void *data)
+{
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ gf_defrag_info_t *defrag = NULL;
+ int ret = -1;
+ loc_t loc = {0,};
+ struct iatt iatt = {0,};
+ struct iatt parent = {0,};
+ dict_t *fix_layout = NULL;
+ dict_t *migrate_data = NULL;
+ dict_t *status = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+
+ this = data;
+ if (!this)
+ goto out;
+
+ ctx = this->ctx;
+ if (!ctx)
+ goto out;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ defrag = conf->defrag;
+ if (!defrag)
+ goto out;
+
+ gettimeofday (&defrag->start_time, NULL);
+ dht_build_root_inode (this, &defrag->root_inode);
+ if (!defrag->root_inode)
+ goto out;
+
+ dht_build_root_loc (defrag->root_inode, &loc);
+
+ /* fix-layout on '/' first */
+
+ ret = syncop_lookup (this, &loc, NULL, &iatt, NULL, &parent);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "look up on / failed");
+ goto out;
+ }
+
+ fix_layout = dict_new ();
+ if (!fix_layout) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str (fix_layout, GF_XATTR_FIX_LAYOUT_KEY, "yes");
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set dict str");
+ goto out;
+ }
+
+ ret = syncop_setxattr (this, &loc, fix_layout, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "fix layout on %s failed",
+ loc.path);
+ defrag->total_failures++;
+ goto out;
+ }
+
+ if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
+ migrate_data = dict_new ();
+ if (!migrate_data) {
+ ret = -1;
+ goto out;
+ }
+ if (defrag->cmd == GF_DEFRAG_CMD_START_FORCE)
+ ret = dict_set_str (migrate_data,
+ "distribute.migrate-data", "force");
+ else
+ ret = dict_set_str (migrate_data,
+ "distribute.migrate-data",
+ "non-force");
+ if (ret)
+ goto out;
+ }
+ ret = gf_defrag_fix_layout (this, defrag, &loc, fix_layout,
+ migrate_data);
+ if ((defrag->defrag_status != GF_DEFRAG_STATUS_STOPPED) &&
+ (defrag->defrag_status != GF_DEFRAG_STATUS_FAILED)) {
+ defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
+ }
+
+
+
+out:
+ LOCK (&defrag->lock);
+ {
+ status = dict_new ();
+ gf_defrag_status_get (defrag, status);
+ if (ctx->notify)
+ ctx->notify (GF_EN_DEFRAG_STATUS, status);
+ if (status)
+ dict_unref (status);
+ defrag->is_exiting = 1;
+ }
+ UNLOCK (&defrag->lock);
+
+ if (defrag) {
+ GF_FREE (defrag);
+ conf->defrag = NULL;
+ }
+
+ return ret;
+}
+
+
+static int
+gf_defrag_done (int ret, call_frame_t *sync_frame, void *data)
+{
+ gf_listener_stop (sync_frame->this);
+
+ STACK_DESTROY (sync_frame->root);
+ kill (getpid(), SIGTERM);
+ return 0;
+}
+
+void *
+gf_defrag_start (void *data)
+{
+ int ret = -1;
+ call_frame_t *frame = NULL;
+ dht_conf_t *conf = NULL;
+ gf_defrag_info_t *defrag = NULL;
+ xlator_t *this = NULL;
+
+ this = data;
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ defrag = conf->defrag;
+ if (!defrag)
+ goto out;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ goto out;
+
+ frame->root->pid = GF_CLIENT_PID_DEFRAG;
+
+ defrag->pid = frame->root->pid;
+
+ defrag->defrag_status = GF_DEFRAG_STATUS_STARTED;
+
+ ret = synctask_new (this->ctx->env, gf_defrag_start_crawl,
+ gf_defrag_done, frame, this);
+
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Could not create"
+ " task for rebalance");
+out:
+ return NULL;
+}
+
+int
+gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict)
+{
+ int ret = 0;
+ uint64_t files = 0;
+ uint64_t size = 0;
+ uint64_t lookup = 0;
+ uint64_t failures = 0;
+ uint64_t skipped = 0;
+ char *status = "";
+ double elapsed = 0;
+ struct timeval end = {0,};
+
+
+ if (!defrag)
+ goto out;
+
+ ret = 0;
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED)
+ goto out;
+
+ files = defrag->total_files;
+ size = defrag->total_data;
+ lookup = defrag->num_files_lookedup;
+ failures = defrag->total_failures;
+ skipped = defrag->skipped;
+
+ gettimeofday (&end, NULL);
+
+ elapsed = end.tv_sec - defrag->start_time.tv_sec;
+
+ if (!dict)
+ goto log;
+
+ ret = dict_set_uint64 (dict, "files", files);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set file count");
+
+ ret = dict_set_uint64 (dict, "size", size);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set size of xfer");
+
+ ret = dict_set_uint64 (dict, "lookups", lookup);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set lookedup file count");
+
+
+ ret = dict_set_int32 (dict, "status", defrag->defrag_status);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set status");
+ if (elapsed) {
+ ret = dict_set_double (dict, "run-time", elapsed);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set run-time");
+ }
+
+ ret = dict_set_uint64 (dict, "failures", failures);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set failure count");
+
+ ret = dict_set_uint64 (dict, "skipped", skipped);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set skipped file count");
+log:
+ switch (defrag->defrag_status) {
+ case GF_DEFRAG_STATUS_NOT_STARTED:
+ status = "not started";
+ break;
+ case GF_DEFRAG_STATUS_STARTED:
+ status = "in progress";
+ break;
+ case GF_DEFRAG_STATUS_STOPPED:
+ status = "stopped";
+ break;
+ case GF_DEFRAG_STATUS_COMPLETE:
+ status = "completed";
+ break;
+ case GF_DEFRAG_STATUS_FAILED:
+ status = "failed";
+ break;
+ default:
+ break;
+ }
+
+ gf_log (THIS->name, GF_LOG_INFO, "Rebalance is %s. Time taken is %.2f "
+ "secs", status, elapsed);
+ gf_log (THIS->name, GF_LOG_INFO, "Files migrated: %"PRIu64", size: %"
+ PRIu64", lookups: %"PRIu64", failures: %"PRIu64", skipped: "
+ "%"PRIu64, files, size, lookup, failures, skipped);
+
+
+out:
+ return 0;
+}
+
+int
+gf_defrag_stop (gf_defrag_info_t *defrag, dict_t *output)
+{
+ /* TODO: set a variable 'stop_defrag' here, it should be checked
+ in defrag loop */
+ int ret = -1;
+ GF_ASSERT (defrag);
+
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED) {
+ goto out;
+ }
+
+ gf_log ("", GF_LOG_INFO, "Received stop command on rebalance");
+ defrag->defrag_status = GF_DEFRAG_STATUS_STOPPED;
+
+ if (output)
+ gf_defrag_status_get (defrag, output);
+ ret = 0;
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c
index 6c34dabad..5d6f4f232 100644
--- a/xlators/cluster/dht/src/dht-rename.c
+++ b/xlators/cluster/dht/src/dht-rename.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
/* TODO: link(oldpath, newpath) fails if newpath already exists. DHT should
@@ -33,373 +24,735 @@
int
dht_rename_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ call_frame_t *prev = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ if (op_ret == -1) {
+ /* TODO: undo the damage */
+
+ gf_log (this->name, GF_LOG_INFO,
+ "rename %s -> %s on %s failed (%s)",
+ local->loc.path, local->loc2.path,
+ prev->this->name, strerror (op_errno));
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto unwind;
+ }
+ /* TODO: construct proper stbuf for dir */
+ /*
+ * FIXME: is this the correct way to build stbuf and
+ * parent bufs?
+ */
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ dht_iatt_merge (this, &local->preoldparent, preoldparent,
+ prev->this);
+ dht_iatt_merge (this, &local->postoldparent, postoldparent,
+ prev->this);
+ dht_iatt_merge (this, &local->preparent, prenewparent,
+ prev->this);
+ dht_iatt_merge (this, &local->postparent, postnewparent,
+ prev->this);
- local = frame->local;
- prev = cookie;
-
- if (op_ret == -1) {
- /* TODO: undo the damage */
-
- gf_log (this->name, GF_LOG_DEBUG,
- "rename %s -> %s on %s failed (%s)",
- local->loc.path, local->loc2.path,
- prev->this->name, strerror (op_errno));
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- } else {
- /* TODO: construct proper stbuf for dir */
- local->stbuf = *stbuf;
- }
+unwind:
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ WIPE (&local->preoldparent);
+ WIPE (&local->postoldparent);
+ WIPE (&local->preparent);
+ WIPE (&local->postparent);
+
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
+ &local->stbuf, &local->preoldparent,
+ &local->postoldparent,
+ &local->preparent, &local->postparent, xdata);
+ }
+
+ return 0;
+}
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf);
- }
- return 0;
+int
+dht_rename_hashed_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ struct iatt *preoldparent,
+ struct iatt *postoldparent,
+ struct iatt *prenewparent,
+ struct iatt *postnewparent, dict_t *xdata)
+{
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int call_cnt = 0;
+ call_frame_t *prev = NULL;
+ int i = 0;
+
+ conf = this->private;
+ local = frame->local;
+ prev = cookie;
+
+ if (op_ret == -1) {
+ /* TODO: undo the damage */
+
+ gf_log (this->name, GF_LOG_INFO,
+ "rename %s -> %s on %s failed (%s)",
+ local->loc.path, local->loc2.path,
+ prev->this->name, strerror (op_errno));
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto unwind;
+ }
+ /* TODO: construct proper stbuf for dir */
+ /*
+ * FIXME: is this the correct way to build stbuf and
+ * parent bufs?
+ */
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ dht_iatt_merge (this, &local->preoldparent, preoldparent,
+ prev->this);
+ dht_iatt_merge (this, &local->postoldparent, postoldparent,
+ prev->this);
+ dht_iatt_merge (this, &local->preparent, prenewparent,
+ prev->this);
+ dht_iatt_merge (this, &local->postparent, postnewparent,
+ prev->this);
+
+ call_cnt = local->call_cnt = conf->subvolume_cnt - 1;
+
+ if (!local->call_cnt)
+ goto unwind;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == local->dst_hashed)
+ continue;
+ STACK_WIND (frame, dht_rename_dir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->rename,
+ &local->loc, &local->loc2, NULL);
+ if (!--call_cnt)
+ break;
+ }
+
+
+ return 0;
+unwind:
+ WIPE (&local->preoldparent);
+ WIPE (&local->postoldparent);
+ WIPE (&local->preparent);
+ WIPE (&local->postparent);
+
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
+ &local->stbuf, &local->preoldparent,
+ &local->postoldparent,
+ &local->preparent, &local->postparent, NULL);
+
+ return 0;
}
-
int
dht_rename_dir_do (call_frame_t *frame, xlator_t *this)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int i = 0;
-
- conf = this->private;
- local = frame->local;
+ dht_local_t *local = NULL;
- if (local->op_ret == -1)
- goto err;
+ local = frame->local;
- local->call_cnt = conf->subvolume_cnt;
- local->op_ret = 0;
+ if (local->op_ret == -1)
+ goto err;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_rename_dir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->rename,
- &local->loc, &local->loc2);
- }
+ local->op_ret = 0;
- return 0;
+ STACK_WIND (frame, dht_rename_hashed_dir_cbk,
+ local->dst_hashed,
+ local->dst_hashed->fops->rename,
+ &local->loc, &local->loc2, NULL);
+ return 0;
err:
- DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno);
- return 0;
+ DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, NULL, NULL,
+ NULL, NULL, NULL, NULL);
+ return 0;
}
int
dht_rename_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *entries)
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = -1;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = -1;
+ call_frame_t *prev = NULL;
- local = frame->local;
- prev = cookie;
+ local = frame->local;
+ prev = cookie;
- if (op_ret > 2) {
- gf_log (this->name, GF_LOG_TRACE,
- "readdir on %s for %s returned %d entries",
- prev->this->name, local->loc.path, op_ret);
- local->op_ret = -1;
- local->op_errno = ENOTEMPTY;
- }
+ if (op_ret > 2) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "readdir on %s for %s returned %d entries",
+ prev->this->name, local->loc.path, op_ret);
+ local->op_ret = -1;
+ local->op_errno = ENOTEMPTY;
+ }
- this_call_cnt = dht_frame_return (frame);
+ this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- dht_rename_dir_do (frame, this);
- }
+ if (is_last_call (this_call_cnt)) {
+ dht_rename_dir_do (frame, this);
+ }
- return 0;
+ return 0;
}
int
dht_rename_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd)
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = -1;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = -1;
+ call_frame_t *prev = NULL;
- local = frame->local;
- prev = cookie;
+ local = frame->local;
+ prev = cookie;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "opendir on %s for %s failed (%s)",
- prev->this->name, local->loc.path,
- strerror (op_errno));
- goto err;
- }
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "opendir on %s for %s failed (%s)",
+ prev->this->name, local->loc.path,
+ strerror (op_errno));
+ goto err;
+ }
- STACK_WIND (frame, dht_rename_readdir_cbk,
- prev->this, prev->this->fops->readdir,
- local->fd, 4096, 0);
+ STACK_WIND (frame, dht_rename_readdir_cbk,
+ prev->this, prev->this->fops->readdir,
+ local->fd, 4096, 0, NULL);
- return 0;
+ return 0;
err:
- this_call_cnt = dht_frame_return (frame);
+ this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- dht_rename_dir_do (frame, this);
- }
+ if (is_last_call (this_call_cnt)) {
+ dht_rename_dir_do (frame, this);
+ }
- return 0;
+ return 0;
}
int
dht_rename_dir (call_frame_t *frame, xlator_t *this)
{
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- int i = 0;
- int op_errno = -1;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int i = 0;
+ int op_errno = -1;
- conf = frame->this->private;
- local = frame->local;
+ conf = frame->this->private;
+ local = frame->local;
- local->call_cnt = conf->subvolume_cnt;
+ local->call_cnt = conf->subvolume_cnt;
- local->fd = fd_create (local->loc.inode, frame->root->pid);
- if (!local->fd) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->subvolume_status[i]) {
+ gf_log (this->name, GF_LOG_INFO,
+ "one of the subvolumes down (%s)",
+ conf->subvolumes[i]->name);
+ op_errno = ENOTCONN;
+ goto err;
+ }
+ }
- local->op_ret = 0;
+ local->fd = fd_create (local->loc.inode, frame->root->pid);
+ if (!local->fd) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- if (!local->dst_cached) {
- dht_rename_dir_do (frame, this);
- return 0;
- }
+ local->op_ret = 0;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_rename_opendir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->opendir,
- &local->loc2, local->fd);
- }
+ if (!local->dst_cached) {
+ dht_rename_dir_do (frame, this);
+ return 0;
+ }
- return 0;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND (frame, dht_rename_opendir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->opendir,
+ &local->loc2, local->fd, NULL);
+ }
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+ return 0;
}
+#define DHT_MARK_FOP_INTERNAL(xattr) do { \
+ int tmp = -1; \
+ if (!xattr) { \
+ xattr = dict_new (); \
+ if (!xattr) \
+ break; \
+ } \
+ tmp = dict_set_str (xattr, GLUSTERFS_INTERNAL_FOP_KEY, "yes"); \
+ if (tmp) { \
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set" \
+ " internal dict key for %s", local->loc.path); \
+ } \
+ }while (0)
+int
+dht_rename_done (call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+ if (local->linked == _gf_true) {
+ local->linked = _gf_false;
+ dht_linkfile_attr_heal (frame, this);
+ }
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
+ &local->stbuf, &local->preoldparent,
+ &local->postoldparent, &local->preparent,
+ &local->postparent, NULL);
+
+ return 0;
+}
int
dht_rename_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int this_call_cnt = 0;
+
+ local = frame->local;
+ prev = cookie;
+
+ if (!local) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "!local, should not happen");
+ goto out;
+ }
+
+ this_call_cnt = dht_frame_return (frame);
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: unlink on %s failed (%s)",
+ local->loc.path, prev->this->name, strerror (op_errno));
+ }
+
+ WIPE (&local->preoldparent);
+ WIPE (&local->postoldparent);
+ WIPE (&local->preparent);
+ WIPE (&local->postparent);
+
+ if (is_last_call (this_call_cnt)) {
+ dht_rename_done (frame, this);
+ }
+
+out:
+ return 0;
+}
- local = frame->local;
- prev = cookie;
- this_call_cnt = dht_frame_return (frame);
+int
+dht_rename_cleanup (call_frame_t *frame)
+{
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ xlator_t *src_hashed = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+ int call_cnt = 0;
+ dict_t *xattr = NULL;
+
+ local = frame->local;
+ this = frame->this;
+
+ src_hashed = local->src_hashed;
+ src_cached = local->src_cached;
+ dst_hashed = local->dst_hashed;
+ dst_cached = local->dst_cached;
+
+ if (src_cached == dst_cached)
+ goto nolinks;
+
+ if (dst_hashed != src_hashed && dst_hashed != src_cached)
+ call_cnt++;
+
+ if (src_cached != dst_hashed)
+ call_cnt++;
+
+ local->call_cnt = call_cnt;
+
+ if (!call_cnt)
+ goto nolinks;
+
+ DHT_MARK_FOP_INTERNAL (xattr);
+
+ if (dst_hashed != src_hashed && dst_hashed != src_cached) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "unlinking linkfile %s @ %s => %s",
+ local->loc.path, dst_hashed->name, src_cached->name);
+ STACK_WIND (frame, dht_rename_unlink_cbk,
+ dst_hashed, dst_hashed->fops->unlink,
+ &local->loc, 0, xattr);
+ }
+
+ if (src_cached != dst_hashed) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "unlinking link %s => %s (%s)", local->loc.path,
+ local->loc2.path, src_cached->name);
+ STACK_WIND (frame, dht_rename_unlink_cbk,
+ src_cached, src_cached->fops->unlink,
+ &local->loc2, 0, xattr);
+ }
+
+ if (xattr)
+ dict_unref (xattr);
+
+ return 0;
+
+nolinks:
+ WIPE (&local->preoldparent);
+ WIPE (&local->postoldparent);
+ WIPE (&local->preparent);
+ WIPE (&local->postparent);
+
+ DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
+ &local->stbuf, &local->preoldparent,
+ &local->postoldparent, &local->preparent,
+ &local->postparent, NULL);
+
+ return 0;
+}
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unlink on %s failed (%s)",
- prev->this->name, strerror (op_errno));
- }
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf);
+int
+dht_rename_links_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
- return 0;
+ prev = cookie;
+ local = frame->local;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "link/file %s on %s failed (%s)",
+ local->loc.path, prev->this->name, strerror (op_errno));
+ }
+
+ if (local->linked == _gf_true) {
+ local->linked = _gf_false;
+ dht_linkfile_attr_heal (frame, this);
+ }
+ DHT_STACK_DESTROY (frame);
+
+ return 0;
}
int
dht_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- xlator_t *src_hashed = NULL;
- xlator_t *src_cached = NULL;
- xlator_t *dst_hashed = NULL;
- xlator_t *dst_cached = NULL;
- xlator_t *rename_subvol = NULL;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *src_hashed = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+ xlator_t *rename_subvol = NULL;
+ call_frame_t *link_frame = NULL;
+ dht_local_t *link_local = NULL;
+ dict_t *xattr = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ src_hashed = local->src_hashed;
+ src_cached = local->src_cached;
+ dst_hashed = local->dst_hashed;
+ dst_cached = local->dst_cached;
+
+ if (local->linked == _gf_true)
+ FRAME_SU_UNDO (frame, dht_local_t);
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: rename on %s failed (%s)", local->loc.path,
+ prev->this->name, strerror (op_errno));
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto cleanup;
+ }
+
+ if ((src_cached == dst_cached) && (dst_hashed != dst_cached)) {
+ link_frame = copy_frame (frame);
+ if (!link_frame) {
+ goto err;
+ }
+
+ /* fop value sent as maxvalue because it is not used
+ anywhere in this case */
+ link_local = dht_local_init (link_frame, &local->loc2, NULL,
+ GF_FOP_MAXVALUE);
+ if (!link_local) {
+ goto err;
+ }
+
+ if (link_local->loc.inode)
+ inode_unref (link_local->loc.inode);
+ link_local->loc.inode = inode_ref (local->loc.inode);
+ uuid_copy (link_local->gfid, local->loc.inode->gfid);
+
+ dht_linkfile_create (link_frame, dht_rename_links_create_cbk,
+ this, src_cached, dst_hashed,
+ &link_local->loc);
+ }
- local = frame->local;
- prev = cookie;
-
- src_hashed = local->src_hashed;
- src_cached = local->src_cached;
- dst_hashed = local->dst_hashed;
- dst_cached = local->dst_cached;
+err:
+ /* Merge attrs only from src_cached. In case there of src_cached !=
+ * dst_hashed, this ignores linkfile attrs. */
+ if (prev->this == src_cached) {
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ dht_iatt_merge (this, &local->preoldparent, preoldparent,
+ prev->this);
+ dht_iatt_merge (this, &local->postoldparent, postoldparent,
+ prev->this);
+ dht_iatt_merge (this, &local->preparent, prenewparent,
+ prev->this);
+ dht_iatt_merge (this, &local->postparent, postnewparent,
+ prev->this);
+ }
+
+
+ /* NOTE: rename_subvol is the same subvolume from which dht_rename_cbk
+ * is called. since rename has already happened on rename_subvol,
+ * unlink should not be sent for oldpath (either linkfile or cached-file)
+ * on rename_subvol. */
+ if (src_cached == dst_cached)
+ rename_subvol = src_cached;
+ else
+ rename_subvol = dst_hashed;
+
+ /* TODO: delete files in background */
+
+ if (src_cached != dst_hashed && src_cached != dst_cached)
+ local->call_cnt++;
+
+ if (src_hashed != rename_subvol && src_hashed != src_cached)
+ local->call_cnt++;
+
+ if (dst_cached && dst_cached != dst_hashed && dst_cached != src_cached)
+ local->call_cnt++;
+
+ if (local->call_cnt == 0)
+ goto unwind;
+
+ DHT_MARK_FOP_INTERNAL (xattr);
+
+ if (src_cached != dst_hashed && src_cached != dst_cached) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "deleting old src datafile %s @ %s",
+ local->loc.path, src_cached->name);
+
+ STACK_WIND (frame, dht_rename_unlink_cbk,
+ src_cached, src_cached->fops->unlink,
+ &local->loc, 0, xattr);
+ }
+
+ if (src_hashed != rename_subvol && src_hashed != src_cached) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "deleting old src linkfile %s @ %s",
+ local->loc.path, src_hashed->name);
+
+ STACK_WIND (frame, dht_rename_unlink_cbk,
+ src_hashed, src_hashed->fops->unlink,
+ &local->loc, 0, xattr);
+ }
+
+ if (dst_cached
+ && (dst_cached != dst_hashed)
+ && (dst_cached != src_cached)) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "deleting old dst datafile %s @ %s",
+ local->loc2.path, dst_cached->name);
+
+ STACK_WIND (frame, dht_rename_unlink_cbk,
+ dst_cached, dst_cached->fops->unlink,
+ &local->loc2, 0, xattr);
+ }
+ if (xattr)
+ dict_unref (xattr);
+ return 0;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "rename on %s failed (%s)", prev->this->name,
- strerror (op_errno));
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- goto unwind;
- }
-
- /* NOTE: rename_subvol is the same subvolume from which dht_rename_cbk
- * is called. since rename has already happened on rename_subvol,
- * unlink should not be sent for oldpath (either linkfile or cached-file)
- * on rename_subvol. */
- if (src_cached == dst_cached)
- rename_subvol = src_cached;
- else
- rename_subvol = dst_hashed;
+unwind:
+ WIPE (&local->preoldparent);
+ WIPE (&local->postoldparent);
+ WIPE (&local->preparent);
+ WIPE (&local->postparent);
+ if (xattr)
+ dict_unref (xattr);
- /* TODO: delete files in background */
+ dht_rename_done (frame, this);
- if (src_cached != dst_hashed && src_cached != dst_cached)
- local->call_cnt++;
+ return 0;
- if (src_hashed != rename_subvol && src_hashed != src_cached)
- local->call_cnt++;
+cleanup:
+ if (xattr)
+ dict_unref (xattr);
+ dht_rename_cleanup (frame);
- if (dst_cached && dst_cached != dst_hashed && dst_cached != src_cached)
- local->call_cnt++;
+ return 0;
+}
- if (local->call_cnt == 0)
- goto unwind;
- if (src_cached != dst_hashed && src_cached != dst_cached) {
- gf_log (this->name, GF_LOG_TRACE,
- "deleting old src datafile %s @ %s",
- local->loc.path, src_cached->name);
+int
+dht_do_rename (call_frame_t *frame)
+{
+ dht_local_t *local = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_cached = NULL;
+ xlator_t *this = NULL;
+ xlator_t *rename_subvol = NULL;
- STACK_WIND (frame, dht_rename_unlink_cbk,
- src_cached, src_cached->fops->unlink,
- &local->loc);
- }
- if (src_hashed != rename_subvol && src_hashed != src_cached) {
- gf_log (this->name, GF_LOG_TRACE,
- "deleting old src linkfile %s @ %s",
- local->loc.path, src_hashed->name);
+ local = frame->local;
+ this = frame->this;
- STACK_WIND (frame, dht_rename_unlink_cbk,
- src_hashed, src_hashed->fops->unlink,
- &local->loc);
- }
+ dst_hashed = local->dst_hashed;
+ dst_cached = local->dst_cached;
+ src_cached = local->src_cached;
- if (dst_cached
- && (dst_cached != dst_hashed)
- && (dst_cached != src_cached)) {
- gf_log (this->name, GF_LOG_TRACE,
- "deleting old dst datafile %s @ %s",
- local->loc2.path, dst_cached->name);
+ if (src_cached == dst_cached)
+ rename_subvol = src_cached;
+ else
+ rename_subvol = dst_hashed;
- STACK_WIND (frame, dht_rename_unlink_cbk,
- dst_cached, dst_cached->fops->unlink,
- &local->loc2);
- }
- return 0;
+ gf_log (this->name, GF_LOG_TRACE,
+ "renaming %s => %s (%s)",
+ local->loc.path, local->loc2.path, rename_subvol->name);
-unwind:
- DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf);
+ if (local->linked == _gf_true)
+ FRAME_SU_DO (frame, dht_local_t);
+ STACK_WIND (frame, dht_rename_cbk,
+ rename_subvol, rename_subvol->fops->rename,
+ &local->loc, &local->loc2, NULL);
- return 0;
+ return 0;
}
int
-dht_do_rename (call_frame_t *frame)
+dht_rename_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- xlator_t *dst_hashed = NULL;
- xlator_t *src_cached = NULL;
- xlator_t *dst_cached = NULL;
- xlator_t *this = NULL;
- xlator_t *rename_subvol = NULL;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int this_call_cnt = 0;
- local = frame->local;
- this = frame->this;
+ local = frame->local;
+ prev = cookie;
- dst_hashed = local->dst_hashed;
- dst_cached = local->dst_cached;
- src_cached = local->src_cached;
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "link/file on %s failed (%s)",
+ prev->this->name, strerror (op_errno));
+ local->op_ret = -1;
+ if (op_errno != ENOENT)
+ local->op_errno = op_errno;
+ } else if (local->src_cached == prev->this) {
+ /* merge of attr returned only from linkfile creation */
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ }
- if (src_cached == dst_cached)
- rename_subvol = src_cached;
- else
- rename_subvol = dst_hashed;
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ if (local->op_ret == -1)
+ goto cleanup;
- gf_log (this->name, GF_LOG_TRACE,
- "renaming %s => %s (%s)",
- local->loc.path, local->loc2.path, rename_subvol->name);
+ dht_do_rename (frame);
+ }
- STACK_WIND (frame, dht_rename_cbk,
- rename_subvol, rename_subvol->fops->rename,
- &local->loc, &local->loc2);
+ return 0;
- return 0;
+cleanup:
+ dht_rename_cleanup (frame);
+
+ return 0;
}
int
-dht_rename_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *stbuf)
+dht_rename_unlink_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
dht_local_t *local = NULL;
call_frame_t *prev = NULL;
- int this_call_cnt = 0;
local = frame->local;
prev = cookie;
-
- if (op_ret == -1) {
+
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
gf_log (this->name, GF_LOG_DEBUG,
- "link/file on %s failed (%s)",
- prev->this->name, strerror (op_errno));
+ "unlink of %s on %s failed (%s)",
+ local->loc2.path, prev->this->name,
+ strerror (op_errno));
local->op_ret = -1;
local->op_errno = op_errno;
}
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- if (local->op_ret == -1)
- goto unwind;
-
- dht_do_rename (frame);
- }
+ if (local->op_ret == -1)
+ goto cleanup;
+
+ dht_do_rename (frame);
return 0;
-unwind:
- DHT_STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf);
+cleanup:
+ dht_rename_cleanup (frame);
return 0;
}
@@ -408,39 +761,54 @@ unwind:
int
dht_rename_create_links (call_frame_t *frame)
{
- dht_local_t *local = NULL;
- xlator_t *this = NULL;
- xlator_t *src_hashed = NULL;
- xlator_t *src_cached = NULL;
- xlator_t *dst_hashed = NULL;
- xlator_t *dst_cached = NULL;
- int call_cnt = 0;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ xlator_t *src_hashed = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+ int call_cnt = 0;
+ dict_t *xattr = NULL;
- local = frame->local;
- this = frame->this;
+ local = frame->local;
+ this = frame->this;
- src_hashed = local->src_hashed;
- src_cached = local->src_cached;
- dst_hashed = local->dst_hashed;
- dst_cached = local->dst_cached;
+ src_hashed = local->src_hashed;
+ src_cached = local->src_cached;
+ dst_hashed = local->dst_hashed;
+ dst_cached = local->dst_cached;
- if (src_cached == dst_cached)
- goto nolinks;
+ DHT_MARK_FOP_INTERNAL (xattr);
- if (dst_hashed != src_hashed && dst_hashed != src_cached)
- call_cnt++;
+ if (src_cached == dst_cached) {
+ if (dst_hashed == dst_cached)
+ goto nolinks;
- if (src_cached != dst_hashed)
- call_cnt++;
+ gf_log (this->name, GF_LOG_TRACE,
+ "unlinking dst linkfile %s @ %s",
+ local->loc2.path, dst_hashed->name);
- local->call_cnt = call_cnt;
+ STACK_WIND (frame, dht_rename_unlink_links_cbk,
+ dst_hashed, dst_hashed->fops->unlink,
+ &local->loc2, 0, xattr);
+ return 0;
+ }
- if (dst_hashed != src_hashed && dst_hashed != src_cached) {
- gf_log (this->name, GF_LOG_TRACE,
- "linkfile %s @ %s => %s",
- local->loc.path, dst_hashed->name, src_cached->name);
- dht_linkfile_create (frame, dht_rename_links_cbk,
+ if (dst_hashed != src_hashed && dst_hashed != src_cached)
+ call_cnt++;
+
+ if (src_cached != dst_hashed)
+ call_cnt++;
+
+ local->call_cnt = call_cnt;
+
+ if (dst_hashed != src_hashed && dst_hashed != src_cached) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "linkfile %s @ %s => %s",
+ local->loc.path, dst_hashed->name, src_cached->name);
+ memcpy (local->gfid, local->loc.inode->gfid, 16);
+ dht_linkfile_create (frame, dht_rename_links_cbk, this,
src_cached, dst_hashed, &local->loc);
}
@@ -450,113 +818,107 @@ dht_rename_create_links (call_frame_t *frame)
local->loc2.path, src_cached->name);
STACK_WIND (frame, dht_rename_links_cbk,
src_cached, src_cached->fops->link,
- &local->loc, &local->loc2);
+ &local->loc, &local->loc2, xattr);
}
nolinks:
- if (!call_cnt) {
- /* skip to next step */
- dht_do_rename (frame);
- }
-
- return 0;
+ if (!call_cnt) {
+ /* skip to next step */
+ dht_do_rename (frame);
+ }
+ if (xattr)
+ dict_unref (xattr);
+
+ return 0;
}
int
dht_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- xlator_t *src_cached = NULL;
- xlator_t *src_hashed = NULL;
- xlator_t *dst_cached = NULL;
- xlator_t *dst_hashed = NULL;
- int op_errno = -1;
- int ret = -1;
- dht_local_t *local = NULL;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (oldloc, err);
- VALIDATE_OR_GOTO (newloc, err);
-
- src_hashed = dht_subvol_get_hashed (this, oldloc);
- if (!src_hashed) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- oldloc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- src_cached = dht_subvol_get_cached (this, oldloc->inode);
- if (!src_cached) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", oldloc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- dst_hashed = dht_subvol_get_hashed (this, newloc);
- if (!dst_hashed) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- newloc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- if (newloc->inode)
- dst_cached = dht_subvol_get_cached (this, newloc->inode);
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- ret = loc_copy (&local->loc, oldloc);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- ret = loc_copy (&local->loc2, newloc);
- if (ret == -1) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- local->src_hashed = src_hashed;
- local->src_cached = src_cached;
- local->dst_hashed = dst_hashed;
- local->dst_cached = dst_cached;
-
- gf_log (this->name, GF_LOG_TRACE,
- "renaming %s (hash=%s/cache=%s) => %s (hash=%s/cache=%s)",
- oldloc->path, src_hashed->name, src_cached->name,
- newloc->path, dst_hashed->name,
- dst_cached ? dst_cached->name : "<nul>");
-
- if (S_ISDIR (oldloc->inode->st_mode)) {
- dht_rename_dir (frame, this);
- } else {
- local->op_ret = 0;
- dht_rename_create_links (frame);
- }
-
- return 0;
+ xlator_t *src_cached = NULL;
+ xlator_t *src_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ int op_errno = -1;
+ int ret = -1;
+ dht_local_t *local = NULL;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (oldloc, err);
+ VALIDATE_OR_GOTO (newloc, err);
+
+ src_hashed = dht_subvol_get_hashed (this, oldloc);
+ if (!src_hashed) {
+ gf_log (this->name, GF_LOG_INFO,
+ "no subvolume in layout for path=%s",
+ oldloc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ src_cached = dht_subvol_get_cached (this, oldloc->inode);
+ if (!src_cached) {
+ gf_log (this->name, GF_LOG_INFO,
+ "no cached subvolume for path=%s", oldloc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ dst_hashed = dht_subvol_get_hashed (this, newloc);
+ if (!dst_hashed) {
+ gf_log (this->name, GF_LOG_INFO,
+ "no subvolume in layout for path=%s",
+ newloc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (newloc->inode)
+ dst_cached = dht_subvol_get_cached (this, newloc->inode);
+
+ local = dht_local_init (frame, oldloc, NULL, GF_FOP_RENAME);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ /* cached_subvol will be set from dht_local_init, reset it to NULL,
+ as the logic of handling rename is different */
+ local->cached_subvol = NULL;
+
+ ret = loc_copy (&local->loc2, newloc);
+ if (ret == -1) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->src_hashed = src_hashed;
+ local->src_cached = src_cached;
+ local->dst_hashed = dst_hashed;
+ local->dst_cached = dst_cached;
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "renaming %s (hash=%s/cache=%s) => %s (hash=%s/cache=%s)",
+ oldloc->path, src_hashed->name, src_cached->name,
+ newloc->path, dst_hashed->name,
+ dst_cached ? dst_cached->name : "<nul>");
+
+ if (IA_ISDIR (oldloc->inode->ia_type)) {
+ dht_rename_dir (frame, this);
+ } else {
+ local->op_ret = 0;
+ dht_rename_create_links (frame);
+ }
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
- return 0;
+ return 0;
}
diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
index df8b2047b..3fe96b1c7 100644
--- a/xlators/cluster/dht/src/dht-selfheal.c
+++ b/xlators/cluster/dht/src/dht-selfheal.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -26,459 +17,1013 @@
#include "glusterfs.h"
#include "xlator.h"
#include "dht-common.h"
+#include "glusterfs-acl.h"
+
+#define DHT_SET_LAYOUT_RANGE(layout,i,srt,chunk,cnt,path) do { \
+ layout->list[i].start = srt; \
+ layout->list[i].stop = srt + chunk - 1; \
+ \
+ gf_log (this->name, GF_LOG_TRACE, \
+ "gave fix: %u - %u on %s for %s", \
+ layout->list[i].start, layout->list[i].stop, \
+ layout->list[i].xlator->name, path); \
+ } while (0)
+
+#define DHT_RESET_LAYOUT_RANGE(layout) do { \
+ int cnt = 0; \
+ for (cnt = 0; cnt < layout->cnt; cnt++ ) { \
+ layout->list[cnt].start = 0; \
+ layout->list[cnt].stop = 0; \
+ } \
+ } while (0)
+
+static uint32_t
+dht_overlap_calc (dht_layout_t *old, int o, dht_layout_t *new, int n)
+{
+ if (o >= old->cnt || n >= new->cnt)
+ return 0;
+
+ if (old->list[o].err > 0 || new->list[n].err > 0)
+ return 0;
+
+ if (old->list[o].start == old->list[o].stop) {
+ return 0;
+ }
+
+ if (new->list[n].start == new->list[n].stop) {
+ return 0;
+ }
+
+ if ((old->list[o].start > new->list[n].stop) ||
+ (old->list[o].stop < new->list[n].start))
+ return 0;
+
+ return min (old->list[o].stop, new->list[n].stop) -
+ max (old->list[o].start, new->list[n].start) + 1;
+}
int
dht_selfheal_dir_finish (call_frame_t *frame, xlator_t *this, int ret)
{
- dht_local_t *local = NULL;
-
+ dht_local_t *local = NULL;
- local = frame->local;
- local->selfheal.dir_cbk (frame, NULL, frame->this, ret,
- local->op_errno);
+ local = frame->local;
+ local->selfheal.dir_cbk (frame, NULL, frame->this, ret,
+ local->op_errno, NULL);
- return 0;
+ return 0;
}
int
dht_selfheal_dir_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno)
+ int op_ret, int op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- xlator_t *subvol = NULL;
- int i = 0;
- dht_layout_t *layout = NULL;
- int err = 0;
- int this_call_cnt = 0;
-
- local = frame->local;
- layout = local->selfheal.layout;
- prev = cookie;
- subvol = prev->this;
-
- if (op_ret == 0)
- err = 0;
- else
- err = op_errno;
-
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].xlator == subvol) {
- layout->list[i].err = err;
- break;
- }
- }
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *subvol = NULL;
+ int i = 0;
+ dht_layout_t *layout = NULL;
+ int err = 0;
+ int this_call_cnt = 0;
+
+ local = frame->local;
+ layout = local->selfheal.layout;
+ prev = cookie;
+ subvol = prev->this;
+
+ if (op_ret == 0)
+ err = 0;
+ else
+ err = op_errno;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == subvol) {
+ layout->list[i].err = err;
+ break;
+ }
+ }
- this_call_cnt = dht_frame_return (frame);
+ this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- dht_selfheal_dir_finish (frame, this, 0);
- }
+ if (is_last_call (this_call_cnt)) {
+ dht_selfheal_dir_finish (frame, this, 0);
+ }
- return 0;
+ return 0;
}
int
dht_selfheal_dir_xattr_persubvol (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout, int i)
+ dht_layout_t *layout, int i,
+ xlator_t *req_subvol)
{
- xlator_t *subvol = NULL;
- dict_t *xattr = NULL;
- int ret = 0;
- xlator_t *this = NULL;
- int32_t *disk_layout = NULL;
-
-
- subvol = layout->list[i].xlator;
- this = frame->this;
+ xlator_t *subvol = NULL;
+ dict_t *xattr = NULL;
+ int ret = 0;
+ xlator_t *this = NULL;
+ int32_t *disk_layout = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+
+ local = frame->local;
+ if (req_subvol)
+ subvol = req_subvol;
+ else
+ subvol = layout->list[i].xlator;
+ this = frame->this;
+
+ GF_VALIDATE_OR_GOTO ("", this, err);
+ GF_VALIDATE_OR_GOTO (this->name, layout, err);
+ GF_VALIDATE_OR_GOTO (this->name, local, err);
+ GF_VALIDATE_OR_GOTO (this->name, subvol, err);
+ VALIDATE_OR_GOTO (this->private, err);
+
+ conf = this->private;
+
+ xattr = get_new_dict ();
+ if (!xattr) {
+ goto err;
+ }
- xattr = get_new_dict ();
- if (!xattr) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ ret = dht_disk_layout_extract (this, layout, i, &disk_layout);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: (subvol %s) failed to extract disk layout",
+ loc->path, subvol->name);
+ goto err;
+ }
- ret = dht_disk_layout_extract (this, layout, i, &disk_layout);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to extract disk layout");
- goto err;
- }
+ ret = dict_set_bin (xattr, conf->xattr_name, disk_layout, 4 * 4);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: (subvol %s) failed to set xattr dictionary",
+ loc->path, subvol->name);
+ goto err;
+ }
+ disk_layout = NULL;
- ret = dict_set_bin (xattr, "trusted.glusterfs.dht",
- disk_layout, 4 * 4);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set xattr dictionary");
- goto err;
- }
- disk_layout = NULL;
+ gf_log (this->name, GF_LOG_TRACE,
+ "setting hash range %u - %u (type %d) on subvolume %s for %s",
+ layout->list[i].start, layout->list[i].stop,
+ layout->type, subvol->name, loc->path);
- gf_log (this->name, GF_LOG_TRACE,
- "setting hash range %u - %u (type %d) on subvolume %s for %s",
- layout->list[i].start, layout->list[i].stop,
- layout->type, subvol->name, loc->path);
+ dict_ref (xattr);
- dict_ref (xattr);
+ if (!uuid_is_null (local->gfid))
+ uuid_copy (loc->gfid, local->gfid);
- STACK_WIND (frame, dht_selfheal_dir_xattr_cbk,
- subvol, subvol->fops->setxattr,
- loc, xattr, 0);
+ STACK_WIND (frame, dht_selfheal_dir_xattr_cbk,
+ subvol, subvol->fops->setxattr,
+ loc, xattr, 0, NULL);
- dict_unref (xattr);
+ dict_unref (xattr);
- return 0;
+ return 0;
err:
- if (xattr)
- dict_destroy (xattr);
+ if (xattr)
+ dict_destroy (xattr);
- if (disk_layout)
- FREE (disk_layout);
+ GF_FREE (disk_layout);
- dht_selfheal_dir_xattr_cbk (frame, subvol, frame->this,
- -1, ENOMEM);
- return 0;
+ dht_selfheal_dir_xattr_cbk (frame, subvol, frame->this,
+ -1, ENOMEM, NULL);
+ return 0;
}
+int
+dht_fix_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
+{
+ dht_local_t *local = NULL;
+ int i = 0;
+ int count = 0;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *dummy = NULL;
+
+ local = frame->local;
+ this = frame->this;
+ conf = this->private;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "writing the new range for all subvolumes");
+
+ local->call_cnt = count = conf->subvolume_cnt;
+
+ for (i = 0; i < layout->cnt; i++) {
+ dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i, NULL);
+
+ if (--count == 0)
+ goto out;
+ }
+ /* if we are here, subvolcount > layout_count. subvols-per-directory
+ * option might be set here. We need to clear out layout from the
+ * non-participating subvolumes, else it will result in overlaps */
+ dummy = dht_layout_new (this, 1);
+ if (!dummy)
+ goto out;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (_gf_false ==
+ dht_is_subvol_in_layout (layout, conf->subvolumes[i])) {
+ dht_selfheal_dir_xattr_persubvol (frame, loc, dummy, 0,
+ conf->subvolumes[i]);
+ if (--count == 0)
+ break;
+ }
+ }
+
+ dht_layout_unref (this, dummy);
+out:
+ return 0;
+}
int
dht_selfheal_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
{
- dht_local_t *local = NULL;
- int missing_xattr = 0;
- int i = 0;
- int ret = 0;
- xlator_t *this = NULL;
-
- local = frame->local;
- this = frame->this;
-
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err != -1 || !layout->list[i].stop) {
- /* err != -1 would mean xattr present on the directory
- * or the directory is itself non existant.
- * !layout->list[i].stop would mean layout absent
- */
- continue;
- }
- missing_xattr++;
- }
+ dht_local_t *local = NULL;
+ int missing_xattr = 0;
+ int i = 0;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *dummy = NULL;
+
+ local = frame->local;
+ this = frame->this;
+ conf = this->private;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err != -1 || !layout->list[i].stop) {
+ /* err != -1 would mean xattr present on the directory
+ * or the directory is non existent.
+ * !layout->list[i].stop would mean layout absent
+ */
+
+ continue;
+ }
+ missing_xattr++;
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "%d subvolumes missing xattr for %s",
- missing_xattr, loc->path);
+ gf_log (this->name, GF_LOG_TRACE,
+ "%d subvolumes missing xattr for %s",
+ missing_xattr, loc->path);
- if (missing_xattr == 0) {
- dht_selfheal_dir_finish (frame, this, 0);
- return 0;
- }
+ if (missing_xattr == 0) {
+ dht_selfheal_dir_finish (frame, this, 0);
+ return 0;
+ }
- local->call_cnt = missing_xattr;
+ local->call_cnt = missing_xattr;
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err != -1 || !layout->list[i].stop)
- continue;
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err != -1 || !layout->list[i].stop)
+ continue;
- ret = dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i);
+ dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i, NULL);
- if (--missing_xattr == 0)
- break;
- }
- return 0;
+ if (--missing_xattr == 0)
+ break;
+ }
+ dummy = dht_layout_new (this, 1);
+ if (!dummy)
+ goto out;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (_gf_false ==
+ dht_is_subvol_in_layout (layout, conf->subvolumes[i])) {
+ dht_selfheal_dir_xattr_persubvol (frame, loc, dummy, 0,
+ conf->subvolumes[i]);
+ }
+ }
+ dht_layout_unref (this, dummy);
+out:
+ return 0;
+}
+
+int
+dht_selfheal_dir_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int this_call_cnt = 0;
+
+ local = frame->local;
+ layout = local->selfheal.layout;
+
+ this_call_cnt = dht_frame_return (frame);
+
+ if (is_last_call (this_call_cnt)) {
+ dht_selfheal_dir_xattr (frame, &local->loc, layout);
+ }
+
+ return 0;
}
int
+dht_selfheal_dir_setattr (call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dht_layout_t *layout)
+{
+ int missing_attr = 0;
+ int i = 0;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+
+ local = frame->local;
+ this = frame->this;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == -1)
+ missing_attr++;
+ }
+
+ if (missing_attr == 0) {
+ dht_selfheal_dir_xattr (frame, loc, layout);
+ return 0;
+ }
+
+ if (!uuid_is_null (local->gfid))
+ uuid_copy (loc->gfid, local->gfid);
+
+ local->call_cnt = missing_attr;
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == -1) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "setattr for %s on subvol %s",
+ loc->path, layout->list[i].xlator->name);
+
+ STACK_WIND (frame, dht_selfheal_dir_setattr_cbk,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->setattr,
+ loc, stbuf, valid, NULL);
+ }
+ }
+
+ return 0;
+}
+
+int
dht_selfheal_dir_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct stat *stbuf)
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- call_frame_t *prev = NULL;
- xlator_t *subvol = NULL;
- int i = 0;
- int this_call_cnt = 0;
-
-
- local = frame->local;
- layout = local->selfheal.layout;
- prev = cookie;
- subvol = prev->this;
-
- if ((op_ret == 0) || (op_errno == EEXIST)) {
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].xlator == subvol) {
- layout->list[i].err = -1;
- break;
- }
- }
- }
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *subvol = NULL;
+ int i = 0;
+ int this_call_cnt = 0;
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- dht_selfheal_dir_xattr (frame, &local->loc, layout);
- }
+ local = frame->local;
+ layout = local->selfheal.layout;
+ prev = cookie;
+ subvol = prev->this;
- return 0;
+ if ((op_ret == 0) || ((op_ret == -1) && (op_errno == EEXIST))) {
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == subvol) {
+ layout->list[i].err = -1;
+ break;
+ }
+ }
+ }
+
+ if (op_ret) {
+ gf_log (this->name, ((op_errno == EEXIST) ? GF_LOG_DEBUG :
+ GF_LOG_WARNING),
+ "selfhealing directory %s failed: %s",
+ local->loc.path, strerror (op_errno));
+ goto out;
+ }
+
+ dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ dht_iatt_merge (this, &local->preparent, preparent, prev->this);
+ dht_iatt_merge (this, &local->postparent, postparent, prev->this);
+
+out:
+ this_call_cnt = dht_frame_return (frame);
+
+ if (is_last_call (this_call_cnt)) {
+ dht_selfheal_dir_setattr (frame, &local->loc, &local->stbuf, 0xffffff, layout);
+ }
+
+ return 0;
}
+void
+dht_selfheal_dir_mkdir_setacl (dict_t *xattr, dict_t *dict)
+{
+ data_t *acl_default = NULL;
+ data_t *acl_access = NULL;
+ xlator_t *this = NULL;
+ int ret = -1;
+
+ GF_ASSERT (xattr);
+ GF_ASSERT (dict);
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ acl_default = dict_get (xattr, POSIX_ACL_DEFAULT_XATTR);
+
+ if (!acl_default) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "ACL_DEFAULT xattr not present");
+ goto cont;
+ }
+ ret = dict_set (dict, POSIX_ACL_DEFAULT_XATTR, acl_default);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "Could not set ACL_DEFAULT xattr");
+cont:
+ acl_access = dict_get (xattr, POSIX_ACL_ACCESS_XATTR);
+ if (!acl_access) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "ACL_ACCESS xattr not present");
+ goto out;
+ }
+ ret = dict_set (dict, POSIX_ACL_ACCESS_XATTR, acl_access);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "Could not set ACL_ACCESS xattr");
+
+out:
+ return;
+}
int
dht_selfheal_dir_mkdir (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout, int force)
+ dht_layout_t *layout, int force)
{
- int missing_dirs = 0;
- int i = 0;
- dht_local_t *local = NULL;
- xlator_t *this = NULL;
+ int missing_dirs = 0;
+ int i = 0;
+ int ret = -1;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ dict_t *dict = NULL;
+
+ local = frame->local;
+ this = frame->this;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == ENOENT || force)
+ missing_dirs++;
+ }
+ if (missing_dirs == 0) {
+ dht_selfheal_dir_setattr (frame, loc, &local->stbuf, 0xffffffff, layout);
+ return 0;
+ }
- local = frame->local;
- this = frame->this;
+ local->call_cnt = missing_dirs;
+ if (!uuid_is_null (local->gfid)) {
+ dict = dict_new ();
+ if (!dict)
+ return -1;
+
+ ret = dict_set_static_bin (dict, "gfid-req", local->gfid, 16);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set gfid in dict", loc->path);
+ } else if (local->params) {
+ /* Send the dictionary from higher layers directly */
+ dict = dict_ref (local->params);
+ }
+ /* Set acls */
+ if (local->xattr && dict)
+ dht_selfheal_dir_mkdir_setacl (local->xattr, dict);
+
+ if (!dict)
+ gf_log (this->name, GF_LOG_WARNING,
+ "dict is NULL, need to make sure gfids are same");
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == ENOENT || force) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "creating directory %s on subvol %s",
+ loc->path, layout->list[i].xlator->name);
+
+ STACK_WIND (frame, dht_selfheal_dir_mkdir_cbk,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->mkdir,
+ loc,
+ st_mode_from_ia (local->stbuf.ia_prot,
+ local->stbuf.ia_type),
+ 0, dict);
+ }
+ }
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err == ENOENT || force)
- missing_dirs++;
- }
+ if (dict)
+ dict_unref (dict);
- if (missing_dirs == 0) {
- dht_selfheal_dir_xattr (frame, loc, layout);
- return 0;
- }
+ return 0;
+}
- local->call_cnt = missing_dirs;
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err == ENOENT || force) {
- gf_log (this->name, GF_LOG_TRACE,
- "creating directory %s on subvol %s",
- loc->path, layout->list[i].xlator->name);
-
- STACK_WIND (frame, dht_selfheal_dir_mkdir_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->mkdir,
- loc, local->stbuf.st_mode);
- }
- }
- return 0;
+int
+dht_selfheal_layout_alloc_start (xlator_t *this, loc_t *loc,
+ dht_layout_t *layout)
+{
+ int start = 0;
+ uint32_t hashval = 0;
+ int ret = 0;
+
+ ret = dht_hash_compute (this, layout->type, loc->path, &hashval);
+ if (ret == 0) {
+ start = (hashval % layout->cnt);
+ }
+
+ return start;
}
-void
-dht_selfheal_layout_new_directory (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout)
+static inline int
+dht_get_layout_count (xlator_t *this, dht_layout_t *layout, int new_layout)
{
- dht_conf_t *conf = NULL;
- xlator_t *this = NULL;
- uint32_t chunk = 0;
- int i = 0;
- uint32_t start = 0;
- int cnt = 0;
- int err = 0;
-
- this = frame->this;
- conf = this->private;
-
- for (i = 0; i < layout->cnt; i++) {
- err = layout->list[i].err;
- if (err == -1 || err == 0) {
- layout->list[i].err = -1;
- cnt++;
- }
- }
+ int i = 0;
+ int j = 0;
+ int err = 0;
+ int count = 0;
+ dht_conf_t *conf = NULL;
+
+ /* Gets in use only for replace-brick, remove-brick */
+ conf = this->private;
+ for (i = 0; i < layout->cnt; i++) {
+ for (j = 0; j < conf->subvolume_cnt; j++) {
+ if (conf->decommissioned_bricks[j] &&
+ conf->decommissioned_bricks[j] == layout->list[i].xlator) {
+ layout->list[i].err = EINVAL;
+ break;
+ }
+ }
+ }
+
+ for (i = 0; i < layout->cnt; i++) {
+ err = layout->list[i].err;
+ if (err == -1 || err == 0 || err == ENOENT) {
+ /* Setting list[i].err = -1 is an indication for
+ dht_selfheal_layout_new_directory() to assign
+ a range. We set it to -1 based on any one of
+ the three criteria:
+
+ - err == -1 already, which means directory
+ existed but layout was not set on it.
+
+ - err == 0, which means directory exists and
+ has an old layout piece which will be
+ overwritten now.
+
+ - err == ENOENT, which means directory does
+ not exist (possibly racing with mkdir or
+ finishing half done mkdir). The missing
+ directory will be attempted to be recreated.
+
+ It is important to note that it is safe
+ to race with mkdir() as self-heal and
+ mkdir are idempotent operations. Both will
+ strive to set the directory and layouts to
+ the same final state.
+ */
+ count++;
+ if (!err)
+ layout->list[i].err = -1;
+ }
+ }
/* no subvolume has enough space, but can't stop directory creation */
- if (!cnt) {
+ if (!count || !new_layout) {
for (i = 0; i < layout->cnt; i++) {
err = layout->list[i].err;
if (err == ENOSPC) {
layout->list[i].err = -1;
- cnt++;
+ count++;
}
}
}
- chunk = ((unsigned long) 0xffffffff) / cnt;
-
- start = 0;
- for (i = 0; i < layout->cnt; i++) {
- err = layout->list[i].err;
- if (err == -1) {
- layout->list[i].start = start;
- layout->list[i].stop = start + chunk - 1;
-
- start = start + chunk;
-
- gf_log (this->name, GF_LOG_TRACE,
- "gave fix: %u - %u on %s for %s",
- layout->list[i].start, layout->list[i].stop,
- layout->list[i].xlator->name, loc->path);
- if (--cnt == 0) {
- layout->list[i].stop = 0xffffffff;
- break;
- }
- }
+ /* if layout->spread_cnt is set, check if it is <= available
+ * subvolumes (down brick and decommissioned bricks are considered
+ * un-availbale). Else return count (available up bricks) */
+ count = ((layout->spread_cnt &&
+ (layout->spread_cnt <= count)) ?
+ layout->spread_cnt : ((count) ? count : 1));
+
+ return count;
+}
+
+
+void dht_selfheal_layout_new_directory (call_frame_t *frame, loc_t *loc,
+ dht_layout_t *new_layout);
+
+void dht_layout_entry_swap (dht_layout_t *layout, int i, int j);
+void dht_layout_range_swap (dht_layout_t *layout, int i, int j);
+
+/*
+ * It's a bit icky using local variables in a macro, but it makes the rest
+ * of the code a lot clearer.
+ */
+#define OV_ENTRY(x,y) table[x*new->cnt+y]
+
+void
+dht_selfheal_layout_maximize_overlap (call_frame_t *frame, loc_t *loc,
+ dht_layout_t *new, dht_layout_t *old)
+{
+ int i = 0;
+ int j = 0;
+ uint32_t curr_overlap = 0;
+ uint32_t max_overlap = 0;
+ int max_overlap_idx = -1;
+ uint32_t overlap = 0;
+ uint32_t *table = NULL;
+
+ dht_layout_sort_volname (old);
+ /* Now both old_layout->list[] and new_layout->list[]
+ are match the same xlators/subvolumes. i.e,
+ old_layout->[i] and new_layout->[i] are referring
+ to the same subvolumes
+ */
+
+ /* Build a table of overlaps between new[i] and old[j]. */
+ table = alloca(sizeof(overlap)*old->cnt*new->cnt);
+ if (!table) {
+ return;
+ }
+ memset(table,0,sizeof(overlap)*old->cnt*new->cnt);
+ for (i = 0; i < new->cnt; ++i) {
+ for (j = 0; j < old->cnt; ++j) {
+ OV_ENTRY(i,j) = dht_overlap_calc(old,j,new,i);
+ }
+ }
+
+ for (i = 0; i < new->cnt; i++) {
+ if (new->list[i].err > 0) {
+ /* Subvol might be marked for decommission
+ with EINVAL, or some other serious error
+ marked with positive errno.
+ */
+ continue;
+ }
+
+ max_overlap = 0;
+ max_overlap_idx = i;
+ for (j = (i + 1); j < new->cnt; ++j) {
+ if (new->list[j].err > 0) {
+ /* Subvol might be marked for decommission
+ with EINVAL, or some other serious error
+ marked with positive errno.
+ */
+ continue;
+ }
+ /* Calculate the overlap now. */
+ curr_overlap = OV_ENTRY(i,i) + OV_ENTRY(j,j);
+ /* Calculate the overlap after the proposed swap. */
+ overlap = OV_ENTRY(i,j) + OV_ENTRY(j,i);
+ /* Are we better than status quo? */
+ if (overlap > curr_overlap) {
+ overlap -= curr_overlap;
+ /* Are we better than the previous choice? */
+ if (overlap > max_overlap) {
+ max_overlap = overlap;
+ max_overlap_idx = j;
+ }
+ }
+ }
+
+ if (max_overlap_idx != i) {
+ dht_layout_range_swap (new, i, max_overlap_idx);
+ /* Need to swap the table values too. */
+ for (j = 0; j < old->cnt; ++j) {
+ overlap = OV_ENTRY(i,j);
+ OV_ENTRY(i,j) = OV_ENTRY(max_overlap_idx,j);
+ OV_ENTRY(max_overlap_idx,j) = overlap;
+ }
+ }
}
}
+dht_layout_t *
+dht_fix_layout_of_directory (call_frame_t *frame, loc_t *loc,
+ dht_layout_t *layout)
+{
+ int i = 0;
+ xlator_t *this = NULL;
+ dht_layout_t *new_layout = NULL;
+ dht_conf_t *priv = NULL;
+ dht_local_t *local = NULL;
+ uint32_t subvol_down = 0;
+ int ret = 0;
+
+ this = frame->this;
+ priv = this->private;
+ local = frame->local;
+
+ if (layout->type == DHT_HASH_TYPE_DM_USER) {
+ gf_log (THIS->name, GF_LOG_DEBUG, "leaving %s alone",
+ loc->path);
+ goto done;
+ }
+
+ new_layout = dht_layout_new (this, priv->subvolume_cnt);
+ if (!new_layout)
+ goto done;
+
+ /* If a subvolume is down, do not re-write the layout. */
+ ret = dht_layout_anomalies (this, loc, layout, NULL, NULL, NULL,
+ &subvol_down, NULL, NULL);
+
+ if (subvol_down || (ret == -1)) {
+ gf_log (this->name, GF_LOG_WARNING, "%u subvolume(s) are down"
+ ". Skipping fix layout.", subvol_down);
+ GF_FREE (new_layout);
+ return NULL;
+ }
+
+ for (i = 0; i < new_layout->cnt; i++) {
+ if (layout->list[i].err != ENOSPC)
+ new_layout->list[i].err = layout->list[i].err;
+ else
+ new_layout->list[i].err = -1;
+
+ new_layout->list[i].xlator = layout->list[i].xlator;
+ }
+
+ /* First give it a layout as though it is a new directory. This
+ ensures rotation to kick in */
+ dht_layout_sort_volname (new_layout);
+ dht_selfheal_layout_new_directory (frame, loc, new_layout);
+
+ /* Now selectively re-assign ranges only when it helps */
+ dht_selfheal_layout_maximize_overlap (frame, loc, new_layout, layout);
+
+done:
+ if (new_layout) {
+ /* Now that the new layout has all the proper layout, change the
+ inode context */
+ dht_layout_set (this, loc->inode, new_layout);
+
+ /* Make sure the extra 'ref' for existing layout is removed */
+ dht_layout_unref (this, local->layout);
+
+ local->layout = new_layout;
+ }
+
+ return local->layout;
+}
+
+
+void
+dht_selfheal_layout_new_directory (call_frame_t *frame, loc_t *loc,
+ dht_layout_t *layout)
+{
+ xlator_t *this = NULL;
+ uint32_t chunk = 0;
+ int i = 0;
+ uint32_t start = 0;
+ int cnt = 0;
+ int err = 0;
+ int start_subvol = 0;
+
+ this = frame->this;
+
+ cnt = dht_get_layout_count (this, layout, 1);
+
+ chunk = ((unsigned long) 0xffffffff) / ((cnt) ? cnt : 1);
+
+ start_subvol = dht_selfheal_layout_alloc_start (this, loc, layout);
+
+ /* clear out the range, as we are re-computing here */
+ DHT_RESET_LAYOUT_RANGE (layout);
+ for (i = start_subvol; i < layout->cnt; i++) {
+ err = layout->list[i].err;
+ if (err == -1 || err == ENOENT) {
+ DHT_SET_LAYOUT_RANGE(layout, i, start, chunk,
+ cnt, loc->path);
+ if (--cnt == 0) {
+ layout->list[i].stop = 0xffffffff;
+ goto done;
+ }
+ start += chunk;
+ }
+ }
+
+ for (i = 0; i < start_subvol; i++) {
+ err = layout->list[i].err;
+ if (err == -1 || err == ENOENT) {
+ DHT_SET_LAYOUT_RANGE(layout, i, start, chunk,
+ cnt, loc->path);
+ if (--cnt == 0) {
+ layout->list[i].stop = 0xffffffff;
+ goto done;
+ }
+ start += chunk;
+ }
+ }
+
+done:
+ return;
+}
+
int
dht_selfheal_dir_getafix (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout)
+ dht_layout_t *layout)
{
- dht_conf_t *conf = NULL;
- xlator_t *this = NULL;
- dht_local_t *local = NULL;
- int missing = -1;
- int down = -1;
- int holes = -1;
- int ret = -1;
- int i = -1;
- int overlaps = -1;
-
- this = frame->this;
- conf = this->private;
- local = frame->local;
-
- missing = local->selfheal.missing;
- down = local->selfheal.down;
- holes = local->selfheal.hole_cnt;
- overlaps = local->selfheal.overlaps_cnt;
-
- if ((missing + down) == conf->subvolume_cnt) {
- dht_selfheal_layout_new_directory (frame, loc, layout);
- ret = 0;
- }
+ dht_local_t *local = NULL;
+ uint32_t holes = 0;
+ int ret = -1;
+ int i = -1;
+ uint32_t overlaps = 0;
- if (holes <= down) {
- /* the down subvol might fill up the holes */
- ret = 0;
- }
+ local = frame->local;
- if (holes || missing || overlaps) {
- dht_selfheal_layout_new_directory (frame, loc, layout);
- ret = 0;
- }
+ holes = local->selfheal.hole_cnt;
+ overlaps = local->selfheal.overlaps_cnt;
- for (i = 0; i < layout->cnt; i++) {
- /* directory not present */
- if (layout->list[i].err == ENOENT) {
- ret = 0;
- break;
- }
- }
+ if (holes || overlaps) {
+ dht_selfheal_layout_new_directory (frame, loc, layout);
+ ret = 0;
+ }
- /* TODO: give a fix to these non-virgins */
+ for (i = 0; i < layout->cnt; i++) {
+ /* directory not present */
+ if (layout->list[i].err == ENOENT) {
+ ret = 0;
+ break;
+ }
+ }
- return ret;
+ /* TODO: give a fix to these non-virgins */
+
+ return ret;
}
int
-dht_selfheal_new_directory (call_frame_t *frame,
- dht_selfheal_dir_cbk_t dir_cbk,
- dht_layout_t *layout)
+dht_selfheal_new_directory (call_frame_t *frame,
+ dht_selfheal_dir_cbk_t dir_cbk,
+ dht_layout_t *layout)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- local->selfheal.dir_cbk = dir_cbk;
- local->selfheal.layout = layout;
+ local->selfheal.dir_cbk = dir_cbk;
+ local->selfheal.layout = dht_layout_ref (frame->this, layout);
- dht_layout_sort_volname (layout);
- dht_selfheal_layout_new_directory (frame, &local->loc, layout);
- dht_selfheal_dir_xattr (frame, &local->loc, layout);
- return 0;
+ dht_layout_sort_volname (layout);
+ dht_selfheal_layout_new_directory (frame, &local->loc, layout);
+ dht_selfheal_dir_xattr (frame, &local->loc, layout);
+ return 0;
+}
+
+int
+dht_fix_directory_layout (call_frame_t *frame,
+ dht_selfheal_dir_cbk_t dir_cbk,
+ dht_layout_t *layout)
+{
+ dht_local_t *local = NULL;
+ dht_layout_t *tmp_layout = NULL;
+
+ local = frame->local;
+
+ local->selfheal.dir_cbk = dir_cbk;
+ local->selfheal.layout = dht_layout_ref (frame->this, layout);
+
+ /* No layout sorting required here */
+ tmp_layout = dht_fix_layout_of_directory (frame, &local->loc, layout);
+ if (!tmp_layout) {
+ return -1;
+ }
+ dht_fix_dir_xattr (frame, &local->loc, tmp_layout);
+
+ return 0;
}
int
dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
- loc_t *loc, dht_layout_t *layout)
+ loc_t *loc, dht_layout_t *layout)
{
- dht_local_t *local = NULL;
- uint32_t holes = 0;
- uint32_t overlaps = 0;
- uint32_t missing = 0;
- uint32_t down = 0;
- uint32_t misc = 0;
- int ret = 0;
- xlator_t *this = NULL;
-
- local = frame->local;
- this = frame->this;
-
- ret = dht_layout_anomalies (this, loc, layout,
- &local->selfheal.hole_cnt,
- &local->selfheal.overlaps_cnt,
- &local->selfheal.missing,
- &local->selfheal.down,
- &local->selfheal.misc);
-
- holes = local->selfheal.hole_cnt;
- overlaps = local->selfheal.overlaps_cnt;
- missing = local->selfheal.missing;
- down = local->selfheal.down;
- misc = local->selfheal.misc;
-
- local->selfheal.dir_cbk = dir_cbk;
- local->selfheal.layout = layout;
-
- if (down) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%d subvolumes down -- not fixing", down);
- ret = 0;
- goto sorry_no_fix;
- }
+ dht_local_t *local = NULL;
+ uint32_t down = 0;
+ uint32_t misc = 0;
+ int ret = 0;
+ xlator_t *this = NULL;
+
+ local = frame->local;
+ this = frame->this;
+
+ dht_layout_anomalies (this, loc, layout,
+ &local->selfheal.hole_cnt,
+ &local->selfheal.overlaps_cnt,
+ NULL, &local->selfheal.down,
+ &local->selfheal.misc, NULL);
+
+ down = local->selfheal.down;
+ misc = local->selfheal.misc;
+
+ local->selfheal.dir_cbk = dir_cbk;
+ local->selfheal.layout = dht_layout_ref (this, layout);
+
+ if (down) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%d subvolumes down -- not fixing", down);
+ ret = 0;
+ goto sorry_no_fix;
+ }
- if (misc) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%d subvolumes have unrecoverable errors", misc);
- ret = 0;
- goto sorry_no_fix;
- }
+ if (misc) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%d subvolumes have unrecoverable errors", misc);
+ ret = 0;
+ goto sorry_no_fix;
+ }
- dht_layout_sort_volname (layout);
- ret = dht_selfheal_dir_getafix (frame, loc, layout);
+ dht_layout_sort_volname (layout);
+ ret = dht_selfheal_dir_getafix (frame, loc, layout);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "not able to form layout for the directory");
- goto sorry_no_fix;
- }
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "not able to form layout for the directory");
+ goto sorry_no_fix;
+ }
- dht_selfheal_dir_mkdir (frame, loc, layout, 0);
+ dht_selfheal_dir_mkdir (frame, loc, layout, 0);
- return 0;
+ return 0;
sorry_no_fix:
- /* TODO: need to put appropriate local->op_errno */
- dht_selfheal_dir_finish (frame, this, ret);
+ /* TODO: need to put appropriate local->op_errno */
+ dht_selfheal_dir_finish (frame, this, ret);
- return 0;
+ return 0;
}
int
dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
- loc_t *loc, dht_layout_t *layout)
+ loc_t *loc, dht_layout_t *layout)
{
- int ret = 0;
- dht_local_t *local = NULL;
+ int ret = 0;
+ dht_local_t *local = NULL;
+ local = frame->local;
- local = frame->local;
+ local->selfheal.dir_cbk = dir_cbk;
+ local->selfheal.layout = dht_layout_ref (frame->this, layout);
- local->selfheal.dir_cbk = dir_cbk;
- local->selfheal.layout = layout;
+ ret = dht_selfheal_dir_mkdir (frame, loc, layout, 1);
- ret = dht_selfheal_dir_mkdir (frame, loc, layout, 1);
+ return ret;
+}
- return 0;
+int
+dht_dir_attr_heal (void *data)
+{
+ call_frame_t *frame = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ int call_cnt = 0;
+ int ret = -1;
+ int i = 0;
+
+ GF_VALIDATE_OR_GOTO ("dht", data, out);
+
+ frame = data;
+ local = frame->local;
+ this = frame->this;
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", local, out);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO ("dht", conf, out);
+
+ call_cnt = conf->subvolume_cnt;
+
+ for (i = 0; i < call_cnt; i++) {
+ subvol = conf->subvolumes[i];
+ if (!subvol || (subvol == dht_first_up_subvol (this)))
+ continue;
+ ret = syncop_setattr (subvol, &local->loc, &local->stbuf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID),
+ NULL, NULL);
+ if (ret)
+ gf_log ("dht", GF_LOG_ERROR, "Failed to set uid/gid on"
+ " %s on %s subvol (%s)", local->loc.path,
+ subvol->name, strerror (errno));
+ }
+out:
+ return 0;
+}
+
+int
+dht_dir_attr_heal_done (int ret, call_frame_t *sync_frame, void *data)
+{
+ DHT_STACK_DESTROY (sync_frame);
+ return 0;
}
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
new file mode 100644
index 000000000..70aac7710
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -0,0 +1,758 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+/* TODO: add NS locking */
+
+#include "statedump.h"
+#include "dht-common.h"
+
+/* TODO:
+ - use volumename in xattr instead of "dht"
+ - use NS locks
+ - handle all cases in self heal layout reconstruction
+ - complete linkfile selfheal
+*/
+struct volume_options options[];
+
+void
+dht_layout_dump (dht_layout_t *layout, const char *prefix)
+{
+
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 0;
+
+ if (!layout)
+ goto out;
+ if (!prefix)
+ goto out;
+
+ gf_proc_dump_build_key(key, prefix, "cnt");
+ gf_proc_dump_write(key, "%d", layout->cnt);
+ gf_proc_dump_build_key(key, prefix, "preset");
+ gf_proc_dump_write(key, "%d", layout->preset);
+ gf_proc_dump_build_key(key, prefix, "gen");
+ gf_proc_dump_write(key, "%d", layout->gen);
+ if (layout->type != IA_INVAL) {
+ gf_proc_dump_build_key(key, prefix, "inode type");
+ gf_proc_dump_write(key, "%d", layout->type);
+ }
+
+ if (!IA_ISDIR (layout->type))
+ goto out;
+
+ for (i = 0; i < layout->cnt; i++) {
+ gf_proc_dump_build_key(key, prefix,"list[%d].err", i);
+ gf_proc_dump_write(key, "%d", layout->list[i].err);
+ gf_proc_dump_build_key(key, prefix,"list[%d].start", i);
+ gf_proc_dump_write(key, "%u", layout->list[i].start);
+ gf_proc_dump_build_key(key, prefix,"list[%d].stop", i);
+ gf_proc_dump_write(key, "%u", layout->list[i].stop);
+ if (layout->list[i].xlator) {
+ gf_proc_dump_build_key(key, prefix,
+ "list[%d].xlator.type", i);
+ gf_proc_dump_write(key, "%s",
+ layout->list[i].xlator->type);
+ gf_proc_dump_build_key(key, prefix,
+ "list[%d].xlator.name", i);
+ gf_proc_dump_write(key, "%s",
+ layout->list[i].xlator->name);
+ }
+ }
+
+out:
+ return;
+}
+
+
+int32_t
+dht_priv_dump (xlator_t *this)
+{
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 0;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+
+ if (!this)
+ goto out;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ ret = TRY_LOCK(&conf->subvolume_lock);
+ if (ret != 0) {
+ return ret;
+ }
+
+ gf_proc_dump_add_section("xlator.cluster.dht.%s.priv", this->name);
+ gf_proc_dump_build_key(key_prefix,"xlator.cluster.dht","%s.priv",
+ this->name);
+ gf_proc_dump_write("subvol_cnt","%d", conf->subvolume_cnt);
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ sprintf (key, "subvolumes[%d]", i);
+ gf_proc_dump_write(key, "%s.%s", conf->subvolumes[i]->type,
+ conf->subvolumes[i]->name);
+ if (conf->file_layouts && conf->file_layouts[i]){
+ sprintf (key, "file_layouts[%d]", i);
+ dht_layout_dump(conf->file_layouts[i], key);
+ }
+ if (conf->dir_layouts && conf->dir_layouts[i]) {
+ sprintf (key, "dir_layouts[%d]", i);
+ dht_layout_dump(conf->dir_layouts[i], key);
+ }
+ if (conf->subvolume_status) {
+
+ sprintf (key, "subvolume_status[%d]", i);
+ gf_proc_dump_write(key, "%d",
+ (int)conf->subvolume_status[i]);
+ }
+
+ }
+
+ gf_proc_dump_write("search_unhashed", "%d", conf->search_unhashed);
+ gf_proc_dump_write("gen", "%d", conf->gen);
+ gf_proc_dump_write("min_free_disk", "%lf", conf->min_free_disk);
+ gf_proc_dump_write("min_free_inodes", "%lf", conf->min_free_inodes);
+ gf_proc_dump_write("disk_unit", "%c", conf->disk_unit);
+ gf_proc_dump_write("refresh_interval", "%d", conf->refresh_interval);
+ gf_proc_dump_write("unhashed_sticky_bit", "%d", conf->unhashed_sticky_bit);
+ if (conf ->du_stats) {
+ gf_proc_dump_write("du_stats.avail_percent", "%lf",
+ conf->du_stats->avail_percent);
+ gf_proc_dump_write("du_stats.avail_space", "%lu",
+ conf->du_stats->avail_space);
+ gf_proc_dump_write("du_stats.avail_inodes", "%lf",
+ conf->du_stats->avail_inodes);
+ gf_proc_dump_write("du_stats.log", "%lu", conf->du_stats->log);
+ }
+
+ if (conf->last_stat_fetch.tv_sec)
+ gf_proc_dump_write("last_stat_fetch", "%s",
+ ctime(&conf->last_stat_fetch.tv_sec));
+
+ UNLOCK(&conf->subvolume_lock);
+
+out:
+ return ret;
+}
+
+int32_t
+dht_inodectx_dump (xlator_t *this, inode_t *inode)
+{
+ int ret = -1;
+ dht_layout_t *layout = NULL;
+
+ if (!this)
+ goto out;
+ if (!inode)
+ goto out;
+
+ ret = dht_inode_ctx_layout_get (inode, this, &layout);
+
+ if ((ret != 0) || !layout)
+ return ret;
+
+ gf_proc_dump_add_section("xlator.cluster.dht.%s.inode", this->name);
+ dht_layout_dump(layout, "layout");
+
+out:
+ return ret;
+}
+
+void
+dht_fini (xlator_t *this)
+{
+ int i = 0;
+ dht_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+
+ conf = this->private;
+ this->private = NULL;
+ if (conf) {
+ if (conf->file_layouts) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ GF_FREE (conf->file_layouts[i]);
+ }
+ GF_FREE (conf->file_layouts);
+ }
+
+ GF_FREE (conf->subvolumes);
+
+ GF_FREE (conf->subvolume_status);
+
+ GF_FREE (conf);
+ }
+out:
+ return;
+}
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+
+ ret = xlator_mem_acct_init (this, gf_dht_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ return ret;
+ }
+out:
+ return ret;
+}
+
+
+int
+dht_parse_decommissioned_bricks (xlator_t *this, dht_conf_t *conf,
+ const char *bricks)
+{
+ int i = 0;
+ int ret = -1;
+ char *tmpstr = NULL;
+ char *dup_brick = NULL;
+ char *node = NULL;
+
+ if (!conf || !bricks)
+ goto out;
+
+ dup_brick = gf_strdup (bricks);
+ node = strtok_r (dup_brick, ",", &tmpstr);
+ while (node) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!strcmp (conf->subvolumes[i]->name, node)) {
+ conf->decommissioned_bricks[i] =
+ conf->subvolumes[i];
+ conf->decommission_subvols_cnt++;
+ gf_log (this->name, GF_LOG_INFO,
+ "decommissioning subvolume %s",
+ conf->subvolumes[i]->name);
+ break;
+ }
+ }
+ if (i == conf->subvolume_cnt) {
+ /* Wrong node given. */
+ goto out;
+ }
+ node = strtok_r (NULL, ",", &tmpstr);
+ }
+
+ ret = 0;
+ conf->decommission_in_progress = 1;
+out:
+ GF_FREE (dup_brick);
+
+ return ret;
+}
+
+
+int
+dht_decommissioned_remove (xlator_t *this, dht_conf_t *conf)
+{
+ int i = 0;
+ int ret = -1;
+
+ if (!conf)
+ goto out;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->decommissioned_bricks[i]) {
+ conf->decommissioned_bricks[i] = NULL;
+ conf->decommission_subvols_cnt--;
+ }
+ }
+
+ ret = 0;
+out:
+
+ return ret;
+}
+void
+dht_init_regex (xlator_t *this, dict_t *odict, char *name,
+ regex_t *re, gf_boolean_t *re_valid)
+{
+ char *temp_str;
+
+ if (dict_get_str (odict, name, &temp_str) != 0) {
+ if (strcmp(name,"rsync-hash-regex")) {
+ return;
+ }
+ temp_str = "^\\.(.+)\\.[^.]+$";
+ }
+
+ if (*re_valid) {
+ regfree(re);
+ *re_valid = _gf_false;
+ }
+
+ if (!strcmp(temp_str,"none")) {
+ return;
+ }
+
+ if (regcomp(re,temp_str,REG_EXTENDED) == 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "using regex %s = %s", name, temp_str);
+ *re_valid = _gf_true;
+ }
+ else {
+ gf_log (this->name, GF_LOG_WARNING,
+ "compiling regex %s failed", temp_str);
+ }
+}
+
+int
+dht_reconfigure (xlator_t *this, dict_t *options)
+{
+ dht_conf_t *conf = NULL;
+ char *temp_str = NULL;
+ gf_boolean_t search_unhashed;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("dht", this, out);
+ GF_VALIDATE_OR_GOTO ("dht", options, out);
+
+ conf = this->private;
+ if (!conf)
+ return 0;
+
+ if (dict_get_str (options, "lookup-unhashed", &temp_str) == 0) {
+ /* If option is not "auto", other options _should_ be boolean*/
+ if (strcasecmp (temp_str, "auto")) {
+ if (!gf_string2boolean (temp_str, &search_unhashed)) {
+ gf_log(this->name, GF_LOG_DEBUG, "Reconfigure:"
+ " lookup-unhashed reconfigured (%s)",
+ temp_str);
+ conf->search_unhashed = search_unhashed;
+ } else {
+ gf_log(this->name, GF_LOG_ERROR, "Reconfigure:"
+ " lookup-unhashed should be boolean,"
+ " not (%s), defaulting to (%d)",
+ temp_str, conf->search_unhashed);
+ //return -1;
+ ret = -1;
+ goto out;
+ }
+ } else {
+ gf_log(this->name, GF_LOG_DEBUG, "Reconfigure:"
+ " lookup-unhashed reconfigured auto ");
+ conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
+ }
+ }
+
+ GF_OPTION_RECONF ("min-free-disk", conf->min_free_disk, options,
+ percent_or_size, out);
+ /* option can be any one of percent or bytes */
+ conf->disk_unit = 0;
+ if (conf->min_free_disk < 100.0)
+ conf->disk_unit = 'p';
+
+ GF_OPTION_RECONF ("min-free-inodes", conf->min_free_inodes, options,
+ percent, out);
+
+ GF_OPTION_RECONF ("directory-layout-spread", conf->dir_spread_cnt,
+ options, uint32, out);
+
+ GF_OPTION_RECONF ("readdir-optimize", conf->readdir_optimize, options,
+ bool, out);
+ if (conf->defrag) {
+ GF_OPTION_RECONF ("rebalance-stats", conf->defrag->stats,
+ options, bool, out);
+ }
+
+ if (dict_get_str (options, "decommissioned-bricks", &temp_str) == 0) {
+ ret = dht_parse_decommissioned_bricks (this, conf, temp_str);
+ if (ret == -1)
+ goto out;
+ } else {
+ ret = dht_decommissioned_remove (this, conf);
+ if (ret == -1)
+ goto out;
+ }
+
+ dht_init_regex (this, options, "rsync-hash-regex",
+ &conf->rsync_regex, &conf->rsync_regex_valid);
+ dht_init_regex (this, options, "extra-hash-regex",
+ &conf->extra_regex, &conf->extra_regex_valid);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static int
+gf_defrag_pattern_list_fill (xlator_t *this, gf_defrag_info_t *defrag, char *data)
+{
+ int ret = -1;
+ char *tmp_str = NULL;
+ char *tmp_str1 = NULL;
+ char *dup_str = NULL;
+ char *num = NULL;
+ char *pattern_str = NULL;
+ char *pattern = NULL;
+ gf_defrag_pattern_list_t *temp_list = NULL;
+ gf_defrag_pattern_list_t *pattern_list = NULL;
+
+ if (!this || !defrag || !data)
+ goto out;
+
+ /* Get the pattern for pattern list. "pattern:<optional-size>"
+ * eg: *avi, *pdf:10MB, *:1TB
+ */
+ pattern_str = strtok_r (data, ",", &tmp_str);
+ while (pattern_str) {
+ dup_str = gf_strdup (pattern_str);
+ pattern_list = GF_CALLOC (1, sizeof (gf_defrag_pattern_list_t),
+ 1);
+ if (!pattern_list) {
+ goto out;
+ }
+ pattern = strtok_r (dup_str, ":", &tmp_str1);
+ num = strtok_r (NULL, ":", &tmp_str1);
+ if (!pattern)
+ goto out;
+ if (!num) {
+ if (gf_string2bytesize(pattern, &pattern_list->size)
+ == 0) {
+ pattern = "*";
+ }
+ } else if (gf_string2bytesize (num, &pattern_list->size) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "invalid number format \"%s\"", num);
+ goto out;
+ }
+ memcpy (pattern_list->path_pattern, pattern, strlen (dup_str));
+
+ if (!defrag->defrag_pattern)
+ temp_list = NULL;
+ else
+ temp_list = defrag->defrag_pattern;
+
+ pattern_list->next = temp_list;
+
+ defrag->defrag_pattern = pattern_list;
+ pattern_list = NULL;
+
+ GF_FREE (dup_str);
+ dup_str = NULL;
+
+ pattern_str = strtok_r (NULL, ",", &tmp_str);
+ }
+
+ ret = 0;
+out:
+ if (ret)
+ GF_FREE (pattern_list);
+ GF_FREE (dup_str);
+
+ return ret;
+}
+
+int
+dht_init (xlator_t *this)
+{
+ dht_conf_t *conf = NULL;
+ char *temp_str = NULL;
+ int ret = -1;
+ int i = 0;
+ gf_defrag_info_t *defrag = NULL;
+ int cmd = 0;
+ char *node_uuid = NULL;
+
+
+ GF_VALIDATE_OR_GOTO ("dht", this, err);
+
+ if (!this->children) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Distribute needs more than one subvolume");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile");
+ }
+
+ conf = GF_CALLOC (1, sizeof (*conf), gf_dht_mt_dht_conf_t);
+ if (!conf) {
+ goto err;
+ }
+
+ ret = dict_get_int32 (this->options, "rebalance-cmd", &cmd);
+
+ if (cmd) {
+ defrag = GF_CALLOC (1, sizeof (gf_defrag_info_t),
+ gf_defrag_info_mt);
+
+ GF_VALIDATE_OR_GOTO (this->name, defrag, err);
+
+ LOCK_INIT (&defrag->lock);
+
+ defrag->is_exiting = 0;
+
+ conf->defrag = defrag;
+
+ ret = dict_get_str (this->options, "node-uuid", &node_uuid);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "node-uuid not "
+ "specified");
+ goto err;
+ }
+
+ if (uuid_parse (node_uuid, defrag->node_uuid)) {
+ gf_log (this->name, GF_LOG_ERROR, "Cannot parse "
+ "glusterd node uuid");
+ goto err;
+ }
+
+ defrag->cmd = cmd;
+
+ defrag->stats = _gf_false;
+ }
+
+ conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON;
+ if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) {
+ /* If option is not "auto", other options _should_ be boolean */
+ if (strcasecmp (temp_str, "auto"))
+ gf_string2boolean (temp_str, &conf->search_unhashed);
+ else
+ conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
+ }
+
+ GF_OPTION_INIT ("unhashed-sticky-bit", conf->unhashed_sticky_bit, bool,
+ err);
+
+ GF_OPTION_INIT ("use-readdirp", conf->use_readdirp, bool, err);
+
+ GF_OPTION_INIT ("min-free-disk", conf->min_free_disk, percent_or_size,
+ err);
+
+ GF_OPTION_INIT ("min-free-inodes", conf->min_free_inodes, percent,
+ err);
+
+ conf->dir_spread_cnt = conf->subvolume_cnt;
+ GF_OPTION_INIT ("directory-layout-spread", conf->dir_spread_cnt,
+ uint32, err);
+
+ GF_OPTION_INIT ("assert-no-child-down", conf->assert_no_child_down,
+ bool, err);
+
+ GF_OPTION_INIT ("readdir-optimize", conf->readdir_optimize, bool, err);
+
+ if (defrag) {
+ GF_OPTION_INIT ("rebalance-stats", defrag->stats, bool, err);
+ if (dict_get_str (this->options, "rebalance-filter", &temp_str)
+ == 0) {
+ if (gf_defrag_pattern_list_fill (this, defrag, temp_str)
+ == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "Cannot parse"
+ " rebalance-filter (%s)", temp_str);
+ goto err;
+ }
+ }
+ }
+
+ /* option can be any one of percent or bytes */
+ conf->disk_unit = 0;
+ if (conf->min_free_disk < 100)
+ conf->disk_unit = 'p';
+
+ ret = dht_init_subvolumes (this, conf);
+ if (ret == -1) {
+ goto err;
+ }
+
+ if (dict_get_str (this->options, "decommissioned-bricks", &temp_str) == 0) {
+ ret = dht_parse_decommissioned_bricks (this, conf, temp_str);
+ if (ret == -1)
+ goto err;
+ }
+
+ dht_init_regex (this, this->options, "rsync-hash-regex",
+ &conf->rsync_regex, &conf->rsync_regex_valid);
+ dht_init_regex (this, this->options, "extra-hash-regex",
+ &conf->extra_regex, &conf->extra_regex_valid);
+
+ ret = dht_layouts_init (this, conf);
+ if (ret == -1) {
+ goto err;
+ }
+
+ LOCK_INIT (&conf->subvolume_lock);
+ LOCK_INIT (&conf->layout_lock);
+
+ conf->gen = 1;
+
+ this->local_pool = mem_pool_new (dht_local_t, 512);
+ if (!this->local_pool) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto err;
+ }
+
+ GF_OPTION_INIT ("xattr-name", conf->xattr_name, str, err);
+ gf_asprintf (&conf->link_xattr_name, "%s.linkto", conf->xattr_name);
+ gf_asprintf (&conf->wild_xattr_name, "%s*", conf->xattr_name);
+ if (!conf->link_xattr_name || !conf->wild_xattr_name) {
+ goto err;
+ }
+
+ this->private = conf;
+
+ return 0;
+
+err:
+ if (conf) {
+ if (conf->file_layouts) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ GF_FREE (conf->file_layouts[i]);
+ }
+ GF_FREE (conf->file_layouts);
+ }
+
+ GF_FREE (conf->subvolumes);
+
+ GF_FREE (conf->subvolume_status);
+
+ GF_FREE (conf->du_stats);
+
+ GF_FREE (conf->defrag);
+
+ GF_FREE (conf->xattr_name);
+ GF_FREE (conf->link_xattr_name);
+ GF_FREE (conf->wild_xattr_name);
+
+ GF_FREE (conf);
+ }
+
+ return -1;
+}
+
+
+struct volume_options options[] = {
+ { .key = {"lookup-unhashed"},
+ .value = {"auto", "yes", "no", "enable", "disable", "1", "0",
+ "on", "off"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "on",
+ .description = "This option if set to ON, does a lookup through "
+ "all the sub-volumes, in case a lookup didn't return any result "
+ "from the hash subvolume. If set to OFF, it does not do a lookup "
+ "on the remaining subvolumes."
+ },
+ { .key = {"min-free-disk"},
+ .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
+ .default_value = "10%",
+ .description = "Percentage/Size of disk space, after which the "
+ "process starts balancing out the cluster, and logs will appear "
+ "in log files",
+ },
+ { .key = {"min-free-inodes"},
+ .type = GF_OPTION_TYPE_PERCENT,
+ .default_value = "5%",
+ .description = "after system has only N% of inodes, warnings "
+ "starts to appear in log files",
+ },
+ { .key = {"unhashed-sticky-bit"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ },
+ { .key = {"use-readdirp"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "This option if set to ON, forces the use of "
+ "readdirp, and hence also displays the stats of the files."
+ },
+ { .key = {"assert-no-child-down"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option if set to ON, in the event of "
+ "CHILD_DOWN, will call exit."
+ },
+ { .key = {"directory-layout-spread"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .validate = GF_OPT_VALIDATE_MIN,
+ .description = "Specifies the directory layout spread."
+ },
+ { .key = {"decommissioned-bricks"},
+ .type = GF_OPTION_TYPE_ANY,
+ .description = "This option if set to ON, decommissions "
+ "the brick, so that no new data is allowed to be created "
+ "on that brick."
+ },
+ { .key = {"rebalance-cmd"},
+ .type = GF_OPTION_TYPE_INT,
+ },
+ { .key = {"node-uuid"},
+ .type = GF_OPTION_TYPE_STR,
+ },
+ { .key = {"rebalance-stats"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option if set to ON displays and logs the "
+ " time taken for migration of each file, during the rebalance "
+ "process. If set to OFF, the rebalance logs will only display the "
+ "time spent in each directory."
+ },
+ { .key = {"readdir-optimize"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option if set to ON enables the optimization "
+ "that allows DHT to requests non-first subvolumes to filter out "
+ "directory entries."
+ },
+ { .key = {"rsync-hash-regex"},
+ .type = GF_OPTION_TYPE_STR,
+ /* Setting a default here doesn't work. See dht_init_regex. */
+ .description = "Regular expression for stripping temporary-file "
+ "suffix and prefix used by rsync, to prevent relocation when the "
+ "file is renamed."
+ },
+ { .key = {"extra-hash-regex"},
+ .type = GF_OPTION_TYPE_STR,
+ /* Setting a default here doesn't work. See dht_init_regex. */
+ .description = "Regular expression for stripping temporary-file "
+ "suffix and prefix used by an application, to prevent relocation when "
+ "the file is renamed."
+ },
+ { .key = {"rebalance-filter"},
+ .type = GF_OPTION_TYPE_STR,
+ },
+
+ { .key = {"xattr-name"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "trusted.glusterfs.dht",
+ .description = "Base for extended attributes used by this "
+ "translator instance, to avoid conflicts with others above or "
+ "below it."
+ },
+
+ /* NUFA option */
+ { .key = {"local-volume-name"},
+ .type = GF_OPTION_TYPE_XLATOR
+ },
+
+ /* switch option */
+ { .key = {"pattern.switch.case"},
+ .type = GF_OPTION_TYPE_ANY
+ },
+
+ { .key = {NULL} },
+};
diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c
index 46f3b8700..fc0ca2f77 100644
--- a/xlators/cluster/dht/src/dht.c
+++ b/xlators/cluster/dht/src/dht.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -23,380 +14,65 @@
#include "config.h"
#endif
-/* TODO: add NS locking */
-
#include "statedump.h"
-#include "dht-common.c"
-
-/* TODO:
- - use volumename in xattr instead of "dht"
- - use NS locks
- - handle all cases in self heal layout reconstruction
- - complete linkfile selfheal
-*/
-
-
-void
-dht_layout_dump (dht_layout_t *layout, const char *prefix)
-{
-
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 0;
-
- if (!layout)
- return;
-
- gf_proc_dump_build_key(key, prefix, "cnt");
- gf_proc_dump_write(key, "%d", layout->cnt);
- gf_proc_dump_build_key(key, prefix, "preset");
- gf_proc_dump_write(key, "%d", layout->preset);
- gf_proc_dump_build_key(key, prefix, "gen");
- gf_proc_dump_write(key, "%d", layout->gen);
- gf_proc_dump_build_key(key, prefix, "type");
- gf_proc_dump_write(key, "%d", layout->type);
-
- for (i = 0; i < layout->cnt; i++) {
- gf_proc_dump_build_key(key, prefix,"list[%d].err", i);
- gf_proc_dump_write(key, "%d", layout->list[i].err);
- gf_proc_dump_build_key(key, prefix,"list[%d].start", i);
- gf_proc_dump_write(key, "%u", layout->list[i].start);
- gf_proc_dump_build_key(key, prefix,"list[%d].stop", i);
- gf_proc_dump_write(key, "%u", layout->list[i].stop);
- if (layout->list[i].xlator) {
- gf_proc_dump_build_key(key, prefix,
- "list[%d].xlator.type", i);
- gf_proc_dump_write(key, "%s",
- layout->list[i].xlator->type);
- gf_proc_dump_build_key(key, prefix,
- "list[%d].xlator.name", i);
- gf_proc_dump_write(key, "%s",
- layout->list[i].xlator->name);
- }
- }
-}
-
-
-int32_t
-dht_priv_dump (xlator_t *this)
-{
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 0;
- dht_conf_t *conf = NULL;
- int ret = 0;
-
- if (!this)
- return -1;
-
- conf = this->private;
-
- if (!conf)
- return -1;
-
- ret = TRY_LOCK(&conf->subvolume_lock);
-
- if (ret != 0) {
- gf_log("", GF_LOG_WARNING, "Unable to lock dht subvolume %s",
- this->name);
- return ret;
- }
-
- gf_proc_dump_add_section("xlator.cluster.dht.%s.priv", this->name);
- gf_proc_dump_build_key(key_prefix,"xlator.cluster.dht","%s.priv",
- this->name);
- gf_proc_dump_build_key(key, key_prefix, "subvolume_cnt");
- gf_proc_dump_write(key,"%d", conf->subvolume_cnt);
- for (i = 0; i < conf->subvolume_cnt; i++) {
- gf_proc_dump_build_key(key, key_prefix, "subvolumes[%d]", i);
- gf_proc_dump_write(key, "%s.%s", conf->subvolumes[i]->type,
- conf->subvolumes[i]->name);
- if (conf->file_layouts && conf->file_layouts[i]){
- gf_proc_dump_build_key(key, key_prefix,
- "file_layouts[%d]",i);
- dht_layout_dump(conf->file_layouts[i], key);
- }
- if (conf->dir_layouts && conf->dir_layouts[i]) {
- gf_proc_dump_build_key(key, key_prefix,
- "dir_layouts[%d]",i);
- dht_layout_dump(conf->dir_layouts[i], key);
- }
- if (conf->subvolume_status) {
- gf_proc_dump_build_key(key, key_prefix,
- "subvolume_status[%d]", i);
- gf_proc_dump_write(key, "%d",
- (int)conf->subvolume_status[i]);
- }
-
- }
-
- gf_proc_dump_build_key(key, key_prefix,"default_dir_layout");
- dht_layout_dump(conf->default_dir_layout, key);
-
- gf_proc_dump_build_key(key, key_prefix, "local_volume");
- gf_proc_dump_write(key,"%s",
- conf->local_volume?(conf->local_volume->name):"None");
- gf_proc_dump_build_key(key, key_prefix, "search_unhashed");
- gf_proc_dump_write(key, "%d", conf->search_unhashed);
- gf_proc_dump_build_key(key, key_prefix, "gen");
- gf_proc_dump_write(key, "%d", conf->gen);
- gf_proc_dump_build_key(key, key_prefix, "min_free_disk");
- gf_proc_dump_write(key, "%lu", conf->min_free_disk);
- gf_proc_dump_build_key(key, key_prefix, "disk_unit");
- gf_proc_dump_write(key, "%c", conf->disk_unit);
- gf_proc_dump_build_key(key, key_prefix, "refresh_interval");
- gf_proc_dump_write(key, "%d", conf->refresh_interval);
- gf_proc_dump_build_key(key, key_prefix, "unhashed_sticky_bit");
- gf_proc_dump_write(key, "%d", conf->unhashed_sticky_bit);
- if (conf ->du_stats) {
- gf_proc_dump_build_key(key, key_prefix,
- "du_stats.avail_percent");
- gf_proc_dump_write(key, "%lf", conf->du_stats->avail_percent);
- gf_proc_dump_build_key(key, key_prefix,
- "du_stats.avail_space");
- gf_proc_dump_write(key, "%lu", conf->du_stats->avail_space);
- gf_proc_dump_build_key(key, key_prefix,
- "du_stats.log");
- gf_proc_dump_write(key, "%lu", conf->du_stats->log);
- }
- gf_proc_dump_build_key(key, key_prefix, "last_stat_fetch");
- gf_proc_dump_write(key, "%s", ctime(&conf->last_stat_fetch.tv_sec));
-
- UNLOCK(&conf->subvolume_lock);
-
- return 0;
-}
-
-int32_t
-dht_inodectx_dump (xlator_t *this, inode_t *inode)
-{
- int ret = -1;
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
- dht_layout_t *layout = NULL;
- uint64_t tmp_layout = 0;
-
- if (!inode)
- return -1;
-
- ret = inode_ctx_get (inode, this, &tmp_layout);
-
- if (ret != 0)
- return ret;
-
- layout = (dht_layout_t *)(long)tmp_layout;
-
- if (!layout)
- return -1;
-
- gf_proc_dump_build_key(key_prefix, "xlator.cluster.dht",
- "%s.inode.%ld", this->name, inode->ino);
- dht_layout_dump(layout, key_prefix);
-
- return 0;
-}
-
-int
-notify (xlator_t *this, int event, void *data, ...)
-{
- int ret = -1;
-
- ret = dht_notify (this, event, data);
-
- return ret;
-}
-
-void
-fini (xlator_t *this)
-{
- int i = 0;
- dht_conf_t *conf = NULL;
-
- conf = this->private;
-
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- FREE (conf->file_layouts[i]);
- }
- FREE (conf->file_layouts);
- }
+#include "dht-common.h"
- if (conf->default_dir_layout)
- FREE (conf->default_dir_layout);
-
- if (conf->subvolumes)
- FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- FREE (conf->subvolume_status);
-
- FREE (conf);
- }
-
- return;
-}
-
-int
-init (xlator_t *this)
-{
- dht_conf_t *conf = NULL;
- char *temp_str = NULL;
- int ret = -1;
- int i = 0;
- uint32_t temp_free_disk = 0;
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Distribute needs more than one subvolume");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile");
- }
-
- conf = CALLOC (1, sizeof (*conf));
- if (!conf) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- conf->search_unhashed = 0;
-
- if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) {
- gf_string2boolean (temp_str, &conf->search_unhashed);
- }
-
- conf->unhashed_sticky_bit = 0;
-
- if (dict_get_str (this->options, "unhashed-sticky-bit",
- &temp_str) == 0) {
- gf_string2boolean (temp_str, &conf->unhashed_sticky_bit);
- }
-
- conf->disk_unit = 'p';
- conf->min_free_disk = 10;
-
- if (dict_get_str (this->options, "min-free-disk", &temp_str) == 0) {
- if (gf_string2percent (temp_str, &temp_free_disk) == 0) {
- if (temp_free_disk > 100) {
- gf_string2bytesize (temp_str,
- &conf->min_free_disk);
- conf->disk_unit = 'b';
- } else {
- conf->min_free_disk = (uint64_t)temp_free_disk;
- }
- } else {
- gf_string2bytesize (temp_str, &conf->min_free_disk);
- conf->disk_unit = 'b';
- }
- }
-
-
- ret = dht_init_subvolumes (this, conf);
- if (ret == -1) {
- goto err;
- }
-
- ret = dht_layouts_init (this, conf);
- if (ret == -1) {
- goto err;
- }
-
- conf->du_stats = CALLOC (conf->subvolume_cnt, sizeof (dht_du_t));
- if (!conf->du_stats) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- LOCK_INIT (&conf->subvolume_lock);
-
- conf->gen = 1;
-
- this->private = conf;
-
- return 0;
-
-err:
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- FREE (conf->file_layouts[i]);
- }
- FREE (conf->file_layouts);
- }
-
- if (conf->default_dir_layout)
- FREE (conf->default_dir_layout);
-
- if (conf->subvolumes)
- FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- FREE (conf->subvolume_status);
-
- if (conf->du_stats)
- FREE (conf->du_stats);
-
- FREE (conf);
- }
-
- return -1;
-}
-
-
-struct xlator_fops fops = {
- .lookup = dht_lookup,
- .mknod = dht_mknod,
- .create = dht_create,
-
- .stat = dht_stat,
- .chmod = dht_chmod,
- .chown = dht_chown,
- .fchown = dht_fchown,
- .fchmod = dht_fchmod,
- .fstat = dht_fstat,
- .utimens = dht_utimens,
- .truncate = dht_truncate,
- .ftruncate = dht_ftruncate,
- .access = dht_access,
- .readlink = dht_readlink,
- .setxattr = dht_setxattr,
- .getxattr = dht_getxattr,
- .removexattr = dht_removexattr,
- .open = dht_open,
- .readv = dht_readv,
- .writev = dht_writev,
- .flush = dht_flush,
- .fsync = dht_fsync,
- .statfs = dht_statfs,
- .lk = dht_lk,
- .opendir = dht_opendir,
- .readdir = dht_readdir,
- .fsyncdir = dht_fsyncdir,
- .symlink = dht_symlink,
- .unlink = dht_unlink,
- .link = dht_link,
- .mkdir = dht_mkdir,
- .rmdir = dht_rmdir,
- .rename = dht_rename,
- .inodelk = dht_inodelk,
- .finodelk = dht_finodelk,
- .entrylk = dht_entrylk,
- .fentrylk = dht_fentrylk,
- .xattrop = dht_xattrop,
- .fxattrop = dht_fxattrop,
-#if 0
- .setdents = dht_setdents,
- .getdents = dht_getdents,
- .checksum = dht_checksum,
-#endif
+class_methods_t class_methods = {
+ .init = dht_init,
+ .fini = dht_fini,
+ .reconfigure = dht_reconfigure,
+ .notify = dht_notify
};
-
-struct xlator_mops mops = {
+struct xlator_fops fops = {
+ .lookup = dht_lookup,
+ .mknod = dht_mknod,
+ .create = dht_create,
+
+ .open = dht_open,
+ .statfs = dht_statfs,
+ .opendir = dht_opendir,
+ .readdir = dht_readdir,
+ .readdirp = dht_readdirp,
+ .fsyncdir = dht_fsyncdir,
+ .symlink = dht_symlink,
+ .unlink = dht_unlink,
+ .link = dht_link,
+ .mkdir = dht_mkdir,
+ .rmdir = dht_rmdir,
+ .rename = dht_rename,
+ .entrylk = dht_entrylk,
+ .fentrylk = dht_fentrylk,
+
+ /* Inode read operations */
+ .stat = dht_stat,
+ .fstat = dht_fstat,
+ .access = dht_access,
+ .readlink = dht_readlink,
+ .getxattr = dht_getxattr,
+ .fgetxattr = dht_fgetxattr,
+ .readv = dht_readv,
+ .flush = dht_flush,
+ .fsync = dht_fsync,
+ .inodelk = dht_inodelk,
+ .finodelk = dht_finodelk,
+ .lk = dht_lk,
+
+ /* Inode write operations */
+ .fremovexattr = dht_fremovexattr,
+ .removexattr = dht_removexattr,
+ .setxattr = dht_setxattr,
+ .fsetxattr = dht_fsetxattr,
+ .truncate = dht_truncate,
+ .ftruncate = dht_ftruncate,
+ .writev = dht_writev,
+ .xattrop = dht_xattrop,
+ .fxattrop = dht_fxattrop,
+ .setattr = dht_setattr,
+ .fsetattr = dht_fsetattr,
+ .fallocate = dht_fallocate,
+ .discard = dht_discard,
+ .zerofill = dht_zerofill,
};
struct xlator_dumpops dumpops = {
@@ -406,18 +82,8 @@ struct xlator_dumpops dumpops = {
struct xlator_cbks cbks = {
-// .release = dht_release,
+// .release = dht_release,
// .releasedir = dht_releasedir,
- .forget = dht_forget
-};
-
-
-struct volume_options options[] = {
- { .key = {"lookup-unhashed"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"min-free-disk"},
- .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
- },
- { .key = {NULL} },
+ .forget = dht_forget
};
+;
diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c
index c8ce7fdee..e934acdf0 100644
--- a/xlators/cluster/dht/src/nufa.c
+++ b/xlators/cluster/dht/src/nufa.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -23,16 +14,18 @@
#include "config.h"
#endif
-#include "dht-common.c"
+#include "dht-common.h"
/* TODO: all 'TODO's in dht.c holds good */
-int
+extern struct volume_options options[];
+
+int
nufa_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct stat *stbuf, dict_t *xattr)
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
{
- dht_layout_t *layout = NULL;
xlator_t *subvol = NULL;
char is_linkfile = 0;
char is_dir = 0;
@@ -41,8 +34,8 @@ nufa_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
loc_t *loc = NULL;
int i = 0;
call_frame_t *prev = NULL;
- int call_cnt = 0;
-
+ int call_cnt = 0;
+ int ret = 0;
conf = this->private;
@@ -50,58 +43,52 @@ nufa_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
loc = &local->loc;
- if (ENTRY_MISSING (op_ret, op_errno)) {
- if (conf->search_unhashed) {
- local->op_errno = ENOENT;
- dht_lookup_everywhere (frame, this, loc);
- return 0;
- }
- }
+ if (ENTRY_MISSING (op_ret, op_errno)) {
+ if (conf->search_unhashed) {
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere (frame, this, loc);
+ return 0;
+ }
+ }
if (op_ret == -1)
goto out;
- is_linkfile = check_is_linkfile (inode, stbuf, xattr);
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
is_dir = check_is_dir (inode, stbuf, xattr);
if (!is_dir && !is_linkfile) {
/* non-directory and not a linkfile */
+ ret = dht_layout_preset (this, prev->this, inode);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "could not set pre-set layout for subvol %s",
+ prev->this->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto err;
+ }
- dht_itransform (this, prev->this, stbuf->st_ino,
- &stbuf->st_ino);
-
- layout = dht_layout_for_subvol (this, prev->this);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no pre-set layout for subvolume %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto err;
- }
-
- inode_ctx_put (inode, this, (uint64_t)(long)layout);
goto out;
}
if (is_dir) {
call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
+ local->call_cnt = call_cnt;
local->inode = inode_ref (inode);
local->xattr = dict_ref (xattr);
- local->op_ret = 0;
- local->op_errno = 0;
+ local->op_ret = 0;
+ local->op_errno = 0;
- local->layout = dht_layout_new (this, conf->subvolume_cnt);
- if (!local->layout) {
- op_ret = -1;
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_DEBUG,
- "memory allocation failed :(");
- goto err;
- }
+ local->layout = dht_layout_new (this, conf->subvolume_cnt);
+ if (!local->layout) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto err;
+ }
for (i = 0; i < call_cnt; i++) {
STACK_WIND (frame, dht_lookup_dir_cbk,
@@ -118,51 +105,52 @@ nufa_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
gf_log (this->name, GF_LOG_DEBUG,
"linkfile not having link subvolume. path=%s",
loc->path);
- dht_lookup_everywhere (frame, this, loc);
- return 0;
+ dht_lookup_everywhere (frame, this, loc);
+ return 0;
}
- STACK_WIND (frame, dht_lookup_linkfile_cbk,
- subvol, subvol->fops->lookup,
- &local->loc, local->xattr_req);
+ STACK_WIND (frame, dht_lookup_linkfile_cbk,
+ subvol, subvol->fops->lookup,
+ &local->loc, local->xattr_req);
}
return 0;
out:
- if (!local->hashed_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- local->loc.path);
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, dht_lookup_cbk,
- local->hashed_subvol, local->hashed_subvol->fops->lookup,
- &local->loc, local->xattr_req);
-
- return 0;
-
- err:
- DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf, xattr);
+ if (!local->hashed_subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no subvolume in layout for path=%s",
+ local->loc.path);
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere (frame, this, loc);
+ return 0;
+ }
+
+ STACK_WIND (frame, dht_lookup_cbk,
+ local->hashed_subvol, local->hashed_subvol->fops->lookup,
+ &local->loc, local->xattr_req);
+
+ return 0;
+
+err:
+ DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno,
+ inode, stbuf, xattr, postparent);
return 0;
}
int
nufa_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
+ loc_t *loc, dict_t *xattr_req)
{
xlator_t *hashed_subvol = NULL;
- xlator_t *cached_subvol = NULL;
xlator_t *subvol = NULL;
dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
+ dht_conf_t *conf = NULL;
int ret = -1;
int op_errno = -1;
- dht_layout_t *layout = NULL;
- int i = 0;
- int call_cnt = 0;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
VALIDATE_OR_GOTO (frame, err);
@@ -171,40 +159,26 @@ nufa_lookup (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (loc->inode, err);
VALIDATE_OR_GOTO (loc->path, err);
- conf = this->private;
-
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
+ conf = this->private;
- ret = loc_dup (loc, &local->loc);
- if (ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "copying location failed for path=%s",
- loc->path);
+ local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP);
+ if (!local) {
+ op_errno = ENOMEM;
goto err;
}
- if (xattr_req) {
- local->xattr_req = dict_ref (xattr_req);
- } else {
- local->xattr_req = dict_new ();
- }
+ if (xattr_req) {
+ local->xattr_req = dict_ref (xattr_req);
+ } else {
+ local->xattr_req = dict_new ();
+ }
- hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
- cached_subvol = dht_subvol_get_cached (this, local->loc.inode);
-
- local->cached_subvol = cached_subvol;
- local->hashed_subvol = hashed_subvol;
+ hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
- if (is_revalidate (loc)) {
- layout = dht_layout_get (this, loc->inode);
+ local->hashed_subvol = hashed_subvol;
+ if (is_revalidate (loc)) {
+ layout = local->layout;
if (!layout) {
gf_log (this->name, GF_LOG_DEBUG,
"revalidate without cache. path=%s",
@@ -213,530 +187,490 @@ nufa_lookup (call_frame_t *frame, xlator_t *this,
goto err;
}
- if (layout->gen && (layout->gen < conf->gen)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "incomplete layout failure for path=%s",
- loc->path);
- op_errno = ESTALE;
- goto err;
- }
-
- local->inode = inode_ref (loc->inode);
- local->st_ino = loc->inode->ino;
-
- local->call_cnt = layout->cnt;
- call_cnt = local->call_cnt;
-
- /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute,
- * revalidates directly go to the cached-subvolume.
- */
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
-
- for (i = 0; i < layout->cnt; i++) {
- subvol = layout->list[i].xlator;
-
- STACK_WIND (frame, dht_revalidate_cbk,
- subvol, subvol->fops->lookup,
- loc, local->xattr_req);
-
- if (!--call_cnt)
- break;
- }
- } else {
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
-
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht.linkto", 256);
-
- /* Send it to only local volume */
- STACK_WIND (frame, nufa_local_lookup_cbk,
- conf->local_volume,
- conf->local_volume->fops->lookup,
- loc, local->xattr_req);
- }
+ if (layout->gen && (layout->gen < conf->gen)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "incomplete layout failure for path=%s",
+ loc->path);
+ dht_layout_unref (this, local->layout);
+ goto do_fresh_lookup;
+ }
+
+ local->inode = inode_ref (loc->inode);
+
+ local->call_cnt = layout->cnt;
+ call_cnt = local->call_cnt;
+
+ /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute,
+ * revalidates directly go to the cached-subvolume.
+ */
+ ret = dict_set_uint32 (local->xattr_req,
+ conf->xattr_name, 4 * 4);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set dict value.");
+ op_errno = -1;
+ goto err;
+ }
+
+ for (i = 0; i < layout->cnt; i++) {
+ subvol = layout->list[i].xlator;
+
+ STACK_WIND (frame, dht_revalidate_cbk,
+ subvol, subvol->fops->lookup,
+ loc, local->xattr_req);
+
+ if (!--call_cnt)
+ break;
+ }
+ } else {
+ do_fresh_lookup:
+ ret = dict_set_uint32 (local->xattr_req,
+ conf->xattr_name, 4 * 4);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set dict value.");
+ op_errno = -1;
+ goto err;
+ }
+
+ ret = dict_set_uint32 (local->xattr_req,
+ conf->link_xattr_name, 256);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set dict value.");
+ op_errno = -1;
+ goto err;
+ }
+
+ /* Send it to only local volume */
+ STACK_WIND (frame, nufa_local_lookup_cbk,
+ (xlator_t *)conf->private,
+ ((xlator_t *)conf->private)->fops->lookup,
+ loc, local->xattr_req);
+ }
return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
int
-nufa_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- inode_t *inode, struct stat *stbuf)
+nufa_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- dht_conf_t *conf = NULL;
-
- local = frame->local;
- prev = cookie;
- conf = this->private;
-
- if (op_ret == -1)
- goto err;
-
- STACK_WIND (frame, dht_create_cbk,
- local->cached_subvol, local->cached_subvol->fops->create,
- &local->loc, local->flags, local->mode, local->fd);
-
- return 0;
-
- err:
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret == -1)
+ goto err;
+
+ STACK_WIND (frame, dht_create_cbk,
+ local->cached_subvol, local->cached_subvol->fops->create,
+ &local->loc, local->flags, local->mode, local->umask,
+ local->fd, local->params);
+
+ return 0;
+
+err:
+ DHT_STACK_UNWIND (create, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL, NULL);
+ return 0;
}
int
nufa_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode, fd_t *fd)
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *params)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
xlator_t *avail_subvol = NULL;
- int op_errno = -1;
- int ret = -1;
+ int op_errno = -1;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
- conf = this->private;
+ conf = this->private;
dht_get_du_info (frame, this, loc);
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
-
- avail_subvol = conf->local_volume;
- if (dht_is_subvol_filled (this, conf->local_volume)) {
- avail_subvol =
- dht_free_disk_available_subvol (this,
- conf->local_volume);
+ local = dht_local_init (frame, loc, fd, GF_FOP_CREATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
}
-
+
+ subvol = dht_subvol_get_hashed (this, loc);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ avail_subvol = conf->private;
+ if (dht_is_subvol_filled (this, (xlator_t *)conf->private)) {
+ avail_subvol =
+ dht_free_disk_available_subvol (this,
+ (xlator_t *)conf->private,
+ local);
+ }
+
if (subvol != avail_subvol) {
/* create a link file instead of actual file */
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- local->fd = fd_ref (fd);
+ local->params = dict_ref (params);
local->mode = mode;
local->flags = flags;
-
+ local->umask = umask;
local->cached_subvol = avail_subvol;
- dht_linkfile_create (frame,
- nufa_create_linkfile_create_cbk,
- avail_subvol, subvol, loc);
+ dht_linkfile_create (frame, nufa_create_linkfile_create_cbk,
+ this, avail_subvol, subvol, loc);
return 0;
}
gf_log (this->name, GF_LOG_TRACE,
"creating %s on %s", loc->path, subvol->name);
-
+
STACK_WIND (frame, dht_create_cbk,
subvol, subvol->fops->create,
- loc, flags, mode, fd);
-
+ loc, flags, mode, umask, fd, params);
+
return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (create, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
int
nufa_mknod_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct stat *stbuf)
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- dht_conf_t *conf = NULL;
-
- local = frame->local;
- prev = cookie;
- conf = this->private;
-
- if (op_ret >= 0) {
- STACK_WIND (frame, dht_newfile_cbk,
- local->cached_subvol,
- local->cached_subvol->fops->mknod,
- &local->loc, local->mode, local->rdev);
-
- return 0;
- }
-
- DHT_STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf);
- return 0;
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local || !local->cached_subvol) {
+ op_errno = EINVAL;
+ op_ret = -1;
+ goto err;
+ }
+
+ if (op_ret >= 0) {
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk,
+ (void *)local->cached_subvol, local->cached_subvol,
+ local->cached_subvol->fops->mknod,
+ &local->loc, local->mode, local->rdev,
+ local->umask, local->params);
+
+ return 0;
+ }
+err:
+ WIPE (postparent);
+ WIPE (preparent);
+
+ DHT_STACK_UNWIND (link, frame, op_ret, op_errno,
+ inode, stbuf, preparent, postparent, xdata);
+ return 0;
}
int
nufa_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev)
+ loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *params)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
xlator_t *avail_subvol = NULL;
- int op_errno = -1;
- int ret = -1;
+ int op_errno = -1;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
- conf = this->private;
+ conf = this->private;
dht_get_du_info (frame, this, loc);
- local = dht_local_init (frame);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
+ local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = dht_subvol_get_hashed (this, loc);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
/* Consider the disksize in consideration */
- avail_subvol = conf->local_volume;
- if (dht_is_subvol_filled (this, conf->local_volume)) {
- avail_subvol =
- dht_free_disk_available_subvol (this,
- conf->local_volume);
+ avail_subvol = conf->private;
+ if (dht_is_subvol_filled (this, (xlator_t *)conf->private)) {
+ avail_subvol =
+ dht_free_disk_available_subvol (this,
+ (xlator_t *)conf->private,
+ local);
}
- if (avail_subvol != subvol) {
- /* Create linkfile first */
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- local->mode = mode;
- local->rdev = rdev;
- local->cached_subvol = avail_subvol;
-
- dht_linkfile_create (frame, nufa_mknod_linkfile_cbk,
+ if (avail_subvol != subvol) {
+ /* Create linkfile first */
+
+ local->params = dict_ref (params);
+ local->mode = mode;
+ local->umask = umask;
+ local->rdev = rdev;
+ local->cached_subvol = avail_subvol;
+
+ dht_linkfile_create (frame, nufa_mknod_linkfile_cbk, this,
avail_subvol, subvol, loc);
- return 0;
- }
+ return 0;
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s", loc->path, subvol->name);
+ gf_log (this->name, GF_LOG_TRACE,
+ "creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->mknod,
- loc, mode, rdev);
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->mknod, loc, mode, rdev, umask,
+ params);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (mknod, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-int
-notify (xlator_t *this, int event, void *data, ...)
-{
- int ret = -1;
-
- ret = dht_notify (this, event, data);
-
- return ret;
-}
-
-void
-fini (xlator_t *this)
+gf_boolean_t
+same_first_part (char *str1, char term1, char *str2, char term2)
{
- int i = 0;
- dht_conf_t *conf = NULL;
-
- conf = this->private;
-
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- FREE (conf->file_layouts[i]);
- }
- FREE (conf->file_layouts);
+ gf_boolean_t ended1;
+ gf_boolean_t ended2;
+
+ for (;;) {
+ ended1 = ((*str1 == '\0') || (*str1 == term1));
+ ended2 = ((*str2 == '\0') || (*str2 == term2));
+ if (ended1 && ended2) {
+ return _gf_true;
}
+ if (ended1 || ended2 || (*str1 != *str2)) {
+ return _gf_false;
+ }
+ ++str1;
+ ++str2;
+ }
+}
- if (conf->default_dir_layout)
- FREE (conf->default_dir_layout);
-
- if (conf->subvolumes)
- FREE (conf->subvolumes);
+typedef struct nufa_args {
+ xlator_t *this;
+ char *volname;
+ gf_boolean_t addr_match;
+} nufa_args_t;
- if (conf->subvolume_status)
- FREE (conf->subvolume_status);
+static void
+nufa_find_local_brick (xlator_t *xl, void *data)
+{
+ nufa_args_t *args = data;
+ xlator_t *this = args->this;
+ char *local_volname = args->volname;
+ gf_boolean_t addr_match = args->addr_match;
+ char *brick_host = NULL;
+ dht_conf_t *conf = this->private;
+ int ret = -1;
+
+ /*This means a local subvol was already found. We pick the first brick
+ * that is local*/
+ if (conf->private)
+ return;
+
+ if (strcmp (xl->name, local_volname) == 0) {
+ conf->private = xl;
+ gf_log (this->name, GF_LOG_INFO, "Using specified subvol %s",
+ local_volname);
+ return;
+ }
- FREE (conf);
+ if (!addr_match)
+ return;
+
+ ret = dict_get_str (xl->options, "remote-host", &brick_host);
+ if ((ret == 0) &&
+ (gf_is_same_address (local_volname, brick_host) ||
+ gf_is_local_addr (brick_host))) {
+ conf->private = xl;
+ gf_log (this->name, GF_LOG_INFO, "Using the first local "
+ "subvol %s", xl->name);
+ return;
}
- return;
}
-int
-init (xlator_t *this)
+static void
+nufa_to_dht (xlator_t *this)
{
- dht_conf_t *conf = NULL;
- xlator_list_t *trav = NULL;
- data_t *data = NULL;
- char *local_volname = NULL;
- char *temp_str = NULL;
- int ret = -1;
- int i = 0;
- char my_hostname[256];
- uint32_t temp_free_disk = 0;
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "NUFA needs more than one subvolume");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile");
- }
-
- conf = CALLOC (1, sizeof (*conf));
- if (!conf) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
- }
-
- conf->search_unhashed = 0;
+ GF_ASSERT (this);
+ GF_ASSERT (this->fops);
- if (dict_get_str (this->options, "lookup-unhashed",
- &temp_str) == 0) {
- gf_string2boolean (temp_str,
- &conf->search_unhashed);
- }
-
- ret = dht_init_subvolumes (this, conf);
- if (ret == -1) {
- goto err;
- }
+ this->fops->lookup = dht_lookup;
+ this->fops->create = dht_create;
+ this->fops->mknod = dht_mknod;
+}
- ret = dht_layouts_init (this, conf);
- if (ret == -1) {
- goto err;
+int
+nufa_find_local_subvol (xlator_t *this,
+ void (*fn) (xlator_t *each, void* data), void *data)
+{
+ int ret = -1;
+ dht_conf_t *conf = this->private;
+ xlator_list_t *trav = NULL;
+ xlator_t *parent = NULL;
+ xlator_t *candidate = NULL;
+
+ xlator_foreach_depth_first (this, fn, data);
+ if (!conf->private) {
+ gf_log (this->name, GF_LOG_ERROR, "Couldn't find a local "
+ "brick");
+ return -1;
}
- LOCK_INIT (&conf->subvolume_lock);
-
- conf->gen = 1;
-
- local_volname = "localhost";
- ret = gethostname (my_hostname, 256);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "could not find hostname (%s)",
- strerror (errno));
- }
-
- if (ret == 0)
- local_volname = my_hostname;
-
- data = dict_get (this->options, "local-volume-name");
- if (data) {
- local_volname = data->data;
- }
-
- trav = this->children;
- while (trav) {
- if (strcmp (trav->xlator->name, local_volname) == 0)
- break;
- trav = trav->next;
- }
-
- if (!trav) {
- gf_log (this->name, GF_LOG_ERROR,
- "Could not find subvolume named '%s'. "
- "Please define volume with the name as the hostname "
- "or override it with 'option local-volume-name'",
- local_volname);
- goto err;
- }
- /* The volume specified exists */
- conf->local_volume = trav->xlator;
-
- conf->min_free_disk = 10;
- conf->disk_unit = 'p';
-
- if (dict_get_str (this->options, "min-free-disk",
- &temp_str) == 0) {
- if (gf_string2percent (temp_str,
- &temp_free_disk) == 0) {
- if (temp_free_disk > 100) {
- gf_string2bytesize (temp_str,
- &conf->min_free_disk);
- conf->disk_unit = 'b';
- } else {
- conf->min_free_disk = (uint64_t)temp_free_disk;
- conf->disk_unit = 'p';
- }
- } else {
- gf_string2bytesize (temp_str,
- &conf->min_free_disk);
- conf->disk_unit = 'b';
+ candidate = conf->private;
+ trav = candidate->parents;
+ while (trav) {
+
+ parent = trav->xlator;
+ if (strcmp (parent->type, "cluster/nufa") == 0) {
+ gf_log (this->name, GF_LOG_INFO, "Found local subvol, "
+ "%s", candidate->name);
+ ret = 0;
+ conf->private = candidate;
+ break;
}
- }
- conf->du_stats = CALLOC (conf->subvolume_cnt, sizeof (dht_du_t));
- if (!conf->du_stats) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto err;
+ candidate = parent;
+ trav = parent->parents;
}
- this->private = conf;
-
- return 0;
+ return ret;
+}
-err:
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- FREE (conf->file_layouts[i]);
- }
- FREE (conf->file_layouts);
- }
+int
+nufa_init (xlator_t *this)
+{
+ data_t *data = NULL;
+ char *local_volname = NULL;
+ int ret = -1;
+ char my_hostname[256];
+ gf_boolean_t addr_match = _gf_false;
+ nufa_args_t args = {0, };
- if (conf->default_dir_layout)
- FREE (conf->default_dir_layout);
+ ret = dht_init(this);
+ if (ret) {
+ return ret;
+ }
- if (conf->subvolumes)
- FREE (conf->subvolumes);
+ if ((data = dict_get (this->options, "local-volume-name"))) {
+ local_volname = data->data;
- if (conf->subvolume_status)
- FREE (conf->subvolume_status);
+ } else {
+ addr_match = _gf_true;
+ local_volname = "localhost";
+ ret = gethostname (my_hostname, 256);
+ if (ret == 0)
+ local_volname = my_hostname;
- if (conf->du_stats)
- FREE (conf->du_stats);
+ else
+ gf_log (this->name, GF_LOG_WARNING,
+ "could not find hostname (%s)",
+ strerror (errno));
- FREE (conf);
}
- return -1;
+ args.this = this;
+ args.volname = local_volname;
+ args.addr_match = addr_match;
+ ret = nufa_find_local_subvol (this, nufa_find_local_brick, &args);
+ if (ret) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Unable to find local subvolume, switching "
+ "to dht mode");
+ nufa_to_dht (this);
+ }
+ return 0;
}
-struct xlator_fops fops = {
- .lookup = nufa_lookup,
- .create = nufa_create,
- .mknod = nufa_mknod,
-
- .stat = dht_stat,
- .chmod = dht_chmod,
- .chown = dht_chown,
- .fchown = dht_fchown,
- .fchmod = dht_fchmod,
- .fstat = dht_fstat,
- .utimens = dht_utimens,
- .truncate = dht_truncate,
- .ftruncate = dht_ftruncate,
- .access = dht_access,
- .readlink = dht_readlink,
- .setxattr = dht_setxattr,
- .getxattr = dht_getxattr,
- .removexattr = dht_removexattr,
- .open = dht_open,
- .readv = dht_readv,
- .writev = dht_writev,
- .flush = dht_flush,
- .fsync = dht_fsync,
- .statfs = dht_statfs,
- .lk = dht_lk,
- .opendir = dht_opendir,
- .readdir = dht_readdir,
- .fsyncdir = dht_fsyncdir,
- .symlink = dht_symlink,
- .unlink = dht_unlink,
- .link = dht_link,
- .mkdir = dht_mkdir,
- .rmdir = dht_rmdir,
- .rename = dht_rename,
- .inodelk = dht_inodelk,
- .finodelk = dht_finodelk,
- .entrylk = dht_entrylk,
- .fentrylk = dht_fentrylk,
- .xattrop = dht_xattrop,
- .fxattrop = dht_fxattrop,
-#if 0
- .setdents = dht_setdents,
- .getdents = dht_getdents,
- .checksum = dht_checksum,
-#endif
+class_methods_t class_methods = {
+ .init = nufa_init,
+ .fini = dht_fini,
+ .reconfigure = dht_reconfigure,
+ .notify = dht_notify
};
-struct xlator_mops mops = {
+struct xlator_fops fops = {
+ .lookup = nufa_lookup,
+ .create = nufa_create,
+ .mknod = nufa_mknod,
+
+ .stat = dht_stat,
+ .fstat = dht_fstat,
+ .truncate = dht_truncate,
+ .ftruncate = dht_ftruncate,
+ .access = dht_access,
+ .readlink = dht_readlink,
+ .setxattr = dht_setxattr,
+ .getxattr = dht_getxattr,
+ .removexattr = dht_removexattr,
+ .open = dht_open,
+ .readv = dht_readv,
+ .writev = dht_writev,
+ .flush = dht_flush,
+ .fsync = dht_fsync,
+ .statfs = dht_statfs,
+ .lk = dht_lk,
+ .opendir = dht_opendir,
+ .readdir = dht_readdir,
+ .readdirp = dht_readdirp,
+ .fsyncdir = dht_fsyncdir,
+ .symlink = dht_symlink,
+ .unlink = dht_unlink,
+ .link = dht_link,
+ .mkdir = dht_mkdir,
+ .rmdir = dht_rmdir,
+ .rename = dht_rename,
+ .inodelk = dht_inodelk,
+ .finodelk = dht_finodelk,
+ .entrylk = dht_entrylk,
+ .fentrylk = dht_fentrylk,
+ .xattrop = dht_xattrop,
+ .fxattrop = dht_fxattrop,
+ .setattr = dht_setattr,
};
struct xlator_cbks cbks = {
-// .release = dht_release,
-// .releasedir = dht_releasedir,
- .forget = dht_forget
-};
-
-
-struct volume_options options[] = {
- { .key = {"local-volume-name"},
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = {"lookup-unhashed"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"min-free-disk"},
- .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
- },
- { .key = {NULL} },
+ .forget = dht_forget
};
diff --git a/xlators/cluster/dht/src/switch.c b/xlators/cluster/dht/src/switch.c
new file mode 100644
index 000000000..d3ea90ba8
--- /dev/null
+++ b/xlators/cluster/dht/src/switch.c
@@ -0,0 +1,904 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "dht-common.h"
+#include "dht-mem-types.h"
+
+#include <sys/time.h>
+#include <stdlib.h>
+#include <fnmatch.h>
+#include <string.h>
+
+extern struct volume_options options[];
+
+struct switch_sched_array {
+ xlator_t *xl;
+ int32_t eligible;
+ int32_t considered;
+};
+
+/* Select one of this struct based on the path's pattern match */
+struct switch_struct {
+ struct switch_struct *next;
+ struct switch_sched_array *array;
+ int32_t node_index; /* Index of the node in
+ this pattern. */
+ int32_t num_child; /* Total num of child nodes
+ with this pattern. */
+ char path_pattern[256];
+};
+
+/* TODO: all 'TODO's in dht.c holds good */
+/* This function should return child node as '*:subvolumes' is inserterd */
+
+static int32_t
+gf_switch_valid_child (xlator_t *this, const char *child)
+{
+ xlator_list_t *children = NULL;
+ int32_t ret = 0;
+
+ children = this->children;
+ while (children) {
+ if (!strcmp (child, children->xlator->name)) {
+ ret = 1;
+ break;
+ }
+ children = children->next;
+ }
+
+ return ret;
+}
+
+static xlator_t *
+get_switch_matching_subvol (const char *path, dht_conf_t *conf,
+ xlator_t *hashed_subvol)
+{
+ struct switch_struct *cond = NULL;
+ struct switch_struct *trav = NULL;
+ char *pathname = NULL;
+ int idx = 0;
+ xlator_t *subvol = NULL;
+
+ cond = conf->private;
+ subvol = hashed_subvol;
+ if (!cond)
+ goto out;
+
+ pathname = gf_strdup (path);
+ if (!pathname)
+ goto out;
+
+ trav = cond;
+ while (trav) {
+ if (fnmatch (trav->path_pattern,
+ pathname, FNM_NOESCAPE) == 0) {
+ for (idx = 0; idx < trav->num_child; idx++) {
+ if (trav->array[idx].xl == hashed_subvol)
+ goto out;
+ }
+ idx = trav->node_index++;
+ trav->node_index %= trav->num_child;
+ subvol = trav->array[idx].xl;
+ goto out;
+ }
+ trav = trav->next;
+ }
+out:
+ GF_FREE (pathname);
+
+ return subvol;
+}
+
+
+int
+switch_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ xlator_t *subvol = NULL;
+ char is_linkfile = 0;
+ char is_dir = 0;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ loc_t *loc = NULL;
+ int i = 0;
+ call_frame_t *prev = NULL;
+ int call_cnt = 0;
+ int ret = 0;
+
+ conf = this->private;
+
+ prev = cookie;
+ local = frame->local;
+ loc = &local->loc;
+
+ if (ENTRY_MISSING (op_ret, op_errno)) {
+ if (conf->search_unhashed) {
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere (frame, this, loc);
+ return 0;
+ }
+ }
+
+ if (op_ret == -1)
+ goto out;
+
+ is_linkfile = check_is_linkfile (inode, stbuf, xattr,
+ conf->link_xattr_name);
+ is_dir = check_is_dir (inode, stbuf, xattr);
+
+ if (!is_dir && !is_linkfile) {
+ /* non-directory and not a linkfile */
+
+ ret = dht_layout_preset (this, prev->this, inode);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "could not set pre-set layout for subvol %s",
+ prev->this->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ goto out;
+ }
+
+ if (is_dir) {
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ local->inode = inode_ref (inode);
+ local->xattr = dict_ref (xattr);
+
+ local->op_ret = 0;
+ local->op_errno = 0;
+
+ local->layout = dht_layout_new (this, conf->subvolume_cnt);
+ if (!local->layout) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "memory allocation failed :(");
+ goto err;
+ }
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_lookup_dir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup,
+ &local->loc, local->xattr_req);
+ }
+ }
+
+ if (is_linkfile) {
+ subvol = dht_linkfile_subvol (this, inode, stbuf, xattr);
+
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "linkfile not having link subvolume. path=%s",
+ loc->path);
+ dht_lookup_everywhere (frame, this, loc);
+ return 0;
+ }
+
+ STACK_WIND (frame, dht_lookup_linkfile_cbk,
+ subvol, subvol->fops->lookup,
+ &local->loc, local->xattr_req);
+ }
+
+ return 0;
+
+out:
+ if (!local->hashed_subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no subvolume in layout for path=%s",
+ local->loc.path);
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere (frame, this, loc);
+ return 0;
+ }
+
+ STACK_WIND (frame, dht_lookup_cbk,
+ local->hashed_subvol, local->hashed_subvol->fops->lookup,
+ &local->loc, local->xattr_req);
+
+ return 0;
+
+err:
+ DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno,
+ inode, stbuf, xattr, NULL);
+ return 0;
+}
+
+int
+switch_lookup (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xattr_req)
+{
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *cached_subvol = NULL;
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int op_errno = -1;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+
+ conf = this->private;
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (xattr_req) {
+ local->xattr_req = dict_ref (xattr_req);
+ } else {
+ local->xattr_req = dict_new ();
+ }
+
+ hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
+ cached_subvol = local->cached_subvol;
+
+ local->hashed_subvol = hashed_subvol;
+
+ if (is_revalidate (loc)) {
+ layout = local->layout;
+ if (!layout) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "revalidate without cache. path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (layout->gen && (layout->gen < conf->gen)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "incomplete layout failure for path=%s",
+ loc->path);
+ dht_layout_unref (this, local->layout);
+ goto do_fresh_lookup;
+ }
+
+ local->inode = inode_ref (loc->inode);
+
+ local->call_cnt = layout->cnt;
+ call_cnt = local->call_cnt;
+
+ /* NOTE: we don't require 'trusted.glusterfs.dht.linkto'
+ * attribute, revalidates directly go to the cached-subvolume.
+ */
+ ret = dict_set_uint32 (local->xattr_req,
+ conf->xattr_name, 4 * 4);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set dict value for %s",
+ conf->xattr_name);
+
+ for (i = 0; i < layout->cnt; i++) {
+ subvol = layout->list[i].xlator;
+
+ STACK_WIND (frame, dht_revalidate_cbk,
+ subvol, subvol->fops->lookup,
+ loc, local->xattr_req);
+
+ if (!--call_cnt)
+ break;
+ }
+ } else {
+ do_fresh_lookup:
+ ret = dict_set_uint32 (local->xattr_req,
+ conf->xattr_name, 4 * 4);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set dict value for %s",
+ conf->xattr_name);
+
+ ret = dict_set_uint32 (local->xattr_req,
+ conf->link_xattr_name, 256);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set dict value for %s",
+ conf->link_xattr_name);
+
+ if (!hashed_subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no subvolume in layout for path=%s, "
+ "checking on all the subvols to see if "
+ "it is a directory", loc->path);
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ local->layout = dht_layout_new (this,
+ conf->subvolume_cnt);
+ if (!local->layout) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND (frame, dht_lookup_dir_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup,
+ &local->loc, local->xattr_req);
+ }
+ return 0;
+ }
+
+ /* */
+ cached_subvol = get_switch_matching_subvol (loc->path, conf,
+ hashed_subvol);
+ if (cached_subvol == hashed_subvol) {
+ STACK_WIND (frame, dht_lookup_cbk,
+ hashed_subvol,
+ hashed_subvol->fops->lookup,
+ loc, local->xattr_req);
+ } else {
+ STACK_WIND (frame, switch_local_lookup_cbk,
+ cached_subvol,
+ cached_subvol->fops->lookup,
+ loc, local->xattr_req);
+ }
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (lookup, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+int
+switch_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret == -1)
+ goto err;
+
+ STACK_WIND (frame, dht_create_cbk,
+ local->cached_subvol, local->cached_subvol->fops->create,
+ &local->loc, local->flags, local->mode, local->umask,
+ local->fd, local->params);
+
+ return 0;
+
+err:
+ DHT_STACK_UNWIND (create, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+int
+switch_create (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *params)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *avail_subvol = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+
+ conf = this->private;
+
+ dht_get_du_info (frame, this, loc);
+
+ local = dht_local_init (frame, loc, fd, GF_FOP_CREATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = dht_subvol_get_hashed (this, loc);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ avail_subvol = get_switch_matching_subvol (loc->path, conf, subvol);
+ if (dht_is_subvol_filled (this, avail_subvol)) {
+ avail_subvol =
+ dht_free_disk_available_subvol (this, avail_subvol,
+ local);
+ }
+
+ if (subvol != avail_subvol) {
+ /* create a link file instead of actual file */
+ local->mode = mode;
+ local->flags = flags;
+ local->umask = umask;
+ local->cached_subvol = avail_subvol;
+ dht_linkfile_create (frame, switch_create_linkfile_create_cbk,
+ this, avail_subvol, subvol, loc);
+ return 0;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "creating %s on %s", loc->path, subvol->name);
+
+ STACK_WIND (frame, dht_create_cbk,
+ subvol, subvol->fops->create,
+ loc, flags, mode, umask, fd, params);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (create, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL, NULL);
+
+ return 0;
+}
+
+int
+switch_mknod_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local || !local->cached_subvol) {
+ op_errno = EINVAL;
+ op_ret = -1;
+ goto err;
+ }
+
+ if (op_ret >= 0) {
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk,
+ (void *)local->cached_subvol, local->cached_subvol,
+ local->cached_subvol->fops->mknod,
+ &local->loc, local->mode, local->rdev,
+ local->umask, local->params);
+
+ return 0;
+ }
+err:
+ DHT_STACK_UNWIND (link, frame, op_ret, op_errno,
+ inode, stbuf, preparent, postparent, xdata);
+ return 0;
+}
+
+
+int
+switch_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *params)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *avail_subvol = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+
+ conf = this->private;
+
+ dht_get_du_info (frame, this, loc);
+
+ local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = dht_subvol_get_hashed (this, loc);
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ /* Consider the disksize in consideration */
+ avail_subvol = get_switch_matching_subvol (loc->path, conf, subvol);
+ if (dht_is_subvol_filled (this, avail_subvol)) {
+ avail_subvol =
+ dht_free_disk_available_subvol (this, avail_subvol,
+ local);
+ }
+
+ if (avail_subvol != subvol) {
+ /* Create linkfile first */
+
+ local->params = dict_ref (params);
+ local->mode = mode;
+ local->umask = umask;
+ local->rdev = rdev;
+ local->cached_subvol = avail_subvol;
+
+ dht_linkfile_create (frame, switch_mknod_linkfile_cbk,
+ this, avail_subvol, subvol, loc);
+ return 0;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "creating %s on %s", loc->path, subvol->name);
+
+ STACK_WIND_COOKIE (frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->mknod, loc, mode, rdev, umask,
+ params);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND (mknod, frame, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+void
+switch_fini (xlator_t *this)
+{
+ dht_conf_t *conf = NULL;
+ struct switch_struct *trav = NULL;
+ struct switch_struct *prev = NULL;
+
+ conf = this->private;
+
+ if (conf) {
+ trav = (struct switch_struct *)conf->private;
+ conf->private = NULL;
+ while (trav) {
+ GF_FREE (trav->array);
+ prev = trav;
+ trav = trav->next;
+ GF_FREE (prev);
+ }
+ }
+
+ dht_fini(this);
+}
+
+int
+set_switch_pattern (xlator_t *this, dht_conf_t *conf,
+ const char *pattern_str)
+{
+ int flag = 0;
+ int idx = 0;
+ int index = 0;
+ int child_count = 0;
+ char *tmp = NULL;
+ char *tmp1 = NULL;
+ char *child = NULL;
+ char *tmp_str = NULL;
+ char *tmp_str1 = NULL;
+ char *dup_str = NULL;
+ char *dup_childs = NULL;
+ char *switch_str = NULL;
+ char *pattern = NULL;
+ char *childs = NULL;
+ char *option_string = NULL;
+ struct switch_struct *switch_buf = NULL;
+ struct switch_struct *switch_opt = NULL;
+ struct switch_struct *trav = NULL;
+ struct switch_sched_array *switch_buf_array = NULL;
+ xlator_list_t *trav_xl = NULL;
+
+ trav_xl = this->children;
+ while (trav_xl) {
+ index++;
+ trav_xl = trav_xl->next;
+ }
+ child_count = index;
+ switch_buf_array = GF_CALLOC ((index + 1),
+ sizeof (struct switch_sched_array),
+ gf_switch_mt_switch_sched_array);
+ if (!switch_buf_array)
+ goto err;
+
+ trav_xl = this->children;
+ index = 0;
+
+ while (trav_xl) {
+ switch_buf_array[index].xl = trav_xl->xlator;
+ switch_buf_array[index].eligible = 1;
+ trav_xl = trav_xl->next;
+ index++;
+ }
+
+ /* *jpg:child1,child2;*mpg:child3;*:child4,child5,child6 */
+
+ /* Get the pattern for considering switch case.
+ "option block-size *avi:10MB" etc */
+ option_string = gf_strdup (pattern_str);
+ switch_str = strtok_r (option_string, ";", &tmp_str);
+ while (switch_str) {
+ dup_str = gf_strdup (switch_str);
+ switch_opt = GF_CALLOC (1, sizeof (struct switch_struct),
+ gf_switch_mt_switch_struct);
+ if (!switch_opt) {
+ GF_FREE (dup_str);
+ goto err;
+ }
+
+ pattern = strtok_r (dup_str, ":", &tmp_str1);
+ childs = strtok_r (NULL, ":", &tmp_str1);
+ if (strncmp (pattern, "*", 2) == 0) {
+ gf_log ("switch", GF_LOG_INFO,
+ "'*' pattern will be taken by default "
+ "for all the unconfigured child nodes,"
+ " hence neglecting current option");
+ switch_str = strtok_r (NULL, ";", &tmp_str);
+ GF_FREE (switch_opt);
+ GF_FREE (dup_str);
+ continue;
+ }
+ memcpy (switch_opt->path_pattern, pattern, strlen (pattern));
+ if (childs) {
+ dup_childs = gf_strdup (childs);
+ child = strtok_r (dup_childs, ",", &tmp);
+ while (child) {
+ if (gf_switch_valid_child (this, child)) {
+ idx++;
+ child = strtok_r (NULL, ",", &tmp);
+ } else {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s is not a subvolume of %s. "
+ "pattern can only be scheduled "
+ "only to a subvolume of %s",
+ child, this->name, this->name);
+ goto err;
+ }
+ }
+ GF_FREE (dup_childs);
+ child = strtok_r (childs, ",", &tmp1);
+ switch_opt->num_child = idx;
+ switch_opt->array = GF_CALLOC (1, (idx *
+ sizeof (struct switch_sched_array)),
+ gf_switch_mt_switch_sched_array);
+ if (!switch_opt->array)
+ goto err;
+ idx = 0;
+ while (child) {
+ for (index = 0; index < child_count; index++) {
+ if (strcmp (switch_buf_array[index].xl->name,
+ child) == 0) {
+ gf_log ("switch", GF_LOG_DEBUG,
+ "'%s' pattern will be "
+ "scheduled to \"%s\"",
+ switch_opt->path_pattern, child);
+ /*
+ if (switch_buf_array[index-1].considered) {
+ gf_log ("switch", GF_LOG_DEBUG,
+ "ambiguity found, exiting");
+ return -1;
+ }
+ */
+ switch_opt->array[idx].xl = switch_buf_array[index].xl;
+ switch_buf_array[index].considered = 1;
+ idx++;
+ break;
+ }
+ }
+ child = strtok_r (NULL, ",", &tmp1);
+ }
+ } else {
+ /* error */
+ gf_log ("switch", GF_LOG_ERROR,
+ "Check \"scheduler.switch.case\" "
+ "option in unify volume. Exiting");
+ goto err;
+ }
+ GF_FREE (dup_str);
+
+ /* Link it to the main structure */
+ if (switch_buf) {
+ /* there are already few entries */
+ trav = switch_buf;
+ while (trav->next)
+ trav = trav->next;
+ trav->next = switch_opt;
+ } else {
+ /* First entry */
+ switch_buf = switch_opt;
+ }
+ switch_opt = NULL;
+ switch_str = strtok_r (NULL, ";", &tmp_str);
+ }
+
+ /* Now, all the pattern based considerations done, so for all the
+ * remaining pattern, '*' to all the remaining child nodes
+ */
+ {
+ for (index=0; index < child_count; index++) {
+ /* check for considered flag */
+ if (switch_buf_array[index].considered)
+ continue;
+ flag++;
+ }
+ if (!flag) {
+ gf_log ("switch", GF_LOG_ERROR,
+ "No nodes left for pattern '*'. Exiting");
+ goto err;
+ }
+ switch_opt = GF_CALLOC (1, sizeof (struct switch_struct),
+ gf_switch_mt_switch_struct);
+ if (!switch_opt)
+ goto err;
+
+ /* Add the '*' pattern to the array */
+ memcpy (switch_opt->path_pattern, "*", 2);
+ switch_opt->num_child = flag;
+ switch_opt->array =
+ GF_CALLOC (1,
+ flag * sizeof (struct switch_sched_array),
+ gf_switch_mt_switch_sched_array);
+ if (!switch_opt->array)
+ goto err;
+ flag = 0;
+ for (index=0; index < child_count; index++) {
+ /* check for considered flag */
+ if (switch_buf_array[index].considered)
+ continue;
+ gf_log ("switch", GF_LOG_DEBUG,
+ "'%s' pattern will be scheduled to \"%s\"",
+ switch_opt->path_pattern,
+ switch_buf_array[index].xl->name);
+ switch_opt->array[flag].xl =
+ switch_buf_array[index].xl;
+ switch_buf_array[index].considered = 1;
+ flag++;
+ }
+ if (switch_buf) {
+ /* there are already few entries */
+ trav = switch_buf;
+ while (trav->next)
+ trav = trav->next;
+ trav->next = switch_opt;
+ } else {
+ /* First entry */
+ switch_buf = switch_opt;
+ }
+ switch_opt = NULL;
+ }
+ /* */
+ conf->private = switch_buf;
+
+ return 0;
+err:
+ GF_FREE (switch_buf_array);
+ GF_FREE (switch_opt);
+
+ if (switch_buf) {
+ trav = switch_buf;
+ while (trav) {
+ GF_FREE (trav->array);
+ switch_opt = trav;
+ trav = trav->next;
+ GF_FREE (switch_opt);
+ }
+ }
+ return -1;
+}
+
+
+int32_t
+switch_init (xlator_t *this)
+{
+ dht_conf_t *conf = NULL;
+ data_t *data = NULL;
+ int ret = -1;
+
+ ret = dht_init(this);
+ if (ret) {
+ return ret;
+ }
+ conf = this->private;
+
+ data = dict_get (this->options, "pattern.switch.case");
+ if (data) {
+ /* TODO: */
+ ret = set_switch_pattern (this, conf, data->data);
+ if (ret) {
+ goto err;
+ }
+ }
+
+ this->private = conf;
+ return 0;
+
+err:
+ dht_fini(this);
+ return -1;
+}
+
+
+class_methods_t class_methods = {
+ .init = switch_init,
+ .fini = switch_fini,
+ .reconfigure = dht_reconfigure,
+ .notify = dht_notify
+};
+
+
+struct xlator_fops fops = {
+ .lookup = switch_lookup,
+ .create = switch_create,
+ .mknod = switch_mknod,
+
+ .stat = dht_stat,
+ .fstat = dht_fstat,
+ .truncate = dht_truncate,
+ .ftruncate = dht_ftruncate,
+ .access = dht_access,
+ .readlink = dht_readlink,
+ .setxattr = dht_setxattr,
+ .getxattr = dht_getxattr,
+ .removexattr = dht_removexattr,
+ .open = dht_open,
+ .readv = dht_readv,
+ .writev = dht_writev,
+ .flush = dht_flush,
+ .fsync = dht_fsync,
+ .statfs = dht_statfs,
+ .lk = dht_lk,
+ .opendir = dht_opendir,
+ .readdir = dht_readdir,
+ .readdirp = dht_readdirp,
+ .fsyncdir = dht_fsyncdir,
+ .symlink = dht_symlink,
+ .unlink = dht_unlink,
+ .link = dht_link,
+ .mkdir = dht_mkdir,
+ .rmdir = dht_rmdir,
+ .rename = dht_rename,
+ .inodelk = dht_inodelk,
+ .finodelk = dht_finodelk,
+ .entrylk = dht_entrylk,
+ .fentrylk = dht_fentrylk,
+ .xattrop = dht_xattrop,
+ .fxattrop = dht_fxattrop,
+ .setattr = dht_setattr,
+};
+
+
+struct xlator_cbks cbks = {
+ .forget = dht_forget
+};
diff --git a/xlators/cluster/ha/src/Makefile.am b/xlators/cluster/ha/src/Makefile.am
index 5f78a2965..5c1364b7f 100644
--- a/xlators/cluster/ha/src/Makefile.am
+++ b/xlators/cluster/ha/src/Makefile.am
@@ -1,15 +1,16 @@
xlator_LTLIBRARIES = ha.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/cluster
-ha_la_LDFLAGS = -module -avoidversion
+ha_la_LDFLAGS = -module -avoid-version
ha_la_SOURCES = ha-helpers.c ha.c
ha_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = ha.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/cluster/ha/src/ha-helpers.c b/xlators/cluster/ha/src/ha-helpers.c
index 4bc7d5e20..19be1ed27 100644
--- a/xlators/cluster/ha/src/ha-helpers.c
+++ b/xlators/cluster/ha/src/ha-helpers.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include "xlator.h"
#include "call-stub.h"
#include "defaults.h"
@@ -49,12 +39,14 @@ int ha_alloc_init_fd (call_frame_t *frame, fd_t *fd)
goto out;
}
hafdp = (hafd_t *)(long)tmp_hafdp;
- local = frame->local = CALLOC (1, sizeof (*local));
+ local = frame->local = GF_CALLOC (1, sizeof (*local),
+ gf_ha_mt_ha_local_t);
if (local == NULL) {
ret = -ENOMEM;
goto out;
}
- local->state = CALLOC (1, child_count);
+ local->state = GF_CALLOC (1, child_count,
+ gf_ha_mt_child_count);
if (local->state == NULL) {
ret = -ENOMEM;
goto out;
@@ -141,11 +133,17 @@ int ha_handle_cbk (call_frame_t *frame, void *cookie, int op_ret, int op_errno)
}
}
}
- if (local->stub)
+ if (local->stub) {
call_stub_destroy (local->stub);
+ local->stub = NULL;
+ }
+
if (local->fd) {
- FREE (local->state);
+ GF_FREE (local->state);
+ local->state = NULL;
+
fd_unref (local->fd);
+ local->fd = NULL;
}
return 0;
}
@@ -164,7 +162,8 @@ int ha_alloc_init_inode (call_frame_t *frame, inode_t *inode)
local = frame->local;
if (local == NULL) {
- local = frame->local = CALLOC (1, sizeof (*local));
+ local = frame->local = GF_CALLOC (1, sizeof (*local),
+ gf_ha_mt_ha_local_t);
if (local == NULL) {
ret = -ENOMEM;
goto out;
diff --git a/xlators/cluster/ha/src/ha-mem-types.h b/xlators/cluster/ha/src/ha-mem-types.h
new file mode 100644
index 000000000..e5e97d237
--- /dev/null
+++ b/xlators/cluster/ha/src/ha-mem-types.h
@@ -0,0 +1,26 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __HA_MEM_TYPES_H__
+#define __HA_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_ha_mem_types_ {
+ gf_ha_mt_ha_local_t = gf_common_mt_end + 1,
+ gf_ha_mt_hafd_t,
+ gf_ha_mt_char,
+ gf_ha_mt_child_count,
+ gf_ha_mt_xlator_t,
+ gf_ha_mt_ha_private_t,
+ gf_ha_mt_end
+};
+#endif
+
diff --git a/xlators/cluster/ha/src/ha.c b/xlators/cluster/ha/src/ha.c
index b938071ed..3eccb516b 100644
--- a/xlators/cluster/ha/src/ha.c
+++ b/xlators/cluster/ha/src/ha.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
/* generate errors randomly, code is simple now, better alogorithm
* can be written to decide what error to be returned and when
*/
@@ -41,6 +31,41 @@
* - do not alloc the call-stub in case only one subvol is up.
*/
+void
+ha_local_wipe (ha_local_t *local)
+{
+ if (local->stub) {
+ call_stub_destroy (local->stub);
+ local->stub = NULL;
+ }
+
+ if (local->state) {
+ GF_FREE (local->state);
+ local->state = NULL;
+ }
+
+ if (local->dict) {
+ dict_unref (local->dict);
+ local->dict = NULL;
+ }
+
+ loc_wipe (&local->loc);
+
+ if (local->fd) {
+ fd_unref (local->fd);
+ local->fd = NULL;
+ }
+
+ if (local->inode) {
+ inode_unref (local->inode);
+ local->inode = NULL;
+ }
+
+ GF_FREE (local);
+ return;
+}
+
+
int
ha_forget (xlator_t *this,
inode_t *inode)
@@ -49,7 +74,7 @@ ha_forget (xlator_t *this,
char *state = NULL;
if (!inode_ctx_del (inode, this, &stateino)) {
state = ((char *)(long)stateino);
- FREE (state);
+ GF_FREE (state);
}
return 0;
@@ -63,8 +88,9 @@ ha_lookup_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf,
- dict_t *dict)
+ struct iatt *buf,
+ dict_t *dict,
+ struct iatt *postparent)
{
ha_local_t *local = NULL;
ha_private_t *pvt = NULL;
@@ -106,6 +132,7 @@ ha_lookup_cbk (call_frame_t *frame,
if (local->op_ret == -1 && op_ret == 0) {
local->op_ret = 0;
local->buf = *buf;
+ local->postparent = *postparent;
if (dict)
local->dict = dict_ref (dict);
}
@@ -127,7 +154,8 @@ ha_lookup_cbk (call_frame_t *frame,
local->op_errno,
inode,
&local->buf,
- ctx);
+ ctx,
+ &local->postparent);
if (inode)
inode_unref (inode);
if (ctx)
@@ -148,19 +176,33 @@ ha_lookup (call_frame_t *frame,
char *state = NULL;
xlator_t **children = NULL;
int ret = -1;
+ int32_t op_errno = EINVAL;
local = frame->local;
pvt = this->private;
child_count = pvt->child_count;
children = pvt->children;
- frame->local = local = CALLOC (1, sizeof (*local));
+ frame->local = local = GF_CALLOC (1, sizeof (*local),
+ gf_ha_mt_ha_local_t);
+ if (!local) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
child_count = pvt->child_count;
local->inode = inode_ref (loc->inode);
ret = inode_ctx_get (loc->inode, this, NULL);
if (ret) {
- state = CALLOC (1, child_count);
+ state = GF_CALLOC (1, child_count, gf_ha_mt_child_count);
+ if (state == NULL) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
inode_ctx_put (loc->inode, this, (uint64_t)(long)state);
} else
local->revalidate = 1;
@@ -178,6 +220,14 @@ ha_lookup (call_frame_t *frame,
xattr_req);
}
return 0;
+
+unwind:
+ local = frame->local;
+ frame->local = NULL;
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL, NULL);
+
+ ha_local_wipe (local);
+ return 0;
}
int32_t
@@ -186,7 +236,7 @@ ha_stat_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *buf)
+ struct iatt *buf)
{
int ret = -1;
@@ -229,135 +279,25 @@ err:
return 0;
}
- int32_t
-ha_chmod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- }
- return 0;
-}
-
int32_t
-ha_chmod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_inode (frame, loc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_chmod_stub (frame, ha_chmod, loc, mode);
-
- STACK_WIND_COOKIE (frame,
- ha_chmod_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->chmod,
- loc,
- mode);
- return 0;
-err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
-}
-
- int32_t
-ha_fchmod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+ha_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost)
{
int ret = -1;
ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- }
- return 0;
-}
-
-int32_t
-ha_fchmod (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- mode_t mode)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_fd (frame, fd);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
+ STACK_UNWIND (frame, op_ret, op_errno, statpre, statpost);
}
- local = frame->local;
- local->stub = fop_fchmod_stub (frame, ha_fchmod, fd, mode);
-
- STACK_WIND_COOKIE (frame,
- ha_fchmod_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->fchmod,
- fd,
- mode);
- return 0;
-err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
return 0;
}
- int32_t
-ha_chown_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- }
- return 0;
-}
int32_t
-ha_chown (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- uid_t uid,
- gid_t gid)
+ha_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ int32_t valid)
{
ha_local_t *local = NULL;
int op_errno = 0;
@@ -368,49 +308,24 @@ ha_chown (call_frame_t *frame,
goto err;
}
local = frame->local;
- local->stub = fop_chown_stub (frame, ha_chown, loc, uid, gid);
+ local->stub = fop_setattr_stub (frame, ha_setattr, loc, stbuf, valid);
- STACK_WIND_COOKIE (frame,
- ha_chown_cbk,
+ STACK_WIND_COOKIE (frame,
+ ha_setattr_cbk,
(void *)(long)local->active,
HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->chown,
- loc,
- uid,
- gid);
+ HA_ACTIVE_CHILD(this, local)->fops->setattr,
+ loc, stbuf, valid);
return 0;
err:
- STACK_UNWIND (frame, -1, ENOTCONN, NULL);
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
return 0;
}
- int32_t
-ha_fchown_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- }
- return 0;
-}
-
-int32_t
-ha_fchown (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- uid_t uid,
- gid_t gid)
+int32_t
+ha_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+ int32_t valid)
{
ha_local_t *local = NULL;
int op_errno = 0;
@@ -421,29 +336,29 @@ ha_fchown (call_frame_t *frame,
goto err;
}
local = frame->local;
- local->stub = fop_fchown_stub (frame, ha_fchown, fd, uid, gid);
+ local->stub = fop_fsetattr_stub (frame, ha_fsetattr, fd, stbuf, valid);
STACK_WIND_COOKIE (frame,
- ha_fchown_cbk,
+ ha_setattr_cbk,
(void *)(long)local->active,
HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->fchown,
- fd,
- uid,
- gid);
+ HA_ACTIVE_CHILD(this, local)->fops->fsetattr,
+ fd, stbuf, valid);
return 0;
err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
return 0;
}
- int32_t
+
+int32_t
ha_truncate_cbk (call_frame_t *frame,
void *cookie,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *buf)
+ struct iatt *prebuf,
+ struct iatt *postbuf)
{
int ret = -1;
@@ -452,7 +367,8 @@ ha_truncate_cbk (call_frame_t *frame,
STACK_UNWIND (frame,
op_ret,
op_errno,
- buf);
+ prebuf,
+ postbuf);
}
return 0;
}
@@ -473,6 +389,11 @@ ha_truncate (call_frame_t *frame,
}
local = frame->local;
local->stub = fop_truncate_stub (frame, ha_truncate, loc, offset);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
STACK_WIND_COOKIE (frame,
ha_truncate_cbk,
@@ -483,7 +404,7 @@ ha_truncate (call_frame_t *frame,
offset);
return 0;
err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -493,7 +414,8 @@ ha_ftruncate_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *buf)
+ struct iatt *prebuf,
+ struct iatt *postbuf)
{
int ret = -1;
@@ -503,7 +425,7 @@ ha_ftruncate_cbk (call_frame_t *frame,
STACK_UNWIND (frame,
op_ret,
op_errno,
- buf);
+ prebuf, postbuf);
}
return 0;
}
@@ -524,6 +446,11 @@ ha_ftruncate (call_frame_t *frame,
}
local = frame->local;
local->stub = fop_ftruncate_stub (frame, ha_ftruncate, fd, offset);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
STACK_WIND_COOKIE (frame,
ha_ftruncate_cbk,
@@ -534,58 +461,12 @@ ha_ftruncate (call_frame_t *frame,
offset);
return 0;
err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
-}
-
-int32_t
-ha_utimens_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- }
- return 0;
-}
-
-int32_t
-ha_utimens (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- struct timespec tv[2])
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
+ local = frame->local;
+ frame->local = NULL;
- op_errno = ha_alloc_init_inode (frame, loc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_utimens_stub (frame, ha_utimens, loc, tv);
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
- STACK_WIND_COOKIE (frame,
- ha_utimens_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->utimens,
- loc,
- tv);
- return 0;
-err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+ ha_local_wipe (local);
return 0;
}
@@ -624,6 +505,11 @@ ha_access (call_frame_t *frame,
}
local = frame->local;
local->stub = fop_access_stub (frame, ha_access, loc, mask);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
STACK_WIND_COOKIE (frame,
ha_access_cbk,
@@ -634,7 +520,12 @@ ha_access (call_frame_t *frame,
mask);
return 0;
err:
+ local = frame->local;
+ frame->local = NULL;
+
STACK_UNWIND (frame, -1, op_errno);
+
+ ha_local_wipe (local);
return 0;
}
@@ -645,7 +536,8 @@ ha_readlink_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- const char *path)
+ const char *path,
+ struct iatt *sbuf)
{
int ret = -1;
@@ -655,7 +547,8 @@ ha_readlink_cbk (call_frame_t *frame,
STACK_UNWIND (frame,
op_ret,
op_errno,
- path);
+ path,
+ sbuf);
}
return 0;
}
@@ -686,7 +579,7 @@ ha_readlink (call_frame_t *frame,
size);
return 0;
err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -697,8 +590,9 @@ ha_mknod_lookup_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf,
- dict_t *dict)
+ struct iatt *buf,
+ dict_t *dict,
+ struct iatt *postparent)
{
ha_local_t *local = NULL;
ha_private_t *pvt = NULL;
@@ -742,12 +636,13 @@ ha_mknod_lookup_cbk (call_frame_t *frame,
if (cnt == 0) {
call_stub_t *stub = local->stub;
- FREE (local->state);
+ GF_FREE (local->state);
STACK_UNWIND (frame,
local->op_ret,
local->op_errno,
local->stub->args.mknod.loc.inode,
- &local->buf);
+ &local->buf, &local->preparent,
+ &local->postparent);
call_stub_destroy (stub);
}
return 0;
@@ -760,7 +655,9 @@ ha_mknod_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf)
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
ha_local_t *local = NULL;
ha_private_t *pvt = NULL;
@@ -798,6 +695,8 @@ ha_mknod_cbk (call_frame_t *frame,
local->op_ret = 0;
local->first_success = 1;
local->buf = *buf;
+ local->preparent = *preparent;
+ local->postparent = *postparent;
}
cnt = --local->call_count;
for (i = local->active + 1; i < child_count; i++) {
@@ -807,9 +706,11 @@ ha_mknod_cbk (call_frame_t *frame,
if (cnt == 0 || i == child_count) {
call_stub_t *stub = local->stub;
- FREE (local->state);
+ GF_FREE (local->state);
stub = local->stub;
- STACK_UNWIND (frame, local->op_ret, local->op_errno, local->stub->args.mknod.loc.inode, &local->buf);
+ STACK_UNWIND (frame, local->op_ret, local->op_errno,
+ local->stub->args.mknod.loc.inode, &local->buf,
+ &local->preparent, &local->postparent);
call_stub_destroy (stub);
return 0;
}
@@ -854,20 +755,46 @@ ha_mknod (call_frame_t *frame,
ha_private_t *pvt = NULL;
int child_count = 0, i = 0;
char *stateino = NULL;
+ int32_t op_errno = EINVAL;
local = frame->local;
pvt = this->private;
child_count = pvt->child_count;
- frame->local = local = CALLOC (1, sizeof (*local));
+ frame->local = local = GF_CALLOC (1, sizeof (*local),
+ gf_ha_mt_ha_local_t);
+ if (!local) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
local->stub = fop_mknod_stub (frame, ha_mknod, loc, mode, rdev);
+ if (!local->stub) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
local->op_ret = -1;
local->op_errno = ENOTCONN;
- local->state = CALLOC (1, child_count);
+ local->state = GF_CALLOC (1, child_count, gf_ha_mt_char);
+ if (!local->state) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
memcpy (local->state, pvt->state, child_count);
local->active = -1;
- stateino = CALLOC (1, child_count);
+ stateino = GF_CALLOC (1, child_count, gf_ha_mt_char);
+ if (!stateino) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
inode_ctx_put (loc->inode, this, (uint64_t)(long)stateino);
for (i = 0; i < child_count; i++) {
@@ -884,6 +811,14 @@ ha_mknod (call_frame_t *frame,
HA_ACTIVE_CHILD(this, local)->fops->mknod,
loc, mode, rdev);
return 0;
+
+err:
+ local = frame->local;
+ frame->local = NULL;
+
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ ha_local_wipe (local);
+ return 0;
}
@@ -894,8 +829,9 @@ ha_mkdir_lookup_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf,
- dict_t *dict)
+ struct iatt *buf,
+ dict_t *dict,
+ struct iatt *postparent)
{
ha_local_t *local = NULL;
ha_private_t *pvt = NULL;
@@ -931,12 +867,12 @@ ha_mkdir_lookup_cbk (call_frame_t *frame,
if (cnt == 0) {
call_stub_t *stub = local->stub;
- FREE (local->state);
+ GF_FREE (local->state);
STACK_UNWIND (frame,
local->op_ret,
local->op_errno,
- local->stub->args.mkdir.loc.inode,
- &local->buf);
+ local->stub->args.mkdir.loc.inode, &local->buf,
+ &local->preparent, &local->postparent);
call_stub_destroy (stub);
}
return 0;
@@ -949,7 +885,9 @@ ha_mkdir_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf)
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
ha_local_t *local = NULL;
ha_private_t *pvt = NULL;
@@ -983,6 +921,8 @@ ha_mkdir_cbk (call_frame_t *frame,
local->op_ret = 0;
local->first_success = 1;
local->buf = *buf;
+ local->preparent = *preparent;
+ local->postparent = *postparent;
}
cnt = --local->call_count;
for (i = local->active + 1; i < child_count; i++) {
@@ -992,9 +932,11 @@ ha_mkdir_cbk (call_frame_t *frame,
if (cnt == 0 || i == child_count) {
call_stub_t *stub = local->stub;
- FREE (local->state);
+ GF_FREE (local->state);
stub = local->stub;
- STACK_UNWIND (frame, local->op_ret, local->op_errno, local->stub->args.mkdir.loc.inode, &local->buf);
+ STACK_UNWIND (frame, local->op_ret, local->op_errno,
+ local->stub->args.mkdir.loc.inode, &local->buf,
+ &local->preparent, &local->postparent);
call_stub_destroy (stub);
return 0;
}
@@ -1037,20 +979,46 @@ ha_mkdir (call_frame_t *frame,
ha_private_t *pvt = NULL;
int child_count = 0, i = 0;
char *stateino = NULL;
+ int32_t op_errno = EINVAL;
local = frame->local;
pvt = this->private;
child_count = pvt->child_count;
- frame->local = local = CALLOC (1, sizeof (*local));
+ frame->local = local = GF_CALLOC (1, sizeof (*local),
+ gf_ha_mt_ha_local_t);
+ if (!frame->local) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
local->stub = fop_mkdir_stub (frame, ha_mkdir, loc, mode);
+ if (!local->stub) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
local->op_ret = -1;
local->op_errno = ENOTCONN;
- local->state = CALLOC (1, child_count);
+ local->state = GF_CALLOC (1, child_count, gf_ha_mt_char);
+ if (!local->state) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
memcpy (local->state, pvt->state, child_count);
local->active = -1;
- stateino = CALLOC (1, child_count);
+ stateino = GF_CALLOC (1, child_count, gf_ha_mt_char);
+ if (!stateino) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
inode_ctx_put (loc->inode, this, (uint64_t)(long)stateino);
for (i = 0; i < child_count; i++) {
if (local->state[i]) {
@@ -1066,6 +1034,13 @@ ha_mkdir (call_frame_t *frame,
HA_ACTIVE_CHILD(this, local)->fops->mkdir,
loc, mode);
return 0;
+err:
+ local = frame->local;
+ frame->local = NULL;
+
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ ha_local_wipe (local);
+ return 0;
}
int32_t
@@ -1073,13 +1048,15 @@ ha_unlink_cbk (call_frame_t *frame,
void *cookie,
xlator_t *this,
int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
int ret = -1;
ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
if (ret == 0) {
- STACK_UNWIND (frame, op_ret, op_errno);
+ STACK_UNWIND (frame, op_ret, op_errno, preparent, postparent);
}
return 0;
}
@@ -1100,6 +1077,11 @@ ha_unlink (call_frame_t *frame,
}
local = frame->local;
local->stub = fop_unlink_stub (frame, ha_unlink, loc);
+ if (!local->stub) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
STACK_WIND_COOKIE (frame,
ha_unlink_cbk,
@@ -1109,7 +1091,7 @@ ha_unlink (call_frame_t *frame,
loc);
return 0;
err:
- STACK_UNWIND (frame, -1, op_errno);
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -1118,7 +1100,9 @@ ha_rmdir_cbk (call_frame_t *frame,
void *cookie,
xlator_t *this,
int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
int ret = -1;
@@ -1127,7 +1111,9 @@ ha_rmdir_cbk (call_frame_t *frame,
if (ret == 0) {
STACK_UNWIND (frame,
op_ret,
- op_errno);
+ op_errno,
+ preparent,
+ postparent);
}
return 0;
}
@@ -1147,6 +1133,11 @@ ha_rmdir (call_frame_t *frame,
}
local = frame->local;
local->stub = fop_rmdir_stub (frame, ha_rmdir, loc);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
STACK_WIND_COOKIE (frame,
ha_rmdir_cbk,
@@ -1156,7 +1147,7 @@ ha_rmdir (call_frame_t *frame,
loc);
return 0;
err:
- STACK_UNWIND (frame, -1, op_errno);
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
return 0;
}
@@ -1168,8 +1159,9 @@ ha_symlink_lookup_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf,
- dict_t *dict)
+ struct iatt *buf,
+ dict_t *dict,
+ struct iatt *postparent)
{
ha_local_t *local = NULL;
ha_private_t *pvt = NULL;
@@ -1205,12 +1197,12 @@ ha_symlink_lookup_cbk (call_frame_t *frame,
if (cnt == 0) {
call_stub_t *stub = local->stub;
- FREE (local->state);
+ GF_FREE (local->state);
STACK_UNWIND (frame,
local->op_ret,
local->op_errno,
- local->stub->args.symlink.loc.inode,
- &local->buf);
+ local->stub->args.symlink.loc.inode, &local->buf,
+ &local->preparent, &local->postparent);
call_stub_destroy (stub);
}
return 0;
@@ -1223,7 +1215,9 @@ ha_symlink_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf)
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
ha_local_t *local = NULL;
ha_private_t *pvt = NULL;
@@ -1256,6 +1250,8 @@ ha_symlink_cbk (call_frame_t *frame,
local->op_ret = 0;
local->first_success = 1;
local->buf = *buf;
+ local->preparent = *preparent;
+ local->postparent = *postparent;
}
cnt = --local->call_count;
for (i = local->active + 1; i < child_count; i++) {
@@ -1265,10 +1261,11 @@ ha_symlink_cbk (call_frame_t *frame,
if (cnt == 0 || i == child_count) {
call_stub_t *stub = local->stub;
- FREE (local->state);
+ GF_FREE (local->state);
stub = local->stub;
STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->stub->args.symlink.loc.inode, &local->buf);
+ local->stub->args.symlink.loc.inode, &local->buf,
+ &local->preparent, &local->postparent);
call_stub_destroy (stub);
return 0;
}
@@ -1311,20 +1308,46 @@ ha_symlink (call_frame_t *frame,
ha_private_t *pvt = NULL;
int child_count = 0, i = 0;
char *stateino = NULL;
+ int32_t op_errno = EINVAL;
local = frame->local;
pvt = this->private;
child_count = pvt->child_count;
- frame->local = local = CALLOC (1, sizeof (*local));
+ frame->local = local = GF_CALLOC (1, sizeof (*local),
+ gf_ha_mt_ha_local_t);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
local->stub = fop_symlink_stub (frame, ha_symlink, linkname, loc);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
local->op_ret = -1;
local->op_errno = ENOTCONN;
- local->state = CALLOC (1, child_count);
+ local->state = GF_CALLOC (1, child_count, gf_ha_mt_char);
+ if (!local->state) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
memcpy (local->state, pvt->state, child_count);
local->active = -1;
- stateino = CALLOC (1, child_count);
+ stateino = GF_CALLOC (1, child_count, gf_ha_mt_char);
+ if (!stateino) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
inode_ctx_put (loc->inode, this, (uint64_t)(long)stateino);
for (i = 0; i < child_count; i++) {
@@ -1342,6 +1365,12 @@ ha_symlink (call_frame_t *frame,
HA_ACTIVE_CHILD(this, local)->fops->symlink,
linkname, loc);
return 0;
+err:
+ local = frame->local;
+ frame->local = NULL;
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ ha_local_wipe (local);
+ return 0;
}
int32_t
@@ -1350,14 +1379,19 @@ ha_rename_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *buf)
+ struct iatt *buf,
+ struct iatt *preoldparent,
+ struct iatt *postoldparent,
+ struct iatt *prenewparent,
+ struct iatt *postnewparent)
{
int ret = -1;
ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
if (ret == 0) {
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ STACK_UNWIND (frame, op_ret, op_errno, buf, preoldparent,
+ postoldparent, prenewparent, postnewparent);
}
return 0;
}
@@ -1378,6 +1412,12 @@ ha_rename (call_frame_t *frame,
}
local = frame->local;
local->stub = fop_rename_stub (frame, ha_rename, oldloc, newloc);
+ if (!local->stub) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
STACK_WIND_COOKIE (frame,
ha_rename_cbk,
(void *)(long)local->active,
@@ -1386,7 +1426,7 @@ ha_rename (call_frame_t *frame,
oldloc, newloc);
return 0;
err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
return 0;
}
@@ -1397,8 +1437,9 @@ ha_link_lookup_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf,
- dict_t *dict)
+ struct iatt *buf,
+ dict_t *dict,
+ struct iatt *postparent)
{
ha_local_t *local = NULL;
ha_private_t *pvt = NULL;
@@ -1434,12 +1475,12 @@ ha_link_lookup_cbk (call_frame_t *frame,
if (cnt == 0) {
call_stub_t *stub = local->stub;
- FREE (local->state);
+ GF_FREE (local->state);
STACK_UNWIND (frame,
local->op_ret,
local->op_errno,
- local->stub->args.link.oldloc.inode,
- &local->buf);
+ local->stub->args.link.oldloc.inode, &local->buf,
+ &local->preparent, &local->postparent);
call_stub_destroy (stub);
}
return 0;
@@ -1452,7 +1493,9 @@ ha_link_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf)
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
ha_local_t *local = NULL;
ha_private_t *pvt = NULL;
@@ -1485,6 +1528,8 @@ ha_link_cbk (call_frame_t *frame,
local->op_ret = 0;
local->first_success = 1;
local->buf = *buf;
+ local->preparent = *preparent;
+ local->postparent = *postparent;
}
cnt = --local->call_count;
for (i = local->active + 1; i < child_count; i++) {
@@ -1494,9 +1539,11 @@ ha_link_cbk (call_frame_t *frame,
if (cnt == 0 || i == child_count) {
call_stub_t *stub = local->stub;
- FREE (local->state);
+ GF_FREE (local->state);
stub = local->stub;
- STACK_UNWIND (frame, local->op_ret, local->op_errno, local->stub->args.link.oldloc.inode, &local->buf);
+ STACK_UNWIND (frame, local->op_ret, local->op_errno,
+ local->stub->args.link.oldloc.inode, &local->buf,
+ &local->preparent, &local->postparent);
call_stub_destroy (stub);
return 0;
}
@@ -1539,7 +1586,7 @@ ha_link (call_frame_t *frame,
ha_private_t *pvt = NULL;
int child_count = 0, i = 0;
char *stateino = NULL;
- int32_t ret = 0;
+ int32_t ret = 0, op_errno = 0;
uint64_t tmp_stateino = 0;
ret = inode_ctx_get (newloc->inode, this, &tmp_stateino);
@@ -1551,7 +1598,8 @@ ha_link (call_frame_t *frame,
if (stateino == NULL) {
gf_log (this->name, GF_LOG_ERROR,
"newloc->inode's ctx is NULL, returning EINVAL");
- STACK_UNWIND (frame, -1, EINVAL, oldloc->inode, NULL);
+ STACK_UNWIND (frame, -1, EINVAL, oldloc->inode, NULL, NULL,
+ NULL);
return 0;
}
@@ -1559,11 +1607,30 @@ ha_link (call_frame_t *frame,
pvt = this->private;
child_count = pvt->child_count;
- frame->local = local = CALLOC (1, sizeof (*local));
+ frame->local = local = GF_CALLOC (1, sizeof (*local),
+ gf_ha_mt_ha_local_t);
+ if (!frame->local) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
local->stub = fop_link_stub (frame, ha_link, oldloc, newloc);
+ if (!local->stub) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
local->op_ret = -1;
local->op_errno = ENOTCONN;
- local->state = CALLOC (1, child_count);
+ local->state = GF_CALLOC (1, child_count, gf_ha_mt_char);
+ if (!local->state) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
memcpy (local->state, pvt->state, child_count);
local->active = -1;
@@ -1582,6 +1649,11 @@ ha_link (call_frame_t *frame,
oldloc,
newloc);
return 0;
+err:
+ local = frame->local;
+ frame->local = NULL;
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
}
int32_t
@@ -1592,7 +1664,9 @@ ha_create_cbk (call_frame_t *frame,
int32_t op_errno,
fd_t *fd,
inode_t *inode,
- struct stat *buf)
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
ha_local_t *local = NULL;
ha_private_t *pvt = NULL;
@@ -1640,6 +1714,8 @@ ha_create_cbk (call_frame_t *frame,
if (local->op_ret == -1) {
local->op_ret = 0;
local->buf = *buf;
+ local->preparent = *preparent;
+ local->postparent = *postparent;
local->first_success = 1;
}
local->stub->args.create.flags &= (~O_EXCL);
@@ -1658,8 +1734,9 @@ ha_create_cbk (call_frame_t *frame,
call_stub_t *stub = local->stub;
STACK_UNWIND (frame, local->op_ret, local->op_errno,
stub->args.create.fd,
- stub->args.create.loc.inode, &local->buf);
- FREE (state);
+ stub->args.create.loc.inode, &local->buf,
+ &local->preparent, &local->postparent);
+ GF_FREE (state);
call_stub_destroy (stub);
return 0;
}
@@ -1695,6 +1772,7 @@ ha_create (call_frame_t *frame,
char *stateino = NULL;
xlator_t **children = NULL;
hafd_t *hafdp = NULL;
+ int32_t op_errno = EINVAL;
local = frame->local;
pvt = this->private;
@@ -1702,9 +1780,28 @@ ha_create (call_frame_t *frame,
children = pvt->children;
if (local == NULL) {
- local = frame->local = CALLOC (1, sizeof (*local));
+ frame->local = local = GF_CALLOC (1, sizeof (*local),
+ gf_ha_mt_ha_local_t);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
local->stub = fop_create_stub (frame, ha_create, loc, flags, mode, fd);
- local->state = CALLOC (1, child_count);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
+ local->state = GF_CALLOC (1, child_count, gf_ha_mt_char);
+ if (!local->state) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
local->active = -1;
local->op_ret = -1;
local->op_errno = ENOTCONN;
@@ -1718,10 +1815,34 @@ ha_create (call_frame_t *frame,
}
}
/* FIXME handle active -1 */
- stateino = CALLOC (1, child_count);
- hafdp = CALLOC (1, sizeof (*hafdp));
- hafdp->fdstate = CALLOC (1, child_count);
- hafdp->path = strdup(loc->path);
+ stateino = GF_CALLOC (1, child_count, gf_ha_mt_char);
+ if (!stateino) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
+ hafdp = GF_CALLOC (1, sizeof (*hafdp), gf_ha_mt_hafd_t);
+ if (!hafdp) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
+ hafdp->fdstate = GF_CALLOC (1, child_count, gf_ha_mt_char);
+ if (!hafdp->fdstate) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
+ hafdp->path = gf_strdup(loc->path);
+ if (!hafdp->path) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
LOCK_INIT (&hafdp->lock);
fd_ctx_set (fd, this, (uint64_t)(long)hafdp);
inode_ctx_put (loc->inode, this, (uint64_t)(long)stateino);
@@ -1733,6 +1854,26 @@ ha_create (call_frame_t *frame,
children[local->active]->fops->create,
loc, flags, mode, fd);
return 0;
+err:
+ local = frame->local;
+ frame->local = NULL;
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ ha_local_wipe (local);
+
+ if (stateino) {
+ GF_FREE (stateino);
+ stateino = NULL;
+ }
+
+ if (hafdp) {
+ GF_FREE (hafdp->fdstate);
+
+ GF_FREE (hafdp->path);
+
+ GF_FREE (hafdp);
+ }
+
+ return 0;
}
int32_t
@@ -1789,7 +1930,7 @@ int32_t
ha_open (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- int32_t flags, fd_t *fd)
+ int32_t flags, fd_t *fd, int wbflags)
{
ha_local_t *local = NULL;
ha_private_t *pvt = NULL;
@@ -1798,6 +1939,7 @@ ha_open (call_frame_t *frame,
int cnt = 0, i, child_count = 0, ret = 0;
hafd_t *hafdp = NULL;
uint64_t tmp_stateino = 0;
+ int32_t op_errno = ENOMEM;
local = frame->local;
pvt = this->private;
@@ -1805,14 +1947,39 @@ ha_open (call_frame_t *frame,
child_count = pvt->child_count;
- local = frame->local = CALLOC (1, sizeof (*local));
+ frame->local = local = GF_CALLOC (1, sizeof (*local),
+ gf_ha_mt_ha_local_t);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
local->op_ret = -1;
local->op_errno = ENOTCONN;
local->fd = fd;
- hafdp = CALLOC (1, sizeof (*hafdp));
- hafdp->fdstate = CALLOC (1, child_count);
- hafdp->path = strdup (loc->path);
+ hafdp = GF_CALLOC (1, sizeof (*hafdp), gf_ha_mt_hafd_t);
+ if (!hafdp) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
+ hafdp->fdstate = GF_CALLOC (1, child_count, gf_ha_mt_char);
+ if (!hafdp->fdstate) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
+ hafdp->path = gf_strdup (loc->path);
+ if (!hafdp->path) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
hafdp->active = -1;
if (pvt->pref_subvol == -1) {
hafdp->active = fd->inode->ino % child_count;
@@ -1833,12 +2000,33 @@ ha_open (call_frame_t *frame,
ha_open_cbk,
children[i],
children[i]->fops->open,
- loc, flags, fd);
+ loc, flags, fd, wbflags);
if (--cnt == 0)
break;
}
}
return 0;
+err:
+ local = frame->local;
+ frame->local = NULL;
+
+ STACK_UNWIND (frame, -1, op_errno, fd);
+ if (hafdp) {
+ if (hafdp->fdstate) {
+ GF_FREE (hafdp->fdstate);
+ hafdp->fdstate = NULL;
+ }
+
+ if (hafdp->path) {
+ GF_FREE (hafdp->path);
+ hafdp->path = NULL;
+ }
+
+ GF_FREE (hafdp);
+ }
+
+ ha_local_wipe (local);
+ return 0;
}
int32_t
@@ -1849,7 +2037,7 @@ ha_readv_cbk (call_frame_t *frame,
int32_t op_errno,
struct iovec *vector,
int32_t count,
- struct stat *stbuf,
+ struct iatt *stbuf,
struct iobref *iobref)
{
int ret = 0;
@@ -1885,6 +2073,11 @@ ha_readv (call_frame_t *frame,
}
local = frame->local;
local->stub = fop_readv_stub (frame, ha_readv, fd, size, offset);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
STACK_WIND_COOKIE (frame,
ha_readv_cbk,
@@ -1896,7 +2089,12 @@ ha_readv (call_frame_t *frame,
offset);
return 0;
err:
+ local = frame->local;
+ frame->local = NULL;
+
STACK_UNWIND (frame, -1, op_errno, NULL, 0, NULL, NULL);
+
+ ha_local_wipe (local);
return 0;
}
@@ -1906,7 +2104,8 @@ ha_writev_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *stbuf)
+ struct iatt *prebuf,
+ struct iatt *postbuf)
{
int ret = 0;
ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
@@ -1915,7 +2114,8 @@ ha_writev_cbk (call_frame_t *frame,
STACK_UNWIND (frame,
op_ret,
op_errno,
- stbuf);
+ prebuf,
+ postbuf);
}
return 0;
}
@@ -1938,7 +2138,13 @@ ha_writev (call_frame_t *frame,
goto err;
}
local = frame->local;
- local->stub = fop_writev_stub (frame, ha_writev, fd, vector, count, off, iobref);
+ local->stub = fop_writev_stub (frame, ha_writev, fd, vector, count, off,
+ iobref);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
STACK_WIND_COOKIE (frame,
ha_writev_cbk,
@@ -1952,7 +2158,12 @@ ha_writev (call_frame_t *frame,
iobref);
return 0;
err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+ local = frame->local;
+ frame->local = NULL;
+
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+ ha_local_wipe (local);
return 0;
}
@@ -1989,6 +2200,12 @@ ha_flush (call_frame_t *frame,
}
local = frame->local;
local->stub = fop_flush_stub (frame, ha_flush, fd);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
STACK_WIND_COOKIE (frame,
ha_flush_cbk,
(void *)(long)local->active,
@@ -1997,7 +2214,12 @@ ha_flush (call_frame_t *frame,
fd);
return 0;
err:
+ local = frame->local;
+ frame->local = NULL;
+
STACK_UNWIND (frame, -1, op_errno);
+
+ ha_local_wipe (local);
return 0;
}
@@ -2007,7 +2229,9 @@ ha_fsync_cbk (call_frame_t *frame,
void *cookie,
xlator_t *this,
int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf)
{
int ret = 0;
ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
@@ -2036,6 +2260,12 @@ ha_fsync (call_frame_t *frame,
}
local = frame->local;
local->stub = fop_fsync_stub (frame, ha_fsync, fd, flags);
+ if (!local->stub) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
STACK_WIND_COOKIE (frame,
ha_fsync_cbk,
(void *)(long)local->active,
@@ -2045,7 +2275,12 @@ ha_fsync (call_frame_t *frame,
flags);
return 0;
err:
+ local = frame->local;
+ frame->local = NULL;
+
STACK_UNWIND (frame, -1, op_errno);
+
+ ha_local_wipe (local);
return 0;
}
@@ -2055,7 +2290,7 @@ ha_fstat_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *buf)
+ struct iatt *buf)
{
int ret = 0;
@@ -2086,6 +2321,12 @@ ha_fstat (call_frame_t *frame,
}
local = frame->local;
local->stub = fop_fstat_stub (frame, ha_fstat, fd);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
STACK_WIND_COOKIE (frame,
ha_fstat_cbk,
(void *)(long)local->active,
@@ -2094,7 +2335,12 @@ ha_fstat (call_frame_t *frame,
fd);
return 0;
err:
+ local = frame->local;
+ frame->local = NULL;
+
STACK_UNWIND (frame, -1, op_errno, NULL);
+
+ ha_local_wipe (local);
return 0;
}
@@ -2160,20 +2406,46 @@ ha_opendir (call_frame_t *frame,
int cnt = 0, i, child_count = 0, ret = 0;
hafd_t *hafdp = NULL;
uint64_t tmp_stateino = 0;
+ int32_t op_errno = EINVAL;
local = frame->local;
pvt = this->private;
children = pvt->children;
child_count = pvt->child_count;
- local = frame->local = CALLOC (1, sizeof (*local));
+ frame->local = local = GF_CALLOC (1, sizeof (*local),
+ gf_ha_mt_ha_local_t);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
local->op_ret = -1;
local->op_errno = ENOTCONN;
local->fd = fd;
- hafdp = CALLOC (1, sizeof (*hafdp));
- hafdp->fdstate = CALLOC (1, child_count);
- hafdp->path = strdup (loc->path);
+ hafdp = GF_CALLOC (1, sizeof (*hafdp), gf_ha_mt_hafd_t);
+ if (!hafdp) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
+ hafdp->fdstate = GF_CALLOC (1, child_count, gf_ha_mt_char);
+ if (!hafdp->fdstate) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
+ hafdp->path = gf_strdup (loc->path);
+ if (!hafdp->path) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
LOCK_INIT (&hafdp->lock);
fd_ctx_set (fd, this, (uint64_t)(long)hafdp);
ret = inode_ctx_get (loc->inode, this, &tmp_stateino);
@@ -2198,6 +2470,26 @@ ha_opendir (call_frame_t *frame,
}
}
return 0;
+err:
+ local = frame->local;
+ frame->local = NULL;
+
+ STACK_UNWIND (frame, -1, op_errno, NULL);
+ ha_local_wipe (local);
+ if (hafdp) {
+ if (hafdp->fdstate) {
+ GF_FREE (hafdp->fdstate);
+ hafdp->fdstate = NULL;
+ }
+
+ if (hafdp->path) {
+ GF_FREE (hafdp->path);
+ hafdp->path = NULL;
+ }
+
+ GF_FREE (hafdp);
+ }
+ return 0;
}
int32_t
@@ -2239,7 +2531,14 @@ ha_getdents (call_frame_t *frame,
goto err;
}
local = frame->local;
- local->stub = fop_getdents_stub (frame, ha_getdents, fd, size, offset, flag);
+ local->stub = fop_getdents_stub (frame, ha_getdents, fd, size, offset,
+ flag);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
STACK_WIND_COOKIE (frame,
ha_getdents_cbk,
(void *)(long)local->active,
@@ -2251,7 +2550,12 @@ ha_getdents (call_frame_t *frame,
flag);
return 0;
err:
+ local = frame->local;
+ frame->local = NULL;
+
STACK_UNWIND (frame, -1, op_errno, NULL, 0);
+
+ ha_local_wipe (local);
return 0;
}
@@ -2292,7 +2596,13 @@ ha_setdents (call_frame_t *frame,
}
local = frame->local;
- local->stub = fop_setdents_stub (frame, ha_setdents, fd, flags, entries, count);
+ local->stub = fop_setdents_stub (frame, ha_setdents, fd, flags, entries,
+ count);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
STACK_WIND_COOKIE (frame,
ha_setdents_cbk,
@@ -2305,7 +2615,12 @@ ha_setdents (call_frame_t *frame,
count);
return 0;
err:
+ local = frame->local;
+ frame->local = NULL;
+
STACK_UNWIND (frame, -1, op_errno);
+
+ ha_local_wipe (local);
return 0;
}
@@ -2343,6 +2658,12 @@ ha_fsyncdir (call_frame_t *frame,
}
local = frame->local;
local->stub = fop_fsyncdir_stub (frame, ha_fsyncdir, fd, flags);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
STACK_WIND_COOKIE (frame,
ha_fsyncdir_cbk,
(void *)(long)local->active,
@@ -2352,7 +2673,12 @@ ha_fsyncdir (call_frame_t *frame,
flags);
return 0;
err:
- STACK_UNWIND (frame, -1, op_errno);
+ local = frame->local;
+ frame->local = NULL;
+
+ STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+ ha_local_wipe (local);
return 0;
}
@@ -2401,7 +2727,8 @@ ha_statfs (call_frame_t *frame,
/* The normal way of handling failover doesn't work here
* as loc->inode may be null in this case.
*/
- local = CALLOC (1, sizeof (*local));
+ local = GF_CALLOC (1, sizeof (*local),
+ gf_ha_mt_ha_local_t);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -2458,6 +2785,12 @@ ha_setxattr (call_frame_t *frame,
}
local = frame->local;
local->stub = fop_setxattr_stub (frame, ha_setxattr, loc, dict, flags);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
STACK_WIND_COOKIE (frame,
ha_setxattr_cbk,
(void *)(long)local->active,
@@ -2468,7 +2801,12 @@ ha_setxattr (call_frame_t *frame,
flags);
return 0;
err:
+ local = frame->local;
+ frame->local = NULL;
+
STACK_UNWIND (frame, -1, op_errno);
+
+ ha_local_wipe (local);
return 0;
}
@@ -2509,6 +2847,12 @@ ha_getxattr (call_frame_t *frame,
}
local = frame->local;
local->stub = fop_getxattr_stub (frame, ha_getxattr, loc, name);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
STACK_WIND_COOKIE (frame,
ha_getxattr_cbk,
(void *)(long)local->active,
@@ -2518,7 +2862,12 @@ ha_getxattr (call_frame_t *frame,
name);
return 0;
err:
+ local = frame->local;
+ frame->local = NULL;
+
STACK_UNWIND (frame, -1, op_errno, NULL);
+
+ ha_local_wipe (local);
return 0;
}
@@ -2557,6 +2906,11 @@ ha_xattrop (call_frame_t *frame,
local = frame->local;
local->stub = fop_xattrop_stub (frame, ha_xattrop, loc, flags, dict);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
STACK_WIND_COOKIE (frame,
ha_xattrop_cbk,
@@ -2568,7 +2922,12 @@ ha_xattrop (call_frame_t *frame,
dict);
return 0;
err:
+ local = frame->local;
+ frame->local = NULL;
+
STACK_UNWIND (frame, -1, op_errno, dict);
+
+ ha_local_wipe (local);
return 0;
}
@@ -2604,6 +2963,11 @@ ha_fxattrop (call_frame_t *frame,
}
local = frame->local;
local->stub = fop_fxattrop_stub (frame, ha_fxattrop, fd, flags, dict);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
STACK_WIND_COOKIE (frame,
ha_fxattrop_cbk,
@@ -2615,7 +2979,12 @@ ha_fxattrop (call_frame_t *frame,
dict);
return 0;
err:
+ local = frame->local;
+ frame->local = NULL;
+
STACK_UNWIND (frame, -1, op_errno, dict);
+
+ ha_local_wipe (local);
return 0;
}
@@ -2653,6 +3022,11 @@ ha_removexattr (call_frame_t *frame,
local = frame->local;
local->stub = fop_removexattr_stub (frame, ha_removexattr, loc, name);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
STACK_WIND_COOKIE (frame,
ha_removexattr_cbk,
@@ -2663,7 +3037,12 @@ ha_removexattr (call_frame_t *frame,
name);
return 0;
err:
+ local = frame->local;
+ frame->local = NULL;
+
STACK_UNWIND (frame, -1, op_errno);
+
+ ha_local_wipe (local);
return 0;
}
@@ -2673,7 +3052,7 @@ ha_lk_setlk_unlck_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct flock *lock)
+ struct gf_flock *lock)
{
ha_local_t *local = NULL;
int cnt = 0;
@@ -2689,7 +3068,7 @@ ha_lk_setlk_unlck_cbk (call_frame_t *frame,
if (cnt == 0) {
stub = local->stub;
- FREE (local->state);
+ GF_FREE (local->state);
if (stub->args.lk.lock.l_type == F_UNLCK) {
STACK_UNWIND (frame, local->op_ret, local->op_errno, &stub->args.lk.lock);
} else {
@@ -2706,7 +3085,7 @@ ha_lk_setlk_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct flock *lock)
+ struct gf_flock *lock)
{
ha_local_t *local = NULL;
ha_private_t *pvt = NULL;
@@ -2738,7 +3117,7 @@ ha_lk_setlk_cbk (call_frame_t *frame,
}
if (i == child_count) {
call_stub_t *stub = local->stub;
- FREE (local->state);
+ GF_FREE (local->state);
STACK_UNWIND (frame, 0, op_errno, &stub->args.lk.lock);
call_stub_destroy (stub);
return 0;
@@ -2762,7 +3141,7 @@ ha_lk_setlk_cbk (call_frame_t *frame,
cnt++;
}
if (cnt) {
- struct flock lock;
+ struct gf_flock lock;
lock = local->stub->args.lk.lock;
for (i = 0; i < child_count; i++) {
if (state[i]) {
@@ -2779,7 +3158,7 @@ ha_lk_setlk_cbk (call_frame_t *frame,
}
return 0;
} else {
- FREE (local->state);
+ GF_FREE (local->state);
call_stub_destroy (local->stub);
STACK_UNWIND (frame,
op_ret,
@@ -2796,7 +3175,7 @@ ha_lk_getlk_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct flock *lock)
+ struct gf_flock *lock)
{
ha_local_t *local = NULL;
ha_private_t *pvt = NULL;
@@ -2813,7 +3192,7 @@ ha_lk_getlk_cbk (call_frame_t *frame,
prev_frame = cookie;
if (op_ret == 0) {
- FREE (local->state);
+ GF_FREE (local->state);
call_stub_destroy (local->stub);
STACK_UNWIND (frame, 0, 0, lock);
return 0;
@@ -2830,7 +3209,7 @@ ha_lk_getlk_cbk (call_frame_t *frame,
}
if (i == child_count) {
- FREE (local->state);
+ GF_FREE (local->state);
call_stub_destroy (local->stub);
STACK_UNWIND (frame, op_ret, op_errno, lock);
return 0;
@@ -2851,7 +3230,7 @@ ha_lk (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
int32_t cmd,
- struct flock *lock)
+ struct gf_flock *lock)
{
ha_local_t *local = NULL;
ha_private_t *pvt = NULL;
@@ -2860,6 +3239,7 @@ ha_lk (call_frame_t *frame,
int child_count = 0, i = 0, cnt = 0, ret = 0;
xlator_t **children = NULL;
uint64_t tmp_hafdp = 0;
+ int32_t op_errno = EINVAL;
local = frame->local;
pvt = this->private;
@@ -2870,7 +3250,14 @@ ha_lk (call_frame_t *frame,
gf_log (this->name, GF_LOG_ERROR, "fd_ctx_get failed");
if (local == NULL) {
- local = frame->local = CALLOC (1, sizeof (*local));
+ local = frame->local = GF_CALLOC (1, sizeof (*local),
+ gf_ha_mt_ha_local_t);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
local->active = -1;
local->op_ret = -1;
local->op_errno = ENOTCONN;
@@ -2878,12 +3265,24 @@ ha_lk (call_frame_t *frame,
hafdp = (hafd_t *)(long)tmp_hafdp;
if (local->active == -1) {
- STACK_UNWIND (frame, -1, ENOTCONN, NULL);
- return 0;
+ op_errno = ENOTCONN;
+ goto err;
}
local->stub = fop_lk_stub (frame, ha_lk, fd, cmd, lock);
- local->state = CALLOC (1, child_count);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
+ local->state = GF_CALLOC (1, child_count, gf_ha_mt_char);
+ if (!local->state) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
state = hafdp->fdstate;
LOCK (&hafdp->lock);
memcpy (local->state, state, child_count);
@@ -2931,6 +3330,14 @@ ha_lk (call_frame_t *frame,
lock);
}
return 0;
+err:
+ local = frame->local;
+ frame->local = NULL;
+
+ STACK_UNWIND (frame, -1, op_errno, NULL);
+
+ ha_local_wipe (local);
+ return 0;
}
int32_t
@@ -2957,7 +3364,7 @@ ha_inodelk (call_frame_t *frame,
const char *volume,
loc_t *loc,
int32_t cmd,
- struct flock *lock)
+ struct gf_flock *lock)
{
ha_local_t *local = NULL;
int op_errno = 0;
@@ -3055,6 +3462,11 @@ ha_checksum (call_frame_t *frame,
}
local = frame->local;
local->stub = fop_checksum_stub (frame, ha_checksum, loc, flag);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
STACK_WIND_COOKIE (frame,
ha_checksum_cbk,
@@ -3065,17 +3477,18 @@ ha_checksum (call_frame_t *frame,
flag);
return 0;
err:
+ local = frame->local;
+ frame->local = NULL;
+
STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+
+ ha_local_wipe (local);
return 0;
}
int32_t
-ha_readdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *entries)
+ha_common_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
{
int ret = 0;
@@ -3086,11 +3499,15 @@ ha_readdir_cbk (call_frame_t *frame,
}
int32_t
-ha_readdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t off)
+ha_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off);
+
+int32_t
+ha_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off);
+int32_t
+ha_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, int whichop)
{
ha_local_t *local = NULL;
int op_errno = 0;
@@ -3101,19 +3518,58 @@ ha_readdir (call_frame_t *frame,
goto err;
}
local = frame->local;
- local->stub = fop_readdir_stub (frame, ha_readdir, fd, size, off);
- STACK_WIND_COOKIE (frame,
- ha_readdir_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->readdir,
- fd, size, off);
+ if (whichop == GF_FOP_READDIR)
+ local->stub = fop_readdir_stub (frame, ha_readdir, fd, size,
+ off);
+ else
+ local->stub = fop_readdirp_stub (frame, ha_readdirp, fd, size,
+ off);
+ if (!local->stub) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
+
+ if (whichop == GF_FOP_READDIR)
+ STACK_WIND_COOKIE (frame, ha_common_readdir_cbk,
+ (void *)(long)local->active,
+ HA_ACTIVE_CHILD(this, local),
+ HA_ACTIVE_CHILD(this, local)->fops->readdir,
+ fd, size, off);
+ else
+ STACK_WIND_COOKIE (frame, ha_common_readdir_cbk,
+ (void *)(long)local->active,
+ HA_ACTIVE_CHILD(this, local),
+ HA_ACTIVE_CHILD(this, local)->fops->readdirp,
+ fd, size, off);
return 0;
err:
+ local = frame->local;
+ frame->local = NULL;
+
STACK_UNWIND (frame, -1, ENOTCONN, NULL);
+
+ ha_local_wipe (local);
return 0;
}
+
+int32_t
+ha_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off)
+{
+ ha_do_readdir (frame, this, fd, size, off, GF_FOP_READDIR);
+ return 0;
+}
+
+int32_t
+ha_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off)
+{
+ ha_do_readdir (frame, this, fd, size, off, GF_FOP_READDIRP);
+ return 0;
+}
+
/* Management operations */
int32_t
@@ -3174,8 +3630,16 @@ ha_stats (call_frame_t *frame,
ha_private_t *pvt = NULL;
xlator_t **children = NULL;
int i = 0;
+ int32_t op_errno = EINVAL;
+
+ local = frame->local = GF_CALLOC (1, sizeof (*local),
+ gf_ha_mt_ha_local_t);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
- local = frame->local = CALLOC (1, sizeof (*local));
pvt = this->private;
children = pvt->children;
for (i = 0; i < pvt->child_count; i++) {
@@ -3184,8 +3648,8 @@ ha_stats (call_frame_t *frame,
}
if (i == pvt->child_count) {
- STACK_UNWIND (frame, -1, ENOTCONN, NULL);
- return 0;
+ op_errno = ENOTCONN;
+ goto err;
}
local->flags = flags;
@@ -3195,6 +3659,16 @@ ha_stats (call_frame_t *frame,
children[i]->mops->stats,
flags);
return 0;
+
+err:
+ local = frame->local;
+ frame->local = NULL;
+
+ STACK_UNWIND (frame, -1, ENOTCONN, NULL);
+
+ ha_local_wipe (local);
+ return 0;
+
}
@@ -3258,20 +3732,27 @@ ha_getspec (call_frame_t *frame,
ha_private_t *pvt = NULL;
xlator_t **children = NULL;
int i = 0;
+ int32_t op_errno = EINVAL;
+
+ local = frame->local = GF_CALLOC (1, sizeof (*local),
+ gf_ha_mt_ha_local_t);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto err;
+ }
- local = frame->local = CALLOC (1, sizeof (*local));
pvt = this->private;
children = pvt->children;
- local = frame->local = CALLOC (1, sizeof (*local));
for (i = 0; i < pvt->child_count; i++) {
if (pvt->state[i])
break;
}
if (i == pvt->child_count) {
- STACK_UNWIND (frame, -1, ENOTCONN, NULL);
- return 0;
+ op_errno = ENOTCONN;
+ goto err;
}
local->flags = flags;
local->pattern = (char *)key;
@@ -3282,6 +3763,15 @@ ha_getspec (call_frame_t *frame,
children[i]->mops->getspec,
key, flags);
return 0;
+err:
+ local = frame->local;
+ frame->local = NULL;
+
+ STACK_UNWIND (frame, -1, ENOTCONN, NULL);
+
+ ha_local_wipe (local);
+ return 0;
+
}
int32_t
@@ -3299,8 +3789,8 @@ ha_closedir (xlator_t *this,
}
hafdp = (hafd_t *)(long)tmp_hafdp;
- FREE (hafdp->fdstate);
- FREE (hafdp->path);
+ GF_FREE (hafdp->fdstate);
+ GF_FREE (hafdp->path);
LOCK_DESTROY (&hafdp->lock);
return 0;
}
@@ -3320,8 +3810,8 @@ ha_close (xlator_t *this,
}
hafdp = (hafd_t *)(long)tmp_hafdp;
- FREE (hafdp->fdstate);
- FREE (hafdp->path);
+ GF_FREE (hafdp->fdstate);
+ GF_FREE (hafdp->path);
LOCK_DESTROY (&hafdp->lock);
return 0;
}
@@ -3392,6 +3882,25 @@ notify (xlator_t *this,
return 0;
}
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_ha_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ return ret;
+ }
+
+ return ret;
+}
+
int
init (xlator_t *this)
{
@@ -3399,6 +3908,7 @@ init (xlator_t *this)
xlator_list_t *trav = NULL;
int count = 0, ret = 0;
+
if (!this->children) {
gf_log (this->name,GF_LOG_ERROR,
"FATAL: ha should have one or more child defined");
@@ -3411,7 +3921,7 @@ init (xlator_t *this)
}
trav = this->children;
- pvt = CALLOC (1, sizeof (ha_private_t));
+ pvt = GF_CALLOC (1, sizeof (ha_private_t), gf_ha_mt_ha_private_t);
ret = dict_get_int32 (this->options, "preferred-subvolume",
&pvt->pref_subvol);
@@ -3426,7 +3936,8 @@ init (xlator_t *this)
}
pvt->child_count = count;
- pvt->children = CALLOC (count, sizeof (xlator_t*));
+ pvt->children = GF_CALLOC (count, sizeof (xlator_t*),
+ gf_ha_mt_xlator_t);
trav = this->children;
count = 0;
@@ -3436,7 +3947,7 @@ init (xlator_t *this)
trav = trav->next;
}
- pvt->state = CALLOC (1, count);
+ pvt->state = GF_CALLOC (1, count, gf_ha_mt_char);
this->private = pvt;
return 0;
}
@@ -3446,7 +3957,7 @@ fini (xlator_t *this)
{
ha_private_t *priv = NULL;
priv = this->private;
- FREE (priv);
+ GF_FREE (priv);
return;
}
@@ -3462,10 +3973,7 @@ struct xlator_fops fops = {
.symlink = ha_symlink,
.rename = ha_rename,
.link = ha_link,
- .chmod = ha_chmod,
- .chown = ha_chown,
.truncate = ha_truncate,
- .utimens = ha_utimens,
.create = ha_create,
.open = ha_open,
.readv = ha_readv,
@@ -3478,24 +3986,20 @@ struct xlator_fops fops = {
.removexattr = ha_removexattr,
.opendir = ha_opendir,
.readdir = ha_readdir,
+ .readdirp = ha_readdirp,
.getdents = ha_getdents,
.fsyncdir = ha_fsyncdir,
.access = ha_access,
.ftruncate = ha_ftruncate,
.fstat = ha_fstat,
.lk = ha_lk,
- .fchmod = ha_fchmod,
- .fchown = ha_fchown,
.setdents = ha_setdents,
.lookup_cbk = ha_lookup_cbk,
.checksum = ha_checksum,
.xattrop = ha_xattrop,
- .fxattrop = ha_fxattrop
-};
-
-struct xlator_mops mops = {
- .stats = ha_stats,
- .getspec = ha_getspec,
+ .fxattrop = ha_fxattrop,
+ .setattr = ha_setattr,
+ .fsetattr = ha_fsetattr,
};
struct xlator_cbks cbks = {
diff --git a/xlators/cluster/ha/src/ha.h b/xlators/cluster/ha/src/ha.h
index d47d96535..e2ed7eaa6 100644
--- a/xlators/cluster/ha/src/ha.h
+++ b/xlators/cluster/ha/src/ha.h
@@ -1,25 +1,17 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef __HA_H_
#define __HA_H_
+#include "ha-mem-types.h"
+
typedef struct {
call_stub_t *stub;
int32_t op_ret, op_errno;
@@ -28,7 +20,9 @@ typedef struct {
char *state, *pattern;
dict_t *dict;
loc_t loc;
- struct stat buf;
+ struct iatt buf;
+ struct iatt postparent;
+ struct iatt preparent;
fd_t *fd;
inode_t *inode;
int32_t flags;
diff --git a/xlators/cluster/map/src/Makefile.am b/xlators/cluster/map/src/Makefile.am
index 26e19137a..a278b05e2 100644
--- a/xlators/cluster/map/src/Makefile.am
+++ b/xlators/cluster/map/src/Makefile.am
@@ -1,15 +1,16 @@
xlator_LTLIBRARIES = map.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/cluster
-map_la_LDFLAGS = -module -avoidversion
+map_la_LDFLAGS = -module -avoid-version
map_la_SOURCES = map.c map-helper.c
map_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = map.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/cluster/map/src/map-helper.c b/xlators/cluster/map/src/map-helper.c
index 365eeb490..851397b68 100644
--- a/xlators/cluster/map/src/map-helper.c
+++ b/xlators/cluster/map/src/map-helper.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2009-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2009-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -256,14 +246,15 @@ verify_dir_and_assign_subvol (xlator_t *this,
goto out;
}
- tmp_map = CALLOC (1, sizeof (struct map_pattern));
+ tmp_map = GF_CALLOC (1, sizeof (struct map_pattern),
+ gf_map_mt_map_pattern);
tmp_map->xl = trav->xlator;
tmp_map->dir_len = strlen (directory);
/* make sure that the top level directory starts
* with '/' and ends without '/'
*/
- tmp_map->directory = strdup (directory);
+ tmp_map->directory = gf_strdup (directory);
if (directory[tmp_map->dir_len - 1] == '/') {
tmp_map->dir_len--;
}
diff --git a/xlators/cluster/map/src/map-mem-types.h b/xlators/cluster/map/src/map-mem-types.h
new file mode 100644
index 000000000..3e89f4736
--- /dev/null
+++ b/xlators/cluster/map/src/map-mem-types.h
@@ -0,0 +1,24 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __MAP_MEM_TYPES_H__
+#define __MAP_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_map_mem_types_ {
+ gf_map_mt_map_private_t = gf_common_mt_end + 1,
+ gf_map_mt_map_local_t,
+ gf_map_mt_map_xlator_array,
+ gf_map_mt_map_pattern,
+ gf_map_mt_end
+};
+#endif
+
diff --git a/xlators/cluster/map/src/map.c b/xlators/cluster/map/src/map.c
index 98d6b33b0..6150a33ce 100644
--- a/xlators/cluster/map/src/map.c
+++ b/xlators/cluster/map/src/map.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -36,87 +26,53 @@ map_stat_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *buf)
+ struct iatt *buf)
{
call_frame_t *prev = NULL;
prev = cookie;
- map_itransform (this, prev->this, buf->st_ino, &buf->st_ino);
+ map_itransform (this, prev->this, buf->ia_ino, &buf->ia_ino);
STACK_UNWIND (frame, op_ret, op_errno, buf);
return 0;
}
-
static int32_t
-map_chmod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+map_setattr_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *statpre,
+ struct iatt *statpost)
{
call_frame_t *prev = NULL;
prev = cookie;
-
- map_itransform (this, prev->this, buf->st_ino, &buf->st_ino);
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
+ map_itransform (this, prev->this, statpre->ia_ino, &statpre->ia_ino);
+ map_itransform (this, prev->this, statpost->ia_ino, &statpost->ia_ino);
-static int32_t
-map_fchmod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- call_frame_t *prev = NULL;
- prev = cookie;
-
- map_itransform (this, prev->this, buf->st_ino, &buf->st_ino);
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ STACK_UNWIND (frame, op_ret, op_errno, statpre, statpost);
return 0;
}
-
static int32_t
-map_chown_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+map_fsetattr_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *statpre,
+ struct iatt *statpost)
{
call_frame_t *prev = NULL;
prev = cookie;
-
- map_itransform (this, prev->this, buf->st_ino, &buf->st_ino);
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
+ map_itransform (this, prev->this, statpre->ia_ino, &statpre->ia_ino);
+ map_itransform (this, prev->this, statpost->ia_ino, &statpost->ia_ino);
-static int32_t
-map_fchown_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- call_frame_t *prev = NULL;
- prev = cookie;
-
- map_itransform (this, prev->this, buf->st_ino, &buf->st_ino);
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ STACK_UNWIND (frame, op_ret, op_errno, statpre, statpost);
return 0;
}
@@ -126,14 +82,15 @@ map_truncate_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *buf)
+ struct iatt *prebuf,
+ struct iatt *postbuf)
{
call_frame_t *prev = NULL;
prev = cookie;
- map_itransform (this, prev->this, buf->st_ino, &buf->st_ino);
+ map_itransform (this, prev->this, postbuf->ia_ino, &postbuf->ia_ino);
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf);
return 0;
}
@@ -143,31 +100,15 @@ map_ftruncate_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *buf)
-{
- call_frame_t *prev = NULL;
- prev = cookie;
-
- map_itransform (this, prev->this, buf->st_ino, &buf->st_ino);
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-map_utimens_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+ struct iatt *prebuf,
+ struct iatt *postbuf)
{
call_frame_t *prev = NULL;
prev = cookie;
- map_itransform (this, prev->this, buf->st_ino, &buf->st_ino);
+ map_itransform (this, prev->this, postbuf->ia_ino, &postbuf->ia_ino);
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf);
return 0;
}
@@ -189,9 +130,10 @@ map_readlink_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- const char *path)
+ const char *path,
+ struct iatt *sbuf)
{
- STACK_UNWIND (frame, op_ret, op_errno, path);
+ STACK_UNWIND (frame, op_ret, op_errno, path, sbuf);
return 0;
}
@@ -200,9 +142,11 @@ map_unlink_cbk (call_frame_t *frame,
void *cookie,
xlator_t *this,
int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ STACK_UNWIND (frame, op_ret, op_errno, preparent, postparent);
return 0;
}
@@ -211,9 +155,11 @@ map_rmdir_cbk (call_frame_t *frame,
void *cookie,
xlator_t *this,
int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ STACK_UNWIND (frame, op_ret, op_errno, preparent, postparent);
return 0;
}
@@ -224,12 +170,16 @@ map_rename_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *buf)
+ struct iatt *buf,
+ struct iatt *preoldparent,
+ struct iatt *postoldparent,
+ struct iatt *prenewparent,
+ struct iatt *postnewparent)
{
call_frame_t *prev = NULL;
prev = cookie;
- map_itransform (this, prev->this, buf->st_ino, &buf->st_ino);
+ map_itransform (this, prev->this, buf->ia_ino, &buf->ia_ino);
STACK_UNWIND (frame, op_ret, op_errno, buf);
return 0;
@@ -242,12 +192,14 @@ map_link_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf)
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
call_frame_t *prev = NULL;
prev = cookie;
- map_itransform (this, prev->this, buf->st_ino, &buf->st_ino);
+ map_itransform (this, prev->this, buf->ia_ino, &buf->ia_ino);
STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
return 0;
@@ -273,13 +225,13 @@ map_readv_cbk (call_frame_t *frame,
int32_t op_errno,
struct iovec *vector,
int32_t count,
- struct stat *stbuf,
+ struct iatt *stbuf,
struct iobref *iobref)
{
call_frame_t *prev = NULL;
prev = cookie;
- map_itransform (this, prev->this, stbuf->st_ino, &stbuf->st_ino);
+ map_itransform (this, prev->this, stbuf->ia_ino, &stbuf->ia_ino);
STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf, iobref);
return 0;
@@ -291,14 +243,15 @@ map_writev_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *stbuf)
+ struct iatt *prebuf,
+ struct iatt *postbuf)
{
call_frame_t *prev = NULL;
prev = cookie;
- map_itransform (this, prev->this, stbuf->st_ino, &stbuf->st_ino);
+ map_itransform (this, prev->this, postbuf->ia_ino, &postbuf->ia_ino);
- STACK_UNWIND (frame, op_ret, op_errno, stbuf);
+ STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf);
return 0;
}
@@ -319,9 +272,11 @@ map_fsync_cbk (call_frame_t *frame,
void *cookie,
xlator_t *this,
int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf);
return 0;
}
@@ -332,12 +287,12 @@ map_fstat_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *buf)
+ struct iatt *buf)
{
call_frame_t *prev = NULL;
prev = cookie;
- map_itransform (this, prev->this, buf->st_ino, &buf->st_ino);
+ map_itransform (this, prev->this, buf->ia_ino, &buf->ia_ino);
STACK_UNWIND (frame, op_ret, op_errno, buf);
return 0;
@@ -473,7 +428,7 @@ map_lk_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct flock *lock)
+ struct gf_flock *lock)
{
STACK_UNWIND (frame, op_ret, op_errno, lock);
return 0;
@@ -527,12 +482,14 @@ map_newentry_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf)
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
call_frame_t *prev = NULL;
prev = cookie;
- map_itransform (this, prev->this, buf->st_ino, &buf->st_ino);
+ map_itransform (this, prev->this, buf->ia_ino, &buf->ia_ino);
STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
return 0;
@@ -548,12 +505,14 @@ map_create_cbk (call_frame_t *frame,
int32_t op_errno,
fd_t *fd,
inode_t *inode,
- struct stat *buf)
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
call_frame_t *prev = NULL;
prev = cookie;
- map_itransform (this, prev->this, buf->st_ino, &buf->st_ino);
+ map_itransform (this, prev->this, buf->ia_ino, &buf->ia_ino);
STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
return 0;
@@ -655,13 +614,14 @@ map_single_lookup_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf,
- dict_t *dict)
+ struct iatt *buf,
+ dict_t *dict,
+ struct iatt *postparent)
{
call_frame_t *prev = NULL;
prev = cookie;
- map_itransform (this, prev->this, buf->st_ino, &buf->st_ino);
+ map_itransform (this, prev->this, buf->ia_ino, &buf->ia_ino);
STACK_UNWIND (frame, op_ret, op_errno, inode, buf, dict);
@@ -675,8 +635,9 @@ map_root_lookup_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf,
- dict_t *dict)
+ struct iatt *buf,
+ dict_t *dict,
+ struct iatt *postparent)
{
int callcnt = 0;
map_local_t *local = NULL;
@@ -779,9 +740,36 @@ map_single_readdir_cbk (call_frame_t *frame,
}
+int32_t
+map_single_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+{
+ call_frame_t *prev = NULL;
+ gf_dirent_t *orig_entry = NULL;
+
+ prev = cookie;
+
+ list_for_each_entry (orig_entry, &entries->list, list) {
+ map_itransform (this, prev->this, orig_entry->d_ino,
+ &orig_entry->d_ino);
+ orig_entry->d_stat.ia_ino = orig_entry->d_ino;
+ }
+ STACK_UNWIND (frame, op_ret, op_errno, entries);
+ return 0;
+}
+
int
map_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *orig_entries)
+ int op_ret, int op_errno, gf_dirent_t *orig_entries);
+
+int
+map_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *orig_entries);
+
+int
+map_generic_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *orig_entries,
+ int whichop)
{
map_local_t *local = NULL;
gf_dirent_t entries;
@@ -816,6 +804,8 @@ map_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
map_itransform (this, subvol, orig_entry->d_off,
&entry->d_off);
+ if (whichop == GF_FOP_READDIRP)
+ entry->d_stat.ia_ino = entry->d_ino;
entry->d_type = orig_entry->d_type;
entry->d_len = orig_entry->d_len;
@@ -841,9 +831,14 @@ done:
goto unwind;
}
- STACK_WIND (frame, map_readdir_cbk,
- next_subvol, next_subvol->fops->readdir,
- local->fd, local->size, 0);
+ if (whichop == GF_FOP_READDIR)
+ STACK_WIND (frame, map_readdir_cbk, next_subvol,
+ next_subvol->fops->readdir, local->fd,
+ local->size, 0);
+ else
+ STACK_WIND (frame, map_readdirp_cbk, next_subvol,
+ next_subvol->fops->readdirp, local->fd,
+ local->size, 0);
return 0;
}
@@ -864,6 +859,26 @@ unwind:
}
+int
+map_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *orig_entries)
+{
+ map_generic_readdir_cbk (frame, cookie, this, op_ret, op_errno,
+ orig_entries, GF_FOP_READDIR);
+ return 0;
+}
+
+
+int
+map_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *orig_entries)
+{
+ map_generic_readdir_cbk (frame, cookie, this, op_ret, op_errno,
+ orig_entries, GF_FOP_READDIRP);
+ return 0;
+}
+
+
/* Management operations */
static int32_t
@@ -880,23 +895,6 @@ map_checksum_cbk (call_frame_t *frame,
}
-int32_t
-map_lock_notify_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-int32_t
-map_lock_fnotify_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
/* Fops starts here */
int32_t
@@ -928,21 +926,22 @@ map_stat (call_frame_t *frame,
return 0;
}
-
int32_t
-map_chmod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
+map_setattr (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ struct iatt *stbuf,
+ int32_t valid)
{
int32_t op_errno = 1;
xlator_t *subvol = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ GF_VALIDATE_OR_GOTO ("map", this, err);
+ GF_VALIDATE_OR_GOTO (this->name, frame, err);
+ GF_VALIDATE_OR_GOTO (this->name, loc, err);
+ GF_VALIDATE_OR_GOTO (this->name, loc->inode, err);
+ GF_VALIDATE_OR_GOTO (this->name, loc->path, err);
+ GF_VALIDATE_OR_GOTO (this->name, stbuf, err);
subvol = get_mapping_subvol_from_ctx (this, loc->inode);
if (!subvol) {
@@ -950,8 +949,8 @@ map_chmod (call_frame_t *frame,
goto err;
}
- STACK_WIND (frame, map_chmod_cbk, subvol,
- subvol->fops->chmod, loc, mode);
+ STACK_WIND (frame, map_setattr_cbk, subvol,
+ subvol->fops->setattr, loc, stbuf, valid);
return 0;
err:
STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
@@ -960,18 +959,19 @@ map_chmod (call_frame_t *frame,
}
int32_t
-map_fchmod (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- mode_t mode)
+map_fsetattr (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ struct iatt *stbuf,
+ int32_t valid)
{
int32_t op_errno = 1;
xlator_t *subvol = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
+ GF_VALIDATE_OR_GOTO ("map", this, err);
+ GF_VALIDATE_OR_GOTO (this->name, frame, err);
+ GF_VALIDATE_OR_GOTO (this->name, fd, err);
+ GF_VALIDATE_OR_GOTO (this->name, stbuf, err);
subvol = get_mapping_subvol_from_ctx (this, fd->inode);
if (!subvol) {
@@ -979,71 +979,8 @@ map_fchmod (call_frame_t *frame,
goto err;
}
- STACK_WIND (frame, map_fchmod_cbk, subvol,
- subvol->fops->fchmod, fd, mode);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-map_chown (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- uid_t uid,
- gid_t gid)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- subvol = get_mapping_subvol_from_ctx (this, loc->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_chown_cbk, subvol,
- subvol->fops->chown, loc, uid, gid);
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-map_fchown (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- uid_t uid,
- gid_t gid)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
-
- subvol = get_mapping_subvol_from_ctx (this, fd->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_fchown_cbk, subvol,
- subvol->fops->fchown, fd, uid, gid);
-
+ STACK_WIND (frame, map_fsetattr_cbk, subvol,
+ subvol->fops->fsetattr, fd, stbuf, valid);
return 0;
err:
STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
@@ -1113,37 +1050,6 @@ map_ftruncate (call_frame_t *frame,
}
int32_t
-map_utimens (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- struct timespec tv[2])
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- subvol = get_mapping_subvol_from_ctx (this, loc->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_utimens_cbk, subvol,
- subvol->fops->utimens, loc, tv);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int32_t
map_access (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
@@ -1353,7 +1259,7 @@ int32_t
map_open (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
- int32_t flags, fd_t *fd)
+ int32_t flags, fd_t *fd, int wbflags)
{
int32_t op_errno = 1;
xlator_t *subvol = NULL;
@@ -1371,7 +1277,7 @@ map_open (call_frame_t *frame,
}
STACK_WIND (frame, map_open_cbk, subvol,
- subvol->fops->open, loc, flags, fd);
+ subvol->fops->open, loc, flags, fd, wbflags);
return 0;
err:
@@ -1854,7 +1760,7 @@ map_lk (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
int32_t cmd,
- struct flock *lock)
+ struct gf_flock *lock)
{
int32_t op_errno = 1;
xlator_t *subvol = NULL;
@@ -1883,7 +1789,7 @@ map_lk (call_frame_t *frame,
int32_t
map_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct flock *lock)
+ const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *lock)
{
int32_t op_errno = 1;
xlator_t *subvol = NULL;
@@ -1913,7 +1819,7 @@ map_inodelk (call_frame_t *frame, xlator_t *this,
int32_t
map_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct flock *lock)
+ const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock)
{
int32_t op_errno = 1;
xlator_t *subvol = NULL;
@@ -2231,7 +2137,8 @@ map_lookup (call_frame_t *frame,
return 0;
root_inode:
- local = CALLOC (1, sizeof (map_local_t));
+ local = GF_CALLOC (1, sizeof (map_local_t),
+ gf_map_mt_map_local_t);
frame->local = local;
local->call_count = priv->child_count;
@@ -2283,7 +2190,8 @@ map_statfs (call_frame_t *frame,
return 0;
root_inode:
- local = CALLOC (1, sizeof (map_local_t));
+ local = GF_CALLOC (1, sizeof (map_local_t),
+ gf_map_mt_map_local_t);
priv = this->private;
frame->local = local;
@@ -2335,7 +2243,8 @@ map_opendir (call_frame_t *frame,
return 0;
root_inode:
- local = CALLOC (1, sizeof (map_local_t));
+ local = GF_CALLOC (1, sizeof (map_local_t),
+ gf_map_mt_map_local_t);
priv = this->private;
frame->local = local;
@@ -2359,11 +2268,8 @@ map_opendir (call_frame_t *frame,
int32_t
-map_readdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t yoff)
+map_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t yoff, int whichop)
{
int32_t op_errno = EINVAL;
xlator_t *subvol = NULL;
@@ -2386,14 +2292,19 @@ map_readdir (call_frame_t *frame,
}
/* Just one callback */
- STACK_WIND (frame, map_single_readdir_cbk, subvol,
- subvol->fops->readdir, fd, size, yoff);
+ if (whichop == GF_FOP_READDIR)
+ STACK_WIND (frame, map_single_readdir_cbk, subvol,
+ subvol->fops->readdir, fd, size, yoff);
+ else
+ STACK_WIND (frame, map_single_readdirp_cbk, subvol,
+ subvol->fops->readdirp, fd, size, yoff);
return 0;
root_inode:
/* readdir on '/' */
- local = CALLOC (1, sizeof (map_local_t));
+ local = GF_CALLOC (1, sizeof (map_local_t),
+ gf_map_mt_map_local_t);
if (!local) {
gf_log (this->name, GF_LOG_ERROR,
"memory allocation failed :(");
@@ -2411,8 +2322,12 @@ map_readdir (call_frame_t *frame,
map_deitransform (this, yoff, &xvol, (uint64_t *)&xoff);
- STACK_WIND (frame, map_readdir_cbk, xvol,
- xvol->fops->readdir, fd, size, xoff);
+ if (whichop == GF_FOP_READDIR)
+ STACK_WIND (frame, map_readdir_cbk, xvol, xvol->fops->readdir,
+ fd, size, xoff);
+ else
+ STACK_WIND (frame, map_readdirp_cbk, xvol, xvol->fops->readdirp,
+ fd, size, xoff);
return 0;
err:
@@ -2422,6 +2337,24 @@ map_readdir (call_frame_t *frame,
}
+
+int32_t
+map_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t yoff)
+{
+ map_do_readdir (frame, this, fd, size, yoff, GF_FOP_READDIR);
+ return 0;
+}
+
+int32_t
+map_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t yoff)
+{
+ map_do_readdir (frame, this, fd, size, yoff, GF_FOP_READDIRP);
+ return 0;
+}
+
+
void
fini (xlator_t *this)
{
@@ -2432,22 +2365,40 @@ fini (xlator_t *this)
priv = this->private;
if (priv) {
- if (priv->xlarray)
- FREE (priv->xlarray);
+ GF_FREE (priv->xlarray);
trav_map = priv->map;
while (trav_map) {
tmp_map = trav_map;
trav_map = trav_map->next;
- FREE (tmp_map);
+ GF_FREE (tmp_map);
}
- FREE(priv);
+ GF_FREE(priv);
}
return;
}
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_map_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ return ret;
+ }
+
+ return ret;
+}
+
int
init (xlator_t *this)
{
@@ -2464,6 +2415,7 @@ init (xlator_t *this)
char *subvol_str = NULL;
char *map_xl = NULL;
+
if (!this->children) {
gf_log (this->name,GF_LOG_ERROR,
"FATAL: map should have one or more child defined");
@@ -2475,7 +2427,8 @@ init (xlator_t *this)
"dangling volume. check volfile ");
}
- priv = CALLOC (1, sizeof (map_private_t));
+ priv = GF_CALLOC (1, sizeof (map_private_t),
+ gf_map_mt_map_private_t);
this->private = priv;
/* allocate xlator array */
@@ -2484,7 +2437,8 @@ init (xlator_t *this)
count++;
trav = trav->next;
}
- priv->xlarray = CALLOC (1, sizeof (struct map_xlator_array) * count);
+ priv->xlarray = GF_CALLOC (1, sizeof (struct map_xlator_array) * count,
+ gf_map_mt_map_xlator_array);
priv->child_count = count;
/* build xlator array */
@@ -2504,7 +2458,7 @@ init (xlator_t *this)
}
map_pair_str = strtok_r (pattern_string, ";", &tmp_str);
while (map_pair_str) {
- dup_map_pair = strdup (map_pair_str);
+ dup_map_pair = gf_strdup (map_pair_str);
dir_str = strtok_r (dup_map_pair, ":", &tmp_str1);
if (!dir_str) {
gf_log (this->name, GF_LOG_ERROR,
@@ -2526,7 +2480,7 @@ init (xlator_t *this)
goto err;
}
- FREE (dup_map_pair);
+ GF_FREE (dup_map_pair);
map_pair_str = strtok_r (NULL, ";", &tmp_str);
}
@@ -2557,12 +2511,7 @@ struct xlator_fops fops = {
.create = map_create,
.stat = map_stat,
- .chmod = map_chmod,
- .chown = map_chown,
- .fchown = map_fchown,
- .fchmod = map_fchmod,
.fstat = map_fstat,
- .utimens = map_utimens,
.truncate = map_truncate,
.ftruncate = map_ftruncate,
.access = map_access,
@@ -2581,6 +2530,7 @@ struct xlator_fops fops = {
.lk = map_lk,
.opendir = map_opendir,
.readdir = map_readdir,
+ .readdirp = map_readdirp,
.fsyncdir = map_fsyncdir,
.symlink = map_symlink,
.unlink = map_unlink,
@@ -2597,9 +2547,8 @@ struct xlator_fops fops = {
.setdents = map_setdents,
.getdents = map_getdents,
.checksum = map_checksum,
-};
-
-struct xlator_mops mops = {
+ .setattr = map_setattr,
+ .fsetattr = map_fsetattr,
};
struct xlator_cbks cbks = {
diff --git a/xlators/cluster/map/src/map.h b/xlators/cluster/map/src/map.h
index 72b4f5640..7703a543e 100644
--- a/xlators/cluster/map/src/map.h
+++ b/xlators/cluster/map/src/map.h
@@ -1,26 +1,17 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __MAP_H__
#define __MAP_H__
#include "xlator.h"
+#include "map-mem-types.h"
struct map_pattern {
struct map_pattern *next;
@@ -46,7 +37,7 @@ typedef struct {
int32_t op_errno;
int call_count;
struct statvfs statvfs;
- struct stat stbuf;
+ struct iatt stbuf;
inode_t *inode;
dict_t *dict;
fd_t *fd;
diff --git a/xlators/cluster/stripe/src/Makefile.am b/xlators/cluster/stripe/src/Makefile.am
index 180d0da1f..2d151422a 100644
--- a/xlators/cluster/stripe/src/Makefile.am
+++ b/xlators/cluster/stripe/src/Makefile.am
@@ -2,15 +2,19 @@
xlator_LTLIBRARIES = stripe.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
-stripe_la_LDFLAGS = -module -avoidversion
+stripe_la_LDFLAGS = -module -avoid-version
+
+stripe_la_SOURCES = stripe.c stripe-helpers.c \
+ $(top_builddir)/xlators/lib/src/libxlator.c
-stripe_la_SOURCES = stripe.c
stripe_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = stripe.h
+noinst_HEADERS = stripe.h stripe-mem-types.h $(top_builddir)/xlators/lib/src/libxlator.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/xlators/lib/src
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/cluster/stripe/src/stripe-helpers.c b/xlators/cluster/stripe/src/stripe-helpers.c
new file mode 100644
index 000000000..a83abdc72
--- /dev/null
+++ b/xlators/cluster/stripe/src/stripe-helpers.c
@@ -0,0 +1,675 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <fnmatch.h>
+
+#include "stripe.h"
+#include "byte-order.h"
+#include "mem-types.h"
+
+void
+stripe_local_wipe (stripe_local_t *local)
+{
+ if (!local)
+ goto out;
+
+ loc_wipe (&local->loc);
+ loc_wipe (&local->loc2);
+
+ if (local->fd)
+ fd_unref (local->fd);
+
+ if (local->inode)
+ inode_unref (local->inode);
+
+ if (local->xattr)
+ dict_unref (local->xattr);
+
+ if (local->xdata)
+ dict_unref (local->xdata);
+
+out:
+ return;
+}
+
+
+
+int
+stripe_aggregate (dict_t *this, char *key, data_t *value, void *data)
+{
+ dict_t *dst = NULL;
+ int64_t *ptr = 0, *size = NULL;
+ int32_t ret = -1;
+
+ dst = data;
+
+ if (strcmp (key, GF_XATTR_QUOTA_SIZE_KEY) == 0) {
+ ret = dict_get_bin (dst, key, (void **)&size);
+ if (ret < 0) {
+ size = GF_CALLOC (1, sizeof (int64_t),
+ gf_common_mt_char);
+ if (size == NULL) {
+ gf_log ("stripe", GF_LOG_WARNING,
+ "memory allocation failed");
+ goto out;
+ }
+ ret = dict_set_bin (dst, key, size, sizeof (int64_t));
+ if (ret < 0) {
+ gf_log ("stripe", GF_LOG_WARNING,
+ "stripe aggregate dict set failed");
+ GF_FREE (size);
+ goto out;
+ }
+ }
+
+ ptr = data_to_bin (value);
+ if (ptr == NULL) {
+ gf_log ("stripe", GF_LOG_WARNING, "data to bin failed");
+ goto out;
+ }
+
+ *size = hton64 (ntoh64 (*size) + ntoh64 (*ptr));
+ } else if (strcmp (key, GF_CONTENT_KEY)) {
+ /* No need to aggregate 'CONTENT' data */
+ ret = dict_set (dst, key, value);
+ if (ret)
+ gf_log ("stripe", GF_LOG_WARNING, "xattr dict set failed");
+ }
+
+out:
+ return 0;
+}
+
+
+void
+stripe_aggregate_xattr (dict_t *dst, dict_t *src)
+{
+ if ((dst == NULL) || (src == NULL)) {
+ goto out;
+ }
+
+ dict_foreach (src, stripe_aggregate, dst);
+out:
+ return;
+}
+
+
+int32_t
+stripe_xattr_aggregate (char *buffer, stripe_local_t *local, int32_t *total)
+{
+ int32_t i = 0;
+ int32_t ret = -1;
+ int32_t len = 0;
+ char *sbuf = NULL;
+ stripe_xattr_sort_t *xattr = NULL;
+
+ if (!buffer || !local || !local->xattr_list)
+ goto out;
+
+ sbuf = buffer;
+
+ for (i = 0; i < local->nallocs; i++) {
+ xattr = local->xattr_list + i;
+ len = xattr->xattr_len;
+
+ if (len && xattr && xattr->xattr_value) {
+ memcpy (buffer, xattr->xattr_value, len);
+ buffer += len;
+ *buffer++ = ' ';
+ }
+ }
+
+ *--buffer = '\0';
+ if (total)
+ *total = buffer - sbuf;
+ ret = 0;
+
+ out:
+ return ret;
+}
+
+int32_t
+stripe_free_xattr_str (stripe_local_t *local)
+{
+ int32_t i = 0;
+ int32_t ret = -1;
+ stripe_xattr_sort_t *xattr = NULL;
+
+ if (!local || !local->xattr_list)
+ goto out;
+
+ for (i = 0; i < local->nallocs; i++) {
+ xattr = local->xattr_list + i;
+
+ if (xattr && xattr->xattr_value)
+ GF_FREE (xattr->xattr_value);
+ }
+
+ ret = 0;
+ out:
+ return ret;
+}
+
+
+int32_t
+stripe_fill_lockinfo_xattr (xlator_t *this, stripe_local_t *local,
+ void **xattr_serz)
+{
+ int32_t ret = -1, i = 0, len = 0;
+ dict_t *tmp1 = NULL, *tmp2 = NULL;
+ char *buf = NULL;
+ stripe_xattr_sort_t *xattr = NULL;
+
+ if (xattr_serz == NULL) {
+ goto out;
+ }
+
+ tmp2 = dict_new ();
+
+ if (tmp2 == NULL) {
+ goto out;
+ }
+
+ for (i = 0; i < local->nallocs; i++) {
+ xattr = local->xattr_list + i;
+ len = xattr->xattr_len;
+
+ if (len && xattr && xattr->xattr_value) {
+ ret = dict_reset (tmp2);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "dict_reset failed (%s)",
+ strerror (-ret));
+ }
+
+ ret = dict_unserialize (xattr->xattr_value,
+ xattr->xattr_len,
+ &tmp2);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dict_unserialize failed (%s)",
+ strerror (-ret));
+ ret = -1;
+ goto out;
+ }
+
+ tmp1 = dict_copy (tmp2, tmp1);
+ if (tmp1 == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dict_copy failed (%s)",
+ strerror (-ret));
+ ret = -1;
+ goto out;
+ }
+ }
+ }
+
+ len = dict_serialized_length (tmp1);
+ if (len > 0) {
+ buf = GF_CALLOC (1, len, gf_common_mt_dict_t);
+ if (buf == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_serialize (tmp1, buf);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dict_serialize failed (%s)", strerror (-ret));
+ ret = -1;
+ goto out;
+ }
+
+ *xattr_serz = buf;
+ }
+
+ ret = 0;
+out:
+ if (tmp1 != NULL) {
+ dict_unref (tmp1);
+ }
+
+ if (tmp2 != NULL) {
+ dict_unref (tmp2);
+ }
+
+ return ret;
+}
+
+
+int32_t
+stripe_fill_pathinfo_xattr (xlator_t *this, stripe_local_t *local,
+ char **xattr_serz)
+{
+ int ret = -1;
+ int32_t padding = 0;
+ int32_t tlen = 0;
+ char stripe_size_str[20] = {0,};
+ char *pathinfo_serz = NULL;
+
+ if (!local) {
+ gf_log (this->name, GF_LOG_ERROR, "Possible NULL deref");
+ goto out;
+ }
+
+ (void) snprintf (stripe_size_str, 20, "%ld",
+ (local->fctx) ? local->fctx->stripe_size : 0);
+
+ /* extra bytes for decorations (brackets and <>'s) */
+ padding = strlen (this->name) + strlen (STRIPE_PATHINFO_HEADER)
+ + strlen (stripe_size_str) + 7;
+ local->xattr_total_len += (padding + 2);
+
+ pathinfo_serz = GF_CALLOC (local->xattr_total_len, sizeof (char),
+ gf_common_mt_char);
+ if (!pathinfo_serz)
+ goto out;
+
+ /* xlator info */
+ (void) sprintf (pathinfo_serz, "(<"STRIPE_PATHINFO_HEADER"%s:[%s]> ",
+ this->name, stripe_size_str);
+
+ ret = stripe_xattr_aggregate (pathinfo_serz + padding, local, &tlen);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Cannot aggregate pathinfo list");
+ goto out;
+ }
+
+ *(pathinfo_serz + padding + tlen) = ')';
+ *(pathinfo_serz + padding + tlen + 1) = '\0';
+
+ *xattr_serz = pathinfo_serz;
+
+ ret = 0;
+ out:
+ return ret;
+}
+
+/**
+ * stripe_get_matching_bs - Get the matching block size for the given path.
+ */
+int32_t
+stripe_get_matching_bs (const char *path, stripe_private_t *priv)
+{
+ struct stripe_options *trav = NULL;
+ uint64_t block_size = 0;
+
+ GF_VALIDATE_OR_GOTO ("stripe", priv, out);
+ GF_VALIDATE_OR_GOTO ("stripe", path, out);
+
+ LOCK (&priv->lock);
+ {
+ block_size = priv->block_size;
+ trav = priv->pattern;
+ while (trav) {
+ if (!fnmatch (trav->path_pattern, path, FNM_NOESCAPE)) {
+ block_size = trav->block_size;
+ break;
+ }
+ trav = trav->next;
+ }
+ }
+ UNLOCK (&priv->lock);
+
+out:
+ return block_size;
+}
+
+int32_t
+stripe_ctx_handle (xlator_t *this, call_frame_t *prev, stripe_local_t *local,
+ dict_t *dict)
+{
+ char key[256] = {0,};
+ data_t *data = NULL;
+ int32_t index = 0;
+ stripe_private_t *priv = NULL;
+
+ priv = this->private;
+
+
+ if (!local->fctx) {
+ local->fctx = GF_CALLOC (1, sizeof (stripe_fd_ctx_t),
+ gf_stripe_mt_stripe_fd_ctx_t);
+ if (!local->fctx) {
+ local->op_errno = ENOMEM;
+ local->op_ret = -1;
+ goto out;
+ }
+
+ local->fctx->static_array = 0;
+ }
+ /* Stripe block size */
+ sprintf (key, "trusted.%s.stripe-size", this->name);
+ data = dict_get (dict, key);
+ if (!data) {
+ local->xattr_self_heal_needed = 1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get stripe-size");
+ goto out;
+ } else {
+ if (!local->fctx->stripe_size) {
+ local->fctx->stripe_size =
+ data_to_int64 (data);
+ }
+
+ if (local->fctx->stripe_size != data_to_int64 (data)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "stripe-size mismatch in blocks");
+ local->xattr_self_heal_needed = 1;
+ }
+ }
+
+ /* Stripe count */
+ sprintf (key, "trusted.%s.stripe-count", this->name);
+ data = dict_get (dict, key);
+
+ if (!data) {
+ local->xattr_self_heal_needed = 1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get stripe-count");
+ goto out;
+ }
+ if (!local->fctx->xl_array) {
+ local->fctx->stripe_count = data_to_int32 (data);
+ if (!local->fctx->stripe_count) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "error with stripe-count xattr");
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto out;
+ }
+
+ local->fctx->xl_array = GF_CALLOC (local->fctx->stripe_count,
+ sizeof (xlator_t *),
+ gf_stripe_mt_xlator_t);
+
+ if (!local->fctx->xl_array) {
+ local->op_errno = ENOMEM;
+ local->op_ret = -1;
+ goto out;
+ }
+ }
+ if (local->fctx->stripe_count != data_to_int32 (data)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "error with stripe-count xattr (%d != %d)",
+ local->fctx->stripe_count, data_to_int32 (data));
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto out;
+ }
+
+ /* index */
+ sprintf (key, "trusted.%s.stripe-index", this->name);
+ data = dict_get (dict, key);
+ if (!data) {
+ local->xattr_self_heal_needed = 1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get stripe-index");
+ goto out;
+ }
+ index = data_to_int32 (data);
+ if (index > priv->child_count) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "error with stripe-index xattr (%d)", index);
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto out;
+ }
+ if (local->fctx->xl_array) {
+ if (!local->fctx->xl_array[index])
+ local->fctx->xl_array[index] = prev->this;
+ }
+
+ sprintf(key, "trusted.%s.stripe-coalesce", this->name);
+ data = dict_get(dict, key);
+ if (!data) {
+ /*
+ * The file was probably created prior to coalesce support.
+ * Assume non-coalesce mode for this file to maintain backwards
+ * compatibility.
+ */
+ gf_log(this->name, GF_LOG_DEBUG, "missing stripe-coalesce "
+ "attr, assume non-coalesce mode");
+ local->fctx->stripe_coalesce = 0;
+ } else {
+ local->fctx->stripe_coalesce = data_to_int32(data);
+ }
+
+
+out:
+ return 0;
+}
+
+int32_t
+stripe_xattr_request_build (xlator_t *this, dict_t *dict, uint64_t stripe_size,
+ uint32_t stripe_count, uint32_t stripe_index,
+ uint32_t stripe_coalesce)
+{
+ char key[256] = {0,};
+ int32_t ret = -1;
+
+ sprintf (key, "trusted.%s.stripe-size", this->name);
+ ret = dict_set_int64 (dict, key, stripe_size);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set %s in xattr_req dict", key);
+ goto out;
+ }
+
+ sprintf (key, "trusted.%s.stripe-count", this->name);
+ ret = dict_set_int32 (dict, key, stripe_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set %s in xattr_req dict", key);
+ goto out;
+ }
+
+ sprintf (key, "trusted.%s.stripe-index", this->name);
+ ret = dict_set_int32 (dict, key, stripe_index);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set %s in xattr_req dict", key);
+ goto out;
+ }
+
+ sprintf(key, "trusted.%s.stripe-coalesce", this->name);
+ ret = dict_set_int32(dict, key, stripe_coalesce);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "failed to set %s in xattr_req_dict", key);
+ goto out;
+ }
+out:
+ return ret;
+}
+
+
+static int
+set_default_block_size (stripe_private_t *priv, char *num)
+{
+
+ int ret = -1;
+ GF_VALIDATE_OR_GOTO ("stripe", THIS, out);
+ GF_VALIDATE_OR_GOTO (THIS->name, priv, out);
+ GF_VALIDATE_OR_GOTO (THIS->name, num, out);
+
+
+ if (gf_string2bytesize (num, &priv->block_size) != 0) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "invalid number format \"%s\"", num);
+ goto out;
+ }
+
+ ret = 0;
+
+ out:
+ return ret;
+
+}
+
+
+int
+set_stripe_block_size (xlator_t *this, stripe_private_t *priv, char *data)
+{
+ int ret = -1;
+ char *tmp_str = NULL;
+ char *tmp_str1 = NULL;
+ char *dup_str = NULL;
+ char *stripe_str = NULL;
+ char *pattern = NULL;
+ char *num = NULL;
+ struct stripe_options *temp_stripeopt = NULL;
+ struct stripe_options *stripe_opt = NULL;
+
+ if (!this || !priv || !data)
+ goto out;
+
+ /* Get the pattern for striping.
+ "option block-size *avi:10MB" etc */
+ stripe_str = strtok_r (data, ",", &tmp_str);
+ while (stripe_str) {
+ dup_str = gf_strdup (stripe_str);
+ stripe_opt = GF_CALLOC (1, sizeof (struct stripe_options),
+ gf_stripe_mt_stripe_options);
+ if (!stripe_opt) {
+ goto out;
+ }
+
+ pattern = strtok_r (dup_str, ":", &tmp_str1);
+ num = strtok_r (NULL, ":", &tmp_str1);
+ if (!num) {
+ num = pattern;
+ pattern = "*";
+ ret = set_default_block_size (priv, num);
+ if (ret)
+ goto out;
+ }
+ if (gf_string2bytesize (num, &stripe_opt->block_size) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "invalid number format \"%s\"", num);
+ goto out;
+ }
+
+ if (stripe_opt->block_size < STRIPE_MIN_BLOCK_SIZE) {
+ gf_log (this->name, GF_LOG_ERROR, "Invalid Block-size: "
+ "%s. Should be atleast %llu bytes", num,
+ STRIPE_MIN_BLOCK_SIZE);
+ goto out;
+ }
+ if (stripe_opt->block_size % 512) {
+ gf_log (this->name, GF_LOG_ERROR, "Block-size: %s should"
+ " be a multiple of 512 bytes", num);
+ goto out;
+ }
+
+ memcpy (stripe_opt->path_pattern, pattern, strlen (pattern));
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "block-size : pattern %s : size %"PRId64,
+ stripe_opt->path_pattern, stripe_opt->block_size);
+
+ if (priv->pattern)
+ temp_stripeopt = NULL;
+ else
+ temp_stripeopt = priv->pattern;
+
+ stripe_opt->next = temp_stripeopt;
+
+ priv->pattern = stripe_opt;
+ stripe_opt = NULL;
+
+ GF_FREE (dup_str);
+ dup_str = NULL;
+
+ stripe_str = strtok_r (NULL, ",", &tmp_str);
+ }
+
+ ret = 0;
+out:
+
+ GF_FREE (dup_str);
+
+ GF_FREE (stripe_opt);
+
+ return ret;
+}
+
+int32_t
+stripe_iatt_merge (struct iatt *from, struct iatt *to)
+{
+ if (to->ia_size < from->ia_size)
+ to->ia_size = from->ia_size;
+ if (to->ia_mtime < from->ia_mtime)
+ to->ia_mtime = from->ia_mtime;
+ if (to->ia_ctime < from->ia_ctime)
+ to->ia_ctime = from->ia_ctime;
+ if (to->ia_atime < from->ia_atime)
+ to->ia_atime = from->ia_atime;
+ return 0;
+}
+
+off_t
+coalesced_offset(off_t offset, uint64_t stripe_size, int stripe_count)
+{
+ size_t line_size = 0;
+ uint64_t stripe_num = 0;
+ off_t coalesced_offset = 0;
+
+ line_size = stripe_size * stripe_count;
+ stripe_num = offset / line_size;
+
+ coalesced_offset = (stripe_num * stripe_size) +
+ (offset % stripe_size);
+
+ return coalesced_offset;
+}
+
+off_t
+uncoalesced_size(off_t size, uint64_t stripe_size, int stripe_count,
+ int stripe_index)
+{
+ uint64_t nr_full_stripe_chunks = 0, mod = 0;
+
+ if (!size)
+ return size;
+
+ /*
+ * Estimate the number of fully written stripes from the
+ * local file size. Each stripe_size chunk corresponds to
+ * a stripe.
+ */
+ nr_full_stripe_chunks = (size / stripe_size) * stripe_count;
+ mod = size % stripe_size;
+
+ if (!mod) {
+ /*
+ * There is no remainder, thus we could have overestimated
+ * the size of the file in terms of chunks. Trim the number
+ * of chunks by the following stripe members and leave it
+ * up to those nodes to respond with a larger size (if
+ * necessary).
+ */
+ nr_full_stripe_chunks -= stripe_count -
+ (stripe_index + 1);
+ size = nr_full_stripe_chunks * stripe_size;
+ } else {
+ /*
+ * There is a remainder and thus we own the last chunk of the
+ * file. Add the preceding stripe members of the final stripe
+ * along with the remainder to calculate the exact size.
+ */
+ nr_full_stripe_chunks += stripe_index;
+ size = nr_full_stripe_chunks * stripe_size + mod;
+ }
+
+ return size;
+}
+
diff --git a/xlators/cluster/stripe/src/stripe-mem-types.h b/xlators/cluster/stripe/src/stripe-mem-types.h
new file mode 100644
index 000000000..e9ac9cf46
--- /dev/null
+++ b/xlators/cluster/stripe/src/stripe-mem-types.h
@@ -0,0 +1,31 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __STRIPE_MEM_TYPES_H__
+#define __STRIPE_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_stripe_mem_types_ {
+ gf_stripe_mt_iovec = gf_common_mt_end + 1,
+ gf_stripe_mt_stripe_replies,
+ gf_stripe_mt_stripe_fd_ctx_t,
+ gf_stripe_mt_char,
+ gf_stripe_mt_int8_t,
+ gf_stripe_mt_int32_t,
+ gf_stripe_mt_xlator_t,
+ gf_stripe_mt_stripe_private_t,
+ gf_stripe_mt_stripe_options,
+ gf_stripe_mt_xattr_sort_t,
+ gf_stripe_mt_end
+};
+#endif
+
diff --git a/xlators/cluster/stripe/src/stripe.c b/xlators/cluster/stripe/src/stripe.c
index ee230a9c3..69b510e23 100644
--- a/xlators/cluster/stripe/src/stripe.c
+++ b/xlators/cluster/stripe/src/stripe.c
@@ -1,305 +1,182 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
/**
* xlators/cluster/stripe:
- * Stripe translator, stripes the data accross its child nodes,
- * as per the options given in the volfile. The striping works
- * fairly simple. It writes files at different offset as per
- * calculation. So, 'ls -l' output at the real posix level will
- * show file size bigger than the actual size. But when one does
+ * Stripe translator, stripes the data across its child nodes,
+ * as per the options given in the volfile. The striping works
+ * fairly simple. It writes files at different offset as per
+ * calculation. So, 'ls -l' output at the real posix level will
+ * show file size bigger than the actual size. But when one does
* 'df' or 'du <file>', real size of the file on the server is shown.
*
* WARNING:
* Stripe translator can't regenerate data if a child node gets disconnected.
- * So, no 'self-heal' for stripe. Hence the advice, use stripe only when its
- * very much necessary, or else, use it in combination with AFR, to have a
- * backup copy.
- */
-
-/* TODO:
- * 1. Implement basic self-heal ability to manage the basic backend
- * layout missmatch.
- *
+ * So, no 'self-heal' for stripe. Hence the advice, use stripe only when its
+ * very much necessary, or else, use it in combination with AFR, to have a
+ * backup copy.
*/
+#include <fnmatch.h>
#include "stripe.h"
+#include "libxlator.h"
+#include "byte-order.h"
+#include "statedump.h"
-/**
- * stripe_get_matching_bs - Get the matching block size for the given path.
- */
-int32_t
-stripe_get_matching_bs (const char *path, struct stripe_options *opts,
- uint64_t default_bs)
-{
- struct stripe_options *trav = NULL;
- char *pathname = NULL;
- uint64_t block_size = 0;
-
- block_size = default_bs;
- pathname = strdup (path);
- trav = opts;
-
- while (trav) {
- if (!fnmatch (trav->path_pattern, pathname, FNM_NOESCAPE)) {
- block_size = trav->block_size;
- break;
- }
- trav = trav->next;
- }
- free (pathname);
-
- return block_size;
-}
-
+struct volume_options options[];
-/*
- * stripe_common_cbk -
- */
int32_t
-stripe_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * stripe_stack_unwind_cbk - This function is used for all the _cbk without
- * any extra arguments (other than the minimum given)
- * This is called from functions like fsync,unlink,rmdir etc.
- *
- */
-int32_t
-stripe_stack_unwind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+stripe_sh_chown_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
- int32_t callcnt = 0;
+ int callcnt = -1;
stripe_local_t *local = NULL;
+ if (!this || !frame || !frame->local) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
local = frame->local;
LOCK (&frame->lock);
{
callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned %s",
- ((call_frame_t *)cookie)->this->name,
- strerror (op_errno));
- local->op_errno = op_errno;
- if (op_errno == ENOTCONN)
- local->failed = 1;
- }
- if (op_ret >= 0)
- local->op_ret = op_ret;
}
UNLOCK (&frame->lock);
if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->loc.path)
- loc_wipe (&local->loc);
- if (local->loc2.path)
- loc_wipe (&local->loc2);
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno);
+ STRIPE_STACK_DESTROY (frame);
}
+out:
return 0;
}
-int32_t
-stripe_common_buf_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-/**
- * stripe_stack_unwind_buf_cbk - This function is used for all the _cbk with
- * 'struct stat *buf' as extra argument (other than minimum)
- * This is called from functions like, chmod, fchmod, chown, fchown,
- * truncate, ftruncate, utimens etc.
- *
- * @cookie - this argument should be always 'xlator_t *' of child node
- */
-int32_t
-stripe_stack_unwind_buf_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct stat *buf)
+int32_t
+stripe_sh_make_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- int32_t callcnt = 0;
stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- ((call_frame_t *)cookie)->this->name,
- strerror (op_errno));
- local->op_errno = op_errno;
- if (op_errno == ENOTCONN)
- local->failed = 1;
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
- if (local->stbuf.st_blksize == 0) {
- local->stbuf = *buf;
- /* Because st_blocks gets added again */
- local->stbuf.st_blocks = 0;
- }
-
- if (FIRST_CHILD(this) ==
- ((call_frame_t *)cookie)->this) {
- /* Always, pass the inode number of
- first child to the above layer */
- local->stbuf.st_ino = buf->st_ino;
- local->stbuf.st_mtime = buf->st_mtime;
- }
-
- local->stbuf.st_blocks += buf->st_blocks;
- if (local->stbuf.st_size < buf->st_size)
- local->stbuf.st_size = buf->st_size;
- if (local->stbuf.st_blksize != buf->st_blksize) {
- /* TODO: add to blocks in terms of
- original block size */
- }
- }
+ if (!frame || !frame->local || !cookie || !this) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
}
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
- if (local->loc.path)
- loc_wipe (&local->loc);
- if (local->loc2.path)
- loc_wipe (&local->loc2);
+ prev = cookie;
+ local = frame->local;
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf);
- }
+ STACK_WIND (frame, stripe_sh_chown_cbk, prev->this,
+ prev->this->fops->setattr, &local->loc,
+ &local->stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID), NULL);
+out:
return 0;
}
-/* In case of symlink, mknod, the file is created on just first node */
-int32_t
-stripe_common_inode_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *buf)
+int32_t
+stripe_entry_self_heal (call_frame_t *frame, xlator_t *this,
+ stripe_local_t *local)
{
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
- return 0;
-}
+ xlator_list_t *trav = NULL;
+ call_frame_t *rframe = NULL;
+ stripe_local_t *rlocal = NULL;
+ stripe_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+ int ret = 0;
-/**
- * stripe_stack_unwind_inode_cbk - This is called by the function like,
- * link (), symlink (), mkdir (), mknod ()
- * This creates a inode for new inode. It keeps a list of all
- * the inodes received from the child nodes. It is used while
- * forwarding any fops to child nodes.
- *
- */
-int32_t
-stripe_stack_unwind_inode_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, inode_t *inode,
- struct stat *buf)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
+ if (!local || !this || !frame) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
- local = frame->local;
+ if (!(IA_ISREG (local->stbuf.ia_type) ||
+ IA_ISDIR (local->stbuf.ia_type)))
+ return 0;
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- ((call_frame_t *)cookie)->this->name,
- strerror (op_errno));
- local->op_errno = op_errno;
- if (op_errno == ENOTCONN)
- local->failed = 1;
- }
-
- if (op_ret >= 0) {
- local->op_ret = 0;
+ priv = this->private;
+ trav = this->children;
+ rframe = copy_frame (frame);
+ if (!rframe) {
+ goto out;
+ }
+ rlocal = mem_get0 (this->local_pool);
+ if (!rlocal) {
+ goto out;
+ }
+ rframe->local = rlocal;
+ rlocal->call_count = priv->child_count;
+ loc_copy (&rlocal->loc, &local->loc);
+ memcpy (&rlocal->stbuf, &local->stbuf, sizeof (struct iatt));
- if (local->stbuf.st_blksize == 0) {
- local->inode = inode;
- local->stbuf = *buf;
- /* Because st_blocks gets added again */
- local->stbuf.st_blocks = 0;
- }
- if (FIRST_CHILD(this) ==
- ((call_frame_t *)cookie)->this) {
- local->stbuf.st_ino = buf->st_ino;
- local->stbuf.st_mtime = buf->st_mtime;
- }
+ xdata = dict_new ();
+ if (!xdata)
+ goto out;
- local->stbuf.st_blocks += buf->st_blocks;
- if (local->stbuf.st_size < buf->st_size)
- local->stbuf.st_size = buf->st_size;
- if (local->stbuf.st_blksize != buf->st_blksize) {
- /* TODO: add to blocks in terms of
- original block size */
- }
+ ret = dict_set_static_bin (xdata, "gfid-req", local->stbuf.ia_gfid, 16);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set gfid-req", local->loc.path);
+
+ while (trav) {
+ if (IA_ISREG (local->stbuf.ia_type)) {
+ STACK_WIND (rframe, stripe_sh_make_entry_cbk,
+ trav->xlator, trav->xlator->fops->mknod,
+ &local->loc,
+ st_mode_from_ia (local->stbuf.ia_prot,
+ local->stbuf.ia_type),
+ 0, 0, xdata);
}
+ if (IA_ISDIR (local->stbuf.ia_type)) {
+ STACK_WIND (rframe, stripe_sh_make_entry_cbk,
+ trav->xlator, trav->xlator->fops->mkdir,
+ &local->loc,
+ st_mode_from_ia (local->stbuf.ia_prot,
+ local->stbuf.ia_type),
+ 0, xdata);
+ }
+ trav = trav->next;
}
- UNLOCK (&frame->lock);
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
+ if (xdata)
+ dict_unref (xdata);
+ return 0;
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf);
- }
+out:
+ if (rframe)
+ STRIPE_STACK_DESTROY (rframe);
+ if (xdata)
+ dict_unref (xdata);
return 0;
}
-int32_t
-stripe_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+
+int32_t
+stripe_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *buf, dict_t *dict)
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
{
- int32_t callcnt = 0;
- dict_t *tmp_dict = NULL;
- inode_t *tmp_inode = NULL;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ int ret = 0;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
prev = cookie;
local = frame->local;
@@ -307,89 +184,115 @@ stripe_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
LOCK (&frame->lock);
{
callcnt = --local->call_count;
-
+
if (op_ret == -1) {
if (op_errno != ENOENT)
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_DEBUG,
"%s returned error %s",
prev->this->name,
strerror (op_errno));
if (local->op_errno != ESTALE)
local->op_errno = op_errno;
- if ((op_errno == ENOTCONN) || (op_errno == ESTALE))
+ if (((op_errno != ENOENT) && (op_errno != ENOTCONN)) ||
+ (prev->this == FIRST_CHILD (this)))
local->failed = 1;
- /* TODO: bring in self-heal ability */
- /*
- * if (local->op_ret == 0) {
- * if (S_ISREG (local->stbuf.st_mode) ||
- * S_ISDIR (local->stbuf.st_mode))
- * local->entry_self_heal_needed = 1;
- * }
- */
- }
-
+ if (op_errno == ENOENT)
+ local->entry_self_heal_needed = 1;
+ }
+
if (op_ret >= 0) {
local->op_ret = 0;
+ if (IA_ISREG (buf->ia_type)) {
+ ret = stripe_ctx_handle (this, prev, local,
+ xdata);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error getting fctx info from"
+ " dict");
+ }
- if (local->stbuf.st_blksize == 0) {
+ if (FIRST_CHILD(this) == prev->this) {
+ local->stbuf = *buf;
+ local->postparent = *postparent;
local->inode = inode_ref (inode);
- local->stbuf = *buf;
- /* Because st_blocks gets added again */
- local->stbuf.st_blocks = 0;
+ if (xdata)
+ local->xdata = dict_ref (xdata);
+ if (local->xattr) {
+ stripe_aggregate_xattr (local->xdata,
+ local->xattr);
+ dict_unref (local->xattr);
+ local->xattr = NULL;
+ }
}
- if (FIRST_CHILD(this) == prev->this) {
- local->stbuf.st_ino = buf->st_ino;
- local->stbuf.st_mtime = buf->st_mtime;
- if (local->dict)
- dict_unref (local->dict);
- local->dict = dict_ref (dict);
- } else {
- if (!local->dict)
- local->dict = dict_ref (dict);
+
+ if (!local->xdata && !local->xattr) {
+ local->xattr = dict_ref (xdata);
+ } else if (local->xdata) {
+ stripe_aggregate_xattr (local->xdata, xdata);
+ } else if (local->xattr) {
+ stripe_aggregate_xattr (local->xattr, xdata);
}
- local->stbuf.st_blocks += buf->st_blocks;
- if (local->stbuf.st_size < buf->st_size)
- local->stbuf.st_size = buf->st_size;
- if (local->stbuf.st_blksize != buf->st_blksize) {
- /* TODO: add to blocks in terms of
- original block size */
+
+ local->stbuf_blocks += buf->ia_blocks;
+ local->postparent_blocks += postparent->ia_blocks;
+
+ correct_file_size(buf, local->fctx, prev);
+
+ if (local->stbuf_size < buf->ia_size)
+ local->stbuf_size = buf->ia_size;
+ if (local->postparent_size < postparent->ia_size)
+ local->postparent_size = postparent->ia_size;
+
+ if (uuid_is_null (local->ia_gfid))
+ uuid_copy (local->ia_gfid, buf->ia_gfid);
+
+ /* Make sure the gfid on all the nodes are same */
+ if (uuid_compare (local->ia_gfid, buf->ia_gfid)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: gfid different on subvolume %s",
+ local->loc.path, prev->this->name);
}
}
}
UNLOCK (&frame->lock);
if (!callcnt) {
+ if (local->op_ret == 0 && local->entry_self_heal_needed &&
+ !uuid_is_null (local->loc.inode->gfid))
+ stripe_entry_self_heal (frame, this, local);
+
if (local->failed)
local->op_ret = -1;
- tmp_dict = local->dict;
- tmp_inode = local->inode;
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, local->dict);
+ if (local->op_ret != -1) {
+ local->stbuf.ia_blocks = local->stbuf_blocks;
+ local->stbuf.ia_size = local->stbuf_size;
+ local->postparent.ia_blocks = local->postparent_blocks;
+ local->postparent.ia_size = local->postparent_size;
+ inode_ctx_put (local->inode, this,
+ (uint64_t) (long)local->fctx);
+ }
- if (tmp_inode)
- inode_unref (tmp_inode);
- if (tmp_dict)
- dict_unref (tmp_dict);
+ STRIPE_STACK_UNWIND (lookup, frame, local->op_ret,
+ local->op_errno, local->inode,
+ &local->stbuf, local->xdata,
+ &local->postparent);
}
-
+out:
return 0;
}
-
-/**
- * stripe_lookup -
- */
-int32_t
-stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xattr_req)
+int32_t
+stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
{
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- char send_lookup_to_all = 0;
- int32_t op_errno = 1;
+ stripe_local_t *local = NULL;
+ xlator_list_t *trav = NULL;
+ stripe_private_t *priv = NULL;
+ int32_t op_errno = EINVAL;
+ int64_t filesize = 0;
+ int ret = 0;
+ uint64_t tmpctx = 0;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -401,172 +304,136 @@ stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
trav = this->children;
/* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
frame->local = local;
+ loc_copy (&local->loc, loc);
- if ((!loc->inode->st_mode) || S_ISDIR (loc->inode->st_mode) ||
- S_ISREG (loc->inode->st_mode)) {
- send_lookup_to_all = 1;
- }
+ inode_ctx_get (local->inode, this, &tmpctx);
+ if (tmpctx)
+ local->fctx = (stripe_fd_ctx_t*) (long)tmpctx;
- if (send_lookup_to_all) {
- /* Everytime in stripe lookup, all child nodes
- should be looked up */
- local->call_count = priv->child_count;
- while (trav) {
- STACK_WIND (frame, stripe_lookup_cbk, trav->xlator,
- trav->xlator->fops->lookup,
- loc, xattr_req);
- trav = trav->next;
- }
- } else {
- local->call_count = 1;
-
- STACK_WIND (frame, stripe_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup,
- loc, xattr_req);
+ /* quick-read friendly changes */
+ if (xdata && dict_get (xdata, GF_CONTENT_KEY)) {
+ ret = dict_get_int64 (xdata, GF_CONTENT_KEY, &filesize);
+ if (!ret && (filesize > priv->block_size))
+ dict_del (xdata, GF_CONTENT_KEY);
}
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-/**
- * stripe_stat -
- */
-int32_t
-stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- int send_lookup_to_all = 0;
- xlator_list_t *trav = NULL;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = 1;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
-
- priv = this->private;
- trav = this->children;
+ /* get stripe-size xattr on lookup. This would be required for
+ * open/read/write/pathinfo calls. Hence we send down the request
+ * even when type == IA_INVAL */
+
+ /*
+ * We aren't guaranteed to have xdata here. We need the format info for
+ * the file, so allocate xdata if necessary.
+ */
+ if (!xdata)
+ xdata = dict_new();
+ else
+ xdata = dict_ref(xdata);
+
+ if (xdata && (IA_ISREG (loc->inode->ia_type) ||
+ (loc->inode->ia_type == IA_INVAL))) {
+ ret = stripe_xattr_request_build (this, xdata, 8, 4, 4, 0);
+ if (ret)
+ gf_log (this->name , GF_LOG_ERROR, "Failed to build"
+ " xattr request for %s", loc->path);
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
}
- if (S_ISDIR (loc->inode->st_mode) || S_ISREG (loc->inode->st_mode))
- send_lookup_to_all = 1;
-
- if (!send_lookup_to_all) {
- STACK_WIND (frame, stripe_common_buf_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc);
- } else {
- /* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->inode = loc->inode;
- local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_buf_cbk,
- trav->xlator, trav->xlator->fops->stat,
- loc);
- trav = trav->next;
- }
+ /* Everytime in stripe lookup, all child nodes
+ should be looked up */
+ local->call_count = priv->child_count;
+ while (trav) {
+ STACK_WIND (frame, stripe_lookup_cbk, trav->xlator,
+ trav->xlator->fops->lookup, loc, xdata);
+ trav = trav->next;
}
+ dict_unref(xdata);
+
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+err:
+ STRIPE_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
return 0;
}
-/**
- * stripe_chmod -
- */
int32_t
-stripe_chmod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
+stripe_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- int send_fop_to_all = 0;
- xlator_list_t *trav = NULL;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = 1;
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+ prev = cookie;
+ local = frame->local;
- priv = this->private;
- trav = this->children;
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s returned error %s",
+ prev->this->name, strerror (op_errno));
+ local->op_errno = op_errno;
+ if ((op_errno != ENOENT) ||
+ (prev->this == FIRST_CHILD (this)))
+ local->failed = 1;
+ }
- if (S_ISDIR (loc->inode->st_mode) || S_ISREG (loc->inode->st_mode))
- send_fop_to_all = 1;
+ if (op_ret == 0) {
+ local->op_ret = 0;
- if (!send_fop_to_all) {
- STACK_WIND (frame, stripe_common_buf_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->chmod, loc, mode);
- } else {
- /* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
- if (!local) {
- op_errno = ENOMEM;
- goto err;
+ if (FIRST_CHILD(this) == prev->this) {
+ local->stbuf = *buf;
+ }
+
+ local->stbuf_blocks += buf->ia_blocks;
+
+ correct_file_size(buf, local->fctx, prev);
+
+ if (local->stbuf_size < buf->ia_size)
+ local->stbuf_size = buf->ia_size;
}
- local->op_ret = -1;
- frame->local = local;
- local->inode = loc->inode;
- local->call_count = priv->child_count;
+ }
+ UNLOCK (&frame->lock);
- while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_buf_cbk,
- trav->xlator, trav->xlator->fops->chmod,
- loc, mode);
- trav = trav->next;
+ if (!callcnt) {
+ if (local->failed)
+ local->op_ret = -1;
+
+ if (local->op_ret != -1) {
+ local->stbuf.ia_size = local->stbuf_size;
+ local->stbuf.ia_blocks = local->stbuf_blocks;
}
+
+ STRIPE_STACK_UNWIND (stat, frame, local->op_ret,
+ local->op_errno, &local->stbuf, NULL);
}
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+out:
return 0;
}
-
-/**
- * stripe_chown -
- */
int32_t
-stripe_chown (call_frame_t *frame, xlator_t *this, loc_t *loc, uid_t uid,
- gid_t gid)
+stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- int send_fop_to_all = 0;
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
- int32_t op_errno = 1;
+ stripe_fd_ctx_t *fctx = NULL;
+ int32_t op_errno = EINVAL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -582,49 +449,48 @@ stripe_chown (call_frame_t *frame, xlator_t *this, loc_t *loc, uid_t uid,
goto err;
}
- if (S_ISDIR (loc->inode->st_mode) || S_ISREG (loc->inode->st_mode))
- send_fop_to_all = 1;
+ /* Initialization */
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ local->op_ret = -1;
+ frame->local = local;
+ local->call_count = priv->child_count;
- if (!send_fop_to_all) {
- STACK_WIND (frame, stripe_common_buf_cbk, trav->xlator,
- trav->xlator->fops->chown, loc, uid, gid);
- } else {
- /* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->inode = loc->inode;
- local->call_count = priv->child_count;
+ if (IA_ISREG(loc->inode->ia_type)) {
+ inode_ctx_get(loc->inode, this, (uint64_t *) &fctx);
+ if (!fctx)
+ goto err;
+ local->fctx = fctx;
+ }
- while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_buf_cbk,
- trav->xlator, trav->xlator->fops->chown,
- loc, uid, gid);
- trav = trav->next;
- }
+ while (trav) {
+ STACK_WIND (frame, stripe_stat_cbk, trav->xlator,
+ trav->xlator->fops->stat, loc, NULL);
+ trav = trav->next;
}
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+
+err:
+ STRIPE_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
return 0;
}
-/**
- * stripe_statfs_cbk -
- */
int32_t
stripe_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *stbuf)
+ int32_t op_ret, int32_t op_errno, struct statvfs *stbuf, dict_t *xdata)
{
stripe_local_t *local = NULL;
int32_t callcnt = 0;
+ if (!this || !frame || !frame->local) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
local = frame->local;
LOCK(&frame->lock);
@@ -651,32 +517,32 @@ stripe_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
}
UNLOCK (&frame->lock);
-
+
if (!callcnt) {
- STACK_UNWIND (frame, local->op_ret,
- local->op_errno, &local->statvfs_buf);
+ STRIPE_STACK_UNWIND (statfs, frame, local->op_ret,
+ local->op_errno, &local->statvfs_buf, NULL);
}
-
+out:
return 0;
}
-
-/**
- * stripe_statfs -
- */
int32_t
-stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
stripe_local_t *local = NULL;
xlator_list_t *trav = NULL;
stripe_private_t *priv = NULL;
- int32_t op_errno = 1;
+ int32_t op_errno = EINVAL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
trav = this->children;
priv = this->private;
/* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -688,28 +554,99 @@ stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
local->call_count = priv->child_count;
while (trav) {
STACK_WIND (frame, stripe_statfs_cbk, trav->xlator,
- trav->xlator->fops->statfs, loc);
+ trav->xlator->fops->statfs, loc, NULL);
trav = trav->next;
}
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+err:
+ STRIPE_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
return 0;
}
-/**
- * stripe_truncate -
- */
+
int32_t
-stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
+stripe_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s returned error %s",
+ prev->this->name, strerror (op_errno));
+ local->op_errno = op_errno;
+ if ((op_errno != ENOENT) ||
+ (prev->this == FIRST_CHILD (this)))
+ local->failed = 1;
+ }
+
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ if (FIRST_CHILD(this) == prev->this) {
+ local->pre_buf = *prebuf;
+ local->post_buf = *postbuf;
+ }
+
+ local->prebuf_blocks += prebuf->ia_blocks;
+ local->postbuf_blocks += postbuf->ia_blocks;
+
+ correct_file_size(prebuf, local->fctx, prev);
+ correct_file_size(postbuf, local->fctx, prev);
+
+ if (local->prebuf_size < prebuf->ia_size)
+ local->prebuf_size = prebuf->ia_size;
+
+ if (local->postbuf_size < postbuf->ia_size)
+ local->postbuf_size = postbuf->ia_size;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ if (local->failed)
+ local->op_ret = -1;
+
+ if (local->op_ret != -1) {
+ local->pre_buf.ia_blocks = local->prebuf_blocks;
+ local->pre_buf.ia_size = local->prebuf_size;
+ local->post_buf.ia_blocks = local->postbuf_blocks;
+ local->post_buf.ia_size = local->postbuf_size;
+ }
+
+ STRIPE_STACK_UNWIND (truncate, frame, local->op_ret,
+ local->op_errno, &local->pre_buf,
+ &local->post_buf, NULL);
+ }
+out:
+ return 0;
+}
+
+int32_t
+stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata)
{
- int send_fop_to_all = 0;
- xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
- int32_t op_errno = 1;
+ stripe_fd_ctx_t *fctx = NULL;
+ int32_t op_errno = EINVAL;
+ int i, eof_idx;
+ off_t dest_offset, tmp_offset;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -718,58 +655,157 @@ stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
VALIDATE_OR_GOTO (loc->inode, err);
priv = this->private;
- trav = this->children;
if (priv->first_child_down) {
op_errno = ENOTCONN;
goto err;
}
- if (S_ISDIR (loc->inode->st_mode) || S_ISREG (loc->inode->st_mode))
- send_fop_to_all = 1;
+ /* Initialization */
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ local->op_ret = -1;
+ frame->local = local;
+ local->call_count = priv->child_count;
+
+ inode_ctx_get(loc->inode, this, (uint64_t *) &fctx);
+ if (!fctx) {
+ gf_log(this->name, GF_LOG_ERROR, "no stripe context");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->fctx = fctx;
+ eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count;
+
+ for (i = 0; i < fctx->stripe_count; i++) {
+ if (!fctx->xl_array[i]) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "no xlator at index %d", i);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (fctx->stripe_coalesce) {
+ /*
+ * The node that owns EOF is truncated to the exact
+ * coalesced offset. Nodes prior to this index should
+ * be rounded up to the size of the complete stripe,
+ * while nodes after this index should be rounded down
+ * to the size of the previous stripe.
+ */
+ if (i < eof_idx)
+ tmp_offset = roof(offset, fctx->stripe_size *
+ fctx->stripe_count);
+ else if (i > eof_idx)
+ tmp_offset = floor(offset, fctx->stripe_size *
+ fctx->stripe_count);
+ else
+ tmp_offset = offset;
+
+ dest_offset = coalesced_offset(tmp_offset,
+ fctx->stripe_size, fctx->stripe_count);
+ } else {
+ dest_offset = offset;
+ }
+
+ STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i],
+ fctx->xl_array[i]->fops->truncate, loc, dest_offset,
+ NULL);
+ }
- if (!send_fop_to_all) {
- STACK_WIND (frame, stripe_common_buf_cbk, trav->xlator,
- trav->xlator->fops->truncate, loc, offset);
- } else {
- /* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
- if (!local) {
- op_errno = ENOMEM;
- goto err;
+ return 0;
+err:
+ STRIPE_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+
+int32_t
+stripe_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
+{
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s returned error %s",
+ prev->this->name, strerror (op_errno));
+ local->op_errno = op_errno;
+ if ((op_errno != ENOENT) ||
+ (prev->this == FIRST_CHILD (this)))
+ local->failed = 1;
}
- local->op_ret = -1;
- frame->local = local;
- local->inode = loc->inode;
- local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_buf_cbk,
- trav->xlator, trav->xlator->fops->truncate,
- loc, offset);
- trav = trav->next;
+
+ if (op_ret == 0) {
+ local->op_ret = 0;
+
+ if (FIRST_CHILD(this) == prev->this) {
+ local->pre_buf = *preop;
+ local->post_buf = *postop;
+ }
+
+ local->prebuf_blocks += preop->ia_blocks;
+ local->postbuf_blocks += postop->ia_blocks;
+
+ correct_file_size(preop, local->fctx, prev);
+ correct_file_size(postop, local->fctx, prev);
+
+ if (local->prebuf_size < preop->ia_size)
+ local->prebuf_size = preop->ia_size;
+ if (local->postbuf_size < postop->ia_size)
+ local->postbuf_size = postop->ia_size;
}
}
+ UNLOCK (&frame->lock);
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+ if (!callcnt) {
+ if (local->failed)
+ local->op_ret = -1;
+
+ if (local->op_ret != -1) {
+ local->pre_buf.ia_blocks = local->prebuf_blocks;
+ local->pre_buf.ia_size = local->prebuf_size;
+ local->post_buf.ia_blocks = local->postbuf_blocks;
+ local->post_buf.ia_size = local->postbuf_size;
+ }
+
+ STRIPE_STACK_UNWIND (setattr, frame, local->op_ret,
+ local->op_errno, &local->pre_buf,
+ &local->post_buf, NULL);
+ }
+out:
return 0;
}
-/**
- * stripe_utimens -
- */
-int32_t
-stripe_utimens (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct timespec tv[2])
+int32_t
+stripe_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- int send_fop_to_all = 0;
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
- int32_t op_errno = 1;
+ stripe_fd_ctx_t *fctx = NULL;
+ int32_t op_errno = EINVAL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -785,46 +821,175 @@ stripe_utimens (call_frame_t *frame, xlator_t *this, loc_t *loc,
goto err;
}
- if (S_ISDIR (loc->inode->st_mode) || S_ISREG (loc->inode->st_mode))
- send_fop_to_all = 1;
+ /* Initialization */
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ local->op_ret = -1;
+ frame->local = local;
+ if (!IA_ISDIR (loc->inode->ia_type) &&
+ !IA_ISREG (loc->inode->ia_type)) {
+ local->call_count = 1;
+ STACK_WIND (frame, stripe_setattr_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->setattr,
+ loc, stbuf, valid, NULL);
+ return 0;
+ }
- if (!send_fop_to_all) {
- STACK_WIND (frame, stripe_common_buf_cbk, trav->xlator,
- trav->xlator->fops->utimens, loc, tv);
- } else {
- /* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->inode = loc->inode;
- local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_buf_cbk,
- trav->xlator, trav->xlator->fops->utimens,
- loc, tv);
- trav = trav->next;
- }
+ if (IA_ISREG(loc->inode->ia_type)) {
+ inode_ctx_get(loc->inode, this, (uint64_t *) &fctx);
+ if (!fctx)
+ goto err;
+ local->fctx = fctx;
+ }
+
+ local->call_count = priv->child_count;
+ while (trav) {
+ STACK_WIND (frame, stripe_setattr_cbk,
+ trav->xlator, trav->xlator->fops->setattr,
+ loc, stbuf, valid, NULL);
+ trav = trav->next;
}
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+err:
+ STRIPE_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
-int32_t
+int32_t
+stripe_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ stripe_private_t *priv = NULL;
+ xlator_list_t *trav = NULL;
+ int32_t op_errno = EINVAL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
+
+ priv = this->private;
+ trav = this->children;
+
+ /* Initialization */
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ local->op_ret = -1;
+ frame->local = local;
+ local->call_count = priv->child_count;
+
+ while (trav) {
+ STACK_WIND (frame, stripe_setattr_cbk, trav->xlator,
+ trav->xlator->fops->fsetattr, fd, stbuf, valid, NULL);
+ trav = trav->next;
+ }
+
+ return 0;
+err:
+ STRIPE_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+stripe_stack_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s returned error %s",
+ prev->this->name, strerror (op_errno));
+ local->op_errno = op_errno;
+ if ((op_errno != ENOENT) ||
+ (prev->this == FIRST_CHILD (this)))
+ local->failed = 1;
+ }
+
+ if (op_ret == 0) {
+ local->op_ret = 0;
+
+ local->stbuf.ia_blocks += buf->ia_blocks;
+ local->preparent.ia_blocks += preoldparent->ia_blocks;
+ local->postparent.ia_blocks += postoldparent->ia_blocks;
+ local->pre_buf.ia_blocks += prenewparent->ia_blocks;
+ local->post_buf.ia_blocks += postnewparent->ia_blocks;
+
+ correct_file_size(buf, local->fctx, prev);
+
+ if (local->stbuf.ia_size < buf->ia_size)
+ local->stbuf.ia_size = buf->ia_size;
+
+ if (local->preparent.ia_size < preoldparent->ia_size)
+ local->preparent.ia_size = preoldparent->ia_size;
+
+ if (local->postparent.ia_size < postoldparent->ia_size)
+ local->postparent.ia_size = postoldparent->ia_size;
+
+ if (local->pre_buf.ia_size < prenewparent->ia_size)
+ local->pre_buf.ia_size = prenewparent->ia_size;
+
+ if (local->post_buf.ia_size < postnewparent->ia_size)
+ local->post_buf.ia_size = postnewparent->ia_size;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ if (local->failed)
+ local->op_ret = -1;
+
+ STRIPE_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
+ &local->stbuf, &local->preparent,
+ &local->postparent, &local->pre_buf,
+ &local->post_buf, NULL);
+ }
+out:
+ return 0;
+}
+
+int32_t
stripe_first_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
stripe_local_t *local = NULL;
xlator_list_t *trav = NULL;
+ if (!this || !frame || !frame->local) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ op_errno = EINVAL;
+ goto unwind;
+ }
+
if (op_ret == -1) {
goto unwind;
}
@@ -832,34 +997,39 @@ stripe_first_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
trav = this->children;
+ local->stbuf = *buf;
+ local->preparent = *preoldparent;
+ local->postparent = *postoldparent;
+ local->pre_buf = *prenewparent;
+ local->post_buf = *postnewparent;
+
local->op_ret = 0;
- local->stbuf = *buf;
local->call_count--;
- trav = trav->next; /* Skip first child */
+ trav = trav->next; /* Skip first child */
while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_buf_cbk,
+ STACK_WIND (frame, stripe_stack_rename_cbk,
trav->xlator, trav->xlator->fops->rename,
- &local->loc, &local->loc2);
+ &local->loc, &local->loc2, NULL);
trav = trav->next;
}
return 0;
- unwind:
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+unwind:
+ STRIPE_STACK_UNWIND (rename, frame, -1, op_errno, buf, preoldparent,
+ postoldparent, prenewparent, postnewparent, NULL);
return 0;
}
-/**
- * stripe_rename -
- */
+
int32_t
-stripe_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc)
+stripe_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
{
stripe_private_t *priv = NULL;
stripe_local_t *local = NULL;
xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
+ stripe_fd_ctx_t *fctx = NULL;
+ int32_t op_errno = EINVAL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -878,101 +1048,131 @@ stripe_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
}
/* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
- local->inode = oldloc->inode;
loc_copy (&local->loc, oldloc);
loc_copy (&local->loc2, newloc);
local->call_count = priv->child_count;
-
+
+ if (IA_ISREG(oldloc->inode->ia_type)) {
+ inode_ctx_get(oldloc->inode, this, (uint64_t *) &fctx);
+ if (!fctx)
+ goto err;
+ local->fctx = fctx;
+ }
+
frame->local = local;
STACK_WIND (frame, stripe_first_rename_cbk, trav->xlator,
- trav->xlator->fops->rename, oldloc, newloc);
+ trav->xlator->fops->rename, oldloc, newloc, NULL);
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+err:
+ STRIPE_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL, NULL);
return 0;
}
-
-
-/**
- * stripe_access -
- */
int32_t
-stripe_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
+stripe_first_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- int32_t op_errno = 1;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
- STACK_WIND (frame, stripe_common_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->access, loc, mask);
+ prev = cookie;
+ local = frame->local;
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s returned %s",
+ prev->this->name, strerror (op_errno));
+ goto out;
+ }
+ local->op_ret = 0;
+ local->preparent = *preparent;
+ local->postparent = *postparent;
+ local->preparent_blocks += preparent->ia_blocks;
+ local->postparent_blocks += postparent->ia_blocks;
+
+ STRIPE_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, xdata);
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno);
+out:
+ STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
+
return 0;
}
-/**
- * stripe_readlink_cbk -
- */
-int32_t
-stripe_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, const char *path)
-{
- STACK_UNWIND (frame, op_ret, op_errno, path);
- return 0;
-}
-/**
- * stripe_readlink -
- */
int32_t
-stripe_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size)
+stripe_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- int32_t op_errno = 1;
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
- STACK_WIND (frame, stripe_readlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readlink, loc, size);
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s returned %s",
+ prev->this->name, strerror (op_errno));
+ local->op_errno = op_errno;
+ if (op_errno != ENOENT) {
+ local->failed = 1;
+ local->op_ret = op_ret;
+ }
+ }
+ }
+ UNLOCK (&frame->lock);
+ if (callcnt == 1) {
+ if (local->failed) {
+ op_errno = local->op_errno;
+ goto out;
+ }
+ STACK_WIND(frame, stripe_first_unlink_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->unlink, &local->loc,
+ local->xflag, local->xdata);
+ }
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+out:
+ STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
+
return 0;
}
-
-/**
- * stripe_unlink -
- */
int32_t
-stripe_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+stripe_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int xflag, dict_t *xdata)
{
- int send_fop_to_all = 0;
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
- int32_t op_errno = 1;
+ int32_t op_errno = EINVAL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -987,82 +1187,132 @@ stripe_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
op_errno = ENOTCONN;
goto err;
}
-
- if (S_ISREG (loc->inode->st_mode))
- send_fop_to_all = 1;
- if (!send_fop_to_all) {
- STACK_WIND (frame, stripe_common_cbk, trav->xlator,
- trav->xlator->fops->unlink, loc);
- } else {
- /* Don't unlink a file if a node is down */
- if (priv->nodes_down) {
- op_errno = ENOTCONN;
- goto err;
- }
+ /* Don't unlink a file if a node is down */
+ if (priv->nodes_down) {
+ op_errno = ENOTCONN;
+ goto err;
+ }
- /* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_cbk,
- trav->xlator, trav->xlator->fops->unlink,
- loc);
- trav = trav->next;
- }
+ /* Initialization */
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ local->op_ret = -1;
+ loc_copy (&local->loc, loc);
+ local->xflag = xflag;
+
+ if (xdata)
+ local->xdata = dict_ref (xdata);
+
+ frame->local = local;
+ local->call_count = priv->child_count;
+ trav = trav->next; /* Skip the first child */
+
+ while (trav) {
+ STACK_WIND (frame, stripe_unlink_cbk,
+ trav->xlator, trav->xlator->fops->unlink,
+ loc, xflag, xdata);
+ trav = trav->next;
}
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno);
+err:
+ STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
-int32_t
+int32_t
stripe_first_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno,struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
+ if (!this || !frame || !frame->local) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ op_errno = EINVAL;
+ goto err;
+ }
+
if (op_ret == -1) {
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ goto err;
}
- trav = this->children;
local = frame->local;
+ local->op_ret = 0;
local->call_count--; /* First child successful */
- trav = trav->next; /* Skip first child */
- while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_cbk, trav->xlator,
- trav->xlator->fops->rmdir, &local->loc);
- trav = trav->next;
+ local->preparent = *preparent;
+ local->postparent = *postparent;
+ local->preparent_size = preparent->ia_size;
+ local->postparent_size = postparent->ia_size;
+ local->preparent_blocks += preparent->ia_blocks;
+ local->postparent_blocks += postparent->ia_blocks;
+
+ STRIPE_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, xdata);
+ return 0;
+err:
+ STRIPE_STACK_UNWIND (rmdir, frame, op_ret, op_errno, NULL, NULL, NULL);
+ return 0;
+
+}
+
+int32_t
+stripe_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s returned %s",
+ prev->this->name, strerror (op_errno));
+ if (op_errno != ENOENT)
+ local->failed = 1;
+ }
}
+ UNLOCK (&frame->lock);
+ if (callcnt == 1) {
+ if (local->failed)
+ goto out;
+ STACK_WIND (frame, stripe_first_rmdir_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->rmdir, &local->loc,
+ local->flags, NULL);
+ }
+ return 0;
+out:
+ STRIPE_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
-/**
- * stripe_rmdir -
- */
int32_t
-stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
+stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, dict_t *xdata)
{
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
- int32_t op_errno = 1;
+ int32_t op_errno = EINVAL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -1080,68 +1330,45 @@ stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
}
/* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
frame->local = local;
- local->inode = loc->inode;
loc_copy (&local->loc, loc);
+ local->flags = flags;
local->call_count = priv->child_count;
-
- STACK_WIND (frame, stripe_first_rmdir_cbk, trav->xlator,
- trav->xlator->fops->rmdir, loc);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno);
- return 0;
-}
-
-
-/**
- * stripe_setxattr -
- */
-int32_t
-stripe_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *dict, int32_t flags)
-{
- stripe_private_t *priv = NULL;
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
+ trav = trav->next; /* skip the first child */
- priv = this->private;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
+ while (trav) {
+ STACK_WIND (frame, stripe_rmdir_cbk, trav->xlator,
+ trav->xlator->fops->rmdir, loc, flags, NULL);
+ trav = trav->next;
}
- STACK_WIND (frame, stripe_common_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr, loc, dict, flags);
-
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno);
+err:
+ STRIPE_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
-int32_t
+int32_t
stripe_mknod_ifreg_fail_unlink_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
+ if (!this || !frame || !frame->local) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
local = frame->local;
LOCK (&frame->lock);
@@ -1151,11 +1378,11 @@ stripe_mknod_ifreg_fail_unlink_cbk (call_frame_t *frame, void *cookie,
UNLOCK (&frame->lock);
if (!callcnt) {
- loc_wipe (&local->loc);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf);
+ STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf,
+ &local->preparent, &local->postparent, NULL);
}
-
+out:
return 0;
}
@@ -1165,23 +1392,31 @@ stripe_mknod_ifreg_fail_unlink_cbk (call_frame_t *frame, void *cookie,
int32_t
stripe_mknod_ifreg_setxattr_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
xlator_list_t *trav = NULL;
+ call_frame_t *prev = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ priv = this->private;
+ local = frame->local;
- priv = this->private;
LOCK (&frame->lock);
{
callcnt = --local->call_count;
-
+
if (op_ret == -1) {
gf_log (this->name, GF_LOG_DEBUG,
"%s returned error %s",
- ((call_frame_t *)cookie)->this->name,
- strerror (op_errno));
+ prev->this->name, strerror (op_errno));
local->op_ret = -1;
local->op_errno = op_errno;
}
@@ -1196,142 +1431,239 @@ stripe_mknod_ifreg_setxattr_cbk (call_frame_t *frame, void *cookie,
stripe_mknod_ifreg_fail_unlink_cbk,
trav->xlator,
trav->xlator->fops->unlink,
- &local->loc);
+ &local->loc, 0, NULL);
trav = trav->next;
}
return 0;
}
- loc_wipe (&local->loc);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf);
+ STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf,
+ &local->preparent, &local->postparent, NULL);
}
+out:
return 0;
}
-/**
- */
int32_t
stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *buf)
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- int ret = 0;
int32_t callcnt = 0;
stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
stripe_private_t *priv = NULL;
+ call_frame_t *prev = NULL;
+ xlator_list_t *trav = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
- priv = this->private;
+ prev = cookie;
+ priv = this->private;
local = frame->local;
LOCK (&frame->lock);
{
callcnt = --local->call_count;
-
+
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_DEBUG,
"%s returned error %s",
- ((call_frame_t *)cookie)->this->name,
- strerror (op_errno));
- local->failed = 1;
+ prev->this->name, strerror (op_errno));
+ if ((op_errno != ENOENT) ||
+ (prev->this == FIRST_CHILD (this)))
+ local->failed = 1;
local->op_errno = op_errno;
}
-
if (op_ret >= 0) {
local->op_ret = op_ret;
- /* Get the mapping in inode private */
- /* Get the stat buf right */
- if (local->stbuf.st_blksize == 0) {
- local->stbuf = *buf;
- /* Because st_blocks gets added again */
- local->stbuf.st_blocks = 0;
- }
- /* Always, pass the inode number of first child
- to the above layer */
- if (FIRST_CHILD(this) ==
- ((call_frame_t *)cookie)->this)
- local->stbuf.st_ino = buf->st_ino;
-
- local->stbuf.st_blocks += buf->st_blocks;
- if (local->stbuf.st_size < buf->st_size)
- local->stbuf.st_size = buf->st_size;
- if (local->stbuf.st_blksize != buf->st_blksize) {
- /* TODO: add to blocks in terms of
- original block size */
- }
+ /* Can be used as a mechanism to understand if mknod
+ was successful in at least one place */
+ if (uuid_is_null (local->ia_gfid))
+ uuid_copy (local->ia_gfid, buf->ia_gfid);
+
+ if (stripe_ctx_handle(this, prev, local, xdata))
+ gf_log(this->name, GF_LOG_ERROR,
+ "Error getting fctx info from dict");
+
+ local->stbuf_blocks += buf->ia_blocks;
+ local->preparent_blocks += preparent->ia_blocks;
+ local->postparent_blocks += postparent->ia_blocks;
+
+ correct_file_size(buf, local->fctx, prev);
+
+ if (local->stbuf_size < buf->ia_size)
+ local->stbuf_size = buf->ia_size;
+ if (local->preparent_size < preparent->ia_size)
+ local->preparent_size = preparent->ia_size;
+ if (local->postparent_size < postparent->ia_size)
+ local->postparent_size = postparent->ia_size;
}
}
UNLOCK (&frame->lock);
if (!callcnt) {
- if (local->failed)
+ if (local->failed)
local->op_ret = -1;
- if ((local->op_ret != -1) && priv->xattr_supported) {
- /* Send a setxattr request to nodes where the
- files are created */
- int32_t index = 0;
- char size_key[256] = {0,};
- char index_key[256] = {0,};
- char count_key[256] = {0,};
- dict_t *dict = NULL;
-
- trav = this->children;
- sprintf (size_key,
- "trusted.%s.stripe-size", this->name);
- sprintf (count_key,
- "trusted.%s.stripe-count", this->name);
- sprintf (index_key,
- "trusted.%s.stripe-index", this->name);
-
+ if ((local->op_ret == -1) && !uuid_is_null (local->ia_gfid)) {
+ /* ia_gfid set means, at least on one node 'mknod'
+ is successful */
local->call_count = priv->child_count;
-
+ trav = this->children;
while (trav) {
- dict = get_new_dict ();
- dict_ref (dict);
- /* TODO: check return value */
- ret = dict_set_int64 (dict, size_key,
- local->stripe_size);
- ret = dict_set_int32 (dict, count_key,
- priv->child_count);
- ret = dict_set_int32 (dict, index_key, index);
-
STACK_WIND (frame,
- stripe_mknod_ifreg_setxattr_cbk,
+ stripe_mknod_ifreg_fail_unlink_cbk,
trav->xlator,
- trav->xlator->fops->setxattr,
- &local->loc, dict, 0);
-
- dict_unref (dict);
- index++;
+ trav->xlator->fops->unlink,
+ &local->loc, 0, NULL);
trav = trav->next;
}
+ return 0;
+ }
+
+
+ if (local->op_ret != -1) {
+ local->preparent.ia_blocks = local->preparent_blocks;
+ local->preparent.ia_size = local->preparent_size;
+ local->postparent.ia_blocks = local->postparent_blocks;
+ local->postparent.ia_size = local->postparent_size;
+ local->stbuf.ia_size = local->stbuf_size;
+ local->stbuf.ia_blocks = local->stbuf_blocks;
+ inode_ctx_put (local->inode, this,
+ (uint64_t)(long) local->fctx);
+
+ }
+ STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf,
+ &local->preparent, &local->postparent, NULL);
+ }
+out:
+ return 0;
+}
+
+
+int32_t
+stripe_mknod_first_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ stripe_private_t *priv = NULL;
+ call_frame_t *prev = NULL;
+ xlator_list_t *trav = NULL;
+ int i = 1;
+ dict_t *dict = NULL;
+ int ret = 0;
+ int need_unref = 0;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ priv = this->private;
+ local = frame->local;
+ trav = this->children;
+
+ local->call_count--;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s",
+ prev->this->name, strerror (op_errno));
+ local->failed = 1;
+ local->op_errno = op_errno;
+ goto out;
+ }
+
+ local->op_ret = op_ret;
+
+ local->stbuf = *buf;
+ local->preparent = *preparent;
+ local->postparent = *postparent;
+
+ if (uuid_is_null (local->ia_gfid))
+ uuid_copy (local->ia_gfid, buf->ia_gfid);
+ local->preparent.ia_blocks = local->preparent_blocks;
+ local->preparent.ia_size = local->preparent_size;
+ local->postparent.ia_blocks = local->postparent_blocks;
+ local->postparent.ia_size = local->postparent_size;
+ local->stbuf.ia_size = local->stbuf_size;
+ local->stbuf.ia_blocks = local->stbuf_blocks;
+
+ trav = trav->next;
+ while (trav) {
+ if (priv->xattr_supported) {
+ dict = dict_new ();
+ if (!dict) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to allocate dict %s", local->loc.path);
+ }
+ need_unref = 1;
+
+ dict_copy (local->xattr, dict);
+
+ ret = stripe_xattr_request_build (this, dict,
+ local->stripe_size,
+ priv->child_count, i,
+ priv->coalesce);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to build xattr request");
+
} else {
- /* Create itself has failed.. so return
- without setxattring */
- loc_wipe (&local->loc);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf);
+ dict = local->xattr;
}
+
+ STACK_WIND (frame, stripe_mknod_ifreg_cbk,
+ trav->xlator, trav->xlator->fops->mknod,
+ &local->loc, local->mode, local->rdev, 0, dict);
+ trav = trav->next;
+ i++;
+
+ if (dict && need_unref)
+ dict_unref (dict);
}
-
+
return 0;
+
+out:
+
+ STRIPE_STACK_UNWIND (mknod, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, NULL);
+ return 0;
}
-/**
- * stripe_mknod -
- */
int32_t
+stripe_single_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ STRIPE_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+
+int
stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev)
+ dev_t rdev, mode_t umask, dict_t *xdata)
{
- stripe_private_t *priv = NULL;
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
+ stripe_private_t *priv = NULL;
+ stripe_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+ int32_t i = 0;
+ dict_t *dict = NULL;
+ int ret = 0;
+ int need_unref = 0;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -1340,71 +1672,216 @@ stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
VALIDATE_OR_GOTO (loc->inode, err);
priv = this->private;
- trav = this->children;
-
+
if (priv->first_child_down) {
op_errno = ENOTCONN;
goto err;
}
if (S_ISREG(mode)) {
- /* NOTE: on older kernels (older than 2.6.9),
- creat() fops is sent as mknod() + open(). Hence handling
+ /* NOTE: on older kernels (older than 2.6.9),
+ creat() fops is sent as mknod() + open(). Hence handling
S_IFREG files is necessary */
if (priv->nodes_down) {
- gf_log (this->name, GF_LOG_WARNING,
+ gf_log (this->name, GF_LOG_WARNING,
"Some node down, returning EIO");
op_errno = EIO;
goto err;
}
-
+
/* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
local->op_errno = ENOTCONN;
- local->stripe_size = stripe_get_matching_bs (loc->path,
- priv->pattern,
- priv->block_size);
+ local->stripe_size = stripe_get_matching_bs (loc->path, priv);
frame->local = local;
- local->inode = loc->inode;
+ local->inode = inode_ref (loc->inode);
loc_copy (&local->loc, loc);
+ local->xattr = dict_copy_with_ref (xdata, NULL);
+ local->mode = mode;
+ local->umask = umask;
+ local->rdev = rdev;
/* Everytime in stripe lookup, all child nodes should
be looked up */
local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND (frame, stripe_mknod_ifreg_cbk,
- trav->xlator, trav->xlator->fops->mknod,
- loc, mode, rdev);
- trav = trav->next;
+
+ if (priv->xattr_supported) {
+ dict = dict_new ();
+ if (!dict) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to allocate dict %s", loc->path);
+ }
+ need_unref = 1;
+
+ dict_copy (xdata, dict);
+
+ ret = stripe_xattr_request_build (this, dict,
+ local->stripe_size,
+ priv->child_count,
+ i, priv->coalesce);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to build xattr request");
+ } else {
+ dict = xdata;
}
- /* This case is handled, no need to continue further. */
- return 0;
- }
+ STACK_WIND (frame, stripe_mknod_first_ifreg_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->mknod,
+ loc, mode, rdev, umask, dict);
+ if (dict && need_unref)
+ dict_unref (dict);
+ return 0;
+ }
- STACK_WIND (frame, stripe_common_inode_cbk,
+ STACK_WIND (frame, stripe_single_mknod_cbk,
FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod,
- loc, mode, rdev);
+ loc, mode, rdev, umask, xdata);
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+err:
+ STRIPE_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+
+int32_t
+stripe_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s returned error %s",
+ prev->this->name, strerror (op_errno));
+ local->op_errno = op_errno;
+ if ((op_errno != ENOENT) ||
+ (prev->this == FIRST_CHILD (this)))
+ local->failed = 1;
+ }
+
+ if (op_ret >= 0) {
+ local->op_ret = 0;
+
+ local->stbuf_blocks += buf->ia_blocks;
+ local->preparent_blocks += preparent->ia_blocks;
+ local->postparent_blocks += postparent->ia_blocks;
+
+ if (local->stbuf_size < buf->ia_size)
+ local->stbuf_size = buf->ia_size;
+ if (local->preparent_size < preparent->ia_size)
+ local->preparent_size = preparent->ia_size;
+ if (local->postparent_size < postparent->ia_size)
+ local->postparent_size = postparent->ia_size;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ if (local->failed != -1) {
+ local->preparent.ia_blocks = local->preparent_blocks;
+ local->preparent.ia_size = local->preparent_size;
+ local->postparent.ia_blocks = local->postparent_blocks;
+ local->postparent.ia_size = local->postparent_size;
+ local->stbuf.ia_size = local->stbuf_size;
+ local->stbuf.ia_blocks = local->stbuf_blocks;
+ }
+ STRIPE_STACK_UNWIND (mkdir, frame, local->op_ret,
+ local->op_errno, local->inode,
+ &local->stbuf, &local->preparent,
+ &local->postparent, NULL);
+ }
+out:
return 0;
}
-/**
- * stripe_mkdir -
- */
int32_t
-stripe_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
+stripe_first_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ xlator_list_t *trav = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+ trav = this->children;
+
+ local->call_count--; /* first child is successful */
+ trav = trav->next; /* skip first child */
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s",
+ prev->this->name, strerror (op_errno));
+ local->op_errno = op_errno;
+ goto out;
+ }
+
+ local->op_ret = 0;
+
+ local->inode = inode_ref (inode);
+ local->stbuf = *buf;
+ local->postparent = *postparent;
+ local->preparent = *preparent;
+
+ local->stbuf_blocks += buf->ia_blocks;
+ local->preparent_blocks += preparent->ia_blocks;
+ local->postparent_blocks += postparent->ia_blocks;
+
+ local->stbuf_size = buf->ia_size;
+ local->preparent_size = preparent->ia_size;
+ local->postparent_size = postparent->ia_size;
+
+ while (trav) {
+ STACK_WIND (frame, stripe_mkdir_cbk, trav->xlator,
+ trav->xlator->fops->mkdir, &local->loc, local->mode,
+ local->umask, local->xdata);
+ trav = trav->next;
+ }
+ return 0;
+out:
+ STRIPE_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
+
+ return 0;
+
+}
+
+
+int
+stripe_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
{
stripe_private_t *priv = NULL;
stripe_local_t *local = NULL;
@@ -1419,72 +1896,130 @@ stripe_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
priv = this->private;
trav = this->children;
-
+
if (priv->first_child_down) {
op_errno = ENOTCONN;
goto err;
}
/* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
local->call_count = priv->child_count;
+ if (xdata)
+ local->xdata = dict_ref (xdata);
+ local->mode = mode;
+ local->umask = umask;
+ loc_copy (&local->loc, loc);
frame->local = local;
/* Everytime in stripe lookup, all child nodes should be looked up */
- while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_inode_cbk,
- trav->xlator, trav->xlator->fops->mkdir,
- loc, mode);
- trav = trav->next;
- }
+ STACK_WIND (frame, stripe_first_mkdir_cbk, trav->xlator,
+ trav->xlator->fops->mkdir, loc, mode, umask, xdata);
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+err:
+ STRIPE_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
return 0;
}
-/**
- * stripe_symlink -
- */
int32_t
-stripe_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc)
+stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- int32_t op_errno = 1;
- stripe_private_t *priv = NULL;
-
- priv = this->private;
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
}
- /* send symlink to only first node */
- STACK_WIND (frame, stripe_common_inode_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink, linkpath, loc);
+ prev = cookie;
+ local = frame->local;
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s returned error %s",
+ prev->this->name, strerror (op_errno));
+ local->op_errno = op_errno;
+ if ((op_errno != ENOENT) ||
+ (prev->this == FIRST_CHILD (this)))
+ local->failed = 1;
+ }
+
+ if (op_ret >= 0) {
+ local->op_ret = 0;
+
+ if (IA_ISREG(inode->ia_type)) {
+ inode_ctx_get(inode, this, (uint64_t *) &fctx);
+ if (!fctx) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to get stripe context");
+ op_ret = -1;
+ op_errno = EINVAL;
+ }
+ }
+
+ if (FIRST_CHILD(this) == prev->this) {
+ local->inode = inode_ref (inode);
+ local->stbuf = *buf;
+ local->postparent = *postparent;
+ local->preparent = *preparent;
+ }
+ local->stbuf_blocks += buf->ia_blocks;
+ local->preparent_blocks += preparent->ia_blocks;
+ local->postparent_blocks += postparent->ia_blocks;
+
+ correct_file_size(buf, fctx, prev);
+
+ if (local->stbuf_size < buf->ia_size)
+ local->stbuf_size = buf->ia_size;
+ if (local->preparent_size < preparent->ia_size)
+ local->preparent_size = preparent->ia_size;
+ if (local->postparent_size < postparent->ia_size)
+ local->postparent_size = postparent->ia_size;
+ }
+ }
+ UNLOCK (&frame->lock);
+ if (!callcnt) {
+ if (local->failed)
+ local->op_ret = -1;
+
+ if (local->op_ret != -1) {
+ local->preparent.ia_blocks = local->preparent_blocks;
+ local->preparent.ia_size = local->preparent_size;
+ local->postparent.ia_blocks = local->postparent_blocks;
+ local->postparent.ia_size = local->postparent_size;
+ local->stbuf.ia_size = local->stbuf_size;
+ local->stbuf.ia_blocks = local->stbuf_blocks;
+ }
+ STRIPE_STACK_UNWIND (link, frame, local->op_ret,
+ local->op_errno, local->inode,
+ &local->stbuf, &local->preparent,
+ &local->postparent, NULL);
+ }
+out:
return 0;
}
-/**
- * stripe_link -
- */
int32_t
-stripe_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
+stripe_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- int send_fop_to_all = 0;
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
@@ -1505,49 +2040,45 @@ stripe_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
goto err;
}
- if (S_ISREG (oldloc->inode->st_mode))
- send_fop_to_all = 1;
+ /* Initialization */
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ local->op_ret = -1;
+ frame->local = local;
+ local->call_count = priv->child_count;
- if (!send_fop_to_all) {
- STACK_WIND (frame, stripe_common_inode_cbk,
+ /* Everytime in stripe lookup, all child
+ nodes should be looked up */
+ while (trav) {
+ STACK_WIND (frame, stripe_link_cbk,
trav->xlator, trav->xlator->fops->link,
- oldloc, newloc);
- } else {
- /* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- /* Everytime in stripe lookup, all child
- nodes should be looked up */
- while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_inode_cbk,
- trav->xlator, trav->xlator->fops->link,
- oldloc, newloc);
- trav = trav->next;
- }
+ oldloc, newloc, NULL);
+ trav = trav->next;
}
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+err:
+ STRIPE_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
return 0;
}
-int32_t
+int32_t
stripe_create_fail_unlink_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
int32_t callcnt = 0;
- fd_t *lfd = NULL;
stripe_local_t *local = NULL;
+ if (!this || !frame || !frame->local) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
local = frame->local;
LOCK (&frame->lock);
@@ -1557,48 +2088,78 @@ stripe_create_fail_unlink_cbk (call_frame_t *frame, void *cookie,
UNLOCK (&frame->lock);
if (!callcnt) {
- lfd = local->fd;
- loc_wipe (&local->loc);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->fd, local->inode, &local->stbuf);
- fd_unref (lfd);
+ STRIPE_STACK_UNWIND (create, frame, local->op_ret, local->op_errno,
+ local->fd, local->inode, &local->stbuf,
+ &local->preparent, &local->postparent, NULL);
}
+out:
return 0;
}
-/**
- * stripe_create_setxattr_cbk -
- */
int32_t
-stripe_create_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ inode_t *inode, struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- fd_t *lfd = NULL;
+ int32_t callcnt = 0;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
+ call_frame_t *prev = NULL;
xlator_list_t *trav = NULL;
- int32_t callcnt = 0;
- priv = this->private;
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ priv = this->private;
local = frame->local;
LOCK (&frame->lock);
{
callcnt = --local->call_count;
-
+
if (op_ret == -1) {
gf_log (this->name, GF_LOG_DEBUG,
"%s returned error %s",
- ((call_frame_t *)cookie)->this->name,
- strerror (op_errno));
- local->op_ret = -1;
+ prev->this->name, strerror (op_errno));
+ local->failed = 1;
local->op_errno = op_errno;
}
+
+ if (op_ret >= 0) {
+ if (IA_ISREG(buf->ia_type)) {
+ if (stripe_ctx_handle(this, prev, local, xdata))
+ gf_log(this->name, GF_LOG_ERROR,
+ "Error getting fctx info from "
+ "dict");
+ }
+
+ local->op_ret = op_ret;
+
+ local->stbuf_blocks += buf->ia_blocks;
+ local->preparent_blocks += preparent->ia_blocks;
+ local->postparent_blocks += postparent->ia_blocks;
+
+ correct_file_size(buf, local->fctx, prev);
+
+ if (local->stbuf_size < buf->ia_size)
+ local->stbuf_size = buf->ia_size;
+ if (local->preparent_size < preparent->ia_size)
+ local->preparent_size = preparent->ia_size;
+ if (local->postparent_size < postparent->ia_size)
+ local->postparent_size = postparent->ia_size;
+ }
}
UNLOCK (&frame->lock);
if (!callcnt) {
+ if (local->failed)
+ local->op_ret = -1;
+
if (local->op_ret == -1) {
local->call_count = priv->child_count;
trav = this->children;
@@ -1607,165 +2168,180 @@ stripe_create_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
stripe_create_fail_unlink_cbk,
trav->xlator,
trav->xlator->fops->unlink,
- &local->loc);
+ &local->loc, 0, NULL);
trav = trav->next;
}
-
+
return 0;
}
- lfd = local->fd;
- loc_wipe (&local->loc);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->fd, local->inode, &local->stbuf);
- fd_unref (lfd);
+ if (local->op_ret >= 0) {
+ local->preparent.ia_blocks = local->preparent_blocks;
+ local->preparent.ia_size = local->preparent_size;
+ local->postparent.ia_blocks = local->postparent_blocks;
+ local->postparent.ia_size = local->postparent_size;
+ local->stbuf.ia_size = local->stbuf_size;
+ local->stbuf.ia_blocks = local->stbuf_blocks;
+
+ stripe_copy_xl_array(local->fctx->xl_array,
+ priv->xl_array,
+ local->fctx->stripe_count);
+ inode_ctx_put(local->inode, this,
+ (uint64_t) local->fctx);
+ }
+
+ /* Create itself has failed.. so return
+ without setxattring */
+ STRIPE_STACK_UNWIND (create, frame, local->op_ret,
+ local->op_errno, local->fd,
+ local->inode, &local->stbuf,
+ &local->preparent, &local->postparent, NULL);
}
+out:
return 0;
}
-/**
- * stripe_create_cbk -
- */
+
+
int32_t
-stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+stripe_first_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd,
- inode_t *inode, struct stat *buf)
+ inode_t *inode, struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- int32_t callcnt = 0;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
- fd_t *lfd = NULL;
- stripe_fd_ctx_t *fctx = NULL;
+ call_frame_t *prev = NULL;
+ xlator_list_t *trav = NULL;
+ int i = 1;
+ dict_t *dict = NULL;
+ loc_t *loc = NULL;
+ int32_t need_unref = 0;
+ int32_t ret = -1;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+ prev = cookie;
priv = this->private;
local = frame->local;
+ trav = this->children;
+ loc = &local->loc;
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- ((call_frame_t *)cookie)->this->name,
- strerror (op_errno));
- local->failed = 1;
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- /* Get the mapping in inode private */
- /* Get the stat buf right */
- if (local->stbuf.st_blksize == 0) {
- local->stbuf = *buf;
- /* Because st_blocks gets added again */
- local->stbuf.st_blocks = 0;
- }
-
- /* Always, pass the inode number of first
- child to the above layer */
- if (FIRST_CHILD(this) ==
- ((call_frame_t *)cookie)->this)
- local->stbuf.st_ino = buf->st_ino;
-
- local->stbuf.st_blocks += buf->st_blocks;
- if (local->stbuf.st_size < buf->st_size)
- local->stbuf.st_size = buf->st_size;
- if (local->stbuf.st_blksize != buf->st_blksize) {
- /* TODO: add to blocks in terms of
- original block size */
- }
- }
+ --local->call_count;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s",
+ prev->this->name, strerror (op_errno));
+ local->failed = 1;
+ local->op_errno = op_errno;
}
- UNLOCK (&frame->lock);
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
+ local->op_ret = 0;
+ /* Get the mapping in inode private */
+ /* Get the stat buf right */
+ local->stbuf = *buf;
+ local->preparent = *preparent;
+ local->postparent = *postparent;
+
+ local->stbuf_blocks += buf->ia_blocks;
+ local->preparent_blocks += preparent->ia_blocks;
+ local->postparent_blocks += postparent->ia_blocks;
+
+ if (local->stbuf_size < buf->ia_size)
+ local->stbuf_size = buf->ia_size;
+ if (local->preparent_size < preparent->ia_size)
+ local->preparent_size = preparent->ia_size;
+ if (local->postparent_size < postparent->ia_size)
+ local->postparent_size = postparent->ia_size;
+
+ if (local->failed)
+ local->op_ret = -1;
- /* */
- if (local->op_ret >= 0) {
- fctx = CALLOC (1, sizeof (stripe_fd_ctx_t));
- if (fctx) {
- fctx->stripe_size = local->stripe_size;
- fctx->stripe_count = priv->child_count;
- fctx->static_array = 1;
- fctx->xl_array = priv->xl_array;
- fd_ctx_set (local->fd, this,
- (uint64_t)(long)fctx);
- }
- }
+ if (local->op_ret == -1) {
+ local->call_count = 1;
+ STACK_WIND (frame, stripe_create_fail_unlink_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->unlink,
+ &local->loc, 0, NULL);
+ return 0;
+ }
- if ((local->op_ret != -1) &&
- local->stripe_size && priv->xattr_supported) {
- /* Send a setxattr request to nodes where
- the files are created */
- int ret = 0;
- int32_t i = 0;
- char size_key[256] = {0,};
- char index_key[256] = {0,};
- char count_key[256] = {0,};
- dict_t *dict = NULL;
-
- sprintf (size_key,
- "trusted.%s.stripe-size", this->name);
- sprintf (count_key,
- "trusted.%s.stripe-count", this->name);
- sprintf (index_key,
- "trusted.%s.stripe-index", this->name);
+ if (local->op_ret >= 0) {
+ local->preparent.ia_blocks = local->preparent_blocks;
+ local->preparent.ia_size = local->preparent_size;
+ local->postparent.ia_blocks = local->postparent_blocks;
+ local->postparent.ia_size = local->postparent_size;
+ local->stbuf.ia_size = local->stbuf_size;
+ local->stbuf.ia_blocks = local->stbuf_blocks;
+ }
- local->call_count = priv->child_count;
-
- for (i = 0; i < priv->child_count; i++) {
- dict = get_new_dict ();
- dict_ref (dict);
-
- /* TODO: check return values */
- ret = dict_set_int64 (dict, size_key,
- local->stripe_size);
- ret = dict_set_int32 (dict, count_key,
- priv->child_count);
- ret = dict_set_int32 (dict, index_key, i);
-
- STACK_WIND (frame, stripe_create_setxattr_cbk,
- priv->xl_array[i],
- priv->xl_array[i]->fops->setxattr,
- &local->loc, dict, 0);
-
- dict_unref (dict);
+ /* Send a setxattr request to nodes where the
+ files are created */
+ trav = trav->next;
+ while (trav) {
+ if (priv->xattr_supported) {
+ dict = dict_new ();
+ if (!dict) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to allocate dict %s", loc->path);
}
+ need_unref = 1;
+
+ dict_copy (local->xattr, dict);
+
+ ret = stripe_xattr_request_build (this, dict,
+ local->stripe_size,
+ priv->child_count,
+ i, priv->coalesce);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to build xattr request");
} else {
- /* Create itself has failed.. so return
- without setxattring */
- lfd = local->fd;
- loc_wipe (&local->loc);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->fd, local->inode, &local->stbuf);
-
- fd_unref (lfd);
+ dict = local->xattr;
}
+
+ STACK_WIND (frame, stripe_create_cbk, trav->xlator,
+ trav->xlator->fops->create, &local->loc,
+ local->flags, local->mode, local->umask, local->fd,
+ dict);
+ trav = trav->next;
+ if (need_unref && dict)
+ dict_unref (dict);
+ i++;
}
-
+
+out:
return 0;
}
+
/**
- * stripe_create - If a block-size is specified for the 'name', create the
+ * stripe_create - If a block-size is specified for the 'name', create the
* file in all the child nodes. If not, create it in only first child.
*
* @name- complete path of the file to be created.
*/
int32_t
stripe_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, mode_t mode, fd_t *fd)
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
stripe_private_t *priv = NULL;
stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
+ int32_t op_errno = EINVAL;
+ int ret = 0;
+ int need_unref = 0;
+ int i = 0;
+ dict_t *dict = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
priv = this->private;
@@ -1773,121 +2349,86 @@ stripe_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
flags &= ~O_APPEND;
if (priv->first_child_down || priv->nodes_down) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_DEBUG,
"First node down, returning EIO");
op_errno = EIO;
goto err;
}
/* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
local->op_errno = ENOTCONN;
- local->stripe_size = stripe_get_matching_bs (loc->path,
- priv->pattern,
- priv->block_size);
+ local->stripe_size = stripe_get_matching_bs (loc->path, priv);
frame->local = local;
- local->inode = loc->inode;
+ local->inode = inode_ref (loc->inode);
loc_copy (&local->loc, loc);
local->fd = fd_ref (fd);
+ local->flags = flags;
+ local->mode = mode;
+ local->umask = umask;
+ if (xdata)
+ local->xattr = dict_ref (xdata);
local->call_count = priv->child_count;
-
- trav = this->children;
- while (trav) {
- STACK_WIND (frame, stripe_create_cbk, trav->xlator,
- trav->xlator->fops->create, loc, flags, mode, fd);
- trav = trav->next;
- }
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-/**
- * stripe_open_cbk -
- */
-int32_t
-stripe_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- fd_t *lfd = NULL;
+ /* Send a setxattr request to nodes where the
+ files are created */
- local = frame->local;
+ if (priv->xattr_supported) {
+ dict = dict_new ();
+ if (!dict) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to allocate dict %s", loc->path);
+ }
+ need_unref = 1;
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
+ dict_copy (xdata, dict);
- if (op_ret == -1) {
- local->failed = 1;
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- ((call_frame_t *)cookie)->this->name,
- strerror (op_errno));
- local->op_ret = -1;
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0)
- local->op_ret = op_ret;
+ ret = stripe_xattr_request_build (this, dict,
+ local->stripe_size,
+ priv->child_count,
+ i, priv->coalesce);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to build xattr request");
+ } else {
+ dict = xdata;
}
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
- if (local->op_ret == -1) {
- if (local->fctx) {
- if (!local->fctx->static_array)
- FREE (local->fctx->xl_array);
- FREE (local->fctx);
- }
- } else {
- fd_ctx_set (local->fd, this,
- (uint64_t)(long)local->fctx);
- }
- lfd = local->fd;
- loc_wipe (&local->loc);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->fd);
- fd_unref (lfd);
+ STACK_WIND (frame, stripe_first_create_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->create, loc, flags, mode,
+ umask, fd, dict);
+
+ if (need_unref && dict)
+ dict_unref (dict);
- }
return 0;
+err:
+ STRIPE_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL, xdata);
+ return 0;
}
-
-/**
- * stripe_getxattr_cbk -
- */
int32_t
-stripe_open_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+stripe_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- int32_t index = 0;
- int32_t callcnt = 0;
- char key[256] = {0,};
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- data_t *data = NULL;
- call_frame_t *prev = NULL;
- fd_t *lfd = NULL;
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
- prev = (call_frame_t *)cookie;
- priv = this->private;
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
local = frame->local;
LOCK (&frame->lock);
@@ -1895,145 +2436,39 @@ stripe_open_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
callcnt = --local->call_count;
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
+
+ gf_log (this->name, GF_LOG_DEBUG,
"%s returned error %s",
- ((call_frame_t *)cookie)->this->name,
- strerror (op_errno));
- local->op_ret = -1;
- if (local->op_errno != EIO)
- local->op_errno = op_errno;
- if (op_errno == ENOTCONN)
+ prev->this->name, strerror (op_errno));
+ if ((op_errno != ENOENT) ||
+ (prev->this == FIRST_CHILD (this)))
local->failed = 1;
- goto unlock;
- }
-
- if (!local->fctx) {
- local->fctx = CALLOC (1, sizeof (stripe_fd_ctx_t));
- if (!local->fctx) {
- local->op_errno = ENOMEM;
- local->op_ret = -1;
- goto unlock;
- }
-
- local->fctx->static_array = 0;
- }
- /* Stripe block size */
- sprintf (key, "trusted.%s.stripe-size", this->name);
- data = dict_get (dict, key);
- if (!data) {
- local->xattr_self_heal_needed = 1;
- } else {
- if (!local->fctx->stripe_size) {
- local->fctx->stripe_size =
- data_to_int64 (data);
- }
-
- if (local->fctx->stripe_size != data_to_int64 (data)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "stripe-size mismatch in blocks");
- local->xattr_self_heal_needed = 1;
- }
- }
- /* Stripe count */
- sprintf (key, "trusted.%s.stripe-count", this->name);
- data = dict_get (dict, key);
- if (!data) {
- local->xattr_self_heal_needed = 1;
- goto unlock;
- }
- if (!local->fctx->xl_array) {
- local->fctx->stripe_count = data_to_int32 (data);
- if (!local->fctx->stripe_count) {
- gf_log (this->name, GF_LOG_ERROR,
- "error with stripe-count xattr");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto unlock;
- }
- local->fctx->xl_array =
- CALLOC (local->fctx->stripe_count,
- sizeof (xlator_t *));
- }
- if (local->fctx->stripe_count != data_to_int32 (data)) {
- gf_log (this->name, GF_LOG_ERROR,
- "error with stripe-count xattr");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto unlock;
+ local->op_errno = op_errno;
}
- /* index */
- sprintf (key, "trusted.%s.stripe-index", this->name);
- data = dict_get (dict, key);
- if (!data) {
- local->xattr_self_heal_needed = 1;
- goto unlock;
- }
- index = data_to_int32 (data);
- if (index > priv->child_count) {
- gf_log (this->name, GF_LOG_ERROR,
- "error with stripe-index xattr");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto unlock;
- }
- if (local->fctx->xl_array)
- local->fctx->xl_array[index] = prev->this;
- local->entry_count++;
- local->op_ret = 0;
+ if (op_ret >= 0)
+ local->op_ret = op_ret;
}
- unlock:
UNLOCK (&frame->lock);
-
- if (!callcnt) {
- /* TODO: if self-heal flag is set, do it */
- if (local->xattr_self_heal_needed) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: stripe info need to be healed",
- local->loc.path);
- }
-
- if (local->op_ret)
- goto err;
- if (local->entry_count != local->fctx->stripe_count) {
- local->op_ret = -1;
- local->op_errno = EIO;
- goto err;
- }
- if (!local->fctx->stripe_size) {
+ if (!callcnt) {
+ if (local->failed)
local->op_ret = -1;
- local->op_errno = EIO;
- goto err;
- }
-
- local->call_count = local->fctx->stripe_count;
- trav = this->children;
- while (trav) {
- STACK_WIND (frame, stripe_open_cbk, trav->xlator,
- trav->xlator->fops->open, &local->loc,
- local->flags, local->fd);
- trav = trav->next;
- }
+ STRIPE_STACK_UNWIND (open, frame, local->op_ret,
+ local->op_errno, local->fd, xdata);
}
-
- return 0;
- err:
- lfd = local->fd;
- loc_wipe (&local->loc);
- STACK_UNWIND (frame, local->op_ret, local->op_errno, local->fd);
- fd_unref (lfd);
-
+out:
return 0;
}
+
/**
- * stripe_open -
+ * stripe_open -
*/
int32_t
stripe_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, fd_t *fd)
+ int32_t flags, fd_t *fd, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
@@ -2055,7 +2490,7 @@ stripe_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
}
/* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -2066,97 +2501,76 @@ stripe_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
local->fd = fd_ref (fd);
frame->local = local;
- local->inode = loc->inode;
loc_copy (&local->loc, loc);
/* Striped files */
local->flags = flags;
local->call_count = priv->child_count;
- local->stripe_size = stripe_get_matching_bs (loc->path,
- priv->pattern,
- priv->block_size);
-
- if (priv->xattr_supported) {
- while (trav) {
- STACK_WIND (frame, stripe_open_getxattr_cbk,
- trav->xlator, trav->xlator->fops->getxattr,
- loc, NULL);
- trav = trav->next;
- }
- } else {
- local->fctx = CALLOC (1, sizeof (stripe_fd_ctx_t));
- if (!local->fctx) {
- op_errno = ENOMEM;
- goto err;
- }
-
- local->fctx->static_array = 1;
- local->fctx->stripe_size = local->stripe_size;
- local->fctx->stripe_count = priv->child_count;
- local->fctx->xl_array = priv->xl_array;
-
- while (trav) {
- STACK_WIND (frame, stripe_open_cbk, trav->xlator,
- trav->xlator->fops->open,
- &local->loc, local->flags, local->fd);
- trav = trav->next;
- }
- }
+ local->stripe_size = stripe_get_matching_bs (loc->path, priv);
+ while (trav) {
+ STACK_WIND (frame, stripe_open_cbk, trav->xlator,
+ trav->xlator->fops->open,
+ &local->loc, local->flags, local->fd,
+ xdata);
+ trav = trav->next;
+ }
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+err:
+ STRIPE_STACK_UNWIND (open, frame, -1, op_errno, NULL, NULL);
return 0;
}
-/**
- * stripe_opendir_cbk -
- */
+
int32_t
stripe_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
int32_t callcnt = 0;
- stripe_local_t *local = frame->local;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
LOCK (&frame->lock);
{
callcnt = --local->call_count;
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_DEBUG,
"%s returned error %s",
- ((call_frame_t *)cookie)->this->name,
- strerror (op_errno));
+ prev->this->name, strerror (op_errno));
local->op_ret = -1;
- local->failed = 1;
local->op_errno = op_errno;
}
-
- if (op_ret >= 0)
+
+ if (op_ret >= 0)
local->op_ret = op_ret;
}
UNLOCK (&frame->lock);
if (!callcnt) {
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->fd);
+ STRIPE_STACK_UNWIND (opendir, frame, local->op_ret,
+ local->op_errno, local->fd, NULL);
}
-
+out:
return 0;
}
-/**
- * stripe_opendir -
- */
int32_t
-stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xdata)
{
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
- int32_t op_errno = 1;
+ int32_t op_errno = EINVAL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -2173,118 +2587,61 @@ stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
}
/* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
frame->local = local;
- local->inode = loc->inode;
- local->fd = fd;
local->call_count = priv->child_count;
+ local->fd = fd_ref (fd);
while (trav) {
STACK_WIND (frame, stripe_opendir_cbk, trav->xlator,
- trav->xlator->fops->opendir, loc, fd);
+ trav->xlator->fops->opendir, loc, fd, NULL);
trav = trav->next;
}
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
-}
-
-
-/**
- * stripe_getxattr_cbk -
- */
-int32_t
-stripe_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *value)
-{
- STACK_UNWIND (frame, op_ret, op_errno, value);
- return 0;
-}
-
-
-/**
- * stripe_getxattr -
- */
-int32_t
-stripe_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
-{
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
-
- STACK_WIND (frame, stripe_getxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, loc, name);
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+err:
+ STRIPE_STACK_UNWIND (opendir, frame, -1, op_errno, NULL, NULL);
return 0;
}
-/**
- * stripe_removexattr -
- */
-int32_t
-stripe_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
-{
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
-
- STACK_WIND (frame, stripe_common_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr, loc, name);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno);
- return 0;
-}
-
-
-/**
- * stripe_lk_cbk -
- */
int32_t
stripe_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
local = frame->local;
LOCK (&frame->lock);
{
callcnt = --local->call_count;
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_DEBUG,
"%s returned error %s",
- ((call_frame_t *)cookie)->this->name,
- strerror (op_errno));
+ prev->this->name, strerror (op_errno));
local->op_errno = op_errno;
- if (op_errno == ENOTCONN)
+ if ((op_errno != ENOENT) ||
+ (prev->this == FIRST_CHILD (this)))
local->failed = 1;
}
- if (op_ret == 0 && local->op_ret == -1) {
- /* First successful call, copy the *lock */
- local->op_ret = 0;
- local->lock = *lock;
+ if (op_ret >= 0) {
+ if (FIRST_CHILD(this) == prev->this) {
+ /* First successful call, copy the *lock */
+ local->op_ret = op_ret;
+ local->lock = *lock;
+ }
}
}
UNLOCK (&frame->lock);
@@ -2292,24 +2649,21 @@ stripe_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!callcnt) {
if (local->failed)
local->op_ret = -1;
- STACK_UNWIND (frame, local->op_ret,
- local->op_errno, &local->lock);
+ STRIPE_STACK_UNWIND (lk, frame, local->op_ret,
+ local->op_errno, &local->lock, NULL);
}
+out:
return 0;
}
-
-/**
- * stripe_lk -
- */
int32_t
stripe_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct flock *lock)
+ struct gf_flock *lock, dict_t *xdata)
{
stripe_local_t *local = NULL;
xlator_list_t *trav = NULL;
stripe_private_t *priv = NULL;
- int32_t op_errno = 1;
+ int32_t op_errno = EINVAL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -2320,77 +2674,75 @@ stripe_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
priv = this->private;
/* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
frame->local = local;
-
local->call_count = priv->child_count;
-
+
while (trav) {
STACK_WIND (frame, stripe_lk_cbk, trav->xlator,
- trav->xlator->fops->lk, fd, cmd, lock);
+ trav->xlator->fops->lk, fd, cmd, lock, NULL);
trav = trav->next;
}
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+err:
+ STRIPE_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
return 0;
}
-/**
- * stripe_writedir -
- */
+
int32_t
-stripe_setdents (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t flags, dir_entry_t *entries, int32_t count)
+stripe_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
- priv = this->private;
- trav = this->children;
+ prev = cookie;
+ local = frame->local;
- /* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
- while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_cbk, trav->xlator,
- trav->xlator->fops->setdents, fd, flags, entries,
- count);
- trav = trav->next;
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s returned %s",
+ prev->this->name, strerror (op_errno));
+ local->op_errno = op_errno;
+ if ((op_errno != ENOENT) ||
+ (prev->this == FIRST_CHILD (this)))
+ local->failed = 1;
+ }
+ if (op_ret >= 0)
+ local->op_ret = op_ret;
}
+ UNLOCK (&frame->lock);
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno);
+ if (!callcnt) {
+ if (local->failed)
+ local->op_ret = -1;
+
+ STRIPE_STACK_UNWIND (flush, frame, local->op_ret,
+ local->op_errno, NULL);
+ }
+out:
return 0;
}
-
-/**
- * stripe_flush -
- */
int32_t
-stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
@@ -2410,7 +2762,7 @@ stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
goto err;
}
/* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -2418,73 +2770,98 @@ stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
local->op_ret = -1;
frame->local = local;
local->call_count = priv->child_count;
-
+
while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_cbk, trav->xlator,
- trav->xlator->fops->flush, fd);
+ STACK_WIND (frame, stripe_flush_cbk, trav->xlator,
+ trav->xlator->fops->flush, fd, NULL);
trav = trav->next;
}
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno);
+err:
+ STRIPE_STACK_UNWIND (flush, frame, -1, op_errno, NULL);
return 0;
}
-/**
- * stripe_fsync -
- */
+
int32_t
-stripe_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
+stripe_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
- priv = this->private;
- trav = this->children;
+ prev = cookie;
+ local = frame->local;
- /* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_cbk, trav->xlator,
- trav->xlator->fops->fsync, fd, flags);
- trav = trav->next;
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s returned %s",
+ prev->this->name, strerror (op_errno));
+ local->op_errno = op_errno;
+ if ((op_errno != ENOENT) ||
+ (prev->this == FIRST_CHILD (this)))
+ local->failed = 1;
+ }
+ if (op_ret >= 0) {
+ local->op_ret = op_ret;
+ if (FIRST_CHILD(this) == prev->this) {
+ local->pre_buf = *prebuf;
+ local->post_buf = *postbuf;
+ }
+ local->prebuf_blocks += prebuf->ia_blocks;
+ local->postbuf_blocks += postbuf->ia_blocks;
+
+ correct_file_size(prebuf, local->fctx, prev);
+ correct_file_size(postbuf, local->fctx, prev);
+
+ if (local->prebuf_size < prebuf->ia_size)
+ local->prebuf_size = prebuf->ia_size;
+
+ if (local->postbuf_size < postbuf->ia_size)
+ local->postbuf_size = postbuf->ia_size;
+ }
}
+ UNLOCK (&frame->lock);
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno);
+ if (!callcnt) {
+ if (local->failed)
+ local->op_ret = -1;
+
+ if (local->op_ret != -1) {
+ local->pre_buf.ia_blocks = local->prebuf_blocks;
+ local->pre_buf.ia_size = local->prebuf_size;
+ local->post_buf.ia_blocks = local->postbuf_blocks;
+ local->post_buf.ia_size = local->postbuf_size;
+ }
+
+ STRIPE_STACK_UNWIND (fsync, frame, local->op_ret,
+ local->op_errno, &local->pre_buf,
+ &local->post_buf, NULL);
+ }
+out:
return 0;
}
-
-/**
- * stripe_fstat -
- */
int32_t
-stripe_fstat (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
+stripe_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
xlator_list_t *trav = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = 1;
VALIDATE_OR_GOTO (frame, err);
@@ -2496,82 +2873,107 @@ stripe_fstat (call_frame_t *frame,
trav = this->children;
/* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
+
+ inode_ctx_get(fd->inode, this, (uint64_t *) &fctx);
+ if (!fctx) {
+ op_errno = EINVAL;
+ goto err;
+ }
+ local->fctx = fctx;
+
local->op_ret = -1;
frame->local = local;
- local->inode = fd->inode;
local->call_count = priv->child_count;
-
+
while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_buf_cbk, trav->xlator,
- trav->xlator->fops->fstat, fd);
+ STACK_WIND (frame, stripe_fsync_cbk, trav->xlator,
+ trav->xlator->fops->fsync, fd, flags, NULL);
trav = trav->next;
}
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+err:
+ STRIPE_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
-
-/**
- * stripe_fchmod -
- */
-int32_t
-stripe_fchmod (call_frame_t *frame, xlator_t *this, fd_t *fd, mode_t mode)
+int32_t
+stripe_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
- priv = this->private;
- trav = this->children;
+ prev = cookie;
+ local = frame->local;
- /* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
- if (!local) {
- op_errno = ENOMEM;
- goto err;
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s returned error %s",
+ prev->this->name, strerror (op_errno));
+ local->op_errno = op_errno;
+ if ((op_errno != ENOENT) ||
+ (prev->this == FIRST_CHILD (this)))
+ local->failed = 1;
+ }
+
+ if (op_ret == 0) {
+ local->op_ret = 0;
+
+ if (FIRST_CHILD(this) == prev->this)
+ local->stbuf = *buf;
+
+ local->stbuf_blocks += buf->ia_blocks;
+
+ correct_file_size(buf, local->fctx, prev);
+
+ if (local->stbuf_size < buf->ia_size)
+ local->stbuf_size = buf->ia_size;
+ }
}
- local->op_ret = -1;
- frame->local = local;
- local->inode = fd->inode;
- local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_buf_cbk, trav->xlator,
- trav->xlator->fops->fchmod, fd, mode);
- trav = trav->next;
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ if (local->failed)
+ local->op_ret = -1;
+
+ if (local->op_ret != -1) {
+ local->stbuf.ia_size = local->stbuf_size;
+ local->stbuf.ia_blocks = local->stbuf_blocks;
+ }
+
+ STRIPE_STACK_UNWIND (fstat, frame, local->op_ret,
+ local->op_errno, &local->stbuf, NULL);
}
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+out:
return 0;
}
-
-/**
- * stripe_fchown -
- */
-int32_t
-stripe_fchown (call_frame_t *frame, xlator_t *this, fd_t *fd, uid_t uid,
- gid_t gid)
+int32_t
+stripe_fstat (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
xlator_list_t *trav = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = 1;
VALIDATE_OR_GOTO (frame, err);
@@ -2583,39 +2985,44 @@ stripe_fchown (call_frame_t *frame, xlator_t *this, fd_t *fd, uid_t uid,
trav = this->children;
/* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
frame->local = local;
- local->inode = fd->inode;
local->call_count = priv->child_count;
-
+
+ if (IA_ISREG(fd->inode->ia_type)) {
+ inode_ctx_get(fd->inode, this, (uint64_t *) &fctx);
+ if (!fctx)
+ goto err;
+ local->fctx = fctx;
+ }
+
while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_buf_cbk, trav->xlator,
- trav->xlator->fops->fchown, fd, uid, gid);
+ STACK_WIND (frame, stripe_fstat_cbk, trav->xlator,
+ trav->xlator->fops->fstat, fd, NULL);
trav = trav->next;
}
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+err:
+ STRIPE_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL);
return 0;
}
-/**
- * stripe_ftruncate -
- */
int32_t
-stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
+stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
+ stripe_fd_ctx_t *fctx = NULL;
+ int i, eof_idx;
+ off_t dest_offset, tmp_offset;
+ int32_t op_errno = 1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -2623,37 +3030,115 @@ stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
VALIDATE_OR_GOTO (fd->inode, err);
priv = this->private;
- trav = this->children;
/* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
local->op_ret = -1;
frame->local = local;
- local->inode = fd->inode;
local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_buf_cbk, trav->xlator,
- trav->xlator->fops->ftruncate, fd, offset);
- trav = trav->next;
- }
+
+ inode_ctx_get(fd->inode, this, (uint64_t *) &fctx);
+ if (!fctx) {
+ gf_log(this->name, GF_LOG_ERROR, "no stripe context");
+ op_errno = EINVAL;
+ goto err;
+ }
+ if (!fctx->stripe_count) {
+ gf_log(this->name, GF_LOG_ERROR, "no stripe count");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->fctx = fctx;
+ eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count;
+
+ for (i = 0; i < fctx->stripe_count; i++) {
+ if (!fctx->xl_array[i]) {
+ gf_log(this->name, GF_LOG_ERROR, "no xlator at index "
+ "%d", i);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (fctx->stripe_coalesce) {
+ if (i < eof_idx)
+ tmp_offset = roof(offset, fctx->stripe_size *
+ fctx->stripe_count);
+ else if (i > eof_idx)
+ tmp_offset = floor(offset, fctx->stripe_size *
+ fctx->stripe_count);
+ else
+ tmp_offset = offset;
+
+ dest_offset = coalesced_offset(tmp_offset,
+ fctx->stripe_size, fctx->stripe_count);
+ } else {
+ dest_offset = offset;
+ }
+
+ STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i],
+ fctx->xl_array[i]->fops->ftruncate, fd, dest_offset,
+ NULL);
+ }
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+err:
+ STRIPE_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
-/**
- * stripe_fsyncdir -
- */
int32_t
-stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
+stripe_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s returned %s",
+ prev->this->name, strerror (op_errno));
+ local->op_errno = op_errno;
+ if ((op_errno != ENOENT) ||
+ (prev->this == FIRST_CHILD (this)))
+ local->failed = 1;
+ }
+ if (op_ret >= 0)
+ local->op_ret = op_ret;
+ }
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ if (local->failed)
+ local->op_ret = -1;
+
+ STRIPE_STACK_UNWIND (fsyncdir, frame, local->op_ret,
+ local->op_errno, NULL);
+ }
+out:
+ return 0;
+}
+
+int32_t
+stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
@@ -2669,7 +3154,7 @@ stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
trav = this->children;
/* Initialization */
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
@@ -2679,29 +3164,114 @@ stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
local->call_count = priv->child_count;
while (trav) {
- STACK_WIND (frame, stripe_stack_unwind_cbk, trav->xlator,
- trav->xlator->fops->fsyncdir, fd, flags);
+ STACK_WIND (frame, stripe_fsyncdir_cbk, trav->xlator,
+ trav->xlator->fops->fsyncdir, fd, flags, NULL);
trav = trav->next;
}
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+err:
+ STRIPE_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
return 0;
}
-/**
- * stripe_single_readv_cbk - This function is used as return fn, when the
- * file name doesn't match the pattern specified for striping.
- */
int32_t
-stripe_single_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count,
- struct stat *stbuf, struct iobref *iobref)
+stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf, iobref);
+ int32_t i = 0;
+ int32_t callcnt = 0;
+ int32_t count = 0;
+ stripe_local_t *local = NULL;
+ struct iovec *vec = NULL;
+ struct iatt tmp_stbuf = {0,};
+ struct iobref *tmp_iobref = NULL;
+ struct iobuf *iobuf = NULL;
+ call_frame_t *prev = NULL;
+
+ if (!this || !frame || !frame->local) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ local = frame->local;
+ prev = cookie;
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+ if (op_ret != -1) {
+ correct_file_size(buf, local->fctx, prev);
+ if (local->stbuf_size < buf->ia_size)
+ local->stbuf_size = buf->ia_size;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ op_ret = 0;
+
+ /* Keep extra space for filling in '\0's */
+ vec = GF_CALLOC ((local->count * 2), sizeof (struct iovec),
+ gf_stripe_mt_iovec);
+ if (!vec) {
+ op_ret = -1;
+ goto done;
+ }
+
+ for (i = 0; i < local->wind_count; i++) {
+ if (local->replies[i].op_ret) {
+ memcpy ((vec + count), local->replies[i].vector,
+ (local->replies[i].count * sizeof (struct iovec)));
+ count += local->replies[i].count;
+ op_ret += local->replies[i].op_ret;
+ }
+ if ((local->replies[i].op_ret <
+ local->replies[i].requested_size) &&
+ (local->stbuf_size > (local->offset + op_ret))) {
+ /* Fill in 0s here */
+ vec[count].iov_len =
+ (local->replies[i].requested_size -
+ local->replies[i].op_ret);
+ iobuf = iobuf_get2 (this->ctx->iobuf_pool,
+ vec[count].iov_len);
+ if (!iobuf) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory.");
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto done;
+ }
+ memset (iobuf->ptr, 0, vec[count].iov_len);
+ vec[count].iov_base = iobuf->ptr;
+
+ iobref_add (local->iobref, iobuf);
+ iobuf_unref(iobuf);
+
+ op_ret += vec[count].iov_len;
+ count++;
+ }
+ GF_FREE (local->replies[i].vector);
+ }
+
+ /* FIXME: notice that st_ino, and st_dev (gen) will be
+ * different than what inode will have. Make sure this doesn't
+ * cause any bugs at higher levels */
+ memcpy (&tmp_stbuf, &local->replies[0].stbuf,
+ sizeof (struct iatt));
+ tmp_stbuf.ia_size = local->stbuf_size;
+
+ done:
+ GF_FREE (local->replies);
+ tmp_iobref = local->iobref;
+ STRIPE_STACK_UNWIND (readv, frame, op_ret, op_errno, vec,
+ count, &tmp_stbuf, tmp_iobref, NULL);
+
+ iobref_unref (tmp_iobref);
+ GF_FREE (vec);
+ }
+out:
return 0;
}
@@ -2710,116 +3280,155 @@ stripe_single_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
* to above layer after putting it in a single vector.
*/
int32_t
-stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct stat *stbuf, struct iobref *iobref)
+ int32_t count, struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
{
int32_t index = 0;
int32_t callcnt = 0;
- call_frame_t *main_frame = NULL;
- stripe_local_t *main_local = NULL;
- stripe_local_t *local = frame->local;
+ int32_t final_count = 0;
+ int32_t need_to_check_proper_size = 0;
+ call_frame_t *mframe = NULL;
+ stripe_local_t *mlocal = NULL;
+ stripe_local_t *local = NULL;
+ struct iovec *final_vec = NULL;
+ struct iatt tmp_stbuf = {0,};
+ struct iatt *tmp_stbuf_p = NULL; //need it for a warning
+ struct iobref *tmp_iobref = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
+ call_frame_t *prev = NULL;
- index = local->node_index;
- main_frame = local->orig_frame;
- main_local = main_frame->local;
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto end;
+ }
+
+ local = frame->local;
+ index = local->node_index;
+ prev = cookie;
+ mframe = local->orig_frame;
+ if (!mframe)
+ goto out;
- LOCK (&main_frame->lock);
+ mlocal = mframe->local;
+ if (!mlocal)
+ goto out;
+
+ fctx = mlocal->fctx;
+
+ LOCK (&mframe->lock);
{
- main_local->replies[index].op_ret = op_ret;
- main_local->replies[index].op_errno = op_errno;
+ mlocal->replies[index].op_ret = op_ret;
+ mlocal->replies[index].op_errno = op_errno;
+ mlocal->replies[index].requested_size = local->readv_size;
if (op_ret >= 0) {
- main_local->replies[index].stbuf = *stbuf;
- main_local->replies[index].count = count;
- main_local->replies[index].vector =
- iov_dup (vector, count);
+ mlocal->replies[index].stbuf = *stbuf;
+ mlocal->replies[index].count = count;
+ mlocal->replies[index].vector = iov_dup (vector, count);
+
+ correct_file_size(stbuf, fctx, prev);
+
+ if (local->stbuf_size < stbuf->ia_size)
+ local->stbuf_size = stbuf->ia_size;
+ local->stbuf_blocks += stbuf->ia_blocks;
- if (!main_local->iobref)
- main_local->iobref = iobref_new ();
- iobref_merge (main_local->iobref, iobref);
+ if (!mlocal->iobref)
+ mlocal->iobref = iobref_new ();
+ iobref_merge (mlocal->iobref, iobref);
}
- callcnt = ++main_local->call_count;
+ callcnt = ++mlocal->call_count;
}
- UNLOCK(&main_frame->lock);
-
- if (callcnt == main_local->wind_count) {
- int32_t final_count = 0;
- struct iovec *final_vec = NULL;
- struct stat tmp_stbuf = {0,};
- struct iobref *iobref = NULL;
+ UNLOCK(&mframe->lock);
+ if (callcnt == mlocal->wind_count) {
op_ret = 0;
- memcpy (&tmp_stbuf, &main_local->replies[0].stbuf,
- sizeof (struct stat));
- for (index=0; index < main_local->wind_count; index++) {
- /* TODO: check whether each stripe returned 'expected'
- * number of bytes
- */
- if (main_local->replies[index].op_ret == -1) {
+
+ for (index=0; index < mlocal->wind_count; index++) {
+ /* check whether each stripe returned
+ * 'expected' number of bytes */
+ if (mlocal->replies[index].op_ret == -1) {
op_ret = -1;
- op_errno = main_local->replies[index].op_errno;
+ op_errno = mlocal->replies[index].op_errno;
break;
}
- op_ret += main_local->replies[index].op_ret;
- final_count += main_local->replies[index].count;
- /* TODO: Do I need to send anything more in stbuf? */
- if (tmp_stbuf.st_size <
- main_local->replies[index].stbuf.st_size) {
- tmp_stbuf.st_size =
- main_local->replies[index].stbuf.st_size;
+ /* TODO: handle the 'holes' within the read range
+ properly */
+ if (mlocal->replies[index].op_ret <
+ mlocal->replies[index].requested_size) {
+ need_to_check_proper_size = 1;
}
+
+ op_ret += mlocal->replies[index].op_ret;
+ mlocal->count += mlocal->replies[index].count;
}
- if (op_ret != -1) {
- final_vec = CALLOC (final_count,
- sizeof (struct iovec));
- if (!final_vec) {
- op_ret = -1;
- final_count = 0;
- goto done;
- }
+ if (op_ret == -1)
+ goto done;
+ if (need_to_check_proper_size)
+ goto check_size;
- final_count = 0;
+ final_vec = GF_CALLOC (mlocal->count, sizeof (struct iovec),
+ gf_stripe_mt_iovec);
- for (index=0;
- index < main_local->wind_count; index++) {
- memcpy (final_vec + final_count,
- main_local->replies[index].vector,
- (main_local->replies[index].count *
- sizeof (struct iovec)));
- final_count +=
- main_local->replies[index].count;
+ if (!final_vec) {
+ op_ret = -1;
+ goto done;
+ }
- free (main_local->replies[index].vector);
- }
- } else {
- final_vec = NULL;
- final_count = 0;
+ for (index = 0; index < mlocal->wind_count; index++) {
+ memcpy ((final_vec + final_count),
+ mlocal->replies[index].vector,
+ (mlocal->replies[index].count *
+ sizeof (struct iovec)));
+ final_count += mlocal->replies[index].count;
+ GF_FREE (mlocal->replies[index].vector);
}
+ /* FIXME: notice that st_ino, and st_dev (gen) will be
+ * different than what inode will have. Make sure this doesn't
+ * cause any bugs at higher levels */
+ memcpy (&tmp_stbuf, &mlocal->replies[0].stbuf,
+ sizeof (struct iatt));
+ tmp_stbuf.ia_size = local->stbuf_size;
+ tmp_stbuf.ia_blocks = local->stbuf_blocks;
+
done:
/* */
- FREE (main_local->replies);
- iobref = main_local->iobref;
- STACK_UNWIND (main_frame, op_ret, op_errno,
- final_vec, final_count, &tmp_stbuf, iobref);
+ GF_FREE (mlocal->replies);
+ tmp_iobref = mlocal->iobref;
+ /* work around for nfs truncated read. Bug 3774 */
+ tmp_stbuf_p = &tmp_stbuf;
+ WIPE (tmp_stbuf_p);
+ STRIPE_STACK_UNWIND (readv, mframe, op_ret, op_errno, final_vec,
+ final_count, &tmp_stbuf, tmp_iobref, NULL);
+
+ iobref_unref (tmp_iobref);
+ GF_FREE (final_vec);
+ }
+
+ goto out;
+
+check_size:
+ mlocal->call_count = fctx->stripe_count;
- iobref_unref (iobref);
- if (final_vec)
- FREE (final_vec);
+ for (index = 0; index < fctx->stripe_count; index++) {
+ STACK_WIND (mframe, stripe_readv_fstat_cbk,
+ (fctx->xl_array[index]),
+ (fctx->xl_array[index])->fops->fstat,
+ mlocal->fd, NULL);
}
- STACK_DESTROY (frame->root);
+out:
+ STRIPE_STACK_DESTROY (frame);
+end:
return 0;
}
-/**
- * stripe_readv -
- */
+
int32_t
stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset)
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
- int32_t op_errno = 1;
+ int32_t op_errno = EINVAL;
int32_t idx = 0;
int32_t index = 0;
int32_t num_stripe = 0;
@@ -2830,17 +3439,18 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
uint64_t stripe_size = 0;
off_t rounded_start = 0;
off_t frame_offset = offset;
+ off_t dest_offset = 0;
stripe_local_t *local = NULL;
call_frame_t *rframe = NULL;
stripe_local_t *rlocal = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
stripe_fd_ctx_t *fctx = NULL;
- trav = this->children;
- priv = this->private;
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
- fd_ctx_get (fd, this, &tmp_fctx);
+ inode_ctx_get (fd->inode, this, &tmp_fctx);
if (!tmp_fctx) {
op_errno = EBADFD;
goto err;
@@ -2848,122 +3458,183 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
stripe_size = fctx->stripe_size;
- /* The file is stripe across the child nodes. Send the read request
- * to the child nodes appropriately after checking which region of
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
+ if (!stripe_size) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Wrong stripe size for the file");
+ goto err;
+ }
+ /* The file is stripe across the child nodes. Send the read request
+ * to the child nodes appropriately after checking which region of
* the file is in which child node. Always '0-<stripe_size>' part of
* the file resides in the first child.
*/
rounded_start = floor (offset, stripe_size);
rounded_end = roof (offset+size, stripe_size);
- num_stripe = (rounded_end - rounded_start) / stripe_size;
-
- local = CALLOC (1, sizeof (stripe_local_t));
+ num_stripe = (rounded_end- rounded_start)/stripe_size;
+
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
- local->wind_count = num_stripe;
frame->local = local;
-
+
/* This is where all the vectors should be copied. */
- local->replies = CALLOC (num_stripe, sizeof (struct readv_replies));
+ local->replies = GF_CALLOC (num_stripe, sizeof (struct stripe_replies),
+ gf_stripe_mt_stripe_replies);
if (!local->replies) {
op_errno = ENOMEM;
goto err;
}
-
+
off_index = (offset / stripe_size) % fctx->stripe_count;
-
+ local->wind_count = num_stripe;
+ local->readv_size = size;
+ local->offset = offset;
+ local->fd = fd_ref (fd);
+ local->fctx = fctx;
+
for (index = off_index; index < (num_stripe + off_index); index++) {
rframe = copy_frame (frame);
- rlocal = CALLOC (1, sizeof (stripe_local_t));
+ rlocal = mem_get0 (this->local_pool);
if (!rlocal) {
op_errno = ENOMEM;
goto err;
}
-
+
frame_size = min (roof (frame_offset+1, stripe_size),
(offset + size)) - frame_offset;
-
+
rlocal->node_index = index - off_index;
rlocal->orig_frame = frame;
+ rlocal->readv_size = frame_size;
rframe->local = rlocal;
idx = (index % fctx->stripe_count);
+
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(frame_offset,
+ stripe_size, fctx->stripe_count);
+ else
+ dest_offset = frame_offset;
+
STACK_WIND (rframe, stripe_readv_cbk, fctx->xl_array[idx],
fctx->xl_array[idx]->fops->readv,
- fd, frame_size, frame_offset);
-
+ fd, frame_size, dest_offset, flags, xdata);
+
frame_offset += frame_size;
}
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+err:
+ if (rframe)
+ STRIPE_STACK_DESTROY (rframe);
+
+ STRIPE_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
return 0;
}
-/**
- * stripe_writev_cbk -
- */
int32_t
stripe_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
+ stripe_local_t *mlocal = NULL;
+ call_frame_t *prev = NULL;
+ call_frame_t *mframe = NULL;
+ struct stripe_replies *reply = NULL;
+ int32_t i = 0;
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
local = frame->local;
+ mframe = local->orig_frame;
+ mlocal = mframe->local;
LOCK(&frame->lock);
{
- callcnt = ++local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- ((call_frame_t *)cookie)->this->name,
- strerror (op_errno));
- local->op_errno = op_errno;
- local->op_ret = -1;
- }
+ callcnt = ++mlocal->call_count;
+
+ mlocal->replies[local->node_index].op_ret = op_ret;
+ mlocal->replies[local->node_index].op_errno = op_errno;
+
if (op_ret >= 0) {
- local->op_ret += op_ret;
- local->stbuf = *stbuf;
+ mlocal->post_buf = *postbuf;
+ mlocal->pre_buf = *prebuf;
+
+ mlocal->prebuf_blocks += prebuf->ia_blocks;
+ mlocal->postbuf_blocks += postbuf->ia_blocks;
+
+ correct_file_size(prebuf, mlocal->fctx, prev);
+ correct_file_size(postbuf, mlocal->fctx, prev);
+
+ if (mlocal->prebuf_size < prebuf->ia_size)
+ mlocal->prebuf_size = prebuf->ia_size;
+ if (mlocal->postbuf_size < postbuf->ia_size)
+ mlocal->postbuf_size = postbuf->ia_size;
}
}
UNLOCK (&frame->lock);
- if ((callcnt == local->wind_count) && local->unwind) {
- STACK_UNWIND (frame, local->op_ret,
- local->op_errno, &local->stbuf);
+ if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
+ mlocal->pre_buf.ia_size = mlocal->prebuf_size;
+ mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
+ mlocal->post_buf.ia_size = mlocal->postbuf_size;
+ mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
+
+ /*
+ * Only return the number of consecutively written bytes up until
+ * the first error. Only return an error if it occurs first.
+ *
+ * When a short write occurs, the application should retry at the
+ * appropriate offset, at which point we'll potentially pass back
+ * the error.
+ */
+ for (i = 0, reply = mlocal->replies; i < mlocal->wind_count;
+ i++, reply++) {
+ if (reply->op_ret == -1) {
+ gf_log(this->name, GF_LOG_DEBUG, "reply %d "
+ "returned error %s", i,
+ strerror(reply->op_errno));
+ if (!mlocal->op_ret) {
+ mlocal->op_ret = -1;
+ mlocal->op_errno = reply->op_errno;
+ }
+ break;
+ }
+
+ mlocal->op_ret += reply->op_ret;
+
+ if (reply->op_ret < reply->requested_size)
+ break;
+ }
+
+ GF_FREE(mlocal->replies);
+
+ STRIPE_STACK_UNWIND (writev, mframe, mlocal->op_ret,
+ mlocal->op_errno, &mlocal->pre_buf,
+ &mlocal->post_buf, NULL);
}
+out:
+ STRIPE_STACK_DESTROY(frame);
return 0;
}
-
-/**
- * stripe_single_writev_cbk -
- */
-int32_t
-stripe_single_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *stbuf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, stbuf);
- return 0;
-}
-/**
- * stripe_writev -
- */
int32_t
stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref)
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
- struct iovec *tmp_vec = vector;
- stripe_private_t *priv = NULL;
+ struct iovec *tmp_vec = NULL;
stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = 1;
int32_t idx = 0;
@@ -2974,10 +3645,19 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
off_t fill_size = 0;
uint64_t stripe_size = 0;
uint64_t tmp_fctx = 0;
+ off_t dest_offset = 0;
+ off_t rounded_start = 0;
+ off_t rounded_end = 0;
+ int32_t total_chunks = 0;
+ call_frame_t *wframe = NULL;
+ stripe_local_t *wlocal = NULL;
- priv = this->private;
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
- fd_ctx_get (fd, this, &tmp_fctx);
+ inode_ctx_get (fd->inode, this, &tmp_fctx);
if (!tmp_fctx) {
op_errno = EINVAL;
goto err;
@@ -2985,29 +3665,57 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
stripe_size = fctx->stripe_size;
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
/* File has to be stripped across the child nodes */
for (idx = 0; idx< count; idx ++) {
- total_size += tmp_vec[idx].iov_len;
+ total_size += vector[idx].iov_len;
}
remaining_size = total_size;
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
frame->local = local;
local->stripe_size = stripe_size;
+ local->fctx = fctx;
+
+ if (!stripe_size) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Wrong stripe size for the file");
+ op_errno = EINVAL;
+ goto err;
+ }
+ rounded_start = floor(offset, stripe_size);
+ rounded_end = roof(offset + total_size, stripe_size);
+ total_chunks = (rounded_end - rounded_start) / stripe_size;
+ local->replies = GF_CALLOC(total_chunks, sizeof(struct stripe_replies),
+ gf_stripe_mt_stripe_replies);
+ if (!local->replies) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ total_chunks = 0;
while (1) {
- /* Send striped chunk of the vector to child
+ wframe = copy_frame(frame);
+ wlocal = mem_get0(this->local_pool);
+ if (!wlocal) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ wlocal->orig_frame = frame;
+ wframe->local = wlocal;
+
+ /* Send striped chunk of the vector to child
nodes appropriately. */
- trav = this->children;
-
- idx = (((offset + offset_offset) /
+ idx = (((offset + offset_offset) /
local->stripe_size) % fctx->stripe_count);
- fill_size = (local->stripe_size -
+ fill_size = (local->stripe_size -
((offset + offset_offset) % local->stripe_size));
if (fill_size > remaining_size)
fill_size = remaining_size;
@@ -3016,158 +3724,619 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
tmp_count = iov_subset (vector, count, offset_offset,
offset_offset + fill_size, NULL);
- tmp_vec = CALLOC (tmp_count, sizeof (struct iovec));
+ tmp_vec = GF_CALLOC (tmp_count, sizeof (struct iovec),
+ gf_stripe_mt_iovec);
if (!tmp_vec) {
op_errno = ENOMEM;
goto err;
}
tmp_count = iov_subset (vector, count, offset_offset,
offset_offset + fill_size, tmp_vec);
-
+
local->wind_count++;
if (remaining_size == 0)
local->unwind = 1;
- STACK_WIND(frame, stripe_writev_cbk, fctx->xl_array[idx],
- fctx->xl_array[idx]->fops->writev, fd, tmp_vec,
- tmp_count, offset + offset_offset, iobref);
- FREE (tmp_vec);
+ /*
+ * Store off the request index (with respect to the chunk of the
+ * initial offset) and the size of the request. This is required
+ * in the callback to calculate an appropriate return value in
+ * the event of a write failure in one or more requests.
+ */
+ wlocal->node_index = total_chunks;
+ local->replies[total_chunks].requested_size = fill_size;
+
+ dest_offset = offset + offset_offset;
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(dest_offset,
+ local->stripe_size, fctx->stripe_count);
+
+ STACK_WIND (wframe, stripe_writev_cbk, fctx->xl_array[idx],
+ fctx->xl_array[idx]->fops->writev, fd, tmp_vec,
+ tmp_count, dest_offset, flags, iobref,
+ xdata);
+
+ GF_FREE (tmp_vec);
offset_offset += fill_size;
+ total_chunks++;
if (remaining_size == 0)
break;
}
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+err:
+ if (wframe)
+ STRIPE_STACK_DESTROY(wframe);
+
+ STRIPE_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
+int32_t
+stripe_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ stripe_local_t *mlocal = NULL;
+ call_frame_t *prev = NULL;
+ call_frame_t *mframe = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+ mframe = local->orig_frame;
+ mlocal = mframe->local;
+
+ LOCK(&frame->lock);
+ {
+ callcnt = ++mlocal->call_count;
+
+ if (op_ret == 0) {
+ mlocal->post_buf = *postbuf;
+ mlocal->pre_buf = *prebuf;
+
+ mlocal->prebuf_blocks += prebuf->ia_blocks;
+ mlocal->postbuf_blocks += postbuf->ia_blocks;
+
+ correct_file_size(prebuf, mlocal->fctx, prev);
+ correct_file_size(postbuf, mlocal->fctx, prev);
+
+ if (mlocal->prebuf_size < prebuf->ia_size)
+ mlocal->prebuf_size = prebuf->ia_size;
+ if (mlocal->postbuf_size < postbuf->ia_size)
+ mlocal->postbuf_size = postbuf->ia_size;
+ }
+
+ /* return the first failure */
+ if (mlocal->op_ret == 0) {
+ mlocal->op_ret = op_ret;
+ mlocal->op_errno = op_errno;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
+ mlocal->pre_buf.ia_size = mlocal->prebuf_size;
+ mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
+ mlocal->post_buf.ia_size = mlocal->postbuf_size;
+ mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
+
+ STRIPE_STACK_UNWIND (fallocate, mframe, mlocal->op_ret,
+ mlocal->op_errno, &mlocal->pre_buf,
+ &mlocal->post_buf, NULL);
+ }
+out:
+ STRIPE_STACK_DESTROY(frame);
+ return 0;
+}
+
+int32_t
+stripe_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
+ int32_t op_errno = 1;
+ int32_t idx = 0;
+ int32_t offset_offset = 0;
+ int32_t remaining_size = 0;
+ off_t fill_size = 0;
+ uint64_t stripe_size = 0;
+ uint64_t tmp_fctx = 0;
+ off_t dest_offset = 0;
+ call_frame_t *fframe = NULL;
+ stripe_local_t *flocal = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
+
+ inode_ctx_get (fd->inode, this, &tmp_fctx);
+ if (!tmp_fctx) {
+ op_errno = EINVAL;
+ goto err;
+ }
+ fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
+ stripe_size = fctx->stripe_size;
+
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
+ remaining_size = len;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ frame->local = local;
+ local->stripe_size = stripe_size;
+ local->fctx = fctx;
+
+ if (!stripe_size) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Wrong stripe size for the file");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ while (1) {
+ fframe = copy_frame(frame);
+ flocal = mem_get0(this->local_pool);
+ if (!flocal) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ flocal->orig_frame = frame;
+ fframe->local = flocal;
+
+ /* send fallocate request to the associated child node */
+ idx = (((offset + offset_offset) /
+ local->stripe_size) % fctx->stripe_count);
+
+ fill_size = (local->stripe_size -
+ ((offset + offset_offset) % local->stripe_size));
+ if (fill_size > remaining_size)
+ fill_size = remaining_size;
+
+ remaining_size -= fill_size;
+
+ local->wind_count++;
+ if (remaining_size == 0)
+ local->unwind = 1;
+
+ dest_offset = offset + offset_offset;
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(dest_offset,
+ local->stripe_size, fctx->stripe_count);
+
+ /*
+ * TODO: Create a separate handler for coalesce mode that sends a
+ * single fallocate per-child (since the ranges are linear).
+ */
+ STACK_WIND(fframe, stripe_fallocate_cbk, fctx->xl_array[idx],
+ fctx->xl_array[idx]->fops->fallocate, fd, mode,
+ dest_offset, fill_size, xdata);
+
+ offset_offset += fill_size;
+ if (remaining_size == 0)
+ break;
+ }
+
+ return 0;
+err:
+ if (fframe)
+ STRIPE_STACK_DESTROY(fframe);
+
+ STRIPE_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
-/* Management operations */
-/**
- * stripe_stats_cbk - Add all the fields received from different clients.
- * Once all the clients return, send stats to above layer.
- *
- */
int32_t
-stripe_stats_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct xlator_stats *stats)
+stripe_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
int32_t callcnt = 0;
stripe_local_t *local = NULL;
+ stripe_local_t *mlocal = NULL;
+ call_frame_t *prev = NULL;
+ call_frame_t *mframe = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+ prev = cookie;
local = frame->local;
+ mframe = local->orig_frame;
+ mlocal = mframe->local;
LOCK(&frame->lock);
{
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- ((call_frame_t *)cookie)->this->name,
- strerror (op_errno));
- local->op_ret = -1;
- local->op_errno = op_errno;
- }
+ callcnt = ++mlocal->call_count;
+
if (op_ret == 0) {
- if (local->op_ret == -2) {
- /* This is to make sure this is the
- first time */
- local->stats = *stats;
- local->op_ret = 0;
- } else {
- local->stats.nr_files += stats->nr_files;
- local->stats.free_disk += stats->free_disk;
- local->stats.disk_usage += stats->disk_usage;
- local->stats.nr_clients += stats->nr_clients;
- }
+ mlocal->post_buf = *postbuf;
+ mlocal->pre_buf = *prebuf;
+
+ mlocal->prebuf_blocks += prebuf->ia_blocks;
+ mlocal->postbuf_blocks += postbuf->ia_blocks;
+
+ correct_file_size(prebuf, mlocal->fctx, prev);
+ correct_file_size(postbuf, mlocal->fctx, prev);
+
+ if (mlocal->prebuf_size < prebuf->ia_size)
+ mlocal->prebuf_size = prebuf->ia_size;
+ if (mlocal->postbuf_size < postbuf->ia_size)
+ mlocal->postbuf_size = postbuf->ia_size;
}
+
+ /* return the first failure */
+ if (mlocal->op_ret == 0) {
+ mlocal->op_ret = op_ret;
+ mlocal->op_errno = op_errno;
+ }
}
UNLOCK (&frame->lock);
- if (!callcnt) {
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stats);
- }
+ if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
+ mlocal->pre_buf.ia_size = mlocal->prebuf_size;
+ mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
+ mlocal->post_buf.ia_size = mlocal->postbuf_size;
+ mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
+ STRIPE_STACK_UNWIND (discard, mframe, mlocal->op_ret,
+ mlocal->op_errno, &mlocal->pre_buf,
+ &mlocal->post_buf, NULL);
+ }
+out:
+ STRIPE_STACK_DESTROY(frame);
return 0;
}
-/**
- * stripe_stats -
- */
int32_t
-stripe_stats (call_frame_t *frame, xlator_t *this, int32_t flags)
+stripe_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
{
stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = 1;
+ int32_t idx = 0;
+ int32_t offset_offset = 0;
+ int32_t remaining_size = 0;
+ off_t fill_size = 0;
+ uint64_t stripe_size = 0;
+ uint64_t tmp_fctx = 0;
+ off_t dest_offset = 0;
+ call_frame_t *fframe = NULL;
+ stripe_local_t *flocal = NULL;
- priv = this->private;
- trav = this->children;
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
+
+ inode_ctx_get (fd->inode, this, &tmp_fctx);
+ if (!tmp_fctx) {
+ op_errno = EINVAL;
+ goto err;
+ }
+ fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
+ stripe_size = fctx->stripe_size;
+
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
+ remaining_size = len;
- local = CALLOC (1, sizeof (stripe_local_t));
+ local = mem_get0 (this->local_pool);
if (!local) {
op_errno = ENOMEM;
goto err;
}
frame->local = local;
- local->op_ret = -2; /* to be used as a flag in _cbk */
- local->call_count = priv->child_count;
+ local->stripe_size = stripe_size;
+ local->fctx = fctx;
- while (trav) {
- STACK_WIND (frame, stripe_stats_cbk, trav->xlator,
- trav->xlator->mops->stats, flags);
- trav = trav->next;
+ if (!stripe_size) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Wrong stripe size for the file");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ while (1) {
+ fframe = copy_frame(frame);
+ flocal = mem_get0(this->local_pool);
+ if (!flocal) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ flocal->orig_frame = frame;
+ fframe->local = flocal;
+
+ /* send discard request to the associated child node */
+ idx = (((offset + offset_offset) /
+ local->stripe_size) % fctx->stripe_count);
+
+ fill_size = (local->stripe_size -
+ ((offset + offset_offset) % local->stripe_size));
+ if (fill_size > remaining_size)
+ fill_size = remaining_size;
+
+ remaining_size -= fill_size;
+
+ local->wind_count++;
+ if (remaining_size == 0)
+ local->unwind = 1;
+
+ dest_offset = offset + offset_offset;
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(dest_offset,
+ local->stripe_size, fctx->stripe_count);
+
+ /*
+ * TODO: Create a separate handler for coalesce mode that sends a
+ * single discard per-child (since the ranges are linear).
+ */
+ STACK_WIND(fframe, stripe_discard_cbk, fctx->xl_array[idx],
+ fctx->xl_array[idx]->fops->discard, fd, dest_offset,
+ fill_size, xdata);
+
+ offset_offset += fill_size;
+ if (remaining_size == 0)
+ break;
}
+
return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+err:
+ if (fframe)
+ STRIPE_STACK_DESTROY(fframe);
+
+ STRIPE_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+stripe_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ int32_t callcnt = 0;
+ stripe_local_t *local = NULL;
+ stripe_local_t *mlocal = NULL;
+ call_frame_t *prev = NULL;
+ call_frame_t *mframe = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+
+ prev = cookie;
+ local = frame->local;
+ mframe = local->orig_frame;
+ mlocal = mframe->local;
+
+ LOCK(&frame->lock);
+ {
+ callcnt = ++mlocal->call_count;
+
+ if (op_ret == 0) {
+ mlocal->post_buf = *postbuf;
+ mlocal->pre_buf = *prebuf;
+
+ mlocal->prebuf_blocks += prebuf->ia_blocks;
+ mlocal->postbuf_blocks += postbuf->ia_blocks;
+
+ correct_file_size(prebuf, mlocal->fctx, prev);
+ correct_file_size(postbuf, mlocal->fctx, prev);
+
+ if (mlocal->prebuf_size < prebuf->ia_size)
+ mlocal->prebuf_size = prebuf->ia_size;
+ if (mlocal->postbuf_size < postbuf->ia_size)
+ mlocal->postbuf_size = postbuf->ia_size;
+ }
+
+ /* return the first failure */
+ if (mlocal->op_ret == 0) {
+ mlocal->op_ret = op_ret;
+ mlocal->op_errno = op_errno;
+ }
+ }
+ UNLOCK (&frame->lock);
+
+ if ((callcnt == mlocal->wind_count) && mlocal->unwind) {
+ mlocal->pre_buf.ia_size = mlocal->prebuf_size;
+ mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks;
+ mlocal->post_buf.ia_size = mlocal->postbuf_size;
+ mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks;
+
+ STRIPE_STACK_UNWIND (zerofill, mframe, mlocal->op_ret,
+ mlocal->op_errno, &mlocal->pre_buf,
+ &mlocal->post_buf, NULL);
+ }
+out:
+ STRIPE_STACK_DESTROY(frame);
+ return 0;
+}
+
+int32_t
+stripe_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
+ int32_t op_errno = 1;
+ int32_t idx = 0;
+ int32_t offset_offset = 0;
+ int32_t remaining_size = 0;
+ off_t fill_size = 0;
+ uint64_t stripe_size = 0;
+ uint64_t tmp_fctx = 0;
+ off_t dest_offset = 0;
+ call_frame_t *fframe = NULL;
+ stripe_local_t *flocal = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO (fd->inode, err);
+
+ inode_ctx_get (fd->inode, this, &tmp_fctx);
+ if (!tmp_fctx) {
+ op_errno = EINVAL;
+ goto err;
+ }
+ fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
+ stripe_size = fctx->stripe_size;
+
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
+ remaining_size = len;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
+ stripe_size = fctx->stripe_size;
+
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
+ remaining_size = len;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
+ stripe_size = fctx->stripe_size;
+
+ STRIPE_VALIDATE_FCTX (fctx, err);
+
+ remaining_size = len;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ frame->local = local;
+ local->stripe_size = stripe_size;
+ local->fctx = fctx;
+
+ if (!stripe_size) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Wrong stripe size for the file");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ while (1) {
+ fframe = copy_frame(frame);
+ flocal = mem_get0(this->local_pool);
+ if (!flocal) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ flocal->orig_frame = frame;
+ fframe->local = flocal;
+
+ idx = (((offset + offset_offset) /
+ local->stripe_size) % fctx->stripe_count);
+
+ fill_size = (local->stripe_size -
+ ((offset + offset_offset) % local->stripe_size));
+ if (fill_size > remaining_size)
+ fill_size = remaining_size;
+
+ remaining_size -= fill_size;
+
+ local->wind_count++;
+ if (remaining_size == 0)
+ local->unwind = 1;
+
+ dest_offset = offset + offset_offset;
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(dest_offset,
+ local->stripe_size,
+ fctx->stripe_count);
+
+ STACK_WIND(fframe, stripe_zerofill_cbk, fctx->xl_array[idx],
+ fctx->xl_array[idx]->fops->zerofill, fd,
+ dest_offset, fill_size, xdata);
+ offset_offset += fill_size;
+ if (remaining_size == 0)
+ break;
+ }
+
+ return 0;
+err:
+ if (fframe)
+ STRIPE_STACK_DESTROY(fframe);
+
+ STRIPE_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
int32_t
stripe_release (xlator_t *this, fd_t *fd)
{
+ return 0;
+}
+
+int
+stripe_forget (xlator_t *this, inode_t *inode)
+{
uint64_t tmp_fctx = 0;
stripe_fd_ctx_t *fctx = NULL;
- fd_ctx_del (fd, this, &tmp_fctx);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (inode, err);
+
+ (void) inode_ctx_del (inode, this, &tmp_fctx);
if (!tmp_fctx) {
- goto out;
+ goto err;
}
-
+
fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
if (!fctx->static_array)
- FREE (fctx->xl_array);
-
- FREE (fctx);
-
- out:
- return 0;
+ GF_FREE (fctx->xl_array);
+
+ GF_FREE (fctx);
+err:
+ return 0;
}
-/**
- * notify
- */
int32_t
notify (xlator_t *this, int32_t event, void *data, ...)
{
stripe_private_t *priv = NULL;
int down_client = 0;
int i = 0;
+ gf_boolean_t heard_from_all_children = _gf_false;
+
+ if (!this)
+ return 0;
priv = this->private;
if (!priv)
return 0;
- switch (event)
+ switch (event)
{
case GF_EVENT_CHILD_UP:
{
@@ -3176,24 +4345,28 @@ notify (xlator_t *this, int32_t event, void *data, ...)
if (data == priv->xl_array[i])
break;
}
- priv->state[i] = 1;
- for (i = 0; i < priv->child_count; i++) {
- if (!priv->state[i])
- down_client++;
+
+ if (priv->child_count == i) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "got GF_EVENT_CHILD_UP bad subvolume %s",
+ data? ((xlator_t *)data)->name: NULL);
+ break;
}
LOCK (&priv->lock);
{
- priv->nodes_down = down_client;
-
- if (data == FIRST_CHILD (this)) {
+ if (data == FIRST_CHILD (this))
priv->first_child_down = 0;
- default_notify (this, event, data);
- }
+ priv->last_event[i] = event;
}
UNLOCK (&priv->lock);
}
break;
+ case GF_EVENT_CHILD_CONNECTING:
+ {
+ // 'CONNECTING' doesn't ensure its CHILD_UP, so do nothing
+ goto out;
+ }
case GF_EVENT_CHILD_DOWN:
{
/* get an index number to set */
@@ -3201,20 +4374,19 @@ notify (xlator_t *this, int32_t event, void *data, ...)
if (data == priv->xl_array[i])
break;
}
- priv->state[i] = 0;
- for (i = 0; i < priv->child_count; i++) {
- if (!priv->state[i])
- down_client++;
+
+ if (priv->child_count == i) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "got GF_EVENT_CHILD_DOWN bad subvolume %s",
+ data? ((xlator_t *)data)->name: NULL);
+ break;
}
LOCK (&priv->lock);
{
- priv->nodes_down = down_client;
-
- if (data == FIRST_CHILD (this)) {
+ if (data == FIRST_CHILD (this))
priv->first_child_down = 1;
- default_notify (this, event, data);
- }
+ priv->last_event[i] = event;
}
UNLOCK (&priv->lock);
}
@@ -3224,82 +4396,751 @@ notify (xlator_t *this, int32_t event, void *data, ...)
{
/* */
default_notify (this, event, data);
+ goto out;
}
break;
}
+ // Consider child as down if it's last_event is not CHILD_UP
+ for (i = 0, down_client = 0; i < priv->child_count; i++)
+ if (priv->last_event[i] != GF_EVENT_CHILD_UP)
+ down_client++;
+
+ LOCK (&priv->lock);
+ {
+ priv->nodes_down = down_client;
+ }
+ UNLOCK (&priv->lock);
+
+ heard_from_all_children = _gf_true;
+ for (i = 0; i < priv->child_count; i++)
+ if (!priv->last_event[i])
+ heard_from_all_children = _gf_false;
+
+ if (heard_from_all_children)
+ default_notify (this, event, data);
+out:
return 0;
}
int
-set_stripe_block_size (xlator_t *this, stripe_private_t *priv, char *data)
+stripe_setxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno, dict_t *xdata)
{
- int ret = -1;
- char *tmp_str = NULL;
- char *tmp_str1 = NULL;
- char *dup_str = NULL;
- char *stripe_str = NULL;
- char *pattern = NULL;
- char *num = NULL;
- struct stripe_options *temp_stripeopt = NULL;
- struct stripe_options *stripe_opt = NULL;
-
- /* Get the pattern for striping.
- "option block-size *avi:10MB" etc */
- stripe_str = strtok_r (data, ",", &tmp_str);
- while (stripe_str) {
- dup_str = strdup (stripe_str);
- stripe_opt = CALLOC (1, sizeof (struct stripe_options));
- if (!stripe_opt)
- goto out;
+ int ret = -1;
+ int call_cnt = 0;
+ stripe_local_t *local = NULL;
+
+ if (!frame || !frame->local || !this) {
+ gf_log ("", GF_LOG_ERROR, "Possible NULL deref");
+ return ret;
+ }
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ call_cnt = --local->wind_count;
+
+ /**
+ * We overwrite ->op_* values here for subsequent faliure
+ * conditions, hence we propogate the last errno down the
+ * stack.
+ */
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto unlock;
+ }
+ }
+
+ unlock:
+ UNLOCK (&frame->lock);
+
+ if (!call_cnt) {
+ STRIPE_STACK_UNWIND (setxattr, frame, local->op_ret,
+ local->op_errno, xdata);
+ }
+
+ return 0;
+}
+
+#ifdef HAVE_BD_XLATOR
+int
+stripe_is_bd (dict_t *this, char *key, data_t *value, void *data)
+{
+ gf_boolean_t *is_bd = data;
+
+ if (data == NULL)
+ return 0;
+
+ if (XATTR_IS_BD (key))
+ *is_bd = _gf_true;
+
+ return 0;
+}
+
+inline gf_boolean_t
+stripe_setxattr_is_bd (dict_t *dict)
+{
+ gf_boolean_t is_bd = _gf_false;
+
+ if (dict == NULL)
+ goto out;
+
+ dict_foreach (dict, stripe_is_bd, &is_bd);
+out:
+ return is_bd;
+}
+#else
+#define stripe_setxattr_is_bd(dict) _gf_false
+#endif
+
+int
+stripe_setxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *dict, int flags, dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
+ xlator_list_t *trav = NULL;
+ stripe_private_t *priv = NULL;
+ stripe_local_t *local = NULL;
+ int i = 0;
+ gf_boolean_t is_bd = _gf_false;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.*stripe*", dict,
+ op_errno, err);
+
+ priv = this->private;
+ trav = this->children;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ frame->local = local;
+ local->wind_count = priv->child_count;
+ local->op_ret = local->op_errno = 0;
+
+ is_bd = stripe_setxattr_is_bd (dict);
+
+ /**
+ * Set xattrs for directories on all subvolumes. Additionally
+ * this power is only given to a special client. Bd xlator
+ * also needs xattrs for regular files (ie LVs)
+ */
+ if (((frame->root->pid == GF_CLIENT_PID_GSYNCD) &&
+ IA_ISDIR (loc->inode->ia_type)) || is_bd) {
+ for (i = 0; i < priv->child_count; i++, trav = trav->next) {
+ STACK_WIND (frame, stripe_setxattr_cbk,
+ trav->xlator, trav->xlator->fops->setxattr,
+ loc, dict, flags, xdata);
+ }
+ } else {
+ local->wind_count = 1;
+ STACK_WIND (frame, stripe_setxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr,
+ loc, dict, flags, xdata);
+ }
+
+ return 0;
+err:
+ STRIPE_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+
+int
+stripe_fsetxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno, dict_t *xdata)
+{
+ STRIPE_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int
+stripe_is_special_key (dict_t *this,
+ char *key,
+ data_t *value,
+ void *data)
+{
+ gf_boolean_t *is_special = NULL;
+
+ if (data == NULL) {
+ goto out;
+ }
+
+ is_special = data;
+
+ if (XATTR_IS_LOCKINFO (key) || XATTR_IS_BD (key))
+ *is_special = _gf_true;
+
+out:
+ return 0;
+}
+
+int32_t
+stripe_fsetxattr_everyone_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
+{
+ int call_count = 0;
+ stripe_local_t *local = NULL;
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ call_count = --local->wind_count;
- pattern = strtok_r (dup_str, ":", &tmp_str1);
- num = strtok_r (NULL, ":", &tmp_str1);
- if (!num) {
- num = pattern;
- pattern = "*";
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
}
- if (gf_string2bytesize (num, &stripe_opt->block_size) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format \"%s\"", num);
+ }
+ UNLOCK (&frame->lock);
+
+ if (call_count == 0) {
+ STRIPE_STACK_UNWIND (fsetxattr, frame, local->op_ret,
+ local->op_errno, NULL);
+ }
+ return 0;
+}
+
+int
+stripe_fsetxattr_to_everyone (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int flags, dict_t *xdata)
+{
+ xlator_list_t *trav = NULL;
+ stripe_private_t *priv = NULL;
+ int ret = -1;
+ stripe_local_t *local = NULL;
+
+ priv = this->private;
+
+ local = mem_get0 (this->local_pool);
+ if (local == NULL) {
+ goto out;
+ }
+
+ frame->local = local;
+
+ local->wind_count = priv->child_count;
+
+ trav = this->children;
+
+ while (trav) {
+ STACK_WIND (frame, stripe_fsetxattr_everyone_cbk,
+ trav->xlator, trav->xlator->fops->fsetxattr,
+ fd, dict, flags, xdata);
+ trav = trav->next;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+inline gf_boolean_t
+stripe_fsetxattr_is_special (dict_t *dict)
+{
+ gf_boolean_t is_spl = _gf_false;
+
+ if (dict == NULL) {
+ goto out;
+ }
+
+ dict_foreach (dict, stripe_is_special_key, &is_spl);
+
+out:
+ return is_spl;
+}
+
+int
+stripe_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int flags, dict_t *xdata)
+{
+ int32_t op_ret = -1, ret = -1, op_errno = EINVAL;
+ gf_boolean_t is_spl = _gf_false;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.*stripe*", dict,
+ op_errno, err);
+
+ is_spl = stripe_fsetxattr_is_special (dict);
+ if (is_spl) {
+ ret = stripe_fsetxattr_to_everyone (frame, this, fd, dict,
+ flags, xdata);
+ if (ret < 0) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ goto out;
+ }
+
+ STACK_WIND (frame, stripe_fsetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ fd, dict, flags, xdata);
+out:
+ return 0;
+err:
+ STRIPE_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, NULL);
+ return 0;
+}
+
+int
+stripe_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STRIPE_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+stripe_removexattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
+
+ VALIDATE_OR_GOTO (this, err);
+
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.*stripe*",
+ name, op_errno, err);
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (loc, err);
+
+ STACK_WIND (frame, stripe_removexattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr,
+ loc, name, xdata);
+ return 0;
+err:
+ STRIPE_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+
+int
+stripe_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STRIPE_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+stripe_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.*stripe*",
+ name, op_errno, err);
+
+ STACK_WIND (frame, stripe_fremovexattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr,
+ fd, name, xdata);
+ return 0;
+ err:
+ STRIPE_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+stripe_readdirp_lookup_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr, struct iatt *parent)
+{
+ stripe_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+ stripe_local_t *main_local = NULL;
+ gf_dirent_t *entry = NULL;
+ call_frame_t *prev = NULL;
+ int done = 0;
+
+ local = frame->local;
+ prev = cookie;
+
+ entry = local->dirent;
+
+ main_frame = local->orig_frame;
+ main_local = main_frame->local;
+ LOCK (&frame->lock);
+ {
+
+ local->call_count--;
+ if (!local->call_count)
+ done = 1;
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ local->op_ret = op_ret;
+ goto unlock;
+ }
+
+ if (stripe_ctx_handle(this, prev, local, xattr))
+ gf_log(this->name, GF_LOG_ERROR,
+ "Error getting fctx info from dict.");
+
+ correct_file_size(stbuf, local->fctx, prev);
+
+ stripe_iatt_merge (stbuf, &entry->d_stat);
+ local->stbuf_blocks += stbuf->ia_blocks;
+ }
+unlock:
+ UNLOCK(&frame->lock);
+
+ if (done) {
+ inode_ctx_put (entry->inode, this,
+ (uint64_t) (long)local->fctx);
+
+ done = 0;
+ LOCK (&main_frame->lock);
+ {
+ main_local->wind_count--;
+ if (!main_local->wind_count)
+ done = 1;
+ if (local->op_ret == -1) {
+ main_local->op_errno = local->op_errno;
+ main_local->op_ret = local->op_ret;
+ }
+ entry->d_stat.ia_blocks = local->stbuf_blocks;
+ }
+ UNLOCK (&main_frame->lock);
+ if (done) {
+ main_frame->local = NULL;
+ STRIPE_STACK_UNWIND (readdir, main_frame,
+ main_local->op_ret,
+ main_local->op_errno,
+ &main_local->entries, NULL);
+ gf_dirent_free (&main_local->entries);
+ stripe_local_wipe (main_local);
+ mem_put (main_local);
+ }
+ frame->local = NULL;
+ stripe_local_wipe (local);
+ mem_put (local);
+ STRIPE_STACK_DESTROY (frame);
+ }
+
+ return 0;
+}
+
+int32_t
+stripe_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ gf_dirent_t *orig_entries, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+ gf_dirent_t *local_entry = NULL;
+ gf_dirent_t *tmp_entry = NULL;
+ xlator_list_t *trav = NULL;
+ loc_t loc = {0, };
+ int32_t count = 0;
+ stripe_private_t *priv = NULL;
+ int32_t subvols = 0;
+ dict_t *xattrs = NULL;
+ call_frame_t *local_frame = NULL;
+ stripe_local_t *local_ent = NULL;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
+ goto out;
+ }
+ prev = cookie;
+ local = frame->local;
+ trav = this->children;
+ priv = this->private;
+
+ subvols = priv->child_count;
+
+ LOCK (&frame->lock);
+ {
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s returned error %s",
+ prev->this->name, strerror (op_errno));
+ local->op_errno = op_errno;
+ local->op_ret = op_ret;
+ goto unlock;
+ } else {
+ local->op_ret = op_ret;
+ list_splice_init (&orig_entries->list,
+ &local->entries.list);
+ local->wind_count = op_ret;
+ }
+
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ if (op_ret == -1)
+ goto out;
+
+ xattrs = dict_new ();
+ if (xattrs)
+ (void) stripe_xattr_request_build (this, xattrs, 0, 0, 0, 0);
+ count = op_ret;
+ list_for_each_entry_safe (local_entry, tmp_entry,
+ (&local->entries.list), list) {
+
+ if (!local_entry)
+ break;
+ if (!IA_ISREG (local_entry->d_stat.ia_type)) {
+ LOCK (&frame->lock);
+ {
+ local->wind_count--;
+ count = local->wind_count;
+ }
+ UNLOCK (&frame->lock);
+ continue;
+ }
+
+ local_frame = copy_frame (frame);
+
+ if (!local_frame) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto out;
+ }
+
+ local_ent = mem_get0 (this->local_pool);
+ if (!local_ent) {
+ op_errno = ENOMEM;
+ op_ret = -1;
goto out;
- }
- memcpy (stripe_opt->path_pattern, pattern, strlen (pattern));
-
- gf_log (this->name, GF_LOG_DEBUG,
- "block-size : pattern %s : size %"PRId64,
- stripe_opt->path_pattern, stripe_opt->block_size);
-
- if (!priv->pattern) {
- priv->pattern = stripe_opt;
+ }
+
+ loc.inode = inode_ref (local_entry->inode);
+
+ uuid_copy (loc.gfid, local_entry->d_stat.ia_gfid);
+
+ local_ent->orig_frame = frame;
+
+ local_ent->call_count = subvols;
+
+ local_ent->dirent = local_entry;
+
+ local_frame->local = local_ent;
+
+ trav = this->children;
+ while (trav) {
+ STACK_WIND (local_frame, stripe_readdirp_lookup_cbk,
+ trav->xlator, trav->xlator->fops->lookup,
+ &loc, xattrs);
+ trav = trav->next;
+ }
+ loc_wipe (&loc);
+ }
+out:
+ if (!count) {
+ /* all entries are directories */
+ frame->local = NULL;
+ STRIPE_STACK_UNWIND (readdir, frame, local->op_ret,
+ local->op_errno, &local->entries, NULL);
+ gf_dirent_free (&local->entries);
+ stripe_local_wipe (local);
+ mem_put (local);
+ }
+ if (xattrs)
+ dict_unref (xattrs);
+ return 0;
+
+}
+int32_t
+stripe_readdirp (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t off, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ stripe_private_t *priv = NULL;
+ xlator_list_t *trav = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ priv = this->private;
+ trav = this->children;
+
+ if (priv->first_child_down) {
+ op_errno = ENOTCONN;
+ goto err;
+ }
+
+ /* Initialization */
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ frame->local = local;
+
+ local->fd = fd_ref (fd);
+
+ local->wind_count = 0;
+
+ local->count = 0;
+ local->op_ret = -1;
+ INIT_LIST_HEAD(&local->entries);
+
+ if (!trav)
+ goto err;
+
+ STACK_WIND (frame, stripe_readdirp_cbk, trav->xlator,
+ trav->xlator->fops->readdirp, fd, size, off, xdata);
+ return 0;
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ STRIPE_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+
+}
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ goto out;
+
+ ret = xlator_mem_acct_init (this, gf_stripe_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+static int
+clear_pattern_list (stripe_private_t *priv)
+{
+ struct stripe_options *prev = NULL;
+ struct stripe_options *trav = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("stripe", priv, out);
+
+ trav = priv->pattern;
+ priv->pattern = NULL;
+ while (trav) {
+ prev = trav;
+ trav = trav->next;
+ GF_FREE (prev);
+ }
+
+ ret = 0;
+ out:
+ return ret;
+
+
+}
+
+
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+
+ stripe_private_t *priv = NULL;
+ data_t *data = NULL;
+ int ret = -1;
+ volume_option_t *opt = NULL;
+
+ GF_ASSERT (this);
+ GF_ASSERT (this->private);
+
+ priv = this->private;
+
+
+ ret = 0;
+ LOCK (&priv->lock);
+ {
+ ret = clear_pattern_list (priv);
+ if (ret)
+ goto unlock;
+
+ data = dict_get (options, "block-size");
+ if (data) {
+ ret = set_stripe_block_size (this, priv, data->data);
+ if (ret)
+ goto unlock;
} else {
- temp_stripeopt = priv->pattern;
- while (temp_stripeopt->next)
- temp_stripeopt = temp_stripeopt->next;
- temp_stripeopt->next = stripe_opt;
+ opt = xlator_volume_option_get (this, "block-size");
+ if (!opt) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "option 'block-size' not found");
+ ret = -1;
+ goto unlock;
+ }
+
+ if (gf_string2bytesize (opt->default_value, &priv->block_size)){
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to set default block-size ");
+ ret = -1;
+ goto unlock;
+ }
}
- stripe_str = strtok_r (NULL, ",", &tmp_str);
+
+ GF_OPTION_RECONF("coalesce", priv->coalesce, options, bool,
+ unlock);
}
+ unlock:
+ UNLOCK (&priv->lock);
+ if (ret)
+ goto out;
ret = 0;
out:
return ret;
+
}
/**
- * init - This function is called when xlator-graph gets initialized.
+ * init - This function is called when xlator-graph gets initialized.
* The option given in volfiles are parsed here.
- * @this -
+ * @this -
*/
int32_t
init (xlator_t *this)
{
stripe_private_t *priv = NULL;
+ volume_option_t *opt = NULL;
xlator_list_t *trav = NULL;
data_t *data = NULL;
int32_t count = 0;
int ret = -1;
+ if (!this)
+ goto out;
+
trav = this->children;
while (trav) {
count++;
@@ -3317,16 +5158,27 @@ init (xlator_t *this)
gf_log (this->name, GF_LOG_WARNING,
"dangling volume. check volfile ");
}
-
- priv = CALLOC (1, sizeof (stripe_private_t));
+
+ if (count == 1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "stripe configured with only one \"subvolumes\" option."
+ " please check the volume. exiting");
+ goto out;
+ }
+
+ priv = GF_CALLOC (1, sizeof (stripe_private_t),
+ gf_stripe_mt_stripe_private_t);
+
if (!priv)
goto out;
- priv->xl_array = CALLOC (count, sizeof (xlator_t *));
+ priv->xl_array = GF_CALLOC (count, sizeof (xlator_t *),
+ gf_stripe_mt_xlator_t);
if (!priv->xl_array)
goto out;
- priv->state = CALLOC (count, sizeof (int8_t));
- if (!priv->xl_array)
+ priv->last_event = GF_CALLOC (count, sizeof (int),
+ gf_stripe_mt_int32_t);
+ if (!priv->last_event)
goto out;
priv->child_count = count;
@@ -3346,128 +5198,617 @@ init (xlator_t *this)
goto out;
}
- priv->block_size = (128 * GF_UNIT_KB);
- /* option stripe-pattern *avi:1GB,*pdf:4096 */
- data = dict_get (this->options, "block-size");
- if (!data) {
- gf_log (this->name, GF_LOG_DEBUG,
- "No \"option block-size <x>\" given, defaulting "
- "to 128KB");
- } else {
- ret = set_stripe_block_size (this, priv, data->data);
- if (ret)
- goto out;
- }
-
- priv->xattr_supported = 1;
- data = dict_get (this->options, "use-xattr");
- if (data) {
- if (gf_string2boolean (data->data,
- &priv->xattr_supported) == -1) {
+ ret = 0;
+ LOCK (&priv->lock);
+ {
+ opt = xlator_volume_option_get (this, "block-size");
+ if (!opt) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "option 'block-size' not found");
+ ret = -1;
+ goto unlock;
+ }
+ if (gf_string2bytesize (opt->default_value, &priv->block_size)){
gf_log (this->name, GF_LOG_ERROR,
- "error setting hard check for extended "
- "attribute");
- //return -1;
+ "Unable to set default block-size ");
+ ret = -1;
+ goto unlock;
+ }
+ /* option stripe-pattern *avi:1GB,*pdf:16K */
+ data = dict_get (this->options, "block-size");
+ if (data) {
+ ret = set_stripe_block_size (this, priv, data->data);
+ if (ret)
+ goto unlock;
}
}
+ unlock:
+ UNLOCK (&priv->lock);
+ if (ret)
+ goto out;
+ GF_OPTION_INIT ("use-xattr", priv->xattr_supported, bool, out);
/* notify related */
priv->nodes_down = priv->child_count;
+
+ GF_OPTION_INIT("coalesce", priv->coalesce, bool, out);
+
+ this->local_pool = mem_pool_new (stripe_local_t, 128);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto out;
+ }
+
this->private = priv;
ret = 0;
- out:
+out:
if (ret) {
if (priv) {
- if (priv->xl_array)
- FREE (priv->xl_array);
- FREE (priv);
+ GF_FREE (priv->xl_array);
+ GF_FREE (priv);
}
}
return ret;
-}
+}
-/**
+/**
* fini - Free all the private variables
- * @this -
+ * @this -
*/
-void
+void
fini (xlator_t *this)
{
stripe_private_t *priv = NULL;
struct stripe_options *prev = NULL;
struct stripe_options *trav = NULL;
+ if (!this)
+ goto out;
+
priv = this->private;
if (priv) {
- if (priv->xl_array)
- FREE (priv->xl_array);
+ this->private = NULL;
+ GF_FREE (priv->xl_array);
trav = priv->pattern;
while (trav) {
prev = trav;
trav = trav->next;
- FREE (prev);
+ GF_FREE (prev);
}
+ GF_FREE (priv->last_event);
LOCK_DESTROY (&priv->lock);
- FREE (priv);
+ GF_FREE (priv);
}
+out:
return;
}
+int32_t
+stripe_getxattr_unwind (call_frame_t *frame,
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
-struct xlator_fops fops = {
- .stat = stripe_stat,
- .unlink = stripe_unlink,
- .symlink = stripe_symlink,
- .rename = stripe_rename,
- .link = stripe_link,
- .chmod = stripe_chmod,
- .chown = stripe_chown,
- .truncate = stripe_truncate,
- .utimens = stripe_utimens,
- .create = stripe_create,
- .open = stripe_open,
- .readv = stripe_readv,
- .writev = stripe_writev,
- .statfs = stripe_statfs,
- .flush = stripe_flush,
- .fsync = stripe_fsync,
- .setxattr = stripe_setxattr,
- .getxattr = stripe_getxattr,
- .removexattr = stripe_removexattr,
- .access = stripe_access,
- .ftruncate = stripe_ftruncate,
- .fstat = stripe_fstat,
- .readlink = stripe_readlink,
- .mkdir = stripe_mkdir,
- .rmdir = stripe_rmdir,
- .lk = stripe_lk,
- .opendir = stripe_opendir,
- .fsyncdir = stripe_fsyncdir,
- .fchmod = stripe_fchmod,
- .fchown = stripe_fchown,
- .lookup = stripe_lookup,
- .setdents = stripe_setdents,
- .mknod = stripe_mknod,
-};
+{
+ STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+int
+stripe_internal_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+
+ char size_key[256] = {0,};
+ char index_key[256] = {0,};
+ char count_key[256] = {0,};
+ char coalesce_key[256] = {0,};
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (frame->local, out);
+
+ if (!xattr || (op_ret == -1))
+ goto out;
+
+ sprintf (size_key, "trusted.%s.stripe-size", this->name);
+ sprintf (count_key, "trusted.%s.stripe-count", this->name);
+ sprintf (index_key, "trusted.%s.stripe-index", this->name);
+ sprintf (coalesce_key, "trusted.%s.stripe-coalesce", this->name);
+
+ dict_del (xattr, size_key);
+ dict_del (xattr, count_key);
+ dict_del (xattr, index_key);
+ dict_del (xattr, coalesce_key);
+
+out:
+ STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata);
+
+ return 0;
+
+}
+
+int
+stripe_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+{
+ int call_cnt = 0;
+ stripe_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (frame->local, out);
+
+ local = frame->local;
+
+ LOCK (&frame->lock);
+ {
+ call_cnt = --local->wind_count;
+ }
+ UNLOCK (&frame->lock);
+
+ if (!xattr || (op_ret < 0))
+ goto out;
+
+ local->op_ret = 0;
+
+ if (!local->xattr) {
+ local->xattr = dict_ref (xattr);
+ } else {
+ stripe_aggregate_xattr (local->xattr, xattr);
+ }
+
+out:
+ if (!call_cnt) {
+ STRIPE_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno,
+ local->xattr, xdata);
+ }
+
+ return 0;
+}
+
+int32_t
+stripe_vgetxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ int32_t callcnt = 0;
+ int32_t ret = -1;
+ long cky = 0;
+ void *xattr_val = NULL;
+ void *xattr_serz = NULL;
+ stripe_xattr_sort_t *xattr = NULL;
+ dict_t *stripe_xattr = NULL;
+
+ if (!frame || !frame->local || !this) {
+ gf_log ("", GF_LOG_ERROR, "Possible NULL deref");
+ return ret;
+ }
+
+ local = frame->local;
+ cky = (long) cookie;
-struct xlator_mops mops = {
- .stats = stripe_stats,
+ if (local->xsel[0] == '\0') {
+ gf_log (this->name, GF_LOG_ERROR, "Empty xattr in cbk");
+ return ret;
+ }
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->wind_count;
+
+ if (!dict || (op_ret < 0))
+ goto out;
+
+ if (!local->xattr_list)
+ local->xattr_list = (stripe_xattr_sort_t *)
+ GF_CALLOC (local->nallocs,
+ sizeof (stripe_xattr_sort_t),
+ gf_stripe_mt_xattr_sort_t);
+
+ if (local->xattr_list) {
+ xattr = local->xattr_list + (int32_t) cky;
+
+ ret = dict_get_ptr_and_len (dict, local->xsel,
+ &xattr_val,
+ &xattr->xattr_len);
+ if (xattr->xattr_len == 0)
+ goto out;
+
+ xattr->pos = cky;
+ xattr->xattr_value = gf_memdup (xattr_val,
+ xattr->xattr_len);
+
+ if (xattr->xattr_value != NULL)
+ local->xattr_total_len += xattr->xattr_len + 1;
+ }
+ }
+ out:
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ if (!local->xattr_total_len)
+ goto unwind;
+
+ stripe_xattr = dict_new ();
+ if (!stripe_xattr)
+ goto unwind;
+
+ /* select filler based on ->xsel */
+ if (XATTR_IS_PATHINFO (local->xsel))
+ ret = stripe_fill_pathinfo_xattr (this, local,
+ (char **)&xattr_serz);
+ else if (XATTR_IS_LOCKINFO (local->xsel)) {
+ ret = stripe_fill_lockinfo_xattr (this, local,
+ &xattr_serz);
+ } else {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unknown xattr in xattr request");
+ goto unwind;
+ }
+
+ if (!ret) {
+ ret = dict_set_dynptr (stripe_xattr, local->xsel,
+ xattr_serz,
+ local->xattr_total_len);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Can't set %s key in dict",
+ local->xsel);
+ }
+
+ unwind:
+ STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno,
+ stripe_xattr, NULL);
+
+ ret = stripe_free_xattr_str (local);
+
+ GF_FREE (local->xattr_list);
+
+ if (stripe_xattr)
+ dict_unref (stripe_xattr);
+ }
+
+ return ret;
+}
+
+int32_t
+stripe_getxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ xlator_list_t *trav = NULL;
+ stripe_private_t *priv = NULL;
+ int32_t op_errno = EINVAL;
+ int i = 0;
+ xlator_t **sub_volumes;
+ int ret = 0;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+
+ priv = this->private;
+ trav = this->children;
+
+ /* Initialization */
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ local->op_ret = -1;
+ frame->local = local;
+ loc_copy (&local->loc, loc);
+
+
+ if (name && (strcmp (GF_XATTR_MARKER_KEY, name) == 0)
+ && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
+ local->marker.call_count = priv->child_count;
+
+ sub_volumes = alloca ( priv->child_count *
+ sizeof (xlator_t *));
+ for (i = 0, trav = this->children; trav ;
+ trav = trav->next, i++) {
+
+ *(sub_volumes + i) = trav->xlator;
+
+ }
+
+ if (cluster_getmarkerattr (frame, this, loc, name,
+ local, stripe_getxattr_unwind,
+ sub_volumes, priv->child_count,
+ MARKER_UUID_TYPE, marker_uuid_default_gauge,
+ priv->vol_uuid)) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ return 0;
+ }
+
+ if (name && strncmp (name, GF_XATTR_QUOTA_SIZE_KEY,
+ strlen (GF_XATTR_QUOTA_SIZE_KEY)) == 0) {
+ local->wind_count = priv->child_count;
+
+ for (i = 0, trav=this->children; i < priv->child_count; i++,
+ trav = trav->next) {
+ STACK_WIND (frame, stripe_getxattr_cbk,
+ trav->xlator, trav->xlator->fops->getxattr,
+ loc, name, xdata);
+ }
+
+ return 0;
+ }
+
+ if (name &&
+ ((strncmp (name, GF_XATTR_PATHINFO_KEY,
+ strlen (GF_XATTR_PATHINFO_KEY)) == 0))) {
+ if (IA_ISREG (loc->inode->ia_type)) {
+ ret = inode_ctx_get (loc->inode, this,
+ (uint64_t *) &local->fctx);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "stripe size unavailable from fctx"
+ " relying on pathinfo could lead to"
+ " wrong results");
+ }
+
+ local->nallocs = local->wind_count = priv->child_count;
+ (void) strncpy (local->xsel, name, strlen (name));
+
+ /**
+ * for xattrs that need info from all childs, fill ->xsel
+ * as above and call the filler function in cbk based on
+ * it
+ */
+ for (i = 0, trav = this->children; i < priv->child_count; i++,
+ trav = trav->next) {
+ STACK_WIND_COOKIE (frame, stripe_vgetxattr_cbk,
+ (void *) (long) i, trav->xlator,
+ trav->xlator->fops->getxattr,
+ loc, name, xdata);
+ }
+
+ return 0;
+ }
+
+ if (name &&(*priv->vol_uuid)) {
+ if ((match_uuid_local (name, priv->vol_uuid) == 0)
+ && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) {
+
+ if (!IA_FILE_OR_DIR (loc->inode->ia_type))
+ local->marker.call_count = 1;
+ else
+ local->marker.call_count = priv->child_count;
+
+ sub_volumes = alloca (local->marker.call_count *
+ sizeof (xlator_t *));
+
+ for (i = 0, trav = this->children;
+ i < local->marker.call_count;
+ i++, trav = trav->next) {
+ *(sub_volumes + i) = trav->xlator;
+
+ }
+
+ if (cluster_getmarkerattr (frame, this, loc, name,
+ local,
+ stripe_getxattr_unwind,
+ sub_volumes,
+ local->marker.call_count,
+ MARKER_XTIME_TYPE,
+ marker_xtime_default_gauge,
+ priv->vol_uuid)) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ return 0;
+ }
+ }
+
+
+ STACK_WIND (frame, stripe_internal_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+
+ return 0;
+
+err:
+ STRIPE_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+inline gf_boolean_t
+stripe_is_special_xattr (const char *name)
+{
+ gf_boolean_t is_spl = _gf_false;
+
+ if (!name) {
+ goto out;
+ }
+
+ if (!strncmp (name, GF_XATTR_LOCKINFO_KEY,
+ strlen (GF_XATTR_LOCKINFO_KEY))
+ || !strncmp (name, GF_XATTR_PATHINFO_KEY,
+ strlen (GF_XATTR_PATHINFO_KEY)))
+ is_spl = _gf_true;
+out:
+ return is_spl;
+}
+
+int32_t
+stripe_fgetxattr_from_everyone (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ stripe_local_t *local = NULL;
+ stripe_private_t *priv = NULL;
+ int32_t ret = -1, op_errno = 0;
+ int i = 0;
+ xlator_list_t *trav = NULL;
+
+ priv = this->private;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->op_ret = -1;
+ frame->local = local;
+
+ strncpy (local->xsel, name, strlen (name));
+ local->nallocs = local->wind_count = priv->child_count;
+
+ for (i = 0, trav = this->children; i < priv->child_count; i++,
+ trav = trav->next) {
+ STACK_WIND_COOKIE (frame, stripe_vgetxattr_cbk,
+ (void *) (long) i, trav->xlator,
+ trav->xlator->fops->fgetxattr,
+ fd, name, xdata);
+ }
+
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT (fgetxattr, frame, -1, op_errno, NULL, NULL);
+ return ret;
+}
+
+int32_t
+stripe_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ if (stripe_is_special_xattr (name)) {
+ stripe_fgetxattr_from_everyone (frame, this, fd, name, xdata);
+ goto out;
+ }
+
+ STACK_WIND (frame, stripe_internal_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+
+out:
+ return 0;
+}
+
+
+
+int32_t
+stripe_priv_dump (xlator_t *this)
+{
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 0;
+ stripe_private_t *priv = NULL;
+ int ret = -1;
+ struct stripe_options *options = NULL;
+
+ GF_VALIDATE_OR_GOTO ("stripe", this, out);
+
+ priv = this->private;
+ if (!priv)
+ goto out;
+
+ ret = TRY_LOCK (&priv->lock);
+ if (ret != 0)
+ goto out;
+
+ gf_proc_dump_add_section("xlator.cluster.stripe.%s.priv", this->name);
+ gf_proc_dump_write("child_count","%d", priv->child_count);
+
+ for (i = 0; i < priv->child_count; i++) {
+ sprintf (key, "subvolumes[%d]", i);
+ gf_proc_dump_write (key, "%s.%s", priv->xl_array[i]->type,
+ priv->xl_array[i]->name);
+ }
+
+ options = priv->pattern;
+ while (options != NULL) {
+ gf_proc_dump_write ("path_pattern", "%s", priv->pattern->path_pattern);
+ gf_proc_dump_write ("options_block_size", "%ul", options->block_size);
+
+ options = options->next;
+ }
+
+ gf_proc_dump_write ("block_size", "%ul", priv->block_size);
+ gf_proc_dump_write ("nodes-down", "%d", priv->nodes_down);
+ gf_proc_dump_write ("first-child_down", "%d", priv->first_child_down);
+ gf_proc_dump_write ("xattr_supported", "%d", priv->xattr_supported);
+
+ UNLOCK (&priv->lock);
+
+out:
+ return ret;
+}
+
+struct xlator_fops fops = {
+ .stat = stripe_stat,
+ .unlink = stripe_unlink,
+ .rename = stripe_rename,
+ .link = stripe_link,
+ .truncate = stripe_truncate,
+ .create = stripe_create,
+ .open = stripe_open,
+ .readv = stripe_readv,
+ .writev = stripe_writev,
+ .statfs = stripe_statfs,
+ .flush = stripe_flush,
+ .fsync = stripe_fsync,
+ .ftruncate = stripe_ftruncate,
+ .fstat = stripe_fstat,
+ .mkdir = stripe_mkdir,
+ .rmdir = stripe_rmdir,
+ .lk = stripe_lk,
+ .opendir = stripe_opendir,
+ .fsyncdir = stripe_fsyncdir,
+ .setattr = stripe_setattr,
+ .fsetattr = stripe_fsetattr,
+ .lookup = stripe_lookup,
+ .mknod = stripe_mknod,
+ .setxattr = stripe_setxattr,
+ .fsetxattr = stripe_fsetxattr,
+ .getxattr = stripe_getxattr,
+ .fgetxattr = stripe_fgetxattr,
+ .removexattr = stripe_removexattr,
+ .fremovexattr = stripe_fremovexattr,
+ .readdirp = stripe_readdirp,
+ .fallocate = stripe_fallocate,
+ .discard = stripe_discard,
+ .zerofill = stripe_zerofill,
};
struct xlator_cbks cbks = {
.release = stripe_release,
+ .forget = stripe_forget,
};
+struct xlator_dumpops dumpops = {
+ .priv = stripe_priv_dump,
+};
struct volume_options options[] = {
- { .key = {"block-size"},
- .type = GF_OPTION_TYPE_ANY
+ { .key = {"block-size"},
+ .type = GF_OPTION_TYPE_SIZE_LIST,
+ .default_value = "128KB",
+ .min = STRIPE_MIN_BLOCK_SIZE,
+ .description = "Size of the stripe unit that would be read "
+ "from or written to the striped servers."
},
- { .key = {"use-xattr"},
- .type = GF_OPTION_TYPE_BOOL
+ { .key = {"use-xattr"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true"
},
+ { .key = {"coalesce"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .description = "Enable/Disable coalesce mode to flatten striped "
+ "files as stored on the server (i.e., eliminate holes "
+ "caused by the traditional format)."
+ },
{ .key = {NULL} },
};
diff --git a/xlators/cluster/stripe/src/stripe.h b/xlators/cluster/stripe/src/stripe.h
index 5ffbc3670..5673d18f3 100644
--- a/xlators/cluster/stripe/src/stripe.h
+++ b/xlators/cluster/stripe/src/stripe.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -32,12 +23,63 @@
#include "common-utils.h"
#include "compat.h"
#include "compat-errno.h"
+#include "stripe-mem-types.h"
+#include "libxlator.h"
#include <fnmatch.h>
#include <signal.h>
+#define STRIPE_PATHINFO_HEADER "STRIPE:"
+#define STRIPE_MIN_BLOCK_SIZE (16*GF_UNIT_KB)
+
+#define STRIPE_STACK_UNWIND(fop, frame, params ...) do { \
+ stripe_local_t *__local = NULL; \
+ if (frame) { \
+ __local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT (fop, frame, params); \
+ if (__local) { \
+ stripe_local_wipe(__local); \
+ mem_put (__local); \
+ } \
+ } while (0)
+
+#define STRIPE_STACK_DESTROY(frame) do { \
+ stripe_local_t *__local = NULL; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ STACK_DESTROY (frame->root); \
+ if (__local) { \
+ stripe_local_wipe (__local); \
+ mem_put (__local); \
+ } \
+ } while (0)
+
+#define STRIPE_VALIDATE_FCTX(fctx, label) do { \
+ int idx = 0; \
+ if (!fctx) { \
+ op_errno = EINVAL; \
+ goto label; \
+ } \
+ for (idx = 0; idx < fctx->stripe_count; idx++) { \
+ if (!fctx->xl_array[idx]) { \
+ gf_log (this->name, GF_LOG_ERROR, \
+ "fctx->xl_array[%d] is NULL", \
+ idx); \
+ op_errno = ESTALE; \
+ goto label; \
+ } \
+ } \
+ } while (0)
+
+typedef struct stripe_xattr_sort {
+ int pos;
+ int xattr_len;
+ char *xattr_value;
+} stripe_xattr_sort_t;
/**
- * struct stripe_options : This keeps the pattern and the block-size
+ * struct stripe_options : This keeps the pattern and the block-size
* information, which is used for striping on a file.
*/
struct stripe_options {
@@ -47,7 +89,7 @@ struct stripe_options {
};
/**
- * Private structure for stripe translator
+ * Private structure for stripe translator
*/
struct stripe_private {
struct stripe_options *pattern;
@@ -56,27 +98,31 @@ struct stripe_private {
gf_lock_t lock;
uint8_t nodes_down;
int8_t first_child_down;
+ int *last_event;
int8_t child_count;
- int8_t *state; /* Current state of child node */
gf_boolean_t xattr_supported; /* default yes */
+ gf_boolean_t coalesce;
+ char vol_uuid[UUID_SIZE + 1];
};
/**
- * Used to keep info about the replies received from fops->readv calls
+ * Used to keep info about the replies received from readv/writev calls
*/
-struct readv_replies {
+struct stripe_replies {
struct iovec *vector;
int32_t count; //count of vector
int32_t op_ret; //op_ret of readv
int32_t op_errno;
- struct stat stbuf; /* 'stbuf' is also a part of reply */
+ int32_t requested_size;
+ struct iatt stbuf; /* 'stbuf' is also a part of reply */
};
typedef struct _stripe_fd_ctx {
off_t stripe_size;
int stripe_count;
+ int stripe_coalesce;
int static_array;
- xlator_t **xl_array;
+ xlator_t **xl_array;
} stripe_fd_ctx_t;
@@ -87,28 +133,45 @@ struct stripe_local; /* this itself is used inside the structure; */
struct stripe_local {
struct stripe_local *next;
- call_frame_t *orig_frame;
-
+ call_frame_t *orig_frame;
+
stripe_fd_ctx_t *fctx;
/* Used by _cbk functions */
- struct stat stbuf;
- struct readv_replies *replies;
- struct statvfs statvfs_buf;
- dir_entry_t *entry;
- struct xlator_stats stats;
+ struct iatt stbuf;
+ struct iatt pre_buf;
+ struct iatt post_buf;
+ struct iatt preparent;
+ struct iatt postparent;
+
+ off_t stbuf_size;
+ off_t prebuf_size;
+ off_t postbuf_size;
+ off_t preparent_size;
+ off_t postparent_size;
+
+ blkcnt_t stbuf_blocks;
+ blkcnt_t prebuf_blocks;
+ blkcnt_t postbuf_blocks;
+ blkcnt_t preparent_blocks;
+ blkcnt_t postparent_blocks;
+
+ struct stripe_replies *replies;
+ struct statvfs statvfs_buf;
+ dir_entry_t *entry;
int8_t revalidate;
int8_t failed;
int8_t unwind;
+ size_t readv_size;
int32_t entry_count;
int32_t node_index;
int32_t call_count;
- int32_t wind_count; /* used instead of child_cound
+ int32_t wind_count; /* used instead of child_cound
in case of read and write */
int32_t op_ret;
- int32_t op_errno;
+ int32_t op_errno;
int32_t count;
int32_t flags;
char *name;
@@ -117,8 +180,17 @@ struct stripe_local {
loc_t loc;
loc_t loc2;
+ mode_t mode;
+ dev_t rdev;
/* For File I/O fops */
- dict_t *dict;
+ dict_t *xdata;
+
+ stripe_xattr_sort_t *xattr_list;
+ int32_t xattr_total_len;
+ int32_t nallocs;
+ char xsel[256];
+
+ struct marker_str marker;
/* General usage */
off_t offset;
@@ -128,13 +200,89 @@ struct stripe_local {
int entry_self_heal_needed;
int8_t *list;
- struct flock lock;
+ struct gf_flock lock;
fd_t *fd;
void *value;
struct iobref *iobref;
+ gf_dirent_t entries;
+ gf_dirent_t *dirent;
+ dict_t *xattr;
+ uuid_t ia_gfid;
+
+ int xflag;
+ mode_t umask;
};
typedef struct stripe_local stripe_local_t;
typedef struct stripe_private stripe_private_t;
+/*
+ * Determine the stripe index of a particular frame based on the translator.
+ */
+static inline int32_t stripe_get_frame_index(stripe_fd_ctx_t *fctx,
+ call_frame_t *prev)
+{
+ int32_t i, idx = -1;
+
+ for (i = 0; i < fctx->stripe_count; i++) {
+ if (fctx->xl_array[i] == prev->this) {
+ idx = i;
+ break;
+ }
+ }
+
+ return idx;
+}
+
+static inline void stripe_copy_xl_array(xlator_t **dst, xlator_t **src,
+ int count)
+{
+ int i;
+
+ for (i = 0; i < count; i++)
+ dst[i] = src[i];
+}
+
+void stripe_local_wipe (stripe_local_t *local);
+int32_t stripe_ctx_handle (xlator_t *this, call_frame_t *prev,
+ stripe_local_t *local, dict_t *dict);
+void stripe_aggregate_xattr (dict_t *dst, dict_t *src);
+int32_t stripe_xattr_request_build (xlator_t *this, dict_t *dict,
+ uint64_t stripe_size, uint32_t stripe_count,
+ uint32_t stripe_index,
+ uint32_t stripe_coalesce);
+int32_t stripe_get_matching_bs (const char *path, stripe_private_t *priv);
+int set_stripe_block_size (xlator_t *this, stripe_private_t *priv, char *data);
+int32_t stripe_iatt_merge (struct iatt *from, struct iatt *to);
+int32_t stripe_fill_pathinfo_xattr (xlator_t *this, stripe_local_t *local,
+ char **xattr_serz);
+int32_t stripe_free_xattr_str (stripe_local_t *local);
+int32_t stripe_xattr_aggregate (char *buffer, stripe_local_t *local,
+ int32_t *total);
+off_t coalesced_offset(off_t offset, uint64_t stripe_size, int stripe_count);
+off_t uncoalesced_size(off_t size, uint64_t stripe_size, int stripe_count,
+ int stripe_index);
+int32_t
+stripe_fill_lockinfo_xattr (xlator_t *this, stripe_local_t *local,
+ void **xattr_serz);
+
+/*
+ * Adjust the size attribute for files if coalesce is enabled.
+ */
+static inline void correct_file_size(struct iatt *buf, stripe_fd_ctx_t *fctx,
+ call_frame_t *prev)
+{
+ int index;
+
+ if (!IA_ISREG(buf->ia_type))
+ return;
+
+ if (!fctx || !fctx->stripe_coalesce)
+ return;
+
+ index = stripe_get_frame_index(fctx, prev);
+ buf->ia_size = uncoalesced_size(buf->ia_size, fctx->stripe_size,
+ fctx->stripe_count, index);
+}
+
#endif /* _STRIPE_H_ */
diff --git a/xlators/cluster/unify/Makefile.am b/xlators/cluster/unify/Makefile.am
deleted file mode 100644
index d471a3f92..000000000
--- a/xlators/cluster/unify/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/xlators/cluster/unify/src/Makefile.am b/xlators/cluster/unify/src/Makefile.am
deleted file mode 100644
index b9e6f63e9..000000000
--- a/xlators/cluster/unify/src/Makefile.am
+++ /dev/null
@@ -1,16 +0,0 @@
-
-xlator_LTLIBRARIES = unify.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
-
-unify_la_LDFLAGS = -module -avoidversion
-
-unify_la_SOURCES = unify.c unify-self-heal.c
-unify_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-noinst_HEADERS = unify.h
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
-
diff --git a/xlators/cluster/unify/src/unify-self-heal.c b/xlators/cluster/unify/src/unify-self-heal.c
deleted file mode 100644
index 7c62495c2..000000000
--- a/xlators/cluster/unify/src/unify-self-heal.c
+++ /dev/null
@@ -1,1225 +0,0 @@
-/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * unify-self-heal.c :
- * This file implements few functions which enables 'unify' translator
- * to be consistent in its behaviour when
- * > a node fails,
- * > a node gets added,
- * > a failed node comes back
- * > a new namespace server is added (ie, an fresh namespace server).
- *
- * This functionality of 'unify' will enable glusterfs to support storage
- * system failure, and maintain consistancy. This works both ways, ie, when
- * an entry (either file or directory) is found on namespace server, and not
- * on storage nodes, its created in storage nodes and vica-versa.
- *
- * The two fops, where it can be implemented are 'getdents ()' and 'lookup ()'
- *
- */
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "unify.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "common-utils.h"
-
-int32_t
-unify_sh_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count);
-
-int32_t
-unify_sh_ns_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count);
-
-int32_t
-unify_bgsh_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count);
-
-int32_t
-unify_bgsh_ns_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count);
-
-/**
- * unify_local_wipe - free all the extra allocation of local->* here.
- */
-static void
-unify_local_wipe (unify_local_t *local)
-{
- /* Free the strdup'd variables in the local structure */
- if (local->name) {
- FREE (local->name);
- }
-
- if (local->sh_struct) {
- if (local->sh_struct->offset_list)
- FREE (local->sh_struct->offset_list);
-
- if (local->sh_struct->entry_list)
- FREE (local->sh_struct->entry_list);
-
- if (local->sh_struct->count_list)
- FREE (local->sh_struct->count_list);
-
- FREE (local->sh_struct);
- }
-
- loc_wipe (&local->loc1);
- loc_wipe (&local->loc2);
-}
-
-int32_t
-unify_sh_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- inode_t *inode = NULL;
- dict_t *tmp_dict = NULL;
- dir_entry_t *prev, *entry, *trav;
-
- LOCK (&frame->lock);
- {
- /* if local->call_count == 0, that means, setdents on
- * storagenodes is still pending.
- */
- if (local->call_count)
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (callcnt == 0) {
- if (local->sh_struct->entry_list[0]) {
- prev = entry = local->sh_struct->entry_list[0];
- if (!entry)
- return 0;
- trav = entry->next;
- while (trav) {
- prev->next = trav->next;
- FREE (trav->name);
- if (S_ISLNK (trav->buf.st_mode))
- FREE (trav->link);
- FREE (trav);
- trav = prev->next;
- }
- FREE (entry);
- }
-
- if (!local->flags) {
- if (local->sh_struct->count_list[0] >=
- UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- /* count == size, that means, there are more entries
- to read from */
- //local->call_count = 0;
- local->sh_struct->offset_list[0] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND (frame,
- unify_sh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[0],
- GF_GET_DIR_ONLY);
- }
- } else {
- inode = local->loc1.inode;
- fd_unref (local->fd);
- tmp_dict = local->dict;
-
- unify_local_wipe (local);
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- inode, &local->stbuf, local->dict);
- if (tmp_dict)
- dict_unref (tmp_dict);
- }
- }
-
- return 0;
-}
-
-
-int32_t
-unify_sh_ns_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = 0;
- unsigned long final = 0;
- dir_entry_t *tmp = CALLOC (1, sizeof (dir_entry_t));
-
- local->sh_struct->entry_list[0] = tmp;
- local->sh_struct->count_list[0] = count;
- if (entry) {
- tmp->next = entry->next;
- entry->next = NULL;
- }
-
- if ((count < UNIFY_SELF_HEAL_GETDENTS_COUNT) || !entry) {
- final = 1;
- }
-
- LOCK (&frame->lock);
- {
- /* local->call_count will be '0' till now. make it 1 so, it
- can be UNWIND'ed for the last call. */
- local->call_count = priv->child_count;
- if (final)
- local->flags = 1;
- }
- UNLOCK (&frame->lock);
-
- for (index = 0; index < priv->child_count; index++)
- {
- STACK_WIND_COOKIE (frame,
- unify_sh_setdents_cbk,
- (void *)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->setdents,
- local->fd, GF_SET_DIR_ONLY,
- local->sh_struct->entry_list[0], count);
- }
-
- return 0;
-}
-
-int32_t
-unify_sh_ns_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = (long)cookie;
- dir_entry_t *prev, *entry, *trav;
-
- LOCK (&frame->lock);
- {
- if (local->sh_struct->entry_list[index]) {
- prev = entry = local->sh_struct->entry_list[index];
- trav = entry->next;
- while (trav) {
- prev->next = trav->next;
- FREE (trav->name);
- if (S_ISLNK (trav->buf.st_mode))
- FREE (trav->link);
- FREE (trav);
- trav = prev->next;
- }
- FREE (entry);
- }
- }
- UNLOCK (&frame->lock);
-
- if (local->sh_struct->count_list[index] <
- UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
- } else {
- /* count == size, that means, there are more entries
- to read from */
- local->sh_struct->offset_list[index] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND_COOKIE (frame,
- unify_sh_getdents_cbk,
- cookie,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[index],
- GF_GET_ALL);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir on (%s) with offset %"PRId64"",
- priv->xl_array[index]->name,
- local->sh_struct->offset_list[index]);
- }
-
- if (!callcnt) {
- /* All storage nodes have done unified setdents on NS node.
- * Now, do getdents from NS and do setdents on storage nodes.
- */
-
- /* sh_struct->offset_list is no longer required for
- storage nodes now */
- local->sh_struct->offset_list[0] = 0; /* reset */
-
- STACK_WIND (frame,
- unify_sh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_DIR_ONLY);
- }
-
- return 0;
-}
-
-
-/**
- * unify_sh_getdents_cbk -
- */
-int32_t
-unify_sh_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = (long)cookie;
- dir_entry_t *tmp = NULL;
-
- if (op_ret >= 0 && count > 0) {
- /* There is some dentry found, just send the dentry to NS */
- tmp = CALLOC (1, sizeof (dir_entry_t));
- local->sh_struct->entry_list[index] = tmp;
- local->sh_struct->count_list[index] = count;
- if (entry) {
- tmp->next = entry->next;
- entry->next = NULL;
- }
- STACK_WIND_COOKIE (frame,
- unify_sh_ns_setdents_cbk,
- cookie,
- NS(this),
- NS(this)->fops->setdents,
- local->fd,
- GF_SET_IF_NOT_PRESENT,
- local->sh_struct->entry_list[index],
- count);
- return 0;
- }
-
- if (count < UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
- } else {
- /* count == size, that means, there are more entries
- to read from */
- local->sh_struct->offset_list[index] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND_COOKIE (frame,
- unify_sh_getdents_cbk,
- cookie,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[index],
- GF_GET_ALL);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir on (%s) with offset %"PRId64"",
- priv->xl_array[index]->name,
- local->sh_struct->offset_list[index]);
- }
-
- if (!callcnt) {
- /* All storage nodes have done unified setdents on NS node.
- * Now, do getdents from NS and do setdents on storage nodes.
- */
-
- /* sh_struct->offset_list is no longer required for
- storage nodes now */
- local->sh_struct->offset_list[0] = 0; /* reset */
-
- STACK_WIND (frame,
- unify_sh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_DIR_ONLY);
- }
-
- return 0;
-}
-
-/**
- * unify_sh_opendir_cbk -
- *
- * @cookie:
- */
-int32_t
-unify_sh_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t index = 0;
- inode_t *inode = NULL;
- dict_t *tmp_dict = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- } else {
- gf_log (this->name, GF_LOG_WARNING, "failed");
- local->failed = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local->call_count = priv->child_count + 1;
-
- if (!local->failed) {
- /* send getdents() namespace after finishing
- storage nodes */
- local->call_count--;
-
- fd_bind (fd);
-
- if (local->call_count) {
- /* Used as the offset index. This list keeps
- * track of offset sent to each node during
- * STACK_WIND.
- */
- local->sh_struct->offset_list =
- calloc (priv->child_count,
- sizeof (off_t));
- ERR_ABORT (local->sh_struct->offset_list);
-
- local->sh_struct->entry_list =
- calloc (priv->child_count,
- sizeof (dir_entry_t *));
- ERR_ABORT (local->sh_struct->entry_list);
-
- local->sh_struct->count_list =
- calloc (priv->child_count,
- sizeof (int));
- ERR_ABORT (local->sh_struct->count_list);
-
- /* Send getdents on all the fds */
- for (index = 0;
- index < priv->child_count; index++) {
- STACK_WIND_COOKIE (frame,
- unify_sh_getdents_cbk,
- (void *)(long)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_ALL);
- }
-
- /* did stack wind, so no need to unwind here */
- return 0;
- } /* (local->call_count) */
- } /* (!local->failed) */
-
- /* Opendir failed on one node. */
- inode = local->loc1.inode;
- fd_unref (local->fd);
- tmp_dict = local->dict;
-
- unify_local_wipe (local);
- /* Only 'self-heal' failed, lookup() was successful. */
- local->op_ret = 0;
-
- /* This is lookup_cbk ()'s UNWIND. */
- STACK_UNWIND (frame, local->op_ret, local->op_errno, inode,
- &local->stbuf, local->dict);
- if (tmp_dict)
- dict_unref (tmp_dict);
- }
-
- return 0;
-}
-
-/**
- * gf_sh_checksum_cbk -
- *
- * @frame: frame used in lookup. get a copy of it, and use that copy.
- * @this: pointer to unify xlator.
- * @inode: pointer to inode, for which the consistency check is required.
- *
- */
-int32_t
-unify_sh_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *file_checksum,
- uint8_t *dir_checksum)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t index = 0;
- int32_t callcnt = 0;
- inode_t *inode = NULL;
- dict_t *tmp_dict = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret >= 0) {
- if (NS(this) == (xlator_t *)cookie) {
- memcpy (local->sh_struct->ns_file_checksum,
- file_checksum, NAME_MAX);
- memcpy (local->sh_struct->ns_dir_checksum,
- dir_checksum, NAME_MAX);
- } else {
- if (local->entry_count == 0) {
- /* Initialize the dir_checksum to be
- * used for comparision with other
- * storage nodes. Should be done for
- * the first successful call *only*.
- */
- /* Using 'entry_count' as a flag */
- local->entry_count = 1;
- memcpy (local->sh_struct->dir_checksum,
- dir_checksum, NAME_MAX);
- }
-
- /* Reply from the storage nodes */
- for (index = 0;
- index < NAME_MAX; index++) {
- /* Files should be present in
- only one node */
- local->sh_struct->file_checksum[index] ^= file_checksum[index];
-
- /* directory structure should be
- same accross */
- if (local->sh_struct->dir_checksum[index] != dir_checksum[index])
- local->failed = 1;
- }
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- for (index = 0; index < NAME_MAX ; index++) {
- if (local->sh_struct->file_checksum[index] !=
- local->sh_struct->ns_file_checksum[index]) {
- local->failed = 1;
- break;
- }
- if (local->sh_struct->dir_checksum[index] !=
- local->sh_struct->ns_dir_checksum[index]) {
- local->failed = 1;
- break;
- }
- }
-
- if (local->failed) {
- /* Log it, it should be a rare event */
- gf_log (this->name, GF_LOG_WARNING,
- "Self-heal triggered on directory %s",
- local->loc1.path);
-
- /* Any self heal will be done at directory level */
- local->call_count = 0;
- local->op_ret = -1;
- local->failed = 0;
-
- local->fd = fd_create (local->loc1.inode,
- frame->root->pid);
-
- local->call_count = priv->child_count + 1;
-
- for (index = 0;
- index < (priv->child_count + 1); index++) {
- STACK_WIND_COOKIE (frame,
- unify_sh_opendir_cbk,
- priv->xl_array[index]->name,
- priv->xl_array[index],
- priv->xl_array[index]->fops->opendir,
- &local->loc1,
- local->fd);
- }
- /* opendir can be done on the directory */
- return 0;
- }
-
- /* no mismatch */
- inode = local->loc1.inode;
- tmp_dict = local->dict;
-
- unify_local_wipe (local);
-
- /* This is lookup_cbk ()'s UNWIND. */
- STACK_UNWIND (frame,
- local->op_ret,
- local->op_errno,
- inode,
- &local->stbuf,
- local->dict);
- if (tmp_dict)
- dict_unref (tmp_dict);
- }
-
- return 0;
-}
-
-/* Foreground self-heal part over */
-
-/* Background self-heal part */
-
-int32_t
-unify_bgsh_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- dir_entry_t *prev, *entry, *trav;
-
- LOCK (&frame->lock);
- {
- /* if local->call_count == 0, that means, setdents
- on storagenodes is still pending. */
- if (local->call_count)
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
-
- if (callcnt == 0) {
- if (local->sh_struct->entry_list[0]) {
- prev = entry = local->sh_struct->entry_list[0];
- trav = entry->next;
- while (trav) {
- prev->next = trav->next;
- FREE (trav->name);
- if (S_ISLNK (trav->buf.st_mode))
- FREE (trav->link);
- FREE (trav);
- trav = prev->next;
- }
- FREE (entry);
- }
-
- if (!local->flags) {
- if (local->sh_struct->count_list[0] >=
- UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- /* count == size, that means, there are more
- entries to read from */
- //local->call_count = 0;
- local->sh_struct->offset_list[0] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND (frame,
- unify_bgsh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[0],
- GF_GET_DIR_ONLY);
- }
- } else {
- fd_unref (local->fd);
- unify_local_wipe (local);
- STACK_DESTROY (frame->root);
- }
- }
-
- return 0;
-}
-
-
-int32_t
-unify_bgsh_ns_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = 0;
- unsigned long final = 0;
- dir_entry_t *tmp = CALLOC (1, sizeof (dir_entry_t));
-
- local->sh_struct->entry_list[0] = tmp;
- local->sh_struct->count_list[0] = count;
- if (entry) {
- tmp->next = entry->next;
- entry->next = NULL;
- }
-
- if ((count < UNIFY_SELF_HEAL_GETDENTS_COUNT) || !entry) {
- final = 1;
- }
-
- LOCK (&frame->lock);
- {
- /* local->call_count will be '0' till now. make it 1 so,
- it can be UNWIND'ed for the last call. */
- local->call_count = priv->child_count;
- if (final)
- local->flags = 1;
- }
- UNLOCK (&frame->lock);
-
- for (index = 0; index < priv->child_count; index++)
- {
- STACK_WIND_COOKIE (frame,
- unify_bgsh_setdents_cbk,
- (void *)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->setdents,
- local->fd, GF_SET_DIR_ONLY,
- local->sh_struct->entry_list[0], count);
- }
-
- return 0;
-}
-
-int32_t
-unify_bgsh_ns_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = (long)cookie;
- dir_entry_t *prev, *entry, *trav;
-
- if (local->sh_struct->entry_list[index]) {
- prev = entry = local->sh_struct->entry_list[index];
- if (!entry)
- return 0;
- trav = entry->next;
- while (trav) {
- prev->next = trav->next;
- FREE (trav->name);
- if (S_ISLNK (trav->buf.st_mode))
- FREE (trav->link);
- FREE (trav);
- trav = prev->next;
- }
- FREE (entry);
- }
-
- if (local->sh_struct->count_list[index] <
- UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
- } else {
- /* count == size, that means, there are more entries
- to read from */
- local->sh_struct->offset_list[index] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND_COOKIE (frame,
- unify_bgsh_getdents_cbk,
- cookie,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[index],
- GF_GET_ALL);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir on (%s) with offset %"PRId64"",
- priv->xl_array[index]->name,
- local->sh_struct->offset_list[index]);
- }
-
- if (!callcnt) {
- /* All storage nodes have done unified setdents on NS node.
- * Now, do getdents from NS and do setdents on storage nodes.
- */
-
- /* sh_struct->offset_list is no longer required for
- storage nodes now */
- local->sh_struct->offset_list[0] = 0; /* reset */
-
- STACK_WIND (frame,
- unify_bgsh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_DIR_ONLY);
- }
-
- return 0;
-}
-
-
-/**
- * unify_bgsh_getdents_cbk -
- */
-int32_t
-unify_bgsh_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = (long)cookie;
- dir_entry_t *tmp = NULL;
-
- if (op_ret >= 0 && count > 0) {
- /* There is some dentry found, just send the dentry to NS */
- tmp = CALLOC (1, sizeof (dir_entry_t));
- local->sh_struct->entry_list[index] = tmp;
- local->sh_struct->count_list[index] = count;
- if (entry) {
- tmp->next = entry->next;
- entry->next = NULL;
- }
- STACK_WIND_COOKIE (frame,
- unify_bgsh_ns_setdents_cbk,
- cookie,
- NS(this),
- NS(this)->fops->setdents,
- local->fd,
- GF_SET_IF_NOT_PRESENT,
- local->sh_struct->entry_list[index],
- count);
- return 0;
- }
-
- if (count < UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
- } else {
- /* count == size, that means, there are more entries to read from */
- local->sh_struct->offset_list[index] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
-
- STACK_WIND_COOKIE (frame,
- unify_bgsh_getdents_cbk,
- cookie,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[index],
- GF_GET_ALL);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir on (%s) with offset %"PRId64"",
- priv->xl_array[index]->name,
- local->sh_struct->offset_list[index]);
- }
-
- if (!callcnt) {
- /* All storage nodes have done unified setdents on NS node.
- * Now, do getdents from NS and do setdents on storage nodes.
- */
-
- /* sh_struct->offset_list is no longer required for
- storage nodes now */
- local->sh_struct->offset_list[0] = 0; /* reset */
-
- STACK_WIND (frame,
- unify_bgsh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_DIR_ONLY);
- }
-
- return 0;
-}
-
-/**
- * unify_bgsh_opendir_cbk -
- *
- * @cookie:
- */
-int32_t
-unify_bgsh_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int32_t callcnt = 0;
- int16_t index = 0;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- } else {
- local->failed = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local->call_count = priv->child_count + 1;
-
- if (!local->failed) {
- /* send getdents() namespace after finishing
- storage nodes */
- local->call_count--;
- callcnt = local->call_count;
-
- fd_bind (fd);
-
- if (local->call_count) {
- /* Used as the offset index. This list keeps
- track of offset sent to each node during
- STACK_WIND. */
- local->sh_struct->offset_list =
- calloc (priv->child_count,
- sizeof (off_t));
- ERR_ABORT (local->sh_struct->offset_list);
-
- local->sh_struct->entry_list =
- calloc (priv->child_count,
- sizeof (dir_entry_t *));
- ERR_ABORT (local->sh_struct->entry_list);
-
- local->sh_struct->count_list =
- calloc (priv->child_count,
- sizeof (int));
- ERR_ABORT (local->sh_struct->count_list);
-
- /* Send getdents on all the fds */
- for (index = 0;
- index < priv->child_count; index++) {
- STACK_WIND_COOKIE (frame,
- unify_bgsh_getdents_cbk,
- (void *)(long)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_ALL);
- }
- /* did a stack wind, so no need to unwind here */
- return 0;
- } /* (local->call_count) */
- } /* (!local->failed) */
-
- /* Opendir failed on one node. */
- fd_unref (local->fd);
-
- unify_local_wipe (local);
- STACK_DESTROY (frame->root);
- }
-
- return 0;
-}
-
-/**
- * gf_bgsh_checksum_cbk -
- *
- * @frame: frame used in lookup. get a copy of it, and use that copy.
- * @this: pointer to unify xlator.
- * @inode: pointer to inode, for which the consistency check is required.
- *
- */
-int32_t
-unify_bgsh_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *file_checksum,
- uint8_t *dir_checksum)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t index = 0;
- int32_t callcnt = 0;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret >= 0) {
- if (NS(this) == (xlator_t *)cookie) {
- memcpy (local->sh_struct->ns_file_checksum,
- file_checksum, NAME_MAX);
- memcpy (local->sh_struct->ns_dir_checksum,
- dir_checksum, NAME_MAX);
- } else {
- if (local->entry_count == 0) {
- /* Initialize the dir_checksum to be
- * used for comparision with other
- * storage nodes. Should be done for
- * the first successful call *only*.
- */
- /* Using 'entry_count' as a flag */
- local->entry_count = 1;
- memcpy (local->sh_struct->dir_checksum,
- dir_checksum, NAME_MAX);
- }
-
- /* Reply from the storage nodes */
- for (index = 0;
- index < NAME_MAX; index++) {
- /* Files should be present in only
- one node */
- local->sh_struct->file_checksum[index] ^= file_checksum[index];
-
- /* directory structure should be same
- accross */
- if (local->sh_struct->dir_checksum[index] != dir_checksum[index])
- local->failed = 1;
- }
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- for (index = 0; index < NAME_MAX ; index++) {
- if (local->sh_struct->file_checksum[index] !=
- local->sh_struct->ns_file_checksum[index]) {
- local->failed = 1;
- break;
- }
- if (local->sh_struct->dir_checksum[index] !=
- local->sh_struct->ns_dir_checksum[index]) {
- local->failed = 1;
- break;
- }
- }
-
- if (local->failed) {
- /* Log it, it should be a rare event */
- gf_log (this->name, GF_LOG_WARNING,
- "Self-heal triggered on directory %s",
- local->loc1.path);
-
- /* Any self heal will be done at the directory level */
- local->op_ret = -1;
- local->failed = 0;
-
- local->fd = fd_create (local->loc1.inode,
- frame->root->pid);
- local->call_count = priv->child_count + 1;
-
- for (index = 0;
- index < (priv->child_count + 1); index++) {
- STACK_WIND_COOKIE (frame,
- unify_bgsh_opendir_cbk,
- priv->xl_array[index]->name,
- priv->xl_array[index],
- priv->xl_array[index]->fops->opendir,
- &local->loc1,
- local->fd);
- }
-
- /* opendir can be done on the directory */
- return 0;
- }
-
- /* no mismatch */
- unify_local_wipe (local);
- STACK_DESTROY (frame->root);
- }
-
- return 0;
-}
-
-/* Background self-heal part over */
-
-
-
-
-/**
- * zr_unify_self_heal -
- *
- * @frame: frame used in lookup. get a copy of it, and use that copy.
- * @this: pointer to unify xlator.
- * @inode: pointer to inode, for which the consistency check is required.
- *
- */
-int32_t
-zr_unify_self_heal (call_frame_t *frame,
- xlator_t *this,
- unify_local_t *local)
-{
- unify_private_t *priv = this->private;
- call_frame_t *bg_frame = NULL;
- unify_local_t *bg_local = NULL;
- inode_t *tmp_inode = NULL;
- dict_t *tmp_dict = NULL;
- int16_t index = 0;
-
- if (local->inode_generation < priv->inode_generation) {
- /* Any self heal will be done at the directory level */
- /* Update the inode's generation to the current generation
- value. */
- local->inode_generation = priv->inode_generation;
- inode_ctx_put (local->loc1.inode, this,
- (uint64_t)(long)local->inode_generation);
-
- if (priv->self_heal == ZR_UNIFY_FG_SELF_HEAL) {
- local->op_ret = 0;
- local->failed = 0;
- local->call_count = priv->child_count + 1;
- local->sh_struct =
- calloc (1, sizeof (struct unify_self_heal_struct));
-
- /* +1 is for NS */
- for (index = 0;
- index < (priv->child_count + 1); index++) {
- STACK_WIND_COOKIE (frame,
- unify_sh_checksum_cbk,
- priv->xl_array[index],
- priv->xl_array[index],
- priv->xl_array[index]->fops->checksum,
- &local->loc1,
- 0);
- }
-
- /* Self-heal in foreground, hence no need
- to UNWIND here */
- return 0;
- }
-
- /* Self Heal done in background */
- bg_frame = copy_frame (frame);
- INIT_LOCAL (bg_frame, bg_local);
- loc_copy (&bg_local->loc1, &local->loc1);
- bg_local->op_ret = 0;
- bg_local->failed = 0;
- bg_local->call_count = priv->child_count + 1;
- bg_local->sh_struct =
- calloc (1, sizeof (struct unify_self_heal_struct));
-
- /* +1 is for NS */
- for (index = 0; index < (priv->child_count + 1); index++) {
- STACK_WIND_COOKIE (bg_frame,
- unify_bgsh_checksum_cbk,
- priv->xl_array[index],
- priv->xl_array[index],
- priv->xl_array[index]->fops->checksum,
- &bg_local->loc1,
- 0);
- }
- }
-
- /* generation number matches, self heal already done or
- * self heal done in background: just do STACK_UNWIND
- */
- tmp_inode = local->loc1.inode;
- tmp_dict = local->dict;
-
- unify_local_wipe (local);
-
- /* This is lookup_cbk ()'s UNWIND. */
- STACK_UNWIND (frame,
- local->op_ret,
- local->op_errno,
- tmp_inode,
- &local->stbuf,
- local->dict);
-
- if (tmp_dict)
- dict_unref (tmp_dict);
-
- return 0;
-}
-
diff --git a/xlators/cluster/unify/src/unify.c b/xlators/cluster/unify/src/unify.c
deleted file mode 100644
index 35c108963..000000000
--- a/xlators/cluster/unify/src/unify.c
+++ /dev/null
@@ -1,4506 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * xlators/cluster/unify:
- * - This xlator is one of the main translator in GlusterFS, which
- * actually does the clustering work of the file system. One need to
- * understand that, unify assumes file to be existing in only one of
- * the child node, and directories to be present on all the nodes.
- *
- * NOTE:
- * Now, unify has support for global namespace, which is used to keep a
- * global view of fs's namespace tree. The stat for directories are taken
- * just from the namespace, where as for files, just 'st_ino' is taken from
- * Namespace node, and other stat info is taken from the actual storage node.
- * Also Namespace node helps to keep consistant inode for files across
- * glusterfs (re-)mounts.
- */
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "unify.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include <signal.h>
-#include <libgen.h>
-#include "compat-errno.h"
-#include "compat.h"
-
-#define UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR(_loc) do { \
- if (!(_loc && _loc->inode)) { \
- STACK_UNWIND (frame, -1, EINVAL, NULL, NULL, NULL); \
- return 0; \
- } \
-} while(0)
-
-
-#define UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR(_fd) do { \
- if (!(_fd && !fd_ctx_get (_fd, this, NULL))) { \
- STACK_UNWIND (frame, -1, EBADFD, NULL, NULL); \
- return 0; \
- } \
-} while(0)
-
-#define UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(_fd) do { \
- if (!_fd) { \
- STACK_UNWIND (frame, -1, EBADFD, NULL, NULL); \
- return 0; \
- } \
-} while(0)
-
-/**
- * unify_local_wipe - free all the extra allocation of local->* here.
- */
-static void
-unify_local_wipe (unify_local_t *local)
-{
- /* Free the strdup'd variables in the local structure */
- if (local->name) {
- FREE (local->name);
- }
- loc_wipe (&local->loc1);
- loc_wipe (&local->loc2);
-}
-
-
-
-/*
- * unify_normalize_stats -
- */
-void
-unify_normalize_stats (struct statvfs *buf,
- unsigned long bsize,
- unsigned long frsize)
-{
- double factor;
-
- if (buf->f_bsize != bsize) {
- factor = ((double) buf->f_bsize) / bsize;
- buf->f_bsize = bsize;
- buf->f_bfree = (fsblkcnt_t) (factor * buf->f_bfree);
- buf->f_bavail = (fsblkcnt_t) (factor * buf->f_bavail);
- }
-
- if (buf->f_frsize != frsize) {
- factor = ((double) buf->f_frsize) / frsize;
- buf->f_frsize = frsize;
- buf->f_blocks = (fsblkcnt_t) (factor * buf->f_blocks);
- }
-}
-
-
-xlator_t *
-unify_loc_subvol (loc_t *loc, xlator_t *this)
-{
- unify_private_t *priv = NULL;
- xlator_t *subvol = NULL;
- int16_t *list = NULL;
- long index = 0;
- xlator_t *subvol_i = NULL;
- int ret = 0;
- uint64_t tmp_list = 0;
-
- priv = this->private;
- subvol = NS (this);
-
- if (!S_ISDIR (loc->inode->st_mode)) {
- ret = inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
- if (!list)
- goto out;
-
- for (index = 0; list[index] != -1; index++) {
- subvol_i = priv->xl_array[list[index]];
- if (subvol_i != NS (this)) {
- subvol = subvol_i;
- break;
- }
- }
- }
-out:
- return subvol;
-}
-
-
-
-/**
- * unify_statfs_cbk -
- */
-int32_t
-unify_statfs_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct statvfs *stbuf)
-{
- int32_t callcnt = 0;
- struct statvfs *dict_buf = NULL;
- unsigned long bsize;
- unsigned long frsize;
- unify_local_t *local = (unify_local_t *)frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret >= 0) {
- /* when a call is successfull, add it to local->dict */
- dict_buf = &local->statvfs_buf;
-
- if (dict_buf->f_bsize != 0) {
- bsize = max (dict_buf->f_bsize,
- stbuf->f_bsize);
-
- frsize = max (dict_buf->f_frsize,
- stbuf->f_frsize);
- unify_normalize_stats(dict_buf, bsize, frsize);
- unify_normalize_stats(stbuf, bsize, frsize);
- } else {
- dict_buf->f_bsize = stbuf->f_bsize;
- dict_buf->f_frsize = stbuf->f_frsize;
- }
-
- dict_buf->f_blocks += stbuf->f_blocks;
- dict_buf->f_bfree += stbuf->f_bfree;
- dict_buf->f_bavail += stbuf->f_bavail;
- dict_buf->f_files += stbuf->f_files;
- dict_buf->f_ffree += stbuf->f_ffree;
- dict_buf->f_favail += stbuf->f_favail;
- dict_buf->f_fsid = stbuf->f_fsid;
- dict_buf->f_flag = stbuf->f_flag;
- dict_buf->f_namemax = stbuf->f_namemax;
- local->op_ret = op_ret;
- } else {
- /* fop on storage node has failed due to some error */
- if (op_errno != ENOTCONN) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): %s",
- prev_frame->this->name,
- strerror (op_errno));
- }
- local->op_errno = op_errno;
- }
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->statvfs_buf);
- }
-
- return 0;
-}
-
-/**
- * unify_statfs -
- */
-int32_t
-unify_statfs (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- unify_local_t *local = NULL;
- xlator_list_t *trav = this->children;
-
- INIT_LOCAL (frame, local);
- local->call_count = ((unify_private_t *)this->private)->child_count;
-
- while(trav) {
- STACK_WIND (frame,
- unify_statfs_cbk,
- trav->xlator,
- trav->xlator->fops->statfs,
- loc);
- trav = trav->next;
- }
-
- return 0;
-}
-
-/**
- * unify_buf_cbk -
- */
-int32_t
-unify_buf_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s(): child(%s): path(%s): %s",
- gf_fop_list[frame->root->op],
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
-
- local->op_errno = op_errno;
- if ((op_errno == ENOENT) && priv->optimist)
- local->op_ret = 0;
- }
-
- if (op_ret >= 0) {
- local->op_ret = 0;
-
- if (NS (this) == prev_frame->this) {
- local->st_ino = buf->st_ino;
- /* If the entry is directory, get the stat
- from NS node */
- if (S_ISDIR (buf->st_mode) ||
- !local->stbuf.st_blksize) {
- local->stbuf = *buf;
- }
- }
-
- if ((!S_ISDIR (buf->st_mode)) &&
- (NS (this) != prev_frame->this)) {
- /* If file, take the stat info from Storage
- node. */
- local->stbuf = *buf;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- /* If the inode number is not filled, operation should
- fail */
- if (!local->st_ino)
- local->op_ret = -1;
-
- local->stbuf.st_ino = local->st_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf);
- }
-
- return 0;
-}
-
-#define check_if_dht_linkfile(s) ((s->st_mode & ~S_IFMT) == S_ISVTX)
-
-/**
- * unify_lookup_cbk -
- */
-int32_t
-unify_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf,
- dict_t *dict)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- inode_t *tmp_inode = NULL;
- dict_t *local_dict = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- if (local->revalidate &&
- (op_errno == ESTALE)) {
- /* ESTALE takes priority */
- local->op_errno = op_errno;
- local->failed = 1;
- }
-
- if ((op_errno != ENOTCONN)
- && (op_errno != ENOENT)
- && (local->op_errno != ESTALE)) {
- /* if local->op_errno is already ESTALE, then
- * ESTALE has to propogated to the parent first.
- * do not enter here.
- */
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- local->failed = 1;
-
- } else if (local->revalidate &&
- (local->op_errno != ESTALE) &&
- !(priv->optimist && (op_errno == ENOENT))) {
-
- gf_log (this->name,
- (op_errno == ENOTCONN) ?
- GF_LOG_DEBUG:GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- local->failed = 1;
- }
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
-
- if (check_if_dht_linkfile(buf)) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "file %s may be DHT link file on %s, "
- "make sure the backend is not shared "
- "between unify and DHT",
- local->loc1.path,
- priv->xl_array[(long)cookie]->name);
- }
-
- if (local->stbuf.st_mode && local->stbuf.st_blksize) {
- /* make sure we already have a stbuf
- stored in local->stbuf */
- if (S_ISDIR (local->stbuf.st_mode) &&
- !S_ISDIR (buf->st_mode)) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "[CRITICAL] '%s' is directory "
- "on namespace, non-directory "
- "on node '%s', returning EIO",
- local->loc1.path,
- priv->xl_array[(long)cookie]->name);
- local->return_eio = 1;
- }
- if (!S_ISDIR (local->stbuf.st_mode) &&
- S_ISDIR (buf->st_mode)) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "[CRITICAL] '%s' is directory "
- "on node '%s', non-directory "
- "on namespace, returning EIO",
- local->loc1.path,
- priv->xl_array[(long)cookie]->name);
- local->return_eio = 1;
- }
- }
-
- if (!local->revalidate && !S_ISDIR (buf->st_mode)) {
- /* This is the first time lookup on file*/
- if (!local->list) {
- /* list is not allocated, allocate
- the max possible range */
- local->list = CALLOC (1, 2 * (priv->child_count + 2));
- if (!local->list) {
- gf_log (this->name,
- GF_LOG_CRITICAL,
- "Not enough memory");
- STACK_UNWIND (frame, -1,
- ENOMEM, inode,
- NULL, NULL);
- return 0;
- }
- }
- /* update the index of the list */
- local->list [local->index++] =
- (int16_t)(long)cookie;
- }
-
- if (!local->revalidate && S_ISDIR (buf->st_mode)) {
- /* fresh lookup of a directory */
- inode_ctx_put (local->loc1.inode, this,
- priv->inode_generation);
- }
-
- if ((!local->dict) && dict &&
- (priv->xl_array[(long)cookie] != NS(this))) {
- local->dict = dict_ref (dict);
- }
-
- /* index of NS node is == total child count */
- if (priv->child_count == (int16_t)(long)cookie) {
- /* Take the inode number from namespace */
- local->st_ino = buf->st_ino;
- if (S_ISDIR (buf->st_mode) ||
- !(local->stbuf.st_blksize)) {
- local->stbuf = *buf;
- }
- } else if (!S_ISDIR (buf->st_mode)) {
- /* If file, then get the stat from
- storage node */
- local->stbuf = *buf;
- }
-
- if (local->st_nlink < buf->st_nlink) {
- local->st_nlink = buf->st_nlink;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local_dict = local->dict;
- if (local->return_eio) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "[CRITICAL] Unable to fix the path (%s) with "
- "self-heal, try manual verification. "
- "returning EIO.", local->loc1.path);
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, EIO, inode, NULL, NULL);
- if (local_dict) {
- dict_unref (local_dict);
- }
- return 0;
- }
-
- if (!local->stbuf.st_blksize) {
- /* Inode not present */
- local->op_ret = -1;
- } else {
- if (!local->revalidate &&
- !S_ISDIR (local->stbuf.st_mode)) {
- /* If its a file, big array is useless,
- allocate the smaller one */
- int16_t *list = NULL;
- list = CALLOC (1, 2 * (local->index + 1));
- ERR_ABORT (list);
- memcpy (list, local->list, 2 * local->index);
- /* Make the end of the list as -1 */
- FREE (local->list);
- local->list = list;
- local->list [local->index] = -1;
- /* Update the inode's ctx with proper array */
- /* TODO: log on failure */
- inode_ctx_put (local->loc1.inode, this,
- (uint64_t)(long)local->list);
- }
-
- if (S_ISDIR(local->loc1.inode->st_mode)) {
- /* lookup is done for directory */
- if (local->failed && priv->self_heal) {
- /* Triggering self-heal */
- /* means, self-heal required for this
- inode */
- local->inode_generation = 0;
- priv->inode_generation++;
- }
- } else {
- local->stbuf.st_ino = local->st_ino;
- }
-
- local->stbuf.st_nlink = local->st_nlink;
- }
- if (local->op_ret == -1) {
- if (!local->revalidate && local->list)
- FREE (local->list);
- }
-
- if ((local->op_ret >= 0) && local->failed &&
- local->revalidate) {
- /* Done revalidate, but it failed */
- if ((op_errno != ENOTCONN)
- && (local->op_errno != ESTALE)) {
- gf_log (this->name, GF_LOG_ERROR,
- "Revalidate failed for path(%s): %s",
- local->loc1.path, strerror (op_errno));
- }
- local->op_ret = -1;
- }
-
- if ((priv->self_heal && !priv->optimist) &&
- (!local->revalidate && (local->op_ret == 0) &&
- S_ISDIR(local->stbuf.st_mode))) {
- /* Let the self heal be done here */
- zr_unify_self_heal (frame, this, local);
- local_dict = NULL;
- } else {
- if (local->failed) {
- /* NOTE: directory lookup is sent to all
- * subvolumes and success from a subvolume
- * might set local->op_ret to 0 (zero) */
- local->op_ret = -1;
- }
-
- /* either no self heal, or op_ret == -1 (failure) */
- tmp_inode = local->loc1.inode;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- tmp_inode, &local->stbuf, local->dict);
- }
- if (local_dict) {
- dict_unref (local_dict);
- }
- }
-
- return 0;
-}
-
-/**
- * unify_lookup -
- */
-int32_t
-unify_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req)
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int16_t *list = NULL;
- long index = 0;
-
- if (!(loc && loc->inode)) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: Argument not right", loc?loc->path:"(null)");
- STACK_UNWIND (frame, -1, EINVAL, NULL, NULL, NULL);
- return 0;
- }
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, loc->inode, NULL, NULL);
- return 0;
- }
-
- if (inode_ctx_get (loc->inode, this, NULL)
- && S_ISDIR (loc->inode->st_mode)) {
- local->revalidate = 1;
- }
-
- if (!inode_ctx_get (loc->inode, this, NULL) &&
- loc->inode->st_mode &&
- !S_ISDIR (loc->inode->st_mode)) {
- uint64_t tmp_list = 0;
- /* check if revalidate or fresh lookup */
- inode_ctx_get (loc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
- }
-
- if (local->list) {
- list = local->list;
- for (index = 0; list[index] != -1; index++);
- if (index != 2) {
- if (index < 2) {
- gf_log (this->name, GF_LOG_ERROR,
- "returning ESTALE for %s: file "
- "count is %ld", loc->path, index);
- /* Print where all the file is present */
- for (index = 0;
- local->list[index] != -1; index++) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: found on %s", loc->path,
- priv->xl_array[list[index]]->name);
- }
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, ESTALE,
- NULL, NULL, NULL);
- return 0;
- } else {
- /* There are more than 2 presences */
- /* Just log and continue */
- gf_log (this->name, GF_LOG_ERROR,
- "%s: file count is %ld",
- loc->path, index);
- /* Print where all the file is present */
- for (index = 0;
- local->list[index] != -1; index++) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: found on %s", loc->path,
- priv->xl_array[list[index]]->name);
- }
- }
- }
-
- /* is revalidate */
- local->revalidate = 1;
-
- for (index = 0; list[index] != -1; index++)
- local->call_count++;
-
- for (index = 0; list[index] != -1; index++) {
- char need_break = (list[index+1] == -1);
- STACK_WIND_COOKIE (frame,
- unify_lookup_cbk,
- (void *)(long)list[index], //cookie
- priv->xl_array [list[index]],
- priv->xl_array [list[index]]->fops->lookup,
- loc,
- xattr_req);
- if (need_break)
- break;
- }
- } else {
- if (loc->inode->st_mode) {
- if (inode_ctx_get (loc->inode, this, NULL)) {
- inode_ctx_get (loc->inode, this,
- &local->inode_generation);
- }
- }
- /* This is first call, there is no list */
- /* call count should be all child + 1 namespace */
- local->call_count = priv->child_count + 1;
-
- for (index = 0; index <= priv->child_count; index++) {
- STACK_WIND_COOKIE (frame,
- unify_lookup_cbk,
- (void *)index, //cookie
- priv->xl_array[index],
- priv->xl_array[index]->fops->lookup,
- loc,
- xattr_req);
- }
- }
-
- return 0;
-}
-
-/**
- * unify_stat - if directory, get the stat directly from NameSpace child.
- * if file, check for a hint and send it only there (also to NS).
- * if its a fresh stat, then do it on all the nodes.
- *
- * NOTE: for all the call, sending cookie as xlator pointer, which will be
- * used in cbk.
- */
-int32_t
-unify_stat (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int16_t index = 0;
- int16_t *list = NULL;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- return 0;
- }
- local->st_ino = loc->inode->ino;
- if (S_ISDIR (loc->inode->st_mode)) {
- /* Directory */
- local->call_count = 1;
- STACK_WIND (frame, unify_buf_cbk, NS(this),
- NS(this)->fops->stat, loc);
- } else {
- /* File */
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++)
- local->call_count++;
-
- for (index = 0; list[index] != -1; index++) {
- char need_break = (list[index+1] == -1);
- STACK_WIND (frame,
- unify_buf_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->stat,
- loc);
- if (need_break)
- break;
- }
- }
-
- return 0;
-}
-
-/**
- * unify_access_cbk -
- */
-int32_t
-unify_access_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-/**
- * unify_access - Send request to only namespace, which has all the
- * attributes set for the file.
- */
-int32_t
-unify_access (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t mask)
-{
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- STACK_WIND (frame,
- unify_access_cbk,
- NS(this),
- NS(this)->fops->access,
- loc,
- mask);
-
- return 0;
-}
-
-int32_t
-unify_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- inode_t *tmp_inode = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if ((op_ret == -1) && !(priv->optimist &&
- (op_errno == ENOENT ||
- op_errno == EEXIST))) {
- /* TODO: Decrement the inode_generation of
- * this->inode's parent inode, hence the missing
- * directory is created properly by self-heal.
- * Currently, there is no way to get the parent
- * inode directly.
- */
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- if (op_errno != EEXIST)
- local->failed = 1;
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0)
- local->op_ret = 0;
-
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (!local->failed) {
- inode_ctx_put (local->loc1.inode, this,
- priv->inode_generation);
- }
-
- tmp_inode = local->loc1.inode;
- unify_local_wipe (local);
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- tmp_inode, &local->stbuf);
- }
-
- return 0;
-}
-
-/**
- * unify_ns_mkdir_cbk -
- */
-int32_t
-unify_ns_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- long index = 0;
-
- if (op_ret == -1) {
- /* No need to send mkdir request to other servers,
- * as namespace action failed
- */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s): %s",
- local->name, strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, NULL);
- return 0;
- }
-
- /* Create one inode for this entry */
- local->op_ret = 0;
- local->stbuf = *buf;
-
- local->call_count = priv->child_count;
-
- /* Send mkdir request to all the nodes now */
- for (index = 0; index < priv->child_count; index++) {
- STACK_WIND_COOKIE (frame,
- unify_mkdir_cbk,
- (void *)index, //cookie
- priv->xl_array[index],
- priv->xl_array[index]->fops->mkdir,
- &local->loc1,
- local->mode);
- }
-
- return 0;
-}
-
-
-/**
- * unify_mkdir -
- */
-int32_t
-unify_mkdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
-{
- unify_local_t *local = NULL;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->mode = mode;
-
- loc_copy (&local->loc1, loc);
-
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_mkdir_cbk,
- NS(this),
- NS(this)->fops->mkdir,
- loc,
- mode);
- return 0;
-}
-
-/**
- * unify_rmdir_cbk -
- */
-int32_t
-unify_rmdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == 0 || (priv->optimist && (op_errno == ENOENT)))
- local->op_ret = 0;
- if (op_ret == -1)
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno);
- }
-
- return 0;
-}
-
-/**
- * unify_ns_rmdir_cbk -
- */
-int32_t
-unify_ns_rmdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int16_t index = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- /* No need to send rmdir request to other servers,
- * as namespace action failed
- */
- gf_log (this->name,
- ((op_errno != ENOTEMPTY) ?
- GF_LOG_ERROR : GF_LOG_DEBUG),
- "namespace: path(%s): %s",
- local->loc1.path, strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
- }
-
- local->call_count = priv->child_count;
-
- for (index = 0; index < priv->child_count; index++) {
- STACK_WIND (frame,
- unify_rmdir_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->rmdir,
- &local->loc1);
- }
-
- return 0;
-}
-
-/**
- * unify_rmdir -
- */
-int32_t
-unify_rmdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- unify_local_t *local = NULL;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
-
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_rmdir_cbk,
- NS(this),
- NS(this)->fops->rmdir,
- loc);
-
- return 0;
-}
-
-/**
- * unify_open_cbk -
- */
-int32_t
-unify_open_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- if (NS(this) != (xlator_t *)cookie) {
- /* Store child node's ptr, used in
- all the f*** / FileIO calls */
- fd_ctx_set (fd, this, (uint64_t)(long)cookie);
- }
- }
- if (op_ret == -1) {
- local->op_errno = op_errno;
- local->failed = 1;
- }
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if ((local->failed == 1) && (local->op_ret >= 0)) {
- local->call_count = 1;
- /* return -1 to user */
- local->op_ret = -1;
- //local->op_errno = EIO;
-
- if (!fd_ctx_get (local->fd, this, NULL)) {
- gf_log (this->name, GF_LOG_ERROR,
- "Open success on child node, "
- "failed on namespace");
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "Open success on namespace, "
- "failed on child node");
- }
- }
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret,
- local->op_errno, local->fd);
- }
-
- return 0;
-}
-
-#ifdef GF_DARWIN_HOST_OS
-/**
- * unify_create_lookup_cbk -
- */
-int32_t
-unify_open_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf,
- dict_t *dict)
-{
- int32_t callcnt = 0;
- int16_t index = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- local->index++;
- if (NS(this) == priv->xl_array[(long)cookie]) {
- local->list[0] = (int16_t)(long)cookie;
- } else {
- local->list[1] = (int16_t)(long)cookie;
- }
- if (S_ISDIR (buf->st_mode))
- local->failed = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- int16_t file_list[3] = {0,};
- local->op_ret = -1;
-
- file_list[0] = local->list[0];
- file_list[1] = local->list[1];
- file_list[2] = -1;
-
- if (local->index != 2) {
- /* Lookup failed, can't do open */
- gf_log (this->name, GF_LOG_ERROR,
- "%s: present on %d nodes",
- local->name, local->index);
-
- if (local->index < 2) {
- unify_local_wipe (local);
- gf_log (this->name, GF_LOG_ERROR,
- "returning as file found on less "
- "than 2 nodes");
- STACK_UNWIND (frame, local->op_ret,
- local->op_errno, local->fd);
- return 0;
- }
- }
-
- if (local->failed) {
- /* Open on directory, return EISDIR */
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, EISDIR, local->fd);
- return 0;
- }
-
- /* Everything is perfect :) */
- local->call_count = 2;
-
- for (index = 0; file_list[index] != -1; index++) {
- char need_break = (file_list[index+1] == -1);
- STACK_WIND_COOKIE (frame,
- unify_open_cbk,
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]]->fops->open,
- &local->loc1,
- local->flags,
- local->fd);
- if (need_break)
- break;
- }
- }
-
- return 0;
-}
-
-
-int32_t
-unify_open_readlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- const char *path)
-{
- int16_t index = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- STACK_UNWIND (frame, -1, ENOENT);
- return 0;
- }
-
- if (path[0] == '/') {
- local->name = strdup (path);
- ERR_ABORT (local->name);
- } else {
- char *tmp_str = strdup (local->loc1.path);
- char *tmp_base = dirname (tmp_str);
- local->name = CALLOC (1, ZR_PATH_MAX);
- strcpy (local->name, tmp_base);
- strncat (local->name, "/", 1);
- strcat (local->name, path);
- FREE (tmp_str);
- }
-
- local->list = CALLOC (1, sizeof (int16_t) * 3);
- ERR_ABORT (local->list);
- local->call_count = priv->child_count + 1;
- local->op_ret = -1;
- for (index = 0; index <= priv->child_count; index++) {
- /* Send the lookup to all the nodes including namespace */
- STACK_WIND_COOKIE (frame,
- unify_open_lookup_cbk,
- (void *)(long)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->lookup,
- &local->loc1,
- NULL);
- }
-
- return 0;
-}
-#endif /* GF_DARWIN_HOST_OS */
-
-/**
- * unify_open -
- */
-int32_t
-unify_open (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- fd_t *fd)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = NULL;
- int16_t *list = NULL;
- int16_t index = 0;
- int16_t file_list[3] = {0,};
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Init */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- local->fd = fd;
- local->flags = flags;
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- local->list = list;
- file_list[0] = priv->child_count; /* Thats namespace */
- file_list[2] = -1;
- for (index = 0; list[index] != -1; index++) {
- local->call_count++;
- if (list[index] != priv->child_count)
- file_list[1] = list[index];
- }
-
- if (local->call_count != 2) {
- /* If the lookup was done for file */
- gf_log (this->name, GF_LOG_ERROR,
- "%s: entry_count is %d",
- loc->path, local->call_count);
- for (index = 0; local->list[index] != -1; index++)
- gf_log (this->name, GF_LOG_ERROR, "%s: found on %s",
- loc->path, priv->xl_array[list[index]]->name);
-
- if (local->call_count < 2) {
- gf_log (this->name, GF_LOG_ERROR,
- "returning EIO as file found on onlyone node");
- STACK_UNWIND (frame, -1, EIO, fd);
- return 0;
- }
- }
-
-#ifdef GF_DARWIN_HOST_OS
- /* Handle symlink here */
- if (S_ISLNK (loc->inode->st_mode)) {
- /* Callcount doesn't matter here */
- STACK_WIND (frame,
- unify_open_readlink_cbk,
- NS(this),
- NS(this)->fops->readlink,
- loc, ZR_PATH_MAX);
- return 0;
- }
-#endif /* GF_DARWIN_HOST_OS */
-
- local->call_count = 2;
- for (index = 0; file_list[index] != -1; index++) {
- char need_break = (file_list[index+1] == -1);
- STACK_WIND_COOKIE (frame,
- unify_open_cbk,
- priv->xl_array[file_list[index]], //cookie
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]]->fops->open,
- loc,
- flags,
- fd);
- if (need_break)
- break;
- }
-
- return 0;
-}
-
-
-int32_t
-unify_create_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- unify_local_t *local = frame->local;
- inode_t *inode = local->loc1.inode;
-
- unify_local_wipe (local);
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno, local->fd,
- inode, &local->stbuf);
-
- return 0;
-}
-
-/**
- * unify_create_open_cbk -
- */
-int32_t
-unify_create_open_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- int ret = 0;
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- inode_t *inode = NULL;
- xlator_t *child = NULL;
- uint64_t tmp_value = 0;
-
- LOCK (&frame->lock);
- {
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- if (NS(this) != (xlator_t *)cookie) {
- /* Store child node's ptr, used in all
- the f*** / FileIO calls */
- /* TODO: log on failure */
- ret = fd_ctx_get (fd, this, &tmp_value);
- cookie = (void *)(long)tmp_value;
- } else {
- /* NOTE: open successful on namespace.
- * fd's ctx can be used to identify open
- * failure on storage subvolume. cool
- * ide ;) */
- local->failed = 0;
- }
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- ((xlator_t *)cookie)->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- local->failed = 1;
- }
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed == 1 && (local->op_ret >= 0)) {
- local->call_count = 1;
- /* return -1 to user */
- local->op_ret = -1;
- local->op_errno = EIO;
- local->fd = fd;
- local->call_count = 1;
-
- if (!fd_ctx_get (local->fd, this, &tmp_value)) {
- child = (xlator_t *)(long)tmp_value;
-
- gf_log (this->name, GF_LOG_ERROR,
- "Create success on child node, "
- "failed on namespace");
-
- STACK_WIND (frame,
- unify_create_unlink_cbk,
- child,
- child->fops->unlink,
- &local->loc1);
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "Create success on namespace, "
- "failed on child node");
-
- STACK_WIND (frame,
- unify_create_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
- }
- return 0;
- }
- inode = local->loc1.inode;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno, fd,
- inode, &local->stbuf);
- }
- return 0;
-}
-
-/**
- * unify_create_lookup_cbk -
- */
-int32_t
-unify_create_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf,
- dict_t *dict)
-{
- int32_t callcnt = 0;
- int16_t index = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- local->failed = 1;
- }
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- local->list[local->index++] = (int16_t)(long)cookie;
- if (NS(this) == priv->xl_array[(long)cookie]) {
- local->st_ino = buf->st_ino;
- } else {
- local->stbuf = *buf;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- int16_t *list = local->list;
- int16_t file_list[3] = {0,};
- local->op_ret = -1;
-
- local->list [local->index] = -1;
- file_list[0] = list[0];
- file_list[1] = list[1];
- file_list[2] = -1;
-
- local->stbuf.st_ino = local->st_ino;
- /* TODO: log on failure */
- inode_ctx_put (local->loc1.inode, this,
- (uint64_t)(long)local->list);
-
- if (local->index != 2) {
- /* Lookup failed, can't do open */
- gf_log (this->name, GF_LOG_ERROR,
- "%s: present on %d nodes",
- local->loc1.path, local->index);
- file_list[0] = priv->child_count;
- for (index = 0; list[index] != -1; index++) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: found on %s", local->loc1.path,
- priv->xl_array[list[index]]->name);
- if (list[index] != priv->child_count)
- file_list[1] = list[index];
- }
-
- if (local->index < 2) {
- unify_local_wipe (local);
- gf_log (this->name, GF_LOG_ERROR,
- "returning EIO as file found on "
- "only one node");
- STACK_UNWIND (frame, -1, EIO,
- local->fd, inode, NULL);
- return 0;
- }
- }
- /* Everything is perfect :) */
- local->call_count = 2;
-
- for (index = 0; file_list[index] != -1; index++) {
- char need_break = (file_list[index+1] == -1);
- STACK_WIND_COOKIE (frame,
- unify_create_open_cbk,
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]]->fops->open,
- &local->loc1,
- local->flags,
- local->fd);
- if (need_break)
- break;
- }
- }
-
- return 0;
-}
-
-
-/**
- * unify_create_cbk -
- */
-int32_t
-unify_create_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct stat *buf)
-{
- int ret = 0;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
- inode_t *tmp_inode = NULL;
-
- if (op_ret == -1) {
- /* send unlink () on Namespace */
- local->op_errno = op_errno;
- local->op_ret = -1;
- local->call_count = 1;
- gf_log (this->name, GF_LOG_ERROR,
- "create failed on %s (file %s, error %s), "
- "sending unlink to namespace",
- prev_frame->this->name,
- local->loc1.path, strerror (op_errno));
-
- STACK_WIND (frame,
- unify_create_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- local->stbuf = *buf;
- /* Just inode number should be from NS node */
- local->stbuf.st_ino = local->st_ino;
-
- /* TODO: log on failure */
- ret = fd_ctx_set (fd, this, (uint64_t)(long)prev_frame->this);
- }
-
- tmp_inode = local->loc1.inode;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno, local->fd,
- tmp_inode, &local->stbuf);
-
- return 0;
-}
-
-/**
- * unify_ns_create_cbk -
- *
- */
-int32_t
-unify_ns_create_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct stat *buf)
-{
- struct sched_ops *sched_ops = NULL;
- xlator_t *sched_xl = NULL;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t *list = NULL;
- int16_t index = 0;
-
- if (op_ret == -1) {
- /* No need to send create request to other servers, as
- namespace action failed. Handle exclusive create here. */
- if ((op_errno != EEXIST) ||
- ((op_errno == EEXIST) &&
- ((local->flags & O_EXCL) == O_EXCL))) {
- /* If its just a create call without O_EXCL,
- don't do this */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s): %s",
- local->loc1.path, strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
- return 0;
- }
- }
-
- if (op_ret >= 0) {
- /* Get the inode number from the NS node */
- local->st_ino = buf->st_ino;
-
- local->op_ret = -1;
-
- /* Start the mapping list */
- list = CALLOC (1, sizeof (int16_t) * 3);
- ERR_ABORT (list);
- inode_ctx_put (inode, this, (uint64_t)(long)list);
- list[0] = priv->child_count;
- list[2] = -1;
-
- /* This means, file doesn't exist anywhere in the Filesystem */
- sched_ops = priv->sched_ops;
-
- /* Send create request to the scheduled node now */
- sched_xl = sched_ops->schedule (this, local->loc1.path);
- if (sched_xl == NULL)
- {
- /* send unlink () on Namespace */
- local->op_errno = ENOTCONN;
- local->op_ret = -1;
- local->call_count = 1;
- gf_log (this->name, GF_LOG_ERROR,
- "no node online to schedule create:(file %s) "
- "sending unlink to namespace",
- (local->loc1.path)?local->loc1.path:"");
-
- STACK_WIND (frame,
- unify_create_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- for (index = 0; index < priv->child_count; index++)
- if (sched_xl == priv->xl_array[index])
- break;
- list[1] = index;
-
- STACK_WIND (frame, unify_create_cbk,
- sched_xl, sched_xl->fops->create,
- &local->loc1, local->flags, local->mode, fd);
- } else {
- /* File already exists, and there is no O_EXCL flag */
-
- gf_log (this->name, GF_LOG_DEBUG,
- "File(%s) already exists on namespace, sending "
- "open instead", local->loc1.path);
-
- local->list = CALLOC (1, sizeof (int16_t) * 3);
- ERR_ABORT (local->list);
- local->call_count = priv->child_count + 1;
- local->op_ret = -1;
- for (index = 0; index <= priv->child_count; index++) {
- /* Send lookup() to all nodes including namespace */
- STACK_WIND_COOKIE (frame,
- unify_create_lookup_cbk,
- (void *)(long)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->lookup,
- &local->loc1,
- NULL);
- }
- }
- return 0;
-}
-
-/**
- * unify_create - create a file in global namespace first, so other
- * clients can see them. Create the file in storage nodes in background.
- */
-int32_t
-unify_create (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- mode_t mode,
- fd_t *fd)
-{
- unify_local_t *local = NULL;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->mode = mode;
- local->flags = flags;
- local->fd = fd;
-
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, fd, loc->inode, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_create_cbk,
- NS(this),
- NS(this)->fops->create,
- loc,
- flags | O_EXCL,
- mode,
- fd);
-
- return 0;
-}
-
-
-/**
- * unify_opendir_cbk -
- */
-int32_t
-unify_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- STACK_UNWIND (frame, op_ret, op_errno, fd);
-
- return 0;
-}
-
-/**
- * unify_opendir -
- */
-int32_t
-unify_opendir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- fd_t *fd)
-{
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- STACK_WIND (frame, unify_opendir_cbk,
- NS(this), NS(this)->fops->opendir, loc, fd);
-
- return 0;
-}
-
-
-/**
- * unify_chmod -
- */
-int32_t
-unify_chmod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int32_t index = 0;
- int32_t callcnt = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
-
- loc_copy (&local->loc1, loc);
- local->st_ino = loc->inode->ino;
-
- if (S_ISDIR (loc->inode->st_mode)) {
- local->call_count = priv->child_count + 1;
-
- for (index = 0; index < (priv->child_count + 1); index++) {
- STACK_WIND (frame,
- unify_buf_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->chmod,
- loc, mode);
- }
- } else {
- inode_ctx_get (loc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- for (index = 0; local->list[index] != -1; index++) {
- local->call_count++;
- callcnt++;
- }
-
- for (index = 0; local->list[index] != -1; index++) {
- STACK_WIND (frame,
- unify_buf_cbk,
- priv->xl_array[local->list[index]],
- priv->xl_array[local->list[index]]->fops->chmod,
- loc,
- mode);
- if (!--callcnt)
- break;
- }
- }
-
- return 0;
-}
-
-/**
- * unify_chown -
- */
-int32_t
-unify_chown (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- uid_t uid,
- gid_t gid)
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int32_t index = 0;
- int32_t callcnt = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- local->st_ino = loc->inode->ino;
-
- if (S_ISDIR (loc->inode->st_mode)) {
- local->call_count = priv->child_count + 1;
-
- for (index = 0; index < (priv->child_count + 1); index++) {
- STACK_WIND (frame,
- unify_buf_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->chown,
- loc, uid, gid);
- }
- } else {
- inode_ctx_get (loc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- for (index = 0; local->list[index] != -1; index++) {
- local->call_count++;
- callcnt++;
- }
-
- for (index = 0; local->list[index] != -1; index++) {
- STACK_WIND (frame,
- unify_buf_cbk,
- priv->xl_array[local->list[index]],
- priv->xl_array[local->list[index]]->fops->chown,
- loc, uid, gid);
- if (!--callcnt)
- break;
- }
- }
-
- return 0;
-}
-
-
-/**
- * unify_truncate_cbk -
- */
-int32_t
-unify_truncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
- local->op_errno = op_errno;
- if (!((op_errno == ENOENT) && priv->optimist))
- local->op_ret = -1;
- }
-
- if (op_ret >= 0) {
- if (NS (this) == prev_frame->this) {
- local->st_ino = buf->st_ino;
- /* If the entry is directory, get the
- stat from NS node */
- if (S_ISDIR (buf->st_mode) ||
- !local->stbuf.st_blksize) {
- local->stbuf = *buf;
- }
- }
-
- if ((!S_ISDIR (buf->st_mode)) &&
- (NS (this) != prev_frame->this)) {
- /* If file, take the stat info from
- Storage node. */
- local->stbuf = *buf;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->st_ino)
- local->stbuf.st_ino = local->st_ino;
- else
- local->op_ret = -1;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf);
- }
-
- return 0;
-}
-
-/**
- * unify_truncate -
- */
-int32_t
-unify_truncate (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset)
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int32_t index = 0;
- int32_t callcnt = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- local->st_ino = loc->inode->ino;
-
- if (S_ISDIR (loc->inode->st_mode)) {
- local->call_count = 1;
-
- STACK_WIND (frame,
- unify_buf_cbk,
- NS(this),
- NS(this)->fops->stat,
- loc);
- } else {
- local->op_ret = 0;
- inode_ctx_get (loc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- for (index = 0; local->list[index] != -1; index++) {
- local->call_count++;
- callcnt++;
- }
-
- /* Don't send truncate to NS node */
- STACK_WIND (frame, unify_truncate_cbk, NS(this),
- NS(this)->fops->stat, loc);
- callcnt--;
-
- for (index = 0; local->list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[local->list[index]]) {
- STACK_WIND (frame,
- unify_truncate_cbk,
- priv->xl_array[local->list[index]],
- priv->xl_array[local->list[index]]->fops->truncate,
- loc,
- offset);
- if (!--callcnt)
- break;
- }
- }
- }
-
- return 0;
-}
-
-/**
- * unify_utimens -
- */
-int32_t
-unify_utimens (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- struct timespec tv[2])
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int32_t index = 0;
- int32_t callcnt = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- local->st_ino = loc->inode->ino;
-
- if (S_ISDIR (loc->inode->st_mode)) {
- local->call_count = priv->child_count + 1;
-
- for (index = 0; index < (priv->child_count + 1); index++) {
- STACK_WIND (frame,
- unify_buf_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->utimens,
- loc, tv);
- }
- } else {
- inode_ctx_get (loc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- for (index = 0; local->list[index] != -1; index++) {
- local->call_count++;
- callcnt++;
- }
-
- for (index = 0; local->list[index] != -1; index++) {
- STACK_WIND (frame,
- unify_buf_cbk,
- priv->xl_array[local->list[index]],
- priv->xl_array[local->list[index]]->fops->utimens,
- loc,
- tv);
- if (!--callcnt)
- break;
- }
- }
-
- return 0;
-}
-
-/**
- * unify_readlink_cbk -
- */
-int32_t
-unify_readlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- const char *path)
-{
- STACK_UNWIND (frame, op_ret, op_errno, path);
- return 0;
-}
-
-/**
- * unify_readlink - Read the link only from the storage node.
- */
-int32_t
-unify_readlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- size_t size)
-{
- unify_private_t *priv = this->private;
- int32_t entry_count = 0;
- int16_t *list = NULL;
- int16_t index = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++)
- entry_count++;
-
- if (entry_count >= 2) {
- for (index = 0; list[index] != -1; index++) {
- if (priv->xl_array[list[index]] != NS(this)) {
- STACK_WIND (frame,
- unify_readlink_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->readlink,
- loc,
- size);
- break;
- }
- }
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "returning ENOENT, no softlink files found "
- "on storage node");
- STACK_UNWIND (frame, -1, ENOENT, NULL);
- }
-
- return 0;
-}
-
-
-/**
- * unify_unlink_cbk -
- */
-int32_t
-unify_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == 0 || ((op_errno == ENOENT) && priv->optimist))
- local->op_ret = 0;
- if (op_ret == -1)
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno);
- }
-
- return 0;
-}
-
-
-/**
- * unify_unlink -
- */
-int32_t
-unify_unlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = NULL;
- int16_t *list = NULL;
- int16_t index = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++)
- local->call_count++;
-
- if (local->call_count) {
- for (index = 0; list[index] != -1; index++) {
- char need_break = (list[index+1] == -1);
- STACK_WIND (frame,
- unify_unlink_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->unlink,
- loc);
- if (need_break)
- break;
- }
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: returning ENOENT", loc->path);
- STACK_UNWIND (frame, -1, ENOENT);
- }
-
- return 0;
-}
-
-
-/**
- * unify_readv_cbk -
- */
-int32_t
-unify_readv_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct stat *stbuf,
- struct iobref *iobref)
-{
- STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf, iobref);
- return 0;
-}
-
-/**
- * unify_readv -
- */
-int32_t
-unify_readv (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame,
- unify_readv_cbk,
- child,
- child->fops->readv,
- fd,
- size,
- offset);
-
-
- return 0;
-}
-
-/**
- * unify_writev_cbk -
- */
-int32_t
-unify_writev_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *stbuf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, stbuf);
- return 0;
-}
-
-/**
- * unify_writev -
- */
-int32_t
-unify_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t off,
- struct iobref *iobref)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame,
- unify_writev_cbk,
- child,
- child->fops->writev,
- fd,
- vector,
- count,
- off,
- iobref);
-
- return 0;
-}
-
-/**
- * unify_ftruncate -
- */
-int32_t
-unify_ftruncate (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset)
-{
- xlator_t *child = NULL;
- unify_local_t *local = NULL;
- uint64_t tmp_child = 0;
-
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR(fd);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->op_ret = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- local->call_count = 2;
-
- STACK_WIND (frame, unify_truncate_cbk,
- child, child->fops->ftruncate,
- fd, offset);
-
- STACK_WIND (frame, unify_truncate_cbk,
- NS(this), NS(this)->fops->fstat,
- fd);
-
- return 0;
-}
-
-
-/**
- * unify_fchmod -
- */
-int32_t
-unify_fchmod (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- mode_t mode)
-{
- unify_local_t *local = NULL;
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(fd);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->st_ino = fd->inode->ino;
-
- if (!fd_ctx_get (fd, this, &tmp_child)) {
- /* If its set, then its file */
- child = (xlator_t *)(long)tmp_child;
-
- local->call_count = 2;
-
- STACK_WIND (frame, unify_buf_cbk, child,
- child->fops->fchmod, fd, mode);
-
- STACK_WIND (frame, unify_buf_cbk, NS(this),
- NS(this)->fops->fchmod, fd, mode);
-
- } else {
- /* this is an directory */
- local->call_count = 1;
-
- STACK_WIND (frame, unify_buf_cbk,
- NS(this), NS(this)->fops->fchmod, fd, mode);
- }
-
- return 0;
-}
-
-/**
- * unify_fchown -
- */
-int32_t
-unify_fchown (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- uid_t uid,
- gid_t gid)
-{
- unify_local_t *local = NULL;
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(fd);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->st_ino = fd->inode->ino;
-
- if (!fd_ctx_get (fd, this, &tmp_child)) {
- /* If its set, then its file */
- child = (xlator_t *)(long)tmp_child;
-
- local->call_count = 2;
-
- STACK_WIND (frame, unify_buf_cbk, child,
- child->fops->fchown, fd, uid, gid);
-
- STACK_WIND (frame, unify_buf_cbk, NS(this),
- NS(this)->fops->fchown, fd, uid, gid);
- } else {
- local->call_count = 1;
-
- STACK_WIND (frame, unify_buf_cbk,
- NS(this), NS(this)->fops->fchown,
- fd, uid, gid);
- }
-
- return 0;
-}
-
-/**
- * unify_flush_cbk -
- */
-int32_t
-unify_flush_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * unify_flush -
- */
-int32_t
-unify_flush (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_flush_cbk, child,
- child->fops->flush, fd);
-
- return 0;
-}
-
-
-/**
- * unify_fsync_cbk -
- */
-int32_t
-unify_fsync_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * unify_fsync -
- */
-int32_t
-unify_fsync (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_fsync_cbk, child,
- child->fops->fsync, fd, flags);
-
- return 0;
-}
-
-/**
- * unify_fstat - Send fstat FOP to Namespace only if its directory, and to
- * both namespace and the storage node if its a file.
- */
-int32_t
-unify_fstat (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
-{
- unify_local_t *local = NULL;
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(fd);
-
- INIT_LOCAL (frame, local);
- local->st_ino = fd->inode->ino;
-
- if (!fd_ctx_get (fd, this, &tmp_child)) {
- /* If its set, then its file */
- child = (xlator_t *)(long)tmp_child;
- local->call_count = 2;
-
- STACK_WIND (frame, unify_buf_cbk, child,
- child->fops->fstat, fd);
-
- STACK_WIND (frame, unify_buf_cbk, NS(this),
- NS(this)->fops->fstat, fd);
-
- } else {
- /* this is an directory */
- local->call_count = 1;
- STACK_WIND (frame, unify_buf_cbk, NS(this),
- NS(this)->fops->fstat, fd);
- }
-
- return 0;
-}
-
-/**
- * unify_getdents_cbk -
- */
-int32_t
-unify_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- STACK_UNWIND (frame, op_ret, op_errno, entry, count);
- return 0;
-}
-
-/**
- * unify_getdents - send the FOP request to all the nodes.
- */
-int32_t
-unify_getdents (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset,
- int32_t flag)
-{
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd);
-
- STACK_WIND (frame, unify_getdents_cbk, NS(this),
- NS(this)->fops->getdents, fd, size, offset, flag);
-
- return 0;
-}
-
-
-/**
- * unify_readdir_cbk -
- */
-int32_t
-unify_readdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
-
- return 0;
-}
-
-/**
- * unify_readdir - send the FOP request to all the nodes.
- */
-int32_t
-unify_readdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset)
-{
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd);
-
- STACK_WIND (frame, unify_readdir_cbk, NS(this),
- NS(this)->fops->readdir, fd, size, offset);
-
- return 0;
-}
-
-
-/**
- * unify_fsyncdir_cbk -
- */
-int32_t
-unify_fsyncdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
-
- return 0;
-}
-
-/**
- * unify_fsyncdir -
- */
-int32_t
-unify_fsyncdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags)
-{
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd);
-
- STACK_WIND (frame, unify_fsyncdir_cbk,
- NS(this), NS(this)->fops->fsyncdir, fd, flags);
-
- return 0;
-}
-
-/**
- * unify_lk_cbk - UNWIND frame with the proper return arguments.
- */
-int32_t
-unify_lk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct flock *lock)
-{
- STACK_UNWIND (frame, op_ret, op_errno, lock);
- return 0;
-}
-
-/**
- * unify_lk - Send it to all the storage nodes, (should be 1) which has file.
- */
-int32_t
-unify_lk (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t cmd,
- struct flock *lock)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_lk_cbk, child,
- child->fops->lk, fd, cmd, lock);
-
- return 0;
-}
-
-
-int32_t
-unify_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno);
-
-static int32_t
-unify_setxattr_file_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- unify_private_t *private = this->private;
- unify_local_t *local = frame->local;
- xlator_t *sched_xl = NULL;
- struct sched_ops *sched_ops = NULL;
-
- if (op_ret == -1) {
- if (!ENOTSUP)
- gf_log (this->name, GF_LOG_ERROR,
- "setxattr with XATTR_CREATE on ns: "
- "path(%s) key(%s): %s",
- local->loc1.path, local->name,
- strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
- }
-
- LOCK (&frame->lock);
- {
- local->failed = 0;
- local->op_ret = 0;
- local->op_errno = 0;
- local->call_count = 1;
- }
- UNLOCK (&frame->lock);
-
- /* schedule XATTR_CREATE on one of the child node */
- sched_ops = private->sched_ops;
-
- /* Send create request to the scheduled node now */
- sched_xl = sched_ops->schedule (this, local->name);
- if (!sched_xl) {
- STACK_UNWIND (frame, -1, ENOTCONN);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_setxattr_cbk,
- sched_xl,
- sched_xl->fops->setxattr,
- &local->loc1,
- local->dict,
- local->flags);
- return 0;
-}
-
-/**
- * unify_setxattr_cbk - When all the child nodes return, UNWIND frame.
- */
-int32_t
-unify_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
- dict_t *dict = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, (((op_errno == ENOENT) ||
- (op_errno == ENOTSUP))?
- GF_LOG_DEBUG : GF_LOG_ERROR),
- "child(%s): path(%s): %s",
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
- if (local->failed == -1) {
- local->failed = 1;
- }
- local->op_errno = op_errno;
- } else {
- local->failed = 0;
- local->op_ret = op_ret;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed && local->name &&
- ZR_FILE_CONTENT_REQUEST(local->name)) {
- dict = get_new_dict ();
- dict_set (dict, local->dict->members_list->key,
- data_from_dynptr(NULL, 0));
- dict_ref (dict);
-
- local->call_count = 1;
-
- STACK_WIND (frame,
- unify_setxattr_file_cbk,
- NS(this),
- NS(this)->fops->setxattr,
- &local->loc1,
- dict,
- XATTR_CREATE);
-
- dict_unref (dict);
- return 0;
- }
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno);
- }
-
- return 0;
-}
-
-/**
- * unify_sexattr - This function should be sent to all the storage nodes,
- * which contains the file, (excluding namespace).
- */
-int32_t
-unify_setxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- int32_t flags)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = NULL;
- int16_t *list = NULL;
- int16_t index = 0;
- int32_t call_count = 0;
- uint64_t tmp_list = 0;
- data_pair_t *trav = dict->members_list;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->failed = -1;
- loc_copy (&local->loc1, loc);
-
- if (S_ISDIR (loc->inode->st_mode)) {
-
- if (trav && trav->key && ZR_FILE_CONTENT_REQUEST(trav->key)) {
- /* direct the storage xlators to change file
- content only if file exists */
- local->flags = flags;
- local->dict = dict;
- local->name = strdup (trav->key);
- flags |= XATTR_REPLACE;
- }
-
- local->call_count = priv->child_count;
- for (index = 0; index < priv->child_count; index++) {
- STACK_WIND (frame,
- unify_setxattr_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->setxattr,
- loc, dict, flags);
- }
- return 0;
- }
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- local->call_count++;
- call_count++;
- }
- }
-
- if (local->call_count) {
- for (index = 0; list[index] != -1; index++) {
- if (priv->xl_array[list[index]] != NS(this)) {
- STACK_WIND (frame,
- unify_setxattr_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->setxattr,
- loc,
- dict,
- flags);
- if (!--call_count)
- break;
- }
- }
- return 0;
- }
-
- /* No entry in storage nodes */
- gf_log (this->name, GF_LOG_DEBUG,
- "returning ENOENT, file not found on storage node.");
- STACK_UNWIND (frame, -1, ENOENT);
-
- return 0;
-}
-
-
-/**
- * unify_getxattr_cbk - This function is called from only one child, so, no
- * need of any lock or anything else, just send it to above layer
- */
-int32_t
-unify_getxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *value)
-{
- int32_t callcnt = 0;
- dict_t *local_value = NULL;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name,
- (((op_errno == ENOENT) ||
- (op_errno == ENODATA) ||
- (op_errno == ENOTSUP)) ?
- GF_LOG_DEBUG : GF_LOG_ERROR),
- "child(%s): path(%s): %s",
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
- } else {
- if (!local->dict)
- local->dict = dict_ref (value);
- local->op_ret = op_ret;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local_value = local->dict;
- local->dict = NULL;
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local_value);
-
- if (local_value)
- dict_unref (local_value);
- }
-
- return 0;
-}
-
-
-/**
- * unify_getxattr - This FOP is sent to only the storage node.
- */
-int32_t
-unify_getxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
-{
- unify_private_t *priv = this->private;
- int16_t *list = NULL;
- int16_t index = 0;
- int16_t count = 0;
- unify_local_t *local = NULL;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
- INIT_LOCAL (frame, local);
-
- if (S_ISDIR (loc->inode->st_mode)) {
- local->call_count = priv->child_count;
- for (index = 0; index < priv->child_count; index++)
- STACK_WIND (frame,
- unify_getxattr_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getxattr,
- loc,
- name);
- return 0;
- }
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- local->call_count++;
- count++;
- }
- }
-
- if (count) {
- for (index = 0; list[index] != -1; index++) {
- if (priv->xl_array[list[index]] != NS(this)) {
- STACK_WIND (frame,
- unify_getxattr_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->getxattr,
- loc,
- name);
- if (!--count)
- break;
- }
- }
- } else {
- dict_t *tmp_dict = get_new_dict ();
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: returning ENODATA, no file found on storage node",
- loc->path);
- STACK_UNWIND (frame, -1, ENODATA, tmp_dict);
- dict_destroy (tmp_dict);
- }
-
- return 0;
-}
-
-/**
- * unify_removexattr_cbk - Wait till all the child node returns the call
- * and then UNWIND to above layer.
- */
-int32_t
-unify_removexattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == -1) {
- local->op_errno = op_errno;
- if (op_errno != ENOTSUP)
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- prev_frame->this->name,
- local->loc1.path, strerror (op_errno));
- } else {
- local->op_ret = op_ret;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- STACK_UNWIND (frame, local->op_ret, local->op_errno);
- }
-
- return 0;
-}
-
-/**
- * unify_removexattr - Send it to all the child nodes which has the files.
- */
-int32_t
-unify_removexattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = NULL;
- int16_t *list = NULL;
- int16_t index = 0;
- int32_t call_count = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
-
- if (S_ISDIR (loc->inode->st_mode)) {
- local->call_count = priv->child_count;
- for (index = 0; index < priv->child_count; index++)
- STACK_WIND (frame,
- unify_removexattr_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->removexattr,
- loc,
- name);
-
- return 0;
- }
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- local->call_count++;
- call_count++;
- }
- }
-
- if (local->call_count) {
- for (index = 0; list[index] != -1; index++) {
- if (priv->xl_array[list[index]] != NS(this)) {
- STACK_WIND (frame,
- unify_removexattr_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->removexattr,
- loc,
- name);
- if (!--call_count)
- break;
- }
- }
- return 0;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: returning ENOENT, not found on storage node.", loc->path);
- STACK_UNWIND (frame, -1, ENOENT);
-
- return 0;
-}
-
-
-int32_t
-unify_mknod_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret == -1)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: %s", local->loc1.path, strerror (op_errno));
-
- unify_local_wipe (local);
- /* No log required here as this -1 is for mknod call */
- STACK_UNWIND (frame, -1, local->op_errno, NULL, NULL);
- return 0;
-}
-
-/**
- * unify_mknod_cbk -
- */
-int32_t
-unify_mknod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "mknod failed on storage node, sending unlink to "
- "namespace");
- local->op_errno = op_errno;
- STACK_WIND (frame,
- unify_mknod_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
- return 0;
- }
-
- local->stbuf = *buf;
- local->stbuf.st_ino = local->st_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, &local->stbuf);
- return 0;
-}
-
-/**
- * unify_ns_mknod_cbk -
- */
-int32_t
-unify_ns_mknod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- struct sched_ops *sched_ops = NULL;
- xlator_t *sched_xl = NULL;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t *list = NULL;
- int16_t index = 0;
- call_frame_t *prev_frame = cookie;
-
- if (op_ret == -1) {
- /* No need to send mknod request to other servers,
- * as namespace action failed
- */
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- prev_frame->this->name, local->loc1.path,
- strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
- return 0;
- }
-
- /* Create one inode for this entry */
- local->op_ret = 0;
- local->stbuf = *buf;
- local->st_ino = buf->st_ino;
-
- list = CALLOC (1, sizeof (int16_t) * 3);
- ERR_ABORT (list);
- list[0] = priv->child_count;
- list[2] = -1;
- inode_ctx_put (inode, this, (uint64_t)(long)list);
-
- sched_ops = priv->sched_ops;
-
- /* Send mknod request to scheduled node now */
- sched_xl = sched_ops->schedule (this, local->loc1.path);
- if (!sched_xl) {
- gf_log (this->name, GF_LOG_ERROR,
- "mknod failed on storage node, no node online "
- "at the moment, sending unlink to NS");
- local->op_errno = ENOTCONN;
- STACK_WIND (frame,
- unify_mknod_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- for (index = 0; index < priv->child_count; index++)
- if (sched_xl == priv->xl_array[index])
- break;
- list[1] = index;
-
- STACK_WIND (frame, unify_mknod_cbk,
- sched_xl, sched_xl->fops->mknod,
- &local->loc1, local->mode, local->dev);
-
- return 0;
-}
-
-/**
- * unify_mknod - Create a device on namespace first, and later create on
- * the storage node.
- */
-int32_t
-unify_mknod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode,
- dev_t rdev)
-{
- unify_local_t *local = NULL;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->mode = mode;
- local->dev = rdev;
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, loc->inode, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_mknod_cbk,
- NS(this),
- NS(this)->fops->mknod,
- loc,
- mode,
- rdev);
-
- return 0;
-}
-
-int32_t
-unify_symlink_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- unify_local_t *local = frame->local;
- if (op_ret == -1)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: %s", local->loc1.path, strerror (op_errno));
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, local->op_errno, NULL, NULL);
- return 0;
-}
-
-/**
- * unify_symlink_cbk -
- */
-int32_t
-unify_symlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- /* Symlink on storage node failed, hence send unlink
- to the NS node */
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_ERROR,
- "symlink on storage node failed, sending unlink "
- "to namespace");
-
- STACK_WIND (frame,
- unify_symlink_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- local->stbuf = *buf;
- local->stbuf.st_ino = local->st_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, &local->stbuf);
-
- return 0;
-}
-
-/**
- * unify_ns_symlink_cbk -
- */
-int32_t
-unify_ns_symlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
-
- struct sched_ops *sched_ops = NULL;
- xlator_t *sched_xl = NULL;
- int16_t *list = NULL;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t index = 0;
-
- if (op_ret == -1) {
- /* No need to send symlink request to other servers,
- * as namespace action failed
- */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s): %s",
- local->loc1.path, strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, NULL, buf);
- return 0;
- }
-
- /* Create one inode for this entry */
- local->op_ret = 0;
- local->st_ino = buf->st_ino;
-
- /* Start the mapping list */
-
- list = CALLOC (1, sizeof (int16_t) * 3);
- ERR_ABORT (list);
- list[0] = priv->child_count; //namespace's index
- list[2] = -1;
- inode_ctx_put (inode, this, (uint64_t)(long)list);
-
- sched_ops = priv->sched_ops;
-
- /* Send symlink request to all the nodes now */
- sched_xl = sched_ops->schedule (this, local->loc1.path);
- if (!sched_xl) {
- /* Symlink on storage node failed, hence send unlink
- to the NS node */
- local->op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_ERROR,
- "symlink on storage node failed, no node online, "
- "sending unlink to namespace");
-
- STACK_WIND (frame,
- unify_symlink_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- for (index = 0; index < priv->child_count; index++)
- if (sched_xl == priv->xl_array[index])
- break;
- list[1] = index;
-
- STACK_WIND (frame,
- unify_symlink_cbk,
- sched_xl,
- sched_xl->fops->symlink,
- local->name,
- &local->loc1);
-
- return 0;
-}
-
-/**
- * unify_symlink -
- */
-int32_t
-unify_symlink (call_frame_t *frame,
- xlator_t *this,
- const char *linkpath,
- loc_t *loc)
-{
- unify_local_t *local = NULL;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- local->name = strdup (linkpath);
-
- if ((local->name == NULL) ||
- (local->loc1.path == NULL)) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, loc->inode, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_symlink_cbk,
- NS(this),
- NS(this)->fops->symlink,
- linkpath,
- loc);
-
- return 0;
-}
-
-
-int32_t
-unify_rename_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s -> %s): %s",
- prev_frame->this->name,
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
-
- }
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local->stbuf.st_ino = local->st_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf);
- }
- return 0;
-}
-
-int32_t
-unify_ns_rename_undo_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s -> %s): %s",
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
- }
-
- local->stbuf.st_ino = local->st_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno, &local->stbuf);
- return 0;
-}
-
-int32_t
-unify_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- int32_t index = 0;
- int32_t callcnt = 0;
- int16_t *list = NULL;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret >= 0) {
- if (!S_ISDIR (buf->st_mode))
- local->stbuf = *buf;
- local->op_ret = op_ret;
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s -> %s): %s",
- prev_frame->this->name,
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
- local->op_errno = op_errno;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local->stbuf.st_ino = local->st_ino;
- if (S_ISDIR (local->loc1.inode->st_mode)) {
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno, &local->stbuf);
- return 0;
- }
-
- if (local->op_ret == -1) {
- /* TODO: check this logic */
-
- /* Rename failed in storage node, successful on NS,
- * hence, rename back the entries in NS */
- /* NOTE: this will be done only if the destination
- * doesn't exists, if the destination exists, the
- * job of correcting NS is left to self-heal
- */
- if (!local->index) {
- loc_t tmp_oldloc = {
- /* its actual 'newloc->path' */
- .path = local->loc2.path,
- .inode = local->loc1.inode,
- .parent = local->loc2.parent
- };
-
- loc_t tmp_newloc = {
- /* Actual 'oldloc->path' */
- .path = local->loc1.path,
- .parent = local->loc1.parent
- };
-
- gf_log (this->name, GF_LOG_ERROR,
- "rename succussful on namespace, on "
- "stroage node failed, reverting back");
-
- STACK_WIND (frame,
- unify_ns_rename_undo_cbk,
- NS(this),
- NS(this)->fops->rename,
- &tmp_oldloc,
- &tmp_newloc);
- return 0;
- }
- } else {
- /* Rename successful on storage nodes */
-
- int32_t idx = 0;
- int16_t *tmp_list = NULL;
- uint64_t tmp_list_int64 = 0;
- if (local->loc2.inode) {
- inode_ctx_get (local->loc2.inode,
- this, &tmp_list_int64);
- list = (int16_t *)(long)tmp_list_int64;
-
- }
-
- if (list) {
- for (index = 0; list[index] != -1; index++);
- tmp_list = CALLOC (1, index * 2);
- memcpy (tmp_list, list, index * 2);
-
- for (index = 0; list[index] != -1; index++) {
- /* TODO: Check this logic. */
- /* If the destination file exists in
- * the same storage node where we sent
- * 'rename' call, no need to send
- * unlink
- */
- for (idx = 0;
- local->list[idx] != -1; idx++) {
- if (tmp_list[index] == local->list[idx]) {
- tmp_list[index] = priv->child_count;
- continue;
- }
- }
-
- if (NS(this) != priv->xl_array[tmp_list[index]]) {
- local->call_count++;
- callcnt++;
- }
- }
-
- if (local->call_count) {
- if (callcnt > 1)
- gf_log (this->name,
- GF_LOG_ERROR,
- "%s->%s: more (%d) "
- "subvolumes have the "
- "newloc entry",
- local->loc1.path,
- local->loc2.path,
- callcnt);
-
- for (index=0;
- tmp_list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[tmp_list[index]]) {
- STACK_WIND (frame,
- unify_rename_unlink_cbk,
- priv->xl_array[tmp_list[index]],
- priv->xl_array[tmp_list[index]]->fops->unlink,
- &local->loc2);
- if (!--callcnt)
- break;
- }
- }
-
- FREE (tmp_list);
- return 0;
- }
- if (tmp_list)
- FREE (tmp_list);
- }
- }
-
- /* Need not send 'unlink' to storage node */
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret,
- local->op_errno, &local->stbuf);
- }
-
- return 0;
-}
-
-int32_t
-unify_ns_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- int32_t index = 0;
- int32_t callcnt = 0;
- int16_t *list = NULL;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- /* Free local->new_inode */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s -> %s): %s",
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
- }
-
- local->stbuf = *buf;
- local->st_ino = buf->st_ino;
-
- /* Everything is fine. */
- if (S_ISDIR (buf->st_mode)) {
- local->call_count = priv->child_count;
- for (index=0; index < priv->child_count; index++) {
- STACK_WIND (frame,
- unify_rename_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->rename,
- &local->loc1,
- &local->loc2);
- }
-
- return 0;
- }
-
- local->call_count = 0;
- /* send rename */
- list = local->list;
- for (index=0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- local->call_count++;
- callcnt++;
- }
- }
-
- if (local->call_count) {
- for (index=0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- STACK_WIND (frame,
- unify_rename_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->rename,
- &local->loc1,
- &local->loc2);
- if (!--callcnt)
- break;
- }
- }
- } else {
- /* file doesn't seem to be present in storage nodes */
- gf_log (this->name, GF_LOG_CRITICAL,
- "CRITICAL: source file not in storage node, "
- "rename successful on namespace :O");
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, EIO, NULL);
- }
- return 0;
-}
-
-
-/**
- * unify_rename - One of the tricky function. The deadliest of all :O
- */
-int32_t
-unify_rename (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
-{
- unify_local_t *local = NULL;
- uint64_t tmp_list = 0;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, oldloc);
- loc_copy (&local->loc2, newloc);
-
- if ((local->loc1.path == NULL) ||
- (local->loc2.path == NULL)) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- return 0;
- }
-
- inode_ctx_get (oldloc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- STACK_WIND (frame,
- unify_ns_rename_cbk,
- NS(this),
- NS(this)->fops->rename,
- oldloc,
- newloc);
- return 0;
-}
-
-/**
- * unify_link_cbk -
- */
-int32_t
-unify_link_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret >= 0)
- local->stbuf = *buf;
- local->stbuf.st_ino = local->st_ino;
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, &local->stbuf);
-
- return 0;
-}
-
-/**
- * unify_ns_link_cbk -
- */
-int32_t
-unify_ns_link_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- int16_t *list = local->list;
- int16_t index = 0;
-
- if (op_ret == -1) {
- /* No need to send link request to other servers,
- * as namespace action failed
- */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s -> %s): %s",
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
- return 0;
- }
-
- /* Update inode for this entry */
- local->op_ret = 0;
- local->st_ino = buf->st_ino;
-
- /* Send link request to the node now */
- for (index = 0; list[index] != -1; index++) {
- char need_break = (list[index+1] == -1);
- if (priv->xl_array[list[index]] != NS (this)) {
- STACK_WIND (frame,
- unify_link_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->link,
- &local->loc1,
- &local->loc2);
- break;
- }
- if (need_break)
- break;
- }
-
- return 0;
-}
-
-/**
- * unify_link -
- */
-int32_t
-unify_link (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
-{
- unify_local_t *local = NULL;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (oldloc);
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (newloc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
-
- loc_copy (&local->loc1, oldloc);
- loc_copy (&local->loc2, newloc);
-
- inode_ctx_get (oldloc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- STACK_WIND (frame,
- unify_ns_link_cbk,
- NS(this),
- NS(this)->fops->link,
- oldloc,
- newloc);
-
- return 0;
-}
-
-
-/**
- * unify_checksum_cbk -
- */
-int32_t
-unify_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *fchecksum,
- uint8_t *dchecksum)
-{
- STACK_UNWIND (frame, op_ret, op_errno, fchecksum, dchecksum);
-
- return 0;
-}
-
-/**
- * unify_checksum -
- */
-int32_t
-unify_checksum (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flag)
-{
- STACK_WIND (frame,
- unify_checksum_cbk,
- NS(this),
- NS(this)->fops->checksum,
- loc,
- flag);
-
- return 0;
-}
-
-
-/**
- * unify_finodelk_cbk -
- */
-int
-unify_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * unify_finodelk
- */
-int
-unify_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int cmd, struct flock *flock)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_finodelk_cbk,
- child, child->fops->finodelk,
- volume, fd, cmd, flock);
-
- return 0;
-}
-
-
-
-/**
- * unify_fentrylk_cbk -
- */
-int
-unify_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * unify_fentrylk
- */
-int
-unify_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
-
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_fentrylk_cbk,
- child, child->fops->fentrylk,
- volume, fd, basename, cmd, type);
-
- return 0;
-}
-
-
-
-/**
- * unify_fxattrop_cbk -
- */
-int
-unify_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
-{
- STACK_UNWIND (frame, op_ret, op_errno, xattr);
- return 0;
-}
-
-/**
- * unify_fxattrop
- */
-int
-unify_fxattrop (call_frame_t *frame, xlator_t *this,
- fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_fxattrop_cbk,
- child, child->fops->fxattrop,
- fd, optype, xattr);
-
- return 0;
-}
-
-
-/**
- * unify_inodelk_cbk -
- */
-int
-unify_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-/**
- * unify_inodelk
- */
-int
-unify_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int cmd, struct flock *flock)
-{
- xlator_t *child = NULL;
-
- child = unify_loc_subvol (loc, this);
-
- STACK_WIND (frame, unify_inodelk_cbk,
- child, child->fops->inodelk,
- volume, loc, cmd, flock);
-
- return 0;
-}
-
-
-
-/**
- * unify_entrylk_cbk -
- */
-int
-unify_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * unify_entrylk
- */
-int
-unify_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
-
-{
- xlator_t *child = NULL;
-
- child = unify_loc_subvol (loc, this);
-
- STACK_WIND (frame, unify_entrylk_cbk,
- child, child->fops->entrylk,
- volume, loc, basename, cmd, type);
-
- return 0;
-}
-
-
-
-/**
- * unify_xattrop_cbk -
- */
-int
-unify_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
-{
- STACK_UNWIND (frame, op_ret, op_errno, xattr);
- return 0;
-}
-
-/**
- * unify_xattrop
- */
-int
-unify_xattrop (call_frame_t *frame, xlator_t *this,
- loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr)
-{
- xlator_t *child = NULL;
-
- child = unify_loc_subvol (loc, this);
-
- STACK_WIND (frame, unify_xattrop_cbk,
- child, child->fops->xattrop,
- loc, optype, xattr);
-
- return 0;
-}
-
-int
-unify_forget (xlator_t *this,
- inode_t *inode)
-{
- int16_t *list = NULL;
- uint64_t tmp_list = 0;
-
- if (inode->st_mode && (!S_ISDIR(inode->st_mode))) {
- inode_ctx_get (inode, this, &tmp_list);
- if (tmp_list) {
- list = (int16_t *)(long)tmp_list;
- FREE (list);
- }
- }
-
- return 0;
-}
-
-/**
- * notify
- */
-int32_t
-notify (xlator_t *this,
- int32_t event,
- void *data,
- ...)
-{
- unify_private_t *priv = this->private;
- struct sched_ops *sched = NULL;
-
- if (!priv) {
- return 0;
- }
-
- sched = priv->sched_ops;
- if (!sched) {
- gf_log (this->name, GF_LOG_CRITICAL, "No scheduler :O");
- raise (SIGTERM);
- return 0;
- }
- if (priv->namespace == data) {
- if (event == GF_EVENT_CHILD_UP) {
- sched->notify (this, event, data);
- }
- return 0;
- }
-
- switch (event)
- {
- case GF_EVENT_CHILD_UP:
- {
- /* Call scheduler's update () to enable it for scheduling */
- sched->notify (this, event, data);
-
- LOCK (&priv->lock);
- {
- /* Increment the inode's generation, which is
- used for self_heal */
- ++priv->inode_generation;
- ++priv->num_child_up;
- }
- UNLOCK (&priv->lock);
-
- if (!priv->is_up) {
- default_notify (this, event, data);
- priv->is_up = 1;
- }
- }
- break;
- case GF_EVENT_CHILD_DOWN:
- {
- /* Call scheduler's update () to disable the child node
- * for scheduling
- */
- sched->notify (this, event, data);
- LOCK (&priv->lock);
- {
- --priv->num_child_up;
- }
- UNLOCK (&priv->lock);
-
- if (priv->num_child_up == 0) {
- /* Send CHILD_DOWN to upper layer */
- default_notify (this, event, data);
- priv->is_up = 0;
- }
- }
- break;
-
- default:
- {
- default_notify (this, event, data);
- }
- break;
- }
-
- return 0;
-}
-
-/**
- * init - This function is called first in the xlator, while initializing.
- * All the config file options are checked and appropriate flags are set.
- *
- * @this -
- */
-int32_t
-init (xlator_t *this)
-{
- int32_t ret = 0;
- int32_t count = 0;
- data_t *scheduler = NULL;
- data_t *data = NULL;
- xlator_t *ns_xl = NULL;
- xlator_list_t *trav = NULL;
- xlator_list_t *xlparent = NULL;
- xlator_list_t *parent = NULL;
- unify_private_t *_private = NULL;
-
- /* Check for number of child nodes, if there is no child nodes, exit */
- if (!this->children) {
- gf_log (this->name, GF_LOG_ERROR,
- "No child nodes specified. check \"subvolumes \" "
- "option in volfile");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
- /* Check for 'scheduler' in volume */
- scheduler = dict_get (this->options, "scheduler");
- if (!scheduler) {
- gf_log (this->name, GF_LOG_ERROR,
- "\"option scheduler <x>\" is missing in volfile");
- return -1;
- }
-
- /* Setting "option namespace <node>" */
- data = dict_get (this->options, "namespace");
- if(!data) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "namespace option not specified, Exiting");
- return -1;
- }
- /* Search namespace in the child node, if found, exit */
- trav = this->children;
- while (trav) {
- if (strcmp (trav->xlator->name, data->data) == 0)
- break;
- trav = trav->next;
- }
- if (trav) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "namespace node used as a subvolume, Exiting");
- return -1;
- }
-
- /* Search for the namespace node, if found, continue */
- ns_xl = this->next;
- while (ns_xl) {
- if (strcmp (ns_xl->name, data->data) == 0)
- break;
- ns_xl = ns_xl->next;
- }
- if (!ns_xl) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "namespace node not found in volfile, Exiting");
- return -1;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "namespace node specified as %s", data->data);
-
- _private = CALLOC (1, sizeof (*_private));
- ERR_ABORT (_private);
- _private->sched_ops = get_scheduler (this, scheduler->data);
- if (!_private->sched_ops) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Error while loading scheduler. Exiting");
- FREE (_private);
- return -1;
- }
-
- if (ns_xl->parents) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Namespace node should not be a child of any other node. Exiting");
- FREE (_private);
- return -1;
- }
-
- _private->namespace = ns_xl;
-
- /* update _private structure */
- {
- count = 0;
- trav = this->children;
- /* Get the number of child count */
- while (trav) {
- count++;
- trav = trav->next;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Child node count is %d", count);
-
- _private->child_count = count;
- if (count == 1) {
- /* TODO: Should I error out here? */
- gf_log (this->name, GF_LOG_CRITICAL,
- "WARNING: You have defined only one "
- "\"subvolumes\" for unify volume. It may not "
- "be the desired config, review your volume "
- "volfile. If this is how you are testing it,"
- " you may hit some performance penalty");
- }
-
- _private->xl_array = CALLOC (1,
- sizeof (xlator_t) * (count + 1));
- ERR_ABORT (_private->xl_array);
-
- count = 0;
- trav = this->children;
- while (trav) {
- _private->xl_array[count++] = trav->xlator;
- trav = trav->next;
- }
- _private->xl_array[count] = _private->namespace;
-
- /* self-heal part, start with generation '1' */
- _private->inode_generation = 1;
- /* Because, Foreground part is tested well */
- _private->self_heal = ZR_UNIFY_FG_SELF_HEAL;
- data = dict_get (this->options, "self-heal");
- if (data) {
- if (strcasecmp (data->data, "off") == 0)
- _private->self_heal = ZR_UNIFY_SELF_HEAL_OFF;
-
- if (strcasecmp (data->data, "foreground") == 0)
- _private->self_heal = ZR_UNIFY_FG_SELF_HEAL;
-
- if (strcasecmp (data->data, "background") == 0)
- _private->self_heal = ZR_UNIFY_BG_SELF_HEAL;
- }
-
- /* optimist - ask bulde for more about it */
- data = dict_get (this->options, "optimist");
- if (data) {
- if (gf_string2boolean (data->data,
- &_private->optimist) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "optimist excepts only boolean "
- "options");
- }
- }
-
- LOCK_INIT (&_private->lock);
- }
-
- /* Now that everything is fine. */
- this->private = (void *)_private;
- {
- /* Initialize scheduler, if everything else is successful */
- ret = _private->sched_ops->init (this);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Initializing scheduler failed, Exiting");
- FREE (_private);
- return -1;
- }
-
- ret = 0;
-
- /* This section is required because some fops may look
- * for 'xl->parent' variable
- */
- xlparent = CALLOC (1, sizeof (*xlparent));
- xlparent->xlator = this;
- if (!ns_xl->parents) {
- ns_xl->parents = xlparent;
- } else {
- parent = ns_xl->parents;
- while (parent->next)
- parent = parent->next;
- parent->next = xlparent;
- }
- /* Initialize the namespace volume */
- if (!ns_xl->ready) {
- ret = xlator_tree_init (ns_xl);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "initializing namespace node failed, "
- "Exiting");
- FREE (_private);
- return -1;
- }
- }
- }
-
- /* Tell namespace node that init is done */
- xlator_notify (ns_xl, GF_EVENT_PARENT_UP, this);
-
- return 0;
-}
-
-/**
- * fini - Free all the allocated memory
- */
-void
-fini (xlator_t *this)
-{
- unify_private_t *priv = this->private;
- priv->sched_ops->fini (this);
- this->private = NULL;
- LOCK_DESTROY (&priv->lock);
- FREE (priv->xl_array);
- FREE (priv);
- return;
-}
-
-
-struct xlator_fops fops = {
- .stat = unify_stat,
- .chmod = unify_chmod,
- .readlink = unify_readlink,
- .mknod = unify_mknod,
- .mkdir = unify_mkdir,
- .unlink = unify_unlink,
- .rmdir = unify_rmdir,
- .symlink = unify_symlink,
- .rename = unify_rename,
- .link = unify_link,
- .chown = unify_chown,
- .truncate = unify_truncate,
- .create = unify_create,
- .open = unify_open,
- .readv = unify_readv,
- .writev = unify_writev,
- .statfs = unify_statfs,
- .flush = unify_flush,
- .fsync = unify_fsync,
- .setxattr = unify_setxattr,
- .getxattr = unify_getxattr,
- .removexattr = unify_removexattr,
- .opendir = unify_opendir,
- .readdir = unify_readdir,
- .fsyncdir = unify_fsyncdir,
- .access = unify_access,
- .ftruncate = unify_ftruncate,
- .fstat = unify_fstat,
- .lk = unify_lk,
- .fchown = unify_fchown,
- .fchmod = unify_fchmod,
- .utimens = unify_utimens,
- .lookup = unify_lookup,
- .getdents = unify_getdents,
- .checksum = unify_checksum,
- .inodelk = unify_inodelk,
- .finodelk = unify_finodelk,
- .entrylk = unify_entrylk,
- .fentrylk = unify_fentrylk,
- .xattrop = unify_xattrop,
- .fxattrop = unify_fxattrop
-};
-
-struct xlator_mops mops = {
-};
-
-struct xlator_cbks cbks = {
- .forget = unify_forget,
-};
-
-struct volume_options options[] = {
- { .key = { "namespace" },
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = { "scheduler" },
- .value = { "alu", "rr", "random", "nufa", "switch" },
- .type = GF_OPTION_TYPE_STR
- },
- { .key = {"self-heal"},
- .value = { "foreground", "background", "off" },
- .type = GF_OPTION_TYPE_STR
- },
- /* TODO: remove it some time later */
- { .key = {"optimist"},
- .type = GF_OPTION_TYPE_BOOL
- },
-
- { .key = {NULL} },
-};
diff --git a/xlators/cluster/unify/src/unify.h b/xlators/cluster/unify/src/unify.h
deleted file mode 100644
index 9841e5c7a..000000000
--- a/xlators/cluster/unify/src/unify.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#ifndef _UNIFY_H
-#define _UNIFY_H
-
-#include "scheduler.h"
-#include "list.h"
-
-#define MAX_DIR_ENTRY_STRING (32 * 1024)
-
-#define ZR_UNIFY_SELF_HEAL_OFF 0
-#define ZR_UNIFY_FG_SELF_HEAL 1
-#define ZR_UNIFY_BG_SELF_HEAL 2
-
-/* Sometimes one should use completely random numbers.. its good :p */
-#define UNIFY_SELF_HEAL_GETDENTS_COUNT 512
-
-#define NS(xl) (((unify_private_t *)xl->private)->namespace)
-
-/* This is used to allocate memory for local structure */
-#define INIT_LOCAL(fr, loc) \
-do { \
- loc = CALLOC (1, sizeof (unify_local_t)); \
- ERR_ABORT (loc); \
- if (!loc) { \
- STACK_UNWIND (fr, -1, ENOMEM); \
- return 0; \
- } \
- fr->local = loc; \
- loc->op_ret = -1; \
- loc->op_errno = ENOENT; \
-} while (0)
-
-
-
-struct unify_private {
- /* Update this structure depending on requirement */
- void *scheduler; /* THIS SHOULD BE THE FIRST VARIABLE,
- if xlator is using scheduler */
- struct sched_ops *sched_ops; /* Scheduler options */
- xlator_t *namespace; /* ptr to namespace xlator */
- xlator_t **xl_array;
- gf_boolean_t optimist;
- int16_t child_count;
- int16_t num_child_up;
- uint8_t self_heal;
- uint8_t is_up;
- uint64_t inode_generation;
- gf_lock_t lock;
-};
-typedef struct unify_private unify_private_t;
-
-struct unify_self_heal_struct {
- uint8_t dir_checksum[NAME_MAX];
- uint8_t ns_dir_checksum[NAME_MAX];
- uint8_t file_checksum[NAME_MAX];
- uint8_t ns_file_checksum[NAME_MAX];
- off_t *offset_list;
- int *count_list;
- dir_entry_t **entry_list;
-};
-
-
-struct _unify_local_t {
- int32_t call_count;
- int32_t op_ret;
- int32_t op_errno;
- mode_t mode;
- off_t offset;
- dev_t dev;
- uid_t uid;
- gid_t gid;
- int32_t flags;
- int32_t entry_count;
- int32_t count; // dir_entry_t count;
- fd_t *fd;
- struct stat stbuf;
- struct statvfs statvfs_buf;
- struct timespec tv[2];
- char *name;
- int32_t revalidate;
-
- ino_t st_ino;
- nlink_t st_nlink;
-
- dict_t *dict;
-
- int16_t *list;
- int16_t *new_list; /* Used only in case of rename */
- int16_t index;
-
- int32_t failed;
- int32_t return_eio; /* Used in case of different st-mode
- present for a given path */
-
- uint64_t inode_generation; /* used to store the per directory
- * inode_generation. Got from inode's ctx
- * of directory inodes
- */
-
- struct unify_self_heal_struct *sh_struct;
- loc_t loc1, loc2;
-};
-typedef struct _unify_local_t unify_local_t;
-
-int32_t zr_unify_self_heal (call_frame_t *frame,
- xlator_t *this,
- unify_local_t *local);
-
-#endif /* _UNIFY_H */
diff --git a/xlators/debug/error-gen/src/Makefile.am b/xlators/debug/error-gen/src/Makefile.am
index 1bd7f332c..5075c59a8 100644
--- a/xlators/debug/error-gen/src/Makefile.am
+++ b/xlators/debug/error-gen/src/Makefile.am
@@ -2,13 +2,16 @@
xlator_LTLIBRARIES = error-gen.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/debug
-error_gen_la_LDFLAGS = -module -avoidversion
+error_gen_la_LDFLAGS = -module -avoid-version
error_gen_la_SOURCES = error-gen.c
error_gen_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+noinst_HEADERS = error-gen.h error-gen-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/debug/error-gen/src/error-gen-mem-types.h b/xlators/debug/error-gen/src/error-gen-mem-types.h
new file mode 100644
index 000000000..f02280535
--- /dev/null
+++ b/xlators/debug/error-gen/src/error-gen-mem-types.h
@@ -0,0 +1,20 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __ERROR_GEN_MEM_TYPES_H__
+#define __ERROR_GEN_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_error_gen_mem_types_ {
+ gf_error_gen_mt_eg_t = gf_common_mt_end + 1,
+ gf_error_gen_mt_end
+};
+#endif
diff --git a/xlators/debug/error-gen/src/error-gen.c b/xlators/debug/error-gen/src/error-gen.c
index a14d7beda..ec0874b35 100644
--- a/xlators/debug/error-gen/src/error-gen.c
+++ b/xlators/debug/error-gen/src/error-gen.c
@@ -1,1733 +1,2163 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
#endif
#include "xlator.h"
+#include "error-gen.h"
+#include "statedump.h"
+
+sys_error_t error_no_list[] = {
+ [GF_FOP_LOOKUP] = { .error_no_count = 4,
+ .error_no = {ENOENT,ENOTDIR,
+ ENAMETOOLONG,EAGAIN}},
+ [GF_FOP_STAT] = { .error_no_count = 7,
+ .error_no = {EACCES,EBADF,EFAULT,
+ ENAMETOOLONG,ENOENT,
+ ENOMEM,ENOTDIR}},
+ [GF_FOP_READLINK] = { .error_no_count = 8,
+ .error_no = {EACCES,EFAULT,EINVAL,EIO,
+ ENAMETOOLONG,ENOENT,ENOMEM,
+ ENOTDIR}},
+ [GF_FOP_MKNOD] = { .error_no_count = 11,
+ .error_no = {EACCES,EEXIST,EFAULT,
+ EINVAL,ENAMETOOLONG,
+ ENOENT,ENOMEM,ENOSPC,
+ ENOTDIR,EPERM,EROFS}},
+ [GF_FOP_MKDIR] = { .error_no_count = 10,
+ .error_no = {EACCES,EEXIST,EFAULT,
+ ENAMETOOLONG,ENOENT,
+ ENOMEM,ENOSPC,ENOTDIR,
+ EPERM,EROFS}},
+ [GF_FOP_UNLINK] = { .error_no_count = 10,
+ .error_no = {EACCES,EBUSY,EFAULT,EIO,
+ EISDIR,ENAMETOOLONG,
+ ENOENT,ENOMEM,ENOTDIR,
+ EPERM,EROFS}},
+ [GF_FOP_RMDIR] = { .error_no_count = 8,
+ .error_no = {EACCES,EBUSY,EFAULT,
+ ENOMEM,ENOTDIR,ENOTEMPTY,
+ EPERM,EROFS}},
+ [GF_FOP_SYMLINK] = { .error_no_count = 11,
+ .error_no = {EACCES,EEXIST,EFAULT,EIO,
+ ENAMETOOLONG,ENOENT,ENOMEM,
+ ENOSPC,ENOTDIR,EPERM,
+ EROFS}},
+ [GF_FOP_RENAME] = { .error_no_count = 13,
+ .error_no = {EACCES,EBUSY,EFAULT,
+ EINVAL,EISDIR,EMLINK,
+ ENAMETOOLONG,ENOENT,ENOMEM,
+ ENOSPC,ENOTDIR,EEXIST,
+ EXDEV}},
+ [GF_FOP_LINK] = { .error_no_count = 13,
+ .error_no = {EACCES,EFAULT,EEXIST,EIO,
+ EMLINK,ENAMETOOLONG,
+ ENOENT,ENOMEM,ENOSPC,
+ ENOTDIR,EPERM,EROFS,
+ EXDEV}},
+ [GF_FOP_TRUNCATE] = { .error_no_count = 10,
+ .error_no = {EACCES,EFAULT,EFBIG,
+ EINTR,EINVAL,EIO,EISDIR,
+ ENAMETOOLONG,ENOENT,
+ EISDIR}},
+ [GF_FOP_CREATE] = {.error_no_count = 10,
+ .error_no = {EACCES,EEXIST,EFAULT,
+ EISDIR,EMFILE,ENAMETOOLONG,
+ ENFILE,ENODEV,ENOENT,
+ ENODEV}},
+ [GF_FOP_OPEN] = { .error_no_count = 10,
+ .error_no = {EACCES,EEXIST,EFAULT,
+ EISDIR,EMFILE,
+ ENAMETOOLONG,ENFILE,
+ ENODEV,ENOENT,ENOMEM}},
+ [GF_FOP_READ] = { .error_no_count = 5,
+ .error_no = {EINVAL,EBADF,EFAULT,EISDIR,
+ ENAMETOOLONG}},
+ [GF_FOP_WRITE] = { .error_no_count = 7,
+ .error_no = {EINVAL,EBADF,EFAULT,EISDIR,
+ ENAMETOOLONG,ENOSPC,
+ GF_ERROR_SHORT_WRITE}},
+ [GF_FOP_STATFS] = {.error_no_count = 10,
+ .error_no = {EACCES,EBADF,EFAULT,EINTR,
+ EIO,ENAMETOOLONG,ENOENT,
+ ENOMEM,ENOSYS,ENOTDIR}},
+ [GF_FOP_FLUSH] = { .error_no_count = 5,
+ .error_no = {EACCES,EFAULT,
+ ENAMETOOLONG,ENOSYS,
+ ENOENT}},
+ [GF_FOP_FSYNC] = { .error_no_count = 4,
+ .error_no = {EBADF,EIO,EROFS,EINVAL}},
+ [GF_FOP_SETXATTR] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,EINTR,
+ ENAMETOOLONG}},
+ [GF_FOP_GETXATTR] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,ENAMETOOLONG,
+ EINTR}},
+ [GF_FOP_REMOVEXATTR] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,ENAMETOOLONG,
+ EINTR}},
+ [GF_FOP_FSETXATTR] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,EINTR,
+ ENAMETOOLONG}},
+ [GF_FOP_FGETXATTR] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,ENAMETOOLONG,
+ EINTR}},
+ [GF_FOP_FREMOVEXATTR] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,ENAMETOOLONG,
+ EINTR}},
+ [GF_FOP_OPENDIR] = { .error_no_count = 8,
+ .error_no = {EACCES,EEXIST,EFAULT,
+ EISDIR,EMFILE,
+ ENAMETOOLONG,ENFILE,
+ ENODEV}},
+ [GF_FOP_READDIR] = { .error_no_count = 5,
+ .error_no = {EINVAL,EACCES,EBADF,
+ EMFILE,ENOENT}},
+ [GF_FOP_READDIRP] = { .error_no_count = 5,
+ .error_no = {EINVAL,EACCES,EBADF,
+ EMFILE,ENOENT}},
+ [GF_FOP_FSYNCDIR] = { .error_no_count = 4,
+ .error_no = {EBADF,EIO,EROFS,EINVAL}},
+ [GF_FOP_ACCESS] = { .error_no_count = 8,
+ .error_no = {EACCES,ENAMETOOLONG,
+ ENOENT,ENOTDIR,EROFS,
+ EFAULT,EINVAL,EIO}},
+ [GF_FOP_FTRUNCATE] = { .error_no_count = 9,
+ .error_no = {EACCES,EFAULT,EFBIG,
+ EINTR,EINVAL,EIO,EISDIR,
+ ENAMETOOLONG,ENOENT}},
+ [GF_FOP_FSTAT] = { .error_no_count = 7,
+ .error_no = {EACCES,EBADF,EFAULT,
+ ENAMETOOLONG,ENOENT,
+ ENOMEM,ENOTDIR}},
+ [GF_FOP_LK] = { .error_no_count = 4,
+ .error_no = {EACCES,EFAULT,ENOENT,
+ EINTR}},
+ [GF_FOP_XATTROP] = { .error_no_count = 5,
+ .error_no = {EACCES,EFAULT,
+ ENAMETOOLONG,ENOSYS,
+ ENOENT}},
+ [GF_FOP_FXATTROP] = { .error_no_count = 4,
+ .error_no = {EBADF,EIO,EROFS,EINVAL}},
+ [GF_FOP_INODELK] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,EINTR,
+ ENAMETOOLONG}},
+ [GF_FOP_FINODELK] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,EINTR,
+ ENAMETOOLONG}},
+ [GF_FOP_ENTRYLK] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,
+ ENAMETOOLONG,EINTR}},
+ [GF_FOP_FENTRYLK] = { .error_no_count = 10,
+ .error_no = {EACCES,EEXIST,EFAULT,
+ EISDIR,EMFILE,
+ ENAMETOOLONG,ENFILE,
+ ENODEV,ENOENT,ENOMEM}},
+ [GF_FOP_SETATTR] = {.error_no_count = 11,
+ .error_no = {EACCES,EFAULT,EIO,
+ ENAMETOOLONG,ENOENT,
+ ENOMEM,ENOTDIR,EPERM,
+ EROFS,EBADF,EIO}},
+ [GF_FOP_FSETATTR] = { .error_no_count = 11,
+ .error_no = {EACCES,EFAULT,EIO,
+ ENAMETOOLONG,ENOENT,
+ ENOMEM,ENOTDIR,EPERM,
+ EROFS,EBADF,EIO}},
+ [GF_FOP_GETSPEC] = { .error_no_count = 4,
+ .error_no = {EACCES,EBADF,ENAMETOOLONG,
+ EINTR}}
+};
-typedef struct {
- int op_count;
-} eg_t;
-
-int error_gen (xlator_t *this)
-{
- eg_t *egp = NULL;
- int count = 0;
- egp = this->private;
- count = ++egp->op_count;
- if((count % 10) == 0) {
- count = count / 10;
- if ((count % 2) == 0)
- return ENOTCONN;
- else
- return EIO;
- }
- return 0;
+int
+generate_rand_no (int op_no)
+{
+ int rand_no = 0;
+
+ if (op_no < GF_FOP_MAXVALUE)
+ rand_no = rand () % error_no_list[op_no].error_no_count;
+ return rand_no;
}
-static int32_t
-error_gen_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf,
- dict_t *dict)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- inode,
- buf,
- dict);
- return 0;
+int
+conv_errno_to_int (char **error_no)
+{
+ if (!strcmp ((*error_no), "ENOENT"))
+ return ENOENT;
+ else if (!strcmp ((*error_no), "ENOTDIR"))
+ return ENOTDIR;
+ else if (!strcmp ((*error_no), "ENAMETOOLONG"))
+ return ENAMETOOLONG;
+ else if (!strcmp ((*error_no), "EACCES"))
+ return EACCES;
+ else if (!strcmp ((*error_no), "EBADF"))
+ return EBADF;
+ else if (!strcmp ((*error_no), "EFAULT"))
+ return EFAULT;
+ else if (!strcmp ((*error_no), "ENOMEM"))
+ return ENOMEM;
+ else if (!strcmp ((*error_no), "EINVAL"))
+ return EINVAL;
+ else if (!strcmp ((*error_no), "EIO"))
+ return EIO;
+ else if (!strcmp ((*error_no), "EEXIST"))
+ return EEXIST;
+ else if (!strcmp ((*error_no), "ENOSPC"))
+ return ENOSPC;
+ else if (!strcmp ((*error_no), "EPERM"))
+ return EPERM;
+ else if (!strcmp ((*error_no), "EROFS"))
+ return EROFS;
+ else if (!strcmp ((*error_no), "EBUSY"))
+ return EBUSY;
+ else if (!strcmp ((*error_no), "EISDIR"))
+ return EISDIR;
+ else if (!strcmp ((*error_no), "ENOTEMPTY"))
+ return ENOTEMPTY;
+ else if (!strcmp ((*error_no), "EMLINK"))
+ return EMLINK;
+ else if (!strcmp ((*error_no), "ENODEV"))
+ return ENODEV;
+ else if (!strcmp ((*error_no), "EXDEV"))
+ return EXDEV;
+ else if (!strcmp ((*error_no), "EMFILE"))
+ return EMFILE;
+ else if (!strcmp ((*error_no), "ENFILE"))
+ return ENFILE;
+ else if (!strcmp ((*error_no), "ENOSYS"))
+ return ENOSYS;
+ else if (!strcmp ((*error_no), "EINTR"))
+ return EINTR;
+ else if (!strcmp ((*error_no), "EFBIG"))
+ return EFBIG;
+ else if (!strcmp((*error_no), "GF_ERROR_SHORT_WRITE"))
+ return GF_ERROR_SHORT_WRITE;
+ else
+ return EAGAIN;
}
-int32_t
-error_gen_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req)
-{
- int op_errno = 0;
- op_errno = error_gen(this);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
- }
- STACK_WIND (frame,
- error_gen_lookup_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup,
- loc,
- xattr_req);
- return 0;
+int
+get_fop_int (char **op_no_str)
+{
+ if (!strcmp ((*op_no_str), "lookup"))
+ return GF_FOP_LOOKUP;
+ else if (!strcmp ((*op_no_str), "stat"))
+ return GF_FOP_STAT;
+ else if (!strcmp ((*op_no_str), "readlink"))
+ return GF_FOP_READLINK;
+ else if (!strcmp ((*op_no_str), "mknod"))
+ return GF_FOP_MKNOD;
+ else if (!strcmp ((*op_no_str), "mkdir"))
+ return GF_FOP_MKDIR;
+ else if (!strcmp ((*op_no_str), "unlink"))
+ return GF_FOP_UNLINK;
+ else if (!strcmp ((*op_no_str), "rmdir"))
+ return GF_FOP_RMDIR;
+ else if (!strcmp ((*op_no_str), "symlink"))
+ return GF_FOP_SYMLINK;
+ else if (!strcmp ((*op_no_str), "rename"))
+ return GF_FOP_RENAME;
+ else if (!strcmp ((*op_no_str), "link"))
+ return GF_FOP_LINK;
+ else if (!strcmp ((*op_no_str), "truncate"))
+ return GF_FOP_TRUNCATE;
+ else if (!strcmp ((*op_no_str), "create"))
+ return GF_FOP_CREATE;
+ else if (!strcmp ((*op_no_str), "open"))
+ return GF_FOP_OPEN;
+ else if (!strcmp ((*op_no_str), "readv"))
+ return GF_FOP_READ;
+ else if (!strcmp ((*op_no_str), "writev"))
+ return GF_FOP_WRITE;
+ else if (!strcmp ((*op_no_str), "statfs"))
+ return GF_FOP_STATFS;
+ else if (!strcmp ((*op_no_str), "flush"))
+ return GF_FOP_FLUSH;
+ else if (!strcmp ((*op_no_str), "fsync"))
+ return GF_FOP_FSYNC;
+ else if (!strcmp ((*op_no_str), "setxattr"))
+ return GF_FOP_SETXATTR;
+ else if (!strcmp ((*op_no_str), "getxattr"))
+ return GF_FOP_GETXATTR;
+ else if (!strcmp ((*op_no_str), "removexattr"))
+ return GF_FOP_REMOVEXATTR;
+ else if (!strcmp ((*op_no_str), "fsetxattr"))
+ return GF_FOP_FSETXATTR;
+ else if (!strcmp ((*op_no_str), "fgetxattr"))
+ return GF_FOP_FGETXATTR;
+ else if (!strcmp ((*op_no_str), "fremovexattr"))
+ return GF_FOP_FREMOVEXATTR;
+ else if (!strcmp ((*op_no_str), "opendir"))
+ return GF_FOP_OPENDIR;
+ else if (!strcmp ((*op_no_str), "readdir"))
+ return GF_FOP_READDIR;
+ else if (!strcmp ((*op_no_str), "readdirp"))
+ return GF_FOP_READDIRP;
+ else if (!strcmp ((*op_no_str), "fsyncdir"))
+ return GF_FOP_FSYNCDIR;
+ else if (!strcmp ((*op_no_str), "access"))
+ return GF_FOP_ACCESS;
+ else if (!strcmp ((*op_no_str), "ftruncate"))
+ return GF_FOP_FTRUNCATE;
+ else if (!strcmp ((*op_no_str), "fstat"))
+ return GF_FOP_FSTAT;
+ else if (!strcmp ((*op_no_str), "lk"))
+ return GF_FOP_LK;
+ else if (!strcmp ((*op_no_str), "xattrop"))
+ return GF_FOP_XATTROP;
+ else if (!strcmp ((*op_no_str), "fxattrop"))
+ return GF_FOP_FXATTROP;
+ else if (!strcmp ((*op_no_str), "inodelk"))
+ return GF_FOP_INODELK;
+ else if (!strcmp ((*op_no_str), "finodelk"))
+ return GF_FOP_FINODELK;
+ else if (!strcmp ((*op_no_str), "etrylk"))
+ return GF_FOP_ENTRYLK;
+ else if (!strcmp ((*op_no_str), "fentrylk"))
+ return GF_FOP_FENTRYLK;
+ else if (!strcmp ((*op_no_str), "setattr"))
+ return GF_FOP_SETATTR;
+ else if (!strcmp ((*op_no_str), "fsetattr"))
+ return GF_FOP_FSETATTR;
+ else if (!strcmp ((*op_no_str), "getspec"))
+ return GF_FOP_GETSPEC;
+ else
+ return -1;
}
+int
+error_gen (xlator_t *this, int op_no)
+{
+ eg_t *egp = NULL;
+ int count = 0;
+ int failure_iter_no = GF_FAILURE_DEFAULT;
+ char *error_no = NULL;
+ int rand_no = 0;
+ int ret = 0;
+
+ egp = this->private;
+
+ LOCK (&egp->lock);
+ {
+ count = ++egp->op_count;
+ failure_iter_no = egp->failure_iter_no;
+ error_no = egp->error_no;
+ }
+ UNLOCK (&egp->lock);
+
+ if((count % failure_iter_no) == 0) {
+ LOCK (&egp->lock);
+ {
+ egp->op_count = 0;
+ }
+ UNLOCK (&egp->lock);
+
+ if (error_no)
+ ret = conv_errno_to_int (&error_no);
+ else {
+
+ rand_no = generate_rand_no (op_no);
+ if (op_no >= GF_FOP_MAXVALUE)
+ op_no = 0;
+ if (rand_no >= error_no_list[op_no].error_no_count)
+ rand_no = 0;
+ ret = error_no_list[op_no].error_no[rand_no];
+ }
+ if (egp->random_failure == _gf_true)
+ egp->failure_iter_no = 3 + (rand () % GF_UNIVERSAL_ANSWER);
+ }
+ return ret;
+}
-int32_t
-error_gen_forget (xlator_t *this,
- inode_t *inode)
+
+int
+error_gen_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
{
- return 0;
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode,
+ buf, xdata, postparent);
+ return 0;
}
-int32_t
-error_gen_stat_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
-}
-int32_t
-error_gen_stat (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
+int
+error_gen_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_LOOKUP];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_LOOKUP);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (lookup, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_stat_cbk,
+
+ STACK_WIND (frame, error_gen_lookup_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat,
- loc);
- return 0;
+ FIRST_CHILD(this)->fops->lookup,
+ loc, xdata);
+ return 0;
}
-int32_t
-error_gen_chmod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
+
+int
+error_gen_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
}
-int32_t
-error_gen_chmod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
+int
+error_gen_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_STAT];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_STAT);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (stat, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_chmod_cbk,
+
+ STACK_WIND (frame, error_gen_stat_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->chmod,
- loc,
- mode);
- return 0;
+ FIRST_CHILD(this)->fops->stat,
+ loc, xdata);
+ return 0;
}
-int32_t
-error_gen_fchmod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
+int
+error_gen_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop, xdata);
+ return 0;
}
-int32_t
-error_gen_fchmod (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- mode_t mode)
+
+int
+error_gen_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
- }
- STACK_WIND (frame,
- error_gen_fchmod_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fchmod,
- fd,
- mode);
- return 0;
-}
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
-int32_t
-error_gen_chown_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
-}
+ egp = this->private;
+ enable = egp->enable[GF_FOP_SETATTR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_SETATTR);
-int32_t
-error_gen_chown (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- uid_t uid,
- gid_t gid)
-{
- int op_errno = 0;
- op_errno = error_gen(this);
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (setattr, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_chown_cbk,
+
+ STACK_WIND (frame, error_gen_setattr_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->chown,
- loc,
- uid,
- gid);
- return 0;
+ FIRST_CHILD(this)->fops->setattr,
+ loc, stbuf, valid, xdata);
+ return 0;
}
-int32_t
-error_gen_fchown_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
-}
-int32_t
-error_gen_fchown (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- uid_t uid,
- gid_t gid)
-{
- int op_errno = 0;
- op_errno = error_gen(this);
+int
+error_gen_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FSETATTR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FSETATTR);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (fsetattr, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_fchown_cbk,
+
+ STACK_WIND (frame, error_gen_setattr_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fchown,
- fd,
- uid,
- gid);
- return 0;
+ FIRST_CHILD(this)->fops->fsetattr,
+ fd, stbuf, valid, xdata);
+ return 0;
}
-int32_t
-error_gen_truncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
+
+int
+error_gen_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+ return 0;
}
-int32_t
-error_gen_truncate (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset)
+
+int
+error_gen_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ off_t offset, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_TRUNCATE];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_TRUNCATE);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (truncate, frame, -1, op_errno,
+ NULL, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_truncate_cbk,
+
+ STACK_WIND (frame, error_gen_truncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->truncate,
- loc,
- offset);
- return 0;
+ loc, offset, xdata);
+ return 0;
}
-int32_t
-error_gen_ftruncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
+
+int
+error_gen_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+ return 0;
}
-int32_t
-error_gen_ftruncate (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset)
+
+int
+error_gen_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp =NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FTRUNCATE];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FTRUNCATE);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (ftruncate, frame, -1, op_errno,
+ NULL, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_ftruncate_cbk,
+
+ STACK_WIND (frame, error_gen_ftruncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->ftruncate,
- fd,
- offset);
- return 0;
-}
-
-int32_t
-error_gen_utimens_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
+ fd, offset, xdata);
+ return 0;
}
-int32_t
-error_gen_utimens (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- struct timespec tv[2])
+int
+error_gen_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
- }
- STACK_WIND (frame,
- error_gen_utimens_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->utimens,
- loc,
- tv);
- return 0;
+ STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, xdata);
+ return 0;
}
-int32_t
-error_gen_access_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
-}
-int32_t
-error_gen_access (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t mask)
+int
+error_gen_access (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t mask, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_ACCESS];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_ACCESS);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (access, frame, -1, op_errno, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_access_cbk,
+
+ STACK_WIND (frame, error_gen_access_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->access,
- loc,
- mask);
- return 0;
+ loc, mask, xdata);
+ return 0;
}
-int32_t
-error_gen_readlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- const char *path)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- path);
- return 0;
+int
+error_gen_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ const char *path, struct iatt *sbuf, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, sbuf, xdata);
+ return 0;
}
-int32_t
-error_gen_readlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- size_t size)
+
+int
+error_gen_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ size_t size, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_READLINK];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_READLINK);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (readlink, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_readlink_cbk,
+
+ STACK_WIND (frame, error_gen_readlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readlink,
- loc,
- size);
- return 0;
+ loc, size, xdata);
+ return 0;
}
-int32_t
-error_gen_mknod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- inode,
- buf);
- return 0;
+int
+error_gen_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno,
+ inode, buf,
+ preparent, postparent, xdata);
+ return 0;
}
-int32_t
-error_gen_mknod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode,
- dev_t rdev)
-{
- int op_errno = 0;
- op_errno = error_gen(this);
+
+int
+error_gen_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_MKNOD];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_MKNOD);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT (mknod, frame, -1, op_errno, NULL, NULL,
+ NULL, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_mknod_cbk,
+
+ STACK_WIND (frame, error_gen_mknod_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mknod,
- loc, mode, rdev);
- return 0;
+ loc, mode, rdev, umask, xdata);
+ return 0;
}
-int32_t
-error_gen_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- inode,
- buf);
- return 0;
+
+int
+error_gen_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno,
+ inode, buf,
+ preparent, postparent, xdata);
+ return 0;
}
-int32_t
-error_gen_mkdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
+int
+error_gen_mkdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_MKDIR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_MKDIR);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT (mkdir, frame, -1, op_errno, NULL, NULL,
+ NULL, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_mkdir_cbk,
+
+ STACK_WIND (frame, error_gen_mkdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mkdir,
- loc, mode);
- return 0;
+ loc, mode, umask, xdata);
+ return 0;
}
-int32_t
-error_gen_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+
+int
+error_gen_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
+ return 0;
}
-int32_t
-error_gen_unlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
+
+int
+error_gen_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_UNLINK];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_UNLINK);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (unlink, frame, -1, op_errno, NULL, NULL,
+ xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_unlink_cbk,
+
+ STACK_WIND (frame, error_gen_unlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->unlink,
- loc);
- return 0;
+ loc, xflag, xdata);
+ return 0;
}
-int32_t
-error_gen_rmdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
+
+int
+error_gen_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
+ return 0;
}
-int32_t
-error_gen_rmdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
+
+int
+error_gen_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_RMDIR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_RMDIR);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (rmdir, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_rmdir_cbk,
+
+ STACK_WIND (frame, error_gen_rmdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rmdir,
- loc);
- return 0;
+ loc, flags, xdata);
+ return 0;
}
-int32_t
-error_gen_symlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
- return 0;
+int
+error_gen_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
}
-int32_t
-error_gen_symlink (call_frame_t *frame,
- xlator_t *this,
- const char *linkpath,
- loc_t *loc)
+
+int
+error_gen_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_SYMLINK];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_SYMLINK);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+ STACK_UNWIND_STRICT (symlink, frame, -1, op_errno, NULL, NULL,
+ NULL, NULL, NULL); /* pre & post parent attr */
return 0;
}
- STACK_WIND (frame,
- error_gen_symlink_cbk,
+
+ STACK_WIND (frame, error_gen_symlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->symlink,
- linkpath, loc);
- return 0;
+ linkpath, loc, umask, xdata);
+ return 0;
}
-int32_t
-error_gen_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
+int
+error_gen_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf,
+ preoldparent, postoldparent,
+ prenewparent, postnewparent, xdata);
+ return 0;
}
-int32_t
-error_gen_rename (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
+
+int
+error_gen_rename (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_RENAME];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_RENAME);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
+ STACK_UNWIND_STRICT (rename, frame, -1, op_errno, NULL,
+ NULL, NULL, NULL, NULL, NULL);
return 0;
}
- STACK_WIND (frame,
- error_gen_rename_cbk,
+
+ STACK_WIND (frame, error_gen_rename_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rename,
- oldloc, newloc);
- return 0;
+ oldloc, newloc, xdata);
+ return 0;
}
-int32_t
-error_gen_link_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
- return 0;
+int
+error_gen_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
}
-int32_t
-error_gen_link (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
+
+int
+error_gen_link (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_LINK];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_LINK);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+ STACK_UNWIND_STRICT (link, frame, -1, op_errno, NULL, NULL,
+ NULL, NULL, NULL);
return 0;
}
- STACK_WIND (frame,
- error_gen_link_cbk,
+
+ STACK_WIND (frame, error_gen_link_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->link,
- oldloc, newloc);
- return 0;
+ oldloc, newloc, xdata);
+ return 0;
}
-int32_t
-error_gen_create_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct stat *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
- return 0;
+int
+error_gen_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ fd_t *fd, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
}
-int32_t
-error_gen_create (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- mode_t mode, fd_t *fd)
-{
- int op_errno = 0;
- op_errno = error_gen(this);
+
+int
+error_gen_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
+{
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_CREATE];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_CREATE);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
+ STACK_UNWIND_STRICT (create, frame, -1, op_errno, NULL, NULL,
+ NULL, NULL, NULL, NULL);
return 0;
}
STACK_WIND (frame, error_gen_create_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->create,
- loc, flags, mode, fd);
- return 0;
+ loc, flags, mode, umask, fd, xdata);
+ return 0;
}
-int32_t
-error_gen_open_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- fd);
- return 0;
+
+int
+error_gen_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+ return 0;
}
-int32_t
-error_gen_open (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags, fd_t *fd)
+
+int
+error_gen_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_OPEN];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_OPEN);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (open, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_open_cbk,
+ STACK_WIND (frame, error_gen_open_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->open,
- loc, flags, fd);
- return 0;
+ loc, flags, fd, xdata);
+ return 0;
}
-int32_t
-error_gen_readv_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct stat *stbuf,
- struct iobref *iobref)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- vector,
- count,
- stbuf,
- iobref);
- return 0;
+
+int
+error_gen_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iovec *vector, int32_t count,
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno,
+ vector, count, stbuf, iobref, xdata);
+ return 0;
}
-int32_t
-error_gen_readv (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset)
-{
- int op_errno = 0;
- op_errno = error_gen(this);
+
+int
+error_gen_readv (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
+{
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_READ];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_READ);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL, 0, NULL);
- return 0;
+ STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0,
+ NULL, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_readv_cbk,
+ STACK_WIND (frame, error_gen_readv_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readv,
- fd,
- size,
- offset);
- return 0;
+ fd, size, offset, flags, xdata);
+ return 0;
}
-int32_t
-error_gen_writev_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *stbuf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- stbuf);
- return 0;
+int
+error_gen_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
}
-int32_t
-error_gen_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t off,
- struct iobref *iobref)
-{
- int op_errno = 0;
- op_errno = error_gen(this);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
+
+int
+error_gen_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count,
+ off_t off, uint32_t flags, struct iobref *iobref, dict_t *xdata)
+{
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_WRITE];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_WRITE);
+
+ if (op_errno == GF_ERROR_SHORT_WRITE) {
+ struct iovec *shortvec;
+
+ /*
+ * A short write error returns some value less than what was
+ * requested from a write. To simulate this, replace the vector
+ * with one half the size;
+ */
+ shortvec = iov_dup(vector, 1);
+ shortvec->iov_len /= 2;
+
+ STACK_WIND(frame, error_gen_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, shortvec, count,
+ off, flags, iobref, xdata);
+ GF_FREE(shortvec);
return 0;
+ } else if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
+ STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
-
- STACK_WIND (frame,
- error_gen_writev_cbk,
+ STACK_WIND (frame, error_gen_writev_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->writev,
- fd,
- vector,
- count,
- off,
- iobref);
- return 0;
+ fd, vector, count, off, flags, iobref, xdata);
+ return 0;
}
-int32_t
-error_gen_flush_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
+
+int
+error_gen_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
+ return 0;
}
-int32_t
-error_gen_flush (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
+
+int
+error_gen_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FLUSH];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FLUSH);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (flush, frame, -1, op_errno, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_flush_cbk,
- FIRST_CHILD(this),
+ STACK_WIND (frame, error_gen_flush_cbk,
+ FIRST_CHILD(this),
FIRST_CHILD(this)->fops->flush,
- fd);
- return 0;
+ fd, xdata);
+ return 0;
}
-int32_t
-error_gen_fsync_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
+int
+error_gen_fsync_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
}
-int32_t
-error_gen_fsync (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags)
+
+int
+error_gen_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FSYNC];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FSYNC);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_fsync_cbk,
+ STACK_WIND (frame, error_gen_fsync_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsync,
- fd,
- flags);
- return 0;
+ fd, flags, xdata);
+ return 0;
}
-int32_t
-error_gen_fstat_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
+
+int
+error_gen_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
}
-int32_t
-error_gen_fstat (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
+
+int
+error_gen_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FSTAT];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FSTAT);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (fstat, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_fstat_cbk,
+ STACK_WIND (frame, error_gen_fstat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fstat,
- fd);
- return 0;
+ fd, xdata);
+ return 0;
}
-int32_t
-error_gen_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- fd);
- return 0;
+
+int
+error_gen_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata);
+ return 0;
}
-int32_t
-error_gen_opendir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, fd_t *fd)
+
+int
+error_gen_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_OPENDIR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_OPENDIR);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (opendir, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_opendir_cbk,
+ STACK_WIND (frame, error_gen_opendir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->opendir,
- loc, fd);
- return 0;
+ loc, fd, xdata);
+ return 0;
}
-
-int32_t
-error_gen_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entries,
- int32_t count)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- entries,
- count);
- return 0;
+int
+error_gen_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, xdata);
+ return 0;
}
-int32_t
-error_gen_getdents (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset,
- int32_t flag)
-{
- int op_errno = 0;
- op_errno = error_gen(this);
+
+int
+error_gen_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t flags, dict_t *xdata)
+{
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FSYNCDIR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FSYNCDIR);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL, 0);
- return 0;
+ STACK_UNWIND_STRICT (fsyncdir, frame, -1, op_errno, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_getdents_cbk,
+ STACK_WIND (frame, error_gen_fsyncdir_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getdents,
- fd,
- size,
- offset,
- flag);
- return 0;
+ FIRST_CHILD(this)->fops->fsyncdir,
+ fd, flags, xdata);
+ return 0;
}
-int32_t
-error_gen_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
+int
+error_gen_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
+ return 0;
}
-int32_t
-error_gen_setdents (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags,
- dir_entry_t *entries,
- int32_t count)
-{
- int op_errno = 0;
- op_errno = error_gen(this);
+
+int
+error_gen_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_STATFS];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_STATFS);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL, 0);
- return 0;
+ STACK_UNWIND_STRICT (statfs, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_setdents_cbk,
+ STACK_WIND (frame, error_gen_statfs_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setdents,
- fd,
- flags,
- entries,
- count);
- return 0;
+ FIRST_CHILD(this)->fops->statfs,
+ loc, xdata);
+ return 0;
}
-int32_t
-error_gen_fsyncdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
+int
+error_gen_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
-int32_t
-error_gen_fsyncdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags)
+
+int
+error_gen_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_SETXATTR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_SETXATTR);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (setxattr, frame, -1, op_errno, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_fsyncdir_cbk,
+ STACK_WIND (frame, error_gen_setxattr_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsyncdir,
- fd,
- flags);
- return 0;
+ FIRST_CHILD(this)->fops->setxattr,
+ loc, dict, flags, xdata);
+ return 0;
}
-int32_t
-error_gen_statfs_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct statvfs *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
+int
+error_gen_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
-int32_t
-error_gen_statfs (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
+
+int
+error_gen_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_GETXATTR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_GETXATTR);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (getxattr, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_statfs_cbk,
+ STACK_WIND (frame, error_gen_getxattr_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->statfs,
- loc);
- return 0;
+ FIRST_CHILD(this)->fops->getxattr,
+ loc, name, xdata);
+ return 0;
}
-
-int32_t
-error_gen_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
+int
+error_gen_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
-int32_t
-error_gen_setxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- int32_t flags)
-{
- int op_errno = 0;
- op_errno = error_gen(this);
+
+int
+error_gen_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FSETXATTR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FSETXATTR);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (fsetxattr, frame, -1, op_errno, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_setxattr_cbk,
+ STACK_WIND (frame, error_gen_fsetxattr_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr,
- loc,
- dict,
- flags);
- return 0;
+ FIRST_CHILD(this)->fops->fsetxattr,
+ fd, dict, flags, xdata);
+ return 0;
}
-int32_t
-error_gen_getxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- dict);
- return 0;
+
+int
+error_gen_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
-int32_t
-error_gen_getxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
+
+int
+error_gen_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FGETXATTR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FGETXATTR);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (fgetxattr, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_getxattr_cbk,
+ STACK_WIND (frame, error_gen_fgetxattr_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr,
- loc,
- name);
- return 0;
+ FIRST_CHILD(this)->fops->fgetxattr,
+ fd, name, xdata);
+ return 0;
}
-int32_t
-error_gen_xattrop_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
- STACK_UNWIND (frame, op_ret, op_errno, dict);
- return 0;
+
+int
+error_gen_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
-int32_t
-error_gen_xattrop (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- gf_xattrop_flags_t flags,
- dict_t *dict)
-{
- int op_errno = 0;
- op_errno = error_gen(this);
+
+int
+error_gen_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_XATTROP];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_XATTROP);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (xattrop, frame, -1, op_errno, NULL, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_xattrop_cbk,
+ STACK_WIND (frame, error_gen_xattrop_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->xattrop,
- loc, flags, dict);
- return 0;
+ loc, flags, dict, xdata);
+ return 0;
}
-int32_t
-error_gen_fxattrop_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
- STACK_UNWIND (frame, op_ret, op_errno, dict);
- return 0;
+
+int
+error_gen_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
-int32_t
-error_gen_fxattrop (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- gf_xattrop_flags_t flags,
- dict_t *dict)
-{
- int op_errno = 0;
- op_errno = error_gen(this);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- error_gen_fxattrop_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fxattrop,
- fd, flags, dict);
- return 0;
+
+int
+error_gen_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FXATTROP];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FXATTROP);
+
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
+ STACK_UNWIND_STRICT (fxattrop, frame, -1, op_errno, NULL, xdata);
+ return 0;
+ }
+
+ STACK_WIND (frame, error_gen_fxattrop_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fxattrop,
+ fd, flags, dict, xdata);
+ return 0;
}
-int32_t
-error_gen_removexattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
+
+int
+error_gen_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
-int32_t
-error_gen_removexattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
+
+int
+error_gen_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_REMOVEXATTR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_REMOVEXATTR);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (removexattr, frame, -1, op_errno, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_removexattr_cbk,
+ STACK_WIND (frame, error_gen_removexattr_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->removexattr,
- loc,
- name);
- return 0;
+ loc, name, xdata);
+ return 0;
}
-int32_t
-error_gen_lk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct flock *lock)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- lock);
- return 0;
+int
+error_gen_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
-int32_t
-error_gen_lk (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t cmd,
- struct flock *lock)
-{
- int op_errno = 0;
- op_errno = error_gen(this);
+
+int
+error_gen_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FREMOVEXATTR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FREMOVEXATTR);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
+ STACK_UNWIND_STRICT (fremovexattr, frame, -1, op_errno, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_lk_cbk,
+ STACK_WIND (frame, error_gen_fremovexattr_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk,
- fd,
- cmd,
- lock);
- return 0;
+ FIRST_CHILD(this)->fops->fremovexattr,
+ fd, name, xdata);
+ return 0;
}
-int32_t
-error_gen_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+int
+error_gen_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata);
+ return 0;
+}
+
+int
+error_gen_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_LK];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_LK);
+
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
+ STACK_UNWIND_STRICT (lk, frame, -1, op_errno, NULL, xdata);
+ return 0;
+ }
+
+ STACK_WIND (frame, error_gen_lk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lk,
+ fd, cmd, lock, xdata);
+ return 0;
}
-int32_t
+int
+error_gen_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int
error_gen_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd,
- struct flock *lock)
+ const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_INODELK];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_INODELK);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (inodelk, frame, -1, op_errno, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_inodelk_cbk,
+ STACK_WIND (frame, error_gen_inodelk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->inodelk,
- volume, loc, cmd, lock);
- return 0;
+ volume, loc, cmd, lock, xdata);
+ return 0;
}
-int32_t
-error_gen_finodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
+int
+error_gen_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno, xdata);
+ return 0;
}
-int32_t
+int
error_gen_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd,
- struct flock *lock)
+ const char *volume, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FINODELK];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FINODELK);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (finodelk, frame, -1, op_errno, xdata);
+ return 0;
}
- STACK_WIND (frame,
- error_gen_finodelk_cbk,
+ STACK_WIND (frame, error_gen_finodelk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->finodelk,
- volume, fd, cmd, lock);
- return 0;
+ volume, fd, cmd, lock, xdata);
+ return 0;
}
-int32_t
-error_gen_entrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
+int
+error_gen_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, xdata);
+ return 0;
}
-int32_t
+
+int
error_gen_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_ENTRYLK];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_ENTRYLK);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (entrylk, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_entrylk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->entrylk,
- volume, loc, basename, cmd, type);
- return 0;
+ volume, loc, basename, cmd, type, xdata);
+ return 0;
}
-int32_t
-error_gen_fentrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
+int
+error_gen_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (fentrylk, frame, op_ret, op_errno, xdata);
+ return 0;
}
-int32_t
+
+int
error_gen_fentrylk (call_frame_t *frame, xlator_t *this,
const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FENTRYLK];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_FENTRYLK);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno);
- return 0;
+ STACK_UNWIND_STRICT (fentrylk, frame, -1, op_errno, xdata);
+ return 0;
}
STACK_WIND (frame, error_gen_fentrylk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fentrylk,
- volume, fd, basename, cmd, type);
- return 0;
+ volume, fd, basename, cmd, type, xdata);
+ return 0;
}
/* Management operations */
-int32_t
-error_gen_stats_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct xlator_stats *stats)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- stats);
- return 0;
+
+int
+error_gen_getspec_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, char *spec_data)
+{
+ STACK_UNWIND_STRICT (getspec, frame, op_ret, op_errno, spec_data);
+ return 0;
}
-int32_t
-error_gen_stats (call_frame_t *frame,
- xlator_t *this,
- int32_t flags)
+int
+error_gen_getspec (call_frame_t *frame, xlator_t *this, const char *key,
+ int32_t flags)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_GETSPEC];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_GETSPEC);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
+ STACK_UNWIND_STRICT (getspec, frame, -1, op_errno, NULL);
return 0;
}
- STACK_WIND (frame,
- error_gen_stats_cbk,
+ STACK_WIND (frame, error_gen_getspec_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->mops->stats,
- flags);
+ FIRST_CHILD(this)->fops->getspec,
+ key, flags);
return 0;
}
-
-int32_t
-error_gen_getspec_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- char *spec_data)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- spec_data);
+int
+error_gen_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries, xdata);
return 0;
}
-int32_t
-error_gen_getspec (call_frame_t *frame,
- xlator_t *this,
- const char *key,
- int32_t flags)
+int
+error_gen_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t off, dict_t *xdata)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_READDIR];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_READDIR);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
+ STACK_UNWIND_STRICT (readdir, frame, -1, op_errno, NULL, xdata);
return 0;
}
- STACK_WIND (frame,
- error_gen_getspec_cbk,
+ STACK_WIND (frame, error_gen_readdir_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->mops->getspec,
- key, flags);
+ FIRST_CHILD(this)->fops->readdir,
+ fd, size, off, xdata);
return 0;
}
-int32_t
-error_gen_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *file_checksum,
- uint8_t *dir_checksum)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- file_checksum,
- dir_checksum);
+int
+error_gen_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
return 0;
}
-int32_t
-error_gen_checksum (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flag)
+int
+error_gen_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *dict)
{
- int op_errno = 0;
- op_errno = error_gen(this);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_READDIRP];
+
+ if (enable)
+ op_errno = error_gen (this, GF_FOP_READDIRP);
+
if (op_errno) {
GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+ STACK_UNWIND_STRICT (readdirp, frame, -1, op_errno, NULL, NULL);
return 0;
}
- STACK_WIND (frame,
- error_gen_checksum_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->checksum,
- loc,
- flag);
+ STACK_WIND (frame, error_gen_readdirp_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp,
+ fd, size, off, dict);
return 0;
}
-int32_t
-error_gen_readdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *entries)
-{
- STACK_UNWIND (frame, op_ret, op_errno, entries);
- return 0;
+static void
+error_gen_set_failure (eg_t *pvt, int percent)
+{
+ GF_ASSERT (pvt);
+
+ if (percent)
+ pvt->failure_iter_no = 100/percent;
+ else
+ pvt->failure_iter_no = 100/GF_FAILURE_DEFAULT;
}
+static void
+error_gen_parse_fill_fops (eg_t *pvt, char *enable_fops)
+{
+ char *op_no_str = NULL;
+ int op_no = -1;
+ int i = 0;
+ xlator_t *this = THIS;
+ char *saveptr = NULL;
-int32_t
-error_gen_readdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t off)
-{
- int op_errno = 0;
- op_errno = error_gen(this);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
- }
+ GF_ASSERT (pvt);
+ GF_ASSERT (this);
- STACK_WIND (frame,
- error_gen_readdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdir,
- fd, size, off);
- return 0;
+ for (i = 0; i < GF_FOP_MAXVALUE; i++)
+ pvt->enable[i] = 0;
+
+ if (!enable_fops) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "All fops are enabled.");
+ for (i = 0; i < GF_FOP_MAXVALUE; i++)
+ pvt->enable[i] = 1;
+ } else {
+ op_no_str = strtok_r (enable_fops, ",", &saveptr);
+ while (op_no_str) {
+ op_no = get_fop_int (&op_no_str);
+ if (op_no == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Wrong option value %s", op_no_str);
+ } else
+ pvt->enable[op_no] = 1;
+
+ op_no_str = strtok_r (NULL, ",", &saveptr);
+ }
+ }
}
int32_t
-error_gen_closedir (xlator_t *this,
- fd_t *fd)
+error_gen_priv_dump (xlator_t *this)
{
- return 0;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ int ret = -1;
+ eg_t *conf = NULL;
+
+ if (!this)
+ goto out;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ ret = TRY_LOCK(&conf->lock);
+ if (ret != 0) {
+ return ret;
+ }
+
+ gf_proc_dump_add_section("xlator.debug.error-gen.%s.priv", this->name);
+ gf_proc_dump_build_key(key_prefix,"xlator.debug.error-gen","%s.priv",
+ this->name);
+
+ gf_proc_dump_write("op_count", "%d", conf->op_count);
+ gf_proc_dump_write("failure_iter_no", "%d", conf->failure_iter_no);
+ gf_proc_dump_write("error_no", "%s", conf->error_no);
+ gf_proc_dump_write("random_failure", "%d", conf->random_failure);
+
+ UNLOCK(&conf->lock);
+out:
+ return ret;
}
int32_t
-error_gen_close (xlator_t *this,
- fd_t *fd)
+mem_acct_init (xlator_t *this)
{
- return 0;
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_error_gen_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ " failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ eg_t *pvt = NULL;
+ int32_t ret = 0;
+ char *error_enable_fops = NULL;
+ int32_t failure_percent_int = 0;
+
+ if (!this || !this->private)
+ goto out;
+
+ pvt = this->private;
+
+ GF_OPTION_RECONF ("error-no", pvt->error_no, options, str, out);
+
+ GF_OPTION_RECONF ("failure", failure_percent_int, options, int32,
+ out);
+
+ GF_OPTION_RECONF ("enable", error_enable_fops, options, str, out);
+
+ GF_OPTION_RECONF ("random-failure", pvt->random_failure, options,
+ bool, out);
+
+ error_gen_parse_fill_fops (pvt, error_enable_fops);
+ error_gen_set_failure (pvt, failure_percent_int);
+
+ ret = 0;
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "reconfigure returning %d", ret);
+ return ret;
}
int
init (xlator_t *this)
{
- eg_t *pvt = NULL;
+ eg_t *pvt = NULL;
+ int32_t ret = 0;
+ char *error_enable_fops = NULL;
+ int32_t failure_percent_int = 0;
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "error-gen not configured with one subvolume");
- return -1;
- }
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "error-gen not configured with one subvolume");
+ ret = -1;
+ goto out;
+ }
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
- pvt = CALLOC (1, sizeof (eg_t));
- this->private = pvt;
- return 0;
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+
+ pvt = GF_CALLOC (1, sizeof (eg_t), gf_error_gen_mt_eg_t);
+
+ if (!pvt) {
+ ret = -1;
+ goto out;
+ }
+
+ LOCK_INIT (&pvt->lock);
+
+ GF_OPTION_INIT ("error-no", pvt->error_no, str, out);
+
+ GF_OPTION_INIT ("failure", failure_percent_int, int32, out);
+
+ GF_OPTION_INIT ("enable", error_enable_fops, str, out);
+
+ GF_OPTION_INIT ("random-failure", pvt->random_failure, bool, out);
+
+
+ error_gen_parse_fill_fops (pvt, error_enable_fops);
+ error_gen_set_failure (pvt, failure_percent_int);
+
+ this->private = pvt;
+
+ /* Give some seed value here */
+ srand (time(NULL));
+out:
+ if (ret)
+ GF_FREE (pvt);
+ return ret;
}
+
void
fini (xlator_t *this)
{
- gf_log (this->name, GF_LOG_DEBUG, "fini called");
- return;
+ eg_t *pvt = NULL;
+
+ if (!this)
+ return;
+ pvt = this->private;
+
+ if (pvt) {
+ LOCK_DESTROY (&pvt->lock);
+ GF_FREE (pvt);
+ gf_log (this->name, GF_LOG_DEBUG, "fini called");
+ }
+ return;
}
+struct xlator_dumpops dumpops = {
+ .priv = error_gen_priv_dump,
+};
+
+struct xlator_fops cbks;
struct xlator_fops fops = {
.lookup = error_gen_lookup,
@@ -1740,10 +2170,7 @@ struct xlator_fops fops = {
.symlink = error_gen_symlink,
.rename = error_gen_rename,
.link = error_gen_link,
- .chmod = error_gen_chmod,
- .chown = error_gen_chown,
.truncate = error_gen_truncate,
- .utimens = error_gen_utimens,
.create = error_gen_create,
.open = error_gen_open,
.readv = error_gen_readv,
@@ -1754,33 +2181,52 @@ struct xlator_fops fops = {
.setxattr = error_gen_setxattr,
.getxattr = error_gen_getxattr,
.removexattr = error_gen_removexattr,
+ .fsetxattr = error_gen_fsetxattr,
+ .fgetxattr = error_gen_fgetxattr,
+ .fremovexattr = error_gen_fremovexattr,
.opendir = error_gen_opendir,
.readdir = error_gen_readdir,
- .getdents = error_gen_getdents,
+ .readdirp = error_gen_readdirp,
.fsyncdir = error_gen_fsyncdir,
.access = error_gen_access,
.ftruncate = error_gen_ftruncate,
.fstat = error_gen_fstat,
.lk = error_gen_lk,
- .fchmod = error_gen_fchmod,
- .fchown = error_gen_fchown,
- .setdents = error_gen_setdents,
.lookup_cbk = error_gen_lookup_cbk,
- .checksum = error_gen_checksum,
.xattrop = error_gen_xattrop,
.fxattrop = error_gen_fxattrop,
.inodelk = error_gen_inodelk,
.finodelk = error_gen_finodelk,
.entrylk = error_gen_entrylk,
- .fentrylk = error_gen_fentrylk
-};
-
-struct xlator_mops mops = {
- .stats = error_gen_stats,
- .getspec = error_gen_getspec,
+ .fentrylk = error_gen_fentrylk,
+ .setattr = error_gen_setattr,
+ .fsetattr = error_gen_fsetattr,
+ .getspec = error_gen_getspec,
};
-struct xlator_cbks cbks = {
- .release = error_gen_close,
- .releasedir = error_gen_closedir,
+struct volume_options options[] = {
+ { .key = {"failure"},
+ .type = GF_OPTION_TYPE_INT,
+ .description = "Percentage failure of operations when enabled.",
+ },
+
+ { .key = {"error-no"},
+ .value = {"ENOENT","ENOTDIR","ENAMETOOLONG","EACCES","EBADF",
+ "EFAULT","ENOMEM","EINVAL","EIO","EEXIST","ENOSPC",
+ "EPERM","EROFS","EBUSY","EISDIR","ENOTEMPTY","EMLINK"
+ "ENODEV","EXDEV","EMFILE","ENFILE","ENOSYS","EINTR",
+ "EFBIG","EAGAIN","GF_ERROR_SHORT_WRITE"},
+ .type = GF_OPTION_TYPE_STR,
+ },
+
+ { .key = {"random-failure"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ },
+
+ { .key = {"enable"},
+ .type = GF_OPTION_TYPE_STR,
+ },
+
+ { .key = {NULL} }
};
diff --git a/xlators/debug/error-gen/src/error-gen.h b/xlators/debug/error-gen/src/error-gen.h
new file mode 100644
index 000000000..d92c23062
--- /dev/null
+++ b/xlators/debug/error-gen/src/error-gen.h
@@ -0,0 +1,48 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _ERROR_GEN_H
+#define _ERROR_GEN_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "error-gen-mem-types.h"
+
+#define GF_FAILURE_DEFAULT 10
+
+/*
+ * Pseudo-errors refer to errors beyond the scope of traditional <-1, op_errno>
+ * returns. This facilitates the ability to return unexpected, but not -1 values
+ * and/or to inject operations that lead to implicit error conditions. The range
+ * for pseudo errors resides at a high value to avoid conflicts with the errno
+ * range.
+ */
+enum GF_PSEUDO_ERRORS {
+ GF_ERROR_SHORT_WRITE = 1000, /* short writev return value */
+ GF_ERROR_MAX
+};
+
+typedef struct {
+ int enable[GF_FOP_MAXVALUE];
+ int op_count;
+ int failure_iter_no;
+ char *error_no;
+ gf_boolean_t random_failure;
+ gf_lock_t lock;
+} eg_t;
+
+typedef struct {
+ int error_no_count;
+ int error_no[20];
+} sys_error_t;
+
+#endif
diff --git a/xlators/debug/io-stats/src/Makefile.am b/xlators/debug/io-stats/src/Makefile.am
index 91aea6ebb..332d79015 100644
--- a/xlators/debug/io-stats/src/Makefile.am
+++ b/xlators/debug/io-stats/src/Makefile.am
@@ -2,12 +2,15 @@
xlator_LTLIBRARIES = io-stats.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/debug
-io_stats_la_LDFLAGS = -module -avoidversion
+io_stats_la_LDFLAGS = -module -avoid-version
io_stats_la_SOURCES = io-stats.c
io_stats_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+noinst_HEADERS = io-stats-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/debug/io-stats/src/io-stats-mem-types.h b/xlators/debug/io-stats/src/io-stats-mem-types.h
new file mode 100644
index 000000000..c30dfb17e
--- /dev/null
+++ b/xlators/debug/io-stats/src/io-stats-mem-types.h
@@ -0,0 +1,24 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __IO_STATS_MEM_TYPES_H__
+#define __IO_STATS_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_io_stats_mem_types_ {
+ gf_io_stats_mt_ios_conf = gf_common_mt_end + 1,
+ gf_io_stats_mt_ios_fd,
+ gf_io_stats_mt_ios_stat,
+ gf_io_stats_mt_ios_stat_list,
+ gf_io_stats_mt_end
+};
+#endif
+
diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c
index c07685017..7fb697ae4 100644
--- a/xlators/debug/io-stats/src/io-stats.c
+++ b/xlators/debug/io-stats/src/io-stats.c
@@ -1,1508 +1,2855 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
+#include "xlator.h"
#endif
/**
* xlators/debug/io_stats :
- * This translator maintains a counter for each fop (that is,
- * a counter which stores the number of invocations for the certain
- * fop), and makes these counters accessible as extended attributes.
+ * This translator maintains statistics of all filesystem activity
+ * happening through it. The kind of statistics include:
+ *
+ * a) total read data - since process start, last interval and per fd
+ * b) total write data - since process start, last interval and per fd
+ * c) counts of read IO block size - since process start, last interval and per fd
+ * d) counts of write IO block size - since process start, last interval and per fd
+ * e) counts of all FOP types passing through it
+ *
+ * Usage: setfattr -n io-stats-dump /tmp/filename /mnt/gluster
+ *
*/
+#include <fnmatch.h>
#include <errno.h>
#include "glusterfs.h"
#include "xlator.h"
+#include "io-stats-mem-types.h"
+#include <stdarg.h>
+#include "defaults.h"
+#include "logging.h"
+
+#define MAX_LIST_MEMBERS 100
+
+typedef enum {
+ IOS_STATS_TYPE_NONE,
+ IOS_STATS_TYPE_OPEN,
+ IOS_STATS_TYPE_READ,
+ IOS_STATS_TYPE_WRITE,
+ IOS_STATS_TYPE_OPENDIR,
+ IOS_STATS_TYPE_READDIRP,
+ IOS_STATS_TYPE_READ_THROUGHPUT,
+ IOS_STATS_TYPE_WRITE_THROUGHPUT,
+ IOS_STATS_TYPE_MAX
+}ios_stats_type_t;
-#define BUMP_HIT(op) \
-do { \
- LOCK (&((io_stats_private_t *)this->private)->lock); \
- { \
- ((io_stats_private_t *)this->private) \
- ->fop_records[GF_FOP_##op].hits++; \
- } \
- UNLOCK (&((io_stats_private_t *)this->private)->lock); \
- } while (0)
-
-struct io_stats_io_count {
- size_t size;
- int64_t hits;
+typedef enum {
+ IOS_STATS_THRU_READ,
+ IOS_STATS_THRU_WRITE,
+ IOS_STATS_THRU_MAX,
+}ios_stats_thru_t;
+
+struct ios_stat_lat {
+ struct timeval time;
+ double throughput;
};
+
+struct ios_stat {
+ gf_lock_t lock;
+ uuid_t gfid;
+ char *filename;
+ uint64_t counters [IOS_STATS_TYPE_MAX];
+ struct ios_stat_lat thru_counters [IOS_STATS_THRU_MAX];
+ int refcnt;
+};
+
+struct ios_stat_list {
+ struct list_head list;
+ struct ios_stat *iosstat;
+ double value;
+};
+
+struct ios_stat_head {
+ gf_lock_t lock;
+ double min_cnt;
+ uint64_t members;
+ struct ios_stat_list *iosstats;
+};
+
+struct ios_lat {
+ double min;
+ double max;
+ double avg;
+};
+
+struct ios_global_stats {
+ uint64_t data_written;
+ uint64_t data_read;
+ uint64_t block_count_write[32];
+ uint64_t block_count_read[32];
+ uint64_t fop_hits[GF_FOP_MAXVALUE];
+ struct timeval started_at;
+ struct ios_lat latency[GF_FOP_MAXVALUE];
+ uint64_t nr_opens;
+ uint64_t max_nr_opens;
+ struct timeval max_openfd_time;
+};
+
+
+struct ios_conf {
+ gf_lock_t lock;
+ struct ios_global_stats cumulative;
+ uint64_t increment;
+ struct ios_global_stats incremental;
+ gf_boolean_t dump_fd_stats;
+ gf_boolean_t count_fop_hits;
+ gf_boolean_t measure_latency;
+ struct ios_stat_head list[IOS_STATS_TYPE_MAX];
+ struct ios_stat_head thru_list[IOS_STATS_THRU_MAX];
+};
+
+
+struct ios_fd {
+ char *filename;
+ uint64_t data_written;
+ uint64_t data_read;
+ uint64_t block_count_write[32];
+ uint64_t block_count_read[32];
+ struct timeval opened_at;
+};
+
typedef enum {
- GF_IO_STAT_BLK_SIZE_1K,
- GF_IO_STAT_BLK_SIZE_2K,
- GF_IO_STAT_BLK_SIZE_4K,
- GF_IO_STAT_BLK_SIZE_8K,
- GF_IO_STAT_BLK_SIZE_16K,
- GF_IO_STAT_BLK_SIZE_32K,
- GF_IO_STAT_BLK_SIZE_64K,
- GF_IO_STAT_BLK_SIZE_128K,
- GF_IO_STAT_BLK_SIZE_MAX,
-} gf_io_stat_blk_t;
-
-struct io_stats_private {
- gf_lock_t lock;
- struct {
- char *name;
- int enabled;
- uint32_t hits;
- } fop_records[GF_FOP_MAXVALUE];
- struct io_stats_io_count read[GF_IO_STAT_BLK_SIZE_MAX + 1];
- struct io_stats_io_count write[GF_IO_STAT_BLK_SIZE_MAX + 1];
+ IOS_DUMP_TYPE_NONE = 0,
+ IOS_DUMP_TYPE_FILE = 1,
+ IOS_DUMP_TYPE_DICT = 2,
+ IOS_DUMP_TYPE_MAX = 3
+} ios_dump_type_t;
+
+struct ios_dump_args {
+ ios_dump_type_t type;
+ union {
+ FILE *logfp;
+ dict_t *dict;
+ } u;
};
-typedef struct io_stats_private io_stats_private_t;
-int32_t
-io_stats_create_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct stat *buf)
+typedef int (*block_dump_func) (xlator_t *, struct ios_dump_args*,
+ int , int , uint64_t ) ;
+
+struct ios_local {
+ struct timeval wind_at;
+ struct timeval unwind_at;
+};
+
+struct volume_options options[];
+
+inline static int
+is_fop_latency_started (call_frame_t *frame)
+{
+ GF_ASSERT (frame);
+ struct timeval epoch = {0,};
+ return memcmp (&frame->begin, &epoch, sizeof (epoch));
+}
+
+#define END_FOP_LATENCY(frame, op) \
+ do { \
+ struct ios_conf *conf = NULL; \
+ \
+ conf = this->private; \
+ if (conf && conf->measure_latency) { \
+ gettimeofday (&frame->end, NULL); \
+ update_ios_latency (conf, frame, GF_FOP_##op); \
+ } \
+ } while (0)
+
+#define START_FOP_LATENCY(frame) \
+ do { \
+ struct ios_conf *conf = NULL; \
+ \
+ conf = this->private; \
+ if (conf && conf->measure_latency) { \
+ gettimeofday (&frame->begin, NULL); \
+ } else { \
+ memset (&frame->begin, 0, sizeof (frame->begin));\
+ } \
+ } while (0)
+
+
+#define BUMP_FOP(op) \
+ do { \
+ struct ios_conf *conf = NULL; \
+ \
+ conf = this->private; \
+ if (!conf) \
+ break; \
+ conf->cumulative.fop_hits[GF_FOP_##op]++; \
+ conf->incremental.fop_hits[GF_FOP_##op]++; \
+ } while (0)
+
+#define UPDATE_PROFILE_STATS(frame, op) \
+ do { \
+ struct ios_conf *conf = NULL; \
+ \
+ if (!is_fop_latency_started (frame)) \
+ break; \
+ conf = this->private; \
+ LOCK (&conf->lock); \
+ { \
+ if (conf && conf->measure_latency && \
+ conf->count_fop_hits) { \
+ BUMP_FOP(op); \
+ gettimeofday (&frame->end, NULL); \
+ update_ios_latency (conf, frame, GF_FOP_##op);\
+ } \
+ } \
+ UNLOCK (&conf->lock); \
+ } while (0)
+
+#define BUMP_READ(fd, len) \
+ do { \
+ struct ios_conf *conf = NULL; \
+ struct ios_fd *iosfd = NULL; \
+ int lb2 = 0; \
+ \
+ conf = this->private; \
+ lb2 = log_base2 (len); \
+ ios_fd_ctx_get (fd, this, &iosfd); \
+ if (!conf) \
+ break; \
+ \
+ LOCK (&conf->lock); \
+ { \
+ conf->cumulative.data_read += len; \
+ conf->incremental.data_read += len; \
+ conf->cumulative.block_count_read[lb2]++; \
+ conf->incremental.block_count_read[lb2]++; \
+ \
+ if (iosfd) { \
+ iosfd->data_read += len; \
+ iosfd->block_count_read[lb2]++; \
+ } \
+ } \
+ UNLOCK (&conf->lock); \
+ } while (0)
+
+
+#define BUMP_WRITE(fd, len) \
+ do { \
+ struct ios_conf *conf = NULL; \
+ struct ios_fd *iosfd = NULL; \
+ int lb2 = 0; \
+ \
+ conf = this->private; \
+ lb2 = log_base2 (len); \
+ ios_fd_ctx_get (fd, this, &iosfd); \
+ if (!conf) \
+ break; \
+ \
+ LOCK (&conf->lock); \
+ { \
+ conf->cumulative.data_written += len; \
+ conf->incremental.data_written += len; \
+ conf->cumulative.block_count_write[lb2]++; \
+ conf->incremental.block_count_write[lb2]++; \
+ \
+ if (iosfd) { \
+ iosfd->data_written += len; \
+ iosfd->block_count_write[lb2]++; \
+ } \
+ } \
+ UNLOCK (&conf->lock); \
+ } while (0)
+
+
+#define BUMP_STATS(iosstat, type) \
+ do { \
+ struct ios_conf *conf = NULL; \
+ uint64_t value = 0; \
+ \
+ conf = this->private; \
+ \
+ LOCK(&iosstat->lock); \
+ { \
+ iosstat->counters[type]++; \
+ value = iosstat->counters[type]; \
+ } \
+ UNLOCK (&iosstat->lock); \
+ ios_stat_add_to_list (&conf->list[type], \
+ value, iosstat); \
+ \
+ } while (0)
+
+
+#define BUMP_THROUGHPUT(iosstat, type) \
+ do { \
+ struct ios_conf *conf = NULL; \
+ double elapsed; \
+ struct timeval *begin, *end; \
+ double throughput; \
+ int flag = 0; \
+ \
+ begin = &frame->begin; \
+ end = &frame->end; \
+ \
+ elapsed = (end->tv_sec - begin->tv_sec) * 1e6 \
+ + (end->tv_usec - begin->tv_usec); \
+ throughput = op_ret / elapsed; \
+ \
+ conf = this->private; \
+ LOCK(&iosstat->lock); \
+ { \
+ if (iosstat->thru_counters[type].throughput \
+ <= throughput) { \
+ iosstat->thru_counters[type].throughput = \
+ throughput; \
+ gettimeofday (&iosstat-> \
+ thru_counters[type].time, NULL); \
+ flag = 1; \
+ } \
+ } \
+ UNLOCK (&iosstat->lock); \
+ if (flag) \
+ ios_stat_add_to_list (&conf->thru_list[type], \
+ throughput, iosstat); \
+ } while (0)
+
+int
+ios_fd_ctx_get (fd_t *fd, xlator_t *this, struct ios_fd **iosfd)
+{
+ uint64_t iosfd64 = 0;
+ unsigned long iosfdlong = 0;
+ int ret = 0;
+
+ ret = fd_ctx_get (fd, this, &iosfd64);
+ iosfdlong = iosfd64;
+ if (ret != -1)
+ *iosfd = (void *) iosfdlong;
+
+ return ret;
+}
+
+
+
+int
+ios_fd_ctx_set (fd_t *fd, xlator_t *this, struct ios_fd *iosfd)
+{
+ uint64_t iosfd64 = 0;
+ int ret = 0;
+
+ iosfd64 = (unsigned long) iosfd;
+ ret = fd_ctx_set (fd, this, iosfd64);
+
+ return ret;
+}
+
+int
+ios_stat_ref (struct ios_stat *iosstat)
+{
+ LOCK (&iosstat->lock);
+ {
+ iosstat->refcnt++;
+ }
+ UNLOCK (&iosstat->lock);
+
+ return iosstat->refcnt;
+}
+
+int
+ios_stat_unref (struct ios_stat *iosstat)
{
- STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
+ int cleanup = 0;
+ LOCK (&iosstat->lock);
+ {
+ iosstat->refcnt--;
+ if (iosstat->refcnt == 0) {
+ if (iosstat->filename) {
+ GF_FREE (iosstat->filename);
+ iosstat->filename = NULL;
+ }
+ cleanup = 1;
+ }
+ }
+ UNLOCK (&iosstat->lock);
+
+ if (cleanup) {
+ GF_FREE (iosstat);
+ iosstat = NULL;
+ }
+
return 0;
}
-int32_t
-io_stats_open_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
+int
+ios_inode_ctx_set (inode_t *inode, xlator_t *this, struct ios_stat *iosstat)
+{
+ uint64_t iosstat64 = 0;
+ int ret = 0;
+
+ ios_stat_ref (iosstat);
+ iosstat64 = (unsigned long )iosstat;
+ ret = inode_ctx_put (inode, this, iosstat64);
+ return ret;
+}
+
+int
+ios_inode_ctx_get (inode_t *inode, xlator_t *this, struct ios_stat **iosstat)
+{
+ uint64_t iosstat64 = 0;
+ unsigned long iosstatlong = 0;
+ int ret = 0;
+
+ ret = inode_ctx_get (inode, this, &iosstat64);
+ iosstatlong = iosstat64;
+ if (ret != -1)
+ *iosstat = (void *) iosstatlong;
+
+ return ret;
+
+}
+
+int
+ios_stat_add_to_list (struct ios_stat_head *list_head, uint64_t value,
+ struct ios_stat *iosstat)
{
- STACK_UNWIND (frame, op_ret, op_errno, fd);
+ struct ios_stat_list *new = NULL;
+ struct ios_stat_list *entry = NULL;
+ struct ios_stat_list *t = NULL;
+ struct ios_stat_list *list_entry = NULL;
+ struct ios_stat_list *tmp = NULL;
+ struct ios_stat_list *last = NULL;
+ struct ios_stat *stat = NULL;
+ int cnt = 0;
+ int found = 0;
+ int reposition = 0;
+ double min_count = 0;
+
+ LOCK (&list_head->lock);
+ {
+
+ if (list_head->min_cnt == 0)
+ list_head->min_cnt = value;
+ if ((list_head->members == MAX_LIST_MEMBERS) &&
+ (list_head->min_cnt > value))
+ goto out;
+
+ list_for_each_entry_safe (entry, t,
+ &list_head->iosstats->list, list) {
+ cnt++;
+ if (cnt == list_head->members)
+ last = entry;
+
+ if (!uuid_compare (iosstat->gfid,
+ entry->iosstat->gfid)) {
+ list_entry = entry;
+ found = cnt;
+ entry->value = value;
+ if (!reposition) {
+ if (cnt == list_head->members)
+ list_head->min_cnt = value;
+ goto out;
+ }
+ break;
+ } else if (entry->value <= value && !reposition) {
+ reposition = cnt;
+ tmp = entry;
+ if (cnt == list_head->members - 1)
+ min_count = entry->value;
+ }
+ }
+ if (found) {
+ list_del (&list_entry->list);
+ list_add_tail (&list_entry->list, &tmp->list);
+ if (min_count)
+ list_head->min_cnt = min_count;
+ goto out;
+ } else if (list_head->members == MAX_LIST_MEMBERS && reposition) {
+ new = GF_CALLOC (1, sizeof (*new),
+ gf_io_stats_mt_ios_stat_list);
+ new->iosstat = iosstat;
+ new->value = value;
+ ios_stat_ref (iosstat);
+ list_add_tail (&new->list, &tmp->list);
+ stat = last->iosstat;
+ last->iosstat = NULL;
+ ios_stat_unref (stat);
+ list_del (&last->list);
+ GF_FREE (last);
+ if (reposition == MAX_LIST_MEMBERS)
+ list_head->min_cnt = value;
+ else if (min_count) {
+ list_head->min_cnt = min_count;
+ }
+ } else if (list_head->members < MAX_LIST_MEMBERS) {
+ new = GF_CALLOC (1, sizeof (*new),
+ gf_io_stats_mt_ios_stat_list);
+ new->iosstat = iosstat;
+ new->value = value;
+ ios_stat_ref (iosstat);
+ if (reposition) {
+ list_add_tail (&new->list, &tmp->list);
+ } else {
+ list_add_tail (&new->list, &entry->list);
+ }
+ list_head->members++;
+ if (list_head->min_cnt > value)
+ list_head->min_cnt = value;
+ }
+ }
+out:
+ UNLOCK (&list_head->lock);
return 0;
}
-int32_t
-io_stats_stat_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+inline int
+ios_stats_cleanup (xlator_t *this, inode_t *inode)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+
+ struct ios_stat *iosstat = NULL;
+ uint64_t iosstat64 = 0;
+
+ inode_ctx_del (inode, this, &iosstat64);
+ if (!iosstat64) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "could not get inode ctx");
+ return 0;
+ }
+ iosstat = (void *) (long)iosstat64;
+ if (iosstat) {
+ ios_stat_unref (iosstat);
+ }
return 0;
}
-int32_t
-io_stats_readv_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct stat *buf,
- struct iobref *iobref)
+#define ios_log(this, logfp, fmt ...) \
+ do { \
+ if (logfp) { \
+ fprintf (logfp, fmt); \
+ fprintf (logfp, "\n"); \
+ } \
+ gf_log (this->name, GF_LOG_INFO, fmt); \
+ } while (0)
+
+int
+ios_dump_file_stats (struct ios_stat_head *list_head, xlator_t *this, FILE* logfp)
{
- int i = 0;
- io_stats_private_t *priv = NULL;
+ struct ios_stat_list *entry = NULL;
- priv = this->private;
+ LOCK (&list_head->lock);
+ {
+ list_for_each_entry (entry, &list_head->iosstats->list, list) {
+ ios_log (this, logfp, "%-12.0f %s",
+ entry->value, entry->iosstat->filename);
+ }
+ }
+ UNLOCK (&list_head->lock);
+ return 0;
+}
- if (op_ret > 0) {
- for (i=0; i < GF_IO_STAT_BLK_SIZE_MAX; i++) {
- if (priv->read[i].size > iov_length (vector, count)) {
- break;
+int
+ios_dump_throughput_stats (struct ios_stat_head *list_head, xlator_t *this,
+ FILE* logfp, ios_stats_type_t type)
+{
+ struct ios_stat_list *entry = NULL;
+ struct timeval time = {0, };
+ char timestr[256] = {0, };
+
+ LOCK (&list_head->lock);
+ {
+ list_for_each_entry (entry, &list_head->iosstats->list, list) {
+ gf_time_fmt (timestr, sizeof timestr,
+ entry->iosstat->thru_counters[type].time.tv_sec,
+ gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS, time.tv_usec);
+
+ ios_log (this, logfp, "%s \t %-10.2f \t %s",
+ timestr, entry->value, entry->iosstat->filename);
+ }
+ }
+ UNLOCK (&list_head->lock);
+ return 0;
+}
+
+int
+io_stats_dump_global_to_logfp (xlator_t *this, struct ios_global_stats *stats,
+ struct timeval *now, int interval, FILE* logfp)
+{
+ int i = 0;
+ int per_line = 0;
+ int index = 0;
+ struct ios_stat_head *list_head = NULL;
+ struct ios_conf *conf = NULL;
+ char timestr[256] = {0, };
+ char str_header[128] = {0};
+ char str_read[128] = {0};
+ char str_write[128] = {0};
+
+ conf = this->private;
+
+ if (interval == -1)
+ ios_log (this, logfp, "\n=== Cumulative stats ===");
+ else
+ ios_log (this, logfp, "\n=== Interval %d stats ===",
+ interval);
+ ios_log (this, logfp, " Duration : %"PRId64" secs",
+ (uint64_t) (now->tv_sec - stats->started_at.tv_sec));
+ ios_log (this, logfp, " BytesRead : %"PRId64,
+ stats->data_read);
+ ios_log (this, logfp, " BytesWritten : %"PRId64"\n",
+ stats->data_written);
+
+ snprintf (str_header, sizeof (str_header), "%-12s %c", "Block Size", ':');
+ snprintf (str_read, sizeof (str_read), "%-12s %c", "Read Count", ':');
+ snprintf (str_write, sizeof (str_write), "%-12s %c", "Write Count", ':');
+ index = 14;
+ for (i = 0; i < 32; i++) {
+ if ((stats->block_count_read[i] == 0) &&
+ (stats->block_count_write[i] == 0))
+ continue;
+ per_line++;
+
+ snprintf (str_header+index, sizeof (str_header)-index,
+ "%16dB+", (1<<i));
+ if (stats->block_count_read[i])
+ snprintf (str_read+index, sizeof (str_read)-index,
+ "%18"PRId64, stats->block_count_read[i]);
+ else snprintf (str_read+index, sizeof (str_read)-index,
+ "%18s", "0");
+ if (stats->block_count_write[i])
+ snprintf (str_write+index, sizeof (str_write)-index,
+ "%18"PRId64, stats->block_count_write[i]);
+ else snprintf (str_write+index, sizeof (str_write)-index,
+ "%18s", "0");
+
+ index += 18;
+ if (per_line == 3) {
+ ios_log (this, logfp, "%s", str_header);
+ ios_log (this, logfp, "%s", str_read);
+ ios_log (this, logfp, "%s\n", str_write);
+
+ memset (str_header, 0, sizeof (str_header));
+ memset (str_read, 0, sizeof (str_read));
+ memset (str_write, 0, sizeof (str_write));
+
+ snprintf (str_header, sizeof (str_header), "%-12s %c",
+ "Block Size", ':');
+ snprintf (str_read, sizeof (str_read), "%-12s %c",
+ "Read Count", ':');
+ snprintf (str_write, sizeof (str_write), "%-12s %c",
+ "Write Count", ':');
+
+ index = 14;
+ per_line = 0;
+ }
+ }
+
+ if (per_line != 0) {
+ ios_log (this, logfp, "%s", str_header);
+ ios_log (this, logfp, "%s", str_read);
+ ios_log (this, logfp, "%s\n", str_write);
+ }
+
+ ios_log (this, logfp, "%-13s %10s %14s %14s %14s", "Fop",
+ "Call Count", "Avg-Latency", "Min-Latency",
+ "Max-Latency");
+ ios_log (this, logfp, "%-13s %10s %14s %14s %14s", "---", "----------",
+ "-----------", "-----------", "-----------");
+
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ if (stats->fop_hits[i] && !stats->latency[i].avg)
+ ios_log (this, logfp, "%-13s %10"PRId64" %11s "
+ "us %11s us %11s us", gf_fop_list[i],
+ stats->fop_hits[i], "0", "0", "0");
+ else if (stats->fop_hits[i] && stats->latency[i].avg)
+ ios_log (this, logfp, "%-13s %10"PRId64" %11.2lf us "
+ "%11.2lf us %11.2lf us", gf_fop_list[i],
+ stats->fop_hits[i], stats->latency[i].avg,
+ stats->latency[i].min, stats->latency[i].max);
+ }
+ ios_log (this, logfp, "------ ----- ----- ----- ----- ----- ----- ----- "
+ " ----- ----- ----- -----\n");
+
+ if (interval == -1) {
+ LOCK (&conf->lock);
+ {
+ gf_time_fmt (timestr, sizeof timestr,
+ conf->cumulative.max_openfd_time.tv_sec,
+ gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS,
+ conf->cumulative.max_openfd_time.tv_usec);
+ ios_log (this, logfp, "Current open fd's: %"PRId64
+ " Max open fd's: %"PRId64" time %s",
+ conf->cumulative.nr_opens,
+ conf->cumulative.max_nr_opens, timestr);
+ }
+ UNLOCK (&conf->lock);
+ ios_log (this, logfp, "\n==========Open File Stats========");
+ ios_log (this, logfp, "\nCOUNT: \t FILE NAME");
+ list_head = &conf->list[IOS_STATS_TYPE_OPEN];
+ ios_dump_file_stats (list_head, this, logfp);
+
+
+ ios_log (this, logfp, "\n==========Read File Stats========");
+ ios_log (this, logfp, "\nCOUNT: \t FILE NAME");
+ list_head = &conf->list[IOS_STATS_TYPE_READ];
+ ios_dump_file_stats (list_head, this, logfp);
+
+ ios_log (this, logfp, "\n==========Write File Stats========");
+ ios_log (this, logfp, "\nCOUNT: \t FILE NAME");
+ list_head = &conf->list[IOS_STATS_TYPE_WRITE];
+ ios_dump_file_stats (list_head, this, logfp);
+
+ ios_log (this, logfp, "\n==========Directory open stats========");
+ ios_log (this, logfp, "\nCOUNT: \t DIRECTORY NAME");
+ list_head = &conf->list[IOS_STATS_TYPE_OPENDIR];
+ ios_dump_file_stats (list_head, this, logfp);
+
+ ios_log (this, logfp, "\n========Directory readdirp Stats=======");
+ ios_log (this, logfp, "\nCOUNT: \t DIRECTORY NAME");
+ list_head = &conf->list[IOS_STATS_TYPE_READDIRP];
+ ios_dump_file_stats (list_head, this, logfp);
+
+ ios_log (this, logfp, "\n========Read Throughput File Stats=====");
+ ios_log (this, logfp, "\nTIMESTAMP \t\t\t THROUGHPUT(KBPS)"
+ "\tFILE NAME");
+ list_head = &conf->thru_list[IOS_STATS_THRU_READ];
+ ios_dump_throughput_stats(list_head, this, logfp, IOS_STATS_THRU_READ);
+
+ ios_log (this, logfp, "\n======Write Throughput File Stats======");
+ ios_log (this, logfp, "\nTIMESTAMP \t\t\t THROUGHPUT(KBPS)"
+ "\tFILE NAME");
+ list_head = &conf->thru_list[IOS_STATS_THRU_WRITE];
+ ios_dump_throughput_stats (list_head, this, logfp, IOS_STATS_THRU_WRITE);
+ }
+ return 0;
+}
+
+int
+io_stats_dump_global_to_dict (xlator_t *this, struct ios_global_stats *stats,
+ struct timeval *now, int interval, dict_t *dict)
+{
+ int ret = 0;
+ char key[256] = {0};
+ uint64_t sec = 0;
+ int i = 0;
+ uint64_t count = 0;
+
+ GF_ASSERT (stats);
+ GF_ASSERT (now);
+ GF_ASSERT (dict);
+ GF_ASSERT (this);
+
+ if (interval == -1)
+ snprintf (key, sizeof (key), "cumulative");
+ else
+ snprintf (key, sizeof (key), "interval");
+ ret = dict_set_int32 (dict, key, interval);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "failed to set "
+ "interval %d", interval);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-duration", interval);
+ sec = (uint64_t) (now->tv_sec - stats->started_at.tv_sec);
+ ret = dict_set_uint64 (dict, key, sec);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to set "
+ "duration(%d) - %"PRId64, interval, sec);
+ goto out;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-total-read", interval);
+ ret = dict_set_uint64 (dict, key, stats->data_read);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to set total "
+ "read(%d) - %"PRId64, interval, stats->data_read);
+ goto out;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-total-write", interval);
+ ret = dict_set_uint64 (dict, key, stats->data_written);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to set total "
+ "write(%d) - %"PRId64, interval, stats->data_written);
+ goto out;
+ }
+ for (i = 0; i < 32; i++) {
+ if (stats->block_count_read[i]) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%d-read-%d", interval,
+ (1 << i));
+ count = stats->block_count_read[i];
+ ret = dict_set_uint64 (dict, key, count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "set read-%db+, with: %"PRId64,
+ (1<<i), count);
+ goto out;
+ }
+ }
+ }
+
+ for (i = 0; i < 32; i++) {
+ if (stats->block_count_write[i]) {
+ snprintf (key, sizeof (key), "%d-write-%d", interval,
+ (1<<i));
+ count = stats->block_count_write[i];
+ ret = dict_set_uint64 (dict, key, count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "set write-%db+, with: %"PRId64,
+ (1<<i), count);
+ goto out;
}
}
- priv->read[i].hits++;
}
- STACK_UNWIND (frame, op_ret, op_errno, vector, count, buf, iobref);
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ if (stats->fop_hits[i] == 0)
+ continue;
+ snprintf (key, sizeof (key), "%d-%d-hits", interval, i);
+ ret = dict_set_uint64 (dict, key, stats->fop_hits[i]);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "set %s-fop-hits: %"PRIu64, gf_fop_list[i],
+ stats->fop_hits[i]);
+ goto out;
+ }
+
+ if (stats->latency[i].avg == 0)
+ continue;
+ snprintf (key, sizeof (key), "%d-%d-avglatency", interval, i);
+ ret = dict_set_double (dict, key, stats->latency[i].avg);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to set %s "
+ "avglatency(%d) with %f", gf_fop_list[i],
+ interval, stats->latency[i].avg);
+ goto out;
+ }
+ snprintf (key, sizeof (key), "%d-%d-minlatency", interval, i);
+ ret = dict_set_double (dict, key, stats->latency[i].min);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to set %s "
+ "minlatency(%d) with %f", gf_fop_list[i],
+ interval, stats->latency[i].min);
+ goto out;
+ }
+ snprintf (key, sizeof (key), "%d-%d-maxlatency", interval, i);
+ ret = dict_set_double (dict, key, stats->latency[i].max);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to set %s "
+ "maxlatency(%d) with %f", gf_fop_list[i],
+ interval, stats->latency[i].max);
+ goto out;
+ }
+ }
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret);
+ return ret;
+}
+
+int
+io_stats_dump_global (xlator_t *this, struct ios_global_stats *stats,
+ struct timeval *now, int interval,
+ struct ios_dump_args *args)
+{
+ int ret = -1;
+
+ GF_ASSERT (args);
+ GF_ASSERT (now);
+ GF_ASSERT (stats);
+ GF_ASSERT (this);
+
+
+
+ switch (args->type) {
+ case IOS_DUMP_TYPE_FILE:
+ ret = io_stats_dump_global_to_logfp (this, stats, now,
+ interval, args->u.logfp);
+ break;
+ case IOS_DUMP_TYPE_DICT:
+ ret = io_stats_dump_global_to_dict (this, stats, now,
+ interval, args->u.dict);
+ break;
+ default:
+ GF_ASSERT (0);
+ ret = -1;
+ break;
+ }
+ return ret;
+}
+
+int
+ios_dump_args_init (struct ios_dump_args *args, ios_dump_type_t type,
+ void *output)
+{
+ int ret = 0;
+
+ GF_ASSERT (args);
+ GF_ASSERT (type > IOS_DUMP_TYPE_NONE && type < IOS_DUMP_TYPE_MAX);
+ GF_ASSERT (output);
+
+ args->type = type;
+ switch (args->type) {
+ case IOS_DUMP_TYPE_FILE:
+ args->u.logfp = output;
+ break;
+ case IOS_DUMP_TYPE_DICT:
+ args->u.dict = output;
+ break;
+ default:
+ GF_ASSERT (0);
+ ret = -1;
+ }
+
+ return ret;
+}
+
+int
+io_stats_dump (xlator_t *this, struct ios_dump_args *args)
+{
+ struct ios_conf *conf = NULL;
+ struct ios_global_stats cumulative = {0, };
+ struct ios_global_stats incremental = {0, };
+ int increment = 0;
+ struct timeval now;
+
+ GF_ASSERT (this);
+ GF_ASSERT (args);
+ GF_ASSERT (args->type > IOS_DUMP_TYPE_NONE);
+ GF_ASSERT (args->type < IOS_DUMP_TYPE_MAX);
+
+ conf = this->private;
+
+ gettimeofday (&now, NULL);
+ LOCK (&conf->lock);
+ {
+ cumulative = conf->cumulative;
+ incremental = conf->incremental;
+
+ increment = conf->increment++;
+
+ memset (&conf->incremental, 0, sizeof (conf->incremental));
+ conf->incremental.started_at = now;
+ }
+ UNLOCK (&conf->lock);
+
+ io_stats_dump_global (this, &cumulative, &now, -1, args);
+ io_stats_dump_global (this, &incremental, &now, increment, args);
+
return 0;
}
-int32_t
-io_stats_writev_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+
+int
+io_stats_dump_fd (xlator_t *this, struct ios_fd *iosfd)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ struct ios_conf *conf = NULL;
+ struct timeval now;
+ uint64_t sec = 0;
+ uint64_t usec = 0;
+ int i = 0;
+
+ conf = this->private;
+
+ if (!conf->dump_fd_stats)
+ return 0;
+
+ if (!iosfd)
+ return 0;
+
+ gettimeofday (&now, NULL);
+
+ if (iosfd->opened_at.tv_usec > now.tv_usec) {
+ now.tv_usec += 1000000;
+ now.tv_usec--;
+ }
+
+ sec = now.tv_sec - iosfd->opened_at.tv_sec;
+ usec = now.tv_usec - iosfd->opened_at.tv_usec;
+
+ gf_log (this->name, GF_LOG_INFO,
+ "--- fd stats ---");
+
+ if (iosfd->filename)
+ gf_log (this->name, GF_LOG_INFO,
+ " Filename : %s",
+ iosfd->filename);
+
+ if (sec)
+ gf_log (this->name, GF_LOG_INFO,
+ " Lifetime : %"PRId64"secs, %"PRId64"usecs",
+ sec, usec);
+
+ if (iosfd->data_read)
+ gf_log (this->name, GF_LOG_INFO,
+ " BytesRead : %"PRId64" bytes",
+ iosfd->data_read);
+
+ if (iosfd->data_written)
+ gf_log (this->name, GF_LOG_INFO,
+ " BytesWritten : %"PRId64" bytes",
+ iosfd->data_written);
+
+ for (i = 0; i < 32; i++) {
+ if (iosfd->block_count_read[i])
+ gf_log (this->name, GF_LOG_INFO,
+ " Read %06db+ : %"PRId64,
+ (1 << i), iosfd->block_count_read[i]);
+ }
+ for (i = 0; i < 32; i++) {
+ if (iosfd->block_count_write[i])
+ gf_log (this->name, GF_LOG_INFO,
+ "Write %06db+ : %"PRId64,
+ (1 << i), iosfd->block_count_write[i]);
+ }
return 0;
}
-int32_t
-io_stats_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entries,
- int32_t count)
+static void
+update_ios_latency_stats (struct ios_global_stats *stats, double elapsed,
+ glusterfs_fop_t op)
{
- STACK_UNWIND (frame, op_ret, op_errno, entries, count);
+ double avg;
+
+ GF_ASSERT (stats);
+
+ if (!stats->latency[op].min)
+ stats->latency[op].min = elapsed;
+ if (stats->latency[op].min > elapsed)
+ stats->latency[op].min = elapsed;
+ if (stats->latency[op].max < elapsed)
+ stats->latency[op].max = elapsed;
+
+ avg = stats->latency[op].avg;
+
+ stats->latency[op].avg = avg + (elapsed - avg) / stats->fop_hits[op];
+}
+
+int
+update_ios_latency (struct ios_conf *conf, call_frame_t *frame,
+ glusterfs_fop_t op)
+{
+ double elapsed;
+ struct timeval *begin, *end;
+
+ begin = &frame->begin;
+ end = &frame->end;
+
+ elapsed = (end->tv_sec - begin->tv_sec) * 1e6
+ + (end->tv_usec - begin->tv_usec);
+
+ update_ios_latency_stats (&conf->cumulative, elapsed, op);
+ update_ios_latency_stats (&conf->incremental, elapsed, op);
+
return 0;
}
int32_t
-io_stats_readdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *buf)
+io_stats_dump_stats_to_dict (xlator_t *this, dict_t *resp,
+ ios_stats_type_t flags, int32_t list_cnt)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ struct ios_conf *conf = NULL;
+ int cnt = 0;
+ char key[256];
+ struct ios_stat_head *list_head = NULL;
+ struct ios_stat_list *entry = NULL;
+ int ret = -1;
+ ios_stats_thru_t index = IOS_STATS_THRU_MAX;
+ char timestr[256] = {0, };
+ char *dict_timestr = NULL;
+
+ conf = this->private;
+
+ switch (flags) {
+ case IOS_STATS_TYPE_OPEN:
+ list_head = &conf->list[IOS_STATS_TYPE_OPEN];
+ LOCK (&conf->lock);
+ {
+ ret = dict_set_uint64 (resp, "current-open",
+ conf->cumulative.nr_opens);
+ if (ret)
+ goto unlock;
+ ret = dict_set_uint64 (resp, "max-open",
+ conf->cumulative.max_nr_opens);
+
+ gf_time_fmt (timestr, sizeof timestr,
+ conf->cumulative.max_openfd_time.tv_sec,
+ gf_timefmt_FT);
+ if (conf->cumulative.max_openfd_time.tv_sec)
+ snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS,
+ conf->cumulative.max_openfd_time.tv_usec);
+
+ dict_timestr = gf_strdup (timestr);
+ if (!dict_timestr)
+ goto unlock;
+ ret = dict_set_dynstr (resp, "max-openfd-time",
+ dict_timestr);
+ if (ret)
+ goto unlock;
+ }
+ unlock:
+ UNLOCK (&conf->lock);
+ /* Do not proceed if we came here because of some error
+ * during the dict operation */
+ if (ret)
+ goto out;
+ break;
+ case IOS_STATS_TYPE_READ:
+ list_head = &conf->list[IOS_STATS_TYPE_READ];
+ break;
+ case IOS_STATS_TYPE_WRITE:
+ list_head = &conf->list[IOS_STATS_TYPE_WRITE];
+ break;
+ case IOS_STATS_TYPE_OPENDIR:
+ list_head = &conf->list[IOS_STATS_TYPE_OPENDIR];
+ break;
+ case IOS_STATS_TYPE_READDIRP:
+ list_head = &conf->list[IOS_STATS_TYPE_READDIRP];
+ break;
+ case IOS_STATS_TYPE_READ_THROUGHPUT:
+ list_head = &conf->thru_list[IOS_STATS_THRU_READ];
+ index = IOS_STATS_THRU_READ;
+ break;
+ case IOS_STATS_TYPE_WRITE_THROUGHPUT:
+ list_head = &conf->thru_list[IOS_STATS_THRU_WRITE];
+ index = IOS_STATS_THRU_WRITE;
+ break;
+
+ default:
+ goto out;
+ }
+ ret = dict_set_int32 (resp, "top-op", flags);
+ if (!list_cnt)
+ goto out;
+ LOCK (&list_head->lock);
+ {
+ list_for_each_entry (entry, &list_head->iosstats->list, list) {
+
+ cnt++;
+ snprintf (key, 256, "%s-%d", "filename", cnt);
+ ret = dict_set_str (resp, key, entry->iosstat->filename);
+ if (ret)
+ goto unlock_list_head;
+ snprintf (key, 256, "%s-%d", "value",cnt);
+ ret = dict_set_uint64 (resp, key, entry->value);
+ if (ret)
+ goto unlock_list_head;
+ if (index != IOS_STATS_THRU_MAX) {
+ snprintf (key, 256, "%s-%d", "time-sec", cnt);
+ ret = dict_set_int32 (resp, key,
+ entry->iosstat->thru_counters[index].time.tv_sec);
+ if (ret)
+ goto unlock_list_head;
+ snprintf (key, 256, "%s-%d", "time-usec", cnt);
+ ret = dict_set_int32 (resp, key,
+ entry->iosstat->thru_counters[index].time.tv_usec);
+ if (ret)
+ goto unlock_list_head;
+ }
+ if (cnt == list_cnt)
+ break;
+
+ }
+ }
+unlock_list_head:
+ UNLOCK (&list_head->lock);
+ /* ret is !=0 if some dict operation in the above critical region
+ * failed. */
+ if (ret)
+ goto out;
+ ret = dict_set_int32 (resp, "members", cnt);
+ out:
+ return ret;
+}
+
+int
+io_stats_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ struct ios_fd *iosfd = NULL;
+ char *path = NULL;
+ struct ios_stat *iosstat = NULL;
+ struct ios_conf *conf = NULL;
+
+ conf = this->private;
+
+ path = frame->local;
+ frame->local = NULL;
+
+ if (!path)
+ goto unwind;
+
+ if (op_ret < 0) {
+ GF_FREE (path);
+ goto unwind;
+ }
+
+ iosfd = GF_CALLOC (1, sizeof (*iosfd), gf_io_stats_mt_ios_fd);
+ if (!iosfd) {
+ GF_FREE (path);
+ goto unwind;
+ }
+
+ iosfd->filename = path;
+ gettimeofday (&iosfd->opened_at, NULL);
+
+ ios_fd_ctx_set (fd, this, iosfd);
+ LOCK (&conf->lock);
+ {
+ conf->cumulative.nr_opens++;
+ if (conf->cumulative.nr_opens > conf->cumulative.max_nr_opens) {
+ conf->cumulative.max_nr_opens = conf->cumulative.nr_opens;
+ conf->cumulative.max_openfd_time = iosfd->opened_at;
+ }
+ }
+ UNLOCK (&conf->lock);
+
+ iosstat = GF_CALLOC (1, sizeof (*iosstat), gf_io_stats_mt_ios_stat);
+ if (!iosstat) {
+ GF_FREE (path);
+ goto unwind;
+ }
+ iosstat->filename = gf_strdup (path);
+ uuid_copy (iosstat->gfid, buf->ia_gfid);
+ LOCK_INIT (&iosstat->lock);
+ ios_inode_ctx_set (fd->inode, this, iosstat);
+
+unwind:
+ UPDATE_PROFILE_STATS (frame, CREATE);
+ STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
-int32_t
-io_stats_fsync_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+
+int
+io_stats_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ struct ios_fd *iosfd = NULL;
+ char *path = NULL;
+ struct ios_stat *iosstat = NULL;
+ struct ios_conf *conf = NULL;
+
+ conf = this->private;
+ path = frame->local;
+ frame->local = NULL;
+
+ if (!path)
+ goto unwind;
+
+ if (op_ret < 0) {
+ GF_FREE (path);
+ goto unwind;
+ }
+
+ iosfd = GF_CALLOC (1, sizeof (*iosfd), gf_io_stats_mt_ios_fd);
+ if (!iosfd) {
+ GF_FREE (path);
+ goto unwind;
+ }
+
+ iosfd->filename = path;
+ gettimeofday (&iosfd->opened_at, NULL);
+
+ ios_fd_ctx_set (fd, this, iosfd);
+
+ ios_inode_ctx_get (fd->inode, this, &iosstat);
+ if (!iosstat) {
+ iosstat = GF_CALLOC (1, sizeof (*iosstat),
+ gf_io_stats_mt_ios_stat);
+ if (iosstat) {
+ iosstat->filename = gf_strdup (path);
+ uuid_copy (iosstat->gfid, fd->inode->gfid);
+ LOCK_INIT (&iosstat->lock);
+ ios_inode_ctx_set (fd->inode, this, iosstat);
+ }
+ }
+
+ LOCK (&conf->lock);
+ {
+ conf->cumulative.nr_opens++;
+ if (conf->cumulative.nr_opens > conf->cumulative.max_nr_opens) {
+ conf->cumulative.max_nr_opens = conf->cumulative.nr_opens;
+ conf->cumulative.max_openfd_time = iosfd->opened_at;
+ }
+ }
+ UNLOCK (&conf->lock);
+ if (iosstat) {
+ BUMP_STATS (iosstat, IOS_STATS_TYPE_OPEN);
+ iosstat = NULL;
+ }
+unwind:
+ UPDATE_PROFILE_STATS (frame, OPEN);
+
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
return 0;
+
}
-int32_t
-io_stats_chown_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+
+int
+io_stats_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ UPDATE_PROFILE_STATS (frame, STAT);
+ STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
return 0;
}
-int32_t
-io_stats_chmod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+
+int
+io_stats_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iovec *vector, int32_t count,
+ struct iatt *buf, struct iobref *iobref, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ int len = 0;
+ fd_t *fd = NULL;
+ struct ios_stat *iosstat = NULL;
+
+ fd = frame->local;
+ frame->local = NULL;
+
+ if (op_ret > 0) {
+ len = iov_length (vector, count);
+ BUMP_READ (fd, len);
+ }
+
+ UPDATE_PROFILE_STATS (frame, READ);
+ ios_inode_ctx_get (fd->inode, this, &iosstat);
+
+ if (iosstat) {
+ BUMP_STATS (iosstat, IOS_STATS_TYPE_READ);
+ BUMP_THROUGHPUT (iosstat, IOS_STATS_THRU_READ);
+ iosstat = NULL;
+ }
+
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno,
+ vector, count, buf, iobref, xdata);
return 0;
+
}
-int32_t
-io_stats_fchmod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+
+int
+io_stats_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ struct ios_stat *iosstat = NULL;
+ inode_t *inode = NULL;
+
+ UPDATE_PROFILE_STATS (frame, WRITE);
+ if (frame->local){
+ inode = frame->local;
+ frame->local = NULL;
+ ios_inode_ctx_get (inode, this, &iosstat);
+ if (iosstat) {
+ BUMP_STATS (iosstat, IOS_STATS_TYPE_WRITE);
+ BUMP_THROUGHPUT (iosstat, IOS_STATS_THRU_WRITE);
+ inode = NULL;
+ iosstat = NULL;
+ }
+ }
+
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
return 0;
+
}
-int32_t
-io_stats_fchown_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+
+
+
+int
+io_stats_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *buf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ struct ios_stat *iosstat = NULL;
+ inode_t *inode = frame->local;
+
+ frame->local = NULL;
+
+ UPDATE_PROFILE_STATS (frame, READDIRP);
+
+ ios_inode_ctx_get (inode, this, &iosstat);
+
+ if (iosstat) {
+ BUMP_STATS (iosstat, IOS_STATS_TYPE_READDIRP);
+ iosstat = NULL;
+ }
+
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, buf, xdata);
return 0;
}
-int32_t
-io_stats_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+
+int
+io_stats_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *buf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ UPDATE_PROFILE_STATS (frame, READDIR);
+ STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, buf, xdata);
return 0;
}
-int32_t
-io_stats_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+
+int
+io_stats_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ UPDATE_PROFILE_STATS (frame, FSYNC);
+ STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
return 0;
}
-int32_t
-io_stats_readlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- const char *buf)
+
+int
+io_stats_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ UPDATE_PROFILE_STATS (frame, SETATTR);
+ STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop, xdata);
return 0;
}
-int32_t
-io_stats_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf,
- dict_t *xattr)
+
+int
+io_stats_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf, xattr);
+ UPDATE_PROFILE_STATS (frame, UNLINK);
+ STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
return 0;
+
}
-int32_t
-io_stats_symlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
+
+int
+io_stats_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+ UPDATE_PROFILE_STATS (frame, RENAME);
+ STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf,
+ preoldparent, postoldparent,
+ prenewparent, postnewparent, xdata);
return 0;
}
-int32_t
-io_stats_mknod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
+
+int
+io_stats_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *buf,
+ struct iatt *sbuf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+ UPDATE_PROFILE_STATS (frame, READLINK);
+ STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, buf, sbuf, xdata);
return 0;
}
-int32_t
-io_stats_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
+int
+io_stats_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ dict_t *xdata, struct iatt *postparent)
{
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+ UPDATE_PROFILE_STATS (frame, LOOKUP);
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, xdata,
+ postparent);
return 0;
}
-int32_t
-io_stats_link_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
+
+int
+io_stats_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+ UPDATE_PROFILE_STATS (frame, SYMLINK);
+ STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
-int32_t
-io_stats_flush_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+
+int
+io_stats_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ UPDATE_PROFILE_STATS (frame, MKNOD);
+ STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
-int32_t
-io_stats_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
+int
+io_stats_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, fd);
+ struct ios_stat *iosstat = NULL;
+ char *path = frame->local;
+
+ UPDATE_PROFILE_STATS (frame, MKDIR);
+ if (op_ret < 0)
+ goto unwind;
+
+ iosstat = GF_CALLOC (1, sizeof (*iosstat), gf_io_stats_mt_ios_stat);
+ if (iosstat) {
+ LOCK_INIT (&iosstat->lock);
+ iosstat->filename = gf_strdup(path);
+ uuid_copy (iosstat->gfid, buf->ia_gfid);
+ ios_inode_ctx_set (inode, this, iosstat);
+ }
+
+unwind:
+ /* local is assigned with path */
+ GF_FREE (frame->local);
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
-int32_t
-io_stats_rmdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+
+int
+io_stats_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ UPDATE_PROFILE_STATS (frame, LINK);
+ STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
-int32_t
-io_stats_truncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+
+int
+io_stats_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ UPDATE_PROFILE_STATS (frame, FLUSH);
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
return 0;
}
-int32_t
-io_stats_utimens_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+
+int
+io_stats_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ struct ios_stat *iosstat = NULL;
+ int ret = -1;
+
+ UPDATE_PROFILE_STATS (frame, OPENDIR);
+ if (op_ret < 0)
+ goto unwind;
+
+ ios_fd_ctx_set (fd, this, 0);
+
+ ret = ios_inode_ctx_get (fd->inode, this, &iosstat);
+ if (!ret)
+ BUMP_STATS (iosstat, IOS_STATS_TYPE_OPENDIR);
+
+unwind:
+ STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata);
return 0;
}
-int32_t
-io_stats_statfs_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct statvfs *buf)
+
+int
+io_stats_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+
+ UPDATE_PROFILE_STATS (frame, RMDIR);
+
+ STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
return 0;
}
-int32_t
-io_stats_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+
+int
+io_stats_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ UPDATE_PROFILE_STATS (frame, TRUNCATE);
+ STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
return 0;
}
-int32_t
-io_stats_getxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
- io_stats_private_t *priv = this->private;
- int i = 0;
- char keycont[] = "trusted.glusterfs.hits." /* 23 chars */
- "0123456789"
- "0123456789"
- "0123456789"
- "0123456789";
- int ret = -1;
-
- memset (keycont + 23, '\0', 40);
- for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- if (!(priv->fop_records[i].enabled &&
- priv->fop_records[i].name))
- continue;
+int
+io_stats_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS (frame, STATFS);
+ STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
- strncpy (keycont + 23, priv->fop_records[i].name, 40);
- LOCK(&priv->lock);
- {
- ret = dict_set_uint32 (dict, keycont,
- priv->fop_records[i].hits);
- }
- UNLOCK(&priv->lock);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "dict set failed for xattr %s",
- keycont);
- break;
- }
- }
- STACK_UNWIND (frame, op_ret, op_errno, dict);
+int
+io_stats_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS (frame, SETXATTR);
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
return 0;
}
-int32_t
-io_stats_removexattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+
+int
+io_stats_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ UPDATE_PROFILE_STATS (frame, GETXATTR);
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
return 0;
}
-int32_t
-io_stats_fsyncdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+int
+io_stats_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ UPDATE_PROFILE_STATS (frame, REMOVEXATTR);
+ STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
return 0;
}
-int32_t
-io_stats_access_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+int
+io_stats_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ UPDATE_PROFILE_STATS (frame, FSETXATTR);
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
return 0;
}
-int32_t
-io_stats_ftruncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+
+int
+io_stats_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ UPDATE_PROFILE_STATS (frame, FGETXATTR);
+ STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, xdata);
return 0;
}
-int32_t
-io_stats_fstat_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+
+int
+io_stats_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ UPDATE_PROFILE_STATS (frame, FREMOVEXATTR);
+ STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata);
return 0;
}
-int32_t
-io_stats_lk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct flock *lock)
+
+int
+io_stats_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, lock);
+ UPDATE_PROFILE_STATS (frame, FSYNCDIR);
+ STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, xdata);
return 0;
}
-int32_t
-io_stats_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+int
+io_stats_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ UPDATE_PROFILE_STATS (frame, ACCESS);
+ STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, xdata);
return 0;
}
-int32_t
-io_stats_entrylk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+
+int
+io_stats_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ UPDATE_PROFILE_STATS (frame, FTRUNCATE);
+ STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
return 0;
}
-int32_t
-io_stats_xattrop_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
+int
+io_stats_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, dict);
+ UPDATE_PROFILE_STATS (frame, FSTAT);
+ STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata);
return 0;
}
-int32_t
-io_stats_fxattrop_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
+
+int
+io_stats_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS(frame, FALLOCATE);
+ STACK_UNWIND_STRICT(fallocate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+
+int
+io_stats_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, dict);
+ UPDATE_PROFILE_STATS(frame, DISCARD);
+ STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int
+io_stats_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS(frame, ZEROFILL);
+ STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
-int32_t
-io_stats_inodelk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+int
+io_stats_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ UPDATE_PROFILE_STATS (frame, LK);
+ STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata);
return 0;
}
-int32_t
+int
+io_stats_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS (frame, ENTRYLK);
+ STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int
+io_stats_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS (frame, XATTROP);
+ STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+
+int
+io_stats_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS (frame, FXATTROP);
+ STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+
+int
+io_stats_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS (frame, INODELK);
+ STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
io_stats_entrylk (call_frame_t *frame, xlator_t *this,
const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
- BUMP_HIT(ENTRYLK);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_entrylk_cbk,
+ STACK_WIND (frame, io_stats_entrylk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->entrylk,
- volume, loc, basename, cmd, type);
+ volume, loc, basename, cmd, type, xdata);
return 0;
}
-int32_t
-io_stats_inodelk (call_frame_t *frame,
- xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct flock *flock)
+
+int
+io_stats_inodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- BUMP_HIT(INODELK);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_inodelk_cbk,
+ STACK_WIND (frame, io_stats_inodelk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->inodelk,
- volume, loc, cmd, flock);
+ volume, loc, cmd, flock, xdata);
return 0;
}
-int32_t
-io_stats_finodelk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+int
+io_stats_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ UPDATE_PROFILE_STATS (frame, FINODELK);
+ STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno, xdata);
return 0;
}
-int32_t
-io_stats_finodelk (call_frame_t *frame,
- xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct flock *flock)
+
+int
+io_stats_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- BUMP_HIT(FINODELK);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_finodelk_cbk,
+ STACK_WIND (frame, io_stats_finodelk_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->finodelk,
- volume, fd, cmd, flock);
+ volume, fd, cmd, flock, xdata);
return 0;
}
-int32_t
-io_stats_xattrop (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- gf_xattrop_flags_t flags,
- dict_t *dict)
+int
+io_stats_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- BUMP_HIT(XATTROP);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_xattrop_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->xattrop,
- loc, flags, dict);
-
+ loc, flags, dict, xdata);
return 0;
}
-int32_t
-io_stats_fxattrop (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- gf_xattrop_flags_t flags,
- dict_t *dict)
+
+int
+io_stats_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- BUMP_HIT(FXATTROP);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_fxattrop_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fxattrop,
- fd, flags, dict);
-
+ fd, flags, dict, xdata);
return 0;
}
-int32_t
-io_stats_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req)
+
+int
+io_stats_lookup (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xdata)
{
- BUMP_HIT(LOOKUP);
+ START_FOP_LATENCY (frame);
STACK_WIND (frame, io_stats_lookup_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup,
- loc, xattr_req);
-
+ loc, xdata);
return 0;
}
-int32_t
-io_stats_stat (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
+
+int
+io_stats_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- BUMP_HIT(STAT);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_stat_cbk,
+ STACK_WIND (frame, io_stats_stat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->stat,
- loc);
-
+ loc, xdata);
return 0;
}
-int32_t
-io_stats_readlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- size_t size)
+
+int
+io_stats_readlink (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, size_t size, dict_t *xdata)
{
- BUMP_HIT(READLINK);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_readlink_cbk,
+ STACK_WIND (frame, io_stats_readlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readlink,
- loc,
- size);
-
+ loc, size, xdata);
return 0;
}
-int32_t
-io_stats_mknod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode,
- dev_t dev)
+
+int
+io_stats_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t dev, mode_t umask, dict_t *xdata)
{
- BUMP_HIT(MKNOD);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_mknod_cbk,
+ STACK_WIND (frame, io_stats_mknod_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mknod,
- loc,
- mode,
- dev);
-
+ loc, mode, dev, umask, xdata);
return 0;
}
-int32_t
-io_stats_mkdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
+
+int
+io_stats_mkdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
{
- BUMP_HIT(MKDIR);
+ frame->local = gf_strdup (loc->path);
- STACK_WIND (frame,
- io_stats_mkdir_cbk,
+ START_FOP_LATENCY (frame);
+
+ STACK_WIND (frame, io_stats_mkdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->mkdir,
- loc,
- mode);
+ loc, mode, umask, xdata);
return 0;
}
-int32_t
-io_stats_unlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
+
+int
+io_stats_unlink (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int xflag, dict_t *xdata)
{
- BUMP_HIT(UNLINK);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_unlink_cbk,
+ STACK_WIND (frame, io_stats_unlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->unlink,
- loc);
+ loc, xflag, xdata);
return 0;
}
-int32_t
-io_stats_rmdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
+
+int
+io_stats_rmdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int flags, dict_t *xdata)
{
- BUMP_HIT(RMDIR);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_rmdir_cbk,
+ STACK_WIND (frame, io_stats_rmdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rmdir,
- loc);
-
+ loc, flags, xdata);
return 0;
}
-int32_t
-io_stats_symlink (call_frame_t *frame,
- xlator_t *this,
- const char *linkpath,
- loc_t *loc)
+
+int
+io_stats_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- BUMP_HIT(SYMLINK);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_symlink_cbk,
+ STACK_WIND (frame, io_stats_symlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->symlink,
- linkpath,
- loc);
-
+ linkpath, loc, umask, xdata);
return 0;
}
-int32_t
-io_stats_rename (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
+
+int
+io_stats_rename (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- BUMP_HIT(RENAME);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_rename_cbk,
+ STACK_WIND (frame, io_stats_rename_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rename,
- oldloc,
- newloc);
-
+ oldloc, newloc, xdata);
return 0;
}
-int32_t
-io_stats_link (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
+
+int
+io_stats_link (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
- BUMP_HIT(LINK);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_link_cbk,
+ STACK_WIND (frame, io_stats_link_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->link,
- oldloc,
- newloc);
+ oldloc, newloc, xdata);
return 0;
}
-int32_t
-io_stats_chmod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
+
+int
+io_stats_setattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- BUMP_HIT(CHMOD);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_chmod_cbk,
+ STACK_WIND (frame, io_stats_setattr_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->chmod,
- loc,
- mode);
-
+ FIRST_CHILD(this)->fops->setattr,
+ loc, stbuf, valid, xdata);
return 0;
}
-int32_t
-io_stats_chown (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- uid_t uid,
- gid_t gid)
+
+int
+io_stats_truncate (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, off_t offset, dict_t *xdata)
{
- BUMP_HIT(CHOWN);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_chown_cbk,
+ STACK_WIND (frame, io_stats_truncate_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->chown,
- loc,
- uid,
- gid);
-
+ FIRST_CHILD(this)->fops->truncate,
+ loc, offset, xdata);
return 0;
}
-int32_t
-io_stats_truncate (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset)
+
+int
+io_stats_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata)
{
- BUMP_HIT(TRUNCATE);
+ frame->local = gf_strdup (loc->path);
- STACK_WIND (frame,
- io_stats_truncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate,
- loc,
- offset);
+ START_FOP_LATENCY (frame);
+ STACK_WIND (frame, io_stats_open_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open,
+ loc, flags, fd, xdata);
return 0;
}
-int32_t
-io_stats_utimens (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- struct timespec tv[2])
+
+int
+io_stats_create (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *xdata)
{
- BUMP_HIT(UTIMENS);
+ frame->local = gf_strdup (loc->path);
- STACK_WIND (frame,
- io_stats_utimens_cbk,
+ START_FOP_LATENCY (frame);
+
+ STACK_WIND (frame, io_stats_create_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->utimens,
- loc,
- tv);
+ FIRST_CHILD(this)->fops->create,
+ loc, flags, mode, umask, fd, xdata);
+ return 0;
+}
+
+int
+io_stats_readv (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
+{
+ frame->local = fd;
+
+ START_FOP_LATENCY (frame);
+
+ STACK_WIND (frame, io_stats_readv_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv,
+ fd, size, offset, flags, xdata);
return 0;
}
-int32_t
-io_stats_open (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- fd_t *fd)
+
+int
+io_stats_writev (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
- BUMP_HIT(OPEN);
+ int len = 0;
+
+ if (fd->inode)
+ frame->local = fd->inode;
+ len = iov_length (vector, count);
- STACK_WIND (frame,
- io_stats_open_cbk,
+ BUMP_WRITE (fd, len);
+ START_FOP_LATENCY (frame);
+
+ STACK_WIND (frame, io_stats_writev_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open,
- loc,
- flags,
- fd);
+ FIRST_CHILD(this)->fops->writev,
+ fd, vector, count, offset, flags, iobref, xdata);
return 0;
+
}
-int32_t
-io_stats_create (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- mode_t mode,
- fd_t *fd)
+
+int
+io_stats_statfs (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xdata)
{
- BUMP_HIT(CREATE);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_create_cbk,
+ STACK_WIND (frame, io_stats_statfs_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create,
- loc,
- flags,
- mode,
- fd);
+ FIRST_CHILD(this)->fops->statfs,
+ loc, xdata);
return 0;
}
-int32_t
-io_stats_readv (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset)
+
+int
+io_stats_flush (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *xdata)
{
- BUMP_HIT(READ);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_readv_cbk,
+ STACK_WIND (frame, io_stats_flush_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv,
- fd,
- size,
- offset);
+ FIRST_CHILD(this)->fops->flush,
+ fd, xdata);
return 0;
}
-int32_t
-io_stats_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t offset,
- struct iobref *iobref)
+
+int
+io_stats_fsync (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int32_t flags, dict_t *xdata)
{
- int i = 0;
- io_stats_private_t *priv = NULL;
+ START_FOP_LATENCY (frame);
- priv = this->private;
+ STACK_WIND (frame, io_stats_fsync_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync,
+ fd, flags, xdata);
+ return 0;
+}
- BUMP_HIT(WRITE);
- for (i=0; i < GF_IO_STAT_BLK_SIZE_MAX; i++) {
- if (priv->write[i].size > iov_length (vector, count)) {
- break;
+int
+conditional_dump (dict_t *dict, char *key, data_t *value, void *data)
+{
+ struct {
+ xlator_t *this;
+ inode_t *inode;
+ const char *path;
+ } *stub;
+ xlator_t *this = NULL;
+ char *filename = NULL;
+ FILE *logfp = NULL;
+ struct ios_dump_args args = {0};
+
+ stub = data;
+ this = stub->this;
+
+ filename = alloca (value->len + 1);
+ memset (filename, 0, value->len + 1);
+ memcpy (filename, data_to_str (value), value->len);
+
+ if (fnmatch ("*io*stat*dump", key, 0) == 0) {
+
+ if (!strncmp (filename, "", 1)) {
+ gf_log (this->name, GF_LOG_ERROR, "No filename given");
+ return -1;
+ }
+ logfp = fopen (filename, "w+");
+ if (!logfp) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to open %s "
+ "for writing", filename);
+ return -1;
}
+ (void) ios_dump_args_init (&args, IOS_DUMP_TYPE_FILE,
+ logfp);
+ io_stats_dump (this, &args);
+ fclose (logfp);
}
- priv->write[i].hits++;
-
- STACK_WIND (frame,
- io_stats_writev_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev,
- fd,
- vector,
- count,
- offset,
- iobref);
return 0;
}
-int32_t
-io_stats_statfs (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
+
+int
+io_stats_setxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
{
- BUMP_HIT(STATFS);
+ struct {
+ xlator_t *this;
+ inode_t *inode;
+ const char *path;
+ } stub;
- STACK_WIND (frame,
- io_stats_statfs_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->statfs,
- loc);
+ stub.this = this;
+ stub.inode = loc->inode;
+ stub.path = loc->path;
+
+ dict_foreach (dict, conditional_dump, &stub);
+
+ START_FOP_LATENCY (frame);
+
+ STACK_WIND (frame, io_stats_setxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr,
+ loc, dict, flags, xdata);
return 0;
}
-int32_t
-io_stats_flush (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
+
+int
+io_stats_getxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
{
- BUMP_HIT(FLUSH);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_flush_cbk,
+ STACK_WIND (frame, io_stats_getxattr_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush,
- fd);
+ FIRST_CHILD(this)->fops->getxattr,
+ loc, name, xdata);
return 0;
}
-int32_t
-io_stats_fsync (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags)
+int
+io_stats_removexattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
{
- BUMP_HIT(FSYNC);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_fsync_cbk,
+ STACK_WIND (frame, io_stats_removexattr_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync,
- fd,
- flags);
+ FIRST_CHILD(this)->fops->removexattr,
+ loc, name, xdata);
return 0;
}
-int32_t
-io_stats_setxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- int32_t flags)
+
+int
+io_stats_fsetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
{
- BUMP_HIT(SETXATTR);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_setxattr_cbk,
+ STACK_WIND (frame, io_stats_fsetxattr_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr,
- loc,
- dict,
- flags);
+ FIRST_CHILD(this)->fops->fsetxattr,
+ fd, dict, flags, xdata);
return 0;
}
-int32_t
-io_stats_getxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
+
+int
+io_stats_fgetxattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
{
- BUMP_HIT(GETXATTR);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_getxattr_cbk,
+ STACK_WIND (frame, io_stats_fgetxattr_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr,
- loc,
- name);
+ FIRST_CHILD(this)->fops->fgetxattr,
+ fd, name, xdata);
return 0;
}
-int32_t
-io_stats_removexattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
+
+int
+io_stats_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
{
- BUMP_HIT(REMOVEXATTR);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_removexattr_cbk,
+ STACK_WIND (frame, io_stats_fremovexattr_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr,
- loc,
- name);
-
+ FIRST_CHILD(this)->fops->fremovexattr,
+ fd, name, xdata);
return 0;
}
-int32_t
-io_stats_opendir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- fd_t *fd)
+
+int
+io_stats_opendir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, fd_t *fd, dict_t *xdata)
{
- BUMP_HIT(OPENDIR);
- STACK_WIND (frame,
- io_stats_opendir_cbk,
+ START_FOP_LATENCY (frame);
+
+ STACK_WIND (frame, io_stats_opendir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->opendir,
- loc,
- fd);
+ loc, fd, xdata);
return 0;
}
-int32_t
-io_stats_getdents (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset,
- int32_t flag)
+int
+io_stats_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict)
{
- BUMP_HIT(GETDENTS);
+ frame->local = fd->inode;
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_getdents_cbk,
+ STACK_WIND (frame, io_stats_readdirp_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getdents,
- fd,
- size,
- offset,
- flag);
+ FIRST_CHILD(this)->fops->readdirp,
+ fd, size, offset, dict);
return 0;
}
-int32_t
-io_stats_readdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset)
+int
+io_stats_readdir (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t offset, dict_t *xdata)
{
- BUMP_HIT(READDIR);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_readdir_cbk,
+ STACK_WIND (frame, io_stats_readdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readdir,
- fd,
- size,
- offset);
-
+ fd, size, offset, xdata);
return 0;
}
-int32_t
-io_stats_fsyncdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t datasync)
+int
+io_stats_fsyncdir (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int32_t datasync, dict_t *xdata)
{
- BUMP_HIT(FSYNCDIR);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_fsyncdir_cbk,
+ STACK_WIND (frame, io_stats_fsyncdir_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fsyncdir,
- fd,
- datasync);
+ fd, datasync, xdata);
return 0;
}
-int32_t
-io_stats_access (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t mask)
+
+int
+io_stats_access (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t mask, dict_t *xdata)
{
- BUMP_HIT(ACCESS);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_access_cbk,
+ STACK_WIND (frame, io_stats_access_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->access,
- loc,
- mask);
+ loc, mask, xdata);
return 0;
}
-int32_t
-io_stats_ftruncate (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset)
+
+int
+io_stats_ftruncate (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, off_t offset, dict_t *xdata)
{
- BUMP_HIT(FTRUNCATE);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_ftruncate_cbk,
+ STACK_WIND (frame, io_stats_ftruncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->ftruncate,
- fd,
- offset);
-
+ fd, offset, xdata);
return 0;
}
-int32_t
-io_stats_fchown (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- uid_t uid,
- gid_t gid)
+
+int
+io_stats_fsetattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- BUMP_HIT(FCHOWN);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_fchown_cbk,
+ STACK_WIND (frame, io_stats_setattr_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fchown,
- fd,
- uid,
- gid);
+ FIRST_CHILD(this)->fops->fsetattr,
+ fd, stbuf, valid, xdata);
return 0;
}
-int32_t
-io_stats_fchmod (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- mode_t mode)
+
+int
+io_stats_fstat (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *xdata)
{
- BUMP_HIT(FCHMOD);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_fchmod_cbk,
+ STACK_WIND (frame, io_stats_fstat_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fchmod,
- fd,
- mode);
+ FIRST_CHILD(this)->fops->fstat,
+ fd, xdata);
return 0;
}
-int32_t
-io_stats_fstat (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
+
+int
+io_stats_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
{
- BUMP_HIT(FSTAT);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame,
- io_stats_fstat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat,
- fd);
- return 0;
+ STACK_WIND(frame, io_stats_fallocate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len,
+ xdata);
+
+ return 0;
}
-int32_t
-io_stats_lk (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t cmd,
- struct flock *lock)
+
+int
+io_stats_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
{
- BUMP_HIT(LK);
+ START_FOP_LATENCY(frame);
+
+ STACK_WIND(frame, io_stats_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
+
+ return 0;
+}
+
+int
+io_stats_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ START_FOP_LATENCY(frame);
+
+ STACK_WIND(frame, io_stats_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
- STACK_WIND (frame,
- io_stats_lk_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk,
- fd,
- cmd,
- lock);
return 0;
}
-int32_t
-io_stats_setdents (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags,
- dir_entry_t *entries,
- int32_t count)
+
+int
+io_stats_lk (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
- BUMP_HIT(SETDENTS);
+ START_FOP_LATENCY (frame);
- STACK_WIND (frame,
- io_stats_setdents_cbk,
+ STACK_WIND (frame, io_stats_lk_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setdents,
- fd,
- flags,
- entries,
- count);
+ FIRST_CHILD(this)->fops->lk,
+ fd, cmd, lock, xdata);
return 0;
}
-int32_t
-io_stats_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *fchecksum,
- uint8_t *dchecksum)
+int
+io_stats_release (xlator_t *this, fd_t *fd)
{
- STACK_UNWIND (frame, op_ret, op_errno, fchecksum, dchecksum);
+ struct ios_fd *iosfd = NULL;
+ struct ios_conf *conf = NULL;
+
+ BUMP_FOP (RELEASE);
+
+ conf = this->private;
+
+ LOCK (&conf->lock);
+ {
+ conf->cumulative.nr_opens--;
+ }
+ UNLOCK (&conf->lock);
+
+ ios_fd_ctx_get (fd, this, &iosfd);
+ if (iosfd) {
+ io_stats_dump_fd (this, iosfd);
+
+ GF_FREE (iosfd->filename);
+ GF_FREE (iosfd);
+ }
return 0;
}
-int32_t
-io_stats_checksum (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flag)
+
+int
+io_stats_releasedir (xlator_t *this, fd_t *fd)
{
- STACK_WIND (frame,
- io_stats_checksum_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->checksum,
- loc,
- flag);
+ BUMP_FOP (RELEASEDIR);
return 0;
}
-int32_t
-io_stats_stats_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct xlator_stats *stats)
+int
+io_stats_forget (xlator_t *this, inode_t *inode)
{
- STACK_UNWIND (frame, op_ret, op_errno, stats);
+ BUMP_FOP (FORGET);
+ ios_stats_cleanup (this, inode);
return 0;
}
-int32_t
-io_stats_stats (call_frame_t *frame,
- xlator_t *this,
- int32_t flags)
+static int
+ios_init_top_stats (struct ios_conf *conf)
{
- STACK_WIND (frame,
- io_stats_stats_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->mops->stats,
- flags);
+ int i = 0;
+
+ GF_ASSERT (conf);
+
+ for (i = 0; i <IOS_STATS_TYPE_MAX; i++) {
+ conf->list[i].iosstats = GF_CALLOC (1,
+ sizeof(*conf->list[i].iosstats),
+ gf_io_stats_mt_ios_stat);
+
+ if (!conf->list[i].iosstats)
+ return -1;
+
+ INIT_LIST_HEAD(&conf->list[i].iosstats->list);
+ LOCK_INIT (&conf->list[i].lock);
+ }
+
+ for (i = 0; i < IOS_STATS_THRU_MAX; i ++) {
+ conf->thru_list[i].iosstats = GF_CALLOC (1,
+ sizeof (*conf->thru_list[i].iosstats),
+ gf_io_stats_mt_ios_stat);
+
+ if (!conf->thru_list[i].iosstats)
+ return -1;
+
+ INIT_LIST_HEAD(&conf->thru_list[i].iosstats->list);
+ LOCK_INIT (&conf->thru_list[i].lock);
+ }
return 0;
}
-void
-enable_all_calls (io_stats_private_t *priv, int enabled)
+static void
+ios_destroy_top_stats (struct ios_conf *conf)
{
- int i;
- for (i = 0; i < GF_FOP_MAXVALUE; i++)
- priv->fop_records[i].enabled = enabled;
-}
+ int i = 0;
+ struct ios_stat_head *list_head = NULL;
+ struct ios_stat_list *entry = NULL;
+ struct ios_stat_list *tmp = NULL;
+ struct ios_stat_list *list = NULL;
+ struct ios_stat *stat = NULL;
-void
-enable_call (io_stats_private_t *priv, const char *name, int enabled)
-{
- int i;
- for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- if (!priv->fop_records[i].name)
+ GF_ASSERT (conf);
+
+ LOCK (&conf->lock);
+
+ conf->cumulative.nr_opens = 0;
+ conf->cumulative.max_nr_opens = 0;
+ conf->cumulative.max_openfd_time.tv_sec = 0;
+ conf->cumulative.max_openfd_time.tv_usec = 0;
+
+ for (i = 0; i < IOS_STATS_TYPE_MAX; i++) {
+ list_head = &conf->list[i];
+ if (!list_head)
+ continue;
+ list_for_each_entry_safe (entry, tmp,
+ &list_head->iosstats->list, list) {
+ list = entry;
+ stat = list->iosstat;
+ ios_stat_unref (stat);
+ list_del (&list->list);
+ GF_FREE (list);
+ list_head->members--;
+ }
+ }
+
+ for (i = 0; i < IOS_STATS_THRU_MAX; i++) {
+ list_head = &conf->thru_list[i];
+ if (!list_head)
continue;
- if (!strcasecmp(priv->fop_records[i].name, name))
- priv->fop_records[i].enabled = enabled;
+ list_for_each_entry_safe (entry, tmp,
+ &list_head->iosstats->list, list) {
+ list = entry;
+ stat = list->iosstat;
+ ios_stat_unref (stat);
+ list_del (&list->list);
+ GF_FREE (list);
+ list_head->members--;
+ }
}
-}
+ UNLOCK (&conf->lock);
-/*
- include = 1 for "include-ops"
- = 0 for "exclude-ops"
-*/
-void
-process_call_list (io_stats_private_t *priv, const char *list, int include)
+ return;
+}
+
+int
+reconfigure (xlator_t *this, dict_t *options)
{
- enable_all_calls (priv, include ? 0 : 1);
+ struct ios_conf *conf = NULL;
+ int ret = -1;
+ char *sys_log_str = NULL;
+ int sys_log_level = -1;
+ char *log_str = NULL;
+ int log_level = -1;
+
+ if (!this || !this->private)
+ goto out;
+
+ conf = this->private;
+
+ GF_OPTION_RECONF ("dump-fd-stats", conf->dump_fd_stats, options, bool,
+ out);
+
+ GF_OPTION_RECONF ("count-fop-hits", conf->count_fop_hits, options, bool,
+ out);
+
+ GF_OPTION_RECONF ("latency-measurement", conf->measure_latency,
+ options, bool, out);
+
+ GF_OPTION_RECONF ("sys-log-level", sys_log_str, options, str, out);
+ if (sys_log_str) {
+ sys_log_level = glusterd_check_log_level (sys_log_str);
+ set_sys_log_level (sys_log_level);
+ }
- char *call = strsep ((char **)&list, ",");
- while (call) {
- enable_call (priv, call, include);
- call = strsep ((char **)&list, ",");
+ GF_OPTION_RECONF ("log-level", log_str, options, str, out);
+ if (log_str) {
+ log_level = glusterd_check_log_level (log_str);
+ gf_log_set_loglevel (log_level);
}
+
+ ret = 0;
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "reconfigure returning %d", ret);
+ return ret;
}
int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_io_stats_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ " failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+int
init (xlator_t *this)
{
- dict_t *options = this->options;
- char *includes = NULL, *excludes = NULL;
- io_stats_private_t *priv = NULL;
- size_t size = 0;
- int i = 0;
+ struct ios_conf *conf = NULL;
+ char *sys_log_str = NULL;
+ int sys_log_level = -1;
+ char *log_str = NULL;
+ int log_level = -1;
+ int ret = -1;
if (!this)
return -1;
- if (!this->children || this->children->next) {
+ if (!this->children) {
gf_log (this->name, GF_LOG_ERROR,
- "io_stats translator requires one subvolume");
+ "io_stats translator requires atleast one subvolume");
return -1;
}
+
if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
+ /* This is very much valid as io-stats currently is loaded
+ * on top of volumes on both client and server, hence this is
+ * not an warning message */
+ gf_log (this->name, GF_LOG_DEBUG,
"dangling volume. check volfile ");
}
- priv = CALLOC (1, sizeof(*priv));
- ERR_ABORT (priv);
+ conf = GF_CALLOC (1, sizeof(*conf), gf_io_stats_mt_ios_conf);
- includes = data_to_str (dict_get (options, "include-ops"));
- excludes = data_to_str (dict_get (options, "exclude-ops"));
-
- for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- priv->fop_records[i].name = gf_fop_list[i];
- priv->fop_records[i].enabled = 1;
- }
-
- if (includes && excludes) {
+ if (!conf) {
gf_log (this->name, GF_LOG_ERROR,
- "must specify only one of 'include-ops' and "
- "'exclude-ops'");
+ "Out of memory.");
return -1;
}
- if (includes)
- process_call_list (priv, includes, 1);
- if (excludes)
- process_call_list (priv, excludes, 0);
-
- LOCK_INIT (&priv->lock);
-
- /* Set this translator's inode table pointer to child node's pointer. */
- size = GF_UNIT_KB;
- for (i=0; i < GF_IO_STAT_BLK_SIZE_MAX; i++) {
- priv->read[i].size = size;
- priv->write[i].size = size;
- size *= 2;
+
+ LOCK_INIT (&conf->lock);
+
+ gettimeofday (&conf->cumulative.started_at, NULL);
+ gettimeofday (&conf->incremental.started_at, NULL);
+
+ ret = ios_init_top_stats (conf);
+ if (ret)
+ return -1;
+
+ GF_OPTION_INIT ("dump-fd-stats", conf->dump_fd_stats, bool, out);
+
+ GF_OPTION_INIT ("count-fop-hits", conf->count_fop_hits, bool, out);
+
+ GF_OPTION_INIT ("latency-measurement", conf->measure_latency,
+ bool, out);
+
+ GF_OPTION_INIT ("sys-log-level", sys_log_str, str, out);
+ if (sys_log_str) {
+ sys_log_level = glusterd_check_log_level (sys_log_str);
+ set_sys_log_level (sys_log_level);
}
- this->itable = FIRST_CHILD (this)->itable;
- this->private = priv;
+ GF_OPTION_INIT ("log-level", log_str, str, out);
+ if (log_str) {
+ log_level = glusterd_check_log_level (log_str);
+ gf_log_set_loglevel (log_level);
+ }
- return 0;
+ this->private = conf;
+ ret = 0;
+out:
+ return ret;
}
+
void
fini (xlator_t *this)
{
- io_stats_private_t *priv = NULL;
+ struct ios_conf *conf = NULL;
if (!this)
return;
- priv = this->private;
- if (priv) {
- LOCK_DESTROY (&priv->lock);
- FREE (priv);
- }
+ conf = this->private;
+
+ if (!conf)
+ return;
+ this->private = NULL;
+
+ ios_destroy_top_stats (conf);
+
+ GF_FREE(conf);
- gf_log (this->name, GF_LOG_NORMAL,
+ gf_log (this->name, GF_LOG_INFO,
"io-stats translator unloaded");
return;
}
+int
+notify (xlator_t *this, int32_t event, void *data, ...)
+{
+ int ret = 0;
+ struct ios_dump_args args = {0};
+ dict_t *output = NULL;
+ dict_t *dict = NULL;
+ int32_t top_op = 0;
+ int32_t list_cnt = 0;
+ double throughput = 0;
+ double time = 0;
+ va_list ap;
+
+ dict = data;
+ va_start (ap, data);
+ output = va_arg (ap, dict_t*);
+ va_end (ap);
+ switch (event) {
+ case GF_EVENT_TRANSLATOR_INFO:
+ ret = dict_get_str_boolean (dict, "clear-stats", _gf_false);
+ if (ret) {
+ ret = dict_set_int32 (output, "top-op", top_op);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set top-op in dict");
+ goto out;
+ }
+ ios_destroy_top_stats (this->private);
+ ret = ios_init_top_stats (this->private);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to reset top stats");
+ ret = dict_set_int32 (output, "stats-cleared",
+ ret ? 0 : 1);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set stats-cleared"
+ " in dict");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "top-op", &top_op);
+ if (!ret) {
+ ret = dict_get_int32 (dict, "list-cnt", &list_cnt);
+ if (top_op > IOS_STATS_TYPE_NONE &&
+ top_op < IOS_STATS_TYPE_MAX)
+ ret = io_stats_dump_stats_to_dict (this, output,
+ top_op, list_cnt);
+ if (top_op == IOS_STATS_TYPE_READ_THROUGHPUT ||
+ top_op == IOS_STATS_TYPE_WRITE_THROUGHPUT) {
+ ret = dict_get_double (dict, "throughput",
+ &throughput);
+ if (!ret) {
+ ret = dict_get_double (dict, "time",
+ &time);
+ if (ret)
+ goto out;
+ ret = dict_set_double (output,
+ "throughput", throughput);
+ if (ret)
+ goto out;
+ ret = dict_set_double (output, "time",
+ time);
+ if (ret)
+ goto out;
+ }
+ ret = 0;
+
+ }
+ } else {
+ (void) ios_dump_args_init (&args, IOS_DUMP_TYPE_DICT,
+ output);
+ ret = io_stats_dump (this, &args);
+ }
+ break;
+ default:
+ default_notify (this, event, data);
+ break;
+
+ }
+out:
+ return ret;
+}
+
struct xlator_fops fops = {
.stat = io_stats_stat,
.readlink = io_stats_readlink,
@@ -1513,10 +2860,7 @@ struct xlator_fops fops = {
.symlink = io_stats_symlink,
.rename = io_stats_rename,
.link = io_stats_link,
- .chmod = io_stats_chmod,
- .chown = io_stats_chown,
.truncate = io_stats_truncate,
- .utimens = io_stats_utimens,
.open = io_stats_open,
.readv = io_stats_readv,
.writev = io_stats_writev,
@@ -1526,42 +2870,81 @@ struct xlator_fops fops = {
.setxattr = io_stats_setxattr,
.getxattr = io_stats_getxattr,
.removexattr = io_stats_removexattr,
+ .fsetxattr = io_stats_fsetxattr,
+ .fgetxattr = io_stats_fgetxattr,
+ .fremovexattr = io_stats_fremovexattr,
.opendir = io_stats_opendir,
.readdir = io_stats_readdir,
+ .readdirp = io_stats_readdirp,
.fsyncdir = io_stats_fsyncdir,
.access = io_stats_access,
.ftruncate = io_stats_ftruncate,
.fstat = io_stats_fstat,
.create = io_stats_create,
- .fchown = io_stats_fchown,
- .fchmod = io_stats_fchmod,
.lk = io_stats_lk,
.inodelk = io_stats_inodelk,
.finodelk = io_stats_finodelk,
.entrylk = io_stats_entrylk,
.lookup = io_stats_lookup,
- .setdents = io_stats_setdents,
- .getdents = io_stats_getdents,
- .checksum = io_stats_checksum,
.xattrop = io_stats_xattrop,
.fxattrop = io_stats_fxattrop,
-};
-
-struct xlator_mops mops = {
- .stats = io_stats_stats,
+ .setattr = io_stats_setattr,
+ .fsetattr = io_stats_fsetattr,
+ .fallocate = io_stats_fallocate,
+ .discard = io_stats_discard,
+ .zerofill = io_stats_zerofill,
};
struct xlator_cbks cbks = {
+ .release = io_stats_release,
+ .releasedir = io_stats_releasedir,
+ .forget = io_stats_forget,
};
struct volume_options options[] = {
- { .key = {"include-ops", "include"},
+ { .key = {"dump-fd-stats"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "If on stats related to file-operations would be "
+ "tracked inside GlusterFS data-structures."
+ },
+ { .key = { "latency-measurement" },
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "If on stats related to the latency of each operation "
+ "would be tracked inside GlusterFS data-structures. "
+ },
+ { .key = {"count-fop-hits"},
+ .type = GF_OPTION_TYPE_BOOL,
+ },
+ { .key = {"log-level"},
.type = GF_OPTION_TYPE_STR,
- /*.value = { ""} */
+ .value = { "DEBUG", "WARNING", "ERROR", "INFO",
+ "CRITICAL", "NONE", "TRACE"}
},
- { .key = {"exclude-ops", "exclude"},
- .type = GF_OPTION_TYPE_STR
- /*.value = { ""} */
+
+ /* These are synthetic entries to assist validation of CLI's *
+ * volume set command */
+ { .key = {"client-log-level"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "INFO",
+ .description = "Changes the log-level of the clients",
+ .value = { "DEBUG", "WARNING", "ERROR", "INFO",
+ "CRITICAL", "NONE", "TRACE"}
+ },
+ { .key = {"sys-log-level"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "CRITICAL",
+ .description = "Gluster's syslog log-level",
+ .value = { "WARNING", "ERROR", "INFO", "CRITICAL"}
+ },
+ { .key = {"brick-log-level"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "INFO",
+ .description = "Changes the log-level of the bricks",
+ .value = { "DEBUG", "WARNING", "ERROR", "INFO",
+ "CRITICAL", "NONE", "TRACE"}
},
{ .key = {NULL} },
+
};
diff --git a/xlators/debug/trace/src/Makefile.am b/xlators/debug/trace/src/Makefile.am
index 0f1679a04..7b2597b4d 100644
--- a/xlators/debug/trace/src/Makefile.am
+++ b/xlators/debug/trace/src/Makefile.am
@@ -2,13 +2,15 @@
xlator_LTLIBRARIES = trace.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/debug
-trace_la_LDFLAGS = -module -avoidversion
+trace_la_LDFLAGS = -module -avoid-version
trace_la_SOURCES = trace.c
trace_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+noinst_HEADERS = trace.h trace-mem-types.h
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/debug/trace/src/trace-mem-types.h b/xlators/debug/trace/src/trace-mem-types.h
new file mode 100644
index 000000000..9fa7d97c2
--- /dev/null
+++ b/xlators/debug/trace/src/trace-mem-types.h
@@ -0,0 +1,21 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __TRACE_MEM_TYPES_H__
+#define __TRACE_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_trace_mem_types_ {
+ gf_trace_mt_trace_conf_t = gf_common_mt_end + 1,
+ gf_trace_mt_end
+};
+#endif
diff --git a/xlators/debug/trace/src/trace.c b/xlators/debug/trace/src/trace.c
index 2c10c7515..c9d839356 100644
--- a/xlators/debug/trace/src/trace.c
+++ b/xlators/debug/trace/src/trace.c
@@ -1,2330 +1,3274 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+#include "trace.h"
+#include "trace-mem-types.h"
/**
* xlators/debug/trace :
- * This translator logs all the arguments to the fops/mops and also
- * their _cbk functions, which later passes the call to next layer.
+ * This translator logs all the arguments to the fops/mops and also
+ * their _cbk functions, which later passes the call to next layer.
* Very helpful translator for debugging.
*/
-#include <time.h>
-#include <errno.h>
-#include "glusterfs.h"
-#include "xlator.h"
-#include "common-utils.h"
-
-#define ERR_EINVAL_NORETURN(cond) \
-do \
- { \
- if ((cond)) \
- { \
- gf_log ("ERROR", GF_LOG_ERROR, \
- "%s: %s: (%s) is true", \
- __FILE__, __FUNCTION__, #cond); \
- } \
- } while (0)
-
-typedef struct trace_private {
- int32_t debug_flag;
-} trace_private_t;
-
-struct {
- char *name;
- int enabled;
-} trace_fop_names[GF_FOP_MAXVALUE];
-
-int32_t
-trace_create_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct stat *buf)
-{
- char atime_buf[256], mtime_buf[256], ctime_buf[256];
- ERR_EINVAL_NORETURN (!this);
-
- if (trace_fop_names[GF_FOP_CREATE].enabled) {
- if (op_ret >= 0) {
- strftime (atime_buf, 256, "[%b %d %H:%M:%S]",
- localtime (&buf->st_atime));
- strftime (mtime_buf, 256, "[%b %d %H:%M:%S]",
- localtime (&buf->st_mtime));
- strftime (ctime_buf, 256, "[%b %d %H:%M:%S]",
- localtime (&buf->st_ctime));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, fd=%p, ino=%"PRIu64"), "
- "*buf {st_dev=%"GF_PRI_DEV", st_ino=%"PRIu64", "
- "st_mode=%o, st_nlink=%"GF_PRI_NLINK", st_uid=%d, "
- "st_gid=%d, st_rdev=%"GF_PRI_DEV", st_size=%"PRId64", "
- "st_blksize=%"GF_PRI_BLKSIZE", st_blocks=%"PRId64", "
- "st_atime=%s, st_mtime=%s, st_ctime=%s})",
- frame->root->unique, op_ret, fd, inode->ino, buf->st_dev,
- buf->st_ino, buf->st_mode, buf->st_nlink,
- buf->st_uid, buf->st_gid, buf->st_rdev, buf->st_size,
- buf->st_blksize,
- buf->st_blocks, atime_buf, mtime_buf, ctime_buf);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
- return 0;
-}
-
-int32_t
-trace_open_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- ERR_EINVAL_NORETURN (!this);
-
- if (trace_fop_names[GF_FOP_OPEN].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d, *fd=%p)",
- frame->root->unique, op_ret, op_errno, fd);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, fd);
- return 0;
-}
-
-int32_t
-trace_stat_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- char atime_buf[256], mtime_buf[256], ctime_buf[256];
- ERR_EINVAL_NORETURN (!this);
-
- if (trace_fop_names[GF_FOP_STAT].enabled) {
-
- if (op_ret >= 0) {
- strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime));
- strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime));
- strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, buf {st_dev=%"GF_PRI_DEV", "
- "st_ino=%"PRIu64", st_mode=%o, st_nlink=%"GF_PRI_NLINK", "
- "st_uid=%d, st_gid=%d, st_rdev=%"GF_PRI_DEV", st_size=%"PRId64
- ", st_blksize=%"GF_PRI_BLKSIZE", st_blocks=%"PRId64", "
- "st_atime=%s, st_mtime=%s, st_ctime=%s})",
- frame->root->unique, op_ret, buf->st_dev, buf->st_ino,
- buf->st_mode, buf->st_nlink, buf->st_uid, buf->st_gid,
- buf->st_rdev, buf->st_size, buf->st_blksize,
- buf->st_blocks, atime_buf, mtime_buf, ctime_buf);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-trace_readv_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct stat *buf,
- struct iobref *iobref)
-{
- char atime_buf[256], mtime_buf[256], ctime_buf[256];
- ERR_EINVAL_NORETURN (!this);
-
- if (trace_fop_names[GF_FOP_READ].enabled) {
-
- if (op_ret >= 0) {
- strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime));
- strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime));
- strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, *buf {st_dev=%"GF_PRI_DEV", "
- "st_ino=%"PRIu64", st_mode=%o, st_nlink=%"GF_PRI_NLINK", "
- "st_uid=%d, st_gid=%d, st_rdev=%"GF_PRI_DEV", "
- "st_size=%"PRId64", st_blksize=%"GF_PRI_BLKSIZE", "
- "st_blocks=%"PRId64", st_atime=%s, st_mtime=%s, st_ctime=%s})",
- frame->root->unique, op_ret, buf->st_dev, buf->st_ino,
- buf->st_mode, buf->st_nlink, buf->st_uid, buf->st_gid,
- buf->st_rdev, buf->st_size, buf->st_blksize, buf->st_blocks,
- atime_buf, mtime_buf, ctime_buf);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, vector, count, buf, iobref);
- return 0;
-}
-
-int32_t
-trace_writev_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- char atime_buf[256], mtime_buf[256], ctime_buf[256];
- ERR_EINVAL_NORETURN (!this);
-
- if (trace_fop_names[GF_FOP_WRITE].enabled) {
- if (op_ret >= 0) {
- strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime));
- strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime));
- strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, *buf {st_ino=%"PRIu64", "
- "st_size=%"PRId64", st_blocks=%"PRId64", st_atime=%s, "
- "st_mtime=%s, st_ctime=%s})",
- frame->root->unique, op_ret, buf->st_ino, buf->st_size,
- buf->st_blocks, atime_buf, mtime_buf, ctime_buf);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-trace_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entries,
- int32_t count)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_GETDENTS].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d, count=%d)",
- frame->root->unique, op_ret, op_errno, count);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, entries, count);
- return 0;
-}
-
-int32_t
-trace_readdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *buf)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_READDIR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64" :(op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
-
- return 0;
-}
-
-int32_t
-trace_fsync_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_FSYNC].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-int32_t
-trace_chown_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- char atime_buf[256], mtime_buf[256], ctime_buf[256];
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_CHOWN].enabled) {
- if (op_ret >= 0) {
- strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime));
- strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime));
- strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, *buf {st_ino=%"PRIu64", st_mode=%o, "
- "st_uid=%d, st_gid=%d, st_atime=%s, st_mtime=%s, st_ctime=%s})",
- frame->root->unique, op_ret, buf->st_ino, buf->st_mode,
- buf->st_uid, buf->st_gid, atime_buf, mtime_buf, ctime_buf);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-trace_chmod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- char atime_buf[256], mtime_buf[256], ctime_buf[256];
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_CHMOD].enabled) {
- if (op_ret >= 0) {
- strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime));
- strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime));
- strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, *buf {st_ino=%"PRIu64", st_mode=%o, "
- "st_atime=%s, st_mtime=%s, st_ctime=%s})",
- frame->root->unique, op_ret, buf->st_ino, buf->st_mode,
- atime_buf, mtime_buf, ctime_buf);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-trace_fchmod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- char atime_buf[256], mtime_buf[256], ctime_buf[256];
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_FCHMOD].enabled) {
- if (op_ret >= 0) {
- strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime));
- strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime));
- strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, *buf {st_ino=%"PRIu64", st_mode=%o, "
- "st_atime=%s, st_mtime=%s, st_ctime=%s})",
- frame->root->unique, op_ret, buf->st_ino, buf->st_mode,
- atime_buf, mtime_buf, ctime_buf);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-trace_fchown_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- char atime_buf[256], mtime_buf[256], ctime_buf[256];
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_FCHOWN].enabled) {
- if (op_ret >= 0) {
- strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime));
- strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime));
- strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, *buf {st_ino=%"PRIu64", st_mode=%o, "
- "st_uid=%d, st_gid=%d, st_atime=%s, st_mtime=%s, st_ctime=%s})",
- frame->root->unique, op_ret, buf->st_ino, buf->st_mode,
- buf->st_uid, buf->st_gid, atime_buf, mtime_buf, ctime_buf);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-trace_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_UNLINK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-int32_t
-trace_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_RENAME].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d, buf {st_ino=%"PRIu64"})",
- frame->root->unique, op_ret, op_errno,
- (buf? buf->st_ino : 0));
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-trace_readlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- const char *buf)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_READLINK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d, buf=%s)",
- frame->root->unique, op_ret, op_errno, buf);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-trace_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf,
- dict_t *xattr)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
- if (op_ret >= 0) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, ino=%"PRIu64", "
- "*buf {st_dev=%"GF_PRI_DEV", st_ino=%"PRIu64", st_mode=%o, "
- "st_nlink=%"GF_PRI_NLINK", st_uid=%d, st_gid=%d, "
- "st_rdev=%"GF_PRI_DEV", st_size=%"PRId64", "
- "st_blksize=%"GF_PRI_BLKSIZE", st_blocks=%"PRId64"})",
- frame->root->unique, op_ret, inode->ino, buf->st_dev, buf->st_ino,
- buf->st_mode, buf->st_nlink, buf->st_uid, buf->st_gid,
- buf->st_rdev, buf->st_size, buf->st_blksize, buf->st_blocks);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf, xattr);
- return 0;
-}
-
-int32_t
-trace_symlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- char atime_buf[256], mtime_buf[256], ctime_buf[256];
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_SYMLINK].enabled) {
- if (op_ret >= 0) {
- strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime));
- strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime));
- strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, ino=%"PRIu64", *buf {st_ino=%"PRIu64", "
- "st_mode=%o, st_nlink=%"GF_PRI_NLINK", st_uid=%d, st_gid=%d, "
- "st_size=%"PRId64", st_blocks=%"PRId64", st_atime=%s, "
- "st_mtime=%s, st_ctime=%s})",
- frame->root->unique, op_ret, inode->ino, buf->st_ino,
- buf->st_mode, buf->st_nlink, buf->st_uid, buf->st_gid,
- buf->st_size, buf->st_blocks, atime_buf, mtime_buf, ctime_buf);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
- return 0;
-}
-
-int32_t
-trace_mknod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- char atime_buf[256], mtime_buf[256], ctime_buf[256];
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_MKNOD].enabled) {
- if (op_ret >= 0) {
- strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime));
- strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime));
- strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, ino=%"PRIu64", *buf {st_dev=%"GF_PRI_DEV
- ", st_ino=%"PRIu64", st_mode=%o, st_nlink=%"GF_PRI_NLINK", "
- "st_uid=%d, st_gid=%d, st_rdev=%"GF_PRI_DEV", st_size=%"PRId64
- ", st_blksize=%"GF_PRI_BLKSIZE", st_blocks=%"PRId64", st_atime=%s, "
- "st_mtime=%s, st_ctime=%s})",
- frame->root->unique, op_ret, inode->ino, buf->st_dev, buf->st_ino,
- buf->st_mode, buf->st_nlink, buf->st_uid, buf->st_gid,
- buf->st_rdev, buf->st_size, buf->st_blksize, buf->st_blocks,
- atime_buf, mtime_buf, ctime_buf);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
- return 0;
-}
-
-
-int32_t
-trace_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_MKDIR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d, ino=%"PRIu64"",
- frame->root->unique, op_ret, op_errno,
- (inode? inode->ino : 0));
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
- return 0;
-}
-
-int32_t
-trace_link_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_LINK].enabled) {
- if (op_ret >= 0) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, ino=%"PRIu64", "
- "*buf {st_nlink=%"GF_PRI_NLINK"})",
- frame->root->unique, op_ret, inode->ino, buf->st_nlink);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
- return 0;
-}
-
-int32_t
-trace_flush_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_FLUSH].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-int32_t
-trace_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_OPENDIR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d, fd=%p)",
- frame->root->unique, op_ret, op_errno, fd);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, fd);
- return 0;
-}
-
-int32_t
-trace_rmdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_RMDIR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-int32_t
-trace_truncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_TRUNCATE].enabled) {
- if (op_ret >= 0) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, *buf {st_size=%"PRId64", st_blksize=%"
- GF_PRI_BLKSIZE", st_blocks=%"PRId64"})",
- frame->root->unique, op_ret, buf->st_size, buf->st_blksize,
- buf->st_blocks);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-trace_utimens_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- char atime_buf[256], mtime_buf[256], ctime_buf[256];
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_UTIMENS].enabled) {
- if (op_ret >= 0) {
- strftime (atime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_atime));
- strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_mtime));
- strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", localtime (&buf->st_ctime));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, *buf {st_atime=%s, st_mtime=%s, "
- "st_ctime=%s})",
- frame->root->unique, op_ret, atime_buf, mtime_buf, ctime_buf);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-trace_statfs_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct statvfs *buf)
-{
- ERR_EINVAL_NORETURN (!this);
-
- if (trace_fop_names[GF_FOP_STATFS].enabled) {
- if (op_ret >= 0) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": ({f_bsize=%lu, f_frsize=%lu, f_blocks=%"GF_PRI_FSBLK
- ", f_bfree=%"GF_PRI_FSBLK", f_bavail=%"GF_PRI_FSBLK", "
- "f_files=%"GF_PRI_FSBLK", f_ffree=%"GF_PRI_FSBLK", f_favail=%"
- GF_PRI_FSBLK", f_fsid=%lu, f_flag=%lu, f_namemax=%lu}) => ret=%d",
- frame->root->unique, buf->f_bsize, buf->f_frsize, buf->f_blocks,
- buf->f_bfree, buf->f_bavail, buf->f_files, buf->f_ffree,
- buf->f_favail, buf->f_fsid, buf->f_flag, buf->f_namemax, op_ret);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-trace_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_SETXATTR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-int32_t
-trace_getxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
- ERR_EINVAL_NORETURN (!this || !dict);
-
- if (trace_fop_names[GF_FOP_GETXATTR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d, dict=%p)",
- frame->root->unique, op_ret, op_errno, dict);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, dict);
- return 0;
-}
-
-int32_t
-trace_removexattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_REMOVEXATTR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-int32_t
-trace_fsyncdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_FSYNCDIR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-int32_t
-trace_access_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_ACCESS].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-int32_t
-trace_ftruncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_FTRUNCATE].enabled) {
- if (op_ret >= 0) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, *buf {st_size=%"PRId64", "
- "st_blksize=%"GF_PRI_BLKSIZE", st_blocks=%"PRId64"})",
- frame->root->unique, op_ret, buf->st_size, buf->st_blksize,
- buf->st_blocks);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-trace_fstat_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- char atime_buf[256], mtime_buf[256], ctime_buf[256];
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_FSTAT].enabled) {
- if (op_ret >= 0) {
- strftime (atime_buf, 256, "[%b %d %H:%M:%S]",
- localtime (&buf->st_atime));
- strftime (mtime_buf, 256, "[%b %d %H:%M:%S]",
- localtime (&buf->st_mtime));
- strftime (ctime_buf, 256, "[%b %d %H:%M:%S]",
- localtime (&buf->st_ctime));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, *buf {st_dev=%"GF_PRI_DEV", "
- "st_ino=%"PRIu64", st_mode=%o, st_nlink=%"GF_PRI_NLINK", "
- "st_uid=%d, st_gid=%d, st_rdev=%"GF_PRI_DEV", st_size=%"PRId64", "
- "st_blksize=%"GF_PRI_BLKSIZE", st_blocks=%"PRId64", st_atime=%s, "
- "st_mtime=%s, st_ctime=%s})",
- frame->root->unique, op_ret, buf->st_dev, buf->st_ino,
- buf->st_mode, buf->st_nlink, buf->st_uid, buf->st_gid,
- buf->st_rdev, buf->st_size, buf->st_blksize,
- buf->st_blocks, atime_buf, mtime_buf, ctime_buf);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-trace_lk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct flock *lock)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_LK].enabled) {
- if (op_ret >= 0) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, {l_type=%d, l_whence=%d, "
- "l_start=%"PRId64", l_len=%"PRId64", l_pid=%u})",
- frame->root->unique, op_ret, lock->l_type, lock->l_whence,
- lock->l_start, lock->l_len, lock->l_pid);
- } else {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, lock);
- return 0;
-}
-
-
-int32_t
-trace_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_SETDENTS].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": op_ret=%d, op_errno=%d",
- frame->root->unique, op_ret, op_errno);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-int32_t
-trace_entrylk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- ERR_EINVAL_NORETURN (!this );
-
- if (trace_fop_names[GF_FOP_ENTRYLK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": op_ret=%d, op_errno=%d",
- frame->root->unique, op_ret, op_errno);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-int32_t
-trace_xattrop_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
- ERR_EINVAL_NORETURN (!this || !dict);
-
- if (trace_fop_names[GF_FOP_XATTROP].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
+int
+dump_history_trace (circular_buffer_t *cb, void *data)
+{
+ char *string = NULL;
+ struct tm *tm = NULL;
+ char timestr[256] = {0,};
+
+ string = (char *)cb->data;
+ tm = localtime (&cb->tv.tv_sec);
+
+ /* Since we are continuing with adding entries to the buffer even when
+ gettimeofday () fails, it's safe to check tm and then dump the time
+ at which the entry was added to the buffer */
+ if (tm) {
+ strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm);
+ snprintf (timestr + strlen (timestr), 256 - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS, cb->tv.tv_usec);
+ gf_proc_dump_write ("TIME", "%s", timestr);
+ }
+
+ gf_proc_dump_write ("FOP", "%s\n", string);
+
+ return 0;
+}
+
+int
+trace_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ char statstr[4096] = {0, };
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_CREATE].enabled) {
+ char string[4096] = {0,};
+ if (op_ret >= 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (op_ret=%d, fd=%p"
+ "*stbuf {%s}, *preparent {%s}, "
+ "*postparent = {%s})",
+ frame->root->unique,
+ uuid_utoa (inode->gfid), op_ret, fd,
+ statstr, preparentstr, postparentstr);
+
+ /* for 'release' log */
+ fd_ctx_set (fd, this, 0);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, op_errno=%d)",
+ frame->root->unique, op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int
+trace_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_OPEN].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d, "
+ "*fd=%p", frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno,
+ fd);
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ /* for 'release' log */
+ if (op_ret >= 0)
+ fd_ctx_set (fd, this, 0);
+
+ TRACE_STACK_UNWIND (open, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+int
+trace_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ char statstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_STAT].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d buf=%s",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ statstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int
+trace_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *buf, struct iobref *iobref,
+ dict_t *xdata)
+{
+ char statstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_READ].enabled) {
+ char string[4096] = {0,};
+ if (op_ret >= 0) {
+ trace_stat_to_str (buf, statstr);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d buf=%s",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ statstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count,
+ buf, iobref, xdata);
+ return 0;
+}
+
+int
+trace_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ char preopstr[4096] = {0, };
+ char postopstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_WRITE].enabled) {
+ char string[4096] = {0,};
+ if (op_ret >= 0) {
+ trace_stat_to_str (prebuf, preopstr);
+ trace_stat_to_str (postbuf, postopstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s})",
+ frame->root->unique, op_ret,
+ preopstr, postopstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int
+trace_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *buf,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_READDIR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64" : gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique, uuid_utoa (frame->local),
+ op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (readdir, frame, op_ret, op_errno, buf, xdata);
+
+ return 0;
+}
+
+int
+trace_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *buf,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_READDIRP].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64" : gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique, uuid_utoa (frame->local),
+ op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ TRACE_STACK_UNWIND (readdirp, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int
+trace_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ char preopstr[4096] = {0, };
+ char postopstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSYNC].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (prebuf, preopstr);
+ trace_stat_to_str (postbuf, postopstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s}",
+ frame->root->unique, op_ret,
+ preopstr, postopstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+
+ return 0;
+}
+
+int
+trace_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost, dict_t *xdata)
+{
+ char preopstr[4096] = {0, };
+ char postopstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_SETATTR].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (statpre, preopstr);
+ trace_stat_to_str (statpost, postopstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s})",
+ frame->root->unique, op_ret,
+ preopstr, postopstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (setattr, frame, op_ret, op_errno, statpre,
+ statpost, xdata);
+ return 0;
+}
+
+int
+trace_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost, dict_t *xdata)
+{
+ char preopstr[4096] = {0, };
+ char postopstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSETATTR].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (statpre, preopstr);
+ trace_stat_to_str (statpost, postopstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s})",
+ frame->root->unique, op_ret,
+ preopstr, postopstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
+ frame->root->unique, uuid_utoa (frame->local),
+ op_ret, op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fsetattr, frame, op_ret, op_errno,
+ statpre, statpost, xdata);
+ return 0;
+}
+
+int
+trace_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_UNLINK].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ " *preparent = {%s}, "
+ "*postparent = {%s})",
+ frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, preparentstr,
+ postparentstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (unlink, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int
+trace_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ char statstr[4096] = {0, };
+ char preoldparentstr[4096] = {0, };
+ char postoldparentstr[4096] = {0, };
+ char prenewparentstr[4096] = {0, };
+ char postnewparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_RENAME].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (preoldparent, preoldparentstr);
+ trace_stat_to_str (postoldparent, postoldparentstr);
+ trace_stat_to_str (prenewparent, prenewparentstr);
+ trace_stat_to_str (postnewparent, postnewparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*stbuf = {%s}, *preoldparent = {%s},"
+ " *postoldparent = {%s}"
+ " *prenewparent = {%s}, "
+ "*postnewparent = {%s})",
+ frame->root->unique, op_ret, statstr,
+ preoldparentstr, postoldparentstr,
+ prenewparentstr, postnewparentstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, op_errno);
+
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (rename, frame, op_ret, op_errno, buf,
+ preoldparent, postoldparent,
+ prenewparent, postnewparent, xdata);
+ return 0;
+}
+
+int
+trace_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ const char *buf, struct iatt *stbuf, dict_t *xdata)
+{
+ char statstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_READLINK].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (stbuf, statstr);
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, op_errno=%d,"
+ "buf=%s, stbuf = { %s })",
+ frame->root->unique, op_ret, op_errno,
+ buf, statstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+ }
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (readlink, frame, op_ret, op_errno, buf, stbuf,
+ xdata);
+ return 0;
+}
+
+int
+trace_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ dict_t *xdata, struct iatt *postparent)
+{
+ char statstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (postparent, postparentstr);
+ /* print buf->ia_gfid instead of inode->gfid,
+ * since if the inode is not yet linked to the
+ * inode table (fresh lookup) then null gfid
+ * will be printed.
+ */
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (op_ret=%d "
+ "*buf {%s}, *postparent {%s}",
+ frame->root->unique,
+ uuid_utoa (buf->ia_gfid),
+ op_ret, statstr, postparentstr);
+
+ /* For 'forget' */
+ inode_ctx_put (inode, this, 0);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf,
+ xdata, postparent);
+ return 0;
+}
+
+int
+trace_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ char statstr[4096] = {0, };
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_SYMLINK].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (op_ret=%d "
+ "*stbuf = {%s}, *preparent = {%s}, "
+ "*postparent = {%s})",
+ frame->root->unique,
+ uuid_utoa (inode->gfid),
+ op_ret, statstr, preparentstr,
+ postparentstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": op_ret=%d, op_errno=%d",
+ frame->root->unique, op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (symlink, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int
+trace_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ char statstr[4096] = {0, };
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ char string[4096] = {0,};
+ if (trace_fop_names[GF_FOP_MKNOD].enabled) {
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (op_ret=%d "
+ "*stbuf = {%s}, *preparent = {%s}, "
+ "*postparent = {%s})",
+ frame->root->unique,
+ uuid_utoa (inode->gfid),
+ op_ret, statstr, preparentstr,
+ postparentstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, op_errno=%d)",
+ frame->root->unique, op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int
+trace_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ char statstr[4096] = {0, };
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_MKDIR].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (op_ret=%d "
+ ", *stbuf = {%s}, *prebuf = {%s}, "
+ "*postbuf = {%s} )",
+ frame->root->unique,
+ uuid_utoa (inode->gfid),
+ op_ret, statstr, preparentstr,
+ postparentstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, op_errno=%d)",
+ frame->root->unique, op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (mkdir, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int
+trace_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ char statstr[4096] = {0, };
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ char string[4096] = {0,};
+ if (trace_fop_names[GF_FOP_LINK].enabled) {
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*stbuf = {%s}, *prebuf = {%s},"
+ " *postbuf = {%s})",
+ frame->root->unique, op_ret,
+ statstr, preparentstr, postparentstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (link, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int
+trace_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ char string[4096] = {0,};
+ if (trace_fop_names[GF_FOP_FLUSH].enabled) {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique, uuid_utoa (frame->local),
+ op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (flush, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+trace_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ char string[4096] = {0,};
+ if (trace_fop_names[GF_FOP_OPENDIR].enabled) {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d,"
+ " fd=%p",
+ frame->root->unique, uuid_utoa (frame->local),
+ op_ret, op_errno, fd);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ /* for 'releasedir' log */
+ if (op_ret >= 0)
+ fd_ctx_set (fd, this, 0);
+
+ TRACE_STACK_UNWIND (opendir, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+int
+trace_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ char preparentstr[4096] = {0, };
+ char postparentstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_RMDIR].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (preparent, preparentstr);
+ trace_stat_to_str (postparent, postparentstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "*prebuf={%s}, *postbuf={%s}",
+ frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, preparentstr, postparentstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (rmdir, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int
+trace_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ char preopstr[4096] = {0, };
+ char postopstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_TRUNCATE].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (prebuf, preopstr);
+ trace_stat_to_str (postbuf, postopstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s} )",
+ frame->root->unique, op_ret,
+ preopstr, postopstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
+}
+
+int
+trace_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_STATFS].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ snprintf (string, sizeof (string),
+ "%"PRId64": ({f_bsize=%lu, "
+ "f_frsize=%lu, "
+ "f_blocks=%"GF_PRI_FSBLK
+ ", f_bfree=%"GF_PRI_FSBLK", "
+ "f_bavail=%"GF_PRI_FSBLK", "
+ "f_files=%"GF_PRI_FSBLK", "
+ "f_ffree=%"GF_PRI_FSBLK", "
+ "f_favail=%"GF_PRI_FSBLK", "
+ "f_fsid=%lu, f_flag=%lu, "
+ "f_namemax=%lu}) => ret=%d",
+ frame->root->unique, buf->f_bsize,
+ buf->f_frsize, buf->f_blocks,
+ buf->f_bfree, buf->f_bavail,
+ buf->f_files, buf->f_ffree,
+ buf->f_favail, buf->f_fsid,
+ buf->f_flag, buf->f_namemax, op_ret);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": (op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique, op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (statfs, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int
+trace_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_SETXATTR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+trace_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_GETXATTR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d,"
+ " dict=%p", frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno,
+ dict);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
+
+ return 0;
+}
+
+int
+trace_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSETXATTR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+trace_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FGETXATTR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d,"
+ " dict=%p", frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno,
+ dict);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict, xdata);
+
+ return 0;
+}
+
+int
+trace_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_REMOVEXATTR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+int
+trace_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSYNCDIR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fsyncdir, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+trace_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_ACCESS].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)", frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (access, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+trace_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ char prebufstr[4096] = {0, };
+ char postbufstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FTRUNCATE].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ trace_stat_to_str (prebuf, prebufstr);
+ trace_stat_to_str (postbuf, postbufstr);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s} )",
+ frame->root->unique, op_ret,
+ prebufstr, postbufstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int
+trace_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+ char statstr[4096] = {0, };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSTAT].enabled) {
+ char string[4096] = {0.};
+ if (op_ret == 0) {
+ trace_stat_to_str (buf, statstr);
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d "
+ "buf=%s", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ statstr);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int
+trace_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_LK].enabled) {
+ char string[4096] = {0,};
+ if (op_ret == 0) {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "{l_type=%d, l_whence=%d, "
+ "l_start=%"PRId64", "
+ "l_len=%"PRId64", l_pid=%u})",
+ frame->root->unique,
+ uuid_utoa (frame->local),
+ op_ret, lock->l_type, lock->l_whence,
+ lock->l_start, lock->l_len,
+ lock->l_pid);
+ } else {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, "
+ "op_errno=%d)", frame->root->unique,
+ uuid_utoa (frame->local), op_ret,
+ op_errno);
+ }
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (lk, frame, op_ret, op_errno, lock, xdata);
+ return 0;
+}
+
+int
+trace_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_ENTRYLK].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (entrylk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+trace_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FENTRYLK].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fentrylk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+trace_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_XATTROP].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+int
+trace_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FXATTROP].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- STACK_UNWIND (frame, op_ret, op_errno, dict);
- return 0;
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
-int32_t
-trace_fxattrop_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
+int
+trace_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- ERR_EINVAL_NORETURN (!this || !dict);
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_FXATTROP].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
+ conf = this->private;
- STACK_UNWIND (frame, op_ret, op_errno, dict);
- return 0;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_INODELK].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local),op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (inodelk, frame, op_ret, op_errno, xdata);
+ return 0;
}
-int32_t
-trace_inodelk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+int
+trace_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- ERR_EINVAL_NORETURN (!this );
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
- if (trace_fop_names[GF_FOP_INODELK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": op_ret=%d, op_errno=%d",
- frame->root->unique, op_ret, op_errno);
- }
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FINODELK].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND (finodelk, frame, op_ret, op_errno, xdata);
+ return 0;
}
+int
+trace_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ uint32_t weak_checksum, uint8_t *strong_checksum,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
-int32_t
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_RCHECKSUM].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s op_ret=%d op_errno=%d",
+ frame->root->unique,
+ uuid_utoa (frame->local), op_ret, op_errno);
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ TRACE_STACK_UNWIND (rchecksum, frame, op_ret, op_errno, weak_checksum,
+ strong_checksum, xdata);
+
+ return 0;
+}
+
+/* *_cbk section over <----------> fop section start */
+
+int
trace_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_ENTRYLK].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s volume=%s, (path=%s "
+ "basename=%s, cmd=%s, type=%s)",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid),
+ volume, loc->path, basename,
+ ((cmd == ENTRYLK_LOCK) ? "ENTRYLK_LOCK" :
+ "ENTRYLK_UNLOCK"),
+ ((type == ENTRYLK_RDLCK) ? "ENTRYLK_RDLCK" :
+ "ENTRYLK_WRLCK"));
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_entrylk_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->entrylk,
+ volume, loc, basename, cmd, type, xdata);
+ return 0;
+}
+
+int
+trace_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+{
+ char *cmd_str = NULL;
+ char *type_str = NULL;
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_INODELK].enabled) {
+ char string[4096] = {0,};
+ switch (cmd) {
+#if F_GETLK != F_GETLK64
+ case F_GETLK64:
+#endif
+ case F_GETLK:
+ cmd_str = "GETLK";
+ break;
+
+#if F_SETLK != F_SETLK64
+ case F_SETLK64:
+#endif
+ case F_SETLK:
+ cmd_str = "SETLK";
+ break;
+
+#if F_SETLKW != F_SETLKW64
+ case F_SETLKW64:
+#endif
+ case F_SETLKW:
+ cmd_str = "SETLKW";
+ break;
+
+ default:
+ cmd_str = "UNKNOWN";
+ break;
+ }
+
+ switch (flock->l_type) {
+ case F_RDLCK:
+ type_str = "READ";
+ break;
+ case F_WRLCK:
+ type_str = "WRITE";
+ break;
+ case F_UNLCK:
+ type_str = "UNLOCK";
+ break;
+ default:
+ type_str = "UNKNOWN";
+ break;
+ }
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s volume=%s, (path=%s "
+ "cmd=%s, type=%s, start=%llu, len=%llu, "
+ "pid=%llu)", frame->root->unique,
+ uuid_utoa (loc->inode->gfid), volume,
+ loc->path, cmd_str, type_str,
+ (unsigned long long)flock->l_start,
+ (unsigned long long) flock->l_len,
+ (unsigned long long) flock->l_pid);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_inodelk_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->inodelk,
+ volume, loc, cmd, flock, xdata);
+ return 0;
+}
+
+int
+trace_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+{
+ char *cmd_str = NULL;
+ char *type_str = NULL;
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FINODELK].enabled) {
+ char string[4096] = {0,};
+ switch (cmd) {
+#if F_GETLK != F_GETLK64
+ case F_GETLK64:
+#endif
+ case F_GETLK:
+ cmd_str = "GETLK";
+ break;
+
+#if F_SETLK != F_SETLK64
+ case F_SETLK64:
+#endif
+ case F_SETLK:
+ cmd_str = "SETLK";
+ break;
+
+#if F_SETLKW != F_SETLKW64
+ case F_SETLKW64:
+#endif
+ case F_SETLKW:
+ cmd_str = "SETLKW";
+ break;
+
+ default:
+ cmd_str = "UNKNOWN";
+ break;
+ }
+
+ switch (flock->l_type) {
+ case F_RDLCK:
+ type_str = "READ";
+ break;
+ case F_WRLCK:
+ type_str = "WRITE";
+ break;
+ case F_UNLCK:
+ type_str = "UNLOCK";
+ break;
+ default:
+ type_str = "UNKNOWN";
+ break;
+ }
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s volume=%s, (fd =%p "
+ "cmd=%s, type=%s, start=%llu, len=%llu, "
+ "pid=%llu)", frame->root->unique,
+ uuid_utoa (fd->inode->gfid), volume, fd,
+ cmd_str, type_str,
+ (unsigned long long) flock->l_start,
+ (unsigned long long) flock->l_len,
+ (unsigned long long) flock->l_pid);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ STACK_WIND (frame, trace_finodelk_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->finodelk,
+ volume, fd, cmd, flock, xdata);
+ return 0;
+}
+
+int
+trace_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- ERR_EINVAL_NORETURN (!this || !loc || !basename);
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_XATTROP].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s (path=%s flags=%d)",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ flags);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_xattrop_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->xattrop,
+ loc, flags, dict, xdata);
- if (trace_fop_names[GF_FOP_ENTRYLK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": volume=%s, (loc= {path=%s, ino=%"PRIu64"} basename=%s, cmd=%s, type=%s)",
- frame->root->unique, volume, loc->path, loc->inode->ino, basename,
- ((cmd == ENTRYLK_LOCK) ? "ENTRYLK_LOCK" : "ENTRYLK_UNLOCK"),
- ((type == ENTRYLK_RDLCK) ? "ENTRYLK_RDLCK" : "ENTRYLK_WRLCK"));
- }
+ return 0;
+}
+
+int
+trace_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
- STACK_WIND (frame,
- trace_entrylk_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->entrylk,
- volume, loc, basename, cmd, type);
- return 0;
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FXATTROP].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, flags=%d",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, flags);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_fxattrop_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fxattrop,
+ fd, flags, dict, xdata);
+
+ return 0;
}
-int32_t
-trace_inodelk (call_frame_t *frame,
- xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct flock *flock)
+int
+trace_lookup (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
+ char string[4096] = {0,};
+ /* TODO: print all the keys mentioned in xattr_req */
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_lookup_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup,
+ loc, xdata);
+
+ return 0;
+}
+
+int
+trace_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_STAT].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_stat_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat,
+ loc, xdata);
+
+ return 0;
+}
+
+int
+trace_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_READLINK].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s, "
+ "size=%"GF_PRI_SIZET")", frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ size);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_readlink_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readlink,
+ loc, size, xdata);
+
+ return 0;
+}
+
+int
+trace_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t dev, mode_t umask, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_MKNOD].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s mode=%d "
+ "umask=0%o, dev=%"GF_PRI_DEV")",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ mode, umask, dev);
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_mknod_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod,
+ loc, mode, dev, umask, xdata);
+
+ return 0;
+}
+
+int
+trace_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_MKDIR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s mode=%d"
+ " umask=0%o", frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ mode, umask);
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_mkdir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir,
+ loc, mode, umask, xdata);
+ return 0;
+}
+
+int
+trace_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_UNLINK].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s flag=%d",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ xflag);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+out:
+ STACK_WIND (frame, trace_unlink_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink,
+ loc, xflag, xdata);
+ return 0;
+}
+
+int
+trace_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_RMDIR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s flags=%d",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ flags);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_rmdir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rmdir,
+ loc, flags, xdata);
+
+ return 0;
+}
+
+int
+trace_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_SYMLINK].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s linkpath=%s, path=%s"
+ " umask=0%o", frame->root->unique,
+ uuid_utoa (loc->inode->gfid), linkpath,
+ loc->path, umask);
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_symlink_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->symlink,
+ linkpath, loc, umask, xdata);
+
+ return 0;
+}
+
+int
+trace_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ char oldgfid[50] = {0,};
+ char newgfid[50] = {0,};
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_RENAME].enabled) {
+ char string[4096] = {0,};
+ if (newloc->inode)
+ uuid_utoa_r (newloc->inode->gfid, newgfid);
+ else
+ strcpy (newgfid, "0");
+
+ uuid_utoa_r (oldloc->inode->gfid, oldgfid);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": oldgfid=%s oldpath=%s --> "
+ "newgfid=%s newpath=%s",
+ frame->root->unique, oldgfid,
+ oldloc->path, newgfid, newloc->path);
+
+ frame->local = oldloc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_rename_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename,
+ oldloc, newloc, xdata);
+
+ return 0;
+}
+
+int
+trace_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ char oldgfid[50] = {0,};
+ char newgfid[50] = {0,};
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_LINK].enabled) {
+ char string[4096] = {0,};
+ if (newloc->inode)
+ uuid_utoa_r (newloc->inode->gfid, newgfid);
+ else
+ strcpy (newgfid, "0");
+
+ uuid_utoa_r (oldloc->inode->gfid, oldgfid);
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": oldgfid=%s oldpath=%s --> "
+ "newgfid=%s newpath=%s", frame->root->unique,
+ oldgfid, oldloc->path, newgfid,
+ newloc->path);
+
+ frame->local = oldloc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_link_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link,
+ oldloc, newloc, xdata);
+ return 0;
+}
+
+int
+trace_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ uint64_t ia_time = 0;
+ char actime_str[256] = {0,};
+ char modtime_str[256] = {0,};
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_SETATTR].enabled) {
+ char string[4096] = {0,};
+ if (valid & GF_SET_ATTR_MODE) {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s mode=%o)",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid),
+ loc->path,
+ st_mode_from_ia (stbuf->ia_prot,
+ stbuf->ia_type));
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0 , sizeof (string));
+ }
+
+ if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s uid=%o,"
+ " gid=%o", frame->root->unique,
+ uuid_utoa (loc->inode->gfid),
+ loc->path, stbuf->ia_uid,
+ stbuf->ia_gid);
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0 , sizeof (string));
+ }
+
+ if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) {
+ ia_time = stbuf->ia_atime;
+ strftime (actime_str, 256, "[%b %d %H:%M:%S]",
+ localtime ((time_t *)&ia_time));
+
+ ia_time = stbuf->ia_mtime;
+ strftime (modtime_str, 256, "[%b %d %H:%M:%S]",
+ localtime ((time_t *)&ia_time));
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s "
+ "ia_atime=%s, ia_mtime=%s",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid),
+ loc->path, actime_str, modtime_str);
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0 , sizeof (string));
+ }
+ frame->local = loc->inode->gfid;
+ }
+
+out:
+ STACK_WIND (frame, trace_setattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr,
+ loc, stbuf, valid, xdata);
+
+ return 0;
+}
+
+int
+trace_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ uint64_t ia_time = 0;
+ char actime_str[256] = {0,};
+ char modtime_str[256] = {0,};
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSETATTR].enabled) {
+ char string[4096] = {0,};
+ if (valid & GF_SET_ATTR_MODE) {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, mode=%o",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd,
+ st_mode_from_ia (stbuf->ia_prot,
+ stbuf->ia_type));
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0, sizeof (string));
+ }
+
+ if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) {
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, uid=%o, "
+ "gid=%o", frame->root->unique,
+ uuid_utoa (fd->inode->gfid),
+ fd, stbuf->ia_uid, stbuf->ia_gid);
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0, sizeof (string));
+ }
+
+ if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) {
+ ia_time = stbuf->ia_atime;
+ strftime (actime_str, 256, "[%b %d %H:%M:%S]",
+ localtime ((time_t *)&ia_time));
+
+ ia_time = stbuf->ia_mtime;
+ strftime (modtime_str, 256, "[%b %d %H:%M:%S]",
+ localtime ((time_t *)&ia_time));
+
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p "
+ "ia_atime=%s, ia_mtime=%s",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid),
+ fd, actime_str, modtime_str);
+
+ LOG_ELEMENT (conf, string);
+ memset (string, 0, sizeof (string));
+ }
+ frame->local = fd->inode->gfid;
+ }
+
+out:
+ STACK_WIND (frame, trace_fsetattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr,
+ fd, stbuf, valid, xdata);
+
+ return 0;
+}
+
+int
+trace_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ off_t offset, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_TRUNCATE].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s, "
+ "offset=%"PRId64"", frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ offset);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_truncate_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate,
+ loc, offset, xdata);
+
+ return 0;
+}
+
+int
+trace_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_OPEN].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s flags=%d fd=%p",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ flags, fd);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_open_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open,
+ loc, flags, fd, xdata);
+ return 0;
+}
+
+int
+trace_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_CREATE].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s, fd=%p, "
+ "flags=0%o mode=0%o umask=0%o",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ fd, flags, mode, umask);
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_create_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create,
+ loc, flags, mode, umask, fd, xdata);
+ return 0;
+}
+
+int
+trace_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_READ].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, size=%"
+ GF_PRI_SIZET"offset=%"PRId64" flags=0%x)",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, size,
+ offset, flags);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_readv_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv,
+ fd, size, offset, flags, xdata);
+ return 0;
+}
+
+int
+trace_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count,
+ off_t offset, uint32_t flags, struct iobref *iobref, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_WRITE].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, count=%d, "
+ " offset=%"PRId64" flags=0%x)",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, count,
+ offset, flags);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_writev_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev,
+ fd, vector, count, offset, flags, iobref, xdata);
+ return 0;
+}
+
+int
+trace_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_STATFS].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s",
+ frame->root->unique, (loc->inode)?
+ uuid_utoa (loc->inode->gfid):"0", loc->path);
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_statfs_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->statfs,
+ loc, xdata);
+ return 0;
+}
+
+int
+trace_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FLUSH].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_flush_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush,
+ fd, xdata);
+ return 0;
+}
+
+int
+trace_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSYNC].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s flags=%d fd=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), flags, fd);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_fsync_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync,
+ fd, flags, xdata);
+ return 0;
+}
+
+int
+trace_setxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_SETXATTR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s flags=%d",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ flags);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_setxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr,
+ loc, dict, flags, xdata);
+ return 0;
+}
+
+int
+trace_getxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_GETXATTR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s name=%s",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ name);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_getxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr,
+ loc, name, xdata);
+ return 0;
+}
+
+int
+trace_removexattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_REMOVEXATTR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s name=%s",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path,
+ name);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_removexattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr,
+ loc, name, xdata);
+
+ return 0;
+}
+
+int
+trace_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_OPENDIR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s fd=%p",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid), loc->path, fd);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_opendir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir,
+ loc, fd, xdata);
+ return 0;
+}
+
+int
+trace_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_READDIRP].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, size=%"GF_PRI_SIZET
+ ", offset=%"PRId64" dict=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, size,
+ offset, dict);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_readdirp_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp,
+ fd, size, offset, dict);
+
+ return 0;
+}
+
+int
+trace_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata)
{
- ERR_EINVAL_NORETURN (!this || !loc);
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_READDIR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, size=%"GF_PRI_SIZET
+ ", offset=%"PRId64,
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, size,
+ offset);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
- if (trace_fop_names[GF_FOP_INODELK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": volume=%s, (loc {path=%s, ino=%"PRIu64"}, cmd=%s)",
- frame->root->unique, volume, loc->path, loc->inode->ino,
- ((cmd == F_SETLK)? "F_SETLK" : "unknown"));
- }
+out:
+ STACK_WIND (frame, trace_readdir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdir,
+ fd, size, offset, xdata);
- STACK_WIND (frame,
- trace_inodelk_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->inodelk,
- volume, loc, cmd, flock);
- return 0;
+ return 0;
}
+int
+trace_fsyncdir (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int32_t datasync, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSYNCDIR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s datasync=%d fd=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), datasync, fd);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
-int32_t
-trace_finodelk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+out:
+ STACK_WIND (frame, trace_fsyncdir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsyncdir,
+ fd, datasync, xdata);
+ return 0;
+}
+
+int
+trace_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
{
- ERR_EINVAL_NORETURN (!this );
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_ACCESS].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s path=%s mask=0%o",
+ frame->root->unique,
+ uuid_utoa (loc->inode->gfid),
+ loc->path, mask);
+
+ frame->local = loc->inode->gfid;
- if (trace_fop_names[GF_FOP_FINODELK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": op_ret=%d, op_errno=%d",
- frame->root->unique, op_ret, op_errno);
- }
+ LOG_ELEMENT (conf, string);
+ }
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+out:
+ STACK_WIND (frame, trace_access_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->access,
+ loc, mask, xdata);
+ return 0;
}
int32_t
-trace_finodelk (call_frame_t *frame,
- xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct flock *flock)
+trace_rchecksum (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ int32_t len, dict_t *xdata)
{
- ERR_EINVAL_NORETURN (!this || !fd);
- if (trace_fop_names[GF_FOP_FINODELK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": volume=%s, (fd=%p, cmd=%s)",
- frame->root->unique, volume, fd,
- ((cmd == F_SETLK) ? "F_SETLK" : "unknown"));
- }
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_RCHECKSUM].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s offset=%"PRId64
+ "len=%u fd=%p", frame->root->unique,
+ uuid_utoa (fd->inode->gfid), offset, len, fd);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_rchecksum_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rchecksum,
+ fd, offset, len, xdata);
+
+ return 0;
- STACK_WIND (frame,
- trace_finodelk_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->finodelk,
- volume, fd, cmd, flock);
- return 0;
}
+int32_t
+trace_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FENTRYLK].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s volume=%s, (fd=%p "
+ "basename=%s, cmd=%s, type=%s)",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), volume, fd,
+ basename,
+ ((cmd == ENTRYLK_LOCK) ? "ENTRYLK_LOCK" :
+ "ENTRYLK_UNLOCK"),
+ ((type == ENTRYLK_RDLCK) ? "ENTRYLK_RDLCK" :
+ "ENTRYLK_WRLCK"));
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_fentrylk_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fentrylk,
+ volume, fd, basename, cmd, type, xdata);
+ return 0;
+
+}
int32_t
-trace_xattrop (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- gf_xattrop_flags_t flags,
- dict_t *dict)
+trace_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
{
- ERR_EINVAL_NORETURN (!this || !loc);
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_XATTROP].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (path=%s, ino=%"PRIu64" flags=%d)",
- frame->root->unique, loc->path, loc->inode->ino, flags);
-
- }
-
- STACK_WIND (frame, trace_xattrop_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->xattrop,
- loc, flags, dict);
+ conf = this->private;
- return 0;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FGETXATTR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p name=%s",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, name);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_fgetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ fd, name, xdata);
+ return 0;
}
int32_t
-trace_fxattrop (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- gf_xattrop_flags_t flags,
- dict_t *dict)
-{
- ERR_EINVAL_NORETURN (!this || !fd);
-
- if (trace_fop_names[GF_FOP_FXATTROP].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (fd=%p, flags=%d)",
- frame->root->unique, fd, flags);
-
- }
-
- STACK_WIND (frame, trace_fxattrop_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fxattrop,
- fd, flags, dict);
-
- return 0;
-}
-
-int32_t
-trace_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req)
-{
- ERR_EINVAL_NORETURN (!this || !loc);
-
- if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
- /* TODO: print all the keys mentioned in xattr_req */
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"})",
- frame->root->unique, loc->path,
- loc->inode->ino);
- }
-
- STACK_WIND (frame, trace_lookup_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup,
- loc, xattr_req);
-
- return 0;
-}
-
-int32_t
-trace_stat (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- ERR_EINVAL_NORETURN (!this || !loc );
-
-
- if (trace_fop_names[GF_FOP_STAT].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"})",
- frame->root->unique, loc->path, loc->inode->ino);
- }
-
- STACK_WIND (frame,
- trace_stat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat,
- loc);
-
- return 0;
-}
-
-int32_t
-trace_readlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- size_t size)
-{
- ERR_EINVAL_NORETURN (!this || !loc || (size < 1));
-
- if (trace_fop_names[GF_FOP_READLINK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, size=%"GF_PRI_SIZET")",
- frame->root->unique, loc->path, loc->inode->ino, size);
- }
-
- STACK_WIND (frame,
- trace_readlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readlink,
- loc,
- size);
-
- return 0;
-}
-
-int32_t
-trace_mknod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode,
- dev_t dev)
-{
- ERR_EINVAL_NORETURN (!this || !loc->path);
-
- if (trace_fop_names[GF_FOP_MKNOD].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, mode=%d, dev=%"GF_PRI_DEV")",
- frame->root->unique, loc->path, loc->inode->ino, mode, dev);
- }
-
- STACK_WIND (frame,
- trace_mknod_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod,
- loc,
- mode,
- dev);
-
- return 0;
-}
-
-int32_t
-trace_mkdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
-{
- ERR_EINVAL_NORETURN (!this || !loc || !loc->path);
-
- if (trace_fop_names[GF_FOP_MKDIR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (path=%s, ino=%"PRIu64", mode=%d)",
- frame->root->unique, loc->path,
- ((loc->inode)? loc->inode->ino : 0), mode);
- }
-
- STACK_WIND (frame,
- trace_mkdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir,
- loc,
- mode);
- return 0;
-}
-
-int32_t
-trace_unlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- ERR_EINVAL_NORETURN (!this || !loc);
-
- if (trace_fop_names[GF_FOP_UNLINK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"})",
- frame->root->unique, loc->path, loc->inode->ino);
- }
-
- STACK_WIND (frame,
- trace_unlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink,
- loc);
- return 0;
-}
-
-int32_t
-trace_rmdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- ERR_EINVAL_NORETURN (!this || !loc);
-
- if (trace_fop_names[GF_FOP_RMDIR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"})",
- frame->root->unique, loc->path, loc->inode->ino);
- }
-
- STACK_WIND (frame,
- trace_rmdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir,
- loc);
-
- return 0;
-}
-
-int32_t
-trace_symlink (call_frame_t *frame,
- xlator_t *this,
- const char *linkpath,
- loc_t *loc)
-{
- ERR_EINVAL_NORETURN (!this || !linkpath || !loc || !loc->path);
-
- if (trace_fop_names[GF_FOP_SYMLINK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (linkpath=%s, loc {path=%s, ino=%"PRIu64"})",
- frame->root->unique, linkpath, loc->path,
- ((loc->inode)? loc->inode->ino : 0));
- }
-
- STACK_WIND (frame,
- trace_symlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink,
- linkpath,
- loc);
-
- return 0;
-}
-
-int32_t
-trace_rename (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
-{
- ERR_EINVAL_NORETURN (!this || !oldloc || !newloc);
-
- if (trace_fop_names[GF_FOP_RENAME].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (oldloc {path=%s, ino=%"PRIu64"}, "
- "newloc{path=%s, ino=%"PRIu64"})",
- frame->root->unique, oldloc->path, oldloc->ino,
- newloc->path, newloc->ino);
- }
-
- STACK_WIND (frame,
- trace_rename_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename,
- oldloc,
- newloc);
-
- return 0;
-}
-
-int32_t
-trace_link (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
-{
- ERR_EINVAL_NORETURN (!this || !oldloc || !newloc);
-
- if (trace_fop_names[GF_FOP_LINK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (oldloc {path=%s, ino=%"PRIu64"}, "
- "newloc {path=%s, ino=%"PRIu64"})",
- frame->root->unique, oldloc->path, oldloc->inode->ino,
- newloc->path, newloc->inode->ino);
- }
-
- STACK_WIND (frame,
- trace_link_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link,
- oldloc,
- newloc);
- return 0;
-}
-
-int32_t
-trace_chmod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
-{
- ERR_EINVAL_NORETURN (!this || !loc);
-
- if (trace_fop_names[GF_FOP_CHMOD].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, mode=%o)",
- frame->root->unique, loc->path, loc->inode->ino, mode);
- }
-
- STACK_WIND (frame,
- trace_chmod_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->chmod,
- loc,
- mode);
-
- return 0;
-}
-
-int32_t
-trace_chown (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- uid_t uid,
- gid_t gid)
-{
- ERR_EINVAL_NORETURN (!this || !loc);
-
- if (trace_fop_names[GF_FOP_CHOWN].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, uid=%d, gid=%d)",
- frame->root->unique, loc->path, loc->inode->ino, uid, gid);
- }
-
- STACK_WIND (frame,
- trace_chown_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->chown,
- loc,
- uid,
- gid);
-
- return 0;
-}
-
-int32_t
-trace_truncate (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset)
-{
- ERR_EINVAL_NORETURN (!this || !loc);
-
- if (trace_fop_names[GF_FOP_TRUNCATE].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, offset=%"PRId64")",
- frame->root->unique, loc->path, loc->inode->ino, offset);
- }
-
- STACK_WIND (frame,
- trace_truncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate,
- loc,
- offset);
-
- return 0;
-}
-
-int32_t
-trace_utimens (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- struct timespec tv[2])
-{
- char actime_str[256];
- char modtime_str[256];
-
- ERR_EINVAL_NORETURN (!this || !loc || !tv);
-
- if (trace_fop_names[GF_FOP_UTIMENS].enabled) {
- strftime (actime_str, 256, "[%b %d %H:%M:%S]", localtime (&tv[0].tv_sec));
- strftime (modtime_str, 256, "[%b %d %H:%M:%S]", localtime (&tv[1].tv_sec));
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, "
- "*tv=%p {actime=%s, modtime=%s})",
- frame->root->unique, loc->path, loc->inode->ino,
- tv, actime_str, modtime_str);
- }
-
- STACK_WIND (frame,
- trace_utimens_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->utimens,
- loc,
- tv);
-
- return 0;
-}
-
-int32_t
-trace_open (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- fd_t *fd)
-{
- ERR_EINVAL_NORETURN (!this || !loc);
-
- if (trace_fop_names[GF_FOP_OPEN].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, flags=%d, fd=%p)",
- frame->root->unique, loc->path, loc->inode->ino, flags, fd);
- }
-
- STACK_WIND (frame,
- trace_open_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open,
- loc,
- flags,
- fd);
- return 0;
-}
-
-int32_t
-trace_create (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- mode_t mode,
- fd_t *fd)
-{
- ERR_EINVAL_NORETURN (!this || !loc->path);
-
- if (trace_fop_names[GF_FOP_CREATE].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, flags=0%o mode=0%o)",
- frame->root->unique, loc->path, loc->inode->ino, flags, mode);
- }
-
- STACK_WIND (frame,
- trace_create_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create,
- loc,
- flags,
- mode,
- fd);
- return 0;
-}
-
-int32_t
-trace_readv (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset)
-{
- ERR_EINVAL_NORETURN (!this || !fd || (size < 1));
-
- if (trace_fop_names[GF_FOP_READ].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (*fd=%p, size=%"GF_PRI_SIZET", offset=%"PRId64")",
- frame->root->unique, fd, size, offset);
- }
-
- STACK_WIND (frame,
- trace_readv_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv,
- fd,
- size,
- offset);
- return 0;
-}
-
-int32_t
-trace_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t offset,
- struct iobref *iobref)
-{
- ERR_EINVAL_NORETURN (!this || !fd || !vector || (count < 1));
-
- if (trace_fop_names[GF_FOP_WRITE].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (*fd=%p, *vector=%p, count=%d, offset=%"PRId64")",
- frame->root->unique, fd, vector, count, offset);
- }
-
- STACK_WIND (frame,
- trace_writev_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev,
- fd,
- vector,
- count,
- offset,
- iobref);
- return 0;
-}
-
-int32_t
-trace_statfs (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- ERR_EINVAL_NORETURN (!this || !loc);
-
- if (trace_fop_names[GF_FOP_STATFS].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"})",
- frame->root->unique, loc->path,
- ((loc->inode)? loc->inode->ino : 0));
- }
-
- STACK_WIND (frame,
- trace_statfs_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->statfs,
- loc);
- return 0;
-}
-
-int32_t
-trace_flush (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
-{
- ERR_EINVAL_NORETURN (!this || !fd);
-
- if (trace_fop_names[GF_FOP_FLUSH].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (*fd=%p)",
- frame->root->unique, fd);
- }
-
- STACK_WIND (frame,
- trace_flush_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush,
- fd);
- return 0;
-}
-
-
-int32_t
-trace_fsync (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags)
-{
- ERR_EINVAL_NORETURN (!this || !fd);
-
- if (trace_fop_names[GF_FOP_FSYNC].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (flags=%d, *fd=%p)",
- frame->root->unique, flags, fd);
- }
-
- STACK_WIND (frame,
- trace_fsync_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync,
- fd,
- flags);
- return 0;
-}
-
-int32_t
-trace_setxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- int32_t flags)
-{
- ERR_EINVAL_NORETURN (!this || !loc || !dict);
-
- if (trace_fop_names[GF_FOP_SETXATTR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, dict=%p, flags=%d)",
- frame->root->unique, loc->path,
- ((loc->inode)? loc->inode->ino : 0), dict, flags);
- }
-
- STACK_WIND (frame,
- trace_setxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr,
- loc,
- dict,
- flags);
- return 0;
-}
-
-int32_t
-trace_getxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
-{
- ERR_EINVAL_NORETURN (!this || !loc);
-
- if (trace_fop_names[GF_FOP_GETXATTR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"}), name=%s",
- frame->root->unique, loc->path,
- ((loc->inode)? loc->inode->ino : 0), name);
- }
-
- STACK_WIND (frame,
- trace_getxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr,
- loc,
- name);
- return 0;
-}
-
-int32_t
-trace_removexattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
-{
- ERR_EINVAL_NORETURN (!this || !loc || !name);
-
- if (trace_fop_names[GF_FOP_REMOVEXATTR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (loc {path=%s, ino=%"PRIu64"}, name=%s)",
- frame->root->unique, loc->path,
- ((loc->inode)? loc->inode->ino : 0), name);
- }
-
- STACK_WIND (frame,
- trace_removexattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr,
- loc,
- name);
-
- return 0;
-}
-
-int32_t
-trace_opendir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- fd_t *fd)
-{
- ERR_EINVAL_NORETURN (!this || !loc );
-
- if (trace_fop_names[GF_FOP_OPENDIR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64":( loc {path=%s, ino=%"PRIu64"}, fd=%p)",
- frame->root->unique, loc->path, loc->inode->ino, fd);
- }
-
- STACK_WIND (frame,
- trace_opendir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->opendir,
- loc,
- fd);
- return 0;
-}
-
-int32_t
-trace_getdents (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset,
- int32_t flag)
-{
- ERR_EINVAL_NORETURN (!this || !fd);
-
- if (trace_fop_names[GF_FOP_GETDENTS].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (fd=%p, size=%"GF_PRI_SIZET", offset=%"PRId64", flag=0x%x)",
- frame->root->unique, fd, size, offset, flag);
- }
-
- STACK_WIND (frame,
- trace_getdents_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getdents,
- fd,
- size,
- offset,
- flag);
- return 0;
-}
-
-
-int32_t
-trace_readdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset)
-{
- ERR_EINVAL_NORETURN (!this || !fd);
-
- if (trace_fop_names[GF_FOP_READDIR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (fd=%p, size=%"GF_PRI_SIZET", offset=%"PRId64")",
- frame->root->unique, fd, size, offset);
- }
-
- STACK_WIND (frame,
- trace_readdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdir,
- fd,
- size,
- offset);
-
- return 0;
-}
-
-
-int32_t
-trace_fsyncdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t datasync)
-{
- ERR_EINVAL_NORETURN (!this || !fd);
-
- if (trace_fop_names[GF_FOP_FSYNCDIR].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (datasync=%d, *fd=%p)",
- frame->root->unique, datasync, fd);
- }
-
- STACK_WIND (frame,
- trace_fsyncdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsyncdir,
- fd,
- datasync);
- return 0;
-}
-
-int32_t
-trace_access (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t mask)
-{
- ERR_EINVAL_NORETURN (!this || !loc);
-
- if (trace_fop_names[GF_FOP_ACCESS].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (*loc {path=%s, ino=%"PRIu64"}, mask=0%o)",
- frame->root->unique, loc->path,
- ((loc->inode)? loc->inode->ino : 0), mask);
- }
-
- STACK_WIND (frame,
- trace_access_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->access,
- loc,
- mask);
- return 0;
-}
-
-int32_t
-trace_ftruncate (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset)
-{
- ERR_EINVAL_NORETURN (!this || !fd);
-
- if (trace_fop_names[GF_FOP_FTRUNCATE].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (offset=%"PRId64", *fd=%p)",
- frame->root->unique, offset, fd);
- }
-
- STACK_WIND (frame,
- trace_ftruncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate,
- fd,
- offset);
-
- return 0;
-}
-
-int32_t
-trace_fchown (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- uid_t uid,
- gid_t gid)
-{
- ERR_EINVAL_NORETURN (!this || !fd);
-
- if (trace_fop_names[GF_FOP_FCHOWN].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (*fd=%p, uid=%d, gid=%d)",
- frame->root->unique, fd, uid, gid);
- }
-
- STACK_WIND (frame,
- trace_fchown_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fchown,
- fd,
- uid,
- gid);
- return 0;
-}
-
-int32_t
-trace_fchmod (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- mode_t mode)
-{
- ERR_EINVAL_NORETURN (!this || !fd);
-
- if (trace_fop_names[GF_FOP_FCHMOD].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (mode=%o, *fd=%p)",
- frame->root->unique, mode, fd);
- }
-
- STACK_WIND (frame,
- trace_fchmod_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fchmod,
- fd,
- mode);
- return 0;
-}
-
-int32_t
-trace_fstat (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
-{
- ERR_EINVAL_NORETURN (!this || !fd);
-
- if (trace_fop_names[GF_FOP_FSTAT].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (*fd=%p)",
- frame->root->unique, fd);
- }
-
- STACK_WIND (frame,
- trace_fstat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat,
- fd);
- return 0;
-}
-
-int32_t
-trace_lk (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t cmd,
- struct flock *lock)
-{
- ERR_EINVAL_NORETURN (!this || !fd);
-
- if (trace_fop_names[GF_FOP_LK].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (*fd=%p, cmd=%d, lock {l_type=%d, l_whence=%d, "
- "l_start=%"PRId64", l_len=%"PRId64", l_pid=%u})",
- frame->root->unique, fd, cmd, lock->l_type, lock->l_whence,
- lock->l_start, lock->l_len, lock->l_pid);
- }
-
- STACK_WIND (frame,
- trace_lk_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk,
- fd,
- cmd,
- lock);
- return 0;
-}
-
-int32_t
-trace_setdents (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags,
- dir_entry_t *entries,
- int32_t count)
-{
- if (trace_fop_names[GF_FOP_SETDENTS].enabled) {
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (*fd=%p, flags=%d, count=%d",
- frame->root->unique, fd, flags, count);
- }
-
- STACK_WIND (frame,
- trace_setdents_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setdents,
- fd,
- flags,
- entries,
- count);
- return 0;
+trace_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSETXATTR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p flags=%d",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, flags);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_fsetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ fd, dict, flags, xdata);
+ return 0;
}
+int
+trace_ftruncate (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, off_t offset, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FTRUNCATE].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s offset=%"PRId64" fd=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), offset, fd);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_ftruncate_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate,
+ fd, offset, xdata);
+
+ return 0;
+}
+
+int
+trace_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSTAT].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_fstat_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat,
+ fd, xdata);
+ return 0;
+}
+
+int
+trace_lk (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_LK].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "%"PRId64": gfid=%s fd=%p, cmd=%d, "
+ "lock {l_type=%d, "
+ "l_whence=%d, l_start=%"PRId64", "
+ "l_len=%"PRId64", l_pid=%u})",
+ frame->root->unique,
+ uuid_utoa (fd->inode->gfid), fd, cmd,
+ lock->l_type, lock->l_whence,
+ lock->l_start, lock->l_len, lock->l_pid);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ STACK_WIND (frame, trace_lk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lk,
+ fd, cmd, lock, xdata);
+ return 0;
+}
int32_t
-trace_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *fchecksum,
- uint8_t *dchecksum)
+trace_forget (xlator_t *this, inode_t *inode)
{
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": op_ret (%d), op_errno(%d)",
- frame->root->unique, op_ret, op_errno);
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+ /* If user want to understand when a lookup happens,
+ he should know about 'forget' too */
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "gfid=%s", uuid_utoa (inode->gfid));
- STACK_UNWIND (frame, op_ret, op_errno, fchecksum, dchecksum);
+ LOG_ELEMENT (conf, string);
+ }
- return 0;
+out:
+ return 0;
}
int32_t
-trace_checksum (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flag)
-{
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": loc->path (%s) flag (%d)",
- frame->root->unique, loc->path, flag);
-
- STACK_WIND (frame,
- trace_checksum_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->checksum,
- loc,
- flag);
-
- return 0;
-}
-
-
-int32_t
-trace_stats_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct xlator_stats *stats)
-{
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": op_ret (%d), op_errno(%d)",
- frame->root->unique, op_ret, op_errno);
-
- STACK_UNWIND (frame, op_ret, op_errno, stats);
- return 0;
-}
-
-int32_t
-trace_stats (call_frame_t *frame,
- xlator_t *this,
- int32_t flags)
-{
- ERR_EINVAL_NORETURN (!this);
-
- gf_log (this->name, GF_LOG_NORMAL,
- "%"PRId64": (flags=%d)",
- frame->root->unique, flags);
-
- STACK_WIND (frame,
- trace_stats_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->mops->stats,
- flags);
-
- return 0;
+trace_releasedir (xlator_t *this, fd_t *fd)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_OPENDIR].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "gfid=%s fd=%p",
+ uuid_utoa (fd->inode->gfid), fd);
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ return 0;
}
+int32_t
+trace_release (xlator_t *this, fd_t *fd)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_OPEN].enabled ||
+ trace_fop_names[GF_FOP_CREATE].enabled) {
+ char string[4096] = {0,};
+ snprintf (string, sizeof (string),
+ "gfid=%s fd=%p",
+ uuid_utoa (fd->inode->gfid), fd);
+
+ LOG_ELEMENT (conf, string);
+ }
+
+out:
+ return 0;
+}
+
+
void
enable_all_calls (int enabled)
{
- int i;
- for (i = 0; i < GF_FOP_MAXVALUE; i++)
- trace_fop_names[i].enabled = enabled;
+ int i;
+
+ for (i = 0; i < GF_FOP_MAXVALUE; i++)
+ trace_fop_names[i].enabled = enabled;
}
-void
+void
enable_call (const char *name, int enabled)
{
- int i;
- for (i = 0; i < GF_FOP_MAXVALUE; i++)
- if (!strcasecmp(trace_fop_names[i].name, name))
- trace_fop_names[i].enabled = enabled;
+ int i;
+ for (i = 0; i < GF_FOP_MAXVALUE; i++)
+ if (!strcasecmp(trace_fop_names[i].name, name))
+ trace_fop_names[i].enabled = enabled;
}
-/*
- include = 1 for "include-ops"
- = 0 for "exclude-ops"
+/*
+ include = 1 for "include-ops"
+ = 0 for "exclude-ops"
*/
void
process_call_list (const char *list, int include)
{
- enable_all_calls (include ? 0 : 1);
-
- char *call = strsep ((char **)&list, ",");
- while (call) {
- enable_call (call, include);
- call = strsep ((char **)&list, ",");
- }
+ enable_all_calls (include ? 0 : 1);
+
+ char *call = strsep ((char **)&list, ",");
+
+ while (call) {
+ enable_call (call, include);
+ call = strsep ((char **)&list, ",");
+ }
+}
+
+int32_t
+trace_dump_history (xlator_t *this)
+{
+ int ret = -1;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0,};
+ trace_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO ("trace", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, this->history, out);
+
+ conf = this->private;
+ // Is it ok to return silently if log-history option his off?
+ if (conf && conf->log_history == _gf_true) {
+ gf_proc_dump_build_key (key_prefix, "xlator.debug.trace",
+ "history");
+ gf_proc_dump_add_section (key_prefix);
+ eh_dump (this->history, NULL, dump_history_trace);
+ }
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_trace_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ " failed");
+ return ret;
+ }
+
+ return ret;
}
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ int32_t ret = -1;
+ trace_conf_t *conf = NULL;
+ char *includes = NULL, *excludes = NULL;
+
+ GF_VALIDATE_OR_GOTO ("quick-read", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO (this->name, options, out);
+
+ conf = this->private;
+
+ includes = data_to_str (dict_get (options, "include-ops"));
+ excludes = data_to_str (dict_get (options, "exclude-ops"));
+
+ {
+ int i;
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ if (gf_fop_list[i])
+ strncpy (trace_fop_names[i].name,
+ gf_fop_list[i],
+ strlen (gf_fop_list[i]));
+ else
+ strncpy (trace_fop_names[i].name, ":O",
+ strlen (":O"));
+ trace_fop_names[i].enabled = 1;
+ }
+ }
+
+ if (includes && excludes) {
+ gf_log (this->name,
+ GF_LOG_ERROR,
+ "must specify only one of 'include-ops' and "
+ "'exclude-ops'");
+ goto out;
+ }
+
+ if (includes)
+ process_call_list (includes, 1);
+ if (excludes)
+ process_call_list (excludes, 0);
+
+ /* Should resizing of the event-history be allowed in reconfigure?
+ * for which a new event_history might have to be allocated and the
+ * older history has to be freed.
+ */
+ GF_OPTION_RECONF ("log-file", conf->log_file, options, bool, out);
+
+ GF_OPTION_RECONF ("log-history", conf->log_history, options, bool, out);
-int32_t
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int32_t
init (xlator_t *this)
{
- dict_t *options = this->options;
- char *includes = NULL, *excludes = NULL;
-
- if (!this)
- return -1;
-
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "trace translator requires one subvolume");
- return -1;
- }
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
-
- includes = data_to_str (dict_get (options, "include-ops"));
- excludes = data_to_str (dict_get (options, "exclude-ops"));
-
- {
- int i;
- for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- trace_fop_names[i].name = (gf_fop_list[i] ?
- gf_fop_list[i] : ":O");
- trace_fop_names[i].enabled = 1;
- }
- }
-
- if (includes && excludes) {
- gf_log (this->name,
- GF_LOG_ERROR,
- "must specify only one of 'include-ops' and 'exclude-ops'");
- return -1;
- }
- if (includes)
- process_call_list (includes, 1);
- if (excludes)
- process_call_list (excludes, 0);
-
- gf_log_set_loglevel (GF_LOG_NORMAL);
-
- /* Set this translator's inode table pointer to child node's pointer. */
- this->itable = FIRST_CHILD (this)->itable;
-
- return 0;
+ dict_t *options = NULL;
+ char *includes = NULL, *excludes = NULL;
+ char *forced_loglevel = NULL;
+ eh_t *history = NULL;
+ int ret = -1;
+ size_t history_size = TRACE_DEFAULT_HISTORY_SIZE;
+ trace_conf_t *conf = NULL;
+
+ if (!this)
+ return -1;
+
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "trace translator requires one subvolume");
+ return -1;
+ }
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+
+ conf = GF_CALLOC (1, sizeof (trace_conf_t), gf_trace_mt_trace_conf_t);
+ if (!conf) {
+ gf_log (this->name, GF_LOG_ERROR, "cannot allocate "
+ "xl->private");
+ return -1;
+ }
+
+ options = this->options;
+ includes = data_to_str (dict_get (options, "include-ops"));
+ excludes = data_to_str (dict_get (options, "exclude-ops"));
+
+ {
+ int i;
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ if (gf_fop_list[i])
+ strncpy (trace_fop_names[i].name,
+ gf_fop_list[i],
+ strlen (gf_fop_list[i]));
+ else
+ strncpy (trace_fop_names[i].name, ":O",
+ strlen (":O"));
+ trace_fop_names[i].enabled = 1;
+ }
+ }
+
+ if (includes && excludes) {
+ gf_log (this->name,
+ GF_LOG_ERROR,
+ "must specify only one of 'include-ops' and "
+ "'exclude-ops'");
+ return -1;
+ }
+
+ if (includes)
+ process_call_list (includes, 1);
+ if (excludes)
+ process_call_list (excludes, 0);
+
+
+ GF_OPTION_INIT ("history-size", conf->history_size, size, out);
+
+ gf_log (this->name, GF_LOG_INFO, "history size %"GF_PRI_SIZET,
+ history_size);
+
+ GF_OPTION_INIT ("log-file", conf->log_file, bool, out);
+
+ gf_log (this->name, GF_LOG_INFO, "logging to file %s",
+ (conf->log_file == _gf_true)?"enabled":"disabled");
+
+ GF_OPTION_INIT ("log-history", conf->log_history, bool, out);
+
+ gf_log (this->name, GF_LOG_DEBUG, "logging to history %s",
+ (conf->log_history == _gf_true)?"enabled":"disabled");
+
+ history = eh_new (history_size, _gf_false, NULL);
+ if (!history) {
+ gf_log (this->name, GF_LOG_ERROR, "event history cannot be "
+ "initialized");
+ return -1;
+ }
+
+ this->history = history;
+
+ conf->trace_log_level = GF_LOG_INFO;
+
+ if (dict_get (options, "force-log-level")) {
+ forced_loglevel = data_to_str (dict_get (options,
+ "force-log-level"));
+ if (!forced_loglevel)
+ goto setloglevel;
+
+ if (strcmp (forced_loglevel, "INFO") == 0)
+ conf->trace_log_level = GF_LOG_INFO;
+ else if (strcmp (forced_loglevel, "TRACE") == 0)
+ conf->trace_log_level = GF_LOG_TRACE;
+ else if (strcmp (forced_loglevel, "ERROR") == 0)
+ conf->trace_log_level = GF_LOG_ERROR;
+ else if (strcmp (forced_loglevel, "DEBUG") == 0)
+ conf->trace_log_level = GF_LOG_DEBUG;
+ else if (strcmp (forced_loglevel, "WARNING") == 0)
+ conf->trace_log_level = GF_LOG_WARNING;
+ else if (strcmp (forced_loglevel, "CRITICAL") == 0)
+ conf->trace_log_level = GF_LOG_CRITICAL;
+ else if (strcmp (forced_loglevel, "NONE") == 0)
+ conf->trace_log_level = GF_LOG_NONE;
+ }
+
+setloglevel:
+ gf_log_set_loglevel (conf->trace_log_level);
+ this->private = conf;
+ ret = 0;
+out:
+ if (ret == -1) {
+ if (history)
+ GF_FREE (history);
+ if (conf)
+ GF_FREE (conf);
+ }
+
+ return ret;
}
void
fini (xlator_t *this)
{
- if (!this)
- return;
-
- gf_log (this->name, GF_LOG_NORMAL,
- "trace translator unloaded");
- return;
+ if (!this)
+ return;
+
+ if (this->history)
+ eh_destroy (this->history);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "trace translator unloaded");
+ return;
}
struct xlator_fops fops = {
- .stat = trace_stat,
- .readlink = trace_readlink,
- .mknod = trace_mknod,
- .mkdir = trace_mkdir,
- .unlink = trace_unlink,
- .rmdir = trace_rmdir,
- .symlink = trace_symlink,
- .rename = trace_rename,
- .link = trace_link,
- .chmod = trace_chmod,
- .chown = trace_chown,
- .truncate = trace_truncate,
- .utimens = trace_utimens,
- .open = trace_open,
- .readv = trace_readv,
- .writev = trace_writev,
- .statfs = trace_statfs,
- .flush = trace_flush,
- .fsync = trace_fsync,
- .setxattr = trace_setxattr,
- .getxattr = trace_getxattr,
- .removexattr = trace_removexattr,
- .opendir = trace_opendir,
- .readdir = trace_readdir,
- .fsyncdir = trace_fsyncdir,
- .access = trace_access,
- .ftruncate = trace_ftruncate,
- .fstat = trace_fstat,
- .create = trace_create,
- .fchown = trace_fchown,
- .fchmod = trace_fchmod,
- .lk = trace_lk,
- .inodelk = trace_inodelk,
- .finodelk = trace_finodelk,
- .entrylk = trace_entrylk,
- .lookup = trace_lookup,
- .setdents = trace_setdents,
- .getdents = trace_getdents,
- .checksum = trace_checksum,
- .xattrop = trace_xattrop,
- .fxattrop = trace_fxattrop,
-};
-
-struct xlator_mops mops = {
- .stats = trace_stats,
+ .stat = trace_stat,
+ .readlink = trace_readlink,
+ .mknod = trace_mknod,
+ .mkdir = trace_mkdir,
+ .unlink = trace_unlink,
+ .rmdir = trace_rmdir,
+ .symlink = trace_symlink,
+ .rename = trace_rename,
+ .link = trace_link,
+ .truncate = trace_truncate,
+ .open = trace_open,
+ .readv = trace_readv,
+ .writev = trace_writev,
+ .statfs = trace_statfs,
+ .flush = trace_flush,
+ .fsync = trace_fsync,
+ .setxattr = trace_setxattr,
+ .getxattr = trace_getxattr,
+ .fsetxattr = trace_fsetxattr,
+ .fgetxattr = trace_fgetxattr,
+ .removexattr = trace_removexattr,
+ .opendir = trace_opendir,
+ .readdir = trace_readdir,
+ .readdirp = trace_readdirp,
+ .fsyncdir = trace_fsyncdir,
+ .access = trace_access,
+ .ftruncate = trace_ftruncate,
+ .fstat = trace_fstat,
+ .create = trace_create,
+ .lk = trace_lk,
+ .inodelk = trace_inodelk,
+ .finodelk = trace_finodelk,
+ .entrylk = trace_entrylk,
+ .fentrylk = trace_fentrylk,
+ .lookup = trace_lookup,
+ .rchecksum = trace_rchecksum,
+ .xattrop = trace_xattrop,
+ .fxattrop = trace_fxattrop,
+ .setattr = trace_setattr,
+ .fsetattr = trace_fsetattr,
};
struct xlator_cbks cbks = {
+ .release = trace_release,
+ .releasedir = trace_releasedir,
+ .forget = trace_forget,
};
struct volume_options options[] = {
- { .key = {"include-ops", "include"},
- .type = GF_OPTION_TYPE_STR,
- /*.value = { ""} */
- },
- { .key = {"exclude-ops", "exclude"},
- .type = GF_OPTION_TYPE_STR
- /*.value = { ""} */
- },
- { .key = {NULL} },
+ { .key = {"include-ops", "include"},
+ .type = GF_OPTION_TYPE_STR,
+ /*.value = { ""} */
+ },
+ { .key = {"exclude-ops", "exclude"},
+ .type = GF_OPTION_TYPE_STR
+ /*.value = { ""} */
+ },
+ { .key = {"history-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .default_value = "1024",
+ },
+ { .key = {"log-file"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ },
+ { .key = {"log-history"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ },
+ { .key = {NULL} },
};
+struct xlator_dumpops dumpops = {
+ .history = trace_dump_history
+};
diff --git a/xlators/debug/trace/src/trace.h b/xlators/debug/trace/src/trace.h
new file mode 100644
index 000000000..045eefb36
--- /dev/null
+++ b/xlators/debug/trace/src/trace.h
@@ -0,0 +1,98 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <time.h>
+#include <errno.h>
+#include "glusterfs.h"
+#include "xlator.h"
+#include "common-utils.h"
+#include "event-history.h"
+#include "logging.h"
+#include "circ-buff.h"
+#include "statedump.h"
+#include "options.h"
+
+#define TRACE_DEFAULT_HISTORY_SIZE 1024
+
+typedef struct {
+ /* Since the longest fop name is fremovexattr i.e 12 characters, array size
+ * is kept 24, i.e double of the maximum.
+ */
+ char name[24];
+ int enabled;
+} trace_fop_name_t;
+
+trace_fop_name_t trace_fop_names[GF_FOP_MAXVALUE];
+
+typedef struct {
+ gf_boolean_t log_file;
+ gf_boolean_t log_history;
+ size_t history_size;
+ int trace_log_level;
+} trace_conf_t;
+
+#define TRACE_STACK_UNWIND(op, frame, params ...) \
+ do { \
+ frame->local = NULL; \
+ STACK_UNWIND_STRICT (op, frame, params); \
+ } while (0);
+
+#define LOG_ELEMENT(_conf, _string) \
+ do { \
+ if (_conf) { \
+ if ((_conf->log_history) == _gf_true) \
+ gf_log_eh ("%s", _string); \
+ if ((_conf->log_file) == _gf_true) \
+ gf_log (THIS->name, _conf->trace_log_level, \
+ "%s", _string); \
+ } \
+ } while (0);
+
+#define trace_stat_to_str(buf, statstr) \
+ do { \
+ char atime_buf[256] = {0,}; \
+ char mtime_buf[256] = {0,}; \
+ char ctime_buf[256] = {0,}; \
+ uint64_t ia_time = 0; \
+ \
+ if (!buf) \
+ break; \
+ \
+ ia_time = buf->ia_atime; \
+ strftime (atime_buf, 256, "[%b %d %H:%M:%S]", \
+ localtime ((time_t *)&ia_time)); \
+ \
+ ia_time = buf->ia_mtime; \
+ strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", \
+ localtime ((time_t *)&ia_time)); \
+ \
+ ia_time = buf->ia_ctime; \
+ strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", \
+ localtime ((time_t *)&ia_time)); \
+ \
+ snprintf (statstr, sizeof (statstr), \
+ "gfid=%s ino=%"PRIu64", mode=%o, " \
+ "nlink=%"GF_PRI_NLINK", uid=%u, " \
+ "gid=%u, size=%"PRIu64", " \
+ "blocks=%"PRIu64", atime=%s, " \
+ "mtime=%s, ctime=%s", \
+ uuid_utoa (buf->ia_gfid), buf->ia_ino, \
+ st_mode_from_ia (buf->ia_prot, \
+ buf->ia_type), \
+ buf->ia_nlink, buf->ia_uid, \
+ buf->ia_gid, buf->ia_size, \
+ buf->ia_blocks, atime_buf, \
+ mtime_buf, ctime_buf); \
+ } while (0);
diff --git a/xlators/encryption/Makefile.am b/xlators/encryption/Makefile.am
index 2cbde680f..36efc6698 100644
--- a/xlators/encryption/Makefile.am
+++ b/xlators/encryption/Makefile.am
@@ -1,3 +1,3 @@
-SUBDIRS = rot-13
+SUBDIRS = rot-13 crypt
CLEANFILES =
diff --git a/auth/addr/Makefile.am b/xlators/encryption/crypt/Makefile.am
index d471a3f92..d471a3f92 100644
--- a/auth/addr/Makefile.am
+++ b/xlators/encryption/crypt/Makefile.am
diff --git a/xlators/encryption/crypt/src/Makefile.am b/xlators/encryption/crypt/src/Makefile.am
new file mode 100644
index 000000000..faadd117f
--- /dev/null
+++ b/xlators/encryption/crypt/src/Makefile.am
@@ -0,0 +1,24 @@
+if ENABLE_CRYPT_XLATOR
+
+xlator_LTLIBRARIES = crypt.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/encryption
+
+crypt_la_LDFLAGS = -module -avoidversion -lssl -lcrypto
+
+crypt_la_SOURCES = keys.c data.c metadata.c atom.c crypt.c
+crypt_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = crypt-common.h crypt-mem-types.h crypt.h metadata.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
+
+else
+
+noinst_DIST = keys.c data.c metadata.c atom.c crypt.c
+noinst_HEADERS = crypt-common.h crypt-mem-types.h crypt.h metadata.h
+
+endif \ No newline at end of file
diff --git a/xlators/encryption/crypt/src/atom.c b/xlators/encryption/crypt/src/atom.c
new file mode 100644
index 000000000..1ec41495c
--- /dev/null
+++ b/xlators/encryption/crypt/src/atom.c
@@ -0,0 +1,962 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "defaults.h"
+#include "crypt-common.h"
+#include "crypt.h"
+
+/*
+ * Glossary
+ *
+ *
+ * cblock (or cipher block). A logical unit in a file.
+ * cblock size is defined as the number of bits
+ * in an input (or output) block of the block
+ * cipher (*). Cipher block size is a property of
+ * cipher algorithm. E.g. cblock size is 64 bits
+ * for DES, 128 bits for AES, etc.
+ *
+ * atomic cipher A cipher algorithm, which requires some chunks of
+ * algorithm text to be padded at left and(or) right sides before
+ * cipher transaform.
+ *
+ *
+ * block (atom) Minimal chunk of file's data, which doesn't require
+ * padding. We'll consider logical units in a file of
+ * block size (atom size).
+ *
+ * cipher algorithm Atomic cipher algorithm, which requires the last
+ * with EOF issue incomplete cblock in a file to be padded with some
+ * data (usually zeros).
+ *
+ *
+ * operation, which reading/writing from offset, which is not aligned to
+ * forms a gap at to atom size
+ * the beginning
+ *
+ *
+ * operation, which reading/writing count bytes starting from offset off,
+ * forms a gap at so that off+count is not aligned to atom_size
+ * the end
+ *
+ * head block the first atom affected by an operation, which forms
+ * a gap at the beginning, or(and) at the end.
+ * Сomment. Head block has at least one gap (either at
+ * the beginning, or at the end)
+ *
+ *
+ * tail block the last atom different from head, affected by an
+ * operation, which forms a gap at the end.
+ * Сomment: Tail block has exactly one gap (at the end).
+ *
+ *
+ * partial block head or tail block
+ *
+ *
+ * full block block without gaps.
+ *
+ *
+ * (*) Recommendation for Block Cipher Modes of Operation
+ * Methods and Techniques
+ * NIST Special Publication 800-38A Edition 2001
+ */
+
+/*
+ * atom->offset_at()
+ */
+static off_t offset_at_head(struct avec_config *conf)
+{
+ return conf->aligned_offset;
+}
+
+static off_t offset_at_hole_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_at_head(get_hole_conf(frame));
+}
+
+static off_t offset_at_data_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_at_head(get_data_conf(frame));
+}
+
+
+static off_t offset_at_tail(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ return conf->aligned_offset +
+ (conf->off_in_head ? get_atom_size(object) : 0) +
+ (conf->nr_full_blocks << get_atom_bits(object));
+}
+
+static off_t offset_at_hole_tail(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_at_tail(get_hole_conf(frame), object);
+}
+
+
+static off_t offset_at_data_tail(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_at_tail(get_data_conf(frame), object);
+}
+
+static off_t offset_at_full(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ return conf->aligned_offset +
+ (conf->off_in_head ? get_atom_size(object) : 0);
+}
+
+static off_t offset_at_data_full(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_at_full(get_data_conf(frame), object);
+}
+
+static off_t offset_at_hole_full(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_at_full(get_hole_conf(frame), object);
+}
+
+/*
+ * atom->io_size_nopad()
+ */
+
+static uint32_t io_size_nopad_head(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ uint32_t gap_at_beg;
+ uint32_t gap_at_end;
+
+ check_head_block(conf);
+
+ gap_at_beg = conf->off_in_head;
+
+ if (has_tail_block(conf) || has_full_blocks(conf) || conf->off_in_tail == 0 )
+ gap_at_end = 0;
+ else
+ gap_at_end = get_atom_size(object) - conf->off_in_tail;
+
+ return get_atom_size(object) - (gap_at_beg + gap_at_end);
+}
+
+static uint32_t io_size_nopad_tail(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ check_tail_block(conf);
+ return conf->off_in_tail;
+}
+
+static uint32_t io_size_nopad_full(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ check_full_block(conf);
+ return get_atom_size(object);
+}
+
+static uint32_t io_size_nopad_data_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return io_size_nopad_head(get_data_conf(frame), object);
+}
+
+static uint32_t io_size_nopad_hole_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return io_size_nopad_head(get_hole_conf(frame), object);
+}
+
+static uint32_t io_size_nopad_data_tail(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return io_size_nopad_tail(get_data_conf(frame), object);
+}
+
+static uint32_t io_size_nopad_hole_tail(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return io_size_nopad_tail(get_hole_conf(frame), object);
+}
+
+static uint32_t io_size_nopad_data_full(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return io_size_nopad_full(get_data_conf(frame), object);
+}
+
+static uint32_t io_size_nopad_hole_full(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return io_size_nopad_full(get_hole_conf(frame), object);
+}
+
+static uint32_t offset_in_head(struct avec_config *conf)
+{
+ check_cursor_head(conf);
+
+ return conf->off_in_head;
+}
+
+static uint32_t offset_in_tail(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return 0;
+}
+
+static uint32_t offset_in_full(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ check_cursor_full(conf);
+
+ if (has_head_block(conf))
+ return (conf->cursor - 1) << get_atom_bits(object);
+ else
+ return conf->cursor << get_atom_bits(object);
+}
+
+static uint32_t offset_in_data_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_in_head(get_data_conf(frame));
+}
+
+static uint32_t offset_in_hole_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_in_head(get_hole_conf(frame));
+}
+
+static uint32_t offset_in_data_full(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_in_full(get_data_conf(frame), object);
+}
+
+static uint32_t offset_in_hole_full(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return offset_in_full(get_hole_conf(frame), object);
+}
+
+/*
+ * atom->rmw()
+ */
+/*
+ * Pre-conditions:
+ * @vec contains plain text of the latest
+ * version.
+ *
+ * Uptodate gaps of the @partial block with
+ * this plain text, encrypt the whole block
+ * and write the result to disk.
+ */
+static int32_t rmw_partial_block(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ struct rmw_atom *atom)
+{
+ size_t was_read = 0;
+ uint64_t file_size;
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+
+ struct iovec *partial = atom->get_iovec(frame, 0);
+ struct avec_config *conf = atom->get_config(frame);
+ end_writeback_handler_t end_writeback_partial_block;
+#if DEBUG_CRYPT
+ gf_boolean_t check_last_cblock = _gf_false;
+#endif
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0)
+ goto exit;
+
+ file_size = local->cur_file_size;
+ was_read = op_ret;
+
+ if (atom->locality == HEAD_ATOM && conf->off_in_head) {
+ /*
+ * head atom with a non-uptodate gap
+ * at the beginning
+ *
+ * fill the gap with plain text of the
+ * latest version. Convert a part of hole
+ * (if any) to zeros.
+ */
+ int32_t i;
+ int32_t copied = 0;
+ int32_t to_gap; /* amount of data needed to uptodate
+ the gap at the beginning */
+#if 0
+ int32_t hole = 0; /* The part of the hole which
+ * got in the head block */
+#endif /* 0 */
+ to_gap = conf->off_in_head;
+
+ if (was_read < to_gap) {
+ if (file_size >
+ offset_at_head(conf) + was_read) {
+ /*
+ * It is impossible to uptodate
+ * head block: too few bytes have
+ * been read from disk, so that
+ * partial write is impossible.
+ *
+ * It could happen because of many
+ * reasons: IO errors, (meta)data
+ * corruption in the local file system,
+ * etc.
+ */
+ gf_log(this->name, GF_LOG_WARNING,
+ "Can not uptodate a gap at the beginning");
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto exit;
+ }
+#if 0
+ hole = to_gap - was_read;
+#endif /* 0 */
+ to_gap = was_read;
+ }
+ /*
+ * uptodate the gap at the beginning
+ */
+ for (i = 0; i < count && copied < to_gap; i++) {
+ int32_t to_copy;
+
+ to_copy = vec[i].iov_len;
+ if (to_copy > to_gap - copied)
+ to_copy = to_gap - copied;
+
+ memcpy(partial->iov_base, vec[i].iov_base, to_copy);
+ copied += to_copy;
+ }
+#if 0
+ /*
+ * If possible, convert part of the
+ * hole, which got in the head block
+ */
+ ret = TRY_LOCK(&local->hole_lock);
+ if (!ret) {
+ if (local->hole_handled)
+ /*
+ * already converted by
+ * crypt_writev_cbk()
+ */
+ UNLOCK(&local->hole_lock);
+ else {
+ /*
+ * convert the part of the hole
+ * which got in the head block
+ * to zeros.
+ *
+ * Update the orig_offset to make
+ * sure writev_cbk() won't care
+ * about this part of the hole.
+ *
+ */
+ memset(partial->iov_base + to_gap, 0, hole);
+
+ conf->orig_offset -= hole;
+ conf->orig_size += hole;
+ UNLOCK(&local->hole_lock);
+ }
+ }
+ else /*
+ * conversion is being performed
+ * by crypt_writev_cbk()
+ */
+ ;
+#endif /* 0 */
+ }
+ if (atom->locality == TAIL_ATOM ||
+ (!has_tail_block(conf) && conf->off_in_tail)) {
+ /*
+ * tail atom, or head atom with a non-uptodate
+ * gap at the end.
+ *
+ * fill the gap at the end of the block
+ * with plain text of the latest version.
+ * Pad the result, (if needed)
+ */
+ int32_t i;
+ int32_t to_gap;
+ int copied;
+ off_t off_in_tail;
+ int32_t to_copy;
+
+ off_in_tail = conf->off_in_tail;
+ to_gap = conf->gap_in_tail;
+
+ if (to_gap && was_read < off_in_tail + to_gap) {
+ /*
+ * It is impossible to uptodate
+ * the gap at the end: too few bytes
+ * have been read from disk, so that
+ * partial write is impossible.
+ *
+ * It could happen because of many
+ * reasons: IO errors, (meta)data
+ * corruption in the local file system,
+ * etc.
+ */
+ gf_log(this->name, GF_LOG_WARNING,
+ "Can not uptodate a gap at the end");
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto exit;
+ }
+ /*
+ * uptodate the gap at the end
+ */
+ copied = 0;
+ to_copy = to_gap;
+ for(i = count - 1; i >= 0 && to_copy > 0; i--) {
+ uint32_t from_vec, off_in_vec;
+
+ off_in_vec = 0;
+ from_vec = vec[i].iov_len;
+ if (from_vec > to_copy) {
+ off_in_vec = from_vec - to_copy;
+ from_vec = to_copy;
+ }
+ memcpy(partial->iov_base +
+ off_in_tail + to_gap - copied - from_vec,
+ vec[i].iov_base + off_in_vec,
+ from_vec);
+
+ gf_log(this->name, GF_LOG_DEBUG,
+ "uptodate %d bytes at tail. Offset at target(source): %d(%d)",
+ (int)from_vec,
+ (int)off_in_tail + to_gap - copied - from_vec,
+ (int)off_in_vec);
+
+ copied += from_vec;
+ to_copy -= from_vec;
+ }
+ partial->iov_len = off_in_tail + to_gap;
+
+ if (object_alg_should_pad(object)) {
+ int32_t resid = 0;
+ resid = partial->iov_len & (object_alg_blksize(object) - 1);
+ if (resid) {
+ /*
+ * append a new EOF padding
+ */
+ local->eof_padding_size =
+ object_alg_blksize(object) - resid;
+
+ gf_log(this->name, GF_LOG_DEBUG,
+ "set padding size %d",
+ local->eof_padding_size);
+
+ memset(partial->iov_base + partial->iov_len,
+ 1,
+ local->eof_padding_size);
+ partial->iov_len += local->eof_padding_size;
+#if DEBUG_CRYPT
+ gf_log(this->name, GF_LOG_DEBUG,
+ "pad cblock with %d zeros:",
+ local->eof_padding_size);
+ dump_cblock(this,
+ (unsigned char *)partial->iov_base +
+ partial->iov_len - object_alg_blksize(object));
+ check_last_cblock = _gf_true;
+#endif
+ }
+ }
+ }
+ /*
+ * encrypt the whole block
+ */
+ encrypt_aligned_iov(object,
+ partial,
+ 1,
+ atom->offset_at(frame, object));
+#if DEBUG_CRYPT
+ if (check_last_cblock == _gf_true) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "encrypt last cblock with offset %llu",
+ (unsigned long long)atom->offset_at(frame, object));
+ dump_cblock(this, (unsigned char *)partial->iov_base +
+ partial->iov_len - object_alg_blksize(object));
+ }
+#endif
+ set_local_io_params_writev(frame, object, atom,
+ atom->offset_at(frame, object),
+ iovec_get_size(partial, 1));
+ /*
+ * write the whole block to disk
+ */
+ end_writeback_partial_block = dispatch_end_writeback(local->fop);
+ conf->cursor ++;
+ STACK_WIND(frame,
+ end_writeback_partial_block,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev,
+ local->fd,
+ partial,
+ 1,
+ atom->offset_at(frame, object),
+ local->flags,
+ local->iobref_data,
+ local->xdata);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "submit partial block: %d bytes from %d offset",
+ (int)iovec_get_size(partial, 1),
+ (int)atom->offset_at(frame, object));
+ exit:
+ return 0;
+}
+
+/*
+ * Perform a (read-)modify-write sequence.
+ * This should be performed only after approval
+ * of upper server-side manager, i.e. the caller
+ * needs to make sure this is his turn to rmw.
+ */
+void submit_partial(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ atom_locality_type ltype)
+{
+ int32_t ret;
+ dict_t *dict;
+ struct rmw_atom *atom;
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+
+ atom = atom_by_types(local->active_setup, ltype);
+ /*
+ * To perform the "read" component of the read-modify-write
+ * sequence the crypt translator does stack_wind to itself.
+ *
+ * Pass current file size to crypt_readv()
+ */
+ dict = dict_new();
+ if (!dict) {
+ /*
+ * FIXME: Handle the error
+ */
+ gf_log("crypt", GF_LOG_WARNING, "Can not alloc dict");
+ return;
+ }
+ ret = dict_set(dict,
+ FSIZE_XATTR_PREFIX,
+ data_from_uint64(local->cur_file_size));
+ if (ret) {
+ /*
+ * FIXME: Handle the error
+ */
+ dict_unref(dict);
+ gf_log("crypt", GF_LOG_WARNING, "Can not set dict");
+ goto exit;
+ }
+ STACK_WIND(frame,
+ atom->rmw,
+ this,
+ this->fops->readv, /* crypt_readv */
+ fd,
+ atom->count_to_uptodate(frame, object), /* count */
+ atom->offset_at(frame, object), /* offset to read from */
+ 0,
+ dict);
+ exit:
+ dict_unref(dict);
+}
+
+/*
+ * submit blocks of FULL_ATOM type
+ */
+void submit_full(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+ struct rmw_atom *atom = atom_by_types(local->active_setup, FULL_ATOM);
+ uint32_t count; /* total number of full blocks to submit */
+ uint32_t granularity; /* number of blocks to submit in one iteration */
+
+ uint64_t off_in_file; /* start offset in the file, bytes */
+ uint32_t off_in_atom; /* start offset in the atom, blocks */
+ uint32_t blocks_written = 0; /* blocks written for this submit */
+
+ struct avec_config *conf = atom->get_config(frame);
+ end_writeback_handler_t end_writeback_full_block;
+ /*
+ * Write full blocks by groups of granularity size.
+ */
+ end_writeback_full_block = dispatch_end_writeback(local->fop);
+
+ if (is_ordered_mode(frame)) {
+ uint32_t skip = has_head_block(conf) ? 1 : 0;
+ count = 1;
+ granularity = 1;
+ /*
+ * calculate start offset using cursor value;
+ * here we should take into accout head block,
+ * which corresponds to cursor value 0.
+ */
+ off_in_file = atom->offset_at(frame, object) +
+ ((conf->cursor - skip) << get_atom_bits(object));
+ off_in_atom = conf->cursor - skip;
+ }
+ else {
+ /*
+ * in parallel mode
+ */
+ count = conf->nr_full_blocks;
+ granularity = MAX_IOVEC;
+ off_in_file = atom->offset_at(frame, object);
+ off_in_atom = 0;
+ }
+ while (count) {
+ uint32_t blocks_to_write = count;
+
+ if (blocks_to_write > granularity)
+ blocks_to_write = granularity;
+ if (conf->type == HOLE_ATOM)
+ /*
+ * reset iovec before encryption
+ */
+ memset(atom->get_iovec(frame, 0)->iov_base,
+ 0,
+ get_atom_size(object));
+ /*
+ * encrypt the group
+ */
+ encrypt_aligned_iov(object,
+ atom->get_iovec(frame,
+ off_in_atom +
+ blocks_written),
+ blocks_to_write,
+ off_in_file + (blocks_written <<
+ get_atom_bits(object)));
+
+ set_local_io_params_writev(frame, object, atom,
+ off_in_file + (blocks_written << get_atom_bits(object)),
+ blocks_to_write << get_atom_bits(object));
+
+ conf->cursor += blocks_to_write;
+
+ STACK_WIND(frame,
+ end_writeback_full_block,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev,
+ local->fd,
+ atom->get_iovec(frame, off_in_atom + blocks_written),
+ blocks_to_write,
+ off_in_file + (blocks_written << get_atom_bits(object)),
+ local->flags,
+ local->iobref_data ? local->iobref_data : local->iobref,
+ local->xdata);
+
+ gf_log("crypt", GF_LOG_DEBUG, "submit %d full blocks from %d offset",
+ blocks_to_write,
+ (int)(off_in_file + (blocks_written << get_atom_bits(object))));
+
+ count -= blocks_to_write;
+ blocks_written += blocks_to_write;
+ }
+ return;
+}
+
+static int32_t rmw_data_head(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata)
+{
+ return rmw_partial_block(frame,
+ cookie,
+ this,
+ op_ret,
+ op_errno,
+ vec,
+ count,
+ stbuf,
+ iobref,
+ atom_by_types(DATA_ATOM, HEAD_ATOM));
+}
+
+static int32_t rmw_data_tail(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata)
+{
+ return rmw_partial_block(frame,
+ cookie,
+ this,
+ op_ret,
+ op_errno,
+ vec,
+ count,
+ stbuf,
+ iobref,
+ atom_by_types(DATA_ATOM, TAIL_ATOM));
+}
+
+static int32_t rmw_hole_head(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata)
+{
+ return rmw_partial_block(frame,
+ cookie,
+ this,
+ op_ret,
+ op_errno,
+ vec,
+ count,
+ stbuf,
+ iobref,
+ atom_by_types(HOLE_ATOM, HEAD_ATOM));
+}
+
+static int32_t rmw_hole_tail(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata)
+{
+ return rmw_partial_block(frame,
+ cookie,
+ this,
+ op_ret,
+ op_errno,
+ vec,
+ count,
+ stbuf,
+ iobref,
+ atom_by_types(HOLE_ATOM, TAIL_ATOM));
+}
+
+/*
+ * atom->count_to_uptodate()
+ */
+static uint32_t count_to_uptodate_head(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ if (conf->acount == 1 && conf->off_in_tail)
+ return get_atom_size(object);
+ else
+ /* there is no need to read the whole head block */
+ return conf->off_in_head;
+}
+
+static uint32_t count_to_uptodate_tail(struct avec_config *conf,
+ struct object_cipher_info *object)
+{
+ /* we need to read the whole tail block */
+ return get_atom_size(object);
+}
+
+static uint32_t count_to_uptodate_data_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return count_to_uptodate_head(get_data_conf(frame), object);
+}
+
+static uint32_t count_to_uptodate_data_tail(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return count_to_uptodate_tail(get_data_conf(frame), object);
+}
+
+static uint32_t count_to_uptodate_hole_head(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return count_to_uptodate_head(get_hole_conf(frame), object);
+}
+
+static uint32_t count_to_uptodate_hole_tail(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ return count_to_uptodate_tail(get_hole_conf(frame), object);
+}
+
+/* atom->get_config() */
+
+static struct avec_config *get_config_data(call_frame_t *frame)
+{
+ return &((crypt_local_t *)frame->local)->data_conf;
+}
+
+static struct avec_config *get_config_hole(call_frame_t *frame)
+{
+ return &((crypt_local_t *)frame->local)->hole_conf;
+}
+
+/*
+ * atom->get_iovec()
+ */
+static struct iovec *get_iovec_hole_head(call_frame_t *frame,
+ uint32_t count)
+{
+ struct avec_config *conf = get_hole_conf(frame);
+
+ return conf->avec;
+}
+
+static struct iovec *get_iovec_hole_full(call_frame_t *frame,
+ uint32_t count)
+{
+ struct avec_config *conf = get_hole_conf(frame);
+
+ return conf->avec + (conf->off_in_head ? 1 : 0);
+}
+
+static inline struct iovec *get_iovec_hole_tail(call_frame_t *frame,
+ uint32_t count)
+{
+ struct avec_config *conf = get_hole_conf(frame);
+
+ return conf->avec + (conf->blocks_in_pool - 1);
+}
+
+static inline struct iovec *get_iovec_data_head(call_frame_t *frame,
+ uint32_t count)
+{
+ struct avec_config *conf = get_data_conf(frame);
+
+ return conf->avec;
+}
+
+static inline struct iovec *get_iovec_data_full(call_frame_t *frame,
+ uint32_t count)
+{
+ struct avec_config *conf = get_data_conf(frame);
+
+ return conf->avec + (conf->off_in_head ? 1 : 0) + count;
+}
+
+static inline struct iovec *get_iovec_data_tail(call_frame_t *frame,
+ uint32_t count)
+{
+ struct avec_config *conf = get_data_conf(frame);
+
+ return conf->avec +
+ (conf->off_in_head ? 1 : 0) +
+ conf->nr_full_blocks;
+}
+
+static struct rmw_atom atoms[LAST_DATA_TYPE][LAST_LOCALITY_TYPE] = {
+ [DATA_ATOM][HEAD_ATOM] =
+ { .locality = HEAD_ATOM,
+ .rmw = rmw_data_head,
+ .offset_at = offset_at_data_head,
+ .offset_in = offset_in_data_head,
+ .get_iovec = get_iovec_data_head,
+ .io_size_nopad = io_size_nopad_data_head,
+ .count_to_uptodate = count_to_uptodate_data_head,
+ .get_config = get_config_data
+ },
+ [DATA_ATOM][TAIL_ATOM] =
+ { .locality = TAIL_ATOM,
+ .rmw = rmw_data_tail,
+ .offset_at = offset_at_data_tail,
+ .offset_in = offset_in_tail,
+ .get_iovec = get_iovec_data_tail,
+ .io_size_nopad = io_size_nopad_data_tail,
+ .count_to_uptodate = count_to_uptodate_data_tail,
+ .get_config = get_config_data
+ },
+ [DATA_ATOM][FULL_ATOM] =
+ { .locality = FULL_ATOM,
+ .offset_at = offset_at_data_full,
+ .offset_in = offset_in_data_full,
+ .get_iovec = get_iovec_data_full,
+ .io_size_nopad = io_size_nopad_data_full,
+ .get_config = get_config_data
+ },
+ [HOLE_ATOM][HEAD_ATOM] =
+ { .locality = HEAD_ATOM,
+ .rmw = rmw_hole_head,
+ .offset_at = offset_at_hole_head,
+ .offset_in = offset_in_hole_head,
+ .get_iovec = get_iovec_hole_head,
+ .io_size_nopad = io_size_nopad_hole_head,
+ .count_to_uptodate = count_to_uptodate_hole_head,
+ .get_config = get_config_hole
+ },
+ [HOLE_ATOM][TAIL_ATOM] =
+ { .locality = TAIL_ATOM,
+ .rmw = rmw_hole_tail,
+ .offset_at = offset_at_hole_tail,
+ .offset_in = offset_in_tail,
+ .get_iovec = get_iovec_hole_tail,
+ .io_size_nopad = io_size_nopad_hole_tail,
+ .count_to_uptodate = count_to_uptodate_hole_tail,
+ .get_config = get_config_hole
+ },
+ [HOLE_ATOM][FULL_ATOM] =
+ { .locality = FULL_ATOM,
+ .offset_at = offset_at_hole_full,
+ .offset_in = offset_in_hole_full,
+ .get_iovec = get_iovec_hole_full,
+ .io_size_nopad = io_size_nopad_hole_full,
+ .get_config = get_config_hole
+ }
+};
+
+struct rmw_atom *atom_by_types(atom_data_type data,
+ atom_locality_type locality)
+{
+ return &atoms[data][locality];
+}
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/crypt-common.h b/xlators/encryption/crypt/src/crypt-common.h
new file mode 100644
index 000000000..7c212ad5d
--- /dev/null
+++ b/xlators/encryption/crypt/src/crypt-common.h
@@ -0,0 +1,141 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CRYPT_COMMON_H__
+#define __CRYPT_COMMON_H__
+
+#define INVAL_SUBVERSION_NUMBER (0xff)
+#define CRYPT_INVAL_OP (GF_FOP_NULL)
+
+#define CRYPTO_FORMAT_PREFIX "trusted.glusterfs.crypt.att.cfmt"
+#define FSIZE_XATTR_PREFIX "trusted.glusterfs.crypt.att.size"
+#define SUBREQ_PREFIX "trusted.glusterfs.crypt.msg.sreq"
+#define FSIZE_MSG_PREFIX "trusted.glusterfs.crypt.msg.size"
+#define DE_MSG_PREFIX "trusted.glusterfs.crypt.msg.dent"
+#define REQUEST_ID_PREFIX "trusted.glusterfs.crypt.msg.rqid"
+#define MSGFLAGS_PREFIX "trusted.glusterfs.crypt.msg.xfgs"
+
+
+/* messages for crypt_open() */
+#define MSGFLAGS_REQUEST_MTD_RLOCK 1 /* take read lock and don't unlock */
+#define MSGFLAGS_REQUEST_MTD_WLOCK 2 /* take write lock and don't unlock */
+
+#define AES_BLOCK_BITS (4) /* AES_BLOCK_SIZE == 1 << AES_BLOCK_BITS */
+
+#define noop do {; } while (0)
+#define cassert(cond) ({ switch (-1) { case (cond): case 0: break; } })
+#define __round_mask(x, y) ((__typeof__(x))((y)-1))
+#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
+
+/*
+ * Format of file's metadata
+ */
+struct crypt_format {
+ uint8_t loader_id; /* version of metadata loader */
+ uint8_t versioned[0]; /* file's metadata of specific version */
+} __attribute__((packed));
+
+typedef enum {
+ AES_CIPHER_ALG,
+ LAST_CIPHER_ALG
+} cipher_alg_t;
+
+typedef enum {
+ XTS_CIPHER_MODE,
+ LAST_CIPHER_MODE
+} cipher_mode_t;
+
+typedef enum {
+ MTD_LOADER_V1,
+ LAST_MTD_LOADER
+} mtd_loader_id;
+
+static inline void msgflags_set_mtd_rlock(uint32_t *flags)
+{
+ *flags |= MSGFLAGS_REQUEST_MTD_RLOCK;
+}
+
+static inline void msgflags_set_mtd_wlock(uint32_t *flags)
+{
+ *flags |= MSGFLAGS_REQUEST_MTD_WLOCK;
+}
+
+static inline gf_boolean_t msgflags_check_mtd_rlock(uint32_t *flags)
+{
+ return *flags & MSGFLAGS_REQUEST_MTD_RLOCK;
+}
+
+static inline gf_boolean_t msgflags_check_mtd_wlock(uint32_t *flags)
+{
+ return *flags & MSGFLAGS_REQUEST_MTD_WLOCK;
+}
+
+static inline gf_boolean_t msgflags_check_mtd_lock(uint32_t *flags)
+{
+ return msgflags_check_mtd_rlock(flags) ||
+ msgflags_check_mtd_wlock(flags);
+}
+
+/*
+ * returns number of logical blocks occupied
+ * (maybe partially) by @count bytes
+ * at offset @start.
+ */
+static inline off_t logical_blocks_occupied(uint64_t start, off_t count,
+ int blkbits)
+{
+ return ((start + count - 1) >> blkbits) - (start >> blkbits) + 1;
+}
+
+/*
+ * are two bytes (represented by offsets @off1
+ * and @off2 respectively) in the same logical
+ * block.
+ */
+static inline int in_same_lblock(uint64_t off1, uint64_t off2,
+ int blkbits)
+{
+ return off1 >> blkbits == off2 >> blkbits;
+}
+
+static inline void dump_cblock(xlator_t *this, unsigned char *buf)
+{
+ gf_log(this->name, GF_LOG_DEBUG,
+ "dump cblock: %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x",
+ (buf)[0],
+ (buf)[1],
+ (buf)[2],
+ (buf)[3],
+ (buf)[4],
+ (buf)[5],
+ (buf)[6],
+ (buf)[7],
+ (buf)[8],
+ (buf)[9],
+ (buf)[10],
+ (buf)[11],
+ (buf)[12],
+ (buf)[13],
+ (buf)[14],
+ (buf)[15]);
+}
+
+#endif /* __CRYPT_COMMON_H__ */
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/crypt-mem-types.h b/xlators/encryption/crypt/src/crypt-mem-types.h
new file mode 100644
index 000000000..799727573
--- /dev/null
+++ b/xlators/encryption/crypt/src/crypt-mem-types.h
@@ -0,0 +1,43 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __CRYPT_MEM_TYPES_H__
+#define __CRYPT_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_crypt_mem_types_ {
+ gf_crypt_mt_priv = gf_common_mt_end + 1,
+ gf_crypt_mt_inode,
+ gf_crypt_mt_data,
+ gf_crypt_mt_mtd,
+ gf_crypt_mt_loc,
+ gf_crypt_mt_iatt,
+ gf_crypt_mt_key,
+ gf_crypt_mt_iovec,
+ gf_crypt_mt_char,
+};
+
+#endif /* __CRYPT_MEM_TYPES_H__ */
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
+
+
+
diff --git a/xlators/encryption/crypt/src/crypt.c b/xlators/encryption/crypt/src/crypt.c
new file mode 100644
index 000000000..db2e6d83c
--- /dev/null
+++ b/xlators/encryption/crypt/src/crypt.c
@@ -0,0 +1,4498 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <ctype.h>
+#include <sys/uio.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "defaults.h"
+
+#include "crypt-common.h"
+#include "crypt.h"
+
+static void init_inode_info_head(struct crypt_inode_info *info, fd_t *fd);
+static int32_t init_inode_info_tail(struct crypt_inode_info *info,
+ struct master_cipher_info *master);
+static int32_t prepare_for_submit_hole(call_frame_t *frame, xlator_t *this,
+ uint64_t from, off_t size);
+static int32_t load_file_size(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *dict, dict_t *xdata);
+static void do_ordered_submit(call_frame_t *frame, xlator_t *this,
+ atom_data_type dtype);
+static void do_parallel_submit(call_frame_t *frame, xlator_t *this,
+ atom_data_type dtype);
+static void put_one_call_open(call_frame_t *frame);
+static void put_one_call_readv(call_frame_t *frame, xlator_t *this);
+static void put_one_call_writev(call_frame_t *frame, xlator_t *this);
+static void put_one_call_ftruncate(call_frame_t *frame, xlator_t *this);
+static void free_avec(struct iovec *avec, char **pool, int blocks_in_pool);
+static void free_avec_data(crypt_local_t *local);
+static void free_avec_hole(crypt_local_t *local);
+
+static crypt_local_t *crypt_alloc_local(call_frame_t *frame, xlator_t *this,
+ glusterfs_fop_t fop)
+{
+ crypt_local_t *local = NULL;
+
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ gf_log(this->name, GF_LOG_ERROR, "out of memory");
+ return NULL;
+ }
+ local->fop = fop;
+ LOCK_INIT(&local->hole_lock);
+ LOCK_INIT(&local->call_lock);
+ LOCK_INIT(&local->rw_count_lock);
+
+ frame->local = local;
+ return local;
+}
+
+struct crypt_inode_info *get_crypt_inode_info(inode_t *inode, xlator_t *this)
+{
+ int ret;
+ uint64_t value = 0;
+ struct crypt_inode_info *info;
+
+ ret = inode_ctx_get(inode, this, &value);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Can not get inode info");
+ return NULL;
+ }
+ info = (struct crypt_inode_info *)(long)value;
+ if (info == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Can not obtain inode info");
+ return NULL;
+ }
+ return info;
+}
+
+static struct crypt_inode_info *local_get_inode_info(crypt_local_t *local,
+ xlator_t *this)
+{
+ if (local->info)
+ return local->info;
+ local->info = get_crypt_inode_info(local->fd->inode, this);
+ return local->info;
+}
+
+static struct crypt_inode_info *alloc_inode_info(crypt_local_t *local,
+ loc_t *loc)
+{
+ struct crypt_inode_info *info;
+
+ info = GF_CALLOC(1, sizeof(*info), gf_crypt_mt_inode);
+ if (!info) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ gf_log ("crypt", GF_LOG_WARNING,
+ "Can not allocate inode info");
+ return NULL;
+ }
+ memset(info, 0, sizeof(*info));
+#if DEBUG_CRYPT
+ info->loc = GF_CALLOC(1, sizeof(*loc), gf_crypt_mt_loc);
+ if (!info->loc) {
+ gf_log("crypt", GF_LOG_WARNING, "Can not allocate loc");
+ GF_FREE(info);
+ return NULL;
+ }
+ if (loc_copy(info->loc, loc)){
+ GF_FREE(info->loc);
+ GF_FREE(info);
+ return NULL;
+ }
+#endif /* DEBUG_CRYPT */
+
+ local->info = info;
+ return info;
+}
+
+static void free_inode_info(struct crypt_inode_info *info)
+{
+#if DEBUG_CRYPT
+ loc_wipe(info->loc);
+ GF_FREE(info->loc);
+#endif
+ memset(info, 0, sizeof(*info));
+ GF_FREE(info);
+}
+
+int crypt_forget (xlator_t *this, inode_t *inode)
+{
+ uint64_t ctx_addr = 0;
+ if (!inode_ctx_del (inode, this, &ctx_addr))
+ free_inode_info((struct crypt_inode_info *)(long)ctx_addr);
+ return 0;
+}
+
+#if DEBUG_CRYPT
+static void check_read(call_frame_t *frame, xlator_t *this, int32_t read,
+ struct iovec *vec, int32_t count, struct iatt *stbuf)
+{
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = get_object_cinfo(local->info);
+ struct avec_config *conf = &local->data_conf;
+ uint32_t resid = stbuf->ia_size & (object_alg_blksize(object) - 1);
+
+ if (read <= 0)
+ return;
+ if (read != iovec_get_size(vec, count))
+ gf_log ("crypt", GF_LOG_DEBUG,
+ "op_ret differs from amount of read bytes");
+
+ if (object_alg_should_pad(object) && (read & (object_alg_blksize(object) - 1)))
+ gf_log ("crypt", GF_LOG_DEBUG,
+ "bad amount of read bytes (!= 0 mod(cblock size))");
+
+ if (conf->aligned_offset + read >
+ stbuf->ia_size + (resid ? object_alg_blksize(object) - resid : 0))
+ gf_log ("crypt", GF_LOG_DEBUG,
+ "bad amount of read bytes (too large))");
+
+}
+
+#define PT_BYTES_TO_DUMP (32)
+static void dump_plain_text(crypt_local_t *local, struct iovec *avec)
+{
+ int32_t to_dump;
+ char str[PT_BYTES_TO_DUMP + 1];
+
+ if (!avec)
+ return;
+ to_dump = avec->iov_len;
+ if (to_dump > PT_BYTES_TO_DUMP)
+ to_dump = PT_BYTES_TO_DUMP;
+ memcpy(str, avec->iov_base, to_dump);
+ memset(str + to_dump, '0', 1);
+ gf_log("crypt", GF_LOG_DEBUG, "Read file: %s", str);
+}
+
+static int32_t data_conf_invariant(struct avec_config *conf)
+{
+ return conf->acount ==
+ !!has_head_block(conf) +
+ !!has_tail_block(conf)+
+ conf->nr_full_blocks;
+}
+
+static int32_t hole_conf_invariant(struct avec_config *conf)
+{
+ return conf->blocks_in_pool ==
+ !!has_head_block(conf) +
+ !!has_tail_block(conf)+
+ !!has_full_blocks(conf);
+}
+
+static void crypt_check_conf(struct avec_config *conf)
+{
+ int32_t ret = 0;
+ const char *msg;
+
+ switch (conf->type) {
+ case DATA_ATOM:
+ msg = "data";
+ ret = data_conf_invariant(conf);
+ break;
+ case HOLE_ATOM:
+ msg = "hole";
+ ret = hole_conf_invariant(conf);
+ break;
+ default:
+ msg = "unknown";
+ }
+ if (!ret)
+ gf_log("crypt", GF_LOG_DEBUG, "bad %s conf", msg);
+}
+
+static void check_buf(call_frame_t *frame, xlator_t *this, struct iatt *buf)
+{
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+ uint64_t local_file_size;
+
+ switch(local->fop) {
+ case GF_FOP_FTRUNCATE:
+ return;
+ case GF_FOP_WRITE:
+ local_file_size = local->new_file_size;
+ break;
+ case GF_FOP_READ:
+ if (parent_is_crypt_xlator(frame, this))
+ return;
+ local_file_size = local->cur_file_size;
+ break;
+ default:
+ gf_log("crypt", GF_LOG_DEBUG, "bad file operation");
+ return;
+ }
+ if (buf->ia_size != round_up(local_file_size,
+ object_alg_blksize(object)))
+ gf_log("crypt", GF_LOG_DEBUG,
+ "bad ia_size in buf (%llu), should be %llu",
+ (unsigned long long)buf->ia_size,
+ (unsigned long long)round_up(local_file_size,
+ object_alg_blksize(object)));
+}
+
+#else
+#define check_read(frame, this, op_ret, vec, count, stbuf) noop
+#define dump_plain_text(local, avec) noop
+#define crypt_check_conf(conf) noop
+#define check_buf(frame, this, buf) noop
+#endif /* DEBUG_CRYPT */
+
+/*
+ * Pre-conditions:
+ * @vec represents a ciphertext of expanded size and
+ * aligned offset.
+ *
+ * Compound a temporal vector @avec with block-aligned
+ * components, decrypt and fix it up to represent a chunk
+ * of data corresponding to the original size and offset.
+ * Pass the result to the next translator.
+ */
+int32_t crypt_readv_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = &local->data_conf;
+ struct object_cipher_info *object = &local->info->cinfo;
+
+ struct iovec *avec;
+ uint32_t i;
+ uint32_t to_vec;
+ uint32_t to_user;
+
+ check_buf(frame, this, stbuf);
+ check_read(frame, this, op_ret, vec, count, stbuf);
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ local->iobref = iobref_ref(iobref);
+
+ local->buf = *stbuf;
+ local->buf.ia_size = local->cur_file_size;
+
+ if (op_ret <= 0 || count == 0 || vec[0].iov_len == 0)
+ goto put_one_call;
+
+ if (conf->orig_offset >= local->cur_file_size) {
+ local->op_ret = 0;
+ goto put_one_call;
+ }
+ /*
+ * correct config params with real file size
+ * and actual amount of bytes read
+ */
+ set_config_offsets(frame, this,
+ conf->orig_offset, op_ret, DATA_ATOM, 0);
+
+ if (conf->orig_offset + conf->orig_size > local->cur_file_size)
+ conf->orig_size = local->cur_file_size - conf->orig_offset;
+ /*
+ * calculate amount of data to be returned
+ * to user.
+ */
+ to_user = op_ret;
+ if (conf->aligned_offset + to_user <= conf->orig_offset) {
+ gf_log(this->name, GF_LOG_WARNING, "Incomplete read");
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto put_one_call;
+ }
+ to_user -= (conf->aligned_offset - conf->orig_offset);
+
+ if (to_user > conf->orig_size)
+ to_user = conf->orig_size;
+ local->rw_count = to_user;
+
+ op_errno = set_config_avec_data(this, local,
+ conf, object, vec, count);
+ if (op_errno) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto put_one_call;
+ }
+ avec = conf->avec;
+#if DEBUG_CRYPT
+ if (conf->off_in_tail != 0 &&
+ conf->off_in_tail < object_alg_blksize(object) &&
+ object_alg_should_pad(object))
+ gf_log(this->name, GF_LOG_DEBUG, "Bad offset in tail %d",
+ conf->off_in_tail);
+ if (iovec_get_size(vec, count) != 0 &&
+ in_same_lblock(conf->orig_offset + iovec_get_size(vec, count) - 1,
+ local->cur_file_size - 1,
+ object_alg_blkbits(object))) {
+ gf_log(this->name, GF_LOG_DEBUG, "Compound last cblock");
+ dump_cblock(this,
+ (unsigned char *)(avec[conf->acount - 1].iov_base) +
+ avec[conf->acount - 1].iov_len - object_alg_blksize(object));
+ dump_cblock(this,
+ (unsigned char *)(vec[count - 1].iov_base) +
+ vec[count - 1].iov_len - object_alg_blksize(object));
+ }
+#endif
+ decrypt_aligned_iov(object, avec,
+ conf->acount, conf->aligned_offset);
+ /*
+ * pass proper plain data to user
+ */
+ avec[0].iov_base += (conf->aligned_offset - conf->orig_offset);
+ avec[0].iov_len -= (conf->aligned_offset - conf->orig_offset);
+
+ to_vec = to_user;
+ for (i = 0; i < conf->acount; i++) {
+ if (avec[i].iov_len > to_vec)
+ avec[i].iov_len = to_vec;
+ to_vec -= avec[i].iov_len;
+ }
+ put_one_call:
+ put_one_call_readv(frame, this);
+ return 0;
+}
+
+static int32_t do_readv(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict,
+ dict_t *xdata)
+{
+ data_t *data;
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto error;
+ /*
+ * extract regular file size
+ */
+ data = dict_get(dict, FSIZE_XATTR_PREFIX);
+ if (!data) {
+ gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
+ op_errno = EIO;
+ goto error;
+ }
+ local->cur_file_size = data_to_uint64(data);
+
+ get_one_call(frame);
+ STACK_WIND(frame,
+ crypt_readv_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->readv,
+ local->fd,
+ /*
+ * FIXME: read amount can be reduced
+ */
+ local->data_conf.expanded_size,
+ local->data_conf.aligned_offset,
+ local->flags,
+ local->xdata);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+
+ get_one_call(frame);
+ put_one_call_readv(frame, this);
+ return 0;
+}
+
+static int32_t crypt_readv_finodelk_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto error;
+ /*
+ * An access has been granted,
+ * retrieve file size
+ */
+ STACK_WIND(frame,
+ do_readv,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ local->fd,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ return 0;
+ error:
+ fd_unref(local->fd);
+ if (local->xdata)
+ dict_unref(local->xdata);
+ STACK_UNWIND_STRICT(readv,
+ frame,
+ -1,
+ op_errno,
+ NULL,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+ return 0;
+}
+
+static int32_t readv_trivial_completion(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *buf,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "stat failed (%d)", op_errno);
+ goto error;
+ }
+ local->buf = *buf;
+ STACK_WIND(frame,
+ load_file_size,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr,
+ local->loc,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno,
+ NULL, 0, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t crypt_readv(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size,
+ off_t offset,
+ uint32_t flags, dict_t *xdata)
+{
+ int32_t ret;
+ crypt_local_t *local;
+ struct crypt_inode_info *info;
+ struct gf_flock lock = {0, };
+
+#if DEBUG_CRYPT
+ gf_log("crypt", GF_LOG_DEBUG, "reading %d bytes from offset %llu",
+ (int)size, (long long)offset);
+ if (parent_is_crypt_xlator(frame, this))
+ gf_log("crypt", GF_LOG_DEBUG, "parent is crypt");
+#endif
+ local = crypt_alloc_local(frame, this, GF_FOP_READ);
+ if (!local) {
+ ret = ENOMEM;
+ goto error;
+ }
+ if (size == 0)
+ goto trivial;
+
+ local->fd = fd_ref(fd);
+ local->flags = flags;
+
+ info = local_get_inode_info(local, this);
+ if (info == NULL) {
+ ret = EINVAL;
+ fd_unref(fd);
+ goto error;
+ }
+ if (!object_alg_atomic(&info->cinfo)) {
+ ret = EINVAL;
+ fd_unref(fd);
+ goto error;
+ }
+ set_config_offsets(frame, this, offset, size,
+ DATA_ATOM, 0);
+ if (parent_is_crypt_xlator(frame, this)) {
+ data_t *data;
+ /*
+ * We are called by crypt_writev (or cypt_ftruncate)
+ * to perform the "read" component of the read-modify-write
+ * (or read-prune-write) sequence for some atom;
+ *
+ * don't ask for access:
+ * it has already been acquired
+ *
+ * Retrieve current file size
+ */
+ if (!xdata) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "Regular file size hasn't been passed");
+ ret = EIO;
+ goto error;
+ }
+ data = dict_get(xdata, FSIZE_XATTR_PREFIX);
+ if (!data) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "Regular file size not found");
+ ret = EIO;
+ goto error;
+ }
+ local->old_file_size =
+ local->cur_file_size = data_to_uint64(data);
+
+ get_one_call(frame);
+ STACK_WIND(frame,
+ crypt_readv_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv,
+ local->fd,
+ /*
+ * FIXME: read amount can be reduced
+ */
+ local->data_conf.expanded_size,
+ local->data_conf.aligned_offset,
+ flags,
+ NULL);
+ return 0;
+ }
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = F_RDLCK;
+ lock.l_whence = SEEK_SET;
+
+ STACK_WIND(frame,
+ crypt_readv_finodelk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ trivial:
+ STACK_WIND(frame,
+ readv_trivial_completion,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat,
+ fd,
+ NULL);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(readv,
+ frame,
+ -1,
+ ret,
+ NULL,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+ return 0;
+}
+
+void set_local_io_params_writev(call_frame_t *frame,
+ struct object_cipher_info *object,
+ struct rmw_atom *atom,
+ off_t io_offset,
+ uint32_t io_size)
+{
+ crypt_local_t *local = frame->local;
+
+ local->io_offset = io_offset;
+ local->io_size = io_size;
+
+ local->io_offset_nopad =
+ atom->offset_at(frame, object) + atom->offset_in(frame, object);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "set nopad offset to %llu",
+ (unsigned long long)local->io_offset_nopad);
+
+ local->io_size_nopad = atom->io_size_nopad(frame, object);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "set nopad size to %llu",
+ (unsigned long long)local->io_size_nopad);
+
+ local->update_disk_file_size = 0;
+ /*
+ * NOTE: eof_padding_size is 0 for all full atoms;
+ * For head and tail atoms it will be set up at rmw_partial block()
+ */
+ local->new_file_size = local->cur_file_size;
+
+ if (local->io_offset_nopad + local->io_size_nopad > local->cur_file_size) {
+
+ local->new_file_size = local->io_offset_nopad + local->io_size_nopad;
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "set new file size to %llu",
+ (unsigned long long)local->new_file_size);
+
+ local->update_disk_file_size = 1;
+ }
+}
+
+void set_local_io_params_ftruncate(call_frame_t *frame,
+ struct object_cipher_info *object)
+{
+ uint32_t resid;
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = &local->data_conf;
+
+ resid = conf->orig_offset & (object_alg_blksize(object) - 1);
+ if (resid) {
+ local->eof_padding_size =
+ object_alg_blksize(object) - resid;
+ local->new_file_size = conf->aligned_offset;
+ local->update_disk_file_size = 0;
+ /*
+ * file size will be updated
+ * in the ->writev() stack,
+ * when submitting file tail
+ */
+ }
+ else {
+ local->eof_padding_size = 0;
+ local->new_file_size = conf->orig_offset;
+ local->update_disk_file_size = 1;
+ /*
+ * file size will be updated
+ * in this ->ftruncate stack
+ */
+ }
+}
+
+static inline void submit_head(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+ submit_partial(frame, this, local->fd, HEAD_ATOM);
+}
+
+static inline void submit_tail(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+ submit_partial(frame, this, local->fd, TAIL_ATOM);
+}
+
+static void submit_hole(call_frame_t *frame, xlator_t *this)
+{
+ /*
+ * hole conversion always means
+ * appended write and goes in ordered fashion
+ */
+ do_ordered_submit(frame, this, HOLE_ATOM);
+}
+
+static void submit_data(call_frame_t *frame, xlator_t *this)
+{
+ if (is_ordered_mode(frame)) {
+ do_ordered_submit(frame, this, DATA_ATOM);
+ return;
+ }
+ gf_log("crypt", GF_LOG_WARNING, "Bad submit mode");
+ get_nr_calls(frame, nr_calls_data(frame));
+ do_parallel_submit(frame, this, DATA_ATOM);
+ return;
+}
+
+/*
+ * heplers called by writev_cbk, fruncate_cbk in ordered mode
+ */
+
+static inline int32_t should_submit_hole(crypt_local_t *local)
+{
+ struct avec_config *conf = &local->hole_conf;
+
+ return conf->avec != NULL;
+}
+
+static inline int32_t should_resume_submit_hole(crypt_local_t *local)
+{
+ struct avec_config *conf = &local->hole_conf;
+
+ if (local->fop == GF_FOP_WRITE && has_tail_block(conf))
+ /*
+ * Don't submit a part of hole, which
+ * fits into a data block:
+ * this part of hole will be converted
+ * as a gap filled by zeros in data head
+ * block.
+ */
+ return conf->cursor < conf->acount - 1;
+ else
+ return conf->cursor < conf->acount;
+}
+
+static inline int32_t should_resume_submit_data(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = &local->data_conf;
+
+ if (is_ordered_mode(frame))
+ return conf->cursor < conf->acount;
+ /*
+ * parallel writes
+ */
+ return 0;
+}
+
+static inline int32_t should_submit_data_after_hole(crypt_local_t *local)
+{
+ return local->data_conf.avec != NULL;
+}
+
+static void update_local_file_params(call_frame_t *frame,
+ xlator_t *this,
+ struct iatt *prebuf,
+ struct iatt *postbuf)
+{
+ crypt_local_t *local = frame->local;
+
+ check_buf(frame, this, postbuf);
+
+ local->prebuf = *prebuf;
+ local->postbuf = *postbuf;
+
+ local->prebuf.ia_size = local->cur_file_size;
+ local->postbuf.ia_size = local->new_file_size;
+
+ local->cur_file_size = local->new_file_size;
+}
+
+static int32_t end_writeback_writev(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret <= 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "writev iteration failed");
+ goto put_one_call;
+ }
+ /*
+ * op_ret includes paddings (atom's head, atom's tail and EOF)
+ */
+ if (op_ret < local->io_size) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Incomplete writev iteration");
+ goto put_one_call;
+ }
+ op_ret -= local->eof_padding_size;
+ local->op_ret = op_ret;
+
+ update_local_file_params(frame, this, prebuf, postbuf);
+
+ if (data_write_in_progress(local)) {
+
+ LOCK(&local->rw_count_lock);
+ local->rw_count += op_ret;
+ UNLOCK(&local->rw_count_lock);
+
+ if (should_resume_submit_data(frame))
+ submit_data(frame, this);
+ }
+ else {
+ /*
+ * hole conversion is going on;
+ * don't take into account written zeros
+ */
+ if (should_resume_submit_hole(local))
+ submit_hole(frame, this);
+
+ else if (should_submit_data_after_hole(local))
+ submit_data(frame, this);
+ }
+ put_one_call:
+ put_one_call_writev(frame, this);
+ return 0;
+}
+
+#define crypt_writev_cbk end_writeback_writev
+
+#define HOLE_WRITE_CHUNK_BITS 12
+#define HOLE_WRITE_CHUNK_SIZE (1 << HOLE_WRITE_CHUNK_BITS)
+
+/*
+ * Convert hole of size @size at offset @off to
+ * zeros and prepare respective iovecs for submit.
+ * The hole lock should be held.
+ *
+ * Pre-conditions:
+ * @local->file_size is set and valid.
+ */
+int32_t prepare_for_submit_hole(call_frame_t *frame, xlator_t *this,
+ uint64_t off, off_t size)
+{
+ int32_t ret;
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+
+ set_config_offsets(frame, this, off, size, HOLE_ATOM, 1);
+
+ ret = set_config_avec_hole(this, local,
+ &local->hole_conf, object, local->fop);
+ crypt_check_conf(&local->hole_conf);
+
+ return ret;
+}
+
+/*
+ * prepare for submit @count bytes at offset @from
+ */
+int32_t prepare_for_submit_data(call_frame_t *frame, xlator_t *this,
+ off_t from, int32_t size, struct iovec *vec,
+ int32_t vec_count, int32_t setup_gap)
+{
+ uint32_t ret;
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+
+ set_config_offsets(frame, this, from, size,
+ DATA_ATOM, setup_gap);
+
+ ret = set_config_avec_data(this, local,
+ &local->data_conf, object, vec, vec_count);
+ crypt_check_conf(&local->data_conf);
+
+ return ret;
+}
+
+static void free_avec(struct iovec *avec,
+ char **pool, int blocks_in_pool)
+{
+ if (!avec)
+ return;
+ GF_FREE(pool);
+ GF_FREE(avec);
+}
+
+static void free_avec_data(crypt_local_t *local)
+{
+ return free_avec(local->data_conf.avec,
+ local->data_conf.pool,
+ local->data_conf.blocks_in_pool);
+}
+
+static void free_avec_hole(crypt_local_t *local)
+{
+ return free_avec(local->hole_conf.avec,
+ local->hole_conf.pool,
+ local->hole_conf.blocks_in_pool);
+}
+
+
+static void do_parallel_submit(call_frame_t *frame, xlator_t *this,
+ atom_data_type dtype)
+{
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf;
+
+ local->active_setup = dtype;
+ conf = conf_by_type(frame, dtype);
+
+ if (has_head_block(conf))
+ submit_head(frame, this);
+
+ if (has_full_blocks(conf))
+ submit_full(frame, this);
+
+ if (has_tail_block(conf))
+ submit_tail(frame, this);
+ return;
+}
+
+static void do_ordered_submit(call_frame_t *frame, xlator_t *this,
+ atom_data_type dtype)
+{
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf;
+
+ local->active_setup = dtype;
+ conf = conf_by_type(frame, dtype);
+
+ if (should_submit_head_block(conf)) {
+ get_one_call_nolock(frame);
+ submit_head(frame, this);
+ }
+ else if (should_submit_full_block(conf)) {
+ get_one_call_nolock(frame);
+ submit_full(frame, this);
+ }
+ else if (should_submit_tail_block(conf)) {
+ get_one_call_nolock(frame);
+ submit_tail(frame, this);
+ }
+ else
+ gf_log("crypt", GF_LOG_DEBUG,
+ "nothing has been submitted in ordered mode");
+ return;
+}
+
+static int32_t do_writev(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict,
+ dict_t *xdata)
+{
+ data_t *data;
+ crypt_local_t *local = frame->local;
+ struct object_cipher_info *object = &local->info->cinfo;
+ /*
+ * extract regular file size
+ */
+ data = dict_get(dict, FSIZE_XATTR_PREFIX);
+ if (!data) {
+ gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
+ op_ret = -1;
+ op_errno = EIO;
+ goto error;
+ }
+ local->old_file_size = local->cur_file_size = data_to_uint64(data);
+
+ set_gap_at_end(frame, object, &local->data_conf, DATA_ATOM);
+
+ if (local->cur_file_size < local->data_conf.orig_offset) {
+ /*
+ * Set up hole config
+ */
+ op_errno = prepare_for_submit_hole(frame,
+ this,
+ local->cur_file_size,
+ local->data_conf.orig_offset - local->cur_file_size);
+ if (op_errno) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto error;
+ }
+ }
+ if (should_submit_hole(local))
+ submit_hole(frame, this);
+ else
+ submit_data(frame, this);
+ return 0;
+ error:
+ get_one_call_nolock(frame);
+ put_one_call_writev(frame, this);
+ return 0;
+}
+
+static int32_t crypt_writev_finodelk_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0)
+ goto error;
+ /*
+ * An access has been granted,
+ * retrieve file size first
+ */
+ STACK_WIND(frame,
+ do_writev,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ local->fd,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ return 0;
+ error:
+ get_one_call_nolock(frame);
+ put_one_call_writev(frame, this);
+ return 0;
+}
+
+static int32_t writev_trivial_completion(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *buf,
+ dict_t *dict)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ local->prebuf = *buf;
+ local->postbuf = *buf;
+
+ local->prebuf.ia_size = local->cur_file_size;
+ local->postbuf.ia_size = local->cur_file_size;
+
+ get_one_call(frame);
+ put_one_call_writev(frame, this);
+ return 0;
+}
+
+int crypt_writev(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ struct iovec *vec,
+ int32_t count,
+ off_t offset,
+ uint32_t flags,
+ struct iobref *iobref,
+ dict_t *xdata)
+{
+ int32_t ret;
+ crypt_local_t *local;
+ struct crypt_inode_info *info;
+ struct gf_flock lock = {0, };
+#if DEBUG_CRYPT
+ gf_log ("crypt", GF_LOG_DEBUG, "writing %d bytes from offset %llu",
+ (int)iovec_get_size(vec, count), (long long)offset);
+#endif
+ local = crypt_alloc_local(frame, this, GF_FOP_WRITE);
+ if (!local) {
+ ret = ENOMEM;
+ goto error;
+ }
+ local->fd = fd_ref(fd);
+
+ if (iobref)
+ local->iobref = iobref_ref(iobref);
+ /*
+ * to update real file size on the server
+ */
+ local->xattr = dict_new();
+ if (!local->xattr) {
+ ret = ENOMEM;
+ goto error;
+ }
+ local->flags = flags;
+
+ info = local_get_inode_info(local, this);
+ if (info == NULL) {
+ ret = EINVAL;
+ goto error;
+ }
+ if (!object_alg_atomic(&info->cinfo)) {
+ ret = EINVAL;
+ goto error;
+ }
+ if (iovec_get_size(vec, count) == 0)
+ goto trivial;
+
+ ret = prepare_for_submit_data(frame, this, offset,
+ iovec_get_size(vec, count),
+ vec, count, 0 /* don't setup gup
+ in tail: we don't
+ know file size yet */);
+ if (ret)
+ goto error;
+
+ if (parent_is_crypt_xlator(frame, this)) {
+ data_t *data;
+ /*
+ * we are called by shinking crypt_ftruncate(),
+ * which doesn't perform hole conversion;
+ *
+ * don't ask for access:
+ * it has already been acquired
+ */
+
+ /*
+ * extract file size
+ */
+ if (!xdata) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "Regular file size hasn't been passed");
+ ret = EIO;
+ goto error;
+ }
+ data = dict_get(xdata, FSIZE_XATTR_PREFIX);
+ if (!data) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "Regular file size not found");
+ ret = EIO;
+ goto error;
+ }
+ local->old_file_size =
+ local->cur_file_size = data_to_uint64(data);
+
+ submit_data(frame, this);
+ return 0;
+ }
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+ /*
+ * lock the file and retrieve its size
+ */
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+
+ STACK_WIND(frame,
+ crypt_writev_finodelk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ trivial:
+ STACK_WIND(frame,
+ writev_trivial_completion,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat,
+ fd,
+ NULL);
+ return 0;
+ error:
+ if (local && local->fd)
+ fd_unref(fd);
+ if (local && local->iobref)
+ iobref_unref(iobref);
+ if (local && local->xdata)
+ dict_unref(xdata);
+ if (local && local->xattr)
+ dict_unref(local->xattr);
+ if (local && local->info)
+ free_inode_info(local->info);
+
+ STACK_UNWIND_STRICT(writev, frame, -1, ret, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t prepare_for_prune(call_frame_t *frame, xlator_t *this, uint64_t offset)
+{
+ set_config_offsets(frame, this,
+ offset,
+ 0, /* count */
+ DATA_ATOM,
+ 0 /* since we prune, there is no
+ gap in tail to uptodate */);
+ return 0;
+}
+
+/*
+ * Finish the read-prune-modify sequence
+ *
+ * Can be invoked as
+ * 1) ->ftruncate_cbk() for cblock-aligned, or trivial prune
+ * 2) ->writev_cbk() for non-cblock-aligned prune
+ */
+
+static int32_t prune_complete(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ update_local_file_params(frame, this, prebuf, postbuf);
+
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+/*
+ * This is called as ->ftruncate_cbk()
+ *
+ * Perform the "write" component of the
+ * read-prune-write sequence.
+ *
+ * submuit the rest of the file
+ */
+static int32_t prune_submit_file_tail(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = &local->data_conf;
+ dict_t *dict;
+
+ if (op_ret < 0)
+ goto put_one_call;
+
+ if (local->xdata) {
+ dict_unref(local->xdata);
+ local->xdata = NULL;
+ }
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ dict = dict_new();
+ if (!dict) {
+ op_errno = ENOMEM;
+ goto error;
+ }
+
+ update_local_file_params(frame, this, prebuf, postbuf);
+ local->new_file_size = conf->orig_offset;
+
+ /*
+ * The rest of the file is a partial block and, hence,
+ * should be written via RMW sequence, so the crypt xlator
+ * does STACK_WIND to itself.
+ *
+ * Pass current file size to crypt_writev()
+ */
+ op_errno = dict_set(dict,
+ FSIZE_XATTR_PREFIX,
+ data_from_uint64(local->cur_file_size));
+ if (op_errno) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "can not set key to update file size");
+ dict_unref(dict);
+ goto error;
+ }
+ gf_log("crypt", GF_LOG_DEBUG,
+ "passing current file size (%llu) to crypt_writev",
+ (unsigned long long)local->cur_file_size);
+ /*
+ * Padding will be filled with
+ * zeros by rmw_partial_block()
+ */
+ STACK_WIND(frame,
+ prune_complete,
+ this,
+ this->fops->writev, /* crypt_writev */
+ local->fd,
+ &local->vec,
+ 1,
+ conf->aligned_offset, /* offset to write from */
+ 0,
+ local->iobref,
+ dict);
+
+ dict_unref(dict);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ put_one_call:
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+/*
+ * This is called as a callback of ->writev() invoked in behalf
+ * of ftruncate(): it can be
+ * 1) ordered writes issued by hole conversion in the case of
+ * expanded truncate, or
+ * 2) an rmw partial data block issued by non-cblock-aligned
+ * prune.
+ */
+int32_t end_writeback_ftruncate(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ /*
+ * if nothing has been written,
+ * then it must be an error
+ */
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0)
+ goto put_one_call;
+
+ update_local_file_params(frame, this, prebuf, postbuf);
+
+ if (data_write_in_progress(local))
+ /* case (2) */
+ goto put_one_call;
+ /* case (1) */
+ if (should_resume_submit_hole(local))
+ submit_hole(frame, this);
+ /*
+ * case of hole, when we should't resume
+ */
+ put_one_call:
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+/*
+ * Perform prune and write components of the
+ * read-prune-write sequence.
+ *
+ * Called as ->readv_cbk()
+ *
+ * Pre-conditions:
+ * @vec contains the latest atom of the file
+ * (plain text)
+ */
+static int32_t prune_write(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata)
+{
+ int32_t i;
+ size_t to_copy;
+ size_t copied = 0;
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = &local->data_conf;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ if (op_ret == -1)
+ goto put_one_call;
+
+ /*
+ * At first, uptodate head block
+ */
+ if (iovec_get_size(vec, count) < conf->off_in_head) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Failed to uptodate head block for prune");
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto put_one_call;
+ }
+ local->vec.iov_len = conf->off_in_head;
+ local->vec.iov_base = GF_CALLOC(1, local->vec.iov_len,
+ gf_crypt_mt_data);
+
+ if (local->vec.iov_base == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ }
+ for (i = 0; i < count; i++) {
+ to_copy = vec[i].iov_len;
+ if (to_copy > local->vec.iov_len - copied)
+ to_copy = local->vec.iov_len - copied;
+
+ memcpy((char *)local->vec.iov_base + copied,
+ vec[i].iov_base,
+ to_copy);
+ copied += to_copy;
+ if (copied == local->vec.iov_len)
+ break;
+ }
+ /*
+ * perform prune with aligned offset
+ * (i.e. at this step we prune a bit
+ * more then it is needed
+ */
+ STACK_WIND(frame,
+ prune_submit_file_tail,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate,
+ local->fd,
+ conf->aligned_offset,
+ local->xdata);
+ return 0;
+ put_one_call:
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+/*
+ * Perform a read-prune-write sequence
+ */
+int32_t read_prune_write(call_frame_t *frame, xlator_t *this)
+{
+ int32_t ret = 0;
+ dict_t *dict = NULL;
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = &local->data_conf;
+ struct object_cipher_info *object = &local->info->cinfo;
+
+ set_local_io_params_ftruncate(frame, object);
+ get_one_call_nolock(frame);
+
+ if ((conf->orig_offset & (object_alg_blksize(object) - 1)) == 0) {
+ /*
+ * cblock-aligned prune:
+ * we don't need read and write components,
+ * just cut file body
+ */
+ gf_log("crypt", GF_LOG_DEBUG,
+ "prune without RMW (at offset %llu",
+ (unsigned long long)conf->orig_offset);
+
+ STACK_WIND(frame,
+ prune_complete,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate,
+ local->fd,
+ conf->orig_offset,
+ local->xdata);
+ return 0;
+ }
+ gf_log("crypt", GF_LOG_DEBUG,
+ "prune with RMW (at offset %llu",
+ (unsigned long long)conf->orig_offset);
+ /*
+ * We are about to perform the "read" component of the
+ * read-prune-write sequence. It means that we need to
+ * read encrypted data from disk and decrypt it.
+ * So, the crypt translator does STACK_WIND to itself.
+ *
+ * Pass current file size to crypt_readv()
+
+ */
+ dict = dict_new();
+ if (!dict) {
+ gf_log("crypt", GF_LOG_WARNING, "Can not alloc dict");
+ ret = ENOMEM;
+ goto exit;
+ }
+ ret = dict_set(dict,
+ FSIZE_XATTR_PREFIX,
+ data_from_uint64(local->cur_file_size));
+ if (ret) {
+ gf_log("crypt", GF_LOG_WARNING, "Can not set dict");
+ goto exit;
+ }
+ STACK_WIND(frame,
+ prune_write,
+ this,
+ this->fops->readv, /* crypt_readv */
+ local->fd,
+ get_atom_size(object), /* bytes to read */
+ conf->aligned_offset, /* offset to read from */
+ 0,
+ dict);
+ exit:
+ if (dict)
+ dict_unref(dict);
+ return ret;
+}
+
+/*
+ * File prune is more complicated than expand.
+ * First we need to read the latest atom to not lose info
+ * needed for proper update. Also we need to make sure that
+ * every component of read-prune-write sequence leaves data
+ * consistent
+ *
+ * Non-cblock aligned prune is performed as read-prune-write
+ * sequence:
+ *
+ * 1) read the latest atom;
+ * 2) perform cblock-aligned prune
+ * 3) issue a write request for the end-of-file
+ */
+int32_t prune_file(call_frame_t *frame, xlator_t *this, uint64_t offset)
+{
+ int32_t ret;
+
+ ret = prepare_for_prune(frame, this, offset);
+ if (ret)
+ return ret;
+ return read_prune_write(frame, this);
+}
+
+int32_t expand_file(call_frame_t *frame, xlator_t *this,
+ uint64_t offset)
+{
+ int32_t ret;
+ crypt_local_t *local = frame->local;
+
+ ret = prepare_for_submit_hole(frame, this,
+ local->old_file_size,
+ offset - local->old_file_size);
+ if (ret)
+ return ret;
+ submit_hole(frame, this);
+ return 0;
+}
+
+static int32_t ftruncate_trivial_completion(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *buf,
+ dict_t *dict)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ local->prebuf = *buf;
+ local->postbuf = *buf;
+
+ local->prebuf.ia_size = local->cur_file_size;
+ local->postbuf.ia_size = local->cur_file_size;
+
+ get_one_call(frame);
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+static int32_t do_ftruncate(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict,
+ dict_t *xdata)
+{
+ data_t *data;
+ crypt_local_t *local = frame->local;
+
+ if (op_ret)
+ goto error;
+ /*
+ * extract regular file size
+ */
+ data = dict_get(dict, FSIZE_XATTR_PREFIX);
+ if (!data) {
+ gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
+ op_errno = EIO;
+ goto error;
+ }
+ local->old_file_size = local->cur_file_size = data_to_uint64(data);
+
+ if (local->data_conf.orig_offset == local->cur_file_size) {
+#if DEBUG_CRYPT
+ gf_log("crypt", GF_LOG_DEBUG,
+ "trivial ftruncate (current file size %llu)",
+ (unsigned long long)local->cur_file_size);
+#endif
+ goto trivial;
+ }
+ else if (local->data_conf.orig_offset < local->cur_file_size) {
+#if DEBUG_CRYPT
+ gf_log("crypt", GF_LOG_DEBUG, "prune from %llu to %llu",
+ (unsigned long long)local->cur_file_size,
+ (unsigned long long)local->data_conf.orig_offset);
+#endif
+ op_errno = prune_file(frame,
+ this,
+ local->data_conf.orig_offset);
+ }
+ else {
+#if DEBUG_CRYPT
+ gf_log("crypt", GF_LOG_DEBUG, "expand from %llu to %llu",
+ (unsigned long long)local->cur_file_size,
+ (unsigned long long)local->data_conf.orig_offset);
+#endif
+ op_errno = expand_file(frame,
+ this,
+ local->data_conf.orig_offset);
+ }
+ if (op_errno)
+ goto error;
+ return 0;
+ trivial:
+ STACK_WIND(frame,
+ ftruncate_trivial_completion,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat,
+ local->fd,
+ NULL);
+ return 0;
+ error:
+ /*
+ * finish with ftruncate
+ */
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+
+ get_one_call_nolock(frame);
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+static int32_t crypt_ftruncate_finodelk_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0)
+ goto error;
+ /*
+ * An access has been granted,
+ * retrieve file size first
+ */
+ STACK_WIND(frame,
+ do_ftruncate,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ local->fd,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ return 0;
+ error:
+ get_one_call_nolock(frame);
+ put_one_call_ftruncate(frame, this);
+ return 0;
+}
+
+/*
+ * ftruncate is performed in 2 steps:
+ * . recieve file size;
+ * . expand or prune file.
+ */
+static int32_t crypt_ftruncate(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ off_t offset,
+ dict_t *xdata)
+{
+ int32_t ret;
+ crypt_local_t *local;
+ struct crypt_inode_info *info;
+ struct gf_flock lock = {0, };
+
+ local = crypt_alloc_local(frame, this, GF_FOP_FTRUNCATE);
+ if (!local) {
+ ret = ENOMEM;
+ goto error;
+ }
+ local->xattr = dict_new();
+ if (!local->xattr) {
+ ret = ENOMEM;
+ goto error;
+ }
+ local->fd = fd_ref(fd);
+ info = local_get_inode_info(local, this);
+ if (info == NULL) {
+ ret = EINVAL;
+ goto error;
+ }
+ if (!object_alg_atomic(&info->cinfo)) {
+ ret = EINVAL;
+ goto error;
+ }
+ local->data_conf.orig_offset = offset;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+
+ STACK_WIND(frame,
+ crypt_ftruncate_finodelk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ error:
+ if (local && local->fd)
+ fd_unref(fd);
+ if (local && local->xdata)
+ dict_unref(xdata);
+ if (local && local->xattr)
+ dict_unref(local->xattr);
+ if (local && local->info)
+ free_inode_info(local->info);
+
+ STACK_UNWIND_STRICT(ftruncate, frame, -1, ret, NULL, NULL, NULL);
+ return 0;
+}
+
+/* ->flush_cbk() */
+int32_t truncate_end(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ STACK_UNWIND_STRICT(truncate,
+ frame,
+ op_ret,
+ op_errno,
+ &local->prebuf,
+ &local->postbuf,
+ local->xdata);
+ return 0;
+}
+
+/* ftruncate_cbk() */
+int32_t truncate_flush(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ fd_t *fd = local->fd;
+ local->prebuf = *prebuf;
+ local->postbuf = *postbuf;
+
+ STACK_WIND(frame,
+ truncate_end,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush,
+ fd,
+ NULL);
+ fd_unref(fd);
+ return 0;
+}
+
+/*
+ * is called as ->open_cbk()
+ */
+static int32_t truncate_begin(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0) {
+ fd_unref(fd);
+ STACK_UNWIND_STRICT(truncate,
+ frame,
+ op_ret,
+ op_errno, NULL, NULL, NULL);
+ return 0;
+ }
+ /*
+ * crypt_truncate() is implemented via crypt_ftruncate(),
+ * so the crypt xlator does STACK_WIND to itself here
+ */
+ STACK_WIND(frame,
+ truncate_flush,
+ this,
+ this->fops->ftruncate, /* crypt_ftruncate */
+ fd,
+ local->offset,
+ NULL);
+ return 0;
+}
+
+/*
+ * crypt_truncate() is implemented via crypt_ftruncate() as a
+ * sequence crypt_open() - crypt_ftruncate() - truncate_flush()
+ */
+int32_t crypt_truncate(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ off_t offset,
+ dict_t *xdata)
+{
+ fd_t *fd;
+ crypt_local_t *local;
+
+#if DEBUG_CRYPT
+ gf_log(this->name, GF_LOG_DEBUG,
+ "truncate file %s at offset %llu",
+ loc->path, (unsigned long long)offset);
+#endif
+ local = crypt_alloc_local(frame, this, GF_FOP_TRUNCATE);
+ if (!local)
+ goto error;
+
+ fd = fd_create(loc->inode, frame->root->pid);
+ if (!fd) {
+ gf_log(this->name, GF_LOG_ERROR, "Can not create fd");
+ goto error;
+ }
+ local->fd = fd;
+ local->offset = offset;
+ local->xdata = xdata;
+ STACK_WIND(frame,
+ truncate_begin,
+ this,
+ this->fops->open, /* crypt_open() */
+ loc,
+ O_RDWR,
+ fd,
+ NULL);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(truncate, frame, -1, EINVAL, NULL, NULL, NULL);
+ return 0;
+}
+
+end_writeback_handler_t dispatch_end_writeback(glusterfs_fop_t fop)
+{
+ switch (fop) {
+ case GF_FOP_WRITE:
+ return end_writeback_writev;
+ case GF_FOP_FTRUNCATE:
+ return end_writeback_ftruncate;
+ default:
+ gf_log("crypt", GF_LOG_WARNING, "Bad wb operation %d", fop);
+ return NULL;
+ }
+}
+
+/*
+ * true, if the caller needs metadata string
+ */
+static int32_t is_custom_mtd(dict_t *xdata)
+{
+ data_t *data;
+ uint32_t flags;
+
+ if (!xdata)
+ return 0;
+
+ data = dict_get(xdata, MSGFLAGS_PREFIX);
+ if (!data)
+ return 0;
+ if (data->len != sizeof(uint32_t)) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "Bad msgflags size (%d)", data->len);
+ return -1;
+ }
+ flags = *((uint32_t *)data->data);
+ return msgflags_check_mtd_lock(&flags);
+}
+
+static int32_t crypt_open_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ if (op_ret < 0)
+ gf_log(this->name, GF_LOG_WARNING, "mtd unlock failed (%d)",
+ op_errno);
+ put_one_call_open(frame);
+ return 0;
+}
+
+static void crypt_open_tail(call_frame_t *frame, xlator_t *this)
+{
+ struct gf_flock lock = {0, };
+ crypt_local_t *local = frame->local;
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ crypt_open_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+}
+
+/*
+ * load private inode info at open time
+ * called as ->fgetxattr_cbk()
+ */
+static int load_mtd_open(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict,
+ dict_t *xdata)
+{
+ int32_t ret;
+ gf_boolean_t upload_info;
+ data_t *mtd;
+ uint64_t value = 0;
+ struct crypt_inode_info *info;
+ crypt_local_t *local = frame->local;
+ crypt_private_t *priv = this->private;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (local->fd->inode->ia_type == IA_IFLNK)
+ goto exit;
+ if (op_ret < 0)
+ goto exit;
+ /*
+ * first, check for cached info
+ */
+ ret = inode_ctx_get(local->fd->inode, this, &value);
+ if (ret != -1) {
+ info = (struct crypt_inode_info *)(long)value;
+ if (info == NULL) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Inode info expected, but not found");
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto exit;
+ }
+ /*
+ * info has been found in the cache
+ */
+ upload_info = _gf_false;
+ }
+ else {
+ /*
+ * info hasn't been found in the cache.
+ */
+ info = alloc_inode_info(local, local->loc);
+ if (!info) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto exit;
+ }
+ init_inode_info_head(info, local->fd);
+ upload_info = _gf_true;
+ }
+ /*
+ * extract metadata
+ */
+ mtd = dict_get(dict, CRYPTO_FORMAT_PREFIX);
+ if (!mtd) {
+ local->op_ret = -1;
+ local->op_errno = ENOENT;
+ gf_log (this->name, GF_LOG_WARNING,
+ "Format string wasn't found");
+ goto exit;
+ }
+ /*
+ * authenticate metadata against the path
+ */
+ ret = open_format((unsigned char *)mtd->data,
+ mtd->len,
+ local->loc,
+ info,
+ get_master_cinfo(priv),
+ local,
+ upload_info);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = ret;
+ goto exit;
+ }
+ if (upload_info) {
+ ret = init_inode_info_tail(info, get_master_cinfo(priv));
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = ret;
+ goto exit;
+ }
+ ret = inode_ctx_put(local->fd->inode,
+ this, (uint64_t)(long)info);
+ if (ret == -1) {
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto exit;
+ }
+ }
+ if (local->custom_mtd) {
+ /*
+ * pass the metadata string to the customer
+ */
+ ret = dict_set_static_bin(local->xdata,
+ CRYPTO_FORMAT_PREFIX,
+ mtd->data,
+ mtd->len);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = ret;
+ goto exit;
+ }
+ }
+ exit:
+ if (!local->custom_mtd)
+ crypt_open_tail(frame, this);
+ else
+ put_one_call_open(frame);
+ return 0;
+}
+
+static int32_t crypt_open_finodelk_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING, "finodelk (LOCK) failed");
+ goto exit;
+ }
+ STACK_WIND(frame,
+ load_mtd_open,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ local->fd,
+ CRYPTO_FORMAT_PREFIX,
+ NULL);
+ return 0;
+ exit:
+ put_one_call_open(frame);
+ return 0;
+}
+
+/*
+ * verify metadata against the specified pathname
+ */
+static int32_t crypt_open_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd,
+ dict_t *xdata)
+{
+ struct gf_flock lock = {0, };
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (local->fd->inode->ia_type == IA_IFLNK)
+ goto exit;
+ if (op_ret < 0)
+ goto exit;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+ else if (local->custom_mtd){
+ local->xdata = dict_new();
+ if (!local->xdata) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ gf_log ("crypt", GF_LOG_ERROR,
+ "Can not get new dict for mtd string");
+ goto exit;
+ }
+ }
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = local->custom_mtd ? F_WRLCK : F_RDLCK;
+ lock.l_whence = SEEK_SET;
+
+ STACK_WIND(frame,
+ crypt_open_finodelk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ exit:
+ put_one_call_open(frame);
+ return 0;
+}
+
+static int32_t crypt_open(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t flags,
+ fd_t *fd,
+ dict_t *xdata)
+{
+ int32_t ret = ENOMEM;
+ crypt_local_t *local;
+
+ local = crypt_alloc_local(frame, this, GF_FOP_OPEN);
+ if (!local)
+ goto error;
+ local->loc = GF_CALLOC(1, sizeof(*loc), gf_crypt_mt_loc);
+ if (!local->loc) {
+ ret = ENOMEM;
+ goto error;
+ }
+ memset(local->loc, 0, sizeof(*local->loc));
+ ret = loc_copy(local->loc, loc);
+ if (ret) {
+ GF_FREE(local->loc);
+ goto error;
+ }
+ local->fd = fd_ref(fd);
+
+ ret = is_custom_mtd(xdata);
+ if (ret < 0) {
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ ret = EINVAL;
+ goto error;
+ }
+ local->custom_mtd = ret;
+
+ if ((flags & O_ACCMODE) == O_WRONLY)
+ /*
+ * we can't open O_WRONLY, because
+ * we need to do read-modify-write
+ */
+ flags = (flags & ~O_ACCMODE) | O_RDWR;
+ /*
+ * Make sure that out translated offsets
+ * and counts won't be ignored
+ */
+ flags &= ~O_APPEND;
+ get_one_call_nolock(frame);
+ STACK_WIND(frame,
+ crypt_open_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open,
+ loc,
+ flags,
+ fd,
+ xdata);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(open,
+ frame,
+ -1,
+ ret,
+ NULL,
+ NULL);
+ return 0;
+}
+
+static int32_t init_inode_info_tail(struct crypt_inode_info *info,
+ struct master_cipher_info *master)
+{
+ int32_t ret;
+ struct object_cipher_info *object = &info->cinfo;
+
+#if DEBUG_CRYPT
+ gf_log("crypt", GF_LOG_DEBUG, "Init inode info for object %s",
+ uuid_utoa(info->oid));
+#endif
+ ret = data_cipher_algs[object->o_alg][object->o_mode].set_private(info,
+ master);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, "Set private info failed");
+ return ret;
+ }
+ return 0;
+}
+
+/*
+ * Init inode info at ->create() time
+ */
+static void init_inode_info_create(struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ data_t *data)
+{
+ struct object_cipher_info *object;
+
+ info->nr_minor = CRYPT_XLATOR_ID;
+ memcpy(info->oid, data->data, data->len);
+
+ object = &info->cinfo;
+
+ object->o_alg = master->m_alg;
+ object->o_mode = master->m_mode;
+ object->o_block_bits = master->m_block_bits;
+ object->o_dkey_size = master->m_dkey_size;
+}
+
+static void init_inode_info_head(struct crypt_inode_info *info, fd_t *fd)
+{
+ memcpy(info->oid, fd->inode->gfid, sizeof(uuid_t));
+}
+
+static int32_t crypt_create_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ crypt_private_t *priv = this->private;
+ crypt_local_t *local = frame->local;
+ struct crypt_inode_info *info = local->info;
+ fd_t *local_fd = local->fd;
+ dict_t *local_xdata = local->xdata;
+ inode_t *local_inode = local->inode;
+
+ if (op_ret < 0) {
+ free_inode_info(info);
+ goto unwind;
+ }
+ op_errno = init_inode_info_tail(info, get_master_cinfo(priv));
+ if (op_errno) {
+ op_ret = -1;
+ free_inode_info(info);
+ goto unwind;
+ }
+ /*
+ * FIXME: drop major subversion number
+ */
+ op_ret = inode_ctx_put(local->fd->inode, this, (uint64_t)(long)info);
+ if (op_ret == -1) {
+ op_errno = EIO;
+ free_inode_info(info);
+ goto unwind;
+ }
+ unwind:
+ free_format(local);
+ STACK_UNWIND_STRICT(create,
+ frame,
+ op_ret,
+ op_errno,
+ local_fd,
+ local_inode,
+ &local->buf,
+ &local->prebuf,
+ &local->postbuf,
+ local_xdata);
+ fd_unref(local_fd);
+ inode_unref(local_inode);
+ if (local_xdata)
+ dict_unref(local_xdata);
+ return 0;
+}
+
+static int crypt_create_tail(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ struct gf_flock lock = {0, };
+ crypt_local_t *local = frame->local;
+ fd_t *local_fd = local->fd;
+ dict_t *local_xdata = local->xdata;
+ inode_t *local_inode = local->inode;
+
+ dict_unref(local->xattr);
+
+ if (op_ret < 0)
+ goto error;
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ crypt_create_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ error:
+ free_inode_info(local->info);
+ free_format(local);
+
+ STACK_UNWIND_STRICT(create,
+ frame,
+ op_ret,
+ op_errno,
+ local_fd,
+ local_inode,
+ &local->buf,
+ &local->prebuf,
+ &local->postbuf,
+ local_xdata);
+
+ fd_unref(local_fd);
+ inode_unref(local_inode);
+ if (local_xdata)
+ dict_unref(local_xdata);
+ return 0;
+}
+
+static int32_t crypt_create_finodelk_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ struct crypt_inode_info *info = local->info;
+
+ if (op_ret < 0)
+ goto error;
+
+ STACK_WIND(frame,
+ crypt_create_tail,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ local->fd,
+ local->xattr, /* CRYPTO_FORMAT_PREFIX */
+ 0,
+ NULL);
+ return 0;
+ error:
+ free_inode_info(info);
+ free_format(local);
+ fd_unref(local->fd);
+ dict_unref(local->xattr);
+ if (local->xdata)
+ dict_unref(local->xdata);
+
+ STACK_UNWIND_STRICT(create,
+ frame,
+ op_ret,
+ op_errno,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL);
+ return 0;
+}
+
+/*
+ * Create and store crypt-specific format on disk;
+ * Populate cache with private inode info
+ */
+static int32_t crypt_create_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd,
+ inode_t *inode,
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent,
+ dict_t *xdata)
+{
+ struct gf_flock lock = {0, };
+ crypt_local_t *local = frame->local;
+ struct crypt_inode_info *info = local->info;
+
+ if (op_ret < 0)
+ goto error;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+ local->inode = inode_ref(inode);
+ local->buf = *buf;
+ local->prebuf = *preparent;
+ local->postbuf = *postparent;
+
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+
+ STACK_WIND(frame,
+ crypt_create_finodelk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ error:
+ free_inode_info(info);
+ free_format(local);
+ fd_unref(local->fd);
+ dict_unref(local->xattr);
+
+ STACK_UNWIND_STRICT(create,
+ frame,
+ op_ret,
+ op_errno,
+ NULL, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
+}
+
+static int32_t crypt_create(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t flags,
+ mode_t mode,
+ mode_t umask,
+ fd_t *fd,
+ dict_t *xdata)
+{
+ int ret;
+ data_t *data;
+ crypt_local_t *local;
+ crypt_private_t *priv;
+ struct master_cipher_info *master;
+ struct crypt_inode_info *info;
+
+ priv = this->private;
+ master = get_master_cinfo(priv);
+
+ if (master_alg_atomic(master)) {
+ /*
+ * We can't open O_WRONLY, because we
+ * need to do read-modify-write.
+ */
+ if ((flags & O_ACCMODE) == O_WRONLY)
+ flags = (flags & ~O_ACCMODE) | O_RDWR;
+ /*
+ * Make sure that out translated offsets
+ * and counts won't be ignored
+ */
+ flags &= ~O_APPEND;
+ }
+ local = crypt_alloc_local(frame, this, GF_FOP_CREATE);
+ if (!local) {
+ ret = ENOMEM;
+ goto error;
+ }
+ data = dict_get(xdata, "gfid-req");
+ if (!data) {
+ ret = EINVAL;
+ gf_log("crypt", GF_LOG_WARNING, "gfid not found");
+ goto error;
+ }
+ if (data->len != sizeof(uuid_t)) {
+ ret = EINVAL;
+ gf_log("crypt", GF_LOG_WARNING,
+ "bad gfid size (%d), should be %d",
+ (int)data->len, (int)sizeof(uuid_t));
+ goto error;
+ }
+ info = alloc_inode_info(local, loc);
+ if (!info){
+ ret = ENOMEM;
+ goto error;
+ }
+ /*
+ * NOTE:
+ * format has to be created BEFORE
+ * proceeding to the untrusted server
+ */
+ ret = alloc_format_create(local);
+ if (ret) {
+ free_inode_info(info);
+ goto error;
+ }
+ init_inode_info_create(info, master, data);
+
+ ret = create_format(local->format,
+ loc,
+ info,
+ master);
+ if (ret) {
+ free_inode_info(info);
+ goto error;
+ }
+ local->xattr = dict_new();
+ if (!local->xattr) {
+ free_inode_info(info);
+ free_format(local);
+ goto error;
+ }
+ ret = dict_set_static_bin(local->xattr,
+ CRYPTO_FORMAT_PREFIX,
+ local->format,
+ new_format_size());
+ if (ret) {
+ dict_unref(local->xattr);
+ free_inode_info(info);
+ free_format(local);
+ goto error;
+ }
+ ret = dict_set(local->xattr, FSIZE_XATTR_PREFIX, data_from_uint64(0));
+ if (ret) {
+ dict_unref(local->xattr);
+ free_inode_info(info);
+ free_format(local);
+ goto error;
+ }
+ local->fd = fd_ref(fd);
+
+ STACK_WIND(frame,
+ crypt_create_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create,
+ loc,
+ flags,
+ mode,
+ umask,
+ fd,
+ xdata);
+ return 0;
+ error:
+ gf_log("crypt", GF_LOG_WARNING, "can not create file");
+ STACK_UNWIND_STRICT(create,
+ frame,
+ -1,
+ ret,
+ NULL, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
+}
+
+/*
+ * FIXME: this should depends on the version of format string
+ */
+static int32_t filter_crypt_xattr(dict_t *dict,
+ char *key, data_t *value, void *data)
+{
+ dict_del(dict, key);
+ return 0;
+}
+
+static int32_t crypt_fsetxattr(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ dict_foreach_fnmatch(dict, "trusted.glusterfs.crypt*",
+ filter_crypt_xattr, NULL);
+ STACK_WIND(frame,
+ default_fsetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ fd,
+ dict,
+ flags,
+ xdata);
+ return 0;
+}
+
+/*
+ * TBD: verify file metadata before wind
+ */
+static int32_t crypt_setxattr(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ dict_foreach_fnmatch(dict, "trusted.glusterfs.crypt*",
+ filter_crypt_xattr, NULL);
+ STACK_WIND(frame,
+ default_setxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr,
+ loc,
+ dict,
+ flags,
+ xdata);
+ return 0;
+}
+
+/*
+ * called as flush_cbk()
+ */
+static int32_t linkop_end(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ linkop_unwind_handler_t unwind_fn;
+ unwind_fn = linkop_unwind_dispatch(local->fop);
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret < 0 &&
+ op_errno == ENOENT &&
+ local->loc->inode->ia_type == IA_IFLNK) {
+ local->op_ret = 0;
+ local->op_errno = 0;
+ }
+ unwind_fn(frame);
+ return 0;
+}
+
+/*
+ * unpin inode on the server
+ */
+static int32_t link_flush(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto error;
+ if (local->xdata) {
+ dict_unref(local->xdata);
+ local->xdata = NULL;
+ }
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+ local->inode = inode_ref(inode);
+ local->buf = *buf;
+ local->prebuf = *preparent;
+ local->postbuf = *postparent;
+
+ STACK_WIND(frame,
+ linkop_end,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush,
+ local->fd,
+ NULL);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ link_unwind(frame);
+ return 0;
+}
+
+void link_unwind(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+ dict_t *xdata;
+ dict_t *xattr;
+ inode_t *inode;
+
+ if (!local) {
+ STACK_UNWIND_STRICT(link,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL);
+ return;
+ }
+ xdata = local->xdata;
+ xattr = local->xattr;
+ inode = local->inode;
+
+ if (local->loc){
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ }
+ if (local->newloc) {
+ loc_wipe(local->newloc);
+ GF_FREE(local->newloc);
+ }
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->format)
+ GF_FREE(local->format);
+
+ STACK_UNWIND_STRICT(link,
+ frame,
+ local->op_ret,
+ local->op_errno,
+ inode,
+ &local->buf,
+ &local->prebuf,
+ &local->postbuf,
+ xdata);
+ if (xdata)
+ dict_unref(xdata);
+ if (xattr)
+ dict_unref(xattr);
+ if (inode)
+ inode_unref(inode);
+}
+
+void link_wind(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+
+ STACK_WIND(frame,
+ link_flush,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link,
+ local->loc,
+ local->newloc,
+ local->xdata);
+}
+
+/*
+ * unlink()
+ */
+static int32_t unlink_flush(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto error;
+ local->prebuf = *preparent;
+ local->postbuf = *postparent;
+ if (local->xdata) {
+ dict_unref(local->xdata);
+ local->xdata = NULL;
+ }
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ STACK_WIND(frame,
+ linkop_end,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush,
+ local->fd,
+ NULL);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ unlink_unwind(frame);
+ return 0;
+}
+
+void unlink_unwind(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+ dict_t *xdata;
+ dict_t *xattr;
+
+ if (!local) {
+ STACK_UNWIND_STRICT(unlink,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL,
+ NULL);
+ return;
+ }
+ xdata = local->xdata;
+ xattr = local->xattr;
+ if (local->loc){
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ }
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->format)
+ GF_FREE(local->format);
+
+ STACK_UNWIND_STRICT(unlink,
+ frame,
+ local->op_ret,
+ local->op_errno,
+ &local->prebuf,
+ &local->postbuf,
+ xdata);
+ if (xdata)
+ dict_unref(xdata);
+ if (xattr)
+ dict_unref(xattr);
+}
+
+void unlink_wind(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+
+ STACK_WIND(frame,
+ unlink_flush,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink,
+ local->loc,
+ local->flags,
+ local->xdata);
+}
+
+void rename_unwind(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+ dict_t *xdata;
+ dict_t *xattr;
+ struct iatt *prenewparent;
+ struct iatt *postnewparent;
+
+ if (!local) {
+ STACK_UNWIND_STRICT(rename,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL);
+ return;
+ }
+ xdata = local->xdata;
+ xattr = local->xattr;
+ prenewparent = local->prenewparent;
+ postnewparent = local->postnewparent;
+
+ if (local->loc){
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ }
+ if (local->newloc){
+ loc_wipe(local->newloc);
+ GF_FREE(local->newloc);
+ }
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->format)
+ GF_FREE(local->format);
+
+ STACK_UNWIND_STRICT(rename,
+ frame,
+ local->op_ret,
+ local->op_errno,
+ &local->buf,
+ &local->prebuf,
+ &local->postbuf,
+ prenewparent,
+ postnewparent,
+ xdata);
+ if (xdata)
+ dict_unref(xdata);
+ if (xattr)
+ dict_unref(xattr);
+ if (prenewparent)
+ GF_FREE(prenewparent);
+ if (postnewparent)
+ GF_FREE(postnewparent);
+}
+
+/*
+ * called as flush_cbk()
+ */
+static int32_t rename_end(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ rename_unwind(frame);
+ return 0;
+}
+
+static int32_t rename_flush(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *buf,
+ struct iatt *preoldparent,
+ struct iatt *postoldparent,
+ struct iatt *prenewparent,
+ struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto error;
+ dict_unref(local->xdata);
+ local->xdata = NULL;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ local->buf = *buf;
+ local->prebuf = *preoldparent;
+ local->postbuf = *postoldparent;
+ if (prenewparent) {
+ local->prenewparent = GF_CALLOC(1, sizeof(*prenewparent),
+ gf_crypt_mt_iatt);
+ if (!local->prenewparent) {
+ op_errno = ENOMEM;
+ goto error;
+ }
+ *local->prenewparent = *prenewparent;
+ }
+ if (postnewparent) {
+ local->postnewparent = GF_CALLOC(1, sizeof(*postnewparent),
+ gf_crypt_mt_iatt);
+ if (!local->postnewparent) {
+ op_errno = ENOMEM;
+ goto error;
+ }
+ *local->postnewparent = *postnewparent;
+ }
+ STACK_WIND(frame,
+ rename_end,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush,
+ local->fd,
+ NULL);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ rename_unwind(frame);
+ return 0;
+}
+
+void rename_wind(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+
+ STACK_WIND(frame,
+ rename_flush,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename,
+ local->loc,
+ local->newloc,
+ local->xdata);
+}
+
+static int32_t __do_linkop(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ linkop_wind_handler_t wind_fn;
+ linkop_unwind_handler_t unwind_fn;
+
+ wind_fn = linkop_wind_dispatch(local->fop);
+ unwind_fn = linkop_unwind_dispatch(local->fop);
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (op_ret >= 0)
+ wind_fn(frame, this);
+ else {
+ gf_log(this->name, GF_LOG_WARNING, "mtd unlock failed (%d)",
+ op_errno);
+ unwind_fn(frame);
+ }
+ return 0;
+}
+
+static int32_t do_linkop(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ struct gf_flock lock = {0, };
+ crypt_local_t *local = frame->local;
+ linkop_unwind_handler_t unwind_fn;
+
+ unwind_fn = linkop_unwind_dispatch(local->fop);
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if(op_ret < 0)
+ goto error;
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ __do_linkop,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+ error:
+ unwind_fn(frame);
+ return 0;
+}
+
+/*
+ * Update the metadata string (against the new pathname);
+ * submit the result
+ */
+static int32_t linkop_begin(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd,
+ dict_t *xdata)
+{
+ gf_boolean_t upload_info;
+ crypt_local_t *local = frame->local;
+ crypt_private_t *priv = this->private;
+ struct crypt_inode_info *info;
+ data_t *old_mtd;
+ uint32_t new_mtd_size;
+ uint64_t value = 0;
+ void (*unwind_fn)(call_frame_t *frame);
+ void (*wind_fn)(call_frame_t *frame, xlator_t *this);
+ mtd_op_t mop;
+
+ wind_fn = linkop_wind_dispatch(local->fop);
+ unwind_fn = linkop_unwind_dispatch(local->fop);
+ mop = linkop_mtdop_dispatch(local->fop);
+
+ if (local->fd->inode->ia_type == IA_IFLNK)
+ goto wind;
+ if (op_ret < 0)
+ /*
+ * verification failed
+ */
+ goto error;
+
+ old_mtd = dict_get(xdata, CRYPTO_FORMAT_PREFIX);
+ if (!old_mtd) {
+ op_errno = EIO;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Metadata string wasn't found");
+ goto error;
+ }
+ new_mtd_size = format_size(mop, old_mtd->len);
+ op_errno = alloc_format(local, new_mtd_size);
+ if (op_errno)
+ goto error;
+ /*
+ * check for cached info
+ */
+ op_ret = inode_ctx_get(fd->inode, this, &value);
+ if (op_ret != -1) {
+ info = (struct crypt_inode_info *)(long)value;
+ if (info == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Inode info was not found");
+ op_errno = EINVAL;
+ goto error;
+ }
+ /*
+ * info was found in the cache
+ */
+ local->info = info;
+ upload_info = _gf_false;
+ }
+ else {
+ /*
+ * info wasn't found in the cache;
+ */
+ info = alloc_inode_info(local, local->loc);
+ if (!info)
+ goto error;
+ init_inode_info_head(info, fd);
+ local->info = info;
+ upload_info = _gf_true;
+ }
+ op_errno = open_format((unsigned char *)old_mtd->data,
+ old_mtd->len,
+ local->loc,
+ info,
+ get_master_cinfo(priv),
+ local,
+ upload_info);
+ if (op_errno)
+ goto error;
+ if (upload_info == _gf_true) {
+ op_errno = init_inode_info_tail(info,
+ get_master_cinfo(priv));
+ if (op_errno)
+ goto error;
+ op_errno = inode_ctx_put(fd->inode, this,
+ (uint64_t)(long)(info));
+ if (op_errno == -1) {
+ op_errno = EIO;
+ goto error;
+ }
+ }
+ /*
+ * update the format string (append/update/cup a MAC)
+ */
+ op_errno = update_format(local->format,
+ (unsigned char *)old_mtd->data,
+ old_mtd->len,
+ local->mac_idx,
+ mop,
+ local->newloc,
+ info,
+ get_master_cinfo(priv),
+ local);
+ if (op_errno)
+ goto error;
+ /*
+ * store the new format string on the server
+ */
+ if (new_mtd_size) {
+ op_errno = dict_set_static_bin(local->xattr,
+ CRYPTO_FORMAT_PREFIX,
+ local->format,
+ new_mtd_size);
+ if (op_errno)
+ goto error;
+ }
+ STACK_WIND(frame,
+ do_linkop,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr,
+ local->loc,
+ local->xattr,
+ 0,
+ NULL);
+ return 0;
+ wind:
+ wind_fn(frame, this);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ unwind_fn(frame);
+ return 0;
+}
+
+static int32_t linkop_grab_local(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc,
+ int flags, dict_t *xdata,
+ glusterfs_fop_t op)
+{
+ int32_t ret = ENOMEM;
+ fd_t *fd;
+ crypt_local_t *local;
+
+ local = crypt_alloc_local(frame, this, op);
+ if (!local)
+ goto error;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ fd = fd_create(oldloc->inode, frame->root->pid);
+ if (!fd) {
+ gf_log(this->name, GF_LOG_ERROR, "Can not create fd");
+ goto error;
+ }
+ local->fd = fd;
+ local->flags = flags;
+ local->loc = GF_CALLOC(1, sizeof(*oldloc), gf_crypt_mt_loc);
+ if (!local->loc)
+ goto error;
+ memset(local->loc, 0, sizeof(*local->loc));
+ ret = loc_copy(local->loc, oldloc);
+ if (ret) {
+ GF_FREE(local->loc);
+ local->loc = NULL;
+ goto error;
+ }
+ if (newloc) {
+ local->newloc = GF_CALLOC(1, sizeof(*newloc), gf_crypt_mt_loc);
+ if (!local->newloc) {
+ GF_FREE(local->loc);
+ loc_wipe(local->loc);
+ goto error;
+ }
+ memset(local->newloc, 0, sizeof(*local->newloc));
+ ret = loc_copy(local->newloc, newloc);
+ if (ret) {
+ GF_FREE(local->loc);
+ loc_wipe(local->loc);
+ GF_FREE(local->newloc);
+ goto error;
+ }
+ }
+ local->xattr = dict_new();
+ if (!local->xattr) {
+ gf_log(this->name, GF_LOG_ERROR, "Can not create dict");
+ ret = ENOMEM;
+ goto error;
+ }
+ return 0;
+ error:
+ if (local->xdata)
+ dict_unref(local->xdata);
+ if (local->fd)
+ fd_unref(local->fd);
+ local->fd = 0;
+ local->loc = NULL;
+ local->newloc = NULL;
+
+ local->op_ret = -1;
+ local->op_errno = ret;
+
+ return ret;
+}
+
+/*
+ * read and verify locked metadata against the old pathname (via open);
+ * update the metadata string in accordance with the new pathname;
+ * submit modified metadata;
+ * wind;
+ */
+static int32_t linkop(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc,
+ int flags,
+ dict_t *xdata,
+ glusterfs_fop_t op)
+{
+ int32_t ret;
+ dict_t *dict;
+ crypt_local_t *local;
+ void (*unwind_fn)(call_frame_t *frame);
+
+ unwind_fn = linkop_unwind_dispatch(op);
+
+ ret = linkop_grab_local(frame, this, oldloc, newloc, flags, xdata, op);
+ local = frame->local;
+ if (ret)
+ goto error;
+ dict = dict_new();
+ if (!dict) {
+ gf_log(this->name, GF_LOG_ERROR, "Can not create dict");
+ ret = ENOMEM;
+ goto error;
+ }
+ /*
+ * Set a message to crypt_open() that we need
+ * locked metadata string.
+ * All link operations (link, unlink, rename)
+ * need write lock
+ */
+ msgflags_set_mtd_wlock(&local->msgflags);
+ ret = dict_set_static_bin(dict,
+ MSGFLAGS_PREFIX,
+ &local->msgflags,
+ sizeof(local->msgflags));
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "Can not set dict");
+ dict_unref(dict);
+ goto error;
+ }
+ /*
+ * verify metadata against the old pathname
+ * and retrieve locked metadata string
+ */
+ STACK_WIND(frame,
+ linkop_begin,
+ this,
+ this->fops->open, /* crypt_open() */
+ oldloc,
+ O_RDWR,
+ local->fd,
+ dict);
+ dict_unref(dict);
+ return 0;
+ error:
+ local->op_ret = -1;
+ local->op_errno = ret;
+ unwind_fn(frame);
+ return 0;
+}
+
+static int32_t crypt_link(call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ return linkop(frame, this, oldloc, newloc, 0, xdata, GF_FOP_LINK);
+}
+
+static int32_t crypt_unlink(call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int flags, dict_t *xdata)
+{
+ return linkop(frame, this, loc, NULL, flags, xdata, GF_FOP_UNLINK);
+}
+
+static int32_t crypt_rename(call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ return linkop(frame, this, oldloc, newloc, 0, xdata, GF_FOP_RENAME);
+}
+
+static void put_one_call_open(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+ if (put_one_call(local)) {
+ fd_t *fd = local->fd;
+ loc_t *loc = local->loc;
+ dict_t *xdata = local->xdata;
+
+ STACK_UNWIND_STRICT(open,
+ frame,
+ local->op_ret,
+ local->op_errno,
+ fd,
+ xdata);
+ fd_unref(fd);
+ if (xdata)
+ dict_unref(xdata);
+ loc_wipe(loc);
+ GF_FREE(loc);
+ }
+}
+
+static int32_t __crypt_readv_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ fd_t *local_fd = local->fd;
+ dict_t *local_xdata = local->xdata;
+ /* read deals with data configs only */
+ struct iovec *avec = local->data_conf.avec;
+ char **pool = local->data_conf.pool;
+ int blocks_in_pool = local->data_conf.blocks_in_pool;
+ struct iobref *iobref = local->iobref;
+ struct iobref *iobref_data = local->iobref_data;
+
+ if (op_ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "readv unlock failed (%d)", op_errno);
+ if (local->op_ret >= 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+ }
+ dump_plain_text(local, avec);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "readv: ret_to_user: %d, iovec len: %d, ia_size: %llu",
+ (int)(local->rw_count > 0 ? local->rw_count : local->op_ret),
+ (int)(local->rw_count > 0 ? iovec_get_size(avec, local->data_conf.acount) : 0),
+ (unsigned long long)local->buf.ia_size);
+
+ STACK_UNWIND_STRICT(readv,
+ frame,
+ local->rw_count > 0 ? local->rw_count : local->op_ret,
+ local->op_errno,
+ avec,
+ avec ? local->data_conf.acount : 0,
+ &local->buf,
+ local->iobref,
+ local_xdata);
+
+ free_avec(avec, pool, blocks_in_pool);
+ fd_unref(local_fd);
+ if (local_xdata)
+ dict_unref(local_xdata);
+ if (iobref)
+ iobref_unref(iobref);
+ if (iobref_data)
+ iobref_unref(iobref_data);
+ return 0;
+}
+
+static void crypt_readv_done(call_frame_t *frame, xlator_t *this)
+{
+ if (parent_is_crypt_xlator(frame, this))
+ /*
+ * don't unlock (it will be done by the parent)
+ */
+ __crypt_readv_done(frame, NULL, this, 0, 0, NULL);
+ else {
+ crypt_local_t *local = frame->local;
+ struct gf_flock lock = {0, };
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ __crypt_readv_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ }
+}
+
+static void put_one_call_readv(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+ if (put_one_call(local))
+ crypt_readv_done(frame, this);
+}
+
+static int32_t __crypt_writev_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ fd_t *local_fd = local->fd;
+ dict_t *local_xdata = local->xdata;
+ int32_t ret_to_user;
+
+ if (local->xattr)
+ dict_unref(local->xattr);
+ /*
+ * Calculate amout of butes to be returned
+ * to user. We need to subtract paddings that
+ * have been written as a part of atom.
+ */
+ /*
+ * subtract head padding
+ */
+ if (local->rw_count == 0)
+ /*
+ * Nothing has been written, it must be an error
+ */
+ ret_to_user = local->op_ret;
+ else if (local->rw_count <= local->data_conf.off_in_head) {
+ gf_log("crypt", GF_LOG_WARNING, "Incomplete write");
+ ret_to_user = 0;
+ }
+ else
+ ret_to_user = local->rw_count -
+ local->data_conf.off_in_head;
+ /*
+ * subtract tail padding
+ */
+ if (ret_to_user > local->data_conf.orig_size)
+ ret_to_user = local->data_conf.orig_size;
+
+ if (local->iobref)
+ iobref_unref(local->iobref);
+ if (local->iobref_data)
+ iobref_unref(local->iobref_data);
+ free_avec_data(local);
+ free_avec_hole(local);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "writev: ret_to_user: %d", ret_to_user);
+
+ STACK_UNWIND_STRICT(writev,
+ frame,
+ ret_to_user,
+ local->op_errno,
+ &local->prebuf,
+ &local->postbuf,
+ local_xdata);
+ fd_unref(local_fd);
+ if (local_xdata)
+ dict_unref(local_xdata);
+ return 0;
+}
+
+static int32_t crypt_writev_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ gf_log("crypt", GF_LOG_WARNING, "can not update file size");
+
+ if (parent_is_crypt_xlator(frame, this))
+ /*
+ * don't unlock (it will be done by the parent)
+ */
+ __crypt_writev_done(frame, NULL, this, 0, 0, NULL);
+ else {
+ struct gf_flock lock = {0, };
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ __crypt_writev_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ }
+ return 0;
+}
+
+static void put_one_call_writev(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+ if (put_one_call(local)) {
+ if (local->update_disk_file_size) {
+ int32_t ret;
+ /*
+ * update file size, unlock the file and unwind
+ */
+ ret = dict_set(local->xattr,
+ FSIZE_XATTR_PREFIX,
+ data_from_uint64(local->cur_file_size));
+ if (ret) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "can not set key to update file size");
+ crypt_writev_done(frame, NULL,
+ this, 0, 0, NULL);
+ return;
+ }
+ gf_log("crypt", GF_LOG_DEBUG,
+ "Updating disk file size to %llu",
+ (unsigned long long)local->cur_file_size);
+ STACK_WIND(frame,
+ crypt_writev_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ local->fd,
+ local->xattr, /* CRYPTO_FORMAT_PREFIX */
+ 0,
+ NULL);
+ }
+ else
+ crypt_writev_done(frame, NULL, this, 0, 0, NULL);
+ }
+}
+
+static int32_t __crypt_ftruncate_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ fd_t *local_fd = local->fd;
+ dict_t *local_xdata = local->xdata;
+ char *iobase = local->vec.iov_base;
+
+ if (op_ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "ftruncate unlock failed (%d)", op_errno);
+ if (local->op_ret >= 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+ }
+ if (local->iobref_data)
+ iobref_unref(local->iobref_data);
+ free_avec_data(local);
+ free_avec_hole(local);
+
+ gf_log("crypt", GF_LOG_DEBUG,
+ "ftruncate, return to user: presize=%llu, postsize=%llu",
+ (unsigned long long)local->prebuf.ia_size,
+ (unsigned long long)local->postbuf.ia_size);
+
+ STACK_UNWIND_STRICT(ftruncate,
+ frame,
+ local->op_ret < 0 ? -1 : 0,
+ local->op_errno,
+ &local->prebuf,
+ &local->postbuf,
+ local_xdata);
+ fd_unref(local_fd);
+ if (local_xdata)
+ dict_unref(local_xdata);
+ if (iobase)
+ GF_FREE(iobase);
+ return 0;
+}
+
+static int32_t crypt_ftruncate_done(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+ struct gf_flock lock = {0, };
+
+ dict_unref(local->xattr);
+ if (op_ret < 0)
+ gf_log("crypt", GF_LOG_WARNING, "can not update file size");
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND(frame,
+ __crypt_ftruncate_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ this->name,
+ local->fd,
+ F_SETLKW,
+ &lock,
+ NULL);
+ return 0;
+}
+
+static void put_one_call_ftruncate(call_frame_t *frame, xlator_t *this)
+{
+ crypt_local_t *local = frame->local;
+ if (put_one_call(local)) {
+ if (local->update_disk_file_size) {
+ int32_t ret;
+ /*
+ * update file size, unlock the file and unwind
+ */
+ ret = dict_set(local->xattr,
+ FSIZE_XATTR_PREFIX,
+ data_from_uint64(local->cur_file_size));
+ if (ret) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "can not set key to update file size");
+ crypt_ftruncate_done(frame, NULL,
+ this, 0, 0, NULL);
+ return;
+ }
+ gf_log("crypt", GF_LOG_DEBUG,
+ "Updating disk file size to %llu",
+ (unsigned long long)local->cur_file_size);
+ STACK_WIND(frame,
+ crypt_ftruncate_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ local->fd,
+ local->xattr, /* CRYPTO_FORMAT_PREFIX */
+ 0,
+ NULL);
+ }
+ else
+ crypt_ftruncate_done(frame, NULL, this, 0, 0, NULL);
+ }
+}
+
+/*
+ * load regular file size for some FOPs
+ */
+static int32_t load_file_size(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict,
+ dict_t *xdata)
+{
+ data_t *data;
+ crypt_local_t *local = frame->local;
+
+ dict_t *local_xdata = local->xdata;
+ inode_t *local_inode = local->inode;
+
+ if (op_ret < 0)
+ goto unwind;
+ /*
+ * load regular file size
+ */
+ data = dict_get(dict, FSIZE_XATTR_PREFIX);
+ if (!data) {
+ if (local->xdata)
+ dict_unref(local->xdata);
+ gf_log("crypt", GF_LOG_WARNING, "Regular file size not found");
+ op_ret = -1;
+ op_errno = EIO;
+ goto unwind;
+ }
+ local->buf.ia_size = data_to_uint64(data);
+
+ gf_log(this->name, GF_LOG_DEBUG,
+ "FOP %d: Translate regular file to %llu",
+ local->fop,
+ (unsigned long long)local->buf.ia_size);
+ unwind:
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->loc) {
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ }
+ switch (local->fop) {
+ case GF_FOP_FSTAT:
+ STACK_UNWIND_STRICT(fstat,
+ frame,
+ op_ret,
+ op_errno,
+ op_ret >= 0 ? &local->buf : NULL,
+ local->xdata);
+ break;
+ case GF_FOP_STAT:
+ STACK_UNWIND_STRICT(stat,
+ frame,
+ op_ret,
+ op_errno,
+ op_ret >= 0 ? &local->buf : NULL,
+ local->xdata);
+ break;
+ case GF_FOP_LOOKUP:
+ STACK_UNWIND_STRICT(lookup,
+ frame,
+ op_ret,
+ op_errno,
+ op_ret >= 0 ? local->inode : NULL,
+ op_ret >= 0 ? &local->buf : NULL,
+ local->xdata,
+ op_ret >= 0 ? &local->postbuf : NULL);
+ break;
+ case GF_FOP_READ:
+ STACK_UNWIND_STRICT(readv,
+ frame,
+ op_ret,
+ op_errno,
+ NULL,
+ 0,
+ op_ret >= 0 ? &local->buf : NULL,
+ NULL,
+ NULL);
+ break;
+ default:
+ gf_log(this->name, GF_LOG_WARNING,
+ "Improper file operation %d", local->fop);
+ }
+ if (local_xdata)
+ dict_unref(local_xdata);
+ if (local_inode)
+ inode_unref(local_inode);
+ return 0;
+}
+
+static int32_t crypt_stat_common_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *buf, dict_t *xdata)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto unwind;
+ if (!IA_ISREG(buf->ia_type))
+ goto unwind;
+
+ local->buf = *buf;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ switch (local->fop) {
+ case GF_FOP_FSTAT:
+ STACK_WIND(frame,
+ load_file_size,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ local->fd,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ break;
+ case GF_FOP_STAT:
+ STACK_WIND(frame,
+ load_file_size,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr,
+ local->loc,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ break;
+ default:
+ gf_log (this->name, GF_LOG_WARNING,
+ "Improper file operation %d", local->fop);
+ }
+ return 0;
+ unwind:
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->loc) {
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ }
+ switch (local->fop) {
+ case GF_FOP_FSTAT:
+ STACK_UNWIND_STRICT(fstat,
+ frame,
+ op_ret,
+ op_errno,
+ op_ret >= 0 ? buf : NULL,
+ op_ret >= 0 ? xdata : NULL);
+ break;
+ case GF_FOP_STAT:
+ STACK_UNWIND_STRICT(stat,
+ frame,
+ op_ret,
+ op_errno,
+ op_ret >= 0 ? buf : NULL,
+ op_ret >= 0 ? xdata : NULL);
+ break;
+ default:
+ gf_log (this->name, GF_LOG_WARNING,
+ "Improper file operation %d", local->fop);
+ }
+ return 0;
+}
+
+static int32_t crypt_fstat(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd, dict_t *xdata)
+{
+ crypt_local_t *local;
+
+ local = crypt_alloc_local(frame, this, GF_FOP_FSTAT);
+ if (!local)
+ goto error;
+ local->fd = fd_ref(fd);
+ STACK_WIND(frame,
+ crypt_stat_common_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat,
+ fd,
+ xdata);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(fstat,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL);
+ return 0;
+}
+
+static int32_t crypt_stat(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, dict_t *xdata)
+{
+ int32_t ret;
+ crypt_local_t *local;
+
+ local = crypt_alloc_local(frame, this, GF_FOP_STAT);
+ if (!local)
+ goto error;
+ local->loc = GF_CALLOC(1, sizeof(*loc), gf_crypt_mt_loc);
+ if (!local->loc)
+ goto error;
+ memset(local->loc, 0, sizeof(*local->loc));
+ ret = loc_copy(local->loc, loc);
+ if (ret) {
+ GF_FREE(local->loc);
+ goto error;
+ }
+ STACK_WIND(frame,
+ crypt_stat_common_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat,
+ loc,
+ xdata);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(stat,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL);
+ return 0;
+}
+
+static int32_t crypt_lookup_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ crypt_local_t *local = frame->local;
+
+ if (op_ret < 0)
+ goto unwind;
+ if (!IA_ISREG(buf->ia_type))
+ goto unwind;
+
+ local->inode = inode_ref(inode);
+ local->buf = *buf;
+ local->postbuf = *postparent;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+ uuid_copy(local->loc->gfid, buf->ia_gfid);
+
+ STACK_WIND(frame,
+ load_file_size,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr,
+ local->loc,
+ FSIZE_XATTR_PREFIX,
+ NULL);
+ return 0;
+ unwind:
+ loc_wipe(local->loc);
+ GF_FREE(local->loc);
+ STACK_UNWIND_STRICT(lookup,
+ frame,
+ op_ret,
+ op_errno,
+ inode,
+ buf,
+ xdata,
+ postparent);
+ return 0;
+}
+
+static int32_t crypt_lookup(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, dict_t *xdata)
+{
+ int32_t ret;
+ crypt_local_t *local;
+
+ local = crypt_alloc_local(frame, this, GF_FOP_LOOKUP);
+ if (!local)
+ goto error;
+ local->loc = GF_CALLOC(1, sizeof(*loc), gf_crypt_mt_loc);
+ if (!local->loc)
+ goto error;
+ memset(local->loc, 0, sizeof(*local->loc));
+ ret = loc_copy(local->loc, loc);
+ if (ret) {
+ GF_FREE(local->loc);
+ goto error;
+ }
+ gf_log(this->name, GF_LOG_DEBUG, "Lookup %s", loc->path);
+ STACK_WIND(frame,
+ crypt_lookup_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup,
+ loc,
+ xdata);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(lookup,
+ frame,
+ -1,
+ ENOMEM,
+ NULL,
+ NULL,
+ NULL,
+ NULL);
+ return 0;
+}
+
+/*
+ * for every regular directory entry find its real file size
+ * and update stat's buf properly
+ */
+static int32_t crypt_readdirp_cbk(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata)
+{
+ gf_dirent_t *entry = NULL;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ list_for_each_entry (entry, (&entries->list), list) {
+ data_t *data;
+
+ if (!IA_ISREG(entry->d_stat.ia_type))
+ continue;
+ data = dict_get(entry->dict, FSIZE_XATTR_PREFIX);
+ if (!data){
+ gf_log("crypt", GF_LOG_WARNING,
+ "Regular file size of direntry not found");
+ op_errno = EIO;
+ op_ret = -1;
+ break;
+ }
+ entry->d_stat.ia_size = data_to_uint64(data);
+ }
+ unwind:
+ STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata);
+ return 0;
+}
+
+/*
+ * ->readdirp() fills in-core inodes, so we need to set proper
+ * file sizes for all directory entries of the parent @fd.
+ * Actual updates take place in ->crypt_readdirp_cbk()
+ */
+static int32_t crypt_readdirp(call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t offset,
+ dict_t *xdata)
+{
+ int32_t ret = ENOMEM;
+
+ if (!xdata) {
+ xdata = dict_new();
+ if (!xdata)
+ goto error;
+ }
+ else
+ dict_ref(xdata);
+ /*
+ * make sure that we'll have real file sizes at ->readdirp_cbk()
+ */
+ ret = dict_set(xdata, FSIZE_XATTR_PREFIX, data_from_uint64(0));
+ if (ret) {
+ dict_unref(xdata);
+ goto error;
+ }
+ STACK_WIND(frame,
+ crypt_readdirp_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp,
+ fd,
+ size,
+ offset,
+ xdata);
+ dict_unref(xdata);
+ return 0;
+ error:
+ STACK_UNWIND_STRICT(readdirp, frame, -1, ret, NULL, NULL);
+ return 0;
+}
+
+static int32_t crypt_access(call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t mask, dict_t *xdata)
+{
+ gf_log(this->name, GF_LOG_WARNING,
+ "NFS mounts of encrypted volumes are unsupported");
+ STACK_UNWIND_STRICT(access, frame, -1, EPERM, NULL);
+ return 0;
+}
+
+int32_t master_set_block_size (xlator_t *this, crypt_private_t *priv,
+ dict_t *options)
+{
+ uint64_t block_size = 0;
+ struct master_cipher_info *master = get_master_cinfo(priv);
+
+ if (options != NULL)
+ GF_OPTION_RECONF("block-size", block_size, options,
+ size, error);
+ else
+ GF_OPTION_INIT("block-size", block_size, size, error);
+
+ switch (block_size) {
+ case 512:
+ master->m_block_bits = 9;
+ break;
+ case 1024:
+ master->m_block_bits = 10;
+ break;
+ case 2048:
+ master->m_block_bits = 11;
+ break;
+ case 4096:
+ master->m_block_bits = 12;
+ break;
+ default:
+ gf_log("crypt", GF_LOG_ERROR,
+ "FATAL: unsupported block size %llu",
+ (unsigned long long)block_size);
+ goto error;
+ }
+ return 0;
+ error:
+ return -1;
+}
+
+int32_t master_set_alg(xlator_t *this, crypt_private_t *priv)
+{
+ struct master_cipher_info *master = get_master_cinfo(priv);
+ master->m_alg = AES_CIPHER_ALG;
+ return 0;
+}
+
+int32_t master_set_mode(xlator_t *this, crypt_private_t *priv)
+{
+ struct master_cipher_info *master = get_master_cinfo(priv);
+ master->m_mode = XTS_CIPHER_MODE;
+ return 0;
+}
+
+/*
+ * set key size in bits to the master info
+ * Pre-conditions: cipher mode in the master info is uptodate.
+ */
+static int master_set_data_key_size (xlator_t *this, crypt_private_t *priv,
+ dict_t *options)
+{
+ int32_t ret;
+ uint64_t key_size = 0;
+ struct master_cipher_info *master = get_master_cinfo(priv);
+
+ if (options != NULL)
+ GF_OPTION_RECONF("data-key-size", key_size, options,
+ size, error);
+ else
+ GF_OPTION_INIT("data-key-size", key_size, size, error);
+
+ ret = data_cipher_algs[master->m_alg][master->m_mode].check_key(key_size);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "FATAL: wrong bin key size %llu for alg %d mode %d",
+ (unsigned long long)key_size,
+ (int)master->m_alg,
+ (int)master->m_mode);
+ goto error;
+ }
+ master->m_dkey_size = key_size;
+ return 0;
+ error:
+ return -1;
+}
+
+static int is_hex(char *s) {
+ return ('0' <= *s && *s <= '9') || ('a' <= *s && *s <= 'f');
+}
+
+static int parse_hex_buf(xlator_t *this, char *src, unsigned char *dst,
+ int hex_size)
+{
+ int i;
+ int hex_byte = 0;
+
+ for (i = 0; i < (hex_size / 2); i++) {
+ if (!is_hex(src + i*2) || !is_hex(src + i*2 + 1)) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "FATAL: not hex symbol in key");
+ return -1;
+ }
+ if (sscanf(src + i*2, "%2x", &hex_byte) != 1) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "FATAL: can not parse hex key");
+ return -1;
+ }
+ dst[i] = hex_byte & 0xff;
+ }
+ return 0;
+}
+
+/*
+ * Parse options;
+ * install master volume key
+ */
+int32_t master_set_master_vol_key(xlator_t *this, crypt_private_t *priv)
+{
+ int32_t ret;
+ FILE *file = NULL;
+
+ int32_t key_size;
+ char *opt_key_file_pathname = NULL;
+
+ unsigned char bin_buf[MASTER_VOL_KEY_SIZE];
+ char hex_buf[2 * MASTER_VOL_KEY_SIZE];
+
+ struct master_cipher_info *master = get_master_cinfo(priv);
+ /*
+ * extract master key passed via option
+ */
+ GF_OPTION_INIT("master-key", opt_key_file_pathname, path, bad_key);
+
+ if (!opt_key_file_pathname) {
+ gf_log(this->name, GF_LOG_ERROR, "FATAL: missing master key");
+ return -1;
+ }
+ gf_log(this->name, GF_LOG_DEBUG, "handling file key %s",
+ opt_key_file_pathname);
+
+ file = fopen(opt_key_file_pathname, "r");
+ if (file == NULL) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "FATAL: can not open file with master key");
+ return -1;
+ }
+ /*
+ * extract hex key
+ */
+ key_size = fread(hex_buf, 1, sizeof(hex_buf), file);
+ if (key_size < sizeof(hex_buf)) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "FATAL: master key is too short");
+ goto bad_key;
+ }
+ ret = parse_hex_buf(this, hex_buf, bin_buf, key_size);
+ if (ret)
+ goto bad_key;
+ memcpy(master->m_key, bin_buf, MASTER_VOL_KEY_SIZE);
+ memset(hex_buf, 0, sizeof(hex_buf));
+ fclose(file);
+
+ memset(bin_buf, 0, sizeof(bin_buf));
+ return 0;
+ bad_key:
+ gf_log(this->name, GF_LOG_ERROR, "FATAL: bad master key");
+ if (file)
+ fclose(file);
+ memset(bin_buf, 0, sizeof(bin_buf));
+ return -1;
+}
+
+/*
+ * Derive volume key for object-id authentication
+ */
+int32_t master_set_nmtd_vol_key(xlator_t *this, crypt_private_t *priv)
+{
+ return get_nmtd_vol_key(get_master_cinfo(priv));
+}
+
+int32_t crypt_init_xlator(xlator_t *this)
+{
+ int32_t ret;
+ crypt_private_t *priv = this->private;
+
+ ret = master_set_alg(this, priv);
+ if (ret)
+ return ret;
+ ret = master_set_mode(this, priv);
+ if (ret)
+ return ret;
+ ret = master_set_block_size(this, priv, NULL);
+ if (ret)
+ return ret;
+ ret = master_set_data_key_size(this, priv, NULL);
+ if (ret)
+ return ret;
+ ret = master_set_master_vol_key(this, priv);
+ if (ret)
+ return ret;
+ return master_set_nmtd_vol_key(this, priv);
+}
+
+static int32_t crypt_alloc_private(xlator_t *this)
+{
+ this->private = GF_CALLOC(1, sizeof(crypt_private_t), gf_crypt_mt_priv);
+ if (!this->private) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "Can not allocate memory for private data");
+ return ENOMEM;
+ }
+ return 0;
+}
+
+static void crypt_free_private(xlator_t *this)
+{
+ crypt_private_t *priv = this->private;
+ if (priv) {
+ memset(priv, 0, sizeof(*priv));
+ GF_FREE(priv);
+ }
+}
+
+int32_t reconfigure (xlator_t *this, dict_t *options)
+{
+ int32_t ret = -1;
+ crypt_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO ("crypt", this, error);
+ GF_VALIDATE_OR_GOTO (this->name, this->private, error);
+ GF_VALIDATE_OR_GOTO (this->name, options, error);
+
+ priv = this->private;
+
+ ret = master_set_block_size(this, priv, options);
+ if (ret) {
+ gf_log("this->name", GF_LOG_ERROR,
+ "Failed to reconfure block size");
+ goto error;
+ }
+ ret = master_set_data_key_size(this, priv, options);
+ if (ret) {
+ gf_log("this->name", GF_LOG_ERROR,
+ "Failed to reconfure data key size");
+ goto error;
+ }
+ return 0;
+ error:
+ return ret;
+}
+
+int32_t init(xlator_t *this)
+{
+ int32_t ret;
+
+ if (!this->children || this->children->next) {
+ gf_log ("crypt", GF_LOG_ERROR,
+ "FATAL: crypt should have exactly one child");
+ return EINVAL;
+ }
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+ ret = crypt_alloc_private(this);
+ if (ret)
+ return ret;
+ ret = crypt_init_xlator(this);
+ if (ret)
+ goto error;
+ this->local_pool = mem_pool_new(crypt_local_t, 64);
+ if (!this->local_pool) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ ret = ENOMEM;
+ goto error;
+ }
+ gf_log ("crypt", GF_LOG_INFO, "crypt xlator loaded");
+ return 0;
+ error:
+ crypt_free_private(this);
+ return ret;
+}
+
+void fini (xlator_t *this)
+{
+ crypt_free_private(this);
+}
+
+struct xlator_fops fops = {
+ .readv = crypt_readv,
+ .writev = crypt_writev,
+ .truncate = crypt_truncate,
+ .ftruncate = crypt_ftruncate,
+ .setxattr = crypt_setxattr,
+ .fsetxattr = crypt_fsetxattr,
+ .link = crypt_link,
+ .unlink = crypt_unlink,
+ .rename = crypt_rename,
+ .open = crypt_open,
+ .create = crypt_create,
+ .stat = crypt_stat,
+ .fstat = crypt_fstat,
+ .lookup = crypt_lookup,
+ .readdirp = crypt_readdirp,
+ .access = crypt_access
+};
+
+struct xlator_cbks cbks = {
+ .forget = crypt_forget
+};
+
+struct volume_options options[] = {
+ { .key = {"master-key"},
+ .type = GF_OPTION_TYPE_PATH,
+ .description = "Pathname of regular file which contains master volume key"
+ },
+ { .key = {"data-key-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .description = "Data key size (bits)",
+ .min = 256,
+ .max = 512,
+ .default_value = "256",
+ },
+ { .key = {"block-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .description = "Atom size (bits)",
+ .min = 512,
+ .max = 4096,
+ .default_value = "4096"
+ },
+ { .key = {NULL} },
+};
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/crypt.h b/xlators/encryption/crypt/src/crypt.h
new file mode 100644
index 000000000..01a8542ab
--- /dev/null
+++ b/xlators/encryption/crypt/src/crypt.h
@@ -0,0 +1,899 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CRYPT_H__
+#define __CRYPT_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+#include <openssl/aes.h>
+#include <openssl/evp.h>
+#include <openssl/sha.h>
+#include <openssl/hmac.h>
+#include <openssl/cmac.h>
+#include <openssl/modes.h>
+#include "crypt-mem-types.h"
+
+#define CRYPT_XLATOR_ID (0)
+
+#define MAX_IOVEC_BITS (3)
+#define MAX_IOVEC (1 << MAX_IOVEC_BITS)
+#define KEY_FACTOR_BITS (6)
+
+#define DEBUG_CRYPT (0)
+#define TRIVIAL_TFM (0)
+
+#define CRYPT_MIN_BLOCK_BITS (9)
+#define CRYPT_MAX_BLOCK_BITS (12)
+
+#define MASTER_VOL_KEY_SIZE (32)
+#define NMTD_VOL_KEY_SIZE (16)
+
+struct crypt_key {
+ uint32_t len;
+ const char *label;
+};
+
+/*
+ * Add new key types to the end of this
+ * enumeration but before LAST_KEY_TYPE
+ */
+typedef enum {
+ MASTER_VOL_KEY,
+ NMTD_VOL_KEY,
+ NMTD_LINK_KEY,
+ EMTD_FILE_KEY,
+ DATA_FILE_KEY_256,
+ DATA_FILE_KEY_512,
+ LAST_KEY_TYPE
+}crypt_key_type;
+
+struct kderive_context {
+ const unsigned char *pkey;/* parent key */
+ uint32_t pkey_len; /* parent key size, bits */
+ uint32_t ckey_len; /* child key size, bits */
+ unsigned char *fid; /* fixed input data, NIST 800-108, 5.1 */
+ uint32_t fid_len; /* fid len, bytes */
+ unsigned char *out; /* contains child keying material */
+ uint32_t out_len; /* out len, bytes */
+};
+
+typedef enum {
+ DATA_ATOM,
+ HOLE_ATOM,
+ LAST_DATA_TYPE
+}atom_data_type;
+
+typedef enum {
+ HEAD_ATOM,
+ TAIL_ATOM,
+ FULL_ATOM,
+ LAST_LOCALITY_TYPE
+}atom_locality_type;
+
+typedef enum {
+ MTD_CREATE,
+ MTD_APPEND,
+ MTD_OVERWRITE,
+ MTD_CUT,
+ MTD_LAST_OP
+} mtd_op_t;
+
+struct xts128_context {
+ void *key1, *key2;
+ block128_f block1,block2;
+};
+
+struct object_cipher_info {
+ cipher_alg_t o_alg;
+ cipher_mode_t o_mode;
+ uint32_t o_block_bits;
+ uint32_t o_dkey_size; /* raw data key size in bits */
+ union {
+ struct {
+ unsigned char ivec[16];
+ AES_KEY dkey[2];
+ AES_KEY tkey; /* key used for tweaking */
+ XTS128_CONTEXT xts;
+ } aes_xts;
+ } u;
+};
+
+struct master_cipher_info {
+ /*
+ * attributes inherited by newly created regular files
+ */
+ cipher_alg_t m_alg;
+ cipher_mode_t m_mode;
+ uint32_t m_block_bits;
+ uint32_t m_dkey_size; /* raw key size in bits */
+ /*
+ * master key
+ */
+ unsigned char m_key[MASTER_VOL_KEY_SIZE];
+ /*
+ * volume key for oid authentication
+ */
+ unsigned char m_nmtd_key[NMTD_VOL_KEY_SIZE];
+};
+
+/*
+* This info is not changed during file's life
+ */
+struct crypt_inode_info {
+#if DEBUG_CRYPT
+ loc_t *loc; /* pathname that the file has been
+ opened, or created with */
+#endif
+ uint16_t nr_minor;
+ uuid_t oid;
+ struct object_cipher_info cinfo;
+};
+
+/*
+ * this should locate in secure memory
+ */
+typedef struct {
+ struct master_cipher_info master;
+} crypt_private_t;
+
+static inline struct master_cipher_info *get_master_cinfo(crypt_private_t *priv)
+{
+ return &priv->master;
+}
+
+static inline struct object_cipher_info *get_object_cinfo(struct crypt_inode_info
+ *info)
+{
+ return &info->cinfo;
+}
+
+/*
+ * this describes layouts and properties
+ * of atoms in an aligned vector
+ */
+struct avec_config {
+ uint32_t atom_size;
+ atom_data_type type;
+ size_t orig_size;
+ off_t orig_offset;
+ size_t expanded_size;
+ off_t aligned_offset;
+
+ uint32_t off_in_head;
+ uint32_t off_in_tail;
+ uint32_t gap_in_tail;
+ uint32_t nr_full_blocks;
+
+ struct iovec *avec; /* aligned vector */
+ uint32_t acount; /* number of avec components. The same
+ * as number of occupied logical blocks */
+ char **pool;
+ uint32_t blocks_in_pool;
+ uint32_t cursor; /* makes sense only for ordered writes,
+ * so there is no races on this counter.
+ *
+ * Cursor is per-config object, we don't
+ * reset cursor for atoms of different
+ * localities (head, tail, full)
+ */
+};
+
+
+typedef struct {
+ glusterfs_fop_t fop; /* code of FOP this local info built for */
+ fd_t *fd;
+ inode_t *inode;
+ loc_t *loc;
+ int32_t mac_idx;
+ loc_t *newloc;
+ int32_t flags;
+ int32_t wbflags;
+ struct crypt_inode_info *info;
+ struct iobref *iobref;
+ struct iobref *iobref_data;
+ off_t offset;
+
+ uint64_t old_file_size; /* per FOP, retrieved under lock held */
+ uint64_t cur_file_size; /* per iteration, before issuing IOs */
+ uint64_t new_file_size; /* per iteration, after issuing IOs */
+
+ uint64_t io_offset; /* offset of IOs issued per iteration */
+ uint64_t io_offset_nopad; /* offset of user's data in the atom */
+ uint32_t io_size; /* size of IOs issued per iteration */
+ uint32_t io_size_nopad; /* size of user's data in the IOs */
+ uint32_t eof_padding_size; /* size od EOF padding in the IOs */
+
+ gf_lock_t call_lock; /* protect nr_calls from many cbks */
+ int32_t nr_calls;
+
+ atom_data_type active_setup; /* which setup (hole or date)
+ is currently active */
+ /* data setup */
+ struct avec_config data_conf;
+
+ /* hole setup */
+ int hole_conv_in_proggress;
+ gf_lock_t hole_lock; /* protect hole config from many cbks */
+ int hole_handled;
+ struct avec_config hole_conf;
+ struct iatt buf;
+ struct iatt prebuf;
+ struct iatt postbuf;
+ struct iatt *prenewparent;
+ struct iatt *postnewparent;
+ int32_t op_ret;
+ int32_t op_errno;
+ int32_t rw_count; /* total read or written */
+ gf_lock_t rw_count_lock; /* protect the counter above */
+ unsigned char *format; /* for create, update format string */
+ uint32_t format_size;
+ uint32_t msgflags; /* messages for crypt_open() */
+ dict_t *xdata;
+ dict_t *xattr;
+ struct iovec vec; /* contains last file's atom for
+ read-prune-write sequence */
+ gf_boolean_t custom_mtd;
+ /*
+ * the next 3 fields are used by readdir and friends
+ */
+ gf_dirent_t *de; /* directory entry */
+ char *de_path; /* pathname of directory entry */
+ uint32_t de_prefix_len; /* lenght of the parent's pathname */
+ gf_dirent_t *entries;
+
+ uint32_t update_disk_file_size:1;
+} crypt_local_t;
+
+/* This represents a (read)modify-write atom */
+struct rmw_atom {
+ atom_locality_type locality;
+ /*
+ * read-modify-write sequence of the atom
+ */
+ int32_t (*rmw)(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iovec *vec,
+ int32_t count,
+ struct iatt *stbuf,
+ struct iobref *iobref,
+ dict_t *xdata);
+ /*
+ * offset of the logical block in a file
+ */
+ loff_t (*offset_at)(call_frame_t *frame,
+ struct object_cipher_info *object);
+ /*
+ * IO offset in an atom
+ */
+ uint32_t (*offset_in)(call_frame_t *frame,
+ struct object_cipher_info *object);
+ /*
+ * number of bytes of plain text of this atom that user
+ * wants to read/write.
+ * It can be smaller than atom_size in the case of head
+ * or tail atoms.
+ */
+ uint32_t (*io_size_nopad)(call_frame_t *frame,
+ struct object_cipher_info *object);
+ /*
+ * which iovec represents the atom
+ */
+ struct iovec *(*get_iovec)(call_frame_t *frame, uint32_t count);
+ /*
+ * how many bytes of partial block should be uptodated by
+ * reading from disk.
+ * This is used to perform a read component of RMW (read-modify-write).
+ */
+ uint32_t (*count_to_uptodate)(call_frame_t *frame, struct object_cipher_info *object);
+ struct avec_config *(*get_config)(call_frame_t *frame);
+};
+
+struct data_cipher_alg {
+ gf_boolean_t atomic; /* true means that algorithm requires
+ to pad data before cipher transform */
+ gf_boolean_t should_pad; /* true means that algorithm requires
+ to pad the end of file with extra-data */
+ uint32_t blkbits; /* blksize = 1 << blkbits */
+ /*
+ * any preliminary sanity checks goes here
+ */
+ int32_t (*init)(void);
+ /*
+ * set alg-mode specific inode info
+ */
+ int32_t (*set_private)(struct crypt_inode_info *info,
+ struct master_cipher_info *master);
+ /*
+ * check alg-mode specific data key
+ */
+ int32_t (*check_key)(uint32_t key_size);
+ void (*set_iv)(off_t offset, struct object_cipher_info *object);
+ int32_t (*encrypt)(const unsigned char *from, unsigned char *to,
+ size_t length, off_t offset, const int enc,
+ struct object_cipher_info *object);
+};
+
+/*
+ * version-dependent metadata loader
+ */
+struct crypt_mtd_loader {
+ /*
+ * return core format size
+ */
+ size_t (*format_size)(mtd_op_t op, size_t old_size);
+ /*
+ * pack version-specific metadata of an object
+ * at ->create()
+ */
+ int32_t (*create_format)(unsigned char *wire,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master);
+ /*
+ * extract version-specific metadata of an object
+ * at ->open() time
+ */
+ int32_t (*open_format)(unsigned char *wire,
+ int32_t len,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local,
+ gf_boolean_t load_info);
+ int32_t (*update_format)(unsigned char *new,
+ unsigned char *old,
+ size_t old_len,
+ int32_t mac_idx,
+ mtd_op_t op,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local);
+};
+
+typedef int32_t (*end_writeback_handler_t)(call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata);
+typedef void (*linkop_wind_handler_t)(call_frame_t *frame, xlator_t *this);
+typedef void (*linkop_unwind_handler_t)(call_frame_t *frame);
+
+
+/* Declarations */
+
+/* keys.c */
+extern struct crypt_key crypt_keys[LAST_KEY_TYPE];
+int32_t get_nmtd_vol_key(struct master_cipher_info *master);
+int32_t get_nmtd_link_key(loc_t *loc,
+ struct master_cipher_info *master,
+ unsigned char *result);
+int32_t get_emtd_file_key(struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ unsigned char *result);
+int32_t get_data_file_key(struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ uint32_t keysize,
+ unsigned char *key);
+/* data.c */
+extern struct data_cipher_alg data_cipher_algs[LAST_CIPHER_ALG][LAST_CIPHER_MODE];
+void encrypt_aligned_iov(struct object_cipher_info *object,
+ struct iovec *vec,
+ int count,
+ off_t off);
+void decrypt_aligned_iov(struct object_cipher_info *object,
+ struct iovec *vec,
+ int count,
+ off_t off);
+int32_t align_iov_by_atoms(xlator_t *this,
+ crypt_local_t *local,
+ struct object_cipher_info *object,
+ struct iovec *vec /* input vector */,
+ int32_t count /* number of vec components */,
+ struct iovec *avec /* aligned vector */,
+ char **blocks /* pool of blocks */,
+ uint32_t *blocks_allocated,
+ struct avec_config *conf);
+int32_t set_config_avec_data(xlator_t *this,
+ crypt_local_t *local,
+ struct avec_config *conf,
+ struct object_cipher_info *object,
+ struct iovec *vec,
+ int32_t vec_count);
+int32_t set_config_avec_hole(xlator_t *this,
+ crypt_local_t *local,
+ struct avec_config *conf,
+ struct object_cipher_info *object,
+ glusterfs_fop_t fop);
+void set_gap_at_end(call_frame_t *frame, struct object_cipher_info *object,
+ struct avec_config *conf, atom_data_type dtype);
+void set_config_offsets(call_frame_t *frame,
+ xlator_t *this,
+ uint64_t offset,
+ uint64_t count,
+ atom_data_type dtype,
+ int32_t setup_gap_in_tail);
+
+/* metadata.c */
+extern struct crypt_mtd_loader mtd_loaders [LAST_MTD_LOADER];
+
+int32_t alloc_format(crypt_local_t *local, size_t size);
+int32_t alloc_format_create(crypt_local_t *local);
+void free_format(crypt_local_t *local);
+size_t format_size(mtd_op_t op, size_t old_size);
+size_t new_format_size(void);
+int32_t open_format(unsigned char *str, int32_t len, loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master, crypt_local_t *local,
+ gf_boolean_t load_info);
+int32_t update_format(unsigned char *new, unsigned char *old,
+ size_t old_len, int32_t mac_idx, mtd_op_t op, loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local);
+int32_t create_format(unsigned char *wire,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master);
+
+/* atom.c */
+struct rmw_atom *atom_by_types(atom_data_type data,
+ atom_locality_type locality);
+void submit_partial(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ atom_locality_type ltype);
+void submit_full(call_frame_t *frame, xlator_t *this);
+
+/* crypt.c */
+
+end_writeback_handler_t dispatch_end_writeback(glusterfs_fop_t fop);
+static size_t iovec_get_size(struct iovec *vec, uint32_t count);
+void set_local_io_params_writev(call_frame_t *frame,
+ struct object_cipher_info *object,
+ struct rmw_atom *atom, off_t io_offset,
+ uint32_t io_size);
+void link_wind(call_frame_t *frame, xlator_t *this);
+void unlink_wind(call_frame_t *frame, xlator_t *this);
+void link_unwind(call_frame_t *frame);
+void unlink_unwind(call_frame_t *frame);
+void rename_wind(call_frame_t *frame, xlator_t *this);
+void rename_unwind(call_frame_t *frame);
+
+/* Inline functions */
+
+static inline size_t iovec_get_size(struct iovec *vec, uint32_t count)
+{
+ int i;
+ size_t size = 0;
+ for (i = 0; i < count; i++)
+ size += vec[i].iov_len;
+ return size;
+}
+
+static inline int32_t crypt_xlator_id(void)
+{
+ return CRYPT_XLATOR_ID;
+}
+
+static inline mtd_loader_id current_mtd_loader(void)
+{
+ return MTD_LOADER_V1;
+}
+
+static inline uint32_t master_key_size (void)
+{
+ return crypt_keys[MASTER_VOL_KEY].len >> 3;
+}
+
+static inline uint32_t nmtd_vol_key_size (void)
+{
+ return crypt_keys[NMTD_VOL_KEY].len >> 3;
+}
+
+static inline uint32_t alg_mode_blkbits(cipher_alg_t alg,
+ cipher_mode_t mode)
+{
+ return data_cipher_algs[alg][mode].blkbits;
+}
+
+static inline uint32_t alg_mode_blksize(cipher_alg_t alg,
+ cipher_mode_t mode)
+{
+ return 1 << alg_mode_blkbits(alg, mode);
+}
+
+static inline gf_boolean_t alg_mode_atomic(cipher_alg_t alg,
+ cipher_mode_t mode)
+{
+ return data_cipher_algs[alg][mode].atomic;
+}
+
+static inline gf_boolean_t alg_mode_should_pad(cipher_alg_t alg,
+ cipher_mode_t mode)
+{
+ return data_cipher_algs[alg][mode].should_pad;
+}
+
+static inline uint32_t master_alg_blksize(struct master_cipher_info *mr)
+{
+ return alg_mode_blksize(mr->m_alg, mr->m_mode);
+}
+
+static inline uint32_t master_alg_blkbits(struct master_cipher_info *mr)
+{
+ return alg_mode_blkbits(mr->m_alg, mr->m_mode);
+}
+
+static inline gf_boolean_t master_alg_atomic(struct master_cipher_info *mr)
+{
+ return alg_mode_atomic(mr->m_alg, mr->m_mode);
+}
+
+static inline gf_boolean_t master_alg_should_pad(struct master_cipher_info *mr)
+{
+ return alg_mode_should_pad(mr->m_alg, mr->m_mode);
+}
+
+static inline uint32_t object_alg_blksize(struct object_cipher_info *ob)
+{
+ return alg_mode_blksize(ob->o_alg, ob->o_mode);
+}
+
+static inline uint32_t object_alg_blkbits(struct object_cipher_info *ob)
+{
+ return alg_mode_blkbits(ob->o_alg, ob->o_mode);
+}
+
+static inline gf_boolean_t object_alg_atomic(struct object_cipher_info *ob)
+{
+ return alg_mode_atomic(ob->o_alg, ob->o_mode);
+}
+
+static inline gf_boolean_t object_alg_should_pad(struct object_cipher_info *ob)
+{
+ return alg_mode_should_pad(ob->o_alg, ob->o_mode);
+}
+
+static inline uint32_t aes_raw_key_size(struct master_cipher_info *master)
+{
+ return master->m_dkey_size >> 3;
+}
+
+static inline struct avec_config *get_hole_conf(call_frame_t *frame)
+{
+ return &(((crypt_local_t *)frame->local)->hole_conf);
+}
+
+static inline struct avec_config *get_data_conf(call_frame_t *frame)
+{
+ return &(((crypt_local_t *)frame->local)->data_conf);
+}
+
+static inline int32_t get_atom_bits (struct object_cipher_info *object)
+{
+ return object->o_block_bits;
+}
+
+static inline int32_t get_atom_size (struct object_cipher_info *object)
+{
+ return 1 << get_atom_bits(object);
+}
+
+static inline int32_t has_head_block(struct avec_config *conf)
+{
+ return conf->off_in_head ||
+ (conf->acount == 1 && conf->off_in_tail);
+}
+
+static inline int32_t has_tail_block(struct avec_config *conf)
+{
+ return conf->off_in_tail && conf->acount > 1;
+}
+
+static inline int32_t has_full_blocks(struct avec_config *conf)
+{
+ return conf->nr_full_blocks;
+}
+
+static inline int32_t should_submit_head_block(struct avec_config *conf)
+{
+ return has_head_block(conf) && (conf->cursor == 0);
+}
+
+static inline int32_t should_submit_tail_block(struct avec_config *conf)
+{
+ return has_tail_block(conf) && (conf->cursor == conf->acount - 1);
+}
+
+static inline int32_t should_submit_full_block(struct avec_config *conf)
+{
+ uint32_t start = has_head_block(conf) ? 1 : 0;
+
+ return has_full_blocks(conf) &&
+ conf->cursor >= start &&
+ conf->cursor < start + conf->nr_full_blocks;
+}
+
+#if DEBUG_CRYPT
+static inline void crypt_check_input_len(size_t len,
+ struct object_cipher_info *object)
+{
+ if (object_alg_should_pad(object) && (len & (object_alg_blksize(object) - 1)))
+ gf_log ("crypt", GF_LOG_DEBUG, "bad input len: %d", (int)len);
+}
+
+static inline void check_head_block(struct avec_config *conf)
+{
+ if (!has_head_block(conf))
+ gf_log("crypt", GF_LOG_DEBUG, "not a head atom");
+}
+
+static inline void check_tail_block(struct avec_config *conf)
+{
+ if (!has_tail_block(conf))
+ gf_log("crypt", GF_LOG_DEBUG, "not a tail atom");
+}
+
+static inline void check_full_block(struct avec_config *conf)
+{
+ if (!has_full_blocks(conf))
+ gf_log("crypt", GF_LOG_DEBUG, "not a full atom");
+}
+
+static inline void check_cursor_head(struct avec_config *conf)
+{
+ if (!has_head_block(conf))
+ gf_log("crypt",
+ GF_LOG_DEBUG, "Illegal call of head atom method");
+ else if (conf->cursor != 0)
+ gf_log("crypt",
+ GF_LOG_DEBUG, "Cursor (%d) is not at head atom",
+ conf->cursor);
+}
+
+static inline void check_cursor_full(struct avec_config *conf)
+{
+ if (!has_full_blocks(conf))
+ gf_log("crypt",
+ GF_LOG_DEBUG, "Illegal call of full atom method");
+ if (has_head_block(conf) && (conf->cursor == 0))
+ gf_log("crypt",
+ GF_LOG_DEBUG, "Cursor is not at full atom");
+}
+
+/*
+ * FIXME: use avec->iov_len to check setup
+ */
+static inline int data_local_invariant(crypt_local_t *local)
+{
+ return 0;
+}
+
+#else
+#define crypt_check_input_len(len, object) noop
+#define check_head_block(conf) noop
+#define check_tail_block(conf) noop
+#define check_full_block(conf) noop
+#define check_cursor_head(conf) noop
+#define check_cursor_full(conf) noop
+
+#endif /* DEBUG_CRYPT */
+
+static inline struct avec_config *conf_by_type(call_frame_t *frame,
+ atom_data_type dtype)
+{
+ struct avec_config *conf = NULL;
+
+ switch (dtype) {
+ case HOLE_ATOM:
+ conf = get_hole_conf(frame);
+ break;
+ case DATA_ATOM:
+ conf = get_data_conf(frame);
+ break;
+ default:
+ gf_log("crypt", GF_LOG_DEBUG, "bad atom type");
+ }
+ return conf;
+}
+
+static inline uint32_t nr_calls_head(struct avec_config *conf)
+{
+ return has_head_block(conf) ? 1 : 0;
+}
+
+static inline uint32_t nr_calls_tail(struct avec_config *conf)
+{
+ return has_tail_block(conf) ? 1 : 0;
+}
+
+static inline uint32_t nr_calls_full(struct avec_config *conf)
+{
+ switch(conf->type) {
+ case HOLE_ATOM:
+ return has_full_blocks(conf);
+ case DATA_ATOM:
+ return has_full_blocks(conf) ?
+ logical_blocks_occupied(0,
+ conf->nr_full_blocks,
+ MAX_IOVEC_BITS) : 0;
+ default:
+ gf_log("crypt", GF_LOG_DEBUG, "bad atom data type");
+ return 0;
+ }
+}
+
+static inline uint32_t nr_calls(struct avec_config *conf)
+{
+ return nr_calls_head(conf) + nr_calls_tail(conf) + nr_calls_full(conf);
+}
+
+static inline uint32_t nr_calls_data(call_frame_t *frame)
+{
+ return nr_calls(get_data_conf(frame));
+}
+
+static inline uint32_t nr_calls_hole(call_frame_t *frame)
+{
+ return nr_calls(get_hole_conf(frame));
+}
+
+static inline void get_one_call_nolock(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+
+ ++local->nr_calls;
+
+ //gf_log("crypt", GF_LOG_DEBUG, "get %d calls", 1);
+}
+
+static inline void get_one_call(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+
+ LOCK(&local->call_lock);
+ get_one_call_nolock(frame);
+ UNLOCK(&local->call_lock);
+}
+
+static inline void get_nr_calls_nolock(call_frame_t *frame, int32_t nr)
+{
+ crypt_local_t *local = frame->local;
+
+ local->nr_calls += nr;
+
+ //gf_log("crypt", GF_LOG_DEBUG, "get %d calls", nr);
+}
+
+static inline void get_nr_calls(call_frame_t *frame, int32_t nr)
+{
+ crypt_local_t *local = frame->local;
+
+ LOCK(&local->call_lock);
+ get_nr_calls_nolock(frame, nr);
+ UNLOCK(&local->call_lock);
+}
+
+static inline int put_one_call(crypt_local_t *local)
+{
+ uint32_t last = 0;
+
+ LOCK(&local->call_lock);
+ if (--local->nr_calls == 0)
+ last = 1;
+
+ //gf_log("crypt", GF_LOG_DEBUG, "put %d calls", 1);
+
+ UNLOCK(&local->call_lock);
+ return last;
+}
+
+static inline int is_appended_write(call_frame_t *frame)
+{
+ crypt_local_t *local = frame->local;
+ struct avec_config *conf = get_data_conf(frame);
+
+ return conf->orig_offset + conf->orig_size > local->old_file_size;
+}
+
+static inline int is_ordered_mode(call_frame_t *frame)
+{
+#if 0
+ crypt_local_t *local = frame->local;
+ return local->fop == GF_FOP_FTRUNCATE ||
+ (local->fop == GF_FOP_WRITE && is_appended_write(frame));
+#endif
+ return 1;
+}
+
+static inline int32_t hole_conv_completed(crypt_local_t *local)
+{
+ struct avec_config *conf = &local->hole_conf;
+ return conf->cursor == conf->acount;
+}
+
+static inline int32_t data_write_in_progress(crypt_local_t *local)
+{
+ return local->active_setup == DATA_ATOM;
+}
+
+static inline int32_t parent_is_crypt_xlator(call_frame_t *frame,
+ xlator_t *this)
+{
+ return frame->parent->this == this;
+}
+
+static inline linkop_wind_handler_t linkop_wind_dispatch(glusterfs_fop_t fop)
+{
+ switch(fop){
+ case GF_FOP_LINK:
+ return link_wind;
+ case GF_FOP_UNLINK:
+ return unlink_wind;
+ case GF_FOP_RENAME:
+ return rename_wind;
+ default:
+ gf_log("crypt", GF_LOG_ERROR, "Bad link operation %d", fop);
+ return NULL;
+ }
+}
+
+static inline linkop_unwind_handler_t linkop_unwind_dispatch(glusterfs_fop_t fop)
+{
+ switch(fop){
+ case GF_FOP_LINK:
+ return link_unwind;
+ case GF_FOP_UNLINK:
+ return unlink_unwind;
+ case GF_FOP_RENAME:
+ return rename_unwind;
+ default:
+ gf_log("crypt", GF_LOG_ERROR, "Bad link operation %d", fop);
+ return NULL;
+ }
+}
+
+static inline mtd_op_t linkop_mtdop_dispatch(glusterfs_fop_t fop)
+{
+ switch (fop) {
+ case GF_FOP_LINK:
+ return MTD_APPEND;
+ case GF_FOP_UNLINK:
+ return MTD_CUT;
+ case GF_FOP_RENAME:
+ return MTD_OVERWRITE;
+ default:
+ gf_log("crypt", GF_LOG_WARNING, "Bad link operation %d", fop);
+ return MTD_LAST_OP;
+ }
+}
+
+#endif /* __CRYPT_H__ */
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/data.c b/xlators/encryption/crypt/src/data.c
new file mode 100644
index 000000000..762fa554a
--- /dev/null
+++ b/xlators/encryption/crypt/src/data.c
@@ -0,0 +1,769 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "defaults.h"
+#include "crypt-common.h"
+#include "crypt.h"
+
+static void set_iv_aes_xts(off_t offset, struct object_cipher_info *object)
+{
+ unsigned char *ivec;
+
+ ivec = object->u.aes_xts.ivec;
+
+ /* convert the tweak into a little-endian byte
+ * array (IEEE P1619/D16, May 2007, section 5.1)
+ */
+
+ *((uint64_t *)ivec) = htole64(offset);
+
+ /* ivec is padded with zeroes */
+}
+
+static int32_t aes_set_keys_common(unsigned char *raw_key, uint32_t key_size,
+ AES_KEY *keys)
+{
+ int32_t ret;
+
+ ret = AES_set_encrypt_key(raw_key,
+ key_size,
+ &keys[AES_ENCRYPT]);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, "Set encrypt key failed");
+ return ret;
+ }
+ ret = AES_set_decrypt_key(raw_key,
+ key_size,
+ &keys[AES_DECRYPT]);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, "Set decrypt key failed");
+ return ret;
+ }
+ return 0;
+}
+
+/*
+ * set private cipher info for xts mode
+ */
+static int32_t set_private_aes_xts(struct crypt_inode_info *info,
+ struct master_cipher_info *master)
+{
+ int ret;
+ struct object_cipher_info *object = get_object_cinfo(info);
+ unsigned char *data_key;
+ uint32_t subkey_size;
+
+ /* init tweak value */
+ memset(object->u.aes_xts.ivec, 0, 16);
+
+ data_key = GF_CALLOC(1, object->o_dkey_size, gf_crypt_mt_key);
+ if (!data_key)
+ return ENOMEM;
+
+ /*
+ * retrieve data keying meterial
+ */
+ ret = get_data_file_key(info, master, object->o_dkey_size, data_key);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, "Failed to retrieve data key");
+ GF_FREE(data_key);
+ return ret;
+ }
+ /*
+ * parse compound xts key
+ */
+ subkey_size = object->o_dkey_size >> 4; /* (xts-key-size-in-bytes / 2) */
+ /*
+ * install key for data encryption
+ */
+ ret = aes_set_keys_common(data_key,
+ subkey_size << 3, object->u.aes_xts.dkey);
+ if (ret) {
+ GF_FREE(data_key);
+ return ret;
+ }
+ /*
+ * set up key used to encrypt tweaks
+ */
+ ret = AES_set_encrypt_key(data_key + subkey_size,
+ object->o_dkey_size / 2,
+ &object->u.aes_xts.tkey);
+ if (ret < 0)
+ gf_log("crypt", GF_LOG_ERROR, "Set tweak key failed");
+
+ GF_FREE(data_key);
+ return ret;
+}
+
+static int32_t aes_xts_init(void)
+{
+ cassert(AES_BLOCK_SIZE == (1 << AES_BLOCK_BITS));
+ return 0;
+}
+
+static int32_t check_key_aes_xts(uint32_t keysize)
+{
+ switch(keysize) {
+ case 256:
+ case 512:
+ return 0;
+ default:
+ break;
+ }
+ return -1;
+}
+
+static int32_t encrypt_aes_xts(const unsigned char *from,
+ unsigned char *to, size_t length,
+ off_t offset, const int enc,
+ struct object_cipher_info *object)
+{
+ XTS128_CONTEXT ctx;
+ if (enc) {
+ ctx.key1 = &object->u.aes_xts.dkey[AES_ENCRYPT];
+ ctx.block1 = (block128_f)AES_encrypt;
+ }
+ else {
+ ctx.key1 = &object->u.aes_xts.dkey[AES_DECRYPT];
+ ctx.block1 = (block128_f)AES_decrypt;
+ }
+ ctx.key2 = &object->u.aes_xts.tkey;
+ ctx.block2 = (block128_f)AES_encrypt;
+
+ return CRYPTO_xts128_encrypt(&ctx,
+ object->u.aes_xts.ivec,
+ from,
+ to,
+ length, enc);
+}
+
+/*
+ * Cipher input chunk @from of length @len;
+ * @to: result of cipher transform;
+ * @off: offset in a file (must be cblock-aligned);
+ */
+static void cipher_data(struct object_cipher_info *object,
+ char *from,
+ char *to,
+ off_t off,
+ size_t len,
+ const int enc)
+{
+ crypt_check_input_len(len, object);
+
+#if TRIVIAL_TFM && DEBUG_CRYPT
+ return;
+#endif
+ data_cipher_algs[object->o_alg][object->o_mode].set_iv(off, object);
+ data_cipher_algs[object->o_alg][object->o_mode].encrypt
+ ((const unsigned char *)from,
+ (unsigned char *)to,
+ len,
+ off,
+ enc,
+ object);
+}
+
+#define MAX_CIPHER_CHUNK (1 << 30)
+
+/*
+ * Do cipher (encryption/decryption) transform of a
+ * continuous region of memory.
+ *
+ * @len: a number of bytes to transform;
+ * @buf: data to transform;
+ * @off: offset in a file, should be block-aligned
+ * for atomic cipher modes and ksize-aligned
+ * for other modes).
+ * @dir: direction of transform (encrypt/decrypt).
+ */
+static void cipher_region(struct object_cipher_info *object,
+ char *from,
+ char *to,
+ off_t off,
+ size_t len,
+ int dir)
+{
+ while (len > 0) {
+ size_t to_cipher;
+
+ to_cipher = len;
+ if (to_cipher > MAX_CIPHER_CHUNK)
+ to_cipher = MAX_CIPHER_CHUNK;
+
+ /* this will reset IV */
+ cipher_data(object,
+ from,
+ to,
+ off,
+ to_cipher,
+ dir);
+ from += to_cipher;
+ to += to_cipher;
+ off += to_cipher;
+ len -= to_cipher;
+ }
+}
+
+/*
+ * Do cipher transform (encryption/decryption) of
+ * plaintext/ciphertext represented by @vec.
+ *
+ * Pre-conditions: @vec represents a continuous piece
+ * of data in a file at offset @off to be ciphered
+ * (encrypted/decrypted).
+ * @count is the number of vec's components. All the
+ * components must be block-aligned, the caller is
+ * responsible for this. @dir is "direction" of
+ * transform (encrypt/decrypt).
+ */
+static void cipher_aligned_iov(struct object_cipher_info *object,
+ struct iovec *vec,
+ int count,
+ off_t off,
+ int32_t dir)
+{
+ int i;
+ int len = 0;
+
+ for (i = 0; i < count; i++) {
+ cipher_region(object,
+ vec[i].iov_base,
+ vec[i].iov_base,
+ off + len,
+ vec[i].iov_len,
+ dir);
+ len += vec[i].iov_len;
+ }
+}
+
+void encrypt_aligned_iov(struct object_cipher_info *object,
+ struct iovec *vec,
+ int count,
+ off_t off)
+{
+ cipher_aligned_iov(object, vec, count, off, 1);
+}
+
+void decrypt_aligned_iov(struct object_cipher_info *object,
+ struct iovec *vec,
+ int count,
+ off_t off)
+{
+ cipher_aligned_iov(object, vec, count, off, 0);
+}
+
+#if DEBUG_CRYPT
+static void compound_stream(struct iovec *vec, int count, char *buf, off_t skip)
+{
+ int i;
+ int off = 0;
+ for (i = 0; i < count; i++) {
+ memcpy(buf + off,
+ vec[i].iov_base + skip,
+ vec[i].iov_len - skip);
+
+ off += (vec[i].iov_len - skip);
+ skip = 0;
+ }
+}
+
+static void check_iovecs(struct iovec *vec, int cnt,
+ struct iovec *avec, int acnt, uint32_t off_in_head)
+{
+ char *s1, *s2;
+ uint32_t size, asize;
+
+ size = iovec_get_size(vec, cnt);
+ asize = iovec_get_size(avec, acnt) - off_in_head;
+ if (size != asize) {
+ gf_log("crypt", GF_LOG_DEBUG, "size %d is not eq asize %d",
+ size, asize);
+ return;
+ }
+ s1 = GF_CALLOC(1, size, gf_crypt_mt_data);
+ if (!s1) {
+ gf_log("crypt", GF_LOG_DEBUG, "Can not allocate stream ");
+ return;
+ }
+ s2 = GF_CALLOC(1, asize, gf_crypt_mt_data);
+ if (!s2) {
+ GF_FREE(s1);
+ gf_log("crypt", GF_LOG_DEBUG, "Can not allocate stream ");
+ return;
+ }
+ compound_stream(vec, cnt, s1, 0);
+ compound_stream(avec, acnt, s2, off_in_head);
+ if (memcmp(s1, s2, size))
+ gf_log("crypt", GF_LOG_DEBUG, "chunks of different data");
+ GF_FREE(s1);
+ GF_FREE(s2);
+}
+
+#else
+#define check_iovecs(vec, count, avec, avecn, off) noop
+#endif /* DEBUG_CRYPT */
+
+static char *data_alloc_block(xlator_t *this, crypt_local_t *local,
+ int32_t block_size)
+{
+ struct iobuf *iobuf = NULL;
+
+ iobuf = iobuf_get2(this->ctx->iobuf_pool, block_size);
+ if (!iobuf) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "Failed to get iobuf");
+ return NULL;
+ }
+ if (!local->iobref_data) {
+ local->iobref_data = iobref_new();
+ if (!local->iobref_data) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "Failed to get iobref");
+ iobuf_unref(iobuf);
+ return NULL;
+ }
+ }
+ iobref_add(local->iobref_data, iobuf);
+ return iobuf->ptr;
+}
+
+/*
+ * Compound @avec, which represent the same data
+ * chunk as @vec, but has aligned components of
+ * specified block size. Alloc blocks, if needed.
+ * In particular, incomplete head and tail blocks
+ * must be allocated.
+ * Put number of allocated blocks to @num_blocks.
+ *
+ * Example:
+ *
+ * input: data chunk represented by 4 components
+ * [AB],[BC],[CD],[DE];
+ * output: 5 logical blocks (0, 1, 2, 3, 4).
+ *
+ * A B C D E
+ * *-----*+------*-+---*----+--------+-*
+ * | || | | | | | |
+ * *-+-----+*------+-*---+----*--------*-+------*
+ * 0 1 2 3 4
+ *
+ * 0 - incomplete compound (head);
+ * 1, 2 - full compound;
+ * 3 - full non-compound (the case of reuse);
+ * 4 - incomplete non-compound (tail).
+ */
+int32_t align_iov_by_atoms(xlator_t *this,
+ crypt_local_t *local,
+ struct object_cipher_info *object,
+ struct iovec *vec /* input vector */,
+ int32_t count /* number of vec components */,
+ struct iovec *avec /* aligned vector */,
+ char **blocks /* pool of blocks */,
+ uint32_t *blocks_allocated,
+ struct avec_config *conf)
+{
+ int vecn = 0; /* number of the current component in vec */
+ int avecn = 0; /* number of the current component in avec */
+ off_t vec_off = 0; /* offset in the current vec component,
+ * i.e. the number of bytes have already
+ * been copied */
+ int32_t block_size = get_atom_size(object);
+ size_t to_process; /* number of vec's bytes to copy and(or) re-use */
+ int32_t off_in_head = conf->off_in_head;
+
+ to_process = iovec_get_size(vec, count);
+
+ while (to_process > 0) {
+ if (off_in_head ||
+ vec[vecn].iov_len - vec_off < block_size) {
+ /*
+ * less than block_size:
+ * the case of incomplete (head or tail),
+ * or compound block
+ */
+ size_t copied = 0;
+ /*
+ * populate the pool with a new block
+ */
+ blocks[*blocks_allocated] = data_alloc_block(this,
+ local,
+ block_size);
+ if (!blocks[*blocks_allocated])
+ return -ENOMEM;
+ memset(blocks[*blocks_allocated], 0, off_in_head);
+ /*
+ * fill the block with vec components
+ */
+ do {
+ size_t to_copy;
+
+ to_copy = vec[vecn].iov_len - vec_off;
+ if (to_copy > block_size - off_in_head)
+ to_copy = block_size - off_in_head;
+
+ memcpy(blocks[*blocks_allocated] + off_in_head + copied,
+ vec[vecn].iov_base + vec_off,
+ to_copy);
+
+ copied += to_copy;
+ to_process -= to_copy;
+
+ vec_off += to_copy;
+ if (vec_off == vec[vecn].iov_len) {
+ /* finished with this vecn */
+ vec_off = 0;
+ vecn++;
+ }
+ } while (copied < (block_size - off_in_head) && to_process > 0);
+ /*
+ * update avec
+ */
+ avec[avecn].iov_len = off_in_head + copied;
+ avec[avecn].iov_base = blocks[*blocks_allocated];
+
+ (*blocks_allocated)++;
+ off_in_head = 0;
+ } else {
+ /*
+ * the rest of the current vec component
+ * is not less than block_size, so reuse
+ * the memory buffer of the component.
+ */
+ size_t to_reuse;
+ to_reuse = (to_process > block_size ?
+ block_size :
+ to_process);
+ avec[avecn].iov_len = to_reuse;
+ avec[avecn].iov_base = vec[vecn].iov_base + vec_off;
+
+ vec_off += to_reuse;
+ if (vec_off == vec[vecn].iov_len) {
+ /* finished with this vecn */
+ vec_off = 0;
+ vecn++;
+ }
+ to_process -= to_reuse;
+ }
+ avecn++;
+ }
+ check_iovecs(vec, count, avec, avecn, conf->off_in_head);
+ return 0;
+}
+
+/*
+ * allocate and setup aligned vector for data submission
+ * Pre-condition: @conf is set.
+ */
+int32_t set_config_avec_data(xlator_t *this,
+ crypt_local_t *local,
+ struct avec_config *conf,
+ struct object_cipher_info *object,
+ struct iovec *vec,
+ int32_t vec_count)
+{
+ int32_t ret = ENOMEM;
+ struct iovec *avec;
+ char **pool;
+ uint32_t blocks_in_pool = 0;
+
+ conf->type = DATA_ATOM;
+
+ avec = GF_CALLOC(conf->acount, sizeof(*avec), gf_crypt_mt_iovec);
+ if (!avec)
+ return ret;
+ pool = GF_CALLOC(conf->acount, sizeof(pool), gf_crypt_mt_char);
+ if (!pool) {
+ GF_FREE(avec);
+ return ret;
+ }
+ if (!vec) {
+ /*
+ * degenerated case: no data
+ */
+ pool[0] = data_alloc_block(this, local, get_atom_size(object));
+ if (!pool[0])
+ goto free;
+ blocks_in_pool = 1;
+ avec->iov_base = pool[0];
+ avec->iov_len = conf->off_in_tail;
+ }
+ else {
+ ret = align_iov_by_atoms(this, local, object, vec, vec_count,
+ avec, pool, &blocks_in_pool, conf);
+ if (ret)
+ goto free;
+ }
+ conf->avec = avec;
+ conf->pool = pool;
+ conf->blocks_in_pool = blocks_in_pool;
+ return 0;
+ free:
+ GF_FREE(avec);
+ GF_FREE(pool);
+ return ret;
+}
+
+/*
+ * allocate and setup aligned vector for hole submission
+ */
+int32_t set_config_avec_hole(xlator_t *this,
+ crypt_local_t *local,
+ struct avec_config *conf,
+ struct object_cipher_info *object,
+ glusterfs_fop_t fop)
+{
+ uint32_t i, idx;
+ struct iovec *avec;
+ char **pool;
+ uint32_t num_blocks;
+ uint32_t blocks_in_pool = 0;
+
+ conf->type = HOLE_ATOM;
+
+ num_blocks = conf->acount -
+ (conf->nr_full_blocks ? conf->nr_full_blocks - 1 : 0);
+
+ switch (fop) {
+ case GF_FOP_WRITE:
+ /*
+ * hole goes before data
+ */
+ if (num_blocks == 1 && conf->off_in_tail != 0)
+ /*
+ * we won't submit a hole which fits into
+ * a data atom: this part of hole will be
+ * submitted with data write
+ */
+ return 0;
+ break;
+ case GF_FOP_FTRUNCATE:
+ /*
+ * expanding truncate, hole goes after data,
+ * and will be submited in any case.
+ */
+ break;
+ default:
+ gf_log("crypt", GF_LOG_WARNING,
+ "bad file operation %d", fop);
+ return 0;
+ }
+ avec = GF_CALLOC(num_blocks, sizeof(*avec), gf_crypt_mt_iovec);
+ if (!avec)
+ return ENOMEM;
+ pool = GF_CALLOC(num_blocks, sizeof(pool), gf_crypt_mt_char);
+ if (!pool) {
+ GF_FREE(avec);
+ return ENOMEM;
+ }
+ for (i = 0; i < num_blocks; i++) {
+ pool[i] = data_alloc_block(this, local, get_atom_size(object));
+ if (pool[i] == NULL)
+ goto free;
+ blocks_in_pool++;
+ }
+ if (has_head_block(conf)) {
+ /* set head block */
+ idx = 0;
+ avec[idx].iov_base = pool[idx];
+ avec[idx].iov_len = get_atom_size(object);
+ memset(avec[idx].iov_base + conf->off_in_head,
+ 0,
+ get_atom_size(object) - conf->off_in_head);
+ }
+ if (has_tail_block(conf)) {
+ /* set tail block */
+ idx = num_blocks - 1;
+ avec[idx].iov_base = pool[idx];
+ avec[idx].iov_len = get_atom_size(object);
+ memset(avec[idx].iov_base, 0, conf->off_in_tail);
+ }
+ if (has_full_blocks(conf)) {
+ /* set full block */
+ idx = conf->off_in_head ? 1 : 0;
+ avec[idx].iov_base = pool[idx];
+ avec[idx].iov_len = get_atom_size(object);
+ /*
+ * since we re-use the buffer,
+ * zeroes will be set every time
+ * before encryption, see submit_full()
+ */
+ }
+ conf->avec = avec;
+ conf->pool = pool;
+ conf->blocks_in_pool = blocks_in_pool;
+ return 0;
+ free:
+ GF_FREE(avec);
+ GF_FREE(pool);
+ return ENOMEM;
+}
+
+/* A helper for setting up config of partial atoms (which
+ * participate in read-modify-write sequence).
+ *
+ * Calculate and setup precise amount of "extra-bytes"
+ * that should be uptodated at the end of partial (not
+ * necessarily tail!) block.
+ *
+ * Pre-condition: local->old_file_size is valid!
+ * @conf contains setup, which is enough for correct calculation
+ * of has_tail_block(), ->get_offset().
+ */
+void set_gap_at_end(call_frame_t *frame, struct object_cipher_info *object,
+ struct avec_config *conf, atom_data_type dtype)
+{
+ uint32_t to_block;
+ crypt_local_t *local = frame->local;
+ uint64_t old_file_size = local->old_file_size;
+ struct rmw_atom *partial = atom_by_types(dtype,
+ has_tail_block(conf) ?
+ TAIL_ATOM : HEAD_ATOM);
+
+ if (old_file_size <= partial->offset_at(frame, object))
+ to_block = 0;
+ else {
+ to_block = old_file_size - partial->offset_at(frame, object);
+ if (to_block > get_atom_size(object))
+ to_block = get_atom_size(object);
+ }
+ if (to_block > conf->off_in_tail)
+ conf->gap_in_tail = to_block - conf->off_in_tail;
+ else
+ /*
+ * nothing to uptodate
+ */
+ conf->gap_in_tail = 0;
+}
+
+/*
+ * fill struct avec_config with offsets layouts
+ */
+void set_config_offsets(call_frame_t *frame,
+ xlator_t *this,
+ uint64_t offset,
+ uint64_t count,
+ atom_data_type dtype,
+ int32_t set_gap)
+{
+ crypt_local_t *local;
+ struct object_cipher_info *object;
+ struct avec_config *conf;
+ uint32_t resid;
+
+ uint32_t atom_size;
+ uint32_t atom_bits;
+
+ size_t orig_size;
+ off_t orig_offset;
+ size_t expanded_size;
+ off_t aligned_offset;
+
+ uint32_t off_in_head = 0;
+ uint32_t off_in_tail = 0;
+ uint32_t nr_full_blocks;
+ int32_t size_full_blocks;
+
+ uint32_t acount; /* number of alifned components to write.
+ * The same as number of occupied logical
+ * blocks (atoms)
+ */
+ local = frame->local;
+ object = &local->info->cinfo;
+ conf = (dtype == DATA_ATOM ?
+ get_data_conf(frame) : get_hole_conf(frame));
+
+ orig_offset = offset;
+ orig_size = count;
+
+ atom_size = get_atom_size(object);
+ atom_bits = get_atom_bits(object);
+
+ /*
+ * Round-down the start,
+ * round-up the end.
+ */
+ resid = offset & (uint64_t)(atom_size - 1);
+
+ if (resid)
+ off_in_head = resid;
+ aligned_offset = offset - off_in_head;
+ expanded_size = orig_size + off_in_head;
+
+ /* calculate tail,
+ expand size forward */
+ resid = (offset + orig_size) & (uint64_t)(atom_size - 1);
+
+ if (resid) {
+ off_in_tail = resid;
+ expanded_size += (atom_size - off_in_tail);
+ }
+ /*
+ * calculate number of occupied blocks
+ */
+ acount = expanded_size >> atom_bits;
+ /*
+ * calculate number of full blocks
+ */
+ size_full_blocks = expanded_size;
+ if (off_in_head)
+ size_full_blocks -= atom_size;
+ if (off_in_tail && size_full_blocks > 0)
+ size_full_blocks -= atom_size;
+ nr_full_blocks = size_full_blocks >> atom_bits;
+
+ conf->atom_size = atom_size;
+ conf->orig_size = orig_size;
+ conf->orig_offset = orig_offset;
+ conf->expanded_size = expanded_size;
+ conf->aligned_offset = aligned_offset;
+
+ conf->off_in_head = off_in_head;
+ conf->off_in_tail = off_in_tail;
+ conf->nr_full_blocks = nr_full_blocks;
+ conf->acount = acount;
+ /*
+ * Finally, calculate precise amount of
+ * "extra-bytes" that should be uptodated
+ * at the end.
+ * Only if RMW is expected.
+ */
+ if (off_in_tail && set_gap)
+ set_gap_at_end(frame, object, conf, dtype);
+}
+
+struct data_cipher_alg data_cipher_algs[LAST_CIPHER_ALG][LAST_CIPHER_MODE] = {
+ [AES_CIPHER_ALG][XTS_CIPHER_MODE] =
+ { .atomic = _gf_true,
+ .should_pad = _gf_true,
+ .blkbits = AES_BLOCK_BITS,
+ .init = aes_xts_init,
+ .set_private = set_private_aes_xts,
+ .check_key = check_key_aes_xts,
+ .set_iv = set_iv_aes_xts,
+ .encrypt = encrypt_aes_xts
+ }
+};
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/keys.c b/xlators/encryption/crypt/src/keys.c
new file mode 100644
index 000000000..4a1d3bb5a
--- /dev/null
+++ b/xlators/encryption/crypt/src/keys.c
@@ -0,0 +1,302 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "defaults.h"
+#include "crypt-common.h"
+#include "crypt.h"
+
+/* Key hierarchy
+
+ +----------------+
+ | MASTER_VOL_KEY |
+ +-------+--------+
+ |
+ |
+ +----------------+----------------+
+ | | |
+ | | |
+ +-------+------+ +-------+-------+ +------+--------+
+ | NMTD_VOL_KEY | | EMTD_FILE_KEY | | DATA_FILE_KEY |
+ +-------+------+ +---------------+ +---------------+
+ |
+ |
+ +-------+-------+
+ | NMTD_LINK_KEY |
+ +---------------+
+
+ */
+
+#if DEBUG_CRYPT
+static void check_prf_iters(uint32_t num_iters)
+{
+ if (num_iters == 0)
+ gf_log ("crypt", GF_LOG_DEBUG,
+ "bad number of prf iterations : %d", num_iters);
+}
+#else
+#define check_prf_iters(num_iters) noop
+#endif /* DEBUG_CRYPT */
+
+unsigned char crypt_fake_oid[16] =
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+/*
+ * derive key in the counter mode using
+ * sha256-based HMAC as PRF, see
+ * NIST Special Publication 800-108, 5.1)
+ */
+
+#define PRF_OUTPUT_SIZE SHA256_DIGEST_LENGTH
+
+static int32_t kderive_init(struct kderive_context *ctx,
+ const unsigned char *pkey, /* parent key */
+ uint32_t pkey_size, /* parent key size */
+ const unsigned char *idctx, /* id-context */
+ uint32_t idctx_size,
+ crypt_key_type type /* type of child key */)
+{
+ unsigned char *pos;
+ uint32_t llen = strlen(crypt_keys[type].label);
+ /*
+ * Compoud the fixed input data for KDF:
+ * [i]_2 || Label || 0x00 || Id-Context || [L]_2),
+ * NIST SP 800-108, 5.1
+ */
+ ctx->fid_len =
+ sizeof(uint32_t) +
+ llen +
+ 1 +
+ idctx_size +
+ sizeof(uint32_t);
+
+ ctx->fid = GF_CALLOC(ctx->fid_len, 1, gf_crypt_mt_key);
+ if (!ctx->fid)
+ return ENOMEM;
+ ctx->out_len = round_up(crypt_keys[type].len >> 3,
+ PRF_OUTPUT_SIZE);
+ ctx->out = GF_CALLOC(ctx->out_len, 1, gf_crypt_mt_key);
+ if (!ctx->out) {
+ GF_FREE(ctx->fid);
+ return ENOMEM;
+ }
+ ctx->pkey = pkey;
+ ctx->pkey_len = pkey_size;
+ ctx->ckey_len = crypt_keys[type].len;
+
+ pos = ctx->fid;
+
+ /* counter will be set up in kderive_rfn() */
+ pos += sizeof(uint32_t);
+
+ memcpy(pos, crypt_keys[type].label, llen);
+ pos += llen;
+
+ /* set up zero octet */
+ *pos = 0;
+ pos += 1;
+
+ memcpy(pos, idctx, idctx_size);
+ pos += idctx_size;
+
+ *((uint32_t *)pos) = htobe32(ctx->ckey_len);
+
+ return 0;
+}
+
+static void kderive_update(struct kderive_context *ctx)
+{
+ uint32_t i;
+ HMAC_CTX hctx;
+ unsigned char *pos = ctx->out;
+ uint32_t *p_iter = (uint32_t *)ctx->fid;
+ uint32_t num_iters = ctx->out_len / PRF_OUTPUT_SIZE;
+
+ check_prf_iters(num_iters);
+
+ HMAC_CTX_init(&hctx);
+ for (i = 0; i < num_iters; i++) {
+ /*
+ * update the iteration number in the fid
+ */
+ *p_iter = htobe32(i);
+ HMAC_Init_ex(&hctx,
+ ctx->pkey, ctx->pkey_len >> 3,
+ EVP_sha256(),
+ NULL);
+ HMAC_Update(&hctx, ctx->fid, ctx->fid_len);
+ HMAC_Final(&hctx, pos, NULL);
+
+ pos += PRF_OUTPUT_SIZE;
+ }
+ HMAC_CTX_cleanup(&hctx);
+}
+
+static void kderive_final(struct kderive_context *ctx, unsigned char *child)
+{
+ memcpy(child, ctx->out, ctx->ckey_len >> 3);
+ GF_FREE(ctx->fid);
+ GF_FREE(ctx->out);
+ memset(ctx, 0, sizeof(*ctx));
+}
+
+/*
+ * derive per-volume key for object ids aithentication
+ */
+int32_t get_nmtd_vol_key(struct master_cipher_info *master)
+{
+ int32_t ret;
+ struct kderive_context ctx;
+
+ ret = kderive_init(&ctx,
+ master->m_key,
+ master_key_size(),
+ crypt_fake_oid, sizeof(uuid_t), NMTD_VOL_KEY);
+ if (ret)
+ return ret;
+ kderive_update(&ctx);
+ kderive_final(&ctx, master->m_nmtd_key);
+ return 0;
+}
+
+/*
+ * derive per-link key for aithentication of non-encrypted
+ * meta-data (nmtd)
+ */
+int32_t get_nmtd_link_key(loc_t *loc,
+ struct master_cipher_info *master,
+ unsigned char *result)
+{
+ int32_t ret;
+ struct kderive_context ctx;
+
+ ret = kderive_init(&ctx,
+ master->m_nmtd_key,
+ nmtd_vol_key_size(),
+ (const unsigned char *)loc->path,
+ strlen(loc->path), NMTD_LINK_KEY);
+ if (ret)
+ return ret;
+ kderive_update(&ctx);
+ kderive_final(&ctx, result);
+ return 0;
+}
+
+/*
+ * derive per-file key for encryption and authentication
+ * of encrypted part of metadata (emtd)
+ */
+int32_t get_emtd_file_key(struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ unsigned char *result)
+{
+ int32_t ret;
+ struct kderive_context ctx;
+
+ ret = kderive_init(&ctx,
+ master->m_key,
+ master_key_size(),
+ info->oid, sizeof(uuid_t), EMTD_FILE_KEY);
+ if (ret)
+ return ret;
+ kderive_update(&ctx);
+ kderive_final(&ctx, result);
+ return 0;
+}
+
+static int32_t data_key_type_by_size(uint32_t keysize, crypt_key_type *type)
+{
+ int32_t ret = 0;
+ switch (keysize) {
+ case 256:
+ *type = DATA_FILE_KEY_256;
+ break;
+ case 512:
+ *type = DATA_FILE_KEY_512;
+ break;
+ default:
+ gf_log("crypt", GF_LOG_ERROR, "Unsupported data key size %d",
+ keysize);
+ ret = ENOTSUP;
+ break;
+ }
+ return ret;
+}
+
+/*
+ * derive per-file key for data encryption
+ */
+int32_t get_data_file_key(struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ uint32_t keysize,
+ unsigned char *key)
+{
+ int32_t ret;
+ struct kderive_context ctx;
+ crypt_key_type type;
+
+ ret = data_key_type_by_size(keysize, &type);
+ if (ret)
+ return ret;
+ ret = kderive_init(&ctx,
+ master->m_key,
+ master_key_size(),
+ info->oid, sizeof(uuid_t), type);
+ if (ret)
+ return ret;
+ kderive_update(&ctx);
+ kderive_final(&ctx, key);
+ return 0;
+}
+
+/*
+ * NOTE: Don't change existing keys: it will break compatibility;
+ */
+struct crypt_key crypt_keys[LAST_KEY_TYPE] = {
+ [MASTER_VOL_KEY] =
+ { .len = MASTER_VOL_KEY_SIZE << 3,
+ .label = "volume-master",
+ },
+ [NMTD_VOL_KEY] =
+ { .len = NMTD_VOL_KEY_SIZE << 3,
+ .label = "volume-nmtd-key-generation"
+ },
+ [NMTD_LINK_KEY] =
+ { .len = 128,
+ .label = "link-nmtd-authentication"
+ },
+ [EMTD_FILE_KEY] =
+ { .len = 128,
+ .label = "file-emtd-encryption-and-auth"
+ },
+ [DATA_FILE_KEY_256] =
+ { .len = 256,
+ .label = "file-data-encryption-256"
+ },
+ [DATA_FILE_KEY_512] =
+ { .len = 512,
+ .label = "file-data-encryption-512"
+ }
+};
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/metadata.c b/xlators/encryption/crypt/src/metadata.c
new file mode 100644
index 000000000..36b14c055
--- /dev/null
+++ b/xlators/encryption/crypt/src/metadata.c
@@ -0,0 +1,605 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "defaults.h"
+#include "crypt-common.h"
+#include "crypt.h"
+#include "metadata.h"
+
+int32_t alloc_format(crypt_local_t *local, size_t size)
+{
+ if (size > 0) {
+ local->format = GF_CALLOC(1, size, gf_crypt_mt_mtd);
+ if (!local->format)
+ return ENOMEM;
+ }
+ local->format_size = size;
+ return 0;
+}
+
+int32_t alloc_format_create(crypt_local_t *local)
+{
+ return alloc_format(local, new_format_size());
+}
+
+void free_format(crypt_local_t *local)
+{
+ GF_FREE(local->format);
+}
+
+/*
+ * Check compatibility with extracted metadata
+ */
+static int32_t check_file_metadata(struct crypt_inode_info *info)
+{
+ struct object_cipher_info *object = &info->cinfo;
+
+ if (info->nr_minor != CRYPT_XLATOR_ID) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "unsupported minor subversion %d", info->nr_minor);
+ return EINVAL;
+ }
+ if (object->o_alg > LAST_CIPHER_ALG) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "unsupported cipher algorithm %d",
+ object->o_alg);
+ return EINVAL;
+ }
+ if (object->o_mode > LAST_CIPHER_MODE) {
+ gf_log("crypt", GF_LOG_WARNING,
+ "unsupported cipher mode %d",
+ object->o_mode);
+ return EINVAL;
+ }
+ if (object->o_block_bits < CRYPT_MIN_BLOCK_BITS ||
+ object->o_block_bits > CRYPT_MAX_BLOCK_BITS) {
+ gf_log("crypt", GF_LOG_WARNING, "unsupported block bits %d",
+ object->o_block_bits);
+ return EINVAL;
+ }
+ /* TBD: check data key size */
+ return 0;
+}
+
+static size_t format_size_v1(mtd_op_t op, size_t old_size)
+{
+
+ switch (op) {
+ case MTD_CREATE:
+ return sizeof(struct mtd_format_v1);
+ case MTD_OVERWRITE:
+ return old_size;
+ case MTD_APPEND:
+ return old_size + NMTD_8_MAC_SIZE;
+ case MTD_CUT:
+ if (old_size > sizeof(struct mtd_format_v1))
+ return old_size - NMTD_8_MAC_SIZE;
+ else
+ return 0;
+ default:
+ gf_log("crypt", GF_LOG_WARNING, "Bad mtd operation");
+ return 0;
+ }
+}
+
+/*
+ * Calculate size of the updated format string.
+ * Returned zero means that we don't need to update the format string.
+ */
+size_t format_size(mtd_op_t op, size_t old_size)
+{
+ size_t versioned;
+
+ versioned = mtd_loaders[current_mtd_loader()].format_size(op,
+ old_size - sizeof(struct crypt_format));
+ if (versioned != 0)
+ return versioned + sizeof(struct crypt_format);
+ return 0;
+}
+
+/*
+ * size of the format string of newly created file (nr_links = 1)
+ */
+size_t new_format_size(void)
+{
+ return format_size(MTD_CREATE, 0);
+}
+
+/*
+ * Calculate per-link MAC by pathname
+ */
+static int32_t calc_link_mac_v1(struct mtd_format_v1 *fmt,
+ loc_t *loc,
+ unsigned char *result,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master)
+{
+ int32_t ret;
+ unsigned char nmtd_link_key[16];
+ CMAC_CTX *cctx;
+ size_t len;
+
+ ret = get_nmtd_link_key(loc, master, nmtd_link_key);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, "Can not get nmtd link key");
+ return -1;
+ }
+ cctx = CMAC_CTX_new();
+ if (!cctx) {
+ gf_log("crypt", GF_LOG_ERROR, "CMAC_CTX_new failed");
+ return -1;
+ }
+ ret = CMAC_Init(cctx, nmtd_link_key, sizeof(nmtd_link_key),
+ EVP_aes_128_cbc(), 0);
+ if (!ret) {
+ gf_log("crypt", GF_LOG_ERROR, "CMAC_Init failed");
+ CMAC_CTX_free(cctx);
+ return -1;
+ }
+ ret = CMAC_Update(cctx, get_NMTD_V1(info), SIZE_OF_NMTD_V1);
+ if (!ret) {
+ gf_log("crypt", GF_LOG_ERROR, "CMAC_Update failed");
+ CMAC_CTX_free(cctx);
+ return -1;
+ }
+ ret = CMAC_Final(cctx, result, &len);
+ CMAC_CTX_free(cctx);
+ if (!ret) {
+ gf_log("crypt", GF_LOG_ERROR, "CMAC_Final failed");
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * Create per-link MAC of index @idx by pathname
+ */
+static int32_t create_link_mac_v1(struct mtd_format_v1 *fmt,
+ uint32_t idx,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master)
+{
+ int32_t ret;
+ unsigned char *mac;
+ unsigned char cmac[16];
+
+ mac = get_NMTD_V1_MAC(fmt) + idx * SIZE_OF_NMTD_V1_MAC;
+
+ ret = calc_link_mac_v1(fmt, loc, cmac, info, master);
+ if (ret)
+ return -1;
+ memcpy(mac, cmac, SIZE_OF_NMTD_V1_MAC);
+ return 0;
+}
+
+static int32_t create_format_v1(unsigned char *wire,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master)
+{
+ int32_t ret;
+ struct mtd_format_v1 *fmt;
+ unsigned char mtd_key[16];
+ AES_KEY EMTD_KEY;
+ unsigned char nmtd_link_key[16];
+ uint32_t ad;
+ GCM128_CONTEXT *gctx;
+
+ fmt = (struct mtd_format_v1 *)wire;
+
+ fmt->minor_id = info->nr_minor;
+ fmt->alg_id = AES_CIPHER_ALG;
+ fmt->dkey_factor = master->m_dkey_size >> KEY_FACTOR_BITS;
+ fmt->block_bits = master->m_block_bits;
+ fmt->mode_id = master->m_mode;
+ /*
+ * retrieve keys for the parts of metadata
+ */
+ ret = get_emtd_file_key(info, master, mtd_key);
+ if (ret)
+ return ret;
+ ret = get_nmtd_link_key(loc, master, nmtd_link_key);
+ if (ret)
+ return ret;
+
+ AES_set_encrypt_key(mtd_key, sizeof(mtd_key)*8, &EMTD_KEY);
+
+ gctx = CRYPTO_gcm128_new(&EMTD_KEY, (block128_f)AES_encrypt);
+
+ /* TBD: Check return values */
+
+ CRYPTO_gcm128_setiv(gctx, info->oid, sizeof(uuid_t));
+
+ ad = htole32(MTD_LOADER_V1);
+ ret = CRYPTO_gcm128_aad(gctx, (const unsigned char *)&ad, sizeof(ad));
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_aad failed");
+ CRYPTO_gcm128_release(gctx);
+ return ret;
+ }
+ ret = CRYPTO_gcm128_encrypt(gctx,
+ get_EMTD_V1(fmt),
+ get_EMTD_V1(fmt),
+ SIZE_OF_EMTD_V1);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_encrypt failed");
+ CRYPTO_gcm128_release(gctx);
+ return ret;
+ }
+ /*
+ * set MAC of encrypted part of metadata
+ */
+ CRYPTO_gcm128_tag(gctx, get_EMTD_V1_MAC(fmt), SIZE_OF_EMTD_V1_MAC);
+ CRYPTO_gcm128_release(gctx);
+ /*
+ * set the first MAC of non-encrypted part of metadata
+ */
+ return create_link_mac_v1(fmt, 0, loc, info, master);
+}
+
+/*
+ * Called by fops:
+ * ->create();
+ * ->link();
+ *
+ * Pack common and version-specific parts of file's metadata
+ * Pre-conditions: @info contains valid object-id.
+ */
+int32_t create_format(unsigned char *wire,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master)
+{
+ struct crypt_format *fmt = (struct crypt_format *)wire;
+
+ fmt->loader_id = current_mtd_loader();
+
+ wire += sizeof(struct crypt_format);
+ return mtd_loaders[current_mtd_loader()].create_format(wire, loc,
+ info, master);
+}
+
+/*
+ * Append or overwrite per-link mac of @mac_idx index
+ * in accordance with the new pathname
+ */
+int32_t appov_link_mac_v1(unsigned char *new,
+ unsigned char *old,
+ uint32_t old_size,
+ int32_t mac_idx,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local)
+{
+ memcpy(new, old, old_size);
+ return create_link_mac_v1((struct mtd_format_v1 *)new, mac_idx,
+ loc, info, master);
+}
+
+/*
+ * Cut per-link mac of @mac_idx index
+ */
+static int32_t cut_link_mac_v1(unsigned char *new,
+ unsigned char *old,
+ uint32_t old_size,
+ int32_t mac_idx,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local)
+{
+ memcpy(new,
+ old,
+ sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE * (mac_idx - 1));
+
+ memcpy(new + sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE * (mac_idx - 1),
+ old + sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE * mac_idx,
+ old_size - (sizeof(struct mtd_format_v1) + NMTD_8_MAC_SIZE * mac_idx));
+ return 0;
+}
+
+int32_t update_format_v1(unsigned char *new,
+ unsigned char *old,
+ size_t old_len,
+ int32_t mac_idx, /* of old name */
+ mtd_op_t op,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local)
+{
+ switch (op) {
+ case MTD_APPEND:
+ mac_idx = 1 + (old_len - sizeof(struct mtd_format_v1))/8;
+ case MTD_OVERWRITE:
+ return appov_link_mac_v1(new, old, old_len, mac_idx,
+ loc, info, master, local);
+ case MTD_CUT:
+ return cut_link_mac_v1(new, old, old_len, mac_idx,
+ loc, info, master, local);
+ default:
+ gf_log("crypt", GF_LOG_ERROR, "Bad mtd operation %d", op);
+ return -1;
+ }
+}
+
+/*
+ * Called by fops:
+ *
+ * ->link()
+ * ->unlink()
+ * ->rename()
+ *
+ */
+int32_t update_format(unsigned char *new,
+ unsigned char *old,
+ size_t old_len,
+ int32_t mac_idx,
+ mtd_op_t op,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local)
+{
+ if (!new)
+ return 0;
+ memcpy(new, old, sizeof(struct crypt_format));
+
+ old += sizeof(struct crypt_format);
+ new += sizeof(struct crypt_format);
+ old_len -= sizeof(struct crypt_format);
+
+ return mtd_loaders[current_mtd_loader()].update_format(new, old,
+ old_len,
+ mac_idx, op,
+ loc, info,
+ master, local);
+}
+
+/*
+ * Perform preliminary checks of found metadata
+ * Return < 0 on errors;
+ * Return number of object-id MACs (>= 1) on success
+ */
+int32_t check_format_v1(uint32_t len, unsigned char *wire)
+{
+ uint32_t nr_links;
+
+ if (len < sizeof(struct mtd_format_v1)) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "v1-loader: bad metadata size %d", len);
+ goto error;
+ }
+ len -= sizeof(struct mtd_format_v1);
+ if (len % sizeof(nmtd_8_mac_t)) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "v1-loader: bad metadata format");
+ goto error;
+ }
+ nr_links = 1 + len / sizeof(nmtd_8_mac_t);
+ if (nr_links > _POSIX_LINK_MAX)
+ goto error;
+ return nr_links;
+ error:
+ return EIO;
+}
+
+/*
+ * Verify per-link MAC specified by index @idx
+ *
+ * return:
+ * -1 on errors;
+ * 0 on failed verification;
+ * 1 on sucessful verification
+ */
+static int32_t verify_link_mac_v1(struct mtd_format_v1 *fmt,
+ uint32_t idx /* index of the mac to verify */,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master)
+{
+ int32_t ret;
+ unsigned char *mac;
+ unsigned char cmac[16];
+
+ mac = get_NMTD_V1_MAC(fmt) + idx * SIZE_OF_NMTD_V1_MAC;
+
+ ret = calc_link_mac_v1(fmt, loc, cmac, info, master);
+ if (ret)
+ return -1;
+ if (memcmp(cmac, mac, SIZE_OF_NMTD_V1_MAC))
+ return 0;
+ return 1;
+}
+
+/*
+ * Lookup per-link MAC by pathname.
+ *
+ * return index of the MAC, if it was found;
+ * return < 0 on errors, or if the MAC wasn't found
+ */
+static int32_t lookup_link_mac_v1(struct mtd_format_v1 *fmt,
+ uint32_t nr_macs,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master)
+{
+ int32_t ret;
+ uint32_t idx;
+
+ for (idx = 0; idx < nr_macs; idx++) {
+ ret = verify_link_mac_v1(fmt, idx, loc, info, master);
+ if (ret < 0)
+ return ret;
+ if (ret > 0)
+ return idx;
+ }
+ return -ENOENT;
+}
+
+/*
+ * Extract version-specific part of metadata
+ */
+static int32_t open_format_v1(unsigned char *wire,
+ int32_t len,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local,
+ gf_boolean_t load_info)
+{
+ int32_t ret;
+ int32_t num_nmtd_macs;
+ struct mtd_format_v1 *fmt;
+ unsigned char mtd_key[16];
+ AES_KEY EMTD_KEY;
+ GCM128_CONTEXT *gctx;
+ uint32_t ad;
+ emtd_8_mac_t gmac;
+ struct object_cipher_info *object;
+
+ num_nmtd_macs = check_format_v1(len, wire);
+ if (num_nmtd_macs <= 0)
+ return EIO;
+ fmt = (struct mtd_format_v1 *)wire;
+
+ ret = lookup_link_mac_v1(fmt, num_nmtd_macs, loc, info, master);
+ if (ret < 0) {
+ gf_log("crypt", GF_LOG_ERROR, "NMTD verification failed");
+ return EINVAL;
+ }
+ local->mac_idx = ret;
+ if (load_info == _gf_false)
+ /* the case of partial open */
+ return 0;
+
+ object = &info->cinfo;
+
+ ret = get_emtd_file_key(info, master, mtd_key);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, "Can not retrieve metadata key");
+ return ret;
+ }
+ /*
+ * decrypt encrypted meta-data
+ */
+ ret = AES_set_encrypt_key(mtd_key, sizeof(mtd_key)*8, &EMTD_KEY);
+ if (ret < 0) {
+ gf_log("crypt", GF_LOG_ERROR, "Can not set encrypt key");
+ return ret;
+ }
+ gctx = CRYPTO_gcm128_new(&EMTD_KEY, (block128_f)AES_encrypt);
+ if (!gctx) {
+ gf_log("crypt", GF_LOG_ERROR, "Can not alloc gcm context");
+ return ENOMEM;
+ }
+ CRYPTO_gcm128_setiv(gctx, info->oid, sizeof(uuid_t));
+
+ ad = htole32(MTD_LOADER_V1);
+ ret = CRYPTO_gcm128_aad(gctx, (const unsigned char *)&ad, sizeof(ad));
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_aad failed");
+ CRYPTO_gcm128_release(gctx);
+ return ret;
+ }
+ ret = CRYPTO_gcm128_decrypt(gctx,
+ get_EMTD_V1(fmt),
+ get_EMTD_V1(fmt),
+ SIZE_OF_EMTD_V1);
+ if (ret) {
+ gf_log("crypt", GF_LOG_ERROR, " CRYPTO_gcm128_decrypt failed");
+ CRYPTO_gcm128_release(gctx);
+ return ret;
+ }
+ /*
+ * verify metadata
+ */
+ CRYPTO_gcm128_tag(gctx, gmac, sizeof(gmac));
+ CRYPTO_gcm128_release(gctx);
+ if (memcmp(gmac, get_EMTD_V1_MAC(fmt), SIZE_OF_EMTD_V1_MAC)) {
+ gf_log("crypt", GF_LOG_ERROR, "EMTD verification failed");
+ return EINVAL;
+ }
+ /*
+ * load verified metadata to the private part of inode
+ */
+ info->nr_minor = fmt->minor_id;
+
+ object->o_alg = fmt->alg_id;
+ object->o_dkey_size = fmt->dkey_factor << KEY_FACTOR_BITS;
+ object->o_block_bits = fmt->block_bits;
+ object->o_mode = fmt->mode_id;
+
+ return check_file_metadata(info);
+}
+
+/*
+ * perform metadata authentication against @loc->path;
+ * extract crypt-specific attribtes and populate @info
+ * with them (optional)
+ */
+int32_t open_format(unsigned char *str,
+ int32_t len,
+ loc_t *loc,
+ struct crypt_inode_info *info,
+ struct master_cipher_info *master,
+ crypt_local_t *local,
+ gf_boolean_t load_info)
+{
+ struct crypt_format *fmt;
+ if (len < sizeof(*fmt)) {
+ gf_log("crypt", GF_LOG_ERROR, "Bad core format");
+ return EIO;
+ }
+ fmt = (struct crypt_format *)str;
+
+ if (fmt->loader_id >= LAST_MTD_LOADER) {
+ gf_log("crypt", GF_LOG_ERROR,
+ "Unsupported loader id %d", fmt->loader_id);
+ return EINVAL;
+ }
+ str += sizeof(*fmt);
+ len -= sizeof(*fmt);
+
+ return mtd_loaders[fmt->loader_id].open_format(str,
+ len,
+ loc,
+ info,
+ master,
+ local,
+ load_info);
+}
+
+struct crypt_mtd_loader mtd_loaders [LAST_MTD_LOADER] = {
+ [MTD_LOADER_V1] =
+ {.format_size = format_size_v1,
+ .create_format = create_format_v1,
+ .open_format = open_format_v1,
+ .update_format = update_format_v1
+ }
+};
+
+/*
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 80
+ scroll-step: 1
+ End:
+*/
diff --git a/xlators/encryption/crypt/src/metadata.h b/xlators/encryption/crypt/src/metadata.h
new file mode 100644
index 000000000..a92f149ef
--- /dev/null
+++ b/xlators/encryption/crypt/src/metadata.h
@@ -0,0 +1,74 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __METADATA_H__
+#define __METADATA_H__
+
+#define NMTD_8_MAC_SIZE (8)
+#define EMTD_8_MAC_SIZE (8)
+
+typedef uint8_t nmtd_8_mac_t[NMTD_8_MAC_SIZE];
+typedef uint8_t emtd_8_mac_t[EMTD_8_MAC_SIZE] ;
+
+/*
+ * Version "v1" of file's metadata.
+ * Metadata of this version has 4 components:
+ *
+ * 1) EMTD (Encrypted part of MeTaData);
+ * 2) NMTD (Non-encrypted part of MeTaData);
+ * 3) EMTD_MAC; (EMTD Message Authentication Code);
+ * 4) Array of per-link NMTD MACs (for every (hard)link it includes
+ * exactly one MAC)
+ */
+struct mtd_format_v1 {
+ /* EMTD, encrypted part of meta-data */
+ uint8_t alg_id; /* cipher algorithm id (only AES for now) */
+ uint8_t mode_id; /* cipher mode id; (only XTS for now) */
+ uint8_t block_bits; /* encoded block size */
+ uint8_t minor_id; /* client translator id */
+ uint8_t dkey_factor; /* encoded size of the data key */
+ /* MACs */
+ emtd_8_mac_t gmac; /* MAC of the encrypted meta-data, 8 bytes */
+ nmtd_8_mac_t omac; /* per-link MACs of the non-encrypted
+ * meta-data: at least one such MAC is always
+ * present */
+} __attribute__((packed));
+
+/*
+ * NMTD, the non-encrypted part of metadata of version "v1"
+ * is file's gfid, which is generated on trusted machines.
+ */
+#define SIZE_OF_NMTD_V1 (sizeof(uuid_t))
+#define SIZE_OF_EMTD_V1 (offsetof(struct mtd_format_v1, gmac) - \
+ offsetof(struct mtd_format_v1, alg_id))
+#define SIZE_OF_NMTD_V1_MAC (NMTD_8_MAC_SIZE)
+#define SIZE_OF_EMTD_V1_MAC (EMTD_8_MAC_SIZE)
+
+static inline unsigned char *get_EMTD_V1(struct mtd_format_v1 *format)
+{
+ return &format->alg_id;
+}
+
+static inline unsigned char *get_NMTD_V1(struct crypt_inode_info *info)
+{
+ return info->oid;
+}
+
+static inline unsigned char *get_EMTD_V1_MAC(struct mtd_format_v1 *format)
+{
+ return format->gmac;
+}
+
+static inline unsigned char *get_NMTD_V1_MAC(struct mtd_format_v1 *format)
+{
+ return format->omac;
+}
+
+#endif /* __METADATA_H__ */
diff --git a/xlators/encryption/rot-13/src/Makefile.am b/xlators/encryption/rot-13/src/Makefile.am
index ba5e623d8..94e8d18e7 100644
--- a/xlators/encryption/rot-13/src/Makefile.am
+++ b/xlators/encryption/rot-13/src/Makefile.am
@@ -1,14 +1,15 @@
xlator_LTLIBRARIES = rot-13.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/encryption
-rot_13_la_LDFLAGS = -module -avoidversion
+rot_13_la_LDFLAGS = -module -avoid-version
rot_13_la_SOURCES = rot-13.c
rot_13_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = rot-13.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/encryption/rot-13/src/rot-13.c b/xlators/encryption/rot-13/src/rot-13.c
index 738b8414f..b9ac29a72 100644
--- a/xlators/encryption/rot-13/src/rot-13.c
+++ b/xlators/encryption/rot-13/src/rot-13.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <ctype.h>
#include <sys/uio.h>
@@ -32,13 +22,13 @@
#include "rot-13.h"
/*
- * This is a rot13 ``encryption'' xlator. It rot13's data when
- * writing to disk and rot13's it back when reading it.
+ * This is a rot13 ``encryption'' xlator. It rot13's data when
+ * writing to disk and rot13's it back when reading it.
* This xlator is meant as an example, NOT FOR PRODUCTION
* USE ;) (hence no error-checking)
*/
-void
+void
rot13 (char *buf, int len)
{
int i;
@@ -67,15 +57,16 @@ rot13_readv_cbk (call_frame_t *frame,
int32_t op_errno,
struct iovec *vector,
int32_t count,
- struct stat *stbuf,
- struct iobref *iobref)
+ struct iatt *stbuf,
+ struct iobref *iobref, dict_t *xdata)
{
rot_13_private_t *priv = (rot_13_private_t *)this->private;
-
+
if (priv->decrypt_read)
rot13_iovec (vector, count);
- STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf, iobref);
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
+ stbuf, iobref, xdata);
return 0;
}
@@ -84,13 +75,13 @@ rot13_readv (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
size_t size,
- off_t offset)
+ off_t offset, uint32_t flags, dict_t *xdata)
{
STACK_WIND (frame,
rot13_readv_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->readv,
- fd, size, offset);
+ fd, size, offset, flags, xdata);
return 0;
}
@@ -100,9 +91,11 @@ rot13_writev_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *stbuf)
+ struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, stbuf);
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
@@ -111,20 +104,20 @@ rot13_writev (call_frame_t *frame,
xlator_t *this,
fd_t *fd,
struct iovec *vector,
- int32_t count,
- off_t offset,
- struct iobref *iobref)
+ int32_t count,
+ off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
{
rot_13_private_t *priv = (rot_13_private_t *)this->private;
if (priv->encrypt_write)
rot13_iovec (vector, count);
- STACK_WIND (frame,
+ STACK_WIND (frame,
rot13_writev_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->writev,
- fd, vector, count, offset,
- iobref);
+ fd, vector, count, offset, flags,
+ iobref, xdata);
return 0;
}
@@ -135,7 +128,7 @@ init (xlator_t *this)
rot_13_private_t *priv = NULL;
if (!this->children || this->children->next) {
- gf_log ("rot13", GF_LOG_ERROR,
+ gf_log ("rot13", GF_LOG_ERROR,
"FATAL: rot13 should have exactly one child");
return -1;
}
@@ -144,9 +137,11 @@ init (xlator_t *this)
gf_log (this->name, GF_LOG_WARNING,
"dangling volume. check volfile ");
}
-
- priv = CALLOC (sizeof (rot_13_private_t), 1);
- ERR_ABORT (priv);
+
+ priv = GF_CALLOC (sizeof (rot_13_private_t), 1, 0);
+ if (!priv)
+ return -1;
+
priv->decrypt_read = 1;
priv->encrypt_write = 1;
@@ -173,13 +168,16 @@ init (xlator_t *this)
return 0;
}
-void
+void
fini (xlator_t *this)
{
rot_13_private_t *priv = this->private;
-
- FREE (priv);
-
+
+ if (!priv)
+ return;
+ this->private = NULL;
+ GF_FREE (priv);
+
return;
}
@@ -188,18 +186,14 @@ struct xlator_fops fops = {
.writev = rot13_writev
};
-struct xlator_mops mops = {
-};
-
-struct xlator_cbks cbks = {
-};
+struct xlator_cbks cbks;
struct volume_options options[] = {
- { .key = {"encrypt-write"},
+ { .key = {"encrypt-write"},
.type = GF_OPTION_TYPE_BOOL
},
- { .key = {"decrypt-read"},
- .type = GF_OPTION_TYPE_BOOL
+ { .key = {"decrypt-read"},
+ .type = GF_OPTION_TYPE_BOOL
},
{ .key = {NULL} },
};
diff --git a/xlators/encryption/rot-13/src/rot-13.h b/xlators/encryption/rot-13/src/rot-13.h
index d45803517..3e9fc19c7 100644
--- a/xlators/encryption/rot-13/src/rot-13.h
+++ b/xlators/encryption/rot-13/src/rot-13.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __ROT_13_H__
#define __ROT_13_H__
diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am
index 9ac9b6f19..d2f5ef192 100644
--- a/xlators/features/Makefile.am
+++ b/xlators/features/Makefile.am
@@ -1,3 +1,4 @@
-SUBDIRS = locks trash path-convertor filter quota
+SUBDIRS = locks quota read-only mac-compat quiesce marker index \
+ protect compress changelog gfid-access $(GLUPY_SUBDIR) qemu-block # trash path-converter # filter
-CLEANFILES =
+CLEANFILES =
diff --git a/xlators/features/changelog/Makefile.am b/xlators/features/changelog/Makefile.am
new file mode 100644
index 000000000..153bb6850
--- /dev/null
+++ b/xlators/features/changelog/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src lib
+
+CLEANFILES =
diff --git a/scheduler/nufa/Makefile.am b/xlators/features/changelog/lib/Makefile.am
index d471a3f92..a985f42a8 100644
--- a/scheduler/nufa/Makefile.am
+++ b/xlators/features/changelog/lib/Makefile.am
@@ -1,3 +1,3 @@
SUBDIRS = src
-CLEANFILES =
+CLEANFILES =
diff --git a/xlators/features/changelog/lib/examples/c/get-changes.c b/xlators/features/changelog/lib/examples/c/get-changes.c
new file mode 100644
index 000000000..14562585a
--- /dev/null
+++ b/xlators/features/changelog/lib/examples/c/get-changes.c
@@ -0,0 +1,87 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/**
+ * get set of new changes every 10 seconds (just print the file names)
+ *
+ * Compile it using:
+ * gcc -o getchanges `pkg-config --cflags libgfchangelog` get-changes.c \
+ * `pkg-config --libs libgfchangelog`
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/un.h>
+#include <limits.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <errno.h>
+
+#include "changelog.h"
+
+#define handle_error(fn) \
+ printf ("%s (reason: %s)\n", fn, strerror (errno))
+
+int
+main (int argc, char ** argv)
+{
+ int i = 0;
+ int ret = 0;
+ ssize_t nr_changes = 0;
+ ssize_t changes = 0;
+ char fbuf[PATH_MAX] = {0,};
+
+ /* get changes for brick "/home/vshankar/export/yow/yow-1" */
+ ret = gf_changelog_register ("/home/vshankar/export/yow/yow-1",
+ "/tmp/scratch", "/tmp/change.log", 9, 5);
+ if (ret) {
+ handle_error ("register failed");
+ goto out;
+ }
+
+ while (1) {
+ i = 0;
+ nr_changes = gf_changelog_scan ();
+ if (nr_changes < 0) {
+ handle_error ("scan(): ");
+ break;
+ }
+
+ if (nr_changes == 0)
+ goto next;
+
+ printf ("Got %ld changelog files\n", nr_changes);
+
+ while ( (changes =
+ gf_changelog_next_change (fbuf, PATH_MAX)) > 0) {
+ printf ("changelog file [%d]: %s\n", ++i, fbuf);
+
+ /* process changelog */
+ /* ... */
+ /* ... */
+ /* ... */
+ /* done processing */
+
+ ret = gf_changelog_done (fbuf);
+ if (ret)
+ handle_error ("gf_changelog_done");
+ }
+
+ if (changes == -1)
+ handle_error ("gf_changelog_next_change");
+
+ next:
+ sleep (10);
+ }
+
+ out:
+ return ret;
+}
diff --git a/xlators/features/changelog/lib/examples/python/changes.py b/xlators/features/changelog/lib/examples/python/changes.py
new file mode 100644
index 000000000..d21db8eab
--- /dev/null
+++ b/xlators/features/changelog/lib/examples/python/changes.py
@@ -0,0 +1,32 @@
+#!/usr/bin/python
+
+import os
+import sys
+import time
+import libgfchangelog
+
+cl = libgfchangelog.Changes()
+
+def get_changes(brick, scratch_dir, log_file, log_level, interval):
+ change_list = []
+ try:
+ cl.cl_register(brick, scratch_dir, log_file, log_level)
+ while True:
+ cl.cl_scan()
+ change_list = cl.cl_getchanges()
+ if change_list:
+ print change_list
+ for change in change_list:
+ print('done with %s' % (change))
+ cl.cl_done(change)
+ time.sleep(interval)
+ except OSError:
+ ex = sys.exc_info()[1]
+ print ex
+
+if __name__ == '__main__':
+ if len(sys.argv) != 5:
+ print("usage: %s <brick> <scratch-dir> <log-file> <fetch-interval>"
+ % (sys.argv[0]))
+ sys.exit(1)
+ get_changes(sys.argv[1], sys.argv[2], sys.argv[3], 9, int(sys.argv[4]))
diff --git a/xlators/features/changelog/lib/examples/python/libgfchangelog.py b/xlators/features/changelog/lib/examples/python/libgfchangelog.py
new file mode 100644
index 000000000..68ec3baf1
--- /dev/null
+++ b/xlators/features/changelog/lib/examples/python/libgfchangelog.py
@@ -0,0 +1,64 @@
+import os
+from ctypes import *
+from ctypes.util import find_library
+
+class Changes(object):
+ libgfc = CDLL(find_library("gfchangelog"), use_errno=True)
+
+ @classmethod
+ def geterrno(cls):
+ return get_errno()
+
+ @classmethod
+ def raise_oserr(cls):
+ errn = cls.geterrno()
+ raise OSError(errn, os.strerror(errn))
+
+ @classmethod
+ def _get_api(cls, call):
+ return getattr(cls.libgfc, call)
+
+ @classmethod
+ def cl_register(cls, brick, path, log_file, log_level, retries = 0):
+ ret = cls._get_api('gf_changelog_register')(brick, path,
+ log_file, log_level, retries)
+ if ret == -1:
+ cls.raise_oserr()
+
+ @classmethod
+ def cl_scan(cls):
+ ret = cls._get_api('gf_changelog_scan')()
+ if ret == -1:
+ cls.raise_oserr()
+
+ @classmethod
+ def cl_startfresh(cls):
+ ret = cls._get_api('gf_changelog_start_fresh')()
+ if ret == -1:
+ cls.raise_oserr()
+
+ @classmethod
+ def cl_getchanges(cls):
+ """ remove hardcoding for path name length """
+ def clsort(f):
+ return f.split('.')[-1]
+ changes = []
+ buf = create_string_buffer('\0', 4096)
+ call = cls._get_api('gf_changelog_next_change')
+
+ while True:
+ ret = call(buf, 4096)
+ if ret in (0, -1):
+ break;
+ changes.append(buf.raw[:ret-1])
+ if ret == -1:
+ cls.raise_oserr()
+ # cleanup tracker
+ cls.cl_startfresh()
+ return sorted(changes, key=clsort)
+
+ @classmethod
+ def cl_done(cls, clfile):
+ ret = cls._get_api('gf_changelog_done')(clfile)
+ if ret == -1:
+ cls.raise_oserr()
diff --git a/xlators/features/changelog/lib/src/Makefile.am b/xlators/features/changelog/lib/src/Makefile.am
new file mode 100644
index 000000000..fbaaea628
--- /dev/null
+++ b/xlators/features/changelog/lib/src/Makefile.am
@@ -0,0 +1,37 @@
+libgfchangelog_la_CFLAGS = -Wall $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS) \
+ -DDATADIR=\"$(localstatedir)\"
+
+libgfchangelog_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 -fpic \
+ -I../../../src/ -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/xlators/features/changelog/src \
+ -DDATADIR=\"$(localstatedir)\"
+
+libgfchangelog_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(GF_GLUSTERFS_LIBS)
+
+libgfchangelog_la_LDFLAGS = $(GF_LDFLAGS)
+
+libgfchangelogdir = $(includedir)/glusterfs/gfchangelog
+lib_LTLIBRARIES = libgfchangelog.la
+
+CONTRIB_BUILDDIR = $(top_builddir)/contrib
+
+libgfchangelog_la_SOURCES = gf-changelog.c gf-changelog-process.c \
+ gf-changelog-helpers.c $(CONTRIBDIR)/uuid/clear.c \
+ $(CONTRIBDIR)/uuid/copy.c $(CONTRIBDIR)/uuid/gen_uuid.c \
+ $(CONTRIBDIR)/uuid/pack.c $(CONTRIBDIR)/uuid/parse.c \
+ $(CONTRIBDIR)/uuid/unparse.c $(CONTRIBDIR)/uuid/uuid_time.c \
+ $(CONTRIBDIR)/uuid/compare.c $(CONTRIBDIR)/uuid/isnull.c \
+ $(CONTRIBDIR)/uuid/unpack.c
+
+noinst_HEADERS = gf-changelog-helpers.h $(CONTRIBDIR)/uuid/uuidd.h \
+ $(CONTRIBDIR)/uuid/uuid.h $(CONTRIBDIR)/uuid/uuidP.h \
+ $(CONTRIB_BUILDDIR)/uuid/uuid_types.h
+
+libgfchangelog_HEADERS = changelog.h
+
+CLEANFILES =
+CONFIG_CLEAN_FILES = $(CONTRIB_BUILDDIR)/uuid/uuid_types.h
+
+$(top_builddir)/libglusterfs/src/libglusterfs.la:
+ $(MAKE) -C $(top_builddir)/libglusterfs/src/ all
diff --git a/xlators/features/changelog/lib/src/changelog.h b/xlators/features/changelog/lib/src/changelog.h
new file mode 100644
index 000000000..5cddfb583
--- /dev/null
+++ b/xlators/features/changelog/lib/src/changelog.h
@@ -0,0 +1,31 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GF_CHANGELOG_H
+#define _GF_CHANGELOG_H
+
+/* API set */
+
+int
+gf_changelog_register (char *brick_path, char *scratch_dir,
+ char *log_file, int log_levl, int max_reconnects);
+ssize_t
+gf_changelog_scan ();
+
+int
+gf_changelog_start_fresh ();
+
+ssize_t
+gf_changelog_next_change (char *bufptr, size_t maxlen);
+
+int
+gf_changelog_done (char *file);
+
+#endif
diff --git a/xlators/features/changelog/lib/src/gf-changelog-helpers.c b/xlators/features/changelog/lib/src/gf-changelog-helpers.c
new file mode 100644
index 000000000..1eef8bf04
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog-helpers.c
@@ -0,0 +1,180 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "changelog-mem-types.h"
+#include "gf-changelog-helpers.h"
+
+ssize_t gf_changelog_read_path (int fd, char *buffer, size_t bufsize)
+{
+ return read (fd, buffer, bufsize);
+}
+
+size_t
+gf_changelog_write (int fd, char *buffer, size_t len)
+{
+ ssize_t size = 0;
+ size_t writen = 0;
+
+ while (writen < len) {
+ size = write (fd,
+ buffer + writen, len - writen);
+ if (size <= 0)
+ break;
+
+ writen += size;
+ }
+
+ return writen;
+}
+
+void
+gf_rfc3986_encode (unsigned char *s, char *enc, char *estr)
+{
+ for (; *s; s++) {
+ if (estr[*s])
+ sprintf(enc, "%c", estr[*s]);
+ else
+ sprintf(enc, "%%%02X", *s);
+ while (*++enc);
+ }
+}
+
+/**
+ * thread safe version of readline with buffering
+ * (taken from Unix Network Programming Volume I, W.R. Stevens)
+ *
+ * This is favoured over fgets() as we'd need to ftruncate()
+ * (see gf_changelog_scan() API) to record new changelog files.
+ * stream open functions does have a truncate like api (although
+ * that can be done via @fflush(fp), @ftruncate(fd) and @fseek(fp),
+ * but this involves mixing POSIX file descriptors and stream FILE *).
+ *
+ * NOTE: This implmentation still does work with more than one fd's
+ * used to perform gf_readline(). For this very reason it's not
+ * made a part of libglusterfs.
+ */
+
+static pthread_key_t rl_key;
+static pthread_once_t rl_once = PTHREAD_ONCE_INIT;
+
+static void
+readline_destructor (void *ptr)
+{
+ GF_FREE (ptr);
+}
+
+static void
+readline_once (void)
+{
+ pthread_key_create (&rl_key, readline_destructor);
+}
+
+static ssize_t
+my_read (read_line_t *tsd, int fd, char *ptr)
+{
+ if (tsd->rl_cnt <= 0) {
+ if ( (tsd->rl_cnt = read (fd, tsd->rl_buf, MAXLINE)) < 0 )
+ return -1;
+ else if (tsd->rl_cnt == 0)
+ return 0;
+ tsd->rl_bufptr = tsd->rl_buf;
+ }
+
+ tsd->rl_cnt--;
+ *ptr = *tsd->rl_bufptr++;
+ return 1;
+}
+
+static int
+gf_readline_init_once (read_line_t **tsd)
+{
+ if (pthread_once (&rl_once, readline_once) != 0)
+ return -1;
+
+ *tsd = pthread_getspecific (rl_key);
+ if (*tsd)
+ goto out;
+
+ *tsd = GF_CALLOC (1, sizeof (**tsd),
+ gf_changelog_mt_libgfchangelog_rl_t);
+ if (!*tsd)
+ return -1;
+
+ if (pthread_setspecific (rl_key, *tsd) != 0)
+ return -1;
+
+ out:
+ return 0;
+}
+
+ssize_t
+gf_readline (int fd, void *vptr, size_t maxlen)
+{
+ size_t n = 0;
+ size_t rc = 0;
+ char c = ' ';
+ char *ptr = NULL;
+ read_line_t *tsd = NULL;
+
+ if (gf_readline_init_once (&tsd))
+ return -1;
+
+ ptr = vptr;
+ for (n = 1; n < maxlen; n++) {
+ if ( (rc = my_read (tsd, fd, &c)) == 1 ) {
+ *ptr++ = c;
+ if (c == '\n')
+ break;
+ } else if (rc == 0) {
+ *ptr = '\0';
+ return (n - 1);
+ } else
+ return -1;
+ }
+
+ *ptr = '\0';
+ return n;
+
+}
+
+off_t
+gf_lseek (int fd, off_t offset, int whence)
+{
+ off_t off = 0;
+ read_line_t *tsd = NULL;
+
+ if (gf_readline_init_once (&tsd))
+ return -1;
+
+ if ( (off = lseek (fd, offset, whence)) == -1)
+ return -1;
+
+ tsd->rl_cnt = 0;
+ tsd->rl_bufptr = tsd->rl_buf;
+
+ return off;
+}
+
+int
+gf_ftruncate (int fd, off_t length)
+{
+ read_line_t *tsd = NULL;
+
+ if (gf_readline_init_once (&tsd))
+ return -1;
+
+ if (ftruncate (fd, 0))
+ return -1;
+
+ tsd->rl_cnt = 0;
+ tsd->rl_bufptr = tsd->rl_buf;
+
+ return 0;
+}
diff --git a/xlators/features/changelog/lib/src/gf-changelog-helpers.h b/xlators/features/changelog/lib/src/gf-changelog-helpers.h
new file mode 100644
index 000000000..3aa6ed7b8
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog-helpers.h
@@ -0,0 +1,97 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GF_CHANGELOG_HELPERS_H
+#define _GF_CHANGELOG_HELPERS_H
+
+#include <unistd.h>
+#include <dirent.h>
+#include <limits.h>
+#include <pthread.h>
+
+#include <xlator.h>
+
+#define GF_CHANGELOG_TRACKER "tracker"
+
+#define GF_CHANGELOG_CURRENT_DIR ".current"
+#define GF_CHANGELOG_PROCESSED_DIR ".processed"
+#define GF_CHANGELOG_PROCESSING_DIR ".processing"
+
+#ifndef MAXLINE
+#define MAXLINE 4096
+#endif
+
+#define GF_CHANGELOG_FILL_BUFFER(ptr, ascii, off, len) do { \
+ memcpy (ascii + off, ptr, len); \
+ off += len; \
+ } while (0)
+
+typedef struct read_line {
+ int rl_cnt;
+ char *rl_bufptr;
+ char rl_buf[MAXLINE];
+} read_line_t;
+
+typedef struct gf_changelog {
+ xlator_t *this;
+
+ /* 'processing' directory stream */
+ DIR *gfc_dir;
+
+ /* fd to the tracker file */
+ int gfc_fd;
+
+ /* connection retries */
+ int gfc_connretries;
+
+ char gfc_sockpath[PATH_MAX];
+
+ char gfc_brickpath[PATH_MAX];
+
+ /* socket for recieving notifications */
+ int gfc_sockfd;
+
+ char *gfc_working_dir;
+
+ /* RFC 3986 string encoding */
+ char rfc3986[256];
+
+ char gfc_current_dir[PATH_MAX];
+ char gfc_processed_dir[PATH_MAX];
+ char gfc_processing_dir[PATH_MAX];
+
+ pthread_t gfc_changelog_processor;
+} gf_changelog_t;
+
+int
+gf_changelog_notification_init (xlator_t *this, gf_changelog_t *gfc);
+
+void *
+gf_changelog_process (void *data);
+
+ssize_t
+gf_changelog_read_path (int fd, char *buffer, size_t bufsize);
+
+void
+gf_rfc3986_encode (unsigned char *s, char *enc, char *estr);
+
+size_t
+gf_changelog_write (int fd, char *buffer, size_t len);
+
+ssize_t
+gf_readline (int fd, void *vptr, size_t maxlen);
+
+int
+gf_ftruncate (int fd, off_t length);
+
+off_t
+gf_lseek (int fd, off_t offset, int whence);
+
+#endif
diff --git a/xlators/features/changelog/lib/src/gf-changelog-process.c b/xlators/features/changelog/lib/src/gf-changelog-process.c
new file mode 100644
index 000000000..df7204931
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog-process.c
@@ -0,0 +1,571 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <unistd.h>
+#include <pthread.h>
+
+#include "uuid.h"
+#include "globals.h"
+#include "glusterfs.h"
+
+#include "gf-changelog-helpers.h"
+
+/* from the changelog translator */
+#include "changelog-misc.h"
+
+extern int byebye;
+
+/**
+ * number of gfid records after fop number
+ */
+int nr_gfids[] = {
+ [GF_FOP_MKNOD] = 1,
+ [GF_FOP_MKDIR] = 1,
+ [GF_FOP_UNLINK] = 1,
+ [GF_FOP_RMDIR] = 1,
+ [GF_FOP_SYMLINK] = 1,
+ [GF_FOP_RENAME] = 2,
+ [GF_FOP_LINK] = 1,
+ [GF_FOP_CREATE] = 1,
+};
+
+static char *
+binary_to_ascii (uuid_t uuid)
+{
+ return uuid_utoa (uuid);
+}
+
+static char *
+conv_noop (char *ptr) { return ptr; }
+
+#define VERIFY_SEPARATOR(ptr, plen, perr) \
+ { \
+ if (*(ptr + plen) != '\0') { \
+ perr = 1; \
+ break; \
+ } \
+ }
+
+#define MOVER_MOVE(mover, nleft, bytes) \
+ { \
+ mover += bytes; \
+ nleft -= bytes; \
+ } \
+
+#define PARSE_GFID(mov, ptr, le, fn, perr) \
+ { \
+ VERIFY_SEPARATOR (mov, le, perr); \
+ ptr = fn (mov); \
+ if (!ptr) { \
+ perr = 1; \
+ break; \
+ } \
+ }
+
+#define FILL_AND_MOVE(pt, buf, of, mo, nl, le) \
+ { \
+ GF_CHANGELOG_FILL_BUFFER (pt, buf, of, strlen (pt)); \
+ MOVER_MOVE (mo, nl, le); \
+ }
+
+
+#define PARSE_GFID_MOVE(ptr, uuid, mover, nleft, perr) \
+ { \
+ memcpy (uuid, mover, sizeof (uuid_t)); \
+ ptr = binary_to_ascii (uuid); \
+ if (!ptr) { \
+ perr = 1; \
+ break; \
+ } \
+ MOVER_MOVE (mover, nleft, sizeof (uuid_t)); \
+ } \
+
+#define LINE_BUFSIZE 3*PATH_MAX /* enough buffer for extra chars too */
+
+/**
+ * using mmap() makes parsing easy. fgets() cannot be used here as
+ * the binary gfid could contain a line-feed (0x0A), in that case fgets()
+ * would read an incomplete line and parsing would fail. using POSIX fds
+ * would result is additional code to maintain state in case of partial
+ * reads of data (where multiple entries do not fit extirely in the buffer).
+ *
+ * mmap() gives the flexibility of pointing to an offset in the file
+ * without us worrying about reading it in memory (VM does that for us for
+ * free).
+ */
+
+static int
+gf_changelog_parse_binary (xlator_t *this,
+ gf_changelog_t *gfc, int from_fd, int to_fd,
+ size_t start_offset, struct stat *stbuf)
+
+{
+ int ret = -1;
+ off_t off = 0;
+ off_t nleft = 0;
+ uuid_t uuid = {0,};
+ char *ptr = NULL;
+ char *bname_start = NULL;
+ char *bname_end = NULL;
+ char *mover = NULL;
+ char *start = NULL;
+ char current_mover = ' ';
+ size_t blen = 0;
+ int parse_err = 0;
+ char ascii[LINE_BUFSIZE] = {0,};
+
+ nleft = stbuf->st_size;
+
+ start = (char *) mmap (NULL, nleft,
+ PROT_READ, MAP_PRIVATE, from_fd, 0);
+ if (!start) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "mmap() error (reason: %s)", strerror (errno));
+ goto out;
+ }
+
+ mover = start;
+
+ MOVER_MOVE (mover, nleft, start_offset);
+
+ while (nleft > 0) {
+
+ off = blen = 0;
+ ptr = bname_start = bname_end = NULL;
+
+ current_mover = *mover;
+
+ switch (current_mover) {
+ case 'D':
+ case 'M':
+ MOVER_MOVE (mover, nleft, 1);
+ PARSE_GFID_MOVE (ptr, uuid, mover, nleft, parse_err);
+
+ break;
+
+ case 'E':
+ MOVER_MOVE (mover, nleft, 1);
+ PARSE_GFID_MOVE (ptr, uuid, mover, nleft, parse_err);
+
+ bname_start = mover;
+ if ( (bname_end = strchr (mover, '\n')) == NULL ) {
+ parse_err = 1;
+ break;
+ }
+
+ blen = bname_end - bname_start;
+ MOVER_MOVE (mover, nleft, blen);
+
+ break;
+
+ default:
+ parse_err = 1;
+ }
+
+ if (parse_err)
+ break;
+
+ GF_CHANGELOG_FILL_BUFFER (&current_mover, ascii, off, 1);
+ GF_CHANGELOG_FILL_BUFFER (" ", ascii, off, 1);
+ GF_CHANGELOG_FILL_BUFFER (ptr, ascii, off, strlen (ptr));
+ if (blen)
+ GF_CHANGELOG_FILL_BUFFER (bname_start,
+ ascii, off, blen);
+ GF_CHANGELOG_FILL_BUFFER ("\n", ascii, off, 1);
+
+ if (gf_changelog_write (to_fd, ascii, off) != off) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "processing binary changelog failed due to "
+ " error in writing ascii change (reason: %s)",
+ strerror (errno));
+ break;
+ }
+
+ MOVER_MOVE (mover, nleft, 1);
+ }
+
+ if ( (nleft == 0) && (!parse_err))
+ ret = 0;
+
+ if (munmap (start, stbuf->st_size))
+ gf_log (this->name, GF_LOG_ERROR,
+ "munmap() error (reason: %s)", strerror (errno));
+ out:
+ return ret;
+}
+
+/**
+ * ascii decoder:
+ * - separate out one entry from another
+ * - use fop name rather than fop number
+ */
+static int
+gf_changelog_parse_ascii (xlator_t *this,
+ gf_changelog_t *gfc, int from_fd, int to_fd,
+ size_t start_offset, struct stat *stbuf)
+{
+ int ng = 0;
+ int ret = -1;
+ int fop = 0;
+ int len = 0;
+ off_t off = 0;
+ off_t nleft = 0;
+ char *ptr = NULL;
+ char *eptr = NULL;
+ char *start = NULL;
+ char *mover = NULL;
+ int parse_err = 0;
+ char current_mover = ' ';
+ char ascii[LINE_BUFSIZE] = {0,};
+ const char *fopname = NULL;
+
+ nleft = stbuf->st_size;
+
+ start = (char *) mmap (NULL, nleft,
+ PROT_READ, MAP_PRIVATE, from_fd, 0);
+ if (!start) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "mmap() error (reason: %s)", strerror (errno));
+ goto out;
+ }
+
+ mover = start;
+
+ MOVER_MOVE (mover, nleft, start_offset);
+
+ while (nleft > 0) {
+ off = 0;
+ current_mover = *mover;
+
+ GF_CHANGELOG_FILL_BUFFER (&current_mover, ascii, off, 1);
+ GF_CHANGELOG_FILL_BUFFER (" ", ascii, off, 1);
+
+ switch (current_mover) {
+ case 'D':
+ case 'M':
+ MOVER_MOVE (mover, nleft, 1);
+
+ /* target gfid */
+ PARSE_GFID (mover, ptr, UUID_CANONICAL_FORM_LEN,
+ conv_noop, parse_err);
+ FILL_AND_MOVE(ptr, ascii, off,
+ mover, nleft, UUID_CANONICAL_FORM_LEN);
+ break;
+
+ case 'E':
+ MOVER_MOVE (mover, nleft, 1);
+
+ /* target gfid */
+ PARSE_GFID (mover, ptr, UUID_CANONICAL_FORM_LEN,
+ conv_noop, parse_err);
+ FILL_AND_MOVE (ptr, ascii, off,
+ mover, nleft, UUID_CANONICAL_FORM_LEN);
+ FILL_AND_MOVE (" ", ascii, off,
+ mover, nleft, 1);
+
+ /* fop */
+ len = strlen (mover);
+ VERIFY_SEPARATOR (mover, len, parse_err);
+
+ fop = atoi (mover);
+ if ( (fopname = gf_fop_list[fop]) == NULL) {
+ parse_err = 1;
+ break;
+ }
+
+ MOVER_MOVE (mover, nleft, len);
+
+ len = strlen (fopname);
+ GF_CHANGELOG_FILL_BUFFER (fopname, ascii, off, len);
+
+ /* pargfid + bname */
+ ng = nr_gfids[fop];
+ while (ng-- > 0) {
+ MOVER_MOVE (mover, nleft, 1);
+ len = strlen (mover);
+ GF_CHANGELOG_FILL_BUFFER (" ", ascii, off, 1);
+
+ PARSE_GFID (mover, ptr, len,
+ conv_noop, parse_err);
+ eptr = calloc (3, strlen (ptr));
+ if (!eptr) {
+ parse_err = 1;
+ break;
+ }
+
+ gf_rfc3986_encode ((unsigned char *) ptr,
+ eptr, gfc->rfc3986);
+ FILL_AND_MOVE (eptr, ascii, off,
+ mover, nleft, len);
+ free (eptr);
+ }
+
+ break;
+ default:
+ parse_err = 1;
+ }
+
+ if (parse_err)
+ break;
+
+ GF_CHANGELOG_FILL_BUFFER ("\n", ascii, off, 1);
+
+ if (gf_changelog_write (to_fd, ascii, off) != off) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "processing ascii changelog failed due to "
+ " wrror in writing change (reason: %s)",
+ strerror (errno));
+ break;
+ }
+
+ MOVER_MOVE (mover, nleft, 1);
+
+ }
+
+ if ( (nleft == 0) && (!parse_err))
+ ret = 0;
+
+ if (munmap (start, stbuf->st_size))
+ gf_log (this->name, GF_LOG_ERROR,
+ "munmap() error (reason: %s)", strerror (errno));
+
+ out:
+ return ret;
+}
+
+#define COPY_BUFSIZE 8192
+static int
+gf_changelog_copy (xlator_t *this, int from_fd, int to_fd)
+{
+ ssize_t size = 0;
+ char buffer[COPY_BUFSIZE+1] = {0,};
+
+ while (1) {
+ size = read (from_fd, buffer, COPY_BUFSIZE);
+ if (size <= 0)
+ break;
+
+ if (gf_changelog_write (to_fd,
+ buffer, size) != size) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "error processing ascii changlog");
+ size = -1;
+ break;
+ }
+ }
+
+ return (size < 0 ? -1 : 0);
+}
+
+static int
+gf_changelog_decode (xlator_t *this, gf_changelog_t *gfc, int from_fd,
+ int to_fd, struct stat *stbuf, int *zerob)
+{
+ int ret = -1;
+ int encoding = -1;
+ size_t elen = 0;
+ char buffer[1024] = {0,};
+
+ CHANGELOG_GET_ENCODING (from_fd, buffer, 1024, encoding, elen);
+ if (encoding == -1) /* unknown encoding */
+ goto out;
+
+ if (!CHANGELOG_VALID_ENCODING (encoding))
+ goto out;
+
+ if (elen == stbuf->st_size) {
+ *zerob = 1;
+ goto out;
+ }
+
+ /**
+ * start processing after the header
+ */
+ lseek (from_fd, elen, SEEK_SET);
+
+ switch (encoding) {
+ case CHANGELOG_ENCODE_BINARY:
+ /**
+ * this ideally should have been a part of changelog-encoders.c
+ * (ie. part of the changelog translator).
+ */
+ ret = gf_changelog_parse_binary (this, gfc, from_fd,
+ to_fd, elen, stbuf);
+ break;
+
+ case CHANGELOG_ENCODE_ASCII:
+ ret = gf_changelog_parse_ascii (this, gfc, from_fd,
+ to_fd, elen, stbuf);
+ break;
+ default:
+ ret = gf_changelog_copy (this, from_fd, to_fd);
+ }
+
+ out:
+ return ret;
+}
+
+static int
+gf_changelog_consume (xlator_t *this, gf_changelog_t *gfc, char *from_path)
+{
+ int ret = -1;
+ int fd1 = 0;
+ int fd2 = 0;
+ int zerob = 0;
+ struct stat stbuf = {0,};
+ char dest[PATH_MAX] = {0,};
+ char to_path[PATH_MAX] = {0,};
+
+ ret = stat (from_path, &stbuf);
+ if (ret || !S_ISREG(stbuf.st_mode)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "stat failed on changelog file: %s", from_path);
+ goto out;
+ }
+
+ fd1 = open (from_path, O_RDONLY);
+ if (fd1 < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "cannot open changelog file: %s (reason: %s)",
+ from_path, strerror (errno));
+ goto out;
+ }
+
+ (void) snprintf (to_path, PATH_MAX, "%s%s",
+ gfc->gfc_current_dir, basename (from_path));
+ (void) snprintf (dest, PATH_MAX, "%s%s",
+ gfc->gfc_processing_dir, basename (from_path));
+
+ fd2 = open (to_path, O_CREAT | O_TRUNC | O_RDWR,
+ S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+ if (fd2 < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "cannot create ascii changelog file %s (reason %s)",
+ to_path, strerror (errno));
+ goto close_fd;
+ } else {
+ ret = gf_changelog_decode (this, gfc, fd1,
+ fd2, &stbuf, &zerob);
+
+ close (fd2);
+
+ if (!ret) {
+ /* move it to processing on a successfull
+ decode */
+ ret = rename (to_path, dest);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "error moving %s to processing dir"
+ " (reason: %s)", to_path,
+ strerror (errno));
+ }
+
+ /* remove it from .current if it's an empty file */
+ if (zerob) {
+ ret = unlink (to_path);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not unlink %s (reason: %s",
+ to_path, strerror (errno));
+ }
+ }
+
+ close_fd:
+ close (fd1);
+
+ out:
+ return ret;
+}
+
+static char *
+gf_changelog_ext_change (xlator_t *this,
+ gf_changelog_t *gfc, char *path, size_t readlen)
+{
+ int alo = 0;
+ int ret = 0;
+ size_t len = 0;
+ char *buf = NULL;
+
+ buf = path;
+ while (len < readlen) {
+ if (*buf == '\0') {
+ alo = 1;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "processing changelog: %s", path);
+ ret = gf_changelog_consume (this, gfc, path);
+ }
+
+ if (ret)
+ break;
+
+ len++; buf++;
+ if (alo) {
+ alo = 0;
+ path = buf;
+ }
+ }
+
+ return (ret) ? NULL : path;
+}
+
+void *
+gf_changelog_process (void *data)
+{
+ ssize_t len = 0;
+ ssize_t offlen = 0;
+ xlator_t *this = NULL;
+ char *sbuf = NULL;
+ gf_changelog_t *gfc = NULL;
+ char from_path[PATH_MAX] = {0,};
+
+ gfc = (gf_changelog_t *) data;
+ this = gfc->this;
+
+ pthread_detach (pthread_self());
+
+ for (;;) {
+ len = gf_changelog_read_path (gfc->gfc_sockfd,
+ from_path + offlen,
+ PATH_MAX - offlen);
+ if (len < 0)
+ continue; /* ignore it for now */
+
+ if (len == 0) { /* close() from the changelog translator */
+ gf_log (this->name, GF_LOG_INFO, "close from changelog"
+ " notification translator.");
+
+ if (gfc->gfc_connretries != 1) {
+ if (!gf_changelog_notification_init(this, gfc))
+ continue;
+ }
+
+ byebye = 1;
+ break;
+ }
+
+ len += offlen;
+ sbuf = gf_changelog_ext_change (this, gfc, from_path, len);
+ if (!sbuf) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not extract changelog filename");
+ continue;
+ }
+
+ offlen = 0;
+ if (sbuf != (from_path + len)) {
+ offlen = from_path + len - sbuf;
+ memmove (from_path, sbuf, offlen);
+ }
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "byebye (%d) from processing thread...", byebye);
+ return NULL;
+}
diff --git a/xlators/features/changelog/lib/src/gf-changelog.c b/xlators/features/changelog/lib/src/gf-changelog.c
new file mode 100644
index 000000000..ca8e373e7
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog.c
@@ -0,0 +1,515 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <errno.h>
+#include <dirent.h>
+#include <stddef.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <string.h>
+
+#include "globals.h"
+#include "glusterfs.h"
+#include "logging.h"
+
+#include "gf-changelog-helpers.h"
+
+/* from the changelog translator */
+#include "changelog-misc.h"
+#include "changelog-mem-types.h"
+
+int byebye = 0;
+
+static void
+gf_changelog_cleanup (gf_changelog_t *gfc)
+{
+ /* socket */
+ if (gfc->gfc_sockfd != -1)
+ close (gfc->gfc_sockfd);
+ /* tracker fd */
+ if (gfc->gfc_fd != -1)
+ close (gfc->gfc_fd);
+ /* processing dir */
+ if (gfc->gfc_dir)
+ closedir (gfc->gfc_dir);
+
+ if (gfc->gfc_working_dir)
+ free (gfc->gfc_working_dir); /* allocated by realpath */
+}
+
+void
+__attribute__ ((constructor)) gf_changelog_ctor (void)
+{
+ glusterfs_ctx_t *ctx = NULL;
+
+ ctx = glusterfs_ctx_new ();
+ if (!ctx)
+ return;
+
+ if (glusterfs_globals_init (ctx)) {
+ free (ctx);
+ ctx = NULL;
+ return;
+ }
+
+ THIS->ctx = ctx;
+}
+
+void
+__attribute__ ((destructor)) gf_changelog_dtor (void)
+{
+ xlator_t *this = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ gf_changelog_t *gfc = NULL;
+
+ this = THIS;
+ if (!this)
+ return;
+
+ ctx = this->ctx;
+ gfc = this->private;
+
+ if (gfc) {
+ gf_changelog_cleanup (gfc);
+ GF_FREE (gfc);
+ }
+
+ if (ctx) {
+ pthread_mutex_destroy (&ctx->lock);
+ free (ctx);
+ ctx = NULL;
+ }
+}
+
+
+static int
+gf_changelog_open_dirs (gf_changelog_t *gfc)
+{
+ int ret = -1;
+ DIR *dir = NULL;
+ int tracker_fd = 0;
+ char tracker_path[PATH_MAX] = {0,};
+
+ (void) snprintf (gfc->gfc_current_dir, PATH_MAX,
+ "%s/"GF_CHANGELOG_CURRENT_DIR"/",
+ gfc->gfc_working_dir);
+ ret = mkdir_p (gfc->gfc_current_dir, 0600, _gf_false);
+ if (ret)
+ goto out;
+
+ (void) snprintf (gfc->gfc_processed_dir, PATH_MAX,
+ "%s/"GF_CHANGELOG_PROCESSED_DIR"/",
+ gfc->gfc_working_dir);
+ ret = mkdir_p (gfc->gfc_processed_dir, 0600, _gf_false);
+ if (ret)
+ goto out;
+
+ (void) snprintf (gfc->gfc_processing_dir, PATH_MAX,
+ "%s/"GF_CHANGELOG_PROCESSING_DIR"/",
+ gfc->gfc_working_dir);
+ ret = mkdir_p (gfc->gfc_processing_dir, 0600, _gf_false);
+ if (ret)
+ goto out;
+
+ dir = opendir (gfc->gfc_processing_dir);
+ if (!dir) {
+ gf_log ("", GF_LOG_ERROR,
+ "opendir() error [reason: %s]", strerror (errno));
+ goto out;
+ }
+
+ gfc->gfc_dir = dir;
+
+ (void) snprintf (tracker_path, PATH_MAX,
+ "%s/"GF_CHANGELOG_TRACKER, gfc->gfc_working_dir);
+
+ tracker_fd = open (tracker_path, O_CREAT | O_APPEND | O_RDWR,
+ S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+ if (tracker_fd < 0) {
+ closedir (gfc->gfc_dir);
+ ret = -1;
+ goto out;
+ }
+
+ gfc->gfc_fd = tracker_fd;
+ ret = 0;
+ out:
+ return ret;
+}
+
+int
+gf_changelog_notification_init (xlator_t *this, gf_changelog_t *gfc)
+{
+ int ret = 0;
+ int len = 0;
+ int tries = 0;
+ int sockfd = 0;
+ struct sockaddr_un remote;
+
+ this = gfc->this;
+
+ if (gfc->gfc_sockfd != -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Reconnecting...");
+ close (gfc->gfc_sockfd);
+ }
+
+ sockfd = socket (AF_UNIX, SOCK_STREAM, 0);
+ if (sockfd < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ CHANGELOG_MAKE_SOCKET_PATH (gfc->gfc_brickpath,
+ gfc->gfc_sockpath, PATH_MAX);
+ gf_log (this->name, GF_LOG_INFO,
+ "connecting to changelog socket: %s (brick: %s)",
+ gfc->gfc_sockpath, gfc->gfc_brickpath);
+
+ remote.sun_family = AF_UNIX;
+ strcpy (remote.sun_path, gfc->gfc_sockpath);
+
+ len = strlen (remote.sun_path) + sizeof (remote.sun_family);
+
+ while (tries < gfc->gfc_connretries) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "connection attempt %d/%d...",
+ tries + 1, gfc->gfc_connretries);
+
+ /* initiate a connect */
+ if (connect (sockfd, (struct sockaddr *) &remote, len) == 0) {
+ gfc->gfc_sockfd = sockfd;
+ break;
+ }
+
+ tries++;
+ sleep (2);
+ }
+
+ if (tries == gfc->gfc_connretries) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not connect to changelog socket!"
+ " bailing out...");
+ ret = -1;
+ } else
+ gf_log (this->name, GF_LOG_INFO,
+ "connection successful");
+
+ out:
+ return ret;
+}
+
+int
+gf_changelog_done (char *file)
+{
+ int ret = -1;
+ char *buffer = NULL;
+ xlator_t *this = NULL;
+ gf_changelog_t *gfc = NULL;
+ char to_path[PATH_MAX] = {0,};
+
+ errno = EINVAL;
+
+ this = THIS;
+ if (!this)
+ goto out;
+
+ gfc = (gf_changelog_t *) this->private;
+ if (!gfc)
+ goto out;
+
+ if (!file || !strlen (file))
+ goto out;
+
+ /* make sure 'file' is inside ->gfc_working_dir */
+ buffer = realpath (file, NULL);
+ if (!buffer)
+ goto out;
+
+ if (strncmp (gfc->gfc_working_dir,
+ buffer, strlen (gfc->gfc_working_dir)))
+ goto out;
+
+ (void) snprintf (to_path, PATH_MAX, "%s%s",
+ gfc->gfc_processed_dir, basename (buffer));
+ gf_log (this->name, GF_LOG_DEBUG,
+ "moving %s to processed directory", file);
+ ret = rename (buffer, to_path);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "cannot move %s to %s (reason: %s)",
+ file, to_path, strerror (errno));
+ goto out;
+ }
+
+ ret = 0;
+
+ out:
+ if (buffer)
+ free (buffer); /* allocated by realpath() */
+ return ret;
+}
+
+/**
+ * @API
+ * for a set of changelogs, start from the begining
+ */
+int
+gf_changelog_start_fresh ()
+{
+ xlator_t *this = NULL;
+ gf_changelog_t *gfc = NULL;
+
+ this = THIS;
+ if (!this)
+ goto out;
+
+ errno = EINVAL;
+
+ gfc = (gf_changelog_t *) this->private;
+ if (!gfc)
+ goto out;
+
+ if (gf_ftruncate (gfc->gfc_fd, 0))
+ goto out;
+
+ return 0;
+
+ out:
+ return -1;
+}
+
+/**
+ * @API
+ * return the next changelog file entry. zero means all chanelogs
+ * consumed.
+ */
+ssize_t
+gf_changelog_next_change (char *bufptr, size_t maxlen)
+{
+ ssize_t size = 0;
+ int tracker_fd = 0;
+ xlator_t *this = NULL;
+ gf_changelog_t *gfc = NULL;
+ char buffer[PATH_MAX] = {0,};
+
+ errno = EINVAL;
+
+ this = THIS;
+ if (!this)
+ goto out;
+
+ gfc = (gf_changelog_t *) this->private;
+ if (!gfc)
+ goto out;
+
+ tracker_fd = gfc->gfc_fd;
+
+ size = gf_readline (tracker_fd, buffer, maxlen);
+ if (size < 0)
+ goto out;
+ if (size == 0)
+ return 0;
+
+ memcpy (bufptr, buffer, size - 1);
+ *(buffer + size) = '\0';
+
+ return size;
+
+ out:
+ return -1;
+}
+
+/**
+ * @API
+ * gf_changelog_scan() - scan and generate a list of change entries
+ *
+ * calling this api multiple times (without calling gf_changlog_done())
+ * would result new changelogs(s) being refreshed in the tracker file.
+ * This call also acts as a cancellation point for the consumer.
+ */
+ssize_t
+gf_changelog_scan ()
+{
+ int ret = 0;
+ int tracker_fd = 0;
+ size_t len = 0;
+ size_t off = 0;
+ xlator_t *this = NULL;
+ size_t nr_entries = 0;
+ gf_changelog_t *gfc = NULL;
+ struct dirent *entryp = NULL;
+ struct dirent *result = NULL;
+ char buffer[PATH_MAX] = {0,};
+
+ this = THIS;
+ if (!this)
+ goto out;
+
+ gfc = (gf_changelog_t *) this->private;
+ if (!gfc)
+ goto out;
+
+ /**
+ * do we need to protect 'byebye' with locks? worst, the
+ * consumer would get notified during next scan().
+ */
+ if (byebye) {
+ errno = ECONNREFUSED;
+ goto out;
+ }
+
+ errno = EINVAL;
+
+ tracker_fd = gfc->gfc_fd;
+
+ if (gf_ftruncate (tracker_fd, 0))
+ goto out;
+
+ len = offsetof(struct dirent, d_name)
+ + pathconf(gfc->gfc_processing_dir, _PC_NAME_MAX) + 1;
+ entryp = GF_CALLOC (1, len,
+ gf_changelog_mt_libgfchangelog_dirent_t);
+ if (!entryp)
+ goto out;
+
+ rewinddir (gfc->gfc_dir);
+ while (1) {
+ ret = readdir_r (gfc->gfc_dir, entryp, &result);
+ if (ret || !result)
+ break;
+
+ if ( !strcmp (basename (entryp->d_name), ".")
+ || !strcmp (basename (entryp->d_name), "..") )
+ continue;
+
+ nr_entries++;
+
+ GF_CHANGELOG_FILL_BUFFER (gfc->gfc_processing_dir,
+ buffer, off,
+ strlen (gfc->gfc_processing_dir));
+ GF_CHANGELOG_FILL_BUFFER (entryp->d_name, buffer,
+ off, strlen (entryp->d_name));
+ GF_CHANGELOG_FILL_BUFFER ("\n", buffer, off, 1);
+
+ if (gf_changelog_write (tracker_fd, buffer, off) != off) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "error writing changelog filename"
+ " to tracker file");
+ break;
+ }
+ off = 0;
+ }
+
+ GF_FREE (entryp);
+
+ if (!result) {
+ if (gf_lseek (tracker_fd, 0, SEEK_SET) != -1)
+ return nr_entries;
+ }
+ out:
+ return -1;
+}
+
+/**
+ * @API
+ * gf_changelog_register() - register a client for updates.
+ */
+int
+gf_changelog_register (char *brick_path, char *scratch_dir,
+ char *log_file, int log_level, int max_reconnects)
+{
+ int i = 0;
+ int ret = -1;
+ int errn = 0;
+ xlator_t *this = NULL;
+ gf_changelog_t *gfc = NULL;
+
+ this = THIS;
+ if (!this->ctx)
+ goto out;
+
+ errno = ENOMEM;
+
+ gfc = GF_CALLOC (1, sizeof (*gfc),
+ gf_changelog_mt_libgfchangelog_t);
+ if (!gfc)
+ goto out;
+
+ gfc->this = this;
+
+ gfc->gfc_dir = NULL;
+ gfc->gfc_fd = gfc->gfc_sockfd = -1;
+
+ gfc->gfc_working_dir = realpath (scratch_dir, NULL);
+ if (!gfc->gfc_working_dir) {
+ errn = errno;
+ goto cleanup;
+ }
+
+ ret = gf_changelog_open_dirs (gfc);
+ if (ret) {
+ errn = errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not create entries in scratch dir");
+ goto cleanup;
+ }
+
+ /* passing ident as NULL means to use default ident for syslog */
+ if (gf_log_init (this->ctx, log_file, NULL))
+ goto cleanup;
+
+ gf_log_set_loglevel ((log_level == -1) ? GF_LOG_INFO :
+ log_level);
+
+ gfc->gfc_connretries = (max_reconnects <= 0) ? 1 : max_reconnects;
+ (void) strncpy (gfc->gfc_brickpath, brick_path, PATH_MAX);
+
+ ret = gf_changelog_notification_init (this, gfc);
+ if (ret) {
+ errn = errno;
+ goto cleanup;
+ }
+
+ ret = gf_thread_create (&gfc->gfc_changelog_processor,
+ NULL, gf_changelog_process, gfc);
+ if (ret) {
+ errn = errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "error creating changelog processor thread"
+ " new changes won't be recorded!!!");
+ goto cleanup;
+ }
+
+ for (; i < 256; i++) {
+ gfc->rfc3986[i] =
+ (isalnum(i) || i == '~' ||
+ i == '-' || i == '.' || i == '_') ? i : 0;
+ }
+
+ ret = 0;
+ this->private = gfc;
+
+ goto out;
+
+ cleanup:
+ gf_changelog_cleanup (gfc);
+ GF_FREE (gfc);
+ this->private = NULL;
+ errno = errn;
+
+ out:
+ return ret;
+}
diff --git a/xlators/features/changelog/src/Makefile.am b/xlators/features/changelog/src/Makefile.am
new file mode 100644
index 000000000..e85031ad4
--- /dev/null
+++ b/xlators/features/changelog/src/Makefile.am
@@ -0,0 +1,19 @@
+xlator_LTLIBRARIES = changelog.la
+
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+noinst_HEADERS = changelog-helpers.h changelog-mem-types.h changelog-rt.h \
+ changelog-misc.h changelog-encoders.h changelog-notifier.h
+
+changelog_la_LDFLAGS = -module -avoidversion
+
+changelog_la_SOURCES = changelog.c changelog-rt.c changelog-helpers.c \
+ changelog-encoders.c changelog-notifier.c
+changelog_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -fPIC -D_FILE_OFFSET_BITS=64 \
+ -D_GNU_SOURCE -D$(GF_HOST_OS) -shared -nostartfiles -DDATADIR=\"$(localstatedir)\"
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/changelog/src/changelog-encoders.c b/xlators/features/changelog/src/changelog-encoders.c
new file mode 100644
index 000000000..553eec85c
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-encoders.c
@@ -0,0 +1,176 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "changelog-encoders.h"
+
+size_t
+entry_fn (void *data, char *buffer, gf_boolean_t encode)
+{
+ char *tmpbuf = NULL;
+ size_t bufsz = 0;
+ struct changelog_entry_fields *ce = NULL;
+
+ ce = (struct changelog_entry_fields *) data;
+
+ if (encode) {
+ tmpbuf = uuid_utoa (ce->cef_uuid);
+ CHANGELOG_FILL_BUFFER (buffer, bufsz, tmpbuf, strlen (tmpbuf));
+ } else {
+ CHANGELOG_FILL_BUFFER (buffer, bufsz,
+ ce->cef_uuid, sizeof (uuid_t));
+ }
+
+ CHANGELOG_FILL_BUFFER (buffer, bufsz, "/", 1);
+ CHANGELOG_FILL_BUFFER (buffer, bufsz,
+ ce->cef_bname, strlen (ce->cef_bname));
+ return bufsz;
+}
+
+size_t
+fop_fn (void *data, char *buffer, gf_boolean_t encode)
+{
+ char buf[10] = {0,};
+ size_t bufsz = 0;
+ glusterfs_fop_t fop = 0;
+
+ fop = *(glusterfs_fop_t *) data;
+
+ if (encode) {
+ (void) snprintf (buf, sizeof (buf), "%d", fop);
+ CHANGELOG_FILL_BUFFER (buffer, bufsz, buf, strlen (buf));
+ } else
+ CHANGELOG_FILL_BUFFER (buffer, bufsz, &fop, sizeof (fop));
+
+ return bufsz;
+}
+
+void
+entry_free_fn (void *data)
+{
+ changelog_opt_t *co = data;
+
+ if (!co)
+ return;
+
+ GF_FREE (co->co_entry.cef_bname);
+}
+
+/**
+ * try to write all data in one shot
+ */
+
+static inline void
+changelog_encode_write_xtra (changelog_log_data_t *cld,
+ char *buffer, size_t *off, gf_boolean_t encode)
+{
+ int i = 0;
+ size_t offset = 0;
+ void *data = NULL;
+ changelog_opt_t *co = NULL;
+
+ offset = *off;
+
+ co = (changelog_opt_t *) cld->cld_ptr;
+
+ for (; i < cld->cld_xtra_records; i++, co++) {
+ CHANGELOG_FILL_BUFFER (buffer, offset, "\0", 1);
+
+ switch (co->co_type) {
+ case CHANGELOG_OPT_REC_FOP:
+ data = &co->co_fop;
+ break;
+ case CHANGELOG_OPT_REC_ENTRY:
+ data = &co->co_entry;
+ break;
+ }
+
+ if (co->co_convert)
+ offset += co->co_convert (data,
+ buffer + offset, encode);
+ else /* no coversion: write it out as it is */
+ CHANGELOG_FILL_BUFFER (buffer, offset,
+ data, co->co_len);
+ }
+
+ *off = offset;
+}
+
+int
+changelog_encode_ascii (xlator_t *this, changelog_log_data_t *cld)
+{
+ size_t off = 0;
+ size_t gfid_len = 0;
+ char *gfid_str = NULL;
+ char *buffer = NULL;
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+
+ gfid_str = uuid_utoa (cld->cld_gfid);
+ gfid_len = strlen (gfid_str);
+
+ /* extra bytes for decorations */
+ buffer = alloca (gfid_len + cld->cld_ptr_len + 10);
+ CHANGELOG_STORE_ASCII (priv, buffer,
+ off, gfid_str, gfid_len, cld);
+
+ if (cld->cld_xtra_records)
+ changelog_encode_write_xtra (cld, buffer, &off, _gf_true);
+
+ CHANGELOG_FILL_BUFFER (buffer, off, "\0", 1);
+
+ return changelog_write_change (priv, buffer, off);
+}
+
+int
+changelog_encode_binary (xlator_t *this, changelog_log_data_t *cld)
+{
+ size_t off = 0;
+ char *buffer = NULL;
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+
+ /* extra bytes for decorations */
+ buffer = alloca (sizeof (uuid_t) + cld->cld_ptr_len + 10);
+ CHANGELOG_STORE_BINARY (priv, buffer, off, cld->cld_gfid, cld);
+
+ if (cld->cld_xtra_records)
+ changelog_encode_write_xtra (cld, buffer, &off, _gf_false);
+
+ CHANGELOG_FILL_BUFFER (buffer, off, "\0", 1);
+
+ return changelog_write_change (priv, buffer, off);
+}
+
+static struct changelog_encoder
+cb_encoder[] = {
+ [CHANGELOG_ENCODE_BINARY] =
+ {
+ .encoder = CHANGELOG_ENCODE_BINARY,
+ .encode = changelog_encode_binary,
+ },
+ [CHANGELOG_ENCODE_ASCII] =
+ {
+ .encoder = CHANGELOG_ENCODE_ASCII,
+ .encode = changelog_encode_ascii,
+ },
+};
+
+void
+changelog_encode_change( changelog_priv_t * priv)
+{
+ priv->ce = &cb_encoder[priv->encode_mode];
+}
diff --git a/xlators/features/changelog/src/changelog-encoders.h b/xlators/features/changelog/src/changelog-encoders.h
new file mode 100644
index 000000000..a3efbee05
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-encoders.h
@@ -0,0 +1,46 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CHANGELOG_ENCODERS_H
+#define _CHANGELOG_ENCODERS_H
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include "changelog-helpers.h"
+
+#define CHANGELOG_STORE_ASCII(priv, buf, off, gfid, gfid_len, cld) do { \
+ CHANGELOG_FILL_BUFFER (buffer, off, \
+ priv->maps[cld->cld_type], 1); \
+ CHANGELOG_FILL_BUFFER (buffer, \
+ off, gfid, gfid_len); \
+ } while (0)
+
+#define CHANGELOG_STORE_BINARY(priv, buf, off, gfid, cld) do { \
+ CHANGELOG_FILL_BUFFER (buffer, off, \
+ priv->maps[cld->cld_type], 1); \
+ CHANGELOG_FILL_BUFFER (buffer, \
+ off, gfid, sizeof (uuid_t)); \
+ } while (0)
+
+size_t
+entry_fn (void *data, char *buffer, gf_boolean_t encode);
+size_t
+fop_fn (void *data, char *buffer, gf_boolean_t encode);
+void
+entry_free_fn (void *data);
+int
+changelog_encode_binary (xlator_t *, changelog_log_data_t *);
+int
+changelog_encode_ascii (xlator_t *, changelog_log_data_t *);
+void
+changelog_encode_change(changelog_priv_t *);
+
+#endif /* _CHANGELOG_ENCODERS_H */
diff --git a/xlators/features/changelog/src/changelog-helpers.c b/xlators/features/changelog/src/changelog-helpers.c
new file mode 100644
index 000000000..7ab0091b5
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-helpers.c
@@ -0,0 +1,693 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "defaults.h"
+#include "logging.h"
+#include "iobuf.h"
+
+#include "changelog-helpers.h"
+#include "changelog-mem-types.h"
+
+#include "changelog-encoders.h"
+#include <pthread.h>
+
+void
+changelog_thread_cleanup (xlator_t *this, pthread_t thr_id)
+{
+ int ret = 0;
+ void *retval = NULL;
+
+ /* send a cancel request to the thread */
+ ret = pthread_cancel (thr_id);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not cancel thread (reason: %s)",
+ strerror (errno));
+ goto out;
+ }
+
+ ret = pthread_join (thr_id, &retval);
+ if (ret || (retval != PTHREAD_CANCELED)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "cancel request not adhered as expected"
+ " (reason: %s)", strerror (errno));
+ }
+
+ out:
+ return;
+}
+
+inline void *
+changelog_get_usable_buffer (changelog_local_t *local)
+{
+ changelog_log_data_t *cld = NULL;
+
+ cld = &local->cld;
+ if (!cld->cld_iobuf)
+ return NULL;
+
+ return cld->cld_iobuf->ptr;
+}
+
+inline void
+changelog_set_usable_record_and_length (changelog_local_t *local,
+ size_t len, int xr)
+{
+ changelog_log_data_t *cld = NULL;
+
+ cld = &local->cld;
+
+ cld->cld_ptr_len = len;
+ cld->cld_xtra_records = xr;
+}
+
+void
+changelog_local_cleanup (xlator_t *xl, changelog_local_t *local)
+{
+ int i = 0;
+ changelog_opt_t *co = NULL;
+ changelog_log_data_t *cld = NULL;
+
+ if (!local)
+ return;
+
+ cld = &local->cld;
+
+ /* cleanup dynamic allocation for extra records */
+ if (cld->cld_xtra_records) {
+ co = (changelog_opt_t *) cld->cld_ptr;
+ for (; i < cld->cld_xtra_records; i++, co++)
+ if (co->co_free)
+ co->co_free (co);
+ }
+
+ CHANGELOG_IOBUF_UNREF (cld->cld_iobuf);
+
+ if (local->inode)
+ inode_unref (local->inode);
+
+ mem_put (local);
+}
+
+inline int
+changelog_write (int fd, char *buffer, size_t len)
+{
+ ssize_t size = 0;
+ size_t writen = 0;
+
+ while (writen < len) {
+ size = write (fd,
+ buffer + writen, len - writen);
+ if (size <= 0)
+ break;
+
+ writen += size;
+ }
+
+ return (writen != len);
+}
+
+static int
+changelog_rollover_changelog (xlator_t *this,
+ changelog_priv_t *priv, unsigned long ts)
+{
+ int ret = -1;
+ int notify = 0;
+ char *bname = NULL;
+ char ofile[PATH_MAX] = {0,};
+ char nfile[PATH_MAX] = {0,};
+
+ if (priv->changelog_fd != -1) {
+ close (priv->changelog_fd);
+ priv->changelog_fd = -1;
+ }
+
+ (void) snprintf (ofile, PATH_MAX,
+ "%s/"CHANGELOG_FILE_NAME, priv->changelog_dir);
+ (void) snprintf (nfile, PATH_MAX,
+ "%s/"CHANGELOG_FILE_NAME".%lu",
+ priv->changelog_dir, ts);
+
+ ret = rename (ofile, nfile);
+ if (!ret)
+ notify = 1;
+
+ if (ret && (errno == ENOENT)) {
+ ret = 0;
+ }
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "error renaming %s -> %s (reason %s)",
+ ofile, nfile, strerror (errno));
+ }
+
+ if (notify) {
+ bname = basename (nfile);
+ gf_log (this->name, GF_LOG_DEBUG, "notifying: %s", bname);
+ ret = changelog_write (priv->wfd, bname, strlen (bname) + 1);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to send file name to notify thread"
+ " (reason: %s)", strerror (errno));
+ }
+ }
+
+ return ret;
+}
+
+int
+changelog_open (xlator_t *this,
+ changelog_priv_t *priv)
+{
+ int fd = 0;
+ int ret = -1;
+ int flags = 0;
+ char buffer[1024] = {0,};
+ char changelog_path[PATH_MAX] = {0,};
+
+ (void) snprintf (changelog_path, PATH_MAX,
+ "%s/"CHANGELOG_FILE_NAME,
+ priv->changelog_dir);
+
+ flags |= (O_CREAT | O_RDWR);
+ if (priv->fsync_interval == 0)
+ flags |= O_SYNC;
+
+ fd = open (changelog_path, flags,
+ S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+ if (fd < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "unable to open/create changelog file %s"
+ " (reason: %s). change-logging will be"
+ " inactive", changelog_path, strerror (errno));
+ goto out;
+ }
+
+ priv->changelog_fd = fd;
+
+ (void) snprintf (buffer, 1024, CHANGELOG_HEADER,
+ CHANGELOG_VERSION_MAJOR,
+ CHANGELOG_VERSION_MINOR,
+ priv->ce->encoder);
+ ret = changelog_write_change (priv, buffer, strlen (buffer));
+ if (ret) {
+ close (priv->changelog_fd);
+ priv->changelog_fd = -1;
+ goto out;
+ }
+
+ ret = 0;
+
+ out:
+ return ret;
+}
+
+int
+changelog_start_next_change (xlator_t *this,
+ changelog_priv_t *priv,
+ unsigned long ts, gf_boolean_t finale)
+{
+ int ret = -1;
+
+ ret = changelog_rollover_changelog (this, priv, ts);
+
+ if (!ret && !finale)
+ ret = changelog_open (this, priv);
+
+ return ret;
+}
+
+/**
+ * return the length of entry
+ */
+inline size_t
+changelog_entry_length ()
+{
+ return sizeof (changelog_log_data_t);
+}
+
+int
+changelog_fill_rollover_data (changelog_log_data_t *cld, gf_boolean_t is_last)
+{
+ struct timeval tv = {0,};
+
+ cld->cld_type = CHANGELOG_TYPE_ROLLOVER;
+
+ if (gettimeofday (&tv, NULL))
+ return -1;
+
+ cld->cld_roll_time = (unsigned long) tv.tv_sec;
+ cld->cld_finale = is_last;
+ return 0;
+}
+
+int
+changelog_write_change (changelog_priv_t *priv, char *buffer, size_t len)
+{
+ return changelog_write (priv->changelog_fd, buffer, len);
+}
+
+inline int
+changelog_handle_change (xlator_t *this,
+ changelog_priv_t *priv, changelog_log_data_t *cld)
+{
+ int ret = 0;
+
+ if (CHANGELOG_TYPE_IS_ROLLOVER (cld->cld_type)) {
+ changelog_encode_change(priv);
+ ret = changelog_start_next_change (this, priv,
+ cld->cld_roll_time,
+ cld->cld_finale);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Problem rolling over changelog(s)");
+ goto out;
+ }
+
+ /**
+ * case when there is reconfigure done (disabling changelog) and there
+ * are still fops that have updates in prgress.
+ */
+ if (priv->changelog_fd == -1)
+ return 0;
+
+ if (CHANGELOG_TYPE_IS_FSYNC (cld->cld_type)) {
+ ret = fsync (priv->changelog_fd);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "fsync failed (reason: %s)",
+ strerror (errno));
+ }
+ goto out;
+ }
+
+ ret = priv->ce->encode (this, cld);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "error writing changelog to disk");
+ }
+
+ out:
+ return ret;
+}
+
+changelog_local_t *
+changelog_local_init (xlator_t *this, inode_t *inode,
+ uuid_t gfid, int xtra_records,
+ gf_boolean_t update_flag)
+{
+ changelog_local_t *local = NULL;
+ struct iobuf *iobuf = NULL;
+
+ /**
+ * We relax the presence of inode if @update_flag is true.
+ * The caller (implmentation of the fop) needs to be careful to
+ * not blindly use local->inode.
+ */
+ if (!update_flag && !inode) {
+ gf_log_callingfn (this->name, GF_LOG_WARNING,
+ "inode needed for version checking !!!");
+ goto out;
+ }
+
+ if (xtra_records) {
+ iobuf = iobuf_get2 (this->ctx->iobuf_pool,
+ xtra_records * CHANGELOG_OPT_RECORD_LEN);
+ if (!iobuf)
+ goto out;
+ }
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ CHANGELOG_IOBUF_UNREF (iobuf);
+ goto out;
+ }
+
+ local->update_no_check = update_flag;
+
+ uuid_copy (local->cld.cld_gfid, gfid);
+
+ local->cld.cld_iobuf = iobuf;
+ local->cld.cld_xtra_records = 0; /* set by the caller */
+
+ if (inode)
+ local->inode = inode_ref (inode);
+
+ out:
+ return local;
+}
+
+int
+changelog_forget (xlator_t *this, inode_t *inode)
+{
+ uint64_t ctx_addr = 0;
+ changelog_inode_ctx_t *ctx = NULL;
+
+ inode_ctx_del (inode, this, &ctx_addr);
+ if (!ctx_addr)
+ return 0;
+
+ ctx = (changelog_inode_ctx_t *) (long) ctx_addr;
+ GF_FREE (ctx);
+
+ return 0;
+}
+
+int
+changelog_inject_single_event (xlator_t *this,
+ changelog_priv_t *priv,
+ changelog_log_data_t *cld)
+{
+ return priv->cd.dispatchfn (this, priv, priv->cd.cd_data, cld, NULL);
+}
+
+/**
+ * TODO: these threads have many thing in common (wake up after
+ * a certain time etc..). move them into separate routine.
+ */
+void *
+changelog_rollover (void *data)
+{
+ int ret = 0;
+ xlator_t *this = NULL;
+ struct timeval tv = {0,};
+ changelog_log_data_t cld = {0,};
+ changelog_time_slice_t *slice = NULL;
+ changelog_priv_t *priv = data;
+
+ this = priv->cr.this;
+ slice = &priv->slice;
+
+ while (1) {
+ tv.tv_sec = priv->rollover_time;
+ tv.tv_usec = 0;
+
+ ret = select (0, NULL, NULL, NULL, &tv);
+ if (ret)
+ continue;
+
+ ret = changelog_fill_rollover_data (&cld, _gf_false);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to fill rollover data");
+ continue;
+ }
+
+ LOCK (&priv->lock);
+ {
+ ret = changelog_inject_single_event (this, priv, &cld);
+ if (!ret)
+ SLICE_VERSION_UPDATE (slice);
+ }
+ UNLOCK (&priv->lock);
+ }
+
+ return NULL;
+}
+
+void *
+changelog_fsync_thread (void *data)
+{
+ int ret = 0;
+ xlator_t *this = NULL;
+ struct timeval tv = {0,};
+ changelog_log_data_t cld = {0,};
+ changelog_priv_t *priv = data;
+
+ this = priv->cf.this;
+ cld.cld_type = CHANGELOG_TYPE_FSYNC;
+
+ while (1) {
+ tv.tv_sec = priv->fsync_interval;
+ tv.tv_usec = 0;
+
+ ret = select (0, NULL, NULL, NULL, &tv);
+ if (ret)
+ continue;
+
+ ret = changelog_inject_single_event (this, priv, &cld);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to inject fsync event");
+ }
+
+ return NULL;
+}
+
+/* macros for inode/changelog version checks */
+
+#define INODE_VERSION_UPDATE(priv, inode, iver, slice, type) do { \
+ LOCK (&inode->lock); \
+ { \
+ LOCK (&priv->lock); \
+ { \
+ *iver = slice->changelog_version[type]; \
+ } \
+ UNLOCK (&priv->lock); \
+ } \
+ UNLOCK (&inode->lock); \
+ } while (0)
+
+#define INODE_VERSION_EQUALS_SLICE(priv, ver, slice, type, upd) do { \
+ LOCK (&priv->lock); \
+ { \
+ upd = (ver == slice->changelog_version[type]) \
+ ? _gf_false : _gf_true; \
+ } \
+ UNLOCK (&priv->lock); \
+ } while (0)
+
+static int
+__changelog_inode_ctx_set (xlator_t *this,
+ inode_t *inode, changelog_inode_ctx_t *ctx)
+{
+ uint64_t ctx_addr = (uint64_t) ctx;
+ return __inode_ctx_set (inode, this, &ctx_addr);
+}
+
+/**
+ * one shot routine to get the address and the value of a inode version
+ * for a particular type.
+ */
+static changelog_inode_ctx_t *
+__changelog_inode_ctx_get (xlator_t *this,
+ inode_t *inode, unsigned long **iver,
+ unsigned long *version, changelog_log_type type)
+{
+ int ret = 0;
+ uint64_t ctx_addr = 0;
+ changelog_inode_ctx_t *ctx = NULL;
+
+ ret = __inode_ctx_get (inode, this, &ctx_addr);
+ if (ret < 0)
+ ctx_addr = 0;
+ if (ctx_addr != 0) {
+ ctx = (changelog_inode_ctx_t *) (long)ctx_addr;
+ goto out;
+ }
+
+ ctx = GF_CALLOC (1, sizeof (*ctx), gf_changelog_mt_inode_ctx_t);
+ if (!ctx)
+ goto out;
+
+ ret = __changelog_inode_ctx_set (this, inode, ctx);
+ if (ret) {
+ GF_FREE (ctx);
+ ctx = NULL;
+ }
+
+ out:
+ if (ctx && iver && version) {
+ *iver = CHANGELOG_INODE_VERSION_TYPE (ctx, type);
+ *version = **iver;
+ }
+
+ return ctx;
+}
+
+static changelog_inode_ctx_t *
+changelog_inode_ctx_get (xlator_t *this,
+ inode_t *inode, unsigned long **iver,
+ unsigned long *version, changelog_log_type type)
+{
+ changelog_inode_ctx_t *ctx = NULL;
+
+ LOCK (&inode->lock);
+ {
+ ctx = __changelog_inode_ctx_get (this,
+ inode, iver, version, type);
+ }
+ UNLOCK (&inode->lock);
+
+ return ctx;
+}
+
+/**
+ * This is the main update routine. Locking has been made granular so as to
+ * maximize parallelism of fops - I'll try to explain it below using execution
+ * timelines.
+ *
+ * Basically, the contention is between multiple execution threads of this
+ * routine and the roll-over thread. So, instead of having a big lock, we hold
+ * granular locks: inode->lock and priv->lock. Now I'll explain what happens
+ * when there is an update and a roll-over at just about the same time.
+ * NOTE:
+ * - the dispatcher itself synchronizes updates via it's own lock
+ * - the slice version in incremented by the roll-over thread
+ *
+ * Case 1: When the rollover thread wins before the inode version can be
+ * compared with the slice version.
+ *
+ * [updater] | [rollover]
+ * |
+ * | <SLICE: 1, 1, 1>
+ * <changelog_update> |
+ * <changelog_inode_ctx_get> |
+ * <CTX: 1, 1, 1> |
+ * | <dispatch-rollover-event>
+ * | LOCK (&priv->lock)
+ * | <SLICE_VERSION_UPDATE>
+ * | <SLICE: 2, 2, 2>
+ * | UNLOCK (&priv->lock)
+ * |
+ * LOCK (&priv->lock) |
+ * <INODE_VERSION_EQUALS_SLICE> |
+ * I: 1 <-> S: 2 |
+ * update: true |
+ * UNLOCK (&priv->lock) |
+ * |
+ * <if update == true> |
+ * <dispath-update-event> |
+ * <INODE_VERSION_UPDATE> |
+ * LOCK (&inode->lock) |
+ * LOCK (&priv->lock) |
+ * <CTX: 2, 1, 1> |
+ * UNLOCK (&priv->lock) |
+ * UNLOCK (&inode->lock) |
+ *
+ * Therefore, the change gets recorded in the next change (no lost change). If
+ * the slice version was ahead of the inode version (say I:1, S: 2), then
+ * anyway the comparison would result in a update (I: 1, S: 3).
+ *
+ * If the rollover time is too less, then there is another contention when the
+ * updater tries to bring up inode version to the slice version (this is also
+ * the case when the roll-over thread wakes up during INODE_VERSION_UPDATE.
+ *
+ * <CTX: 1, 1, 1> | <SLICE: 2, 2, 2>
+ * |
+ * |
+ * <dispath-update-event> |
+ * <INODE_VERSION_UPDATE> |
+ * LOCK (&inode->lock) |
+ * LOCK (&priv->lock) |
+ * <CTX: 2, 1, 1> |
+ * UNLOCK (&priv->lock) |
+ * UNLOCK (&inode->lock) |
+ * | <dispatch-rollover-event>
+ * | LOCK (&priv->lock)
+ * | <SLICE_VERSION_UPDATE>
+ * | <SLICE: 3, 3, 3>
+ * | UNLOCK (&priv->lock)
+ *
+ *
+ * Case 2: When the fop thread wins
+ *
+ * [updater] | [rollover]
+ * |
+ * | <SLICE: 1, 1, 1>
+ * <changelog_update> |
+ * <changelog_inode_ctx_get> |
+ * <CTX: 0, 0, 0> |
+ * |
+ * LOCK (&priv->lock) |
+ * <INODE_VERSION_EQUALS_SLICE> |
+ * I: 0 <-> S: 1 |
+ * update: true |
+ * UNLOCK (&priv->lock) |
+ * | <dispatch-rollover-event>
+ * | LOCK (&priv->lock)
+ * | <SLICE_VERSION_UPDATE>
+ * | <SLICE: 2, 2, 2>
+ * | UNLOCK (&priv->lock)
+ * <if update == true> |
+ * <dispath-update-event> |
+ * <INODE_VERSION_UPDATE> |
+ * LOCK (&inode->lock) |
+ * LOCK (&priv->lock) |
+ * <CTX: 2, 0, 0> |
+ * UNLOCK (&priv->lock) |
+ * UNLOCK (&inode->lock) |
+ *
+ * Here again, if the inode version was equal to the slice version (I: 1, S: 1)
+ * then there is no need to record an update (as the equality of the two version
+ * signifies an update was recorded in the current time slice).
+ */
+inline void
+changelog_update (xlator_t *this, changelog_priv_t *priv,
+ changelog_local_t *local, changelog_log_type type)
+{
+ int ret = 0;
+ unsigned long *iver = NULL;
+ unsigned long version = 0;
+ inode_t *inode = NULL;
+ changelog_time_slice_t *slice = NULL;
+ changelog_inode_ctx_t *ctx = NULL;
+ changelog_log_data_t *cld_0 = NULL;
+ changelog_log_data_t *cld_1 = NULL;
+ changelog_local_t *next_local = NULL;
+ gf_boolean_t need_upd = _gf_true;
+
+ slice = &priv->slice;
+
+ /**
+ * for fops that do not require inode version checking
+ */
+ if (local->update_no_check)
+ goto update;
+
+ inode = local->inode;
+
+ ctx = changelog_inode_ctx_get (this,
+ inode, &iver, &version, type);
+ if (!ctx)
+ goto update;
+
+ INODE_VERSION_EQUALS_SLICE (priv, version, slice, type, need_upd);
+
+ update:
+ if (need_upd) {
+ cld_0 = &local->cld;
+ cld_0->cld_type = type;
+
+ if ( (next_local = local->prev_entry) != NULL ) {
+ cld_1 = &next_local->cld;
+ cld_1->cld_type = type;
+ }
+
+ ret = priv->cd.dispatchfn (this, priv,
+ priv->cd.cd_data, cld_0, cld_1);
+
+ /**
+ * update after the dispatcher has successfully done
+ * it's job.
+ */
+ if (!local->update_no_check && iver && !ret)
+ INODE_VERSION_UPDATE (priv, inode, iver, slice, type);
+ }
+
+ return;
+}
diff --git a/xlators/features/changelog/src/changelog-helpers.h b/xlators/features/changelog/src/changelog-helpers.h
new file mode 100644
index 000000000..ad79636b0
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-helpers.h
@@ -0,0 +1,395 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CHANGELOG_HELPERS_H
+#define _CHANGELOG_HELPERS_H
+
+#include "locking.h"
+#include "timer.h"
+#include "pthread.h"
+#include "iobuf.h"
+
+#include "changelog-misc.h"
+
+/**
+ * the changelog entry
+ */
+typedef struct changelog_log_data {
+ /* rollover related */
+ unsigned long cld_roll_time;
+
+ /* reopen changelog? */
+ gf_boolean_t cld_finale;
+
+ changelog_log_type cld_type;
+
+ /**
+ * sincd gfid is _always_ a necessity, it's not a part
+ * of the iobuf. by doing this we do not add any overhead
+ * for data and metadata related fops.
+ */
+ uuid_t cld_gfid;
+
+ /**
+ * iobufs are used for optionals records: pargfid, path,
+ * write offsets etc.. It's the fop implementers job
+ * to allocate (iobuf_get() in the fop) and get unref'ed
+ * in the callback (CHANGELOG_STACK_UNWIND).
+ */
+ struct iobuf *cld_iobuf;
+
+#define cld_ptr cld_iobuf->ptr
+
+ /**
+ * after allocation you can point this to the length of
+ * usable data, but make sure it does not exceed the
+ * the size of the requested iobuf.
+ */
+ size_t cld_iobuf_len;
+
+#define cld_ptr_len cld_iobuf_len
+
+ /**
+ * number of optional records
+ */
+ int cld_xtra_records;
+} changelog_log_data_t;
+
+/**
+ * holder for dispatch function and private data
+ */
+
+typedef struct changelog_priv changelog_priv_t;
+
+typedef struct changelog_dispatcher {
+ void *cd_data;
+ int (*dispatchfn) (xlator_t *, changelog_priv_t *, void *,
+ changelog_log_data_t *, changelog_log_data_t *);
+} changelog_dispatcher_t;
+
+struct changelog_bootstrap {
+ changelog_mode_t mode;
+ int (*ctor) (xlator_t *, changelog_dispatcher_t *);
+ int (*dtor) (xlator_t *, changelog_dispatcher_t *);
+};
+
+struct changelog_encoder {
+ changelog_encoder_t encoder;
+ int (*encode) (xlator_t *, changelog_log_data_t *);
+};
+
+
+/* xlator private */
+
+typedef struct changelog_time_slice {
+ /**
+ * just in case we need nanosecond granularity some day.
+ * field is unused as of now (maybe we'd need it later).
+ */
+ struct timeval tv_start;
+
+ /**
+ * version of changelog file, incremented each time changes
+ * rollover.
+ */
+ unsigned long changelog_version[CHANGELOG_MAX_TYPE];
+} changelog_time_slice_t;
+
+typedef struct changelog_rollover {
+ /* rollover thread */
+ pthread_t rollover_th;
+
+ xlator_t *this;
+} changelog_rollover_t;
+
+typedef struct changelog_fsync {
+ /* fsync() thread */
+ pthread_t fsync_th;
+
+ xlator_t *this;
+} changelog_fsync_t;
+
+# define CHANGELOG_MAX_CLIENTS 5
+typedef struct changelog_notify {
+ /* reader end of the pipe */
+ int rfd;
+
+ /* notifier thread */
+ pthread_t notify_th;
+
+ /* unique socket path */
+ char sockpath[PATH_MAX];
+
+ int socket_fd;
+
+ /**
+ * simple array of accept()'ed fds. Not scalable at all
+ * for large number of clients, but it's okay as we have
+ * a ahrd limit in this version (@CHANGELOG_MAX_CLIENTS).
+ */
+ int client_fd[CHANGELOG_MAX_CLIENTS];
+
+ xlator_t *this;
+} changelog_notify_t;
+
+struct changelog_priv {
+ gf_boolean_t active;
+
+ /* to generate unique socket file per brick */
+ char *changelog_brick;
+
+ /* logging directory */
+ char *changelog_dir;
+
+ /* one file for all changelog types */
+ int changelog_fd;
+
+ gf_lock_t lock;
+
+ /* writen end of the pipe */
+ int wfd;
+
+ /* rollover time */
+ int32_t rollover_time;
+
+ /* fsync() interval */
+ int32_t fsync_interval;
+
+ /* changelog type maps */
+ const char *maps[CHANGELOG_MAX_TYPE];
+
+ /* time slicer */
+ changelog_time_slice_t slice;
+
+ /* context of the updater */
+ changelog_dispatcher_t cd;
+
+ /* context of the rollover thread */
+ changelog_rollover_t cr;
+
+ /* context of fsync thread */
+ changelog_fsync_t cf;
+
+ /* context of the notifier thread */
+ changelog_notify_t cn;
+
+ /* operation mode */
+ changelog_mode_t op_mode;
+
+ /* bootstrap routine for 'current' logger */
+ struct changelog_bootstrap *cb;
+
+ /* encoder mode */
+ changelog_encoder_t encode_mode;
+
+ /* encoder */
+ struct changelog_encoder *ce;
+};
+
+struct changelog_local {
+ inode_t *inode;
+ gf_boolean_t update_no_check;
+
+ changelog_log_data_t cld;
+
+ /**
+ * ->prev_entry is used in cases when there needs to be
+ * additional changelog entry for the parent (eg. rename)
+ * It's analogous to ->next in single linked list world,
+ * but we call it as ->prev_entry... ha ha ha
+ */
+ struct changelog_local *prev_entry;
+};
+
+typedef struct changelog_local changelog_local_t;
+
+/* inode version is stored in inode ctx */
+typedef struct changelog_inode_ctx {
+ unsigned long iversion[CHANGELOG_MAX_TYPE];
+} changelog_inode_ctx_t;
+
+#define CHANGELOG_INODE_VERSION_TYPE(ctx, type) &(ctx->iversion[type])
+
+/**
+ * Optional Records:
+ * fops that need to save additional information request a array of
+ * @changelog_opt_t struct. The array is allocated via @iobufs.
+ */
+typedef enum {
+ CHANGELOG_OPT_REC_FOP,
+ CHANGELOG_OPT_REC_ENTRY,
+} changelog_optional_rec_type_t;
+
+struct changelog_entry_fields {
+ uuid_t cef_uuid;
+ char *cef_bname;
+};
+
+typedef struct {
+ /**
+ * @co_covert can be used to do post-processing of the record before
+ * it's persisted to the CHANGELOG. If this is NULL, then the record
+ * is persisted as per it's in memory format.
+ */
+ size_t (*co_convert) (void *data, char *buffer, gf_boolean_t encode);
+
+ /* release routines */
+ void (*co_free) (void *data);
+
+ /* type of the field */
+ changelog_optional_rec_type_t co_type;
+
+ /**
+ * sizeof of the 'valid' field in the union. This field is not used if
+ * @co_convert is specified.
+ */
+ size_t co_len;
+
+ union {
+ glusterfs_fop_t co_fop;
+ struct changelog_entry_fields co_entry;
+ };
+} changelog_opt_t;
+
+#define CHANGELOG_OPT_RECORD_LEN sizeof (changelog_opt_t)
+
+/**
+ * helpers routines
+ */
+
+void
+changelog_thread_cleanup (xlator_t *this, pthread_t thr_id);
+inline void *
+changelog_get_usable_buffer (changelog_local_t *local);
+inline void
+changelog_set_usable_record_and_length (changelog_local_t *local,
+ size_t len, int xr);
+void
+changelog_local_cleanup (xlator_t *xl, changelog_local_t *local);
+changelog_local_t *
+changelog_local_init (xlator_t *this, inode_t *inode, uuid_t gfid,
+ int xtra_records, gf_boolean_t update_flag);
+int
+changelog_start_next_change (xlator_t *this,
+ changelog_priv_t *priv,
+ unsigned long ts, gf_boolean_t finale);
+int
+changelog_open (xlator_t *this, changelog_priv_t *priv);
+int
+changelog_fill_rollover_data (changelog_log_data_t *cld, gf_boolean_t is_last);
+int
+changelog_inject_single_event (xlator_t *this,
+ changelog_priv_t *priv,
+ changelog_log_data_t *cld);
+inline size_t
+changelog_entry_length ();
+inline int
+changelog_write (int fd, char *buffer, size_t len);
+int
+changelog_write_change (changelog_priv_t *priv, char *buffer, size_t len);
+inline int
+changelog_handle_change (xlator_t *this,
+ changelog_priv_t *priv, changelog_log_data_t *cld);
+inline void
+changelog_update (xlator_t *this, changelog_priv_t *priv,
+ changelog_local_t *local, changelog_log_type type);
+void *
+changelog_rollover (void *data);
+void *
+changelog_fsync_thread (void *data);
+int
+changelog_forget (xlator_t *this, inode_t *inode);
+
+/* macros */
+
+#define CHANGELOG_STACK_UNWIND(fop, frame, params ...) do { \
+ changelog_local_t *__local = NULL; \
+ xlator_t *__xl = NULL; \
+ if (frame) { \
+ __local = frame->local; \
+ __xl = frame->this; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT (fop, frame, params); \
+ changelog_local_cleanup (__xl, __local); \
+ if (__local && __local->prev_entry) \
+ changelog_local_cleanup (__xl, \
+ __local->prev_entry); \
+ } while (0)
+
+#define CHANGELOG_IOBUF_REF(iobuf) do { \
+ if (iobuf) \
+ iobuf_ref (iobuf); \
+ } while (0)
+
+#define CHANGELOG_IOBUF_UNREF(iobuf) do { \
+ if (iobuf) \
+ iobuf_unref (iobuf); \
+ } while (0)
+
+#define CHANGELOG_FILL_BUFFER(buffer, off, val, len) do { \
+ memcpy (buffer + off, val, len); \
+ off += len; \
+ } while (0)
+
+#define SLICE_VERSION_UPDATE(slice) do { \
+ int i = 0; \
+ for (; i < CHANGELOG_MAX_TYPE; i++) { \
+ slice->changelog_version[i]++; \
+ } \
+ } while (0)
+
+#define CHANGLOG_FILL_FOP_NUMBER(co, fop, converter, xlen) do { \
+ co->co_convert = converter; \
+ co->co_free = NULL; \
+ co->co_type = CHANGELOG_OPT_REC_FOP; \
+ co->co_fop = fop; \
+ xlen += sizeof (fop); \
+ } while (0)
+
+#define CHANGELOG_FILL_ENTRY(co, pargfid, bname, \
+ converter, freefn, xlen, label) \
+ do { \
+ co->co_convert = converter; \
+ co->co_free = freefn; \
+ co->co_type = CHANGELOG_OPT_REC_ENTRY; \
+ uuid_copy (co->co_entry.cef_uuid, pargfid); \
+ co->co_entry.cef_bname = gf_strdup(bname); \
+ if (!co->co_entry.cef_bname) \
+ goto label; \
+ xlen += (UUID_CANONICAL_FORM_LEN + strlen (bname)); \
+ } while (0)
+
+#define CHANGELOG_INIT(this, local, inode, gfid, xrec) \
+ local = changelog_local_init (this, inode, gfid, xrec, _gf_false)
+
+#define CHANGELOG_INIT_NOCHECK(this, local, inode, gfid, xrec) \
+ local = changelog_local_init (this, inode, gfid, xrec, _gf_true)
+
+#define CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, label) do { \
+ if (!priv->active) \
+ goto label; \
+ /* ignore rebalance process's activity. */ \
+ if (frame->root->pid == GF_CLIENT_PID_DEFRAG) \
+ goto label; \
+ } while (0)
+
+/* ignore internal fops */
+#define CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO(dict, label) do { \
+ if (dict && dict_get (dict, GLUSTERFS_INTERNAL_FOP_KEY)) \
+ goto label; \
+ } while (0)
+
+#define CHANGELOG_COND_GOTO(priv, cond, label) do { \
+ if (!priv->active || cond) \
+ goto label; \
+ } while (0)
+
+#endif /* _CHANGELOG_HELPERS_H */
diff --git a/xlators/features/changelog/src/changelog-mem-types.h b/xlators/features/changelog/src/changelog-mem-types.h
new file mode 100644
index 000000000..d72464eab
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-mem-types.h
@@ -0,0 +1,29 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CHANGELOG_MEM_TYPES_H
+#define _CHANGELOG_MEM_TYPES_H
+
+#include "mem-types.h"
+
+enum gf_changelog_mem_types {
+ gf_changelog_mt_priv_t = gf_common_mt_end + 1,
+ gf_changelog_mt_str_t = gf_common_mt_end + 2,
+ gf_changelog_mt_batch_t = gf_common_mt_end + 3,
+ gf_changelog_mt_rt_t = gf_common_mt_end + 4,
+ gf_changelog_mt_inode_ctx_t = gf_common_mt_end + 5,
+ gf_changelog_mt_libgfchangelog_t = gf_common_mt_end + 6,
+ gf_changelog_mt_libgfchangelog_rl_t = gf_common_mt_end + 7,
+ gf_changelog_mt_libgfchangelog_dirent_t = gf_common_mt_end + 8,
+ gf_changelog_mt_changelog_buffer_t = gf_common_mt_end + 9,
+ gf_changelog_mt_end
+};
+
+#endif
diff --git a/xlators/features/changelog/src/changelog-misc.h b/xlators/features/changelog/src/changelog-misc.h
new file mode 100644
index 000000000..0712a3771
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-misc.h
@@ -0,0 +1,101 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CHANGELOG_MISC_H
+#define _CHANGELOG_MISC_H
+
+#include "glusterfs.h"
+#include "common-utils.h"
+
+#define CHANGELOG_MAX_TYPE 3
+#define CHANGELOG_FILE_NAME "CHANGELOG"
+
+#define CHANGELOG_VERSION_MAJOR 1
+#define CHANGELOG_VERSION_MINOR 0
+
+#define CHANGELOG_UNIX_SOCK DEFAULT_VAR_RUN_DIRECTORY"/changelog-%s.sock"
+
+/**
+ * header starts with the version and the format of the changelog.
+ * 'version' not much of a use now.
+ */
+#define CHANGELOG_HEADER \
+ "GlusterFS Changelog | version: v%d.%d | encoding : %d\n"
+
+#define CHANGELOG_MAKE_SOCKET_PATH(brick_path, sockpath, len) do { \
+ char md5_sum[MD5_DIGEST_LENGTH*2+1] = {0,}; \
+ md5_wrapper((unsigned char *) brick_path, \
+ strlen(brick_path), \
+ md5_sum); \
+ (void) snprintf (sockpath, len, \
+ CHANGELOG_UNIX_SOCK, md5_sum); \
+ } while (0)
+
+/**
+ * ... used by libgfchangelog.
+ */
+#define CHANGELOG_GET_ENCODING(fd, buffer, len, enc, enc_len) do { \
+ FILE *fp; \
+ int fd_dup, maj, min; \
+ \
+ enc = -1; \
+ fd_dup = dup (fd); \
+ \
+ if (fd_dup != -1) { \
+ fp = fdopen (fd_dup, "r"); \
+ if (fp) { \
+ if (fgets (buffer, len, fp)) { \
+ elen = strlen (buffer); \
+ sscanf (buffer, \
+ CHANGELOG_HEADER, \
+ &maj, &min, &enc); \
+ } \
+ fclose (fp); \
+ } else { \
+ close (fd_dup); \
+ } \
+ } \
+ } while (0)
+
+/**
+ * everything after 'CHANGELOG_TYPE_ENTRY' are internal types
+ * (ie. none of the fops trigger this type of event), hence
+ * CHANGELOG_MAX_TYPE = 3
+ */
+typedef enum {
+ CHANGELOG_TYPE_DATA = 0,
+ CHANGELOG_TYPE_METADATA,
+ CHANGELOG_TYPE_ENTRY,
+ CHANGELOG_TYPE_ROLLOVER,
+ CHANGELOG_TYPE_FSYNC,
+} changelog_log_type;
+
+/* operation modes - RT for now */
+typedef enum {
+ CHANGELOG_MODE_RT = 0,
+} changelog_mode_t;
+
+/* encoder types */
+
+typedef enum {
+ CHANGELOG_ENCODE_MIN = 0,
+ CHANGELOG_ENCODE_BINARY,
+ CHANGELOG_ENCODE_ASCII,
+ CHANGELOG_ENCODE_MAX,
+} changelog_encoder_t;
+
+#define CHANGELOG_VALID_ENCODING(enc) \
+ (enc > CHANGELOG_ENCODE_MIN && enc < CHANGELOG_ENCODE_MAX)
+
+#define CHANGELOG_TYPE_IS_ENTRY(type) (type == CHANGELOG_TYPE_ENTRY)
+#define CHANGELOG_TYPE_IS_ROLLOVER(type) (type == CHANGELOG_TYPE_ROLLOVER)
+#define CHANGELOG_TYPE_IS_FSYNC(type) (type == CHANGELOG_TYPE_FSYNC)
+
+#endif /* _CHANGELOG_MISC_H */
diff --git a/xlators/features/changelog/src/changelog-notifier.c b/xlators/features/changelog/src/changelog-notifier.c
new file mode 100644
index 000000000..1f8b31253
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-notifier.c
@@ -0,0 +1,314 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "changelog-notifier.h"
+
+#include <pthread.h>
+
+inline static void
+changelog_notify_clear_fd (changelog_notify_t *cn, int i)
+{
+ cn->client_fd[i] = -1;
+}
+
+inline static void
+changelog_notify_save_fd (changelog_notify_t *cn, int i, int fd)
+{
+ cn->client_fd[i] = fd;
+}
+
+static int
+changelog_notify_insert_fd (xlator_t *this, changelog_notify_t *cn, int fd)
+{
+ int i = 0;
+ int ret = 0;
+
+ for (; i < CHANGELOG_MAX_CLIENTS; i++) {
+ if (cn->client_fd[i] == -1)
+ break;
+ }
+
+ if (i == CHANGELOG_MAX_CLIENTS) {
+ /**
+ * this case should not be hit as listen() would limit
+ * the number of completely established connections.
+ */
+ gf_log (this->name, GF_LOG_WARNING,
+ "hit max client limit (%d)", CHANGELOG_MAX_CLIENTS);
+ ret = -1;
+ }
+ else
+ changelog_notify_save_fd (cn, i, fd);
+
+ return ret;
+}
+
+static void
+changelog_notify_fill_rset (changelog_notify_t *cn, fd_set *rset, int *maxfd)
+{
+ int i = 0;
+
+ FD_ZERO (rset);
+
+ FD_SET (cn->socket_fd, rset);
+ *maxfd = cn->socket_fd;
+
+ FD_SET (cn->rfd, rset);
+ *maxfd = max (*maxfd, cn->rfd);
+
+ for (; i < CHANGELOG_MAX_CLIENTS; i++) {
+ if (cn->client_fd[i] != -1) {
+ FD_SET (cn->client_fd[i], rset);
+ *maxfd = max (*maxfd, cn->client_fd[i]);
+ }
+ }
+
+ *maxfd = *maxfd + 1;
+}
+
+static int
+changelog_notify_client (changelog_notify_t *cn, char *path, ssize_t len)
+{
+ int i = 0;
+ int ret = 0;
+
+ for (; i < CHANGELOG_MAX_CLIENTS; i++) {
+ if (cn->client_fd[i] == -1)
+ continue;
+
+ if (changelog_write (cn->client_fd[i],
+ path, len)) {
+ ret = -1;
+
+ close (cn->client_fd[i]);
+ changelog_notify_clear_fd (cn, i);
+ }
+ }
+
+ return ret;
+}
+
+static void
+changelog_notifier_init (changelog_notify_t *cn)
+{
+ int i = 0;
+
+ cn->socket_fd = -1;
+
+ for (; i < CHANGELOG_MAX_CLIENTS; i++) {
+ changelog_notify_clear_fd (cn, i);
+ }
+}
+
+static void
+changelog_close_client_conn (changelog_notify_t *cn)
+{
+ int i = 0;
+
+ for (; i < CHANGELOG_MAX_CLIENTS; i++) {
+ if (cn->client_fd[i] == -1)
+ continue;
+
+ close (cn->client_fd[i]);
+ changelog_notify_clear_fd (cn, i);
+ }
+}
+
+static void
+changelog_notifier_cleanup (void *arg)
+{
+ changelog_notify_t *cn = NULL;
+
+ cn = (changelog_notify_t *) arg;
+
+ changelog_close_client_conn (cn);
+
+ if (cn->socket_fd != -1)
+ close (cn->socket_fd);
+
+ if (cn->rfd)
+ close (cn->rfd);
+
+ if (unlink (cn->sockpath))
+ gf_log ("", GF_LOG_WARNING,
+ "could not unlink changelog socket file"
+ " %s (reason: %s", cn->sockpath, strerror (errno));
+}
+
+void *
+changelog_notifier (void *data)
+{
+ int i = 0;
+ int fd = 0;
+ int max_fd = 0;
+ int len = 0;
+ ssize_t readlen = 0;
+ xlator_t *this = NULL;
+ changelog_priv_t *priv = NULL;
+ changelog_notify_t *cn = NULL;
+ struct sockaddr_un local = {0,};
+ char path[PATH_MAX] = {0,};
+ char abspath[PATH_MAX] = {0,};
+
+ char buffer;
+ fd_set rset;
+
+ priv = (changelog_priv_t *) data;
+
+ cn = &priv->cn;
+ this = cn->this;
+
+ pthread_cleanup_push (changelog_notifier_cleanup, cn);
+
+ changelog_notifier_init (cn);
+
+ cn->socket_fd = socket (AF_UNIX, SOCK_STREAM, 0);
+ if (cn->socket_fd < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "changelog socket error (reason: %s)",
+ strerror (errno));
+ goto out;
+ }
+
+ CHANGELOG_MAKE_SOCKET_PATH (priv->changelog_brick,
+ cn->sockpath, PATH_MAX);
+ if (unlink (cn->sockpath) < 0) {
+ if (errno != ENOENT) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not unlink changelog socket file (%s)"
+ " (reason: %s)",
+ CHANGELOG_UNIX_SOCK, strerror (errno));
+ goto cleanup;
+ }
+ }
+
+ local.sun_family = AF_UNIX;
+ strcpy (local.sun_path, cn->sockpath);
+
+ len = strlen (local.sun_path) + sizeof (local.sun_family);
+
+ /* bind to the unix domain socket */
+ if (bind (cn->socket_fd, (struct sockaddr *) &local, len) < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not bind to changelog socket (reason: %s)",
+ strerror (errno));
+ goto cleanup;
+ }
+
+ /* listen for incoming connections */
+ if (listen (cn->socket_fd, CHANGELOG_MAX_CLIENTS) < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "listen() error on changelog socket (reason: %s)",
+ strerror (errno));
+ goto cleanup;
+ }
+
+ /**
+ * simple select() on all to-be-read file descriptors. This method
+ * though old school works pretty well when you have a handfull of
+ * fd's to be watched (clients).
+ *
+ * Future TODO: move this to epoll based notification facility if
+ * number of clients increase.
+ */
+ for (;;) {
+ changelog_notify_fill_rset (cn, &rset, &max_fd);
+
+ if (select (max_fd, &rset, NULL, NULL, NULL) < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "select() returned -1 (reason: %s)",
+ strerror (errno));
+ sleep (2);
+ continue;
+ }
+
+ if (FD_ISSET (cn->socket_fd, &rset)) {
+ fd = accept (cn->socket_fd, NULL, NULL);
+ if (fd < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "accept error on changelog socket"
+ " (reason: %s)", strerror (errno));
+ } else if (changelog_notify_insert_fd (this, cn, fd)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "hit max client limit");
+ }
+ }
+
+ if (FD_ISSET (cn->rfd, &rset)) {
+ /**
+ * read changelog filename and notify all connected
+ * clients.
+ */
+ readlen = 0;
+ while (readlen < PATH_MAX) {
+ len = read (cn->rfd, &path[readlen++], 1);
+ if (len == -1) {
+ break;
+ }
+
+ if (len == 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rollover thread sent EOF"
+ " on pipe - possibly a crash.");
+ /* be blunt and close all connections */
+ pthread_exit(NULL);
+ }
+
+ if (path[readlen - 1] == '\0')
+ break;
+ }
+
+ /* should we close all client connections here too? */
+ if (len < 0 || readlen == PATH_MAX) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not get pathname from rollover"
+ " thread or pathname too long");
+ goto process_rest;
+ }
+
+ (void) snprintf (abspath, PATH_MAX,
+ "%s/%s", priv->changelog_dir, path);
+ if (changelog_notify_client (cn, abspath,
+ strlen (abspath) + 1))
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not notify some clients with new"
+ " changelogs");
+ }
+
+ process_rest:
+ for (i = 0; i < CHANGELOG_MAX_CLIENTS; i++) {
+ if ( (fd = cn->client_fd[i]) == -1 )
+ continue;
+
+ if (FD_ISSET (fd, &rset)) {
+ /**
+ * the only data we accept from the client is a
+ * disconnect. Anything else is treated as bogus
+ * and is silently discarded (also warned!!!).
+ */
+ if ( (readlen = read (fd, &buffer, 1)) <= 0 ) {
+ close (fd);
+ changelog_notify_clear_fd (cn, i);
+ } else {
+ /* silently discard data and log */
+ gf_log (this->name, GF_LOG_WARNING,
+ "misbehaving changelog client");
+ }
+ }
+ }
+
+ }
+
+ cleanup:;
+ pthread_cleanup_pop (1);
+
+ out:
+ return NULL;
+}
diff --git a/xlators/features/changelog/src/changelog-notifier.h b/xlators/features/changelog/src/changelog-notifier.h
new file mode 100644
index 000000000..55e728356
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-notifier.h
@@ -0,0 +1,19 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CHANGELOG_NOTIFIER_H
+#define _CHANGELOG_NOTIFIER_H
+
+#include "changelog-helpers.h"
+
+void *
+changelog_notifier (void *data);
+
+#endif
diff --git a/xlators/features/changelog/src/changelog-rt.c b/xlators/features/changelog/src/changelog-rt.c
new file mode 100644
index 000000000..c147f68ca
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-rt.c
@@ -0,0 +1,72 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "defaults.h"
+#include "logging.h"
+
+#include "changelog-rt.h"
+#include "changelog-mem-types.h"
+
+int
+changelog_rt_init (xlator_t *this, changelog_dispatcher_t *cd)
+{
+ changelog_rt_t *crt = NULL;
+
+ crt = GF_CALLOC (1, sizeof (*crt),
+ gf_changelog_mt_rt_t);
+ if (!crt)
+ return -1;
+
+ LOCK_INIT (&crt->lock);
+
+ cd->cd_data = crt;
+ cd->dispatchfn = &changelog_rt_enqueue;
+
+ return 0;
+}
+
+int
+changelog_rt_fini (xlator_t *this, changelog_dispatcher_t *cd)
+{
+ changelog_rt_t *crt = NULL;
+
+ crt = cd->cd_data;
+
+ LOCK_DESTROY (&crt->lock);
+ GF_FREE (crt);
+
+ return 0;
+}
+
+int
+changelog_rt_enqueue (xlator_t *this, changelog_priv_t *priv, void *cbatch,
+ changelog_log_data_t *cld_0, changelog_log_data_t *cld_1)
+{
+ int ret = 0;
+ changelog_rt_t *crt = NULL;
+
+ crt = (changelog_rt_t *) cbatch;
+
+ LOCK (&crt->lock);
+ {
+ ret = changelog_handle_change (this, priv, cld_0);
+ if (!ret && cld_1)
+ ret = changelog_handle_change (this, priv, cld_1);
+ }
+ UNLOCK (&crt->lock);
+
+ return ret;
+}
diff --git a/xlators/features/changelog/src/changelog-rt.h b/xlators/features/changelog/src/changelog-rt.h
new file mode 100644
index 000000000..1fc2bbc5b
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-rt.h
@@ -0,0 +1,33 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CHANGELOG_RT_H
+#define _CHANGELOG_RT_H
+
+#include "locking.h"
+#include "timer.h"
+#include "pthread.h"
+
+#include "changelog-helpers.h"
+
+/* unused as of now - may be you would need it later */
+typedef struct changelog_rt {
+ gf_lock_t lock;
+} changelog_rt_t;
+
+int
+changelog_rt_init (xlator_t *this, changelog_dispatcher_t *cd);
+int
+changelog_rt_fini (xlator_t *this, changelog_dispatcher_t *cd);
+int
+changelog_rt_enqueue (xlator_t *this, changelog_priv_t *priv, void *cbatch,
+ changelog_log_data_t *cld_0, changelog_log_data_t *cld_1);
+
+#endif /* _CHANGELOG_RT_H */
diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c
new file mode 100644
index 000000000..cea0e8c70
--- /dev/null
+++ b/xlators/features/changelog/src/changelog.c
@@ -0,0 +1,1477 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "defaults.h"
+#include "logging.h"
+#include "iobuf.h"
+
+#include "changelog-rt.h"
+
+#include "changelog-encoders.h"
+#include "changelog-mem-types.h"
+
+#include <pthread.h>
+
+#include "changelog-notifier.h"
+
+static struct changelog_bootstrap
+cb_bootstrap[] = {
+ {
+ .mode = CHANGELOG_MODE_RT,
+ .ctor = changelog_rt_init,
+ .dtor = changelog_rt_fini,
+ },
+};
+
+/* Entry operations - TYPE III */
+
+/**
+ * entry operations do not undergo inode version checking.
+ */
+
+/* {{{ */
+
+/* rmdir */
+
+int32_t
+changelog_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (rmdir, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int32_t
+changelog_rmdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int xflags, dict_t *xdata)
+{
+ size_t xtra_len = 0;
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ CHANGELOG_INIT_NOCHECK (this, frame->local,
+ NULL, loc->inode->gfid, 2);
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+ co++;
+ CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name,
+ entry_fn, entry_free_fn, xtra_len, wind);
+
+ changelog_set_usable_record_and_length (frame->local, xtra_len, 2);
+
+ wind:
+ STACK_WIND (frame, changelog_rmdir_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->rmdir,
+ loc, xflags, xdata);
+ return 0;
+}
+
+/* unlink */
+
+int32_t
+changelog_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (unlink, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int32_t
+changelog_unlink (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int xflags, dict_t *xdata)
+{
+ size_t xtra_len = 0;
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+ CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO (xdata, wind);
+
+ CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, loc->inode->gfid, 2);
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+ co++;
+ CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name,
+ entry_fn, entry_free_fn, xtra_len, wind);
+
+ changelog_set_usable_record_and_length (frame->local, xtra_len, 2);
+
+ wind:
+ STACK_WIND (frame, changelog_unlink_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->unlink,
+ loc, xflags, xdata);
+ return 0;
+}
+
+/* rename */
+
+int32_t
+changelog_rename_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *buf, struct iatt *preoldparent,
+ struct iatt *postoldparent, struct iatt *prenewparent,
+ struct iatt *postnewparent, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (rename, frame, op_ret, op_errno,
+ buf, preoldparent, postoldparent,
+ prenewparent, postnewparent, xdata);
+ return 0;
+}
+
+
+int32_t
+changelog_rename (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ size_t xtra_len = 0;
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ /* 3 == fop + oldloc + newloc */
+ CHANGELOG_INIT_NOCHECK (this, frame->local,
+ NULL, oldloc->inode->gfid, 3);
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+ co++;
+ CHANGELOG_FILL_ENTRY (co, oldloc->pargfid, oldloc->name,
+ entry_fn, entry_free_fn, xtra_len, wind);
+
+ co++;
+ CHANGELOG_FILL_ENTRY (co, newloc->pargfid, newloc->name,
+ entry_fn, entry_free_fn, xtra_len, wind);
+
+ changelog_set_usable_record_and_length (frame->local, xtra_len, 3);
+
+ wind:
+ STACK_WIND (frame, changelog_rename_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->rename,
+ oldloc, newloc, xdata);
+ return 0;
+}
+
+/* link */
+
+int32_t
+changelog_link_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (link, frame, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata);
+ return 0;
+}
+
+int32_t
+changelog_link (call_frame_t *frame,
+ xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ size_t xtra_len = 0;
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+
+ priv = this->private;
+
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+ CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO (xdata, wind);
+
+ CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, oldloc->gfid, 2);
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+ co++;
+ CHANGELOG_FILL_ENTRY (co, newloc->pargfid, newloc->name,
+ entry_fn, entry_free_fn, xtra_len, wind);
+
+ changelog_set_usable_record_and_length (frame->local, xtra_len, 2);
+
+ wind:
+ STACK_WIND (frame, changelog_link_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->link,
+ oldloc, newloc, xdata);
+ return 0;
+}
+
+/* mkdir */
+
+int32_t
+changelog_mkdir_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (mkdir, frame, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata);
+ return 0;
+}
+
+int32_t
+changelog_mkdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
+{
+ int ret = -1;
+ uuid_t gfid = {0,};
+ void *uuid_req = NULL;
+ size_t xtra_len = 0;
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ ret = dict_get_ptr (xdata, "gfid-req", &uuid_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to get gfid from dict");
+ goto wind;
+ }
+ uuid_copy (gfid, uuid_req);
+
+ CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, gfid, 2);
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+ co++;
+ CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name,
+ entry_fn, entry_free_fn, xtra_len, wind);
+
+ changelog_set_usable_record_and_length (frame->local, xtra_len, 2);
+
+ wind:
+ STACK_WIND (frame, changelog_mkdir_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->mkdir,
+ loc, mode, umask, xdata);
+ return 0;
+}
+
+/* symlink */
+
+int32_t
+changelog_symlink_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (symlink, frame, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata);
+ return 0;
+}
+
+int32_t
+changelog_symlink (call_frame_t *frame, xlator_t *this,
+ const char *linkname, loc_t *loc,
+ mode_t umask, dict_t *xdata)
+{
+ int ret = -1;
+ size_t xtra_len = 0;
+ uuid_t gfid = {0,};
+ void *uuid_req = NULL;
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ ret = dict_get_ptr (xdata, "gfid-req", &uuid_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to get gfid from dict");
+ goto wind;
+ }
+ uuid_copy (gfid, uuid_req);
+
+ CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, gfid, 2);
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+ co++;
+ CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name,
+ entry_fn, entry_free_fn, xtra_len, wind);
+
+ changelog_set_usable_record_and_length (frame->local, xtra_len, 2);
+
+ wind:
+ STACK_WIND (frame, changelog_symlink_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->symlink,
+ linkname, loc, umask, xdata);
+ return 0;
+}
+
+/* mknod */
+
+int32_t
+changelog_mknod_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (mknod, frame, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata);
+ return 0;
+}
+
+int32_t
+changelog_mknod (call_frame_t *frame,
+ xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t dev, mode_t umask, dict_t *xdata)
+{
+ int ret = -1;
+ uuid_t gfid = {0,};
+ void *uuid_req = NULL;
+ size_t xtra_len = 0;
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ ret = dict_get_ptr (xdata, "gfid-req", &uuid_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to get gfid from dict");
+ goto wind;
+ }
+ uuid_copy (gfid, uuid_req);
+
+ CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, gfid, 2);
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+ co++;
+ CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name,
+ entry_fn, entry_free_fn, xtra_len, wind);
+
+ changelog_set_usable_record_and_length (frame->local, xtra_len, 2);
+
+ wind:
+ STACK_WIND (frame, changelog_mknod_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->mknod,
+ loc, mode, dev, umask, xdata);
+ return 0;
+}
+
+/* creat */
+
+int32_t
+changelog_create_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ fd_t *fd, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (create, frame,
+ op_ret, op_errno, fd, inode,
+ buf, preparent, postparent, xdata);
+ return 0;
+}
+
+int32_t
+changelog_create (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ int ret = -1;
+ uuid_t gfid = {0,};
+ void *uuid_req = NULL;
+ changelog_opt_t *co = NULL;
+ changelog_priv_t *priv = NULL;
+ size_t xtra_len = 0;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ ret = dict_get_ptr (xdata, "gfid-req", &uuid_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to get gfid from dict");
+ goto wind;
+ }
+ uuid_copy (gfid, uuid_req);
+
+ /* init with two extra records */
+ CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, gfid, 2);
+ if (!frame->local)
+ goto wind;
+
+ co = changelog_get_usable_buffer (frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len);
+
+ co++;
+ CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name,
+ entry_fn, entry_free_fn, xtra_len, wind);
+
+ changelog_set_usable_record_and_length (frame->local, xtra_len, 2);
+
+ wind:
+ STACK_WIND (frame, changelog_create_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->create,
+ loc, flags, mode, umask, fd, xdata);
+ return 0;
+}
+
+/* }}} */
+
+
+/* Metadata modification fops - TYPE II */
+
+/* {{{ */
+
+/* {f}setattr */
+
+int32_t
+changelog_fsetattr_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *preop_stbuf,
+ struct iatt *postop_stbuf, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (fsetattr, frame, op_ret, op_errno,
+ preop_stbuf, postop_stbuf, xdata);
+
+ return 0;
+
+
+}
+
+int32_t
+changelog_fsetattr (call_frame_t *frame,
+ xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ CHANGELOG_INIT (this, frame->local,
+ fd->inode, fd->inode->gfid, 0);
+
+ wind:
+ STACK_WIND (frame, changelog_fsetattr_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsetattr,
+ fd, stbuf, valid, xdata);
+ return 0;
+
+
+}
+
+int32_t
+changelog_setattr_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *preop_stbuf,
+ struct iatt *postop_stbuf, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (setattr, frame, op_ret, op_errno,
+ preop_stbuf, postop_stbuf, xdata);
+
+ return 0;
+}
+
+int32_t
+changelog_setattr (call_frame_t *frame,
+ xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ CHANGELOG_INIT (this, frame->local,
+ loc->inode, loc->inode->gfid, 0);
+
+ wind:
+ STACK_WIND (frame, changelog_setattr_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->setattr,
+ loc, stbuf, valid, xdata);
+ return 0;
+}
+
+/* {f}removexattr */
+
+int32_t
+changelog_fremovexattr_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+int32_t
+changelog_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ CHANGELOG_INIT (this, frame->local,
+ fd->inode, fd->inode->gfid, 0);
+
+ wind:
+ STACK_WIND (frame, changelog_fremovexattr_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->fremovexattr,
+ fd, name, xdata);
+ return 0;
+}
+
+int32_t
+changelog_removexattr_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+int32_t
+changelog_removexattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ CHANGELOG_INIT (this, frame->local,
+ loc->inode, loc->inode->gfid, 0);
+
+ wind:
+ STACK_WIND (frame, changelog_removexattr_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->removexattr,
+ loc, name, xdata);
+ return 0;
+}
+
+/* {f}setxattr */
+
+int32_t
+changelog_setxattr_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+int32_t
+changelog_setxattr (call_frame_t *frame,
+ xlator_t *this, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ CHANGELOG_INIT (this, frame->local,
+ loc->inode, loc->inode->gfid, 0);
+
+ wind:
+ STACK_WIND (frame, changelog_setxattr_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->setxattr,
+ loc, dict, flags, xdata);
+ return 0;
+}
+
+int32_t
+changelog_fsetxattr_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_METADATA);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+int32_t
+changelog_fsetxattr (call_frame_t *frame,
+ xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ CHANGELOG_INIT (this, frame->local,
+ fd->inode, fd->inode->gfid, 0);
+
+ wind:
+ STACK_WIND (frame, changelog_fsetxattr_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsetxattr,
+ fd, dict, flags, xdata);
+ return 0;
+}
+
+/* }}} */
+
+
+/* Data modification fops - TYPE I */
+
+/* {{{ */
+
+/* {f}truncate() */
+
+int32_t
+changelog_truncate_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_DATA);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (truncate, frame,
+ op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
+
+int32_t
+changelog_truncate (call_frame_t *frame,
+ xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ CHANGELOG_INIT (this, frame->local,
+ loc->inode, loc->inode->gfid, 0);
+
+ wind:
+ STACK_WIND (frame, changelog_truncate_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->truncate,
+ loc, offset, xdata);
+ return 0;
+}
+
+int32_t
+changelog_ftruncate_cbk (call_frame_t *frame,
+ void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_DATA);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (ftruncate, frame,
+ op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
+
+int32_t
+changelog_ftruncate (call_frame_t *frame,
+ xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ CHANGELOG_INIT (this, frame->local,
+ fd->inode, fd->inode->gfid, 0);
+
+ wind:
+ STACK_WIND (frame, changelog_ftruncate_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->ftruncate,
+ fd, offset, xdata);
+ return 0;
+}
+
+/* writev() */
+
+int32_t
+changelog_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf,
+ dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO (priv, ((op_ret <= 0) || !local), unwind);
+
+ changelog_update (this, priv, local, CHANGELOG_TYPE_DATA);
+
+ unwind:
+ CHANGELOG_STACK_UNWIND (writev, frame,
+ op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
+
+int32_t
+changelog_writev (call_frame_t *frame,
+ xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind);
+
+ CHANGELOG_INIT (this, frame->local,
+ fd->inode, fd->inode->gfid, 0);
+
+ wind:
+ STACK_WIND (frame, changelog_writev_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->writev, fd, vector,
+ count, offset, flags, iobref, xdata);
+ return 0;
+}
+
+/* }}} */
+
+/**
+ * The
+ * - @init ()
+ * - @fini ()
+ * - @reconfigure ()
+ * ... and helper routines
+ */
+
+/**
+ * needed if there are more operation modes in the future.
+ */
+static void
+changelog_assign_opmode (changelog_priv_t *priv, char *mode)
+{
+ if ( strncmp (mode, "realtime", 8) == 0 ) {
+ priv->op_mode = CHANGELOG_MODE_RT;
+ }
+}
+
+static void
+changelog_assign_encoding (changelog_priv_t *priv, char *enc)
+{
+ if ( strncmp (enc, "binary", 6) == 0 ) {
+ priv->encode_mode = CHANGELOG_ENCODE_BINARY;
+ } else if ( strncmp (enc, "ascii", 5) == 0 ) {
+ priv->encode_mode = CHANGELOG_ENCODE_ASCII;
+ }
+}
+
+/* cleanup any helper threads that are running */
+static void
+changelog_cleanup_helper_threads (xlator_t *this, changelog_priv_t *priv)
+{
+ if (priv->cr.rollover_th) {
+ changelog_thread_cleanup (this, priv->cr.rollover_th);
+ priv->cr.rollover_th = 0;
+ }
+
+ if (priv->cf.fsync_th) {
+ changelog_thread_cleanup (this, priv->cf.fsync_th);
+ priv->cf.fsync_th = 0;
+ }
+}
+
+/* spawn helper thread; cleaning up in case of errors */
+static int
+changelog_spawn_helper_threads (xlator_t *this, changelog_priv_t *priv)
+{
+ int ret = 0;
+
+ priv->cr.this = this;
+ ret = gf_thread_create (&priv->cr.rollover_th,
+ NULL, changelog_rollover, priv);
+ if (ret)
+ goto out;
+
+ if (priv->fsync_interval) {
+ priv->cf.this = this;
+ ret = gf_thread_create (&priv->cf.fsync_th,
+ NULL, changelog_fsync_thread, priv);
+ }
+
+ if (ret)
+ changelog_cleanup_helper_threads (this, priv);
+
+ out:
+ return ret;
+}
+
+/* cleanup the notifier thread */
+static int
+changelog_cleanup_notifier (xlator_t *this, changelog_priv_t *priv)
+{
+ int ret = 0;
+
+ if (priv->cn.notify_th) {
+ changelog_thread_cleanup (this, priv->cn.notify_th);
+ priv->cn.notify_th = 0;
+
+ ret = close (priv->wfd);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "error closing writer end of notifier pipe"
+ " (reason: %s)", strerror (errno));
+ }
+
+ return ret;
+}
+
+/* spawn the notifier thread - nop if already running */
+static int
+changelog_spawn_notifier (xlator_t *this, changelog_priv_t *priv)
+{
+ int ret = 0;
+ int flags = 0;
+ int pipe_fd[2] = {0, 0};
+
+ if (priv->cn.notify_th)
+ goto out; /* notifier thread already running */
+
+ ret = pipe (pipe_fd);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Cannot create pipe (reason: %s)", strerror (errno));
+ goto out;
+ }
+
+ /* writer is non-blocking */
+ flags = fcntl (pipe_fd[1], F_GETFL);
+ flags |= O_NONBLOCK;
+
+ ret = fcntl (pipe_fd[1], F_SETFL, flags);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set O_NONBLOCK flag");
+ goto out;
+ }
+
+ priv->wfd = pipe_fd[1];
+
+ priv->cn.this = this;
+ priv->cn.rfd = pipe_fd[0];
+
+ ret = gf_thread_create (&priv->cn.notify_th,
+ NULL, changelog_notifier, priv);
+
+ out:
+ return ret;
+}
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_changelog_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_WARNING, "Memory accounting"
+ " init failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+static int
+changelog_init (xlator_t *this, changelog_priv_t *priv)
+{
+ int i = 0;
+ int ret = -1;
+ struct timeval tv = {0,};
+ changelog_log_data_t cld = {0,};
+
+ ret = gettimeofday (&tv, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "gettimeofday() failure");
+ goto out;
+ }
+
+ priv->slice.tv_start = tv;
+
+ priv->maps[CHANGELOG_TYPE_DATA] = "D ";
+ priv->maps[CHANGELOG_TYPE_METADATA] = "M ";
+ priv->maps[CHANGELOG_TYPE_ENTRY] = "E ";
+
+ for (; i < CHANGELOG_MAX_TYPE; i++) {
+ /* start with version 1 */
+ priv->slice.changelog_version[i] = 1;
+ }
+
+ if (!priv->active)
+ return ret;
+
+ /* spawn the notifier thread */
+ ret = changelog_spawn_notifier (this, priv);
+ if (ret)
+ goto out;
+
+ /**
+ * start with a fresh changelog file every time. this is done
+ * in case there was an encoding change. so... things are kept
+ * simple here.
+ */
+ ret = changelog_fill_rollover_data (&cld, _gf_false);
+ if (ret)
+ goto out;
+
+ LOCK (&priv->lock);
+ {
+ ret = changelog_inject_single_event (this, priv, &cld);
+ }
+ UNLOCK (&priv->lock);
+
+ /* ... and finally spawn the helpers threads */
+ ret = changelog_spawn_helper_threads (this, priv);
+
+ out:
+ return ret;
+}
+
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ int ret = 0;
+ char *tmp = NULL;
+ changelog_priv_t *priv = NULL;
+ gf_boolean_t active_earlier = _gf_true;
+ gf_boolean_t active_now = _gf_true;
+ changelog_time_slice_t *slice = NULL;
+ changelog_log_data_t cld = {0,};
+
+ priv = this->private;
+ if (!priv)
+ goto out;
+
+ ret = -1;
+ active_earlier = priv->active;
+
+ /* first stop the rollover and the fsync thread */
+ changelog_cleanup_helper_threads (this, priv);
+
+ GF_OPTION_RECONF ("changelog-dir", tmp, options, str, out);
+ if (!tmp) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "\"changelog-dir\" option is not set");
+ goto out;
+ }
+
+ GF_FREE (priv->changelog_dir);
+ priv->changelog_dir = gf_strdup (tmp);
+ if (!priv->changelog_dir)
+ goto out;
+
+ ret = mkdir_p (priv->changelog_dir, 0600, _gf_true);
+ if (ret)
+ goto out;
+
+ GF_OPTION_RECONF ("changelog", active_now, options, bool, out);
+
+ /**
+ * changelog_handle_change() handles changes that could possibly
+ * have been submit changes before changelog deactivation.
+ */
+ if (!active_now)
+ priv->active = _gf_false;
+
+ GF_OPTION_RECONF ("op-mode", tmp, options, str, out);
+ changelog_assign_opmode (priv, tmp);
+
+ tmp = NULL;
+
+ GF_OPTION_RECONF ("encoding", tmp, options, str, out);
+ changelog_assign_encoding (priv, tmp);
+
+ GF_OPTION_RECONF ("rollover-time",
+ priv->rollover_time, options, int32, out);
+ GF_OPTION_RECONF ("fsync-interval",
+ priv->fsync_interval, options, int32, out);
+
+ if (active_now || active_earlier) {
+ ret = changelog_fill_rollover_data (&cld, !active_now);
+ if (ret)
+ goto out;
+
+ slice = &priv->slice;
+
+ LOCK (&priv->lock);
+ {
+ ret = changelog_inject_single_event (this, priv, &cld);
+ if (!ret && active_now)
+ SLICE_VERSION_UPDATE (slice);
+ }
+ UNLOCK (&priv->lock);
+
+ if (ret)
+ goto out;
+
+ if (active_now) {
+ ret = changelog_spawn_notifier (this, priv);
+ if (!ret)
+ ret = changelog_spawn_helper_threads (this,
+ priv);
+ } else
+ ret = changelog_cleanup_notifier (this, priv);
+ }
+
+ out:
+ if (ret) {
+ ret = changelog_cleanup_notifier (this, priv);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "changelog reconfigured");
+ if (active_now)
+ priv->active = _gf_true;
+ }
+
+ return ret;
+}
+
+int32_t
+init (xlator_t *this)
+{
+ int ret = -1;
+ char *tmp = NULL;
+ changelog_priv_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO ("changelog", this, out);
+
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "translator needs a single subvolume");
+ goto out;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "dangling volume. please check volfile");
+ goto out;
+ }
+
+ priv = GF_CALLOC (1, sizeof (*priv), gf_changelog_mt_priv_t);
+ if (!priv)
+ goto out;
+
+ this->local_pool = mem_pool_new (changelog_local_t, 64);
+ if (!this->local_pool) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local memory pool");
+ goto out;
+ }
+
+ LOCK_INIT (&priv->lock);
+
+ GF_OPTION_INIT ("changelog-brick", tmp, str, out);
+ if (!tmp) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "\"changelog-brick\" option is not set");
+ goto out;
+ }
+
+ priv->changelog_brick = gf_strdup (tmp);
+ if (!priv->changelog_brick)
+ goto out;
+ tmp = NULL;
+
+ GF_OPTION_INIT ("changelog-dir", tmp, str, out);
+ if (!tmp) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "\"changelog-dir\" option is not set");
+ goto out;
+ }
+
+ priv->changelog_dir = gf_strdup (tmp);
+ if (!priv->changelog_dir)
+ goto out;
+ tmp = NULL;
+
+ /**
+ * create the directory even if change-logging would be inactive
+ * so that consumers can _look_ into it (finding nothing...)
+ */
+ ret = mkdir_p (priv->changelog_dir, 0600, _gf_true);
+ if (ret)
+ goto out;
+
+ GF_OPTION_INIT ("changelog", priv->active, bool, out);
+
+ GF_OPTION_INIT ("op-mode", tmp, str, out);
+ changelog_assign_opmode (priv, tmp);
+
+ tmp = NULL;
+
+ GF_OPTION_INIT ("encoding", tmp, str, out);
+ changelog_assign_encoding (priv, tmp);
+
+ GF_OPTION_INIT ("rollover-time", priv->rollover_time, int32, out);
+
+ GF_OPTION_INIT ("fsync-interval", priv->fsync_interval, int32, out);
+
+ changelog_encode_change(priv);
+
+ GF_ASSERT (cb_bootstrap[priv->op_mode].mode == priv->op_mode);
+ priv->cb = &cb_bootstrap[priv->op_mode];
+
+ /* ... now bootstrap the logger */
+ ret = priv->cb->ctor (this, &priv->cd);
+ if (ret)
+ goto out;
+
+ priv->changelog_fd = -1;
+ ret = changelog_init (this, priv);
+ if (ret)
+ goto out;
+
+ gf_log (this->name, GF_LOG_DEBUG, "changelog translator loaded");
+
+ out:
+ if (ret) {
+ if (this->local_pool)
+ mem_pool_destroy (this->local_pool);
+ if (priv->cb) {
+ ret = priv->cb->dtor (this, &priv->cd);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "error in cleanup during init()");
+ }
+ GF_FREE (priv->changelog_brick);
+ GF_FREE (priv->changelog_dir);
+ GF_FREE (priv);
+ this->private = NULL;
+ } else
+ this->private = priv;
+
+ return ret;
+}
+
+void
+fini (xlator_t *this)
+{
+ int ret = -1;
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv) {
+ ret = priv->cb->dtor (this, &priv->cd);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "error in fini");
+ mem_pool_destroy (this->local_pool);
+ GF_FREE (priv->changelog_brick);
+ GF_FREE (priv->changelog_dir);
+ GF_FREE (priv);
+ }
+
+ this->private = NULL;
+
+ return;
+}
+
+struct xlator_fops fops = {
+ .mknod = changelog_mknod,
+ .mkdir = changelog_mkdir,
+ .create = changelog_create,
+ .symlink = changelog_symlink,
+ .writev = changelog_writev,
+ .truncate = changelog_truncate,
+ .ftruncate = changelog_ftruncate,
+ .link = changelog_link,
+ .rename = changelog_rename,
+ .unlink = changelog_unlink,
+ .rmdir = changelog_rmdir,
+ .setattr = changelog_setattr,
+ .fsetattr = changelog_fsetattr,
+ .setxattr = changelog_setxattr,
+ .fsetxattr = changelog_fsetxattr,
+ .removexattr = changelog_removexattr,
+ .fremovexattr = changelog_fremovexattr,
+};
+
+struct xlator_cbks cbks = {
+ .forget = changelog_forget,
+};
+
+struct volume_options options[] = {
+ {.key = {"changelog"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable/disable change-logging"
+ },
+ {.key = {"changelog-brick"},
+ .type = GF_OPTION_TYPE_PATH,
+ .description = "brick path to generate unique socket file name."
+ " should be the export directory of the volume strictly."
+ },
+ {.key = {"changelog-dir"},
+ .type = GF_OPTION_TYPE_PATH,
+ .description = "directory for the changelog files"
+ },
+ {.key = {"op-mode"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "realtime",
+ .value = {"realtime"},
+ .description = "operation mode - futuristic operation modes"
+ },
+ {.key = {"encoding"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "ascii",
+ .value = {"binary", "ascii"},
+ .description = "encoding type for changelogs"
+ },
+ {.key = {"rollover-time"},
+ .default_value = "60",
+ .type = GF_OPTION_TYPE_TIME,
+ .description = "time to switch to a new changelog file (in seconds)"
+ },
+ {.key = {"fsync-interval"},
+ .type = GF_OPTION_TYPE_TIME,
+ .default_value = "0",
+ .description = "do not open CHANGELOG file with O_SYNC mode."
+ " instead perform fsync() at specified intervals"
+ },
+ {.key = {NULL}
+ },
+};
diff --git a/scheduler/rr/Makefile.am b/xlators/features/compress/Makefile.am
index d471a3f92..a985f42a8 100644
--- a/scheduler/rr/Makefile.am
+++ b/xlators/features/compress/Makefile.am
@@ -1,3 +1,3 @@
SUBDIRS = src
-CLEANFILES =
+CLEANFILES =
diff --git a/xlators/features/compress/src/Makefile.am b/xlators/features/compress/src/Makefile.am
new file mode 100644
index 000000000..4a64b52a9
--- /dev/null
+++ b/xlators/features/compress/src/Makefile.am
@@ -0,0 +1,17 @@
+xlator_LTLIBRARIES = cdc.la
+
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+noinst_HEADERS = cdc.h cdc-mem-types.h
+
+cdc_la_LDFLAGS = -module -avoidversion $(LIBZ_LIBS)
+
+cdc_la_SOURCES = cdc.c cdc-helper.c
+cdc_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) \
+-shared -nostartfiles $(LIBZ_CFLAGS)
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/compress/src/cdc-helper.c b/xlators/features/compress/src/cdc-helper.c
new file mode 100644
index 000000000..54432ff45
--- /dev/null
+++ b/xlators/features/compress/src/cdc-helper.c
@@ -0,0 +1,547 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+
+#include "cdc.h"
+#include "cdc-mem-types.h"
+
+#ifdef HAVE_LIB_Z
+#include "zlib.h"
+#endif
+
+#ifdef HAVE_LIB_Z
+/* gzip header looks something like this
+ * (RFC 1950)
+ *
+ * +---+---+---+---+---+---+---+---+---+---+
+ * |ID1|ID2|CM |FLG| MTIME |XFL|OS |
+ * +---+---+---+---+---+---+---+---+---+---+
+ *
+ * Data is usually sent without this header i.e
+ * Data sent = <compressed-data> + trailer(8)
+ * The trailer contains the checksum.
+ *
+ * gzip_header is added only during debugging.
+ * Refer to the function cdc_dump_iovec_to_disk
+ */
+static const char gzip_header[10] =
+ {
+ '\037', '\213', Z_DEFLATED, 0,
+ 0, 0, 0, 0,
+ 0, GF_CDC_OS_ID
+ };
+
+static int32_t
+cdc_next_iovec (xlator_t *this, cdc_info_t *ci)
+{
+ int ret = -1;
+
+ ci->ncount++;
+ /* check for iovec overflow -- should not happen */
+ if (ci->ncount == MAX_IOVEC) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Zlib output buffer overflow"
+ " ->ncount (%d) | ->MAX_IOVEC (%d)",
+ ci->ncount, MAX_IOVEC);
+ goto out;
+ }
+
+ ret = 0;
+
+ out:
+ return ret;
+}
+
+static void
+cdc_put_long (unsigned char *string, unsigned long x)
+{
+ string[0] = (unsigned char) (x & 0xff);
+ string[1] = (unsigned char) ((x & 0xff00) >> 8);
+ string[2] = (unsigned char) ((x & 0xff0000) >> 16);
+ string[3] = (unsigned char) ((x & 0xff000000) >> 24);
+}
+
+static unsigned long
+cdc_get_long (unsigned char *buf)
+{
+ return ((unsigned long) buf[0])
+ | (((unsigned long) buf[1]) << 8)
+ | (((unsigned long) buf[2]) << 16)
+ | (((unsigned long) buf[3]) << 24);
+}
+
+static int32_t
+cdc_init_gzip_trailer (xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci)
+{
+ int ret = -1;
+ char *buf = NULL;
+
+ ret = cdc_next_iovec (this, ci);
+ if (ret)
+ goto out;
+
+ buf = CURR_VEC(ci).iov_base =
+ (char *) GF_CALLOC (1, GF_CDC_VALIDATION_SIZE,
+ gf_cdc_mt_gzip_trailer_t);
+
+ if (!CURR_VEC(ci).iov_base)
+ goto out;
+
+ CURR_VEC(ci).iov_len = GF_CDC_VALIDATION_SIZE;
+
+ cdc_put_long ((unsigned char *)&buf[0], ci->crc);
+ cdc_put_long ((unsigned char *)&buf[4], ci->stream.total_in);
+
+ ret = 0;
+
+ out:
+ return ret;
+}
+
+static int32_t
+cdc_alloc_iobuf_and_init_vec (xlator_t *this,
+ cdc_priv_t *priv, cdc_info_t *ci,
+ int size)
+{
+ int ret = -1;
+ int alloc_len = 0;
+ struct iobuf *iobuf = NULL;
+
+ ret = cdc_next_iovec (this, ci);
+ if (ret)
+ goto out;
+
+ alloc_len = size ? size : ci->buffer_size;
+
+ iobuf = iobuf_get2 (this->ctx->iobuf_pool, alloc_len);
+ if (!iobuf)
+ goto out;
+
+ ret = iobref_add (ci->iobref, iobuf);
+ if (ret)
+ goto out;
+
+ /* Initialize this iovec */
+ CURR_VEC(ci).iov_base = iobuf->ptr;
+ CURR_VEC(ci).iov_len = alloc_len;
+
+ ret = 0;
+
+ out:
+ return ret;
+}
+
+static void
+cdc_init_zlib_output_stream (cdc_priv_t *priv, cdc_info_t *ci, int size)
+{
+ ci->stream.next_out = (unsigned char *) CURR_VEC(ci).iov_base;
+ ci->stream.avail_out = size ? size : ci->buffer_size;
+}
+
+/* This routine is for testing and debugging only.
+ * Data written = header(10) + <compressed-data> + trailer(8)
+ * So each gzip dump file is at least 18 bytes in size.
+ */
+void
+cdc_dump_iovec_to_disk (xlator_t *this, cdc_info_t *ci, const char *file)
+{
+ int i = 0;
+ int fd = 0;
+ size_t writen = 0;
+ size_t total_writen = 0;
+
+ fd = open (file, O_WRONLY|O_CREAT|O_TRUNC, 0777 );
+ if (fd < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Cannot open file: %s", file);
+ return;
+ }
+
+ writen = write (fd, (char *) gzip_header, 10);
+ total_writen += writen;
+ for (i = 0; i < ci->ncount; i++) {
+ writen = write (fd, (char *) ci->vec[i].iov_base, ci->vec[i].iov_len);
+ total_writen += writen;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "dump'd %zu bytes to %s", total_writen, GF_CDC_DEBUG_DUMP_FILE );
+
+ close (fd);
+}
+
+static int32_t
+cdc_flush_libz_buffer (cdc_priv_t *priv, xlator_t *this, cdc_info_t *ci,
+ int (*libz_func)(z_streamp, int),
+ int flush)
+{
+ int32_t ret = Z_OK;
+ int done = 0;
+ unsigned int deflate_len = 0;
+
+ for (;;) {
+ deflate_len = ci->buffer_size - ci->stream.avail_out;
+
+ if (deflate_len != 0) {
+ CURR_VEC(ci).iov_len = deflate_len;
+
+ ret = cdc_alloc_iobuf_and_init_vec (this, priv, ci, 0);
+ if (ret) {
+ ret = Z_MEM_ERROR;
+ break;
+ }
+
+ /* Re-position Zlib output buffer */
+ cdc_init_zlib_output_stream (priv, ci, 0);
+ }
+
+ if (done) {
+ ci->ncount--;
+ break;
+ }
+
+ ret = libz_func (&ci->stream, flush);
+
+ if (ret == Z_BUF_ERROR) {
+ ret = Z_OK;
+ ci->ncount--;
+ break;
+ }
+
+ done = (ci->stream.avail_out != 0 || ret == Z_STREAM_END);
+
+ if (ret != Z_OK && ret != Z_STREAM_END)
+ break;
+ }
+
+ return ret;
+}
+
+static int32_t
+do_cdc_compress (struct iovec *vec, xlator_t *this, cdc_priv_t *priv,
+ cdc_info_t *ci)
+{
+ int ret = -1;
+
+ /* Initialize defalte */
+ ret = deflateInit2 (&ci->stream, priv->cdc_level, Z_DEFLATED,
+ priv->window_size, priv->mem_level,
+ Z_DEFAULT_STRATEGY);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "unable to init Zlib (retval: %d)", ret);
+ goto out;
+ }
+
+ ret = cdc_alloc_iobuf_and_init_vec (this, priv, ci, 0);
+ if (ret)
+ goto out;
+
+ /* setup output buffer */
+ cdc_init_zlib_output_stream (priv, ci, 0);
+
+ /* setup input buffer */
+ ci->stream.next_in = (unsigned char *) vec->iov_base;
+ ci->stream.avail_in = vec->iov_len;
+
+ ci->crc = crc32 (ci->crc, (const Bytef *) vec->iov_base, vec->iov_len);
+
+ gf_log (this->name, GF_LOG_DEBUG, "crc=%lu len=%d buffer_size=%d",
+ ci->crc, ci->stream.avail_in, ci->buffer_size);
+
+ /* compress !! */
+ while (ci->stream.avail_in != 0) {
+ if (ci->stream.avail_out == 0) {
+
+ CURR_VEC(ci).iov_len = ci->buffer_size;
+
+ ret = cdc_alloc_iobuf_and_init_vec (this, priv, ci, 0);
+ if (ret)
+ break;
+
+ /* Re-position Zlib output buffer */
+ cdc_init_zlib_output_stream (priv, ci, 0);
+ }
+
+ ret = deflate (&ci->stream, Z_NO_FLUSH);
+ if (ret != Z_OK)
+ break;
+ }
+
+ out:
+ return ret;
+}
+
+int32_t
+cdc_compress (xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci,
+ dict_t **xdata)
+{
+ int ret = -1;
+ int i = 0;
+
+ ci->iobref = iobref_new ();
+ if (!ci->iobref)
+ goto out;
+
+ if (!*xdata) {
+ *xdata = dict_new ();
+ if (!*xdata) {
+ gf_log (this->name, GF_LOG_ERROR, "Cannot allocate xdata"
+ " dict");
+ goto out;
+ }
+ }
+
+ /* data */
+ for (i = 0; i < ci->count; i++) {
+ ret = do_cdc_compress (&ci->vector[i], this, priv, ci);
+ if (ret != Z_OK)
+ goto deflate_cleanup_out;
+ }
+
+ /* flush zlib buffer */
+ ret = cdc_flush_libz_buffer (priv, this, ci, deflate, Z_FINISH);
+ if (!(ret == Z_OK || ret == Z_STREAM_END)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Compression Error: ret (%d)", ret);
+ ret = -1;
+ goto deflate_cleanup_out;
+ }
+
+ /* trailer */
+ ret = cdc_init_gzip_trailer (this, priv, ci);
+ if (ret)
+ goto deflate_cleanup_out;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Compressed %ld to %ld bytes",
+ ci->stream.total_in, ci->stream.total_out);
+
+ ci->nbytes = ci->stream.total_out + GF_CDC_VALIDATION_SIZE;
+
+ /* set deflated canary value for identification */
+ ret = dict_set_int32 (*xdata, GF_CDC_DEFLATE_CANARY_VAL, 1);
+ if (ret) {
+ /* Send uncompressed data if we can't _tell_ the client
+ * that deflated data is on it's way. So, we just log
+ * the faliure and continue as usual.
+ */
+ gf_log (this->name, GF_LOG_ERROR,
+ "Data deflated, but could not set canary"
+ " value in dict for identification");
+ }
+
+ /* This is to be used in testing */
+ if ( priv->debug ) {
+ cdc_dump_iovec_to_disk (this, ci, GF_CDC_DEBUG_DUMP_FILE );
+ }
+
+ deflate_cleanup_out:
+ (void) deflateEnd(&ci->stream);
+
+ out:
+ return ret;
+}
+
+
+/* deflate content is checked by the presence of a canary
+ * value in the dict as the key
+ */
+static int32_t
+cdc_check_content_for_deflate (dict_t *xdata)
+{
+ return dict_get (xdata, GF_CDC_DEFLATE_CANARY_VAL) ? -1 : 0;
+}
+
+static unsigned long
+cdc_extract_crc (char *trailer)
+{
+ return cdc_get_long ((unsigned char *) &trailer[0]);
+}
+
+static unsigned long
+cdc_extract_size (char *trailer)
+{
+ return cdc_get_long ((unsigned char *) &trailer[4]);
+}
+
+static int32_t
+cdc_validate_inflate (cdc_info_t *ci, unsigned long crc,
+ unsigned long len)
+{
+ return !((crc == ci->crc)
+ /* inflated length is hidden inside
+ * Zlib stream struct */
+ && (len == ci->stream.total_out));
+}
+
+static int32_t
+do_cdc_decompress (xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci)
+{
+ int ret = -1;
+ int i = 0;
+ int len = 0;
+ char *inflte = NULL;
+ char *trailer = NULL;
+ struct iovec vec = {0,};
+ unsigned long computed_crc = 0;
+ unsigned long computed_len = 0;
+
+ ret = inflateInit2 (&ci->stream, priv->window_size);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Zlib: Unable to initialize inflate");
+ goto out;
+ }
+
+ vec = THIS_VEC(ci, 0);
+
+ trailer = (char *) (((char *) vec.iov_base) + vec.iov_len
+ - GF_CDC_VALIDATION_SIZE);
+
+ /* CRC of uncompressed data */
+ computed_crc = cdc_extract_crc (trailer);
+
+ /* size of uncomrpessed data */
+ computed_len = cdc_extract_size (trailer);
+
+ gf_log (this->name, GF_LOG_DEBUG, "crc=%lu len=%lu buffer_size=%d",
+ computed_crc, computed_len, ci->buffer_size);
+
+ inflte = vec.iov_base ;
+ len = vec.iov_len - GF_CDC_VALIDATION_SIZE;
+
+ /* allocate buffer of the original length of the data */
+ ret = cdc_alloc_iobuf_and_init_vec (this, priv, ci, 0);
+ if (ret)
+ goto out;
+
+ /* setup output buffer */
+ cdc_init_zlib_output_stream (priv, ci, 0);
+
+ /* setup input buffer */
+ ci->stream.next_in = (unsigned char *) inflte;
+ ci->stream.avail_in = len;
+
+ while (ci->stream.avail_in != 0) {
+ if (ci->stream.avail_out == 0) {
+ CURR_VEC(ci).iov_len = ci->buffer_size;
+
+ ret = cdc_alloc_iobuf_and_init_vec (this, priv, ci, 0);
+ if (ret)
+ break;
+
+ /* Re-position Zlib output buffer */
+ cdc_init_zlib_output_stream (priv, ci, 0);
+ }
+
+ ret = inflate (&ci->stream, Z_NO_FLUSH);
+ if (ret == Z_STREAM_ERROR)
+ break;
+ }
+
+ /* flush zlib buffer */
+ ret = cdc_flush_libz_buffer (priv, this, ci, inflate, Z_SYNC_FLUSH);
+ if (!(ret == Z_OK || ret == Z_STREAM_END)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Decompression Error: ret (%d)", ret);
+ ret = -1;
+ goto out;
+ }
+
+ /* compute CRC of the uncompresses data to check for
+ * correctness */
+
+ for (i = 0; i < ci->ncount; i++) {
+ ci->crc = crc32 (ci->crc,
+ (const Bytef *) ci->vec[i].iov_base,
+ ci->vec[i].iov_len);
+ }
+
+ /* validate inflated data */
+ ret = cdc_validate_inflate (ci, computed_crc, computed_len);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Checksum or length mismatched in inflated data");
+ }
+
+ out:
+ return ret;
+}
+
+int32_t
+cdc_decompress (xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci,
+ dict_t *xdata)
+{
+ int32_t ret = -1;
+
+ /* check for deflate content */
+ if (!cdc_check_content_for_deflate (xdata)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Content not deflated, passing through ...");
+ goto passthrough_out;
+ }
+
+ ci->iobref = iobref_new ();
+ if (!ci->iobref)
+ goto passthrough_out;
+
+ /* do we need to do this? can we assume that one iovec
+ * will hold per request data everytime?
+ *
+ * server/client protocol seems to deal with a single
+ * iovec even if op_ret > 1M. So, it looks ok to
+ * assume that a single iovec will contain all the
+ * data (This saves us a lot from finding the trailer
+ * and the data since it could have been split-up onto
+ * two adjacent iovec's.
+ *
+ * But, in case this translator is loaded above quick-read
+ * for some reason, then it's entirely possible that we get
+ * multiple iovec's...
+ *
+ * This case (handled below) is not tested. (by loading the
+ * xlator below quick-read)
+ */
+
+ /* @@ I_HOPE_THIS_IS_NEVER_HIT */
+ if (ci->count > 1) {
+ gf_log (this->name, GF_LOG_WARNING, "unable to handle"
+ " multiple iovecs (%d in number)", ci->count);
+ goto inflate_cleanup_out;
+ /* TODO: coallate all iovecs in one */
+ }
+
+ ret = do_cdc_decompress (this, priv, ci);
+ if (ret)
+ goto inflate_cleanup_out;
+
+ ci->nbytes = ci->stream.total_out;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Inflated %ld to %ld bytes",
+ ci->stream.total_in, ci->stream.total_out);
+
+ inflate_cleanup_out:
+ (void) inflateEnd (&ci->stream);
+
+ passthrough_out:
+ return ret;
+}
+
+#endif
diff --git a/xlators/features/compress/src/cdc-mem-types.h b/xlators/features/compress/src/cdc-mem-types.h
new file mode 100644
index 000000000..efa008059
--- /dev/null
+++ b/xlators/features/compress/src/cdc-mem-types.h
@@ -0,0 +1,22 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CDC_MEM_TYPES_H
+#define __CDC_MEM_TYPES_H
+
+#include "mem-types.h"
+
+enum gf_cdc_mem_types {
+ gf_cdc_mt_priv_t = gf_common_mt_end + 1,
+ gf_cdc_mt_vec_t = gf_common_mt_end + 2,
+ gf_cdc_mt_gzip_trailer_t = gf_common_mt_end + 3,
+};
+
+#endif
diff --git a/xlators/features/compress/src/cdc.c b/xlators/features/compress/src/cdc.c
new file mode 100644
index 000000000..eb7d87c56
--- /dev/null
+++ b/xlators/features/compress/src/cdc.c
@@ -0,0 +1,342 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <sys/uio.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "defaults.h"
+#include "logging.h"
+
+#include "cdc.h"
+#include "cdc-mem-types.h"
+
+static void
+cdc_cleanup_iobref (cdc_info_t *ci)
+{
+ assert(ci->iobref != NULL);
+ iobref_clear (ci->iobref);
+}
+
+int32_t
+cdc_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iovec *vector, int32_t count,
+ struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
+{
+ int ret = -1;
+ cdc_priv_t *priv = NULL;
+ cdc_info_t ci = {0,};
+
+ GF_VALIDATE_OR_GOTO ("cdc", this, default_out);
+ GF_VALIDATE_OR_GOTO (this->name, frame, default_out);
+
+ priv = this->private;
+
+ if (op_ret <= 0)
+ goto default_out;
+
+ if ( (priv->min_file_size != 0)
+ && (op_ret < priv->min_file_size) )
+ goto default_out;
+
+ ci.count = count;
+ ci.ibytes = op_ret;
+ ci.vector = vector;
+ ci.buf = NULL;
+ ci.iobref = NULL;
+ ci.ncount = 0;
+ ci.crc = 0;
+ ci.buffer_size = GF_CDC_DEF_BUFFERSIZE;
+
+/* A readv compresses on the server side and decompresses on the client side
+ */
+ if (priv->op_mode == GF_CDC_MODE_SERVER) {
+ ret = cdc_compress (this, priv, &ci, &xdata);
+ } else if (priv->op_mode == GF_CDC_MODE_CLIENT) {
+ ret = cdc_decompress (this, priv, &ci, xdata);
+ } else {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Invalid operation mode (%d)", priv->op_mode);
+ }
+
+ if (ret)
+ goto default_out;
+
+ STACK_UNWIND_STRICT (readv, frame, ci.nbytes, op_errno,
+ ci.vec, ci.ncount, stbuf, iobref,
+ xdata);
+ cdc_cleanup_iobref (&ci);
+ return 0;
+
+ default_out:
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno,
+ vector, count, stbuf, iobref, xdata);
+ return 0;
+}
+
+int32_t
+cdc_readv (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t offset, uint32_t flags,
+ dict_t *xdata)
+{
+ fop_readv_cbk_t cbk = NULL;
+
+#ifdef HAVE_LIB_Z
+ cbk = cdc_readv_cbk;
+#else
+ cbk = default_readv_cbk;
+#endif
+ STACK_WIND (frame, cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv,
+ fd, size, offset, flags, xdata);
+ return 0;
+}
+
+int32_t
+cdc_writev_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
+
+int32_t
+cdc_writev (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ struct iovec *vector,
+ int32_t count,
+ off_t offset,
+ uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+{
+ int ret = -1;
+ cdc_priv_t *priv = NULL;
+ cdc_info_t ci = {0,};
+ size_t isize = 0;
+
+ GF_VALIDATE_OR_GOTO ("cdc", this, default_out);
+ GF_VALIDATE_OR_GOTO (this->name, frame, default_out);
+
+ priv = this->private;
+
+ isize = iov_length(vector, count);
+
+ if (isize <= 0)
+ goto default_out;
+
+ if ( (priv->min_file_size != 0)
+ && (isize < priv->min_file_size) )
+ goto default_out;
+
+ ci.count = count;
+ ci.ibytes = isize;
+ ci.vector = vector;
+ ci.buf = NULL;
+ ci.iobref = NULL;
+ ci.ncount = 0;
+ ci.crc = 0;
+ ci.buffer_size = GF_CDC_DEF_BUFFERSIZE;
+
+/* A writev compresses on the client side and decompresses on the server side
+ */
+ if (priv->op_mode == GF_CDC_MODE_CLIENT) {
+ ret = cdc_compress (this, priv, &ci, &xdata);
+ } else if (priv->op_mode == GF_CDC_MODE_SERVER) {
+ ret = cdc_decompress (this, priv, &ci, xdata);
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "Invalid operation mode (%d) ", priv->op_mode);
+ }
+
+ if (ret)
+ goto default_out;
+
+ STACK_WIND (frame,
+ cdc_writev_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->writev,
+ fd, ci.vec, ci.ncount, offset, flags,
+ iobref, xdata);
+
+ cdc_cleanup_iobref (&ci);
+ return 0;
+
+ default_out:
+ STACK_WIND (frame,
+ cdc_writev_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->writev,
+ fd, vector, count, offset, flags,
+ iobref, xdata);
+ return 0;
+}
+
+int32_t
+init (xlator_t *this)
+{
+ int ret = -1;
+ char *temp_str = NULL;
+ cdc_priv_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO ("cdc", this, err);
+
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Need subvolume == 1");
+ goto err;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Dangling volume. Check volfile");
+ }
+
+ priv = GF_CALLOC (1, sizeof (*priv), gf_cdc_mt_priv_t);
+ if (!priv) {
+ goto err;
+ }
+
+ /* Check if debug mode is turned on */
+ GF_OPTION_INIT ("debug", priv->debug, bool, err);
+ if( priv->debug ) {
+ gf_log (this->name, GF_LOG_DEBUG, "CDC debug option turned on");
+ }
+
+ /* Set Gzip Window Size */
+ GF_OPTION_INIT ("window-size", priv->window_size, int32, err);
+ if ( (priv->window_size > GF_CDC_MAX_WINDOWSIZE)
+ || (priv->window_size < GF_CDC_DEF_WINDOWSIZE) ) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Invalid gzip window size (%d), using default",
+ priv->window_size);
+ priv->window_size = GF_CDC_DEF_WINDOWSIZE;
+ }
+
+ /* Set Gzip (De)Compression Level */
+ GF_OPTION_INIT ("compression-level", priv->cdc_level, int32, err);
+ if ( ((priv->cdc_level < 1) || (priv->cdc_level > 9))
+ && (priv->cdc_level != GF_CDC_DEF_COMPRESSION) ) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Invalid gzip (de)compression level (%d),"
+ " using default", priv->cdc_level);
+ priv->cdc_level = GF_CDC_DEF_COMPRESSION;
+ }
+
+ /* Set Gzip Memory Level */
+ GF_OPTION_INIT ("mem-level", priv->mem_level, int32, err);
+ if ( (priv->mem_level < 1) || (priv->mem_level > 9) ) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Invalid gzip memory level, using the default");
+ priv->mem_level = GF_CDC_DEF_MEMLEVEL;
+ }
+
+ /* Set min file size to enable compression */
+ GF_OPTION_INIT ("min-size", priv->min_file_size, int32, err);
+
+ /* Mode of operation - Server/Client */
+ ret = dict_get_str (this->options, "mode", &temp_str);
+ if (ret) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Operation mode not specified !!");
+ goto err;
+ }
+
+ if (GF_CDC_MODE_IS_CLIENT (temp_str)) {
+ priv->op_mode = GF_CDC_MODE_CLIENT;
+ } else if (GF_CDC_MODE_IS_SERVER (temp_str)) {
+ priv->op_mode = GF_CDC_MODE_SERVER;
+ } else {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Bogus operation mode (%s) specified", temp_str);
+ goto err;
+ }
+
+ this->private = priv;
+ gf_log (this->name, GF_LOG_DEBUG, "CDC xlator loaded in (%s) mode",temp_str);
+ return 0;
+
+ err:
+ if (priv)
+ GF_FREE (priv);
+
+ return -1;
+}
+
+void
+fini (xlator_t *this)
+{
+ cdc_priv_t *priv = this->private;
+
+ if (priv)
+ GF_FREE (priv);
+ this->private = NULL;
+ return;
+}
+
+struct xlator_fops fops = {
+ .readv = cdc_readv,
+ .writev = cdc_writev,
+};
+
+struct xlator_cbks cbks = {
+};
+
+struct volume_options options[] = {
+ { .key = {"window-size"},
+ .default_value = "-15",
+ .type = GF_OPTION_TYPE_INT,
+ .description = "Size of the zlib history buffer."
+ },
+ { .key = {"mem-level"},
+ .default_value = "8",
+ .type = GF_OPTION_TYPE_INT,
+ .description = "Memory allocated for internal compression state.\
+ 1 uses minimum memory but is slow and reduces \
+ compression ratio; memLevel=9 uses maximum memory \
+ for optimal speed. The default value is 8."
+ },
+ { .key = {"compression-level"},
+ .default_value = "-1",
+ .type = GF_OPTION_TYPE_INT,
+ .description = "Compression levels \
+ 0 : no compression, 1 : best speed, \
+ 9 : best compression, -1 : default compression "
+ },
+ { .key = {"min-size"},
+ .default_value = "0",
+ .type = GF_OPTION_TYPE_INT,
+ .description = "Data is compressed only when its size exceeds this."
+ },
+ { .key = {"mode"},
+ .value = {"server", "client"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Set on the basis of where the xlator is loaded."
+ },
+ { .key = {"debug"},
+ .default_value = "false",
+ .type = GF_OPTION_TYPE_BOOL,
+ .description = "This is used in testing. Will dump compressed data \
+ to disk as a gzip file."
+ },
+ { .key = {NULL}
+ },
+};
diff --git a/xlators/features/compress/src/cdc.h b/xlators/features/compress/src/cdc.h
new file mode 100644
index 000000000..71f4d2317
--- /dev/null
+++ b/xlators/features/compress/src/cdc.h
@@ -0,0 +1,107 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CDC_H
+#define __CDC_H
+
+#ifdef HAVE_LIB_Z
+#include "zlib.h"
+#endif
+
+#include "xlator.h"
+
+#ifndef MAX_IOVEC
+#define MAX_IOVEC 16
+#endif
+
+typedef struct cdc_priv {
+ int window_size;
+ int mem_level;
+ int cdc_level;
+ int min_file_size;
+ int op_mode;
+ gf_boolean_t debug;
+ gf_lock_t lock;
+} cdc_priv_t;
+
+typedef struct cdc_info {
+ /* input bits */
+ int count;
+ int32_t ibytes;
+ struct iovec *vector;
+ struct iatt *buf;
+
+ /* output bits */
+ int ncount;
+ int nbytes;
+ int buffer_size;
+ struct iovec vec[MAX_IOVEC];
+ struct iobref *iobref;
+
+ /* zlib bits */
+#ifdef HAVE_LIB_Z
+ z_stream stream;
+#endif
+ unsigned long crc;
+} cdc_info_t;
+
+#define NVEC(ci) (ci->ncount - 1)
+#define CURR_VEC(ci) ci->vec[ci->ncount - 1]
+#define THIS_VEC(ci, i) ci->vector[i]
+
+/* Gzip defaults */
+#define GF_CDC_DEF_WINDOWSIZE -15 /* default value */
+#define GF_CDC_MAX_WINDOWSIZE -8 /* max value */
+
+#ifdef HAVE_LIB_Z
+#define GF_CDC_DEF_COMPRESSION Z_DEFAULT_COMPRESSION
+#else
+#define GF_CDC_DEF_COMPRESSION -1
+#endif
+
+#define GF_CDC_DEF_MEMLEVEL 8
+#define GF_CDC_DEF_BUFFERSIZE 262144 // 256K - default compression buffer size
+
+/* Operation mode
+ * If xlator is loaded on client, readv decompresses and writev compresses
+ * If xlator is loaded on server, readv compresses and writev decompresses
+ */
+#define GF_CDC_MODE_CLIENT 0
+#define GF_CDC_MODE_SERVER 1
+
+/* min size of data to do cmpression
+ * 0 == compress even 1byte
+ */
+#define GF_CDC_MIN_CHUNK_SIZE 0
+
+#define GF_CDC_VALIDATION_SIZE 8
+
+#define GF_CDC_OS_ID 0xFF
+#define GF_CDC_DEFLATE_CANARY_VAL "deflate"
+#define GF_CDC_DEBUG_DUMP_FILE "/tmp/cdcdump.gz"
+
+#define GF_CDC_MODE_IS_CLIENT(m) \
+ (strcmp (m, "client") == 0)
+
+#define GF_CDC_MODE_IS_SERVER(m) \
+ (strcmp (m, "server") == 0)
+
+int32_t
+cdc_compress (xlator_t *this,
+ cdc_priv_t *priv,
+ cdc_info_t *ci,
+ dict_t **xdata);
+int32_t
+cdc_decompress (xlator_t *this,
+ cdc_priv_t *priv,
+ cdc_info_t *ci,
+ dict_t *xdata);
+
+#endif
diff --git a/xlators/features/filter/src/Makefile.am b/xlators/features/filter/src/Makefile.am
index cda5f0767..d1fda8b0a 100644
--- a/xlators/features/filter/src/Makefile.am
+++ b/xlators/features/filter/src/Makefile.am
@@ -1,13 +1,16 @@
xlator_LTLIBRARIES = filter.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/features
-filter_la_LDFLAGS = -module -avoidversion
+filter_la_LDFLAGS = -module -avoid-version
filter_la_SOURCES = filter.c
filter_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+noinst_HEADERS = filter-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/features/filter/src/filter-mem-types.h b/xlators/features/filter/src/filter-mem-types.h
new file mode 100644
index 000000000..47a17249b
--- /dev/null
+++ b/xlators/features/filter/src/filter-mem-types.h
@@ -0,0 +1,20 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __FILTER_MEM_TYPES_H__
+#define __FILTER_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_filter_mem_types_ {
+ gf_filter_mt_gf_filter = gf_common_mt_end + 1,
+ gf_filter_mt_end
+};
+#endif
+
diff --git a/xlators/features/filter/src/filter.c b/xlators/features/filter/src/filter.c
index c9cd8dbfa..1d4887b71 100644
--- a/xlators/features/filter/src/filter.c
+++ b/xlators/features/filter/src/filter.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -30,6 +20,7 @@
#include "logging.h"
#include "dict.h"
#include "xlator.h"
+#include "filter-mem-types.h"
#define GF_FILTER_NOBODY_UID 65534
#define GF_FILTER_NOBODY_GID 65534
@@ -159,36 +150,36 @@ update_frame (call_frame_t *frame,
/* if 'root' don't change the uid/gid */
static int32_t
-update_stat (struct stat *stbuf,
+update_stat (struct iatt *stbuf,
struct gf_filter *filter)
{
int32_t idx = 0;
for (idx = 0; idx < filter->translate_num_uid_entries; idx++) {
- if (stbuf->st_uid == GF_FILTER_ROOT_UID)
+ if (stbuf->ia_uid == GF_FILTER_ROOT_UID)
continue;
- if ((stbuf->st_uid >= filter->translate_input_uid[idx][0]) &&
- (stbuf->st_uid <= filter->translate_input_uid[idx][1])) {
- stbuf->st_uid = filter->translate_output_uid[idx];
+ if ((stbuf->ia_uid >= filter->translate_input_uid[idx][0]) &&
+ (stbuf->ia_uid <= filter->translate_input_uid[idx][1])) {
+ stbuf->ia_uid = filter->translate_output_uid[idx];
break;
}
}
for (idx = 0; idx < filter->translate_num_gid_entries; idx++) {
- if (stbuf->st_gid == GF_FILTER_ROOT_GID)
+ if (stbuf->ia_gid == GF_FILTER_ROOT_GID)
continue;
- if ((stbuf->st_gid >= filter->translate_input_gid[idx][0]) &&
- (stbuf->st_gid <= filter->translate_input_gid[idx][1])) {
- stbuf->st_gid = filter->translate_output_gid[idx];
+ if ((stbuf->ia_gid >= filter->translate_input_gid[idx][0]) &&
+ (stbuf->ia_gid <= filter->translate_input_gid[idx][1])) {
+ stbuf->ia_gid = filter->translate_output_gid[idx];
break;
}
}
if (filter->fixed_uid_set) {
- stbuf->st_uid = filter->fixed_uid;
+ stbuf->ia_uid = filter->fixed_uid;
}
if (filter->fixed_gid_set) {
- stbuf->st_gid = filter->fixed_gid;
+ stbuf->ia_gid = filter->fixed_gid;
}
return 0;
@@ -201,19 +192,22 @@ filter_lookup_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf,
- dict_t *dict)
+ struct iatt *buf,
+ dict_t *dict,
+ struct iatt *postparent)
{
int ret = 0;
if (op_ret >= 0) {
update_stat (buf, this->private);
- ret = inode_ctx_put (inode, this, (uint64_t)(long)buf->st_uid);
+ ret = inode_ctx_put (inode, this, (uint64_t)(long)buf->ia_uid);
if (ret == -1) {
gf_log (this->name, GF_LOG_ERROR,
"couldn't set context");
}
+
+ update_stat (postparent, this->private);
}
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf, dict);
+ STACK_UNWIND (frame, op_ret, op_errno, inode, buf, dict, postparent);
return 0;
}
@@ -239,7 +233,7 @@ filter_stat_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *buf)
+ struct iatt *buf)
{
if (op_ret >= 0) {
update_stat (buf, this->private);
@@ -262,25 +256,28 @@ filter_stat (call_frame_t *frame,
}
static int32_t
-filter_chmod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+filter_setattr_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *preop,
+ struct iatt *postop)
{
if (op_ret >= 0) {
- update_stat (buf, this->private);
+ update_stat (preop, this->private);
+ update_stat (postop, this->private);
}
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ STACK_UNWIND (frame, op_ret, op_errno, preop, postop);
return 0;
}
int32_t
-filter_chmod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
+filter_setattr (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ struct iatt *stbuf,
+ int32_t valid)
{
int32_t ret = 0;
ret = update_frame (frame, loc->inode, this->private);
@@ -291,147 +288,65 @@ filter_chmod (call_frame_t *frame,
case GF_FILTER_MAP_BOTH:
if (loc->inode->st_mode & S_IWOTH)
break;
- gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
- STACK_UNWIND (frame, -1, EPERM, NULL);
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s: returning permission denied", loc->path);
+ STACK_UNWIND (frame, -1, EPERM, NULL, NULL, NULL);
return 0;
-
+
case GF_FILTER_FILTER_UID:
case GF_FILTER_FILTER_GID:
case GF_FILTER_RO_FS:
- STACK_UNWIND (frame, -1, EROFS, NULL);
+ STACK_UNWIND (frame, -1, EROFS, NULL, NULL);
return 0;
default:
break;
}
STACK_WIND (frame,
- filter_chmod_cbk,
+ filter_setattr_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->chmod,
+ FIRST_CHILD(this)->fops->setattr,
loc,
- mode);
+ stbuf, valid);
return 0;
}
-
static int32_t
-filter_fchmod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+filter_fsetattr_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *preop,
+ struct iatt *postop)
{
if (op_ret >= 0) {
- update_stat (buf, this->private);
+ update_stat (preop, this->private);
+ update_stat (postop, this->private);
}
STACK_UNWIND (frame,
op_ret,
op_errno,
- buf);
+ preop, postop);
return 0;
}
-int32_t
-filter_fchmod (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- mode_t mode)
+int32_t
+filter_fsetattr (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ struct iatt *stbuf,
+ int32_t valid)
{
STACK_WIND (frame,
- filter_fchmod_cbk,
+ filter_fsetattr_cbk,
FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fchmod,
+ FIRST_CHILD(this)->fops->fsetattr,
fd,
- mode);
- return 0;
-}
-
-static int32_t
-filter_chown_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- if (op_ret >= 0) {
- update_stat (buf, this->private);
- }
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-filter_chown (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- uid_t uid,
- gid_t gid)
-{
- int32_t ret = 0;
- ret = update_frame (frame, loc->inode, this->private);
- switch (ret) {
- case GF_FILTER_MAP_UID:
- if (loc->inode->st_mode & S_IWGRP)
- break;
- case GF_FILTER_MAP_BOTH:
- if (loc->inode->st_mode & S_IWOTH)
- break;
- gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
- STACK_UNWIND (frame, -1, EPERM, NULL);
- return 0;
-
- case GF_FILTER_FILTER_UID:
- case GF_FILTER_FILTER_GID:
- case GF_FILTER_RO_FS:
- STACK_UNWIND (frame, -1, EROFS, NULL);
- return 0;
- default:
- break;
- }
-
- STACK_WIND (frame,
- filter_chown_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->chown,
- loc,
- uid,
- gid);
+ stbuf, valid);
return 0;
}
-static int32_t
-filter_fchown_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- if (op_ret >= 0) {
- update_stat (buf, this->private);
- }
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-filter_fchown (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- uid_t uid,
- gid_t gid)
-{
- STACK_WIND (frame,
- filter_fchown_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fchown,
- fd,
- uid,
- gid);
- return 0;
-}
static int32_t
filter_truncate_cbk (call_frame_t *frame,
@@ -439,12 +354,14 @@ filter_truncate_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *buf)
+ struct iatt *prebuf,
+ struct iatt *postbuf)
{
if (op_ret >= 0) {
- update_stat (buf, this->private);
+ update_stat (prebuf, this->private);
+ update_stat (postbuf, this->private);
}
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf);
return 0;
}
@@ -464,13 +381,13 @@ filter_truncate (call_frame_t *frame,
if (loc->inode->st_mode & S_IWOTH)
break;
gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
- STACK_UNWIND (frame, -1, EPERM, NULL);
+ STACK_UNWIND (frame, -1, EPERM, NULL, NULL);
return 0;
case GF_FILTER_FILTER_UID:
case GF_FILTER_FILTER_GID:
case GF_FILTER_RO_FS:
- STACK_UNWIND (frame, -1, EROFS, NULL);
+ STACK_UNWIND (frame, -1, EROFS, NULL, NULL);
return 0;
}
@@ -489,12 +406,14 @@ filter_ftruncate_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *buf)
+ struct iatt *prebuf,
+ struct iatt *postbuf)
{
if (op_ret >= 0) {
- update_stat (buf, this->private);
+ update_stat (prebuf, this->private);
+ update_stat (postbuf, this->private);
}
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf);
return 0;
}
@@ -513,56 +432,6 @@ filter_ftruncate (call_frame_t *frame,
return 0;
}
-int32_t
-filter_utimens_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- if (op_ret >= 0) {
- update_stat (buf, this->private);
- }
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-
-int32_t
-filter_utimens (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- struct timespec tv[2])
-{
- int32_t ret = 0;
- ret = update_frame (frame, loc->inode, this->private);
- switch (ret) {
- case GF_FILTER_MAP_UID:
- if (loc->inode->st_mode & S_IWGRP)
- break;
- case GF_FILTER_MAP_BOTH:
- if (loc->inode->st_mode & S_IWOTH)
- break;
- gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
- STACK_UNWIND (frame, -1, EPERM, NULL);
- return 0;
-
- case GF_FILTER_FILTER_UID:
- case GF_FILTER_FILTER_GID:
- case GF_FILTER_RO_FS:
- STACK_UNWIND (frame, -1, EROFS, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- filter_utimens_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->utimens,
- loc,
- tv);
- return 0;
-}
static int32_t
filter_readlink_cbk (call_frame_t *frame,
@@ -570,9 +439,13 @@ filter_readlink_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- const char *path)
+ const char *path,
+ struct iatt *sbuf)
{
- STACK_UNWIND (frame, op_ret, op_errno, path);
+ if (op_ret >= 0)
+ update_stat (sbuf, this->private);
+
+ STACK_UNWIND (frame, op_ret, op_errno, path, sbuf);
return 0;
}
@@ -612,19 +485,25 @@ filter_mknod_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf)
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
int ret = 0;
if (op_ret >= 0) {
update_stat (buf, this->private);
- ret = inode_ctx_put (inode, this, (uint64_t)(long)buf->st_uid);
+ ret = inode_ctx_put (inode, this, (uint64_t)(long)buf->ia_uid);
if (ret == -1) {
gf_log (this->name, GF_LOG_ERROR,
"couldn't set context");
}
+
+ update_stat (preparent, this->private);
+ update_stat (postparent, this->private);
}
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+ STACK_UNWIND (frame, op_ret, op_errno, inode, buf,
+ preparent, postparent);
return 0;
}
@@ -646,13 +525,15 @@ filter_mknod (call_frame_t *frame,
if (parent->st_mode & S_IWOTH)
break;
gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
- STACK_UNWIND (frame, -1, EPERM);
+ STACK_UNWIND (frame, -1, EPERM, NULL, NULL,
+ NULL, NULL);
return 0;
case GF_FILTER_FILTER_UID:
case GF_FILTER_FILTER_GID:
case GF_FILTER_RO_FS:
- STACK_UNWIND (frame, -1, EROFS, NULL, NULL);
+ STACK_UNWIND (frame, -1, EROFS, NULL, NULL,
+ NULL, NULL);
return 0;
}
STACK_WIND (frame,
@@ -670,18 +551,24 @@ filter_mkdir_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf)
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
int ret = 0;
if (op_ret >= 0) {
update_stat (buf, this->private);
- ret = inode_ctx_put (inode, this, (uint64_t)(long)buf->st_uid);
+ ret = inode_ctx_put (inode, this, (uint64_t)(long)buf->ia_uid);
if (ret == -1) {
gf_log (this->name, GF_LOG_ERROR,
"couldn't set context");
}
+
+ update_stat (preparent, this->private);
+ update_stat (postparent, this->private);
}
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+ STACK_UNWIND (frame, op_ret, op_errno, inode, buf,
+ preparent, postparent);
return 0;
}
@@ -702,13 +589,15 @@ filter_mkdir (call_frame_t *frame,
if (parent->st_mode & S_IWOTH)
break;
gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
- STACK_UNWIND (frame, -1, EPERM);
+ STACK_UNWIND (frame, -1, EPERM, NULL, NULL,
+ NULL, NULL);
return 0;
case GF_FILTER_FILTER_UID:
case GF_FILTER_FILTER_GID:
case GF_FILTER_RO_FS:
- STACK_UNWIND (frame, -1, EROFS, NULL, NULL);
+ STACK_UNWIND (frame, -1, EROFS, NULL, NULL,
+ NULL, NULL);
return 0;
}
STACK_WIND (frame,
@@ -724,9 +613,16 @@ filter_unlink_cbk (call_frame_t *frame,
void *cookie,
xlator_t *this,
int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ if (op_ret >= 0) {
+ update_stat (preparent, this->private);
+ update_stat (postparent, this->private);
+ }
+
+ STACK_UNWIND (frame, op_ret, op_errno, preparent, postparent);
return 0;
}
@@ -752,12 +648,12 @@ filter_unlink (call_frame_t *frame,
if (loc->inode->st_mode & S_IWOTH)
break;
gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
- STACK_UNWIND (frame, -1, EPERM);
+ STACK_UNWIND (frame, -1, EPERM, NULL, NULL);
return 0;
case GF_FILTER_FILTER_UID:
case GF_FILTER_FILTER_GID:
case GF_FILTER_RO_FS:
- STACK_UNWIND (frame, -1, EROFS);
+ STACK_UNWIND (frame, -1, EROFS, NULL, NULL);
return 0;
}
STACK_WIND (frame,
@@ -773,9 +669,16 @@ filter_rmdir_cbk (call_frame_t *frame,
void *cookie,
xlator_t *this,
int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ if (op_ret >= 0) {
+ update_stat (preparent, this->private);
+ update_stat (postparent, this->private);
+ }
+
+ STACK_UNWIND (frame, op_ret, op_errno, preparent, postparent);
return 0;
}
@@ -801,13 +704,13 @@ filter_rmdir (call_frame_t *frame,
if (loc->inode->st_mode & S_IWOTH)
break;
gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
- STACK_UNWIND (frame, -1, EPERM);
+ STACK_UNWIND (frame, -1, EPERM, NULL, NULL);
return 0;
case GF_FILTER_FILTER_UID:
case GF_FILTER_FILTER_GID:
case GF_FILTER_RO_FS:
- STACK_UNWIND (frame, -1, EROFS);
- return 0;
+ STACK_UNWIND (frame, -1, EROFS, NULL, NULL);
+ return 0;
}
STACK_WIND (frame,
filter_rmdir_cbk,
@@ -824,18 +727,24 @@ filter_symlink_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf)
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
int ret = 0;
if (op_ret >= 0) {
update_stat (buf, this->private);
- ret = inode_ctx_put (inode, this, (uint64_t)(long)buf->st_uid);
+ ret = inode_ctx_put (inode, this, (uint64_t)(long)buf->ia_uid);
if (ret == -1) {
gf_log (this->name, GF_LOG_ERROR,
"couldn't set context");
}
+
+ update_stat (preparent, this->private);
+ update_stat (postparent, this->private);
}
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+ STACK_UNWIND (frame, op_ret, op_errno, inode, buf,
+ preparent, postparent);
return 0;
}
@@ -856,13 +765,15 @@ filter_symlink (call_frame_t *frame,
if (parent->st_mode & S_IWOTH)
break;
gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
- STACK_UNWIND (frame, -1, EPERM);
+ STACK_UNWIND (frame, -1, EPERM, NULL, NULL,
+ NULL, NULL);
return 0;
case GF_FILTER_FILTER_UID:
case GF_FILTER_FILTER_GID:
case GF_FILTER_RO_FS:
- STACK_UNWIND (frame, -1, EROFS, NULL, NULL);
+ STACK_UNWIND (frame, -1, EROFS, NULL, NULL,
+ NULL, NULL);
return 0;
}
STACK_WIND (frame,
@@ -880,12 +791,25 @@ filter_rename_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *buf)
+ struct iatt *buf,
+ struct iatt *preoldparent,
+ struct iatt *postoldparent,
+ struct iatt *prenewparent,
+ struct iatt *postnewparent)
{
if (op_ret >= 0) {
update_stat (buf, this->private);
+
+ update_stat (preoldparent, this->private);
+ update_stat (postoldparent, this->private);
+
+ update_stat (prenewparent, this->private);
+ update_stat (postnewparent, this->private);
}
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+
+ STACK_UNWIND (frame, op_ret, op_errno, buf,
+ preoldparent, postoldparent,
+ prenewparent, postnewparent);
return 0;
}
@@ -913,13 +837,17 @@ filter_rename (call_frame_t *frame,
break;
gf_log (this->name, GF_LOG_DEBUG,
"%s -> %s: returning permission denied", oldloc->path, newloc->path);
- STACK_UNWIND (frame, -1, EPERM);
+ STACK_UNWIND (frame, -1, EPERM, NULL,
+ NULL, NULL,
+ NULL, NULL);
return 0;
case GF_FILTER_FILTER_UID:
case GF_FILTER_FILTER_GID:
case GF_FILTER_RO_FS:
- STACK_UNWIND (frame, -1, EROFS, NULL);
+ STACK_UNWIND (frame, -1, EROFS, NULL,
+ NULL, NULL,
+ NULL, NULL);
return 0;
}
STACK_WIND (frame,
@@ -938,18 +866,24 @@ filter_link_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf)
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
int ret = 0;
if (op_ret >= 0) {
update_stat (buf, this->private);
- ret = inode_ctx_put (inode, this, (uint64_t)(long)buf->st_uid);
+ ret = inode_ctx_put (inode, this, (uint64_t)(long)buf->ia_uid);
if (ret == -1) {
gf_log (this->name, GF_LOG_ERROR,
"couldn't set context");
}
+
+ update_stat (preparent, this->private);
+ update_stat (postparent, this->private);
}
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+ STACK_UNWIND (frame, op_ret, op_errno, inode, buf,
+ preparent, postparent);
return 0;
}
@@ -965,7 +899,8 @@ filter_link (call_frame_t *frame,
case GF_FILTER_FILTER_UID:
case GF_FILTER_FILTER_GID:
case GF_FILTER_RO_FS:
- STACK_UNWIND (frame, -1, EROFS, NULL, NULL);
+ STACK_UNWIND (frame, -1, EROFS, NULL, NULL,
+ NULL, NULL);
return 0;
}
STACK_WIND (frame,
@@ -985,18 +920,23 @@ filter_create_cbk (call_frame_t *frame,
int32_t op_errno,
fd_t *fd,
inode_t *inode,
- struct stat *buf)
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
int ret = 0;
if (op_ret >= 0) {
update_stat (buf, this->private);
- ret = inode_ctx_put (inode, this, (uint64_t)(long)buf->st_uid);
+ ret = inode_ctx_put (inode, this, (uint64_t)(long)buf->ia_uid);
if (ret == -1) {
gf_log (this->name, GF_LOG_ERROR,
"couldn't set context");
}
+ update_stat (preparent, this->private);
+ update_stat (postparent, this->private);
}
- STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
+ STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent);
return 0;
}
@@ -1018,13 +958,15 @@ filter_create (call_frame_t *frame,
if (parent->st_mode & S_IWOTH)
break;
gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
- STACK_UNWIND (frame, -1, EPERM);
+ STACK_UNWIND (frame, -1, EPERM, NULL, NULL, NULL,
+ NULL, NULL);
return 0;
case GF_FILTER_FILTER_UID:
case GF_FILTER_FILTER_GID:
case GF_FILTER_RO_FS:
- STACK_UNWIND (frame, -1, EROFS, NULL, NULL, NULL);
+ STACK_UNWIND (frame, -1, EROFS, NULL, NULL, NULL,
+ NULL, NULL);
return 0;
}
STACK_WIND (frame, filter_create_cbk,
@@ -1051,7 +993,8 @@ filter_open (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
int32_t flags,
- fd_t *fd)
+ fd_t *fd,
+ int32_t wbflags)
{
int32_t ret = 0;
ret = update_frame (frame, loc->inode, this->private);
@@ -1089,7 +1032,7 @@ filter_open (call_frame_t *frame,
filter_open_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->open,
- loc, flags, fd);
+ loc, flags, fd, wbflags);
return 0;
}
@@ -1101,7 +1044,7 @@ filter_readv_cbk (call_frame_t *frame,
int32_t op_errno,
struct iovec *vector,
int32_t count,
- struct stat *stbuf,
+ struct iatt *stbuf,
struct iobref *iobref)
{
if (op_ret >= 0) {
@@ -1141,15 +1084,18 @@ filter_writev_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *stbuf)
+ struct iatt *prebuf,
+ struct iatt *postbuf)
{
if (op_ret >= 0) {
- update_stat (stbuf, this->private);
+ update_stat (prebuf, this->private);
+ update_stat (postbuf, this->private);
}
STACK_UNWIND (frame,
op_ret,
op_errno,
- stbuf);
+ prebuf,
+ postbuf);
return 0;
}
@@ -1168,7 +1114,7 @@ filter_writev (call_frame_t *frame,
case GF_FILTER_FILTER_UID:
case GF_FILTER_FILTER_GID:
case GF_FILTER_RO_FS:
- STACK_UNWIND (frame, -1, EROFS, NULL);
+ STACK_UNWIND (frame, -1, EROFS, NULL, NULL);
return 0;
}
@@ -1190,7 +1136,7 @@ filter_fstat_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *buf)
+ struct iatt *buf)
{
if (op_ret >= 0) {
update_stat (buf, this->private);
@@ -1400,6 +1346,25 @@ filter_removexattr (call_frame_t *frame,
return 0;
}
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_filter_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ return ret;
+ }
+
+ return ret;
+}
+
int32_t
init (xlator_t *this)
{
@@ -1429,7 +1394,7 @@ init (xlator_t *this)
"dangling volume. check volfile ");
}
- filter = CALLOC (sizeof (*filter), 1);
+ filter = GF_CALLOC (sizeof (*filter), 1, gf_filter_mt_gf_filter);
ERR_ABORT (filter);
if (dict_get (this->options, "read-only")) {
@@ -1464,11 +1429,11 @@ init (xlator_t *this)
option_data = dict_get (this->options, "translate-uid");
value = strtok_r (option_data->data, ",", &tmp_str);
while (value) {
- dup_str = strdup (value);
+ dup_str = gf_strdup (value);
input_value_str1 = strtok_r (dup_str, "=", &tmp_str1);
if (input_value_str1) {
/* Check for n-m */
- char *temp_string = strdup (input_value_str1);
+ char *temp_string = gf_strdup (input_value_str1);
input_value_str2 = strtok_r (temp_string, "-", &tmp_str2);
if (gf_string2int (input_value_str2, &input_value) != 0) {
gf_log (this->name, GF_LOG_ERROR,
@@ -1487,7 +1452,7 @@ init (xlator_t *this)
}
}
filter->translate_input_uid[filter->translate_num_uid_entries][1] = input_value;
- FREE (temp_string);
+ GF_FREE (temp_string);
output_value_str = strtok_r (NULL, "=", &tmp_str1);
if (output_value_str) {
if (gf_string2int (output_value_str, &output_value) != 0) {
@@ -1516,7 +1481,7 @@ init (xlator_t *this)
if (filter->translate_num_uid_entries == GF_MAXIMUM_FILTERING_ALLOWED)
break;
value = strtok_r (NULL, ",", &tmp_str);
- FREE (dup_str);
+ GF_FREE (dup_str);
}
}
@@ -1528,11 +1493,11 @@ init (xlator_t *this)
option_data = dict_get (this->options, "translate-gid");
value = strtok_r (option_data->data, ",", &tmp_str);
while (value) {
- dup_str = strdup (value);
+ dup_str = gf_strdup (value);
input_value_str1 = strtok_r (dup_str, "=", &tmp_str1);
if (input_value_str1) {
/* Check for n-m */
- char *temp_string = strdup (input_value_str1);
+ char *temp_string = gf_strdup (input_value_str1);
input_value_str2 = strtok_r (temp_string, "-", &tmp_str2);
if (gf_string2int (input_value_str2, &input_value) != 0) {
gf_log (this->name, GF_LOG_ERROR,
@@ -1551,7 +1516,7 @@ init (xlator_t *this)
}
}
filter->translate_input_gid[filter->translate_num_gid_entries][1] = input_value;
- FREE (temp_string);
+ GF_FREE (temp_string);
output_value_str = strtok_r (NULL, "=", &tmp_str1);
if (output_value_str) {
if (gf_string2int (output_value_str, &output_value) != 0) {
@@ -1581,7 +1546,7 @@ init (xlator_t *this)
if (filter->translate_num_gid_entries == GF_MAXIMUM_FILTERING_ALLOWED)
break;
value = strtok_r (NULL, ",", &tmp_str);
- FREE (dup_str);
+ GF_FREE (dup_str);
}
}
@@ -1592,7 +1557,7 @@ init (xlator_t *this)
option_data = dict_get (this->options, "filter-uid");
value = strtok_r (option_data->data, ",", &tmp_str);
while (value) {
- dup_str = strdup (value);
+ dup_str = gf_strdup (value);
/* Check for n-m */
input_value_str1 = strtok_r (dup_str, "-", &tmp_str1);
if (gf_string2int (input_value_str1, &input_value) != 0) {
@@ -1622,7 +1587,7 @@ init (xlator_t *this)
if (filter->filter_num_uid_entries == GF_MAXIMUM_FILTERING_ALLOWED)
break;
value = strtok_r (NULL, ",", &tmp_str);
- FREE (dup_str);
+ GF_FREE (dup_str);
}
filter->partial_filter = 1;
}
@@ -1634,7 +1599,7 @@ init (xlator_t *this)
option_data = dict_get (this->options, "filter-gid");
value = strtok_r (option_data->data, ",", &tmp_str);
while (value) {
- dup_str = strdup (value);
+ dup_str = gf_strdup (value);
/* Check for n-m */
input_value_str1 = strtok_r (dup_str, "-", &tmp_str1);
if (gf_string2int (input_value_str1, &input_value) != 0) {
@@ -1664,7 +1629,7 @@ init (xlator_t *this)
if (filter->filter_num_gid_entries == GF_MAXIMUM_FILTERING_ALLOWED)
break;
value = strtok_r (NULL, ",", &tmp_str);
- FREE (dup_str);
+ GF_FREE (dup_str);
}
gf_log (this->name, GF_LOG_ERROR, "this option is not supported currently.. exiting");
return -1;
@@ -1705,7 +1670,7 @@ fini (xlator_t *this)
{
struct gf_filter *filter = this->private;
- FREE (filter);
+ GF_FREE (filter);
return;
}
@@ -1715,8 +1680,6 @@ struct xlator_fops fops = {
.lookup = filter_lookup,
.stat = filter_stat,
.fstat = filter_fstat,
- .chmod = filter_chmod,
- .fchmod = filter_fchmod,
.readlink = filter_readlink,
.mknod = filter_mknod,
.mkdir = filter_mkdir,
@@ -1725,8 +1688,6 @@ struct xlator_fops fops = {
.symlink = filter_symlink,
.rename = filter_rename,
.link = filter_link,
- .chown = filter_chown,
- .fchown = filter_fchown,
.truncate = filter_truncate,
.ftruncate = filter_ftruncate,
.create = filter_create,
@@ -1737,10 +1698,8 @@ struct xlator_fops fops = {
.getxattr = filter_getxattr,
.removexattr = filter_removexattr,
.opendir = filter_opendir,
- .utimens = filter_utimens,
-};
-
-struct xlator_mops mops = {
+ .setattr = filter_setattr,
+ .fsetattr = filter_fsetattr,
};
struct xlator_cbks cbks = {
diff --git a/xlators/features/gfid-access/Makefile.am b/xlators/features/gfid-access/Makefile.am
new file mode 100644
index 000000000..af437a64d
--- /dev/null
+++ b/xlators/features/gfid-access/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = src
diff --git a/xlators/features/gfid-access/src/Makefile.am b/xlators/features/gfid-access/src/Makefile.am
new file mode 100644
index 000000000..db53affaa
--- /dev/null
+++ b/xlators/features/gfid-access/src/Makefile.am
@@ -0,0 +1,15 @@
+xlator_LTLIBRARIES = gfid-access.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+gfid_access_la_LDFLAGS = -module -avoid-version
+
+gfid_access_la_SOURCES = gfid-access.c
+gfid_access_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = gfid-access.h gfid-access-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/gfid-access/src/gfid-access-mem-types.h b/xlators/features/gfid-access/src/gfid-access-mem-types.h
new file mode 100644
index 000000000..168d67b43
--- /dev/null
+++ b/xlators/features/gfid-access/src/gfid-access-mem-types.h
@@ -0,0 +1,23 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GFID_ACCESS_MEM_TYPES_H
+#define _GFID_ACCESS_MEM_TYPES_H
+
+#include "mem-types.h"
+
+enum gf_changelog_mem_types {
+ gf_gfid_access_mt_priv_t = gf_common_mt_end + 1,
+ gf_gfid_access_mt_gfid_t,
+ gf_gfid_access_mt_end
+};
+
+#endif
+
diff --git a/xlators/features/gfid-access/src/gfid-access.c b/xlators/features/gfid-access/src/gfid-access.c
new file mode 100644
index 000000000..da0ba7e50
--- /dev/null
+++ b/xlators/features/gfid-access/src/gfid-access.c
@@ -0,0 +1,1172 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "gfid-access.h"
+#include "inode.h"
+#include "byte-order.h"
+
+
+
+void
+ga_newfile_args_free (ga_newfile_args_t *args)
+{
+ if (!args)
+ goto out;
+
+ GF_FREE (args->bname);
+
+ if (S_ISLNK (args->st_mode) && args->args.symlink.linkpath) {
+ GF_FREE (args->args.symlink.linkpath);
+ args->args.symlink.linkpath = NULL;
+ }
+
+ mem_put (args);
+out:
+ return;
+}
+
+
+void
+ga_heal_args_free (ga_heal_args_t *args)
+{
+ if (!args)
+ goto out;
+
+ GF_FREE (args->bname);
+
+ mem_put (args);
+out:
+ return;
+}
+
+
+ga_newfile_args_t *
+ga_newfile_parse_args (xlator_t *this, data_t *data)
+{
+ ga_newfile_args_t *args = NULL;
+ ga_private_t *priv = NULL;
+ int len = 0;
+ int blob_len = 0;
+ int min_len = 0;
+ void *blob = NULL;
+
+ priv = this->private;
+
+ blob = data->data;
+ blob_len = data->len;
+
+ min_len = sizeof (args->uid) + sizeof (args->gid) + sizeof (args->gfid)
+ + sizeof (args->st_mode) + 2 + 2;
+ if (blob_len < min_len) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Invalid length: Total length is less "
+ "than minimum length.");
+ goto err;
+ }
+
+ args = mem_get0 (priv->newfile_args_pool);
+ if (args == NULL)
+ goto err;
+
+ args->uid = ntoh32 (*(uint32_t *)blob);
+ blob += sizeof (uint32_t);
+ blob_len -= sizeof (uint32_t);
+
+ args->gid = ntoh32 (*(uint32_t *)blob);
+ blob += sizeof (uint32_t);
+ blob_len -= sizeof (uint32_t);
+
+ memcpy (args->gfid, blob, sizeof (args->gfid));
+ blob += sizeof (args->gfid);
+ blob_len -= sizeof (args->gfid);
+
+ args->st_mode = ntoh32 (*(uint32_t *)blob);
+ blob += sizeof (uint32_t);
+ blob_len -= sizeof (uint32_t);
+
+ len = strnlen (blob, blob_len);
+ if (len == blob_len)
+ if (len == blob_len) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "gfid: %s. No null byte present.",
+ args->gfid);
+ goto err;
+ }
+
+ args->bname = GF_CALLOC (1, (len + 1), gf_common_mt_char);
+ if (args->bname == NULL)
+ goto err;
+
+ memcpy (args->bname, blob, (len + 1));
+ blob += (len + 1);
+ blob_len -= (len + 1);
+
+ if (S_ISDIR (args->st_mode)) {
+ if (blob_len < sizeof (uint32_t)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+ args->args.mkdir.mode = ntoh32 (*(uint32_t *)blob);
+ blob += sizeof (uint32_t);
+ blob_len -= sizeof (uint32_t);
+
+ if (blob_len < sizeof (uint32_t)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+ args->args.mkdir.umask = ntoh32 (*(uint32_t *)blob);
+ blob += sizeof (uint32_t);
+ blob_len -= sizeof (uint32_t);
+ if (blob_len < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+ } else if (S_ISLNK (args->st_mode)) {
+ len = strnlen (blob, blob_len);
+ if (len == blob_len) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+ args->args.symlink.linkpath = GF_CALLOC (1, len + 1,
+ gf_common_mt_char);
+ if (args->args.symlink.linkpath == NULL)
+ goto err;
+
+ memcpy (args->args.symlink.linkpath, blob, (len + 1));
+ blob += (len + 1);
+ blob_len -= (len + 1);
+ } else {
+ if (blob_len < sizeof (uint32_t)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+ args->args.mknod.mode = ntoh32 (*(uint32_t *)blob);
+ blob += sizeof (uint32_t);
+ blob_len -= sizeof (uint32_t);
+
+ if (blob_len < sizeof (uint32_t)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+ args->args.mknod.rdev = ntoh32 (*(uint32_t *)blob);
+ blob += sizeof (uint32_t);
+ blob_len -= sizeof (uint32_t);
+
+ if (blob_len < sizeof (uint32_t)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+ args->args.mknod.umask = ntoh32 (*(uint32_t *)blob);
+ blob += sizeof (uint32_t);
+ blob_len -= sizeof (uint32_t);
+ }
+
+ if (blob_len) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+
+ return args;
+
+err:
+ if (args)
+ ga_newfile_args_free (args);
+
+ return NULL;
+}
+
+ga_heal_args_t *
+ga_heal_parse_args (xlator_t *this, data_t *data)
+{
+ ga_heal_args_t *args = NULL;
+ ga_private_t *priv = NULL;
+ void *blob = NULL;
+ int len = 0;
+ int blob_len = 0;
+
+ blob = data->data;
+ blob_len = data->len;
+
+ priv = this->private;
+
+ /* bname should at least contain a character */
+ if (blob_len < (sizeof (args->gfid) + 2))
+ goto err;
+
+ args = mem_get0 (priv->heal_args_pool);
+ if (!args)
+ goto err;
+
+ memcpy (args->gfid, blob, sizeof (args->gfid));
+ blob += sizeof (args->gfid);
+ blob_len -= sizeof (args->gfid);
+
+ len = strnlen (blob, blob_len);
+ if (len == blob_len)
+ goto err;
+
+ args->bname = GF_CALLOC (1, len + 1, gf_common_mt_char);
+ if (!args->bname)
+ goto err;
+
+ memcpy (args->bname, blob, len);
+ blob_len -= (len + 1);
+
+ if (blob_len)
+ goto err;
+
+ return args;
+
+err:
+ if (args)
+ ga_heal_args_free (args);
+
+ return NULL;
+}
+
+static int32_t
+ga_fill_tmp_loc (loc_t *loc, xlator_t *this, char *gfid,
+ char *bname, dict_t *xdata, loc_t *new_loc)
+{
+ int ret = -1;
+ uint64_t value = 0;
+ inode_t *parent = NULL;
+
+ parent = loc->inode;
+ ret = inode_ctx_get (loc->inode, this, &value);
+ if (!ret) {
+ parent = (void *)value;
+ }
+
+ /* parent itself should be looked up */
+ uuid_copy (new_loc->pargfid, parent->gfid);
+ new_loc->parent = inode_ref (parent);
+
+ new_loc->inode = inode_grep (parent->table, parent, bname);
+ if (!new_loc->inode)
+ new_loc->inode = inode_new (parent->table);
+
+ loc_path (new_loc, bname);
+ new_loc->name = basename (new_loc->path);
+
+ /* As GFID would not be set on the entry yet, lets not send entry
+ gfid in the request */
+ /*uuid_copy (new_loc->gfid, (const unsigned char *)gfid); */
+
+ ret = dict_set_static_bin (xdata, "gfid-req", gfid, 16);
+ if (ret < 0)
+ goto out;
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+
+
+static gf_boolean_t
+__is_gfid_access_dir (uuid_t gfid)
+{
+ uuid_t aux_gfid;
+
+ memset (aux_gfid, 0, 16);
+ aux_gfid[15] = GF_AUX_GFID;
+
+ if (uuid_compare (gfid, aux_gfid) == 0)
+ return _gf_true;
+
+ return _gf_false;
+}
+
+int32_t
+ga_forget (xlator_t *this, inode_t *inode)
+{
+ int ret = -1;
+ uint64_t value = 0;
+ inode_t *tmp_inode = NULL;
+
+ ret = inode_ctx_del (inode, this, &value);
+ if (ret)
+ goto out;
+
+ tmp_inode = (void *)value;
+ inode_unref (tmp_inode);
+
+out:
+ return 0;
+}
+
+
+static int
+ga_heal_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stat, dict_t *dict,
+ struct iatt *postparent)
+{
+ call_frame_t *orig_frame = NULL;
+
+ orig_frame = frame->local;
+ frame->local = NULL;
+
+ /* don't worry about inode linking and other stuff. They'll happen on
+ * the next lookup.
+ */
+ STACK_DESTROY (frame->root);
+
+ STACK_UNWIND_STRICT (setxattr, orig_frame, op_ret, op_errno, dict);
+
+ return 0;
+}
+
+static int
+ga_newentry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ call_frame_t *orig_frame = NULL;
+
+ orig_frame = frame->local;
+ frame->local = NULL;
+
+ /* don't worry about inode linking and other stuff. They'll happen on
+ * the next lookup.
+ */
+ STACK_DESTROY (frame->root);
+
+ STACK_UNWIND_STRICT (setxattr, orig_frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+int32_t
+ga_new_entry (call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *data,
+ dict_t *xdata)
+{
+ int ret = -1;
+ ga_newfile_args_t *args = NULL;
+ loc_t tmp_loc = {0,};
+ call_frame_t *new_frame = NULL;
+ mode_t mode = 0;
+
+ args = ga_newfile_parse_args (this, data);
+ if (!args)
+ goto out;
+
+ if (!xdata)
+ xdata = dict_new ();
+
+ ret = ga_fill_tmp_loc (loc, this, args->gfid,
+ args->bname, xdata, &tmp_loc);
+ if (ret)
+ goto out;
+
+ new_frame = copy_frame (frame);
+ if (!new_frame)
+ goto out;
+ new_frame->local = (void *)frame;
+
+ new_frame->root->uid = args->uid;
+ new_frame->root->gid = args->gid;
+
+ if (S_ISDIR (args->st_mode)) {
+ STACK_WIND (new_frame, ga_newentry_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir,
+ &tmp_loc, args->args.mkdir.mode,
+ args->args.mkdir.umask, xdata);
+ } else if (S_ISLNK (args->st_mode)) {
+ STACK_WIND (new_frame, ga_newentry_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink,
+ args->args.symlink.linkpath,
+ &tmp_loc, 0, xdata);
+ } else {
+ /* use 07777 (4 7s) for considering the Sticky bits etc) */
+ mode = (S_IFMT & args->st_mode) |
+ (07777 | args->args.mknod.mode);;
+
+ STACK_WIND (new_frame, ga_newentry_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod,
+ &tmp_loc, mode,
+ args->args.mknod.rdev, args->args.mknod.umask,
+ xdata);
+ }
+
+ ret = 0;
+out:
+ ga_newfile_args_free (args);
+
+ return ret;
+}
+
+int32_t
+ga_heal_entry (call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *data,
+ dict_t *xdata)
+{
+ int ret = -1;
+ ga_heal_args_t *args = NULL;
+ loc_t tmp_loc = {0,};
+ call_frame_t *new_frame = NULL;
+
+ args = ga_heal_parse_args (this, data);
+ if (!args)
+ goto out;
+
+ if (!xdata)
+ xdata = dict_new ();
+
+ ret = ga_fill_tmp_loc (loc, this, args->gfid, args->bname,
+ xdata, &tmp_loc);
+ if (ret)
+ goto out;
+
+ new_frame = copy_frame (frame);
+ if (!new_frame)
+ goto out;
+ new_frame->local = (void *)frame;
+
+ STACK_WIND (new_frame, ga_heal_cbk, FIRST_CHILD (this),
+ FIRST_CHILD(this)->fops->lookup,
+ &tmp_loc, xdata);
+
+ ret = 0;
+out:
+ if (args)
+ ga_heal_args_free (args);
+
+ return ret;
+}
+
+int32_t
+ga_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+ga_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+
+ data_t *data = NULL;
+ int op_errno = ENOMEM;
+ int ret = 0;
+ inode_t *unref = NULL;
+
+ if ((loc->name && !strcmp (GF_GFID_DIR, loc->name)) &&
+ ((loc->parent &&
+ __is_root_gfid (loc->parent->gfid)) ||
+ __is_root_gfid (loc->pargfid))) {
+ op_errno = EPERM;
+ goto err;
+ }
+
+ data = dict_get (dict, GF_FUSE_AUX_GFID_NEWFILE);
+ if (data) {
+ ret = ga_new_entry (frame, this, loc, data, xdata);
+ if (ret)
+ goto err;
+ return 0;
+ }
+
+ data = dict_get (dict, GF_FUSE_AUX_GFID_HEAL);
+ if (data) {
+ ret = ga_heal_entry (frame, this, loc, data, xdata);
+ if (ret)
+ goto err;
+ return 0;
+ }
+
+ //If the inode is a virtual inode change the inode otherwise perform
+ //the operation on same inode
+ GFID_ACCESS_GET_VALID_DIR_INODE (this, loc, unref, wind);
+
+wind:
+ STACK_WIND (frame, ga_setxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags,
+ xdata);
+ if (unref)
+ inode_unref (unref);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (setxattr, frame, -1, op_errno, xdata);
+ return 0;
+}
+
+
+int32_t
+ga_virtual_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ int j = 0;
+ int i = 0;
+ int ret = 0;
+ uint64_t temp_ino = 0;
+ inode_t *cbk_inode = NULL;
+ inode_t *true_inode = NULL;
+ uuid_t random_gfid = {0,};
+
+ if (frame->local)
+ cbk_inode = frame->local;
+ else
+ cbk_inode = inode;
+
+ frame->local = NULL;
+ if (op_ret)
+ goto unwind;
+
+ if (!IA_ISDIR (buf->ia_type))
+ goto unwind;
+
+ /* need to send back a different inode for linking in itable */
+ if (cbk_inode == inode) {
+ /* check if the inode is in the 'itable' or
+ if its just previously discover()'d inode */
+ true_inode = inode_find (inode->table, buf->ia_gfid);
+ if (!true_inode) {
+ cbk_inode = inode_new (inode->table);
+
+ if (!cbk_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ /* the inode is not present in itable, ie, the actual
+ path is not yet looked up. Use the current inode
+ itself for now */
+ inode_ref (inode);
+ } else {
+ /* 'inode_ref()' has been done in inode_find() */
+ inode = true_inode;
+ }
+
+ ret = inode_ctx_put (cbk_inode, this, (uint64_t)inode);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set the inode ctx with"
+ "the actual inode");
+ if (inode)
+ inode_unref (inode);
+ }
+ inode = NULL;
+ }
+
+ if (!uuid_is_null (cbk_inode->gfid)) {
+ /* if the previous linked inode is used, use the
+ same gfid */
+ uuid_copy (random_gfid, cbk_inode->gfid);
+ } else {
+ /* replace the buf->ia_gfid to a random gfid
+ for directory, for files, what we received is fine */
+ uuid_generate (random_gfid);
+ }
+
+ uuid_copy (buf->ia_gfid, random_gfid);
+
+ for (i = 15; i > (15 - 8); i--) {
+ temp_ino += (uint64_t)(buf->ia_gfid[i]) << j;
+ j += 8;
+ }
+ buf->ia_ino = temp_ino;
+
+unwind:
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, cbk_inode, buf,
+ xdata, postparent);
+
+ return 0;
+}
+
+int32_t
+ga_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ ga_private_t *priv = NULL;
+
+ /* if the entry in question is not 'root',
+ then follow the normal path */
+ if (op_ret || !__is_root_gfid(buf->ia_gfid))
+ goto unwind;
+
+ priv = this->private;
+
+ /* do we need to copy root stbuf everytime? */
+ /* mostly yes, as we want to have the 'stat' info show latest
+ in every _cbk() */
+
+ /* keep the reference for root stat buf */
+ priv->root_stbuf = *buf;
+ priv->gfiddir_stbuf = priv->root_stbuf;
+ priv->gfiddir_stbuf.ia_gfid[15] = GF_AUX_GFID;
+ priv->gfiddir_stbuf.ia_ino = GF_AUX_GFID;
+
+unwind:
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf,
+ xdata, postparent);
+ return 0;
+}
+
+int32_t
+ga_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ ga_private_t *priv = NULL;
+ int ret = -1;
+ uuid_t tmp_gfid = {0,};
+ loc_t tmp_loc = {0,};
+ uint64_t value = 0;
+ inode_t *inode = NULL;
+ inode_t *true_inode = NULL;
+ int32_t op_errno = ENOENT;
+
+ /* if its discover(), no need for any action here */
+ if (!loc->name)
+ goto wind;
+
+ /* if its revalidate, and inode is not of type directory,
+ proceed with 'wind' */
+ if (loc->inode && loc->inode->ia_type &&
+ !IA_ISDIR (loc->inode->ia_type))
+ goto wind;
+
+ priv = this->private;
+
+ /* need to check if the lookup is on virtual dir */
+ if ((loc->name && !strcmp (GF_GFID_DIR, loc->name)) &&
+ ((loc->parent && __is_root_gfid (loc->parent->gfid)) ||
+ __is_root_gfid (loc->pargfid))) {
+ /* this means, the query is on '/.gfid', return the fake stat,
+ and say success */
+
+ STACK_UNWIND_STRICT (lookup, frame, 0, 0, loc->inode,
+ &priv->gfiddir_stbuf, xdata,
+ &priv->root_stbuf);
+ return 0;
+ }
+
+ /* now, check if the lookup() is on an existing entry,
+ but on gfid-path */
+ if (!((loc->parent && __is_gfid_access_dir (loc->parent->gfid)) ||
+ __is_gfid_access_dir (loc->pargfid)))
+ goto wind;
+
+ /* make sure the 'basename' is actually a 'canonical-gfid',
+ otherwise, return error */
+ ret = uuid_parse (loc->name, tmp_gfid);
+ if (ret)
+ goto err;
+
+ /* if its fresh lookup, go ahead and send it down, if not,
+ for directory, we need indirection to actual dir inode */
+ if (!(loc->inode && loc->inode->ia_type))
+ goto discover;
+
+ /* revalidate on directory */
+ ret = inode_ctx_get (loc->inode, this, &value);
+ if (ret)
+ goto err;
+
+ inode = (void *)value;
+
+ /* valid inode, already looked up, work on that */
+ if (inode->ia_type)
+ goto discover;
+
+ /* check if the inode is in the 'itable' or
+ if its just previously discover()'d inode */
+ true_inode = inode_find (loc->inode->table, tmp_gfid);
+ if (true_inode) {
+ /* time do another lookup and update the context
+ with proper inode */
+ op_errno = ESTALE;
+ goto err;
+ }
+
+discover:
+ /* for the virtual entries, we don't need to send 'gfid-req' key, as
+ for these entries, we don't want to 'set' a new gfid */
+ if (xdata)
+ dict_del (xdata, "gfid-req");
+
+ uuid_copy (tmp_loc.gfid, tmp_gfid);
+
+ /* if revalidate, then we need to have the proper reference */
+ if (inode) {
+ tmp_loc.inode = inode_ref (inode);
+ frame->local = loc->inode;
+ } else {
+ tmp_loc.inode = inode_ref (loc->inode);
+ }
+
+ STACK_WIND (frame, ga_virtual_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, &tmp_loc, xdata);
+
+ inode_unref (tmp_loc.inode);
+
+ return 0;
+
+wind:
+ /* used for all the normal lookup path */
+ STACK_WIND (frame, ga_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT (lookup, frame, -1, op_errno, loc->inode,
+ &priv->gfiddir_stbuf, xdata,
+ &priv->root_stbuf);
+ return 0;
+}
+
+int
+ga_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+ int op_errno = 0;
+
+ GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err);
+
+ STACK_WIND (frame, default_mkdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask,
+ xdata);
+
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT (mkdir, frame, -1, op_errno, loc->inode,
+ NULL, NULL, NULL, xdata);
+ return 0;
+}
+
+
+int
+ga_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ int op_errno = 0;
+
+ GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err);
+
+ STACK_WIND (frame, default_create_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->create,
+ loc, flags, mode, umask, fd, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (create, frame, -1, op_errno, NULL,
+ NULL, NULL, NULL, NULL, xdata);
+
+ return 0;
+
+}
+
+int
+ga_symlink (call_frame_t *frame, xlator_t *this, const char *linkname,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ int op_errno = 0;
+
+ GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err);
+
+ STACK_WIND (frame, default_symlink_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink,
+ linkname, loc, umask, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (symlink, frame, -1, op_errno, NULL,
+ NULL, NULL, NULL, xdata);
+
+ return 0;
+}
+
+int
+ga_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ int op_errno = 0;
+
+ GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err);
+
+ STACK_WIND (frame, default_mknod_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev,
+ umask, xdata);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (mknod, frame, -1, op_errno, NULL,
+ NULL, NULL, NULL, xdata);
+
+ return 0;
+}
+
+int
+ga_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flag,
+ dict_t *xdata)
+{
+ int op_errno = 0;
+ inode_t *unref = NULL;
+
+ GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err);
+
+ GFID_ACCESS_GET_VALID_DIR_INODE (this, loc, unref, wind);
+
+wind:
+ STACK_WIND (frame, default_rmdir_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->rmdir,
+ loc, flag, xdata);
+ if (unref)
+ inode_unref (unref);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (rmdir, frame, -1, op_errno, NULL,
+ NULL, xdata);
+
+ return 0;
+}
+
+int
+ga_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t xflag,
+ dict_t *xdata)
+{
+ int op_errno = 0;
+ inode_t *unref = NULL;
+
+ GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err);
+
+ GFID_ACCESS_GET_VALID_DIR_INODE (this, loc, unref, wind);
+
+wind:
+ STACK_WIND (frame, default_unlink_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink,
+ loc, xflag, xdata);
+
+ if (unref)
+ inode_unref (unref);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (unlink, frame, -1, op_errno, NULL,
+ NULL, xdata);
+
+ return 0;
+}
+
+int
+ga_rename (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ int op_errno = 0;
+ inode_t *oldloc_unref = NULL;
+ inode_t *newloc_unref = NULL;
+
+ GFID_ACCESS_ENTRY_OP_CHECK (oldloc, op_errno, err);
+ GFID_ACCESS_ENTRY_OP_CHECK (newloc, op_errno, err);
+
+ GFID_ACCESS_GET_VALID_DIR_INODE (this, oldloc, oldloc_unref,
+ handle_newloc);
+
+handle_newloc:
+ GFID_ACCESS_GET_VALID_DIR_INODE (this, newloc, newloc_unref, wind);
+
+wind:
+ STACK_WIND (frame, default_rename_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->rename,
+ oldloc, newloc, xdata);
+
+ if (oldloc_unref)
+ inode_unref (oldloc_unref);
+
+ if (newloc_unref)
+ inode_unref (newloc_unref);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (rename, frame, -1, op_errno, NULL,
+ NULL, NULL, NULL, NULL, xdata);
+
+ return 0;
+}
+
+
+int
+ga_link (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ int op_errno = 0;
+ inode_t *oldloc_unref = NULL;
+ inode_t *newloc_unref = NULL;
+
+ GFID_ACCESS_ENTRY_OP_CHECK (oldloc, op_errno, err);
+ GFID_ACCESS_ENTRY_OP_CHECK (newloc, op_errno, err);
+
+ GFID_ACCESS_GET_VALID_DIR_INODE (this, oldloc, oldloc_unref,
+ handle_newloc);
+
+handle_newloc:
+ GFID_ACCESS_GET_VALID_DIR_INODE (this, newloc, newloc_unref, wind);
+
+wind:
+ STACK_WIND (frame, default_link_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->link,
+ oldloc, newloc, xdata);
+
+ if (oldloc_unref)
+ inode_unref (oldloc_unref);
+
+ if (newloc_unref)
+ inode_unref (newloc_unref);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (link, frame, -1, op_errno, NULL,
+ NULL, NULL, NULL, xdata);
+
+ return 0;
+}
+
+int32_t
+ga_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ fd_t *fd, dict_t *xdata)
+{
+ int op_errno = 0;
+
+ GFID_ACCESS_ENTRY_OP_CHECK (loc, op_errno, err);
+
+ /* also check if the loc->inode itself is virtual
+ inode, if yes, return with failure, mainly because we
+ can't handle all the readdirp and other things on it. */
+ if (inode_ctx_get (loc->inode, this, NULL) == 0) {
+ op_errno = ENOTSUP;
+ goto err;
+ }
+
+ STACK_WIND (frame, default_opendir_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->opendir,
+ loc, fd, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (opendir, frame, -1, op_errno, NULL, xdata);
+
+ return 0;
+}
+
+int32_t
+ga_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ inode_t *unref = NULL;
+
+ GFID_ACCESS_GET_VALID_DIR_INODE (this, loc, unref, wind);
+
+wind:
+ STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+
+ if (unref)
+ inode_unref (unref);
+
+ return 0;
+}
+
+int32_t
+ga_stat (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ inode_t *unref = NULL;
+
+ GFID_ACCESS_GET_VALID_DIR_INODE (this, loc, unref, wind);
+
+wind:
+ STACK_WIND (frame, default_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+ if (unref)
+ inode_unref (unref);
+
+ return 0;
+}
+
+int32_t
+ga_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid,
+ dict_t *xdata)
+{
+ inode_t *unref = NULL;
+
+ GFID_ACCESS_GET_VALID_DIR_INODE (this, loc, unref, wind);
+
+wind:
+ STACK_WIND (frame, default_setattr_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid,
+ xdata);
+ if (unref)
+ inode_unref (unref);
+
+ return 0;
+}
+
+int32_t
+ga_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ inode_t *unref = NULL;
+
+ GFID_ACCESS_GET_VALID_DIR_INODE (this, loc, unref, wind);
+
+wind:
+ STACK_WIND (frame, default_removexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name,
+ xdata);
+ if (unref)
+ inode_unref (unref);
+
+ return 0;
+}
+
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_gfid_access_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_WARNING, "Memory accounting"
+ " init failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+int32_t
+init (xlator_t *this)
+{
+ ga_private_t *priv = NULL;
+ int ret = -1;
+
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "not configured with exactly one child. exiting");
+ goto out;
+ }
+
+ /* This can be the top of graph in certain cases */
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "dangling volume. check volfile ");
+ }
+
+ /* TODO: define a mem-type structure */
+ priv = GF_CALLOC (1, sizeof (*priv), gf_gfid_access_mt_priv_t);
+ if (!priv)
+ goto out;
+
+ priv->newfile_args_pool = mem_pool_new (ga_newfile_args_t, 512);
+ if (!priv->newfile_args_pool)
+ goto out;
+
+ priv->heal_args_pool = mem_pool_new (ga_heal_args_t, 512);
+ if (!priv->heal_args_pool)
+ goto out;
+
+ this->private = priv;
+
+ ret = 0;
+out:
+ if (ret && priv) {
+ if (priv->newfile_args_pool)
+ mem_pool_destroy (priv->newfile_args_pool);
+ GF_FREE (priv);
+ }
+
+ return ret;
+}
+
+void
+fini (xlator_t *this)
+{
+ ga_private_t *priv = NULL;
+ priv = this->private;
+ this->private = NULL;
+
+ if (priv) {
+ if (priv->newfile_args_pool)
+ mem_pool_destroy (priv->newfile_args_pool);
+ if (priv->heal_args_pool)
+ mem_pool_destroy (priv->heal_args_pool);
+ GF_FREE (priv);
+ }
+
+ return;
+}
+
+
+struct xlator_fops fops = {
+ .lookup = ga_lookup,
+
+ /* entry fops */
+ .mkdir = ga_mkdir,
+ .mknod = ga_mknod,
+ .create = ga_create,
+ .symlink = ga_symlink,
+ .link = ga_link,
+ .unlink = ga_unlink,
+ .rmdir = ga_rmdir,
+ .rename = ga_rename,
+
+ /* handle any other directory operations here */
+ .opendir = ga_opendir,
+ .stat = ga_stat,
+ .setattr = ga_setattr,
+ .getxattr = ga_getxattr,
+ .removexattr = ga_removexattr,
+
+ /* special fop to handle more entry creations */
+ .setxattr = ga_setxattr,
+};
+
+struct xlator_cbks cbks = {
+ .forget = ga_forget,
+};
+
+struct volume_options options[] = {
+ /* This translator doesn't take any options, or provide any options */
+ { .key = {NULL} },
+};
diff --git a/xlators/features/gfid-access/src/gfid-access.h b/xlators/features/gfid-access/src/gfid-access.h
new file mode 100644
index 000000000..e13c9b724
--- /dev/null
+++ b/xlators/features/gfid-access/src/gfid-access.h
@@ -0,0 +1,128 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __GFID_ACCESS_H__
+#define __GFID_ACCESS_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "defaults.h"
+#include "gfid-access-mem-types.h"
+
+#define UUID_CANONICAL_FORM_LEN 36
+
+#define GF_FUSE_AUX_GFID_NEWFILE "glusterfs.gfid.newfile"
+#define GF_FUSE_AUX_GFID_HEAL "glusterfs.gfid.heal"
+
+#define GF_GFID_KEY "GLUSTERFS_GFID"
+#define GF_GFID_DIR ".gfid"
+#define GF_AUX_GFID 0xd
+
+#define GFID_ACCESS_GET_VALID_DIR_INODE(x,l,unref,lbl) do { \
+ int ret = 0; \
+ uint64_t value = 0; \
+ inode_t *tmp_inode = NULL; \
+ \
+ /* if its an entry operation, on the virtual */ \
+ /* directory inode as parent, we need to handle */ \
+ /* it properly */ \
+ if (l->parent) { \
+ ret = inode_ctx_get (l->parent, x, &value); \
+ if (ret) \
+ goto lbl; \
+ tmp_inode = (inode_t *)value; \
+ unref = inode_ref (tmp_inode); \
+ l->parent = tmp_inode; \
+ /* if parent is virtual, no need to handle */ \
+ /* loc->inode */ \
+ break; \
+ } \
+ \
+ /* if its an inode operation, on the virtual */ \
+ /* directory inode itself, we need to handle */ \
+ /* it properly */ \
+ if (l->inode) { \
+ ret = inode_ctx_get (l->inode, x, &value); \
+ if (ret) \
+ goto lbl; \
+ tmp_inode = (inode_t *)value; \
+ unref = inode_ref (tmp_inode); \
+ l->inode = tmp_inode; \
+ } \
+ \
+ } while (0)
+
+#define GFID_ACCESS_ENTRY_OP_CHECK(loc,err,lbl) do { \
+ /* need to check if the lookup is on virtual dir */ \
+ if ((loc->name && !strcmp (GF_GFID_DIR, loc->name)) && \
+ ((loc->parent && \
+ __is_root_gfid (loc->parent->gfid)) || \
+ __is_root_gfid (loc->pargfid))) { \
+ err = EEXIST; \
+ goto lbl; \
+ } \
+ \
+ /* now, check if the lookup() is on an existing */ \
+ /* entry, but on gfid-path */ \
+ if ((loc->parent && \
+ __is_gfid_access_dir (loc->parent->gfid)) || \
+ __is_gfid_access_dir (loc->pargfid)) { \
+ err = EPERM; \
+ goto lbl; \
+ } \
+ } while (0)
+
+
+typedef struct {
+ unsigned int uid;
+ unsigned int gid;
+ char gfid[UUID_CANONICAL_FORM_LEN + 1];
+ unsigned int st_mode;
+ char *bname;
+
+ union {
+ struct _symlink_in {
+ char *linkpath;
+ } __attribute__ ((__packed__)) symlink;
+
+ struct _mknod_in {
+ unsigned int mode;
+ unsigned int rdev;
+ unsigned int umask;
+ } __attribute__ ((__packed__)) mknod;
+
+ struct _mkdir_in {
+ unsigned int mode;
+ unsigned int umask;
+ } __attribute__ ((__packed__)) mkdir;
+ } __attribute__ ((__packed__)) args;
+} __attribute__((__packed__)) ga_newfile_args_t;
+
+typedef struct {
+ char gfid[UUID_CANONICAL_FORM_LEN + 1];
+ char *bname; /* a null terminated basename */
+} __attribute__((__packed__)) ga_heal_args_t;
+
+struct ga_private {
+ /* root inode's stbuf */
+ struct iatt root_stbuf;
+ struct iatt gfiddir_stbuf;
+ struct mem_pool *newfile_args_pool;
+ struct mem_pool *heal_args_pool;
+};
+typedef struct ga_private ga_private_t;
+
+#endif /* __GFID_ACCESS_H__ */
diff --git a/scheduler/alu/Makefile.am b/xlators/features/glupy/Makefile.am
index d471a3f92..a985f42a8 100644
--- a/scheduler/alu/Makefile.am
+++ b/xlators/features/glupy/Makefile.am
@@ -1,3 +1,3 @@
SUBDIRS = src
-CLEANFILES =
+CLEANFILES =
diff --git a/xlators/features/glupy/doc/README.md b/xlators/features/glupy/doc/README.md
new file mode 100644
index 000000000..2d7b30ef6
--- /dev/null
+++ b/xlators/features/glupy/doc/README.md
@@ -0,0 +1,44 @@
+This is just the very start for a GlusterFS[1] meta-translator that will
+allow translator code to be written in Python. It's based on the standard
+Python embedding (not extending) techniques, plus a dash of the ctypes module.
+The interface is a pretty minimal adaptation of the dispatches and callbacks
+from the C API[2] to Python, as follows:
+
+* Dispatch functions and callbacks must be defined on an "xlator" class
+ derived from gluster.Translator so that they'll be auto-registered with
+ the C translator during initialization.
+
+* For each dispatch or callback function you want to intercept, you define a
+ Python function using the xxx\_fop\_t or xxx\_cbk\_t decorator.
+
+* The arguments for each operation are different, so you'll need to refer to
+ the C API. GlusterFS-specific types are used (though only loc\_t is fully
+ defined so far) and type correctness is enforced by ctypes.
+
+* If you do intercept a dispatch function, it is your responsibility to call
+ xxx\_wind (like STACK\_WIND in the C API but operation-specific) to pass
+ the request to the next translator. If you do not intercept a function, it
+ will default the same way as for C (pass through to the same operation with
+ the same arguments on the first child translator).
+
+* If you intercept a callback function, it is your responsibility to call
+ xxx\_unwind (like STACK\_UNWIND\_STRICT in the C API) to pass the request back
+ to the caller.
+
+So far only the lookup and create operations are handled this way, to support
+the "negative lookup" example. Now that the basic infrastructure is in place,
+adding more functions should be very quick, though with that much boilerplate I
+might pause to write a code generator. I also plan to add structure
+definitions and interfaces for some of the utility functions in libglusterfs
+(especially those having to do with inode and fd context) in the fairly near
+future. Note that you can also use ctypes to get at anything not explicitly
+exposed to Python already.
+
+_If you're coming here because of the Linux Journal article, please note that
+the code has evolved since that was written. The version that matches the
+article is here:_
+
+https://github.com/jdarcy/glupy/tree/4bbae91ba459ea46ef32f2966562492e4ca9187a
+
+[1] http://www.gluster.org
+[2] http://hekafs.org/dist/xlator_api_2.html
diff --git a/xlators/features/glupy/doc/TESTING b/xlators/features/glupy/doc/TESTING
new file mode 100644
index 000000000..e05f17f49
--- /dev/null
+++ b/xlators/features/glupy/doc/TESTING
@@ -0,0 +1,9 @@
+Loading a translator written in Python using the glupy meta translator
+-------------------------------------------------------------------------------
+'test.vol' is a simple volfile with the debug-trace Python translator on top
+of a brick. The volfile can be mounted using the following command.
+
+$ glusterfs --debug -f test.vol /path/to/mntpt
+
+If then file operations are performed on the newly mounted file system, log
+output would be printed by the Python translator on the standard output.
diff --git a/xlators/features/glupy/doc/test.vol b/xlators/features/glupy/doc/test.vol
new file mode 100644
index 000000000..0751a488c
--- /dev/null
+++ b/xlators/features/glupy/doc/test.vol
@@ -0,0 +1,10 @@
+volume vol-posix
+ type storage/posix
+ option directory /path/to/brick
+end-volume
+
+volume vol-glupy
+ type features/glupy
+ option module-name debug-trace
+ subvolumes vol-posix
+end-volume
diff --git a/xlators/features/glupy/src/Makefile.am b/xlators/features/glupy/src/Makefile.am
new file mode 100644
index 000000000..960862839
--- /dev/null
+++ b/xlators/features/glupy/src/Makefile.am
@@ -0,0 +1,20 @@
+xlator_LTLIBRARIES = glupy.la
+
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+glupydir = $(xlatordir)/glupy
+
+glupy_PYTHON = gluster.py negative.py helloworld.py debug-trace.py
+
+glupy_la_LDFLAGS = -module -avoid-version -shared -nostartfiles
+glupy_la_SOURCES = glupy.c
+glupy_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ -lpthread -l$(BUILD_PYTHON_LIB)
+
+noinst_HEADERS = glupy.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -isystem $(BUILD_PYTHON_INC)
+
+AM_CFLAGS = -Wall -fno-strict-aliasing -DGLUSTER_PYTHON_PATH=\"$(glupydir)\" $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/glupy/src/debug-trace.py b/xlators/features/glupy/src/debug-trace.py
new file mode 100644
index 000000000..53e76546b
--- /dev/null
+++ b/xlators/features/glupy/src/debug-trace.py
@@ -0,0 +1,774 @@
+import sys
+import stat
+from uuid import UUID
+from time import strftime, localtime
+from gluster import *
+# This translator was written primarily to test the fop entry point definitions
+# and structure definitions in 'gluster.py'.
+# It is similar to the debug-trace translator, one of the already available
+# translator types written in C, that logs the arguments passed to the fops and
+# their corresponding cbk functions.
+
+dl.get_id.restype = c_long
+dl.get_id.argtypes = [ POINTER(call_frame_t) ]
+
+dl.get_rootunique.restype = c_uint64
+dl.get_rootunique.argtypes = [ POINTER(call_frame_t) ]
+
+def uuid2str (gfid):
+ return str(UUID(''.join(map("{0:02x}".format, gfid))))
+
+
+def st_mode_from_ia (prot, filetype):
+ st_mode = 0
+ type_bit = 0
+ prot_bit = 0
+
+ if filetype == IA_IFREG:
+ type_bit = stat.S_IFREG
+ elif filetype == IA_IFDIR:
+ type_bit = stat.S_IFDIR
+ elif filetype == IA_IFLNK:
+ type_bit = stat.S_IFLNK
+ elif filetype == IA_IFBLK:
+ type_bit = stat.S_IFBLK
+ elif filetype == IA_IFCHR:
+ type_bit = stat.S_IFCHR
+ elif filetype == IA_IFIFO:
+ type_bit = stat.S_IFIFO
+ elif filetype == IA_IFSOCK:
+ type_bit = stat.S_IFSOCK
+ elif filetype == IA_INVAL:
+ pass
+
+
+ if prot.suid:
+ prot_bit |= stat.S_ISUID
+ if prot.sgid:
+ prot_bit |= stat.S_ISGID
+ if prot.sticky:
+ prot_bit |= stat.S_ISVTX
+
+ if prot.owner.read:
+ prot_bit |= stat.S_IRUSR
+ if prot.owner.write:
+ prot_bit |= stat.S_IWUSR
+ if prot.owner.execn:
+ prot_bit |= stat.S_IXUSR
+
+ if prot.group.read:
+ prot_bit |= stat.S_IRGRP
+ if prot.group.write:
+ prot_bit |= stat.S_IWGRP
+ if prot.group.execn:
+ prot_bit |= stat.S_IXGRP
+
+ if prot.other.read:
+ prot_bit |= stat.S_IROTH
+ if prot.other.write:
+ prot_bit |= stat.S_IWOTH
+ if prot.other.execn:
+ prot_bit |= stat.S_IXOTH
+
+ st_mode = (type_bit | prot_bit)
+
+ return st_mode
+
+
+def trace_stat2str (buf):
+ gfid = uuid2str(buf.contents.ia_gfid)
+ mode = st_mode_from_ia(buf.contents.ia_prot, buf.contents.ia_type)
+ atime_buf = strftime("[%b %d %H:%M:%S]",
+ localtime(buf.contents.ia_atime))
+ mtime_buf = strftime("[%b %d %H:%M:%S]",
+ localtime(buf.contents.ia_mtime))
+ ctime_buf = strftime("[%b %d %H:%M:%S]",
+ localtime(buf.contents.ia_ctime))
+ return ("(gfid={0:s}, ino={1:d}, mode={2:o}, nlink={3:d}, uid ={4:d}, "+
+ "gid ={5:d}, size={6:d}, blocks={7:d}, atime={8:s}, mtime={9:s}, "+
+ "ctime={10:s})").format(gfid, buf.contents.ia_no, mode,
+ buf.contents.ia_nlink,
+ buf.contents.ia_uid,
+ buf.contents.ia_gid,
+ buf.contents.ia_size,
+ buf.contents.ia_blocks,
+ atime_buf, mtime_buf,
+ ctime_buf)
+
+class xlator(Translator):
+
+ def __init__(self, c_this):
+ Translator.__init__(self, c_this)
+ self.gfids = {}
+
+ def lookup_fop(self, frame, this, loc, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.gfid)
+ print("GLUPY TRACE LOOKUP FOP- {0:d}: gfid={1:s}; " +
+ "path={2:s}").format(unique, gfid, loc.contents.path)
+ self.gfids[key] = gfid
+ dl.wind_lookup(frame, POINTER(xlator_t)(), loc, xdata)
+ return 0
+
+ def lookup_cbk(self, frame, cookie, this, op_ret, op_errno,
+ inode, buf, xdata, postparent):
+ unique =dl.get_rootunique(frame)
+ key =dl.get_id(frame)
+ if op_ret == 0:
+ gfid = uuid2str(buf.contents.ia_gfid)
+ statstr = trace_stat2str(buf)
+ postparentstr = trace_stat2str(postparent)
+ print("GLUPY TRACE LOOKUP CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; *buf={3:s}; " +
+ "*postparent={4:s}").format(unique, gfid,
+ op_ret, statstr,
+ postparentstr)
+ else:
+ gfid = self.gfids[key]
+ print("GLUPY TRACE LOOKUP CBK - {0:d}: gfid={1:s};" +
+ " op_ret={2:d}; op_errno={3:d}").format(unique,
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_lookup(frame, cookie, this, op_ret, op_errno,
+ inode, buf, xdata, postparent)
+ return 0
+
+ def create_fop(self, frame, this, loc, flags, mode, umask, fd,
+ xdata):
+ unique = dl.get_rootunique(frame)
+ gfid = uuid2str(loc.contents.gfid)
+ print("GLUPY TRACE CREATE FOP- {0:d}: gfid={1:s}; path={2:s}; " +
+ "fd={3:s}; flags=0{4:o}; mode=0{5:o}; " +
+ "umask=0{6:o}").format(unique, gfid, loc.contents.path,
+ fd, flags, mode, umask)
+ dl.wind_create(frame, POINTER(xlator_t)(), loc, flags,mode,
+ umask, fd, xdata)
+ return 0
+
+ def create_cbk(self, frame, cookie, this, op_ret, op_errno, fd,
+ inode, buf, preparent, postparent, xdata):
+ unique = dl.get_rootunique(frame)
+ if op_ret >= 0:
+ gfid = uuid2str(inode.contents.gfid)
+ statstr = trace_stat2str(buf)
+ preparentstr = trace_stat2str(preparent)
+ postparentstr = trace_stat2str(postparent)
+ print("GLUPY TRACE CREATE CBK- {0:d}: gfid={1:s};" +
+ " op_ret={2:d}; fd={3:s}; *stbuf={4:s}; " +
+ "*preparent={5:s};" +
+ " *postparent={6:s}").format(unique, gfid, op_ret,
+ fd, statstr,
+ preparentstr,
+ postparentstr)
+ else:
+ print ("GLUPY TRACE CREATE CBK- {0:d}: op_ret={1:d}; " +
+ "op_errno={2:d}").format(unique, op_ret, op_errno)
+ dl.unwind_create(frame, cookie, this, op_ret, op_errno, fd,
+ inode, buf, preparent, postparent, xdata)
+ return 0
+
+ def open_fop(self, frame, this, loc, flags, fd, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE OPEN FOP- {0:d}: gfid={1:s}; path={2:s}; "+
+ "flags={3:d}; fd={4:s}").format(unique, gfid,
+ loc.contents.path, flags,
+ fd)
+ self.gfids[key] = gfid
+ dl.wind_open(frame, POINTER(xlator_t)(), loc, flags, fd, xdata)
+ return 0
+
+ def open_cbk(self, frame, cookie, this, op_ret, op_errno, fd, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE OPEN CBK- {0:d}: gfid={1:s}; op_ret={2:d}; "
+ "op_errno={3:d}; *fd={4:s}").format(unique, gfid,
+ op_ret, op_errno, fd)
+ del self.gfids[key]
+ dl.unwind_open(frame, cookie, this, op_ret, op_errno, fd,
+ xdata)
+ return 0
+
+ def readv_fop(self, frame, this, fd, size, offset, flags, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(fd.contents.inode.contents.gfid)
+ print("GLUPY TRACE READV FOP- {0:d}: gfid={1:s}; "+
+ "fd={2:s}; size ={3:d}; offset={4:d}; " +
+ "flags=0{5:x}").format(unique, gfid, fd, size, offset,
+ flags)
+ self.gfids[key] = gfid
+ dl.wind_readv (frame, POINTER(xlator_t)(), fd, size, offset,
+ flags, xdata)
+ return 0
+
+ def readv_cbk(self, frame, cookie, this, op_ret, op_errno, vector,
+ count, buf, iobref, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ if op_ret >= 0:
+ statstr = trace_stat2str(buf)
+ print("GLUPY TRACE READV CBK- {0:d}: gfid={1:s}, "+
+ "op_ret={2:d}; *buf={3:s};").format(unique, gfid,
+ op_ret,
+ statstr)
+
+ else:
+ print("GLUPY TRACE READV CBK- {0:d}: gfid={1:s}, "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique,
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_readv (frame, cookie, this, op_ret, op_errno,
+ vector, count, buf, iobref, xdata)
+ return 0
+
+ def writev_fop(self, frame, this, fd, vector, count, offset, flags,
+ iobref, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(fd.contents.inode.contents.gfid)
+ print("GLUPY TRACE WRITEV FOP- {0:d}: gfid={1:s}; " +
+ "fd={2:s}; count={3:d}; offset={4:d}; " +
+ "flags=0{5:x}").format(unique, gfid, fd, count, offset,
+ flags)
+ self.gfids[key] = gfid
+ dl.wind_writev(frame, POINTER(xlator_t)(), fd, vector, count,
+ offset, flags, iobref, xdata)
+ return 0
+
+ def writev_cbk(self, frame, cookie, this, op_ret, op_errno, prebuf,
+ postbuf, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ if op_ret >= 0:
+ preopstr = trace_stat2str(prebuf)
+ postopstr = trace_stat2str(postbuf)
+ print("GLUPY TRACE WRITEV CBK- {0:d}: op_ret={1:d}; " +
+ "*prebuf={2:s}; " +
+ "*postbuf={3:s}").format(unique, op_ret, preopstr,
+ postopstr)
+ else:
+ gfid = self.gfids[key]
+ print("GLUPY TRACE WRITEV CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique,
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_writev (frame, cookie, this, op_ret, op_errno,
+ prebuf, postbuf, xdata)
+ return 0
+
+ def opendir_fop(self, frame, this, loc, fd, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE OPENDIR FOP- {0:d}: gfid={1:s}; path={2:s}; "+
+ "fd={3:s}").format(unique, gfid, loc.contents.path, fd)
+ self.gfids[key] = gfid
+ dl.wind_opendir(frame, POINTER(xlator_t)(), loc, fd, xdata)
+ return 0
+
+ def opendir_cbk(self, frame, cookie, this, op_ret, op_errno, fd,
+ xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE OPENDIR CBK- {0:d}: gfid={1:s}; op_ret={2:d};"+
+ " op_errno={3:d}; fd={4:s}").format(unique, gfid, op_ret,
+ op_errno, fd)
+ del self.gfids[key]
+ dl.unwind_opendir(frame, cookie, this, op_ret, op_errno,
+ fd, xdata)
+ return 0
+
+ def readdir_fop(self, frame, this, fd, size, offset, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(fd.contents.inode.contents.gfid)
+ print("GLUPY TRACE READDIR FOP- {0:d}: gfid={1:s}; fd={2:s}; " +
+ "size={3:d}; offset={4:d}").format(unique, gfid, fd, size,
+ offset)
+ self.gfids[key] = gfid
+ dl.wind_readdir(frame, POINTER(xlator_t)(), fd, size, offset,
+ xdata)
+ return 0
+
+ def readdir_cbk(self, frame, cookie, this, op_ret, op_errno, buf,
+ xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE READDIR CBK- {0:d}: gfid={1:s}; op_ret={2:d};"+
+ " op_errno={3:d}").format(unique, gfid, op_ret, op_errno)
+ del self.gfids[key]
+ dl.unwind_readdir(frame, cookie, this, op_ret, op_errno, buf,
+ xdata)
+ return 0
+
+ def readdirp_fop(self, frame, this, fd, size, offset, dictionary):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(fd.contents.inode.contents.gfid)
+ print("GLUPY TRACE READDIRP FOP- {0:d}: gfid={1:s}; fd={2:s}; "+
+ " size={3:d}; offset={4:d}").format(unique, gfid, fd, size,
+ offset)
+ self.gfids[key] = gfid
+ dl.wind_readdirp(frame, POINTER(xlator_t)(), fd, size, offset,
+ dictionary)
+ return 0
+
+ def readdirp_cbk(self, frame, cookie, this, op_ret, op_errno, buf,
+ xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE READDIRP CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique, gfid,
+ op_ret, op_errno)
+ del self.gfids[key]
+ dl.unwind_readdirp(frame, cookie, this, op_ret, op_errno, buf,
+ xdata)
+ return 0
+
+ def mkdir_fop(self, frame, this, loc, mode, umask, xdata):
+ unique = dl.get_rootunique(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE MKDIR FOP- {0:d}: gfid={1:s}; path={2:s}; " +
+ "mode={3:d}; umask=0{4:o}").format(unique, gfid,
+ loc.contents.path, mode,
+ umask)
+ dl.wind_mkdir(frame, POINTER(xlator_t)(), loc, mode, umask,
+ xdata)
+ return 0
+
+ def mkdir_cbk(self, frame, cookie, this, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata):
+ unique = dl.get_rootunique(frame)
+ if op_ret == 0:
+ gfid = uuid2str(inode.contents.gfid)
+ statstr = trace_stat2str(buf)
+ preparentstr = trace_stat2str(preparent)
+ postparentstr = trace_stat2str(postparent)
+ print("GLUPY TRACE MKDIR CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; *stbuf={3:s}; *prebuf={4:s}; "+
+ "*postbuf={5:s} ").format(unique, gfid, op_ret,
+ statstr,
+ preparentstr,
+ postparentstr)
+ else:
+ print("GLUPY TRACE MKDIR CBK- {0:d}: op_ret={1:d}; "+
+ "op_errno={2:d}").format(unique, op_ret, op_errno)
+ dl.unwind_mkdir(frame, cookie, this, op_ret, op_errno, inode,
+ buf, preparent, postparent, xdata)
+ return 0
+
+ def rmdir_fop(self, frame, this, loc, flags, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE RMDIR FOP- {0:d}: gfid={1:s}; path={2:s}; "+
+ "flags={3:d}").format(unique, gfid, loc.contents.path,
+ flags)
+ self.gfids[key] = gfid
+ dl.wind_rmdir(frame, POINTER(xlator_t)(), loc, flags, xdata)
+ return 0
+
+ def rmdir_cbk(self, frame, cookie, this, op_ret, op_errno, preparent,
+ postparent, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ if op_ret == 0:
+ preparentstr = trace_stat2str(preparent)
+ postparentstr = trace_stat2str(postparent)
+ print("GLUPY TRACE RMDIR CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; *prebuf={3:s}; "+
+ "*postbuf={4:s}").format(unique, gfid, op_ret,
+ preparentstr,
+ postparentstr)
+ else:
+ print("GLUPY TRACE RMDIR CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique,
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_rmdir(frame, cookie, this, op_ret, op_errno,
+ preparent, postparent, xdata)
+ return 0
+
+ def stat_fop(self, frame, this, loc, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE STAT FOP- {0:d}: gfid={1:s}; " +
+ " path={2:s}").format(unique, gfid, loc.contents.path)
+ self.gfids[key] = gfid
+ dl.wind_stat(frame, POINTER(xlator_t)(), loc, xdata)
+ return 0
+
+ def stat_cbk(self, frame, cookie, this, op_ret, op_errno, buf,
+ xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ if op_ret == 0:
+ statstr = trace_stat2str(buf)
+ print("GLUPY TRACE STAT CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; *buf={3:s};").format(unique,
+ gfid,
+ op_ret,
+ statstr)
+ else:
+ print("GLUPY TRACE STAT CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique,
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_stat(frame, cookie, this, op_ret, op_errno,
+ buf, xdata)
+ return 0
+
+ def fstat_fop(self, frame, this, fd, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(fd.contents.inode.contents.gfid)
+ print("GLUPY TRACE FSTAT FOP- {0:d}: gfid={1:s}; " +
+ "fd={2:s}").format(unique, gfid, fd)
+ self.gfids[key] = gfid
+ dl.wind_fstat(frame, POINTER(xlator_t)(), fd, xdata)
+ return 0
+
+ def fstat_cbk(self, frame, cookie, this, op_ret, op_errno, buf,
+ xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ if op_ret == 0:
+ statstr = trace_stat2str(buf)
+ print("GLUPY TRACE FSTAT CBK- {0:d}: gfid={1:s} "+
+ " op_ret={2:d}; *buf={3:s}").format(unique,
+ gfid,
+ op_ret,
+ statstr)
+ else:
+ print("GLUPY TRACE FSTAT CBK- {0:d}: gfid={1:s} "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique.
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_fstat(frame, cookie, this, op_ret, op_errno,
+ buf, xdata)
+ return 0
+
+ def statfs_fop(self, frame, this, loc, xdata):
+ unique = dl.get_rootunique(frame)
+ if loc.contents.inode:
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ else:
+ gfid = "0"
+ print("GLUPY TRACE STATFS FOP- {0:d}: gfid={1:s}; "+
+ "path={2:s}").format(unique, gfid, loc.contents.path)
+ dl.wind_statfs(frame, POINTER(xlator_t)(), loc, xdata)
+ return 0
+
+ def statfs_cbk(self, frame, cookie, this, op_ret, op_errno, buf,
+ xdata):
+ unique = dl.get_rootunique(frame)
+ if op_ret == 0:
+ #TBD: print buf (pointer to an iovec type object)
+ print("GLUPY TRACE STATFS CBK {0:d}: "+
+ "op_ret={1:d}").format(unique, op_ret)
+ else:
+ print("GLUPY TRACE STATFS CBK- {0:d}"+
+ "op_ret={1:d}; op_errno={2:d}").format(unique,
+ op_ret,
+ op_errno)
+ dl.unwind_statfs(frame, cookie, this, op_ret, op_errno,
+ buf, xdata)
+ return 0
+
+ def getxattr_fop(self, frame, this, loc, name, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE GETXATTR FOP- {0:d}: gfid={1:s}; path={2:s};"+
+ " name={3:s}").format(unique, gfid, loc.contents.path,
+ name)
+ self.gfids[key]=gfid
+ dl.wind_getxattr(frame, POINTER(xlator_t)(), loc, name, xdata)
+ return 0
+
+ def getxattr_cbk(self, frame, cookie, this, op_ret, op_errno,
+ dictionary, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE GETXATTR CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}; "+
+ " dictionary={4:s}").format(unique, gfid, op_ret, op_errno,
+ dictionary)
+ del self.gfids[key]
+ dl.unwind_getxattr(frame, cookie, this, op_ret, op_errno,
+ dictionary, xdata)
+ return 0
+
+ def fgetxattr_fop(self, frame, this, fd, name, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(fd.contents.inode.contents.gfid)
+ print("GLUPY TRACE FGETXATTR FOP- {0:d}: gfid={1:s}; fd={2:s}; "+
+ "name={3:s}").format(unique, gfid, fd, name)
+ self.gfids[key] = gfid
+ dl.wind_fgetxattr(frame, POINTER(xlator_t)(), fd, name, xdata)
+ return 0
+
+ def fgetxattr_cbk(self, frame, cookie, this, op_ret, op_errno,
+ dictionary, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE FGETXATTR CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d};"+
+ " dictionary={4:s}").format(unique, gfid, op_ret,
+ op_errno, dictionary)
+ del self.gfids[key]
+ dl.unwind_fgetxattr(frame, cookie, this, op_ret, op_errno,
+ dictionary, xdata)
+ return 0
+
+ def setxattr_fop(self, frame, this, loc, dictionary, flags, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE SETXATTR FOP- {0:d}: gfid={1:s}; path={2:s};"+
+ " flags={3:d}").format(unique, gfid, loc.contents.path,
+ flags)
+ self.gfids[key] = gfid
+ dl.wind_setxattr(frame, POINTER(xlator_t)(), loc, dictionary,
+ flags, xdata)
+ return 0
+
+ def setxattr_cbk(self, frame, cookie, this, op_ret, op_errno, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE SETXATTR CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique, gfid,
+ op_ret, op_errno)
+ del self.gfids[key]
+ dl.unwind_setxattr(frame, cookie, this, op_ret, op_errno,
+ xdata)
+ return 0
+
+ def fsetxattr_fop(self, frame, this, fd, dictionary, flags, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(fd.contents.inode.contents.gfid)
+ print("GLUPY TRACE FSETXATTR FOP- {0:d}: gfid={1:s}; fd={2:p}; "+
+ "flags={3:d}").format(unique, gfid, fd, flags)
+ self.gfids[key] = gfid
+ dl.wind_fsetxattr(frame, POINTER(xlator_t)(), fd, dictionary,
+ flags, xdata)
+ return 0
+
+ def fsetxattr_cbk(self, frame, cookie, this, op_ret, op_errno, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE FSETXATTR CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique, gfid,
+ op_ret, op_errno)
+ del self.gfids[key]
+ dl.unwind_fsetxattr(frame, cookie, this, op_ret, op_errno,
+ xdata)
+ return 0
+
+ def removexattr_fop(self, frame, this, loc, name, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE REMOVEXATTR FOP- {0:d}: gfid={1:s}; "+
+ "path={2:s}; name={3:s}").format(unique, gfid,
+ loc.contents.path,
+ name)
+ self.gfids[key] = gfid
+ dl.wind_removexattr(frame, POINTER(xlator_t)(), loc, name,
+ xdata)
+ return 0
+
+ def removexattr_cbk(self, frame, cookie, this, op_ret, op_errno,
+ xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ print("GLUPY TRACE REMOVEXATTR CBK- {0:d}: gfid={1:s} "+
+ " op_ret={2:d}; op_errno={3:d}").format(unique, gfid,
+ op_ret, op_errno)
+ del self.gfids[key]
+ dl.unwind_removexattr(frame, cookie, this, op_ret, op_errno,
+ xdata)
+ return 0
+
+ def link_fop(self, frame, this, oldloc, newloc, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ if (newloc.contents.inode):
+ newgfid = uuid2str(newloc.contents.inode.contents.gfid)
+ else:
+ newgfid = "0"
+ oldgfid = uuid2str(oldloc.contents.inode.contents.gfid)
+ print("GLUPY TRACE LINK FOP-{0:d}: oldgfid={1:s}; oldpath={2:s};"+
+ "newgfid={3:s};"+
+ "newpath={4:s}").format(unique, oldgfid,
+ oldloc.contents.path,
+ newgfid,
+ newloc.contents.path)
+ self.gfids[key] = oldgfid
+ dl.wind_link(frame, POINTER(xlator_t)(), oldloc, newloc,
+ xdata)
+ return 0
+
+ def link_cbk(self, frame, cookie, this, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ if op_ret == 0:
+ statstr = trace_stat2str(buf)
+ preparentstr = trace_stat2str(preparent)
+ postparentstr = trace_stat2str(postparent)
+ print("GLUPY TRACE LINK CBK- {0:d}: op_ret={1:d} "+
+ "*stbuf={2:s}; *prebuf={3:s}; "+
+ "*postbuf={4:s} ").format(unique, op_ret, statstr,
+ preparentstr,
+ postparentstr)
+ else:
+ print("GLUPY TRACE LINK CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; "+
+ "op_errno={3:d}").format(unique, gfid,
+ op_ret, op_errno)
+ del self.gfids[key]
+ dl.unwind_link(frame, cookie, this, op_ret, op_errno, inode,
+ buf, preparent, postparent, xdata)
+ return 0
+
+ def unlink_fop(self, frame, this, loc, xflag, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE UNLINK FOP- {0:d}; gfid={1:s}; path={2:s}; "+
+ "flag={3:d}").format(unique, gfid, loc.contents.path,
+ xflag)
+ self.gfids[key] = gfid
+ dl.wind_unlink(frame, POINTER(xlator_t)(), loc, xflag,
+ xdata)
+ return 0
+
+ def unlink_cbk(self, frame, cookie, this, op_ret, op_errno,
+ preparent, postparent, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ if op_ret == 0:
+ preparentstr = trace_stat2str(preparent)
+ postparentstr = trace_stat2str(postparent)
+ print("GLUPY TRACE UNLINK CBK- {0:d}: gfid ={1:s}; "+
+ "op_ret={2:d}; *prebuf={3:s}; "+
+ "*postbuf={4:s} ").format(unique, gfid, op_ret,
+ preparentstr,
+ postparentstr)
+ else:
+ print("GLUPY TRACE UNLINK CBK: {0:d}: gfid ={1:s}; "+
+ "op_ret={2:d}; "+
+ "op_errno={3:d}").format(unique, gfid, op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_unlink(frame, cookie, this, op_ret, op_errno,
+ preparent, postparent, xdata)
+ return 0
+
+ def readlink_fop(self, frame, this, loc, size, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE READLINK FOP- {0:d}: gfid={1:s}; path={2:s};"+
+ " size={3:d}").format(unique, gfid, loc.contents.path,
+ size)
+ self.gfids[key] = gfid
+ dl.wind_readlink(frame, POINTER(xlator_t)(), loc, size,
+ xdata)
+ return 0
+
+ def readlink_cbk(self, frame, cookie, this, op_ret, op_errno,
+ buf, stbuf, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ if op_ret == 0:
+ statstr = trace_stat2str(stbuf)
+ print("GLUPY TRACE READLINK CBK- {0:d}: gfid={1:s} "+
+ " op_ret={2:d}; op_errno={3:d}; *prebuf={4:s}; "+
+ "*postbuf={5:s} ").format(unique, gfid,
+ op_ret, op_errno,
+ buf, statstr)
+ else:
+ print("GLUPY TRACE READLINK CBK- {0:d}: gfid={1:s} "+
+ " op_ret={2:d}; op_errno={3:d}").format(unique,
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_readlink(frame, cookie, this, op_ret, op_errno, buf,
+ stbuf, xdata)
+ return 0
+
+ def symlink_fop(self, frame, this, linkpath, loc, umask, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = uuid2str(loc.contents.inode.contents.gfid)
+ print("GLUPY TRACE SYMLINK FOP- {0:d}: gfid={1:s}; "+
+ "linkpath={2:s}; path={3:s};"+
+ "umask=0{4:o}").format(unique, gfid, linkpath,
+ loc.contents.path, umask)
+ self.gfids[key] = gfid
+ dl.wind_symlink(frame, POINTER(xlator_t)(), linkpath, loc,
+ umask, xdata)
+ return 0
+
+ def symlink_cbk(self, frame, cookie, this, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata):
+ unique = dl.get_rootunique(frame)
+ key = dl.get_id(frame)
+ gfid = self.gfids[key]
+ if op_ret == 0:
+ statstr = trace_stat2str(buf)
+ preparentstr = trace_stat2str(preparent)
+ postparentstr = trace_stat2str(postparent)
+ print("GLUPY TRACE SYMLINK CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; *stbuf={3:s}; *preparent={4:s}; "+
+ "*postparent={5:s}").format(unique, gfid,
+ op_ret, statstr,
+ preparentstr,
+ postparentstr)
+ else:
+ print("GLUPY TRACE SYMLINK CBK- {0:d}: gfid={1:s}; "+
+ "op_ret={2:d}; op_errno={3:d}").format(unique,
+ gfid,
+ op_ret,
+ op_errno)
+ del self.gfids[key]
+ dl.unwind_symlink(frame, cookie, this, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata)
+ return 0
diff --git a/xlators/features/glupy/src/glupy.c b/xlators/features/glupy/src/glupy.c
new file mode 100644
index 000000000..dc86c0071
--- /dev/null
+++ b/xlators/features/glupy/src/glupy.c
@@ -0,0 +1,2470 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#include <ctype.h>
+#include <sys/uio.h>
+#include <Python.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "defaults.h"
+
+#include "glupy.h"
+
+/* UTILITY FUNCTIONS FOR FOP-SPECIFIC CODE */
+
+pthread_key_t gil_init_key;
+
+PyGILState_STATE
+glupy_enter (void)
+{
+#if 0
+ if (!pthread_getspecific(gil_init_key)) {
+ PyEval_ReleaseLock();
+ (void)pthread_setspecific(gil_init_key,(void *)1);
+ }
+#endif
+
+ return PyGILState_Ensure();
+}
+
+void
+glupy_leave (PyGILState_STATE gstate)
+{
+ PyGILState_Release(gstate);
+}
+
+/* FOP: LOOKUP */
+
+int32_t
+glupy_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_LOOKUP]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_lookup_cbk_t)(priv->cbks[GLUPY_LOOKUP]))(
+ frame, cookie, this, op_ret, op_errno,
+ inode, buf, xdata, postparent);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf,
+ xdata, postparent);
+ return 0;
+}
+
+int32_t
+glupy_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_LOOKUP]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_lookup_t)(priv->fops[GLUPY_LOOKUP]))(
+ frame, this, loc, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ return 0;
+}
+
+void
+wind_lookup (call_frame_t *frame, xlator_t *xl, loc_t *loc, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND(frame,glupy_lookup_cbk,xl,xl->fops->lookup,loc,xdata);
+}
+
+void
+unwind_lookup (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(lookup,frame,op_ret,op_errno,
+ inode,buf,xdata,postparent);
+}
+
+void
+set_lookup_fop (long py_this, fop_lookup_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_LOOKUP] = (long)fop;
+}
+
+void
+set_lookup_cbk (long py_this, fop_lookup_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_LOOKUP] = (long)cbk;
+}
+
+/* FOP: CREATE */
+
+int32_t
+glupy_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_CREATE]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_create_cbk_t)(priv->cbks[GLUPY_CREATE]))(
+ frame, cookie, this, op_ret, op_errno,
+ fd, inode, buf, preparent, postparent, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int32_t
+glupy_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_CREATE]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_create_t)(priv->fops[GLUPY_CREATE]))(
+ frame, this, loc, flags, mode, umask, fd, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask,
+ fd, xdata);
+ return 0;
+}
+
+void
+wind_create (call_frame_t *frame, xlator_t *xl, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_create_cbk,xl, xl->fops->create,
+ loc, flags, mode, umask, fd, xdata);
+}
+
+void
+unwind_create (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+}
+
+void
+set_create_fop (long py_this, fop_create_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_CREATE] = (long)fop;
+}
+
+void
+set_create_cbk (long py_this, fop_create_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_CREATE] = (long)cbk;
+}
+
+/* FOP: OPEN */
+
+int32_t
+glupy_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_OPEN]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_open_cbk_t)(priv->cbks[GLUPY_OPEN]))(
+ frame, cookie, this, op_ret, op_errno,
+ fd, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+int32_t
+glupy_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_OPEN]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_open_t)(priv->fops[GLUPY_OPEN]))(
+ frame, this, loc, flags, fd, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
+}
+
+void
+wind_open (call_frame_t *frame, xlator_t *xl, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_open_cbk, xl, xl->fops->open, loc, flags,
+ fd, xdata);
+}
+
+void
+unwind_open (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+}
+
+void
+set_open_fop (long py_this, fop_open_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+ priv->fops[GLUPY_OPEN] = (long)fop;
+}
+
+void
+set_open_cbk (long py_this, fop_open_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+ priv->cbks[GLUPY_OPEN] = (long)cbk;
+}
+
+/* FOP: READV */
+
+int32_t
+glupy_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_READV]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_readv_cbk_t)(priv->cbks[GLUPY_READV]))(
+ frame, cookie, this, op_ret, op_errno,
+ vector, count, stbuf, iobref, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector,
+ count, stbuf, iobref, xdata);
+ return 0;
+}
+
+int32_t
+glupy_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_READV]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_readv_t)(priv->fops[GLUPY_READV]))(
+ frame, this, fd, size, offset, flags, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset,
+ flags, xdata);
+ return 0;
+}
+
+void
+wind_readv (call_frame_t *frame, xlator_t *xl, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_readv_cbk, xl, xl->fops->readv, fd, size,
+ offset, flags, xdata);
+}
+
+void
+unwind_readv (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector,
+ count, stbuf, iobref, xdata);
+}
+
+void
+set_readv_fop (long py_this, fop_readv_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+ priv->fops[GLUPY_READV] = (long)fop;
+}
+
+void
+set_readv_cbk (long py_this, fop_readv_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+ priv->cbks[GLUPY_READV] = (long)cbk;
+}
+
+/* FOP: WRITEV */
+
+int32_t
+glupy_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_WRITEV]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_writev_cbk_t)(priv->cbks[GLUPY_WRITEV]))(
+ frame, cookie, this, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
+}
+
+int32_t
+glupy_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_WRITEV]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_writev_t)(priv->fops[GLUPY_WRITEV]))(
+ frame, this, fd, vector, count, offset, flags,
+ iobref, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count,
+ offset, flags, iobref, xdata);
+ return 0;
+}
+
+void
+wind_writev (call_frame_t *frame, xlator_t *xl, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_writev_cbk, xl, xl->fops->writev, fd, vector,
+ count, offset, flags, iobref, xdata);
+}
+
+void
+unwind_writev (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+}
+
+void
+set_writev_fop (long py_this, fop_writev_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+ priv->fops[GLUPY_WRITEV] = (long)fop;
+}
+
+void
+set_writev_cbk (long py_this, fop_writev_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+ priv->cbks[GLUPY_WRITEV] = (long)cbk;
+}
+
+
+/* FOP: OPENDIR */
+
+int32_t
+glupy_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_OPENDIR]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_opendir_cbk_t)(priv->cbks[GLUPY_OPENDIR]))(
+ frame, cookie, this, op_ret, op_errno,
+ fd, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+int32_t
+glupy_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ fd_t *fd, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_OPENDIR]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_opendir_t)(priv->fops[GLUPY_OPENDIR]))(
+ frame, this, loc, fd, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_opendir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+ return 0;
+}
+
+void
+wind_opendir (call_frame_t *frame, xlator_t *xl, loc_t *loc, fd_t *fd, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND(frame,glupy_opendir_cbk,xl,xl->fops->opendir,loc,fd,xdata);
+}
+
+void
+unwind_opendir (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(opendir,frame,op_ret,op_errno,
+ fd,xdata);
+}
+
+void
+set_opendir_fop (long py_this, fop_opendir_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_OPENDIR] = (long)fop;
+}
+
+void
+set_opendir_cbk (long py_this, fop_opendir_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_OPENDIR] = (long)cbk;
+}
+
+/* FOP: READDIR */
+
+int32_t
+glupy_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_READDIR]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_readdir_cbk_t)(priv->cbks[GLUPY_READDIR]))(
+ frame, cookie, this, op_ret, op_errno,
+ entries, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries,
+ xdata);
+ return 0;
+}
+
+int32_t
+glupy_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_READDIR]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_readdir_t)(priv->fops[GLUPY_READDIR]))(
+ frame, this, fd, size, offset, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_readdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdir,fd, size, offset, xdata);
+ return 0;
+}
+
+void
+wind_readdir(call_frame_t *frame, xlator_t *xl, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND(frame,glupy_readdir_cbk,xl,xl->fops->readdir,fd,size,offset,xdata);
+}
+
+void
+unwind_readdir (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(readdir,frame,op_ret,op_errno,
+ entries, xdata);
+}
+
+void
+set_readdir_fop (long py_this, fop_readdir_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_READDIR] = (long)fop;
+}
+
+void
+set_readdir_cbk (long py_this, fop_readdir_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_READDIR] = (long)cbk;
+}
+
+
+/* FOP: READDIRP */
+
+int32_t
+glupy_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_READDIRP]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_readdirp_cbk_t)(priv->cbks[GLUPY_READDIRP]))(
+ frame, cookie, this, op_ret, op_errno,
+ entries, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries,
+ xdata);
+ return 0;
+}
+
+int32_t
+glupy_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_READDIRP]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_readdirp_t)(priv->fops[GLUPY_READDIRP]))(
+ frame, this, fd, size, offset, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp,fd, size, offset, xdata);
+ return 0;
+}
+
+void
+wind_readdirp (call_frame_t *frame, xlator_t *xl, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND(frame,glupy_readdirp_cbk,xl,xl->fops->readdirp,fd,size,offset,xdata);
+}
+
+void
+unwind_readdirp (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(readdirp,frame,op_ret,op_errno,
+ entries, xdata);
+}
+
+void
+set_readdirp_fop (long py_this, fop_readdirp_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_READDIRP] = (long)fop;
+}
+
+void
+set_readdirp_cbk (long py_this, fop_readdirp_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_READDIRP] = (long)cbk;
+}
+
+
+/* FOP:STAT */
+
+int32_t
+glupy_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_STAT]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_stat_cbk_t)(priv->cbks[GLUPY_STAT]))(
+ frame, cookie, this, op_ret, op_errno,
+ buf, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int32_t
+glupy_stat (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_STAT]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_stat_t)(priv->fops[GLUPY_STAT]))(
+ frame, this, loc, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
+}
+
+void
+wind_stat (call_frame_t *frame, xlator_t *xl, loc_t *loc, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND(frame,glupy_stat_cbk,xl,xl->fops->stat,loc,xdata);
+}
+
+void
+unwind_stat (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(stat,frame,op_ret,op_errno,
+ buf,xdata);
+}
+
+void
+set_stat_fop (long py_this, fop_stat_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_STAT] = (long)fop;
+}
+
+void
+set_stat_cbk (long py_this, fop_stat_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_STAT] = (long)cbk;
+}
+
+
+/* FOP: FSTAT */
+
+int32_t
+glupy_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_FSTAT]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_fstat_cbk_t)(priv->cbks[GLUPY_FSTAT]))(
+ frame, cookie, this, op_ret, op_errno,
+ buf, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int32_t
+glupy_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_FSTAT]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_fstat_t)(priv->fops[GLUPY_FSTAT]))(
+ frame, this, fd, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ return 0;
+}
+
+void
+wind_fstat (call_frame_t *frame, xlator_t *xl, fd_t *fd, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND(frame,glupy_fstat_cbk,xl,xl->fops->fstat,fd,xdata);
+}
+
+void
+unwind_fstat (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(fstat,frame,op_ret,op_errno,
+ buf,xdata);
+}
+
+void
+set_fstat_fop (long py_this, fop_fstat_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_FSTAT] = (long)fop;
+}
+
+void
+set_fstat_cbk (long py_this, fop_fstat_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_FSTAT] = (long)cbk;
+}
+
+/* FOP:STATFS */
+
+int32_t
+glupy_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_STATFS]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_statfs_cbk_t)(priv->cbks[GLUPY_STATFS]))(
+ frame, cookie, this, op_ret, op_errno,
+ buf, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int32_t
+glupy_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_STATFS]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_statfs_t)(priv->fops[GLUPY_STATFS]))(
+ frame, this, loc, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_statfs_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->statfs, loc, xdata);
+ return 0;
+}
+
+void
+wind_statfs (call_frame_t *frame, xlator_t *xl, loc_t *loc, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND(frame,glupy_statfs_cbk,xl,xl->fops->statfs,loc,xdata);
+}
+
+void
+unwind_statfs (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(statfs,frame,op_ret,op_errno,
+ buf,xdata);
+}
+
+void
+set_statfs_fop (long py_this, fop_statfs_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_STATFS] = (long)fop;
+}
+
+void
+set_statfs_cbk (long py_this, fop_statfs_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_STATFS] = (long)cbk;
+}
+
+
+/* FOP: SETXATTR */
+
+int32_t
+glupy_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_SETXATTR]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_setxattr_cbk_t)(priv->cbks[GLUPY_SETXATTR]))(
+ frame, cookie, this, op_ret, op_errno,
+ xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+glupy_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_SETXATTR]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_setxattr_t)(priv->fops[GLUPY_SETXATTR]))(
+ frame, this, loc, dict, flags, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_setxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict,
+ flags, xdata);
+ return 0;
+}
+
+void
+wind_setxattr (call_frame_t *frame, xlator_t *xl, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_setxattr_cbk, xl, xl->fops->setxattr,
+ loc, dict, flags, xdata);
+}
+
+
+void
+unwind_setxattr (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
+
+}
+
+void
+set_setxattr_fop (long py_this, fop_setxattr_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_SETXATTR] = (long)fop;
+}
+
+void
+set_setxattr_cbk (long py_this, fop_setxattr_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_SETXATTR] = (long)cbk;
+}
+
+/* FOP: GETXATTR */
+
+int32_t
+glupy_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_GETXATTR]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_getxattr_cbk_t)(priv->cbks[GLUPY_GETXATTR]))(
+ frame, cookie, this, op_ret, op_errno, dict,
+ xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict,
+ xdata);
+ return 0;
+}
+
+int32_t
+glupy_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_GETXATTR]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_getxattr_t)(priv->fops[GLUPY_GETXATTR]))(
+ frame, this, loc, name, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, name,
+ xdata);
+ return 0;
+}
+
+void
+wind_getxattr (call_frame_t *frame, xlator_t *xl, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_getxattr_cbk, xl, xl->fops->getxattr,
+ loc, name, xdata);
+}
+
+
+void
+unwind_getxattr (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict,
+ xdata);
+
+}
+
+
+void
+set_getxattr_fop (long py_this, fop_getxattr_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_GETXATTR] = (long)fop;
+}
+
+
+void
+set_getxattr_cbk (long py_this, fop_getxattr_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_GETXATTR] = (long)cbk;
+}
+
+/* FOP: FSETXATTR */
+
+int32_t
+glupy_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_FSETXATTR]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_fsetxattr_cbk_t)(priv->cbks[GLUPY_FSETXATTR]))(
+ frame, cookie, this, op_ret, op_errno,
+ xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+glupy_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_FSETXATTR]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_fsetxattr_t)(priv->fops[GLUPY_FSETXATTR]))(
+ frame, this, fd, dict, flags, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_fsetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict,
+ flags, xdata);
+ return 0;
+}
+
+void
+wind_fsetxattr (call_frame_t *frame, xlator_t *xl, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_fsetxattr_cbk, xl, xl->fops->fsetxattr,
+ fd, dict, flags, xdata);
+}
+
+
+void
+unwind_fsetxattr (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
+
+}
+
+void
+set_fsetxattr_fop (long py_this, fop_fsetxattr_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_FSETXATTR] = (long)fop;
+}
+
+void
+set_fsetxattr_cbk (long py_this, fop_fsetxattr_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_FSETXATTR] = (long)cbk;
+}
+
+/* FOP: FGETXATTR */
+
+int32_t
+glupy_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_FGETXATTR]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_fgetxattr_cbk_t)(priv->cbks[GLUPY_FGETXATTR]))(
+ frame, cookie, this, op_ret, op_errno, dict,
+ xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict,
+ xdata);
+ return 0;
+}
+
+int32_t
+glupy_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_FGETXATTR]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_fgetxattr_t)(priv->fops[GLUPY_FGETXATTR]))(
+ frame, this, fd, name, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_fgetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, name,
+ xdata);
+ return 0;
+}
+
+void
+wind_fgetxattr (call_frame_t *frame, xlator_t *xl, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_fgetxattr_cbk, xl, xl->fops->fgetxattr,
+ fd, name, xdata);
+}
+
+
+void
+unwind_fgetxattr (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict,
+ xdata);
+
+}
+
+
+void
+set_fgetxattr_fop (long py_this, fop_fgetxattr_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_FGETXATTR] = (long)fop;
+}
+
+
+void
+set_fgetxattr_cbk (long py_this, fop_fgetxattr_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_FGETXATTR] = (long)cbk;
+}
+
+/* FOP:REMOVEXATTR */
+
+int32_t
+glupy_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_REMOVEXATTR]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_removexattr_cbk_t)(priv->cbks[GLUPY_REMOVEXATTR]))(
+ frame, cookie, this, op_ret, op_errno, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+glupy_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_REMOVEXATTR]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_removexattr_t)(priv->fops[GLUPY_REMOVEXATTR]))(
+ frame, this, loc, name, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_removexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name,
+ xdata);
+ return 0;
+}
+
+void
+wind_removexattr (call_frame_t *frame, xlator_t *xl, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_removexattr_cbk, xl, xl->fops->removexattr,
+ loc, name, xdata);
+}
+
+
+void
+unwind_removexattr (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
+
+}
+
+void
+set_removexattr_fop (long py_this, fop_removexattr_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_REMOVEXATTR] = (long)fop;
+}
+
+void
+set_removexattr_cbk (long py_this, fop_removexattr_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_REMOVEXATTR] = (long)cbk;
+}
+
+
+/* FOP:FREMOVEXATTR */
+
+int32_t
+glupy_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_FREMOVEXATTR]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_fremovexattr_cbk_t)(priv->cbks[GLUPY_FREMOVEXATTR]))(
+ frame, cookie, this, op_ret, op_errno, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+glupy_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_FREMOVEXATTR]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_fremovexattr_t)(priv->fops[GLUPY_FREMOVEXATTR]))(
+ frame, this, fd, name, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_fremovexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name,
+ xdata);
+ return 0;
+}
+
+void
+wind_fremovexattr (call_frame_t *frame, xlator_t *xl, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_fremovexattr_cbk, xl, xl->fops->fremovexattr,
+ fd, name, xdata);
+}
+
+
+void
+unwind_fremovexattr (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata);
+
+}
+
+void
+set_fremovexattr_fop (long py_this, fop_fremovexattr_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_FREMOVEXATTR] = (long)fop;
+}
+
+void
+set_fremovexattr_cbk (long py_this, fop_fremovexattr_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_FREMOVEXATTR] = (long)cbk;
+}
+
+
+/* FOP: LINK*/
+int32_t
+glupy_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_LINK]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_link_cbk_t)(priv->cbks[GLUPY_LINK]))(
+ frame, cookie, this, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int32_t
+glupy_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_LINK]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_link_t)(priv->fops[GLUPY_LINK]))(
+ frame, this, oldloc, newloc, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_link_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc, newloc,
+ xdata);
+ return 0;
+}
+
+void
+wind_link (call_frame_t *frame, xlator_t *xl, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_link_cbk, xl, xl->fops->link,
+ oldloc, newloc, xdata);
+}
+
+void
+unwind_link (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+}
+
+void
+set_link_fop (long py_this, fop_link_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_LINK] = (long)fop;
+}
+
+void
+set_link_cbk (long py_this, fop_link_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_LINK] = (long)cbk;
+}
+
+/* FOP: SYMLINK*/
+int32_t
+glupy_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_SYMLINK]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_symlink_cbk_t)(priv->cbks[GLUPY_SYMLINK]))(
+ frame, cookie, this, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int32_t
+glupy_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_SYMLINK]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_symlink_t)(priv->fops[GLUPY_SYMLINK]))(
+ frame, this, linkname, loc, umask, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_symlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->symlink, linkname, loc,
+ umask, xdata);
+ return 0;
+}
+
+void
+wind_symlink (call_frame_t *frame, xlator_t *xl, const char *linkname,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_symlink_cbk, xl, xl->fops->symlink,
+ linkname, loc, umask, xdata);
+}
+
+void
+unwind_symlink (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+}
+
+void
+set_symlink_fop (long py_this, fop_symlink_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_SYMLINK] = (long)fop;
+}
+
+void
+set_symlink_cbk (long py_this, fop_symlink_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_SYMLINK] = (long)cbk;
+}
+
+
+/* FOP: READLINK */
+int32_t
+glupy_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *path,
+ struct iatt *buf, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_READLINK]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_readlink_cbk_t)(priv->cbks[GLUPY_READLINK]))(
+ frame, cookie, this, op_ret, op_errno,
+ path, buf, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path,
+ buf, xdata);
+ return 0;
+}
+
+int32_t
+glupy_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ size_t size, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_READLINK]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_readlink_t)(priv->fops[GLUPY_READLINK]))(
+ frame, this, loc, size, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_readlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readlink, loc,
+ size, xdata);
+ return 0;
+}
+
+void
+wind_readlink (call_frame_t *frame, xlator_t *xl, loc_t *loc,
+ size_t size, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_readlink_cbk, xl, xl->fops->readlink,
+ loc, size, xdata);
+}
+
+void
+unwind_readlink (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *path,
+ struct iatt *buf, dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, buf,
+ xdata);
+}
+
+void
+set_readlink_fop (long py_this, fop_readlink_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_READLINK] = (long)fop;
+}
+
+void
+set_readlink_cbk (long py_this, fop_readlink_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_READLINK] = (long)cbk;
+}
+
+
+/* FOP: UNLINK */
+
+int32_t
+glupy_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_UNLINK]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_unlink_cbk_t)(priv->cbks[GLUPY_UNLINK]))(
+ frame, cookie, this, op_ret, op_errno,
+ preparent, postparent, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+int32_t
+glupy_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int xflags, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_UNLINK]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_unlink_t)(priv->fops[GLUPY_UNLINK]))(
+ frame, this, loc, xflags, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc,
+ xflags, xdata);
+ return 0;
+}
+
+void
+wind_unlink (call_frame_t *frame, xlator_t *xl, loc_t *loc,
+ int xflags, dict_t *xdata)
+{
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_unlink_cbk, xl, xl->fops->unlink,
+ loc, xflags, xdata);
+}
+
+void
+unwind_unlink (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
+}
+
+void
+set_unlink_fop (long py_this, fop_unlink_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_UNLINK] = (long)fop;
+}
+
+void
+set_unlink_cbk (long py_this, fop_unlink_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_UNLINK] = (long)cbk;
+}
+
+
+/* FOP: MKDIR */
+
+int32_t
+glupy_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_MKDIR]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_mkdir_cbk_t)(priv->cbks[GLUPY_MKDIR]))(
+ frame, cookie, this, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int32_t
+glupy_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_MKDIR]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_mkdir_t)(priv->fops[GLUPY_MKDIR]))(
+ frame, this, loc, mode, umask, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_mkdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask,
+ xdata);
+ return 0;
+}
+
+void
+wind_mkdir (call_frame_t *frame, xlator_t *xl, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_mkdir_cbk, xl, xl->fops->mkdir,
+ loc, mode, umask, xdata);
+}
+
+void
+unwind_mkdir (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+}
+
+void
+set_mkdir_fop (long py_this, fop_mkdir_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_MKDIR] = (long)fop;
+}
+
+void
+set_mkdir_cbk (long py_this, fop_mkdir_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_MKDIR] = (long)cbk;
+}
+
+
+/* FOP: RMDIR */
+
+int32_t
+glupy_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+
+ if (!priv->cbks[GLUPY_RMDIR]) {
+ goto unwind;
+ }
+
+ gstate = glupy_enter();
+ ret = ((fop_rmdir_cbk_t)(priv->cbks[GLUPY_RMDIR]))(
+ frame, cookie, this, op_ret, op_errno,
+ preparent, postparent, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+int32_t
+glupy_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int xflags, dict_t *xdata)
+{
+ glupy_private_t *priv = this->private;
+ PyGILState_STATE gstate;
+ int32_t ret;
+ static long next_id = 0;
+
+ if (!priv->fops[GLUPY_RMDIR]) {
+ goto wind;
+ }
+
+ gstate = glupy_enter();
+ frame->local = (void *)++next_id;
+ ret = ((fop_rmdir_t)(priv->fops[GLUPY_RMDIR]))(
+ frame, this, loc, xflags, xdata);
+ glupy_leave(gstate);
+
+ return ret;
+
+wind:
+ STACK_WIND (frame, glupy_rmdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rmdir, loc,
+ xflags, xdata);
+ return 0;
+}
+
+void
+wind_rmdir (call_frame_t *frame, xlator_t *xl, loc_t *loc,
+ int xflags, dict_t *xdata)
+{
+
+ xlator_t *this = THIS;
+
+ if (!xl || (xl == this)) {
+ xl = FIRST_CHILD(this);
+ }
+
+ STACK_WIND (frame, glupy_rmdir_cbk, xl, xl->fops->rmdir,
+ loc, xflags, xdata);
+}
+
+void
+unwind_rmdir (call_frame_t *frame, long cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
+}
+
+void
+set_rmdir_fop (long py_this, fop_rmdir_t fop)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->fops[GLUPY_RMDIR] = (long)fop;
+}
+
+void
+set_rmdir_cbk (long py_this, fop_rmdir_cbk_t cbk)
+{
+ glupy_private_t *priv = ((xlator_t *)py_this)->private;
+
+ priv->cbks[GLUPY_RMDIR] = (long)cbk;
+}
+
+
+/* NON-FOP-SPECIFIC CODE */
+
+
+long
+get_id (call_frame_t *frame)
+{
+ return (long)(frame->local);
+}
+
+uint64_t
+get_rootunique (call_frame_t *frame)
+{
+ return frame->root->unique;
+}
+
+int32_t
+init (xlator_t *this)
+{
+ glupy_private_t *priv = NULL;
+ char *module_name = NULL;
+ PyObject *py_mod_name = NULL;
+ PyObject *py_init_func = NULL;
+ PyObject *py_args = NULL;
+ PyObject *syspath = NULL;
+ PyObject *path = NULL;
+ static gf_boolean_t py_inited = _gf_false;
+ void * err_cleanup = &&err_return;
+
+ if (dict_get_str(this->options,"module-name",&module_name) != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "missing module-name");
+ return -1;
+ }
+
+ priv = GF_CALLOC (1, sizeof (glupy_private_t), gf_glupy_mt_priv);
+ if (!priv) {
+ goto *err_cleanup;
+ }
+ this->private = priv;
+ err_cleanup = &&err_free_priv;
+
+ if (!py_inited) {
+ Py_Initialize();
+ PyEval_InitThreads();
+#if 0
+ (void)pthread_key_create(&gil_init_key,NULL);
+ (void)pthread_setspecific(gil_init_key,(void *)1);
+#endif
+ /* PyEval_InitThreads takes this "for" us. No thanks. */
+ PyEval_ReleaseLock();
+ py_inited = _gf_true;
+ }
+
+ /* Adjust python's path */
+ syspath = PySys_GetObject("path");
+ path = PyString_FromString(GLUSTER_PYTHON_PATH);
+ PyList_Append(syspath, path);
+ Py_DECREF(path);
+
+ py_mod_name = PyString_FromString(module_name);
+ if (!py_mod_name) {
+ gf_log (this->name, GF_LOG_ERROR, "could not create name");
+ if (PyErr_Occurred()) {
+ PyErr_Print();
+ }
+ goto *err_cleanup;
+ }
+
+ gf_log (this->name, GF_LOG_ERROR, "py_mod_name = %s", module_name);
+ priv->py_module = PyImport_Import(py_mod_name);
+ Py_DECREF(py_mod_name);
+ if (!priv->py_module) {
+ gf_log (this->name, GF_LOG_ERROR, "Python import failed");
+ if (PyErr_Occurred()) {
+ PyErr_Print();
+ }
+ goto *err_cleanup;
+ }
+ err_cleanup = &&err_deref_module;
+
+ py_init_func = PyObject_GetAttrString(priv->py_module, "xlator");
+ if (!py_init_func || !PyCallable_Check(py_init_func)) {
+ gf_log (this->name, GF_LOG_ERROR, "missing init func");
+ if (PyErr_Occurred()) {
+ PyErr_Print();
+ }
+ goto *err_cleanup;
+ }
+ err_cleanup = &&err_deref_init;
+
+ py_args = PyTuple_New(1);
+ if (!py_args) {
+ gf_log (this->name, GF_LOG_ERROR, "could not create args");
+ if (PyErr_Occurred()) {
+ PyErr_Print();
+ }
+ goto *err_cleanup;
+ }
+ PyTuple_SetItem(py_args,0,PyLong_FromLong((long)this));
+
+ /* TBD: pass in list of children */
+ priv->py_xlator = PyObject_CallObject(py_init_func, py_args);
+ Py_DECREF(py_args);
+ if (!priv->py_xlator) {
+ gf_log (this->name, GF_LOG_ERROR, "Python init failed");
+ if (PyErr_Occurred()) {
+ PyErr_Print();
+ }
+ goto *err_cleanup;
+ }
+ gf_log (this->name, GF_LOG_INFO, "init returned %p", priv->py_xlator);
+
+ return 0;
+
+err_deref_init:
+ Py_DECREF(py_init_func);
+err_deref_module:
+ Py_DECREF(priv->py_module);
+err_free_priv:
+ GF_FREE(priv);
+err_return:
+ return -1;
+}
+
+void
+fini (xlator_t *this)
+{
+ glupy_private_t *priv = this->private;
+
+ if (!priv)
+ return;
+ Py_DECREF(priv->py_xlator);
+ Py_DECREF(priv->py_module);
+ this->private = NULL;
+ GF_FREE (priv);
+
+ return;
+}
+
+struct xlator_fops fops = {
+ .lookup = glupy_lookup,
+ .create = glupy_create,
+ .open = glupy_open,
+ .readv = glupy_readv,
+ .writev = glupy_writev,
+ .opendir = glupy_opendir,
+ .readdir = glupy_readdir,
+ .stat = glupy_stat,
+ .fstat = glupy_fstat,
+ .setxattr = glupy_setxattr,
+ .getxattr = glupy_getxattr,
+ .fsetxattr = glupy_fsetxattr,
+ .fgetxattr = glupy_fgetxattr,
+ .removexattr = glupy_removexattr,
+ .fremovexattr = glupy_fremovexattr,
+ .link = glupy_link,
+ .unlink = glupy_unlink,
+ .readlink = glupy_readlink,
+ .symlink = glupy_symlink,
+ .mkdir = glupy_mkdir,
+ .rmdir = glupy_rmdir,
+ .statfs = glupy_statfs,
+ .readdirp = glupy_readdirp
+};
+
+struct xlator_cbks cbks = {
+};
+
+struct volume_options options[] = {
+ { .key = {NULL} },
+};
diff --git a/xlators/features/glupy/src/glupy.h b/xlators/features/glupy/src/glupy.h
new file mode 100644
index 000000000..8661fce88
--- /dev/null
+++ b/xlators/features/glupy/src/glupy.h
@@ -0,0 +1,69 @@
+/*
+ Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __GLUPY_H__
+#define __GLUPY_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+#include "mem-types.h"
+
+enum {
+ GLUPY_LOOKUP = 0,
+ GLUPY_CREATE,
+ GLUPY_OPEN,
+ GLUPY_READV,
+ GLUPY_WRITEV,
+ GLUPY_OPENDIR,
+ GLUPY_READDIR,
+ GLUPY_READDIRP,
+ GLUPY_STAT,
+ GLUPY_FSTAT,
+ GLUPY_STATFS,
+ GLUPY_SETXATTR,
+ GLUPY_GETXATTR,
+ GLUPY_FSETXATTR,
+ GLUPY_FGETXATTR,
+ GLUPY_REMOVEXATTR,
+ GLUPY_FREMOVEXATTR,
+ GLUPY_LINK,
+ GLUPY_UNLINK,
+ GLUPY_READLINK,
+ GLUPY_SYMLINK,
+ GLUPY_MKNOD,
+ GLUPY_MKDIR,
+ GLUPY_RMDIR,
+ GLUPY_N_FUNCS
+};
+
+typedef struct {
+ PyObject *py_module;
+ PyObject *py_xlator;
+ long fops[GLUPY_N_FUNCS];
+ long cbks[GLUPY_N_FUNCS];
+} glupy_private_t;
+
+enum gf_glupy_mem_types_ {
+ gf_glupy_mt_priv = gf_common_mt_end + 1,
+ gf_glupy_mt_end
+};
+
+#endif /* __GLUPY_H__ */
diff --git a/xlators/features/glupy/src/gluster.py b/xlators/features/glupy/src/gluster.py
new file mode 100644
index 000000000..a5daa77d3
--- /dev/null
+++ b/xlators/features/glupy/src/gluster.py
@@ -0,0 +1,841 @@
+import sys
+from ctypes import *
+
+dl = CDLL("",RTLD_GLOBAL)
+
+
+class call_frame_t (Structure):
+ pass
+
+class dev_t (Structure):
+ pass
+
+
+class dict_t (Structure):
+ pass
+
+
+class gf_dirent_t (Structure):
+ pass
+
+
+class iobref_t (Structure):
+ pass
+
+
+class iovec_t (Structure):
+ pass
+
+
+class list_head (Structure):
+ pass
+
+list_head._fields_ = [
+ ("next", POINTER(list_head)),
+ ("prev", POINTER(list_head))
+ ]
+
+
+class rwxperm_t (Structure):
+ _fields_ = [
+ ("read", c_uint8, 1),
+ ("write", c_uint8, 1),
+ ("execn", c_uint8, 1)
+ ]
+
+
+class statvfs_t (Structure):
+ pass
+
+
+class xlator_t (Structure):
+ pass
+
+
+class ia_prot_t (Structure):
+ _fields_ = [
+ ("suid", c_uint8, 1),
+ ("sgid", c_uint8, 1),
+ ("sticky", c_uint8, 1),
+ ("owner", rwxperm_t),
+ ("group", rwxperm_t),
+ ("other", rwxperm_t)
+ ]
+
+# For checking file type.
+(IA_INVAL, IA_IFREG, IA_IFDIR, IA_IFLNK, IA_IFBLK, IA_IFCHR, IA_IFIFO,
+ IA_IFSOCK) = xrange(8)
+
+
+class iatt_t (Structure):
+ _fields_ = [
+ ("ia_no", c_uint64),
+ ("ia_gfid", c_ubyte * 16),
+ ("ia_dev", c_uint64),
+ ("ia_type", c_uint),
+ ("ia_prot", ia_prot_t),
+ ("ia_nlink", c_uint32),
+ ("ia_uid", c_uint32),
+ ("ia_gid", c_uint32),
+ ("ia_rdev", c_uint64),
+ ("ia_size", c_uint64),
+ ("ia_blksize", c_uint32),
+ ("ia_blocks", c_uint64),
+ ("ia_atime", c_uint32 ),
+ ("ia_atime_nsec", c_uint32),
+ ("ia_mtime", c_uint32),
+ ("ia_mtime_nsec", c_uint32),
+ ("ia_ctime", c_uint32),
+ ("ia_ctime_nsec", c_uint32)
+ ]
+
+
+class mem_pool (Structure):
+ _fields_ = [
+ ("list", list_head),
+ ("hot_count", c_int),
+ ("cold_count", c_int),
+ ("lock", c_void_p),
+ ("padded_sizeof_type", c_ulong),
+ ("pool", c_void_p),
+ ("pool_end", c_void_p),
+ ("real_sizeof_type", c_int),
+ ("alloc_count", c_uint64),
+ ("pool_misses", c_uint64),
+ ("max_alloc", c_int),
+ ("curr_stdalloc", c_int),
+ ("max_stdalloc", c_int),
+ ("name", c_char_p),
+ ("global_list", list_head)
+ ]
+
+
+class U_ctx_key_inode (Union):
+ _fields_ = [
+ ("key", c_uint64),
+ ("xl_key", POINTER(xlator_t))
+ ]
+
+
+class U_ctx_value1 (Union):
+ _fields_ = [
+ ("value1", c_uint64),
+ ("ptr1", c_void_p)
+ ]
+
+
+class U_ctx_value2 (Union):
+ _fields_ = [
+ ("value2", c_uint64),
+ ("ptr2", c_void_p)
+ ]
+
+class inode_ctx (Structure):
+ _anonymous_ = ("u_key","u_value1","u_value2",)
+ _fields_ = [
+ ("u_key", U_ctx_key_inode),
+ ("u_value1", U_ctx_value1),
+ ("u_value2", U_ctx_value2)
+ ]
+
+class inode_t (Structure):
+ pass
+
+class inode_table_t (Structure):
+ _fields_ = [
+ ("lock", c_void_p),
+ ("hashsize", c_size_t),
+ ("name", c_char_p),
+ ("root", POINTER(inode_t)),
+ ("xl", POINTER(xlator_t)),
+ ("lru_limit", c_uint32),
+ ("inode_hash", POINTER(list_head)),
+ ("name_hash", POINTER(list_head)),
+ ("active", list_head),
+ ("active_size", c_uint32),
+ ("lru", list_head),
+ ("lru_size", c_uint32),
+ ("purge", list_head),
+ ("purge_size", c_uint32),
+ ("inode_pool", POINTER(mem_pool)),
+ ("dentry_pool", POINTER(mem_pool)),
+ ("fd_mem_pool", POINTER(mem_pool))
+ ]
+
+inode_t._fields_ = [
+ ("table", POINTER(inode_table_t)),
+ ("gfid", c_ubyte * 16),
+ ("lock", c_void_p),
+ ("nlookup", c_uint64),
+ ("fd_count", c_uint32),
+ ("ref", c_uint32),
+ ("ia_type", c_uint),
+ ("fd_list", list_head),
+ ("dentry_list", list_head),
+ ("hashv", list_head),
+ ("listv", list_head),
+ ("ctx", POINTER(inode_ctx))
+ ]
+
+
+
+class U_ctx_key_fd (Union):
+ _fields_ = [
+ ("key", c_uint64),
+ ("xl_key", c_void_p)
+ ]
+
+class fd_lk_ctx (Structure):
+ _fields_ = [
+ ("lk_list", list_head),
+ ("ref", c_int),
+ ("lock", c_void_p)
+ ]
+
+class fd_ctx (Structure):
+ _anonymous_ = ("u_key","u_value1")
+ _fields_ = [
+ ("u_key", U_ctx_key_fd),
+ ("u_value1", U_ctx_value1)
+ ]
+
+class fd_t (Structure):
+ _fields_ = [
+ ("pid", c_uint64),
+ ("flags", c_int32),
+ ("refcount", c_int32),
+ ("inode_list", list_head),
+ ("inode", POINTER(inode_t)),
+ ("lock", c_void_p),
+ ("ctx", POINTER(fd_ctx)),
+ ("xl_count", c_int),
+ ("lk_ctx", POINTER(fd_lk_ctx)),
+ ("anonymous", c_uint)
+ ]
+
+class loc_t (Structure):
+ _fields_ = [
+ ("path", c_char_p),
+ ("name", c_char_p),
+ ("inode", POINTER(inode_t)),
+ ("parent", POINTER(inode_t)),
+ ("gfid", c_ubyte * 16),
+ ("pargfid", c_ubyte * 16),
+ ]
+
+
+
+def _init_op (a_class, fop, cbk, wind, unwind):
+ # Decorators, used by translators. We could pass the signatures as
+ # parameters, but it's actually kind of nice to keep them around for
+ # inspection.
+ a_class.fop_type = apply(CFUNCTYPE,a_class.fop_sig)
+ a_class.cbk_type = apply(CFUNCTYPE,a_class.cbk_sig)
+ # Dispatch-function registration.
+ fop.restype = None
+ fop.argtypes = [ c_long, a_class.fop_type ]
+ # Callback-function registration.
+ cbk.restype = None
+ cbk.argtypes = [ c_long, a_class.cbk_type ]
+ # STACK_WIND function.
+ wind.restype = None
+ wind.argtypes = list(a_class.fop_sig[1:])
+ # STACK_UNWIND function.
+ unwind.restype = None
+ unwind.argtypes = list(a_class.cbk_sig[1:])
+
+class OpLookup:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(inode_t), POINTER(iatt_t),
+ POINTER(dict_t), POINTER(iatt_t))
+_init_op (OpLookup, dl.set_lookup_fop, dl.set_lookup_cbk,
+ dl.wind_lookup, dl.unwind_lookup)
+
+class OpCreate:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_int, c_uint, c_uint, POINTER(fd_t),
+ POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(fd_t), POINTER(inode_t),
+ POINTER(iatt_t), POINTER(iatt_t), POINTER(iatt_t),
+ POINTER(dict_t))
+_init_op (OpCreate, dl.set_create_fop, dl.set_create_cbk,
+ dl.wind_create, dl.unwind_create)
+
+class OpOpen:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_int, POINTER(fd_t), POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(fd_t), POINTER(dict_t))
+_init_op (OpOpen, dl.set_open_fop, dl.set_open_cbk,
+ dl.wind_open, dl.unwind_open)
+
+class OpReadv:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), c_size_t, c_long, c_uint32, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(iovec_t), c_int, POINTER(iatt_t),
+ POINTER(iobref_t), POINTER(dict_t))
+_init_op (OpReadv, dl.set_readv_fop, dl.set_readv_cbk,
+ dl.wind_readv, dl.unwind_readv)
+class OpWritev:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), POINTER(iovec_t), c_int, c_long, c_uint32,
+ POINTER(iobref_t), POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(iatt_t), POINTER(iatt_t),
+ POINTER(dict_t))
+_init_op (OpWritev, dl.set_writev_fop, dl.set_writev_cbk,
+ dl.wind_writev, dl.unwind_writev)
+
+class OpOpendir:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), POINTER(fd_t) ,POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(fd_t), POINTER(dict_t))
+_init_op (OpOpendir, dl.set_opendir_fop, dl.set_opendir_cbk,
+ dl.wind_opendir, dl.unwind_opendir)
+
+class OpReaddir:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), c_size_t, c_long, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(gf_dirent_t), POINTER(dict_t))
+_init_op (OpReaddir, dl.set_readdir_fop, dl.set_readdir_cbk,
+ dl.wind_readdir, dl.unwind_readdir)
+
+class OpReaddirp:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), c_size_t, c_long, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(gf_dirent_t), POINTER(dict_t))
+_init_op (OpReaddirp, dl.set_readdirp_fop, dl.set_readdirp_cbk,
+ dl.wind_readdirp, dl.unwind_readdirp)
+
+class OpStat:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(iatt_t), POINTER(dict_t))
+_init_op (OpStat, dl.set_stat_fop, dl.set_stat_cbk,
+ dl.wind_stat, dl.unwind_stat)
+
+class OpFstat:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(iatt_t), POINTER(dict_t))
+_init_op (OpFstat, dl.set_fstat_fop, dl.set_fstat_cbk,
+ dl.wind_fstat, dl.unwind_fstat)
+
+class OpStatfs:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(statvfs_t), POINTER(dict_t))
+_init_op (OpStatfs, dl.set_statfs_fop, dl.set_statfs_cbk,
+ dl.wind_statfs, dl.unwind_statfs)
+
+
+class OpSetxattr:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), POINTER(dict_t), c_int32,
+ POINTER (dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(dict_t))
+_init_op (OpSetxattr, dl.set_setxattr_fop, dl.set_setxattr_cbk,
+ dl.wind_setxattr, dl.unwind_setxattr)
+
+class OpGetxattr:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_char_p, POINTER (dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(dict_t), POINTER(dict_t))
+_init_op (OpGetxattr, dl.set_getxattr_fop, dl.set_getxattr_cbk,
+ dl.wind_getxattr, dl.unwind_getxattr)
+
+class OpFsetxattr:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), POINTER(dict_t), c_int32,
+ POINTER (dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(dict_t))
+_init_op (OpFsetxattr, dl.set_fsetxattr_fop, dl.set_fsetxattr_cbk,
+ dl.wind_fsetxattr, dl.unwind_fsetxattr)
+
+class OpFgetxattr:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), c_char_p, POINTER (dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(dict_t), POINTER(dict_t))
+_init_op (OpFgetxattr, dl.set_fgetxattr_fop, dl.set_fgetxattr_cbk,
+ dl.wind_fgetxattr, dl.unwind_fgetxattr)
+
+class OpRemovexattr:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_char_p, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(dict_t))
+_init_op (OpRemovexattr, dl.set_removexattr_fop, dl.set_removexattr_cbk,
+ dl.wind_removexattr, dl.unwind_removexattr)
+
+
+class OpFremovexattr:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(fd_t), c_char_p, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(dict_t))
+_init_op (OpFremovexattr, dl.set_fremovexattr_fop, dl.set_fremovexattr_cbk,
+ dl.wind_fremovexattr, dl.unwind_fremovexattr)
+
+class OpLink:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), POINTER(loc_t), POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(inode_t), POINTER(iatt_t),
+ POINTER(iatt_t), POINTER(iatt_t), POINTER(dict_t))
+_init_op (OpLink, dl.set_link_fop, dl.set_link_cbk,
+ dl.wind_link, dl.unwind_link)
+
+class OpSymlink:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ c_char_p, POINTER(loc_t), c_uint, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(inode_t), POINTER(iatt_t),
+ POINTER(iatt_t), POINTER(iatt_t), POINTER(dict_t))
+_init_op (OpSymlink, dl.set_symlink_fop, dl.set_symlink_cbk,
+ dl.wind_symlink, dl.unwind_symlink)
+
+class OpUnlink:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_int, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(iatt_t), POINTER(iatt_t),
+ POINTER(dict_t))
+_init_op (OpUnlink, dl.set_unlink_fop, dl.set_unlink_cbk,
+ dl.wind_unlink, dl.unwind_unlink)
+
+class OpReadlink:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_size_t, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, c_char_p, POINTER(iatt_t), POINTER(dict_t))
+_init_op (OpReadlink, dl.set_readlink_fop, dl.set_readlink_cbk,
+ dl.wind_readlink, dl.unwind_readlink)
+
+class OpMkdir:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_uint, c_uint, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(inode_t), POINTER(iatt_t),
+ POINTER(iatt_t), POINTER(iatt_t), POINTER(dict_t))
+_init_op (OpMkdir, dl.set_mkdir_fop, dl.set_mkdir_cbk,
+ dl.wind_mkdir, dl.unwind_mkdir)
+
+class OpRmdir:
+ fop_sig = (c_int, POINTER(call_frame_t), POINTER(xlator_t),
+ POINTER(loc_t), c_int, POINTER(dict_t))
+ cbk_sig = (c_int, POINTER(call_frame_t), c_long, POINTER(xlator_t),
+ c_int, c_int, POINTER(iatt_t), POINTER(iatt_t),
+ POINTER(dict_t))
+_init_op (OpRmdir, dl.set_rmdir_fop, dl.set_rmdir_cbk,
+ dl.wind_rmdir, dl.unwind_rmdir)
+
+
+class Translator:
+ def __init__ (self, c_this):
+ # This is only here to keep references to the stubs we create,
+ # because ctypes doesn't and glupy.so can't because it doesn't
+ # get a pointer to the actual Python object. It's a dictionary
+ # instead of a list in case we ever allow changing fops/cbks
+ # after initialization and need to look them up.
+ self.stub_refs = {}
+ funcs = dir(self.__class__)
+ if "lookup_fop" in funcs:
+ @OpLookup.fop_type
+ def stub (frame, this, loc, xdata, s=self):
+ return s.lookup_fop (frame, this, loc, xdata)
+ self.stub_refs["lookup_fop"] = stub
+ dl.set_lookup_fop(c_this,stub)
+ if "lookup_cbk" in funcs:
+ @OpLookup.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno, inode,
+ buf, xdata, postparent, s=self):
+ return s.lookup_cbk(frame, cookie, this, op_ret,
+ op_errno, inode, buf, xdata,
+ postparent)
+ self.stub_refs["lookup_cbk"] = stub
+ dl.set_lookup_cbk(c_this,stub)
+ if "create_fop" in funcs:
+ @OpCreate.fop_type
+ def stub (frame, this, loc, flags, mode, umask, fd,
+ xdata, s=self):
+ return s.create_fop (frame, this, loc, flags,
+ mode, umask, fd, xdata)
+ self.stub_refs["create_fop"] = stub
+ dl.set_create_fop(c_this,stub)
+ if "create_cbk" in funcs:
+ @OpCreate.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno, fd,
+ inode, buf, preparent, postparent, xdata,
+ s=self):
+ return s.create_cbk (frame, cookie, this,
+ op_ret, op_errno, fd,
+ inode, buf, preparent,
+ postparent, xdata)
+ self.stub_refs["create_cbk"] = stub
+ dl.set_create_cbk(c_this,stub)
+ if "open_fop" in funcs:
+ @OpOpen.fop_type
+ def stub (frame, this, loc, flags, fd,
+ xdata, s=self):
+ return s.open_fop (frame, this, loc, flags,
+ fd, xdata)
+ self.stub_refs["open_fop"] = stub
+ dl.set_open_fop(c_this,stub)
+ if "open_cbk" in funcs:
+ @OpOpen.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno, fd,
+ xdata, s=self):
+ return s.open_cbk (frame, cookie, this,
+ op_ret, op_errno, fd,
+ xdata)
+ self.stub_refs["open_cbk"] = stub
+ dl.set_open_cbk(c_this,stub)
+ if "readv_fop" in funcs:
+ @OpReadv.fop_type
+ def stub (frame, this, fd, size, offset, flags,
+ xdata, s=self):
+ return s.readv_fop (frame, this, fd, size,
+ offset, flags, xdata)
+ self.stub_refs["readv_fop"] = stub
+ dl.set_readv_fop(c_this,stub)
+ if "readv_cbk" in funcs:
+ @OpReadv.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ vector, count, stbuf, iobref, xdata,
+ s=self):
+ return s.readv_cbk (frame, cookie, this,
+ op_ret, op_errno, vector,
+ count, stbuf, iobref,
+ xdata)
+ self.stub_refs["readv_cbk"] = stub
+ dl.set_readv_cbk(c_this,stub)
+ if "writev_fop" in funcs:
+ @OpWritev.fop_type
+ def stub (frame, this, fd, vector, count,
+ offset, flags, iobref, xdata, s=self):
+ return s.writev_fop (frame, this, fd, vector,
+ count, offset, flags,
+ iobref, xdata)
+ self.stub_refs["writev_fop"] = stub
+ dl.set_writev_fop(c_this,stub)
+ if "writev_cbk" in funcs:
+ @OpWritev.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ prebuf, postbuf, xdata, s=self):
+ return s.writev_cbk (frame, cookie, this,
+ op_ret, op_errno, prebuf,
+ postbuf, xdata)
+ self.stub_refs["writev_cbk"] = stub
+ dl.set_writev_cbk(c_this,stub)
+ if "opendir_fop" in funcs:
+ @OpOpendir.fop_type
+ def stub (frame, this, loc, fd, xdata, s=self):
+ return s.opendir_fop (frame, this, loc, fd,
+ xdata)
+ self.stub_refs["opendir_fop"] = stub
+ dl.set_opendir_fop(c_this,stub)
+ if "opendir_cbk" in funcs:
+ @OpOpendir.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno, fd,
+ xdata, s=self):
+ return s.opendir_cbk(frame, cookie, this,
+ op_ret, op_errno, fd,
+ xdata)
+ self.stub_refs["opendir_cbk"] = stub
+ dl.set_opendir_cbk(c_this,stub)
+ if "readdir_fop" in funcs:
+ @OpReaddir.fop_type
+ def stub (frame, this, fd, size, offset, xdata, s=self):
+ return s.readdir_fop (frame, this, fd, size,
+ offset, xdata)
+ self.stub_refs["readdir_fop"] = stub
+ dl.set_readdir_fop(c_this,stub)
+ if "readdir_cbk" in funcs:
+ @OpReaddir.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ entries, xdata, s=self):
+ return s.readdir_cbk(frame, cookie, this,
+ op_ret, op_errno, entries,
+ xdata)
+ self.stub_refs["readdir_cbk"] = stub
+ dl.set_readdir_cbk(c_this,stub)
+ if "readdirp_fop" in funcs:
+ @OpReaddirp.fop_type
+ def stub (frame, this, fd, size, offset, xdata, s=self):
+ return s.readdirp_fop (frame, this, fd, size,
+ offset, xdata)
+ self.stub_refs["readdirp_fop"] = stub
+ dl.set_readdirp_fop(c_this,stub)
+ if "readdirp_cbk" in funcs:
+ @OpReaddirp.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ entries, xdata, s=self):
+ return s.readdirp_cbk (frame, cookie, this,
+ op_ret, op_errno,
+ entries, xdata)
+ self.stub_refs["readdirp_cbk"] = stub
+ dl.set_readdirp_cbk(c_this,stub)
+ if "stat_fop" in funcs:
+ @OpStat.fop_type
+ def stub (frame, this, loc, xdata, s=self):
+ return s.stat_fop (frame, this, loc, xdata)
+ self.stub_refs["stat_fop"] = stub
+ dl.set_stat_fop(c_this,stub)
+ if "stat_cbk" in funcs:
+ @OpStat.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno, buf,
+ xdata, s=self):
+ return s.stat_cbk(frame, cookie, this, op_ret,
+ op_errno, buf, xdata)
+ self.stub_refs["stat_cbk"] = stub
+ dl.set_stat_cbk(c_this,stub)
+ if "fstat_fop" in funcs:
+ @OpFstat.fop_type
+ def stub (frame, this, fd, xdata, s=self):
+ return s.fstat_fop (frame, this, fd, xdata)
+ self.stub_refs["fstat_fop"] = stub
+ dl.set_fstat_fop(c_this,stub)
+ if "fstat_cbk" in funcs:
+ @OpFstat.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno, buf,
+ xdata, s=self):
+ return s.fstat_cbk(frame, cookie, this, op_ret,
+ op_errno, buf, xdata)
+ self.stub_refs["fstat_cbk"] = stub
+ dl.set_fstat_cbk(c_this,stub)
+ if "statfs_fop" in funcs:
+ @OpStatfs.fop_type
+ def stub (frame, this, loc, xdata, s=self):
+ return s.statfs_fop (frame, this, loc, xdata)
+ self.stub_refs["statfs_fop"] = stub
+ dl.set_statfs_fop(c_this,stub)
+ if "statfs_cbk" in funcs:
+ @OpStatfs.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno, buf,
+ xdata, s=self):
+ return s.statfs_cbk (frame, cookie, this,
+ op_ret, op_errno, buf,
+ xdata)
+ self.stub_refs["statfs_cbk"] = stub
+ dl.set_statfs_cbk(c_this,stub)
+ if "setxattr_fop" in funcs:
+ @OpSetxattr.fop_type
+ def stub (frame, this, loc, dictionary, flags, xdata,
+ s=self):
+ return s.setxattr_fop (frame, this, loc,
+ dictionary, flags,
+ xdata)
+ self.stub_refs["setxattr_fop"] = stub
+ dl.set_setxattr_fop(c_this,stub)
+ if "setxattr_cbk" in funcs:
+ @OpSetxattr.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno, xdata,
+ s=self):
+ return s.setxattr_cbk(frame, cookie, this,
+ op_ret, op_errno, xdata)
+ self.stub_refs["setxattr_cbk"] = stub
+ dl.set_setxattr_cbk(c_this,stub)
+ if "getxattr_fop" in funcs:
+ @OpGetxattr.fop_type
+ def stub (frame, this, loc, name, xdata, s=self):
+ return s.getxattr_fop (frame, this, loc, name,
+ xdata)
+ self.stub_refs["getxattr_fop"] = stub
+ dl.set_getxattr_fop(c_this,stub)
+ if "getxattr_cbk" in funcs:
+ @OpGetxattr.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ dictionary, xdata, s=self):
+ return s.getxattr_cbk(frame, cookie, this,
+ op_ret, op_errno,
+ dictionary, xdata)
+ self.stub_refs["getxattr_cbk"] = stub
+ dl.set_getxattr_cbk(c_this,stub)
+ if "fsetxattr_fop" in funcs:
+ @OpFsetxattr.fop_type
+ def stub (frame, this, fd, dictionary, flags, xdata,
+ s=self):
+ return s.fsetxattr_fop (frame, this, fd,
+ dictionary, flags,
+ xdata)
+ self.stub_refs["fsetxattr_fop"] = stub
+ dl.set_fsetxattr_fop(c_this,stub)
+ if "fsetxattr_cbk" in funcs:
+ @OpFsetxattr.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno, xdata,
+ s=self):
+ return s.fsetxattr_cbk(frame, cookie, this,
+ op_ret, op_errno, xdata)
+ self.stub_refs["fsetxattr_cbk"] = stub
+ dl.set_fsetxattr_cbk(c_this,stub)
+ if "fgetxattr_fop" in funcs:
+ @OpFgetxattr.fop_type
+ def stub (frame, this, fd, name, xdata, s=self):
+ return s.fgetxattr_fop (frame, this, fd, name,
+ xdata)
+ self.stub_refs["fgetxattr_fop"] = stub
+ dl.set_fgetxattr_fop(c_this,stub)
+ if "fgetxattr_cbk" in funcs:
+ @OpFgetxattr.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ dictionary, xdata, s=self):
+ return s.fgetxattr_cbk(frame, cookie, this,
+ op_ret, op_errno,
+ dictionary, xdata)
+ self.stub_refs["fgetxattr_cbk"] = stub
+ dl.set_fgetxattr_cbk(c_this,stub)
+ if "removexattr_fop" in funcs:
+ @OpRemovexattr.fop_type
+ def stub (frame, this, loc, name, xdata, s=self):
+ return s.removexattr_fop (frame, this, loc,
+ name, xdata)
+ self.stub_refs["removexattr_fop"] = stub
+ dl.set_removexattr_fop(c_this,stub)
+ if "removexattr_cbk" in funcs:
+ @OpRemovexattr.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ xdata, s=self):
+ return s.removexattr_cbk(frame, cookie, this,
+ op_ret, op_errno,
+ xdata)
+ self.stub_refs["removexattr_cbk"] = stub
+ dl.set_removexattr_cbk(c_this,stub)
+ if "fremovexattr_fop" in funcs:
+ @OpFremovexattr.fop_type
+ def stub (frame, this, fd, name, xdata, s=self):
+ return s.fremovexattr_fop (frame, this, fd,
+ name, xdata)
+ self.stub_refs["fremovexattr_fop"] = stub
+ dl.set_fremovexattr_fop(c_this,stub)
+ if "fremovexattr_cbk" in funcs:
+ @OpFremovexattr.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ xdata, s=self):
+ return s.fremovexattr_cbk(frame, cookie, this,
+ op_ret, op_errno,
+ xdata)
+ self.stub_refs["fremovexattr_cbk"] = stub
+ dl.set_fremovexattr_cbk(c_this,stub)
+ if "link_fop" in funcs:
+ @OpLink.fop_type
+ def stub (frame, this, oldloc, newloc,
+ xdata, s=self):
+ return s.link_fop (frame, this, oldloc,
+ newloc, xdata)
+ self.stub_refs["link_fop"] = stub
+ dl.set_link_fop(c_this,stub)
+ if "link_cbk" in funcs:
+ @OpLink.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata,
+ s=self):
+ return s.link_cbk (frame, cookie, this,
+ op_ret, op_errno, inode,
+ buf, preparent,
+ postparent, xdata)
+ self.stub_refs["link_cbk"] = stub
+ dl.set_link_cbk(c_this,stub)
+ if "symlink_fop" in funcs:
+ @OpSymlink.fop_type
+ def stub (frame, this, linkname, loc,
+ umask, xdata, s=self):
+ return s.symlink_fop (frame, this, linkname,
+ loc, umask, xdata)
+ self.stub_refs["symlink_fop"] = stub
+ dl.set_symlink_fop(c_this,stub)
+ if "symlink_cbk" in funcs:
+ @OpSymlink.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ inode, buf, preparent, postparent, xdata,
+ s=self):
+ return s.symlink_cbk (frame, cookie, this,
+ op_ret, op_errno, inode,
+ buf, preparent,
+ postparent, xdata)
+ self.stub_refs["symlink_cbk"] = stub
+ dl.set_symlink_cbk(c_this,stub)
+ if "unlink_fop" in funcs:
+ @OpUnlink.fop_type
+ def stub (frame, this, loc, xflags,
+ xdata, s=self):
+ return s.unlink_fop (frame, this, loc,
+ xflags, xdata)
+ self.stub_refs["unlink_fop"] = stub
+ dl.set_unlink_fop(c_this,stub)
+ if "unlink_cbk" in funcs:
+ @OpUnlink.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ preparent, postparent, xdata, s=self):
+ return s.unlink_cbk (frame, cookie, this,
+ op_ret, op_errno,
+ preparent, postparent,
+ xdata)
+ self.stub_refs["unlink_cbk"] = stub
+ dl.set_unlink_cbk(c_this,stub)
+ if "readlink_fop" in funcs:
+ @OpReadlink.fop_type
+ def stub (frame, this, loc, size,
+ xdata, s=self):
+ return s.readlink_fop (frame, this, loc,
+ size, xdata)
+ self.stub_refs["readlink_fop"] = stub
+ dl.set_readlink_fop(c_this,stub)
+ if "readlink_cbk" in funcs:
+ @OpReadlink.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ path, buf, xdata, s=self):
+ return s.readlink_cbk (frame, cookie, this,
+ op_ret, op_errno,
+ path, buf, xdata)
+ self.stub_refs["readlink_cbk"] = stub
+ dl.set_readlink_cbk(c_this,stub)
+ if "mkdir_fop" in funcs:
+ @OpMkdir.fop_type
+ def stub (frame, this, loc, mode, umask, xdata,
+ s=self):
+ return s.mkdir_fop (frame, this, loc, mode,
+ umask, xdata)
+ self.stub_refs["mkdir_fop"] = stub
+ dl.set_mkdir_fop(c_this,stub)
+ if "mkdir_cbk" in funcs:
+ @OpMkdir.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno, inode,
+ buf, preparent, postparent, xdata, s=self):
+ return s.mkdir_cbk (frame, cookie, this,
+ op_ret, op_errno, inode,
+ buf, preparent,
+ postparent, xdata)
+ self.stub_refs["mkdir_cbk"] = stub
+ dl.set_mkdir_cbk(c_this,stub)
+ if "rmdir_fop" in funcs:
+ @OpRmdir.fop_type
+ def stub (frame, this, loc, xflags,
+ xdata, s=self):
+ return s.rmdir_fop (frame, this, loc,
+ xflags, xdata)
+ self.stub_refs["rmdir_fop"] = stub
+ dl.set_rmdir_fop(c_this,stub)
+ if "rmdir_cbk" in funcs:
+ @OpRmdir.cbk_type
+ def stub (frame, cookie, this, op_ret, op_errno,
+ preparent, postparent, xdata, s=self):
+ return s.rmdir_cbk (frame, cookie, this,
+ op_ret, op_errno,
+ preparent, postparent,
+ xdata)
+ self.stub_refs["rmdir_cbk"] = stub
+ dl.set_rmdir_cbk(c_this,stub)
diff --git a/xlators/features/glupy/src/helloworld.py b/xlators/features/glupy/src/helloworld.py
new file mode 100644
index 000000000..8fe403711
--- /dev/null
+++ b/xlators/features/glupy/src/helloworld.py
@@ -0,0 +1,19 @@
+import sys
+from gluster import *
+
+class xlator (Translator):
+
+ def __init__(self, c_this):
+ Translator.__init__(self, c_this)
+
+ def lookup_fop(self, frame, this, loc, xdata):
+ print "Python xlator: Hello!"
+ dl.wind_lookup(frame, POINTER(xlator_t)(), loc, xdata)
+ return 0
+
+ def lookup_cbk(self, frame, cookie, this, op_ret, op_errno, inode, buf,
+ xdata, postparent):
+ print "Python xlator: Hello again!"
+ dl.unwind_lookup(frame, cookie, this, op_ret, op_errno, inode, buf,
+ xdata, postparent)
+ return 0
diff --git a/xlators/features/glupy/src/negative.py b/xlators/features/glupy/src/negative.py
new file mode 100644
index 000000000..1023602b9
--- /dev/null
+++ b/xlators/features/glupy/src/negative.py
@@ -0,0 +1,92 @@
+import sys
+from uuid import UUID
+from gluster import *
+
+# Negative-lookup-caching example. If a file wasn't there the last time we
+# looked, it's probably still not there. This translator keeps track of
+# those failed lookups for us, and returns ENOENT without needing to pass the
+# call any further for repeated requests.
+
+# If we were doing this for real, we'd need separate caches for each xlator
+# instance. The easiest way to do this would be to have xlator.__init__
+# "register" each instance in a module-global dict, with the key as the C
+# translator address and the value as the xlator object itself. For testing
+# and teaching, it's sufficient just to have one cache. The keys are parent
+# GFIDs, and the entries are lists of names within that parent that we know
+# don't exist.
+cache = {}
+
+# TBD: we need a better way of handling per-request data (frame->local in C).
+dl.get_id.restype = c_long
+dl.get_id.argtypes = [ POINTER(call_frame_t) ]
+
+def uuid2str (gfid):
+ return str(UUID(''.join(map("{0:02x}".format, gfid))))
+
+class xlator (Translator):
+
+ def __init__ (self, c_this):
+ self.requests = {}
+ Translator.__init__(self,c_this)
+
+ def lookup_fop (self, frame, this, loc, xdata):
+ pargfid = uuid2str(loc.contents.pargfid)
+ print "lookup FOP: %s:%s" % (pargfid, loc.contents.name)
+ # Check the cache.
+ if cache.has_key(pargfid):
+ if loc.contents.name in cache[pargfid]:
+ print "short-circuiting for %s:%s" % (pargfid,
+ loc.contents.name)
+ dl.unwind_lookup(frame,0,this,-1,2,None,None,None,None)
+ return 0
+ key = dl.get_id(frame)
+ self.requests[key] = (pargfid, loc.contents.name[:])
+ # TBD: get real child xl from init, pass it here
+ dl.wind_lookup(frame,POINTER(xlator_t)(),loc,xdata)
+ return 0
+
+ def lookup_cbk (self, frame, cookie, this, op_ret, op_errno, inode, buf,
+ xdata, postparent):
+ print "lookup CBK: %d (%d)" % (op_ret, op_errno)
+ key = dl.get_id(frame)
+ pargfid, name = self.requests[key]
+ # Update the cache.
+ if op_ret == 0:
+ print "found %s, removing from cache" % name
+ if cache.has_key(pargfid):
+ cache[pargfid].discard(name)
+ elif op_errno == 2: # ENOENT
+ print "failed to find %s, adding to cache" % name
+ if cache.has_key(pargfid):
+ cache[pargfid].add(name)
+ else:
+ cache[pargfid] = set([name])
+ del self.requests[key]
+ dl.unwind_lookup(frame,cookie,this,op_ret,op_errno,
+ inode,buf,xdata,postparent)
+ return 0
+
+ def create_fop (self, frame, this, loc, flags, mode, umask, fd, xdata):
+ pargfid = uuid2str(loc.contents.pargfid)
+ print "create FOP: %s:%s" % (pargfid, loc.contents.name)
+ key = dl.get_id(frame)
+ self.requests[key] = (pargfid, loc.contents.name[:])
+ # TBD: get real child xl from init, pass it here
+ dl.wind_create(frame,POINTER(xlator_t)(),loc,flags,mode,umask,fd,xdata)
+ return 0
+
+ def create_cbk (self, frame, cookie, this, op_ret, op_errno, fd, inode,
+ buf, preparent, postparent, xdata):
+ print "create CBK: %d (%d)" % (op_ret, op_errno)
+ key = dl.get_id(frame)
+ pargfid, name = self.requests[key]
+ # Update the cache.
+ if op_ret == 0:
+ print "created %s, removing from cache" % name
+ if cache.has_key(pargfid):
+ cache[pargfid].discard(name)
+ del self.requests[key]
+ dl.unwind_create(frame,cookie,this,op_ret,op_errno,fd,inode,buf,
+ preparent,postparent,xdata)
+ return 0
+
diff --git a/xlators/features/index/Makefile.am b/xlators/features/index/Makefile.am
new file mode 100644
index 000000000..a985f42a8
--- /dev/null
+++ b/xlators/features/index/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/index/src/Makefile.am b/xlators/features/index/src/Makefile.am
new file mode 100644
index 000000000..73bb8972e
--- /dev/null
+++ b/xlators/features/index/src/Makefile.am
@@ -0,0 +1,17 @@
+xlator_LTLIBRARIES = index.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+index_la_LDFLAGS = -module -avoid-version
+
+index_la_SOURCES = index.c
+index_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = index.h index-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/xdr/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/index/src/index-mem-types.h b/xlators/features/index/src/index-mem-types.h
new file mode 100644
index 000000000..553d492df
--- /dev/null
+++ b/xlators/features/index/src/index-mem-types.h
@@ -0,0 +1,22 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __QUIESCE_MEM_TYPES_H__
+#define __QUIESCE_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_index_mem_types_ {
+ gf_index_mt_priv_t = gf_common_mt_end + 1,
+ gf_index_inode_ctx_t = gf_common_mt_end + 2,
+ gf_index_fd_ctx_t = gf_common_mt_end + 3,
+ gf_index_mt_end
+};
+#endif
diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c
new file mode 100644
index 000000000..9253120f3
--- /dev/null
+++ b/xlators/features/index/src/index.c
@@ -0,0 +1,1489 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "index.h"
+#include "options.h"
+#include "glusterfs3-xdr.h"
+#include "syncop.h"
+
+#define XATTROP_SUBDIR "xattrop"
+#define BASE_INDICES_HOLDER_SUBDIR "base_indices_holder"
+
+call_stub_t *
+__index_dequeue (struct list_head *callstubs)
+{
+ call_stub_t *stub = NULL;
+
+ if (!list_empty (callstubs)) {
+ stub = list_entry (callstubs->next, call_stub_t, list);
+ list_del_init (&stub->list);
+ }
+
+ return stub;
+}
+
+inline static void
+__index_enqueue (struct list_head *callstubs, call_stub_t *stub)
+{
+ list_add_tail (&stub->list, callstubs);
+}
+
+static void
+worker_enqueue (xlator_t *this, call_stub_t *stub)
+{
+ index_priv_t *priv = NULL;
+
+ priv = this->private;
+ pthread_mutex_lock (&priv->mutex);
+ {
+ __index_enqueue (&priv->callstubs, stub);
+ pthread_cond_signal (&priv->cond);
+ }
+ pthread_mutex_unlock (&priv->mutex);
+}
+
+void *
+index_worker (void *data)
+{
+ index_priv_t *priv = NULL;
+ xlator_t *this = NULL;
+ call_stub_t *stub = NULL;
+ int ret = 0;
+
+ THIS = data;
+ this = data;
+ priv = this->private;
+
+ for (;;) {
+ pthread_mutex_lock (&priv->mutex);
+ {
+ while (list_empty (&priv->callstubs)) {
+ ret = pthread_cond_wait (&priv->cond,
+ &priv->mutex);
+ }
+
+ stub = __index_dequeue (&priv->callstubs);
+ }
+ pthread_mutex_unlock (&priv->mutex);
+
+ if (stub) /* guard against spurious wakeups */
+ call_resume (stub);
+ }
+
+ return NULL;
+}
+int
+__index_inode_ctx_get (inode_t *inode, xlator_t *this, index_inode_ctx_t **ctx)
+{
+ int ret = 0;
+ index_inode_ctx_t *ictx = NULL;
+ uint64_t tmpctx = 0;
+
+ ret = __inode_ctx_get (inode, this, &tmpctx);
+ if (!ret) {
+ ictx = (index_inode_ctx_t*) (long) tmpctx;
+ goto out;
+ }
+ ictx = GF_CALLOC (1, sizeof (*ictx), gf_index_inode_ctx_t);
+ if (!ictx) {
+ ret = -1;
+ goto out;
+ }
+
+ INIT_LIST_HEAD (&ictx->callstubs);
+ ret = __inode_ctx_put (inode, this, (uint64_t)ictx);
+ if (ret) {
+ GF_FREE (ictx);
+ ictx = NULL;
+ goto out;
+ }
+out:
+ if (ictx)
+ *ctx = ictx;
+ return ret;
+}
+
+int
+index_inode_ctx_get (inode_t *inode, xlator_t *this, index_inode_ctx_t **ctx)
+{
+ int ret = 0;
+
+ LOCK (&inode->lock);
+ {
+ ret = __index_inode_ctx_get (inode, this, ctx);
+ }
+ UNLOCK (&inode->lock);
+
+ return ret;
+}
+
+static void
+make_index_dir_path (char *base, const char *subdir,
+ char *index_dir, size_t len)
+{
+ snprintf (index_dir, len, "%s/%s", base, subdir);
+}
+
+int
+index_dir_create (xlator_t *this, const char *subdir)
+{
+ int ret = 0;
+ struct stat st = {0};
+ char fullpath[PATH_MAX] = {0};
+ char path[PATH_MAX] = {0};
+ char *dir = NULL;
+ index_priv_t *priv = NULL;
+ size_t len = 0;
+ size_t pathlen = 0;
+
+ priv = this->private;
+ make_index_dir_path (priv->index_basepath, subdir, fullpath,
+ sizeof (fullpath));
+ ret = stat (fullpath, &st);
+ if (!ret) {
+ if (!S_ISDIR (st.st_mode))
+ ret = -2;
+ goto out;
+ }
+
+ pathlen = strlen (fullpath);
+ if ((pathlen > 1) && fullpath[pathlen - 1] == '/')
+ fullpath[pathlen - 1] = '\0';
+ dir = strchr (fullpath, '/');
+ while (dir) {
+ dir = strchr (dir + 1, '/');
+ if (dir)
+ len = pathlen - strlen (dir);
+ else
+ len = pathlen;
+ strncpy (path, fullpath, len);
+ path[len] = '\0';
+ ret = mkdir (path, 0600);
+ if (ret && (errno != EEXIST))
+ goto out;
+ }
+ ret = 0;
+out:
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s: Failed to "
+ "create (%s)", priv->index_basepath, subdir,
+ strerror (errno));
+ } else if (ret == -2) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s: Failed to create, "
+ "path exists, not a directory ", priv->index_basepath,
+ subdir);
+ }
+ return ret;
+}
+
+void
+index_get_index (index_priv_t *priv, uuid_t index)
+{
+ LOCK (&priv->lock);
+ {
+ uuid_copy (index, priv->index);
+ }
+ UNLOCK (&priv->lock);
+}
+
+void
+index_generate_index (index_priv_t *priv, uuid_t index)
+{
+ LOCK (&priv->lock);
+ {
+ //To prevent duplicate generates.
+ //This method fails if number of contending threads is greater
+ //than MAX_LINK count of the fs
+ if (!uuid_compare (priv->index, index))
+ uuid_generate (priv->index);
+ uuid_copy (index, priv->index);
+ }
+ UNLOCK (&priv->lock);
+}
+
+static void
+make_index_path (char *base, const char *subdir, uuid_t index,
+ char *index_path, size_t len)
+{
+ make_index_dir_path (base, subdir, index_path, len);
+ snprintf (index_path + strlen (index_path), len - strlen (index_path),
+ "/%s-%s", subdir, uuid_utoa (index));
+}
+
+static void
+make_gfid_path (char *base, const char *subdir, uuid_t gfid,
+ char *gfid_path, size_t len)
+{
+ make_index_dir_path (base, subdir, gfid_path, len);
+ snprintf (gfid_path + strlen (gfid_path), len - strlen (gfid_path),
+ "/%s", uuid_utoa (gfid));
+}
+
+static void
+make_file_path (char *base, const char *subdir, const char *filename,
+ char *file_path, size_t len)
+{
+ make_index_dir_path (base, subdir, file_path, len);
+ snprintf (file_path + strlen (file_path), len - strlen (file_path),
+ "/%s", filename);
+}
+
+static void
+check_delete_stale_index_file (xlator_t *this, char *filename)
+{
+ int ret = 0;
+ struct stat st = {0};
+ struct stat base_index_st = {0};
+ char filepath[PATH_MAX] = {0};
+ char filepath_under_base_indices_holder[PATH_MAX] = {0};
+ index_priv_t *priv = NULL;
+
+ priv = this->private;
+ if (priv->to_be_healed_states != synced_state)
+ return;
+
+ make_file_path (priv->index_basepath, XATTROP_SUBDIR,
+ filename, filepath, sizeof (filepath));
+
+ make_file_path (priv->index_basepath, BASE_INDICES_HOLDER_SUBDIR,
+ filename, filepath_under_base_indices_holder,
+ sizeof (filepath_under_base_indices_holder));
+
+
+ ret = stat (filepath_under_base_indices_holder, &base_index_st);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Base index is not created"
+ "under index/base_indices_holder");
+ return;
+ }
+
+ ret = stat (filepath, &st);
+ if (!ret && st.st_nlink == 2) {
+ unlink (filepath);
+ unlink (filepath_under_base_indices_holder);
+ }
+}
+
+static int
+index_fill_readdir (fd_t *fd, DIR *dir, off_t off,
+ size_t size, gf_dirent_t *entries, readdir_directory type)
+{
+ off_t in_case = -1;
+ size_t filled = 0;
+ int count = 0;
+ char entrybuf[sizeof(struct dirent) + 256 + 8];
+ struct dirent *entry = NULL;
+ int32_t this_size = -1;
+ gf_dirent_t *this_entry = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ if (!off) {
+ rewinddir (dir);
+ } else {
+ seekdir (dir, off);
+ }
+
+ while (filled <= size) {
+ in_case = telldir (dir);
+
+ if (in_case == -1) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "telldir failed on dir=%p: %s",
+ dir, strerror (errno));
+ goto out;
+ }
+
+ errno = 0;
+ entry = NULL;
+ readdir_r (dir, (struct dirent *)entrybuf, &entry);
+
+ if (!entry) {
+ if (errno == EBADF) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "readdir failed on dir=%p: %s",
+ dir, strerror (errno));
+ goto out;
+ }
+ break;
+ }
+
+ if (!strncmp (entry->d_name, XATTROP_SUBDIR"-",
+ strlen (XATTROP_SUBDIR"-")) &&
+ (type == INDEX_XATTROP)) {
+ check_delete_stale_index_file (this, entry->d_name);
+ continue;
+ }
+
+ this_size = max (sizeof (gf_dirent_t),
+ sizeof (gfs3_dirplist))
+ + strlen (entry->d_name) + 1;
+
+ if (this_size + filled > size) {
+ seekdir (dir, in_case);
+ break;
+ }
+
+ this_entry = gf_dirent_for_name (entry->d_name);
+
+ if (!this_entry) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "could not create gf_dirent for entry %s: (%s)",
+ entry->d_name, strerror (errno));
+ goto out;
+ }
+ this_entry->d_off = telldir (dir);
+ this_entry->d_ino = entry->d_ino;
+
+ list_add_tail (&this_entry->list, &entries->list);
+
+ filled += this_size;
+ count ++;
+ }
+
+ if ((!readdir (dir) && (errno == 0)))
+ /* Indicate EOF */
+ errno = ENOENT;
+out:
+ return count;
+}
+
+int
+sync_base_indices (void *index_priv)
+{
+ index_priv_t *priv = NULL;
+ DIR *dir_base_holder = NULL;
+ DIR *xattrop_dir = NULL;
+ struct dirent *entry = NULL;
+ char base_indices_holder[PATH_MAX] = {0};
+ char xattrop_directory[PATH_MAX] = {0};
+ char base_index_path[PATH_MAX] = {0};
+ char xattrop_index_path[PATH_MAX] = {0};
+ int ret = 0;
+
+ priv = index_priv;
+
+ snprintf (base_indices_holder, PATH_MAX, "%s/%s", priv->index_basepath,
+ BASE_INDICES_HOLDER_SUBDIR);
+ snprintf (xattrop_directory, PATH_MAX, "%s/%s", priv->index_basepath,
+ XATTROP_SUBDIR);
+
+ if ((dir_base_holder = opendir(base_indices_holder)) == NULL) {
+ ret = -1;
+ goto out;
+ }
+ if ((xattrop_dir = opendir (xattrop_directory)) == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ priv->to_be_healed_states = sync_started;
+ while ((entry = readdir(xattrop_dir)) != NULL) {
+ if (!strcmp (entry->d_name, ".") ||
+ !strcmp (entry->d_name, "..")) {
+ continue;
+ }
+ if (strncmp (entry->d_name, XATTROP_SUBDIR"-",
+ strlen (XATTROP_SUBDIR"-"))) {
+ continue;
+ }
+ if (!strncmp (entry->d_name, XATTROP_SUBDIR"-",
+ strlen (XATTROP_SUBDIR"-"))) {
+
+ snprintf (xattrop_index_path, PATH_MAX, "%s/%s",
+ xattrop_directory, entry->d_name);
+
+ snprintf (base_index_path, PATH_MAX, "%s/%s",
+ base_indices_holder, entry->d_name);
+
+ ret = link (xattrop_index_path, base_index_path);
+ if (ret && errno != EEXIST)
+ goto out;
+
+ }
+ }
+ ret = closedir (xattrop_dir);
+ if (ret)
+ goto out;
+ ret = closedir (dir_base_holder);
+ if (ret)
+ goto out;
+
+ ret = 0;
+out:
+ return ret;
+
+}
+
+int
+base_indices_syncing_done (int ret, call_frame_t *frame, void *data)
+{
+ index_priv_t *priv = NULL;
+ priv = data;
+
+ if (!priv)
+ goto out;
+
+ if (ret) {
+ priv->to_be_healed_states = sync_not_started;
+ } else {
+ priv->to_be_healed_states = synced_state;
+ }
+
+ STACK_DESTROY (frame->root);
+
+out:
+ return 0;
+}
+
+int
+sync_base_indices_from_xattrop (xlator_t *this)
+{
+
+ index_priv_t *priv = NULL;
+ char base_indices_holder[PATH_MAX] = {0};
+ int ret = 0;
+ struct stat st = {0};
+ DIR *dir = NULL;
+ struct dirent *entry = NULL;
+ call_frame_t *frame = NULL;
+
+ priv = this->private;
+
+ if (priv->to_be_healed_states != sync_not_started) {
+ ret = -1;
+ goto out;
+ }
+
+ snprintf (base_indices_holder, PATH_MAX, "%s/%s", priv->index_basepath,
+ BASE_INDICES_HOLDER_SUBDIR);
+
+ ret = stat (base_indices_holder, &st);
+
+ if (ret && (errno != ENOENT)) {
+ goto out;
+ } else if (errno == ENOENT) {
+ ret = index_dir_create (this, BASE_INDICES_HOLDER_SUBDIR);
+ if (ret)
+ goto out;
+ } else {
+ if ((dir = opendir (base_indices_holder)) == NULL) {
+ ret = -1;
+ goto out;
+ }
+ while ((entry = readdir (dir)) != NULL) {
+ if (!strcmp (entry->d_name, ".") ||
+ !strcmp (entry->d_name,"..")) {
+ continue;
+ }
+ ret = unlink (entry->d_name);
+ if (ret)
+ goto out;
+ }
+ closedir (dir);
+ }
+
+ /*At this point of time we have index/base_indicies_holder directory
+ *is with no entries*/
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+ set_lk_owner_from_ptr (&frame->root->lk_owner, frame->root);
+
+ frame->root->pid = LOW_PRIO_PROC_PID;
+
+ ret = synctask_new (this->ctx->env, sync_base_indices,
+ base_indices_syncing_done,frame, priv);
+
+
+
+out:
+ return ret;
+
+}
+
+int
+index_add (xlator_t *this, uuid_t gfid, const char *subdir)
+{
+ int32_t op_errno = 0;
+ char gfid_path[PATH_MAX] = {0};
+ char index_path[PATH_MAX] = {0};
+ char base_path[PATH_MAX] = {0};
+ int ret = 0;
+ uuid_t index = {0};
+ index_priv_t *priv = NULL;
+ struct stat st = {0};
+ int fd = 0;
+ int index_created = 0;
+
+ priv = this->private;
+ GF_ASSERT_AND_GOTO_WITH_ERROR (this->name, !uuid_is_null (gfid),
+ out, op_errno, EINVAL);
+
+ make_gfid_path (priv->index_basepath, subdir, gfid,
+ gfid_path, sizeof (gfid_path));
+
+ ret = stat (gfid_path, &st);
+ if (!ret)
+ goto out;
+ index_get_index (priv, index);
+ make_index_path (priv->index_basepath, subdir,
+ index, index_path, sizeof (index_path));
+ ret = link (index_path, gfid_path);
+ if (!ret || (errno == EEXIST)) {
+ ret = 0;
+ index_created = 1;
+ goto out;
+ }
+
+
+ op_errno = errno;
+ if (op_errno == ENOENT) {
+ ret = index_dir_create (this, subdir);
+ if (ret)
+ goto out;
+ } else if (op_errno == EMLINK) {
+ index_generate_index (priv, index);
+ make_index_path (priv->index_basepath, subdir,
+ index, index_path, sizeof (index_path));
+ } else {
+ goto out;
+ }
+
+ fd = creat (index_path, 0);
+ if ((fd < 0) && (errno != EEXIST)) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR, "%s: Not able to "
+ "create index (%s)", uuid_utoa (gfid),
+ strerror (errno));
+ goto out;
+ }
+
+ if (fd >= 0)
+ close (fd);
+
+ ret = link (index_path, gfid_path);
+ if (ret && (errno != EEXIST)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s: Not able to "
+ "add to index (%s)", uuid_utoa (gfid),
+ strerror (errno));
+ goto out;
+ } else {
+ index_created = 1;
+ }
+
+ if (priv->to_be_healed_states != sync_not_started) {
+ make_index_path (priv->index_basepath,
+ GF_BASE_INDICES_HOLDER_GFID,
+ index, base_path, sizeof (base_path));
+ ret = link (index_path, base_path);
+ if (ret)
+ goto out;
+ }
+
+ ret = 0;
+out:
+ /*If base_indices_holder is not created: create and sync
+ *If directory is present: delete contents and start syncing
+ *If syncing is in progress :No need to do any thing
+ *If syncing is done: No need to do anything*/
+ if (!ret) {
+ switch (priv->to_be_healed_states) {
+ case sync_not_started:
+ ret = sync_base_indices_from_xattrop (this);
+ break;
+ case sync_started:
+ case synced_state:
+ /*No need to do anything*/
+ break;
+ }
+ }
+ return ret;
+}
+
+int
+index_del (xlator_t *this, uuid_t gfid, const char *subdir)
+{
+ int32_t op_errno __attribute__((unused)) = 0;
+ index_priv_t *priv = NULL;
+ int ret = 0;
+ char gfid_path[PATH_MAX] = {0};
+
+ priv = this->private;
+ GF_ASSERT_AND_GOTO_WITH_ERROR (this->name, !uuid_is_null (gfid),
+ out, op_errno, EINVAL);
+ make_gfid_path (priv->index_basepath, subdir, gfid,
+ gfid_path, sizeof (gfid_path));
+ ret = unlink (gfid_path);
+ if (ret && (errno != ENOENT)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to delete from index (%s)",
+ gfid_path, strerror (errno));
+ ret = -errno;
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+static int
+_check_key_is_zero_filled (dict_t *d, char *k, data_t *v,
+ void *tmp)
+{
+ if (mem_0filled ((const char*)v->data, v->len)) {
+ /* -1 means, no more iterations, treat as 'break' */
+ return -1;
+ }
+ return 0;
+}
+
+
+void
+_xattrop_index_action (xlator_t *this, inode_t *inode, dict_t *xattr)
+{
+ gf_boolean_t zero_xattr = _gf_true;
+ index_inode_ctx_t *ctx = NULL;
+ int ret = 0;
+
+ ret = dict_foreach (xattr, _check_key_is_zero_filled, NULL);
+ if (ret == -1)
+ zero_xattr = _gf_false;
+
+ ret = index_inode_ctx_get (inode, this, &ctx);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Not able to %s %s -> index",
+ zero_xattr?"add":"del", uuid_utoa (inode->gfid));
+ goto out;
+ }
+ if (zero_xattr) {
+ if (ctx->state == NOTIN)
+ goto out;
+ ret = index_del (this, inode->gfid, XATTROP_SUBDIR);
+ if (!ret)
+ ctx->state = NOTIN;
+ } else {
+ if (ctx->state == IN)
+ goto out;
+ ret = index_add (this, inode->gfid, XATTROP_SUBDIR);
+ if (!ret)
+ ctx->state = IN;
+ }
+out:
+ return;
+}
+
+void
+fop_xattrop_index_action (xlator_t *this, inode_t *inode, dict_t *xattr)
+{
+ _xattrop_index_action (this, inode, xattr);
+}
+
+void
+fop_fxattrop_index_action (xlator_t *this, inode_t *inode, dict_t *xattr)
+{
+ _xattrop_index_action (this, inode, xattr);
+}
+
+inline gf_boolean_t
+index_xattrop_track (loc_t *loc, gf_xattrop_flags_t flags, dict_t *dict)
+{
+ return (flags == GF_XATTROP_ADD_ARRAY);
+}
+
+inline gf_boolean_t
+index_fxattrop_track (fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict)
+{
+ return (flags == GF_XATTROP_ADD_ARRAY);
+}
+
+int
+__index_fd_ctx_get (fd_t *fd, xlator_t *this, index_fd_ctx_t **ctx)
+{
+ int ret = 0;
+ index_fd_ctx_t *fctx = NULL;
+ uint64_t tmpctx = 0;
+ char index_dir[PATH_MAX] = {0};
+ index_priv_t *priv = NULL;
+
+ priv = this->private;
+ if (uuid_compare (fd->inode->gfid, priv->xattrop_vgfid)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = __fd_ctx_get (fd, this, &tmpctx);
+ if (!ret) {
+ fctx = (index_fd_ctx_t*) (long) tmpctx;
+ goto out;
+ }
+
+ fctx = GF_CALLOC (1, sizeof (*fctx), gf_index_fd_ctx_t);
+ if (!fctx) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ make_index_dir_path (priv->index_basepath, XATTROP_SUBDIR,
+ index_dir, sizeof (index_dir));
+ fctx->dir = opendir (index_dir);
+ if (!fctx->dir) {
+ ret = -errno;
+ GF_FREE (fctx);
+ fctx = NULL;
+ goto out;
+ }
+
+ ret = __fd_ctx_set (fd, this, (uint64_t)(long)fctx);
+ if (ret) {
+ GF_FREE (fctx);
+ fctx = NULL;
+ ret = -EINVAL;
+ goto out;
+ }
+out:
+ if (fctx)
+ *ctx = fctx;
+ return ret;
+}
+
+int
+index_fd_ctx_get (fd_t *fd, xlator_t *this, index_fd_ctx_t **ctx)
+{
+ int ret = 0;
+ LOCK (&fd->lock);
+ {
+ ret = __index_fd_ctx_get (fd, this, ctx);
+ }
+ UNLOCK (&fd->lock);
+ return ret;
+}
+
+//new - Not NULL means start a fop
+//new - NULL means done processing the fop
+void
+index_queue_process (xlator_t *this, inode_t *inode, call_stub_t *new)
+{
+ call_stub_t *stub = NULL;
+ index_inode_ctx_t *ctx = NULL;
+ int ret = 0;
+ call_frame_t *frame = NULL;
+
+ LOCK (&inode->lock);
+ {
+ ret = __index_inode_ctx_get (inode, this, &ctx);
+ if (ret)
+ goto unlock;
+
+ if (new) {
+ __index_enqueue (&ctx->callstubs, new);
+ new = NULL;
+ } else {
+ ctx->processing = _gf_false;
+ }
+
+ if (!ctx->processing) {
+ stub = __index_dequeue (&ctx->callstubs);
+ if (stub)
+ ctx->processing = _gf_true;
+ else
+ ctx->processing = _gf_false;
+ }
+ }
+unlock:
+ UNLOCK (&inode->lock);
+
+ if (ret && new) {
+ frame = new->frame;
+ if (new->fop == GF_FOP_XATTROP) {
+ INDEX_STACK_UNWIND (xattrop, frame, -1, ENOMEM,
+ NULL, NULL);
+ } else if (new->fop == GF_FOP_FXATTROP) {
+ INDEX_STACK_UNWIND (fxattrop, frame, -1, ENOMEM,
+ NULL, NULL);
+ }
+ call_stub_destroy (new);
+ } else if (stub) {
+ call_resume (stub);
+ }
+ return;
+}
+
+int32_t
+index_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr, dict_t *xdata)
+{
+ inode_t *inode = NULL;
+
+ inode = inode_ref (frame->local);
+ if (op_ret < 0)
+ goto out;
+ fop_xattrop_index_action (this, frame->local, xattr);
+out:
+ INDEX_STACK_UNWIND (xattrop, frame, op_ret, op_errno, xattr, xdata);
+ index_queue_process (this, inode, NULL);
+ inode_unref (inode);
+
+ return 0;
+}
+
+int32_t
+index_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+ inode_t *inode = NULL;
+
+ inode = inode_ref (frame->local);
+ if (op_ret < 0)
+ goto out;
+
+ fop_fxattrop_index_action (this, frame->local, xattr);
+out:
+ INDEX_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, xattr, xdata);
+ index_queue_process (this, inode, NULL);
+ inode_unref (inode);
+
+ return 0;
+}
+
+int
+index_xattrop_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ STACK_WIND (frame, index_xattrop_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->xattrop, loc, optype, xattr,
+ xdata);
+ return 0;
+}
+
+int
+index_fxattrop_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ STACK_WIND (frame, index_fxattrop_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fxattrop, fd, optype, xattr,
+ xdata);
+ return 0;
+}
+
+int32_t
+index_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+
+ if (!index_xattrop_track (loc, flags, dict))
+ goto out;
+
+ frame->local = inode_ref (loc->inode);
+ stub = fop_xattrop_stub (frame, index_xattrop_wrapper,
+ loc, flags, dict, xdata);
+ if (!stub) {
+ INDEX_STACK_UNWIND (xattrop, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+ }
+
+ index_queue_process (this, loc->inode, stub);
+ return 0;
+out:
+ STACK_WIND (frame, default_xattrop_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->xattrop, loc, flags, dict, xdata);
+ return 0;
+}
+
+int32_t
+index_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+
+ if (!index_fxattrop_track (fd, flags, dict))
+ goto out;
+
+ frame->local = inode_ref (fd->inode);
+ stub = fop_fxattrop_stub (frame, index_fxattrop_wrapper,
+ fd, flags, dict, xdata);
+ if (!stub) {
+ INDEX_STACK_UNWIND (fxattrop, frame, -1, ENOMEM, NULL, xdata);
+ return 0;
+ }
+
+ index_queue_process (this, fd->inode, stub);
+ return 0;
+out:
+ STACK_WIND (frame, default_fxattrop_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fxattrop, fd, flags, dict, xdata);
+ return 0;
+}
+
+int32_t
+index_getxattr_wrapper (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
+{
+ index_priv_t *priv = NULL;
+ dict_t *xattr = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ xattr = dict_new ();
+ if (!xattr) {
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ if (!strcmp (name, GF_XATTROP_INDEX_GFID)) {
+
+ ret = dict_set_static_bin (xattr, (char*)name,
+ priv->xattrop_vgfid,
+ sizeof (priv->xattrop_vgfid));
+
+ } else if (!strcmp (name, GF_BASE_INDICES_HOLDER_GFID)) {
+
+ ret = dict_set_static_bin (xattr, (char*)name,
+ priv->base_indices_holder_vgfid,
+ sizeof (priv->base_indices_holder_vgfid));
+ }
+ if (ret) {
+ ret = -ENOMEM;
+ gf_log (THIS->name, GF_LOG_ERROR, "xattrop index "
+ "gfid set failed");
+ goto done;
+ }
+done:
+ if (ret)
+ STACK_UNWIND_STRICT (getxattr, frame, -1, -ret, xattr, xdata);
+ else
+ STACK_UNWIND_STRICT (getxattr, frame, 0, 0, xattr, xdata);
+
+ if (xattr)
+ dict_unref (xattr);
+
+ return 0;
+}
+
+int32_t
+index_lookup_wrapper (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xattr_req)
+{
+ index_priv_t *priv = NULL;
+ struct stat lstatbuf = {0};
+ int ret = 0;
+ int32_t op_errno = EINVAL;
+ int32_t op_ret = -1;
+ char path[PATH_MAX] = {0};
+ struct iatt stbuf = {0, };
+ struct iatt postparent = {0,};
+ dict_t *xattr = NULL;
+ gf_boolean_t is_dir = _gf_false;
+
+ priv = this->private;
+
+ VALIDATE_OR_GOTO (loc, done);
+ if (!uuid_compare (loc->gfid, priv->xattrop_vgfid)) {
+ make_index_dir_path (priv->index_basepath, XATTROP_SUBDIR,
+ path, sizeof (path));
+ is_dir = _gf_true;
+ } else if (!uuid_compare (loc->pargfid, priv->xattrop_vgfid)) {
+ make_file_path (priv->index_basepath, XATTROP_SUBDIR,
+ loc->name, path, sizeof (path));
+ } else if (!uuid_compare (loc->gfid,priv->base_indices_holder_vgfid)){
+ make_index_dir_path (priv->index_basepath,
+ BASE_INDICES_HOLDER_SUBDIR, path,
+ sizeof (path));
+ is_dir = _gf_true;
+ } else if (!uuid_compare (loc->pargfid, priv->base_indices_holder_vgfid)) {
+ make_file_path (priv->index_basepath,
+ BASE_INDICES_HOLDER_SUBDIR,loc->name, path,
+ sizeof (path));
+ }
+
+ ret = lstat (path, &lstatbuf);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "Stat failed on index dir "
+ "(%s)", strerror (errno));
+ op_errno = errno;
+ goto done;
+ } else if (!S_ISDIR (lstatbuf.st_mode) && is_dir) {
+ gf_log (this->name, GF_LOG_DEBUG, "Stat failed on index dir, "
+ "not a directory");
+ op_errno = ENOENT;
+ goto done;
+ }
+ xattr = dict_new ();
+ if (!xattr) {
+ op_errno = ENOMEM;
+ goto done;
+ }
+
+ iatt_from_stat (&stbuf, &lstatbuf);
+ if (is_dir && !uuid_compare (loc->gfid, priv->xattrop_vgfid)) {
+ uuid_copy (stbuf.ia_gfid, priv->xattrop_vgfid);
+ } else if (is_dir &&
+ !uuid_compare (loc->gfid, priv->base_indices_holder_vgfid)) {
+ uuid_copy (stbuf.ia_gfid, priv->base_indices_holder_vgfid);
+ } else {
+ uuid_generate (stbuf.ia_gfid);
+ }
+ stbuf.ia_ino = -1;
+ op_ret = 0;
+done:
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno,
+ loc->inode, &stbuf, xattr, &postparent);
+ if (xattr)
+ dict_unref (xattr);
+ return 0;
+}
+
+int32_t
+base_indices_readdir_wrapper (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t off, dict_t *xdata)
+{
+ index_priv_t *priv = NULL;
+ char base_indices_holder[PATH_MAX] = {0};
+ DIR *dir = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int count = 0;
+ gf_dirent_t entries;
+
+ priv = this->private;
+
+ make_index_dir_path (priv->index_basepath, BASE_INDICES_HOLDER_SUBDIR,
+ base_indices_holder, sizeof (base_indices_holder));
+
+ dir = opendir (base_indices_holder);
+ if (!dir) {
+ op_errno = EINVAL;
+ goto done;
+ }
+
+
+ INIT_LIST_HEAD (&entries.list);
+
+ count = index_fill_readdir (fd, dir, off, size, &entries,
+ BASE_INDICES_HOLDER);
+ /* pick ENOENT to indicate EOF */
+ op_errno = errno;
+ op_ret = count;
+ closedir (dir);
+done:
+ STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries, xdata);
+ gf_dirent_free (&entries);
+ return 0;
+}
+
+int32_t
+index_readdir_wrapper (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t off, dict_t *xdata)
+{
+ index_fd_ctx_t *fctx = NULL;
+ DIR *dir = NULL;
+ int ret = -1;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int count = 0;
+ gf_dirent_t entries;
+
+ INIT_LIST_HEAD (&entries.list);
+
+ ret = index_fd_ctx_get (fd, this, &fctx);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "pfd is NULL, fd=%p", fd);
+ op_errno = -ret;
+ goto done;
+ }
+
+ dir = fctx->dir;
+
+ if (!dir) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dir is NULL for fd=%p", fd);
+ op_errno = EINVAL;
+ goto done;
+ }
+
+ count = index_fill_readdir (fd, dir, off, size, &entries,
+ INDEX_XATTROP);
+
+ /* pick ENOENT to indicate EOF */
+ op_errno = errno;
+ op_ret = count;
+done:
+ STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries, xdata);
+ gf_dirent_free (&entries);
+ return 0;
+}
+
+int
+index_unlink_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, int flag,
+ dict_t *xdata)
+{
+ index_priv_t *priv = NULL;
+ int32_t op_ret = 0;
+ int32_t op_errno = 0;
+ int ret = 0;
+ struct iatt preparent = {0};
+ struct iatt postparent = {0};
+ char index_dir[PATH_MAX] = {0};
+ struct stat lstatbuf = {0};
+ uuid_t gfid = {0};
+
+ priv = this->private;
+ make_index_dir_path (priv->index_basepath, XATTROP_SUBDIR,
+ index_dir, sizeof (index_dir));
+ ret = lstat (index_dir, &lstatbuf);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = errno;
+ goto done;
+ }
+
+ iatt_from_stat (&preparent, &lstatbuf);
+ uuid_copy (preparent.ia_gfid, priv->xattrop_vgfid);
+ preparent.ia_ino = -1;
+ uuid_parse (loc->name, gfid);
+ ret = index_del (this, gfid, XATTROP_SUBDIR);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = -ret;
+ goto done;
+ }
+ memset (&lstatbuf, 0, sizeof (lstatbuf));
+ ret = lstat (index_dir, &lstatbuf);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = errno;
+ goto done;
+ }
+ iatt_from_stat (&postparent, &lstatbuf);
+ uuid_copy (postparent.ia_gfid, priv->xattrop_vgfid);
+ postparent.ia_ino = -1;
+done:
+ INDEX_STACK_UNWIND (unlink, frame, op_ret, op_errno, &preparent,
+ &postparent, xdata);
+ return 0;
+}
+
+int32_t
+index_getxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+
+ if (!name)
+ goto out;
+ if (strcmp (GF_XATTROP_INDEX_GFID, name) &&
+ strcmp (GF_BASE_INDICES_HOLDER_GFID, name))
+ goto out;
+
+ stub = fop_getxattr_stub (frame, index_getxattr_wrapper, loc, name,
+ xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+ }
+ worker_enqueue (this, stub);
+ return 0;
+out:
+ STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+ return 0;
+}
+
+int32_t
+index_lookup (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xattr_req)
+{
+ call_stub_t *stub = NULL;
+ index_priv_t *priv = NULL;
+
+ priv = this->private;
+
+ if (uuid_compare (loc->gfid, priv->xattrop_vgfid) &&
+ uuid_compare (loc->pargfid, priv->xattrop_vgfid) &&
+ uuid_compare (loc->gfid, priv->base_indices_holder_vgfid) &&
+ uuid_compare (loc->pargfid, priv->base_indices_holder_vgfid))
+ goto normal;
+
+ stub = fop_lookup_stub (frame, index_lookup_wrapper, loc, xattr_req);
+ if (!stub) {
+ STACK_UNWIND_STRICT (lookup, frame, -1, ENOMEM, loc->inode,
+ NULL, NULL, NULL);
+ return 0;
+ }
+ worker_enqueue (this, stub);
+ return 0;
+normal:
+ STACK_WIND (frame, default_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
+
+ return 0;
+}
+
+int32_t
+index_readdir (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t off, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ index_priv_t *priv = NULL;
+
+ priv = this->private;
+ if (uuid_compare (fd->inode->gfid, priv->xattrop_vgfid) &&
+ uuid_compare (fd->inode->gfid, priv->base_indices_holder_vgfid))
+ goto out;
+
+ if (!uuid_compare (fd->inode->gfid, priv->xattrop_vgfid)) {
+ stub = fop_readdir_stub (frame, index_readdir_wrapper, fd, size,
+ off, xdata);
+ } else if (!uuid_compare (fd->inode->gfid,
+ priv->base_indices_holder_vgfid)) {
+ stub = fop_readdir_stub (frame, base_indices_readdir_wrapper,
+ fd, size, off, xdata);
+ }
+
+ if (!stub) {
+ STACK_UNWIND_STRICT (readdir, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+ }
+ worker_enqueue (this, stub);
+ return 0;
+out:
+ STACK_WIND (frame, default_readdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdir, fd, size, off, xdata);
+ return 0;
+}
+
+int
+index_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ index_priv_t *priv = NULL;
+
+ priv = this->private;
+ if (uuid_compare (loc->pargfid, priv->xattrop_vgfid))
+ goto out;
+
+ stub = fop_unlink_stub (frame, index_unlink_wrapper, loc, xflag, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM, NULL, NULL,
+ NULL);
+ return 0;
+ }
+ worker_enqueue (this, stub);
+ return 0;
+out:
+ STACK_WIND (frame, default_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ return 0;
+}
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ ret = xlator_mem_acct_init (this, gf_index_mt_end + 1);
+
+ return ret;
+}
+
+int
+init (xlator_t *this)
+{
+ int ret = -1;
+ index_priv_t *priv = NULL;
+ pthread_t thread;
+ pthread_attr_t w_attr;
+ gf_boolean_t mutex_inited = _gf_false;
+ gf_boolean_t cond_inited = _gf_false;
+ gf_boolean_t attr_inited = _gf_false;
+
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "'index' not configured with exactly one child");
+ goto out;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+
+ priv = GF_CALLOC (1, sizeof (*priv), gf_index_mt_priv_t);
+ if (!priv)
+ goto out;
+
+ LOCK_INIT (&priv->lock);
+ if ((ret = pthread_cond_init(&priv->cond, NULL)) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "pthread_cond_init failed (%d)", ret);
+ goto out;
+ }
+ cond_inited = _gf_true;
+
+ if ((ret = pthread_mutex_init(&priv->mutex, NULL)) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "pthread_mutex_init failed (%d)", ret);
+ goto out;
+ }
+ mutex_inited = _gf_true;
+
+ if ((ret = pthread_attr_init (&w_attr)) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "pthread_attr_init failed (%d)", ret);
+ goto out;
+ }
+ attr_inited = _gf_true;
+
+ ret = pthread_attr_setstacksize (&w_attr, INDEX_THREAD_STACK_SIZE);
+ if (ret == EINVAL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Using default thread stack size");
+ }
+ GF_OPTION_INIT ("index-base", priv->index_basepath, path, out);
+ uuid_generate (priv->index);
+ uuid_generate (priv->xattrop_vgfid);
+ /*base_indices_holder is a directory which contains hard links to
+ * all base indices inside indices/xattrop directory*/
+ uuid_generate (priv->base_indices_holder_vgfid);
+ INIT_LIST_HEAD (&priv->callstubs);
+
+ this->private = priv;
+ ret = gf_thread_create (&thread, &w_attr, index_worker, this);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "Failed to create "
+ "worker thread, aborting");
+ goto out;
+ }
+ ret = 0;
+out:
+ if (ret) {
+ if (cond_inited)
+ pthread_cond_destroy (&priv->cond);
+ if (mutex_inited)
+ pthread_mutex_destroy (&priv->mutex);
+ if (priv)
+ GF_FREE (priv);
+ this->private = NULL;
+ }
+ if (attr_inited)
+ pthread_attr_destroy (&w_attr);
+ return ret;
+}
+
+void
+fini (xlator_t *this)
+{
+ index_priv_t *priv = NULL;
+
+ priv = this->private;
+ if (!priv)
+ goto out;
+ this->private = NULL;
+ LOCK_DESTROY (&priv->lock);
+ pthread_cond_destroy (&priv->cond);
+ pthread_mutex_destroy (&priv->mutex);
+ GF_FREE (priv);
+out:
+ return;
+}
+
+int
+index_forget (xlator_t *this, inode_t *inode)
+{
+ uint64_t tmp_cache = 0;
+ if (!inode_ctx_del (inode, this, &tmp_cache))
+ GF_FREE ((index_inode_ctx_t*) (long)tmp_cache);
+
+ return 0;
+}
+
+int32_t
+index_releasedir (xlator_t *this, fd_t *fd)
+{
+ index_fd_ctx_t *fctx = NULL;
+ uint64_t ctx = 0;
+ int ret = 0;
+
+ ret = fd_ctx_del (fd, this, &ctx);
+ if (ret < 0)
+ goto out;
+
+ fctx = (index_fd_ctx_t*) (long) ctx;
+ if (fctx->dir)
+ closedir (fctx->dir);
+
+ GF_FREE (fctx);
+out:
+ return 0;
+}
+
+int32_t
+index_release (xlator_t *this, fd_t *fd)
+{
+ index_fd_ctx_t *fctx = NULL;
+ uint64_t ctx = 0;
+ int ret = 0;
+
+ ret = fd_ctx_del (fd, this, &ctx);
+ if (ret < 0)
+ goto out;
+
+ fctx = (index_fd_ctx_t*) (long) ctx;
+ GF_FREE (fctx);
+out:
+ return 0;
+}
+
+int
+notify (xlator_t *this, int event, void *data, ...)
+{
+ int ret = 0;
+ ret = default_notify (this, event, data);
+ return ret;
+}
+
+struct xlator_fops fops = {
+ .xattrop = index_xattrop,
+ .fxattrop = index_fxattrop,
+
+ //interface functions follow
+ .getxattr = index_getxattr,
+ .lookup = index_lookup,
+ .readdir = index_readdir,
+ .unlink = index_unlink
+};
+
+struct xlator_dumpops dumpops;
+
+struct xlator_cbks cbks = {
+ .forget = index_forget,
+ .release = index_release,
+ .releasedir = index_releasedir
+};
+
+struct volume_options options[] = {
+ { .key = {"index-base" },
+ .type = GF_OPTION_TYPE_PATH,
+ .description = "path where the index files need to be stored",
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/features/index/src/index.h b/xlators/features/index/src/index.h
new file mode 100644
index 000000000..d6dcb1c23
--- /dev/null
+++ b/xlators/features/index/src/index.h
@@ -0,0 +1,73 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __INDEX_H__
+#define __INDEX_H__
+
+#include "xlator.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "byte-order.h"
+#include "common-utils.h"
+#include "index-mem-types.h"
+
+#define INDEX_THREAD_STACK_SIZE ((size_t)(1024*1024))
+
+typedef enum {
+ UNKNOWN,
+ IN,
+ NOTIN
+} index_state_t;
+
+typedef struct index_inode_ctx {
+ gf_boolean_t processing;
+ struct list_head callstubs;
+ index_state_t state;
+} index_inode_ctx_t;
+
+typedef struct index_fd_ctx {
+ DIR *dir;
+} index_fd_ctx_t;
+
+typedef enum {
+ sync_not_started,
+ sync_started,
+ synced_state,
+} to_be_healed_states_t;
+
+typedef enum {
+ INDEX_XATTROP,
+ BASE_INDICES_HOLDER,
+} readdir_directory;
+
+typedef struct index_priv {
+ char *index_basepath;
+ uuid_t index;
+ gf_lock_t lock;
+ uuid_t xattrop_vgfid;//virtual gfid of the xattrop index dir
+ uuid_t base_indices_holder_vgfid; //virtual gfid of the
+ //to_be_healed_xattrop directory
+ struct list_head callstubs;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ to_be_healed_states_t to_be_healed_states;
+} index_priv_t;
+
+#define INDEX_STACK_UNWIND(fop, frame, params ...) \
+do { \
+ if (frame) { \
+ inode_t *_inode = frame->local; \
+ frame->local = NULL; \
+ inode_unref (_inode); \
+ } \
+ STACK_UNWIND_STRICT (fop, frame, params); \
+} while (0)
+
+#endif
diff --git a/xlators/features/locks/src/Makefile.am b/xlators/features/locks/src/Makefile.am
index ec4a953eb..0f79731b4 100644
--- a/xlators/features/locks/src/Makefile.am
+++ b/xlators/features/locks/src/Makefile.am
@@ -1,15 +1,18 @@
xlator_LTLIBRARIES = locks.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-locks_la_LDFLAGS = -module -avoidversion
+locks_la_LDFLAGS = -module -avoid-version
-locks_la_SOURCES = common.c posix.c internal.c
-locks_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+locks_la_SOURCES = common.c posix.c entrylk.c inodelk.c reservelk.c \
+ clear.c
+locks_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = locks.h common.h
+noinst_HEADERS = locks.h common.h locks-mem-types.h clear.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -fno-strict-aliasing -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src $(GF_CFLAGS) -shared -nostartfiles
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+
+AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS)
CLEANFILES =
@@ -17,4 +20,4 @@ uninstall-local:
rm -f $(DESTDIR)$(xlatordir)/posix-locks.so
install-data-hook:
- ln -sf locks.so $(DESTDIR)$(xlatordir)/posix-locks.so \ No newline at end of file
+ ln -sf locks.so $(DESTDIR)$(xlatordir)/posix-locks.so
diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c
new file mode 100644
index 000000000..124b9ad0f
--- /dev/null
+++ b/xlators/features/locks/src/clear.c
@@ -0,0 +1,424 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <unistd.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <pthread.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "compat.h"
+#include "xlator.h"
+#include "inode.h"
+#include "logging.h"
+#include "common-utils.h"
+
+#include "locks.h"
+#include "common.h"
+#include "statedump.h"
+#include "clear.h"
+
+int
+clrlk_get_kind (char *kind)
+{
+ char *clrlk_kinds[CLRLK_KIND_MAX] = {"dummy", "blocked", "granted",
+ "all"};
+ int ret_kind = CLRLK_KIND_MAX;
+ int i = 0;
+
+ for (i = CLRLK_BLOCKED; i < CLRLK_KIND_MAX; i++) {
+ if (!strcmp (clrlk_kinds[i], kind)) {
+ ret_kind = i;
+ break;
+ }
+ }
+
+ return ret_kind;
+}
+
+int
+clrlk_get_type (char *type)
+{
+ char *clrlk_types[CLRLK_TYPE_MAX] = {"inode", "entry", "posix"};
+ int ret_type = CLRLK_TYPE_MAX;
+ int i = 0;
+
+ for (i = CLRLK_INODE; i < CLRLK_TYPE_MAX; i++) {
+ if (!strcmp (clrlk_types[i], type)) {
+ ret_type = i;
+ break;
+ }
+ }
+
+ return ret_type;
+}
+
+int
+clrlk_get_lock_range (char *range_str, struct gf_flock *ulock,
+ gf_boolean_t *chk_range)
+{
+ int ret = -1;
+
+ if (!chk_range)
+ goto out;
+
+ if (!range_str) {
+ ret = 0;
+ *chk_range = _gf_false;
+ goto out;
+ }
+
+ if (sscanf (range_str, "%hd,%"PRId64"-""%"PRId64, &ulock->l_whence,
+ &ulock->l_start, &ulock->l_len) != 3) {
+ goto out;
+ }
+
+ ret = 0;
+ *chk_range = _gf_true;
+out:
+ return ret;
+}
+
+int
+clrlk_parse_args (const char* cmd, clrlk_args *args)
+{
+ char *opts = NULL;
+ char *cur = NULL;
+ char *tok = NULL;
+ char *sptr = NULL;
+ char *free_ptr = NULL;
+ char kw[KW_MAX] = {[KW_TYPE] = 't',
+ [KW_KIND] = 'k',
+ };
+ int ret = -1;
+ int i = 0;
+
+ GF_ASSERT (cmd);
+ free_ptr = opts = GF_CALLOC (1, strlen (cmd), gf_common_mt_char);
+ if (!opts)
+ goto out;
+
+ if (sscanf (cmd, GF_XATTR_CLRLK_CMD".%s", opts) < 1) {
+ ret = -1;
+ goto out;
+ }
+
+ /*clr_lk_prefix.ttype.kkind.args, args - type specific*/
+ cur = opts;
+ for (i = 0; i < KW_MAX && (tok = strtok_r (cur, ".", &sptr));
+ cur = NULL, i++) {
+ if (tok[0] != kw[i]) {
+ ret = -1;
+ goto out;
+ }
+ if (i == KW_TYPE)
+ args->type = clrlk_get_type (tok+1);
+ if (i == KW_KIND)
+ args->kind = clrlk_get_kind (tok+1);
+ }
+
+ if ((args->type == CLRLK_TYPE_MAX) || (args->kind == CLRLK_KIND_MAX))
+ goto out;
+
+ /*optional args, neither range nor basename can 'legally' contain
+ * "/" in them*/
+ tok = strtok_r (NULL, "/", &sptr);
+ if (tok)
+ args->opts = gf_strdup (tok);
+
+ ret = 0;
+out:
+ GF_FREE (free_ptr);
+ return ret;
+}
+
+int
+clrlk_clear_posixlk (xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args,
+ int *blkd, int *granted, int *op_errno)
+{
+ posix_lock_t *plock = NULL;
+ posix_lock_t *tmp = NULL;
+ struct gf_flock ulock = {0, };
+ int ret = -1;
+ int bcount = 0;
+ int gcount = 0;
+ gf_boolean_t chk_range = _gf_false;
+
+ if (clrlk_get_lock_range (args->opts, &ulock, &chk_range)) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (plock, tmp, &pl_inode->ext_list,
+ list) {
+ if ((plock->blocked &&
+ !(args->kind & CLRLK_BLOCKED)) ||
+ (!plock->blocked &&
+ !(args->kind & CLRLK_GRANTED)))
+ continue;
+
+ if (chk_range &&
+ (plock->user_flock.l_whence != ulock.l_whence
+ || plock->user_flock.l_start != ulock.l_start
+ || plock->user_flock.l_len != ulock.l_len))
+ continue;
+
+ list_del_init (&plock->list);
+ if (plock->blocked) {
+ bcount++;
+ pl_trace_out (this, plock->frame, NULL, NULL,
+ F_SETLKW, &plock->user_flock,
+ -1, EAGAIN, NULL);
+
+ STACK_UNWIND_STRICT (lk, plock->frame, -1, EAGAIN,
+ &plock->user_flock, NULL);
+
+ } else {
+ gcount++;
+ }
+ GF_FREE (plock);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+ grant_blocked_locks (this, pl_inode);
+ ret = 0;
+out:
+ *blkd = bcount;
+ *granted = gcount;
+ return ret;
+}
+
+/* Returns 0 on success and -1 on failure */
+int
+clrlk_clear_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
+ clrlk_args *args, int *blkd, int *granted, int *op_errno)
+{
+ pl_inode_lock_t *ilock = NULL;
+ pl_inode_lock_t *tmp = NULL;
+ struct gf_flock ulock = {0, };
+ int ret = -1;
+ int bcount = 0;
+ int gcount = 0;
+ gf_boolean_t chk_range = _gf_false;
+ struct list_head released;
+
+ INIT_LIST_HEAD (&released);
+ if (clrlk_get_lock_range (args->opts, &ulock, &chk_range)) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ if (args->kind & CLRLK_BLOCKED)
+ goto blkd;
+
+ if (args->kind & CLRLK_GRANTED)
+ goto granted;
+
+blkd:
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (ilock, tmp, &dom->blocked_inodelks,
+ blocked_locks) {
+ if (chk_range &&
+ (ilock->user_flock.l_whence != ulock.l_whence
+ || ilock->user_flock.l_start != ulock.l_start
+ || ilock->user_flock.l_len != ulock.l_len))
+ continue;
+
+ bcount++;
+ list_del_init (&ilock->blocked_locks);
+ list_add (&ilock->blocked_locks, &released);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ list_for_each_entry_safe (ilock, tmp, &released, blocked_locks) {
+ list_del_init (&ilock->blocked_locks);
+ pl_trace_out (this, ilock->frame, NULL, NULL, F_SETLKW,
+ &ilock->user_flock, -1, EAGAIN,
+ ilock->volume);
+ STACK_UNWIND_STRICT (inodelk, ilock->frame, -1,
+ EAGAIN, NULL);
+ //No need to take lock as the locks are only in one list
+ __pl_inodelk_unref (ilock);
+ }
+
+ if (!(args->kind & CLRLK_GRANTED)) {
+ ret = 0;
+ goto out;
+ }
+
+granted:
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (ilock, tmp, &dom->inodelk_list,
+ list) {
+ if (chk_range &&
+ (ilock->user_flock.l_whence != ulock.l_whence
+ || ilock->user_flock.l_start != ulock.l_start
+ || ilock->user_flock.l_len != ulock.l_len))
+ continue;
+
+ gcount++;
+ list_del_init (&ilock->list);
+ list_add (&ilock->list, &released);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ list_for_each_entry_safe (ilock, tmp, &released, list) {
+ list_del_init (&ilock->list);
+ //No need to take lock as the locks are only in one list
+ __pl_inodelk_unref (ilock);
+ }
+
+ ret = 0;
+out:
+ grant_blocked_inode_locks (this, pl_inode, dom);
+ *blkd = bcount;
+ *granted = gcount;
+ return ret;
+}
+
+/* Returns 0 on success and -1 on failure */
+int
+clrlk_clear_entrylk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
+ clrlk_args *args, int *blkd, int *granted, int *op_errno)
+{
+ pl_entry_lock_t *elock = NULL;
+ pl_entry_lock_t *tmp = NULL;
+ int bcount = 0;
+ int gcount = 0;
+ int ret = -1;
+ struct list_head removed;
+ struct list_head released;
+
+ INIT_LIST_HEAD (&released);
+ if (args->kind & CLRLK_BLOCKED)
+ goto blkd;
+
+ if (args->kind & CLRLK_GRANTED)
+ goto granted;
+
+blkd:
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (elock, tmp, &dom->blocked_entrylks,
+ blocked_locks) {
+ if (args->opts) {
+ if (!elock->basename ||
+ strcmp (elock->basename, args->opts))
+ continue;
+ }
+
+ bcount++;
+
+ list_del_init (&elock->blocked_locks);
+ list_add_tail (&elock->blocked_locks, &released);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ list_for_each_entry_safe (elock, tmp, &released, blocked_locks) {
+ list_del_init (&elock->blocked_locks);
+ entrylk_trace_out (this, elock->frame, elock->volume, NULL, NULL,
+ elock->basename, ENTRYLK_LOCK, elock->type,
+ -1, EAGAIN);
+ STACK_UNWIND_STRICT (entrylk, elock->frame, -1, EAGAIN, NULL);
+ GF_FREE ((char *) elock->basename);
+ GF_FREE (elock->connection_id);
+ GF_FREE (elock);
+ }
+
+ if (!(args->kind & CLRLK_GRANTED)) {
+ ret = 0;
+ goto out;
+ }
+
+granted:
+ INIT_LIST_HEAD (&removed);
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (elock, tmp, &dom->entrylk_list,
+ domain_list) {
+ if (args->opts) {
+ if (!elock->basename ||
+ strcmp (elock->basename, args->opts))
+ continue;
+ }
+
+ gcount++;
+ list_del_init (&elock->domain_list);
+ list_add_tail (&elock->domain_list, &removed);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ list_for_each_entry_safe (elock, tmp, &removed, domain_list) {
+ grant_blocked_entry_locks (this, pl_inode, elock, dom);
+ }
+
+ ret = 0;
+out:
+ *blkd = bcount;
+ *granted = gcount;
+ return ret;
+}
+
+int
+clrlk_clear_lks_in_all_domains (xlator_t *this, pl_inode_t *pl_inode,
+ clrlk_args *args, int *blkd, int *granted,
+ int *op_errno)
+{
+ pl_dom_list_t *dom = NULL;
+ int ret = -1;
+ int tmp_bcount = 0;
+ int tmp_gcount = 0;
+
+ if (list_empty (&pl_inode->dom_list)) {
+ ret = 0;
+ goto out;
+ }
+
+ list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
+ tmp_bcount = tmp_gcount = 0;
+
+ switch (args->type)
+ {
+ case CLRLK_INODE:
+ ret = clrlk_clear_inodelk (this, pl_inode, dom, args,
+ &tmp_bcount, &tmp_gcount,
+ op_errno);
+ if (ret)
+ goto out;
+ break;
+ case CLRLK_ENTRY:
+ ret = clrlk_clear_entrylk (this, pl_inode, dom, args,
+ &tmp_bcount, &tmp_gcount,
+ op_errno);
+ if (ret)
+ goto out;
+ break;
+ }
+
+ *blkd += tmp_bcount;
+ *granted += tmp_gcount;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
diff --git a/xlators/features/locks/src/clear.h b/xlators/features/locks/src/clear.h
new file mode 100644
index 000000000..511f3f74a
--- /dev/null
+++ b/xlators/features/locks/src/clear.h
@@ -0,0 +1,76 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __CLEAR_H__
+#define __CLEAR_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "compat-errno.h"
+#include "stack.h"
+#include "call-stub.h"
+#include "locks.h"
+
+typedef enum {
+ CLRLK_INODE,
+ CLRLK_ENTRY,
+ CLRLK_POSIX,
+ CLRLK_TYPE_MAX
+} clrlk_type;
+
+typedef enum {
+ CLRLK_BLOCKED = 1,
+ CLRLK_GRANTED,
+ CLRLK_ALL,
+ CLRLK_KIND_MAX
+} clrlk_kind;
+
+typedef enum {
+ KW_TYPE,
+ KW_KIND,
+ /*add new keywords here*/
+ KW_MAX
+} clrlk_opts;
+
+struct _clrlk_args;
+typedef struct _clrlk_args clrlk_args;
+
+struct _clrlk_args {
+ int type;
+ int kind;
+ char *opts;
+};
+
+int
+clrlk_get__kind (char *kind);
+int
+clrlk_get_type (char *type);
+int
+clrlk_get_lock_range (char *range_str, struct gf_flock *ulock,
+ gf_boolean_t *chk_range);
+int
+clrlk_parse_args (const char* cmd, clrlk_args *args);
+
+int
+clrlk_clear_posixlk (xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args,
+ int *blkd, int *granted, int *op_errno);
+int
+clrlk_clear_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
+ clrlk_args *args, int *blkd, int *granted, int *op_errno);
+int
+clrlk_clear_entrylk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
+ clrlk_args *args, int *blkd, int *granted, int *op_errno);
+int
+clrlk_clear_lks_in_all_domains (xlator_t *this, pl_inode_t *pl_inode,
+ clrlk_args *args, int *blkd, int *granted,
+ int *op_errno);
+#endif /* __CLEAR_H__ */
diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c
index 1f10aa20c..b3309580d 100644
--- a/xlators/features/locks/src/common.c
+++ b/xlators/features/locks/src/common.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <unistd.h>
#include <fcntl.h>
#include <limits.h>
@@ -35,83 +25,477 @@
#include "common-utils.h"
#include "locks.h"
+#include "common.h"
static int
-__is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock,
- gf_lk_domain_t dom);
+__is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock);
static void
-__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock,
- gf_lk_domain_t dom);
+__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock);
+static int
+pl_send_prelock_unlock (xlator_t *this, pl_inode_t *pl_inode,
+ posix_lock_t *old_lock);
+
+static pl_dom_list_t *
+__allocate_domain (const char *volume)
+{
+ pl_dom_list_t *dom = NULL;
-#define DOMAIN_HEAD(pl_inode, dom) (dom == GF_LOCK_POSIX \
- ? &pl_inode->ext_list \
- : &pl_inode->int_list)
+ dom = GF_CALLOC (1, sizeof (*dom),
+ gf_locks_mt_pl_dom_list_t);
+ if (!dom)
+ goto out;
-pl_inode_t *
-pl_inode_get (xlator_t *this, inode_t *inode)
+ dom->domain = gf_strdup(volume);
+ if (!dom->domain)
+ goto out;
+
+ gf_log ("posix-locks", GF_LOG_TRACE,
+ "New domain allocated: %s", dom->domain);
+
+ INIT_LIST_HEAD (&dom->inode_list);
+ INIT_LIST_HEAD (&dom->entrylk_list);
+ INIT_LIST_HEAD (&dom->blocked_entrylks);
+ INIT_LIST_HEAD (&dom->inodelk_list);
+ INIT_LIST_HEAD (&dom->blocked_inodelks);
+
+out:
+ if (dom && (NULL == dom->domain)) {
+ GF_FREE (dom);
+ dom = NULL;
+ }
+
+ return dom;
+}
+
+/* Returns domain for the lock. If domain is not present,
+ * allocates a domain and returns it
+ */
+pl_dom_list_t *
+get_domain (pl_inode_t *pl_inode, const char *volume)
{
- uint64_t tmp_pl_inode = 0;
- pl_inode_t *pl_inode = NULL;
- mode_t st_mode = 0;
- int ret = 0;
+ pl_dom_list_t *dom = NULL;
+
+ GF_VALIDATE_OR_GOTO ("posix-locks", pl_inode, out);
+ GF_VALIDATE_OR_GOTO ("posix-locks", volume, out);
- ret = inode_ctx_get (inode, this,&tmp_pl_inode);
- if (ret == 0) {
- pl_inode = (pl_inode_t *)(long)tmp_pl_inode;
- goto out;
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
+ if (strcmp (dom->domain, volume) == 0)
+ goto unlock;
+ }
+
+ dom = __allocate_domain (volume);
+ if (dom)
+ list_add (&dom->inode_list, &pl_inode->dom_list);
+ }
+unlock:
+ pthread_mutex_unlock (&pl_inode->mutex);
+ if (dom) {
+ gf_log ("posix-locks", GF_LOG_TRACE, "Domain %s found", volume);
+ } else {
+ gf_log ("posix-locks", GF_LOG_TRACE, "Domain %s not found", volume);
}
- pl_inode = CALLOC (1, sizeof (*pl_inode));
- if (!pl_inode) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
+out:
+ return dom;
+}
- st_mode = inode->st_mode;
- if ((st_mode & S_ISGID) && !(st_mode & S_IXGRP))
- pl_inode->mandatory = 1;
+unsigned long
+fd_to_fdnum (fd_t *fd)
+{
+ return ((unsigned long) fd);
+}
+fd_t *
+fd_from_fdnum (posix_lock_t *lock)
+{
+ return ((fd_t *) lock->fd_num);
+}
- pthread_mutex_init (&pl_inode->mutex, NULL);
+int
+__pl_inode_is_empty (pl_inode_t *pl_inode)
+{
+ pl_dom_list_t *dom = NULL;
+ int is_empty = 1;
- INIT_LIST_HEAD (&pl_inode->dir_list);
- INIT_LIST_HEAD (&pl_inode->ext_list);
- INIT_LIST_HEAD (&pl_inode->int_list);
- INIT_LIST_HEAD (&pl_inode->rw_list);
+ if (!list_empty (&pl_inode->ext_list))
+ is_empty = 0;
- ret = inode_ctx_put (inode, this, (uint64_t)(long)(pl_inode));
+ list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
+ if (!list_empty (&dom->entrylk_list))
+ is_empty = 0;
-out:
- return pl_inode;
+ if (!list_empty (&dom->inodelk_list))
+ is_empty = 0;
+ }
+
+ return is_empty;
+}
+
+void
+pl_print_locker (char *str, int size, xlator_t *this, call_frame_t *frame)
+{
+ snprintf (str, size, "Pid=%llu, lk-owner=%s, Client=%p, Frame=%llu",
+ (unsigned long long) frame->root->pid,
+ lkowner_utoa (&frame->root->lk_owner),
+ frame->root->client,
+ (unsigned long long) frame->root->unique);
+}
+
+
+void
+pl_print_lockee (char *str, int size, fd_t *fd, loc_t *loc)
+{
+ inode_t *inode = NULL;
+ char *ipath = NULL;
+ int ret = 0;
+
+ if (fd)
+ inode = fd->inode;
+ if (loc)
+ inode = loc->inode;
+
+ if (!inode) {
+ snprintf (str, size, "<nul>");
+ return;
+ }
+
+ if (loc && loc->path) {
+ ipath = gf_strdup (loc->path);
+ } else {
+ ret = inode_path (inode, NULL, &ipath);
+ if (ret <= 0)
+ ipath = NULL;
+ }
+
+ snprintf (str, size, "gfid=%s, fd=%p, path=%s",
+ uuid_utoa (inode->gfid), fd,
+ ipath ? ipath : "<nul>");
+
+ GF_FREE (ipath);
+}
+
+
+void
+pl_print_lock (char *str, int size, int cmd,
+ struct gf_flock *flock, gf_lkowner_t *owner)
+{
+ char *cmd_str = NULL;
+ char *type_str = NULL;
+
+ switch (cmd) {
+#if F_GETLK != F_GETLK64
+ case F_GETLK64:
+#endif
+ case F_GETLK:
+ cmd_str = "GETLK";
+ break;
+
+#if F_SETLK != F_SETLK64
+ case F_SETLK64:
+#endif
+ case F_SETLK:
+ cmd_str = "SETLK";
+ break;
+
+#if F_SETLKW != F_SETLKW64
+ case F_SETLKW64:
+#endif
+ case F_SETLKW:
+ cmd_str = "SETLKW";
+ break;
+
+ default:
+ cmd_str = "UNKNOWN";
+ break;
+ }
+
+ switch (flock->l_type) {
+ case F_RDLCK:
+ type_str = "READ";
+ break;
+ case F_WRLCK:
+ type_str = "WRITE";
+ break;
+ case F_UNLCK:
+ type_str = "UNLOCK";
+ break;
+ default:
+ type_str = "UNKNOWN";
+ break;
+ }
+
+ snprintf (str, size, "lock=FCNTL, cmd=%s, type=%s, "
+ "start=%llu, len=%llu, pid=%llu, lk-owner=%s",
+ cmd_str, type_str, (unsigned long long) flock->l_start,
+ (unsigned long long) flock->l_len,
+ (unsigned long long) flock->l_pid,
+ lkowner_utoa (owner));
+}
+
+
+void
+pl_trace_in (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
+ int cmd, struct gf_flock *flock, const char *domain)
+{
+ posix_locks_private_t *priv = NULL;
+ char pl_locker[256];
+ char pl_lockee[256];
+ char pl_lock[256];
+
+ priv = this->private;
+
+ if (!priv->trace)
+ return;
+
+ pl_print_locker (pl_locker, 256, this, frame);
+ pl_print_lockee (pl_lockee, 256, fd, loc);
+ if (domain)
+ pl_print_inodelk (pl_lock, 256, cmd, flock, domain);
+ else
+ pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "[REQUEST] Locker = {%s} Lockee = {%s} Lock = {%s}",
+ pl_locker, pl_lockee, pl_lock);
+}
+
+
+void
+pl_print_verdict (char *str, int size, int op_ret, int op_errno)
+{
+ char *verdict = NULL;
+
+ if (op_ret == 0) {
+ verdict = "GRANTED";
+ } else {
+ switch (op_errno) {
+ case EAGAIN:
+ verdict = "TRYAGAIN";
+ break;
+ default:
+ verdict = strerror (op_errno);
+ }
+ }
+
+ snprintf (str, size, "%s", verdict);
+}
+
+
+void
+pl_trace_out (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
+ int cmd, struct gf_flock *flock, int op_ret, int op_errno, const char *domain)
+
+{
+ posix_locks_private_t *priv = NULL;
+ char pl_locker[256];
+ char pl_lockee[256];
+ char pl_lock[256];
+ char verdict[32];
+
+ priv = this->private;
+
+ if (!priv->trace)
+ return;
+
+ pl_print_locker (pl_locker, 256, this, frame);
+ pl_print_lockee (pl_lockee, 256, fd, loc);
+ if (domain)
+ pl_print_inodelk (pl_lock, 256, cmd, flock, domain);
+ else
+ pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner);
+
+ pl_print_verdict (verdict, 32, op_ret, op_errno);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "[%s] Locker = {%s} Lockee = {%s} Lock = {%s}",
+ verdict, pl_locker, pl_lockee, pl_lock);
+}
+
+
+void
+pl_trace_block (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
+ int cmd, struct gf_flock *flock, const char *domain)
+
+{
+ posix_locks_private_t *priv = NULL;
+ char pl_locker[256];
+ char pl_lockee[256];
+ char pl_lock[256];
+
+ priv = this->private;
+
+ if (!priv->trace)
+ return;
+
+ pl_print_locker (pl_locker, 256, this, frame);
+ pl_print_lockee (pl_lockee, 256, fd, loc);
+ if (domain)
+ pl_print_inodelk (pl_lock, 256, cmd, flock, domain);
+ else
+ pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "[BLOCKED] Locker = {%s} Lockee = {%s} Lock = {%s}",
+ pl_locker, pl_lockee, pl_lock);
+}
+
+
+void
+pl_trace_flush (xlator_t *this, call_frame_t *frame, fd_t *fd)
+{
+ posix_locks_private_t *priv = NULL;
+ char pl_locker[256];
+ char pl_lockee[256];
+ pl_inode_t *pl_inode = NULL;
+
+ priv = this->private;
+
+ if (!priv->trace)
+ return;
+
+ pl_inode = pl_inode_get (this, fd->inode);
+
+ if (pl_inode && __pl_inode_is_empty (pl_inode))
+ return;
+
+ pl_print_locker (pl_locker, 256, this, frame);
+ pl_print_lockee (pl_lockee, 256, fd, NULL);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "[FLUSH] Locker = {%s} Lockee = {%s}",
+ pl_locker, pl_lockee);
+}
+
+void
+pl_trace_release (xlator_t *this, fd_t *fd)
+{
+ posix_locks_private_t *priv = NULL;
+ char pl_lockee[256];
+
+ priv = this->private;
+
+ if (!priv->trace)
+ return;
+
+ pl_print_lockee (pl_lockee, 256, fd, NULL);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "[RELEASE] Lockee = {%s}", pl_lockee);
+}
+
+
+void
+pl_update_refkeeper (xlator_t *this, inode_t *inode)
+{
+ pl_inode_t *pl_inode = NULL;
+ int is_empty = 0;
+ int need_unref = 0;
+ int need_ref = 0;
+
+ pl_inode = pl_inode_get (this, inode);
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ is_empty = __pl_inode_is_empty (pl_inode);
+
+ if (is_empty && pl_inode->refkeeper) {
+ need_unref = 1;
+ pl_inode->refkeeper = NULL;
+ }
+
+ if (!is_empty && !pl_inode->refkeeper) {
+ need_ref = 1;
+ pl_inode->refkeeper = inode;
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ if (need_unref)
+ inode_unref (inode);
+
+ if (need_ref)
+ inode_ref (inode);
+}
+
+
+pl_inode_t *
+pl_inode_get (xlator_t *this, inode_t *inode)
+{
+ uint64_t tmp_pl_inode = 0;
+ pl_inode_t *pl_inode = NULL;
+ int ret = 0;
+
+ LOCK (&inode->lock);
+ {
+ ret = __inode_ctx_get (inode, this, &tmp_pl_inode);
+ if (ret == 0) {
+ pl_inode = (pl_inode_t *)(long)tmp_pl_inode;
+ goto unlock;
+ }
+ pl_inode = GF_CALLOC (1, sizeof (*pl_inode),
+ gf_locks_mt_pl_inode_t);
+ if (!pl_inode) {
+ goto unlock;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "Allocating new pl inode");
+
+ pthread_mutex_init (&pl_inode->mutex, NULL);
+
+ INIT_LIST_HEAD (&pl_inode->dom_list);
+ INIT_LIST_HEAD (&pl_inode->ext_list);
+ INIT_LIST_HEAD (&pl_inode->rw_list);
+ INIT_LIST_HEAD (&pl_inode->reservelk_list);
+ INIT_LIST_HEAD (&pl_inode->blocked_reservelks);
+ INIT_LIST_HEAD (&pl_inode->blocked_calls);
+
+ __inode_ctx_put (inode, this, (uint64_t)(long)(pl_inode));
+ }
+unlock:
+ UNLOCK (&inode->lock);
+
+ return pl_inode;
}
/* Create a new posix_lock_t */
posix_lock_t *
-new_posix_lock (struct flock *flock, transport_t *transport, pid_t client_pid)
+new_posix_lock (struct gf_flock *flock, client_t *client, pid_t client_pid,
+ gf_lkowner_t *owner, fd_t *fd)
{
- posix_lock_t *lock = NULL;
+ posix_lock_t *lock = NULL;
- lock = CALLOC (1, sizeof (posix_lock_t));
- if (!lock) {
- return NULL;
- }
+ GF_VALIDATE_OR_GOTO ("posix-locks", flock, out);
+ GF_VALIDATE_OR_GOTO ("posix-locks", client, out);
+ GF_VALIDATE_OR_GOTO ("posix-locks", fd, out);
- lock->fl_start = flock->l_start;
- lock->fl_type = flock->l_type;
+ lock = GF_CALLOC (1, sizeof (posix_lock_t),
+ gf_locks_mt_posix_lock_t);
+ if (!lock) {
+ goto out;
+ }
- if (flock->l_len == 0)
- lock->fl_end = LLONG_MAX;
- else
- lock->fl_end = flock->l_start + flock->l_len - 1;
+ lock->fl_start = flock->l_start;
+ lock->fl_type = flock->l_type;
- lock->transport = transport;
- lock->client_pid = client_pid;
+ if (flock->l_len == 0)
+ lock->fl_end = LLONG_MAX;
+ else
+ lock->fl_end = flock->l_start + flock->l_len - 1;
- INIT_LIST_HEAD (&lock->list);
+ lock->client = client;
+ lock->fd_num = fd_to_fdnum (fd);
+ lock->fd = fd;
+ lock->client_pid = client_pid;
+ lock->owner = *owner;
- return lock;
+ INIT_LIST_HEAD (&lock->list);
+
+out:
+ return lock;
}
@@ -119,7 +503,7 @@ new_posix_lock (struct flock *flock, transport_t *transport, pid_t client_pid)
void
__delete_lock (pl_inode_t *pl_inode, posix_lock_t *lock)
{
- list_del_init (&lock->list);
+ list_del_init (&lock->list);
}
@@ -127,32 +511,37 @@ __delete_lock (pl_inode_t *pl_inode, posix_lock_t *lock)
void
__destroy_lock (posix_lock_t *lock)
{
- free (lock);
+ GF_FREE (lock);
}
-/* Convert a posix_lock to a struct flock */
+/* Convert a posix_lock to a struct gf_flock */
void
-posix_lock_to_flock (posix_lock_t *lock, struct flock *flock)
+posix_lock_to_flock (posix_lock_t *lock, struct gf_flock *flock)
{
- flock->l_pid = lock->client_pid;
- flock->l_type = lock->fl_type;
- flock->l_start = lock->fl_start;
-
- if (lock->fl_end == LLONG_MAX)
- flock->l_len = 0;
- else
- flock->l_len = lock->fl_end - lock->fl_start + 1;
+ flock->l_pid = lock->client_pid;
+ flock->l_type = lock->fl_type;
+ flock->l_start = lock->fl_start;
+ flock->l_owner = lock->owner;
+
+ if (lock->fl_end == LLONG_MAX)
+ flock->l_len = 0;
+ else
+ flock->l_len = lock->fl_end - lock->fl_start + 1;
}
-
/* Insert the lock into the inode's lock list */
static void
-__insert_lock (pl_inode_t *pl_inode, posix_lock_t *lock, gf_lk_domain_t dom)
+__insert_lock (pl_inode_t *pl_inode, posix_lock_t *lock)
{
- list_add_tail (&lock->list, DOMAIN_HEAD (pl_inode, dom));
+ if (lock->blocked)
+ gettimeofday (&lock->blkd_time, NULL);
+ else
+ gettimeofday (&lock->granted_time, NULL);
- return;
+ list_add_tail (&lock->list, &pl_inode->ext_list);
+
+ return;
}
@@ -160,14 +549,14 @@ __insert_lock (pl_inode_t *pl_inode, posix_lock_t *lock, gf_lk_domain_t dom)
int
locks_overlap (posix_lock_t *l1, posix_lock_t *l2)
{
- /*
- Note:
- FUSE always gives us absolute offsets, so no need to worry
- about SEEK_CUR or SEEK_END
- */
-
- return ((l1->fl_end >= l2->fl_start) &&
- (l2->fl_end >= l1->fl_start));
+ /*
+ Note:
+ FUSE always gives us absolute offsets, so no need to worry
+ about SEEK_CUR or SEEK_END
+ */
+
+ return ((l1->fl_end >= l2->fl_start) &&
+ (l2->fl_end >= l1->fl_start));
}
@@ -175,24 +564,26 @@ locks_overlap (posix_lock_t *l1, posix_lock_t *l2)
int
same_owner (posix_lock_t *l1, posix_lock_t *l2)
{
- return ((l1->client_pid == l2->client_pid) &&
- (l1->transport == l2->transport));
+
+ return (is_same_lkowner (&l1->owner, &l2->owner) &&
+ (l1->client == l2->client));
+
}
/* Delete all F_UNLCK locks */
void
-__delete_unlck_locks (pl_inode_t *pl_inode, gf_lk_domain_t dom)
+__delete_unlck_locks (pl_inode_t *pl_inode)
{
- posix_lock_t *l = NULL;
- posix_lock_t *tmp = NULL;
-
- list_for_each_entry_safe (l, tmp, DOMAIN_HEAD (pl_inode, dom), list) {
- if (l->fl_type == F_UNLCK) {
- __delete_lock (pl_inode, l);
- __destroy_lock (l);
- }
- }
+ posix_lock_t *l = NULL;
+ posix_lock_t *tmp = NULL;
+
+ list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) {
+ if (l->fl_type == F_UNLCK) {
+ __delete_lock (pl_inode, l);
+ __destroy_lock (l);
+ }
+ }
}
@@ -200,130 +591,202 @@ __delete_unlck_locks (pl_inode_t *pl_inode, gf_lk_domain_t dom)
static posix_lock_t *
add_locks (posix_lock_t *l1, posix_lock_t *l2)
{
- posix_lock_t *sum = NULL;
+ posix_lock_t *sum = NULL;
- sum = CALLOC (1, sizeof (posix_lock_t));
- if (!sum)
- return NULL;
+ sum = GF_CALLOC (1, sizeof (posix_lock_t),
+ gf_locks_mt_posix_lock_t);
+ if (!sum)
+ return NULL;
- sum->fl_start = min (l1->fl_start, l2->fl_start);
- sum->fl_end = max (l1->fl_end, l2->fl_end);
+ sum->fl_start = min (l1->fl_start, l2->fl_start);
+ sum->fl_end = max (l1->fl_end, l2->fl_end);
- return sum;
+ return sum;
}
/* Subtract two locks */
struct _values {
- posix_lock_t *locks[3];
+ posix_lock_t *locks[3];
};
/* {big} must always be contained inside {small} */
static struct _values
subtract_locks (posix_lock_t *big, posix_lock_t *small)
{
- struct _values v = { .locks = {0, 0, 0} };
-
- if ((big->fl_start == small->fl_start) &&
- (big->fl_end == small->fl_end)) {
- /* both edges coincide with big */
- v.locks[0] = CALLOC (1, sizeof (posix_lock_t));
- ERR_ABORT (v.locks[0]);
- memcpy (v.locks[0], big, sizeof (posix_lock_t));
- v.locks[0]->fl_type = small->fl_type;
- }
- else if ((small->fl_start > big->fl_start) &&
- (small->fl_end < big->fl_end)) {
- /* both edges lie inside big */
- v.locks[0] = CALLOC (1, sizeof (posix_lock_t));
- ERR_ABORT (v.locks[0]);
- v.locks[1] = CALLOC (1, sizeof (posix_lock_t));
- ERR_ABORT (v.locks[1]);
- v.locks[2] = CALLOC (1, sizeof (posix_lock_t));
- ERR_ABORT (v.locks[2]);
-
- memcpy (v.locks[0], big, sizeof (posix_lock_t));
- v.locks[0]->fl_end = small->fl_start - 1;
-
- memcpy (v.locks[1], small, sizeof (posix_lock_t));
- memcpy (v.locks[2], big, sizeof (posix_lock_t));
- v.locks[2]->fl_start = small->fl_end + 1;
- }
- /* one edge coincides with big */
- else if (small->fl_start == big->fl_start) {
- v.locks[0] = CALLOC (1, sizeof (posix_lock_t));
- ERR_ABORT (v.locks[0]);
- v.locks[1] = CALLOC (1, sizeof (posix_lock_t));
- ERR_ABORT (v.locks[1]);
-
- memcpy (v.locks[0], big, sizeof (posix_lock_t));
- v.locks[0]->fl_start = small->fl_end + 1;
-
- memcpy (v.locks[1], small, sizeof (posix_lock_t));
- }
- else if (small->fl_end == big->fl_end) {
- v.locks[0] = CALLOC (1, sizeof (posix_lock_t));
- ERR_ABORT (v.locks[0]);
- v.locks[1] = CALLOC (1, sizeof (posix_lock_t));
- ERR_ABORT (v.locks[1]);
-
- memcpy (v.locks[0], big, sizeof (posix_lock_t));
- v.locks[0]->fl_end = small->fl_start - 1;
-
- memcpy (v.locks[1], small, sizeof (posix_lock_t));
- }
- else {
- gf_log ("posix-locks", GF_LOG_ERROR,
- "Unexpected case in subtract_locks. Please send "
- "a bug report to gluster-devel@nongnu.org");
- }
-
- return v;
-}
-
-/*
- Start searching from {begin}, and return the first lock that
- conflicts, NULL if no conflict
- If {begin} is NULL, then start from the beginning of the list
+
+ struct _values v = { .locks = {0, 0, 0} };
+
+ if ((big->fl_start == small->fl_start) &&
+ (big->fl_end == small->fl_end)) {
+ /* both edges coincide with big */
+ v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t),
+ gf_locks_mt_posix_lock_t);
+ if (!v.locks[0])
+ goto out;
+ memcpy (v.locks[0], big, sizeof (posix_lock_t));
+ v.locks[0]->fl_type = small->fl_type;
+ goto done;
+ }
+
+ if ((small->fl_start > big->fl_start) &&
+ (small->fl_end < big->fl_end)) {
+ /* both edges lie inside big */
+ v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t),
+ gf_locks_mt_posix_lock_t);
+ if (!v.locks[0])
+ goto out;
+
+ v.locks[1] = GF_CALLOC (1, sizeof (posix_lock_t),
+ gf_locks_mt_posix_lock_t);
+ if (!v.locks[1])
+ goto out;
+
+ v.locks[2] = GF_CALLOC (1, sizeof (posix_lock_t),
+ gf_locks_mt_posix_lock_t);
+ if (!v.locks[1])
+ goto out;
+
+ memcpy (v.locks[0], big, sizeof (posix_lock_t));
+ v.locks[0]->fl_end = small->fl_start - 1;
+
+ memcpy (v.locks[1], small, sizeof (posix_lock_t));
+
+ memcpy (v.locks[2], big, sizeof (posix_lock_t));
+ v.locks[2]->fl_start = small->fl_end + 1;
+ goto done;
+
+ }
+
+ /* one edge coincides with big */
+ if (small->fl_start == big->fl_start) {
+ v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t),
+ gf_locks_mt_posix_lock_t);
+ if (!v.locks[0])
+ goto out;
+
+ v.locks[1] = GF_CALLOC (1, sizeof (posix_lock_t),
+ gf_locks_mt_posix_lock_t);
+ if (!v.locks[1])
+ goto out;
+
+ memcpy (v.locks[0], big, sizeof (posix_lock_t));
+ v.locks[0]->fl_start = small->fl_end + 1;
+
+ memcpy (v.locks[1], small, sizeof (posix_lock_t));
+ goto done;
+ }
+
+ if (small->fl_end == big->fl_end) {
+ v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t),
+ gf_locks_mt_posix_lock_t);
+ if (!v.locks[0])
+ goto out;
+
+ v.locks[1] = GF_CALLOC (1, sizeof (posix_lock_t),
+ gf_locks_mt_posix_lock_t);
+ if (!v.locks[1])
+ goto out;
+
+ memcpy (v.locks[0], big, sizeof (posix_lock_t));
+ v.locks[0]->fl_end = small->fl_start - 1;
+
+ memcpy (v.locks[1], small, sizeof (posix_lock_t));
+ goto done;
+ }
+
+ GF_ASSERT (0);
+ gf_log ("posix-locks", GF_LOG_ERROR, "Unexpected case in subtract_locks");
+
+out:
+ if (v.locks[0]) {
+ GF_FREE (v.locks[0]);
+ v.locks[0] = NULL;
+ }
+ if (v.locks[1]) {
+ GF_FREE (v.locks[1]);
+ v.locks[1] = NULL;
+ }
+ if (v.locks[2]) {
+ GF_FREE (v.locks[2]);
+ v.locks[2] = NULL;
+ }
+
+done:
+ return v;
+}
+
+static posix_lock_t *
+first_conflicting_overlap (pl_inode_t *pl_inode, posix_lock_t *lock)
+{
+ posix_lock_t *l = NULL;
+ posix_lock_t *conf = NULL;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry (l, &pl_inode->ext_list, list) {
+ if (l->blocked)
+ continue;
+
+ if (locks_overlap (l, lock)) {
+ if (same_owner (l, lock))
+ continue;
+
+ if ((l->fl_type == F_WRLCK) ||
+ (lock->fl_type == F_WRLCK)) {
+ conf = l;
+ goto unlock;
+ }
+ }
+ }
+ }
+unlock:
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ return conf;
+}
+
+/*
+ Start searching from {begin}, and return the first lock that
+ conflicts, NULL if no conflict
+ If {begin} is NULL, then start from the beginning of the list
*/
static posix_lock_t *
-first_overlap (pl_inode_t *pl_inode, posix_lock_t *lock,
- gf_lk_domain_t dom)
+first_overlap (pl_inode_t *pl_inode, posix_lock_t *lock)
{
- posix_lock_t *l = NULL;
+ posix_lock_t *l = NULL;
- list_for_each_entry (l, DOMAIN_HEAD (pl_inode, dom), list) {
- if (l->blocked)
- continue;
+ list_for_each_entry (l, &pl_inode->ext_list, list) {
+ if (l->blocked)
+ continue;
- if (locks_overlap (l, lock))
- return l;
- }
+ if (locks_overlap (l, lock))
+ return l;
+ }
- return NULL;
+ return NULL;
}
/* Return true if lock is grantable */
static int
-__is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock,
- gf_lk_domain_t dom)
+__is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock)
{
- posix_lock_t *l = NULL;
- int ret = 1;
-
- list_for_each_entry (l, DOMAIN_HEAD (pl_inode, dom), list) {
- if (!l->blocked && locks_overlap (lock, l)) {
- if (((l->fl_type == F_WRLCK)
- || (lock->fl_type == F_WRLCK))
- && (lock->fl_type != F_UNLCK)
- && !same_owner (l, lock)) {
- ret = 0;
- break;
- }
- }
- }
- return ret;
+ posix_lock_t *l = NULL;
+ int ret = 1;
+
+ list_for_each_entry (l, &pl_inode->ext_list, list) {
+ if (!l->blocked && locks_overlap (lock, l)) {
+ if (((l->fl_type == F_WRLCK)
+ || (lock->fl_type == F_WRLCK))
+ && (lock->fl_type != F_UNLCK)
+ && !same_owner (l, lock)) {
+ ret = 0;
+ break;
+ }
+ }
+ }
+ return ret;
}
@@ -331,231 +794,428 @@ extern void do_blocked_rw (pl_inode_t *);
static void
-__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock,
- gf_lk_domain_t dom)
+__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock)
{
- posix_lock_t *conf = NULL;
- posix_lock_t *t = NULL;
- posix_lock_t *sum = NULL;
- int i = 0;
- struct _values v = { .locks = {0, 0, 0} };
+ posix_lock_t *conf = NULL;
+ posix_lock_t *t = NULL;
+ posix_lock_t *sum = NULL;
+ int i = 0;
+ struct _values v = { .locks = {0, 0, 0} };
+
+ list_for_each_entry_safe (conf, t, &pl_inode->ext_list, list) {
+ if (conf->blocked)
+ continue;
+ if (!locks_overlap (conf, lock))
+ continue;
+
+ if (same_owner (conf, lock)) {
+ if (conf->fl_type == lock->fl_type) {
+ sum = add_locks (lock, conf);
+
+ sum->fl_type = lock->fl_type;
+ sum->client = lock->client;
+ sum->fd_num = lock->fd_num;
+ sum->client_pid = lock->client_pid;
+ sum->owner = lock->owner;
+
+ __delete_lock (pl_inode, conf);
+ __destroy_lock (conf);
+
+ __destroy_lock (lock);
+ INIT_LIST_HEAD (&sum->list);
+ posix_lock_to_flock (sum, &sum->user_flock);
+ __insert_and_merge (pl_inode, sum);
+
+ return;
+ } else {
+ sum = add_locks (lock, conf);
+
+ sum->fl_type = conf->fl_type;
+ sum->client = conf->client;
+ sum->fd_num = conf->fd_num;
+ sum->client_pid = conf->client_pid;
+ sum->owner = conf->owner;
+
+ v = subtract_locks (sum, lock);
+
+ __delete_lock (pl_inode, conf);
+ __destroy_lock (conf);
+
+ __delete_lock (pl_inode, lock);
+ __destroy_lock (lock);
+
+ __destroy_lock (sum);
+
+ for (i = 0; i < 3; i++) {
+ if (!v.locks[i])
+ continue;
+
+ INIT_LIST_HEAD (&v.locks[i]->list);
+ posix_lock_to_flock (v.locks[i],
+ &v.locks[i]->user_flock);
+ __insert_and_merge (pl_inode,
+ v.locks[i]);
+ }
+
+ __delete_unlck_locks (pl_inode);
+ return;
+ }
+ }
+
+ if (lock->fl_type == F_UNLCK) {
+ continue;
+ }
+
+ if ((conf->fl_type == F_RDLCK) && (lock->fl_type == F_RDLCK)) {
+ __insert_lock (pl_inode, lock);
+ return;
+ }
+ }
- list_for_each_entry_safe (conf, t, DOMAIN_HEAD (pl_inode, dom), list) {
- if (!locks_overlap (conf, lock))
- continue;
+ /* no conflicts, so just insert */
+ if (lock->fl_type != F_UNLCK) {
+ __insert_lock (pl_inode, lock);
+ } else {
+ __destroy_lock (lock);
+ }
+}
- if (same_owner (conf, lock)) {
- if (conf->fl_type == lock->fl_type) {
- sum = add_locks (lock, conf);
- sum->fl_type = lock->fl_type;
- sum->transport = lock->transport;
- sum->client_pid = lock->client_pid;
+void
+__grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, struct list_head *granted)
+{
+ struct list_head tmp_list;
+ posix_lock_t *l = NULL;
+ posix_lock_t *tmp = NULL;
+ posix_lock_t *conf = NULL;
+
+ INIT_LIST_HEAD (&tmp_list);
+
+ list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) {
+ if (l->blocked) {
+ conf = first_overlap (pl_inode, l);
+ if (conf)
+ continue;
+
+ l->blocked = 0;
+ list_move_tail (&l->list, &tmp_list);
+ }
+ }
- __delete_lock (pl_inode, conf);
- __destroy_lock (conf);
+ list_for_each_entry_safe (l, tmp, &tmp_list, list) {
+ list_del_init (&l->list);
- __destroy_lock (lock);
- __insert_and_merge (pl_inode, sum, dom);
+ if (__is_lock_grantable (pl_inode, l)) {
+ conf = GF_CALLOC (1, sizeof (*conf),
+ gf_locks_mt_posix_lock_t);
- return;
- } else {
- sum = add_locks (lock, conf);
+ if (!conf) {
+ l->blocked = 1;
+ __insert_lock (pl_inode, l);
+ continue;
+ }
- sum->fl_type = conf->fl_type;
- sum->transport = conf->transport;
- sum->client_pid = conf->client_pid;
+ conf->frame = l->frame;
+ l->frame = NULL;
- v = subtract_locks (sum, lock);
-
- __delete_lock (pl_inode, conf);
- __destroy_lock (conf);
+ posix_lock_to_flock (l, &conf->user_flock);
- __delete_lock (pl_inode, lock);
- __destroy_lock (lock);
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Granted",
+ l->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ l->client_pid, lkowner_utoa (&l->owner),
+ l->user_flock.l_start,
+ l->user_flock.l_len);
- __destroy_lock (sum);
+ __insert_and_merge (pl_inode, l);
- for (i = 0; i < 3; i++) {
- if (!v.locks[i])
- continue;
+ list_add (&conf->list, granted);
+ } else {
+ l->blocked = 1;
+ __insert_lock (pl_inode, l);
+ }
+ }
+}
- if (v.locks[i]->fl_type == F_UNLCK) {
- __destroy_lock (v.locks[i]);
- continue;
- }
- __insert_and_merge (pl_inode,
- v.locks[i], dom);
- }
- __delete_unlck_locks (pl_inode, dom);
- return;
- }
- }
+void
+grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode)
+{
+ struct list_head granted_list;
+ posix_lock_t *tmp = NULL;
+ posix_lock_t *lock = NULL;
- if (lock->fl_type == F_UNLCK) {
- continue;
- }
+ INIT_LIST_HEAD (&granted_list);
- if ((conf->fl_type == F_RDLCK) && (lock->fl_type == F_RDLCK)) {
- __insert_lock (pl_inode, lock, dom);
- return;
- }
- }
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __grant_blocked_locks (this, pl_inode, &granted_list);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
- /* no conflicts, so just insert */
- if (lock->fl_type != F_UNLCK) {
- __insert_lock (pl_inode, lock, dom);
- } else {
- __destroy_lock (lock);
- }
-}
+ list_for_each_entry_safe (lock, tmp, &granted_list, list) {
+ list_del_init (&lock->list);
+ pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW,
+ &lock->user_flock, 0, 0, NULL);
-void
-__grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode,
- gf_lk_domain_t dom, struct list_head *granted)
+ STACK_UNWIND_STRICT (lk, lock->frame, 0, 0,
+ &lock->user_flock, NULL);
+
+ GF_FREE (lock);
+ }
+
+ return;
+}
+
+static int
+pl_send_prelock_unlock (xlator_t *this, pl_inode_t *pl_inode,
+ posix_lock_t *old_lock)
{
- struct list_head tmp_list;
- posix_lock_t *l = NULL;
- posix_lock_t *tmp = NULL;
- posix_lock_t *conf = NULL;
+ struct gf_flock flock = {0,};
+ posix_lock_t *unlock_lock = NULL;
- INIT_LIST_HEAD (&tmp_list);
+ struct list_head granted_list;
+ posix_lock_t *tmp = NULL;
+ posix_lock_t *lock = NULL;
- list_for_each_entry_safe (l, tmp, DOMAIN_HEAD (pl_inode, dom), list) {
- if (l->blocked) {
- conf = first_overlap (pl_inode, l, dom);
- if (conf)
- continue;
+ int ret = -1;
- l->blocked = 0;
- list_move_tail (&l->list, &tmp_list);
- }
- }
+ INIT_LIST_HEAD (&granted_list);
- list_for_each_entry_safe (l, tmp, &tmp_list, list) {
- list_del_init (&l->list);
+ flock.l_type = F_UNLCK;
+ flock.l_whence = old_lock->user_flock.l_whence;
+ flock.l_start = old_lock->user_flock.l_start;
+ flock.l_len = old_lock->user_flock.l_len;
- if (__is_lock_grantable (pl_inode, l, dom)) {
- conf = CALLOC (1, sizeof (*conf));
- if (!conf) {
- l->blocked = 1;
- __insert_lock (pl_inode, l, dom);
- continue;
- }
+ unlock_lock = new_posix_lock (&flock, old_lock->client,
+ old_lock->client_pid, &old_lock->owner,
+ old_lock->fd);
+ GF_VALIDATE_OR_GOTO (this->name, unlock_lock, out);
+ ret = 0;
- conf->frame = l->frame;
- l->frame = NULL;
+ __insert_and_merge (pl_inode, unlock_lock);
- posix_lock_to_flock (l, &conf->user_flock);
+ __grant_blocked_locks (this, pl_inode, &granted_list);
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) %"PRId64" - %"PRId64" => Granted",
- l->fl_type == F_UNLCK ? "Unlock" : "Lock",
- l->client_pid,
- l->user_flock.l_start,
- l->user_flock.l_len);
+ list_for_each_entry_safe (lock, tmp, &granted_list, list) {
+ list_del_init (&lock->list);
- __insert_and_merge (pl_inode, l, dom);
+ pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW,
+ &lock->user_flock, 0, 0, NULL);
- list_add (&conf->list, granted);
- } else {
- l->blocked = 1;
- __insert_lock (pl_inode, l, dom);
- }
- }
+ STACK_UNWIND_STRICT (lk, lock->frame, 0, 0,
+ &lock->user_flock, NULL);
+
+ GF_FREE (lock);
+ }
+
+out:
+ return ret;
}
+int
+pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
+ int can_block)
+{
+ int ret = 0;
+
+ errno = 0;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ /* Send unlock before the actual lock to
+ prevent lock upgrade / downgrade
+ problems only if:
+ - it is a blocking call
+ - it has other conflicting locks
+ */
+
+ if (can_block &&
+ !(__is_lock_grantable (pl_inode, lock))) {
+ ret = pl_send_prelock_unlock (this, pl_inode,
+ lock);
+ if (ret)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Could not send pre-lock "
+ "unlock");
+ }
+
+ if (__is_lock_grantable (pl_inode, lock)) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => OK",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lkowner_utoa (&lock->owner),
+ lock->user_flock.l_start,
+ lock->user_flock.l_len);
+ __insert_and_merge (pl_inode, lock);
+ } else if (can_block) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lkowner_utoa (&lock->owner),
+ lock->user_flock.l_start,
+ lock->user_flock.l_len);
+ lock->blocked = 1;
+ __insert_lock (pl_inode, lock);
+ ret = -1;
+ } else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => NOK",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lkowner_utoa (&lock->owner),
+ lock->user_flock.l_start,
+ lock->user_flock.l_len);
+ errno = EAGAIN;
+ ret = -1;
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ grant_blocked_locks (this, pl_inode);
-void
-grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, gf_lk_domain_t dom)
+ do_blocked_rw (pl_inode);
+
+ return ret;
+}
+
+
+posix_lock_t *
+pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock)
{
- struct list_head granted_list;
- posix_lock_t *tmp = NULL;
- posix_lock_t *lock = NULL;
+ posix_lock_t *conf = NULL;
- INIT_LIST_HEAD (&granted_list);
+ conf = first_conflicting_overlap (pl_inode, lock);
- pthread_mutex_lock (&pl_inode->mutex);
- {
- __grant_blocked_locks (this, pl_inode, dom, &granted_list);
- }
- pthread_mutex_unlock (&pl_inode->mutex);
+ if (conf == NULL) {
+ lock->fl_type = F_UNLCK;
+ return lock;
+ }
- list_for_each_entry_safe (lock, tmp, &granted_list, list) {
- list_del_init (&lock->list);
+ return conf;
+}
- STACK_UNWIND (lock->frame, 0, 0, &lock->user_flock);
- FREE (lock);
- }
+struct _lock_table *
+pl_lock_table_new (void)
+{
+ struct _lock_table *new = NULL;
- return;
+ new = GF_CALLOC (1, sizeof (struct _lock_table), gf_common_mt_lock_table);
+ if (new == NULL) {
+ goto out;
+ }
+ INIT_LIST_HEAD (&new->entrylk_lockers);
+ INIT_LIST_HEAD (&new->inodelk_lockers);
+ LOCK_INIT (&new->lock);
+out:
+ return new;
}
int
-pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
- int can_block, gf_lk_domain_t dom)
-{
- int ret = 0;
-
- errno = 0;
-
- pthread_mutex_lock (&pl_inode->mutex);
- {
- if (__is_lock_grantable (pl_inode, lock, dom)) {
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) %"PRId64" - %"PRId64" => OK",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lock->user_flock.l_start,
- lock->user_flock.l_len);
- __insert_and_merge (pl_inode, lock, dom);
- } else if (can_block) {
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) %"PRId64" - %"PRId64" => Blocked",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lock->user_flock.l_start,
- lock->user_flock.l_len);
- lock->blocked = 1;
- __insert_lock (pl_inode, lock, dom);
- ret = -1;
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) %"PRId64" - %"PRId64" => NOK",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lock->user_flock.l_start,
- lock->user_flock.l_len);
- errno = EAGAIN;
- ret = -1;
- }
- }
- pthread_mutex_unlock (&pl_inode->mutex);
-
- grant_blocked_locks (this, pl_inode, dom);
-
- do_blocked_rw (pl_inode);
-
- return ret;
-}
+pl_add_locker (struct _lock_table *table, const char *volume,
+ loc_t *loc, fd_t *fd, pid_t pid, gf_lkowner_t *owner,
+ glusterfs_fop_t type)
+{
+ int32_t ret = -1;
+ struct _locker *new = NULL;
+ GF_VALIDATE_OR_GOTO ("lock-table", table, out);
+ GF_VALIDATE_OR_GOTO ("lock-table", volume, out);
-posix_lock_t *
-pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock, gf_lk_domain_t dom)
+ new = GF_CALLOC (1, sizeof (struct _locker), gf_common_mt_locker);
+ if (new == NULL) {
+ goto out;
+ }
+ INIT_LIST_HEAD (&new->lockers);
+
+ new->volume = gf_strdup (volume);
+
+ if (fd == NULL) {
+ loc_copy (&new->loc, loc);
+ } else {
+ new->fd = fd_ref (fd);
+ }
+
+ new->pid = pid;
+ new->owner = *owner;
+
+ LOCK (&table->lock);
+ {
+ if (type == GF_FOP_ENTRYLK)
+ list_add_tail (&new->lockers, &table->entrylk_lockers);
+ else
+ list_add_tail (&new->lockers, &table->inodelk_lockers);
+ }
+ UNLOCK (&table->lock);
+out:
+ return ret;
+}
+
+int
+pl_del_locker (struct _lock_table *table, const char *volume,
+ loc_t *loc, fd_t *fd, gf_lkowner_t *owner, glusterfs_fop_t type)
{
- posix_lock_t *conf = NULL;
+ struct _locker *locker = NULL;
+ struct _locker *tmp = NULL;
+ int32_t ret = -1;
+ struct list_head *head = NULL;
+ struct list_head del;
+
+ GF_VALIDATE_OR_GOTO ("lock-table", table, out);
+ GF_VALIDATE_OR_GOTO ("lock-table", volume, out);
+
+ INIT_LIST_HEAD (&del);
+
+ LOCK (&table->lock);
+ {
+ if (type == GF_FOP_ENTRYLK) {
+ head = &table->entrylk_lockers;
+ } else {
+ head = &table->inodelk_lockers;
+ }
+
+ list_for_each_entry_safe (locker, tmp, head, lockers) {
+ if (!is_same_lkowner (&locker->owner, owner) ||
+ strcmp (locker->volume, volume))
+ continue;
+
+ /*
+ * It is possible for inodelk lock to come on anon-fd
+ * and inodelk unlock to come on normal fd in case of
+ * client re-opens. So don't check for fds to be equal.
+ */
+ if (locker->fd && fd)
+ list_move_tail (&locker->lockers, &del);
+ else if (locker->loc.inode && loc &&
+ (locker->loc.inode == loc->inode))
+ list_move_tail (&locker->lockers, &del);
+ }
+ }
+ UNLOCK (&table->lock);
- conf = first_overlap (pl_inode, lock, dom);
+ tmp = NULL;
+ locker = NULL;
- if (conf == NULL) {
- lock->fl_type = F_UNLCK;
- return lock;
- }
+ list_for_each_entry_safe (locker, tmp, &del, lockers) {
+ list_del_init (&locker->lockers);
+ if (locker->fd)
+ fd_unref (locker->fd);
+ else
+ loc_wipe (&locker->loc);
+
+ GF_FREE (locker->volume);
+ GF_FREE (locker);
+ }
+
+ ret = 0;
+out:
+ return ret;
- return conf;
}
+
diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h
index ee17b0087..db19ec978 100644
--- a/xlators/features/locks/src/common.h
+++ b/xlators/features/locks/src/common.h
@@ -1,43 +1,71 @@
/*
- Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef __COMMON_H__
#define __COMMON_H__
+#include "lkowner.h"
+/*dump locks format strings */
+#define RANGE_FMT "type=%s, whence=%hd, start=%llu, len=%llu"
+#define ENTRY_FMT "type=%s on basename=%s"
+#define DUMP_GEN_FMT "pid = %llu, owner=%s, client=%p"
+#define GRNTD_AT "granted at %s"
+#define BLKD_AT "blocked at %s"
+#define CONN_ID "connection-id=%s"
+#define DUMP_BLKD_FMT DUMP_GEN_FMT", "CONN_ID", "BLKD_AT
+#define DUMP_GRNTD_FMT DUMP_GEN_FMT", "CONN_ID", "GRNTD_AT
+#define DUMP_BLKD_GRNTD_FMT DUMP_GEN_FMT", "CONN_ID", "BLKD_AT", "GRNTD_AT
+
+#define ENTRY_BLKD_FMT ENTRY_FMT", "DUMP_BLKD_FMT
+#define ENTRY_GRNTD_FMT ENTRY_FMT", "DUMP_GRNTD_FMT
+#define ENTRY_BLKD_GRNTD_FMT ENTRY_FMT", "DUMP_BLKD_GRNTD_FMT
+
+#define RANGE_BLKD_FMT RANGE_FMT", "DUMP_BLKD_FMT
+#define RANGE_GRNTD_FMT RANGE_FMT", "DUMP_GRNTD_FMT
+#define RANGE_BLKD_GRNTD_FMT RANGE_FMT", "DUMP_BLKD_GRNTD_FMT
+
+#define SET_FLOCK_PID(flock, lock) ((flock)->l_pid = lock->client_pid)
+
+struct _locker {
+ struct list_head lockers;
+ char *volume;
+ loc_t loc;
+ fd_t *fd;
+ gf_lkowner_t owner;
+ pid_t pid;
+};
+
+struct _lock_table {
+ struct list_head inodelk_lockers;
+ struct list_head entrylk_lockers;
+ gf_lock_t lock;
+};
+
posix_lock_t *
-new_posix_lock (struct flock *flock, transport_t *transport, pid_t client_pid);
+new_posix_lock (struct gf_flock *flock, client_t *client, pid_t client_pid,
+ gf_lkowner_t *owner, fd_t *fd);
pl_inode_t *
pl_inode_get (xlator_t *this, inode_t *inode);
posix_lock_t *
-pl_getlk (pl_inode_t *inode, posix_lock_t *lock, gf_lk_domain_t domain);
+pl_getlk (pl_inode_t *inode, posix_lock_t *lock);
int
pl_setlk (xlator_t *this, pl_inode_t *inode, posix_lock_t *lock,
- int can_block, gf_lk_domain_t domain);
+ int can_block);
void
-grant_blocked_locks (xlator_t *this, pl_inode_t *inode, gf_lk_domain_t domain);
+grant_blocked_locks (xlator_t *this, pl_inode_t *inode);
void
-posix_lock_to_flock (posix_lock_t *lock, struct flock *flock);
+posix_lock_to_flock (posix_lock_t *lock, struct gf_flock *flock);
int
locks_overlap (posix_lock_t *l1, posix_lock_t *l2);
@@ -49,4 +77,111 @@ void __delete_lock (pl_inode_t *, posix_lock_t *);
void __destroy_lock (posix_lock_t *);
+pl_dom_list_t *
+get_domain (pl_inode_t *pl_inode, const char *volume);
+
+void
+grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
+ pl_dom_list_t *dom);
+
+void
+__delete_inode_lock (pl_inode_lock_t *lock);
+
+void
+__pl_inodelk_unref (pl_inode_lock_t *lock);
+
+void
+grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
+ pl_entry_lock_t *unlocked, pl_dom_list_t *dom);
+
+void pl_update_refkeeper (xlator_t *this, inode_t *inode);
+
+int32_t
+__get_inodelk_count (xlator_t *this, pl_inode_t *pl_inode, char *domname);
+int32_t
+get_inodelk_count (xlator_t *this, inode_t *inode, char *domname);
+
+int32_t
+__get_entrylk_count (xlator_t *this, pl_inode_t *pl_inode);
+int32_t
+get_entrylk_count (xlator_t *this, inode_t *inode);
+
+void pl_trace_in (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
+ int cmd, struct gf_flock *flock, const char *domain);
+
+void pl_trace_out (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
+ int cmd, struct gf_flock *flock, int op_ret, int op_errno, const char *domain);
+
+void pl_trace_block (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
+ int cmd, struct gf_flock *flock, const char *domain);
+
+void pl_trace_flush (xlator_t *this, call_frame_t *frame, fd_t *fd);
+
+void entrylk_trace_in (xlator_t *this, call_frame_t *frame, const char *volume,
+ fd_t *fd, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type);
+
+void entrylk_trace_out (xlator_t *this, call_frame_t *frame, const char *volume,
+ fd_t *fd, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type,
+ int op_ret, int op_errno);
+
+void entrylk_trace_block (xlator_t *this, call_frame_t *frame, const char *volume,
+ fd_t *fd, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type);
+
+void
+pl_print_verdict (char *str, int size, int op_ret, int op_errno);
+
+void
+pl_print_lockee (char *str, int size, fd_t *fd, loc_t *loc);
+
+void
+pl_print_locker (char *str, int size, xlator_t *this, call_frame_t *frame);
+
+void
+pl_print_inodelk (char *str, int size, int cmd, struct gf_flock *flock, const char *domain);
+
+void
+pl_trace_release (xlator_t *this, fd_t *fd);
+
+unsigned long
+fd_to_fdnum (fd_t *fd);
+
+fd_t *
+fd_from_fdnum (posix_lock_t *lock);
+
+int
+pl_reserve_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
+ int can_block);
+int
+reservelks_equal (posix_lock_t *l1, posix_lock_t *l2);
+
+int
+pl_verify_reservelk (xlator_t *this, pl_inode_t *pl_inode,
+ posix_lock_t *lock, int can_block);
+int
+pl_reserve_unlock (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *reqlock);
+
+uint32_t
+check_entrylk_on_basename (xlator_t *this, inode_t *parent, char *basename);
+
+int32_t
+pl_add_locker (struct _lock_table *table, const char *volume,
+ loc_t *loc,
+ fd_t *fd,
+ pid_t pid,
+ gf_lkowner_t *owner,
+ glusterfs_fop_t type);
+
+int32_t
+pl_del_locker (struct _lock_table *table, const char *volume,
+ loc_t *loc,
+ fd_t *fd,
+ gf_lkowner_t *owner,
+ glusterfs_fop_t type);
+
+struct _lock_table *
+pl_lock_table_new (void);
+
#endif /* __COMMON_H__ */
diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c
new file mode 100644
index 000000000..0785dc547
--- /dev/null
+++ b/xlators/features/locks/src/entrylk.c
@@ -0,0 +1,848 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "compat.h"
+#include "xlator.h"
+#include "inode.h"
+#include "logging.h"
+#include "common-utils.h"
+#include "list.h"
+
+#include "locks.h"
+#include "common.h"
+
+static pl_entry_lock_t *
+new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type,
+ client_t *client, pid_t client_pid, gf_lkowner_t *owner,
+ const char *volume)
+
+{
+ pl_entry_lock_t *newlock = NULL;
+
+ newlock = GF_CALLOC (1, sizeof (pl_entry_lock_t),
+ gf_locks_mt_pl_entry_lock_t);
+ if (!newlock) {
+ goto out;
+ }
+
+ newlock->basename = basename ? gf_strdup (basename) : NULL;
+ newlock->type = type;
+ newlock->trans = client;
+ newlock->volume = volume;
+ newlock->client_pid = client_pid;
+ newlock->owner = *owner;
+
+ INIT_LIST_HEAD (&newlock->domain_list);
+ INIT_LIST_HEAD (&newlock->blocked_locks);
+
+out:
+ return newlock;
+}
+
+
+/**
+ * all_names - does a basename represent all names?
+ * @basename: name to check
+ */
+
+#define all_names(basename) ((basename == NULL) ? 1 : 0)
+
+/**
+ * names_conflict - do two names conflict?
+ * @n1: name
+ * @n2: name
+ */
+
+static int
+names_conflict (const char *n1, const char *n2)
+{
+ return all_names (n1) || all_names (n2) || !strcmp (n1, n2);
+}
+
+
+static inline int
+__same_entrylk_owner (pl_entry_lock_t *l1, pl_entry_lock_t *l2)
+{
+
+ return (is_same_lkowner (&l1->owner, &l2->owner) &&
+ (l1->trans == l2->trans));
+}
+
+
+/**
+ * lock_grantable - is this lock grantable?
+ * @inode: inode in which to look
+ * @basename: name we're trying to lock
+ * @type: type of lock
+ */
+static pl_entry_lock_t *
+__lock_grantable (pl_dom_list_t *dom, const char *basename, entrylk_type type)
+{
+ pl_entry_lock_t *lock = NULL;
+
+ if (list_empty (&dom->entrylk_list))
+ return NULL;
+
+ list_for_each_entry (lock, &dom->entrylk_list, domain_list) {
+ if (names_conflict (lock->basename, basename))
+ return lock;
+ }
+
+ return NULL;
+}
+
+static pl_entry_lock_t *
+__blocked_lock_conflict (pl_dom_list_t *dom, const char *basename, entrylk_type type)
+{
+ pl_entry_lock_t *lock = NULL;
+
+ if (list_empty (&dom->blocked_entrylks))
+ return NULL;
+
+ list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) {
+ if (names_conflict (lock->basename, basename))
+ return lock;
+ }
+
+ return NULL;
+}
+
+static int
+__owner_has_lock (pl_dom_list_t *dom, pl_entry_lock_t *newlock)
+{
+ pl_entry_lock_t *lock = NULL;
+
+ list_for_each_entry (lock, &dom->entrylk_list, domain_list) {
+ if (__same_entrylk_owner (lock, newlock))
+ return 1;
+ }
+
+ list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) {
+ if (__same_entrylk_owner (lock, newlock))
+ return 1;
+ }
+
+ return 0;
+}
+
+static int
+names_equal (const char *n1, const char *n2)
+{
+ return (n1 == NULL && n2 == NULL) || (n1 && n2 && !strcmp (n1, n2));
+}
+
+void
+pl_print_entrylk (char *str, int size, entrylk_cmd cmd, entrylk_type type,
+ const char *basename, const char *domain)
+{
+ char *cmd_str = NULL;
+ char *type_str = NULL;
+
+ switch (cmd) {
+ case ENTRYLK_LOCK:
+ cmd_str = "LOCK";
+ break;
+
+ case ENTRYLK_LOCK_NB:
+ cmd_str = "LOCK_NB";
+ break;
+
+ case ENTRYLK_UNLOCK:
+ cmd_str = "UNLOCK";
+ break;
+
+ default:
+ cmd_str = "UNKNOWN";
+ break;
+ }
+
+ switch (type) {
+ case ENTRYLK_RDLCK:
+ type_str = "READ";
+ break;
+ case ENTRYLK_WRLCK:
+ type_str = "WRITE";
+ break;
+ default:
+ type_str = "UNKNOWN";
+ break;
+ }
+
+ snprintf (str, size, "lock=ENTRYLK, cmd=%s, type=%s, basename=%s, domain: %s",
+ cmd_str, type_str, basename, domain);
+}
+
+
+void
+entrylk_trace_in (xlator_t *this, call_frame_t *frame, const char *domain,
+ fd_t *fd, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type)
+{
+ posix_locks_private_t *priv = NULL;
+ char pl_locker[256];
+ char pl_lockee[256];
+ char pl_entrylk[256];
+
+ priv = this->private;
+
+ if (!priv->trace)
+ return;
+
+ pl_print_locker (pl_locker, 256, this, frame);
+ pl_print_lockee (pl_lockee, 256, fd, loc);
+ pl_print_entrylk (pl_entrylk, 256, cmd, type, basename, domain);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "[REQUEST] Locker = {%s} Lockee = {%s} Lock = {%s}",
+ pl_locker, pl_lockee, pl_entrylk);
+}
+
+
+void
+entrylk_trace_out (xlator_t *this, call_frame_t *frame, const char *domain,
+ fd_t *fd, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, int op_ret, int op_errno)
+{
+ posix_locks_private_t *priv = NULL;
+ char pl_locker[256];
+ char pl_lockee[256];
+ char pl_entrylk[256];
+ char verdict[32];
+
+ priv = this->private;
+
+ if (!priv->trace)
+ return;
+
+ pl_print_locker (pl_locker, 256, this, frame);
+ pl_print_lockee (pl_lockee, 256, fd, loc);
+ pl_print_entrylk (pl_entrylk, 256, cmd, type, basename, domain);
+ pl_print_verdict (verdict, 32, op_ret, op_errno);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "[%s] Locker = {%s} Lockee = {%s} Lock = {%s}",
+ verdict, pl_locker, pl_lockee, pl_entrylk);
+}
+
+
+void
+entrylk_trace_block (xlator_t *this, call_frame_t *frame, const char *volume,
+ fd_t *fd, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type)
+
+{
+ posix_locks_private_t *priv = NULL;
+ char pl_locker[256];
+ char pl_lockee[256];
+ char pl_entrylk[256];
+
+ priv = this->private;
+
+ if (!priv->trace)
+ return;
+
+ pl_print_locker (pl_locker, 256, this, frame);
+ pl_print_lockee (pl_lockee, 256, fd, loc);
+ pl_print_entrylk (pl_entrylk, 256, cmd, type, basename, volume);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "[BLOCKED] Locker = {%s} Lockee = {%s} Lock = {%s}",
+ pl_locker, pl_lockee, pl_entrylk);
+}
+
+/**
+ * __find_most_matching_lock - find the lock struct which most matches in order of:
+ * lock on the exact basename ||
+ * an all_names lock
+ *
+ *
+ * @inode: inode in which to look
+ * @basename: name to search for
+ */
+
+static pl_entry_lock_t *
+__find_most_matching_lock (pl_dom_list_t *dom, const char *basename)
+{
+ pl_entry_lock_t *lock;
+ pl_entry_lock_t *all = NULL;
+ pl_entry_lock_t *exact = NULL;
+
+ if (list_empty (&dom->entrylk_list))
+ return NULL;
+
+ list_for_each_entry (lock, &dom->entrylk_list, domain_list) {
+ if (all_names (lock->basename))
+ all = lock;
+ else if (names_equal (lock->basename, basename))
+ exact = lock;
+ }
+
+ return (exact ? exact : all);
+}
+
+/**
+ * __lock_name - lock a name in a directory
+ * @inode: inode for the directory in which to lock
+ * @basename: name of the entry to lock
+ * if null, lock the entire directory
+ *
+ * the entire directory being locked is represented as: a single
+ * pl_entry_lock_t present in the entrylk_locks list with its
+ * basename = NULL
+ */
+
+int
+__lock_name (pl_inode_t *pinode, const char *basename, entrylk_type type,
+ call_frame_t *frame, pl_dom_list_t *dom, xlator_t *this,
+ int nonblock, char *conn_id)
+{
+ pl_entry_lock_t *lock = NULL;
+ pl_entry_lock_t *conf = NULL;
+ int ret = -EINVAL;
+
+ lock = new_entrylk_lock (pinode, basename, type,
+ frame->root->client, frame->root->pid,
+ &frame->root->lk_owner, dom->domain);
+ if (!lock) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ lock->frame = frame;
+ lock->this = this;
+ lock->trans = frame->root->client;
+
+ if (conn_id) {
+ lock->connection_id = gf_strdup (conn_id);
+ }
+
+ conf = __lock_grantable (dom, basename, type);
+ if (conf) {
+ ret = -EAGAIN;
+ if (nonblock){
+ GF_FREE (lock->connection_id);
+ GF_FREE ((char *)lock->basename);
+ GF_FREE (lock);
+ goto out;
+
+ }
+
+ gettimeofday (&lock->blkd_time, NULL);
+ list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks);
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "Blocking lock: {pinode=%p, basename=%s}",
+ pinode, basename);
+
+ goto out;
+ }
+
+ if ( __blocked_lock_conflict (dom, basename, type) && !(__owner_has_lock (dom, lock))) {
+ ret = -EAGAIN;
+ if (nonblock) {
+ GF_FREE (lock->connection_id);
+ GF_FREE ((char *) lock->basename);
+ GF_FREE (lock);
+ goto out;
+
+ }
+ lock->frame = frame;
+ lock->this = this;
+
+ gettimeofday (&lock->blkd_time, NULL);
+ list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks);
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "Lock is grantable, but blocking to prevent starvation");
+ gf_log (this->name, GF_LOG_TRACE,
+ "Blocking lock: {pinode=%p, basename=%s}",
+ pinode, basename);
+
+ ret = -EAGAIN;
+ goto out;
+ }
+ switch (type) {
+
+ case ENTRYLK_WRLCK:
+ gettimeofday (&lock->granted_time, NULL);
+ list_add_tail (&lock->domain_list, &dom->entrylk_list);
+ break;
+
+ default:
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Invalid type for entrylk specified: %d", type);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/**
+ * __unlock_name - unlock a name in a directory
+ * @inode: inode for the directory to unlock in
+ * @basename: name of the entry to unlock
+ * if null, unlock the entire directory
+ */
+
+pl_entry_lock_t *
+__unlock_name (pl_dom_list_t *dom, const char *basename, entrylk_type type)
+{
+ pl_entry_lock_t *lock = NULL;
+ pl_entry_lock_t *ret_lock = NULL;
+
+ lock = __find_most_matching_lock (dom, basename);
+
+ if (!lock) {
+ gf_log ("locks", GF_LOG_DEBUG,
+ "unlock on %s (type=ENTRYLK_WRLCK) attempted but no matching lock found",
+ basename);
+ goto out;
+ }
+
+ if (names_equal (lock->basename, basename)
+ && lock->type == type) {
+
+ if (type == ENTRYLK_WRLCK) {
+ list_del_init (&lock->domain_list);
+ ret_lock = lock;
+ }
+ } else {
+ gf_log ("locks", GF_LOG_DEBUG,
+ "Unlock for a non-existing lock!");
+ goto out;
+ }
+
+out:
+ return ret_lock;
+}
+
+uint32_t
+check_entrylk_on_basename (xlator_t *this, inode_t *parent, char *basename)
+{
+ uint32_t entrylk = 0;
+ pl_inode_t *pinode = 0;
+ pl_dom_list_t *dom = NULL;
+ pl_entry_lock_t *conf = NULL;
+
+ pinode = pl_inode_get (this, parent);
+ if (!pinode)
+ goto out;
+ pthread_mutex_lock (&pinode->mutex);
+ {
+ list_for_each_entry (dom, &pinode->dom_list, inode_list) {
+ conf = __lock_grantable (dom, basename, ENTRYLK_WRLCK);
+ if (conf && conf->basename) {
+ entrylk = 1;
+ break;
+ }
+ }
+ }
+ pthread_mutex_unlock (&pinode->mutex);
+
+out:
+ return entrylk;
+}
+
+void
+__grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
+ pl_dom_list_t *dom, struct list_head *granted)
+{
+ int bl_ret = 0;
+ pl_entry_lock_t *bl = NULL;
+ pl_entry_lock_t *tmp = NULL;
+
+ struct list_head blocked_list;
+
+ INIT_LIST_HEAD (&blocked_list);
+ list_splice_init (&dom->blocked_entrylks, &blocked_list);
+
+ list_for_each_entry_safe (bl, tmp, &blocked_list,
+ blocked_locks) {
+
+ list_del_init (&bl->blocked_locks);
+
+
+ gf_log ("locks", GF_LOG_TRACE,
+ "Trying to unblock: {pinode=%p, basename=%s}",
+ pl_inode, bl->basename);
+
+ bl_ret = __lock_name (pl_inode, bl->basename, bl->type,
+ bl->frame, dom, bl->this, 0,
+ bl->connection_id);
+
+ if (bl_ret == 0) {
+ list_add (&bl->blocked_locks, granted);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "should never happen");
+ GF_FREE (bl->connection_id);
+ GF_FREE ((char *)bl->basename);
+ GF_FREE (bl);
+ }
+ }
+ return;
+}
+
+/* Grants locks if possible which are blocked on a lock */
+void
+grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
+ pl_entry_lock_t *unlocked, pl_dom_list_t *dom)
+{
+ struct list_head granted_list;
+ pl_entry_lock_t *tmp = NULL;
+ pl_entry_lock_t *lock = NULL;
+
+ INIT_LIST_HEAD (&granted_list);
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __grant_blocked_entry_locks (this, pl_inode, dom,
+ &granted_list);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ list_for_each_entry_safe (lock, tmp, &granted_list, blocked_locks) {
+ list_del_init (&lock->blocked_locks);
+
+ entrylk_trace_out (this, lock->frame, NULL, NULL, NULL,
+ lock->basename, ENTRYLK_LOCK, lock->type,
+ 0, 0);
+
+ STACK_UNWIND_STRICT (entrylk, lock->frame, 0, 0, NULL);
+
+ GF_FREE (lock->connection_id);
+ GF_FREE ((char *)lock->basename);
+ GF_FREE (lock);
+ }
+
+ GF_FREE ((char *)unlocked->basename);
+ GF_FREE (unlocked->connection_id);
+ GF_FREE (unlocked);
+
+ return;
+}
+
+/**
+ * release_entry_locks_for_client: release all entry locks from this
+ * client for this loc_t
+ */
+
+static int
+release_entry_locks_for_client (xlator_t *this, pl_inode_t *pinode,
+ pl_dom_list_t *dom, client_t *client)
+{
+ pl_entry_lock_t *lock = NULL;
+ pl_entry_lock_t *tmp = NULL;
+ struct list_head granted;
+ struct list_head released;
+
+ INIT_LIST_HEAD (&granted);
+ INIT_LIST_HEAD (&released);
+
+ pthread_mutex_lock (&pinode->mutex);
+ {
+ list_for_each_entry_safe (lock, tmp, &dom->blocked_entrylks,
+ blocked_locks) {
+ if (lock->trans != client)
+ continue;
+
+ list_del_init (&lock->blocked_locks);
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "releasing lock on held by "
+ "{client=%p}", client);
+
+ list_add (&lock->blocked_locks, &released);
+
+ }
+
+ list_for_each_entry_safe (lock, tmp, &dom->entrylk_list,
+ domain_list) {
+ if (lock->trans != client)
+ continue;
+
+ list_del_init (&lock->domain_list);
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "releasing lock on held by "
+ "{client=%p}", client);
+
+ GF_FREE ((char *)lock->basename);
+ GF_FREE (lock->connection_id);
+ GF_FREE (lock);
+ }
+
+ __grant_blocked_entry_locks (this, pinode, dom, &granted);
+
+ }
+
+ pthread_mutex_unlock (&pinode->mutex);
+
+ list_for_each_entry_safe (lock, tmp, &released, blocked_locks) {
+ list_del_init (&lock->blocked_locks);
+
+ STACK_UNWIND_STRICT (entrylk, lock->frame, -1, EAGAIN, NULL);
+
+ GF_FREE ((char *)lock->basename);
+ GF_FREE (lock->connection_id);
+ GF_FREE (lock);
+
+ }
+
+ list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) {
+ list_del_init (&lock->blocked_locks);
+
+ STACK_UNWIND_STRICT (entrylk, lock->frame, 0, 0, NULL);
+
+ GF_FREE ((char *)lock->basename);
+ GF_FREE (lock->connection_id);
+ GF_FREE (lock);
+ }
+
+ return 0;
+}
+
+/* Common entrylk code called by pl_entrylk and pl_fentrylk */
+int
+pl_common_entrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, inode_t *inode, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
+
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int ret = -1;
+ char unwind = 1;
+ GF_UNUSED int dict_ret = -1;
+ pl_inode_t *pinode = NULL;
+ pl_entry_lock_t *unlocked = NULL;
+ pl_dom_list_t *dom = NULL;
+ char *conn_id = NULL;
+ pl_ctx_t *ctx = NULL;
+
+ if (xdata)
+ dict_ret = dict_get_str (xdata, "connection-id", &conn_id);
+
+ pinode = pl_inode_get (this, inode);
+ if (!pinode) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ dom = get_domain (pinode, volume);
+ if (!dom){
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ entrylk_trace_in (this, frame, volume, fd, loc, basename, cmd, type);
+
+ if (frame->root->lk_owner.len == 0) {
+ /*
+ this is a special case that means release
+ all locks from this client
+ */
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "Releasing locks for client %p", frame->root->client);
+
+ release_entry_locks_for_client (this, pinode, dom,
+ frame->root->client);
+ op_ret = 0;
+
+ goto out;
+ }
+
+ switch (cmd) {
+ case ENTRYLK_LOCK:
+ pthread_mutex_lock (&pinode->mutex);
+ {
+ ret = __lock_name (pinode, basename, type,
+ frame, dom, this, 0, conn_id);
+ }
+ pthread_mutex_unlock (&pinode->mutex);
+
+ op_errno = -ret;
+ if (ret < 0) {
+ if (ret == -EAGAIN)
+ unwind = 0;
+ else
+ unwind = 1;
+ goto out;
+ } else {
+ op_ret = 0;
+ op_errno = 0;
+ unwind = 1;
+ goto out;
+ }
+
+ break;
+
+ case ENTRYLK_LOCK_NB:
+ unwind = 1;
+ pthread_mutex_lock (&pinode->mutex);
+ {
+ ret = __lock_name (pinode, basename, type,
+ frame, dom, this, 1, conn_id);
+ }
+ pthread_mutex_unlock (&pinode->mutex);
+
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ break;
+
+ case ENTRYLK_UNLOCK:
+ pthread_mutex_lock (&pinode->mutex);
+ {
+ unlocked = __unlock_name (dom, basename, type);
+ }
+ pthread_mutex_unlock (&pinode->mutex);
+
+ if (unlocked)
+ grant_blocked_entry_locks (this, pinode, unlocked, dom);
+
+ break;
+
+ default:
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unexpected case in entrylk (cmd=%d). Please file"
+ "a bug report at http://bugs.gluster.com", cmd);
+ goto out;
+ }
+
+ op_ret = 0;
+out:
+ pl_update_refkeeper (this, inode);
+ if (unwind) {
+ entrylk_trace_out (this, frame, volume, fd, loc, basename,
+ cmd, type, op_ret, op_errno);
+
+ ctx = pl_ctx_get (frame->root->client, this);
+
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed");
+ goto unwind;
+ }
+
+ if (cmd == ENTRYLK_UNLOCK)
+ pl_del_locker (ctx->ltable, volume, loc, fd,
+ &frame->root->lk_owner,
+ GF_FOP_ENTRYLK);
+ else
+ pl_add_locker (ctx->ltable, volume, loc, fd,
+ frame->root->pid,
+ &frame->root->lk_owner,
+ GF_FOP_ENTRYLK);
+
+unwind:
+ STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, NULL);
+ } else {
+ entrylk_trace_block (this, frame, volume, fd, loc, basename,
+ cmd, type);
+ }
+
+
+ return 0;
+}
+
+/**
+ * pl_entrylk:
+ *
+ * Locking on names (directory entries)
+ */
+
+int
+pl_entrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+{
+ pl_common_entrylk (frame, this, volume, loc->inode, basename, cmd,
+ type, loc, NULL, xdata);
+
+ return 0;
+}
+
+
+/**
+ * pl_fentrylk:
+ *
+ * Locking on names (directory entries)
+ */
+
+int
+pl_fentrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+{
+ pl_common_entrylk (frame, this, volume, fd->inode, basename, cmd,
+ type, NULL, fd, xdata);
+
+ return 0;
+}
+
+
+int32_t
+__get_entrylk_count (xlator_t *this, pl_inode_t *pl_inode)
+{
+ int32_t count = 0;
+ pl_entry_lock_t *lock = NULL;
+ pl_dom_list_t *dom = NULL;
+
+ list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
+ list_for_each_entry (lock, &dom->entrylk_list, domain_list) {
+ count++;
+ }
+
+ list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) {
+ count++;
+ }
+
+ }
+
+ return count;
+}
+
+int32_t
+get_entrylk_count (xlator_t *this, inode_t *inode)
+{
+ pl_inode_t *pl_inode = NULL;
+ uint64_t tmp_pl_inode = 0;
+ int ret = 0;
+ int32_t count = 0;
+
+ ret = inode_ctx_get (inode, this, &tmp_pl_inode);
+ if (ret != 0) {
+ goto out;
+ }
+
+ pl_inode = (pl_inode_t *)(long) tmp_pl_inode;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ count = __get_entrylk_count (this, pl_inode);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+out:
+ return count;
+}
diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c
new file mode 100644
index 000000000..508523e11
--- /dev/null
+++ b/xlators/features/locks/src/inodelk.c
@@ -0,0 +1,825 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "compat.h"
+#include "xlator.h"
+#include "inode.h"
+#include "logging.h"
+#include "common-utils.h"
+#include "list.h"
+
+#include "locks.h"
+#include "common.h"
+
+inline void
+__delete_inode_lock (pl_inode_lock_t *lock)
+{
+ list_del (&lock->list);
+}
+
+static inline void
+__pl_inodelk_ref (pl_inode_lock_t *lock)
+{
+ lock->ref++;
+}
+
+inline void
+__pl_inodelk_unref (pl_inode_lock_t *lock)
+{
+ lock->ref--;
+ if (!lock->ref) {
+ GF_FREE (lock->connection_id);
+ GF_FREE (lock);
+ }
+}
+
+/* Check if 2 inodelks are conflicting on type. Only 2 shared locks don't conflict */
+static inline int
+inodelk_type_conflict (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
+{
+ if (l2->fl_type == F_WRLCK || l1->fl_type == F_WRLCK)
+ return 1;
+
+ return 0;
+}
+
+void
+pl_print_inodelk (char *str, int size, int cmd, struct gf_flock *flock, const char *domain)
+{
+ char *cmd_str = NULL;
+ char *type_str = NULL;
+
+ switch (cmd) {
+#if F_GETLK != F_GETLK64
+ case F_GETLK64:
+#endif
+ case F_GETLK:
+ cmd_str = "GETLK";
+ break;
+
+#if F_SETLK != F_SETLK64
+ case F_SETLK64:
+#endif
+ case F_SETLK:
+ cmd_str = "SETLK";
+ break;
+
+#if F_SETLKW != F_SETLKW64
+ case F_SETLKW64:
+#endif
+ case F_SETLKW:
+ cmd_str = "SETLKW";
+ break;
+
+ default:
+ cmd_str = "UNKNOWN";
+ break;
+ }
+
+ switch (flock->l_type) {
+ case F_RDLCK:
+ type_str = "READ";
+ break;
+ case F_WRLCK:
+ type_str = "WRITE";
+ break;
+ case F_UNLCK:
+ type_str = "UNLOCK";
+ break;
+ default:
+ type_str = "UNKNOWN";
+ break;
+ }
+
+ snprintf (str, size, "lock=INODELK, cmd=%s, type=%s, "
+ "domain: %s, start=%llu, len=%llu, pid=%llu",
+ cmd_str, type_str, domain,
+ (unsigned long long) flock->l_start,
+ (unsigned long long) flock->l_len,
+ (unsigned long long) flock->l_pid);
+}
+
+/* Determine if the two inodelks overlap reach other's lock regions */
+static int
+inodelk_overlap (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
+{
+ return ((l1->fl_end >= l2->fl_start) &&
+ (l2->fl_end >= l1->fl_start));
+}
+
+/* Returns true if the 2 inodelks have the same owner */
+static inline int
+same_inodelk_owner (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
+{
+ return (is_same_lkowner (&l1->owner, &l2->owner) &&
+ (l1->client == l2->client));
+}
+
+/* Returns true if the 2 inodelks conflict with each other */
+static int
+inodelk_conflict (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
+{
+ return (inodelk_overlap (l1, l2) &&
+ inodelk_type_conflict (l1, l2));
+}
+
+/* Determine if lock is grantable or not */
+static pl_inode_lock_t *
+__inodelk_grantable (pl_dom_list_t *dom, pl_inode_lock_t *lock)
+{
+ pl_inode_lock_t *l = NULL;
+ pl_inode_lock_t *ret = NULL;
+ if (list_empty (&dom->inodelk_list))
+ goto out;
+ list_for_each_entry (l, &dom->inodelk_list, list){
+ if (inodelk_conflict (lock, l) &&
+ !same_inodelk_owner (lock, l)) {
+ ret = l;
+ goto out;
+ }
+ }
+out:
+ return ret;
+}
+
+static pl_inode_lock_t *
+__blocked_lock_conflict (pl_dom_list_t *dom, pl_inode_lock_t *lock)
+{
+ pl_inode_lock_t *l = NULL;
+ pl_inode_lock_t *ret = NULL;
+
+ if (list_empty (&dom->blocked_inodelks))
+ return NULL;
+
+ list_for_each_entry (l, &dom->blocked_inodelks, blocked_locks) {
+ if (inodelk_conflict (lock, l)) {
+ ret = l;
+ goto out;
+ }
+ }
+
+out:
+ return ret;
+}
+
+static int
+__owner_has_lock (pl_dom_list_t *dom, pl_inode_lock_t *newlock)
+{
+ pl_inode_lock_t *lock = NULL;
+
+ list_for_each_entry (lock, &dom->inodelk_list, list) {
+ if (same_inodelk_owner (lock, newlock))
+ return 1;
+ }
+
+ list_for_each_entry (lock, &dom->blocked_inodelks, blocked_locks) {
+ if (same_inodelk_owner (lock, newlock))
+ return 1;
+ }
+
+ return 0;
+}
+
+
+/* Determines if lock can be granted and adds the lock. If the lock
+ * is blocking, adds it to the blocked_inodelks list of the domain.
+ */
+static int
+__lock_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
+ int can_block, pl_dom_list_t *dom)
+{
+ pl_inode_lock_t *conf = NULL;
+ int ret = -EINVAL;
+
+ conf = __inodelk_grantable (dom, lock);
+ if (conf){
+ ret = -EAGAIN;
+ if (can_block == 0)
+ goto out;
+
+ gettimeofday (&lock->blkd_time, NULL);
+ list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks);
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lkowner_utoa (&lock->owner),
+ lock->user_flock.l_start,
+ lock->user_flock.l_len);
+
+
+ goto out;
+ }
+
+ if (__blocked_lock_conflict (dom, lock) && !(__owner_has_lock (dom, lock))) {
+ ret = -EAGAIN;
+ if (can_block == 0)
+ goto out;
+
+ gettimeofday (&lock->blkd_time, NULL);
+ list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks);
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "Lock is grantable, but blocking to prevent starvation");
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Blocked",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lkowner_utoa (&lock->owner),
+ lock->user_flock.l_start,
+ lock->user_flock.l_len);
+
+
+ goto out;
+ }
+ __pl_inodelk_ref (lock);
+ gettimeofday (&lock->granted_time, NULL);
+ list_add (&lock->list, &dom->inodelk_list);
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+/* Return true if the two inodelks have exactly same lock boundaries */
+static int
+inodelks_equal (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
+{
+ if ((l1->fl_start == l2->fl_start) &&
+ (l1->fl_end == l2->fl_end))
+ return 1;
+
+ return 0;
+}
+
+
+static pl_inode_lock_t *
+find_matching_inodelk (pl_inode_lock_t *lock, pl_dom_list_t *dom)
+{
+ pl_inode_lock_t *l = NULL;
+ list_for_each_entry (l, &dom->inodelk_list, list) {
+ if (inodelks_equal (l, lock) &&
+ same_inodelk_owner (l, lock))
+ return l;
+ }
+ return NULL;
+}
+
+/* Set F_UNLCK removes a lock which has the exact same lock boundaries
+ * as the UNLCK lock specifies. If such a lock is not found, returns invalid
+ */
+static pl_inode_lock_t *
+__inode_unlock_lock (xlator_t *this, pl_inode_lock_t *lock, pl_dom_list_t *dom)
+{
+
+ pl_inode_lock_t *conf = NULL;
+
+ conf = find_matching_inodelk (lock, dom);
+ if (!conf) {
+ gf_log (this->name, GF_LOG_ERROR,
+ " Matching lock not found for unlock %llu-%llu, by %s "
+ "on %p", (unsigned long long)lock->fl_start,
+ (unsigned long long)lock->fl_end,
+ lkowner_utoa (&lock->owner), lock->client);
+ goto out;
+ }
+ __delete_inode_lock (conf);
+ gf_log (this->name, GF_LOG_DEBUG,
+ " Matching lock found for unlock %llu-%llu, by %s on %p",
+ (unsigned long long)lock->fl_start,
+ (unsigned long long)lock->fl_end, lkowner_utoa (&lock->owner),
+ lock->client);
+
+out:
+ return conf;
+}
+static void
+__grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted, pl_dom_list_t *dom)
+{
+ int bl_ret = 0;
+ pl_inode_lock_t *bl = NULL;
+ pl_inode_lock_t *tmp = NULL;
+
+ struct list_head blocked_list;
+
+ INIT_LIST_HEAD (&blocked_list);
+ list_splice_init (&dom->blocked_inodelks, &blocked_list);
+
+ list_for_each_entry_safe (bl, tmp, &blocked_list, blocked_locks) {
+
+ list_del_init (&bl->blocked_locks);
+
+ bl_ret = __lock_inodelk (this, pl_inode, bl, 1, dom);
+
+ if (bl_ret == 0) {
+ list_add (&bl->blocked_locks, granted);
+ }
+ }
+ return;
+}
+
+/* Grant all inodelks blocked on a lock */
+void
+grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
+ pl_dom_list_t *dom)
+{
+ struct list_head granted;
+ pl_inode_lock_t *lock;
+ pl_inode_lock_t *tmp;
+
+ INIT_LIST_HEAD (&granted);
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __grant_blocked_inode_locks (this, pl_inode, &granted, dom);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Granted",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lkowner_utoa (&lock->owner),
+ lock->user_flock.l_start,
+ lock->user_flock.l_len);
+
+ pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW,
+ &lock->user_flock, 0, 0, lock->volume);
+
+ STACK_UNWIND_STRICT (inodelk, lock->frame, 0, 0, NULL);
+ }
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) {
+ list_del_init (&lock->blocked_locks);
+ __pl_inodelk_unref (lock);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+}
+
+/* Release all inodelks from this client */
+static int
+release_inode_locks_of_client (xlator_t *this, pl_dom_list_t *dom,
+ inode_t *inode, client_t *client)
+{
+ pl_inode_lock_t *tmp = NULL;
+ pl_inode_lock_t *l = NULL;
+
+ pl_inode_t * pinode = NULL;
+
+ struct list_head released;
+
+ char *path = NULL;
+ char *file = NULL;
+
+ INIT_LIST_HEAD (&released);
+
+ pinode = pl_inode_get (this, inode);
+
+ pthread_mutex_lock (&pinode->mutex);
+ {
+
+ list_for_each_entry_safe (l, tmp, &dom->blocked_inodelks, blocked_locks) {
+ if (l->client != client)
+ continue;
+
+ list_del_init (&l->blocked_locks);
+
+ inode_path (inode, NULL, &path);
+ if (path)
+ file = path;
+ else
+ file = uuid_utoa (inode->gfid);
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "releasing blocking lock on %s held by "
+ "{client=%p, pid=%"PRId64" lk-owner=%s}",
+ file, client, (uint64_t) l->client_pid,
+ lkowner_utoa (&l->owner));
+
+ list_add (&l->blocked_locks, &released);
+ if (path) {
+ GF_FREE (path);
+ path = NULL;
+ }
+ }
+
+ list_for_each_entry_safe (l, tmp, &dom->inodelk_list, list) {
+ if (l->client != client)
+ continue;
+
+ inode_path (inode, NULL, &path);
+ if (path)
+ file = path;
+ else
+ file = uuid_utoa (inode->gfid);
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "releasing granted lock on %s held by "
+ "{client=%p, pid=%"PRId64" lk-owner=%s}",
+ file, client, (uint64_t) l->client_pid,
+ lkowner_utoa (&l->owner));
+
+ if (path) {
+ GF_FREE (path);
+ path = NULL;
+ }
+
+ __delete_inode_lock (l);
+ __pl_inodelk_unref (l);
+ }
+ }
+ GF_FREE (path);
+
+ pthread_mutex_unlock (&pinode->mutex);
+
+ list_for_each_entry_safe (l, tmp, &released, blocked_locks) {
+ list_del_init (&l->blocked_locks);
+
+ STACK_UNWIND_STRICT (inodelk, l->frame, -1, EAGAIN, NULL);
+ //No need to take lock as the locks are only in one list
+ __pl_inodelk_unref (l);
+ }
+
+ grant_blocked_inode_locks (this, pinode, dom);
+ return 0;
+}
+
+
+static int
+pl_inode_setlk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
+ int can_block, pl_dom_list_t *dom)
+{
+ int ret = -EINVAL;
+ pl_inode_lock_t *retlock = NULL;
+ gf_boolean_t unref = _gf_true;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ if (lock->fl_type != F_UNLCK) {
+ ret = __lock_inodelk (this, pl_inode, lock, can_block, dom);
+ if (ret == 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => OK",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lkowner_utoa (&lock->owner),
+ lock->fl_start,
+ lock->fl_end);
+ } else if (ret == -EAGAIN) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => NOK",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lkowner_utoa (&lock->owner),
+ lock->user_flock.l_start,
+ lock->user_flock.l_len);
+ if (can_block)
+ unref = _gf_false;
+ }
+ } else {
+ retlock = __inode_unlock_lock (this, lock, dom);
+ if (!retlock) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Bad Unlock issued on Inode lock");
+ ret = -EINVAL;
+ goto out;
+ }
+ __pl_inodelk_unref (retlock);
+
+ ret = 0;
+ }
+ }
+out:
+ if (unref)
+ __pl_inodelk_unref (lock);
+ pthread_mutex_unlock (&pl_inode->mutex);
+ grant_blocked_inode_locks (this, pl_inode, dom);
+ return ret;
+}
+
+/* Create a new inode_lock_t */
+pl_inode_lock_t *
+new_inode_lock (struct gf_flock *flock, client_t *client, pid_t client_pid,
+ call_frame_t *frame, xlator_t *this, const char *volume,
+ char *conn_id)
+
+{
+ pl_inode_lock_t *lock = NULL;
+
+ lock = GF_CALLOC (1, sizeof (*lock),
+ gf_locks_mt_pl_inode_lock_t);
+ if (!lock) {
+ return NULL;
+ }
+
+ lock->fl_start = flock->l_start;
+ lock->fl_type = flock->l_type;
+
+ if (flock->l_len == 0)
+ lock->fl_end = LLONG_MAX;
+ else
+ lock->fl_end = flock->l_start + flock->l_len - 1;
+
+ lock->client = client;
+ lock->client_pid = client_pid;
+ lock->volume = volume;
+ lock->owner = frame->root->lk_owner;
+ lock->frame = frame;
+ lock->this = this;
+
+ if (conn_id) {
+ lock->connection_id = gf_strdup (conn_id);
+ }
+
+ INIT_LIST_HEAD (&lock->list);
+ INIT_LIST_HEAD (&lock->blocked_locks);
+ __pl_inodelk_ref (lock);
+
+ return lock;
+}
+
+int32_t
+_pl_convert_volume (const char *volume, char **res)
+{
+ char *mdata_vol = NULL;
+ int ret = 0;
+
+ mdata_vol = strrchr (volume, ':');
+ //if the volume already ends with :metadata don't bother
+ if (mdata_vol && (strcmp (mdata_vol, ":metadata") == 0))
+ return 0;
+
+ ret = gf_asprintf (res, "%s:metadata", volume);
+ if (ret <= 0)
+ return ENOMEM;
+ return 0;
+}
+
+int32_t
+_pl_convert_volume_for_special_range (struct gf_flock *flock,
+ const char *volume, char **res)
+{
+ int32_t ret = 0;
+
+ if ((flock->l_start == LLONG_MAX -1) &&
+ (flock->l_len == 0)) {
+ ret = _pl_convert_volume (volume, res);
+ }
+
+ return ret;
+}
+
+/* Common inodelk code called from pl_inodelk and pl_finodelk */
+int
+pl_common_inodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, inode_t *inode, int32_t cmd,
+ struct gf_flock *flock, loc_t *loc, fd_t *fd, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int ret = -1;
+ GF_UNUSED int dict_ret = -1;
+ int can_block = 0;
+ pl_inode_t * pinode = NULL;
+ pl_inode_lock_t * reqlock = NULL;
+ pl_dom_list_t * dom = NULL;
+ char *res = NULL;
+ char *res1 = NULL;
+ char *conn_id = NULL;
+ pl_ctx_t *ctx = NULL;
+
+ if (xdata)
+ dict_ret = dict_get_str (xdata, "connection-id", &conn_id);
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (inode, unwind);
+ VALIDATE_OR_GOTO (flock, unwind);
+
+ if ((flock->l_start < 0) || (flock->l_len < 0)) {
+ op_errno = EINVAL;
+ goto unwind;
+ }
+
+ op_errno = _pl_convert_volume_for_special_range (flock, volume, &res);
+ if (op_errno)
+ goto unwind;
+ if (res)
+ volume = res;
+
+ pl_trace_in (this, frame, fd, loc, cmd, flock, volume);
+
+ pinode = pl_inode_get (this, inode);
+ if (!pinode) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ dom = get_domain (pinode, volume);
+ if (!dom) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ if (frame->root->lk_owner.len == 0) {
+ /*
+ special case: this means release all locks
+ from this client
+ */
+ gf_log (this->name, GF_LOG_TRACE,
+ "Releasing all locks from client %p", frame->root->client);
+
+ release_inode_locks_of_client (this, dom, inode, frame->root->client);
+ _pl_convert_volume (volume, &res1);
+ if (res1) {
+ dom = get_domain (pinode, res1);
+ if (dom)
+ release_inode_locks_of_client (this, dom,
+ inode, frame->root->client);
+ }
+
+ op_ret = 0;
+ goto unwind;
+ }
+
+ reqlock = new_inode_lock (flock, frame->root->client, frame->root->pid,
+ frame, this, volume, conn_id);
+
+ if (!reqlock) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+
+ switch (cmd) {
+ case F_SETLKW:
+ can_block = 1;
+
+ /* fall through */
+
+ case F_SETLK:
+ memcpy (&reqlock->user_flock, flock, sizeof (struct gf_flock));
+ ret = pl_inode_setlk (this, pinode, reqlock,
+ can_block, dom);
+
+ if (ret < 0) {
+ if ((can_block) && (F_UNLCK != flock->l_type)) {
+ pl_trace_block (this, frame, fd, loc,
+ cmd, flock, volume);
+ goto out;
+ }
+ gf_log (this->name, GF_LOG_TRACE, "returning EAGAIN");
+ op_errno = -ret;
+ goto unwind;
+ }
+ break;
+
+ default:
+ op_errno = ENOTSUP;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Lock command F_GETLK not supported for [f]inodelk "
+ "(cmd=%d)",
+ cmd);
+ goto unwind;
+ }
+
+ op_ret = 0;
+
+ ctx = pl_ctx_get (frame->root->client, this);
+
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed");
+ goto unwind;
+ }
+
+ if (flock->l_type == F_UNLCK)
+ pl_del_locker (ctx->ltable, volume, loc, fd,
+ &frame->root->lk_owner,
+ GF_FOP_INODELK);
+ else
+ pl_add_locker (ctx->ltable, volume, loc, fd,
+ frame->root->pid,
+ &frame->root->lk_owner,
+ GF_FOP_INODELK);
+
+unwind:
+ if ((inode != NULL) && (flock !=NULL)) {
+ pl_update_refkeeper (this, inode);
+ pl_trace_out (this, frame, fd, loc, cmd, flock, op_ret, op_errno, volume);
+ }
+
+ STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, NULL);
+out:
+ GF_FREE (res);
+ GF_FREE (res1);
+ return 0;
+}
+
+int
+pl_inodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata)
+{
+ pl_common_inodelk (frame, this, volume, loc->inode, cmd, flock,
+ loc, NULL, xdata);
+
+ return 0;
+}
+
+int
+pl_finodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata)
+{
+ pl_common_inodelk (frame, this, volume, fd->inode, cmd, flock,
+ NULL, fd, xdata);
+
+ return 0;
+
+}
+
+static inline int32_t
+__get_inodelk_dom_count (pl_dom_list_t *dom)
+{
+ pl_inode_lock_t *lock = NULL;
+ int32_t count = 0;
+
+ list_for_each_entry (lock, &dom->inodelk_list, list) {
+ count++;
+ }
+ list_for_each_entry (lock, &dom->blocked_inodelks, blocked_locks) {
+ count++;
+ }
+ return count;
+}
+
+/* Returns the no. of locks (blocked/granted) held on a given domain name
+ * If @domname is NULL, returns the no. of locks in all the domains present.
+ * If @domname is non-NULL and non-existent, returns 0 */
+int32_t
+__get_inodelk_count (xlator_t *this, pl_inode_t *pl_inode, char *domname)
+{
+ int32_t count = 0;
+ pl_dom_list_t *dom = NULL;
+
+ list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
+ if (domname) {
+ if (strcmp (domname, dom->domain) == 0) {
+ count = __get_inodelk_dom_count (dom);
+ goto out;
+ }
+
+ } else {
+ /* Counting locks from all domains */
+ count += __get_inodelk_dom_count (dom);
+
+ }
+ }
+
+out:
+ return count;
+}
+
+int32_t
+get_inodelk_count (xlator_t *this, inode_t *inode, char *domname)
+{
+ pl_inode_t *pl_inode = NULL;
+ uint64_t tmp_pl_inode = 0;
+ int ret = 0;
+ int32_t count = 0;
+
+ ret = inode_ctx_get (inode, this, &tmp_pl_inode);
+ if (ret != 0) {
+ goto out;
+ }
+
+ pl_inode = (pl_inode_t *)(long) tmp_pl_inode;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ count = __get_inodelk_count (this, pl_inode, domname);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+out:
+ return count;
+}
diff --git a/xlators/features/locks/src/internal.c b/xlators/features/locks/src/internal.c
deleted file mode 100644
index 6524721b4..000000000
--- a/xlators/features/locks/src/internal.c
+++ /dev/null
@@ -1,896 +0,0 @@
-/*
- Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "inode.h"
-#include "logging.h"
-#include "common-utils.h"
-#include "list.h"
-
-#include "locks.h"
-#include "common.h"
-
-
-static int
-release_inode_locks_of_transport (xlator_t *this,
- inode_t *inode, transport_t *trans)
-{
- posix_lock_t *tmp = NULL;
- posix_lock_t *l = NULL;
-
- pl_inode_t * pinode = NULL;
-
- struct list_head granted;
-
- char *path = NULL;
-
- INIT_LIST_HEAD (&granted);
-
- pinode = pl_inode_get (this, inode);
-
- pthread_mutex_lock (&pinode->mutex);
- {
- if (list_empty (&pinode->int_list)) {
- goto unlock;
- }
-
- list_for_each_entry_safe (l, tmp, &pinode->int_list, list) {
- if (l->transport != trans)
- continue;
-
- list_del_init (&l->list);
-
- __delete_lock (pinode, l);
-
- inode_path (inode, NULL, &path);
-
- gf_log (this->name, GF_LOG_TRACE,
- "releasing lock on %s held by "
- "{transport=%p, pid=%"PRId64"}",
- path, trans,
- (uint64_t) l->client_pid);
-
- if (path)
- FREE (path);
-
- __destroy_lock (l);
- }
- }
-unlock:
- pthread_mutex_unlock (&pinode->mutex);
-
- grant_blocked_locks (this, pinode, GF_LOCK_INTERNAL);
-
- return 0;
-}
-
-
-/**
- * pl_inodelk:
- *
- * This fop provides fcntl-style locking on files for internal
- * purposes. Locks held through this fop reside in a domain different
- * from those held by applications. This fop is for the use of AFR.
- */
-
-int
-pl_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct flock *flock)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int ret = -1;
- int can_block = 0;
- posix_locks_private_t * priv = NULL;
- transport_t * transport = NULL;
- pid_t client_pid = -1;
- pl_inode_t * pinode = NULL;
- posix_lock_t * reqlock = NULL;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (loc, out);
- VALIDATE_OR_GOTO (flock, out);
-
- if ((flock->l_start < 0) || (flock->l_len < 0)) {
- op_errno = EINVAL;
- goto unwind;
- }
-
- transport = frame->root->trans;
- client_pid = frame->root->pid;
-
- priv = (posix_locks_private_t *) this->private;
-
- VALIDATE_OR_GOTO (priv, out);
-
- pinode = pl_inode_get (this, loc->inode);
- if (!pinode) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- goto unwind;
- }
-
- if (client_pid == 0) {
- /*
- special case: this means release all locks
- from this transport
- */
- gf_log (this->name, GF_LOG_TRACE,
- "Releasing all locks from transport %p", transport);
-
- release_inode_locks_of_transport (this, loc->inode, transport);
- goto unwind;
- }
-
- reqlock = new_posix_lock (flock, transport, client_pid);
- if (!reqlock) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
- }
-
- switch (cmd) {
- case F_SETLKW:
- can_block = 1;
- reqlock->frame = frame;
- reqlock->this = this;
-
- /* fall through */
-
- case F_SETLK:
- memcpy (&reqlock->user_flock, flock, sizeof (struct flock));
- ret = pl_setlk (this, pinode, reqlock,
- can_block, GF_LOCK_INTERNAL);
-
- if (ret == -1) {
- if (can_block)
- goto out;
-
- gf_log (this->name, GF_LOG_TRACE, "returning EAGAIN");
- op_errno = EAGAIN;
- __destroy_lock (reqlock);
- goto unwind;
- }
- break;
-
- default:
- op_errno = ENOTSUP;
- gf_log (this->name, GF_LOG_ERROR,
- "Unexpected case in inodelk (cmd=%d). "
- "Please file a bug report at http://bugs.gluster.com",
- cmd);
- goto unwind;
- }
-
- op_ret = 0;
-
-unwind:
- STACK_UNWIND (frame, op_ret, op_errno);
-out:
- return 0;
-}
-
-
-int
-pl_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct flock *flock)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int ret = -1;
- int can_block = 0;
- posix_locks_private_t * priv = NULL;
- transport_t * transport = NULL;
- pid_t client_pid = -1;
- pl_inode_t * pinode = NULL;
- posix_lock_t * reqlock = NULL;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (fd, out);
- VALIDATE_OR_GOTO (flock, out);
-
- if ((flock->l_start < 0) || (flock->l_len < 0)) {
- op_errno = EINVAL;
- goto unwind;
- }
-
- transport = frame->root->trans;
- client_pid = frame->root->pid;
-
- priv = (posix_locks_private_t *) this->private;
-
- VALIDATE_OR_GOTO (priv, out);
-
- pinode = pl_inode_get (this, fd->inode);
- if (!pinode) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- goto unwind;
- }
-
- if (client_pid == 0) {
- /*
- special case: this means release all locks
- from this transport
- */
- gf_log (this->name, GF_LOG_TRACE,
- "Releasing all locks from transport %p", transport);
-
- release_inode_locks_of_transport (this, fd->inode, transport);
- goto unwind;
- }
-
- reqlock = new_posix_lock (flock, transport, client_pid);
- if (!reqlock) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
- }
-
- switch (cmd) {
- case F_SETLKW:
- can_block = 1;
- reqlock->frame = frame;
- reqlock->this = this;
- reqlock->fd = fd;
-
- /* fall through */
-
- case F_SETLK:
- memcpy (&reqlock->user_flock, flock, sizeof (struct flock));
- ret = pl_setlk (this, pinode, reqlock,
- can_block, GF_LOCK_INTERNAL);
-
- if (ret == -1) {
- if (can_block)
- goto out;
-
- gf_log (this->name, GF_LOG_TRACE, "Returning EAGAIN");
- op_errno = EAGAIN;
- __destroy_lock (reqlock);
- goto unwind;
- }
- break;
-
- default:
- op_errno = ENOTSUP;
- gf_log (this->name, GF_LOG_ERROR,
- "Unexpected case in finodelk (cmd=%d). "
- "Please file a bug report at http://bugs.gluster.com",
- cmd);
- goto unwind;
- }
-
- op_ret = 0;
-
-unwind:
- STACK_UNWIND (frame, op_ret, op_errno);
-out:
- return 0;
-}
-
-
-/**
- * types_conflict - do two types of lock conflict?
- * @t1: type
- * @t2: type
- *
- * two read locks do not conflict
- * any other case conflicts
- */
-
-static int
-types_conflict (entrylk_type t1, entrylk_type t2)
-{
- return !((t1 == ENTRYLK_RDLCK) && (t2 == ENTRYLK_RDLCK));
-}
-
-/**
- * all_names - does a basename represent all names?
- * @basename: name to check
- */
-
-#define all_names(basename) ((basename == NULL) ? 1 : 0)
-
-/**
- * names_conflict - do two names conflict?
- * @n1: name
- * @n2: name
- */
-
-static int
-names_conflict (const char *n1, const char *n2)
-{
- return all_names (n1) || all_names (n2) || !strcmp (n1, n2);
-}
-
-
-static int
-names_equal (const char *n1, const char *n2)
-{
- return (n1 == NULL && n2 == NULL) || (n1 && n2 && !strcmp (n1, n2));
-}
-
-/**
- * lock_grantable - is this lock grantable?
- * @inode: inode in which to look
- * @basename: name we're trying to lock
- * @type: type of lock
- */
-
-static pl_entry_lock_t *
-__lock_grantable (pl_inode_t *pinode, const char *basename, entrylk_type type)
-{
- pl_entry_lock_t *lock = NULL;
-
- if (list_empty (&pinode->dir_list))
- return NULL;
-
- list_for_each_entry (lock, &pinode->dir_list, inode_list) {
- if (names_conflict (lock->basename, basename) &&
- types_conflict (lock->type, type))
- return lock;
- }
-
- return NULL;
-}
-
-/**
- * find_most_matching_lock - find the lock struct which most matches in order of:
- * lock on the exact basename ||
- * an all_names lock
- *
- *
- * @inode: inode in which to look
- * @basename: name to search for
- */
-
-static pl_entry_lock_t *
-__find_most_matching_lock (pl_inode_t *pinode, const char *basename)
-{
- pl_entry_lock_t *lock;
- pl_entry_lock_t *all = NULL;
- pl_entry_lock_t *exact = NULL;
-
- if (list_empty (&pinode->dir_list))
- return NULL;
-
- list_for_each_entry (lock, &pinode->dir_list, inode_list) {
- if (all_names (lock->basename))
- all = lock;
- else if (names_equal (lock->basename, basename))
- exact = lock;
- }
-
- return (exact ? exact : all);
-}
-
-
-/**
- * insert_new_lock - insert a new dir lock into the inode with the given parameters
- * @pinode: inode to insert into
- * @basename: basename for the lock
- * @type: type of the lock
- */
-
-static pl_entry_lock_t *
-new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type,
- transport_t *trans)
-{
- pl_entry_lock_t *newlock = NULL;
-
- newlock = CALLOC (sizeof (pl_entry_lock_t), 1);
- if (!newlock) {
- goto out;
- }
-
- newlock->basename = basename ? strdup (basename) : NULL;
- newlock->type = type;
- newlock->trans = trans;
-
- if (type == ENTRYLK_RDLCK)
- newlock->read_count = 1;
-
- INIT_LIST_HEAD (&newlock->inode_list);
- INIT_LIST_HEAD (&newlock->blocked_locks);
-
-out:
- return newlock;
-}
-
-/**
- * lock_name - lock a name in a directory
- * @inode: inode for the directory in which to lock
- * @basename: name of the entry to lock
- * if null, lock the entire directory
- *
- * the entire directory being locked is represented as: a single
- * pl_entry_lock_t present in the entrylk_locks list with its
- * basename = NULL
- */
-
-int
-__lock_name (pl_inode_t *pinode, const char *basename, entrylk_type type,
- call_frame_t *frame, xlator_t *this, int nonblock)
-{
- pl_entry_lock_t *lock = NULL;
- pl_entry_lock_t *conf = NULL;
-
- transport_t *trans = NULL;
-
- int ret = -EINVAL;
-
- trans = frame->root->trans;
-
- conf = __lock_grantable (pinode, basename, type);
- if (conf) {
- ret = -EAGAIN;
- if (nonblock)
- goto out;
-
- lock = new_entrylk_lock (pinode, basename, type, trans);
-
- if (!lock) {
- ret = -ENOMEM;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "Blocking lock: {pinode=%p, basename=%s}",
- pinode, basename);
-
- lock->frame = frame;
- lock->this = this;
- lock->blocked = 1;
-
- list_add (&lock->blocked_locks, &conf->blocked_locks);
-
-
- goto out;
- }
-
- switch (type) {
- case ENTRYLK_RDLCK:
- lock = __find_most_matching_lock (pinode, basename);
-
- if (lock && names_equal (lock->basename, basename)) {
- lock->read_count++;
-
- FREE (lock->basename);
- FREE (lock);
-
- lock = NULL;
- } else {
- lock = new_entrylk_lock (pinode, basename, type, trans);
-
- if (!lock) {
- ret = -ENOMEM;
- goto out;
- }
-
- list_add (&lock->inode_list, &pinode->dir_list);
- }
- break;
-
- case ENTRYLK_WRLCK:
- lock = new_entrylk_lock (pinode, basename, type, trans);
-
- if (!lock) {
- ret = -ENOMEM;
- goto out;
- }
-
- list_add (&lock->inode_list, &pinode->dir_list);
- break;
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-
-/**
- * unlock_name - unlock a name in a directory
- * @inode: inode for the directory to unlock in
- * @basename: name of the entry to unlock
- * if null, unlock the entire directory
- */
-
-pl_entry_lock_t *
-__unlock_name (pl_inode_t *pinode, const char *basename, entrylk_type type)
-{
- pl_entry_lock_t *lock = NULL;
- pl_entry_lock_t *ret_lock = NULL;
-
- lock = __find_most_matching_lock (pinode, basename);
-
- if (!lock) {
- gf_log ("locks", GF_LOG_DEBUG,
- "unlock on %s (type=%s) attempted but no matching lock found",
- basename, type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
- "ENTRYLK_WRLCK");
- goto out;
- }
-
- if (names_equal (lock->basename, basename)
- && lock->type == type) {
- if (type == ENTRYLK_RDLCK) {
- lock->read_count--;
- }
- if (type == ENTRYLK_WRLCK || lock->read_count == 0) {
- list_del (&lock->inode_list);
- ret_lock = lock;
- }
- } else {
- gf_log ("locks", GF_LOG_DEBUG,
- "Unlock for a non-existing lock!");
- goto out;
- }
-
-out:
- return ret_lock;
-}
-
-
-void
-__grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
- pl_entry_lock_t *lock,
- struct list_head *granted)
-{
- int bl_ret = 0;
- pl_entry_lock_t *bl = NULL;
- pl_entry_lock_t *tmp = NULL;
-
- list_for_each_entry_safe (bl, tmp, &lock->blocked_locks,
- blocked_locks) {
- list_del_init (&bl->blocked_locks);
-
- /* TODO: error checking */
-
- gf_log ("locks", GF_LOG_TRACE,
- "Trying to unblock: {pinode=%p, basename=%s}",
- pl_inode, bl->basename);
-
- bl_ret = __lock_name (pl_inode, bl->basename, bl->type,
- bl->frame, bl->this, 0);
-
- if (bl_ret == 0) {
- list_add (&bl->blocked_locks, granted);
- } else {
- if (bl->basename)
- FREE (bl->basename);
- FREE (bl);
- }
- }
- return;
-}
-
-
-void
-grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
- pl_entry_lock_t *unlocked)
-{
- struct list_head granted_list;
- pl_entry_lock_t *tmp = NULL;
- pl_entry_lock_t *lock = NULL;
-
- INIT_LIST_HEAD (&granted_list);
-
- pthread_mutex_lock (&pl_inode->mutex);
- {
- __grant_blocked_entry_locks (this, pl_inode, unlocked,
- &granted_list);
- }
- pthread_mutex_unlock (&pl_inode->mutex);
-
- list_for_each_entry_safe (lock, tmp, &granted_list, blocked_locks) {
- list_del_init (&lock->blocked_locks);
-
- STACK_UNWIND (lock->frame, 0, 0);
-
- FREE (lock->basename);
- FREE (lock);
- }
-
- FREE (unlocked->basename);
- FREE (unlocked);
-
- return;
-}
-
-
-/**
- * release_entry_locks_for_transport: release all entry locks from this
- * transport for this loc_t
- */
-
-static int
-release_entry_locks_for_transport (xlator_t *this, pl_inode_t *pinode,
- transport_t *trans)
-{
- pl_entry_lock_t *lock;
- pl_entry_lock_t *tmp;
- struct list_head granted;
-
- INIT_LIST_HEAD (&granted);
-
- pthread_mutex_lock (&pinode->mutex);
- {
- if (list_empty (&pinode->dir_list)) {
- goto unlock;
- }
-
- list_for_each_entry_safe (lock, tmp, &pinode->dir_list,
- inode_list) {
- if (lock->trans != trans)
- continue;
-
- list_del_init (&lock->inode_list);
- __grant_blocked_entry_locks (this, pinode, lock,
- &granted);
-
- FREE (lock->basename);
- FREE (lock);
- }
- }
-unlock:
- pthread_mutex_unlock (&pinode->mutex);
-
- list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) {
- list_del_init (&lock->blocked_locks);
-
- STACK_UNWIND (lock->frame, 0, 0);
-
- FREE (lock->basename);
- FREE (lock);
- }
-
- return 0;
-}
-
-
-/**
- * pl_entrylk:
- *
- * Locking on names (directory entries)
- */
-
-int
-pl_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- transport_t * transport = NULL;
- pid_t pid = -1;
-
- pl_inode_t * pinode = NULL;
- int ret = -1;
- pl_entry_lock_t *unlocked = NULL;
- char unwind = 1;
-
-
- pinode = pl_inode_get (this, loc->inode);
- if (!pinode) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- goto out;
- }
-
- pid = frame->root->pid;
- transport = frame->root->trans;
-
- if (pid == 0) {
- /*
- this is a special case that means release
- all locks from this transport
- */
-
- gf_log (this->name, GF_LOG_TRACE,
- "Releasing locks for transport %p", transport);
-
- release_entry_locks_for_transport (this, pinode, transport);
- op_ret = 0;
-
- goto out;
- }
-
- switch (cmd) {
- case ENTRYLK_LOCK:
- pthread_mutex_lock (&pinode->mutex);
- {
- ret = __lock_name (pinode, basename, type,
- frame, this, 0);
- }
- pthread_mutex_unlock (&pinode->mutex);
-
- if (ret < 0) {
- if (ret == -EAGAIN)
- unwind = 0;
- op_errno = -ret;
- goto out;
- }
-
- break;
-
- case ENTRYLK_LOCK_NB:
- pthread_mutex_lock (&pinode->mutex);
- {
- ret = __lock_name (pinode, basename, type,
- frame, this, 1);
- }
- pthread_mutex_unlock (&pinode->mutex);
-
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- break;
-
- case ENTRYLK_UNLOCK:
- pthread_mutex_lock (&pinode->mutex);
- {
- unlocked = __unlock_name (pinode, basename, type);
- }
- pthread_mutex_unlock (&pinode->mutex);
-
- if (unlocked)
- grant_blocked_entry_locks (this, pinode, unlocked);
-
- break;
-
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "Unexpected case in entrylk (cmd=%d). Please file"
- "a bug report at http://bugs.gluster.com", cmd);
- goto out;
- }
-
- op_ret = 0;
-out:
- if (unwind) {
- STACK_UNWIND (frame, op_ret, op_errno);
- }
-
- return 0;
-}
-
-
-/**
- * pl_entrylk:
- *
- * Locking on names (directory entries)
- */
-
-int
-pl_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- transport_t * transport = NULL;
- pid_t pid = -1;
-
- pl_inode_t * pinode = NULL;
- int ret = -1;
- pl_entry_lock_t *unlocked = NULL;
- char unwind = 1;
-
- pinode = pl_inode_get (this, fd->inode);
- if (!pinode) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory :(");
- goto out;
- }
-
- pid = frame->root->pid;
- transport = frame->root->trans;
-
- if (pid == 0) {
- /*
- this is a special case that means release
- all locks from this transport
- */
-
- gf_log (this->name, GF_LOG_TRACE,
- "Releasing locks for transport %p", transport);
-
- release_entry_locks_for_transport (this, pinode, transport);
- op_ret = 0;
- goto out;
- }
-
- switch (cmd) {
- case ENTRYLK_LOCK:
- pthread_mutex_lock (&pinode->mutex);
- {
- ret = __lock_name (pinode, basename, type,
- frame, this, 0);
- }
- pthread_mutex_unlock (&pinode->mutex);
-
- if (ret < 0) {
- if (ret == -EAGAIN)
- unwind = 0;
- op_errno = -ret;
- goto out;
- }
- break;
-
- case ENTRYLK_LOCK_NB:
- pthread_mutex_lock (&pinode->mutex);
- {
- ret = __lock_name (pinode, basename, type,
- frame, this, 1);
- }
- pthread_mutex_unlock (&pinode->mutex);
-
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
- break;
-
- case ENTRYLK_UNLOCK:
- pthread_mutex_lock (&pinode->mutex);
- {
- unlocked = __unlock_name (pinode, basename, type);
- }
- pthread_mutex_unlock (&pinode->mutex);
-
- if (unlocked)
- grant_blocked_entry_locks (this, pinode, unlocked);
- break;
-
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "Unexpected case in fentrylk (cmd=%d). "
- "Please file a bug report at http://bugs.gluster.com",
- cmd);
- goto out;
- }
-
- op_ret = 0;
-out:
- if (unwind) {
- STACK_UNWIND (frame, op_ret, op_errno);
- }
-
- return 0;
-}
diff --git a/xlators/features/locks/src/locks-mem-types.h b/xlators/features/locks/src/locks-mem-types.h
new file mode 100644
index 000000000..08aeb0a79
--- /dev/null
+++ b/xlators/features/locks/src/locks-mem-types.h
@@ -0,0 +1,29 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __LOCKS_MEM_TYPES_H__
+#define __LOCKS_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_locks_mem_types_ {
+ gf_locks_mt_pl_dom_list_t = gf_common_mt_end + 1,
+ gf_locks_mt_pl_inode_t,
+ gf_locks_mt_posix_lock_t,
+ gf_locks_mt_pl_entry_lock_t,
+ gf_locks_mt_pl_inode_lock_t,
+ gf_locks_mt_truncate_ops,
+ gf_locks_mt_pl_rw_req_t,
+ gf_locks_mt_posix_locks_private_t,
+ gf_locks_mt_pl_fdctx_t,
+ gf_locks_mt_end
+};
+#endif
+
diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h
index 5a834657d..76fc941d7 100644
--- a/xlators/features/locks/src/locks.h
+++ b/xlators/features/locks/src/locks.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __POSIX_LOCKS_H__
#define __POSIX_LOCKS_H__
@@ -26,86 +16,177 @@
#endif
#include "compat-errno.h"
-#include "transport.h"
#include "stack.h"
#include "call-stub.h"
+#include "locks-mem-types.h"
+#include "client_t.h"
+
+#include "lkowner.h"
struct __pl_fd;
struct __posix_lock {
- struct list_head list;
+ struct list_head list;
+
+ short fl_type;
+ off_t fl_start;
+ off_t fl_end;
- short fl_type;
- off_t fl_start;
- off_t fl_end;
+ short blocked; /* waiting to acquire */
+ struct gf_flock user_flock; /* the flock supplied by the user */
+ xlator_t *this; /* required for blocked locks */
+ unsigned long fd_num;
- short blocked; /* waiting to acquire */
- struct flock user_flock; /* the flock supplied by the user */
- xlator_t *this; /* required for blocked locks */
- fd_t *fd;
+ fd_t *fd;
+ call_frame_t *frame;
- call_frame_t *frame;
+ struct timeval blkd_time; /*time at which lock was queued into blkd list*/
+ struct timeval granted_time; /*time at which lock was queued into active list*/
- /* These two together serve to uniquely identify each process
- across nodes */
+ /* These two together serve to uniquely identify each process
+ across nodes */
- transport_t *transport; /* to identify client node */
- pid_t client_pid; /* pid of client process */
+ void *client; /* to identify client node */
+ gf_lkowner_t owner;
+ pid_t client_pid; /* pid of client process */
};
typedef struct __posix_lock posix_lock_t;
+struct __pl_inode_lock {
+ struct list_head list;
+ struct list_head blocked_locks; /* list_head pointing to blocked_inodelks */
+ int ref;
+
+ short fl_type;
+ off_t fl_start;
+ off_t fl_end;
+
+ const char *volume;
+
+ struct gf_flock user_flock; /* the flock supplied by the user */
+ xlator_t *this; /* required for blocked locks */
+ fd_t *fd;
+
+ call_frame_t *frame;
+
+ struct timeval blkd_time; /*time at which lock was queued into blkd list*/
+ struct timeval granted_time; /*time at which lock was queued into active list*/
+
+ /* These two together serve to uniquely identify each process
+ across nodes */
+
+ void *client; /* to identify client node */
+ gf_lkowner_t owner;
+ pid_t client_pid; /* pid of client process */
+
+ char *connection_id; /* stores the client connection id */
+};
+typedef struct __pl_inode_lock pl_inode_lock_t;
+
struct __pl_rw_req_t {
- struct list_head list;
- call_stub_t *stub;
- posix_lock_t region;
+ struct list_head list;
+ call_stub_t *stub;
+ posix_lock_t region;
};
typedef struct __pl_rw_req_t pl_rw_req_t;
+struct __pl_dom_list_t {
+ struct list_head inode_list; /* list_head back to pl_inode_t */
+ const char *domain;
+ struct list_head entrylk_list; /* List of entry locks */
+ struct list_head blocked_entrylks; /* List of all blocked entrylks */
+ struct list_head inodelk_list; /* List of inode locks */
+ struct list_head blocked_inodelks; /* List of all blocked inodelks */
+};
+typedef struct __pl_dom_list_t pl_dom_list_t;
struct __entry_lock {
- struct list_head inode_list; /* list_head back to pl_inode_t */
- struct list_head blocked_locks; /* locks blocked due to this lock */
-
- call_frame_t *frame;
- xlator_t *this;
- int blocked;
-
- const char *basename;
- entrylk_type type;
- unsigned int read_count; /* number of read locks */
- transport_t *trans;
+ struct list_head domain_list; /* list_head back to pl_dom_list_t */
+ struct list_head blocked_locks; /* list_head back to blocked_entrylks */
+
+ call_frame_t *frame;
+ xlator_t *this;
+
+ const char *volume;
+
+ const char *basename;
+ entrylk_type type;
+
+ struct timeval blkd_time; /*time at which lock was queued into blkd list*/
+ struct timeval granted_time; /*time at which lock was queued into active list*/
+
+ void *trans;
+ gf_lkowner_t owner;
+ pid_t client_pid; /* pid of client process */
+
+ char *connection_id; /* stores the client connection id */
};
typedef struct __entry_lock pl_entry_lock_t;
-/* The "simulated" inode. This contains a list of all the locks associated
+/* The "simulated" inode. This contains a list of all the locks associated
with this file */
struct __pl_inode {
- pthread_mutex_t mutex;
-
- struct list_head dir_list; /* list of entry locks */
- struct list_head ext_list; /* list of fcntl locks */
- struct list_head int_list; /* list of internal locks */
- struct list_head rw_list; /* list of waiting r/w requests */
- int mandatory; /* if mandatory locking is enabled */
+ pthread_mutex_t mutex;
+
+ struct list_head dom_list; /* list of domains */
+ struct list_head ext_list; /* list of fcntl locks */
+ struct list_head rw_list; /* list of waiting r/w requests */
+ struct list_head reservelk_list; /* list of reservelks */
+ struct list_head blocked_reservelks; /* list of blocked reservelks */
+ struct list_head blocked_calls; /* List of blocked lock calls while a reserve is held*/
+ int mandatory; /* if mandatory locking is enabled */
+
+ inode_t *refkeeper; /* hold refs on an inode while locks are
+ held to prevent pruning */
};
typedef struct __pl_inode pl_inode_t;
-#define LOCKS_FOR_DOMAIN(inode,domain) (domain == GF_LOCK_POSIX \
- ? inode->fcntl_locks \
- : inode->inodelk_locks)
-
struct __pl_fd {
- gf_boolean_t nonblocking; /* whether O_NONBLOCK has been set */
+ gf_boolean_t nonblocking; /* whether O_NONBLOCK has been set */
};
typedef struct __pl_fd pl_fd_t;
typedef struct {
- gf_boolean_t mandatory; /* if mandatory locking is enabled */
+ gf_boolean_t mandatory; /* if mandatory locking is enabled */
+ gf_boolean_t trace; /* trace lock requests in and out */
+ char *brickname;
} posix_locks_private_t;
+typedef struct {
+ gf_boolean_t entrylk_count_req;
+ gf_boolean_t inodelk_count_req;
+ gf_boolean_t inodelk_dom_count_req;
+ gf_boolean_t posixlk_count_req;
+ gf_boolean_t parent_entrylk_req;
+
+ /* used by {f,}truncate */
+ loc_t loc;
+ fd_t *fd;
+ off_t offset;
+ dict_t *xdata;
+ enum {TRUNCATE, FTRUNCATE} op;
+} pl_local_t;
+
+
+typedef struct {
+ struct list_head locks_list;
+} pl_fdctx_t;
+
+
+typedef struct _locks_ctx {
+ gf_lock_t ltable_lock; /* only for replace,
+ ltable has its own internal
+ lock for operations */
+ struct _lock_table *ltable;
+} pl_ctx_t;
+
+
+pl_ctx_t *
+pl_ctx_get (client_t *client, xlator_t *xlator);
+
#endif /* __POSIX_LOCKS_H__ */
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
index 5514d4414..7bfb38a51 100644
--- a/xlators/features/locks/src/posix.c
+++ b/xlators/features/locks/src/posix.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <unistd.h>
#include <fcntl.h>
#include <limits.h>
@@ -37,6 +27,9 @@
#include "locks.h"
#include "common.h"
#include "statedump.h"
+#include "clear.h"
+#include "defaults.h"
+#include "syncop.h"
#ifndef LLONG_MAX
#define LLONG_MAX LONG_LONG_MAX /* compat with old gcc */
@@ -47,727 +40,2366 @@
void do_blocked_rw (pl_inode_t *);
static int __rw_allowable (pl_inode_t *, posix_lock_t *, glusterfs_fop_t);
+static int format_brickname(char *);
+int pl_lockinfo_get_brickname (xlator_t *, inode_t *, int32_t *);
+static int fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **);
-struct _truncate_ops {
- loc_t loc;
- fd_t *fd;
- off_t offset;
- enum {TRUNCATE, FTRUNCATE} op;
-};
+static pl_fdctx_t *
+pl_new_fdctx ()
+{
+ pl_fdctx_t *fdctx = NULL;
+
+ fdctx = GF_CALLOC (1, sizeof (*fdctx),
+ gf_locks_mt_pl_fdctx_t);
+ GF_VALIDATE_OR_GOTO ("posix-locks", fdctx, out);
+
+ INIT_LIST_HEAD (&fdctx->locks_list);
+
+out:
+ return fdctx;
+}
+
+static pl_fdctx_t *
+pl_check_n_create_fdctx (xlator_t *this, fd_t *fd)
+{
+ int ret = 0;
+ uint64_t tmp = 0;
+ pl_fdctx_t *fdctx = NULL;
+
+ GF_VALIDATE_OR_GOTO ("posix-locks", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+ LOCK (&fd->lock);
+ {
+ ret = __fd_ctx_get (fd, this, &tmp);
+ if ((ret != 0) || (tmp == 0)) {
+ fdctx = pl_new_fdctx ();
+ if (fdctx == NULL) {
+ goto unlock;
+ }
+ }
+ ret = __fd_ctx_set (fd, this, (uint64_t)(long)fdctx);
+ if (ret != 0) {
+ GF_FREE (fdctx);
+ fdctx = NULL;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to set fd ctx");
+ }
+ }
+unlock:
+ UNLOCK (&fd->lock);
+
+out:
+ return fdctx;
+}
int
pl_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- struct _truncate_ops *local = NULL;
+ pl_local_t *local = NULL;
+
+ local = frame->local;
- local = frame->local;
+ if (local->op == TRUNCATE)
+ loc_wipe (&local->loc);
- if (local->op == TRUNCATE)
- loc_wipe (&local->loc);
+ if (local->xdata)
+ dict_unref (local->xdata);
+ if (local->fd)
+ fd_unref (local->fd);
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
+ STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+ return 0;
}
static int
-truncate_allowed (pl_inode_t *pl_inode,
- transport_t *transport, pid_t client_pid,
- off_t offset)
+truncate_allowed (pl_inode_t *pl_inode,
+ client_t *client, pid_t client_pid,
+ gf_lkowner_t *owner, off_t offset)
{
- posix_lock_t *l = NULL;
- posix_lock_t region = {.list = {0, }, };
- int ret = 1;
-
- region.fl_start = offset;
- region.fl_end = LLONG_MAX;
- region.transport = transport;
- region.client_pid = client_pid;
-
- pthread_mutex_lock (&pl_inode->mutex);
- {
- list_for_each_entry (l, &pl_inode->ext_list, list) {
- if (!l->blocked
- && locks_overlap (&region, l)
- && !same_owner (&region, l)) {
- ret = 0;
- break;
- }
- }
- }
- pthread_mutex_unlock (&pl_inode->mutex);
+ posix_lock_t *l = NULL;
+ posix_lock_t region = {.list = {0, }, };
+ int ret = 1;
+
+ region.fl_start = offset;
+ region.fl_end = LLONG_MAX;
+ region.client = client;
+ region.client_pid = client_pid;
+ region.owner = *owner;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry (l, &pl_inode->ext_list, list) {
+ if (!l->blocked
+ && locks_overlap (&region, l)
+ && !same_owner (&region, l)) {
+ ret = 0;
+ gf_log ("posix-locks", GF_LOG_TRACE, "Truncate "
+ "allowed");
+ break;
+ }
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
- return ret;
+ return ret;
}
static int
truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
- posix_locks_private_t *priv = NULL;
- struct _truncate_ops *local = NULL;
- inode_t *inode = NULL;
- pl_inode_t *pl_inode = NULL;
+ posix_locks_private_t *priv = NULL;
+ pl_local_t *local = NULL;
+ inode_t *inode = NULL;
+ pl_inode_t *pl_inode = NULL;
- priv = this->private;
- local = frame->local;
+ priv = this->private;
+ local = frame->local;
- if (op_ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "got error (errno=%d, stderror=%s) from child",
- op_errno, strerror (op_errno));
- goto unwind;
- }
+ if (op_ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "got error (errno=%d, stderror=%s) from child",
+ op_errno, strerror (op_errno));
+ goto unwind;
+ }
- if (local->op == TRUNCATE)
- inode = local->loc.inode;
- else
- inode = local->fd->inode;
+ if (local->op == TRUNCATE)
+ inode = local->loc.inode;
+ else
+ inode = local->fd->inode;
- pl_inode = pl_inode_get (this, inode);
- if (!pl_inode) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+ pl_inode = pl_inode_get (this, inode);
+ if (!pl_inode) {
op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
- }
-
- if (priv->mandatory
- && pl_inode->mandatory
- && !truncate_allowed (pl_inode, frame->root->trans,
- frame->root->pid, local->offset)) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ if (priv->mandatory
+ && pl_inode->mandatory
+ && !truncate_allowed (pl_inode, frame->root->client,
+ frame->root->pid, &frame->root->lk_owner,
+ local->offset)) {
op_ret = -1;
- op_errno = EAGAIN;
- goto unwind;
- }
-
- switch (local->op) {
- case TRUNCATE:
- STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->truncate,
- &local->loc, local->offset);
- break;
- case FTRUNCATE:
- STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->ftruncate,
- local->fd, local->offset);
- break;
- }
-
- return 0;
+ op_errno = EAGAIN;
+ goto unwind;
+ }
-unwind:
- if (local->op == TRUNCATE)
- loc_wipe (&local->loc);
+ switch (local->op) {
+ case TRUNCATE:
+ STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->truncate,
+ &local->loc, local->offset, local->xdata);
+ break;
+ case FTRUNCATE:
+ STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->ftruncate,
+ local->fd, local->offset, local->xdata);
+ break;
+ }
+
+ return 0;
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
+unwind:
+ gf_log (this->name, GF_LOG_ERROR, "truncate failed with ret: %d, "
+ "error: %s", op_ret, strerror (op_errno));
+ if (local->op == TRUNCATE)
+ loc_wipe (&local->loc);
+ if (local->xdata)
+ dict_unref (local->xdata);
+ if (local->fd)
+ fd_unref (local->fd);
+
+ STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, buf, NULL, xdata);
+ return 0;
}
int
pl_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset)
+ loc_t *loc, off_t offset, dict_t *xdata)
{
- struct _truncate_ops *local = NULL;
+ pl_local_t *local = NULL;
- local = CALLOC (1, sizeof (struct _truncate_ops));
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto unwind;
- }
+ local = mem_get0 (this->local_pool);
+ GF_VALIDATE_OR_GOTO (this->name, local, unwind);
- local->op = TRUNCATE;
- local->offset = offset;
- loc_copy (&local->loc, loc);
+ local->op = TRUNCATE;
+ local->offset = offset;
+ loc_copy (&local->loc, loc);
+ if (xdata)
+ local->xdata = dict_ref (xdata);
- frame->local = local;
+ frame->local = local;
- STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->stat, loc);
+ STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->stat, loc, NULL);
- return 0;
+ return 0;
unwind:
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
+ gf_log (this->name, GF_LOG_ERROR, "truncate for %s failed with ret: %d, "
+ "error: %s", loc->path, -1, strerror (ENOMEM));
+ STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
+ return 0;
}
int
pl_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset)
+ fd_t *fd, off_t offset, dict_t *xdata)
{
- struct _truncate_ops *local = NULL;
+ pl_local_t *local = NULL;
- local = CALLOC (1, sizeof (struct _truncate_ops));
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto unwind;
- }
+ local = mem_get0 (this->local_pool);
+ GF_VALIDATE_OR_GOTO (this->name, local, unwind);
- local->op = FTRUNCATE;
- local->offset = offset;
- local->fd = fd;
+ local->op = FTRUNCATE;
+ local->offset = offset;
+ local->fd = fd_ref (fd);
+ if (xdata)
+ local->xdata = dict_ref (xdata);
- frame->local = local;
+ frame->local = local;
- STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd);
- return 0;
+ STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ return 0;
unwind:
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
+ gf_log (this->name, GF_LOG_ERROR, "ftruncate failed with ret: %d, "
+ "error: %s", -1, strerror (ENOMEM));
+ STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
+ return 0;
+}
+
+int
+pl_locks_by_fd (pl_inode_t *pl_inode, fd_t *fd)
+{
+ posix_lock_t *l = NULL;
+ int found = 0;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+
+ list_for_each_entry (l, &pl_inode->ext_list, list) {
+ if ((l->fd_num == fd_to_fdnum(fd))) {
+ found = 1;
+ break;
+ }
+ }
+
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+ return found;
}
+static void
+delete_locks_of_fd (xlator_t *this, pl_inode_t *pl_inode, fd_t *fd)
+{
+ posix_lock_t *tmp = NULL;
+ posix_lock_t *l = NULL;
+
+ struct list_head blocked_list;
+
+ INIT_LIST_HEAD (&blocked_list);
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+
+ list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) {
+ if ((l->fd_num == fd_to_fdnum(fd))) {
+ if (l->blocked) {
+ list_move_tail (&l->list, &blocked_list);
+ continue;
+ }
+ __delete_lock (pl_inode, l);
+ __destroy_lock (l);
+ }
+ }
+
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ list_for_each_entry_safe (l, tmp, &blocked_list, list) {
+ list_del_init(&l->list);
+ STACK_UNWIND_STRICT (lk, l->frame, -1, EAGAIN, &l->user_flock,
+ NULL);
+ __destroy_lock (l);
+ }
+
+ grant_blocked_locks (this, pl_inode);
+
+ do_blocked_rw (pl_inode);
+
+}
static void
__delete_locks_of_owner (pl_inode_t *pl_inode,
- transport_t *transport, pid_t pid)
+ client_t *client, gf_lkowner_t *owner)
+{
+ posix_lock_t *tmp = NULL;
+ posix_lock_t *l = NULL;
+
+ /* TODO: what if it is a blocked lock with pending l->frame */
+
+ list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) {
+ if (l->blocked)
+ continue;
+ if ((l->client == client) &&
+ is_same_lkowner (&l->owner, owner)) {
+ gf_log ("posix-locks", GF_LOG_TRACE,
+ " Flushing lock"
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" state: %s",
+ l->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ l->client_pid,
+ lkowner_utoa (&l->owner),
+ l->user_flock.l_start,
+ l->user_flock.l_len,
+ l->blocked == 1 ? "Blocked" : "Active");
+
+ __delete_lock (pl_inode, l);
+ __destroy_lock (l);
+ }
+ }
+
+ return;
+}
+
+
+int32_t
+pl_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+
+}
+
+int32_t
+pl_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
+ int op_ret = -1;
+ int32_t bcount = 0;
+ int32_t gcount = 0;
+ char key[PATH_MAX] = {0, };
+ char *lk_summary = NULL;
+ pl_inode_t *pl_inode = NULL;
+ dict_t *dict = NULL;
+ clrlk_args args = {0,};
+ char *brickname = NULL;
+
+ if (!name)
+ goto usual;
+
+ if (strncmp (name, GF_XATTR_CLRLK_CMD, strlen (GF_XATTR_CLRLK_CMD)))
+ goto usual;
+
+ if (clrlk_parse_args (name, &args)) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ dict = dict_new ();
+ if (!dict) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ pl_inode = pl_inode_get (this, loc->inode);
+ if (!pl_inode) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ switch (args.type) {
+ case CLRLK_INODE:
+ case CLRLK_ENTRY:
+ op_ret = clrlk_clear_lks_in_all_domains (this, pl_inode,
+ &args, &bcount,
+ &gcount,
+ &op_errno);
+ if (op_ret)
+ goto out;
+ break;
+ case CLRLK_POSIX:
+ op_ret = clrlk_clear_posixlk (this, pl_inode, &args,
+ &bcount, &gcount,
+ &op_errno);
+ if (op_ret)
+ goto out;
+ break;
+ case CLRLK_TYPE_MAX:
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ op_ret = fetch_pathinfo (this, loc->inode, &op_errno, &brickname);
+ if (op_ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Couldn't get brickname");
+ } else {
+ op_ret = format_brickname(brickname);
+ if (op_ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Couldn't format brickname");
+ GF_FREE(brickname);
+ brickname = NULL;
+ }
+ }
+
+ if (!gcount && !bcount) {
+ if (gf_asprintf (&lk_summary, "No locks cleared.") == -1) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+ } else if (gf_asprintf (&lk_summary, "%s: %s blocked locks=%d "
+ "granted locks=%d",
+ (brickname == NULL)? this->name : brickname,
+ (args.type == CLRLK_INODE)? "inode":
+ (args.type == CLRLK_ENTRY)? "entry":
+ (args.type == CLRLK_POSIX)? "posix": " ",
+ bcount, gcount) == -1) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ strncpy (key, name, strlen (name));
+ if (dict_set_dynstr (dict, key, lk_summary)) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_ret = 0;
+out:
+ GF_FREE(brickname);
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
+
+ GF_FREE (args.opts);
+ if (op_ret && lk_summary)
+ GF_FREE (lk_summary);
+ if (dict)
+ dict_unref (dict);
+ return 0;
+
+usual:
+ STACK_WIND (frame, pl_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+ return 0;
+}
+
+static int
+format_brickname(char *brickname)
+{
+ int ret = -1;
+ char *hostname = NULL;
+ char *volume = NULL;
+ char *saveptr = NULL;
+
+ if (!brickname)
+ goto out;
+
+ strtok_r(brickname, ":", &saveptr);
+ hostname = gf_strdup(strtok_r(NULL, ":", &saveptr));
+ if (hostname == NULL)
+ goto out;
+ volume = gf_strdup(strtok_r(NULL, ".", &saveptr));
+ if (volume == NULL)
+ goto out;
+
+ sprintf(brickname, "%s:%s", hostname, volume);
+
+ ret = 0;
+out:
+ GF_FREE(hostname);
+ GF_FREE(volume);
+ return ret;
+}
+
+static int
+fetch_pathinfo (xlator_t *this, inode_t *inode, int32_t *op_errno,
+ char **brickname)
+{
+ int ret = -1;
+ loc_t loc = {0, };
+ dict_t *dict = NULL;
+
+ if (!brickname)
+ goto out;
+
+ if (!op_errno)
+ goto out;
+
+ uuid_copy (loc.gfid, inode->gfid);
+ loc.inode = inode_ref (inode);
+
+ ret = syncop_getxattr (FIRST_CHILD(this), &loc, &dict,
+ GF_XATTR_PATHINFO_KEY);
+ if (ret < 0) {
+ *op_errno = errno;
+ goto out;
+ }
+
+ ret = dict_get_str (dict, GF_XATTR_PATHINFO_KEY, brickname);
+ if (ret)
+ goto out;
+
+ *brickname = gf_strdup(*brickname);
+ if (*brickname == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (dict != NULL) {
+ dict_unref (dict);
+ }
+ loc_wipe(&loc);
+
+ return ret;
+}
+
+
+int
+pl_lockinfo_get_brickname (xlator_t *this, inode_t *inode, int32_t *op_errno)
+{
+ int ret = -1;
+ posix_locks_private_t *priv = NULL;
+ char *brickname = NULL;
+ char *end = NULL;
+ char *tmp = NULL;
+
+ priv = this->private;
+
+ ret = fetch_pathinfo (this, inode, op_errno, &brickname);
+ if (ret)
+ goto out;
+
+ end = strrchr (brickname, ':');
+ if (!end) {
+ GF_FREE(brickname);
+ ret = -1;
+ goto out;
+ }
+
+ tmp = brickname;
+ brickname = gf_strndup (brickname, (end - brickname));
+ if (brickname == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ priv->brickname = brickname;
+ ret = 0;
+out:
+ GF_FREE(tmp);
+ return ret;
+}
+
+char *
+pl_lockinfo_key (xlator_t *this, inode_t *inode, int32_t *op_errno)
+{
+ posix_locks_private_t *priv = NULL;
+ char *key = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ if (priv->brickname == NULL) {
+ ret = pl_lockinfo_get_brickname (this, inode, op_errno);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "cannot get brickname");
+ goto out;
+ }
+ }
+
+ key = priv->brickname;
+out:
+ return key;
+}
+
+int32_t
+pl_fgetxattr_handle_lockinfo (xlator_t *this, fd_t *fd,
+ dict_t *dict, int32_t *op_errno)
+{
+ pl_inode_t *pl_inode = NULL;
+ char *key = NULL, *buf = NULL;
+ int32_t op_ret = 0;
+ unsigned long fdnum = 0, len = 0;
+ dict_t *tmp = NULL;
+
+ pl_inode = pl_inode_get (this, fd->inode);
+
+ if (!pl_inode) {
+ gf_log (this->name, GF_LOG_DEBUG, "Could not get inode.");
+ *op_errno = EBADFD;
+ op_ret = -1;
+ goto out;
+ }
+
+ if (!pl_locks_by_fd (pl_inode, fd)) {
+ op_ret = 0;
+ goto out;
+ }
+
+ fdnum = fd_to_fdnum (fd);
+
+ key = pl_lockinfo_key (this, fd->inode, op_errno);
+ if (key == NULL) {
+ op_ret = -1;
+ goto out;
+ }
+
+ tmp = dict_new ();
+ if (tmp == NULL) {
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_ret = dict_set_uint64 (tmp, key, fdnum);
+ if (op_ret < 0) {
+ *op_errno = -op_ret;
+ op_ret = -1;
+ gf_log (this->name, GF_LOG_WARNING, "setting lockinfo value "
+ "(%lu) for fd (ptr:%p inode-gfid:%s) failed (%s)",
+ fdnum, fd, uuid_utoa (fd->inode->gfid),
+ strerror (*op_errno));
+ goto out;
+ }
+
+ len = dict_serialized_length (tmp);
+ if (len < 0) {
+ *op_errno = -op_ret;
+ op_ret = -1;
+ gf_log (this->name, GF_LOG_WARNING,
+ "dict_serialized_length failed (%s) while handling "
+ "lockinfo for fd (ptr:%p inode-gfid:%s)",
+ strerror (*op_errno), fd, uuid_utoa (fd->inode->gfid));
+ goto out;
+ }
+
+ buf = GF_CALLOC (1, len, gf_common_mt_char);
+ if (buf == NULL) {
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_ret = dict_serialize (tmp, buf);
+ if (op_ret < 0) {
+ *op_errno = -op_ret;
+ op_ret = -1;
+ gf_log (this->name, GF_LOG_WARNING,
+ "dict_serialize failed (%s) while handling lockinfo "
+ "for fd (ptr: %p inode-gfid:%s)", strerror (*op_errno),
+ fd, uuid_utoa (fd->inode->gfid));
+ goto out;
+ }
+
+ op_ret = dict_set_dynptr (dict, GF_XATTR_LOCKINFO_KEY, buf, len);
+ if (op_ret < 0) {
+ *op_errno = -op_ret;
+ op_ret = -1;
+ gf_log (this->name, GF_LOG_WARNING, "setting lockinfo value "
+ "(%lu) for fd (ptr:%p inode-gfid:%s) failed (%s)",
+ fdnum, fd, uuid_utoa (fd->inode->gfid),
+ strerror (*op_errno));
+ goto out;
+ }
+
+ buf = NULL;
+out:
+ if (tmp != NULL) {
+ dict_unref (tmp);
+ }
+
+ if (buf != NULL) {
+ GF_FREE (buf);
+ }
+
+ return op_ret;
+}
+
+
+int32_t
+pl_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ int32_t op_ret = 0, op_errno = 0;
+ dict_t *dict = NULL;
+
+ if (!name) {
+ goto usual;
+ }
+
+ if (strcmp (name, GF_XATTR_LOCKINFO_KEY) == 0) {
+ dict = dict_new ();
+ if (dict == NULL) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ op_ret = pl_fgetxattr_handle_lockinfo (this, fd, dict,
+ &op_errno);
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "getting lockinfo on fd (ptr:%p inode-gfid:%s) "
+ "failed (%s)", fd, uuid_utoa (fd->inode->gfid),
+ strerror (op_errno));
+ }
+
+ goto unwind;
+ } else {
+ goto usual;
+ }
+
+unwind:
+ STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, NULL);
+ if (dict != NULL) {
+ dict_unref (dict);
+ }
+
+ return 0;
+
+usual:
+ STACK_WIND (frame, default_fgetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+ return 0;
+}
+
+int32_t
+pl_migrate_locks (call_frame_t *frame, fd_t *newfd, uint64_t oldfd_num,
+ int32_t *op_errno)
+{
+ pl_inode_t *pl_inode = NULL;
+ uint64_t newfd_num = 0;
+ posix_lock_t *l = NULL;
+ int32_t op_ret = 0;
+
+ newfd_num = fd_to_fdnum (newfd);
+
+ pl_inode = pl_inode_get (frame->this, newfd->inode);
+ if (pl_inode == NULL) {
+ op_ret = -1;
+ *op_errno = EBADFD;
+ goto out;
+ }
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry (l, &pl_inode->ext_list, list) {
+ if (l->fd_num == oldfd_num) {
+ l->fd_num = newfd_num;
+ l->client = frame->root->client;
+ }
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ op_ret = 0;
+out:
+ return op_ret;
+}
+
+int32_t
+pl_fsetxattr_handle_lockinfo (call_frame_t *frame, fd_t *fd, char *lockinfo_buf,
+ int len, int32_t *op_errno)
{
- posix_lock_t *tmp = NULL;
- posix_lock_t *l = NULL;
+ int32_t op_ret = -1;
+ dict_t *lockinfo = NULL;
+ uint64_t oldfd_num = 0;
+ char *key = NULL;
+
+ lockinfo = dict_new ();
+ if (lockinfo == NULL) {
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_ret = dict_unserialize (lockinfo_buf, len, &lockinfo);
+ if (op_ret < 0) {
+ *op_errno = -op_ret;
+ op_ret = -1;
+ goto out;
+ }
+
+ key = pl_lockinfo_key (frame->this, fd->inode, op_errno);
+ if (key == NULL) {
+ op_ret = -1;
+ goto out;
+ }
+
+ op_ret = dict_get_uint64 (lockinfo, key, &oldfd_num);
+
+ if (oldfd_num == 0) {
+ op_ret = 0;
+ goto out;
+ }
+
+ op_ret = pl_migrate_locks (frame, fd, oldfd_num, op_errno);
+ if (op_ret < 0) {
+ gf_log (frame->this->name, GF_LOG_WARNING,
+ "migration of locks from oldfd (ptr:%p) to newfd "
+ "(ptr:%p) (inode-gfid:%s)", (void *)oldfd_num, fd,
+ uuid_utoa (fd->inode->gfid));
+ goto out;
+ }
+
+out:
+ dict_unref (lockinfo);
+
+ return op_ret;
+}
+
+int32_t
+pl_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ int32_t op_ret = 0, op_errno = 0;
+ void *lockinfo_buf = NULL;
+ int len = 0;
+
+ op_ret = dict_get_ptr_and_len (dict, GF_XATTR_LOCKINFO_KEY,
+ &lockinfo_buf, &len);
+ if (lockinfo_buf == NULL) {
+ goto usual;
+ }
+
+ op_ret = pl_fsetxattr_handle_lockinfo (frame, fd, lockinfo_buf, len,
+ &op_errno);
+ if (op_ret < 0) {
+ goto unwind;
+ }
+
+usual:
+ STACK_WIND (frame, default_fsetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+ return 0;
- /* TODO: what if it is a blocked lock with pending l->frame */
+unwind:
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, NULL);
+ return 0;
+}
- list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) {
- if ((l->transport == transport)
- && (l->client_pid == pid)) {
- __delete_lock (pl_inode, l);
- __destroy_lock (l);
- }
- }
+int32_t
+pl_opendir_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd, dict_t *xdata)
+{
+ pl_fdctx_t *fdctx = NULL;
- list_for_each_entry_safe (l, tmp, &pl_inode->int_list, list) {
- if ((l->transport == transport)
- && (l->client_pid == pid)) {
- __delete_lock (pl_inode, l);
- __destroy_lock (l);
- }
- }
+ if (op_ret < 0)
+ goto unwind;
- return;
+ fdctx = pl_check_n_create_fdctx (this, fd);
+ if (!fdctx) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto unwind;
+ }
+
+unwind:
+ STACK_UNWIND_STRICT (opendir,
+ frame,
+ op_ret,
+ op_errno,
+ fd, xdata);
+ return 0;
}
+int32_t
+pl_opendir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, fd_t *fd, dict_t *xdata)
+{
+ STACK_WIND (frame,
+ pl_opendir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir,
+ loc, fd, xdata);
+ return 0;
+
+}
int
pl_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
- return 0;
+ return 0;
}
int
pl_flush (call_frame_t *frame, xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
- posix_locks_private_t *priv = NULL;
- pl_inode_t *pl_inode = NULL;
+ pl_inode_t *pl_inode = NULL;
- priv = this->private;
+ pl_inode = pl_inode_get (this, fd->inode);
- pl_inode = pl_inode_get (this, fd->inode);
+ if (!pl_inode) {
+ gf_log (this->name, GF_LOG_DEBUG, "Could not get inode.");
+ STACK_UNWIND_STRICT (flush, frame, -1, EBADFD, NULL);
+ return 0;
+ }
- if (!pl_inode) {
- gf_log (this->name, GF_LOG_DEBUG, "Could not get inode.");
- STACK_UNWIND (frame, -1, EBADFD);
- return 0;
- }
+ pl_trace_flush (this, frame, fd);
- pthread_mutex_lock (&pl_inode->mutex);
- {
- __delete_locks_of_owner (pl_inode, frame->root->trans,
- frame->root->pid);
- }
- pthread_mutex_unlock (&pl_inode->mutex);
+ if (frame->root->lk_owner.len == 0) {
+ /* Handle special case when protocol/server sets lk-owner to zero.
+ * This usually happens due to a client disconnection. Hence, free
+ * all locks opened with this fd.
+ */
+ gf_log (this->name, GF_LOG_TRACE,
+ "Releasing all locks with fd %p", fd);
+ delete_locks_of_fd (this, pl_inode, fd);
+ goto wind;
- grant_blocked_locks (this, pl_inode, GF_LOCK_POSIX);
- grant_blocked_locks (this, pl_inode, GF_LOCK_INTERNAL);
+ }
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __delete_locks_of_owner (pl_inode, frame->root->client,
+ &frame->root->lk_owner);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
- do_blocked_rw (pl_inode);
+ grant_blocked_locks (this, pl_inode);
- STACK_WIND (frame, pl_flush_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush, fd);
- return 0;
+ do_blocked_rw (pl_inode);
+
+wind:
+ STACK_WIND (frame, pl_flush_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
+ return 0;
}
int
pl_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, fd);
+ pl_fdctx_t *fdctx = NULL;
- return 0;
+ if (op_ret < 0)
+ goto unwind;
+
+ fdctx = pl_check_n_create_fdctx (this, fd);
+ if (!fdctx) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto unwind;
+ }
+
+unwind:
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+
+ return 0;
}
int
-pl_open (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, fd_t *fd)
+pl_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
{
- /* why isn't O_TRUNC being handled ? */
- STACK_WIND (frame, pl_open_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->open,
- loc, flags & ~O_TRUNC, fd);
+ STACK_WIND (frame, pl_open_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->open,
+ loc, flags, fd, xdata);
- return 0;
+ return 0;
}
int
pl_create_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- fd_t *fd, inode_t *inode, struct stat *buf)
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ fd_t *fd, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
+ pl_fdctx_t *fdctx = NULL;
+
+ if (op_ret < 0)
+ goto unwind;
- return 0;
+ fdctx = pl_check_n_create_fdctx (this, fd);
+ if (!fdctx) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto unwind;
+ }
+
+unwind:
+ STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+
+ return 0;
}
int
pl_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode, fd_t *fd)
+ loc_t *loc, int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
{
- STACK_WIND (frame, pl_create_cbk,
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->create,
- loc, flags, mode, fd);
- return 0;
+ STACK_WIND (frame, pl_create_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->create,
+ loc, flags, mode, umask, fd, xdata);
+ return 0;
}
int
pl_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count, struct stat *stbuf,
- struct iobref *iobref)
+ int32_t op_ret, int32_t op_errno,
+ struct iovec *vector, int32_t count, struct iatt *stbuf,
+ struct iobref *iobref, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf, iobref);
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno,
+ vector, count, stbuf, iobref, xdata);
- return 0;
+ return 0;
}
int
-pl_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+pl_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, stbuf);
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
- return 0;
+ return 0;
}
void
do_blocked_rw (pl_inode_t *pl_inode)
{
- struct list_head wind_list;
- pl_rw_req_t *rw = NULL;
- pl_rw_req_t *tmp = NULL;
-
- INIT_LIST_HEAD (&wind_list);
-
- pthread_mutex_lock (&pl_inode->mutex);
- {
- list_for_each_entry_safe (rw, tmp, &pl_inode->rw_list, list) {
- if (__rw_allowable (pl_inode, &rw->region,
- rw->stub->fop)) {
- list_del_init (&rw->list);
- list_add_tail (&rw->list, &wind_list);
- }
- }
- }
- pthread_mutex_unlock (&pl_inode->mutex);
+ struct list_head wind_list;
+ pl_rw_req_t *rw = NULL;
+ pl_rw_req_t *tmp = NULL;
+
+ INIT_LIST_HEAD (&wind_list);
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ list_for_each_entry_safe (rw, tmp, &pl_inode->rw_list, list) {
+ if (__rw_allowable (pl_inode, &rw->region,
+ rw->stub->fop)) {
+ list_del_init (&rw->list);
+ list_add_tail (&rw->list, &wind_list);
+ }
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
- list_for_each_entry_safe (rw, tmp, &wind_list, list) {
- list_del_init (&rw->list);
- call_resume (rw->stub);
- free (rw);
- }
+ list_for_each_entry_safe (rw, tmp, &wind_list, list) {
+ list_del_init (&rw->list);
+ call_resume (rw->stub);
+ GF_FREE (rw);
+ }
- return;
+ return;
}
static int
__rw_allowable (pl_inode_t *pl_inode, posix_lock_t *region,
- glusterfs_fop_t op)
+ glusterfs_fop_t op)
{
- posix_lock_t *l = NULL;
- int ret = 1;
-
- list_for_each_entry (l, &pl_inode->ext_list, list) {
- if (locks_overlap (l, region) && !same_owner (l, region)) {
- if ((op == GF_FOP_READ) && (l->fl_type != F_WRLCK))
- continue;
- ret = 0;
- break;
- }
- }
+ posix_lock_t *l = NULL;
+ int ret = 1;
+
+ list_for_each_entry (l, &pl_inode->ext_list, list) {
+ if (locks_overlap (l, region) && !same_owner (l, region)) {
+ if ((op == GF_FOP_READ) && (l->fl_type != F_WRLCK))
+ continue;
+ ret = 0;
+ break;
+ }
+ }
- return ret;
+ return ret;
}
int
-pl_readv_cont (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
+pl_readv_cont (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- STACK_WIND (frame, pl_readv_cbk,
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv,
- fd, size, offset);
+ STACK_WIND (frame, pl_readv_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv,
+ fd, size, offset, flags, xdata);
- return 0;
+ return 0;
}
int
pl_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
-{
- posix_locks_private_t *priv = NULL;
- pl_inode_t *pl_inode = NULL;
- pl_rw_req_t *rw = NULL;
- posix_lock_t region = {.list = {0, }, };
- int op_ret = 0;
- int op_errno = 0;
- char wind_needed = 1;
-
-
- priv = this->private;
- pl_inode = pl_inode_get (this, fd->inode);
-
- if (priv->mandatory && pl_inode->mandatory) {
- region.fl_start = offset;
- region.fl_end = offset + size - 1;
- region.transport = frame->root->trans;
- region.client_pid = frame->root->pid;
-
- pthread_mutex_lock (&pl_inode->mutex);
- {
- wind_needed = __rw_allowable (pl_inode, &region,
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
+{
+ posix_locks_private_t *priv = NULL;
+ pl_inode_t *pl_inode = NULL;
+ pl_rw_req_t *rw = NULL;
+ posix_lock_t region = {.list = {0, }, };
+ int op_ret = 0;
+ int op_errno = 0;
+ char wind_needed = 1;
+
+
+ priv = this->private;
+ pl_inode = pl_inode_get (this, fd->inode);
+
+ if (priv->mandatory && pl_inode->mandatory) {
+ region.fl_start = offset;
+ region.fl_end = offset + size - 1;
+ region.client = frame->root->client;
+ region.fd_num = fd_to_fdnum(fd);
+ region.client_pid = frame->root->pid;
+ region.owner = frame->root->lk_owner;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ wind_needed = __rw_allowable (pl_inode, &region,
GF_FOP_READ);
- if (wind_needed) {
- goto unlock;
+ if (wind_needed) {
+ goto unlock;
+ }
+
+ if (fd->flags & O_NONBLOCK) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "returning EAGAIN as fd is O_NONBLOCK");
+ op_errno = EAGAIN;
+ op_ret = -1;
+ goto unlock;
+ }
+
+ rw = GF_CALLOC (1, sizeof (*rw),
+ gf_locks_mt_pl_rw_req_t);
+ if (!rw) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto unlock;
+ }
+
+ rw->stub = fop_readv_stub (frame, pl_readv_cont,
+ fd, size, offset, flags,
+ xdata);
+ if (!rw->stub) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ GF_FREE (rw);
+ goto unlock;
}
- if (fd->flags & O_NONBLOCK) {
- gf_log (this->name, GF_LOG_TRACE,
- "returning EAGAIN as fd is O_NONBLOCK");
- op_errno = EAGAIN;
- op_ret = -1;
- goto unlock;
- }
-
- rw = CALLOC (1, sizeof (*rw));
- if (!rw) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- op_ret = -1;
- goto unlock;
- }
-
- rw->stub = fop_readv_stub (frame, pl_readv_cont,
- fd, size, offset);
- if (!rw->stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- op_ret = -1;
- free (rw);
- goto unlock;
- }
-
- rw->region = region;
-
- list_add_tail (&rw->list, &pl_inode->rw_list);
- }
- unlock:
- pthread_mutex_unlock (&pl_inode->mutex);
- }
+ rw->region = region;
+
+ list_add_tail (&rw->list, &pl_inode->rw_list);
+ }
+ unlock:
+ pthread_mutex_unlock (&pl_inode->mutex);
+ }
if (wind_needed) {
- STACK_WIND (frame, pl_readv_cbk,
+ STACK_WIND (frame, pl_readv_cbk,
FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv,
- fd, size, offset);
+ fd, size, offset, flags, xdata);
}
- if (op_ret == -1)
- STACK_UNWIND (frame, -1, op_errno, NULL, 0, NULL, NULL);
+ if (op_ret == -1)
+ STACK_UNWIND_STRICT (readv, frame, -1, op_errno,
+ NULL, 0, NULL, NULL, NULL);
- return 0;
+ return 0;
}
int
pl_writev_cont (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int count, off_t offset,
- struct iobref *iobref)
+ struct iovec *vector, int count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
- STACK_WIND (frame, pl_writev_cbk,
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev,
- fd, vector, count, offset, iobref);
+ STACK_WIND (frame, pl_writev_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev,
+ fd, vector, count, offset, flags, iobref, xdata);
- return 0;
+ return 0;
}
int
pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref)
-{
- posix_locks_private_t *priv = NULL;
- pl_inode_t *pl_inode = NULL;
- pl_rw_req_t *rw = NULL;
- posix_lock_t region = {.list = {0, }, };
- int op_ret = 0;
- int op_errno = 0;
- char wind_needed = 1;
-
-
- priv = this->private;
- pl_inode = pl_inode_get (this, fd->inode);
-
- if (priv->mandatory && pl_inode->mandatory) {
- region.fl_start = offset;
- region.fl_end = offset + iov_length (vector, count) - 1;
- region.transport = frame->root->trans;
- region.client_pid = frame->root->pid;
-
- pthread_mutex_lock (&pl_inode->mutex);
- {
- wind_needed = __rw_allowable (pl_inode, &region,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
+{
+ posix_locks_private_t *priv = NULL;
+ pl_inode_t *pl_inode = NULL;
+ pl_rw_req_t *rw = NULL;
+ posix_lock_t region = {.list = {0, }, };
+ int op_ret = 0;
+ int op_errno = 0;
+ char wind_needed = 1;
+
+ priv = this->private;
+ pl_inode = pl_inode_get (this, fd->inode);
+
+ if (priv->mandatory && pl_inode->mandatory) {
+ region.fl_start = offset;
+ region.fl_end = offset + iov_length (vector, count) - 1;
+ region.client = frame->root->client;
+ region.fd_num = fd_to_fdnum(fd);
+ region.client_pid = frame->root->pid;
+ region.owner = frame->root->lk_owner;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ wind_needed = __rw_allowable (pl_inode, &region,
GF_FOP_WRITE);
- if (wind_needed)
- goto unlock;
+ if (wind_needed)
+ goto unlock;
- if (fd->flags & O_NONBLOCK) {
- gf_log (this->name, GF_LOG_TRACE,
- "returning EAGAIN because fd is "
+ if (fd->flags & O_NONBLOCK) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "returning EAGAIN because fd is "
"O_NONBLOCK");
- op_errno = EAGAIN;
- op_ret = -1;
- goto unlock;
- }
-
- rw = CALLOC (1, sizeof (*rw));
- if (!rw) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- op_ret = -1;
- goto unlock;
- }
-
- rw->stub = fop_writev_stub (frame, pl_writev_cont,
- fd, vector, count, offset,
- iobref);
- if (!rw->stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- op_ret = -1;
- free (rw);
- goto unlock;
- }
-
- rw->region = region;
-
- list_add_tail (&rw->list, &pl_inode->rw_list);
- }
- unlock:
- pthread_mutex_unlock (&pl_inode->mutex);
- }
+ op_errno = EAGAIN;
+ op_ret = -1;
+ goto unlock;
+ }
+
+ rw = GF_CALLOC (1, sizeof (*rw),
+ gf_locks_mt_pl_rw_req_t);
+ if (!rw) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto unlock;
+ }
+
+ rw->stub = fop_writev_stub (frame, pl_writev_cont,
+ fd, vector, count, offset,
+ flags, iobref, xdata);
+ if (!rw->stub) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ GF_FREE (rw);
+ goto unlock;
+ }
+
+ rw->region = region;
+
+ list_add_tail (&rw->list, &pl_inode->rw_list);
+ }
+ unlock:
+ pthread_mutex_unlock (&pl_inode->mutex);
+ }
if (wind_needed)
- STACK_WIND (frame, pl_writev_cbk,
+ STACK_WIND (frame, pl_writev_cbk,
FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev,
- fd, vector, count, offset, iobref);
+ fd, vector, count, offset, flags, iobref, xdata);
+
+ if (op_ret == -1)
+ STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL,
+ NULL);
+
+ return 0;
+}
+
+static int
+__fd_has_locks (pl_inode_t *pl_inode, fd_t *fd)
+{
+ int found = 0;
+ posix_lock_t *l = NULL;
+
+ list_for_each_entry (l, &pl_inode->ext_list, list) {
+ if ((l->fd_num == fd_to_fdnum(fd))) {
+ found = 1;
+ break;
+ }
+ }
+
+ return found;
+}
+
+static posix_lock_t *
+lock_dup (posix_lock_t *lock)
+{
+ posix_lock_t *new_lock = NULL;
- if (op_ret == -1)
- STACK_UNWIND (frame, -1, op_errno, NULL, 0, NULL);
+ new_lock = new_posix_lock (&lock->user_flock, lock->client,
+ lock->client_pid, &lock->owner,
+ (fd_t *)lock->fd_num);
+ return new_lock;
+}
+
+static int
+__dup_locks_to_fdctx (pl_inode_t *pl_inode, fd_t *fd,
+ pl_fdctx_t *fdctx)
+{
+ posix_lock_t *l = NULL;
+ posix_lock_t *duplock = NULL;
+ int ret = 0;
+
+ list_for_each_entry (l, &pl_inode->ext_list, list) {
+ if ((l->fd_num == fd_to_fdnum(fd))) {
+ duplock = lock_dup (l);
+ if (!duplock) {
+ ret = -1;
+ break;
+ }
+
+ list_add_tail (&duplock->list, &fdctx->locks_list);
+ }
+ }
+
+ return ret;
+}
+
+static int
+__copy_locks_to_fdctx (pl_inode_t *pl_inode, fd_t *fd,
+ pl_fdctx_t *fdctx)
+{
+ int ret = 0;
+
+ ret = __dup_locks_to_fdctx (pl_inode, fd, fdctx);
+ if (ret)
+ goto out;
+
+out:
+ return ret;
- return 0;
}
+static void
+pl_mark_eol_lock (posix_lock_t *lock)
+{
+ lock->user_flock.l_type = GF_LK_EOL;
+ return;
+}
+
+static posix_lock_t *
+__get_next_fdctx_lock (pl_fdctx_t *fdctx)
+{
+ posix_lock_t *lock = NULL;
+
+ GF_ASSERT (fdctx);
+
+ if (list_empty (&fdctx->locks_list)) {
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "fdctx lock list empty");
+ goto out;
+ }
+
+ lock = list_entry (fdctx->locks_list.next, typeof (*lock),
+ list);
+
+ GF_ASSERT (lock);
+
+ list_del_init (&lock->list);
+
+out:
+ return lock;
+}
+
+static int
+__set_next_lock_fd (pl_fdctx_t *fdctx, posix_lock_t *reqlock)
+{
+ posix_lock_t *lock = NULL;
+ int ret = 0;
+
+ GF_ASSERT (fdctx);
+
+ lock = __get_next_fdctx_lock (fdctx);
+ if (!lock) {
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "marking EOL in reqlock");
+ pl_mark_eol_lock (reqlock);
+ goto out;
+ }
+
+ reqlock->user_flock = lock->user_flock;
+ reqlock->fl_start = lock->fl_start;
+ reqlock->fl_type = lock->fl_type;
+ reqlock->fl_end = lock->fl_end;
+ reqlock->owner = lock->owner;
+
+out:
+ if (lock)
+ __destroy_lock (lock);
+
+ return ret;
+}
+
+static int
+pl_getlk_fd (xlator_t *this, pl_inode_t *pl_inode,
+ fd_t *fd, posix_lock_t *reqlock)
+{
+ uint64_t tmp = 0;
+ pl_fdctx_t *fdctx = NULL;
+ int ret = 0;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ if (!__fd_has_locks (pl_inode, fd)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "fd=%p has no active locks", fd);
+ ret = 0;
+ goto unlock;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "There are active locks on fd");
+
+ ret = fd_ctx_get (fd, this, &tmp);
+ fdctx = (pl_fdctx_t *)(long) tmp;
+
+ if (list_empty (&fdctx->locks_list)) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "no fdctx -> copying all locks on fd");
+
+ ret = __copy_locks_to_fdctx (pl_inode, fd, fdctx);
+ if (ret) {
+ goto unlock;
+ }
+
+ ret = __set_next_lock_fd (fdctx, reqlock);
+
+ } else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "fdctx present -> returning the next lock");
+ ret = __set_next_lock_fd (fdctx, reqlock);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "could not get next lock of fd");
+ goto unlock;
+ }
+ }
+ }
+
+unlock:
+ pthread_mutex_unlock (&pl_inode->mutex);
+ return ret;
+
+}
int
pl_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t cmd, struct flock *flock)
-{
- transport_t *transport = NULL;
- pid_t client_pid = 0;
- posix_locks_private_t *priv = NULL;
- pl_inode_t *pl_inode = NULL;
- int op_ret = 0;
- int op_errno = 0;
- int can_block = 0;
- posix_lock_t *reqlock = NULL;
- posix_lock_t *conf = NULL;
- int ret = 0;
-
- transport = frame->root->trans;
- client_pid = frame->root->pid;
- priv = this->private;
-
- pl_inode = pl_inode_get (this, fd->inode);
- if (!pl_inode) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
- }
-
- reqlock = new_posix_lock (flock, transport, client_pid);
- if (!reqlock) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
- }
-
- switch (cmd) {
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+{
+ pl_inode_t *pl_inode = NULL;
+ int op_ret = 0;
+ int op_errno = 0;
+ int can_block = 0;
+ posix_lock_t *reqlock = NULL;
+ posix_lock_t *conf = NULL;
+ int ret = 0;
+
+ if ((flock->l_start < 0) || (flock->l_len < 0)) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto unwind;
+ }
+
+ pl_inode = pl_inode_get (this, fd->inode);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ reqlock = new_posix_lock (flock, frame->root->client, frame->root->pid,
+ &frame->root->lk_owner, fd);
+
+ if (!reqlock) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ pl_trace_in (this, frame, fd, NULL, cmd, flock, NULL);
+
+ switch (cmd) {
+
+ case F_RESLK_LCKW:
+ can_block = 1;
+
+ /* fall through */
+ case F_RESLK_LCK:
+ memcpy (&reqlock->user_flock, flock, sizeof (struct gf_flock));
+ reqlock->frame = frame;
+ reqlock->this = this;
+
+ ret = pl_reserve_setlk (this, pl_inode, reqlock,
+ can_block);
+ if (ret < 0) {
+ if (can_block)
+ goto out;
+
+ op_ret = -1;
+ op_errno = -ret;
+ __destroy_lock (reqlock);
+ goto unwind;
+ }
+ /* Finally a getlk and return the call */
+ conf = pl_getlk (pl_inode, reqlock);
+ if (conf)
+ posix_lock_to_flock (conf, flock);
+ break;
+
+ case F_RESLK_UNLCK:
+ reqlock->frame = frame;
+ reqlock->this = this;
+ ret = pl_reserve_unlock (this, pl_inode, reqlock);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = -ret;
+ }
+ __destroy_lock (reqlock);
+ goto unwind;
+
+ break;
+
+ case F_GETLK_FD:
+ reqlock->frame = frame;
+ reqlock->this = this;
+ ret = pl_verify_reservelk (this, pl_inode, reqlock, can_block);
+ GF_ASSERT (ret >= 0);
+
+ ret = pl_getlk_fd (this, pl_inode, fd, reqlock);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "getting locks on fd failed");
+ op_ret = -1;
+ op_errno = ENOLCK;
+ goto unwind;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "Replying with a lock on fd for healing");
+
+ posix_lock_to_flock (reqlock, flock);
+ __destroy_lock (reqlock);
+
+ break;
#if F_GETLK != F_GETLK64
- case F_GETLK64:
+ case F_GETLK64:
#endif
- case F_GETLK:
- conf = pl_getlk (pl_inode, reqlock, GF_LOCK_POSIX);
- posix_lock_to_flock (conf, flock);
- __destroy_lock (reqlock);
+ case F_GETLK:
+ conf = pl_getlk (pl_inode, reqlock);
+ posix_lock_to_flock (conf, flock);
+ __destroy_lock (reqlock);
- break;
+ break;
#if F_SETLKW != F_SETLKW64
- case F_SETLKW64:
+ case F_SETLKW64:
#endif
- case F_SETLKW:
- can_block = 1;
- reqlock->frame = frame;
- reqlock->this = this;
- reqlock->fd = fd;
+ case F_SETLKW:
+ can_block = 1;
+ reqlock->frame = frame;
+ reqlock->this = this;
- /* fall through */
+ /* fall through */
#if F_SETLK != F_SETLK64
- case F_SETLK64:
+ case F_SETLK64:
#endif
- case F_SETLK:
- memcpy (&reqlock->user_flock, flock, sizeof (struct flock));
- ret = pl_setlk (this, pl_inode, reqlock,
- can_block, GF_LOCK_POSIX);
-
- if (ret == -1) {
- if (can_block)
- goto out;
-
- gf_log (this->name, GF_LOG_DEBUG, "returning EAGAIN");
- op_ret = -1;
- op_errno = EAGAIN;
- __destroy_lock (reqlock);
- }
- }
+ case F_SETLK:
+ memcpy (&reqlock->user_flock, flock, sizeof (struct gf_flock));
+ ret = pl_verify_reservelk (this, pl_inode, reqlock, can_block);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "Lock blocked due to conflicting reserve lock");
+ goto out;
+ }
+ ret = pl_setlk (this, pl_inode, reqlock,
+ can_block);
+
+ if (ret == -1) {
+ if ((can_block) && (F_UNLCK != flock->l_type)) {
+ pl_trace_block (this, frame, fd, NULL, cmd, flock, NULL);
+ goto out;
+ }
+ gf_log (this->name, GF_LOG_DEBUG, "returning EAGAIN");
+ op_ret = -1;
+ op_errno = EAGAIN;
+ __destroy_lock (reqlock);
+
+ } else if ((0 == ret) && (F_UNLCK == flock->l_type)) {
+ /* For NLM's last "unlock on fd" detection */
+ if (pl_locks_by_fd (pl_inode, fd))
+ flock->l_type = F_RDLCK;
+ else
+ flock->l_type = F_UNLCK;
+ }
+ }
unwind:
- STACK_UNWIND (frame, op_ret, op_errno, flock);
+ pl_trace_out (this, frame, fd, NULL, cmd, flock, op_ret, op_errno, NULL);
+ pl_update_refkeeper (this, fd->inode);
+
+
+ STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, flock, xdata);
out:
- return 0;
+ return 0;
}
/* TODO: this function just logs, no action required?? */
int
pl_forget (xlator_t *this,
- inode_t *inode)
+ inode_t *inode)
{
- pl_inode_t *pl_inode = NULL;
-
+ pl_inode_t *pl_inode = NULL;
+
posix_lock_t *ext_tmp = NULL;
posix_lock_t *ext_l = NULL;
+ struct list_head posixlks_released;
- posix_lock_t *int_tmp = NULL;
- posix_lock_t *int_l = NULL;
+ pl_inode_lock_t *ino_tmp = NULL;
+ pl_inode_lock_t *ino_l = NULL;
+ struct list_head inodelks_released;
pl_rw_req_t *rw_tmp = NULL;
pl_rw_req_t *rw_req = NULL;
pl_entry_lock_t *entry_tmp = NULL;
pl_entry_lock_t *entry_l = NULL;
+ struct list_head entrylks_released;
- pl_inode = pl_inode_get (this, inode);
+ pl_dom_list_t *dom = NULL;
+ pl_dom_list_t *dom_tmp = NULL;
- if (!list_empty (&pl_inode->rw_list)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Pending R/W requests found, releasing.");
-
- list_for_each_entry_safe (rw_req, rw_tmp, &pl_inode->rw_list,
- list) {
-
- list_del (&rw_req->list);
- FREE (rw_req);
+ INIT_LIST_HEAD (&posixlks_released);
+ INIT_LIST_HEAD (&inodelks_released);
+ INIT_LIST_HEAD (&entrylks_released);
+
+ pl_inode = pl_inode_get (this, inode);
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+
+ if (!list_empty (&pl_inode->rw_list)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Pending R/W requests found, releasing.");
+
+ list_for_each_entry_safe (rw_req, rw_tmp, &pl_inode->rw_list,
+ list) {
+
+ list_del (&rw_req->list);
+ GF_FREE (rw_req);
+ }
}
- }
- if (!list_empty (&pl_inode->ext_list)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Pending fcntl locks found, releasing.");
+ if (!list_empty (&pl_inode->ext_list)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Pending fcntl locks found, releasing.");
- list_for_each_entry_safe (ext_l, ext_tmp, &pl_inode->ext_list,
- list) {
-
- __delete_lock (pl_inode, ext_l);
- __destroy_lock (ext_l);
+ list_for_each_entry_safe (ext_l, ext_tmp, &pl_inode->ext_list,
+ list) {
+
+ __delete_lock (pl_inode, ext_l);
+ if (ext_l->blocked) {
+ list_add_tail (&ext_l->list, &posixlks_released);
+ continue;
+ }
+ __destroy_lock (ext_l);
+ }
}
- }
- if (!list_empty (&pl_inode->int_list)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Pending inode locks found, releasing.");
- list_for_each_entry_safe (int_l, int_tmp, &pl_inode->int_list,
- list) {
-
- __delete_lock (pl_inode, int_l);
- __destroy_lock (int_l);
+ list_for_each_entry_safe (dom, dom_tmp, &pl_inode->dom_list, inode_list) {
+
+ if (!list_empty (&dom->inodelk_list)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Pending inode locks found, releasing.");
+
+ list_for_each_entry_safe (ino_l, ino_tmp, &dom->inodelk_list, list) {
+ __delete_inode_lock (ino_l);
+ __pl_inodelk_unref (ino_l);
+ }
+
+ list_splice_init (&dom->blocked_inodelks, &inodelks_released);
+
+
+ }
+ if (!list_empty (&dom->entrylk_list)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Pending entry locks found, releasing.");
+
+ list_for_each_entry_safe (entry_l, entry_tmp, &dom->entrylk_list, domain_list) {
+ list_del_init (&entry_l->domain_list);
+
+ GF_FREE ((char *)entry_l->basename);
+ GF_FREE (entry_l->connection_id);
+ GF_FREE (entry_l);
+ }
+
+ list_splice_init (&dom->blocked_entrylks, &entrylks_released);
+ }
+
+ list_del (&dom->inode_list);
+ gf_log ("posix-locks", GF_LOG_TRACE,
+ " Cleaning up domain: %s", dom->domain);
+ GF_FREE ((char *)(dom->domain));
+ GF_FREE (dom);
}
- }
- if (!list_empty (&pl_inode->dir_list)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Pending entry locks found, releasing.");
-
- list_for_each_entry_safe (entry_l, entry_tmp,
- &pl_inode->dir_list, inode_list) {
-
- list_del (&entry_l->inode_list);
- FREE (entry_l);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ list_for_each_entry_safe (ext_l, ext_tmp, &posixlks_released, list) {
+
+ STACK_UNWIND_STRICT (lk, ext_l->frame, -1, 0,
+ &ext_l->user_flock, NULL);
+ __destroy_lock (ext_l);
+ }
+
+ list_for_each_entry_safe (ino_l, ino_tmp, &inodelks_released, blocked_locks) {
+
+ STACK_UNWIND_STRICT (inodelk, ino_l->frame, -1, 0, NULL);
+ __pl_inodelk_unref (ino_l);
+ }
+
+ list_for_each_entry_safe (entry_l, entry_tmp, &entrylks_released, blocked_locks) {
+
+ STACK_UNWIND_STRICT (entrylk, entry_l->frame, -1, 0, NULL);
+ GF_FREE ((char *)entry_l->basename);
+ GF_FREE (entry_l->connection_id);
+ GF_FREE (entry_l);
+
+ }
+
+ GF_FREE (pl_inode);
+
+ return 0;
+}
+
+int
+pl_release (xlator_t *this, fd_t *fd)
+{
+ pl_inode_t *pl_inode = NULL;
+ uint64_t tmp_pl_inode = 0;
+ int ret = -1;
+ uint64_t tmp = 0;
+ pl_fdctx_t *fdctx = NULL;
+
+ if (fd == NULL) {
+ goto out;
+ }
+
+ ret = inode_ctx_get (fd->inode, this, &tmp_pl_inode);
+ if (ret != 0)
+ goto out;
+
+ pl_inode = (pl_inode_t *)(long)tmp_pl_inode;
+
+ pl_trace_release (this, fd);
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "Releasing all locks with fd %p", fd);
+
+ delete_locks_of_fd (this, pl_inode, fd);
+ pl_update_refkeeper (this, fd->inode);
+
+ ret = fd_ctx_del (fd, this, &tmp);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Could not get fdctx");
+ goto out;
+ }
+
+ fdctx = (pl_fdctx_t *)(long)tmp;
+
+ GF_FREE (fdctx);
+out:
+ return ret;
+}
+
+int
+pl_releasedir (xlator_t *this, fd_t *fd)
+{
+ int ret = -1;
+ uint64_t tmp = 0;
+ pl_fdctx_t *fdctx = NULL;
+
+ if (fd == NULL) {
+ goto out;
+ }
+
+ ret = fd_ctx_del (fd, this, &tmp);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Could not get fdctx");
+ goto out;
+ }
+
+ fdctx = (pl_fdctx_t *)(long)tmp;
+
+ GF_FREE (fdctx);
+out:
+ return ret;
+}
+
+int32_t
+__get_posixlk_count (xlator_t *this, pl_inode_t *pl_inode)
+{
+ posix_lock_t *lock = NULL;
+ int32_t count = 0;
+
+ list_for_each_entry (lock, &pl_inode->ext_list, list) {
+
+ count++;
+ }
+
+ return count;
+}
+
+int32_t
+get_posixlk_count (xlator_t *this, inode_t *inode)
+{
+ pl_inode_t *pl_inode = NULL;
+ uint64_t tmp_pl_inode = 0;
+ int ret = 0;
+ int32_t count = 0;
+
+ ret = inode_ctx_get (inode, this, &tmp_pl_inode);
+ if (ret != 0) {
+ goto out;
+ }
+
+ pl_inode = (pl_inode_t *)(long) tmp_pl_inode;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ count =__get_posixlk_count (this, pl_inode);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+out:
+ return count;
+}
+
+void
+pl_parent_entrylk_xattr_fill (xlator_t *this, inode_t *parent,
+ char *basename, dict_t *dict)
+{
+ uint32_t entrylk = 0;
+ int ret = -1;
+
+ if (!parent || !basename || !strlen (basename))
+ goto out;
+ entrylk = check_entrylk_on_basename (this, parent, basename);
+out:
+ ret = dict_set_uint32 (dict, GLUSTERFS_PARENT_ENTRYLK, entrylk);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ " dict_set failed on key %s", GLUSTERFS_PARENT_ENTRYLK);
+ }
+}
+
+void
+pl_entrylk_xattr_fill (xlator_t *this, inode_t *inode,
+ dict_t *dict)
+{
+ int32_t count = 0;
+ int ret = -1;
+
+ count = get_entrylk_count (this, inode);
+ ret = dict_set_int32 (dict, GLUSTERFS_ENTRYLK_COUNT, count);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ " dict_set failed on key %s", GLUSTERFS_ENTRYLK_COUNT);
+ }
+
+}
+
+void
+pl_inodelk_xattr_fill (xlator_t *this, inode_t *inode, dict_t *dict,
+ gf_boolean_t per_dom)
+{
+ int32_t count = 0;
+ int ret = -1;
+ char *domname = NULL;
+
+
+ if (per_dom){
+ ret = dict_get_str (dict, GLUSTERFS_INODELK_DOM_COUNT,
+ &domname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get "
+ "value for key %s",GLUSTERFS_INODELK_DOM_COUNT);
+ goto out;
}
- }
+ }
+
+ count = get_inodelk_count (this, inode, domname);
- FREE (pl_inode);
+ ret = dict_set_int32 (dict, GLUSTERFS_INODELK_COUNT, count);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG, "Failed to set count for "
+ "key %s", GLUSTERFS_INODELK_COUNT);
+ }
- return 0;
+out:
+ return;
}
+void
+pl_posixlk_xattr_fill (xlator_t *this, inode_t *inode,
+ dict_t *dict)
+{
+ int32_t count = 0;
+ int ret = -1;
+
+ count = get_posixlk_count (this, inode);
+ ret = dict_set_int32 (dict, GLUSTERFS_POSIXLK_COUNT, count);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ " dict_set failed on key %s", GLUSTERFS_POSIXLK_COUNT);
+ }
+
+}
+
+int32_t
+pl_lookup_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct iatt *buf,
+ dict_t *xdata,
+ struct iatt *postparent)
+{
+ pl_local_t *local = NULL;
+
+ GF_VALIDATE_OR_GOTO (this->name, frame->local, out);
+
+ if (op_ret)
+ goto out;
+
+ local = frame->local;
+
+ if (local->parent_entrylk_req)
+ pl_parent_entrylk_xattr_fill (this, local->loc.parent,
+ (char*)local->loc.name, xdata);
+ if (local->entrylk_count_req)
+ pl_entrylk_xattr_fill (this, inode, xdata);
+ if (local->inodelk_count_req)
+ pl_inodelk_xattr_fill (this, inode, xdata, _gf_false);
+ if (local->inodelk_dom_count_req)
+ pl_inodelk_xattr_fill (this, inode, xdata, _gf_true);
+ if (local->posixlk_count_req)
+ pl_posixlk_xattr_fill (this, inode, xdata);
+
+
+out:
+ local = frame->local;
+ frame->local = NULL;
+
+ if (local != NULL) {
+ loc_wipe (&local->loc);
+ mem_put (local);
+ }
+
+ STACK_UNWIND_STRICT (
+ lookup,
+ frame,
+ op_ret,
+ op_errno,
+ inode,
+ buf,
+ xdata,
+ postparent);
+ return 0;
+}
+
+int32_t
+pl_lookup (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *xdata)
+{
+ pl_local_t *local = NULL;
+ int ret = -1;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (loc, out);
+
+ local = mem_get0 (this->local_pool);
+ GF_VALIDATE_OR_GOTO (this->name, local, out);
+
+ if (xdata) {
+ if (dict_get (xdata, GLUSTERFS_ENTRYLK_COUNT))
+ local->entrylk_count_req = 1;
+ if (dict_get (xdata, GLUSTERFS_INODELK_COUNT))
+ local->inodelk_count_req = 1;
+ if (dict_get (xdata, GLUSTERFS_INODELK_DOM_COUNT))
+ local->inodelk_dom_count_req = 1;
+ if (dict_get (xdata, GLUSTERFS_POSIXLK_COUNT))
+ local->posixlk_count_req = 1;
+ if (dict_get (xdata, GLUSTERFS_PARENT_ENTRYLK))
+ local->parent_entrylk_req = 1;
+ }
+
+ frame->local = local;
+ loc_copy (&local->loc, loc);
+
+ STACK_WIND (frame,
+ pl_lookup_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup,
+ loc, xdata);
+ ret = 0;
+out:
+ if (ret == -1)
+ STACK_UNWIND_STRICT (lookup, frame, -1, 0, NULL,
+ NULL, NULL, NULL);
+
+ return 0;
+}
+int
+pl_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries, dict_t *xdata)
+{
+ pl_local_t *local = NULL;
+ gf_dirent_t *entry = NULL;
+
+ local = frame->local;
+
+ if (op_ret <= 0)
+ goto unwind;
+
+ list_for_each_entry (entry, &entries->list, list) {
+ if (local->entrylk_count_req)
+ pl_entrylk_xattr_fill (this, entry->inode, entry->dict);
+ if (local->inodelk_count_req)
+ pl_inodelk_xattr_fill (this, entry->inode, entry->dict,
+ _gf_false);
+ if (local->inodelk_dom_count_req)
+ pl_inodelk_xattr_fill (this, entry->inode, entry->dict,
+ _gf_true);
+ if (local->posixlk_count_req)
+ pl_posixlk_xattr_fill (this, entry->inode, entry->dict);
+ }
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
+
+ if (local)
+ mem_put (local);
+
+ return 0;
+}
+
+int
+pl_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict)
+{
+ pl_local_t *local = NULL;
+
+ local = mem_get0 (this->local_pool);
+ GF_VALIDATE_OR_GOTO (this->name, local, out);
+
+ if (dict) {
+ if (dict_get (dict, GLUSTERFS_ENTRYLK_COUNT))
+ local->entrylk_count_req = 1;
+ if (dict_get (dict, GLUSTERFS_INODELK_COUNT))
+ local->inodelk_count_req = 1;
+ if (dict_get (dict, GLUSTERFS_INODELK_DOM_COUNT))
+ local->inodelk_dom_count_req = 1;
+ if (dict_get (dict, GLUSTERFS_POSIXLK_COUNT))
+ local->posixlk_count_req = 1;
+ }
+
+ frame->local = local;
+
+ STACK_WIND (frame, pl_readdirp_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp,
+ fd, size, offset, dict);
+
+ return 0;
+out:
+ STACK_UNWIND_STRICT (readdirp, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+}
+
+
+void
+pl_dump_lock (char *str, int size, struct gf_flock *flock,
+ gf_lkowner_t *owner, void *trans, char *conn_id,
+ time_t *granted_time, time_t *blkd_time, gf_boolean_t active)
+{
+ char *type_str = NULL;
+ char granted[32] = {0,};
+ char blocked[32] = {0,};
+
+ switch (flock->l_type) {
+ case F_RDLCK:
+ type_str = "READ";
+ break;
+ case F_WRLCK:
+ type_str = "WRITE";
+ break;
+ case F_UNLCK:
+ type_str = "UNLOCK";
+ break;
+ default:
+ type_str = "UNKNOWN";
+ break;
+ }
+
+ if (active) {
+ if (blkd_time && *blkd_time == 0) {
+ snprintf (str, size, RANGE_GRNTD_FMT,
+ type_str, flock->l_whence,
+ (unsigned long long) flock->l_start,
+ (unsigned long long) flock->l_len,
+ (unsigned long long) flock->l_pid,
+ lkowner_utoa (owner), trans, conn_id,
+ ctime_r (granted_time, granted));
+ } else {
+ snprintf (str, size, RANGE_BLKD_GRNTD_FMT,
+ type_str, flock->l_whence,
+ (unsigned long long) flock->l_start,
+ (unsigned long long) flock->l_len,
+ (unsigned long long) flock->l_pid,
+ lkowner_utoa (owner), trans, conn_id,
+ ctime_r (blkd_time, blocked),
+ ctime_r (granted_time, granted));
+ }
+ }
+ else {
+ snprintf (str, size, RANGE_BLKD_FMT,
+ type_str, flock->l_whence,
+ (unsigned long long) flock->l_start,
+ (unsigned long long) flock->l_len,
+ (unsigned long long) flock->l_pid,
+ lkowner_utoa (owner), trans, conn_id,
+ ctime_r (blkd_time, blocked));
+ }
+
+}
+
+void
+__dump_entrylks (pl_inode_t *pl_inode)
+{
+ pl_dom_list_t *dom = NULL;
+ pl_entry_lock_t *lock = NULL;
+ char blocked[32] = {0,};
+ char granted[32] = {0,};
+ int count = 0;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0,};
+
+ char tmp[256];
+
+ list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
+
+ count = 0;
+
+ gf_proc_dump_build_key(key,
+ "lock-dump.domain",
+ "domain");
+ gf_proc_dump_write(key, "%s", dom->domain);
+
+ list_for_each_entry (lock, &dom->entrylk_list, domain_list) {
+
+ gf_proc_dump_build_key(key,
+ "xlator.feature.locks.lock-dump.domain.entrylk",
+ "entrylk[%d](ACTIVE)", count );
+ if (lock->blkd_time.tv_sec == 0 && lock->blkd_time.tv_usec == 0) {
+ snprintf (tmp, 256, ENTRY_GRNTD_FMT,
+ lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
+ "ENTRYLK_WRLCK", lock->basename,
+ (unsigned long long) lock->client_pid,
+ lkowner_utoa (&lock->owner), lock->trans,
+ lock->connection_id,
+ ctime_r (&lock->granted_time.tv_sec, granted));
+ } else {
+ snprintf (tmp, 256, ENTRY_BLKD_GRNTD_FMT,
+ lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
+ "ENTRYLK_WRLCK", lock->basename,
+ (unsigned long long) lock->client_pid,
+ lkowner_utoa (&lock->owner), lock->trans,
+ lock->connection_id,
+ ctime_r (&lock->blkd_time.tv_sec, blocked),
+ ctime_r (&lock->granted_time.tv_sec, granted));
+ }
+
+ gf_proc_dump_write(key, tmp);
+
+ count++;
+ }
+
+ list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) {
+
+ gf_proc_dump_build_key(key,
+ "xlator.feature.locks.lock-dump.domain.entrylk",
+ "entrylk[%d](BLOCKED)", count );
+ snprintf (tmp, 256, ENTRY_BLKD_FMT,
+ lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
+ "ENTRYLK_WRLCK", lock->basename,
+ (unsigned long long) lock->client_pid,
+ lkowner_utoa (&lock->owner), lock->trans,
+ lock->connection_id,
+ ctime_r (&lock->blkd_time.tv_sec, blocked));
+
+ gf_proc_dump_write(key, tmp);
+
+ count++;
+ }
+
+ }
+
+}
+
+void
+dump_entrylks (pl_inode_t *pl_inode)
+{
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __dump_entrylks (pl_inode);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+}
+
+void
+__dump_inodelks (pl_inode_t *pl_inode)
+{
+ pl_dom_list_t *dom = NULL;
+ pl_inode_lock_t *lock = NULL;
+ int count = 0;
+ char key[GF_DUMP_MAX_BUF_LEN];
+
+ char tmp[256];
+
+ list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
+
+ count = 0;
+
+ gf_proc_dump_build_key(key,
+ "lock-dump.domain",
+ "domain");
+ gf_proc_dump_write(key, "%s", dom->domain);
+
+ list_for_each_entry (lock, &dom->inodelk_list, list) {
+
+ gf_proc_dump_build_key(key,
+ "inodelk",
+ "inodelk[%d](ACTIVE)",count );
+
+ SET_FLOCK_PID (&lock->user_flock, lock);
+ pl_dump_lock (tmp, 256, &lock->user_flock,
+ &lock->owner,
+ lock->client, lock->connection_id,
+ &lock->granted_time.tv_sec,
+ &lock->blkd_time.tv_sec,
+ _gf_true);
+ gf_proc_dump_write(key, tmp);
+
+ count++;
+ }
+
+ list_for_each_entry (lock, &dom->blocked_inodelks, blocked_locks) {
+
+ gf_proc_dump_build_key(key,
+ "inodelk",
+ "inodelk[%d](BLOCKED)",count );
+ SET_FLOCK_PID (&lock->user_flock, lock);
+ pl_dump_lock (tmp, 256, &lock->user_flock,
+ &lock->owner,
+ lock->client, lock->connection_id,
+ 0, &lock->blkd_time.tv_sec,
+ _gf_false);
+ gf_proc_dump_write(key, tmp);
+
+ count++;
+ }
+
+ }
+
+}
+
+void
+dump_inodelks (pl_inode_t *pl_inode)
+{
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __dump_inodelks (pl_inode);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+}
+
+void
+__dump_posixlks (pl_inode_t *pl_inode)
+{
+ posix_lock_t *lock = NULL;
+ int count = 0;
+ char key[GF_DUMP_MAX_BUF_LEN];
+
+ char tmp[256];
+
+ list_for_each_entry (lock, &pl_inode->ext_list, list) {
+
+ SET_FLOCK_PID (&lock->user_flock, lock);
+ gf_proc_dump_build_key(key,
+ "posixlk",
+ "posixlk[%d](%s)",
+ count,
+ lock->blocked ? "BLOCKED" : "ACTIVE");
+ pl_dump_lock (tmp, 256, &lock->user_flock,
+ &lock->owner, lock->client, NULL,
+ &lock->granted_time.tv_sec, &lock->blkd_time.tv_sec,
+ (lock->blocked)? _gf_false: _gf_true);
+ gf_proc_dump_write(key, tmp);
+
+ count++;
+ }
+}
+
+void
+dump_posixlks (pl_inode_t *pl_inode)
+{
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __dump_posixlks (pl_inode);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+}
int32_t
pl_dump_inode_priv (xlator_t *this, inode_t *inode)
@@ -776,145 +2408,344 @@ pl_dump_inode_priv (xlator_t *this, inode_t *inode)
int ret = -1;
uint64_t tmp_pl_inode = 0;
pl_inode_t *pl_inode = NULL;
- char key[GF_DUMP_MAX_BUF_LEN];
+ char *pathname = NULL;
+ gf_boolean_t section_added = _gf_false;
- if (!inode)
- return -1;
+ int count = 0;
- ret = inode_ctx_get (inode, this, &tmp_pl_inode);
+ if (!inode) {
+ errno = EINVAL;
+ goto out;
+ }
+
+ ret = TRY_LOCK (&inode->lock);
+ if (ret)
+ goto out;
+ {
+ ret = __inode_ctx_get (inode, this, &tmp_pl_inode);
+ if (ret)
+ goto unlock;
+ }
+unlock:
+ UNLOCK (&inode->lock);
+ if (ret)
+ goto out;
- if (ret != 0)
- return ret;
-
pl_inode = (pl_inode_t *)(long)tmp_pl_inode;
+ if (!pl_inode) {
+ ret = -1;
+ goto out;
+ }
- if (!pl_inode)
- return -1;
+ gf_proc_dump_add_section("xlator.features.locks.%s.inode", this->name);
+ section_added = _gf_true;
+
+ /*We are safe to call __inode_path since we have the
+ * inode->table->lock */
+ __inode_path (inode, NULL, &pathname);
+ if (pathname)
+ gf_proc_dump_write ("path", "%s", pathname);
+
+ gf_proc_dump_write("mandatory", "%d", pl_inode->mandatory);
+
+ ret = pthread_mutex_trylock (&pl_inode->mutex);
+ if (ret)
+ goto out;
+ {
+ count = __get_entrylk_count (this, pl_inode);
+ if (count) {
+ gf_proc_dump_write("entrylk-count", "%d", count);
+ __dump_entrylks (pl_inode);
+ }
- gf_proc_dump_build_key(key,
- "xlator.feature.locks.inode",
- "%ld.mandatory",inode->ino);
- gf_proc_dump_write(key, "%d", pl_inode->mandatory);
+ count = __get_inodelk_count (this, pl_inode, NULL);
+ if (count) {
+ gf_proc_dump_write("inodelk-count", "%d", count);
+ __dump_inodelks (pl_inode);
+ }
- return 0;
+ count = __get_posixlk_count (this, pl_inode);
+ if (count) {
+ gf_proc_dump_write("posixlk-count", "%d", count);
+ __dump_posixlks (pl_inode);
+ }
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+out:
+ GF_FREE (pathname);
+
+ if (ret && inode) {
+ if (!section_added)
+ gf_proc_dump_add_section ("xlator.features.locks.%s."
+ "inode", this->name);
+ gf_proc_dump_write ("Unable to print lock state", "(Lock "
+ "acquisition failure) %s",
+ uuid_utoa (inode->gfid));
+ }
+ return ret;
}
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+ if (!this)
+ return ret;
-/*
- * pl_dump_inode - inode dump function for posix locks
- *
- */
-int
-pl_dump_inode (xlator_t *this)
+ ret = xlator_mem_acct_init (this, gf_locks_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+
+pl_ctx_t*
+pl_ctx_get (client_t *client, xlator_t *xlator)
{
+ void *tmp = NULL;
+ pl_ctx_t *ctx = NULL;
+
+ client_ctx_get (client, xlator, &tmp);
+
+ ctx = tmp;
- assert(this);
-
- if (this->itable) {
- inode_table_dump(this->itable,
- "xlator.features.locks.inode_table");
+ if (ctx != NULL)
+ goto out;
+
+ ctx = GF_CALLOC (1, sizeof (pl_ctx_t), gf_locks_mt_posix_lock_t);
+
+ if (ctx == NULL)
+ goto out;
+
+ ctx->ltable = pl_lock_table_new();
+
+ if (ctx->ltable == NULL) {
+ GF_FREE (ctx);
+ ctx = NULL;
+ goto out;
}
- return 0;
+ LOCK_INIT (&ctx->ltable_lock);
+
+ if (client_ctx_set (client, xlator, ctx) != 0) {
+ LOCK_DESTROY (&ctx->ltable_lock);
+ GF_FREE (ctx->ltable);
+ GF_FREE (ctx);
+ ctx = NULL;
+ }
+out:
+ return ctx;
}
+static void
+ltable_delete_locks (struct _lock_table *ltable)
+{
+ struct _locker *locker = NULL;
+ struct _locker *tmp = NULL;
+
+ list_for_each_entry_safe (locker, tmp, &ltable->inodelk_lockers, lockers) {
+ if (locker->fd)
+ pl_del_locker (ltable, locker->volume, &locker->loc,
+ locker->fd, &locker->owner,
+ GF_FOP_INODELK);
+ GF_FREE (locker->volume);
+ GF_FREE (locker);
+ }
+
+ list_for_each_entry_safe (locker, tmp, &ltable->entrylk_lockers, lockers) {
+ if (locker->fd)
+ pl_del_locker (ltable, locker->volume, &locker->loc,
+ locker->fd, &locker->owner,
+ GF_FOP_ENTRYLK);
+ GF_FREE (locker->volume);
+ GF_FREE (locker);
+ }
+ GF_FREE (ltable);
+}
+
+
+static int32_t
+destroy_cbk (xlator_t *this, client_t *client)
+{
+ void *tmp = NULL;
+ pl_ctx_t *locks_ctx = NULL;
+
+ client_ctx_del (client, this, &tmp);
+
+ if (tmp == NULL)
+ return 0
+;
+ locks_ctx = tmp;
+ if (locks_ctx->ltable)
+ ltable_delete_locks (locks_ctx->ltable);
+
+ LOCK_DESTROY (&locks_ctx->ltable_lock);
+ GF_FREE (locks_ctx);
+
+ return 0;
+}
+
+
+static int32_t
+disconnect_cbk (xlator_t *this, client_t *client)
+{
+ int32_t ret = 0;
+ pl_ctx_t *locks_ctx = NULL;
+ struct _lock_table *ltable = NULL;
+
+ locks_ctx = pl_ctx_get (client, this);
+ if (locks_ctx == NULL) {
+ gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed");
+ goto out;
+ }
+
+ LOCK (&locks_ctx->ltable_lock);
+ {
+ if (locks_ctx->ltable) {
+ ltable = locks_ctx->ltable;
+ locks_ctx->ltable = pl_lock_table_new ();
+ }
+ }
+ UNLOCK (&locks_ctx->ltable_lock);
+
+ if (ltable)
+ ltable_delete_locks (ltable);
+
+out:
+ return ret;
+}
int
init (xlator_t *this)
{
- posix_locks_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- data_t *mandatory = NULL;
-
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "FATAL: posix-locks should have exactly one child");
- return -1;
- }
+ posix_locks_private_t *priv = NULL;
+ xlator_list_t *trav = NULL;
+ data_t *mandatory = NULL;
+ data_t *trace = NULL;
+ int ret = -1;
+
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "FATAL: posix-locks should have exactly one child");
+ goto out;
+ }
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "Volume is dangling. Please check the volume file.");
- }
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Volume is dangling. Please check the volume file.");
+ }
- trav = this->children;
- while (trav->xlator->children)
- trav = trav->xlator->children;
+ trav = this->children;
+ while (trav->xlator->children)
+ trav = trav->xlator->children;
- if (strncmp ("storage/", trav->xlator->type, 8)) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "'locks' translator is not loaded over a storage "
+ if (strncmp ("storage/", trav->xlator->type, 8)) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "'locks' translator is not loaded over a storage "
"translator");
- return -1;
- }
+ goto out;
+ }
- priv = CALLOC (1, sizeof (*priv));
+ priv = GF_CALLOC (1, sizeof (*priv),
+ gf_locks_mt_posix_locks_private_t);
+
+ mandatory = dict_get (this->options, "mandatory-locks");
+ if (mandatory)
+ gf_log (this->name, GF_LOG_WARNING,
+ "mandatory locks not supported in this minor release.");
+
+ trace = dict_get (this->options, "trace");
+ if (trace) {
+ if (gf_string2boolean (trace->data,
+ &priv->trace) == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "'trace' takes on only boolean values.");
+ goto out;
+ }
+ }
- mandatory = dict_get (this->options, "mandatory-locks");
- if (mandatory) {
- if (gf_string2boolean (mandatory->data,
- &priv->mandatory) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "'mandatory-locks' takes on only boolean "
- "values.");
- return -1;
- }
- }
+ this->local_pool = mem_pool_new (pl_local_t, 32);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto out;
+ }
- this->private = priv;
- return 0;
+ this->private = priv;
+ ret = 0;
+
+out:
+ if (ret) {
+ GF_FREE (priv);
+ }
+ return ret;
}
int
fini (xlator_t *this)
{
- posix_locks_private_t *priv = NULL;
+ posix_locks_private_t *priv = NULL;
- priv = this->private;
- free (priv);
+ priv = this->private;
+ if (!priv)
+ return 0;
+ this->private = NULL;
+ GF_FREE (priv->brickname);
+ GF_FREE (priv);
- return 0;
+ return 0;
}
int
-pl_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct flock *flock);
+pl_inodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata);
int
-pl_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct flock *flock);
+pl_finodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata);
int
-pl_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type);
+pl_entrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
int
-pl_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type);
+pl_fentrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
struct xlator_fops fops = {
- .create = pl_create,
- .truncate = pl_truncate,
- .ftruncate = pl_ftruncate,
- .open = pl_open,
- .readv = pl_readv,
- .writev = pl_writev,
- .lk = pl_lk,
- .inodelk = pl_inodelk,
- .finodelk = pl_finodelk,
- .entrylk = pl_entrylk,
- .fentrylk = pl_fentrylk,
- .flush = pl_flush,
-};
-
-
-struct xlator_mops mops = {
+ .lookup = pl_lookup,
+ .create = pl_create,
+ .truncate = pl_truncate,
+ .ftruncate = pl_ftruncate,
+ .open = pl_open,
+ .readv = pl_readv,
+ .writev = pl_writev,
+ .lk = pl_lk,
+ .inodelk = pl_inodelk,
+ .finodelk = pl_finodelk,
+ .entrylk = pl_entrylk,
+ .fentrylk = pl_fentrylk,
+ .flush = pl_flush,
+ .opendir = pl_opendir,
+ .readdirp = pl_readdirp,
+ .getxattr = pl_getxattr,
+ .fgetxattr = pl_fgetxattr,
+ .fsetxattr = pl_fsetxattr,
};
struct xlator_dumpops dumpops = {
@@ -922,13 +2753,20 @@ struct xlator_dumpops dumpops = {
};
struct xlator_cbks cbks = {
- .forget = pl_forget,
+ .forget = pl_forget,
+ .release = pl_release,
+ .releasedir = pl_releasedir,
+ .client_destroy = destroy_cbk,
+ .client_disconnect = disconnect_cbk,
};
struct volume_options options[] = {
- { .key = { "mandatory-locks", "mandatory" },
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {NULL} },
+ { .key = { "mandatory-locks", "mandatory" },
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = { "trace" },
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = {NULL} },
};
diff --git a/xlators/features/locks/src/reservelk.c b/xlators/features/locks/src/reservelk.c
new file mode 100644
index 000000000..11abd26d8
--- /dev/null
+++ b/xlators/features/locks/src/reservelk.c
@@ -0,0 +1,443 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "compat.h"
+#include "xlator.h"
+#include "inode.h"
+#include "logging.h"
+#include "common-utils.h"
+#include "list.h"
+
+#include "locks.h"
+#include "common.h"
+
+void
+__delete_reserve_lock (posix_lock_t *lock)
+{
+ list_del (&lock->list);
+}
+
+void
+__destroy_reserve_lock (posix_lock_t *lock)
+{
+ GF_FREE (lock);
+}
+
+/* Return true if the two reservelks have exactly same lock boundaries */
+int
+reservelks_equal (posix_lock_t *l1, posix_lock_t *l2)
+{
+ if ((l1->fl_start == l2->fl_start) &&
+ (l1->fl_end == l2->fl_end))
+ return 1;
+
+ return 0;
+}
+
+/* Determine if lock is grantable or not */
+static posix_lock_t *
+__reservelk_grantable (pl_inode_t *pl_inode, posix_lock_t *lock)
+{
+ xlator_t *this = NULL;
+ posix_lock_t *l = NULL;
+ posix_lock_t *ret_lock = NULL;
+
+ this = THIS;
+
+ if (list_empty (&pl_inode->reservelk_list)) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "No reservelks in list");
+ goto out;
+ }
+ list_for_each_entry (l, &pl_inode->reservelk_list, list){
+ if (reservelks_equal (lock, l)) {
+ ret_lock = l;
+ break;
+ }
+ }
+out:
+ return ret_lock;
+}
+
+static inline int
+__same_owner_reservelk (posix_lock_t *l1, posix_lock_t *l2)
+{
+ return (is_same_lkowner (&l1->owner, &l2->owner));
+
+}
+
+static posix_lock_t *
+__matching_reservelk (pl_inode_t *pl_inode, posix_lock_t *lock)
+{
+ posix_lock_t *l = NULL;
+
+ if (list_empty (&pl_inode->reservelk_list)) {
+ gf_log ("posix-locks", GF_LOG_TRACE,
+ "reservelk list empty");
+ return NULL;
+ }
+
+ list_for_each_entry (l, &pl_inode->reservelk_list, list) {
+ if (reservelks_equal (l, lock)) {
+ gf_log ("posix-locks", GF_LOG_TRACE,
+ "equal reservelk found");
+ break;
+ }
+ }
+
+ return l;
+}
+
+static int
+__reservelk_conflict (xlator_t *this, pl_inode_t *pl_inode,
+ posix_lock_t *lock)
+{
+ posix_lock_t *conf = NULL;
+ int ret = 0;
+
+ conf = __matching_reservelk (pl_inode, lock);
+ if (conf) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "Matching reservelk found");
+ if (__same_owner_reservelk (lock, conf)) {
+ list_del_init (&conf->list);
+ gf_log (this->name, GF_LOG_TRACE,
+ "Removing the matching reservelk for setlk to progress");
+ GF_FREE (conf);
+ ret = 0;
+ } else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "Conflicting reservelk found");
+ ret = 1;
+ }
+
+ }
+ return ret;
+
+}
+
+int
+pl_verify_reservelk (xlator_t *this, pl_inode_t *pl_inode,
+ posix_lock_t *lock, int can_block)
+{
+ int ret = 0;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ if (__reservelk_conflict (this, pl_inode, lock)) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "Found conflicting reservelk. Blocking until reservelk is unlocked.");
+ lock->blocked = can_block;
+ list_add_tail (&lock->list, &pl_inode->blocked_calls);
+ ret = -1;
+ goto unlock;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "no conflicting reservelk found. Call continuing");
+ ret = 0;
+
+ }
+unlock:
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ return ret;
+
+}
+
+
+/* Determines if lock can be granted and adds the lock. If the lock
+ * is blocking, adds it to the blocked_reservelks.
+ */
+static int
+__lock_reservelk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
+ int can_block)
+{
+ posix_lock_t *conf = NULL;
+ int ret = -EINVAL;
+
+ conf = __reservelk_grantable (pl_inode, lock);
+ if (conf){
+ ret = -EAGAIN;
+ if (can_block == 0)
+ goto out;
+
+ list_add_tail (&lock->list, &pl_inode->blocked_reservelks);
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lkowner_utoa (&lock->owner),
+ lock->user_flock.l_start,
+ lock->user_flock.l_len);
+
+
+ goto out;
+ }
+
+ list_add (&lock->list, &pl_inode->reservelk_list);
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+static posix_lock_t *
+find_matching_reservelk (posix_lock_t *lock, pl_inode_t *pl_inode)
+{
+ posix_lock_t *l = NULL;
+ list_for_each_entry (l, &pl_inode->reservelk_list, list) {
+ if (reservelks_equal (l, lock))
+ return l;
+ }
+ return NULL;
+}
+
+/* Set F_UNLCK removes a lock which has the exact same lock boundaries
+ * as the UNLCK lock specifies. If such a lock is not found, returns invalid
+ */
+static posix_lock_t *
+__reserve_unlock_lock (xlator_t *this, posix_lock_t *lock, pl_inode_t *pl_inode)
+{
+
+ posix_lock_t *conf = NULL;
+
+ conf = find_matching_reservelk (lock, pl_inode);
+ if (!conf) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ " Matching lock not found for unlock");
+ goto out;
+ }
+ __delete_reserve_lock (conf);
+ gf_log (this->name, GF_LOG_DEBUG,
+ " Matching lock found for unlock");
+
+out:
+ return conf;
+
+
+}
+
+static void
+__grant_blocked_reserve_locks (xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted)
+{
+ int bl_ret = 0;
+ posix_lock_t *bl = NULL;
+ posix_lock_t *tmp = NULL;
+
+ struct list_head blocked_list;
+
+ INIT_LIST_HEAD (&blocked_list);
+ list_splice_init (&pl_inode->blocked_reservelks, &blocked_list);
+
+ list_for_each_entry_safe (bl, tmp, &blocked_list, list) {
+
+ list_del_init (&bl->list);
+
+ bl_ret = __lock_reservelk (this, pl_inode, bl, 1);
+
+ if (bl_ret == 0) {
+ list_add (&bl->list, granted);
+ }
+ }
+ return;
+}
+
+/* Grant all reservelks blocked on lock(s) */
+void
+grant_blocked_reserve_locks (xlator_t *this, pl_inode_t *pl_inode)
+{
+ struct list_head granted;
+ posix_lock_t *lock = NULL;
+ posix_lock_t *tmp = NULL;
+
+ INIT_LIST_HEAD (&granted);
+
+ if (list_empty (&pl_inode->blocked_reservelks)) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "No blocked locks to be granted");
+ return;
+ }
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __grant_blocked_reserve_locks (this, pl_inode, &granted);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ list_for_each_entry_safe (lock, tmp, &granted, list) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Granted",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lkowner_utoa (&lock->owner),
+ lock->user_flock.l_start,
+ lock->user_flock.l_len);
+
+ STACK_UNWIND_STRICT (lk, lock->frame, 0, 0, &lock->user_flock,
+ NULL);
+ }
+
+}
+
+static void
+__grant_blocked_lock_calls (xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted)
+{
+ int bl_ret = 0;
+ posix_lock_t *bl = NULL;
+ posix_lock_t *tmp = NULL;
+
+ struct list_head blocked_list;
+
+ INIT_LIST_HEAD (&blocked_list);
+ list_splice_init (&pl_inode->blocked_reservelks, &blocked_list);
+
+ list_for_each_entry_safe (bl, tmp, &blocked_list, list) {
+
+ list_del_init (&bl->list);
+
+ bl_ret = pl_verify_reservelk (this, pl_inode, bl, bl->blocked);
+
+ if (bl_ret == 0) {
+ list_add_tail (&bl->list, granted);
+ }
+ }
+ return;
+}
+
+void
+grant_blocked_lock_calls (xlator_t *this, pl_inode_t *pl_inode)
+{
+ struct list_head granted;
+ posix_lock_t *lock = NULL;
+ posix_lock_t *tmp = NULL;
+ fd_t *fd = NULL;
+
+ int can_block = 0;
+ int32_t cmd = 0;
+ int ret = 0;
+
+ if (list_empty (&pl_inode->blocked_calls)) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "No blocked lock calls to be granted");
+ return;
+ }
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ __grant_blocked_lock_calls (this, pl_inode, &granted);
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ list_for_each_entry_safe (lock, tmp, &granted, list) {
+ fd = fd_from_fdnum (lock);
+
+ if (lock->blocked) {
+ can_block = 1;
+ cmd = F_SETLKW;
+ }
+ else
+ cmd = F_SETLK;
+
+ lock->blocked = 0;
+ ret = pl_setlk (this, pl_inode, lock, can_block);
+ if (ret == -1) {
+ if (can_block) {
+ pl_trace_block (this, lock->frame, fd, NULL,
+ cmd, &lock->user_flock, NULL);
+ continue;
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG, "returning EAGAIN");
+ pl_trace_out (this, lock->frame, fd, NULL, cmd,
+ &lock->user_flock, -1, EAGAIN, NULL);
+ pl_update_refkeeper (this, fd->inode);
+ STACK_UNWIND_STRICT (lk, lock->frame, -1,
+ EAGAIN, &lock->user_flock,
+ NULL);
+ __destroy_lock (lock);
+ }
+ }
+
+ }
+
+}
+
+
+int
+pl_reserve_unlock (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock)
+{
+ posix_lock_t *retlock = NULL;
+ int ret = -1;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+ retlock = __reserve_unlock_lock (this, lock, pl_inode);
+ if (!retlock) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Bad Unlock issued on Inode lock");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "Reservelk Unlock successful");
+ __destroy_reserve_lock (retlock);
+ ret = 0;
+ }
+out:
+ pthread_mutex_unlock (&pl_inode->mutex);
+
+ grant_blocked_reserve_locks (this, pl_inode);
+ grant_blocked_lock_calls (this, pl_inode);
+
+ return ret;
+
+}
+
+int
+pl_reserve_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
+ int can_block)
+{
+ int ret = -EINVAL;
+
+ pthread_mutex_lock (&pl_inode->mutex);
+ {
+
+ ret = __lock_reservelk (this, pl_inode, lock, can_block);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => NOK",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lkowner_utoa (&lock->owner),
+ lock->user_flock.l_start,
+ lock->user_flock.l_len);
+ else
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => OK",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid,
+ lkowner_utoa (&lock->owner),
+ lock->fl_start,
+ lock->fl_end);
+
+ }
+ pthread_mutex_unlock (&pl_inode->mutex);
+ return ret;
+}
diff --git a/xlators/features/locks/tests/unit-test.c b/xlators/features/locks/tests/unit-test.c
index 06e77d56b..d2cca32de 100644
--- a/xlators/features/locks/tests/unit-test.c
+++ b/xlators/features/locks/tests/unit-test.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
diff --git a/auth/login/Makefile.am b/xlators/features/mac-compat/Makefile.am
index d471a3f92..d471a3f92 100644
--- a/auth/login/Makefile.am
+++ b/xlators/features/mac-compat/Makefile.am
diff --git a/xlators/features/mac-compat/src/Makefile.am b/xlators/features/mac-compat/src/Makefile.am
new file mode 100644
index 000000000..f8567edce
--- /dev/null
+++ b/xlators/features/mac-compat/src/Makefile.am
@@ -0,0 +1,14 @@
+xlator_LTLIBRARIES = mac-compat.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+mac_compat_la_LDFLAGS = -module -avoid-version
+
+mac_compat_la_SOURCES = mac-compat.c
+mac_compat_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
+
diff --git a/xlators/features/mac-compat/src/mac-compat.c b/xlators/features/mac-compat/src/mac-compat.c
new file mode 100644
index 000000000..7cb550ad5
--- /dev/null
+++ b/xlators/features/mac-compat/src/mac-compat.c
@@ -0,0 +1,237 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "defaults.h"
+#include "compat-errno.h"
+
+
+enum apple_xattr {
+ GF_FINDER_INFO_XATTR,
+ GF_RESOURCE_FORK_XATTR,
+ GF_XATTR_ALL,
+ GF_XATTR_NONE
+};
+
+static char *apple_xattr_name[] = {
+ [GF_FINDER_INFO_XATTR] = "com.apple.FinderInfo",
+ [GF_RESOURCE_FORK_XATTR] = "com.apple.ResourceFork"
+};
+
+static const char *apple_xattr_value[] = {
+ [GF_FINDER_INFO_XATTR] =
+ /* 1 2 3 4 5 6 7 8 */
+ "\0\0\0\0\0\0\0\0"
+ "\0\0\0\0\0\0\0\0"
+ "\0\0\0\0\0\0\0\0"
+ "\0\0\0\0\0\0\0\0",
+ [GF_RESOURCE_FORK_XATTR] = ""
+};
+
+static int32_t apple_xattr_len[] = {
+ [GF_FINDER_INFO_XATTR] = 32,
+ [GF_RESOURCE_FORK_XATTR] = 1
+};
+
+
+int32_t
+maccomp_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ intptr_t ax = (intptr_t)this->private;
+ int i = 0;
+
+ if ((ax == GF_XATTR_ALL && op_ret >= 0) || ax != GF_XATTR_NONE) {
+ op_ret = op_errno = 0;
+
+ for (i = 0; i < GF_XATTR_ALL; i++) {
+ if (dict_get (dict, apple_xattr_name[i]))
+ continue;
+
+ if (dict_set (dict, apple_xattr_name[i],
+ bin_to_data ((void *)apple_xattr_value[i],
+ apple_xattr_len[i])) == -1) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+
+ break;
+ }
+ }
+ }
+
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
+
+ return 0;
+}
+
+
+int32_t
+maccomp_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ intptr_t ax = GF_XATTR_NONE;
+ int i = 0;
+
+ if (name) {
+ for (i = 0; i < GF_XATTR_ALL; i++) {
+ if (strcmp (apple_xattr_name[i], name) == 0) {
+ ax = i;
+
+ break;
+ }
+ }
+ } else
+ ax = GF_XATTR_ALL;
+
+ this->private = (void *)ax;
+
+ STACK_WIND (frame, maccomp_getxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr,
+ loc, name, xdata);
+ return 0;
+}
+
+
+int32_t
+maccomp_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ intptr_t ax = GF_XATTR_NONE;
+ int i = 0;
+
+ if (name) {
+ for (i = 0; i < GF_XATTR_ALL; i++) {
+ if (strcmp (apple_xattr_name[i], name) == 0) {
+ ax = i;
+
+ break;
+ }
+ }
+ } else
+ ax = GF_XATTR_ALL;
+
+ this->private = (void *)ax;
+
+ STACK_WIND (frame, maccomp_getxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ fd, name, xdata);
+ return 0;
+}
+
+
+int32_t
+maccomp_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ intptr_t ax = (intptr_t)this->private;
+
+ if (op_ret == -1 && ax != GF_XATTR_NONE)
+ op_ret = op_errno = 0;
+
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+
+int32_t
+maccomp_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ intptr_t ax = GF_XATTR_NONE;
+ int i = 0;
+
+ for (i = 0; i < GF_XATTR_ALL; i++) {
+ if (dict_get (dict, apple_xattr_name[i])) {
+ ax = i;
+
+ break;
+ }
+ }
+
+ this->private = (void *)ax;
+
+ STACK_WIND (frame, maccomp_setxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr,
+ loc, dict, flags, xdata);
+ return 0;
+}
+
+
+int32_t
+maccomp_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ intptr_t ax = GF_XATTR_NONE;
+ int i = 0;
+
+ for (i = 0; i < GF_XATTR_ALL; i++) {
+ if (dict_get (dict, apple_xattr_name[i])) {
+ ax = i;
+
+ break;
+ }
+ }
+
+ this->private = (void *)ax;
+
+ STACK_WIND (frame, maccomp_setxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ fd, dict, flags, xdata);
+ return 0;
+}
+
+
+int32_t
+init (xlator_t *this)
+{
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "translator not configured with exactly one child");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+
+ return 0;
+}
+
+
+void
+fini (xlator_t *this)
+{
+ return;
+}
+
+
+struct xlator_fops fops = {
+ .getxattr = maccomp_getxattr,
+ .fgetxattr = maccomp_fgetxattr,
+ .setxattr = maccomp_setxattr,
+ .fsetxattr = maccomp_fsetxattr,
+};
+
+struct xlator_cbks cbks;
+
+struct volume_options options[] = {
+ { .key = {NULL} },
+};
diff --git a/xlators/features/marker/Makefile.am b/xlators/features/marker/Makefile.am
new file mode 100644
index 000000000..a985f42a8
--- /dev/null
+++ b/xlators/features/marker/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/marker/src/Makefile.am b/xlators/features/marker/src/Makefile.am
new file mode 100644
index 000000000..a7c676472
--- /dev/null
+++ b/xlators/features/marker/src/Makefile.am
@@ -0,0 +1,17 @@
+xlator_LTLIBRARIES = marker.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+marker_la_LDFLAGS = -module -avoid-version
+
+marker_la_SOURCES = marker.c marker-quota.c marker-quota-helper.c marker-common.c
+marker_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = marker-mem-types.h marker.h marker-quota.h marker-quota-helper.h marker-common.h $(top_builddir)/xlators/lib/src/libxlator.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/xlators/lib/src
+
+AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS)
+
+CLEANFILES =
+
diff --git a/xlators/features/marker/src/marker-common.c b/xlators/features/marker/src/marker-common.c
new file mode 100644
index 000000000..84a718add
--- /dev/null
+++ b/xlators/features/marker/src/marker-common.c
@@ -0,0 +1,69 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+#include <fnmatch.h>
+#include "marker-common.h"
+
+marker_inode_ctx_t *
+marker_inode_ctx_new ()
+{
+ marker_inode_ctx_t *ctx = NULL;
+
+ ctx = GF_CALLOC (1, sizeof (marker_inode_ctx_t),
+ gf_marker_mt_marker_inode_ctx_t);
+ if (ctx == NULL)
+ goto out;
+
+ ctx->quota_ctx = NULL;
+out:
+ return ctx;
+}
+
+int32_t
+marker_force_inode_ctx_get (inode_t *inode, xlator_t *this,
+ marker_inode_ctx_t **ctx)
+{
+ int32_t ret = -1;
+ uint64_t ctx_int = 0;
+
+ LOCK (&inode->lock);
+ {
+ ret = __inode_ctx_get (inode, this, &ctx_int);
+ if (ret == 0)
+ *ctx = (marker_inode_ctx_t *) (unsigned long)ctx_int;
+ else {
+ *ctx = marker_inode_ctx_new ();
+ if (*ctx == NULL)
+ goto unlock;
+
+ ret = __inode_ctx_put (inode, this,
+ (uint64_t )(unsigned long) *ctx);
+ if (ret == -1) {
+ GF_FREE (*ctx);
+ goto unlock;
+ }
+ ret = 0;
+ }
+ }
+unlock: UNLOCK (&inode->lock);
+
+ return ret;
+}
+
+int
+marker_filter_quota_xattr (dict_t *dict, char *key,
+ data_t *value, void *data)
+{
+ dict_del (dict, key);
+ return 0;
+}
diff --git a/xlators/features/marker/src/marker-common.h b/xlators/features/marker/src/marker-common.h
new file mode 100644
index 000000000..23dd846cb
--- /dev/null
+++ b/xlators/features/marker/src/marker-common.h
@@ -0,0 +1,27 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _MARKER_COMMON_H
+#define _MARKER_COMMON_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "inode.h"
+#include "xlator.h"
+#include "marker.h"
+
+int32_t
+marker_force_inode_ctx_get (inode_t *, xlator_t *, marker_inode_ctx_t **);
+
+int
+marker_filter_quota_xattr (dict_t *, char *, data_t *, void *);
+#endif
diff --git a/xlators/features/marker/src/marker-mem-types.h b/xlators/features/marker/src/marker-mem-types.h
new file mode 100644
index 000000000..1f74d5048
--- /dev/null
+++ b/xlators/features/marker/src/marker-mem-types.h
@@ -0,0 +1,25 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __MARKER_MEM_TYPES_H__
+#define __MARKER_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_marker_mem_types_ {
+ gf_marker_mt_marker_conf_t = gf_common_mt_end + 1,
+ gf_marker_mt_loc_t,
+ gf_marker_mt_volume_mark,
+ gf_marker_mt_int64_t,
+ gf_marker_mt_quota_inode_ctx_t,
+ gf_marker_mt_marker_inode_ctx_t,
+ gf_marker_mt_inode_contribution_t,
+ gf_marker_mt_end
+};
+#endif
diff --git a/xlators/features/marker/src/marker-quota-helper.c b/xlators/features/marker/src/marker-quota-helper.c
new file mode 100644
index 000000000..af5fed132
--- /dev/null
+++ b/xlators/features/marker/src/marker-quota-helper.c
@@ -0,0 +1,414 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "locking.h"
+#include "marker-quota.h"
+#include "marker-common.h"
+#include "marker-quota-helper.h"
+#include "marker-mem-types.h"
+
+int
+mq_loc_fill (loc_t *loc, inode_t *inode, inode_t *parent, char *path)
+{
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("marker", loc, out);
+ GF_VALIDATE_OR_GOTO ("marker", inode, out);
+ GF_VALIDATE_OR_GOTO ("marker", path, out);
+ /* Not checking for parent because while filling
+ * loc of root, parent will be NULL
+ */
+
+ if (inode) {
+ loc->inode = inode_ref (inode);
+ }
+
+ if (parent)
+ loc->parent = inode_ref (parent);
+
+ loc->path = gf_strdup (path);
+ if (!loc->path) {
+ gf_log ("loc fill", GF_LOG_ERROR, "strdup failed");
+ goto loc_wipe;
+ }
+
+ loc->name = strrchr (loc->path, '/');
+ if (loc->name)
+ loc->name++;
+ else
+ goto loc_wipe;
+
+ ret = 0;
+loc_wipe:
+ if (ret < 0)
+ loc_wipe (loc);
+out:
+ return ret;
+}
+
+
+int32_t
+mq_inode_loc_fill (const char *parent_gfid, inode_t *inode, loc_t *loc)
+{
+ char *resolvedpath = NULL;
+ inode_t *parent = NULL;
+ int ret = -1;
+
+ if ((!inode) || (!loc))
+ return ret;
+
+ if ((inode) && __is_root_gfid (inode->gfid)) {
+ loc->parent = NULL;
+ goto ignore_parent;
+ }
+
+ if (parent_gfid == NULL)
+ parent = inode_parent (inode, 0, NULL);
+ else
+ parent = inode_find (inode->table,
+ (unsigned char *) parent_gfid);
+
+ if (parent == NULL)
+ goto err;
+
+ignore_parent:
+ ret = inode_path (inode, NULL, &resolvedpath);
+ if (ret < 0)
+ goto err;
+
+ ret = mq_loc_fill (loc, inode, parent, resolvedpath);
+ if (ret < 0)
+ goto err;
+
+err:
+ if (parent)
+ inode_unref (parent);
+
+ GF_FREE (resolvedpath);
+
+ return ret;
+}
+
+
+quota_inode_ctx_t *
+mq_alloc_inode_ctx ()
+{
+ int32_t ret = -1;
+ quota_inode_ctx_t *ctx = NULL;
+
+ QUOTA_ALLOC (ctx, quota_inode_ctx_t, ret);
+ if (ret == -1)
+ goto out;
+
+ ctx->size = 0;
+ ctx->dirty = 0;
+ ctx->updation_status = _gf_false;
+ LOCK_INIT (&ctx->lock);
+ INIT_LIST_HEAD (&ctx->contribution_head);
+out:
+ return ctx;
+}
+
+inode_contribution_t *
+mq_get_contribution_node (inode_t *inode, quota_inode_ctx_t *ctx)
+{
+ inode_contribution_t *contri = NULL;
+ inode_contribution_t *temp = NULL;
+
+ if (!inode || !ctx)
+ goto out;
+
+ list_for_each_entry (temp, &ctx->contribution_head, contri_list) {
+ if (uuid_compare (temp->gfid, inode->gfid) == 0) {
+ contri = temp;
+ goto out;
+ }
+ }
+out:
+ return contri;
+}
+
+
+int32_t
+mq_delete_contribution_node (dict_t *dict, char *key,
+ inode_contribution_t *contribution)
+{
+ if (dict_get (dict, key) != NULL)
+ goto out;
+
+ QUOTA_FREE_CONTRIBUTION_NODE (contribution);
+out:
+ return 0;
+}
+
+
+inode_contribution_t *
+__mq_add_new_contribution_node (xlator_t *this, quota_inode_ctx_t *ctx, loc_t *loc)
+{
+ int32_t ret = 0;
+ inode_contribution_t *contribution = NULL;
+
+ if (!loc->parent) {
+ if (!uuid_is_null (loc->pargfid))
+ loc->parent = inode_find (loc->inode->table,
+ loc->pargfid);
+ if (!loc->parent)
+ loc->parent = inode_parent (loc->inode, loc->pargfid,
+ loc->name);
+ if (!loc->parent)
+ goto out;
+ }
+
+ list_for_each_entry (contribution, &ctx->contribution_head, contri_list) {
+ if (loc->parent &&
+ uuid_compare (contribution->gfid, loc->parent->gfid) == 0) {
+ goto out;
+ }
+ }
+
+ QUOTA_ALLOC (contribution, inode_contribution_t, ret);
+ if (ret == -1)
+ goto out;
+
+ contribution->contribution = 0;
+
+ uuid_copy (contribution->gfid, loc->parent->gfid);
+
+ LOCK_INIT (&contribution->lock);
+ INIT_LIST_HEAD (&contribution->contri_list);
+
+ list_add_tail (&contribution->contri_list, &ctx->contribution_head);
+
+out:
+ return contribution;
+}
+
+
+inode_contribution_t *
+mq_add_new_contribution_node (xlator_t *this, quota_inode_ctx_t *ctx, loc_t *loc)
+{
+ inode_contribution_t *contribution = NULL;
+
+ if ((ctx == NULL) || (loc == NULL))
+ return NULL;
+
+ if (strcmp (loc->path, "/") == 0)
+ return NULL;
+
+ LOCK (&ctx->lock);
+ {
+ contribution = __mq_add_new_contribution_node (this, ctx, loc);
+ }
+ UNLOCK (&ctx->lock);
+
+ return contribution;
+}
+
+
+int32_t
+mq_dict_set_contribution (xlator_t *this, dict_t *dict,
+ loc_t *loc)
+{
+ int32_t ret = -1;
+ char contri_key [512] = {0, };
+
+ GF_VALIDATE_OR_GOTO ("marker", this, out);
+ GF_VALIDATE_OR_GOTO ("marker", dict, out);
+ GF_VALIDATE_OR_GOTO ("marker", loc, out);
+ GF_VALIDATE_OR_GOTO ("marker", loc->parent, out);
+
+ GET_CONTRI_KEY (contri_key, loc->parent->gfid, ret);
+ if (ret < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_int64 (dict, contri_key, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "unable to set dict value on %s.",
+ loc->path);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+int32_t
+mq_inode_ctx_get (inode_t *inode, xlator_t *this,
+ quota_inode_ctx_t **ctx)
+{
+ int32_t ret = -1;
+ uint64_t ctx_int = 0;
+ marker_inode_ctx_t *mark_ctx = NULL;
+
+ GF_VALIDATE_OR_GOTO ("marker", inode, out);
+ GF_VALIDATE_OR_GOTO ("marker", this, out);
+ GF_VALIDATE_OR_GOTO ("marker", ctx, out);
+
+ ret = inode_ctx_get (inode, this, &ctx_int);
+ if (ret < 0) {
+ ret = -1;
+ *ctx = NULL;
+ goto out;
+ }
+
+ mark_ctx = (marker_inode_ctx_t *) (unsigned long)ctx_int;
+ if (mark_ctx->quota_ctx == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ *ctx = mark_ctx->quota_ctx;
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+
+quota_inode_ctx_t *
+__mq_inode_ctx_new (inode_t *inode, xlator_t *this)
+{
+ int32_t ret = -1;
+ quota_inode_ctx_t *quota_ctx = NULL;
+ marker_inode_ctx_t *mark_ctx = NULL;
+
+ ret = marker_force_inode_ctx_get (inode, this, &mark_ctx);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "marker_force_inode_ctx_get() failed");
+ goto out;
+ }
+
+ LOCK (&inode->lock);
+ {
+ if (mark_ctx->quota_ctx == NULL) {
+ quota_ctx = mq_alloc_inode_ctx ();
+ if (quota_ctx == NULL) {
+ ret = -1;
+ goto unlock;
+ }
+ mark_ctx->quota_ctx = quota_ctx;
+ } else {
+ quota_ctx = mark_ctx->quota_ctx;
+ }
+
+ ret = 0;
+ }
+unlock:
+ UNLOCK (&inode->lock);
+out:
+ return quota_ctx;
+}
+
+
+quota_inode_ctx_t *
+mq_inode_ctx_new (inode_t * inode, xlator_t *this)
+{
+ return __mq_inode_ctx_new (inode, this);
+}
+
+quota_local_t *
+mq_local_new ()
+{
+ quota_local_t *local = NULL;
+
+ local = mem_get0 (THIS->local_pool);
+ if (!local)
+ goto out;
+
+ local->ref = 1;
+ LOCK_INIT (&local->lock);
+
+ local->ctx = NULL;
+ local->contri = NULL;
+
+out:
+ return local;
+}
+
+quota_local_t *
+mq_local_ref (quota_local_t *local)
+{
+ LOCK (&local->lock);
+ {
+ local->ref ++;
+ }
+ UNLOCK (&local->lock);
+
+ return local;
+}
+
+
+int32_t
+mq_local_unref (xlator_t *this, quota_local_t *local)
+{
+ int32_t ref = 0;
+ if (local == NULL)
+ goto out;
+
+ QUOTA_SAFE_DECREMENT (&local->lock, local->ref, ref);
+
+ if (ref != 0)
+ goto out;
+
+ if (local->fd != NULL)
+ fd_unref (local->fd);
+
+ loc_wipe (&local->loc);
+
+ loc_wipe (&local->parent_loc);
+
+ LOCK_DESTROY (&local->lock);
+
+ mem_put (local);
+out:
+ return 0;
+}
+
+
+inode_contribution_t *
+mq_get_contribution_from_loc (xlator_t *this, loc_t *loc)
+{
+ int32_t ret = 0;
+ quota_inode_ctx_t *ctx = NULL;
+ inode_contribution_t *contribution = NULL;
+
+ ret = mq_inode_ctx_get (loc->inode, this, &ctx);
+ if (ret < 0) {
+ gf_log_callingfn (this->name, GF_LOG_WARNING,
+ "cannot get marker-quota context from inode "
+ "(gfid:%s, path:%s)",
+ uuid_utoa (loc->inode->gfid), loc->path);
+ goto err;
+ }
+
+ contribution = mq_get_contribution_node (loc->parent, ctx);
+ if (contribution == NULL) {
+ gf_log_callingfn (this->name, GF_LOG_WARNING,
+ "inode (gfid:%s, path:%s) has "
+ "no contribution towards parent (gfid:%s)",
+ uuid_utoa (loc->inode->gfid),
+ loc->path, uuid_utoa (loc->parent->gfid));
+ goto err;
+ }
+
+err:
+ return contribution;
+}
diff --git a/xlators/features/marker/src/marker-quota-helper.h b/xlators/features/marker/src/marker-quota-helper.h
new file mode 100644
index 000000000..6cdd14881
--- /dev/null
+++ b/xlators/features/marker/src/marker-quota-helper.h
@@ -0,0 +1,76 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _MARKER_QUOTA_HELPER_H
+#define _MARKER_QUOTA_HELPER
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "marker.h"
+
+#define QUOTA_FREE_CONTRIBUTION_NODE(_contribution) \
+ do { \
+ list_del (&_contribution->contri_list); \
+ GF_FREE (_contribution); \
+ } while (0)
+
+#define QUOTA_SAFE_INCREMENT(lock, var) \
+ do { \
+ LOCK (lock); \
+ var ++; \
+ UNLOCK (lock); \
+ } while (0)
+
+#define QUOTA_SAFE_DECREMENT(lock, var, value) \
+ do { \
+ LOCK (lock); \
+ { \
+ value = --var; \
+ } \
+ UNLOCK (lock); \
+ } while (0)
+
+inode_contribution_t *
+mq_add_new_contribution_node (xlator_t *, quota_inode_ctx_t *, loc_t *);
+
+int32_t
+mq_dict_set_contribution (xlator_t *, dict_t *, loc_t *);
+
+quota_inode_ctx_t *
+mq_inode_ctx_new (inode_t *, xlator_t *);
+
+int32_t
+mq_inode_ctx_get (inode_t *, xlator_t *, quota_inode_ctx_t **);
+
+int32_t
+mq_delete_contribution_node (dict_t *, char *, inode_contribution_t *);
+
+int32_t
+mq_inode_loc_fill (const char *, inode_t *, loc_t *);
+
+quota_local_t *
+mq_local_new ();
+
+quota_local_t *
+mq_local_ref (quota_local_t *);
+
+int32_t
+mq_local_unref (xlator_t *, quota_local_t *);
+
+inode_contribution_t *
+mq_get_contribution_node (inode_t *, quota_inode_ctx_t *);
+
+inode_contribution_t *
+mq_get_contribution_from_loc (xlator_t *this, loc_t *loc);
+
+#endif
diff --git a/xlators/features/marker/src/marker-quota.c b/xlators/features/marker/src/marker-quota.c
new file mode 100644
index 000000000..6f9af6e13
--- /dev/null
+++ b/xlators/features/marker/src/marker-quota.c
@@ -0,0 +1,2520 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "dict.h"
+#include "xlator.h"
+#include "defaults.h"
+#include "libxlator.h"
+#include "common-utils.h"
+#include "byte-order.h"
+#include "marker-quota.h"
+#include "marker-quota-helper.h"
+
+int
+mq_loc_copy (loc_t *dst, loc_t *src)
+{
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("marker", dst, out);
+ GF_VALIDATE_OR_GOTO ("marker", src, out);
+
+ if (src->inode == NULL ||
+ src->path == NULL) {
+ gf_log ("marker", GF_LOG_WARNING,
+ "src loc is not valid");
+ goto out;
+ }
+
+ ret = loc_copy (dst, src);
+out:
+ return ret;
+}
+
+int32_t
+mq_get_local_err (quota_local_t *local,
+ int32_t *val)
+{
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("marker", local, out);
+ GF_VALIDATE_OR_GOTO ("marker", val, out);
+
+ LOCK (&local->lock);
+ {
+ *val = local->err;
+ }
+ UNLOCK (&local->lock);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int32_t
+mq_get_ctx_updation_status (quota_inode_ctx_t *ctx,
+ gf_boolean_t *status)
+{
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("marker", ctx, out);
+ GF_VALIDATE_OR_GOTO ("marker", status, out);
+
+ LOCK (&ctx->lock);
+ {
+ *status = ctx->updation_status;
+ }
+ UNLOCK (&ctx->lock);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+int32_t
+mq_set_ctx_updation_status (quota_inode_ctx_t *ctx,
+ gf_boolean_t status)
+{
+ int32_t ret = -1;
+
+ if (ctx == NULL)
+ goto out;
+
+ LOCK (&ctx->lock);
+ {
+ ctx->updation_status = status;
+ }
+ UNLOCK (&ctx->lock);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int32_t
+mq_test_and_set_ctx_updation_status (quota_inode_ctx_t *ctx,
+ gf_boolean_t *status)
+{
+ int32_t ret = -1;
+ gf_boolean_t temp = _gf_false;
+
+ GF_VALIDATE_OR_GOTO ("marker", ctx, out);
+ GF_VALIDATE_OR_GOTO ("marker", status, out);
+
+ LOCK (&ctx->lock);
+ {
+ temp = *status;
+ *status = ctx->updation_status;
+ ctx->updation_status = temp;
+ }
+ UNLOCK (&ctx->lock);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+void
+mq_assign_lk_owner (xlator_t *this, call_frame_t *frame)
+{
+ marker_conf_t *conf = NULL;
+ uint64_t lk_owner = 0;
+
+ conf = this->private;
+
+ LOCK (&conf->lock);
+ {
+ if (++conf->quota_lk_owner == 0) {
+ ++conf->quota_lk_owner;
+ }
+
+ lk_owner = conf->quota_lk_owner;
+ }
+ UNLOCK (&conf->lock);
+
+ set_lk_owner_from_uint64 (&frame->root->lk_owner, lk_owner);
+
+ return;
+}
+
+
+int32_t
+mq_loc_fill_from_name (xlator_t *this, loc_t *newloc, loc_t *oldloc,
+ uint64_t ino, char *name)
+{
+ int32_t ret = -1;
+ int32_t len = 0;
+ char *path = NULL;
+
+ GF_VALIDATE_OR_GOTO ("marker", this, out);
+ GF_VALIDATE_OR_GOTO ("marker", newloc, out);
+ GF_VALIDATE_OR_GOTO ("marker", oldloc, out);
+ GF_VALIDATE_OR_GOTO ("marker", name, out);
+
+ newloc->inode = inode_new (oldloc->inode->table);
+
+ if (!newloc->inode) {
+ ret = -1;
+ goto out;
+ }
+
+ newloc->parent = inode_ref (oldloc->inode);
+ uuid_copy (newloc->pargfid, oldloc->inode->gfid);
+
+ len = strlen (oldloc->path);
+
+ if (oldloc->path [len - 1] == '/')
+ ret = gf_asprintf ((char **) &path, "%s%s",
+ oldloc->path, name);
+ else
+ ret = gf_asprintf ((char **) &path, "%s/%s",
+ oldloc->path, name);
+
+ if (ret < 0)
+ goto out;
+
+ newloc->path = path;
+
+ newloc->name = strrchr (newloc->path, '/');
+
+ if (newloc->name)
+ newloc->name++;
+
+ gf_log (this->name, GF_LOG_DEBUG, "path = %s name =%s",
+ newloc->path, newloc->name);
+out:
+ return ret;
+}
+
+int32_t
+mq_dirty_inode_updation_done (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ QUOTA_STACK_DESTROY (frame, this);
+
+ return 0;
+}
+
+int32_t
+mq_release_lock_on_dirty_inode (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ struct gf_flock lock = {0, };
+ quota_local_t *local = NULL;
+ loc_t loc = {0, };
+ int ret = -1;
+
+ local = frame->local;
+
+ if (op_ret == -1) {
+ local->err = -1;
+
+ mq_dirty_inode_updation_done (frame, NULL, this, 0, 0, NULL);
+
+ return 0;
+ }
+
+ if (op_ret == 0)
+ local->ctx->dirty = 0;
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ ret = loc_copy (&loc, &local->loc);
+ if (ret == -1) {
+ local->err = -1;
+ frame->local = NULL;
+ mq_dirty_inode_updation_done (frame, NULL, this, 0, 0, NULL);
+ return 0;
+ }
+
+ if (local->loc.inode == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Inode is NULL, so can't stackwind.");
+ goto out;
+ }
+
+ STACK_WIND (frame,
+ mq_dirty_inode_updation_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->inodelk,
+ this->name, &loc, F_SETLKW, &lock, NULL);
+
+ loc_wipe (&loc);
+
+ return 0;
+out:
+ mq_dirty_inode_updation_done (frame, NULL, this, -1, 0, NULL);
+
+ return 0;
+}
+
+int32_t
+mq_mark_inode_undirty (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ int32_t ret = -1;
+ int64_t *size = NULL;
+ dict_t *newdict = NULL;
+ quota_local_t *local = NULL;
+
+ local = (quota_local_t *) frame->local;
+
+ if (op_ret == -1)
+ goto err;
+
+ if (!dict)
+ goto wind;
+
+ ret = dict_get_bin (dict, QUOTA_SIZE_KEY, (void **) &size);
+ if (ret)
+ goto wind;
+
+ LOCK (&local->ctx->lock);
+ {
+ local->ctx->size = ntoh64 (*size);
+ }
+ UNLOCK (&local->ctx->lock);
+
+wind:
+ newdict = dict_new ();
+ if (!newdict)
+ goto err;
+
+ ret = dict_set_int8 (newdict, QUOTA_DIRTY_KEY, 0);
+ if (ret)
+ goto err;
+
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, local->loc.inode->gfid);
+
+ GF_UUID_ASSERT (local->loc.gfid);
+ STACK_WIND (frame, mq_release_lock_on_dirty_inode,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr,
+ &local->loc, newdict, 0, NULL);
+ ret = 0;
+
+err:
+ if (op_ret == -1 || ret == -1) {
+ local->err = -1;
+
+ mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
+ }
+
+ if (newdict)
+ dict_unref (newdict);
+
+ return 0;
+}
+
+int32_t
+mq_update_size_xattr (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *dict, struct iatt *postparent)
+{
+ int32_t ret = -1;
+ dict_t *new_dict = NULL;
+ int64_t *size = NULL;
+ int64_t *delta = NULL;
+ quota_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret == -1)
+ goto err;
+
+ if (dict == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Dict is null while updating the size xattr %s",
+ local->loc.path?local->loc.path:"");
+ goto err;
+ }
+
+ ret = dict_get_bin (dict, QUOTA_SIZE_KEY, (void **) &size);
+ if (!size) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to get the size, %s",
+ local->loc.path?local->loc.path:"");
+ goto err;
+ }
+
+ QUOTA_ALLOC_OR_GOTO (delta, int64_t, ret, err);
+
+ *delta = hton64 (local->sum - ntoh64 (*size));
+
+ gf_log (this->name, GF_LOG_DEBUG, "calculated size = %"PRId64", "
+ "original size = %"PRIu64
+ " path = %s diff = %"PRIu64, local->sum, ntoh64 (*size),
+ local->loc.path, ntoh64 (*delta));
+
+ new_dict = dict_new ();
+ if (!new_dict);
+
+ ret = dict_set_bin (new_dict, QUOTA_SIZE_KEY, delta, 8);
+ if (ret)
+ goto err;
+
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, buf->ia_gfid);
+
+ GF_UUID_ASSERT (local->loc.gfid);
+
+ STACK_WIND (frame, mq_mark_inode_undirty, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->xattrop, &local->loc,
+ GF_XATTROP_ADD_ARRAY64, new_dict, NULL);
+
+ ret = 0;
+
+err:
+ if (op_ret == -1 || ret == -1) {
+ local->err = -1;
+
+ mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
+ }
+
+ if (new_dict)
+ dict_unref (new_dict);
+
+ return 0;
+}
+
+int32_t
+mq_test_and_set_local_err(quota_local_t *local,
+ int32_t *val)
+{
+ int tmp = 0;
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("marker", local, out);
+ GF_VALIDATE_OR_GOTO ("marker", val, out);
+
+ LOCK (&local->lock);
+ {
+ tmp = local->err;
+ local->err = *val;
+ *val = tmp;
+ }
+ UNLOCK (&local->lock);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int32_t
+mq_get_dirty_inode_size (call_frame_t *frame, xlator_t *this)
+{
+ int32_t ret = -1;
+ dict_t *dict = NULL;
+ quota_local_t *local = NULL;
+
+ local = (quota_local_t *) frame->local;
+
+ dict = dict_new ();
+ if (!dict) {
+ ret = -1;
+ goto err;
+ }
+
+ ret = dict_set_int64 (dict, QUOTA_SIZE_KEY, 0);
+ if (ret)
+ goto err;
+
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, local->loc.inode->gfid);
+
+ GF_UUID_ASSERT (local->loc.gfid);
+
+ STACK_WIND (frame, mq_update_size_xattr, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, &local->loc, dict);
+ ret =0;
+
+err:
+ if (ret) {
+ local->err = -1;
+
+ mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
+ }
+
+ if (dict)
+ dict_unref (dict);
+
+ return 0;
+}
+
+int32_t
+mq_get_child_contribution (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct iatt *buf,
+ dict_t *dict,
+ struct iatt *postparent)
+{
+ int32_t ret = -1;
+ int32_t val = 0;
+ char contri_key [512] = {0, };
+ int64_t *contri = NULL;
+ quota_local_t *local = NULL;
+
+ local = frame->local;
+
+ frame->local = NULL;
+
+ QUOTA_STACK_DESTROY (frame, this);
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "%s",
+ strerror (op_errno));
+ val = -2;
+ if (!mq_test_and_set_local_err (local, &val) &&
+ val != -2)
+ mq_release_lock_on_dirty_inode (local->frame, NULL,
+ this, 0, 0, NULL);
+
+ goto exit;
+ }
+
+ ret = mq_get_local_err (local, &val);
+ if (!ret && val == -2)
+ goto exit;
+
+ GET_CONTRI_KEY (contri_key, local->loc.inode->gfid, ret);
+ if (ret < 0)
+ goto out;
+
+ if (!dict)
+ goto out;
+
+ if (dict_get_bin (dict, contri_key, (void **) &contri) == 0)
+ local->sum += ntoh64 (*contri);
+
+out:
+ LOCK (&local->lock);
+ {
+ val = --local->dentry_child_count;
+ }
+ UNLOCK (&local->lock);
+
+ if (val == 0) {
+ mq_dirty_inode_readdir (local->frame, NULL, this,
+ 0, 0, NULL, NULL);
+ }
+ mq_local_unref (this, local);
+
+ return 0;
+exit:
+ mq_local_unref (this, local);
+ return 0;
+}
+
+int32_t
+mq_readdir_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata)
+{
+ char contri_key [512] = {0, };
+ int32_t ret = 0;
+ int32_t val = 0;
+ off_t offset = 0;
+ int32_t count = 0;
+ dict_t *dict = NULL;
+ quota_local_t *local = NULL;
+ gf_dirent_t *entry = NULL;
+ call_frame_t *newframe = NULL;
+ loc_t loc = {0, };
+
+ local = mq_local_ref (frame->local);
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "readdir failed %s", strerror (op_errno));
+ local->err = -1;
+
+ mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
+
+ goto end;
+ } else if (op_ret == 0) {
+ mq_get_dirty_inode_size (frame, this);
+
+ goto end;
+ }
+
+ local->dentry_child_count = 0;
+
+ list_for_each_entry (entry, (&entries->list), list) {
+ gf_log (this->name, GF_LOG_DEBUG, "entry = %s", entry->d_name);
+
+ if ((!strcmp (entry->d_name, ".")) || (!strcmp (entry->d_name,
+ ".."))) {
+ gf_log (this->name, GF_LOG_DEBUG, "entry = %s",
+ entry->d_name);
+ continue;
+ }
+
+ offset = entry->d_off;
+ count++;
+ }
+
+ if (count == 0) {
+ mq_get_dirty_inode_size (frame, this);
+ goto end;
+
+ }
+
+ local->frame = frame;
+
+ LOCK (&local->lock);
+ {
+ local->dentry_child_count = count;
+ local->d_off = offset;
+ }
+ UNLOCK (&local->lock);
+
+
+ list_for_each_entry (entry, (&entries->list), list) {
+ gf_log (this->name, GF_LOG_DEBUG, "entry = %s", entry->d_name);
+
+ if ((!strcmp (entry->d_name, ".")) || (!strcmp (entry->d_name,
+ ".."))) {
+ gf_log (this->name, GF_LOG_DEBUG, "entry = %s",
+ entry->d_name);
+ continue;
+ }
+
+ ret = mq_loc_fill_from_name (this, &loc, &local->loc,
+ entry->d_ino, entry->d_name);
+ if (ret < 0)
+ goto out;
+
+ ret = 0;
+
+ LOCK (&local->lock);
+ {
+ if (local->err != -2) {
+ newframe = copy_frame (frame);
+ if (!newframe) {
+ ret = -1;
+ }
+ } else
+ ret = -1;
+ }
+ UNLOCK (&local->lock);
+
+ if (ret == -1)
+ goto out;
+
+ newframe->local = mq_local_ref (local);
+
+ dict = dict_new ();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ GET_CONTRI_KEY (contri_key, local->loc.inode->gfid, ret);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_set_int64 (dict, contri_key, 0);
+ if (ret)
+ goto out;
+
+ STACK_WIND (newframe,
+ mq_get_child_contribution,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup,
+ &loc, dict);
+
+ offset = entry->d_off;
+
+ loc_wipe (&loc);
+
+ newframe = NULL;
+
+ out:
+ if (dict) {
+ dict_unref (dict);
+ dict = NULL;
+ }
+
+ if (ret) {
+ val = -2;
+ mq_test_and_set_local_err (local, &val);
+
+ if (newframe) {
+ newframe->local = NULL;
+ mq_local_unref(this, local);
+ QUOTA_STACK_DESTROY (newframe, this);
+ }
+
+ break;
+ }
+ }
+
+ if (ret && val != -2) {
+ mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
+ }
+end:
+ mq_local_unref (this, local);
+
+ return 0;
+}
+
+int32_t
+mq_dirty_inode_readdir (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ fd_t *fd, dict_t *xdata)
+{
+ quota_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret == -1) {
+ local->err = -1;
+ mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
+ return 0;
+ }
+
+ if (local->fd == NULL)
+ local->fd = fd_ref (fd);
+
+ STACK_WIND (frame,
+ mq_readdir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdir,
+ local->fd, READDIR_BUF, local->d_off, xdata);
+
+ return 0;
+}
+
+int32_t
+mq_check_if_still_dirty (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ inode_t *inode,
+ struct iatt *buf,
+ dict_t *dict,
+ struct iatt *postparent)
+{
+ int8_t dirty = -1;
+ int32_t ret = -1;
+ fd_t *fd = NULL;
+ quota_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to get "
+ "the dirty xattr for %s", local->loc.path);
+ goto err;
+ }
+
+ if (!dict) {
+ ret = -1;
+ goto err;
+ }
+
+ ret = dict_get_int8 (dict, QUOTA_DIRTY_KEY, &dirty);
+ if (ret)
+ goto err;
+
+ //the inode is not dirty anymore
+ if (dirty == 0) {
+ mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
+
+ return 0;
+ }
+
+ fd = fd_create (local->loc.inode, frame->root->pid);
+
+ local->d_off = 0;
+
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, buf->ia_gfid);
+
+ GF_UUID_ASSERT (local->loc.gfid);
+ STACK_WIND(frame,
+ mq_dirty_inode_readdir,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir,
+ &local->loc, fd, NULL);
+
+ ret = 0;
+
+err:
+ if (op_ret == -1 || ret == -1) {
+ local->err = -1;
+ mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
+ }
+
+ if (fd != NULL) {
+ fd_unref (fd);
+ }
+
+ return 0;
+}
+
+int32_t
+mq_get_dirty_xattr (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ int32_t ret = -1;
+ dict_t *xattr_req = NULL;
+ quota_local_t *local = NULL;
+
+ if (op_ret == -1) {
+ mq_dirty_inode_updation_done (frame, NULL, this, 0, 0, NULL);
+ return 0;
+ }
+
+ local = frame->local;
+
+ xattr_req = dict_new ();
+ if (xattr_req == NULL) {
+ ret = -1;
+ goto err;
+ }
+
+ ret = dict_set_int8 (xattr_req, QUOTA_DIRTY_KEY, 0);
+ if (ret)
+ goto err;
+
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, local->loc.inode->gfid);
+
+ GF_UUID_ASSERT (local->loc.gfid);
+
+ STACK_WIND (frame,
+ mq_check_if_still_dirty,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup,
+ &local->loc,
+ xattr_req);
+ ret = 0;
+
+err:
+ if (ret) {
+ local->err = -1;
+ mq_release_lock_on_dirty_inode(frame, NULL, this, 0, 0, NULL);
+ }
+
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ return 0;
+}
+
+/* return 1 when dirty updation started
+ * 0 other wise
+ */
+int32_t
+mq_update_dirty_inode (xlator_t *this,
+ loc_t *loc,
+ quota_inode_ctx_t *ctx,
+ inode_contribution_t *contribution)
+{
+ int32_t ret = -1;
+ quota_local_t *local = NULL;
+ gf_boolean_t status = _gf_false;
+ struct gf_flock lock = {0, };
+ call_frame_t *frame = NULL;
+
+ ret = mq_get_ctx_updation_status (ctx, &status);
+ if (ret == -1 || status == _gf_true) {
+ ret = 0;
+ goto out;
+ }
+
+ frame = create_frame (this, this->ctx->pool);
+ if (frame == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ mq_assign_lk_owner (this, frame);
+
+ local = mq_local_new ();
+ if (local == NULL)
+ goto fr_destroy;
+
+ frame->local = local;
+ ret = mq_loc_copy (&local->loc, loc);
+ if (ret < 0)
+ goto fr_destroy;
+
+ local->ctx = ctx;
+
+ local->contri = contribution;
+
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+
+ if (local->loc.inode == NULL) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_WARNING,
+ "Inode is NULL, so can't stackwind.");
+ goto fr_destroy;
+ }
+
+ STACK_WIND (frame,
+ mq_get_dirty_xattr,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->inodelk,
+ this->name, &local->loc, F_SETLKW, &lock, NULL);
+ return 1;
+
+fr_destroy:
+ QUOTA_STACK_DESTROY (frame, this);
+out:
+
+ return 0;
+}
+
+
+int32_t
+mq_inode_creation_done (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ quota_local_t *local = NULL;
+
+ if (frame == NULL)
+ return 0;
+
+ local = frame->local;
+
+ if (local != NULL) {
+ mq_initiate_quota_txn (this, &local->loc);
+ }
+
+ QUOTA_STACK_DESTROY (frame, this);
+
+ return 0;
+}
+
+
+int32_t
+mq_xattr_creation_release_lock (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ struct gf_flock lock = {0, };
+ quota_local_t *local = NULL;
+
+ local = frame->local;
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND (frame,
+ mq_inode_creation_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->inodelk,
+ this->name, &local->loc,
+ F_SETLKW, &lock, NULL);
+
+ return 0;
+}
+
+
+int32_t
+mq_create_dirty_xattr (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ int32_t ret = -1;
+ dict_t *newdict = NULL;
+ quota_local_t *local = NULL;
+
+ if (op_ret < 0) {
+ goto err;
+ }
+
+ local = frame->local;
+
+ if (local->loc.inode->ia_type == IA_IFDIR) {
+ newdict = dict_new ();
+ if (!newdict) {
+ goto err;
+ }
+
+ ret = dict_set_int8 (newdict, QUOTA_DIRTY_KEY, 0);
+ if (ret == -1) {
+ goto err;
+ }
+
+ uuid_copy (local->loc.gfid, local->loc.inode->gfid);
+ GF_UUID_ASSERT (local->loc.gfid);
+
+ STACK_WIND (frame, mq_xattr_creation_release_lock,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr,
+ &local->loc, newdict, 0, NULL);
+ } else {
+ mq_xattr_creation_release_lock (frame, NULL, this, 0, 0, NULL);
+ }
+
+ ret = 0;
+
+err:
+ if (ret < 0) {
+ mq_xattr_creation_release_lock (frame, NULL, this, 0, 0, NULL);
+ }
+
+ if (newdict != NULL)
+ dict_unref (newdict);
+
+ return 0;
+}
+
+
+int32_t
+mq_create_xattr (xlator_t *this, call_frame_t *frame)
+{
+ int32_t ret = 0;
+ int64_t *value = NULL;
+ int64_t *size = NULL;
+ dict_t *dict = NULL;
+ char key[512] = {0, };
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ inode_contribution_t *contri = NULL;
+
+ if (frame == NULL || this == NULL)
+ return 0;
+
+ local = frame->local;
+
+ ret = mq_inode_ctx_get (local->loc.inode, this, &ctx);
+ if (ret < 0) {
+ ctx = mq_inode_ctx_new (local->loc.inode, this);
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "mq_inode_ctx_new failed");
+ ret = -1;
+ goto out;
+ }
+ }
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ if (local->loc.inode->ia_type == IA_IFDIR) {
+ QUOTA_ALLOC_OR_GOTO (size, int64_t, ret, err);
+ ret = dict_set_bin (dict, QUOTA_SIZE_KEY, size, 8);
+ if (ret < 0)
+ goto free_size;
+ }
+
+ if (strcmp (local->loc.path, "/") != 0) {
+ contri = mq_add_new_contribution_node (this, ctx, &local->loc);
+ if (contri == NULL)
+ goto err;
+
+ QUOTA_ALLOC_OR_GOTO (value, int64_t, ret, err);
+ GET_CONTRI_KEY (key, local->loc.parent->gfid, ret);
+
+ ret = dict_set_bin (dict, key, value, 8);
+ if (ret < 0)
+ goto free_value;
+ }
+
+ GF_UUID_ASSERT (local->loc.gfid);
+
+ STACK_WIND (frame, mq_create_dirty_xattr, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->xattrop, &local->loc,
+ GF_XATTROP_ADD_ARRAY64, dict, NULL);
+ ret = 0;
+
+free_size:
+ if (ret < 0) {
+ GF_FREE (size);
+ }
+
+free_value:
+ if (ret < 0) {
+ GF_FREE (value);
+ }
+
+err:
+ dict_unref (dict);
+
+out:
+ if (ret < 0) {
+ mq_xattr_creation_release_lock (frame, NULL, this, 0, 0, NULL);
+ }
+
+ return 0;
+}
+
+
+int32_t
+mq_check_n_set_inode_xattr (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, dict_t *dict,
+ struct iatt *postparent)
+{
+ quota_local_t *local = NULL;
+ int64_t *size = NULL, *contri = NULL;
+ int8_t dirty = 0;
+ int32_t ret = 0;
+ char contri_key[512] = {0, };
+
+ if (op_ret < 0) {
+ goto out;
+ }
+
+ local = frame->local;
+
+ ret = dict_get_bin (dict, QUOTA_SIZE_KEY, (void **) &size);
+ if (ret < 0)
+ goto create_xattr;
+
+ ret = dict_get_int8 (dict, QUOTA_DIRTY_KEY, &dirty);
+ if (ret < 0)
+ goto create_xattr;
+
+ //check contribution xattr if not root
+ if (strcmp (local->loc.path, "/") != 0) {
+ GET_CONTRI_KEY (contri_key, local->loc.parent->gfid, ret);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_bin (dict, contri_key, (void **) &contri);
+ if (ret < 0)
+ goto create_xattr;
+ }
+
+out:
+ mq_xattr_creation_release_lock (frame, NULL, this, 0, 0, NULL);
+ return 0;
+
+create_xattr:
+ if (uuid_is_null (local->loc.gfid)) {
+ uuid_copy (local->loc.gfid, buf->ia_gfid);
+ }
+
+ mq_create_xattr (this, frame);
+ return 0;
+}
+
+
+int32_t
+mq_get_xattr (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dict_t *xattr_req = NULL;
+ quota_local_t *local = NULL;
+ int32_t ret = 0;
+
+ if (op_ret < 0) {
+ goto lock_err;
+ }
+
+ local = frame->local;
+
+ xattr_req = dict_new ();
+ if (xattr_req == NULL) {
+ goto err;
+ }
+
+ ret = mq_req_xattr (this, &local->loc, xattr_req);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING, "cannot request xattr");
+ goto err;
+ }
+
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, local->loc.inode->gfid);
+
+ GF_UUID_ASSERT (local->loc.gfid);
+
+ STACK_WIND (frame, mq_check_n_set_inode_xattr, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, &local->loc, xattr_req);
+
+ dict_unref (xattr_req);
+
+ return 0;
+
+err:
+ mq_xattr_creation_release_lock (frame, NULL, this, 0, 0, NULL);
+
+ if (xattr_req)
+ dict_unref (xattr_req);
+ return 0;
+
+lock_err:
+ mq_inode_creation_done (frame, NULL, this, 0, 0, NULL);
+ return 0;
+}
+
+
+int32_t
+mq_set_inode_xattr (xlator_t *this, loc_t *loc)
+{
+ struct gf_flock lock = {0, };
+ quota_local_t *local = NULL;
+ int32_t ret = 0;
+ call_frame_t *frame = NULL;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto err;
+ }
+
+ local = mq_local_new ();
+ if (local == NULL) {
+ goto err;
+ }
+
+ frame->local = local;
+
+ ret = loc_copy (&local->loc, loc);
+ if (ret < 0) {
+ goto err;
+ }
+
+ frame->local = local;
+
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+
+ STACK_WIND (frame,
+ mq_get_xattr,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->inodelk,
+ this->name, &local->loc, F_SETLKW, &lock, NULL);
+
+ return 0;
+
+err:
+ QUOTA_STACK_DESTROY (frame, this);
+
+ return 0;
+}
+
+
+int32_t
+mq_get_parent_inode_local (xlator_t *this, quota_local_t *local)
+{
+ int32_t ret = -1;
+ quota_inode_ctx_t *ctx = NULL;
+
+ GF_VALIDATE_OR_GOTO ("marker", this, out);
+ GF_VALIDATE_OR_GOTO ("marker", local, out);
+
+ local->contri = NULL;
+
+ loc_wipe (&local->loc);
+
+ ret = mq_loc_copy (&local->loc, &local->parent_loc);
+ if (ret < 0) {
+ gf_log_callingfn (this->name, GF_LOG_WARNING,
+ "loc copy failed");
+ goto out;
+ }
+
+ loc_wipe (&local->parent_loc);
+
+ ret = mq_inode_loc_fill (NULL, local->loc.parent,
+ &local->parent_loc);
+ if (ret < 0) {
+ gf_log_callingfn (this->name, GF_LOG_WARNING,
+ "failed to build parent loc of %s",
+ local->loc.path);
+ goto out;
+ }
+
+ ret = mq_inode_ctx_get (local->loc.inode, this, &ctx);
+ if (ret < 0) {
+ gf_log_callingfn (this->name, GF_LOG_WARNING,
+ "inode ctx get failed");
+ goto out;
+ }
+
+ local->ctx = ctx;
+
+ if (list_empty (&ctx->contribution_head)) {
+ gf_log_callingfn (this->name, GF_LOG_WARNING,
+ "contribution node list is empty which "
+ "is an error");
+ ret = -1;
+ goto out;
+ }
+
+ local->contri = (inode_contribution_t *) ctx->contribution_head.next;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+int32_t
+mq_xattr_updation_done (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ QUOTA_STACK_DESTROY (frame, this);
+ return 0;
+}
+
+
+int32_t
+mq_inodelk_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ int32_t ret = 0;
+ gf_boolean_t status = _gf_false;
+ quota_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret == -1 || local->err) {
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "unlocking failed on path (%s)(%s)",
+ local->parent_loc.path, strerror (op_errno));
+ }
+ mq_xattr_updation_done (frame, NULL, this, 0, 0, NULL, NULL);
+
+ return 0;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "inodelk released on %s", local->parent_loc.path);
+
+ if ((strcmp (local->parent_loc.path, "/") == 0)
+ || (local->delta == 0)) {
+ mq_xattr_updation_done (frame, NULL, this, 0, 0, NULL, NULL);
+ } else {
+ ret = mq_get_parent_inode_local (this, local);
+ if (ret < 0) {
+ mq_xattr_updation_done (frame, NULL, this, 0, 0, NULL,
+ NULL);
+ goto out;
+ }
+ status = _gf_true;
+
+ ret = mq_test_and_set_ctx_updation_status (local->ctx, &status);
+ if (ret == 0 && status == _gf_false) {
+ mq_get_lock_on_parent (frame, this);
+ } else {
+ mq_xattr_updation_done (frame, NULL, this, 0, 0, NULL,
+ NULL);
+ }
+ }
+out:
+ return 0;
+}
+
+
+//now release lock on the parent inode
+int32_t
+mq_release_parent_lock (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ int32_t ret = 0;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ struct gf_flock lock = {0, };
+
+ local = frame->local;
+
+ if (local->err != 0) {
+ gf_log_callingfn (this->name,
+ (local->err == ENOENT) ? GF_LOG_DEBUG
+ : GF_LOG_WARNING,
+ "An operation during quota updation "
+ "of path (%s) failed (%s)", local->loc.path,
+ strerror (local->err));
+ }
+
+ ret = mq_inode_ctx_get (local->parent_loc.inode, this, &ctx);
+ if (ret < 0)
+ goto wind;
+
+ LOCK (&ctx->lock);
+ {
+ ctx->dirty = 0;
+ }
+ UNLOCK (&ctx->lock);
+
+ if (local->parent_loc.inode == NULL) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Invalid parent inode.");
+ goto err;
+ }
+
+wind:
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND (frame,
+ mq_inodelk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->inodelk,
+ this->name, &local->parent_loc,
+ F_SETLKW, &lock, NULL);
+
+ return 0;
+err:
+ mq_xattr_updation_done (frame, NULL, this,
+ 0, 0 , NULL, NULL);
+ return 0;
+}
+
+
+int32_t
+mq_mark_undirty (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ int32_t ret = -1;
+ int64_t *size = NULL;
+ dict_t *newdict = NULL;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+
+ local = frame->local;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING, "%s occurred while"
+ " updating the size of %s", strerror (op_errno),
+ local->parent_loc.path);
+
+ goto err;
+ }
+
+ //update the size of the parent inode
+ if (dict != NULL) {
+ ret = mq_inode_ctx_get (local->parent_loc.inode, this, &ctx);
+ if (ret < 0) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ ret = dict_get_bin (dict, QUOTA_SIZE_KEY, (void **) &size);
+ if (ret < 0) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ LOCK (&ctx->lock);
+ {
+ if (size)
+ ctx->size = ntoh64 (*size);
+ gf_log (this->name, GF_LOG_DEBUG, "%s %"PRId64,
+ local->parent_loc.path, ctx->size);
+ }
+ UNLOCK (&ctx->lock);
+ }
+
+ newdict = dict_new ();
+ if (!newdict) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ ret = dict_set_int8 (newdict, QUOTA_DIRTY_KEY, 0);
+
+ if (ret == -1) {
+ op_errno = -ret;
+ goto err;
+ }
+
+ uuid_copy (local->parent_loc.gfid, local->parent_loc.inode->gfid);
+ GF_UUID_ASSERT (local->parent_loc.gfid);
+
+ STACK_WIND (frame, mq_release_parent_lock,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr,
+ &local->parent_loc, newdict, 0, NULL);
+
+ ret = 0;
+err:
+ if (op_ret == -1 || ret == -1) {
+ local->err = op_errno;
+
+ mq_release_parent_lock (frame, NULL, this, 0, 0, NULL);
+ }
+
+ if (newdict)
+ dict_unref (newdict);
+
+ return 0;
+}
+
+
+int32_t
+mq_update_parent_size (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ dict_t *dict, dict_t *xdata)
+{
+ int64_t *size = NULL;
+ int32_t ret = -1;
+ dict_t *newdict = NULL;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+
+ local = frame->local;
+
+ if (op_ret == -1) {
+ gf_log (this->name, ((op_errno == ENOENT) ? GF_LOG_DEBUG :
+ GF_LOG_WARNING),
+ "xattrop call failed: %s", strerror (op_errno));
+
+ goto err;
+ }
+
+ LOCK (&local->contri->lock);
+ {
+ local->contri->contribution += local->delta;
+ }
+ UNLOCK (&local->contri->lock);
+
+ gf_log (this->name, GF_LOG_DEBUG, "%s %"PRId64 "%"PRId64,
+ local->loc.path, local->ctx->size,
+ local->contri->contribution);
+
+ if (dict == NULL) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ ret = mq_inode_ctx_get (local->parent_loc.inode, this, &ctx);
+ if (ret < 0) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ newdict = dict_new ();
+ if (!newdict) {
+ op_errno = ENOMEM;
+ ret = -1;
+ goto err;
+ }
+
+ QUOTA_ALLOC_OR_GOTO (size, int64_t, ret, err);
+
+ *size = hton64 (local->delta);
+
+ ret = dict_set_bin (newdict, QUOTA_SIZE_KEY, size, 8);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto err;
+ }
+
+ if (uuid_is_null (local->parent_loc.gfid))
+ uuid_copy (local->parent_loc.gfid,
+ local->parent_loc.inode->gfid);
+ GF_UUID_ASSERT (local->parent_loc.gfid);
+
+ STACK_WIND (frame,
+ mq_mark_undirty,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->xattrop,
+ &local->parent_loc,
+ GF_XATTROP_ADD_ARRAY64,
+ newdict, NULL);
+ ret = 0;
+err:
+ if (op_ret == -1 || ret < 0) {
+ local->err = op_errno;
+ mq_release_parent_lock (frame, NULL, this, 0, 0, NULL);
+ }
+
+ if (newdict)
+ dict_unref (newdict);
+
+ return 0;
+}
+
+int32_t
+mq_update_inode_contribution (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *dict,
+ struct iatt *postparent)
+{
+ int32_t ret = -1;
+ int64_t *size = NULL, size_int = 0, contri_int = 0;
+ int64_t *contri = NULL;
+ int64_t *delta = NULL;
+ char contri_key [512] = {0, };
+ dict_t *newdict = NULL;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ inode_contribution_t *contribution = NULL;
+
+ local = frame->local;
+
+ if (op_ret == -1) {
+ gf_log (this->name, ((op_errno == ENOENT) ? GF_LOG_DEBUG :
+ GF_LOG_WARNING),
+ "failed to get size and contribution of path (%s)(%s)",
+ local->loc.path, strerror (op_errno));
+ goto err;
+ }
+
+ ctx = local->ctx;
+ contribution = local->contri;
+
+ //prepare to update size & contribution of the inode
+ GET_CONTRI_KEY (contri_key, contribution->gfid, ret);
+ if (ret == -1) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ LOCK (&ctx->lock);
+ {
+ if (local->loc.inode->ia_type == IA_IFDIR ) {
+ ret = dict_get_bin (dict, QUOTA_SIZE_KEY,
+ (void **) &size);
+ if (ret < 0) {
+ op_errno = EINVAL;
+ goto unlock;
+ }
+
+ ctx->size = ntoh64 (*size);
+ } else
+ ctx->size = buf->ia_blocks * 512;
+
+ size_int = ctx->size;
+ }
+unlock:
+ UNLOCK (&ctx->lock);
+
+ if (ret < 0) {
+ goto err;
+ }
+
+ ret = dict_get_bin (dict, contri_key, (void **) &contri);
+
+ LOCK (&contribution->lock);
+ {
+ if (ret < 0)
+ contribution->contribution = 0;
+ else
+ contribution->contribution = ntoh64 (*contri);
+
+ contri_int = contribution->contribution;
+ }
+ UNLOCK (&contribution->lock);
+
+ gf_log (this->name, GF_LOG_DEBUG, "%s %"PRId64 "%"PRId64,
+ local->loc.path, size_int, contri_int);
+
+ local->delta = size_int - contri_int;
+
+ if (local->delta == 0) {
+ mq_mark_undirty (frame, NULL, this, 0, 0, NULL, NULL);
+ return 0;
+ }
+
+ newdict = dict_new ();
+ if (newdict == NULL) {
+ op_errno = ENOMEM;
+ ret = -1;
+ goto err;
+ }
+
+ QUOTA_ALLOC_OR_GOTO (delta, int64_t, ret, err);
+
+ *delta = hton64 (local->delta);
+
+ ret = dict_set_bin (newdict, contri_key, delta, 8);
+ if (ret < 0) {
+ op_errno = -ret;
+ ret = -1;
+ goto err;
+ }
+
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, buf->ia_gfid);
+
+ GF_UUID_ASSERT (local->loc.gfid);
+
+ STACK_WIND (frame,
+ mq_update_parent_size,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->xattrop,
+ &local->loc,
+ GF_XATTROP_ADD_ARRAY64,
+ newdict, NULL);
+ ret = 0;
+
+err:
+ if (op_ret == -1 || ret < 0) {
+ local->err = op_errno;
+
+ mq_release_parent_lock (frame, NULL, this, 0, 0, NULL);
+ }
+
+ if (newdict)
+ dict_unref (newdict);
+
+ return 0;
+}
+
+int32_t
+mq_fetch_child_size_and_contri (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ int32_t ret = -1;
+ char contri_key [512] = {0, };
+ dict_t *newdict = NULL;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+
+ local = frame->local;
+
+ if (op_ret == -1) {
+ gf_log (this->name, (op_errno == ENOENT) ? GF_LOG_DEBUG
+ : GF_LOG_WARNING,
+ "couldnt mark inode corresponding to path (%s) dirty "
+ "(%s)", local->parent_loc.path, strerror (op_errno));
+ goto err;
+ }
+
+ VALIDATE_OR_GOTO (local->ctx, err);
+ VALIDATE_OR_GOTO (local->contri, err);
+
+ gf_log (this->name, GF_LOG_DEBUG, "%s marked dirty", local->parent_loc.path);
+
+ //update parent ctx
+ ret = mq_inode_ctx_get (local->parent_loc.inode, this, &ctx);
+ if (ret == -1) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ LOCK (&ctx->lock);
+ {
+ ctx->dirty = 1;
+ }
+ UNLOCK (&ctx->lock);
+
+ newdict = dict_new ();
+ if (newdict == NULL) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (local->loc.inode->ia_type == IA_IFDIR) {
+ ret = dict_set_int64 (newdict, QUOTA_SIZE_KEY, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dict_set failed.");
+ goto err;
+ }
+ }
+
+ GET_CONTRI_KEY (contri_key, local->contri->gfid, ret);
+ if (ret < 0) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ ret = dict_set_int64 (newdict, contri_key, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dict_set failed.");
+ goto err;
+ }
+
+ mq_set_ctx_updation_status (local->ctx, _gf_false);
+
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, local->loc.inode->gfid);
+
+ GF_UUID_ASSERT (local->loc.gfid);
+
+ STACK_WIND (frame, mq_update_inode_contribution, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, &local->loc, newdict);
+
+ ret = 0;
+
+err:
+ if ((op_ret == -1) || (ret < 0)) {
+ local->err = op_errno;
+
+ mq_set_ctx_updation_status (local->ctx, _gf_false);
+
+ mq_release_parent_lock (frame, NULL, this, 0, 0, NULL);
+ }
+
+ if (newdict)
+ dict_unref (newdict);
+
+ return 0;
+}
+
+int32_t
+mq_markdirty (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ int32_t ret = -1;
+ dict_t *dict = NULL;
+ quota_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret == -1){
+ gf_log (this->name, (op_errno == ENOENT) ? GF_LOG_DEBUG
+ : GF_LOG_WARNING, "acquiring locks failed on %s (%s)",
+ local->parent_loc.path, strerror (op_errno));
+
+ local->err = op_errno;
+
+ mq_set_ctx_updation_status (local->ctx, _gf_false);
+
+ mq_inodelk_cbk (frame, NULL, this, 0, 0, NULL);
+
+ return 0;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "inodelk succeeded on %s", local->parent_loc.path);
+
+ dict = dict_new ();
+ if (!dict) {
+ ret = -1;
+ goto err;
+ }
+
+ ret = dict_set_int8 (dict, QUOTA_DIRTY_KEY, 1);
+ if (ret == -1)
+ goto err;
+
+ uuid_copy (local->parent_loc.gfid,
+ local->parent_loc.inode->gfid);
+ GF_UUID_ASSERT (local->parent_loc.gfid);
+
+ STACK_WIND (frame, mq_fetch_child_size_and_contri,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr,
+ &local->parent_loc, dict, 0, NULL);
+
+ ret = 0;
+err:
+ if (ret == -1) {
+ local->err = 1;
+
+ mq_set_ctx_updation_status (local->ctx, _gf_false);
+
+ mq_release_parent_lock (frame, NULL, this, 0, 0, NULL);
+ }
+
+ if (dict)
+ dict_unref (dict);
+
+ return 0;
+}
+
+
+int32_t
+mq_get_lock_on_parent (call_frame_t *frame, xlator_t *this)
+{
+ struct gf_flock lock = {0, };
+ quota_local_t *local = NULL;
+
+ GF_VALIDATE_OR_GOTO ("marker", frame, fr_destroy);
+
+ local = frame->local;
+ gf_log (this->name, GF_LOG_DEBUG, "taking lock on %s",
+ local->parent_loc.path);
+
+ if (local->parent_loc.inode == NULL) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "parent inode is not valid, aborting "
+ "transaction.");
+ goto fr_destroy;
+ }
+
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+
+ STACK_WIND (frame,
+ mq_markdirty,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->inodelk,
+ this->name, &local->parent_loc, F_SETLKW, &lock, NULL);
+
+ return 0;
+
+fr_destroy:
+ QUOTA_STACK_DESTROY (frame, this);
+
+ return -1;
+}
+
+
+int
+mq_start_quota_txn (xlator_t *this, loc_t *loc,
+ quota_inode_ctx_t *ctx,
+ inode_contribution_t *contri)
+{
+ int32_t ret = -1;
+ call_frame_t *frame = NULL;
+ quota_local_t *local = NULL;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (frame == NULL)
+ goto err;
+
+ mq_assign_lk_owner (this, frame);
+
+ local = mq_local_new ();
+ if (local == NULL)
+ goto fr_destroy;
+
+ frame->local = local;
+
+ ret = mq_loc_copy (&local->loc, loc);
+ if (ret < 0)
+ goto fr_destroy;
+
+ ret = mq_inode_loc_fill (NULL, local->loc.parent,
+ &local->parent_loc);
+ if (ret < 0)
+ goto fr_destroy;
+
+ local->ctx = ctx;
+ local->contri = contri;
+
+ ret = mq_get_lock_on_parent (frame, this);
+ if (ret == -1)
+ goto err;
+
+ return 0;
+
+fr_destroy:
+ QUOTA_STACK_DESTROY (frame, this);
+err:
+ mq_set_ctx_updation_status (ctx, _gf_false);
+
+ return -1;
+}
+
+
+int
+mq_initiate_quota_txn (xlator_t *this, loc_t *loc)
+{
+ int32_t ret = -1;
+ gf_boolean_t status = _gf_false;
+ quota_inode_ctx_t *ctx = NULL;
+ inode_contribution_t *contribution = NULL;
+
+ GF_VALIDATE_OR_GOTO ("marker", this, out);
+ GF_VALIDATE_OR_GOTO ("marker", loc, out);
+ GF_VALIDATE_OR_GOTO ("marker", loc->inode, out);
+
+ ret = mq_inode_ctx_get (loc->inode, this, &ctx);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "inode ctx get failed, aborting quota txn");
+ ret = -1;
+ goto out;
+ }
+
+ contribution = mq_get_contribution_node (loc->parent, ctx);
+ if (contribution == NULL)
+ goto out;
+
+ /* To improve performance, donot start another transaction
+ * if one is already in progress for same inode
+ */
+ status = _gf_true;
+
+ ret = mq_test_and_set_ctx_updation_status (ctx, &status);
+ if (ret < 0)
+ goto out;
+
+ if (status == _gf_false) {
+ mq_start_quota_txn (this, loc, ctx, contribution);
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+/* int32_t */
+/* validate_inode_size_contribution (xlator_t *this, loc_t *loc, int64_t size, */
+/* int64_t contribution) */
+/* { */
+/* if (size != contribution) { */
+/* mq_initiate_quota_txn (this, loc); */
+/* } */
+
+/* return 0; */
+/* } */
+
+
+int32_t
+mq_inspect_directory_xattr (xlator_t *this,
+ loc_t *loc,
+ dict_t *dict,
+ struct iatt buf)
+{
+ int32_t ret = 0;
+ int8_t dirty = -1;
+ int64_t *size = NULL, size_int = 0;
+ int64_t *contri = NULL, contri_int = 0;
+ char contri_key [512] = {0, };
+ gf_boolean_t not_root = _gf_false;
+ quota_inode_ctx_t *ctx = NULL;
+ inode_contribution_t *contribution = NULL;
+
+ ret = mq_inode_ctx_get (loc->inode, this, &ctx);
+ if (ret < 0) {
+ ctx = mq_inode_ctx_new (loc->inode, this);
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "mq_inode_ctx_new failed");
+ ret = -1;
+ goto err;
+ }
+ }
+
+ if (strcmp (loc->path, "/") != 0) {
+ contribution = mq_add_new_contribution_node (this, ctx, loc);
+ if (contribution == NULL) {
+ if (!uuid_is_null (loc->inode->gfid))
+ gf_log (this->name, GF_LOG_WARNING,
+ "cannot add a new contribution node");
+ ret = -1;
+ goto err;
+ }
+ }
+
+ ret = dict_get_bin (dict, QUOTA_SIZE_KEY, (void **) &size);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_int8 (dict, QUOTA_DIRTY_KEY, &dirty);
+ if (ret < 0)
+ goto out;
+
+ if (strcmp (loc->path, "/") != 0) {
+ not_root = _gf_true;
+
+ GET_CONTRI_KEY (contri_key, contribution->gfid, ret);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_bin (dict, contri_key, (void **) &contri);
+ if (ret < 0)
+ goto out;
+
+ LOCK (&contribution->lock);
+ {
+ contribution->contribution = ntoh64 (*contri);
+ contri_int = contribution->contribution;
+ }
+ UNLOCK (&contribution->lock);
+ }
+
+ LOCK (&ctx->lock);
+ {
+ ctx->size = ntoh64 (*size);
+ ctx->dirty = dirty;
+ size_int = ctx->size;
+ }
+ UNLOCK (&ctx->lock);
+
+ gf_log (this->name, GF_LOG_DEBUG, "size=%"PRId64
+ " contri=%"PRId64, size_int, contri_int);
+
+ if (dirty) {
+ ret = mq_update_dirty_inode (this, loc, ctx, contribution);
+ }
+
+ if ((!dirty || ret == 0) && (not_root == _gf_true) &&
+ (size_int != contri_int)) {
+ mq_initiate_quota_txn (this, loc);
+ }
+
+ ret = 0;
+out:
+ if (ret)
+ mq_set_inode_xattr (this, loc);
+err:
+ return ret;
+}
+
+int32_t
+mq_inspect_file_xattr (xlator_t *this,
+ loc_t *loc,
+ dict_t *dict,
+ struct iatt buf)
+{
+ int32_t ret = -1;
+ uint64_t contri_int = 0, size = 0;
+ int64_t *contri_ptr = NULL;
+ char contri_key [512] = {0, };
+ quota_inode_ctx_t *ctx = NULL;
+ inode_contribution_t *contribution = NULL;
+
+ ret = mq_inode_ctx_get (loc->inode, this, &ctx);
+ if (ret < 0) {
+ ctx = mq_inode_ctx_new (loc->inode, this);
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "mq_inode_ctx_new failed");
+ ret = -1;
+ goto out;
+ }
+ }
+
+ contribution = mq_add_new_contribution_node (this, ctx, loc);
+ if (contribution == NULL)
+ goto out;
+
+ LOCK (&ctx->lock);
+ {
+ ctx->size = 512 * buf.ia_blocks;
+ size = ctx->size;
+ }
+ UNLOCK (&ctx->lock);
+
+ list_for_each_entry (contribution, &ctx->contribution_head,
+ contri_list) {
+ GET_CONTRI_KEY (contri_key, contribution->gfid, ret);
+ if (ret < 0)
+ continue;
+
+ ret = dict_get_bin (dict, contri_key, (void **) &contri_int);
+ if (ret == 0) {
+ contri_ptr = (int64_t *)(unsigned long)contri_int;
+
+ LOCK (&contribution->lock);
+ {
+ contribution->contribution = ntoh64 (*contri_ptr);
+ contri_int = contribution->contribution;
+ }
+ UNLOCK (&contribution->lock);
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "size=%"PRId64 " contri=%"PRId64, size, contri_int);
+
+ if (size != contri_int) {
+ mq_initiate_quota_txn (this, loc);
+ }
+ } else
+ mq_initiate_quota_txn (this, loc);
+ }
+
+out:
+ return ret;
+}
+
+int32_t
+mq_xattr_state (xlator_t *this,
+ loc_t *loc,
+ dict_t *dict,
+ struct iatt buf)
+{
+ if (buf.ia_type == IA_IFREG ||
+ buf.ia_type == IA_IFLNK) {
+ mq_inspect_file_xattr (this, loc, dict, buf);
+ } else if (buf.ia_type == IA_IFDIR)
+ mq_inspect_directory_xattr (this, loc, dict, buf);
+
+ return 0;
+}
+
+int32_t
+mq_req_xattr (xlator_t *this,
+ loc_t *loc,
+ dict_t *dict)
+{
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("marker", this, out);
+ GF_VALIDATE_OR_GOTO ("marker", dict, out);
+
+ if (!loc)
+ goto set_size;
+
+ //if not "/" then request contribution
+ if (strcmp (loc->path, "/") == 0)
+ goto set_size;
+
+ ret = mq_dict_set_contribution (this, dict, loc);
+ if (ret == -1)
+ goto out;
+
+set_size:
+ ret = dict_set_uint64 (dict, QUOTA_SIZE_KEY, 0);
+ if (ret < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_int8 (dict, QUOTA_DIRTY_KEY, 0);
+ if (ret < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+
+int32_t
+mq_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ QUOTA_STACK_DESTROY (frame, this);
+
+ return 0;
+}
+
+int32_t
+_mq_inode_remove_done (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ int32_t ret = 0;
+ char contri_key [512] = {0, };
+ quota_local_t *local = NULL;
+
+ local = (quota_local_t *) frame->local;
+
+ if (op_ret == -1 || local->err == -1) {
+ mq_removexattr_cbk (frame, NULL, this, -1, 0, NULL);
+ return 0;
+ }
+
+ frame->local = NULL;
+
+ if (local->hl_count > 1) {
+ GET_CONTRI_KEY (contri_key, local->contri->gfid, ret);
+
+ STACK_WIND (frame, mq_removexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr,
+ &local->loc, contri_key, NULL);
+ ret = 0;
+ } else {
+ mq_removexattr_cbk (frame, NULL, this, 0, 0, NULL);
+ }
+
+ if (strcmp (local->parent_loc.path, "/") != 0) {
+ ret = mq_get_parent_inode_local (this, local);
+ if (ret < 0)
+ goto out;
+
+ mq_start_quota_txn (this, &local->loc, local->ctx, local->contri);
+ }
+out:
+ mq_local_unref (this, local);
+
+ return 0;
+}
+
+int32_t
+mq_inode_remove_done (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ int32_t ret = -1;
+ struct gf_flock lock = {0, };
+ quota_inode_ctx_t *ctx = NULL;
+ quota_local_t *local = NULL;
+ int64_t contribution = 0;
+
+ local = frame->local;
+ if (op_ret == -1)
+ local->err = -1;
+
+ ret = mq_inode_ctx_get (local->parent_loc.inode, this, &ctx);
+
+ LOCK (&local->contri->lock);
+ {
+ contribution = local->contri->contribution;
+ }
+ UNLOCK (&local->contri->lock);
+
+ if (contribution == local->size) {
+ if (ret == 0) {
+ LOCK (&ctx->lock);
+ {
+ ctx->size -= contribution;
+ }
+ UNLOCK (&ctx->lock);
+
+ LOCK (&local->contri->lock);
+ {
+ local->contri->contribution = 0;
+ }
+ UNLOCK (&local->contri->lock);
+ }
+ }
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND (frame,
+ _mq_inode_remove_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->inodelk,
+ this->name, &local->parent_loc,
+ F_SETLKW, &lock, NULL);
+ return 0;
+}
+
+int32_t
+mq_reduce_parent_size_xattr (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ int32_t ret = -1;
+ int64_t *size = NULL;
+ dict_t *dict = NULL;
+ quota_local_t *local = NULL;
+
+ local = frame->local;
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "inodelk set failed on %s", local->parent_loc.path);
+ QUOTA_STACK_DESTROY (frame, this);
+ return 0;
+ }
+
+ VALIDATE_OR_GOTO (local->contri, err);
+
+ dict = dict_new ();
+ if (dict == NULL) {
+ ret = -1;
+ goto err;
+ }
+
+ QUOTA_ALLOC_OR_GOTO (size, int64_t, ret, err);
+
+ *size = hton64 (-local->size);
+
+ ret = dict_set_bin (dict, QUOTA_SIZE_KEY, size, 8);
+ if (ret < 0)
+ goto err;
+
+ uuid_copy (local->parent_loc.gfid,
+ local->parent_loc.inode->gfid);
+ GF_UUID_ASSERT (local->parent_loc.gfid);
+
+ STACK_WIND (frame, mq_inode_remove_done, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->xattrop, &local->parent_loc,
+ GF_XATTROP_ADD_ARRAY64, dict, NULL);
+ dict_unref (dict);
+ return 0;
+
+err:
+ local->err = 1;
+ mq_inode_remove_done (frame, NULL, this, -1, 0, NULL, NULL);
+ if (dict)
+ dict_unref (dict);
+ return 0;
+}
+
+int32_t
+mq_reduce_parent_size (xlator_t *this, loc_t *loc, int64_t contri)
+{
+ int32_t ret = -1;
+ struct gf_flock lock = {0,};
+ call_frame_t *frame = NULL;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ inode_contribution_t *contribution = NULL;
+
+ GF_VALIDATE_OR_GOTO ("marker", this, out);
+ GF_VALIDATE_OR_GOTO ("marker", loc, out);
+
+ ret = mq_inode_ctx_get (loc->inode, this, &ctx);
+ if (ret < 0)
+ goto out;
+
+ contribution = mq_get_contribution_node (loc->parent, ctx);
+ if (contribution == NULL)
+ goto out;
+
+ local = mq_local_new ();
+ if (local == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ if (contri >= 0) {
+ local->size = contri;
+ } else {
+ LOCK (&contribution->lock);
+ {
+ local->size = contribution->contribution;
+ }
+ UNLOCK (&contribution->lock);
+ }
+
+ if (local->size == 0) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = mq_loc_copy (&local->loc, loc);
+ if (ret < 0)
+ goto out;
+
+ local->ctx = ctx;
+ local->contri = contribution;
+
+ ret = mq_inode_loc_fill (NULL, loc->parent, &local->parent_loc);
+ if (ret < 0)
+ goto out;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ mq_assign_lk_owner (this, frame);
+
+ frame->local = local;
+
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+
+ if (local->parent_loc.inode == NULL) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Inode is NULL, so can't stackwind.");
+ goto out;
+ }
+
+ STACK_WIND (frame,
+ mq_reduce_parent_size_xattr,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->inodelk,
+ this->name, &local->parent_loc, F_SETLKW, &lock, NULL);
+ local = NULL;
+ ret = 0;
+
+out:
+ if (local != NULL)
+ mq_local_unref (this, local);
+
+ return ret;
+}
+
+
+int32_t
+init_quota_priv (xlator_t *this)
+{
+ return 0;
+}
+
+
+int32_t
+mq_rename_update_newpath (xlator_t *this, loc_t *loc)
+{
+ int32_t ret = -1;
+ quota_inode_ctx_t *ctx = NULL;
+ inode_contribution_t *contribution = NULL;
+
+ GF_VALIDATE_OR_GOTO ("marker", this, out);
+ GF_VALIDATE_OR_GOTO ("marker", loc, out);
+ GF_VALIDATE_OR_GOTO ("marker", loc->inode, out);
+
+ ret = mq_inode_ctx_get (loc->inode, this, &ctx);
+ if (ret < 0)
+ goto out;
+
+ contribution = mq_add_new_contribution_node (this, ctx, loc);
+ if (contribution == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ mq_initiate_quota_txn (this, loc);
+out:
+ return ret;
+}
+
+int32_t
+mq_forget (xlator_t *this, quota_inode_ctx_t *ctx)
+{
+ inode_contribution_t *contri = NULL;
+ inode_contribution_t *next = NULL;
+
+ GF_VALIDATE_OR_GOTO ("marker", this, out);
+ GF_VALIDATE_OR_GOTO ("marker", ctx, out);
+
+ list_for_each_entry_safe (contri, next, &ctx->contribution_head,
+ contri_list) {
+ list_del (&contri->contri_list);
+ GF_FREE (contri);
+ }
+
+ LOCK_DESTROY (&ctx->lock);
+ GF_FREE (ctx);
+out:
+ return 0;
+}
diff --git a/xlators/features/marker/src/marker-quota.h b/xlators/features/marker/src/marker-quota.h
new file mode 100644
index 000000000..385760ac4
--- /dev/null
+++ b/xlators/features/marker/src/marker-quota.h
@@ -0,0 +1,130 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _MARKER_QUOTA_H
+#define _MARKER_QUOTA_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "marker-mem-types.h"
+
+#define QUOTA_XATTR_PREFIX "trusted.glusterfs"
+#define QUOTA_DIRTY_KEY "trusted.glusterfs.quota.dirty"
+
+#define CONTRIBUTION "contri"
+#define CONTRI_KEY_MAX 512
+#define READDIR_BUF 4096
+
+
+#define QUOTA_STACK_DESTROY(_frame, _this) \
+ do { \
+ quota_local_t *_local = NULL; \
+ _local = _frame->local; \
+ _frame->local = NULL; \
+ STACK_DESTROY (_frame->root); \
+ mq_local_unref (_this, _local); \
+ } while (0)
+
+
+#define QUOTA_ALLOC(var, type, ret) \
+ do { \
+ ret = 0; \
+ var = GF_CALLOC (sizeof (type), 1, \
+ gf_marker_mt_##type); \
+ if (!var) { \
+ gf_log ("", GF_LOG_ERROR, \
+ "out of memory"); \
+ ret = -1; \
+ } \
+ } while (0);
+
+#define QUOTA_ALLOC_OR_GOTO(var, type, ret, label) \
+ do { \
+ var = GF_CALLOC (sizeof (type), 1, \
+ gf_marker_mt_##type); \
+ if (!var) { \
+ gf_log ("", GF_LOG_ERROR, \
+ "out of memory"); \
+ ret = -1; \
+ goto label; \
+ } \
+ ret = 0; \
+ } while (0);
+
+#define GET_CONTRI_KEY(var, _gfid, _ret) \
+ do { \
+ char _gfid_unparsed[40]; \
+ uuid_unparse (_gfid, _gfid_unparsed); \
+ _ret = snprintf (var, CONTRI_KEY_MAX, QUOTA_XATTR_PREFIX \
+ ".%s.%s." CONTRIBUTION, "quota", \
+ _gfid_unparsed); \
+ } while (0);
+
+#define QUOTA_SAFE_INCREMENT(lock, var) \
+ do { \
+ LOCK (lock); \
+ var ++; \
+ UNLOCK (lock); \
+ } while (0)
+
+struct quota_inode_ctx {
+ int64_t size;
+ int8_t dirty;
+ gf_boolean_t updation_status;
+ gf_lock_t lock;
+ struct list_head contribution_head;
+};
+typedef struct quota_inode_ctx quota_inode_ctx_t;
+
+struct inode_contribution {
+ struct list_head contri_list;
+ int64_t contribution;
+ uuid_t gfid;
+ gf_lock_t lock;
+};
+typedef struct inode_contribution inode_contribution_t;
+
+int32_t
+mq_get_lock_on_parent (call_frame_t *, xlator_t *);
+
+int32_t
+mq_req_xattr (xlator_t *, loc_t *, dict_t *);
+
+int32_t
+init_quota_priv (xlator_t *);
+
+int32_t
+mq_xattr_state (xlator_t *, loc_t *, dict_t *, struct iatt);
+
+int32_t
+mq_set_inode_xattr (xlator_t *, loc_t *);
+
+int
+mq_initiate_quota_txn (xlator_t *, loc_t *);
+
+int32_t
+mq_dirty_inode_readdir (call_frame_t *, void *, xlator_t *,
+ int32_t, int32_t, fd_t *, dict_t *);
+
+int32_t
+mq_reduce_parent_size (xlator_t *, loc_t *, int64_t);
+
+int32_t
+mq_rename_update_newpath (xlator_t *, loc_t *);
+
+int32_t
+mq_inspect_file_xattr (xlator_t *this, loc_t *loc, dict_t *dict, struct iatt buf);
+
+int32_t
+mq_forget (xlator_t *, quota_inode_ctx_t *);
+#endif
diff --git a/xlators/features/marker/src/marker.c b/xlators/features/marker/src/marker.c
new file mode 100644
index 000000000..6a2c85691
--- /dev/null
+++ b/xlators/features/marker/src/marker.c
@@ -0,0 +1,2862 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "defaults.h"
+#include "libxlator.h"
+#include "marker.h"
+#include "marker-mem-types.h"
+#include "marker-quota.h"
+#include "marker-quota-helper.h"
+#include "marker-common.h"
+#include "byte-order.h"
+
+#define _GF_UID_GID_CHANGED 1
+
+void
+fini (xlator_t *this);
+
+int32_t
+marker_start_setxattr (call_frame_t *, xlator_t *);
+
+marker_local_t *
+marker_local_ref (marker_local_t *local)
+{
+ GF_VALIDATE_OR_GOTO ("marker", local, err);
+
+ LOCK (&local->lock);
+ {
+ local->ref++;
+ }
+ UNLOCK (&local->lock);
+
+ return local;
+err:
+ return NULL;
+}
+
+int
+marker_loc_fill (loc_t *loc, inode_t *inode, inode_t *parent, char *path)
+{
+ int ret = -1;
+
+ if (!loc)
+ return ret;
+
+ if (inode) {
+ loc->inode = inode_ref (inode);
+ if (uuid_is_null (loc->gfid)) {
+ uuid_copy (loc->gfid, loc->inode->gfid);
+ }
+ }
+
+ if (parent)
+ loc->parent = inode_ref (parent);
+
+ if (path) {
+ loc->path = gf_strdup (path);
+ if (!loc->path) {
+ gf_log ("loc fill", GF_LOG_ERROR, "strdup failed");
+ goto loc_wipe;
+ }
+
+ loc->name = strrchr (loc->path, '/');
+ if (loc->name)
+ loc->name++;
+ }
+
+ ret = 0;
+loc_wipe:
+ if (ret < 0)
+ loc_wipe (loc);
+
+ return ret;
+}
+
+int
+marker_inode_loc_fill (inode_t *inode, loc_t *loc)
+{
+ char *resolvedpath = NULL;
+ int ret = -1;
+ inode_t *parent = NULL;
+
+ if ((!inode) || (!loc))
+ return ret;
+
+ parent = inode_parent (inode, NULL, NULL);
+
+ ret = inode_path (inode, NULL, &resolvedpath);
+ if (ret < 0)
+ goto err;
+
+ ret = marker_loc_fill (loc, inode, parent, resolvedpath);
+ if (ret < 0)
+ goto err;
+
+err:
+ if (parent)
+ inode_unref (parent);
+
+ GF_FREE (resolvedpath);
+
+ return ret;
+}
+
+int32_t
+marker_trav_parent (marker_local_t *local)
+{
+ int32_t ret = 0;
+ loc_t loc = {0, };
+ inode_t *parent = NULL;
+ int8_t need_unref = 0;
+
+ if (!local->loc.parent) {
+ parent = inode_parent (local->loc.inode, NULL, NULL);
+ if (parent)
+ need_unref = 1;
+ } else
+ parent = local->loc.parent;
+
+ ret = marker_inode_loc_fill (parent, &loc);
+
+ if (ret < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ loc_wipe (&local->loc);
+
+ local->loc = loc;
+out:
+ if (need_unref)
+ inode_unref (parent);
+
+ return ret;
+}
+
+int32_t
+marker_error_handler (xlator_t *this, marker_local_t *local, int32_t op_errno)
+{
+ marker_conf_t *priv = NULL;
+ const char *path = NULL;
+
+ priv = (marker_conf_t *) this->private;
+ path = local
+ ? (local->loc.path
+ ? local->loc.path : uuid_utoa(local->loc.gfid))
+ : "<nul>";
+
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Indexing gone corrupt at %s (reason: %s)."
+ " Geo-replication slave content needs to be revalidated",
+ path, strerror (op_errno));
+ unlink (priv->timestamp_file);
+
+ return 0;
+}
+
+int32_t
+marker_local_unref (marker_local_t *local)
+{
+ int32_t var = 0;
+
+ if (local == NULL)
+ return -1;
+
+ LOCK (&local->lock);
+ {
+ var = --local->ref;
+ }
+ UNLOCK (&local->lock);
+
+ if (var != 0)
+ goto out;
+
+ loc_wipe (&local->loc);
+ loc_wipe (&local->parent_loc);
+ if (local->xdata)
+ dict_unref (local->xdata);
+
+ if (local->oplocal) {
+ marker_local_unref (local->oplocal);
+ local->oplocal = NULL;
+ }
+ mem_put (local);
+out:
+ return 0;
+}
+
+int32_t
+stat_stampfile (xlator_t *this, marker_conf_t *priv,
+ struct volume_mark **status)
+{
+ struct stat buf = {0, };
+ struct volume_mark *vol_mark = NULL;
+
+ vol_mark = GF_CALLOC (sizeof (struct volume_mark), 1,
+ gf_marker_mt_volume_mark);
+
+ vol_mark->major = 1;
+ vol_mark->minor = 0;
+
+ GF_ASSERT (sizeof (priv->volume_uuid_bin) == 16);
+ memcpy (vol_mark->uuid, priv->volume_uuid_bin, 16);
+
+ if (stat (priv->timestamp_file, &buf) != -1) {
+ vol_mark->retval = 0;
+ vol_mark->sec = htonl (buf.st_ctime);
+ vol_mark->usec = htonl (ST_CTIM_NSEC (&buf)/1000);
+ } else
+ vol_mark->retval = 1;
+
+ *status = vol_mark;
+
+ return 0;
+}
+
+int32_t
+marker_getxattr_stampfile_cbk (call_frame_t *frame, xlator_t *this,
+ const char *name, struct volume_mark *vol_mark,
+ dict_t *xdata)
+{
+ int32_t ret = -1;
+ dict_t *dict = NULL;
+
+ if (vol_mark == NULL){
+ STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL, NULL);
+
+ goto out;
+ }
+
+ dict = dict_new ();
+
+ ret = dict_set_bin (dict, (char *)name, vol_mark,
+ sizeof (struct volume_mark));
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING, "failed to set key %s",
+ name);
+
+ STACK_UNWIND_STRICT (getxattr, frame, 0, 0, dict, xdata);
+
+ dict_unref (dict);
+out:
+ return 0;
+}
+
+int32_t
+call_from_special_client (call_frame_t *frame, xlator_t *this, const char *name)
+{
+ struct volume_mark *vol_mark = NULL;
+ marker_conf_t *priv = NULL;
+ gf_boolean_t ret = _gf_true;
+
+ priv = (marker_conf_t *)this->private;
+
+ if (frame->root->pid != GF_CLIENT_PID_GSYNCD || name == NULL ||
+ strcmp (name, MARKER_XATTR_PREFIX "." VOLUME_MARK) != 0) {
+ ret = _gf_false;
+ goto out;
+ }
+
+ stat_stampfile (this, priv, &vol_mark);
+
+ marker_getxattr_stampfile_cbk (frame, this, name, vol_mark, NULL);
+out:
+ return ret;
+}
+
+int32_t
+marker_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ if (cookie) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Filtering the quota extended attributes");
+
+ dict_foreach_fnmatch (dict, "trusted.glusterfs.quota*",
+ marker_filter_quota_xattr, NULL);
+ }
+
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+int32_t
+marker_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ gf_boolean_t ret = _gf_false;
+ marker_conf_t *priv = NULL;
+ unsigned long cookie = 0;
+
+ priv = this->private;
+
+ if (priv == NULL || (priv->feature_enabled & GF_XTIME) == 0)
+ goto wind;
+
+ gf_log (this->name, GF_LOG_DEBUG, "USER:PID = %d", frame->root->pid);
+
+ ret = call_from_special_client (frame, this, name);
+wind:
+ if (ret == _gf_false) {
+ if (name == NULL) {
+ /* Signifies that marker translator
+ * has to filter the quota's xattr's,
+ * this is to prevent afr from performing
+ * self healing on marker-quota xattrs'
+ */
+ cookie = 1;
+ }
+ STACK_WIND_COOKIE (frame, marker_getxattr_cbk, (void *)cookie,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc,
+ name, xdata);
+ }
+
+ return 0;
+}
+
+
+int32_t
+marker_setxattr_done (call_frame_t *frame)
+{
+ marker_local_t *local = NULL;
+
+ local = (marker_local_t *) frame->local;
+
+ frame->local = NULL;
+
+ STACK_DESTROY (frame->root);
+
+ marker_local_unref (local);
+
+ return 0;
+}
+
+int
+marker_specific_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ int32_t ret = 0;
+ int32_t done = 0;
+ marker_local_t *local = NULL;
+
+ local = (marker_local_t*) frame->local;
+
+ if (op_ret == -1 && op_errno == ENOSPC) {
+ marker_error_handler (this, local, op_errno);
+ done = 1;
+ goto out;
+ }
+
+ if (local) {
+ if (local->loc.path && strcmp (local->loc.path, "/") == 0) {
+ done = 1;
+ goto out;
+ }
+ if (__is_root_gfid (local->loc.gfid)) {
+ done = 1;
+ goto out;
+ }
+ }
+
+ ret = marker_trav_parent (local);
+
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG, "Error occurred "
+ "while traversing to the parent, stopping marker");
+
+ done = 1;
+
+ goto out;
+ }
+
+ marker_start_setxattr (frame, this);
+
+out:
+ if (done) {
+ marker_setxattr_done (frame);
+ }
+
+ return 0;
+}
+
+int32_t
+marker_start_setxattr (call_frame_t *frame, xlator_t *this)
+{
+ int32_t ret = -1;
+ dict_t *dict = NULL;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ priv = this->private;
+
+ local = (marker_local_t*) frame->local;
+
+ if (!local)
+ goto out;
+
+ dict = dict_new ();
+
+ if (!dict)
+ goto out;
+
+ if (local->loc.inode && uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, local->loc.inode->gfid);
+
+ GF_UUID_ASSERT (local->loc.gfid);
+
+ ret = dict_set_static_bin (dict, priv->marker_xattr,
+ (void *)local->timebuf, 8);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set marker xattr (%s)", local->loc.path);
+ goto out;
+ }
+
+ STACK_WIND (frame, marker_specific_setxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, &local->loc, dict, 0,
+ NULL);
+
+ ret = 0;
+out:
+ if (dict)
+ dict_unref (dict);
+
+ return ret;
+}
+
+void
+marker_gettimeofday (marker_local_t *local)
+{
+ struct timeval tv = {0, };
+
+ gettimeofday (&tv, NULL);
+
+ local->timebuf [0] = htonl (tv.tv_sec);
+ local->timebuf [1] = htonl (tv.tv_usec);
+
+ return;
+}
+
+int32_t
+marker_create_frame (xlator_t *this, marker_local_t *local)
+{
+ call_frame_t *frame = NULL;
+
+ frame = create_frame (this, this->ctx->pool);
+
+ frame->local = (void *) local;
+
+ marker_start_setxattr (frame, this);
+
+ return 0;
+}
+
+int32_t
+marker_xtime_update_marks (xlator_t *this, marker_local_t *local)
+{
+ marker_conf_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO ("marker", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, local, out);
+
+ priv = this->private;
+
+ if ((local->pid == GF_CLIENT_PID_GSYNCD
+ && !(priv->feature_enabled & GF_XTIME_GSYNC_FORCE))
+ || (local->pid == GF_CLIENT_PID_DEFRAG))
+ goto out;
+
+ marker_gettimeofday (local);
+
+ marker_local_ref (local);
+
+ marker_create_frame (this, local);
+out:
+ return 0;
+}
+
+
+int32_t
+marker_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ marker_conf_t *priv = NULL;
+ marker_local_t *local = NULL;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_TRACE, "error occurred "
+ "while Creating a file %s", strerror (op_errno));
+ }
+
+ local = (marker_local_t *) frame->local;
+
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode,
+ buf, preparent, postparent, xdata);
+
+ if (op_ret == -1 || local == NULL)
+ goto out;
+
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, buf->ia_gfid);
+
+ priv = this->private;
+
+ if (priv->feature_enabled & GF_QUOTA)
+ mq_set_inode_xattr (this, &local->loc);
+
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks (this, local);
+
+out:
+ marker_local_unref (local);
+
+ return 0;
+}
+
+int
+marker_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->feature_enabled == 0)
+ goto wind;
+
+ local = mem_get0 (this->local_pool);
+
+ MARKER_INIT_LOCAL (frame, local);
+
+ ret = loc_copy (&local->loc, loc);
+
+ if (ret == -1)
+ goto err;
+wind:
+ STACK_WIND (frame, marker_mkdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (mkdir, frame, -1, ENOMEM, NULL,
+ NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+
+int32_t
+marker_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_TRACE, "error occurred "
+ "while Creating a file %s", strerror (op_errno));
+ }
+
+ local = (marker_local_t *) frame->local;
+
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+
+ if (op_ret == -1 || local == NULL)
+ goto out;
+
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, buf->ia_gfid);
+
+ priv = this->private;
+
+ if (priv->feature_enabled & GF_QUOTA)
+ mq_set_inode_xattr (this, &local->loc);
+
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks (this, local);
+
+out:
+ marker_local_unref (local);
+
+ return 0;
+}
+
+int32_t
+marker_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->feature_enabled == 0)
+ goto wind;
+
+ local = mem_get0 (this->local_pool);
+
+ MARKER_INIT_LOCAL (frame, local);
+
+ ret = loc_copy (&local->loc, loc);
+
+ if (ret == -1)
+ goto err;
+wind:
+ STACK_WIND (frame, marker_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask,
+ fd, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (create, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+
+ return 0;
+}
+
+
+int32_t
+marker_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ marker_conf_t *priv = NULL;
+ marker_local_t *local = NULL;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_TRACE, "error occurred "
+ "while write, %s", strerror (op_errno));
+ }
+
+ local = (marker_local_t *) frame->local;
+
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+
+ if (op_ret == -1 || local == NULL)
+ goto out;
+
+ priv = this->private;
+
+ if (priv->feature_enabled & GF_QUOTA)
+ mq_initiate_quota_txn (this, &local->loc);
+
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks (this, local);
+
+out:
+ marker_local_unref (local);
+
+ return 0;
+}
+
+int32_t
+marker_writev (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ struct iovec *vector,
+ int32_t count,
+ off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+{
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->feature_enabled == 0)
+ goto wind;
+
+ local = mem_get0 (this->local_pool);
+
+ MARKER_INIT_LOCAL (frame, local);
+
+ ret = marker_inode_loc_fill (fd->inode, &local->loc);
+
+ if (ret == -1)
+ goto err;
+wind:
+ STACK_WIND (frame, marker_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
+ flags, iobref, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+int32_t
+marker_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ marker_conf_t *priv = NULL;
+ marker_local_t *local = NULL;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_TRACE, "error occurred "
+ "rmdir %s", strerror (op_errno));
+ }
+
+ local = (marker_local_t *) frame->local;
+
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, preparent,
+ postparent, xdata);
+
+ if (op_ret == -1 || local == NULL)
+ goto out;
+
+ priv = this->private;
+
+ if (priv->feature_enabled & GF_QUOTA)
+ mq_reduce_parent_size (this, &local->loc, -1);
+
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks (this, local);
+out:
+ marker_local_unref (local);
+
+ return 0;
+}
+
+int32_t
+marker_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
+{
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->feature_enabled == 0)
+ goto wind;
+
+ local = mem_get0 (this->local_pool);
+
+ MARKER_INIT_LOCAL (frame, local);
+
+ ret = loc_copy (&local->loc, loc);
+
+ if (ret == -1)
+ goto err;
+wind:
+ STACK_WIND (frame, marker_rmdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (rmdir, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+int32_t
+marker_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ marker_conf_t *priv = NULL;
+ marker_local_t *local = NULL;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s occurred in unlink", strerror (op_errno));
+ }
+
+ local = (marker_local_t *) frame->local;
+
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, preparent,
+ postparent, xdata);
+
+ if (op_ret == -1 || local == NULL)
+ goto out;
+
+ priv = this->private;
+
+ if ((priv->feature_enabled & GF_QUOTA) && (local->ia_nlink == 1))
+ mq_reduce_parent_size (this, &local->loc, -1);
+
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks (this, local);
+out:
+ marker_local_unref (local);
+
+ return 0;
+}
+
+
+int32_t
+marker_unlink_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ marker_local_t *local = NULL;
+
+ local = frame->local;
+ if (op_ret < 0) {
+ goto err;
+ }
+
+ if (local == NULL) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->ia_nlink = buf->ia_nlink;
+
+ STACK_WIND (frame, marker_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag,
+ local->xdata);
+ return 0;
+err:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (unlink, frame, -1, op_errno, NULL, NULL, NULL);
+ marker_local_unref (local);
+ return 0;
+}
+
+
+int32_t
+marker_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
+{
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->feature_enabled == 0)
+ goto unlink_wind;
+
+ local = mem_get0 (this->local_pool);
+ local->xflag = xflag;
+ if (xdata)
+ local->xdata = dict_ref (xdata);
+ MARKER_INIT_LOCAL (frame, local);
+
+ ret = loc_copy (&local->loc, loc);
+
+ if (ret == -1)
+ goto err;
+
+ if (uuid_is_null (loc->gfid) && loc->inode)
+ uuid_copy (loc->gfid, loc->inode->gfid);
+
+ STACK_WIND (frame, marker_unlink_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
+
+unlink_wind:
+ STACK_WIND (frame, marker_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ return 0;
+err:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM, NULL, NULL, NULL);
+ marker_local_unref (local);
+ return 0;
+}
+
+
+int32_t
+marker_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_TRACE, "%s occurred while "
+ "linking a file ", strerror (op_errno));
+ }
+
+ local = (marker_local_t *) frame->local;
+
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+
+ if (op_ret == -1 || local == NULL)
+ goto out;
+
+ priv = this->private;
+
+ if (priv->feature_enabled & GF_QUOTA)
+ mq_initiate_quota_txn (this, &local->loc);
+
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks (this, local);
+out:
+ marker_local_unref (local);
+
+ return 0;
+}
+
+int32_t
+marker_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->feature_enabled == 0)
+ goto wind;
+
+ local = mem_get0 (this->local_pool);
+
+ MARKER_INIT_LOCAL (frame, local);
+
+ ret = loc_copy (&local->loc, newloc);
+
+ if (ret == -1)
+ goto err;
+wind:
+ STACK_WIND (frame, marker_link_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
+ NULL);
+
+ return 0;
+}
+
+
+int32_t
+marker_rename_done (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ marker_local_t *local = NULL, *oplocal = NULL;
+ loc_t newloc = {0, };
+ marker_conf_t *priv = NULL;
+
+ local = frame->local;
+ oplocal = local->oplocal;
+
+ priv = this->private;
+
+ frame->local = NULL;
+
+ if (op_ret < 0) {
+ if (local->err == 0) {
+ local->err = op_errno;
+ }
+
+ gf_log (this->name, GF_LOG_WARNING,
+ "inodelk (UNLOCK) failed on path:%s (gfid:%s) (%s)",
+ local->parent_loc.path,
+ uuid_utoa (local->parent_loc.inode->gfid),
+ strerror (op_errno));
+ }
+
+ if (local->stub != NULL) {
+ call_resume (local->stub);
+ local->stub = NULL;
+ } else if (local->err != 0) {
+ STACK_UNWIND_STRICT (rename, frame, -1, local->err, NULL, NULL,
+ NULL, NULL, NULL, NULL);
+ }
+
+ mq_reduce_parent_size (this, &oplocal->loc, oplocal->contribution);
+
+ if (local->loc.inode != NULL) {
+ mq_reduce_parent_size (this, &local->loc, local->contribution);
+ }
+
+ newloc.inode = inode_ref (oplocal->loc.inode);
+ newloc.path = gf_strdup (local->loc.path);
+ newloc.name = strrchr (newloc.path, '/');
+ if (newloc.name)
+ newloc.name++;
+ newloc.parent = inode_ref (local->loc.parent);
+
+ mq_rename_update_newpath (this, &newloc);
+
+ loc_wipe (&newloc);
+
+ if (priv->feature_enabled & GF_XTIME) {
+ //update marks on oldpath
+ uuid_copy (local->loc.gfid, oplocal->loc.inode->gfid);
+ marker_xtime_update_marks (this, oplocal);
+ marker_xtime_update_marks (this, local);
+ }
+
+ marker_local_unref (local);
+ marker_local_unref (oplocal);
+ return 0;
+}
+
+
+int32_t
+marker_rename_release_newp_lock (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ marker_local_t *local = NULL, *oplocal = NULL;
+ struct gf_flock lock = {0, };
+
+ local = frame->local;
+ oplocal = local->oplocal;
+
+ if (op_ret < 0) {
+ if (local->err == 0) {
+ local->err = op_errno;
+ }
+
+ gf_log (this->name, GF_LOG_WARNING,
+ "inodelk (UNLOCK) failed on %s (gfid:%s) (%s)",
+ oplocal->parent_loc.path,
+ uuid_utoa (oplocal->parent_loc.inode->gfid),
+ strerror (op_errno));
+ }
+
+ if (local->next_lock_on == NULL) {
+ marker_rename_done (frame, NULL, this, 0, 0, NULL);
+ goto out;
+ }
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND (frame,
+ marker_rename_done,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->inodelk,
+ this->name, &local->parent_loc, F_SETLKW, &lock, NULL);
+
+out:
+ return 0;
+}
+
+
+int32_t
+marker_rename_release_oldp_lock (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ marker_local_t *local = NULL, *oplocal = NULL;
+ struct gf_flock lock = {0, };
+
+ local = frame->local;
+ oplocal = local->oplocal;
+
+ if ((op_ret < 0) && (op_errno != ENOATTR)) {
+ local->err = op_errno;
+ }
+
+ //Reset frame uid and gid if set.
+ if (cookie == (void *) _GF_UID_GID_CHANGED)
+ MARKER_RESET_UID_GID (frame, frame->root, local);
+
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
+
+ STACK_WIND (frame,
+ marker_rename_release_newp_lock,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->inodelk,
+ this->name, &oplocal->parent_loc, F_SETLKW, &lock, NULL);
+ return 0;
+}
+
+
+int32_t
+marker_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ marker_conf_t *priv = NULL;
+ marker_local_t *local = NULL;
+ marker_local_t *oplocal = NULL;
+ call_stub_t *stub = NULL;
+ int32_t ret = 0;
+ char contri_key [512] = {0, };
+ loc_t newloc = {0, };
+
+ local = (marker_local_t *) frame->local;
+
+ if (local != NULL) {
+ oplocal = local->oplocal;
+ }
+
+ priv = this->private;
+
+ if (op_ret < 0) {
+ if (local != NULL) {
+ local->err = op_errno;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE, "%s occurred while "
+ "renaming a file ", strerror (op_errno));
+ }
+
+ if (priv->feature_enabled & GF_QUOTA) {
+ if ((op_ret < 0) || (local == NULL)) {
+ goto quota_err;
+ }
+
+ stub = fop_rename_cbk_stub (frame, default_rename_cbk, op_ret,
+ op_errno, buf, preoldparent,
+ postoldparent, prenewparent,
+ postnewparent, xdata);
+ if (stub == NULL) {
+ local->err = ENOMEM;
+ goto quota_err;
+ }
+
+ local->stub = stub;
+
+ GET_CONTRI_KEY (contri_key, oplocal->loc.parent->gfid, ret);
+ if (ret < 0) {
+ local->err = ENOMEM;
+ goto quota_err;
+ }
+
+ /* Removexattr requires uid and gid to be 0,
+ * reset them in the callback.
+ */
+ MARKER_SET_UID_GID (frame, local, frame->root);
+
+ newloc.inode = inode_ref (oplocal->loc.inode);
+ newloc.path = gf_strdup (local->loc.path);
+ newloc.name = strrchr (newloc.path, '/');
+ if (newloc.name)
+ newloc.name++;
+ newloc.parent = inode_ref (local->loc.parent);
+ uuid_copy (newloc.gfid, oplocal->loc.inode->gfid);
+
+ STACK_WIND_COOKIE (frame, marker_rename_release_oldp_lock,
+ frame->cookie, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr,
+ &newloc, contri_key, NULL);
+
+ loc_wipe (&newloc);
+ } else {
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf,
+ preoldparent, postoldparent, prenewparent,
+ postnewparent, xdata);
+
+ if ((op_ret < 0) || (local == NULL)) {
+ goto out;
+ }
+
+ if (priv->feature_enabled & GF_XTIME) {
+ //update marks on oldpath
+ uuid_copy (local->loc.gfid, oplocal->loc.inode->gfid);
+ marker_xtime_update_marks (this, oplocal);
+ marker_xtime_update_marks (this, local);
+ }
+ }
+
+out:
+ if (!(priv->feature_enabled & GF_QUOTA)) {
+ marker_local_unref (local);
+ marker_local_unref (oplocal);
+ }
+
+ return 0;
+
+quota_err:
+ marker_rename_release_oldp_lock (frame, NULL, this, 0, 0, NULL);
+ return 0;
+}
+
+
+int32_t
+marker_do_rename (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ marker_local_t *local = NULL, *oplocal = NULL;
+ char contri_key[512] = {0, };
+ int32_t ret = 0;
+ int64_t *contribution = 0;
+
+ local = frame->local;
+ oplocal = local->oplocal;
+
+ //Reset frame uid and gid if set.
+ if (cookie == (void *) _GF_UID_GID_CHANGED)
+ MARKER_RESET_UID_GID (frame, frame->root, local);
+
+ if ((op_ret < 0) && (op_errno != ENOATTR)) {
+ local->err = op_errno;
+ gf_log (this->name, GF_LOG_WARNING,
+ "fetching contribution values from %s (gfid:%s) "
+ "failed (%s)", local->loc.path,
+ uuid_utoa (local->loc.inode->gfid),
+ strerror (op_errno));
+ goto err;
+ }
+
+ if (local->loc.inode != NULL) {
+ GET_CONTRI_KEY (contri_key, local->loc.parent->gfid, ret);
+ if (ret < 0) {
+ local->err = errno;
+ goto err;
+ }
+
+ if (dict_get_bin (dict, contri_key,
+ (void **) &contribution) == 0) {
+ local->contribution = ntoh64 (*contribution);
+ }
+ }
+
+ STACK_WIND (frame, marker_rename_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, &oplocal->loc,
+ &local->loc, NULL);
+
+ return 0;
+
+err:
+ marker_rename_release_oldp_lock (frame, NULL, this, 0, 0, NULL);
+ return 0;
+}
+
+
+int32_t
+marker_get_newpath_contribution (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ marker_local_t *local = NULL, *oplocal = NULL;
+ char contri_key[512] = {0, };
+ int32_t ret = 0;
+ int64_t *contribution = 0;
+
+ local = frame->local;
+ oplocal = local->oplocal;
+
+ //Reset frame uid and gid if set.
+ if (cookie == (void *) _GF_UID_GID_CHANGED)
+ MARKER_RESET_UID_GID (frame, frame->root, local);
+
+ if ((op_ret < 0) && (op_errno != ENOATTR)) {
+ local->err = op_errno;
+ gf_log (this->name, GF_LOG_WARNING,
+ "fetching contribution values from %s (gfid:%s) "
+ "failed (%s)", oplocal->loc.path,
+ uuid_utoa (oplocal->loc.inode->gfid),
+ strerror (op_errno));
+ goto err;
+ }
+
+ GET_CONTRI_KEY (contri_key, oplocal->loc.parent->gfid, ret);
+ if (ret < 0) {
+ local->err = errno;
+ goto err;
+ }
+
+ if (dict_get_bin (dict, contri_key, (void **) &contribution) == 0)
+ oplocal->contribution = ntoh64 (*contribution);
+
+ if (local->loc.inode != NULL) {
+ GET_CONTRI_KEY (contri_key, local->loc.parent->gfid, ret);
+ if (ret < 0) {
+ local->err = errno;
+ goto err;
+ }
+
+ /* getxattr requires uid and gid to be 0,
+ * reset them in the callback.
+ */
+ MARKER_SET_UID_GID (frame, local, frame->root);
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, local->loc.inode->gfid);
+
+ GF_UUID_ASSERT (local->loc.gfid);
+
+ STACK_WIND_COOKIE (frame, marker_do_rename,
+ frame->cookie, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr,
+ &local->loc, contri_key, NULL);
+ } else {
+ marker_do_rename (frame, NULL, this, 0, 0, NULL, NULL);
+ }
+
+ return 0;
+err:
+ marker_rename_release_oldp_lock (frame, NULL, this, 0, 0, NULL);
+ return 0;
+}
+
+
+int32_t
+marker_get_oldpath_contribution (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ marker_local_t *local = NULL, *oplocal = NULL;
+ char contri_key[512] = {0, };
+ int32_t ret = 0;
+
+ local = frame->local;
+ oplocal = local->oplocal;
+
+ if (op_ret < 0) {
+ local->err = op_errno;
+ gf_log (this->name, GF_LOG_WARNING,
+ "cannot hold inodelk on %s (gfid:%s) (%s)",
+ local->next_lock_on->path,
+ uuid_utoa (local->next_lock_on->inode->gfid),
+ strerror (op_errno));
+ goto lock_err;
+ }
+
+ GET_CONTRI_KEY (contri_key, oplocal->loc.parent->gfid, ret);
+ if (ret < 0) {
+ local->err = errno;
+ goto quota_err;
+ }
+
+ /* getxattr requires uid and gid to be 0,
+ * reset them in the callback.
+ */
+ MARKER_SET_UID_GID (frame, local, frame->root);
+
+ if (uuid_is_null (oplocal->loc.gfid))
+ uuid_copy (oplocal->loc.gfid,
+ oplocal->loc.inode->gfid);
+
+ GF_UUID_ASSERT (oplocal->loc.gfid);
+
+ STACK_WIND_COOKIE (frame, marker_get_newpath_contribution,
+ frame->cookie, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr,
+ &oplocal->loc, contri_key, NULL);
+ return 0;
+
+quota_err:
+ marker_rename_release_oldp_lock (frame, NULL, this, 0, 0, NULL);
+ return 0;
+
+lock_err:
+ if ((local->next_lock_on == NULL)
+ || (local->next_lock_on == &local->parent_loc)) {
+ local->next_lock_on = NULL;
+ marker_rename_release_oldp_lock (frame, NULL, this, 0, 0, NULL);
+ } else {
+ marker_rename_release_newp_lock (frame, NULL, this, 0, 0, NULL);
+ }
+
+ return 0;
+}
+
+
+int32_t
+marker_rename_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ marker_local_t *local = NULL, *oplocal = NULL;
+ loc_t *loc = NULL;
+ struct gf_flock lock = {0, };
+
+ local = frame->local;
+ oplocal = local->oplocal;
+
+ if (op_ret < 0) {
+ if (local->next_lock_on != &oplocal->parent_loc) {
+ loc = &oplocal->parent_loc;
+ } else {
+ loc = &local->parent_loc;
+ }
+
+ local->err = op_errno;
+ gf_log (this->name, GF_LOG_WARNING,
+ "cannot hold inodelk on %s (gfid:%s) (%s)",
+ loc->path, uuid_utoa (loc->inode->gfid),
+ strerror (op_errno));
+ goto err;
+ }
+
+ if (local->next_lock_on != NULL) {
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+
+ STACK_WIND (frame,
+ marker_get_oldpath_contribution,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->inodelk,
+ this->name, local->next_lock_on,
+ F_SETLKW, &lock, NULL);
+ } else {
+ marker_get_oldpath_contribution (frame, 0, this, 0, 0, NULL);
+ }
+
+ return 0;
+
+err:
+ marker_rename_done (frame, NULL, this, 0, 0, NULL);
+ return 0;
+}
+
+
+int32_t
+marker_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_local_t *oplocal = NULL;
+ marker_conf_t *priv = NULL;
+ struct gf_flock lock = {0, };
+ loc_t *lock_on = NULL;
+
+ priv = this->private;
+
+ if (priv->feature_enabled == 0)
+ goto rename_wind;
+
+ local = mem_get0 (this->local_pool);
+
+ MARKER_INIT_LOCAL (frame, local);
+
+ oplocal = mem_get0 (this->local_pool);
+
+ MARKER_INIT_LOCAL (frame, oplocal);
+
+ frame->local = local;
+
+ local->oplocal = marker_local_ref (oplocal);
+
+ ret = loc_copy (&local->loc, newloc);
+ if (ret < 0)
+ goto err;
+
+ ret = loc_copy (&oplocal->loc, oldloc);
+ if (ret < 0)
+ goto err;
+
+ if (!(priv->feature_enabled & GF_QUOTA)) {
+ goto rename_wind;
+ }
+
+ ret = mq_inode_loc_fill (NULL, newloc->parent, &local->parent_loc);
+ if (ret < 0)
+ goto err;
+
+ ret = mq_inode_loc_fill (NULL, oldloc->parent, &oplocal->parent_loc);
+ if (ret < 0)
+ goto err;
+
+ if ((newloc->inode != NULL) && (newloc->parent != oldloc->parent)
+ && (uuid_compare (newloc->parent->gfid,
+ oldloc->parent->gfid) < 0)) {
+ lock_on = &local->parent_loc;
+ local->next_lock_on = &oplocal->parent_loc;
+ } else {
+ lock_on = &oplocal->parent_loc;
+ if ((newloc->inode != NULL) && (newloc->parent
+ != oldloc->parent)) {
+ local->next_lock_on = &local->parent_loc;
+ }
+ }
+
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+
+ STACK_WIND (frame,
+ marker_rename_inodelk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->inodelk,
+ this->name, lock_on,
+ F_SETLKW, &lock, NULL);
+
+ return 0;
+
+rename_wind:
+ STACK_WIND (frame, marker_rename_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (rename, frame, -1, ENOMEM, NULL,
+ NULL, NULL, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+int32_t
+marker_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_TRACE, "%s occurred while "
+ "truncating a file ", strerror (op_errno));
+ }
+
+ local = (marker_local_t *) frame->local;
+
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+
+ if (op_ret == -1 || local == NULL)
+ goto out;
+
+ priv = this->private;
+
+ if (priv->feature_enabled & GF_QUOTA)
+ mq_initiate_quota_txn (this, &local->loc);
+
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks (this, local);
+
+out:
+ marker_local_unref (local);
+
+ return 0;
+}
+
+int32_t
+marker_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->feature_enabled == 0)
+ goto wind;
+
+ local = mem_get0 (this->local_pool);
+
+ MARKER_INIT_LOCAL (frame, local);
+
+ ret = loc_copy (&local->loc, loc);
+
+ if (ret == -1)
+ goto err;
+wind:
+ STACK_WIND (frame, marker_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+int32_t
+marker_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_TRACE, "%s occurred while "
+ "truncating a file ", strerror (op_errno));
+ }
+
+ local = (marker_local_t *) frame->local;
+
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+
+ if (op_ret == -1 || local == NULL)
+ goto out;
+
+ priv = this->private;
+
+ if (priv->feature_enabled & GF_QUOTA)
+ mq_initiate_quota_txn (this, &local->loc);
+
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks (this, local);
+out:
+ marker_local_unref (local);
+
+ return 0;
+}
+
+int32_t
+marker_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->feature_enabled == 0)
+ goto wind;
+
+ local = mem_get0 (this->local_pool);
+
+ MARKER_INIT_LOCAL (frame, local);
+
+ ret = marker_inode_loc_fill (fd->inode, &local->loc);
+
+ if (ret == -1)
+ goto err;
+wind:
+ STACK_WIND (frame, marker_ftruncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+int32_t
+marker_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ marker_conf_t *priv = NULL;
+ marker_local_t *local = NULL;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_TRACE, "%s occurred while "
+ "creating symlinks ", strerror (op_errno));
+ }
+
+ local = (marker_local_t *) frame->local;
+
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+
+ if (op_ret == -1 || local == NULL)
+ goto out;
+
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, buf->ia_gfid);
+
+ priv = this->private;
+
+ if (priv->feature_enabled & GF_QUOTA)
+ mq_set_inode_xattr (this, &local->loc);
+
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks (this, local);
+out:
+ marker_local_unref (local);
+
+ return 0;
+}
+
+int
+marker_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->feature_enabled == 0)
+ goto wind;
+
+ local = mem_get0 (this->local_pool);
+
+ MARKER_INIT_LOCAL (frame, local);
+
+ ret = loc_copy (&local->loc, loc);
+
+ if (ret == -1)
+ goto err;
+wind:
+ STACK_WIND (frame, marker_symlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask,
+ xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (symlink, frame, -1, ENOMEM, NULL,
+ NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+
+int32_t
+marker_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_TRACE, "%s occurred while "
+ "creating symlinks ", strerror (op_errno));
+ }
+
+ local = (marker_local_t *) frame->local;
+
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode,
+ buf, preparent, postparent, xdata);
+
+ if (op_ret == -1 || local == NULL)
+ goto out;
+
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, buf->ia_gfid);
+
+ priv = this->private;
+
+ if ((priv->feature_enabled & GF_QUOTA) && (S_ISREG (local->mode))) {
+ mq_set_inode_xattr (this, &local->loc);
+ }
+
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks (this, local);
+out:
+ marker_local_unref (local);
+
+ return 0;
+}
+
+int
+marker_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->feature_enabled == 0)
+ goto wind;
+
+ local = mem_get0 (this->local_pool);
+
+ MARKER_INIT_LOCAL (frame, local);
+
+ ret = loc_copy (&local->loc, loc);
+
+ local->mode = mode;
+
+ if (ret == -1)
+ goto err;
+wind:
+ STACK_WIND (frame, marker_mknod_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask,
+ xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (mknod, frame, -1, ENOMEM, NULL,
+ NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+
+int32_t
+marker_fallocate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_TRACE, "%s occurred while "
+ "fallocating a file ", strerror (op_errno));
+ }
+
+ local = (marker_local_t *) frame->local;
+
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT (fallocate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+
+ if (op_ret == -1 || local == NULL)
+ goto out;
+
+ priv = this->private;
+
+ if (priv->feature_enabled & GF_QUOTA)
+ mq_initiate_quota_txn (this, &local->loc);
+
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks (this, local);
+out:
+ marker_local_unref (local);
+
+ return 0;
+}
+
+int32_t
+marker_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->feature_enabled == 0)
+ goto wind;
+
+ local = mem_get0 (this->local_pool);
+
+ MARKER_INIT_LOCAL (frame, local);
+
+ ret = marker_inode_loc_fill (fd->inode, &local->loc);
+
+ if (ret == -1)
+ goto err;
+wind:
+ STACK_WIND (frame, marker_fallocate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len,
+ xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (fallocate, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+int32_t
+marker_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_TRACE, "%s occurred during discard",
+ strerror (op_errno));
+ }
+
+ local = (marker_local_t *) frame->local;
+
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT (discard, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+
+ if (op_ret == -1 || local == NULL)
+ goto out;
+
+ priv = this->private;
+
+ if (priv->feature_enabled & GF_QUOTA)
+ mq_initiate_quota_txn (this, &local->loc);
+
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks (this, local);
+out:
+ marker_local_unref (local);
+
+ return 0;
+}
+
+int32_t
+marker_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->feature_enabled == 0)
+ goto wind;
+
+ local = mem_get0 (this->local_pool);
+
+ MARKER_INIT_LOCAL (frame, local);
+
+ ret = marker_inode_loc_fill (fd->inode, &local->loc);
+
+ if (ret == -1)
+ goto err;
+wind:
+ STACK_WIND (frame, marker_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (discard, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+ return 0;
+}
+
+int32_t
+marker_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_TRACE, "%s occurred during zerofill",
+ strerror (op_errno));
+ }
+
+ local = (marker_local_t *) frame->local;
+
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT (zerofill, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+
+ if (op_ret == -1 || local == NULL)
+ goto out;
+
+ priv = this->private;
+
+ if (priv->feature_enabled & GF_QUOTA)
+ mq_initiate_quota_txn (this, &local->loc);
+
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks (this, local);
+out:
+ marker_local_unref (local);
+
+ return 0;
+}
+
+int32_t
+marker_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->feature_enabled == 0)
+ goto wind;
+
+ local = mem_get0 (this->local_pool);
+
+ MARKER_INIT_LOCAL (frame, local);
+
+ ret = marker_inode_loc_fill (fd->inode, &local->loc);
+
+ if (ret == -1)
+ goto err;
+wind:
+ STACK_WIND (frame, marker_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (zerofill, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+/* when a call from the special client is received on
+ * key trusted.glusterfs.volume-mark with value "RESET"
+ * or if the value is 0length, update the change the
+ * access time and modification time via touching the
+ * timestamp file.
+ */
+int32_t
+call_from_sp_client_to_reset_tmfile (call_frame_t *frame,
+ xlator_t *this,
+ dict_t *dict)
+{
+ int32_t fd = 0;
+ int32_t op_ret = 0;
+ int32_t op_errno = 0;
+ data_t *data = NULL;
+ marker_conf_t *priv = NULL;
+
+ if (frame == NULL || this == NULL || dict == NULL)
+ return -1;
+
+ priv = this->private;
+
+ data = dict_get (dict, "trusted.glusterfs.volume-mark");
+ if (data == NULL)
+ return -1;
+
+ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+ op_ret = -1;
+ op_errno = EPERM;
+
+ goto out;
+ }
+
+ if (data->len == 0 || (data->len == 5 &&
+ memcmp (data->data, "RESET", 5) == 0)) {
+ fd = open (priv->timestamp_file, O_WRONLY|O_TRUNC);
+ if (fd != -1) {
+ /* TODO check whether the O_TRUNC would update the
+ * timestamps on a zero length file on all machies.
+ */
+ close (fd);
+ }
+
+ if (fd != -1 || errno == ENOENT) {
+ op_ret = 0;
+ op_errno = 0;
+ } else {
+ op_ret = -1;
+ op_errno = errno;
+ }
+ } else {
+ op_ret = -1;
+ op_errno = EINVAL;
+ }
+out:
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, NULL);
+
+ return 0;
+}
+
+
+int32_t
+marker_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_TRACE, "%s occurred in "
+ "setxattr ", strerror (op_errno));
+ }
+
+ local = (marker_local_t *) frame->local;
+
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
+
+ if (op_ret == -1 || local == NULL)
+ goto out;
+
+ priv = this->private;
+
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks (this, local);
+out:
+ marker_local_unref (local);
+
+ return 0;
+}
+
+int32_t
+marker_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->feature_enabled == 0)
+ goto wind;
+
+ ret = call_from_sp_client_to_reset_tmfile (frame, this, dict);
+ if (ret == 0)
+ return 0;
+
+ local = mem_get0 (this->local_pool);
+
+ MARKER_INIT_LOCAL (frame, local);
+
+ ret = loc_copy (&local->loc, loc);
+
+ if (ret == -1)
+ goto err;
+wind:
+ STACK_WIND (frame, marker_setxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (setxattr, frame, -1, ENOMEM, NULL);
+
+ return 0;
+}
+
+
+int32_t
+marker_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_TRACE, "%s occurred while "
+ "creating symlinks ", strerror (op_errno));
+ }
+
+ local = (marker_local_t *) frame->local;
+
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
+
+ if (op_ret == -1 || local == NULL)
+ goto out;
+
+ priv = this->private;
+
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks (this, local);
+out:
+ marker_local_unref (local);
+
+ return 0;
+}
+
+int32_t
+marker_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->feature_enabled == 0)
+ goto wind;
+
+ ret = call_from_sp_client_to_reset_tmfile (frame, this, dict);
+ if (ret == 0)
+ return 0;
+
+ local = mem_get0 (this->local_pool);
+
+ MARKER_INIT_LOCAL (frame, local);
+
+ ret = marker_inode_loc_fill (fd->inode, &local->loc);
+
+ if (ret == -1)
+ goto err;
+wind:
+ STACK_WIND (frame, marker_fsetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (fsetxattr, frame, -1, ENOMEM, NULL);
+
+ return 0;
+}
+
+
+int32_t
+marker_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "%s occurred while "
+ "creating symlinks ", strerror (op_errno));
+ }
+
+ local = (marker_local_t *) frame->local;
+
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno, statpre,
+ statpost, xdata);
+
+ if (op_ret == -1 || local == NULL)
+ goto out;
+
+ priv = this->private;
+
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks (this, local);
+out:
+ marker_local_unref (local);
+
+ return 0;
+}
+
+
+int32_t
+marker_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->feature_enabled == 0)
+ goto wind;
+
+ local = mem_get0 (this->local_pool);
+
+ MARKER_INIT_LOCAL (frame, local);
+
+ ret = marker_inode_loc_fill (fd->inode, &local->loc);
+
+ if (ret == -1)
+ goto err;
+wind:
+ STACK_WIND (frame, marker_fsetattr_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, valid, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+int32_t
+marker_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ local = (marker_local_t *) frame->local;
+
+ frame->local = NULL;
+
+ if (op_ret == -1) {
+ gf_log (this->name, ((op_errno == ENOENT) ? GF_LOG_DEBUG :
+ GF_LOG_ERROR),
+ "%s occurred during setattr of %s",
+ strerror (op_errno),
+ (local ? local->loc.path : "<nul>"));
+ }
+
+ STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, statpre,
+ statpost, xdata);
+
+ if (op_ret == -1 || local == NULL)
+ goto out;
+
+ priv = this->private;
+
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks (this, local);
+out:
+ marker_local_unref (local);
+
+ return 0;
+}
+
+int32_t
+marker_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->feature_enabled == 0)
+ goto wind;
+
+ local = mem_get0 (this->local_pool);
+
+ MARKER_INIT_LOCAL (frame, local);
+
+ ret = loc_copy (&local->loc, loc);
+
+ if (ret == -1)
+ goto err;
+wind:
+ STACK_WIND (frame, marker_setattr_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (setattr, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+int32_t
+marker_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "%s occurred while "
+ "creating symlinks ", strerror (op_errno));
+ }
+
+ local = (marker_local_t *) frame->local;
+
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
+
+ if (op_ret == -1 || local == NULL)
+ goto out;
+
+ priv = this->private;
+
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks (this, local);
+out:
+ marker_local_unref (local);
+
+ return 0;
+}
+
+int32_t
+marker_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->feature_enabled == 0)
+ goto wind;
+
+ local = mem_get0 (this->local_pool);
+
+ MARKER_INIT_LOCAL (frame, local);
+
+ ret = loc_copy (&local->loc, loc);
+
+ if (ret == -1)
+ goto err;
+wind:
+ STACK_WIND (frame, marker_removexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (removexattr, frame, -1, ENOMEM, NULL);
+
+ return 0;
+}
+
+
+int32_t
+marker_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *dict, struct iatt *postparent)
+{
+ marker_conf_t *priv = NULL;
+ marker_local_t *local = NULL;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_TRACE, "lookup failed with %s",
+ strerror (op_errno));
+ }
+
+ local = (marker_local_t *) frame->local;
+
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf,
+ dict, postparent);
+
+ if (op_ret == -1 || local == NULL)
+ goto out;
+
+ /* copy the gfid from the stat structure instead of inode,
+ * since if the lookup is fresh lookup, then the inode
+ * would have not yet linked to the inode table which happens
+ * in protocol/server.
+ */
+ if (uuid_is_null (local->loc.gfid))
+ uuid_copy (local->loc.gfid, buf->ia_gfid);
+
+
+ priv = this->private;
+
+ if (priv->feature_enabled & GF_QUOTA) {
+ mq_xattr_state (this, &local->loc, dict, *buf);
+ }
+
+out:
+ marker_local_unref (local);
+
+ return 0;
+}
+
+int32_t
+marker_lookup (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xattr_req)
+{
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->feature_enabled == 0)
+ goto wind;
+
+ local = mem_get0 (this->local_pool);
+
+ MARKER_INIT_LOCAL (frame, local);
+
+ ret = loc_copy (&local->loc, loc);
+ if (ret == -1)
+ goto err;
+
+ if ((priv->feature_enabled & GF_QUOTA) && xattr_req)
+ mq_req_xattr (this, loc, xattr_req);
+wind:
+ STACK_WIND (frame, marker_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (lookup, frame, -1, 0, NULL, NULL, NULL, NULL);
+
+ return 0;
+}
+
+int
+marker_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ gf_dirent_t *entry = NULL;
+
+ if (op_ret <= 0)
+ goto unwind;
+
+ list_for_each_entry (entry, &entries->list, list) {
+ /* TODO: fill things */
+ }
+
+unwind:
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
+
+ return 0;
+}
+
+int
+marker_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict)
+{
+ marker_conf_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->feature_enabled == 0)
+ goto wind;
+
+ if ((priv->feature_enabled & GF_QUOTA) && dict)
+ mq_req_xattr (this, NULL, dict);
+
+wind:
+ STACK_WIND (frame, marker_readdirp_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp,
+ fd, size, offset, dict);
+
+ return 0;
+}
+
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_marker_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+
+int32_t
+init_xtime_priv (xlator_t *this, dict_t *options)
+{
+ data_t *data = NULL;
+ int32_t ret = -1;
+ marker_conf_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO ("marker", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, options, out);
+ GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+
+ priv = this->private;
+
+ if((data = dict_get (options, VOLUME_UUID)) != NULL) {
+ priv->volume_uuid = data->data;
+
+ ret = uuid_parse (priv->volume_uuid, priv->volume_uuid_bin);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "invalid volume uuid %s", priv->volume_uuid);
+ goto out;
+ }
+
+ ret = gf_asprintf (& (priv->marker_xattr), "%s.%s.%s",
+ MARKER_XATTR_PREFIX, priv->volume_uuid,
+ XTIME);
+
+ if (ret == -1){
+ priv->marker_xattr = NULL;
+
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to allocate memory");
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "the volume-uuid = %s", priv->volume_uuid);
+ } else {
+ priv->volume_uuid = NULL;
+
+ gf_log (this->name, GF_LOG_ERROR,
+ "please specify the volume-uuid"
+ "in the translator options");
+
+ return -1;
+ }
+
+ if ((data = dict_get (options, TIMESTAMP_FILE)) != NULL) {
+ priv->timestamp_file = data->data;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "the timestamp-file is = %s",
+ priv->timestamp_file);
+
+ } else {
+ priv->timestamp_file = NULL;
+
+ gf_log (this->name, GF_LOG_ERROR,
+ "please specify the timestamp-file"
+ "in the translator options");
+
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+void
+marker_xtime_priv_cleanup (xlator_t *this)
+{
+ marker_conf_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO ("marker", this, out);
+
+ priv = (marker_conf_t *) this->private;
+
+ GF_VALIDATE_OR_GOTO (this->name, priv, out);
+
+ GF_FREE (priv->volume_uuid);
+
+ GF_FREE (priv->timestamp_file);
+
+ GF_FREE (priv->marker_xattr);
+out:
+ return;
+}
+
+void
+marker_priv_cleanup (xlator_t *this)
+{
+ marker_conf_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO ("marker", this, out);
+
+ priv = (marker_conf_t *) this->private;
+
+ GF_VALIDATE_OR_GOTO (this->name, priv, out);
+
+ marker_xtime_priv_cleanup (this);
+
+ LOCK_DESTROY (&priv->lock);
+
+ GF_FREE (priv);
+out:
+ return;
+}
+
+int32_t
+reconfigure (xlator_t *this, dict_t *options)
+{
+ int32_t ret = 0;
+ data_t *data = NULL;
+ gf_boolean_t flag = _gf_false;
+ marker_conf_t *priv = NULL;
+
+ GF_ASSERT (this);
+ GF_ASSERT (this->private);
+
+ priv = this->private;
+
+ priv->feature_enabled = 0;
+
+ GF_VALIDATE_OR_GOTO (this->name, options, out);
+
+ data = dict_get (options, "quota");
+ if (data) {
+ ret = gf_string2boolean (data->data, &flag);
+ if (ret == 0 && flag == _gf_true) {
+ ret = init_quota_priv (this);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to initialize quota private");
+ } else {
+ priv->feature_enabled |= GF_QUOTA;
+ }
+ }
+ }
+
+ data = dict_get (options, "xtime");
+ if (data) {
+ ret = gf_string2boolean (data->data, &flag);
+ if (ret == 0 && flag == _gf_true) {
+ marker_xtime_priv_cleanup (this);
+
+ ret = init_xtime_priv (this, options);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to initialize xtime private, "
+ "xtime updation will fail");
+ } else {
+ priv->feature_enabled |= GF_XTIME;
+ data = dict_get (options, "gsync-force-xtime");
+ if (!data)
+ goto out;
+ ret = gf_string2boolean (data->data, &flag);
+ if (ret == 0 && flag)
+ priv->feature_enabled |= GF_XTIME_GSYNC_FORCE;
+ }
+ }
+ }
+out:
+ return ret;
+}
+
+
+int32_t
+init (xlator_t *this)
+{
+ dict_t *options = NULL;
+ data_t *data = NULL;
+ int32_t ret = 0;
+ gf_boolean_t flag = _gf_false;
+ marker_conf_t *priv = NULL;
+
+ if (!this->children) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "marker translator needs subvolume defined.");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Volume is dangling.");
+ return -1;
+ }
+
+ options = this->options;
+
+ ALLOCATE_OR_GOTO (this->private, marker_conf_t, err);
+
+ priv = this->private;
+
+ priv->feature_enabled = 0;
+
+ LOCK_INIT (&priv->lock);
+
+ data = dict_get (options, "quota");
+ if (data) {
+ ret = gf_string2boolean (data->data, &flag);
+ if (ret == 0 && flag == _gf_true) {
+ ret = init_quota_priv (this);
+ if (ret < 0)
+ goto err;
+
+ priv->feature_enabled |= GF_QUOTA;
+ }
+ }
+
+ data = dict_get (options, "xtime");
+ if (data) {
+ ret = gf_string2boolean (data->data, &flag);
+ if (ret == 0 && flag == _gf_true) {
+ ret = init_xtime_priv (this, options);
+ if (ret < 0)
+ goto err;
+
+ priv->feature_enabled |= GF_XTIME;
+ data = dict_get (options, "gsync-force-xtime");
+ if (!data)
+ goto cont;
+ ret = gf_string2boolean (data->data, &flag);
+ if (ret == 0 && flag)
+ priv->feature_enabled |= GF_XTIME_GSYNC_FORCE;
+ }
+ }
+
+ cont:
+ this->local_pool = mem_pool_new (marker_local_t, 128);
+ if (!this->local_pool) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto err;
+ }
+
+ return 0;
+err:
+ marker_priv_cleanup (this);
+
+ return -1;
+}
+
+int32_t
+marker_forget (xlator_t *this, inode_t *inode)
+{
+ marker_inode_ctx_t *ctx = NULL;
+ uint64_t value = 0;
+
+ if (inode_ctx_del (inode, this, &value) != 0)
+ goto out;
+
+ ctx = (marker_inode_ctx_t *)(unsigned long)value;
+ if (ctx == NULL) {
+ goto out;
+ }
+
+ mq_forget (this, ctx->quota_ctx);
+
+ GF_FREE (ctx);
+out:
+ return 0;
+}
+
+void
+fini (xlator_t *this)
+{
+ marker_priv_cleanup (this);
+}
+
+struct xlator_fops fops = {
+ .lookup = marker_lookup,
+ .create = marker_create,
+ .mkdir = marker_mkdir,
+ .writev = marker_writev,
+ .truncate = marker_truncate,
+ .ftruncate = marker_ftruncate,
+ .symlink = marker_symlink,
+ .link = marker_link,
+ .unlink = marker_unlink,
+ .rmdir = marker_rmdir,
+ .rename = marker_rename,
+ .mknod = marker_mknod,
+ .setxattr = marker_setxattr,
+ .fsetxattr = marker_fsetxattr,
+ .setattr = marker_setattr,
+ .fsetattr = marker_fsetattr,
+ .removexattr = marker_removexattr,
+ .getxattr = marker_getxattr,
+ .readdirp = marker_readdirp,
+ .fallocate = marker_fallocate,
+ .discard = marker_discard,
+ .zerofill = marker_zerofill,
+};
+
+struct xlator_cbks cbks = {
+ .forget = marker_forget
+};
+
+struct volume_options options[] = {
+ {.key = {"volume-uuid"}},
+ {.key = {"timestamp-file"}},
+ {.key = {"quota"}},
+ {.key = {"xtime"}},
+ {.key = {"gsync-force-xtime"}},
+ {.key = {NULL}}
+};
diff --git a/xlators/features/marker/src/marker.h b/xlators/features/marker/src/marker.h
new file mode 100644
index 000000000..1a58f8cfc
--- /dev/null
+++ b/xlators/features/marker/src/marker.h
@@ -0,0 +1,138 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _MARKER_H
+#define _MARKER_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "marker-quota.h"
+#include "xlator.h"
+#include "defaults.h"
+#include "uuid.h"
+#include "call-stub.h"
+
+#define MARKER_XATTR_PREFIX "trusted.glusterfs"
+#define XTIME "xtime"
+#define VOLUME_MARK "volume-mark"
+#define VOLUME_UUID "volume-uuid"
+#define TIMESTAMP_FILE "timestamp-file"
+
+enum {
+ GF_QUOTA = 1,
+ GF_XTIME = 2,
+ GF_XTIME_GSYNC_FORCE = 4,
+};
+
+/*initialize the local variable*/
+#define MARKER_INIT_LOCAL(_frame,_local) do { \
+ _frame->local = _local; \
+ _local->pid = _frame->root->pid; \
+ memset (&_local->loc, 0, sizeof (loc_t)); \
+ _local->ref = 1; \
+ _local->uid = -1; \
+ _local->gid = -1; \
+ LOCK_INIT (&_local->lock); \
+ _local->oplocal = NULL; \
+ } while (0)
+
+/* try alloc and if it fails, goto label */
+#define ALLOCATE_OR_GOTO(var, type, label) do { \
+ var = GF_CALLOC (sizeof (type), 1, \
+ gf_marker_mt_##type); \
+ if (!var) { \
+ gf_log (this->name, GF_LOG_ERROR, \
+ "out of memory :("); \
+ goto label; \
+ } \
+ } while (0)
+
+#define _MARKER_SET_UID_GID(dest, src) \
+ do { \
+ if (src->uid != -1 && \
+ src->gid != -1) { \
+ dest->uid = src->uid; \
+ dest->gid = src->gid; \
+ } \
+ } while (0)
+
+#define MARKER_SET_UID_GID(frame, dest, src) \
+ do { \
+ _MARKER_SET_UID_GID (dest, src); \
+ frame->root->uid = 0; \
+ frame->root->gid = 0; \
+ frame->cookie = (void *) _GF_UID_GID_CHANGED; \
+ } while (0)
+
+#define MARKER_RESET_UID_GID(frame, dest, src) \
+ do { \
+ _MARKER_SET_UID_GID (dest, src); \
+ frame->cookie = NULL; \
+ } while (0)
+
+struct marker_local{
+ uint32_t timebuf[2];
+ pid_t pid;
+ loc_t loc;
+ loc_t parent_loc;
+ loc_t *next_lock_on;
+ uid_t uid;
+ gid_t gid;
+ int32_t ref;
+ int32_t ia_nlink;
+ gf_lock_t lock;
+ mode_t mode;
+ int32_t err;
+ call_stub_t *stub;
+ int64_t contribution;
+ struct marker_local *oplocal;
+
+ /* marker quota specific */
+ int64_t delta;
+ int64_t d_off;
+ int64_t sum;
+ int64_t size;
+ int32_t hl_count;
+ int32_t dentry_child_count;
+
+ fd_t *fd;
+ call_frame_t *frame;
+
+ quota_inode_ctx_t *ctx;
+ inode_contribution_t *contri;
+
+ int xflag;
+ dict_t *xdata;
+};
+typedef struct marker_local marker_local_t;
+
+#define quota_local_t marker_local_t
+
+struct marker_inode_ctx {
+ struct quota_inode_ctx *quota_ctx;
+};
+typedef struct marker_inode_ctx marker_inode_ctx_t;
+
+struct marker_conf{
+ char feature_enabled;
+ char *size_key;
+ char *dirty_key;
+ char *volume_uuid;
+ uuid_t volume_uuid_bin;
+ char *timestamp_file;
+ char *marker_xattr;
+ uint64_t quota_lk_owner;
+ gf_lock_t lock;
+};
+typedef struct marker_conf marker_conf_t;
+
+#endif
diff --git a/xlators/features/path-convertor/src/Makefile.am b/xlators/features/path-convertor/src/Makefile.am
index 58cfed0f9..393a7bd08 100644
--- a/xlators/features/path-convertor/src/Makefile.am
+++ b/xlators/features/path-convertor/src/Makefile.am
@@ -2,13 +2,14 @@
xlator_LTLIBRARIES = path-converter.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/features
-path_converter_la_LDFLAGS = -module -avoidversion
+path_converter_la_LDFLAGS = -module -avoid-version
path_converter_la_SOURCES = path.c
path_converter_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/features/path-convertor/src/path-mem-types.h b/xlators/features/path-convertor/src/path-mem-types.h
new file mode 100644
index 000000000..77ada8d53
--- /dev/null
+++ b/xlators/features/path-convertor/src/path-mem-types.h
@@ -0,0 +1,22 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __PATH_MEM_TYPES_H__
+#define __PATH_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_path_mem_types_ {
+ gf_path_mt_path_private_t = gf_common_mt_end + 1,
+ gf_path_mt_char,
+ gf_path_mt_regex_t,
+ gf_path_mt_end
+};
+#endif
+
diff --git a/xlators/features/path-convertor/src/path.c b/xlators/features/path-convertor/src/path.c
index 8badef38c..5c52e0a8d 100644
--- a/xlators/features/path-convertor/src/path.c
+++ b/xlators/features/path-convertor/src/path.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
/* TODO: add gf_log to all the cases returning errors */
#ifndef _CONFIG_H
@@ -35,6 +25,7 @@
#include <errno.h>
#include "glusterfs.h"
#include "xlator.h"
+#include "path-mem-types.h"
typedef struct path_private
{
@@ -51,7 +42,7 @@ static char *
name_this_to_that (xlator_t *xl, const char *path, const char *name)
{
path_private_t *priv = xl->private;
- char priv_path[ZR_PATH_MAX] = {0,};
+ char priv_path[PATH_MAX] = {0,};
char *tmp_name = NULL;
int32_t path_len = strlen (path);
int32_t name_len = strlen (name) - ZR_FILE_CONTENT_STRLEN;
@@ -63,7 +54,9 @@ name_this_to_that (xlator_t *xl, const char *path, const char *name)
if (priv->end_off && (total_len > priv->end_off)) {
j = priv->start_off;
- tmp_name = CALLOC (1, (total_len + ZR_FILE_CONTENT_STRLEN));
+ tmp_name = GF_CALLOC (1, (total_len +
+ ZR_FILE_CONTENT_STRLEN),
+ gf_path_mt_char);
ERR_ABORT (tmp_name);
/* Get the complete path for the file first */
@@ -104,7 +97,7 @@ path_this_to_that (xlator_t *xl, const char *path)
int32_t i = 0, j = 0;
if (priv->end_off && (path_len > priv->start_off)) {
- priv_path = CALLOC (1, path_len);
+ priv_path = GF_CALLOC (1, path_len, gf_path_mt_char);
ERR_ABORT (priv_path);
if (priv->start_off && (path_len > priv->start_off))
@@ -134,7 +127,9 @@ path_create_cbk (call_frame_t *frame,
int32_t op_errno,
fd_t *fd,
inode_t *inode,
- struct stat *buf)
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
return 0;
@@ -184,9 +179,10 @@ path_readlink_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- const char *buf)
+ const char *buf,
+ struct iatt *sbuf)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ STACK_UNWIND (frame, op_ret, op_errno, buf, sbuf);
return 0;
}
@@ -197,8 +193,9 @@ path_lookup_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf,
- dict_t *xattr)
+ struct iatt *buf,
+ dict_t *xattr,
+ struct iatt *postparent)
{
STACK_UNWIND (frame, op_ret, op_errno, inode, buf, xattr);
return 0;
@@ -212,7 +209,9 @@ path_symlink_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf)
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
return 0;
@@ -225,7 +224,9 @@ path_mknod_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf)
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
return 0;
@@ -239,7 +240,9 @@ path_mkdir_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf)
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
return 0;
@@ -252,7 +255,9 @@ path_link_cbk (call_frame_t *frame,
int32_t op_ret,
int32_t op_errno,
inode_t *inode,
- struct stat *buf)
+ struct iatt *buf,
+ struct iatt *preparent,
+ struct iatt *postparent)
{
STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
return 0;
@@ -271,13 +276,31 @@ path_opendir_cbk (call_frame_t *frame,
}
-int32_t
+int32_t
+path_rename_buf_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *buf,
+ struct iatt *preoldparent,
+ struct iatt *postoldparent,
+ struct iatt *prenewparent,
+ struct iatt *postnewparent)
+{
+ STACK_UNWIND (frame, op_ret, op_errno, buf);
+ return 0;
+}
+
+
+
+int32_t
path_common_buf_cbk (call_frame_t *frame,
void *cookie,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct stat *buf)
+ struct iatt *buf)
{
STACK_UNWIND (frame, op_ret, op_errno, buf);
return 0;
@@ -296,6 +319,25 @@ path_common_dict_cbk (call_frame_t *frame,
}
int32_t
+path_common_remove_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,struct iatt *preparent,
+ struct iatt *postparent)
+{
+ STACK_UNWIND (frame, op_ret, op_errno);
+ return 0;
+}
+
+int32_t
+path_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,struct iatt *prebuf,
+ struct iatt *postbuf)
+{
+ STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf);
+ return 0;
+}
+
+
+int32_t
path_common_cbk (call_frame_t *frame,
void *cookie,
xlator_t *this,
@@ -329,7 +371,7 @@ path_lookup (call_frame_t *frame,
loc->path = loc_path;
if (tmp_path != loc_path)
- FREE (tmp_path);
+ GF_FREE (tmp_path);
return 0;
}
@@ -356,7 +398,7 @@ path_stat (call_frame_t *frame,
loc->path = loc_path;
if (tmp_path != loc_path)
- FREE (tmp_path);
+ GF_FREE (tmp_path);
return 0;
}
@@ -385,7 +427,7 @@ path_readlink (call_frame_t *frame,
loc->path = loc_path;
if (tmp_path != loc_path)
- FREE (tmp_path);
+ GF_FREE (tmp_path);
return 0;
}
@@ -416,7 +458,7 @@ path_mknod (call_frame_t *frame,
loc->path = loc_path;
if (tmp_path != loc_path)
- FREE (tmp_path);
+ GF_FREE (tmp_path);
return 0;
}
@@ -445,7 +487,7 @@ path_mkdir (call_frame_t *frame,
loc->path = loc_path;
if (tmp_path != loc_path)
- FREE (tmp_path);
+ GF_FREE (tmp_path);
return 0;
}
@@ -465,14 +507,14 @@ path_unlink (call_frame_t *frame,
loc->path = tmp_path;
STACK_WIND (frame,
- path_common_cbk,
+ path_common_remove_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->unlink,
loc);
loc->path = loc_path;
if (tmp_path != loc_path)
- FREE (tmp_path);
+ GF_FREE (tmp_path);
return 0;
}
@@ -492,14 +534,14 @@ path_rmdir (call_frame_t *frame,
loc->path = tmp_path;
STACK_WIND (frame,
- path_common_cbk,
+ path_common_remove_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rmdir,
loc);
loc->path = loc_path;
if (tmp_path != loc_path)
- FREE (tmp_path);
+ GF_FREE (tmp_path);
return 0;
}
@@ -528,7 +570,7 @@ path_symlink (call_frame_t *frame,
loc->path = loc_path;
if (tmp_path != loc_path)
- FREE (tmp_path);
+ GF_FREE (tmp_path);
return 0;
}
@@ -558,7 +600,7 @@ path_rename (call_frame_t *frame,
newloc->path = tmp_newloc_path;
STACK_WIND (frame,
- path_common_buf_cbk,
+ path_rename_buf_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->rename,
oldloc,
@@ -566,11 +608,11 @@ path_rename (call_frame_t *frame,
oldloc->path = oldloc_path;
if (tmp_oldloc_path != oldloc_path)
- FREE (tmp_oldloc_path);
+ GF_FREE (tmp_oldloc_path);
newloc->path = newloc_path;
if (tmp_newloc_path != newloc_path)
- FREE (tmp_newloc_path);
+ GF_FREE (tmp_newloc_path);
return 0;
}
@@ -608,75 +650,59 @@ path_link (call_frame_t *frame,
oldloc->path = oldloc_path;
if (tmp_oldloc_path != oldloc_path)
- FREE (tmp_oldloc_path);
+ GF_FREE (tmp_oldloc_path);
newloc->path = newloc_path;
if (tmp_newloc_path != newloc_path)
- FREE (tmp_newloc_path);
+ GF_FREE (tmp_newloc_path);
return 0;
}
int32_t
-path_chmod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
+path_setattr_cbk (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *preop,
+ struct iatt *postop)
{
- char *loc_path = (char *)loc->path;
- char *tmp_path = NULL;
-
- if (!(tmp_path = path_this_to_that (this, loc->path))) {
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- return 0;
- }
- loc->path = tmp_path;
-
- STACK_WIND (frame,
- path_common_buf_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->chmod,
- loc,
- mode);
-
- loc->path = loc_path;
- if (tmp_path != loc_path)
- FREE (tmp_path);
-
+ STACK_UNWIND (frame, op_ret, op_errno, preop, postop);
return 0;
}
-int32_t
-path_chown (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- uid_t uid,
- gid_t gid)
+int32_t
+path_setattr (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ struct iatt *stbuf,
+ int32_t valid)
{
char *loc_path = (char *)loc->path;
char *tmp_path = NULL;
-
+
if (!(tmp_path = path_this_to_that (this, loc->path))) {
STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
return 0;
}
loc->path = tmp_path;
- STACK_WIND (frame,
- path_common_buf_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->chown,
- loc,
- uid,
- gid);
+ STACK_WIND (frame,
+ path_setattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr,
+ loc,
+ stbuf, valid);
- loc->path = loc_path;
+ loc->path = loc_path;
if (tmp_path != loc_path)
- FREE (tmp_path);
+ GF_FREE (tmp_path);
return 0;
}
+
int32_t
path_truncate (call_frame_t *frame,
xlator_t *this,
@@ -693,7 +719,7 @@ path_truncate (call_frame_t *frame,
loc->path = tmp_path;
STACK_WIND (frame,
- path_common_buf_cbk,
+ path_truncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->truncate,
loc,
@@ -701,46 +727,19 @@ path_truncate (call_frame_t *frame,
loc->path = loc_path;
if (tmp_path != loc_path)
- FREE (tmp_path);
+ GF_FREE (tmp_path);
return 0;
}
-int32_t
-path_utimens (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- struct timespec tv[2])
-{
- char *loc_path = (char *)loc->path;
- char *tmp_path = NULL;
-
- if (!(tmp_path = path_this_to_that (this, loc->path))) {
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- return 0;
- }
- loc->path = tmp_path;
-
- STACK_WIND (frame,
- path_common_buf_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->utimens,
- loc,
- tv);
-
- loc->path = loc_path;
- if (tmp_path != loc_path)
- FREE (tmp_path);
-
- return 0;
-}
int32_t
path_open (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
int32_t flags,
- fd_t *fd)
+ fd_t *fd,
+ int32_t wbflags)
{
char *loc_path = (char *)loc->path;
char *tmp_path = NULL;
@@ -757,11 +756,12 @@ path_open (call_frame_t *frame,
FIRST_CHILD(this)->fops->open,
loc,
flags,
- fd);
+ fd,
+ wbflags);
loc->path = loc_path;
if (tmp_path != loc_path)
- FREE (tmp_path);
+ GF_FREE (tmp_path);
return 0;
}
@@ -794,7 +794,7 @@ path_create (call_frame_t *frame,
loc->path = loc_path;
if (tmp_path != loc_path)
- FREE (tmp_path);
+ GF_FREE (tmp_path);
return 0;
}
@@ -836,10 +836,9 @@ path_setxattr (call_frame_t *frame,
loc->path = loc_path;
if (tmp_path != loc_path)
- FREE (tmp_path);
+ GF_FREE (tmp_path);
- if (tmp_name)
- FREE (tmp_name);
+ GF_FREE (tmp_name);
return 0;
}
@@ -873,10 +872,10 @@ path_getxattr (call_frame_t *frame,
loc->path = loc_path;
if (tmp_path != loc_path)
- FREE (tmp_path);
+ GF_FREE (tmp_path);
if (tmp_name != name)
- FREE (tmp_name);
+ GF_FREE (tmp_name);
return 0;
}
@@ -910,10 +909,10 @@ path_removexattr (call_frame_t *frame,
loc->path = loc_path;
if (tmp_path != loc_path)
- FREE (tmp_path);
+ GF_FREE (tmp_path);
if (tmp_name != name)
- FREE (tmp_name);
+ GF_FREE (tmp_name);
return 0;
}
@@ -942,7 +941,7 @@ path_opendir (call_frame_t *frame,
loc->path = loc_path;
if (tmp_path != loc_path)
- FREE (tmp_path);
+ GF_FREE (tmp_path);
return 0;
}
@@ -971,7 +970,7 @@ path_access (call_frame_t *frame,
loc->path = loc_path;
if (tmp_path != loc_path)
- FREE (tmp_path);
+ GF_FREE (tmp_path);
return 0;
}
@@ -1013,7 +1012,7 @@ path_checksum (call_frame_t *frame,
loc->path = loc_path;
if (tmp_path != loc_path)
- FREE (tmp_path);
+ GF_FREE (tmp_path);
return 0;
}
@@ -1040,14 +1039,14 @@ path_entrylk (call_frame_t *frame, xlator_t *this,
loc->path = loc_path;
if (tmp_path != loc_path)
- FREE (tmp_path);
+ GF_FREE (tmp_path);
return 0;
}
int32_t
path_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct flock *lock)
+ const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *lock)
{
char *loc_path = (char *)loc->path;
char *tmp_path = NULL;
@@ -1066,7 +1065,7 @@ path_inodelk (call_frame_t *frame, xlator_t *this,
loc->path = loc_path;
if (tmp_path != loc_path)
- FREE (tmp_path);
+ GF_FREE (tmp_path);
return 0;
}
@@ -1098,11 +1097,29 @@ path_xattrop (call_frame_t *frame,
loc->path = loc_path;
if (tmp_path != loc_path)
- FREE (tmp_path);
+ GF_FREE (tmp_path);
return 0;
}
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_path_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ return ret;
+ }
+
+ return ret;
+}
int32_t
init (xlator_t *this)
@@ -1121,7 +1138,7 @@ init (xlator_t *this)
"dangling volume. check volfile ");
}
- priv = CALLOC (1, sizeof (*priv));
+ priv = GF_CALLOC (1, sizeof (*priv), gf_path_mt_path_private_t);
ERR_ABORT (priv);
if (dict_get (options, "start-offset")) {
priv->start_off = data_to_int32 (dict_get (options,
@@ -1134,7 +1151,8 @@ init (xlator_t *this)
if (dict_get (options, "regex")) {
int32_t ret = 0;
- priv->preg = CALLOC (1, sizeof (regex_t));
+ priv->preg = GF_CALLOC (1, sizeof (regex_t),
+ gf_path_mt_regex_t);
ERR_ABORT (priv->preg);
ret = regcomp (priv->preg,
data_to_str (dict_get (options, "regex")),
@@ -1142,7 +1160,7 @@ init (xlator_t *this)
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
"Failed to compile the 'option regex'");
- FREE (priv);
+ GF_FREE (priv);
return -1;
}
if (dict_get (options, "replace-with")) {
@@ -1173,10 +1191,7 @@ struct xlator_fops fops = {
.symlink = path_symlink,
.rename = path_rename,
.link = path_link,
- .chmod = path_chmod,
- .chown = path_chown,
.truncate = path_truncate,
- .utimens = path_utimens,
.open = path_open,
.setxattr = path_setxattr,
.getxattr = path_getxattr,
@@ -1189,13 +1204,9 @@ struct xlator_fops fops = {
.xattrop = path_xattrop,
.entrylk = path_entrylk,
.inodelk = path_inodelk,
+ .setattr = path_setattr,
};
-
-struct xlator_mops mops = {
-};
-
-
struct xlator_cbks cbks = {
};
diff --git a/libglusterfsclient/Makefile.am b/xlators/features/protect/Makefile.am
index d471a3f92..d471a3f92 100644
--- a/libglusterfsclient/Makefile.am
+++ b/xlators/features/protect/Makefile.am
diff --git a/xlators/features/protect/src/Makefile.am b/xlators/features/protect/src/Makefile.am
new file mode 100644
index 000000000..7eb93f32e
--- /dev/null
+++ b/xlators/features/protect/src/Makefile.am
@@ -0,0 +1,21 @@
+xlator_LTLIBRARIES = prot_dht.la prot_client.la prot_server.la
+
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+prot_dht_la_LDFLAGS = -module -avoidversion
+prot_dht_la_SOURCES = prot_dht.c
+prot_dht_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+prot_client_la_LDFLAGS = -module -avoidversion
+prot_client_la_SOURCES = prot_client.c
+prot_client_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+prot_server_la_LDFLAGS = -module -avoidversion
+prot_server_la_SOURCES = prot_server.c
+prot_server_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
+
diff --git a/xlators/features/protect/src/prot_client.c b/xlators/features/protect/src/prot_client.c
new file mode 100644
index 000000000..a27216d0a
--- /dev/null
+++ b/xlators/features/protect/src/prot_client.c
@@ -0,0 +1,215 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "defaults.h"
+
+#include <execinfo.h>
+
+#define NUM_FRAMES 20
+
+static char PROTECT_KEY[] = "trusted.glusterfs.protect";
+
+enum {
+ PROT_ACT_NONE = 0,
+ PROT_ACT_LOG,
+ PROT_ACT_REJECT,
+};
+
+void
+pcli_print_trace (char *name, call_frame_t *frame)
+{
+ void *frames[NUM_FRAMES];
+ char **symbols;
+ int size;
+ int i;
+
+ gf_log (name, GF_LOG_INFO, "Translator stack:");
+ while (frame) {
+ gf_log (name, GF_LOG_INFO, "%s (%s)",
+ frame->wind_from, frame->this->name);
+ frame = frame->next;
+ }
+
+ size = backtrace(frames,NUM_FRAMES);
+ if (size <= 0) {
+ return;
+ }
+ symbols = backtrace_symbols(frames,size);
+ if (!symbols) {
+ return;
+ }
+
+ gf_log(name, GF_LOG_INFO, "Processor stack:");
+ for (i = 0; i < size; ++i) {
+ gf_log (name, GF_LOG_INFO, "%s", symbols[i]);
+ }
+ free(symbols);
+}
+
+int32_t
+pcli_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ uint64_t value;
+
+ if (newloc->parent == oldloc->parent) {
+ gf_log (this->name, GF_LOG_DEBUG, "rename in same directory");
+ goto simple_unwind;
+ }
+ if (!oldloc->parent) {
+ goto simple_unwind;
+ }
+ if (inode_ctx_get(oldloc->parent,this,&value) != 0) {
+ goto simple_unwind;
+ }
+
+ if (value != PROT_ACT_NONE) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "got rename for protected %s", oldloc->path);
+ pcli_print_trace(this->name,frame->next);
+ if (value == PROT_ACT_REJECT) {
+ STACK_UNWIND_STRICT (rename, frame, -1, EPERM,
+ NULL, NULL, NULL, NULL, NULL,
+ xdata);
+ return 0;
+ }
+ }
+
+simple_unwind:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc,
+ xdata);
+ return 0;
+}
+
+int32_t
+pcli_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ data_t *data;
+ uint64_t value;
+
+ /*
+ * We can't use dict_get_str and strcmp here, because the value comes
+ * directly from the user and might not be NUL-terminated (it would
+ * be if we had set it ourselves.
+ */
+
+ data = dict_get(dict,PROTECT_KEY);
+ if (!data) {
+ goto simple_wind;
+ }
+
+ if (dict->count > 1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "attempted to mix %s with other keys", PROTECT_KEY);
+ goto simple_wind;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "got %s request", PROTECT_KEY);
+ if (!strncmp(data->data,"log",data->len)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "logging removals on %s", loc->path);
+ value = PROT_ACT_LOG;
+ }
+ else if (!strncmp(data->data,"reject",data->len)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "rejecting removals on %s", loc->path);
+ value = PROT_ACT_REJECT;
+ }
+ else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "removing protection on %s", loc->path);
+ value = PROT_ACT_NONE;
+ }
+ /* Right now the value doesn't matter - just the presence. */
+ if (inode_ctx_set(loc->inode,this,&value) != 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set protection status for %s", loc->path);
+ }
+ STACK_UNWIND_STRICT (setxattr, frame, 0, 0, NULL);
+ return 0;
+
+simple_wind:
+ STACK_WIND_TAIL (frame,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr,
+ loc, dict, flags, xdata);
+ return 0;
+}
+
+int32_t
+pcli_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
+{
+ uint64_t value;
+
+ if (!loc->parent || (inode_ctx_get(loc->parent,this,&value) != 0)) {
+ goto simple_unwind;
+ }
+
+ if (value != PROT_ACT_NONE) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "got unlink for protected %s", loc->path);
+ pcli_print_trace(this->name,frame->next);
+ if (value == PROT_ACT_REJECT) {
+ STACK_UNWIND_STRICT (unlink, frame, -1, EPERM,
+ NULL, NULL, NULL);
+ return 0;
+ }
+ }
+
+simple_unwind:
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ return 0;
+}
+
+int32_t
+init (xlator_t *this)
+{
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "translator not configured with exactly one child");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+
+ return 0;
+}
+
+
+void
+fini (xlator_t *this)
+{
+ return;
+}
+
+
+struct xlator_fops fops = {
+ .rename = pcli_rename,
+ .setxattr = pcli_setxattr,
+ .unlink = pcli_unlink,
+};
+
+struct xlator_cbks cbks = {
+};
+
+struct volume_options options[] = {
+ { .key = {NULL} },
+};
diff --git a/xlators/features/protect/src/prot_dht.c b/xlators/features/protect/src/prot_dht.c
new file mode 100644
index 000000000..feec6ffd6
--- /dev/null
+++ b/xlators/features/protect/src/prot_dht.c
@@ -0,0 +1,168 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "defaults.h"
+
+enum gf_pdht_mem_types_ {
+ gf_pdht_mt_coord_t = gf_common_mt_end + 1,
+ gf_pdht_mt_end
+};
+
+typedef struct {
+ pthread_mutex_t lock;
+ uint16_t refs;
+ int32_t op_ret;
+ int32_t op_errno;
+ dict_t *xdata;
+} pdht_coord_t;
+
+static char PROTECT_KEY[] = "trusted.glusterfs.protect";
+
+void
+pdht_unref_and_unlock (call_frame_t *frame, xlator_t *this,
+ pdht_coord_t *coord)
+{
+ gf_boolean_t should_unwind;
+
+ should_unwind = (--(coord->refs) == 0);
+ pthread_mutex_unlock(&coord->lock);
+
+ if (should_unwind) {
+ STACK_UNWIND_STRICT (setxattr, frame,
+ coord->op_ret, coord->op_errno,
+ coord->xdata);
+ if (coord->xdata) {
+ dict_unref(coord->xdata);
+ }
+ GF_FREE(coord);
+ }
+}
+
+int32_t
+pdht_recurse_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ pdht_coord_t *coord = cookie;
+
+ pthread_mutex_lock(&coord->lock);
+ if (op_ret) {
+ coord->op_ret = op_ret;
+ coord->op_errno = op_errno;
+ }
+ if (xdata) {
+ if (coord->xdata) {
+ dict_unref(coord->xdata);
+ }
+ coord->xdata = dict_ref(xdata);
+ }
+ pdht_unref_and_unlock(frame,this,coord);
+
+ return 0;
+}
+
+void
+pdht_recurse (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata, xlator_t *xl, pdht_coord_t *coord)
+{
+ xlator_list_t *iter;
+
+ if (!strcmp(xl->type,"features/prot_client")) {
+ pthread_mutex_lock(&coord->lock);
+ ++(coord->refs);
+ pthread_mutex_unlock(&coord->lock);
+ STACK_WIND_COOKIE (frame, pdht_recurse_cbk, coord, xl,
+ xl->fops->setxattr, loc, dict, flags, xdata);
+ }
+
+ else for (iter = xl->children; iter; iter = iter->next) {
+ pdht_recurse (frame, this, loc, dict, flags, xdata,
+ iter->xlator, coord);
+ }
+}
+
+int32_t
+pdht_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ pdht_coord_t *coord;
+
+ if (!dict_get(dict,PROTECT_KEY)) {
+ goto simple_wind;
+ }
+
+ if (dict->count > 1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "attempted to mix %s with other keys", PROTECT_KEY);
+ goto simple_wind;
+ }
+
+ coord = GF_CALLOC(1,sizeof(*coord),gf_pdht_mt_coord_t);
+ if (!coord) {
+ gf_log (this->name, GF_LOG_WARNING, "allocation failed");
+ goto simple_wind;
+ }
+
+ pthread_mutex_init(&coord->lock,NULL);
+ coord->refs = 1;
+ coord->op_ret = 0;
+ coord->xdata = NULL;
+
+ pdht_recurse(frame,this,loc,dict,flags,xdata,this,coord);
+ pthread_mutex_lock(&coord->lock);
+ pdht_unref_and_unlock(frame,this,coord);
+
+ return 0;
+
+simple_wind:
+ STACK_WIND_TAIL (frame,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr,
+ loc, dict, flags, xdata);
+ return 0;
+}
+
+int32_t
+init (xlator_t *this)
+{
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "translator not configured with exactly one child");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+
+ return 0;
+}
+
+
+void
+fini (xlator_t *this)
+{
+ return;
+}
+
+struct xlator_fops fops = {
+ .setxattr = pdht_setxattr,
+};
+
+struct xlator_cbks cbks = {
+};
+
+struct volume_options options[] = {
+ { .key = {NULL} },
+};
diff --git a/xlators/features/protect/src/prot_server.c b/xlators/features/protect/src/prot_server.c
new file mode 100644
index 000000000..beaee0889
--- /dev/null
+++ b/xlators/features/protect/src/prot_server.c
@@ -0,0 +1,51 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "defaults.h"
+
+int32_t
+init (xlator_t *this)
+{
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "translator not configured with exactly one child");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+
+ return 0;
+}
+
+
+void
+fini (xlator_t *this)
+{
+ return;
+}
+
+
+struct xlator_fops fops = {
+};
+
+struct xlator_cbks cbks = {
+};
+
+struct volume_options options[] = {
+ { .key = {NULL} },
+};
diff --git a/xlators/features/qemu-block/Makefile.am b/xlators/features/qemu-block/Makefile.am
new file mode 100644
index 000000000..af437a64d
--- /dev/null
+++ b/xlators/features/qemu-block/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = src
diff --git a/xlators/features/qemu-block/src/Makefile.am b/xlators/features/qemu-block/src/Makefile.am
new file mode 100644
index 000000000..08a7b62a0
--- /dev/null
+++ b/xlators/features/qemu-block/src/Makefile.am
@@ -0,0 +1,155 @@
+if ENABLE_QEMU_BLOCK
+xlator_LTLIBRARIES = qemu-block.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+qemu_block_la_LDFLAGS = -module -avoid-version
+qemu_block_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(GLIB_LIBS) -lz -lrt
+
+qemu_block_la_SOURCES_qemu = \
+ $(CONTRIBDIR)/qemu/qemu-coroutine.c \
+ $(CONTRIBDIR)/qemu/qemu-coroutine-lock.c \
+ $(CONTRIBDIR)/qemu/qemu-coroutine-sleep.c \
+ $(CONTRIBDIR)/qemu/coroutine-ucontext.c \
+ $(CONTRIBDIR)/qemu/block.c \
+ $(CONTRIBDIR)/qemu/nop-symbols.c
+
+qemu_block_la_SOURCES_qemu_util = \
+ $(CONTRIBDIR)/qemu/util/aes.c \
+ $(CONTRIBDIR)/qemu/util/bitmap.c \
+ $(CONTRIBDIR)/qemu/util/bitops.c \
+ $(CONTRIBDIR)/qemu/util/cutils.c \
+ $(CONTRIBDIR)/qemu/util/error.c \
+ $(CONTRIBDIR)/qemu/util/hbitmap.c \
+ $(CONTRIBDIR)/qemu/util/iov.c \
+ $(CONTRIBDIR)/qemu/util/module.c \
+ $(CONTRIBDIR)/qemu/util/oslib-posix.c \
+ $(CONTRIBDIR)/qemu/util/qemu-option.c \
+ $(CONTRIBDIR)/qemu/util/qemu-error.c \
+ $(CONTRIBDIR)/qemu/util/qemu-thread-posix.c \
+ $(CONTRIBDIR)/qemu/util/unicode.c \
+ $(CONTRIBDIR)/qemu/util/hexdump.c
+
+qemu_block_la_SOURCES_qemu_block = \
+ $(CONTRIBDIR)/qemu/block/snapshot.c \
+ $(CONTRIBDIR)/qemu/block/qcow2-cache.c \
+ $(CONTRIBDIR)/qemu/block/qcow2-cluster.c \
+ $(CONTRIBDIR)/qemu/block/qcow2-refcount.c \
+ $(CONTRIBDIR)/qemu/block/qcow2-snapshot.c \
+ $(CONTRIBDIR)/qemu/block/qcow2.c \
+ $(CONTRIBDIR)/qemu/block/qed-check.c \
+ $(CONTRIBDIR)/qemu/block/qed-cluster.c \
+ $(CONTRIBDIR)/qemu/block/qed-gencb.c \
+ $(CONTRIBDIR)/qemu/block/qed-l2-cache.c \
+ $(CONTRIBDIR)/qemu/block/qed-table.c \
+ $(CONTRIBDIR)/qemu/block/qed.c
+
+qemu_block_la_SOURCES_qemu_qobject = \
+ $(CONTRIBDIR)/qemu/qobject/json-lexer.c \
+ $(CONTRIBDIR)/qemu/qobject/json-parser.c \
+ $(CONTRIBDIR)/qemu/qobject/json-streamer.c \
+ $(CONTRIBDIR)/qemu/qobject/qbool.c \
+ $(CONTRIBDIR)/qemu/qobject/qdict.c \
+ $(CONTRIBDIR)/qemu/qobject/qerror.c \
+ $(CONTRIBDIR)/qemu/qobject/qfloat.c \
+ $(CONTRIBDIR)/qemu/qobject/qint.c \
+ $(CONTRIBDIR)/qemu/qobject/qjson.c \
+ $(CONTRIBDIR)/qemu/qobject/qlist.c \
+ $(CONTRIBDIR)/qemu/qobject/qstring.c
+
+qemu_block_la_SOURCES = \
+ $(qemu_block_la_SOURCES_qemu) \
+ $(qemu_block_la_SOURCES_qemu_util) \
+ $(qemu_block_la_SOURCES_qemu_block) \
+ $(qemu_block_la_SOURCES_qemu_qobject) \
+ bdrv-xlator.c \
+ coroutine-synctask.c \
+ bh-syncop.c \
+ monitor-logging.c \
+ clock-timer.c \
+ qemu-block.c \
+ qb-coroutines.c
+
+noinst_HEADERS_qemu = \
+ $(CONTRIBDIR)/qemu/config-host.h \
+ $(CONTRIBDIR)/qemu/qapi-types.h \
+ $(CONTRIBDIR)/qemu/qmp-commands.h \
+ $(CONTRIBDIR)/qemu/trace/generated-tracers.h \
+ $(CONTRIBDIR)/qemu/include/config.h \
+ $(CONTRIBDIR)/qemu/include/glib-compat.h \
+ $(CONTRIBDIR)/qemu/include/qemu-common.h \
+ $(CONTRIBDIR)/qemu/include/trace.h \
+ $(CONTRIBDIR)/qemu/include/block/coroutine.h \
+ $(CONTRIBDIR)/qemu/include/block/aio.h \
+ $(CONTRIBDIR)/qemu/include/block/block.h \
+ $(CONTRIBDIR)/qemu/include/block/block_int.h \
+ $(CONTRIBDIR)/qemu/include/block/blockjob.h \
+ $(CONTRIBDIR)/qemu/include/block/coroutine.h \
+ $(CONTRIBDIR)/qemu/include/block/coroutine_int.h \
+ $(CONTRIBDIR)/qemu/include/block/snapshot.h \
+ $(CONTRIBDIR)/qemu/include/exec/cpu-common.h \
+ $(CONTRIBDIR)/qemu/include/exec/hwaddr.h \
+ $(CONTRIBDIR)/qemu/include/exec/poison.h \
+ $(CONTRIBDIR)/qemu/include/fpu/softfloat.h \
+ $(CONTRIBDIR)/qemu/include/migration/migration.h \
+ $(CONTRIBDIR)/qemu/include/migration/qemu-file.h \
+ $(CONTRIBDIR)/qemu/include/migration/vmstate.h \
+ $(CONTRIBDIR)/qemu/include/monitor/monitor.h \
+ $(CONTRIBDIR)/qemu/include/monitor/readline.h \
+ $(CONTRIBDIR)/qemu/include/qapi/error.h \
+ $(CONTRIBDIR)/qemu/include/qapi/qmp/json-lexer.h \
+ $(CONTRIBDIR)/qemu/include/qapi/qmp/json-parser.h \
+ $(CONTRIBDIR)/qemu/include/qapi/qmp/json-streamer.h \
+ $(CONTRIBDIR)/qemu/include/qapi/qmp/qbool.h \
+ $(CONTRIBDIR)/qemu/include/qapi/qmp/qdict.h \
+ $(CONTRIBDIR)/qemu/include/qapi/qmp/qerror.h \
+ $(CONTRIBDIR)/qemu/include/qapi/qmp/qfloat.h \
+ $(CONTRIBDIR)/qemu/include/qapi/qmp/qint.h \
+ $(CONTRIBDIR)/qemu/include/qapi/qmp/qjson.h \
+ $(CONTRIBDIR)/qemu/include/qapi/qmp/qlist.h \
+ $(CONTRIBDIR)/qemu/include/qapi/qmp/qobject.h \
+ $(CONTRIBDIR)/qemu/include/qapi/qmp/qstring.h \
+ $(CONTRIBDIR)/qemu/include/qapi/qmp/types.h \
+ $(CONTRIBDIR)/qemu/include/qemu/aes.h \
+ $(CONTRIBDIR)/qemu/include/qemu/atomic.h \
+ $(CONTRIBDIR)/qemu/include/qemu/bitmap.h \
+ $(CONTRIBDIR)/qemu/include/qemu/bitops.h \
+ $(CONTRIBDIR)/qemu/include/qemu/bswap.h \
+ $(CONTRIBDIR)/qemu/include/qemu/compiler.h \
+ $(CONTRIBDIR)/qemu/include/qemu/error-report.h \
+ $(CONTRIBDIR)/qemu/include/qemu/event_notifier.h \
+ $(CONTRIBDIR)/qemu/include/qemu/hbitmap.h \
+ $(CONTRIBDIR)/qemu/include/qemu/host-utils.h \
+ $(CONTRIBDIR)/qemu/include/qemu/iov.h \
+ $(CONTRIBDIR)/qemu/include/qemu/main-loop.h \
+ $(CONTRIBDIR)/qemu/include/qemu/module.h \
+ $(CONTRIBDIR)/qemu/include/qemu/notify.h \
+ $(CONTRIBDIR)/qemu/include/qemu/option.h \
+ $(CONTRIBDIR)/qemu/include/qemu/option_int.h \
+ $(CONTRIBDIR)/qemu/include/qemu/osdep.h \
+ $(CONTRIBDIR)/qemu/include/qemu/queue.h \
+ $(CONTRIBDIR)/qemu/include/qemu/sockets.h \
+ $(CONTRIBDIR)/qemu/include/qemu/thread-posix.h \
+ $(CONTRIBDIR)/qemu/include/qemu/thread.h \
+ $(CONTRIBDIR)/qemu/include/qemu/timer.h \
+ $(CONTRIBDIR)/qemu/include/qemu/typedefs.h \
+ $(CONTRIBDIR)/qemu/include/sysemu/sysemu.h \
+ $(CONTRIBDIR)/qemu/include/sysemu/os-posix.h \
+ $(CONTRIBDIR)/qemu/block/qcow2.h \
+ $(CONTRIBDIR)/qemu/block/qed.h
+
+noinst_HEADERS = \
+ $(noinst_HEADERS_qemu) \
+ qemu-block.h \
+ qemu-block-memory-types.h \
+ qb-coroutines.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(CONTRIBDIR)/qemu \
+ -I$(CONTRIBDIR)/qemu/include \
+ -DGLUSTER_XLATOR
+
+AM_CFLAGS = -fno-strict-aliasing -Wall $(GF_CFLAGS) $(GLIB_CFLAGS)
+
+CLEANFILES =
+
+endif
diff --git a/xlators/features/qemu-block/src/bdrv-xlator.c b/xlators/features/qemu-block/src/bdrv-xlator.c
new file mode 100644
index 000000000..106c59775
--- /dev/null
+++ b/xlators/features/qemu-block/src/bdrv-xlator.c
@@ -0,0 +1,397 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "inode.h"
+#include "syncop.h"
+#include "qemu-block.h"
+#include "block/block_int.h"
+
+typedef struct BDRVGlusterState {
+ inode_t *inode;
+} BDRVGlusterState;
+
+static QemuOptsList runtime_opts = {
+ .name = "gluster",
+ .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+ .desc = {
+ {
+ .name = "filename",
+ .type = QEMU_OPT_STRING,
+ .help = "GFID of file",
+ },
+ { /* end of list */ }
+ },
+};
+
+inode_t *
+qb_inode_from_filename (const char *filename)
+{
+ const char *iptr = NULL;
+ inode_t *inode = NULL;
+
+ iptr = filename + 17;
+ sscanf (iptr, "%p", &inode);
+
+ return inode;
+}
+
+
+int
+qb_inode_to_filename (inode_t *inode, char *filename, int size)
+{
+ return snprintf (filename, size, "gluster://inodep:%p", inode);
+}
+
+
+static fd_t *
+fd_from_bs (BlockDriverState *bs)
+{
+ BDRVGlusterState *s = bs->opaque;
+
+ return fd_anonymous (s->inode);
+}
+
+
+static int
+qemu_gluster_open (BlockDriverState *bs, QDict *options, int bdrv_flags)
+{
+ inode_t *inode = NULL;
+ BDRVGlusterState *s = bs->opaque;
+ QemuOpts *opts = NULL;
+ Error *local_err = NULL;
+ const char *filename = NULL;
+ char gfid_str[128];
+ int ret;
+ qb_conf_t *conf = THIS->private;
+
+ opts = qemu_opts_create_nofail(&runtime_opts);
+ qemu_opts_absorb_qdict(opts, options, &local_err);
+ if (error_is_set(&local_err)) {
+ qerror_report_err(local_err);
+ error_free(local_err);
+ return -EINVAL;
+ }
+
+ filename = qemu_opt_get(opts, "filename");
+
+ /*
+ * gfid:<gfid> format means we're opening a backing image.
+ */
+ ret = sscanf(filename, "gluster://gfid:%s", gfid_str);
+ if (ret) {
+ loc_t loc = {0,};
+ struct iatt buf = {0,};
+ uuid_t gfid;
+
+ uuid_parse(gfid_str, gfid);
+
+ loc.inode = inode_find(conf->root_inode->table, gfid);
+ if (!loc.inode) {
+ loc.inode = inode_new(conf->root_inode->table);
+ uuid_copy(loc.inode->gfid, gfid);
+ }
+
+ uuid_copy(loc.gfid, loc.inode->gfid);
+ ret = syncop_lookup(FIRST_CHILD(THIS), &loc, NULL, &buf, NULL,
+ NULL);
+ if (ret) {
+ loc_wipe(&loc);
+ return -errno;
+ }
+
+ s->inode = inode_ref(loc.inode);
+ loc_wipe(&loc);
+ } else {
+ inode = qb_inode_from_filename (filename);
+ if (!inode)
+ return -EINVAL;
+
+ s->inode = inode_ref(inode);
+ }
+
+ return 0;
+}
+
+
+static int
+qemu_gluster_create (const char *filename, QEMUOptionParameter *options)
+{
+ uint64_t total_size = 0;
+ inode_t *inode = NULL;
+ fd_t *fd = NULL;
+ struct iatt stat = {0, };
+ int ret = 0;
+
+ inode = qb_inode_from_filename (filename);
+ if (!inode)
+ return -EINVAL;
+
+ while (options && options->name) {
+ if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
+ total_size = options->value.n / BDRV_SECTOR_SIZE;
+ }
+ options++;
+ }
+
+ fd = fd_anonymous (inode);
+ if (!fd)
+ return -ENOMEM;
+
+ ret = syncop_fstat (FIRST_CHILD(THIS), fd, &stat);
+ if (ret) {
+ fd_unref (fd);
+ return -errno;
+ }
+
+ if (stat.ia_size) {
+ /* format ONLY if the filesize is 0 bytes */
+ fd_unref (fd);
+ return -EFBIG;
+ }
+
+ if (total_size) {
+ ret = syncop_ftruncate (FIRST_CHILD(THIS), fd, total_size);
+ if (ret) {
+ fd_unref (fd);
+ return -errno;
+ }
+ }
+
+ fd_unref (fd);
+ return 0;
+}
+
+
+static int
+qemu_gluster_co_readv (BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+ QEMUIOVector *qiov)
+{
+ fd_t *fd = NULL;
+ off_t offset = 0;
+ size_t size = 0;
+ struct iovec *iov = NULL;
+ int count = 0;
+ struct iobref *iobref = NULL;
+ int ret = 0;
+
+ fd = fd_from_bs (bs);
+ if (!fd)
+ return -EIO;
+
+ offset = sector_num * BDRV_SECTOR_SIZE;
+ size = nb_sectors * BDRV_SECTOR_SIZE;
+
+ ret = syncop_readv (FIRST_CHILD(THIS), fd, size, offset, 0,
+ &iov, &count, &iobref);
+ if (ret < 0) {
+ ret = -errno;
+ goto out;
+ }
+
+ iov_copy (qiov->iov, qiov->niov, iov, count); /* *choke!* */
+
+out:
+ GF_FREE (iov);
+ if (iobref)
+ iobref_unref (iobref);
+ fd_unref (fd);
+ return ret;
+}
+
+
+static int
+qemu_gluster_co_writev (BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+ QEMUIOVector *qiov)
+{
+ fd_t *fd = NULL;
+ off_t offset = 0;
+ size_t size = 0;
+ struct iobref *iobref = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iovec iov = {0, };
+ int ret = -ENOMEM;
+
+ fd = fd_from_bs (bs);
+ if (!fd)
+ return -EIO;
+
+ offset = sector_num * BDRV_SECTOR_SIZE;
+ size = nb_sectors * BDRV_SECTOR_SIZE;
+
+ iobuf = iobuf_get2 (THIS->ctx->iobuf_pool, size);
+ if (!iobuf)
+ goto out;
+
+ iobref = iobref_new ();
+ if (!iobref) {
+ iobuf_unref (iobuf);
+ goto out;
+ }
+
+ iobref_add (iobref, iobuf);
+
+ iov_unload (iobuf_ptr (iobuf), qiov->iov, qiov->niov); /* *choke!* */
+
+ iov.iov_base = iobuf_ptr (iobuf);
+ iov.iov_len = size;
+
+ ret = syncop_writev (FIRST_CHILD(THIS), fd, &iov, 1, offset, iobref, 0);
+ if (ret < 0)
+ ret = -errno;
+
+out:
+ if (iobuf)
+ iobuf_unref (iobuf);
+ if (iobref)
+ iobref_unref (iobref);
+ fd_unref (fd);
+ return ret;
+}
+
+
+static int
+qemu_gluster_co_flush (BlockDriverState *bs)
+{
+ fd_t *fd = NULL;
+ int ret = 0;
+
+ fd = fd_from_bs (bs);
+
+ ret = syncop_flush (FIRST_CHILD(THIS), fd);
+
+ fd_unref (fd);
+
+ return ret;
+}
+
+
+static int
+qemu_gluster_co_fsync (BlockDriverState *bs)
+{
+ fd_t *fd = NULL;
+ int ret = 0;
+
+ fd = fd_from_bs (bs);
+
+ ret = syncop_fsync (FIRST_CHILD(THIS), fd, 0);
+
+ fd_unref (fd);
+
+ return ret;
+}
+
+
+static int
+qemu_gluster_truncate (BlockDriverState *bs, int64_t offset)
+{
+ fd_t *fd = NULL;
+ int ret = 0;
+
+ fd = fd_from_bs (bs);
+
+ ret = syncop_ftruncate (FIRST_CHILD(THIS), fd, offset);
+
+ fd_unref (fd);
+
+ if (ret < 0)
+ return ret;
+
+ return ret;
+}
+
+
+static int64_t
+qemu_gluster_getlength (BlockDriverState *bs)
+{
+ fd_t *fd = NULL;
+ int ret = 0;
+ struct iatt iatt = {0, };
+
+ fd = fd_from_bs (bs);
+
+ ret = syncop_fstat (FIRST_CHILD(THIS), fd, &iatt);
+ if (ret < 0)
+ return -1;
+
+ return iatt.ia_size;
+}
+
+
+static int64_t
+qemu_gluster_allocated_file_size (BlockDriverState *bs)
+{
+ fd_t *fd = NULL;
+ int ret = 0;
+ struct iatt iatt = {0, };
+
+ fd = fd_from_bs (bs);
+
+ ret = syncop_fstat (FIRST_CHILD(THIS), fd, &iatt);
+ if (ret < 0)
+ return -1;
+
+ return iatt.ia_blocks * 512;
+}
+
+
+static void
+qemu_gluster_close (BlockDriverState *bs)
+{
+ BDRVGlusterState *s = NULL;
+
+ s = bs->opaque;
+
+ inode_unref (s->inode);
+
+ return;
+}
+
+
+static QEMUOptionParameter qemu_gluster_create_options[] = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ { NULL }
+};
+
+
+static BlockDriver bdrv_gluster = {
+ .format_name = "gluster",
+ .protocol_name = "gluster",
+ .instance_size = sizeof(BDRVGlusterState),
+ .bdrv_file_open = qemu_gluster_open,
+ .bdrv_close = qemu_gluster_close,
+ .bdrv_create = qemu_gluster_create,
+ .bdrv_getlength = qemu_gluster_getlength,
+ .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+ .bdrv_co_readv = qemu_gluster_co_readv,
+ .bdrv_co_writev = qemu_gluster_co_writev,
+ .bdrv_co_flush_to_os = qemu_gluster_co_flush,
+ .bdrv_co_flush_to_disk = qemu_gluster_co_fsync,
+ .bdrv_truncate = qemu_gluster_truncate,
+ .create_options = qemu_gluster_create_options,
+};
+
+
+static void bdrv_gluster_init(void)
+{
+ bdrv_register(&bdrv_gluster);
+}
+
+
+block_init(bdrv_gluster_init);
diff --git a/xlators/features/qemu-block/src/bh-syncop.c b/xlators/features/qemu-block/src/bh-syncop.c
new file mode 100644
index 000000000..e8686f6d4
--- /dev/null
+++ b/xlators/features/qemu-block/src/bh-syncop.c
@@ -0,0 +1,48 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "syncop.h"
+#include "qemu-block-memory-types.h"
+
+#include "block/aio.h"
+
+void
+qemu_bh_schedule (QEMUBH *bh)
+{
+ return;
+}
+
+void
+qemu_bh_cancel (QEMUBH *bh)
+{
+ return;
+}
+
+void
+qemu_bh_delete (QEMUBH *bh)
+{
+
+}
+
+QEMUBH *
+qemu_bh_new (QEMUBHFunc *cb, void *opaque)
+{
+ return NULL;
+}
diff --git a/xlators/features/qemu-block/src/clock-timer.c b/xlators/features/qemu-block/src/clock-timer.c
new file mode 100644
index 000000000..fcbec6ad1
--- /dev/null
+++ b/xlators/features/qemu-block/src/clock-timer.c
@@ -0,0 +1,60 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "syncop.h"
+#include "qemu-block-memory-types.h"
+
+#include "qemu/timer.h"
+
+QEMUClock *vm_clock;
+int use_rt_clock = 0;
+
+QEMUTimer *qemu_new_timer (QEMUClock *clock, int scale,
+ QEMUTimerCB *cb, void *opaque)
+{
+ return NULL;
+}
+
+int64_t qemu_get_clock_ns (QEMUClock *clock)
+{
+ return 0;
+}
+
+void qemu_mod_timer (QEMUTimer *ts, int64_t expire_time)
+{
+ return;
+}
+
+void qemu_free_timer (QEMUTimer *ts)
+{
+
+}
+
+void qemu_del_timer (QEMUTimer *ts)
+{
+
+}
+
+bool qemu_aio_wait()
+{
+ synctask_wake (synctask_get());
+ synctask_yield (synctask_get());
+ return 0;
+}
diff --git a/xlators/features/qemu-block/src/coroutine-synctask.c b/xlators/features/qemu-block/src/coroutine-synctask.c
new file mode 100644
index 000000000..e43988a95
--- /dev/null
+++ b/xlators/features/qemu-block/src/coroutine-synctask.c
@@ -0,0 +1,116 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "syncop.h"
+#include "qemu-block-memory-types.h"
+
+#include "qemu-block.h"
+
+/*
+ * This code serves as the bridge from the main glusterfs context to the qemu
+ * coroutine context via synctask. We create a single threaded syncenv with a
+ * single synctask responsible for processing a queue of coroutines. The qemu
+ * code invoked from within the synctask function handlers uses the ucontext
+ * coroutine implementation and scheduling logic internal to qemu. This
+ * effectively donates a thread of execution to qemu and its internal coroutine
+ * management.
+ *
+ * NOTE: The existence of concurrent synctasks has proven quite racy with regard
+ * to qemu coroutine management, particularly related to the lifecycle
+ * differences with top-level synctasks and internally created coroutines and
+ * interactions with qemu-internal queues (and locks, in turn). We explicitly
+ * disallow this scenario, via the queue, until it is more well supported.
+ */
+
+static struct {
+ struct list_head queue;
+ gf_lock_t lock;
+ struct synctask *task;
+} qb_co;
+
+static void
+init_qbco()
+{
+ INIT_LIST_HEAD(&qb_co.queue);
+ LOCK_INIT(&qb_co.lock);
+}
+
+static int
+synctask_nop_cbk (int ret, call_frame_t *frame, void *opaque)
+{
+ return 0;
+}
+
+static int
+qb_synctask_wrap (void *opaque)
+{
+ qb_local_t *qb_local, *tmp;
+
+ LOCK(&qb_co.lock);
+
+ while (!list_empty(&qb_co.queue)) {
+ list_for_each_entry_safe(qb_local, tmp, &qb_co.queue, list) {
+ list_del_init(&qb_local->list);
+ break;
+ }
+
+ UNLOCK(&qb_co.lock);
+
+ qb_local->synctask_fn(qb_local);
+ /* qb_local is now unwound and gone! */
+
+ LOCK(&qb_co.lock);
+ }
+
+ qb_co.task = NULL;
+
+ UNLOCK(&qb_co.lock);
+
+ return 0;
+}
+
+int
+qb_coroutine (call_frame_t *frame, synctask_fn_t fn)
+{
+ qb_local_t *qb_local = NULL;
+ qb_conf_t *qb_conf = NULL;
+ static int init = 0;
+
+ qb_local = frame->local;
+ qb_local->synctask_fn = fn;
+ qb_conf = frame->this->private;
+
+ if (!init) {
+ init = 1;
+ init_qbco();
+ }
+
+ LOCK(&qb_co.lock);
+
+ if (!qb_co.task)
+ qb_co.task = synctask_create(qb_conf->env, qb_synctask_wrap,
+ synctask_nop_cbk, frame, NULL);
+
+ list_add_tail(&qb_local->list, &qb_co.queue);
+
+ UNLOCK(&qb_co.lock);
+
+ return 0;
+}
diff --git a/xlators/features/qemu-block/src/monitor-logging.c b/xlators/features/qemu-block/src/monitor-logging.c
new file mode 100644
index 000000000..d37c37f0f
--- /dev/null
+++ b/xlators/features/qemu-block/src/monitor-logging.c
@@ -0,0 +1,50 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "qemu-block-memory-types.h"
+
+#include "block/block_int.h"
+
+Monitor *cur_mon;
+
+int
+monitor_cur_is_qmp()
+{
+ /* No QMP support here */
+ return 0;
+}
+
+void
+monitor_set_error (Monitor *mon, QError *qerror)
+{
+ /* NOP here */
+ return;
+}
+
+
+void
+monitor_vprintf(Monitor *mon, const char *fmt, va_list ap)
+{
+ char buf[4096];
+
+ vsnprintf(buf, sizeof(buf), fmt, ap);
+
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", buf);
+}
diff --git a/xlators/features/qemu-block/src/qb-coroutines.c b/xlators/features/qemu-block/src/qb-coroutines.c
new file mode 100644
index 000000000..7c52adb21
--- /dev/null
+++ b/xlators/features/qemu-block/src/qb-coroutines.c
@@ -0,0 +1,662 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "inode.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "qemu-block-memory-types.h"
+#include "qemu-block.h"
+#include "qb-coroutines.h"
+
+
+int
+qb_format_and_resume (void *opaque)
+{
+ qb_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ call_stub_t *stub = NULL;
+ inode_t *inode = NULL;
+ char filename[64];
+ char base_filename[128];
+ int use_base = 0;
+ qb_inode_t *qb_inode = NULL;
+ Error *local_err = NULL;
+ fd_t *fd = NULL;
+ dict_t *xattr = NULL;
+ qb_conf_t *qb_conf = NULL;
+ int ret = -1;
+
+ local = opaque;
+ frame = local->frame;
+ stub = local->stub;
+ inode = local->inode;
+ qb_conf = frame->this->private;
+
+ qb_inode_to_filename (inode, filename, 64);
+
+ qb_inode = qb_inode_ctx_get (frame->this, inode);
+
+ /*
+ * See if the caller specified a backing image.
+ */
+ if (!uuid_is_null(qb_inode->backing_gfid) || qb_inode->backing_fname) {
+ loc_t loc = {0,};
+ char gfid_str[64];
+ struct iatt buf;
+
+ if (!uuid_is_null(qb_inode->backing_gfid)) {
+ loc.inode = inode_find(qb_conf->root_inode->table,
+ qb_inode->backing_gfid);
+ if (!loc.inode) {
+ loc.inode = inode_new(qb_conf->root_inode->table);
+ uuid_copy(loc.inode->gfid,
+ qb_inode->backing_gfid);
+ }
+ uuid_copy(loc.gfid, loc.inode->gfid);
+ } else if (qb_inode->backing_fname) {
+ loc.inode = inode_new(qb_conf->root_inode->table);
+ loc.name = qb_inode->backing_fname;
+ loc.parent = inode_parent(inode, NULL, NULL);
+ loc_path(&loc, loc.name);
+ }
+
+ /*
+ * Lookup the backing image. Verify existence and/or get the
+ * gfid if we don't already have it.
+ */
+ ret = syncop_lookup(FIRST_CHILD(frame->this), &loc, NULL, &buf,
+ NULL, NULL);
+ GF_FREE(qb_inode->backing_fname);
+ if (ret) {
+ loc_wipe(&loc);
+ ret = errno;
+ goto err;
+ }
+
+ uuid_copy(qb_inode->backing_gfid, buf.ia_gfid);
+ loc_wipe(&loc);
+
+ /*
+ * We pass the filename of the backing image into the qemu block
+ * subsystem as the associated gfid. This is embedded into the
+ * clone image and passed along to the gluster bdrv backend when
+ * the block subsystem needs to operate on the backing image on
+ * behalf of the clone.
+ */
+ uuid_unparse(qb_inode->backing_gfid, gfid_str);
+ snprintf(base_filename, sizeof(base_filename),
+ "gluster://gfid:%s", gfid_str);
+ use_base = 1;
+ }
+
+ bdrv_img_create (filename, qb_inode->fmt,
+ use_base ? base_filename : NULL, 0, 0, qb_inode->size,
+ 0, &local_err, true);
+
+ if (error_is_set (&local_err)) {
+ gf_log (frame->this->name, GF_LOG_ERROR, "%s",
+ error_get_pretty (local_err));
+ error_free (local_err);
+ QB_STUB_UNWIND (stub, -1, EIO);
+ return 0;
+ }
+
+ fd = fd_anonymous (inode);
+ if (!fd) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "could not create anonymous fd for %s",
+ uuid_utoa (inode->gfid));
+ QB_STUB_UNWIND (stub, -1, ENOMEM);
+ return 0;
+ }
+
+ xattr = dict_new ();
+ if (!xattr) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "could not allocate xattr dict for %s",
+ uuid_utoa (inode->gfid));
+ QB_STUB_UNWIND (stub, -1, ENOMEM);
+ fd_unref (fd);
+ return 0;
+ }
+
+ ret = dict_set_str (xattr, qb_conf->qb_xattr_key, local->fmt);
+ if (ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "could not dict_set for %s",
+ uuid_utoa (inode->gfid));
+ QB_STUB_UNWIND (stub, -1, ENOMEM);
+ fd_unref (fd);
+ dict_unref (xattr);
+ return 0;
+ }
+
+ ret = syncop_fsetxattr (FIRST_CHILD(THIS), fd, xattr, 0);
+ if (ret) {
+ ret = errno;
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "failed to setxattr for %s",
+ uuid_utoa (inode->gfid));
+ QB_STUB_UNWIND (stub, -1, ret);
+ fd_unref (fd);
+ dict_unref (xattr);
+ return 0;
+ }
+
+ fd_unref (fd);
+ dict_unref (xattr);
+
+ QB_STUB_UNWIND (stub, 0, 0);
+
+ return 0;
+
+err:
+ QB_STUB_UNWIND(stub, -1, ret);
+ return 0;
+}
+
+
+static BlockDriverState *
+qb_bs_create (inode_t *inode, const char *fmt)
+{
+ char filename[64];
+ BlockDriverState *bs = NULL;
+ BlockDriver *drv = NULL;
+ int op_errno = 0;
+ int ret = 0;
+
+ bs = bdrv_new (uuid_utoa (inode->gfid));
+ if (!bs) {
+ op_errno = ENOMEM;
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "could not allocate @bdrv for gfid:%s",
+ uuid_utoa (inode->gfid));
+ goto err;
+ }
+
+ drv = bdrv_find_format (fmt);
+ if (!drv) {
+ op_errno = EINVAL;
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Unknown file format: %s for gfid:%s",
+ fmt, uuid_utoa (inode->gfid));
+ goto err;
+ }
+
+ qb_inode_to_filename (inode, filename, 64);
+
+ ret = bdrv_open (bs, filename, NULL, BDRV_O_RDWR, drv);
+ if (ret < 0) {
+ op_errno = -ret;
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Unable to bdrv_open() gfid:%s (%s)",
+ uuid_utoa (inode->gfid), strerror (op_errno));
+ goto err;
+ }
+
+ return bs;
+err:
+ errno = op_errno;
+ return NULL;
+}
+
+
+int
+qb_co_open (void *opaque)
+{
+ qb_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ call_stub_t *stub = NULL;
+ inode_t *inode = NULL;
+ qb_inode_t *qb_inode = NULL;
+
+ local = opaque;
+ frame = local->frame;
+ stub = local->stub;
+ inode = local->inode;
+
+ qb_inode = qb_inode_ctx_get (frame->this, inode);
+ if (!qb_inode->bs) {
+ /* FIXME: we need locks around this when
+ enabling multithreaded syncop/coroutine
+ for qemu-block
+ */
+
+ qb_inode->bs = qb_bs_create (inode, qb_inode->fmt);
+ if (!qb_inode->bs) {
+ QB_STUB_UNWIND (stub, -1, errno);
+ return 0;
+ }
+ }
+ qb_inode->refcnt++;
+
+ QB_STUB_RESUME (stub);
+
+ return 0;
+}
+
+
+int
+qb_co_writev (void *opaque)
+{
+ qb_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ call_stub_t *stub = NULL;
+ inode_t *inode = NULL;
+ qb_inode_t *qb_inode = NULL;
+ QEMUIOVector qiov = {0, };
+ int ret = 0;
+
+ local = opaque;
+ frame = local->frame;
+ stub = local->stub;
+ inode = local->inode;
+
+ qb_inode = qb_inode_ctx_get (frame->this, inode);
+ if (!qb_inode->bs) {
+ /* FIXME: we need locks around this when
+ enabling multithreaded syncop/coroutine
+ for qemu-block
+ */
+
+ qb_inode->bs = qb_bs_create (inode, qb_inode->fmt);
+ if (!qb_inode->bs) {
+ QB_STUB_UNWIND (stub, -1, errno);
+ return 0;
+ }
+ }
+
+ qemu_iovec_init_external (&qiov, stub->args.vector, stub->args.count);
+
+ ret = bdrv_pwritev (qb_inode->bs, stub->args.offset, &qiov);
+
+ if (ret < 0) {
+ QB_STUB_UNWIND (stub, -1, -ret);
+ } else {
+ QB_STUB_UNWIND (stub, ret, 0);
+ }
+
+ return 0;
+}
+
+
+int
+qb_co_readv (void *opaque)
+{
+ qb_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ call_stub_t *stub = NULL;
+ inode_t *inode = NULL;
+ qb_inode_t *qb_inode = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ struct iovec iov = {0, };
+ int ret = 0;
+
+ local = opaque;
+ frame = local->frame;
+ stub = local->stub;
+ inode = local->inode;
+
+ qb_inode = qb_inode_ctx_get (frame->this, inode);
+ if (!qb_inode->bs) {
+ /* FIXME: we need locks around this when
+ enabling multithreaded syncop/coroutine
+ for qemu-block
+ */
+
+ qb_inode->bs = qb_bs_create (inode, qb_inode->fmt);
+ if (!qb_inode->bs) {
+ QB_STUB_UNWIND (stub, -1, errno);
+ return 0;
+ }
+ }
+
+ if (stub->args.offset >= qb_inode->size) {
+ QB_STUB_UNWIND (stub, 0, 0);
+ return 0;
+ }
+
+ iobuf = iobuf_get2 (frame->this->ctx->iobuf_pool, stub->args.size);
+ if (!iobuf) {
+ QB_STUB_UNWIND (stub, -1, ENOMEM);
+ return 0;
+ }
+
+ iobref = iobref_new ();
+ if (!iobref) {
+ QB_STUB_UNWIND (stub, -1, ENOMEM);
+ iobuf_unref (iobuf);
+ return 0;
+ }
+
+ if (iobref_add (iobref, iobuf) < 0) {
+ iobuf_unref (iobuf);
+ iobref_unref (iobref);
+ QB_STUB_UNWIND (stub, -1, ENOMEM);
+ return 0;
+ }
+
+ ret = bdrv_pread (qb_inode->bs, stub->args.offset, iobuf_ptr (iobuf),
+ stub->args.size);
+
+ if (ret < 0) {
+ QB_STUB_UNWIND (stub, -1, -ret);
+ iobref_unref (iobref);
+ return 0;
+ }
+
+ iov.iov_base = iobuf_ptr (iobuf);
+ iov.iov_len = ret;
+
+ stub->args_cbk.vector = iov_dup (&iov, 1);
+ stub->args_cbk.count = 1;
+ stub->args_cbk.iobref = iobref;
+
+ QB_STUB_UNWIND (stub, ret, 0);
+
+ return 0;
+}
+
+
+int
+qb_co_fsync (void *opaque)
+{
+ qb_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ call_stub_t *stub = NULL;
+ inode_t *inode = NULL;
+ qb_inode_t *qb_inode = NULL;
+ int ret = 0;
+
+ local = opaque;
+ frame = local->frame;
+ stub = local->stub;
+ inode = local->inode;
+
+ qb_inode = qb_inode_ctx_get (frame->this, inode);
+ if (!qb_inode->bs) {
+ /* FIXME: we need locks around this when
+ enabling multithreaded syncop/coroutine
+ for qemu-block
+ */
+
+ qb_inode->bs = qb_bs_create (inode, qb_inode->fmt);
+ if (!qb_inode->bs) {
+ QB_STUB_UNWIND (stub, -1, errno);
+ return 0;
+ }
+ }
+
+ ret = bdrv_flush (qb_inode->bs);
+
+ if (ret < 0) {
+ QB_STUB_UNWIND (stub, -1, -ret);
+ } else {
+ QB_STUB_UNWIND (stub, ret, 0);
+ }
+
+ return 0;
+}
+
+
+static void
+qb_update_size_xattr (xlator_t *this, fd_t *fd, const char *fmt, off_t offset)
+{
+ char val[QB_XATTR_VAL_MAX];
+ qb_conf_t *qb_conf = NULL;
+ dict_t *xattr = NULL;
+
+ qb_conf = this->private;
+
+ snprintf (val, QB_XATTR_VAL_MAX, "%s:%llu",
+ fmt, (long long unsigned) offset);
+
+ xattr = dict_new ();
+ if (!xattr)
+ return;
+
+ if (dict_set_str (xattr, qb_conf->qb_xattr_key, val) != 0) {
+ dict_unref (xattr);
+ return;
+ }
+
+ syncop_fsetxattr (FIRST_CHILD(this), fd, xattr, 0);
+ dict_unref (xattr);
+}
+
+
+int
+qb_co_truncate (void *opaque)
+{
+ qb_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ call_stub_t *stub = NULL;
+ inode_t *inode = NULL;
+ qb_inode_t *qb_inode = NULL;
+ int ret = 0;
+ off_t offset = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ local = opaque;
+ frame = local->frame;
+ stub = local->stub;
+ inode = local->inode;
+
+ qb_inode = qb_inode_ctx_get (frame->this, inode);
+ if (!qb_inode->bs) {
+ /* FIXME: we need locks around this when
+ enabling multithreaded syncop/coroutine
+ for qemu-block
+ */
+
+ qb_inode->bs = qb_bs_create (inode, qb_inode->fmt);
+ if (!qb_inode->bs) {
+ QB_STUB_UNWIND (stub, -1, errno);
+ return 0;
+ }
+ }
+
+ syncop_fstat (FIRST_CHILD(this), local->fd, &stub->args_cbk.prestat);
+ stub->args_cbk.prestat.ia_size = qb_inode->size;
+
+ ret = bdrv_truncate (qb_inode->bs, stub->args.offset);
+ if (ret < 0)
+ goto out;
+
+ offset = bdrv_getlength (qb_inode->bs);
+
+ qb_inode->size = offset;
+
+ syncop_fstat (FIRST_CHILD(this), local->fd, &stub->args_cbk.poststat);
+ stub->args_cbk.poststat.ia_size = qb_inode->size;
+
+ qb_update_size_xattr (this, local->fd, qb_inode->fmt, qb_inode->size);
+
+out:
+ if (ret < 0) {
+ QB_STUB_UNWIND (stub, -1, -ret);
+ } else {
+ QB_STUB_UNWIND (stub, ret, 0);
+ }
+
+ return 0;
+}
+
+
+int
+qb_co_close (void *opaque)
+{
+ qb_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ inode_t *inode = NULL;
+ qb_inode_t *qb_inode = NULL;
+ BlockDriverState *bs = NULL;
+
+ local = opaque;
+ inode = local->inode;
+
+ qb_inode = qb_inode_ctx_get (THIS, inode);
+
+ if (!--qb_inode->refcnt) {
+ bs = qb_inode->bs;
+ qb_inode->bs = NULL;
+ bdrv_delete (bs);
+ }
+
+ frame = local->frame;
+ frame->local = NULL;
+ qb_local_free (THIS, local);
+ STACK_DESTROY (frame->root);
+
+ return 0;
+}
+
+
+int
+qb_snapshot_create (void *opaque)
+{
+ qb_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ call_stub_t *stub = NULL;
+ inode_t *inode = NULL;
+ qb_inode_t *qb_inode = NULL;
+ QEMUSnapshotInfo sn;
+ struct timeval tv = {0, };
+ int ret = 0;
+
+ local = opaque;
+ frame = local->frame;
+ stub = local->stub;
+ inode = local->inode;
+
+ qb_inode = qb_inode_ctx_get (frame->this, inode);
+ if (!qb_inode->bs) {
+ /* FIXME: we need locks around this when
+ enabling multithreaded syncop/coroutine
+ for qemu-block
+ */
+
+ qb_inode->bs = qb_bs_create (inode, qb_inode->fmt);
+ if (!qb_inode->bs) {
+ QB_STUB_UNWIND (stub, -1, errno);
+ return 0;
+ }
+ }
+
+ memset (&sn, 0, sizeof (sn));
+ pstrcpy (sn.name, sizeof(sn.name), local->name);
+ gettimeofday (&tv, NULL);
+ sn.date_sec = tv.tv_sec;
+ sn.date_nsec = tv.tv_usec * 1000;
+
+ ret = bdrv_snapshot_create (qb_inode->bs, &sn);
+ if (ret < 0) {
+ QB_STUB_UNWIND (stub, -1, -ret);
+ } else {
+ QB_STUB_UNWIND (stub, ret, 0);
+ }
+
+ return 0;
+}
+
+
+int
+qb_snapshot_delete (void *opaque)
+{
+ qb_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ call_stub_t *stub = NULL;
+ inode_t *inode = NULL;
+ qb_inode_t *qb_inode = NULL;
+ int ret = 0;
+
+ local = opaque;
+ frame = local->frame;
+ stub = local->stub;
+ inode = local->inode;
+
+ qb_inode = qb_inode_ctx_get (frame->this, inode);
+ if (!qb_inode->bs) {
+ /* FIXME: we need locks around this when
+ enabling multithreaded syncop/coroutine
+ for qemu-block
+ */
+
+ qb_inode->bs = qb_bs_create (inode, qb_inode->fmt);
+ if (!qb_inode->bs) {
+ QB_STUB_UNWIND (stub, -1, errno);
+ return 0;
+ }
+ }
+
+ ret = bdrv_snapshot_delete (qb_inode->bs, local->name);
+
+ if (ret < 0) {
+ QB_STUB_UNWIND (stub, -1, -ret);
+ } else {
+ QB_STUB_UNWIND (stub, ret, 0);
+ }
+
+ return 0;
+}
+
+
+int
+qb_snapshot_goto (void *opaque)
+{
+ qb_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ call_stub_t *stub = NULL;
+ inode_t *inode = NULL;
+ qb_inode_t *qb_inode = NULL;
+ int ret = 0;
+
+ local = opaque;
+ frame = local->frame;
+ stub = local->stub;
+ inode = local->inode;
+
+ qb_inode = qb_inode_ctx_get (frame->this, inode);
+ if (!qb_inode->bs) {
+ /* FIXME: we need locks around this when
+ enabling multithreaded syncop/coroutine
+ for qemu-block
+ */
+
+ qb_inode->bs = qb_bs_create (inode, qb_inode->fmt);
+ if (!qb_inode->bs) {
+ QB_STUB_UNWIND (stub, -1, errno);
+ return 0;
+ }
+ }
+
+ ret = bdrv_snapshot_goto (qb_inode->bs, local->name);
+
+ if (ret < 0) {
+ QB_STUB_UNWIND (stub, -1, -ret);
+ } else {
+ QB_STUB_UNWIND (stub, ret, 0);
+ }
+
+ return 0;
+}
diff --git a/xlators/features/qemu-block/src/qb-coroutines.h b/xlators/features/qemu-block/src/qb-coroutines.h
new file mode 100644
index 000000000..583319f3b
--- /dev/null
+++ b/xlators/features/qemu-block/src/qb-coroutines.h
@@ -0,0 +1,30 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __QB_COROUTINES_H
+#define __QB_COROUTINES_H
+
+#include "syncop.h"
+#include "call-stub.h"
+#include "block/block_int.h"
+#include "monitor/monitor.h"
+
+int qb_format_and_resume (void *opaque);
+int qb_snapshot_create (void *opaque);
+int qb_snapshot_delete (void *opaque);
+int qb_snapshot_goto (void *opaque);
+int qb_co_open (void *opaque);
+int qb_co_close (void *opaque);
+int qb_co_writev (void *opaque);
+int qb_co_readv (void *opaque);
+int qb_co_fsync (void *opaque);
+int qb_co_truncate (void *opaque);
+
+#endif /* __QB_COROUTINES_H */
diff --git a/xlators/features/qemu-block/src/qemu-block-memory-types.h b/xlators/features/qemu-block/src/qemu-block-memory-types.h
new file mode 100644
index 000000000..267b3893f
--- /dev/null
+++ b/xlators/features/qemu-block/src/qemu-block-memory-types.h
@@ -0,0 +1,25 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __QB_MEM_TYPES_H__
+#define __QB_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_qb_mem_types_ {
+ gf_qb_mt_qb_conf_t = gf_common_mt_end + 1,
+ gf_qb_mt_qb_inode_t,
+ gf_qb_mt_qb_local_t,
+ gf_qb_mt_coroutinesynctask_t,
+ gf_qb_mt_end
+};
+#endif
+
diff --git a/xlators/features/qemu-block/src/qemu-block.c b/xlators/features/qemu-block/src/qemu-block.c
new file mode 100644
index 000000000..48bbf3140
--- /dev/null
+++ b/xlators/features/qemu-block/src/qemu-block.c
@@ -0,0 +1,1140 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "inode.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "qemu-block-memory-types.h"
+#include "qemu-block.h"
+#include "qb-coroutines.h"
+
+
+qb_inode_t *
+__qb_inode_ctx_get (xlator_t *this, inode_t *inode)
+{
+ uint64_t value = 0;
+ qb_inode_t *qb_inode = NULL;
+
+ __inode_ctx_get (inode, this, &value);
+ qb_inode = (qb_inode_t *)(unsigned long) value;
+
+ return qb_inode;
+}
+
+
+qb_inode_t *
+qb_inode_ctx_get (xlator_t *this, inode_t *inode)
+{
+ qb_inode_t *qb_inode = NULL;
+
+ LOCK (&inode->lock);
+ {
+ qb_inode = __qb_inode_ctx_get (this, inode);
+ }
+ UNLOCK (&inode->lock);
+
+ return qb_inode;
+}
+
+
+qb_inode_t *
+qb_inode_ctx_del (xlator_t *this, inode_t *inode)
+{
+ uint64_t value = 0;
+ qb_inode_t *qb_inode = NULL;
+
+ inode_ctx_del (inode, this, &value);
+ qb_inode = (qb_inode_t *)(unsigned long) value;
+
+ return qb_inode;
+}
+
+
+int
+qb_inode_cleanup (xlator_t *this, inode_t *inode, int warn)
+{
+ qb_inode_t *qb_inode = NULL;
+
+ qb_inode = qb_inode_ctx_del (this, inode);
+
+ if (!qb_inode)
+ return 0;
+
+ if (warn)
+ gf_log (this->name, GF_LOG_WARNING,
+ "inode %s no longer block formatted",
+ uuid_utoa (inode->gfid));
+
+ /* free (qb_inode->bs); */
+
+ GF_FREE (qb_inode);
+
+ return 0;
+}
+
+
+int
+qb_iatt_fixup (xlator_t *this, inode_t *inode, struct iatt *iatt)
+{
+ qb_inode_t *qb_inode = NULL;
+
+ qb_inode = qb_inode_ctx_get (this, inode);
+ if (!qb_inode)
+ return 0;
+
+ iatt->ia_size = qb_inode->size;
+
+ return 0;
+}
+
+
+int
+qb_format_extract (xlator_t *this, char *format, inode_t *inode)
+{
+ char *s, *save;
+ uint64_t size = 0;
+ char fmt[QB_XATTR_VAL_MAX+1] = {0, };
+ qb_inode_t *qb_inode = NULL;
+ char *formatstr = NULL;
+ uuid_t gfid = {0,};
+ char gfid_str[64] = {0,};
+ int ret;
+
+ strncpy(fmt, format, QB_XATTR_VAL_MAX);
+
+ s = strtok_r(fmt, ":", &save);
+ if (!s)
+ goto invalid;
+ formatstr = gf_strdup(s);
+
+ s = strtok_r(NULL, ":", &save);
+ if (!s)
+ goto invalid;
+ if (gf_string2bytesize (s, &size))
+ goto invalid;
+ if (!size)
+ goto invalid;
+
+ s = strtok_r(NULL, "\0", &save);
+ if (s && !strncmp(s, "<gfid:", strlen("<gfid:"))) {
+ /*
+ * Check for valid gfid backing image specifier.
+ */
+ if (strlen(s) + 1 > sizeof(gfid_str))
+ goto invalid;
+ ret = sscanf(s, "<gfid:%[^>]s", gfid_str);
+ if (ret == 1) {
+ ret = uuid_parse(gfid_str, gfid);
+ if (ret < 0)
+ goto invalid;
+ }
+ }
+
+ qb_inode = qb_inode_ctx_get (this, inode);
+ if (!qb_inode)
+ qb_inode = GF_CALLOC (1, sizeof (*qb_inode),
+ gf_qb_mt_qb_inode_t);
+ if (!qb_inode) {
+ GF_FREE(formatstr);
+ return ENOMEM;
+ }
+
+ strncpy(qb_inode->fmt, formatstr, QB_XATTR_VAL_MAX);
+ qb_inode->size = size;
+
+ /*
+ * If a backing gfid was not specified, interpret any remaining bytes
+ * associated with a backing image as a filename local to the parent
+ * directory. The format processing will validate further.
+ */
+ if (!uuid_is_null(gfid))
+ uuid_copy(qb_inode->backing_gfid, gfid);
+ else if (s)
+ qb_inode->backing_fname = gf_strdup(s);
+
+ inode_ctx_set (inode, this, (void *)&qb_inode);
+
+ GF_FREE(formatstr);
+
+ return 0;
+
+invalid:
+ GF_FREE(formatstr);
+
+ gf_log (this->name, GF_LOG_WARNING,
+ "invalid format '%s' in inode %s", format,
+ uuid_utoa (inode->gfid));
+ return EINVAL;
+}
+
+
+void
+qb_local_free (xlator_t *this, qb_local_t *local)
+{
+ if (local->inode)
+ inode_unref (local->inode);
+ if (local->fd)
+ fd_unref (local->fd);
+ GF_FREE (local);
+}
+
+
+int
+qb_local_init (call_frame_t *frame)
+{
+ qb_local_t *qb_local = NULL;
+
+ qb_local = GF_CALLOC (1, sizeof (*qb_local), gf_qb_mt_qb_local_t);
+ if (!qb_local)
+ return -1;
+ INIT_LIST_HEAD(&qb_local->list);
+
+ qb_local->frame = frame;
+ frame->local = qb_local;
+
+ return 0;
+}
+
+
+int
+qb_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode, struct iatt *buf,
+ dict_t *xdata, struct iatt *postparent)
+{
+ char *format = NULL;
+ qb_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (op_ret == -1)
+ goto out;
+
+ /*
+ * Cache the root inode for dealing with backing images. The format
+ * coroutine and the gluster qemu backend driver both use the root inode
+ * table to verify and/or redirect I/O to the backing image via
+ * anonymous fd's.
+ */
+ if (!conf->root_inode && __is_root_gfid(inode->gfid))
+ conf->root_inode = inode_ref(inode);
+
+ if (!xdata)
+ goto out;
+
+ if (dict_get_str (xdata, conf->qb_xattr_key, &format))
+ goto out;
+
+ if (!format) {
+ qb_inode_cleanup (this, inode, 1);
+ goto out;
+ }
+
+ op_errno = qb_format_extract (this, format, inode);
+ if (op_errno)
+ op_ret = -1;
+
+ qb_iatt_fixup (this, inode, buf);
+out:
+ QB_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf,
+ xdata, postparent);
+ return 0;
+}
+
+
+int
+qb_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ qb_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ xdata = xdata ? dict_ref (xdata) : dict_new ();
+
+ if (!xdata)
+ goto enomem;
+
+ if (dict_set_int32 (xdata, conf->qb_xattr_key, 0))
+ goto enomem;
+
+ STACK_WIND (frame, qb_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ dict_unref (xdata);
+ return 0;
+enomem:
+ QB_STACK_UNWIND (lookup, frame, -1, ENOMEM, 0, 0, 0, 0);
+ if (xdata)
+ dict_unref (xdata);
+ return 0;
+}
+
+
+int
+qb_setxattr_format (call_frame_t *frame, xlator_t *this, call_stub_t *stub,
+ dict_t *xattr, inode_t *inode)
+{
+ char *format = NULL;
+ int op_errno = 0;
+ qb_local_t *qb_local = NULL;
+ data_t *data = NULL;
+ qb_inode_t *qb_inode;
+
+ if (!(data = dict_get (xattr, "trusted.glusterfs.block-format"))) {
+ QB_STUB_RESUME (stub);
+ return 0;
+ }
+
+ format = alloca (data->len + 1);
+ memcpy (format, data->data, data->len);
+ format[data->len] = 0;
+
+ op_errno = qb_format_extract (this, format, inode);
+ if (op_errno) {
+ QB_STUB_UNWIND (stub, -1, op_errno);
+ return 0;
+ }
+ qb_inode = qb_inode_ctx_get(this, inode);
+
+ qb_local = frame->local;
+
+ qb_local->stub = stub;
+ qb_local->inode = inode_ref (inode);
+
+ snprintf(qb_local->fmt, QB_XATTR_VAL_MAX, "%s:%lu", qb_inode->fmt,
+ qb_inode->size);
+
+ qb_coroutine (frame, qb_format_and_resume);
+
+ return 0;
+}
+
+
+int
+qb_setxattr_snapshot_create (call_frame_t *frame, xlator_t *this,
+ call_stub_t *stub, dict_t *xattr, inode_t *inode)
+{
+ qb_local_t *qb_local = NULL;
+ char *name = NULL;
+ data_t *data = NULL;
+
+ if (!(data = dict_get (xattr, "trusted.glusterfs.block-snapshot-create"))) {
+ QB_STUB_RESUME (stub);
+ return 0;
+ }
+
+ name = alloca (data->len + 1);
+ memcpy (name, data->data, data->len);
+ name[data->len] = 0;
+
+ qb_local = frame->local;
+
+ qb_local->stub = stub;
+ qb_local->inode = inode_ref (inode);
+ strncpy (qb_local->name, name, 128);
+
+ qb_coroutine (frame, qb_snapshot_create);
+
+ return 0;
+}
+
+
+int
+qb_setxattr_snapshot_delete (call_frame_t *frame, xlator_t *this,
+ call_stub_t *stub, dict_t *xattr, inode_t *inode)
+{
+ qb_local_t *qb_local = NULL;
+ char *name = NULL;
+ data_t *data = NULL;
+
+ if (!(data = dict_get (xattr, "trusted.glusterfs.block-snapshot-delete"))) {
+ QB_STUB_RESUME (stub);
+ return 0;
+ }
+
+ name = alloca (data->len + 1);
+ memcpy (name, data->data, data->len);
+ name[data->len] = 0;
+
+ qb_local = frame->local;
+
+ qb_local->stub = stub;
+ qb_local->inode = inode_ref (inode);
+ strncpy (qb_local->name, name, 128);
+
+ qb_coroutine (frame, qb_snapshot_delete);
+
+ return 0;
+}
+
+int
+qb_setxattr_snapshot_goto (call_frame_t *frame, xlator_t *this,
+ call_stub_t *stub, dict_t *xattr, inode_t *inode)
+{
+ qb_local_t *qb_local = NULL;
+ char *name = NULL;
+ data_t *data = NULL;
+
+ if (!(data = dict_get (xattr, "trusted.glusterfs.block-snapshot-goto"))) {
+ QB_STUB_RESUME (stub);
+ return 0;
+ }
+
+ name = alloca (data->len + 1);
+ memcpy (name, data->data, data->len);
+ name[data->len] = 0;
+
+ qb_local = frame->local;
+
+ qb_local->stub = stub;
+ qb_local->inode = inode_ref (inode);
+ strncpy (qb_local->name, name, 128);
+
+ qb_coroutine (frame, qb_snapshot_goto);
+
+ return 0;
+}
+
+
+int
+qb_setxattr_common (call_frame_t *frame, xlator_t *this, call_stub_t *stub,
+ dict_t *xattr, inode_t *inode)
+{
+ data_t *data = NULL;
+
+ if ((data = dict_get (xattr, "trusted.glusterfs.block-format"))) {
+ qb_setxattr_format (frame, this, stub, xattr, inode);
+ return 0;
+ }
+
+ if ((data = dict_get (xattr, "trusted.glusterfs.block-snapshot-create"))) {
+ qb_setxattr_snapshot_create (frame, this, stub, xattr, inode);
+ return 0;
+ }
+
+ if ((data = dict_get (xattr, "trusted.glusterfs.block-snapshot-delete"))) {
+ qb_setxattr_snapshot_delete (frame, this, stub, xattr, inode);
+ return 0;
+ }
+
+ if ((data = dict_get (xattr, "trusted.glusterfs.block-snapshot-goto"))) {
+ qb_setxattr_snapshot_goto (frame, this, stub, xattr, inode);
+ return 0;
+ }
+
+ QB_STUB_RESUME (stub);
+
+ return 0;
+}
+
+
+int
+qb_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr,
+ int flags, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+
+ if (qb_local_init (frame) != 0)
+ goto enomem;
+
+ stub = fop_setxattr_stub (frame, default_setxattr_resume, loc, xattr,
+ flags, xdata);
+ if (!stub)
+ goto enomem;
+
+ qb_setxattr_common (frame, this, stub, xattr, loc->inode);
+
+ return 0;
+enomem:
+ QB_STACK_UNWIND (setxattr, frame, -1, ENOMEM, 0);
+ return 0;
+}
+
+
+int
+qb_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr,
+ int flags, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+
+ if (qb_local_init (frame) != 0)
+ goto enomem;
+
+ stub = fop_fsetxattr_stub (frame, default_fsetxattr_resume, fd, xattr,
+ flags, xdata);
+ if (!stub)
+ goto enomem;
+
+ qb_setxattr_common (frame, this, stub, xattr, fd->inode);
+
+ return 0;
+enomem:
+ QB_STACK_UNWIND (fsetxattr, frame, -1, ENOMEM, 0);
+ return 0;
+}
+
+
+int
+qb_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ qb_local_t *qb_local = NULL;
+
+ qb_local = frame->local;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ if (!qb_inode_ctx_get (this, qb_local->inode))
+ goto unwind;
+
+ stub = fop_open_cbk_stub (frame, NULL, op_ret, op_errno, fd, xdata);
+ if (!stub) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ qb_local->stub = stub;
+
+ qb_coroutine (frame, qb_co_open);
+
+ return 0;
+unwind:
+ QB_STACK_UNWIND (open, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+
+int
+qb_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ fd_t *fd, dict_t *xdata)
+{
+ qb_local_t *qb_local = NULL;
+ qb_inode_t *qb_inode = NULL;
+
+ qb_inode = qb_inode_ctx_get (this, loc->inode);
+ if (!qb_inode) {
+ STACK_WIND (frame, default_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd,
+ xdata);
+ return 0;
+ }
+
+ if (qb_local_init (frame) != 0)
+ goto enomem;
+
+ qb_local = frame->local;
+
+ qb_local->inode = inode_ref (loc->inode);
+ qb_local->fd = fd_ref (fd);
+
+ STACK_WIND (frame, qb_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
+enomem:
+ QB_STACK_UNWIND (open, frame, -1, ENOMEM, 0, 0);
+ return 0;
+}
+
+
+int
+qb_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
+{
+ qb_local_t *qb_local = NULL;
+ qb_inode_t *qb_inode = NULL;
+
+ qb_inode = qb_inode_ctx_get (this, fd->inode);
+ if (!qb_inode) {
+ STACK_WIND (frame, default_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count,
+ offset, flags, iobref, xdata);
+ return 0;
+ }
+
+ if (qb_local_init (frame) != 0)
+ goto enomem;
+
+ qb_local = frame->local;
+
+ qb_local->inode = inode_ref (fd->inode);
+ qb_local->fd = fd_ref (fd);
+
+ qb_local->stub = fop_writev_stub (frame, NULL, fd, vector, count,
+ offset, flags, iobref, xdata);
+ if (!qb_local->stub)
+ goto enomem;
+
+ qb_coroutine (frame, qb_co_writev);
+
+ return 0;
+enomem:
+ QB_STACK_UNWIND (writev, frame, -1, ENOMEM, 0, 0, 0);
+ return 0;
+}
+
+
+int
+qb_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ qb_local_t *qb_local = NULL;
+ qb_inode_t *qb_inode = NULL;
+
+ qb_inode = qb_inode_ctx_get (this, fd->inode);
+ if (!qb_inode) {
+ STACK_WIND (frame, default_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset,
+ flags, xdata);
+ return 0;
+ }
+
+ if (qb_local_init (frame) != 0)
+ goto enomem;
+
+ qb_local = frame->local;
+
+ qb_local->inode = inode_ref (fd->inode);
+ qb_local->fd = fd_ref (fd);
+
+ qb_local->stub = fop_readv_stub (frame, NULL, fd, size, offset,
+ flags, xdata);
+ if (!qb_local->stub)
+ goto enomem;
+
+ qb_coroutine (frame, qb_co_readv);
+
+ return 0;
+enomem:
+ QB_STACK_UNWIND (readv, frame, -1, ENOMEM, 0, 0, 0, 0, 0);
+ return 0;
+}
+
+
+int
+qb_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int dsync,
+ dict_t *xdata)
+{
+ qb_local_t *qb_local = NULL;
+ qb_inode_t *qb_inode = NULL;
+
+ qb_inode = qb_inode_ctx_get (this, fd->inode);
+ if (!qb_inode) {
+ STACK_WIND (frame, default_fsync_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, dsync, xdata);
+ return 0;
+ }
+
+ if (qb_local_init (frame) != 0)
+ goto enomem;
+
+ qb_local = frame->local;
+
+ qb_local->inode = inode_ref (fd->inode);
+ qb_local->fd = fd_ref (fd);
+
+ qb_local->stub = fop_fsync_stub (frame, NULL, fd, dsync, xdata);
+
+ if (!qb_local->stub)
+ goto enomem;
+
+ qb_coroutine (frame, qb_co_fsync);
+
+ return 0;
+enomem:
+ QB_STACK_UNWIND (fsync, frame, -1, ENOMEM, 0, 0, 0);
+ return 0;
+}
+
+
+int
+qb_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ qb_local_t *qb_local = NULL;
+ qb_inode_t *qb_inode = NULL;
+
+ qb_inode = qb_inode_ctx_get (this, fd->inode);
+ if (!qb_inode) {
+ STACK_WIND (frame, default_flush_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
+ return 0;
+ }
+
+ if (qb_local_init (frame) != 0)
+ goto enomem;
+
+ qb_local = frame->local;
+
+ qb_local->inode = inode_ref (fd->inode);
+ qb_local->fd = fd_ref (fd);
+
+ qb_local->stub = fop_flush_stub (frame, NULL, fd, xdata);
+
+ if (!qb_local->stub)
+ goto enomem;
+
+ qb_coroutine (frame, qb_co_fsync);
+
+ return 0;
+enomem:
+ QB_STACK_UNWIND (flush, frame, -1, ENOMEM, 0);
+ return 0;
+}
+
+static int32_t
+qb_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ qb_conf_t *conf = this->private;
+ gf_dirent_t *entry;
+ char *format;
+
+ list_for_each_entry(entry, &entries->list, list) {
+ if (!entry->inode || !entry->dict)
+ continue;
+
+ format = NULL;
+ if (dict_get_str(entry->dict, conf->qb_xattr_key, &format))
+ continue;
+
+ if (!format) {
+ qb_inode_cleanup(this, entry->inode, 1);
+ continue;
+ }
+
+ if (qb_format_extract(this, format, entry->inode))
+ continue;
+
+ qb_iatt_fixup(this, entry->inode, &entry->d_stat);
+ }
+
+ STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata);
+ return 0;
+}
+
+static int32_t
+qb_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ qb_conf_t *conf = this->private;
+
+ xdata = xdata ? dict_ref(xdata) : dict_new();
+ if (!xdata)
+ goto enomem;
+
+ if (dict_set_int32 (xdata, conf->qb_xattr_key, 0))
+ goto enomem;
+
+ STACK_WIND(frame, qb_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata);
+
+ dict_unref(xdata);
+ return 0;
+
+enomem:
+ QB_STACK_UNWIND(readdirp, frame, -1, ENOMEM, NULL, NULL);
+ if (xdata)
+ dict_unref(xdata);
+ return 0;
+}
+
+int
+qb_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ qb_local_t *qb_local = NULL;
+ qb_inode_t *qb_inode = NULL;
+
+ qb_inode = qb_inode_ctx_get (this, loc->inode);
+ if (!qb_inode) {
+ STACK_WIND (frame, default_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset,
+ xdata);
+ return 0;
+ }
+
+ if (qb_local_init (frame) != 0)
+ goto enomem;
+
+ qb_local = frame->local;
+
+ qb_local->inode = inode_ref (loc->inode);
+ qb_local->fd = fd_anonymous (loc->inode);
+
+ qb_local->stub = fop_truncate_stub (frame, NULL, loc, offset, xdata);
+
+ if (!qb_local->stub)
+ goto enomem;
+
+ qb_coroutine (frame, qb_co_truncate);
+
+ return 0;
+enomem:
+ QB_STACK_UNWIND (truncate, frame, -1, ENOMEM, 0, 0, 0);
+ return 0;
+}
+
+
+int
+qb_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ qb_local_t *qb_local = NULL;
+ qb_inode_t *qb_inode = NULL;
+
+ qb_inode = qb_inode_ctx_get (this, fd->inode);
+ if (!qb_inode) {
+ STACK_WIND (frame, default_ftruncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset,
+ xdata);
+ return 0;
+ }
+
+ if (qb_local_init (frame) != 0)
+ goto enomem;
+
+ qb_local = frame->local;
+
+ qb_local->inode = inode_ref (fd->inode);
+ qb_local->fd = fd_ref (fd);
+
+ qb_local->stub = fop_ftruncate_stub (frame, NULL, fd, offset, xdata);
+
+ if (!qb_local->stub)
+ goto enomem;
+
+ qb_coroutine (frame, qb_co_truncate);
+
+ return 0;
+enomem:
+ QB_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, 0, 0, 0);
+ return 0;
+}
+
+
+int
+qb_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *iatt, dict_t *xdata)
+{
+ inode_t *inode = NULL;
+
+ inode = frame->local;
+ frame->local = NULL;
+
+ if (inode) {
+ qb_iatt_fixup (this, inode, iatt);
+ inode_unref (inode);
+ }
+
+ QB_STACK_UNWIND (stat, frame, op_ret, op_errno, iatt, xdata);
+
+ return 0;
+}
+
+int
+qb_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ if (qb_inode_ctx_get (this, loc->inode))
+ frame->local = inode_ref (loc->inode);
+
+ STACK_WIND (frame, qb_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
+}
+
+
+int
+qb_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *iatt, dict_t *xdata)
+{
+ inode_t *inode = NULL;
+
+ inode = frame->local;
+ frame->local = NULL;
+
+ if (inode) {
+ qb_iatt_fixup (this, inode, iatt);
+ inode_unref (inode);
+ }
+
+ QB_STACK_UNWIND (fstat, frame, op_ret, op_errno, iatt, xdata);
+
+ return 0;
+}
+
+
+int
+qb_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ if (qb_inode_ctx_get (this, fd->inode))
+ frame->local = inode_ref (fd->inode);
+
+ STACK_WIND (frame, qb_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ return 0;
+}
+
+
+int
+qb_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *pre, struct iatt *post,
+ dict_t *xdata)
+{
+ inode_t *inode = NULL;
+
+ inode = frame->local;
+ frame->local = NULL;
+
+ if (inode) {
+ qb_iatt_fixup (this, inode, pre);
+ qb_iatt_fixup (this, inode, post);
+ inode_unref (inode);
+ }
+
+ QB_STACK_UNWIND (setattr, frame, op_ret, op_errno, pre, post, xdata);
+
+ return 0;
+}
+
+
+int
+qb_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *buf,
+ int valid, dict_t *xdata)
+{
+ if (qb_inode_ctx_get (this, loc->inode))
+ frame->local = inode_ref (loc->inode);
+
+ STACK_WIND (frame, qb_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, buf, valid, xdata);
+ return 0;
+}
+
+
+int
+qb_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *pre, struct iatt *post,
+ dict_t *xdata)
+{
+ inode_t *inode = NULL;
+
+ inode = frame->local;
+ frame->local = NULL;
+
+ if (inode) {
+ qb_iatt_fixup (this, inode, pre);
+ qb_iatt_fixup (this, inode, post);
+ inode_unref (inode);
+ }
+
+ QB_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, pre, post, xdata);
+
+ return 0;
+}
+
+
+int
+qb_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *buf,
+ int valid, dict_t *xdata)
+{
+ if (qb_inode_ctx_get (this, fd->inode))
+ frame->local = inode_ref (fd->inode);
+
+ STACK_WIND (frame, qb_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr, fd, buf, valid, xdata);
+ return 0;
+}
+
+
+int
+qb_forget (xlator_t *this, inode_t *inode)
+{
+ return qb_inode_cleanup (this, inode, 0);
+}
+
+
+int
+qb_release (xlator_t *this, fd_t *fd)
+{
+ call_frame_t *frame = NULL;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not allocate frame. "
+ "Leaking QEMU BlockDriverState");
+ return -1;
+ }
+
+ if (qb_local_init (frame) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not allocate local. "
+ "Leaking QEMU BlockDriverState");
+ STACK_DESTROY (frame->root);
+ return -1;
+ }
+
+ if (qb_coroutine (frame, qb_co_close) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not allocate coroutine. "
+ "Leaking QEMU BlockDriverState");
+ qb_local_free (this, frame->local);
+ frame->local = NULL;
+ STACK_DESTROY (frame->root);
+ }
+
+ return 0;
+}
+
+int
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ ret = xlator_mem_acct_init (this, gf_qb_mt_end + 1);
+
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init "
+ "failed");
+ return ret;
+}
+
+
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ return 0;
+}
+
+
+int
+init (xlator_t *this)
+{
+ qb_conf_t *conf = NULL;
+ int32_t ret = -1;
+ static int bdrv_inited = 0;
+
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "FATAL: qemu-block (%s) not configured with exactly "
+ "one child", this->name);
+ goto out;
+ }
+
+ conf = GF_CALLOC (1, sizeof (*conf), gf_qb_mt_qb_conf_t);
+ if (!conf)
+ goto out;
+
+ /* configure 'option window-size <size>' */
+ GF_OPTION_INIT ("default-password", conf->default_password, str, out);
+
+ /* qemu coroutines use "co_mutex" for synchronizing among themselves.
+ However "co_mutex" itself is not threadsafe if the coroutine framework
+ is multithreaded (which usually is not). However synctasks are
+ fundamentally multithreaded, so for now create a syncenv which has
+ scaling limits set to max 1 thread so that the qemu coroutines can
+ execute "safely".
+
+ Future work: provide an implementation of "co_mutex" which is
+ threadsafe and use the global multithreaded ctx->env syncenv.
+ */
+ conf->env = syncenv_new (0, 1, 1);
+
+ this->private = conf;
+
+ ret = 0;
+
+ snprintf (conf->qb_xattr_key, QB_XATTR_KEY_MAX, QB_XATTR_KEY_FMT,
+ this->name);
+
+ cur_mon = (void *) 1;
+
+ if (!bdrv_inited) {
+ bdrv_init ();
+ bdrv_inited = 1;
+ }
+
+out:
+ if (ret)
+ GF_FREE (conf);
+
+ return ret;
+}
+
+
+void
+fini (xlator_t *this)
+{
+ qb_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ this->private = NULL;
+
+ if (conf->root_inode)
+ inode_unref(conf->root_inode);
+ GF_FREE (conf);
+
+ return;
+}
+
+
+struct xlator_fops fops = {
+ .lookup = qb_lookup,
+ .fsetxattr = qb_fsetxattr,
+ .setxattr = qb_setxattr,
+ .open = qb_open,
+ .writev = qb_writev,
+ .readv = qb_readv,
+ .fsync = qb_fsync,
+ .truncate = qb_truncate,
+ .ftruncate = qb_ftruncate,
+ .stat = qb_stat,
+ .fstat = qb_fstat,
+ .setattr = qb_setattr,
+ .fsetattr = qb_fsetattr,
+ .flush = qb_flush,
+/*
+ .getxattr = qb_getxattr,
+ .fgetxattr = qb_fgetxattr
+*/
+ .readdirp = qb_readdirp,
+};
+
+
+struct xlator_cbks cbks = {
+ .forget = qb_forget,
+ .release = qb_release,
+};
+
+
+struct xlator_dumpops dumpops = {
+};
+
+
+struct volume_options options[] = {
+ { .key = {"default-password"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "",
+ .description = "Default password for the AES encrypted block images."
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/features/qemu-block/src/qemu-block.h b/xlators/features/qemu-block/src/qemu-block.h
new file mode 100644
index 000000000..c95f2799a
--- /dev/null
+++ b/xlators/features/qemu-block/src/qemu-block.h
@@ -0,0 +1,109 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __QEMU_BLOCK_H
+#define __QEMU_BLOCK_H
+
+#include "syncop.h"
+#include "call-stub.h"
+#include "block/block_int.h"
+#include "monitor/monitor.h"
+
+/* QB_XATTR_KEY_FMT is the on-disk xattr stored in the inode which
+ indicates that the file must be "interpreted" by the block format
+ logic. The value of the key is of the pattern:
+
+ "format:virtual_size"
+
+ e.g
+
+ "qcow2:20GB" or "qed:100GB"
+
+ The format and virtual size are colon separated. The format is
+ a case sensitive string which qemu recognizes. virtual_size is
+ specified as a size which glusterfs recognizes as size (i.e.,
+ value accepted by gf_string2bytesize())
+*/
+#define QB_XATTR_KEY_FMT "trusted.glusterfs.%s.format"
+
+#define QB_XATTR_KEY_MAX 64
+
+#define QB_XATTR_VAL_MAX 64
+
+
+typedef struct qb_inode {
+ char fmt[QB_XATTR_VAL_MAX]; /* this is only the format, not "format:size" */
+ size_t size; /* virtual size in bytes */
+ BlockDriverState *bs;
+ int refcnt;
+ uuid_t backing_gfid;
+ char *backing_fname;
+} qb_inode_t;
+
+
+typedef struct qb_conf {
+ Monitor *mon;
+ struct syncenv *env;
+ char qb_xattr_key[QB_XATTR_KEY_MAX];
+ char *default_password;
+ inode_t *root_inode;
+} qb_conf_t;
+
+
+typedef struct qb_local {
+ call_frame_t *frame; /* backpointer */
+ call_stub_t *stub;
+ inode_t *inode;
+ fd_t *fd;
+ char fmt[QB_XATTR_VAL_MAX+1];
+ char name[256];
+ synctask_fn_t synctask_fn;
+ struct list_head list;
+} qb_local_t;
+
+void qb_local_free (xlator_t *this, qb_local_t *local);
+int qb_coroutine (call_frame_t *frame, synctask_fn_t fn);
+inode_t *qb_inode_from_filename (const char *filename);
+int qb_inode_to_filename (inode_t *inode, char *filename, int size);
+int qb_format_extract (xlator_t *this, char *format, inode_t *inode);
+
+qb_inode_t *qb_inode_ctx_get (xlator_t *this, inode_t *inode);
+
+#define QB_STACK_UNWIND(typ, frame, args ...) do { \
+ qb_local_t *__local = frame->local; \
+ xlator_t *__this = frame->this; \
+ \
+ frame->local = NULL; \
+ STACK_UNWIND_STRICT (typ, frame, args); \
+ if (__local) \
+ qb_local_free (__this, __local); \
+ } while (0)
+
+#define QB_STUB_UNWIND(stub, op_ret, op_errno) do { \
+ qb_local_t *__local = stub->frame->local; \
+ xlator_t *__this = stub->frame->this; \
+ \
+ stub->frame->local = NULL; \
+ call_unwind_error (stub, op_ret, op_errno); \
+ if (__local) \
+ qb_local_free (__this, __local); \
+ } while (0)
+
+#define QB_STUB_RESUME(stub_errno) do { \
+ qb_local_t *__local = stub->frame->local; \
+ xlator_t *__this = stub->frame->this; \
+ \
+ stub->frame->local = NULL; \
+ call_resume (stub); \
+ if (__local) \
+ qb_local_free (__this, __local); \
+ } while (0)
+
+#endif /* !__QEMU_BLOCK_H */
diff --git a/xlators/features/quiesce/Makefile.am b/xlators/features/quiesce/Makefile.am
new file mode 100644
index 000000000..a985f42a8
--- /dev/null
+++ b/xlators/features/quiesce/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/quiesce/src/Makefile.am b/xlators/features/quiesce/src/Makefile.am
new file mode 100644
index 000000000..15e46629e
--- /dev/null
+++ b/xlators/features/quiesce/src/Makefile.am
@@ -0,0 +1,15 @@
+xlator_LTLIBRARIES = quiesce.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+quiesce_la_LDFLAGS = -module -avoid-version
+
+quiesce_la_SOURCES = quiesce.c
+quiesce_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = quiesce.h quiesce-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/quiesce/src/quiesce-mem-types.h b/xlators/features/quiesce/src/quiesce-mem-types.h
new file mode 100644
index 000000000..6e582f424
--- /dev/null
+++ b/xlators/features/quiesce/src/quiesce-mem-types.h
@@ -0,0 +1,20 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __QUIESCE_MEM_TYPES_H__
+#define __QUIESCE_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_quiesce_mem_types_ {
+ gf_quiesce_mt_priv_t = gf_common_mt_end + 1,
+ gf_quiesce_mt_end
+};
+#endif
diff --git a/xlators/features/quiesce/src/quiesce.c b/xlators/features/quiesce/src/quiesce.c
new file mode 100644
index 000000000..24c7dc6ed
--- /dev/null
+++ b/xlators/features/quiesce/src/quiesce.c
@@ -0,0 +1,2610 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "quiesce.h"
+#include "defaults.h"
+#include "call-stub.h"
+
+/* TODO: */
+/* Think about 'writev/_*_lk/setattr/xattrop/' fops to do re-transmittion */
+
+
+/* Quiesce Specific Functions */
+void
+gf_quiesce_local_wipe (xlator_t *this, quiesce_local_t *local)
+{
+ if (!local || !this || !this->private)
+ return;
+
+ if (local->loc.inode)
+ loc_wipe (&local->loc);
+ if (local->fd)
+ fd_unref (local->fd);
+ GF_FREE (local->name);
+ GF_FREE (local->volname);
+ if (local->dict)
+ dict_unref (local->dict);
+ if (local->iobref)
+ iobref_unref (local->iobref);
+ GF_FREE (local->vector);
+
+ mem_put (local);
+}
+
+call_stub_t *
+gf_quiesce_dequeue (xlator_t *this)
+{
+ call_stub_t *stub = NULL;
+ quiesce_priv_t *priv = NULL;
+
+ priv = this->private;
+
+ if (!priv || list_empty (&priv->req))
+ return NULL;
+
+ LOCK (&priv->lock);
+ {
+ stub = list_entry (priv->req.next, call_stub_t, list);
+ list_del_init (&stub->list);
+ priv->queue_size--;
+ }
+ UNLOCK (&priv->lock);
+
+ return stub;
+}
+
+void *
+gf_quiesce_dequeue_start (void *data)
+{
+ xlator_t *this = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+
+ this = data;
+ priv = this->private;
+ THIS = this;
+
+ while (!list_empty (&priv->req)) {
+ stub = gf_quiesce_dequeue (this);
+ if (stub) {
+ call_resume (stub);
+ }
+ }
+
+ return 0;
+}
+
+
+void
+gf_quiesce_timeout (void *data)
+{
+ xlator_t *this = NULL;
+ quiesce_priv_t *priv = NULL;
+
+ this = data;
+ priv = this->private;
+ THIS = this;
+
+ LOCK (&priv->lock);
+ {
+ priv->pass_through = _gf_true;
+ }
+ UNLOCK (&priv->lock);
+
+ gf_quiesce_dequeue_start (this);
+
+ return;
+}
+
+void
+gf_quiesce_enqueue (xlator_t *this, call_stub_t *stub)
+{
+ quiesce_priv_t *priv = NULL;
+ struct timespec timeout = {0,};
+
+ priv = this->private;
+ if (!priv) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR,
+ "this->private == NULL");
+ return;
+ }
+
+ LOCK (&priv->lock);
+ {
+ list_add_tail (&stub->list, &priv->req);
+ priv->queue_size++;
+ }
+ UNLOCK (&priv->lock);
+
+ if (!priv->timer) {
+ timeout.tv_sec = 20;
+ timeout.tv_nsec = 0;
+
+ priv->timer = gf_timer_call_after (this->ctx,
+ timeout,
+ gf_quiesce_timeout,
+ (void *) this);
+ }
+
+ return;
+}
+
+
+
+/* _CBK function section */
+
+int32_t
+quiesce_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *dict, struct iatt *postparent)
+{
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_lookup_stub (frame, default_lookup_resume,
+ &local->loc, local->dict);
+ if (!stub) {
+ STACK_UNWIND_STRICT (lookup, frame, -1, ENOMEM,
+ NULL, NULL, NULL, NULL);
+ goto out;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf,
+ dict, postparent);
+out:
+ gf_quiesce_local_wipe (this, local);
+
+ return 0;
+}
+
+int32_t
+quiesce_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_stat_stub (frame, default_stat_resume,
+ &local->loc, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (stat, frame, -1, ENOMEM,
+ NULL, NULL);
+ goto out;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
+out:
+ gf_quiesce_local_wipe (this, local);
+
+ return 0;
+}
+
+int32_t
+quiesce_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_access_stub (frame, default_access_resume,
+ &local->loc, local->flag, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (access, frame, -1, ENOMEM, NULL);
+ goto out;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, xdata);
+out:
+ gf_quiesce_local_wipe (this, local);
+
+ return 0;
+}
+
+int32_t
+quiesce_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *path,
+ struct iatt *buf, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_readlink_stub (frame, default_readlink_resume,
+ &local->loc, local->size, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (readlink, frame, -1, ENOMEM,
+ NULL, NULL, NULL);
+ goto out;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, buf, xdata);
+out:
+ gf_quiesce_local_wipe (this, local);
+
+ return 0;
+}
+
+int32_t
+quiesce_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_open_stub (frame, default_open_resume,
+ &local->loc, local->flag, local->fd,
+ xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (open, frame, -1, ENOMEM,
+ NULL, NULL);
+ goto out;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+out:
+ gf_quiesce_local_wipe (this, local);
+
+ return 0;
+}
+
+int32_t
+quiesce_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_readv_stub (frame, default_readv_resume,
+ local->fd, local->size, local->offset,
+ local->io_flag, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM,
+ NULL, 0, NULL, NULL, NULL);
+ goto out;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
+ stbuf, iobref, xdata);
+out:
+ gf_quiesce_local_wipe (this, local);
+
+ return 0;
+}
+
+int32_t
+quiesce_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_flush_stub (frame, default_flush_resume,
+ local->fd, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM, NULL);
+ goto out;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
+out:
+ gf_quiesce_local_wipe (this, local);
+
+ return 0;
+}
+
+
+
+int32_t
+quiesce_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_fsync_stub (frame, default_fsync_resume,
+ local->fd, local->flag, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM,
+ NULL, NULL, NULL);
+ goto out;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+out:
+ gf_quiesce_local_wipe (this, local);
+
+ return 0;
+}
+
+int32_t
+quiesce_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_fstat_stub (frame, default_fstat_resume,
+ local->fd, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (fstat, frame, -1, ENOMEM,
+ NULL, NULL);
+ goto out;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata);
+out:
+ gf_quiesce_local_wipe (this, local);
+
+ return 0;
+}
+
+int32_t
+quiesce_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_opendir_stub (frame, default_opendir_resume,
+ &local->loc, local->fd, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (opendir, frame, -1, ENOMEM,
+ NULL, NULL);
+ goto out;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata);
+out:
+ gf_quiesce_local_wipe (this, local);
+
+ return 0;
+}
+
+int32_t
+quiesce_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_fsyncdir_stub (frame, default_fsyncdir_resume,
+ local->fd, local->flag, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (fsyncdir, frame, -1, ENOMEM, NULL);
+ goto out;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, xdata);
+out:
+ gf_quiesce_local_wipe (this, local);
+
+ return 0;
+}
+
+int32_t
+quiesce_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_statfs_stub (frame, default_statfs_resume,
+ &local->loc, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (statfs, frame, -1, ENOMEM,
+ NULL, NULL);
+ goto out;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
+out:
+ gf_quiesce_local_wipe (this, local);
+
+ return 0;
+}
+
+int32_t
+quiesce_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_fgetxattr_stub (frame, default_fgetxattr_resume,
+ local->fd, local->name, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (fgetxattr, frame, -1, ENOMEM,
+ NULL, NULL);
+ goto out;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, xdata);
+out:
+ gf_quiesce_local_wipe (this, local);
+
+ return 0;
+}
+
+
+int32_t
+quiesce_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_getxattr_stub (frame, default_getxattr_resume,
+ &local->loc, local->name, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM,
+ NULL, NULL);
+ goto out;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
+out:
+ gf_quiesce_local_wipe (this, local);
+
+ return 0;
+}
+
+
+int32_t
+quiesce_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, uint32_t weak_checksum,
+ uint8_t *strong_checksum, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_rchecksum_stub (frame, default_rchecksum_resume,
+ local->fd, local->offset, local->flag, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (rchecksum, frame, -1, ENOMEM,
+ 0, NULL, NULL);
+ goto out;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno, weak_checksum,
+ strong_checksum, xdata);
+out:
+ gf_quiesce_local_wipe (this, local);
+
+ return 0;
+}
+
+
+int32_t
+quiesce_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_readdir_stub (frame, default_readdir_resume,
+ local->fd, local->size, local->offset, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (readdir, frame, -1, ENOMEM,
+ NULL, NULL);
+ goto out;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries, xdata);
+out:
+ gf_quiesce_local_wipe (this, local);
+
+ return 0;
+}
+
+
+int32_t
+quiesce_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_readdirp_stub (frame, default_readdirp_resume,
+ local->fd, local->size, local->offset,
+ local->dict);
+ if (!stub) {
+ STACK_UNWIND_STRICT (readdirp, frame, -1, ENOMEM,
+ NULL, NULL);
+ goto out;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
+out:
+ gf_quiesce_local_wipe (this, local);
+
+ return 0;
+}
+
+
+#if 0
+
+int32_t
+quiesce_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ priv = this->private;
+
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_writev_stub (frame, default_writev_resume,
+ local->fd, local->vector, local->flag,
+ local->offset, local->io_flags,
+ local->iobref, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM,
+ NULL, NULL, NULL);
+ goto out;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+out:
+ gf_quiesce_local_wipe (this, local);
+
+ return 0;
+}
+
+int32_t
+quiesce_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ priv = this->private;
+
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_xattrop_stub (frame, default_xattrop_resume,
+ &local->loc, local->xattrop_flags,
+ local->dict, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (xattrop, frame, -1, ENOMEM,
+ NULL, NULL);
+ goto out;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict, xdata);
+out:
+ gf_quiesce_local_wipe (this, local);
+
+ return 0;
+}
+
+int32_t
+quiesce_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ priv = this->private;
+
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_fxattrop_stub (frame, default_fxattrop_resume,
+ local->fd, local->xattrop_flags,
+ local->dict, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (fxattrop, frame, -1, ENOMEM,
+ NULL, NULL);
+ goto out;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict, xdata);
+out:
+ gf_quiesce_local_wipe (this, local);
+
+ return 0;
+}
+
+int32_t
+quiesce_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ priv = this->private;
+
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_lk_stub (frame, default_lk_resume,
+ local->fd, local->flag, &local->flock, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (lk, frame, -1, ENOMEM,
+ NULL, NULL);
+ goto out;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata);
+out:
+ gf_quiesce_local_wipe (this, local);
+
+ return 0;
+}
+
+int32_t
+quiesce_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ priv = this->private;
+
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_inodelk_stub (frame, default_inodelk_resume,
+ local->volname, &local->loc,
+ local->flag, &local->flock, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (inodelk, frame, -1, ENOMEM, NULL);
+ goto out;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, xdata);
+out:
+ gf_quiesce_local_wipe (this, local);
+
+ return 0;
+}
+
+
+int32_t
+quiesce_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ priv = this->private;
+
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_finodelk_stub (frame, default_finodelk_resume,
+ local->volname, local->fd,
+ local->flag, &local->flock, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (finodelk, frame, -1, ENOMEM, NULL);
+ goto out;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno, xdata);
+out:
+ gf_quiesce_local_wipe (this, local);
+
+ return 0;
+}
+
+int32_t
+quiesce_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ priv = this->private;
+
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_entrylk_stub (frame, default_entrylk_resume,
+ local->volname, &local->loc,
+ local->name, local->cmd, local->type, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (entrylk, frame, -1, ENOMEM, NULL);
+ goto out;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, xdata);
+out:
+ gf_quiesce_local_wipe (this, local);
+
+ return 0;
+}
+
+int32_t
+quiesce_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ priv = this->private;
+
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_fentrylk_stub (frame, default_fentrylk_resume,
+ local->volname, local->fd,
+ local->name, local->cmd, local->type, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOMEM, NULL);
+ goto out;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT (fentrylk, frame, op_ret, op_errno, xdata);
+out:
+ gf_quiesce_local_wipe (this, local);
+
+ return 0;
+}
+
+int32_t
+quiesce_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ priv = this->private;
+
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_setattr_stub (frame, default_setattr_resume,
+ &local->loc, &local->stbuf, local->flag, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (setattr, frame, -1, ENOMEM,
+ NULL, NULL, NULL);
+ goto out;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, statpre,
+ statpost, xdata);
+out:
+ gf_quiesce_local_wipe (this, local);
+
+ return 0;
+}
+
+int32_t
+quiesce_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ priv = this->private;
+
+ local = frame->local;
+ frame->local = NULL;
+
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_fsetattr_stub (frame, default_fsetattr_resume,
+ local->fd, &local->stbuf, local->flag, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM,
+ NULL, NULL, NULL);
+ goto out;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno, statpre,
+ statpost, xdata);
+out:
+ gf_quiesce_local_wipe (this, local);
+
+ return 0;
+}
+
+#endif /* if 0 */
+
+
+/* FOP */
+
+/* No retransmittion */
+
+int32_t
+quiesce_removexattr (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ if (priv->pass_through) {
+ STACK_WIND (frame,
+ default_removexattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr,
+ loc,
+ name, xdata);
+ return 0;
+ }
+
+ stub = fop_removexattr_stub (frame, default_removexattr_resume,
+ loc, name, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (removexattr, frame, -1, ENOMEM, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+int32_t
+quiesce_truncate (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ off_t offset, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ if (priv->pass_through) {
+ STACK_WIND (frame,
+ default_truncate_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate,
+ loc,
+ offset, xdata);
+ return 0;
+ }
+
+ stub = fop_truncate_stub (frame, default_truncate_resume, loc, offset, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+int32_t
+quiesce_fsetxattr (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ if (priv->pass_through) {
+ STACK_WIND (frame,
+ default_fsetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ fd,
+ dict,
+ flags, xdata);
+ return 0;
+ }
+
+ stub = fop_fsetxattr_stub (frame, default_fsetxattr_resume,
+ fd, dict, flags, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (fsetxattr, frame, -1, ENOMEM, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+int32_t
+quiesce_setxattr (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ if (priv->pass_through) {
+ STACK_WIND (frame,
+ default_setxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr,
+ loc,
+ dict,
+ flags, xdata);
+ return 0;
+ }
+
+ stub = fop_setxattr_stub (frame, default_setxattr_resume,
+ loc, dict, flags, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (setxattr, frame, -1, ENOMEM, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+int32_t
+quiesce_create (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ if (priv->pass_through) {
+ /* Don't send O_APPEND below, as write() re-transmittions can
+ fail with O_APPEND */
+ STACK_WIND (frame, default_create_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create,
+ loc, (flags & ~O_APPEND), mode, umask, fd, xdata);
+ return 0;
+ }
+
+ stub = fop_create_stub (frame, default_create_resume,
+ loc, (flags & ~O_APPEND), mode, umask, fd, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (create, frame, -1, ENOMEM,
+ NULL, NULL, NULL, NULL, NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+int32_t
+quiesce_link (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ if (priv->pass_through) {
+ STACK_WIND (frame,
+ default_link_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link,
+ oldloc, newloc, xdata);
+ return 0;
+ }
+
+ stub = fop_link_stub (frame, default_link_resume, oldloc, newloc, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (link, frame, -1, ENOMEM,
+ NULL, NULL, NULL, NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+int32_t
+quiesce_rename (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ if (priv->pass_through) {
+ STACK_WIND (frame,
+ default_rename_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename,
+ oldloc, newloc, xdata);
+ return 0;
+ }
+
+ stub = fop_rename_stub (frame, default_rename_resume, oldloc, newloc, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (rename, frame, -1, ENOMEM,
+ NULL, NULL, NULL, NULL, NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+
+int
+quiesce_symlink (call_frame_t *frame, xlator_t *this,
+ const char *linkpath, loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ if (priv->pass_through) {
+ STACK_WIND (frame, default_symlink_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->symlink,
+ linkpath, loc, umask, xdata);
+ return 0;
+ }
+
+ stub = fop_symlink_stub (frame, default_symlink_resume,
+ linkpath, loc, umask, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (symlink, frame, -1, ENOMEM,
+ NULL, NULL, NULL, NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+
+int
+quiesce_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ if (priv->pass_through) {
+ STACK_WIND (frame, default_rmdir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rmdir,
+ loc, flags, xdata);
+ return 0;
+ }
+
+ stub = fop_rmdir_stub (frame, default_rmdir_resume, loc, flags, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (rmdir, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+int32_t
+quiesce_unlink (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, int xflag, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ if (priv->pass_through) {
+ STACK_WIND (frame,
+ default_unlink_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink,
+ loc, xflag, xdata);
+ return 0;
+ }
+
+ stub = fop_unlink_stub (frame, default_unlink_resume, loc, xflag, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+int
+quiesce_mkdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ if (priv->pass_through) {
+ STACK_WIND (frame, default_mkdir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir,
+ loc, mode, umask, xdata);
+ return 0;
+ }
+
+ stub = fop_mkdir_stub (frame, default_mkdir_resume,
+ loc, mode, umask, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (mkdir, frame, -1, ENOMEM,
+ NULL, NULL, NULL, NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+
+int
+quiesce_mknod (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ if (priv->pass_through) {
+ STACK_WIND (frame, default_mknod_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod,
+ loc, mode, rdev, umask, xdata);
+ return 0;
+ }
+
+ stub = fop_mknod_stub (frame, default_mknod_resume,
+ loc, mode, rdev, umask, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (mknod, frame, -1, ENOMEM,
+ NULL, NULL, NULL, NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+int32_t
+quiesce_ftruncate (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ off_t offset, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ if (priv->pass_through) {
+ STACK_WIND (frame,
+ default_ftruncate_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate,
+ fd,
+ offset, xdata);
+ return 0;
+ }
+
+ stub = fop_ftruncate_stub (frame, default_ftruncate_resume, fd, offset, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+/* Re-transmittion */
+
+int32_t
+quiesce_readlink (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ size_t size, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ priv = this->private;
+
+ if (priv && priv->pass_through) {
+ local = mem_get0 (priv->local_pool);
+ loc_dup (loc, &local->loc);
+ local->size = size;
+ frame->local = local;
+
+ STACK_WIND (frame,
+ quiesce_readlink_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readlink,
+ loc,
+ size, xdata);
+ return 0;
+ }
+
+ stub = fop_readlink_stub (frame, default_readlink_resume, loc, size, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (readlink, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+
+int32_t
+quiesce_access (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t mask, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ priv = this->private;
+
+ if (priv && priv->pass_through) {
+ local = mem_get0 (priv->local_pool);
+ loc_dup (loc, &local->loc);
+ local->flag = mask;
+ frame->local = local;
+
+ STACK_WIND (frame,
+ quiesce_access_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->access,
+ loc,
+ mask, xdata);
+ return 0;
+ }
+
+ stub = fop_access_stub (frame, default_access_resume, loc, mask, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (access, frame, -1, ENOMEM, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+int32_t
+quiesce_fgetxattr (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ priv = this->private;
+
+ if (priv && priv->pass_through) {
+ local = mem_get0 (priv->local_pool);
+ local->fd = fd_ref (fd);
+ if (name)
+ local->name = gf_strdup (name);
+
+ frame->local = local;
+
+ STACK_WIND (frame,
+ quiesce_fgetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr,
+ fd,
+ name, xdata);
+ return 0;
+ }
+
+ stub = fop_fgetxattr_stub (frame, default_fgetxattr_resume, fd, name, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (fgetxattr, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+int32_t
+quiesce_statfs (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ priv = this->private;
+
+ if (priv && priv->pass_through) {
+ local = mem_get0 (priv->local_pool);
+ loc_dup (loc, &local->loc);
+ frame->local = local;
+
+ STACK_WIND (frame,
+ quiesce_statfs_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->statfs,
+ loc, xdata);
+ return 0;
+ }
+
+ stub = fop_statfs_stub (frame, default_statfs_resume, loc, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (statfs, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+int32_t
+quiesce_fsyncdir (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t flags, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ priv = this->private;
+
+ if (priv && priv->pass_through) {
+ local = mem_get0 (priv->local_pool);
+ local->fd = fd_ref (fd);
+ local->flag = flags;
+ frame->local = local;
+
+ STACK_WIND (frame,
+ quiesce_fsyncdir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsyncdir,
+ fd,
+ flags, xdata);
+ return 0;
+ }
+
+ stub = fop_fsyncdir_stub (frame, default_fsyncdir_resume, fd, flags, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (fsyncdir, frame, -1, ENOMEM, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+int32_t
+quiesce_opendir (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, fd_t *fd, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ priv = this->private;
+
+ if (priv && priv->pass_through) {
+ local = mem_get0 (priv->local_pool);
+ loc_dup (loc, &local->loc);
+ local->fd = fd_ref (fd);
+ frame->local = local;
+
+ STACK_WIND (frame,
+ quiesce_opendir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir,
+ loc, fd, xdata);
+ return 0;
+ }
+
+ stub = fop_opendir_stub (frame, default_opendir_resume, loc, fd, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (opendir, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+int32_t
+quiesce_fstat (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ priv = this->private;
+
+ if (priv && priv->pass_through) {
+ local = mem_get0 (priv->local_pool);
+ local->fd = fd_ref (fd);
+ frame->local = local;
+
+ STACK_WIND (frame,
+ quiesce_fstat_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat,
+ fd, xdata);
+ return 0;
+ }
+
+ stub = fop_fstat_stub (frame, default_fstat_resume, fd, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (fstat, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+int32_t
+quiesce_fsync (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t flags, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ priv = this->private;
+
+ if (priv && priv->pass_through) {
+ local = mem_get0 (priv->local_pool);
+ local->fd = fd_ref (fd);
+ local->flag = flags;
+ frame->local = local;
+
+ STACK_WIND (frame,
+ quiesce_fsync_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync,
+ fd,
+ flags, xdata);
+ return 0;
+ }
+
+ stub = fop_fsync_stub (frame, default_fsync_resume, fd, flags, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+int32_t
+quiesce_flush (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ priv = this->private;
+
+ if (priv && priv->pass_through) {
+ local = mem_get0 (priv->local_pool);
+ local->fd = fd_ref (fd);
+ frame->local = local;
+
+ STACK_WIND (frame,
+ quiesce_flush_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush,
+ fd, xdata);
+ return 0;
+ }
+
+ stub = fop_flush_stub (frame, default_flush_resume, fd, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+int32_t
+quiesce_writev (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ struct iovec *vector,
+ int32_t count,
+ off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ if (priv && priv->pass_through) {
+ STACK_WIND (frame,
+ default_writev_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev,
+ fd,
+ vector,
+ count,
+ off, flags,
+ iobref, xdata);
+ return 0;
+ }
+
+ stub = fop_writev_stub (frame, default_writev_resume,
+ fd, vector, count, off, flags, iobref, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+int32_t
+quiesce_readv (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ priv = this->private;
+
+ if (priv && priv->pass_through) {
+ local = mem_get0 (priv->local_pool);
+ local->fd = fd_ref (fd);
+ local->size = size;
+ local->offset = offset;
+ local->io_flag = flags;
+ frame->local = local;
+
+ STACK_WIND (frame,
+ quiesce_readv_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv,
+ fd,
+ size,
+ offset, flags, xdata);
+ return 0;
+ }
+
+ stub = fop_readv_stub (frame, default_readv_resume, fd, size, offset,
+ flags, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM,
+ NULL, 0, NULL, NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+
+int32_t
+quiesce_open (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ int32_t flags, fd_t *fd,
+ dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ priv = this->private;
+
+ if (priv && priv->pass_through) {
+ local = mem_get0 (priv->local_pool);
+ loc_dup (loc, &local->loc);
+ local->fd = fd_ref (fd);
+
+ /* Don't send O_APPEND below, as write() re-transmittions can
+ fail with O_APPEND */
+ local->flag = (flags & ~O_APPEND);
+ frame->local = local;
+
+ STACK_WIND (frame,
+ quiesce_open_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open,
+ loc, (flags & ~O_APPEND), fd, xdata);
+ return 0;
+ }
+
+ stub = fop_open_stub (frame, default_open_resume, loc,
+ (flags & ~O_APPEND), fd, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (open, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+int32_t
+quiesce_getxattr (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ priv = this->private;
+
+ if (priv && priv->pass_through) {
+ local = mem_get0 (priv->local_pool);
+ loc_dup (loc, &local->loc);
+ if (name)
+ local->name = gf_strdup (name);
+
+ frame->local = local;
+
+ STACK_WIND (frame,
+ quiesce_getxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr,
+ loc,
+ name, xdata);
+ return 0;
+ }
+
+ stub = fop_getxattr_stub (frame, default_getxattr_resume, loc, name, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+
+int32_t
+quiesce_xattrop (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ gf_xattrop_flags_t flags,
+ dict_t *dict, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ if (priv && priv->pass_through) {
+ STACK_WIND (frame,
+ default_xattrop_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->xattrop,
+ loc,
+ flags,
+ dict, xdata);
+ return 0;
+ }
+
+ stub = fop_xattrop_stub (frame, default_xattrop_resume,
+ loc, flags, dict, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (xattrop, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+int32_t
+quiesce_fxattrop (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ gf_xattrop_flags_t flags,
+ dict_t *dict, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ if (priv && priv->pass_through) {
+ STACK_WIND (frame,
+ default_fxattrop_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fxattrop,
+ fd,
+ flags,
+ dict, xdata);
+ return 0;
+ }
+
+ stub = fop_fxattrop_stub (frame, default_fxattrop_resume,
+ fd, flags, dict, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (fxattrop, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+int32_t
+quiesce_lk (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ if (priv && priv->pass_through) {
+ STACK_WIND (frame,
+ default_lk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lk,
+ fd,
+ cmd,
+ lock, xdata);
+ return 0;
+ }
+
+ stub = fop_lk_stub (frame, default_lk_resume, fd, cmd, lock, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (lk, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+
+int32_t
+quiesce_inodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ if (priv && priv->pass_through) {
+ STACK_WIND (frame,
+ default_inodelk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->inodelk,
+ volume, loc, cmd, lock, xdata);
+ return 0;
+ }
+
+ stub = fop_inodelk_stub (frame, default_inodelk_resume,
+ volume, loc, cmd, lock, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (inodelk, frame, -1, ENOMEM, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+int32_t
+quiesce_finodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ if (priv && priv->pass_through) {
+ STACK_WIND (frame,
+ default_finodelk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk,
+ volume, fd, cmd, lock, xdata);
+ return 0;
+ }
+
+ stub = fop_finodelk_stub (frame, default_finodelk_resume,
+ volume, fd, cmd, lock, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (finodelk, frame, -1, ENOMEM, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+int32_t
+quiesce_entrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ if (priv && priv->pass_through) {
+ STACK_WIND (frame, default_entrylk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->entrylk,
+ volume, loc, basename, cmd, type, xdata);
+ return 0;
+ }
+
+ stub = fop_entrylk_stub (frame, default_entrylk_resume,
+ volume, loc, basename, cmd, type, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (entrylk, frame, -1, ENOMEM, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+int32_t
+quiesce_fentrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ if (priv && priv->pass_through) {
+ STACK_WIND (frame, default_fentrylk_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fentrylk,
+ volume, fd, basename, cmd, type, xdata);
+ return 0;
+ }
+
+ stub = fop_fentrylk_stub (frame, default_fentrylk_resume,
+ volume, fd, basename, cmd, type, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOMEM, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+int32_t
+quiesce_rchecksum (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd, off_t offset,
+ int32_t len, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ priv = this->private;
+
+ if (priv && priv->pass_through) {
+ local = mem_get0 (priv->local_pool);
+ local->fd = fd_ref (fd);
+ local->offset = offset;
+ local->flag = len;
+ frame->local = local;
+
+ STACK_WIND (frame,
+ quiesce_rchecksum_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rchecksum,
+ fd, offset, len, xdata);
+ return 0;
+ }
+
+ stub = fop_rchecksum_stub (frame, default_rchecksum_resume,
+ fd, offset, len, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (rchecksum, frame, -1, ENOMEM, 0, NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+
+int32_t
+quiesce_readdir (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size,
+ off_t off, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ priv = this->private;
+
+ if (priv && priv->pass_through) {
+ local = mem_get0 (priv->local_pool);
+ local->fd = fd_ref (fd);
+ local->size = size;
+ local->offset = off;
+ frame->local = local;
+
+ STACK_WIND (frame,
+ quiesce_readdir_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdir,
+ fd, size, off, xdata);
+ return 0;
+ }
+
+ stub = fop_readdir_stub (frame, default_readdir_resume, fd, size, off, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (readdir, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+
+int32_t
+quiesce_readdirp (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ size_t size,
+ off_t off, dict_t *dict)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ priv = this->private;
+
+ if (priv && priv->pass_through) {
+ local = mem_get0 (priv->local_pool);
+ local->fd = fd_ref (fd);
+ local->size = size;
+ local->offset = off;
+ local->dict = dict_ref (dict);
+ frame->local = local;
+
+ STACK_WIND (frame,
+ quiesce_readdirp_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp,
+ fd, size, off, dict);
+ return 0;
+ }
+
+ stub = fop_readdirp_stub (frame, default_readdirp_resume, fd, size,
+ off, dict);
+ if (!stub) {
+ STACK_UNWIND_STRICT (readdirp, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+int32_t
+quiesce_setattr (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ if (priv && priv->pass_through) {
+ STACK_WIND (frame,
+ default_setattr_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->setattr,
+ loc, stbuf, valid, xdata);
+ return 0;
+ }
+
+ stub = fop_setattr_stub (frame, default_setattr_resume,
+ loc, stbuf, valid, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (setattr, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+
+int32_t
+quiesce_stat (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ priv = this->private;
+
+ if (priv && priv->pass_through) {
+ local = mem_get0 (priv->local_pool);
+ loc_dup (loc, &local->loc);
+ frame->local = local;
+
+ STACK_WIND (frame,
+ quiesce_stat_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat,
+ loc, xdata);
+ return 0;
+ }
+
+ stub = fop_stat_stub (frame, default_stat_resume, loc, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (stat, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+int32_t
+quiesce_lookup (call_frame_t *frame,
+ xlator_t *this,
+ loc_t *loc,
+ dict_t *xattr_req)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ priv = this->private;
+
+ if (priv && priv->pass_through) {
+ local = mem_get0 (priv->local_pool);
+ loc_dup (loc, &local->loc);
+ local->dict = dict_ref (xattr_req);
+ frame->local = local;
+
+ STACK_WIND (frame,
+ quiesce_lookup_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup,
+ loc, xattr_req);
+ return 0;
+ }
+
+ stub = fop_lookup_stub (frame, default_lookup_resume, loc, xattr_req);
+ if (!stub) {
+ STACK_UNWIND_STRICT (lookup, frame, -1, ENOMEM,
+ NULL, NULL, NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+int32_t
+quiesce_fsetattr (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ if (priv && priv->pass_through) {
+ STACK_WIND (frame,
+ default_fsetattr_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fsetattr,
+ fd, stbuf, valid, xdata);
+ return 0;
+ }
+
+ stub = fop_fsetattr_stub (frame, default_fsetattr_resume,
+ fd, stbuf, valid, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue (this, stub);
+
+ return 0;
+}
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ ret = xlator_mem_acct_init (this, gf_quiesce_mt_end + 1);
+
+ return ret;
+}
+
+int
+init (xlator_t *this)
+{
+ int ret = -1;
+ quiesce_priv_t *priv = NULL;
+
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "'quiesce' not configured with exactly one child");
+ goto out;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+
+ priv = GF_CALLOC (1, sizeof (*priv), gf_quiesce_mt_priv_t);
+ if (!priv)
+ goto out;
+
+ priv->local_pool = mem_pool_new (quiesce_local_t,
+ GF_FOPS_EXPECTED_IN_PARALLEL);
+
+ LOCK_INIT (&priv->lock);
+ priv->pass_through = _gf_false;
+
+ INIT_LIST_HEAD (&priv->req);
+
+ this->private = priv;
+ ret = 0;
+out:
+ return ret;
+}
+
+void
+fini (xlator_t *this)
+{
+ quiesce_priv_t *priv = NULL;
+
+ priv = this->private;
+ if (!priv)
+ goto out;
+ this->private = NULL;
+
+ mem_pool_destroy (priv->local_pool);
+ LOCK_DESTROY (&priv->lock);
+ GF_FREE (priv);
+out:
+ return;
+}
+
+int
+notify (xlator_t *this, int event, void *data, ...)
+{
+ int ret = 0;
+ quiesce_priv_t *priv = NULL;
+ struct timespec timeout = {0,};
+
+ priv = this->private;
+ if (!priv)
+ goto out;
+
+ switch (event) {
+ case GF_EVENT_CHILD_UP:
+ {
+ ret = pthread_create (&priv->thr, NULL, gf_quiesce_dequeue_start,
+ this);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create the quiesce-dequeue thread");
+ }
+
+ LOCK (&priv->lock);
+ {
+ priv->pass_through = _gf_true;
+ }
+ UNLOCK (&priv->lock);
+ break;
+ }
+ case GF_EVENT_CHILD_DOWN:
+ LOCK (&priv->lock);
+ {
+ priv->pass_through = _gf_false;
+ }
+ UNLOCK (&priv->lock);
+
+ if (priv->timer)
+ break;
+ timeout.tv_sec = 20;
+ timeout.tv_nsec = 0;
+
+ priv->timer = gf_timer_call_after (this->ctx,
+ timeout,
+ gf_quiesce_timeout,
+ (void *) this);
+
+ if (priv->timer == NULL) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Cannot create timer");
+ }
+
+ break;
+ default:
+ break;
+ }
+
+ ret = default_notify (this, event, data);
+out:
+ return ret;
+}
+
+
+struct xlator_fops fops = {
+ /* write/modifying fops */
+ .mknod = quiesce_mknod,
+ .create = quiesce_create,
+ .truncate = quiesce_truncate,
+ .ftruncate = quiesce_ftruncate,
+ .setxattr = quiesce_setxattr,
+ .removexattr = quiesce_removexattr,
+ .symlink = quiesce_symlink,
+ .unlink = quiesce_unlink,
+ .link = quiesce_link,
+ .mkdir = quiesce_mkdir,
+ .rmdir = quiesce_rmdir,
+ .rename = quiesce_rename,
+
+ /* The below calls are known to change state, hence
+ re-transmittion is not advised */
+ .lk = quiesce_lk,
+ .inodelk = quiesce_inodelk,
+ .finodelk = quiesce_finodelk,
+ .entrylk = quiesce_entrylk,
+ .fentrylk = quiesce_fentrylk,
+ .xattrop = quiesce_xattrop,
+ .fxattrop = quiesce_fxattrop,
+ .setattr = quiesce_setattr,
+ .fsetattr = quiesce_fsetattr,
+
+ /* Special case, re-transmittion is not harmful *
+ * as offset is properly sent from above layers */
+ /* TODO: not re-transmitted as of now */
+ .writev = quiesce_writev,
+
+ /* re-transmittable fops */
+ .lookup = quiesce_lookup,
+ .stat = quiesce_stat,
+ .fstat = quiesce_fstat,
+ .access = quiesce_access,
+ .readlink = quiesce_readlink,
+ .getxattr = quiesce_getxattr,
+ .open = quiesce_open,
+ .readv = quiesce_readv,
+ .flush = quiesce_flush,
+ .fsync = quiesce_fsync,
+ .statfs = quiesce_statfs,
+ .opendir = quiesce_opendir,
+ .readdir = quiesce_readdir,
+ .readdirp = quiesce_readdirp,
+ .fsyncdir = quiesce_fsyncdir,
+
+};
+
+struct xlator_dumpops dumpops;
+
+
+struct xlator_cbks cbks;
+
+
+struct volume_options options[] = {
+ { .key = {NULL} },
+};
diff --git a/xlators/features/quiesce/src/quiesce.h b/xlators/features/quiesce/src/quiesce.h
new file mode 100644
index 000000000..878ed77e9
--- /dev/null
+++ b/xlators/features/quiesce/src/quiesce.h
@@ -0,0 +1,51 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __QUIESCE_H__
+#define __QUIESCE_H__
+
+#include "quiesce-mem-types.h"
+#include "xlator.h"
+#include "timer.h"
+
+#define GF_FOPS_EXPECTED_IN_PARALLEL 512
+
+typedef struct {
+ gf_timer_t *timer;
+ gf_boolean_t pass_through;
+ gf_lock_t lock;
+ struct list_head req;
+ int queue_size;
+ pthread_t thr;
+ struct mem_pool *local_pool;
+} quiesce_priv_t;
+
+typedef struct {
+ fd_t *fd;
+ char *name;
+ char *volname;
+ loc_t loc;
+ off_t size;
+ off_t offset;
+ mode_t mode;
+ int32_t flag;
+ struct iatt stbuf;
+ struct iovec *vector;
+ struct iobref *iobref;
+ dict_t *dict;
+ struct gf_flock flock;
+ entrylk_cmd cmd;
+ entrylk_type type;
+ gf_xattrop_flags_t xattrop_flags;
+ int32_t wbflags;
+ uint32_t io_flag;
+} quiesce_local_t;
+
+#endif
diff --git a/xlators/features/quota/src/Makefile.am b/xlators/features/quota/src/Makefile.am
index 886d83964..9546f4276 100644
--- a/xlators/features/quota/src/Makefile.am
+++ b/xlators/features/quota/src/Makefile.am
@@ -1,13 +1,17 @@
xlator_LTLIBRARIES = quota.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-quota_la_LDFLAGS = -module -avoidversion
+quota_la_LDFLAGS = -module -avoid-version
quota_la_SOURCES = quota.c
-quota_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+quota_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+noinst_HEADERS = quota-mem-types.h quota.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/xlators/cluster/dht/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/features/quota/src/quota-mem-types.h b/xlators/features/quota/src/quota-mem-types.h
new file mode 100644
index 000000000..3082865da
--- /dev/null
+++ b/xlators/features/quota/src/quota-mem-types.h
@@ -0,0 +1,27 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __QUOTA_MEM_TYPES_H__
+#define __QUOTA_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_quota_mem_types_ {
+ gf_quota_mt_quota_priv_t = gf_common_mt_end + 1,
+ gf_quota_mt_quota_inode_ctx_t,
+ gf_quota_mt_loc_t,
+ gf_quota_mt_char,
+ gf_quota_mt_int64_t,
+ gf_quota_mt_int32_t,
+ gf_quota_mt_limits_t,
+ gf_quota_mt_quota_dentry_t,
+ gf_quota_mt_end
+};
+#endif
+
diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c
index 94a7ec83b..c527e7ca7 100644
--- a/xlators/features/quota/src/quota.c
+++ b/xlators/features/quota/src/quota.c
@@ -1,1059 +1,3503 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
+#include <fnmatch.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+#include "quota.h"
+#include "common-utils.h"
+#include "defaults.h"
-#include <sys/time.h>
+int32_t
+quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this,
+ char *name, uuid_t par);
+struct volume_options options[];
-#include "xlator.h"
-#include "defaults.h"
-#include "common-utils.h"
+int
+quota_loc_fill (loc_t *loc, inode_t *inode, inode_t *parent, char *path)
+{
+ int ret = -1;
-struct quota_local {
- struct stat stbuf;
- inode_t *inode;
- char *path;
- fd_t *fd;
- off_t offset;
- int32_t count;
- struct iovec *vector;
- struct iobref *iobref;
- loc_t loc;
-};
+ if (!loc) {
+ return ret;
+ }
+ if (inode) {
+ loc->inode = inode_ref (inode);
+ }
-struct quota_priv {
- char only_first_time; /* Used to make sure a call is done only one time */
- gf_lock_t lock; /* Used while updating variables */
+ if (parent) {
+ loc->parent = inode_ref (parent);
+ }
- uint64_t disk_usage_limit; /* Used for Disk usage quota */
- uint64_t current_disk_usage; /* Keep the current usage value */
+ loc->path = gf_strdup (path);
+ if (!loc->path) {
+ goto loc_wipe;
+ }
- uint32_t min_free_disk_limit; /* user specified limit, in %*/
- uint32_t current_free_disk; /* current free disk space available, in % */
- uint32_t refresh_interval; /* interval in seconds */
- uint32_t min_disk_last_updated_time; /* used for interval calculation */
-};
+ loc->name = strrchr (loc->path, '/');
+ if (loc->name) {
+ loc->name++;
+ } else {
+ goto loc_wipe;
+ }
+
+ ret = 0;
+
+loc_wipe:
+ if (ret < 0) {
+ loc_wipe (loc);
+ }
+
+ return ret;
+}
int
-quota_statvfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *stbuf)
-{
- struct quota_priv *priv = this->private;
-
- if (op_ret >= 0) {
- priv->current_free_disk =
- (stbuf->f_bavail * 100) / stbuf->f_blocks;
- }
+quota_inode_loc_fill (inode_t *inode, loc_t *loc)
+{
+ char *resolvedpath = NULL;
+ inode_t *parent = NULL;
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ if ((!inode) || (!loc)) {
+ return ret;
+ }
+
+ this = THIS;
+
+ if ((inode) && __is_root_gfid (inode->gfid)) {
+ loc->parent = NULL;
+ goto ignore_parent;
+ }
+
+ parent = inode_parent (inode, 0, NULL);
+ if (!parent) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "cannot find parent for inode (gfid:%s)",
+ uuid_utoa (inode->gfid));
+ goto err;
+ }
+
+ignore_parent:
+ ret = inode_path (inode, NULL, &resolvedpath);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "cannot construct path for inode (gfid:%s)",
+ uuid_utoa (inode->gfid));
+ goto err;
+ }
+
+ ret = quota_loc_fill (loc, inode, parent, resolvedpath);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING, "cannot fill loc");
+ goto err;
+ }
+
+err:
+ if (parent) {
+ inode_unref (parent);
+ }
+
+ GF_FREE (resolvedpath);
- STACK_DESTROY (frame->root);
- return 0;
+ return ret;
}
-static void
-build_root_loc (xlator_t *this, loc_t *loc)
+int32_t
+quota_local_cleanup (xlator_t *this, quota_local_t *local)
{
- memset (loc, 0, sizeof (*loc));
- loc->path = "/";
+ if (local == NULL) {
+ goto out;
+ }
+
+ loc_wipe (&local->loc);
+ loc_wipe (&local->newloc);
+ loc_wipe (&local->oldloc);
+ loc_wipe (&local->validate_loc);
+
+ inode_unref (local->inode);
+ LOCK_DESTROY (&local->lock);
+
+ mem_put (local);
+out:
+ return 0;
}
-void
-gf_quota_usage_subtract (xlator_t *this, size_t size)
+static inline quota_local_t *
+quota_local_new ()
{
- struct quota_priv *priv = NULL;
+ quota_local_t *local = NULL;
+ local = mem_get0 (THIS->local_pool);
+ if (local)
+ LOCK_INIT (&local->lock);
+ return local;
+}
- priv = this->private;
- LOCK (&priv->lock);
- {
- if (priv->current_disk_usage < size)
- priv->current_disk_usage = 0;
- else
- priv->current_disk_usage -= size;
- }
- UNLOCK (&priv->lock);
+quota_dentry_t *
+__quota_dentry_new (quota_inode_ctx_t *ctx, char *name, uuid_t par)
+{
+ quota_dentry_t *dentry = NULL;
+ GF_UNUSED int32_t ret = 0;
+
+ QUOTA_ALLOC_OR_GOTO (dentry, quota_dentry_t, err);
+
+ INIT_LIST_HEAD (&dentry->next);
+
+ dentry->name = gf_strdup (name);
+ if (dentry->name == NULL) {
+ GF_FREE (dentry);
+ goto err;
+ }
+
+ uuid_copy (dentry->par, par);
+
+ list_add_tail (&dentry->next, &ctx->parents);
+err:
+ return dentry;
}
void
-gf_quota_usage_add (xlator_t *this, size_t size)
+__quota_dentry_free (quota_dentry_t *dentry)
{
- struct quota_priv *priv = this->private;
+ if (dentry == NULL) {
+ goto out;
+ }
- LOCK (&priv->lock);
- {
- priv->current_disk_usage += size;
- }
- UNLOCK (&priv->lock);
+ list_del_init (&dentry->next);
+
+ GF_FREE (dentry->name);
+ GF_FREE (dentry);
+out:
+ return;
}
-void
-gf_quota_update_current_free_disk (xlator_t *this)
+int32_t
+quota_validate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- call_frame_t *frame = NULL;
- call_pool_t *pool = NULL;
- loc_t loc;
+ quota_local_t *local = NULL;
+ uint32_t validate_count = 0, link_count = 0;
+ int32_t ret = 0;
+ quota_inode_ctx_t *ctx = NULL;
+ int64_t *size = 0;
+ uint64_t value = 0;
+ call_stub_t *stub = NULL;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ goto unwind;
+ }
+
+ GF_ASSERT (local);
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO_WITH_ERROR ("quota", this, unwind, op_errno,
+ EINVAL);
+ GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, dict, unwind, op_errno,
+ EINVAL);
+
+ ret = inode_ctx_get (local->validate_loc.inode, this, &value);
+
+ ctx = (quota_inode_ctx_t *)(unsigned long)value;
+ if ((ret == -1) || (ctx == NULL)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "quota context is not present in inode (gfid:%s)",
+ uuid_utoa (local->validate_loc.inode->gfid));
+ op_errno = EINVAL;
+ goto unwind;
+ }
+
+ ret = dict_get_bin (dict, QUOTA_SIZE_KEY, (void **) &size);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "size key not present in dict");
+ op_errno = EINVAL;
+ goto unwind;
+ }
- pool = this->ctx->pool;
- frame = create_frame (this, pool);
-
- build_root_loc (this, &loc);
+ local->just_validated = 1; /* so that we don't go into infinite
+ * loop of validation and checking
+ * limit when timeout is zero.
+ */
+ LOCK (&ctx->lock);
+ {
+ ctx->size = ntoh64 (*size);
+ gettimeofday (&ctx->tv, NULL);
+ }
+ UNLOCK (&ctx->lock);
+
+ quota_check_limit (frame, local->validate_loc.inode, this, NULL, NULL);
+ return 0;
+
+unwind:
+ LOCK (&local->lock);
+ {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
- STACK_WIND (frame, quota_statvfs_cbk,
- this->children->xlator,
- this->children->xlator->fops->statfs, &loc);
+ validate_count = --local->validate_count;
+ link_count = local->link_count;
- return ;
+ if ((validate_count == 0) && (link_count == 0)) {
+ stub = local->stub;
+ local->stub = NULL;
+ }
+ }
+ UNLOCK (&local->lock);
+
+ if (stub != NULL) {
+ call_resume (stub);
+ }
+
+ return 0;
}
-int
-gf_quota_check_free_disk (xlator_t *this)
-{
- struct quota_priv * priv = NULL;
- struct timeval tv = {0, 0};
-
- priv = this->private;
- if (priv->min_free_disk_limit) {
- gettimeofday (&tv, NULL);
- if (tv.tv_sec > (priv->refresh_interval +
- priv->min_disk_last_updated_time)) {
- priv->min_disk_last_updated_time = tv.tv_sec;
- gf_quota_update_current_free_disk (this);
- }
- if (priv->current_free_disk <= priv->min_free_disk_limit)
- return -1;
- }
+static inline uint64_t
+quota_time_elapsed (struct timeval *now, struct timeval *then)
+{
+ return (now->tv_sec - then->tv_sec);
+}
+
+
+int32_t
+quota_timeout (struct timeval *tv, int32_t timeout)
+{
+ struct timeval now = {0,};
+ int32_t timed_out = 0;
+
+ gettimeofday (&now, NULL);
+
+ if (quota_time_elapsed (&now, tv) >= timeout) {
+ timed_out = 1;
+ }
- return 0;
+ return timed_out;
}
-int
-quota_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+int32_t
+quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this,
+ char *name, uuid_t par)
{
- struct quota_priv *priv = this->private;
- struct quota_local *local = NULL;
+ int32_t ret = -1;
+ inode_t *_inode = NULL, *parent = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_priv_t *priv = NULL;
+ quota_local_t *local = NULL;
+ char need_validate = 0, need_unwind = 0;
+ int64_t delta = 0;
+ call_stub_t *stub = NULL;
+ int32_t validate_count = 0, link_count = 0;
+ uint64_t value = 0;
+ char just_validated = 0;
+ uuid_t trav_uuid = {0,};
- local = frame->local;
+ GF_VALIDATE_OR_GOTO ("quota", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, frame, out);
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
- if ((op_ret >= 0) && priv->disk_usage_limit) {
- gf_quota_usage_subtract (this, (local->stbuf.st_blocks -
- buf->st_blocks) * 512);
- loc_wipe (&local->loc);
- }
+ local = frame->local;
+ GF_VALIDATE_OR_GOTO (this->name, local, out);
+
+ delta = local->delta;
+
+ GF_VALIDATE_OR_GOTO (this->name, local->stub, out);
+
+ priv = this->private;
+
+ inode_ctx_get (inode, this, &value);
+ ctx = (quota_inode_ctx_t *)(unsigned long)value;
+
+ _inode = inode_ref (inode);
+
+ LOCK (&local->lock);
+ {
+ just_validated = local->just_validated;
+ local->just_validated = 0;
+
+ if (just_validated) {
+ local->validate_count--;
+ }
+ }
+ UNLOCK (&local->lock);
+
+ if ( par != NULL ) {
+ uuid_copy (trav_uuid, par);
+ }
+
+ do {
+ if (ctx != NULL) {
+ LOCK (&ctx->lock);
+ {
+ if (ctx->limit >= 0) {
+ if (!just_validated
+ && quota_timeout (&ctx->tv,
+ priv->timeout)) {
+ need_validate = 1;
+ } else if ((ctx->size + delta)
+ >= ctx->limit) {
+ local->op_ret = -1;
+ local->op_errno = EDQUOT;
+ need_unwind = 1;
+ }
+ }
+ }
+ UNLOCK (&ctx->lock);
+
+ if (need_validate) {
+ goto validate;
+ }
+
+ if (need_unwind) {
+ break;
+ }
+ }
+
+ if (__is_root_gfid (_inode->gfid)) {
+ break;
+ }
+
+ parent = inode_parent (_inode, trav_uuid, name);
+
+ if (name != NULL) {
+ name = NULL;
+ uuid_clear (trav_uuid);
+ }
+
+ if (parent == NULL) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "cannot find parent for inode (gfid:%s), hence "
+ "aborting enforcing quota-limits and continuing"
+ " with the fop", uuid_utoa (_inode->gfid));
+ }
+
+ inode_unref (_inode);
+ _inode = parent;
+ just_validated = 0;
+
+ if (_inode == NULL) {
+ break;
+ }
+
+ value = 0;
+ inode_ctx_get (_inode, this, &value);
+ ctx = (quota_inode_ctx_t *)(unsigned long)value;
+ } while (1);
+
+ ret = 0;
+
+ if (_inode != NULL) {
+ inode_unref (_inode);
+ }
+
+ LOCK (&local->lock);
+ {
+ validate_count = local->validate_count;
+ link_count = local->link_count;
+ if ((validate_count == 0) && (link_count == 0)) {
+ stub = local->stub;
+ local->stub = NULL;
+ }
+ }
+ UNLOCK (&local->lock);
+
+ if (stub != NULL) {
+ call_resume (stub);
+ }
+
+out:
+ return ret;
+
+validate:
+ LOCK (&local->lock);
+ {
+ loc_wipe (&local->validate_loc);
+
+ if (just_validated) {
+ local->validate_count--;
+ }
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
+ local->validate_count++;
+ ret = quota_inode_loc_fill (_inode, &local->validate_loc);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "cannot fill loc for inode (gfid:%s), hence "
+ "aborting quota-checks and continuing with fop",
+ uuid_utoa (_inode->gfid));
+ local->validate_count--;
+ }
+ }
+ UNLOCK (&local->lock);
+
+ if (ret < 0) {
+ goto loc_fill_failed;
+ }
+
+ STACK_WIND (frame, quota_validate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, &local->validate_loc,
+ QUOTA_SIZE_KEY, NULL);
+
+loc_fill_failed:
+ inode_unref (_inode);
+ return 0;
}
-int
-quota_truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+int32_t
+quota_get_limit_value (inode_t *inode, xlator_t *this, int64_t *n)
{
- struct quota_local *local = NULL;
- struct quota_priv *priv = NULL;
+ int32_t ret = 0;
+ char *path = NULL;
+ limits_t *limit_node = NULL;
+ quota_priv_t *priv = NULL;
+
+ if (inode == NULL || n == NULL) {
+ ret = -1;
+ goto out;
+ }
- priv = this->private;
- local = frame->local;
+ *n = 0;
- if (op_ret >= 0) {
- local->stbuf = *buf;
- }
+ ret = inode_path (inode, NULL, &path);
+ if (ret < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ priv = this->private;
+
+ list_for_each_entry (limit_node, &priv->limit_head, limit_list) {
+ if (strcmp (limit_node->path, path) == 0) {
+ *n = limit_node->value;
+ break;
+ }
+ }
- STACK_WIND (frame, quota_truncate_cbk,
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->truncate,
- &local->loc, local->offset);
- return 0;
+out:
+ GF_FREE (path);
+
+ return ret;
}
-int
-quota_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset)
+static int32_t
+__quota_init_inode_ctx (inode_t *inode, int64_t limit, xlator_t *this,
+ dict_t *dict, struct iatt *buf,
+ quota_inode_ctx_t **context)
{
- struct quota_local *local = NULL;
- struct quota_priv *priv = NULL;
+ int32_t ret = -1;
+ int64_t *size = 0;
+ quota_inode_ctx_t *ctx = NULL;
- priv = this->private;
+ if (inode == NULL) {
+ goto out;
+ }
- if (priv->disk_usage_limit) {
- local = CALLOC (1, sizeof (struct quota_local));
- frame->local = local;
+ QUOTA_ALLOC_OR_GOTO (ctx, quota_inode_ctx_t, out);
- loc_copy (&local->loc, loc);
- local->offset = offset;
+ ctx->limit = limit;
+ if (buf)
+ ctx->buf = *buf;
- STACK_WIND (frame, quota_truncate_stat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc);
- return 0;
- }
+ LOCK_INIT(&ctx->lock);
- STACK_WIND (frame, quota_truncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate,
- loc, offset);
- return 0;
+ if (context != NULL) {
+ *context = ctx;
+ }
+
+ INIT_LIST_HEAD (&ctx->parents);
+
+ if (dict != NULL) {
+ ret = dict_get_bin (dict, QUOTA_SIZE_KEY, (void **) &size);
+ if (ret == 0) {
+ ctx->size = ntoh64 (*size);
+ gettimeofday (&ctx->tv, NULL);
+ }
+ }
+
+ ret = __inode_ctx_put (inode, this, (uint64_t )(long)ctx);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "cannot set quota context in inode (gfid:%s)",
+ uuid_utoa (inode->gfid));
+ }
+out:
+ return ret;
}
-int
-quota_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+static int32_t
+quota_inode_ctx_get (inode_t *inode, int64_t limit, xlator_t *this,
+ dict_t *dict, struct iatt *buf, quota_inode_ctx_t **ctx,
+ char create_if_absent)
+{
+ int32_t ret = 0;
+ uint64_t ctx_int;
+
+ LOCK (&inode->lock);
+ {
+ ret = __inode_ctx_get (inode, this, &ctx_int);
+
+ if ((ret == 0) && (ctx != NULL)) {
+ *ctx = (quota_inode_ctx_t *) (unsigned long)ctx_int;
+ } else if (create_if_absent) {
+ ret = __quota_init_inode_ctx (inode, limit, this, dict,
+ buf, ctx);
+ }
+ }
+ UNLOCK (&inode->lock);
+
+ return ret;
+}
+
+
+int32_t
+quota_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *dict, struct iatt *postparent)
{
- struct quota_priv *priv = NULL;
- struct quota_local *local = NULL;
+ int32_t ret = -1;
+ char found = 0;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_dentry_t *dentry = NULL;
+ int64_t *size = 0;
+ uint64_t value = 0;
+ limits_t *limit_node = NULL;
+ quota_priv_t *priv = NULL;
+
+ local = frame->local;
+
+ priv = this->private;
+
+ inode_ctx_get (inode, this, &value);
+ ctx = (quota_inode_ctx_t *)(unsigned long)value;
+
+ if ((op_ret < 0) || (local == NULL)
+ || (((ctx == NULL) || (ctx->limit == local->limit))
+ && (local->limit < 0) && !((IA_ISREG (buf->ia_type))
+ || (IA_ISLNK (buf->ia_type))))) {
+ goto unwind;
+ }
- local = frame->local;
- priv = this->private;
+ LOCK (&priv->lock);
+ {
+ list_for_each_entry (limit_node, &priv->limit_head,
+ limit_list) {
+ if (strcmp (local->loc.path, limit_node->path) == 0) {
+ uuid_copy (limit_node->gfid, buf->ia_gfid);
+ break;
+ }
+ }
+ }
+ UNLOCK (&priv->lock);
+
+ ret = quota_inode_ctx_get (local->loc.inode, local->limit, this, dict,
+ buf, &ctx, 1);
+ if ((ret == -1) || (ctx == NULL)) {
+ gf_log (this->name, GF_LOG_WARNING, "cannot create quota "
+ "context in inode(gfid:%s)",
+ uuid_utoa (local->loc.inode->gfid));
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
- if ((op_ret >= 0) && priv->disk_usage_limit) {
- gf_quota_usage_subtract (this, (local->stbuf.st_blocks -
- buf->st_blocks) * 512);
- fd_unref (local->fd);
- }
+ LOCK (&ctx->lock);
+ {
+
+ if (dict != NULL) {
+ ret = dict_get_bin (dict, QUOTA_SIZE_KEY,
+ (void **) &size);
+ if (ret == 0) {
+ ctx->size = ntoh64 (*size);
+ gettimeofday (&ctx->tv, NULL);
+ }
+ }
+
+ if (local->limit != ctx->limit) {
+ ctx->limit = local->limit;
+ }
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
+ ctx->buf = *buf;
+
+ if (!(IA_ISREG (buf->ia_type) || IA_ISLNK (buf->ia_type))) {
+ goto unlock;
+ }
+
+ if (local->loc.name == NULL)
+ goto unlock;
+
+ list_for_each_entry (dentry, &ctx->parents, next) {
+ if ((strcmp (dentry->name, local->loc.name) == 0) &&
+ (uuid_compare (local->loc.parent->gfid,
+ dentry->par) == 0)) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (!found) {
+ dentry = __quota_dentry_new (ctx,
+ (char *)local->loc.name,
+ local->loc.parent->gfid);
+ if (dentry == NULL) {
+ /*
+ gf_log (this->name, GF_LOG_WARNING,
+ "cannot create a new dentry (par:%"
+ PRId64", name:%s) for inode(ino:%"
+ PRId64", gfid:%s)",
+ uuid_utoa (local->loc.inode->gfid));
+ */
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
+unlock:
+ UNLOCK (&ctx->lock);
+
+unwind:
+ QUOTA_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf,
+ dict, postparent);
+ return 0;
}
-int
-quota_ftruncate_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+int32_t
+quota_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xattr_req)
{
- struct quota_local *local = NULL;
- struct quota_priv *priv = NULL;
+ int32_t ret = -1;
+ int64_t limit = -1;
+ limits_t *limit_node = NULL;
+ gf_boolean_t dict_newed = _gf_false;
+ quota_priv_t *priv = NULL;
+ quota_local_t *local = NULL;
+
+ priv = this->private;
+
+ list_for_each_entry (limit_node, &priv->limit_head, limit_list) {
+ if (strcmp (limit_node->path, loc->path) == 0) {
+ limit = limit_node->value;
+ }
+ }
- priv = this->private;
- local = frame->local;
+ local = quota_local_new ();
+ if (local == NULL) {
+ goto err;
+ }
- if (op_ret >= 0) {
- local->stbuf = *buf;
- }
+ ret = loc_copy (&local->loc, loc);
+ if (ret == -1) {
+ goto err;
+ }
+
+ frame->local = local;
+
+ local->limit = limit;
+
+ if (limit < 0) {
+ goto wind;
+ }
+
+ if (xattr_req == NULL) {
+ xattr_req = dict_new ();
+ dict_newed = _gf_true;
+ }
+
+ ret = dict_set_uint64 (xattr_req, QUOTA_SIZE_KEY, 0);
+ if (ret < 0) {
+ goto err;
+ }
+
+wind:
+ STACK_WIND (frame, quota_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
+
+ ret = 0;
+
+err:
+ if (ret < 0) {
+ QUOTA_STACK_UNWIND (lookup, frame, -1, ENOMEM,
+ NULL, NULL, NULL, NULL);
+ }
+
+ if (dict_newed == _gf_true) {
+ dict_unref (xattr_req);
+ }
- STACK_WIND (frame, quota_ftruncate_cbk,
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->ftruncate,
- local->fd, local->offset);
- return 0;
+ return 0;
}
-int
-quota_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset)
+void
+quota_update_size (xlator_t *this, inode_t *inode, char *name, uuid_t par,
+ int64_t delta)
{
- struct quota_local *local = NULL;
- struct quota_priv *priv = NULL;
+ inode_t *_inode = NULL;
+ inode_t *parent = NULL;
+ uint64_t value = 0;
+ quota_inode_ctx_t *ctx = NULL;
+ uuid_t trav_uuid = {0,};
+
+ GF_VALIDATE_OR_GOTO ("quota", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
+ inode_ctx_get (inode, this, &value);
+ ctx = (quota_inode_ctx_t *)(unsigned long)value;
- priv = this->private;
+ _inode = inode_ref (inode);
- if (priv->disk_usage_limit) {
- local = CALLOC (1, sizeof (struct quota_local));
- frame->local = local;
+ if ( par != NULL ) {
+ uuid_copy (trav_uuid, par);
+ }
- local->fd = fd_ref (fd);
- local->offset = offset;
+ do {
+ if ((ctx != NULL) && (ctx->limit >= 0)) {
+ LOCK (&ctx->lock);
+ {
+ ctx->size += delta;
+ }
+ UNLOCK (&ctx->lock);
+ }
- STACK_WIND (frame, quota_ftruncate_fstat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd);
- return 0;
- }
+ if (__is_root_gfid (_inode->gfid)) {
+ break;
+ }
+
+ parent = inode_parent (_inode, trav_uuid, name);
+ if (parent == NULL) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "cannot find parent for inode (gfid:%s), hence "
+ "aborting size updation of parents",
+ uuid_utoa (_inode->gfid));
+ }
+
+ if (name != NULL) {
+ name = NULL;
+ uuid_clear (trav_uuid);
+ }
+
+ inode_unref (_inode);
+ _inode = parent;
+
+ if (_inode == NULL) {
+ break;
+ }
- STACK_WIND (frame, quota_ftruncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate,
- fd, offset);
- return 0;
+ inode_ctx_get (_inode, this, &value);
+ ctx = (quota_inode_ctx_t *)(unsigned long)value;
+ } while (1);
+
+out:
+ return;
}
-int
-quota_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf)
+int32_t
+quota_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- struct quota_priv *priv = NULL;
+ int32_t ret = 0;
+ uint64_t ctx_int = 0;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_local_t *local = NULL;
+ quota_dentry_t *dentry = NULL;
+ int64_t delta = 0;
- priv = this->private;
+ local = frame->local;
- if ((op_ret >= 0) && priv->disk_usage_limit) {
- gf_quota_usage_add (this, buf->st_blocks * 512);
- }
+ if ((op_ret < 0) || (local == NULL)) {
+ goto out;
+ }
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
- return 0;
+ ret = inode_ctx_get (local->loc.inode, this, &ctx_int);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to get the context", local->loc.path);
+ goto out;
+ }
+
+ ctx = (quota_inode_ctx_t *)(unsigned long) ctx_int;
+
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "quota context not set in %s (gfid:%s)",
+ local->loc.path, uuid_utoa (local->loc.inode->gfid));
+ goto out;
+ }
+
+ LOCK (&ctx->lock);
+ {
+ ctx->buf = *postbuf;
+ }
+ UNLOCK (&ctx->lock);
+
+ list_for_each_entry (dentry, &ctx->parents, next) {
+ delta = (postbuf->ia_blocks - prebuf->ia_blocks) * 512;
+ quota_update_size (this, local->loc.inode,
+ dentry->name, dentry->par, delta);
+ }
+
+out:
+ QUOTA_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+
+ return 0;
}
-int
-quota_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev)
+int32_t
+quota_writev_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t off,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
- struct quota_priv *priv = NULL;
+ quota_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
- priv = this->private;
+ local = frame->local;
+ if (local == NULL) {
+ gf_log (this->name, GF_LOG_WARNING, "local is NULL");
+ goto unwind;
+ }
- if (gf_quota_check_free_disk (this) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "min-free-disk limit (%u) crossed, current available is %u",
- priv->min_free_disk_limit, priv->current_free_disk);
- STACK_UNWIND (frame, -1, ENOSPC, NULL, NULL);
- return 0;
- }
+ if (local->op_ret == -1) {
+ op_errno = local->op_errno;
+ goto unwind;
+ }
- if (priv->current_disk_usage > priv->disk_usage_limit) {
- gf_log (this->name, GF_LOG_ERROR,
- "Disk usage limit (%"PRIu64") crossed, current usage is %"PRIu64"",
- priv->disk_usage_limit, priv->current_disk_usage);
- STACK_UNWIND (frame, -1, ENOSPC, NULL, NULL);
- return 0;
+ STACK_WIND (frame, quota_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count, off,
+ flags, iobref, xdata);
+ return 0;
+
+unwind:
+ QUOTA_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+
+int32_t
+quota_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t off,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
+{
+ int32_t ret = -1, op_errno = EINVAL;
+ int32_t parents = 0;
+ uint64_t size = 0;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quota_dentry_t *dentry = NULL;
+
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO ("quota", this, unwind);
+ GF_VALIDATE_OR_GOTO (this->name, fd, unwind);
+
+ local = quota_local_new ();
+ if (local == NULL) {
+ goto unwind;
+ }
+
+ frame->local = local;
+ local->loc.inode = inode_ref (fd->inode);
+
+ ret = quota_inode_ctx_get (fd->inode, -1, this, NULL, NULL, &ctx, 0);
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "quota context not set in inode (gfid:%s)",
+ uuid_utoa (fd->inode->gfid));
+ goto unwind;
+ }
+
+ stub = fop_writev_stub (frame, quota_writev_helper, fd, vector, count,
+ off, flags, iobref, xdata);
+ if (stub == NULL) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO (this->name, priv, unwind);
+
+ size = iov_length (vector, count);
+ LOCK (&ctx->lock);
+ {
+ list_for_each_entry (dentry, &ctx->parents, next) {
+ parents++;
+ }
+ }
+ UNLOCK (&ctx->lock);
+
+ local->delta = size;
+ local->stub = stub;
+ local->link_count = parents;
+
+ list_for_each_entry (dentry, &ctx->parents, next) {
+ ret = quota_check_limit (frame, fd->inode, this, dentry->name,
+ dentry->par);
+ if (ret == -1) {
+ break;
+ }
+ }
+
+ stub = NULL;
+
+ LOCK (&local->lock);
+ {
+ local->link_count = 0;
+ if (local->validate_count == 0) {
+ stub = local->stub;
+ local->stub = NULL;
+ }
+ }
+ UNLOCK (&local->lock);
+
+ if (stub != NULL) {
+ call_resume (stub);
}
- STACK_WIND (frame, quota_mknod_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod,
- loc, mode, rdev);
- return 0;
+ return 0;
+
+unwind:
+ QUOTA_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
-int
+int32_t
quota_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *buf)
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- struct quota_priv *priv = NULL;
+ QUOTA_STACK_UNWIND (mkdir, frame, op_ret, op_errno, inode,
+ buf, preparent, postparent, xdata);
+ return 0;
+}
- priv = this->private;
- if ((op_ret >= 0) && priv->disk_usage_limit) {
- gf_quota_usage_subtract (this, buf->st_blocks * 512);
- }
+int32_t
+quota_mkdir_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, mode_t umask, dict_t *xdata)
+{
+ quota_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ local = frame->local;
+ if (local == NULL) {
+ gf_log (this->name, GF_LOG_WARNING, "local is NULL");
+ goto unwind;
+ }
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
- return 0;
+ op_errno = local->op_errno;
+
+ if (local->op_ret == -1) {
+ goto unwind;
+ }
+
+ STACK_WIND (frame, quota_mkdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
+ return 0;
+
+unwind:
+ QUOTA_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
}
-int
-quota_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
+int32_t
+quota_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
{
- struct quota_priv *priv = NULL;
+ int32_t ret = 0, op_errno = 0;
+ quota_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+
+ local = quota_local_new ();
+ if (local == NULL) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- priv = this->private;
+ frame->local = local;
- if (gf_quota_check_free_disk (this) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "min-free-disk limit (%u) crossed, current available is %u",
- priv->min_free_disk_limit, priv->current_free_disk);
- STACK_UNWIND (frame, -1, ENOSPC, NULL, NULL);
- return 0;
-
- }
+ local->link_count = 1;
+
+ ret = loc_copy (&local->loc, loc);
+ if (ret) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
+ goto err;
+ }
+
+ stub = fop_mkdir_stub (frame, quota_mkdir_helper, loc, mode, umask,
+ xdata);
+ if (stub == NULL) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->stub = stub;
+ local->delta = 0;
+
+ quota_check_limit (frame, loc->parent, this, NULL, NULL);
- if (priv->current_disk_usage > priv->disk_usage_limit) {
- gf_log (this->name, GF_LOG_ERROR,
- "Disk usage limit (%"PRIu64") crossed, current usage is %"PRIu64"",
- priv->disk_usage_limit, priv->current_disk_usage);
- STACK_UNWIND (frame, -1, ENOSPC, NULL, NULL);
- return 0;
+ stub = NULL;
+
+ LOCK (&local->lock);
+ {
+ if (local->validate_count == 0) {
+ stub = local->stub;
+ local->stub = NULL;
+ }
+
+ local->link_count = 0;
+ }
+ UNLOCK (&local->lock);
+
+ if (stub != NULL) {
+ call_resume (stub);
}
- STACK_WIND (frame, quota_mkdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir,
- loc, mode);
+ return 0;
+err:
+ QUOTA_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
- return 0;
+ return 0;
}
-int
+int32_t
+quota_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ int32_t ret = -1;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_dentry_t *dentry = NULL;
+
+ local = frame->local;
+ if (op_ret < 0) {
+ goto unwind;
+ }
+
+ ret = quota_inode_ctx_get (inode, -1, this, NULL, buf, &ctx, 1);
+ if ((ret == -1) || (ctx == NULL)) {
+ gf_log (this->name, GF_LOG_WARNING, "cannot create quota "
+ "context in inode(gfid:%s)",
+ uuid_utoa (inode->gfid));
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ LOCK (&ctx->lock);
+ {
+ ctx->buf = *buf;
+
+ dentry = __quota_dentry_new (ctx, (char *)local->loc.name,
+ local->loc.parent->gfid);
+ if (dentry == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "cannot create a new dentry (name:%s) for "
+ "inode(gfid:%s)", local->loc.name,
+ uuid_utoa (local->loc.inode->gfid));
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK (&ctx->lock);
+
+unwind:
+ QUOTA_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+
+int32_t
+quota_create_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
+{
+ quota_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ local = frame->local;
+ if (local == NULL) {
+ gf_log (this->name, GF_LOG_WARNING, "local is NULL");
+ goto unwind;
+ }
+
+ if (local->op_ret == -1) {
+ op_errno = local->op_errno;
+ goto unwind;
+ }
+
+ STACK_WIND (frame, quota_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask,
+ fd, xdata);
+ return 0;
+
+unwind:
+ QUOTA_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL,
+ NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+
+int32_t
+quota_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ int32_t ret = -1;
+ quota_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+
+ local = quota_local_new ();
+ if (local == NULL) {
+ goto err;
+ }
+
+ frame->local = local;
+
+ ret = loc_copy (&local->loc, loc);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
+ goto err;
+ }
+
+ stub = fop_create_stub (frame, quota_create_helper, loc, flags, mode,
+ umask, fd, xdata);
+ if (stub == NULL) {
+ goto err;
+ }
+
+ local->link_count = 1;
+ local->stub = stub;
+ local->delta = 0;
+
+ quota_check_limit (frame, loc->parent, this, NULL, NULL);
+
+ stub = NULL;
+
+ LOCK (&local->lock);
+ {
+ local->link_count = 0;
+ if (local->validate_count == 0) {
+ stub = local->stub;
+ local->stub = NULL;
+ }
+ }
+ UNLOCK (&local->lock);
+
+ if (stub != NULL) {
+ call_resume (stub);
+ }
+
+ return 0;
+err:
+ QUOTA_STACK_UNWIND (create, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+
+ return 0;
+}
+
+
+int32_t
quota_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- struct quota_local *local = NULL;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ uint64_t value = 0;
- local = frame->local;
+ if (op_ret < 0) {
+ goto out;
+ }
- if (local) {
- if (op_ret >= 0) {
- gf_quota_usage_subtract (this,
- local->stbuf.st_blocks * 512);
- }
- loc_wipe (&local->loc);
- }
+ local = (quota_local_t *) frame->local;
+
+ inode_ctx_get (local->loc.inode, this, &value);
+ ctx = (quota_inode_ctx_t *)(unsigned long)value;
+
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "quota context not set in inode (gfid:%s)",
+ uuid_utoa (local->loc.inode->gfid));
+ goto out;
+ }
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ quota_update_size (this, local->loc.inode, (char *)local->loc.name,
+ local->loc.parent->gfid,
+ (-(ctx->buf.ia_blocks * 512)));
+
+out:
+ QUOTA_STACK_UNWIND (unlink, frame, op_ret, op_errno, preparent,
+ postparent, xdata);
+ return 0;
}
-int
-quota_unlink_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+int32_t
+quota_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
- struct quota_local *local = NULL;
+ int32_t ret = 0;
+ quota_local_t *local = NULL;
- local = frame->local;
+ local = quota_local_new ();
+ if (local == NULL) {
+ goto err;
+ }
- if (op_ret >= 0) {
- if (buf->st_nlink == 1) {
- local->stbuf = *buf;
- }
- }
+ frame->local = local;
+
+ ret = loc_copy (&local->loc, loc);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
+ goto err;
+ }
- STACK_WIND (frame, quota_unlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink,
- &local->loc);
+ STACK_WIND (frame, quota_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
- return 0;
+ ret = 0;
+
+err:
+ if (ret == -1) {
+ QUOTA_STACK_UNWIND (unlink, frame, -1, 0, NULL, NULL, NULL);
+ }
+
+ return 0;
}
-int
-quota_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+int32_t
+quota_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ int32_t ret = -1;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_dentry_t *dentry = NULL;
+ char found = 0;
+
+ if (op_ret < 0) {
+ goto out;
+ }
+
+ local = (quota_local_t *) frame->local;
+
+ quota_update_size (this, local->loc.parent, NULL, NULL,
+ (buf->ia_blocks * 512));
+
+ ret = quota_inode_ctx_get (inode, -1, this, NULL, NULL, &ctx, 0);
+ if ((ret == -1) || (ctx == NULL)) {
+ gf_log (this->name, GF_LOG_WARNING, "cannot find quota "
+ "context in %s (gfid:%s)", local->loc.path,
+ uuid_utoa (inode->gfid));
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ LOCK (&ctx->lock);
+ {
+ list_for_each_entry (dentry, &ctx->parents, next) {
+ if ((strcmp (dentry->name, local->loc.name) == 0) &&
+ (uuid_compare (local->loc.parent->gfid,
+ dentry->par) == 0)) {
+ found = 1;
+ gf_log (this->name, GF_LOG_WARNING,
+ "new entry being linked (name:%s) for "
+ "inode (gfid:%s) is already present "
+ "in inode-dentry-list", dentry->name,
+ uuid_utoa (local->loc.inode->gfid));
+ break;
+ }
+ }
+
+ if (!found) {
+ dentry = __quota_dentry_new (ctx,
+ (char *)local->loc.name,
+ local->loc.parent->gfid);
+ if (dentry == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "cannot create a new dentry (name:%s) "
+ "for inode(gfid:%s)", local->loc.name,
+ uuid_utoa (local->loc.inode->gfid));
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+
+ ctx->buf = *buf;
+ }
+unlock:
+ UNLOCK (&ctx->lock);
+
+out:
+ QUOTA_STACK_UNWIND (link, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+
+ return 0;
+}
+
+
+int32_t
+quota_link_helper (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
{
- struct quota_local *local = NULL;
- struct quota_priv *priv = NULL;
+ quota_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
- priv = this->private;
+ local = frame->local;
+ if (local == NULL) {
+ gf_log (this->name, GF_LOG_WARNING, "local is NULL");
+ goto unwind;
+ }
- if (priv->disk_usage_limit) {
- local = CALLOC (1, sizeof (struct quota_local));
- frame->local = local;
+ op_errno = local->op_errno;
- loc_copy (&local->loc, loc);
+ if (local->op_ret == -1) {
+ goto unwind;
+ }
- STACK_WIND (frame,
- quota_unlink_stat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat,
- loc);
- return 0;
- }
+ STACK_WIND (frame, quota_link_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+ return 0;
- STACK_WIND (frame, quota_unlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink,
- loc);
- return 0;
+unwind:
+ QUOTA_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
}
-int
-quota_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+int32_t
+quota_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- struct quota_local *local = NULL;
+ int32_t ret = -1, op_errno = ENOMEM;
+ quota_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+
+ local = quota_local_new ();
+ if (local == NULL) {
+ goto err;
+ }
- local = frame->local;
+ frame->local = (void *) local;
- if (local) {
- if (op_ret >= 0) {
- gf_quota_usage_subtract (this, local->stbuf.st_blocks * 512);
- }
- loc_wipe (&local->loc);
- }
+ ret = loc_copy (&local->loc, newloc);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
+ goto err;
+ }
+
+ stub = fop_link_stub (frame, quota_link_helper, oldloc, newloc, xdata);
+ if (stub == NULL) {
+ goto err;
+ }
+
+ local->link_count = 1;
+ local->stub = stub;
+
+ ret = quota_inode_ctx_get (oldloc->inode, -1, this, NULL, NULL, &ctx,
+ 0);
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "quota context not set in inode (gfid:%s)",
+ oldloc->inode ? uuid_utoa (oldloc->inode->gfid) : "0");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->delta = ctx->buf.ia_blocks * 512;
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ quota_check_limit (frame, newloc->parent, this, NULL, NULL);
+
+ stub = NULL;
+
+ LOCK (&local->lock);
+ {
+ if (local->validate_count == 0) {
+ stub = local->stub;
+ local->stub = NULL;
+ }
+
+ local->link_count = 0;
+ }
+ UNLOCK (&local->lock);
+
+ if (stub != NULL) {
+ call_resume (stub);
+ }
+
+ ret = 0;
+err:
+ if (ret < 0) {
+ QUOTA_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL,
+ NULL, NULL, NULL);
+ }
+
+ return 0;
}
-int
-quota_rmdir_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+int32_t
+quota_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- struct quota_local *local = NULL;
+ int32_t ret = -1;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_dentry_t *old_dentry = NULL, *dentry = NULL;
+ char new_dentry_found = 0;
+ int64_t size = 0;
+
+ if (op_ret < 0) {
+ goto out;
+ }
- local = frame->local;
+ local = frame->local;
+ if (local == NULL) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ gf_log (this->name, GF_LOG_WARNING, "local is NULL");
+ goto out;
+ }
- if (op_ret >= 0) {
- local->stbuf = *buf;
- }
+ if (IA_ISREG (local->oldloc.inode->ia_type)
+ || IA_ISLNK (local->oldloc.inode->ia_type)) {
+ size = buf->ia_blocks * 512;
+ }
+
+ if (local->oldloc.parent != local->newloc.parent) {
+ quota_update_size (this, local->oldloc.parent, NULL, NULL, (-size));
+ quota_update_size (this, local->newloc.parent, NULL, NULL, size);
+ }
+
+ if (!(IA_ISREG (local->oldloc.inode->ia_type)
+ || IA_ISLNK (local->oldloc.inode->ia_type))) {
+ goto out;
+ }
+
+ ret = quota_inode_ctx_get (local->oldloc.inode, -1, this, NULL, NULL,
+ &ctx, 0);
+ if ((ret == -1) || (ctx == NULL)) {
+ gf_log (this->name, GF_LOG_WARNING, "quota context not"
+ "set in inode(gfid:%s)",
+ uuid_utoa (local->oldloc.inode->gfid));
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ LOCK (&ctx->lock);
+ {
+ /* decision of whether to create a context in newloc->inode
+ * depends on fuse_rename_cbk's choice of inode it retains
+ * after rename. currently it just associates oldloc->inode
+ * with new parent and name. If this changes, following code
+ * should be changed to set a new context in newloc->inode.
+ */
+ list_for_each_entry (dentry, &ctx->parents, next) {
+ if ((strcmp (dentry->name, local->oldloc.name) == 0) &&
+ (uuid_compare (local->oldloc.parent->gfid,
+ dentry->par) == 0)) {
+ old_dentry = dentry;
+ } else if ((strcmp (dentry->name,
+ local->newloc.name) == 0) &&
+ (uuid_compare (local->oldloc.parent->gfid,
+ dentry->par) == 0)) {
+ new_dentry_found = 1;
+ gf_log (this->name, GF_LOG_WARNING,
+ "new entry being linked (name:%s) for "
+ "inode (gfid:%s) is already present "
+ "in inode-dentry-list", dentry->name,
+ uuid_utoa (local->newloc.inode->gfid));
+ break;
+ }
+ }
+
+ if (old_dentry != NULL) {
+ __quota_dentry_free (old_dentry);
+ } else {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dentry corresponding to the path just renamed "
+ "(name:%s) is not present", local->oldloc.name);
+ }
+
+ if (!new_dentry_found) {
+ dentry = __quota_dentry_new (ctx,
+ (char *)local->newloc.name,
+ local->newloc.parent->gfid);
+ if (dentry == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "cannot create a new dentry (name:%s) "
+ "for inode(gfid:%s)", local->newloc.name,
+ uuid_utoa (local->newloc.inode->gfid));
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+
+ ctx->buf = *buf;
+ }
+unlock:
+ UNLOCK (&ctx->lock);
- STACK_WIND (frame, quota_rmdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir,
- &local->loc);
+out:
+ QUOTA_STACK_UNWIND (rename, frame, op_ret, op_errno, buf, preoldparent,
+ postoldparent, prenewparent, postnewparent, xdata);
- return 0;
+ return 0;
}
-int
-quota_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
+int32_t
+quota_rename_helper (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
{
- struct quota_local *local = NULL;
- struct quota_priv *priv = NULL;
+ quota_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
- priv = this->private;
+ local = frame->local;
+ if (local == NULL) {
+ gf_log (this->name, GF_LOG_WARNING, "local is NULL");
+ goto unwind;
+ }
- if (priv->disk_usage_limit) {
- local = CALLOC (1, sizeof (struct quota_local));
- frame->local = local;
+ op_errno = local->op_errno;
- loc_copy (&local->loc, loc);
+ if (local->op_ret == -1) {
+ goto unwind;
+ }
- STACK_WIND (frame, quota_rmdir_stat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc);
- return 0;
- }
+ STACK_WIND (frame, quota_rename_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+ return 0;
- STACK_WIND (frame, quota_rmdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir,
- loc);
- return 0;
+unwind:
+ QUOTA_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL,
+ NULL, NULL, NULL, NULL);
+ return 0;
}
-int
+int32_t
+quota_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ int32_t ret = -1, op_errno = ENOMEM;
+ quota_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+
+ local = quota_local_new ();
+ if (local == NULL) {
+ goto err;
+ }
+
+ frame->local = local;
+
+ ret = loc_copy (&local->oldloc, oldloc);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
+ goto err;
+ }
+
+ ret = loc_copy (&local->newloc, newloc);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
+ goto err;
+ }
+
+ stub = fop_rename_stub (frame, quota_rename_helper, oldloc, newloc,
+ xdata);
+ if (stub == NULL) {
+ goto err;
+ }
+
+ local->link_count = 1;
+ local->stub = stub;
+
+ if (IA_ISREG (oldloc->inode->ia_type)
+ || IA_ISLNK (oldloc->inode->ia_type)) {
+ ret = quota_inode_ctx_get (oldloc->inode, -1, this, NULL, NULL,
+ &ctx, 0);
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "quota context not set in inode (gfid:%s)",
+ oldloc->inode ? uuid_utoa (oldloc->inode->gfid)
+ : "0");
+ op_errno = EINVAL;
+ goto err;
+ }
+ local->delta = ctx->buf.ia_blocks * 512;
+ } else {
+ local->delta = 0;
+ }
+
+ quota_check_limit (frame, newloc->parent, this, NULL, NULL);
+
+ stub = NULL;
+
+ LOCK (&local->lock);
+ {
+ if (local->validate_count == 0) {
+ stub = local->stub;
+ local->stub = NULL;
+ }
+
+ local->link_count = 0;
+ }
+ UNLOCK (&local->lock);
+
+ if (stub != NULL) {
+ call_resume (stub);
+ }
+
+ ret = 0;
+err:
+ if (ret == -1) {
+ QUOTA_STACK_UNWIND (rename, frame, -1, op_errno, NULL,
+ NULL, NULL, NULL, NULL, NULL);
+ }
+
+ return 0;
+}
+
+
+int32_t
quota_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *buf)
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- struct quota_priv *priv = NULL;
+ int64_t size = 0;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_dentry_t *dentry = NULL;
+
+ if (op_ret < 0) {
+ goto out;
+ }
- priv = this->private;
+ local = frame->local;
+ size = buf->ia_blocks * 512;
- if ((op_ret >= 0) && priv->disk_usage_limit) {
- gf_quota_usage_add (this, buf->st_blocks * 512);
- }
+ quota_update_size (this, local->loc.parent, NULL, NULL, size);
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
- return 0;
+ quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
+ &ctx, 1);
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "quota context not set in inode (gfid:%s)",
+ uuid_utoa (local->loc.inode->gfid));
+ goto out;
+ }
+
+ LOCK (&ctx->lock);
+ {
+ ctx->buf = *buf;
+
+ dentry = __quota_dentry_new (ctx, (char *)local->loc.name,
+ local->loc.parent->gfid);
+ if (dentry == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "cannot create a new dentry (name:%s) for "
+ "inode(gfid:%s)", local->loc.name,
+ uuid_utoa (local->loc.inode->gfid));
+ op_ret = -1;
+ op_errno = ENOMEM;
+ }
+ }
+ UNLOCK (&ctx->lock);
+
+out:
+ QUOTA_STACK_UNWIND (symlink, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+
+ return 0;
}
int
-quota_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc)
+quota_symlink_helper (call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- struct quota_priv *priv = NULL;
+ quota_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
- priv = this->private;
+ local = frame->local;
+ if (local == NULL) {
+ gf_log (this->name, GF_LOG_WARNING, "local is NULL");
+ goto unwind;
+ }
- if (gf_quota_check_free_disk (this) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "min-free-disk limit (%u) crossed, current available is %u",
- priv->min_free_disk_limit, priv->current_free_disk);
- STACK_UNWIND (frame, -1, ENOSPC, NULL, NULL);
- return 0;
-
- }
- if (priv->current_disk_usage > priv->disk_usage_limit) {
- gf_log (this->name, GF_LOG_ERROR,
- "Disk usage limit (%"PRIu64") crossed, current usage is %"PRIu64"",
- priv->disk_usage_limit, priv->current_disk_usage);
- STACK_UNWIND (frame, -1, ENOSPC, NULL, NULL);
- return 0;
+ if (local->op_ret == -1) {
+ op_errno = local->op_errno;
+ goto unwind;
}
- STACK_WIND (frame, quota_symlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink,
- linkpath, loc);
- return 0;
+ STACK_WIND (frame, quota_symlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask,
+ xdata);
+ return 0;
+
+unwind:
+ QUOTA_STACK_UNWIND (symlink, frame, -1, op_errno, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
}
int
-quota_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- fd_t *fd, inode_t *inode, struct stat *buf)
+quota_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- struct quota_priv *priv = this->private;
- int ret = 0;
+ int32_t ret = -1;
+ int32_t op_errno = ENOMEM;
+ quota_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+
+ local = quota_local_new ();
+ if (local == NULL) {
+ goto err;
+ }
- if ((op_ret >= 0) && priv->disk_usage_limit) {
- gf_quota_usage_add (this, buf->st_blocks * 512);
+ frame->local = local;
- ret = fd_ctx_set (fd, this, 1);
- }
+ ret = loc_copy (&local->loc, loc);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
+ goto err;
+ }
- STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
- return 0;
+ local->link_count = 1;
+
+ stub = fop_symlink_stub (frame, quota_symlink_helper, linkpath, loc,
+ umask, xdata);
+ if (stub == NULL) {
+ goto err;
+ }
+
+ local->stub = stub;
+ local->delta = strlen (linkpath);
+
+ quota_check_limit (frame, loc->parent, this, NULL, NULL);
+
+ stub = NULL;
+
+ LOCK (&local->lock);
+ {
+ if (local->validate_count == 0) {
+ stub = local->stub;
+ local->stub = NULL;
+ }
+
+ local->link_count = 0;
+ }
+ UNLOCK (&local->lock);
+
+ if (stub != NULL) {
+ call_resume (stub);
+ }
+
+ return 0;
+
+err:
+ QUOTA_STACK_UNWIND (symlink, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
+
+ return 0;
}
-int
-quota_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode, fd_t *fd)
+int32_t
+quota_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- struct quota_priv *priv = NULL;
+ quota_local_t *local = NULL;
+ int64_t delta = 0;
+ quota_inode_ctx_t *ctx = NULL;
- priv = this->private;
+ if (op_ret < 0) {
+ goto out;
+ }
- if (gf_quota_check_free_disk (this) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "min-free-disk limit (%u) crossed, current available is %u",
- priv->min_free_disk_limit, priv->current_free_disk);
- STACK_UNWIND (frame, -1, ENOSPC, NULL, NULL, NULL);
- return 0;
-
- }
- if (priv->current_disk_usage > priv->disk_usage_limit) {
- gf_log (this->name, GF_LOG_ERROR,
- "Disk usage limit (%"PRIu64") crossed, current usage is %"PRIu64"",
- priv->disk_usage_limit, priv->current_disk_usage);
- STACK_UNWIND (frame, -1, ENOSPC, NULL, NULL, NULL);
- return 0;
+ local = frame->local;
+ if (local == NULL) {
+ gf_log (this->name, GF_LOG_WARNING, "local is NULL");
+ goto out;
+ }
+
+ delta = (postbuf->ia_blocks - prebuf->ia_blocks) * 512;
+
+ quota_update_size (this, local->loc.inode, NULL, NULL, delta);
+
+ quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
+ &ctx, 0);
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "quota context not set in inode (gfid:%s)",
+ uuid_utoa (local->loc.inode->gfid));
+ goto out;
+ }
+
+ LOCK (&ctx->lock);
+ {
+ ctx->buf = *postbuf;
}
+ UNLOCK (&ctx->lock);
- STACK_WIND (frame, quota_create_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create,
- loc, flags, mode, fd);
- return 0;
+out:
+ QUOTA_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
}
-int
-quota_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+int32_t
+quota_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- int ret = 0;
+ int32_t ret = -1;
+ quota_local_t *local = NULL;
+
+ local = quota_local_new ();
+ if (local == NULL) {
+ goto err;
+ }
- if (op_ret >= 0)
- ret = fd_ctx_set (fd, this, 1);
+ frame->local = local;
- STACK_UNWIND (frame, op_ret, op_errno, fd);
- return 0;
+ ret = loc_copy (&local->loc, loc);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
+ goto err;
+ }
+
+ STACK_WIND (frame, quota_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+
+ return 0;
+err:
+ QUOTA_STACK_UNWIND (truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+ return 0;
}
-int
-quota_open (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, fd_t *fd)
+int32_t
+quota_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ quota_local_t *local = NULL;
+ int64_t delta = 0;
+ quota_inode_ctx_t *ctx = NULL;
+
+ if (op_ret < 0) {
+ goto out;
+ }
+
+ local = frame->local;
+ if (local == NULL) {
+ gf_log (this->name, GF_LOG_WARNING, "local is NULL");
+ goto out;
+ }
+
+ delta = (postbuf->ia_blocks - prebuf->ia_blocks) * 512;
+
+ quota_update_size (this, local->loc.inode, NULL, NULL, delta);
+
+ quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
+ &ctx, 0);
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "quota context not set in inode (gfid:%s)",
+ uuid_utoa (local->loc.inode->gfid));
+ goto out;
+ }
+
+ LOCK (&ctx->lock);
+ {
+ ctx->buf = *postbuf;
+ }
+ UNLOCK (&ctx->lock);
+
+out:
+ QUOTA_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
+}
+
+
+int32_t
+quota_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- STACK_WIND (frame, quota_open_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open,
- loc, flags, fd);
- return 0;
+ quota_local_t *local = NULL;
+
+ local = quota_local_new ();
+ if (local == NULL)
+ goto err;
+
+ frame->local = local;
+
+ local->loc.inode = inode_ref (fd->inode);
+
+ STACK_WIND (frame, quota_ftruncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+
+ return 0;
+err:
+ QUOTA_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+ return 0;
}
-int
-quota_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+int32_t
+quota_send_dir_limit_to_cli (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, const char *name)
{
- struct quota_priv *priv = NULL;
- struct quota_local *local = NULL;
+ int32_t ret = 0;
+ char dir_limit [1024] = {0, };
+ dict_t *dict = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ uint64_t value = 0;
+
+ ret = inode_ctx_get (inode, this, &value);
+ if (ret < 0)
+ goto out;
+
+ ctx = (quota_inode_ctx_t *)(unsigned long)value;
+ snprintf (dir_limit, 1024, "%"PRId64",%"PRId64, ctx->size, ctx->limit);
+
+ dict = dict_new ();
+ if (dict == NULL) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_str (dict, (char *) name, dir_limit);
+ if (ret < 0)
+ goto out;
- priv = this->private;
- local = frame->local;
+ gf_log (this->name, GF_LOG_INFO, "str = %s", dir_limit);
- if (priv->disk_usage_limit) {
- if (op_ret >= 0) {
- gf_quota_usage_add (this, (stbuf->st_blocks -
- local->stbuf.st_blocks) * 512);
- }
- fd_unref (local->fd);
- iobref_unref (local->iobref);
- }
+ QUOTA_STACK_UNWIND (getxattr, frame, 0, 0, dict, NULL);
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+
+int32_t
+quota_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ int32_t ret = 0;
+
+ if (name && strcasecmp (name, "trusted.limit.list") == 0) {
+ ret = quota_send_dir_limit_to_cli (frame, this, fd->inode,
+ name);
+ if (ret == 0) {
+ return 0;
+ }
+ }
+
+ STACK_WIND (frame, default_fgetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+ return 0;
+}
+
+
+int32_t
+quota_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ int32_t ret = 0;
+
+ if ((name != NULL) && strcasecmp (name, "trusted.limit.list") == 0) {
+ ret = quota_send_dir_limit_to_cli (frame, this, loc->inode,
+ name);
+ if (ret == 0)
+ return 0;
+ }
+
+ STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+ return 0;
+}
+
+
+int32_t
+quota_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+
+ if (op_ret < 0) {
+ goto out;
+ }
+
+ local = frame->local;
+ if (local == NULL) {
+ gf_log (this->name, GF_LOG_WARNING, "local is NULL");
+ goto out;
+ }
+
+ quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
+ &ctx, 0);
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "quota context not set in inode (gfid:%s)",
+ uuid_utoa (local->loc.inode->gfid));
+ goto out;
+ }
+
+ LOCK (&ctx->lock);
+ {
+ if (buf)
+ ctx->buf = *buf;
+ }
+ UNLOCK (&ctx->lock);
+
+out:
+ QUOTA_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+
+int32_t
+quota_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ quota_local_t *local = NULL;
+ int32_t ret = -1;
+
+ local = quota_local_new ();
+ if (local == NULL) {
+ goto unwind;
+ }
+
+ frame->local = local;
+ ret = loc_copy (&local->loc, loc);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
+ goto unwind;
+ }
+
+ STACK_WIND (frame, quota_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
+
+unwind:
+ QUOTA_STACK_UNWIND (stat, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+}
+
+
+int32_t
+quota_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+
+ if (op_ret < 0) {
+ goto out;
+ }
+
+ local = frame->local;
+ if (local == NULL) {
+ gf_log (this->name, GF_LOG_WARNING, "local is NULL");
+ goto out;
+ }
+
+ quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
+ &ctx, 0);
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "quota context not set in inode (gfid:%s)",
+ uuid_utoa (local->loc.inode->gfid));
+ goto out;
+ }
+
+ LOCK (&ctx->lock);
+ {
+ if (buf)
+ ctx->buf = *buf;
+ }
+ UNLOCK (&ctx->lock);
+
+out:
+ QUOTA_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+
+int32_t
+quota_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ quota_local_t *local = NULL;
+
+ local = quota_local_new ();
+ if (local == NULL) {
+ goto unwind;
+ }
+
+ frame->local = local;
+
+ local->loc.inode = inode_ref (fd->inode);
+
+ STACK_WIND (frame, quota_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ return 0;
+
+unwind:
+ QUOTA_STACK_UNWIND (fstat, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+}
+
+
+int32_t
+quota_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *path,
+ struct iatt *buf, dict_t *xdata)
+{
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+
+ if (op_ret < 0) {
+ goto out;
+ }
+
+ local = frame->local;
+ if (local == NULL) {
+ gf_log (this->name, GF_LOG_WARNING, "local is NULL");
+ goto out;
+ }
+
+ quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
+ &ctx, 0);
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "quota context not set in inode (gfid:%s)",
+ uuid_utoa (local->loc.inode->gfid));
+ goto out;
+ }
+
+ LOCK (&ctx->lock);
+ {
+ ctx->buf = *buf;
+ }
+ UNLOCK (&ctx->lock);
+
+out:
+ QUOTA_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, buf, xdata);
+ return 0;
+}
+
+
+int32_t
+quota_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
+{
+ quota_local_t *local = NULL;
+ int32_t ret = -1;
+
+ local = quota_local_new ();
+ if (local == NULL) {
+ goto unwind;
+ }
+
+ frame->local = local;
+
+ ret = loc_copy (&local->loc, loc);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
+ goto unwind;
+ }
+
+ STACK_WIND (frame, quota_readlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readlink, loc, size, xdata);
+ return 0;
+
+unwind:
+ QUOTA_STACK_UNWIND (readlink, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+}
+
+
+int32_t
+quota_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *buf, struct iobref *iobref,
+ dict_t *xdata)
+{
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+
+ if (op_ret < 0) {
+ goto out;
+ }
+
+ local = frame->local;
+ if (local == NULL) {
+ gf_log (this->name, GF_LOG_WARNING, "local is NULL");
+ goto out;
+ }
+
+ quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
+ &ctx, 0);
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "quota context not set in inode (gfid:%s)",
+ uuid_utoa (local->loc.inode->gfid));
+ goto out;
+ }
+
+ LOCK (&ctx->lock);
+ {
+ ctx->buf = *buf;
+ }
+ UNLOCK (&ctx->lock);
+
+out:
+ QUOTA_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count,
+ buf, iobref, xdata);
+ return 0;
+}
+
+
+int32_t
+quota_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ quota_local_t *local = NULL;
+
+ local = quota_local_new ();
+ if (local == NULL) {
+ goto unwind;
+ }
+
+ frame->local = local;
+
+ local->loc.inode = inode_ref (fd->inode);
+
+ STACK_WIND (frame, quota_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
+ xdata);
+ return 0;
+
+unwind:
+ QUOTA_STACK_UNWIND (readv, frame, -1, ENOMEM, NULL, -1, NULL, NULL, NULL);
+ return 0;
+}
+
+
+int32_t
+quota_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+
+ if (op_ret < 0) {
+ goto out;
+ }
+
+ local = frame->local;
+ if (local == NULL) {
+ gf_log (this->name, GF_LOG_WARNING, "local is NULL");
+ goto out;
+ }
+
+ quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
+ &ctx, 0);
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "quota context not set in inode (gfid:%s)",
+ uuid_utoa (local->loc.inode->gfid));
+ goto out;
+ }
+
+ LOCK (&ctx->lock);
+ {
+ ctx->buf = *postbuf;
+ }
+ UNLOCK (&ctx->lock);
+
+out:
+ QUOTA_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+
+int32_t
+quota_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
+{
+ quota_local_t *local = NULL;
+
+ local = quota_local_new ();
+ if (local == NULL) {
+ goto unwind;
+ }
+
+ local->loc.inode = inode_ref (fd->inode);
+
+ frame->local = local;
+
+ STACK_WIND (frame, quota_fsync_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, flags, xdata);
+ return 0;
+
+unwind:
+ QUOTA_STACK_UNWIND (fsync, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+
+}
+
+
+int32_t
+quota_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+
+ if (op_ret < 0) {
+ goto out;
+ }
+
+ local = frame->local;
+ if (local == NULL) {
+ gf_log (this->name, GF_LOG_WARNING, "local is NULL");
+ goto out;
+ }
+
+ quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
+ &ctx, 0);
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "quota context not set in inode (gfid:%s)",
+ uuid_utoa (local->loc.inode->gfid));
+ goto out;
+ }
+
+ LOCK (&ctx->lock);
+ {
+ if (statpost)
+ ctx->buf = *statpost;
+ }
+ UNLOCK (&ctx->lock);
+
+out:
+ QUOTA_STACK_UNWIND (setattr, frame, op_ret, op_errno, statpre,
+ statpost, xdata);
+ return 0;
+}
+
+
+int32_t
+quota_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ quota_local_t *local = NULL;
+ int32_t ret = -1;
+
+ local = quota_local_new ();
+ if (local == NULL) {
+ goto unwind;
+ }
+
+ frame->local = local;
+
+ ret = loc_copy (&local->loc, loc);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
+ goto unwind;
+ }
+
+ STACK_WIND (frame, quota_setattr_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid, xdata);
+ return 0;
+
+unwind:
+ QUOTA_STACK_UNWIND (setattr, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+}
+
+
+int32_t
+quota_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+
+ if (op_ret < 0) {
+ goto out;
+ }
+
+ local = frame->local;
+ if (local == NULL) {
+ gf_log (this->name, GF_LOG_WARNING, "local is NULL");
+ goto out;
+ }
+
+ quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
+ &ctx, 0);
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "quota context not set in inode (gfid:%s)",
+ uuid_utoa (local->loc.inode->gfid));
+ goto out;
+ }
+
+ LOCK (&ctx->lock);
+ {
+ ctx->buf = *statpost;
+ }
+ UNLOCK (&ctx->lock);
+
+out:
+ QUOTA_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, statpre,
+ statpost, xdata);
+ return 0;
+}
+
+
+int32_t
+quota_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ quota_local_t *local = NULL;
+
+ local = quota_local_new ();
+ if (local == NULL) {
+ goto unwind;
+ }
+
+ frame->local = local;
+
+ local->loc.inode = inode_ref (fd->inode);
+
+ STACK_WIND (frame, quota_fsetattr_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, valid, xdata);
+ return 0;
+
+unwind:
+ QUOTA_STACK_UNWIND (fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+}
+
+
+int32_t
+quota_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ int32_t ret = -1;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_dentry_t *dentry = NULL;
+
+ local = frame->local;
+ if (op_ret < 0) {
+ goto unwind;
+ }
+
+ ret = quota_inode_ctx_get (inode, -1, this, NULL, buf, &ctx, 1);
+ if ((ret == -1) || (ctx == NULL)) {
+ gf_log (this->name, GF_LOG_WARNING, "cannot create quota "
+ "context in inode (gfid:%s)", uuid_utoa (inode->gfid));
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ LOCK (&ctx->lock);
+ {
+ ctx->buf = *buf;
+
+ dentry = __quota_dentry_new (ctx, (char *)local->loc.name,
+ local->loc.parent->gfid);
+ if (dentry == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "cannot create a new dentry (name:%s) for "
+ "inode(gfid:%s)", local->loc.name,
+ uuid_utoa (local->loc.inode->gfid));
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK (&ctx->lock);
- STACK_UNWIND (frame, op_ret, op_errno, stbuf);
- return 0;
+unwind:
+ QUOTA_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode,
+ buf, preparent, postparent, xdata);
+ return 0;
}
int
-quota_writev_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+quota_mknod_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
{
- struct quota_local *local = NULL;
- struct quota_priv *priv = NULL;
- int iovlen = 0;
+ quota_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+ local = frame->local;
+ if (local == NULL) {
+ gf_log (this->name, GF_LOG_WARNING, "local is NULL");
+ goto unwind;
+ }
- local = frame->local;
- priv = this->private;
+ if (local->op_ret == -1) {
+ op_errno = local->op_errno;
+ goto unwind;
+ }
- if (op_ret >= 0) {
- if (priv->current_disk_usage > priv->disk_usage_limit) {
- iovlen = iov_length (local->vector, local->count);
+ STACK_WIND (frame, quota_mknod_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask,
+ xdata);
- if (iovlen > (buf->st_blksize - (buf->st_size % buf->st_blksize))) {
- fd_unref (local->fd);
- iobref_unref (local->iobref);
- STACK_UNWIND (frame, -1, ENOSPC, NULL);
- return 0;
- }
- }
- local->stbuf = *buf;
- }
-
- STACK_WIND (frame, quota_writev_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev,
- local->fd, local->vector, local->count, local->offset,
- local->iobref);
+ return 0;
- return 0;
+unwind:
+ QUOTA_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
}
int
-quota_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t off,
- struct iobref *iobref)
+quota_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
{
- struct quota_local *local = NULL;
- struct quota_priv *priv = NULL;
+ int32_t ret = -1;
+ quota_local_t *local = NULL;
+ call_stub_t *stub = NULL;
- priv = this->private;
+ local = quota_local_new ();
+ if (local == NULL) {
+ goto err;
+ }
- if (gf_quota_check_free_disk (this) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "min-free-disk limit (%u) crossed, current available is %u",
- priv->min_free_disk_limit, priv->current_free_disk);
- STACK_UNWIND (frame, -1, ENOSPC, NULL);
- return 0;
- }
+ frame->local = local;
- if (priv->disk_usage_limit) {
- local = CALLOC (1, sizeof (struct quota_local));
- local->fd = fd_ref (fd);
- local->iobref = iobref_ref (iobref);
- local->vector = vector;
- local->count = count;
- local->offset = off;
- frame->local = local;
-
- STACK_WIND (frame, quota_writev_fstat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd);
- return 0;
- }
+ ret = loc_copy (&local->loc, loc);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
+ goto err;
+ }
- STACK_WIND (frame, quota_writev_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev,
- fd, vector, count, off, iobref);
- return 0;
+ stub = fop_mknod_stub (frame, quota_mknod_helper, loc, mode, rdev,
+ umask, xdata);
+ if (stub == NULL) {
+ goto err;
+ }
+
+ local->link_count = 1;
+ local->stub = stub;
+ local->delta = 0;
+
+ quota_check_limit (frame, loc->parent, this, NULL, NULL);
+
+ stub = NULL;
+
+ LOCK (&local->lock);
+ {
+ local->link_count = 0;
+ if (local->validate_count == 0) {
+ stub = local->stub;
+ local->stub = NULL;
+ }
+ }
+ UNLOCK (&local->lock);
+
+ if (stub != NULL) {
+ call_resume (stub);
+ }
+
+ return 0;
+err:
+ QUOTA_STACK_UNWIND (mknod, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
+ NULL);
+
+ return 0;
}
+int
+quota_setxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno, dict_t *xdata)
+{
+ QUOTA_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
int
-quota_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+quota_setxattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *dict, int flags, dict_t *xdata)
{
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "failed to remove the disk-usage value: %s",
- strerror (op_errno));
- }
-
- STACK_DESTROY (frame->root);
- return 0;
+ int op_errno = EINVAL;
+ int op_ret = -1;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.quota*", dict,
+ op_errno, err);
+
+ STACK_WIND (frame, quota_setxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr,
+ loc, dict, flags, xdata);
+ return 0;
+err:
+ QUOTA_STACK_UNWIND (setxattr, frame, op_ret, op_errno, NULL);
+ return 0;
}
+int
+quota_fsetxattr_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno, dict_t *xdata)
+{
+ QUOTA_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
int
-quota_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+quota_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int flags, dict_t *xdata)
{
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "failed to set the disk-usage value: %s",
- strerror (op_errno));
- }
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.quota*", dict,
+ op_errno, err);
+
+ STACK_WIND (frame, quota_fsetxattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr,
+ fd, dict, flags, xdata);
+ return 0;
+ err:
+ QUOTA_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, NULL);
+ return 0;
+}
+
+
+int
+quota_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ QUOTA_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+quota_removexattr (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
+
+ VALIDATE_OR_GOTO (this, err);
+
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.quota*",
+ name, op_errno, err);
- STACK_DESTROY (frame->root);
- return 0;
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (loc, err);
+
+ STACK_WIND (frame, quota_removexattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr,
+ loc, name, xdata);
+ return 0;
+err:
+ QUOTA_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
+ return 0;
}
int
-quota_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *statvfs)
+quota_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- struct quota_priv *priv = NULL;
- uint64_t f_blocks = 0;
- int64_t f_bfree = 0;
- uint64_t f_bused = 0;
+ QUOTA_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+int
+quota_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ GF_IF_NATIVE_XATTR_GOTO ("trusted.quota*",
+ name, op_errno, err);
+
+ STACK_WIND (frame, quota_fremovexattr_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr,
+ fd, name, xdata);
+ return 0;
+ err:
+ QUOTA_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, NULL);
+ return 0;
+}
- priv = this->private;
- if (op_ret != 0)
+int32_t
+quota_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
+{
+ inode_t *root_inode = NULL;
+ quota_priv_t *priv = NULL;
+ uint64_t value = 0;
+ quota_inode_ctx_t *ctx = NULL;
+ limits_t *limit_node = NULL;
+ int64_t usage = -1;
+ int64_t avail = -1;
+ int64_t blocks = 0;
+
+ root_inode = cookie;
+
+ /* This fop will fail mostly in case of client disconnect's,
+ * which is already logged. Hence, not logging here */
+ if (op_ret == -1)
+ goto unwind;
+ /*
+ * We should never get here unless quota_statfs (below) sent us a
+ * cookie, and it would only do so if the value was non-NULL. This
+ * check is therefore just routine defensive coding.
+ */
+ if (!root_inode) {
+ gf_log(this->name,GF_LOG_WARNING,
+ "null inode, cannot adjust for quota");
+ goto unwind;
+ }
+ if (!root_inode->table || (root_inode != root_inode->table->root)) {
+ gf_log(this->name,GF_LOG_WARNING,
+ "non-root inode, cannot adjust for quota");
goto unwind;
+ }
- f_blocks = priv->disk_usage_limit / statvfs->f_frsize;
- f_bused = priv->current_disk_usage / statvfs->f_frsize;
+ inode_ctx_get (root_inode, this, &value);
+ if (!value) {
+ goto unwind;
+ }
+ ctx = (quota_inode_ctx_t *)(unsigned long)value;
+ usage = (ctx->size) / buf->f_bsize;
+ priv = this->private;
+
+ list_for_each_entry (limit_node, &priv->limit_head, limit_list) {
+ /* Notice that this only works for volume-level quota. */
+ if (strcmp (limit_node->path, "/") == 0) {
+ blocks = limit_node->value / buf->f_bsize;
+ if (usage > blocks) {
+ break;
+ }
+
+ buf->f_blocks = blocks;
+ avail = buf->f_blocks - usage;
+ if (buf->f_bfree > avail) {
+ buf->f_bfree = avail;
+ }
+ /*
+ * We have to assume that the total assigned quota
+ * won't cause us to dip into the reserved space,
+ * because dealing with the overcommitted cases is
+ * just too hairy (especially when different bricks
+ * might be using different reserved percentages and
+ * such).
+ */
+ buf->f_bavail = buf->f_bfree;
+ break;
+ }
+ }
- if (f_blocks && (f_blocks < statvfs->f_blocks))
- statvfs->f_blocks = f_blocks;
+unwind:
+ if (root_inode) {
+ inode_unref(root_inode);
+ }
+ STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
- f_bfree = (statvfs->f_blocks - f_bused);
- if (f_bfree >= 0)
- statvfs->f_bfree = statvfs->f_bavail = f_bfree;
- else
- statvfs->f_bfree = statvfs->f_bavail = 0;
+int32_t
+quota_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ inode_t *root_inode = NULL;
+ quota_priv_t *priv = NULL;
-unwind:
- STACK_UNWIND (frame, op_ret, op_errno, statvfs);
- return 0;
+ priv = this->private;
+
+ if (priv->consider_statfs && loc->inode) {
+ root_inode = loc->inode->table->root;
+ inode_ref(root_inode);
+ STACK_WIND_COOKIE (frame, quota_statfs_cbk, root_inode,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->statfs, loc, xdata);
+ }
+ else {
+ /*
+ * We have to make sure that we never get to quota_statfs_cbk
+ * with a cookie that points to something other than an inode,
+ * which is exactly what would happen with STACK_UNWIND using
+ * that as a callback. Therefore, use default_statfs_cbk in
+ * this case instead.
+ *
+ * Also if the option deem-statfs is not set to "on" don't
+ * bother calculating quota limit on / in statfs_cbk.
+ */
+ if (priv->consider_statfs)
+ gf_log(this->name,GF_LOG_WARNING,
+ "missing inode, cannot adjust for quota");
+ STACK_WIND (frame, default_statfs_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->statfs, loc, xdata);
+ }
+ return 0;
}
int
-quota_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+quota_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
- STACK_WIND (frame, quota_statfs_cbk,
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->statfs, loc);
+ gf_dirent_t *entry = NULL;
- return 0;
-}
+ if (op_ret <= 0)
+ goto unwind;
+ list_for_each_entry (entry, &entries->list, list) {
+ /* TODO: fill things */
+ }
+
+unwind:
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
+ return 0;
+}
int
-quota_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *value)
-{
- data_t *data = NULL;
- struct quota_priv *priv = this->private;
-
- if (op_ret >= 0) {
- data = dict_get (value, "trusted.glusterfs-quota-du");
- if (data) {
- LOCK (&priv->lock);
- {
- priv->current_disk_usage = data_to_uint64 (data);
- }
- UNLOCK (&priv->lock);
+quota_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict)
+{
+ int ret = 0;
+
+ if (dict) {
+ ret = dict_set_uint64 (dict, QUOTA_SIZE_KEY, 0);
+ if (ret < 0) {
+ goto err;
+ }
+ }
+
+ STACK_WIND (frame, quota_readdirp_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp,
+ fd, size, offset, dict);
+ return 0;
+err:
+ STACK_UNWIND_STRICT (readdirp, frame, -1, EINVAL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+quota_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ int32_t ret = 0;
+ uint64_t ctx_int = 0;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_local_t *local = NULL;
+ quota_dentry_t *dentry = NULL;
+ int64_t delta = 0;
+
+ local = frame->local;
+
+ if ((op_ret < 0) || (local == NULL)) {
+ goto out;
+ }
- return 0;
- }
- }
+ ret = inode_ctx_get (local->loc.inode, this, &ctx_int);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to get the context", local->loc.path);
+ goto out;
+ }
+
+ ctx = (quota_inode_ctx_t *)(unsigned long) ctx_int;
+
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "quota context not set in %s (gfid:%s)",
+ local->loc.path, uuid_utoa (local->loc.inode->gfid));
+ goto out;
+ }
+
+ LOCK (&ctx->lock);
+ {
+ ctx->buf = *postbuf;
+ }
+ UNLOCK (&ctx->lock);
+
+ list_for_each_entry (dentry, &ctx->parents, next) {
+ delta = (postbuf->ia_blocks - prebuf->ia_blocks) * 512;
+ quota_update_size (this, local->loc.inode,
+ dentry->name, dentry->par, delta);
+ }
- STACK_DESTROY (frame->root);
+out:
+ QUOTA_STACK_UNWIND (fallocate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
- return 0;
+ return 0;
}
+int32_t
+quota_fallocate_helper(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t mode, off_t offset, size_t len, dict_t *xdata)
+{
+ quota_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
-void
-gf_quota_get_disk_usage (xlator_t *this)
+ local = frame->local;
+ if (local == NULL) {
+ gf_log (this->name, GF_LOG_WARNING, "local is NULL");
+ goto unwind;
+ }
+
+ if (local->op_ret == -1) {
+ op_errno = local->op_errno;
+ goto unwind;
+ }
+
+ STACK_WIND (frame, quota_fallocate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len,
+ xdata);
+ return 0;
+
+unwind:
+ QUOTA_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+quota_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ int32_t ret = -1, op_errno = EINVAL;
+ int32_t parents = 0;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quota_dentry_t *dentry = NULL;
+
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO ("quota", this, unwind);
+ GF_VALIDATE_OR_GOTO (this->name, fd, unwind);
+
+ local = quota_local_new ();
+ if (local == NULL) {
+ goto unwind;
+ }
+
+ frame->local = local;
+ local->loc.inode = inode_ref (fd->inode);
+
+ ret = quota_inode_ctx_get (fd->inode, -1, this, NULL, NULL, &ctx, 0);
+ if (ctx == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "quota context not set in inode (gfid:%s)",
+ uuid_utoa (fd->inode->gfid));
+ goto unwind;
+ }
+
+ stub = fop_fallocate_stub(frame, quota_fallocate_helper, fd, mode, offset, len,
+ xdata);
+ if (stub == NULL) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO (this->name, priv, unwind);
+
+ LOCK (&ctx->lock);
+ {
+ list_for_each_entry (dentry, &ctx->parents, next) {
+ parents++;
+ }
+ }
+ UNLOCK (&ctx->lock);
+
+ /*
+ * Note that by using len as the delta we're assuming the range from
+ * offset to offset+len has not already been allocated. This can result
+ * in ENOSPC errors attempting to allocate an already allocated range.
+ */
+ local->delta = len;
+ local->stub = stub;
+ local->link_count = parents;
+
+ list_for_each_entry (dentry, &ctx->parents, next) {
+ ret = quota_check_limit (frame, fd->inode, this, dentry->name,
+ dentry->par);
+ if (ret == -1) {
+ break;
+ }
+ }
+
+ stub = NULL;
+
+ LOCK (&local->lock);
+ {
+ local->link_count = 0;
+ if (local->validate_count == 0) {
+ stub = local->stub;
+ local->stub = NULL;
+ }
+ }
+ UNLOCK (&local->lock);
+
+ if (stub != NULL) {
+ call_resume (stub);
+ }
+
+ return 0;
+
+unwind:
+ QUOTA_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+
+int32_t
+mem_acct_init (xlator_t *this)
{
- call_frame_t *frame = NULL;
- call_pool_t *pool = NULL;
- loc_t loc;
+ int ret = -1;
+
+ if (!this)
+ return ret;
- pool = this->ctx->pool;
- frame = create_frame (this, pool);
- build_root_loc (this, &loc);
+ ret = xlator_mem_acct_init (this, gf_quota_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_WARNING, "Memory accounting"
+ "init failed");
+ return ret;
+ }
- STACK_WIND (frame, quota_getxattr_cbk,
- this->children->xlator,
- this->children->xlator->fops->getxattr,
- &loc,
- "trusted.glusterfs-quota-du");
- return ;
+ return ret;
}
-void
-gf_quota_cache_sync (xlator_t *this)
+int32_t
+quota_forget (xlator_t *this, inode_t *inode)
{
- struct quota_priv *priv = NULL;
- call_frame_t *frame = NULL;
- dict_t *dict = get_new_dict ();
- loc_t loc;
+ int32_t ret = 0;
+ uint64_t ctx_int = 0;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_dentry_t *dentry = NULL, *tmp;
+ ret = inode_ctx_del (inode, this, &ctx_int);
- priv = this->private;
- build_root_loc (this, &loc);
+ if (ret < 0) {
+ return 0;
+ }
- frame = create_frame (this, this->ctx->pool);
- dict_set (dict, "trusted.glusterfs-quota-du",
- data_from_uint64 (priv->current_disk_usage));
+ ctx = (quota_inode_ctx_t *) (long)ctx_int;
- STACK_WIND (frame, quota_setxattr_cbk,
- this->children->xlator,
- this->children->xlator->fops->setxattr,
- &loc, dict, 0);
+ LOCK (&ctx->lock);
+ {
+ list_for_each_entry_safe (dentry, tmp, &ctx->parents, next) {
+ __quota_dentry_free (dentry);
+ }
+ }
+ UNLOCK (&ctx->lock);
+
+ LOCK_DESTROY (&ctx->lock);
+
+ GF_FREE (ctx);
+
+ return 0;
}
int
-quota_release (xlator_t *this, fd_t *fd)
+quota_parse_limits (quota_priv_t *priv, xlator_t *this, dict_t *xl_options,
+ struct list_head *old_list)
{
- gf_quota_cache_sync (this);
+ int32_t ret = -1;
+ char *str = NULL;
+ char *str_val = NULL;
+ char *path = NULL, *saveptr = NULL;
+ uint64_t value = 0;
+ limits_t *quota_lim = NULL, *old = NULL;
+ char *last_colon= NULL;
+
+ ret = dict_get_str (xl_options, "limit-set", &str);
+
+ if (str) {
+ path = strtok_r (str, ",", &saveptr);
+
+ while (path) {
+ last_colon = strrchr (path, ':');
+ *last_colon = '\0';
+ str_val = last_colon + 1;
+
+ ret = gf_string2bytesize (str_val, &value);
+ if (ret != 0)
+ goto err;
+
+ QUOTA_ALLOC_OR_GOTO (quota_lim, limits_t, err);
+
+ quota_lim->path = path;
+
+ quota_lim->value = value;
+
+ gf_log (this->name, GF_LOG_INFO, "%s:%"PRId64,
+ quota_lim->path, quota_lim->value);
+
+ if (old_list != NULL) {
+ list_for_each_entry (old, old_list,
+ limit_list) {
+ if (strcmp (old->path, quota_lim->path)
+ == 0) {
+ uuid_copy (quota_lim->gfid,
+ old->gfid);
+ break;
+ }
+ }
+ }
+
+ LOCK (&priv->lock);
+ {
+ list_add_tail (&quota_lim->limit_list,
+ &priv->limit_head);
+ }
+ UNLOCK (&priv->lock);
+
+ path = strtok_r (NULL, ",", &saveptr);
+ }
+ } else {
+ gf_log (this->name, GF_LOG_INFO,
+ "no \"limit-set\" option provided");
+ }
- return 0;
+ LOCK (&priv->lock);
+ {
+ list_for_each_entry (quota_lim, &priv->limit_head, limit_list) {
+ gf_log (this->name, GF_LOG_INFO, "%s:%"PRId64,
+ quota_lim->path, quota_lim->value);
+ }
+ }
+ UNLOCK (&priv->lock);
+
+ ret = 0;
+err:
+ return ret;
}
-/* notify */
int32_t
-notify (xlator_t *this,
- int32_t event,
- void *data,
- ...)
-{
- struct quota_priv *priv = this->private;
-
- switch (event)
- {
- case GF_EVENT_CHILD_UP:
- if (priv->only_first_time) {
- priv->only_first_time = 0;
- if (priv->disk_usage_limit) {
- gf_quota_get_disk_usage (this);
- }
- }
- default:
- default_notify (this, event, data);
- break;
- }
+init (xlator_t *this)
+{
+ int32_t ret = -1;
+ quota_priv_t *priv = NULL;
+
+ if ((this->children == NULL)
+ || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "FATAL: quota (%s) not configured with "
+ "exactly one child", this->name);
+ return -1;
+ }
+
+ if (this->parents == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile");
+ }
+
+ QUOTA_ALLOC_OR_GOTO (priv, quota_priv_t, err);
+
+ INIT_LIST_HEAD (&priv->limit_head);
+
+ LOCK_INIT (&priv->lock);
+
+ this->private = priv;
+
+ ret = quota_parse_limits (priv, this, this->options, NULL);
+
+ if (ret) {
+ goto err;
+ }
+
+ GF_OPTION_INIT ("timeout", priv->timeout, int64, err);
+ GF_OPTION_INIT ("deem-statfs", priv->consider_statfs, bool, err);
+
+ this->local_pool = mem_pool_new (quota_local_t, 64);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto err;
+ }
- return 0;
+ ret = 0;
+err:
+ return ret;
}
-int32_t
-init (xlator_t *this)
+void
+__quota_reconfigure_inode_ctx (xlator_t *this, inode_t *inode, limits_t *limit)
{
- int ret = 0;
- data_t *data = NULL;
- struct quota_priv *_private = NULL;
+ int ret = -1;
+ quota_inode_ctx_t *ctx = NULL;
+
+ GF_VALIDATE_OR_GOTO ("quota", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
+ GF_VALIDATE_OR_GOTO (this->name, limit, out);
+
+ ret = quota_inode_ctx_get (inode, limit->value, this, NULL, NULL, &ctx,
+ 1);
+ if ((ret == -1) || (ctx == NULL)) {
+ gf_log (this->name, GF_LOG_WARNING, "cannot create quota "
+ "context in inode(gfid:%s)",
+ uuid_utoa (inode->gfid));
+ goto out;
+ }
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: quota should have exactly one child");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
+ LOCK (&ctx->lock);
+ {
+ ctx->limit = limit->value;
+ }
+ UNLOCK (&ctx->lock);
+
+out:
+ return;
+}
- _private = CALLOC (1, sizeof (struct quota_priv));
- _private->disk_usage_limit = 0;
- data = dict_get (this->options, "disk-usage-limit");
- if (data) {
- if (gf_string2bytesize (data->data, &_private->disk_usage_limit) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number '%s' for disk-usage limit", data->data);
- ret = -1;
- goto out;
+
+void
+__quota_reconfigure (xlator_t *this, inode_table_t *itable, limits_t *limit)
+{
+ inode_t *inode = NULL;
+
+ if ((this == NULL) || (itable == NULL) || (limit == NULL)) {
+ goto out;
+ }
+
+ if (!uuid_is_null (limit->gfid)) {
+ inode = inode_find (itable, limit->gfid);
+ } else {
+ inode = inode_resolve (itable, limit->path);
+ }
+
+ if (inode != NULL) {
+ __quota_reconfigure_inode_ctx (this, inode, limit);
+ }
+
+out:
+ return;
+}
+
+
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ int32_t ret = -1;
+ quota_priv_t *priv = NULL;
+ limits_t *limit = NULL, *next = NULL, *new = NULL;
+ struct list_head head = {0, };
+ xlator_t *top = NULL;
+ char found = 0;
+
+ priv = this->private;
+
+ INIT_LIST_HEAD (&head);
+
+ LOCK (&priv->lock);
+ {
+ list_splice_init (&priv->limit_head, &head);
+ }
+ UNLOCK (&priv->lock);
+
+ ret = quota_parse_limits (priv, this, options, &head);
+ if (ret == -1) {
+ gf_log ("quota", GF_LOG_WARNING,
+ "quota reconfigure failed, "
+ "new changes will not take effect");
+ goto out;
+ }
+
+ LOCK (&priv->lock);
+ {
+ top = ((glusterfs_ctx_t *)this->ctx)->active->top;
+ GF_ASSERT (top);
+
+ list_for_each_entry (limit, &priv->limit_head, limit_list) {
+ __quota_reconfigure (this, top->itable, limit);
}
- LOCK_INIT (&_private->lock);
- _private->current_disk_usage = 0;
- }
-
- _private->min_free_disk_limit = 0;
- data = dict_get (this->options, "min-free-disk-limit");
- if (data) {
- if (gf_string2percent (data->data, &_private->min_free_disk_limit) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid percent '%s' for min-free-disk limit", data->data);
- ret = -1;
- goto out;
+ list_for_each_entry_safe (limit, next, &head, limit_list) {
+ found = 0;
+ list_for_each_entry (new, &priv->limit_head,
+ limit_list) {
+ if (strcmp (new->path, limit->path) == 0) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (!found) {
+ limit->value = -1;
+ __quota_reconfigure (this, top->itable, limit);
+ }
+
+ list_del_init (&limit->limit_list);
+ GF_FREE (limit);
}
- _private->refresh_interval = 20; /* 20seconds is default */
- data = dict_get (this->options, "refresh-interval");
- if (data) {
- if (gf_string2time (data->data,
- &_private->refresh_interval)!= 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid time '%s' for refresh "
- "interval", data->data);
- ret = -1;
- goto out;
- }
- }
}
+ UNLOCK (&priv->lock);
+
+ GF_OPTION_RECONF ("timeout", priv->timeout, options, int64, out);
+ GF_OPTION_RECONF ("deem-statfs", priv->consider_statfs, options, bool,
+ out);
- _private->only_first_time = 1;
- this->private = (void *)_private;
- ret = 0;
- out:
- return ret;
+ ret = 0;
+out:
+ return ret;
}
-void
+
+void
fini (xlator_t *this)
{
- struct quota_priv *_private = this->private;
-
- if (_private) {
- gf_quota_cache_sync (this);
- this->private = NULL;
- }
-
- return ;
+ return;
}
-struct xlator_fops fops = {
- .create = quota_create,
- .open = quota_open,
- .truncate = quota_truncate,
- .ftruncate = quota_ftruncate,
- .writev = quota_writev,
- .unlink = quota_unlink,
- .rmdir = quota_rmdir,
- .mknod = quota_mknod,
- .mkdir = quota_mkdir,
- .symlink = quota_symlink,
- .statfs = quota_statfs,
-};
-struct xlator_mops mops = {
+struct xlator_fops fops = {
+ .statfs = quota_statfs,
+ .lookup = quota_lookup,
+ .writev = quota_writev,
+ .create = quota_create,
+ .mkdir = quota_mkdir,
+ .truncate = quota_truncate,
+ .ftruncate = quota_ftruncate,
+ .unlink = quota_unlink,
+ .symlink = quota_symlink,
+ .link = quota_link,
+ .rename = quota_rename,
+ .getxattr = quota_getxattr,
+ .fgetxattr = quota_fgetxattr,
+ .stat = quota_stat,
+ .fstat = quota_fstat,
+ .readlink = quota_readlink,
+ .readv = quota_readv,
+ .fsync = quota_fsync,
+ .setattr = quota_setattr,
+ .fsetattr = quota_fsetattr,
+ .mknod = quota_mknod,
+ .setxattr = quota_setxattr,
+ .fsetxattr = quota_fsetxattr,
+ .removexattr = quota_removexattr,
+ .fremovexattr = quota_fremovexattr,
+ .readdirp = quota_readdirp,
+ .fallocate = quota_fallocate,
};
struct xlator_cbks cbks = {
- .release = quota_release
+ .forget = quota_forget
};
struct volume_options options[] = {
- { .key = {"min-free-disk-limit"},
- .type = GF_OPTION_TYPE_PERCENT
- },
- { .key = {"refresh-interval"},
- .type = GF_OPTION_TYPE_TIME
- },
- { .key = {"disk-usage-limit"},
- .type = GF_OPTION_TYPE_SIZET
- },
- { .key = {NULL} },
+ {.key = {"limit-set"}},
+ {.key = {"timeout"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 0,
+ .max = 60,
+ .default_value = "0",
+ .description = "quota caches the directory sizes on client. Timeout "
+ "indicates the timeout for the cache to be revalidated."
+ },
+ {.key = {"deem-statfs"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "If set to on, it takes quota limits into"
+ "consideration while estimating fs size. (df command)"
+ " (Default is off)."
+ },
+ {.key = {NULL}}
};
diff --git a/xlators/features/quota/src/quota.h b/xlators/features/quota/src/quota.h
new file mode 100644
index 000000000..84ecbb308
--- /dev/null
+++ b/xlators/features/quota/src/quota.h
@@ -0,0 +1,151 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "byte-order.h"
+#include "common-utils.h"
+#include "quota-mem-types.h"
+
+#define QUOTA_XATTR_PREFIX "trusted."
+#define DIRTY "dirty"
+#define SIZE "size"
+#define CONTRIBUTION "contri"
+#define VAL_LENGTH 8
+#define READDIR_BUF 4096
+
+#define QUOTA_SAFE_INCREMENT(lock, var) \
+ do { \
+ LOCK (lock); \
+ var ++; \
+ UNLOCK (lock); \
+ } while (0)
+
+#define QUOTA_SAFE_DECREMENT(lock, var) \
+ do { \
+ LOCK (lock); \
+ var --; \
+ UNLOCK (lock); \
+ } while (0)
+
+#define QUOTA_ALLOC_OR_GOTO(var, type, label) \
+ do { \
+ var = GF_CALLOC (sizeof (type), 1, \
+ gf_quota_mt_##type); \
+ if (!var) { \
+ gf_log ("", GF_LOG_ERROR, \
+ "out of memory :("); \
+ ret = -1; \
+ goto label; \
+ } \
+ } while (0);
+
+#define QUOTA_STACK_UNWIND(fop, frame, params...) \
+ do { \
+ quota_local_t *_local = NULL; \
+ xlator_t *_this = NULL; \
+ if (frame) { \
+ _local = frame->local; \
+ _this = frame->this; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT (fop, frame, params); \
+ quota_local_cleanup (_this, _local); \
+ } while (0)
+
+#define QUOTA_FREE_CONTRIBUTION_NODE(_contribution) \
+ do { \
+ list_del (&_contribution->contri_list); \
+ GF_FREE (_contribution); \
+ } while (0)
+
+#define GET_CONTRI_KEY(var, _vol_name, _gfid, _ret) \
+ do { \
+ char _gfid_unparsed[40]; \
+ uuid_unparse (_gfid, _gfid_unparsed); \
+ _ret = gf_asprintf (var, QUOTA_XATTR_PREFIX \
+ "%s.%s." CONTRIBUTION, \
+ _vol_name, _gfid_unparsed); \
+ } while (0)
+
+
+#define GET_CONTRI_KEY_OR_GOTO(var, _vol_name, _gfid, label) \
+ do { \
+ GET_CONTRI_KEY(var, _vol_name, _gfid, ret); \
+ if (ret == -1) \
+ goto label; \
+ } while (0)
+
+#define GET_DIRTY_KEY_OR_GOTO(var, _vol_name, label) \
+ do { \
+ ret = gf_asprintf (var, QUOTA_XATTR_PREFIX \
+ "%s." DIRTY, _vol_name); \
+ if (ret == -1) \
+ goto label; \
+ } while (0)
+
+struct quota_dentry {
+ char *name;
+ uuid_t par;
+ struct list_head next;
+};
+typedef struct quota_dentry quota_dentry_t;
+
+struct quota_inode_ctx {
+ int64_t size;
+ int64_t limit;
+ struct iatt buf;
+ struct list_head parents;
+ struct timeval tv;
+ gf_lock_t lock;
+};
+typedef struct quota_inode_ctx quota_inode_ctx_t;
+
+struct quota_local {
+ gf_lock_t lock;
+ uint32_t validate_count;
+ uint32_t link_count;
+ loc_t loc;
+ loc_t oldloc;
+ loc_t newloc;
+ loc_t validate_loc;
+ int64_t delta;
+ int32_t op_ret;
+ int32_t op_errno;
+ int64_t size;
+ int64_t limit;
+ char just_validated;
+ inode_t *inode;
+ call_stub_t *stub;
+};
+typedef struct quota_local quota_local_t;
+
+struct quota_priv {
+ int64_t timeout;
+ gf_boolean_t consider_statfs;
+ struct list_head limit_head;
+ gf_lock_t lock;
+};
+typedef struct quota_priv quota_priv_t;
+
+struct limits {
+ struct list_head limit_list;
+ char *path;
+ int64_t value;
+ uuid_t gfid;
+};
+typedef struct limits limits_t;
+
+uint64_t cn = 1;
diff --git a/mod_glusterfs/apache/1.3/Makefile.am b/xlators/features/read-only/Makefile.am
index d471a3f92..d471a3f92 100644
--- a/mod_glusterfs/apache/1.3/Makefile.am
+++ b/xlators/features/read-only/Makefile.am
diff --git a/xlators/features/read-only/src/Makefile.am b/xlators/features/read-only/src/Makefile.am
new file mode 100644
index 000000000..4c1462137
--- /dev/null
+++ b/xlators/features/read-only/src/Makefile.am
@@ -0,0 +1,22 @@
+xlator_LTLIBRARIES = read-only.la worm.la
+
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+noinst_HEADERS = read-only-common.h
+
+read_only_la_LDFLAGS = -module -avoid-version
+
+read_only_la_SOURCES = read-only.c read-only-common.c
+read_only_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+worm_la_LDFLAGS = -module -avoid-version
+
+worm_la_SOURCES = read-only-common.c worm.c
+worm_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
+
diff --git a/xlators/features/read-only/src/read-only-common.c b/xlators/features/read-only/src/read-only-common.c
new file mode 100644
index 000000000..56a7a7176
--- /dev/null
+++ b/xlators/features/read-only/src/read-only-common.c
@@ -0,0 +1,239 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "defaults.h"
+
+int32_t
+ro_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (xattrop, frame, -1, EROFS, NULL, xdata);
+ return 0;
+}
+
+int32_t
+ro_fxattrop (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fxattrop, frame, -1, EROFS, NULL, xdata);
+ return 0;
+}
+
+int32_t
+ro_entrylk (call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (entrylk, frame, -1, EROFS, xdata);
+ return 0;
+}
+
+int32_t
+ro_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, const char *basename, entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fentrylk, frame, -1, EROFS, xdata);
+ return 0;
+}
+
+int32_t
+ro_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (inodelk, frame, -1, EROFS, xdata);
+ return 0;
+}
+
+int32_t
+ro_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (finodelk, frame, -1, EROFS, xdata);
+ return 0;
+}
+
+int32_t
+ro_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd,
+ struct gf_flock *flock, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (lk, frame, -1, EROFS, NULL, xdata);
+ return 0;
+}
+
+int32_t
+ro_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (setattr, frame, -1, EROFS, NULL, NULL, xdata);
+ return 0;
+}
+
+int32_t
+ro_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fsetattr, frame, -1, EROFS, NULL, NULL, xdata);
+ return 0;
+}
+
+
+int32_t
+ro_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (truncate, frame, -1, EROFS, NULL, NULL, xdata);
+ return 0;
+}
+
+int32_t
+ro_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (ftruncate, frame, -1, EROFS, NULL, NULL, xdata);
+ return 0;
+}
+
+int
+ro_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (mknod, frame, -1, EROFS, NULL, NULL, NULL, NULL, xdata);
+ return 0;
+}
+
+
+int
+ro_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (mkdir, frame, -1, EROFS, NULL, NULL, NULL, NULL, xdata);
+ return 0;
+}
+
+int32_t
+ro_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (unlink, frame, -1, EROFS, NULL, NULL, xdata);
+ return 0;
+}
+
+
+int
+ro_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (rmdir, frame, -1, EROFS, NULL, NULL, xdata);
+ return 0;
+}
+
+
+int
+ro_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (symlink, frame, -1, EROFS, NULL, NULL, NULL,
+ NULL, xdata);
+ return 0;
+}
+
+
+
+int32_t
+ro_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (rename, frame, -1, EROFS, NULL, NULL, NULL, NULL,
+ NULL, xdata);
+ return 0;
+}
+
+
+int32_t
+ro_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (link, frame, -1, EROFS, NULL, NULL, NULL, NULL, xdata);
+ return 0;
+}
+
+int32_t
+ro_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (create, frame, -1, EROFS, NULL, NULL, NULL,
+ NULL, NULL, xdata);
+ return 0;
+}
+
+
+static int32_t
+ro_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+int32_t
+ro_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ if (((flags & O_ACCMODE) == O_WRONLY) ||
+ ((flags & O_ACCMODE) == O_RDWR)) {
+ STACK_UNWIND_STRICT (open, frame, -1, EROFS, NULL, xdata);
+ return 0;
+ }
+
+ STACK_WIND (frame, ro_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
+}
+
+int32_t
+ro_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fsetxattr, frame, -1, EROFS, xdata);
+ return 0;
+}
+
+int32_t
+ro_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fsyncdir, frame, -1, EROFS, xdata);
+ return 0;
+}
+
+int32_t
+ro_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t off, uint32_t flags, struct iobref *iobref, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (writev, frame, -1, EROFS, NULL, NULL, xdata);
+ return 0;
+}
+
+
+int32_t
+ro_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (setxattr, frame, -1, EROFS, xdata);
+ return 0;
+}
+
+int32_t
+ro_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (removexattr, frame, -1, EROFS, xdata);
+ return 0;
+}
diff --git a/xlators/features/read-only/src/read-only-common.h b/xlators/features/read-only/src/read-only-common.h
new file mode 100644
index 000000000..5d4c7e260
--- /dev/null
+++ b/xlators/features/read-only/src/read-only-common.h
@@ -0,0 +1,115 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "defaults.h"
+
+int32_t
+ro_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
+
+int32_t
+ro_fxattrop (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
+
+int32_t
+ro_entrylk (call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata);
+
+int32_t
+ro_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, const char *basename, entrylk_cmd cmd, entrylk_type
+ type, dict_t *xdata);
+
+int32_t
+ro_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata);
+
+int32_t
+ro_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata);
+
+int32_t
+ro_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+int32_t
+ro_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+int32_t
+ro_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+
+int32_t
+ro_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata);
+
+int32_t
+ro_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata);
+
+int
+ro_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata);
+
+int
+ro_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata);
+
+int32_t
+ro_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata);
+
+int
+ro_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata);
+
+
+int
+ro_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata);
+
+int32_t
+ro_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata);
+
+int32_t
+ro_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata);
+
+int32_t
+ro_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata);
+
+int32_t
+ro_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata);
+
+int32_t
+ro_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata);
+
+int32_t
+ro_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata);
+
+int32_t
+ro_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t off, uint32_t flags, struct iobref *iobref, dict_t *xdata);
+
+int32_t
+ro_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata);
+
+int32_t
+ro_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata);
diff --git a/xlators/features/read-only/src/read-only.c b/xlators/features/read-only/src/read-only.c
new file mode 100644
index 000000000..e49e54a1b
--- /dev/null
+++ b/xlators/features/read-only/src/read-only.c
@@ -0,0 +1,77 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "defaults.h"
+#include "read-only-common.h"
+
+int32_t
+init (xlator_t *this)
+{
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "translator not configured with exactly one child");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+
+ return 0;
+}
+
+
+void
+fini (xlator_t *this)
+{
+ return;
+}
+
+
+struct xlator_fops fops = {
+ .mknod = ro_mknod,
+ .mkdir = ro_mkdir,
+ .unlink = ro_unlink,
+ .rmdir = ro_rmdir,
+ .symlink = ro_symlink,
+ .rename = ro_rename,
+ .link = ro_link,
+ .truncate = ro_truncate,
+ .open = ro_open,
+ .writev = ro_writev,
+ .setxattr = ro_setxattr,
+ .fsetxattr = ro_fsetxattr,
+ .removexattr = ro_removexattr,
+ .fsyncdir = ro_fsyncdir,
+ .ftruncate = ro_ftruncate,
+ .create = ro_create,
+ .setattr = ro_setattr,
+ .fsetattr = ro_fsetattr,
+ .xattrop = ro_xattrop,
+ .fxattrop = ro_fxattrop,
+ .inodelk = ro_inodelk,
+ .finodelk = ro_finodelk,
+ .entrylk = ro_entrylk,
+ .fentrylk = ro_fentrylk,
+ .lk = ro_lk,
+};
+
+struct xlator_cbks cbks = {
+};
+
+struct volume_options options[] = {
+ { .key = {NULL} },
+};
diff --git a/xlators/features/read-only/src/worm.c b/xlators/features/read-only/src/worm.c
new file mode 100644
index 000000000..16c3eb3da
--- /dev/null
+++ b/xlators/features/read-only/src/worm.c
@@ -0,0 +1,89 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "defaults.h"
+#include "read-only-common.h"
+
+static int32_t
+worm_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+int32_t
+worm_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ if ((((flags & O_ACCMODE) == O_WRONLY) ||
+ ((flags & O_ACCMODE) == O_RDWR)) &&
+ !(flags & O_APPEND)) {
+ STACK_UNWIND_STRICT (open, frame, -1, EROFS, NULL, NULL);
+ return 0;
+ }
+
+ STACK_WIND (frame, worm_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
+}
+
+int32_t
+init (xlator_t *this)
+{
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "translator not configured with exactly one child");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+
+ return 0;
+}
+
+
+void
+fini (xlator_t *this)
+{
+ return;
+}
+
+struct xlator_fops fops = {
+ .open = worm_open,
+
+ .unlink = ro_unlink,
+ .rmdir = ro_rmdir,
+ .rename = ro_rename,
+ .truncate = ro_truncate,
+ .removexattr = ro_removexattr,
+ .fsyncdir = ro_fsyncdir,
+ .xattrop = ro_xattrop,
+ .inodelk = ro_inodelk,
+ .finodelk = ro_finodelk,
+ .entrylk = ro_entrylk,
+ .fentrylk = ro_fentrylk,
+ .lk = ro_lk,
+};
+
+struct xlator_cbks cbks;
+
+struct volume_options options[] = {
+ { .key = {NULL} },
+};
+
diff --git a/xlators/features/trash/src/Makefile.am b/xlators/features/trash/src/Makefile.am
index 52fced524..5251eb082 100644
--- a/xlators/features/trash/src/Makefile.am
+++ b/xlators/features/trash/src/Makefile.am
@@ -1,13 +1,16 @@
xlator_LTLIBRARIES = trash.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/features
-trash_la_LDFLAGS = -module -avoidversion
+trash_la_LDFLAGS = -module -avoid-version
trash_la_SOURCES = trash.c
trash_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+noinst_HEADERS = trash.h trash-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/features/trash/src/trash-mem-types.h b/xlators/features/trash/src/trash-mem-types.h
new file mode 100644
index 000000000..0e6ef572f
--- /dev/null
+++ b/xlators/features/trash/src/trash-mem-types.h
@@ -0,0 +1,22 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __TRASH_MEM_TYPES_H__
+#define __TRASH_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_trash_mem_types_ {
+ gf_trash_mt_trash_private_t = gf_common_mt_end + 1,
+ gf_trash_mt_char,
+ gf_trash_mt_trash_elim_pattern_t,
+ gf_trash_mt_end
+};
+#endif
+
diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c
index fa5d752ea..addeb66a0 100644
--- a/xlators/features/trash/src/trash.c
+++ b/xlators/features/trash/src/trash.c
@@ -1,520 +1,1392 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
#endif
+#include "trash.h"
+#include "trash-mem-types.h"
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
-#include "defaults.h"
+int32_t
+trash_ftruncate_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iovec *vector, int32_t count,
+ struct iatt *stbuf, struct iobref *iobuf);
-#include <libgen.h>
+int32_t
+trash_truncate_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf);
-/* TODO: currently it can work only above posix, no other translators
- * between them. Not a good thing. Try making more reliable methods.
- */
+int32_t
+trash_truncate_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent);
-struct trash_struct {
- inode_t *inode;
- loc_t loc1;
- loc_t loc2;
- char origpath[ZR_PATH_MAX];
- char newpath[ZR_PATH_MAX];
- char oldpath[ZR_PATH_MAX]; // used only in case of rename
-};
-typedef struct trash_struct trash_local_t;
+int32_t
+trash_unlink_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent);
-struct trash_priv {
- char trash_dir[ZR_PATH_MAX];
-};
-typedef struct trash_priv trash_private_t;
+void
+trash_local_wipe (trash_local_t *local)
+{
+ if (!local)
+ goto out;
+
+ loc_wipe (&local->loc);
+ loc_wipe (&local->newloc);
+
+ if (local->fd)
+ fd_unref (local->fd);
+
+ if (local->newfd)
+ fd_unref (local->newfd);
+
+ mem_put (local);
+out:
+ return;
+}
int32_t
-trash_unlink_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
+trash_common_unwind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent)
+{
+ TRASH_STACK_UNWIND (unlink, frame, op_ret, op_errno, preparent, postparent);
+ return 0;
+}
+
int32_t
-trash_rename_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
+trash_unlink_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent)
+{
+ trash_local_t *local = NULL;
+ char *tmp_str = NULL;
+ char *tmp_path = NULL;
+ char *tmp_dirname = NULL;
+ char *dir_name = NULL;
+ int32_t count = 0;
+ int32_t loop_count = 0;
+ int i = 0;
+ loc_t tmp_loc = {0,};
+
+ local = frame->local;
+ tmp_str = gf_strdup (local->newpath);
+ if (!tmp_str) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto out;
+ }
+ loop_count = local->loop_count;
+
+ if ((op_ret == -1) && (op_errno == ENOENT)) {
+ tmp_dirname = strchr (tmp_str, '/');
+ while (tmp_dirname) {
+ count = tmp_dirname - tmp_str;
+ if (count == 0)
+ count = 1;
+ i++;
+ if (i > loop_count)
+ break;
+ tmp_dirname = strchr (tmp_str + count + 1, '/');
+ }
+ tmp_path = memdup (local->newpath, count);
+ if (!tmp_path) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto out;
+ }
+
+ tmp_loc.path = tmp_path;
+
+ /* TODO:create the directory with proper permissions */
+ STACK_WIND_COOKIE (frame, trash_unlink_mkdir_cbk, tmp_path,
+ this->children->xlator,
+ this->children->xlator->fops->mkdir,
+ &tmp_loc, 0755, NULL);
+
+ goto out;
+ }
+
+ if (op_ret == 0) {
+ dir_name = dirname (tmp_str);
+ if (strcmp((char*)cookie, dir_name) == 0) {
+ tmp_loc.path = local->newpath;
+ STACK_WIND (frame, trash_unlink_rename_cbk,
+ this->children->xlator,
+ this->children->xlator->fops->rename,
+ &local->loc, &tmp_loc);
+ goto out;
+ }
+ }
+
+ LOCK (&frame->lock);
+ {
+ loop_count = ++local->loop_count;
+ }
+ UNLOCK (&frame->lock);
+ tmp_dirname = strchr (tmp_str, '/');
+ while (tmp_dirname) {
+ count = tmp_dirname - tmp_str;
+ if (count == 0)
+ count = 1;
+ i++;
+ if ((i > loop_count) || (count > PATH_MAX))
+ break;
+ tmp_dirname = strchr (tmp_str + count + 1, '/');
+ }
+ tmp_path = memdup (local->newpath, count);
+ if (!tmp_path) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto out;
+ }
+ tmp_loc.path = tmp_path;
+
+ STACK_WIND_COOKIE (frame, trash_unlink_mkdir_cbk, tmp_path,
+ this->children->xlator,
+ this->children->xlator->fops->mkdir,
+ &tmp_loc, 0755, NULL);
+
+out:
+ GF_FREE (cookie);
+ GF_FREE (tmp_str);
+
+ return 0;
+}
-/**
- * trash_common_unwind_cbk -
- */
int32_t
-trash_common_unwind_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+trash_rename_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent);
+
+int32_t
+trash_unlink_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent)
{
- trash_local_t *local = frame->local;
+ trash_local_t *local = NULL;
+ char *tmp_str = NULL;
+ char *dir_name = NULL;
+ char *tmp_cookie = NULL;
+ loc_t tmp_loc = {0,};
+
+ local = frame->local;
+
+ if ((op_ret == -1) && (op_errno == ENOENT)) {
+ tmp_str = gf_strdup (local->newpath);
+ if (!tmp_str) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ }
+ dir_name = dirname (tmp_str);
+
+ tmp_loc.path = dir_name;
+
+ tmp_cookie = gf_strdup (dir_name);
+ if (!tmp_cookie) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ }
+ /* TODO: create the directory with proper permissions */
+ STACK_WIND_COOKIE (frame, trash_unlink_mkdir_cbk, tmp_cookie,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir,
+ &tmp_loc, 0755, NULL);
+
+ GF_FREE (tmp_str);
+
+ return 0;
+ }
+
+ if ((op_ret == -1) && (op_errno == ENOTDIR)) {
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "target(%s) exists, cannot keep the copy, deleting",
+ local->newpath);
+
+ STACK_WIND (frame, trash_common_unwind_cbk,
+ this->children->xlator,
+ this->children->xlator->fops->unlink, &local->loc);
+
+ return 0;
+ }
+
+ if ((op_ret == -1) && (op_errno == EISDIR)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "target(%s) exists as directory, cannot keep copy, "
+ "deleting", local->newpath);
+
+ STACK_WIND (frame, trash_common_unwind_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, &local->loc);
+ return 0;
+ }
+
+ /* All other cases, unlink should return success */
+ TRASH_STACK_UNWIND (unlink, frame, 0, op_errno, &local->preparent,
+ &local->postparent);
+
+ return 0;
+}
+
+
+
+int32_t
+trash_common_unwind_buf_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf)
+{
+ TRASH_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf, postbuf);
+ return 0;
+}
+
+int
+trash_common_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent)
+{
+ TRASH_STACK_UNWIND (rename, frame, op_ret, op_errno, stbuf, preoldparent,
+ postoldparent, prenewparent, postnewparent);
+ return 0;
+}
+
+
+int32_t
+trash_unlink_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf)
+{
+ trash_private_t *priv = NULL;
+ trash_local_t *local = NULL;
+ loc_t new_loc = {0,};
+
+ priv = this->private;
+ local = frame->local;
+
+ if (-1 == op_ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s: %s",
+ local->loc.path, strerror (op_errno));
+ goto fail;
+ }
+
+ if ((buf->ia_size == 0) ||
+ (buf->ia_size > priv->max_trash_file_size)) {
+ /* if the file is too big or zero, just unlink it */
+
+ if (buf->ia_size > priv->max_trash_file_size) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s: file size too big (%"PRId64") to "
+ "move into trash directory",
+ local->loc.path, buf->ia_size);
+ }
+
+ STACK_WIND (frame, trash_common_unwind_cbk,
+ this->children->xlator,
+ this->children->xlator->fops->unlink, &local->loc);
+ return 0;
+ }
+
+ new_loc.path = local->newpath;
+
+ STACK_WIND (frame, trash_unlink_rename_cbk,
+ this->children->xlator,
+ this->children->xlator->fops->rename,
+ &local->loc, &new_loc);
+
+ return 0;
+
+fail:
+ TRASH_STACK_UNWIND (unlink, frame, op_ret, op_errno, buf,
+ NULL);
+
+ return 0;
+
+}
+
+int32_t
+trash_rename_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent)
+{
+ trash_local_t *local = NULL;
+ char *tmp_str = NULL;
+ char *dir_name = NULL;
+ char *tmp_path = NULL;
+ loc_t tmp_loc = {0,};
+
+ local = frame->local;
+ if ((op_ret == -1) && (op_errno == ENOENT)) {
+ tmp_str = gf_strdup (local->newpath);
+ if (!tmp_str) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ }
+ dir_name = dirname (tmp_str);
+
+ /* check for the errno, if its ENOENT create directory and call
+ * rename later
+ */
+ tmp_path = gf_strdup (dir_name);
+ if (!tmp_path) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ }
+ tmp_loc.path = tmp_path;
+
+ /* TODO: create the directory with proper permissions */
+ STACK_WIND_COOKIE (frame, trash_rename_mkdir_cbk, tmp_path,
+ this->children->xlator,
+ this->children->xlator->fops->mkdir,
+ &tmp_loc, 0755, NULL);
+
+ GF_FREE (tmp_str);
+ return 0;
+ }
+
+ if ((op_ret == -1) && (op_errno == ENOTDIR)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "target(%s) exists, cannot keep the dest entry(%s): "
+ "renaming", local->newpath, local->origpath);
+ } else if ((op_ret == -1) && (op_errno == EISDIR)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "target(%s) exists as a directory, cannot keep the "
+ "copy (%s), renaming", local->newpath, local->origpath);
+ }
+
+ STACK_WIND (frame, trash_common_rename_cbk,
+ this->children->xlator,
+ this->children->xlator->fops->rename, &local->loc,
+ &local->newloc);
+
+ return 0;
+}
+
+
+int32_t
+trash_rename_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent)
+{
+ trash_local_t *local = NULL;
+ char *tmp_str = NULL;
+ char *tmp_path = NULL;
+ char *tmp_dirname = NULL;
+ char *dir_name = NULL;
+ int32_t count = 0;
+ loc_t tmp_loc = {0,};
+
+ local = frame->local;
+ tmp_str = gf_strdup (local->newpath);
+ if (!tmp_str) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ goto out;
+ }
+
+ if ((op_ret == -1) && (op_errno == ENOENT)) {
+ tmp_dirname = strchr (tmp_str, '/');
+ while (tmp_dirname) {
+ count = tmp_dirname - tmp_str;
+ if (count == 0)
+ count = 1;
+
+ tmp_dirname = strchr (tmp_str + count + 1, '/');
+
+ tmp_path = memdup (local->newpath, count);
+ if (!tmp_path) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ }
+
+ tmp_loc.path = tmp_path;
+
+ /* TODO: create the directory with proper permissions */
+ STACK_WIND_COOKIE (frame, trash_rename_mkdir_cbk,
+ tmp_path, this->children->xlator,
+ this->children->xlator->fops->mkdir,
+ &tmp_loc, 0755, NULL);
+ }
- if (!local)
goto out;
+ }
- if (local->loc1.path)
- loc_wipe (&local->loc1);
-
- if (local->loc2.path)
- loc_wipe (&local->loc2);
+ dir_name = dirname (tmp_str);
+ if (strcmp ((char*)cookie, dir_name) == 0) {
+ tmp_loc.path = local->newpath;
- out:
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ STACK_WIND (frame, trash_rename_rename_cbk,
+ this->children->xlator,
+ this->children->xlator->fops->rename,
+ &local->newloc, &tmp_loc);
+ }
+
+out:
+ GF_FREE (cookie); /* strdup (dir_name) was sent here :) */
+ GF_FREE (tmp_str);
+
+ return 0;
}
-/**
- * trash_common_unwind_buf_cbk -
- */
int32_t
-trash_common_unwind_buf_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+trash_rename_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ trash_private_t *priv = NULL;
+ trash_local_t *local = NULL;
+ loc_t tmp_loc = {0,};
+
+ local = frame->local;
+ priv = this->private;
+
+ if (op_ret == -1) {
+ STACK_WIND (frame, trash_common_rename_cbk,
+ this->children->xlator,
+ this->children->xlator->fops->rename,
+ &local->loc, &local->newloc);
+ return 0;
+ }
+ if ((buf->ia_size == 0) ||
+ (buf->ia_size > priv->max_trash_file_size)) {
+ /* if the file is too big or zero, just unlink it */
+
+ if (buf->ia_size > priv->max_trash_file_size) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s: file size too big (%"PRId64") to "
+ "move into trash directory",
+ local->newloc.path, buf->ia_size);
+ }
+
+ STACK_WIND (frame, trash_common_rename_cbk,
+ this->children->xlator,
+ this->children->xlator->fops->rename,
+ &local->loc, &local->newloc);
+ return 0;
+ }
+
+ tmp_loc.path = local->newpath;
+
+ STACK_WIND (frame, trash_rename_rename_cbk,
+ this->children->xlator,
+ this->children->xlator->fops->rename,
+ &local->newloc, &tmp_loc);
+
+ return 0;
+}
+
+
+int32_t
+trash_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc)
+{
+ trash_elim_pattern_t *trav = NULL;
+ trash_private_t *priv = NULL;
+ trash_local_t *local = NULL;
+ char timestr[64] = {0,};
+ int32_t match = 0;
+
+ priv = this->private;
+ if (priv->eliminate) {
+ trav = priv->eliminate;
+ while (trav) {
+ if (fnmatch(trav->pattern, newloc->name, 0) == 0) {
+ match++;
+ break;
+ }
+ trav = trav->next;
+ }
+ }
+
+ if ((strncmp (oldloc->path, priv->trash_dir,
+ strlen (priv->trash_dir)) == 0) || match) {
+ /* Trying to rename from the trash dir,
+ do the actual rename */
+ STACK_WIND (frame, trash_common_rename_cbk,
+ this->children->xlator,
+ this->children->xlator->fops->rename,
+ oldloc, newloc);
+
+ return 0;
+ }
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ TRASH_STACK_UNWIND (rename, frame, -1, ENOMEM,
+ NULL, NULL, NULL, NULL, NULL);
+ return 0;
+ }
+
+ frame->local = local;
+ loc_copy (&local->loc, oldloc);
+
+ loc_copy (&local->newloc, newloc);
+
+ strcpy (local->origpath, newloc->path);
+ strcpy (local->newpath, priv->trash_dir);
+ strcat (local->newpath, newloc->path);
+
+ {
+ /* append timestamp to file name */
+ /* TODO: can we make it optional? */
+ gf_time_ftm (timestr, sizeof timestr, time (NULL),
+ gf_timefmt_F_HMS);
+ strcat (local->newpath, timestr);
+ }
+
+ /* Send a lookup call on newloc, to ensure we are not
+ overwriting */
+ STACK_WIND (frame, trash_rename_lookup_cbk,
+ this->children->xlator,
+ this->children->xlator->fops->lookup, newloc, 0);
+
+ return 0;
+}
+
+int32_t
+trash_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+ trash_elim_pattern_t *trav = NULL;
+ trash_private_t *priv = NULL;
+ trash_local_t *local = NULL;
+ char timestr[64] = {0,};
+ int32_t match = 0;
+
+ priv = this->private;
+
+ if (priv->eliminate) {
+ trav = priv->eliminate;
+ while (trav) {
+ if (fnmatch(trav->pattern, loc->name, 0) == 0) {
+ match++;
+ break;
+ }
+ trav = trav->next;
+ }
+ }
+
+ if ((strncmp (loc->path, priv->trash_dir,
+ strlen (priv->trash_dir)) == 0) || (match)) {
+ if (match) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s: file matches eliminate pattern, "
+ "not moved to trash", loc->name);
+ } else {
+ /* unlink from the trash-dir, not keeping any copy */
+ ;
+ }
+
+ STACK_WIND (frame, trash_common_unwind_cbk,
+ this->children->xlator,
+ this->children->xlator->fops->unlink, loc);
+ return 0;
+ }
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ TRASH_STACK_UNWIND (unlink, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+ }
+ frame->local = local;
+ loc_copy (&local->loc, loc);
+
+ strcpy (local->origpath, loc->path);
+ strcpy (local->newpath, priv->trash_dir);
+ strcat (local->newpath, loc->path);
+
+ {
+ /* append timestamp to file name */
+ /* TODO: can we make it optional? */
+ gf_time_fmt (timestr, sizeof timestr, time (NULL),
+ gf_timefmt_F_HMS);
+ strcat (local->newpath, timestr);
+ }
+
+ LOCK_INIT (&frame->lock);
+
+ STACK_WIND (frame, trash_unlink_stat_cbk,
+ this->children->xlator,
+ this->children->xlator->fops->stat, loc);
+
+ return 0;
+}
+
+int32_t
+trash_truncate_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent)
+{
+ /* use this Function when a failure occurs, and
+ delete the newly created file. */
+ trash_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "deleting the newly created file: %s",
+ strerror (op_errno));
+ }
+
+ STACK_WIND (frame, trash_common_unwind_buf_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->truncate,
+ &local->loc, local->fop_offset);
+
+ return 0;
+}
+
+int32_t
+trash_truncate_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iovec *vector, int32_t count,
+ struct iatt *stbuf, struct iobref *iobuf)
+{
+ trash_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "readv on the existing file failed: %s",
+ strerror (op_errno));
+
+ STACK_WIND (frame, trash_truncate_unlink_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink,
+ &local->newloc);
+ goto out;
+ }
+
+ local->fsize = stbuf->ia_size;
+ STACK_WIND (frame, trash_truncate_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev,
+ local->newfd, vector, count, local->cur_offset, 0, iobuf);
+
+out:
+ return 0;
+
+}
+
+int32_t
+trash_truncate_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf)
+{
+ trash_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret == -1) {
+ /* Let truncate work, but previous copy is not preserved. */
+ gf_log (this->name, GF_LOG_DEBUG,
+ "writev on the existing file failed: %s",
+ strerror (op_errno));
+
+ STACK_WIND (frame, trash_truncate_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, &local->newloc);
+ goto out;
+ }
+
+ if (local->cur_offset < local->fsize) {
+ local->cur_offset += GF_BLOCK_READV_SIZE;
+ /* Loop back and Read the contents again. */
+ STACK_WIND (frame, trash_truncate_readv_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv,
+ local->fd, (size_t)GF_BLOCK_READV_SIZE,
+ local->cur_offset, 0);
+ goto out;
+ }
+
+
+ /* OOFH.....Finally calling Truncate. */
+ STACK_WIND (frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, &local->loc,
+ local->fop_offset);
+
+out:
+ return 0;
+}
+
+
+
+int32_t
+trash_truncate_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd)
{
- trash_local_t *local = frame->local;
+ trash_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret == -1) {
+ //Let truncate work, but previous copy is not preserved.
+ gf_log (this->name, GF_LOG_DEBUG,
+ "open on the existing file failed: %s",
+ strerror (op_errno));
+
+ STACK_WIND (frame, trash_truncate_unlink_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink,
+ &local->newloc);
+ goto out;
+ }
+
+ local->cur_offset = local->fop_offset;
+
+ STACK_WIND (frame, trash_truncate_readv_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv,
+ local->fd, (size_t)GF_BLOCK_READV_SIZE, local->cur_offset, 0);
+out:
+ return 0;
+}
+
+
+int32_t
+trash_truncate_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent)
+{
+ trash_local_t *local = NULL;
+ char *tmp_str = NULL;
+ char *dir_name = NULL;
+ char *tmp_path = NULL;
+ int32_t flags = 0;
+ loc_t tmp_loc = {0,};
+
+ local = frame->local;
+
+ if ((op_ret == -1) && (op_errno == ENOENT)) {
+ //Creating the directory structure here.
+ tmp_str = gf_strdup (local->newpath);
+ if (!tmp_str) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ }
+ dir_name = dirname (tmp_str);
+
+ tmp_path = gf_strdup (dir_name);
+ if (!tmp_path) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ }
+ tmp_loc.path = tmp_path;
+
+ /* TODO: create the directory with proper permissions */
+ STACK_WIND_COOKIE (frame, trash_truncate_mkdir_cbk,
+ tmp_path, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir,
+ &tmp_loc, 0755, NULL);
+ GF_FREE (tmp_str);
+ goto out;
+ }
+
+ if (op_ret == -1) {
+ //Let truncate work, but previous copy is not preserved.
+ //Deleting the newly created copy.
+ gf_log (this->name, GF_LOG_DEBUG,
+ "creation of new file in trash-dir failed, "
+ "when truncate was called: %s", strerror (op_errno));
+
+ STACK_WIND (frame, trash_common_unwind_buf_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, &local->loc,
+ local->fop_offset);
+ goto out;
+ }
+
+ flags = O_RDONLY;
+
+ local->fd = fd_create (local->loc.inode, frame->root->pid);
+
+ STACK_WIND (frame, trash_truncate_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, &local->loc, flags,
+ local->fd, 0);
+out:
+ return 0;
+}
+
+int32_t
+trash_truncate_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent)
+{
+ trash_local_t *local = NULL;
+ char *tmp_str = NULL;
+ char *tmp_path = NULL;
+ char *tmp_dirname = NULL;
+ char *dir_name = NULL;
+ int32_t count = 0;
+ int32_t flags = 0;
+ int32_t loop_count = 0;
+ int i = 0;
+ loc_t tmp_loc = {0,};
+
+ local = frame->local;
if (!local)
goto out;
- if (local->loc1.path)
- loc_wipe (&local->loc1);
-
- if (local->loc2.path)
- loc_wipe (&local->loc2);
+ loop_count = local->loop_count;
+
+ tmp_str = gf_strdup (local->newpath);
+ if (!tmp_str) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ goto out;
+ }
+
+ if ((op_ret == -1) && (op_errno == ENOENT)) {
+ tmp_dirname = strchr (tmp_str, '/');
+ while (tmp_dirname) {
+ count = tmp_dirname - tmp_str;
+ if (count == 0)
+ count = 1;
+ i++;
+ if (i > loop_count)
+ break;
+ tmp_dirname = strchr (tmp_str + count + 1, '/');
+ }
+ tmp_path = memdup (local->newpath, count);
+ if (!tmp_path) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ }
+ tmp_loc.path = tmp_path;
+ STACK_WIND_COOKIE (frame, trash_truncate_mkdir_cbk,
+ tmp_path, this->children->xlator,
+ this->children->xlator->fops->mkdir,
+ &tmp_loc, 0755, NULL);
- out:
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
+ goto out;
+ }
+
+ if (op_ret == 0) {
+ dir_name = dirname (tmp_str);
+ if (strcmp ((char*)cookie, dir_name) == 0) {
+ flags = O_CREAT|O_EXCL|O_WRONLY;
+ ia_prot_t prot = {0, };
+
+ //Call create again once directory structure is created.
+ STACK_WIND (frame, trash_truncate_create_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->create,
+ &local->newloc, flags,
+ st_mode_from_ia (prot, local->loc.inode->ia_type),
+ local->newfd, NULL);
+ goto out;
+ }
+ }
+
+ LOCK (&frame->lock);
+ {
+ loop_count = ++local->loop_count;
+ }
+ UNLOCK (&frame->lock);
+
+ tmp_dirname = strchr (tmp_str, '/');
+ while (tmp_dirname) {
+ count = tmp_dirname - tmp_str;
+ if (count == 0)
+ count = 1;
+
+ i++;
+ if ((i > loop_count) || (count > PATH_MAX))
+ break;
+ tmp_dirname = strchr (tmp_str + count + 1, '/');
+ }
+ tmp_path = memdup (local->newpath, count);
+ if (!tmp_path) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ }
+ tmp_loc.path = tmp_path;
+
+ STACK_WIND_COOKIE (frame, trash_truncate_mkdir_cbk, tmp_path,
+ this->children->xlator,
+ this->children->xlator->fops->mkdir,
+ &tmp_loc, 0755, NULL);
+
+out:
+ GF_FREE (cookie); /* strdup (dir_name) was sent here :) */
+ GF_FREE (tmp_str);
+
+ return 0;
}
+
int32_t
-trash_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *stbuf)
+trash_truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf)
{
- trash_local_t *local = frame->local;
- char *tmp_str = strdup (local->newpath);
- int32_t count = 0;
- char *tmp_path = NULL;
- char *tmp_dirname = NULL;
-
- if (op_ret == -1 && op_errno == ENOENT) {
- tmp_dirname = strchr (tmp_str, '/');
- while (tmp_dirname) {
- count = tmp_dirname - tmp_str;
- if (count == 0)
- count = 1;
- tmp_path = CALLOC (1, count + 1);
- ERR_ABORT (tmp_path);
- memcpy (tmp_path, local->newpath, count);
- loc_t tmp_loc = {
- .inode = NULL,
- .path = tmp_path,
- };
-
- /* TODO:create the directory with proper permissions */
- STACK_WIND_COOKIE (frame,
- trash_mkdir_cbk,
- tmp_path,
- this->children->xlator,
- this->children->xlator->fops->mkdir,
- &tmp_loc,
- 0777);
- tmp_dirname = strchr (tmp_str + count + 1, '/');
- }
- free (cookie);
- free (tmp_str);
- return 0;
- }
- char *dir_name = dirname (tmp_str);
- if (strcmp((char*)cookie, dir_name) == 0) {
- loc_t new_loc = {
- .inode = NULL,
- .path = local->newpath
- };
- STACK_WIND (frame,
- trash_unlink_rename_cbk,
- this->children->xlator,
- this->children->xlator->fops->rename,
- &local->loc2,
- &new_loc);
-
- }
- free (cookie); /* strdup (dir_name) was sent here :) */
- free (tmp_str);
- return 0;
+ trash_private_t *priv = NULL;
+ trash_local_t *local = NULL;
+ char timestr[64] = {0,};
+ char loc_newname[PATH_MAX] = {0,};
+ int32_t flags = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "fstat on the file failed: %s",
+ strerror (op_errno));
+
+ TRASH_STACK_UNWIND (truncate, frame, op_ret, op_errno, buf, NULL);
+ return 0;
+ }
+
+ if ((buf->ia_size == 0) || (buf->ia_size > priv->max_trash_file_size)) {
+ // If the file is too big, just unlink it.
+ if (buf->ia_size > priv->max_trash_file_size)
+ gf_log (this->name, GF_LOG_DEBUG, "%s: file too big, "
+ "not moving to trash", local->loc.path);
+
+ STACK_WIND (frame, trash_common_unwind_buf_cbk,
+ this->children->xlator,
+ this->children->xlator->fops->truncate,
+ &local->loc, local->fop_offset);
+ return 0;
+ }
+
+ strcpy (local->newpath, priv->trash_dir);
+ strcat (local->newpath, local->loc.path);
+
+ {
+ gf_time_fmt (timestr, sizeof timestr, time (NULL),
+ gf_timefmt_F_HMS);
+ strcat (local->newpath, timestr);
+ }
+ strcpy (loc_newname,local->loc.name);
+ strcat (loc_newname,timestr);
+
+ local->newloc.name = gf_strdup (loc_newname);
+ local->newloc.path = gf_strdup (local->newpath);
+ local->newloc.inode = inode_new (local->loc.inode->table);
+ local->newfd = fd_create (local->newloc.inode, frame->root->pid);
+
+ flags = O_CREAT|O_EXCL|O_WRONLY;
+
+ STACK_WIND (frame, trash_truncate_create_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create,
+ &local->newloc, flags,
+ st_mode_from_ia (buf->ia_prot, local->loc.inode->ia_type),
+ local->newfd, NULL);
+
+ return 0;
}
-/**
- * trash_unlink_rename_cbk -
- */
int32_t
-trash_unlink_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+trash_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ off_t offset)
{
- trash_local_t *local = frame->local;
- if (op_ret == -1 && op_errno == ENOENT) {
- /* check for the errno, if its ENOENT create directory and call
- * rename later
- */
- char *tmp_str = strdup (local->newpath);
- char *dir_name = dirname (tmp_str);
- loc_t tmp_loc = {
- .inode = NULL,
- .path = dir_name,
- };
- /* TODO: create the directory with proper permissions */
- STACK_WIND_COOKIE (frame,
- trash_mkdir_cbk,
- strdup (dir_name),
- this->children->xlator,
- this->children->xlator->fops->mkdir,
- &tmp_loc,
- 0777);
- free (tmp_str);
- } else if (op_ret == -1 && op_errno == ENOTDIR) {
- gf_log (this->name, GF_LOG_WARNING,
- "Target exists, cannot keep the copy, deleting");
- STACK_WIND (frame,
- trash_common_unwind_cbk,
- this->children->xlator,
- this->children->xlator->fops->unlink,
- &local->loc2);
- } else if (op_ret == -1 && op_errno == EISDIR) {
- gf_log (this->name, GF_LOG_WARNING,
- "Target exists as a directory, cannot keep the copy, "
- "deleting");
- STACK_WIND (frame,
- trash_common_unwind_cbk,
- this->children->xlator,
- this->children->xlator->fops->unlink,
- &local->loc2);
- } else {
- /* */
- STACK_UNWIND (frame, 0, op_errno);
- }
-
- return 0;
+ trash_elim_pattern_t *trav = NULL;
+ trash_private_t *priv = NULL;
+ trash_local_t *local = NULL;
+ int32_t match = 0;
+
+ priv = this->private;
+ if (priv->eliminate) {
+ trav = priv->eliminate;
+ while (trav) {
+ if (fnmatch(trav->pattern, loc->name, 0) == 0) {
+ match++;
+ break;
+ }
+ trav = trav->next;
+ }
+ }
+
+ if ((strncmp (loc->path, priv->trash_dir,
+ strlen (priv->trash_dir)) == 0) || (offset) || (match)) {
+ if (match) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s: file not moved to trash as per option "
+ "'eliminate'", loc->path);
+ }
+
+ // Trying to truncate from the trash can dir,
+ // do the actual truncate without moving to trash-dir.
+ STACK_WIND (frame, trash_common_unwind_buf_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset);
+ goto out;
+ }
+
+ LOCK_INIT (&frame->lock);
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ TRASH_STACK_UNWIND (truncate, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+ }
+
+ loc_copy (&local->loc, loc);
+
+ local->fop_offset = offset;
+
+ frame->local = local;
+
+ STACK_WIND (frame, trash_truncate_stat_cbk,
+ this->children->xlator,
+ this->children->xlator->fops->stat, loc);
+
+out:
+ return 0;
}
+int32_t
+trash_ftruncate_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent)
+{
+ trash_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s: failed to unlink new file: %s",
+ local->newloc.path, strerror(op_errno));
+
+ }
+
+ STACK_WIND (frame, trash_common_unwind_buf_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->ftruncate,
+ local->fd, local->fop_offset);
+
+ return 0;
+}
-/**
- * trash_unlink -
- */
int32_t
-trash_unlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
+trash_ftruncate_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf)
{
- trash_private_t *priv = this->private;
- trash_local_t *local = NULL;
- time_t utime = 0;
- struct tm *tm = NULL;
- char timestr[256];
-
- if (strncmp (loc->path, priv->trash_dir,
- strlen(priv->trash_dir)) == 0) {
- /* Trying to rename from the trash can dir, do the
- actual unlink */
- STACK_WIND (frame,
- trash_common_unwind_cbk,
- this->children->xlator,
- this->children->xlator->fops->unlink,
- loc);
- } else {
- local = CALLOC (1, sizeof (trash_local_t));
- if (!local) {
- STACK_UNWIND (frame, -1, ENOMEM);
- return 0;
- }
- frame->local = local;
-
- loc_copy (&local->loc2, loc);
-
- strcpy (local->newpath, priv->trash_dir);
- strcat (local->newpath, loc->path);
-
- utime = time (NULL);
- tm = localtime (&utime);
- strftime (timestr, 256, ".%Y%m%d%H%M%S", tm);
- strcat (local->newpath, timestr);
-
- {
- loc_t new_loc = {
- .inode = NULL,
- .path = local->newpath
- };
- STACK_WIND (frame,
- trash_unlink_rename_cbk,
- this->children->xlator,
- this->children->xlator->fops->rename,
- loc,
- &new_loc);
- }
- }
- return 0;
+ trash_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret == -1) {
+ STACK_WIND (frame, trash_ftruncate_unlink_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink,
+ &local->newloc);
+ return 0;
+ }
+
+ if (local->cur_offset < local->fsize) {
+ local->cur_offset += GF_BLOCK_READV_SIZE;
+ STACK_WIND (frame, trash_ftruncate_readv_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv,
+ local->fd, (size_t)GF_BLOCK_READV_SIZE,
+ local->cur_offset, 0);
+ return 0;
+ }
+
+ STACK_WIND (frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, local->fd,
+ local->fop_offset);
+
+ return 0;
}
-/* */
+
int32_t
-trash_rename_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *stbuf)
+trash_ftruncate_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iovec *vector, int32_t count,
+ struct iatt *stbuf, struct iobref *iobuf)
{
- trash_local_t *local = frame->local;
- char *tmp_str = strdup (local->newpath);
-
- if (op_ret == -1 && op_errno == ENOENT) {
- int32_t count = 0;
- char *tmp_path = NULL;
- char *tmp_dirname = strchr (tmp_str, '/');
-
- while (tmp_dirname) {
- count = tmp_dirname - tmp_str;
- if (count == 0)
- count = 1;
- tmp_path = CALLOC (1, count + 2);
- ERR_ABORT (tmp_path);
- memcpy (tmp_path, local->newpath, count);
- loc_t tmp_loc = {
- .inode = NULL,
- .path = tmp_path,
- };
-
- /* TODO:create the directory with proper permissions */
- STACK_WIND_COOKIE (frame,
- trash_rename_mkdir_cbk,
- tmp_path,
- this->children->xlator,
- this->children->xlator->fops->mkdir,
- &tmp_loc,
- 0777);
- tmp_dirname = strchr (tmp_str + count + 1, '/');
- }
- free (cookie);
- free (tmp_str);
- return 0;
- }
- char *dir_name = dirname (tmp_str);
- if (strcmp((char*)cookie, dir_name) == 0) {
- loc_t new_loc = {
- .inode = NULL,
- .path = local->newpath
- };
- STACK_WIND (frame,
- trash_rename_rename_cbk,
- this->children->xlator,
- this->children->xlator->fops->rename,
- &local->loc2,
- &new_loc);
-
- }
- free (cookie); /* strdup (dir_name) was sent here :) */
- free (tmp_str);
- return 0;
+ trash_local_t *local = NULL;
+
+ local = frame->local;
+ local->fsize = stbuf->ia_size;
+
+ if (op_ret == -1) {
+ STACK_WIND (frame, trash_ftruncate_unlink_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink,
+ &local->newloc);
+ return 0;
+ }
+
+ STACK_WIND (frame, trash_ftruncate_writev_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev,
+ local->newfd, vector, count, local->cur_offset, 0, NULL);
+
+ return 0;
}
-/**
- * trash_unlink_rename_cbk -
- */
int32_t
-trash_rename_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+trash_ftruncate_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent)
{
- trash_local_t *local = frame->local;
- if (op_ret == -1 && op_errno == ENOENT) {
- /* check for the errno, if its ENOENT create directory and call
- * rename later
- */
- char *tmp_str = strdup (local->newpath);
- char *dir_name = dirname (tmp_str);
- loc_t tmp_loc = {
- .inode = NULL,
- .path = dir_name,
- };
- /* TODO: create the directory with proper permissions */
- STACK_WIND_COOKIE (frame,
- trash_rename_mkdir_cbk,
- strdup (dir_name),
- this->children->xlator,
- this->children->xlator->fops->mkdir,
- &tmp_loc,
- 0777);
- free (tmp_str);
- return 0;
- } else if (op_ret == -1 && op_errno == ENOTDIR) {
- gf_log (this->name, GF_LOG_WARNING,
- "Target exists, cannot keep the dest entry %s, "
- "renaming",
- local->loc2.path);
- } else if (op_ret == -1 && op_errno == EISDIR) {
- gf_log (this->name, GF_LOG_WARNING,
- "Target exists as a directory, cannot keep the "
- "copy %s, renaming",
- local->loc2.path);
- }
- loc_t new_loc = {
- .inode = NULL,
- .parent = local->loc2.parent,
- .path = local->loc2.path,
- };
- STACK_WIND (frame,
- trash_common_unwind_buf_cbk,
- this->children->xlator,
- this->children->xlator->fops->rename,
- &local->loc1,
- &new_loc);
-
- return 0;
+ trash_local_t *local = NULL;
+ char *tmp_str = NULL;
+ char *dir_name = NULL;
+ char *tmp_path = NULL;
+ loc_t tmp_loc = {0,};
+
+ local = frame->local;
+
+ if ((op_ret == -1) && (op_errno == ENOENT)) {
+ tmp_str = gf_strdup (local->newpath);
+ if (!tmp_str) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ }
+ dir_name = dirname (tmp_str);
+
+ tmp_path = gf_strdup (dir_name);
+ if (!tmp_path) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ }
+ tmp_loc.path = tmp_path;
+
+ /* TODO: create the directory with proper permissions */
+ STACK_WIND_COOKIE (frame, trash_truncate_mkdir_cbk,
+ tmp_path, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir,
+ &tmp_loc, 0755, NULL);
+ GF_FREE (tmp_str);
+ return 0;
+ }
+
+ if (op_ret == -1) {
+ STACK_WIND (frame, trash_common_unwind_buf_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate,
+ local->fd, local->fop_offset);
+ return 0;
+ }
+
+ STACK_WIND (frame, trash_ftruncate_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, local->fd,
+ (size_t)GF_BLOCK_READV_SIZE, local->cur_offset, 0);
+
+ return 0;
}
-/**
- * trash_rename_lookup_cbk -
- */
+
int32_t
-trash_rename_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf,
- dict_t *xattr)
+trash_ftruncate_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent)
{
- trash_local_t *local = frame->local;
-
- if (op_ret == -1) {
- STACK_WIND (frame,
- trash_common_unwind_buf_cbk,
- this->children->xlator,
- this->children->xlator->fops->rename,
- &local->loc1,
- &local->loc2);
- return 0;
- }
-
- loc_t oldloc = {
- .parent = local->loc2.parent,
- .inode = inode,
- .path = local->loc2.path,
- };
- loc_t newloc = {
- .inode = NULL,
- .path = local->newpath
- };
- STACK_WIND (frame,
- trash_rename_rename_cbk,
- this->children->xlator,
- this->children->xlator->fops->rename,
- &oldloc,
- &newloc);
-
- return 0;
+ trash_local_t *local = NULL;
+ char *tmp_str = NULL;
+ char *tmp_path = NULL;
+ char *tmp_dirname = NULL;
+ char *dir_name = NULL;
+ int32_t count = 0;
+ int32_t flags = 0;
+ int32_t loop_count = 0;
+ int i = 0;
+ loc_t tmp_loc = {0,};
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ loop_count = local->loop_count;
+
+ tmp_str = gf_strdup (local->newpath);
+ if (!tmp_str) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ goto out;
+ }
+
+ if ((op_ret == -1) && (op_errno == ENOENT)) {
+ tmp_dirname = strchr (tmp_str, '/');
+ while (tmp_dirname) {
+ count = tmp_dirname - tmp_str;
+ if (count == 0)
+ count = 1;
+ i++;
+ if (i > loop_count)
+ break;
+ tmp_dirname = strchr (tmp_str + count + 1, '/');
+ }
+ tmp_path = memdup (local->newpath, count);
+ if (!tmp_path) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ }
+ tmp_loc.path = tmp_path;
+ STACK_WIND_COOKIE (frame, trash_ftruncate_mkdir_cbk,
+ tmp_path, this->children->xlator,
+ this->children->xlator->fops->mkdir,
+ &tmp_loc, 0755, NULL);
+
+ goto out;
+ }
+
+ if (op_ret == 0) {
+ dir_name = dirname (tmp_str);
+ if (strcmp ((char*)cookie, dir_name) == 0) {
+ ia_prot_t prot = {0, };
+ flags = O_CREAT|O_EXCL|O_WRONLY;
+
+ //Call create again once directory structure is created.
+ STACK_WIND (frame, trash_ftruncate_create_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create,
+ &local->newloc, flags,
+ st_mode_from_ia (prot, local->loc.inode->ia_type),
+ local->newfd, NULL);
+ goto out;
+ }
+ }
+
+ LOCK (&frame->lock);
+ {
+ loop_count = ++local->loop_count;
+ }
+ UNLOCK (&frame->lock);
+ tmp_dirname = strchr (tmp_str, '/');
+ while (tmp_dirname) {
+ count = tmp_dirname - tmp_str;
+ if (count == 0)
+ count = 1;
+
+ i++;
+ if ((i > loop_count) || (count > PATH_MAX))
+ break;
+ tmp_dirname = strchr (tmp_str + count + 1, '/');
+ }
+ tmp_path = memdup (local->newpath, count);
+ if (!tmp_path) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ }
+ tmp_loc.path = tmp_path;
+
+ STACK_WIND_COOKIE (frame, trash_ftruncate_mkdir_cbk, tmp_path,
+ this->children->xlator,
+ this->children->xlator->fops->mkdir,
+ &tmp_loc, 0755, NULL);
+
+out:
+ GF_FREE (cookie); /* strdup (dir_name) was sent here :) */
+ GF_FREE (tmp_str);
+
+ return 0;
}
-/**
- * trash_rename -
- */
int32_t
-trash_rename (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
+trash_ftruncate_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf)
+{
+ trash_private_t *priv = NULL;
+ trash_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s: %s",local->newloc.path, strerror(op_errno));
+
+ TRASH_STACK_UNWIND (ftruncate, frame, -1, op_errno, buf, NULL);
+ return 0;
+ }
+ if ((buf->ia_size == 0) || (buf->ia_size > priv->max_trash_file_size))
+ {
+ STACK_WIND (frame, trash_common_unwind_buf_cbk,
+ this->children->xlator,
+ this->children->xlator->fops->ftruncate,
+ local->fd, local->fop_offset);
+ return 0;
+ }
+
+
+ STACK_WIND (frame, trash_ftruncate_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, &local->newloc,
+ ( O_CREAT | O_EXCL | O_WRONLY ),
+ st_mode_from_ia (buf->ia_prot, local->loc.inode->ia_type),
+ local->newfd, NULL);
+
+ return 0;
+}
+
+int32_t
+trash_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
{
- trash_private_t *priv = this->private;
- trash_local_t *local = NULL;
- time_t utime = 0;
- struct tm *tm = NULL;
- char timestr[256];
-
- if (strncmp (oldloc->path, priv->trash_dir,
- strlen(priv->trash_dir)) == 0) {
- /* Trying to rename from the trash can dir,
- do the actual rename */
- STACK_WIND (frame,
- trash_common_unwind_buf_cbk,
- this->children->xlator,
- this->children->xlator->fops->rename,
- oldloc,
- newloc);
- } else {
- /* Trying to rename a regular file from GlusterFS */
- local = CALLOC (1, sizeof (trash_local_t));
- if (!local) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- return 0;
- }
- frame->local = local;
- loc_copy (&local->loc1, oldloc);
- loc_copy (&local->loc2, newloc);
-
- strcpy (local->newpath, priv->trash_dir);
- strcat (local->newpath, newloc->path);
-
- utime = time (NULL);
- tm = localtime (&utime);
- strftime (timestr, 256, ".%Y%m%d%H%M%S", tm);
- strcat (local->newpath, timestr);
-
- /* Send a lookup call on newloc, to ensure we are not
- overwriting */
- STACK_WIND (frame,
- trash_rename_lookup_cbk,
- this->children->xlator,
- this->children->xlator->fops->lookup,
- newloc,
- 0);
- }
- return 0;
+ trash_elim_pattern_t *trav = NULL;
+ trash_private_t *priv = NULL;
+ trash_local_t *local = NULL;
+ dentry_t *dir_entry = NULL;
+ char *pathbuf = NULL;
+ inode_t *newinode = NULL;
+ char timestr[64];
+ int32_t retval = 0;
+ int32_t match = 0;
+
+ priv = this->private;
+
+ dir_entry = __dentry_search_arbit (fd->inode);
+ retval = inode_path (fd->inode, NULL, &pathbuf);
+
+ if (priv->eliminate) {
+ trav = priv->eliminate;
+ while (trav) {
+ if (fnmatch(trav->pattern, dir_entry->name, 0) == 0) {
+ match++;
+ break;
+ }
+ trav = trav->next;
+ }
+ }
+
+ if ((strncmp (pathbuf, priv->trash_dir,
+ strlen (priv->trash_dir)) == 0) ||
+ (offset >= priv->max_trash_file_size) ||
+ (!retval) ||
+ match) {
+ STACK_WIND (frame, trash_common_unwind_buf_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate,
+ fd, offset);
+ return 0;
+ }
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ TRASH_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+ }
+
+ gf_time_fmt (timestr, sizeof timestr, time (NULL), gf_timefmt_F_HMS);
+ strcpy (local->newpath, priv->trash_dir);
+ strcat (local->newpath, pathbuf);
+ strcat (local->newpath, timestr);
+
+ local->fd = fd_ref (fd);
+ newinode = inode_new (fd->inode->table);
+ local->newfd = fd_create (newinode, frame->root->pid);
+ frame->local=local;
+
+ local->newloc.inode = newinode;
+ local->newloc.path = local->newpath;
+
+ local->loc.inode = inode_ref (fd->inode);
+ local->loc.path = pathbuf;
+
+ local->fop_offset = offset;
+ local->cur_offset = offset;
+
+ STACK_WIND (frame, trash_ftruncate_fstat_cbk, this->children->xlator,
+ this->children->xlator->fops->fstat, fd);
+
+ return 0;
}
/**
@@ -523,82 +1395,143 @@ trash_rename (call_frame_t *frame,
int32_t
init (xlator_t *this)
{
- data_t *trash_dir = NULL;
- xlator_list_t *trav = NULL;
- trash_private_t *_priv = NULL;
-
- /* Create .trashcan directory in init */
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "not configured with exactly one child. exiting");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
- trav = this->children;
- while (trav->xlator->children)
- trav = trav->xlator->children;
-
- if (strncmp ("storage/", trav->xlator->type, 8))
- {
- gf_log (this->name, GF_LOG_ERROR,
- "'trash' translator not loaded over storage "
- "translator, not a supported setup");
- return -1;
- }
-
- _priv = CALLOC (1, sizeof (*_priv));
- ERR_ABORT (_priv);
-
- trash_dir = dict_get (this->options, "trash-dir");
- if (!trash_dir) {
- gf_log (this->name, GF_LOG_WARNING,
- "no option specified for 'trash-dir', "
- "using \"/.trashcan/\"");
- strcpy (_priv->trash_dir, "/.trashcan");
- } else {
- /* Need a path with '/' as the first char, if not
- given, append it */
- if (trash_dir->data[0] == '/') {
- strcpy (_priv->trash_dir, trash_dir->data);
- } else {
- strcpy (_priv->trash_dir, "/");
- strcat (_priv->trash_dir, trash_dir->data);
- }
- }
-
- this->private = (void *)_priv;
- return 0;
+ data_t *data = NULL;
+ trash_private_t *_priv = NULL;
+ trash_elim_pattern_t *trav = NULL;
+ char *tmp_str = NULL;
+ char *strtokptr = NULL;
+ char *component = NULL;
+ char trash_dir[PATH_MAX] = {0,};
+ uint64_t max_trash_file_size64 = 0;
+
+ /* Create .trashcan directory in init */
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "not configured with exactly one child. exiting");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+
+ _priv = GF_CALLOC (1, sizeof (*_priv), gf_trash_mt_trash_private_t);
+ if (!_priv) {
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ return -1;
+ }
+
+ data = dict_get (this->options, "trash-dir");
+ if (!data) {
+ gf_log (this->name, GF_LOG_INFO,
+ "no option specified for 'trash-dir', "
+ "using \"/.trashcan/\"");
+ _priv->trash_dir = gf_strdup ("/.trashcan");
+ } else {
+ /* Need a path with '/' as the first char, if not
+ given, append it */
+ if (data->data[0] == '/') {
+ _priv->trash_dir = gf_strdup (data->data);
+ } else {
+ /* TODO: Make sure there is no ".." in the path */
+ strcpy (trash_dir, "/");
+ strcat (trash_dir, data->data);
+ _priv->trash_dir = gf_strdup (trash_dir);
+ }
+ }
+
+ data = dict_get (this->options, "eliminate-pattern");
+ if (!data) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "no option specified for 'eliminate', using NULL");
+ } else {
+ tmp_str = gf_strdup (data->data);
+ if (!tmp_str) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ }
+
+ /* Match Filename to option specified in eliminate. */
+ component = strtok_r (tmp_str, "|", &strtokptr);
+ while (component) {
+ trav = GF_CALLOC (1, sizeof (*trav),
+ gf_trash_mt_trash_elim_pattern_t);
+ if (!trav) {
+ gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+ break;
+ }
+ trav->pattern = component;
+ trav->next = _priv->eliminate;
+ _priv->eliminate = trav;
+
+ component = strtok_r (NULL, "|", &strtokptr);
+ }
+ }
+
+ /* TODO: do gf_string2sizet () */
+ data = dict_get (this->options, "max-trashable-file-size");
+ if (!data) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no option specified for 'max-trashable-file-size', "
+ "using default = %lld MB",
+ GF_DEFAULT_MAX_FILE_SIZE / GF_UNIT_MB);
+ _priv->max_trash_file_size = GF_DEFAULT_MAX_FILE_SIZE;
+ } else {
+ (void)gf_string2bytesize (data->data,
+ &max_trash_file_size64);
+ if( max_trash_file_size64 > GF_ALLOWED_MAX_FILE_SIZE ) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Size specified for max-size(in MB) is too "
+ "large so using 1GB as max-size (NOT IDEAL)");
+ _priv->max_trash_file_size = GF_ALLOWED_MAX_FILE_SIZE;
+ } else
+ _priv->max_trash_file_size = max_trash_file_size64;
+ gf_log (this->name, GF_LOG_DEBUG, "%"GF_PRI_SIZET" max-size",
+ _priv->max_trash_file_size);
+ }
+
+ this->local_pool = mem_pool_new (trash_local_t, 64);
+ if (!this->local_pool) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ return -1;
+ }
+
+
+ this->private = (void *)_priv;
+ return 0;
}
void
fini (xlator_t *this)
{
- trash_private_t *priv = this->private;
- FREE (priv);
- return;
-}
-
+ trash_private_t *priv = NULL;
-struct xlator_fops fops = {
- .unlink = trash_unlink,
- .rename = trash_rename,
-};
+ priv = this->private;
+ GF_FREE (priv);
-struct xlator_mops mops = {
+ return;
+}
+struct xlator_fops fops = {
+ .unlink = trash_unlink,
+ .rename = trash_rename,
+ .truncate = trash_truncate,
+ .ftruncate = trash_ftruncate,
};
struct xlator_cbks cbks = {
};
struct volume_options options[] = {
- { .key = { "trash-dir" },
- .type = GF_OPTION_TYPE_PATH
- },
- { .key = {NULL} },
+ { .key = { "trash-directory" },
+ .type = GF_OPTION_TYPE_PATH,
+ },
+ { .key = { "eliminate-pattern" },
+ .type = GF_OPTION_TYPE_STR,
+ },
+ { .key = { "max-trashable-file-size" },
+ .type = GF_OPTION_TYPE_SIZET,
+ },
+ { .key = {NULL} },
};
diff --git a/xlators/features/trash/src/trash.h b/xlators/features/trash/src/trash.h
new file mode 100644
index 000000000..9a7c03361
--- /dev/null
+++ b/xlators/features/trash/src/trash.h
@@ -0,0 +1,79 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __TRASH_H__
+#define __TRASH_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "defaults.h"
+#include "inode.c"
+#include "fnmatch.h"
+
+#include <libgen.h>
+
+#ifndef GF_BLOCK_READV_SIZE
+#define GF_BLOCK_READV_SIZE (128 * GF_UNIT_KB)
+#endif
+
+#ifndef GF_DEFAULT_MAX_FILE_SIZE
+#define GF_DEFAULT_MAX_FILE_SIZE (200 * GF_UNIT_MB)
+#endif
+
+#ifndef GF_ALLOWED_MAX_FILE_SIZE
+#define GF_ALLOWED_MAX_FILE_SIZE (1 * GF_UNIT_GB)
+#endif
+
+
+struct trash_struct {
+ fd_t *fd; /* for the fd of existing file */
+ fd_t *newfd; /* for the newly created file */
+ loc_t loc; /* to store the location of the existing file */
+ loc_t newloc; /* to store the location for the new file */
+ size_t fsize; /* for keeping the size of existing file */
+ off_t cur_offset; /* current offset for read and write ops */
+ off_t fop_offset;
+ char origpath[PATH_MAX];
+ char newpath[PATH_MAX];
+ int32_t loop_count;
+ struct iatt preparent;
+ struct iatt postparent;
+};
+typedef struct trash_struct trash_local_t;
+
+struct _trash_elim_pattern;
+typedef struct _trash_elim_pattern {
+ struct _trash_elim_pattern *next;
+ char *pattern;
+} trash_elim_pattern_t;
+
+struct trash_priv {
+ char *trash_dir;
+ trash_elim_pattern_t *eliminate;
+ size_t max_trash_file_size;
+};
+typedef struct trash_priv trash_private_t;
+
+#define TRASH_STACK_UNWIND(op, frame, params ...) do { \
+ trash_local_t *__local = NULL; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ STACK_UNWIND_STRICT (op, frame, params); \
+ trash_local_wipe (__local); \
+ } while (0)
+
+
+#endif /* __TRASH_H__ */
diff --git a/xlators/lib/src/libxlator.c b/xlators/lib/src/libxlator.c
new file mode 100644
index 000000000..9e5357255
--- /dev/null
+++ b/xlators/lib/src/libxlator.c
@@ -0,0 +1,470 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "mem-types.h"
+#include "libxlator.h"
+
+
+int marker_xtime_default_gauge[] = {
+ [MCNT_FOUND] = 1,
+ [MCNT_NOTFOUND] = -1,
+ [MCNT_ENODATA] = -1,
+ [MCNT_ENOTCONN] = -1,
+ [MCNT_ENOENT] = -1,
+ [MCNT_EOTHER] = -1,
+};
+
+int marker_uuid_default_gauge[] = {
+ [MCNT_FOUND] = 1,
+ [MCNT_NOTFOUND] = 0,
+ [MCNT_ENODATA] = 0,
+ [MCNT_ENOTCONN] = 0,
+ [MCNT_ENOENT] = 0,
+ [MCNT_EOTHER] = 0,
+};
+
+static int marker_idx_errno_map[] = {
+ [MCNT_FOUND] = EINVAL,
+ [MCNT_NOTFOUND] = EINVAL,
+ [MCNT_ENOENT] = ENOENT,
+ [MCNT_ENOTCONN] = ENOTCONN,
+ [MCNT_ENODATA] = ENODATA,
+ [MCNT_EOTHER] = EINVAL,
+ [MCNT_MAX] = 0,
+};
+
+/*Copy the contents of oldtimebuf to newtimbuf*/
+static void
+update_timebuf (uint32_t *oldtimbuf, uint32_t *newtimebuf)
+{
+ newtimebuf[0] = (oldtimbuf[0]);
+ newtimebuf[1] = (oldtimbuf[1]);
+}
+
+/* Convert Timebuf in network order to host order */
+static void
+get_hosttime (uint32_t *oldtimbuf, uint32_t *newtimebuf)
+{
+ newtimebuf[0] = ntohl (oldtimbuf[0]);
+ newtimebuf[1] = ntohl (oldtimbuf[1]);
+}
+
+
+
+/* Match the Incoming trusted.glusterfs.<uuid>.xtime against volume uuid */
+int
+match_uuid_local (const char *name, char *uuid)
+{
+ name = strtail ((char *)name, MARKER_XATTR_PREFIX);
+ if (!name || name++[0] != '.')
+ return -1;
+
+ name = strtail ((char *)name, uuid);
+ if (!name || strcmp (name, ".xtime") != 0)
+ return -1;
+
+ return 0;
+}
+
+static void
+marker_local_incr_errcount (xl_marker_local_t *local, int op_errno)
+{
+ marker_result_idx_t i = -1;
+
+ if (!local)
+ return;
+
+ switch (op_errno) {
+ case ENODATA:
+ i = MCNT_ENODATA;
+ break;
+ case ENOENT:
+ i = MCNT_ENOENT;
+ break;
+ case ENOTCONN:
+ i = MCNT_ENOTCONN;
+ break;
+ default:
+ i = MCNT_EOTHER;
+ break;
+ }
+
+ local->count[i]++;
+}
+
+static int
+evaluate_marker_results (int *gauge, int *count)
+{
+ int i = 0;
+ int op_errno = 0;
+ gf_boolean_t sane = _gf_true;
+
+ /* check if the policy of the gauge is violated;
+ * if yes, try to get the best errno, ie. look
+ * for the first position where there is a more
+ * specific kind of vioilation than the generic EINVAL
+ */
+ for (i = 0; i < MCNT_MAX; i++) {
+ if (sane) {
+ if ((gauge[i] > 0 && count[i] < gauge[i]) ||
+ (gauge[i] < 0 && count[i] >= -gauge[i])) {
+ sane = _gf_false;
+ /* generic action: adopt corresponding errno */
+ op_errno = marker_idx_errno_map[i];
+ }
+ } else {
+ /* already insane; trying to get a more informative
+ * errno by checking subsequent counters
+ */
+ if (count[i] > 0)
+ op_errno = marker_idx_errno_map[i];
+ }
+ if (op_errno && op_errno != EINVAL)
+ break;
+ }
+
+ return op_errno;
+}
+
+/* Aggregate all the <volid>.xtime attrs of the cluster and send the max*/
+int32_t
+cluster_markerxtime_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
+
+{
+
+ int32_t callcnt = 0;
+ int ret = -1;
+ uint32_t *net_timebuf = NULL;
+ uint32_t host_timebuf[2] = {0,};
+ char *marker_xattr = NULL;
+ xl_marker_local_t *local = NULL;
+ char *vol_uuid = NULL;
+ char need_unwind = 0;
+
+ if (!this || !frame || !frame->local || !cookie) {
+ gf_log ("", GF_LOG_DEBUG, "possible NULL deref");
+ need_unwind = 1;
+ goto out;
+ }
+
+ local = frame->local;
+ if (!local || !local->vol_uuid) {
+ gf_log (this->name, GF_LOG_DEBUG, "possible NULL deref");
+ need_unwind = 1;
+ goto out;
+ }
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ vol_uuid = local->vol_uuid;
+
+ if (op_ret) {
+ marker_local_incr_errcount (local, op_errno);
+ goto unlock;
+ }
+
+ if (!gf_asprintf (&marker_xattr, "%s.%s.%s",
+ MARKER_XATTR_PREFIX, vol_uuid, XTIME)) {
+ op_errno = ENOMEM;
+ goto unlock;
+ }
+
+
+ if (dict_get_ptr (dict, marker_xattr, (void **)&net_timebuf)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unable to get <uuid>.xtime attr");
+ local->count[MCNT_NOTFOUND]++;
+ goto unlock;
+ }
+
+ if (local->count[MCNT_FOUND]) {
+ get_hosttime (net_timebuf, host_timebuf);
+ if ( (host_timebuf[0]>local->host_timebuf[0]) ||
+ (host_timebuf[0] == local->host_timebuf[0] &&
+ host_timebuf[1] >= local->host_timebuf[1])) {
+ update_timebuf (net_timebuf, local->net_timebuf);
+ update_timebuf (host_timebuf, local->host_timebuf);
+ }
+
+ } else {
+ get_hosttime (net_timebuf, local->host_timebuf);
+ update_timebuf (net_timebuf, local->net_timebuf);
+ local->count[MCNT_FOUND]++;
+ }
+
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ op_ret = 0;
+ op_errno = 0;
+ need_unwind = 1;
+
+ if (local->count[MCNT_FOUND]) {
+ if (!dict)
+ dict = dict_new();
+
+ ret = dict_set_static_bin (dict, marker_xattr,
+ (void *)local->net_timebuf, 8);
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+ }
+
+ op_errno = evaluate_marker_results (local->gauge, local->count);
+ if (op_errno)
+ op_ret = -1;
+ }
+
+out:
+ if (need_unwind && local && local->xl_specf_unwind) {
+ frame->local = local->xl_local;
+ local->xl_specf_unwind (frame, op_ret,
+ op_errno, dict, xdata);
+ } else if (need_unwind) {
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno,
+ dict, xdata);
+ }
+
+ GF_FREE (marker_xattr);
+ return 0;
+
+}
+
+int32_t
+cluster_markeruuid_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
+{
+ int32_t callcnt = 0;
+ struct volume_mark *volmark = NULL;
+ xl_marker_local_t *local = NULL;
+ int32_t ret = -1;
+ char need_unwind = 0;
+ char *vol_uuid = NULL;
+
+ if (!this || !frame || !cookie) {
+ gf_log ("", GF_LOG_DEBUG, "possible NULL deref");
+ need_unwind = 1;
+ goto out;
+ }
+
+ local = frame->local;
+
+ if (!local) {
+ gf_log (this->name, GF_LOG_DEBUG, "possible NULL deref");
+ need_unwind = 1;
+ goto out;
+ }
+
+ LOCK (&frame->lock);
+ {
+ callcnt = --local->call_count;
+ vol_uuid = local->vol_uuid;
+
+ if (op_ret) {
+ marker_local_incr_errcount (local, op_errno);
+ goto unlock;
+ }
+
+ ret = dict_get_bin (dict, GF_XATTR_MARKER_KEY,
+ (void *)&volmark);
+ if (ret)
+ goto unlock;
+
+ if (local->count[MCNT_FOUND]) {
+ if ((local->volmark->major != volmark->major) ||
+ (local->volmark->minor != volmark->minor)) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto unlock;
+ }
+
+ if (local->retval)
+ goto unlock;
+ else if (volmark->retval) {
+ GF_FREE (local->volmark);
+ local->volmark =
+ memdup (volmark, sizeof (*volmark));
+ local->retval = volmark->retval;
+ } else if ((volmark->sec > local->volmark->sec) ||
+ ((volmark->sec == local->volmark->sec) &&
+ (volmark->usec >= local->volmark->usec))) {
+ GF_FREE (local->volmark);
+ local->volmark =
+ memdup (volmark, sizeof (*volmark));
+ }
+
+ } else {
+ local->volmark = memdup (volmark, sizeof (*volmark));
+ VALIDATE_OR_GOTO (local->volmark, unlock);
+ uuid_unparse (volmark->uuid, vol_uuid);
+ if (volmark->retval)
+ local->retval = volmark->retval;
+ local->count[MCNT_FOUND]++;
+ }
+ }
+unlock:
+ UNLOCK (&frame->lock);
+
+ if (!callcnt) {
+ op_ret = 0;
+ op_errno = 0;
+ need_unwind = 1;
+
+ if (local->count[MCNT_FOUND]) {
+ if (!dict)
+ dict = dict_new();
+
+ if (dict_set_bin (dict, GF_XATTR_MARKER_KEY,
+ local->volmark,
+ sizeof (struct volume_mark))) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ }
+ }
+ op_errno = evaluate_marker_results (local->gauge, local->count);
+ if (op_errno)
+ op_ret = -1;
+ }
+
+ out:
+ if (need_unwind && local && local->xl_specf_unwind) {
+ frame->local = local->xl_local;
+ local->xl_specf_unwind (frame, op_ret,
+ op_errno, dict, xdata);
+ return 0;
+ } else if (need_unwind){
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno,
+ dict, xdata);
+ }
+ return 0;
+}
+
+
+int32_t
+cluster_getmarkerattr (call_frame_t *frame,xlator_t *this, loc_t *loc,
+ const char *name, void *xl_local,
+ xlator_specf_unwind_t xl_specf_getxattr_unwind,
+ xlator_t **sub_volumes, int count, int type,
+ int *gauge, char *vol_uuid)
+{
+ int i = 0;
+ xl_marker_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO (loc->path, err);
+ VALIDATE_OR_GOTO (loc->inode, err);
+ VALIDATE_OR_GOTO (name, err);
+ VALIDATE_OR_GOTO (xl_specf_getxattr_unwind, err);
+
+ local = GF_CALLOC (sizeof (struct marker_str), 1,
+ gf_common_mt_libxl_marker_local);
+
+ if (!local)
+ goto err;
+
+ local->xl_local = xl_local;
+ local->call_count = count;
+ local->xl_specf_unwind = xl_specf_getxattr_unwind;
+ local->vol_uuid = vol_uuid;
+ memcpy (local->gauge, gauge, sizeof (local->gauge));
+
+ frame->local = local;
+
+ for (i=0; i < count; i++) {
+ if (MARKER_UUID_TYPE == type)
+ STACK_WIND (frame, cluster_markeruuid_cbk,
+ *(sub_volumes + i),
+ (*(sub_volumes + i))->fops->getxattr,
+ loc, name, NULL);
+ else if (MARKER_XTIME_TYPE == type)
+ STACK_WIND (frame, cluster_markerxtime_cbk,
+ *(sub_volumes + i),
+ (*(sub_volumes + i))->fops->getxattr,
+ loc, name, NULL);
+ else {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unrecognized type (%d) of marker attr "
+ "received", type);
+ STACK_WIND (frame, default_getxattr_cbk,
+ *(sub_volumes + i),
+ (*(sub_volumes + i))->fops->getxattr,
+ loc, name, NULL);
+ break;
+ }
+ }
+
+ return 0;
+err:
+ return -1;
+
+}
+
+int
+gf_get_min_stime (xlator_t *this, dict_t *dst, char *key, data_t *value)
+{
+ int ret = -1;
+ uint32_t *net_timebuf = NULL;
+ uint32_t *value_timebuf = NULL;
+ uint32_t host_timebuf[2] = {0,};
+ uint32_t host_value_timebuf[2] = {0,};
+
+ /* stime should be minimum of all the other nodes */
+ ret = dict_get_bin (dst, key, (void **)&net_timebuf);
+ if (ret < 0) {
+ net_timebuf = GF_CALLOC (1, sizeof (int64_t),
+ gf_common_mt_char);
+ if (!net_timebuf)
+ goto out;
+
+ ret = dict_set_bin (dst, key, net_timebuf, sizeof (int64_t));
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "key=%s: dict set failed", key);
+ goto error;
+ }
+ }
+
+ value_timebuf = data_to_bin (value);
+ if (!value_timebuf) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "key=%s: getting value of stime failed", key);
+ ret = -1;
+ goto out;
+ }
+
+ get_hosttime (value_timebuf, host_value_timebuf);
+ get_hosttime (net_timebuf, host_timebuf);
+
+ /* can't use 'min()' macro here as we need to compare two fields
+ in the array, selectively */
+ if ((host_value_timebuf[0] > host_timebuf[0]) ||
+ ((host_value_timebuf[0] == host_timebuf[0]) &&
+ (host_value_timebuf[1] > host_timebuf[1]))) {
+ update_timebuf (value_timebuf, net_timebuf);
+ }
+
+ ret = 0;
+out:
+ return ret;
+error:
+ /* To be used only when net_timebuf is not set in the dict */
+ if (net_timebuf)
+ GF_FREE (net_timebuf);
+
+ return ret;
+}
diff --git a/xlators/lib/src/libxlator.h b/xlators/lib/src/libxlator.h
new file mode 100644
index 000000000..1d5e1657f
--- /dev/null
+++ b/xlators/lib/src/libxlator.h
@@ -0,0 +1,153 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _LIBXLATOR_H
+#define _LIBXLATOR_H
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "logging.h"
+#include "defaults.h"
+#include "common-utils.h"
+#include "compat.h"
+#include "compat-errno.h"
+
+
+#define MARKER_XATTR_PREFIX "trusted.glusterfs"
+#define XTIME "xtime"
+#define VOLUME_MARK "volume-mark"
+#define GF_XATTR_MARKER_KEY MARKER_XATTR_PREFIX "." VOLUME_MARK
+#define UUID_SIZE 36
+#define MARKER_UUID_TYPE 1
+#define MARKER_XTIME_TYPE 2
+#define GF_XATTR_QUOTA_SIZE_KEY "trusted.glusterfs.quota.size"
+
+
+typedef int32_t (*xlator_specf_unwind_t) (call_frame_t *frame,
+ int op_ret, int op_errno,
+ dict_t *dict, dict_t *xdata);
+
+
+struct volume_mark {
+ uint8_t major;
+ uint8_t minor;
+ uint8_t uuid[16];
+ uint8_t retval;
+ uint32_t sec;
+ uint32_t usec;
+}__attribute__ ((__packed__));
+
+
+/*
+ * The enumerated type here
+ * is used to index two kind
+ * of integer arrays:
+ * - gauges
+ * - counters
+
+ * A counter is used internally,
+ * in getxattr callbacks, to count
+ * the results, categorized as
+ * the enum names suggest. So values
+ * in the counter are always non-negative.
+
+ * Gauges are part of the API.
+ * The caller passes one to the
+ * top-level aggregator function,
+ * cluster_getmarkerattr(). The gauge
+ * defines an evaluation policy for the
+ * counter. That is, at the
+ * end of the aggregation process
+ * the gauge is matched against the
+ * counter, and the policy
+ * represented by the gauge decides
+ * whether to return with success or failure,
+ * and in latter case, what particular failure
+ * case (errno).
+
+ * The rules are the following: for some index i,
+ * - if gauge[i] == 0, no requirement is set
+ * against counter[i];
+ * - if gauge[i] > 0, counter[i] >= gauge[i]
+ * is required;
+ * - if gauge[i] < 0, counter[i] < |gauge[i]|
+ * is required.
+
+ * If the requirement is not met, then i is mapped
+ * to the respective errno (MCNT_ENOENT -> ENOENT),
+ * or in lack of that, EINVAL.
+
+ * Cf. evaluate_marker_results() and marker_idx_errno_map[]
+ * in libxlator.c
+
+ * We provide two default gauges, one inteded for xtime
+ * aggregation, other for volume mark aggregation. The
+ * policies they represent agree with the hard-coded
+ * one prior to gauges. Cf. marker_xtime_default_gauge
+ * and marker_uuid_default_gauge in libxlator.c
+ */
+
+typedef enum {
+ MCNT_FOUND,
+ MCNT_NOTFOUND,
+ MCNT_ENODATA,
+ MCNT_ENOTCONN,
+ MCNT_ENOENT,
+ MCNT_EOTHER,
+ MCNT_MAX
+} marker_result_idx_t;
+
+extern int marker_xtime_default_gauge[];
+extern int marker_uuid_default_gauge[];
+
+struct marker_str {
+ struct volume_mark *volmark;
+ data_t *data;
+
+ uint32_t host_timebuf[2];
+ uint32_t net_timebuf[2];
+ int32_t call_count;
+ int gauge[MCNT_MAX];
+ int count[MCNT_MAX];
+
+ xlator_specf_unwind_t xl_specf_unwind;
+ void *xl_local;
+ char *vol_uuid;
+ uint8_t retval;
+};
+
+typedef struct marker_str xl_marker_local_t;
+
+int32_t
+cluster_markerxtime_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata);
+
+int32_t
+cluster_markeruuid_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata);
+
+int32_t
+cluster_getmarkerattr (call_frame_t *frame,xlator_t *this, loc_t *loc,
+ const char *name, void *xl_local,
+ xlator_specf_unwind_t xl_specf_getxattr_unwind,
+ xlator_t **sub_volumes, int count, int type,
+ int *gauge, char *vol_uuid);
+
+int
+match_uuid_local (const char *name, char *uuid);
+
+int
+gf_get_min_stime (xlator_t *this, dict_t *dst, char *key, data_t *value);
+
+#endif /* !_LIBXLATOR_H */
diff --git a/xlators/meta/src/Makefile.am b/xlators/meta/src/Makefile.am
index 385ff553f..f8fa7d4cb 100644
--- a/xlators/meta/src/Makefile.am
+++ b/xlators/meta/src/Makefile.am
@@ -4,7 +4,8 @@ xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/
meta_so_SOURCES = meta.c tree.c misc.c view.c
noinst_HEADERS = meta.h tree.h misc.h view.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall
CLEANFILES =
diff --git a/xlators/meta/src/meta-mem-types.h b/xlators/meta/src/meta-mem-types.h
new file mode 100644
index 000000000..62028b246
--- /dev/null
+++ b/xlators/meta/src/meta-mem-types.h
@@ -0,0 +1,25 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __META_MEM_TYPES_H__
+#define __META_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_meta_mem_types_ {
+ gf_meta_mt__open_local = gf_common_mt_end + 1,
+ gf_meta_mt_dir_entry_t,
+ gf_meta_mt_meta_dirent_t,
+ gf_meta_mt_meta_private_t,
+ gf_meta_mt_stat,
+ gf_meta_mt_end
+};
+#endif
+
diff --git a/xlators/meta/src/meta.c b/xlators/meta/src/meta.c
index e0cfe630c..e69719f3c 100644
--- a/xlators/meta/src/meta.c
+++ b/xlators/meta/src/meta.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
@@ -32,6 +22,7 @@
#include "meta.h"
#include "view.h"
+#include "meta-mem-types.h"
int32_t
meta_getattr_cbk (call_frame_t *frame,
@@ -516,9 +507,9 @@ meta_open (call_frame_t *frame, xlator_t *this,
if (file) {
if (file->fops && file->fops->open) {
- struct _open_local *local = CALLOC (1, sizeof (struct _open_local));
+ struct _open_local *local = GF_CALLOC (1, sizeof (struct _open_local), gf_meta_mt__open_local);
ERR_ABORT (local);
- local->path = strdup (path);
+ local->path = gf_strdup (path);
frame->local = local;
STACK_WIND (frame, meta_open_cbk,
this, file->fops->open,
@@ -528,7 +519,7 @@ meta_open (call_frame_t *frame, xlator_t *this,
else {
dict_t *ctx = get_new_dict ();
dict_ref (ctx);
- dict_set (ctx, this->name, str_to_data (strdup (path)));
+ dict_set (ctx, this->name, str_to_data (gf_strdup (path)));
STACK_UNWIND (frame, 0, 0, ctx, file->stbuf);
return 0;
}
@@ -551,9 +542,9 @@ meta_create (call_frame_t *frame, xlator_t *this,
if (file) {
if (file->fops && file->fops->create) {
- struct _open_local *local = CALLOC (1, sizeof (struct _open_local));
+ struct _open_local *local = GF_CALLOC (1, sizeof (struct _open_local), gf_meta_mt__open_local);
ERR_ABORT (local);
- local->path = strdup (path);
+ local->path = gf_strdup (path);
frame->local = local;
STACK_WIND (frame, meta_open_cbk,
this, file->fops->create,
@@ -826,7 +817,7 @@ meta_opendir (call_frame_t *frame,
if (dir) {
dict_t *ctx = get_new_dict ();
- dict_set (ctx, this->name, str_to_data (strdup (path)));
+ dict_set (ctx, this->name, str_to_data (gf_strdup (path)));
STACK_UNWIND (frame, 0, 0, ctx);
return 0;
}
@@ -850,10 +841,11 @@ meta_readdir_cbk (call_frame_t *frame,
meta_private_t *priv = (meta_private_t *)this->private;
if ((int) cookie == 1) {
- dir_entry_t *dir = CALLOC (1, sizeof (dir_entry_t));
+ dir_entry_t *dir = GF_CALLOC (1, sizeof (dir_entry_t),
+ gf_meta_mt_dir_entry_t);
ERR_ABORT (dir);
- dir->name = strdup (".meta");
+ dir->name = gf_strdup (".meta");
memcpy (&dir->buf, priv->tree->stbuf, sizeof (struct stat));
dir->next = entries->next;
entries->next = dir;
@@ -887,7 +879,8 @@ meta_readdir (call_frame_t *frame,
dir_entry_t *entries = NULL;
while (dir) {
- dir_entry_t *d = CALLOC (1, sizeof (dir_entry_t));
+ dir_entry_t *d = GF_CALLOC (1, sizeof (dir_entry_t),
+ gf_meta_mt_dir_entry_t);
ERR_ABORT (d);
d->name = dir->name;
d->buf = *dir->stbuf;
@@ -897,7 +890,8 @@ meta_readdir (call_frame_t *frame,
dir = dir->next;
}
- dir_entry_t *header = CALLOC (1, sizeof (dir_entry_t));
+ dir_entry_t *header = GF_CALLOC (1, sizeof (dir_entry_t),
+ gf_meta_mt_dir_entry_t);
ERR_ABORT (header);
header->next = entries;
STACK_UNWIND (frame, 0, 0, header, count);
@@ -1132,7 +1126,7 @@ meta_lk_cbk (call_frame_t *frame,
xlator_t *this,
int32_t op_ret,
int32_t op_errno,
- struct flock *lock)
+ struct gf_flock *lock)
{
STACK_UNWIND (frame,
op_ret,
@@ -1146,7 +1140,7 @@ meta_lk (call_frame_t *frame,
xlator_t *this,
dict_t *file,
int32_t cmd,
- struct flock *lock)
+ struct gf_flock *lock)
{
STACK_WIND (frame,
meta_lk_cbk,
@@ -1163,16 +1157,16 @@ add_xlator_to_tree (meta_dirent_t *tree, xlator_t *this,
const char *prefix)
{
char *dir;
- asprintf (&dir, "%s/%s", prefix, this->name);
+ gf_asprintf (&dir, "%s/%s", prefix, this->name);
char *children;
- asprintf (&children, "%s/%s", dir, "subvolumes");
+ gf_asprintf (&children, "%s/%s", dir, "subvolumes");
char *type;
- asprintf (&type, "%s/%s", dir, "type");
+ gf_asprintf (&type, "%s/%s", dir, "type");
char *view;
- asprintf (&view, "%s/%s", dir, "view");
+ gf_asprintf (&view, "%s/%s", dir, "view");
insert_meta_entry (tree, dir, S_IFDIR, NULL, NULL);
insert_meta_entry (tree, children, S_IFDIR, NULL, NULL);
@@ -1194,9 +1188,10 @@ static void
build_meta_tree (xlator_t *this)
{
meta_private_t *priv = (meta_private_t *) this->private;
- priv->tree = CALLOC (1, sizeof (meta_dirent_t));
+ priv->tree = GF_CALLOC (1, sizeof (meta_dirent_t),
+ gf_meta_mt_meta_dirent_t);
ERR_ABORT (priv->tree);
- priv->tree->name = strdup (".meta");
+ priv->tree->name = gf_strdup (".meta");
priv->tree->stbuf = new_stbuf ();
priv->tree->stbuf->st_mode = S_IFDIR | S_IRUSR | S_IRGRP | S_IROTH |
S_IXUSR | S_IXGRP | S_IXOTH;
@@ -1215,6 +1210,25 @@ build_meta_tree (xlator_t *this)
}
int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_meta_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+int32_t
init (xlator_t *this)
{
if (this->parent != NULL) {
@@ -1222,12 +1236,13 @@ init (xlator_t *this)
return -1;
}
- meta_private_t *priv = CALLOC (1, sizeof (meta_private_t));
+ meta_private_t *priv = GF_CALLOC (1, sizeof (meta_private_t),
+ gf_meta_mt_meta_private_t);
ERR_ABORT (priv);
data_t *directory = dict_get (this->options, "directory");
if (directory) {
- priv->directory = strdup (data_to_str (directory));
+ priv->directory = gf_strdup (data_to_str (directory));
}
else {
priv->directory = ".meta";
@@ -1280,6 +1295,3 @@ struct xlator_fops fops = {
.create = meta_create,
.lk = meta_lk,
};
-
-struct xlator_mops mops = {
-};
diff --git a/xlators/meta/src/meta.h b/xlators/meta/src/meta.h
index 2c3d37237..73e0e50db 100644
--- a/xlators/meta/src/meta.h
+++ b/xlators/meta/src/meta.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __META_H__
#define __META_H__
diff --git a/xlators/meta/src/misc.c b/xlators/meta/src/misc.c
index a6ea441ee..1a8dfa806 100644
--- a/xlators/meta/src/misc.c
+++ b/xlators/meta/src/misc.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#include <unistd.h>
#include <sys/uio.h>
diff --git a/xlators/meta/src/misc.h b/xlators/meta/src/misc.h
index 64eabf9a2..30dd10e34 100644
--- a/xlators/meta/src/misc.h
+++ b/xlators/meta/src/misc.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __MISC_H__
#define __MISC_H__
diff --git a/xlators/meta/src/tree.c b/xlators/meta/src/tree.c
index 8eb982a6a..dacbd665a 100644
--- a/xlators/meta/src/tree.c
+++ b/xlators/meta/src/tree.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -31,6 +21,7 @@
#include "xlator.h"
#include "meta.h"
+#include "meta-mem-types.h"
static int
is_meta_path (const char *path)
@@ -46,7 +37,7 @@ struct stat *
new_stbuf (void)
{
static int next_inode = 0;
- struct stat *stbuf = CALLOC (1, sizeof (struct stat));
+ struct stat *stbuf = GF_CALLOC (1, sizeof (struct stat), gf_meta_mt_stat);
ERR_ABORT (stbuf);
@@ -99,7 +90,7 @@ meta_dirent_t *
lookup_meta_entry (meta_dirent_t *root, const char *path,
char **remain)
{
- char *_path = strdup (path);
+ char *_path = gf_strdup (path);
if (!is_meta_path (path))
return NULL;
@@ -119,10 +110,10 @@ lookup_meta_entry (meta_dirent_t *root, const char *path,
while (piece) {
char *tmp = *remain;
if (*remain)
- asprintf (remain, "/%s/%s", *remain, piece);
+ gf_asprintf (remain, "/%s/%s", *remain, piece);
else
- asprintf (remain, "/%s", piece);
- if (tmp) free (tmp);
+ gf_asprintf (remain, "/%s", piece);
+ GF_FREE (tmp);
piece = strtok (NULL, "/");
}
}
@@ -132,7 +123,7 @@ lookup_meta_entry (meta_dirent_t *root, const char *path,
trav = ntrav;
}
- free (_path);
+ GF_FREE (_path);
return trav;
}
@@ -148,9 +139,10 @@ insert_meta_entry (meta_dirent_t *root, const char *path,
if (!dir)
return NULL;
- meta_dirent_t *new = CALLOC (1, sizeof (meta_dirent_t));
+ meta_dirent_t *new = GF_CALLOC (1, sizeof (meta_dirent_t),
+ gf_meta_mt_meta_dirent_t);
ERR_ABORT (new);
- new->name = strdup (slashpos+1);
+ new->name = gf_strdup (slashpos+1);
new->type = type;
new->parent = parent;
new->next = parent->children;
@@ -167,9 +159,10 @@ insert_meta_entry (meta_dirent_t *root, const char *path,
int main (void)
{
- meta_dirent_t *root = CALLOC (1, sizeof (meta_dirent_t));
+ meta_dirent_t *root = GF_CALLOC (1, sizeof (meta_dirent_t),
+ gf_meta_mt_meta_dirent_t);
ERR_ABORT (root);
- root->name = strdup (".meta");
+ root->name = gf_strdup (".meta");
insert_meta_entry (root, "/.meta/version", S_IFREG, NULL, NULL);
return 0;
diff --git a/xlators/meta/src/tree.h b/xlators/meta/src/tree.h
index 15a9b3ad3..985df3bd7 100644
--- a/xlators/meta/src/tree.h
+++ b/xlators/meta/src/tree.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __TREE_H__
#define __TREE_H__
diff --git a/xlators/meta/src/view.c b/xlators/meta/src/view.c
index 26f1b6e03..b4e2d64a2 100644
--- a/xlators/meta/src/view.c
+++ b/xlators/meta/src/view.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
diff --git a/xlators/meta/src/view.h b/xlators/meta/src/view.h
index c7c4f923e..2eff6126e 100644
--- a/xlators/meta/src/view.h
+++ b/xlators/meta/src/view.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __VIEW_H__
#define __VIEW_H__
diff --git a/xlators/mgmt/Makefile.am b/xlators/mgmt/Makefile.am
new file mode 100644
index 000000000..bf09b07c3
--- /dev/null
+++ b/xlators/mgmt/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = glusterd
+
+CLEANFILES =
diff --git a/mod_glusterfs/apache/2.2/Makefile.am b/xlators/mgmt/glusterd/Makefile.am
index d471a3f92..d471a3f92 100644
--- a/mod_glusterfs/apache/2.2/Makefile.am
+++ b/xlators/mgmt/glusterd/Makefile.am
diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am
new file mode 100644
index 000000000..933c44019
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/Makefile.am
@@ -0,0 +1,50 @@
+xlator_LTLIBRARIES = glusterd.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/mgmt
+glusterd_la_CPPFLAGS = $(AM_CPPFLAGS) "-DFILTERDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/filter\""
+glusterd_la_LDFLAGS = -module -avoid-version
+if ENABLE_BD_XLATOR
+glusterd_la_LDFLAGS += -llvm2app
+endif
+glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \
+ glusterd-op-sm.c glusterd-utils.c glusterd-rpc-ops.c \
+ glusterd-store.c glusterd-handshake.c glusterd-pmap.c \
+ glusterd-volgen.c glusterd-rebalance.c glusterd-quota.c \
+ glusterd-geo-rep.c glusterd-replace-brick.c glusterd-log-ops.c \
+ glusterd-volume-ops.c glusterd-brick-ops.c glusterd-mountbroker.c \
+ glusterd-syncop.c glusterd-hooks.c glusterd-volume-set.c \
+ glusterd-locks.c glusterd-snapshot.c glusterd-mgmt-handler.c \
+ glusterd-mgmt.c
+
+glusterd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ $(XML_LIBS) -lcrypto
+
+noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \
+ glusterd-sm.h glusterd-store.h glusterd-mem-types.h \
+ glusterd-pmap.h glusterd-volgen.h glusterd-mountbroker.h \
+ glusterd-syncop.h glusterd-hooks.h glusterd-locks.h \
+ glusterd-mgmt.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(rpclibdir) -I$(CONTRIBDIR)/rbtree \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(CONTRIBDIR)/uuid \
+ -DSBIN_DIR=\"$(sbindir)\" -DDATADIR=\"$(localstatedir)\" \
+ -DGSYNCD_PREFIX=\"$(libexecdir)/glusterfs\"\
+ -DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE) $(XML_CPPFLAGS)
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+AM_LDFLAGS = -L$(xlatordir)
+
+CLEANFILES =
+
+install-data-hook:
+
+if GF_INSTALL_VAR_LIB_GLUSTERD
+ $(mkdir_p) $(localstatedir)/lib/
+ (stat $(sysconfdir)/glusterd && \
+ mv $(sysconfdir)/glusterd $(localstatedir)/lib/) || true;
+ (ln -sf $(localstatedir)/lib/glusterd $(sysconfdir)/glusterd) || true;
+endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
new file mode 100644
index 000000000..596503c21
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -0,0 +1,1953 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "common-utils.h"
+#include "cli1-xdr.h"
+#include "xdr-generic.h"
+#include "glusterd.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-store.h"
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+#include "run.h"
+#include <sys/signal.h>
+
+/* misc */
+
+/* In this function, we decide, based on the 'count' of the brick,
+ where to add it in the current volume. 'count' tells us already
+ how many of the given bricks are added. other argument are self-
+ descriptive. */
+int
+add_brick_at_right_order (glusterd_brickinfo_t *brickinfo,
+ glusterd_volinfo_t *volinfo, int count,
+ int32_t stripe_cnt, int32_t replica_cnt)
+{
+ int idx = 0;
+ int i = 0;
+ int sub_cnt = 0;
+ glusterd_brickinfo_t *brick = NULL;
+
+ /* The complexity of the function is in deciding at which index
+ to add new brick. Even though it can be defined with a complex
+ single formula for all volume, it is seperated out to make it
+ more readable */
+ if (stripe_cnt) {
+ /* common formula when 'stripe_count' is set */
+ /* idx = ((count / ((stripe_cnt * volinfo->replica_count) -
+ volinfo->dist_leaf_count)) * volinfo->dist_leaf_count) +
+ (count + volinfo->dist_leaf_count);
+ */
+
+ sub_cnt = volinfo->dist_leaf_count;
+
+ idx = ((count / ((stripe_cnt * volinfo->replica_count) -
+ sub_cnt)) * sub_cnt) +
+ (count + sub_cnt);
+
+ goto insert_brick;
+ }
+
+ /* replica count is set */
+ /* common formula when 'replica_count' is set */
+ /* idx = ((count / (replica_cnt - existing_replica_count)) *
+ existing_replica_count) +
+ (count + existing_replica_count);
+ */
+
+ sub_cnt = volinfo->replica_count;
+ idx = (count / (replica_cnt - sub_cnt) * sub_cnt) +
+ (count + sub_cnt);
+
+insert_brick:
+ i = 0;
+ list_for_each_entry (brick, &volinfo->bricks, brick_list) {
+ i++;
+ if (i < idx)
+ continue;
+ gf_log (THIS->name, GF_LOG_DEBUG, "brick:%s index=%d, count=%d",
+ brick->path, idx, count);
+
+ list_add (&brickinfo->brick_list, &brick->brick_list);
+ break;
+ }
+
+ return 0;
+}
+
+
+static int
+gd_addbr_validate_stripe_count (glusterd_volinfo_t *volinfo, int stripe_count,
+ int total_bricks, int *type, char *err_str,
+ size_t err_len)
+{
+ int ret = -1;
+
+ switch (volinfo->type) {
+ case GF_CLUSTER_TYPE_NONE:
+ if ((volinfo->brick_count * stripe_count) == total_bricks) {
+ /* Change the volume type */
+ *type = GF_CLUSTER_TYPE_STRIPE;
+ gf_log (THIS->name, GF_LOG_INFO,
+ "Changing the type of volume %s from "
+ "'distribute' to 'stripe'", volinfo->volname);
+ ret = 0;
+ goto out;
+ } else {
+ snprintf (err_str, err_len, "Incorrect number of "
+ "bricks (%d) supplied for stripe count (%d).",
+ (total_bricks - volinfo->brick_count),
+ stripe_count);
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+ break;
+ case GF_CLUSTER_TYPE_REPLICATE:
+ if (!(total_bricks % (volinfo->replica_count * stripe_count))) {
+ /* Change the volume type */
+ *type = GF_CLUSTER_TYPE_STRIPE_REPLICATE;
+ gf_log (THIS->name, GF_LOG_INFO,
+ "Changing the type of volume %s from "
+ "'replicate' to 'replicate-stripe'",
+ volinfo->volname);
+ ret = 0;
+ goto out;
+ } else {
+ snprintf (err_str, err_len, "Incorrect number of "
+ "bricks (%d) supplied for changing volume's "
+ "stripe count to %d, need at least %d bricks",
+ (total_bricks - volinfo->brick_count),
+ stripe_count,
+ (volinfo->replica_count * stripe_count));
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+ break;
+ case GF_CLUSTER_TYPE_STRIPE:
+ case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
+ if (stripe_count < volinfo->stripe_count) {
+ snprintf (err_str, err_len,
+ "Incorrect stripe count (%d) supplied. "
+ "Volume already has stripe count (%d)",
+ stripe_count, volinfo->stripe_count);
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+ if (stripe_count == volinfo->stripe_count) {
+ if (!(total_bricks % volinfo->dist_leaf_count)) {
+ /* its same as the one which exists */
+ ret = 1;
+ goto out;
+ }
+ }
+ if (stripe_count > volinfo->stripe_count) {
+ /* We have to make sure before and after 'add-brick',
+ the number or subvolumes for distribute will remain
+ same, when stripe count is given */
+ if ((volinfo->brick_count * (stripe_count *
+ volinfo->replica_count)) ==
+ (total_bricks * volinfo->dist_leaf_count)) {
+ /* Change the dist_leaf_count */
+ gf_log (THIS->name, GF_LOG_INFO,
+ "Changing the stripe count of "
+ "volume %s from %d to %d",
+ volinfo->volname,
+ volinfo->stripe_count, stripe_count);
+ ret = 0;
+ goto out;
+ }
+ }
+ break;
+ }
+
+out:
+ return ret;
+}
+
+static int
+gd_addbr_validate_replica_count (glusterd_volinfo_t *volinfo, int replica_count,
+ int total_bricks, int *type, char *err_str,
+ int err_len)
+{
+ int ret = -1;
+
+ /* replica count is set */
+ switch (volinfo->type) {
+ case GF_CLUSTER_TYPE_NONE:
+ if ((volinfo->brick_count * replica_count) == total_bricks) {
+ /* Change the volume type */
+ *type = GF_CLUSTER_TYPE_REPLICATE;
+ gf_log (THIS->name, GF_LOG_INFO,
+ "Changing the type of volume %s from "
+ "'distribute' to 'replica'", volinfo->volname);
+ ret = 0;
+ goto out;
+
+ } else {
+ snprintf (err_str, err_len, "Incorrect number of "
+ "bricks (%d) supplied for replica count (%d).",
+ (total_bricks - volinfo->brick_count),
+ replica_count);
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+ break;
+ case GF_CLUSTER_TYPE_STRIPE:
+ if (!(total_bricks % (volinfo->dist_leaf_count * replica_count))) {
+ /* Change the volume type */
+ *type = GF_CLUSTER_TYPE_STRIPE_REPLICATE;
+ gf_log (THIS->name, GF_LOG_INFO,
+ "Changing the type of volume %s from "
+ "'stripe' to 'replicate-stripe'",
+ volinfo->volname);
+ ret = 0;
+ goto out;
+ } else {
+ snprintf (err_str, err_len, "Incorrect number of "
+ "bricks (%d) supplied for changing volume's "
+ "replica count to %d, need at least %d "
+ "bricks",
+ (total_bricks - volinfo->brick_count),
+ replica_count, (volinfo->dist_leaf_count *
+ replica_count));
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+ break;
+ case GF_CLUSTER_TYPE_REPLICATE:
+ case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
+ if (replica_count < volinfo->replica_count) {
+ snprintf (err_str, err_len,
+ "Incorrect replica count (%d) supplied. "
+ "Volume already has (%d)",
+ replica_count, volinfo->replica_count);
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+ if (replica_count == volinfo->replica_count) {
+ if (!(total_bricks % volinfo->dist_leaf_count)) {
+ ret = 1;
+ goto out;
+ }
+ }
+ if (replica_count > volinfo->replica_count) {
+ /* We have to make sure before and after 'add-brick',
+ the number or subvolumes for distribute will remain
+ same, when replica count is given */
+ if ((total_bricks * volinfo->dist_leaf_count) ==
+ (volinfo->brick_count * (replica_count *
+ volinfo->stripe_count))) {
+ /* Change the dist_leaf_count */
+ gf_log (THIS->name, GF_LOG_INFO,
+ "Changing the replica count of "
+ "volume %s from %d to %d",
+ volinfo->volname, volinfo->replica_count,
+ replica_count);
+ ret = 0;
+ goto out;
+ }
+ }
+ break;
+ }
+out:
+ return ret;
+}
+
+static int
+gd_rmbr_validate_replica_count (glusterd_volinfo_t *volinfo,
+ int32_t replica_count,
+ int32_t brick_count, char *err_str,
+ size_t err_len)
+{
+ int ret = -1;
+ int replica_nodes = 0;
+
+ switch (volinfo->type) {
+ case GF_CLUSTER_TYPE_NONE:
+ case GF_CLUSTER_TYPE_STRIPE:
+ snprintf (err_str, err_len,
+ "replica count (%d) option given for non replicate "
+ "volume %s", replica_count, volinfo->volname);
+ gf_log (THIS->name, GF_LOG_WARNING, "%s", err_str);
+ goto out;
+
+ case GF_CLUSTER_TYPE_REPLICATE:
+ case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
+ /* in remove brick, you can only reduce the replica count */
+ if (replica_count > volinfo->replica_count) {
+ snprintf (err_str, err_len,
+ "given replica count (%d) option is more "
+ "than volume %s's replica count (%d)",
+ replica_count, volinfo->volname,
+ volinfo->replica_count);
+ gf_log (THIS->name, GF_LOG_WARNING, "%s", err_str);
+ goto out;
+ }
+ if (replica_count == volinfo->replica_count) {
+ /* This means the 'replica N' option on CLI was
+ redundant. Check if the total number of bricks given
+ for removal is same as 'dist_leaf_count' */
+ if (brick_count % volinfo->dist_leaf_count) {
+ snprintf (err_str, err_len,
+ "number of bricks provided (%d) is "
+ "not valid. need at least %d "
+ "(or %dxN)", brick_count,
+ volinfo->dist_leaf_count,
+ volinfo->dist_leaf_count);
+ gf_log (THIS->name, GF_LOG_WARNING, "%s",
+ err_str);
+ goto out;
+ }
+ ret = 1;
+ goto out;
+ }
+
+ replica_nodes = ((volinfo->brick_count /
+ volinfo->replica_count) *
+ (volinfo->replica_count - replica_count));
+
+ if (brick_count % replica_nodes) {
+ snprintf (err_str, err_len,
+ "need %d(xN) bricks for reducing replica "
+ "count of the volume from %d to %d",
+ replica_nodes, volinfo->replica_count,
+ replica_count);
+ goto out;
+ }
+ break;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/* Handler functions */
+int
+__glusterd_handle_add_brick (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{0,}};
+ dict_t *dict = NULL;
+ char *bricks = NULL;
+ char *volname = NULL;
+ int brick_count = 0;
+ void *cli_rsp = NULL;
+ char err_str[2048] = {0,};
+ gf_cli_rsp rsp = {0,};
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+ int total_bricks = 0;
+ int32_t replica_count = 0;
+ int32_t stripe_count = 0;
+ int type = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT (req);
+
+ ret = xdr_to_generic (req->msg[0], &cli_req,
+ (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ snprintf (err_str, sizeof (err_str), "Garbage args received");
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_INFO, "Received add brick req");
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf (err_str, sizeof (err_str), "Unable to decode "
+ "the command");
+ goto out;
+ }
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Unable to get volume "
+ "name");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ if (!(ret = glusterd_check_volume_exists (volname))) {
+ ret = -1;
+ snprintf (err_str, sizeof (err_str), "Volume %s does not exist",
+ volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "count", &brick_count);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Unable to get volume "
+ "brick count");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "replica-count", &replica_count);
+ if (!ret) {
+ gf_log (this->name, GF_LOG_INFO, "replica-count is %d",
+ replica_count);
+ }
+
+ ret = dict_get_int32 (dict, "stripe-count", &stripe_count);
+ if (!ret) {
+ gf_log (this->name, GF_LOG_INFO, "stripe-count is %d",
+ stripe_count);
+ }
+
+ if (!dict_get (dict, "force")) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get flag");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Unable to get volinfo "
+ "for volume name %s", volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+
+ }
+
+ total_bricks = volinfo->brick_count + brick_count;
+
+ if (!stripe_count && !replica_count) {
+ if (volinfo->type == GF_CLUSTER_TYPE_NONE)
+ goto brick_val;
+
+ if ((volinfo->brick_count < volinfo->dist_leaf_count) &&
+ (total_bricks <= volinfo->dist_leaf_count))
+ goto brick_val;
+
+ if ((brick_count % volinfo->dist_leaf_count) != 0) {
+ snprintf (err_str, sizeof (err_str), "Incorrect number "
+ "of bricks supplied %d with count %d",
+ brick_count, volinfo->dist_leaf_count);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ ret = -1;
+ goto out;
+ }
+ goto brick_val;
+ /* done with validation.. below section is if stripe|replica
+ count is given */
+ }
+
+ /* These bricks needs to be added one per a replica or stripe volume */
+ if (stripe_count) {
+ ret = gd_addbr_validate_stripe_count (volinfo, stripe_count,
+ total_bricks, &type,
+ err_str,
+ sizeof (err_str));
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ /* if stripe count is same as earlier, set it back to 0 */
+ if (ret == 1)
+ stripe_count = 0;
+
+ ret = dict_set_int32 (dict, "stripe-count", stripe_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set the stripe-count in dict");
+ goto out;
+ }
+ goto brick_val;
+ }
+
+ ret = gd_addbr_validate_replica_count (volinfo, replica_count,
+ total_bricks,
+ &type, err_str,
+ sizeof (err_str));
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ /* if replica count is same as earlier, set it back to 0 */
+ if (ret == 1)
+ replica_count = 0;
+
+ ret = dict_set_int32 (dict, "replica-count", replica_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set the replica-count in dict");
+ goto out;
+ }
+
+brick_val:
+ ret = dict_get_str (dict, "bricks", &bricks);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Unable to get volume "
+ "bricks");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ if (type != volinfo->type) {
+ ret = dict_set_int32 (dict, "type", type);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set the new type in dict");
+ }
+
+ ret = glusterd_op_begin_synctask (req, GD_OP_ADD_BRICK, dict);
+
+out:
+ if (ret) {
+ rsp.op_ret = -1;
+ rsp.op_errno = 0;
+ if (err_str[0] == '\0')
+ snprintf (err_str, sizeof (err_str), "Operation failed");
+ rsp.op_errstr = err_str;
+ cli_rsp = &rsp;
+ glusterd_to_cli (req, cli_rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_cli_rsp, dict);
+ ret = 0; //sent error to cli, prevent second reply
+ }
+
+ free (cli_req.dict.dict_val); //its malloced by xdr
+
+ return ret;
+}
+
+int
+glusterd_handle_add_brick (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __glusterd_handle_add_brick);
+}
+
+static int
+subvol_matcher_init (int **subvols, int count)
+{
+ int ret = -1;
+
+ *subvols = GF_CALLOC (count, sizeof(int), gf_gld_mt_int);
+ if (*subvols)
+ ret = 0;
+
+ return ret;
+}
+
+static void
+subvol_matcher_update (int *subvols, glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo)
+{
+ glusterd_brickinfo_t *tmp = NULL;
+ int32_t sub_volume = 0;
+ int pos = 0;
+
+ list_for_each_entry (tmp, &volinfo->bricks, brick_list) {
+
+ if (strcmp (tmp->hostname, brickinfo->hostname) ||
+ strcmp (tmp->path, brickinfo->path)) {
+ pos++;
+ continue;
+ }
+ gf_log (THIS->name, GF_LOG_DEBUG, LOGSTR_FOUND_BRICK,
+ brickinfo->hostname, brickinfo->path,
+ volinfo->volname);
+ sub_volume = (pos / volinfo->dist_leaf_count);
+ subvols[sub_volume]++;
+ break;
+ }
+
+}
+
+static int
+subvol_matcher_verify (int *subvols, glusterd_volinfo_t *volinfo, char *err_str,
+ size_t err_len, char *vol_type)
+{
+ int i = 0;
+ int ret = 0;
+
+ do {
+
+ if (subvols[i] % volinfo->dist_leaf_count == 0) {
+ continue;
+ } else {
+ ret = -1;
+ snprintf (err_str, err_len,
+ "Bricks not from same subvol for %s", vol_type);
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
+ break;
+ }
+ } while (++i < volinfo->subvol_count);
+
+ return ret;
+}
+
+static void
+subvol_matcher_destroy (int *subvols)
+{
+ GF_FREE (subvols);
+}
+
+int
+__glusterd_handle_remove_brick (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{0,}};
+ dict_t *dict = NULL;
+ int32_t count = 0;
+ char *brick = NULL;
+ char key[256] = {0,};
+ char *brick_list = NULL;
+ int i = 1;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ int *subvols = NULL;
+ glusterd_brickinfo_t *tmp = NULL;
+ char err_str[2048] = {0};
+ gf_cli_rsp rsp = {0,};
+ void *cli_rsp = NULL;
+ char vol_type[256] = {0,};
+ int32_t replica_count = 0;
+ int32_t brick_index = 0;
+ int32_t tmp_brick_idx = 0;
+ int found = 0;
+ int diff_count = 0;
+ char *volname = 0;
+ xlator_t *this = NULL;
+
+ GF_ASSERT (req);
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = xdr_to_generic (req->msg[0], &cli_req,
+ (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ snprintf (err_str, sizeof (err_str), "Received garbage args");
+ goto out;
+ }
+
+
+ gf_log (this->name, GF_LOG_INFO, "Received rem brick req");
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf (err_str, sizeof (err_str), "Unable to decode "
+ "the command");
+ goto out;
+ }
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Unable to get volume "
+ "name");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "count", &count);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Unable to get brick "
+ "count");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str),"Volume %s does not exist",
+ volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "replica-count", &replica_count);
+ if (!ret) {
+ gf_log (this->name, GF_LOG_INFO,
+ "request to change replica-count to %d", replica_count);
+ ret = gd_rmbr_validate_replica_count (volinfo, replica_count,
+ count, err_str,
+ sizeof (err_str));
+ if (ret < 0) {
+ /* logging and error msg are done in above function
+ itself */
+ goto out;
+ }
+ dict_del (dict, "replica-count");
+ if (ret) {
+ replica_count = 0;
+ } else {
+ ret = dict_set_int32 (dict, "replica-count",
+ replica_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set the replica_count "
+ "in dict");
+ goto out;
+ }
+ }
+ }
+
+ /* 'vol_type' is used for giving the meaning full error msg for user */
+ if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) {
+ strcpy (vol_type, "replica");
+ } else if (volinfo->type == GF_CLUSTER_TYPE_STRIPE) {
+ strcpy (vol_type, "stripe");
+ } else if (volinfo->type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) {
+ strcpy (vol_type, "stripe-replicate");
+ } else {
+ strcpy (vol_type, "distribute");
+ }
+
+ /* Do not allow remove-brick if the volume is a stripe volume*/
+ if ((volinfo->type == GF_CLUSTER_TYPE_STRIPE) &&
+ (volinfo->brick_count == volinfo->stripe_count)) {
+ snprintf (err_str, sizeof (err_str),
+ "Removing brick from a stripe volume is not allowed");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ ret = -1;
+ goto out;
+ }
+
+ if (!replica_count &&
+ (volinfo->type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) &&
+ (volinfo->brick_count == volinfo->dist_leaf_count)) {
+ snprintf (err_str, sizeof(err_str),
+ "Removing bricks from stripe-replicate"
+ " configuration is not allowed without reducing "
+ "replica or stripe count explicitly.");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ ret = -1;
+ goto out;
+ }
+
+ if (!replica_count &&
+ (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) &&
+ (volinfo->brick_count == volinfo->dist_leaf_count)) {
+ snprintf (err_str, sizeof (err_str),
+ "Removing bricks from replicate configuration "
+ "is not allowed without reducing replica count "
+ "explicitly.");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ ret = -1;
+ goto out;
+ }
+
+ /* Do not allow remove-brick if the bricks given is less than
+ the replica count or stripe count */
+ if (!replica_count && (volinfo->type != GF_CLUSTER_TYPE_NONE)) {
+ if (volinfo->dist_leaf_count &&
+ (count % volinfo->dist_leaf_count)) {
+ snprintf (err_str, sizeof (err_str), "Remove brick "
+ "incorrect brick count of %d for %s %d",
+ count, vol_type, volinfo->dist_leaf_count);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ brick_list = GF_MALLOC (120000 * sizeof(*brick_list),gf_common_mt_char);
+
+ if (!brick_list) {
+ ret = -1;
+ goto out;
+ }
+
+ strcpy (brick_list, " ");
+
+ if ((volinfo->type != GF_CLUSTER_TYPE_NONE) &&
+ (volinfo->subvol_count > 1)) {
+ ret = subvol_matcher_init (&subvols, volinfo->subvol_count);
+ if (ret)
+ goto out;
+ }
+
+ while ( i <= count) {
+ snprintf (key, sizeof (key), "brick%d", i);
+ ret = dict_get_str (dict, key, &brick);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Unable to get %s",
+ key);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+ gf_log (this->name, GF_LOG_DEBUG, "Remove brick count %d brick:"
+ " %s", i, brick);
+
+ ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo,
+ &brickinfo);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Incorrect brick "
+ "%s for volume %s", brick, volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+ strcat(brick_list, brick);
+ strcat(brick_list, " ");
+
+ i++;
+ if ((volinfo->type == GF_CLUSTER_TYPE_NONE) ||
+ (volinfo->brick_count <= volinfo->dist_leaf_count))
+ continue;
+
+ if (replica_count) {
+ /* do the validation of bricks here */
+ /* -2 because i++ is already done, and i starts with 1,
+ instead of 0 */
+ diff_count = (volinfo->replica_count - replica_count);
+ brick_index = (((i -2) / diff_count) * volinfo->replica_count);
+ tmp_brick_idx = 0;
+ found = 0;
+ list_for_each_entry (tmp, &volinfo->bricks, brick_list) {
+ tmp_brick_idx++;
+ gf_log (this->name, GF_LOG_TRACE,
+ "validate brick %s:%s (%d %d %d)",
+ tmp->hostname, tmp->path, tmp_brick_idx,
+ brick_index, volinfo->replica_count);
+ if (tmp_brick_idx <= brick_index)
+ continue;
+ if (tmp_brick_idx >
+ (brick_index + volinfo->replica_count))
+ break;
+ if ((!strcmp (tmp->hostname,brickinfo->hostname)) &&
+ !strcmp (tmp->path, brickinfo->path)) {
+ found = 1;
+ break;
+ }
+ }
+ if (found)
+ continue;
+
+ snprintf (err_str, sizeof (err_str), "Bricks are from "
+ "same subvol");
+ gf_log (this->name, GF_LOG_INFO,
+ "failed to validate brick %s:%s (%d %d %d)",
+ tmp->hostname, tmp->path, tmp_brick_idx,
+ brick_index, volinfo->replica_count);
+ ret = -1;
+ /* brick order is not valid */
+ goto out;
+ }
+
+ /* Find which subvolume the brick belongs to */
+ subvol_matcher_update (subvols, volinfo, brickinfo);
+ }
+
+ /* Check if the bricks belong to the same subvolumes.*/
+ if ((volinfo->type != GF_CLUSTER_TYPE_NONE) &&
+ (volinfo->subvol_count > 1)) {
+ ret = subvol_matcher_verify (subvols, volinfo,
+ err_str, sizeof(err_str),
+ vol_type);
+ if (ret)
+ goto out;
+ }
+
+ ret = glusterd_op_begin_synctask (req, GD_OP_REMOVE_BRICK, dict);
+
+out:
+ if (ret) {
+ rsp.op_ret = -1;
+ rsp.op_errno = 0;
+ if (err_str[0] == '\0')
+ snprintf (err_str, sizeof (err_str),
+ "Operation failed");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ rsp.op_errstr = err_str;
+ cli_rsp = &rsp;
+ glusterd_to_cli (req, cli_rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_cli_rsp, dict);
+
+ ret = 0; //sent error to cli, prevent second reply
+
+ }
+
+ GF_FREE (brick_list);
+ subvol_matcher_destroy (subvols);
+ free (cli_req.dict.dict_val); //its malloced by xdr
+
+ return ret;
+}
+
+int
+glusterd_handle_remove_brick (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ __glusterd_handle_remove_brick);
+}
+
+static int
+_glusterd_restart_gsync_session (dict_t *this, char *key,
+ data_t *value, void *data)
+{
+ char *slave = NULL;
+ char *slave_buf = NULL;
+ char *path_list = NULL;
+ char *slave_vol = NULL;
+ char *slave_ip = NULL;
+ char *conf_path = NULL;
+ char **errmsg = NULL;
+ int ret = -1;
+ glusterd_gsync_status_temp_t *param = NULL;
+ gf_boolean_t is_running = _gf_false;
+
+ param = (glusterd_gsync_status_temp_t *)data;
+
+ GF_ASSERT (param);
+ GF_ASSERT (param->volinfo);
+
+ slave = strchr(value->data, ':');
+ if (slave) {
+ slave++;
+ slave_buf = gf_strdup (slave);
+ if (!slave_buf) {
+ gf_log ("", GF_LOG_ERROR,
+ "Failed to gf_strdup");
+ ret = -1;
+ goto out;
+ }
+ }
+ else
+ return 0;
+
+ ret = dict_set_dynstr (param->rsp_dict, "slave", slave_buf);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to store slave");
+ if (slave_buf)
+ GF_FREE(slave_buf);
+ goto out;
+ }
+
+ ret = glusterd_get_slave_details_confpath (param->volinfo,
+ param->rsp_dict,
+ &slave_ip, &slave_vol,
+ &conf_path, errmsg);
+ if (ret) {
+ if (*errmsg)
+ gf_log ("", GF_LOG_ERROR, "%s", *errmsg);
+ else
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to fetch slave or confpath details.");
+ goto out;
+ }
+
+ /* In cases that gsyncd is not running, we will not invoke it
+ * because of add-brick. */
+ ret = glusterd_check_gsync_running_local (param->volinfo->volname,
+ slave, conf_path,
+ &is_running);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "gsync running validation failed.");
+ goto out;
+ }
+ if (_gf_false == is_running) {
+ gf_log ("", GF_LOG_DEBUG, "gsync session for %s and %s is"
+ " not running on this node. Hence not restarting.",
+ param->volinfo->volname, slave);
+ ret = 0;
+ goto out;
+ }
+
+ ret = glusterd_get_local_brickpaths (param->volinfo, &path_list);
+ if (!path_list) {
+ gf_log ("", GF_LOG_DEBUG, "This node not being part of"
+ " volume should not be running gsyncd. Hence"
+ " no gsyncd process to restart.");
+ ret = 0;
+ goto out;
+ }
+
+ ret = glusterd_check_restart_gsync_session (param->volinfo, slave,
+ param->rsp_dict, path_list,
+ conf_path, 0);
+ if (ret)
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to restart gsync session.");
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d.", ret);
+ return ret;
+}
+
+/* op-sm */
+
+int
+glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count,
+ char *bricks, dict_t *dict)
+{
+ char *brick = NULL;
+ int32_t i = 1;
+ char *brick_list = NULL;
+ char *free_ptr1 = NULL;
+ char *free_ptr2 = NULL;
+ char *saveptr = NULL;
+ int32_t ret = -1;
+ int32_t stripe_count = 0;
+ int32_t replica_count = 0;
+ int32_t type = 0;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_gsync_status_temp_t param = {0, };
+ gf_boolean_t restart_needed = 0;
+ char msg[1024] __attribute__((unused)) = {0, };
+ int caps = 0;
+
+ GF_ASSERT (volinfo);
+
+ if (bricks) {
+ brick_list = gf_strdup (bricks);
+ free_ptr1 = brick_list;
+ }
+
+ if (count)
+ brick = strtok_r (brick_list+1, " \n", &saveptr);
+
+ if (dict) {
+ ret = dict_get_int32 (dict, "stripe-count", &stripe_count);
+ if (!ret)
+ gf_log (THIS->name, GF_LOG_INFO,
+ "stripe-count is set %d", stripe_count);
+
+ ret = dict_get_int32 (dict, "replica-count", &replica_count);
+ if (!ret)
+ gf_log (THIS->name, GF_LOG_INFO,
+ "replica-count is set %d", replica_count);
+ ret = dict_get_int32 (dict, "type", &type);
+ if (!ret)
+ gf_log (THIS->name, GF_LOG_INFO,
+ "type is set %d, need to change it", type);
+ }
+
+ while ( i <= count) {
+ ret = glusterd_brickinfo_new_from_brick (brick, &brickinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_resolve_brick (brickinfo);
+ if (ret)
+ goto out;
+ if (stripe_count || replica_count) {
+ add_brick_at_right_order (brickinfo, volinfo, (i - 1),
+ stripe_count, replica_count);
+ } else {
+ list_add_tail (&brickinfo->brick_list, &volinfo->bricks);
+ }
+ brick = strtok_r (NULL, " \n", &saveptr);
+ i++;
+ volinfo->brick_count++;
+
+ }
+
+
+ /* Gets changed only if the options are given in add-brick cli */
+ if (type)
+ volinfo->type = type;
+ if (replica_count) {
+ volinfo->replica_count = replica_count;
+ }
+ if (stripe_count) {
+ volinfo->stripe_count = stripe_count;
+ }
+ volinfo->dist_leaf_count = glusterd_get_dist_leaf_count (volinfo);
+
+ /* backward compatibility */
+ volinfo->sub_count = ((volinfo->dist_leaf_count == 1) ? 0:
+ volinfo->dist_leaf_count);
+
+ volinfo->subvol_count = (volinfo->brick_count /
+ volinfo->dist_leaf_count);
+
+ ret = glusterd_create_volfiles_and_notify_services (volinfo);
+ if (ret)
+ goto out;
+
+ ret = 0;
+ if (GLUSTERD_STATUS_STARTED != volinfo->status)
+ goto out;
+
+ brick_list = gf_strdup (bricks);
+ free_ptr2 = brick_list;
+ i = 1;
+
+ if (count)
+ brick = strtok_r (brick_list+1, " \n", &saveptr);
+#ifdef HAVE_BD_XLATOR
+ if (brickinfo->vg[0])
+ caps = CAPS_BD | CAPS_THIN |
+ CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT;
+#endif
+
+ while (i <= count) {
+ ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo,
+ &brickinfo);
+ if (ret)
+ goto out;
+#ifdef HAVE_BD_XLATOR
+ /* Check for VG/thin pool if its BD volume */
+ if (brickinfo->vg[0]) {
+ ret = glusterd_is_valid_vg (brickinfo, 0, msg);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_CRITICAL, "%s", msg);
+ goto out;
+ }
+ /* if anyone of the brick does not have thin support,
+ disable it for entire volume */
+ caps &= brickinfo->caps;
+ } else
+ caps = 0;
+#endif
+
+ if (uuid_is_null (brickinfo->uuid)) {
+ ret = glusterd_resolve_brick (brickinfo);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, FMTSTR_RESOLVE_BRICK,
+ brickinfo->hostname, brickinfo->path);
+ goto out;
+ }
+ }
+
+ ret = glusterd_brick_start (volinfo, brickinfo,
+ _gf_true);
+ if (ret)
+ goto out;
+ i++;
+ brick = strtok_r (NULL, " \n", &saveptr);
+
+ /* Check if the brick is added in this node, and set
+ * the restart_needed flag. */
+ if ((!uuid_compare (brickinfo->uuid, MY_UUID)) &&
+ !restart_needed) {
+ restart_needed = 1;
+ gf_log ("", GF_LOG_DEBUG,
+ "Restart gsyncd session, if it's already "
+ "running.");
+ }
+ }
+
+ /* If the restart_needed flag is set, restart gsyncd sessions for that
+ * particular master with all the slaves. */
+ if (restart_needed) {
+ param.rsp_dict = dict;
+ param.volinfo = volinfo;
+ dict_foreach (volinfo->gsync_slaves,
+ _glusterd_restart_gsync_session, &param);
+ }
+ volinfo->caps = caps;
+out:
+ GF_FREE (free_ptr1);
+ GF_FREE (free_ptr2);
+
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+
+int
+glusterd_op_perform_remove_brick (glusterd_volinfo_t *volinfo, char *brick,
+ int force, int *need_migrate)
+{
+ glusterd_brickinfo_t *brickinfo = NULL;
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
+
+ GF_ASSERT (volinfo);
+ GF_ASSERT (brick);
+
+ priv = THIS->private;
+ GF_ASSERT (priv);
+
+ ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo,
+ &brickinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_resolve_brick (brickinfo);
+ if (ret)
+ goto out;
+
+ glusterd_volinfo_reset_defrag_stats (volinfo);
+
+ if (!uuid_compare (brickinfo->uuid, MY_UUID)) {
+ /* Only if the brick is in this glusterd, do the rebalance */
+ if (need_migrate)
+ *need_migrate = 1;
+ }
+
+ if (force) {
+ ret = glusterd_brick_stop (volinfo, brickinfo,
+ _gf_true);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Unable to stop "
+ "glusterfs, ret: %d", ret);
+ }
+ goto out;
+ }
+
+ brickinfo->decommissioned = 1;
+ ret = 0;
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr)
+{
+ int ret = 0;
+ char *volname = NULL;
+ int count = 0;
+ int i = 0;
+ char *bricks = NULL;
+ char *brick_list = NULL;
+ char *saveptr = NULL;
+ char *free_ptr = NULL;
+ char *brick = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ char msg[2048] = {0,};
+ gf_boolean_t brick_alloc = _gf_false;
+ char *all_bricks = NULL;
+ char *str_ret = NULL;
+ gf_boolean_t is_force = _gf_false;
+
+ priv = THIS->private;
+ if (!priv)
+ goto out;
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Unable to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Unable to find volume: %s", volname);
+ goto out;
+ }
+
+ ret = glusterd_validate_volume_id (dict, volinfo);
+ if (ret)
+ goto out;
+
+ if (glusterd_is_rb_ongoing (volinfo)) {
+ snprintf (msg, sizeof (msg), "Replace brick is in progress on "
+ "volume %s. Please retry after replace-brick "
+ "operation is committed or aborted", volname);
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ ret = -1;
+ goto out;
+ }
+
+ if (glusterd_is_defrag_on(volinfo)) {
+ snprintf (msg, sizeof(msg), "Volume name %s rebalance is in "
+ "progress. Please retry after completion", volname);
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ ret = -1;
+ goto out;
+ }
+ ret = dict_get_int32 (dict, "count", &count);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get count");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "bricks", &bricks);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Unable to get bricks");
+ goto out;
+ }
+
+ is_force = dict_get_str_boolean (dict, "force", _gf_false);
+
+ if (bricks) {
+ brick_list = gf_strdup (bricks);
+ all_bricks = gf_strdup (bricks);
+ free_ptr = brick_list;
+ }
+
+ if (count)
+ brick = strtok_r (brick_list+1, " \n", &saveptr);
+
+
+ while ( i < count) {
+ if (!glusterd_store_is_valid_brickpath (volname, brick) ||
+ !glusterd_is_valid_volfpath (volname, brick)) {
+ snprintf (msg, sizeof (msg), "brick path %s is "
+ "too long", brick);
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+
+ ret = -1;
+ goto out;
+
+ }
+
+ ret = glusterd_brickinfo_new_from_brick (brick, &brickinfo);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Add-brick: Unable"
+ " to get brickinfo");
+ goto out;
+ }
+ brick_alloc = _gf_true;
+
+ ret = glusterd_new_brick_validate (brick, brickinfo, msg,
+ sizeof (msg));
+ if (ret) {
+ *op_errstr = gf_strdup (msg);
+ ret = -1;
+ goto out;
+ }
+
+ if (!uuid_compare (brickinfo->uuid, MY_UUID)) {
+#ifdef HAVE_BD_XLATOR
+ if (brickinfo->vg[0]) {
+ ret = glusterd_is_valid_vg (brickinfo, 1, msg);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "%s",
+ msg);
+ *op_errstr = gf_strdup (msg);
+ goto out;
+ }
+ }
+#endif
+
+ ret = glusterd_validate_and_create_brickpath (brickinfo,
+ volinfo->volume_id,
+ op_errstr, is_force);
+ if (ret)
+ goto out;
+ }
+
+ glusterd_brickinfo_delete (brickinfo);
+ brick_alloc = _gf_false;
+ brickinfo = NULL;
+ brick = strtok_r (NULL, " \n", &saveptr);
+ i++;
+ }
+
+out:
+ GF_FREE (free_ptr);
+ if (brick_alloc && brickinfo)
+ glusterd_brickinfo_delete (brickinfo);
+ GF_FREE (str_ret);
+ GF_FREE (all_bricks);
+
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+int
+glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
+{
+ int ret = -1;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *errstr = NULL;
+ int32_t brick_count = 0;
+ char msg[2048] = {0,};
+ int32_t flag = 0;
+ gf1_op_commands cmd = GF_OP_CMD_NONE;
+ char *task_id_str = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Volume %s does not exist", volname);
+ goto out;
+ }
+
+ ret = glusterd_validate_volume_id (dict, volinfo);
+ if (ret)
+ goto out;
+
+ if (glusterd_is_rb_ongoing (volinfo)) {
+ snprintf (msg, sizeof (msg), "Replace brick is in progress on "
+ "volume %s. Please retry after replace-brick "
+ "operation is committed or aborted", volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "command", &flag);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get brick count");
+ goto out;
+ }
+ cmd = flag;
+
+ ret = -1;
+ switch (cmd) {
+ case GF_OP_CMD_NONE:
+ errstr = gf_strdup ("no remove-brick command issued");
+ goto out;
+
+ case GF_OP_CMD_STATUS:
+ ret = 0;
+ goto out;
+
+ case GF_OP_CMD_START:
+ {
+ if ((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) &&
+ dict_get (dict, "replica-count")) {
+ snprintf (msg, sizeof(msg), "Migration of data is not "
+ "needed when reducing replica count. Use the"
+ " 'force' option");
+ errstr = gf_strdup (msg);
+ gf_log (this->name, GF_LOG_ERROR, "%s", errstr);
+ goto out;
+ }
+
+ if (GLUSTERD_STATUS_STARTED != volinfo->status) {
+ snprintf (msg, sizeof (msg), "Volume %s needs to be "
+ "started before remove-brick (you can use "
+ "'force' or 'commit' to override this "
+ "behavior)", volinfo->volname);
+ errstr = gf_strdup (msg);
+ gf_log (this->name, GF_LOG_ERROR, "%s", errstr);
+ goto out;
+ }
+ if (!gd_is_remove_brick_committed (volinfo)) {
+ snprintf (msg, sizeof (msg), "An earlier remove-brick "
+ "task exists for volume %s. Either commit it"
+ " or stop it before starting a new task.",
+ volinfo->volname);
+ errstr = gf_strdup (msg);
+ gf_log (this->name, GF_LOG_ERROR, "Earlier remove-brick"
+ " task exists for volume %s.",
+ volinfo->volname);
+ goto out;
+ }
+ if (glusterd_is_defrag_on(volinfo)) {
+ errstr = gf_strdup("Rebalance is in progress. Please "
+ "retry after completion");
+ gf_log (this->name, GF_LOG_ERROR, "%s", errstr);
+ goto out;
+ }
+
+ if (is_origin_glusterd (dict)) {
+ ret = glusterd_generate_and_set_task_id
+ (dict, GF_REMOVE_BRICK_TID_KEY);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to generate task-id");
+ goto out;
+ }
+ } else {
+ ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY,
+ &task_id_str);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Missing remove-brick-id");
+ ret = 0;
+ }
+ }
+ break;
+ }
+
+ case GF_OP_CMD_STOP:
+ ret = 0;
+ break;
+
+ case GF_OP_CMD_COMMIT:
+ if (volinfo->decommission_in_progress) {
+ errstr = gf_strdup ("use 'force' option as migration "
+ "is in progress");
+ goto out;
+ }
+ break;
+
+ case GF_OP_CMD_COMMIT_FORCE:
+ break;
+ }
+
+ ret = dict_get_int32 (dict, "count", &brick_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get brick count");
+ goto out;
+ }
+
+ ret = 0;
+ if (volinfo->brick_count == brick_count) {
+ errstr = gf_strdup ("Deleting all the bricks of the "
+ "volume is not allowed");
+ ret = -1;
+ goto out;
+ }
+
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+ if (ret && errstr) {
+ if (op_errstr)
+ *op_errstr = errstr;
+ }
+
+ return ret;
+}
+
+int
+glusterd_remove_brick_migrate_cbk (glusterd_volinfo_t *volinfo,
+ gf_defrag_status_t status)
+{
+ int ret = 0;
+
+#if 0 /* TODO: enable this behavior once cluster-wide awareness comes for
+ defrag cbk function */
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *tmp = NULL;
+
+ switch (status) {
+ case GF_DEFRAG_STATUS_PAUSED:
+ case GF_DEFRAG_STATUS_FAILED:
+ /* No changes required in the volume file.
+ everything should remain as is */
+ break;
+ case GF_DEFRAG_STATUS_STOPPED:
+ /* Fall back to the old volume file */
+ list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks, brick_list) {
+ if (!brickinfo->decommissioned)
+ continue;
+ brickinfo->decommissioned = 0;
+ }
+ break;
+
+ case GF_DEFRAG_STATUS_COMPLETE:
+ /* Done with the task, you can remove the brick from the
+ volume file */
+ list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks, brick_list) {
+ if (!brickinfo->decommissioned)
+ continue;
+ gf_log (THIS->name, GF_LOG_INFO, "removing the brick %s",
+ brickinfo->path);
+ brickinfo->decommissioned = 0;
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+ /*TODO: use the 'atomic' flavour of brick_stop*/
+ ret = glusterd_brick_stop (volinfo, brickinfo);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Unable to stop glusterfs (%d)", ret);
+ }
+ }
+ glusterd_delete_brick (volinfo, brickinfo);
+ }
+ break;
+
+ default:
+ GF_ASSERT (!"cbk function called with wrong status");
+ break;
+ }
+
+ ret = glusterd_create_volfiles_and_notify_services (volinfo);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Unable to write volume files (%d)", ret);
+
+ ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Unable to store volume info (%d)", ret);
+
+
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+ ret = glusterd_check_generate_start_nfs ();
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Unable to start nfs process (%d)", ret);
+ }
+
+#endif
+
+ volinfo->decommission_in_progress = 0;
+ return ret;
+}
+
+
+int
+glusterd_op_add_brick (dict_t *dict, char **op_errstr)
+{
+ int ret = 0;
+ char *volname = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+ char *bricks = NULL;
+ int32_t count = 0;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = dict_get_str (dict, "volname", &volname);
+
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to allocate memory");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "count", &count);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get count");
+ goto out;
+ }
+
+
+ ret = dict_get_str (dict, "bricks", &bricks);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get bricks");
+ goto out;
+ }
+
+ ret = glusterd_op_perform_add_bricks (volinfo, count, bricks, dict);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to add bricks");
+ goto out;
+ }
+
+ ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret)
+ goto out;
+
+ if (GLUSTERD_STATUS_STARTED == volinfo->status)
+ ret = glusterd_nodesvcs_handle_graph_change (volinfo);
+
+out:
+ return ret;
+}
+
+int
+glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
+{
+ int ret = -1;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *brick = NULL;
+ int32_t count = 0;
+ int32_t i = 1;
+ char key[256] = {0,};
+ int32_t flag = 0;
+ char err_str[4096] = {0,};
+ int need_rebalance = 0;
+ int force = 0;
+ gf1_op_commands cmd = 0;
+ int32_t replica_count = 0;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *tmp = NULL;
+ char *task_id_str = NULL;
+ xlator_t *this = NULL;
+ dict_t *bricks_dict = NULL;
+ char *brick_tmpstr = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = dict_get_str (dict, "volname", &volname);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to allocate memory");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "command", &flag);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get command");
+ goto out;
+ }
+ cmd = flag;
+
+ /* Set task-id, if available, in ctx dict for operations other than
+ * start
+ */
+ if (is_origin_glusterd (dict) && (cmd != GF_OP_CMD_START)) {
+ if (!uuid_is_null (volinfo->rebal.rebalance_id)) {
+ ret = glusterd_copy_uuid_to_dict
+ (volinfo->rebal.rebalance_id, dict,
+ GF_REMOVE_BRICK_TID_KEY);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set remove-brick-id");
+ goto out;
+ }
+ }
+ }
+
+ /* Clear task-id, rebal.op and stored bricks on commmitting/stopping
+ * remove-brick */
+ if ((cmd != GF_OP_CMD_START) || (cmd != GF_OP_CMD_STATUS)) {
+ uuid_clear (volinfo->rebal.rebalance_id);
+ volinfo->rebal.op = GD_OP_NONE;
+ dict_unref (volinfo->rebal.dict);
+ volinfo->rebal.dict = NULL;
+ }
+
+ ret = -1;
+ switch (cmd) {
+ case GF_OP_CMD_NONE:
+ goto out;
+
+ case GF_OP_CMD_STATUS:
+ ret = 0;
+ goto out;
+
+ case GF_OP_CMD_STOP:
+ {
+ /* Fall back to the old volume file */
+ list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks,
+ brick_list) {
+ if (!brickinfo->decommissioned)
+ continue;
+ brickinfo->decommissioned = 0;
+ }
+ ret = glusterd_create_volfiles_and_notify_services (volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to create volfiles");
+ goto out;
+ }
+
+ ret = glusterd_store_volinfo (volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to store volinfo");
+ goto out;
+ }
+
+ ret = 0;
+ goto out;
+ }
+
+ case GF_OP_CMD_START:
+ ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY, &task_id_str);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Missing remove-brick-id");
+ ret = 0;
+ } else {
+ uuid_parse (task_id_str, volinfo->rebal.rebalance_id) ;
+ volinfo->rebal.op = GD_OP_REMOVE_BRICK;
+ }
+ force = 0;
+ break;
+
+ case GF_OP_CMD_COMMIT:
+ force = 1;
+ break;
+
+ case GF_OP_CMD_COMMIT_FORCE:
+
+ if (volinfo->decommission_in_progress) {
+ if (volinfo->rebal.defrag) {
+ LOCK (&volinfo->rebal.defrag->lock);
+ /* Fake 'rebalance-complete' so the graph change
+ happens right away */
+ volinfo->rebal.defrag_status =
+ GF_DEFRAG_STATUS_COMPLETE;
+
+ UNLOCK (&volinfo->rebal.defrag->lock);
+ }
+ /* Graph change happens in rebalance _cbk function,
+ no need to do anything here */
+ /* TODO: '_cbk' function is not doing anything for now */
+ }
+
+ ret = 0;
+ force = 1;
+ break;
+ }
+
+ ret = dict_get_int32 (dict, "count", &count);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get count");
+ goto out;
+ }
+
+ /* Save the list of bricks for later usage. Right now this is required
+ * for displaying the task parameters with task status in volume status.
+ */
+ bricks_dict = dict_new ();
+ if (!bricks_dict) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_int32 (bricks_dict, "count", count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to save remove-brick count");
+ goto out;
+ }
+ while ( i <= count) {
+ snprintf (key, 256, "brick%d", i);
+ ret = dict_get_str (dict, key, &brick);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get %s",
+ key);
+ goto out;
+ }
+
+ brick_tmpstr = gf_strdup (brick);
+ if (!brick_tmpstr) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to duplicate brick name");
+ goto out;
+ }
+ ret = dict_set_dynstr (bricks_dict, key, brick_tmpstr);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to add brick to dict");
+ goto out;
+ }
+ brick_tmpstr = NULL;
+
+ ret = glusterd_op_perform_remove_brick (volinfo, brick, force,
+ &need_rebalance);
+ if (ret)
+ goto out;
+ i++;
+ }
+ ret = dict_get_int32 (dict, "replica-count", &replica_count);
+ if (!ret) {
+ gf_log (this->name, GF_LOG_INFO,
+ "changing replica count %d to %d on volume %s",
+ volinfo->replica_count, replica_count,
+ volinfo->volname);
+ volinfo->replica_count = replica_count;
+ volinfo->sub_count = replica_count;
+ volinfo->dist_leaf_count = glusterd_get_dist_leaf_count (volinfo);
+ volinfo->subvol_count = (volinfo->brick_count /
+ volinfo->dist_leaf_count);
+
+ if (replica_count == 1) {
+ if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) {
+ volinfo->type = GF_CLUSTER_TYPE_NONE;
+ /* backward compatibility */
+ volinfo->sub_count = 0;
+ } else {
+ volinfo->type = GF_CLUSTER_TYPE_STRIPE;
+ /* backward compatibility */
+ volinfo->sub_count = volinfo->dist_leaf_count;
+ }
+ }
+ }
+ volinfo->rebal.dict = bricks_dict;
+ bricks_dict = NULL;
+
+ ret = glusterd_create_volfiles_and_notify_services (volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to create volfiles");
+ goto out;
+ }
+
+ ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to store volinfo");
+ goto out;
+ }
+
+ /* Need to reset the defrag/rebalance status accordingly */
+ switch (volinfo->rebal.defrag_status) {
+ case GF_DEFRAG_STATUS_FAILED:
+ case GF_DEFRAG_STATUS_COMPLETE:
+ volinfo->rebal.defrag_status = 0;
+ default:
+ break;
+ }
+ if (!force && need_rebalance) {
+ /* perform the rebalance operations */
+ ret = glusterd_handle_defrag_start
+ (volinfo, err_str, sizeof (err_str),
+ GF_DEFRAG_CMD_START_FORCE,
+ glusterd_remove_brick_migrate_cbk, GD_OP_REMOVE_BRICK);
+
+ if (!ret)
+ volinfo->decommission_in_progress = 1;
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to start the rebalance");
+ }
+ } else {
+ if (GLUSTERD_STATUS_STARTED == volinfo->status)
+ ret = glusterd_nodesvcs_handle_graph_change (volinfo);
+ }
+
+out:
+ if (ret && err_str[0] && op_errstr)
+ *op_errstr = gf_strdup (err_str);
+
+ GF_FREE (brick_tmpstr);
+ if (bricks_dict)
+ dict_unref (bricks_dict);
+
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
new file mode 100644
index 000000000..5786694bd
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
@@ -0,0 +1,4236 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "common-utils.h"
+#include "cli1-xdr.h"
+#include "xdr-generic.h"
+#include "glusterd.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-store.h"
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+#include "run.h"
+#include "syscall.h"
+
+#include <signal.h>
+
+static int
+dict_get_param (dict_t *dict, char *key, char **param);
+
+static int
+glusterd_get_statefile_name (glusterd_volinfo_t *volinfo, char *slave,
+ char *conf_path, char **statefile);
+
+static int
+glusterd_get_slave_info (char *slave, char **slave_ip,
+ char **slave_vol, char **op_errstr);
+
+static int
+glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen);
+
+struct gsync_config_opt_vals_ gsync_confopt_vals[] = {
+ {.op_name = "change_detector",
+ .no_of_pos_vals = 2,
+ .case_sensitive = _gf_true,
+ .values = {"xsync", "changelog"},
+ },
+ {.op_name = "special_sync_mode",
+ .no_of_pos_vals = 2,
+ .case_sensitive = _gf_true,
+ .values = {"partial", "recover"}
+ },
+ {.op_name = "log-level",
+ .no_of_pos_vals = 5,
+ .case_sensitive = _gf_false,
+ .values = {"critical", "error", "warning", "info", "debug"}
+ },
+ {.op_name = NULL,
+ },
+};
+
+static char *gsync_reserved_opts[] = {
+ "gluster-command-dir",
+ "pid-file",
+ "remote-gsyncd"
+ "state-file",
+ "session-owner",
+ "state-socket-unencoded",
+ "socketdir",
+ "ignore-deletes",
+ "local-id",
+ "local-path",
+ "slave-id",
+ NULL
+};
+
+int
+__glusterd_handle_sys_exec (rpcsvc_request_t *req)
+{
+ int32_t ret = 0;
+ dict_t *dict = NULL;
+ gf_cli_req cli_req = {{0},};
+ glusterd_op_t cli_op = GD_OP_SYS_EXEC;
+ glusterd_conf_t *priv = NULL;
+ char *host_uuid = NULL;
+ char err_str[2048] = {0,};
+ xlator_t *this = NULL;
+
+ GF_ASSERT (req);
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = xdr_to_generic (req->msg[0], &cli_req,
+ (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf (err_str, sizeof (err_str), "Unable to decode "
+ "the command");
+ goto out;
+ } else {
+ dict->extra_stdfree = cli_req.dict.dict_val;
+ }
+
+ host_uuid = gf_strdup (uuid_utoa(MY_UUID));
+ if (host_uuid == NULL) {
+ snprintf (err_str, sizeof (err_str), "Failed to get "
+ "the uuid of local glusterd");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr (dict, "host-uuid", host_uuid);
+ if (ret)
+ goto out;
+ }
+
+ ret = glusterd_op_begin_synctask (req, cli_op, dict);
+
+out:
+ if (ret) {
+ if (err_str[0] == '\0')
+ snprintf (err_str, sizeof (err_str),
+ "Operation failed");
+ ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+ dict, err_str);
+ }
+ return ret;
+}
+
+int
+__glusterd_handle_copy_file (rpcsvc_request_t *req)
+{
+ int32_t ret = 0;
+ dict_t *dict = NULL;
+ gf_cli_req cli_req = {{0},};
+ glusterd_op_t cli_op = GD_OP_COPY_FILE;
+ glusterd_conf_t *priv = NULL;
+ char *host_uuid = NULL;
+ char err_str[2048] = {0,};
+ xlator_t *this = NULL;
+
+ GF_ASSERT (req);
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = xdr_to_generic (req->msg[0], &cli_req,
+ (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf (err_str, sizeof (err_str), "Unable to decode "
+ "the command");
+ goto out;
+ } else {
+ dict->extra_stdfree = cli_req.dict.dict_val;
+ }
+
+ host_uuid = gf_strdup (uuid_utoa(MY_UUID));
+ if (host_uuid == NULL) {
+ snprintf (err_str, sizeof (err_str), "Failed to get "
+ "the uuid of local glusterd");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr (dict, "host-uuid", host_uuid);
+ if (ret)
+ goto out;
+ }
+
+ ret = glusterd_op_begin_synctask (req, cli_op, dict);
+
+out:
+ if (ret) {
+ if (err_str[0] == '\0')
+ snprintf (err_str, sizeof (err_str),
+ "Operation failed");
+ ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+ dict, err_str);
+ }
+ return ret;
+}
+
+int
+__glusterd_handle_gsync_set (rpcsvc_request_t *req)
+{
+ int32_t ret = 0;
+ dict_t *dict = NULL;
+ gf_cli_req cli_req = {{0},};
+ glusterd_op_t cli_op = GD_OP_GSYNC_SET;
+ char *master = NULL;
+ char *slave = NULL;
+ char operation[256] = {0,};
+ int type = 0;
+ glusterd_conf_t *priv = NULL;
+ char *host_uuid = NULL;
+ char err_str[2048] = {0,};
+ xlator_t *this = NULL;
+
+ GF_ASSERT (req);
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = xdr_to_generic (req->msg[0], &cli_req,
+ (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf (err_str, sizeof (err_str), "Unable to decode "
+ "the command");
+ goto out;
+ } else {
+ dict->extra_stdfree = cli_req.dict.dict_val;
+ }
+
+ host_uuid = gf_strdup (uuid_utoa(MY_UUID));
+ if (host_uuid == NULL) {
+ snprintf (err_str, sizeof (err_str), "Failed to get "
+ "the uuid of local glusterd");
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_dynstr (dict, "host-uuid", host_uuid);
+ if (ret)
+ goto out;
+
+ }
+
+ ret = dict_get_str (dict, "master", &master);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_INFO, "master not found, while "
+ "handling "GEOREP" options");
+ master = "(No Master)";
+ }
+
+ ret = dict_get_str (dict, "slave", &slave);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_INFO, "slave not found, while "
+ "handling "GEOREP" options");
+ slave = "(No Slave)";
+ }
+
+ ret = dict_get_int32 (dict, "type", &type);
+ if (ret < 0) {
+ snprintf (err_str, sizeof (err_str), "Command type not found "
+ "while handling "GEOREP" options");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ switch (type) {
+ case GF_GSYNC_OPTION_TYPE_CREATE:
+ strncpy (operation, "create", sizeof (operation));
+ cli_op = GD_OP_GSYNC_CREATE;
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_START:
+ strncpy (operation, "start", sizeof (operation));
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_STOP:
+ strncpy (operation, "stop", sizeof (operation));
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_CONFIG:
+ strncpy (operation, "config", sizeof (operation));
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_STATUS:
+ strncpy (operation, "status", sizeof (operation));
+ break;
+ }
+
+ ret = glusterd_op_begin_synctask (req, cli_op, dict);
+
+out:
+ if (ret) {
+ if (err_str[0] == '\0')
+ snprintf (err_str, sizeof (err_str),
+ "Operation failed");
+ ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+ dict, err_str);
+ }
+ return ret;
+}
+
+int
+glusterd_handle_sys_exec (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __glusterd_handle_sys_exec);
+}
+
+int
+glusterd_handle_copy_file (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __glusterd_handle_copy_file);
+}
+
+int
+glusterd_handle_gsync_set (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __glusterd_handle_gsync_set);
+}
+
+/*****
+ *
+ * glusterd_urltransform* internal API
+ *
+ *****/
+
+static void
+glusterd_urltransform_init (runner_t *runner, const char *transname)
+{
+ runinit (runner);
+ runner_add_arg (runner, GSYNCD_PREFIX"/gsyncd");
+ runner_argprintf (runner, "--%s-url", transname);
+}
+
+static void
+glusterd_urltransform_add (runner_t *runner, const char *url)
+{
+ runner_add_arg (runner, url);
+}
+
+static int
+_glusterd_urltransform_add_iter (dict_t *dict, char *key, data_t *value, void *data)
+{
+ runner_t *runner = (runner_t *)data;
+ char *slave = NULL;
+
+ slave = strchr (value->data, ':');
+ GF_ASSERT (slave);
+ slave++;
+ runner_add_arg (runner, slave);
+
+ return 0;
+}
+
+static void
+glusterd_urltransform_free (char **linearr, unsigned n)
+{
+ int i = 0;
+
+ for (; i < n; i++)
+ GF_FREE (linearr[i]);
+
+ GF_FREE (linearr);
+}
+
+static int
+glusterd_urltransform (runner_t *runner, char ***linearrp)
+{
+ char **linearr = NULL;
+ char *line = NULL;
+ unsigned arr_len = 32;
+ unsigned arr_idx = 0;
+ gf_boolean_t error = _gf_false;
+
+ linearr = GF_CALLOC (arr_len, sizeof (char *), gf_gld_mt_linearr);
+ if (!linearr) {
+ error = _gf_true;
+ goto out;
+ }
+
+ runner_redir (runner, STDOUT_FILENO, RUN_PIPE);
+ if (runner_start (runner) != 0) {
+ gf_log ("", GF_LOG_ERROR, "spawning child failed");
+
+ error = _gf_true;
+ goto out;
+ }
+
+ arr_idx = 0;
+ for (;;) {
+ size_t len;
+ line = GF_MALLOC (1024, gf_gld_mt_linebuf);
+ if (!line) {
+ error = _gf_true;
+ goto out;
+ }
+
+ if (fgets (line, 1024, runner_chio (runner, STDOUT_FILENO)) ==
+ NULL)
+ break;
+
+ len = strlen (line);
+ if (len == 0 || line[len - 1] != '\n') {
+ GF_FREE (line);
+ error = _gf_true;
+ goto out;
+ }
+ line[len - 1] = '\0';
+
+ if (arr_idx == arr_len) {
+ void *p = linearr;
+ arr_len <<= 1;
+ p = GF_REALLOC (linearr, arr_len);
+ if (!p) {
+ GF_FREE (line);
+ error = _gf_true;
+ goto out;
+ }
+ linearr = p;
+ }
+ linearr[arr_idx] = line;
+
+ arr_idx++;
+ }
+
+ out:
+
+ /* XXX chpid field is not exported by run API
+ * but runner_end() does not abort the invoked
+ * process (ie. it might block in waitpid(2))
+ * so we resort to a manual kill a the private field
+ */
+ if (error && runner->chpid > 0)
+ kill (runner->chpid, SIGKILL);
+
+ if (runner_end (runner) != 0)
+ error = _gf_true;
+
+ if (error) {
+ gf_log ("", GF_LOG_ERROR, "reading data from child failed");
+ glusterd_urltransform_free (linearr, arr_idx);
+ return -1;
+ }
+
+ *linearrp = linearr;
+ return arr_idx;
+}
+
+static int
+glusterd_urltransform_single (const char *url, const char *transname,
+ char ***linearrp)
+{
+ runner_t runner = {0,};
+
+ glusterd_urltransform_init (&runner, transname);
+ glusterd_urltransform_add (&runner, url);
+ return glusterd_urltransform (&runner, linearrp);
+}
+
+
+struct dictidxmark {
+ unsigned isrch;
+ unsigned ithis;
+ char *ikey;
+};
+
+static int
+_dict_mark_atindex (dict_t *dict, char *key, data_t *value, void *data)
+{
+ struct dictidxmark *dim = data;
+
+ if (dim->isrch == dim->ithis)
+ dim->ikey = key;
+
+ dim->ithis++;
+ return 0;
+}
+
+static char *
+dict_get_by_index (dict_t *dict, unsigned i)
+{
+ struct dictidxmark dim = {0,};
+
+ dim.isrch = i;
+ dict_foreach (dict, _dict_mark_atindex, &dim);
+
+ return dim.ikey;
+}
+
+static int
+glusterd_get_slave (glusterd_volinfo_t *vol, const char *slaveurl, char **slavekey)
+{
+ runner_t runner = {0,};
+ int n = 0;
+ int i = 0;
+ char **linearr = NULL;
+
+ glusterd_urltransform_init (&runner, "canonicalize");
+ dict_foreach (vol->gsync_slaves, _glusterd_urltransform_add_iter, &runner);
+ glusterd_urltransform_add (&runner, slaveurl);
+
+ n = glusterd_urltransform (&runner, &linearr);
+ if (n == -1)
+ return -2;
+
+ for (i = 0; i < n - 1; i++) {
+ if (strcmp (linearr[i], linearr[n - 1]) == 0)
+ break;
+ }
+ glusterd_urltransform_free (linearr, i);
+
+ if (i < n - 1)
+ *slavekey = dict_get_by_index (vol->gsync_slaves, i);
+ else
+ i = -1;
+
+ return i;
+}
+
+
+static int
+glusterd_query_extutil_generic (char *resbuf, size_t blen, runner_t *runner, void *data,
+ int (*fcbk)(char *resbuf, size_t blen, FILE *fp, void *data))
+{
+ int ret = 0;
+
+ runner_redir (runner, STDOUT_FILENO, RUN_PIPE);
+ if (runner_start (runner) != 0) {
+ gf_log ("", GF_LOG_ERROR, "spawning child failed");
+
+ return -1;
+ }
+
+ ret = fcbk (resbuf, blen, runner_chio (runner, STDOUT_FILENO), data);
+
+ ret |= runner_end (runner);
+ if (ret)
+ gf_log ("", GF_LOG_ERROR, "reading data from child failed");
+
+ return ret ? -1 : 0;
+}
+
+static int
+_fcbk_singleline(char *resbuf, size_t blen, FILE *fp, void *data)
+{
+ char *ptr = NULL;
+
+ errno = 0;
+ ptr = fgets (resbuf, blen, fp);
+ if (ptr) {
+ size_t len = strlen(resbuf);
+ if (len && resbuf[len-1] == '\n')
+ resbuf[len-1] = '\0'; //strip off \n
+ }
+
+ return errno ? -1 : 0;
+}
+
+static int
+glusterd_query_extutil (char *resbuf, runner_t *runner)
+{
+ return glusterd_query_extutil_generic (resbuf, PATH_MAX, runner, NULL,
+ _fcbk_singleline);
+}
+
+static int
+_fcbk_conftodict (char *resbuf, size_t blen, FILE *fp, void *data)
+{
+ char *ptr = NULL;
+ dict_t *dict = data;
+ char *v = NULL;
+
+ for (;;) {
+ errno = 0;
+ ptr = fgets (resbuf, blen, fp);
+ if (!ptr)
+ break;
+ v = resbuf + strlen(resbuf) - 1;
+ while (isspace (*v))
+ /* strip trailing space */
+ *v-- = '\0';
+ if (v == resbuf)
+ /* skip empty line */
+ continue;
+ v = strchr (resbuf, ':');
+ if (!v)
+ return -1;
+ *v++ = '\0';
+ while (isspace (*v))
+ v++;
+ v = gf_strdup (v);
+ if (!v)
+ return -1;
+ if (dict_set_dynstr (dict, resbuf, v) != 0) {
+ GF_FREE (v);
+ return -1;
+ }
+ }
+
+ return errno ? -1 : 0;
+}
+
+static int
+glusterd_gsync_get_config (char *master, char *slave, char *conf_path, dict_t *dict)
+{
+ /* key + value, where value must be able to accommodate a path */
+ char resbuf[256 + PATH_MAX] = {0,};
+ runner_t runner = {0,};
+
+ runinit (&runner);
+ runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL);
+ runner_argprintf (&runner, "%s", conf_path);
+ runner_argprintf (&runner, ":%s", master);
+ runner_add_args (&runner, slave, "--config-get-all", NULL);
+
+ return glusterd_query_extutil_generic (resbuf, sizeof (resbuf),
+ &runner, dict, _fcbk_conftodict);
+}
+
+static int
+glusterd_gsync_get_param_file (char *prmfile, const char *param, char *master,
+ char *slave, char *conf_path)
+{
+ runner_t runner = {0,};
+
+ runinit (&runner);
+ runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL);
+ runner_argprintf (&runner, "%s", conf_path);
+ runner_argprintf (&runner, ":%s", master);
+ runner_add_args (&runner, slave, "--config-get", NULL);
+ runner_argprintf (&runner, "%s-file", param);
+
+ return glusterd_query_extutil (prmfile, &runner);
+}
+
+static int
+gsyncd_getpidfile (char *master, char *slave, char *pidfile, char *conf_path)
+{
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+ char *confpath = NULL;
+ char conf_buf[PATH_MAX] = "";
+ struct stat stbuf = {0,};
+
+
+ GF_ASSERT (THIS);
+ GF_ASSERT (THIS->private);
+
+ priv = THIS->private;
+
+ GF_VALIDATE_OR_GOTO ("gsync", master, out);
+ GF_VALIDATE_OR_GOTO ("gsync", slave, out);
+
+ ret = lstat (conf_path, &stbuf);
+ if (!ret) {
+ gf_log ("", GF_LOG_DEBUG, "Using passed config template(%s).",
+ conf_path);
+ confpath = conf_path;
+ } else {
+ ret = snprintf (conf_buf, sizeof(conf_buf) - 1,
+ "%s/"GSYNC_CONF_TEMPLATE, priv->workdir);
+ conf_buf[ret] = '\0';
+ confpath = conf_buf;
+ gf_log ("", GF_LOG_DEBUG, "Using default config template(%s).",
+ confpath);
+ }
+
+ ret = glusterd_gsync_get_param_file (pidfile, "pid", master,
+ slave, confpath);
+ if (ret == -1) {
+ ret = -2;
+ gf_log ("", GF_LOG_WARNING, "failed to create the pidfile string");
+ goto out;
+ }
+
+ ret = open (pidfile, O_RDWR);
+
+ out:
+ return ret;
+}
+
+static int
+gsync_status_byfd (int fd)
+{
+ GF_ASSERT (fd >= -1);
+
+ if (lockf (fd, F_TEST, 0) == -1 &&
+ (errno == EAGAIN || errno == EACCES))
+ /* gsyncd keeps the pidfile locked */
+ return 0;
+
+ return -1;
+}
+
+/* status: return 0 when gsync is running
+ * return -1 when not running
+ */
+int
+gsync_status (char *master, char *slave, char *conf_path, int *status)
+{
+ char pidfile[PATH_MAX] = {0,};
+ int fd = -1;
+
+ fd = gsyncd_getpidfile (master, slave, pidfile, conf_path);
+ if (fd == -2)
+ return -1;
+
+ *status = gsync_status_byfd (fd);
+
+ sys_close (fd);
+
+ return 0;
+}
+
+
+static int32_t
+glusterd_gsync_volinfo_dict_set (glusterd_volinfo_t *volinfo,
+ char *key, char *value)
+{
+ int32_t ret = -1;
+ char *gsync_status = NULL;
+
+ gsync_status = gf_strdup (value);
+ if (!gsync_status) {
+ gf_log ("", GF_LOG_ERROR, "Unable to allocate memory");
+ goto out;
+ }
+
+ ret = dict_set_dynstr (volinfo->dict, key, gsync_status);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to set dict");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return 0;
+}
+
+static int
+glusterd_verify_gsyncd_spawn (char *master, char *slave)
+{
+ int ret = 0;
+ runner_t runner = {0,};
+
+ runinit (&runner);
+ runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd",
+ "--verify", "spawning", NULL);
+ runner_argprintf (&runner, ":%s", master);
+ runner_add_args (&runner, slave, NULL);
+ runner_redir (&runner, STDOUT_FILENO, RUN_PIPE);
+ ret = runner_start (&runner);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "spawning child failed");
+ ret = -1;
+ goto out;
+ }
+
+ if (runner_end (&runner) != 0)
+ ret = -1;
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "returning %d", ret);
+ return ret;
+}
+
+static int
+gsync_verify_config_options (dict_t *dict, char **op_errstr, char *volname)
+{
+ char **resopt = NULL;
+ int i = 0;
+ int ret = -1;
+ char *subop = NULL;
+ char *slave = NULL;
+ char *op_name = NULL;
+ char *op_value = NULL;
+ char *t = NULL;
+ char errmsg[PATH_MAX] = "";
+ gf_boolean_t banned = _gf_true;
+ gf_boolean_t op_match = _gf_true;
+ gf_boolean_t val_match = _gf_true;
+ struct gsync_config_opt_vals_ *conf_vals = NULL;
+
+ if (dict_get_str (dict, "subop", &subop) != 0) {
+ gf_log ("", GF_LOG_WARNING, "missing subop");
+ *op_errstr = gf_strdup ("Invalid config request");
+ return -1;
+ }
+
+ if (dict_get_str (dict, "slave", &slave) != 0) {
+ gf_log ("", GF_LOG_WARNING, GEOREP" CONFIG: no slave given");
+ *op_errstr = gf_strdup ("Slave required");
+ return -1;
+ }
+
+ if (strcmp (subop, "get-all") == 0)
+ return 0;
+
+ if (dict_get_str (dict, "op_name", &op_name) != 0) {
+ gf_log ("", GF_LOG_WARNING, "option name missing");
+ *op_errstr = gf_strdup ("Option name missing");
+ return -1;
+ }
+
+ if (runcmd (GSYNCD_PREFIX"/gsyncd", "--config-check", op_name, NULL)) {
+ ret = glusterd_verify_gsyncd_spawn (volname, slave);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to spawn gsyncd");
+ return 0;
+ }
+
+ gf_log ("", GF_LOG_WARNING, "Invalid option %s", op_name);
+ *op_errstr = gf_strdup ("Invalid option");
+
+ return -1;
+ }
+
+ if (strcmp (subop, "get") == 0)
+ return 0;
+
+ t = strtail (subop, "set");
+ if (!t)
+ t = strtail (subop, "del");
+ if (!t || (t[0] && strcmp (t, "-glob") != 0)) {
+ gf_log ("", GF_LOG_WARNING, "unknown subop %s", subop);
+ *op_errstr = gf_strdup ("Invalid config request");
+ return -1;
+ }
+
+ if (strtail (subop, "set") &&
+ dict_get_str (dict, "op_value", &op_value) != 0) {
+ gf_log ("", GF_LOG_WARNING, "missing value for set");
+ *op_errstr = gf_strdup ("missing value");
+ }
+
+ /* match option name against reserved options, modulo -/_
+ * difference
+ */
+ for (resopt = gsync_reserved_opts; *resopt; resopt++) {
+ banned = _gf_true;
+ for (i = 0; (*resopt)[i] && op_name[i]; i++) {
+ if ((*resopt)[i] == op_name[i] ||
+ ((*resopt)[i] == '-' && op_name[i] == '_'))
+ continue;
+ banned = _gf_false;
+ }
+ if (banned) {
+ gf_log ("", GF_LOG_WARNING, "Reserved option %s", op_name);
+ *op_errstr = gf_strdup ("Reserved option");
+
+ return -1;
+ break;
+ }
+ }
+
+ /* Check options in gsync_confopt_vals for invalid values */
+ for (conf_vals = gsync_confopt_vals; conf_vals->op_name; conf_vals++) {
+ op_match = _gf_true;
+ for (i = 0; conf_vals->op_name[i] && op_name[i]; i++) {
+ if (conf_vals->op_name[i] == op_name[i] ||
+ (conf_vals->op_name[i] == '_' && op_name[i] == '-'))
+ continue;
+ op_match = _gf_false;
+ }
+
+ if (op_match) {
+ val_match = _gf_false;
+ for (i = 0; i < conf_vals->no_of_pos_vals; i++) {
+ if(conf_vals->case_sensitive){
+ if (!strcmp (conf_vals->values[i], op_value))
+ val_match = _gf_true;
+ } else {
+ if (!strcasecmp (conf_vals->values[i], op_value))
+ val_match = _gf_true;
+ }
+ }
+
+ if (!val_match) {
+ ret = snprintf (errmsg, sizeof(errmsg) - 1,
+ "Invalid values (%s) for"
+ " option %s", op_value,
+ op_name);
+ errmsg[ret] = '\0';
+
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ *op_errstr = gf_strdup (errmsg);
+ return -1;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int
+glusterd_get_gsync_status_mst_slv (glusterd_volinfo_t *volinfo,
+ char *slave, char *conf_path,
+ dict_t *rsp_dict, char *node);
+
+static int
+_get_status_mst_slv (dict_t *this, char *key, data_t *value, void *data)
+{
+ glusterd_gsync_status_temp_t *param = NULL;
+ char *slave = NULL;
+ char *slave_buf = NULL;
+ char *slave_ip = NULL;
+ char *slave_vol = NULL;
+ char *errmsg = NULL;
+ char conf_path[PATH_MAX] = "";
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+
+ param = (glusterd_gsync_status_temp_t *)data;
+
+ GF_ASSERT (param);
+ GF_ASSERT (param->volinfo);
+
+ if (THIS)
+ priv = THIS->private;
+ if (priv == NULL) {
+ gf_log ("", GF_LOG_ERROR, "priv of glusterd not present");
+ goto out;
+ }
+
+ slave = strchr(value->data, ':');
+ if (!slave)
+ return 0;
+ slave++;
+
+ ret = glusterd_get_slave_info (slave, &slave_ip, &slave_vol, &errmsg);
+ if (ret) {
+ if (errmsg)
+ gf_log ("", GF_LOG_ERROR, "Unable to fetch "
+ "slave details. Error: %s", errmsg);
+ else
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to fetch slave details.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = snprintf (conf_path, sizeof(conf_path) - 1,
+ "%s/"GEOREP"/%s_%s_%s/gsyncd.conf",
+ priv->workdir, param->volinfo->volname,
+ slave_ip, slave_vol);
+ conf_path[ret] = '\0';
+
+ ret = glusterd_get_gsync_status_mst_slv(param->volinfo,
+ slave, conf_path,
+ param->rsp_dict,
+ param->node);
+out:
+
+ if (slave_buf)
+ GF_FREE(slave_buf);
+
+ gf_log ("", GF_LOG_DEBUG, "Returning %d.", ret);
+ return ret;
+}
+
+
+static int
+_get_max_gsync_slave_num (dict_t *this, char *key, data_t *value, void *data)
+{
+ int tmp_slvnum = 0;
+ int *slvnum = (int *)data;
+
+ sscanf (key, "slave%d", &tmp_slvnum);
+ if (tmp_slvnum > *slvnum)
+ *slvnum = tmp_slvnum;
+
+ return 0;
+}
+
+static int
+glusterd_remove_slave_in_info (glusterd_volinfo_t *volinfo, char *slave,
+ char **op_errstr)
+{
+ int zero_slave_entries = _gf_true;
+ int ret = 0;
+ char *slavekey = NULL;
+
+ GF_ASSERT (volinfo);
+ GF_ASSERT (slave);
+
+ do {
+ ret = glusterd_get_slave (volinfo, slave, &slavekey);
+ if (ret < 0 && zero_slave_entries) {
+ ret++;
+ goto out;
+ }
+ zero_slave_entries = _gf_false;
+ dict_del (volinfo->gsync_slaves, slavekey);
+ } while (ret >= 0);
+
+ ret = glusterd_store_volinfo (volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ *op_errstr = gf_strdup ("Failed to store the Volume"
+ "information");
+ goto out;
+ }
+ out:
+ gf_log ("", GF_LOG_DEBUG, "returning %d", ret);
+ return ret;
+
+}
+
+static int
+glusterd_gsync_get_uuid (char *slave, glusterd_volinfo_t *vol,
+ uuid_t uuid)
+{
+ int ret = 0;
+ char *slavekey = NULL;
+ char *slaveentry = NULL;
+ char *t = NULL;
+
+ GF_ASSERT (vol);
+ GF_ASSERT (slave);
+
+ ret = glusterd_get_slave (vol, slave, &slavekey);
+ if (ret < 0) {
+ /* XXX colliding cases of failure and non-extant
+ * slave... now just doing this as callers of this
+ * function can make sense only of -1 and 0 as retvals;
+ * getting at the proper semanticals will involve
+ * fixing callers as well.
+ */
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_str (vol->gsync_slaves, slavekey, &slaveentry);
+ GF_ASSERT (ret == 0);
+
+ t = strchr (slaveentry, ':');
+ GF_ASSERT (t);
+ *t = '\0';
+ ret = uuid_parse (slaveentry, uuid);
+ *t = ':';
+
+ out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_check_gsync_running_local (char *master, char *slave,
+ char *conf_path,
+ gf_boolean_t *is_run)
+{
+ int ret = -1;
+ int ret_status = 0;
+
+ GF_ASSERT (master);
+ GF_ASSERT (slave);
+ GF_ASSERT (is_run);
+
+ *is_run = _gf_false;
+ ret = gsync_status (master, slave, conf_path, &ret_status);
+ if (ret == 0 && ret_status == 0) {
+ *is_run = _gf_true;
+ } else if (ret == -1) {
+ gf_log ("", GF_LOG_WARNING, GEOREP" validation "
+ " failed");
+ goto out;
+ }
+ ret = 0;
+ out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+
+}
+
+static int
+glusterd_store_slave_in_info (glusterd_volinfo_t *volinfo, char *slave,
+ char *host_uuid, char **op_errstr,
+ gf_boolean_t is_force)
+{
+ int ret = 0;
+ int maxslv = 0;
+ char **linearr = NULL;
+ char *value = NULL;
+ char *slavekey = NULL;
+ char *slaveentry = NULL;
+ char key[512] = {0, };
+ char *t = NULL;
+
+ GF_ASSERT (volinfo);
+ GF_ASSERT (slave);
+ GF_ASSERT (host_uuid);
+
+ ret = glusterd_get_slave (volinfo, slave, &slavekey);
+ switch (ret) {
+ case -2:
+ ret = -1;
+ goto out;
+ case -1:
+ break;
+ default:
+ if (!is_force)
+ GF_ASSERT (ret > 0);
+ ret = dict_get_str (volinfo->gsync_slaves, slavekey, &slaveentry);
+ GF_ASSERT (ret == 0);
+
+ /* same-name + same-uuid slave entries should have been filtered
+ * out in glusterd_op_verify_gsync_start_options(), so we can
+ * assert an uuid mismatch
+ */
+ t = strtail (slaveentry, host_uuid);
+ if (!is_force)
+ GF_ASSERT (!t || *t != ':');
+
+ if (is_force) {
+ gf_log ("", GF_LOG_DEBUG, GEOREP" has already been "
+ "invoked for the %s (master) and %s (slave)."
+ " Allowing without saving info again due to"
+ " force command.", volinfo->volname, slave);
+ ret = 0;
+ goto out;
+ }
+
+ gf_log ("", GF_LOG_ERROR, GEOREP" has already been invoked for "
+ "the %s (master) and %s (slave) "
+ "from a different machine",
+ volinfo->volname, slave);
+ *op_errstr = gf_strdup (GEOREP" already running in "
+ "another machine");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_urltransform_single (slave, "normalize", &linearr);
+ if (ret == -1)
+ goto out;
+
+ ret = gf_asprintf (&value, "%s:%s", host_uuid, linearr[0]);
+ glusterd_urltransform_free (linearr, 1);
+ if (ret == -1)
+ goto out;
+
+ dict_foreach (volinfo->gsync_slaves, _get_max_gsync_slave_num, &maxslv);
+ snprintf (key, 512, "slave%d", maxslv + 1);
+ ret = dict_set_dynstr (volinfo->gsync_slaves, key, value);
+ if (ret)
+ goto out;
+
+ ret = glusterd_store_volinfo (volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ *op_errstr = gf_strdup ("Failed to store the Volume "
+ "information");
+ goto out;
+ }
+ ret = 0;
+ out:
+ return ret;
+}
+
+static int
+glusterd_op_verify_gsync_start_options (glusterd_volinfo_t *volinfo,
+ char *slave, char *conf_path,
+ char *statefile, char **op_errstr,
+ gf_boolean_t is_force)
+{
+ int ret = -1;
+ gf_boolean_t is_running = _gf_false;
+ char msg[2048] = {0};
+ uuid_t uuid = {0};
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ struct stat stbuf = {0,};
+
+ this = THIS;
+
+ GF_ASSERT (volinfo);
+ GF_ASSERT (slave);
+ GF_ASSERT (op_errstr);
+ GF_ASSERT (conf_path);
+ GF_ASSERT (this && this->private);
+
+ priv = this->private;
+
+ if (GLUSTERD_STATUS_STARTED != volinfo->status) {
+ snprintf (msg, sizeof (msg), "Volume %s needs to be started "
+ "before "GEOREP" start", volinfo->volname);
+ goto out;
+ }
+
+ ret = lstat (statefile, &stbuf);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Session between %s and %s has"
+ " not been created. Please create session and retry.",
+ volinfo->volname, slave);
+ gf_log ("", GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ goto out;
+ }
+
+ /* Check if the gsync slave info is stored. If not
+ * session has not been created */
+ ret = glusterd_gsync_get_uuid (slave, volinfo, uuid);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Session between %s and %s has"
+ " not been created. Please create session and retry.",
+ volinfo->volname, slave);
+ gf_log ("", GF_LOG_ERROR, "%s", msg);
+ goto out;
+ }
+
+ if (is_force) {
+ ret = 0;
+ goto out;
+ }
+
+ /*Check if the gsync is already started in cmd. inited host
+ * If so initiate add it into the glusterd's priv*/
+ ret = glusterd_check_gsync_running_local (volinfo->volname,
+ slave, conf_path,
+ &is_running);
+ if (ret) {
+ snprintf (msg, sizeof (msg), GEOREP" start option "
+ "validation failed ");
+ goto out;
+ }
+ if (_gf_true == is_running) {
+ snprintf (msg, sizeof (msg), GEOREP " session between"
+ " %s & %s already started", volinfo->volname,
+ slave);
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_verify_gsyncd_spawn (volinfo->volname, slave);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Unable to spawn gsyncd");
+ gf_log ("", GF_LOG_ERROR, "%s", msg);
+ }
+out:
+ if (ret && (msg[0] != '\0')) {
+ *op_errstr = gf_strdup (msg);
+ }
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_check_gsync_running (glusterd_volinfo_t *volinfo, gf_boolean_t *flag)
+{
+
+ GF_ASSERT (volinfo);
+ GF_ASSERT (flag);
+
+ if (volinfo->gsync_slaves->count)
+ *flag = _gf_true;
+ else
+ *flag = _gf_false;
+
+ return 0;
+}
+
+static int
+glusterd_op_verify_gsync_running (glusterd_volinfo_t *volinfo,
+ char *slave, char *conf_path,
+ char **op_errstr)
+{
+ int pfd = -1;
+ int ret = -1;
+ char msg[2048] = {0};
+ char pidfile[PATH_MAX] = {0,};
+
+ GF_ASSERT (THIS && THIS->private);
+ GF_ASSERT (volinfo);
+ GF_ASSERT (slave);
+ GF_ASSERT (op_errstr);
+
+ if (GLUSTERD_STATUS_STARTED != volinfo->status) {
+ snprintf (msg, sizeof (msg), "Volume %s needs to be started "
+ "before "GEOREP" start", volinfo->volname);
+
+ goto out;
+ }
+
+ pfd = gsyncd_getpidfile (volinfo->volname, slave, pidfile, conf_path);
+ if (pfd == -2) {
+ gf_log ("", GF_LOG_ERROR, GEOREP" stop validation "
+ "failed for %s & %s", volinfo->volname, slave);
+ ret = -1;
+ goto out;
+ }
+ if (gsync_status_byfd (pfd) == -1) {
+ snprintf (msg, sizeof (msg), GEOREP" session b/w %s & %s is not"
+ " running on this node.", volinfo->volname, slave);
+ gf_log ("", GF_LOG_ERROR, "%s", msg);
+ ret = -1;
+ /* monitor gsyncd already dead */
+ goto out;
+ }
+
+ if (pfd < 0)
+ goto out;
+
+ ret = 0;
+out:
+ if (ret && (msg[0] != '\0')) {
+ *op_errstr = gf_strdup (msg);
+ }
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+static int
+glusterd_verify_gsync_status_opts (dict_t *dict, char **op_errstr)
+{
+ char *slave = NULL;
+ char *volname = NULL;
+ char errmsg[PATH_MAX] = {0, };
+ gf_boolean_t exists = _gf_false;
+ glusterd_volinfo_t *volinfo = NULL;
+ int ret = 0;
+ char *conf_path = NULL;
+ char *slave_ip = NULL;
+ char *slave_vol = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ if (THIS)
+ priv = THIS->private;
+ if (priv == NULL) {
+ gf_log ("", GF_LOG_ERROR, "priv of glusterd not present");
+ *op_errstr = gf_strdup ("glusterd defunct");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "master", &volname);
+ if (ret < 0) {
+ ret = 0;
+ goto out;
+ }
+
+ exists = glusterd_check_volume_exists (volname);
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if ((ret) || (!exists)) {
+ gf_log ("", GF_LOG_WARNING, "volume name does not exist");
+ snprintf (errmsg, sizeof(errmsg), "Volume name %s does not"
+ " exist", volname);
+ *op_errstr = gf_strdup (errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "slave", &slave);
+ if (ret < 0) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = glusterd_get_slave_details_confpath (volinfo, dict, &slave_ip,
+ &slave_vol, &conf_path,
+ op_errstr);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to fetch slave or confpath details.");
+ ret = -1;
+ goto out;
+ }
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+
+int
+glusterd_op_gsync_args_get (dict_t *dict, char **op_errstr,
+ char **master, char **slave, char **host_uuid)
+{
+
+ int ret = -1;
+ GF_ASSERT (dict);
+ GF_ASSERT (op_errstr);
+
+ if (master) {
+ ret = dict_get_str (dict, "master", master);
+ if (ret < 0) {
+ gf_log ("", GF_LOG_WARNING, "master not found");
+ *op_errstr = gf_strdup ("master not found");
+ goto out;
+ }
+ }
+
+ if (slave) {
+ ret = dict_get_str (dict, "slave", slave);
+ if (ret < 0) {
+ gf_log ("", GF_LOG_WARNING, "slave not found");
+ *op_errstr = gf_strdup ("slave not found");
+ goto out;
+ }
+ }
+
+ if (host_uuid) {
+ ret = dict_get_str (dict, "host-uuid", host_uuid);
+ if (ret < 0) {
+ gf_log ("", GF_LOG_WARNING, "host_uuid not found");
+ *op_errstr = gf_strdup ("host_uuid not found");
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_op_stage_sys_exec (dict_t *dict, char **op_errstr)
+{
+ char errmsg[PATH_MAX] = "";
+ char *command = NULL;
+ char command_path[PATH_MAX] = "";
+ struct stat st = {0,};
+ int ret = -1;
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ conf = this->private;
+ GF_ASSERT (conf);
+
+ if (conf->op_version < 2) {
+ gf_log ("", GF_LOG_ERROR, "Op Version not supported.");
+ snprintf (errmsg, sizeof(errmsg), "One or more nodes do not"
+ " support the required op version.");
+ *op_errstr = gf_strdup (errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "command", &command);
+ if (ret) {
+ strcpy (errmsg, "internal error");
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to get command from dict");
+ goto out;
+ }
+
+ /* enforce local occurrence of the command */
+ if (strchr (command, '/')) {
+ strcpy (errmsg, "invalid command name");
+ ret = -1;
+ goto out;
+ }
+
+ sprintf (command_path, GSYNCD_PREFIX"/peer_%s", command);
+ /* check if it's executable */
+ ret = access (command_path, X_OK);
+ if (!ret)
+ /* check if it's a regular file */
+ ret = stat (command_path, &st);
+ if (!ret && !S_ISREG (st.st_mode))
+ ret = -1;
+
+out:
+ if (ret) {
+ if (errmsg[0] == '\0')
+ snprintf (errmsg, sizeof (errmsg), "%s not found.",
+ command);
+ *op_errstr = gf_strdup (errmsg);
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ }
+
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_op_stage_copy_file (dict_t *dict, char **op_errstr)
+{
+ char abs_filename[PATH_MAX] = "";
+ char errmsg[PATH_MAX] = "";
+ char *filename = NULL;
+ char *host_uuid = NULL;
+ char uuid_str [64] = {0};
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+ struct stat stbuf = {0,};
+
+ if (THIS)
+ priv = THIS->private;
+ if (priv == NULL) {
+ gf_log ("", GF_LOG_ERROR, "priv of glusterd not present");
+ *op_errstr = gf_strdup ("glusterd defunct");
+ goto out;
+ }
+
+ if (priv->op_version < 2) {
+ gf_log ("", GF_LOG_ERROR, "Op Version not supported.");
+ snprintf (errmsg, sizeof(errmsg), "One or more nodes do not"
+ " support the required op version.");
+ *op_errstr = gf_strdup (errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "host-uuid", &host_uuid);
+ if (ret < 0) {
+ gf_log ("", GF_LOG_ERROR, "Unable to fetch"
+ " host-uuid from dict.");
+ goto out;
+ }
+
+ uuid_utoa_r (MY_UUID, uuid_str);
+ if (!strcmp (uuid_str, host_uuid)) {
+ ret = dict_get_str (dict, "source", &filename);
+ if (ret < 0) {
+ gf_log ("", GF_LOG_ERROR, "Unable to fetch"
+ " filename from dict.");
+ *op_errstr = gf_strdup ("command unsuccessful");
+ goto out;
+ }
+ snprintf (abs_filename, sizeof(abs_filename),
+ "%s/%s", priv->workdir, filename);
+
+ ret = lstat (abs_filename, &stbuf);
+ if (ret) {
+ snprintf (errmsg, sizeof (errmsg), "Source file"
+ " does not exist in %s", priv->workdir);
+ *op_errstr = gf_strdup (errmsg);
+ goto out;
+ }
+
+ if (!S_ISREG(stbuf.st_mode)) {
+ snprintf (errmsg, sizeof (errmsg), "Source file"
+ " is not a regular file.");
+ *op_errstr = gf_strdup (errmsg);
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+static int
+glusterd_get_statefile_name (glusterd_volinfo_t *volinfo, char *slave,
+ char *conf_path, char **statefile)
+{
+ glusterd_conf_t *priv = NULL;
+ int ret = -1;
+ char *master = NULL;
+ char *buf = NULL;
+ dict_t *confd = NULL;
+ char *confpath = NULL;
+ char conf_buf[PATH_MAX] = "";
+ struct stat stbuf = {0,};
+
+ GF_ASSERT (THIS);
+ GF_ASSERT (THIS->private);
+ GF_ASSERT (volinfo);
+
+ master = volinfo->volname;
+
+ confd = dict_new ();
+ if (!confd) {
+ gf_log ("", GF_LOG_ERROR, "Unable to create new dict");
+ goto out;
+ }
+
+ priv = THIS->private;
+
+ ret = lstat (conf_path, &stbuf);
+ if (!ret) {
+ gf_log ("", GF_LOG_INFO, "Using passed config template(%s).",
+ conf_path);
+ confpath = conf_path;
+ } else {
+ ret = snprintf (conf_buf, sizeof(conf_buf) - 1,
+ "%s/"GSYNC_CONF_TEMPLATE, priv->workdir);
+ conf_buf[ret] = '\0';
+ confpath = conf_buf;
+ gf_log ("", GF_LOG_INFO, "Using default config template(%s).",
+ confpath);
+ }
+
+ ret = glusterd_gsync_get_config (master, slave, confpath,
+ confd);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get configuration data"
+ "for %s(master), %s(slave)", master, slave);
+ goto out;
+
+ }
+
+ ret = dict_get_param (confd, "state_file", &buf);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get state_file's name.");
+ goto out;
+ }
+
+ *statefile = gf_strdup(buf);
+ if (!*statefile) {
+ gf_log ("", GF_LOG_ERROR, "Unable to gf_strdup.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+ out:
+ if (confd)
+ dict_destroy (confd);
+
+ gf_log ("", GF_LOG_DEBUG, "Returning %d ", ret);
+ return ret;
+}
+
+static int
+glusterd_create_status_file (char *master, char *slave, char *slave_ip,
+ char *slave_vol, char *status)
+{
+ int ret = -1;
+ runner_t runner = {0,};
+ glusterd_conf_t *priv = NULL;
+
+ if (THIS)
+ priv = THIS->private;
+ if (priv == NULL) {
+ gf_log ("", GF_LOG_ERROR, "priv of glusterd not present");
+ goto out;
+ }
+
+ if (!status) {
+ gf_log ("", GF_LOG_ERROR, "Status Empty");
+ goto out;
+ }
+ gf_log ("", GF_LOG_DEBUG, "slave = %s", slave);
+
+ runinit (&runner);
+ runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "--create",
+ status, "-c", NULL);
+ runner_argprintf (&runner, "%s/"GEOREP"/%s_%s_%s/gsyncd.conf",
+ priv->workdir, master, slave_ip, slave_vol);
+ runner_argprintf (&runner, ":%s", master);
+ runner_add_args (&runner, slave, NULL);
+ synclock_unlock (&priv->big_lock);
+ ret = runner_run (&runner);
+ synclock_lock (&priv->big_lock);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Creating status file failed.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ gf_log ("", GF_LOG_DEBUG, "returning %d", ret);
+ return ret;
+}
+
+static int
+glusterd_verify_slave (char *volname, char *slave_ip, char *slave,
+ char **op_errstr, gf_boolean_t *is_force_blocker)
+{
+ int32_t ret = -1;
+ runner_t runner = {0,};
+ char log_file_path[PATH_MAX] = "";
+ char buf[PATH_MAX] = "";
+ char *tmp = NULL;
+ char *save_ptr = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ GF_ASSERT (volname);
+ GF_ASSERT (slave_ip);
+ GF_ASSERT (slave);
+
+ if (THIS)
+ priv = THIS->private;
+ if (priv == NULL) {
+ gf_log ("", GF_LOG_ERROR, "priv of glusterd not present");
+ goto out;
+ }
+
+ snprintf (log_file_path, sizeof(log_file_path),
+ DEFAULT_LOG_FILE_DIRECTORY"/create_verify_log");
+
+ runinit (&runner);
+ runner_add_args (&runner, GSYNCD_PREFIX"/gverify.sh", NULL);
+ runner_argprintf (&runner, "%s", volname);
+ runner_argprintf (&runner, "%s", slave_ip);
+ runner_argprintf (&runner, "%s", slave);
+ runner_argprintf (&runner, "%s", log_file_path);
+ runner_redir (&runner, STDOUT_FILENO, RUN_PIPE);
+ synclock_unlock (&priv->big_lock);
+ ret = runner_run (&runner);
+ synclock_lock (&priv->big_lock);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Not a valid slave");
+ ret = glusterd_gsync_read_frm_status (log_file_path,
+ buf, sizeof(buf));
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to read from %s",
+ log_file_path);
+ goto out;
+ }
+
+ /* Tokenize the error message from gverify.sh to figure out
+ * if the error is a force blocker or not. */
+ tmp = strtok_r (buf, "|", &save_ptr);
+ if (!strcmp (tmp, "FORCE_BLOCKER"))
+ *is_force_blocker = 1;
+ else {
+ /* No FORCE_BLOCKER flag present so all that is
+ * present is the error message. */
+ *is_force_blocker = 0;
+ if (tmp)
+ *op_errstr = gf_strdup (tmp);
+ ret = -1;
+ goto out;
+ }
+
+ /* Copy rest of the error message to op_errstr */
+ tmp = strtok_r (NULL, "|", &save_ptr);
+ if (tmp)
+ *op_errstr = gf_strdup (tmp);
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+out:
+ unlink (log_file_path);
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_mountbroker_check (char **slave_ip, char **op_errstr)
+{
+ int ret = -1;
+ char *tmp = NULL;
+ char *save_ptr = NULL;
+ char *username = NULL;
+ char *host = NULL;
+ char errmsg[PATH_MAX] = "";
+
+ GF_ASSERT (slave_ip);
+ GF_ASSERT (*slave_ip);
+
+ /* Checking if hostname has user specified */
+ host = strstr (*slave_ip, "@");
+ if (!host) {
+ gf_log ("", GF_LOG_DEBUG, "No username provided.");
+ ret = 0;
+ goto out;
+ } else {
+ /* Moving the host past the '@' and checking if the
+ * actual hostname also has '@' */
+ host++;
+ if (strstr (host, "@")) {
+ gf_log ("", GF_LOG_DEBUG, "host = %s", host);
+ ret = snprintf (errmsg, sizeof(errmsg) - 1,
+ "Invalid Hostname (%s).", host);
+ errmsg[ret] = '\0';
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ ret = -1;
+ if (op_errstr)
+ *op_errstr = gf_strdup (errmsg);
+ goto out;
+ }
+
+ /* Fetching the username and hostname
+ * and checking if the username is non-root */
+ username = strtok_r (*slave_ip, "@", &save_ptr);
+ tmp = strtok_r (NULL, "@", &save_ptr);
+ if (strcmp (username, "root")) {
+ ret = snprintf (errmsg, sizeof(errmsg) - 1,
+ "Non-root username (%s@%s) not allowed.",
+ username, tmp);
+ errmsg[ret] = '\0';
+ if (op_errstr)
+ *op_errstr = gf_strdup (errmsg);
+ gf_log ("", GF_LOG_ERROR,
+ "Non-Root username not allowed.");
+ ret = -1;
+ goto out;
+ }
+
+ *slave_ip = gf_strdup (tmp);
+ if (!*slave_ip) {
+ gf_log ("", GF_LOG_ERROR, "Out of memory");
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_op_stage_gsync_create (dict_t *dict, char **op_errstr)
+{
+ char *down_peerstr = NULL;
+ char *slave = NULL;
+ char *volname = NULL;
+ char *host_uuid = NULL;
+ char *statefile = NULL;
+ char *slave_ip = NULL;
+ char *slave_vol = NULL;
+ char *conf_path = NULL;
+ char errmsg[PATH_MAX] = "";
+ char common_pem_file[PATH_MAX] = "";
+ char hook_script[PATH_MAX] = "";
+ char uuid_str [64] = "";
+ int ret = -1;
+ int is_pem_push = -1;
+ gf_boolean_t is_force = -1;
+ gf_boolean_t is_force_blocker = -1;
+ gf_boolean_t exists = _gf_false;
+ glusterd_conf_t *conf = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ struct stat stbuf = {0,};
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ conf = this->private;
+ GF_ASSERT (conf);
+
+ ret = glusterd_op_gsync_args_get (dict, op_errstr, &volname,
+ &slave, &host_uuid);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to fetch arguments");
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return -1;
+ }
+
+ if (conf->op_version < 2) {
+ gf_log ("", GF_LOG_ERROR, "Op Version not supported.");
+ snprintf (errmsg, sizeof(errmsg), "One or more nodes do not"
+ " support the required op version.");
+ *op_errstr = gf_strdup (errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ exists = glusterd_check_volume_exists (volname);
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if ((ret) || (!exists)) {
+ gf_log ("", GF_LOG_WARNING, "volume name does not exist");
+ snprintf (errmsg, sizeof(errmsg), "Volume name %s does not"
+ " exist", volname);
+ *op_errstr = gf_strdup (errmsg);
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return -1;
+ }
+
+ ret = glusterd_get_slave_details_confpath (volinfo, dict, &slave_ip,
+ &slave_vol, &conf_path,
+ op_errstr);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to fetch slave or confpath details.");
+ ret = -1;
+ goto out;
+ }
+
+ is_force = dict_get_str_boolean (dict, "force", _gf_false);
+
+ uuid_utoa_r (MY_UUID, uuid_str);
+ if (!strcmp (uuid_str, host_uuid)) {
+ ret = glusterd_are_vol_all_peers_up (volinfo,
+ &conf->peers,
+ &down_peerstr);
+ if ((ret == _gf_false) && !is_force) {
+ snprintf (errmsg, sizeof (errmsg), "Peer %s,"
+ " which is a part of %s volume, is"
+ " down. Please bring up the peer and"
+ " retry.", down_peerstr,
+ volinfo->volname);
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ *op_errstr = gf_strdup (errmsg);
+ GF_FREE (down_peerstr);
+ down_peerstr = NULL;
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return -1;
+ } else if (ret == _gf_false) {
+ gf_log ("", GF_LOG_INFO, "Peer %s,"
+ " which is a part of %s volume, is"
+ " down. Force creating geo-rep session."
+ " On bringing up the peer, re-run"
+ " \"gluster system:: execute"
+ " gsec_create\" and \"gluster volume"
+ " geo-replication %s %s create push-pem"
+ " force\"", down_peerstr, volinfo->volname,
+ volinfo->volname, slave);
+ }
+
+ /* Checking if slave host is pingable, has proper passwordless
+ * ssh login setup, slave volume is created, slave vol is empty,
+ * and if it has enough memory and bypass in case of force if
+ * the error is not a force blocker */
+ ret = glusterd_verify_slave (volname, slave_ip, slave_vol,
+ op_errstr, &is_force_blocker);
+ if (ret) {
+ if (is_force && !is_force_blocker) {
+ gf_log ("", GF_LOG_INFO, "%s is not a valid slave"
+ " volume. Error: %s. Force creating geo-rep"
+ " session.", slave, *op_errstr);
+ } else {
+ gf_log ("", GF_LOG_ERROR,
+ "%s is not a valid slave volume. Error: %s",
+ slave, *op_errstr);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = dict_get_int32 (dict, "push_pem", &is_pem_push);
+ if (!ret && is_pem_push) {
+ ret = snprintf (common_pem_file,
+ sizeof(common_pem_file) - 1,
+ "%s"GLUSTERD_COMMON_PEM_PUB_FILE,
+ conf->workdir);
+ common_pem_file[ret] = '\0';
+
+ ret = snprintf (hook_script, sizeof(hook_script) - 1,
+ "%s"GLUSTERD_CREATE_HOOK_SCRIPT,
+ conf->workdir);
+ hook_script[ret] = '\0';
+
+ ret = lstat (common_pem_file, &stbuf);
+ if (ret) {
+ snprintf (errmsg, sizeof (errmsg), "%s"
+ " required for push-pem is"
+ " not present. Please run"
+ " \"gluster system:: execute"
+ " gsec_create\"", common_pem_file);
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ *op_errstr = gf_strdup (errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ ret = lstat (hook_script, &stbuf);
+ if (ret) {
+ snprintf (errmsg, sizeof (errmsg),
+ "The hook-script (%s) required "
+ "for push-pem is not present. "
+ "Please install the hook-script "
+ "and retry", hook_script);
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ *op_errstr = gf_strdup (errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ if (!S_ISREG(stbuf.st_mode)) {
+ snprintf (errmsg, sizeof (errmsg), "%s"
+ " required for push-pem is"
+ " not a regular file. Please run"
+ " \"gluster system:: execute"
+ " gsec_create\"", common_pem_file);
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ ret = -1;
+ goto out;
+ }
+ }
+ }
+
+ ret = glusterd_get_statefile_name (volinfo, slave, conf_path, &statefile);
+ if (ret) {
+ if (!strstr(slave, "::"))
+ snprintf (errmsg, sizeof (errmsg),
+ "%s is not a valid slave url.", slave);
+ else
+ snprintf (errmsg, sizeof (errmsg), "Please check gsync "
+ "config file. Unable to get statefile's name");
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str (dict, "statefile", statefile);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to store statefile path");
+ goto out;
+ }
+
+ ret = lstat (statefile, &stbuf);
+ if (!ret && !is_force) {
+ snprintf (errmsg, sizeof (errmsg), "Session between %s"
+ " and %s is already created.",
+ volinfo->volname, slave);
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ ret = -1;
+ goto out;
+ } else if (!ret)
+ gf_log ("", GF_LOG_INFO, "Session between %s"
+ " and %s is already created. Force"
+ " creating again.", volinfo->volname, slave);
+
+ ret = glusterd_verify_gsyncd_spawn (volinfo->volname, slave);
+ if (ret) {
+ snprintf (errmsg, sizeof (errmsg), "Unable to spawn gsyncd.");
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ goto out;
+ }
+
+ ret = 0;
+out:
+
+ if (ret && errmsg[0] != '\0')
+ *op_errstr = gf_strdup (errmsg);
+
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr)
+{
+ int ret = 0;
+ int type = 0;
+ char *volname = NULL;
+ char *slave = NULL;
+ char *slave_ip = NULL;
+ char *slave_vol = NULL;
+ char *down_peerstr = NULL;
+ char *statefile = NULL;
+ char *path_list = NULL;
+ char *conf_path = NULL;
+ gf_boolean_t exists = _gf_false;
+ glusterd_volinfo_t *volinfo = NULL;
+ char errmsg[PATH_MAX] = {0,};
+ dict_t *ctx = NULL;
+ gf_boolean_t is_force = 0;
+ gf_boolean_t is_force_blocker = -1;
+ gf_boolean_t is_running = _gf_false;
+ uuid_t uuid = {0};
+ char uuid_str [64] = {0};
+ char *host_uuid = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ struct stat stbuf = {0,};
+
+ this = THIS;
+ GF_ASSERT (this);
+ conf = this->private;
+ GF_ASSERT (conf);
+
+ ret = dict_get_int32 (dict, "type", &type);
+ if (ret < 0) {
+ gf_log ("", GF_LOG_WARNING, "command type not found");
+ *op_errstr = gf_strdup ("command unsuccessful");
+ goto out;
+ }
+
+ if (type == GF_GSYNC_OPTION_TYPE_STATUS) {
+ ret = glusterd_verify_gsync_status_opts (dict, op_errstr);
+ goto out;
+ }
+
+ ret = glusterd_op_gsync_args_get (dict, op_errstr,
+ &volname, &slave, &host_uuid);
+ if (ret)
+ goto out;
+
+ uuid_utoa_r (MY_UUID, uuid_str);
+
+ if (conf->op_version < 2) {
+ gf_log ("", GF_LOG_ERROR, "Op Version not supported.");
+ snprintf (errmsg, sizeof(errmsg), "One or more nodes do not"
+ " support the required op version.");
+ *op_errstr = gf_strdup (errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ exists = glusterd_check_volume_exists (volname);
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if ((ret) || (!exists)) {
+ gf_log ("", GF_LOG_WARNING, "volume name does not exist");
+ snprintf (errmsg, sizeof(errmsg), "Volume name %s does not"
+ " exist", volname);
+ *op_errstr = gf_strdup (errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_get_slave_details_confpath (volinfo, dict, &slave_ip,
+ &slave_vol, &conf_path,
+ op_errstr);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to fetch slave or confpath details.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_get_statefile_name (volinfo, slave, conf_path, &statefile);
+ if (ret) {
+ /* Checking if slave host is pingable, has proper passwordless
+ * ssh login setup */
+ ret = glusterd_verify_slave (volname, slave_ip, slave_vol,
+ op_errstr, &is_force_blocker);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "%s is not a valid slave volume. Error: %s",
+ slave, *op_errstr);
+ goto out;
+ }
+
+ if (!strstr(slave, "::"))
+ snprintf (errmsg, sizeof (errmsg),
+ "%s is not a valid slave url.", slave);
+ else
+ snprintf (errmsg, sizeof (errmsg),
+ "Unable to get statefile's name");
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str (dict, "statefile", statefile);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to store statefile path");
+ goto out;
+ }
+
+ is_force = dict_get_str_boolean (dict, "force", _gf_false);
+
+ /* Allowing stop force to bypass the statefile check
+ * as this command acts as a fail safe method to stop geo-rep
+ * session. */
+ if ((type == GF_GSYNC_OPTION_TYPE_CONFIG) ||
+ ((type == GF_GSYNC_OPTION_TYPE_STOP) && !is_force) ||
+ (type == GF_GSYNC_OPTION_TYPE_DELETE)) {
+ ret = lstat (statefile, &stbuf);
+ if (ret) {
+ snprintf (errmsg, sizeof(errmsg), "Geo-replication"
+ " session between %s and %s does not exist.",
+ volinfo->volname, slave);
+ gf_log ("", GF_LOG_ERROR, "%s. statefile = %s",
+ errmsg, statefile);
+ *op_errstr = gf_strdup (errmsg);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ /* Check if all peers that are a part of the volume are up or not */
+ if ((type == GF_GSYNC_OPTION_TYPE_DELETE) ||
+ ((type == GF_GSYNC_OPTION_TYPE_STOP) && !is_force)) {
+ if (!strcmp (uuid_str, host_uuid)) {
+ ret = glusterd_are_vol_all_peers_up (volinfo,
+ &conf->peers,
+ &down_peerstr);
+ if (ret == _gf_false) {
+ snprintf (errmsg, sizeof (errmsg), "Peer %s,"
+ " which is a part of %s volume, is"
+ " down. Please bring up the peer and"
+ " retry.", down_peerstr,
+ volinfo->volname);
+ *op_errstr = gf_strdup (errmsg);
+ ret = -1;
+ GF_FREE (down_peerstr);
+ down_peerstr = NULL;
+ goto out;
+ }
+ }
+ }
+
+ switch (type) {
+ case GF_GSYNC_OPTION_TYPE_START:
+ /* don't attempt to start gsync if replace-brick is
+ * in progress */
+ if (glusterd_is_rb_ongoing (volinfo)) {
+ snprintf (errmsg, sizeof(errmsg), "replace-brick is in"
+ " progress, not starting geo-replication");
+ *op_errstr = gf_strdup (errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_op_verify_gsync_start_options (volinfo, slave,
+ conf_path, statefile,
+ op_errstr, is_force);
+ if (ret)
+ goto out;
+ ctx = glusterd_op_get_ctx();
+ if (ctx) {
+ /* gsyncd does a fuse mount to start
+ * the geo-rep session */
+ if (!glusterd_is_fuse_available ()) {
+ gf_log ("glusterd", GF_LOG_ERROR, "Unable to "
+ "open /dev/fuse (%s), geo-replication "
+ "start failed", strerror (errno));
+ snprintf (errmsg, sizeof(errmsg),
+ "fuse unvailable");
+ *op_errstr = gf_strdup (errmsg);
+ ret = -1;
+ goto out;
+ }
+ }
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_STOP:
+ if (!is_force) {
+ ret = glusterd_op_verify_gsync_running (volinfo, slave,
+ conf_path,
+ op_errstr);
+ if (ret) {
+ ret = glusterd_get_local_brickpaths (volinfo,
+ &path_list);
+ if (path_list)
+ ret = -1;
+ }
+ }
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_CONFIG:
+ ret = gsync_verify_config_options (dict, op_errstr, volname);
+ goto out;
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_DELETE:
+ /* Check if the gsync session is still running
+ * If so ask the user to stop geo-replication first.*/
+ ret = glusterd_gsync_get_uuid (slave, volinfo, uuid);
+ if (ret) {
+ snprintf (errmsg, sizeof(errmsg), "Geo-replication"
+ " session between %s and %s does not exist.",
+ volinfo->volname, slave);
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ *op_errstr = gf_strdup (errmsg);
+ ret = -1;
+ goto out;
+ } else {
+ ret = glusterd_check_gsync_running_local (volinfo->volname,
+ slave, conf_path,
+ &is_running);
+ if (_gf_true == is_running) {
+ snprintf (errmsg, sizeof (errmsg), GEOREP
+ " session between %s & %s is "
+ "still active. Please stop the "
+ "session and retry.",
+ volinfo->volname, slave);
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ *op_errstr = gf_strdup (errmsg);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = glusterd_verify_gsyncd_spawn (volinfo->volname, slave);
+ if (ret) {
+ snprintf (errmsg, sizeof (errmsg),
+ "Unable to spawn gsyncd");
+ *op_errstr = gf_strdup (errmsg);
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ }
+
+ break;
+ }
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+static int
+stop_gsync (char *master, char *slave, char **msg,
+ char *conf_path, gf_boolean_t is_force)
+{
+ int32_t ret = 0;
+ int pfd = -1;
+ pid_t pid = 0;
+ char pidfile[PATH_MAX] = {0,};
+ char buf [1024] = {0,};
+ int i = 0;
+
+ GF_ASSERT (THIS);
+ GF_ASSERT (THIS->private);
+
+ pfd = gsyncd_getpidfile (master, slave, pidfile, conf_path);
+ if (pfd == -2 && !is_force) {
+ gf_log ("", GF_LOG_ERROR, GEOREP" stop validation "
+ " failed for %s & %s", master, slave);
+ ret = -1;
+ goto out;
+ }
+ if (gsync_status_byfd (pfd) == -1 && !is_force) {
+ gf_log ("", GF_LOG_ERROR, "gsyncd b/w %s & %s is not"
+ " running", master, slave);
+ /* monitor gsyncd already dead */
+ goto out;
+ }
+
+ if (pfd < 0)
+ goto out;
+
+ ret = read (pfd, buf, 1024);
+ if (ret > 0) {
+ pid = strtol (buf, NULL, 10);
+ ret = kill (-pid, SIGTERM);
+ if (ret) {
+ gf_log ("", GF_LOG_WARNING,
+ "failed to kill gsyncd");
+ goto out;
+ }
+ for (i = 0; i < 20; i++) {
+ if (gsync_status_byfd (pfd) == -1) {
+ /* monitor gsyncd is dead but worker may
+ * still be alive, give some more time
+ * before SIGKILL (hack)
+ */
+ usleep (50000);
+ break;
+ }
+ usleep (50000);
+ }
+ kill (-pid, SIGKILL);
+ unlink (pidfile);
+ }
+ ret = 0;
+
+out:
+ sys_close (pfd);
+
+ if (is_force)
+ ret = 0;
+ return ret;
+}
+
+static int
+glusterd_gsync_configure (glusterd_volinfo_t *volinfo, char *slave,
+ char *path_list, dict_t *dict,
+ dict_t *resp_dict, char **op_errstr)
+{
+ int32_t ret = -1;
+ char *op_name = NULL;
+ char *op_value = NULL;
+ runner_t runner = {0,};
+ glusterd_conf_t *priv = NULL;
+ char *subop = NULL;
+ char *master = NULL;
+ char *conf_path = NULL;
+ char *slave_ip = NULL;
+ char *slave_vol = NULL;
+ struct stat stbuf = {0, };
+
+ GF_ASSERT (slave);
+ GF_ASSERT (op_errstr);
+ GF_ASSERT (dict);
+ GF_ASSERT (resp_dict);
+
+ ret = dict_get_str (dict, "subop", &subop);
+ if (ret != 0)
+ goto out;
+
+ if (strcmp (subop, "get") == 0 || strcmp (subop, "get-all") == 0) {
+ /* deferred to cli */
+ gf_log ("", GF_LOG_DEBUG, "Returning 0");
+ return 0;
+ }
+
+ ret = dict_get_str (dict, "op_name", &op_name);
+ if (ret != 0)
+ goto out;
+
+ if (strtail (subop, "set")) {
+ ret = dict_get_str (dict, "op_value", &op_value);
+ if (ret != 0)
+ goto out;
+ }
+
+ if (THIS)
+ priv = THIS->private;
+ if (priv == NULL) {
+ gf_log ("", GF_LOG_ERROR, "priv of glusterd not present");
+ *op_errstr = gf_strdup ("glusterd defunct");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "conf_path", &conf_path);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to fetch conf file path.");
+ goto out;
+ }
+
+ master = "";
+ runinit (&runner);
+ runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL);
+ runner_argprintf (&runner, "%s", conf_path);
+ if (volinfo) {
+ master = volinfo->volname;
+ runner_argprintf (&runner, ":%s", master);
+ }
+ runner_add_arg (&runner, slave);
+ runner_argprintf (&runner, "--config-%s", subop);
+ runner_add_arg (&runner, op_name);
+ if (op_value)
+ runner_add_arg (&runner, op_value);
+ synclock_unlock (&priv->big_lock);
+ ret = runner_run (&runner);
+ synclock_lock (&priv->big_lock);
+ if (ret) {
+ gf_log ("", GF_LOG_WARNING, "gsyncd failed to "
+ "%s %s option for %s %s peers",
+ subop, op_name, master, slave);
+
+ gf_asprintf (op_errstr, GEOREP" config-%s failed for %s %s",
+ subop, master, slave);
+
+ goto out;
+ }
+
+ if (!strcmp (op_name, "state_file")) {
+
+ ret = lstat (op_value, &stbuf);
+ if (ret) {
+ ret = dict_get_str (dict, "slave_ip", &slave_ip);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to fetch slave IP.");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "slave_vol", &slave_vol);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to fetch slave volume name.");
+ goto out;
+ }
+
+ ret = glusterd_create_status_file (volinfo->volname, slave,
+ slave_ip, slave_vol,
+ "Switching Status File");
+ if (ret || lstat (op_value, &stbuf)) {
+ gf_log ("", GF_LOG_ERROR, "Unable to create %s"
+ ". Error : %s", op_value,
+ strerror (errno));
+ ret = -1;
+ goto out;
+ }
+ }
+ }
+
+ ret = 0;
+ gf_asprintf (op_errstr, "config-%s successful", subop);
+
+out:
+ if (!ret && volinfo) {
+ ret = glusterd_check_restart_gsync_session (volinfo, slave,
+ resp_dict, path_list,
+ conf_path, 0);
+ if (ret)
+ *op_errstr = gf_strdup ("internal error");
+ }
+
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+static int
+glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen)
+{
+ int ret = 0;
+ int status_fd = -1;
+
+ GF_ASSERT (path);
+ GF_ASSERT (buf);
+ status_fd = open (path, O_RDONLY);
+ if (status_fd == -1) {
+ gf_log ("", GF_LOG_ERROR, "Unable to read gsyncd status"
+ " file");
+ return -1;
+ }
+ ret = read (status_fd, buf, blen - 1);
+ if (ret > 0) {
+ size_t len = strnlen (buf, ret);
+ /* Ensure there is a NUL byte and that it's not the first. */
+ if (len == 0 || len == blen - 1) {
+ ret = -1;
+ } else {
+ char *p = buf + len - 1;
+ while (isspace (*p))
+ *p-- = '\0';
+ ret = 0;
+ }
+ } else if (ret < 0)
+ gf_log ("", GF_LOG_ERROR, "Status file of gsyncd is corrupt");
+
+ close (status_fd);
+ return ret;
+}
+
+static int
+glusterd_gsync_fetch_status_extra (char *path, char *buf, size_t blen)
+{
+ char sockpath[PATH_MAX] = {0,};
+ struct sockaddr_un sa = {0,};
+ size_t l = 0;
+ int s = -1;
+ struct pollfd pfd = {0,};
+ int ret = 0;
+
+ l = strlen (buf);
+ /* seek to end of data in buf */
+ buf += l;
+ blen -= l;
+
+ glusterd_set_socket_filepath (path, sockpath, sizeof (sockpath));
+
+ strncpy(sa.sun_path, sockpath, sizeof(sa.sun_path));
+ if (sa.sun_path[sizeof (sa.sun_path) - 1])
+ return -1;
+ sa.sun_family = AF_UNIX;
+
+ s = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (s == -1)
+ return -1;
+ ret = fcntl (s, F_GETFL);
+ if (ret != -1)
+ ret = fcntl (s, F_SETFL, ret | O_NONBLOCK);
+ if (ret == -1)
+ goto out;
+
+ ret = connect (s, (struct sockaddr *)&sa, sizeof (sa));
+ if (ret == -1)
+ goto out;
+ pfd.fd = s;
+ pfd.events = POLLIN;
+ /* we don't want to hang on gsyncd */
+ if (poll (&pfd, 1, 5000) < 1 ||
+ !(pfd.revents & POLLIN)) {
+ ret = -1;
+ goto out;
+ }
+ ret = read(s, buf, blen);
+ /* we expect a terminating 0 byte */
+ if (ret == 0 || (ret > 0 && buf[ret - 1]))
+ ret = -1;
+ if (ret > 0)
+ ret = 0;
+
+ out:
+ close (s);
+ return ret;
+}
+
+static int
+dict_get_param (dict_t *dict, char *key, char **param)
+{
+ char *dk = NULL;
+ char *s = NULL;
+ char x = '\0';
+ int ret = 0;
+
+ if (dict_get_str (dict, key, param) == 0)
+ return 0;
+
+ dk = gf_strdup (key);
+ if (!key)
+ return -1;
+
+ s = strpbrk (dk, "-_");
+ if (!s)
+ return -1;
+ x = (*s == '-') ? '_' : '-';
+ *s++ = x;
+ while ((s = strpbrk (s, "-_")))
+ *s++ = x;
+
+ ret = dict_get_str (dict, dk, param);
+
+ GF_FREE (dk);
+ return ret;
+}
+
+static int
+glusterd_read_status_file (glusterd_volinfo_t *volinfo, char *slave,
+ char *conf_path, dict_t *dict, char *node)
+{
+ glusterd_conf_t *priv = NULL;
+ int ret = 0;
+ char *statefile = NULL;
+ char *master = NULL;
+ char buf[1024] = "defunct";
+ char nds[1024] = {0, };
+ char mst[1024] = {0, };
+ char slv[1024] = {0, };
+ char sts[1024] = {0, };
+ char *bufp = NULL;
+ dict_t *confd = NULL;
+ int gsync_count = 0;
+ int status = 0;
+ char *dyn_node = NULL;
+ char *path_list = NULL;
+
+ GF_ASSERT (THIS);
+ GF_ASSERT (THIS->private);
+ GF_ASSERT (volinfo);
+
+ master = volinfo->volname;
+
+ confd = dict_new ();
+ if (!dict) {
+ gf_log ("", GF_LOG_ERROR, "Not able to create dict.");
+ return -1;
+ }
+
+ priv = THIS->private;
+
+ ret = glusterd_gsync_get_config (master, slave, conf_path,
+ confd);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get configuration data"
+ "for %s(master), %s(slave)", master, slave);
+ goto done;
+
+ }
+
+ ret = dict_get_param (confd, "state_file", &statefile);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get state_file's name "
+ "for %s(master), %s(slave). Please check gsync "
+ "config file.", master, slave);
+ goto done;
+ }
+ ret = glusterd_gsync_read_frm_status (statefile, buf, sizeof (buf));
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to read the status"
+ "file for %s(master), %s(slave)", master, slave);
+ strncpy (buf, "defunct", sizeof (buf));
+ goto done;
+ }
+
+ ret = gsync_status (master, slave, conf_path, &status);
+ if (ret == 0 && status == -1) {
+ if ((strcmp (buf, "Not Started")) &&
+ (strcmp (buf, "Stopped")))
+ strncpy (buf, "defunct", sizeof (buf));
+ goto done;
+ } else if (ret == -1) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get gsync status");
+ goto done;
+ }
+
+ if (strcmp (buf, "Stable") != 0)
+ goto done;
+
+ ret = dict_get_param (confd, "state_socket_unencoded", &statefile);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get state_socket_unencoded"
+ " filepath. Please check gsync config file.");
+ goto done;
+ }
+ ret = glusterd_gsync_fetch_status_extra (statefile, buf, sizeof (buf));
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to fetch extra status"
+ "for %s(master), %s(slave)", master, slave);
+ /* there is a slight chance that this occurs due to race
+ * -- in that case, the following options all seem bad:
+ *
+ * - suppress irregurlar behavior by just leaving status
+ * on "OK"
+ * - freak out users with a misleading "defunct"
+ * - overload the meaning of the regular error signal
+ * mechanism of gsyncd, that is, when status is "faulty"
+ *
+ * -- so we just come up with something new...
+ */
+ strncpy (buf, "N/A", sizeof (buf));
+ goto done;
+ }
+
+ done:
+ if ((!strcmp (buf, "defunct")) ||
+ (!strcmp (buf, "Not Started")) ||
+ (!strcmp (buf, "Stopped"))) {
+ ret = glusterd_get_local_brickpaths (volinfo, &path_list);
+ if (!path_list) {
+ gf_log ("", GF_LOG_DEBUG, "This node not being part of"
+ " volume should not be running gsyncd. Hence"
+ " shouldn't display status for this node.");
+ ret = 0;
+ goto out;
+ }
+ }
+
+ ret = dict_get_int32 (dict, "gsync-count", &gsync_count);
+
+ if (ret)
+ gsync_count = 1;
+ else
+ gsync_count++;
+
+ (void) snprintf (nds, sizeof (nds), "node%d", gsync_count);
+ dyn_node = gf_strdup (node);
+ if (!dyn_node)
+ goto out;
+ ret = dict_set_dynstr (dict, nds, dyn_node);
+ if (ret) {
+ GF_FREE (dyn_node);
+ goto out;
+ }
+
+ snprintf (mst, sizeof (mst), "master%d", gsync_count);
+ master = gf_strdup (master);
+ if (!master)
+ goto out;
+ ret = dict_set_dynstr (dict, mst, master);
+ if (ret) {
+ GF_FREE (master);
+ goto out;
+ }
+
+ snprintf (slv, sizeof (slv), "slave%d", gsync_count);
+ slave = gf_strdup (slave);
+ if (!slave)
+ goto out;
+ ret = dict_set_dynstr (dict, slv, slave);
+ if (ret) {
+ GF_FREE (slave);
+ goto out;
+ }
+
+ snprintf (sts, sizeof (slv), "status%d", gsync_count);
+ bufp = gf_strdup (buf);
+ if (!bufp)
+ goto out;
+ ret = dict_set_dynstr (dict, sts, bufp);
+ if (ret) {
+ GF_FREE (bufp);
+ goto out;
+ }
+ ret = dict_set_int32 (dict, "gsync-count", gsync_count);
+ if (ret)
+ goto out;
+
+ out:
+ dict_destroy (confd);
+
+ return 0;
+}
+
+int
+glusterd_check_restart_gsync_session (glusterd_volinfo_t *volinfo, char *slave,
+ dict_t *resp_dict, char *path_list,
+ char *conf_path, gf_boolean_t is_force)
+{
+
+ int ret = 0;
+ glusterd_conf_t *priv = NULL;
+ char *status_msg = NULL;
+ gf_boolean_t is_running = _gf_false;
+
+ GF_ASSERT (volinfo);
+ GF_ASSERT (slave);
+ GF_ASSERT (THIS);
+ GF_ASSERT (THIS->private);
+
+ priv = THIS->private;
+
+ ret = glusterd_check_gsync_running_local (volinfo->volname,
+ slave, conf_path,
+ &is_running);
+ if (!ret && (_gf_true != is_running))
+ /* gsynd not running, nothing to do */
+ goto out;
+
+ ret = stop_gsync (volinfo->volname, slave, &status_msg,
+ conf_path, is_force);
+ if (ret == 0 && status_msg)
+ ret = dict_set_str (resp_dict, "gsync-status",
+ status_msg);
+ if (ret == 0)
+ ret = glusterd_start_gsync (volinfo, slave, path_list,
+ conf_path, uuid_utoa(MY_UUID),
+ NULL);
+
+ out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+static int32_t
+glusterd_marker_changelog_create_volfile (glusterd_volinfo_t *volinfo)
+{
+ int32_t ret = 0;
+
+ ret = glusterd_create_volfiles_and_notify_services (volinfo);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to create volfile"
+ " for setting of marker while '"GEOREP" start'");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret)
+ goto out;
+
+ if (GLUSTERD_STATUS_STARTED == volinfo->status)
+ ret = glusterd_nodesvcs_handle_graph_change (volinfo);
+ ret = 0;
+out:
+ return ret;
+}
+
+static int
+glusterd_set_gsync_knob (glusterd_volinfo_t *volinfo, char *key, int *vc)
+{
+ int ret = -1;
+ int conf_enabled = _gf_false;
+ char *knob_on = NULL;
+
+ GF_ASSERT (THIS);
+ GF_ASSERT (THIS->private);
+
+ conf_enabled = glusterd_volinfo_get_boolean (volinfo, key);
+ if (conf_enabled == -1) {
+ gf_log ("", GF_LOG_ERROR,
+ "failed to get key %s from volinfo", key);
+ goto out;
+ }
+
+ ret = 0;
+ if (conf_enabled == _gf_false) {
+ *vc = 1;
+ knob_on = gf_strdup ("on");
+ if (knob_on == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_gsync_volinfo_dict_set (volinfo,
+ key, knob_on);
+ }
+
+ out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+static int
+glusterd_set_gsync_confs (glusterd_volinfo_t *volinfo)
+{
+ int ret = -1;
+ int volfile_changed = 0;
+
+ ret = glusterd_set_gsync_knob (volinfo,
+ VKEY_MARKER_XTIME, &volfile_changed);
+ if (ret)
+ goto out;
+
+ /**
+ * enable ignore-pid-check blindly as it could be needed for
+ * cascading setups.
+ */
+ ret = glusterd_set_gsync_knob (volinfo, VKEY_MARKER_XTIME_FORCE,
+ &volfile_changed);
+ if (ret)
+ goto out;
+
+ ret = glusterd_set_gsync_knob (volinfo,
+ VKEY_CHANGELOG, &volfile_changed);
+ if (ret)
+ goto out;
+
+ if (volfile_changed)
+ ret = glusterd_marker_changelog_create_volfile (volinfo);
+
+ out:
+ return ret;
+}
+
+static int
+glusterd_get_gsync_status_mst_slv (glusterd_volinfo_t *volinfo,
+ char *slave, char *conf_path,
+ dict_t *rsp_dict, char *node)
+{
+ char *statefile = NULL;
+ uuid_t uuid = {0, };
+ glusterd_conf_t *priv = NULL;
+ int ret = 0;
+ struct stat stbuf = {0, };
+
+ GF_ASSERT (volinfo);
+ GF_ASSERT (slave);
+ GF_ASSERT (THIS);
+ GF_ASSERT (THIS->private);
+
+ priv = THIS->private;
+
+ ret = glusterd_gsync_get_uuid (slave, volinfo, uuid);
+ if (ret) {
+ gf_log ("", GF_LOG_INFO, "geo-replication status %s %s :"
+ "session is not active", volinfo->volname, slave);
+
+ ret = glusterd_get_statefile_name (volinfo, slave,
+ conf_path, &statefile);
+ if (ret) {
+ if (!strstr(slave, "::"))
+ gf_log ("", GF_LOG_INFO,
+ "%s is not a valid slave url.", slave);
+ else
+ gf_log ("", GF_LOG_INFO, "Unable to get"
+ " statefile's name");
+ ret = 0;
+ goto out;
+ }
+
+ ret = lstat (statefile, &stbuf);
+ if (ret) {
+ gf_log ("", GF_LOG_INFO, "%s statefile not present.",
+ statefile);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ ret = glusterd_read_status_file (volinfo, slave, conf_path,
+ rsp_dict, node);
+out:
+ if (statefile)
+ GF_FREE (statefile);
+
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ return ret;
+}
+
+static int
+glusterd_get_gsync_status_mst (glusterd_volinfo_t *volinfo, dict_t *rsp_dict,
+ char *node)
+{
+ glusterd_gsync_status_temp_t param = {0, };
+
+ GF_ASSERT (volinfo);
+
+ param.rsp_dict = rsp_dict;
+ param.volinfo = volinfo;
+ param.node = node;
+ dict_foreach (volinfo->gsync_slaves, _get_status_mst_slv, &param);
+
+ return 0;
+}
+
+static int
+glusterd_get_gsync_status_all (dict_t *rsp_dict, char *node)
+{
+
+ int32_t ret = 0;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+
+ GF_ASSERT (THIS);
+ priv = THIS->private;
+
+ GF_ASSERT (priv);
+
+ list_for_each_entry (volinfo, &priv->volumes, vol_list) {
+ ret = glusterd_get_gsync_status_mst (volinfo, rsp_dict, node);
+ if (ret)
+ goto out;
+ }
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ return ret;
+
+}
+
+static int
+glusterd_get_gsync_status (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+ char *slave = NULL;
+ char *volname = NULL;
+ char *conf_path = NULL;
+ char errmsg[PATH_MAX] = {0, };
+ gf_boolean_t exists = _gf_false;
+ glusterd_volinfo_t *volinfo = NULL;
+ int ret = 0;
+ char my_hostname[256] = {0,};
+
+ ret = gethostname(my_hostname, 256);
+ if (ret) {
+ /* stick to N/A */
+ (void) strcpy (my_hostname, "N/A");
+ }
+
+ ret = dict_get_str (dict, "master", &volname);
+ if (ret < 0){
+ ret = glusterd_get_gsync_status_all (rsp_dict, my_hostname);
+ goto out;
+ }
+
+ exists = glusterd_check_volume_exists (volname);
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if ((ret) || (!exists)) {
+ gf_log ("", GF_LOG_WARNING, "volume name does not exist");
+ snprintf (errmsg, sizeof(errmsg), "Volume name %s does not"
+ " exist", volname);
+ *op_errstr = gf_strdup (errmsg);
+ ret = -1;
+ goto out;
+ }
+
+
+ ret = dict_get_str (dict, "slave", &slave);
+ if (ret < 0) {
+ ret = glusterd_get_gsync_status_mst (volinfo,
+ rsp_dict, my_hostname);
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "conf_path", &conf_path);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to fetch conf file path.");
+ goto out;
+ }
+
+ ret = glusterd_get_gsync_status_mst_slv (volinfo, slave, conf_path,
+ rsp_dict, my_hostname);
+
+ out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+static int
+glusterd_gsync_delete (glusterd_volinfo_t *volinfo, char *slave, char *slave_ip,
+ char *slave_vol, char *path_list, dict_t *dict,
+ dict_t *resp_dict, char **op_errstr)
+{
+ int32_t ret = -1;
+ runner_t runner = {0,};
+ glusterd_conf_t *priv = NULL;
+ char *master = NULL;
+ char *gl_workdir = NULL;
+ char geo_rep_dir[PATH_MAX] = "";
+ char *conf_path = NULL;
+
+ GF_ASSERT (slave);
+ GF_ASSERT (slave_ip);
+ GF_ASSERT (slave_vol);
+ GF_ASSERT (op_errstr);
+ GF_ASSERT (dict);
+ GF_ASSERT (resp_dict);
+
+ if (THIS)
+ priv = THIS->private;
+ if (priv == NULL) {
+ gf_log ("", GF_LOG_ERROR, "priv of glusterd not present");
+ *op_errstr = gf_strdup ("glusterd defunct");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "conf_path", &conf_path);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to fetch conf file path.");
+ goto out;
+ }
+
+ gl_workdir = priv->workdir;
+ master = "";
+ runinit (&runner);
+ runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd",
+ "--delete", "-c", NULL);
+ runner_argprintf (&runner, "%s", conf_path);
+
+ if (volinfo) {
+ master = volinfo->volname;
+ runner_argprintf (&runner, ":%s", master);
+ }
+ runner_add_arg (&runner, slave);
+ runner_redir (&runner, STDOUT_FILENO, RUN_PIPE);
+ synclock_unlock (&priv->big_lock);
+ ret = runner_run (&runner);
+ synclock_lock (&priv->big_lock);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "gsyncd failed to "
+ "delete session info for %s and %s peers",
+ master, slave);
+
+ gf_asprintf (op_errstr, "gsyncd failed to "
+ "delete session info for %s and %s peers",
+ master, slave);
+
+ goto out;
+ }
+
+ ret = snprintf (geo_rep_dir, sizeof(geo_rep_dir) - 1,
+ "%s/"GEOREP"/%s_%s_%s", gl_workdir,
+ volinfo->volname, slave_ip, slave_vol);
+ geo_rep_dir[ret] = '\0';
+
+ ret = rmdir (geo_rep_dir);
+ if (ret) {
+ if (errno == ENOENT)
+ gf_log ("", GF_LOG_DEBUG, "Geo Rep Dir(%s) Not Present.",
+ geo_rep_dir);
+ else {
+ gf_log ("", GF_LOG_ERROR, "Unable to delete "
+ "Geo Rep Dir(%s). Error: %s", geo_rep_dir,
+ strerror (errno));
+ goto out;
+ }
+ }
+
+ ret = 0;
+
+ gf_asprintf (op_errstr, "delete successful");
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_op_sys_exec (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+ char buf[PATH_MAX] = "";
+ char cmd_arg_name[PATH_MAX] = "";
+ char output_name[PATH_MAX] = "";
+ char errmsg[PATH_MAX] = "";
+ char *ptr = NULL;
+ char *bufp = NULL;
+ char *command = NULL;
+ char **cmd_args = NULL;
+ int ret = -1;
+ int i = -1;
+ int cmd_args_count = 0;
+ int output_count = 0;
+ glusterd_conf_t *priv = NULL;
+ runner_t runner = {0,};
+
+ GF_ASSERT (dict);
+ GF_ASSERT (op_errstr);
+ GF_ASSERT (rsp_dict);
+
+ if (THIS)
+ priv = THIS->private;
+ if (priv == NULL) {
+ gf_log ("", GF_LOG_ERROR, "priv of glusterd not present");
+ *op_errstr = gf_strdup ("glusterd defunct");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "command", &command);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to get command from dict");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "cmd_args_count", &cmd_args_count);
+ if (ret)
+ gf_log ("", GF_LOG_INFO, "No cmd_args_count");
+
+ if (cmd_args_count) {
+ cmd_args = GF_CALLOC (cmd_args_count, sizeof (char*),
+ gf_common_mt_char);
+ if (!cmd_args) {
+ gf_log ("", GF_LOG_ERROR, "Unable to calloc. "
+ "Errno = %s", strerror(errno));
+ goto out;
+ }
+
+ for (i=1; i <= cmd_args_count; i++) {
+ memset (cmd_arg_name, '\0', sizeof(cmd_arg_name));
+ snprintf (cmd_arg_name, sizeof(cmd_arg_name),
+ "cmd_arg_%d", i);
+ ret = dict_get_str (dict, cmd_arg_name, &cmd_args[i-1]);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to get %s in dict",
+ cmd_arg_name);
+ goto out;
+ }
+ }
+ }
+
+ runinit (&runner);
+ runner_argprintf (&runner, GSYNCD_PREFIX"/peer_%s", command);
+ for (i=0; i < cmd_args_count; i++)
+ runner_add_arg (&runner, cmd_args[i]);
+ runner_redir (&runner, STDOUT_FILENO, RUN_PIPE);
+ synclock_unlock (&priv->big_lock);
+ ret = runner_start (&runner);
+ if (ret == -1) {
+ snprintf (errmsg, sizeof (errmsg), "Unable to "
+ "execute command. Error : %s",
+ strerror (errno));
+ *op_errstr = gf_strdup (errmsg);
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ ret = -1;
+ synclock_lock (&priv->big_lock);
+ goto out;
+ }
+
+ ptr = fgets(buf, sizeof(buf), runner_chio (&runner, STDOUT_FILENO));
+ if (ptr) {
+ ret = dict_get_int32 (rsp_dict, "output_count", &output_count);
+ if (ret)
+ output_count = 1;
+ else
+ output_count++;
+ memset (output_name, '\0', sizeof (output_name));
+ snprintf (output_name, sizeof (output_name),
+ "output_%d", output_count);
+ if (buf[strlen(buf) - 1] == '\n')
+ buf[strlen(buf) - 1] = '\0';
+ bufp = gf_strdup (buf);
+ if (!bufp)
+ gf_log ("", GF_LOG_ERROR, "gf_strdup failed.");
+ ret = dict_set_dynstr (rsp_dict, output_name, bufp);
+ if (ret) {
+ GF_FREE (bufp);
+ gf_log ("", GF_LOG_ERROR, "output set failed.");
+ }
+ ret = dict_set_int32 (rsp_dict, "output_count", output_count);
+ if (ret)
+ gf_log ("", GF_LOG_ERROR, "output_count set failed.");
+ }
+
+ ret = runner_end (&runner);
+ if (ret) {
+ snprintf (errmsg, sizeof (errmsg), "Unable to "
+ "end. Error : %s",
+ strerror (errno));
+ *op_errstr = gf_strdup (errmsg);
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ ret = -1;
+ synclock_lock (&priv->big_lock);
+ goto out;
+ }
+ synclock_lock (&priv->big_lock);
+
+ ret = 0;
+out:
+ if (cmd_args) {
+ GF_FREE (cmd_args);
+ cmd_args = NULL;
+ }
+
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_op_copy_file (dict_t *dict, char **op_errstr)
+{
+ char abs_filename[PATH_MAX] = "";
+ char errmsg[PATH_MAX] = "";
+ char *filename = NULL;
+ char *host_uuid = NULL;
+ char uuid_str [64] = {0};
+ char *contents = NULL;
+ char buf[1024] = "";
+ int ret = -1;
+ int fd = -1;
+ int bytes_writen = 0;
+ int bytes_read = 0;
+ int contents_size = -1;
+ int file_mode = -1;
+ glusterd_conf_t *priv = NULL;
+ struct stat stbuf = {0,};
+
+
+ if (THIS)
+ priv = THIS->private;
+ if (priv == NULL) {
+ gf_log ("", GF_LOG_ERROR, "priv of glusterd not present");
+ *op_errstr = gf_strdup ("glusterd defunct");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "host-uuid", &host_uuid);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_str (dict, "source", &filename);
+ if (ret < 0) {
+ gf_log ("", GF_LOG_ERROR, "Unable to fetch"
+ " filename from dict.");
+ *op_errstr = gf_strdup ("command unsuccessful");
+ goto out;
+ }
+ snprintf (abs_filename, sizeof(abs_filename),
+ "%s/%s", priv->workdir, filename);
+
+ uuid_utoa_r (MY_UUID, uuid_str);
+ if (!strcmp (uuid_str, host_uuid)) {
+ ret = lstat (abs_filename, &stbuf);
+ if (ret) {
+ snprintf (errmsg, sizeof (errmsg), "Source file"
+ " does not exist in %s", priv->workdir);
+ *op_errstr = gf_strdup (errmsg);
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ goto out;
+ }
+
+ contents = GF_CALLOC(1, stbuf.st_size+1, gf_common_mt_char);
+ if (!contents) {
+ snprintf (errmsg, sizeof (errmsg),
+ "Unable to allocate memory");
+ *op_errstr = gf_strdup (errmsg);
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ fd = open (abs_filename, O_RDONLY);
+ if (fd < 0) {
+ snprintf (errmsg, sizeof (errmsg), "Unable to open %s",
+ abs_filename);
+ *op_errstr = gf_strdup (errmsg);
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ do {
+ ret = read (fd, buf, sizeof(buf));
+ if (ret > 0) {
+ memcpy (contents+bytes_read, buf, ret);
+ bytes_read += ret;
+ memset (buf, '\0', sizeof(buf));
+ }
+ } while (ret > 0);
+
+ if (bytes_read != stbuf.st_size) {
+ snprintf (errmsg, sizeof (errmsg), "Unable to read all "
+ "the data from %s", abs_filename);
+ *op_errstr = gf_strdup (errmsg);
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_int32 (dict, "contents_size", stbuf.st_size);
+ if (ret) {
+ snprintf (errmsg, sizeof (errmsg), "Unable to set"
+ " contents size in dict.");
+ *op_errstr = gf_strdup (errmsg);
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ goto out;
+ }
+
+ ret = dict_set_int32 (dict, "file_mode",
+ (int32_t)stbuf.st_mode);
+ if (ret) {
+ snprintf (errmsg, sizeof (errmsg), "Unable to set"
+ " file mode in dict.");
+ *op_errstr = gf_strdup (errmsg);
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ goto out;
+ }
+
+ ret = dict_set_bin (dict, "common_pem_contents",
+ contents, stbuf.st_size);
+ if (ret) {
+ snprintf (errmsg, sizeof (errmsg), "Unable to set"
+ " pem contents in dict.");
+ *op_errstr = gf_strdup (errmsg);
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ goto out;
+ }
+ close (fd);
+ } else {
+ ret = dict_get_bin (dict, "common_pem_contents",
+ (void **) &contents);
+ if (ret) {
+ snprintf (errmsg, sizeof (errmsg), "Unable to get"
+ " pem contents in dict.");
+ *op_errstr = gf_strdup (errmsg);
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "contents_size", &contents_size);
+ if (ret) {
+ snprintf (errmsg, sizeof (errmsg), "Unable to set"
+ " contents size in dict.");
+ *op_errstr = gf_strdup (errmsg);
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "file_mode", &file_mode);
+ if (ret) {
+ snprintf (errmsg, sizeof (errmsg), "Unable to get"
+ " file mode in dict.");
+ *op_errstr = gf_strdup (errmsg);
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ goto out;
+ }
+
+ fd = open (abs_filename, O_WRONLY | O_TRUNC | O_CREAT, 0600);
+ if (fd < 0) {
+ snprintf (errmsg, sizeof (errmsg), "Unable to open %s",
+ abs_filename);
+ *op_errstr = gf_strdup (errmsg);
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ bytes_writen = write (fd, contents, contents_size);
+
+ if (bytes_writen != contents_size) {
+ snprintf (errmsg, sizeof (errmsg), "Failed to write"
+ " to %s", abs_filename);
+ *op_errstr = gf_strdup (errmsg);
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ fchmod (fd, file_mode);
+ close (fd);
+ }
+
+ ret = 0;
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_op_gsync_set (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+ int32_t ret = -1;
+ int32_t type = -1;
+ dict_t *ctx = NULL;
+ dict_t *resp_dict = NULL;
+ char *host_uuid = NULL;
+ char *slave = NULL;
+ char *slave_ip = NULL;
+ char *slave_vol = NULL;
+ char *volname = NULL;
+ char *path_list = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ gf_boolean_t is_force = _gf_false;
+ char *status_msg = NULL;
+ gf_boolean_t is_running = _gf_false;
+ char *conf_path = NULL;
+
+ GF_ASSERT (THIS);
+ GF_ASSERT (THIS->private);
+ GF_ASSERT (dict);
+ GF_ASSERT (op_errstr);
+
+ priv = THIS->private;
+
+ ret = dict_get_int32 (dict, "type", &type);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_str (dict, "host-uuid", &host_uuid);
+ if (ret < 0)
+ goto out;
+
+ ctx = glusterd_op_get_ctx ();
+ resp_dict = ctx ? ctx : rsp_dict;
+ GF_ASSERT (resp_dict);
+
+ if (type == GF_GSYNC_OPTION_TYPE_STATUS) {
+ ret = glusterd_get_gsync_status (dict, op_errstr, resp_dict);
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "slave", &slave);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_str (dict, "slave_ip", &slave_ip);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to fetch slave volume name.");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "slave_vol", &slave_vol);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to fetch slave volume name.");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "conf_path", &conf_path);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to fetch conf file path.");
+ goto out;
+ }
+
+ if (dict_get_str (dict, "master", &volname) == 0) {
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ gf_log ("", GF_LOG_WARNING, "Volinfo for %s (master) not found",
+ volname);
+ goto out;
+ }
+
+ ret = glusterd_get_local_brickpaths (volinfo, &path_list);
+ }
+
+ if (type == GF_GSYNC_OPTION_TYPE_CONFIG) {
+ ret = glusterd_gsync_configure (volinfo, slave, path_list,
+ dict, resp_dict, op_errstr);
+
+ ret = dict_set_str (resp_dict, "conf_path", conf_path);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to store conf_file_path.");
+ goto out;
+ }
+ goto out;
+ }
+
+ if (type == GF_GSYNC_OPTION_TYPE_DELETE) {
+ ret = glusterd_remove_slave_in_info(volinfo, slave, op_errstr);
+ if (ret && !is_force && path_list)
+ goto out;
+
+ ret = glusterd_gsync_delete (volinfo, slave, slave_ip,
+ slave_vol, path_list, dict,
+ resp_dict, op_errstr);
+ goto out;
+ }
+
+ if (!volinfo) {
+ ret = -1;
+ goto out;
+ }
+
+ is_force = dict_get_str_boolean (dict, "force", _gf_false);
+
+ if (type == GF_GSYNC_OPTION_TYPE_START) {
+
+ ret = glusterd_set_gsync_confs (volinfo);
+ if (ret != 0) {
+ gf_log ("", GF_LOG_WARNING, "marker/changelog start failed");
+ *op_errstr = gf_strdup ("failed to initialize indexing");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_start_gsync (volinfo, slave, path_list,
+ conf_path, host_uuid, op_errstr);
+ }
+
+ if (type == GF_GSYNC_OPTION_TYPE_STOP) {
+ ret = glusterd_check_gsync_running_local (volinfo->volname,
+ slave, conf_path,
+ &is_running);
+ if (!ret && !is_force && path_list &&
+ (_gf_true != is_running)) {
+ gf_log ("", GF_LOG_WARNING, GEOREP" is not set up for"
+ "%s(master) and %s(slave)", volname, slave);
+ *op_errstr = strdup (GEOREP" is not set up");
+ goto out;
+ }
+
+ ret = stop_gsync (volname, slave, &status_msg, conf_path, is_force);
+ if (ret == 0 && status_msg)
+ ret = dict_set_str (resp_dict, "gsync-status",
+ status_msg);
+ if (ret != 0 && !is_force && path_list)
+ *op_errstr = gf_strdup ("internal error");
+
+ if (!ret) {
+ ret = glusterd_create_status_file (volinfo->volname,
+ slave, slave_ip,
+ slave_vol, "Stopped");
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to update"
+ "state_file. Error : %s",
+ strerror (errno));
+ }
+ }
+ }
+
+out:
+ if (path_list) {
+ GF_FREE (path_list);
+ path_list = NULL;
+ }
+
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_get_slave_details_confpath (glusterd_volinfo_t *volinfo, dict_t *dict,
+ char **slave_ip, char **slave_vol,
+ char **conf_path, char **op_errstr)
+{
+ int ret = -1;
+ char confpath[PATH_MAX] = "";
+ glusterd_conf_t *priv = NULL;
+ char *slave = NULL;
+
+ GF_ASSERT (THIS);
+ priv = THIS->private;
+ GF_ASSERT (priv);
+
+ ret = dict_get_str (dict, "slave", &slave);
+ if (ret || !slave) {
+ gf_log ("", GF_LOG_ERROR, "Unable to fetch slave from dict");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_get_slave_info (slave, slave_ip, slave_vol, op_errstr);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to fetch slave details.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str (dict, "slave_ip", *slave_ip);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to store slave IP.");
+ goto out;
+ }
+
+ ret = dict_set_str (dict, "slave_vol", *slave_vol);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to store slave volume name.");
+ goto out;
+ }
+
+ ret = snprintf (confpath, sizeof(confpath) - 1,
+ "%s/"GEOREP"/%s_%s_%s/gsyncd.conf",
+ priv->workdir, volinfo->volname,
+ *slave_ip, *slave_vol);
+ confpath[ret] = '\0';
+ *conf_path = gf_strdup (confpath);
+ if (!(*conf_path)) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to gf_strdup. Error: %s", strerror (errno));
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str (dict, "conf_path", *conf_path);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to store conf_path");
+ goto out;
+ }
+
+out:
+ gf_log ("", GF_LOG_DEBUG,"Returning %d", ret);
+ return ret;
+
+}
+
+static int
+glusterd_get_slave_info (char *slave, char **slave_ip,
+ char **slave_vol, char **op_errstr)
+{
+ char *tmp = NULL;
+ char *save_ptr = NULL;
+ char **linearr = NULL;
+ int32_t ret = -1;
+ char errmsg[PATH_MAX] = "";
+
+ ret = glusterd_urltransform_single (slave, "normalize",
+ &linearr);
+ if (ret == -1) {
+ ret = snprintf (errmsg, sizeof(errmsg) - 1,
+ "Invalid Url: %s", slave);
+ errmsg[ret] = '\0';
+ *op_errstr = gf_strdup (errmsg);
+ gf_log ("", GF_LOG_ERROR, "Failed to normalize url");
+ goto out;
+ }
+
+ tmp = strtok_r (linearr[0], "/", &save_ptr);
+ tmp = strtok_r (NULL, "/", &save_ptr);
+ slave = strtok_r (tmp, ":", &save_ptr);
+ if (slave) {
+ ret = glusterd_mountbroker_check (&slave, op_errstr);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Invalid slave url: %s", *op_errstr);
+ goto out;
+ }
+
+ *slave_ip = gf_strdup (slave);
+ if (!*slave_ip) {
+ gf_log ("", GF_LOG_ERROR,
+ "Failed to gf_strdup");
+ ret = -1;
+ goto out;
+ }
+ gf_log ("", GF_LOG_DEBUG, "Slave IP : %s", *slave_ip);
+ ret = 0;
+ } else {
+ gf_log ("", GF_LOG_ERROR, "Invalid slave name");
+ goto out;
+ }
+
+ slave = strtok_r (NULL, ":", &save_ptr);
+ if (slave) {
+ *slave_vol = gf_strdup (slave);
+ if (!*slave_vol) {
+ gf_log ("", GF_LOG_ERROR,
+ "Failed to gf_strdup");
+ ret = -1;
+ goto out;
+ }
+ gf_log ("", GF_LOG_DEBUG, "Slave Vol : %s", *slave_vol);
+ ret = 0;
+ } else {
+ gf_log ("", GF_LOG_ERROR, "Invalid slave name");
+ goto out;
+ }
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+static void
+runinit_gsyncd_setrx (runner_t *runner, char *conf_path)
+{
+ runinit (runner);
+ runner_add_args (runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL);
+ runner_argprintf (runner, "%s", conf_path);
+ runner_add_arg (runner, "--config-set-rx");
+}
+
+static int
+glusterd_check_gsync_present (int *valid_state)
+{
+ char buff[PATH_MAX] = {0, };
+ runner_t runner = {0,};
+ char *ptr = NULL;
+ int ret = 0;
+
+ runinit (&runner);
+ runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "--version", NULL);
+ runner_redir (&runner, STDOUT_FILENO, RUN_PIPE);
+ ret = runner_start (&runner);
+ if (ret == -1) {
+ if (errno == ENOENT) {
+ gf_log ("glusterd", GF_LOG_INFO, GEOREP
+ " module not installed in the system");
+ *valid_state = 0;
+ }
+ else {
+ gf_log ("glusterd", GF_LOG_ERROR, GEOREP
+ " module not working as desired");
+ *valid_state = -1;
+ }
+ goto out;
+ }
+
+ ptr = fgets(buff, sizeof(buff), runner_chio (&runner, STDOUT_FILENO));
+ if (ptr) {
+ if (!strstr (buff, "gsyncd")) {
+ ret = -1;
+ gf_log ("glusterd", GF_LOG_ERROR, GEOREP" module not "
+ "working as desired");
+ *valid_state = -1;
+ goto out;
+ }
+ } else {
+ ret = -1;
+ gf_log ("glusterd", GF_LOG_ERROR, GEOREP" module not "
+ "working as desired");
+ *valid_state = -1;
+ goto out;
+ }
+
+ ret = 0;
+ out:
+
+ runner_end (&runner);
+
+ gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+
+}
+
+static int
+create_conf_file (glusterd_conf_t *conf, char *conf_path)
+#define RUN_GSYNCD_CMD do { \
+ ret = runner_run_reuse (&runner); \
+ if (ret == -1) { \
+ runner_log (&runner, "glusterd", GF_LOG_ERROR, "command failed"); \
+ runner_end (&runner); \
+ goto out; \
+ } \
+ runner_end (&runner); \
+} while (0)
+{
+ int ret = 0;
+ runner_t runner = {0,};
+ char georepdir[PATH_MAX] = {0,};
+ int valid_state = 0;
+
+ valid_state = -1;
+ ret = glusterd_check_gsync_present (&valid_state);
+ if (-1 == ret) {
+ ret = valid_state;
+ goto out;
+ }
+
+ ret = snprintf (georepdir, sizeof(georepdir) - 1, "%s/"GEOREP,
+ conf->workdir);
+ georepdir[ret] = '\0';
+
+ /************
+ * master pre-configuration
+ ************/
+
+ /* remote-gsyncd */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_args (&runner, "remote-gsyncd", GSYNCD_PREFIX"/gsyncd", ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_args (&runner, "remote-gsyncd", "/nonexistent/gsyncd",
+ ".", "^ssh:", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* gluster-command-dir */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_args (&runner, "gluster-command-dir", SBIN_DIR"/",
+ ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* gluster-params */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_args (&runner, "gluster-params",
+ "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true",
+ ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* ssh-command */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_arg (&runner, "ssh-command");
+ runner_argprintf (&runner,
+ "ssh -oPasswordAuthentication=no "
+ "-oStrictHostKeyChecking=no "
+ "-i %s/secret.pem", georepdir);
+ runner_add_args (&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* pid-file */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_arg (&runner, "pid-file");
+ runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}.pid", georepdir);
+ runner_add_args (&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* state-file */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_arg (&runner, "state-file");
+ runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}.status", georepdir);
+ runner_add_args (&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* state-detail-file */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_arg (&runner, "state-detail-file");
+ runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}-detail.status", georepdir);
+ runner_add_args (&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* state-socket */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_arg (&runner, "state-socket-unencoded");
+ runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}.socket", georepdir);
+ runner_add_args (&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* socketdir */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_args (&runner, "socketdir", GLUSTERD_SOCK_DIR, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* log-file */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_args (&runner,
+ "log-file",
+ DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"/${mastervol}/${eSlave}.log",
+ ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* gluster-log-file */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_args (&runner,
+ "gluster-log-file",
+ DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"/${mastervol}/${eSlave}${local_id}.gluster.log",
+ ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* ignore-deletes */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_args (&runner, "ignore-deletes", "true", ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* special-sync-mode */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_args (&runner, "special-sync-mode", "partial", ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* change-detector == changelog */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_args(&runner, "change-detector", "changelog", ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_arg(&runner, "working-dir");
+ runner_argprintf(&runner, "%s/${mastervol}/${eSlave}",
+ DEFAULT_VAR_RUN_DIRECTORY);
+ runner_add_args (&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /************
+ * slave pre-configuration
+ ************/
+
+ /* gluster-command-dir */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_args (&runner, "gluster-command-dir", SBIN_DIR"/",
+ ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* gluster-params */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_args (&runner, "gluster-params",
+ "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true",
+ ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* log-file */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_args (&runner,
+ "log-file",
+ DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves/${session_owner}:${eSlave}.log",
+ ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* MountBroker log-file */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_args (&runner,
+ "log-file-mbr",
+ DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves/mbr/${session_owner}:${eSlave}.log",
+ ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* gluster-log-file */
+ runinit_gsyncd_setrx (&runner, conf_path);
+ runner_add_args (&runner,
+ "gluster-log-file",
+ DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves/${session_owner}:${eSlave}.gluster.log",
+ ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ out:
+ return ret ? -1 : 0;
+}
+
+static int
+glusterd_create_essential_dir_files (glusterd_volinfo_t *volinfo, dict_t *dict,
+ char *slave, char *slave_ip,
+ char *slave_vol, char **op_errstr)
+{
+ int ret = -1;
+ char *conf_path = NULL;
+ char *statefile = NULL;
+ char buf[PATH_MAX] = "";
+ char errmsg[PATH_MAX] = "";
+ glusterd_conf_t *conf = NULL;
+ struct stat stbuf = {0,};
+
+ GF_ASSERT (THIS);
+ conf = THIS->private;
+
+ ret = dict_get_str (dict, "conf_path", &conf_path);
+ if (ret) {
+ snprintf (errmsg, sizeof (errmsg),
+ "Unable to fetch conf file path.");
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "statefile", &statefile);
+ if (ret) {
+ snprintf (errmsg, sizeof (errmsg),
+ "Unable to fetch statefile path.");
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ goto out;
+ }
+
+ ret = snprintf (buf, sizeof(buf) - 1, "%s/"GEOREP"/%s_%s_%s",
+ conf->workdir, volinfo->volname, slave_ip, slave_vol);
+ buf[ret] = '\0';
+ ret = mkdir_p (buf, 0777, _gf_true);
+ if (ret) {
+ snprintf (errmsg, sizeof (errmsg), "Unable to create %s"
+ ". Error : %s", buf, strerror (errno));
+ *op_errstr = gf_strdup (errmsg);
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ goto out;
+ }
+
+ ret = snprintf (buf, PATH_MAX, DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"/%s",
+ volinfo->volname);
+ buf[ret] = '\0';
+ ret = mkdir_p (buf, 0777, _gf_true);
+ if (ret) {
+ snprintf (errmsg, sizeof (errmsg), "Unable to create %s"
+ ". Error : %s", buf, strerror (errno));
+ *op_errstr = gf_strdup (errmsg);
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ goto out;
+ }
+
+ ret = lstat (conf_path, &stbuf);
+ if (!ret) {
+ gf_log ("", GF_LOG_DEBUG, "Session already running."
+ " Not creating config file again.");
+ } else {
+ ret = create_conf_file (conf, conf_path);
+ if (ret || lstat (conf_path, &stbuf)) {
+ snprintf (errmsg, sizeof (errmsg), "Failed to create"
+ " config file(%s).", conf_path);
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ goto out;
+ }
+ }
+
+ ret = lstat (statefile, &stbuf);
+ if (!ret) {
+ gf_log ("", GF_LOG_DEBUG, "Session already running."
+ " Not creating status file again.");
+ goto out;
+ } else {
+ ret = glusterd_create_status_file (volinfo->volname, slave,
+ slave_ip, slave_vol,
+ "Not Started");
+ if (ret || lstat (statefile, &stbuf)) {
+ snprintf (errmsg, sizeof (errmsg), "Unable to create %s"
+ ". Error : %s", statefile, strerror (errno));
+ *op_errstr = gf_strdup (errmsg);
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ ret = -1;
+ goto out;
+ }
+ }
+
+out:
+ gf_log ("", GF_LOG_DEBUG,"Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_op_gsync_create (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+ char common_pem_file[PATH_MAX] = "";
+ char errmsg[PATH_MAX] = "";
+ char hooks_args[PATH_MAX] = "";
+ char uuid_str [64] = "";
+ char *host_uuid = NULL;
+ char *slave_ip = NULL;
+ char *slave_vol = NULL;
+ char *arg_buf = NULL;
+ char *volname = NULL;
+ char *slave = NULL;
+ int32_t ret = -1;
+ int32_t is_pem_push = -1;
+ gf_boolean_t is_force = -1;
+ glusterd_conf_t *conf = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+
+ GF_ASSERT (THIS);
+ conf = THIS->private;
+ GF_ASSERT (conf);
+ GF_ASSERT (dict);
+ GF_ASSERT (op_errstr);
+
+ ret = glusterd_op_gsync_args_get (dict, op_errstr,
+ &volname, &slave, &host_uuid);
+ if (ret)
+ goto out;
+
+ snprintf (common_pem_file, sizeof(common_pem_file),
+ "%s"GLUSTERD_COMMON_PEM_PUB_FILE, conf->workdir);
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Volinfo for %s"
+ " (master) not found", volname);
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "slave_vol", &slave_vol);
+ if (ret) {
+ snprintf (errmsg, sizeof (errmsg),
+ "Unable to fetch slave volume name.");
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "slave_ip", &slave_ip);
+ if (ret) {
+ snprintf (errmsg, sizeof (errmsg),
+ "Unable to fetch slave IP.");
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ is_force = dict_get_str_boolean (dict, "force", _gf_false);
+
+ uuid_utoa_r (MY_UUID, uuid_str);
+ if (!strcmp (uuid_str, host_uuid)) {
+ ret = dict_get_int32 (dict, "push_pem", &is_pem_push);
+ if (!ret && is_pem_push) {
+ gf_log ("", GF_LOG_DEBUG, "Trying to setup"
+ " pem files in slave");
+ is_pem_push = 1;
+ } else
+ is_pem_push = 0;
+
+ snprintf(hooks_args, sizeof(hooks_args),
+ "is_push_pem=%d pub_file=%s slave_ip=%s",
+ is_pem_push, common_pem_file, slave_ip);
+
+ } else
+ snprintf(hooks_args, sizeof(hooks_args),
+ "This argument will stop the hooks script");
+
+ arg_buf = gf_strdup (hooks_args);
+ if (!arg_buf) {
+ gf_log ("", GF_LOG_ERROR, "Failed to"
+ " gf_strdup");
+ if (is_force) {
+ ret = 0;
+ goto create_essentials;
+ }
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str (dict, "hooks_args", arg_buf);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Failed to set"
+ " hooks_args in dict.");
+ if (is_force) {
+ ret = 0;
+ goto create_essentials;
+ }
+ goto out;
+ }
+
+create_essentials:
+
+ ret = glusterd_create_essential_dir_files (volinfo, dict, slave,
+ slave_ip, slave_vol,
+ op_errstr);
+ if (ret)
+ goto out;
+
+ ret = glusterd_store_slave_in_info (volinfo, slave,
+ host_uuid, op_errstr,
+ is_force);
+ if (ret) {
+ snprintf (errmsg, sizeof (errmsg), "Unable to store"
+ " slave info.");
+ gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+ goto out;
+ }
+
+out:
+ gf_log ("", GF_LOG_DEBUG,"Returning %d", ret);
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
new file mode 100644
index 000000000..71d076624
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -0,0 +1,4237 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+#include <inttypes.h>
+
+
+#include "globals.h"
+#include "glusterfs.h"
+#include "compat.h"
+#include "dict.h"
+#include "protocol-common.h"
+#include "xlator.h"
+#include "logging.h"
+#include "timer.h"
+#include "defaults.h"
+#include "compat.h"
+#include "compat-errno.h"
+#include "statedump.h"
+#include "run.h"
+#include "glusterd-mem-types.h"
+#include "glusterd.h"
+#include "glusterd-sm.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-utils.h"
+#include "glusterd-store.h"
+#include "glusterd-locks.h"
+
+#include "glusterd1-xdr.h"
+#include "cli1-xdr.h"
+#include "xdr-generic.h"
+#include "rpc-clnt.h"
+#include "glusterd-volgen.h"
+#include "glusterd-mountbroker.h"
+
+#include <sys/resource.h>
+#include <inttypes.h>
+
+#include "defaults.c"
+#include "common-utils.h"
+
+#include "globals.h"
+#include "glusterd-syncop.h"
+
+#ifdef HAVE_BD_XLATOR
+#include <lvm2app.h>
+#endif
+
+extern uuid_t global_txn_id;
+
+int glusterd_big_locked_notify (struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event,
+ void *data, rpc_clnt_notify_t notify_fn)
+{
+ glusterd_conf_t *priv = THIS->private;
+ int ret = -1;
+ synclock_lock (&priv->big_lock);
+ ret = notify_fn (rpc, mydata, event, data);
+ synclock_unlock (&priv->big_lock);
+ return ret;
+}
+
+int glusterd_big_locked_handler (rpcsvc_request_t *req, rpcsvc_actor actor_fn)
+{
+ glusterd_conf_t *priv = THIS->private;
+ int ret = -1;
+
+ synclock_lock (&priv->big_lock);
+ ret = actor_fn (req);
+ synclock_unlock (&priv->big_lock);
+
+ return ret;
+}
+
+static int
+glusterd_handle_friend_req (rpcsvc_request_t *req, uuid_t uuid,
+ char *hostname, int port,
+ gd1_mgmt_friend_req *friend_req)
+{
+ int ret = -1;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_friend_sm_event_t *event = NULL;
+ glusterd_friend_req_ctx_t *ctx = NULL;
+ char rhost[UNIX_PATH_MAX + 1] = {0};
+ uuid_t friend_uuid = {0};
+ dict_t *dict = NULL;
+
+ uuid_parse (uuid_utoa (uuid), friend_uuid);
+ if (!port)
+ port = GF_DEFAULT_BASE_PORT;
+
+ ret = glusterd_remote_hostname_get (req, rhost, sizeof (rhost));
+ ret = glusterd_friend_find (uuid, rhost, &peerinfo);
+
+ if (ret) {
+ ret = glusterd_xfer_friend_add_resp (req, hostname, rhost, port,
+ -1, GF_PROBE_UNKNOWN_PEER);
+ if (friend_req->vols.vols_val) {
+ free (friend_req->vols.vols_val);
+ friend_req->vols.vols_val = NULL;
+ }
+ goto out;
+ }
+
+ ret = glusterd_friend_sm_new_event
+ (GD_FRIEND_EVENT_RCVD_FRIEND_REQ, &event);
+
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "event generation failed: %d", ret);
+ return ret;
+ }
+
+ event->peerinfo = peerinfo;
+
+ ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_friend_req_ctx_t);
+
+ if (!ctx) {
+ gf_log ("", GF_LOG_ERROR, "Unable to allocate memory");
+ ret = -1;
+ goto out;
+ }
+
+ uuid_copy (ctx->uuid, uuid);
+ if (hostname)
+ ctx->hostname = gf_strdup (hostname);
+ ctx->req = req;
+
+ dict = dict_new ();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (friend_req->vols.vols_val,
+ friend_req->vols.vols_len,
+ &dict);
+
+ if (ret)
+ goto out;
+ else
+ dict->extra_stdfree = friend_req->vols.vols_val;
+
+ ctx->vols = dict;
+ event->ctx = ctx;
+
+ ret = glusterd_friend_sm_inject_event (event);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR, "Unable to inject event %d, "
+ "ret = %d", event->event, ret);
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (0 != ret) {
+ if (ctx && ctx->hostname)
+ GF_FREE (ctx->hostname);
+ GF_FREE (ctx);
+ if (dict) {
+ if ((!dict->extra_stdfree) &&
+ friend_req->vols.vols_val)
+ free (friend_req->vols.vols_val);
+ dict_unref (dict);
+ } else {
+ free (friend_req->vols.vols_val);
+ }
+ GF_FREE (event);
+ } else {
+ if (peerinfo && (0 == peerinfo->connected))
+ ret = GLUSTERD_CONNECTION_AWAITED;
+ }
+ return ret;
+}
+
+static int
+glusterd_handle_unfriend_req (rpcsvc_request_t *req, uuid_t uuid,
+ char *hostname, int port)
+{
+ int ret = -1;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_friend_sm_event_t *event = NULL;
+ glusterd_friend_req_ctx_t *ctx = NULL;
+
+ if (!port)
+ port = GF_DEFAULT_BASE_PORT;
+
+ ret = glusterd_friend_find (uuid, hostname, &peerinfo);
+
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_CRITICAL,
+ "Received remove-friend from unknown peer %s",
+ hostname);
+ ret = glusterd_xfer_friend_remove_resp (req, hostname,
+ port);
+ goto out;
+ }
+
+ ret = glusterd_friend_sm_new_event
+ (GD_FRIEND_EVENT_RCVD_REMOVE_FRIEND, &event);
+
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "event generation failed: %d", ret);
+ return ret;
+ }
+
+ event->peerinfo = peerinfo;
+
+ ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_friend_req_ctx_t);
+
+ if (!ctx) {
+ gf_log ("", GF_LOG_ERROR, "Unable to allocate memory");
+ ret = -1;
+ goto out;
+ }
+
+ uuid_copy (ctx->uuid, uuid);
+ if (hostname)
+ ctx->hostname = gf_strdup (hostname);
+ ctx->req = req;
+
+ event->ctx = ctx;
+
+ ret = glusterd_friend_sm_inject_event (event);
+
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR, "Unable to inject event %d, "
+ "ret = %d", event->event, ret);
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (0 != ret) {
+ if (ctx && ctx->hostname)
+ GF_FREE (ctx->hostname);
+ GF_FREE (ctx);
+ }
+
+ return ret;
+}
+
+static int
+glusterd_add_peer_detail_to_dict (glusterd_peerinfo_t *peerinfo,
+ dict_t *friends, int count)
+{
+
+ int ret = -1;
+ char key[256] = {0, };
+ char *peer_uuid_str = NULL;
+
+ GF_ASSERT (peerinfo);
+ GF_ASSERT (friends);
+
+ snprintf (key, 256, "friend%d.uuid", count);
+ peer_uuid_str = gd_peer_uuid_str (peerinfo);
+ ret = dict_set_str (friends, key, peer_uuid_str);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "friend%d.hostname", count);
+ ret = dict_set_str (friends, key, peerinfo->hostname);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "friend%d.port", count);
+ ret = dict_set_int32 (friends, key, peerinfo->port);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "friend%d.stateId", count);
+ ret = dict_set_int32 (friends, key, peerinfo->state.state);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "friend%d.state", count);
+ ret = dict_set_str (friends, key,
+ glusterd_friend_sm_state_name_get(peerinfo->state.state));
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "friend%d.connected", count);
+ ret = dict_set_int32 (friends, key, (int32_t)peerinfo->connected);
+ if (ret)
+ goto out;
+
+out:
+ return ret;
+}
+
+struct args_pack {
+ dict_t *dict;
+ int vol_count;
+ int opt_count;
+};
+
+static int
+_build_option_key (dict_t *d, char *k, data_t *v, void *tmp)
+{
+ char reconfig_key[256] = {0, };
+ struct args_pack *pack = NULL;
+ int ret = -1;
+
+ pack = tmp;
+ if (strcmp (k, GLUSTERD_GLOBAL_OPT_VERSION) == 0)
+ return 0;
+ snprintf (reconfig_key, 256, "volume%d.option.%s",
+ pack->vol_count, k);
+ ret = dict_set_str (pack->dict, reconfig_key, v->data);
+ if (0 == ret)
+ pack->opt_count++;
+
+ return 0;
+}
+
+int
+glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,
+ dict_t *volumes, int count)
+{
+
+ int ret = -1;
+ char key[256] = {0, };
+ glusterd_brickinfo_t *brickinfo = NULL;
+ char *buf = NULL;
+ int i = 1;
+ dict_t *dict = NULL;
+ glusterd_conf_t *priv = NULL;
+ char *volume_id_str = NULL;
+ struct args_pack pack = {0,};
+ xlator_t *this = NULL;
+#ifdef HAVE_BD_XLATOR
+ int caps = 0;
+#endif
+
+ GF_ASSERT (volinfo);
+ GF_ASSERT (volumes);
+
+ this = THIS;
+ priv = this->private;
+
+ GF_ASSERT (priv);
+
+ snprintf (key, 256, "volume%d.name", count);
+ ret = dict_set_str (volumes, key, volinfo->volname);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "volume%d.type", count);
+ ret = dict_set_int32 (volumes, key, volinfo->type);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "volume%d.status", count);
+ ret = dict_set_int32 (volumes, key, volinfo->status);
+ if (ret)
+ goto out;
+
+ /* As of now, the snap volumes are also displayed as part of
+ volume info command. So this change is to display whether
+ the volume is original volume or the snap_volume. If
+ displaying of snap volumes in volume info o/p is not needed
+ this should be removed.
+ */
+ snprintf (key, 256, "volume%d.snap_volume", count);
+ ret = dict_set_int32 (volumes, key, volinfo->is_snap_volume);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to set whether "
+ "the volume is a snap volume or actual volume (%s)",
+ volinfo->volname);
+ goto out;
+ }
+
+ snprintf (key, 256, "volume%d.brick_count", count);
+ ret = dict_set_int32 (volumes, key, volinfo->brick_count);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "volume%d.dist_count", count);
+ ret = dict_set_int32 (volumes, key, volinfo->dist_leaf_count);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "volume%d.stripe_count", count);
+ ret = dict_set_int32 (volumes, key, volinfo->stripe_count);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "volume%d.replica_count", count);
+ ret = dict_set_int32 (volumes, key, volinfo->replica_count);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "volume%d.transport", count);
+ ret = dict_set_int32 (volumes, key, volinfo->transport_type);
+ if (ret)
+ goto out;
+
+ volume_id_str = gf_strdup (uuid_utoa (volinfo->volume_id));
+ if (!volume_id_str)
+ goto out;
+
+ snprintf (key, sizeof (key), "volume%d.volume_id", count);
+ ret = dict_set_dynstr (volumes, key, volume_id_str);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "volume%d.rebalance", count);
+ ret = dict_set_int32 (volumes, key, volinfo->rebal.defrag_cmd);
+ if (ret)
+ goto out;
+
+#ifdef HAVE_BD_XLATOR
+ if (volinfo->caps) {
+ caps = 0;
+ snprintf (key, 256, "volume%d.xlator0", count);
+ buf = GF_MALLOC (256, gf_common_mt_char);
+ if (!buf) {
+ ret = ENOMEM;
+ goto out;
+ }
+ if (volinfo->caps & CAPS_BD)
+ snprintf (buf, 256, "BD");
+ ret = dict_set_dynstr (volumes, key, buf);
+ if (ret) {
+ GF_FREE (buf);
+ goto out;
+ }
+
+ if (volinfo->caps & CAPS_THIN) {
+ snprintf (key, 256, "volume%d.xlator0.caps%d", count,
+ caps++);
+ buf = GF_MALLOC (256, gf_common_mt_char);
+ if (!buf) {
+ ret = ENOMEM;
+ goto out;
+ }
+ snprintf (buf, 256, "thin");
+ ret = dict_set_dynstr (volumes, key, buf);
+ if (ret) {
+ GF_FREE (buf);
+ goto out;
+ }
+ }
+
+ if (volinfo->caps & CAPS_OFFLOAD_COPY) {
+ snprintf (key, 256, "volume%d.xlator0.caps%d", count,
+ caps++);
+ buf = GF_MALLOC (256, gf_common_mt_char);
+ if (!buf) {
+ ret = ENOMEM;
+ goto out;
+ }
+ snprintf (buf, 256, "offload_copy");
+ ret = dict_set_dynstr (volumes, key, buf);
+ if (ret) {
+ GF_FREE (buf);
+ goto out;
+ }
+ }
+
+ if (volinfo->caps & CAPS_OFFLOAD_SNAPSHOT) {
+ snprintf (key, 256, "volume%d.xlator0.caps%d", count,
+ caps++);
+ buf = GF_MALLOC (256, gf_common_mt_char);
+ if (!buf) {
+ ret = ENOMEM;
+ goto out;
+ }
+ snprintf (buf, 256, "offload_snapshot");
+ ret = dict_set_dynstr (volumes, key, buf);
+ if (ret) {
+ GF_FREE (buf);
+ goto out;
+ }
+ }
+
+ }
+#endif
+
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ char brick[1024] = {0,};
+ char brick_uuid[64] = {0,};
+ snprintf (key, 256, "volume%d.brick%d", count, i);
+ snprintf (brick, 1024, "%s:%s", brickinfo->hostname,
+ brickinfo->path);
+ buf = gf_strdup (brick);
+ ret = dict_set_dynstr (volumes, key, buf);
+ if (ret)
+ goto out;
+ snprintf (key, 256, "volume%d.brick%d.uuid", count, i);
+ snprintf (brick_uuid, 64, "%s", uuid_utoa (brickinfo->uuid));
+ buf = gf_strdup (brick_uuid);
+ if (!buf)
+ goto out;
+ ret = dict_set_dynstr (volumes, key, buf);
+ if (ret)
+ goto out;
+
+#ifdef HAVE_BD_XLATOR
+ if (volinfo->caps & CAPS_BD) {
+ snprintf (key, 256, "volume%d.vg%d", count, i);
+ snprintf (brick, 1024, "%s", brickinfo->vg);
+ buf = gf_strdup (brick);
+ ret = dict_set_dynstr (volumes, key, buf);
+ if (ret)
+ goto out;
+ }
+#endif
+ i++;
+ }
+
+ dict = volinfo->dict;
+ if (!dict) {
+ ret = 0;
+ goto out;
+ }
+
+ pack.dict = volumes;
+ pack.vol_count = count;
+ pack.opt_count = 0;
+ dict_foreach (dict, _build_option_key, (void *) &pack);
+ dict_foreach (priv->opts, _build_option_key, &pack);
+
+ snprintf (key, 256, "volume%d.opt_count", pack.vol_count);
+ ret = dict_set_int32 (volumes, key, pack.opt_count);
+out:
+ return ret;
+}
+
+int
+glusterd_friend_find (uuid_t uuid, char *hostname,
+ glusterd_peerinfo_t **peerinfo)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ if (uuid) {
+ ret = glusterd_friend_find_by_uuid (uuid, peerinfo);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Unable to find peer by uuid: %s",
+ uuid_utoa (uuid));
+ } else {
+ goto out;
+ }
+
+ }
+
+ if (hostname) {
+ ret = glusterd_friend_find_by_hostname (hostname, peerinfo);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Unable to find hostname: %s", hostname);
+ } else {
+ goto out;
+ }
+ }
+
+out:
+ return ret;
+}
+
+int32_t
+glusterd_op_txn_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx,
+ char *err_str, size_t err_len)
+{
+ int32_t ret = -1;
+ dict_t *dict = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ int32_t locked = 0;
+ char *tmp = NULL;
+ char *volname = NULL;
+ uuid_t *txn_id = NULL;
+ uuid_t *originator_uuid = NULL;
+ glusterd_op_info_t txn_op_info = {{0},};
+ glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE;
+
+ GF_ASSERT (req);
+ GF_ASSERT ((op > GD_OP_NONE) && (op < GD_OP_MAX));
+ GF_ASSERT (NULL != ctx);
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ dict = ctx;
+
+ /* Generate a transaction-id for this operation and
+ * save it in the dict. This transaction id distinguishes
+ * each transaction, and helps separate opinfos in the
+ * op state machine. */
+ txn_id = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t);
+ if (!txn_id)
+ goto out;
+
+ uuid_generate (*txn_id);
+
+ ret = dict_set_bin (dict, "transaction_id",
+ txn_id, sizeof(*txn_id));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set transaction id.");
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Transaction_id = %s", uuid_utoa (*txn_id));
+
+ /* Save the MY_UUID as the originator_uuid. This originator_uuid
+ * will be used by is_origin_glusterd() to determine if a node
+ * is the originator node for a command. */
+ originator_uuid = GF_CALLOC (1, sizeof(uuid_t),
+ gf_common_mt_uuid_t);
+ if (!originator_uuid) {
+ ret = -1;
+ goto out;
+ }
+
+ uuid_copy (*originator_uuid, MY_UUID);
+ ret = dict_set_bin (dict, "originator_uuid",
+ originator_uuid, sizeof (uuid_t));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set originator uuid.");
+ goto out;
+ }
+
+ /* Based on the op_version, acquire a cluster or mgmt_v3 lock */
+ if (priv->op_version < 3) {
+ ret = glusterd_lock (MY_UUID);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to acquire lock on localhost, ret: %d",
+ ret);
+ snprintf (err_str, err_len,
+ "Another transaction is in progress. "
+ "Please try again after sometime.");
+ goto out;
+ }
+ } else {
+ /* If no volname is given as a part of the command, locks will
+ * not be held */
+ ret = dict_get_str (dict, "volname", &tmp);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Failed to get volume "
+ "name");
+ goto local_locking_done;
+ } else {
+ /* Use a copy of volname, as cli response will be
+ * sent before the unlock, and the volname in the
+ * dict, might be removed */
+ volname = gf_strdup (tmp);
+ if (!volname)
+ goto out;
+ }
+
+ ret = glusterd_mgmt_v3_lock (volname, MY_UUID, "vol");
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to acquire lock for %s", volname);
+ snprintf (err_str, err_len,
+ "Another transaction is in progress for %s. "
+ "Please try again after sometime.", volname);
+ goto out;
+ }
+ }
+
+ locked = 1;
+ gf_log (this->name, GF_LOG_DEBUG, "Acquired lock on localhost");
+
+local_locking_done:
+
+ /* If no volname is given as a part of the command, locks will
+ * not be held, hence sending stage event. */
+ if (volname)
+ event_type = GD_OP_EVENT_START_LOCK;
+ else {
+ txn_op_info.state.state = GD_OP_STATE_LOCK_SENT;
+ event_type = GD_OP_EVENT_ALL_ACC;
+ }
+
+ /* Save opinfo for this transaction with the transaction id */
+ glusterd_txn_opinfo_init (&txn_op_info, NULL, &op, ctx, req);
+
+ ret = glusterd_set_txn_opinfo (txn_id, &txn_op_info);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to set transaction's opinfo");
+ if (ctx)
+ dict_unref (ctx);
+ goto out;
+ }
+
+ ret = glusterd_op_sm_inject_event (event_type, txn_id, ctx);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to acquire cluster"
+ " lock.");
+ goto out;
+ }
+
+out:
+ if (locked && ret) {
+ /* Based on the op-version, we release the
+ * cluster or mgmt_v3 lock */
+ if (priv->op_version < 3)
+ glusterd_unlock (MY_UUID);
+ else {
+ ret = glusterd_mgmt_v3_unlock (volname, MY_UUID,
+ "vol");
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to release lock for %s",
+ volname);
+ ret = -1;
+ }
+ }
+
+ if (volname)
+ GF_FREE (volname);
+
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+__glusterd_handle_cluster_lock (rpcsvc_request_t *req)
+{
+ dict_t *op_ctx = NULL;
+ int32_t ret = -1;
+ gd1_mgmt_cluster_lock_req lock_req = {{0},};
+ glusterd_op_lock_ctx_t *ctx = NULL;
+ glusterd_op_t op = GD_OP_EVENT_LOCK;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_op_info_t txn_op_info = {{0},};
+ uuid_t *txn_id = &global_txn_id;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+
+ ret = xdr_to_generic (req->msg[0], &lock_req,
+ (xdrproc_t)xdr_gd1_mgmt_cluster_lock_req);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to decode lock "
+ "request received from peer");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "Received LOCK from uuid: %s",
+ uuid_utoa (lock_req.uuid));
+
+ if (glusterd_friend_find_by_uuid (lock_req.uuid, &peerinfo)) {
+ gf_log (this->name, GF_LOG_WARNING, "%s doesn't "
+ "belong to the cluster. Ignoring request.",
+ uuid_utoa (lock_req.uuid));
+ ret = -1;
+ goto out;
+ }
+
+ ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t);
+
+ if (!ctx) {
+ //respond here
+ return -1;
+ }
+
+ uuid_copy (ctx->uuid, lock_req.uuid);
+ ctx->req = req;
+ ctx->dict = NULL;
+
+ op_ctx = dict_new ();
+ if (!op_ctx) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to set new dict");
+ goto out;
+ }
+
+ glusterd_txn_opinfo_init (&txn_op_info, NULL, &op, op_ctx, req);
+
+ ret = glusterd_set_txn_opinfo (txn_id, &txn_op_info);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to set transaction's opinfo");
+ dict_unref (txn_op_info.op_ctx);
+ goto out;
+ }
+
+ ret = glusterd_op_sm_inject_event (GD_OP_EVENT_LOCK, txn_id, ctx);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to inject event GD_OP_EVENT_LOCK");
+
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+
+ return ret;
+}
+
+int
+glusterd_handle_cluster_lock (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ __glusterd_handle_cluster_lock);
+}
+
+int
+glusterd_req_ctx_create (rpcsvc_request_t *rpc_req,
+ glusterd_op_t op, uuid_t uuid,
+ char *buf_val, size_t buf_len,
+ gf_gld_mem_types_t mem_type,
+ glusterd_req_ctx_t **req_ctx_out)
+{
+ int ret = -1;
+ char str[50] = {0,};
+ glusterd_req_ctx_t *req_ctx = NULL;
+ dict_t *dict = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ uuid_unparse (uuid, str);
+ gf_log (this->name, GF_LOG_DEBUG, "Received op from uuid %s", str);
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ req_ctx = GF_CALLOC (1, sizeof (*req_ctx), mem_type);
+ if (!req_ctx) {
+ goto out;
+ }
+
+ uuid_copy (req_ctx->uuid, uuid);
+ req_ctx->op = op;
+ ret = dict_unserialize (buf_val, buf_len, &dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to unserialize the dictionary");
+ goto out;
+ }
+
+ req_ctx->dict = dict;
+ req_ctx->req = rpc_req;
+ *req_ctx_out = req_ctx;
+ ret = 0;
+out:
+ if (ret) {
+ if (dict)
+ dict_unref (dict);
+ GF_FREE (req_ctx);
+ }
+ return ret;
+}
+
+int
+__glusterd_handle_stage_op (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ glusterd_req_ctx_t *req_ctx = NULL;
+ gd1_mgmt_stage_op_req op_req = {{0},};
+ glusterd_peerinfo_t *peerinfo = NULL;
+ xlator_t *this = NULL;
+ uuid_t *txn_id = &global_txn_id;
+ glusterd_op_info_t txn_op_info = {{0},};
+ glusterd_op_sm_state_info_t state;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+
+ ret = xdr_to_generic (req->msg[0], &op_req,
+ (xdrproc_t)xdr_gd1_mgmt_stage_op_req);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to decode stage "
+ "request received from peer");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (glusterd_friend_find_by_uuid (op_req.uuid, &peerinfo)) {
+ gf_log (this->name, GF_LOG_WARNING, "%s doesn't "
+ "belong to the cluster. Ignoring request.",
+ uuid_utoa (op_req.uuid));
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_req_ctx_create (req, op_req.op, op_req.uuid,
+ op_req.buf.buf_val, op_req.buf.buf_len,
+ gf_gld_mt_op_stage_ctx_t, &req_ctx);
+ if (ret)
+ goto out;
+
+ ret = dict_get_bin (req_ctx->dict, "transaction_id", (void **)&txn_id);
+
+ gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id));
+
+ /* In cases where there is no volname, the receivers won't have a
+ * transaction opinfo created, as for those operations, the locking
+ * phase where the transaction opinfos are created, won't be called. */
+ ret = glusterd_get_txn_opinfo (txn_id, &txn_op_info);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "No transaction's opinfo set");
+
+ state.state = GD_OP_STATE_LOCKED;
+ glusterd_txn_opinfo_init (&txn_op_info, &state,
+ &op_req.op, req_ctx->dict, req);
+
+ ret = glusterd_set_txn_opinfo (txn_id, &txn_op_info);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to set transaction's opinfo");
+ dict_unref (req_ctx->dict);
+ goto out;
+ }
+ }
+
+ ret = glusterd_op_sm_inject_event (GD_OP_EVENT_STAGE_OP,
+ txn_id, req_ctx);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to inject event GD_OP_EVENT_STAGE_OP");
+
+ out:
+ free (op_req.buf.buf_val);//malloced by xdr
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+ return ret;
+}
+
+int
+glusterd_handle_stage_op (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __glusterd_handle_stage_op);
+}
+
+
+int
+__glusterd_handle_commit_op (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ glusterd_req_ctx_t *req_ctx = NULL;
+ gd1_mgmt_commit_op_req op_req = {{0},};
+ glusterd_peerinfo_t *peerinfo = NULL;
+ xlator_t *this = NULL;
+ uuid_t *txn_id = &global_txn_id;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+
+ ret = xdr_to_generic (req->msg[0], &op_req,
+ (xdrproc_t)xdr_gd1_mgmt_commit_op_req);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to decode commit "
+ "request received from peer");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (glusterd_friend_find_by_uuid (op_req.uuid, &peerinfo)) {
+ gf_log (this->name, GF_LOG_WARNING, "%s doesn't "
+ "belong to the cluster. Ignoring request.",
+ uuid_utoa (op_req.uuid));
+ ret = -1;
+ goto out;
+ }
+
+ //the structures should always be equal
+ GF_ASSERT (sizeof (gd1_mgmt_commit_op_req) == sizeof (gd1_mgmt_stage_op_req));
+ ret = glusterd_req_ctx_create (req, op_req.op, op_req.uuid,
+ op_req.buf.buf_val, op_req.buf.buf_len,
+ gf_gld_mt_op_commit_ctx_t, &req_ctx);
+ if (ret)
+ goto out;
+
+ ret = dict_get_bin (req_ctx->dict, "transaction_id", (void **)&txn_id);
+
+ gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id));
+
+ ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_OP,
+ txn_id, req_ctx);
+
+out:
+ free (op_req.buf.buf_val);//malloced by xdr
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+ return ret;
+}
+
+int
+glusterd_handle_commit_op (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __glusterd_handle_commit_op);
+}
+
+int
+__glusterd_handle_cli_probe (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{0,},};
+ glusterd_peerinfo_t *peerinfo = NULL;
+ gf_boolean_t run_fsm = _gf_true;
+ xlator_t *this = NULL;
+ char *bind_name = NULL;
+ dict_t *dict = NULL;
+ char *hostname = NULL;
+ int port = 0;
+
+ GF_ASSERT (req);
+ this = THIS;
+
+ ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ gf_log ("", GF_LOG_ERROR, "xdr decoding error");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ dict = dict_new ();
+
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len, &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "unserialize req-buffer to dictionary");
+ goto out;
+ }
+ }
+
+ ret = dict_get_str (dict, "hostname", &hostname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get hostname");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "port", &port);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get port");
+ goto out;
+ }
+
+ if (glusterd_is_any_volume_in_server_quorum (this) &&
+ !does_gd_meet_server_quorum (this)) {
+ glusterd_xfer_cli_probe_resp (req, -1, GF_PROBE_QUORUM_NOT_MET,
+ NULL, hostname, port, dict);
+ gf_log (this->name, GF_LOG_ERROR, "Quorum does not meet, "
+ "rejecting operation");
+ ret = 0;
+ goto out;
+ }
+
+ gf_log ("glusterd", GF_LOG_INFO, "Received CLI probe req %s %d",
+ hostname, port);
+
+ if (dict_get_str(this->options,"transport.socket.bind-address",
+ &bind_name) == 0) {
+ gf_log ("glusterd", GF_LOG_DEBUG,
+ "only checking probe address vs. bind address");
+ ret = gf_is_same_address (bind_name, hostname);
+ }
+ else {
+ ret = gf_is_local_addr (hostname);
+ }
+ if (ret) {
+ glusterd_xfer_cli_probe_resp (req, 0, GF_PROBE_LOCALHOST,
+ NULL, hostname, port, dict);
+ ret = 0;
+ goto out;
+ }
+
+ if (!(ret = glusterd_friend_find_by_hostname (hostname, &peerinfo))) {
+ if (strcmp (peerinfo->hostname, hostname) == 0) {
+
+ gf_log ("glusterd", GF_LOG_DEBUG, "Probe host %s port "
+ "%d already a peer", hostname, port);
+ glusterd_xfer_cli_probe_resp (req, 0, GF_PROBE_FRIEND,
+ NULL, hostname, port,
+ dict);
+ goto out;
+ }
+ }
+ ret = glusterd_probe_begin (req, hostname, port, dict);
+
+ if (ret == GLUSTERD_CONNECTION_AWAITED) {
+ //fsm should be run after connection establishes
+ run_fsm = _gf_false;
+ ret = 0;
+ }
+
+out:
+ free (cli_req.dict.dict_val);
+
+ if (run_fsm) {
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+ }
+
+ return ret;
+}
+
+int
+glusterd_handle_cli_probe (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __glusterd_handle_cli_probe);
+}
+
+int
+__glusterd_handle_cli_deprobe (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{0,},};
+ uuid_t uuid = {0};
+ int op_errno = 0;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ dict_t *dict = NULL;
+ char *hostname = NULL;
+ int port = 0;
+ int flags = 0;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+ GF_ASSERT (req);
+
+ ret = xdr_to_generic (req->msg[0], &cli_req,
+ (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ dict = dict_new ();
+
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len, &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "unserialize req-buffer to dictionary");
+ goto out;
+ }
+ }
+
+ gf_log ("glusterd", GF_LOG_INFO, "Received CLI deprobe req");
+
+ ret = dict_get_str (dict, "hostname", &hostname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get hostname");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "port", &port);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get port");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "flags", &flags);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get flags");
+ goto out;
+ }
+
+ ret = glusterd_hostname_to_uuid (hostname, uuid);
+ if (ret) {
+ op_errno = GF_DEPROBE_NOT_FRIEND;
+ goto out;
+ }
+
+ if (!uuid_compare (uuid, MY_UUID)) {
+ op_errno = GF_DEPROBE_LOCALHOST;
+ ret = -1;
+ goto out;
+ }
+
+ if (!(flags & GF_CLI_FLAG_OP_FORCE)) {
+ if (!uuid_is_null (uuid)) {
+ /* Check if peers are connected, except peer being detached*/
+ if (!glusterd_chk_peers_connected_befriended (uuid)) {
+ ret = -1;
+ op_errno = GF_DEPROBE_FRIEND_DOWN;
+ goto out;
+ }
+ ret = glusterd_all_volume_cond_check (
+ glusterd_friend_brick_belongs,
+ -1, &uuid);
+ if (ret) {
+ op_errno = GF_DEPROBE_BRICK_EXIST;
+ goto out;
+ }
+ }
+
+ if (glusterd_is_any_volume_in_server_quorum (this) &&
+ !does_gd_meet_server_quorum (this)) {
+ gf_log (this->name, GF_LOG_ERROR, "Quorum does not "
+ "meet, rejecting operation");
+ ret = -1;
+ op_errno = GF_DEPROBE_QUORUM_NOT_MET;
+ goto out;
+ }
+ }
+
+ if (!uuid_is_null (uuid)) {
+ ret = glusterd_deprobe_begin (req, hostname, port, uuid, dict);
+ } else {
+ ret = glusterd_deprobe_begin (req, hostname, port, NULL, dict);
+ }
+
+out:
+ free (cli_req.dict.dict_val);
+
+ if (ret) {
+ ret = glusterd_xfer_cli_deprobe_resp (req, ret, op_errno, NULL,
+ hostname, dict);
+ }
+
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+
+ return ret;
+}
+
+int
+glusterd_handle_cli_deprobe (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __glusterd_handle_cli_deprobe);
+}
+
+int
+__glusterd_handle_cli_list_friends (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf1_cli_peer_list_req cli_req = {0,};
+ dict_t *dict = NULL;
+
+ GF_ASSERT (req);
+
+ ret = xdr_to_generic (req->msg[0], &cli_req,
+ (xdrproc_t)xdr_gf1_cli_peer_list_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ gf_log ("glusterd", GF_LOG_INFO, "Received cli list req");
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ goto out;
+ } else {
+ dict->extra_stdfree = cli_req.dict.dict_val;
+ }
+ }
+
+ ret = glusterd_list_friends (req, dict, cli_req.flags);
+
+out:
+ if (dict)
+ dict_unref (dict);
+
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+
+ return ret;
+}
+
+int
+glusterd_handle_cli_list_friends (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ __glusterd_handle_cli_list_friends);
+}
+
+int
+__glusterd_handle_cli_get_volume (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{0,}};
+ dict_t *dict = NULL;
+ int32_t flags = 0;
+
+ GF_ASSERT (req);
+
+ ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ gf_log ("glusterd", GF_LOG_INFO, "Received get vol req");
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ goto out;
+ } else {
+ dict->extra_stdfree = cli_req.dict.dict_val;
+ }
+ }
+
+ ret = dict_get_int32 (dict, "flags", &flags);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "failed to get flags");
+ goto out;
+ }
+
+ ret = glusterd_get_volumes (req, dict, flags);
+
+out:
+ if (dict)
+ dict_unref (dict);
+
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+
+ return ret;
+}
+
+int
+glusterd_handle_cli_get_volume (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ __glusterd_handle_cli_get_volume);
+}
+
+int
+__glusterd_handle_cli_uuid_reset (rpcsvc_request_t *req)
+{
+ int ret = -1;
+ dict_t *dict = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ uuid_t uuid = {0};
+ gf_cli_rsp rsp = {0,};
+ gf_cli_req cli_req = {{0,}};
+ char msg_str[2048] = {0,};
+
+ GF_ASSERT (req);
+
+ this = THIS;
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ gf_log ("glusterd", GF_LOG_DEBUG, "Received uuid reset req");
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf (msg_str, sizeof (msg_str), "Unable to decode "
+ "the buffer");
+ goto out;
+ } else {
+ dict->extra_stdfree = cli_req.dict.dict_val;
+ }
+ }
+
+ /* In the above section if dict_unserialize is successful, ret is set
+ * to zero.
+ */
+ ret = -1;
+ // Do not allow peer reset if there are any volumes in the cluster
+ if (!list_empty (&priv->volumes)) {
+ snprintf (msg_str, sizeof (msg_str), "volumes are already "
+ "present in the cluster. Resetting uuid is not "
+ "allowed");
+ gf_log (this->name, GF_LOG_WARNING, "%s", msg_str);
+ goto out;
+ }
+
+ // Do not allow peer reset if trusted storage pool is already formed
+ if (!list_empty (&priv->peers)) {
+ snprintf (msg_str, sizeof (msg_str),"trusted storage pool "
+ "has been already formed. Please detach this peer "
+ "from the pool and reset its uuid.");
+ gf_log (this->name, GF_LOG_WARNING, "%s", msg_str);
+ goto out;
+ }
+
+ uuid_copy (uuid, priv->uuid);
+ ret = glusterd_uuid_generate_save ();
+
+ if (!uuid_compare (uuid, MY_UUID)) {
+ snprintf (msg_str, sizeof (msg_str), "old uuid and the new uuid"
+ " are same. Try gluster peer reset again");
+ gf_log (this->name, GF_LOG_ERROR, "%s", msg_str);
+ ret = -1;
+ goto out;
+ }
+
+out:
+ if (ret) {
+ rsp.op_ret = -1;
+ if (msg_str[0] == '\0')
+ snprintf (msg_str, sizeof (msg_str), "Operation "
+ "failed");
+ rsp.op_errstr = msg_str;
+ ret = 0;
+ } else {
+ rsp.op_errstr = "";
+ }
+
+ glusterd_to_cli (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_cli_rsp, dict);
+
+ return ret;
+}
+
+int
+glusterd_handle_cli_uuid_reset (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ __glusterd_handle_cli_uuid_reset);
+}
+
+int
+__glusterd_handle_cli_uuid_get (rpcsvc_request_t *req)
+{
+ int ret = -1;
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ gf_cli_rsp rsp = {0,};
+ gf_cli_req cli_req = {{0,}};
+ char msg_str[2048] = {0,};
+ char uuid_str[64] = {0,};
+
+ GF_ASSERT (req);
+
+ this = THIS;
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ gf_log ("glusterd", GF_LOG_DEBUG, "Received uuid get req");
+
+ if (cli_req.dict.dict_len) {
+ dict = dict_new ();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf (msg_str, sizeof (msg_str), "Unable to decode "
+ "the buffer");
+ goto out;
+
+ } else {
+ dict->extra_stdfree = cli_req.dict.dict_val;
+
+ }
+ }
+
+ rsp_dict = dict_new ();
+ if (!rsp_dict) {
+ ret = -1;
+ goto out;
+ }
+
+ uuid_utoa_r (MY_UUID, uuid_str);
+ ret = dict_set_str (rsp_dict, "uuid", uuid_str);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set uuid in "
+ "dictionary.");
+ goto out;
+ }
+
+ ret = dict_allocate_and_serialize (rsp_dict, &rsp.dict.dict_val,
+ &rsp.dict.dict_len);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to serialize "
+ "dictionary.");
+ goto out;
+ }
+ ret = 0;
+out:
+ if (ret) {
+ rsp.op_ret = -1;
+ if (msg_str[0] == '\0')
+ snprintf (msg_str, sizeof (msg_str), "Operation "
+ "failed");
+ rsp.op_errstr = msg_str;
+
+ } else {
+ rsp.op_errstr = "";
+
+ }
+
+ glusterd_to_cli (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_cli_rsp, dict);
+
+ return 0;
+}
+int
+glusterd_handle_cli_uuid_get (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ __glusterd_handle_cli_uuid_get);
+}
+
+int
+__glusterd_handle_cli_list_volume (rpcsvc_request_t *req)
+{
+ int ret = -1;
+ dict_t *dict = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ int count = 0;
+ char key[1024] = {0,};
+ gf_cli_rsp rsp = {0,};
+
+ GF_ASSERT (req);
+
+ priv = THIS->private;
+ GF_ASSERT (priv);
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ list_for_each_entry (volinfo, &priv->volumes, vol_list) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d", count);
+ ret = dict_set_str (dict, key, volinfo->volname);
+ if (ret)
+ goto out;
+ count++;
+ }
+
+ ret = dict_set_int32 (dict, "count", count);
+ if (ret)
+ goto out;
+
+ ret = dict_allocate_and_serialize (dict, &rsp.dict.dict_val,
+ &rsp.dict.dict_len);
+ if (ret)
+ goto out;
+
+ ret = 0;
+
+out:
+ rsp.op_ret = ret;
+ if (ret)
+ rsp.op_errstr = "Error listing volumes";
+ else
+ rsp.op_errstr = "";
+
+ glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_cli_rsp);
+ ret = 0;
+
+ if (dict)
+ dict_unref (dict);
+
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+
+ return ret;
+}
+
+int
+glusterd_handle_cli_list_volume (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ __glusterd_handle_cli_list_volume);
+}
+
+int32_t
+glusterd_op_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx,
+ char *err_str, size_t err_len)
+{
+ int ret = -1;
+
+ ret = glusterd_op_txn_begin (req, op, ctx, err_str, err_len);
+
+ return ret;
+}
+
+int
+__glusterd_handle_reset_volume (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{0,}};
+ dict_t *dict = NULL;
+ glusterd_op_t cli_op = GD_OP_RESET_VOLUME;
+ char *volname = NULL;
+ char err_str[2048] = {0,};
+ xlator_t *this = NULL;
+
+ GF_ASSERT (req);
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ snprintf (err_str, sizeof (err_str), "Failed to decode request "
+ "received from cli");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf (err_str, sizeof (err_str), "Unable to decode "
+ "the command");
+ goto out;
+ } else {
+ dict->extra_stdfree = cli_req.dict.dict_val;
+ }
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Failed to get volume "
+ "name");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+ gf_log (this->name, GF_LOG_DEBUG, "Received volume reset request for "
+ "volume %s", volname);
+
+ ret = glusterd_op_begin_synctask (req, GD_OP_RESET_VOLUME, dict);
+
+out:
+ if (ret) {
+ if (err_str[0] == '\0')
+ snprintf (err_str, sizeof (err_str),
+ "Operation failed");
+ ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+ dict, err_str);
+ }
+
+ return ret;
+}
+
+int
+glusterd_handle_reset_volume (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ __glusterd_handle_reset_volume);
+}
+
+int
+__glusterd_handle_set_volume (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{0,}};
+ dict_t *dict = NULL;
+ glusterd_op_t cli_op = GD_OP_SET_VOLUME;
+ char *key = NULL;
+ char *value = NULL;
+ char *volname = NULL;
+ char *op_errstr = NULL;
+ gf_boolean_t help = _gf_false;
+ char err_str[2048] = {0,};
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ GF_ASSERT (req);
+
+ ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ snprintf (err_str, sizeof (err_str), "Failed to decode "
+ "request received from cli");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf (err_str, sizeof (err_str), "Unable to decode "
+ "the command");
+ goto out;
+ } else {
+ dict->extra_stdfree = cli_req.dict.dict_val;
+ }
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Failed to get volume "
+ "name while handling volume set command");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ if (strcmp (volname, "help") == 0 ||
+ strcmp (volname, "help-xml") == 0) {
+ ret = glusterd_volset_help (dict, &op_errstr);
+ help = _gf_true;
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "key1", &key);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Failed to get key while"
+ " handling volume set for %s", volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "value1", &value);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Failed to get value while"
+ " handling volume set for %s", volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+ gf_log (this->name, GF_LOG_DEBUG, "Received volume set request for "
+ "volume %s", volname);
+
+ ret = glusterd_op_begin_synctask (req, GD_OP_SET_VOLUME, dict);
+
+out:
+ if (help)
+ ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, dict,
+ (op_errstr)? op_errstr:"");
+ else if (ret) {
+ if (err_str[0] == '\0')
+ snprintf (err_str, sizeof (err_str),
+ "Operation failed");
+ ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+ dict, err_str);
+ }
+ if (op_errstr)
+ GF_FREE (op_errstr);
+
+ return ret;
+}
+
+int
+glusterd_handle_set_volume (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __glusterd_handle_set_volume);
+}
+
+int
+__glusterd_handle_sync_volume (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{0,}};
+ dict_t *dict = NULL;
+ gf_cli_rsp cli_rsp = {0.};
+ char msg[2048] = {0,};
+ char *volname = NULL;
+ gf1_cli_sync_volume flags = 0;
+ char *hostname = NULL;
+ xlator_t *this = NULL;
+
+ GF_ASSERT (req);
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf (msg, sizeof (msg), "Unable to decode the "
+ "command");
+ goto out;
+ } else {
+ dict->extra_stdfree = cli_req.dict.dict_val;
+ }
+ }
+
+ ret = dict_get_str (dict, "hostname", &hostname);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Failed to get hostname");
+ gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ ret = dict_get_int32 (dict, "flags", (int32_t*)&flags);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Failed to get volume name"
+ " or flags");
+ gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+ goto out;
+ }
+ }
+
+ gf_log (this->name, GF_LOG_INFO, "Received volume sync req "
+ "for volume %s", (flags & GF_CLI_SYNC_ALL) ? "all" : volname);
+
+ if (gf_is_local_addr (hostname)) {
+ ret = -1;
+ snprintf (msg, sizeof (msg), "sync from localhost"
+ " not allowed");
+ gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+ goto out;
+ }
+
+ ret = glusterd_op_begin_synctask (req, GD_OP_SYNC_VOLUME, dict);
+
+out:
+ if (ret) {
+ cli_rsp.op_ret = -1;
+ cli_rsp.op_errstr = msg;
+ if (msg[0] == '\0')
+ snprintf (msg, sizeof (msg), "Operation failed");
+ glusterd_to_cli (req, &cli_rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_cli_rsp, dict);
+
+ ret = 0; //sent error to cli, prevent second reply
+ }
+
+ return ret;
+}
+
+int
+glusterd_handle_sync_volume (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __glusterd_handle_sync_volume);
+}
+
+int
+glusterd_fsm_log_send_resp (rpcsvc_request_t *req, int op_ret,
+ char *op_errstr, dict_t *dict)
+{
+
+ int ret = -1;
+ gf1_cli_fsm_log_rsp rsp = {0};
+
+ GF_ASSERT (req);
+ GF_ASSERT (op_errstr);
+
+ rsp.op_ret = op_ret;
+ rsp.op_errstr = op_errstr;
+ if (rsp.op_ret == 0)
+ ret = dict_allocate_and_serialize (dict, &rsp.fsm_log.fsm_log_val,
+ &rsp.fsm_log.fsm_log_len);
+
+ ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf1_cli_fsm_log_rsp);
+ GF_FREE (rsp.fsm_log.fsm_log_val);
+
+ gf_log ("glusterd", GF_LOG_DEBUG, "Responded, ret: %d", ret);
+
+ return 0;
+}
+
+int
+__glusterd_handle_fsm_log (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf1_cli_fsm_log_req cli_req = {0,};
+ dict_t *dict = NULL;
+ glusterd_sm_tr_log_t *log = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ char msg[2048] = {0};
+ glusterd_peerinfo_t *peerinfo = NULL;
+
+ GF_ASSERT (req);
+
+ ret = xdr_to_generic (req->msg[0], &cli_req,
+ (xdrproc_t)xdr_gf1_cli_fsm_log_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ snprintf (msg, sizeof (msg), "Garbage request");
+ goto out;
+ }
+
+ if (strcmp ("", cli_req.name) == 0) {
+ this = THIS;
+ conf = this->private;
+ log = &conf->op_sm_log;
+ } else {
+ ret = glusterd_friend_find_by_hostname (cli_req.name,
+ &peerinfo);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "%s is not a peer",
+ cli_req.name);
+ goto out;
+ }
+ log = &peerinfo->sm_log;
+ }
+
+ dict = dict_new ();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_sm_tr_log_add_to_dict (dict, log);
+out:
+ (void)glusterd_fsm_log_send_resp (req, ret, msg, dict);
+ free (cli_req.name);//malloced by xdr
+ if (dict)
+ dict_unref (dict);
+
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+
+ return 0;//send 0 to avoid double reply
+}
+
+int
+glusterd_handle_fsm_log (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __glusterd_handle_fsm_log);
+}
+
+int
+glusterd_op_lock_send_resp (rpcsvc_request_t *req, int32_t status)
+{
+
+ gd1_mgmt_cluster_lock_rsp rsp = {{0},};
+ int ret = -1;
+
+ GF_ASSERT (req);
+ glusterd_get_uuid (&rsp.uuid);
+ rsp.op_ret = status;
+
+ ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_cluster_lock_rsp);
+
+ gf_log (THIS->name, GF_LOG_DEBUG, "Responded to lock, ret: %d", ret);
+
+ return 0;
+}
+
+int
+glusterd_op_unlock_send_resp (rpcsvc_request_t *req, int32_t status)
+{
+
+ gd1_mgmt_cluster_unlock_rsp rsp = {{0},};
+ int ret = -1;
+
+ GF_ASSERT (req);
+ rsp.op_ret = status;
+ glusterd_get_uuid (&rsp.uuid);
+
+ ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_cluster_unlock_rsp);
+
+ gf_log (THIS->name, GF_LOG_DEBUG, "Responded to unlock, ret: %d", ret);
+
+ return ret;
+}
+
+int
+glusterd_op_mgmt_v3_lock_send_resp (rpcsvc_request_t *req, uuid_t *txn_id,
+ int32_t status)
+{
+
+ gd1_mgmt_v3_lock_rsp rsp = {{0},};
+ int ret = -1;
+
+ GF_ASSERT (req);
+ GF_ASSERT (txn_id);
+ glusterd_get_uuid (&rsp.uuid);
+ rsp.op_ret = status;
+ if (rsp.op_ret)
+ rsp.op_errno = errno;
+ uuid_copy (rsp.txn_id, *txn_id);
+
+ ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp);
+
+ gf_log (THIS->name, GF_LOG_DEBUG, "Responded to mgmt_v3 lock, ret: %d",
+ ret);
+
+ return ret;
+}
+
+int
+glusterd_op_mgmt_v3_unlock_send_resp (rpcsvc_request_t *req, uuid_t *txn_id,
+ int32_t status)
+{
+
+ gd1_mgmt_v3_unlock_rsp rsp = {{0},};
+ int ret = -1;
+
+ GF_ASSERT (req);
+ GF_ASSERT (txn_id);
+ rsp.op_ret = status;
+ if (rsp.op_ret)
+ rsp.op_errno = errno;
+ glusterd_get_uuid (&rsp.uuid);
+ uuid_copy (rsp.txn_id, *txn_id);
+
+ ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp);
+
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "Responded to mgmt_v3 unlock, ret: %d",
+ ret);
+
+ return ret;
+}
+
+int
+__glusterd_handle_cluster_unlock (rpcsvc_request_t *req)
+{
+ gd1_mgmt_cluster_unlock_req unlock_req = {{0}, };
+ int32_t ret = -1;
+ glusterd_op_lock_ctx_t *ctx = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ xlator_t *this = NULL;
+ uuid_t *txn_id = &global_txn_id;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+
+ ret = xdr_to_generic (req->msg[0], &unlock_req,
+ (xdrproc_t)xdr_gd1_mgmt_cluster_unlock_req);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to decode unlock "
+ "request received from peer");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Received UNLOCK from uuid: %s", uuid_utoa (unlock_req.uuid));
+
+ if (glusterd_friend_find_by_uuid (unlock_req.uuid, &peerinfo)) {
+ gf_log (this->name, GF_LOG_WARNING, "%s doesn't "
+ "belong to the cluster. Ignoring request.",
+ uuid_utoa (unlock_req.uuid));
+ ret = -1;
+ goto out;
+ }
+
+ ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t);
+
+ if (!ctx) {
+ //respond here
+ return -1;
+ }
+ uuid_copy (ctx->uuid, unlock_req.uuid);
+ ctx->req = req;
+ ctx->dict = NULL;
+
+ ret = glusterd_op_sm_inject_event (GD_OP_EVENT_UNLOCK, txn_id, ctx);
+
+out:
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+
+ return ret;
+}
+
+int
+glusterd_handle_cluster_unlock (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ __glusterd_handle_cluster_unlock);
+}
+
+int
+glusterd_op_stage_send_resp (rpcsvc_request_t *req,
+ int32_t op, int32_t status,
+ char *op_errstr, dict_t *rsp_dict)
+{
+ gd1_mgmt_stage_op_rsp rsp = {{0},};
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+
+ rsp.op_ret = status;
+ glusterd_get_uuid (&rsp.uuid);
+ rsp.op = op;
+ if (op_errstr)
+ rsp.op_errstr = op_errstr;
+ else
+ rsp.op_errstr = "";
+
+ ret = dict_allocate_and_serialize (rsp_dict, &rsp.dict.dict_val,
+ &rsp.dict.dict_len);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to get serialized length of dict");
+ return ret;
+ }
+
+ ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_stage_op_rsp);
+
+ gf_log (this->name, GF_LOG_DEBUG, "Responded to stage, ret: %d", ret);
+ GF_FREE (rsp.dict.dict_val);
+
+ return ret;
+}
+
+int
+glusterd_op_commit_send_resp (rpcsvc_request_t *req,
+ int32_t op, int32_t status, char *op_errstr,
+ dict_t *rsp_dict)
+{
+ gd1_mgmt_commit_op_rsp rsp = {{0}, };
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+ rsp.op_ret = status;
+ glusterd_get_uuid (&rsp.uuid);
+ rsp.op = op;
+
+ if (op_errstr)
+ rsp.op_errstr = op_errstr;
+ else
+ rsp.op_errstr = "";
+
+ if (rsp_dict) {
+ ret = dict_allocate_and_serialize (rsp_dict, &rsp.dict.dict_val,
+ &rsp.dict.dict_len);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to get serialized length of dict");
+ goto out;
+ }
+ }
+
+
+ ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_commit_op_rsp);
+
+ gf_log (this->name, GF_LOG_DEBUG, "Responded to commit, ret: %d", ret);
+
+out:
+ GF_FREE (rsp.dict.dict_val);
+ return ret;
+}
+
+int
+__glusterd_handle_incoming_friend_req (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gd1_mgmt_friend_req friend_req = {{0},};
+ gf_boolean_t run_fsm = _gf_true;
+
+ GF_ASSERT (req);
+ ret = xdr_to_generic (req->msg[0], &friend_req,
+ (xdrproc_t)xdr_gd1_mgmt_friend_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ gf_log ("glusterd", GF_LOG_INFO,
+ "Received probe from uuid: %s", uuid_utoa (friend_req.uuid));
+ ret = glusterd_handle_friend_req (req, friend_req.uuid,
+ friend_req.hostname, friend_req.port,
+ &friend_req);
+
+ if (ret == GLUSTERD_CONNECTION_AWAITED) {
+ //fsm should be run after connection establishes
+ run_fsm = _gf_false;
+ ret = 0;
+ }
+
+out:
+ free (friend_req.hostname);//malloced by xdr
+
+ if (run_fsm) {
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+ }
+
+ return ret;
+}
+
+int
+glusterd_handle_incoming_friend_req (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ __glusterd_handle_incoming_friend_req);
+}
+
+int
+__glusterd_handle_incoming_unfriend_req (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gd1_mgmt_friend_req friend_req = {{0},};
+ char remote_hostname[UNIX_PATH_MAX + 1] = {0,};
+
+ GF_ASSERT (req);
+ ret = xdr_to_generic (req->msg[0], &friend_req,
+ (xdrproc_t)xdr_gd1_mgmt_friend_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ gf_log ("glusterd", GF_LOG_INFO,
+ "Received unfriend from uuid: %s", uuid_utoa (friend_req.uuid));
+
+ ret = glusterd_remote_hostname_get (req, remote_hostname,
+ sizeof (remote_hostname));
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get the remote hostname");
+ goto out;
+ }
+ ret = glusterd_handle_unfriend_req (req, friend_req.uuid,
+ remote_hostname, friend_req.port);
+
+out:
+ free (friend_req.hostname);//malloced by xdr
+ free (friend_req.vols.vols_val);//malloced by xdr
+
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+
+ return ret;
+}
+
+int
+glusterd_handle_incoming_unfriend_req (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ __glusterd_handle_incoming_unfriend_req);
+
+}
+
+int
+glusterd_handle_friend_update_delete (dict_t *dict)
+{
+ char *hostname = NULL;
+ int32_t ret = -1;
+
+ GF_ASSERT (dict);
+
+ ret = dict_get_str (dict, "hostname", &hostname);
+ if (ret)
+ goto out;
+
+ ret = glusterd_friend_remove (NULL, hostname);
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_friend_hostname_update (glusterd_peerinfo_t *peerinfo,
+ char *hostname,
+ gf_boolean_t store_update)
+{
+ char *new_hostname = NULL;
+ int ret = 0;
+
+ GF_ASSERT (peerinfo);
+ GF_ASSERT (hostname);
+
+ new_hostname = gf_strdup (hostname);
+ if (!new_hostname) {
+ ret = -1;
+ goto out;
+ }
+
+ GF_FREE (peerinfo->hostname);
+ peerinfo->hostname = new_hostname;
+ if (store_update)
+ ret = glusterd_store_peerinfo (peerinfo);
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+__glusterd_handle_friend_update (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gd1_mgmt_friend_update friend_req = {{0},};
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ glusterd_peerinfo_t *tmp = NULL;
+ gd1_mgmt_friend_update_rsp rsp = {{0},};
+ dict_t *dict = NULL;
+ char key[100] = {0,};
+ char *uuid_buf = NULL;
+ char *hostname = NULL;
+ int i = 1;
+ int count = 0;
+ uuid_t uuid = {0,};
+ glusterd_peerctx_args_t args = {0};
+ int32_t op = 0;
+
+ GF_ASSERT (req);
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = xdr_to_generic (req->msg[0], &friend_req,
+ (xdrproc_t)xdr_gd1_mgmt_friend_update);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ ret = glusterd_friend_find (friend_req.uuid, NULL, &tmp);
+ if (ret) {
+ gf_log ("", GF_LOG_CRITICAL, "Received friend update request "
+ "from unknown peer %s", uuid_utoa (friend_req.uuid));
+ goto out;
+ }
+ gf_log ("glusterd", GF_LOG_INFO,
+ "Received friend update from uuid: %s", uuid_utoa (friend_req.uuid));
+
+ if (friend_req.friends.friends_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (friend_req.friends.friends_val,
+ friend_req.friends.friends_len,
+ &dict);
+ if (ret < 0) {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ goto out;
+ } else {
+ dict->extra_stdfree = friend_req.friends.friends_val;
+ }
+ }
+
+ ret = dict_get_int32 (dict, "count", &count);
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32 (dict, "op", &op);
+ if (ret)
+ goto out;
+
+ if (GD_FRIEND_UPDATE_DEL == op) {
+ ret = glusterd_handle_friend_update_delete (dict);
+ goto out;
+ }
+
+ args.mode = GD_MODE_ON;
+ while ( i <= count) {
+ snprintf (key, sizeof (key), "friend%d.uuid", i);
+ ret = dict_get_str (dict, key, &uuid_buf);
+ if (ret)
+ goto out;
+ uuid_parse (uuid_buf, uuid);
+ snprintf (key, sizeof (key), "friend%d.hostname", i);
+ ret = dict_get_str (dict, key, &hostname);
+ if (ret)
+ goto out;
+
+ gf_log ("", GF_LOG_INFO, "Received uuid: %s, hostname:%s",
+ uuid_buf, hostname);
+
+ if (uuid_is_null (uuid)) {
+ gf_log (this->name, GF_LOG_WARNING, "Updates mustn't "
+ "contain peer with 'null' uuid");
+ continue;
+ }
+
+ if (!uuid_compare (uuid, MY_UUID)) {
+ gf_log ("", GF_LOG_INFO, "Received my uuid as Friend");
+ i++;
+ continue;
+ }
+
+ ret = glusterd_friend_find (uuid, hostname, &tmp);
+
+ if (!ret) {
+ if (strcmp (hostname, tmp->hostname) != 0) {
+ glusterd_friend_hostname_update (tmp, hostname,
+ _gf_true);
+ }
+ i++;
+ continue;
+ }
+
+ ret = glusterd_friend_add (hostname, friend_req.port,
+ GD_FRIEND_STATE_BEFRIENDED,
+ &uuid, &peerinfo, 0, &args);
+
+ i++;
+ }
+
+out:
+ uuid_copy (rsp.uuid, MY_UUID);
+ ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_friend_update_rsp);
+ if (dict) {
+ if (!dict->extra_stdfree && friend_req.friends.friends_val)
+ free (friend_req.friends.friends_val);//malloced by xdr
+ dict_unref (dict);
+ } else {
+ free (friend_req.friends.friends_val);//malloced by xdr
+ }
+
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+
+ return ret;
+}
+
+int
+glusterd_handle_friend_update (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ __glusterd_handle_friend_update);
+}
+
+int
+__glusterd_handle_probe_query (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ gd1_mgmt_probe_req probe_req = {{0},};
+ gd1_mgmt_probe_rsp rsp = {{0},};
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_peerctx_args_t args = {0};
+ int port = 0;
+ char remote_hostname[UNIX_PATH_MAX + 1] = {0,};
+
+ GF_ASSERT (req);
+
+ ret = xdr_to_generic (req->msg[0], &probe_req,
+ (xdrproc_t)xdr_gd1_mgmt_probe_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ this = THIS;
+
+ conf = this->private;
+ if (probe_req.port)
+ port = probe_req.port;
+ else
+ port = GF_DEFAULT_BASE_PORT;
+
+ gf_log ("glusterd", GF_LOG_INFO,
+ "Received probe from uuid: %s", uuid_utoa (probe_req.uuid));
+
+ /* Check for uuid collision and handle it in a user friendly way by
+ * sending the error.
+ */
+ if (!uuid_compare (probe_req.uuid, MY_UUID)) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Peer uuid %s is same as "
+ "local uuid. Please check the uuid of both the peers "
+ "from %s/%s", uuid_utoa (probe_req.uuid),
+ GLUSTERD_DEFAULT_WORKDIR, GLUSTERD_INFO_FILE);
+ rsp.op_ret = -1;
+ rsp.op_errno = GF_PROBE_SAME_UUID;
+ rsp.port = port;
+ goto respond;
+ }
+
+ ret = glusterd_remote_hostname_get (req, remote_hostname,
+ sizeof (remote_hostname));
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get the remote hostname");
+ goto out;
+ }
+ ret = glusterd_friend_find (probe_req.uuid, remote_hostname, &peerinfo);
+ if ((ret != 0 ) && (!list_empty (&conf->peers))) {
+ rsp.op_ret = -1;
+ rsp.op_errno = GF_PROBE_ANOTHER_CLUSTER;
+ } else if (ret) {
+ gf_log ("glusterd", GF_LOG_INFO, "Unable to find peerinfo"
+ " for host: %s (%d)", remote_hostname, port);
+ args.mode = GD_MODE_ON;
+ ret = glusterd_friend_add (remote_hostname, port,
+ GD_FRIEND_STATE_PROBE_RCVD,
+ NULL, &peerinfo, 0, &args);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Failed to add peer %s",
+ remote_hostname);
+ rsp.op_errno = GF_PROBE_ADD_FAILED;
+ }
+ }
+
+respond:
+ uuid_copy (rsp.uuid, MY_UUID);
+
+ rsp.hostname = probe_req.hostname;
+ rsp.op_errstr = "";
+
+ glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_probe_rsp);
+ ret = 0;
+
+ gf_log ("glusterd", GF_LOG_INFO, "Responded to %s, op_ret: %d, "
+ "op_errno: %d, ret: %d", remote_hostname,
+ rsp.op_ret, rsp.op_errno, ret);
+
+out:
+ free (probe_req.hostname);//malloced by xdr
+
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+
+ return ret;
+}
+
+int glusterd_handle_probe_query (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __glusterd_handle_probe_query);
+}
+
+int
+__glusterd_handle_cli_profile_volume (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{0,}};
+ dict_t *dict = NULL;
+ glusterd_op_t cli_op = GD_OP_PROFILE_VOLUME;
+ char *volname = NULL;
+ int32_t op = 0;
+ char err_str[2048] = {0,};
+ xlator_t *this = NULL;
+
+ GF_ASSERT (req);
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len > 0) {
+ dict = dict_new();
+ if (!dict)
+ goto out;
+ dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len, &dict);
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Unable to get volume "
+ "name");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_INFO, "Received volume profile req "
+ "for volume %s", volname);
+ ret = dict_get_int32 (dict, "op", &op);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Unable to get operation");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ ret = glusterd_op_begin (req, cli_op, dict, err_str, sizeof (err_str));
+
+out:
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+
+ free (cli_req.dict.dict_val);
+
+ if (ret) {
+ if (err_str[0] == '\0')
+ snprintf (err_str, sizeof (err_str),
+ "Operation failed");
+ ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+ dict, err_str);
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_handle_cli_profile_volume (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ __glusterd_handle_cli_profile_volume);
+}
+
+int
+__glusterd_handle_getwd (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf1_cli_getwd_rsp rsp = {0,};
+ glusterd_conf_t *priv = NULL;
+
+ GF_ASSERT (req);
+
+ priv = THIS->private;
+ GF_ASSERT (priv);
+
+ gf_log ("glusterd", GF_LOG_INFO, "Received getwd req");
+
+ rsp.wd = priv->workdir;
+
+ glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf1_cli_getwd_rsp);
+ ret = 0;
+
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+
+ return ret;
+}
+
+int
+glusterd_handle_getwd (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __glusterd_handle_getwd);
+}
+
+int
+__glusterd_handle_mount (rpcsvc_request_t *req)
+{
+ gf1_cli_mount_req mnt_req = {0,};
+ gf1_cli_mount_rsp rsp = {0,};
+ dict_t *dict = NULL;
+ int ret = 0;
+ glusterd_conf_t *priv = NULL;
+
+ GF_ASSERT (req);
+ priv = THIS->private;
+
+ ret = xdr_to_generic (req->msg[0], &mnt_req,
+ (xdrproc_t)xdr_gf1_cli_mount_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ gf_log ("glusterd", GF_LOG_INFO, "Received mount req");
+
+ if (mnt_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (mnt_req.dict.dict_val,
+ mnt_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ rsp.op_ret = -1;
+ rsp.op_errno = -EINVAL;
+ goto out;
+ } else {
+ dict->extra_stdfree = mnt_req.dict.dict_val;
+ }
+ }
+
+ synclock_unlock (&priv->big_lock);
+ rsp.op_ret = glusterd_do_mount (mnt_req.label, dict,
+ &rsp.path, &rsp.op_errno);
+ synclock_lock (&priv->big_lock);
+
+ out:
+ if (!rsp.path)
+ rsp.path = "";
+
+ glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf1_cli_mount_rsp);
+ ret = 0;
+
+ if (dict)
+ dict_unref (dict);
+ if (*rsp.path)
+ GF_FREE (rsp.path);
+
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+
+ return ret;
+}
+
+int
+glusterd_handle_mount (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __glusterd_handle_mount);
+}
+
+int
+__glusterd_handle_umount (rpcsvc_request_t *req)
+{
+ gf1_cli_umount_req umnt_req = {0,};
+ gf1_cli_umount_rsp rsp = {0,};
+ char *mountbroker_root = NULL;
+ char mntp[PATH_MAX] = {0,};
+ char *path = NULL;
+ runner_t runner = {0,};
+ int ret = 0;
+ xlator_t *this = THIS;
+ gf_boolean_t dir_ok = _gf_false;
+ char *pdir = NULL;
+ char *t = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ GF_ASSERT (req);
+ GF_ASSERT (this);
+ priv = this->private;
+
+ ret = xdr_to_generic (req->msg[0], &umnt_req,
+ (xdrproc_t)xdr_gf1_cli_umount_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ rsp.op_ret = -1;
+ goto out;
+ }
+
+ gf_log ("glusterd", GF_LOG_INFO, "Received umount req");
+
+ if (dict_get_str (this->options, "mountbroker-root",
+ &mountbroker_root) != 0) {
+ rsp.op_errno = ENOENT;
+ goto out;
+ }
+
+ /* check if it is allowed to umount path */
+ path = gf_strdup (umnt_req.path);
+ if (!path) {
+ rsp.op_errno = ENOMEM;
+ goto out;
+ }
+ dir_ok = _gf_false;
+ pdir = dirname (path);
+ t = strtail (pdir, mountbroker_root);
+ if (t && *t == '/') {
+ t = strtail(++t, MB_HIVE);
+ if (t && !*t)
+ dir_ok = _gf_true;
+ }
+ GF_FREE (path);
+ if (!dir_ok) {
+ rsp.op_errno = EACCES;
+ goto out;
+ }
+
+ runinit (&runner);
+ runner_add_args (&runner, "umount", umnt_req.path, NULL);
+ if (umnt_req.lazy)
+ runner_add_arg (&runner, "-l");
+ synclock_unlock (&priv->big_lock);
+ rsp.op_ret = runner_run (&runner);
+ synclock_lock (&priv->big_lock);
+ if (rsp.op_ret == 0) {
+ if (realpath (umnt_req.path, mntp))
+ rmdir (mntp);
+ else {
+ rsp.op_ret = -1;
+ rsp.op_errno = errno;
+ }
+ if (unlink (umnt_req.path) != 0) {
+ rsp.op_ret = -1;
+ rsp.op_errno = errno;
+ }
+ }
+
+ out:
+ if (rsp.op_errno)
+ rsp.op_ret = -1;
+
+ glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf1_cli_umount_rsp);
+ ret = 0;
+
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+
+ return ret;
+}
+
+int
+glusterd_handle_umount (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __glusterd_handle_umount);
+}
+
+int
+glusterd_friend_remove (uuid_t uuid, char *hostname)
+{
+ int ret = 0;
+ glusterd_peerinfo_t *peerinfo = NULL;
+
+ ret = glusterd_friend_find (uuid, hostname, &peerinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_friend_remove_cleanup_vols (peerinfo->uuid);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING, "Volumes cleanup failed");
+ ret = glusterd_friend_cleanup (peerinfo);
+out:
+ gf_log ("", GF_LOG_DEBUG, "returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_rpc_create (struct rpc_clnt **rpc,
+ dict_t *options,
+ rpc_clnt_notify_t notify_fn,
+ void *notify_data)
+{
+ struct rpc_clnt *new_rpc = NULL;
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ GF_ASSERT (options);
+
+ /* TODO: is 32 enough? or more ? */
+ new_rpc = rpc_clnt_new (options, this->ctx, this->name, 16);
+ if (!new_rpc)
+ goto out;
+
+ ret = rpc_clnt_register_notify (new_rpc, notify_fn, notify_data);
+ *rpc = new_rpc;
+ if (ret)
+ goto out;
+ ret = rpc_clnt_start (new_rpc);
+out:
+ if (ret) {
+ if (new_rpc) {
+ (void) rpc_clnt_unref (new_rpc);
+ }
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_transport_keepalive_options_get (int *interval, int *time)
+{
+ int ret = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = dict_get_int32 (this->options,
+ "transport.socket.keepalive-interval",
+ interval);
+ ret = dict_get_int32 (this->options,
+ "transport.socket.keepalive-time",
+ time);
+ return 0;
+}
+
+int
+glusterd_transport_inet_options_build (dict_t **options, const char *hostname,
+ int port)
+{
+ dict_t *dict = NULL;
+ int32_t interval = -1;
+ int32_t time = -1;
+ int ret = 0;
+
+ GF_ASSERT (options);
+ GF_ASSERT (hostname);
+
+ if (!port)
+ port = GLUSTERD_DEFAULT_PORT;
+
+ /* Build default transport options */
+ ret = rpc_transport_inet_options_build (&dict, hostname, port);
+ if (ret)
+ goto out;
+
+ /* Set frame-timeout to 10mins. Default timeout of 30 mins is too long
+ * when compared to 2 mins for cli timeout. This ensures users don't
+ * wait too long after cli timesout before being able to resume normal
+ * operations
+ */
+ ret = dict_set_int32 (dict, "frame-timeout", 600);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "Failed to set frame-timeout");
+ goto out;
+ }
+
+ /* Set keepalive options */
+ glusterd_transport_keepalive_options_get (&interval, &time);
+
+ if ((interval > 0) || (time > 0))
+ ret = rpc_transport_keepalive_options_set (dict, interval, time);
+ *options = dict;
+out:
+ gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_friend_rpc_create (xlator_t *this, glusterd_peerinfo_t *peerinfo,
+ glusterd_peerctx_args_t *args)
+{
+ dict_t *options = NULL;
+ int ret = -1;
+ glusterd_peerctx_t *peerctx = NULL;
+ data_t *data = NULL;
+
+ peerctx = GF_CALLOC (1, sizeof (*peerctx), gf_gld_mt_peerctx_t);
+ if (!peerctx)
+ goto out;
+
+ if (args)
+ peerctx->args = *args;
+
+ peerctx->peerinfo = peerinfo;
+
+ ret = glusterd_transport_inet_options_build (&options,
+ peerinfo->hostname,
+ peerinfo->port);
+ if (ret)
+ goto out;
+
+ /*
+ * For simulated multi-node testing, we need to make sure that we
+ * create our RPC endpoint with the same address that the peer would
+ * use to reach us.
+ */
+ if (this->options) {
+ data = dict_get(this->options,"transport.socket.bind-address");
+ if (data) {
+ ret = dict_set(options,
+ "transport.socket.source-addr",data);
+ }
+ }
+
+ ret = glusterd_rpc_create (&peerinfo->rpc, options,
+ glusterd_peer_rpc_notify, peerctx);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to create rpc for"
+ " peer %s", peerinfo->hostname);
+ goto out;
+ }
+ peerctx = NULL;
+ ret = 0;
+out:
+ GF_FREE (peerctx);
+ return ret;
+}
+
+int
+glusterd_friend_add (const char *hoststr, int port,
+ glusterd_friend_sm_state_t state,
+ uuid_t *uuid,
+ glusterd_peerinfo_t **friend,
+ gf_boolean_t restore,
+ glusterd_peerctx_args_t *args)
+{
+ int ret = 0;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ this = THIS;
+ conf = this->private;
+ GF_ASSERT (conf);
+ GF_ASSERT (hoststr);
+
+ ret = glusterd_peerinfo_new (friend, state, uuid, hoststr, port);
+ if (ret) {
+ goto out;
+ }
+
+ /*
+ * We can't add to the list after calling glusterd_friend_rpc_create,
+ * even if it succeeds, because by then the callback to take it back
+ * off and free might have happened already (notably in the case of an
+ * invalid peer name). That would mean we're adding something that had
+ * just been free, and we're likely to crash later.
+ */
+ list_add_tail (&(*friend)->uuid_list, &conf->peers);
+
+ //restore needs to first create the list of peers, then create rpcs
+ //to keep track of quorum in race-free manner. In restore for each peer
+ //rpc-create calls rpc_notify when the friend-list is partially
+ //constructed, leading to wrong quorum calculations.
+ if (!restore) {
+ ret = glusterd_store_peerinfo (*friend);
+ if (ret == 0) {
+ synclock_unlock (&conf->big_lock);
+ ret = glusterd_friend_rpc_create (this, *friend, args);
+ synclock_lock (&conf->big_lock);
+ }
+ else {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to store peerinfo");
+ }
+ }
+
+ if (ret) {
+ (void) glusterd_friend_cleanup (*friend);
+ *friend = NULL;
+ }
+
+out:
+ gf_log (this->name, GF_LOG_INFO, "connect returned %d", ret);
+ return ret;
+}
+
+int
+glusterd_probe_begin (rpcsvc_request_t *req, const char *hoststr, int port,
+ dict_t *dict)
+{
+ int ret = -1;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_peerctx_args_t args = {0};
+ glusterd_friend_sm_event_t *event = NULL;
+
+ GF_ASSERT (hoststr);
+
+ ret = glusterd_friend_find (NULL, (char *)hoststr, &peerinfo);
+
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_INFO, "Unable to find peerinfo"
+ " for host: %s (%d)", hoststr, port);
+ args.mode = GD_MODE_ON;
+ args.req = req;
+ args.dict = dict;
+ ret = glusterd_friend_add ((char *)hoststr, port,
+ GD_FRIEND_STATE_DEFAULT,
+ NULL, &peerinfo, 0, &args);
+ if ((!ret) && (!peerinfo->connected)) {
+ ret = GLUSTERD_CONNECTION_AWAITED;
+ }
+
+ } else if (peerinfo->connected &&
+ (GD_FRIEND_STATE_BEFRIENDED == peerinfo->state.state)) {
+ ret = glusterd_friend_hostname_update (peerinfo, (char*)hoststr,
+ _gf_false);
+ if (ret)
+ goto out;
+ //this is just to rename so inject local acc for cluster update
+ ret = glusterd_friend_sm_new_event (GD_FRIEND_EVENT_LOCAL_ACC,
+ &event);
+ if (!ret) {
+ event->peerinfo = peerinfo;
+ ret = glusterd_friend_sm_inject_event (event);
+ glusterd_xfer_cli_probe_resp (req, 0, GF_PROBE_SUCCESS,
+ NULL, (char*)hoststr,
+ port, dict);
+ }
+ } else {
+ glusterd_xfer_cli_probe_resp (req, 0, GF_PROBE_FRIEND, NULL,
+ (char*)hoststr, port, dict);
+ }
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_deprobe_begin (rpcsvc_request_t *req, const char *hoststr, int port,
+ uuid_t uuid, dict_t *dict)
+{
+ int ret = -1;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_friend_sm_event_t *event = NULL;
+ glusterd_probe_ctx_t *ctx = NULL;
+
+ GF_ASSERT (hoststr);
+ GF_ASSERT (req);
+
+ ret = glusterd_friend_find (uuid, (char *)hoststr, &peerinfo);
+
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_INFO, "Unable to find peerinfo"
+ " for host: %s %d", hoststr, port);
+ goto out;
+ }
+
+ if (!peerinfo->rpc) {
+ //handle this case
+ goto out;
+ }
+
+ ret = glusterd_friend_sm_new_event
+ (GD_FRIEND_EVENT_INIT_REMOVE_FRIEND, &event);
+
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "Unable to get new event");
+ return ret;
+ }
+
+ ctx = GF_CALLOC (1, sizeof(*ctx), gf_gld_mt_probe_ctx_t);
+
+ if (!ctx) {
+ goto out;
+ }
+
+ ctx->hostname = gf_strdup (hoststr);
+ ctx->port = port;
+ ctx->req = req;
+ ctx->dict = dict;
+
+ event->ctx = ctx;
+
+ event->peerinfo = peerinfo;
+
+ ret = glusterd_friend_sm_inject_event (event);
+
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR, "Unable to inject event %d, "
+ "ret = %d", event->event, ret);
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+
+int
+glusterd_xfer_friend_remove_resp (rpcsvc_request_t *req, char *hostname, int port)
+{
+ gd1_mgmt_friend_rsp rsp = {{0}, };
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ GF_ASSERT (hostname);
+
+ rsp.op_ret = 0;
+ this = THIS;
+ GF_ASSERT (this);
+
+ conf = this->private;
+
+ uuid_copy (rsp.uuid, MY_UUID);
+ rsp.hostname = hostname;
+ rsp.port = port;
+ ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_friend_rsp);
+
+ gf_log ("glusterd", GF_LOG_INFO,
+ "Responded to %s (%d), ret: %d", hostname, port, ret);
+ return ret;
+}
+
+
+int
+glusterd_xfer_friend_add_resp (rpcsvc_request_t *req, char *myhostname,
+ char *remote_hostname, int port, int32_t op_ret,
+ int32_t op_errno)
+{
+ gd1_mgmt_friend_rsp rsp = {{0}, };
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ GF_ASSERT (myhostname);
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ conf = this->private;
+
+ uuid_copy (rsp.uuid, MY_UUID);
+ rsp.op_ret = op_ret;
+ rsp.op_errno = op_errno;
+ rsp.hostname = gf_strdup (myhostname);
+ rsp.port = port;
+
+ ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_friend_rsp);
+
+ gf_log ("glusterd", GF_LOG_INFO,
+ "Responded to %s (%d), ret: %d", remote_hostname, port, ret);
+ GF_FREE (rsp.hostname);
+ return ret;
+}
+
+static void
+set_probe_error_str (int op_ret, int op_errno, char *op_errstr, char *errstr,
+ size_t len, char *hostname, int port)
+{
+ if ((op_errstr) && (strcmp (op_errstr, ""))) {
+ snprintf (errstr, len, "%s", op_errstr);
+ return;
+ }
+
+ if (!op_ret) {
+ switch (op_errno) {
+ case GF_PROBE_LOCALHOST:
+ snprintf (errstr, len, "Probe on localhost not "
+ "needed");
+ break;
+
+ case GF_PROBE_FRIEND:
+ snprintf (errstr, len, "Host %s port %d already"
+ " in peer list", hostname, port);
+ break;
+
+ default:
+ if (op_errno != 0)
+ snprintf (errstr, len, "Probe returned "
+ "with unknown errno %d",
+ op_errno);
+ break;
+ }
+ } else {
+ switch (op_errno) {
+ case GF_PROBE_ANOTHER_CLUSTER:
+ snprintf (errstr, len, "%s is already part of "
+ "another cluster", hostname);
+ break;
+
+ case GF_PROBE_VOLUME_CONFLICT:
+ snprintf (errstr, len, "Atleast one volume on "
+ "%s conflicts with existing volumes "
+ "in the cluster", hostname);
+ break;
+
+ case GF_PROBE_UNKNOWN_PEER:
+ snprintf (errstr, len, "%s responded with "
+ "'unknown peer' error, this could "
+ "happen if %s doesn't have localhost "
+ "in its peer database", hostname,
+ hostname);
+ break;
+
+ case GF_PROBE_ADD_FAILED:
+ snprintf (errstr, len, "Failed to add peer "
+ "information on %s", hostname);
+ break;
+
+ case GF_PROBE_SAME_UUID:
+ snprintf (errstr, len, "Peer uuid (host %s) is "
+ "same as local uuid", hostname);
+ break;
+
+ case GF_PROBE_QUORUM_NOT_MET:
+ snprintf (errstr, len, "Cluster quorum is not "
+ "met. Changing peers is not allowed "
+ "in this state");
+ break;
+
+ default:
+ snprintf (errstr, len, "Probe returned with "
+ "unknown errno %d", op_errno);
+ break;
+ }
+ }
+}
+
+int
+glusterd_xfer_cli_probe_resp (rpcsvc_request_t *req, int32_t op_ret,
+ int32_t op_errno, char *op_errstr, char *hostname,
+ int port, dict_t *dict)
+{
+ gf_cli_rsp rsp = {0,};
+ int32_t ret = -1;
+ char errstr[2048] = {0,};
+ char *cmd_str = NULL;
+ xlator_t *this = THIS;
+
+ GF_ASSERT (req);
+ GF_ASSERT (this);
+
+ (void) set_probe_error_str (op_ret, op_errno, op_errstr, errstr,
+ sizeof (errstr), hostname, port);
+
+ if (dict) {
+ ret = dict_get_str (dict, "cmd-str", &cmd_str);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get "
+ "command string");
+ }
+
+ rsp.op_ret = op_ret;
+ rsp.op_errno = op_errno;
+ rsp.op_errstr = (errstr[0] != '\0') ? errstr : "";
+
+ gf_cmd_log ("", "%s : %s %s %s", cmd_str,
+ (op_ret) ? "FAILED" : "SUCCESS",
+ (errstr[0] != '\0') ? ":" : " ",
+ (errstr[0] != '\0') ? errstr : " ");
+
+ ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_cli_rsp);
+
+ if (dict)
+ dict_unref (dict);
+ gf_log (this->name, GF_LOG_DEBUG, "Responded to CLI, ret: %d",ret);
+
+ return ret;
+}
+
+static void
+set_deprobe_error_str (int op_ret, int op_errno, char *op_errstr, char *errstr,
+ size_t len, char *hostname)
+{
+ if ((op_errstr) && (strcmp (op_errstr, ""))) {
+ snprintf (errstr, len, "%s", op_errstr);
+ return;
+ }
+
+ if (op_ret) {
+ switch (op_errno) {
+ case GF_DEPROBE_LOCALHOST:
+ snprintf (errstr, len, "%s is localhost",
+ hostname);
+ break;
+
+ case GF_DEPROBE_NOT_FRIEND:
+ snprintf (errstr, len, "%s is not part of "
+ "cluster", hostname);
+ break;
+
+ case GF_DEPROBE_BRICK_EXIST:
+ snprintf (errstr, len, "Brick(s) with the peer "
+ "%s exist in cluster", hostname);
+ break;
+
+ case GF_DEPROBE_FRIEND_DOWN:
+ snprintf (errstr, len, "One of the peers is "
+ "probably down. Check with "
+ "'peer status'");
+ break;
+
+ case GF_DEPROBE_QUORUM_NOT_MET:
+ snprintf (errstr, len, "Cluster quorum is not "
+ "met. Changing peers is not allowed "
+ "in this state");
+ break;
+
+ default:
+ snprintf (errstr, len, "Detach returned with "
+ "unknown errno %d", op_errno);
+ break;
+
+ }
+ }
+}
+
+
+int
+glusterd_xfer_cli_deprobe_resp (rpcsvc_request_t *req, int32_t op_ret,
+ int32_t op_errno, char *op_errstr,
+ char *hostname, dict_t *dict)
+{
+ gf_cli_rsp rsp = {0,};
+ int32_t ret = -1;
+ char *cmd_str = NULL;
+ char errstr[2048] = {0,};
+
+ GF_ASSERT (req);
+
+ (void) set_deprobe_error_str (op_ret, op_errno, op_errstr, errstr,
+ sizeof (errstr), hostname);
+
+ if (dict) {
+ ret = dict_get_str (dict, "cmd-str", &cmd_str);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to get "
+ "command string");
+ }
+
+ rsp.op_ret = op_ret;
+ rsp.op_errno = op_errno;
+ rsp.op_errstr = (errstr[0] != '\0') ? errstr : "";
+
+ gf_cmd_log ("", "%s : %s %s %s", cmd_str,
+ (op_ret) ? "FAILED" : "SUCCESS",
+ (errstr[0] != '\0') ? ":" : " ",
+ (errstr[0] != '\0') ? errstr : " ");
+
+ ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_cli_rsp);
+
+ gf_log (THIS->name, GF_LOG_DEBUG, "Responded to CLI, ret: %d",ret);
+
+ return ret;
+}
+
+int32_t
+glusterd_list_friends (rpcsvc_request_t *req, dict_t *dict, int32_t flags)
+{
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
+ glusterd_peerinfo_t *entry = NULL;
+ int32_t count = 0;
+ dict_t *friends = NULL;
+ gf1_cli_peer_list_rsp rsp = {0,};
+ char my_uuid_str[64] = {0,};
+ char key[256] = {0,};
+
+ priv = THIS->private;
+ GF_ASSERT (priv);
+
+ friends = dict_new ();
+ if (!friends) {
+ gf_log ("", GF_LOG_WARNING, "Out of Memory");
+ goto out;
+ }
+ if (!list_empty (&priv->peers)) {
+ list_for_each_entry (entry, &priv->peers, uuid_list) {
+ count++;
+ ret = glusterd_add_peer_detail_to_dict (entry,
+ friends, count);
+ if (ret)
+ goto out;
+ }
+ }
+
+ if (flags == GF_CLI_LIST_POOL_NODES) {
+ count++;
+ snprintf (key, 256, "friend%d.uuid", count);
+ uuid_utoa_r (MY_UUID, my_uuid_str);
+ ret = dict_set_str (friends, key, my_uuid_str);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "friend%d.hostname", count);
+ ret = dict_set_str (friends, key, "localhost");
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "friend%d.connected", count);
+ ret = dict_set_int32 (friends, key, 1);
+ if (ret)
+ goto out;
+ }
+
+ ret = dict_set_int32 (friends, "count", count);
+ if (ret)
+ goto out;
+
+ ret = dict_allocate_and_serialize (friends, &rsp.friends.friends_val,
+ &rsp.friends.friends_len);
+
+ if (ret)
+ goto out;
+
+ ret = 0;
+out:
+
+ if (friends)
+ dict_unref (friends);
+
+ rsp.op_ret = ret;
+
+ glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf1_cli_peer_list_rsp);
+ ret = 0;
+ GF_FREE (rsp.friends.friends_val);
+
+ return ret;
+}
+
+int32_t
+glusterd_get_volumes (rpcsvc_request_t *req, dict_t *dict, int32_t flags)
+{
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *entry = NULL;
+ int32_t count = 0;
+ dict_t *volumes = NULL;
+ gf_cli_rsp rsp = {0,};
+ char *volname = NULL;
+
+ priv = THIS->private;
+ GF_ASSERT (priv);
+
+ volumes = dict_new ();
+ if (!volumes) {
+ gf_log ("", GF_LOG_WARNING, "Out of Memory");
+ goto out;
+ }
+
+ if (list_empty (&priv->volumes)) {
+ ret = 0;
+ goto respond;
+ }
+
+ if (flags == GF_CLI_GET_VOLUME_ALL) {
+ list_for_each_entry (entry, &priv->volumes, vol_list) {
+ ret = glusterd_add_volume_detail_to_dict (entry,
+ volumes, count);
+ if (ret)
+ goto respond;
+
+ count++;
+
+ }
+
+ } else if (flags == GF_CLI_GET_NEXT_VOLUME) {
+ ret = dict_get_str (dict, "volname", &volname);
+
+ if (ret) {
+ if (priv->volumes.next) {
+ entry = list_entry (priv->volumes.next,
+ typeof (*entry),
+ vol_list);
+ }
+ } else {
+ ret = glusterd_volinfo_find (volname, &entry);
+ if (ret)
+ goto respond;
+ entry = list_entry (entry->vol_list.next,
+ typeof (*entry),
+ vol_list);
+ }
+
+ if (&entry->vol_list == &priv->volumes) {
+ goto respond;
+ } else {
+ ret = glusterd_add_volume_detail_to_dict (entry,
+ volumes, count);
+ if (ret)
+ goto respond;
+
+ count++;
+ }
+ } else if (flags == GF_CLI_GET_VOLUME) {
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto respond;
+
+ ret = glusterd_volinfo_find (volname, &entry);
+ if (ret)
+ goto respond;
+
+ ret = glusterd_add_volume_detail_to_dict (entry,
+ volumes, count);
+ if (ret)
+ goto respond;
+
+ count++;
+ }
+
+respond:
+ ret = dict_set_int32 (volumes, "count", count);
+ if (ret)
+ goto out;
+ ret = dict_allocate_and_serialize (volumes, &rsp.dict.dict_val,
+ &rsp.dict.dict_len);
+
+ if (ret)
+ goto out;
+
+ ret = 0;
+out:
+ rsp.op_ret = ret;
+
+ rsp.op_errstr = "";
+ glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_cli_rsp);
+ ret = 0;
+
+ if (volumes)
+ dict_unref (volumes);
+
+ GF_FREE (rsp.dict.dict_val);
+ return ret;
+}
+
+int
+__glusterd_handle_status_volume (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ uint32_t cmd = 0;
+ dict_t *dict = NULL;
+ char *volname = 0;
+ gf_cli_req cli_req = {{0,}};
+ glusterd_op_t cli_op = GD_OP_STATUS_VOLUME;
+ char err_str[2048] = {0,};
+ xlator_t *this = NULL;
+
+ GF_ASSERT (req);
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len > 0) {
+ dict = dict_new();
+ if (!dict)
+ goto out;
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len, &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "unserialize buffer");
+ snprintf (err_str, sizeof (err_str), "Unable to decode "
+ "the command");
+ goto out;
+ }
+
+ }
+
+ ret = dict_get_uint32 (dict, "cmd", &cmd);
+ if (ret)
+ goto out;
+
+ if (!(cmd & GF_CLI_STATUS_ALL)) {
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Unable to get "
+ "volume name");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+ gf_log (this->name, GF_LOG_INFO,
+ "Received status volume req for volume %s", volname);
+
+ }
+
+ ret = glusterd_op_begin_synctask (req, GD_OP_STATUS_VOLUME, dict);
+
+out:
+
+ if (ret) {
+ if (err_str[0] == '\0')
+ snprintf (err_str, sizeof (err_str),
+ "Operation failed");
+ ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+ dict, err_str);
+ }
+ free (cli_req.dict.dict_val);
+
+ return ret;
+}
+
+int
+glusterd_handle_status_volume (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ __glusterd_handle_status_volume);
+}
+
+int
+__glusterd_handle_cli_clearlocks_volume (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{0,}};
+ glusterd_op_t cli_op = GD_OP_CLEARLOCKS_VOLUME;
+ char *volname = NULL;
+ dict_t *dict = NULL;
+ char err_str[2048] = {0,};
+ xlator_t *this = NULL;
+
+ GF_ASSERT (req);
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = -1;
+ ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ dict = dict_new ();
+
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to unserialize req-buffer to"
+ " dictionary");
+ snprintf (err_str, sizeof (err_str), "unable to decode "
+ "the command");
+ goto out;
+ }
+
+ } else {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR, "Empty cli request.");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Unable to get volume "
+ "name");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_INFO, "Received clear-locks volume req "
+ "for volume %s", volname);
+
+ ret = glusterd_op_begin_synctask (req, GD_OP_CLEARLOCKS_VOLUME, dict);
+
+out:
+ if (ret) {
+ if (err_str[0] == '\0')
+ snprintf (err_str, sizeof (err_str),
+ "Operation failed");
+ ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+ dict, err_str);
+ }
+ free (cli_req.dict.dict_val);
+
+ return ret;
+}
+
+int
+glusterd_handle_cli_clearlocks_volume (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ __glusterd_handle_cli_clearlocks_volume);
+}
+
+static int
+get_brickinfo_from_brickid (char *brickid, glusterd_brickinfo_t **brickinfo)
+{
+ glusterd_volinfo_t *volinfo = NULL;
+ char *volid_str = NULL;
+ char *brick = NULL;
+ char *brickid_dup = NULL;
+ uuid_t volid = {0};
+ int ret = -1;
+
+ brickid_dup = gf_strdup (brickid);
+ if (!brickid_dup)
+ goto out;
+
+ volid_str = brickid_dup;
+ brick = strchr (brickid_dup, ':');
+ *brick = '\0';
+ brick++;
+ if (!volid_str || !brick)
+ goto out;
+
+ uuid_parse (volid_str, volid);
+ ret = glusterd_volinfo_find_by_volume_id (volid, &volinfo);
+ if (ret) {
+ /* Check if it a snapshot volume */
+ ret = glusterd_snap_volinfo_find_by_volume_id (volid, &volinfo);
+ if (ret)
+ goto out;
+ }
+
+ ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo,
+ brickinfo);
+ if (ret)
+ goto out;
+
+ ret = 0;
+out:
+ GF_FREE (brickid_dup);
+ return ret;
+}
+
+int
+__glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data)
+{
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ int ret = 0;
+ char *brickid = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+
+ brickid = mydata;
+ if (!brickid)
+ return 0;
+
+ ret = get_brickinfo_from_brickid (brickid, &brickinfo);
+ if (ret)
+ return 0;
+
+ this = THIS;
+ GF_ASSERT (this);
+ conf = this->private;
+ GF_ASSERT (conf);
+
+ switch (event) {
+ case RPC_CLNT_CONNECT:
+ gf_log (this->name, GF_LOG_DEBUG, "Connected to %s:%s",
+ brickinfo->hostname, brickinfo->path);
+ glusterd_set_brick_status (brickinfo, GF_BRICK_STARTED);
+ ret = default_notify (this, GF_EVENT_CHILD_UP, NULL);
+
+ break;
+
+ case RPC_CLNT_DISCONNECT:
+ if (GF_BRICK_STARTED == brickinfo->status)
+ gf_log (this->name, GF_LOG_INFO, "Disconnected from "
+ "%s:%s", brickinfo->hostname, brickinfo->path);
+
+ glusterd_set_brick_status (brickinfo, GF_BRICK_STOPPED);
+ if (rpc_clnt_is_disabled (rpc))
+ GF_FREE (brickid);
+ break;
+
+ default:
+ gf_log (this->name, GF_LOG_TRACE,
+ "got some other RPC event %d", event);
+ break;
+ }
+
+ return ret;
+}
+
+int
+glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data)
+{
+ return glusterd_big_locked_notify (rpc, mydata, event, data,
+ __glusterd_brick_rpc_notify);
+}
+
+int
+__glusterd_nodesvc_rpc_notify (struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data)
+{
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ char *server = NULL;
+ int ret = 0;
+
+ this = THIS;
+ GF_ASSERT (this);
+ conf = this->private;
+ GF_ASSERT (conf);
+
+ server = mydata;
+ if (!server)
+ return 0;
+
+ switch (event) {
+ case RPC_CLNT_CONNECT:
+ gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_CONNECT");
+ (void) glusterd_nodesvc_set_online_status (server, _gf_true);
+ ret = default_notify (this, GF_EVENT_CHILD_UP, NULL);
+
+ break;
+
+ case RPC_CLNT_DISCONNECT:
+ gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_DISCONNECT");
+ (void) glusterd_nodesvc_set_online_status (server, _gf_false);
+ break;
+
+ default:
+ gf_log (this->name, GF_LOG_TRACE,
+ "got some other RPC event %d", event);
+ break;
+ }
+
+ return ret;
+}
+
+int
+glusterd_nodesvc_rpc_notify (struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data)
+{
+ return glusterd_big_locked_notify (rpc, mydata, event, data,
+ __glusterd_nodesvc_rpc_notify);
+}
+
+int
+glusterd_friend_remove_notify (glusterd_peerctx_t *peerctx)
+{
+ int ret = -1;
+ glusterd_friend_sm_event_t *new_event = NULL;
+ glusterd_peerinfo_t *peerinfo = peerctx->peerinfo;
+ rpcsvc_request_t *req = peerctx->args.req;
+ char *errstr = peerctx->errstr;
+ dict_t *dict = NULL;
+
+ GF_ASSERT (peerctx);
+
+ peerinfo = peerctx->peerinfo;
+ req = peerctx->args.req;
+ dict = peerctx->args.dict;
+ errstr = peerctx->errstr;
+
+ ret = glusterd_friend_sm_new_event (GD_FRIEND_EVENT_REMOVE_FRIEND,
+ &new_event);
+ if (!ret) {
+ if (!req) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "Unable to find the request for responding "
+ "to User (%s)", peerinfo->hostname);
+ goto out;
+ }
+
+ glusterd_xfer_cli_probe_resp (req, -1, ENOTCONN, errstr,
+ peerinfo->hostname,
+ peerinfo->port, dict);
+
+ new_event->peerinfo = peerinfo;
+ ret = glusterd_friend_sm_inject_event (new_event);
+
+ } else {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "Unable to create event for removing peer %s",
+ peerinfo->hostname);
+ }
+
+out:
+ return ret;
+}
+
+int
+__glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data)
+{
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ int ret = 0;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_peerctx_t *peerctx = NULL;
+ gf_boolean_t quorum_action = _gf_false;
+ glusterd_volinfo_t *volinfo = NULL;
+
+ peerctx = mydata;
+ if (!peerctx)
+ return 0;
+
+ peerinfo = peerctx->peerinfo;
+ this = THIS;
+ conf = this->private;
+
+ switch (event) {
+ case RPC_CLNT_CONNECT:
+ {
+ gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_CONNECT");
+ peerinfo->connected = 1;
+ peerinfo->quorum_action = _gf_true;
+
+ ret = glusterd_peer_dump_version (this, rpc, peerctx);
+ if (ret)
+ gf_log ("", GF_LOG_ERROR, "glusterd handshake failed");
+ break;
+ }
+
+ case RPC_CLNT_DISCONNECT:
+ {
+ gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_DISCONNECT %d",
+ peerinfo->state.state);
+
+ if (peerinfo->connected) {
+ list_for_each_entry (volinfo, &conf->volumes, vol_list) {
+ ret = glusterd_mgmt_v3_unlock (volinfo->volname,
+ peerinfo->uuid,
+ "vol");
+ if (ret)
+ gf_log (this->name, GF_LOG_TRACE,
+ "Lock not released for %s",
+ volinfo->volname);
+ }
+
+ ret = 0;
+ }
+
+ if ((peerinfo->quorum_contrib != QUORUM_DOWN) &&
+ (peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED)) {
+ peerinfo->quorum_contrib = QUORUM_DOWN;
+ quorum_action = _gf_true;
+ peerinfo->quorum_action = _gf_false;
+ }
+
+ /* Remove peer if it is not a friend and connection/handshake
+ * fails, and notify cli. Happens only during probe.
+ */
+ if (peerinfo->state.state == GD_FRIEND_STATE_DEFAULT) {
+ glusterd_friend_remove_notify (peerctx);
+ goto out;
+ }
+
+ peerinfo->connected = 0;
+ break;
+ }
+ default:
+ gf_log (this->name, GF_LOG_TRACE,
+ "got some other RPC event %d", event);
+ ret = 0;
+ break;
+ }
+
+out:
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+ if (quorum_action)
+ glusterd_do_quorum_action ();
+ return ret;
+}
+
+int
+glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data)
+{
+ return glusterd_big_locked_notify (rpc, mydata, event, data,
+ __glusterd_peer_rpc_notify);
+}
+
+int
+glusterd_null (rpcsvc_request_t *req)
+{
+
+ return 0;
+}
+
+rpcsvc_actor_t gd_svc_mgmt_actors[] = {
+ [GLUSTERD_MGMT_NULL] = { "NULL", GLUSTERD_MGMT_NULL, glusterd_null, NULL, 0, DRC_NA},
+ [GLUSTERD_MGMT_CLUSTER_LOCK] = { "CLUSTER_LOCK", GLUSTERD_MGMT_CLUSTER_LOCK, glusterd_handle_cluster_lock, NULL, 0, DRC_NA},
+ [GLUSTERD_MGMT_CLUSTER_UNLOCK] = { "CLUSTER_UNLOCK", GLUSTERD_MGMT_CLUSTER_UNLOCK, glusterd_handle_cluster_unlock, NULL, 0, DRC_NA},
+ [GLUSTERD_MGMT_STAGE_OP] = { "STAGE_OP", GLUSTERD_MGMT_STAGE_OP, glusterd_handle_stage_op, NULL, 0, DRC_NA},
+ [GLUSTERD_MGMT_COMMIT_OP] = { "COMMIT_OP", GLUSTERD_MGMT_COMMIT_OP, glusterd_handle_commit_op, NULL, 0, DRC_NA},
+};
+
+struct rpcsvc_program gd_svc_mgmt_prog = {
+ .progname = "GlusterD svc mgmt",
+ .prognum = GD_MGMT_PROGRAM,
+ .progver = GD_MGMT_VERSION,
+ .numactors = GLUSTERD_MGMT_MAXVALUE,
+ .actors = gd_svc_mgmt_actors,
+ .synctask = _gf_true,
+};
+
+rpcsvc_actor_t gd_svc_peer_actors[] = {
+ [GLUSTERD_FRIEND_NULL] = { "NULL", GLUSTERD_MGMT_NULL, glusterd_null, NULL, 0, DRC_NA},
+ [GLUSTERD_PROBE_QUERY] = { "PROBE_QUERY", GLUSTERD_PROBE_QUERY, glusterd_handle_probe_query, NULL, 0, DRC_NA},
+ [GLUSTERD_FRIEND_ADD] = { "FRIEND_ADD", GLUSTERD_FRIEND_ADD, glusterd_handle_incoming_friend_req, NULL, 0, DRC_NA},
+ [GLUSTERD_FRIEND_REMOVE] = { "FRIEND_REMOVE", GLUSTERD_FRIEND_REMOVE, glusterd_handle_incoming_unfriend_req, NULL, 0, DRC_NA},
+ [GLUSTERD_FRIEND_UPDATE] = { "FRIEND_UPDATE", GLUSTERD_FRIEND_UPDATE, glusterd_handle_friend_update, NULL, 0, DRC_NA},
+};
+
+struct rpcsvc_program gd_svc_peer_prog = {
+ .progname = "GlusterD svc peer",
+ .prognum = GD_FRIEND_PROGRAM,
+ .progver = GD_FRIEND_VERSION,
+ .numactors = GLUSTERD_FRIEND_MAXVALUE,
+ .actors = gd_svc_peer_actors,
+ .synctask = _gf_false,
+};
+
+
+
+rpcsvc_actor_t gd_svc_cli_actors[] = {
+ [GLUSTER_CLI_PROBE] = { "CLI_PROBE", GLUSTER_CLI_PROBE, glusterd_handle_cli_probe, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_CREATE_VOLUME] = { "CLI_CREATE_VOLUME", GLUSTER_CLI_CREATE_VOLUME, glusterd_handle_create_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_DEFRAG_VOLUME] = { "CLI_DEFRAG_VOLUME", GLUSTER_CLI_DEFRAG_VOLUME, glusterd_handle_defrag_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_DEPROBE] = { "FRIEND_REMOVE", GLUSTER_CLI_DEPROBE, glusterd_handle_cli_deprobe, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_LIST_FRIENDS] = { "LIST_FRIENDS", GLUSTER_CLI_LIST_FRIENDS, glusterd_handle_cli_list_friends, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_UUID_RESET] = { "UUID_RESET", GLUSTER_CLI_UUID_RESET, glusterd_handle_cli_uuid_reset, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_UUID_GET] = { "UUID_GET", GLUSTER_CLI_UUID_GET, glusterd_handle_cli_uuid_get, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_START_VOLUME] = { "START_VOLUME", GLUSTER_CLI_START_VOLUME, glusterd_handle_cli_start_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_STOP_VOLUME] = { "STOP_VOLUME", GLUSTER_CLI_STOP_VOLUME, glusterd_handle_cli_stop_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_DELETE_VOLUME] = { "DELETE_VOLUME", GLUSTER_CLI_DELETE_VOLUME, glusterd_handle_cli_delete_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_GET_VOLUME] = { "GET_VOLUME", GLUSTER_CLI_GET_VOLUME, glusterd_handle_cli_get_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_ADD_BRICK] = { "ADD_BRICK", GLUSTER_CLI_ADD_BRICK, glusterd_handle_add_brick, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_REPLACE_BRICK] = { "REPLACE_BRICK", GLUSTER_CLI_REPLACE_BRICK, glusterd_handle_replace_brick, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_REMOVE_BRICK] = { "REMOVE_BRICK", GLUSTER_CLI_REMOVE_BRICK, glusterd_handle_remove_brick, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_LOG_ROTATE] = { "LOG FILENAME", GLUSTER_CLI_LOG_ROTATE, glusterd_handle_log_rotate, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_SET_VOLUME] = { "SET_VOLUME", GLUSTER_CLI_SET_VOLUME, glusterd_handle_set_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_SYNC_VOLUME] = { "SYNC_VOLUME", GLUSTER_CLI_SYNC_VOLUME, glusterd_handle_sync_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_RESET_VOLUME] = { "RESET_VOLUME", GLUSTER_CLI_RESET_VOLUME, glusterd_handle_reset_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_FSM_LOG] = { "FSM_LOG", GLUSTER_CLI_FSM_LOG, glusterd_handle_fsm_log, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_GSYNC_SET] = { "GSYNC_SET", GLUSTER_CLI_GSYNC_SET, glusterd_handle_gsync_set, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_PROFILE_VOLUME] = { "STATS_VOLUME", GLUSTER_CLI_PROFILE_VOLUME, glusterd_handle_cli_profile_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_QUOTA] = { "QUOTA", GLUSTER_CLI_QUOTA, glusterd_handle_quota, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_GETWD] = { "GETWD", GLUSTER_CLI_GETWD, glusterd_handle_getwd, NULL, 1, DRC_NA},
+ [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME", GLUSTER_CLI_STATUS_VOLUME, glusterd_handle_status_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_MOUNT] = { "MOUNT", GLUSTER_CLI_MOUNT, glusterd_handle_mount, NULL, 1, DRC_NA},
+ [GLUSTER_CLI_UMOUNT] = { "UMOUNT", GLUSTER_CLI_UMOUNT, glusterd_handle_umount, NULL, 1, DRC_NA},
+ [GLUSTER_CLI_HEAL_VOLUME] = { "HEAL_VOLUME", GLUSTER_CLI_HEAL_VOLUME, glusterd_handle_cli_heal_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_STATEDUMP_VOLUME] = {"STATEDUMP_VOLUME", GLUSTER_CLI_STATEDUMP_VOLUME, glusterd_handle_cli_statedump_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_LIST_VOLUME] = {"LIST_VOLUME", GLUSTER_CLI_LIST_VOLUME, glusterd_handle_cli_list_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_CLRLOCKS_VOLUME] = {"CLEARLOCKS_VOLUME", GLUSTER_CLI_CLRLOCKS_VOLUME, glusterd_handle_cli_clearlocks_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_COPY_FILE] = {"COPY_FILE", GLUSTER_CLI_COPY_FILE, glusterd_handle_copy_file, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_SYS_EXEC] = {"SYS_EXEC", GLUSTER_CLI_SYS_EXEC, glusterd_handle_sys_exec, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_SNAP] = {"SNAP", GLUSTER_CLI_SNAP, glusterd_handle_snapshot, NULL, 0, DRC_NA},
+};
+
+struct rpcsvc_program gd_svc_cli_prog = {
+ .progname = "GlusterD svc cli",
+ .prognum = GLUSTER_CLI_PROGRAM,
+ .progver = GLUSTER_CLI_VERSION,
+ .numactors = GLUSTER_CLI_MAXVALUE,
+ .actors = gd_svc_cli_actors,
+ .synctask = _gf_true,
+};
+
+/* This is a minimal RPC prog, which contains only the readonly RPC procs from
+ * the cli rpcsvc
+ */
+rpcsvc_actor_t gd_svc_cli_actors_ro[] = {
+ [GLUSTER_CLI_LIST_FRIENDS] = { "LIST_FRIENDS", GLUSTER_CLI_LIST_FRIENDS, glusterd_handle_cli_list_friends, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_UUID_GET] = { "UUID_GET", GLUSTER_CLI_UUID_GET, glusterd_handle_cli_uuid_get, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_GET_VOLUME] = { "GET_VOLUME", GLUSTER_CLI_GET_VOLUME, glusterd_handle_cli_get_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_GETWD] = { "GETWD", GLUSTER_CLI_GETWD, glusterd_handle_getwd, NULL, 1, DRC_NA},
+ [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME", GLUSTER_CLI_STATUS_VOLUME, glusterd_handle_status_volume, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_LIST_VOLUME] = {"LIST_VOLUME", GLUSTER_CLI_LIST_VOLUME, glusterd_handle_cli_list_volume, NULL, 0, DRC_NA},
+};
+
+struct rpcsvc_program gd_svc_cli_prog_ro = {
+ .progname = "GlusterD svc cli read-only",
+ .prognum = GLUSTER_CLI_PROGRAM,
+ .progver = GLUSTER_CLI_VERSION,
+ .numactors = GLUSTER_CLI_MAXVALUE,
+ .actors = gd_svc_cli_actors_ro,
+ .synctask = _gf_true,
+};
diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c
new file mode 100644
index 000000000..0f0357c4c
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c
@@ -0,0 +1,1366 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "defaults.h"
+#include "glusterfs.h"
+#include "compat-errno.h"
+
+#include "glusterd.h"
+#include "glusterd-utils.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-store.h"
+
+#include "glusterfs3.h"
+#include "protocol-common.h"
+#include "rpcsvc.h"
+#include "rpc-common-xdr.h"
+
+extern struct rpc_clnt_program gd_peer_prog;
+extern struct rpc_clnt_program gd_mgmt_prog;
+extern struct rpc_clnt_program gd_mgmt_v3_prog;
+extern struct rpc_clnt_program gd_mgmt_v3_prog;
+
+
+#define TRUSTED_PREFIX "trusted-"
+
+typedef ssize_t (*gfs_serialize_t) (struct iovec outmsg, void *data);
+
+static int
+get_snap_volname_and_volinfo (const char *volpath, char **volname,
+ glusterd_volinfo_t **volinfo)
+{
+ int ret = -1;
+ char *save_ptr = NULL;
+ char *str_token = NULL;
+ char *snapname = NULL;
+ char *volname_token = NULL;
+ char *vol = NULL;
+ glusterd_snap_t *snap = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (volpath);
+ GF_ASSERT (volinfo);
+
+ str_token = gf_strdup (volpath);
+ if (NULL == str_token) {
+ goto out;
+ }
+
+ /* Input volname will have below formats:
+ * /snaps/<snapname>/<volname>.<hostname>
+ * or
+ * /snaps/<snapname>/<parent-volname>
+ * We need to extract snapname and parent_volname */
+
+ /*split string by "/" */
+ strtok_r (str_token, "/", &save_ptr);
+ snapname = strtok_r(NULL, "/", &save_ptr);
+ if (!snapname) {
+ gf_log(this->name, GF_LOG_ERROR, "Invalid path: %s", volpath);
+ goto out;
+ }
+
+ volname_token = strtok_r(NULL, "/", &save_ptr);
+ if (!volname_token) {
+ gf_log(this->name, GF_LOG_ERROR, "Invalid path: %s", volpath);
+ goto out;
+ }
+
+ snap = glusterd_find_snap_by_name (snapname);
+ if (!snap) {
+ gf_log(this->name, GF_LOG_ERROR, "Failed to "
+ "fetch snap %s", snapname);
+ goto out;
+ }
+
+ /* Find if its a parent volume name or snap volume
+ * name. This function will succeed if volname_token
+ * is a parent volname
+ */
+ ret = glusterd_volinfo_find (volname_token, volinfo);
+ if (ret) {
+ *volname = gf_strdup (volname_token);
+ if (NULL == *volname) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_snap_volinfo_find (volname_token, snap,
+ volinfo);
+ if (ret) {
+ /* Split the volume name */
+ vol = strtok_r (volname_token, ".", &save_ptr);
+ if (!vol) {
+ gf_log(this->name, GF_LOG_ERROR, "Invalid "
+ "volname (%s)", volname_token);
+ goto out;
+ }
+
+ ret = glusterd_snap_volinfo_find (vol, snap, volinfo);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "Failed to "
+ "fetch snap volume from volname (%s)",
+ vol);
+ goto out;
+ }
+ }
+ } else {
+ /*volname_token is parent volname*/
+ ret = glusterd_snap_volinfo_find_from_parent_volname (
+ volname_token, snap, volinfo);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "Failed to "
+ "fetch snap volume from parent "
+ "volname (%s)", volname_token);
+ goto out;
+ }
+
+ /* Since volname_token is a parent volname we should
+ * get the snap volname here*/
+ *volname = gf_strdup ((*volinfo)->volname);
+ if (NULL == *volname) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+out:
+ if (ret && NULL != *volname) {
+ GF_FREE (*volname);
+ *volname = NULL;
+ }
+ return ret;
+}
+
+static size_t
+build_volfile_path (const char *volname, char *path,
+ size_t path_len, char *trusted_str)
+{
+ struct stat stbuf = {0,};
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
+ char *vol = NULL;
+ char *dup_volname = NULL;
+ char *free_ptr = NULL;
+ char *save_ptr = NULL;
+ char *str_token = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *server = NULL;
+ const char *volname_ptr = NULL;
+ char path_prefix [PATH_MAX] = {0,};
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+ GF_ASSERT (volname);
+ GF_ASSERT (path);
+
+ if (strstr (volname, "gluster/")) {
+ server = strchr (volname, '/') + 1;
+ glusterd_get_nodesvc_volfile (server, priv->workdir,
+ path, path_len);
+ ret = 1;
+ goto out;
+ } else if ((str_token = strstr (volname, "/snaps/"))) {
+ ret = get_snap_volname_and_volinfo (str_token, &dup_volname,
+ &volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get snap"
+ " volinfo from path (%s)", volname);
+ ret = -1;
+ goto out;
+ }
+
+ snprintf (path_prefix, sizeof (path_prefix), "%s/snaps/%s",
+ priv->workdir, volinfo->snapshot->snapname);
+
+ free_ptr = dup_volname;
+ volname_ptr = dup_volname;
+ goto gotvolinfo;
+ } else if (volname[0] != '/') {
+ /* Normal behavior */
+ dup_volname = gf_strdup (volname);
+ } else {
+ /* Bringing in NFS like behavior for mount command, */
+ /* With this, one can mount a volume with below cmd */
+ /* bash# mount -t glusterfs server:/volume /mnt/pnt */
+ dup_volname = gf_strdup (&volname[1]);
+ }
+
+ if (!dup_volname) {
+ gf_log(THIS->name, GF_LOG_ERROR, "strdup failed");
+ ret = -1;
+ goto out;
+ }
+ free_ptr = dup_volname;
+ volname_ptr = volname;
+
+ snprintf (path_prefix, sizeof (path_prefix), "%s/vols",
+ priv->workdir);
+
+ ret = glusterd_volinfo_find (dup_volname, &volinfo);
+
+ if (ret) {
+ /* Split the volume name */
+ vol = strtok_r (dup_volname, ".", &save_ptr);
+ if (!vol)
+ goto out;
+
+ ret = glusterd_volinfo_find (vol, &volinfo);
+ if (ret)
+ goto out;
+ }
+
+gotvolinfo:
+ if (!glusterd_auth_get_username (volinfo))
+ trusted_str = NULL;
+
+ ret = snprintf (path, path_len, "%s/%s/%s.vol", path_prefix,
+ volinfo->volname, volname_ptr);
+ if (ret == -1)
+ goto out;
+
+ ret = stat (path, &stbuf);
+
+ if ((ret == -1) && (errno == ENOENT)) {
+ snprintf (path, path_len, "%s/%s/%s%s-fuse.vol",
+ path_prefix, volinfo->volname,
+ (trusted_str ? trusted_str : ""),
+ dup_volname);
+
+ ret = stat (path, &stbuf);
+ }
+
+ if ((ret == -1) && (errno == ENOENT)) {
+ snprintf (path, path_len, "%s/%s/%s-tcp.vol",
+ path_prefix, volinfo->volname, volname_ptr);
+ }
+
+ ret = 1;
+out:
+ GF_FREE (free_ptr);
+ return ret;
+}
+
+/* Get and store op-versions of the clients sending the getspec request
+ * Clients of versions <= 3.3, don't send op-versions, their op-versions are
+ * defaulted to 1
+ */
+static int
+_get_client_op_versions (gf_getspec_req *args, peer_info_t *peerinfo)
+{
+ int ret = 0;
+ int client_max_op_version = 1;
+ int client_min_op_version = 1;
+ dict_t *dict = NULL;
+
+ GF_ASSERT (args);
+ GF_ASSERT (peerinfo);
+
+ if (args->xdata.xdata_len) {
+ dict = dict_new ();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (args->xdata.xdata_val,
+ args->xdata.xdata_len, &dict);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "Failed to unserialize request dictionary");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "min-op-version",
+ &client_min_op_version);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "Failed to get client-min-op-version");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "max-op-version",
+ &client_max_op_version);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "Failed to get client-max-op-version");
+ goto out;
+ }
+ }
+
+ peerinfo->max_op_version = client_max_op_version;
+ peerinfo->min_op_version = client_min_op_version;
+
+out:
+ return ret;
+}
+
+/* Checks if the client supports the volume, ie. client can understand all the
+ * options in the volfile
+ */
+static gf_boolean_t
+_client_supports_volume (peer_info_t *peerinfo, int32_t *op_errno)
+{
+ gf_boolean_t ret = _gf_true;
+ glusterd_volinfo_t *volinfo = NULL;
+
+ GF_ASSERT (peerinfo);
+ GF_ASSERT (op_errno);
+
+
+ /* Only check when the volfile being requested is a volume. Not finding
+ * a volinfo implies that the volfile requested for is not of a gluster
+ * volume. A non volume volfile is requested by the local gluster
+ * services like shd and nfs-server. These need not be checked as they
+ * will be running at the same op-version as glusterd and will be able
+ * to support all the features
+ */
+ if ((glusterd_volinfo_find (peerinfo->volname, &volinfo) == 0) &&
+ ((peerinfo->min_op_version > volinfo->client_op_version) ||
+ (peerinfo->max_op_version < volinfo->client_op_version))) {
+ ret = _gf_false;
+ *op_errno = ENOTSUP;
+ gf_log ("glusterd", GF_LOG_INFO,
+ "Client %s (%d -> %d) doesn't support required "
+ "op-version (%d). Rejecting volfile request.",
+ peerinfo->identifier, peerinfo->min_op_version,
+ peerinfo->max_op_version, volinfo->client_op_version);
+ }
+
+ return ret;
+}
+
+int
+__server_getspec (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ int32_t op_errno = 0;
+ int32_t spec_fd = -1;
+ size_t file_len = 0;
+ char filename[PATH_MAX] = {0,};
+ struct stat stbuf = {0,};
+ char *volume = NULL;
+ char *tmp = NULL;
+ int cookie = 0;
+ rpc_transport_t *trans = NULL;
+ gf_getspec_req args = {0,};
+ gf_getspec_rsp rsp = {0,};
+ char addrstr[RPCSVC_PEER_STRLEN] = {0};
+ peer_info_t *peerinfo = NULL;
+
+ ret = xdr_to_generic (req->msg[0], &args,
+ (xdrproc_t)xdr_gf_getspec_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto fail;
+ }
+
+ peerinfo = &req->trans->peerinfo;
+
+ volume = args.key;
+ /* Need to strip leading '/' from volnames. This was introduced to
+ * support nfs style mount parameters for native gluster mount
+ */
+ if (volume[0] == '/')
+ strncpy (peerinfo->volname, &volume[1], strlen(&volume[1]));
+ else
+ strncpy (peerinfo->volname, volume, strlen(volume));
+
+ ret = _get_client_op_versions (&args, peerinfo);
+ if (ret)
+ goto fail;
+
+ if (!_client_supports_volume (peerinfo, &op_errno)) {
+ ret = -1;
+ goto fail;
+ }
+
+ trans = req->trans;
+ /* addrstr will be empty for cli socket connections */
+ ret = rpcsvc_transport_peername (trans, (char *)&addrstr,
+ sizeof (addrstr));
+ if (ret)
+ goto fail;
+
+ tmp = strrchr (addrstr, ':');
+ if (tmp)
+ *tmp = '\0';
+
+ /* The trusted volfiles are given to the glusterd owned process like NFS
+ * server, self-heal daemon etc., so that they are not inadvertently
+ * blocked by a auth.{allow,reject} setting. The trusted volfile is not
+ * meant for external users.
+ */
+ if (strlen (addrstr) && gf_is_local_addr (addrstr)) {
+
+ ret = build_volfile_path (volume, filename,
+ sizeof (filename),
+ TRUSTED_PREFIX);
+ } else {
+ ret = build_volfile_path (volume, filename,
+ sizeof (filename), NULL);
+ }
+
+ if (ret > 0) {
+ /* to allocate the proper buffer to hold the file data */
+ ret = stat (filename, &stbuf);
+ if (ret < 0){
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "Unable to stat %s (%s)",
+ filename, strerror (errno));
+ goto fail;
+ }
+
+ spec_fd = open (filename, O_RDONLY);
+ if (spec_fd < 0) {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "Unable to open %s (%s)",
+ filename, strerror (errno));
+ goto fail;
+ }
+ ret = file_len = stbuf.st_size;
+ } else {
+ op_errno = ENOENT;
+ }
+
+ if (file_len) {
+ rsp.spec = CALLOC (file_len+1, sizeof (char));
+ if (!rsp.spec) {
+ ret = -1;
+ op_errno = ENOMEM;
+ goto fail;
+ }
+ ret = read (spec_fd, rsp.spec, file_len);
+
+ close (spec_fd);
+ }
+
+ /* convert to XDR */
+fail:
+ rsp.op_ret = ret;
+
+ if (op_errno)
+ rsp.op_errno = gf_errno_to_error (op_errno);
+ if (cookie)
+ rsp.op_errno = cookie;
+
+ if (!rsp.spec)
+ rsp.spec = strdup ("");
+
+ glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_getspec_rsp);
+ free (args.key);//malloced by xdr
+ free (rsp.spec);
+
+ return 0;
+}
+
+int
+server_getspec (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __server_getspec);
+}
+
+int32_t
+__server_event_notify (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ int32_t op_errno = 0;
+ gf_event_notify_req args = {0,};
+ gf_event_notify_rsp rsp = {0,};
+ dict_t *dict = NULL;
+ gf_boolean_t need_rsp = _gf_true;
+
+ ret = xdr_to_generic (req->msg[0], &args,
+ (xdrproc_t)xdr_gf_event_notify_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto fail;
+ }
+
+ if (args.dict.dict_len) {
+ dict = dict_new ();
+ if (!dict)
+ return ret;
+ ret = dict_unserialize (args.dict.dict_val,
+ args.dict.dict_len, &dict);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Failed to unserialize req");
+ goto fail;
+ }
+ }
+
+ switch (args.op) {
+ case GF_EN_DEFRAG_STATUS:
+ gf_log ("", GF_LOG_INFO,
+ "received defrag status updated");
+ if (dict) {
+ glusterd_defrag_event_notify_handle (dict);
+ need_rsp = _gf_false;
+ }
+ break;
+ default:
+ gf_log ("", GF_LOG_ERROR, "Unknown op received in event "
+ "notify");
+ ret = -1;
+ break;
+ }
+
+fail:
+ rsp.op_ret = ret;
+
+ if (op_errno)
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ if (need_rsp)
+ glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_event_notify_rsp);
+ if (dict)
+ dict_unref (dict);
+ free (args.dict.dict_val);//malloced by xdr
+
+ return 0;
+}
+
+int32_t
+server_event_notify (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __server_event_notify);
+}
+
+int
+gd_validate_cluster_op_version (xlator_t *this, int cluster_op_version,
+ char *peerid)
+{
+ int ret = -1;
+ glusterd_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (cluster_op_version > GD_OP_VERSION_MAX) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "operating version %d is more than the maximum "
+ "supported (%d) on the machine (as per peer request "
+ "from %s)", cluster_op_version, GD_OP_VERSION_MAX,
+ peerid);
+ goto out;
+ }
+
+ /* The peer can only reduce its op-version when it doesn't have any
+ * volumes. Reducing op-version when it already contains volumes can
+ * lead to inconsistencies in the cluster
+ */
+ if ((cluster_op_version < conf->op_version) &&
+ !list_empty (&conf->volumes)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "cannot reduce operating version to %d from current "
+ "version %d as volumes exist (as per peer request from "
+ "%s)", cluster_op_version, conf->op_version, peerid);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+__glusterd_mgmt_hndsk_versions (rpcsvc_request_t *req)
+{
+ dict_t *dict = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ int ret = -1;
+ int op_errno = EINVAL;
+ gf_mgmt_hndsk_req args = {{0,},};
+ gf_mgmt_hndsk_rsp rsp = {0,};
+
+ this = THIS;
+ conf = this->private;
+
+ ret = xdr_to_generic (req->msg[0], &args,
+ (xdrproc_t)xdr_gf_mgmt_hndsk_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ ret = dict_set_int32 (dict, GD_OP_VERSION_KEY, conf->op_version);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set operating version");
+ rsp.op_ret = ret;
+ goto out;
+ }
+
+ ret = dict_set_int32 (dict, GD_MIN_OP_VERSION_KEY, GD_OP_VERSION_MIN);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set %s", GD_MIN_OP_VERSION_KEY);
+ rsp.op_ret = ret;
+ goto out;
+ }
+
+ ret = dict_set_int32 (dict, GD_MAX_OP_VERSION_KEY, GD_OP_VERSION_MAX);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set %s", GD_MAX_OP_VERSION_KEY);
+ rsp.op_ret = ret;
+ goto out;
+ }
+
+ ret = 0;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, dict, (&rsp.hndsk.hndsk_val),
+ rsp.hndsk.hndsk_len, op_errno, out);
+out:
+
+ rsp.op_ret = ret;
+ rsp.op_errno = op_errno;
+
+ glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_mgmt_hndsk_rsp);
+
+ ret = 0;
+
+ if (dict)
+ dict_unref (dict);
+
+ if (args.hndsk.hndsk_val)
+ free (args.hndsk.hndsk_val);
+
+ if (rsp.hndsk.hndsk_val)
+ GF_FREE (rsp.hndsk.hndsk_val);
+
+ return ret;
+}
+
+int
+glusterd_mgmt_hndsk_versions (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ __glusterd_mgmt_hndsk_versions);
+}
+
+int
+__glusterd_mgmt_hndsk_versions_ack (rpcsvc_request_t *req)
+{
+ dict_t *clnt_dict = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ int ret = -1;
+ int op_errno = EINVAL;
+ int peer_op_version = 0;
+ gf_mgmt_hndsk_req args = {{0,},};
+ gf_mgmt_hndsk_rsp rsp = {0,};
+
+ this = THIS;
+ conf = this->private;
+
+ ret = xdr_to_generic (req->msg[0], &args,
+ (xdrproc_t)xdr_gf_mgmt_hndsk_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, clnt_dict, args.hndsk.hndsk_val,
+ (args.hndsk.hndsk_len), ret, op_errno,
+ out);
+
+ ret = dict_get_int32 (clnt_dict, GD_OP_VERSION_KEY, &peer_op_version);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to get the op-version key peer=%s",
+ req->trans->peerinfo.identifier);
+ goto out;
+ }
+
+ ret = gd_validate_cluster_op_version (this, peer_op_version,
+ req->trans->peerinfo.identifier);
+ if (ret)
+ goto out;
+
+
+ /* As this is ACK from the Cluster for the versions supported,
+ can set the op-version of 'this' glusterd to the one
+ received. */
+ gf_log (this->name, GF_LOG_INFO, "using the op-version %d",
+ peer_op_version);
+ conf->op_version = peer_op_version;
+ ret = glusterd_store_global_info (this);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Failed to store op-version");
+
+out:
+ rsp.op_ret = ret;
+ rsp.op_errno = op_errno;
+
+ glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_mgmt_hndsk_rsp);
+
+ ret = 0;
+
+ if (clnt_dict)
+ dict_unref (clnt_dict);
+
+ if (args.hndsk.hndsk_val)
+ free (args.hndsk.hndsk_val);
+
+ return ret;
+}
+
+int
+glusterd_mgmt_hndsk_versions_ack (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ __glusterd_mgmt_hndsk_versions_ack);
+}
+
+rpcsvc_actor_t gluster_handshake_actors[] = {
+ [GF_HNDSK_NULL] = {"NULL", GF_HNDSK_NULL, NULL, NULL, 0, DRC_NA},
+ [GF_HNDSK_GETSPEC] = {"GETSPEC", GF_HNDSK_GETSPEC, server_getspec, NULL, 0, DRC_NA},
+ [GF_HNDSK_EVENT_NOTIFY] = {"EVENTNOTIFY", GF_HNDSK_EVENT_NOTIFY, server_event_notify, NULL, 0, DRC_NA},
+};
+
+
+struct rpcsvc_program gluster_handshake_prog = {
+ .progname = "Gluster Handshake",
+ .prognum = GLUSTER_HNDSK_PROGRAM,
+ .progver = GLUSTER_HNDSK_VERSION,
+ .actors = gluster_handshake_actors,
+ .numactors = GF_HNDSK_MAXVALUE,
+};
+
+/* A minimal RPC program just for the cli getspec command */
+rpcsvc_actor_t gluster_cli_getspec_actors[] = {
+ [GF_HNDSK_GETSPEC] = {"GETSPEC", GF_HNDSK_GETSPEC, server_getspec, NULL, 0, DRC_NA},
+};
+
+struct rpcsvc_program gluster_cli_getspec_prog = {
+ .progname = "Gluster Handshake (CLI Getspec)",
+ .prognum = GLUSTER_HNDSK_PROGRAM,
+ .progver = GLUSTER_HNDSK_VERSION,
+ .actors = gluster_cli_getspec_actors,
+ .numactors = GF_HNDSK_MAXVALUE,
+};
+
+
+char *glusterd_dump_proc[GF_DUMP_MAXVALUE] = {
+ [GF_DUMP_NULL] = "NULL",
+ [GF_DUMP_DUMP] = "DUMP",
+};
+
+rpc_clnt_prog_t glusterd_dump_prog = {
+ .progname = "GLUSTERD-DUMP",
+ .prognum = GLUSTER_DUMP_PROGRAM,
+ .progver = GLUSTER_DUMP_VERSION,
+ .procnames = glusterd_dump_proc,
+};
+
+
+rpcsvc_actor_t glusterd_mgmt_hndsk_actors[] = {
+ [GD_MGMT_HNDSK_NULL] = {"NULL", GD_MGMT_HNDSK_NULL, NULL,
+ NULL, 0},
+ [GD_MGMT_HNDSK_VERSIONS] = {"MGMT-VERS", GD_MGMT_HNDSK_VERSIONS,
+ glusterd_mgmt_hndsk_versions, NULL,
+ 0},
+ [GD_MGMT_HNDSK_VERSIONS_ACK] = {"MGMT-VERS-ACK",
+ GD_MGMT_HNDSK_VERSIONS_ACK,
+ glusterd_mgmt_hndsk_versions_ack,
+ NULL, 0},
+};
+
+struct rpcsvc_program glusterd_mgmt_hndsk_prog = {
+ .progname = "Gluster MGMT Handshake",
+ .prognum = GD_MGMT_HNDSK_PROGRAM,
+ .progver = GD_MGMT_HNDSK_VERSION,
+ .actors = glusterd_mgmt_hndsk_actors,
+ .numactors = GD_MGMT_HNDSK_MAXVALUE,
+};
+
+char *glusterd_mgmt_hndsk_proc[GD_MGMT_HNDSK_MAXVALUE] = {
+ [GD_MGMT_HNDSK_NULL] = "NULL",
+ [GD_MGMT_HNDSK_VERSIONS] = "MGMT-VERS",
+ [GD_MGMT_HNDSK_VERSIONS_ACK] = "MGMT-VERS-ACK",
+};
+
+rpc_clnt_prog_t gd_clnt_mgmt_hndsk_prog = {
+ .progname = "Gluster MGMT Handshake",
+ .prognum = GD_MGMT_HNDSK_PROGRAM,
+ .progver = GD_MGMT_HNDSK_VERSION,
+ .procnames = glusterd_mgmt_hndsk_proc,
+};
+
+
+static int
+glusterd_event_connected_inject (glusterd_peerctx_t *peerctx)
+{
+ GF_ASSERT (peerctx);
+
+ glusterd_friend_sm_event_t *event = NULL;
+ glusterd_probe_ctx_t *ctx = NULL;
+ int ret = -1;
+ glusterd_peerinfo_t *peerinfo = NULL;
+
+
+ ret = glusterd_friend_sm_new_event
+ (GD_FRIEND_EVENT_CONNECTED, &event);
+
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get new event");
+ goto out;
+ }
+
+ ctx = GF_CALLOC (1, sizeof(*ctx), gf_gld_mt_probe_ctx_t);
+
+ if (!ctx) {
+ ret = -1;
+ gf_log ("", GF_LOG_ERROR, "Memory not available");
+ goto out;
+ }
+
+ peerinfo = peerctx->peerinfo;
+ ctx->hostname = gf_strdup (peerinfo->hostname);
+ ctx->port = peerinfo->port;
+ ctx->req = peerctx->args.req;
+ ctx->dict = peerctx->args.dict;
+
+ event->peerinfo = peerinfo;
+ event->ctx = ctx;
+
+ ret = glusterd_friend_sm_inject_event (event);
+
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR, "Unable to inject "
+ "EVENT_CONNECTED ret = %d", ret);
+ goto out;
+ }
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "returning %d", ret);
+ return ret;
+}
+
+
+int
+gd_validate_peer_op_version (xlator_t *this, glusterd_peerinfo_t *peerinfo,
+ dict_t *dict, char **errstr)
+{
+ int ret = -1;
+ glusterd_conf_t *conf = NULL;
+ int32_t peer_op_version = 0;
+ int32_t peer_min_op_version = 0;
+ int32_t peer_max_op_version = 0;
+
+ if (!dict && !this && !peerinfo)
+ goto out;
+
+ conf = this->private;
+
+ ret = dict_get_int32 (dict, GD_OP_VERSION_KEY, &peer_op_version);
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32 (dict, GD_MAX_OP_VERSION_KEY,
+ &peer_max_op_version);
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32 (dict, GD_MIN_OP_VERSION_KEY,
+ &peer_min_op_version);
+ if (ret)
+ goto out;
+
+ ret = -1;
+ /* Check if peer can support our op_version */
+ if ((peer_max_op_version < conf->op_version) ||
+ (peer_min_op_version > conf->op_version)) {
+ ret = gf_asprintf (errstr, "Peer %s does not support required "
+ "op-version", peerinfo->hostname);
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ gf_log (this->name , GF_LOG_DEBUG, "Peer %s %s", peerinfo->hostname,
+ ((ret < 0) ? "rejected" : "accepted"));
+ return ret;
+}
+
+int
+__glusterd_mgmt_hndsk_version_ack_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ int op_errno = EINVAL;
+ gf_mgmt_hndsk_rsp rsp = {0,};
+ xlator_t *this = NULL;
+ call_frame_t *frame = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_peerctx_t *peerctx = NULL;
+ char msg[1024] = {0,};
+
+ this = THIS;
+ frame = myframe;
+ peerctx = frame->local;
+ peerinfo = peerctx->peerinfo;
+
+ if (-1 == req->rpc_status) {
+ snprintf (msg, sizeof (msg),
+ "Error through RPC layer, retry again later");
+ gf_log ("", GF_LOG_ERROR, "%s", msg);
+ peerctx->errstr = gf_strdup (msg);
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_mgmt_hndsk_rsp);
+ if (ret < 0) {
+ snprintf (msg, sizeof (msg), "Failed to decode XDR");
+ gf_log ("", GF_LOG_ERROR, "%s", msg);
+ peerctx->errstr = gf_strdup (msg);
+ goto out;
+ }
+
+ op_errno = rsp.op_errno;
+ if (-1 == rsp.op_ret) {
+ ret = -1;
+ snprintf (msg, sizeof (msg),
+ "Failed to get handshake ack from remote server");
+ gf_log (frame->this->name, GF_LOG_ERROR, "%s", msg);
+ peerctx->errstr = gf_strdup (msg);
+ goto out;
+ }
+
+ /* TODO: this is hardcoded as of now, but I don't forsee any problems
+ * with this as long as we are properly handshaking operating versions
+ */
+ peerinfo->mgmt = &gd_mgmt_prog;
+ peerinfo->peer = &gd_peer_prog;
+ peerinfo->mgmt_v3 = &gd_mgmt_v3_prog;
+
+ ret = default_notify (this, GF_EVENT_CHILD_UP, NULL);
+
+ if (GD_MODE_ON == peerctx->args.mode) {
+ ret = glusterd_event_connected_inject (peerctx);
+ peerctx->args.req = NULL;
+ } else if (GD_MODE_SWITCH_ON == peerctx->args.mode) {
+ peerctx->args.mode = GD_MODE_ON;
+ } else {
+ gf_log (this->name, GF_LOG_WARNING, "unknown mode %d",
+ peerctx->args.mode);
+ }
+
+ glusterd_friend_sm ();
+
+ ret = 0;
+out:
+
+ frame->local = NULL;
+ STACK_DESTROY (frame->root);
+
+ if (ret != 0)
+ rpc_transport_disconnect (peerinfo->rpc->conn.trans);
+
+ if (rsp.hndsk.hndsk_val)
+ free (rsp.hndsk.hndsk_val);
+
+ return 0;
+}
+
+int
+glusterd_mgmt_hndsk_version_ack_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ __glusterd_mgmt_hndsk_version_ack_cbk);
+}
+
+int
+__glusterd_mgmt_hndsk_version_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ int op_errno = EINVAL;
+ gf_mgmt_hndsk_rsp rsp = {0,};
+ gf_mgmt_hndsk_req arg = {{0,}};
+ xlator_t *this = NULL;
+ call_frame_t *frame = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_peerctx_t *peerctx = NULL;
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
+ glusterd_conf_t *conf = NULL;
+ char msg[1024] = {0,};
+
+ this = THIS;
+ conf = this->private;
+ frame = myframe;
+ peerctx = frame->local;
+ peerinfo = peerctx->peerinfo;
+
+ if (-1 == req->rpc_status) {
+ ret = -1;
+ snprintf (msg, sizeof (msg),
+ "Error through RPC layer, retry again later");
+ gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+ peerctx->errstr = gf_strdup (msg);
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_mgmt_hndsk_rsp);
+ if (ret < 0) {
+ snprintf (msg, sizeof (msg), "Failed to decode management "
+ "handshake response");
+ gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+ peerctx->errstr = gf_strdup (msg);
+ goto out;
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, dict, rsp.hndsk.hndsk_val,
+ rsp.hndsk.hndsk_len, ret, op_errno,
+ out);
+
+ op_errno = rsp.op_errno;
+ if (-1 == rsp.op_ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to get the 'versions' from peer (%s)",
+ req->conn->trans->peerinfo.identifier);
+ goto out;
+ }
+
+ /* Check if peer can be part of cluster */
+ ret = gd_validate_peer_op_version (this, peerinfo, dict,
+ &peerctx->errstr);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to validate the operating version of peer (%s)",
+ peerinfo->hostname);
+ goto out;
+ }
+
+ rsp_dict = dict_new ();
+ if (!rsp_dict)
+ goto out;
+
+ ret = dict_set_int32 (rsp_dict, GD_OP_VERSION_KEY, conf->op_version);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to set operating version in dict");
+ goto out;
+ }
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, rsp_dict, (&arg.hndsk.hndsk_val),
+ arg.hndsk.hndsk_len, op_errno, out);
+
+ ret = glusterd_submit_request (peerctx->peerinfo->rpc, &arg, frame,
+ &gd_clnt_mgmt_hndsk_prog,
+ GD_MGMT_HNDSK_VERSIONS_ACK, NULL, this,
+ glusterd_mgmt_hndsk_version_ack_cbk,
+ (xdrproc_t)xdr_gf_mgmt_hndsk_req);
+
+out:
+ if (ret) {
+ frame->local = NULL;
+ STACK_DESTROY (frame->root);
+ rpc_transport_disconnect (peerinfo->rpc->conn.trans);
+ }
+
+ if (rsp.hndsk.hndsk_val)
+ free (rsp.hndsk.hndsk_val);
+
+ if (arg.hndsk.hndsk_val)
+ GF_FREE (arg.hndsk.hndsk_val);
+
+ if (dict)
+ dict_unref (dict);
+
+ if (rsp_dict)
+ dict_unref (rsp_dict);
+
+ return 0;
+}
+
+int
+glusterd_mgmt_hndsk_version_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ __glusterd_mgmt_hndsk_version_cbk);
+}
+
+int
+glusterd_mgmt_handshake (xlator_t *this, glusterd_peerctx_t *peerctx)
+{
+ call_frame_t *frame = NULL;
+ gf_mgmt_hndsk_req req = {{0,},};
+ int ret = -1;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ goto out;
+
+ frame->local = peerctx;
+
+ ret = glusterd_submit_request (peerctx->peerinfo->rpc, &req, frame,
+ &gd_clnt_mgmt_hndsk_prog,
+ GD_MGMT_HNDSK_VERSIONS, NULL, this,
+ glusterd_mgmt_hndsk_version_cbk,
+ (xdrproc_t)xdr_gf_mgmt_hndsk_req);
+ ret = 0;
+out:
+ if (ret && frame)
+ STACK_DESTROY (frame->root);
+
+ return ret;
+}
+
+int
+glusterd_set_clnt_mgmt_program (glusterd_peerinfo_t *peerinfo,
+ gf_prog_detail *prog)
+{
+ gf_prog_detail *trav = NULL;
+ int ret = -1;
+
+ if (!peerinfo || !prog)
+ goto out;
+
+ trav = prog;
+
+ while (trav) {
+ ret = -1;
+ if ((gd_mgmt_prog.prognum == trav->prognum) &&
+ (gd_mgmt_prog.progver == trav->progver)) {
+ peerinfo->mgmt = &gd_mgmt_prog;
+ ret = 0;
+ }
+
+ if ((gd_peer_prog.prognum == trav->prognum) &&
+ (gd_peer_prog.progver == trav->progver)) {
+ peerinfo->peer = &gd_peer_prog;
+ ret = 0;
+ }
+
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG,
+ "%s (%"PRId64":%"PRId64") not supported",
+ trav->progname, trav->prognum,
+ trav->progver);
+ }
+
+ trav = trav->next;
+ }
+
+ if (peerinfo->mgmt) {
+ gf_log ("", GF_LOG_INFO,
+ "Using Program %s, Num (%d), Version (%d)",
+ peerinfo->mgmt->progname, peerinfo->mgmt->prognum,
+ peerinfo->mgmt->progver);
+ }
+
+ if (peerinfo->peer) {
+ gf_log ("", GF_LOG_INFO,
+ "Using Program %s, Num (%d), Version (%d)",
+ peerinfo->peer->progname, peerinfo->peer->prognum,
+ peerinfo->peer->progver);
+ }
+
+ if (peerinfo->mgmt_v3) {
+ gf_log ("", GF_LOG_INFO,
+ "Using Program %s, Num (%d), Version (%d)",
+ peerinfo->mgmt_v3->progname, peerinfo->mgmt_v3->prognum,
+ peerinfo->mgmt_v3->progver);
+ }
+
+ ret = 0;
+out:
+ return ret;
+
+}
+
+static gf_boolean_t
+_mgmt_hndsk_prog_present (gf_prog_detail *prog) {
+ gf_boolean_t ret = _gf_false;
+ gf_prog_detail *trav = NULL;
+
+ GF_ASSERT (prog);
+
+ trav = prog;
+
+ while (trav) {
+ if ((trav->prognum == GD_MGMT_HNDSK_PROGRAM) &&
+ (trav->progver == GD_MGMT_HNDSK_VERSION)) {
+ ret = _gf_true;
+ goto out;
+ }
+ trav = trav->next;
+ }
+out:
+ return ret;
+}
+
+int
+__glusterd_peer_dump_version_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ gf_dump_rsp rsp = {0,};
+ xlator_t *this = NULL;
+ gf_prog_detail *trav = NULL;
+ gf_prog_detail *next = NULL;
+ call_frame_t *frame = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_peerctx_t *peerctx = NULL;
+ glusterd_conf_t *conf = NULL;
+ char msg[1024] = {0,};
+
+ this = THIS;
+ conf = this->private;
+ frame = myframe;
+ peerctx = frame->local;
+ peerinfo = peerctx->peerinfo;
+
+ if (-1 == req->rpc_status) {
+ snprintf (msg, sizeof (msg),
+ "Error through RPC layer, retry again later");
+ gf_log ("", GF_LOG_ERROR, "%s", msg);
+ peerctx->errstr = gf_strdup (msg);
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_dump_rsp);
+ if (ret < 0) {
+ snprintf (msg, sizeof (msg), "Failed to decode XDR");
+ gf_log ("", GF_LOG_ERROR, "%s", msg);
+ peerctx->errstr = gf_strdup (msg);
+ goto out;
+ }
+ if (-1 == rsp.op_ret) {
+ snprintf (msg, sizeof (msg),
+ "Failed to get the 'versions' from remote server");
+ gf_log (frame->this->name, GF_LOG_ERROR, "%s", msg);
+ peerctx->errstr = gf_strdup (msg);
+ goto out;
+ }
+
+ if (_mgmt_hndsk_prog_present (rsp.prog)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Proceeding to op-version handshake with peer %s",
+ peerinfo->hostname);
+ ret = glusterd_mgmt_handshake (this, peerctx);
+ goto out;
+ } else if (conf->op_version > 1) {
+ ret = -1;
+ snprintf (msg, sizeof (msg),
+ "Peer %s does not support required op-version",
+ peerinfo->hostname);
+ peerctx->errstr = gf_strdup (msg);
+ gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+ goto out;
+ }
+
+ /* Make sure we assign the proper program to peer */
+ ret = glusterd_set_clnt_mgmt_program (peerinfo, rsp.prog);
+ if (ret) {
+ gf_log ("", GF_LOG_WARNING, "failed to set the mgmt program");
+ goto out;
+ }
+
+ ret = default_notify (this, GF_EVENT_CHILD_UP, NULL);
+
+ if (GD_MODE_ON == peerctx->args.mode) {
+ ret = glusterd_event_connected_inject (peerctx);
+ peerctx->args.req = NULL;
+ } else if (GD_MODE_SWITCH_ON == peerctx->args.mode) {
+ peerctx->args.mode = GD_MODE_ON;
+ } else {
+ gf_log ("", GF_LOG_WARNING, "unknown mode %d",
+ peerctx->args.mode);
+ }
+
+ glusterd_friend_sm();
+ glusterd_op_sm();
+
+ ret = 0;
+
+out:
+
+ /* don't use GF_FREE, buffer was allocated by libc */
+ if (rsp.prog) {
+ trav = rsp.prog;
+ while (trav) {
+ next = trav->next;
+ free (trav->progname);
+ free (trav);
+ trav = next;
+ }
+ }
+
+ frame->local = NULL;
+ STACK_DESTROY (frame->root);
+
+ if (ret != 0)
+ rpc_transport_disconnect (peerinfo->rpc->conn.trans);
+
+ return 0;
+}
+
+
+int
+glusterd_peer_dump_version_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ __glusterd_peer_dump_version_cbk);
+}
+
+int
+glusterd_peer_dump_version (xlator_t *this, struct rpc_clnt *rpc,
+ glusterd_peerctx_t *peerctx)
+{
+ call_frame_t *frame = NULL;
+ gf_dump_req req = {0,};
+ int ret = -1;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ goto out;
+
+ frame->local = peerctx;
+
+ req.gfs_id = 0xcafe;
+
+ ret = glusterd_submit_request (peerctx->peerinfo->rpc, &req, frame,
+ &glusterd_dump_prog, GF_DUMP_DUMP,
+ NULL, this,
+ glusterd_peer_dump_version_cbk,
+ (xdrproc_t)xdr_gf_dump_req);
+out:
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-hooks.c b/xlators/mgmt/glusterd/src/glusterd-hooks.c
new file mode 100644
index 000000000..2b43a452e
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-hooks.c
@@ -0,0 +1,531 @@
+/*
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "globals.h"
+#include "glusterfs.h"
+#include "dict.h"
+#include "xlator.h"
+#include "logging.h"
+#include "run.h"
+#include "defaults.h"
+#include "compat.h"
+#include "compat-errno.h"
+#include "glusterd.h"
+#include "glusterd-sm.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-utils.h"
+#include "glusterd-store.h"
+#include "glusterd-hooks.h"
+
+#include <fnmatch.h>
+
+#define EMPTY ""
+char glusterd_hook_dirnames[GD_OP_MAX][256] =
+{
+ [GD_OP_NONE] = EMPTY,
+ [GD_OP_CREATE_VOLUME] = "create",
+ [GD_OP_START_BRICK] = EMPTY,
+ [GD_OP_STOP_BRICK] = EMPTY,
+ [GD_OP_DELETE_VOLUME] = "delete",
+ [GD_OP_START_VOLUME] = "start",
+ [GD_OP_STOP_VOLUME] = "stop",
+ [GD_OP_DEFRAG_VOLUME] = EMPTY,
+ [GD_OP_ADD_BRICK] = "add-brick",
+ [GD_OP_REMOVE_BRICK] = "remove-brick",
+ [GD_OP_REPLACE_BRICK] = EMPTY,
+ [GD_OP_SET_VOLUME] = "set",
+ [GD_OP_RESET_VOLUME] = EMPTY,
+ [GD_OP_SYNC_VOLUME] = EMPTY,
+ [GD_OP_LOG_ROTATE] = EMPTY,
+ [GD_OP_GSYNC_CREATE] = "gsync-create",
+ [GD_OP_GSYNC_SET] = EMPTY,
+ [GD_OP_PROFILE_VOLUME] = EMPTY,
+ [GD_OP_QUOTA] = EMPTY,
+ [GD_OP_STATUS_VOLUME] = EMPTY,
+ [GD_OP_REBALANCE] = EMPTY,
+ [GD_OP_HEAL_VOLUME] = EMPTY,
+ [GD_OP_STATEDUMP_VOLUME] = EMPTY,
+ [GD_OP_LIST_VOLUME] = EMPTY,
+ [GD_OP_CLEARLOCKS_VOLUME] = EMPTY,
+ [GD_OP_DEFRAG_BRICK_VOLUME] = EMPTY,
+};
+#undef EMPTY
+
+static inline gf_boolean_t
+glusterd_is_hook_enabled (char *script)
+{
+ return (script[0] == 'S');
+}
+
+int
+glusterd_hooks_create_hooks_directory (char *basedir)
+{
+ int ret = -1;
+ int op = GD_OP_NONE;
+ int type = GD_COMMIT_HOOK_NONE;
+ char version_dir[PATH_MAX] = {0, };
+ char path[PATH_MAX] = {0, };
+ char *cmd_subdir = NULL;
+ char type_subdir[GD_COMMIT_HOOK_MAX][256] = {{0, },
+ "pre",
+ "post"};
+ glusterd_conf_t *priv = NULL;
+
+ priv = THIS->private;
+
+ snprintf (path, sizeof (path), "%s/hooks", basedir);
+ ret = mkdir_p (path, 0777, _gf_true);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_CRITICAL, "Unable to create %s due"
+ "to %s", path, strerror (errno));
+ goto out;
+ }
+
+ GLUSTERD_GET_HOOKS_DIR (version_dir, GLUSTERD_HOOK_VER, priv);
+ ret = mkdir_p (version_dir, 0777, _gf_true);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_CRITICAL, "Unable to create %s due "
+ "to %s", version_dir, strerror (errno));
+ goto out;
+ }
+
+ for (op = GD_OP_NONE+1; op < GD_OP_MAX; op++) {
+ cmd_subdir = glusterd_hooks_get_hooks_cmd_subdir (op);
+ if (strlen (cmd_subdir) == 0)
+ continue;
+
+ snprintf (path, sizeof (path), "%s/%s", version_dir,
+ cmd_subdir);
+ ret = mkdir_p (path, 0777, _gf_true);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_CRITICAL,
+ "Unable to create %s due to %s",
+ path, strerror (errno));
+ goto out;
+ }
+
+ for (type = GD_COMMIT_HOOK_PRE; type < GD_COMMIT_HOOK_MAX;
+ type++) {
+ snprintf (path, sizeof (path), "%s/%s/%s",
+ version_dir, cmd_subdir, type_subdir[type]);
+ ret = mkdir_p (path, 0777, _gf_true);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_CRITICAL,
+ "Unable to create %s due to %s",
+ path, strerror (errno));
+ goto out;
+ }
+ }
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+char*
+glusterd_hooks_get_hooks_cmd_subdir (glusterd_op_t op)
+{
+ GF_ASSERT ((op > GD_OP_NONE) && (op < GD_OP_MAX));
+
+ return glusterd_hook_dirnames[op];
+}
+
+int
+glusterd_hooks_set_volume_args (dict_t *dict, runner_t *runner)
+{
+ int i = 0;
+ int count = 0;
+ int ret = -1;
+ char query[1024] = {0,};
+ char *key = NULL;
+ char *value = NULL;
+
+ ret = dict_get_int32 (dict, "count", &count);
+ if (ret)
+ goto out;
+
+ /* This will not happen unless op_ctx
+ * is corrupted*/
+ if (!count)
+ goto out;
+
+ runner_add_arg (runner, "-o");
+ for (i = 1; (ret == 0); i++) {
+ snprintf (query, sizeof (query), "key%d", i);
+ ret = dict_get_str (dict, query, &key);
+ if (ret)
+ continue;
+
+ snprintf (query, sizeof (query), "value%d", i);
+ ret = dict_get_str (dict, query, &value);
+ if (ret)
+ continue;
+
+ runner_argprintf (runner, "%s=%s", key, value);
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static int
+glusterd_hooks_add_op_args (runner_t *runner, glusterd_op_t op,
+ dict_t *op_ctx, glusterd_commit_hook_type_t type)
+{
+ char *hooks_args = NULL;
+ int vol_count = 0;
+ gf_boolean_t truth = _gf_false;
+ glusterd_volinfo_t *voliter = NULL;
+ glusterd_conf_t *priv = NULL;
+ int ret = -1;
+
+ priv = THIS->private;
+ list_for_each_entry (voliter, &priv->volumes,
+ vol_list) {
+ if (glusterd_is_volume_started (voliter))
+ vol_count++;
+ }
+
+ ret = 0;
+ switch (op) {
+ case GD_OP_START_VOLUME:
+ if (type == GD_COMMIT_HOOK_PRE &&
+ vol_count == 0)
+ truth = _gf_true;
+
+ else if (type == GD_COMMIT_HOOK_POST &&
+ vol_count == 1)
+ truth = _gf_true;
+
+ else
+ truth = _gf_false;
+
+ runner_argprintf (runner, "--first=%s",
+ truth? "yes":"no");
+ break;
+
+ case GD_OP_STOP_VOLUME:
+ if (type == GD_COMMIT_HOOK_PRE &&
+ vol_count == 1)
+ truth = _gf_true;
+
+ else if (type == GD_COMMIT_HOOK_POST &&
+ vol_count == 0)
+ truth = _gf_true;
+
+ else
+ truth = _gf_false;
+
+ runner_argprintf (runner, "--last=%s",
+ truth? "yes":"no");
+ break;
+
+ case GD_OP_SET_VOLUME:
+ ret = glusterd_hooks_set_volume_args (op_ctx, runner);
+ break;
+
+ case GD_OP_GSYNC_CREATE:
+ ret = dict_get_str (op_ctx, "hooks_args", &hooks_args);
+ if (ret)
+ gf_log ("", GF_LOG_DEBUG,
+ "No Hooks Arguments.");
+ else
+ gf_log ("", GF_LOG_DEBUG,
+ "Hooks Args = %s", hooks_args);
+ if (hooks_args)
+ runner_argprintf (runner, "%s", hooks_args);
+ break;
+
+ default:
+ break;
+
+ }
+
+ return ret;
+}
+
+int
+glusterd_hooks_run_hooks (char *hooks_path, glusterd_op_t op, dict_t *op_ctx,
+ glusterd_commit_hook_type_t type)
+{
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ runner_t runner = {0, };
+ struct dirent *entry = NULL;
+ DIR *hookdir = NULL;
+ char *volname = NULL;
+ char **lines = NULL;
+ int N = 8; /*arbitrary*/
+ int lineno = 0;
+ int line_count = 0;
+ int ret = -1;
+
+ this = THIS;
+ priv = this->private;
+
+ ret = dict_get_str (op_ctx, "volname", &volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_CRITICAL, "Failed to get volname "
+ "from operation context");
+ goto out;
+ }
+
+ hookdir = opendir (hooks_path);
+ if (!hookdir) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR, "Failed to open dir %s, due "
+ "to %s", hooks_path, strerror (errno));
+ goto out;
+ }
+
+ lines = GF_CALLOC (1, N * sizeof (*lines), gf_gld_mt_charptr);
+ if (!lines) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = -1;
+ line_count = 0;
+ glusterd_for_each_entry (entry, hookdir);
+ while (entry) {
+ if (line_count == N-1) {
+ N *= 2;
+ lines = GF_REALLOC (lines, N * sizeof (char *));
+ if (!lines)
+ goto out;
+ }
+
+ if (glusterd_is_hook_enabled (entry->d_name)) {
+ lines[line_count] = gf_strdup (entry->d_name);
+ line_count++;
+ }
+
+ glusterd_for_each_entry (entry, hookdir);
+ }
+
+ lines[line_count] = NULL;
+ lines = GF_REALLOC (lines, (line_count + 1) * sizeof (char *));
+ if (!lines)
+ goto out;
+
+ qsort (lines, line_count, sizeof (*lines), glusterd_compare_lines);
+
+ for (lineno = 0; lineno < line_count; lineno++) {
+
+ runinit (&runner);
+ runner_argprintf (&runner, "%s/%s", hooks_path, lines[lineno]);
+ /*Add future command line arguments to hook scripts below*/
+ runner_argprintf (&runner, "--volname=%s", volname);
+ ret = glusterd_hooks_add_op_args (&runner, op, op_ctx, type);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to add "
+ "command specific arguments");
+ goto out;
+ }
+
+ ret = runner_run_reuse (&runner);
+ if (ret) {
+ runner_log (&runner, this->name, GF_LOG_ERROR,
+ "Failed to execute script");
+ } else {
+ runner_log (&runner, this->name, GF_LOG_INFO,
+ "Ran script");
+ }
+ runner_end (&runner);
+ }
+
+ ret = 0;
+out:
+ if (lines) {
+ for (lineno = 0; lineno < line_count+1; lineno++)
+ GF_FREE (lines[lineno]);
+
+ GF_FREE (lines);
+ }
+
+ if (hookdir)
+ closedir (hookdir);
+
+ return ret;
+}
+
+int
+glusterd_hooks_post_stub_enqueue (char *scriptdir, glusterd_op_t op,
+ dict_t *op_ctx)
+{
+ int ret = -1;
+ glusterd_hooks_stub_t *stub = NULL;
+ glusterd_hooks_private_t *hooks_priv = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ conf = THIS->private;
+ hooks_priv = conf->hooks_priv;
+
+ ret = glusterd_hooks_stub_init (&stub, scriptdir, op, op_ctx);
+ if (ret)
+ goto out;
+
+ pthread_mutex_lock (&hooks_priv->mutex);
+ {
+ hooks_priv->waitcount++;
+ list_add_tail (&stub->all_hooks, &hooks_priv->list);
+ pthread_cond_signal (&hooks_priv->cond);
+ }
+ pthread_mutex_unlock (&hooks_priv->mutex);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+glusterd_hooks_stub_init (glusterd_hooks_stub_t **stub, char *scriptdir,
+ glusterd_op_t op, dict_t *op_ctx)
+{
+ int ret = -1;
+ glusterd_hooks_stub_t *hooks_stub = NULL;
+
+ GF_ASSERT (stub);
+ if (!stub)
+ goto out;
+
+ hooks_stub = GF_CALLOC (1, sizeof (*hooks_stub),
+ gf_gld_mt_hooks_stub_t);
+ if (!hooks_stub)
+ goto out;
+
+ INIT_LIST_HEAD (&hooks_stub->all_hooks);
+ hooks_stub->op = op;
+ hooks_stub->scriptdir = gf_strdup (scriptdir);
+ if (!hooks_stub->scriptdir)
+ goto out;
+
+ hooks_stub->op_ctx = dict_copy_with_ref (op_ctx, hooks_stub->op_ctx);
+ if (!hooks_stub->op_ctx)
+ goto out;
+
+ *stub = hooks_stub;
+ ret = 0;
+out:
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to initialize "
+ "post hooks stub");
+ glusterd_hooks_stub_cleanup (hooks_stub);
+ }
+
+ return ret;
+}
+
+void
+glusterd_hooks_stub_cleanup (glusterd_hooks_stub_t *stub)
+{
+ if (!stub) {
+ gf_log_callingfn (THIS->name, GF_LOG_WARNING,
+ "hooks_stub is NULL");
+ return;
+ }
+
+ if (stub->op_ctx)
+ dict_unref (stub->op_ctx);
+
+ GF_FREE (stub->scriptdir);
+
+ GF_FREE (stub);
+}
+
+static void*
+hooks_worker (void *args)
+{
+ glusterd_conf_t *conf = NULL;
+ glusterd_hooks_private_t *hooks_priv = NULL;
+ glusterd_hooks_stub_t *stub = NULL;
+
+ THIS = args;
+ conf = THIS->private;
+ hooks_priv = conf->hooks_priv;
+
+ for (;;) {
+ pthread_mutex_lock (&hooks_priv->mutex);
+ {
+ while (list_empty (&hooks_priv->list)) {
+ pthread_cond_wait (&hooks_priv->cond,
+ &hooks_priv->mutex);
+ }
+ stub = list_entry (hooks_priv->list.next,
+ glusterd_hooks_stub_t,
+ all_hooks);
+ list_del_init (&stub->all_hooks);
+ hooks_priv->waitcount--;
+
+ }
+ pthread_mutex_unlock (&hooks_priv->mutex);
+
+ glusterd_hooks_run_hooks (stub->scriptdir, stub->op,
+ stub->op_ctx, GD_COMMIT_HOOK_POST);
+ glusterd_hooks_stub_cleanup (stub);
+ }
+
+ return NULL;
+}
+
+int
+glusterd_hooks_priv_init (glusterd_hooks_private_t **new)
+{
+ int ret = -1;
+ glusterd_hooks_private_t *hooks_priv = NULL;
+
+ if (!new)
+ goto out;
+
+ hooks_priv = GF_CALLOC (1, sizeof (*hooks_priv),
+ gf_gld_mt_hooks_priv_t);
+ if (!hooks_priv)
+ goto out;
+
+ pthread_mutex_init (&hooks_priv->mutex, NULL);
+ pthread_cond_init (&hooks_priv->cond, NULL);
+ INIT_LIST_HEAD (&hooks_priv->list);
+ hooks_priv->waitcount = 0;
+
+ *new = hooks_priv;
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+glusterd_hooks_spawn_worker (xlator_t *this)
+{
+ int ret = -1;
+ glusterd_conf_t *conf = NULL;
+ glusterd_hooks_private_t *hooks_priv = NULL;
+
+
+ ret = glusterd_hooks_priv_init (&hooks_priv);
+ if (ret)
+ goto out;
+
+ conf = this->private;
+ conf->hooks_priv = hooks_priv;
+ ret = pthread_create (&hooks_priv->worker, NULL, hooks_worker,
+ (void *)this);
+ if (ret)
+ gf_log (this->name, GF_LOG_CRITICAL, "Failed to spawn post "
+ "hooks worker thread");
+out:
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-hooks.h b/xlators/mgmt/glusterd/src/glusterd-hooks.h
new file mode 100644
index 000000000..c597ddd2a
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-hooks.h
@@ -0,0 +1,89 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_HOOKS_H_
+#define _GLUSTERD_HOOKS_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <fnmatch.h>
+
+#define GLUSTERD_GET_HOOKS_DIR(path, version, priv) \
+ snprintf (path, PATH_MAX, "%s/hooks/%d", priv->workdir,\
+ version);
+
+#define GLUSTERD_HOOK_VER 1
+
+#define GD_HOOKS_SPECIFIC_KEY "user.*"
+
+typedef enum glusterd_commit_hook_type {
+ GD_COMMIT_HOOK_NONE = 0,
+ GD_COMMIT_HOOK_PRE,
+ GD_COMMIT_HOOK_POST,
+ GD_COMMIT_HOOK_MAX
+} glusterd_commit_hook_type_t;
+
+typedef struct hooks_private {
+ struct list_head list;
+ int waitcount; //debug purposes
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ pthread_t worker;
+} glusterd_hooks_private_t;
+
+typedef struct hooks_stub {
+ struct list_head all_hooks;
+ char *scriptdir;
+ glusterd_op_t op;
+ dict_t *op_ctx;
+
+} glusterd_hooks_stub_t;
+
+
+static inline gf_boolean_t
+is_key_glusterd_hooks_friendly (char *key)
+{
+ gf_boolean_t is_friendly = _gf_false;
+
+ /* This is very specific to hooks friendly behavior */
+ if (fnmatch (GD_HOOKS_SPECIFIC_KEY, key, FNM_NOESCAPE) == 0) {
+ gf_log (THIS->name, GF_LOG_DEBUG, "user namespace key %s", key);
+ is_friendly = _gf_true;
+ }
+
+ return is_friendly;
+}
+
+int
+glusterd_hooks_create_hooks_directory (char *basedir);
+
+char *
+glusterd_hooks_get_hooks_cmd_subdir (glusterd_op_t op);
+
+int
+glusterd_hooks_run_hooks (char *hooks_path, glusterd_op_t op, dict_t *op_ctx,
+ glusterd_commit_hook_type_t type);
+int
+glusterd_hooks_spawn_worker (xlator_t *this);
+
+int
+glusterd_hooks_stub_init (glusterd_hooks_stub_t **stub, char *scriptdir,
+ glusterd_op_t op, dict_t *op_ctx);
+void
+glusterd_hooks_stub_cleanup (glusterd_hooks_stub_t *stub);
+
+int
+glusterd_hooks_post_stub_enqueue (char *scriptdir, glusterd_op_t op,
+ dict_t *op_ctx);
+int
+glusterd_hooks_priv_init (glusterd_hooks_private_t **new);
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-locks.c b/xlators/mgmt/glusterd/src/glusterd-locks.c
new file mode 100644
index 000000000..0af2a186f
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-locks.c
@@ -0,0 +1,637 @@
+/*
+ Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "common-utils.h"
+#include "cli1-xdr.h"
+#include "xdr-generic.h"
+#include "glusterd.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-store.h"
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+#include "glusterd-locks.h"
+#include "run.h"
+#include "syscall.h"
+
+#include <signal.h>
+
+#define GF_MAX_LOCKING_ENTITIES 2
+
+/* Valid entities that the mgmt_v3 lock can hold locks upon *
+ * To add newer entities to be locked, we can just add more *
+ * entries to this table along with the type and default value */
+valid_entities valid_types[] = {
+ { "vol", _gf_true },
+ { "snap", _gf_false },
+ { NULL },
+};
+
+static dict_t *mgmt_v3_lock;
+
+/* Checks if the lock request is for a valid entity */
+gf_boolean_t
+glusterd_mgmt_v3_is_type_valid (char *type)
+{
+ int32_t i = 0;
+ gf_boolean_t ret = _gf_false;
+
+ GF_ASSERT (type);
+
+ for (i = 0; valid_types[i].type; i++) {
+ if (!strcmp (type, valid_types[i].type)) {
+ ret = _gf_true;
+ break;
+ }
+ }
+
+ return ret;
+}
+
+/* Initialize the global mgmt_v3 lock list(dict) when
+ * glusterd is spawned */
+int32_t
+glusterd_mgmt_v3_lock_init ()
+{
+ int32_t ret = -1;
+
+ mgmt_v3_lock = dict_new ();
+ if (!mgmt_v3_lock) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/* Destroy the global mgmt_v3 lock list(dict) when
+ * glusterd cleanup is performed */
+void
+glusterd_mgmt_v3_lock_fini ()
+{
+ if (mgmt_v3_lock)
+ dict_destroy (mgmt_v3_lock);
+}
+
+int32_t
+glusterd_get_mgmt_v3_lock_owner (char *key, uuid_t *uuid)
+{
+ int32_t ret = -1;
+ mgmt_v3_lock_obj *lock_obj = NULL;
+ uuid_t no_owner = {"\0"};
+ xlator_t *this = NULL;
+
+ GF_ASSERT(THIS);
+ this = THIS;
+
+ if (!key || !uuid) {
+ gf_log (this->name, GF_LOG_ERROR, "key or uuid is null.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_bin(mgmt_v3_lock, key, (void **) &lock_obj);
+ if (!ret)
+ uuid_copy (*uuid, lock_obj->lock_owner);
+ else
+ uuid_copy (*uuid, no_owner);
+
+ ret = 0;
+out:
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+/* This function is called with the locked_count and type, to *
+ * release all the acquired locks. */
+static int32_t
+glusterd_release_multiple_locks_per_entity (dict_t *dict, uuid_t uuid,
+ int32_t locked_count,
+ char *type)
+{
+ char name_buf[PATH_MAX] = "";
+ char *name = NULL;
+ int32_t i = -1;
+ int32_t op_ret = 0;
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT (dict);
+ GF_ASSERT (type);
+
+ if (locked_count == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "No %s locked as part of this transaction",
+ type);
+ goto out;
+ }
+
+ /* Release all the locks held */
+ for (i = 0; i < locked_count; i++) {
+ snprintf (name_buf, sizeof(name_buf),
+ "%sname%d", type, i+1);
+
+ /* Looking for volname1, volname2 or snapname1, *
+ * as key in the dict snapname2 */
+ ret = dict_get_str (dict, name_buf, &name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to get %s locked_count = %d",
+ name_buf, locked_count);
+ op_ret = ret;
+ continue;
+ }
+
+ ret = glusterd_mgmt_v3_unlock (name, uuid, type);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to release lock for %s.",
+ name);
+ op_ret = ret;
+ }
+ }
+
+out:
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", op_ret);
+ return op_ret;
+}
+
+/* Given the count and type of the entity this function acquires *
+ * locks on multiple elements of the same entity. For example: *
+ * If type is "vol" this function tries to acquire locks on multiple *
+ * volumes */
+static int32_t
+glusterd_acquire_multiple_locks_per_entity (dict_t *dict, uuid_t uuid,
+ int32_t count, char *type)
+{
+ char name_buf[PATH_MAX] = "";
+ char *name = NULL;
+ int32_t i = -1;
+ int32_t ret = -1;
+ int32_t locked_count = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT (dict);
+ GF_ASSERT (type);
+
+ /* Locking one element after other */
+ for (i = 0; i < count; i++) {
+ snprintf (name_buf, sizeof(name_buf),
+ "%sname%d", type, i+1);
+
+ /* Looking for volname1, volname2 or snapname1, *
+ * as key in the dict snapname2 */
+ ret = dict_get_str (dict, name_buf, &name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to get %s count = %d",
+ name_buf, count);
+ break;
+ }
+
+ ret = glusterd_mgmt_v3_lock (name, uuid, type);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to acquire lock for %s %s "
+ "on behalf of %s. Reversing "
+ "this transaction", type, name,
+ uuid_utoa(uuid));
+ break;
+ }
+ locked_count++;
+ }
+
+ if (count == locked_count) {
+ /* If all locking ops went successfuly, return as success */
+ ret = 0;
+ goto out;
+ }
+
+ /* If we failed to lock one element, unlock others and return failure */
+ ret = glusterd_release_multiple_locks_per_entity (dict, uuid,
+ locked_count,
+ type);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to release multiple %s locks",
+ type);
+ }
+ ret = -1;
+out:
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+/* Given the type of entity, this function figures out if it should unlock a *
+ * single element of multiple elements of the said entity. For example: *
+ * if the type is "vol", this function will accordingly unlock a single volume *
+ * or multiple volumes */
+static int32_t
+glusterd_mgmt_v3_unlock_entity (dict_t *dict, uuid_t uuid, char *type,
+ gf_boolean_t default_value)
+{
+ char name_buf[PATH_MAX] = "";
+ char *name = NULL;
+ int32_t count = -1;
+ int32_t ret = -1;
+ gf_boolean_t hold_locks = _gf_false;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT (dict);
+ GF_ASSERT (type);
+
+ snprintf (name_buf, sizeof(name_buf), "hold_%s_locks", type);
+ hold_locks = dict_get_str_boolean (dict, name_buf, default_value);
+
+ if (hold_locks == _gf_false) {
+ /* Locks were not held for this particular entity *
+ * Hence nothing to release */
+ ret = 0;
+ goto out;
+ }
+
+ /* Looking for volcount or snapcount in the dict */
+ snprintf (name_buf, sizeof(name_buf), "%scount", type);
+ ret = dict_get_int32 (dict, name_buf, &count);
+ if (ret) {
+ /* count is not present. Only one *
+ * element name needs to be unlocked */
+ snprintf (name_buf, sizeof(name_buf), "%sname",
+ type);
+ ret = dict_get_str (dict, name_buf, &name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to fetch %sname", type);
+ goto out;
+ }
+
+ ret = glusterd_mgmt_v3_unlock (name, uuid, type);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to release lock for %s %s "
+ "on behalf of %s.", type, name,
+ uuid_utoa(uuid));
+ goto out;
+ }
+ } else {
+ /* Unlocking one element name after another */
+ ret = glusterd_release_multiple_locks_per_entity (dict,
+ uuid,
+ count,
+ type);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to release all %s locks", type);
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+/* Given the type of entity, this function figures out if it should lock a *
+ * single element or multiple elements of the said entity. For example: *
+ * if the type is "vol", this function will accordingly lock a single volume *
+ * or multiple volumes */
+static int32_t
+glusterd_mgmt_v3_lock_entity (dict_t *dict, uuid_t uuid, char *type,
+ gf_boolean_t default_value)
+{
+ char name_buf[PATH_MAX] = "";
+ char *name = NULL;
+ int32_t count = -1;
+ int32_t ret = -1;
+ gf_boolean_t hold_locks = _gf_false;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT (dict);
+ GF_ASSERT (type);
+
+ snprintf (name_buf, sizeof(name_buf), "hold_%s_locks", type);
+ hold_locks = dict_get_str_boolean (dict, name_buf, default_value);
+
+ if (hold_locks == _gf_false) {
+ /* Not holding locks for this particular entity */
+ ret = 0;
+ goto out;
+ }
+
+ /* Looking for volcount or snapcount in the dict */
+ snprintf (name_buf, sizeof(name_buf), "%scount", type);
+ ret = dict_get_int32 (dict, name_buf, &count);
+ if (ret) {
+ /* count is not present. Only one *
+ * element name needs to be locked */
+ snprintf (name_buf, sizeof(name_buf), "%sname",
+ type);
+ ret = dict_get_str (dict, name_buf, &name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to fetch %sname", type);
+ goto out;
+ }
+
+ ret = glusterd_mgmt_v3_lock (name, uuid, type);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to acquire lock for %s %s "
+ "on behalf of %s.", type, name,
+ uuid_utoa(uuid));
+ goto out;
+ }
+ } else {
+ /* Locking one element name after another */
+ ret = glusterd_acquire_multiple_locks_per_entity (dict,
+ uuid,
+ count,
+ type);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to acquire all %s locks", type);
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+/* Try to release locks of multiple entities like *
+ * volume, snaps etc. */
+int32_t
+glusterd_multiple_mgmt_v3_unlock (dict_t *dict, uuid_t uuid)
+{
+ int32_t i = -1;
+ int32_t ret = -1;
+ int32_t op_ret = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ if (!dict) {
+ gf_log (this->name, GF_LOG_ERROR, "dict is null.");
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; valid_types[i].type; i++) {
+ ret = glusterd_mgmt_v3_unlock_entity
+ (dict, uuid,
+ valid_types[i].type,
+ valid_types[i].default_value);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to unlock all %s",
+ valid_types[i].type);
+ op_ret = ret;
+ }
+ }
+
+ ret = op_ret;
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+/* Try to acquire locks on multiple entities like *
+ * volume, snaps etc. */
+int32_t
+glusterd_multiple_mgmt_v3_lock (dict_t *dict, uuid_t uuid)
+{
+ int32_t i = -1;
+ int32_t ret = -1;
+ int32_t locked_count = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ if (!dict) {
+ gf_log (this->name, GF_LOG_ERROR, "dict is null.");
+ ret = -1;
+ goto out;
+ }
+
+ /* Locking one entity after other */
+ for (i = 0; valid_types[i].type; i++) {
+ ret = glusterd_mgmt_v3_lock_entity
+ (dict, uuid,
+ valid_types[i].type,
+ valid_types[i].default_value);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to lock all %s",
+ valid_types[i].type);
+ break;
+ }
+ locked_count++;
+ }
+
+ if (locked_count == GF_MAX_LOCKING_ENTITIES) {
+ /* If all locking ops went successfuly, return as success */
+ ret = 0;
+ goto out;
+ }
+
+ /* If we failed to lock one entity, unlock others and return failure */
+ for (i = 0; i < locked_count; i++) {
+ ret = glusterd_mgmt_v3_unlock_entity
+ (dict, uuid,
+ valid_types[i].type,
+ valid_types[i].default_value);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to unlock all %s",
+ valid_types[i].type);
+ }
+ }
+ ret = -1;
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_mgmt_v3_lock (const char *name, uuid_t uuid, char *type)
+{
+ char key[PATH_MAX] = "";
+ int32_t ret = -1;
+ mgmt_v3_lock_obj *lock_obj = NULL;
+ gf_boolean_t is_valid = _gf_true;
+ uuid_t owner = {0};
+ xlator_t *this = NULL;
+
+ GF_ASSERT(THIS);
+ this = THIS;
+
+ if (!name || !type) {
+ gf_log (this->name, GF_LOG_ERROR, "name or type is null.");
+ ret = -1;
+ goto out;
+ }
+
+ is_valid = glusterd_mgmt_v3_is_type_valid (type);
+ if (is_valid != _gf_true) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Invalid entity. Cannot perform locking "
+ "operation on %s types", type);
+ ret = -1;
+ goto out;
+ }
+
+ ret = snprintf (key, sizeof(key), "%s_%s", name, type);
+ if (ret != strlen(name) + 1 + strlen(type)) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR, "Unable to create key");
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Trying to acquire lock of %s %s for %s as %s",
+ type, name, uuid_utoa (uuid), key);
+
+ ret = glusterd_get_mgmt_v3_lock_owner (key, &owner);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Unable to get mgmt_v3 lock owner");
+ goto out;
+ }
+
+ /* If the lock has already been held for the given volume
+ * we fail */
+ if (!uuid_is_null (owner)) {
+ gf_log (this->name, GF_LOG_ERROR, "Lock for %s held by %s",
+ name, uuid_utoa (owner));
+ ret = -1;
+ goto out;
+ }
+
+ lock_obj = GF_CALLOC (1, sizeof(mgmt_v3_lock_obj),
+ gf_common_mt_mgmt_v3_lock_obj_t);
+ if (!lock_obj) {
+ ret = -1;
+ goto out;
+ }
+
+ uuid_copy (lock_obj->lock_owner, uuid);
+
+ ret = dict_set_bin (mgmt_v3_lock, key, lock_obj,
+ sizeof(*lock_obj));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to set lock owner in mgmt_v3 lock");
+ if (lock_obj)
+ GF_FREE (lock_obj);
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Lock for %s %s successfully held by %s",
+ type, name, uuid_utoa (uuid));
+
+ ret = 0;
+out:
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_mgmt_v3_unlock (const char *name, uuid_t uuid, char *type)
+{
+ char key[PATH_MAX] = "";
+ int32_t ret = -1;
+ gf_boolean_t is_valid = _gf_true;
+ uuid_t owner = {0};
+ xlator_t *this = NULL;
+
+ GF_ASSERT(THIS);
+ this = THIS;
+
+ if (!name || !type) {
+ gf_log (this->name, GF_LOG_ERROR, "name is null.");
+ ret = -1;
+ goto out;
+ }
+
+ is_valid = glusterd_mgmt_v3_is_type_valid (type);
+ if (is_valid != _gf_true) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Invalid entity. Cannot perform unlocking "
+ "operation on %s types", type);
+ ret = -1;
+ goto out;
+ }
+
+ ret = snprintf (key, sizeof(key), "%s_%s",
+ name, type);
+ if (ret != strlen(name) + 1 + strlen(type)) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to create key");
+ ret = -1;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Trying to release lock of %s %s for %s as %s",
+ type, name, uuid_utoa (uuid), key);
+
+ ret = glusterd_get_mgmt_v3_lock_owner (key, &owner);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Unable to get mgmt_v3 lock owner");
+ goto out;
+ }
+
+ if (uuid_is_null (owner)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Lock for %s %s not held", type, name);
+ ret = -1;
+ goto out;
+ }
+
+ ret = uuid_compare (uuid, owner);
+ if (ret) {
+
+ gf_log (this->name, GF_LOG_ERROR, "Lock owner mismatch. "
+ "Lock for %s %s held by %s",
+ type, name, uuid_utoa (owner));
+ goto out;
+ }
+
+ /* Removing the mgmt_v3 lock from the global list */
+ dict_del (mgmt_v3_lock, key);
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Lock for %s %s successfully released",
+ type, name);
+
+ ret = 0;
+out:
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-locks.h b/xlators/mgmt/glusterd/src/glusterd-locks.h
new file mode 100644
index 000000000..83eb8c997
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-locks.h
@@ -0,0 +1,51 @@
+/*
+ Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_LOCKS_H_
+#define _GLUSTERD_LOCKS_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+typedef struct mgmt_v3_lock_object_ {
+ uuid_t lock_owner;
+} mgmt_v3_lock_obj;
+
+typedef struct mgmt_v3_lock_valid_entities {
+ char *type; /* Entity type like vol, snap */
+ gf_boolean_t default_value; /* The default value that *
+ * determines if the locks *
+ * should be held for that *
+ * entity */
+} valid_entities;
+
+int32_t
+glusterd_mgmt_v3_lock_init ();
+
+void
+glusterd_mgmt_v3_lock_fini ();
+
+int32_t
+glusterd_get_mgmt_v3_lock_owner (char *volname, uuid_t *uuid);
+
+int32_t
+glusterd_mgmt_v3_lock (const char *key, uuid_t uuid, char *type);
+
+int32_t
+glusterd_mgmt_v3_unlock (const char *key, uuid_t uuid, char *type);
+
+int32_t
+glusterd_multiple_mgmt_v3_lock (dict_t *dict, uuid_t uuid);
+
+int32_t
+glusterd_multiple_mgmt_v3_unlock (dict_t *dict, uuid_t uuid);
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-log-ops.c b/xlators/mgmt/glusterd/src/glusterd-log-ops.c
new file mode 100644
index 000000000..33bd95c03
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-log-ops.c
@@ -0,0 +1,271 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "common-utils.h"
+#include "cli1-xdr.h"
+#include "xdr-generic.h"
+#include "glusterd.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-store.h"
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+
+#include <signal.h>
+
+int
+__glusterd_handle_log_rotate (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{0,}};
+ dict_t *dict = NULL;
+ glusterd_op_t cli_op = GD_OP_LOG_ROTATE;
+ char *volname = NULL;
+ char msg[2048] = {0,};
+ xlator_t *this = NULL;
+
+ GF_ASSERT (req);
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf (msg, sizeof (msg), "Unable to decode the "
+ "command");
+ goto out;
+ }
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Failed to get volume name");
+ gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_INFO, "Received log rotate req "
+ "for volume %s", volname);
+
+ ret = dict_set_uint64 (dict, "rotate-key", (uint64_t)time (NULL));
+ if (ret)
+ goto out;
+
+ ret = glusterd_op_begin_synctask (req, GD_OP_LOG_ROTATE, dict);
+
+out:
+ if (ret) {
+ if (msg[0] == '\0')
+ snprintf (msg, sizeof (msg), "Operation failed");
+ ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+ dict, msg);
+ }
+
+ free (cli_req.dict.dict_val);
+ return ret;
+}
+
+int
+glusterd_handle_log_rotate (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ __glusterd_handle_log_rotate);
+}
+
+/* op-sm */
+int
+glusterd_op_stage_log_rotate (dict_t *dict, char **op_errstr)
+{
+ int ret = -1;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ gf_boolean_t exists = _gf_false;
+ char msg[2048] = {0};
+ char *brick = NULL;
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
+ goto out;
+ }
+
+ exists = glusterd_check_volume_exists (volname);
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (!exists) {
+ snprintf (msg, sizeof (msg), "Volume %s does not exist",
+ volname);
+ gf_log ("", GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ ret = -1;
+ goto out;
+ }
+
+ if (_gf_false == glusterd_is_volume_started (volinfo)) {
+ snprintf (msg, sizeof (msg), "Volume %s needs to be started before"
+ " log rotate.", volname);
+ gf_log ("", GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "brick", &brick);
+ /* If no brick is specified, do log-rotate for
+ all the bricks in the volume */
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo, NULL);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Incorrect brick %s "
+ "for volume %s", brick, volname);
+ gf_log ("", GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ goto out;
+ }
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+
+int
+glusterd_op_log_rotate (dict_t *dict)
+{
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ xlator_t *this = NULL;
+ char *volname = NULL;
+ char *brick = NULL;
+ char logfile[PATH_MAX] = {0,};
+ char pidfile[PATH_MAX] = {0,};
+ FILE *file = NULL;
+ pid_t pid = 0;
+ uint64_t key = 0;
+ int valid_brick = 0;
+ glusterd_brickinfo_t *tmpbrkinfo = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "volname not found");
+ goto out;
+ }
+
+ ret = dict_get_uint64 (dict, "rotate-key", &key);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "rotate key not found");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "brick", &brick);
+ /* If no brick is specified, do log-rotate for
+ all the bricks in the volume */
+ if (ret)
+ goto cont;
+
+ ret = glusterd_brickinfo_new_from_brick (brick, &tmpbrkinfo);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "cannot get brickinfo from brick");
+ goto out;
+ }
+
+cont:
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret)
+ goto out;
+
+ ret = -1;
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ if (uuid_compare (brickinfo->uuid, MY_UUID))
+ continue;
+
+ if (brick &&
+ (strcmp (tmpbrkinfo->hostname, brickinfo->hostname) ||
+ strcmp (tmpbrkinfo->path,brickinfo->path)))
+ continue;
+
+ valid_brick = 1;
+
+ GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv);
+ file = fopen (pidfile, "r+");
+ if (!file) {
+ gf_log ("", GF_LOG_ERROR, "Unable to open pidfile: %s",
+ pidfile);
+ ret = -1;
+ goto out;
+ }
+
+ ret = fscanf (file, "%d", &pid);
+ if (ret <= 0) {
+ gf_log ("", GF_LOG_ERROR, "Unable to read pidfile: %s",
+ pidfile);
+ ret = -1;
+ goto out;
+ }
+ fclose (file);
+ file = NULL;
+
+ snprintf (logfile, PATH_MAX, "%s.%"PRIu64,
+ brickinfo->logfile, key);
+
+ ret = rename (brickinfo->logfile, logfile);
+ if (ret)
+ gf_log ("", GF_LOG_WARNING, "rename failed");
+
+ ret = kill (pid, SIGHUP);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to SIGHUP to %d", pid);
+ goto out;
+ }
+ ret = 0;
+
+ /* If request was for brick, only one iteration is enough */
+ if (brick)
+ break;
+ }
+
+ if (ret && !valid_brick)
+ ret = 0;
+
+out:
+ if (tmpbrkinfo)
+ glusterd_brickinfo_delete (tmpbrkinfo);
+
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-mem-types.h b/xlators/mgmt/glusterd/src/glusterd-mem-types.h
new file mode 100644
index 000000000..e6f6a0333
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-mem-types.h
@@ -0,0 +1,75 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __GLUSTERD_MEM_TYPES_H__
+#define __GLUSTERD_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+typedef enum gf_gld_mem_types_ {
+ gf_gld_mt_dir_entry_t = gf_common_mt_end + 1,
+ gf_gld_mt_volfile_ctx = gf_common_mt_end + 2,
+ gf_gld_mt_glusterd_state_t = gf_common_mt_end + 3,
+ gf_gld_mt_glusterd_conf_t = gf_common_mt_end + 4,
+ gf_gld_mt_locker = gf_common_mt_end + 5,
+ gf_gld_mt_string = gf_common_mt_end + 6,
+ gf_gld_mt_lock_table = gf_common_mt_end + 7,
+ gf_gld_mt_char = gf_common_mt_end + 8,
+ gf_gld_mt_glusterd_connection_t = gf_common_mt_end + 9,
+ gf_gld_mt_resolve_comp = gf_common_mt_end + 10,
+ gf_gld_mt_peerinfo_t = gf_common_mt_end + 11,
+ gf_gld_mt_friend_sm_event_t = gf_common_mt_end + 12,
+ gf_gld_mt_friend_req_ctx_t = gf_common_mt_end + 13,
+ gf_gld_mt_friend_update_ctx_t = gf_common_mt_end + 14,
+ gf_gld_mt_op_sm_event_t = gf_common_mt_end + 15,
+ gf_gld_mt_op_lock_ctx_t = gf_common_mt_end + 16,
+ gf_gld_mt_op_stage_ctx_t = gf_common_mt_end + 17,
+ gf_gld_mt_op_commit_ctx_t = gf_common_mt_end + 18,
+ gf_gld_mt_mop_stage_req_t = gf_common_mt_end + 19,
+ gf_gld_mt_probe_ctx_t = gf_common_mt_end + 20,
+ gf_gld_mt_create_volume_ctx_t = gf_common_mt_end + 21,
+ gf_gld_mt_start_volume_ctx_t = gf_common_mt_end + 22,
+ gf_gld_mt_stop_volume_ctx_t = gf_common_mt_end + 23,
+ gf_gld_mt_delete_volume_ctx_t = gf_common_mt_end + 24,
+ gf_gld_mt_glusterd_volinfo_t = gf_common_mt_end + 25,
+ gf_gld_mt_glusterd_brickinfo_t = gf_common_mt_end + 26,
+ gf_gld_mt_peer_hostname_t = gf_common_mt_end + 27,
+ gf_gld_mt_ifreq = gf_common_mt_end + 28,
+ gf_gld_mt_store_handle_t = gf_common_mt_end + 29,
+ gf_gld_mt_store_iter_t = gf_common_mt_end + 30,
+ gf_gld_mt_defrag_info = gf_common_mt_end + 31,
+ gf_gld_mt_log_filename_ctx_t = gf_common_mt_end + 32,
+ gf_gld_mt_log_locate_ctx_t = gf_common_mt_end + 33,
+ gf_gld_mt_log_rotate_ctx_t = gf_common_mt_end + 34,
+ gf_gld_mt_peerctx_t = gf_common_mt_end + 35,
+ gf_gld_mt_sm_tr_log_t = gf_common_mt_end + 36,
+ gf_gld_mt_pending_node_t = gf_common_mt_end + 37,
+ gf_gld_mt_brick_rsp_ctx_t = gf_common_mt_end + 38,
+ gf_gld_mt_mop_brick_req_t = gf_common_mt_end + 39,
+ gf_gld_mt_op_allack_ctx_t = gf_common_mt_end + 40,
+ gf_gld_mt_linearr = gf_common_mt_end + 41,
+ gf_gld_mt_linebuf = gf_common_mt_end + 42,
+ gf_gld_mt_mount_pattern = gf_common_mt_end + 43,
+ gf_gld_mt_mount_comp_container = gf_common_mt_end + 44,
+ gf_gld_mt_mount_component = gf_common_mt_end + 45,
+ gf_gld_mt_mount_spec = gf_common_mt_end + 46,
+ gf_gld_mt_georep_meet_spec = gf_common_mt_end + 47,
+ gf_gld_mt_nodesrv_t = gf_common_mt_end + 48,
+ gf_gld_mt_charptr = gf_common_mt_end + 49,
+ gf_gld_mt_hooks_stub_t = gf_common_mt_end + 50,
+ gf_gld_mt_hooks_priv_t = gf_common_mt_end + 51,
+ gf_gld_mt_mop_commit_req_t = gf_common_mt_end + 52,
+ gf_gld_mt_int = gf_common_mt_end + 53,
+ gf_gld_mt_snap_t = gf_common_mt_end + 54,
+ gf_gld_mt_missed_snapinfo_t = gf_common_mt_end + 55,
+ gf_gld_mt_end = gf_common_mt_end + 56,
+} gf_gld_mem_types_t;
+#endif
+
diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c
new file mode 100644
index 000000000..a5f38ce9c
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c
@@ -0,0 +1,924 @@
+/*
+ Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+/* rpc related syncops */
+#include "rpc-clnt.h"
+#include "protocol-common.h"
+#include "xdr-generic.h"
+#include "glusterd1-xdr.h"
+#include "glusterd-syncop.h"
+
+#include "glusterd.h"
+#include "glusterd-utils.h"
+#include "glusterd-locks.h"
+#include "glusterd-mgmt.h"
+#include "glusterd-op-sm.h"
+
+static int
+glusterd_mgmt_v3_null (rpcsvc_request_t *req)
+{
+ return 0;
+}
+
+static int
+glusterd_mgmt_v3_lock_send_resp (rpcsvc_request_t *req, int32_t status)
+{
+
+ gd1_mgmt_v3_lock_rsp rsp = {{0},};
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+
+ rsp.op_ret = status;
+ if (rsp.op_ret)
+ rsp.op_errno = errno;
+
+ glusterd_get_uuid (&rsp.uuid);
+
+ ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp);
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Responded to mgmt_v3 lock, ret: %d", ret);
+
+ return ret;
+}
+
+static int
+glusterd_synctasked_mgmt_v3_lock (rpcsvc_request_t *req,
+ gd1_mgmt_v3_lock_req *lock_req,
+ glusterd_op_lock_ctx_t *ctx)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+ GF_ASSERT (ctx);
+ GF_ASSERT (ctx->dict);
+
+ /* Trying to acquire multiple mgmt_v3 locks */
+ ret = glusterd_multiple_mgmt_v3_lock (ctx->dict, ctx->uuid);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to acquire mgmt_v3 locks for %s",
+ uuid_utoa (ctx->uuid));
+
+ ret = glusterd_mgmt_v3_lock_send_resp (req, ret);
+
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+static int
+glusterd_op_state_machine_mgmt_v3_lock (rpcsvc_request_t *req,
+ gd1_mgmt_v3_lock_req *lock_req,
+ glusterd_op_lock_ctx_t *ctx)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_op_info_t txn_op_info = {{0},};
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+
+ glusterd_txn_opinfo_init (&txn_op_info, NULL, &lock_req->op,
+ ctx->dict, req);
+
+ ret = glusterd_set_txn_opinfo (&lock_req->txn_id, &txn_op_info);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to set transaction's opinfo");
+ goto out;
+ }
+
+ ret = glusterd_op_sm_inject_event (GD_OP_EVENT_LOCK,
+ &lock_req->txn_id, ctx);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to inject event GD_OP_EVENT_LOCK");
+
+out:
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+static int
+glusterd_handle_mgmt_v3_lock_fn (rpcsvc_request_t *req)
+{
+ gd1_mgmt_v3_lock_req lock_req = {{0},};
+ int32_t ret = -1;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_op_lock_ctx_t *ctx = NULL;
+ xlator_t *this = NULL;
+ gf_boolean_t is_synctasked = _gf_false;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+
+ ret = xdr_to_generic (req->msg[0], &lock_req,
+ (xdrproc_t)xdr_gd1_mgmt_v3_lock_req);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to decode lock "
+ "request received from peer");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "Received mgmt_v3 lock req "
+ "from uuid: %s", uuid_utoa (lock_req.uuid));
+
+ if (glusterd_friend_find_by_uuid (lock_req.uuid, &peerinfo)) {
+ gf_log (this->name, GF_LOG_WARNING, "%s doesn't "
+ "belong to the cluster. Ignoring request.",
+ uuid_utoa (lock_req.uuid));
+ ret = -1;
+ goto out;
+ }
+
+ ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t);
+ if (!ctx) {
+ ret = -1;
+ goto out;
+ }
+
+ uuid_copy (ctx->uuid, lock_req.uuid);
+ ctx->req = req;
+
+ ctx->dict = dict_new ();
+ if (!ctx->dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (lock_req.dict.dict_val,
+ lock_req.dict.dict_len, &ctx->dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to unserialize the dictionary");
+ goto out;
+ }
+
+ is_synctasked = dict_get_str_boolean (ctx->dict,
+ "is_synctasked", _gf_false);
+ if (is_synctasked)
+ ret = glusterd_synctasked_mgmt_v3_lock (req, &lock_req, ctx);
+ else
+ ret = glusterd_op_state_machine_mgmt_v3_lock (req, &lock_req,
+ ctx);
+
+out:
+
+ if (ret) {
+ if (ctx->dict)
+ dict_unref (ctx->dict);
+ if (ctx)
+ GF_FREE (ctx);
+ }
+
+ free (lock_req.dict.dict_val);
+
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+static int
+glusterd_mgmt_v3_pre_validate_send_resp (rpcsvc_request_t *req,
+ int32_t op, int32_t status,
+ char *op_errstr, dict_t *rsp_dict)
+{
+ gd1_mgmt_v3_pre_val_rsp rsp = {{0},};
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+
+ rsp.op_ret = status;
+ glusterd_get_uuid (&rsp.uuid);
+ rsp.op = op;
+ if (op_errstr)
+ rsp.op_errstr = op_errstr;
+ else
+ rsp.op_errstr = "";
+
+ ret = dict_allocate_and_serialize (rsp_dict, &rsp.dict.dict_val,
+ &rsp.dict.dict_len);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to get serialized length of dict");
+ goto out;
+ }
+
+ ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_v3_pre_val_rsp);
+
+ GF_FREE (rsp.dict.dict_val);
+out:
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Responded to pre validation, ret: %d", ret);
+ return ret;
+}
+
+static int
+glusterd_handle_pre_validate_fn (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gd1_mgmt_v3_pre_val_req op_req = {{0},};
+ glusterd_peerinfo_t *peerinfo = NULL;
+ xlator_t *this = NULL;
+ char *op_errstr = NULL;
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+
+ ret = xdr_to_generic (req->msg[0], &op_req,
+ (xdrproc_t)xdr_gd1_mgmt_v3_pre_val_req);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to decode pre validation "
+ "request received from peer");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (glusterd_friend_find_by_uuid (op_req.uuid, &peerinfo)) {
+ gf_log (this->name, GF_LOG_WARNING, "%s doesn't "
+ "belong to the cluster. Ignoring request.",
+ uuid_utoa (op_req.uuid));
+ ret = -1;
+ goto out;
+ }
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ ret = dict_unserialize (op_req.dict.dict_val,
+ op_req.dict.dict_len, &dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to unserialize the dictionary");
+ goto out;
+ }
+
+ rsp_dict = dict_new ();
+ if (!rsp_dict) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get new dictionary");
+ return -1;
+ }
+
+ ret = gd_mgmt_v3_pre_validate_fn (op_req.op, dict, &op_errstr,
+ rsp_dict);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Pre Validation failed on operation %s",
+ gd_op_list[op_req.op]);
+ }
+
+ ret = glusterd_mgmt_v3_pre_validate_send_resp (req, op_req.op,
+ ret, op_errstr,
+ rsp_dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to send Pre Validation "
+ "response for operation %s",
+ gd_op_list[op_req.op]);
+ goto out;
+ }
+
+out:
+ if (op_errstr && (strcmp (op_errstr, "")))
+ GF_FREE (op_errstr);
+
+ free (op_req.dict.dict_val);
+
+ if (dict)
+ dict_unref (dict);
+
+ if (rsp_dict)
+ dict_unref (rsp_dict);
+
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+static int
+glusterd_mgmt_v3_brick_op_send_resp (rpcsvc_request_t *req,
+ int32_t op, int32_t status,
+ char *op_errstr, dict_t *rsp_dict)
+{
+ gd1_mgmt_v3_brick_op_rsp rsp = {{0},};
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+
+ rsp.op_ret = status;
+ glusterd_get_uuid (&rsp.uuid);
+ rsp.op = op;
+ if (op_errstr)
+ rsp.op_errstr = op_errstr;
+ else
+ rsp.op_errstr = "";
+
+ ret = dict_allocate_and_serialize (rsp_dict, &rsp.dict.dict_val,
+ &rsp.dict.dict_len);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to get serialized length of dict");
+ goto out;
+ }
+
+ ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_v3_brick_op_rsp);
+
+ GF_FREE (rsp.dict.dict_val);
+out:
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Responded to brick op, ret: %d", ret);
+ return ret;
+}
+
+static int
+glusterd_handle_brick_op_fn (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gd1_mgmt_v3_brick_op_req op_req = {{0},};
+ glusterd_peerinfo_t *peerinfo = NULL;
+ xlator_t *this = NULL;
+ char *op_errstr = NULL;
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+
+ ret = xdr_to_generic (req->msg[0], &op_req,
+ (xdrproc_t)xdr_gd1_mgmt_v3_brick_op_req);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to decode brick op "
+ "request received from peer");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (glusterd_friend_find_by_uuid (op_req.uuid, &peerinfo)) {
+ gf_log (this->name, GF_LOG_WARNING, "%s doesn't "
+ "belong to the cluster. Ignoring request.",
+ uuid_utoa (op_req.uuid));
+ ret = -1;
+ goto out;
+ }
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ ret = dict_unserialize (op_req.dict.dict_val,
+ op_req.dict.dict_len, &dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to unserialize the dictionary");
+ goto out;
+ }
+
+ rsp_dict = dict_new ();
+ if (!rsp_dict) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get new dictionary");
+ return -1;
+ }
+
+ ret = gd_mgmt_v3_brick_op_fn (op_req.op, dict, &op_errstr,
+ rsp_dict);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Brick Op failed on operation %s",
+ gd_op_list[op_req.op]);
+ }
+
+ ret = glusterd_mgmt_v3_brick_op_send_resp (req, op_req.op,
+ ret, op_errstr,
+ rsp_dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to send brick op "
+ "response for operation %s",
+ gd_op_list[op_req.op]);
+ goto out;
+ }
+
+out:
+ if (op_errstr && (strcmp (op_errstr, "")))
+ GF_FREE (op_errstr);
+
+ free (op_req.dict.dict_val);
+
+ if (dict)
+ dict_unref (dict);
+
+ if (rsp_dict)
+ dict_unref (rsp_dict);
+
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+static int
+glusterd_mgmt_v3_commit_send_resp (rpcsvc_request_t *req,
+ int32_t op, int32_t status,
+ char *op_errstr, dict_t *rsp_dict)
+{
+ gd1_mgmt_v3_commit_rsp rsp = {{0},};
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+
+ rsp.op_ret = status;
+ glusterd_get_uuid (&rsp.uuid);
+ rsp.op = op;
+ if (op_errstr)
+ rsp.op_errstr = op_errstr;
+ else
+ rsp.op_errstr = "";
+
+ ret = dict_allocate_and_serialize (rsp_dict, &rsp.dict.dict_val,
+ &rsp.dict.dict_len);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to get serialized length of dict");
+ goto out;
+ }
+
+ ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_v3_commit_rsp);
+
+ GF_FREE (rsp.dict.dict_val);
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "Responded to commit, ret: %d", ret);
+ return ret;
+}
+
+static int
+glusterd_handle_commit_fn (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gd1_mgmt_v3_commit_req op_req = {{0},};
+ glusterd_peerinfo_t *peerinfo = NULL;
+ xlator_t *this = NULL;
+ char *op_errstr = NULL;
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+
+ ret = xdr_to_generic (req->msg[0], &op_req,
+ (xdrproc_t)xdr_gd1_mgmt_v3_commit_req);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to decode commit "
+ "request received from peer");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (glusterd_friend_find_by_uuid (op_req.uuid, &peerinfo)) {
+ gf_log (this->name, GF_LOG_WARNING, "%s doesn't "
+ "belong to the cluster. Ignoring request.",
+ uuid_utoa (op_req.uuid));
+ ret = -1;
+ goto out;
+ }
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ ret = dict_unserialize (op_req.dict.dict_val,
+ op_req.dict.dict_len, &dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to unserialize the dictionary");
+ goto out;
+ }
+
+ rsp_dict = dict_new ();
+ if (!rsp_dict) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get new dictionary");
+ return -1;
+ }
+
+ ret = gd_mgmt_v3_commit_fn (op_req.op, dict, &op_errstr,
+ rsp_dict);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "commit failed on operation %s",
+ gd_op_list[op_req.op]);
+ }
+
+ ret = glusterd_mgmt_v3_commit_send_resp (req, op_req.op,
+ ret, op_errstr,
+ rsp_dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to send commit "
+ "response for operation %s",
+ gd_op_list[op_req.op]);
+ goto out;
+ }
+
+out:
+ if (op_errstr && (strcmp (op_errstr, "")))
+ GF_FREE (op_errstr);
+
+ free (op_req.dict.dict_val);
+
+ if (dict)
+ dict_unref (dict);
+
+ if (rsp_dict)
+ dict_unref (rsp_dict);
+
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+static int
+glusterd_mgmt_v3_post_validate_send_resp (rpcsvc_request_t *req,
+ int32_t op, int32_t status,
+ char *op_errstr, dict_t *rsp_dict)
+{
+ gd1_mgmt_v3_post_val_rsp rsp = {{0},};
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+
+ rsp.op_ret = status;
+ glusterd_get_uuid (&rsp.uuid);
+ rsp.op = op;
+ if (op_errstr)
+ rsp.op_errstr = op_errstr;
+ else
+ rsp.op_errstr = "";
+
+ ret = dict_allocate_and_serialize (rsp_dict, &rsp.dict.dict_val,
+ &rsp.dict.dict_len);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to get serialized length of dict");
+ goto out;
+ }
+
+ ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_v3_post_val_rsp);
+
+ GF_FREE (rsp.dict.dict_val);
+out:
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Responded to post validation, ret: %d", ret);
+ return ret;
+}
+
+static int
+glusterd_handle_post_validate_fn (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gd1_mgmt_v3_post_val_req op_req = {{0},};
+ glusterd_peerinfo_t *peerinfo = NULL;
+ xlator_t *this = NULL;
+ char *op_errstr = NULL;
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+
+ ret = xdr_to_generic (req->msg[0], &op_req,
+ (xdrproc_t)xdr_gd1_mgmt_v3_post_val_req);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to decode post validation "
+ "request received from peer");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (glusterd_friend_find_by_uuid (op_req.uuid, &peerinfo)) {
+ gf_log (this->name, GF_LOG_WARNING, "%s doesn't "
+ "belong to the cluster. Ignoring request.",
+ uuid_utoa (op_req.uuid));
+ ret = -1;
+ goto out;
+ }
+
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ ret = dict_unserialize (op_req.dict.dict_val,
+ op_req.dict.dict_len, &dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to unserialize the dictionary");
+ goto out;
+ }
+
+ rsp_dict = dict_new ();
+ if (!rsp_dict) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get new dictionary");
+ return -1;
+ }
+
+ ret = gd_mgmt_v3_post_validate_fn (op_req.op, op_req.op_ret, dict,
+ &op_errstr, rsp_dict);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Post Validation failed on operation %s",
+ gd_op_list[op_req.op]);
+ }
+
+ ret = glusterd_mgmt_v3_post_validate_send_resp (req, op_req.op,
+ ret, op_errstr,
+ rsp_dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to send Post Validation "
+ "response for operation %s",
+ gd_op_list[op_req.op]);
+ goto out;
+ }
+
+out:
+ if (op_errstr && (strcmp (op_errstr, "")))
+ GF_FREE (op_errstr);
+
+ free (op_req.dict.dict_val);
+
+ if (dict)
+ dict_unref (dict);
+
+ if (rsp_dict)
+ dict_unref (rsp_dict);
+
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+static int
+glusterd_mgmt_v3_unlock_send_resp (rpcsvc_request_t *req, int32_t status)
+{
+
+ gd1_mgmt_v3_unlock_rsp rsp = {{0},};
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+
+ rsp.op_ret = status;
+ if (rsp.op_ret)
+ rsp.op_errno = errno;
+
+ glusterd_get_uuid (&rsp.uuid);
+
+ ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp);
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Responded to mgmt_v3 unlock, ret: %d", ret);
+
+ return ret;
+}
+
+static int
+glusterd_synctasked_mgmt_v3_unlock (rpcsvc_request_t *req,
+ gd1_mgmt_v3_unlock_req *unlock_req,
+ glusterd_op_lock_ctx_t *ctx)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+ GF_ASSERT (ctx);
+
+ /* Trying to release multiple mgmt_v3 locks */
+ ret = glusterd_multiple_mgmt_v3_unlock (ctx->dict, ctx->uuid);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to release mgmt_v3 locks for %s",
+ uuid_utoa(ctx->uuid));
+ }
+
+ ret = glusterd_mgmt_v3_unlock_send_resp (req, ret);
+
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+
+static int
+glusterd_op_state_machine_mgmt_v3_unlock (rpcsvc_request_t *req,
+ gd1_mgmt_v3_unlock_req *lock_req,
+ glusterd_op_lock_ctx_t *ctx)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+
+ ret = glusterd_op_sm_inject_event (GD_OP_EVENT_UNLOCK,
+ &lock_req->txn_id, ctx);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to inject event GD_OP_EVENT_UNLOCK");
+
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+static int
+glusterd_handle_mgmt_v3_unlock_fn (rpcsvc_request_t *req)
+{
+ gd1_mgmt_v3_unlock_req lock_req = {{0},};
+ int32_t ret = -1;
+ glusterd_op_lock_ctx_t *ctx = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ xlator_t *this = NULL;
+ gf_boolean_t is_synctasked = _gf_false;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+
+ ret = xdr_to_generic (req->msg[0], &lock_req,
+ (xdrproc_t)xdr_gd1_mgmt_v3_unlock_req);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to decode unlock "
+ "request received from peer");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "Received mgmt_v3 unlock req "
+ "from uuid: %s", uuid_utoa (lock_req.uuid));
+
+ if (glusterd_friend_find_by_uuid (lock_req.uuid, &peerinfo)) {
+ gf_log (this->name, GF_LOG_WARNING, "%s doesn't "
+ "belong to the cluster. Ignoring request.",
+ uuid_utoa (lock_req.uuid));
+ ret = -1;
+ goto out;
+ }
+
+ ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t);
+ if (!ctx) {
+ ret = -1;
+ goto out;
+ }
+
+ uuid_copy (ctx->uuid, lock_req.uuid);
+ ctx->req = req;
+
+ ctx->dict = dict_new ();
+ if (!ctx->dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (lock_req.dict.dict_val,
+ lock_req.dict.dict_len, &ctx->dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to unserialize the dictionary");
+ goto out;
+ }
+
+ is_synctasked = dict_get_str_boolean (ctx->dict,
+ "is_synctasked", _gf_false);
+ if (is_synctasked)
+ ret = glusterd_synctasked_mgmt_v3_unlock (req, &lock_req, ctx);
+ else
+ ret = glusterd_op_state_machine_mgmt_v3_unlock (req, &lock_req,
+ ctx);
+
+out:
+
+ if (ret) {
+ if (ctx->dict)
+ dict_unref (ctx->dict);
+ if (ctx)
+ GF_FREE (ctx);
+ }
+
+ free (lock_req.dict.dict_val);
+
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_handle_mgmt_v3_lock (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ glusterd_handle_mgmt_v3_lock_fn);
+}
+
+static int
+glusterd_handle_pre_validate (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ glusterd_handle_pre_validate_fn);
+}
+
+static int
+glusterd_handle_brick_op (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ glusterd_handle_brick_op_fn);
+}
+
+static int
+glusterd_handle_commit (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ glusterd_handle_commit_fn);
+}
+
+static int
+glusterd_handle_post_validate (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ glusterd_handle_post_validate_fn);
+}
+
+int
+glusterd_handle_mgmt_v3_unlock (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ glusterd_handle_mgmt_v3_unlock_fn);
+}
+
+rpcsvc_actor_t gd_svc_mgmt_v3_actors[] = {
+ [GLUSTERD_MGMT_V3_NULL] = { "NULL", GLUSTERD_MGMT_V3_NULL, glusterd_mgmt_v3_null, NULL, 0, DRC_NA},
+ [GLUSTERD_MGMT_V3_LOCK] = { "MGMT_V3_LOCK", GLUSTERD_MGMT_V3_LOCK, glusterd_handle_mgmt_v3_lock, NULL, 0, DRC_NA},
+ [GLUSTERD_MGMT_V3_PRE_VALIDATE] = { "PRE_VAL", GLUSTERD_MGMT_V3_PRE_VALIDATE, glusterd_handle_pre_validate, NULL, 0, DRC_NA},
+ [GLUSTERD_MGMT_V3_BRICK_OP] = { "BRCK_OP", GLUSTERD_MGMT_V3_BRICK_OP, glusterd_handle_brick_op, NULL, 0, DRC_NA},
+ [GLUSTERD_MGMT_V3_COMMIT] = { "COMMIT", GLUSTERD_MGMT_V3_COMMIT, glusterd_handle_commit, NULL, 0, DRC_NA},
+ [GLUSTERD_MGMT_V3_POST_VALIDATE] = { "POST_VAL", GLUSTERD_MGMT_V3_POST_VALIDATE, glusterd_handle_post_validate, NULL, 0, DRC_NA},
+ [GLUSTERD_MGMT_V3_UNLOCK] = { "MGMT_V3_UNLOCK", GLUSTERD_MGMT_V3_UNLOCK, glusterd_handle_mgmt_v3_unlock, NULL, 0, DRC_NA},
+};
+
+struct rpcsvc_program gd_svc_mgmt_v3_prog = {
+ .progname = "GlusterD svc mgmt v3",
+ .prognum = GD_MGMT_V3_PROGRAM,
+ .progver = GD_MGMT_V3_VERSION,
+ .numactors = GLUSTERD_MGMT_V3_MAXVALUE,
+ .actors = gd_svc_mgmt_v3_actors,
+ .synctask = _gf_true,
+};
diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c
new file mode 100644
index 000000000..d52532e54
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c
@@ -0,0 +1,1893 @@
+/*
+ Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+/* rpc related syncops */
+#include "rpc-clnt.h"
+#include "protocol-common.h"
+#include "xdr-generic.h"
+#include "glusterd1-xdr.h"
+#include "glusterd-syncop.h"
+
+#include "glusterd.h"
+#include "glusterd-utils.h"
+#include "glusterd-locks.h"
+#include "glusterd-mgmt.h"
+#include "glusterd-op-sm.h"
+
+extern struct rpc_clnt_program gd_mgmt_v3_prog;
+
+
+static void
+gd_mgmt_v3_collate_errors (struct syncargs *args, int op_ret, int op_errno,
+ char *op_errstr, int op_code,
+ glusterd_peerinfo_t *peerinfo, u_char *uuid)
+{
+ char *peer_str = NULL;
+ char err_str[PATH_MAX] = "Please check log file for details.";
+ char op_err[PATH_MAX] = "";
+ int32_t len = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (args);
+ GF_ASSERT (uuid);
+
+ if (op_ret) {
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ if (peerinfo)
+ peer_str = peerinfo->hostname;
+ else
+ peer_str = uuid_utoa (uuid);
+
+ if (op_errstr && strcmp (op_errstr, "")) {
+ len = snprintf (err_str, sizeof(err_str) - 1,
+ "Error: %s", op_errstr);
+ err_str[len] = '\0';
+ }
+
+ switch (op_code){
+ case GLUSTERD_MGMT_V3_LOCK:
+ {
+ len = snprintf (op_err, sizeof(op_err) - 1,
+ "Locking failed "
+ "on %s. %s", peer_str, err_str);
+ break;
+ }
+ case GLUSTERD_MGMT_V3_PRE_VALIDATE:
+ {
+ len = snprintf (op_err, sizeof(op_err) - 1,
+ "Pre Validation failed "
+ "on %s. %s", peer_str, err_str);
+ break;
+ }
+ case GLUSTERD_MGMT_V3_BRICK_OP:
+ {
+ len = snprintf (op_err, sizeof(op_err) - 1,
+ "Brick ops failed "
+ "on %s. %s", peer_str, err_str);
+ break;
+ }
+ case GLUSTERD_MGMT_V3_COMMIT:
+ {
+ len = snprintf (op_err, sizeof(op_err) - 1,
+ "Commit failed on %s. %s",
+ peer_str, err_str);
+ break;
+ }
+ case GLUSTERD_MGMT_V3_POST_VALIDATE:
+ {
+ len = snprintf (op_err, sizeof(op_err) - 1,
+ "Post Validation failed "
+ "on %s. %s", peer_str, err_str);
+ break;
+ }
+ case GLUSTERD_MGMT_V3_UNLOCK:
+ {
+ len = snprintf (op_err, sizeof(op_err) - 1,
+ "Unlocking failed "
+ "on %s. %s", peer_str, err_str);
+ break;
+ }
+ }
+ op_err[len] = '\0';
+
+ if (args->errstr) {
+ len = snprintf (err_str, sizeof(err_str) - 1,
+ "%s\n%s", args->errstr,
+ op_err);
+ GF_FREE (args->errstr);
+ args->errstr = NULL;
+ } else
+ len = snprintf (err_str, sizeof(err_str) - 1,
+ "%s", op_err);
+ err_str[len] = '\0';
+
+ gf_log (this->name, GF_LOG_ERROR, "%s", op_err);
+ args->errstr = gf_strdup (err_str);
+ }
+
+ return;
+}
+
+int32_t
+gd_mgmt_v3_pre_validate_fn (glusterd_op_t op, dict_t *dict,
+ char **op_errstr, dict_t *rsp_dict)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (dict);
+ GF_ASSERT (op_errstr);
+ GF_ASSERT (rsp_dict);
+
+ switch (op) {
+ case GD_OP_SNAP:
+ {
+ ret = glusterd_snapshot_prevalidate (dict, op_errstr,
+ rsp_dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Snapshot Prevalidate Failed");
+ goto out;
+ }
+
+ break;
+ }
+ default:
+ break;
+ }
+
+ ret = 0;
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "OP = %d. Returning %d", op, ret);
+ return ret;
+}
+
+int32_t
+gd_mgmt_v3_brick_op_fn (glusterd_op_t op, dict_t *dict,
+ char **op_errstr, dict_t *rsp_dict)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (dict);
+ GF_ASSERT (op_errstr);
+ GF_ASSERT (rsp_dict);
+
+ switch (op) {
+ case GD_OP_SNAP:
+ {
+ ret = glusterd_snapshot_brickop (dict, op_errstr, rsp_dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "snapshot brickop "
+ "failed");
+ goto out;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+
+ ret = 0;
+out:
+ gf_log (this->name, GF_LOG_TRACE, "OP = %d. Returning %d", op, ret);
+ return ret;
+}
+
+int32_t
+gd_mgmt_v3_commit_fn (glusterd_op_t op, dict_t *dict,
+ char **op_errstr, dict_t *rsp_dict)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (dict);
+ GF_ASSERT (op_errstr);
+ GF_ASSERT (rsp_dict);
+
+ switch (op) {
+ case GD_OP_SNAP:
+ {
+ ret = glusterd_snapshot (dict, op_errstr, rsp_dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Snapshot Commit Failed");
+ goto out;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+
+ ret = 0;
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "OP = %d. Returning %d", op, ret);
+ return ret;
+}
+
+int32_t
+gd_mgmt_v3_post_validate_fn (glusterd_op_t op, int32_t op_ret, dict_t *dict,
+ char **op_errstr, dict_t *rsp_dict)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (dict);
+ GF_ASSERT (op_errstr);
+ GF_ASSERT (rsp_dict);
+
+ switch (op) {
+ case GD_OP_SNAP:
+ {
+ ret = glusterd_snapshot_postvalidate (dict, op_ret,
+ op_errstr,
+ rsp_dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "postvalidate operation failed");
+ goto out;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+
+ ret = 0;
+
+out:
+ gf_log (this->name, GF_LOG_TRACE, "OP = %d. Returning %d", op, ret);
+ return ret;
+}
+
+int32_t
+gd_mgmt_v3_lock_cbk_fn (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int32_t ret = -1;
+ struct syncargs *args = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ gd1_mgmt_v3_lock_rsp rsp = {{0},};
+ call_frame_t *frame = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+ GF_ASSERT (myframe);
+
+ /* Even though the lock command has failed, while collating the errors
+ (gd_mgmt_v3_collate_errors), args->op_ret and args->op_errno will be
+ used. @args is obtained from frame->local. So before checking the
+ status of the request and going out if its a failure, args should be
+ set to frame->local. Otherwise, while collating args will be NULL.
+ This applies to other phases such as prevalidate, brickop, commit and
+ postvalidate also.
+ */
+ frame = myframe;
+ args = frame->local;
+ peerinfo = frame->cookie;
+ frame->local = NULL;
+ frame->cookie = NULL;
+
+ if (-1 == req->rpc_status) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ if (!iov) {
+ gf_log (this->name, GF_LOG_ERROR, "iov is NULL");
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp);
+ if (ret < 0)
+ goto out;
+
+ uuid_copy (args->uuid, rsp.uuid);
+
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+
+out:
+ gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL,
+ GLUSTERD_MGMT_V3_LOCK,
+ peerinfo, rsp.uuid);
+ free (rsp.dict.dict_val);
+ STACK_DESTROY (frame->root);
+ synctask_barrier_wake(args);
+ return 0;
+}
+
+int32_t
+gd_mgmt_v3_lock_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ gd_mgmt_v3_lock_cbk_fn);
+}
+
+int
+gd_mgmt_v3_lock (glusterd_op_t op, dict_t *op_ctx,
+ glusterd_peerinfo_t *peerinfo,
+ struct syncargs *args, uuid_t my_uuid,
+ uuid_t recv_uuid)
+{
+ gd1_mgmt_v3_lock_req req = {{0},};
+ glusterd_conf_t *conf = THIS->private;
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (op_ctx);
+ GF_ASSERT (peerinfo);
+ GF_ASSERT (args);
+
+ ret = dict_allocate_and_serialize (op_ctx,
+ &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret)
+ goto out;
+
+ uuid_copy (req.uuid, my_uuid);
+ req.op = op;
+ synclock_unlock (&conf->big_lock);
+ ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo,
+ &gd_mgmt_v3_prog,
+ GLUSTERD_MGMT_V3_LOCK,
+ gd_mgmt_v3_lock_cbk,
+ (xdrproc_t) xdr_gd1_mgmt_v3_lock_req);
+ synclock_lock (&conf->big_lock);
+out:
+ GF_FREE (req.dict.dict_val);
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_mgmt_v3_initiate_lockdown (glusterd_conf_t *conf, glusterd_op_t op,
+ dict_t *dict, char **op_errstr, int npeers,
+ gf_boolean_t *is_acquired)
+{
+ char *volname = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ int32_t ret = -1;
+ int32_t peer_cnt = 0;
+ struct syncargs args = {0};
+ struct list_head *peers = NULL;
+ uuid_t peer_uuid = {0};
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (conf);
+ GF_ASSERT (dict);
+ GF_ASSERT (op_errstr);
+ GF_ASSERT (is_acquired);
+
+ peers = &conf->xaction_peers;
+
+ /* Trying to acquire multiple mgmt_v3 locks on local node */
+ ret = glusterd_multiple_mgmt_v3_lock (dict, MY_UUID);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to acquire mgmt_v3 locks on localhost");
+ goto out;
+ }
+
+ *is_acquired = _gf_true;
+
+ if (!npeers) {
+ ret = 0;
+ goto out;
+ }
+
+ /* Sending mgmt_v3 lock req to other nodes in the cluster */
+ gd_syncargs_init (&args, NULL);
+ synctask_barrier_init((&args));
+ peer_cnt = 0;
+ list_for_each_entry (peerinfo, peers, op_peers_list) {
+ gd_mgmt_v3_lock (op, dict, peerinfo, &args,
+ MY_UUID, peer_uuid);
+ peer_cnt++;
+ }
+ gd_synctask_barrier_wait((&args), peer_cnt);
+
+ if (args.errstr)
+ *op_errstr = gf_strdup (args.errstr);
+
+ ret = args.op_ret;
+
+ gf_log (this->name, GF_LOG_DEBUG, "Sent lock op req for %s "
+ "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret);
+out:
+ if (ret) {
+ if (*op_errstr)
+ gf_log (this->name, GF_LOG_ERROR, "%s",
+ *op_errstr);
+
+ if (volname)
+ ret = gf_asprintf (op_errstr,
+ "Another transaction is in progress "
+ "for %s. Please try again after "
+ "sometime.", volname);
+ else
+ ret = gf_asprintf (op_errstr,
+ "Another transaction is in progress "
+ "Please try again after sometime.");
+
+ if (ret == -1)
+ *op_errstr = NULL;
+
+ ret = -1;
+ }
+
+ return ret;
+}
+
+int
+glusterd_pre_validate_aggr_rsp_dict (glusterd_op_t op, dict_t *aggr, dict_t *rsp)
+{
+ int32_t ret = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (aggr);
+ GF_ASSERT (rsp);
+
+ switch (op) {
+ case GD_OP_SNAP:
+ ret = glusterd_snap_pre_validate_use_rsp_dict (aggr, rsp);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to aggregate prevalidate "
+ "response dictionaries.");
+ goto out;
+ }
+ break;
+ default:
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR, "Invalid op (%s)", gd_op_list[op]);
+
+ break;
+ }
+out:
+ return ret;
+}
+
+int32_t
+gd_mgmt_v3_pre_validate_cbk_fn (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int32_t ret = -1;
+ struct syncargs *args = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ gd1_mgmt_v3_pre_val_rsp rsp = {{0},};
+ call_frame_t *frame = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = -1;
+ dict_t *rsp_dict = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+ GF_ASSERT (myframe);
+
+ frame = myframe;
+ args = frame->local;
+ peerinfo = frame->cookie;
+ frame->local = NULL;
+ frame->cookie = NULL;
+
+ if (-1 == req->rpc_status) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ if (!iov) {
+ gf_log (this->name, GF_LOG_ERROR, "iov is NULL");
+ op_errno = EINVAL;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gd1_mgmt_v3_pre_val_rsp);
+ if (ret < 0)
+ goto out;
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ rsp_dict = dict_new ();
+
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &rsp_dict);
+ if (ret < 0) {
+ GF_FREE (rsp.dict.dict_val);
+ goto out;
+ } else {
+ rsp_dict->extra_stdfree = rsp.dict.dict_val;
+ }
+ }
+
+ uuid_copy (args->uuid, rsp.uuid);
+ pthread_mutex_lock (&args->lock_dict);
+ {
+ ret = glusterd_pre_validate_aggr_rsp_dict (rsp.op, args->dict,
+ rsp_dict);
+ }
+ pthread_mutex_unlock (&args->lock_dict);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s",
+ "Failed to aggregate response from "
+ " node/brick");
+ if (!rsp.op_ret)
+ op_ret = ret;
+ else {
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+ }
+ } else {
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+ }
+
+out:
+ if (rsp_dict)
+ dict_unref (rsp_dict);
+
+ gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL,
+ GLUSTERD_MGMT_V3_PRE_VALIDATE,
+ peerinfo, rsp.uuid);
+
+ if (rsp.op_errstr)
+ free (rsp.op_errstr);
+
+ STACK_DESTROY (frame->root);
+ synctask_barrier_wake(args);
+ return 0;
+}
+
+int32_t
+gd_mgmt_v3_pre_validate_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ gd_mgmt_v3_pre_validate_cbk_fn);
+}
+
+int
+gd_mgmt_v3_pre_validate_req (glusterd_op_t op, dict_t *op_ctx,
+ glusterd_peerinfo_t *peerinfo,
+ struct syncargs *args, uuid_t my_uuid,
+ uuid_t recv_uuid)
+{
+ int32_t ret = -1;
+ gd1_mgmt_v3_pre_val_req req = {{0},};
+ glusterd_conf_t *conf = THIS->private;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (op_ctx);
+ GF_ASSERT (peerinfo);
+ GF_ASSERT (args);
+
+ ret = dict_allocate_and_serialize (op_ctx,
+ &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret)
+ goto out;
+
+ uuid_copy (req.uuid, my_uuid);
+ req.op = op;
+ synclock_unlock (&conf->big_lock);
+ ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo,
+ &gd_mgmt_v3_prog,
+ GLUSTERD_MGMT_V3_PRE_VALIDATE,
+ gd_mgmt_v3_pre_validate_cbk,
+ (xdrproc_t) xdr_gd1_mgmt_v3_pre_val_req);
+ synclock_lock (&conf->big_lock);
+out:
+ GF_FREE (req.dict.dict_val);
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_mgmt_v3_pre_validate (glusterd_conf_t *conf, glusterd_op_t op,
+ dict_t *req_dict, char **op_errstr, int npeers)
+{
+ int32_t ret = -1;
+ int32_t peer_cnt = 0;
+ dict_t *rsp_dict = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ struct syncargs args = {0};
+ struct list_head *peers = NULL;
+ uuid_t peer_uuid = {0};
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (conf);
+ GF_ASSERT (req_dict);
+ GF_ASSERT (op_errstr);
+
+ peers = &conf->xaction_peers;
+
+ rsp_dict = dict_new ();
+ if (!rsp_dict) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to create response dictionary");
+ goto out;
+ }
+
+ /* Pre Validation on local node */
+ ret = gd_mgmt_v3_pre_validate_fn (op, req_dict, op_errstr,
+ rsp_dict);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Pre Validation failed for "
+ "operation %s on local node",
+ gd_op_list[op]);
+
+ if (*op_errstr == NULL) {
+ ret = gf_asprintf (op_errstr,
+ "Pre-validation failed "
+ "on localhost. Please "
+ "check log file for details");
+ if (ret == -1)
+ *op_errstr = NULL;
+
+ ret = -1;
+ }
+ goto out;
+ }
+
+ ret = glusterd_pre_validate_aggr_rsp_dict (op, req_dict,
+ rsp_dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s",
+ "Failed to aggregate response from "
+ " node/brick");
+ goto out;
+ }
+
+ dict_unref (rsp_dict);
+ rsp_dict = NULL;
+
+ if (!npeers) {
+ ret = 0;
+ goto out;
+ }
+
+ /* Sending Pre Validation req to other nodes in the cluster */
+ gd_syncargs_init (&args, req_dict);
+ synctask_barrier_init((&args));
+ peer_cnt = 0;
+ list_for_each_entry (peerinfo, peers, op_peers_list) {
+ gd_mgmt_v3_pre_validate_req (op, req_dict, peerinfo, &args,
+ MY_UUID, peer_uuid);
+ peer_cnt++;
+ }
+ gd_synctask_barrier_wait((&args), peer_cnt);
+
+ if (args.op_ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Pre Validation failed on peers");
+
+ if (args.errstr)
+ *op_errstr = gf_strdup (args.errstr);
+ }
+
+ ret = args.op_ret;
+
+ gf_log (this->name, GF_LOG_DEBUG, "Sent pre valaidation req for %s "
+ "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret);
+out:
+ return ret;
+}
+
+int
+glusterd_mgmt_v3_build_payload (dict_t **req, char **op_errstr, dict_t *dict,
+ glusterd_op_t op)
+{
+ int32_t ret = -1;
+ dict_t *req_dict = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+ GF_ASSERT (op_errstr);
+ GF_ASSERT (dict);
+
+ req_dict = dict_new ();
+ if (!req_dict)
+ goto out;
+
+ switch (op) {
+ case GD_OP_SNAP:
+ dict_copy (dict, req_dict);
+ break;
+ default:
+ break;
+ }
+
+ *req = req_dict;
+ ret = 0;
+out:
+ return ret;
+}
+
+int32_t
+gd_mgmt_v3_brick_op_cbk_fn (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int32_t ret = -1;
+ struct syncargs *args = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ gd1_mgmt_v3_brick_op_rsp rsp = {{0},};
+ call_frame_t *frame = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+ GF_ASSERT (myframe);
+
+ frame = myframe;
+ args = frame->local;
+ peerinfo = frame->cookie;
+ frame->local = NULL;
+ frame->cookie = NULL;
+
+ /* If the operation failed, then iov can be NULL. So better check the
+ status of the operation and then worry about iov (if the status of
+ the command is success)
+ */
+ if (-1 == req->rpc_status) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ if (!iov) {
+ gf_log (this->name, GF_LOG_ERROR, "iov is NULL");
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gd1_mgmt_v3_brick_op_rsp);
+ if (ret < 0)
+ goto out;
+
+ uuid_copy (args->uuid, rsp.uuid);
+
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+
+out:
+ gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL,
+ GLUSTERD_MGMT_V3_BRICK_OP,
+ peerinfo, rsp.uuid);
+
+ if (rsp.op_errstr)
+ free (rsp.op_errstr);
+
+ free (rsp.dict.dict_val);
+
+ STACK_DESTROY (frame->root);
+ synctask_barrier_wake(args);
+ return 0;
+}
+
+int32_t
+gd_mgmt_v3_brick_op_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ gd_mgmt_v3_brick_op_cbk_fn);
+}
+
+int
+gd_mgmt_v3_brick_op_req (glusterd_op_t op, dict_t *op_ctx,
+ glusterd_peerinfo_t *peerinfo,
+ struct syncargs *args, uuid_t my_uuid,
+ uuid_t recv_uuid)
+{
+ int32_t ret = -1;
+ gd1_mgmt_v3_brick_op_req req = {{0},};
+ glusterd_conf_t *conf = THIS->private;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (op_ctx);
+ GF_ASSERT (peerinfo);
+ GF_ASSERT (args);
+
+ ret = dict_allocate_and_serialize (op_ctx,
+ &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret)
+ goto out;
+
+ uuid_copy (req.uuid, my_uuid);
+ req.op = op;
+ synclock_unlock (&conf->big_lock);
+ ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo,
+ &gd_mgmt_v3_prog,
+ GLUSTERD_MGMT_V3_BRICK_OP,
+ gd_mgmt_v3_brick_op_cbk,
+ (xdrproc_t) xdr_gd1_mgmt_v3_brick_op_req);
+ synclock_lock (&conf->big_lock);
+out:
+ GF_FREE (req.dict.dict_val);
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_mgmt_v3_brick_op (glusterd_conf_t *conf, glusterd_op_t op,
+ dict_t *req_dict, char **op_errstr, int npeers)
+{
+ int32_t ret = -1;
+ int32_t peer_cnt = 0;
+ dict_t *rsp_dict = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ struct syncargs args = {0};
+ struct list_head *peers = NULL;
+ uuid_t peer_uuid = {0};
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (conf);
+ GF_ASSERT (req_dict);
+ GF_ASSERT (op_errstr);
+
+ peers = &conf->xaction_peers;
+
+ rsp_dict = dict_new ();
+ if (!rsp_dict) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to create response dictionary");
+ goto out;
+ }
+
+ /* Perform brick op on local node */
+ ret = gd_mgmt_v3_brick_op_fn (op, req_dict, op_errstr,
+ rsp_dict);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Brick ops failed for "
+ "operation %s on local node",
+ gd_op_list[op]);
+
+ if (*op_errstr == NULL) {
+ ret = gf_asprintf (op_errstr,
+ "Brick ops failed "
+ "on localhost. Please "
+ "check log file for details");
+ if (ret == -1)
+ *op_errstr = NULL;
+
+ ret = -1;
+ }
+ goto out;
+ }
+
+ dict_unref (rsp_dict);
+ rsp_dict = NULL;
+
+ if (!npeers) {
+ ret = 0;
+ goto out;
+ }
+
+ /* Sending brick op req to other nodes in the cluster */
+ gd_syncargs_init (&args, NULL);
+ synctask_barrier_init((&args));
+ peer_cnt = 0;
+ list_for_each_entry (peerinfo, peers, op_peers_list) {
+ gd_mgmt_v3_brick_op_req (op, req_dict, peerinfo, &args,
+ MY_UUID, peer_uuid);
+ peer_cnt++;
+ }
+ gd_synctask_barrier_wait((&args), peer_cnt);
+
+ if (args.op_ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Brick ops failed on peers");
+
+ if (args.errstr)
+ *op_errstr = gf_strdup (args.errstr);
+ }
+
+ ret = args.op_ret;
+
+ gf_log (this->name, GF_LOG_DEBUG, "Sent brick op req for %s "
+ "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret);
+out:
+ return ret;
+}
+
+int32_t
+gd_mgmt_v3_commit_cbk_fn (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int32_t ret = -1;
+ struct syncargs *args = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ gd1_mgmt_v3_commit_rsp rsp = {{0},};
+ call_frame_t *frame = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = -1;
+ dict_t *rsp_dict = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+ GF_ASSERT (myframe);
+
+ frame = myframe;
+ args = frame->local;
+ peerinfo = frame->cookie;
+ frame->local = NULL;
+ frame->cookie = NULL;
+
+ if (-1 == req->rpc_status) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ if (!iov) {
+ gf_log (this->name, GF_LOG_ERROR, "iov is NULL");
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gd1_mgmt_v3_commit_rsp);
+ if (ret < 0)
+ goto out;
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ rsp_dict = dict_new ();
+
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &rsp_dict);
+ if (ret < 0) {
+ GF_FREE (rsp.dict.dict_val);
+ goto out;
+ } else {
+ rsp_dict->extra_stdfree = rsp.dict.dict_val;
+ }
+ }
+
+ uuid_copy (args->uuid, rsp.uuid);
+ pthread_mutex_lock (&args->lock_dict);
+ {
+ ret = glusterd_syncop_aggr_rsp_dict (rsp.op, args->dict,
+ rsp_dict);
+ }
+ pthread_mutex_unlock (&args->lock_dict);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s",
+ "Failed to aggregate response from "
+ " node/brick");
+ if (!rsp.op_ret)
+ op_ret = ret;
+ else {
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+ }
+ } else {
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+ }
+
+out:
+ if (rsp_dict)
+ dict_unref (rsp_dict);
+
+ gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL,
+ GLUSTERD_MGMT_V3_COMMIT,
+ peerinfo, rsp.uuid);
+
+ STACK_DESTROY (frame->root);
+ synctask_barrier_wake(args);
+ return 0;
+}
+
+int32_t
+gd_mgmt_v3_commit_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ gd_mgmt_v3_commit_cbk_fn);
+}
+
+int
+gd_mgmt_v3_commit_req (glusterd_op_t op, dict_t *op_ctx,
+ glusterd_peerinfo_t *peerinfo,
+ struct syncargs *args, uuid_t my_uuid,
+ uuid_t recv_uuid)
+{
+ int32_t ret = -1;
+ gd1_mgmt_v3_commit_req req = {{0},};
+ glusterd_conf_t *conf = THIS->private;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (op_ctx);
+ GF_ASSERT (peerinfo);
+ GF_ASSERT (args);
+
+ ret = dict_allocate_and_serialize (op_ctx,
+ &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret)
+ goto out;
+
+ uuid_copy (req.uuid, my_uuid);
+ req.op = op;
+ synclock_unlock (&conf->big_lock);
+ ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo,
+ &gd_mgmt_v3_prog,
+ GLUSTERD_MGMT_V3_COMMIT,
+ gd_mgmt_v3_commit_cbk,
+ (xdrproc_t) xdr_gd1_mgmt_v3_commit_req);
+ synclock_lock (&conf->big_lock);
+out:
+ GF_FREE (req.dict.dict_val);
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_mgmt_v3_commit (glusterd_conf_t *conf, glusterd_op_t op,
+ dict_t *op_ctx, dict_t *req_dict,
+ char **op_errstr, int npeers)
+{
+ int32_t ret = -1;
+ int32_t peer_cnt = 0;
+ dict_t *rsp_dict = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ struct syncargs args = {0};
+ struct list_head *peers = NULL;
+ uuid_t peer_uuid = {0};
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (conf);
+ GF_ASSERT (op_ctx);
+ GF_ASSERT (req_dict);
+ GF_ASSERT (op_errstr);
+
+ peers = &conf->xaction_peers;
+
+ rsp_dict = dict_new ();
+ if (!rsp_dict) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to create response dictionary");
+ goto out;
+ }
+
+ /* Commit on local node */
+ ret = gd_mgmt_v3_commit_fn (op, req_dict, op_errstr,
+ rsp_dict);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Commit failed for "
+ "operation %s on local node",
+ gd_op_list[op]);
+
+ if (*op_errstr == NULL) {
+ ret = gf_asprintf (op_errstr,
+ "Commit failed "
+ "on localhost. Please "
+ "check log file for details.");
+ if (ret == -1)
+ *op_errstr = NULL;
+
+ ret = -1;
+ }
+ goto out;
+ }
+
+ ret = glusterd_syncop_aggr_rsp_dict (op, op_ctx,
+ rsp_dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s",
+ "Failed to aggregate response from "
+ " node/brick");
+ goto out;
+ }
+
+ dict_unref (rsp_dict);
+ rsp_dict = NULL;
+
+ if (!npeers) {
+ ret = 0;
+ goto out;
+ }
+
+ /* Sending commit req to other nodes in the cluster */
+ gd_syncargs_init (&args, op_ctx);
+ synctask_barrier_init((&args));
+ peer_cnt = 0;
+ list_for_each_entry (peerinfo, peers, op_peers_list) {
+ gd_mgmt_v3_commit_req (op, req_dict, peerinfo, &args,
+ MY_UUID, peer_uuid);
+ peer_cnt++;
+ }
+ gd_synctask_barrier_wait((&args), peer_cnt);
+
+ if (args.op_ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Commit failed on peers");
+
+ if (args.errstr)
+ *op_errstr = gf_strdup (args.errstr);
+ }
+
+ ret = args.op_ret;
+
+ gf_log (this->name, GF_LOG_DEBUG, "Sent commit req for %s to %d "
+ "peers. Returning %d", gd_op_list[op], peer_cnt, ret);
+out:
+ return ret;
+}
+
+int32_t
+gd_mgmt_v3_post_validate_cbk_fn (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int32_t ret = -1;
+ struct syncargs *args = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ gd1_mgmt_v3_post_val_rsp rsp = {{0},};
+ call_frame_t *frame = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+ GF_ASSERT (myframe);
+
+ frame = myframe;
+ args = frame->local;
+ peerinfo = frame->cookie;
+ frame->local = NULL;
+ frame->cookie = NULL;
+
+ if (-1 == req->rpc_status) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ if (!iov) {
+ gf_log (this->name, GF_LOG_ERROR, "iov is NULL");
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gd1_mgmt_v3_post_val_rsp);
+ if (ret < 0)
+ goto out;
+
+ uuid_copy (args->uuid, rsp.uuid);
+
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+
+out:
+ gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL,
+ GLUSTERD_MGMT_V3_POST_VALIDATE,
+ peerinfo, rsp.uuid);
+ if (rsp.op_errstr)
+ free (rsp.op_errstr);
+
+ free (rsp.dict.dict_val);
+ STACK_DESTROY (frame->root);
+ synctask_barrier_wake(args);
+ return 0;
+}
+
+int32_t
+gd_mgmt_v3_post_validate_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ gd_mgmt_v3_post_validate_cbk_fn);
+}
+
+int
+gd_mgmt_v3_post_validate_req (glusterd_op_t op, int32_t op_ret, dict_t *op_ctx,
+ glusterd_peerinfo_t *peerinfo,
+ struct syncargs *args, uuid_t my_uuid,
+ uuid_t recv_uuid)
+{
+ int32_t ret = -1;
+ gd1_mgmt_v3_post_val_req req = {{0},};
+ glusterd_conf_t *conf = THIS->private;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (op_ctx);
+ GF_ASSERT (peerinfo);
+ GF_ASSERT (args);
+
+ ret = dict_allocate_and_serialize (op_ctx,
+ &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret)
+ goto out;
+
+ uuid_copy (req.uuid, my_uuid);
+ req.op = op;
+ req.op_ret = op_ret;
+ synclock_unlock (&conf->big_lock);
+ ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo,
+ &gd_mgmt_v3_prog,
+ GLUSTERD_MGMT_V3_POST_VALIDATE,
+ gd_mgmt_v3_post_validate_cbk,
+ (xdrproc_t) xdr_gd1_mgmt_v3_post_val_req);
+ synclock_lock (&conf->big_lock);
+out:
+ GF_FREE (req.dict.dict_val);
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_mgmt_v3_post_validate (glusterd_conf_t *conf, glusterd_op_t op,
+ int32_t op_ret, dict_t *dict, dict_t *req_dict,
+ char **op_errstr, int npeers)
+{
+ int32_t ret = -1;
+ int32_t peer_cnt = 0;
+ dict_t *rsp_dict = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ struct syncargs args = {0};
+ struct list_head *peers = NULL;
+ uuid_t peer_uuid = {0};
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (conf);
+ GF_ASSERT (dict);
+ GF_ASSERT (req_dict);
+ GF_ASSERT (op_errstr);
+
+ peers = &conf->xaction_peers;
+ GF_ASSERT (peers);
+
+ rsp_dict = dict_new ();
+ if (!rsp_dict) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to create response dictionary");
+ goto out;
+ }
+
+ /* Copy the contents of dict like missed snaps info to req_dict */
+ dict_copy (dict, req_dict);
+
+ /* Post Validation on local node */
+ ret = gd_mgmt_v3_post_validate_fn (op, op_ret, req_dict, op_errstr,
+ rsp_dict);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Post Validation failed for "
+ "operation %s on local node",
+ gd_op_list[op]);
+
+ if (*op_errstr == NULL) {
+ ret = gf_asprintf (op_errstr,
+ "Post-validation failed "
+ "on localhost. Please check "
+ "log file for details");
+ if (ret == -1)
+ *op_errstr = NULL;
+
+ ret = -1;
+ }
+ goto out;
+ }
+
+ dict_unref (rsp_dict);
+ rsp_dict = NULL;
+
+ if (!npeers) {
+ ret = 0;
+ goto out;
+ }
+
+ /* Sending Post Validation req to other nodes in the cluster */
+ gd_syncargs_init (&args, req_dict);
+ synctask_barrier_init((&args));
+ peer_cnt = 0;
+ list_for_each_entry (peerinfo, peers, op_peers_list) {
+ gd_mgmt_v3_post_validate_req (op, op_ret, req_dict, peerinfo,
+ &args, MY_UUID, peer_uuid);
+ peer_cnt++;
+ }
+ gd_synctask_barrier_wait((&args), peer_cnt);
+
+ if (args.op_ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Post Validation failed on peers");
+
+ if (args.errstr)
+ *op_errstr = gf_strdup (args.errstr);
+ }
+
+ ret = args.op_ret;
+
+ gf_log (this->name, GF_LOG_DEBUG, "Sent post valaidation req for %s "
+ "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret);
+out:
+ return ret;
+}
+
+int32_t
+gd_mgmt_v3_unlock_cbk_fn (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int32_t ret = -1;
+ struct syncargs *args = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ gd1_mgmt_v3_unlock_rsp rsp = {{0},};
+ call_frame_t *frame = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+ GF_ASSERT (myframe);
+
+ frame = myframe;
+ args = frame->local;
+ peerinfo = frame->cookie;
+ frame->local = NULL;
+ frame->cookie = NULL;
+
+ if (-1 == req->rpc_status) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ if (!iov) {
+ gf_log (this->name, GF_LOG_ERROR, "iov is NULL");
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp);
+ if (ret < 0)
+ goto out;
+
+ uuid_copy (args->uuid, rsp.uuid);
+
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+
+out:
+ gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL,
+ GLUSTERD_MGMT_V3_UNLOCK,
+ peerinfo, rsp.uuid);
+ free (rsp.dict.dict_val);
+ STACK_DESTROY (frame->root);
+ synctask_barrier_wake(args);
+ return 0;
+}
+
+int32_t
+gd_mgmt_v3_unlock_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ gd_mgmt_v3_unlock_cbk_fn);
+}
+
+int
+gd_mgmt_v3_unlock (glusterd_op_t op, dict_t *op_ctx,
+ glusterd_peerinfo_t *peerinfo,
+ struct syncargs *args, uuid_t my_uuid,
+ uuid_t recv_uuid)
+{
+ int32_t ret = -1;
+ gd1_mgmt_v3_unlock_req req = {{0},};
+ glusterd_conf_t *conf = THIS->private;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (op_ctx);
+ GF_ASSERT (peerinfo);
+ GF_ASSERT (args);
+
+ ret = dict_allocate_and_serialize (op_ctx,
+ &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret)
+ goto out;
+
+ uuid_copy (req.uuid, my_uuid);
+ req.op = op;
+ synclock_unlock (&conf->big_lock);
+ ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo,
+ &gd_mgmt_v3_prog,
+ GLUSTERD_MGMT_V3_UNLOCK,
+ gd_mgmt_v3_unlock_cbk,
+ (xdrproc_t) xdr_gd1_mgmt_v3_unlock_req);
+ synclock_lock (&conf->big_lock);
+out:
+ GF_FREE (req.dict.dict_val);
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_mgmt_v3_release_peer_locks (glusterd_conf_t *conf, glusterd_op_t op,
+ dict_t *dict, int32_t op_ret,
+ char **op_errstr, int npeers,
+ gf_boolean_t is_acquired)
+{
+ int32_t ret = -1;
+ int32_t peer_cnt = 0;
+ uuid_t peer_uuid = {0};
+ xlator_t *this = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ struct syncargs args = {0};
+ struct list_head *peers = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (conf);
+ GF_ASSERT (dict);
+ GF_ASSERT (op_errstr);
+
+ peers = &conf->xaction_peers;
+
+ /* If the lock has not been held during this
+ * transaction, do not send unlock requests */
+ if (!is_acquired)
+ goto out;
+
+ if (!npeers) {
+ ret = 0;
+ goto out;
+ }
+
+ /* Sending mgmt_v3 unlock req to other nodes in the cluster */
+ gd_syncargs_init (&args, NULL);
+ synctask_barrier_init((&args));
+ peer_cnt = 0;
+ list_for_each_entry (peerinfo, peers, op_peers_list) {
+ gd_mgmt_v3_unlock (op, dict, peerinfo, &args,
+ MY_UUID, peer_uuid);
+ peer_cnt++;
+ }
+ gd_synctask_barrier_wait((&args), peer_cnt);
+
+ if (args.op_ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unlock failed on peers");
+
+ if (!op_ret && args.errstr)
+ *op_errstr = gf_strdup (args.errstr);
+ }
+
+ ret = args.op_ret;
+
+ gf_log (this->name, GF_LOG_DEBUG, "Sent unlock op req for %s "
+ "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret);
+
+out:
+ return ret;
+}
+
+int32_t
+glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op,
+ dict_t *dict)
+{
+ int32_t ret = -1;
+ int32_t op_ret = -1;
+ int32_t npeers = 0;
+ dict_t *req_dict = NULL;
+ dict_t *tmp_dict = NULL;
+ glusterd_conf_t *conf = NULL;
+ char *op_errstr = NULL;
+ xlator_t *this = NULL;
+ gf_boolean_t is_acquired = _gf_false;
+ uuid_t *originator_uuid = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+ GF_ASSERT (dict);
+ conf = this->private;
+ GF_ASSERT (conf);
+
+ /* Save the MY_UUID as the originator_uuid. This originator_uuid
+ * will be used by is_origin_glusterd() to determine if a node
+ * is the originator node for a command. */
+ originator_uuid = GF_CALLOC (1, sizeof(uuid_t),
+ gf_common_mt_uuid_t);
+ if (!originator_uuid) {
+ ret = -1;
+ goto out;
+ }
+
+ uuid_copy (*originator_uuid, MY_UUID);
+ ret = dict_set_bin (dict, "originator_uuid",
+ originator_uuid, sizeof (uuid_t));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set originator_uuid.");
+ goto out;
+ }
+
+ /* Marking the operation as complete synctasked */
+ ret = dict_set_int32 (dict, "is_synctasked", _gf_true);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set synctasked flag.");
+ goto out;
+ }
+
+ /* Use a copy at local unlock as cli response will be sent before
+ * the unlock and the volname in the dict might be removed */
+ tmp_dict = dict_new();
+ if (!tmp_dict) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to create dict");
+ goto out;
+ }
+ dict_copy (dict, tmp_dict);
+
+ /* BUILD PEERS LIST */
+ INIT_LIST_HEAD (&conf->xaction_peers);
+ npeers = gd_build_peers_list (&conf->peers, &conf->xaction_peers, op);
+
+ /* LOCKDOWN PHASE - Acquire mgmt_v3 locks */
+ ret = glusterd_mgmt_v3_initiate_lockdown (conf, op, dict, &op_errstr,
+ npeers, &is_acquired);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "mgmt_v3 lockdown failed.");
+ goto out;
+ }
+
+ /* BUILD PAYLOAD */
+ ret = glusterd_mgmt_v3_build_payload (&req_dict, &op_errstr, dict, op);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, LOGSTR_BUILD_PAYLOAD,
+ gd_op_list[op]);
+ if (op_errstr == NULL)
+ gf_asprintf (&op_errstr, OPERRSTR_BUILD_PAYLOAD);
+ goto out;
+ }
+
+ /* PRE-COMMIT VALIDATE PHASE */
+ ret = glusterd_mgmt_v3_pre_validate (conf, op, req_dict,
+ &op_errstr, npeers);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Pre Validation Failed");
+ goto out;
+ }
+
+ /* COMMIT OP PHASE */
+ ret = glusterd_mgmt_v3_commit (conf, op, dict, req_dict,
+ &op_errstr, npeers);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Commit Op Failed");
+ goto out;
+ }
+
+ /* POST-COMMIT VALIDATE PHASE */
+ /* As of now, post_validate is not handling any other
+ commands other than snapshot. So as of now, I am
+ sending 0 (op_ret as 0).
+ */
+ ret = glusterd_mgmt_v3_post_validate (conf, op, 0, dict, req_dict,
+ &op_errstr, npeers);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Post Validation Failed");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ op_ret = ret;
+ /* UNLOCK PHASE FOR PEERS*/
+ (void) glusterd_mgmt_v3_release_peer_locks (conf, op, dict,
+ op_ret, &op_errstr,
+ npeers, is_acquired);
+
+ /* LOCAL VOLUME(S) UNLOCK */
+ if (is_acquired) {
+ /* Trying to release multiple mgmt_v3 locks */
+ ret = glusterd_multiple_mgmt_v3_unlock (tmp_dict, MY_UUID);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to release mgmt_v3 locks on localhost");
+ op_ret = ret;
+ }
+ }
+
+ /* SEND CLI RESPONSE */
+ glusterd_op_send_cli_response (op, op_ret, 0, req, dict, op_errstr);
+
+ if (req_dict)
+ dict_unref (req_dict);
+
+ if (tmp_dict)
+ dict_unref (tmp_dict);
+
+ if (op_errstr) {
+ GF_FREE (op_errstr);
+ op_errstr = NULL;
+ }
+
+ return 0;
+}
+
+int32_t
+glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op,
+ dict_t *dict)
+{
+ int32_t ret = -1;
+ int32_t op_ret = -1;
+ int32_t npeers = 0;
+ dict_t *req_dict = NULL;
+ dict_t *tmp_dict = NULL;
+ glusterd_conf_t *conf = NULL;
+ char *op_errstr = NULL;
+ xlator_t *this = NULL;
+ gf_boolean_t is_acquired = _gf_false;
+ uuid_t *originator_uuid = NULL;
+ gf_boolean_t success = _gf_false;
+ char *tmp_errstr = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+ GF_ASSERT (dict);
+ conf = this->private;
+ GF_ASSERT (conf);
+
+ /* Save the MY_UUID as the originator_uuid. This originator_uuid
+ * will be used by is_origin_glusterd() to determine if a node
+ * is the originator node for a command. */
+ originator_uuid = GF_CALLOC (1, sizeof(uuid_t),
+ gf_common_mt_uuid_t);
+ if (!originator_uuid) {
+ ret = -1;
+ goto out;
+ }
+
+ uuid_copy (*originator_uuid, MY_UUID);
+ ret = dict_set_bin (dict, "originator_uuid",
+ originator_uuid, sizeof (uuid_t));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set originator_uuid.");
+ goto out;
+ }
+
+ /* Marking the operation as complete synctasked */
+ ret = dict_set_int32 (dict, "is_synctasked", _gf_true);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set synctasked flag.");
+ goto out;
+ }
+
+ /* Use a copy at local unlock as cli response will be sent before
+ * the unlock and the volname in the dict might be removed */
+ tmp_dict = dict_new();
+ if (!tmp_dict) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to create dict");
+ goto out;
+ }
+ dict_copy (dict, tmp_dict);
+
+ /* BUILD PEERS LIST */
+ INIT_LIST_HEAD (&conf->xaction_peers);
+ npeers = gd_build_peers_list (&conf->peers, &conf->xaction_peers, op);
+
+ /* LOCKDOWN PHASE - Acquire mgmt_v3 locks */
+ ret = glusterd_mgmt_v3_initiate_lockdown (conf, op, dict, &op_errstr,
+ npeers, &is_acquired);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "mgmt_v3 lockdown failed.");
+ goto out;
+ }
+
+ /* BUILD PAYLOAD */
+ ret = glusterd_mgmt_v3_build_payload (&req_dict, &op_errstr, dict, op);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, LOGSTR_BUILD_PAYLOAD,
+ gd_op_list[op]);
+ if (op_errstr == NULL)
+ gf_asprintf (&op_errstr, OPERRSTR_BUILD_PAYLOAD);
+ goto out;
+ }
+
+ /* PRE-COMMIT VALIDATE PHASE */
+ ret = glusterd_mgmt_v3_pre_validate (conf, op, req_dict,
+ &op_errstr, npeers);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Pre Validation Failed");
+ goto out;
+ }
+
+ /* BRICK OP PHASE for initiating barrier*/
+ ret = dict_set_int32 (req_dict, "barrier", 1);
+ if (ret)
+ goto out;
+ ret = glusterd_mgmt_v3_brick_op (conf, op, req_dict,
+ &op_errstr, npeers);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Brick Ops Failed");
+ goto unbarrier;
+ }
+
+ /* COMMIT OP PHASE */
+ /* TODO: As of now, the plan is to do quorum check before sending the
+ commit fop and if the quorum succeeds, then commit is sent to all
+ the other glusterds.
+ snap create functionality now creates the in memory and on disk
+ objects for the snapshot (marking them as incomplete), takes the lvm
+ snapshot and then updates the status of the in memory and on disk
+ snap objects as complete. Suppose one of the glusterds goes down
+ after taking the lvm snapshot, but before updating the snap object,
+ then treat it as a snapshot create failure and trigger cleanup.
+ i.e the number of commit responses received by the originator
+ glusterd shold be the same as the number of peers it has sent the
+ request to (i.e npeers variable). If not, then originator glusterd
+ will initiate cleanup in post-validate fop.
+ Question: What if one of the other glusterds goes down as explained
+ above and along with it the originator glusterd also goes down?
+ Who will initiate the cleanup?
+ */
+ ret = glusterd_mgmt_v3_commit (conf, op, dict, req_dict,
+ &op_errstr, npeers);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Commit Op Failed");
+ /* If the main op fails, we should save the error string.
+ Because, op_errstr will be used for unbarrier and
+ unlock ops also. We might lose the actual error that
+ caused the failure.
+ */
+ tmp_errstr = op_errstr;
+ op_errstr = NULL;
+ goto unbarrier;
+ }
+
+ success = _gf_true;
+unbarrier:
+ /* BRICK OP PHASE for removing the barrier*/
+ ret = dict_set_int32 (req_dict, "barrier", 0);
+ if (ret)
+ goto out;
+ ret = glusterd_mgmt_v3_brick_op (conf, op, req_dict,
+ &op_errstr, npeers);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Brick Ops Failed");
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ op_ret = ret;
+
+ if (success == _gf_false)
+ op_ret = -1;
+
+ /* POST-COMMIT VALIDATE PHASE */
+ ret = glusterd_mgmt_v3_post_validate (conf, op, op_ret, dict, req_dict,
+ &op_errstr, npeers);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Post Validation Failed");
+ op_ret = -1;
+ }
+
+ /* UNLOCK PHASE FOR PEERS*/
+ (void) glusterd_mgmt_v3_release_peer_locks (conf, op, dict,
+ op_ret, &op_errstr,
+ npeers, is_acquired);
+
+ /* If the commit op (snapshot taking) failed, then the error is stored
+ in tmp_errstr and unbarrier is called. Suppose, if unbarrier also
+ fails, then the error happened in unbarrier is logged and freed.
+ The error happened in commit op, which is stored in tmp_errstr
+ is sent to cli.
+ */
+ if (tmp_errstr) {
+ if (op_errstr) {
+ gf_log (this->name, GF_LOG_ERROR, "unbarrier brick op"
+ "failed with the error %s", op_errstr);
+ GF_FREE (op_errstr);
+ op_errstr = NULL;
+ }
+ op_errstr = tmp_errstr;
+ }
+
+ /* LOCAL VOLUME(S) UNLOCK */
+ if (is_acquired) {
+ /* Trying to release multiple mgmt_v3 locks */
+ ret = glusterd_multiple_mgmt_v3_unlock (tmp_dict, MY_UUID);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to release mgmt_v3 locks on localhost");
+ op_ret = ret;
+ }
+ }
+
+ /* SEND CLI RESPONSE */
+ glusterd_op_send_cli_response (op, op_ret, 0, req, dict, op_errstr);
+
+ if (req_dict)
+ dict_unref (req_dict);
+
+ if (tmp_dict)
+ dict_unref (tmp_dict);
+
+ if (op_errstr) {
+ GF_FREE (op_errstr);
+ op_errstr = NULL;
+ }
+
+ return 0;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-mgmt.h
new file mode 100644
index 000000000..b185a9bec
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.h
@@ -0,0 +1,45 @@
+/*
+ Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_MGMT_H_
+#define _GLUSTERD_MGMT_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+int32_t
+gd_mgmt_v3_pre_validate_fn (glusterd_op_t op, dict_t *dict,
+ char **op_errstr, dict_t *rsp_dict);
+
+int32_t
+gd_mgmt_v3_brick_op_fn (glusterd_op_t op, dict_t *dict,
+ char **op_errstr, dict_t *rsp_dict);
+
+int32_t
+gd_mgmt_v3_commit_fn (glusterd_op_t op, dict_t *dict,
+ char **op_errstr, dict_t *rsp_dict);
+
+int32_t
+gd_mgmt_v3_post_validate_fn (glusterd_op_t op, int32_t op_ret, dict_t *dict,
+ char **op_errstr, dict_t *rsp_dict);
+
+int32_t
+glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op,
+ dict_t *dict);
+
+int32_t
+glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op,
+ dict_t *dict);
+
+int
+glusterd_snap_pre_validate_use_rsp_dict (dict_t *dst, dict_t *src);
+
+#endif /* _GLUSTERD_MGMT_H_ */
diff --git a/xlators/mgmt/glusterd/src/glusterd-mountbroker.c b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c
new file mode 100644
index 000000000..0d67d1303
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c
@@ -0,0 +1,693 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+#include <inttypes.h>
+#include <fnmatch.h>
+#include <pwd.h>
+
+#include "globals.h"
+#include "glusterfs.h"
+#include "compat.h"
+#include "dict.h"
+#include "list.h"
+#include "logging.h"
+#include "defaults.h"
+#include "compat.h"
+#include "compat-errno.h"
+#include "run.h"
+#include "glusterd-mem-types.h"
+#include "glusterd.h"
+#include "glusterd-utils.h"
+#include "common-utils.h"
+#include "glusterd-mountbroker.h"
+#include "glusterd-op-sm.h"
+
+static int
+seq_dict_foreach (dict_t *dict,
+ int (*fn)(char *str, void *data),
+ void *data)
+{
+ char index[] = "4294967296"; // 1<<32
+ int i = 0;
+ char *val = NULL;
+ int ret = 0;
+
+ for (;;i++) {
+ snprintf(index, sizeof(index), "%d", i);
+ ret = dict_get_str (dict, index, &val);
+ if (ret != 0)
+ return ret == -ENOENT ? 0 : ret;
+ ret = fn (val, data);
+ if (ret != 0)
+ return ret;
+ }
+}
+
+int
+parse_mount_pattern_desc (gf_mount_spec_t *mspec, char *pdesc)
+#define SYNTAX_ERR -2
+{
+ char *curs = NULL;
+ char *c2 = NULL;
+ char sc = '\0';
+ char **cc = NULL;
+ gf_mount_pattern_t *pat = NULL;
+ int pnum = 0;
+ int ret = 0;
+ int lastsup = -1;
+ int incl = -1;
+ char **pcc = NULL;
+ int pnc = 0;
+
+ skipwhite (&pdesc);
+
+ /* a bow to theory */
+ if (!*pdesc)
+ return 0;
+
+ /* count number of components, separated by '&' */
+ mspec->len = 0;
+ for (curs = pdesc; *curs; curs++) {
+ if (*curs == ')')
+ mspec->len++;
+ }
+
+ mspec->patterns = GF_CALLOC (mspec->len, sizeof (*mspec->patterns),
+ gf_gld_mt_mount_pattern);
+ if (!mspec->patterns) {
+ ret = -1;
+ goto out;
+ }
+
+ pat = mspec->patterns;
+ curs = pdesc;
+ skipwhite (&curs);
+ for (;;) {
+ incl = -1;
+
+ /* check for pattern signedness modifier */
+ if (*curs == '-') {
+ pat->negative = _gf_true;
+ curs++;
+ }
+
+ /* now should come condition specifier,
+ * then opening paren
+ */
+ c2 = nwstrtail (curs, "SUB(");
+ if (c2) {
+ pat->condition = SET_SUB;
+ goto got_cond;
+ }
+ c2 = nwstrtail (curs, "SUP(");
+ if (c2) {
+ pat->condition = SET_SUPER;
+ lastsup = pat - mspec->patterns;
+ goto got_cond;
+ }
+ c2 = nwstrtail (curs, "EQL(");
+ if (c2) {
+ pat->condition = SET_EQUAL;
+ goto got_cond;
+ }
+ c2 = nwstrtail (curs, "MEET(");
+ if (c2) {
+ pat->condition = SET_INTERSECT;
+ goto got_cond;
+ }
+ c2 = nwstrtail (curs, "SUB+(");
+ if (c2) {
+ pat->condition = SET_SUB;
+ incl = lastsup;
+ goto got_cond;
+ }
+
+ ret = SYNTAX_ERR;
+ goto out;
+
+ got_cond:
+ curs = c2;
+ skipwhite (&curs);
+ /* count the number of components for pattern */
+ pnum = *curs == ')' ? 0 : 1;
+ for (c2 = curs ;*c2 != ')';) {
+ if (strchr ("&|", *c2)) {
+ ret = SYNTAX_ERR;
+ goto out;
+ }
+ while (!strchr ("|&)", *c2) && !isspace (*c2))
+ c2++;
+ skipwhite (&c2);
+ switch (*c2) {
+ case ')':
+ break;
+ case '\0':
+ case '&':
+ ret = SYNTAX_ERR;
+ goto out;
+ case '|':
+ *c2 = ' ';
+ skipwhite (&c2);
+ /* fall through */
+ default:
+ pnum++;
+ }
+ }
+ if (incl >= 0) {
+ pnc = 0;
+ for (pcc = mspec->patterns[incl].components; *pcc; pcc++)
+ pnc++;
+ pnum += pnc;
+ }
+ pat->components = GF_CALLOC (pnum + 1, sizeof (*pat->components),
+ gf_gld_mt_mount_comp_container);
+ if (!pat->components) {
+ ret = -1;
+ goto out;
+ }
+
+ cc = pat->components;
+ /* copy over included component set */
+ if (incl >= 0) {
+ memcpy (pat->components,
+ mspec->patterns[incl].components,
+ pnc * sizeof (*pat->components));
+ cc += pnc;
+ }
+ /* parse and add components */
+ c2 = ""; /* reset c2 */
+ while (*c2 != ')') {
+ c2 = curs;
+ while (!isspace (*c2) && *c2 != ')')
+ c2++;
+ sc = *c2;
+ *c2 = '\0';;
+ *cc = gf_strdup (curs);
+ if (!*cc) {
+ ret = -1;
+ goto out;
+ }
+ *c2 = sc;
+ skipwhite (&c2);
+ curs = c2;
+ cc++;
+ }
+
+ curs++;
+ skipwhite (&curs);
+ if (*curs == '&') {
+ curs++;
+ skipwhite (&curs);
+ }
+
+ if (!*curs)
+ break;
+ pat++;
+ }
+
+ out:
+ if (ret == SYNTAX_ERR) {
+ gf_log ("", GF_LOG_ERROR, "cannot parse mount patterns %s",
+ pdesc);
+ }
+
+ /* We've allocted a lotta stuff here but don't bother with freeing
+ * on error, in that case we'll terminate anyway
+ */
+ return ret ? -1 : 0;
+}
+#undef SYNTAX_ERR
+
+
+const char *georep_mnt_desc_template =
+ "SUP("
+ "xlator-option=\\*-dht.assert-no-child-down=true "
+ "volfile-server=localhost "
+ "client-pid=%d "
+ "user-map-root=%s "
+ ")"
+ "SUB+("
+ "log-file="DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"*/* "
+ "log-level=* "
+ "volfile-id=* "
+ ")"
+ "MEET("
+ "%s"
+ ")";
+
+const char *hadoop_mnt_desc_template =
+ "SUP("
+ "volfile-server=%s "
+ "client-pid=%d "
+ "volfile-id=%s "
+ "user-map-root=%s "
+ ")"
+ "SUB+("
+ "log-file="DEFAULT_LOG_FILE_DIRECTORY"/"GHADOOP"*/* "
+ "log-level=* "
+ ")";
+
+int
+make_georep_mountspec (gf_mount_spec_t *mspec, const char *volnames,
+ char *user)
+{
+ char *georep_mnt_desc = NULL;
+ char *meetspec = NULL;
+ char *vols = NULL;
+ char *vol = NULL;
+ char *p = NULL;
+ char *savetok = NULL;
+ char *fa[3] = {0,};
+ size_t siz = 0;
+ int vc = 0;
+ int i = 0;
+ int ret = 0;
+
+ vols = gf_strdup ((char *)volnames);
+ if (!vols)
+ goto out;
+
+ for (vc = 1, p = vols; *p; p++) {
+ if (*p == ',')
+ vc++;
+ }
+ siz = strlen (volnames) + vc * strlen("volfile-id=");
+ meetspec = GF_CALLOC (1, siz + 1, gf_gld_mt_georep_meet_spec);
+ if (!meetspec)
+ goto out;
+
+ for (p = vols;;) {
+ vol = strtok_r (p, ",", &savetok);
+ if (!vol) {
+ GF_ASSERT (vc == 0);
+ break;
+ }
+ p = NULL;
+ strcat (meetspec, "volfile-id=");
+ strcat (meetspec, vol);
+ if (--vc > 0)
+ strcat (meetspec, " ");
+ }
+
+ ret = gf_asprintf (&georep_mnt_desc, georep_mnt_desc_template,
+ GF_CLIENT_PID_GSYNCD, user, meetspec);
+ if (ret == -1) {
+ georep_mnt_desc = NULL;
+ goto out;
+ }
+
+ ret = parse_mount_pattern_desc (mspec, georep_mnt_desc);
+
+ out:
+ fa[0] = meetspec;
+ fa[1] = vols;
+ fa[2] = georep_mnt_desc;
+
+ for (i = 0; i < 3; i++) {
+ if (fa[i] == NULL)
+ ret = -1;
+ else
+ GF_FREE (fa[i]);
+ }
+
+ return ret;
+}
+
+int
+make_ghadoop_mountspec (gf_mount_spec_t *mspec, const char *volname,
+ char *user, char *server)
+{
+ char *hadoop_mnt_desc = NULL;
+ int ret = 0;
+
+ ret = gf_asprintf (&hadoop_mnt_desc, hadoop_mnt_desc_template,
+ server, GF_CLIENT_PID_HADOOP, volname, user);
+ if (ret == -1)
+ return ret;
+
+ return parse_mount_pattern_desc (mspec, hadoop_mnt_desc);
+}
+
+static gf_boolean_t
+match_comp (char *str, char *patcomp)
+{
+ char *c1 = patcomp;
+ char *c2 = str;
+
+ GF_ASSERT (c1);
+ GF_ASSERT (c2);
+
+ while (*c1 == *c2) {
+ if (!*c1)
+ return _gf_true;
+ c1++;
+ c2++;
+ if (c1[-1] == '=')
+ break;
+ }
+
+ return fnmatch (c1, c2, 0) == 0 ? _gf_true : _gf_false;
+}
+
+struct gf_set_descriptor {
+ gf_boolean_t priv[2];
+ gf_boolean_t common;
+};
+
+static int
+_gf_set_dict_iter1 (char *val, void *data)
+{
+ void **dataa = data;
+ struct gf_set_descriptor *sd = dataa[0];
+ char **curs = dataa[1];
+ gf_boolean_t priv = _gf_true;
+
+ while (*curs) {
+ if (match_comp (val, *curs)) {
+ priv = _gf_false;
+ sd->common = _gf_true;
+ }
+ curs++;
+ }
+
+ if (priv)
+ sd->priv[0] = _gf_true;
+
+ return 0;
+}
+
+static int
+_gf_set_dict_iter2 (char *val, void *data)
+{
+ void **dataa = data;
+ gf_boolean_t *boo = dataa[0];
+ char *comp = dataa[1];
+
+ if (match_comp (val, comp))
+ *boo = _gf_true;
+
+ return 0;
+}
+
+static void
+relate_sets (struct gf_set_descriptor *sd, dict_t *argdict, char **complist)
+{
+ void *dataa[] = {NULL, NULL};
+ gf_boolean_t boo = _gf_false;
+
+ memset (sd, 0, sizeof (*sd));
+
+ dataa[0] = sd;
+ dataa[1] = complist;
+ seq_dict_foreach (argdict, _gf_set_dict_iter1, dataa);
+
+ while (*complist) {
+ boo = _gf_false;
+ dataa[0] = &boo;
+ dataa[1] = *complist;
+ seq_dict_foreach (argdict, _gf_set_dict_iter2, dataa);
+
+ if (boo)
+ sd->common = _gf_true;
+ else
+ sd->priv[1] = _gf_true;
+
+ complist++;
+ }
+}
+
+static int
+_arg_parse_uid (char *val, void *data)
+{
+ char *user = strtail (val, "user-map-root=");
+ struct passwd *pw = NULL;
+
+ if (!user)
+ return 0;
+ pw = getpwnam (user);
+ if (!pw)
+ return -EINVAL;
+
+ if (*(int *)data >= 0)
+ /* uid ambiguity, already found */
+ return -EINVAL;
+
+ *(int *)data = pw->pw_uid;
+ return 0;
+}
+
+static int
+evaluate_mount_request (gf_mount_spec_t *mspec, dict_t *argdict)
+{
+ struct gf_set_descriptor sd = {{0,},};
+ int i = 0;
+ int uid = -1;
+ int ret = 0;
+ gf_boolean_t match = _gf_false;
+
+ for (i = 0; i < mspec->len; i++) {
+ relate_sets (&sd, argdict, mspec->patterns[i].components);
+ switch (mspec->patterns[i].condition) {
+ case SET_SUB:
+ match = !sd.priv[0];
+ break;
+ case SET_SUPER:
+ match = !sd.priv[1];
+ break;
+ case SET_EQUAL:
+ match = (!sd.priv[0] && !sd.priv[1]);
+ break;
+ case SET_INTERSECT:
+ match = sd.common;
+ break;
+ default:
+ GF_ASSERT(!"unreached");
+ }
+ if (mspec->patterns[i].negative)
+ match = !match;
+
+ if (!match)
+ return -EPERM;
+ }
+
+ ret = seq_dict_foreach (argdict, _arg_parse_uid, &uid);
+ if (ret != 0)
+ return ret;
+
+ return uid;
+}
+
+static int
+_volname_get (char *val, void *data)
+{
+ char **volname = data;
+
+ *volname = strtail (val, "volfile-id=");
+
+ return *volname ? 1 : 0;
+}
+
+static int
+_runner_add (char *val, void *data)
+{
+ runner_t *runner = data;
+
+ runner_argprintf (runner, "--%s", val);
+
+ return 0;
+}
+
+int
+glusterd_do_mount (char *label, dict_t *argdict, char **path, int *op_errno)
+{
+ glusterd_conf_t *priv = NULL;
+ char *mountbroker_root = NULL;
+ gf_mount_spec_t *mspec = NULL;
+ int uid = -ENOENT;
+ char *volname = NULL;
+ glusterd_volinfo_t *vol = NULL;
+ char *mtptemp = NULL;
+ char *mntlink = NULL;
+ char *cookieswitch = NULL;
+ char *cookie = NULL;
+ char *sla = NULL;
+ struct stat st = {0,};
+ runner_t runner = {0,};
+ int ret = 0;
+ xlator_t *this = THIS;
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ GF_ASSERT (op_errno);
+ *op_errno = 0;
+
+ if (dict_get_str (this->options, "mountbroker-root",
+ &mountbroker_root) != 0) {
+ *op_errno = ENOENT;
+ goto out;
+ }
+
+ GF_ASSERT (label);
+ if (!*label) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ /* look up spec for label */
+ list_for_each_entry (mspec, &priv->mount_specs,
+ speclist) {
+ if (strcmp (mspec->label, label) != 0)
+ continue;
+ uid = evaluate_mount_request (mspec, argdict);
+ break;
+ }
+ if (uid < 0) {
+ *op_errno = -uid;
+ goto out;
+ }
+
+ /* some sanity check on arguments */
+ seq_dict_foreach (argdict, _volname_get, &volname);
+ if (!volname) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+ if (glusterd_volinfo_find (volname, &vol) != 0 ||
+ !glusterd_is_volume_started (vol)) {
+ *op_errno = ENOENT;
+ goto out;
+ }
+
+ /* go do mount */
+
+ /** create actual mount dir */
+
+ /*** "overload" string name to be possible to used for cookie
+ creation, see below */
+ ret = gf_asprintf (&mtptemp, "%s/user%d/mtpt-%s-XXXXXX/cookie",
+ mountbroker_root, uid, label);
+ if (ret == -1) {
+ mtptemp = NULL;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+ /*** hide cookie part */
+ cookieswitch = strrchr (mtptemp, '/');
+ *cookieswitch = '\0';
+
+ sla = strrchr (mtptemp, '/');
+ *sla = '\0';
+ ret = mkdir (mtptemp, 0700);
+ if (ret == 0)
+ ret = chown (mtptemp, uid, 0);
+ else if (errno == EEXIST)
+ ret = 0;
+ if (ret == -1) {
+ *op_errno = errno;
+ goto out;
+ }
+ ret = lstat (mtptemp, &st);
+ if (ret == -1) {
+ *op_errno = errno;
+ goto out;
+ }
+ if (!(S_ISDIR (st.st_mode) && (st.st_mode & ~S_IFMT) == 0700 &&
+ st.st_uid == uid && st.st_gid == 0)) {
+ *op_errno = EACCES;
+ goto out;
+ }
+ *sla = '/';
+
+ if (!mkdtemp (mtptemp)) {
+ *op_errno = errno;
+ goto out;
+ }
+
+ /** create private "cookie" symlink */
+
+ /*** occupy an entry in the hive dir via mkstemp */
+ ret = gf_asprintf (&cookie, "%s/"MB_HIVE"/mntXXXXXX",
+ mountbroker_root);
+ if (ret == -1) {
+ cookie = NULL;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+ ret = mkstemp (cookie);
+ if (ret == -1) {
+ *op_errno = errno;
+ goto out;
+ }
+ close (ret);
+
+ /*** assembly the path from cookie to mountpoint */
+ sla = strchr (sla - 1, '/');
+ GF_ASSERT (sla);
+ ret = gf_asprintf (&mntlink, "../user%d%s", uid, sla);
+ if (ret == -1) {
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ /*** create cookie link in (to-be) mountpoint,
+ move it over to the final place */
+ *cookieswitch = '/';
+ ret = symlink (mntlink, mtptemp);
+ if (ret != -1)
+ ret = rename (mtptemp, cookie);
+ *cookieswitch = '\0';
+ if (ret == -1) {
+ *op_errno = errno;
+ goto out;
+ }
+
+ /** invoke glusterfs on the mountpoint */
+
+ runinit (&runner);
+ runner_add_arg (&runner, SBIN_DIR"/glusterfs");
+ seq_dict_foreach (argdict, _runner_add, &runner);
+ runner_add_arg (&runner, mtptemp);
+ ret = runner_run_reuse (&runner);
+ if (ret == -1) {
+ *op_errno = EIO; /* XXX hacky fake */
+ runner_log (&runner, "", GF_LOG_ERROR, "command failed");
+ }
+ runner_end (&runner);
+
+ out:
+
+ if (*op_errno) {
+ ret = -1;
+ gf_log ("", GF_LOG_WARNING, "unsuccessful mount request (%s)",
+ strerror (*op_errno));
+ if (mtptemp) {
+ *cookieswitch = '/';
+ unlink (mtptemp);
+ *cookieswitch = '\0';
+ rmdir (mtptemp);
+ }
+ if (cookie) {
+ unlink (cookie);
+ GF_FREE (cookie);
+ }
+
+ } else {
+ ret = 0;
+ *path = cookie;
+ }
+
+ GF_FREE (mtptemp);
+
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-mountbroker.h b/xlators/mgmt/glusterd/src/glusterd-mountbroker.h
new file mode 100644
index 000000000..426252ebe
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-mountbroker.h
@@ -0,0 +1,42 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#define MB_HIVE "mb_hive"
+
+typedef enum {
+ SET_SUB = 1,
+ SET_SUPER,
+ SET_EQUAL,
+ SET_INTERSECT
+} gf_setrel_t;
+
+struct gf_mount_pattern {
+ char **components;
+ gf_setrel_t condition;
+ gf_boolean_t negative;
+};
+typedef struct gf_mount_pattern gf_mount_pattern_t;
+
+struct gf_mount_spec {
+ struct list_head speclist;
+ char *label;
+ gf_mount_pattern_t *patterns;
+ size_t len;
+};
+typedef struct gf_mount_spec gf_mount_spec_t;
+
+
+int parse_mount_pattern_desc (gf_mount_spec_t *mspec, char *pdesc);
+
+int make_georep_mountspec (gf_mount_spec_t *mspec, const char *volname,
+ char *user);
+int make_ghadoop_mountspec (gf_mount_spec_t *mspec, const char *volname,
+ char *user, char *server);
+
+int glusterd_do_mount (char *label, dict_t *argdict, char **path, int *op_errno);
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
new file mode 100644
index 000000000..1666f5e4d
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -0,0 +1,6243 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+#include <time.h>
+#include <sys/uio.h>
+#include <sys/resource.h>
+#include <sys/mount.h>
+
+#include <libgen.h>
+#include "uuid.h"
+
+#include "fnmatch.h"
+#include "xlator.h"
+#include "protocol-common.h"
+#include "glusterd.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "list.h"
+#include "dict.h"
+#include "compat.h"
+#include "compat-errno.h"
+#include "statedump.h"
+#include "glusterd-sm.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-utils.h"
+#include "glusterd-store.h"
+#include "glusterd-hooks.h"
+#include "glusterd-volgen.h"
+#include "glusterd-locks.h"
+#include "syscall.h"
+#include "cli1-xdr.h"
+#include "common-utils.h"
+#include "run.h"
+
+#include <sys/types.h>
+#include <signal.h>
+#include <sys/wait.h>
+
+#define ALL_VOLUME_OPTION_CHECK(volname, key, ret, op_errstr, label) \
+ do { \
+ gf_boolean_t _all = !strcmp ("all", volname); \
+ gf_boolean_t _ratio = !strcmp (key, \
+ GLUSTERD_QUORUM_RATIO_KEY); \
+ if (_all && !_ratio) { \
+ ret = -1; \
+ *op_errstr = gf_strdup ("Not a valid option for all " \
+ "volumes"); \
+ goto label; \
+ } else if (!_all && _ratio) { \
+ ret = -1; \
+ *op_errstr = gf_strdup ("Not a valid option for " \
+ "single volume"); \
+ goto label; \
+ } \
+ } while (0)
+
+static struct list_head gd_op_sm_queue;
+pthread_mutex_t gd_op_sm_lock;
+glusterd_op_info_t opinfo = {{0},};
+uuid_t global_txn_id = {"\0"}; /* To be used in
+ * heterogeneous
+ * cluster with no
+ * transaction ids */
+
+static dict_t *txn_opinfo;
+
+struct txn_opinfo_object_ {
+ glusterd_op_info_t opinfo;
+};
+typedef struct txn_opinfo_object_ txn_opinfo_obj;
+
+int32_t
+glusterd_txn_opinfo_dict_init ()
+{
+ int32_t ret = -1;
+
+ txn_opinfo = dict_new ();
+ if (!txn_opinfo) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+void
+glusterd_txn_opinfo_dict_fini ()
+{
+ if (txn_opinfo)
+ dict_destroy (txn_opinfo);
+}
+
+void
+glusterd_txn_opinfo_init (glusterd_op_info_t *opinfo,
+ glusterd_op_sm_state_info_t *state,
+ glusterd_op_t *op,
+ dict_t *op_ctx,
+ rpcsvc_request_t *req)
+{
+ GF_ASSERT (opinfo);
+
+ if (state)
+ opinfo->state = *state;
+
+ if (op)
+ opinfo->op = *op;
+
+ opinfo->op_ctx = dict_ref(op_ctx);
+
+ if (req)
+ opinfo->req = req;
+
+ return;
+}
+
+int32_t
+glusterd_get_txn_opinfo (uuid_t *txn_id, glusterd_op_info_t *opinfo)
+{
+ int32_t ret = -1;
+ txn_opinfo_obj *opinfo_obj = NULL;
+
+ if (!txn_id || !opinfo) {
+ gf_log ("", GF_LOG_ERROR,
+ "Empty transaction id or opinfo received.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_bin(txn_opinfo, uuid_utoa (*txn_id),
+ (void **) &opinfo_obj);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to get transaction opinfo");
+ goto out;
+ }
+
+ (*opinfo) = opinfo_obj->opinfo;
+
+ ret = 0;
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_set_txn_opinfo (uuid_t *txn_id, glusterd_op_info_t *opinfo)
+{
+ int32_t ret = -1;
+ txn_opinfo_obj *opinfo_obj = NULL;
+
+ if (!txn_id) {
+ gf_log ("", GF_LOG_ERROR, "Empty transaction id received.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_bin(txn_opinfo, uuid_utoa (*txn_id),
+ (void **) &opinfo_obj);
+ if (ret) {
+ opinfo_obj = GF_CALLOC (1, sizeof(txn_opinfo_obj),
+ gf_common_mt_txn_opinfo_obj_t);
+ if (!opinfo_obj) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_bin(txn_opinfo, uuid_utoa (*txn_id), opinfo_obj,
+ sizeof(txn_opinfo_obj));
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to set opinfo for transaction ID : %s",
+ uuid_utoa (*txn_id));
+ goto out;
+ }
+ }
+
+ opinfo_obj->opinfo = (*opinfo);
+
+ ret = 0;
+out:
+ if (ret)
+ if (opinfo_obj)
+ GF_FREE (opinfo_obj);
+
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_clear_txn_opinfo (uuid_t *txn_id)
+{
+ int32_t ret = -1;
+ glusterd_op_info_t txn_op_info = {{0},};
+
+ if (!txn_id) {
+ gf_log ("", GF_LOG_ERROR, "Empty transaction id received.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_get_txn_opinfo (txn_id, &txn_op_info);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Transaction opinfo not found");
+ goto out;
+ }
+
+ dict_unref (txn_op_info.op_ctx);
+
+ dict_del(txn_opinfo, uuid_utoa (*txn_id));
+
+ ret = 0;
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+static int glusterfs_port = GLUSTERD_DEFAULT_PORT;
+static char *glusterd_op_sm_state_names[] = {
+ "Default",
+ "Lock sent",
+ "Locked",
+ "Stage op sent",
+ "Staged",
+ "Commit op sent",
+ "Committed",
+ "Unlock sent",
+ "Stage op failed",
+ "Commit op failed",
+ "Brick op sent",
+ "Brick op failed",
+ "Brick op Committed",
+ "Brick op Commit failed",
+ "Ack drain",
+ "Invalid",
+};
+
+static char *glusterd_op_sm_event_names[] = {
+ "GD_OP_EVENT_NONE",
+ "GD_OP_EVENT_START_LOCK",
+ "GD_OP_EVENT_LOCK",
+ "GD_OP_EVENT_RCVD_ACC",
+ "GD_OP_EVENT_ALL_ACC",
+ "GD_OP_EVENT_STAGE_ACC",
+ "GD_OP_EVENT_COMMIT_ACC",
+ "GD_OP_EVENT_RCVD_RJT",
+ "GD_OP_EVENT_STAGE_OP",
+ "GD_OP_EVENT_COMMIT_OP",
+ "GD_OP_EVENT_UNLOCK",
+ "GD_OP_EVENT_START_UNLOCK",
+ "GD_OP_EVENT_ALL_ACK",
+ "GD_OP_EVENT_LOCAL_UNLOCK_NO_RESP",
+ "GD_OP_EVENT_INVALID"
+};
+
+extern struct volopt_map_entry glusterd_volopt_map[];
+
+char*
+glusterd_op_sm_state_name_get (int state)
+{
+ if (state < 0 || state >= GD_OP_STATE_MAX)
+ return glusterd_op_sm_state_names[GD_OP_STATE_MAX];
+ return glusterd_op_sm_state_names[state];
+}
+
+char*
+glusterd_op_sm_event_name_get (int event)
+{
+ if (event < 0 || event >= GD_OP_EVENT_MAX)
+ return glusterd_op_sm_event_names[GD_OP_EVENT_MAX];
+ return glusterd_op_sm_event_names[event];
+}
+
+void
+glusterd_destroy_lock_ctx (glusterd_op_lock_ctx_t *ctx)
+{
+ if (!ctx)
+ return;
+ GF_FREE (ctx);
+}
+
+void
+glusterd_set_volume_status (glusterd_volinfo_t *volinfo,
+ glusterd_volume_status status)
+{
+ GF_ASSERT (volinfo);
+ volinfo->status = status;
+}
+
+gf_boolean_t
+glusterd_is_volume_started (glusterd_volinfo_t *volinfo)
+{
+ GF_ASSERT (volinfo);
+ return (volinfo->status == GLUSTERD_STATUS_STARTED);
+}
+
+static int
+glusterd_op_sm_inject_all_acc (uuid_t *txn_id)
+{
+ int32_t ret = -1;
+ ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, txn_id, NULL);
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickinfo,
+ gd1_mgmt_brick_op_req **req, dict_t *dict)
+{
+ int ret = -1;
+ gd1_mgmt_brick_op_req *brick_req = NULL;
+ char *volname = NULL;
+ char name[1024] = {0,};
+ gf_xl_afr_op_t heal_op = GF_AFR_OP_INVALID;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ GF_ASSERT (op < GD_OP_MAX);
+ GF_ASSERT (op > GD_OP_NONE);
+ GF_ASSERT (req);
+
+
+ switch (op) {
+ case GD_OP_REMOVE_BRICK:
+ case GD_OP_STOP_VOLUME:
+ brick_req = GF_CALLOC (1, sizeof (*brick_req),
+ gf_gld_mt_mop_brick_req_t);
+ if (!brick_req)
+ goto out;
+ brick_req->op = GLUSTERD_BRICK_TERMINATE;
+ brick_req->name = "";
+ break;
+ case GD_OP_PROFILE_VOLUME:
+ brick_req = GF_CALLOC (1, sizeof (*brick_req),
+ gf_gld_mt_mop_brick_req_t);
+
+ if (!brick_req)
+ goto out;
+
+ brick_req->op = GLUSTERD_BRICK_XLATOR_INFO;
+ brick_req->name = brickinfo->path;
+
+ break;
+ case GD_OP_HEAL_VOLUME:
+ {
+ brick_req = GF_CALLOC (1, sizeof (*brick_req),
+ gf_gld_mt_mop_brick_req_t);
+ if (!brick_req)
+ goto out;
+
+ brick_req->op = GLUSTERD_BRICK_XLATOR_OP;
+ brick_req->name = "";
+ ret = dict_get_int32 (dict, "heal-op", (int32_t*)&heal_op);
+ if (ret)
+ goto out;
+ ret = dict_set_int32 (dict, "xl-op", heal_op);
+ }
+ break;
+ case GD_OP_STATUS_VOLUME:
+ {
+ brick_req = GF_CALLOC (1, sizeof (*brick_req),
+ gf_gld_mt_mop_brick_req_t);
+ if (!brick_req)
+ goto out;
+ brick_req->op = GLUSTERD_BRICK_STATUS;
+ brick_req->name = "";
+ }
+ break;
+ case GD_OP_REBALANCE:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ brick_req = GF_CALLOC (1, sizeof (*brick_req),
+ gf_gld_mt_mop_brick_req_t);
+ if (!brick_req)
+ goto out;
+
+ brick_req->op = GLUSTERD_BRICK_XLATOR_DEFRAG;
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+ snprintf (name, 1024, "%s-dht",volname);
+ brick_req->name = gf_strdup (name);
+
+ break;
+ case GD_OP_SNAP:
+ brick_req = GF_CALLOC (1, sizeof (*brick_req),
+ gf_gld_mt_mop_brick_req_t);
+ if (!brick_req)
+ goto out;
+
+ brick_req->op = GLUSTERD_VOLUME_BARRIER_OP;
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+ snprintf (name, 1024, "%s-server",volname);
+ brick_req->name = gf_strdup (name);
+
+ break;
+ default:
+ goto out;
+ break;
+ }
+
+ ret = dict_allocate_and_serialize (dict, &brick_req->input.input_val,
+ &brick_req->input.input_len);
+ if (ret)
+ goto out;
+ *req = brick_req;
+ ret = 0;
+
+out:
+ if (ret && brick_req)
+ GF_FREE (brick_req);
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_node_op_build_payload (glusterd_op_t op, gd1_mgmt_brick_op_req **req,
+ dict_t *dict)
+{
+ int ret = -1;
+ gd1_mgmt_brick_op_req *brick_req = NULL;
+
+ GF_ASSERT (op < GD_OP_MAX);
+ GF_ASSERT (op > GD_OP_NONE);
+ GF_ASSERT (req);
+
+ switch (op) {
+ case GD_OP_PROFILE_VOLUME:
+ brick_req = GF_CALLOC (1, sizeof (*brick_req),
+ gf_gld_mt_mop_brick_req_t);
+ if (!brick_req)
+ goto out;
+
+ brick_req->op = GLUSTERD_NODE_PROFILE;
+ brick_req->name = "";
+
+ break;
+
+ case GD_OP_STATUS_VOLUME:
+ brick_req = GF_CALLOC (1, sizeof (*brick_req),
+ gf_gld_mt_mop_brick_req_t);
+ if (!brick_req)
+ goto out;
+
+ brick_req->op = GLUSTERD_NODE_STATUS;
+ brick_req->name = "";
+
+ break;
+
+ default:
+ goto out;
+ }
+
+ ret = dict_allocate_and_serialize (dict, &brick_req->input.input_val,
+ &brick_req->input.input_len);
+
+ if (ret)
+ goto out;
+
+ *req = brick_req;
+ ret = 0;
+
+out:
+ if (ret && brick_req)
+ GF_FREE (brick_req);
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+static int
+glusterd_validate_quorum_options (xlator_t *this, char *fullkey, char *value,
+ char **op_errstr)
+{
+ int ret = 0;
+ char *key = NULL;
+ volume_option_t *opt = NULL;
+
+ if (!glusterd_is_quorum_option (fullkey))
+ goto out;
+ key = strchr (fullkey, '.');
+ key++;
+ opt = xlator_volume_option_get (this, key);
+ ret = xlator_option_validate (this, key, value, opt, op_errstr);
+out:
+ return ret;
+}
+
+static int
+glusterd_check_client_op_version_support (char *volname, uint32_t op_version,
+ char **op_errstr)
+{
+ int ret = 0;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ rpc_transport_t *xprt = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ pthread_mutex_lock (&priv->xprt_lock);
+ list_for_each_entry (xprt, &priv->xprt_list, list) {
+ if ((!strcmp(volname, xprt->peerinfo.volname)) &&
+ ((op_version > xprt->peerinfo.max_op_version) ||
+ (op_version < xprt->peerinfo.min_op_version))) {
+ ret = -1;
+ break;
+ }
+ }
+ pthread_mutex_unlock (&priv->xprt_lock);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "One or more clients "
+ "don't support the required op-version");
+ ret = gf_asprintf (op_errstr, "One or more connected clients "
+ "cannot support the feature being set. "
+ "These clients need to be upgraded or "
+ "disconnected before running this command"
+ " again");
+ return -1;
+ }
+ return 0;
+}
+
+static int
+glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr)
+{
+ int ret = -1;
+ char *volname = NULL;
+ int exists = 0;
+ char *key = NULL;
+ char *key_fixed = NULL;
+ char *value = NULL;
+ char str[100] = {0, };
+ int count = 0;
+ int dict_count = 0;
+ char errstr[2048] = {0, };
+ glusterd_volinfo_t *volinfo = NULL;
+ dict_t *val_dict = NULL;
+ gf_boolean_t global_opt = _gf_false;
+ glusterd_volinfo_t *voliter = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ uint32_t new_op_version = 0;
+ uint32_t local_new_op_version = 0;
+ uint32_t key_op_version = 0;
+ uint32_t local_key_op_version = 0;
+ gf_boolean_t origin_glusterd = _gf_true;
+ gf_boolean_t check_op_version = _gf_true;
+ gf_boolean_t all_vol = _gf_false;
+ struct volopt_map_entry *vme = NULL;
+
+ GF_ASSERT (dict);
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ val_dict = dict_new();
+ if (!val_dict)
+ goto out;
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name");
+ goto out;
+ }
+
+ /* Check if we can support the required op-version
+ * This check is not done on the originator glusterd. The originator
+ * glusterd sets this value.
+ */
+ origin_glusterd = is_origin_glusterd (dict);
+
+ if (!origin_glusterd) {
+ /* Check for v3.3.x origin glusterd */
+ check_op_version = dict_get_str_boolean (dict,
+ "check-op-version",
+ _gf_false);
+
+ if (check_op_version) {
+ ret = dict_get_uint32 (dict, "new-op-version",
+ &new_op_version);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get new_op_version");
+ goto out;
+ }
+
+ if ((new_op_version > GD_OP_VERSION_MAX) ||
+ (new_op_version < GD_OP_VERSION_MIN)) {
+ ret = -1;
+ snprintf (errstr, sizeof (errstr),
+ "Required op_version (%d) is not "
+ "supported", new_op_version);
+ gf_log (this->name, GF_LOG_ERROR, "%s", errstr);
+ goto out;
+ }
+ }
+ }
+
+ ret = dict_get_int32 (dict, "count", &dict_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Count(dict),not set in Volume-Set");
+ goto out;
+ }
+
+ if (dict_count == 0) {
+ /*No options would be specified of volume set help */
+ if (dict_get (dict, "help" )) {
+ ret = 0;
+ goto out;
+ }
+
+ if (dict_get (dict, "help-xml" )) {
+#if (HAVE_LIB_XML)
+ ret = 0;
+ goto out;
+#else
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "libxml not present in the system");
+ *op_errstr = gf_strdup ("Error: xml libraries not "
+ "present to produce xml-output");
+ goto out;
+#endif
+ }
+ gf_log (this->name, GF_LOG_ERROR, "No options received ");
+ *op_errstr = gf_strdup ("Options not specified");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name");
+ goto out;
+ }
+
+ if (strcasecmp (volname, "all") != 0) {
+ exists = glusterd_check_volume_exists (volname);
+ if (!exists) {
+ snprintf (errstr, sizeof (errstr),
+ FMTSTR_CHECK_VOL_EXISTS, volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", errstr);
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ FMTSTR_CHECK_VOL_EXISTS, volname);
+ goto out;
+ }
+
+ ret = glusterd_validate_volume_id (dict, volinfo);
+ if (ret)
+ goto out;
+ } else {
+ all_vol = _gf_true;
+ }
+
+ local_new_op_version = priv->op_version;
+
+ for ( count = 1; ret != 1 ; count++ ) {
+ global_opt = _gf_false;
+ sprintf (str, "key%d", count);
+ ret = dict_get_str (dict, str, &key);
+ if (ret)
+ break;
+
+ sprintf (str, "value%d", count);
+ ret = dict_get_str (dict, str, &value);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "invalid key,value pair in 'volume set'");
+ ret = -1;
+ goto out;
+ }
+
+ if (strcmp (key, "config.memory-accounting") == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "enabling memory accounting for volume %s",
+ volname);
+ ret = 0;
+ }
+
+ if (strcmp (key, "config.transport") == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "changing transport-type for volume %s",
+ volname);
+ ret = 0;
+ /* if value is none of 'tcp/rdma/tcp,rdma' error out */
+ if (!((strcasecmp (value, "rdma") == 0) ||
+ (strcasecmp (value, "tcp") == 0) ||
+ (strcasecmp (value, "tcp,rdma") == 0) ||
+ (strcasecmp (value, "rdma,tcp") == 0))) {
+ ret = snprintf (errstr, sizeof (errstr),
+ "transport-type %s does "
+ "not exist", value);
+ /* lets not bother about above return value,
+ its a failure anyways */
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (is_key_glusterd_hooks_friendly (key))
+ continue;
+
+ for (vme = &glusterd_volopt_map[0]; vme->key; vme++) {
+ if ((vme->validate_fn) &&
+ ((!strcmp (key, vme->key)) ||
+ (!strcmp (key, strchr (vme->key, '.') + 1)))) {
+ ret = vme->validate_fn (dict, key, value,
+ op_errstr);
+ if (ret)
+ goto out;
+ break;
+ }
+ }
+
+ exists = glusterd_check_option_exists (key, &key_fixed);
+ if (exists == -1) {
+ ret = -1;
+ goto out;
+ }
+
+ if (!exists) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Option with name: %s does not exist", key);
+ ret = snprintf (errstr, sizeof (errstr),
+ "option : %s does not exist",
+ key);
+ if (key_fixed)
+ snprintf (errstr + ret, sizeof (errstr) - ret,
+ "\nDid you mean %s?", key_fixed);
+ ret = -1;
+ goto out;
+ }
+
+ if (key_fixed)
+ key = key_fixed;
+ ALL_VOLUME_OPTION_CHECK (volname, key, ret, op_errstr, out);
+ ret = glusterd_validate_quorum_options (this, key, value,
+ op_errstr);
+ if (ret)
+ goto out;
+
+ local_key_op_version = glusterd_get_op_version_for_key (key);
+ if (local_key_op_version > local_new_op_version)
+ local_new_op_version = local_key_op_version;
+
+ sprintf (str, "op-version%d", count);
+ if (origin_glusterd) {
+ ret = dict_set_uint32 (dict, str, local_key_op_version);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set key-op-version in dict");
+ goto out;
+ }
+ } else if (check_op_version) {
+ ret = dict_get_uint32 (dict, str, &key_op_version);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get key-op-version from"
+ " dict");
+ goto out;
+ }
+ if (local_key_op_version != key_op_version) {
+ ret = -1;
+ snprintf (errstr, sizeof (errstr),
+ "option: %s op-version mismatch",
+ key);
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s, required op-version = %"PRIu32", "
+ "available op-version = %"PRIu32,
+ errstr, key_op_version,
+ local_key_op_version);
+ goto out;
+ }
+ }
+
+ if (glusterd_check_globaloption (key))
+ global_opt = _gf_true;
+
+ ret = dict_set_str (val_dict, key, value);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to set the options in 'volume set'");
+ ret = -1;
+ goto out;
+ }
+
+ *op_errstr = NULL;
+ if (!global_opt && !all_vol)
+ ret = glusterd_validate_reconfopts (volinfo, val_dict, op_errstr);
+ else if (!all_vol) {
+ voliter = NULL;
+ list_for_each_entry (voliter, &priv->volumes, vol_list) {
+ ret = glusterd_validate_globalopts (voliter, val_dict, op_errstr);
+ if (ret)
+ break;
+ }
+ }
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not create "
+ "temp volfile, some option failed: %s",
+ *op_errstr);
+ goto out;
+ }
+ dict_del (val_dict, key);
+
+ if (key_fixed) {
+ GF_FREE (key_fixed);
+ key_fixed = NULL;
+ }
+ }
+
+ // Check if all the connected clients support the new op-version
+ ret = glusterd_check_client_op_version_support (volname,
+ local_new_op_version,
+ op_errstr);
+ if (ret)
+ goto out;
+
+ if (origin_glusterd) {
+ ret = dict_set_uint32 (dict, "new-op-version",
+ local_new_op_version);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set new-op-version in dict");
+ goto out;
+ }
+ /* Set this value in dict so other peers know to check for
+ * op-version. This is a hack for 3.3.x compatibility
+ *
+ * TODO: Remove this and the other places this is referred once
+ * 3.3.x compatibility is not required
+ */
+ ret = dict_set_uint32 (dict, "check-op-version",
+ _gf_true);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set check-op-version in dict");
+ goto out;
+ }
+ }
+
+ ret = 0;
+
+out:
+ if (val_dict)
+ dict_unref (val_dict);
+
+ GF_FREE (key_fixed);
+ if (errstr[0] != '\0')
+ *op_errstr = gf_strdup (errstr);
+
+ if (ret) {
+ if (!(*op_errstr)) {
+ *op_errstr = gf_strdup ("Error, Validation Failed");
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Error, Cannot Validate option :%s",
+ *op_errstr);
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Error, Cannot Validate option");
+ }
+ }
+ return ret;
+}
+
+static int
+glusterd_op_stage_reset_volume (dict_t *dict, char **op_errstr)
+{
+ int ret = 0;
+ char *volname = NULL;
+ gf_boolean_t exists = _gf_false;
+ char msg[2048] = {0};
+ char *key = NULL;
+ char *key_fixed = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = dict_get_str (dict, "volname", &volname);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name");
+ goto out;
+ }
+
+ if (strcasecmp (volname, "all") != 0) {
+ exists = glusterd_check_volume_exists (volname);
+ if (!exists) {
+ snprintf (msg, sizeof (msg), FMTSTR_CHECK_VOL_EXISTS,
+ volname);
+ ret = -1;
+ goto out;
+ }
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ snprintf (msg, sizeof (msg), FMTSTR_CHECK_VOL_EXISTS,
+ volname);
+ goto out;
+ }
+
+ ret = glusterd_validate_volume_id (dict, volinfo);
+ if (ret)
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "key", &key);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get option key");
+ goto out;
+ }
+ if (strcmp(key, "all")) {
+ exists = glusterd_check_option_exists (key, &key_fixed);
+ if (exists == -1) {
+ ret = -1;
+ goto out;
+ }
+ if (!exists) {
+ ret = snprintf (msg, sizeof (msg),
+ "Option %s does not exist", key);
+ if (key_fixed)
+ snprintf (msg + ret, sizeof (msg) - ret,
+ "\nDid you mean %s?", key_fixed);
+ ret = -1;
+ goto out;
+ } else if (exists > 0) {
+ if (key_fixed)
+ key = key_fixed;
+ ALL_VOLUME_OPTION_CHECK (volname, key, ret,
+ op_errstr, out);
+ }
+ }
+
+out:
+ GF_FREE (key_fixed);
+
+ if (msg[0] != '\0') {
+ gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+
+
+static int
+glusterd_op_stage_sync_volume (dict_t *dict, char **op_errstr)
+{
+ int ret = -1;
+ char *volname = NULL;
+ char *hostname = NULL;
+ gf_boolean_t exists = _gf_false;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ char msg[2048] = {0,};
+ glusterd_volinfo_t *volinfo = NULL;
+
+ ret = dict_get_str (dict, "hostname", &hostname);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "hostname couldn't be "
+ "retrieved from msg");
+ *op_errstr = gf_strdup (msg);
+ goto out;
+ }
+
+ if (gf_is_local_addr (hostname)) {
+ //volname is not present in case of sync all
+ ret = dict_get_str (dict, "volname", &volname);
+ if (!ret) {
+ exists = glusterd_check_volume_exists (volname);
+ if (!exists) {
+ snprintf (msg, sizeof (msg), "Volume %s "
+ "does not exist", volname);
+ *op_errstr = gf_strdup (msg);
+ ret = -1;
+ goto out;
+ }
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret)
+ goto out;
+
+ } else {
+ ret = 0;
+ }
+ } else {
+ ret = glusterd_friend_find (NULL, hostname, &peerinfo);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "%s, is not a friend",
+ hostname);
+ *op_errstr = gf_strdup (msg);
+ goto out;
+ }
+
+ if (!peerinfo->connected) {
+ snprintf (msg, sizeof (msg), "%s, is not connected at "
+ "the moment", hostname);
+ *op_errstr = gf_strdup (msg);
+ ret = -1;
+ goto out;
+ }
+
+ }
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+static int
+glusterd_op_stage_status_volume (dict_t *dict, char **op_errstr)
+{
+ int ret = -1;
+ uint32_t cmd = 0;
+ char msg[2048] = {0,};
+ char *volname = NULL;
+ char *brick = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ dict_t *vol_opts = NULL;
+ gf_boolean_t nfs_disabled = _gf_false;
+ gf_boolean_t shd_enabled = _gf_true;
+
+ GF_ASSERT (dict);
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_uint32 (dict, "cmd", &cmd);
+ if (ret)
+ goto out;
+
+ if (cmd & GF_CLI_STATUS_ALL)
+ goto out;
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Unable to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ snprintf (msg, sizeof(msg), "Volume %s does not exist",
+ volname);
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_validate_volume_id (dict, volinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_is_volume_started (volinfo);
+ if (!ret) {
+ snprintf (msg, sizeof (msg), "Volume %s is not started",
+ volname);
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
+ ret = -1;
+ goto out;
+ }
+
+ vol_opts = volinfo->dict;
+
+ if ((cmd & GF_CLI_STATUS_NFS) != 0) {
+ nfs_disabled = dict_get_str_boolean (vol_opts, "nfs.disable",
+ _gf_false);
+ if (nfs_disabled) {
+ ret = -1;
+ snprintf (msg, sizeof (msg),
+ "NFS server is disabled for volume %s",
+ volname);
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
+ goto out;
+ }
+ } else if ((cmd & GF_CLI_STATUS_SHD) != 0) {
+ if (!glusterd_is_volume_replicate (volinfo)) {
+ ret = -1;
+ snprintf (msg, sizeof (msg),
+ "Volume %s is not of type replicate",
+ volname);
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
+ goto out;
+ }
+
+ shd_enabled = dict_get_str_boolean (vol_opts,
+ "cluster.self-heal-daemon",
+ _gf_true);
+ if (!shd_enabled) {
+ ret = -1;
+ snprintf (msg, sizeof (msg),
+ "Self-heal Daemon is disabled for volume %s",
+ volname);
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
+ goto out;
+ }
+
+ } else if ((cmd & GF_CLI_STATUS_BRICK) != 0) {
+ ret = dict_get_str (dict, "brick", &brick);
+ if (ret)
+ goto out;
+
+ ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo,
+ &brickinfo);
+ if (ret) {
+ snprintf (msg, sizeof(msg), "No brick %s in"
+ " volume %s", brick, volname);
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
+
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = 0;
+
+ out:
+ if (ret) {
+ if (msg[0] != '\0')
+ *op_errstr = gf_strdup (msg);
+ else
+ *op_errstr = gf_strdup ("Validation Failed for Status");
+ }
+
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning: %d", ret);
+ return ret;
+}
+
+
+static gf_boolean_t
+glusterd_is_profile_on (glusterd_volinfo_t *volinfo)
+{
+ int ret = -1;
+ gf_boolean_t is_latency_on = _gf_false;
+ gf_boolean_t is_fd_stats_on = _gf_false;
+
+ GF_ASSERT (volinfo);
+
+ ret = glusterd_volinfo_get_boolean (volinfo, VKEY_DIAG_CNT_FOP_HITS);
+ if (ret != -1)
+ is_fd_stats_on = ret;
+ ret = glusterd_volinfo_get_boolean (volinfo, VKEY_DIAG_LAT_MEASUREMENT);
+ if (ret != -1)
+ is_latency_on = ret;
+ if ((_gf_true == is_latency_on) &&
+ (_gf_true == is_fd_stats_on))
+ return _gf_true;
+ return _gf_false;
+}
+
+static int
+glusterd_op_stage_stats_volume (dict_t *dict, char **op_errstr)
+{
+ int ret = -1;
+ char *volname = NULL;
+ gf_boolean_t exists = _gf_false;
+ char msg[2048] = {0,};
+ int32_t stats_op = GF_CLI_STATS_NONE;
+ glusterd_volinfo_t *volinfo = NULL;
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Volume name get failed");
+ goto out;
+ }
+
+ exists = glusterd_check_volume_exists (volname);
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if ((!exists) || (ret < 0)) {
+ snprintf (msg, sizeof (msg), "Volume %s, "
+ "doesn't exist", volname);
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_validate_volume_id (dict, volinfo);
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32 (dict, "op", &stats_op);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Volume profile op get failed");
+ goto out;
+ }
+
+ if (GF_CLI_STATS_START == stats_op) {
+ if (_gf_true == glusterd_is_profile_on (volinfo)) {
+ snprintf (msg, sizeof (msg), "Profile on Volume %s is"
+ " already started", volinfo->volname);
+ ret = -1;
+ goto out;
+ }
+
+ }
+ if ((GF_CLI_STATS_STOP == stats_op) ||
+ (GF_CLI_STATS_INFO == stats_op)) {
+ if (_gf_false == glusterd_is_profile_on (volinfo)) {
+ snprintf (msg, sizeof (msg), "Profile on Volume %s is"
+ " not started", volinfo->volname);
+ ret = -1;
+
+ goto out;
+ }
+ }
+ if ((GF_CLI_STATS_TOP == stats_op) ||
+ (GF_CLI_STATS_INFO == stats_op)) {
+ if (_gf_false == glusterd_is_volume_started (volinfo)) {
+ snprintf (msg, sizeof (msg), "Volume %s is not started.",
+ volinfo->volname);
+ gf_log ("glusterd", GF_LOG_ERROR, "%s", msg);
+ ret = -1;
+ goto out;
+ }
+ }
+ ret = 0;
+out:
+ if (msg[0] != '\0') {
+ gf_log ("glusterd", GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ }
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+
+static int
+_delete_reconfig_opt (dict_t *this, char *key, data_t *value, void *data)
+{
+ int32_t *is_force = 0;
+
+ GF_ASSERT (data);
+ is_force = (int32_t*)data;
+
+ if (*is_force != 1) {
+ if (_gf_true == glusterd_check_voloption_flags (key,
+ OPT_FLAG_FORCE)) {
+ /* indicate to caller that we don't set the option
+ * due to being protected
+ */
+ *is_force = *is_force | GD_OP_PROTECTED;
+ goto out;
+ } else {
+ *is_force = *is_force | GD_OP_UNPROTECTED;
+ }
+ }
+
+ gf_log ("", GF_LOG_DEBUG, "deleting dict with key=%s,value=%s",
+ key, value->data);
+ dict_del (this, key);
+out:
+ return 0;
+}
+
+static int
+_delete_reconfig_global_opt (dict_t *this, char *key, data_t *value, void *data)
+{
+ int32_t *is_force = 0;
+
+ GF_ASSERT (data);
+ is_force = (int32_t*)data;
+
+ if (strcmp (GLUSTERD_GLOBAL_OPT_VERSION, key) == 0)
+ goto out;
+
+ _delete_reconfig_opt (this, key, value, data);
+out:
+ return 0;
+}
+
+static int
+glusterd_options_reset (glusterd_volinfo_t *volinfo, char *key,
+ int32_t *is_force)
+{
+ int ret = 0;
+ data_t *value = NULL;
+ char *key_fixed = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (volinfo->dict);
+ GF_ASSERT (key);
+
+ if (!strncmp(key, "all", 3))
+ dict_foreach (volinfo->dict, _delete_reconfig_opt, is_force);
+ else {
+ value = dict_get (volinfo->dict, key);
+ if (!value) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no value set for option %s", key);
+ goto out;
+ }
+ _delete_reconfig_opt (volinfo->dict, key, value, is_force);
+ }
+
+ gd_update_volume_op_versions (volinfo);
+
+ ret = glusterd_create_volfiles_and_notify_services (volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to create volfile for"
+ " 'volume reset'");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret)
+ goto out;
+
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+ ret = glusterd_nodesvcs_handle_reconfigure (volinfo);
+ if (ret)
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ GF_FREE (key_fixed);
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+static int
+glusterd_op_reset_all_volume_options (xlator_t *this, dict_t *dict)
+{
+ char *key = NULL;
+ char *key_fixed = NULL;
+ int ret = -1;
+ int32_t is_force = 0;
+ glusterd_conf_t *conf = NULL;
+ dict_t *dup_opt = NULL;
+ gf_boolean_t all = _gf_false;
+ char *next_version = NULL;
+ gf_boolean_t quorum_action = _gf_false;
+
+ conf = this->private;
+ ret = dict_get_str (dict, "key", &key);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get key");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "force", &is_force);
+ if (ret)
+ is_force = 0;
+
+ if (strcmp (key, "all")) {
+ ret = glusterd_check_option_exists (key, &key_fixed);
+ if (ret <= 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Option %s does not "
+ "exist", key);
+ ret = -1;
+ goto out;
+ }
+ } else {
+ all = _gf_true;
+ }
+
+ if (key_fixed)
+ key = key_fixed;
+
+ ret = -1;
+ dup_opt = dict_new ();
+ if (!dup_opt)
+ goto out;
+ if (!all) {
+ dict_copy (conf->opts, dup_opt);
+ dict_del (dup_opt, key);
+ }
+ ret = glusterd_get_next_global_opt_version_str (conf->opts,
+ &next_version);
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (dup_opt, GLUSTERD_GLOBAL_OPT_VERSION, next_version);
+ if (ret)
+ goto out;
+
+ ret = glusterd_store_options (this, dup_opt);
+ if (ret)
+ goto out;
+
+ if (glusterd_is_quorum_changed (conf->opts, key, NULL))
+ quorum_action = _gf_true;
+
+ ret = dict_set_dynstr (conf->opts, GLUSTERD_GLOBAL_OPT_VERSION,
+ next_version);
+ if (ret)
+ goto out;
+ else
+ next_version = NULL;
+
+ if (!all) {
+ dict_del (conf->opts, key);
+ } else {
+ dict_foreach (conf->opts, _delete_reconfig_global_opt,
+ &is_force);
+ }
+out:
+ GF_FREE (key_fixed);
+ if (dup_opt)
+ dict_unref (dup_opt);
+
+ gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret);
+ if (quorum_action)
+ glusterd_do_quorum_action ();
+ GF_FREE (next_version);
+ return ret;
+}
+
+static int
+glusterd_op_reset_volume (dict_t *dict, char **op_rspstr)
+{
+ glusterd_volinfo_t *volinfo = NULL;
+ int ret = -1;
+ char *volname = NULL;
+ char *key = NULL;
+ char *key_fixed = NULL;
+ int32_t is_force = 0;
+ gf_boolean_t quorum_action = _gf_false;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name" );
+ goto out;
+ }
+
+ if (strcasecmp (volname, "all") == 0) {
+ ret = glusterd_op_reset_all_volume_options (this, dict);
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "force", &is_force);
+ if (ret)
+ is_force = 0;
+
+ ret = dict_get_str (dict, "key", &key);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get option key");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, FMTSTR_CHECK_VOL_EXISTS,
+ volname);
+ goto out;
+ }
+
+ if (strcmp (key, "all") &&
+ glusterd_check_option_exists (key, &key_fixed) != 1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "volinfo dict inconsistency: option %s not found",
+ key);
+ ret = -1;
+ goto out;
+ }
+ if (key_fixed)
+ key = key_fixed;
+
+ if (glusterd_is_quorum_changed (volinfo->dict, key, NULL))
+ quorum_action = _gf_true;
+
+ ret = glusterd_options_reset (volinfo, key, &is_force);
+ if (ret == -1) {
+ gf_asprintf(op_rspstr, "Volume reset : failed");
+ } else if (is_force & GD_OP_PROTECTED) {
+ if (is_force & GD_OP_UNPROTECTED) {
+ gf_asprintf (op_rspstr, "All unprotected fields were"
+ " reset. To reset the protected fields,"
+ " use 'force'.");
+ } else {
+ ret = -1;
+ gf_asprintf (op_rspstr, "'%s' is protected. To reset"
+ " use 'force'.", key);
+ }
+ }
+
+out:
+ GF_FREE (key_fixed);
+ if (quorum_action)
+ glusterd_do_quorum_action ();
+
+ gf_log (this->name, GF_LOG_DEBUG, "'volume reset' returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_stop_bricks (glusterd_volinfo_t *volinfo)
+{
+ glusterd_brickinfo_t *brickinfo = NULL;
+
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ /*TODO: Need to change @del_brick in brick_stop to _gf_true
+ * once we enable synctask in peer rpc prog */
+ if (glusterd_brick_stop (volinfo, brickinfo, _gf_false))
+ return -1;
+ }
+
+ return 0;
+}
+
+int
+glusterd_start_bricks (glusterd_volinfo_t *volinfo)
+{
+ int ret = -1;
+ glusterd_brickinfo_t *brickinfo = NULL;
+
+ GF_ASSERT (volinfo);
+
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ ret = glusterd_brick_start (volinfo, brickinfo, _gf_false);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Failed to start %s:%s for %s",
+ brickinfo->hostname, brickinfo->path,
+ volinfo->volname);
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static int
+glusterd_op_set_all_volume_options (xlator_t *this, dict_t *dict)
+{
+ char *key = NULL;
+ char *key_fixed = NULL;
+ char *value = NULL;
+ char *dup_value = NULL;
+ int ret = -1;
+ glusterd_conf_t *conf = NULL;
+ dict_t *dup_opt = NULL;
+ char *next_version = NULL;
+ gf_boolean_t quorum_action = _gf_false;
+
+ conf = this->private;
+ ret = dict_get_str (dict, "key1", &key);
+ if (ret)
+ goto out;
+
+ ret = dict_get_str (dict, "value1", &value);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "invalid key,value pair in 'volume set'");
+ goto out;
+ }
+ ret = glusterd_check_option_exists (key, &key_fixed);
+ if (ret <= 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Invalid key %s", key);
+ ret = -1;
+ goto out;
+ }
+
+ if (key_fixed)
+ key = key_fixed;
+
+ ret = -1;
+ dup_opt = dict_new ();
+ if (!dup_opt)
+ goto out;
+ dict_copy (conf->opts, dup_opt);
+ ret = dict_set_str (dup_opt, key, value);
+ if (ret)
+ goto out;
+
+ ret = glusterd_get_next_global_opt_version_str (conf->opts,
+ &next_version);
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (dup_opt, GLUSTERD_GLOBAL_OPT_VERSION, next_version);
+ if (ret)
+ goto out;
+
+ dup_value = gf_strdup (value);
+ if (!dup_value)
+ goto out;
+
+ ret = glusterd_store_options (this, dup_opt);
+ if (ret)
+ goto out;
+
+ if (glusterd_is_quorum_changed (conf->opts, key, value))
+ quorum_action = _gf_true;
+
+ ret = dict_set_dynstr (conf->opts, GLUSTERD_GLOBAL_OPT_VERSION,
+ next_version);
+ if (ret)
+ goto out;
+ else
+ next_version = NULL;
+
+ ret = dict_set_dynstr (conf->opts, key, dup_value);
+ if (ret)
+ goto out;
+out:
+ GF_FREE (key_fixed);
+ if (dup_opt)
+ dict_unref (dup_opt);
+
+ gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret);
+ if (quorum_action)
+ glusterd_do_quorum_action ();
+ GF_FREE (next_version);
+ return ret;
+}
+
+static int
+glusterd_op_set_volume (dict_t *dict)
+{
+ int ret = 0;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *volname = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ int count = 1;
+ char *key = NULL;
+ char *key_fixed = NULL;
+ char *value = NULL;
+ char str[50] = {0, };
+ char *op_errstr = NULL;
+ gf_boolean_t global_opt = _gf_false;
+ gf_boolean_t global_opts_set = _gf_false;
+ glusterd_volinfo_t *voliter = NULL;
+ int32_t dict_count = 0;
+ gf_boolean_t check_op_version = _gf_false;
+ uint32_t new_op_version = 0;
+ gf_boolean_t quorum_action = _gf_false;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = dict_get_int32 (dict, "count", &dict_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Count(dict),not set in Volume-Set");
+ goto out;
+ }
+
+ if (dict_count == 0) {
+ ret = glusterd_volset_help (NULL, &op_errstr);
+ if (ret) {
+ op_errstr = (op_errstr)? op_errstr:
+ "Volume set help internal error";
+ gf_log (this->name, GF_LOG_ERROR, "%s", op_errstr);
+ }
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name");
+ goto out;
+ }
+
+ if (strcasecmp (volname, "all") == 0) {
+ ret = glusterd_op_set_all_volume_options (this, dict);
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, FMTSTR_CHECK_VOL_EXISTS,
+ volname);
+ goto out;
+ }
+
+ // TODO: Remove this once v3.3 compatability is not required
+ check_op_version = dict_get_str_boolean (dict, "check-op-version",
+ _gf_false);
+
+ if (check_op_version) {
+ ret = dict_get_uint32 (dict, "new-op-version", &new_op_version);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to get new op-version from dict");
+ goto out;
+ }
+ }
+
+ for (count = 1; ret != -1 ; count++) {
+
+ sprintf (str, "key%d", count);
+ ret = dict_get_str (dict, str, &key);
+ if (ret)
+ break;
+
+ sprintf (str, "value%d", count);
+ ret = dict_get_str (dict, str, &value);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "invalid key,value pair in 'volume set'");
+ ret = -1;
+ goto out;
+ }
+
+ if (strcmp (key, "config.memory-accounting") == 0) {
+ ret = gf_string2boolean (value,
+ &volinfo->memory_accounting);
+ }
+
+ if (strcmp (key, "config.transport") == 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "changing transport-type for volume %s to %s",
+ volname, value);
+ ret = 0;
+ if (strcasecmp (value, "rdma") == 0) {
+ volinfo->transport_type = GF_TRANSPORT_RDMA;
+ } else if (strcasecmp (value, "tcp") == 0) {
+ volinfo->transport_type = GF_TRANSPORT_TCP;
+ } else if ((strcasecmp (value, "tcp,rdma") == 0) ||
+ (strcasecmp (value, "rdma,tcp") == 0)) {
+ volinfo->transport_type =
+ GF_TRANSPORT_BOTH_TCP_RDMA;
+ } else {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (!is_key_glusterd_hooks_friendly (key)) {
+ ret = glusterd_check_option_exists (key, &key_fixed);
+ GF_ASSERT (ret);
+ if (ret <= 0) {
+ key_fixed = NULL;
+ goto out;
+ }
+ }
+
+ global_opt = _gf_false;
+ if (glusterd_check_globaloption (key)) {
+ global_opt = _gf_true;
+ global_opts_set = _gf_true;
+ }
+
+ if (!global_opt)
+ value = gf_strdup (value);
+
+ if (!value) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to set the options in 'volume set'");
+ ret = -1;
+ goto out;
+ }
+
+ if (key_fixed)
+ key = key_fixed;
+
+ if (glusterd_is_quorum_changed (volinfo->dict, key, value))
+ quorum_action = _gf_true;
+
+ if (global_opt) {
+ list_for_each_entry (voliter, &priv->volumes, vol_list) {
+ value = gf_strdup (value);
+ ret = dict_set_dynstr (voliter->dict, key, value);
+ if (ret)
+ goto out;
+ }
+ } else {
+ ret = dict_set_dynstr (volinfo->dict, key, value);
+ if (ret)
+ goto out;
+ }
+
+ if (key_fixed) {
+ GF_FREE (key_fixed);
+ key_fixed = NULL;
+ }
+ }
+
+ if (count == 1) {
+ gf_log (this->name, GF_LOG_ERROR, "No options received ");
+ ret = -1;
+ goto out;
+ }
+
+ /* Update the cluster op-version before regenerating volfiles so that
+ * correct volfiles are generated
+ */
+ if (new_op_version > priv->op_version) {
+ priv->op_version = new_op_version;
+ ret = glusterd_store_global_info (this);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to store op-version");
+ goto out;
+ }
+ }
+
+ if (!global_opts_set) {
+ gd_update_volume_op_versions (volinfo);
+ ret = glusterd_create_volfiles_and_notify_services (volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to create volfile for"
+ " 'volume set'");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret)
+ goto out;
+
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+ ret = glusterd_nodesvcs_handle_reconfigure (volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unable to restart NFS-Server");
+ goto out;
+ }
+ }
+
+ } else {
+ list_for_each_entry (voliter, &priv->volumes, vol_list) {
+ volinfo = voliter;
+ gd_update_volume_op_versions (volinfo);
+ ret = glusterd_create_volfiles_and_notify_services (volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to create volfile for"
+ " 'volume set'");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_store_volinfo (volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret)
+ goto out;
+
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+ ret = glusterd_nodesvcs_handle_reconfigure (volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unable to restart NFS-Server");
+ goto out;
+ }
+ }
+ }
+ }
+
+ out:
+ GF_FREE (key_fixed);
+ gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret);
+ if (quorum_action)
+ glusterd_do_quorum_action ();
+ return ret;
+}
+
+
+static int
+glusterd_op_sync_volume (dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ int ret = -1;
+ char *volname = NULL;
+ char *hostname = NULL;
+ char msg[2048] = {0,};
+ int count = 1;
+ int vol_count = 0;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = dict_get_str (dict, "hostname", &hostname);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "hostname couldn't be "
+ "retrieved from msg");
+ *op_errstr = gf_strdup (msg);
+ goto out;
+ }
+
+ if (!gf_is_local_addr (hostname)) {
+ ret = 0;
+ goto out;
+ }
+
+ //volname is not present in case of sync all
+ ret = dict_get_str (dict, "volname", &volname);
+ if (!ret) {
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Volume with name: %s "
+ "not exists", volname);
+ goto out;
+ }
+ }
+
+ if (!rsp_dict) {
+ //this should happen only on source
+ ret = 0;
+ goto out;
+ }
+
+ if (volname) {
+ ret = glusterd_add_volume_to_dict (volinfo, rsp_dict,
+ 1);
+ vol_count = 1;
+ } else {
+ list_for_each_entry (volinfo, &priv->volumes, vol_list) {
+ ret = glusterd_add_volume_to_dict (volinfo,
+ rsp_dict, count);
+ if (ret)
+ goto out;
+
+ vol_count = count++;
+ }
+ }
+ ret = dict_set_int32 (rsp_dict, "count", vol_count);
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+static int
+glusterd_add_profile_volume_options (glusterd_volinfo_t *volinfo)
+{
+ int ret = -1;
+ char *latency_key = NULL;
+ char *fd_stats_key = NULL;
+
+ GF_ASSERT (volinfo);
+
+ latency_key = VKEY_DIAG_LAT_MEASUREMENT;
+ fd_stats_key = VKEY_DIAG_CNT_FOP_HITS;
+
+ ret = dict_set_str (volinfo->dict, latency_key, "on");
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR, "failed to set the volume %s "
+ "option %s value %s",
+ volinfo->volname, latency_key, "on");
+ goto out;
+ }
+
+ ret = dict_set_str (volinfo->dict, fd_stats_key, "on");
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR, "failed to set the volume %s "
+ "option %s value %s",
+ volinfo->volname, fd_stats_key, "on");
+ goto out;
+ }
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+static void
+glusterd_remove_profile_volume_options (glusterd_volinfo_t *volinfo)
+{
+ char *latency_key = NULL;
+ char *fd_stats_key = NULL;
+
+ GF_ASSERT (volinfo);
+
+ latency_key = VKEY_DIAG_LAT_MEASUREMENT;
+ fd_stats_key = VKEY_DIAG_CNT_FOP_HITS;
+ dict_del (volinfo->dict, latency_key);
+ dict_del (volinfo->dict, fd_stats_key);
+}
+
+static int
+glusterd_op_stats_volume (dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ int ret = -1;
+ char *volname = NULL;
+ char msg[2048] = {0,};
+ glusterd_volinfo_t *volinfo = NULL;
+ int32_t stats_op = GF_CLI_STATS_NONE;
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR, "volume name get failed");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Volume %s does not exists",
+ volname);
+
+ gf_log ("", GF_LOG_ERROR, "%s", msg);
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "op", &stats_op);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR, "volume profile op get failed");
+ goto out;
+ }
+
+ switch (stats_op) {
+ case GF_CLI_STATS_START:
+ ret = glusterd_add_profile_volume_options (volinfo);
+ if (ret)
+ goto out;
+ break;
+ case GF_CLI_STATS_STOP:
+ glusterd_remove_profile_volume_options (volinfo);
+ break;
+ case GF_CLI_STATS_INFO:
+ case GF_CLI_STATS_TOP:
+ //info is already collected in brick op.
+ //just goto out;
+ ret = 0;
+ goto out;
+ break;
+ default:
+ GF_ASSERT (0);
+ gf_log ("glusterd", GF_LOG_ERROR, "Invalid profile op: %d",
+ stats_op);
+ ret = -1;
+ goto out;
+ break;
+ }
+ ret = glusterd_create_volfiles_and_notify_services (volinfo);
+
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to create volfile for"
+ " 'volume set'");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_store_volinfo (volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret)
+ goto out;
+
+ if (GLUSTERD_STATUS_STARTED == volinfo->status)
+ ret = glusterd_nodesvcs_handle_reconfigure (volinfo);
+
+ ret = 0;
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+static int
+_add_brick_name_to_dict (dict_t *dict, char *key, glusterd_brickinfo_t *brick)
+{
+ int ret = -1;
+ char tmp[1024] = {0,};
+ char *brickname = NULL;
+ xlator_t *this = NULL;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (key);
+ GF_ASSERT (brick);
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ snprintf (tmp, sizeof (tmp), "%s:%s", brick->hostname, brick->path);
+ brickname = gf_strdup (tmp);
+ if (!brickname) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to dup brick name");
+ goto out;
+ }
+
+ ret = dict_set_dynstr (dict, key, brickname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to add brick name to dict");
+ goto out;
+ }
+ brickname = NULL;
+out:
+ if (brickname)
+ GF_FREE (brickname);
+ return ret;
+}
+
+static int
+_add_remove_bricks_to_dict (dict_t *dict, glusterd_volinfo_t *volinfo,
+ char *prefix)
+{
+ int ret = -1;
+ int count = 0;
+ int i = 0;
+ char brick_key[1024] = {0,};
+ char dict_key[1024] ={0,};
+ char *brick = NULL;
+ xlator_t *this = NULL;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (volinfo);
+ GF_ASSERT (prefix);
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = dict_get_int32 (volinfo->rebal.dict, "count", &count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get brick count");
+ goto out;
+ }
+
+ snprintf (dict_key, sizeof (dict_key), "%s.count", prefix);
+ ret = dict_set_int32 (dict, dict_key, count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set brick count in dict");
+ goto out;
+ }
+
+ for (i = 1; i <= count; i++) {
+ memset (brick_key, 0, sizeof (brick_key));
+ snprintf (brick_key, sizeof (brick_key), "brick%d", i);
+
+ ret = dict_get_str (volinfo->rebal.dict, brick_key, &brick);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to get %s", brick_key);
+ goto out;
+ }
+
+ memset (dict_key, 0, sizeof (dict_key));
+ snprintf (dict_key, sizeof (dict_key), "%s.%s", prefix,
+ brick_key);
+ ret = dict_set_str (dict, dict_key, brick);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to add brick to dict");
+ goto out;
+ }
+ brick = NULL;
+ }
+
+out:
+ return ret;
+}
+
+/* This adds the respective task-id and all available parameters of a task into
+ * a dictionary
+ */
+static int
+_add_task_to_dict (dict_t *dict, glusterd_volinfo_t *volinfo, int op, int index)
+{
+
+ int ret = -1;
+ char key[128] = {0,};
+ char *uuid_str = NULL;
+ int status = 0;
+ xlator_t *this = NULL;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (volinfo);
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ switch (op) {
+ case GD_OP_REMOVE_BRICK:
+ snprintf (key, sizeof (key), "task%d", index);
+ ret = _add_remove_bricks_to_dict (dict, volinfo, key);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to add remove bricks to dict");
+ goto out;
+ }
+ case GD_OP_REBALANCE:
+ uuid_str = gf_strdup (uuid_utoa (volinfo->rebal.rebalance_id));
+ status = volinfo->rebal.defrag_status;
+ break;
+
+ case GD_OP_REPLACE_BRICK:
+ snprintf (key, sizeof (key), "task%d.src-brick", index);
+ ret = _add_brick_name_to_dict (dict, key,
+ volinfo->rep_brick.src_brick);
+ if (ret)
+ goto out;
+ memset (key, 0, sizeof (key));
+
+ snprintf (key, sizeof (key), "task%d.dst-brick", index);
+ ret = _add_brick_name_to_dict (dict, key,
+ volinfo->rep_brick.dst_brick);
+ if (ret)
+ goto out;
+ memset (key, 0, sizeof (key));
+
+ uuid_str = gf_strdup (uuid_utoa (volinfo->rep_brick.rb_id));
+ status = volinfo->rep_brick.rb_status;
+ break;
+
+ default:
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR, "%s operation doesn't have a"
+ " task_id", gd_op_list[op]);
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "task%d.type", index);
+ ret = dict_set_str (dict, key, (char *)gd_op_list[op]);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error setting task type in dict");
+ goto out;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "task%d.id", index);
+
+ if (!uuid_str)
+ goto out;
+ ret = dict_set_dynstr (dict, key, uuid_str);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error setting task id in dict");
+ goto out;
+ }
+ uuid_str = NULL;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "task%d.status", index);
+ ret = dict_set_int32 (dict, key, status);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error setting task status in dict");
+ goto out;
+ }
+
+out:
+ if (uuid_str)
+ GF_FREE (uuid_str);
+ return ret;
+}
+
+static int
+glusterd_aggregate_task_status (dict_t *rsp_dict, glusterd_volinfo_t *volinfo)
+{
+ int ret = -1;
+ int tasks = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ if (!uuid_is_null (volinfo->rebal.rebalance_id)) {
+ ret = _add_task_to_dict (rsp_dict, volinfo, volinfo->rebal.op,
+ tasks);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to add task details to dict");
+ goto out;
+ }
+ tasks++;
+ }
+
+ if (!uuid_is_null (volinfo->rep_brick.rb_id)) {
+ ret = _add_task_to_dict (rsp_dict, volinfo, GD_OP_REPLACE_BRICK,
+ tasks);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to add task details to dict");
+ goto out;
+ }
+ tasks++;
+ }
+
+ ret = dict_set_int32 (rsp_dict, "tasks", tasks);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error setting tasks count in dict");
+ goto out;
+ }
+ ret = 0;
+
+out:
+ return ret;
+}
+
+static int
+glusterd_op_status_volume (dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ int ret = -1;
+ int node_count = 0;
+ int brick_index = -1;
+ int other_count = 0;
+ int other_index = 0;
+ uint32_t cmd = 0;
+ char *volname = NULL;
+ char *brick = NULL;
+ xlator_t *this = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ dict_t *vol_opts = NULL;
+ gf_boolean_t nfs_disabled = _gf_false;
+ gf_boolean_t shd_enabled = _gf_true;
+ gf_boolean_t origin_glusterd = _gf_false;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+
+ GF_ASSERT (priv);
+
+ GF_ASSERT (dict);
+
+ origin_glusterd = is_origin_glusterd (dict);
+
+ ret = dict_get_uint32 (dict, "cmd", &cmd);
+ if (ret)
+ goto out;
+
+ if (origin_glusterd) {
+ ret = 0;
+ if ((cmd & GF_CLI_STATUS_ALL)) {
+ ret = glusterd_get_all_volnames (rsp_dict);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to get all volume "
+ "names for status");
+ }
+ }
+
+ ret = dict_set_uint32 (rsp_dict, "cmd", cmd);
+ if (ret)
+ goto out;
+
+ if (cmd & GF_CLI_STATUS_ALL)
+ goto out;
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Volume with name: %s "
+ "does not exist", volname);
+ goto out;
+ }
+ vol_opts = volinfo->dict;
+
+ if ((cmd & GF_CLI_STATUS_NFS) != 0) {
+ ret = glusterd_add_node_to_dict ("nfs", rsp_dict, 0, vol_opts);
+ if (ret)
+ goto out;
+ other_count++;
+ node_count++;
+
+ } else if ((cmd & GF_CLI_STATUS_SHD) != 0) {
+ ret = glusterd_add_node_to_dict ("glustershd", rsp_dict, 0,
+ vol_opts);
+ if (ret)
+ goto out;
+ other_count++;
+ node_count++;
+
+ } else if ((cmd & GF_CLI_STATUS_BRICK) != 0) {
+ ret = dict_get_str (dict, "brick", &brick);
+ if (ret)
+ goto out;
+
+ ret = glusterd_volume_brickinfo_get_by_brick (brick,
+ volinfo,
+ &brickinfo);
+ if (ret)
+ goto out;
+
+ if (uuid_compare (brickinfo->uuid, MY_UUID))
+ goto out;
+
+ glusterd_add_brick_to_dict (volinfo, brickinfo, rsp_dict,
+ ++brick_index);
+ if (cmd & GF_CLI_STATUS_DETAIL)
+ glusterd_add_brick_detail_to_dict (volinfo, brickinfo,
+ rsp_dict,
+ brick_index);
+ node_count++;
+
+ } else if ((cmd & GF_CLI_STATUS_TASKS) != 0) {
+ ret = glusterd_aggregate_task_status (rsp_dict, volinfo);
+ goto out;
+
+ } else {
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ brick_index++;
+ if (uuid_compare (brickinfo->uuid, MY_UUID))
+ continue;
+
+ glusterd_add_brick_to_dict (volinfo, brickinfo,
+ rsp_dict, brick_index);
+
+ if (cmd & GF_CLI_STATUS_DETAIL) {
+ glusterd_add_brick_detail_to_dict (volinfo,
+ brickinfo,
+ rsp_dict,
+ brick_index);
+ }
+ node_count++;
+ }
+
+ if ((cmd & GF_CLI_STATUS_MASK) == GF_CLI_STATUS_NONE) {
+ other_index = brick_index + 1;
+
+ nfs_disabled = dict_get_str_boolean (vol_opts,
+ "nfs.disable",
+ _gf_false);
+ if (!nfs_disabled) {
+ ret = glusterd_add_node_to_dict ("nfs",
+ rsp_dict,
+ other_index,
+ vol_opts);
+ if (ret)
+ goto out;
+ other_index++;
+ other_count++;
+ node_count++;
+ }
+
+ shd_enabled = dict_get_str_boolean
+ (vol_opts, "cluster.self-heal-daemon",
+ _gf_true);
+ if (glusterd_is_volume_replicate (volinfo)
+ && shd_enabled) {
+ ret = glusterd_add_node_to_dict ("glustershd",
+ rsp_dict,
+ other_index,
+ vol_opts);
+ if (ret)
+ goto out;
+ other_count++;
+ node_count++;
+ }
+ }
+ }
+
+ ret = dict_set_int32 (rsp_dict, "brick-index-max", brick_index);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error setting brick-index-max to dict");
+ goto out;
+ }
+ ret = dict_set_int32 (rsp_dict, "other-count", other_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error setting other-count to dict");
+ goto out;
+ }
+ ret = dict_set_int32 (rsp_dict, "count", node_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error setting node count to dict");
+ goto out;
+ }
+
+ /* Active tasks */
+ /* Tasks are added only for normal volume status request for either a
+ * single volume or all volumes, and only by the origin glusterd
+ */
+ if (((cmd & GF_CLI_STATUS_MASK) != GF_CLI_STATUS_NONE) ||
+ !(cmd & (GF_CLI_STATUS_VOL | GF_CLI_STATUS_ALL)) ||
+ !origin_glusterd)
+ goto out;
+
+ ret = glusterd_aggregate_task_status (rsp_dict, volinfo);
+ if (ret)
+ goto out;
+ ret = 0;
+
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+static int
+glusterd_op_ac_none (glusterd_op_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning with %d", ret);
+
+ return ret;
+}
+
+static int
+glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+ rpc_clnt_procedure_t *proc = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ uint32_t pending_count = 0;
+ dict_t *dict = NULL;
+
+ this = THIS;
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ list_for_each_entry (peerinfo, &priv->peers, uuid_list) {
+ GF_ASSERT (peerinfo);
+
+ if (!peerinfo->connected || !peerinfo->mgmt)
+ continue;
+ if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) &&
+ (glusterd_op_get_op() != GD_OP_SYNC_VOLUME))
+ continue;
+
+ /* Based on the op_version, acquire a cluster or mgmt_v3 lock */
+ if (priv->op_version < 3) {
+ proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_CLUSTER_LOCK];
+ if (proc->fn) {
+ ret = proc->fn (NULL, this, peerinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Failed to send lock request "
+ "for operation 'Volume %s' to "
+ "peer %s",
+ gd_op_list[opinfo.op],
+ peerinfo->hostname);
+ continue;
+ }
+ pending_count++;
+ }
+ } else {
+ dict = glusterd_op_get_ctx ();
+ dict_ref (dict);
+
+ proc = &peerinfo->mgmt_v3->proctable
+ [GLUSTERD_MGMT_V3_LOCK];
+ if (proc->fn) {
+ ret = dict_set_static_ptr (dict, "peerinfo",
+ peerinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set peerinfo");
+ dict_unref (dict);
+ goto out;
+ }
+
+ ret = proc->fn (NULL, this, dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Failed to send mgmt_v3 lock "
+ "request for operation "
+ "'Volume %s' to peer %s",
+ gd_op_list[opinfo.op],
+ peerinfo->hostname);
+ dict_unref (dict);
+ continue;
+ }
+ pending_count++;
+ }
+ }
+ }
+
+ opinfo.pending_count = pending_count;
+ if (!opinfo.pending_count)
+ ret = glusterd_op_sm_inject_all_acc (&event->txn_id);
+
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret);
+ return ret;
+}
+
+static int
+glusterd_op_ac_send_unlock (glusterd_op_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+ rpc_clnt_procedure_t *proc = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ uint32_t pending_count = 0;
+ dict_t *dict = NULL;
+
+ this = THIS;
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ list_for_each_entry (peerinfo, &priv->peers, uuid_list) {
+ GF_ASSERT (peerinfo);
+
+ if (!peerinfo->connected || !peerinfo->mgmt)
+ continue;
+ if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) &&
+ (glusterd_op_get_op() != GD_OP_SYNC_VOLUME))
+ continue;
+
+ /* Based on the op_version, release the *
+ * cluster or mgmt_v3 lock */
+ if (priv->op_version < 3) {
+ proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_CLUSTER_UNLOCK];
+ if (proc->fn) {
+ ret = proc->fn (NULL, this, peerinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Failed to send unlock request "
+ "for operation 'Volume %s' to "
+ "peer %s",
+ gd_op_list[opinfo.op],
+ peerinfo->hostname);
+ continue;
+ }
+ pending_count++;
+ }
+ } else {
+ dict = glusterd_op_get_ctx ();
+ dict_ref (dict);
+
+ proc = &peerinfo->mgmt_v3->proctable
+ [GLUSTERD_MGMT_V3_UNLOCK];
+ if (proc->fn) {
+ ret = dict_set_static_ptr (dict, "peerinfo",
+ peerinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set peerinfo");
+ dict_unref (dict);
+ goto out;
+ }
+
+ ret = proc->fn (NULL, this, dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Failed to send mgmt_v3 unlock "
+ "request for operation "
+ "'Volume %s' to peer %s",
+ gd_op_list[opinfo.op],
+ peerinfo->hostname);
+ dict_unref (dict);
+ continue;
+ }
+ pending_count++;
+ }
+ }
+ }
+
+ opinfo.pending_count = pending_count;
+ if (!opinfo.pending_count)
+ ret = glusterd_op_sm_inject_all_acc (&event->txn_id);
+
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret);
+ return ret;
+}
+
+static int
+glusterd_op_ac_ack_drain (glusterd_op_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+
+ if (opinfo.pending_count > 0)
+ opinfo.pending_count--;
+
+ if (!opinfo.pending_count)
+ ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK,
+ &event->txn_id, NULL);
+
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning with %d", ret);
+
+ return ret;
+}
+
+static int
+glusterd_op_ac_send_unlock_drain (glusterd_op_sm_event_t *event, void *ctx)
+{
+ return glusterd_op_ac_ack_drain (event, ctx);
+}
+
+static int
+glusterd_op_ac_lock (glusterd_op_sm_event_t *event, void *ctx)
+{
+ int32_t ret = 0;
+ char *volname = NULL;
+ glusterd_op_lock_ctx_t *lock_ctx = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ GF_ASSERT (event);
+ GF_ASSERT (ctx);
+
+ this = THIS;
+ priv = this->private;
+
+ lock_ctx = (glusterd_op_lock_ctx_t *)ctx;
+
+ /* If the req came from a node running on older op_version
+ * the dict won't be present. Based on it acquiring a cluster
+ * or mgmt_v3 lock */
+ if (lock_ctx->dict == NULL) {
+ ret = glusterd_lock (lock_ctx->uuid);
+ glusterd_op_lock_send_resp (lock_ctx->req, ret);
+ } else {
+ ret = dict_get_str (lock_ctx->dict, "volname", &volname);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to acquire volname");
+ else {
+ ret = glusterd_mgmt_v3_lock (volname, lock_ctx->uuid,
+ "vol");
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to acquire lock for %s",
+ volname);
+ }
+
+ glusterd_op_mgmt_v3_lock_send_resp (lock_ctx->req,
+ &event->txn_id, ret);
+
+ dict_unref (lock_ctx->dict);
+ }
+
+ gf_log (THIS->name, GF_LOG_DEBUG, "Lock Returned %d", ret);
+ return ret;
+}
+
+static int
+glusterd_op_ac_unlock (glusterd_op_sm_event_t *event, void *ctx)
+{
+ int32_t ret = 0;
+ char *volname = NULL;
+ glusterd_op_lock_ctx_t *lock_ctx = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+
+ GF_ASSERT (event);
+ GF_ASSERT (ctx);
+
+ this = THIS;
+ priv = this->private;
+
+ lock_ctx = (glusterd_op_lock_ctx_t *)ctx;
+
+ /* If the req came from a node running on older op_version
+ * the dict won't be present. Based on it releasing the cluster
+ * or mgmt_v3 lock */
+ if (lock_ctx->dict == NULL) {
+ ret = glusterd_unlock (lock_ctx->uuid);
+ glusterd_op_unlock_send_resp (lock_ctx->req, ret);
+ } else {
+ ret = dict_get_str (lock_ctx->dict, "volname", &volname);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to acquire volname");
+ else {
+ ret = glusterd_mgmt_v3_unlock (volname, lock_ctx->uuid,
+ "vol");
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to release lock for %s", volname);
+ }
+
+ glusterd_op_mgmt_v3_unlock_send_resp (lock_ctx->req,
+ &event->txn_id, ret);
+
+ dict_unref (lock_ctx->dict);
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "Unlock Returned %d", ret);
+
+ if (priv->pending_quorum_action)
+ glusterd_do_quorum_action ();
+ return ret;
+}
+
+static int
+glusterd_op_ac_local_unlock (glusterd_op_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+ uuid_t *originator = NULL;
+
+ GF_ASSERT (event);
+ GF_ASSERT (ctx);
+
+ originator = (uuid_t *) ctx;
+
+ ret = glusterd_unlock (*originator);
+
+ gf_log (THIS->name, GF_LOG_DEBUG, "Unlock Returned %d", ret);
+
+ return ret;
+}
+
+static int
+glusterd_op_ac_rcvd_lock_acc (glusterd_op_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+
+ GF_ASSERT (event);
+
+ if (opinfo.pending_count > 0)
+ opinfo.pending_count--;
+
+ if (opinfo.pending_count > 0)
+ goto out;
+
+ ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC,
+ &event->txn_id, NULL);
+
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+
+out:
+ return ret;
+}
+
+static int
+glusterd_dict_set_volid (dict_t *dict, char *volname, char **op_errstr)
+{
+ int ret = -1;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *volid = NULL;
+ char msg[1024] = {0,};
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ if (!dict || !volname)
+ goto out;
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ snprintf (msg, sizeof (msg), FMTSTR_CHECK_VOL_EXISTS, volname);
+ goto out;
+ }
+ volid = gf_strdup (uuid_utoa (volinfo->volume_id));
+ if (!volid) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_dynstr (dict, "vol-id", volid);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Failed to set volume id of volume"
+ " %s", volname);
+ goto out;
+ }
+out:
+ if (msg[0] != '\0') {
+ gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ }
+ return ret;
+}
+
+int
+glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx)
+{
+ int ret = -1;
+ void *ctx = NULL;
+ dict_t *dict = NULL;
+ dict_t *req_dict = NULL;
+ glusterd_op_t op = GD_OP_NONE;
+ char *volname = NULL;
+ uint32_t status_cmd = GF_CLI_STATUS_NONE;
+ char *errstr = NULL;
+ xlator_t *this = NULL;
+
+ GF_ASSERT (req);
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ req_dict = dict_new ();
+ if (!req_dict)
+ goto out;
+
+ if (!op_ctx) {
+ op = glusterd_op_get_op ();
+ ctx = (void*)glusterd_op_get_ctx ();
+ if (!ctx) {
+ gf_log (this->name, GF_LOG_ERROR, "Null Context for "
+ "op %d", op);
+ ret = -1;
+ goto out;
+ }
+
+ } else {
+#define GD_SYNC_OPCODE_KEY "sync-mgmt-operation"
+ ret = dict_get_int32 (op_ctx, GD_SYNC_OPCODE_KEY, (int32_t*)&op);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get volume"
+ " operation");
+ goto out;
+ }
+ ctx = op_ctx;
+#undef GD_SYNC_OPCODE_KEY
+ }
+
+ dict = ctx;
+ switch (op) {
+ case GD_OP_CREATE_VOLUME:
+ {
+ ++glusterfs_port;
+ ret = dict_set_int32 (dict, "port",
+ glusterfs_port);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set port in "
+ "dictionary");
+ goto out;
+ }
+ dict_copy (dict, req_dict);
+ }
+ break;
+
+ case GD_OP_GSYNC_CREATE:
+ case GD_OP_GSYNC_SET:
+ {
+ ret = glusterd_op_gsync_args_get (dict,
+ &errstr,
+ &volname,
+ NULL, NULL);
+ if (ret == 0) {
+ ret = glusterd_dict_set_volid
+ (dict, volname, op_errstr);
+ if (ret)
+ goto out;
+ }
+ dict_copy (dict, req_dict);
+ }
+ break;
+
+ case GD_OP_SET_VOLUME:
+ {
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "volname is not present in "
+ "operation ctx");
+ goto out;
+ }
+ if (strcmp (volname, "help") &&
+ strcmp (volname, "help-xml") &&
+ strcasecmp (volname, "all")) {
+ ret = glusterd_dict_set_volid
+ (dict, volname, op_errstr);
+ if (ret)
+ goto out;
+ }
+ dict_destroy (req_dict);
+ req_dict = dict_ref (dict);
+ }
+ break;
+
+ case GD_OP_SYNC_VOLUME:
+ {
+ dict_copy (dict, req_dict);
+ break;
+ }
+
+ case GD_OP_REMOVE_BRICK:
+ {
+ dict_t *dict = ctx;
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "volname is not present in "
+ "operation ctx");
+ goto out;
+ }
+
+ ret = glusterd_dict_set_volid (dict, volname,
+ op_errstr);
+ if (ret)
+ goto out;
+
+ dict_destroy (req_dict);
+ req_dict = dict_ref (dict);
+ }
+ break;
+
+ case GD_OP_STATUS_VOLUME:
+ {
+ ret = dict_get_uint32 (dict, "cmd",
+ &status_cmd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Status command not present "
+ "in op ctx");
+ goto out;
+ }
+ if (GF_CLI_STATUS_ALL & status_cmd) {
+ dict_copy (dict, req_dict);
+ break;
+ }
+ }
+ /*fall-through*/
+ case GD_OP_DELETE_VOLUME:
+ case GD_OP_START_VOLUME:
+ case GD_OP_STOP_VOLUME:
+ case GD_OP_ADD_BRICK:
+ case GD_OP_REPLACE_BRICK:
+ case GD_OP_RESET_VOLUME:
+ case GD_OP_LOG_ROTATE:
+ case GD_OP_QUOTA:
+ case GD_OP_PROFILE_VOLUME:
+ case GD_OP_REBALANCE:
+ case GD_OP_HEAL_VOLUME:
+ case GD_OP_STATEDUMP_VOLUME:
+ case GD_OP_CLEARLOCKS_VOLUME:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ {
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "volname is not present in "
+ "operation ctx");
+ goto out;
+ }
+
+ if (strcasecmp (volname, "all")) {
+ ret = glusterd_dict_set_volid (dict,
+ volname,
+ op_errstr);
+ if (ret)
+ goto out;
+ }
+ dict_copy (dict, req_dict);
+ }
+ break;
+
+ case GD_OP_COPY_FILE:
+ {
+ dict_copy (dict, req_dict);
+ break;
+ }
+
+ case GD_OP_SYS_EXEC:
+ {
+ dict_copy (dict, req_dict);
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ *req = req_dict;
+ ret = 0;
+
+out:
+ return ret;
+}
+
+gf_boolean_t
+glusterd_is_get_op (xlator_t *this, glusterd_op_t op, dict_t *dict)
+{
+ char *key = NULL;
+ char *volname = NULL;
+ int ret = 0;
+
+ if (op == GD_OP_STATUS_VOLUME)
+ return _gf_true;
+
+ if ((op == GD_OP_SET_VOLUME)) {
+ //check for set volume help
+ ret = dict_get_str (dict, "volname", &volname);
+ if (volname &&
+ ((strcmp (volname, "help") == 0) ||
+ (strcmp (volname, "help-xml") == 0))) {
+ ret = dict_get_str (dict, "key1", &key);
+ if (ret < 0)
+ return _gf_true;
+ }
+ }
+
+ return _gf_false;
+}
+
+gf_boolean_t
+glusterd_is_op_quorum_validation_required (xlator_t *this, glusterd_op_t op,
+ dict_t *dict)
+{
+ gf_boolean_t required = _gf_true;
+ char *key = NULL;
+ char *key_fixed = NULL;
+ int ret = -1;
+
+ if (glusterd_is_get_op (this, op, dict)) {
+ required = _gf_false;
+ goto out;
+ }
+ if ((op != GD_OP_SET_VOLUME) && (op != GD_OP_RESET_VOLUME))
+ goto out;
+ if (op == GD_OP_SET_VOLUME)
+ ret = dict_get_str (dict, "key1", &key);
+ else if (op == GD_OP_RESET_VOLUME)
+ ret = dict_get_str (dict, "key", &key);
+ if (ret)
+ goto out;
+ ret = glusterd_check_option_exists (key, &key_fixed);
+ if (ret <= 0)
+ goto out;
+ if (key_fixed)
+ key = key_fixed;
+ if (glusterd_is_quorum_option (key))
+ required = _gf_false;
+out:
+ GF_FREE (key_fixed);
+ return required;
+}
+
+static int
+glusterd_op_validate_quorum (xlator_t *this, glusterd_op_t op,
+ dict_t *dict, char **op_errstr)
+{
+ int ret = 0;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *errstr = NULL;
+
+
+ errstr = "Quorum not met. Volume operation not allowed.";
+ if (!glusterd_is_op_quorum_validation_required (this, op, dict))
+ goto out;
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
+
+ if (does_gd_meet_server_quorum (this)) {
+ ret = 0;
+ goto out;
+ }
+
+ if (glusterd_is_volume_in_server_quorum (volinfo)) {
+ ret = -1;
+ *op_errstr = gf_strdup (errstr);
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+static int
+glusterd_op_ac_send_stage_op (glusterd_op_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+ rpc_clnt_procedure_t *proc = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ dict_t *dict = NULL;
+ char *op_errstr = NULL;
+ glusterd_op_t op = GD_OP_NONE;
+ uint32_t pending_count = 0;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ op = glusterd_op_get_op ();
+
+ ret = glusterd_op_build_payload (&dict, &op_errstr, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, LOGSTR_BUILD_PAYLOAD,
+ gd_op_list[op]);
+ if (op_errstr == NULL)
+ gf_asprintf (&op_errstr, OPERRSTR_BUILD_PAYLOAD);
+ opinfo.op_errstr = op_errstr;
+ goto out;
+ }
+
+ ret = glusterd_op_validate_quorum (this, op, dict, &op_errstr);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s", op_errstr);
+ opinfo.op_errstr = op_errstr;
+ goto out;
+ }
+
+ /* rsp_dict NULL from source */
+ ret = glusterd_op_stage_validate (op, dict, &op_errstr, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, LOGSTR_STAGE_FAIL,
+ gd_op_list[op], "localhost",
+ (op_errstr) ? ":" : " ", (op_errstr) ? op_errstr : " ");
+ if (op_errstr == NULL)
+ gf_asprintf (&op_errstr, OPERRSTR_STAGE_FAIL,
+ "localhost");
+ opinfo.op_errstr = op_errstr;
+ goto out;
+ }
+
+ list_for_each_entry (peerinfo, &priv->peers, uuid_list) {
+ GF_ASSERT (peerinfo);
+
+ if (!peerinfo->connected || !peerinfo->mgmt)
+ continue;
+ if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) &&
+ (glusterd_op_get_op() != GD_OP_SYNC_VOLUME))
+ continue;
+
+ proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_STAGE_OP];
+ GF_ASSERT (proc);
+ if (proc->fn) {
+ ret = dict_set_static_ptr (dict, "peerinfo", peerinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "set peerinfo");
+ goto out;
+ }
+
+ ret = proc->fn (NULL, this, dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "Failed to "
+ "send stage request for operation "
+ "'Volume %s' to peer %s",
+ gd_op_list[op], peerinfo->hostname);
+ continue;
+ }
+ pending_count++;
+ }
+ }
+
+ opinfo.pending_count = pending_count;
+out:
+ if (dict)
+ dict_unref (dict);
+ if (ret) {
+ glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT,
+ &event->txn_id, NULL);
+ opinfo.op_ret = ret;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "Sent stage op request for "
+ "'Volume %s' to %d peers", gd_op_list[op],
+ opinfo.pending_count);
+
+ if (!opinfo.pending_count)
+ ret = glusterd_op_sm_inject_all_acc (&event->txn_id);
+
+ gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret);
+
+ return ret;
+
+}
+
+static int32_t
+glusterd_op_start_rb_timer (dict_t *dict, uuid_t *txn_id)
+{
+ int32_t op = 0;
+ struct timespec timeout = {0, };
+ glusterd_conf_t *priv = NULL;
+ int32_t ret = -1;
+ dict_t *rb_ctx = NULL;
+
+ GF_ASSERT (dict);
+ priv = THIS->private;
+
+ ret = dict_get_int32 (dict, "operation", &op);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG,
+ "dict_get on operation failed");
+ goto out;
+ }
+
+ if (op != GF_REPLACE_OP_START) {
+ ret = glusterd_op_sm_inject_all_acc (txn_id);
+ goto out;
+ }
+
+ timeout.tv_sec = 5;
+ timeout.tv_nsec = 0;
+
+
+ rb_ctx = dict_copy (dict, rb_ctx);
+ if (!rb_ctx) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Couldn't copy "
+ "replace brick context. Can't start replace brick");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_bin (rb_ctx, "transaction_id",
+ txn_id, sizeof(*txn_id));
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Failed to set transaction id.");
+ goto out;
+ } else
+ gf_log ("", GF_LOG_DEBUG,
+ "transaction_id = %s", uuid_utoa (*txn_id));
+
+ priv->timer = gf_timer_call_after (THIS->ctx, timeout,
+ glusterd_do_replace_brick,
+ (void *) rb_ctx);
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+/* This function takes a dict and converts the uuid values of key specified
+ * into hostnames
+ */
+static int
+glusterd_op_volume_dict_uuid_to_hostname (dict_t *dict, const char *key_fmt,
+ int idx_min, int idx_max)
+{
+ int ret = -1;
+ int i = 0;
+ char key[1024];
+ char *uuid_str = NULL;
+ uuid_t uuid = {0,};
+ char *hostname = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ GF_ASSERT (dict);
+ GF_ASSERT (key_fmt);
+
+ for (i = idx_min; i < idx_max; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), key_fmt, i);
+ ret = dict_get_str (dict, key, &uuid_str);
+ if (ret)
+ continue;
+
+ gf_log (this->name, GF_LOG_DEBUG, "Got uuid %s",
+ uuid_str);
+
+ ret = uuid_parse (uuid_str, uuid);
+ /* if parsing fails don't error out
+ * let the original value be retained
+ */
+ if (ret)
+ continue;
+
+ hostname = glusterd_uuid_to_hostname (uuid);
+ if (hostname) {
+ gf_log (this->name, GF_LOG_DEBUG, "%s -> %s",
+ uuid_str, hostname);
+ ret = dict_set_dynstr (dict, key, hostname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error setting hostname %s to dict",
+ hostname);
+ GF_FREE (hostname);
+ goto out;
+ }
+ }
+ }
+
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+static int
+reassign_defrag_status (dict_t *dict, char *key, gf_defrag_status_t *status)
+{
+ int ret = 0;
+
+ if (!*status)
+ return ret;
+
+ switch (*status) {
+ case GF_DEFRAG_STATUS_STARTED:
+ *status = GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED;
+ break;
+
+ case GF_DEFRAG_STATUS_STOPPED:
+ *status = GF_DEFRAG_STATUS_LAYOUT_FIX_STOPPED;
+ break;
+
+ case GF_DEFRAG_STATUS_COMPLETE:
+ *status = GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE;
+ break;
+
+ case GF_DEFRAG_STATUS_FAILED:
+ *status = GF_DEFRAG_STATUS_LAYOUT_FIX_FAILED;
+ break;
+ default:
+ break;
+ }
+
+ ret = dict_set_int32(dict, key, *status);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to reset defrag %s in dict", key);
+
+ return ret;
+}
+
+/* Check and reassign the defrag_status enum got from the rebalance process
+ * of all peers so that the rebalance-status CLI command can display if a
+ * full-rebalance or just a fix-layout was carried out.
+ */
+static int
+glusterd_op_check_peer_defrag_status (dict_t *dict, int count)
+{
+ glusterd_volinfo_t *volinfo = NULL;
+ gf_defrag_status_t status = GF_DEFRAG_STATUS_NOT_STARTED;
+ char key[256] = {0,};
+ char *volname = NULL;
+ int ret = -1;
+ int i = 1;
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_WARNING, "Unable to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_WARNING, FMTSTR_CHECK_VOL_EXISTS,
+ volname);
+ goto out;
+ }
+
+ if (volinfo->rebal.defrag_cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
+ /* Fix layout was not issued; we don't need to reassign
+ the status */
+ ret = 0;
+ goto out;
+ }
+
+ do {
+ memset (key, 0, 256);
+ snprintf (key, 256, "status-%d", i);
+ ret = dict_get_int32 (dict, key, (int32_t *)&status);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to get defrag %s", key);
+ goto out;
+ }
+ ret = reassign_defrag_status (dict, key, &status);
+ if (ret)
+ goto out;
+ i++;
+ } while (i <= count);
+
+ ret = 0;
+out:
+ return ret;
+
+}
+
+/* This function is used to modify the op_ctx dict before sending it back
+ * to cli. This is useful in situations like changing the peer uuids to
+ * hostnames etc.
+ */
+void
+glusterd_op_modify_op_ctx (glusterd_op_t op, void *ctx)
+{
+ int ret = -1;
+ dict_t *op_ctx = NULL;
+ int brick_index_max = -1;
+ int other_count = 0;
+ int count = 0;
+ uint32_t cmd = GF_CLI_STATUS_NONE;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ if (ctx)
+ op_ctx = ctx;
+ else
+ op_ctx = glusterd_op_get_ctx();
+
+ if (!op_ctx) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Operation context is not present.");
+ goto out;
+ }
+
+ switch (op) {
+ case GD_OP_STATUS_VOLUME:
+ ret = dict_get_uint32 (op_ctx, "cmd", &cmd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Failed to get status cmd");
+ goto out;
+ }
+ if (!(cmd & GF_CLI_STATUS_NFS || cmd & GF_CLI_STATUS_SHD ||
+ (cmd & GF_CLI_STATUS_MASK) == GF_CLI_STATUS_NONE)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "op_ctx modification not required for status "
+ "operation being performed");
+ goto out;
+ }
+
+ ret = dict_get_int32 (op_ctx, "brick-index-max",
+ &brick_index_max);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Failed to get brick-index-max");
+ goto out;
+ }
+
+ ret = dict_get_int32 (op_ctx, "other-count", &other_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Failed to get other-count");
+ goto out;
+ }
+
+ count = brick_index_max + other_count + 1;
+
+ ret = glusterd_op_volume_dict_uuid_to_hostname (op_ctx,
+ "brick%d.path",
+ 0, count);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "Failed uuid to hostname conversion");
+
+ break;
+
+ case GD_OP_PROFILE_VOLUME:
+ ret = dict_get_str_boolean (op_ctx, "nfs", _gf_false);
+ if (!ret)
+ goto out;
+
+ ret = dict_get_int32 (op_ctx, "count", &count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Failed to get brick count");
+ goto out;
+ }
+
+ ret = glusterd_op_volume_dict_uuid_to_hostname (op_ctx,
+ "%d-brick",
+ 1, (count + 1));
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "Failed uuid to hostname conversion");
+
+ break;
+
+ /* For both rebalance and remove-brick status, the glusterd op is the
+ * same
+ */
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ ret = dict_get_int32 (op_ctx, "count", &count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Failed to get count");
+ goto out;
+ }
+
+ /* add 'node-name-%d' into op_ctx with value uuid_str.
+ this will be used to convert to hostname later */
+ {
+ char key[1024];
+ char *uuid_str = NULL;
+ int i;
+
+ for (i = 1; i <= count; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "node-uuid-%d", i);
+ ret = dict_get_str (op_ctx, key, &uuid_str);
+ if (!ret) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key),
+ "node-name-%d", i);
+ ret = dict_set_str (op_ctx, key,
+ uuid_str);
+ }
+ }
+ }
+
+ ret = glusterd_op_volume_dict_uuid_to_hostname (op_ctx,
+ "node-name-%d",
+ 1, (count + 1));
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "Failed uuid to hostname conversion");
+
+ ret = glusterd_op_check_peer_defrag_status (op_ctx, count);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to reset defrag status for fix-layout");
+ break;
+
+ default:
+ ret = 0;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "op_ctx modification not required");
+ break;
+
+ }
+
+out:
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "op_ctx modification failed");
+ return;
+}
+
+static int
+glusterd_op_commit_hook (glusterd_op_t op, dict_t *op_ctx,
+ glusterd_commit_hook_type_t type)
+{
+ glusterd_conf_t *priv = NULL;
+ char hookdir[PATH_MAX] = {0, };
+ char scriptdir[PATH_MAX] = {0, };
+ char type_subdir[256] = {0, };
+ char *cmd_subdir = NULL;
+ int ret = -1;
+
+ priv = THIS->private;
+ switch (type) {
+ case GD_COMMIT_HOOK_NONE:
+ case GD_COMMIT_HOOK_MAX:
+ /*Won't be called*/
+ break;
+
+ case GD_COMMIT_HOOK_PRE:
+ strcpy (type_subdir, "pre");
+ break;
+ case GD_COMMIT_HOOK_POST:
+ strcpy (type_subdir, "post");
+ break;
+ }
+
+ cmd_subdir = glusterd_hooks_get_hooks_cmd_subdir (op);
+ if (strlen (cmd_subdir) == 0)
+ return -1;
+
+ GLUSTERD_GET_HOOKS_DIR (hookdir, GLUSTERD_HOOK_VER, priv);
+ snprintf (scriptdir, sizeof (scriptdir), "%s/%s/%s",
+ hookdir, cmd_subdir, type_subdir);
+
+ switch (type) {
+ case GD_COMMIT_HOOK_NONE:
+ case GD_COMMIT_HOOK_MAX:
+ /*Won't be called*/
+ break;
+
+ case GD_COMMIT_HOOK_PRE:
+ ret = glusterd_hooks_run_hooks (scriptdir, op, op_ctx,
+ type);
+ break;
+ case GD_COMMIT_HOOK_POST:
+ ret = glusterd_hooks_post_stub_enqueue (scriptdir, op,
+ op_ctx);
+ break;
+ }
+
+ return ret;
+}
+
+static int
+glusterd_op_ac_send_commit_op (glusterd_op_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+ rpc_clnt_procedure_t *proc = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ dict_t *dict = NULL;
+ dict_t *op_dict = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ char *op_errstr = NULL;
+ glusterd_op_t op = GD_OP_NONE;
+ uint32_t pending_count = 0;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ op = glusterd_op_get_op ();
+ op_dict = glusterd_op_get_ctx ();
+
+ ret = glusterd_op_build_payload (&dict, &op_errstr, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, LOGSTR_BUILD_PAYLOAD,
+ gd_op_list[op]);
+ if (op_errstr == NULL)
+ gf_asprintf (&op_errstr, OPERRSTR_BUILD_PAYLOAD);
+ opinfo.op_errstr = op_errstr;
+ goto out;
+ }
+
+ ret = glusterd_op_commit_perform (op, dict, &op_errstr, NULL); //rsp_dict invalid for source
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, LOGSTR_COMMIT_FAIL,
+ gd_op_list[op], "localhost", (op_errstr) ? ":" : " ",
+ (op_errstr) ? op_errstr : " ");
+ if (op_errstr == NULL)
+ gf_asprintf (&op_errstr, OPERRSTR_COMMIT_FAIL,
+ "localhost");
+ opinfo.op_errstr = op_errstr;
+ goto out;
+ }
+
+
+ list_for_each_entry (peerinfo, &priv->peers, uuid_list) {
+ GF_ASSERT (peerinfo);
+
+ if (!peerinfo->connected || !peerinfo->mgmt)
+ continue;
+ if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) &&
+ (glusterd_op_get_op() != GD_OP_SYNC_VOLUME))
+ continue;
+
+ proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_COMMIT_OP];
+ GF_ASSERT (proc);
+ if (proc->fn) {
+ ret = dict_set_static_ptr (dict, "peerinfo", peerinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set peerinfo");
+ goto out;
+ }
+ ret = proc->fn (NULL, this, dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "Failed to "
+ "send commit request for operation "
+ "'Volume %s' to peer %s",
+ gd_op_list[op], peerinfo->hostname);
+ continue;
+ }
+ pending_count++;
+ }
+ }
+
+ opinfo.pending_count = pending_count;
+ gf_log (this->name, GF_LOG_DEBUG, "Sent commit op req for 'Volume %s' "
+ "to %d peers", gd_op_list[op], opinfo.pending_count);
+out:
+ if (dict)
+ dict_unref (dict);
+ if (ret) {
+ glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT,
+ &event->txn_id, NULL);
+ opinfo.op_ret = ret;
+ }
+
+ if (!opinfo.pending_count) {
+ if (op == GD_OP_REPLACE_BRICK) {
+ ret = glusterd_op_start_rb_timer (op_dict,
+ &event->txn_id);
+
+ } else {
+ glusterd_op_modify_op_ctx (op, NULL);
+ ret = glusterd_op_sm_inject_all_acc (&event->txn_id);
+ }
+ goto err;
+ }
+
+err:
+ gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret);
+
+ return ret;
+
+}
+
+static int
+glusterd_op_ac_rcvd_stage_op_acc (glusterd_op_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+
+ GF_ASSERT (event);
+
+ if (opinfo.pending_count > 0)
+ opinfo.pending_count--;
+
+ if (opinfo.pending_count > 0)
+ goto out;
+
+ ret = glusterd_op_sm_inject_event (GD_OP_EVENT_STAGE_ACC,
+ &event->txn_id, NULL);
+
+out:
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+static int
+glusterd_op_ac_stage_op_failed (glusterd_op_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+
+ GF_ASSERT (event);
+
+ if (opinfo.pending_count > 0)
+ opinfo.pending_count--;
+
+ if (opinfo.pending_count > 0)
+ goto out;
+
+ ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK,
+ &event->txn_id, NULL);
+
+out:
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+static int
+glusterd_op_ac_commit_op_failed (glusterd_op_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+
+ GF_ASSERT (event);
+
+ if (opinfo.pending_count > 0)
+ opinfo.pending_count--;
+
+ if (opinfo.pending_count > 0)
+ goto out;
+
+ ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK,
+ &event->txn_id, NULL);
+
+out:
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+static int
+glusterd_op_ac_brick_op_failed (glusterd_op_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+ glusterd_op_brick_rsp_ctx_t *ev_ctx = NULL;
+ gf_boolean_t free_errstr = _gf_false;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ GF_ASSERT (event);
+ GF_ASSERT (ctx);
+ ev_ctx = ctx;
+
+ ret = glusterd_remove_pending_entry (&opinfo.pending_bricks, ev_ctx->pending_node->node);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "unknown response received ");
+ ret = -1;
+ free_errstr = _gf_true;
+ goto out;
+ }
+ if (opinfo.brick_pending_count > 0)
+ opinfo.brick_pending_count--;
+ if (opinfo.op_ret == 0)
+ opinfo.op_ret = ev_ctx->op_ret;
+
+ if (opinfo.op_errstr == NULL)
+ opinfo.op_errstr = ev_ctx->op_errstr;
+ else
+ free_errstr = _gf_true;
+
+ if (opinfo.brick_pending_count > 0)
+ goto out;
+
+ ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK,
+ &event->txn_id, ev_ctx->commit_ctx);
+
+out:
+ if (ev_ctx->rsp_dict)
+ dict_unref (ev_ctx->rsp_dict);
+ if (free_errstr && ev_ctx->op_errstr)
+ GF_FREE (ev_ctx->op_errstr);
+ GF_FREE (ctx);
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+static int
+glusterd_op_ac_rcvd_commit_op_acc (glusterd_op_sm_event_t *event, void *ctx)
+{
+ dict_t *op_ctx = NULL;
+ int ret = 0;
+ gf_boolean_t commit_ack_inject = _gf_true;
+ glusterd_op_t op = GD_OP_NONE;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ op = glusterd_op_get_op ();
+ GF_ASSERT (event);
+
+ if (opinfo.pending_count > 0)
+ opinfo.pending_count--;
+
+ if (opinfo.pending_count > 0)
+ goto out;
+
+ if (op == GD_OP_REPLACE_BRICK) {
+ op_ctx = glusterd_op_get_ctx ();
+ if (!op_ctx) {
+ gf_log (this->name, GF_LOG_CRITICAL, "Operation "
+ "context is not present.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_op_start_rb_timer (op_ctx, &event->txn_id);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Couldn't start "
+ "replace-brick operation.");
+ goto out;
+ }
+
+ commit_ack_inject = _gf_false;
+ goto out;
+ }
+
+
+out:
+ if (commit_ack_inject) {
+ if (ret)
+ ret = glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT,
+ &event->txn_id, NULL);
+ else if (!opinfo.pending_count) {
+ glusterd_op_modify_op_ctx (op, NULL);
+ ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_ACC,
+ &event->txn_id, NULL);
+ }
+ /*else do nothing*/
+ }
+
+ return ret;
+}
+
+static int
+glusterd_op_ac_rcvd_unlock_acc (glusterd_op_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+
+ GF_ASSERT (event);
+
+ if (opinfo.pending_count > 0)
+ opinfo.pending_count--;
+
+ if (opinfo.pending_count > 0)
+ goto out;
+
+ ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC,
+ &event->txn_id, NULL);
+
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+
+out:
+ return ret;
+}
+
+int32_t
+glusterd_op_clear_errstr() {
+ opinfo.op_errstr = NULL;
+ return 0;
+}
+
+int32_t
+glusterd_op_set_ctx (void *ctx)
+{
+
+ opinfo.op_ctx = ctx;
+
+ return 0;
+
+}
+
+int32_t
+glusterd_op_reset_ctx ()
+{
+
+ glusterd_op_set_ctx (NULL);
+
+ return 0;
+}
+
+int32_t
+glusterd_op_txn_complete (uuid_t *txn_id)
+{
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
+ int32_t op = -1;
+ int32_t op_ret = 0;
+ int32_t op_errno = 0;
+ rpcsvc_request_t *req = NULL;
+ void *ctx = NULL;
+ char *op_errstr = NULL;
+ char *volname = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ op = glusterd_op_get_op ();
+ ctx = glusterd_op_get_ctx ();
+ op_ret = opinfo.op_ret;
+ op_errno = opinfo.op_errno;
+ req = opinfo.req;
+ if (opinfo.op_errstr)
+ op_errstr = opinfo.op_errstr;
+
+ opinfo.op_ret = 0;
+ opinfo.op_errno = 0;
+ glusterd_op_clear_op ();
+ glusterd_op_reset_ctx ();
+ glusterd_op_clear_errstr ();
+
+ /* Based on the op-version, we release the cluster or mgmt_v3 lock */
+ if (priv->op_version < 3) {
+ ret = glusterd_unlock (MY_UUID);
+ /* unlock cant/shouldnt fail here!! */
+ if (ret)
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Unable to clear local lock, ret: %d", ret);
+ else
+ gf_log (this->name, GF_LOG_DEBUG, "Cleared local lock");
+ } else {
+ ret = dict_get_str (ctx, "volname", &volname);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to acquire volname");
+
+ if (volname) {
+ ret = glusterd_mgmt_v3_unlock (volname, MY_UUID,
+ "vol");
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to release lock for %s",
+ volname);
+ }
+ }
+
+ ret = glusterd_op_send_cli_response (op, op_ret,
+ op_errno, req, ctx, op_errstr);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Responding to cli failed, "
+ "ret: %d", ret);
+ //Ignore this error, else state machine blocks
+ ret = 0;
+ }
+
+ if (op_errstr && (strcmp (op_errstr, "")))
+ GF_FREE (op_errstr);
+
+
+ if (priv->pending_quorum_action)
+ glusterd_do_quorum_action ();
+
+ /* Clearing the transaction opinfo */
+ ret = glusterd_clear_txn_opinfo (txn_id);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to clear transaction's opinfo");
+
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+static int
+glusterd_op_ac_unlocked_all (glusterd_op_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+
+ GF_ASSERT (event);
+
+ ret = glusterd_op_txn_complete (&event->txn_id);
+
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+static int
+glusterd_op_ac_stage_op (glusterd_op_sm_event_t *event, void *ctx)
+{
+ int ret = -1;
+ glusterd_req_ctx_t *req_ctx = NULL;
+ int32_t status = 0;
+ dict_t *rsp_dict = NULL;
+ char *op_errstr = NULL;
+ dict_t *dict = NULL;
+ xlator_t *this = NULL;
+ uuid_t *txn_id = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (ctx);
+
+ req_ctx = ctx;
+
+ dict = req_ctx->dict;
+
+ rsp_dict = dict_new ();
+ if (!rsp_dict) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get new dictionary");
+ return -1;
+ }
+
+ status = glusterd_op_stage_validate (req_ctx->op, dict, &op_errstr,
+ rsp_dict);
+
+ if (status) {
+ gf_log (this->name, GF_LOG_ERROR, "Stage failed on operation"
+ " 'Volume %s', Status : %d", gd_op_list[req_ctx->op],
+ status);
+ }
+
+ txn_id = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t);
+
+ if (txn_id)
+ uuid_copy (*txn_id, event->txn_id);
+ else
+ gf_log (this->name, GF_LOG_ERROR, "Out of Memory");
+
+ ret = dict_set_bin (rsp_dict, "transaction_id",
+ txn_id, sizeof(*txn_id));
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set transaction id.");
+
+ ret = glusterd_op_stage_send_resp (req_ctx->req, req_ctx->op,
+ status, op_errstr, rsp_dict);
+
+ if (op_errstr && (strcmp (op_errstr, "")))
+ GF_FREE (op_errstr);
+
+ gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret);
+
+ if (rsp_dict)
+ dict_unref (rsp_dict);
+
+ return ret;
+}
+
+static gf_boolean_t
+glusterd_need_brick_op (glusterd_op_t op)
+{
+ gf_boolean_t ret = _gf_false;
+
+ GF_ASSERT (GD_OP_NONE < op && op < GD_OP_MAX);
+
+ switch (op) {
+ case GD_OP_PROFILE_VOLUME:
+ case GD_OP_STATUS_VOLUME:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ case GD_OP_HEAL_VOLUME:
+ ret = _gf_true;
+ break;
+ default:
+ ret = _gf_false;
+ }
+
+ return ret;
+}
+
+dict_t*
+glusterd_op_init_commit_rsp_dict (glusterd_op_t op)
+{
+ dict_t *rsp_dict = NULL;
+ dict_t *op_ctx = NULL;
+
+ GF_ASSERT (GD_OP_NONE < op && op < GD_OP_MAX);
+
+ if (glusterd_need_brick_op (op)) {
+ op_ctx = glusterd_op_get_ctx ();
+ GF_ASSERT (op_ctx);
+ rsp_dict = dict_ref (op_ctx);
+ } else {
+ rsp_dict = dict_new ();
+ }
+
+ return rsp_dict;
+}
+
+static int
+glusterd_op_ac_commit_op (glusterd_op_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+ glusterd_req_ctx_t *req_ctx = NULL;
+ int32_t status = 0;
+ char *op_errstr = NULL;
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
+ xlator_t *this = NULL;
+ uuid_t *txn_id = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (ctx);
+
+ req_ctx = ctx;
+
+ dict = req_ctx->dict;
+
+ rsp_dict = glusterd_op_init_commit_rsp_dict (req_ctx->op);
+ if (NULL == rsp_dict)
+ return -1;
+
+
+ if (GD_OP_CLEARLOCKS_VOLUME == req_ctx->op) {
+ /*clear locks should be run only on
+ * originator glusterd*/
+ status = 0;
+
+ } else {
+ status = glusterd_op_commit_perform (req_ctx->op, dict,
+ &op_errstr, rsp_dict);
+ }
+
+ if (status)
+ gf_log (this->name, GF_LOG_ERROR, "Commit of operation "
+ "'Volume %s' failed: %d", gd_op_list[req_ctx->op],
+ status);
+
+ txn_id = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t);
+
+ if (txn_id)
+ uuid_copy (*txn_id, event->txn_id);
+ else
+ gf_log (this->name, GF_LOG_ERROR, "Out of Memory");
+
+ ret = dict_set_bin (rsp_dict, "transaction_id",
+ txn_id, sizeof(*txn_id));
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set transaction id.");
+
+ ret = glusterd_op_commit_send_resp (req_ctx->req, req_ctx->op,
+ status, op_errstr, rsp_dict);
+
+ if (op_errstr && (strcmp (op_errstr, "")))
+ GF_FREE (op_errstr);
+
+ if (rsp_dict)
+ dict_unref (rsp_dict);
+
+ gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret);
+
+ return ret;
+}
+
+static int
+glusterd_op_ac_send_commit_failed (glusterd_op_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+ glusterd_req_ctx_t *req_ctx = NULL;
+ dict_t *op_ctx = NULL;
+
+ GF_ASSERT (ctx);
+
+ req_ctx = ctx;
+
+ op_ctx = glusterd_op_get_ctx ();
+
+ ret = glusterd_op_commit_send_resp (req_ctx->req, req_ctx->op,
+ opinfo.op_ret, opinfo.op_errstr,
+ op_ctx);
+
+ if (opinfo.op_errstr && (strcmp (opinfo.op_errstr, ""))) {
+ GF_FREE (opinfo.op_errstr);
+ opinfo.op_errstr = NULL;
+ }
+
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning with %d", ret);
+ return ret;
+}
+
+static int
+glusterd_op_sm_transition_state (glusterd_op_info_t *opinfo,
+ glusterd_op_sm_t *state,
+ glusterd_op_sm_event_type_t event_type)
+{
+ glusterd_conf_t *conf = NULL;
+
+ GF_ASSERT (state);
+ GF_ASSERT (opinfo);
+
+ conf = THIS->private;
+ GF_ASSERT (conf);
+
+ (void) glusterd_sm_tr_log_transition_add (&conf->op_sm_log,
+ opinfo->state.state,
+ state[event_type].next_state,
+ event_type);
+
+ opinfo->state.state = state[event_type].next_state;
+ return 0;
+}
+
+int32_t
+glusterd_op_stage_validate (glusterd_op_t op, dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ int ret = -1;
+ xlator_t *this = THIS;
+
+ switch (op) {
+ case GD_OP_CREATE_VOLUME:
+ ret = glusterd_op_stage_create_volume (dict, op_errstr);
+ break;
+
+ case GD_OP_START_VOLUME:
+ ret = glusterd_op_stage_start_volume (dict, op_errstr);
+ break;
+
+ case GD_OP_STOP_VOLUME:
+ ret = glusterd_op_stage_stop_volume (dict, op_errstr);
+ break;
+
+ case GD_OP_DELETE_VOLUME:
+ ret = glusterd_op_stage_delete_volume (dict, op_errstr);
+ break;
+
+ case GD_OP_ADD_BRICK:
+ ret = glusterd_op_stage_add_brick (dict, op_errstr);
+ break;
+
+ case GD_OP_REPLACE_BRICK:
+ ret = glusterd_op_stage_replace_brick (dict, op_errstr,
+ rsp_dict);
+ break;
+
+ case GD_OP_SET_VOLUME:
+ ret = glusterd_op_stage_set_volume (dict, op_errstr);
+ break;
+
+ case GD_OP_RESET_VOLUME:
+ ret = glusterd_op_stage_reset_volume (dict, op_errstr);
+ break;
+
+ case GD_OP_REMOVE_BRICK:
+ ret = glusterd_op_stage_remove_brick (dict, op_errstr);
+ break;
+
+ case GD_OP_LOG_ROTATE:
+ ret = glusterd_op_stage_log_rotate (dict, op_errstr);
+ break;
+
+ case GD_OP_SYNC_VOLUME:
+ ret = glusterd_op_stage_sync_volume (dict, op_errstr);
+ break;
+
+ case GD_OP_GSYNC_CREATE:
+ ret = glusterd_op_stage_gsync_create (dict, op_errstr);
+ break;
+
+ case GD_OP_GSYNC_SET:
+ ret = glusterd_op_stage_gsync_set (dict, op_errstr);
+ break;
+
+ case GD_OP_PROFILE_VOLUME:
+ ret = glusterd_op_stage_stats_volume (dict, op_errstr);
+ break;
+
+ case GD_OP_QUOTA:
+ ret = glusterd_op_stage_quota (dict, op_errstr);
+ break;
+
+ case GD_OP_STATUS_VOLUME:
+ ret = glusterd_op_stage_status_volume (dict, op_errstr);
+ break;
+
+ case GD_OP_REBALANCE:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ ret = glusterd_op_stage_rebalance (dict, op_errstr);
+ break;
+
+ case GD_OP_HEAL_VOLUME:
+ ret = glusterd_op_stage_heal_volume (dict, op_errstr);
+ break;
+
+ case GD_OP_STATEDUMP_VOLUME:
+ ret = glusterd_op_stage_statedump_volume (dict,
+ op_errstr);
+ break;
+ case GD_OP_CLEARLOCKS_VOLUME:
+ ret = glusterd_op_stage_clearlocks_volume (dict,
+ op_errstr);
+ break;
+
+ case GD_OP_COPY_FILE:
+ ret = glusterd_op_stage_copy_file (dict, op_errstr);
+ break;
+
+ case GD_OP_SYS_EXEC:
+ ret = glusterd_op_stage_sys_exec (dict, op_errstr);
+ break;
+
+ default:
+ gf_log (this->name, GF_LOG_ERROR, "Unknown op %s",
+ gd_op_list[op]);
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "OP = %d. Returning %d", op, ret);
+ return ret;
+}
+
+
+int32_t
+glusterd_op_commit_perform (glusterd_op_t op, dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ int ret = -1;
+ xlator_t *this = THIS;
+
+ glusterd_op_commit_hook (op, dict, GD_COMMIT_HOOK_PRE);
+ switch (op) {
+ case GD_OP_CREATE_VOLUME:
+ ret = glusterd_op_create_volume (dict, op_errstr);
+ break;
+
+ case GD_OP_START_VOLUME:
+ ret = glusterd_op_start_volume (dict, op_errstr);
+ break;
+
+ case GD_OP_STOP_VOLUME:
+ ret = glusterd_op_stop_volume (dict);
+ break;
+
+ case GD_OP_DELETE_VOLUME:
+ ret = glusterd_op_delete_volume (dict);
+ break;
+
+ case GD_OP_ADD_BRICK:
+ ret = glusterd_op_add_brick (dict, op_errstr);
+ break;
+
+ case GD_OP_REPLACE_BRICK:
+ ret = glusterd_op_replace_brick (dict, rsp_dict);
+ break;
+
+ case GD_OP_SET_VOLUME:
+ ret = glusterd_op_set_volume (dict);
+ break;
+
+ case GD_OP_RESET_VOLUME:
+ ret = glusterd_op_reset_volume (dict, op_errstr);
+ break;
+
+ case GD_OP_REMOVE_BRICK:
+ ret = glusterd_op_remove_brick (dict, op_errstr);
+ break;
+
+ case GD_OP_LOG_ROTATE:
+ ret = glusterd_op_log_rotate (dict);
+ break;
+
+ case GD_OP_SYNC_VOLUME:
+ ret = glusterd_op_sync_volume (dict, op_errstr, rsp_dict);
+ break;
+
+ case GD_OP_GSYNC_CREATE:
+ ret = glusterd_op_gsync_create (dict, op_errstr,
+ rsp_dict);
+ break;
+
+ case GD_OP_GSYNC_SET:
+ ret = glusterd_op_gsync_set (dict, op_errstr, rsp_dict);
+ break;
+
+ case GD_OP_PROFILE_VOLUME:
+ ret = glusterd_op_stats_volume (dict, op_errstr,
+ rsp_dict);
+ break;
+
+ case GD_OP_QUOTA:
+ ret = glusterd_op_quota (dict, op_errstr, rsp_dict);
+ break;
+
+ case GD_OP_STATUS_VOLUME:
+ ret = glusterd_op_status_volume (dict, op_errstr, rsp_dict);
+ break;
+
+ case GD_OP_REBALANCE:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ ret = glusterd_op_rebalance (dict, op_errstr, rsp_dict);
+ break;
+
+ case GD_OP_HEAL_VOLUME:
+ ret = glusterd_op_heal_volume (dict, op_errstr);
+ break;
+
+ case GD_OP_STATEDUMP_VOLUME:
+ ret = glusterd_op_statedump_volume (dict, op_errstr);
+ break;
+
+ case GD_OP_CLEARLOCKS_VOLUME:
+ ret = glusterd_op_clearlocks_volume (dict, op_errstr,
+ rsp_dict);
+ break;
+
+ case GD_OP_COPY_FILE:
+ ret = glusterd_op_copy_file (dict, op_errstr);
+ break;
+
+ case GD_OP_SYS_EXEC:
+ ret = glusterd_op_sys_exec (dict, op_errstr, rsp_dict);
+ break;
+
+ default:
+ gf_log (this->name, GF_LOG_ERROR, "Unknown op %s",
+ gd_op_list[op]);
+ break;
+ }
+
+ if (ret == 0)
+ glusterd_op_commit_hook (op, dict, GD_COMMIT_HOOK_POST);
+
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+
+static int
+glusterd_bricks_select_stop_volume (dict_t *dict, char **op_errstr,
+ struct list_head *selected)
+{
+ int ret = 0;
+ int flags = 0;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_pending_node_t *pending_node = NULL;
+
+ ret = glusterd_op_stop_volume_args_get (dict, &volname, &flags);
+ if (ret)
+ goto out;
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, FMTSTR_CHECK_VOL_EXISTS,
+ volname);
+ gf_asprintf (op_errstr, FMTSTR_CHECK_VOL_EXISTS, volname);
+ goto out;
+ }
+
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ if (glusterd_is_brick_started (brickinfo)) {
+ pending_node = GF_CALLOC (1, sizeof (*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
+ } else {
+ pending_node->node = brickinfo;
+ pending_node->type = GD_NODE_BRICK;
+ list_add_tail (&pending_node->list, selected);
+ pending_node = NULL;
+ }
+ }
+ }
+
+out:
+ return ret;
+}
+
+static int
+glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr,
+ struct list_head *selected)
+{
+ int ret = -1;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ char *brick = NULL;
+ int32_t count = 0;
+ int32_t i = 1;
+ char key[256] = {0,};
+ glusterd_pending_node_t *pending_node = NULL;
+ int32_t force = 0;
+
+
+
+ ret = dict_get_str (dict, "volname", &volname);
+
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to allocate memory");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "count", &count);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get count");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "force", &force);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_INFO, "force flag is not set");
+ ret = 0;
+ goto out;
+ }
+
+ while ( i <= count) {
+ snprintf (key, 256, "brick%d", i);
+ ret = dict_get_str (dict, key, &brick);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR, "Unable to get brick");
+ goto out;
+ }
+
+ ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo,
+ &brickinfo);
+ if (ret)
+ goto out;
+ if (glusterd_is_brick_started (brickinfo)) {
+ pending_node = GF_CALLOC (1, sizeof (*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
+ } else {
+ pending_node->node = brickinfo;
+ pending_node->type = GD_NODE_BRICK;
+ list_add_tail (&pending_node->list, selected);
+ pending_node = NULL;
+ }
+ }
+ i++;
+ }
+
+out:
+ return ret;
+}
+
+static int
+glusterd_bricks_select_profile_volume (dict_t *dict, char **op_errstr,
+ struct list_head *selected)
+{
+ int ret = -1;
+ char *volname = NULL;
+ char msg[2048] = {0,};
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+ int32_t stats_op = GF_CLI_STATS_NONE;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_pending_node_t *pending_node = NULL;
+ char *brick = NULL;
+
+
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR, "volume name get failed");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Volume %s does not exists",
+ volname);
+
+ *op_errstr = gf_strdup (msg);
+ gf_log ("", GF_LOG_ERROR, "%s", msg);
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "op", &stats_op);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR, "volume profile op get failed");
+ goto out;
+ }
+
+ switch (stats_op) {
+ case GF_CLI_STATS_START:
+ case GF_CLI_STATS_STOP:
+ goto out;
+ break;
+ case GF_CLI_STATS_INFO:
+ ret = dict_get_str_boolean (dict, "nfs", _gf_false);
+ if (ret) {
+ if (!glusterd_is_nodesvc_online ("nfs")) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR, "NFS server"
+ " is not running");
+ goto out;
+ }
+ pending_node = GF_CALLOC (1, sizeof (*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
+ }
+ pending_node->node = priv->nfs;
+ pending_node->type = GD_NODE_NFS;
+ list_add_tail (&pending_node->list, selected);
+ pending_node = NULL;
+
+ ret = 0;
+ goto out;
+
+ }
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ if (glusterd_is_brick_started (brickinfo)) {
+ pending_node = GF_CALLOC (1, sizeof (*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
+ } else {
+ pending_node->node = brickinfo;
+ pending_node->type = GD_NODE_BRICK;
+ list_add_tail (&pending_node->list,
+ selected);
+ pending_node = NULL;
+ }
+ }
+ }
+ break;
+
+ case GF_CLI_STATS_TOP:
+ ret = dict_get_str_boolean (dict, "nfs", _gf_false);
+ if (ret) {
+ if (!glusterd_is_nodesvc_online ("nfs")) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR, "NFS server"
+ " is not running");
+ goto out;
+ }
+ pending_node = GF_CALLOC (1, sizeof (*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
+ }
+ pending_node->node = priv->nfs;
+ pending_node->type = GD_NODE_NFS;
+ list_add_tail (&pending_node->list, selected);
+ pending_node = NULL;
+
+ ret = 0;
+ goto out;
+
+ }
+ ret = dict_get_str (dict, "brick", &brick);
+ if (!ret) {
+ ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo,
+ &brickinfo);
+ if (ret)
+ goto out;
+
+ if (!glusterd_is_brick_started (brickinfo))
+ goto out;
+
+ pending_node = GF_CALLOC (1, sizeof (*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
+ } else {
+ pending_node->node = brickinfo;
+ pending_node->type = GD_NODE_BRICK;
+ list_add_tail (&pending_node->list,
+ selected);
+ pending_node = NULL;
+ goto out;
+ }
+ }
+ ret = 0;
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ if (glusterd_is_brick_started (brickinfo)) {
+ pending_node = GF_CALLOC (1, sizeof (*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
+ } else {
+ pending_node->node = brickinfo;
+ pending_node->type = GD_NODE_BRICK;
+ list_add_tail (&pending_node->list,
+ selected);
+ pending_node = NULL;
+ }
+ }
+ }
+ break;
+
+ default:
+ GF_ASSERT (0);
+ gf_log ("glusterd", GF_LOG_ERROR, "Invalid profile op: %d",
+ stats_op);
+ ret = -1;
+ goto out;
+ break;
+ }
+
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+static int
+_add_rxlator_to_dict (dict_t *dict, char *volname, int index, int count)
+{
+ int ret = -1;
+ char key[128] = {0,};
+ char *xname = NULL;
+
+ snprintf (key, sizeof (key), "xl-%d", count);
+ ret = gf_asprintf (&xname, "%s-replicate-%d", volname, index);
+ if (ret == -1)
+ goto out;
+
+ ret = dict_set_dynstr (dict, key, xname);
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32 (dict, xname, index);
+out:
+ return ret;
+}
+
+int
+get_replica_index_for_per_replica_cmd (glusterd_volinfo_t *volinfo,
+ dict_t *dict) {
+ int ret = 0;
+ char *hostname = NULL;
+ char *path = NULL;
+ int index = 0;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ int cmd_replica_index = -1;
+ int replica_count = -1;
+
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "per-replica-cmd-hostname", &hostname);
+ if (ret)
+ goto out;
+ ret = dict_get_str (dict, "per-replica-cmd-path", &path);
+ if (ret)
+ goto out;
+
+ replica_count = volinfo->replica_count;
+
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ if (uuid_is_null (brickinfo->uuid))
+ (void)glusterd_resolve_brick (brickinfo);
+ if (!strcmp (brickinfo->path, path) &&
+ !strcmp (brickinfo->hostname, hostname)) {
+ cmd_replica_index = index/(replica_count);
+ goto out;
+ }
+ index++;
+ }
+
+
+out:
+ if (ret)
+ cmd_replica_index = -1;
+
+ return cmd_replica_index;
+}
+
+int
+_select_rxlators_with_local_bricks (xlator_t *this, glusterd_volinfo_t *volinfo,
+ dict_t *dict, cli_cmd_type type)
+{
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ int index = 0;
+ int rxlator_count = 0;
+ int replica_count = 0;
+ gf_boolean_t add = _gf_false;
+ int ret = 0;
+ int cmd_replica_index = -1;
+
+ priv = this->private;
+ replica_count = volinfo->replica_count;
+
+ if (type == PER_REPLICA) {
+
+ cmd_replica_index = get_replica_index_for_per_replica_cmd
+ (volinfo, dict);
+ if (cmd_replica_index == -1) {
+ ret = -1;
+ goto err;
+ }
+ }
+
+ index = 1;
+
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ if (uuid_is_null (brickinfo->uuid))
+ (void)glusterd_resolve_brick (brickinfo);
+
+ switch (type) {
+ case ALL_REPLICA:
+ if (!uuid_compare (MY_UUID, brickinfo->uuid))
+ add = _gf_true;
+ break;
+ case PER_REPLICA:
+ if (!uuid_compare (MY_UUID, brickinfo->uuid) &&
+ ((index-1)/replica_count == cmd_replica_index))
+
+ add = _gf_true;
+ break;
+ }
+
+ if (index % replica_count == 0) {
+ if (add) {
+ _add_rxlator_to_dict (dict, volinfo->volname,
+ (index-1)/replica_count,
+ rxlator_count);
+ rxlator_count++;
+ }
+ add = _gf_false;
+ }
+
+ index++;
+ }
+err:
+ if (ret)
+ rxlator_count = -1;
+
+ return rxlator_count;
+}
+
+int
+_select_rxlators_for_full_self_heal (xlator_t *this,
+ glusterd_volinfo_t *volinfo,
+ dict_t *dict)
+{
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ int index = 1;
+ int rxlator_count = 0;
+ int replica_count = 0;
+ uuid_t candidate = {0};
+
+ priv = this->private;
+ replica_count = volinfo->replica_count;
+
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ if (uuid_is_null (brickinfo->uuid))
+ (void)glusterd_resolve_brick (brickinfo);
+
+ if (uuid_compare (brickinfo->uuid, candidate) > 0)
+ uuid_copy (candidate, brickinfo->uuid);
+
+ if (index % replica_count == 0) {
+ if (!uuid_compare (MY_UUID, candidate)) {
+ _add_rxlator_to_dict (dict, volinfo->volname,
+ (index-1)/replica_count,
+ rxlator_count);
+ rxlator_count++;
+ }
+ uuid_clear (candidate);
+ }
+
+ index++;
+ }
+ return rxlator_count;
+}
+
+
+static int
+glusterd_bricks_select_snap (dict_t *dict, char **op_errstr,
+ struct list_head *selected)
+{
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ glusterd_pending_node_t *pending_node = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *volname = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ int brick_index = -1;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get"
+ " volname");
+ goto out;
+ }
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret)
+ goto out;
+
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ brick_index++;
+ if (uuid_compare (brickinfo->uuid, MY_UUID) ||
+ !glusterd_is_brick_started (brickinfo)) {
+ continue;
+ }
+ pending_node = GF_CALLOC (1, sizeof (*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
+ }
+ pending_node->node = brickinfo;
+ pending_node->type = GD_NODE_BRICK;
+ pending_node->index = brick_index;
+ list_add_tail (&pending_node->list,
+ selected);
+ pending_node = NULL;
+ }
+
+ ret = 0;
+
+out:
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning ret %d", ret);
+ return ret;
+}
+
+static int
+fill_shd_status_for_local_bricks (dict_t *dict, glusterd_volinfo_t *volinfo,
+ cli_cmd_type type, dict_t *req_dict)
+{
+ glusterd_brickinfo_t *brickinfo = NULL;
+ char msg[1024] = {0,};
+ char key[1024] = {0,};
+ char value[1024] = {0,};
+ int index = 0;
+ int ret = 0;
+ xlator_t *this = NULL;
+ int cmd_replica_index = -1;
+
+ this = THIS;
+ snprintf (msg, sizeof (msg), "self-heal-daemon is not running on");
+
+ if (type == PER_REPLICA) {
+ cmd_replica_index = get_replica_index_for_per_replica_cmd
+ (volinfo, req_dict);
+ if (cmd_replica_index == -1) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Could not find the "
+ "replica index for per replica type command");
+ ret = -1;
+ goto out;
+ }
+ }
+
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ if (uuid_is_null (brickinfo->uuid))
+ (void)glusterd_resolve_brick (brickinfo);
+
+ if (uuid_compare (MY_UUID, brickinfo->uuid)) {
+ index++;
+ continue;
+ }
+
+ if (type == PER_REPLICA) {
+ if (cmd_replica_index != (index/volinfo->replica_count)) {
+ index++;
+ continue;
+ }
+
+ }
+ snprintf (key, sizeof (key), "%d-status",index);
+ snprintf (value, sizeof (value), "%s %s",msg,
+ uuid_utoa(MY_UUID));
+ ret = dict_set_dynstr (dict, key, gf_strdup(value));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to"
+ "set the dictionary for shd status msg");
+ goto out;
+ }
+ snprintf (key, sizeof (key), "%d-shd-status",index);
+ ret = dict_set_str (dict, key, "off");
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to"
+ " set dictionary for shd status msg");
+ goto out;
+ }
+
+ index++;
+ }
+
+out:
+ return ret;
+
+}
+
+
+static int
+glusterd_bricks_select_heal_volume (dict_t *dict, char **op_errstr,
+ struct list_head *selected,
+ dict_t *rsp_dict)
+{
+ int ret = -1;
+ char *volname = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+ char msg[2048] = {0,};
+ glusterd_pending_node_t *pending_node = NULL;
+ gf_xl_afr_op_t heal_op = GF_AFR_OP_INVALID;
+ int rxlator_count = 0;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR, "volume name get failed");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Volume %s does not exist",
+ volname);
+
+ *op_errstr = gf_strdup (msg);
+ gf_log ("", GF_LOG_ERROR, "%s", msg);
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "heal-op", (int32_t*)&heal_op);
+ if (ret || (heal_op == GF_AFR_OP_INVALID)) {
+ gf_log ("glusterd", GF_LOG_ERROR, "heal op invalid");
+ goto out;
+ }
+
+ switch (heal_op) {
+ case GF_AFR_OP_INDEX_SUMMARY:
+ case GF_AFR_OP_STATISTICS_HEAL_COUNT:
+ if (!glusterd_is_nodesvc_online ("glustershd")) {
+ if (!rsp_dict) {
+ gf_log (this->name, GF_LOG_ERROR, "Received "
+ "empty ctx.");
+ goto out;
+ }
+
+ ret = fill_shd_status_for_local_bricks (rsp_dict,
+ volinfo,
+ ALL_REPLICA,
+ dict);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Unable to "
+ "fill the shd status for the local "
+ "bricks");
+ goto out;
+
+ }
+ break;
+ case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+ if (!glusterd_is_nodesvc_online ("glustershd")) {
+ if (!rsp_dict) {
+ gf_log (this->name, GF_LOG_ERROR, "Received "
+ "empty ctx.");
+ goto out;
+ }
+ ret = fill_shd_status_for_local_bricks (rsp_dict,
+ volinfo,
+ PER_REPLICA,
+ dict);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Unable to "
+ "fill the shd status for the local"
+ " bricks.");
+ goto out;
+
+ }
+ break;
+ default:
+ break;
+ }
+
+
+ switch (heal_op) {
+ case GF_AFR_OP_HEAL_FULL:
+ rxlator_count = _select_rxlators_for_full_self_heal (this,
+ volinfo,
+ dict);
+ break;
+ case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+ rxlator_count = _select_rxlators_with_local_bricks (this,
+ volinfo,
+ dict,
+ PER_REPLICA);
+ break;
+ default:
+ rxlator_count = _select_rxlators_with_local_bricks (this,
+ volinfo,
+ dict,
+ ALL_REPLICA);
+ break;
+ }
+ if (!rxlator_count)
+ goto out;
+ if (rxlator_count == -1){
+ gf_log (this->name, GF_LOG_ERROR, "Could not determine the"
+ "translator count");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_int32 (dict, "count", rxlator_count);
+ if (ret)
+ goto out;
+
+ pending_node = GF_CALLOC (1, sizeof (*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
+ } else {
+ pending_node->node = priv->shd;
+ pending_node->type = GD_NODE_SHD;
+ list_add_tail (&pending_node->list, selected);
+ pending_node = NULL;
+ }
+
+out:
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning ret %d", ret);
+ return ret;
+
+}
+
+static int
+glusterd_bricks_select_rebalance_volume (dict_t *dict, char **op_errstr,
+ struct list_head *selected)
+{
+ int ret = -1;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+ char msg[2048] = {0,};
+ glusterd_pending_node_t *pending_node = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR, "volume name get failed");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Volume %s does not exist",
+ volname);
+
+ *op_errstr = gf_strdup (msg);
+ gf_log ("", GF_LOG_ERROR, "%s", msg);
+ goto out;
+ }
+ pending_node = GF_CALLOC (1, sizeof (*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
+ } else {
+ pending_node->node = volinfo;
+ pending_node->type = GD_NODE_REBALANCE;
+ list_add_tail (&pending_node->list,
+ &opinfo.pending_bricks);
+ pending_node = NULL;
+ }
+
+out:
+ return ret;
+}
+
+
+
+
+static int
+glusterd_bricks_select_status_volume (dict_t *dict, char **op_errstr,
+ struct list_head *selected)
+{
+ int ret = -1;
+ int cmd = 0;
+ int brick_index = -1;
+ char *volname = NULL;
+ char *brickname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_pending_node_t *pending_node = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ GF_ASSERT (dict);
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = dict_get_int32 (dict, "cmd", &cmd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get status type");
+ goto out;
+ }
+
+ if (cmd & GF_CLI_STATUS_ALL)
+ goto out;
+
+ switch (cmd & GF_CLI_STATUS_MASK) {
+ case GF_CLI_STATUS_MEM:
+ case GF_CLI_STATUS_CLIENTS:
+ case GF_CLI_STATUS_INODE:
+ case GF_CLI_STATUS_FD:
+ case GF_CLI_STATUS_CALLPOOL:
+ case GF_CLI_STATUS_NFS:
+ case GF_CLI_STATUS_SHD:
+ break;
+ default:
+ goto out;
+ }
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get volname");
+ goto out;
+ }
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ goto out;
+ }
+
+ if ( (cmd & GF_CLI_STATUS_BRICK) != 0) {
+ ret = dict_get_str (dict, "brick", &brickname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to get brick");
+ goto out;
+ }
+ ret = glusterd_volume_brickinfo_get_by_brick (brickname,
+ volinfo,
+ &brickinfo);
+ if (ret)
+ goto out;
+
+ if (uuid_compare (brickinfo->uuid, MY_UUID)||
+ !glusterd_is_brick_started (brickinfo))
+ goto out;
+
+ pending_node = GF_CALLOC (1, sizeof (*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
+ }
+ pending_node->node = brickinfo;
+ pending_node->type = GD_NODE_BRICK;
+ pending_node->index = 0;
+ list_add_tail (&pending_node->list, selected);
+
+ ret = 0;
+ } else if ((cmd & GF_CLI_STATUS_NFS) != 0) {
+ if (!glusterd_is_nodesvc_online ("nfs")) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "NFS server is not running");
+ goto out;
+ }
+ pending_node = GF_CALLOC (1, sizeof (*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
+ }
+ pending_node->node = priv->nfs;
+ pending_node->type = GD_NODE_NFS;
+ pending_node->index = 0;
+ list_add_tail (&pending_node->list, selected);
+
+ ret = 0;
+ } else if ((cmd & GF_CLI_STATUS_SHD) != 0) {
+ if (!glusterd_is_nodesvc_online ("glustershd")) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Self-heal daemon is not running");
+ goto out;
+ }
+ pending_node = GF_CALLOC (1, sizeof (*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
+ }
+ pending_node->node = priv->shd;
+ pending_node->type = GD_NODE_SHD;
+ pending_node->index = 0;
+ list_add_tail (&pending_node->list, selected);
+
+ ret = 0;
+ } else {
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ brick_index++;
+ if (uuid_compare (brickinfo->uuid, MY_UUID) ||
+ !glusterd_is_brick_started (brickinfo)) {
+ continue;
+ }
+ pending_node = GF_CALLOC (1, sizeof (*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ gf_log (THIS->name ,GF_LOG_ERROR,
+ "Unable to allocate memory");
+ goto out;
+ }
+ pending_node->node = brickinfo;
+ pending_node->type = GD_NODE_BRICK;
+ pending_node->index = brick_index;
+ list_add_tail (&pending_node->list, selected);
+ pending_node = NULL;
+ }
+ }
+out:
+ return ret;
+}
+
+static int
+glusterd_op_ac_send_brick_op (glusterd_op_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+ rpc_clnt_procedure_t *proc = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ glusterd_op_t op = GD_OP_NONE;
+ glusterd_req_ctx_t *req_ctx = NULL;
+ char *op_errstr = NULL;
+
+ this = THIS;
+ priv = this->private;
+
+ if (ctx) {
+ req_ctx = ctx;
+ } else {
+ req_ctx = GF_CALLOC (1, sizeof (*req_ctx),
+ gf_gld_mt_op_allack_ctx_t);
+ op = glusterd_op_get_op ();
+ req_ctx->op = op;
+ uuid_copy (req_ctx->uuid, MY_UUID);
+ ret = glusterd_op_build_payload (&req_ctx->dict, &op_errstr,
+ NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, LOGSTR_BUILD_PAYLOAD,
+ gd_op_list[op]);
+ if (op_errstr == NULL)
+ gf_asprintf (&op_errstr,
+ OPERRSTR_BUILD_PAYLOAD);
+ opinfo.op_errstr = op_errstr;
+ goto out;
+ }
+ }
+
+ proc = &priv->gfs_mgmt->proctable[GLUSTERD_BRICK_OP];
+ if (proc->fn) {
+ ret = proc->fn (NULL, this, req_ctx);
+ if (ret)
+ goto out;
+ }
+
+ if (!opinfo.pending_count && !opinfo.brick_pending_count) {
+ glusterd_clear_pending_nodes (&opinfo.pending_bricks);
+ ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK,
+ &event->txn_id, req_ctx);
+ }
+
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret);
+
+ return ret;
+}
+
+
+static int
+glusterd_op_ac_rcvd_brick_op_acc (glusterd_op_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+ glusterd_op_brick_rsp_ctx_t *ev_ctx = NULL;
+ char *op_errstr = NULL;
+ glusterd_op_t op = GD_OP_NONE;
+ gd_node_type type = GD_NODE_NONE;
+ dict_t *op_ctx = NULL;
+ glusterd_req_ctx_t *req_ctx = NULL;
+ void *pending_entry = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (event);
+ GF_ASSERT (ctx);
+ ev_ctx = ctx;
+
+ req_ctx = ev_ctx->commit_ctx;
+ GF_ASSERT (req_ctx);
+
+ op = req_ctx->op;
+ op_ctx = glusterd_op_get_ctx ();
+ pending_entry = ev_ctx->pending_node->node;
+ type = ev_ctx->pending_node->type;
+
+ ret = glusterd_remove_pending_entry (&opinfo.pending_bricks,
+ pending_entry);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "unknown response received ");
+ ret = -1;
+ goto out;
+ }
+
+ if (opinfo.brick_pending_count > 0)
+ opinfo.brick_pending_count--;
+
+ glusterd_handle_node_rsp (req_ctx->dict, pending_entry, op, ev_ctx->rsp_dict,
+ op_ctx, &op_errstr, type);
+
+ if (opinfo.brick_pending_count > 0)
+ goto out;
+
+ ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, &event->txn_id,
+ ev_ctx->commit_ctx);
+
+out:
+ if (ev_ctx->rsp_dict)
+ dict_unref (ev_ctx->rsp_dict);
+ GF_FREE (ev_ctx);
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+int32_t
+glusterd_op_bricks_select (glusterd_op_t op, dict_t *dict, char **op_errstr,
+ struct list_head *selected, dict_t *rsp_dict)
+{
+ int ret = 0;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (op_errstr);
+ GF_ASSERT (op > GD_OP_NONE);
+ GF_ASSERT (op < GD_OP_MAX);
+
+ switch (op) {
+ case GD_OP_STOP_VOLUME:
+ ret = glusterd_bricks_select_stop_volume (dict, op_errstr,
+ selected);
+ break;
+
+ case GD_OP_REMOVE_BRICK:
+ ret = glusterd_bricks_select_remove_brick (dict, op_errstr,
+ selected);
+ break;
+
+ case GD_OP_PROFILE_VOLUME:
+ ret = glusterd_bricks_select_profile_volume (dict, op_errstr,
+ selected);
+ break;
+
+ case GD_OP_HEAL_VOLUME:
+ ret = glusterd_bricks_select_heal_volume (dict, op_errstr,
+ selected, rsp_dict);
+ break;
+
+ case GD_OP_STATUS_VOLUME:
+ ret = glusterd_bricks_select_status_volume (dict, op_errstr,
+ selected);
+ break;
+
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ ret = glusterd_bricks_select_rebalance_volume (dict, op_errstr,
+ selected);
+ break;
+ case GD_OP_SNAP:
+ ret = glusterd_bricks_select_snap (dict, op_errstr, selected);
+ break;
+ default:
+ break;
+ }
+
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+glusterd_op_sm_t glusterd_op_state_default [] = {
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_NONE
+ {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_send_lock},//EVENT_START_LOCK
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_lock}, //EVENT_LOCK
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_RCVD_ACC
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_ALL_ACC
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_STAGE_ACC
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_RCVD_RJT
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_STAGE_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_START_UNLOCK
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_ALL_ACK
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_MAX
+};
+
+glusterd_op_sm_t glusterd_op_state_lock_sent [] = {
+ {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_NONE
+ {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none},//EVENT_START_LOCK
+ {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_lock}, //EVENT_LOCK
+ {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_rcvd_lock_acc}, //EVENT_RCVD_ACC
+ {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_send_stage_op}, //EVENT_ALL_ACC
+ {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_STAGE_ACC
+ {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_send_unlock_drain}, //EVENT_RCVD_RJT
+ {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_STAGE_OP
+ {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK
+ {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_ALL_ACK
+ {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_MAX
+};
+
+glusterd_op_sm_t glusterd_op_state_locked [] = {
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_NONE
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none},//EVENT_START_LOCK
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_lock}, //EVENT_LOCK
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_RCVD_ACC
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_ALL_ACC
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_STAGE_ACC
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_RCVD_RJT
+ {GD_OP_STATE_STAGED, glusterd_op_ac_stage_op}, //EVENT_STAGE_OP
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_START_UNLOCK
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_ALL_ACK
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_local_unlock}, //EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_MAX
+};
+
+glusterd_op_sm_t glusterd_op_state_stage_op_sent [] = {
+ {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_NONE
+ {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none},//EVENT_START_LOCK
+ {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_lock}, //EVENT_LOCK
+ {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_rcvd_stage_op_acc}, //EVENT_RCVD_ACC
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_send_brick_op}, //EVENT_ALL_ACC
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_send_brick_op}, //EVENT_STAGE_ACC
+ {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_stage_op_failed}, //EVENT_RCVD_RJT
+ {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_STAGE_OP
+ {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK
+ {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_ALL_ACK
+ {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_MAX
+};
+
+glusterd_op_sm_t glusterd_op_state_stage_op_failed [] = {
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_NONE
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none},//EVENT_START_LOCK
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_lock}, //EVENT_LOCK
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_stage_op_failed}, //EVENT_RCVD_ACC
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_ALL_ACC
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_ACC
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_stage_op_failed}, //EVENT_RCVD_RJT
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_OP
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_ALL_ACK
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_MAX
+};
+
+glusterd_op_sm_t glusterd_op_state_staged [] = {
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_NONE
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none},//EVENT_START_LOCK
+ {GD_OP_STATE_STAGED, glusterd_op_ac_lock}, //EVENT_LOCK
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_RCVD_ACC
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_ALL_ACC
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_STAGE_ACC
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_RCVD_RJT
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_STAGE_OP
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_send_brick_op}, //EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_START_UNLOCK
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_ALL_ACK
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_local_unlock}, //EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_MAX
+};
+
+glusterd_op_sm_t glusterd_op_state_brick_op_sent [] = {
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_NONE
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none},//EVENT_START_LOCK
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_lock}, //EVENT_LOCK
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_rcvd_brick_op_acc}, //EVENT_RCVD_ACC
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_ALL_ACC
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_STAGE_ACC
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_brick_op_failed}, //EVENT_RCVD_RJT
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_BRICK_OP
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK
+ {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_send_commit_op}, //EVENT_ALL_ACK
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_MAX
+};
+
+glusterd_op_sm_t glusterd_op_state_brick_op_failed [] = {
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_NONE
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none},//EVENT_START_LOCK
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_lock}, //EVENT_LOCK
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_brick_op_failed}, //EVENT_RCVD_ACC
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_ALL_ACC
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_ACC
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_brick_op_failed}, //EVENT_RCVD_RJT
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_BRICK_OP
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_ALL_ACK
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_MAX
+};
+
+glusterd_op_sm_t glusterd_op_state_brick_committed [] = {
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_NONE
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none},//EVENT_START_LOCK
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_lock}, //EVENT_LOCK
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_rcvd_brick_op_acc}, //EVENT_RCVD_ACC
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_ALL_ACC
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_STAGE_ACC
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_brick_op_failed}, //EVENT_RCVD_RJT
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_STAGE_OP
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_START_UNLOCK
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_commit_op}, //EVENT_ALL_ACK
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_local_unlock}, //EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_MAX
+};
+
+glusterd_op_sm_t glusterd_op_state_brick_commit_failed [] = {
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_NONE
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none},//EVENT_START_LOCK
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_lock}, //EVENT_LOCK
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_brick_op_failed}, //EVENT_RCVD_ACC
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_ALL_ACC
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_ACC
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_brick_op_failed}, //EVENT_RCVD_RJT
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_OP
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_START_UNLOCK
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_send_commit_failed}, //EVENT_ALL_ACK
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_local_unlock}, //EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_MAX
+};
+
+glusterd_op_sm_t glusterd_op_state_commit_op_failed [] = {
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_NONE
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none},//EVENT_START_LOCK
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_lock}, //EVENT_LOCK
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_commit_op_failed}, //EVENT_RCVD_ACC
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_ALL_ACC
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_ACC
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_commit_op_failed}, //EVENT_RCVD_RJT
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_OP
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_ALL_ACK
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_MAX
+};
+
+glusterd_op_sm_t glusterd_op_state_commit_op_sent [] = {
+ {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_NONE
+ {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none},//EVENT_START_LOCK
+ {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_lock}, //EVENT_LOCK
+ {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_rcvd_commit_op_acc}, //EVENT_RCVD_ACC
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_ALL_ACC
+ {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_STAGE_ACC
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_COMMIT_ACC
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_commit_op_failed}, //EVENT_RCVD_RJT
+ {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_STAGE_OP
+ {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK
+ {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_ALL_ACK
+ {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_MAX
+};
+
+glusterd_op_sm_t glusterd_op_state_committed [] = {
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_NONE
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none},//EVENT_START_LOCK
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_lock}, //EVENT_LOCK
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_RCVD_ACC
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_ALL_ACC
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_STAGE_ACC
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_RCVD_RJT
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_STAGE_OP
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_START_UNLOCK
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_ALL_ACK
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_local_unlock}, //EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_MAX
+};
+
+glusterd_op_sm_t glusterd_op_state_unlock_sent [] = {
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_NONE
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none},//EVENT_START_LOCK
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_lock}, //EVENT_LOCK
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_rcvd_unlock_acc}, //EVENT_RCVD_ACC
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlocked_all}, //EVENT_ALL_ACC
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_STAGE_ACC
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_rcvd_unlock_acc}, //EVENT_RCVD_RJT
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_STAGE_OP
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_ALL_ACK
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_MAX
+};
+
+glusterd_op_sm_t glusterd_op_state_ack_drain [] = {
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_NONE
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none},//EVENT_START_LOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_lock}, //EVENT_LOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_send_unlock_drain}, //EVENT_RCVD_ACC
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_ALL_ACC
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_STAGE_ACC
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_send_unlock_drain}, //EVENT_RCVD_RJT
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_STAGE_OP
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_ALL_ACK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_MAX
+};
+
+glusterd_op_sm_t *glusterd_op_state_table [] = {
+ glusterd_op_state_default,
+ glusterd_op_state_lock_sent,
+ glusterd_op_state_locked,
+ glusterd_op_state_stage_op_sent,
+ glusterd_op_state_staged,
+ glusterd_op_state_commit_op_sent,
+ glusterd_op_state_committed,
+ glusterd_op_state_unlock_sent,
+ glusterd_op_state_stage_op_failed,
+ glusterd_op_state_commit_op_failed,
+ glusterd_op_state_brick_op_sent,
+ glusterd_op_state_brick_op_failed,
+ glusterd_op_state_brick_committed,
+ glusterd_op_state_brick_commit_failed,
+ glusterd_op_state_ack_drain
+};
+
+int
+glusterd_op_sm_new_event (glusterd_op_sm_event_type_t event_type,
+ glusterd_op_sm_event_t **new_event)
+{
+ glusterd_op_sm_event_t *event = NULL;
+
+ GF_ASSERT (new_event);
+ GF_ASSERT (GD_OP_EVENT_NONE <= event_type &&
+ GD_OP_EVENT_MAX > event_type);
+
+ event = GF_CALLOC (1, sizeof (*event), gf_gld_mt_op_sm_event_t);
+
+ if (!event)
+ return -1;
+
+ *new_event = event;
+ event->event = event_type;
+ INIT_LIST_HEAD (&event->list);
+
+ return 0;
+}
+
+int
+glusterd_op_sm_inject_event (glusterd_op_sm_event_type_t event_type,
+ uuid_t *txn_id, void *ctx)
+{
+ int32_t ret = -1;
+ glusterd_op_sm_event_t *event = NULL;
+
+ GF_ASSERT (event_type < GD_OP_EVENT_MAX &&
+ event_type >= GD_OP_EVENT_NONE);
+
+ ret = glusterd_op_sm_new_event (event_type, &event);
+
+ if (ret)
+ goto out;
+
+ event->ctx = ctx;
+
+ if (txn_id)
+ uuid_copy (event->txn_id, *txn_id);
+
+ gf_log (THIS->name, GF_LOG_DEBUG, "Enqueue event: '%s'",
+ glusterd_op_sm_event_name_get (event->event));
+ list_add_tail (&event->list, &gd_op_sm_queue);
+
+out:
+ return ret;
+}
+
+void
+glusterd_destroy_req_ctx (glusterd_req_ctx_t *ctx)
+{
+ if (!ctx)
+ return;
+ if (ctx->dict)
+ dict_unref (ctx->dict);
+ GF_FREE (ctx);
+}
+
+void
+glusterd_destroy_local_unlock_ctx (uuid_t *ctx)
+{
+ if (!ctx)
+ return;
+ GF_FREE (ctx);
+}
+
+void
+glusterd_destroy_op_event_ctx (glusterd_op_sm_event_t *event)
+{
+ if (!event)
+ return;
+
+ switch (event->event) {
+ case GD_OP_EVENT_LOCK:
+ case GD_OP_EVENT_UNLOCK:
+ glusterd_destroy_lock_ctx (event->ctx);
+ break;
+ case GD_OP_EVENT_STAGE_OP:
+ case GD_OP_EVENT_ALL_ACK:
+ glusterd_destroy_req_ctx (event->ctx);
+ break;
+ case GD_OP_EVENT_LOCAL_UNLOCK_NO_RESP:
+ glusterd_destroy_local_unlock_ctx (event->ctx);
+ break;
+ default:
+ break;
+ }
+}
+
+int
+glusterd_op_sm ()
+{
+ glusterd_op_sm_event_t *event = NULL;
+ glusterd_op_sm_event_t *tmp = NULL;
+ int ret = -1;
+ int lock_err = 0;
+ glusterd_op_sm_ac_fn handler = NULL;
+ glusterd_op_sm_t *state = NULL;
+ glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE;
+ xlator_t *this = NULL;
+ glusterd_op_info_t txn_op_info;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ if ((lock_err = pthread_mutex_trylock (&gd_op_sm_lock))) {
+ gf_log (this->name, GF_LOG_ERROR, "lock failed due to %s",
+ strerror (lock_err));
+ goto lock_failed;
+ }
+
+ while (!list_empty (&gd_op_sm_queue)) {
+
+ list_for_each_entry_safe (event, tmp, &gd_op_sm_queue, list) {
+
+ list_del_init (&event->list);
+ event_type = event->event;
+ gf_log (this->name, GF_LOG_DEBUG, "Dequeued event of "
+ "type: '%s'",
+ glusterd_op_sm_event_name_get(event_type));
+
+ gf_log ("", GF_LOG_DEBUG, "transaction ID = %s",
+ uuid_utoa (event->txn_id));
+
+ ret = glusterd_get_txn_opinfo (&event->txn_id,
+ &txn_op_info);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to get transaction's opinfo");
+ glusterd_destroy_op_event_ctx (event);
+ GF_FREE (event);
+ continue;
+ } else
+ opinfo = txn_op_info;
+
+ state = glusterd_op_state_table[opinfo.state.state];
+
+ GF_ASSERT (state);
+
+ handler = state[event_type].handler;
+ GF_ASSERT (handler);
+
+ ret = handler (event, event->ctx);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "handler returned: %d", ret);
+ glusterd_destroy_op_event_ctx (event);
+ GF_FREE (event);
+ continue;
+ }
+
+ ret = glusterd_op_sm_transition_state (&opinfo, state,
+ event_type);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to transition"
+ "state from '%s' to '%s'",
+ glusterd_op_sm_state_name_get(opinfo.state.state),
+ glusterd_op_sm_state_name_get(state[event_type].next_state));
+ (void ) pthread_mutex_unlock (&gd_op_sm_lock);
+ return ret;
+ }
+
+ if ((state[event_type].next_state ==
+ GD_OP_STATE_DEFAULT) &&
+ (event_type == GD_OP_EVENT_UNLOCK)) {
+ /* Clearing the transaction opinfo */
+ ret = glusterd_clear_txn_opinfo(&event->txn_id);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to clear "
+ "transaction's opinfo");
+ } else {
+ ret = glusterd_set_txn_opinfo (&event->txn_id,
+ &opinfo);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to set "
+ "transaction's opinfo");
+ }
+
+ glusterd_destroy_op_event_ctx (event);
+ GF_FREE (event);
+
+ }
+ }
+
+
+ (void ) pthread_mutex_unlock (&gd_op_sm_lock);
+ ret = 0;
+
+lock_failed:
+
+ return ret;
+}
+
+int32_t
+glusterd_op_set_op (glusterd_op_t op)
+{
+
+ GF_ASSERT (op < GD_OP_MAX);
+ GF_ASSERT (op > GD_OP_NONE);
+
+ opinfo.op = op;
+
+ return 0;
+
+}
+
+int32_t
+glusterd_op_get_op ()
+{
+
+ return opinfo.op;
+
+}
+
+int32_t
+glusterd_op_set_req (rpcsvc_request_t *req)
+{
+
+ GF_ASSERT (req);
+ opinfo.req = req;
+ return 0;
+}
+
+int32_t
+glusterd_op_clear_op (glusterd_op_t op)
+{
+
+ opinfo.op = GD_OP_NONE;
+
+ return 0;
+
+}
+
+int32_t
+glusterd_op_free_ctx (glusterd_op_t op, void *ctx)
+{
+
+ if (ctx) {
+ switch (op) {
+ case GD_OP_CREATE_VOLUME:
+ case GD_OP_DELETE_VOLUME:
+ case GD_OP_STOP_VOLUME:
+ case GD_OP_ADD_BRICK:
+ case GD_OP_REMOVE_BRICK:
+ case GD_OP_REPLACE_BRICK:
+ case GD_OP_LOG_ROTATE:
+ case GD_OP_SYNC_VOLUME:
+ case GD_OP_SET_VOLUME:
+ case GD_OP_START_VOLUME:
+ case GD_OP_RESET_VOLUME:
+ case GD_OP_GSYNC_SET:
+ case GD_OP_QUOTA:
+ case GD_OP_PROFILE_VOLUME:
+ case GD_OP_STATUS_VOLUME:
+ case GD_OP_REBALANCE:
+ case GD_OP_HEAL_VOLUME:
+ case GD_OP_STATEDUMP_VOLUME:
+ case GD_OP_CLEARLOCKS_VOLUME:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ dict_unref (ctx);
+ break;
+ default:
+ GF_ASSERT (0);
+ break;
+ }
+ }
+
+ glusterd_op_reset_ctx ();
+ return 0;
+
+}
+
+void *
+glusterd_op_get_ctx ()
+{
+
+ return opinfo.op_ctx;
+
+}
+
+int
+glusterd_op_sm_init ()
+{
+ INIT_LIST_HEAD (&gd_op_sm_queue);
+ pthread_mutex_init (&gd_op_sm_lock, NULL);
+ return 0;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h
new file mode 100644
index 000000000..cf57b78e0
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h
@@ -0,0 +1,299 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_OP_SM_H_
+#define _GLUSTERD_OP_SM_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef GSYNC_CONF_TEMPLATE
+#define GSYNC_CONF_TEMPLATE GEOREP"/gsyncd_template.conf"
+#endif
+
+#include <pthread.h>
+#include "uuid.h"
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "call-stub.h"
+#include "fd.h"
+#include "byte-order.h"
+#include "glusterd.h"
+#include "protocol-common.h"
+
+#define GD_VOLUME_NAME_MAX 256
+#define GD_OP_PROTECTED (0x02)
+#define GD_OP_UNPROTECTED (0x04)
+
+typedef enum glusterd_op_sm_state_ {
+ GD_OP_STATE_DEFAULT = 0,
+ GD_OP_STATE_LOCK_SENT,
+ GD_OP_STATE_LOCKED,
+ GD_OP_STATE_STAGE_OP_SENT,
+ GD_OP_STATE_STAGED,
+ GD_OP_STATE_COMMIT_OP_SENT,
+ GD_OP_STATE_COMMITED,
+ GD_OP_STATE_UNLOCK_SENT,
+ GD_OP_STATE_STAGE_OP_FAILED,
+ GD_OP_STATE_COMMIT_OP_FAILED,
+ GD_OP_STATE_BRICK_OP_SENT,
+ GD_OP_STATE_BRICK_OP_FAILED,
+ GD_OP_STATE_BRICK_COMMITTED,
+ GD_OP_STATE_BRICK_COMMIT_FAILED,
+ GD_OP_STATE_ACK_DRAIN,
+ GD_OP_STATE_MAX,
+} glusterd_op_sm_state_t;
+
+typedef enum glusterd_op_sm_event_type_ {
+ GD_OP_EVENT_NONE = 0,
+ GD_OP_EVENT_START_LOCK,
+ GD_OP_EVENT_LOCK,
+ GD_OP_EVENT_RCVD_ACC,
+ GD_OP_EVENT_ALL_ACC,
+ GD_OP_EVENT_STAGE_ACC,
+ GD_OP_EVENT_COMMIT_ACC,
+ GD_OP_EVENT_RCVD_RJT,
+ GD_OP_EVENT_STAGE_OP,
+ GD_OP_EVENT_COMMIT_OP,
+ GD_OP_EVENT_UNLOCK,
+ GD_OP_EVENT_START_UNLOCK,
+ GD_OP_EVENT_ALL_ACK,
+ GD_OP_EVENT_LOCAL_UNLOCK_NO_RESP,
+ GD_OP_EVENT_MAX
+} glusterd_op_sm_event_type_t;
+
+
+struct glusterd_op_sm_event_ {
+ struct list_head list;
+ void *ctx;
+ glusterd_op_sm_event_type_t event;
+ uuid_t txn_id;
+};
+
+typedef struct glusterd_op_sm_event_ glusterd_op_sm_event_t;
+
+typedef int (*glusterd_op_sm_ac_fn) (glusterd_op_sm_event_t *, void *);
+
+typedef struct glusterd_op_sm_ {
+ glusterd_op_sm_state_t next_state;
+ glusterd_op_sm_ac_fn handler;
+} glusterd_op_sm_t;
+
+typedef struct glusterd_op_sm_state_info_ {
+ glusterd_op_sm_state_t state;
+ struct timeval time;
+} glusterd_op_sm_state_info_t;
+
+struct glusterd_op_info_ {
+ glusterd_op_sm_state_info_t state;
+ int32_t pending_count;
+ int32_t brick_pending_count;
+ int32_t op_count;
+ glusterd_op_t op;
+ struct list_head op_peers;
+ void *op_ctx;
+ rpcsvc_request_t *req;
+ int32_t op_ret;
+ int32_t op_errno;
+ char *op_errstr;
+ struct list_head pending_bricks;
+};
+
+typedef struct glusterd_op_info_ glusterd_op_info_t;
+
+struct glusterd_op_log_filename_ctx_ {
+ char volume_name[GD_VOLUME_NAME_MAX];
+ char brick[GD_VOLUME_NAME_MAX];
+ char path[PATH_MAX];
+};
+typedef struct glusterd_op_log_filename_ctx_ glusterd_op_log_filename_ctx_t;
+
+struct glusterd_op_lock_ctx_ {
+ uuid_t uuid;
+ dict_t *dict;
+ rpcsvc_request_t *req;
+};
+
+typedef struct glusterd_op_lock_ctx_ glusterd_op_lock_ctx_t;
+
+struct glusterd_req_ctx_ {
+ rpcsvc_request_t *req;
+ u_char uuid[16];
+ int op;
+ dict_t *dict;
+};
+
+typedef struct glusterd_req_ctx_ glusterd_req_ctx_t;
+
+typedef struct glusterd_op_brick_rsp_ctx_ {
+ int op_ret;
+ char *op_errstr;
+ dict_t *rsp_dict;
+ glusterd_req_ctx_t *commit_ctx;
+ glusterd_pending_node_t *pending_node;
+} glusterd_op_brick_rsp_ctx_t;
+
+typedef struct glusterd_pr_brick_rsp_conv_t {
+ int count;
+ dict_t *dict;
+} glusterd_pr_brick_rsp_conv_t;
+
+typedef struct glusterd_heal_rsp_conv_ {
+ dict_t *dict;
+ glusterd_volinfo_t *volinfo;
+ xlator_t *this;
+} glusterd_heal_rsp_conv_t;
+
+typedef struct glusterd_status_rsp_conv_ {
+ int count;
+ int brick_index_max;
+ int other_count;
+ dict_t *dict;
+} glusterd_status_rsp_conv_t;
+
+typedef struct glusterd_gsync_status_temp {
+ dict_t *rsp_dict;
+ glusterd_volinfo_t *volinfo;
+ char *node;
+}glusterd_gsync_status_temp_t;
+
+typedef enum cli_cmd_type_ {
+ PER_REPLICA,
+ ALL_REPLICA,
+ } cli_cmd_type;
+
+int
+glusterd_op_sm_new_event (glusterd_op_sm_event_type_t event_type,
+ glusterd_op_sm_event_t **new_event);
+int
+glusterd_op_sm_inject_event (glusterd_op_sm_event_type_t event_type,
+ uuid_t *txn_id, void *ctx);
+
+int
+glusterd_op_sm_init ();
+
+int
+glusterd_op_sm ();
+
+int32_t
+glusterd_op_set_ctx (void *ctx);
+
+int32_t
+glusterd_op_set_op (glusterd_op_t op);
+
+int
+glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx);
+
+int32_t
+glusterd_op_stage_validate (glusterd_op_t op, dict_t *req, char **op_errstr,
+ dict_t *rsp_dict);
+
+int32_t
+glusterd_op_commit_perform (glusterd_op_t op, dict_t *req, char **op_errstr,
+ dict_t* dict);
+
+int32_t
+glusterd_op_txn_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx,
+ char *err_str, size_t err_len);
+
+int32_t
+glusterd_op_txn_complete ();
+
+void *
+glusterd_op_get_ctx ();
+
+int32_t
+glusterd_op_set_req (rpcsvc_request_t *req);
+
+int32_t
+glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret,
+ int32_t op_errno, rpcsvc_request_t *req,
+ void *ctx, char *op_errstr);
+int32_t
+glusterd_op_get_op ();
+
+int32_t
+glusterd_op_clear_op ();
+
+int32_t
+glusterd_op_free_ctx (glusterd_op_t op, void *ctx);
+
+int
+glusterd_check_option_exists(char *optstring, char **completion);
+
+int
+set_xlator_option (dict_t *dict, char *key, char *value);
+
+void
+glusterd_do_replace_brick (void *data);
+
+char*
+glusterd_op_sm_state_name_get (int state);
+
+char*
+glusterd_op_sm_event_name_get (int event);
+int32_t
+glusterd_op_bricks_select (glusterd_op_t op, dict_t *dict, char **op_errstr,
+ struct list_head *selected, dict_t *rsp_dict);
+int
+glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickinfo,
+ gd1_mgmt_brick_op_req **req, dict_t *dict);
+int
+glusterd_node_op_build_payload (glusterd_op_t op, gd1_mgmt_brick_op_req **req,
+ dict_t *dict);
+int32_t
+glusterd_handle_brick_rsp (void *pending_entry, glusterd_op_t op,
+ dict_t *rsp_dict, dict_t *ctx_dict, char **op_errstr,
+ gd_node_type type);
+
+dict_t*
+glusterd_op_init_commit_rsp_dict (glusterd_op_t op);
+
+void
+glusterd_op_modify_op_ctx (glusterd_op_t op, void *op_ctx);
+
+int32_t
+glusterd_volume_stats_read_perf (char *brick_path, int32_t blk_size,
+ int32_t blk_count, double *throughput, double *time);
+int32_t
+glusterd_volume_stats_write_perf (char *brick_path, int32_t blk_size,
+ int32_t blk_count, double *throughput, double *time);
+gf_boolean_t
+glusterd_is_volume_started (glusterd_volinfo_t *volinfo);
+int
+glusterd_start_bricks (glusterd_volinfo_t *volinfo);
+gf_boolean_t
+glusterd_are_all_volumes_stopped ();
+int
+glusterd_stop_bricks (glusterd_volinfo_t *volinfo);
+int
+gsync_status (char *master, char *slave, char *conf_path, int *status);
+
+int
+glusterd_check_gsync_running (glusterd_volinfo_t *volinfo, gf_boolean_t *flag);
+
+int
+glusterd_defrag_volume_node_rsp (dict_t *req_dict, dict_t *rsp_dict,
+ dict_t *op_ctx);
+int
+glusterd_is_valid_vg (glusterd_brickinfo_t *brick, int check_tag, char *msg);
+
+int32_t
+glusterd_get_txn_opinfo (uuid_t *txn_id, glusterd_op_info_t *opinfo);
+
+int32_t
+glusterd_set_txn_opinfo (uuid_t *txn_id, glusterd_op_info_t *opinfo);
+
+int32_t
+glusterd_clear_txn_opinfo (uuid_t *txn_id);
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c
new file mode 100644
index 000000000..a153ca1a9
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c
@@ -0,0 +1,492 @@
+/*
+ Copyright (c) 2010-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "glusterfs.h"
+#include "compat-errno.h"
+
+#include "glusterd.h"
+#include "glusterd-utils.h"
+
+#include "portmap-xdr.h"
+#include "xdr-generic.h"
+#include "protocol-common.h"
+#include "rpcsvc.h"
+
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+
+
+int
+pmap_port_isfree (int port)
+{
+ struct sockaddr_in sin;
+ int sock = -1;
+ int ret = 0;
+
+ memset (&sin, 0, sizeof (sin));
+ sin.sin_family = PF_INET;
+ sin.sin_port = hton16 (port);
+
+ sock = socket (PF_INET, SOCK_STREAM, 0);
+ if (sock == -1)
+ return -1;
+
+ ret = bind (sock, (struct sockaddr *)&sin, sizeof (sin));
+ close (sock);
+
+ return (ret == 0) ? 1 : 0;
+}
+
+
+static struct pmap_registry *
+pmap_registry_new (xlator_t *this)
+{
+ struct pmap_registry *pmap = NULL;
+ int i = 0;
+
+ pmap = CALLOC (sizeof (*pmap), 1);
+ if (!pmap)
+ return NULL;
+
+ for (i = 0; i < 65536; i++) {
+ if (pmap_port_isfree (i))
+ pmap->ports[i].type = GF_PMAP_PORT_FREE;
+ else
+ pmap->ports[i].type = GF_PMAP_PORT_FOREIGN;
+ }
+
+ pmap->base_port = pmap->last_alloc =
+ ((glusterd_conf_t *)(this->private))->base_port;
+
+ return pmap;
+}
+
+
+struct pmap_registry *
+pmap_registry_get (xlator_t *this)
+{
+ glusterd_conf_t *priv = NULL;
+ struct pmap_registry *pmap = NULL;
+
+ priv = this->private;
+
+ pmap = priv->pmap;
+ if (!pmap) {
+ pmap = pmap_registry_new (this);
+ if (!pmap)
+ return NULL;
+ priv->pmap = pmap;
+ }
+
+ return pmap;
+}
+
+
+static char*
+nextword (char *str)
+{
+ while (*str && !isspace (*str))
+ str++;
+ while (*str && isspace (*str))
+ str++;
+
+ return str;
+}
+
+int
+pmap_registry_search (xlator_t *this, const char *brickname,
+ gf_pmap_port_type_t type)
+{
+ struct pmap_registry *pmap = NULL;
+ int p = 0;
+ char *brck = NULL;
+ char *nbrck = NULL;
+
+ pmap = pmap_registry_get (this);
+
+ for (p = pmap->base_port; p <= pmap->last_alloc; p++) {
+ if (!pmap->ports[p].brickname || pmap->ports[p].type != type)
+ continue;
+
+ for (brck = pmap->ports[p].brickname;;) {
+ nbrck = strtail (brck, brickname);
+ if (nbrck && (!*nbrck || isspace (*nbrck)))
+ return p;
+ brck = nextword (brck);
+ if (!*brck)
+ break;
+ }
+ }
+
+ return 0;
+}
+
+int
+pmap_registry_search_by_xprt (xlator_t *this, void *xprt,
+ gf_pmap_port_type_t type)
+{
+ struct pmap_registry *pmap = NULL;
+ int p = 0;
+ int port = 0;
+
+ pmap = pmap_registry_get (this);
+
+ for (p = pmap->base_port; p <= pmap->last_alloc; p++) {
+ if (!pmap->ports[p].xprt)
+ continue;
+ if (pmap->ports[p].xprt == xprt &&
+ pmap->ports[p].type == type) {
+ port = p;
+ break;
+ }
+ }
+
+ return port;
+}
+
+
+char *
+pmap_registry_search_by_port (xlator_t *this, int port)
+{
+ struct pmap_registry *pmap = NULL;
+ char *brickname = NULL;
+
+ if (port > 65535)
+ goto out;
+
+ pmap = pmap_registry_get (this);
+
+ if (pmap->ports[port].type == GF_PMAP_PORT_BRICKSERVER)
+ brickname = pmap->ports[port].brickname;
+
+out:
+ return brickname;
+}
+
+
+int
+pmap_registry_alloc (xlator_t *this)
+{
+ struct pmap_registry *pmap = NULL;
+ int p = 0;
+ int port = 0;
+
+ pmap = pmap_registry_get (this);
+
+ for (p = pmap->last_alloc; p < 65535; p++) {
+ if (pmap->ports[p].type != GF_PMAP_PORT_FREE)
+ continue;
+
+ if (pmap_port_isfree (p)) {
+ pmap->ports[p].type = GF_PMAP_PORT_LEASED;
+ port = p;
+ break;
+ }
+ }
+
+ if (port)
+ pmap->last_alloc = port;
+
+ return port;
+}
+
+int
+pmap_registry_bind (xlator_t *this, int port, const char *brickname,
+ gf_pmap_port_type_t type, void *xprt)
+{
+ struct pmap_registry *pmap = NULL;
+ int p = 0;
+
+ pmap = pmap_registry_get (this);
+
+ if (port > 65535)
+ goto out;
+
+ p = port;
+ pmap->ports[p].type = type;
+ free (pmap->ports[p].brickname);
+ pmap->ports[p].brickname = strdup (brickname);
+ pmap->ports[p].type = type;
+ pmap->ports[p].xprt = xprt;
+
+ gf_log ("pmap", GF_LOG_INFO, "adding brick %s on port %d",
+ brickname, port);
+
+ if (pmap->last_alloc < p)
+ pmap->last_alloc = p;
+out:
+ return 0;
+}
+
+int
+pmap_registry_remove (xlator_t *this, int port, const char *brickname,
+ gf_pmap_port_type_t type, void *xprt)
+{
+ struct pmap_registry *pmap = NULL;
+ int p = 0;
+ glusterd_conf_t *priv = NULL;
+
+ priv = this->private;
+ pmap = priv->pmap;
+ if (!pmap)
+ goto out;
+
+ if (port) {
+ if (port > 65535)
+ goto out;
+
+ p = port;
+ goto remove;
+ }
+
+ if (brickname && strchr (brickname, '/')) {
+ p = pmap_registry_search (this, brickname, type);
+ if (p)
+ goto remove;
+ }
+
+ if (xprt) {
+ p = pmap_registry_search_by_xprt (this, xprt, type);
+ if (p)
+ goto remove;
+ }
+
+ goto out;
+remove:
+ gf_log ("pmap", GF_LOG_INFO, "removing brick %s on port %d",
+ pmap->ports[p].brickname, p);
+
+ free (pmap->ports[p].brickname);
+
+ pmap->ports[p].brickname = NULL;
+ pmap->ports[p].xprt = NULL;
+
+out:
+ return 0;
+}
+
+int
+__gluster_pmap_portbybrick (rpcsvc_request_t *req)
+{
+ pmap_port_by_brick_req args = {0,};
+ pmap_port_by_brick_rsp rsp = {0,};
+ char *brick = NULL;
+ int port = 0;
+ int ret = -1;
+
+ ret = xdr_to_generic (req->msg[0], &args,
+ (xdrproc_t)xdr_pmap_port_by_brick_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto fail;
+ }
+
+ brick = args.brick;
+
+ port = pmap_registry_search (THIS, brick, GF_PMAP_PORT_BRICKSERVER);
+
+ if (!port)
+ rsp.op_ret = -1;
+
+ rsp.port = port;
+
+fail:
+ glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_pmap_port_by_brick_rsp);
+ free (args.brick);//malloced by xdr
+
+ return 0;
+}
+
+
+int
+gluster_pmap_portbybrick (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __gluster_pmap_portbybrick);
+}
+
+
+int
+__gluster_pmap_brickbyport (rpcsvc_request_t *req)
+{
+ pmap_brick_by_port_req args = {0,};
+ pmap_brick_by_port_rsp rsp = {0,};
+ int ret = -1;
+
+ ret = xdr_to_generic (req->msg[0], &args,
+ (xdrproc_t)xdr_pmap_brick_by_port_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto fail;
+ }
+
+ rsp.brick = pmap_registry_search_by_port (THIS, args.port);
+ if (!rsp.brick) {
+ rsp.op_ret = -1;
+ rsp.brick = "";
+ }
+fail:
+
+ glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_pmap_brick_by_port_rsp);
+
+ return 0;
+}
+
+
+int
+gluster_pmap_brickbyport (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __gluster_pmap_brickbyport);
+}
+
+
+static int
+glusterd_brick_update_signin (glusterd_brickinfo_t *brickinfo,
+ gf_boolean_t value)
+{
+ brickinfo->signed_in = value;
+
+ return 0;
+}
+
+int
+__gluster_pmap_signup (rpcsvc_request_t *req)
+{
+ pmap_signup_req args = {0,};
+ pmap_signup_rsp rsp = {0,};
+ int ret = -1;
+
+
+ ret = xdr_to_generic (req->msg[0], &args,
+ (xdrproc_t)xdr_pmap_signup_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto fail;
+ }
+
+ rsp.op_ret = pmap_registry_bind (THIS, args.port, args.brick,
+ GF_PMAP_PORT_BRICKSERVER, req->trans);
+
+fail:
+ glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_pmap_signup_rsp);
+ free (args.brick);//malloced by xdr
+
+ return 0;
+}
+
+int
+gluster_pmap_signup (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __gluster_pmap_signup);
+}
+
+int
+__gluster_pmap_signin (rpcsvc_request_t *req)
+{
+ pmap_signin_req args = {0,};
+ pmap_signin_rsp rsp = {0,};
+ glusterd_brickinfo_t *brickinfo = NULL;
+ int ret = -1;
+
+ ret = xdr_to_generic (req->msg[0], &args,
+ (xdrproc_t)xdr_pmap_signin_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto fail;
+ }
+
+ rsp.op_ret = pmap_registry_bind (THIS, args.port, args.brick,
+ GF_PMAP_PORT_BRICKSERVER, req->trans);
+
+ ret = glusterd_get_brickinfo (THIS, args.brick, args.port, _gf_true,
+ &brickinfo);
+fail:
+ glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_pmap_signin_rsp);
+ free (args.brick);//malloced by xdr
+
+ if (!ret)
+ glusterd_brick_update_signin (brickinfo, _gf_true);
+
+ return 0;
+}
+
+
+int
+gluster_pmap_signin (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __gluster_pmap_signin);
+}
+
+
+int
+__gluster_pmap_signout (rpcsvc_request_t *req)
+{
+ pmap_signout_req args = {0,};
+ pmap_signout_rsp rsp = {0,};
+ int ret = -1;
+ glusterd_brickinfo_t *brickinfo = NULL;
+
+ ret = xdr_to_generic (req->msg[0], &args,
+ (xdrproc_t)xdr_pmap_signout_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto fail;
+ }
+
+ rsp.op_ret = pmap_registry_remove (THIS, args.port, args.brick,
+ GF_PMAP_PORT_BRICKSERVER, req->trans);
+
+ ret = glusterd_get_brickinfo (THIS, args.brick, args.port, _gf_true,
+ &brickinfo);
+fail:
+ glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_pmap_signout_rsp);
+ free (args.brick);//malloced by xdr
+
+ if (!ret)
+ glusterd_brick_update_signin (brickinfo, _gf_false);
+
+ return 0;
+}
+
+int
+gluster_pmap_signout (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __gluster_pmap_signout);
+}
+
+rpcsvc_actor_t gluster_pmap_actors[] = {
+ [GF_PMAP_NULL] = {"NULL", GF_PMAP_NULL, NULL, NULL, 0, DRC_NA},
+ [GF_PMAP_PORTBYBRICK] = {"PORTBYBRICK", GF_PMAP_PORTBYBRICK, gluster_pmap_portbybrick, NULL, 0, DRC_NA},
+ [GF_PMAP_BRICKBYPORT] = {"BRICKBYPORT", GF_PMAP_BRICKBYPORT, gluster_pmap_brickbyport, NULL, 0, DRC_NA},
+ [GF_PMAP_SIGNIN] = {"SIGNIN", GF_PMAP_SIGNIN, gluster_pmap_signin, NULL, 0, DRC_NA},
+ [GF_PMAP_SIGNOUT] = {"SIGNOUT", GF_PMAP_SIGNOUT, gluster_pmap_signout, NULL, 0, DRC_NA},
+ [GF_PMAP_SIGNUP] = {"SIGNUP", GF_PMAP_SIGNUP, gluster_pmap_signup, NULL, 0, DRC_NA},
+};
+
+
+struct rpcsvc_program gluster_pmap_prog = {
+ .progname = "Gluster Portmap",
+ .prognum = GLUSTER_PMAP_PROGRAM,
+ .progver = GLUSTER_PMAP_VERSION,
+ .actors = gluster_pmap_actors,
+ .numactors = GF_PMAP_MAXVALUE,
+};
diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.h b/xlators/mgmt/glusterd/src/glusterd-pmap.h
new file mode 100644
index 000000000..6336ee998
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-pmap.h
@@ -0,0 +1,54 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_PMAP_H_
+#define _GLUSTERD_PMAP_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <pthread.h>
+#include "uuid.h"
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "call-stub.h"
+#include "fd.h"
+#include "byte-order.h"
+#include "glusterd.h"
+#include "rpcsvc.h"
+
+
+#define GF_IANA_PRIV_PORTS_START 49152 /* RFC 6335 */
+
+struct pmap_port_status {
+ gf_pmap_port_type_t type;
+ char *brickname;
+ void *xprt;
+};
+
+struct pmap_registry {
+ int base_port;
+ int last_alloc;
+ struct pmap_port_status ports[65536];
+};
+
+int pmap_registry_alloc (xlator_t *this);
+int pmap_registry_bind (xlator_t *this, int port, const char *brickname,
+ gf_pmap_port_type_t type, void *xprt);
+int pmap_registry_remove (xlator_t *this, int port, const char *brickname,
+ gf_pmap_port_type_t type, void *xprt);
+int pmap_registry_search (xlator_t *this, const char *brickname,
+ gf_pmap_port_type_t type);
+struct pmap_registry *pmap_registry_get (xlator_t *this);
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-quota.c b/xlators/mgmt/glusterd/src/glusterd-quota.c
new file mode 100644
index 000000000..318267199
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-quota.c
@@ -0,0 +1,839 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "common-utils.h"
+#include "cli1-xdr.h"
+#include "xdr-generic.h"
+#include "glusterd.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-store.h"
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+#include "run.h"
+
+#include <sys/wait.h>
+
+int
+__glusterd_handle_quota (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{0,}};
+ dict_t *dict = NULL;
+ glusterd_op_t cli_op = GD_OP_QUOTA;
+ char operation[256] = {0, };
+ char *volname = NULL;
+ int32_t type = 0;
+ char msg[2048] = {0,};
+ xlator_t *this = NULL;
+
+ GF_ASSERT (req);
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf (msg, sizeof (msg), "Unable to decode the "
+ "command");
+ goto out;
+ } else {
+ dict->extra_stdfree = cli_req.dict.dict_val;
+ }
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Unable to get volume name");
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name, "
+ "while handling quota command");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "type", &type);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Unable to get type of command");
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get type of cmd, "
+ "while handling quota command");
+ goto out;
+ }
+
+ switch (type) {
+ case GF_QUOTA_OPTION_TYPE_ENABLE:
+ strncpy (operation, "enable", sizeof (operation));
+ break;
+
+ case GF_QUOTA_OPTION_TYPE_DISABLE:
+ strncpy (operation, "disable", sizeof (operation));
+ break;
+
+ case GF_QUOTA_OPTION_TYPE_LIMIT_USAGE:
+ strncpy (operation, "limit-usage", sizeof (operation));
+ break;
+
+ case GF_QUOTA_OPTION_TYPE_REMOVE:
+ strncpy (operation, "remove", sizeof (operation));
+ break;
+ }
+ ret = glusterd_op_begin_synctask (req, GD_OP_QUOTA, dict);
+
+out:
+ if (ret) {
+ if (msg[0] == '\0')
+ snprintf (msg, sizeof (msg), "Operation failed");
+ ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+ dict, msg);
+ }
+
+ return ret;
+}
+
+int
+glusterd_handle_quota (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __glusterd_handle_quota);
+}
+
+int32_t
+glusterd_check_if_quota_trans_enabled (glusterd_volinfo_t *volinfo)
+{
+ int32_t ret = 0;
+ int flag = _gf_false;
+
+ flag = glusterd_volinfo_get_boolean (volinfo, VKEY_FEATURES_QUOTA);
+ if (flag == -1) {
+ gf_log ("", GF_LOG_ERROR, "failed to get the quota status");
+ ret = -1;
+ goto out;
+ }
+
+ if (flag == _gf_false) {
+ gf_log ("", GF_LOG_ERROR, "first enable the quota translator");
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+/* At the end of the function, the variable found will be set
+ * to true if the path to be removed was present in the limit-list,
+ * else will be false.
+ */
+int32_t
+_glusterd_quota_remove_limits (char **quota_limits, char *path,
+ gf_boolean_t *found)
+{
+ int ret = 0;
+ int i = 0;
+ int size = 0;
+ int len = 0;
+ int pathlen = 0;
+ int skiplen = 0;
+ int flag = 0;
+ char *limits = NULL;
+ char *qlimits = NULL;
+
+ if (found != NULL)
+ *found = _gf_false;
+
+ if (*quota_limits == NULL)
+ return -1;
+
+ qlimits = *quota_limits;
+
+ pathlen = strlen (path);
+
+ len = strlen (qlimits);
+
+ limits = GF_CALLOC (len + 1, sizeof (char), gf_gld_mt_char);
+ if (!limits)
+ return -1;
+
+ while (i < len) {
+ if (!memcmp ((void *) &qlimits [i], (void *)path, pathlen))
+ if (qlimits [i + pathlen] == ':') {
+ flag = 1;
+ if (found != NULL)
+ *found = _gf_true;
+ }
+
+ while (qlimits [i + size] != ',' &&
+ qlimits [i + size] != '\0')
+ size++;
+
+ if (!flag) {
+ memcpy ((void *) &limits [i], (void *) &qlimits [i], size + 1);
+ } else {
+ skiplen = size + 1;
+ size = len - i - size;
+ memcpy ((void *) &limits [i], (void *) &qlimits [i + skiplen], size);
+ break;
+ }
+
+ i += size + 1;
+ size = 0;
+ }
+
+ if (!flag) {
+ ret = 1;
+ } else {
+ len = strlen (limits);
+
+ if (len == 0) {
+ GF_FREE (qlimits);
+
+ *quota_limits = NULL;
+
+ goto out;
+ }
+
+ if (limits[len - 1] == ',') {
+ limits[len - 1] = '\0';
+ len --;
+ }
+
+ GF_FREE (qlimits);
+
+ qlimits = GF_CALLOC (len + 1, sizeof (char), gf_gld_mt_char);
+
+ if (!qlimits) {
+ ret = -1;
+ goto out;
+ }
+
+ memcpy ((void *) qlimits, (void *) limits, len + 1);
+
+ *quota_limits = qlimits;
+
+ ret = 0;
+ }
+
+out:
+ GF_FREE (limits);
+
+ return ret;
+}
+
+int32_t
+glusterd_quota_initiate_fs_crawl (glusterd_conf_t *priv, char *volname)
+{
+ pid_t pid;
+ int32_t ret = 0;
+ int status = 0;
+ char mountdir[] = "/tmp/mntXXXXXX";
+ runner_t runner = {0};
+
+ if (mkdtemp (mountdir) == NULL) {
+ gf_log ("glusterd", GF_LOG_DEBUG,
+ "failed to create a temporary mount directory");
+ ret = -1;
+ goto out;
+ }
+
+ runinit (&runner);
+ runner_add_args (&runner, SBIN_DIR"/glusterfs",
+ "-s", "localhost",
+ "--volfile-id", volname,
+ "-l", DEFAULT_LOG_FILE_DIRECTORY"/quota-crawl.log",
+ mountdir, NULL);
+
+ synclock_unlock (&priv->big_lock);
+ ret = runner_run_reuse (&runner);
+ synclock_lock (&priv->big_lock);
+ if (ret == -1) {
+ runner_log (&runner, "glusterd", GF_LOG_DEBUG, "command failed");
+ runner_end (&runner);
+ goto out;
+ }
+ runner_end (&runner);
+
+ if ((pid = fork ()) < 0) {
+ gf_log ("glusterd", GF_LOG_WARNING, "fork from parent failed");
+ ret = -1;
+ goto out;
+ } else if (pid == 0) {//first child
+ /* fork one more to not hold back main process on
+ * blocking call below
+ */
+ pid = fork ();
+ if (pid)
+ _exit (pid > 0 ? EXIT_SUCCESS : EXIT_FAILURE);
+
+ ret = chdir (mountdir);
+ if (ret == -1) {
+ gf_log ("glusterd", GF_LOG_WARNING, "chdir %s failed, "
+ "reason: %s", mountdir, strerror (errno));
+ exit (EXIT_FAILURE);
+ }
+ runinit (&runner);
+ runner_add_args (&runner, "/usr/bin/find", "find", ".", NULL);
+ if (runner_start (&runner) == -1)
+ _exit (EXIT_FAILURE);
+
+#ifndef GF_LINUX_HOST_OS
+ runner_end (&runner); /* blocks in waitpid */
+ runcmd ("umount", mountdir, NULL);
+#else
+ runcmd ("umount", "-l", mountdir, NULL);
+#endif
+ rmdir (mountdir);
+ _exit (EXIT_SUCCESS);
+ }
+ ret = (waitpid (pid, &status, 0) == pid &&
+ WIFEXITED (status) && WEXITSTATUS (status) == EXIT_SUCCESS) ? 0 : -1;
+
+out:
+ return ret;
+}
+
+char *
+glusterd_quota_get_limit_value (char *quota_limits, char *path)
+{
+ int32_t i, j, k, l, len;
+ int32_t pat_len, diff;
+ char *ret_str = NULL;
+
+ len = strlen (quota_limits);
+ pat_len = strlen (path);
+ i = 0;
+ j = 0;
+
+ while (i < len) {
+ j = i;
+ k = 0;
+ while (path [k] == quota_limits [j]) {
+ j++;
+ k++;
+ }
+
+ l = j;
+
+ while (quota_limits [j] != ',' &&
+ quota_limits [j] != '\0')
+ j++;
+
+ if (quota_limits [l] == ':' && pat_len == (l - i)) {
+ diff = j - i;
+ ret_str = GF_CALLOC (diff + 1, sizeof (char),
+ gf_gld_mt_char);
+
+ strncpy (ret_str, &quota_limits [i], diff);
+
+ break;
+ }
+ i = ++j; //skip ','
+ }
+
+ return ret_str;
+}
+
+char*
+_glusterd_quota_get_limit_usages (glusterd_volinfo_t *volinfo,
+ char *path, char **op_errstr)
+{
+ int32_t ret = 0;
+ char *quota_limits = NULL;
+ char *ret_str = NULL;
+
+ if (volinfo == NULL)
+ return NULL;
+
+ ret = glusterd_volinfo_get (volinfo, VKEY_FEATURES_LIMIT_USAGE,
+ &quota_limits);
+ if (ret)
+ return NULL;
+ if (quota_limits == NULL) {
+ ret_str = NULL;
+ *op_errstr = gf_strdup ("Limit not set on any directory");
+ } else if (path == NULL)
+ ret_str = gf_strdup (quota_limits);
+ else
+ ret_str = glusterd_quota_get_limit_value (quota_limits, path);
+
+ return ret_str;
+}
+
+int32_t
+glusterd_quota_get_limit_usages (glusterd_conf_t *priv,
+ glusterd_volinfo_t *volinfo,
+ char *volname,
+ dict_t *dict,
+ char **op_errstr,
+ dict_t *rsp_dict)
+{
+ int32_t i = 0;
+ int32_t ret = 0;
+ int32_t count = 0;
+ char *path = NULL;
+ char cmd_str [1024] = {0, };
+ char *ret_str = NULL;
+
+ if (rsp_dict == NULL)
+ return 0;
+
+ ret = dict_get_int32 (dict, "count", &count);
+ if (ret < 0)
+ goto out;
+
+ if (count == 0) {
+ ret_str = _glusterd_quota_get_limit_usages (volinfo, NULL,
+ op_errstr);
+ } else {
+ i = 0;
+ while (count--) {
+ snprintf (cmd_str, 1024, "path%d", i++);
+
+ ret = dict_get_str (dict, cmd_str, &path);
+ if (ret < 0)
+ goto out;
+
+ ret_str = _glusterd_quota_get_limit_usages (volinfo, path, op_errstr);
+ }
+ }
+
+ if (ret_str) {
+ ret = dict_set_dynstr (rsp_dict, "limit_list", ret_str);
+ }
+out:
+ return ret;
+}
+
+int32_t
+glusterd_quota_enable (glusterd_volinfo_t *volinfo, char **op_errstr,
+ gf_boolean_t *crawl)
+{
+ int32_t ret = -1;
+ char *quota_status = NULL;
+
+ GF_VALIDATE_OR_GOTO ("glusterd", volinfo, out);
+ GF_VALIDATE_OR_GOTO ("glusterd", crawl, out);
+ GF_VALIDATE_OR_GOTO ("glusterd", op_errstr, out);
+
+ if (glusterd_is_volume_started (volinfo) == 0) {
+ *op_errstr = gf_strdup ("Volume is stopped, start volume "
+ "to enable quota.");
+ goto out;
+ }
+
+ ret = glusterd_check_if_quota_trans_enabled (volinfo);
+ if (ret == 0) {
+ *op_errstr = gf_strdup ("Quota is already enabled");
+ goto out;
+ }
+
+ quota_status = gf_strdup ("on");
+ if (!quota_status) {
+ gf_log ("", GF_LOG_ERROR, "memory allocation failed");
+ *op_errstr = gf_strdup ("Enabling quota has been unsuccessful");
+ goto out;
+ }
+
+ ret = dict_set_dynstr (volinfo->dict, VKEY_FEATURES_QUOTA, quota_status);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "dict set failed");
+ *op_errstr = gf_strdup ("Enabling quota has been unsuccessful");
+ goto out;
+ }
+
+ *op_errstr = gf_strdup ("Enabling quota has been successful");
+
+ *crawl = _gf_true;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int32_t
+glusterd_quota_disable (glusterd_volinfo_t *volinfo, char **op_errstr)
+{
+ int32_t ret = -1;
+ char *quota_status = NULL, *quota_limits = NULL;
+
+ GF_VALIDATE_OR_GOTO ("glusterd", volinfo, out);
+ GF_VALIDATE_OR_GOTO ("glusterd", op_errstr, out);
+
+ ret = glusterd_check_if_quota_trans_enabled (volinfo);
+ if (ret == -1) {
+ *op_errstr = gf_strdup ("Quota is already disabled");
+ goto out;
+ }
+
+ quota_status = gf_strdup ("off");
+ if (!quota_status) {
+ gf_log ("", GF_LOG_ERROR, "memory allocation failed");
+ *op_errstr = gf_strdup ("Disabling quota has been unsuccessful");
+ goto out;
+ }
+
+ ret = dict_set_dynstr (volinfo->dict, VKEY_FEATURES_QUOTA, quota_status);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "dict set failed");
+ *op_errstr = gf_strdup ("Disabling quota has been unsuccessful");
+ goto out;
+ }
+
+ *op_errstr = gf_strdup ("Disabling quota has been successful");
+
+ ret = glusterd_volinfo_get (volinfo, VKEY_FEATURES_LIMIT_USAGE,
+ &quota_limits);
+ if (ret) {
+ gf_log ("", GF_LOG_WARNING, "failed to get the quota limits");
+ } else {
+ GF_FREE (quota_limits);
+ }
+
+ dict_del (volinfo->dict, VKEY_FEATURES_LIMIT_USAGE);
+
+out:
+ return ret;
+}
+
+int32_t
+glusterd_quota_limit_usage (glusterd_volinfo_t *volinfo, dict_t *dict, char **op_errstr)
+{
+ int32_t ret = -1;
+ char *path = NULL;
+ char *limit = NULL;
+ char *value = NULL;
+ char msg [1024] = {0,};
+ char *quota_limits = NULL;
+
+ GF_VALIDATE_OR_GOTO ("glusterd", dict, out);
+ GF_VALIDATE_OR_GOTO ("glusterd", volinfo, out);
+ GF_VALIDATE_OR_GOTO ("glusterd", op_errstr, out);
+
+ ret = glusterd_check_if_quota_trans_enabled (volinfo);
+ if (ret == -1) {
+ *op_errstr = gf_strdup ("Quota is disabled, please enable "
+ "quota");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_get (volinfo, VKEY_FEATURES_LIMIT_USAGE,
+ &quota_limits);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "failed to get the quota limits");
+ *op_errstr = gf_strdup ("failed to set limit");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "path", &path);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to fetch quota limits" );
+ *op_errstr = gf_strdup ("failed to set limit");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "limit", &limit);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to fetch quota limits" );
+ *op_errstr = gf_strdup ("failed to set limit");
+ goto out;
+ }
+
+ if (quota_limits) {
+ ret = _glusterd_quota_remove_limits (&quota_limits, path, NULL);
+ if (ret == -1) {
+ gf_log ("", GF_LOG_ERROR, "Unable to allocate memory");
+ *op_errstr = gf_strdup ("failed to set limit");
+ goto out;
+ }
+ }
+
+ if (quota_limits == NULL) {
+ ret = gf_asprintf (&value, "%s:%s", path, limit);
+ if (ret == -1) {
+ gf_log ("", GF_LOG_ERROR, "Unable to allocate memory");
+ *op_errstr = gf_strdup ("failed to set limit");
+ goto out;
+ }
+ } else {
+ ret = gf_asprintf (&value, "%s,%s:%s",
+ quota_limits, path, limit);
+ if (ret == -1) {
+ gf_log ("", GF_LOG_ERROR, "Unable to allocate memory");
+ *op_errstr = gf_strdup ("failed to set limit");
+ goto out;
+ }
+
+ GF_FREE (quota_limits);
+ }
+
+ quota_limits = value;
+
+ ret = dict_set_str (volinfo->dict, VKEY_FEATURES_LIMIT_USAGE,
+ quota_limits);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to set quota limits" );
+ *op_errstr = gf_strdup ("failed to set limit");
+ goto out;
+ }
+ snprintf (msg, 1024, "limit set on %s", path);
+ *op_errstr = gf_strdup (msg);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int32_t
+glusterd_quota_remove_limits (glusterd_volinfo_t *volinfo, dict_t *dict, char **op_errstr)
+{
+ int32_t ret = -1;
+ char str [PATH_MAX + 1024] = {0,};
+ char *quota_limits = NULL;
+ char *path = NULL;
+ gf_boolean_t flag = _gf_false;
+
+ GF_VALIDATE_OR_GOTO ("glusterd", dict, out);
+ GF_VALIDATE_OR_GOTO ("glusterd", volinfo, out);
+ GF_VALIDATE_OR_GOTO ("glusterd", op_errstr, out);
+
+ ret = glusterd_check_if_quota_trans_enabled (volinfo);
+ if (ret == -1) {
+ *op_errstr = gf_strdup ("Quota is disabled, please enable quota");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_get (volinfo, VKEY_FEATURES_LIMIT_USAGE,
+ &quota_limits);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "failed to get the quota limits");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "path", &path);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to fetch quota limits" );
+ goto out;
+ }
+
+ ret = _glusterd_quota_remove_limits (&quota_limits, path, &flag);
+ if (ret == -1) {
+ if (flag == _gf_true)
+ snprintf (str, sizeof (str), "Removing limit on %s has "
+ "been unsuccessful", path);
+ else
+ snprintf (str, sizeof (str), "%s has no limit set", path);
+ *op_errstr = gf_strdup (str);
+ goto out;
+ } else {
+ if (flag == _gf_true)
+ snprintf (str, sizeof (str), "Removed quota limit on "
+ "%s", path);
+ else
+ snprintf (str, sizeof (str), "no limit set on %s",
+ path);
+ *op_errstr = gf_strdup (str);
+ }
+
+ if (quota_limits) {
+ ret = dict_set_str (volinfo->dict, VKEY_FEATURES_LIMIT_USAGE,
+ quota_limits);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to set quota limits" );
+ goto out;
+ }
+ } else {
+ dict_del (volinfo->dict, VKEY_FEATURES_LIMIT_USAGE);
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+
+int
+glusterd_op_quota (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+ glusterd_volinfo_t *volinfo = NULL;
+ int32_t ret = -1;
+ char *volname = NULL;
+ int type = -1;
+ gf_boolean_t start_crawl = _gf_false;
+ glusterd_conf_t *priv = NULL;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (op_errstr);
+
+ priv = THIS->private;
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get volume name " );
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to allocate memory");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "type", &type);
+
+ if (type == GF_QUOTA_OPTION_TYPE_ENABLE) {
+ ret = glusterd_quota_enable (volinfo, op_errstr, &start_crawl);
+ if (ret < 0)
+ goto out;
+
+ goto create_vol;
+ }
+
+ if (type == GF_QUOTA_OPTION_TYPE_DISABLE) {
+ ret = glusterd_quota_disable (volinfo, op_errstr);
+ if (ret < 0)
+ goto out;
+
+ goto create_vol;
+ }
+
+ if (type == GF_QUOTA_OPTION_TYPE_LIMIT_USAGE) {
+ ret = glusterd_quota_limit_usage (volinfo, dict, op_errstr);
+ if (ret < 0)
+ goto out;
+
+ goto create_vol;
+ }
+
+ if (type == GF_QUOTA_OPTION_TYPE_REMOVE) {
+ ret = glusterd_quota_remove_limits (volinfo, dict, op_errstr);
+ if (ret < 0)
+ goto out;
+
+ goto create_vol;
+ }
+
+ if (type == GF_QUOTA_OPTION_TYPE_LIST) {
+ ret = glusterd_check_if_quota_trans_enabled (volinfo);
+ if (ret == -1) {
+ *op_errstr = gf_strdup ("cannot list the limits, "
+ "quota is disabled");
+ goto out;
+ }
+
+ ret = glusterd_quota_get_limit_usages (priv, volinfo, volname,
+ dict, op_errstr, rsp_dict);
+
+ goto out;
+ }
+create_vol:
+ ret = glusterd_create_volfiles_and_notify_services (volinfo);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to re-create volfile for"
+ " 'quota'");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret)
+ goto out;
+
+ if (GLUSTERD_STATUS_STARTED == volinfo->status)
+ ret = glusterd_check_generate_start_nfs ();
+
+ ret = 0;
+
+out:
+ if (rsp_dict && start_crawl == _gf_true)
+ glusterd_quota_initiate_fs_crawl (priv, volname);
+
+ if (rsp_dict && *op_errstr) {
+ ret = dict_set_dynstr (rsp_dict, "errstr", *op_errstr);
+ if (ret) {
+ GF_FREE (*op_errstr);
+ gf_log ("", GF_LOG_DEBUG,
+ "failed to set error message in ctx");
+ }
+ *op_errstr = NULL;
+ }
+
+ return ret;
+}
+
+int
+glusterd_op_stage_quota (dict_t *dict, char **op_errstr)
+{
+ int ret = 0;
+ char *volname = NULL;
+ gf_boolean_t exists = _gf_false;
+ int type = 0;
+ dict_t *ctx = NULL;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (op_errstr);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
+ goto out;
+ }
+
+ exists = glusterd_check_volume_exists (volname);
+ if (!exists) {
+ gf_log ("", GF_LOG_ERROR, "Volume with name: %s "
+ "does not exist",
+ volname);
+ *op_errstr = gf_strdup ("Invalid volume name");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "type", &type);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get 'type' for quota op");
+ *op_errstr = gf_strdup ("Volume quota failed, internal error "
+ ", unable to get type of operation");
+ goto out;
+ }
+
+
+ ctx = glusterd_op_get_ctx();
+ if (ctx && (type == GF_QUOTA_OPTION_TYPE_ENABLE
+ || type == GF_QUOTA_OPTION_TYPE_LIST)) {
+ /* Fuse mount req. only for enable & list-usage options*/
+ if (!glusterd_is_fuse_available ()) {
+ gf_log ("glusterd", GF_LOG_ERROR, "Unable to open /dev/"
+ "fuse (%s), quota command failed",
+ strerror (errno));
+ *op_errstr = gf_strdup ("Fuse unavailable");
+ ret = -1;
+ goto out;
+ }
+ }
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
new file mode 100644
index 000000000..b7b974c68
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
@@ -0,0 +1,749 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <inttypes.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/resource.h>
+#include <sys/statvfs.h>
+
+#include "globals.h"
+#include "compat.h"
+#include "protocol-common.h"
+#include "xlator.h"
+#include "logging.h"
+#include "timer.h"
+#include "glusterd-mem-types.h"
+#include "glusterd.h"
+#include "glusterd-sm.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-utils.h"
+#include "glusterd-store.h"
+#include "run.h"
+#include "glusterd-volgen.h"
+
+#include "syscall.h"
+#include "cli1-xdr.h"
+#include "xdr-generic.h"
+
+int32_t
+glusterd_brick_op_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe);
+int
+glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr,
+ size_t len, glusterd_op_t op)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ /* Check only if operation is not remove-brick */
+ if ((GD_OP_REMOVE_BRICK != op) &&
+ !gd_is_remove_brick_committed (volinfo)) {
+ gf_log (this->name, GF_LOG_DEBUG, "A remove-brick task on "
+ "volume %s is not yet committed", volinfo->volname);
+ snprintf (op_errstr, len, "A remove-brick task on volume %s is"
+ " not yet committed. Either commit or stop the "
+ "remove-brick task.", volinfo->volname);
+ goto out;
+ }
+
+ if (glusterd_is_defrag_on (volinfo)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "rebalance on volume %s already started",
+ volinfo->volname);
+ snprintf (op_errstr, len, "Rebalance on %s is already started",
+ volinfo->volname);
+ goto out;
+ }
+
+ if (glusterd_is_rb_started (volinfo) ||
+ glusterd_is_rb_paused (volinfo)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Rebalance failed as replace brick is in progress on volume %s",
+ volinfo->volname);
+ snprintf (op_errstr, len, "Rebalance failed as replace brick is in progress on "
+ "volume %s", volinfo->volname);
+ goto out;
+ }
+ ret = 0;
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+
+int32_t
+__glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data)
+{
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_defrag_info_t *defrag = NULL;
+ int ret = 0;
+ char pidfile[PATH_MAX];
+ glusterd_conf_t *priv = NULL;
+
+ priv = THIS->private;
+ if (!priv)
+ return 0;
+
+ volinfo = mydata;
+ if (!volinfo)
+ return 0;
+
+ defrag = volinfo->rebal.defrag;
+ if (!defrag)
+ return 0;
+
+ if ((event == RPC_CLNT_DISCONNECT) && defrag->connected)
+ volinfo->rebal.defrag = NULL;
+
+ GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv);
+
+ switch (event) {
+ case RPC_CLNT_CONNECT:
+ {
+ if (defrag->connected)
+ return 0;
+
+ LOCK (&defrag->lock);
+ {
+ defrag->connected = 1;
+ }
+ UNLOCK (&defrag->lock);
+
+ gf_log ("", GF_LOG_DEBUG, "%s got RPC_CLNT_CONNECT",
+ rpc->conn.trans->name);
+ break;
+ }
+
+ case RPC_CLNT_DISCONNECT:
+ {
+ if (!defrag->connected)
+ return 0;
+
+ LOCK (&defrag->lock);
+ {
+ defrag->connected = 0;
+ }
+ UNLOCK (&defrag->lock);
+
+ if (!glusterd_is_service_running (pidfile, NULL)) {
+ if (volinfo->rebal.defrag_status ==
+ GF_DEFRAG_STATUS_STARTED) {
+ volinfo->rebal.defrag_status =
+ GF_DEFRAG_STATUS_FAILED;
+ }
+ }
+
+ glusterd_store_perform_node_state_store (volinfo);
+
+ if (defrag->rpc) {
+ rpc_clnt_unref (defrag->rpc);
+ defrag->rpc = NULL;
+ }
+ if (defrag->cbk_fn)
+ defrag->cbk_fn (volinfo,
+ volinfo->rebal.defrag_status);
+
+ GF_FREE (defrag);
+ gf_log ("", GF_LOG_DEBUG, "%s got RPC_CLNT_DISCONNECT",
+ rpc->conn.trans->name);
+ break;
+ }
+ default:
+ gf_log ("", GF_LOG_TRACE,
+ "got some other RPC event %d", event);
+ ret = 0;
+ break;
+ }
+
+ return ret;
+}
+
+int32_t
+glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data)
+{
+ return glusterd_big_locked_notify (rpc, mydata, event,
+ data, __glusterd_defrag_notify);
+}
+
+int
+glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
+ size_t len, int cmd, defrag_cbk_fn_t cbk,
+ glusterd_op_t op)
+{
+ int ret = -1;
+ glusterd_defrag_info_t *defrag = NULL;
+ runner_t runner = {0,};
+ glusterd_conf_t *priv = NULL;
+ char defrag_path[PATH_MAX];
+ char sockfile[PATH_MAX] = {0,};
+ char pidfile[PATH_MAX] = {0,};
+ char logfile[PATH_MAX] = {0,};
+ dict_t *options = NULL;
+ char valgrind_logfile[PATH_MAX] = {0,};
+
+ priv = THIS->private;
+
+ GF_ASSERT (volinfo);
+ GF_ASSERT (op_errstr);
+
+ ret = glusterd_defrag_start_validate (volinfo, op_errstr, len, op);
+ if (ret)
+ goto out;
+ if (!volinfo->rebal.defrag)
+ volinfo->rebal.defrag =
+ GF_CALLOC (1, sizeof (*volinfo->rebal.defrag),
+ gf_gld_mt_defrag_info);
+ if (!volinfo->rebal.defrag)
+ goto out;
+
+ defrag = volinfo->rebal.defrag;
+
+ defrag->cmd = cmd;
+
+ volinfo->rebal.defrag_cmd = cmd;
+ volinfo->rebal.op = op;
+
+ LOCK_INIT (&defrag->lock);
+
+ volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_STARTED;
+
+ glusterd_volinfo_reset_defrag_stats (volinfo);
+ glusterd_store_perform_node_state_store (volinfo);
+
+ GLUSTERD_GET_DEFRAG_DIR (defrag_path, volinfo, priv);
+ ret = mkdir_p (defrag_path, 0777, _gf_true);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to create "
+ "directory %s", defrag_path);
+ goto out;
+ }
+
+ GLUSTERD_GET_DEFRAG_SOCK_FILE (sockfile, volinfo, priv);
+ GLUSTERD_GET_DEFRAG_PID_FILE (pidfile, volinfo, priv);
+ snprintf (logfile, PATH_MAX, "%s/%s-rebalance.log",
+ DEFAULT_LOG_FILE_DIRECTORY, volinfo->volname);
+ runinit (&runner);
+
+ if (priv->valgrind) {
+ snprintf (valgrind_logfile, PATH_MAX,
+ "%s/valgrind-%s-rebalance.log",
+ DEFAULT_LOG_FILE_DIRECTORY,
+ volinfo->volname);
+
+ runner_add_args (&runner, "valgrind", "--leak-check=full",
+ "--trace-children=yes", "--track-origins=yes",
+ NULL);
+ runner_argprintf (&runner, "--log-file=%s", valgrind_logfile);
+ }
+
+ runner_add_args (&runner, SBIN_DIR"/glusterfs",
+ "-s", "localhost", "--volfile-id", volinfo->volname,
+ "--xlator-option", "*dht.use-readdirp=yes",
+ "--xlator-option", "*dht.lookup-unhashed=yes",
+ "--xlator-option", "*dht.assert-no-child-down=yes",
+ "--xlator-option", "*replicate*.data-self-heal=off",
+ "--xlator-option",
+ "*replicate*.metadata-self-heal=off",
+ "--xlator-option", "*replicate*.entry-self-heal=off",
+ "--xlator-option", "*replicate*.readdir-failover=off",
+ "--xlator-option", "*dht.readdir-optimize=on",
+ NULL);
+ runner_add_arg (&runner, "--xlator-option");
+ runner_argprintf ( &runner, "*dht.rebalance-cmd=%d",cmd);
+ runner_add_arg (&runner, "--xlator-option");
+ runner_argprintf (&runner, "*dht.node-uuid=%s", uuid_utoa(MY_UUID));
+ runner_add_arg (&runner, "--socket-file");
+ runner_argprintf (&runner, "%s",sockfile);
+ runner_add_arg (&runner, "--pid-file");
+ runner_argprintf (&runner, "%s",pidfile);
+ runner_add_arg (&runner, "-l");
+ runner_argprintf (&runner, logfile);
+ if (volinfo->memory_accounting)
+ runner_add_arg (&runner, "--mem-accounting");
+
+ ret = runner_run_nowait (&runner);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_DEBUG, "rebalance command failed");
+ goto out;
+ }
+
+ sleep (5);
+
+ /* Setting frame-timeout to 10mins (600seconds).
+ * Unix domain sockets ensures that the connection is reliable. The
+ * default timeout of 30mins used for unreliable network connections is
+ * too long for unix domain socket connections.
+ */
+ ret = rpc_transport_unix_options_build (&options, sockfile, 600);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Unix options build failed");
+ goto out;
+ }
+
+ synclock_unlock (&priv->big_lock);
+ ret = glusterd_rpc_create (&defrag->rpc, options,
+ glusterd_defrag_notify, volinfo);
+ synclock_lock (&priv->big_lock);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "RPC create failed");
+ goto out;
+ }
+
+ if (cbk)
+ defrag->cbk_fn = cbk;
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+
+int
+glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo,
+ glusterd_conf_t *priv, int cmd)
+{
+ dict_t *options = NULL;
+ char sockfile[PATH_MAX] = {0,};
+ int ret = -1;
+ glusterd_defrag_info_t *defrag = NULL;
+
+ if (!volinfo->rebal.defrag)
+ volinfo->rebal.defrag =
+ GF_CALLOC (1, sizeof (*volinfo->rebal.defrag),
+ gf_gld_mt_defrag_info);
+
+ if (!volinfo->rebal.defrag)
+ goto out;
+
+ defrag = volinfo->rebal.defrag;
+
+ defrag->cmd = cmd;
+
+ LOCK_INIT (&defrag->lock);
+
+ GLUSTERD_GET_DEFRAG_SOCK_FILE (sockfile, volinfo, priv);
+
+ /* Setting frame-timeout to 10mins (600seconds).
+ * Unix domain sockets ensures that the connection is reliable. The
+ * default timeout of 30mins used for unreliable network connections is
+ * too long for unix domain socket connections.
+ */
+ ret = rpc_transport_unix_options_build (&options, sockfile, 600);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Unix options build failed");
+ goto out;
+ }
+
+ synclock_unlock (&priv->big_lock);
+ ret = glusterd_rpc_create (&defrag->rpc, options,
+ glusterd_defrag_notify, volinfo);
+ synclock_lock (&priv->big_lock);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "RPC create failed");
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+glusterd_rebalance_cmd_validate (int cmd, char *volname,
+ glusterd_volinfo_t **volinfo,
+ char *op_errstr, size_t len)
+{
+ int ret = -1;
+
+ if (glusterd_volinfo_find(volname, volinfo)) {
+ gf_log ("glusterd", GF_LOG_ERROR, "Received rebalance on invalid"
+ " volname %s", volname);
+ snprintf (op_errstr, len, "Volume %s does not exist",
+ volname);
+ goto out;
+ }
+ if ((*volinfo)->brick_count <= (*volinfo)->dist_leaf_count) {
+ gf_log ("glusterd", GF_LOG_ERROR, "Volume %s is not a "
+ "distribute type or contains only 1 brick", volname);
+ snprintf (op_errstr, len, "Volume %s is not a distribute "
+ "volume or contains only 1 brick.\n"
+ "Not performing rebalance", volname);
+ goto out;
+ }
+
+ if ((*volinfo)->status != GLUSTERD_STATUS_STARTED) {
+ gf_log ("glusterd", GF_LOG_ERROR, "Received rebalance on stopped"
+ " volname %s", volname);
+ snprintf (op_errstr, len, "Volume %s needs to "
+ "be started to perform rebalance", volname);
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+__glusterd_handle_defrag_volume (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{0,}};
+ glusterd_conf_t *priv = NULL;
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ gf_cli_defrag_type cmd = 0;
+ char msg[2048] = {0,};
+ xlator_t *this = NULL;
+
+ GF_ASSERT (req);
+ this = THIS;
+ GF_ASSERT (this);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf (msg, sizeof (msg), "Unable to decode the "
+ "command");
+ goto out;
+ }
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Failed to get volume name");
+ gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "rebalance-command", (int32_t*)&cmd);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Failed to get command");
+ gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+ goto out;
+ }
+
+ ret = dict_set_static_bin (dict, "node-uuid", MY_UUID, 16);
+ if (ret)
+ goto out;
+
+ if ((cmd == GF_DEFRAG_CMD_STATUS) ||
+ (cmd == GF_DEFRAG_CMD_STOP)) {
+ ret = glusterd_op_begin (req, GD_OP_DEFRAG_BRICK_VOLUME,
+ dict, msg, sizeof (msg));
+ } else
+ ret = glusterd_op_begin (req, GD_OP_REBALANCE, dict,
+ msg, sizeof (msg));
+
+out:
+
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+
+ if (ret) {
+ if (msg[0] == '\0')
+ snprintf (msg, sizeof (msg), "Operation failed");
+ ret = glusterd_op_send_cli_response (GD_OP_REBALANCE, ret, 0,
+ req, dict, msg);
+
+ }
+
+ free (cli_req.dict.dict_val);//malloced by xdr
+
+ return 0;
+}
+
+int
+glusterd_handle_defrag_volume (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, __glusterd_handle_defrag_volume);
+}
+
+
+int
+glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr)
+{
+ char *volname = NULL;
+ int ret = 0;
+ int32_t cmd = 0;
+ char msg[2048] = {0};
+ glusterd_volinfo_t *volinfo = NULL;
+ char *task_id_str = NULL;
+ dict_t *op_ctx = NULL;
+ xlator_t *this = 0;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "volname not found");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "rebalance-command", &cmd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "cmd not found");
+ goto out;
+ }
+
+ ret = glusterd_rebalance_cmd_validate (cmd, volname, &volinfo,
+ msg, sizeof (msg));
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "failed to validate");
+ goto out;
+ }
+ switch (cmd) {
+ case GF_DEFRAG_CMD_START:
+ case GF_DEFRAG_CMD_START_LAYOUT_FIX:
+ case GF_DEFRAG_CMD_START_FORCE:
+ if (is_origin_glusterd (dict)) {
+ op_ctx = glusterd_op_get_ctx ();
+ if (!op_ctx) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get op_ctx");
+ goto out;
+ }
+
+ ret = glusterd_generate_and_set_task_id
+ (op_ctx, GF_REBALANCE_TID_KEY);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to generate task-id");
+ goto out;
+ }
+ } else {
+ ret = dict_get_str (dict, GF_REBALANCE_TID_KEY,
+ &task_id_str);
+ if (ret) {
+ snprintf (msg, sizeof (msg),
+ "Missing rebalance-id");
+ gf_log (this->name, GF_LOG_WARNING, "%s", msg);
+ ret = 0;
+ }
+ }
+ ret = glusterd_defrag_start_validate (volinfo, msg,
+ sizeof (msg),
+ GD_OP_REBALANCE);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "start validate failed");
+ goto out;
+ }
+ break;
+ case GF_DEFRAG_CMD_STATUS:
+ case GF_DEFRAG_CMD_STOP:
+ break;
+ default:
+ break;
+ }
+
+ ret = 0;
+out:
+ if (ret && op_errstr && msg[0])
+ *op_errstr = gf_strdup (msg);
+
+ return ret;
+}
+
+
+int
+glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+ char *volname = NULL;
+ int ret = 0;
+ int32_t cmd = 0;
+ char msg[2048] = {0};
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *tmp = NULL;
+ gf_boolean_t volfile_update = _gf_false;
+ char *task_id_str = NULL;
+ dict_t *ctx = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "volname not given");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "rebalance-command", &cmd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "command not given");
+ goto out;
+ }
+
+
+ ret = glusterd_rebalance_cmd_validate (cmd, volname, &volinfo,
+ msg, sizeof (msg));
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "cmd validate failed");
+ goto out;
+ }
+
+ /* Set task-id, if available, in op_ctx dict for operations other than
+ * start
+ */
+ if (cmd == GF_DEFRAG_CMD_STATUS || cmd == GF_DEFRAG_CMD_STOP) {
+ if (!uuid_is_null (volinfo->rebal.rebalance_id)) {
+ ctx = glusterd_op_get_ctx ();
+ if (!ctx) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get op_ctx");
+ ret = -1;
+ goto out;
+ }
+
+ if (GD_OP_REMOVE_BRICK == volinfo->rebal.op)
+ ret = glusterd_copy_uuid_to_dict
+ (volinfo->rebal.rebalance_id, ctx,
+ GF_REMOVE_BRICK_TID_KEY);
+ else
+ ret = glusterd_copy_uuid_to_dict
+ (volinfo->rebal.rebalance_id, ctx,
+ GF_REBALANCE_TID_KEY);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set task-id");
+ goto out;
+ }
+ }
+ }
+
+ switch (cmd) {
+ case GF_DEFRAG_CMD_START:
+ case GF_DEFRAG_CMD_START_LAYOUT_FIX:
+ case GF_DEFRAG_CMD_START_FORCE:
+ ret = dict_get_str (dict, GF_REBALANCE_TID_KEY, &task_id_str);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "Missing rebalance "
+ "id");
+ ret = 0;
+ } else {
+ uuid_parse (task_id_str, volinfo->rebal.rebalance_id) ;
+ volinfo->rebal.op = GD_OP_REBALANCE;
+ }
+ ret = glusterd_handle_defrag_start (volinfo, msg, sizeof (msg),
+ cmd, NULL, GD_OP_REBALANCE);
+ break;
+ case GF_DEFRAG_CMD_STOP:
+ /* Clear task-id only on explicitly stopping rebalance.
+ * Also clear the stored operation, so it doesn't cause trouble
+ * with future rebalance/remove-brick starts
+ */
+ uuid_clear (volinfo->rebal.rebalance_id);
+ volinfo->rebal.op = GD_OP_NONE;
+
+ /* Fall back to the old volume file in case of decommission*/
+ list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks,
+ brick_list) {
+ if (!brickinfo->decommissioned)
+ continue;
+ brickinfo->decommissioned = 0;
+ volfile_update = _gf_true;
+ }
+
+ if (volfile_update == _gf_false) {
+ ret = 0;
+ break;
+ }
+
+ ret = glusterd_create_volfiles_and_notify_services (volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to create volfiles");
+ goto out;
+ }
+
+ ret = glusterd_store_volinfo (volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to store volinfo");
+ goto out;
+ }
+
+ ret = 0;
+ break;
+
+ case GF_DEFRAG_CMD_STATUS:
+ break;
+ default:
+ break;
+ }
+
+out:
+ if (ret && op_errstr && msg[0])
+ *op_errstr = gf_strdup (msg);
+
+ return ret;
+}
+
+int32_t
+glusterd_defrag_event_notify_handle (dict_t *dict)
+{
+ glusterd_volinfo_t *volinfo = NULL;
+ char *volname = NULL;
+ int32_t ret = -1;
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Failed to get volname");
+ return ret;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Failed to get volinfo for %s"
+ , volname);
+ return ret;
+ }
+
+ ret = glusterd_defrag_volume_status_update (volinfo, dict);
+
+ if (ret)
+ gf_log ("", GF_LOG_ERROR, "Failed to update status");
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
new file mode 100644
index 000000000..54b830870
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
@@ -0,0 +1,2024 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "common-utils.h"
+#include "cli1-xdr.h"
+#include "xdr-generic.h"
+#include "glusterfs.h"
+#include "glusterd.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-store.h"
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+#include "run.h"
+#include "syscall.h"
+
+#include <signal.h>
+
+#define GLUSTERD_GET_RB_MNTPT(path, len, volinfo) \
+ snprintf (path, len, \
+ DEFAULT_VAR_RUN_DIRECTORY"/%s-"RB_CLIENT_MOUNTPOINT, \
+ volinfo->volname);
+
+extern uuid_t global_txn_id;
+
+int
+glusterd_get_replace_op_str (gf1_cli_replace_op op, char *op_str)
+{
+ int ret = -1;
+
+ if (!op_str)
+ goto out;
+
+ switch (op) {
+ case GF_REPLACE_OP_START:
+ strcpy (op_str, "start");
+ break;
+ case GF_REPLACE_OP_COMMIT:
+ strcpy (op_str, "commit");
+ break;
+ case GF_REPLACE_OP_PAUSE:
+ strcpy (op_str, "pause");
+ break;
+ case GF_REPLACE_OP_ABORT:
+ strcpy (op_str, "abort");
+ break;
+ case GF_REPLACE_OP_STATUS:
+ strcpy (op_str, "status");
+ break;
+ case GF_REPLACE_OP_COMMIT_FORCE:
+ strcpy (op_str, "commit-force");
+ break;
+ default:
+ strcpy (op_str, "unknown");
+ break;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+__glusterd_handle_replace_brick (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{0,}};
+ dict_t *dict = NULL;
+ char *src_brick = NULL;
+ char *dst_brick = NULL;
+ int32_t op = 0;
+ char operation[256];
+ glusterd_op_t cli_op = GD_OP_REPLACE_BRICK;
+ char *volname = NULL;
+ char msg[2048] = {0,};
+ xlator_t *this = NULL;
+
+ GF_ASSERT (req);
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_INFO, "Received replace brick req");
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf (msg, sizeof (msg), "Unable to decode the "
+ "command");
+ goto out;
+ }
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Could not get volume name");
+ gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "operation", &op);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "dict_get on operation failed");
+ snprintf (msg, sizeof (msg), "Could not get operation");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "src-brick", &src_brick);
+
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Failed to get src brick");
+ gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+ goto out;
+ }
+ gf_log (this->name, GF_LOG_DEBUG,
+ "src brick=%s", src_brick);
+
+ ret = dict_get_str (dict, "dst-brick", &dst_brick);
+
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Failed to get dest brick");
+ gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+ goto out;
+ }
+
+ (void) glusterd_get_replace_op_str (op, operation);
+ gf_log (this->name, GF_LOG_DEBUG, "dst brick=%s", dst_brick);
+ gf_log (this->name, GF_LOG_INFO, "Received replace brick %s request",
+ operation);
+
+ ret = glusterd_op_begin (req, GD_OP_REPLACE_BRICK, dict,
+ msg, sizeof (msg));
+
+out:
+ free (cli_req.dict.dict_val);//malloced by xdr
+
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+
+ if (ret) {
+ if (msg[0] == '\0')
+ snprintf (msg, sizeof (msg), "Operation failed");
+ ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+ dict, msg);
+ }
+
+ return ret;
+}
+
+int
+glusterd_handle_replace_brick (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ __glusterd_handle_replace_brick);
+}
+
+static int
+glusterd_get_rb_dst_brickinfo (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t **brickinfo)
+{
+ int32_t ret = -1;
+
+ if (!volinfo || !brickinfo)
+ goto out;
+
+ *brickinfo = volinfo->rep_brick.dst_brick;
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int
+glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ int ret = 0;
+ int32_t port = 0;
+ char *src_brick = NULL;
+ char *dst_brick = NULL;
+ char *volname = NULL;
+ int replace_op = 0;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *src_brickinfo = NULL;
+ char *host = NULL;
+ char *path = NULL;
+ char msg[2048] = {0};
+ char *dup_dstbrick = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_brickinfo_t *dst_brickinfo = NULL;
+ gf_boolean_t is_run = _gf_false;
+ dict_t *ctx = NULL;
+ glusterd_conf_t *priv = NULL;
+ char *savetok = NULL;
+ char pidfile[PATH_MAX] = {0};
+ char *task_id_str = NULL;
+ xlator_t *this = NULL;
+ gf_boolean_t is_force = _gf_false;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = dict_get_str (dict, "src-brick", &src_brick);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get src brick");
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "src brick=%s", src_brick);
+
+ ret = dict_get_str (dict, "dst-brick", &dst_brick);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get dest brick");
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "dst brick=%s", dst_brick);
+
+ ret = dict_get_str (dict, "volname", &volname);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "operation", (int32_t *)&replace_op);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "dict get on replace-brick operation failed");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "volume: %s does not exist",
+ volname);
+ *op_errstr = gf_strdup (msg);
+ goto out;
+ }
+
+ if (GLUSTERD_STATUS_STARTED != volinfo->status) {
+ ret = -1;
+ snprintf (msg, sizeof (msg), "volume: %s is not started",
+ volname);
+ *op_errstr = gf_strdup (msg);
+ goto out;
+ }
+
+ if (!glusterd_store_is_valid_brickpath (volname, dst_brick) ||
+ !glusterd_is_valid_volfpath (volname, dst_brick)) {
+ snprintf (msg, sizeof (msg), "brick path %s is too "
+ "long.", dst_brick);
+ gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_check_gsync_running (volinfo, &is_run);
+ if (ret && (is_run == _gf_false))
+ gf_log (this->name, GF_LOG_WARNING, "Unable to get the status"
+ " of active "GEOREP" session");
+ if (is_run) {
+ gf_log (this->name, GF_LOG_WARNING, GEOREP" sessions active"
+ "for the volume %s ", volname);
+ snprintf (msg, sizeof(msg), GEOREP" sessions are active "
+ "for the volume %s.\nStop "GEOREP " sessions "
+ "involved in this volume. Use 'volume "GEOREP
+ " status' command for more info.",
+ volname);
+ *op_errstr = gf_strdup (msg);
+ ret = -1;
+ goto out;
+ }
+
+ if (glusterd_is_defrag_on(volinfo)) {
+ snprintf (msg, sizeof(msg), "Volume name %s rebalance is in "
+ "progress. Please retry after completion", volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ ret = -1;
+ goto out;
+ }
+
+ ctx = glusterd_op_get_ctx();
+
+ switch (replace_op) {
+ case GF_REPLACE_OP_START:
+ if (glusterd_is_rb_started (volinfo)) {
+ snprintf (msg, sizeof (msg), "Replace brick is already "
+ "started for volume");
+ gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ ret = -1;
+ goto out;
+ }
+ if (is_origin_glusterd (dict)) {
+ if (!ctx) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get op_ctx");
+ goto out;
+ }
+
+ ret = glusterd_generate_and_set_task_id
+ (ctx, GF_REPLACE_BRICK_TID_KEY);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to generate task-id");
+ goto out;
+ }
+
+ } else {
+ ret = dict_get_str (dict, GF_REPLACE_BRICK_TID_KEY,
+ &task_id_str);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Missing replace-brick-id");
+ ret = 0;
+ }
+ }
+ is_force = dict_get_str_boolean (dict, "force", _gf_false);
+
+ break;
+
+ case GF_REPLACE_OP_PAUSE:
+ if (glusterd_is_rb_paused (volinfo)) {
+ gf_log (this->name, GF_LOG_ERROR, "Replace brick is "
+ "already paused for volume ");
+ ret = -1;
+ goto out;
+ } else if (!glusterd_is_rb_started(volinfo)) {
+ gf_log (this->name, GF_LOG_ERROR, "Replace brick is not"
+ " started for volume ");
+ ret = -1;
+ goto out;
+ }
+ break;
+
+ case GF_REPLACE_OP_ABORT:
+ if (!glusterd_is_rb_ongoing (volinfo)) {
+ gf_log (this->name, GF_LOG_ERROR, "Replace brick is not"
+ " started or paused for volume ");
+ ret = -1;
+ goto out;
+ }
+ break;
+
+ case GF_REPLACE_OP_COMMIT:
+ if (!glusterd_is_rb_ongoing (volinfo)) {
+ gf_log (this->name, GF_LOG_ERROR, "Replace brick is not "
+ "started for volume ");
+ ret = -1;
+ goto out;
+ }
+ break;
+
+ case GF_REPLACE_OP_COMMIT_FORCE:
+ is_force = _gf_true;
+ break;
+
+ case GF_REPLACE_OP_STATUS:
+
+ if (glusterd_is_rb_ongoing (volinfo) == _gf_false) {
+ ret = gf_asprintf (op_errstr, "replace-brick not"
+ " started on volume %s",
+ volinfo->volname);
+ if (ret < 0) {
+ *op_errstr = NULL;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr);
+ ret = -1;
+ goto out;
+ }
+ break;
+
+ default:
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_volume_brickinfo_get_by_brick (src_brick, volinfo,
+ &src_brickinfo);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "brick: %s does not exist in "
+ "volume: %s", src_brick, volname);
+ *op_errstr = gf_strdup (msg);
+ goto out;
+ }
+
+ if (ctx) {
+ if (!glusterd_is_fuse_available ()) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to open /dev/"
+ "fuse (%s), replace-brick command failed",
+ strerror (errno));
+ snprintf (msg, sizeof(msg), "Fuse unavailable\n "
+ "Replace-brick failed");
+ *op_errstr = gf_strdup (msg);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (gf_is_local_addr (src_brickinfo->hostname)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "I AM THE SOURCE HOST");
+ if (src_brickinfo->port && rsp_dict) {
+ ret = dict_set_int32 (rsp_dict, "src-brick-port",
+ src_brickinfo->port);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG,
+ "Could not set src-brick-port=%d",
+ src_brickinfo->port);
+ }
+ }
+
+ GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, src_brickinfo,
+ priv);
+ if ((replace_op != GF_REPLACE_OP_COMMIT_FORCE) &&
+ !glusterd_is_service_running (pidfile, NULL)) {
+ snprintf(msg, sizeof(msg), "Source brick %s:%s "
+ "is not online.", src_brickinfo->hostname,
+ src_brickinfo->path);
+ *op_errstr = gf_strdup (msg);
+ ret = -1;
+ goto out;
+ }
+
+
+ }
+
+ dup_dstbrick = gf_strdup (dst_brick);
+ if (!dup_dstbrick) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR, "Memory allocation failed");
+ goto out;
+ }
+ host = strtok_r (dup_dstbrick, ":", &savetok);
+ path = strtok_r (NULL, ":", &savetok);
+
+ if (!host || !path) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "dst brick %s is not of form <HOSTNAME>:<export-dir>",
+ dst_brick);
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_brickinfo_new_from_brick (dst_brick, &dst_brickinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_new_brick_validate (dst_brick, dst_brickinfo,
+ msg, sizeof (msg));
+ if (ret) {
+ *op_errstr = gf_strdup (msg);
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr);
+ goto out;
+ }
+
+ if (!glusterd_is_rb_ongoing (volinfo) &&
+ (replace_op == GF_REPLACE_OP_START ||
+ replace_op == GF_REPLACE_OP_COMMIT_FORCE)) {
+
+ volinfo->rep_brick.src_brick = src_brickinfo;
+ volinfo->rep_brick.dst_brick = dst_brickinfo;
+ }
+
+ if (glusterd_rb_check_bricks (volinfo, src_brickinfo, dst_brickinfo)) {
+
+ ret = -1;
+ *op_errstr = gf_strdup ("Incorrect source or "
+ "destination brick");
+ if (*op_errstr)
+ gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr);
+ goto out;
+ }
+
+ if (!glusterd_is_rb_ongoing (volinfo) &&
+ gf_is_local_addr (host)) {
+ ret = glusterd_validate_and_create_brickpath (dst_brickinfo,
+ volinfo->volume_id,
+ op_errstr, is_force);
+ if (ret)
+ goto out;
+ }
+
+ if (!gf_is_local_addr (host)) {
+ ret = glusterd_friend_find (NULL, host, &peerinfo);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "%s, is not a friend",
+ host);
+ *op_errstr = gf_strdup (msg);
+ goto out;
+ }
+
+ if (!peerinfo->connected) {
+ snprintf (msg, sizeof (msg), "%s, is not connected at "
+ "the moment", host);
+ *op_errstr = gf_strdup (msg);
+ ret = -1;
+ goto out;
+ }
+
+ if (GD_FRIEND_STATE_BEFRIENDED != peerinfo->state.state) {
+ snprintf (msg, sizeof (msg), "%s, is not befriended "
+ "at the moment", host);
+ *op_errstr = gf_strdup (msg);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (replace_op == GF_REPLACE_OP_START &&
+ gf_is_local_addr (volinfo->rep_brick.dst_brick->hostname)) {
+ port = pmap_registry_alloc (THIS);
+ if (!port) {
+ gf_log (THIS->name, GF_LOG_CRITICAL,
+ "No free ports available");
+ ret = -1;
+ goto out;
+ }
+
+ ctx = glusterd_op_get_ctx();
+ ret = dict_set_int32 ((ctx)?ctx:rsp_dict, "dst-brick-port",
+ port);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to set dst "
+ "brick port");
+ goto out;
+ }
+ volinfo->rep_brick.dst_brick->port = port;
+ }
+
+ ret = 0;
+
+out:
+ GF_FREE (dup_dstbrick);
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+static int
+rb_set_mntfd (int mntfd)
+{
+ int ret = -1;
+ dict_t *ctx = NULL;
+
+ ctx = glusterd_op_get_ctx ();
+ if (!ctx) {
+ gf_log (THIS->name, GF_LOG_CRITICAL, "Failed to get op ctx");
+ goto out;
+ }
+ ret = dict_set_int32 (ctx, "mntfd", mntfd);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_DEBUG, "Failed to set mnt fd "
+ "in op ctx");
+out:
+ return ret;
+}
+
+static int
+rb_get_mntfd (int *mntfd)
+{
+ int ret = -1;
+ dict_t *ctx = NULL;
+
+ ctx = glusterd_op_get_ctx ();
+ if (!ctx) {
+ gf_log (THIS->name, GF_LOG_CRITICAL, "Failed to get op ctx");
+ goto out;
+ }
+ ret = dict_get_int32 (ctx, "mntfd", mntfd);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_DEBUG, "Failed to get mnt fd "
+ "from op ctx");
+out:
+ return ret;
+}
+
+static int
+rb_regenerate_volfiles (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ int32_t pump_needed)
+{
+ dict_t *dict = NULL;
+ int ret = 0;
+
+ dict = volinfo->dict;
+
+ gf_log ("", GF_LOG_DEBUG,
+ "attempting to set pump value=%d", pump_needed);
+
+ ret = dict_set_int32 (dict, "enable-pump", pump_needed);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG,
+ "could not dict_set enable-pump");
+ goto out;
+ }
+
+ ret = glusterd_create_rb_volfiles (volinfo, brickinfo);
+
+ dict_del (dict, "enable-pump");
+
+out:
+ return ret;
+}
+
+static int
+rb_src_brick_restart (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *src_brickinfo,
+ int activate_pump)
+{
+ int ret = 0;
+
+ gf_log ("", GF_LOG_DEBUG,
+ "Attempting to kill src");
+
+ ret = glusterd_nfs_server_stop (volinfo);
+
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to stop nfs, ret: %d",
+ ret);
+ }
+
+ ret = glusterd_volume_stop_glusterfs (volinfo, src_brickinfo,
+ _gf_false);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to stop "
+ "glusterfs, ret: %d", ret);
+ goto out;
+ }
+
+ glusterd_delete_volfile (volinfo, src_brickinfo);
+
+ if (activate_pump) {
+ ret = rb_regenerate_volfiles (volinfo, src_brickinfo, 1);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG,
+ "Could not regenerate volfiles with pump");
+ goto out;
+ }
+ } else {
+ ret = rb_regenerate_volfiles (volinfo, src_brickinfo, 0);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG,
+ "Could not regenerate volfiles without pump");
+ goto out;
+ }
+
+ }
+
+ sleep (2);
+ ret = glusterd_volume_start_glusterfs (volinfo, src_brickinfo,
+ _gf_false);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to start "
+ "glusterfs, ret: %d", ret);
+ goto out;
+ }
+
+out:
+ ret = glusterd_nfs_server_start (volinfo);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to start nfs, ret: %d",
+ ret);
+ }
+ return ret;
+}
+
+static int
+rb_send_xattr_command (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *src_brickinfo,
+ glusterd_brickinfo_t *dst_brickinfo,
+ const char *xattr_key, const char *value)
+{
+ int ret = -1;
+ int mntfd = -1;
+
+ ret = rb_get_mntfd (&mntfd);
+ if (ret)
+ goto out;
+
+ ret = sys_fsetxattr (mntfd, xattr_key, value, strlen (value) + 1, 0);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_DEBUG, "setxattr on key: "
+ "%s, reason: %s", xattr_key, strerror (errno));
+
+out:
+ return ret;
+}
+
+static int
+rb_spawn_dst_brick (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo)
+{
+ glusterd_conf_t *priv = NULL;
+ runner_t runner = {0,};
+ int ret = -1;
+ int32_t port = 0;
+
+ priv = THIS->private;
+
+ port = brickinfo->port;
+ GF_ASSERT (port);
+
+ runinit (&runner);
+ runner_add_arg (&runner, SBIN_DIR"/glusterfs");
+ runner_argprintf (&runner, "-f" "%s/vols/%s/"RB_DSTBRICKVOL_FILENAME,
+ priv->workdir, volinfo->volname);
+ runner_argprintf (&runner, "-p" "%s/vols/%s/"RB_DSTBRICK_PIDFILE,
+ priv->workdir, volinfo->volname);
+ runner_add_arg (&runner, "--xlator-option");
+ runner_argprintf (&runner, "src-server.listen-port=%d", port);
+ if (volinfo->memory_accounting)
+ runner_add_arg (&runner, "--mem-accounting");
+
+ ret = runner_run_nowait (&runner);
+ if (ret) {
+ pmap_registry_remove (THIS, 0, brickinfo->path,
+ GF_PMAP_PORT_BRICKSERVER, NULL);
+ gf_log ("", GF_LOG_DEBUG,
+ "Could not start glusterfs");
+ goto out;
+ }
+
+ gf_log ("", GF_LOG_DEBUG,
+ "Successfully started glusterfs: brick=%s:%s",
+ brickinfo->hostname, brickinfo->path);
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+static int
+rb_spawn_glusterfs_client (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo)
+{
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ runner_t runner = {0,};
+ struct stat buf = {0,};
+ char mntpt[PATH_MAX] = {0,};
+ int mntfd = -1;
+ int ret = -1;
+
+ this = THIS;
+ priv = this->private;
+
+ GLUSTERD_GET_RB_MNTPT (mntpt, sizeof (mntpt), volinfo);
+ runinit (&runner);
+ runner_add_arg (&runner, SBIN_DIR"/glusterfs");
+ runner_argprintf (&runner, "-f" "%s/vols/%s/"RB_CLIENTVOL_FILENAME,
+ priv->workdir, volinfo->volname);
+ runner_add_arg (&runner, mntpt);
+ if (volinfo->memory_accounting)
+ runner_add_arg (&runner, "--mem-accounting");
+
+ ret = runner_run_reuse (&runner);
+ if (ret) {
+ runner_log (&runner, this->name, GF_LOG_DEBUG,
+ "Could not start glusterfs");
+ runner_end (&runner);
+ goto out;
+ } else {
+ runner_log (&runner, this->name, GF_LOG_DEBUG,
+ "Successfully started glusterfs");
+ runner_end (&runner);
+ }
+
+ ret = stat (mntpt, &buf);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "stat on mount point %s "
+ "failed", mntpt);
+ goto out;
+ }
+
+ mntfd = open (mntpt, O_DIRECTORY);
+ if (mntfd == -1)
+ goto out;
+
+ ret = rb_set_mntfd (mntfd);
+ if (ret)
+ goto out;
+
+ runinit (&runner);
+ runner_add_args (&runner, "/bin/umount", "-l", mntpt, NULL);
+ ret = runner_run_reuse (&runner);
+ if (ret) {
+ runner_log (&runner, this->name, GF_LOG_DEBUG,
+ "Lazy unmount failed on maintenance client");
+ runner_end (&runner);
+ goto out;
+ } else {
+ runner_log (&runner, this->name, GF_LOG_DEBUG,
+ "Successfully unmounted maintenance client");
+ runner_end (&runner);
+ }
+
+
+out:
+
+ return ret;
+}
+
+static const char *client_volfile_str = "volume mnt-client\n"
+ " type protocol/client\n"
+ " option remote-host %s\n"
+ " option remote-subvolume %s\n"
+ " option remote-port %d\n"
+ " option transport-type %s\n"
+ " option username %s\n"
+ " option password %s\n"
+ "end-volume\n"
+ "volume mnt-wb\n"
+ " type performance/write-behind\n"
+ " subvolumes mnt-client\n"
+ "end-volume\n";
+
+static int
+rb_generate_client_volfile (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *src_brickinfo)
+{
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ FILE *file = NULL;
+ char filename[PATH_MAX] = {0, };
+ int ret = -1;
+ int fd = -1;
+ char *ttype = NULL;
+
+ this = THIS;
+ priv = this->private;
+
+ gf_log (this->name, GF_LOG_DEBUG, "Creating volfile");
+
+ snprintf (filename, PATH_MAX, "%s/vols/%s/%s",
+ priv->workdir, volinfo->volname,
+ RB_CLIENTVOL_FILENAME);
+
+ fd = open (filename, O_CREAT | O_RDONLY, S_IRUSR | S_IWUSR);
+ if (fd < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s", strerror (errno));
+ goto out;
+ }
+ close (fd);
+
+ file = fopen (filename, "w+");
+ if (!file) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Open of volfile failed");
+ ret = -1;
+ goto out;
+ }
+
+ GF_ASSERT (src_brickinfo->port);
+
+ ttype = glusterd_get_trans_type_rb (volinfo->transport_type);
+ if (NULL == ttype){
+ ret = -1;
+ goto out;
+ }
+
+ fprintf (file, client_volfile_str, src_brickinfo->hostname,
+ src_brickinfo->path,
+ src_brickinfo->port, ttype,
+ glusterd_auth_get_username (volinfo),
+ glusterd_auth_get_password (volinfo));
+
+ fclose (file);
+ GF_FREE (ttype);
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+static const char *dst_brick_volfile_str = "volume src-posix\n"
+ " type storage/posix\n"
+ " option directory %s\n"
+ " option volume-id %s\n"
+ "end-volume\n"
+ "volume %s\n"
+ " type features/locks\n"
+ " subvolumes src-posix\n"
+ "end-volume\n"
+ "volume src-server\n"
+ " type protocol/server\n"
+ " option auth.login.%s.allow %s\n"
+ " option auth.login.%s.password %s\n"
+ " option auth.addr.%s.allow *\n"
+ " option transport-type %s\n"
+ " subvolumes %s\n"
+ "end-volume\n";
+
+static int
+rb_generate_dst_brick_volfile (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *dst_brickinfo)
+{
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ FILE *file = NULL;
+ char filename[PATH_MAX] = {0, };
+ int ret = -1;
+ int fd = -1;
+ char *trans_type = NULL;
+
+ this = THIS;
+ priv = this->private;
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Creating volfile");
+
+ snprintf (filename, PATH_MAX, "%s/vols/%s/%s",
+ priv->workdir, volinfo->volname,
+ RB_DSTBRICKVOL_FILENAME);
+
+ fd = creat (filename, S_IRUSR | S_IWUSR);
+ if (fd < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s", strerror (errno));
+ goto out;
+ }
+ close (fd);
+
+ file = fopen (filename, "w+");
+ if (!file) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Open of volfile failed");
+ ret = -1;
+ goto out;
+ }
+
+ trans_type = glusterd_get_trans_type_rb (volinfo->transport_type);
+ if (NULL == trans_type){
+ ret = -1;
+ goto out;
+ }
+
+ fprintf (file, dst_brick_volfile_str,
+ dst_brickinfo->path,
+ uuid_utoa (volinfo->volume_id),
+ dst_brickinfo->path,
+ dst_brickinfo->path,
+ glusterd_auth_get_username (volinfo),
+ glusterd_auth_get_username (volinfo),
+ glusterd_auth_get_password (volinfo),
+ dst_brickinfo->path,
+ trans_type,
+ dst_brickinfo->path);
+
+ GF_FREE (trans_type);
+
+ fclose (file);
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+
+static int
+rb_mountpoint_mkdir (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *src_brickinfo)
+{
+ char mntpt[PATH_MAX] = {0,};
+ int ret = -1;
+
+ GLUSTERD_GET_RB_MNTPT (mntpt, sizeof (mntpt), volinfo);
+ ret = mkdir (mntpt, 0777);
+ if (ret && (errno != EEXIST)) {
+ gf_log ("", GF_LOG_DEBUG, "mkdir failed, due to %s",
+ strerror (errno));
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+static int
+rb_mountpoint_rmdir (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *src_brickinfo)
+{
+ char mntpt[PATH_MAX] = {0,};
+ int ret = -1;
+
+ GLUSTERD_GET_RB_MNTPT (mntpt, sizeof (mntpt), volinfo);
+ ret = rmdir (mntpt);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG, "rmdir failed, reason: %s",
+ strerror (errno));
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+static int
+rb_destroy_maintenance_client (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *src_brickinfo)
+{
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ char volfile[PATH_MAX] = {0,};
+ int ret = -1;
+ int mntfd = -1;
+
+ this = THIS;
+ priv = this->private;
+
+ ret = rb_get_mntfd (&mntfd);
+ if (ret)
+ goto out;
+
+ ret = close (mntfd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "Failed to close mount "
+ "point directory");
+ goto out;
+ }
+
+ ret = rb_mountpoint_rmdir (volinfo, src_brickinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "rmdir of mountpoint "
+ "failed");
+ goto out;
+ }
+
+ snprintf (volfile, PATH_MAX, "%s/vols/%s/%s", priv->workdir,
+ volinfo->volname, RB_CLIENTVOL_FILENAME);
+
+ ret = unlink (volfile);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG, "unlink of %s failed, reason: %s",
+ volfile, strerror (errno));
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+static int
+rb_spawn_maintenance_client (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *src_brickinfo)
+{
+ int ret = -1;
+
+ ret = rb_generate_client_volfile (volinfo, src_brickinfo);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG, "Unable to generate client "
+ "volfile");
+ goto out;
+ }
+
+ ret = rb_mountpoint_mkdir (volinfo, src_brickinfo);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG, "Unable to mkdir "
+ "mountpoint");
+ goto out;
+ }
+
+ ret = rb_spawn_glusterfs_client (volinfo, src_brickinfo);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG, "Unable to start glusterfs");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static int
+rb_spawn_destination_brick (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *dst_brickinfo)
+
+{
+ int ret = -1;
+
+ ret = rb_generate_dst_brick_volfile (volinfo, dst_brickinfo);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG, "Unable to generate client "
+ "volfile");
+ goto out;
+ }
+
+ ret = rb_spawn_dst_brick (volinfo, dst_brickinfo);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG, "Unable to start glusterfs");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static int
+rb_kill_destination_brick (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *dst_brickinfo)
+{
+ glusterd_conf_t *priv = NULL;
+ char pidfile[PATH_MAX] = {0,};
+
+ priv = THIS->private;
+
+ snprintf (pidfile, PATH_MAX, "%s/vols/%s/%s",
+ priv->workdir, volinfo->volname,
+ RB_DSTBRICK_PIDFILE);
+
+ return glusterd_service_stop ("brick", pidfile, SIGTERM, _gf_true);
+}
+
+static int
+rb_get_xattr_command (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *src_brickinfo,
+ glusterd_brickinfo_t *dst_brickinfo,
+ const char *xattr_key,
+ char *value)
+{
+ int ret = -1;
+ int mntfd = -1;
+
+ ret = rb_get_mntfd (&mntfd);
+ if (ret)
+ goto out;
+
+ ret = sys_fgetxattr (mntfd, xattr_key, value, 8192);
+
+ if (ret < 0) {
+ gf_log (THIS->name, GF_LOG_DEBUG, "getxattr on key: %s "
+ "failed, reason: %s", xattr_key, strerror (errno));
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static int
+rb_send_cmd (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *src,
+ glusterd_brickinfo_t *dst,
+ gf1_cli_replace_op op)
+{
+ char start_value[8192] = {0,};
+ char status_str[8192] = {0,};
+ char *status_reply = NULL;
+ char *tmp = NULL;
+ char *save_ptr = NULL;
+ char filename[PATH_MAX] = {0,};
+ char *current_file = NULL;
+ uint64_t files = 0;
+ int status = 0;
+ dict_t *ctx = NULL;
+ int ret = 0;
+
+ GF_ASSERT (volinfo);
+ GF_ASSERT (src);
+ GF_ASSERT (dst);
+ GF_ASSERT ((op > GF_REPLACE_OP_NONE)
+ && (op <= GF_REPLACE_OP_COMMIT_FORCE));
+
+ switch (op) {
+ case GF_REPLACE_OP_START:
+ {
+ snprintf (start_value, sizeof (start_value),
+ "%s:%s:%d", dst->hostname, dst->path,
+ dst->port);
+ ret = rb_send_xattr_command (volinfo, src, dst,
+ RB_PUMP_CMD_START,
+ start_value);
+ }
+ break;
+ case GF_REPLACE_OP_PAUSE:
+ {
+ ret = rb_send_xattr_command (volinfo, src, dst,
+ RB_PUMP_CMD_PAUSE,
+ RB_PUMP_DEF_ARG);
+ }
+ break;
+ case GF_REPLACE_OP_ABORT:
+ {
+ ret = rb_send_xattr_command (volinfo, src, dst,
+ RB_PUMP_CMD_ABORT,
+ RB_PUMP_DEF_ARG);
+ }
+ break;
+ case GF_REPLACE_OP_COMMIT:
+ {
+ ret = rb_send_xattr_command (volinfo, src, dst,
+ RB_PUMP_CMD_COMMIT,
+ RB_PUMP_DEF_ARG);
+ }
+ break;
+ case GF_REPLACE_OP_STATUS:
+ {
+ ret = rb_get_xattr_command (volinfo, src, dst,
+ RB_PUMP_CMD_STATUS,
+ status_str);
+ if (ret)
+ goto out;
+
+ ctx = glusterd_op_get_ctx ();
+ GF_ASSERT (ctx);
+ if (!ctx) {
+ ret = -1;
+ gf_log (THIS->name, GF_LOG_CRITICAL,
+ "ctx is not present.");
+ goto out;
+ }
+
+ /* Split status reply into different parts */
+ tmp = strtok_r (status_str, ":", &save_ptr);
+ if (!tmp) {
+ ret = -1;
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Couldn't tokenize status string");
+ goto out;
+ }
+ sscanf (tmp, "status=%d", &status);
+ ret = dict_set_int32 (ctx, "status", status);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Couldn't "
+ "set rb status in context");
+ goto out;
+ }
+
+ tmp = NULL;
+ tmp = strtok_r (NULL, ":", &save_ptr);
+ if (!tmp) {
+ ret = -1;
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Couldn't tokenize status string");
+ goto out;
+ }
+ sscanf (tmp, "no_of_files=%"SCNu64, &files);
+ ret = dict_set_uint64 (ctx, "files", files);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Couldn't "
+ "set rb files in context");
+ goto out;
+ }
+
+ if (status == 0) {
+ tmp = NULL;
+ tmp = strtok_r (NULL, ":", &save_ptr);
+ if (!tmp) {
+ ret = -1;
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Couldn't tokenize status "
+ "string");
+ goto out;
+ }
+ sscanf (tmp, "current_file=%s", filename);
+ current_file = gf_strdup (filename);
+ ret = dict_set_dynstr (ctx, "current_file",
+ current_file);
+ if (ret) {
+ GF_FREE (current_file);
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Couldn't set rb current file "
+ "in context");
+ goto out;
+ }
+ }
+ if (status) {
+ ret = gf_asprintf (&status_reply,
+ "Number of files migrated = %"
+ PRIu64"\tMigration complete",
+ files);
+ } else {
+ ret = gf_asprintf (&status_reply,
+ "Number of files migrated = %"
+ PRIu64"\tCurrent file = %s",
+ files, filename);
+ }
+ if (ret == -1) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Failed to create status_reply string");
+ goto out;
+ }
+ ret = dict_set_dynstr (ctx, "status-reply",
+ status_reply);
+ if (ret) {
+ GF_FREE (status_reply);
+ gf_log (THIS->name, GF_LOG_ERROR, "Couldn't "
+ "set rb status response in context.");
+ goto out;
+ }
+ }
+ break;
+ default:
+ {
+ GF_ASSERT (0);
+ ret = -1;
+ gf_log (THIS->name, GF_LOG_CRITICAL, "Invalid replace"
+ " brick subcommand.");
+ }
+ break;
+ }
+out:
+ return ret;
+}
+
+static int
+rb_do_operation (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *src_brickinfo,
+ glusterd_brickinfo_t *dst_brickinfo,
+ gf1_cli_replace_op op)
+{
+
+ int ret = -1;
+ char op_str[256] = {0, };
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ ret = rb_spawn_maintenance_client (volinfo, src_brickinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "Could not spawn "
+ "maintenance client");
+ goto umount;
+ }
+
+ ret = rb_send_cmd (volinfo, src_brickinfo, dst_brickinfo, op);
+ if (ret) {
+ (void) glusterd_get_replace_op_str (op, op_str);
+ gf_log (this->name, GF_LOG_DEBUG, "Sending replace-brick "
+ "sub-command %s failed.", op_str);
+ }
+
+umount:
+ if (rb_destroy_maintenance_client (volinfo, src_brickinfo))
+ gf_log (this->name, GF_LOG_DEBUG, "Failed to destroy "
+ "maintenance client");
+
+ return ret;
+}
+
+/* Set src-brick's port number to be used in the maintenance mount
+ * after all commit acks are received.
+ */
+static int
+rb_update_srcbrick_port (glusterd_brickinfo_t *src_brickinfo, dict_t *rsp_dict,
+ dict_t *req_dict, int32_t replace_op)
+{
+ xlator_t *this = NULL;
+ dict_t *ctx = NULL;
+ int ret = 0;
+ int dict_ret = 0;
+ int src_port = 0;
+
+ this = THIS;
+
+ dict_ret = dict_get_int32 (req_dict, "src-brick-port", &src_port);
+ if (src_port)
+ src_brickinfo->port = src_port;
+
+ if (gf_is_local_addr (src_brickinfo->hostname)) {
+ gf_log ("", GF_LOG_INFO,
+ "adding src-brick port no");
+
+ src_brickinfo->port = pmap_registry_search (this,
+ src_brickinfo->path, GF_PMAP_PORT_BRICKSERVER);
+ if (!src_brickinfo->port &&
+ replace_op != GF_REPLACE_OP_COMMIT_FORCE ) {
+ gf_log ("", GF_LOG_ERROR,
+ "Src brick port not available");
+ ret = -1;
+ goto out;
+ }
+
+ if (rsp_dict) {
+ ret = dict_set_int32 (rsp_dict, "src-brick-port", src_brickinfo->port);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG,
+ "Could not set src-brick port no");
+ goto out;
+ }
+ }
+
+ ctx = glusterd_op_get_ctx ();
+ if (ctx) {
+ ret = dict_set_int32 (ctx, "src-brick-port", src_brickinfo->port);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG,
+ "Could not set src-brick port no");
+ goto out;
+ }
+ }
+
+ }
+
+out:
+ return ret;
+
+}
+
+static int
+rb_update_dstbrick_port (glusterd_brickinfo_t *dst_brickinfo, dict_t *rsp_dict,
+ dict_t *req_dict, int32_t replace_op)
+{
+ dict_t *ctx = NULL;
+ int ret = 0;
+ int dict_ret = 0;
+ int dst_port = 0;
+
+ dict_ret = dict_get_int32 (req_dict, "dst-brick-port", &dst_port);
+ if (!dict_ret)
+ dst_brickinfo->port = dst_port;
+
+
+ if (gf_is_local_addr (dst_brickinfo->hostname)) {
+ gf_log ("", GF_LOG_INFO,
+ "adding dst-brick port no");
+
+ if (rsp_dict) {
+ ret = dict_set_int32 (rsp_dict, "dst-brick-port",
+ dst_brickinfo->port);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG,
+ "Could not set dst-brick port no in rsp dict");
+ goto out;
+ }
+ }
+
+ ctx = glusterd_op_get_ctx ();
+ if (ctx) {
+ ret = dict_set_int32 (ctx, "dst-brick-port",
+ dst_brickinfo->port);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG,
+ "Could not set dst-brick port no");
+ goto out;
+ }
+ }
+ }
+out:
+ return ret;
+}
+
+static int
+glusterd_op_perform_replace_brick (glusterd_volinfo_t *volinfo,
+ char *old_brick, char *new_brick)
+{
+ glusterd_brickinfo_t *old_brickinfo = NULL;
+ glusterd_brickinfo_t *new_brickinfo = NULL;
+ int32_t ret = -1;
+
+ GF_ASSERT (volinfo);
+
+ ret = glusterd_brickinfo_new_from_brick (new_brick,
+ &new_brickinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_resolve_brick (new_brickinfo);
+
+ if (ret)
+ goto out;
+
+ ret = glusterd_volume_brickinfo_get_by_brick (old_brick,
+ volinfo, &old_brickinfo);
+ if (ret)
+ goto out;
+
+ list_add_tail (&new_brickinfo->brick_list,
+ &old_brickinfo->brick_list);
+
+ volinfo->brick_count++;
+
+ ret = glusterd_op_perform_remove_brick (volinfo, old_brick, 1, NULL);
+ if (ret)
+ goto out;
+
+ ret = glusterd_create_volfiles_and_notify_services (volinfo);
+ if (ret)
+ goto out;
+
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+ ret = glusterd_brick_start (volinfo, new_brickinfo, _gf_false);
+ if (ret)
+ goto out;
+ }
+
+out:
+
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict)
+{
+ int ret = 0;
+ dict_t *ctx = NULL;
+ int replace_op = 0;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *volname = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ char *src_brick = NULL;
+ char *dst_brick = NULL;
+ glusterd_brickinfo_t *src_brickinfo = NULL;
+ glusterd_brickinfo_t *dst_brickinfo = NULL;
+ char *task_id_str = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = dict_get_str (dict, "src-brick", &src_brick);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get src brick");
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "src brick=%s", src_brick);
+
+ ret = dict_get_str (dict, "dst-brick", &dst_brick);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get dst brick");
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "dst brick=%s", dst_brick);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "operation", (int32_t *)&replace_op);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "dict_get on operation failed");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to allocate memory");
+ goto out;
+ }
+
+ ret = glusterd_volume_brickinfo_get_by_brick (src_brick, volinfo,
+ &src_brickinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Unable to get src-brickinfo");
+ goto out;
+ }
+
+
+ ret = glusterd_get_rb_dst_brickinfo (volinfo, &dst_brickinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get "
+ "replace brick destination brickinfo");
+ goto out;
+ }
+
+ ret = glusterd_resolve_brick (dst_brickinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Unable to resolve dst-brickinfo");
+ goto out;
+ }
+
+ ret = rb_update_srcbrick_port (src_brickinfo, rsp_dict,
+ dict, replace_op);
+ if (ret)
+ goto out;
+
+
+ if ((GF_REPLACE_OP_START != replace_op)) {
+
+ /* Set task-id, if available, in op_ctx dict for operations
+ * other than start
+ */
+ if (is_origin_glusterd (dict)) {
+ ctx = glusterd_op_get_ctx();
+ if (!ctx) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "get op_ctx");
+ ret = -1;
+ goto out;
+ }
+ if (!uuid_is_null (volinfo->rep_brick.rb_id)) {
+ ret = glusterd_copy_uuid_to_dict
+ (volinfo->rep_brick.rb_id, ctx,
+ GF_REPLACE_BRICK_TID_KEY);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set "
+ "replace-brick-id");
+ goto out;
+ }
+ }
+ }
+ }
+ ret = rb_update_dstbrick_port (dst_brickinfo, rsp_dict,
+ dict, replace_op);
+ if (ret)
+ goto out;
+
+ switch (replace_op) {
+ case GF_REPLACE_OP_START:
+ {
+ ret = dict_get_str (dict, GF_REPLACE_BRICK_TID_KEY, &task_id_str);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Missing replace-brick-id");
+ ret = 0;
+ } else {
+ uuid_parse (task_id_str, volinfo->rep_brick.rb_id);
+ }
+
+ if (gf_is_local_addr (dst_brickinfo->hostname)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "I AM THE DESTINATION HOST");
+ if (!glusterd_is_rb_paused (volinfo)) {
+ ret = rb_spawn_destination_brick
+ (volinfo, dst_brickinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Failed to spawn destination "
+ "brick");
+ goto out;
+ }
+ } else {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Replace brick is already started=> no "
+ "need to restart dst brick ");
+ }
+ }
+
+
+ if (gf_is_local_addr (src_brickinfo->hostname)) {
+ ret = rb_src_brick_restart (volinfo, src_brickinfo,
+ 1);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Could not restart src-brick");
+ goto out;
+ }
+ }
+
+ if (gf_is_local_addr (dst_brickinfo->hostname)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "adding dst-brick port no");
+
+ ret = rb_update_dstbrick_port (dst_brickinfo, rsp_dict,
+ dict, replace_op);
+ if (ret)
+ goto out;
+ }
+
+ glusterd_set_rb_status (volinfo, GF_RB_STATUS_STARTED);
+ break;
+ }
+
+ case GF_REPLACE_OP_COMMIT:
+ {
+ ctx = glusterd_op_get_ctx ();
+ if (ctx) {
+ ret = rb_do_operation (volinfo, src_brickinfo,
+ dst_brickinfo,
+ GF_REPLACE_OP_COMMIT);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Commit operation failed");
+ goto out;
+ }
+ }
+ }
+ /* fall through */
+ case GF_REPLACE_OP_COMMIT_FORCE:
+ {
+ if (gf_is_local_addr (dst_brickinfo->hostname)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "I AM THE DESTINATION HOST");
+ ret = rb_kill_destination_brick (volinfo,
+ dst_brickinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Unable to cleanup dst brick");
+ goto out;
+ }
+ }
+
+ ret = glusterd_nodesvcs_stop (volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to stop nfs server, ret: %d", ret);
+ }
+
+ ret = glusterd_op_perform_replace_brick (volinfo, src_brick,
+ dst_brick);
+ if (ret) {
+ gf_log (this->name, GF_LOG_CRITICAL, "Unable to add "
+ "dst-brick: %s to volume: %s", dst_brick,
+ volinfo->volname);
+ (void) glusterd_nodesvcs_handle_graph_change (volinfo);
+ goto out;
+ }
+
+ volinfo->rebal.defrag_status = 0;
+
+ ret = glusterd_nodesvcs_handle_graph_change (volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Failed to generate nfs volume file");
+ }
+
+
+ ret = glusterd_fetchspec_notify (THIS);
+ glusterd_set_rb_status (volinfo, GF_RB_STATUS_NONE);
+ glusterd_brickinfo_delete (volinfo->rep_brick.dst_brick);
+ volinfo->rep_brick.src_brick = NULL;
+ volinfo->rep_brick.dst_brick = NULL;
+ uuid_clear (volinfo->rep_brick.rb_id);
+ }
+ break;
+
+ case GF_REPLACE_OP_PAUSE:
+ {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Received pause - doing nothing");
+ ctx = glusterd_op_get_ctx ();
+ if (ctx) {
+ ret = rb_do_operation (volinfo, src_brickinfo,
+ dst_brickinfo,
+ GF_REPLACE_OP_PAUSE);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Pause operation failed");
+ goto out;
+ }
+ }
+
+ glusterd_set_rb_status (volinfo, GF_RB_STATUS_PAUSED);
+ }
+ break;
+
+ case GF_REPLACE_OP_ABORT:
+ {
+
+ ctx = glusterd_op_get_ctx ();
+ if (ctx) {
+ ret = rb_do_operation (volinfo, src_brickinfo,
+ dst_brickinfo,
+ GF_REPLACE_OP_ABORT);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Abort operation failed");
+ goto out;
+ }
+ }
+
+ if (gf_is_local_addr (src_brickinfo->hostname)) {
+ ret = rb_src_brick_restart (volinfo, src_brickinfo,
+ 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Couldn't restart src brick "
+ "with pump xlator disabled.");
+ goto out;
+ }
+ }
+
+ if (gf_is_local_addr (dst_brickinfo->hostname)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "I AM THE DESTINATION HOST");
+ ret = rb_kill_destination_brick (volinfo, dst_brickinfo);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG,
+ "Failed to kill destination brick");
+ goto out;
+ }
+ }
+ glusterd_set_rb_status (volinfo, GF_RB_STATUS_NONE);
+ glusterd_brickinfo_delete (volinfo->rep_brick.dst_brick);
+ volinfo->rep_brick.src_brick = NULL;
+ volinfo->rep_brick.dst_brick = NULL;
+ }
+ break;
+
+ case GF_REPLACE_OP_STATUS:
+ {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "received status - doing nothing");
+ ctx = glusterd_op_get_ctx ();
+ if (ctx) {
+ if (glusterd_is_rb_paused (volinfo)) {
+ ret = dict_set_str (ctx, "status-reply",
+ "replace brick has been paused");
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set pump status"
+ " in ctx");
+ goto out;
+ }
+
+ ret = rb_do_operation (volinfo, src_brickinfo,
+ dst_brickinfo,
+ GF_REPLACE_OP_STATUS);
+ if (ret)
+ goto out;
+ }
+
+ }
+ break;
+
+ default:
+ ret = -1;
+ goto out;
+ }
+ if (!ret && replace_op != GF_REPLACE_OP_STATUS)
+ ret = glusterd_store_volinfo (volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Couldn't store"
+ " replace brick operation's state");
+
+out:
+ return ret;
+}
+
+void
+glusterd_do_replace_brick (void *data)
+{
+ glusterd_volinfo_t *volinfo = NULL;
+ int32_t op = 0;
+ int32_t src_port = 0;
+ int32_t dst_port = 0;
+ dict_t *dict = NULL;
+ char *src_brick = NULL;
+ char *dst_brick = NULL;
+ char *volname = NULL;
+ glusterd_brickinfo_t *src_brickinfo = NULL;
+ glusterd_brickinfo_t *dst_brickinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ uuid_t *txn_id = &global_txn_id;
+
+ int ret = 0;
+
+ dict = data;
+
+ GF_ASSERT (THIS);
+
+ priv = THIS->private;
+
+ if (priv->timer) {
+ gf_timer_call_cancel (THIS->ctx, priv->timer);
+ priv->timer = NULL;
+ gf_log ("", GF_LOG_DEBUG,
+ "Cancelling timer thread");
+ }
+
+ gf_log ("", GF_LOG_DEBUG,
+ "Replace brick operation detected");
+
+ ret = dict_get_bin (dict, "transaction_id", (void **)&txn_id);
+
+ gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id));
+
+ ret = dict_get_int32 (dict, "operation", &op);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG,
+ "dict_get on operation failed");
+ goto out;
+ }
+ ret = dict_get_str (dict, "src-brick", &src_brick);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get src brick");
+ goto out;
+ }
+
+ gf_log ("", GF_LOG_DEBUG,
+ "src brick=%s", src_brick);
+
+ ret = dict_get_str (dict, "dst-brick", &dst_brick);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get dst brick");
+ goto out;
+ }
+
+ gf_log ("", GF_LOG_DEBUG,
+ "dst brick=%s", dst_brick);
+
+ ret = dict_get_str (dict, "volname", &volname);
+
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to allocate memory");
+ goto out;
+ }
+
+ ret = glusterd_volume_brickinfo_get_by_brick (src_brick, volinfo,
+ &src_brickinfo);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG, "Unable to get src-brickinfo");
+ goto out;
+ }
+
+ ret = glusterd_get_rb_dst_brickinfo (volinfo, &dst_brickinfo);
+ if (!dst_brickinfo) {
+ gf_log ("", GF_LOG_DEBUG, "Unable to get dst-brickinfo");
+ goto out;
+ }
+
+ ret = glusterd_resolve_brick (dst_brickinfo);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG, "Unable to resolve dst-brickinfo");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "src-brick-port", &src_port);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get src-brick port");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "dst-brick-port", &dst_port);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get dst-brick port");
+ }
+
+ dst_brickinfo->port = dst_port;
+ src_brickinfo->port = src_port;
+
+ switch (op) {
+ case GF_REPLACE_OP_START:
+ if (!dst_port) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = rb_do_operation (volinfo, src_brickinfo, dst_brickinfo,
+ GF_REPLACE_OP_START);
+ if (ret)
+ goto out;
+ break;
+ case GF_REPLACE_OP_PAUSE:
+ case GF_REPLACE_OP_ABORT:
+ case GF_REPLACE_OP_COMMIT:
+ case GF_REPLACE_OP_COMMIT_FORCE:
+ case GF_REPLACE_OP_STATUS:
+ break;
+ default:
+ ret = -1;
+ goto out;
+ }
+
+out:
+ if (ret)
+ ret = glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT,
+ txn_id, NULL);
+ else
+ ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_ACC,
+ txn_id, NULL);
+
+ glusterd_op_sm ();
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
new file mode 100644
index 000000000..d5200a4ae
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
@@ -0,0 +1,1974 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "rpc-clnt.h"
+#include "glusterd1-xdr.h"
+#include "cli1-xdr.h"
+
+#include "xdr-generic.h"
+
+#include "compat-errno.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-sm.h"
+#include "glusterd.h"
+#include "protocol-common.h"
+#include "glusterd-utils.h"
+#include "common-utils.h"
+#include <sys/uio.h>
+
+
+#define SERVER_PATH_MAX (16 * 1024)
+
+
+extern glusterd_op_info_t opinfo;
+extern uuid_t global_txn_id;
+
+int32_t
+glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret,
+ int32_t op_errno, rpcsvc_request_t *req,
+ void *op_ctx, char *op_errstr)
+{
+ int32_t ret = -1;
+ void *cli_rsp = NULL;
+ dict_t *ctx = NULL;
+ char *free_ptr = NULL;
+ glusterd_conf_t *conf = NULL;
+ xdrproc_t xdrproc = NULL;
+ char *errstr = NULL;
+ int32_t status = 0;
+ int32_t count = 0;
+ gf_cli_rsp rsp = {0,};
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ conf = this->private;
+
+ GF_ASSERT (conf);
+
+ ctx = op_ctx;
+
+ switch (op) {
+ case GD_OP_REMOVE_BRICK:
+ {
+ if (ctx)
+ ret = dict_get_str (ctx, "errstr", &errstr);
+ break;
+ }
+ case GD_OP_RESET_VOLUME:
+ {
+ if (op_ret && !op_errstr)
+ errstr = "Error while resetting options";
+ break;
+ }
+ case GD_OP_REBALANCE:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ {
+ if (ctx) {
+ ret = dict_get_int32 (ctx, "status", &status);
+ if (ret) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "failed to get status");
+ }
+ }
+ break;
+ }
+ case GD_OP_GSYNC_CREATE:
+ case GD_OP_GSYNC_SET:
+ {
+ if (ctx) {
+ ret = dict_get_str (ctx, "errstr", &errstr);
+ ret = dict_set_str (ctx, "glusterd_workdir", conf->workdir);
+ /* swallow error here, that will be re-triggered in cli */
+
+ }
+ break;
+
+ }
+ case GD_OP_QUOTA:
+ {
+ if (ctx && !op_errstr) {
+ ret = dict_get_str (ctx, "errstr", &errstr);
+ }
+ break;
+ }
+ case GD_OP_PROFILE_VOLUME:
+ {
+ if (ctx && dict_get_int32 (ctx, "count", &count)) {
+ ret = dict_set_int32 (ctx, "count", 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set count in dictionary");
+ }
+ }
+ break;
+ }
+ case GD_OP_START_BRICK:
+ case GD_OP_STOP_BRICK:
+ {
+ gf_log (this->name, GF_LOG_DEBUG, "op '%s' not supported",
+ gd_op_list[op]);
+ break;
+ }
+ case GD_OP_NONE:
+ case GD_OP_MAX:
+ {
+ gf_log (this->name, GF_LOG_ERROR, "invalid operation");
+ break;
+ }
+ case GD_OP_CREATE_VOLUME:
+ case GD_OP_START_VOLUME:
+ case GD_OP_STOP_VOLUME:
+ case GD_OP_DELETE_VOLUME:
+ case GD_OP_DEFRAG_VOLUME:
+ case GD_OP_ADD_BRICK:
+ case GD_OP_LOG_ROTATE:
+ case GD_OP_SYNC_VOLUME:
+ case GD_OP_STATEDUMP_VOLUME:
+ case GD_OP_REPLACE_BRICK:
+ case GD_OP_STATUS_VOLUME:
+ case GD_OP_SET_VOLUME:
+ case GD_OP_LIST_VOLUME:
+ case GD_OP_CLEARLOCKS_VOLUME:
+ case GD_OP_HEAL_VOLUME:
+ case GD_OP_SNAP:
+ {
+ /*nothing specific to be done*/
+ break;
+ }
+ case GD_OP_COPY_FILE:
+ {
+ if (ctx)
+ ret = dict_get_str (ctx, "errstr", &errstr);
+ break;
+ }
+ case GD_OP_SYS_EXEC:
+ {
+ if (ctx) {
+ ret = dict_get_str (ctx, "errstr", &errstr);
+ ret = dict_set_str (ctx, "glusterd_workdir",
+ conf->workdir);
+ }
+ break;
+ }
+ }
+
+ rsp.op_ret = op_ret;
+ rsp.op_errno = errno;
+ if (errstr)
+ rsp.op_errstr = errstr;
+ else if (op_errstr)
+ rsp.op_errstr = op_errstr;
+
+ if (!rsp.op_errstr)
+ rsp.op_errstr = "";
+
+ if (ctx) {
+ ret = dict_allocate_and_serialize (ctx, &rsp.dict.dict_val,
+ &rsp.dict.dict_len);
+ if (ret < 0 )
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "serialize buffer");
+ else
+ free_ptr = rsp.dict.dict_val;
+ }
+
+ /* needed by 'rebalance status' */
+ if (status)
+ rsp.op_errno = status;
+
+ cli_rsp = &rsp;
+ xdrproc = (xdrproc_t) xdr_gf_cli_rsp;
+
+ glusterd_to_cli (req, cli_rsp, NULL, 0, NULL,
+ xdrproc, ctx);
+ ret = 0;
+
+ GF_FREE (free_ptr);
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_big_locked_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe, fop_cbk_fn_t fn)
+{
+ glusterd_conf_t *priv = THIS->private;
+ int ret = -1;
+
+ synclock_lock (&priv->big_lock);
+ ret = fn (req, iov, count, myframe);
+ synclock_unlock (&priv->big_lock);
+
+ return ret;
+}
+
+int
+__glusterd_probe_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gd1_mgmt_probe_rsp rsp = {{0},};
+ int ret = 0;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_friend_sm_event_t *event = NULL;
+ glusterd_probe_ctx_t *ctx = NULL;
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_probe_rsp);
+ if (ret < 0) {
+ gf_log ("", GF_LOG_ERROR, "error");
+ //rsp.op_ret = -1;
+ //rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ gf_log ("glusterd", GF_LOG_INFO,
+ "Received probe resp from uuid: %s, host: %s",
+ uuid_utoa (rsp.uuid), rsp.hostname);
+ if (rsp.op_ret != 0) {
+ ctx = ((call_frame_t *)myframe)->local;
+ ((call_frame_t *)myframe)->local = NULL;
+
+ GF_ASSERT (ctx);
+
+ if (ctx->req) {
+ glusterd_xfer_cli_probe_resp (ctx->req, rsp.op_ret,
+ rsp.op_errno,
+ rsp.op_errstr,
+ ctx->hostname, ctx->port,
+ ctx->dict);
+ }
+
+ glusterd_destroy_probe_ctx (ctx);
+ (void) glusterd_friend_remove (rsp.uuid, rsp.hostname);
+ ret = rsp.op_ret;
+ goto out;
+ }
+ ret = glusterd_friend_find (rsp.uuid, rsp.hostname, &peerinfo);
+ if (ret) {
+ GF_ASSERT (0);
+ }
+
+ if (strncasecmp (rsp.hostname, peerinfo->hostname, 1024)) {
+ gf_log (THIS->name, GF_LOG_INFO, "Host: %s with uuid: %s "
+ "already present in cluster with alias hostname: %s",
+ rsp.hostname, uuid_utoa (rsp.uuid), peerinfo->hostname);
+
+ ctx = ((call_frame_t *)myframe)->local;
+ ((call_frame_t *)myframe)->local = NULL;
+
+ GF_ASSERT (ctx);
+
+ rsp.op_errno = GF_PROBE_FRIEND;
+ if (ctx->req) {
+ glusterd_xfer_cli_probe_resp (ctx->req, rsp.op_ret,
+ rsp.op_errno,
+ rsp.op_errstr,
+ ctx->hostname, ctx->port,
+ ctx->dict);
+ }
+
+ glusterd_destroy_probe_ctx (ctx);
+ (void) glusterd_friend_remove (NULL, rsp.hostname);
+ ret = rsp.op_ret;
+ goto out;
+ }
+
+ uuid_copy (peerinfo->uuid, rsp.uuid);
+
+ ret = glusterd_friend_sm_new_event
+ (GD_FRIEND_EVENT_INIT_FRIEND_REQ, &event);
+
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "Unable to get event");
+ goto out;
+ }
+
+ event->peerinfo = peerinfo;
+ event->ctx = ((call_frame_t *)myframe)->local;
+ ((call_frame_t *)myframe)->local = NULL;
+ ret = glusterd_friend_sm_inject_event (event);
+
+
+ if (!ret) {
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+ }
+
+ gf_log ("glusterd", GF_LOG_INFO, "Received resp to probe req");
+
+out:
+ free (rsp.hostname);//malloced by xdr
+ GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe));
+ return ret;
+}
+
+int
+glusterd_probe_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ __glusterd_probe_cbk);
+}
+
+
+int
+__glusterd_friend_add_cbk (struct rpc_req * req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gd1_mgmt_friend_rsp rsp = {{0},};
+ int ret = -1;
+ glusterd_friend_sm_event_t *event = NULL;
+ glusterd_friend_sm_event_type_t event_type = GD_FRIEND_EVENT_NONE;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = -1;
+ glusterd_probe_ctx_t *ctx = NULL;
+ glusterd_friend_update_ctx_t *ev_ctx = NULL;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_friend_rsp);
+ if (ret < 0) {
+ gf_log ("", GF_LOG_ERROR, "error");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+
+ gf_log ("glusterd", GF_LOG_INFO,
+ "Received %s from uuid: %s, host: %s, port: %d",
+ (op_ret)?"RJT":"ACC", uuid_utoa (rsp.uuid), rsp.hostname, rsp.port);
+
+ ret = glusterd_friend_find (rsp.uuid, rsp.hostname, &peerinfo);
+
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "received friend add response from"
+ " unknown peer uuid: %s", uuid_utoa (rsp.uuid));
+ goto out;
+ }
+
+ if (op_ret)
+ event_type = GD_FRIEND_EVENT_RCVD_RJT;
+ else
+ event_type = GD_FRIEND_EVENT_RCVD_ACC;
+
+ ret = glusterd_friend_sm_new_event (event_type, &event);
+
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "Unable to get event");
+ goto out;
+ }
+ event->peerinfo = peerinfo;
+ ev_ctx = GF_CALLOC (1, sizeof (*ev_ctx),
+ gf_gld_mt_friend_update_ctx_t);
+ if (!ev_ctx) {
+ ret = -1;
+ goto out;
+ }
+
+ uuid_copy (ev_ctx->uuid, rsp.uuid);
+ ev_ctx->hostname = gf_strdup (rsp.hostname);
+
+ event->ctx = ev_ctx;
+ ret = glusterd_friend_sm_inject_event (event);
+
+ if (ret)
+ goto out;
+
+out:
+ ctx = ((call_frame_t *)myframe)->local;
+ ((call_frame_t *)myframe)->local = NULL;
+
+ GF_ASSERT (ctx);
+
+ if (ctx->req)//reverse probe doesn't have req
+ ret = glusterd_xfer_cli_probe_resp (ctx->req, op_ret, op_errno,
+ NULL, ctx->hostname,
+ ctx->port, ctx->dict);
+ if (!ret) {
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+ }
+ if (ctx)
+ glusterd_destroy_probe_ctx (ctx);
+ free (rsp.hostname);//malloced by xdr
+ GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe));
+ return ret;
+}
+
+int
+glusterd_friend_add_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ __glusterd_friend_add_cbk);
+}
+
+int
+__glusterd_friend_remove_cbk (struct rpc_req * req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gd1_mgmt_friend_rsp rsp = {{0},};
+ glusterd_conf_t *conf = NULL;
+ int ret = -1;
+ glusterd_friend_sm_event_t *event = NULL;
+ glusterd_friend_sm_event_type_t event_type = GD_FRIEND_EVENT_NONE;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = -1;
+ glusterd_probe_ctx_t *ctx = NULL;
+ gf_boolean_t move_sm_now = _gf_true;
+
+ conf = THIS->private;
+ GF_ASSERT (conf);
+
+ ctx = ((call_frame_t *)myframe)->local;
+ ((call_frame_t *)myframe)->local = NULL;
+ GF_ASSERT (ctx);
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ move_sm_now = _gf_false;
+ goto inject;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_friend_rsp);
+ if (ret < 0) {
+ gf_log ("", GF_LOG_ERROR, "error");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto respond;
+ }
+
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+
+ gf_log ("glusterd", GF_LOG_INFO,
+ "Received %s from uuid: %s, host: %s, port: %d",
+ (op_ret)?"RJT":"ACC", uuid_utoa (rsp.uuid), rsp.hostname, rsp.port);
+
+inject:
+ ret = glusterd_friend_find (rsp.uuid, ctx->hostname, &peerinfo);
+
+ if (ret) {
+ //can happen as part of rpc clnt connection cleanup
+ //when the frame timeout happens after 30 minutes
+ goto respond;
+ }
+
+ event_type = GD_FRIEND_EVENT_REMOVE_FRIEND;
+
+ ret = glusterd_friend_sm_new_event (event_type, &event);
+
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "Unable to get event");
+ goto respond;
+ }
+ event->peerinfo = peerinfo;
+
+ ret = glusterd_friend_sm_inject_event (event);
+
+ if (ret)
+ goto respond;
+
+ /*friend_sm would be moved on CLNT_DISCONNECT, consequently
+ cleaning up peerinfo. Else, we run the risk of triggering
+ a clnt_destroy within saved_frames_unwind.
+ */
+ op_ret = 0;
+
+
+respond:
+ ret = glusterd_xfer_cli_deprobe_resp (ctx->req, op_ret, op_errno, NULL,
+ ctx->hostname, ctx->dict);
+ if (!ret && move_sm_now) {
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+ }
+
+ if (ctx) {
+ glusterd_broadcast_friend_delete (ctx->hostname, NULL);
+ glusterd_destroy_probe_ctx (ctx);
+ }
+
+ free (rsp.hostname);//malloced by xdr
+ GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe));
+ return ret;
+}
+
+int
+glusterd_friend_remove_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ __glusterd_friend_remove_cbk);
+}
+
+int32_t
+__glusterd_friend_update_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ gd1_mgmt_friend_update_rsp rsp = {{0}, };
+ xlator_t *this = NULL;
+
+ GF_ASSERT (req);
+ this = THIS;
+
+ if (-1 == req->rpc_status) {
+ gf_log (this->name, GF_LOG_ERROR, "RPC Error");
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gd1_mgmt_friend_update_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to serialize friend"
+ " update repsonse");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ gf_log (this->name, GF_LOG_INFO, "Received %s from uuid: %s",
+ (ret)?"RJT":"ACC", uuid_utoa (rsp.uuid));
+
+ GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe));
+ return ret;
+}
+
+int
+glusterd_friend_update_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ __glusterd_friend_update_cbk);
+}
+
+int32_t
+__glusterd_cluster_lock_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gd1_mgmt_cluster_lock_rsp rsp = {{0},};
+ int ret = -1;
+ int32_t op_ret = -1;
+ glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ xlator_t *this = NULL;
+ uuid_t *txn_id = &global_txn_id;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_cluster_lock_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to decode lock "
+ "response received from peer");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+out:
+ op_ret = rsp.op_ret;
+
+ gf_log (this->name, (op_ret) ? GF_LOG_ERROR : GF_LOG_DEBUG,
+ "Received lock %s from uuid: %s", (op_ret) ? "RJT" : "ACC",
+ uuid_utoa (rsp.uuid));
+
+ ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_CRITICAL, "Lock response received "
+ "from unknown peer: %s", uuid_utoa (rsp.uuid));
+ }
+
+ if (op_ret) {
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ opinfo.op_ret = op_ret;
+ opinfo.op_errstr = gf_strdup ("Another transaction could be in "
+ "progress. Please try again after"
+ " sometime.");
+ } else {
+ event_type = GD_OP_EVENT_RCVD_ACC;
+ }
+
+ ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL);
+
+ if (!ret) {
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+ }
+
+ GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe));
+ return ret;
+}
+
+int32_t
+glusterd_cluster_lock_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ __glusterd_cluster_lock_cbk);
+}
+
+static int32_t
+glusterd_mgmt_v3_lock_peers_cbk_fn (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gd1_mgmt_v3_lock_rsp rsp = {{0},};
+ int ret = -1;
+ int32_t op_ret = -1;
+ glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ xlator_t *this = NULL;
+ uuid_t *txn_id = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to decode mgmt_v3 lock "
+ "response received from peer");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+out:
+ op_ret = rsp.op_ret;
+
+ txn_id = &rsp.txn_id;
+
+ gf_log (this->name, (op_ret) ? GF_LOG_ERROR : GF_LOG_DEBUG,
+ "Received mgmt_v3 lock %s from uuid: %s",
+ (op_ret) ? "RJT" : "ACC", uuid_utoa (rsp.uuid));
+
+ ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "mgmt_v3 lock response received "
+ "from unknown peer: %s", uuid_utoa (rsp.uuid));
+ }
+
+ if (op_ret) {
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ opinfo.op_ret = op_ret;
+ opinfo.op_errstr = gf_strdup ("Another transaction could be in "
+ "progress. Please try again after"
+ " sometime.");
+ } else {
+ event_type = GD_OP_EVENT_RCVD_ACC;
+ }
+
+ ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL);
+
+ if (!ret) {
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+ }
+
+ GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe));
+ return ret;
+}
+
+int32_t
+glusterd_mgmt_v3_lock_peers_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ glusterd_mgmt_v3_lock_peers_cbk_fn);
+}
+
+static int32_t
+glusterd_mgmt_v3_unlock_peers_cbk_fn (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gd1_mgmt_v3_unlock_rsp rsp = {{0},};
+ int ret = -1;
+ int32_t op_ret = -1;
+ glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ xlator_t *this = NULL;
+ uuid_t *txn_id = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to decode mgmt_v3 unlock "
+ "response received from peer");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+out:
+ op_ret = rsp.op_ret;
+
+ txn_id = &rsp.txn_id;
+
+ gf_log (this->name, (op_ret) ? GF_LOG_ERROR : GF_LOG_DEBUG,
+ "Received mgmt_v3 unlock %s from uuid: %s",
+ (op_ret) ? "RJT" : "ACC",
+ uuid_utoa (rsp.uuid));
+
+ ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "mgmt_v3 unlock response received "
+ "from unknown peer: %s", uuid_utoa (rsp.uuid));
+ }
+
+ if (op_ret) {
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ opinfo.op_ret = op_ret;
+ opinfo.op_errstr = gf_strdup ("Another transaction could be in "
+ "progress. Please try again after"
+ " sometime.");
+ } else {
+ event_type = GD_OP_EVENT_RCVD_ACC;
+ }
+
+ ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL);
+
+ if (!ret) {
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+ }
+
+ GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe));
+ return ret;
+}
+
+int32_t
+glusterd_mgmt_v3_unlock_peers_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ glusterd_mgmt_v3_unlock_peers_cbk_fn);
+}
+
+int32_t
+__glusterd_cluster_unlock_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gd1_mgmt_cluster_lock_rsp rsp = {{0},};
+ int ret = -1;
+ int32_t op_ret = -1;
+ glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ xlator_t *this = NULL;
+ uuid_t *txn_id = &global_txn_id;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_cluster_unlock_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to decode unlock "
+ "response received from peer");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+out:
+ op_ret = rsp.op_ret;
+
+ gf_log (this->name, (op_ret) ? GF_LOG_ERROR : GF_LOG_DEBUG,
+ "Received unlock %s from uuid: %s",
+ (op_ret)?"RJT":"ACC", uuid_utoa (rsp.uuid));
+
+ ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_CRITICAL, "Unlock response received "
+ "from unknown peer %s", uuid_utoa (rsp.uuid));
+ }
+
+ if (op_ret) {
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ opinfo.op_ret = op_ret;
+ } else {
+ event_type = GD_OP_EVENT_RCVD_ACC;
+ }
+
+ ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL);
+
+ if (!ret) {
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+ }
+
+ GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe));
+ return ret;
+}
+
+int32_t
+glusterd_cluster_unlock_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ __glusterd_cluster_unlock_cbk);
+}
+
+int32_t
+__glusterd_stage_op_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gd1_mgmt_stage_op_rsp rsp = {{0},};
+ int ret = -1;
+ int32_t op_ret = -1;
+ glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ dict_t *dict = NULL;
+ char err_str[2048] = {0};
+ char *peer_str = NULL;
+ xlator_t *this = NULL;
+ uuid_t *txn_id = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ /* use standard allocation because to keep uniformity
+ in freeing it */
+ rsp.op_errstr = strdup ("error");
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_stage_op_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to decode stage "
+ "response received from peer");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ /* use standard allocation because to keep uniformity
+ in freeing it */
+ rsp.op_errstr = strdup ("Failed to decode stage response "
+ "received from peer.");
+ goto out;
+ }
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to "
+ "unserialize rsp-buffer to dictionary");
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ goto out;
+ } else {
+ dict->extra_stdfree = rsp.dict.dict_val;
+ }
+ }
+
+out:
+ op_ret = rsp.op_ret;
+
+ gf_log (this->name, (op_ret) ? GF_LOG_ERROR : GF_LOG_DEBUG,
+ "Received stage %s from uuid: %s",
+ (op_ret) ? "RJT" : "ACC", uuid_utoa (rsp.uuid));
+
+ ret = dict_get_bin (dict, "transaction_id", (void **)&txn_id);
+
+ gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id));
+
+ ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_CRITICAL, "Stage response received "
+ "from unknown peer: %s", uuid_utoa (rsp.uuid));
+ }
+
+ if (op_ret) {
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ opinfo.op_ret = op_ret;
+ if (strcmp ("", rsp.op_errstr)) {
+ opinfo.op_errstr = gf_strdup (rsp.op_errstr);
+ } else {
+ if (peerinfo)
+ peer_str = peerinfo->hostname;
+ else
+ peer_str = uuid_utoa (rsp.uuid);
+ snprintf (err_str, sizeof (err_str),
+ OPERRSTR_STAGE_FAIL, peer_str);
+ opinfo.op_errstr = gf_strdup (err_str);
+ }
+ if (!opinfo.op_errstr) {
+ ret = -1;
+ goto out;
+ }
+ } else {
+ event_type = GD_OP_EVENT_RCVD_ACC;
+ }
+
+ switch (rsp.op) {
+ case GD_OP_REPLACE_BRICK:
+ glusterd_rb_use_rsp_dict (NULL, dict);
+ break;
+ }
+
+ ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL);
+
+ if (!ret) {
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+ }
+
+ free (rsp.op_errstr); //malloced by xdr
+ if (dict) {
+ if (!dict->extra_stdfree && rsp.dict.dict_val)
+ free (rsp.dict.dict_val); //malloced by xdr
+ dict_unref (dict);
+ } else {
+ free (rsp.dict.dict_val); //malloced by xdr
+ }
+ GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe));
+ return ret;
+}
+
+int32_t
+glusterd_stage_op_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ __glusterd_stage_op_cbk);
+}
+
+int32_t
+__glusterd_commit_op_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gd1_mgmt_commit_op_rsp rsp = {{0},};
+ int ret = -1;
+ int32_t op_ret = -1;
+ glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ dict_t *dict = NULL;
+ char err_str[2048] = {0};
+ char *peer_str = NULL;
+ xlator_t *this = NULL;
+ uuid_t *txn_id = NULL;
+
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ /* use standard allocation because to keep uniformity
+ in freeing it */
+ rsp.op_errstr = strdup ("error");
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_commit_op_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to decode commit "
+ "response received from peer");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ /* use standard allocation because to keep uniformity
+ in freeing it */
+ rsp.op_errstr = strdup ("Failed to decode commit response "
+ "received from peer.");
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ goto out;
+ }
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to "
+ "unserialize rsp-buffer to dictionary");
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ goto out;
+ } else {
+ dict->extra_stdfree = rsp.dict.dict_val;
+ }
+ }
+
+ op_ret = rsp.op_ret;
+
+ gf_log (this->name, (op_ret) ? GF_LOG_ERROR : GF_LOG_DEBUG,
+ "Received commit %s from uuid: %s",
+ (op_ret)?"RJT":"ACC", uuid_utoa (rsp.uuid));
+
+ ret = dict_get_bin (dict, "transaction_id", (void **)&txn_id);
+
+ gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id));
+
+ ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_CRITICAL, "Commit response for "
+ "'Volume %s' received from unknown peer: %s",
+ gd_op_list[opinfo.op], uuid_utoa (rsp.uuid));
+ }
+
+ if (op_ret) {
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ opinfo.op_ret = op_ret;
+ if (strcmp ("", rsp.op_errstr)) {
+ opinfo.op_errstr = gf_strdup(rsp.op_errstr);
+ } else {
+ if (peerinfo)
+ peer_str = peerinfo->hostname;
+ else
+ peer_str = uuid_utoa (rsp.uuid);
+ snprintf (err_str, sizeof (err_str),
+ OPERRSTR_COMMIT_FAIL, peer_str);
+ opinfo.op_errstr = gf_strdup (err_str);
+ }
+ if (!opinfo.op_errstr) {
+ ret = -1;
+ goto out;
+ }
+ } else {
+ event_type = GD_OP_EVENT_RCVD_ACC;
+ switch (rsp.op) {
+ case GD_OP_REPLACE_BRICK:
+ ret = glusterd_rb_use_rsp_dict (NULL, dict);
+ if (ret)
+ goto out;
+ break;
+
+ case GD_OP_SYNC_VOLUME:
+ ret = glusterd_sync_use_rsp_dict (NULL, dict);
+ if (ret)
+ goto out;
+ break;
+
+ case GD_OP_PROFILE_VOLUME:
+ ret = glusterd_profile_volume_use_rsp_dict (NULL, dict);
+ if (ret)
+ goto out;
+ break;
+
+ case GD_OP_GSYNC_SET:
+ ret = glusterd_gsync_use_rsp_dict (NULL, dict, rsp.op_errstr);
+ if (ret)
+ goto out;
+ break;
+
+ case GD_OP_STATUS_VOLUME:
+ ret = glusterd_volume_status_copy_to_op_ctx_dict (NULL, dict);
+ if (ret)
+ goto out;
+ break;
+
+ case GD_OP_REBALANCE:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ ret = glusterd_volume_rebalance_use_rsp_dict (NULL, dict);
+ if (ret)
+ goto out;
+ break;
+
+ case GD_OP_HEAL_VOLUME:
+ ret = glusterd_volume_heal_use_rsp_dict (NULL, dict);
+ if (ret)
+ goto out;
+
+ break;
+
+ default:
+ break;
+ }
+ }
+
+out:
+ ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL);
+
+ if (!ret) {
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+ }
+
+ if (dict)
+ dict_unref (dict);
+ free (rsp.op_errstr); //malloced by xdr
+ GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe));
+ return ret;
+}
+
+int32_t
+glusterd_commit_op_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ __glusterd_commit_op_cbk);
+}
+
+int32_t
+glusterd_rpc_probe (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gd1_mgmt_probe_req req = {{0},};
+ int ret = 0;
+ int port = 0;
+ char *hostname = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ dict_t *dict = NULL;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+ priv = this->private;
+
+ GF_ASSERT (priv);
+ ret = dict_get_str (dict, "hostname", &hostname);
+ if (ret)
+ goto out;
+ ret = dict_get_int32 (dict, "port", &port);
+ if (ret)
+ port = GF_DEFAULT_BASE_PORT;
+
+ ret = dict_get_ptr (dict, "peerinfo", VOID (&peerinfo));
+ if (ret)
+ goto out;
+
+ uuid_copy (req.uuid, MY_UUID);
+ req.hostname = gf_strdup (hostname);
+ req.port = port;
+
+ ret = glusterd_submit_request (peerinfo->rpc, &req, frame, peerinfo->peer,
+ GLUSTERD_PROBE_QUERY,
+ NULL, this, glusterd_probe_cbk,
+ (xdrproc_t)xdr_gd1_mgmt_probe_req);
+
+out:
+ GF_FREE (req.hostname);
+ gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+
+int32_t
+glusterd_rpc_friend_add (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gd1_mgmt_friend_req req = {{0},};
+ int ret = 0;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_friend_sm_event_t *event = NULL;
+ dict_t *vols = NULL;
+
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ event = data;
+ priv = this->private;
+
+ GF_ASSERT (priv);
+
+ peerinfo = event->peerinfo;
+
+ ret = glusterd_build_volume_dict (&vols);
+ if (ret)
+ goto out;
+
+ uuid_copy (req.uuid, MY_UUID);
+ req.hostname = peerinfo->hostname;
+ req.port = peerinfo->port;
+
+ ret = dict_allocate_and_serialize (vols, &req.vols.vols_val,
+ &req.vols.vols_len);
+ if (ret)
+ goto out;
+
+ ret = glusterd_submit_request (peerinfo->rpc, &req, frame, peerinfo->peer,
+ GLUSTERD_FRIEND_ADD,
+ NULL, this, glusterd_friend_add_cbk,
+ (xdrproc_t)xdr_gd1_mgmt_friend_req);
+
+
+out:
+ GF_FREE (req.vols.vols_val);
+
+ if (vols)
+ dict_unref (vols);
+
+ gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_rpc_friend_remove (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gd1_mgmt_friend_req req = {{0},};
+ int ret = 0;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_friend_sm_event_t *event = NULL;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ event = data;
+ priv = this->private;
+
+ GF_ASSERT (priv);
+
+ peerinfo = event->peerinfo;
+
+ uuid_copy (req.uuid, MY_UUID);
+ req.hostname = peerinfo->hostname;
+ req.port = peerinfo->port;
+ ret = glusterd_submit_request (peerinfo->rpc, &req, frame, peerinfo->peer,
+ GLUSTERD_FRIEND_REMOVE, NULL,
+ this, glusterd_friend_remove_cbk,
+ (xdrproc_t)xdr_gd1_mgmt_friend_req);
+
+out:
+ gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+
+int32_t
+glusterd_rpc_friend_update (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gd1_mgmt_friend_update req = {{0},};
+ int ret = 0;
+ glusterd_conf_t *priv = NULL;
+ dict_t *friends = NULL;
+ call_frame_t *dummy_frame = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ friends = data;
+ if (!friends)
+ goto out;
+
+ ret = dict_get_ptr (friends, "peerinfo", VOID(&peerinfo));
+ if (ret)
+ goto out;
+
+ ret = dict_allocate_and_serialize (friends, &req.friends.friends_val,
+ &req.friends.friends_len);
+ if (ret)
+ goto out;
+
+ uuid_copy (req.uuid, MY_UUID);
+
+ dummy_frame = create_frame (this, this->ctx->pool);
+ ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame,
+ peerinfo->peer,
+ GLUSTERD_FRIEND_UPDATE, NULL,
+ this, glusterd_friend_update_cbk,
+ (xdrproc_t)xdr_gd1_mgmt_friend_update);
+
+out:
+ GF_FREE (req.friends.friends_val);
+
+ gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_cluster_lock (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gd1_mgmt_cluster_lock_req req = {{0},};
+ int ret = -1;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ call_frame_t *dummy_frame = NULL;
+
+ if (!this)
+ goto out;
+
+ peerinfo = data;
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ glusterd_get_uuid (&req.uuid);
+
+ dummy_frame = create_frame (this, this->ctx->pool);
+ if (!dummy_frame)
+ goto out;
+
+ ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame,
+ peerinfo->mgmt, GLUSTERD_MGMT_CLUSTER_LOCK,
+ NULL,
+ this, glusterd_cluster_lock_cbk,
+ (xdrproc_t)xdr_gd1_mgmt_cluster_lock_req);
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_mgmt_v3_lock_peers (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gd1_mgmt_v3_lock_req req = {{0},};
+ int ret = -1;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ call_frame_t *dummy_frame = NULL;
+ dict_t *dict = NULL;
+ uuid_t *txn_id = NULL;
+
+ if (!this)
+ goto out;
+
+ dict = data;
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = dict_get_ptr (dict, "peerinfo", VOID (&peerinfo));
+ if (ret)
+ goto out;
+
+ //peerinfo should not be in payload
+ dict_del (dict, "peerinfo");
+
+ glusterd_get_uuid (&req.uuid);
+
+ ret = dict_allocate_and_serialize (dict, &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to serialize dict "
+ "to request buffer");
+ goto out;
+ }
+
+ /* Sending valid transaction ID to peers */
+ ret = dict_get_bin (dict, "transaction_id",
+ (void **)&txn_id);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get transaction id.");
+ goto out;
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Transaction_id = %s", uuid_utoa (*txn_id));
+ uuid_copy (req.txn_id, *txn_id);
+ }
+
+ dummy_frame = create_frame (this, this->ctx->pool);
+ if (!dummy_frame)
+ goto out;
+
+ ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame,
+ peerinfo->mgmt_v3,
+ GLUSTERD_MGMT_V3_LOCK, NULL,
+ this, glusterd_mgmt_v3_lock_peers_cbk,
+ (xdrproc_t)xdr_gd1_mgmt_v3_lock_req);
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_mgmt_v3_unlock_peers (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gd1_mgmt_v3_unlock_req req = {{0},};
+ int ret = -1;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ call_frame_t *dummy_frame = NULL;
+ dict_t *dict = NULL;
+ uuid_t *txn_id = NULL;
+
+ if (!this)
+ goto out;
+
+ dict = data;
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = dict_get_ptr (dict, "peerinfo", VOID (&peerinfo));
+ if (ret)
+ goto out;
+
+ //peerinfo should not be in payload
+ dict_del (dict, "peerinfo");
+
+ glusterd_get_uuid (&req.uuid);
+
+ ret = dict_allocate_and_serialize (dict, &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to serialize dict "
+ "to request buffer");
+ goto out;
+ }
+
+ /* Sending valid transaction ID to peers */
+ ret = dict_get_bin (dict, "transaction_id",
+ (void **)&txn_id);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get transaction id.");
+ goto out;
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Transaction_id = %s", uuid_utoa (*txn_id));
+ uuid_copy (req.txn_id, *txn_id);
+ }
+
+ dummy_frame = create_frame (this, this->ctx->pool);
+ if (!dummy_frame)
+ goto out;
+
+ ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame,
+ peerinfo->mgmt_v3,
+ GLUSTERD_MGMT_V3_UNLOCK, NULL,
+ this, glusterd_mgmt_v3_unlock_peers_cbk,
+ (xdrproc_t)xdr_gd1_mgmt_v3_unlock_req);
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_cluster_unlock (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gd1_mgmt_cluster_lock_req req = {{0},};
+ int ret = -1;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ call_frame_t *dummy_frame = NULL;
+
+ if (!this ) {
+ ret = -1;
+ goto out;
+ }
+ peerinfo = data;
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ glusterd_get_uuid (&req.uuid);
+
+ dummy_frame = create_frame (this, this->ctx->pool);
+ if (!dummy_frame)
+ goto out;
+
+ ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame,
+ peerinfo->mgmt, GLUSTERD_MGMT_CLUSTER_UNLOCK,
+ NULL,
+ this, glusterd_cluster_unlock_cbk,
+ (xdrproc_t)xdr_gd1_mgmt_cluster_unlock_req);
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_stage_op (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gd1_mgmt_stage_op_req req = {{0,},};
+ int ret = -1;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ call_frame_t *dummy_frame = NULL;
+ dict_t *dict = NULL;
+ gf_boolean_t is_alloc = _gf_true;
+
+ if (!this) {
+ goto out;
+ }
+
+ dict = data;
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = dict_get_ptr (dict, "peerinfo", VOID (&peerinfo));
+ if (ret)
+ goto out;
+
+ //peerinfo should not be in payload
+ dict_del (dict, "peerinfo");
+
+ glusterd_get_uuid (&req.uuid);
+ req.op = glusterd_op_get_op ();
+
+ ret = dict_allocate_and_serialize (dict, &req.buf.buf_val,
+ &req.buf.buf_len);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to serialize dict "
+ "to request buffer");
+ goto out;
+ }
+
+
+ dummy_frame = create_frame (this, this->ctx->pool);
+ if (!dummy_frame)
+ goto out;
+
+ ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame,
+ peerinfo->mgmt, GLUSTERD_MGMT_STAGE_OP,
+ NULL,
+ this, glusterd_stage_op_cbk,
+ (xdrproc_t)xdr_gd1_mgmt_stage_op_req);
+
+out:
+ if ((_gf_true == is_alloc) && req.buf.buf_val)
+ GF_FREE (req.buf.buf_val);
+
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_commit_op (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gd1_mgmt_commit_op_req req = {{0,},};
+ int ret = -1;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ call_frame_t *dummy_frame = NULL;
+ dict_t *dict = NULL;
+ gf_boolean_t is_alloc = _gf_true;
+
+ if (!this) {
+ goto out;
+ }
+
+ dict = data;
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = dict_get_ptr (dict, "peerinfo", VOID (&peerinfo));
+ if (ret)
+ goto out;
+
+ //peerinfo should not be in payload
+ dict_del (dict, "peerinfo");
+
+ glusterd_get_uuid (&req.uuid);
+ req.op = glusterd_op_get_op ();
+
+ ret = dict_allocate_and_serialize (dict, &req.buf.buf_val,
+ &req.buf.buf_len);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to serialize dict to "
+ "request buffer");
+ goto out;
+ }
+
+ dummy_frame = create_frame (this, this->ctx->pool);
+ if (!dummy_frame)
+ goto out;
+
+ ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame,
+ peerinfo->mgmt, GLUSTERD_MGMT_COMMIT_OP,
+ NULL,
+ this, glusterd_commit_op_cbk,
+ (xdrproc_t)xdr_gd1_mgmt_commit_op_req);
+
+out:
+ if ((_gf_true == is_alloc) && req.buf.buf_val)
+ GF_FREE (req.buf.buf_val);
+
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+__glusterd_brick_op_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gd1_mgmt_brick_op_rsp rsp = {0};
+ int ret = -1;
+ int32_t op_ret = -1;
+ glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE;
+ call_frame_t *frame = NULL;
+ glusterd_op_brick_rsp_ctx_t *ev_ctx = NULL;
+ dict_t *dict = NULL;
+ int index = 0;
+ glusterd_req_ctx_t *req_ctx = NULL;
+ glusterd_pending_node_t *node = NULL;
+ xlator_t *this = NULL;
+ uuid_t *txn_id = &global_txn_id;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ GF_ASSERT (req);
+ frame = myframe;
+ req_ctx = frame->local;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ /* use standard allocation because to keep uniformity
+ in freeing it */
+ rsp.op_errstr = strdup ("error");
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to decode brick op "
+ "response received");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ rsp.op_errstr = strdup ("Unable to decode brick op response");
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ goto out;
+ }
+
+ if (rsp.output.output_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (rsp.output.output_val,
+ rsp.output.output_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "unserialize rsp-buffer to dictionary");
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ goto out;
+ } else {
+ dict->extra_stdfree = rsp.output.output_val;
+ }
+ }
+
+ op_ret = rsp.op_ret;
+
+ /* Add index to rsp_dict for GD_OP_STATUS_VOLUME */
+ if (GD_OP_STATUS_VOLUME == req_ctx->op) {
+ node = frame->cookie;
+ index = node->index;
+ ret = dict_set_int32 (dict, "index", index);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error setting index on brick status rsp dict");
+ rsp.op_ret = -1;
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ goto out;
+ }
+ }
+out:
+
+ ret = dict_get_bin (req_ctx->dict, "transaction_id", (void **)&txn_id);
+
+ gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id));
+
+ ev_ctx = GF_CALLOC (1, sizeof (*ev_ctx), gf_gld_mt_brick_rsp_ctx_t);
+ GF_ASSERT (ev_ctx);
+ if (op_ret) {
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ ev_ctx->op_ret = op_ret;
+ ev_ctx->op_errstr = gf_strdup(rsp.op_errstr);
+ } else {
+ event_type = GD_OP_EVENT_RCVD_ACC;
+ }
+ ev_ctx->pending_node = frame->cookie;
+ ev_ctx->rsp_dict = dict;
+ ev_ctx->commit_ctx = frame->local;
+ ret = glusterd_op_sm_inject_event (event_type, txn_id, ev_ctx);
+ if (!ret) {
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+ }
+
+ if (ret && dict)
+ dict_unref (dict);
+ free (rsp.op_errstr); //malloced by xdr
+ GLUSTERD_STACK_DESTROY (frame);
+ return ret;
+}
+
+int32_t
+glusterd_brick_op_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ __glusterd_brick_op_cbk);
+}
+
+int32_t
+glusterd_brick_op (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+
+ gd1_mgmt_brick_op_req *req = NULL;
+ int ret = 0;
+ glusterd_conf_t *priv = NULL;
+ call_frame_t *dummy_frame = NULL;
+ char *op_errstr = NULL;
+ int pending_bricks = 0;
+ glusterd_pending_node_t *pending_node;
+ glusterd_req_ctx_t *req_ctx = NULL;
+ struct rpc_clnt *rpc = NULL;
+ dict_t *op_ctx = NULL;
+ uuid_t *txn_id = &global_txn_id;
+
+ if (!this) {
+ ret = -1;
+ goto out;
+ }
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ req_ctx = data;
+ GF_ASSERT (req_ctx);
+ INIT_LIST_HEAD (&opinfo.pending_bricks);
+ ret = glusterd_op_bricks_select (req_ctx->op, req_ctx->dict, &op_errstr,
+ &opinfo.pending_bricks, NULL);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to select bricks "
+ "while performing brick op during 'Volume %s'",
+ gd_op_list[opinfo.op]);
+ opinfo.op_errstr = op_errstr;
+ goto out;
+ }
+
+
+ ret = dict_get_bin (req_ctx->dict, "transaction_id", (void **)&txn_id);
+
+ gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id));
+
+ list_for_each_entry (pending_node, &opinfo.pending_bricks, list) {
+ dummy_frame = create_frame (this, this->ctx->pool);
+ if (!dummy_frame)
+ continue;
+
+ if ((pending_node->type == GD_NODE_NFS) ||
+ ((pending_node->type == GD_NODE_SHD) &&
+ (req_ctx->op == GD_OP_STATUS_VOLUME)))
+ ret = glusterd_node_op_build_payload
+ (req_ctx->op,
+ (gd1_mgmt_brick_op_req **)&req,
+ req_ctx->dict);
+ else {
+ ret = glusterd_brick_op_build_payload
+ (req_ctx->op, pending_node->node,
+ (gd1_mgmt_brick_op_req **)&req,
+ req_ctx->dict);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "build brick op payload during "
+ "'Volume %s'", gd_op_list[req_ctx->op]);
+ goto out;
+ }
+ }
+
+ dummy_frame->local = data;
+ dummy_frame->cookie = pending_node;
+
+ rpc = glusterd_pending_node_get_rpc (pending_node);
+ if (!rpc) {
+ if (pending_node->type == GD_NODE_REBALANCE) {
+ opinfo.brick_pending_count = 0;
+ ret = 0;
+ if (req) {
+ GF_FREE (req->input.input_val);
+ GF_FREE (req);
+ req = NULL;
+ }
+ GLUSTERD_STACK_DESTROY (dummy_frame);
+
+ op_ctx = glusterd_op_get_ctx ();
+ if (!op_ctx)
+ goto out;
+ glusterd_defrag_volume_node_rsp (req_ctx->dict,
+ NULL, op_ctx);
+
+ goto out;
+ }
+
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR, "Brick Op failed "
+ "due to rpc failure.");
+ goto out;
+ }
+
+ ret = glusterd_submit_request (rpc, req, dummy_frame,
+ priv->gfs_mgmt,
+ req->op, NULL,
+ this, glusterd_brick_op_cbk,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (req) {
+ GF_FREE (req->input.input_val);
+ GF_FREE (req);
+ req = NULL;
+ }
+ if (!ret)
+ pending_bricks++;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "Sent brick op req for operation "
+ "'Volume %s' to %d bricks", gd_op_list[req_ctx->op],
+ pending_bricks);
+ opinfo.brick_pending_count = pending_bricks;
+
+out:
+ if (ret) {
+ glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT,
+ txn_id, data);
+ opinfo.op_ret = ret;
+ }
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+struct rpc_clnt_procedure gd_brick_actors[GLUSTERD_BRICK_MAXVALUE] = {
+ [GLUSTERD_BRICK_NULL] = {"NULL", NULL },
+ [GLUSTERD_BRICK_OP] = {"BRICK_OP", glusterd_brick_op},
+};
+
+struct rpc_clnt_procedure gd_peer_actors[GLUSTERD_FRIEND_MAXVALUE] = {
+ [GLUSTERD_FRIEND_NULL] = {"NULL", NULL },
+ [GLUSTERD_PROBE_QUERY] = {"PROBE_QUERY", glusterd_rpc_probe},
+ [GLUSTERD_FRIEND_ADD] = {"FRIEND_ADD", glusterd_rpc_friend_add},
+ [GLUSTERD_FRIEND_REMOVE] = {"FRIEND_REMOVE", glusterd_rpc_friend_remove},
+ [GLUSTERD_FRIEND_UPDATE] = {"FRIEND_UPDATE", glusterd_rpc_friend_update},
+};
+
+struct rpc_clnt_procedure gd_mgmt_actors[GLUSTERD_MGMT_MAXVALUE] = {
+ [GLUSTERD_MGMT_NULL] = {"NULL", NULL },
+ [GLUSTERD_MGMT_CLUSTER_LOCK] = {"CLUSTER_LOCK", glusterd_cluster_lock},
+ [GLUSTERD_MGMT_CLUSTER_UNLOCK] = {"CLUSTER_UNLOCK", glusterd_cluster_unlock},
+ [GLUSTERD_MGMT_STAGE_OP] = {"STAGE_OP", glusterd_stage_op},
+ [GLUSTERD_MGMT_COMMIT_OP] = {"COMMIT_OP", glusterd_commit_op},
+};
+
+struct rpc_clnt_procedure gd_mgmt_v3_actors[GLUSTERD_MGMT_V3_MAXVALUE] = {
+ [GLUSTERD_MGMT_V3_NULL] = {"NULL", NULL },
+ [GLUSTERD_MGMT_V3_LOCK] = {"MGMT_V3_LOCK", glusterd_mgmt_v3_lock_peers},
+ [GLUSTERD_MGMT_V3_UNLOCK] = {"MGMT_V3_UNLOCK", glusterd_mgmt_v3_unlock_peers},
+};
+
+struct rpc_clnt_program gd_mgmt_prog = {
+ .progname = "glusterd mgmt",
+ .prognum = GD_MGMT_PROGRAM,
+ .progver = GD_MGMT_VERSION,
+ .proctable = gd_mgmt_actors,
+ .numproc = GLUSTERD_MGMT_MAXVALUE,
+};
+
+struct rpc_clnt_program gd_brick_prog = {
+ .progname = "brick operations",
+ .prognum = GD_BRICK_PROGRAM,
+ .progver = GD_BRICK_VERSION,
+ .proctable = gd_brick_actors,
+ .numproc = GLUSTERD_BRICK_MAXVALUE,
+};
+
+struct rpc_clnt_program gd_peer_prog = {
+ .progname = "Peer mgmt",
+ .prognum = GD_FRIEND_PROGRAM,
+ .progver = GD_FRIEND_VERSION,
+ .proctable = gd_peer_actors,
+ .numproc = GLUSTERD_FRIEND_MAXVALUE,
+};
+
+struct rpc_clnt_program gd_mgmt_v3_prog = {
+ .progname = "glusterd mgmt v3",
+ .prognum = GD_MGMT_V3_PROGRAM,
+ .progver = GD_MGMT_V3_VERSION,
+ .proctable = gd_mgmt_v3_actors,
+ .numproc = GLUSTERD_MGMT_V3_MAXVALUE,
+};
diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c
new file mode 100644
index 000000000..c671edf68
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-sm.c
@@ -0,0 +1,1109 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+#include <time.h>
+#include <sys/uio.h>
+#include <sys/resource.h>
+
+#include <libgen.h>
+#include "uuid.h"
+
+#include "fnmatch.h"
+#include "xlator.h"
+#include "protocol-common.h"
+#include "glusterd.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "list.h"
+#include "dict.h"
+#include "compat.h"
+#include "compat-errno.h"
+#include "statedump.h"
+#include "glusterd-sm.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-utils.h"
+#include "glusterd-store.h"
+
+static struct list_head gd_friend_sm_queue;
+
+static char *glusterd_friend_sm_state_names[] = {
+ "Establishing Connection",
+ "Probe Sent to Peer",
+ "Probe Received from Peer",
+ "Peer in Cluster",
+ "Accepted peer request",
+ "Sent and Received peer request",
+ "Peer Rejected",
+ "Peer detach in progress",
+ "Probe Received from peer",
+ "Connected to Peer",
+ "Peer is connected and Accepted",
+ "Invalid State"
+};
+
+static char *glusterd_friend_sm_event_names[] = {
+ "GD_FRIEND_EVENT_NONE",
+ "GD_FRIEND_EVENT_PROBE",
+ "GD_FRIEND_EVENT_INIT_FRIEND_REQ",
+ "GD_FRIEND_EVENT_RCVD_ACC",
+ "GD_FRIEND_EVENT_LOCAL_ACC",
+ "GD_FRIEND_EVENT_RCVD_RJT",
+ "GD_FRIEND_EVENT_LOCAL_RJT",
+ "GD_FRIEND_EVENT_RCVD_FRIEND_REQ",
+ "GD_FRIEND_EVENT_INIT_REMOVE_FRIEND",
+ "GD_FRIEND_EVENT_RCVD_REMOVE_FRIEND",
+ "GD_FRIEND_EVENT_REMOVE_FRIEND",
+ "GD_FRIEND_EVENT_CONNECTED",
+ "GD_FRIEND_EVENT_MAX"
+};
+
+char*
+glusterd_friend_sm_state_name_get (int state)
+{
+ if (state < 0 || state >= GD_FRIEND_STATE_MAX)
+ return glusterd_friend_sm_state_names[GD_FRIEND_STATE_MAX];
+ return glusterd_friend_sm_state_names[state];
+}
+
+char*
+glusterd_friend_sm_event_name_get (int event)
+{
+ if (event < 0 || event >= GD_FRIEND_EVENT_MAX)
+ return glusterd_friend_sm_event_names[GD_FRIEND_EVENT_MAX];
+ return glusterd_friend_sm_event_names[event];
+}
+
+void
+glusterd_destroy_probe_ctx (glusterd_probe_ctx_t *ctx)
+{
+ if (!ctx)
+ return;
+
+ GF_FREE (ctx->hostname);
+ GF_FREE (ctx);
+}
+
+void
+glusterd_destroy_friend_req_ctx (glusterd_friend_req_ctx_t *ctx)
+{
+ if (!ctx)
+ return;
+
+ if (ctx->vols)
+ dict_unref (ctx->vols);
+ GF_FREE (ctx->hostname);
+ GF_FREE (ctx);
+}
+
+void
+glusterd_destroy_friend_update_ctx (glusterd_friend_update_ctx_t *ctx)
+{
+ if (!ctx)
+ return;
+ GF_FREE (ctx->hostname);
+ GF_FREE (ctx);
+}
+
+int
+glusterd_broadcast_friend_delete (char *hostname, uuid_t uuid)
+{
+ int ret = 0;
+ rpc_clnt_procedure_t *proc = NULL;
+ xlator_t *this = NULL;
+ glusterd_friend_update_ctx_t ctx = {{0},};
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ dict_t *friends = NULL;
+ char key[100] = {0,};
+ int32_t count = 0;
+
+ this = THIS;
+ priv = this->private;
+
+ GF_ASSERT (priv);
+
+ ctx.hostname = hostname;
+ ctx.op = GD_FRIEND_UPDATE_DEL;
+
+ friends = dict_new ();
+ if (!friends)
+ goto out;
+
+ snprintf (key, sizeof (key), "op");
+ ret = dict_set_int32 (friends, key, ctx.op);
+ if (ret)
+ goto out;
+
+ snprintf (key, sizeof (key), "hostname");
+ ret = dict_set_str (friends, key, hostname);
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32 (friends, "count", count);
+ if (ret)
+ goto out;
+
+ list_for_each_entry (peerinfo, &priv->peers, uuid_list) {
+ if (!peerinfo->connected || !peerinfo->peer)
+ continue;
+
+ ret = dict_set_static_ptr (friends, "peerinfo", peerinfo);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "failed to set peerinfo");
+ goto out;
+ }
+
+ proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_UPDATE];
+ if (proc->fn) {
+ ret = proc->fn (NULL, this, friends);
+ }
+ }
+
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+
+out:
+ if (friends)
+ dict_unref (friends);
+
+ return ret;
+}
+
+
+static int
+glusterd_ac_none (glusterd_friend_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+
+ return ret;
+}
+
+static int
+glusterd_ac_error (glusterd_friend_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+
+ gf_log ("", GF_LOG_ERROR, "Received event %d ", event->event);
+
+ return ret;
+}
+
+static int
+glusterd_ac_reverse_probe_begin (glusterd_friend_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_friend_sm_event_t *new_event = NULL;
+ glusterd_probe_ctx_t *new_ev_ctx = NULL;
+
+ GF_ASSERT (event);
+ GF_ASSERT (ctx);
+
+ peerinfo = event->peerinfo;
+ ret = glusterd_friend_sm_new_event
+ (GD_FRIEND_EVENT_PROBE, &new_event);
+
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR, "Unable to get new new_event");
+ ret = -1;
+ goto out;
+ }
+
+ new_ev_ctx = GF_CALLOC (1, sizeof(*new_ev_ctx), gf_gld_mt_probe_ctx_t);
+
+ if (!new_ev_ctx) {
+ ret = -1;
+ goto out;
+ }
+
+ new_ev_ctx->hostname = gf_strdup (peerinfo->hostname);
+ new_ev_ctx->port = peerinfo->port;
+ new_ev_ctx->req = NULL;
+
+ new_event->peerinfo = peerinfo;
+ new_event->ctx = new_ev_ctx;
+
+ ret = glusterd_friend_sm_inject_event (new_event);
+
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR, "Unable to inject new_event %d, "
+ "ret = %d", new_event->event, ret);
+ }
+
+out:
+ if (ret) {
+ GF_FREE (new_event);
+ GF_FREE (new_ev_ctx->hostname);
+ GF_FREE (new_ev_ctx);
+ }
+ gf_log ("", GF_LOG_DEBUG, "returning with %d", ret);
+ return ret;
+}
+
+static int
+glusterd_ac_friend_add (glusterd_friend_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+
+ GF_ASSERT (event);
+ peerinfo = event->peerinfo;
+
+ this = THIS;
+ conf = this->private;
+
+ GF_ASSERT (conf);
+
+ if (!peerinfo->peer)
+ goto out;
+ proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_ADD];
+ if (proc->fn) {
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ goto out;
+ }
+ frame->local = ctx;
+ ret = proc->fn (frame, this, event);
+ }
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+
+ return ret;
+}
+
+static int
+glusterd_ac_friend_probe (glusterd_friend_sm_event_t *event, void *ctx)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ glusterd_probe_ctx_t *probe_ctx = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ dict_t *dict = NULL;
+
+ GF_ASSERT (ctx);
+
+ probe_ctx = ctx;
+
+ this = THIS;
+
+ GF_ASSERT (this);
+
+ conf = this->private;
+
+ GF_ASSERT (conf);
+
+ ret = glusterd_friend_find (NULL, probe_ctx->hostname, &peerinfo);
+ if (ret) {
+ //We should not reach this state ideally
+ GF_ASSERT (0);
+ goto out;
+ }
+
+ if (!peerinfo->peer)
+ goto out;
+ proc = &peerinfo->peer->proctable[GLUSTERD_PROBE_QUERY];
+ if (proc->fn) {
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ goto out;
+ }
+ frame->local = ctx;
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+ ret = dict_set_str (dict, "hostname", probe_ctx->hostname);
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32 (dict, "port", probe_ctx->port);
+ if (ret)
+ goto out;
+
+ ret = dict_set_static_ptr (dict, "peerinfo", peerinfo);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "failed to set peerinfo");
+ goto out;
+ }
+
+ ret = proc->fn (frame, this, dict);
+ if (ret)
+ goto out;
+
+ }
+
+
+out:
+ if (dict)
+ dict_unref (dict);
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+
+ return ret;
+}
+
+static int
+glusterd_ac_send_friend_remove_req (glusterd_friend_sm_event_t *event,
+ void *data)
+{
+ int ret = 0;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ glusterd_friend_sm_event_type_t event_type = GD_FRIEND_EVENT_NONE;
+ glusterd_probe_ctx_t *ctx = NULL;
+ glusterd_friend_sm_event_t *new_event = NULL;
+
+ GF_ASSERT (event);
+ peerinfo = event->peerinfo;
+
+ this = THIS;
+ conf = this->private;
+
+ GF_ASSERT (conf);
+
+ ctx = event->ctx;
+
+ if (!peerinfo->connected) {
+ event_type = GD_FRIEND_EVENT_REMOVE_FRIEND;
+
+ ret = glusterd_friend_sm_new_event (event_type, &new_event);
+
+ if (!ret) {
+ new_event->peerinfo = peerinfo;
+ ret = glusterd_friend_sm_inject_event (new_event);
+ } else {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "Unable to get event");
+ }
+
+ if (ctx)
+ ret = glusterd_xfer_cli_deprobe_resp (ctx->req, ret, 0,
+ NULL,
+ ctx->hostname,
+ ctx->dict);
+ glusterd_friend_sm ();
+ glusterd_op_sm ();
+
+ if (ctx) {
+ glusterd_broadcast_friend_delete (ctx->hostname, NULL);
+ glusterd_destroy_probe_ctx (ctx);
+ }
+ goto out;
+ }
+
+ if (!peerinfo->peer)
+ goto out;
+ proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_REMOVE];
+ if (proc->fn) {
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ goto out;
+ }
+ frame->local = data;
+ ret = proc->fn (frame, this, event);
+ }
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+
+ return ret;
+}
+
+static gf_boolean_t
+glusterd_should_update_peer (glusterd_peerinfo_t *peerinfo,
+ glusterd_peerinfo_t *cur_peerinfo)
+{
+ gf_boolean_t is_valid = _gf_false;
+
+ if ((peerinfo == cur_peerinfo) ||
+ (peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED))
+ is_valid = _gf_true;
+
+ return is_valid;
+}
+
+static int
+glusterd_ac_send_friend_update (glusterd_friend_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+ glusterd_peerinfo_t *cur_peerinfo = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ xlator_t *this = NULL;
+ glusterd_friend_update_ctx_t ev_ctx = {{0}};
+ glusterd_conf_t *priv = NULL;
+ dict_t *friends = NULL;
+ char key[100] = {0,};
+ char *dup_buf = NULL;
+ int32_t count = 0;
+
+ GF_ASSERT (event);
+ cur_peerinfo = event->peerinfo;
+
+ this = THIS;
+ priv = this->private;
+
+ GF_ASSERT (priv);
+
+ ev_ctx.op = GD_FRIEND_UPDATE_ADD;
+
+ friends = dict_new ();
+ if (!friends)
+ goto out;
+
+ snprintf (key, sizeof (key), "op");
+ ret = dict_set_int32 (friends, key, ev_ctx.op);
+ if (ret)
+ goto out;
+
+ list_for_each_entry (peerinfo, &priv->peers, uuid_list) {
+ if (!glusterd_should_update_peer (peerinfo, cur_peerinfo))
+ continue;
+
+ count++;
+ snprintf (key, sizeof (key), "friend%d.uuid", count);
+ dup_buf = gf_strdup (uuid_utoa (peerinfo->uuid));
+ ret = dict_set_dynstr (friends, key, dup_buf);
+ if (ret)
+ goto out;
+ snprintf (key, sizeof (key), "friend%d.hostname", count);
+ ret = dict_set_str (friends, key, peerinfo->hostname);
+ if (ret)
+ goto out;
+ gf_log ("", GF_LOG_INFO, "Added uuid: %s, host: %s",
+ dup_buf, peerinfo->hostname);
+ }
+
+ ret = dict_set_int32 (friends, "count", count);
+ if (ret)
+ goto out;
+
+ list_for_each_entry (peerinfo, &priv->peers, uuid_list) {
+ if (!peerinfo->connected || !peerinfo->peer)
+ continue;
+
+ if (!glusterd_should_update_peer (peerinfo, cur_peerinfo))
+ continue;
+
+ ret = dict_set_static_ptr (friends, "peerinfo", peerinfo);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "failed to set peerinfo");
+ goto out;
+ }
+
+ proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_UPDATE];
+ if (proc->fn) {
+ ret = proc->fn (NULL, this, friends);
+ }
+ }
+
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+
+out:
+ if (friends)
+ dict_unref (friends);
+
+ return ret;
+}
+
+static int
+glusterd_peer_detach_cleanup (glusterd_conf_t *priv)
+{
+ int ret = -1;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_volinfo_t *tmp_volinfo = NULL;
+
+ GF_ASSERT (priv);
+
+ list_for_each_entry_safe (volinfo,tmp_volinfo,
+ &priv->volumes, vol_list) {
+ if (!glusterd_friend_contains_vol_bricks (volinfo,
+ MY_UUID)) {
+ gf_log (THIS->name, GF_LOG_INFO,
+ "Deleting stale volume %s", volinfo->volname);
+ ret = glusterd_delete_volume (volinfo);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Error deleting stale volume");
+ goto out;
+ }
+ }
+ }
+ ret = 0;
+out:
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+static int
+glusterd_ac_handle_friend_remove_req (glusterd_friend_sm_event_t *event,
+ void *ctx)
+{
+ int ret = 0;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_friend_req_ctx_t *ev_ctx = NULL;
+ glusterd_friend_sm_event_t *new_event = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ GF_ASSERT (ctx);
+ ev_ctx = ctx;
+ peerinfo = event->peerinfo;
+ GF_ASSERT (peerinfo);
+
+ priv = THIS->private;
+ GF_ASSERT (priv);
+
+ ret = glusterd_xfer_friend_remove_resp (ev_ctx->req, ev_ctx->hostname,
+ ev_ctx->port);
+
+ list_for_each_entry (peerinfo, &priv->peers, uuid_list) {
+
+ ret = glusterd_friend_sm_new_event (GD_FRIEND_EVENT_REMOVE_FRIEND,
+ &new_event);
+ if (ret)
+ goto out;
+
+ new_event->peerinfo = peerinfo;
+
+ ret = glusterd_friend_sm_inject_event (new_event);
+ if (ret)
+ goto out;
+ }
+ ret = glusterd_peer_detach_cleanup (priv);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "Peer detach cleanup was not successful");
+ ret = 0;
+ }
+out:
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning with %d", ret);
+
+ return ret;
+}
+
+static int
+glusterd_ac_friend_remove (glusterd_friend_sm_event_t *event, void *ctx)
+{
+ int ret = -1;
+
+ ret = glusterd_friend_remove_cleanup_vols (event->peerinfo->uuid);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING, "Volumes cleanup failed");
+
+ ret = glusterd_friend_cleanup (event->peerinfo);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Cleanup returned: %d", ret);
+ }
+
+ return 0;
+}
+
+/*static int
+glusterd_ac_none (void *ctx)
+{
+ int ret = 0;
+
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+
+ return ret;
+}*/
+
+static int
+glusterd_ac_handle_friend_add_req (glusterd_friend_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+ uuid_t uuid;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_friend_req_ctx_t *ev_ctx = NULL;
+ glusterd_friend_update_ctx_t *new_ev_ctx = NULL;
+ glusterd_friend_sm_event_t *new_event = NULL;
+ glusterd_friend_sm_event_type_t event_type = GD_FRIEND_EVENT_NONE;
+ int status = 0;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+
+ GF_ASSERT (ctx);
+ ev_ctx = ctx;
+ uuid_copy (uuid, ev_ctx->uuid);
+ peerinfo = event->peerinfo;
+ GF_ASSERT (peerinfo);
+ uuid_copy (peerinfo->uuid, ev_ctx->uuid);
+
+ //Build comparison logic here.
+ ret = glusterd_compare_friend_data (ev_ctx->vols, &status,
+ peerinfo->hostname);
+ if (ret)
+ goto out;
+
+ if (GLUSTERD_VOL_COMP_RJT != status) {
+ event_type = GD_FRIEND_EVENT_LOCAL_ACC;
+ op_ret = 0;
+ } else {
+ event_type = GD_FRIEND_EVENT_LOCAL_RJT;
+ op_errno = GF_PROBE_VOLUME_CONFLICT;
+ op_ret = -1;
+ }
+
+ ret = glusterd_friend_sm_new_event (event_type, &new_event);
+
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Out of Memory");
+ }
+
+ new_event->peerinfo = peerinfo;
+
+ new_ev_ctx = GF_CALLOC (1, sizeof (*new_ev_ctx),
+ gf_gld_mt_friend_update_ctx_t);
+ if (!new_ev_ctx) {
+ ret = -1;
+ goto out;
+ }
+
+ uuid_copy (new_ev_ctx->uuid, ev_ctx->uuid);
+ new_ev_ctx->hostname = gf_strdup (ev_ctx->hostname);
+ new_ev_ctx->op = GD_FRIEND_UPDATE_ADD;
+
+ new_event->ctx = new_ev_ctx;
+
+ glusterd_friend_sm_inject_event (new_event);
+
+ ret = glusterd_xfer_friend_add_resp (ev_ctx->req, ev_ctx->hostname,
+ peerinfo->hostname, ev_ctx->port,
+ op_ret, op_errno);
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+
+ return ret;
+}
+
+static int
+glusterd_friend_sm_transition_state (glusterd_peerinfo_t *peerinfo,
+ glusterd_sm_t *state,
+ glusterd_friend_sm_event_type_t event_type)
+{
+
+ GF_ASSERT (state);
+ GF_ASSERT (peerinfo);
+
+ (void) glusterd_sm_tr_log_transition_add (&peerinfo->sm_log,
+ peerinfo->state.state,
+ state[event_type].next_state,
+ event_type);
+
+ peerinfo->state.state = state[event_type].next_state;
+ return 0;
+}
+
+
+glusterd_sm_t glusterd_state_default [] = {
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none},
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_probe},//EV_PROBE
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_friend_add}, //EV_INIT_FRIEND_REQ
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_handle_friend_add_req}, //EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_send_friend_remove_req}, //EV_INIT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_probe}, //EVENT_CONNECTED
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_MAX
+};
+
+glusterd_sm_t glusterd_state_probe_rcvd [] = {
+ {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none},
+ {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, //EV_PROBE
+ {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, //EV_INIT_FRIEND_REQ
+ {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, //EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, //EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_handle_friend_add_req}, //EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_send_friend_remove_req}, //EV_INIT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EVENT_CONNECTED
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_MAX
+};
+
+glusterd_sm_t glusterd_state_connected_rcvd [] = {
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none},
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EV_PROBE
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EV_INIT_FRIEND_REQ
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_reverse_probe_begin}, //EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_handle_friend_add_req}, //EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_send_friend_remove_req}, //EV_INIT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EVENT_CONNECTED
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EVENT_MAX
+};
+
+glusterd_sm_t glusterd_state_connected_accepted [] = {
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none},
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_friend_probe}, //EV_PROBE
+ {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_friend_add}, //EV_INIT_FRIEND_REQ
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_send_friend_remove_req}, //EV_INIT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_CONNECTED
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_MAX
+};
+
+glusterd_sm_t glusterd_state_req_sent [] = {
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, //EVENT_NONE,
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, //EVENT_PROBE,
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, //EVENT_INIT_FRIEND_REQ,
+ {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, //EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_handle_friend_add_req}, //EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_send_friend_remove_req}, //EVENT_INIT_REMOVE_FRIEND,
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, //EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none},//EVENT_CONNECTED
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none},//EVENT_MAX
+};
+
+glusterd_sm_t glusterd_state_req_rcvd [] = {
+ {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, //EVENT_NONE,
+ {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, //EVENT_PROBE,
+ {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, //EVENT_INIT_FRIEND_REQ,
+ {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, //EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, //EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, //EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_send_friend_remove_req}, //EVENT_INIT_REMOVE_FRIEND,
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_handle_friend_remove_req}, //EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none},//EVENT_CONNECTED
+ {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none},//EVENT_MAX
+};
+
+glusterd_sm_t glusterd_state_befriended [] = {
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, //EVENT_NONE,
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, //EVENT_PROBE,
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, //EVENT_INIT_FRIEND_REQ,
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, //EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_send_friend_update}, //EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_handle_friend_add_req}, //EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_send_friend_remove_req}, //EVENT_INIT_REMOVE_FRIEND,
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_handle_friend_remove_req}, //EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_friend_add},//EVENT_CONNECTED
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none},//EVENT_MAX
+};
+
+glusterd_sm_t glusterd_state_req_sent_rcvd [] = {
+ {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, //EVENT_NONE,
+ {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, //EVENT_PROBE,
+ {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, //EVENT_INIT_FRIEND_REQ,
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_send_friend_update}, //EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, //EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_send_friend_remove_req}, //EVENT_INIT_REMOVE_FRIEND,
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_handle_friend_remove_req}, //EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none},//EVENT_CONNECTED
+ {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none},//EVENT_MAX
+};
+
+glusterd_sm_t glusterd_state_rejected [] = {
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_NONE,
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_friend_probe}, //EVENT_PROBE,
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_friend_add}, //EVENT_INIT_FRIEND_REQ,
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, //EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_handle_friend_add_req}, //EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_send_friend_remove_req}, //EVENT_INIT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_handle_friend_remove_req}, //EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_friend_add},//EVENT_CONNECTED
+ {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none},//EVENT_MAX
+};
+
+glusterd_sm_t glusterd_state_req_accepted [] = {
+ {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, //EVENT_NONE,
+ {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, //EVENT_PROBE,
+ {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, //EVENT_INIT_FRIEND_REQ,
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_send_friend_update}, //EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_send_friend_update}, //EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_handle_friend_add_req}, //EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_send_friend_remove_req}, //EVENT_INIT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_handle_friend_remove_req}, //EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_reverse_probe_begin},//EVENT_CONNECTED
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none},//EVENT_MAX
+};
+
+glusterd_sm_t glusterd_state_unfriend_sent [] = {
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, //EVENT_NONE,
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_error}, //EVENT_PROBE,
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, //EVENT_INIT_FRIEND_REQ,
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, //EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_error}, //EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_error}, //EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_error}, //EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, //EVENT_INIT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, //EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none},//EVENT_CONNECTED
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none},//EVENT_MAX
+};
+
+glusterd_sm_t *glusterd_friend_state_table [] = {
+ glusterd_state_default,
+ glusterd_state_req_sent,
+ glusterd_state_req_rcvd,
+ glusterd_state_befriended,
+ glusterd_state_req_accepted,
+ glusterd_state_req_sent_rcvd,
+ glusterd_state_rejected,
+ glusterd_state_unfriend_sent,
+ glusterd_state_probe_rcvd,
+ glusterd_state_connected_rcvd,
+ glusterd_state_connected_accepted
+};
+
+int
+glusterd_friend_sm_new_event (glusterd_friend_sm_event_type_t event_type,
+ glusterd_friend_sm_event_t **new_event)
+{
+ glusterd_friend_sm_event_t *event = NULL;
+
+ GF_ASSERT (new_event);
+ GF_ASSERT (GD_FRIEND_EVENT_NONE <= event_type &&
+ GD_FRIEND_EVENT_MAX > event_type);
+
+ event = GF_CALLOC (1, sizeof (*event), gf_gld_mt_friend_sm_event_t);
+
+ if (!event)
+ return -1;
+
+ *new_event = event;
+ event->event = event_type;
+ INIT_LIST_HEAD (&event->list);
+
+ return 0;
+}
+
+int
+glusterd_friend_sm_inject_event (glusterd_friend_sm_event_t *event)
+{
+ GF_ASSERT (event);
+ gf_log ("glusterd", GF_LOG_DEBUG, "Enqueue event: '%s'",
+ glusterd_friend_sm_event_name_get (event->event));
+ list_add_tail (&event->list, &gd_friend_sm_queue);
+
+ return 0;
+}
+
+void
+glusterd_destroy_friend_event_context (glusterd_friend_sm_event_t *event)
+{
+ if (!event)
+ return;
+
+ switch (event->event) {
+ case GD_FRIEND_EVENT_RCVD_FRIEND_REQ:
+ case GD_FRIEND_EVENT_RCVD_REMOVE_FRIEND:
+ glusterd_destroy_friend_req_ctx (event->ctx);
+ break;
+ case GD_FRIEND_EVENT_LOCAL_ACC:
+ case GD_FRIEND_EVENT_LOCAL_RJT:
+ case GD_FRIEND_EVENT_RCVD_ACC:
+ case GD_FRIEND_EVENT_RCVD_RJT:
+ glusterd_destroy_friend_update_ctx (event->ctx);
+ break;
+ default:
+ break;
+ }
+}
+
+gf_boolean_t
+gd_does_peer_affect_quorum (glusterd_friend_sm_state_t old_state,
+ glusterd_friend_sm_event_type_t event_type,
+ glusterd_peerinfo_t *peerinfo)
+{
+ gf_boolean_t affects = _gf_false;
+
+ //When glusterd comes up with friends in BEFRIENDED state in store,
+ //wait until compare-data happens.
+ if ((old_state == GD_FRIEND_STATE_BEFRIENDED) &&
+ (event_type != GD_FRIEND_EVENT_RCVD_ACC) &&
+ (event_type != GD_FRIEND_EVENT_LOCAL_ACC))
+ goto out;
+ if ((peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED)
+ && peerinfo->connected) {
+ affects = _gf_true;
+ }
+out:
+ return affects;
+}
+
+int
+glusterd_friend_sm ()
+{
+ glusterd_friend_sm_event_t *event = NULL;
+ glusterd_friend_sm_event_t *tmp = NULL;
+ int ret = -1;
+ glusterd_friend_sm_ac_fn handler = NULL;
+ glusterd_sm_t *state = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_friend_sm_event_type_t event_type = 0;
+ gf_boolean_t is_await_conn = _gf_false;
+ gf_boolean_t quorum_action = _gf_false;
+ glusterd_friend_sm_state_t old_state = GD_FRIEND_STATE_DEFAULT;
+
+ while (!list_empty (&gd_friend_sm_queue)) {
+ list_for_each_entry_safe (event, tmp, &gd_friend_sm_queue, list) {
+
+ list_del_init (&event->list);
+ event_type = event->event;
+ peerinfo = event->peerinfo;
+ if (!peerinfo) {
+ gf_log ("glusterd", GF_LOG_CRITICAL, "Received"
+ " event %s with empty peer info",
+ glusterd_friend_sm_event_name_get (event_type));
+
+ GF_FREE (event);
+ continue;
+ }
+ gf_log ("", GF_LOG_DEBUG, "Dequeued event of type: '%s'",
+ glusterd_friend_sm_event_name_get (event_type));
+
+
+ old_state = peerinfo->state.state;
+ state = glusterd_friend_state_table[peerinfo->state.state];
+
+ GF_ASSERT (state);
+
+ handler = state[event_type].handler;
+ GF_ASSERT (handler);
+
+ ret = handler (event, event->ctx);
+ if (ret == GLUSTERD_CONNECTION_AWAITED) {
+ is_await_conn = _gf_true;
+ ret = 0;
+ }
+
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR, "handler returned: "
+ "%d", ret);
+ glusterd_destroy_friend_event_context (event);
+ GF_FREE (event);
+ continue;
+ }
+
+ if ((GD_FRIEND_EVENT_REMOVE_FRIEND == event_type) ||
+ (GD_FRIEND_EVENT_INIT_REMOVE_FRIEND == event_type)){
+ glusterd_destroy_friend_event_context (event);
+ GF_FREE (event);
+ continue;
+ }
+
+ ret = glusterd_friend_sm_transition_state (peerinfo,
+ state, event_type);
+
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR, "Unable to transition"
+ " state from '%s' to '%s' for event '%s'",
+ glusterd_friend_sm_state_name_get(peerinfo->state.state),
+ glusterd_friend_sm_state_name_get(state[event_type].next_state),
+ glusterd_friend_sm_event_name_get(event_type));
+ goto out;
+ }
+
+ if (gd_does_peer_affect_quorum (old_state, event_type,
+ peerinfo)) {
+ peerinfo->quorum_contrib = QUORUM_UP;
+ if (peerinfo->quorum_action) {
+ peerinfo->quorum_action = _gf_false;
+ quorum_action = _gf_true;
+ }
+ }
+
+ ret = glusterd_store_peerinfo (peerinfo);
+
+ glusterd_destroy_friend_event_context (event);
+ GF_FREE (event);
+ if (is_await_conn)
+ break;
+ }
+ if (is_await_conn)
+ break;
+ }
+
+ ret = 0;
+out:
+ if (quorum_action) {
+ /* When glusterd is restarted, it needs to wait until the 'friends' view
+ * of the volumes settle, before it starts any of the internal daemons.
+ *
+ * Every friend that was part of the cluster, would send its
+ * cluster-view, 'our' way. For every friend, who belongs to
+ * a partition which has a different cluster-view from our
+ * partition, we may update our cluster-view. For subsequent
+ * friends from that partition would agree with us, if the first
+ * friend wasn't rejected. For every first friend, whom we agreed with,
+ * we would need to start internal daemons/bricks belonging to the
+ * new volumes.
+ * glusterd_spawn_daemons calls functions that are idempotent. ie,
+ * the functions spawn process(es) only if they are not started yet.
+ *
+ * */
+ glusterd_spawn_daemons (NULL);
+ glusterd_do_quorum_action ();
+ }
+ return ret;
+}
+
+
+int
+glusterd_friend_sm_init ()
+{
+ INIT_LIST_HEAD (&gd_friend_sm_queue);
+ return 0;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.h b/xlators/mgmt/glusterd/src/glusterd-sm.h
new file mode 100644
index 000000000..b9bedbe69
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-sm.h
@@ -0,0 +1,217 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_SM_H_
+#define _GLUSTERD_SM_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <pthread.h>
+#include "uuid.h"
+
+#include "rpc-clnt.h"
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "call-stub.h"
+#include "fd.h"
+#include "byte-order.h"
+//#include "glusterd.h"
+#include "rpcsvc.h"
+#include "store.h"
+
+typedef enum gd_quorum_contribution_ {
+ QUORUM_NONE,
+ QUORUM_WAITING,
+ QUORUM_DOWN,
+ QUORUM_UP
+} gd_quorum_contrib_t;
+
+typedef enum gd_quorum_status_ {
+ QUORUM_UNKNOWN,
+ QUORUM_NOT_APPLICABLE,
+ QUORUM_MEETS,
+ QUORUM_DOES_NOT_MEET
+} gd_quorum_status_t;
+
+typedef enum glusterd_friend_sm_state_ {
+ GD_FRIEND_STATE_DEFAULT = 0,
+ GD_FRIEND_STATE_REQ_SENT,
+ GD_FRIEND_STATE_REQ_RCVD,
+ GD_FRIEND_STATE_BEFRIENDED,
+ GD_FRIEND_STATE_REQ_ACCEPTED,
+ GD_FRIEND_STATE_REQ_SENT_RCVD,
+ GD_FRIEND_STATE_REJECTED,
+ GD_FRIEND_STATE_UNFRIEND_SENT,
+ GD_FRIEND_STATE_PROBE_RCVD,
+ GD_FRIEND_STATE_CONNECTED_RCVD,
+ GD_FRIEND_STATE_CONNECTED_ACCEPTED,
+ GD_FRIEND_STATE_MAX
+} glusterd_friend_sm_state_t;
+
+typedef struct glusterd_peer_state_info_ {
+ glusterd_friend_sm_state_t state;
+ struct timeval transition_time;
+}glusterd_peer_state_info_t;
+
+typedef struct glusterd_peer_hostname_ {
+ char *hostname;
+ struct list_head hostname_list;
+}glusterd_peer_hostname_t;
+
+typedef struct glusterd_sm_transition_ {
+ int old_state;
+ int event;
+ int new_state;
+ time_t time;
+} glusterd_sm_transition_t;
+
+typedef struct glusterd_sm_tr_log_ {
+ glusterd_sm_transition_t *transitions;
+ size_t current;
+ size_t size;
+ size_t count;
+ char* (*state_name_get) (int);
+ char* (*event_name_get) (int);
+} glusterd_sm_tr_log_t;
+
+struct glusterd_peerinfo_ {
+ uuid_t uuid;
+ char uuid_str[50]; /* Retrieve this using
+ * gd_peer_uuid_str ()
+ */
+ glusterd_peer_state_info_t state;
+ char *hostname;
+ int port;
+ struct list_head uuid_list;
+ struct list_head op_peers_list;
+ struct rpc_clnt *rpc;
+ rpc_clnt_prog_t *mgmt;
+ rpc_clnt_prog_t *peer;
+ rpc_clnt_prog_t *mgmt_v3;
+ int connected;
+ gf_store_handle_t *shandle;
+ glusterd_sm_tr_log_t sm_log;
+ gf_boolean_t quorum_action;
+ gd_quorum_contrib_t quorum_contrib;
+ gf_boolean_t locked;
+};
+
+typedef struct glusterd_peerinfo_ glusterd_peerinfo_t;
+
+typedef enum glusterd_ev_gen_mode_ {
+ GD_MODE_OFF,
+ GD_MODE_ON,
+ GD_MODE_SWITCH_ON
+} glusterd_ev_gen_mode_t;
+
+typedef struct glusterd_peer_ctx_args_ {
+ rpcsvc_request_t *req;
+ glusterd_ev_gen_mode_t mode;
+ dict_t *dict;
+} glusterd_peerctx_args_t;
+
+typedef struct glusterd_peer_ctx_ {
+ glusterd_peerctx_args_t args;
+ glusterd_peerinfo_t *peerinfo;
+ char *errstr;
+} glusterd_peerctx_t;
+
+typedef enum glusterd_friend_sm_event_type_ {
+ GD_FRIEND_EVENT_NONE = 0,
+ GD_FRIEND_EVENT_PROBE,
+ GD_FRIEND_EVENT_INIT_FRIEND_REQ,
+ GD_FRIEND_EVENT_RCVD_ACC,
+ GD_FRIEND_EVENT_LOCAL_ACC,
+ GD_FRIEND_EVENT_RCVD_RJT,
+ GD_FRIEND_EVENT_LOCAL_RJT,
+ GD_FRIEND_EVENT_RCVD_FRIEND_REQ,
+ GD_FRIEND_EVENT_INIT_REMOVE_FRIEND,
+ GD_FRIEND_EVENT_RCVD_REMOVE_FRIEND,
+ GD_FRIEND_EVENT_REMOVE_FRIEND,
+ GD_FRIEND_EVENT_CONNECTED,
+ GD_FRIEND_EVENT_MAX
+} glusterd_friend_sm_event_type_t;
+
+
+typedef enum glusterd_friend_update_op_ {
+ GD_FRIEND_UPDATE_NONE = 0,
+ GD_FRIEND_UPDATE_ADD,
+ GD_FRIEND_UPDATE_DEL,
+} glusterd_friend_update_op_t;
+
+
+struct glusterd_friend_sm_event_ {
+ struct list_head list;
+ glusterd_peerinfo_t *peerinfo;
+ void *ctx;
+ glusterd_friend_sm_event_type_t event;
+};
+
+typedef struct glusterd_friend_sm_event_ glusterd_friend_sm_event_t;
+
+typedef int (*glusterd_friend_sm_ac_fn) (glusterd_friend_sm_event_t *, void *);
+
+typedef struct glusterd_sm_ {
+ glusterd_friend_sm_state_t next_state;
+ glusterd_friend_sm_ac_fn handler;
+} glusterd_sm_t;
+
+typedef struct glusterd_friend_req_ctx_ {
+ uuid_t uuid;
+ char *hostname;
+ rpcsvc_request_t *req;
+ int port;
+ dict_t *vols;
+} glusterd_friend_req_ctx_t;
+
+typedef struct glusterd_friend_update_ctx_ {
+ uuid_t uuid;
+ char *hostname;
+ int op;
+} glusterd_friend_update_ctx_t;
+
+typedef struct glusterd_probe_ctx_ {
+ char *hostname;
+ rpcsvc_request_t *req;
+ int port;
+ dict_t *dict;
+} glusterd_probe_ctx_t;
+int
+glusterd_friend_sm_new_event (glusterd_friend_sm_event_type_t event_type,
+ glusterd_friend_sm_event_t **new_event);
+int
+glusterd_friend_sm_inject_event (glusterd_friend_sm_event_t *event);
+
+int
+glusterd_friend_sm_init ();
+
+int
+glusterd_friend_sm ();
+
+void
+glusterd_destroy_probe_ctx (glusterd_probe_ctx_t *ctx);
+
+void
+glusterd_destroy_friend_req_ctx (glusterd_friend_req_ctx_t *ctx);
+
+char*
+glusterd_friend_sm_state_name_get (int state);
+
+char*
+glusterd_friend_sm_event_name_get (int event);
+
+int
+glusterd_broadcast_friend_delete (char *hostname, uuid_t uuid);
+void
+glusterd_destroy_friend_update_ctx (glusterd_friend_update_ctx_t *ctx);
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
new file mode 100644
index 000000000..9b811cd05
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
@@ -0,0 +1,5590 @@
+/*
+ Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <inttypes.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/resource.h>
+#include <sys/statvfs.h>
+#include <sys/mount.h>
+
+#include "globals.h"
+#include "compat.h"
+#include "protocol-common.h"
+#include "xlator.h"
+#include "logging.h"
+#include "timer.h"
+#include "glusterd-mem-types.h"
+#include "glusterd.h"
+#include "glusterd-sm.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-utils.h"
+#include "glusterd-store.h"
+#include "run.h"
+#include "glusterd-volgen.h"
+#include "glusterd-mgmt.h"
+#include "glusterd-syncop.h"
+
+#include "syscall.h"
+#include "cli1-xdr.h"
+#include "xdr-generic.h"
+
+#ifdef GF_LINUX_HOST_OS
+#include <mntent.h>
+#endif
+
+char snap_mount_folder[PATH_MAX];
+
+static int32_t
+glusterd_find_missed_snap (dict_t *rsp_dict, glusterd_volinfo_t *vol,
+ char *snap_uuid, struct list_head *peers,
+ int32_t op);
+
+/* This function will restore a snapshot volumes
+ *
+ * @param dict dictionary containing snapshot restore request
+ * @param op_errstr In case of any failure error message will be returned
+ * in this variable
+ * @return Negative value on Failure and 0 in success
+ */
+int
+glusterd_snapshot_restore (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+ int ret = -1;
+ char *volname = NULL;
+ char *snapname = NULL;
+ xlator_t *this = NULL;
+ glusterd_volinfo_t *snap_volinfo = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_snap_t *snap = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+
+ GF_ASSERT (this);
+ GF_ASSERT (dict);
+ GF_ASSERT (op_errstr);
+ GF_ASSERT (rsp_dict);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = dict_get_str (dict, "snapname", &snapname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get "
+ "snap name");
+ goto out;
+ }
+
+ snap = glusterd_find_snap_by_name (snapname);
+ if (NULL == snap) {
+ ret = gf_asprintf (op_errstr, "Snap (%s) not found",
+ snapname);
+ if (ret < 0) {
+ goto out;
+ }
+ gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr);
+ ret = -1;
+ goto out;
+ }
+
+ /* TODO : As of now there is only volume in snapshot.
+ * Change this when multiple volume snapshot is introduced
+ */
+ snap_volinfo = list_entry (snap->volumes.next, glusterd_volinfo_t,
+ vol_list);
+
+ ret = glusterd_volinfo_find (snap_volinfo->parent_volname, &volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not get volinfo of "
+ "%s", snap_volinfo->parent_volname);
+ goto out;
+ }
+
+ if (is_origin_glusterd (dict) == _gf_true) {
+ /* From origin glusterd check if *
+ * any peers with snap bricks is down */
+ ret = glusterd_find_missed_snap (rsp_dict, snap_volinfo,
+ snap_volinfo->volname,
+ &priv->peers,
+ GF_SNAP_OPTION_TYPE_RESTORE);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to find missed snap restores");
+ goto out;
+ }
+ }
+
+ ret = gd_restore_snap_volume (rsp_dict, volinfo, snap_volinfo);
+ if (ret) {
+ /* No need to update op_errstr because it is assumed
+ * that the called function will do that in case of
+ * failure.
+ */
+ gf_log (this->name, GF_LOG_ERROR, "Failed to restore "
+ "snap for %s volume", volname);
+ goto out;
+ }
+
+ ret = 0;
+
+ /* TODO: Need to check if we need to delete the snap after the
+ * operation is successful or not. Also need to persist the state
+ * of restore operation in the store.
+ */
+out:
+ return ret;
+}
+
+/* This function is called before actual restore is taken place. This function
+ * will validate whether the snapshot volumes are ready to be restored or not.
+ *
+ * @param dict dictionary containing snapshot restore request
+ * @param op_errstr In case of any failure error message will be returned
+ * in this variable
+ * @param rsp_dict response dictionary
+ * @return Negative value on Failure and 0 in success
+ */
+int
+glusterd_snapshot_restore_prevalidate (dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ int ret = -1;
+ int32_t i = 0;
+ int32_t volcount = 0;
+ gf_boolean_t snap_restored = _gf_false;
+ char key[PATH_MAX] = {0, };
+ char *volname = NULL;
+ char *snapname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_snap_t *snap = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ GF_ASSERT (this);
+ GF_ASSERT (dict);
+ GF_ASSERT (op_errstr);
+ GF_ASSERT (rsp_dict);
+
+ ret = dict_get_str (dict, "snapname", &snapname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get "
+ "snap name");
+ goto out;
+ }
+
+ snap = glusterd_find_snap_by_name (snapname);
+ if (NULL == snap) {
+ ret = gf_asprintf (op_errstr, "Snap (%s) not found",
+ snapname);
+ if (ret < 0) {
+ goto out;
+ }
+ gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr);
+ ret = -1;
+ goto out;
+ }
+
+ snap_restored = snap->snap_restored;
+
+ if (snap_restored) {
+ ret = gf_asprintf (op_errstr, "Snap (%s) is already "
+ "restored", snapname);
+ if (ret < 0) {
+ goto out;
+ }
+ gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str (rsp_dict, "snapname", snapname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set "
+ "snap name");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "volcount", &volcount);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get volume count");
+ goto out;
+ }
+
+ /* Snapshot restore will only work if all the volumes,
+ that are part of the snapshot, are stopped. */
+ for (i = 1; i <= volcount; ++i) {
+ snprintf (key, sizeof (key), "volname%d", i);
+ ret = dict_get_str (dict, key, &volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ ret = gf_asprintf (op_errstr, "Volume (%s) not found",
+ volname);
+ if (ret < 0) {
+ goto out;
+ }
+ gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr);
+ ret = -1;
+ goto out;
+ }
+
+ if (glusterd_is_volume_started (volinfo)) {
+ ret = gf_asprintf (op_errstr, "Volume (%s) has been "
+ "started. Volume needs to be stopped before restoring "
+ "a snapshot.", volname);
+ if (ret < 0) {
+ goto out;
+ }
+ gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+snap_max_hard_limits_validate (dict_t *dict, char *volname,
+ uint64_t value, char **op_errstr)
+{
+ char err_str[PATH_MAX] = "";
+ glusterd_conf_t *conf = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ int ret = -1;
+ uint64_t max_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ GF_ASSERT (this);
+ GF_ASSERT (dict);
+ GF_ASSERT (op_errstr);
+
+ conf = this->private;
+
+ GF_ASSERT (conf);
+
+ if (volname) {
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (!ret) {
+ if (volinfo->is_snap_volume) {
+ ret = -1;
+ snprintf (err_str, PATH_MAX,
+ "%s is a snap volume. Configuring "
+ "snap-max-hard-limit for a snap "
+ "volume is prohibited.", volname);
+ goto out;
+ }
+ }
+ }
+
+ if (value) {
+ /* Max limit for the system is GLUSTERD_SNAPS_MAX_HARD_LIMIT
+ * but max limit for a volume is conf->snap_max_hard_limit.
+ */
+ if (volname) {
+ max_limit = conf->snap_max_hard_limit;
+ } else {
+ max_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT;
+ }
+ }
+
+ if ((value < 0) || (value > max_limit)) {
+ ret = -1;
+ snprintf (err_str, PATH_MAX, "Invalid snap-max-hard-limit"
+ "%"PRIu64 ". Expected range 0 - %"PRIu64,
+ value, max_limit);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret) {
+ *op_errstr = gf_strdup (err_str);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ }
+ return ret;
+}
+
+int
+glusterd_snapshot_config_prevalidate (dict_t *dict, char **op_errstr)
+{
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+ int ret = -1;
+ int config_command = 0;
+ char err_str[PATH_MAX] = {0,};
+ glusterd_conf_t *conf = NULL;
+ uint64_t value = 0;
+ uint64_t hard_limit = 0;
+ uint64_t soft_limit = 0;
+ gf_loglevel_t loglevel = GF_LOG_ERROR;
+ uint64_t max_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT;
+
+ this = THIS;
+
+ GF_ASSERT (this);
+ GF_ASSERT (dict);
+ GF_ASSERT (op_errstr);
+
+ conf = this->private;
+
+ GF_ASSERT (conf);
+
+ ret = dict_get_int32 (dict, "config-command", &config_command);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str),
+ "failed to get config-command type");
+ goto out;
+ }
+
+ ret = dict_get_uint64 (dict, "snap-max-hard-limit", &hard_limit);
+
+ ret = dict_get_uint64 (dict, "snap-max-soft-limit", &soft_limit);
+
+ ret = dict_get_str (dict, "volname", &volname);
+
+ if (volname) {
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str),
+ "Volume %s does not exist.", volname);
+ goto out;
+ }
+ }
+
+ switch (config_command) {
+ case GF_SNAP_CONFIG_TYPE_SET:
+ if (hard_limit) {
+ /* Validations for snap-max-hard-limits */
+ ret = snap_max_hard_limits_validate (dict, volname,
+ hard_limit, op_errstr);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "snap-max-hard-limit validation "
+ "failed.");
+ goto out;
+ }
+ }
+
+ if (soft_limit) {
+ max_limit = GLUSTERD_SNAPS_MAX_SOFT_LIMIT_PERCENT;
+ if ((soft_limit < 0) || (soft_limit > max_limit)) {
+ ret = -1;
+ snprintf (err_str, PATH_MAX, "Invalid "
+ "snap-max-soft-limit ""%"
+ PRIu64 ". Expected range 0 - %"PRIu64,
+ value, max_limit);
+ goto out;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+
+ ret = 0;
+out:
+
+ if (ret && err_str[0] != '\0') {
+ gf_log (this->name, loglevel, "%s", err_str);
+ *op_errstr = gf_strdup (err_str);
+ }
+
+ return ret;
+}
+
+int
+glusterd_snap_create_pre_val_use_rsp_dict (dict_t *dst, dict_t *src)
+{
+ char *snap_brick_dir = NULL;
+ char *snap_device = NULL;
+ char *tmpstr = NULL;
+ char key[PATH_MAX] = "";
+ char snapbrckcnt[PATH_MAX] = "";
+ char snapbrckord[PATH_MAX] = "";
+ int ret = -1;
+ int64_t i = -1;
+ int64_t j = -1;
+ int64_t volume_count = 0;
+ int64_t brick_count = 0;
+ int64_t brick_order = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (dst);
+ GF_ASSERT (src);
+
+ ret = dict_get_int64 (src, "volcount", &volume_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "get the volume count");
+ goto out;
+ }
+
+ for (i = 0; i < volume_count; i++) {
+ memset (snapbrckcnt, '\0', sizeof(snapbrckcnt));
+ ret = snprintf (snapbrckcnt, sizeof(snapbrckcnt) - 1,
+ "vol%ld_brickcount", i+1);
+ ret = dict_get_int64 (src, snapbrckcnt, &brick_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "No bricks for this volume in this dict");
+ continue;
+ }
+
+ for (j = 0; j < brick_count; j++) {
+ /* Fetching data from source dict */
+ snprintf (key, sizeof(key) - 1,
+ "vol%ld.brickdir%ld", i+1, j);
+
+ ret = dict_get_ptr (src, key,
+ (void **)&snap_brick_dir);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unable to fetch %s", key);
+ continue;
+ }
+
+ snprintf (key, sizeof(key) - 1,
+ "vol%ld.brick_snapdevice%ld", i+1, j);
+
+ ret = dict_get_ptr (src, key,
+ (void **)&snap_device);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to fetch snap_device");
+ goto out;
+ }
+
+ snprintf (snapbrckord, sizeof(snapbrckord) - 1,
+ "vol%ld.brick%ld.order", i+1, j);
+
+ ret = dict_get_int64 (src, snapbrckord, &brick_order);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get brick order");
+ goto out;
+ }
+
+ /* Adding the data in the dst dict */
+ snprintf (key, sizeof(key) - 1,
+ "vol%ld.brickdir%ld", i+1, brick_order);
+
+ tmpstr = gf_strdup (snap_brick_dir);
+ if (!tmpstr) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out Of Memory");
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_dynstr (dst, key, tmpstr);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set %s", key);
+ GF_FREE (tmpstr);
+ goto out;
+ }
+
+ snprintf (key, sizeof(key) - 1,
+ "vol%ld.brick_snapdevice%ld",
+ i+1, brick_order);
+
+ tmpstr = gf_strdup (snap_device);
+ if (!tmpstr) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_dynstr (dst, key, tmpstr);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set %s", key);
+ GF_FREE (tmpstr);
+ goto out;
+ }
+
+ }
+ }
+
+ ret = 0;
+out:
+
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_snap_pre_validate_use_rsp_dict (dict_t *dst, dict_t *src)
+{
+ int ret = -1;
+ int32_t snap_command = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ if (!dst || !src) {
+ gf_log (this->name, GF_LOG_ERROR, "Source or Destination "
+ "dict is empty.");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dst, "type", &snap_command);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "unable to get the type of "
+ "the snapshot command");
+ goto out;
+ }
+
+ switch (snap_command) {
+ case GF_SNAP_OPTION_TYPE_CREATE:
+ ret = glusterd_snap_create_pre_val_use_rsp_dict (dst, src);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to use "
+ "rsp dict");
+ goto out;
+ }
+ break;
+ default:
+ break;
+ }
+
+ ret = 0;
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_snapshot_create_prevalidate (dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ char *volname = NULL;
+ char *snapname = NULL;
+ char *device = NULL;
+ char *tmpstr = NULL;
+ char *brick_dir = NULL;
+ char snap_brick_dir[PATH_MAX] = "";
+ char *mnt_pt = NULL;
+ char key[PATH_MAX] = "";
+ char snap_mount[PATH_MAX] = "";
+ char snap_volname[64] = "";
+ char err_str[PATH_MAX] = "";
+ int ret = -1;
+ int64_t i = 0;
+ int64_t volcount = 0;
+ int64_t brick_count = 0;
+ int64_t brick_order = 0;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+ uuid_t *snap_volid = NULL;
+ gf_loglevel_t loglevel = GF_LOG_ERROR;
+ glusterd_conf_t *conf = NULL;
+ int64_t effective_max_limit = 0;
+
+ this = THIS;
+ GF_ASSERT (op_errstr);
+ conf = this->private;
+ GF_ASSERT (conf);
+
+ ret = dict_get_int64 (dict, "volcount", &volcount);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Failed to "
+ "get the volume count");
+ goto out;
+ }
+ if (volcount <= 0) {
+ snprintf (err_str, sizeof (err_str), "Invalid volume count %ld "
+ "supplied", volcount);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "snapname", &snapname);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Failed to get snapname");
+ goto out;
+ }
+
+ if (glusterd_find_snap_by_name (snapname)) {
+ ret = -1;
+ snprintf (err_str, sizeof (err_str), "Snap %s already exists",
+ snapname);
+ goto out;
+ }
+
+ for (i = 1; i <= volcount; i++) {
+ snprintf (key, sizeof (key), "volname%ld", i);
+ ret = dict_get_str (dict, key, &volname);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str),
+ "failed to get volume name");
+ goto out;
+ }
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str),
+ "Volume (%s) does not exist ", volname);
+ goto out;
+ }
+
+ ret = -1;
+ if (!glusterd_is_volume_started (volinfo)) {
+ snprintf (err_str, sizeof (err_str), "volume %s is "
+ "not started", volinfo->volname);
+ loglevel = GF_LOG_WARNING;
+ goto out;
+ }
+ if (glusterd_is_defrag_on (volinfo)) {
+ snprintf (err_str, sizeof (err_str),
+ "rebalance process is running for the "
+ "volume %s", volname);
+ loglevel = GF_LOG_WARNING;
+ goto out;
+ }
+ /* TODO: Also check whether geo replication is running */
+
+ if (volinfo->is_snap_volume == _gf_true) {
+ snprintf (err_str, sizeof (err_str),
+ "Volume %s is a snap volume", volname);
+ loglevel = GF_LOG_WARNING;
+ goto out;
+ }
+
+ if (volinfo->snap_max_hard_limit < conf->snap_max_hard_limit)
+ effective_max_limit = volinfo->snap_max_hard_limit;
+ else
+ effective_max_limit = conf->snap_max_hard_limit;
+
+ if (volinfo->snap_count >= effective_max_limit) {
+ snprintf (err_str, sizeof (err_str),
+ "The number of existing snaps has reached "
+ "the effective maximum limit of %"PRIu64" ,"
+ "for the volume %s", effective_max_limit,
+ volname);
+ loglevel = GF_LOG_WARNING;
+ goto out;
+ }
+
+ snprintf (key, sizeof(key) - 1, "vol%ld_volid", i);
+ ret = dict_get_bin (dict, key, (void **)&snap_volid);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to fetch snap_volid");
+ goto out;
+ }
+
+ /* snap volume uuid is used as lvm snapshot name.
+ This will avoid restrictions on snapshot names
+ provided by user */
+ GLUSTERD_GET_UUID_NOHYPHEN (snap_volname, *snap_volid);
+
+ brick_count = 0;
+ brick_order = 0;
+ /* Adding snap bricks mount paths to the dict */
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ if (uuid_compare (brickinfo->uuid, MY_UUID)) {
+ brick_order++;
+ continue;
+ }
+
+ if (!glusterd_is_brick_started (brickinfo)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "brick %s:%s is not started",
+ brickinfo->hostname,
+ brickinfo->path);
+ brick_order++;
+ brick_count++;
+ continue;
+ }
+
+ device = glusterd_get_brick_mount_details (brickinfo);
+ if (!device) {
+ snprintf (err_str, sizeof (err_str),
+ "getting device name for the brick "
+ "%s:%s failed", brickinfo->hostname,
+ brickinfo->path);
+ ret = -1;
+ goto out;
+ }
+
+ device = glusterd_build_snap_device_path (device,
+ snap_volname);
+ if (!device) {
+ snprintf (err_str, sizeof (err_str),
+ "cannot copy the snapshot device "
+ "name (volname: %s, snapname: %s)",
+ volinfo->volname, snapname);
+ loglevel = GF_LOG_WARNING;
+ ret = -1;
+ goto out;
+ }
+
+ snprintf (key, sizeof(key),
+ "vol%ld.brick_snapdevice%ld", i,
+ brick_count);
+ ret = dict_set_dynstr (rsp_dict, key, device);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set %s", key);
+ GF_FREE (device);
+ goto out;
+ }
+
+ ret = glusterd_get_brick_root (brickinfo->path,
+ &mnt_pt);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str),
+ "could not get the root of the brick path %s",
+ brickinfo->path);
+ loglevel = GF_LOG_WARNING;
+ goto out;
+ }
+ if (strncmp (brickinfo->path, mnt_pt, strlen(mnt_pt))) {
+ snprintf (err_str, sizeof (err_str),
+ "brick: %s brick mount: %s",
+ brickinfo->path, mnt_pt);
+ loglevel = GF_LOG_WARNING;
+ goto out;
+ }
+
+ brick_dir = &brickinfo->path[strlen (mnt_pt)];
+ brick_dir++;
+
+ snprintf (snap_brick_dir, sizeof (snap_brick_dir),
+ "/%s", brick_dir);
+
+ tmpstr = gf_strdup (snap_brick_dir);
+ if (!tmpstr) {
+ ret = -1;
+ goto out;
+ }
+ snprintf (key, sizeof(key), "vol%ld.brickdir%ld", i,
+ brick_count);
+ ret = dict_set_dynstr (rsp_dict, key, tmpstr);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set %s", snap_mount);
+ goto out;
+ }
+ tmpstr = NULL;
+
+ snprintf (key, sizeof(key) - 1, "vol%ld.brick%ld.order",
+ i, brick_count);
+ ret = dict_set_int64 (rsp_dict, key, brick_order);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set %s", key);
+ goto out;
+ }
+
+ brick_count++;
+ brick_order++;
+ }
+ snprintf (key, sizeof(key) - 1, "vol%ld_brickcount", i);
+ ret = dict_set_int64 (rsp_dict, key, brick_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set %s",
+ key);
+ goto out;
+ }
+ }
+
+ ret = dict_set_int64 (rsp_dict, "volcount", volcount);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set volcount");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret)
+ GF_FREE (tmpstr);
+
+ if (ret && err_str[0] != '\0') {
+ gf_log (this->name, loglevel, "%s", err_str);
+ *op_errstr = gf_strdup (err_str);
+ }
+
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+glusterd_snap_t*
+glusterd_new_snap_object()
+{
+ glusterd_snap_t *snap = NULL;
+
+ snap = GF_CALLOC (1, sizeof (*snap), gf_gld_mt_snap_t);
+
+ if (snap) {
+ if (LOCK_INIT (&snap->lock)) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed initiating"
+ " snap lock");
+ GF_FREE (snap);
+ return NULL;
+ }
+
+ INIT_LIST_HEAD (&snap->snap_list);
+ INIT_LIST_HEAD (&snap->volumes);
+ snap->snapname[0] = 0;
+ snap->snap_status = GD_SNAP_STATUS_INIT;
+ }
+
+ return snap;
+
+};
+
+/* Function glusterd_list_add_snapvol adds the volinfo object (snapshot volume)
+ to the snapshot object list and to the parent volume list */
+int32_t
+glusterd_list_add_snapvol (glusterd_volinfo_t *origin_vol,
+ glusterd_volinfo_t *snap_vol)
+{
+ int ret = -1;
+ glusterd_snap_t *snap = NULL;
+
+ GF_VALIDATE_OR_GOTO ("glusterd", origin_vol, out);
+ GF_VALIDATE_OR_GOTO ("glusterd", snap_vol, out);
+
+ snap = snap_vol->snapshot;
+ GF_ASSERT (snap);
+
+ list_add_tail (&snap_vol->vol_list, &snap->volumes);
+ LOCK (&origin_vol->lock);
+ {
+ list_add_order (&snap_vol->snapvol_list,
+ &origin_vol->snap_volumes,
+ glusterd_compare_snap_vol_time);
+ origin_vol->snap_count++;
+ }
+ UNLOCK (&origin_vol->lock);
+
+ gf_log (THIS->name, GF_LOG_DEBUG, "Snap %s added to the list",
+ snap->snapname);
+ ret = 0;
+out:
+ return ret;
+}
+
+glusterd_snap_t*
+glusterd_find_snap_by_name (char *snapname)
+{
+ glusterd_snap_t *snap = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ priv = THIS->private;
+ GF_ASSERT (priv);
+ GF_ASSERT (snapname);
+
+
+ list_for_each_entry (snap, &priv->snapshots, snap_list) {
+ if (!strcmp (snap->snapname, snapname)) {
+ gf_log (THIS->name, GF_LOG_DEBUG, "Found "
+ "snap %s (%s)", snap->snapname,
+ uuid_utoa (snap->snap_id));
+ goto out;
+ }
+ }
+ snap = NULL;
+out:
+ return snap;
+}
+
+glusterd_snap_t*
+glusterd_find_snap_by_id (uuid_t snap_id)
+{
+ glusterd_snap_t *snap = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ priv = THIS->private;
+ GF_ASSERT (priv);
+
+ if (uuid_is_null(snap_id))
+ goto out;
+
+ list_for_each_entry (snap, &priv->snapshots, snap_list) {
+ if (!uuid_compare (snap->snap_id, snap_id)) {
+ gf_log (THIS->name, GF_LOG_DEBUG, "Found "
+ "snap %s (%s)", snap->snapname,
+ uuid_utoa (snap->snap_id));
+ goto out;
+ }
+ }
+ snap = NULL;
+out:
+ return snap;
+}
+
+int
+glusterd_do_lvm_snapshot_remove (glusterd_volinfo_t *snap_vol,
+ glusterd_brickinfo_t *brickinfo,
+ const char *mount_pt, const char *snap_device)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ runner_t runner = {0,};
+ char msg[1024] = {0, };
+ char pidfile[PATH_MAX] = {0, };
+ pid_t pid = -1;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ if (!brickinfo) {
+ gf_log (this->name, GF_LOG_ERROR, "brickinfo NULL");
+ goto out;
+ }
+
+ GF_ASSERT (snap_vol);
+ GF_ASSERT (mount_pt);
+ GF_ASSERT (snap_device);
+
+ GLUSTERD_GET_BRICK_PIDFILE (pidfile, snap_vol, brickinfo, priv);
+ if (glusterd_is_service_running (pidfile, &pid)) {
+ ret = kill (pid, SIGKILL);
+ if (ret && errno != ESRCH) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to kill pid "
+ "%d reason : %s", pid, strerror(errno));
+ goto out;
+ }
+ }
+
+ runinit (&runner);
+ snprintf (msg, sizeof (msg), "umount the snapshot mounted path %s",
+ mount_pt);
+ runner_add_args (&runner, "umount", mount_pt, NULL);
+ runner_log (&runner, "", GF_LOG_DEBUG, msg);
+
+ /* We need not do synclock_unlock => runner_run => synclock_lock here.
+ Because it is needed if we are running a glusterfs process in
+ runner_run, so that when the glusterfs process started wants to
+ communicate to glusterd, glusterd wont be able to respond if it
+ has held the big lock. So we do unlock, run glusterfs process
+ (thus communicate to glusterd), lock. But since this is not a
+ glusterfs command that is being run, unlocking and then relocking
+ is not needed.
+ */
+ ret = runner_run (&runner);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "unmounting the "
+ "path %s (brick: %s) failed (%s)", mount_pt,
+ brickinfo->path, strerror (errno));
+ goto out;
+ }
+
+ runinit (&runner);
+ snprintf (msg, sizeof(msg), "remove snapshot of the brick %s:%s, "
+ "device: %s", brickinfo->hostname, brickinfo->path,
+ snap_device);
+ runner_add_args (&runner, "/sbin/lvremove", "-f", snap_device, NULL);
+ runner_log (&runner, "", GF_LOG_DEBUG, msg);
+
+ ret = runner_run (&runner);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "removing snapshot of the "
+ "brick (%s:%s) of device %s failed",
+ brickinfo->hostname, brickinfo->path, snap_device);
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+int32_t
+glusterd_lvm_snapshot_remove (dict_t *rsp_dict, glusterd_volinfo_t *snap_vol)
+{
+ char *mnt_pt = NULL;
+ struct mntent *entry = NULL;
+ int32_t brick_count = -1;
+ int32_t ret = -1;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ xlator_t *this = NULL;
+ FILE *mtab = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (rsp_dict);
+ GF_ASSERT (snap_vol);
+
+ if (!snap_vol) {
+ gf_log (this->name, GF_LOG_ERROR, "snap volinfo is NULL");
+ goto out;
+ }
+
+ brick_count = -1;
+ list_for_each_entry (brickinfo, &snap_vol->bricks, brick_list) {
+ brick_count++;
+ if (uuid_compare (brickinfo->uuid, MY_UUID))
+ continue;
+
+ if (brickinfo->snap_status == -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "snapshot was pending. lvm not present "
+ "for brick %s:%s of the snap %s.",
+ brickinfo->hostname, brickinfo->path,
+ snap_vol->snapshot->snapname);
+
+ /* Adding missed delete to the dict */
+ ret = glusterd_add_missed_snaps_to_dict
+ (rsp_dict,
+ snap_vol->volname,
+ brickinfo,
+ brick_count + 1,
+ GF_SNAP_OPTION_TYPE_DELETE);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to add missed snapshot info "
+ "for %s:%s in the rsp_dict",
+ brickinfo->hostname,
+ brickinfo->path);
+ goto out;
+ }
+
+ continue;
+ }
+
+ ret = glusterd_get_brick_root (brickinfo->path, &mnt_pt);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "getting the root "
+ "of the brick for volume %s (snap %s) failed ",
+ snap_vol->volname, snap_vol->snapshot->snapname);
+ goto out;
+ }
+
+ entry = glusterd_get_mnt_entry_info (mnt_pt, mtab);
+ if (!entry) {
+ gf_log (this->name, GF_LOG_WARNING, "getting the mount"
+ " entry for the brick %s:%s of the snap %s "
+ "(volume: %s) failed", brickinfo->hostname,
+ brickinfo->path, snap_vol->snapshot->snapname,
+ snap_vol->volname);
+ ret = -1;
+ goto out;
+ }
+ ret = glusterd_do_lvm_snapshot_remove (snap_vol, brickinfo,
+ mnt_pt,
+ entry->mnt_fsname);
+ if (mtab)
+ endmntent (mtab);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "remove the snapshot %s (%s)",
+ brickinfo->path, entry->mnt_fsname);
+ goto out;
+ }
+
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int32_t
+glusterd_snap_volume_remove (dict_t *rsp_dict,
+ glusterd_volinfo_t *snap_vol,
+ gf_boolean_t remove_lvm,
+ gf_boolean_t force)
+{
+ int ret = -1;
+ int save_ret = 0;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_volinfo_t *origin_vol = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (rsp_dict);
+ GF_ASSERT (snap_vol);
+
+ if (!snap_vol) {
+ gf_log(this->name, GF_LOG_WARNING, "snap_vol in NULL");
+ ret = -1;
+ goto out;
+ }
+
+ list_for_each_entry (brickinfo, &snap_vol->bricks, brick_list) {
+ if (uuid_compare (brickinfo->uuid, MY_UUID))
+ continue;
+
+ ret = glusterd_brick_stop (snap_vol, brickinfo, _gf_false);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING, "Failed to stop "
+ "brick for volume %s", snap_vol->volname);
+ save_ret = ret;
+
+ /* Continue to cleaning up the snap in case of error
+ if force flag is enabled */
+ if (!force)
+ goto out;
+ }
+ }
+
+ /* Only remove the backend lvm when required */
+ if (remove_lvm) {
+ ret = glusterd_lvm_snapshot_remove (rsp_dict, snap_vol);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING, "Failed to remove "
+ "lvm snapshot volume %s", snap_vol->volname);
+ save_ret = ret;
+ if (!force)
+ goto out;
+ }
+ }
+
+ ret = glusterd_store_delete_volume (snap_vol);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING, "Failed to remove volume %s "
+ "from store", snap_vol->volname);
+ save_ret = ret;
+ if (!force)
+ goto out;
+ }
+
+ if (!list_empty(&snap_vol->snapvol_list)) {
+ ret = glusterd_volinfo_find (snap_vol->parent_volname,
+ &origin_vol);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get "
+ "parent volinfo %s for volume %s",
+ snap_vol->parent_volname, snap_vol->volname);
+ save_ret = ret;
+ if (!force)
+ goto out;
+ }
+ origin_vol->snap_count--;
+ }
+
+ ret = glusterd_volinfo_delete (snap_vol);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING, "Failed to remove volinfo "
+ "%s ", snap_vol->volname);
+ save_ret = ret;
+ if (!force)
+ goto out;
+ }
+
+ if (save_ret)
+ ret = save_ret;
+out:
+ gf_log (this->name, GF_LOG_TRACE, "returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_snapobject_delete (glusterd_snap_t *snap)
+{
+ if (snap == NULL) {
+ gf_log(THIS->name, GF_LOG_WARNING, "snap is NULL");
+ return -1;
+ }
+
+ list_del_init (&snap->snap_list);
+ list_del_init (&snap->volumes);
+ if (LOCK_DESTROY(&snap->lock))
+ gf_log (THIS->name, GF_LOG_WARNING, "Failed destroying lock"
+ "of snap %s", snap->snapname);
+
+ GF_FREE (snap->description);
+ GF_FREE (snap);
+
+ return 0;
+}
+
+int32_t
+glusterd_snap_remove (dict_t *rsp_dict,
+ glusterd_snap_t *snap,
+ gf_boolean_t remove_lvm,
+ gf_boolean_t force)
+{
+ int ret = -1;
+ int save_ret = 0;
+ glusterd_volinfo_t *snap_vol = NULL;
+ glusterd_volinfo_t *tmp = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (rsp_dict);
+ GF_ASSERT (snap);
+
+ if (!snap) {
+ gf_log(this->name, GF_LOG_WARNING, "snap is NULL");
+ ret = -1;
+ goto out;
+ }
+
+ list_for_each_entry_safe (snap_vol, tmp, &snap->volumes, vol_list) {
+ ret = glusterd_snap_volume_remove (rsp_dict, snap_vol,
+ remove_lvm, force);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING, "Failed to remove "
+ "volinfo %s for snap %s", snap_vol->volname,
+ snap->snapname);
+ save_ret = ret;
+
+ /* Continue to cleaning up the snap in case of error
+ if force flag is enabled */
+ if (!force)
+ goto out;
+ }
+ }
+
+ ret = glusterd_store_delete_snap (snap);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING, "Failed to remove snap %s "
+ "from store", snap->snapname);
+ save_ret = ret;
+ if (!force)
+ goto out;
+ }
+
+ ret = glusterd_snapobject_delete (snap);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING, "Failed to delete "
+ "snap object %s", snap->snapname);
+
+ if (save_ret)
+ ret = save_ret;
+out:
+ gf_log (THIS->name, GF_LOG_TRACE, "returning %d", ret);
+ return ret;
+}
+
+static int
+glusterd_snapshot_get_snapvol_detail (dict_t *dict,
+ glusterd_volinfo_t *snap_vol,
+ char *keyprefix, int detail)
+{
+ int ret = -1;
+ int snap_limit = 0;
+ char key[PATH_MAX] = {0,};
+ char *value = NULL;
+ glusterd_volinfo_t *origin_vol = NULL;
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ conf = this->private;
+ GF_ASSERT (conf);
+
+ GF_ASSERT (dict);
+ GF_ASSERT (snap_vol);
+ GF_ASSERT (keyprefix);
+
+ /* Volume Name */
+ value = gf_strdup (snap_vol->volname);
+ if (!value)
+ goto out;
+
+ snprintf (key, sizeof (key), "%s.volname", keyprefix);
+ ret = dict_set_dynstr (dict, key, value);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set "
+ "volume name in dictionary: %s", key);
+ goto out;
+ }
+
+ /* Volume ID */
+ value = gf_strdup (uuid_utoa (snap_vol->volume_id));
+ if (NULL == value) {
+ ret = -1;
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "%s.vol-id", keyprefix);
+ ret = dict_set_dynstr (dict, key, value);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set "
+ "volume id in dictionary: %s", key);
+ goto out;
+ }
+ value = NULL;
+
+ /* volume status */
+ snprintf (key, sizeof (key), "%s.vol-status", keyprefix);
+ switch (snap_vol->status) {
+ case GLUSTERD_STATUS_STARTED:
+ ret = dict_set_str (dict, key, "Started");
+ break;
+ case GLUSTERD_STATUS_STOPPED:
+ ret = dict_set_str (dict, key, "Stopped");
+ break;
+ case GD_SNAP_STATUS_NONE:
+ ret = dict_set_str (dict, key, "None");
+ break;
+ default:
+ gf_log (this->name, GF_LOG_ERROR, "Invalid volume status");
+ ret = -1;
+ goto out;
+ }
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set volume status"
+ " in dictionary: %s", key);
+ goto out;
+ }
+
+
+ ret = glusterd_volinfo_find (snap_vol->parent_volname, &origin_vol);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to get the parent "
+ "volinfo for the volume %s", snap_vol->volname);
+ goto out;
+ }
+
+ /* Snaps available */
+ if (conf->snap_max_hard_limit < origin_vol->snap_max_hard_limit) {
+ snap_limit = conf->snap_max_hard_limit;
+ gf_log(this->name, GF_LOG_DEBUG, "system snap-max-hard-limit is"
+ " lesser than volume snap-max-hard-limit, "
+ "snap-max-hard-limit value is set to %d", snap_limit);
+ } else {
+ snap_limit = origin_vol->snap_max_hard_limit;
+ gf_log(this->name, GF_LOG_DEBUG, "volume snap-max-hard-limit is"
+ " lesser than system snap-max-hard-limit, "
+ "snap-max-hard-limit value is set to %d", snap_limit);
+ }
+
+ snprintf (key, sizeof (key), "%s.snaps-available", keyprefix);
+ if (snap_limit > origin_vol->snap_count)
+ ret = dict_set_int32 (dict, key,
+ snap_limit - origin_vol->snap_count);
+ else
+ ret = dict_set_int32 (dict, key, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set available snaps");
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "%s.snapcount", keyprefix);
+ ret = dict_set_int32 (dict, key, origin_vol->snap_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not save snapcount");
+ goto out;
+ }
+
+ if (!detail)
+ goto out;
+
+ /* Parent volume name */
+ value = gf_strdup (snap_vol->parent_volname);
+ if (!value)
+ goto out;
+
+ snprintf (key, sizeof (key), "%s.origin-volname", keyprefix);
+ ret = dict_set_dynstr (dict, key, value);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set parent "
+ "volume name in dictionary: %s", key);
+ goto out;
+ }
+ value = NULL;
+
+ ret = 0;
+out:
+ if (value)
+ GF_FREE (value);
+
+ return ret;
+}
+
+static int
+glusterd_snapshot_get_snap_detail (dict_t *dict, glusterd_snap_t *snap,
+ char *keyprefix, glusterd_volinfo_t *volinfo)
+{
+ int ret = -1;
+ int volcount = 0;
+ char key[PATH_MAX] = {0,};
+ char *value = NULL;
+ char *timestr = NULL;
+ struct tm *tmptr = NULL;
+ glusterd_volinfo_t *snap_vol = NULL;
+ glusterd_volinfo_t *tmp_vol = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (snap);
+ GF_ASSERT (keyprefix);
+
+ /* Snap Name */
+ value = gf_strdup (snap->snapname);
+ if (!value)
+ goto out;
+
+ snprintf (key, sizeof (key), "%s.snapname", keyprefix);
+ ret = dict_set_dynstr (dict, key, value);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set "
+ "snap name in dictionary");
+ goto out;
+ }
+
+ /* Snap ID */
+ value = gf_strdup (uuid_utoa (snap->snap_id));
+ if (NULL == value) {
+ ret = -1;
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "%s.snap-id", keyprefix);
+ ret = dict_set_dynstr (dict, key, value);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set "
+ "snap id in dictionary");
+ goto out;
+ }
+ value = NULL;
+
+ tmptr = localtime (&(snap->time_stamp));
+ if (NULL == tmptr) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to convert "
+ "time_t to *tm");
+ ret = -1;
+ goto out;
+ }
+
+ timestr = GF_CALLOC (1, PATH_MAX, gf_gld_mt_char);
+ if (NULL == timestr) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = strftime (timestr, PATH_MAX, "%Y-%m-%d %H:%M:%S", tmptr);
+ if (0 == ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to convert time_t "
+ "to string");
+ ret = -1;
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "%s.snap-time", keyprefix);
+ ret = dict_set_dynstr (dict, key, timestr);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set "
+ "snap time stamp in dictionary");
+ goto out;
+ }
+ timestr = NULL;
+
+ /* If snap description is provided then add that into dictionary */
+ if (NULL != snap->description) {
+ value = gf_strdup (snap->description);
+ if (NULL == value) {
+ ret = -1;
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "%s.snap-desc", keyprefix);
+ ret = dict_set_dynstr (dict, key, value);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set "
+ "snap description in dictionary");
+ goto out;
+ }
+ value = NULL;
+ }
+
+ snprintf (key, sizeof (key), "%s.snap-status", keyprefix);
+ switch (snap->snap_status) {
+ case GD_SNAP_STATUS_INIT:
+ ret = dict_set_str (dict, key, "Init");
+ break;
+ case GD_SNAP_STATUS_IN_USE:
+ ret = dict_set_str (dict, key, "In-use");
+ break;
+ case GD_SNAP_STATUS_DECOMMISSION:
+ ret = dict_set_str (dict, key, "Decommisioned");
+ break;
+ case GD_SNAP_STATUS_RESTORED:
+ ret = dict_set_str (dict, key, "Restored");
+ break;
+ case GD_SNAP_STATUS_NONE:
+ ret = dict_set_str (dict, key, "None");
+ break;
+ default:
+ gf_log (this->name, GF_LOG_ERROR, "Invalid snap status");
+ ret = -1;
+ goto out;
+ }
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set snap status "
+ "in dictionary");
+ goto out;
+ }
+
+ if (volinfo) {
+ volcount = 1;
+ snprintf (key, sizeof (key), "%s.vol%d", keyprefix, volcount);
+ ret = glusterd_snapshot_get_snapvol_detail (dict,
+ volinfo, key, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "get volume detail %s for snap %s",
+ snap_vol->volname, snap->snapname);
+ goto out;
+ }
+ goto done;
+ }
+
+ list_for_each_entry_safe (snap_vol, tmp_vol, &snap->volumes, vol_list) {
+ volcount++;
+ snprintf (key, sizeof (key), "%s.vol%d", keyprefix, volcount);
+ ret = glusterd_snapshot_get_snapvol_detail (dict,
+ snap_vol, key, 1);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "get volume detail %s for snap %s",
+ snap_vol->volname, snap->snapname);
+ goto out;
+ }
+ }
+
+done:
+ snprintf (key, sizeof (key), "%s.vol-count", keyprefix);
+ ret = dict_set_int32 (dict, key, volcount);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set %s",
+ key);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (value)
+ GF_FREE (value);
+
+ if (timestr)
+ GF_FREE(timestr);
+
+ return ret;
+}
+
+static int
+glusterd_snapshot_get_all_snap_info (dict_t *dict)
+{
+ int ret = -1;
+ int snapcount = 0;
+ char key[PATH_MAX] = {0,};
+ glusterd_snap_t *snap = NULL;
+ glusterd_snap_t *tmp_snap = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ /* General parameter validation */
+ GF_ASSERT (dict);
+
+ list_for_each_entry_safe (snap, tmp_snap, &priv->snapshots, snap_list) {
+ snapcount++;
+ snprintf (key, sizeof (key), "snap%d", snapcount);
+ ret = glusterd_snapshot_get_snap_detail (dict, snap, key, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get "
+ "snapdetail for snap %s", snap->snapname);
+ goto out;
+ }
+ }
+
+ ret = dict_set_int32 (dict, "snap-count", snapcount);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set snapcount");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+glusterd_snapshot_get_info_by_volume (dict_t *dict, char *volname,
+ char *err_str, size_t len)
+{
+ int ret = -1;
+ int snapcount = 0;
+ int snap_limit = 0;
+ char *value = NULL;
+ char key[PATH_MAX] = "";
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_volinfo_t *snap_vol = NULL;
+ glusterd_volinfo_t *tmp_vol = NULL;
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ conf = this->private;
+ GF_ASSERT (conf);
+
+ GF_ASSERT (dict);
+ GF_ASSERT (volname);
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ snprintf (err_str, len, "Volume (%s) does not exist", volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ /* Snaps available */
+ if (conf->snap_max_hard_limit < volinfo->snap_max_hard_limit) {
+ snap_limit = conf->snap_max_hard_limit;
+ gf_log(this->name, GF_LOG_DEBUG, "system snap-max-hard-limit is"
+ " lesser than volume snap-max-hard-limit, "
+ "snap-max-hard-limit value is set to %d", snap_limit);
+ } else {
+ snap_limit = volinfo->snap_max_hard_limit;
+ gf_log(this->name, GF_LOG_DEBUG, "volume snap-max-hard-limit is"
+ " lesser than system snap-max-hard-limit, "
+ "snap-max-hard-limit value is set to %d", snap_limit);
+ }
+
+ if (snap_limit > volinfo->snap_count)
+ ret = dict_set_int32 (dict, "snaps-available",
+ snap_limit - volinfo->snap_count);
+ else
+ ret = dict_set_int32 (dict, "snaps-available", 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set available snaps");
+ goto out;
+ }
+
+ /* Origin volume name */
+ value = gf_strdup (volinfo->volname);
+ if (!value)
+ goto out;
+
+ ret = dict_set_dynstr (dict, "origin-volname", value);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set parent "
+ "volume name in dictionary: %s", key);
+ goto out;
+ }
+ value = NULL;
+
+ list_for_each_entry_safe (snap_vol, tmp_vol, &volinfo->snap_volumes,
+ snapvol_list) {
+ snapcount++;
+ snprintf (key, sizeof (key), "snap%d", snapcount);
+ ret = glusterd_snapshot_get_snap_detail (dict,
+ snap_vol->snapshot,
+ key, snap_vol);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get "
+ "snapdetail for snap %s",
+ snap_vol->snapshot->snapname);
+ goto out;
+ }
+ }
+ ret = dict_set_int32 (dict, "snap-count", snapcount);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set snapcount");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (value)
+ GF_FREE (value);
+
+ return ret;
+}
+
+/* This function will be called from RPC handler routine.
+ * This function is responsible for getting the requested
+ * snapshot info into the dictionary.
+ *
+ * @param req RPC request object. Required for sending a response back.
+ * @param op glusterd operation. Required for sending a response back.
+ * @param dict pointer to dictionary which will contain both
+ * request and response key-pair values.
+ * @return -1 on error and 0 on success
+ */
+int
+glusterd_handle_snapshot_info (rpcsvc_request_t *req, glusterd_op_t op,
+ dict_t *dict, char *err_str, size_t len)
+{
+ int ret = -1;
+ int8_t snap_driven = 1;
+ char *volname = NULL;
+ char *snapname = NULL;
+ glusterd_snap_t *snap = NULL;
+ xlator_t *this = NULL;
+ int32_t cmd = GF_SNAP_INFO_TYPE_ALL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ GF_VALIDATE_OR_GOTO (this->name, req, out);
+ GF_VALIDATE_OR_GOTO (this->name, dict, out);
+
+
+ ret = dict_get_int32 (dict, "cmd", &cmd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get type "
+ "of snapshot info");
+ goto out;
+ }
+
+ switch (cmd) {
+ case GF_SNAP_INFO_TYPE_ALL:
+ {
+ ret = glusterd_snapshot_get_all_snap_info (dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get info of all snaps");
+ goto out;
+ }
+ break;
+ }
+
+ case GF_SNAP_INFO_TYPE_SNAP:
+ {
+ ret = dict_get_str (dict, "snapname", &snapname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get snap name");
+ goto out;
+ }
+
+ ret = dict_set_int32 (dict, "snap-count", 1);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set snapcount");
+ goto out;
+ }
+
+ snap = glusterd_find_snap_by_name (snapname);
+ if (!snap) {
+ snprintf (err_str, len,
+ "Snap (%s) does not exist", snapname);
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s", err_str);
+ ret = -1;
+ goto out;
+ }
+ ret = glusterd_snapshot_get_snap_detail (dict, snap,
+ "snap1", NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get snap detail of snap "
+ "%s", snap->snapname);
+ goto out;
+ }
+ break;
+ }
+
+ case GF_SNAP_INFO_TYPE_VOL:
+ {
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get volname");
+ goto out;
+ }
+ ret = glusterd_snapshot_get_info_by_volume (dict,
+ volname, err_str, len);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get volume info of volume "
+ "%s", volname);
+ goto out;
+ }
+ snap_driven = 0;
+ break;
+ }
+ }
+
+ ret = dict_set_int8 (dict, "snap-driven", snap_driven);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set snap-driven");
+ goto out;
+ }
+
+ /* If everything is successful then send the response back to cli.
+ * In case of failure the caller of this function will take care
+ of the response */
+ ret = glusterd_op_send_cli_response (op, 0, 0, req, dict, err_str);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to send cli "
+ "response");
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+/* This function sets all the snapshot names in the dictionary */
+int
+glusterd_snapshot_get_all_snapnames (dict_t *dict)
+{
+ int ret = -1;
+ int snapcount = 0;
+ char *snapname = NULL;
+ char key[PATH_MAX] = {0,};
+ glusterd_snap_t *snap = NULL;
+ glusterd_snap_t *tmp_snap = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ priv = this->private;
+ GF_ASSERT (priv);
+ GF_ASSERT (dict);
+
+ list_for_each_entry_safe (snap, tmp_snap, &priv->snapshots, snap_list) {
+ snapcount++;
+ snapname = gf_strdup (snap->snapname);
+ if (!snapname) {
+ gf_log (this->name, GF_LOG_ERROR, "strdup failed");
+ ret = -1;
+ goto out;
+ }
+ snprintf (key, sizeof (key), "snapname%d", snapcount);
+ ret = dict_set_dynstr (dict, key, snapname);
+ if (ret) {
+ GF_FREE (snapname);
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set %s",
+ key);
+ goto out;
+ }
+ }
+
+ ret = dict_set_int32 (dict, "snap-count", snapcount);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set snapcount");
+ goto out;
+ }
+
+ ret = 0;
+out:
+
+ return ret;
+}
+
+/* This function sets all the snapshot names
+ under a given volume in the dictionary */
+int
+glusterd_snapshot_get_vol_snapnames (dict_t *dict, glusterd_volinfo_t *volinfo)
+{
+ int ret = -1;
+ int snapcount = 0;
+ char *snapname = NULL;
+ char key[PATH_MAX] = {0,};
+ glusterd_volinfo_t *snap_vol = NULL;
+ glusterd_volinfo_t *tmp_vol = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (dict);
+ GF_ASSERT (volinfo);
+
+ list_for_each_entry_safe (snap_vol, tmp_vol,
+ &volinfo->snap_volumes, snapvol_list) {
+ snapcount++;
+ snapname = gf_strdup (snap_vol->snapshot->snapname);
+ if (!snapname) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "strdup failed");
+ ret = -1;
+ goto out;
+ }
+ snprintf (key, sizeof (key), "snapname%d", snapcount);
+ ret = dict_set_dynstr (dict, key, snapname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "set %s", key);
+ GF_FREE (snapname);
+ goto out;
+ }
+ }
+
+ ret = dict_set_int32 (dict, "snap-count", snapcount);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set snapcount");
+ goto out;
+ }
+
+ ret = 0;
+out:
+
+ return ret;
+}
+
+int
+glusterd_handle_snapshot_list (rpcsvc_request_t *req, glusterd_op_t op,
+ dict_t *dict, char *err_str, size_t len)
+{
+ int ret = -1;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ GF_VALIDATE_OR_GOTO (this->name, req, out);
+ GF_VALIDATE_OR_GOTO (this->name, dict, out);
+
+ /* Ignore error for getting volname as it is optional */
+ ret = dict_get_str (dict, "volname", &volname);
+
+ if (NULL == volname) {
+ ret = glusterd_snapshot_get_all_snapnames (dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get snapshot list");
+ goto out;
+ }
+ } else {
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ snprintf (err_str, len,
+ "Volume (%s) does not exist", volname);
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s", err_str);
+ goto out;
+ }
+
+ ret = glusterd_snapshot_get_vol_snapnames (dict, volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get snapshot list for volume %s",
+ volname);
+ goto out;
+ }
+ }
+
+ /* If everything is successful then send the response back to cli.
+ In case of failure the caller of this function will take of response.*/
+ ret = glusterd_op_send_cli_response (op, 0, 0, req, dict, err_str);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to send cli "
+ "response");
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+/* This is a snapshot create handler function. This function will be
+ * executed in the originator node. This function is responsible for
+ * calling mgmt_v3 framework to do the actual snap creation on all the bricks
+ *
+ * @param req RPC request object
+ * @param op gluster operation
+ * @param dict dictionary containing snapshot restore request
+ * @param err_str In case of an err this string should be populated
+ * @param len length of err_str buffer
+ *
+ * @return Negative value on Failure and 0 in success
+ */
+int
+glusterd_handle_snapshot_create (rpcsvc_request_t *req, glusterd_op_t op,
+ dict_t *dict, char *err_str, size_t len)
+{
+ int ret = -1;
+ char *volname = NULL;
+ char *snapname = NULL;
+ int64_t volcount = 0;
+ xlator_t *this = NULL;
+ char key[PATH_MAX] = "";
+ char *username = NULL;
+ char *password = NULL;
+ uuid_t *uuid_ptr = NULL;
+ uuid_t tmp_uuid = {0};
+ int i = 0;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+ GF_ASSERT (dict);
+ GF_ASSERT (err_str);
+
+ ret = dict_get_int64 (dict, "volcount", &volcount);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "get the volume count");
+ goto out;
+ }
+ if (volcount <= 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Invalid volume count %ld "
+ "supplied", volcount);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "snapname", &snapname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to get the snapname");
+ goto out;
+ }
+
+ if (strlen(snapname) >= GLUSTERD_MAX_SNAP_NAME) {
+ snprintf (err_str, len, "snapname cannot exceed 255 "
+ "characters");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ ret = -1;
+ goto out;
+ }
+
+ uuid_ptr = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t);
+ if (!uuid_ptr) {
+ gf_log (this->name, GF_LOG_ERROR, "Out Of Memory");
+ ret = -1;
+ goto out;
+ }
+
+ uuid_generate (*uuid_ptr);
+ ret = dict_set_bin (dict, "snap-id", uuid_ptr, sizeof(uuid_t));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to set snap-id");
+ GF_FREE (uuid_ptr);
+ goto out;
+ }
+ uuid_ptr = NULL;
+
+ ret = dict_set_int64 (dict, "snap-time", (int64_t)time(NULL));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to set snap-time");
+ goto out;
+ }
+
+ for (i = 1; i <= volcount; i++) {
+ snprintf (key, sizeof (key), "volname%d", i);
+ ret = dict_get_str (dict, key, &volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get volume name");
+ goto out;
+ }
+
+ /* generate internal username and password for the snap*/
+ uuid_generate (tmp_uuid);
+ username = gf_strdup (uuid_utoa (tmp_uuid));
+ snprintf (key, sizeof(key), "volume%d_username", i);
+ ret = dict_set_dynstr (dict, key, username);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set snap "
+ "username for volume %s", volname);
+ GF_FREE (username);
+ goto out;
+ }
+
+ uuid_generate (tmp_uuid);
+ password = gf_strdup (uuid_utoa (tmp_uuid));
+ snprintf (key, sizeof(key), "volume%d_password", i);
+ ret = dict_set_dynstr (dict, key, password);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set snap "
+ "password for volume %s", volname);
+ GF_FREE (password);
+ goto out;
+ }
+
+ uuid_ptr = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t);
+ if (!uuid_ptr) {
+ gf_log (this->name, GF_LOG_ERROR, "Out Of Memory");
+ ret = -1;
+ goto out;
+ }
+
+ snprintf (key, sizeof(key) - 1, "vol%d_volid", i);
+ uuid_generate (*uuid_ptr);
+ ret = dict_set_bin (dict, key, uuid_ptr, sizeof(uuid_t));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to set snap_volid");
+ GF_FREE (uuid_ptr);
+ goto out;
+ }
+ }
+
+ ret = glusterd_mgmt_v3_initiate_snap_phases (req, op, dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to initiate snap "
+ "phases");
+ }
+
+out:
+ return ret;
+}
+
+/* This is a snapshot status handler function. This function will be
+ * executed in a originator node. This function is responsible for
+ * calling mgmt v3 framework to get the actual snapshot status from
+ * all the bricks
+ *
+ * @param req RPC request object
+ * @param op gluster operation
+ * @param dict dictionary containing snapshot status request
+ * @param err_str In case of an err this string should be populated
+ * @param len length of err_str buffer
+ *
+ * return : 0 in case of success.
+ * -1 in case of failure.
+ *
+ */
+int
+glusterd_handle_snapshot_status (rpcsvc_request_t *req, glusterd_op_t op,
+ dict_t *dict, char *err_str, size_t len)
+{
+ int ret = -1;
+ char *volname = NULL;
+ char *snapname = NULL;
+ char *buf = NULL;
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ int32_t cmd = -1;
+ int i = 0;
+ dict_t *voldict = NULL;
+ char key[PATH_MAX] = "";
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_snap_t *snap = NULL;
+ glusterd_volinfo_t *snap_volinfo = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ conf = this->private;
+
+ GF_ASSERT (conf);
+ GF_ASSERT (req);
+ GF_ASSERT (dict);
+ GF_ASSERT (err_str);
+
+ ret = dict_get_int32 (dict, "cmd", &cmd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not get status type");
+ goto out;
+ }
+ switch (cmd) {
+ case GF_SNAP_STATUS_TYPE_ALL:
+ {
+ /* IF we give "gluster snapshot status"
+ * then lock is held on all snaps.
+ * This is the place where necessary information
+ * (snapname and snapcount)is populated in dictionary
+ * for locking.
+ */
+ ++i;
+ list_for_each_entry (snap, &conf->snapshots, snap_list)
+ {
+ snprintf (key, sizeof (key), "snapname%d", i);
+ buf = gf_strdup (snap->snapname);
+ if (!buf) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_dynstr (dict, key, buf);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not save snapname (%s) "
+ "in the dictionary",
+ snap->snapname);
+ GF_FREE (buf);
+ goto out;
+ }
+
+ buf = NULL;
+ i++;
+ }
+
+ ret = dict_set_int32 (dict, "snapcount", i - 1);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not "
+ "save snapcount in the dictionary");
+ goto out;
+ }
+ break;
+ }
+
+ case GF_SNAP_STATUS_TYPE_SNAP:
+ {
+ /* IF we give "gluster snapshot status <snapname>"
+ * then lock is held on single snap.
+ * This is the place where necessary information
+ * (snapname)is populated in dictionary
+ * for locking.
+ */
+ ret = dict_get_str (dict, "snapname", &snapname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to fetch snap name");
+ goto out;
+ }
+
+ snap = glusterd_find_snap_by_name (snapname);
+ if (!snap) {
+ snprintf (err_str, len, "Snap (%s)"
+ "does not exist", snapname);
+ gf_log(this->name, GF_LOG_ERROR,
+ "%s", err_str);
+ ret = -1;
+ goto out;
+ }
+ break;
+ }
+ case GF_SNAP_STATUS_TYPE_VOL:
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to fetch volname");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ snprintf (err_str, len, "Volume (%s) "
+ "does not exist", volname);
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s", err_str);
+ goto out;
+ }
+
+ i = 1;
+ list_for_each_entry (snap_volinfo,
+ &volinfo->snap_volumes, snapvol_list) {
+ snprintf (key, sizeof (key), "snapname%d", i);
+
+ buf = gf_strdup
+ (snap_volinfo->snapshot->snapname);
+ if (!buf) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr (dict, key, buf);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not save snapname");
+ GF_FREE (buf);
+ goto out;
+ }
+
+ buf = NULL;
+ i++;
+ }
+
+ ret = dict_set_int32 (dict, "snapcount", i-1);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not save snapcount");
+ goto out;
+ }
+ break;
+ default:
+ {
+ gf_log (this->name, GF_LOG_ERROR, "Unknown type");
+ ret = -1;
+ goto out;
+ }
+ }
+
+ /* Volume lock is not necessary for snapshot status, hence
+ * turning it off
+ */
+ ret = dict_set_int8 (dict, "hold_vol_locks", 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Setting volume lock "
+ "flag failed");
+ goto out;
+ }
+
+ ret = glusterd_mgmt_v3_initiate_snap_phases (req, op, dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to initiate "
+ "snap phases");
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (voldict) {
+ dict_unref (voldict);
+ }
+ return ret;
+}
+
+
+/* This is a snapshot restore handler function. This function will be
+ * executed in the originator node. This function is responsible for
+ * calling mgmt_v3 framework to do the actual restore on all the bricks
+ *
+ * @param req RPC request object
+ * @param op gluster operation
+ * @param dict dictionary containing snapshot restore request
+ * @param err_str In case of an err this string should be populated
+ * @param len length of err_str buffer
+ *
+ * @return Negative value on Failure and 0 in success
+ */
+int
+glusterd_handle_snapshot_restore (rpcsvc_request_t *req, glusterd_op_t op,
+ dict_t *dict, char *err_str, size_t len)
+{
+ int ret = -1;
+ char *snapname = NULL;
+ char *buf = NULL;
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ glusterd_snap_t *snap = NULL;
+ glusterd_volinfo_t *snap_volinfo = NULL;
+ int32_t i = 0;
+ char key[PATH_MAX] = "";
+
+ this = THIS;
+ GF_ASSERT (this);
+ conf = this->private;
+
+ GF_ASSERT (conf);
+ GF_ASSERT (req);
+ GF_ASSERT (dict);
+ GF_ASSERT (err_str);
+
+ ret = dict_get_str (dict, "snapname", &snapname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "get snapname");
+ goto out;
+ }
+
+ snap = glusterd_find_snap_by_name (snapname);
+ if (!snap) {
+ snprintf (err_str, len, "Snap (%s) does not exist", snapname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ ret = -1;
+ goto out;
+ }
+
+ list_for_each_entry (snap_volinfo, &snap->volumes, vol_list) {
+ i++;
+ snprintf (key, sizeof (key), "volname%d", i);
+ buf = gf_strdup (snap_volinfo->parent_volname);
+ if (!buf) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_dynstr (dict, key, buf);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not set "
+ "parent volume name %s in the dict",
+ snap_volinfo->parent_volname);
+ GF_FREE (buf);
+ goto out;
+ }
+ buf = NULL;
+ }
+
+ ret = dict_set_int32 (dict, "volcount", i);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not save volume count");
+ goto out;
+ }
+
+ ret = glusterd_mgmt_v3_initiate_snap_phases (req, op, dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to initiate snap "
+ "phases");
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+glusterd_snap_t*
+glusterd_create_snap_object (dict_t *dict, dict_t *rsp_dict)
+{
+ char *snapname = NULL;
+ uuid_t *snap_id = NULL;
+ char *description = NULL;
+ glusterd_snap_t *snap = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ int ret = -1;
+ int64_t time_stamp = 0;
+
+ this = THIS;
+ priv = this->private;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (rsp_dict);
+
+ /* Fetch snapname, description, id and time from dict */
+ ret = dict_get_str (dict, "snapname", &snapname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to fetch snapname");
+ goto out;
+ }
+
+ /* Ignore ret value for description*/
+ ret = dict_get_str (dict, "description", &description);
+
+ ret = dict_get_bin (dict, "snap-id", (void **)&snap_id);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to fetch snap_id");
+ goto out;
+ }
+
+ ret = dict_get_int64 (dict, "snap-time", &time_stamp);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to fetch snap-time");
+ goto out;
+ }
+ if (time_stamp <= 0) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR, "Invalid time-stamp: %ld",
+ time_stamp);
+ goto out;
+ }
+
+ list_for_each_entry (snap, &priv->snapshots, snap_list) {
+ if (!strcmp (snap->snapname, snapname) ||
+ !uuid_compare (snap->snap_id, *snap_id)) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Found duplicate snap %s (%s)",
+ snap->snapname, uuid_utoa (snap->snap_id));
+ ret = -1;
+ break;
+ }
+ }
+ if (ret) {
+ snap = NULL;
+ goto out;
+ }
+
+ snap = glusterd_new_snap_object ();
+ if (!snap) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not create "
+ "the snap object for snap %s", snapname);
+ goto out;
+ }
+
+ strcpy (snap->snapname, snapname);
+ uuid_copy (snap->snap_id, *snap_id);
+ snap->time_stamp = (time_t)time_stamp;
+ /* Set the status as GD_SNAP_STATUS_INIT and once the backend snapshot
+ is taken and snap is really ready to use, set the status to
+ GD_SNAP_STATUS_IN_USE. This helps in identifying the incomplete
+ snapshots and cleaning them up.
+ */
+ snap->snap_status = GD_SNAP_STATUS_INIT;
+ if (description) {
+ snap->description = gf_strdup (description);
+ if (snap->description == NULL) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Saving the Snap Description Failed");
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = glusterd_store_snap (snap);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "Could not store snap"
+ "object %s", snap->snapname);
+ goto out;
+ }
+
+ list_add_order (&snap->snap_list, &priv->snapshots,
+ glusterd_compare_snap_time);
+
+ gf_log (this->name, GF_LOG_TRACE, "Snap %s added to the list",
+ snap->snapname);
+
+ ret = 0;
+
+out:
+ if (ret) {
+ if (snap)
+ glusterd_snap_remove (rsp_dict, snap,
+ _gf_true, _gf_true);
+ snap = NULL;
+ }
+
+ return snap;
+}
+
+/* This function is called to get the device path of the snap lvm. Usually
+ if /dev/mapper/<group-name>-<lvm-name> is the device for the lvm,
+ then the snap device will be /dev/<group-name>/<snapname>.
+ This function takes care of building the path for the snap device.
+*/
+char *
+glusterd_build_snap_device_path (char *device, char *snapname)
+{
+ char snap[PATH_MAX] = "";
+ char msg[1024] = "";
+ char volgroup[PATH_MAX] = "";
+ char *snap_device = NULL;
+ xlator_t *this = NULL;
+ runner_t runner = {0,};
+ char *ptr = NULL;
+ int ret = -1;
+
+ this = THIS;
+ GF_ASSERT (this);
+ if (!device) {
+ gf_log (this->name, GF_LOG_ERROR, "device is NULL");
+ goto out;
+ }
+ if (!snapname) {
+ gf_log (this->name, GF_LOG_ERROR, "snapname is NULL");
+ goto out;
+ }
+
+ runinit (&runner);
+ runner_add_args (&runner, "/sbin/lvs", "--noheadings", "-o", "vg_name",
+ device, NULL);
+ runner_redir (&runner, STDOUT_FILENO, RUN_PIPE);
+ snprintf (msg, sizeof (msg), "Get volume group for device %s", device);
+ runner_log (&runner, this->name, GF_LOG_DEBUG, msg);
+ ret = runner_start (&runner);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get volume group "
+ "for device %s", device);
+ runner_end (&runner);
+ goto out;
+ }
+ ptr = fgets(volgroup, sizeof(volgroup),
+ runner_chio (&runner, STDOUT_FILENO));
+ if (!ptr || !strlen(volgroup)) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get volume group "
+ "for snap %s", snapname);
+ runner_end (&runner);
+ ret = -1;
+ goto out;
+ }
+ runner_end (&runner);
+
+ snprintf (snap, sizeof(snap), "/dev/%s/%s", gf_trim(volgroup),
+ snapname);
+ snap_device = gf_strdup (snap);
+ if (!snap_device) {
+ gf_log (this->name, GF_LOG_WARNING, "Cannot copy the "
+ "snapshot device name for snapname: %s)", snapname);
+ }
+
+out:
+ return snap_device;
+}
+
+/* This function actually calls the command (or the API) for taking the
+ snapshot of the backend brick filesystem. If this is successful,
+ then call the glusterd_snap_create function to create the snap object
+ for glusterd
+*/
+char *
+glusterd_take_lvm_snapshot (glusterd_volinfo_t *snap_vol,
+ glusterd_brickinfo_t *brickinfo)
+{
+ char msg[NAME_MAX] = "";
+ char buf[PATH_MAX] = "";
+ char *snap_device = NULL;
+ char *ptr = NULL;
+ char *device = NULL;
+ int ret = -1;
+ gf_boolean_t match = _gf_false;
+ runner_t runner = {0,};
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ if (!brickinfo) {
+ gf_log (this->name, GF_LOG_ERROR, "brickinfo NULL");
+ goto out;
+ }
+
+ device = glusterd_get_brick_mount_details (brickinfo);
+ if (!device) {
+ gf_log (this->name, GF_LOG_ERROR, "getting device name for "
+ "the brick %s:%s failed", brickinfo->hostname,
+ brickinfo->path);
+ goto out;
+ }
+
+ /* Figuring out if setactivationskip flag is supported or not */
+ runinit (&runner);
+ snprintf (msg, sizeof (msg), "running lvcreate help");
+ runner_add_args (&runner, "/sbin/lvcreate", "--help", NULL);
+ runner_log (&runner, "", GF_LOG_DEBUG, msg);
+ runner_redir (&runner, STDOUT_FILENO, RUN_PIPE);
+ ret = runner_start (&runner);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to run lvcreate help");
+ runner_end (&runner);
+ goto out;
+ }
+
+ /* Looking for setactivationskip in lvcreate --help */
+ do {
+ ptr = fgets(buf, sizeof(buf),
+ runner_chio (&runner, STDOUT_FILENO));
+ if (ptr) {
+ if (strstr(buf, "setactivationskip")) {
+ match = _gf_true;
+ break;
+ }
+ }
+ } while (ptr != NULL);
+ runner_end (&runner);
+
+ /* Takng the actual snapshot */
+ runinit (&runner);
+ snprintf (msg, sizeof (msg), "taking snapshot of the brick %s:%s",
+ brickinfo->hostname, brickinfo->path);
+ if (match == _gf_true)
+ runner_add_args (&runner, "/sbin/lvcreate", "-s", device,
+ "--setactivationskip", "n", "--name",
+ snap_vol->volname, NULL);
+ else
+ runner_add_args (&runner, "/sbin/lvcreate", "-s", device,
+ "--name", snap_vol->volname, NULL);
+ runner_log (&runner, "", GF_LOG_DEBUG, msg);
+ ret = runner_start (&runner);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "taking snapshot of the "
+ "brick (%s:%s) of device %s failed",
+ brickinfo->hostname, brickinfo->path, device);
+ runner_end (&runner);
+ goto out;
+ }
+ runner_end (&runner);
+
+ snap_device = glusterd_build_snap_device_path (device,
+ snap_vol->volname);
+ if (!snap_device) {
+ gf_log (this->name, GF_LOG_WARNING, "Cannot copy the snapshot "
+ "device name for snap %s (volume id: %s)",
+ snap_vol->snapshot->snapname, snap_vol->volname);
+ ret = -1;
+ goto out;
+ }
+
+out:
+ return snap_device;
+}
+
+int32_t
+glusterd_snap_brick_create (char *device, glusterd_volinfo_t *snap_volinfo,
+ glusterd_brickinfo_t *original_brickinfo,
+ int32_t brick_count, char *snap_brick_dir)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ char snap_brick_mount_path[PATH_MAX] = "";
+ char snap_brick_path[PATH_MAX] = "";
+ char msg[1024] = "";
+ struct stat statbuf = {0, };
+ runner_t runner = {0, };
+
+ this = THIS;
+ priv = this->private;
+
+ GF_ASSERT (device);
+ GF_ASSERT (snap_volinfo);
+ GF_ASSERT (original_brickinfo);
+ GF_ASSERT (snap_brick_dir);
+
+ snprintf (snap_brick_mount_path, sizeof (snap_brick_mount_path),
+ "%s/%s/brick%d", snap_mount_folder, snap_volinfo->volname,
+ brick_count+1);
+
+ snprintf (snap_brick_path, sizeof (snap_brick_path), "%s%s",
+ snap_brick_mount_path, snap_brick_dir);
+
+ ret = mkdir_p (snap_brick_mount_path, 0777, _gf_true);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "creating the brick directory"
+ " %s for the snapshot %s(device: %s) failed",
+ snap_brick_mount_path, snap_volinfo->volname, device);
+ goto out;
+ }
+ /* mount the snap logical device on the directory inside
+ /run/gluster/snaps/<snapname>/@snap_brick_mount_path
+ Way to mount the snap brick via mount api is this.
+ ret = mount (device, snap_brick_mount_path, entry->mnt_type,
+ MS_MGC_VAL, "nouuid");
+ But for now, mounting using runner apis.
+ */
+ runinit (&runner);
+ snprintf (msg, sizeof (msg), "mounting snapshot of the brick %s:%s",
+ original_brickinfo->hostname, original_brickinfo->path);
+ runner_add_args (&runner, "mount", "-o", "nouuid", device,
+ snap_brick_mount_path, NULL);
+ runner_log (&runner, "", GF_LOG_DEBUG, msg);
+
+ /* let glusterd get blocked till snapshot is over */
+ synclock_unlock (&priv->big_lock);
+ ret = runner_run (&runner);
+ synclock_lock (&priv->big_lock);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "mounting the snapshot "
+ "logical device %s failed (error: %s)", device,
+ strerror (errno));
+ goto out;
+ } else
+ gf_log (this->name, GF_LOG_DEBUG, "mounting the snapshot "
+ "logical device %s successful", device);
+
+ ret = stat (snap_brick_path, &statbuf);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "stat of the brick %s"
+ "(brick mount: %s) failed (%s)", snap_brick_path,
+ snap_brick_mount_path, strerror (errno));
+ goto out;
+ }
+ ret = sys_lsetxattr (snap_brick_path,
+ GF_XATTR_VOL_ID_KEY,
+ snap_volinfo->volume_id, 16,
+ XATTR_REPLACE);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set "
+ "extended attribute %s on %s. Reason: "
+ "%s, snap: %s", GF_XATTR_VOL_ID_KEY,
+ snap_brick_path, strerror (errno),
+ snap_volinfo->volname);
+ goto out;
+ }
+
+out:
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "unmounting the snap brick"
+ " mount %s", snap_brick_mount_path);
+ umount (snap_brick_mount_path);
+ }
+
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+/* Added missed_snap_entry to rsp_dict */
+int32_t
+glusterd_add_missed_snaps_to_dict (dict_t *rsp_dict, char *snap_uuid,
+ glusterd_brickinfo_t *brickinfo,
+ int32_t brick_number, int32_t op)
+{
+ char *buf = NULL;
+ char missed_snap_entry[PATH_MAX] = "";
+ char name_buf[PATH_MAX] = "";
+ int32_t missed_snap_count = -1;
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (rsp_dict);
+ GF_ASSERT (snap_uuid);
+ GF_ASSERT (brickinfo);
+
+ snprintf (missed_snap_entry, sizeof(missed_snap_entry),
+ "%s:%s=%d:%s:%d:%d", uuid_utoa(brickinfo->uuid),
+ snap_uuid, brick_number, brickinfo->path, op,
+ GD_MISSED_SNAP_PENDING);
+
+ buf = gf_strdup (missed_snap_entry);
+ if (!buf) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Fetch the missed_snap_count from the dict */
+ ret = dict_get_int32 (rsp_dict, "missed_snap_count",
+ &missed_snap_count);
+ if (ret) {
+ /* Initialize the missed_snap_count for the first time */
+ missed_snap_count = 0;
+ }
+
+ /* Setting the missed_snap_entry in the rsp_dict */
+ snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d",
+ missed_snap_count);
+ ret = dict_set_dynstr (rsp_dict, name_buf, buf);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set missed_snap_entry (%s) "
+ "in the rsp_dict.", buf);
+ GF_FREE (buf);
+ goto out;
+ }
+ missed_snap_count++;
+
+ /* Setting the new missed_snap_count in the dict */
+ ret = dict_set_int32 (rsp_dict, "missed_snap_count",
+ missed_snap_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set missed_snap_count for %s "
+ "in the rsp_dict.", missed_snap_entry);
+ goto out;
+ }
+
+out:
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+static int32_t
+glusterd_add_bricks_to_snap_volume (dict_t *dict, dict_t *rsp_dict,
+ glusterd_volinfo_t *snap_vol,
+ glusterd_brickinfo_t *original_brickinfo,
+ glusterd_brickinfo_t *snap_brickinfo,
+ char **snap_brick_dir, int64_t volcount,
+ int32_t brick_count)
+{
+ char key[PATH_MAX] = "";
+ char snap_brick_path[PATH_MAX] = "";
+ char *snap_device = NULL;
+ gf_boolean_t add_missed_snap = _gf_false;
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (dict);
+ GF_ASSERT (rsp_dict);
+ GF_ASSERT (snap_vol);
+ GF_ASSERT (original_brickinfo);
+ GF_ASSERT (snap_brickinfo);
+ GF_ASSERT (snap_brick_dir);
+
+ snprintf (key, sizeof(key) - 1, "vol%ld.brickdir%d", volcount,
+ brick_count);
+ ret = dict_get_ptr (dict, key, (void **)snap_brick_dir);
+ if (ret) {
+ /* Using original brickinfo here because it will be a
+ * pending snapshot and storing the original brickinfo
+ * will help in mapping while recreating the missed snapshot
+ */
+ gf_log (this->name, GF_LOG_WARNING, "Unable to fetch "
+ "snap mount path (%s). Using original brickinfo", key);
+ snap_brickinfo->snap_status = -1;
+ strcpy (snap_brick_path, original_brickinfo->path);
+
+ /* In origiator node add snaps missed
+ * from different nodes to the dict
+ */
+ if (is_origin_glusterd (dict) == _gf_true)
+ add_missed_snap = _gf_true;
+ } else {
+ /* Create brick-path in the format /var/run/gluster/snaps/ *
+ * <snap-uuid>/<original-brick#>/snap-brick-dir *
+ */
+ snprintf (snap_brick_path, sizeof(snap_brick_path),
+ "%s/%s/brick%d%s", snap_mount_folder,
+ snap_vol->volname, brick_count+1,
+ *snap_brick_dir);
+ }
+
+ if ((snap_brickinfo->snap_status != -1) &&
+ (!uuid_compare (original_brickinfo->uuid, MY_UUID)) &&
+ (!glusterd_is_brick_started (original_brickinfo))) {
+ /* In case if the brick goes down after prevalidate. */
+ gf_log (this->name, GF_LOG_WARNING, "brick %s:%s is not"
+ " started (snap: %s)",
+ original_brickinfo->hostname,
+ original_brickinfo->path,
+ snap_vol->snapshot->snapname);
+
+ snap_brickinfo->snap_status = -1;
+ strcpy (snap_brick_path, original_brickinfo->path);
+ add_missed_snap = _gf_true;
+ }
+
+ if (add_missed_snap) {
+ ret = glusterd_add_missed_snaps_to_dict (rsp_dict,
+ snap_vol->volname,
+ original_brickinfo,
+ brick_count + 1,
+ GF_SNAP_OPTION_TYPE_CREATE);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to add missed"
+ " snapshot info for %s:%s in the rsp_dict",
+ original_brickinfo->hostname,
+ original_brickinfo->path);
+ goto out;
+ }
+ }
+
+ snprintf (key, sizeof(key), "vol%ld.brick_snapdevice%d",
+ volcount, brick_count);
+ ret = dict_get_ptr (dict, key, (void **)&snap_device);
+ if (ret) {
+ /* If the device name is empty, so will be the brick path
+ * Hence the missed snap has already been added above
+ */
+ gf_log (this->name, GF_LOG_ERROR, "Unable to fetch "
+ "snap device (%s). Leaving empty", key);
+ } else
+ strcpy (snap_brickinfo->device_path, snap_device);
+
+ ret = gf_canonicalize_path (snap_brick_path);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to canonicalize path");
+ goto out;
+ }
+
+ strcpy (snap_brickinfo->hostname, original_brickinfo->hostname);
+ strcpy (snap_brickinfo->path, snap_brick_path);
+ uuid_copy (snap_brickinfo->uuid, original_brickinfo->uuid);
+ list_add_tail (&snap_brickinfo->brick_list, &snap_vol->bricks);
+
+out:
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+static int32_t
+glusterd_take_brick_snapshot (glusterd_volinfo_t *origin_vol,
+ glusterd_volinfo_t *snap_vol, dict_t *rsp_dict,
+ glusterd_brickinfo_t *original_brickinfo,
+ glusterd_brickinfo_t *snap_brickinfo,
+ char *snap_brick_dir, int32_t brick_count)
+{
+ char *device = NULL;
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (origin_vol);
+ GF_ASSERT (snap_vol);
+ GF_ASSERT (rsp_dict);
+ GF_ASSERT (original_brickinfo);
+ GF_ASSERT (snap_brickinfo);
+ GF_ASSERT (snap_brick_dir);
+
+ device = glusterd_take_lvm_snapshot (snap_vol, original_brickinfo);
+ /* Fail the snapshot even though snapshot on one of
+ the bricks fails. At the end when we check whether
+ the snapshot volume meets quorum or not, then the
+ the snapshot can either be treated as success, or
+ in case of failure we can undo the changes and return
+ failure to cli. */
+ if (!device) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to take snapshot of %s:%s",
+ original_brickinfo->hostname,
+ original_brickinfo->path);
+ goto out;
+ }
+
+ /* create the complete brick here */
+ ret = glusterd_snap_brick_create (device, snap_vol,
+ original_brickinfo,
+ brick_count, snap_brick_dir);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "not able to"
+ " create the brickinfo for the snap %s"
+ ", volume %s", snap_vol->snapshot->snapname,
+ origin_vol->volname);
+ goto out;
+ }
+
+out:
+ if (device)
+ GF_FREE (device);
+
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+/* Look for disconnected peers, for missed snap creates or deletes */
+static int32_t
+glusterd_find_missed_snap (dict_t *rsp_dict, glusterd_volinfo_t *vol,
+ char *snap_uuid, struct list_head *peers,
+ int32_t op)
+{
+ int32_t brick_count = -1;
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (rsp_dict);
+ GF_ASSERT (peers);
+ GF_ASSERT (vol);
+ GF_ASSERT (snap_uuid);
+
+ brick_count = 0;
+ list_for_each_entry (brickinfo, &vol->bricks, brick_list) {
+ if (!uuid_compare (brickinfo->uuid, MY_UUID)) {
+ /* If the brick belongs to the same node */
+ brick_count++;
+ continue;
+ }
+
+ list_for_each_entry (peerinfo, peers, uuid_list) {
+ if (uuid_compare (peerinfo->uuid, brickinfo->uuid)) {
+ /* If the brick doesnt belong to this peer */
+ continue;
+ }
+
+ /* Found peer who owns the brick, *
+ * if peer is not connected or not *
+ * friend add it to missed snap list */
+ if (!(peerinfo->connected) ||
+ (peerinfo->state.state !=
+ GD_FRIEND_STATE_BEFRIENDED)) {
+ ret = glusterd_add_missed_snaps_to_dict
+ (rsp_dict,
+ snap_uuid,
+ brickinfo,
+ brick_count + 1,
+ op);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to add missed snapshot "
+ "info for %s:%s in the "
+ "rsp_dict", brickinfo->hostname,
+ brickinfo->path);
+ goto out;
+ }
+ }
+ }
+ brick_count++;
+ }
+
+ ret = 0;
+out:
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+glusterd_volinfo_t *
+glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap,
+ dict_t *dict, dict_t *rsp_dict, int64_t volcount)
+{
+ char key[PATH_MAX] = "";
+ char *snap_brick_dir = NULL;
+ char *username = NULL;
+ char *password = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *snap_vol = NULL;
+ uuid_t *snap_volid = NULL;
+ int32_t ret = -1;
+ int32_t brick_count = 0;
+ glusterd_brickinfo_t *snap_brickinfo = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+ GF_ASSERT (origin_vol);
+ GF_ASSERT (dict);
+ GF_ASSERT (rsp_dict);
+
+ /* fetch username, password and vol_id from dict*/
+ snprintf (key, sizeof(key), "volume%ld_username", volcount);
+ ret = dict_get_str (dict, key, &username);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get %s for "
+ "snap %s", key, snap->snapname);
+ goto out;
+ }
+
+ snprintf (key, sizeof(key), "volume%ld_password", volcount);
+ ret = dict_get_str (dict, key, &password);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get %s for "
+ "snap %s", key, snap->snapname);
+ goto out;
+ }
+ snprintf (key, sizeof(key) - 1, "vol%ld_volid", volcount);
+ ret = dict_get_bin (dict, key, (void **)&snap_volid);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to fetch snap_volid");
+ goto out;
+ }
+
+ /* We are not setting the username and password here as
+ * we need to set the user name and password passed in
+ * the dictionary
+ */
+ ret = glusterd_volinfo_dup (origin_vol, &snap_vol, _gf_false);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to duplicate volinfo "
+ "for the snapshot %s", snap->snapname);
+ goto out;
+ }
+
+ /* uuid is used as lvm snapshot name.
+ This will avoid restrictions on snapshot names provided by user */
+ GLUSTERD_GET_UUID_NOHYPHEN (snap_vol->volname, *snap_volid);
+ uuid_copy (snap_vol->volume_id, *snap_volid);
+ snap_vol->is_snap_volume = _gf_true;
+ strcpy (snap_vol->parent_volname, origin_vol->volname);
+ snap_vol->snapshot = snap;
+
+ glusterd_auth_set_username (snap_vol, username);
+ glusterd_auth_set_password (snap_vol, password);
+
+ /* Adding snap brickinfos to the snap volinfo */
+ brick_count = 0;
+ list_for_each_entry (brickinfo, &origin_vol->bricks, brick_list) {
+ snap_brickinfo = NULL;
+
+ ret = glusterd_brickinfo_new (&snap_brickinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "initializing the brick for the snap "
+ "volume failed (snapname: %s)", snap->snapname);
+ goto out;
+ }
+
+ ret = glusterd_add_bricks_to_snap_volume (dict, rsp_dict,
+ snap_vol,
+ brickinfo,
+ snap_brickinfo,
+ &snap_brick_dir,
+ volcount,
+ brick_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to add the snap brick for "
+ "%s:%s to the snap volume",
+ brickinfo->hostname, brickinfo->path);
+ GF_FREE (snap_brickinfo);
+ goto out;
+ }
+
+ /* Take snapshot of the brick */
+ if ((uuid_compare (brickinfo->uuid, MY_UUID)) ||
+ (snap_brickinfo->snap_status == -1)) {
+ brick_count++;
+ continue;
+ }
+
+ ret = glusterd_take_brick_snapshot (origin_vol, snap_vol,
+ rsp_dict, brickinfo,
+ snap_brickinfo,
+ snap_brick_dir,
+ brick_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to take snapshot for %s:%s",
+ brickinfo->hostname, brickinfo->path);
+ goto out;
+ }
+
+ brick_count++;
+ }
+
+ /*TODO: the quorum check of the snap volume here */
+
+ ret = glusterd_store_volinfo (snap_vol,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to store snapshot "
+ "volinfo (%s) for snap %s", snap_vol->volname,
+ snap->snapname);
+ goto out;
+ }
+
+ ret = generate_brick_volfiles (snap_vol);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "generating the brick "
+ "volfiles for the snap %s (volume: %s) failed",
+ snap->snapname, origin_vol->volname);
+ goto out;
+ }
+
+ ret = generate_client_volfiles (snap_vol, GF_CLIENT_TRUSTED);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "generating the trusted "
+ "client volfiles for the snap %s (volume: %s) failed",
+ snap->snapname, origin_vol->volname);
+ goto out;
+ }
+ ret = generate_client_volfiles (snap_vol, GF_CLIENT_OTHER);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "generating the client "
+ "volfiles for the snap %s (volume: %s) failed",
+ snap->snapname, origin_vol->volname);
+ goto out;
+ }
+
+ ret = glusterd_list_add_snapvol (origin_vol, snap_vol);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "could not add the snap "
+ "volume %s to the list", snap_vol->volname);
+ goto out;
+ }
+
+ list_for_each_entry (brickinfo, &snap_vol->bricks, brick_list) {
+ if (uuid_compare (brickinfo->uuid, MY_UUID))
+ continue;
+
+ if (brickinfo->snap_status == -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "not starting snap brick %s:%s for "
+ "for the snap %s (volume: %s)",
+ brickinfo->hostname, brickinfo->path,
+ snap->snapname, origin_vol->volname);
+ continue;
+ }
+
+ ret = glusterd_brick_start (snap_vol, brickinfo, _gf_true);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "starting the "
+ "brick %s:%s for the snap %s (volume: %s) "
+ "failed", brickinfo->hostname, brickinfo->path,
+ snap->snapname, origin_vol->volname);
+ goto out;
+ }
+ }
+
+ snap_vol->status = GLUSTERD_STATUS_STARTED;
+ ret = glusterd_store_volinfo (snap_vol,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to store snap volinfo");
+ goto out;
+ }
+
+out:
+ if (ret) {
+ if (snap_vol)
+ glusterd_snap_volume_remove (rsp_dict, snap_vol,
+ _gf_true, _gf_true);
+ snap_vol = NULL;
+ }
+
+ return snap_vol;
+}
+
+/* This is a snapshot remove handler function. This function will be
+ * executed in the originator node. This function is responsible for
+ * calling mgmt v3 framework to do the actual remove on all the bricks
+ *
+ * @param req RPC request object
+ * @param op gluster operation
+ * @param dict dictionary containing snapshot remove request
+ * @param err_str In case of an err this string should be populated
+ * @param len length of err_str buffer
+ *
+ * @return Negative value on Failure and 0 in success
+ */
+int
+glusterd_handle_snapshot_remove (rpcsvc_request_t *req, glusterd_op_t op,
+ dict_t *dict, char *err_str, size_t len)
+{
+ int ret = -1;
+ int64_t volcount = 0;
+ char *snapname = NULL;
+ char *volname = NULL;
+ char key[PATH_MAX] = "";
+ glusterd_snap_t *snap = NULL;
+ glusterd_volinfo_t *snap_vol = NULL;
+ glusterd_volinfo_t *tmp = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ GF_ASSERT (req);
+ GF_ASSERT (dict);
+ GF_ASSERT (err_str);
+
+ ret = dict_get_str (dict, "snapname", &snapname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get snapname");
+ goto out;
+ }
+
+ snap = glusterd_find_snap_by_name (snapname);
+ if (!snap) {
+ snprintf (err_str, len, "Snap (%s) does not exist", snapname);
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s", err_str);
+ ret = -1;
+ goto out;
+ }
+
+ /* Set volnames in the dict to get mgmt_v3 lock */
+ list_for_each_entry_safe (snap_vol, tmp, &snap->volumes, vol_list) {
+ volcount++;
+ volname = gf_strdup (snap_vol->parent_volname);
+ if (!volname) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR, "strdup failed");
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "volname%ld", volcount);
+ ret = dict_set_dynstr (dict, key, volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set "
+ "volume name in dictionary");
+ GF_FREE (volname);
+ goto out;
+ }
+ volname = NULL;
+ }
+ ret = dict_set_int64 (dict, "volcount", volcount);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set volcount");
+ goto out;
+ }
+
+ ret = glusterd_mgmt_v3_initiate_snap_phases (req, op, dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to initiate snap "
+ "phases");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+glusterd_snapshot_remove_prevalidate (dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ int32_t ret = -1;
+ char *snapname = NULL;
+ xlator_t *this = NULL;
+ glusterd_snap_t *snap = NULL;
+
+ this = THIS;
+
+ if (!dict || !op_errstr) {
+ gf_log (this->name, GF_LOG_ERROR, "input parameters NULL");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "snapname", &snapname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Getting the snap name "
+ "failed");
+ goto out;
+ }
+
+ snap = glusterd_find_snap_by_name (snapname);
+ if (!snap) {
+ gf_log (this->name, GF_LOG_ERROR, "Snap %s does not exist",
+ snapname);
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+glusterd_snapshot_status_prevalidate (dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ int ret = -1;
+ char *snapname = NULL;
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ int32_t cmd = -1;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *volname = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ conf = this->private;
+
+ GF_ASSERT (conf);
+ GF_ASSERT (op_errstr);
+ if (!dict) {
+ gf_log (this->name, GF_LOG_ERROR, "Input dict is NULL");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "cmd", &cmd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not fetch status cmd");
+ goto out;
+ }
+
+ switch (cmd) {
+ case GF_SNAP_STATUS_TYPE_ALL:
+ {
+ break;
+ }
+ case GF_SNAP_STATUS_TYPE_SNAP:
+ {
+ ret = dict_get_str (dict, "snapname", &snapname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not fetch snapname");
+ goto out;
+ }
+
+ if (!glusterd_find_snap_by_name (snapname)) {
+ ret = gf_asprintf (op_errstr, "Snap (%s) "
+ "not found", snapname);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR, "Snap (%s) "
+ "not found", snapname);
+ goto out;
+ }
+ break;
+ }
+ case GF_SNAP_STATUS_TYPE_VOL:
+ {
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not fetch volname");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ ret = gf_asprintf (op_errstr, "Volume (%s)"
+ "not found", volname);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR, "Volume "
+ "%s not present", volname);
+ goto out;
+ }
+ break;
+
+ }
+ default:
+ {
+ gf_log (this->name, GF_LOG_ERROR, "Invalid command");
+ break;
+ }
+ }
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int32_t
+glusterd_snapshot_remove_commit (dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ int32_t ret = -1;
+ char *snapname = NULL;
+ char *dup_snapname = NULL;
+ glusterd_snap_t *snap = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *snap_volinfo = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (dict);
+ GF_ASSERT (rsp_dict);
+ GF_ASSERT (op_errstr);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ if (!dict || !op_errstr) {
+ gf_log (this->name, GF_LOG_ERROR, "input parameters NULL");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "snapname", &snapname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Getting the snap name "
+ "failed");
+ goto out;
+ }
+
+ snap = glusterd_find_snap_by_name (snapname);
+ if (!snap) {
+ gf_log (this->name, GF_LOG_ERROR, "Snap %s does not exist",
+ snapname);
+ ret = -1;
+ goto out;
+ }
+
+ if (is_origin_glusterd (dict) == _gf_true) {
+ /* TODO : As of now there is only volume in snapshot.
+ * Change this when multiple volume snapshot is introduced
+ */
+ snap_volinfo = list_entry (snap->volumes.next,
+ glusterd_volinfo_t,
+ vol_list);
+ if (!snap_volinfo) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to fetch snap_volinfo");
+ ret = -1;
+ goto out;
+ }
+
+ /* From origin glusterd check if *
+ * any peers with snap bricks is down */
+ ret = glusterd_find_missed_snap (rsp_dict, snap_volinfo,
+ snap_volinfo->volname,
+ &priv->peers,
+ GF_SNAP_OPTION_TYPE_DELETE);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to find missed snap deletes");
+ goto out;
+ }
+ }
+
+ ret = glusterd_snap_remove (rsp_dict, snap, _gf_true, _gf_false);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to remove snap %s",
+ snapname);
+ goto out;
+ }
+
+ dup_snapname = gf_strdup (snapname);
+ if (!dup_snapname) {
+ gf_log (this->name, GF_LOG_ERROR, "Strdup failed");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr (rsp_dict, "snapname", dup_snapname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set the snapname");
+ GF_FREE (dup_snapname);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int32_t
+glusterd_do_snap_cleanup (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+ int32_t ret = -1;
+ char *name = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_snap_t *snap = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ conf = this->private;
+ GF_ASSERT (conf);
+
+ if (!dict || !op_errstr) {
+ gf_log (this->name, GF_LOG_ERROR, "input parameters NULL");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "snapname", &name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "getting the snap "
+ "name failed (volume: %s)", volinfo->volname);
+ goto out;
+ }
+
+ /*
+ If the snapname is not found that means the failure happened at
+ staging, or in commit, before the snap object is created, in which
+ case there is nothing to cleanup. So set ret to 0.
+ */
+ snap = glusterd_find_snap_by_name (name);
+ if (!snap) {
+ gf_log (this->name, GF_LOG_INFO, "snap %s is not found", name);
+ ret = 0;
+ goto out;
+ }
+
+ ret = glusterd_snap_remove (rsp_dict, snap, _gf_true, _gf_true);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "removing the snap %s failed",
+ name);
+ goto out;
+ }
+
+ name = NULL;
+
+ ret = 0;
+
+out:
+
+ return ret;
+}
+
+/* In case of a successful, delete or create operation, during post_validate *
+ * look for missed snap operations and update the missed snap lists */
+int32_t
+glusterd_snapshot_update_snaps_post_validate (dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ int32_t ret = -1;
+ int32_t missed_snap_count = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (dict);
+ GF_ASSERT (rsp_dict);
+ GF_ASSERT (op_errstr);
+
+ ret = dict_get_int32 (dict, "missed_snap_count",
+ &missed_snap_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "No missed snaps");
+ ret = 0;
+ goto out;
+ }
+
+ ret = glusterd_store_update_missed_snaps (dict, missed_snap_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to update missed_snaps_list");
+ goto out;
+ }
+
+out:
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_snapshot_create_commit (dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ int ret = -1;
+ int64_t i = 0;
+ int64_t volcount = 0;
+ char *snapname = NULL;
+ char *volname = NULL;
+ char *tmp_name = NULL;
+ char key[PATH_MAX] = "";
+ xlator_t *this = NULL;
+ glusterd_snap_t *snap = NULL;
+ glusterd_volinfo_t *origin_vol = NULL;
+ glusterd_volinfo_t *snap_vol = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+ GF_ASSERT(rsp_dict);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_int64 (dict, "volcount", &volcount);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "get the volume count");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "snapname", &snapname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to fetch snapname");
+ goto out;
+ }
+ tmp_name = gf_strdup (snapname);
+ if (!tmp_name) {
+ gf_log (this->name, GF_LOG_ERROR, "Out of memory");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr (rsp_dict, "snapname", tmp_name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to set snapname in rsp_dict");
+ GF_FREE (tmp_name);
+ goto out;
+ }
+ tmp_name = NULL;
+
+ snap = glusterd_create_snap_object (dict, rsp_dict);
+ if (!snap) {
+ gf_log (this->name, GF_LOG_ERROR, "creating the"
+ "snap object %s failed", snapname);
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 1; i <= volcount; i++) {
+ snprintf (key, sizeof (key), "volname%ld", i);
+ ret = dict_get_str (dict, key, &volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &origin_vol);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to get the volinfo for "
+ "the volume %s", volname);
+ goto out;
+ }
+
+ /* TODO: Create a stub where the bricks are
+ added parallely by worker threads so that
+ the snap creating happens parallely. */
+ snap_vol = glusterd_do_snap_vol (origin_vol, snap, dict,
+ rsp_dict, i);
+ if (!snap_vol) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_WARNING, "taking the "
+ "snapshot of the volume %s failed", volname);
+ goto out;
+ }
+ }
+
+ snap->snap_status = GD_SNAP_STATUS_IN_USE;
+ ret = glusterd_store_snap (snap);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "Could not store snap"
+ "object %s", snap->snapname);
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (ret) {
+ if (snap)
+ glusterd_snap_remove (rsp_dict, snap,
+ _gf_true, _gf_true);
+ snap = NULL;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+int
+snap_max_hard_limit_set_commit (dict_t *dict, uint64_t value,
+ char *volname, char **op_errstr)
+{
+ char err_str[PATH_MAX] = "";
+ glusterd_conf_t *conf = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ GF_ASSERT (this);
+ GF_ASSERT (dict);
+ GF_ASSERT (volname);
+ GF_ASSERT (op_errstr);
+
+ conf = this->private;
+
+ GF_ASSERT (conf);
+
+ /* TODO: Initiate auto deletion when there is a limit change */
+ if (!volname) {
+ /* For system limit */
+ conf->snap_max_hard_limit = value;
+
+ ret = glusterd_store_global_info (this);
+ if (ret) {
+ snprintf (err_str, PATH_MAX, "Failed to store "
+ "snap-max-hard-limit for system");
+ goto out;
+ }
+ } else {
+ /* For one volume */
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ snprintf (err_str, PATH_MAX, "Failed to get the"
+ " volinfo for volume %s", volname);
+ goto out;
+ }
+
+ volinfo->snap_max_hard_limit = value;
+
+ ret = glusterd_store_volinfo (volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ snprintf (err_str, PATH_MAX, "Failed to store "
+ "snap-max-hard-limit for volume %s", volname);
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret) {
+ *op_errstr = gf_strdup (err_str);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ }
+ return ret;
+}
+
+int
+snap_max_limits_display_commit (dict_t *rsp_dict, char *volname,
+ char **op_errstr)
+{
+ char err_str[PATH_MAX] = "";
+ char buf[PATH_MAX] = "";
+ glusterd_conf_t *conf = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ int ret = -1;
+ uint64_t active_hard_limit = 0;
+ uint64_t snap_max_limit = 0;
+ uint64_t soft_limit_value = -1;
+ uint64_t count = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ GF_ASSERT (this);
+ GF_ASSERT (rsp_dict);
+ GF_ASSERT (volname);
+ GF_ASSERT (op_errstr);
+
+ conf = this->private;
+
+ GF_ASSERT (conf);
+
+ if (!volname) {
+ /* For system limit */
+ list_for_each_entry (volinfo, &conf->volumes, vol_list) {
+ if (volinfo->is_snap_volume == _gf_true)
+ continue;
+ snap_max_limit = volinfo->snap_max_hard_limit;
+ if (snap_max_limit > conf->snap_max_hard_limit)
+ active_hard_limit = conf->snap_max_hard_limit;
+ else
+ active_hard_limit = snap_max_limit;
+ soft_limit_value = (active_hard_limit *
+ conf->snap_max_soft_limit) / 100;
+
+ snprintf (buf, sizeof(buf), "volume%ld-volname", count);
+ ret = dict_set_str (rsp_dict, buf, volinfo->volname);
+ if (ret) {
+ snprintf (err_str, PATH_MAX,
+ "Failed to set %s", buf);
+ goto out;
+ }
+
+ snprintf (buf, sizeof(buf),
+ "volume%ld-snap-max-hard-limit", count);
+ ret = dict_set_uint64 (rsp_dict, buf, snap_max_limit);
+ if (ret) {
+ snprintf (err_str, PATH_MAX,
+ "Failed to set %s", buf);
+ goto out;
+ }
+
+ snprintf (buf, sizeof(buf),
+ "volume%ld-active-hard-limit", count);
+ ret = dict_set_uint64 (rsp_dict, buf,
+ active_hard_limit);
+ if (ret) {
+ snprintf (err_str, PATH_MAX,
+ "Failed to set %s", buf);
+ goto out;
+ }
+
+ snprintf (buf, sizeof(buf),
+ "volume%ld-snap-max-soft-limit", count);
+ ret = dict_set_uint64 (rsp_dict, buf, soft_limit_value);
+ if (ret) {
+ snprintf (err_str, PATH_MAX,
+ "Failed to set %s", buf);
+ goto out;
+ }
+ count++;
+ }
+
+ ret = dict_set_uint64 (rsp_dict, "voldisplaycount", count);
+ if (ret) {
+ snprintf (err_str, PATH_MAX,
+ "Failed to set voldisplaycount");
+ goto out;
+ }
+ } else {
+ /* For one volume */
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ snprintf (err_str, PATH_MAX, "Failed to get the"
+ " volinfo for volume %s", volname);
+ goto out;
+ }
+
+ snap_max_limit = volinfo->snap_max_hard_limit;
+ if (snap_max_limit > conf->snap_max_hard_limit)
+ active_hard_limit = conf->snap_max_hard_limit;
+ else
+ active_hard_limit = snap_max_limit;
+
+ soft_limit_value = (active_hard_limit *
+ conf->snap_max_soft_limit) / 100;
+
+ snprintf (buf, sizeof(buf), "volume%ld-volname", count);
+ ret = dict_set_str (rsp_dict, buf, volinfo->volname);
+ if (ret) {
+ snprintf (err_str, PATH_MAX,
+ "Failed to set %s", buf);
+ goto out;
+ }
+
+ snprintf (buf, sizeof(buf),
+ "volume%ld-snap-max-hard-limit", count);
+ ret = dict_set_uint64 (rsp_dict, buf, snap_max_limit);
+ if (ret) {
+ snprintf (err_str, PATH_MAX,
+ "Failed to set %s", buf);
+ goto out;
+ }
+
+ snprintf (buf, sizeof(buf),
+ "volume%ld-active-hard-limit", count);
+ ret = dict_set_uint64 (rsp_dict, buf, active_hard_limit);
+ if (ret) {
+ snprintf (err_str, PATH_MAX,
+ "Failed to set %s", buf);
+ goto out;
+ }
+
+ snprintf (buf, sizeof(buf),
+ "volume%ld-snap-max-soft-limit", count);
+ ret = dict_set_uint64 (rsp_dict, buf, soft_limit_value);
+ if (ret) {
+ snprintf (err_str, PATH_MAX,
+ "Failed to set %s", buf);
+ goto out;
+ }
+
+ count++;
+
+ ret = dict_set_uint64 (rsp_dict, "voldisplaycount", count);
+ if (ret) {
+ snprintf (err_str, PATH_MAX,
+ "Failed to set voldisplaycount");
+ goto out;
+ }
+
+ }
+
+ ret = dict_set_uint64 (rsp_dict, "snap-max-hard-limit",
+ conf->snap_max_hard_limit);
+ if (ret) {
+ snprintf (err_str, PATH_MAX,
+ "Failed to set sys-snap-max-hard-limit ");
+ goto out;
+ }
+
+ ret = dict_set_uint64 (rsp_dict, "snap-max-soft-limit",
+ conf->snap_max_soft_limit);
+ if (ret) {
+ snprintf (err_str, PATH_MAX,
+ "Failed to set sys-snap-max-hard-limit ");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret) {
+ *op_errstr = gf_strdup (err_str);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ }
+ return ret;
+}
+
+int
+glusterd_snapshot_config_commit (dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ char *volname = NULL;
+ xlator_t *this = NULL;
+ int ret = -1;
+ char err_str[PATH_MAX] = {0,};
+ glusterd_conf_t *conf = NULL;
+ int config_command = 0;
+ uint64_t hard_limit = 0;
+ uint64_t soft_limit = 0;
+
+ this = THIS;
+
+ GF_ASSERT (this);
+ GF_ASSERT (dict);
+ GF_ASSERT (op_errstr);
+
+ conf = this->private;
+
+ GF_ASSERT (conf);
+
+ ret = dict_get_int32 (dict, "config-command", &config_command);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to get config-command type");
+ goto out;
+ }
+
+ /* Ignore the return value of the following dict_get,
+ * as they are optional
+ */
+ ret = dict_get_str (dict, "volname", &volname);
+
+ ret = dict_get_uint64 (dict, "snap-max-hard-limit", &hard_limit);
+
+ ret = dict_get_uint64 (dict, "snap-max-soft-limit", &soft_limit);
+
+ switch (config_command) {
+ case GF_SNAP_CONFIG_TYPE_SET:
+ if (hard_limit) {
+ /* Commit ops for snap-max-hard-limit */
+ ret = snap_max_hard_limit_set_commit (dict, hard_limit,
+ volname,
+ op_errstr);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "snap-max-hard-limit set "
+ "commit failed.");
+ goto out;
+ }
+ }
+
+ if (soft_limit) {
+ /* For system limit */
+ conf->snap_max_soft_limit = soft_limit;
+
+ ret = glusterd_store_global_info (this);
+ if (ret) {
+ snprintf (err_str, PATH_MAX, "Failed to store "
+ "snap-max-soft-limit for system");
+ *op_errstr = gf_strdup (err_str);
+ gf_log (this->name, GF_LOG_ERROR, "%s",
+ err_str);
+ goto out;
+ }
+ }
+ break;
+
+ case GF_SNAP_CONFIG_DISPLAY:
+ /* Reading data from local node only */
+ if (!is_origin_glusterd (dict)) {
+ ret = 0;
+ break;
+ }
+
+ ret = snap_max_limits_display_commit (rsp_dict, volname,
+ op_errstr);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "snap-max-limit "
+ "display commit failed.");
+ goto out;
+ }
+ break;
+ default:
+ break;
+ }
+
+out:
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_get_brick_lvm_details (dict_t *rsp_dict,
+ glusterd_brickinfo_t *brickinfo, char *volname,
+ char *device, char *key_prefix)
+{
+
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+ runner_t runner = {0,};
+ xlator_t *this = NULL;
+ char msg[PATH_MAX] = "";
+ char buf[PATH_MAX] = "";
+ char *ptr = NULL;
+ char *token = NULL;
+ char key[PATH_MAX] = "";
+ char *value = NULL;
+
+ GF_ASSERT (rsp_dict);
+ GF_ASSERT (brickinfo);
+ GF_ASSERT (volname);
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ device = glusterd_get_brick_mount_details (brickinfo);
+ if (!device) {
+ gf_log (this->name, GF_LOG_ERROR, "Getting device name for "
+ "the brick %s:%s failed", brickinfo->hostname,
+ brickinfo->path);
+ goto out;
+ }
+ runinit (&runner);
+ snprintf (msg, sizeof (msg), "running lvs command, "
+ "for getting snap status");
+ /* Using lvs command fetch the Volume Group name,
+ * Percentage of data filled and Logical Volume size
+ *
+ * "-o" argument is used to get the desired information,
+ * example : "lvs /dev/VolGroup/thin_vol -o vgname,lv_size",
+ * will get us Volume Group name and Logical Volume size.
+ *
+ * Here separator used is ":",
+ * for the above given command with separator ":",
+ * The output will be "vgname:lvsize"
+ */
+ runner_add_args (&runner, "lvs", device, "--noheading", "-o",
+ "vg_name,data_percent,lv_size",
+ "--separator", ":", NULL);
+ runner_redir (&runner, STDOUT_FILENO, RUN_PIPE);
+ runner_log (&runner, "", GF_LOG_DEBUG, msg);
+ ret = runner_start (&runner);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not perform lvs action");
+ goto end;
+ }
+ do {
+ ptr = fgets (buf, sizeof (buf),
+ runner_chio (&runner, STDOUT_FILENO));
+
+ if (ptr == NULL)
+ break;
+ token = strtok (buf, ":");
+ if (token != NULL) {
+ while (token && token[0] == ' ')
+ token++;
+ if (!token) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Invalid vg entry");
+ goto end;
+ }
+ value = gf_strdup (token);
+ if (!value) {
+ ret = -1;
+ goto end;
+ }
+ ret = snprintf (key, sizeof (key), "%s.vgname",
+ key_prefix);
+ if (ret < 0) {
+ goto end;
+ }
+
+ ret = dict_set_dynstr (rsp_dict, key, value);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not save vgname ");
+ goto end;
+ }
+ }
+
+ token = strtok (NULL, ":");
+ if (token != NULL) {
+ value = gf_strdup (token);
+ if (!value) {
+ ret = -1;
+ goto end;
+ }
+ ret = snprintf (key, sizeof (key), "%s.data",
+ key_prefix);
+ if (ret < 0) {
+ goto end;
+ }
+
+ ret = dict_set_dynstr (rsp_dict, key, value);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not save data percent ");
+ goto end;
+ }
+ }
+ token = strtok (NULL, ":");
+ if (token != NULL) {
+ value = gf_strdup (token);
+ if (!value) {
+ ret = -1;
+ goto end;
+ }
+ ret = snprintf (key, sizeof (key), "%s.lvsize",
+ key_prefix);
+ if (ret < 0) {
+ goto end;
+ }
+
+ ret = dict_set_dynstr (rsp_dict, key, value);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not save meta data percent ");
+ goto end;
+ }
+ }
+
+ } while (ptr != NULL);
+
+ ret = 0;
+
+end:
+ runner_end (&runner);
+
+out:
+ if (ret && value) {
+ GF_FREE (value);
+ }
+
+ return ret;
+}
+
+int
+glusterd_get_single_brick_status (char **op_errstr, dict_t *rsp_dict,
+ char *keyprefix, int index,
+ glusterd_volinfo_t *snap_volinfo,
+ glusterd_brickinfo_t *brickinfo)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ char key[PATH_MAX] = "";
+ char *device = NULL;
+ char *value = NULL;
+ char brick_path[PATH_MAX] = "";
+ char pidfile[PATH_MAX] = "";
+ pid_t pid = -1;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ GF_ASSERT (op_errstr);
+ GF_ASSERT (rsp_dict);
+ GF_ASSERT (keyprefix);
+ GF_ASSERT (snap_volinfo);
+ GF_ASSERT (brickinfo);
+
+ ret = snprintf (key, sizeof (key), "%s.brick%d.path", keyprefix,
+ index);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = snprintf (brick_path, sizeof (brick_path),
+ "%s:%s", brickinfo->hostname, brickinfo->path);
+ if (ret < 0) {
+ goto out;
+ }
+
+ value = gf_strdup (brick_path);
+ if (!value) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr (rsp_dict, key, value);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to store "
+ "brick_path %s", brickinfo->path);
+ goto out;
+ }
+
+ if (brickinfo->snap_status == -1) {
+ /* Setting vgname as "Pending Snapshot" */
+ value = gf_strdup ("Pending Snapshot");
+ if (!value) {
+ ret = -1;
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "%s.brick%d.vgname",
+ keyprefix, index);
+ ret = dict_set_dynstr (rsp_dict, key, value);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not save vgname ");
+ goto out;
+ }
+
+ ret = 0;
+ goto out;
+ }
+ value = NULL;
+
+ ret = snprintf (key, sizeof (key), "%s.brick%d.status",
+ keyprefix, index);
+ if (ret < 0) {
+ goto out;
+ }
+
+ if (brickinfo->status == GF_BRICK_STOPPED) {
+ value = gf_strdup ("No");
+ if (!value) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_str (rsp_dict, key, value);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not save brick status");
+ goto out;
+ }
+ value = NULL;
+ } else {
+ value = gf_strdup ("Yes");
+ if (!value) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_str (rsp_dict, key, value);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not save brick status");
+ goto out;
+ }
+ value = NULL;
+
+ GLUSTERD_GET_BRICK_PIDFILE (pidfile, snap_volinfo,
+ brickinfo, priv);
+ ret = glusterd_is_service_running (pidfile, &pid);
+
+ ret = snprintf (key, sizeof (key), "%s.brick%d.pid",
+ keyprefix, index);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_set_int32 (rsp_dict, key, pid);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not save pid %d", pid);
+ goto out;
+ }
+ }
+
+ ret = snprintf (key, sizeof (key), "%s.brick%d",
+ keyprefix, index);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = glusterd_get_brick_lvm_details (rsp_dict, brickinfo,
+ snap_volinfo->volname,
+ device, key);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get "
+ "brick LVM details");
+ goto out;
+ }
+out:
+ if (ret && value) {
+ GF_FREE (value);
+ }
+
+ return ret;
+}
+
+int
+glusterd_get_single_snap_status (char **op_errstr, dict_t *rsp_dict,
+ char *keyprefix, glusterd_snap_t *snap)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ char key[PATH_MAX] = "";
+ char brickkey[PATH_MAX] = "";
+ glusterd_volinfo_t *snap_volinfo = NULL;
+ glusterd_volinfo_t *tmp_volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ int volcount = 0;
+ int brickcount = 0;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ GF_ASSERT (op_errstr);
+ GF_ASSERT (rsp_dict);
+ GF_ASSERT (keyprefix);
+ GF_ASSERT (snap);
+
+ list_for_each_entry_safe (snap_volinfo, tmp_volinfo, &snap->volumes,
+ vol_list) {
+ ret = snprintf (key, sizeof (key), "%s.vol%d", keyprefix,
+ volcount);
+ if (ret < 0) {
+ goto out;
+ }
+ list_for_each_entry (brickinfo, &snap_volinfo->bricks,
+ brick_list) {
+ if (!glusterd_is_local_brick (this, snap_volinfo,
+ brickinfo)) {
+ brickcount++;
+ continue;
+ }
+
+ ret = glusterd_get_single_brick_status (op_errstr,
+ rsp_dict, key, brickcount,
+ snap_volinfo, brickinfo);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Getting "
+ "single snap status failed");
+ goto out;
+ }
+ brickcount++;
+ }
+ ret = snprintf (brickkey, sizeof (brickkey), "%s.brickcount",
+ key);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_set_int32 (rsp_dict, brickkey, brickcount);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not save brick count");
+ goto out;
+ }
+ volcount++;
+ }
+
+ ret = snprintf (key, sizeof (key), "%s.volcount", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_set_int32 (rsp_dict, key, volcount);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not save volcount");
+ goto out;
+ }
+
+out:
+
+ return ret;
+}
+
+int
+glusterd_get_each_snap_object_status (char **op_errstr, dict_t *rsp_dict,
+ glusterd_snap_t *snap, char *keyprefix)
+{
+ int ret = -1;
+ char key[PATH_MAX] = "";
+ char *temp = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (op_errstr);
+ GF_ASSERT (rsp_dict);
+ GF_ASSERT (snap);
+ GF_ASSERT (keyprefix);
+
+ /* TODO : Get all the snap volume info present in snap object,
+ * as of now, There will be only one snapvolinfo per snap object
+ */
+ ret = snprintf (key, sizeof (key), "%s.snapname", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ temp = gf_strdup (snap->snapname);
+ if (temp == NULL) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_dynstr (rsp_dict, key, temp);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not save "
+ "snap name");
+ goto out;
+ }
+
+ temp = NULL;
+
+ ret = snprintf (key, sizeof (key), "%s.uuid", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ temp = gf_strdup (uuid_utoa (snap->snap_id));
+ if (temp == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr (rsp_dict, key, temp);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not save "
+ "snap UUID");
+ goto out;
+ }
+
+ temp = NULL;
+
+ ret = glusterd_get_single_snap_status (op_errstr, rsp_dict, keyprefix,
+ snap);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not get single snap status");
+ goto out;
+ }
+
+ ret = snprintf (key, sizeof (key), "%s.volcount", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_set_int32 (rsp_dict, key, 1);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not save volcount");
+ goto out;
+ }
+out:
+ if (ret && temp)
+ GF_FREE (temp);
+
+ return ret;
+}
+
+int
+glusterd_get_snap_status_of_volume (char **op_errstr, dict_t *rsp_dict,
+ char *volname, char *keyprefix) {
+ int ret = -1;
+ glusterd_volinfo_t *snap_volinfo = NULL;
+ glusterd_volinfo_t *temp_volinfo = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char key[PATH_MAX] = "";
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ int i = 0;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ GF_ASSERT (op_errstr);
+ GF_ASSERT (rsp_dict);
+ GF_ASSERT (volname);
+ GF_ASSERT (keyprefix);
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get volinfo of "
+ "volume %s", volname);
+ goto out;
+ }
+
+ list_for_each_entry_safe (snap_volinfo, temp_volinfo,
+ &volinfo->snap_volumes, snapvol_list) {
+ ret = snprintf (key, sizeof (key), "status.snap%d", i);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = glusterd_get_each_snap_object_status (op_errstr,
+ rsp_dict, snap_volinfo->snapshot, key);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Function : "
+ "glusterd_get_single_snap_status failed");
+ goto out;
+ }
+ i++;
+ }
+
+ ret = dict_set_int32 (rsp_dict, "status.snapcount", i);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to save snapcount");
+ ret = -1;
+ goto out;
+ }
+out:
+ return ret;
+}
+
+int
+glusterd_get_all_snapshot_status (dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ int32_t i = 0;
+ int ret = -1;
+ char key[PATH_MAX] = "";
+ glusterd_conf_t *priv = NULL;
+ glusterd_snap_t *snap = NULL;
+ glusterd_snap_t *tmp_snap = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ GF_ASSERT (dict);
+ GF_ASSERT (op_errstr);
+
+ list_for_each_entry_safe (snap, tmp_snap,
+ &priv->snapshots, snap_list) {
+ ret = snprintf (key, sizeof (key), "status.snap%d", i);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = glusterd_get_each_snap_object_status (op_errstr,
+ rsp_dict, snap, key);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not get "
+ "the details of a snap object: %s",
+ snap->snapname);
+ goto out;
+ }
+ i++;
+ }
+
+ ret = dict_set_int32 (rsp_dict, "status.snapcount", i);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not save snapcount");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+int
+glusterd_snapshot_status_commit (dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ xlator_t *this = NULL;
+ int ret = -1;
+ glusterd_conf_t *conf = NULL;
+ char *get_buffer = NULL;
+ int32_t cmd = -1;
+ char *snapname = NULL;
+ glusterd_snap_t *snap = NULL;
+ char *volname = NULL;
+
+ this = THIS;
+
+ GF_ASSERT (this);
+ GF_ASSERT (dict);
+ GF_ASSERT (op_errstr);
+
+ conf = this->private;
+
+ GF_ASSERT (conf);
+ ret = dict_get_int32 (dict, "cmd", &cmd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to get status cmd type");
+ goto out;
+ }
+
+ ret = dict_set_int32 (rsp_dict, "cmd", cmd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not save status cmd in rsp dictionary");
+ goto out;
+ }
+ switch (cmd) {
+ case GF_SNAP_STATUS_TYPE_ALL:
+ {
+ ret = glusterd_get_all_snapshot_status (dict, op_errstr,
+ rsp_dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to "
+ "get snapshot status");
+ goto out;
+ }
+ break;
+ }
+ case GF_SNAP_STATUS_TYPE_SNAP:
+ {
+
+ ret = dict_get_str (dict, "snapname", &snapname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to "
+ "get snap name");
+ goto out;
+ }
+
+ snap = glusterd_find_snap_by_name (snapname);
+ if (!snap) {
+ ret = gf_asprintf (op_errstr, "Snap (%s) "
+ "not found", snapname);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR, "Unable to "
+ "get snap volinfo");
+ goto out;
+ }
+ ret = glusterd_get_each_snap_object_status (op_errstr,
+ rsp_dict, snap, "status.snap0");
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to "
+ "get status of snap %s", get_buffer);
+ goto out;
+ }
+ break;
+ }
+ case GF_SNAP_STATUS_TYPE_VOL:
+ {
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to"
+ " get volume name");
+ goto out;
+ }
+
+ ret = glusterd_get_snap_status_of_volume (op_errstr,
+ rsp_dict, volname, "status.vol0");
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Function :"
+ " glusterd_get_snap_status_of_volume "
+ "failed");
+ goto out;
+ }
+ }
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+int32_t
+glusterd_snapshot_create_postvalidate (dict_t *dict, int32_t op_ret,
+ char **op_errstr, dict_t *rsp_dict)
+{
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ int ret = -1;
+
+ this = THIS;
+
+ GF_ASSERT (this);
+ GF_ASSERT (dict);
+ GF_ASSERT (rsp_dict);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ if (op_ret) {
+ ret = glusterd_do_snap_cleanup (dict, op_errstr, rsp_dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "cleanup operation "
+ "failed");
+ goto out;
+ }
+ } else {
+ ret = glusterd_snapshot_update_snaps_post_validate (dict,
+ op_errstr,
+ rsp_dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "create snapshot");
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int32_t
+glusterd_snapshot (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ int32_t snap_command = 0;
+ int ret = -1;
+
+ this = THIS;
+
+ GF_ASSERT (this);
+ GF_ASSERT (dict);
+ GF_ASSERT (rsp_dict);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = dict_get_int32 (dict, "type", &snap_command);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "unable to get the type of "
+ "the snapshot command");
+ goto out;
+ }
+
+ switch (snap_command) {
+ case (GF_SNAP_OPTION_TYPE_CREATE):
+ ret = glusterd_snapshot_create_commit (dict, op_errstr,
+ rsp_dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "create snapshot");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_CONFIG:
+ ret = glusterd_snapshot_config_commit (dict, op_errstr,
+ rsp_dict);
+ break;
+
+ case GF_SNAP_OPTION_TYPE_DELETE:
+ ret = glusterd_snapshot_remove_commit (dict, op_errstr,
+ rsp_dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "delete snapshot");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_RESTORE:
+ ret = glusterd_snapshot_restore (dict, op_errstr,
+ rsp_dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "Failed to "
+ "restore snapshot");
+ goto out;
+ }
+
+ break;
+
+ case GF_SNAP_OPTION_TYPE_STATUS:
+ ret = glusterd_snapshot_status_commit (dict, op_errstr,
+ rsp_dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "show snapshot status");
+ goto out;
+ }
+ break;
+
+
+ default:
+ gf_log (this->name, GF_LOG_WARNING, "invalid snap command");
+ goto out;
+ break;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int
+glusterd_snapshot_brickop (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+ int ret = -1;
+ int64_t vol_count = 0;
+ int64_t count = 1;
+ char key[1024] = {0,};
+ char *volname = NULL;
+ int32_t snap_command = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ GF_ASSERT (this);
+ GF_ASSERT (dict);
+ GF_ASSERT (rsp_dict);
+
+ ret = dict_get_int32 (dict, "type", &snap_command);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "unable to get the type of "
+ "the snapshot command");
+ goto out;
+ }
+
+ switch (snap_command) {
+ case GF_SNAP_OPTION_TYPE_CREATE:
+ ret = dict_get_int64 (dict, "volcount", &vol_count);
+ if (ret)
+ goto out;
+ while (count <= vol_count) {
+ snprintf (key, 1024, "volname%"PRId64, count);
+ ret = dict_get_str (dict, key, &volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to get volname");
+ goto out;
+ }
+ ret = dict_set_str (dict, "volname", volname);
+ if (ret)
+ goto out;
+
+ ret = gd_brick_op_phase (GD_OP_SNAP, NULL, dict,
+ op_errstr);
+ if (ret)
+ goto out;
+ volname = NULL;
+ count++;
+ }
+
+ dict_del (dict, "volname");
+ ret = 0;
+ break;
+ case GF_SNAP_OPTION_TYPE_DELETE:
+ break;
+ default:
+ break;
+ }
+
+out:
+ return ret;
+}
+
+int
+glusterd_snapshot_prevalidate (dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ int snap_command = 0;
+ xlator_t *this = NULL;
+ int ret = -1;
+
+ this = THIS;
+
+ GF_ASSERT (this);
+ GF_ASSERT (dict);
+ GF_ASSERT (rsp_dict);
+
+ ret = dict_get_int32 (dict, "type", &snap_command);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "unable to get the type of "
+ "the snapshot command");
+ goto out;
+ }
+
+ switch (snap_command) {
+ case (GF_SNAP_OPTION_TYPE_CREATE):
+ ret = glusterd_snapshot_create_prevalidate (dict, op_errstr,
+ rsp_dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "Snapshot create "
+ "pre-validation failed");
+ goto out;
+ }
+ break;
+
+ case (GF_SNAP_OPTION_TYPE_CONFIG):
+ ret = glusterd_snapshot_config_prevalidate (dict, op_errstr);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "Snapshot config "
+ "pre-validation failed");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_RESTORE:
+ ret = glusterd_snapshot_restore_prevalidate (dict, op_errstr,
+ rsp_dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "Snapshot restore "
+ "validation failed");
+ goto out;
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_DELETE:
+ ret = glusterd_snapshot_remove_prevalidate (dict, op_errstr,
+ rsp_dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "Snapshot remove "
+ "validation failed");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_STATUS:
+ ret = glusterd_snapshot_status_prevalidate (dict, op_errstr,
+ rsp_dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "Snapshot status "
+ "validation failed");
+ goto out;
+ }
+ break;
+
+ default:
+ gf_log (this->name, GF_LOG_WARNING, "invalid snap command");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+glusterd_snapshot_postvalidate (dict_t *dict, int32_t op_ret, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ int snap_command = 0;
+ xlator_t *this = NULL;
+ int ret = -1;
+
+ this = THIS;
+
+ GF_ASSERT (this);
+ GF_ASSERT (dict);
+ GF_ASSERT (rsp_dict);
+
+ ret = dict_get_int32 (dict, "type", &snap_command);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "unable to get the type of "
+ "the snapshot command");
+ goto out;
+ }
+
+ switch (snap_command) {
+ case GF_SNAP_OPTION_TYPE_CREATE:
+ ret = glusterd_snapshot_create_postvalidate (dict, op_ret,
+ op_errstr,
+ rsp_dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "Snapshot create "
+ "post-validation failed");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_DELETE:
+ case GF_SNAP_OPTION_TYPE_RESTORE:
+ ret = glusterd_snapshot_update_snaps_post_validate (dict,
+ op_errstr,
+ rsp_dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "update missed snaps list");
+ goto out;
+ }
+ break;
+
+ default:
+ gf_log (this->name, GF_LOG_WARNING, "invalid snap command");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+glusterd_handle_snapshot_fn (rpcsvc_request_t *req)
+{
+ int32_t ret = 0;
+ dict_t *dict = NULL;
+ gf_cli_req cli_req = {{0},};
+ glusterd_op_t cli_op = GD_OP_SNAP;
+ int type = 0;
+ glusterd_conf_t *conf = NULL;
+ char *host_uuid = NULL;
+ char err_str[2048] = {0,};
+ xlator_t *this = NULL;
+ char *volname = NULL;
+
+ GF_ASSERT (req);
+
+ this = THIS;
+ GF_ASSERT (this);
+ conf = this->private;
+ GF_ASSERT (conf);
+
+ ret = xdr_to_generic (req->msg[0], &cli_req,
+ (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len > 0) {
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf (err_str, sizeof (err_str), "Unable to decode "
+ "the command");
+ goto out;
+ }
+
+ dict->extra_stdfree = cli_req.dict.dict_val;
+
+ host_uuid = gf_strdup (uuid_utoa(MY_UUID));
+ if (host_uuid == NULL) {
+ snprintf (err_str, sizeof (err_str), "Failed to get "
+ "the uuid of local glusterd");
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_dynstr (dict, "host-uuid", host_uuid);
+ if (ret) {
+ GF_FREE (host_uuid);
+ goto out;
+ }
+
+
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "request dict length is %d",
+ cli_req.dict.dict_len);
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "type", &type);
+ if (ret < 0) {
+ snprintf (err_str, sizeof (err_str), "Command type not found");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ switch (type) {
+ case GF_SNAP_OPTION_TYPE_CREATE:
+ ret = glusterd_handle_snapshot_create (req, cli_op, dict,
+ err_str, sizeof (err_str));
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "Snapshot create "
+ "failed: %s", err_str);
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_RESTORE:
+ ret = glusterd_handle_snapshot_restore (req, cli_op, dict,
+ err_str, sizeof (err_str));
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "Snapshot restore "
+ "failed: %s", err_str);
+ }
+
+ break;
+ case GF_SNAP_OPTION_TYPE_INFO:
+ ret = glusterd_handle_snapshot_info (req, cli_op, dict,
+ err_str, sizeof (err_str));
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "Snapshot info "
+ "failed");
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_LIST:
+ ret = glusterd_handle_snapshot_list (req, cli_op, dict,
+ err_str, sizeof (err_str));
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "Snapshot list "
+ "failed");
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_CONFIG:
+ /* TODO : Type of lock to be taken when we are setting
+ * limits system wide
+ */
+ ret = dict_get_str (dict, "volname", &volname);
+ if (!volname) {
+ ret = dict_set_int32 (dict, "hold_vol_locks",
+ _gf_false);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Unable to set hold_vol_locks value "
+ "as _gf_false");
+ goto out;
+ }
+
+ }
+ ret = glusterd_mgmt_v3_initiate_all_phases (req, cli_op, dict);
+ break;
+ case GF_SNAP_OPTION_TYPE_DELETE:
+ ret = glusterd_handle_snapshot_remove (req, cli_op, dict,
+ err_str,
+ sizeof (err_str));
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "Snapshot delete "
+ "failed: %s", err_str);
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_START:
+ case GF_SNAP_OPTION_TYPE_STOP:
+ case GF_SNAP_OPTION_TYPE_STATUS:
+ ret = glusterd_handle_snapshot_status (req, cli_op, dict,
+ err_str,
+ sizeof (err_str));
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "Snapshot status "
+ "failed: %s", err_str);
+ }
+ break;
+ default:
+ gf_log (this->name, GF_LOG_ERROR, "Unkown snapshot request "
+ "type (%d)", type);
+ ret = -1; /* Failure */
+ }
+
+out:
+ if (ret) {
+ if (err_str[0] == '\0')
+ snprintf (err_str, sizeof (err_str),
+ "Operation failed");
+ ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+ dict, err_str);
+ }
+
+ return ret;
+}
+
+int
+glusterd_handle_snapshot (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req, glusterd_handle_snapshot_fn);
+}
+
+static inline void
+glusterd_free_snap_op (glusterd_snap_op_t *snap_op)
+{
+ if (snap_op) {
+ if (snap_op->brick_path)
+ GF_FREE (snap_op->brick_path);
+
+ GF_FREE (snap_op);
+ }
+}
+
+/* Look for duplicates and accordingly update the list */
+int32_t
+glusterd_update_missed_snap_entry (glusterd_missed_snap_info *missed_snapinfo,
+ glusterd_snap_op_t *missed_snap_op)
+{
+ int32_t ret = -1;
+ glusterd_snap_op_t *snap_opinfo = NULL;
+ gf_boolean_t match = _gf_false;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(missed_snapinfo);
+ GF_ASSERT(missed_snap_op);
+
+ list_for_each_entry (snap_opinfo, &missed_snapinfo->snap_ops,
+ snap_ops_list) {
+ if ((!strcmp (snap_opinfo->brick_path,
+ missed_snap_op->brick_path)) &&
+ (snap_opinfo->op == missed_snap_op->op)) {
+ /* If two entries have conflicting status
+ * GD_MISSED_SNAP_DONE takes precedence
+ */
+ if ((snap_opinfo->status == GD_MISSED_SNAP_PENDING) &&
+ (missed_snap_op->status == GD_MISSED_SNAP_DONE)) {
+ snap_opinfo->status = GD_MISSED_SNAP_DONE;
+ gf_log (this->name, GF_LOG_INFO,
+ "Updating missed snap status "
+ "for %s:%d:%s:%d as DONE",
+ missed_snapinfo->node_snap_info,
+ snap_opinfo->brick_num,
+ snap_opinfo->brick_path,
+ snap_opinfo->op);
+ ret = 0;
+ glusterd_free_snap_op (missed_snap_op);
+ goto out;
+ }
+ match = _gf_true;
+ break;
+ } else if ((snap_opinfo->brick_num ==
+ missed_snap_op->brick_num) &&
+ (snap_opinfo->op == GF_SNAP_OPTION_TYPE_CREATE) &&
+ (missed_snap_op->op ==
+ GF_SNAP_OPTION_TYPE_DELETE)) {
+ /* Optimizing create and delete entries for the same
+ * brick and same node
+ */
+ gf_log (this->name, GF_LOG_INFO,
+ "Updating missed snap status "
+ "for %s:%d:%s:%d as DONE",
+ missed_snapinfo->node_snap_info,
+ snap_opinfo->brick_num,
+ snap_opinfo->brick_path,
+ snap_opinfo->op);
+ snap_opinfo->status = GD_MISSED_SNAP_DONE;
+ ret = 0;
+ glusterd_free_snap_op (missed_snap_op);
+ goto out;
+ }
+ }
+
+ if (match == _gf_true) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Duplicate entry. Not updating");
+ glusterd_free_snap_op (missed_snap_op);
+ } else {
+ list_add_tail (&missed_snap_op->snap_ops_list,
+ &missed_snapinfo->snap_ops);
+ }
+
+ ret = 0;
+out:
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+/* Add new missed snap entry to the missed_snaps list. */
+int32_t
+glusterd_store_missed_snaps_list (char *missed_info, int32_t brick_num,
+ char *brick_path, int32_t snap_op,
+ int32_t snap_status)
+{
+ int32_t ret = -1;
+ glusterd_missed_snap_info *missed_snapinfo = NULL;
+ glusterd_snap_op_t *missed_snap_op = NULL;
+ glusterd_conf_t *priv = NULL;
+ gf_boolean_t match = _gf_false;
+ gf_boolean_t free_missed_snap_info = _gf_false;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(missed_info);
+ GF_ASSERT(brick_path);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ /* Create the snap_op object consisting of the *
+ * snap id and the op */
+ ret = glusterd_missed_snap_op_new (&missed_snap_op);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to create new missed snap object.");
+ ret = -1;
+ goto out;
+ }
+
+ missed_snap_op->brick_path = gf_strdup(brick_path);
+ if (!missed_snap_op->brick_path) {
+ ret = -1;
+ goto out;
+ }
+ missed_snap_op->brick_num = brick_num;
+ missed_snap_op->op = snap_op;
+ missed_snap_op->status = snap_status;
+
+ /* Look for other entries for the same node and same snap */
+ list_for_each_entry (missed_snapinfo, &priv->missed_snaps_list,
+ missed_snaps) {
+ if (!strcmp (missed_snapinfo->node_snap_info,
+ missed_info)) {
+ /* Found missed snapshot info for *
+ * the same node and same snap */
+ match = _gf_true;
+ break;
+ }
+ }
+
+ if (match == _gf_false) {
+ /* First snap op missed for the brick */
+ ret = glusterd_missed_snapinfo_new (&missed_snapinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to create missed snapinfo");
+ goto out;
+ }
+ free_missed_snap_info = _gf_true;
+ missed_snapinfo->node_snap_info = gf_strdup(missed_info);
+ if (!missed_snapinfo->node_snap_info) {
+ ret = -1;
+ goto out;
+ }
+
+ list_add_tail (&missed_snap_op->snap_ops_list,
+ &missed_snapinfo->snap_ops);
+ list_add_tail (&missed_snapinfo->missed_snaps,
+ &priv->missed_snaps_list);
+
+ ret = 0;
+ goto out;
+ } else {
+ ret = glusterd_update_missed_snap_entry (missed_snapinfo,
+ missed_snap_op);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to update existing missed snap entry.");
+ goto out;
+ }
+ }
+
+out:
+ if (ret) {
+ glusterd_free_snap_op (missed_snap_op);
+
+ if (missed_snapinfo &&
+ (free_missed_snap_info == _gf_true)) {
+ if (missed_snapinfo->node_snap_info)
+ GF_FREE (missed_snapinfo->node_snap_info);
+
+ GF_FREE (missed_snapinfo);
+ }
+ }
+
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+/* Add missing snap entries to the in-memory conf->missed_snap_list */
+int32_t
+glusterd_add_missed_snaps_to_list (dict_t *dict, int32_t missed_snap_count)
+{
+ char *buf = NULL;
+ char *tmp = NULL;
+ char *save_ptr = NULL;
+ char *nodeid = NULL;
+ char *snap_uuid = NULL;
+ char *brick_path = NULL;
+ char missed_info[PATH_MAX] = "";
+ char name_buf[PATH_MAX] = "";
+ int32_t i = -1;
+ int32_t ret = -1;
+ int32_t brick_num = -1;
+ int32_t snap_op = -1;
+ int32_t snap_status = -1;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ /* We can update the missed_snaps_list without acquiring *
+ * any additional locks as big lock will be held. */
+ for (i = 0; i < missed_snap_count; i++) {
+ snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d",
+ i);
+ ret = dict_get_str (dict, name_buf, &buf);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to fetch %s", name_buf);
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "missed_snap_entry = %s",
+ buf);
+
+ /* Need to make a duplicate string coz the same dictionary *
+ * is resent to the non-originator nodes */
+ tmp = gf_strdup (buf);
+ if (!tmp) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Fetch the node-id, snap-id, brick_num,
+ * brick_path, snap_op and snap status
+ */
+ nodeid = strtok_r (tmp, ":", &save_ptr);
+ snap_uuid = strtok_r (NULL, "=", &save_ptr);
+ brick_num = atoi(strtok_r (NULL, ":", &save_ptr));
+ brick_path = strtok_r (NULL, ":", &save_ptr);
+ snap_op = atoi(strtok_r (NULL, ":", &save_ptr));
+ snap_status = atoi(strtok_r (NULL, ":", &save_ptr));
+
+ if (!nodeid || !snap_uuid || !brick_path ||
+ brick_num < 1 || snap_op < 1 ||
+ snap_status < 1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Invalid missed_snap_entry");
+ ret = -1;
+ goto out;
+ }
+
+ snprintf (missed_info, sizeof(missed_info), "%s:%s",
+ nodeid, snap_uuid);
+
+ ret = glusterd_store_missed_snaps_list (missed_info,
+ brick_num,
+ brick_path,
+ snap_op,
+ snap_status);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to store missed snaps_list");
+ goto out;
+ }
+
+ GF_FREE (tmp);
+ tmp = NULL;
+ }
+
+out:
+ if (tmp)
+ GF_FREE (tmp);
+
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
new file mode 100644
index 000000000..1c2ec58e8
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-store.c
@@ -0,0 +1,3564 @@
+/*
+ Copyright (c) 2007-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterd-op-sm.h"
+#include <inttypes.h>
+
+
+#include "globals.h"
+#include "glusterfs.h"
+#include "compat.h"
+#include "dict.h"
+#include "protocol-common.h"
+#include "xlator.h"
+#include "logging.h"
+#include "timer.h"
+#include "defaults.h"
+#include "compat.h"
+#include "compat-errno.h"
+#include "statedump.h"
+#include "glusterd-mem-types.h"
+#include "glusterd.h"
+#include "glusterd-sm.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-utils.h"
+#include "glusterd-hooks.h"
+#include "store.h"
+#include "glusterd-store.h"
+
+#include "rpc-clnt.h"
+#include "common-utils.h"
+
+#include <sys/resource.h>
+#include <inttypes.h>
+#include <dirent.h>
+
+void
+glusterd_replace_slash_with_hyphen (char *str)
+{
+ char *ptr = NULL;
+
+ ptr = strchr (str, '/');
+
+ while (ptr) {
+ *ptr = '-';
+ ptr = strchr (str, '/');
+ }
+}
+
+int32_t
+glusterd_store_create_brick_dir (glusterd_volinfo_t *volinfo)
+{
+ int32_t ret = -1;
+ char brickdirpath[PATH_MAX] = {0,};
+ glusterd_conf_t *priv = NULL;
+
+ GF_ASSERT (volinfo);
+
+ priv = THIS->private;
+ GF_ASSERT (priv);
+
+ GLUSTERD_GET_BRICK_DIR (brickdirpath, volinfo, priv);
+ ret = gf_store_mkdir (brickdirpath);
+
+ return ret;
+}
+
+static void
+glusterd_store_key_vol_brick_set (glusterd_brickinfo_t *brickinfo,
+ char *key_vol_brick, size_t len)
+{
+ GF_ASSERT (brickinfo);
+ GF_ASSERT (key_vol_brick);
+ GF_ASSERT (len >= PATH_MAX);
+
+ snprintf (key_vol_brick, len, "%s", brickinfo->path);
+ glusterd_replace_slash_with_hyphen (key_vol_brick);
+}
+
+static void
+glusterd_store_brickinfofname_set (glusterd_brickinfo_t *brickinfo,
+ char *brickfname, size_t len)
+{
+ char key_vol_brick[PATH_MAX] = {0};
+
+ GF_ASSERT (brickfname);
+ GF_ASSERT (brickinfo);
+ GF_ASSERT (len >= PATH_MAX);
+
+ glusterd_store_key_vol_brick_set (brickinfo, key_vol_brick,
+ sizeof (key_vol_brick));
+ snprintf (brickfname, len, "%s:%s", brickinfo->hostname, key_vol_brick);
+}
+
+static void
+glusterd_store_brickinfopath_set (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ char *brickpath, size_t len)
+{
+ char brickfname[PATH_MAX] = {0};
+ char brickdirpath[PATH_MAX] = {0,};
+ glusterd_conf_t *priv = NULL;
+
+ GF_ASSERT (brickpath);
+ GF_ASSERT (brickinfo);
+ GF_ASSERT (len >= PATH_MAX);
+
+ priv = THIS->private;
+ GF_ASSERT (priv);
+
+ GLUSTERD_GET_BRICK_DIR (brickdirpath, volinfo, priv);
+ glusterd_store_brickinfofname_set (brickinfo, brickfname,
+ sizeof (brickfname));
+ snprintf (brickpath, len, "%s/%s", brickdirpath, brickfname);
+}
+
+gf_boolean_t
+glusterd_store_is_valid_brickpath (char *volname, char *brick)
+{
+ char brickpath[PATH_MAX] = {0};
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ int32_t ret = 0;
+ size_t volname_len = strlen (volname);
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = glusterd_brickinfo_new_from_brick (brick, &brickinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "Failed to create brick "
+ "info for brick %s", brick);
+ ret = 0;
+ goto out;
+ }
+ ret = glusterd_volinfo_new (&volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "Failed to create volinfo");
+ ret = 0;
+ goto out;
+ }
+ if (volname_len >= sizeof (volinfo->volname)) {
+ gf_log (this->name, GF_LOG_WARNING, "volume name too long");
+ ret = 0;
+ goto out;
+ }
+ memcpy (volinfo->volname, volname, volname_len+1);
+ glusterd_store_brickinfopath_set (volinfo, brickinfo, brickpath,
+ sizeof (brickpath));
+
+ ret = (strlen (brickpath) < _POSIX_PATH_MAX);
+
+out:
+ if (brickinfo)
+ glusterd_brickinfo_delete (brickinfo);
+ if (volinfo)
+ glusterd_volinfo_delete (volinfo);
+
+ return ret;
+}
+
+int32_t
+glusterd_store_volinfo_brick_fname_write (int vol_fd,
+ glusterd_brickinfo_t *brickinfo,
+ int32_t brick_count)
+{
+ char key[PATH_MAX] = {0,};
+ char brickfname[PATH_MAX] = {0,};
+ int32_t ret = -1;
+
+ snprintf (key, sizeof (key), "%s-%d", GLUSTERD_STORE_KEY_VOL_BRICK,
+ brick_count);
+ glusterd_store_brickinfofname_set (brickinfo, brickfname,
+ sizeof (brickfname));
+ ret = gf_store_save_value (vol_fd, key, brickfname);
+ if (ret)
+ goto out;
+
+out:
+ return ret;
+}
+
+int32_t
+glusterd_store_create_brick_shandle_on_absence (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo)
+{
+ char brickpath[PATH_MAX] = {0,};
+ int32_t ret = 0;
+
+ GF_ASSERT (volinfo);
+ GF_ASSERT (brickinfo);
+
+ glusterd_store_brickinfopath_set (volinfo, brickinfo, brickpath,
+ sizeof (brickpath));
+ ret = gf_store_handle_create_on_absence (&brickinfo->shandle,
+ brickpath);
+ return ret;
+}
+
+int32_t
+glusterd_store_brickinfo_write (int fd, glusterd_brickinfo_t *brickinfo)
+{
+ char value[256] = {0,};
+ int32_t ret = 0;
+
+ GF_ASSERT (brickinfo);
+ GF_ASSERT (fd > 0);
+
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_HOSTNAME,
+ brickinfo->hostname);
+ if (ret)
+ goto out;
+
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_PATH,
+ brickinfo->path);
+ if (ret)
+ goto out;
+
+ snprintf (value, sizeof(value), "%d", brickinfo->port);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_PORT, value);
+
+ snprintf (value, sizeof(value), "%d", brickinfo->rdma_port);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_RDMA_PORT,
+ value);
+
+ snprintf (value, sizeof(value), "%d", brickinfo->decommissioned);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED,
+ value);
+ if (ret)
+ goto out;
+
+ if (strlen(brickinfo->device_path) > 0) {
+ snprintf (value, sizeof(value), "%s", brickinfo->device_path);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH,
+ value);
+ if (ret)
+ goto out;
+ }
+
+ snprintf (value, sizeof(value), "%d", brickinfo->snap_status);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS,
+ value);
+ if (ret)
+ goto out;
+
+ if (!brickinfo->vg[0])
+ goto out;
+
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_VGNAME,
+ brickinfo->vg);
+out:
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_store_perform_brick_store (glusterd_brickinfo_t *brickinfo)
+{
+ int fd = -1;
+ int32_t ret = -1;
+ GF_ASSERT (brickinfo);
+
+ fd = gf_store_mkstemp (brickinfo->shandle);
+ if (fd <= 0) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_store_brickinfo_write (fd, brickinfo);
+ if (ret)
+ goto out;
+
+out:
+ if (ret && (fd > 0))
+ gf_store_unlink_tmppath (brickinfo->shandle);
+ if (fd > 0)
+ close (fd);
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_store_brickinfo (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo, int32_t brick_count,
+ int vol_fd)
+{
+ int32_t ret = -1;
+
+ GF_ASSERT (volinfo);
+ GF_ASSERT (brickinfo);
+
+ ret = glusterd_store_volinfo_brick_fname_write (vol_fd, brickinfo,
+ brick_count);
+ if (ret)
+ goto out;
+
+ ret = glusterd_store_create_brick_dir (volinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_store_create_brick_shandle_on_absence (volinfo,
+ brickinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_store_perform_brick_store (brickinfo);
+
+out:
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning with %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_store_delete_brick (glusterd_brickinfo_t *brickinfo, char *delete_path)
+{
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
+ char brickpath[PATH_MAX] = {0,};
+ char *ptr = NULL;
+ char *tmppath = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (brickinfo);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ tmppath = gf_strdup (brickinfo->path);
+
+ ptr = strchr (tmppath, '/');
+
+ while (ptr) {
+ *ptr = '-';
+ ptr = strchr (tmppath, '/');
+ }
+
+ snprintf (brickpath, sizeof (brickpath),
+ "%s/"GLUSTERD_BRICK_INFO_DIR"/%s:%s", delete_path,
+ brickinfo->hostname, tmppath);
+
+ GF_FREE (tmppath);
+
+ ret = unlink (brickpath);
+
+ if ((ret < 0) && (errno != ENOENT)) {
+ gf_log (this->name, GF_LOG_DEBUG, "Unlink failed on %s, "
+ "reason: %s", brickpath, strerror(errno));
+ ret = -1;
+ goto out;
+ } else {
+ ret = 0;
+ }
+
+out:
+ if (brickinfo->shandle) {
+ gf_store_handle_destroy (brickinfo->shandle);
+ brickinfo->shandle = NULL;
+ }
+ gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo, char *delete_path)
+{
+ int32_t ret = 0;
+ glusterd_brickinfo_t *tmp = NULL;
+ glusterd_conf_t *priv = NULL;
+ char brickdir [PATH_MAX] = {0,};
+ DIR *dir = NULL;
+ struct dirent *entry = NULL;
+ char path[PATH_MAX] = {0,};
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ GF_ASSERT (volinfo);
+
+ list_for_each_entry (tmp, &volinfo->bricks, brick_list) {
+ ret = glusterd_store_delete_brick (tmp, delete_path);
+ if (ret)
+ goto out;
+ }
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ snprintf (brickdir, sizeof (brickdir), "%s/%s", delete_path,
+ GLUSTERD_BRICK_INFO_DIR);
+
+ dir = opendir (brickdir);
+
+ glusterd_for_each_entry (entry, dir);
+
+ while (entry) {
+ snprintf (path, sizeof (path), "%s/%s",
+ brickdir, entry->d_name);
+ ret = unlink (path);
+ if (ret && errno != ENOENT) {
+ gf_log (this->name, GF_LOG_DEBUG, "Unable to unlink %s, "
+ "reason: %s", path, strerror(errno));
+ }
+ glusterd_for_each_entry (entry, dir);
+ }
+
+ closedir (dir);
+
+ ret = rmdir (brickdir);
+
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret);
+ return ret;
+}
+
+static int
+_storeslaves (dict_t *this, char *key, data_t *value, void *data)
+{
+ int32_t ret = 0;
+ gf_store_handle_t *shandle = NULL;
+ xlator_t *xl = NULL;
+
+ xl = THIS;
+ GF_ASSERT (xl);
+
+ shandle = (gf_store_handle_t*)data;
+
+ GF_ASSERT (shandle);
+ GF_ASSERT (shandle->fd > 0);
+ GF_ASSERT (shandle->path);
+ GF_ASSERT (key);
+ GF_ASSERT (value && value->data);
+
+ if ((!shandle) || (shandle->fd <= 0) || (!shandle->path))
+ return -1;
+
+ if (!key)
+ return -1;
+ if (!value || !value->data)
+ return -1;
+
+ gf_log (xl->name, GF_LOG_DEBUG, "Storing in volinfo:key= %s, val=%s",
+ key, value->data);
+
+ ret = gf_store_save_value (shandle->fd, key, (char*)value->data);
+ if (ret) {
+ gf_log (xl->name, GF_LOG_ERROR, "Unable to write into store"
+ " handle for path: %s", shandle->path);
+ return -1;
+ }
+ return 0;
+}
+
+
+int _storeopts (dict_t *this, char *key, data_t *value, void *data)
+{
+ int32_t ret = 0;
+ int32_t exists = 0;
+ gf_store_handle_t *shandle = NULL;
+ xlator_t *xl = NULL;
+
+ xl = THIS;
+ GF_ASSERT (xl);
+
+ shandle = (gf_store_handle_t*)data;
+
+ GF_ASSERT (shandle);
+ GF_ASSERT (shandle->fd > 0);
+ GF_ASSERT (shandle->path);
+ GF_ASSERT (key);
+ GF_ASSERT (value && value->data);
+
+ if ((!shandle) || (shandle->fd <= 0) || (!shandle->path))
+ return -1;
+
+ if (!key)
+ return -1;
+ if (!value || !value->data)
+ return -1;
+
+ if (is_key_glusterd_hooks_friendly (key)) {
+ exists = 1;
+
+ } else {
+ exists = glusterd_check_option_exists (key, NULL);
+ }
+
+ if (1 == exists) {
+ gf_log (xl->name, GF_LOG_DEBUG, "Storing in volinfo:key= %s, "
+ "val=%s", key, value->data);
+
+ } else {
+ gf_log (xl->name, GF_LOG_DEBUG, "Discarding:key= %s, val=%s",
+ key, value->data);
+ return 0;
+ }
+
+ ret = gf_store_save_value (shandle->fd, key, (char*)value->data);
+ if (ret) {
+ gf_log (xl->name, GF_LOG_ERROR, "Unable to write into store"
+ " handle for path: %s", shandle->path);
+ return -1;
+ }
+ return 0;
+}
+
+int32_t
+glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo)
+{
+ char *str = NULL;
+ char buf[PATH_MAX] = {0,};
+ int32_t ret = -1;
+
+ GF_ASSERT (fd > 0);
+ GF_ASSERT (volinfo);
+
+ snprintf (buf, sizeof (buf), "%d", volinfo->type);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_TYPE, buf);
+ if (ret)
+ goto out;
+
+ snprintf (buf, sizeof (buf), "%d", volinfo->brick_count);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_COUNT, buf);
+ if (ret)
+ goto out;
+
+ snprintf (buf, sizeof (buf), "%d", volinfo->status);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_STATUS, buf);
+ if (ret)
+ goto out;
+
+ snprintf (buf, sizeof (buf), "%d", volinfo->sub_count);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_SUB_COUNT, buf);
+ if (ret)
+ goto out;
+
+ snprintf (buf, sizeof (buf), "%d", volinfo->stripe_count);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_STRIPE_CNT, buf);
+ if (ret)
+ goto out;
+
+ snprintf (buf, sizeof (buf), "%d", volinfo->replica_count);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_REPLICA_CNT,
+ buf);
+ if (ret)
+ goto out;
+
+ snprintf (buf, sizeof (buf), "%d", volinfo->version);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_VERSION, buf);
+ if (ret)
+ goto out;
+
+ snprintf (buf, sizeof (buf), "%d", volinfo->transport_type);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_TRANSPORT, buf);
+ if (ret)
+ goto out;
+
+ snprintf (buf, sizeof (buf), "%s", volinfo->parent_volname);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_PARENT_VOLNAME, buf);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to store "
+ GLUSTERD_STORE_KEY_PARENT_VOLNAME);
+ goto out;
+ }
+
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_ID,
+ uuid_utoa (volinfo->volume_id));
+ if (ret)
+ goto out;
+
+ str = glusterd_auth_get_username (volinfo);
+ if (str) {
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_USERNAME,
+ str);
+ if (ret)
+ goto out;
+ }
+
+ str = glusterd_auth_get_password (volinfo);
+ if (str) {
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_PASSWORD,
+ str);
+ if (ret)
+ goto out;
+ }
+
+ snprintf (buf, sizeof (buf), "%d", volinfo->op_version);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_OP_VERSION, buf);
+ if (ret)
+ goto out;
+
+ snprintf (buf, sizeof (buf), "%d", volinfo->client_op_version);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION,
+ buf);
+ if (ret)
+ goto out;
+ if (volinfo->caps) {
+ snprintf (buf, sizeof (buf), "%d", volinfo->caps);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_CAPS,
+ buf);
+ if (ret)
+ goto out;
+ }
+
+ snprintf (buf, sizeof (buf), "%d", volinfo->is_volume_restored);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_IS_RESTORED, buf);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Unable to write is_volume_restored");
+ goto out;
+ }
+
+ snprintf (buf, sizeof (buf), "%"PRIu64, volinfo->snap_max_hard_limit);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT,
+ buf);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Unable to write snap-max-hard-limit");
+ goto out;
+ }
+
+out:
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR, "Unable to write volume "
+ "values for %s", volinfo->volname);
+ return ret;
+}
+
+static void
+glusterd_store_voldirpath_set (glusterd_volinfo_t *volinfo, char *voldirpath,
+ size_t len)
+{
+ glusterd_conf_t *priv = NULL;
+
+ GF_ASSERT (volinfo);
+ priv = THIS->private;
+ GF_ASSERT (priv);
+
+ GLUSTERD_GET_VOLUME_DIR (voldirpath, volinfo, priv);
+}
+
+static int32_t
+glusterd_store_create_volume_dir (glusterd_volinfo_t *volinfo)
+{
+ int32_t ret = -1;
+ char voldirpath[PATH_MAX] = {0,};
+
+ GF_ASSERT (volinfo);
+
+ glusterd_store_voldirpath_set (volinfo, voldirpath,
+ sizeof (voldirpath));
+ ret = gf_store_mkdir (voldirpath);
+
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning with %d", ret);
+ return ret;
+}
+
+static int32_t
+glusterd_store_create_snap_dir (glusterd_snap_t *snap)
+{
+ int32_t ret = -1;
+ char snapdirpath[PATH_MAX] = {0,};
+ glusterd_conf_t *priv = NULL;
+
+ priv = THIS->private;
+ GF_ASSERT (priv);
+ GF_ASSERT (snap);
+
+ GLUSTERD_GET_SNAP_DIR (snapdirpath, snap, priv);
+
+ ret = mkdir_p (snapdirpath, 0755, _gf_true);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to create snaps dir "
+ "%s", snapdirpath);
+ }
+ return ret;
+}
+
+int32_t
+glusterd_store_volinfo_write (int fd, glusterd_volinfo_t *volinfo)
+{
+ int32_t ret = -1;
+ gf_store_handle_t *shandle = NULL;
+ GF_ASSERT (fd > 0);
+ GF_ASSERT (volinfo);
+ GF_ASSERT (volinfo->shandle);
+
+ shandle = volinfo->shandle;
+ ret = glusterd_volume_exclude_options_write (fd, volinfo);
+ if (ret)
+ goto out;
+
+ shandle->fd = fd;
+ dict_foreach (volinfo->dict, _storeopts, shandle);
+
+ dict_foreach (volinfo->gsync_slaves, _storeslaves, shandle);
+ shandle->fd = 0;
+out:
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_store_snapinfo_write (glusterd_snap_t *snap)
+{
+ int32_t ret = -1;
+ int fd = 0;
+ char buf[PATH_MAX] = "";
+
+ GF_ASSERT (snap);
+
+ fd = gf_store_mkstemp (snap->shandle);
+ if (fd <= 0)
+ goto out;
+
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_ID,
+ uuid_utoa (snap->snap_id));
+ if (ret)
+ goto out;
+
+ snprintf (buf, sizeof (buf), "%d", snap->snap_status);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_STATUS, buf);
+ if (ret)
+ goto out;
+
+ snprintf (buf, sizeof (buf), "%d", snap->snap_restored);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_RESTORED, buf);
+ if (ret)
+ goto out;
+
+ if (snap->description) {
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_DESC,
+ snap->description);
+ if (ret)
+ goto out;
+ }
+
+ snprintf (buf, sizeof (buf), "%ld", snap->time_stamp);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_TIMESTAMP, buf);
+
+out:
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+static void
+glusterd_store_rbstatepath_set (glusterd_volinfo_t *volinfo, char *rbstatepath,
+ size_t len)
+{
+ char voldirpath[PATH_MAX] = {0,};
+ GF_ASSERT (volinfo);
+ GF_ASSERT (rbstatepath);
+ GF_ASSERT (len <= PATH_MAX);
+
+ glusterd_store_voldirpath_set (volinfo, voldirpath,
+ sizeof (voldirpath));
+ snprintf (rbstatepath, len, "%s/%s", voldirpath,
+ GLUSTERD_VOLUME_RBSTATE_FILE);
+}
+
+static void
+glusterd_store_volfpath_set (glusterd_volinfo_t *volinfo, char *volfpath,
+ size_t len)
+{
+ char voldirpath[PATH_MAX] = {0,};
+ GF_ASSERT (volinfo);
+ GF_ASSERT (volfpath);
+ GF_ASSERT (len <= PATH_MAX);
+
+ glusterd_store_voldirpath_set (volinfo, voldirpath,
+ sizeof (voldirpath));
+ snprintf (volfpath, len, "%s/%s", voldirpath, GLUSTERD_VOLUME_INFO_FILE);
+}
+
+static void
+glusterd_store_node_state_path_set (glusterd_volinfo_t *volinfo,
+ char *node_statepath, size_t len)
+{
+ char voldirpath[PATH_MAX] = {0,};
+ GF_ASSERT (volinfo);
+ GF_ASSERT (node_statepath);
+ GF_ASSERT (len <= PATH_MAX);
+
+ glusterd_store_voldirpath_set (volinfo, voldirpath,
+ sizeof (voldirpath));
+ snprintf (node_statepath, len, "%s/%s", voldirpath,
+ GLUSTERD_NODE_STATE_FILE);
+}
+
+static void
+glusterd_store_missed_snaps_list_path_set (char *missed_snaps_list,
+ size_t len)
+{
+ glusterd_conf_t *priv = NULL;
+
+ priv = THIS->private;
+ GF_ASSERT (priv);
+ GF_ASSERT (missed_snaps_list);
+ GF_ASSERT (len <= PATH_MAX);
+
+ snprintf (missed_snaps_list, len, "%s/snaps/"
+ GLUSTERD_MISSED_SNAPS_LIST_FILE, priv->workdir);
+}
+
+static void
+glusterd_store_snapfpath_set (glusterd_snap_t *snap, char *snap_fpath,
+ size_t len)
+{
+ glusterd_conf_t *priv = NULL;
+
+ priv = THIS->private;
+ GF_ASSERT (priv);
+ GF_ASSERT (snap);
+ GF_ASSERT (snap_fpath);
+ GF_ASSERT (len <= PATH_MAX);
+
+ snprintf (snap_fpath, len, "%s/snaps/%s/%s", priv->workdir,
+ snap->snapname, GLUSTERD_SNAP_INFO_FILE);
+}
+
+int32_t
+glusterd_store_create_rbstate_shandle_on_absence (glusterd_volinfo_t *volinfo)
+{
+ char rbstatepath[PATH_MAX] = {0};
+ int32_t ret = 0;
+
+ GF_ASSERT (volinfo);
+
+ glusterd_store_rbstatepath_set (volinfo, rbstatepath, sizeof (rbstatepath));
+ ret = gf_store_handle_create_on_absence (&volinfo->rb_shandle,
+ rbstatepath);
+ return ret;
+}
+
+int32_t
+glusterd_store_create_vol_shandle_on_absence (glusterd_volinfo_t *volinfo)
+{
+ char volfpath[PATH_MAX] = {0};
+ int32_t ret = 0;
+
+ GF_ASSERT (volinfo);
+
+ glusterd_store_volfpath_set (volinfo, volfpath, sizeof (volfpath));
+ ret = gf_store_handle_create_on_absence (&volinfo->shandle, volfpath);
+ return ret;
+}
+
+int32_t
+glusterd_store_create_nodestate_sh_on_absence (glusterd_volinfo_t *volinfo)
+{
+ char node_state_path[PATH_MAX] = {0};
+ int32_t ret = 0;
+
+ GF_ASSERT (volinfo);
+
+ glusterd_store_node_state_path_set (volinfo, node_state_path,
+ sizeof (node_state_path));
+ ret =
+ gf_store_handle_create_on_absence (&volinfo->node_state_shandle,
+ node_state_path);
+
+ return ret;
+}
+
+static int32_t
+glusterd_store_create_missed_snaps_list_shandle_on_absence ()
+{
+ char missed_snaps_list[PATH_MAX] = "";
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ glusterd_store_missed_snaps_list_path_set (missed_snaps_list,
+ sizeof(missed_snaps_list));
+
+ ret = gf_store_handle_create_on_absence
+ (&priv->missed_snaps_list_shandle,
+ missed_snaps_list);
+ return ret;
+}
+
+int32_t
+glusterd_store_create_snap_shandle_on_absence (glusterd_snap_t *snap)
+{
+ char snapfpath[PATH_MAX] = {0};
+ int32_t ret = 0;
+
+ GF_ASSERT (snap);
+
+ glusterd_store_snapfpath_set (snap, snapfpath, sizeof (snapfpath));
+ ret = gf_store_handle_create_on_absence (&snap->shandle, snapfpath);
+ return ret;
+}
+
+int32_t
+glusterd_store_brickinfos (glusterd_volinfo_t *volinfo, int vol_fd)
+{
+ int32_t ret = 0;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ int32_t brick_count = 0;
+
+ GF_ASSERT (volinfo);
+
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ ret = glusterd_store_brickinfo (volinfo, brickinfo,
+ brick_count, vol_fd);
+ if (ret)
+ goto out;
+ brick_count++;
+ }
+out:
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_store_rbstate_write (int fd, glusterd_volinfo_t *volinfo)
+{
+ int ret = -1;
+ int port = 0;
+ char buf[PATH_MAX] = {0, };
+
+ GF_ASSERT (fd > 0);
+ GF_ASSERT (volinfo);
+
+ snprintf (buf, sizeof (buf), "%d", volinfo->rep_brick.rb_status);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_RB_STATUS, buf);
+ if (ret)
+ goto out;
+
+ if (volinfo->rep_brick.rb_status > GF_RB_STATUS_NONE) {
+
+ snprintf (buf, sizeof (buf), "%s:%s",
+ volinfo->rep_brick.src_brick->hostname,
+ volinfo->rep_brick.src_brick->path);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_RB_SRC_BRICK,
+ buf);
+ if (ret)
+ goto out;
+
+ snprintf (buf, sizeof (buf), "%s:%s",
+ volinfo->rep_brick.dst_brick->hostname,
+ volinfo->rep_brick.dst_brick->path);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_RB_DST_BRICK,
+ buf);
+ if (ret)
+ goto out;
+
+ switch (volinfo->transport_type) {
+ case GF_TRANSPORT_RDMA:
+ port = volinfo->rep_brick.dst_brick->rdma_port;
+ break;
+
+ case GF_TRANSPORT_TCP:
+ case GF_TRANSPORT_BOTH_TCP_RDMA:
+ port = volinfo->rep_brick.dst_brick->port;
+ break;
+ }
+
+ snprintf (buf, sizeof (buf), "%d", port);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_RB_DST_PORT,
+ buf);
+ if (ret)
+ goto out;
+ uuid_unparse (volinfo->rep_brick.rb_id, buf);
+ ret = gf_store_save_value (fd, GF_REPLACE_BRICK_TID_KEY, buf);
+ }
+
+ ret = 0;
+out:
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_store_perform_rbstate_store (glusterd_volinfo_t *volinfo)
+{
+ int fd = -1;
+ int32_t ret = -1;
+ GF_ASSERT (volinfo);
+
+ fd = gf_store_mkstemp (volinfo->rb_shandle);
+ if (fd <= 0) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_store_rbstate_write (fd, volinfo);
+ if (ret)
+ goto out;
+
+ ret = gf_store_rename_tmppath (volinfo->rb_shandle);
+ if (ret)
+ goto out;
+
+out:
+ if (ret && (fd > 0))
+ gf_store_unlink_tmppath (volinfo->rb_shandle);
+ if (fd > 0)
+ close (fd);
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_store_node_state_write (int fd, glusterd_volinfo_t *volinfo)
+{
+ int ret = -1;
+ char buf[PATH_MAX] = {0, };
+
+ GF_ASSERT (fd > 0);
+ GF_ASSERT (volinfo);
+
+ if (volinfo->rebal.defrag_cmd == GF_DEFRAG_CMD_STATUS) {
+ ret = 0;
+ goto out;
+ }
+
+ snprintf (buf, sizeof (buf), "%d", volinfo->rebal.defrag_cmd);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_DEFRAG, buf);
+ if (ret)
+ goto out;
+
+ snprintf (buf, sizeof (buf), "%d", volinfo->rebal.op);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_DEFRAG_OP, buf);
+ if (ret)
+ goto out;
+
+ if (volinfo->rebal.defrag_cmd) {
+ uuid_unparse (volinfo->rebal.rebalance_id, buf);
+ ret = gf_store_save_value (fd, GF_REBALANCE_TID_KEY, buf);
+ }
+out:
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_store_perform_node_state_store (glusterd_volinfo_t *volinfo)
+{
+ int fd = -1;
+ int32_t ret = -1;
+ GF_ASSERT (volinfo);
+
+ fd = gf_store_mkstemp (volinfo->node_state_shandle);
+ if (fd <= 0) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_store_node_state_write (fd, volinfo);
+ if (ret)
+ goto out;
+
+ ret = gf_store_rename_tmppath (volinfo->node_state_shandle);
+ if (ret)
+ goto out;
+
+out:
+ if (ret && (fd > 0))
+ gf_store_unlink_tmppath (volinfo->node_state_shandle);
+ if (fd > 0)
+ close (fd);
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_store_perform_volume_store (glusterd_volinfo_t *volinfo)
+{
+ int fd = -1;
+ int32_t ret = -1;
+ GF_ASSERT (volinfo);
+
+ fd = gf_store_mkstemp (volinfo->shandle);
+ if (fd <= 0) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_store_volinfo_write (fd, volinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_store_brickinfos (volinfo, fd);
+ if (ret)
+ goto out;
+
+out:
+ if (ret && (fd > 0))
+ gf_store_unlink_tmppath (volinfo->shandle);
+ if (fd > 0)
+ close (fd);
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+void
+glusterd_perform_volinfo_version_action (glusterd_volinfo_t *volinfo,
+ glusterd_volinfo_ver_ac_t ac)
+{
+ GF_ASSERT (volinfo);
+
+ switch (ac) {
+ case GLUSTERD_VOLINFO_VER_AC_NONE:
+ break;
+ case GLUSTERD_VOLINFO_VER_AC_INCREMENT:
+ volinfo->version++;
+ break;
+ case GLUSTERD_VOLINFO_VER_AC_DECREMENT:
+ volinfo->version--;
+ break;
+ }
+}
+
+void
+glusterd_store_bricks_cleanup_tmp (glusterd_volinfo_t *volinfo)
+{
+ glusterd_brickinfo_t *brickinfo = NULL;
+
+ GF_ASSERT (volinfo);
+
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ gf_store_unlink_tmppath (brickinfo->shandle);
+ }
+}
+
+void
+glusterd_store_volume_cleanup_tmp (glusterd_volinfo_t *volinfo)
+{
+ GF_ASSERT (volinfo);
+
+ glusterd_store_bricks_cleanup_tmp (volinfo);
+
+ gf_store_unlink_tmppath (volinfo->shandle);
+
+ gf_store_unlink_tmppath (volinfo->rb_shandle);
+
+ gf_store_unlink_tmppath (volinfo->node_state_shandle);
+}
+
+void
+glusterd_store_snap_cleanup_tmp (glusterd_snap_t *snap)
+{
+ GF_ASSERT (snap);
+
+ gf_store_unlink_tmppath (snap->shandle);
+}
+
+int32_t
+glusterd_store_brickinfos_atomic_update (glusterd_volinfo_t *volinfo)
+{
+ int ret = -1;
+ glusterd_brickinfo_t *brickinfo = NULL;
+
+ GF_ASSERT (volinfo);
+
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ ret = gf_store_rename_tmppath (brickinfo->shandle);
+ if (ret)
+ goto out;
+ }
+out:
+ return ret;
+}
+
+int32_t
+glusterd_store_volinfo_atomic_update (glusterd_volinfo_t *volinfo)
+{
+ int ret = -1;
+ GF_ASSERT (volinfo);
+
+ ret = gf_store_rename_tmppath (volinfo->shandle);
+ if (ret)
+ goto out;
+
+out:
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR, "Couldn't rename "
+ "temporary file(s): Reason %s", strerror (errno));
+ return ret;
+}
+
+int32_t
+glusterd_store_volume_atomic_update (glusterd_volinfo_t *volinfo)
+{
+ int ret = -1;
+ GF_ASSERT (volinfo);
+
+ ret = glusterd_store_brickinfos_atomic_update (volinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_store_volinfo_atomic_update (volinfo);
+
+out:
+ return ret;
+}
+
+int32_t
+glusterd_store_snap_atomic_update (glusterd_snap_t *snap)
+{
+ int ret = -1;
+ GF_ASSERT (snap);
+
+ ret = gf_store_rename_tmppath (snap->shandle);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR, "Couldn't rename "
+ "temporary file(s): Reason %s", strerror (errno));
+
+ return ret;
+}
+
+int32_t
+glusterd_store_snap (glusterd_snap_t *snap)
+{
+ int32_t ret = -1;
+
+ GF_ASSERT (snap);
+
+ ret = glusterd_store_create_snap_dir (snap);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to create snap dir");
+ goto out;
+ }
+
+ ret = glusterd_store_create_snap_shandle_on_absence (snap);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to create snap info "
+ "file");
+ goto out;
+ }
+
+ ret = glusterd_store_snapinfo_write (snap);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to write snap info");
+ goto out;
+ }
+
+ ret = glusterd_store_snap_atomic_update (snap);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,"Failed to do automic update");
+ goto out;
+ }
+
+out:
+ if (ret)
+ glusterd_store_snap_cleanup_tmp (snap);
+
+ gf_log (THIS->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_store_volinfo (glusterd_volinfo_t *volinfo, glusterd_volinfo_ver_ac_t ac)
+{
+ int32_t ret = -1;
+
+ GF_ASSERT (volinfo);
+
+ glusterd_perform_volinfo_version_action (volinfo, ac);
+ ret = glusterd_store_create_volume_dir (volinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_store_create_vol_shandle_on_absence (volinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_store_create_rbstate_shandle_on_absence (volinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_store_create_nodestate_sh_on_absence (volinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_store_perform_volume_store (volinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_store_volume_atomic_update (volinfo);
+ if (ret) {
+ glusterd_perform_volinfo_version_action (volinfo,
+ GLUSTERD_VOLINFO_VER_AC_DECREMENT);
+ goto out;
+ }
+
+ ret = glusterd_store_perform_rbstate_store (volinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_store_perform_node_state_store (volinfo);
+ if (ret)
+ goto out;
+
+ //checksum should be computed at the end
+ ret = glusterd_volume_compute_cksum (volinfo);
+ if (ret)
+ goto out;
+
+out:
+ if (ret)
+ glusterd_store_volume_cleanup_tmp (volinfo);
+
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+int32_t
+glusterd_store_delete_volume (glusterd_volinfo_t *volinfo)
+{
+ char pathname[PATH_MAX] = {0,};
+ int32_t ret = 0;
+ glusterd_conf_t *priv = NULL;
+ DIR *dir = NULL;
+ struct dirent *entry = NULL;
+ char path[PATH_MAX] = {0,};
+ char delete_path[PATH_MAX] = {0,};
+ char trashdir[PATH_MAX] = {0,};
+ struct stat st = {0, };
+ xlator_t *this = NULL;
+ gf_boolean_t rename_fail = _gf_false;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ GF_ASSERT (volinfo);
+ priv = this->private;
+
+ GF_ASSERT (priv);
+
+ GLUSTERD_GET_VOLUME_DIR (pathname, volinfo, priv);
+
+ snprintf (delete_path, sizeof (delete_path),
+ "%s/"GLUSTERD_TRASH"/%s.deleted", priv->workdir,
+ uuid_utoa (volinfo->volume_id));
+
+ snprintf (trashdir, sizeof (trashdir), "%s/"GLUSTERD_TRASH,
+ priv->workdir);
+
+ ret = mkdir (trashdir, 0777);
+ if (ret && errno != EEXIST) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to create trash "
+ "directory, reason : %s", strerror (errno));
+ ret = -1;
+ goto out;
+ }
+
+ ret = rename (pathname, delete_path);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to rename volume "
+ "directory for volume %s", volinfo->volname);
+ rename_fail = _gf_true;
+ goto out;
+ }
+
+ dir = opendir (delete_path);
+ if (!dir) {
+ gf_log (this->name, GF_LOG_DEBUG, "Failed to open directory %s."
+ " Reason : %s", delete_path, strerror (errno));
+ ret = 0;
+ goto out;
+ }
+ ret = glusterd_store_remove_bricks (volinfo, delete_path);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "Remove bricks failed for %s",
+ volinfo->volname);
+ }
+
+ glusterd_for_each_entry (entry, dir);
+ while (entry) {
+
+ snprintf (path, PATH_MAX, "%s/%s", delete_path, entry->d_name);
+ ret = stat (path, &st);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG, "Failed to stat "
+ "entry %s : %s", path, strerror (errno));
+ goto stat_failed;
+ }
+
+ if (S_ISDIR (st.st_mode))
+ ret = rmdir (path);
+ else
+ ret = unlink (path);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, " Failed to remove "
+ "%s. Reason : %s", path, strerror (errno));
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "%s %s",
+ ret ? "Failed to remove":"Removed",
+ entry->d_name);
+stat_failed:
+ memset (path, 0, sizeof(path));
+ glusterd_for_each_entry (entry, dir);
+ }
+
+ ret = closedir (dir);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "Failed to close dir %s. "
+ "Reason : %s",delete_path, strerror (errno));
+ }
+
+ ret = rmdir (delete_path);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "Failed to rmdir: %s,err: %s",
+ delete_path, strerror (errno));
+ }
+ ret = rmdir (trashdir);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "Failed to rmdir: %s, Reason:"
+ " %s", trashdir, strerror (errno));
+ }
+
+out:
+ if (volinfo->shandle) {
+ gf_store_handle_destroy (volinfo->shandle);
+ volinfo->shandle = NULL;
+ }
+ ret = (rename_fail == _gf_true) ? -1: 0;
+
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+/*TODO: cleanup the duplicate code and implement a generic function for
+ * deleting snap/volume depending on the parameter flag */
+int32_t
+glusterd_store_delete_snap (glusterd_snap_t *snap)
+{
+ char pathname[PATH_MAX] = {0,};
+ int32_t ret = 0;
+ glusterd_conf_t *priv = NULL;
+ DIR *dir = NULL;
+ struct dirent *entry = NULL;
+ char path[PATH_MAX] = {0,};
+ char delete_path[PATH_MAX] = {0,};
+ char trashdir[PATH_MAX] = {0,};
+ struct stat st = {0, };
+ xlator_t *this = NULL;
+ gf_boolean_t rename_fail = _gf_false;
+
+ this = THIS;
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ GF_ASSERT (snap);
+ GLUSTERD_GET_SNAP_DIR (pathname, snap, priv);
+
+ snprintf (delete_path, sizeof (delete_path),
+ "%s/"GLUSTERD_TRASH"/snap-%s.deleted", priv->workdir,
+ uuid_utoa (snap->snap_id));
+
+ snprintf (trashdir, sizeof (trashdir), "%s/"GLUSTERD_TRASH,
+ priv->workdir);
+
+ ret = mkdir (trashdir, 0777);
+ if (ret && errno != EEXIST) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to create trash "
+ "directory, reason : %s", strerror (errno));
+ ret = -1;
+ goto out;
+ }
+
+ ret = rename (pathname, delete_path);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to rename snap "
+ "directory %s to %s", snap->snapname, delete_path);
+ rename_fail = _gf_true;
+ goto out;
+ }
+
+ dir = opendir (delete_path);
+ if (!dir) {
+ gf_log (this->name, GF_LOG_DEBUG, "Failed to open directory %s."
+ " Reason : %s", delete_path, strerror (errno));
+ ret = 0;
+ goto out;
+ }
+
+ glusterd_for_each_entry (entry, dir);
+ while (entry) {
+ snprintf (path, PATH_MAX, "%s/%s", delete_path, entry->d_name);
+ ret = stat (path, &st);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_DEBUG, "Failed to stat "
+ "entry %s : %s", path, strerror (errno));
+ goto stat_failed;
+ }
+
+ if (S_ISDIR (st.st_mode))
+ ret = rmdir (path);
+ else
+ ret = unlink (path);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, " Failed to remove "
+ "%s. Reason : %s", path, strerror (errno));
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "%s %s",
+ ret ? "Failed to remove":"Removed",
+ entry->d_name);
+stat_failed:
+ memset (path, 0, sizeof(path));
+ glusterd_for_each_entry (entry, dir);
+ }
+
+ ret = closedir (dir);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "Failed to close dir %s. "
+ "Reason : %s",delete_path, strerror (errno));
+ }
+
+ ret = rmdir (delete_path);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "Failed to rmdir: %s,err: %s",
+ delete_path, strerror (errno));
+ }
+ ret = rmdir (trashdir);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "Failed to rmdir: %s, Reason:"
+ " %s", trashdir, strerror (errno));
+ }
+
+out:
+ if (snap->shandle) {
+ gf_store_handle_destroy (snap->shandle);
+ snap->shandle = NULL;
+ }
+ ret = (rename_fail == _gf_true) ? -1: 0;
+
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_store_global_info (xlator_t *this)
+{
+ int ret = -1;
+ glusterd_conf_t *conf = NULL;
+ char op_version_str[15] = {0,};
+ char path[PATH_MAX] = {0,};
+ gf_store_handle_t *handle = NULL;
+ char *uuid_str = NULL;
+ char buf[256] = {0, };
+
+ conf = this->private;
+
+ uuid_str = gf_strdup (uuid_utoa (MY_UUID));
+ if (!uuid_str)
+ goto out;
+
+ if (!conf->handle) {
+ snprintf (path, PATH_MAX, "%s/%s", conf->workdir,
+ GLUSTERD_INFO_FILE);
+ ret = gf_store_handle_new (path, &handle);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to get store handle");
+ goto out;
+ }
+
+ conf->handle = handle;
+ } else
+ handle = conf->handle;
+
+ /* These options need to be available for all users */
+ ret = chmod (handle->path, 0644);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "chmod error for %s: %s",
+ GLUSTERD_INFO_FILE, strerror (errno));
+ goto out;
+ }
+
+ handle->fd = gf_store_mkstemp (handle);
+ if (handle->fd <= 0) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = gf_store_save_value (handle->fd, GLUSTERD_STORE_UUID_KEY,
+ uuid_str);
+ if (ret) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Storing uuid failed ret = %d", ret);
+ goto out;
+ }
+
+ snprintf (op_version_str, 15, "%d", conf->op_version);
+ ret = gf_store_save_value (handle->fd, GD_OP_VERSION_KEY,
+ op_version_str);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Storing op-version failed ret = %d", ret);
+ goto out;
+ }
+
+ snprintf (buf, sizeof (buf), "%"PRIu64, conf->snap_max_hard_limit);
+ ret = gf_store_save_value (handle->fd,
+ GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT, buf);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Storing snap-max-hard-limit failed ret = %d", ret);
+ goto out;
+ }
+
+ snprintf (buf, sizeof (buf), "%"PRIu64, conf->snap_max_soft_limit);
+ ret = gf_store_save_value (handle->fd,
+ GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT, buf);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Storing snap-max-soft-limit failed ret = %d", ret);
+ goto out;
+ }
+
+ ret = gf_store_rename_tmppath (handle);
+out:
+ if (ret && (handle->fd > 0))
+ gf_store_unlink_tmppath (handle);
+
+ if (handle->fd > 0) {
+ close (handle->fd);
+ handle->fd = 0;
+ }
+
+ if (uuid_str)
+ GF_FREE (uuid_str);
+
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to store glusterd global-info");
+
+ return ret;
+}
+
+int
+glusterd_retrieve_op_version (xlator_t *this, int *op_version)
+{
+ char *op_version_str = NULL;
+ glusterd_conf_t *priv = NULL;
+ int ret = -1;
+ int tmp_version = 0;
+ char *tmp = NULL;
+ char path[PATH_MAX] = {0,};
+ gf_store_handle_t *handle = NULL;
+
+ priv = this->private;
+
+ if (!priv->handle) {
+ snprintf (path, PATH_MAX, "%s/%s", priv->workdir,
+ GLUSTERD_INFO_FILE);
+ ret = gf_store_handle_retrieve (path, &handle);
+
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG, "Unable to get store "
+ "handle!");
+ goto out;
+ }
+
+ priv->handle = handle;
+ }
+
+ ret = gf_store_retrieve_value (priv->handle, GD_OP_VERSION_KEY,
+ &op_version_str);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "No previous op_version present");
+ goto out;
+ }
+
+ tmp_version = strtol (op_version_str, &tmp, 10);
+ if ((tmp_version <= 0) || (tmp && strlen (tmp) > 1)) {
+ gf_log (this->name, GF_LOG_WARNING, "invalid version number");
+ goto out;
+ }
+
+ *op_version = tmp_version;
+
+ ret = 0;
+out:
+ if (op_version_str)
+ GF_FREE (op_version_str);
+
+ return ret;
+}
+
+int
+glusterd_retrieve_sys_snap_max_limit (xlator_t *this, uint64_t *limit,
+ char *key)
+{
+ char *limit_str = NULL;
+ glusterd_conf_t *priv = NULL;
+ int ret = -1;
+ uint64_t tmp_limit = 0;
+ char *tmp = NULL;
+ char path[PATH_MAX] = {0,};
+ gf_store_handle_t *handle = NULL;
+
+ GF_ASSERT (this);
+ priv = this->private;
+
+ GF_ASSERT (priv);
+ GF_ASSERT (limit);
+ GF_ASSERT (key);
+
+ if (!priv->handle) {
+ snprintf (path, PATH_MAX, "%s/%s", priv->workdir,
+ GLUSTERD_INFO_FILE);
+ ret = gf_store_handle_retrieve (path, &handle);
+
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG, "Unable to get store "
+ "handle!");
+ goto out;
+ }
+
+ priv->handle = handle;
+ }
+
+ ret = gf_store_retrieve_value (priv->handle,
+ key,
+ &limit_str);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "No previous %s present", key);
+ goto out;
+ }
+
+ tmp_limit = strtoul (limit_str, &tmp, 10);
+ if ((tmp_limit <= 0) || (tmp && strlen (tmp) > 1)) {
+ gf_log (this->name, GF_LOG_WARNING, "invalid version number");
+ goto out;
+ }
+
+ *limit = tmp_limit;
+
+ ret = 0;
+out:
+ if (limit_str)
+ GF_FREE (limit_str);
+
+ return ret;
+}
+static int
+glusterd_restore_op_version (xlator_t *this)
+{
+ glusterd_conf_t *conf = NULL;
+ int ret = 0;
+ int op_version = 0;
+
+ conf = this->private;
+
+ ret = glusterd_retrieve_sys_snap_max_limit (this,
+ &conf->snap_max_hard_limit,
+ GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unable to retrieve system snap-max-hard-limit, "
+ "setting it to default value(%d)",
+ GLUSTERD_SNAPS_MAX_HARD_LIMIT);
+ conf->snap_max_hard_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT;
+ }
+
+ ret = glusterd_retrieve_sys_snap_max_limit (this,
+ &conf->snap_max_soft_limit,
+ GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unable to retrieve system snap-max-soft-limit, "
+ "setting it to default value(%d)",
+ GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT);
+ conf->snap_max_soft_limit = GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT;
+ }
+
+ ret = glusterd_retrieve_op_version (this, &op_version);
+ if (!ret) {
+ if ((op_version < GD_OP_VERSION_MIN) ||
+ (op_version > GD_OP_VERSION_MAX)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "wrong op-version (%d) retrieved", op_version);
+ ret = -1;
+ goto out;
+ }
+ conf->op_version = op_version;
+ gf_log ("glusterd", GF_LOG_INFO,
+ "retrieved op-version: %d", conf->op_version);
+ goto out;
+ }
+
+ /* op-version can be missing from the store file in 2 cases,
+ * 1. This is a new install of glusterfs
+ * 2. This is an upgrade of glusterfs from a version without op-version
+ * to a version with op-version (eg. 3.3 -> 3.4)
+ *
+ * Detection of a new install or an upgrade from an older install can be
+ * done by checking for the presence of the its peer-id in the store
+ * file. If peer-id is present, the installation is an upgrade else, it
+ * is a new install.
+ *
+ * For case 1, set op-version to GD_OP_VERSION_MAX.
+ * For case 2, set op-version to GD_OP_VERSION_MIN.
+ */
+ ret = glusterd_retrieve_uuid();
+ if (ret) {
+ gf_log (this->name, GF_LOG_INFO, "Detected new install. Setting"
+ " op-version to maximum : %d", GD_OP_VERSION_MAX);
+ conf->op_version = GD_OP_VERSION_MAX;
+ } else {
+ gf_log (this->name, GF_LOG_INFO, "Upgrade detected. Setting"
+ " op-version to minimum : %d", GD_OP_VERSION_MIN);
+ conf->op_version = GD_OP_VERSION_MIN;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+int32_t
+glusterd_retrieve_uuid ()
+{
+ char *uuid_str = NULL;
+ int32_t ret = -1;
+ gf_store_handle_t *handle = NULL;
+ glusterd_conf_t *priv = NULL;
+ char path[PATH_MAX] = {0,};
+
+ priv = THIS->private;
+
+ if (!priv->handle) {
+ snprintf (path, PATH_MAX, "%s/%s", priv->workdir,
+ GLUSTERD_INFO_FILE);
+ ret = gf_store_handle_retrieve (path, &handle);
+
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG, "Unable to get store"
+ "handle!");
+ goto out;
+ }
+
+ priv->handle = handle;
+ }
+
+ ret = gf_store_retrieve_value (priv->handle, GLUSTERD_STORE_UUID_KEY,
+ &uuid_str);
+
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG, "No previous uuid is present");
+ goto out;
+ }
+
+ uuid_parse (uuid_str, priv->uuid);
+
+out:
+ GF_FREE (uuid_str);
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+
+int32_t
+glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo)
+{
+ int32_t ret = 0;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ gf_store_iter_t *iter = NULL;
+ char *key = NULL;
+ char *value = NULL;
+ char brickdir[PATH_MAX] = {0,};
+ char path[PATH_MAX] = {0,};
+ glusterd_conf_t *priv = NULL;
+ int32_t brick_count = 0;
+ char tmpkey[4096] = {0,};
+ gf_store_iter_t *tmpiter = NULL;
+ char *tmpvalue = NULL;
+ struct pmap_registry *pmap = NULL;
+ gf_store_op_errno_t op_errno = GD_STORE_SUCCESS;
+
+ GF_ASSERT (volinfo);
+ GF_ASSERT (volinfo->volname);
+
+ priv = THIS->private;
+
+ GLUSTERD_GET_BRICK_DIR (brickdir, volinfo, priv);
+
+ ret = gf_store_iter_new (volinfo->shandle, &tmpiter);
+ if (ret)
+ goto out;
+
+ while (brick_count < volinfo->brick_count) {
+ ret = glusterd_brickinfo_new (&brickinfo);
+
+ if (ret)
+ goto out;
+ snprintf (tmpkey, sizeof (tmpkey), "%s-%d",
+ GLUSTERD_STORE_KEY_VOL_BRICK,brick_count);
+ ret = gf_store_iter_get_matching (tmpiter, tmpkey, &tmpvalue);
+ snprintf (path, sizeof (path), "%s/%s", brickdir, tmpvalue);
+
+ GF_FREE (tmpvalue);
+
+ tmpvalue = NULL;
+
+ ret = gf_store_handle_retrieve (path, &brickinfo->shandle);
+
+ if (ret)
+ goto out;
+
+ ret = gf_store_iter_new (brickinfo->shandle, &iter);
+
+ if (ret)
+ goto out;
+
+ ret = gf_store_iter_get_next (iter, &key, &value, &op_errno);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR, "Unable to iterate "
+ "the store for brick: %s, reason: %s", path,
+ gf_store_strerror (op_errno));
+ goto out;
+ }
+ while (!ret) {
+ if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_HOSTNAME,
+ strlen (GLUSTERD_STORE_KEY_BRICK_HOSTNAME))) {
+ strncpy (brickinfo->hostname, value, 1024);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_PATH,
+ strlen (GLUSTERD_STORE_KEY_BRICK_PATH))) {
+ strncpy (brickinfo->path, value,
+ sizeof (brickinfo->path));
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_PORT,
+ strlen (GLUSTERD_STORE_KEY_BRICK_PORT))) {
+ gf_string2int (value, &brickinfo->port);
+
+ if (brickinfo->port < priv->base_port) {
+ /* This is required to adhere to the
+ IANA standards */
+ brickinfo->port = 0;
+ } else {
+ /* This is required to have proper ports
+ assigned to bricks after restart */
+ pmap = pmap_registry_get (THIS);
+ if (pmap->last_alloc <= brickinfo->port)
+ pmap->last_alloc =
+ brickinfo->port + 1;
+ }
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_RDMA_PORT,
+ strlen (GLUSTERD_STORE_KEY_BRICK_RDMA_PORT))) {
+ gf_string2int (value, &brickinfo->rdma_port);
+
+ if (brickinfo->rdma_port < priv->base_port) {
+ /* This is required to adhere to the
+ IANA standards */
+ brickinfo->rdma_port = 0;
+ } else {
+ /* This is required to have proper ports
+ assigned to bricks after restart */
+ pmap = pmap_registry_get (THIS);
+ if (pmap->last_alloc <=
+ brickinfo->rdma_port)
+ pmap->last_alloc =
+ brickinfo->rdma_port +1;
+ }
+
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED,
+ strlen (GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED))) {
+ gf_string2int (value, &brickinfo->decommissioned);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH,
+ strlen (GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH))) {
+ strncpy (brickinfo->device_path, value,
+ sizeof (brickinfo->device_path));
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS,
+ strlen (GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS))) {
+ gf_string2int (value, &brickinfo->snap_status);
+ } else if (!strncmp (key,
+ GLUSTERD_STORE_KEY_BRICK_VGNAME,
+ strlen (GLUSTERD_STORE_KEY_BRICK_VGNAME))) {
+ strncpy (brickinfo->vg, value,
+ sizeof (brickinfo->vg));
+ } else {
+ gf_log ("", GF_LOG_ERROR, "Unknown key: %s",
+ key);
+ }
+
+ GF_FREE (key);
+ GF_FREE (value);
+ key = NULL;
+ value = NULL;
+
+ ret = gf_store_iter_get_next (iter, &key, &value,
+ &op_errno);
+ }
+
+ if (op_errno != GD_STORE_EOF)
+ goto out;
+ ret = gf_store_iter_destroy (iter);
+
+ if (ret)
+ goto out;
+
+ list_add_tail (&brickinfo->brick_list, &volinfo->bricks);
+ brick_count++;
+ }
+
+ ret = gf_store_iter_destroy (tmpiter);
+ if (ret)
+ goto out;
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+
+ return ret;
+}
+
+
+int32_t
+glusterd_store_retrieve_rbstate (glusterd_volinfo_t *volinfo)
+{
+ int32_t ret = -1;
+ gf_store_iter_t *iter = NULL;
+ char *key = NULL;
+ char *value = NULL;
+ char volpath[PATH_MAX] = {0,};
+ glusterd_conf_t *priv = NULL;
+ char path[PATH_MAX] = {0,};
+ gf_store_op_errno_t op_errno = GD_STORE_SUCCESS;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+ GF_ASSERT (volinfo);
+
+ GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, priv);
+ snprintf (path, sizeof (path), "%s/%s", volpath,
+ GLUSTERD_VOLUME_RBSTATE_FILE);
+
+ ret = gf_store_handle_retrieve (path, &volinfo->rb_shandle);
+
+ if (ret)
+ goto out;
+
+ ret = gf_store_iter_new (volinfo->rb_shandle, &iter);
+
+ if (ret)
+ goto out;
+
+ ret = gf_store_iter_get_next (iter, &key, &value, &op_errno);
+ if (ret)
+ goto out;
+
+ while (!ret) {
+ if (!strncmp (key, GLUSTERD_STORE_KEY_RB_STATUS,
+ strlen (GLUSTERD_STORE_KEY_RB_STATUS))) {
+ volinfo->rep_brick.rb_status = atoi (value);
+ }
+
+ if (volinfo->rep_brick.rb_status > GF_RB_STATUS_NONE) {
+ if (!strncmp (key, GLUSTERD_STORE_KEY_RB_SRC_BRICK,
+ strlen (GLUSTERD_STORE_KEY_RB_SRC_BRICK))) {
+ ret = glusterd_brickinfo_new_from_brick (value,
+ &volinfo->rep_brick.src_brick);
+ if (ret)
+ goto out;
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_RB_DST_BRICK,
+ strlen (GLUSTERD_STORE_KEY_RB_DST_BRICK))) {
+ ret = glusterd_brickinfo_new_from_brick (value,
+ &volinfo->rep_brick.dst_brick);
+ if (ret)
+ goto out;
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_RB_DST_PORT,
+ strlen (GLUSTERD_STORE_KEY_RB_DST_PORT))) {
+ switch (volinfo->transport_type) {
+ case GF_TRANSPORT_RDMA:
+ volinfo->rep_brick.dst_brick->rdma_port =
+ atoi (value);
+ break;
+
+ case GF_TRANSPORT_TCP:
+ case GF_TRANSPORT_BOTH_TCP_RDMA:
+ volinfo->rep_brick.dst_brick->port =
+ atoi (value);
+ break;
+ }
+ } else if (!strncmp (key, GF_REPLACE_BRICK_TID_KEY,
+ strlen (GF_REPLACE_BRICK_TID_KEY))) {
+ uuid_parse (value,
+ volinfo->rep_brick.rb_id);
+ }
+ }
+
+ GF_FREE (key);
+ GF_FREE (value);
+ key = NULL;
+ value = NULL;
+
+ ret = gf_store_iter_get_next (iter, &key, &value, &op_errno);
+ }
+
+ if (op_errno != GD_STORE_EOF)
+ goto out;
+
+ ret = gf_store_iter_destroy (iter);
+
+ if (ret)
+ goto out;
+
+out:
+ gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret);
+
+ return ret;
+}
+
+int32_t
+glusterd_store_retrieve_node_state (glusterd_volinfo_t *volinfo)
+{
+ int32_t ret = -1;
+ gf_store_iter_t *iter = NULL;
+ char *key = NULL;
+ char *value = NULL;
+ char volpath[PATH_MAX] = {0,};
+ glusterd_conf_t *priv = NULL;
+ char path[PATH_MAX] = {0,};
+ gf_store_op_errno_t op_errno = GD_STORE_SUCCESS;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+ GF_ASSERT (volinfo);
+
+ GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, priv);
+ snprintf (path, sizeof (path), "%s/%s", volpath,
+ GLUSTERD_NODE_STATE_FILE);
+
+ ret = gf_store_handle_retrieve (path, &volinfo->node_state_shandle);
+ if (ret)
+ goto out;
+
+ ret = gf_store_iter_new (volinfo->node_state_shandle, &iter);
+
+ if (ret)
+ goto out;
+
+ ret = gf_store_iter_get_next (iter, &key, &value, &op_errno);
+ if (ret)
+ goto out;
+
+ while (ret == 0) {
+ if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DEFRAG,
+ strlen (GLUSTERD_STORE_KEY_VOL_DEFRAG))) {
+ volinfo->rebal.defrag_cmd = atoi (value);
+ }
+
+ if (volinfo->rebal.defrag_cmd) {
+ if (!strncmp (key, GF_REBALANCE_TID_KEY,
+ strlen (GF_REBALANCE_TID_KEY)))
+ uuid_parse (value, volinfo->rebal.rebalance_id);
+
+ if (!strncmp (key, GLUSTERD_STORE_KEY_DEFRAG_OP,
+ strlen (GLUSTERD_STORE_KEY_DEFRAG_OP)))
+ volinfo->rebal.op = atoi (value);
+ }
+
+ GF_FREE (key);
+ GF_FREE (value);
+ key = NULL;
+ value = NULL;
+
+ ret = gf_store_iter_get_next (iter, &key, &value, &op_errno);
+ }
+
+ if (op_errno != GD_STORE_EOF)
+ goto out;
+
+ ret = gf_store_iter_destroy (iter);
+
+ if (ret)
+ goto out;
+
+out:
+ gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret);
+
+ return ret;
+}
+
+
+int
+glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo)
+{
+ int ret = -1;
+ int exists = 0;
+ char *key = NULL;
+ char *value = NULL;
+ char volpath[PATH_MAX] = {0,};
+ char path[PATH_MAX] = {0,};
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ gf_store_iter_t *iter = NULL;
+ gf_store_op_errno_t op_errno = GD_STORE_SUCCESS;
+
+ this = THIS;
+ GF_ASSERT (this);
+ conf = THIS->private;
+ GF_ASSERT (volinfo);
+
+ GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, conf);
+
+ snprintf (path, sizeof (path), "%s/%s", volpath,
+ GLUSTERD_VOLUME_INFO_FILE);
+
+ ret = gf_store_handle_retrieve (path, &volinfo->shandle);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "volinfo handle is NULL");
+ goto out;
+ }
+
+ ret = gf_store_iter_new (volinfo->shandle, &iter);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get new store "
+ "iter");
+ goto out;
+ }
+
+ ret = gf_store_iter_get_next (iter, &key, &value, &op_errno);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get next store "
+ "iter");
+ goto out;
+ }
+
+ while (!ret) {
+ gf_log ("", GF_LOG_DEBUG, "key = %s value = %s", key, value);
+ if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_TYPE,
+ strlen (GLUSTERD_STORE_KEY_VOL_TYPE))) {
+ volinfo->type = atoi (value);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_COUNT,
+ strlen (GLUSTERD_STORE_KEY_VOL_COUNT))) {
+ volinfo->brick_count = atoi (value);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_STATUS,
+ strlen (GLUSTERD_STORE_KEY_VOL_STATUS))) {
+ volinfo->status = atoi (value);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_VERSION,
+ strlen (GLUSTERD_STORE_KEY_VOL_VERSION))) {
+ volinfo->version = atoi (value);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_PORT,
+ strlen (GLUSTERD_STORE_KEY_VOL_PORT))) {
+ volinfo->port = atoi (value);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_SUB_COUNT,
+ strlen (GLUSTERD_STORE_KEY_VOL_SUB_COUNT))) {
+ volinfo->sub_count = atoi (value);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_STRIPE_CNT,
+ strlen (GLUSTERD_STORE_KEY_VOL_STRIPE_CNT))) {
+ volinfo->stripe_count = atoi (value);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_REPLICA_CNT,
+ strlen (GLUSTERD_STORE_KEY_VOL_REPLICA_CNT))) {
+ volinfo->replica_count = atoi (value);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_TRANSPORT,
+ strlen (GLUSTERD_STORE_KEY_VOL_TRANSPORT))) {
+ volinfo->transport_type = atoi (value);
+ volinfo->nfs_transport_type = volinfo->transport_type;
+ if (volinfo->transport_type == GF_TRANSPORT_BOTH_TCP_RDMA) {
+ volinfo->nfs_transport_type = GF_DEFAULT_NFS_TRANSPORT;
+ }
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_ID,
+ strlen (GLUSTERD_STORE_KEY_VOL_ID))) {
+ ret = uuid_parse (value, volinfo->volume_id);
+ if (ret)
+ gf_log ("", GF_LOG_WARNING,
+ "failed to parse uuid");
+
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_USERNAME,
+ strlen (GLUSTERD_STORE_KEY_USERNAME))) {
+
+ glusterd_auth_set_username (volinfo, value);
+
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_PASSWORD,
+ strlen (GLUSTERD_STORE_KEY_PASSWORD))) {
+
+ glusterd_auth_set_password (volinfo, value);
+
+ } else if (strstr (key, "slave")) {
+ ret = dict_set_dynstr (volinfo->gsync_slaves, key,
+ gf_strdup (value));
+ if (ret) {
+ gf_log ("",GF_LOG_ERROR, "Error in "
+ "dict_set_str");
+ goto out;
+ }
+ gf_log ("", GF_LOG_DEBUG, "Parsed as "GEOREP" "
+ " slave:key=%s,value:%s", key, value);
+
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_OP_VERSION,
+ strlen (GLUSTERD_STORE_KEY_VOL_OP_VERSION))) {
+ volinfo->op_version = atoi (value);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION,
+ strlen (GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION))) {
+ volinfo->client_op_version = atoi (value);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_CAPS,
+ strlen (GLUSTERD_STORE_KEY_VOL_CAPS))) {
+ volinfo->caps = atoi (value);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT,
+ strlen (GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT))) {
+ volinfo->snap_max_hard_limit = (uint64_t) atoll (value);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_IS_RESTORED,
+ strlen (GLUSTERD_STORE_KEY_VOL_IS_RESTORED))) {
+ volinfo->is_volume_restored = atoi (value);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_PARENT_VOLNAME,
+ strlen (GLUSTERD_STORE_KEY_PARENT_VOLNAME))) {
+ strncpy (volinfo->parent_volname, value, sizeof(volinfo->parent_volname) - 1);
+ } else {
+
+ if (is_key_glusterd_hooks_friendly (key)) {
+ exists = 1;
+
+ } else {
+ exists = glusterd_check_option_exists (key,
+ NULL);
+ }
+
+ switch (exists) {
+ case -1:
+ ret = -1;
+ goto out;
+
+ case 0:
+ gf_log ("", GF_LOG_ERROR, "Unknown key: %s",
+ key);
+ break;
+
+ case 1:
+ ret = dict_set_str(volinfo->dict, key,
+ gf_strdup (value));
+ if (ret) {
+ gf_log ("",GF_LOG_ERROR, "Error in "
+ "dict_set_str");
+ goto out;
+ }
+ gf_log ("", GF_LOG_DEBUG, "Parsed as Volume-"
+ "set:key=%s,value:%s", key, value);
+ break;
+ }
+ }
+
+ GF_FREE (key);
+ GF_FREE (value);
+ key = NULL;
+ value = NULL;
+
+ ret = gf_store_iter_get_next (iter, &key, &value, &op_errno);
+ }
+
+ /* backward compatibility */
+ {
+
+ switch (volinfo->type) {
+
+ case GF_CLUSTER_TYPE_NONE:
+ volinfo->stripe_count = 1;
+ volinfo->replica_count = 1;
+ break;
+
+ case GF_CLUSTER_TYPE_STRIPE:
+ volinfo->stripe_count = volinfo->sub_count;
+ volinfo->replica_count = 1;
+ break;
+
+ case GF_CLUSTER_TYPE_REPLICATE:
+ volinfo->stripe_count = 1;
+ volinfo->replica_count = volinfo->sub_count;
+ break;
+
+ case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
+ /* Introduced in 3.3 */
+ GF_ASSERT (volinfo->stripe_count > 0);
+ GF_ASSERT (volinfo->replica_count > 0);
+ break;
+
+ default:
+ GF_ASSERT (0);
+ break;
+ }
+
+ volinfo->dist_leaf_count = glusterd_get_dist_leaf_count (volinfo);
+
+ volinfo->subvol_count = (volinfo->brick_count /
+ volinfo->dist_leaf_count);
+
+ /* Only calculate volume op-versions if they are not found */
+ if (!volinfo->op_version && !volinfo->client_op_version)
+ gd_update_volume_op_versions (volinfo);
+ }
+
+ if (op_errno != GD_STORE_EOF)
+ goto out;
+
+ ret = gf_store_iter_destroy (iter);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to destroy store "
+ "iter");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+glusterd_volinfo_t*
+glusterd_store_retrieve_volume (char *volname, glusterd_snap_t *snap)
+{
+ int32_t ret = -1;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_volinfo_t *origin_volinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+ GF_ASSERT (volname);
+
+ ret = glusterd_volinfo_new (&volinfo);
+ if (ret)
+ goto out;
+
+ priv = THIS->private;
+
+ strncpy (volinfo->volname, volname, GLUSTERD_MAX_VOLUME_NAME);
+ volinfo->snapshot = snap;
+ if (snap)
+ volinfo->is_snap_volume = _gf_true;
+
+ ret = glusterd_store_update_volinfo (volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to update volinfo "
+ "for %s volume", volname);
+ goto out;
+ }
+
+ ret = glusterd_store_retrieve_bricks (volinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_volume_compute_cksum (volinfo);
+ if (ret)
+ goto out;
+
+ if (!snap) {
+ list_add_tail (&volinfo->vol_list, &priv->volumes);
+ } else {
+ ret = glusterd_volinfo_find (volinfo->parent_volname,
+ &origin_volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Parent volinfo "
+ "not found for %s volume", volname);
+ goto out;
+ }
+ glusterd_list_add_snapvol (origin_volinfo, volinfo);
+ }
+out:
+ if (ret) {
+ if (volinfo)
+ glusterd_volinfo_delete (volinfo);
+ volinfo = NULL;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret);
+
+ return volinfo;
+}
+
+inline void
+glusterd_store_set_options_path (glusterd_conf_t *conf, char *path, size_t len)
+{
+ snprintf (path, len, "%s/options", conf->workdir);
+}
+
+int
+_store_global_opts (dict_t *this, char *key, data_t *value, void *data)
+{
+ gf_store_handle_t *shandle = data;
+
+ gf_store_save_value (shandle->fd, key, (char*)value->data);
+ return 0;
+}
+
+int32_t
+glusterd_store_options (xlator_t *this, dict_t *opts)
+{
+ gf_store_handle_t *shandle = NULL;
+ glusterd_conf_t *conf = NULL;
+ char path[PATH_MAX] = {0};
+ int fd = -1;
+ int32_t ret = -1;
+
+ conf = this->private;
+ glusterd_store_set_options_path (conf, path, sizeof (path));
+
+ ret = gf_store_handle_new (path, &shandle);
+ if (ret)
+ goto out;
+
+ fd = gf_store_mkstemp (shandle);
+ if (fd <= 0) {
+ ret = -1;
+ goto out;
+ }
+
+ shandle->fd = fd;
+ dict_foreach (opts, _store_global_opts, shandle);
+ shandle->fd = 0;
+ ret = gf_store_rename_tmppath (shandle);
+ if (ret)
+ goto out;
+out:
+ gf_store_handle_destroy (shandle);
+ if (fd >=0 )
+ close (fd);
+ return ret;
+}
+
+int32_t
+glusterd_store_retrieve_options (xlator_t *this)
+{
+ char path[PATH_MAX] = {0};
+ glusterd_conf_t *conf = NULL;
+ gf_store_handle_t *shandle = NULL;
+ gf_store_iter_t *iter = NULL;
+ char *key = NULL;
+ char *value = NULL;
+ gf_store_op_errno_t op_errno = 0;
+ int ret = -1;
+
+ conf = this->private;
+ glusterd_store_set_options_path (conf, path, sizeof (path));
+
+ ret = gf_store_handle_retrieve (path, &shandle);
+ if (ret)
+ goto out;
+
+ ret = gf_store_iter_new (shandle, &iter);
+ if (ret)
+ goto out;
+
+ ret = gf_store_iter_get_next (iter, &key, &value, &op_errno);
+ while (!ret) {
+ ret = dict_set_dynstr (conf->opts, key, value);
+ if (ret) {
+ GF_FREE (key);
+ GF_FREE (value);
+ goto out;
+ }
+ GF_FREE (key);
+ key = NULL;
+ value = NULL;
+
+ ret = gf_store_iter_get_next (iter, &key, &value, &op_errno);
+ }
+ if (op_errno != GD_STORE_EOF)
+ goto out;
+ ret = 0;
+out:
+ gf_store_iter_destroy (iter);
+ gf_store_handle_destroy (shandle);
+ return ret;
+}
+
+int32_t
+glusterd_store_retrieve_volumes (xlator_t *this, glusterd_snap_t *snap)
+{
+ int32_t ret = -1;
+ char path[PATH_MAX] = {0,};
+ glusterd_conf_t *priv = NULL;
+ DIR *dir = NULL;
+ struct dirent *entry = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+
+ GF_ASSERT (this);
+ priv = this->private;
+
+ GF_ASSERT (priv);
+
+ if (snap)
+ snprintf (path, PATH_MAX, "%s/snaps/%s", priv->workdir,
+ snap->snapname);
+ else
+ snprintf (path, PATH_MAX, "%s/%s", priv->workdir,
+ GLUSTERD_VOLUME_DIR_PREFIX);
+
+ dir = opendir (path);
+
+ if (!dir) {
+ gf_log ("", GF_LOG_ERROR, "Unable to open dir %s", path);
+ goto out;
+ }
+
+ glusterd_for_each_entry (entry, dir);
+
+ while (entry) {
+ if ( entry->d_type != DT_DIR )
+ goto next;
+
+ volinfo = glusterd_store_retrieve_volume (entry->d_name, snap);
+ if (!volinfo) {
+ gf_log ("", GF_LOG_ERROR, "Unable to restore "
+ "volume: %s", entry->d_name);
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_store_retrieve_rbstate (volinfo);
+ if (ret) {
+ /* Backward compatibility */
+ gf_log ("", GF_LOG_INFO, "Creating a new rbstate "
+ "for volume: %s.", entry->d_name);
+ ret = glusterd_store_create_rbstate_shandle_on_absence (volinfo);
+ ret = glusterd_store_perform_rbstate_store (volinfo);
+ }
+
+ ret = glusterd_store_retrieve_node_state (volinfo);
+ if (ret) {
+ /* Backward compatibility */
+ gf_log ("", GF_LOG_INFO, "Creating a new node_state "
+ "for volume: %s.", entry->d_name);
+ glusterd_store_create_nodestate_sh_on_absence (volinfo);
+ ret = glusterd_store_perform_node_state_store (volinfo);
+
+ }
+
+next:
+ glusterd_for_each_entry (entry, dir);
+ }
+
+ ret = 0;
+out:
+ if (dir)
+ closedir (dir);
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+
+ return ret;
+}
+
+int32_t
+glusterd_resolve_snap_bricks (xlator_t *this, glusterd_snap_t *snap)
+{
+ int32_t ret = -1;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+
+ GF_ASSERT (this);
+ GF_VALIDATE_OR_GOTO (this->name, snap, out);
+
+ list_for_each_entry (volinfo, &snap->volumes, vol_list) {
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ ret = glusterd_resolve_brick (brickinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "resolve brick failed in restore");
+ goto out;
+ }
+ }
+ }
+
+ ret = 0;
+
+out:
+ gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret);
+
+ return ret;
+}
+
+int
+glusterd_store_update_snap (glusterd_snap_t *snap)
+{
+ int ret = -1;
+ char *key = NULL;
+ char *value = NULL;
+ char snappath[PATH_MAX] = {0,};
+ char path[PATH_MAX] = {0,};
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ gf_store_iter_t *iter = NULL;
+ gf_store_op_errno_t op_errno = GD_STORE_SUCCESS;
+
+ this = THIS;
+ conf = this->private;
+ GF_ASSERT (snap);
+
+ GLUSTERD_GET_SNAP_DIR (snappath, snap, conf);
+
+ snprintf (path, sizeof (path), "%s/%s", snappath,
+ GLUSTERD_SNAP_INFO_FILE);
+
+ ret = gf_store_handle_retrieve (path, &snap->shandle);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "snap handle is NULL");
+ goto out;
+ }
+
+ ret = gf_store_iter_new (snap->shandle, &iter);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get new store "
+ "iter");
+ goto out;
+ }
+
+ ret = gf_store_iter_get_next (iter, &key, &value, &op_errno);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get next store "
+ "iter");
+ goto out;
+ }
+
+ while (!ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "key = %s value = %s",
+ key, value);
+
+ if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_ID,
+ strlen (GLUSTERD_STORE_KEY_SNAP_ID))) {
+ ret = uuid_parse (value, snap->snap_id);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "Failed to parse uuid");
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_RESTORED,
+ strlen (GLUSTERD_STORE_KEY_SNAP_RESTORED))) {
+ snap->snap_restored = atoi (value);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_STATUS,
+ strlen (GLUSTERD_STORE_KEY_SNAP_STATUS))) {
+ snap->snap_status = atoi (value);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_DESC,
+ strlen (GLUSTERD_STORE_KEY_SNAP_DESC))) {
+ snap->description = gf_strdup (value);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_TIMESTAMP,
+ strlen (GLUSTERD_STORE_KEY_SNAP_TIMESTAMP))) {
+ snap->time_stamp = atoi (value);
+ }
+
+ GF_FREE (key);
+ GF_FREE (value);
+ key = NULL;
+ value = NULL;
+
+ ret = gf_store_iter_get_next (iter, &key, &value, &op_errno);
+ }
+
+ if (op_errno != GD_STORE_EOF)
+ goto out;
+
+ ret = gf_store_iter_destroy (iter);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to destroy store "
+ "iter");
+ }
+
+out:
+ return ret;
+}
+
+int32_t
+glusterd_store_retrieve_snap (char *snapname)
+{
+ int32_t ret = -1;
+ dict_t *dict = NULL;
+ glusterd_snap_t *snap = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ priv = this->private;
+ GF_ASSERT (priv);
+ GF_ASSERT (snapname);
+
+ dict = dict_new();
+ if (!dict) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to create dict");
+ ret = -1;
+ goto out;
+ }
+
+ snap = glusterd_new_snap_object ();
+ if (!snap) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to create "
+ " snap object");
+ goto out;
+ }
+
+ strncpy (snap->snapname, snapname, strlen(snapname));
+ ret = glusterd_store_update_snap (snap);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to update snapshot "
+ "for %s snap", snapname);
+ goto out;
+ }
+
+ ret = glusterd_store_retrieve_volumes (this, snap);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to retrieve "
+ "snap volumes for snap %s", snapname);
+ goto out;
+ }
+
+ /* Unlike bricks of normal volumes which are resolved at the end of
+ the glusterd restore, the bricks belonging to the snap volumes of
+ each snap should be resolved as part of snapshot restore itself.
+ Because if the snapshot has to be removed, then resolving bricks
+ helps glusterd in understanding what all bricks have its own uuid
+ and killing those bricks.
+ */
+ ret = glusterd_resolve_snap_bricks (this, snap);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING, "resolving the snap bricks"
+ " failed (snap: %s)", snap?snap->snapname:"");
+
+ /* When the snapshot command from cli is received, the on disk and
+ in memory structures for the snapshot are created (with the status)
+ being marked as GD_SNAP_STATUS_INIT. Once the backend snapshot is
+ taken, the status is changed to GD_SNAP_STATUS_IN_USE. If glusterd
+ dies after taking the backend snapshot, but before updating the
+ status, then when glusterd comes up, it should treat that snapshot
+ as a failed snapshot and clean it up.
+ */
+ if (snap->snap_status != GD_SNAP_STATUS_IN_USE) {
+ ret = glusterd_snap_remove (dict, snap, _gf_true, _gf_true);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING, "failed to remove"
+ " the snapshot %s", snap->snapname);
+ goto out;
+ }
+
+ /* TODO: list_add_order can do 'N-square' comparisions and
+ is not efficient. Find a better solution to store the snap
+ in order */
+ list_add_order (&snap->snap_list, &priv->snapshots,
+ glusterd_compare_snap_time);
+
+out:
+ if (dict)
+ dict_unref (dict);
+
+ gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret);
+ return ret;
+}
+
+/* Read the missed_snap_list and update the in-memory structs */
+int32_t
+glusterd_store_retrieve_missed_snaps_list (xlator_t *this)
+{
+ char buf[PATH_MAX] = "";
+ char path[PATH_MAX] = "";
+ char *missed_node_info = NULL;
+ char *brick_path = NULL;
+ char *value = NULL;
+ char *save_ptr = NULL;
+ FILE *fp = NULL;
+ int32_t brick_num = -1;
+ int32_t snap_op = -1;
+ int32_t snap_status = -1;
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
+ gf_store_op_errno_t store_errno = GD_STORE_SUCCESS;
+
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ /* Get the path of the missed_snap_list */
+ glusterd_store_missed_snaps_list_path_set (path, sizeof(path));
+
+ fp = fopen (path, "r");
+ if (!fp) {
+ /* If errno is ENOENT then there are no missed snaps yet */
+ if (errno != ENOENT) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to open %s. "
+ "Error: %s", path, strerror(errno));
+ } else {
+ gf_log (this->name, GF_LOG_INFO,
+ "No missed snaps list.");
+ ret = 0;
+ }
+ goto out;
+ }
+
+ do {
+ ret = gf_store_read_and_tokenize (fp, buf,
+ &missed_node_info, &value,
+ &store_errno);
+ if (ret) {
+ if (store_errno == GD_STORE_EOF) {
+ gf_log (this->name,
+ GF_LOG_DEBUG,
+ "EOF for missed_snap_list");
+ ret = 0;
+ break;
+ }
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to fetch data from "
+ "missed_snaps_list. Error: %s",
+ gf_store_strerror (store_errno));
+ goto out;
+ }
+
+ /* Fetch the brick_num, brick_path, snap_op and snap status */
+ brick_num = atoi(strtok_r (value, ":", &save_ptr));
+ brick_path = strtok_r (NULL, ":", &save_ptr);
+ snap_op = atoi(strtok_r (NULL, ":", &save_ptr));
+ snap_status = atoi(strtok_r (NULL, ":", &save_ptr));
+
+ if (!missed_node_info || !brick_path ||
+ brick_num < 1 || snap_op < 1 ||
+ snap_status < 1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Invalid missed_snap_entry");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_store_missed_snaps_list (missed_node_info,
+ brick_num,
+ brick_path,
+ snap_op,
+ snap_status);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to store missed snaps_list");
+ goto out;
+ }
+
+ } while (store_errno == GD_STORE_SUCCESS);
+
+ ret = 0;
+out:
+ gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_store_retrieve_snaps (xlator_t *this)
+{
+ int32_t ret = 0;
+ char path[PATH_MAX] = {0,};
+ glusterd_conf_t *priv = NULL;
+ DIR *dir = NULL;
+ struct dirent *entry = NULL;
+
+ GF_ASSERT (this);
+ priv = this->private;
+
+ GF_ASSERT (priv);
+
+ snprintf (path, PATH_MAX, "%s/snaps", priv->workdir);
+
+ dir = opendir (path);
+
+ if (!dir) {
+ /* If snaps dir doesn't exists ignore the error for
+ backward compatibility */
+ if (errno != ENOENT) {
+ ret = -1;
+ gf_log ("", GF_LOG_ERROR, "Unable to open dir %s", path);
+ }
+ goto out;
+ }
+
+ glusterd_for_each_entry (entry, dir);
+
+ while (entry) {
+ if (entry->d_type == DT_DIR) {
+ ret = glusterd_store_retrieve_snap (entry->d_name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to restore snapshot: %s",
+ entry->d_name);
+ goto out;
+ }
+ }
+
+ glusterd_for_each_entry (entry, dir);
+ }
+
+ /* Retrieve missed_snaps_list */
+ ret = glusterd_store_retrieve_missed_snaps_list (this);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Failed to retrieve missed_snaps_list");
+ goto out;
+ }
+
+out:
+ if (dir)
+ closedir (dir);
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+
+ return ret;
+}
+
+/* Writes all the contents of conf->missed_snap_list */
+int32_t
+glusterd_store_write_missed_snapinfo (int32_t fd)
+{
+ char value[PATH_MAX] = "";
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
+ glusterd_missed_snap_info *missed_snapinfo = NULL;
+ glusterd_snap_op_t *snap_opinfo = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ /* Write the missed_snap_entry */
+ list_for_each_entry (missed_snapinfo, &priv->missed_snaps_list,
+ missed_snaps) {
+ list_for_each_entry (snap_opinfo,
+ &missed_snapinfo->snap_ops,
+ snap_ops_list) {
+ snprintf (value, sizeof(value), "%d:%s:%d:%d",
+ snap_opinfo->brick_num,
+ snap_opinfo->brick_path,
+ snap_opinfo->op, snap_opinfo->status);
+ ret = gf_store_save_value
+ (fd,
+ missed_snapinfo->node_snap_info,
+ value);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to write missed snapinfo");
+ goto out;
+ }
+ }
+ }
+
+ ret = 0;
+out:
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+/* Adds the missed snap entries to the in-memory conf->missed_snap_list *
+ * and writes them to disk */
+int32_t
+glusterd_store_update_missed_snaps (dict_t *dict, int32_t missed_snap_count)
+{
+ int32_t fd = -1;
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ if (missed_snap_count < 1) {
+ gf_log (this->name, GF_LOG_DEBUG, "No missed snaps");
+ ret = 0;
+ goto out;
+ }
+
+ ret = glusterd_store_create_missed_snaps_list_shandle_on_absence ();
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to obtain "
+ "missed_snaps_list store handle.");
+ goto out;
+ }
+
+ fd = gf_store_mkstemp (priv->missed_snaps_list_shandle);
+ if (fd <= 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to create tmp file");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_add_missed_snaps_to_list (dict, missed_snap_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to add missed snaps to list");
+ goto out;
+ }
+
+ ret = glusterd_store_write_missed_snapinfo (fd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to write missed snaps to disk");
+ goto out;
+ }
+
+ ret = gf_store_rename_tmppath (priv->missed_snaps_list_shandle);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to rename the tmp file");
+ goto out;
+ }
+out:
+ if (ret && (fd > 0)) {
+ ret = gf_store_unlink_tmppath (priv->missed_snaps_list_shandle);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to unlink the tmp file");
+ }
+ ret = -1;
+ }
+
+ if (fd > 0)
+ close (fd);
+
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_store_delete_peerinfo (glusterd_peerinfo_t *peerinfo)
+{
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
+ char peerdir[PATH_MAX] = {0,};
+ char filepath[PATH_MAX] = {0,};
+ char hostname_path[PATH_MAX] = {0,};
+
+
+ if (!peerinfo) {
+ ret = 0;
+ goto out;
+ }
+
+ priv = THIS->private;
+
+ snprintf (peerdir, PATH_MAX, "%s/peers", priv->workdir);
+
+
+ if (uuid_is_null (peerinfo->uuid)) {
+
+ if (peerinfo->hostname) {
+ snprintf (filepath, PATH_MAX, "%s/%s", peerdir,
+ peerinfo->hostname);
+ } else {
+ ret = 0;
+ goto out;
+ }
+ } else {
+
+ snprintf (filepath, PATH_MAX, "%s/%s", peerdir,
+ uuid_utoa (peerinfo->uuid));
+ snprintf (hostname_path, PATH_MAX, "%s/%s",
+ peerdir, peerinfo->hostname);
+
+ ret = unlink (hostname_path);
+
+ if (!ret)
+ goto out;
+ }
+
+ ret = unlink (filepath);
+ if (ret && (errno == ENOENT))
+ ret = 0;
+
+out:
+ if (peerinfo->shandle) {
+ gf_store_handle_destroy (peerinfo->shandle);
+ peerinfo->shandle = NULL;
+ }
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+
+ return ret;
+}
+
+void
+glusterd_store_peerinfo_dirpath_set (char *path, size_t len)
+{
+ glusterd_conf_t *priv = NULL;
+ GF_ASSERT (path);
+ GF_ASSERT (len >= PATH_MAX);
+
+ priv = THIS->private;
+ snprintf (path, len, "%s/peers", priv->workdir);
+}
+
+int32_t
+glusterd_store_create_peer_dir ()
+{
+ int32_t ret = 0;
+ char path[PATH_MAX];
+
+ glusterd_store_peerinfo_dirpath_set (path, sizeof (path));
+ ret = gf_store_mkdir (path);
+
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ return ret;
+}
+
+static void
+glusterd_store_uuid_peerpath_set (glusterd_peerinfo_t *peerinfo, char *peerfpath,
+ size_t len)
+{
+ char peerdir[PATH_MAX];
+ char str[50] = {0};
+
+ GF_ASSERT (peerinfo);
+ GF_ASSERT (peerfpath);
+ GF_ASSERT (len >= PATH_MAX);
+
+ glusterd_store_peerinfo_dirpath_set (peerdir, sizeof (peerdir));
+ uuid_unparse (peerinfo->uuid, str);
+ snprintf (peerfpath, len, "%s/%s", peerdir, str);
+}
+
+static void
+glusterd_store_hostname_peerpath_set (glusterd_peerinfo_t *peerinfo,
+ char *peerfpath, size_t len)
+{
+ char peerdir[PATH_MAX];
+
+ GF_ASSERT (peerinfo);
+ GF_ASSERT (peerfpath);
+ GF_ASSERT (len >= PATH_MAX);
+
+ glusterd_store_peerinfo_dirpath_set (peerdir, sizeof (peerdir));
+ snprintf (peerfpath, len, "%s/%s", peerdir, peerinfo->hostname);
+}
+
+int32_t
+glusterd_store_peerinfo_hostname_shandle_create (glusterd_peerinfo_t *peerinfo)
+{
+ char peerfpath[PATH_MAX];
+ int32_t ret = -1;
+
+ glusterd_store_hostname_peerpath_set (peerinfo, peerfpath,
+ sizeof (peerfpath));
+ ret = gf_store_handle_create_on_absence (&peerinfo->shandle,
+ peerfpath);
+ return ret;
+}
+
+int32_t
+glusterd_store_peerinfo_uuid_shandle_create (glusterd_peerinfo_t *peerinfo)
+{
+ char peerfpath[PATH_MAX];
+ int32_t ret = -1;
+
+ glusterd_store_uuid_peerpath_set (peerinfo, peerfpath,
+ sizeof (peerfpath));
+ ret = gf_store_handle_create_on_absence (&peerinfo->shandle,
+ peerfpath);
+ return ret;
+}
+
+int32_t
+glusterd_peerinfo_hostname_shandle_check_destroy (glusterd_peerinfo_t *peerinfo)
+{
+ char peerfpath[PATH_MAX];
+ int32_t ret = -1;
+ struct stat stbuf = {0,};
+
+ glusterd_store_hostname_peerpath_set (peerinfo, peerfpath,
+ sizeof (peerfpath));
+ ret = stat (peerfpath, &stbuf);
+ if (!ret) {
+ if (peerinfo->shandle)
+ gf_store_handle_destroy (peerinfo->shandle);
+ peerinfo->shandle = NULL;
+ ret = unlink (peerfpath);
+ }
+ return ret;
+}
+
+int32_t
+glusterd_store_create_peer_shandle (glusterd_peerinfo_t *peerinfo)
+{
+ int32_t ret = 0;
+
+ GF_ASSERT (peerinfo);
+
+ if (glusterd_peerinfo_is_uuid_unknown (peerinfo)) {
+ ret = glusterd_store_peerinfo_hostname_shandle_create (peerinfo);
+ } else {
+ ret = glusterd_peerinfo_hostname_shandle_check_destroy (peerinfo);
+ ret = glusterd_store_peerinfo_uuid_shandle_create (peerinfo);
+ }
+ return ret;
+}
+
+int32_t
+glusterd_store_peer_write (int fd, glusterd_peerinfo_t *peerinfo)
+{
+ char buf[50] = {0};
+ int32_t ret = 0;
+
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_PEER_UUID,
+ uuid_utoa (peerinfo->uuid));
+ if (ret)
+ goto out;
+
+ snprintf (buf, sizeof (buf), "%d", peerinfo->state.state);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_PEER_STATE, buf);
+ if (ret)
+ goto out;
+
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_PEER_HOSTNAME "1",
+ peerinfo->hostname);
+ if (ret)
+ goto out;
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_store_perform_peer_store (glusterd_peerinfo_t *peerinfo)
+{
+ int fd = -1;
+ int32_t ret = -1;
+
+ GF_ASSERT (peerinfo);
+
+ fd = gf_store_mkstemp (peerinfo->shandle);
+ if (fd <= 0) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_store_peer_write (fd, peerinfo);
+ if (ret)
+ goto out;
+
+ ret = gf_store_rename_tmppath (peerinfo->shandle);
+out:
+ if (ret && (fd > 0))
+ gf_store_unlink_tmppath (peerinfo->shandle);
+ if (fd > 0)
+ close (fd);
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_store_peerinfo (glusterd_peerinfo_t *peerinfo)
+{
+ int32_t ret = -1;
+
+ GF_ASSERT (peerinfo);
+
+ ret = glusterd_store_create_peer_dir ();
+ if (ret)
+ goto out;
+
+ ret = glusterd_store_create_peer_shandle (peerinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_store_perform_peer_store (peerinfo);
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_store_retrieve_peers (xlator_t *this)
+{
+ int32_t ret = 0;
+ glusterd_conf_t *priv = NULL;
+ DIR *dir = NULL;
+ struct dirent *entry = NULL;
+ char path[PATH_MAX] = {0,};
+ glusterd_peerinfo_t *peerinfo = NULL;
+ uuid_t uuid = {0,};
+ char *hostname = NULL;
+ int32_t state = 0;
+ gf_store_handle_t *shandle = NULL;
+ char filepath[PATH_MAX] = {0,};
+ gf_store_iter_t *iter = NULL;
+ char *key = NULL;
+ char *value = NULL;
+ glusterd_peerctx_args_t args = {0};
+ gf_store_op_errno_t op_errno = GD_STORE_SUCCESS;
+
+ GF_ASSERT (this);
+ priv = this->private;
+
+ GF_ASSERT (priv);
+
+ snprintf (path, PATH_MAX, "%s/%s", priv->workdir,
+ GLUSTERD_PEER_DIR_PREFIX);
+
+ dir = opendir (path);
+
+ if (!dir) {
+ gf_log ("", GF_LOG_ERROR, "Unable to open dir %s", path);
+ ret = -1;
+ goto out;
+ }
+
+ glusterd_for_each_entry (entry, dir);
+
+ while (entry) {
+ snprintf (filepath, PATH_MAX, "%s/%s", path, entry->d_name);
+ ret = gf_store_handle_retrieve (filepath, &shandle);
+ if (ret)
+ goto out;
+
+ ret = gf_store_iter_new (shandle, &iter);
+ if (ret)
+ goto out;
+
+ ret = gf_store_iter_get_next (iter, &key, &value, &op_errno);
+ if (ret)
+ goto out;
+
+ while (!ret) {
+
+ if (!strncmp (GLUSTERD_STORE_KEY_PEER_UUID, key,
+ strlen (GLUSTERD_STORE_KEY_PEER_UUID))) {
+ if (value)
+ uuid_parse (value, uuid);
+ } else if (!strncmp (GLUSTERD_STORE_KEY_PEER_STATE,
+ key,
+ strlen (GLUSTERD_STORE_KEY_PEER_STATE))) {
+ state = atoi (value);
+ } else if (!strncmp (GLUSTERD_STORE_KEY_PEER_HOSTNAME,
+ key,
+ strlen (GLUSTERD_STORE_KEY_PEER_HOSTNAME))) {
+ hostname = gf_strdup (value);
+ } else {
+ gf_log ("", GF_LOG_ERROR, "Unknown key: %s",
+ key);
+ }
+
+ GF_FREE (key);
+ GF_FREE (value);
+ key = NULL;
+ value = NULL;
+
+ ret = gf_store_iter_get_next (iter, &key, &value,
+ &op_errno);
+ }
+ if (op_errno != GD_STORE_EOF)
+ goto out;
+
+ (void) gf_store_iter_destroy (iter);
+
+ ret = glusterd_friend_add (hostname, 0, state, &uuid,
+ &peerinfo, 1, NULL);
+
+ GF_FREE (hostname);
+ if (ret)
+ goto out;
+
+ peerinfo->shandle = shandle;
+ glusterd_for_each_entry (entry, dir);
+ }
+
+ args.mode = GD_MODE_ON;
+ list_for_each_entry (peerinfo, &priv->peers, uuid_list) {
+ ret = glusterd_friend_rpc_create (this, peerinfo, &args);
+ if (ret)
+ goto out;
+ }
+
+out:
+ if (dir)
+ closedir (dir);
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+
+ return ret;
+}
+
+int32_t
+glusterd_resolve_all_bricks (xlator_t *this)
+{
+ int32_t ret = 0;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+
+ GF_ASSERT (this);
+ priv = this->private;
+
+ GF_ASSERT (priv);
+
+ list_for_each_entry (volinfo, &priv->volumes, vol_list) {
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ ret = glusterd_resolve_brick (brickinfo);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "resolve brick failed in restore");
+ goto out;
+ }
+ }
+ }
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+
+ return ret;
+}
+
+int32_t
+glusterd_restore ()
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ ret = glusterd_restore_op_version (this);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to restore op_version");
+ goto out;
+ }
+
+ ret = glusterd_store_retrieve_volumes (this, NULL);
+ if (ret)
+ goto out;
+
+ ret = glusterd_store_retrieve_snaps (this);
+ if (ret)
+ goto out;
+
+ ret = glusterd_store_retrieve_peers (this);
+ if (ret)
+ goto out;
+
+ ret = glusterd_resolve_all_bricks (this);
+ if (ret)
+ goto out;
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h
new file mode 100644
index 000000000..1b5cebc0c
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-store.h
@@ -0,0 +1,164 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_HA_H_
+#define _GLUSTERD_HA_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <pthread.h>
+#include "uuid.h"
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "run.h"
+#include "logging.h"
+#include "call-stub.h"
+#include "fd.h"
+#include "byte-order.h"
+#include "glusterd.h"
+#include "rpcsvc.h"
+
+typedef enum glusterd_store_ver_ac_{
+ GLUSTERD_VOLINFO_VER_AC_NONE = 0,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT = 1,
+ GLUSTERD_VOLINFO_VER_AC_DECREMENT = 2,
+} glusterd_volinfo_ver_ac_t;
+
+
+#define GLUSTERD_STORE_UUID_KEY "UUID"
+
+#define GLUSTERD_STORE_KEY_VOL_TYPE "type"
+#define GLUSTERD_STORE_KEY_VOL_COUNT "count"
+#define GLUSTERD_STORE_KEY_VOL_STATUS "status"
+#define GLUSTERD_STORE_KEY_VOL_PORT "port"
+#define GLUSTERD_STORE_KEY_VOL_SUB_COUNT "sub_count"
+#define GLUSTERD_STORE_KEY_VOL_STRIPE_CNT "stripe_count"
+#define GLUSTERD_STORE_KEY_VOL_REPLICA_CNT "replica_count"
+#define GLUSTERD_STORE_KEY_VOL_BRICK "brick"
+#define GLUSTERD_STORE_KEY_VOL_VERSION "version"
+#define GLUSTERD_STORE_KEY_VOL_TRANSPORT "transport-type"
+#define GLUSTERD_STORE_KEY_VOL_ID "volume-id"
+#define GLUSTERD_STORE_KEY_VOL_IS_RESTORED "is-volume-restored"
+#define GLUSTERD_STORE_KEY_RB_STATUS "rb_status"
+#define GLUSTERD_STORE_KEY_RB_SRC_BRICK "rb_src"
+#define GLUSTERD_STORE_KEY_RB_DST_BRICK "rb_dst"
+#define GLUSTERD_STORE_KEY_RB_DST_PORT "rb_port"
+#define GLUSTERD_STORE_KEY_VOL_DEFRAG "rebalance_status"
+#define GLUSTERD_STORE_KEY_DEFRAG_OP "rebalance_op"
+#define GLUSTERD_STORE_KEY_USERNAME "username"
+#define GLUSTERD_STORE_KEY_PASSWORD "password"
+#define GLUSTERD_STORE_KEY_PARENT_VOLNAME "parent_volname"
+#define GLUSTERD_STORE_KEY_VOL_OP_VERSION "op-version"
+#define GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION "client-op-version"
+
+#define GLUSTERD_STORE_KEY_SNAP_NAME "name"
+#define GLUSTERD_STORE_KEY_SNAP_ID "snap-id"
+#define GLUSTERD_STORE_KEY_SNAP_DESC "desc"
+#define GLUSTERD_STORE_KEY_SNAP_TIMESTAMP "time-stamp"
+#define GLUSTERD_STORE_KEY_SNAP_STATUS "status"
+#define GLUSTERD_STORE_KEY_SNAP_RESTORED "snap-restored"
+#define GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT "snap-max-hard-limit"
+#define GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT "snap-max-soft-limit"
+
+#define GLUSTERD_STORE_KEY_BRICK_HOSTNAME "hostname"
+#define GLUSTERD_STORE_KEY_BRICK_PATH "path"
+#define GLUSTERD_STORE_KEY_BRICK_PORT "listen-port"
+#define GLUSTERD_STORE_KEY_BRICK_RDMA_PORT "rdma.listen-port"
+#define GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED "decommissioned"
+#define GLUSTERD_STORE_KEY_BRICK_VGNAME "vg"
+#define GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH "device_path"
+#define GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS "snap-status"
+
+#define GLUSTERD_STORE_KEY_PEER_UUID "uuid"
+#define GLUSTERD_STORE_KEY_PEER_HOSTNAME "hostname"
+#define GLUSTERD_STORE_KEY_PEER_STATE "state"
+
+#define GLUSTERD_STORE_KEY_VOL_CAPS "caps"
+
+#define glusterd_for_each_entry(entry, dir) \
+ do {\
+ entry = NULL;\
+ if (dir) {\
+ entry = readdir (dir);\
+ while (entry && (!strcmp (entry->d_name, ".") ||\
+ !fnmatch ("*.tmp", entry->d_name, 0) ||\
+ !strcmp (entry->d_name, ".."))) {\
+ entry = readdir (dir);\
+ }\
+ }\
+ } while (0); \
+
+
+int32_t
+glusterd_store_volinfo (glusterd_volinfo_t *volinfo, glusterd_volinfo_ver_ac_t ac);
+
+int32_t
+glusterd_store_delete_volume (glusterd_volinfo_t *volinfo);
+
+int32_t
+glusterd_store_delete_snap (glusterd_snap_t *snap);
+
+int32_t
+glusterd_retrieve_uuid ();
+
+int32_t
+glusterd_store_peerinfo (glusterd_peerinfo_t *peerinfo);
+
+int32_t
+glusterd_store_delete_peerinfo (glusterd_peerinfo_t *peerinfo);
+
+int32_t
+glusterd_store_delete_brick (glusterd_brickinfo_t *brickinfo,
+ char *delete_path);
+
+int32_t
+glusterd_restore ();
+
+void
+glusterd_perform_volinfo_version_action (glusterd_volinfo_t *volinfo,
+ glusterd_volinfo_ver_ac_t ac);
+gf_boolean_t
+glusterd_store_is_valid_brickpath (char *volname, char *brick);
+
+int32_t
+glusterd_store_perform_node_state_store (glusterd_volinfo_t *volinfo);
+
+int
+glusterd_retrieve_op_version (xlator_t *this, int *op_version);
+
+int
+glusterd_store_global_info (xlator_t *this);
+
+int32_t
+glusterd_store_retrieve_options (xlator_t *this);
+
+int32_t
+glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo);
+
+int32_t
+glusterd_store_options (xlator_t *this, dict_t *opts);
+
+void
+glusterd_replace_slash_with_hyphen (char *str);
+
+int32_t
+glusterd_store_perform_volume_store (glusterd_volinfo_t *volinfo);
+
+int32_t
+glusterd_store_snap (glusterd_snap_t *snap);
+
+int32_t
+glusterd_store_update_missed_snaps (dict_t *dict,
+ int32_t missed_snap_count);
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
new file mode 100644
index 000000000..438df8266
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
@@ -0,0 +1,1639 @@
+/*
+ Copyright (c) 2012-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+/* rpc related syncops */
+#include "rpc-clnt.h"
+#include "protocol-common.h"
+#include "xdr-generic.h"
+#include "glusterd1-xdr.h"
+#include "glusterd-syncop.h"
+
+#include "glusterd.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-utils.h"
+#include "glusterd-locks.h"
+
+extern glusterd_op_info_t opinfo;
+
+void
+gd_synctask_barrier_wait (struct syncargs *args, int count)
+{
+ glusterd_conf_t *conf = THIS->private;
+
+ synclock_unlock (&conf->big_lock);
+ synctask_barrier_wait (args, count);
+ synclock_lock (&conf->big_lock);
+
+ syncbarrier_destroy (&args->barrier);
+}
+
+static void
+gd_mgmt_v3_collate_errors (struct syncargs *args, int op_ret, int op_errno,
+ char *op_errstr, int op_code,
+ glusterd_peerinfo_t *peerinfo, u_char *uuid)
+{
+ char err_str[PATH_MAX] = "Please check log file for details.";
+ char op_err[PATH_MAX] = "";
+ int len = -1;
+ char *peer_str = NULL;
+
+ if (op_ret) {
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ if (peerinfo)
+ peer_str = peerinfo->hostname;
+ else
+ peer_str = uuid_utoa (uuid);
+
+ if (op_errstr && strcmp (op_errstr, "")) {
+ len = snprintf (err_str, sizeof(err_str) - 1,
+ "Error: %s", op_errstr);
+ err_str[len] = '\0';
+ }
+
+ switch (op_code){
+ case GLUSTERD_MGMT_V3_LOCK:
+ {
+ len = snprintf (op_err, sizeof(op_err) - 1,
+ "Locking failed "
+ "on %s. %s", peer_str, err_str);
+ break;
+ }
+ case GLUSTERD_MGMT_V3_UNLOCK:
+ {
+ len = snprintf (op_err, sizeof(op_err) - 1,
+ "Unlocking failed "
+ "on %s. %s", peer_str, err_str);
+ break;
+ }
+ }
+ op_err[len] = '\0';
+
+ if (args->errstr) {
+ len = snprintf (err_str, sizeof(err_str) - 1,
+ "%s\n%s", args->errstr,
+ op_err);
+ GF_FREE (args->errstr);
+ args->errstr = NULL;
+ } else
+ len = snprintf (err_str, sizeof(err_str) - 1,
+ "%s", op_err);
+ err_str[len] = '\0';
+
+ gf_log ("", GF_LOG_ERROR, "%s", op_err);
+ args->errstr = gf_strdup (err_str);
+ }
+
+ return;
+}
+
+static void
+gd_collate_errors (struct syncargs *args, int op_ret, int op_errno,
+ char *op_errstr, int op_code,
+ glusterd_peerinfo_t *peerinfo, u_char *uuid)
+{
+ char err_str[PATH_MAX] = "Please check log file for details.";
+ char op_err[PATH_MAX] = "";
+ int len = -1;
+ char *peer_str = NULL;
+
+ if (op_ret) {
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ if (peerinfo)
+ peer_str = peerinfo->hostname;
+ else
+ peer_str = uuid_utoa (uuid);
+
+ if (op_errstr && strcmp (op_errstr, "")) {
+ len = snprintf (err_str, sizeof(err_str) - 1,
+ "Error: %s", op_errstr);
+ err_str[len] = '\0';
+ }
+
+ switch (op_code){
+ case GLUSTERD_MGMT_CLUSTER_UNLOCK :
+ {
+ len = snprintf (op_err, sizeof(op_err) - 1,
+ "Unlocking failed on %s. %s",
+ peer_str, err_str);
+ break;
+ }
+ case GLUSTERD_MGMT_STAGE_OP :
+ {
+ len = snprintf (op_err, sizeof(op_err) - 1,
+ "Staging failed on %s. %s",
+ peer_str, err_str);
+ break;
+ }
+ case GLUSTERD_MGMT_COMMIT_OP :
+ {
+ len = snprintf (op_err, sizeof(op_err) - 1,
+ "Commit failed on %s. %s",
+ peer_str, err_str);
+ break;
+ }
+ }
+ op_err[len] = '\0';
+
+ if (args->errstr) {
+ len = snprintf (err_str, sizeof(err_str) - 1,
+ "%s\n%s", args->errstr,
+ op_err);
+ GF_FREE (args->errstr);
+ args->errstr = NULL;
+ } else
+ len = snprintf (err_str, sizeof(err_str) - 1,
+ "%s", op_err);
+ err_str[len] = '\0';
+
+ gf_log ("", GF_LOG_ERROR, "%s", op_err);
+ args->errstr = gf_strdup (err_str);
+ }
+
+ return;
+}
+
+void
+gd_syncargs_init (struct syncargs *args, dict_t *op_ctx)
+{
+ args->dict = op_ctx;
+ pthread_mutex_init (&args->lock_dict, NULL);
+}
+
+static void
+gd_stage_op_req_free (gd1_mgmt_stage_op_req *req)
+{
+ if (!req)
+ return;
+
+ GF_FREE (req->buf.buf_val);
+ GF_FREE (req);
+}
+
+static void
+gd_commit_op_req_free (gd1_mgmt_commit_op_req *req)
+{
+ if (!req)
+ return;
+
+ GF_FREE (req->buf.buf_val);
+ GF_FREE (req);
+}
+
+static void
+gd_brick_op_req_free (gd1_mgmt_brick_op_req *req)
+{
+ if (!req)
+ return;
+
+ if (strcmp (req->name, "") != 0)
+ GF_FREE (req->name);
+ GF_FREE (req->input.input_val);
+ GF_FREE (req);
+}
+
+int
+gd_syncop_submit_request (struct rpc_clnt *rpc, void *req, void *local,
+ void *cookie, rpc_clnt_prog_t *prog, int procnum,
+ fop_cbk_fn_t cbkfn, xdrproc_t xdrproc)
+{
+ int ret = -1;
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ int count = 0;
+ struct iovec iov = {0, };
+ ssize_t req_size = 0;
+ call_frame_t *frame = NULL;
+
+ GF_ASSERT (rpc);
+ if (!req)
+ goto out;
+
+ req_size = xdr_sizeof (xdrproc, req);
+ iobuf = iobuf_get2 (rpc->ctx->iobuf_pool, req_size);
+ if (!iobuf)
+ goto out;
+
+ iobref = iobref_new ();
+ if (!iobref)
+ goto out;
+
+ frame = create_frame (THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ iobref_add (iobref, iobuf);
+
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_pagesize (iobuf);
+
+ /* Create the xdr payload */
+ ret = xdr_serialize_generic (iov, req, xdrproc);
+ if (ret == -1)
+ goto out;
+
+ iov.iov_len = ret;
+ count = 1;
+
+ frame->local = local;
+ frame->cookie = cookie;
+
+ /* Send the msg */
+ ret = rpc_clnt_submit (rpc, prog, procnum, cbkfn,
+ &iov, count, NULL, 0, iobref,
+ frame, NULL, 0, NULL, 0, NULL);
+
+ /* TODO: do we need to start ping also? */
+
+out:
+ iobref_unref (iobref);
+ iobuf_unref (iobuf);
+
+ return ret;
+}
+
+/* Defined in glusterd-rpc-ops.c */
+extern struct rpc_clnt_program gd_mgmt_prog;
+extern struct rpc_clnt_program gd_brick_prog;
+extern struct rpc_clnt_program gd_mgmt_v3_prog;
+
+int
+glusterd_syncop_aggr_rsp_dict (glusterd_op_t op, dict_t *aggr, dict_t *rsp)
+{
+ int ret = 0;
+
+ switch (op) {
+ case GD_OP_REPLACE_BRICK:
+ ret = glusterd_rb_use_rsp_dict (aggr, rsp);
+ if (ret)
+ goto out;
+ break;
+
+ case GD_OP_SYNC_VOLUME:
+ ret = glusterd_sync_use_rsp_dict (aggr, rsp);
+ if (ret)
+ goto out;
+ break;
+
+ case GD_OP_PROFILE_VOLUME:
+ ret = glusterd_profile_volume_use_rsp_dict (aggr, rsp);
+ if (ret)
+ goto out;
+ break;
+
+ case GD_OP_GSYNC_CREATE:
+ break;
+
+ case GD_OP_GSYNC_SET:
+ ret = glusterd_gsync_use_rsp_dict (aggr, rsp, NULL);
+ if (ret)
+ goto out;
+ break;
+
+ case GD_OP_STATUS_VOLUME:
+ ret = glusterd_volume_status_copy_to_op_ctx_dict (aggr, rsp);
+ if (ret)
+ goto out;
+ break;
+
+ case GD_OP_REBALANCE:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ ret = glusterd_volume_rebalance_use_rsp_dict (aggr, rsp);
+ if (ret)
+ goto out;
+ break;
+
+ case GD_OP_HEAL_VOLUME:
+ ret = glusterd_volume_heal_use_rsp_dict (aggr, rsp);
+ if (ret)
+ goto out;
+
+ break;
+
+ case GD_OP_QUOTA:
+ case GD_OP_CLEARLOCKS_VOLUME:
+ ret = glusterd_use_rsp_dict (aggr, rsp);
+ if (ret)
+ goto out;
+
+ break;
+
+ case GD_OP_SYS_EXEC:
+ ret = glusterd_sys_exec_output_rsp_dict (aggr, rsp);
+ if (ret)
+ goto out;
+ break;
+
+ case GD_OP_SNAP:
+ ret = glusterd_snap_use_rsp_dict (aggr, rsp);
+ if (ret)
+ goto out;
+ break;
+
+ default:
+ break;
+ }
+out:
+ return ret;
+}
+
+int32_t
+_gd_syncop_mgmt_lock_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ struct syncargs *args = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ gd1_mgmt_cluster_lock_rsp rsp = {{0},};
+ call_frame_t *frame = NULL;
+ int op_ret = -1;
+ int op_errno = -1;
+
+ frame = myframe;
+ args = frame->local;
+ peerinfo = frame->cookie;
+ frame->local = NULL;
+ frame->cookie = NULL;
+
+ if (-1 == req->rpc_status) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gd1_mgmt_cluster_lock_rsp);
+ if (ret < 0)
+ goto out;
+
+ uuid_copy (args->uuid, rsp.uuid);
+
+ /* Set peer as locked, so we unlock only the locked peers */
+ if (rsp.op_ret == 0)
+ peerinfo->locked = _gf_true;
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+out:
+ gd_collate_errors (args, op_ret, op_errno, NULL,
+ GLUSTERD_MGMT_CLUSTER_LOCK, peerinfo, rsp.uuid);
+ STACK_DESTROY (frame->root);
+ synctask_barrier_wake(args);
+ return 0;
+}
+
+int32_t
+gd_syncop_mgmt_lock_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ _gd_syncop_mgmt_lock_cbk);
+}
+
+int
+gd_syncop_mgmt_lock (glusterd_peerinfo_t *peerinfo, struct syncargs *args,
+ uuid_t my_uuid, uuid_t recv_uuid)
+{
+ int ret = -1;
+ gd1_mgmt_cluster_lock_req req = {{0},};
+ glusterd_conf_t *conf = THIS->private;
+
+ uuid_copy (req.uuid, my_uuid);
+ synclock_unlock (&conf->big_lock);
+ ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo,
+ &gd_mgmt_prog,
+ GLUSTERD_MGMT_CLUSTER_LOCK,
+ gd_syncop_mgmt_lock_cbk,
+ (xdrproc_t) xdr_gd1_mgmt_cluster_lock_req);
+ synclock_lock (&conf->big_lock);
+ return ret;
+}
+
+int32_t
+gd_syncop_mgmt_v3_lock_cbk_fn (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ struct syncargs *args = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ gd1_mgmt_v3_lock_rsp rsp = {{0},};
+ call_frame_t *frame = NULL;
+ int op_ret = -1;
+ int op_errno = -1;
+
+ GF_ASSERT(req);
+ GF_ASSERT(iov);
+ GF_ASSERT(myframe);
+
+ frame = myframe;
+ args = frame->local;
+ peerinfo = frame->cookie;
+ frame->local = NULL;
+ frame->cookie = NULL;
+
+ if (-1 == req->rpc_status) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp);
+ if (ret < 0)
+ goto out;
+
+ uuid_copy (args->uuid, rsp.uuid);
+
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+out:
+ gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL,
+ GLUSTERD_MGMT_V3_LOCK,
+ peerinfo, rsp.uuid);
+ STACK_DESTROY (frame->root);
+ synctask_barrier_wake(args);
+ return 0;
+}
+
+int32_t
+gd_syncop_mgmt_v3_lock_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ gd_syncop_mgmt_v3_lock_cbk_fn);
+}
+
+int
+gd_syncop_mgmt_v3_lock (glusterd_op_t op, dict_t *op_ctx,
+ glusterd_peerinfo_t *peerinfo,
+ struct syncargs *args, uuid_t my_uuid,
+ uuid_t recv_uuid, uuid_t txn_id)
+{
+ int ret = -1;
+ gd1_mgmt_v3_lock_req req = {{0},};
+ glusterd_conf_t *conf = THIS->private;
+
+ GF_ASSERT(op_ctx);
+ GF_ASSERT(peerinfo);
+ GF_ASSERT(args);
+
+ ret = dict_allocate_and_serialize (op_ctx,
+ &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret)
+ goto out;
+
+ uuid_copy (req.uuid, my_uuid);
+ uuid_copy (req.txn_id, txn_id);
+ req.op = op;
+ synclock_unlock (&conf->big_lock);
+ ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo,
+ &gd_mgmt_v3_prog,
+ GLUSTERD_MGMT_V3_LOCK,
+ gd_syncop_mgmt_v3_lock_cbk,
+ (xdrproc_t) xdr_gd1_mgmt_v3_lock_req);
+ synclock_lock (&conf->big_lock);
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+gd_syncop_mgmt_v3_unlock_cbk_fn (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ struct syncargs *args = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ gd1_mgmt_v3_unlock_rsp rsp = {{0},};
+ call_frame_t *frame = NULL;
+ int op_ret = -1;
+ int op_errno = -1;
+
+ GF_ASSERT(req);
+ GF_ASSERT(iov);
+ GF_ASSERT(myframe);
+
+ frame = myframe;
+ args = frame->local;
+ peerinfo = frame->cookie;
+ frame->local = NULL;
+ frame->cookie = NULL;
+
+ if (-1 == req->rpc_status) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp);
+ if (ret < 0)
+ goto out;
+
+ uuid_copy (args->uuid, rsp.uuid);
+
+ /* Set peer as locked, so we unlock only the locked peers */
+ if (rsp.op_ret == 0)
+ peerinfo->locked = _gf_true;
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+out:
+ gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL,
+ GLUSTERD_MGMT_V3_UNLOCK,
+ peerinfo, rsp.uuid);
+ STACK_DESTROY (frame->root);
+ synctask_barrier_wake(args);
+ return 0;
+}
+
+int32_t
+gd_syncop_mgmt_v3_unlock_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ gd_syncop_mgmt_v3_unlock_cbk_fn);
+}
+
+int
+gd_syncop_mgmt_v3_unlock (dict_t *op_ctx, glusterd_peerinfo_t *peerinfo,
+ struct syncargs *args, uuid_t my_uuid,
+ uuid_t recv_uuid, uuid_t txn_id)
+{
+ int ret = -1;
+ gd1_mgmt_v3_unlock_req req = {{0},};
+ glusterd_conf_t *conf = THIS->private;
+
+ GF_ASSERT(op_ctx);
+ GF_ASSERT(peerinfo);
+ GF_ASSERT(args);
+
+ ret = dict_allocate_and_serialize (op_ctx,
+ &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret)
+ goto out;
+
+ uuid_copy (req.uuid, my_uuid);
+ uuid_copy (req.txn_id, txn_id);
+ synclock_unlock (&conf->big_lock);
+ ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo,
+ &gd_mgmt_v3_prog,
+ GLUSTERD_MGMT_V3_UNLOCK,
+ gd_syncop_mgmt_v3_unlock_cbk,
+ (xdrproc_t) xdr_gd1_mgmt_v3_unlock_req);
+ synclock_lock (&conf->big_lock);
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+_gd_syncop_mgmt_unlock_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ struct syncargs *args = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ gd1_mgmt_cluster_unlock_rsp rsp = {{0},};
+ call_frame_t *frame = NULL;
+ int op_ret = -1;
+ int op_errno = -1;
+
+ frame = myframe;
+ args = frame->local;
+ peerinfo = frame->cookie;
+ frame->local = NULL;
+
+ if (-1 == req->rpc_status) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gd1_mgmt_cluster_unlock_rsp);
+ if (ret < 0)
+ goto out;
+
+ uuid_copy (args->uuid, rsp.uuid);
+
+ peerinfo->locked = _gf_false;
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+out:
+ gd_collate_errors (args, op_ret, op_errno, NULL,
+ GLUSTERD_MGMT_CLUSTER_UNLOCK, peerinfo, rsp.uuid);
+ STACK_DESTROY (frame->root);
+ synctask_barrier_wake(args);
+ return 0;
+}
+
+int32_t
+gd_syncop_mgmt_unlock_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ _gd_syncop_mgmt_unlock_cbk);
+}
+
+
+int
+gd_syncop_mgmt_unlock (glusterd_peerinfo_t *peerinfo, struct syncargs *args,
+ uuid_t my_uuid, uuid_t recv_uuid)
+{
+ int ret = -1;
+ gd1_mgmt_cluster_unlock_req req = {{0},};
+ glusterd_conf_t *conf = THIS->private;
+
+ uuid_copy (req.uuid, my_uuid);
+ synclock_unlock (&conf->big_lock);
+ ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo,
+ &gd_mgmt_prog,
+ GLUSTERD_MGMT_CLUSTER_UNLOCK,
+ gd_syncop_mgmt_unlock_cbk,
+ (xdrproc_t) xdr_gd1_mgmt_cluster_lock_req);
+ synclock_lock (&conf->big_lock);
+ return ret;
+}
+
+int32_t
+_gd_syncop_stage_op_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ gd1_mgmt_stage_op_rsp rsp = {{0},};
+ struct syncargs *args = NULL;
+ xlator_t *this = NULL;
+ dict_t *rsp_dict = NULL;
+ call_frame_t *frame = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ int op_ret = -1;
+ int op_errno = -1;
+
+ this = THIS;
+ frame = myframe;
+ args = frame->local;
+ frame->local = NULL;
+
+ if (-1 == req->rpc_status) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gd1_mgmt_stage_op_rsp);
+ if (ret < 0)
+ goto out;
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ rsp_dict = dict_new ();
+
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &rsp_dict);
+ if (ret < 0) {
+ GF_FREE (rsp.dict.dict_val);
+ goto out;
+ } else {
+ rsp_dict->extra_stdfree = rsp.dict.dict_val;
+ }
+ }
+
+ ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_CRITICAL, "Staging response "
+ "for 'Volume %s' received from unknown "
+ "peer: %s", gd_op_list[rsp.op],
+ uuid_utoa (rsp.uuid));
+ goto out;
+ }
+
+ uuid_copy (args->uuid, rsp.uuid);
+ if (rsp.op == GD_OP_REPLACE_BRICK) {
+ pthread_mutex_lock (&args->lock_dict);
+ {
+ ret = glusterd_syncop_aggr_rsp_dict (rsp.op, args->dict,
+ rsp_dict);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "%s",
+ "Failed to aggregate response from "
+ " node/brick");
+ }
+ pthread_mutex_unlock (&args->lock_dict);
+ }
+
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+
+out:
+ gd_collate_errors (args, op_ret, op_errno, rsp.op_errstr,
+ GLUSTERD_MGMT_STAGE_OP, peerinfo, rsp.uuid);
+
+ if (rsp_dict)
+ dict_unref (rsp_dict);
+
+ STACK_DESTROY (frame->root);
+ synctask_barrier_wake(args);
+ return 0;
+}
+
+int32_t
+gd_syncop_stage_op_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ _gd_syncop_stage_op_cbk);
+}
+
+
+int
+gd_syncop_mgmt_stage_op (struct rpc_clnt *rpc, struct syncargs *args,
+ uuid_t my_uuid, uuid_t recv_uuid, int op,
+ dict_t *dict_out, dict_t *op_ctx)
+{
+ gd1_mgmt_stage_op_req *req = NULL;
+ glusterd_conf_t *conf = THIS->private;
+ int ret = -1;
+
+ req = GF_CALLOC (1, sizeof (*req), gf_gld_mt_mop_stage_req_t);
+ if (!req)
+ goto out;
+
+ uuid_copy (req->uuid, my_uuid);
+ req->op = op;
+
+ ret = dict_allocate_and_serialize (dict_out,
+ &req->buf.buf_val, &req->buf.buf_len);
+ if (ret)
+ goto out;
+
+ synclock_unlock (&conf->big_lock);
+ ret = gd_syncop_submit_request (rpc, req, args, NULL, &gd_mgmt_prog,
+ GLUSTERD_MGMT_STAGE_OP,
+ gd_syncop_stage_op_cbk,
+ (xdrproc_t) xdr_gd1_mgmt_stage_op_req);
+ synclock_lock (&conf->big_lock);
+out:
+ gd_stage_op_req_free (req);
+ return ret;
+
+}
+
+int32_t
+_gd_syncop_brick_op_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ struct syncargs *args = NULL;
+ gd1_mgmt_brick_op_rsp rsp = {0,};
+ int ret = -1;
+ call_frame_t *frame = NULL;
+
+ frame = myframe;
+ args = frame->local;
+ frame->local = NULL;
+
+ /* initialize */
+ args->op_ret = -1;
+ args->op_errno = EINVAL;
+
+ if (-1 == req->rpc_status) {
+ args->op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+ if (ret < 0)
+ goto out;
+
+ if (rsp.output.output_len) {
+ args->dict = dict_new ();
+ if (!args->dict) {
+ ret = -1;
+ args->op_errno = ENOMEM;
+ goto out;
+ }
+
+ ret = dict_unserialize (rsp.output.output_val,
+ rsp.output.output_len,
+ &args->dict);
+ if (ret < 0)
+ goto out;
+ }
+
+ args->op_ret = rsp.op_ret;
+ args->op_errno = rsp.op_errno;
+ args->errstr = gf_strdup (rsp.op_errstr);
+
+out:
+ if ((rsp.op_errstr) && (strcmp (rsp.op_errstr, "") != 0))
+ free (rsp.op_errstr);
+ free (rsp.output.output_val);
+
+ STACK_DESTROY (frame->root);
+ __wake (args);
+
+ return 0;
+}
+
+int32_t
+gd_syncop_brick_op_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ _gd_syncop_brick_op_cbk);
+}
+
+int
+gd_syncop_mgmt_brick_op (struct rpc_clnt *rpc, glusterd_pending_node_t *pnode,
+ int op, dict_t *dict_out, dict_t *op_ctx,
+ char **errstr)
+{
+ struct syncargs args = {0, };
+ gd1_mgmt_brick_op_req *req = NULL;
+ int ret = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ args.op_ret = -1;
+ args.op_errno = ENOTCONN;
+
+ if ((pnode->type == GD_NODE_NFS) ||
+ ((pnode->type == GD_NODE_SHD) &&
+ (op == GD_OP_STATUS_VOLUME))) {
+ ret = glusterd_node_op_build_payload
+ (op, &req, dict_out);
+
+ } else {
+ ret = glusterd_brick_op_build_payload
+ (op, pnode->node, &req, dict_out);
+
+ }
+
+ if (ret)
+ goto out;
+
+ GD_SYNCOP (rpc, (&args), NULL, gd_syncop_brick_op_cbk, req,
+ &gd_brick_prog, req->op, xdr_gd1_mgmt_brick_op_req);
+
+ if (args.errstr) {
+ if ((strlen(args.errstr) > 0) && errstr)
+ *errstr = args.errstr;
+ else
+ GF_FREE (args.errstr);
+ }
+
+ if (GD_OP_STATUS_VOLUME == op) {
+ ret = dict_set_int32 (args.dict, "index", pnode->index);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error setting index on brick status"
+ " rsp dict");
+ args.op_ret = -1;
+ goto out;
+ }
+ }
+ if (args.op_ret == 0)
+ glusterd_handle_node_rsp (dict_out, pnode->node, op,
+ args.dict, op_ctx, errstr,
+ pnode->type);
+
+out:
+ errno = args.op_errno;
+ if (args.dict)
+ dict_unref (args.dict);
+ gd_brick_op_req_free (req);
+ return args.op_ret;
+
+}
+
+int32_t
+_gd_syncop_commit_op_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ gd1_mgmt_commit_op_rsp rsp = {{0},};
+ struct syncargs *args = NULL;
+ xlator_t *this = NULL;
+ dict_t *rsp_dict = NULL;
+ call_frame_t *frame = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ int op_ret = -1;
+ int op_errno = -1;
+
+ this = THIS;
+ frame = myframe;
+ args = frame->local;
+ frame->local = NULL;
+
+ if (-1 == req->rpc_status) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gd1_mgmt_commit_op_rsp);
+ if (ret < 0) {
+ goto out;
+ }
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ rsp_dict = dict_new ();
+
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &rsp_dict);
+ if (ret < 0) {
+ GF_FREE (rsp.dict.dict_val);
+ goto out;
+ } else {
+ rsp_dict->extra_stdfree = rsp.dict.dict_val;
+ }
+ }
+
+ ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_CRITICAL, "Commit response "
+ "for 'Volume %s' received from unknown "
+ "peer: %s", gd_op_list[rsp.op],
+ uuid_utoa (rsp.uuid));
+ goto out;
+ }
+
+ uuid_copy (args->uuid, rsp.uuid);
+ pthread_mutex_lock (&args->lock_dict);
+ {
+ ret = glusterd_syncop_aggr_rsp_dict (rsp.op, args->dict,
+ rsp_dict);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "%s",
+ "Failed to aggregate response from "
+ " node/brick");
+ }
+ pthread_mutex_unlock (&args->lock_dict);
+
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+
+out:
+ gd_collate_errors (args, op_ret, op_errno, rsp.op_errstr,
+ GLUSTERD_MGMT_COMMIT_OP, peerinfo, rsp.uuid);
+ if (rsp_dict)
+ dict_unref (rsp_dict);
+
+ STACK_DESTROY (frame->root);
+ synctask_barrier_wake(args);
+
+ return 0;
+}
+
+int32_t
+gd_syncop_commit_op_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk (req, iov, count, myframe,
+ _gd_syncop_commit_op_cbk);
+}
+
+
+int
+gd_syncop_mgmt_commit_op (struct rpc_clnt *rpc, struct syncargs *args,
+ uuid_t my_uuid, uuid_t recv_uuid,
+ int op, dict_t *dict_out, dict_t *op_ctx)
+{
+ glusterd_conf_t *conf = THIS->private;
+ gd1_mgmt_commit_op_req *req = NULL;
+ int ret = -1;
+
+ req = GF_CALLOC (1, sizeof (*req), gf_gld_mt_mop_commit_req_t);
+ if (!req)
+ goto out;
+
+ uuid_copy (req->uuid, my_uuid);
+ req->op = op;
+
+ ret = dict_allocate_and_serialize (dict_out,
+ &req->buf.buf_val, &req->buf.buf_len);
+ if (ret)
+ goto out;
+
+ synclock_unlock (&conf->big_lock);
+ ret = gd_syncop_submit_request (rpc, req, args, NULL, &gd_mgmt_prog,
+ GLUSTERD_MGMT_COMMIT_OP ,
+ gd_syncop_commit_op_cbk,
+ (xdrproc_t) xdr_gd1_mgmt_commit_op_req);
+ synclock_lock (&conf->big_lock);
+out:
+ gd_commit_op_req_free (req);
+ return ret;
+}
+
+
+int
+gd_build_peers_list (struct list_head *peers, struct list_head *xact_peers,
+ glusterd_op_t op)
+{
+ glusterd_peerinfo_t *peerinfo = NULL;
+ int npeers = 0;
+
+ list_for_each_entry (peerinfo, peers, uuid_list) {
+ if (!peerinfo->connected)
+ continue;
+ if (op != GD_OP_SYNC_VOLUME &&
+ peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)
+ continue;
+
+ list_add_tail (&peerinfo->op_peers_list, xact_peers);
+ npeers++;
+ }
+ return npeers;
+}
+
+int
+gd_lock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, dict_t *op_ctx,
+ char **op_errstr, int npeers, uuid_t txn_id)
+{
+ int ret = -1;
+ int peer_cnt = 0;
+ uuid_t peer_uuid = {0};
+ xlator_t *this = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ struct syncargs args = {0};
+ struct list_head *peers = NULL;
+
+ peers = &conf->xaction_peers;
+
+ if (!npeers) {
+ ret = 0;
+ goto out;
+ }
+
+ this = THIS;
+ synctask_barrier_init((&args));
+ peer_cnt = 0;
+ list_for_each_entry (peerinfo, peers, op_peers_list) {
+ if (conf->op_version < 3) {
+ /* Reset lock status */
+ peerinfo->locked = _gf_false;
+ gd_syncop_mgmt_lock (peerinfo, &args,
+ MY_UUID, peer_uuid);
+ } else
+ gd_syncop_mgmt_v3_lock (op, op_ctx, peerinfo, &args,
+ MY_UUID, peer_uuid, txn_id);
+ peer_cnt++;
+ }
+ gd_synctask_barrier_wait((&args), peer_cnt);
+
+ if (args.op_ret) {
+ if (args.errstr)
+ *op_errstr = gf_strdup (args.errstr);
+ else {
+ ret = gf_asprintf (op_errstr, "Another transaction could be "
+ "in progress. Please try again after "
+ "sometime.");
+ if (ret == -1)
+ *op_errstr = NULL;
+
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to acquire lock");
+
+ }
+ }
+
+ ret = args.op_ret;
+
+ gf_log (this->name, GF_LOG_DEBUG, "Sent lock op req for 'Volume %s' "
+ "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret);
+out:
+ return ret;
+}
+
+int
+gd_stage_op_phase (struct list_head *peers, glusterd_op_t op, dict_t *op_ctx,
+ dict_t *req_dict, char **op_errstr, int npeers)
+{
+ int ret = -1;
+ int peer_cnt = 0;
+ dict_t *rsp_dict = NULL;
+ char *hostname = NULL;
+ xlator_t *this = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ uuid_t tmp_uuid = {0};
+ char *errstr = NULL;
+ struct syncargs args = {0};
+
+ this = THIS;
+ rsp_dict = dict_new ();
+ if (!rsp_dict)
+ goto out;
+
+ ret = glusterd_op_stage_validate (op, req_dict, op_errstr, rsp_dict);
+ if (ret) {
+ hostname = "localhost";
+ goto stage_done;
+ }
+
+ if ((op == GD_OP_REPLACE_BRICK)) {
+ ret = glusterd_syncop_aggr_rsp_dict (op, op_ctx, rsp_dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s",
+ "Failed to aggregate response from node/brick");
+ goto out;
+ }
+ }
+ dict_unref (rsp_dict);
+ rsp_dict = NULL;
+
+stage_done:
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, LOGSTR_STAGE_FAIL,
+ gd_op_list[op], hostname, (*op_errstr) ? ":" : " ",
+ (*op_errstr) ? *op_errstr : " ");
+ if (*op_errstr == NULL)
+ gf_asprintf (op_errstr, OPERRSTR_STAGE_FAIL, hostname);
+ goto out;
+ }
+
+ if (!npeers) {
+ ret = 0;
+ goto out;
+ }
+
+ gd_syncargs_init (&args, op_ctx);
+ synctask_barrier_init((&args));
+ peer_cnt = 0;
+ list_for_each_entry (peerinfo, peers, op_peers_list) {
+ ret = gd_syncop_mgmt_stage_op (peerinfo->rpc, &args,
+ MY_UUID, tmp_uuid,
+ op, req_dict, op_ctx);
+ peer_cnt++;
+ }
+ gd_synctask_barrier_wait((&args), peer_cnt);
+
+ if (args.errstr)
+ *op_errstr = gf_strdup (args.errstr);
+ else if (dict_get_str (op_ctx, "errstr", &errstr) == 0)
+ *op_errstr = gf_strdup (errstr);
+
+ ret = args.op_ret;
+
+ gf_log (this->name, GF_LOG_DEBUG, "Sent stage op req for 'Volume %s' "
+ "to %d peers", gd_op_list[op], peer_cnt);
+out:
+ if (rsp_dict)
+ dict_unref (rsp_dict);
+ return ret;
+}
+
+int
+gd_commit_op_phase (struct list_head *peers, glusterd_op_t op, dict_t *op_ctx,
+ dict_t *req_dict, char **op_errstr, int npeers)
+{
+ dict_t *rsp_dict = NULL;
+ int peer_cnt = -1;
+ int ret = -1;
+ char *hostname = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ xlator_t *this = NULL;
+ uuid_t tmp_uuid = {0};
+ char *errstr = NULL;
+ struct syncargs args = {0};
+
+ this = THIS;
+ rsp_dict = dict_new ();
+ if (!rsp_dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_op_commit_perform (op, req_dict, op_errstr, rsp_dict);
+ if (ret) {
+ hostname = "localhost";
+ goto commit_done;
+ }
+ if (op != GD_OP_SYNC_VOLUME) {
+ ret = glusterd_syncop_aggr_rsp_dict (op, op_ctx, rsp_dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s",
+ "Failed to aggregate response "
+ "from node/brick");
+ goto out;
+ }
+ }
+ dict_unref (rsp_dict);
+ rsp_dict = NULL;
+
+commit_done:
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, LOGSTR_COMMIT_FAIL,
+ gd_op_list[op], hostname, (*op_errstr) ? ":" : " ",
+ (*op_errstr) ? *op_errstr : " ");
+ if (*op_errstr == NULL)
+ gf_asprintf (op_errstr, OPERRSTR_COMMIT_FAIL,
+ hostname);
+ goto out;
+ }
+
+ if (!npeers) {
+ ret = 0;
+ goto out;
+ }
+
+ gd_syncargs_init (&args, op_ctx);
+ synctask_barrier_init((&args));
+ peer_cnt = 0;
+ list_for_each_entry (peerinfo, peers, op_peers_list) {
+ ret = gd_syncop_mgmt_commit_op (peerinfo->rpc, &args,
+ MY_UUID, tmp_uuid,
+ op, req_dict, op_ctx);
+ peer_cnt++;
+ }
+ gd_synctask_barrier_wait((&args), peer_cnt);
+ ret = args.op_ret;
+ if (args.errstr)
+ *op_errstr = gf_strdup (args.errstr);
+ else if (dict_get_str (op_ctx, "errstr", &errstr) == 0)
+ *op_errstr = gf_strdup (errstr);
+
+ gf_log (this->name, GF_LOG_DEBUG, "Sent commit op req for 'Volume %s' "
+ "to %d peers", gd_op_list[op], peer_cnt);
+out:
+ if (!ret)
+ glusterd_op_modify_op_ctx (op, op_ctx);
+
+ if (rsp_dict)
+ dict_unref (rsp_dict);
+
+ GF_FREE (args.errstr);
+ args.errstr = NULL;
+
+ return ret;
+}
+
+int
+gd_unlock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, int op_ret,
+ rpcsvc_request_t *req, dict_t *op_ctx, char *op_errstr,
+ int npeers, char *volname, gf_boolean_t is_acquired,
+ uuid_t txn_id)
+{
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_peerinfo_t *tmp = NULL;
+ uuid_t tmp_uuid = {0};
+ int peer_cnt = 0;
+ int ret = -1;
+ xlator_t *this = NULL;
+ struct syncargs args = {0};
+ struct list_head *peers = NULL;
+
+ peers = &conf->xaction_peers;
+
+ if (!npeers) {
+ ret = 0;
+ goto out;
+ }
+
+ /* If the lock has not been held during this
+ * transaction, do not send unlock requests */
+ if (!is_acquired)
+ goto out;
+
+ this = THIS;
+ synctask_barrier_init((&args));
+ peer_cnt = 0;
+ list_for_each_entry_safe (peerinfo, tmp, peers, op_peers_list) {
+ if (conf->op_version < 3) {
+ /* Only unlock peers that were locked */
+ if (peerinfo->locked)
+ gd_syncop_mgmt_unlock (peerinfo, &args,
+ MY_UUID, tmp_uuid);
+ } else
+ gd_syncop_mgmt_v3_unlock (op_ctx, peerinfo,
+ &args, MY_UUID,
+ tmp_uuid, txn_id);
+ peer_cnt++;
+ list_del_init (&peerinfo->op_peers_list);
+ }
+ gd_synctask_barrier_wait((&args), peer_cnt);
+ ret = args.op_ret;
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to unlock "
+ "on some peer(s)");
+ }
+
+out:
+ glusterd_op_send_cli_response (op, op_ret, 0, req, op_ctx, op_errstr);
+ glusterd_op_clear_op (op);
+ if (is_acquired) {
+ /* Based on the op-version, we release *
+ * the cluster or mgmt_v3 lock */
+ if (conf->op_version < 3)
+ glusterd_unlock (MY_UUID);
+ else {
+ ret = glusterd_mgmt_v3_unlock (volname, MY_UUID,
+ "vol");
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to release lock for %s",
+ volname);
+ }
+ }
+
+ return 0;
+}
+
+int
+gd_get_brick_count (struct list_head *bricks)
+{
+ glusterd_pending_node_t *pending_node = NULL;
+ int npeers = 0;
+ list_for_each_entry (pending_node, bricks, list) {
+ npeers++;
+ }
+ return npeers;
+}
+
+int
+gd_brick_op_phase (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+ char **op_errstr)
+{
+ glusterd_pending_node_t *pending_node = NULL;
+ struct list_head selected = {0,};
+ xlator_t *this = NULL;
+ int brick_count = 0;
+ int ret = -1;
+ rpc_clnt_t *rpc = NULL;
+ dict_t *rsp_dict = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ this = THIS;
+ conf = this->private;
+ rsp_dict = dict_new ();
+ if (!rsp_dict) {
+ ret = -1;
+ goto out;
+ }
+
+ INIT_LIST_HEAD (&selected);
+ ret = glusterd_op_bricks_select (op, req_dict, op_errstr, &selected, rsp_dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s",
+ (*op_errstr)? *op_errstr: "Brick op failed. Check "
+ "glusterd log file for more details.");
+ goto out;
+ }
+
+ if (op == GD_OP_HEAL_VOLUME) {
+ ret = glusterd_syncop_aggr_rsp_dict (op, op_ctx, rsp_dict);
+ if (ret)
+ goto out;
+ }
+ dict_unref (rsp_dict);
+ rsp_dict = NULL;
+
+ brick_count = 0;
+ list_for_each_entry (pending_node, &selected, list) {
+ rpc = glusterd_pending_node_get_rpc (pending_node);
+ if (!rpc) {
+ if (pending_node->type == GD_NODE_REBALANCE) {
+ ret = 0;
+ glusterd_defrag_volume_node_rsp (req_dict,
+ NULL, op_ctx);
+ goto out;
+ }
+
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR, "Brick Op failed "
+ "due to rpc failure.");
+ goto out;
+ }
+ ret = gd_syncop_mgmt_brick_op (rpc, pending_node, op, req_dict,
+ op_ctx, op_errstr);
+ if (ret)
+ goto out;
+
+ brick_count++;
+ }
+
+ ret = 0;
+out:
+ if (rsp_dict)
+ dict_unref (rsp_dict);
+ gf_log (this->name, GF_LOG_DEBUG, "Sent op req to %d bricks",
+ brick_count);
+ return ret;
+}
+
+void
+gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req)
+{
+ int ret = -1;
+ int npeers = 0;
+ dict_t *req_dict = NULL;
+ glusterd_conf_t *conf = NULL;
+ glusterd_op_t op = 0;
+ int32_t tmp_op = 0;
+ char *op_errstr = NULL;
+ char *tmp = NULL;
+ char *volname = NULL;
+ xlator_t *this = NULL;
+ gf_boolean_t is_acquired = _gf_false;
+ uuid_t *txn_id = NULL;
+ uuid_t *originator_uuid = NULL;
+ glusterd_op_info_t txn_opinfo;
+
+ this = THIS;
+ GF_ASSERT (this);
+ conf = this->private;
+ GF_ASSERT (conf);
+
+ /* Generate a transaction-id for this operation and
+ * save it in the dict */
+ txn_id = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t);
+ if (!txn_id) {
+ ret = -1;
+ goto out;
+ }
+
+ uuid_generate (*txn_id);
+
+ ret = dict_set_bin (op_ctx, "transaction_id",
+ txn_id, sizeof(*txn_id));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set transaction id.");
+ goto out;
+ } else
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Transaction_id = %s", uuid_utoa (*txn_id));
+
+ /* Save the MY_UUID as the originator_uuid */
+ originator_uuid = GF_CALLOC (1, sizeof(uuid_t),
+ gf_common_mt_uuid_t);
+ if (!originator_uuid) {
+ ret = -1;
+ goto out;
+ }
+
+ uuid_copy (*originator_uuid, MY_UUID);
+ ret = dict_set_bin (op_ctx, "originator_uuid",
+ originator_uuid, sizeof (uuid_t));
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set originator_uuid.");
+ goto out;
+ }
+
+ ret = dict_get_int32 (op_ctx, GD_SYNC_OPCODE_KEY, &tmp_op);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get volume "
+ "operation");
+ goto out;
+ }
+
+ op = tmp_op;
+
+ /* Based on the op_version, acquire a cluster or mgmt_v3 lock */
+ if (conf->op_version < 3) {
+ ret = glusterd_lock (MY_UUID);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to acquire lock");
+ gf_asprintf (&op_errstr,
+ "Another transaction is in progress. "
+ "Please try again after sometime.");
+ goto out;
+ }
+ } else {
+
+ /* If no volname is given as a part of the command, locks will
+ * not be held */
+ ret = dict_get_str (op_ctx, "volname", &tmp);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG, "Failed to get volume "
+ "name");
+ goto local_locking_done;
+ } else {
+ /* Use a copy of volname, as cli response will be
+ * sent before the unlock, and the volname in the
+ * dict, might be removed */
+ volname = gf_strdup (tmp);
+ if (!volname)
+ goto out;
+ }
+
+ ret = glusterd_mgmt_v3_lock (volname, MY_UUID, "vol");
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to acquire lock for %s", volname);
+ gf_asprintf (&op_errstr,
+ "Another transaction is in progress "
+ "for %s. Please try again after sometime.",
+ volname);
+ goto out;
+ }
+ }
+
+ is_acquired = _gf_true;
+
+local_locking_done:
+
+ /* Save opinfo for this transaction with the transaction id */
+ glusterd_txn_opinfo_init (&txn_opinfo, NULL, &op, NULL, NULL);
+ ret = glusterd_set_txn_opinfo (txn_id, &txn_opinfo);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to set transaction's opinfo");
+
+ opinfo = txn_opinfo;
+
+ INIT_LIST_HEAD (&conf->xaction_peers);
+
+ /* Make 'volume status tasks' command a local operation.
+ * This is accomplished by setting npeers to 0.
+ */
+ if (!glusterd_is_status_tasks_op (op, op_ctx))
+ npeers = gd_build_peers_list (&conf->peers,
+ &conf->xaction_peers, op);
+
+ /* If no volname is given as a part of the command, locks will
+ * not be held */
+ if (volname) {
+ ret = gd_lock_op_phase (conf, op, op_ctx, &op_errstr, npeers, *txn_id);
+ if (ret)
+ goto out;
+ }
+
+ ret = glusterd_op_build_payload (&req_dict, &op_errstr, op_ctx);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, LOGSTR_BUILD_PAYLOAD,
+ gd_op_list[op]);
+ if (op_errstr == NULL)
+ gf_asprintf (&op_errstr, OPERRSTR_BUILD_PAYLOAD);
+ goto out;
+ }
+
+ ret = gd_stage_op_phase (&conf->xaction_peers, op, op_ctx, req_dict,
+ &op_errstr, npeers);
+ if (ret)
+ goto out;
+
+ ret = gd_brick_op_phase (op, op_ctx, req_dict, &op_errstr);
+ if (ret)
+ goto out;
+
+ ret = gd_commit_op_phase (&conf->xaction_peers, op, op_ctx, req_dict,
+ &op_errstr, npeers);
+ if (ret)
+ goto out;
+
+ ret = 0;
+out:
+ (void) gd_unlock_op_phase (conf, op, ret, req, op_ctx, op_errstr,
+ npeers, volname, is_acquired, *txn_id);
+
+ /* Clearing the transaction opinfo */
+ ret = glusterd_clear_txn_opinfo (txn_id);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to clear transaction's opinfo");
+
+ if (volname)
+ GF_FREE (volname);
+
+ if (req_dict)
+ dict_unref (req_dict);
+
+ if (op_errstr) {
+ GF_FREE (op_errstr);
+ op_errstr = NULL;
+ }
+
+ return;
+}
+
+int32_t
+glusterd_op_begin_synctask (rpcsvc_request_t *req, glusterd_op_t op,
+ void *dict)
+{
+ int ret = 0;
+
+ ret = dict_set_int32 (dict, GD_SYNC_OPCODE_KEY, op);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "dict set failed for setting operations");
+ goto out;
+ }
+
+ gd_sync_task_begin (dict, req);
+ ret = 0;
+out:
+
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.h b/xlators/mgmt/glusterd/src/glusterd-syncop.h
new file mode 100644
index 000000000..e83ea2f4c
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.h
@@ -0,0 +1,71 @@
+/*
+ Copyright (c) 2012-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __RPC_SYNCOP_H
+#define __RPC_SYNCOP_H
+
+#include "syncop.h"
+#include "glusterd-sm.h"
+#include "glusterd.h"
+
+#define GD_SYNC_OPCODE_KEY "sync-mgmt-operation"
+
+/* gd_syncop_* */
+#define GD_SYNCOP(rpc, stb, cookie, cbk, req, prog, procnum, xdrproc) do { \
+ int ret = 0; \
+ struct synctask *task = NULL; \
+ glusterd_conf_t *conf= THIS->private; \
+ \
+ task = synctask_get (); \
+ stb->task = task; \
+ \
+ /*This is to ensure that the brick_op_cbk is able to \
+ * take the big lock*/ \
+ synclock_unlock (&conf->big_lock); \
+ ret = gd_syncop_submit_request (rpc, req, stb, cookie, \
+ prog, procnum, cbk, \
+ (xdrproc_t)xdrproc); \
+ if (!ret) \
+ synctask_yield (stb->task); \
+ synclock_lock (&conf->big_lock); \
+ } while (0)
+
+
+int gd_syncop_submit_request (struct rpc_clnt *rpc, void *req, void *local,
+ void *cookie, rpc_clnt_prog_t *prog, int procnum,
+ fop_cbk_fn_t cbkfn, xdrproc_t xdrproc);
+
+
+int gd_syncop_mgmt_lock (glusterd_peerinfo_t *peerinfo, struct syncargs *arg,
+ uuid_t my_uuid, uuid_t recv_uuid);
+int gd_syncop_mgmt_unlock (glusterd_peerinfo_t *peerinfo, struct syncargs *arg,
+ uuid_t my_uuid, uuid_t recv_uuid);
+int gd_syncop_mgmt_stage_op (struct rpc_clnt *rpc, struct syncargs *arg,
+ uuid_t my_uuid, uuid_t recv_uuid, int op,
+ dict_t *dict_out, dict_t *op_ctx);
+int gd_syncop_mgmt_commit_op (struct rpc_clnt *rpc, struct syncargs *arg,
+ uuid_t my_uuid, uuid_t recv_uuid, int op,
+ dict_t *dict_out, dict_t *op_ctx);
+
+void
+gd_synctask_barrier_wait (struct syncargs *args, int count);
+
+int
+gd_build_peers_list (struct list_head *peers, struct list_head *xact_peers,
+ glusterd_op_t op);
+int
+gd_brick_op_phase (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+ char **op_errstr);
+
+int
+glusterd_syncop_aggr_rsp_dict (glusterd_op_t op, dict_t *aggr, dict_t *rsp);
+
+void
+gd_syncargs_init (struct syncargs *args, dict_t *op_ctx);
+#endif /* __RPC_SYNCOP_H */
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
new file mode 100644
index 000000000..e8ae05851
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -0,0 +1,8894 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+#include <inttypes.h>
+
+#include "globals.h"
+#include "glusterfs.h"
+#include "compat.h"
+#include "dict.h"
+#include "xlator.h"
+#include "logging.h"
+#include "timer.h"
+#include "defaults.h"
+#include "compat.h"
+#include "syncop.h"
+#include "run.h"
+#include "compat-errno.h"
+#include "statedump.h"
+#include "syscall.h"
+#include "glusterd-mem-types.h"
+#include "glusterd.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-sm.h"
+#include "glusterd-utils.h"
+#include "glusterd-store.h"
+#include "glusterd-volgen.h"
+#include "glusterd-pmap.h"
+#include "glusterfs-acl.h"
+#include "glusterd-locks.h"
+
+#include "xdr-generic.h"
+#include <sys/resource.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <rpc/pmap_clnt.h>
+#include <unistd.h>
+#include <fnmatch.h>
+#include <sys/statvfs.h>
+#include <ifaddrs.h>
+#ifdef HAVE_BD_XLATOR
+#include <lvm2app.h>
+#endif
+
+
+#ifdef GF_LINUX_HOST_OS
+#include <mntent.h>
+#endif
+
+#ifdef GF_SOLARIS_HOST_OS
+#include <sys/sockio.h>
+#endif
+
+#define NFS_PROGRAM 100003
+#define NFSV3_VERSION 3
+
+#define MOUNT_PROGRAM 100005
+#define MOUNTV3_VERSION 3
+#define MOUNTV1_VERSION 1
+
+#define NLM_PROGRAM 100021
+#define NLMV4_VERSION 4
+#define NLMV1_VERSION 1
+
+#define CEILING_POS(X) (((X)-(int)(X)) > 0 ? (int)((X)+1) : (int)(X))
+
+static glusterd_lock_t lock;
+
+char*
+gd_peer_uuid_str (glusterd_peerinfo_t *peerinfo)
+{
+ if ((peerinfo == NULL) || uuid_is_null (peerinfo->uuid))
+ return NULL;
+
+ if (peerinfo->uuid_str[0] == '\0')
+ uuid_utoa_r (peerinfo->uuid, peerinfo->uuid_str);
+
+ return peerinfo->uuid_str;
+}
+
+
+int32_t
+glusterd_get_lock_owner (uuid_t *uuid)
+{
+ uuid_copy (*uuid, lock.owner) ;
+ return 0;
+}
+
+static int32_t
+glusterd_set_lock_owner (uuid_t owner)
+{
+ uuid_copy (lock.owner, owner);
+ //TODO: set timestamp
+ return 0;
+}
+
+static int32_t
+glusterd_unset_lock_owner (uuid_t owner)
+{
+ uuid_clear (lock.owner);
+ //TODO: set timestamp
+ return 0;
+}
+
+gf_boolean_t
+glusterd_is_fuse_available ()
+{
+
+ int fd = 0;
+
+ fd = open ("/dev/fuse", O_RDWR);
+
+ if (fd > -1 && !close (fd))
+ return _gf_true;
+ else
+ return _gf_false;
+}
+
+int32_t
+glusterd_lock (uuid_t uuid)
+{
+
+ uuid_t owner;
+ char new_owner_str[50];
+ char owner_str[50];
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ GF_ASSERT (uuid);
+
+ glusterd_get_lock_owner (&owner);
+
+ if (!uuid_is_null (owner)) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get lock"
+ " for uuid: %s, lock held by: %s",
+ uuid_utoa_r (uuid, new_owner_str),
+ uuid_utoa_r (owner, owner_str));
+ goto out;
+ }
+
+ ret = glusterd_set_lock_owner (uuid);
+
+ if (!ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "Cluster lock held by"
+ " %s", uuid_utoa (uuid));
+ }
+
+out:
+ return ret;
+}
+
+
+int32_t
+glusterd_unlock (uuid_t uuid)
+{
+ uuid_t owner;
+ char new_owner_str[50];
+ char owner_str[50];
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ GF_ASSERT (uuid);
+
+ glusterd_get_lock_owner (&owner);
+
+ if (uuid_is_null (owner)) {
+ gf_log (this->name, GF_LOG_ERROR, "Cluster lock not held!");
+ goto out;
+ }
+
+ ret = uuid_compare (uuid, owner);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Cluster lock held by %s ,"
+ "unlock req from %s!", uuid_utoa_r (owner ,owner_str)
+ , uuid_utoa_r (uuid, new_owner_str));
+ goto out;
+ }
+
+ ret = glusterd_unset_lock_owner (uuid);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to clear cluster "
+ "lock");
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+
+int
+glusterd_get_uuid (uuid_t *uuid)
+{
+ glusterd_conf_t *priv = NULL;
+
+ priv = THIS->private;
+
+ GF_ASSERT (priv);
+
+ uuid_copy (*uuid, MY_UUID);
+
+ return 0;
+}
+
+int
+glusterd_submit_request (struct rpc_clnt *rpc, void *req,
+ call_frame_t *frame, rpc_clnt_prog_t *prog,
+ int procnum, struct iobref *iobref,
+ xlator_t *this, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc)
+{
+ int ret = -1;
+ struct iobuf *iobuf = NULL;
+ int count = 0;
+ char new_iobref = 0, start_ping = 0;
+ struct iovec iov = {0, };
+ ssize_t req_size = 0;
+
+ GF_ASSERT (rpc);
+ GF_ASSERT (this);
+
+ if (req) {
+ req_size = xdr_sizeof (xdrproc, req);
+ iobuf = iobuf_get2 (this->ctx->iobuf_pool, req_size);
+ if (!iobuf) {
+ goto out;
+ };
+
+ if (!iobref) {
+ iobref = iobref_new ();
+ if (!iobref) {
+ goto out;
+ }
+
+ new_iobref = 1;
+ }
+
+ iobref_add (iobref, iobuf);
+
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_pagesize (iobuf);
+
+ /* Create the xdr payload */
+ ret = xdr_serialize_generic (iov, req, xdrproc);
+ if (ret == -1) {
+ goto out;
+ }
+ iov.iov_len = ret;
+ count = 1;
+ }
+
+ /* Send the msg */
+ ret = rpc_clnt_submit (rpc, prog, procnum, cbkfn,
+ &iov, count,
+ NULL, 0, iobref, frame, NULL, 0, NULL, 0, NULL);
+
+ if (ret == 0) {
+ pthread_mutex_lock (&rpc->conn.lock);
+ {
+ if (!rpc->conn.ping_started) {
+ start_ping = 1;
+ }
+ }
+ pthread_mutex_unlock (&rpc->conn.lock);
+ }
+
+ if (start_ping)
+ //client_start_ping ((void *) this);
+
+ ret = 0;
+out:
+ if (new_iobref) {
+ iobref_unref (iobref);
+ }
+
+ iobuf_unref (iobuf);
+
+ return ret;
+}
+
+struct iobuf *
+glusterd_serialize_reply (rpcsvc_request_t *req, void *arg,
+ struct iovec *outmsg, xdrproc_t xdrproc)
+{
+ struct iobuf *iob = NULL;
+ ssize_t retlen = -1;
+ ssize_t rsp_size = 0;
+
+ /* First, get the io buffer into which the reply in arg will
+ * be serialized.
+ */
+ rsp_size = xdr_sizeof (xdrproc, arg);
+ iob = iobuf_get2 (req->svc->ctx->iobuf_pool, rsp_size);
+ if (!iob) {
+ gf_log ("", GF_LOG_ERROR, "Failed to get iobuf");
+ goto ret;
+ }
+
+ iobuf_to_iovec (iob, outmsg);
+ /* Use the given serializer to translate the give C structure in arg
+ * to XDR format which will be written into the buffer in outmsg.
+ */
+ /* retlen is used to received the error since size_t is unsigned and we
+ * need -1 for error notification during encoding.
+ */
+ retlen = xdr_serialize_generic (*outmsg, arg, xdrproc);
+ if (retlen == -1) {
+ gf_log ("", GF_LOG_ERROR, "Failed to encode message");
+ goto ret;
+ }
+
+ outmsg->iov_len = retlen;
+ret:
+ if (retlen == -1) {
+ iobuf_unref (iob);
+ iob = NULL;
+ }
+
+ return iob;
+}
+
+int
+glusterd_submit_reply (rpcsvc_request_t *req, void *arg,
+ struct iovec *payload, int payloadcount,
+ struct iobref *iobref, xdrproc_t xdrproc)
+{
+ struct iobuf *iob = NULL;
+ int ret = -1;
+ struct iovec rsp = {0,};
+ char new_iobref = 0;
+
+ if (!req) {
+ GF_ASSERT (req);
+ goto out;
+ }
+
+ if (!iobref) {
+ iobref = iobref_new ();
+ if (!iobref) {
+ gf_log ("", GF_LOG_ERROR, "out of memory");
+ goto out;
+ }
+
+ new_iobref = 1;
+ }
+
+ iob = glusterd_serialize_reply (req, arg, &rsp, xdrproc);
+ if (!iob) {
+ gf_log ("", GF_LOG_ERROR, "Failed to serialize reply");
+ } else {
+ iobref_add (iobref, iob);
+ }
+
+ ret = rpcsvc_submit_generic (req, &rsp, 1, payload, payloadcount,
+ iobref);
+
+ /* Now that we've done our job of handing the message to the RPC layer
+ * we can safely unref the iob in the hope that RPC layer must have
+ * ref'ed the iob on receiving into the txlist.
+ */
+ if (ret == -1) {
+ gf_log ("", GF_LOG_ERROR, "Reply submission failed");
+ goto out;
+ }
+
+ ret = 0;
+out:
+
+ if (new_iobref) {
+ iobref_unref (iobref);
+ }
+
+ if (iob)
+ iobuf_unref (iob);
+ return ret;
+}
+
+gf_boolean_t
+glusterd_check_volume_exists (char *volname)
+{
+ char pathname[1024] = {0,};
+ struct stat stbuf = {0,};
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
+
+ priv = THIS->private;
+
+ snprintf (pathname, 1024, "%s/vols/%s", priv->workdir,
+ volname);
+
+ ret = stat (pathname, &stbuf);
+
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_DEBUG, "Volume %s does not exist."
+ "stat failed with errno : %d on path: %s",
+ volname, errno, pathname);
+ return _gf_false;
+ }
+
+ return _gf_true;
+}
+
+int32_t
+glusterd_volinfo_new (glusterd_volinfo_t **volinfo)
+{
+ glusterd_volinfo_t *new_volinfo = NULL;
+ int32_t ret = -1;
+
+ GF_ASSERT (volinfo);
+
+ new_volinfo = GF_CALLOC (1, sizeof(*new_volinfo),
+ gf_gld_mt_glusterd_volinfo_t);
+
+ if (!new_volinfo)
+ goto out;
+
+ LOCK_INIT (&new_volinfo->lock);
+ INIT_LIST_HEAD (&new_volinfo->vol_list);
+ INIT_LIST_HEAD (&new_volinfo->snapvol_list);
+ INIT_LIST_HEAD (&new_volinfo->bricks);
+ INIT_LIST_HEAD (&new_volinfo->snap_volumes);
+
+ new_volinfo->dict = dict_new ();
+ if (!new_volinfo->dict) {
+ GF_FREE (new_volinfo);
+
+ goto out;
+ }
+
+ new_volinfo->gsync_slaves = dict_new ();
+ if (!new_volinfo->gsync_slaves) {
+ GF_FREE (new_volinfo);
+
+ goto out;
+ }
+
+ snprintf (new_volinfo->parent_volname, GLUSTERD_MAX_VOLUME_NAME, "N/A");
+
+ new_volinfo->snap_max_hard_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT;
+
+ new_volinfo->xl = THIS;
+
+ *volinfo = new_volinfo;
+
+ ret = 0;
+
+out:
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+/* This function will create a new volinfo and then
+ * dup the entries from volinfo to the new_volinfo.
+ *
+ * @param volinfo volinfo which will be duplicated
+ * @param dup_volinfo new volinfo which will be created
+ * @param set_userauth if this true then auth info is also set
+ *
+ * @return 0 on success else -1
+ */
+int32_t
+glusterd_volinfo_dup (glusterd_volinfo_t *volinfo,
+ glusterd_volinfo_t **dup_volinfo,
+ gf_boolean_t set_userauth)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_volinfo_t *new_volinfo = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_VALIDATE_OR_GOTO (this->name, volinfo, out);
+ GF_VALIDATE_OR_GOTO (this->name, dup_volinfo, out);
+
+ ret = glusterd_volinfo_new (&new_volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "not able to create the "
+ "duplicate volinfo for the volume %s",
+ volinfo->volname);
+ goto out;
+ }
+
+ new_volinfo->type = volinfo->type;
+ new_volinfo->replica_count = volinfo->replica_count;
+ new_volinfo->stripe_count = volinfo->stripe_count;
+ new_volinfo->dist_leaf_count = volinfo->dist_leaf_count;
+ new_volinfo->sub_count = volinfo->sub_count;
+ new_volinfo->transport_type = volinfo->transport_type;
+ new_volinfo->nfs_transport_type = volinfo->nfs_transport_type;
+ new_volinfo->brick_count = volinfo->brick_count;
+
+ dict_copy (volinfo->dict, new_volinfo->dict);
+ gd_update_volume_op_versions (new_volinfo);
+
+ if (set_userauth) {
+ glusterd_auth_set_username (new_volinfo,
+ volinfo->auth.username);
+ glusterd_auth_set_password (new_volinfo,
+ volinfo->auth.password);
+ }
+
+ *dup_volinfo = new_volinfo;
+ ret = 0;
+out:
+ if (ret && (NULL != new_volinfo)) {
+ (void) glusterd_volinfo_delete (new_volinfo);
+ }
+ return ret;
+}
+
+/* This function will duplicate brickinfo
+ *
+ * @param brickinfo Source brickinfo
+ * @param dup_brickinfo Destination brickinfo
+ *
+ * @return 0 on success else -1
+ */
+int32_t
+glusterd_brickinfo_dup (glusterd_brickinfo_t *brickinfo,
+ glusterd_brickinfo_t *dup_brickinfo)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ GF_VALIDATE_OR_GOTO (this->name, brickinfo, out);
+ GF_VALIDATE_OR_GOTO (this->name, dup_brickinfo, out);
+
+ strcpy (dup_brickinfo->hostname, brickinfo->hostname);
+ strcpy (dup_brickinfo->path, brickinfo->path);
+ strcpy (dup_brickinfo->device_path, brickinfo->device_path);
+ ret = gf_canonicalize_path (dup_brickinfo->path);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to canonicalize "
+ "brick path");
+ goto out;
+ }
+ uuid_copy (dup_brickinfo->uuid, brickinfo->uuid);
+
+ dup_brickinfo->port = brickinfo->port;
+ dup_brickinfo->rdma_port = brickinfo->rdma_port;
+ if (NULL != brickinfo->logfile) {
+ dup_brickinfo->logfile = gf_strdup (brickinfo->logfile);
+ if (NULL == dup_brickinfo->logfile) {
+ ret = -1;
+ goto out;
+ }
+ }
+ dup_brickinfo->status = brickinfo->status;
+ dup_brickinfo->snap_status = brickinfo->snap_status;
+out:
+ return ret;
+}
+
+/* This function will copy snap volinfo to the new
+ * passed volinfo and regenerate backend store files
+ * for the restored snap.
+ *
+ * @param new_volinfo new volinfo
+ * @param snap_volinfo volinfo of snap volume
+ *
+ * @return 0 on success and -1 on failure
+ *
+ * TODO: Duplicate all members of volinfo, e.g. geo-rep sync slaves
+ */
+int32_t
+glusterd_snap_volinfo_restore (dict_t *rsp_dict,
+ glusterd_volinfo_t *new_volinfo,
+ glusterd_volinfo_t *snap_volinfo)
+{
+ int32_t brick_count = -1;
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *new_brickinfo = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (rsp_dict);
+
+ GF_VALIDATE_OR_GOTO (this->name, new_volinfo, out);
+ GF_VALIDATE_OR_GOTO (this->name, snap_volinfo, out);
+
+ brick_count = 0;
+ list_for_each_entry (brickinfo, &snap_volinfo->bricks, brick_list) {
+ ret = glusterd_brickinfo_new (&new_brickinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to create "
+ "new brickinfo");
+ goto out;
+ }
+
+ /* Duplicate brickinfo */
+ ret = glusterd_brickinfo_dup (brickinfo, new_brickinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to dup "
+ "brickinfo");
+ goto out;
+ }
+
+ /* If the brick is not of this peer, or snapshot is missed *
+ * for the brick do not replace the xattr for it */
+ if ((!uuid_compare (brickinfo->uuid, MY_UUID)) &&
+ (brickinfo->snap_status != -1)) {
+ /* We need to replace the volume id of all the bricks
+ * to the volume id of the origin volume. new_volinfo
+ * has the origin volume's volume id*/
+ ret = sys_lsetxattr (new_brickinfo->path,
+ GF_XATTR_VOL_ID_KEY,
+ new_volinfo->volume_id,
+ sizeof (new_volinfo->volume_id),
+ XATTR_REPLACE);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "set extended attribute %s on %s. "
+ "Reason: %s, snap: %s",
+ GF_XATTR_VOL_ID_KEY,
+ new_brickinfo->path, strerror (errno),
+ new_volinfo->volname);
+ goto out;
+ }
+ }
+
+ /* If a snapshot is pending for this brick then
+ * restore should also be pending
+ */
+ if (brickinfo->snap_status == -1) {
+ /* Adding missed delete to the dict */
+ ret = glusterd_add_missed_snaps_to_dict
+ (rsp_dict,
+ snap_volinfo->volname,
+ brickinfo,
+ brick_count + 1,
+ GF_SNAP_OPTION_TYPE_RESTORE);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to add missed snapshot info "
+ "for %s:%s in the rsp_dict",
+ brickinfo->hostname,
+ brickinfo->path);
+ goto out;
+ }
+ }
+
+ list_add_tail (&new_brickinfo->brick_list,
+ &new_volinfo->bricks);
+ /* ownership of new_brickinfo is passed to new_volinfo */
+ new_brickinfo = NULL;
+ brick_count++;
+ }
+
+ /* Regenerate all volfiles */
+ ret = glusterd_create_volfiles_and_notify_services (new_volinfo);
+
+out:
+ if (ret && (NULL != new_brickinfo)) {
+ (void) glusterd_brickinfo_delete (new_brickinfo);
+ }
+
+ return ret;
+}
+
+void
+glusterd_auth_cleanup (glusterd_volinfo_t *volinfo) {
+
+ GF_ASSERT (volinfo);
+
+ GF_FREE (volinfo->auth.username);
+
+ GF_FREE (volinfo->auth.password);
+}
+
+char *
+glusterd_auth_get_username (glusterd_volinfo_t *volinfo) {
+
+ GF_ASSERT (volinfo);
+
+ return volinfo->auth.username;
+}
+
+char *
+glusterd_auth_get_password (glusterd_volinfo_t *volinfo) {
+
+ GF_ASSERT (volinfo);
+
+ return volinfo->auth.password;
+}
+
+int32_t
+glusterd_auth_set_username (glusterd_volinfo_t *volinfo, char *username) {
+
+ GF_ASSERT (volinfo);
+ GF_ASSERT (username);
+
+ volinfo->auth.username = gf_strdup (username);
+ return 0;
+}
+
+int32_t
+glusterd_auth_set_password (glusterd_volinfo_t *volinfo, char *password) {
+
+ GF_ASSERT (volinfo);
+ GF_ASSERT (password);
+
+ volinfo->auth.password = gf_strdup (password);
+ return 0;
+}
+
+int32_t
+glusterd_brickinfo_delete (glusterd_brickinfo_t *brickinfo)
+{
+ int32_t ret = -1;
+
+ GF_ASSERT (brickinfo);
+
+ list_del_init (&brickinfo->brick_list);
+
+ GF_FREE (brickinfo->logfile);
+ GF_FREE (brickinfo);
+
+ ret = 0;
+
+ return ret;
+}
+
+int32_t
+glusterd_volume_brickinfos_delete (glusterd_volinfo_t *volinfo)
+{
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *tmp = NULL;
+ int32_t ret = 0;
+
+ GF_ASSERT (volinfo);
+
+ list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks,
+ brick_list) {
+ ret = glusterd_brickinfo_delete (brickinfo);
+ if (ret)
+ goto out;
+ }
+
+out:
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_volinfo_delete (glusterd_volinfo_t *volinfo)
+{
+ int32_t ret = -1;
+
+ GF_ASSERT (volinfo);
+
+ list_del_init (&volinfo->vol_list);
+ list_del_init (&volinfo->snapvol_list);
+
+ ret = glusterd_volume_brickinfos_delete (volinfo);
+ if (ret)
+ goto out;
+ if (volinfo->dict)
+ dict_unref (volinfo->dict);
+ if (volinfo->gsync_slaves)
+ dict_unref (volinfo->gsync_slaves);
+ GF_FREE (volinfo->logdir);
+
+ glusterd_auth_cleanup (volinfo);
+
+ GF_FREE (volinfo);
+ ret = 0;
+
+out:
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_brickinfo_new (glusterd_brickinfo_t **brickinfo)
+{
+ glusterd_brickinfo_t *new_brickinfo = NULL;
+ int32_t ret = -1;
+
+ GF_ASSERT (brickinfo);
+
+ new_brickinfo = GF_CALLOC (1, sizeof(*new_brickinfo),
+ gf_gld_mt_glusterd_brickinfo_t);
+
+ if (!new_brickinfo)
+ goto out;
+
+ INIT_LIST_HEAD (&new_brickinfo->brick_list);
+
+ *brickinfo = new_brickinfo;
+
+ ret = 0;
+
+out:
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_resolve_brick (glusterd_brickinfo_t *brickinfo)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ GF_ASSERT (brickinfo);
+
+ ret = glusterd_hostname_to_uuid (brickinfo->hostname, brickinfo->uuid);
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_brickinfo_new_from_brick (char *brick,
+ glusterd_brickinfo_t **brickinfo)
+{
+ int32_t ret = -1;
+ glusterd_brickinfo_t *new_brickinfo = NULL;
+ char *hostname = NULL;
+ char *path = NULL;
+ char *tmp_host = NULL;
+ char *tmp_path = NULL;
+ char *vg = NULL;
+
+ GF_ASSERT (brick);
+ GF_ASSERT (brickinfo);
+
+ tmp_host = gf_strdup (brick);
+ if (tmp_host && !get_host_name (tmp_host, &hostname))
+ goto out;
+ tmp_path = gf_strdup (brick);
+ if (tmp_path && !get_path_name (tmp_path, &path))
+ goto out;
+
+ GF_ASSERT (hostname);
+ GF_ASSERT (path);
+
+ ret = glusterd_brickinfo_new (&new_brickinfo);
+ if (ret)
+ goto out;
+
+#ifdef HAVE_BD_XLATOR
+ vg = strchr (path, '?');
+ /* ? is used as a delimiter for vg */
+ if (vg) {
+ strncpy (new_brickinfo->vg, vg + 1, PATH_MAX - 1);
+ *vg = '\0';
+ }
+ new_brickinfo->caps = CAPS_BD;
+#else
+ vg = NULL; /* Avoid compiler warnings when BD not enabled */
+#endif
+ ret = gf_canonicalize_path (path);
+ if (ret)
+ goto out;
+
+ strncpy (new_brickinfo->hostname, hostname, 1024);
+ strncpy (new_brickinfo->path, path, 1024);
+
+ *brickinfo = new_brickinfo;
+
+ ret = 0;
+out:
+ GF_FREE (tmp_host);
+ if (tmp_host)
+ GF_FREE (tmp_path);
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+static gf_boolean_t
+_is_prefix (char *str1, char *str2)
+{
+ GF_ASSERT (str1);
+ GF_ASSERT (str2);
+
+ int i = 0;
+ int len1 = 0;
+ int len2 = 0;
+ int small_len = 0;
+ char *bigger = NULL;
+ gf_boolean_t prefix = _gf_true;
+
+ len1 = strlen (str1);
+ len2 = strlen (str2);
+ small_len = min (len1, len2);
+ for (i = 0; i < small_len; i++) {
+ if (str1[i] != str2[i]) {
+ prefix = _gf_false;
+ break;
+ }
+ }
+
+ if (len1 < len2)
+ bigger = str2;
+
+ else if (len1 > len2)
+ bigger = str1;
+
+ else
+ return prefix;
+
+ if (bigger[small_len] != '/')
+ prefix = _gf_false;
+
+ return prefix;
+}
+
+/* Checks if @path is available in the peer identified by @uuid
+ * 'availability' is determined by querying current state of volumes
+ * in the cluster. */
+gf_boolean_t
+glusterd_is_brickpath_available (uuid_t uuid, char *path)
+{
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ gf_boolean_t available = _gf_false;
+ char tmp_path[PATH_MAX+1] = {0};
+ char tmp_brickpath[PATH_MAX+1] = {0};
+
+ priv = THIS->private;
+
+ strncpy (tmp_path, path, PATH_MAX);
+ /* path may not yet exist */
+ if (!realpath (path, tmp_path)) {
+ if (errno != ENOENT) {
+ goto out;
+ }
+ /* When realpath(3) fails, tmp_path is undefined. */
+ strncpy(tmp_path,path,PATH_MAX);
+ }
+
+ list_for_each_entry (volinfo, &priv->volumes, vol_list) {
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ if (uuid_compare (uuid, brickinfo->uuid))
+ continue;
+
+ if (!realpath (brickinfo->path, tmp_brickpath)) {
+ if (errno == ENOENT)
+ strncpy (tmp_brickpath, brickinfo->path,
+ PATH_MAX);
+ else
+ goto out;
+ }
+
+ if (_is_prefix (tmp_brickpath, tmp_path))
+ goto out;
+ }
+ }
+ available = _gf_true;
+out:
+ return available;
+}
+
+#ifdef HAVE_BD_XLATOR
+/*
+ * Sets the tag of the format "trusted.glusterfs.volume-id:<uuid>" in
+ * the brick VG. It is used to avoid using same VG for another brick.
+ * @volume-id - gfid, @brick - brick info, @msg - Error message returned
+ * to the caller
+ */
+int
+glusterd_bd_set_vg_tag (unsigned char *volume_id, glusterd_brickinfo_t *brick,
+ char *msg, int msg_size)
+{
+ lvm_t handle = NULL;
+ vg_t vg = NULL;
+ char *uuid = NULL;
+ int ret = -1;
+
+ gf_asprintf (&uuid, "%s:%s", GF_XATTR_VOL_ID_KEY,
+ uuid_utoa (volume_id));
+ if (!uuid) {
+ snprintf (msg, sizeof(*msg), "Could not allocate memory "
+ "for tag");
+ return -1;
+ }
+
+ handle = lvm_init (NULL);
+ if (!handle) {
+ snprintf (msg, sizeof(*msg), "lvm_init failed");
+ goto out;
+ }
+
+ vg = lvm_vg_open (handle, brick->vg, "w", 0);
+ if (!vg) {
+ snprintf (msg, sizeof(*msg), "Could not open VG %s",
+ brick->vg);
+ goto out;
+ }
+
+ if (lvm_vg_add_tag (vg, uuid) < 0) {
+ snprintf (msg, sizeof(*msg), "Could not set tag %s for "
+ "VG %s", uuid, brick->vg);
+ goto out;
+ }
+ lvm_vg_write (vg);
+ ret = 0;
+out:
+ GF_FREE (uuid);
+
+ if (vg)
+ lvm_vg_close (vg);
+ if (handle)
+ lvm_quit (handle);
+
+ return ret;
+}
+#endif
+
+int
+glusterd_validate_and_create_brickpath (glusterd_brickinfo_t *brickinfo,
+ uuid_t volume_id, char **op_errstr,
+ gf_boolean_t is_force)
+{
+ int ret = -1;
+ char parentdir[PATH_MAX] = {0,};
+ struct stat parent_st = {0,};
+ struct stat brick_st = {0,};
+ struct stat root_st = {0,};
+ char msg[2048] = {0,};
+ gf_boolean_t is_created = _gf_false;
+
+ ret = mkdir (brickinfo->path, 0777);
+ if (ret) {
+ if (errno != EEXIST) {
+ snprintf (msg, sizeof (msg), "Failed to create brick "
+ "directory for brick %s:%s. Reason : %s ",
+ brickinfo->hostname, brickinfo->path,
+ strerror (errno));
+ goto out;
+ }
+ } else {
+ is_created = _gf_true;
+ }
+
+ ret = lstat (brickinfo->path, &brick_st);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "lstat failed on %s. Reason : %s",
+ brickinfo->path, strerror (errno));
+ goto out;
+ }
+
+ if ((!is_created) && (!S_ISDIR (brick_st.st_mode))) {
+ snprintf (msg, sizeof (msg), "The provided path %s which is "
+ "already present, is not a directory",
+ brickinfo->path);
+ ret = -1;
+ goto out;
+ }
+
+ snprintf (parentdir, sizeof (parentdir), "%s/..", brickinfo->path);
+
+ ret = lstat ("/", &root_st);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "lstat failed on /. Reason : %s",
+ strerror (errno));
+ goto out;
+ }
+
+ ret = lstat (parentdir, &parent_st);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "lstat failed on %s. Reason : %s",
+ parentdir, strerror (errno));
+ goto out;
+ }
+
+ if (!is_force) {
+ if (brick_st.st_dev != parent_st.st_dev) {
+ snprintf (msg, sizeof (msg), "The brick %s:%s is a "
+ "mount point. Please create a sub-directory "
+ "under the mount point and use that as the "
+ "brick directory. Or use 'force' at the end "
+ "of the command if you want to override this "
+ "behavior.", brickinfo->hostname,
+ brickinfo->path);
+ ret = -1;
+ goto out;
+ }
+ else if (parent_st.st_dev == root_st.st_dev) {
+ snprintf (msg, sizeof (msg), "The brick %s:%s is "
+ "is being created in the root partition. It "
+ "is recommended that you don't use the "
+ "system's root partition for storage backend."
+ " Or use 'force' at the end of the command if"
+ " you want to override this behavior.",
+ brickinfo->hostname, brickinfo->path);
+ ret = -1;
+ goto out;
+ }
+ }
+
+#ifdef HAVE_BD_XLATOR
+ if (brickinfo->vg[0]) {
+ ret = glusterd_bd_set_vg_tag (volume_id, brickinfo, msg,
+ sizeof(msg));
+ if (ret)
+ goto out;
+ }
+#endif
+ ret = glusterd_check_and_set_brick_xattr (brickinfo->hostname,
+ brickinfo->path, volume_id,
+ op_errstr, is_force);
+ if (ret)
+ goto out;
+
+ ret = 0;
+
+out:
+ if (ret && is_created)
+ rmdir (brickinfo->path);
+ if (ret && !*op_errstr && msg[0] != '\0')
+ *op_errstr = gf_strdup (msg);
+
+ return ret;
+}
+
+int32_t
+glusterd_volume_brickinfo_get (uuid_t uuid, char *hostname, char *path,
+ glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t **brickinfo)
+{
+ glusterd_brickinfo_t *brickiter = NULL;
+ uuid_t peer_uuid = {0};
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ if (uuid) {
+ uuid_copy (peer_uuid, uuid);
+ } else {
+ ret = glusterd_hostname_to_uuid (hostname, peer_uuid);
+ if (ret)
+ goto out;
+ }
+ ret = -1;
+ list_for_each_entry (brickiter, &volinfo->bricks, brick_list) {
+
+ if ((uuid_is_null (brickiter->uuid)) &&
+ (glusterd_resolve_brick (brickiter) != 0))
+ goto out;
+ if (uuid_compare (peer_uuid, brickiter->uuid))
+ continue;
+
+ if (strcmp (brickiter->path, path) == 0) {
+ gf_log (this->name, GF_LOG_DEBUG, LOGSTR_FOUND_BRICK,
+ brickiter->hostname, brickiter->path,
+ volinfo->volname);
+ ret = 0;
+ if (brickinfo)
+ *brickinfo = brickiter;
+ break;
+ }
+ }
+
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_volume_brickinfo_get_by_brick (char *brick,
+ glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t **brickinfo)
+{
+ int32_t ret = -1;
+ glusterd_brickinfo_t *tmp_brickinfo = NULL;
+
+ GF_ASSERT (brick);
+ GF_ASSERT (volinfo);
+
+ ret = glusterd_brickinfo_new_from_brick (brick, &tmp_brickinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_volume_brickinfo_get (NULL, tmp_brickinfo->hostname,
+ tmp_brickinfo->path, volinfo,
+ brickinfo);
+ (void) glusterd_brickinfo_delete (tmp_brickinfo);
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+gf_boolean_t
+glusterd_is_brick_decommissioned (glusterd_volinfo_t *volinfo, char *hostname,
+ char *path)
+{
+ gf_boolean_t decommissioned = _gf_false;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ int ret = -1;
+
+ ret = glusterd_volume_brickinfo_get (NULL, hostname, path, volinfo,
+ &brickinfo);
+ if (ret)
+ goto out;
+ decommissioned = brickinfo->decommissioned;
+out:
+ return decommissioned;
+}
+
+int32_t
+glusterd_friend_cleanup (glusterd_peerinfo_t *peerinfo)
+{
+ GF_ASSERT (peerinfo);
+ glusterd_peerctx_t *peerctx = NULL;
+ gf_boolean_t quorum_action = _gf_false;
+ glusterd_conf_t *priv = THIS->private;
+
+ if (peerinfo->quorum_contrib != QUORUM_NONE)
+ quorum_action = _gf_true;
+ if (peerinfo->rpc) {
+ /* cleanup the saved-frames before last unref */
+ synclock_unlock (&priv->big_lock);
+ rpc_clnt_connection_cleanup (&peerinfo->rpc->conn);
+ synclock_lock (&priv->big_lock);
+
+ peerctx = peerinfo->rpc->mydata;
+ peerinfo->rpc->mydata = NULL;
+ peerinfo->rpc = rpc_clnt_unref (peerinfo->rpc);
+ peerinfo->rpc = NULL;
+ if (peerctx) {
+ GF_FREE (peerctx->errstr);
+ GF_FREE (peerctx);
+ }
+ }
+ glusterd_peer_destroy (peerinfo);
+
+ if (quorum_action)
+ glusterd_do_quorum_action ();
+ return 0;
+}
+
+int
+glusterd_volinfo_find_by_volume_id (uuid_t volume_id, glusterd_volinfo_t **volinfo)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_volinfo_t *voliter = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ if (!volume_id)
+ return -1;
+
+ this = THIS;
+ priv = this->private;
+
+ list_for_each_entry (voliter, &priv->volumes, vol_list) {
+ if (uuid_compare (volume_id, voliter->volume_id))
+ continue;
+ *volinfo = voliter;
+ ret = 0;
+ gf_log (this->name, GF_LOG_DEBUG, "Volume %s found",
+ voliter->volname);
+ break;
+ }
+ return ret;
+}
+
+int
+glusterd_snap_volinfo_find_by_volume_id (uuid_t volume_id,
+ glusterd_volinfo_t **volinfo)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_volinfo_t *voliter = NULL;
+ glusterd_snap_t *snap = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ priv = this->private;
+ GF_ASSERT (priv);
+ GF_ASSERT (volinfo);
+
+ if (uuid_is_null(volume_id)) {
+ gf_log (this->name, GF_LOG_WARNING, "Volume UUID is NULL");
+ goto out;
+ }
+
+ list_for_each_entry (snap, &priv->snapshots, snap_list) {
+ list_for_each_entry (voliter, &snap->volumes, vol_list) {
+ if (uuid_compare (volume_id, voliter->volume_id))
+ continue;
+ *volinfo = voliter;
+ ret = 0;
+ goto out;
+ }
+ }
+
+ gf_log (this->name, GF_LOG_WARNING, "Snap volume not found");
+out:
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo)
+{
+ glusterd_volinfo_t *tmp_volinfo = NULL;
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ GF_ASSERT (volname);
+ this = THIS;
+ GF_ASSERT (this);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ list_for_each_entry (tmp_volinfo, &priv->volumes, vol_list) {
+ if (!strcmp (tmp_volinfo->volname, volname)) {
+ gf_log (this->name, GF_LOG_DEBUG, "Volume %s found",
+ volname);
+ ret = 0;
+ *volinfo = tmp_volinfo;
+ break;
+ }
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_snap_volinfo_find (char *snap_volname, glusterd_snap_t *snap,
+ glusterd_volinfo_t **volinfo)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_volinfo_t *snap_vol = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ priv = this->private;
+ GF_ASSERT (priv);
+ GF_ASSERT (snap);
+ GF_ASSERT (snap_volname);
+
+ list_for_each_entry (snap_vol, &snap->volumes, vol_list) {
+ if (!strcmp (snap_vol->volname, snap_volname)) {
+ ret = 0;
+ *volinfo = snap_vol;
+ goto out;
+ }
+ }
+
+ gf_log (this->name, GF_LOG_WARNING, "Snap volume %s not found",
+ snap_volname);
+out:
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_snap_volinfo_find_from_parent_volname (char *origin_volname,
+ glusterd_snap_t *snap,
+ glusterd_volinfo_t **volinfo)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_volinfo_t *snap_vol = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ priv = this->private;
+ GF_ASSERT (priv);
+ GF_ASSERT (snap);
+ GF_ASSERT (origin_volname);
+
+ list_for_each_entry (snap_vol, &snap->volumes, vol_list) {
+ if (!strcmp (snap_vol->parent_volname, origin_volname)) {
+ ret = 0;
+ *volinfo = snap_vol;
+ goto out;
+ }
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "Snap volume not found(snap: %s, "
+ "origin-volume: %s", snap->snapname, origin_volname);
+
+out:
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_service_stop (const char *service, char *pidfile, int sig,
+ gf_boolean_t force_kill)
+{
+ int32_t ret = -1;
+ pid_t pid = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ if (!glusterd_is_service_running (pidfile, &pid)) {
+ ret = 0;
+ gf_log (this->name, GF_LOG_INFO, "%s already stopped", service);
+ goto out;
+ }
+ gf_log (this->name, GF_LOG_DEBUG, "Stopping gluster %s running in pid: "
+ "%d", service, pid);
+
+ ret = kill (pid, sig);
+ if (!force_kill)
+ goto out;
+
+ sleep (1);
+ if (glusterd_is_service_running (pidfile, NULL)) {
+ ret = kill (pid, SIGKILL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to "
+ "kill pid %d reason: %s", pid,
+ strerror(errno));
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+void
+glusterd_set_socket_filepath (char *sock_filepath, char *sockpath, size_t len)
+{
+ char md5_sum[MD5_DIGEST_LENGTH*2+1] = {0,};
+
+ md5_wrapper ((unsigned char *) sock_filepath, strlen(sock_filepath), md5_sum);
+ snprintf (sockpath, len, "%s/%s.socket", GLUSTERD_SOCK_DIR, md5_sum);
+}
+
+void
+glusterd_set_brick_socket_filepath (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ char *sockpath, size_t len)
+{
+ char export_path[PATH_MAX] = {0,};
+ char sock_filepath[PATH_MAX] = {0,};
+ char volume_dir[PATH_MAX] = {0,};
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ int expected_file_len = 0;
+
+ expected_file_len = strlen (GLUSTERD_SOCK_DIR) + strlen ("/") +
+ MD5_DIGEST_LENGTH*2 + strlen (".socket") + 1;
+ GF_ASSERT (len >= expected_file_len);
+ this = THIS;
+ GF_ASSERT (this);
+
+ priv = this->private;
+
+ GLUSTERD_GET_VOLUME_DIR (volume_dir, volinfo, priv);
+ GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, export_path);
+ snprintf (sock_filepath, PATH_MAX, "%s/run/%s-%s",
+ volume_dir, brickinfo->hostname, export_path);
+
+ glusterd_set_socket_filepath (sock_filepath, sockpath, len);
+}
+
+/* connection happens only if it is not aleady connected,
+ * reconnections are taken care by rpc-layer
+ */
+int32_t
+glusterd_brick_connect (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo, char *socketpath)
+{
+ int ret = 0;
+ char volume_id_str[64];
+ char *brickid = NULL;
+ dict_t *options = NULL;
+ struct rpc_clnt *rpc = NULL;
+ glusterd_conf_t *priv = THIS->private;
+
+ GF_ASSERT (volinfo);
+ GF_ASSERT (brickinfo);
+ GF_ASSERT (socketpath);
+
+ if (brickinfo->rpc == NULL) {
+ /* Setting frame-timeout to 10mins (600seconds).
+ * Unix domain sockets ensures that the connection is reliable.
+ * The default timeout of 30mins used for unreliable network
+ * connections is too long for unix domain socket connections.
+ */
+ ret = rpc_transport_unix_options_build (&options, socketpath,
+ 600);
+ if (ret)
+ goto out;
+
+ uuid_utoa_r (volinfo->volume_id, volume_id_str);
+ ret = gf_asprintf (&brickid, "%s:%s:%s", volume_id_str,
+ brickinfo->hostname, brickinfo->path);
+ if (ret < 0)
+ goto out;
+
+ synclock_unlock (&priv->big_lock);
+ ret = glusterd_rpc_create (&rpc, options,
+ glusterd_brick_rpc_notify,
+ brickid);
+ synclock_lock (&priv->big_lock);
+ if (ret)
+ goto out;
+ brickinfo->rpc = rpc;
+ }
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+/* Caller should ensure that brick process is not running*/
+static void
+_reap_brick_process (char *pidfile, char *brickpath)
+{
+ unlink (pidfile);
+ /* Brick process is not running and pmap may have an entry for it.*/
+ pmap_registry_remove (THIS, 0, brickpath,
+ GF_PMAP_PORT_BRICKSERVER, NULL);
+}
+
+static int
+_mk_rundir_p (glusterd_volinfo_t *volinfo)
+{
+ char voldir[PATH_MAX] = {0,};
+ char rundir[PATH_MAX] = {0,};
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ int ret = -1;
+
+ this = THIS;
+ priv = this->private;
+ GLUSTERD_GET_VOLUME_DIR (voldir, volinfo, priv);
+ snprintf (rundir, sizeof (rundir)-1, "%s/run", voldir);
+ ret = mkdir_p (rundir, 0777, _gf_true);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Failed to create rundir");
+ return ret;
+}
+
+int32_t
+glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ gf_boolean_t wait)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ char pidfile[PATH_MAX+1] = {0,};
+ char volfile[PATH_MAX] = {0,};
+ runner_t runner = {0,};
+ char exp_path[PATH_MAX] = {0,};
+ char logfile[PATH_MAX] = {0,};
+ int port = 0;
+ int rdma_port = 0;
+ char socketpath[PATH_MAX] = {0};
+ char glusterd_uuid[1024] = {0,};
+ char valgrind_logfile[PATH_MAX] = {0};
+
+ GF_ASSERT (volinfo);
+ GF_ASSERT (brickinfo);
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ if (brickinfo->snap_status == -1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Snapshot is pending on %s:%s. "
+ "Hence not starting the brick",
+ brickinfo->hostname,
+ brickinfo->path);
+ ret = 0;
+ goto out;
+ }
+
+ ret = _mk_rundir_p (volinfo);
+ if (ret)
+ goto out;
+
+ glusterd_set_brick_socket_filepath (volinfo, brickinfo, socketpath,
+ sizeof (socketpath));
+
+ GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv);
+ if (glusterd_is_service_running (pidfile, NULL))
+ goto connect;
+
+ _reap_brick_process (pidfile, brickinfo->path);
+
+ port = brickinfo->port;
+ if (!port)
+ port = pmap_registry_alloc (THIS);
+
+ /* Build the exp_path, before starting the glusterfsd even in
+ valgrind mode. Otherwise all the glusterfsd processes start
+ writing the valgrind log to the same file.
+ */
+ GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, exp_path);
+ runinit (&runner);
+
+ if (priv->valgrind) {
+ /* Run bricks with valgrind */
+ if (volinfo->logdir) {
+ snprintf (valgrind_logfile, PATH_MAX,
+ "%s/valgrind-%s-%s.log",
+ volinfo->logdir,
+ volinfo->volname, exp_path);
+ } else {
+ snprintf (valgrind_logfile, PATH_MAX,
+ "%s/bricks/valgrind-%s-%s.log",
+ DEFAULT_LOG_FILE_DIRECTORY,
+ volinfo->volname, exp_path);
+ }
+
+ runner_add_args (&runner, "valgrind", "--leak-check=full",
+ "--trace-children=yes", "--track-origins=yes",
+ NULL);
+ runner_argprintf (&runner, "--log-file=%s", valgrind_logfile);
+ }
+
+ if (volinfo->is_snap_volume) {
+ snprintf (volfile, PATH_MAX,"/%s/%s/%s.%s.%s",
+ GLUSTERD_VOL_SNAP_DIR_PREFIX,
+ volinfo->snapshot->snapname, volinfo->volname,
+ brickinfo->hostname, exp_path);
+ } else {
+ snprintf (volfile, PATH_MAX, "%s.%s.%s", volinfo->volname,
+ brickinfo->hostname, exp_path);
+ }
+
+ if (volinfo->logdir) {
+ snprintf (logfile, PATH_MAX, "%s/%s.log",
+ volinfo->logdir, exp_path);
+ } else {
+ snprintf (logfile, PATH_MAX, "%s/bricks/%s.log",
+ DEFAULT_LOG_FILE_DIRECTORY, exp_path);
+ }
+ if (!brickinfo->logfile)
+ brickinfo->logfile = gf_strdup (logfile);
+
+ (void) snprintf (glusterd_uuid, 1024, "*-posix.glusterd-uuid=%s",
+ uuid_utoa (MY_UUID));
+ runner_add_args (&runner, SBIN_DIR"/glusterfsd",
+ "-s", brickinfo->hostname, "--volfile-id", volfile,
+ "-p", pidfile, "-S", socketpath,
+ "--brick-name", brickinfo->path,
+ "-l", brickinfo->logfile,
+ "--xlator-option", glusterd_uuid,
+ NULL);
+
+ runner_add_arg (&runner, "--brick-port");
+ if (volinfo->transport_type != GF_TRANSPORT_BOTH_TCP_RDMA) {
+ runner_argprintf (&runner, "%d", port);
+ } else {
+ rdma_port = brickinfo->rdma_port;
+ if (!rdma_port)
+ rdma_port = pmap_registry_alloc (THIS);
+ runner_argprintf (&runner, "%d,%d", port, rdma_port);
+ runner_add_arg (&runner, "--xlator-option");
+ runner_argprintf (&runner, "%s-server.transport.rdma.listen-port=%d",
+ volinfo->volname, rdma_port);
+ }
+
+ runner_add_arg (&runner, "--xlator-option");
+ runner_argprintf (&runner, "%s-server.listen-port=%d",
+ volinfo->volname, port);
+
+ if (volinfo->memory_accounting)
+ runner_add_arg (&runner, "--mem-accounting");
+
+ runner_log (&runner, "", GF_LOG_DEBUG, "Starting GlusterFS");
+ if (wait) {
+ synclock_unlock (&priv->big_lock);
+ ret = runner_run (&runner);
+ synclock_lock (&priv->big_lock);
+
+ } else {
+ ret = runner_run_nowait (&runner);
+ }
+
+ if (ret)
+ goto out;
+
+ brickinfo->port = port;
+ brickinfo->rdma_port = rdma_port;
+
+connect:
+ ret = glusterd_brick_connect (volinfo, brickinfo, socketpath);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to connect to brick %s:%s on %s",
+ brickinfo->hostname, brickinfo->path, socketpath);
+ goto out;
+ }
+out:
+ return ret;
+}
+
+int32_t
+glusterd_brick_unlink_socket_file (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo)
+{
+ char path[PATH_MAX] = {0,};
+ char socketpath[PATH_MAX] = {0};
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ int ret = 0;
+
+ GF_ASSERT (volinfo);
+ GF_ASSERT (brickinfo);
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ priv = this->private;
+ GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv);
+ glusterd_set_brick_socket_filepath (volinfo, brickinfo, socketpath,
+ sizeof (socketpath));
+ ret = unlink (socketpath);
+ if (ret && (ENOENT == errno)) {
+ ret = 0;
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to remove %s"
+ " error: %s", socketpath, strerror (errno));
+ }
+
+ return ret;
+}
+
+int32_t
+glusterd_brick_disconnect (glusterd_brickinfo_t *brickinfo)
+{
+ rpc_clnt_t *rpc = NULL;
+ glusterd_conf_t *priv = THIS->private;
+
+ GF_ASSERT (brickinfo);
+
+ if (!brickinfo) {
+ gf_log_callingfn ("glusterd", GF_LOG_WARNING, "!brickinfo");
+ return -1;
+ }
+
+ rpc = brickinfo->rpc;
+ brickinfo->rpc = NULL;
+
+ if (rpc) {
+ synclock_unlock (&priv->big_lock);
+ rpc_clnt_unref (rpc);
+ synclock_lock (&priv->big_lock);
+ }
+
+ return 0;
+}
+
+int32_t
+glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ gf_boolean_t del_brick)
+{
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ char pidfile[PATH_MAX] = {0,};
+ int ret = 0;
+
+ GF_ASSERT (volinfo);
+ GF_ASSERT (brickinfo);
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ priv = this->private;
+ if (del_brick)
+ list_del_init (&brickinfo->brick_list);
+
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+ (void) glusterd_brick_disconnect (brickinfo);
+ GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv);
+ ret = glusterd_service_stop ("brick", pidfile, SIGTERM, _gf_false);
+ if (ret == 0) {
+ glusterd_set_brick_status (brickinfo, GF_BRICK_STOPPED);
+ (void) glusterd_brick_unlink_socket_file (volinfo, brickinfo);
+ }
+ }
+
+ if (del_brick)
+ glusterd_delete_brick (volinfo, brickinfo);
+
+ return ret;
+}
+
+int32_t
+glusterd_peer_hostname_new (char *hostname, glusterd_peer_hostname_t **name)
+{
+ glusterd_peer_hostname_t *peer_hostname = NULL;
+ int32_t ret = -1;
+
+ GF_ASSERT (hostname);
+ GF_ASSERT (name);
+
+ peer_hostname = GF_CALLOC (1, sizeof (*peer_hostname),
+ gf_gld_mt_peer_hostname_t);
+
+ if (!peer_hostname)
+ goto out;
+
+ peer_hostname->hostname = gf_strdup (hostname);
+ INIT_LIST_HEAD (&peer_hostname->hostname_list);
+
+ *name = peer_hostname;
+ ret = 0;
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+/* Free LINE[0..N-1] and then the LINE buffer. */
+static void
+free_lines (char **line, size_t n)
+{
+ size_t i;
+ for (i = 0; i < n; i++)
+ GF_FREE (line[i]);
+ GF_FREE (line);
+}
+
+char **
+glusterd_readin_file (const char *filepath, int *line_count)
+{
+ int ret = -1;
+ int n = 8;
+ int counter = 0;
+ char buffer[PATH_MAX + 256] = {0};
+ char **lines = NULL;
+ FILE *fp = NULL;
+ void *p;
+
+ fp = fopen (filepath, "r");
+ if (!fp)
+ goto out;
+
+ lines = GF_CALLOC (1, n * sizeof (*lines), gf_gld_mt_charptr);
+ if (!lines)
+ goto out;
+
+ for (counter = 0; fgets (buffer, sizeof (buffer), fp); counter++) {
+
+ if (counter == n-1) {
+ n *= 2;
+ p = GF_REALLOC (lines, n * sizeof (char *));
+ if (!p) {
+ free_lines (lines, n/2);
+ lines = NULL;
+ goto out;
+ }
+ lines = p;
+ }
+
+ lines[counter] = gf_strdup (buffer);
+ }
+
+ lines[counter] = NULL;
+ /* Reduce allocation to minimal size. */
+ p = GF_REALLOC (lines, (counter + 1) * sizeof (char *));
+ if (!p) {
+ free_lines (lines, counter);
+ lines = NULL;
+ goto out;
+ }
+ lines = p;
+
+ *line_count = counter;
+ ret = 0;
+
+ out:
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", strerror (errno));
+ if (fp)
+ fclose (fp);
+
+ return lines;
+}
+
+int
+glusterd_compare_lines (const void *a, const void *b) {
+
+ return strcmp(* (char * const *) a, * (char * const *) b);
+}
+
+int
+glusterd_sort_and_redirect (const char *src_filepath, int dest_fd)
+{
+ int ret = -1;
+ int line_count = 0;
+ int counter = 0;
+ char **lines = NULL;
+
+
+ if (!src_filepath || dest_fd < 0)
+ goto out;
+
+ lines = glusterd_readin_file (src_filepath, &line_count);
+ if (!lines)
+ goto out;
+
+ qsort (lines, line_count, sizeof (*lines), glusterd_compare_lines);
+
+ for (counter = 0; lines[counter]; counter++) {
+
+ ret = write (dest_fd, lines[counter],
+ strlen (lines[counter]));
+ if (ret < 0)
+ goto out;
+
+ GF_FREE (lines[counter]);
+ }
+
+ ret = 0;
+ out:
+ GF_FREE (lines);
+
+ return ret;
+}
+
+int
+glusterd_volume_compute_cksum (glusterd_volinfo_t *volinfo)
+{
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
+ char path[PATH_MAX] = {0,};
+ char cksum_path[PATH_MAX] = {0,};
+ char filepath[PATH_MAX] = {0,};
+ int fd = -1;
+ uint32_t cksum = 0;
+ char buf[4096] = {0,};
+ char sort_filepath[PATH_MAX] = {0};
+ gf_boolean_t unlink_sortfile = _gf_false;
+ int sort_fd = 0;
+ xlator_t *this = NULL;
+
+ GF_ASSERT (volinfo);
+ this = THIS;
+ priv = THIS->private;
+ GF_ASSERT (priv);
+
+ GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv);
+
+ snprintf (cksum_path, sizeof (cksum_path), "%s/%s",
+ path, GLUSTERD_CKSUM_FILE);
+
+ fd = open (cksum_path, O_RDWR | O_APPEND | O_CREAT| O_TRUNC, 0600);
+
+ if (-1 == fd) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to open %s, errno: %d",
+ cksum_path, errno);
+ ret = -1;
+ goto out;
+ }
+
+ snprintf (filepath, sizeof (filepath), "%s/%s", path,
+ GLUSTERD_VOLUME_INFO_FILE);
+ snprintf (sort_filepath, sizeof (sort_filepath),
+ "/tmp/%s.XXXXXX", volinfo->volname);
+
+ sort_fd = mkstemp (sort_filepath);
+ if (sort_fd < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not generate temp "
+ "file, reason: %s for %s: %s", strerror (errno),
+ (volinfo->is_snap_volume)?"snap":"volume",
+ volinfo->volname);
+ goto out;
+ } else {
+ unlink_sortfile = _gf_true;
+ }
+
+ /* sort the info file, result in sort_filepath */
+
+ ret = glusterd_sort_and_redirect (filepath, sort_fd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "sorting info file failed");
+ goto out;
+ }
+
+ ret = close (sort_fd);
+ if (ret)
+ goto out;
+
+ ret = get_checksum_for_path (sort_filepath, &cksum);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get checksum"
+ " for path: %s", sort_filepath);
+ goto out;
+ }
+
+ snprintf (buf, sizeof (buf), "%s=%u\n", "info", cksum);
+ ret = write (fd, buf, strlen (buf));
+
+ if (ret <= 0) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = get_checksum_for_file (fd, &cksum);
+
+ if (ret)
+ goto out;
+
+ volinfo->cksum = cksum;
+out:
+ if (fd > 0)
+ close (fd);
+ if (unlink_sortfile)
+ unlink (sort_filepath);
+ gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret);
+
+ return ret;
+}
+
+int
+_add_dict_to_prdict (dict_t *this, char *key, data_t *value, void *data)
+{
+ glusterd_dict_ctx_t *ctx = NULL;
+ char optkey[512] = {0,};
+ int ret = -1;
+
+ ctx = data;
+ snprintf (optkey, sizeof (optkey), "%s.%s%d", ctx->prefix,
+ ctx->key_name, ctx->opt_count);
+ ret = dict_set_str (ctx->dict, optkey, key);
+ if (ret)
+ gf_log ("", GF_LOG_ERROR, "option add for %s%d %s",
+ ctx->key_name, ctx->opt_count, key);
+ snprintf (optkey, sizeof (optkey), "%s.%s%d", ctx->prefix,
+ ctx->val_name, ctx->opt_count);
+ ret = dict_set_str (ctx->dict, optkey, value->data);
+ if (ret)
+ gf_log ("", GF_LOG_ERROR, "option add for %s%d %s",
+ ctx->val_name, ctx->opt_count, value->data);
+ ctx->opt_count++;
+
+ return ret;
+}
+
+int32_t
+glusterd_add_bricks_hname_path_to_dict (dict_t *dict,
+ glusterd_volinfo_t *volinfo)
+{
+ glusterd_brickinfo_t *brickinfo = NULL;
+ int ret = 0;
+ char key[256] = {0};
+ int index = 0;
+
+
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ snprintf (key, sizeof (key), "%d-hostname", index);
+ ret = dict_set_str (dict, key, brickinfo->hostname);
+ if (ret)
+ goto out;
+
+ snprintf (key, sizeof (key), "%d-path", index);
+ ret = dict_set_str (dict, key, brickinfo->path);
+ if (ret)
+ goto out;
+
+ index++;
+ }
+out:
+ return ret;
+}
+
+int32_t
+glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo,
+ dict_t *dict, int32_t count)
+{
+ int32_t ret = -1;
+ char prefix[512] = {0,};
+ char key[512] = {0,};
+ glusterd_brickinfo_t *brickinfo = NULL;
+ int32_t i = 1;
+ char *volume_id_str = NULL;
+ char *src_brick = NULL;
+ char *dst_brick = NULL;
+ char *str = NULL;
+ glusterd_dict_ctx_t ctx = {0};
+ char *rebalance_id_str = NULL;
+ char *rb_id_str = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (dict);
+ GF_ASSERT (volinfo);
+
+ snprintf (key, sizeof (key), "volume%d.name", count);
+ ret = dict_set_str (dict, key, volinfo->volname);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.type", count);
+ ret = dict_set_int32 (dict, key, volinfo->type);
+ if (ret)
+ goto out;
+
+ snprintf (key, sizeof (key), "volume%d.is_volume_restored", count);
+ ret = dict_set_int32 (dict, key, volinfo->is_volume_restored);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to set "
+ "is_volume_restored option for %s volume",
+ volinfo->volname);
+ goto out;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.brick_count", count);
+ ret = dict_set_int32 (dict, key, volinfo->brick_count);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.version", count);
+ ret = dict_set_int32 (dict, key, volinfo->version);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.status", count);
+ ret = dict_set_int32 (dict, key, volinfo->status);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.sub_count", count);
+ ret = dict_set_int32 (dict, key, volinfo->sub_count);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.stripe_count", count);
+ ret = dict_set_int32 (dict, key, volinfo->stripe_count);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.replica_count", count);
+ ret = dict_set_int32 (dict, key, volinfo->replica_count);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.dist_count", count);
+ ret = dict_set_int32 (dict, key, volinfo->dist_leaf_count);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.ckusm", count);
+ ret = dict_set_int64 (dict, key, volinfo->cksum);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.transport_type", count);
+ ret = dict_set_uint32 (dict, key, volinfo->transport_type);
+ if (ret)
+ goto out;
+
+ snprintf (key, sizeof (key), "volume%d.is_snap_volume", count);
+ ret = dict_set_uint32 (dict, key, volinfo->is_snap_volume);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Unable to set %s", key);
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "volume%d.snap-max-hard-limit", count);
+ ret = dict_set_uint64 (dict, key, volinfo->snap_max_hard_limit);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Unable to set %s", key);
+ goto out;
+ }
+
+ volume_id_str = gf_strdup (uuid_utoa (volinfo->volume_id));
+ if (!volume_id_str) {
+ ret = -1;
+ goto out;
+ }
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.volume_id", count);
+ ret = dict_set_dynstr (dict, key, volume_id_str);
+ if (ret)
+ goto out;
+ volume_id_str = NULL;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.username", count);
+ str = glusterd_auth_get_username (volinfo);
+ if (str) {
+ ret = dict_set_dynstr (dict, key, gf_strdup (str));
+ if (ret)
+ goto out;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.password", count);
+ str = glusterd_auth_get_password (volinfo);
+ if (str) {
+ ret = dict_set_dynstr (dict, key, gf_strdup (str));
+ if (ret)
+ goto out;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, 256, "volume%d.rebalance", count);
+ ret = dict_set_int32 (dict, key, volinfo->rebal.defrag_cmd);
+ if (ret)
+ goto out;
+
+ if (volinfo->rebal.defrag_cmd) {
+ rebalance_id_str = gf_strdup (uuid_utoa
+ (volinfo->rebal.rebalance_id));
+ if (!rebalance_id_str) {
+ ret = -1;
+ goto out;
+ }
+ memset (key, 0, sizeof (key));
+ snprintf (key, 256, "volume%d.rebalance-id", count);
+ ret = dict_set_dynstr (dict, key, rebalance_id_str);
+ if (ret)
+ goto out;
+ rebalance_id_str = NULL;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.rebalance-op", count);
+ ret = dict_set_uint32 (dict, key, volinfo->rebal.op);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_STATUS, count);
+ ret = dict_set_int32 (dict, key, volinfo->rep_brick.rb_status);
+ if (ret)
+ goto out;
+
+ if (volinfo->rep_brick.rb_status > GF_RB_STATUS_NONE) {
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_SRC_BRICK,
+ count);
+ gf_asprintf (&src_brick, "%s:%s",
+ volinfo->rep_brick.src_brick->hostname,
+ volinfo->rep_brick.src_brick->path);
+ ret = dict_set_dynstr (dict, key, src_brick);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_DST_BRICK,
+ count);
+ gf_asprintf (&dst_brick, "%s:%s",
+ volinfo->rep_brick.dst_brick->hostname,
+ volinfo->rep_brick.dst_brick->path);
+ ret = dict_set_dynstr (dict, key, dst_brick);
+ if (ret)
+ goto out;
+
+ rb_id_str = gf_strdup (uuid_utoa (volinfo->rep_brick.rb_id));
+ if (!rb_id_str) {
+ ret = -1;
+ goto out;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.rb_id", count);
+ ret = dict_set_dynstr (dict, key, rb_id_str);
+ if (ret)
+ goto out;
+ rb_id_str = NULL;
+ }
+
+ snprintf (prefix, sizeof (prefix), "volume%d", count);
+ ctx.dict = dict;
+ ctx.prefix = prefix;
+ ctx.opt_count = 1;
+ ctx.key_name = "key";
+ ctx.val_name = "value";
+ GF_ASSERT (volinfo->dict);
+
+ dict_foreach (volinfo->dict, _add_dict_to_prdict, &ctx);
+ ctx.opt_count--;
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.opt-count", count);
+ ret = dict_set_int32 (dict, key, ctx.opt_count);
+ if (ret)
+ goto out;
+
+ ctx.dict = dict;
+ ctx.prefix = prefix;
+ ctx.opt_count = 1;
+ ctx.key_name = "slave-num";
+ ctx.val_name = "slave-val";
+ GF_ASSERT (volinfo->gsync_slaves);
+
+ dict_foreach (volinfo->gsync_slaves, _add_dict_to_prdict, &ctx);
+ ctx.opt_count--;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.gsync-count", count);
+ ret = dict_set_int32 (dict, key, ctx.opt_count);
+ if (ret)
+ goto out;
+
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.brick%d.hostname",
+ count, i);
+ ret = dict_set_str (dict, key, brickinfo->hostname);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.brick%d.path",
+ count, i);
+ ret = dict_set_str (dict, key, brickinfo->path);
+ if (ret)
+ goto out;
+
+ snprintf (key, sizeof (key), "volume%d.brick%d.snap_status",
+ count, i);
+ ret = dict_set_int32 (dict, key, brickinfo->snap_status);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set snap_status for %s:%s",
+ brickinfo->hostname,
+ brickinfo->path);
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "volume%d.brick%d.device_path",
+ count, i);
+ ret = dict_set_str (dict, key, brickinfo->device_path);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Failed to set snap_device for %s:%s",
+ brickinfo->hostname,
+ brickinfo->path);
+ goto out;
+ }
+
+ i++;
+ }
+
+ /* Add volume op-versions to dict. This prevents volume inconsistencies
+ * in the cluster
+ */
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.op-version", count);
+ ret = dict_set_int32 (dict, key, volinfo->op_version);
+ if (ret)
+ goto out;
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.client-op-version", count);
+ ret = dict_set_int32 (dict, key, volinfo->client_op_version);
+
+out:
+ GF_FREE (volume_id_str);
+ GF_FREE (rebalance_id_str);
+ GF_FREE (rb_id_str);
+
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+
+ return ret;
+}
+
+int32_t
+glusterd_build_volume_dict (dict_t **vols)
+{
+ int32_t ret = -1;
+ dict_t *dict = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ int32_t count = 0;
+ glusterd_dict_ctx_t ctx = {0};
+
+ priv = THIS->private;
+
+ dict = dict_new ();
+
+ if (!dict)
+ goto out;
+
+ list_for_each_entry (volinfo, &priv->volumes, vol_list) {
+ count++;
+ ret = glusterd_add_volume_to_dict (volinfo, dict, count);
+ if (ret)
+ goto out;
+ }
+
+
+ ret = dict_set_int32 (dict, "count", count);
+ if (ret)
+ goto out;
+
+ ctx.dict = dict;
+ ctx.prefix = "global";
+ ctx.opt_count = 1;
+ ctx.key_name = "key";
+ ctx.val_name = "val";
+ dict_foreach (priv->opts, _add_dict_to_prdict, &ctx);
+ ctx.opt_count--;
+ ret = dict_set_int32 (dict, "global-opt-count", ctx.opt_count);
+ if (ret)
+ goto out;
+
+ *vols = dict;
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ if (ret)
+ dict_unref (dict);
+
+ return ret;
+}
+
+int32_t
+glusterd_compare_friend_volume (dict_t *vols, int32_t count, int32_t *status,
+ char *hostname)
+{
+
+ int32_t ret = -1;
+ char key[512] = {0,};
+ glusterd_volinfo_t *volinfo = NULL;
+ char *volname = NULL;
+ uint32_t cksum = 0;
+ int32_t version = 0;
+
+ GF_ASSERT (vols);
+ GF_ASSERT (status);
+
+ snprintf (key, sizeof (key), "volume%d.name", count);
+ ret = dict_get_str (vols, key, &volname);
+ if (ret)
+ goto out;
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+
+ if (ret) {
+ *status = GLUSTERD_VOL_COMP_UPDATE_REQ;
+ ret = 0;
+ goto out;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.version", count);
+ ret = dict_get_int32 (vols, key, &version);
+ if (ret)
+ goto out;
+
+ if (version > volinfo->version) {
+ //Mismatch detected
+ ret = 0;
+ gf_log ("", GF_LOG_ERROR, "Version of volume %s differ."
+ "local version = %d, remote version = %d on peer %s",
+ volinfo->volname, volinfo->version, version, hostname);
+ *status = GLUSTERD_VOL_COMP_UPDATE_REQ;
+ goto out;
+ } else if (version < volinfo->version) {
+ *status = GLUSTERD_VOL_COMP_SCS;
+ goto out;
+ }
+
+ //Now, versions are same, compare cksums.
+ //
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.ckusm", count);
+ ret = dict_get_uint32 (vols, key, &cksum);
+ if (ret)
+ goto out;
+
+ if (cksum != volinfo->cksum) {
+ ret = 0;
+ gf_log ("", GF_LOG_ERROR, "Cksums of volume %s differ."
+ " local cksum = %u, remote cksum = %u on peer %s",
+ volinfo->volname, volinfo->cksum, cksum, hostname);
+ *status = GLUSTERD_VOL_COMP_RJT;
+ goto out;
+ }
+
+ *status = GLUSTERD_VOL_COMP_SCS;
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning with ret: %d, status: %d",
+ ret, *status);
+ return ret;
+}
+
+static int32_t
+import_prdict_dict (dict_t *vols, dict_t *dst_dict, char *key_prefix,
+ char *value_prefix, int opt_count, char *prefix)
+{
+ char key[512] = {0,};
+ int32_t ret = 0;
+ int i = 1;
+ char *opt_key = NULL;
+ char *opt_val = NULL;
+ char *dup_opt_val = NULL;
+ char msg[2048] = {0};
+
+ while (i <= opt_count) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.%s%d",
+ prefix, key_prefix, i);
+ ret = dict_get_str (vols, key, &opt_key);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Volume dict key not "
+ "specified");
+ goto out;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.%s%d",
+ prefix, value_prefix, i);
+ ret = dict_get_str (vols, key, &opt_val);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Volume dict value not "
+ "specified");
+ goto out;
+ }
+ dup_opt_val = gf_strdup (opt_val);
+ if (!dup_opt_val) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_dynstr (dst_dict, opt_key, dup_opt_val);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Volume set %s %s "
+ "unsuccessful", opt_key, dup_opt_val);
+ goto out;
+ }
+ i++;
+ }
+
+out:
+ if (msg[0])
+ gf_log ("glusterd", GF_LOG_ERROR, "%s", msg);
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ return ret;
+
+}
+
+gf_boolean_t
+glusterd_is_quorum_option (char *option)
+{
+ gf_boolean_t res = _gf_false;
+ int i = 0;
+ char *keys[] = {GLUSTERD_QUORUM_TYPE_KEY,
+ GLUSTERD_QUORUM_RATIO_KEY, NULL};
+
+ for (i = 0; keys[i]; i++) {
+ if (strcmp (option, keys[i]) == 0) {
+ res = _gf_true;
+ break;
+ }
+ }
+ return res;
+}
+
+gf_boolean_t
+glusterd_is_quorum_changed (dict_t *options, char *option, char *value)
+{
+ int ret = 0;
+ gf_boolean_t reconfigured = _gf_false;
+ gf_boolean_t all = _gf_false;
+ char *oldquorum = NULL;
+ char *newquorum = NULL;
+ char *oldratio = NULL;
+ char *newratio = NULL;
+
+ if ((strcmp ("all", option) != 0) &&
+ !glusterd_is_quorum_option (option))
+ goto out;
+
+ if (strcmp ("all", option) == 0)
+ all = _gf_true;
+
+ if (all || (strcmp (GLUSTERD_QUORUM_TYPE_KEY, option) == 0)) {
+ newquorum = value;
+ ret = dict_get_str (options, GLUSTERD_QUORUM_TYPE_KEY,
+ &oldquorum);
+ }
+
+ if (all || (strcmp (GLUSTERD_QUORUM_RATIO_KEY, option) == 0)) {
+ newratio = value;
+ ret = dict_get_str (options, GLUSTERD_QUORUM_RATIO_KEY,
+ &oldratio);
+ }
+
+ reconfigured = _gf_true;
+
+ if (oldquorum && newquorum && (strcmp (oldquorum, newquorum) == 0))
+ reconfigured = _gf_false;
+ if (oldratio && newratio && (strcmp (oldratio, newratio) == 0))
+ reconfigured = _gf_false;
+
+ if ((oldratio == NULL) && (newratio == NULL) && (oldquorum == NULL) &&
+ (newquorum == NULL))
+ reconfigured = _gf_false;
+out:
+ return reconfigured;
+}
+
+static inline gf_boolean_t
+_is_contributing_to_quorum (gd_quorum_contrib_t contrib)
+{
+ if ((contrib == QUORUM_UP) || (contrib == QUORUM_DOWN))
+ return _gf_true;
+ return _gf_false;
+}
+
+static inline gf_boolean_t
+_does_quorum_meet (int active_count, int quorum_count)
+{
+ return (active_count >= quorum_count);
+}
+
+int
+glusterd_get_quorum_cluster_counts (xlator_t *this, int *active_count,
+ int *quorum_count)
+{
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_conf_t *conf = NULL;
+ int ret = -1;
+ int inquorum_count = 0;
+ char *val = NULL;
+ double quorum_percentage = 0.0;
+ gf_boolean_t ratio = _gf_false;
+ int count = 0;
+
+ conf = this->private;
+ //Start with counting self
+ inquorum_count = 1;
+ if (active_count)
+ *active_count = 1;
+ list_for_each_entry (peerinfo, &conf->peers, uuid_list) {
+ if (peerinfo->quorum_contrib == QUORUM_WAITING)
+ goto out;
+
+ if (_is_contributing_to_quorum (peerinfo->quorum_contrib))
+ inquorum_count = inquorum_count + 1;
+
+ if (active_count && (peerinfo->quorum_contrib == QUORUM_UP))
+ *active_count = *active_count + 1;
+ }
+
+ ret = dict_get_str (conf->opts, GLUSTERD_QUORUM_RATIO_KEY, &val);
+ if (ret == 0) {
+ ratio = _gf_true;
+ ret = gf_string2percent (val, &quorum_percentage);
+ if (!ret)
+ ratio = _gf_true;
+ }
+ if (ratio)
+ count = CEILING_POS (inquorum_count *
+ quorum_percentage / 100.0);
+ else
+ count = (inquorum_count * 50 / 100) + 1;
+
+ *quorum_count = count;
+ ret = 0;
+out:
+ return ret;
+}
+
+gf_boolean_t
+glusterd_is_volume_in_server_quorum (glusterd_volinfo_t *volinfo)
+{
+ gf_boolean_t res = _gf_false;
+ char *quorum_type = NULL;
+ int ret = 0;
+
+ ret = dict_get_str (volinfo->dict, GLUSTERD_QUORUM_TYPE_KEY,
+ &quorum_type);
+ if (ret)
+ goto out;
+
+ if (strcmp (quorum_type, GLUSTERD_SERVER_QUORUM) == 0)
+ res = _gf_true;
+out:
+ return res;
+}
+
+gf_boolean_t
+glusterd_is_any_volume_in_server_quorum (xlator_t *this)
+{
+ glusterd_conf_t *conf = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+
+ conf = this->private;
+ list_for_each_entry (volinfo, &conf->volumes, vol_list) {
+ if (glusterd_is_volume_in_server_quorum (volinfo)) {
+ return _gf_true;
+ }
+ }
+ return _gf_false;
+}
+
+gf_boolean_t
+does_gd_meet_server_quorum (xlator_t *this)
+{
+ int quorum_count = 0;
+ int active_count = 0;
+ gf_boolean_t in = _gf_false;
+ glusterd_conf_t *conf = NULL;
+ int ret = -1;
+
+ conf = this->private;
+ ret = glusterd_get_quorum_cluster_counts (this, &active_count,
+ &quorum_count);
+ if (ret)
+ goto out;
+
+ if (!_does_quorum_meet (active_count, quorum_count)) {
+ goto out;
+ }
+
+ in = _gf_true;
+out:
+ return in;
+}
+
+int
+glusterd_spawn_daemons (void *opaque)
+{
+ glusterd_conf_t *conf = THIS->private;
+ gf_boolean_t start_bricks = !conf->restart_done;
+
+ if (start_bricks) {
+ glusterd_restart_bricks (conf);
+ conf->restart_done = _gf_true;
+ }
+ glusterd_restart_gsyncds (conf);
+ glusterd_restart_rebalance (conf);
+ return 0;
+}
+
+void
+glusterd_do_volume_quorum_action (xlator_t *this, glusterd_volinfo_t *volinfo,
+ gf_boolean_t meets_quorum)
+{
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (volinfo->status != GLUSTERD_STATUS_STARTED)
+ goto out;
+
+ if (!glusterd_is_volume_in_server_quorum (volinfo))
+ meets_quorum = _gf_true;
+
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ if (!glusterd_is_local_brick (this, volinfo, brickinfo))
+ continue;
+ if (meets_quorum)
+ glusterd_brick_start (volinfo, brickinfo, _gf_false);
+ else
+ glusterd_brick_stop (volinfo, brickinfo, _gf_false);
+ }
+out:
+ return;
+}
+
+int
+glusterd_do_quorum_action ()
+{
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ int ret = 0;
+ int active_count = 0;
+ int quorum_count = 0;
+ gf_boolean_t meets = _gf_false;
+
+ this = THIS;
+ conf = this->private;
+
+ conf->pending_quorum_action = _gf_true;
+ ret = glusterd_lock (conf->uuid);
+ if (ret)
+ goto out;
+
+ {
+ ret = glusterd_get_quorum_cluster_counts (this, &active_count,
+ &quorum_count);
+ if (ret)
+ goto unlock;
+
+ if (_does_quorum_meet (active_count, quorum_count))
+ meets = _gf_true;
+ list_for_each_entry (volinfo, &conf->volumes, vol_list) {
+ glusterd_do_volume_quorum_action (this, volinfo, meets);
+ }
+ }
+unlock:
+ (void)glusterd_unlock (conf->uuid);
+ conf->pending_quorum_action = _gf_false;
+out:
+ return ret;
+}
+
+int32_t
+glusterd_import_friend_volume_opts (dict_t *vols, int count,
+ glusterd_volinfo_t *volinfo)
+{
+ char key[512] = {0,};
+ int32_t ret = -1;
+ int opt_count = 0;
+ char msg[2048] = {0};
+ char volume_prefix[1024] = {0};
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.opt-count", count);
+ ret = dict_get_int32 (vols, key, &opt_count);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Volume option count not "
+ "specified for %s", volinfo->volname);
+ goto out;
+ }
+
+ snprintf (volume_prefix, sizeof (volume_prefix), "volume%d", count);
+ ret = import_prdict_dict (vols, volinfo->dict, "key", "value",
+ opt_count, volume_prefix);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Unable to import options dict "
+ "specified for %s", volinfo->volname);
+ goto out;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.gsync-count", count);
+ ret = dict_get_int32 (vols, key, &opt_count);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Gsync count not "
+ "specified for %s", volinfo->volname);
+ goto out;
+ }
+
+ ret = import_prdict_dict (vols, volinfo->gsync_slaves, "slave-num",
+ "slave-val", opt_count, volume_prefix);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Unable to import gsync sessions "
+ "specified for %s", volinfo->volname);
+ goto out;
+ }
+
+out:
+ if (msg[0])
+ gf_log ("glusterd", GF_LOG_ERROR, "%s", msg);
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_import_new_brick (dict_t *vols, int32_t vol_count,
+ int32_t brick_count,
+ glusterd_brickinfo_t **brickinfo)
+{
+ char key[512] = {0,};
+ int ret = -1;
+ int32_t snap_status = 0;
+ char *snap_device = NULL;
+ char *hostname = NULL;
+ char *path = NULL;
+ glusterd_brickinfo_t *new_brickinfo = NULL;
+ char msg[2048] = {0};
+
+ GF_ASSERT (vols);
+ GF_ASSERT (vol_count >= 0);
+ GF_ASSERT (brickinfo);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.brick%d.hostname",
+ vol_count, brick_count);
+ ret = dict_get_str (vols, key, &hostname);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "%s missing in payload", key);
+ goto out;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.brick%d.path",
+ vol_count, brick_count);
+ ret = dict_get_str (vols, key, &path);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "%s missing in payload", key);
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "volume%d.brick%d.snap_status",
+ vol_count, brick_count);
+ ret = dict_get_int32 (vols, key, &snap_status);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "%s missing in payload", key);
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "volume%d.brick%d.device_path",
+ vol_count, brick_count);
+ ret = dict_get_str (vols, key, &snap_device);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "%s missing in payload", key);
+ goto out;
+ }
+
+ ret = glusterd_brickinfo_new (&new_brickinfo);
+ if (ret)
+ goto out;
+
+ strcpy (new_brickinfo->path, path);
+ strcpy (new_brickinfo->hostname, hostname);
+ strcpy (new_brickinfo->device_path, snap_device);
+ new_brickinfo->snap_status = snap_status;
+
+ //peerinfo might not be added yet
+ (void) glusterd_resolve_brick (new_brickinfo);
+ ret = 0;
+ *brickinfo = new_brickinfo;
+out:
+ if (msg[0])
+ gf_log ("glusterd", GF_LOG_ERROR, "%s", msg);
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_import_bricks (dict_t *vols, int32_t vol_count,
+ glusterd_volinfo_t *new_volinfo)
+{
+ int ret = -1;
+ int brick_count = 1;
+ glusterd_brickinfo_t *new_brickinfo = NULL;
+
+ GF_ASSERT (vols);
+ GF_ASSERT (vol_count >= 0);
+ GF_ASSERT (new_volinfo);
+ while (brick_count <= new_volinfo->brick_count) {
+
+ ret = glusterd_import_new_brick (vols, vol_count, brick_count,
+ &new_brickinfo);
+ if (ret)
+ goto out;
+ list_add_tail (&new_brickinfo->brick_list, &new_volinfo->bricks);
+ brick_count++;
+ }
+ ret = 0;
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_import_volinfo (dict_t *vols, int count,
+ glusterd_volinfo_t **volinfo)
+{
+ int ret = -1;
+ char key[256] = {0};
+ char *volname = NULL;
+ glusterd_volinfo_t *new_volinfo = NULL;
+ char *volume_id_str = NULL;
+ char msg[2048] = {0};
+ char *src_brick = NULL;
+ char *dst_brick = NULL;
+ char *str = NULL;
+ int rb_status = 0;
+ char *rebalance_id_str = NULL;
+ char *rb_id_str = NULL;
+ int op_version = 0;
+ int client_op_version = 0;
+ uint32_t is_snap_volume = 0;
+
+ GF_ASSERT (vols);
+ GF_ASSERT (volinfo);
+
+ snprintf (key, sizeof (key), "volume%d.name", count);
+ ret = dict_get_str (vols, key, &volname);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "%s missing in payload", key);
+ goto out;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.is_snap_volume", count);
+ ret = dict_get_uint32 (vols, key, &is_snap_volume);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "%s missing in payload for %s",
+ key, volname);
+ goto out;
+ }
+
+ if (is_snap_volume == _gf_true) {
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "Not syncing snap volume %s", volname);
+ ret = 0;
+ goto out;
+ }
+
+ ret = glusterd_volinfo_new (&new_volinfo);
+ if (ret)
+ goto out;
+ strncpy (new_volinfo->volname, volname, sizeof (new_volinfo->volname));
+
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.type", count);
+ ret = dict_get_int32 (vols, key, &new_volinfo->type);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "%s missing in payload for %s",
+ key, volname);
+ goto out;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.brick_count", count);
+ ret = dict_get_int32 (vols, key, &new_volinfo->brick_count);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "%s missing in payload for %s",
+ key, volname);
+ goto out;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.version", count);
+ ret = dict_get_int32 (vols, key, &new_volinfo->version);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "%s missing in payload for %s",
+ key, volname);
+ goto out;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.status", count);
+ ret = dict_get_int32 (vols, key, (int32_t *)&new_volinfo->status);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "%s missing in payload for %s",
+ key, volname);
+ goto out;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.sub_count", count);
+ ret = dict_get_int32 (vols, key, &new_volinfo->sub_count);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "%s missing in payload for %s",
+ key, volname);
+ goto out;
+ }
+
+ /* not having a 'stripe_count' key is not a error
+ (as peer may be of old version) */
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.stripe_count", count);
+ ret = dict_get_int32 (vols, key, &new_volinfo->stripe_count);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_INFO,
+ "peer is possibly old version");
+
+ /* not having a 'replica_count' key is not a error
+ (as peer may be of old version) */
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.replica_count", count);
+ ret = dict_get_int32 (vols, key, &new_volinfo->replica_count);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_INFO,
+ "peer is possibly old version");
+
+ /* not having a 'dist_count' key is not a error
+ (as peer may be of old version) */
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.dist_count", count);
+ ret = dict_get_int32 (vols, key, &new_volinfo->dist_leaf_count);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_INFO,
+ "peer is possibly old version");
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.ckusm", count);
+ ret = dict_get_uint32 (vols, key, &new_volinfo->cksum);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "%s missing in payload for %s",
+ key, volname);
+ goto out;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.volume_id", count);
+ ret = dict_get_str (vols, key, &volume_id_str);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "%s missing in payload for %s",
+ key, volname);
+ goto out;
+ }
+
+ uuid_parse (volume_id_str, new_volinfo->volume_id);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.username", count);
+ ret = dict_get_str (vols, key, &str);
+ if (!ret) {
+ ret = glusterd_auth_set_username (new_volinfo, str);
+ if (ret)
+ goto out;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.password", count);
+ ret = dict_get_str (vols, key, &str);
+ if (!ret) {
+ ret = glusterd_auth_set_password (new_volinfo, str);
+ if (ret)
+ goto out;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.transport_type", count);
+ ret = dict_get_uint32 (vols, key, &new_volinfo->transport_type);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "%s missing in payload for %s",
+ key, volname);
+ goto out;
+ }
+
+ new_volinfo->is_snap_volume = is_snap_volume;
+
+ snprintf (key, sizeof (key), "volume%d.is_volume_restored", count);
+ ret = dict_get_uint32 (vols, key, &new_volinfo->is_volume_restored);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to get "
+ "is_volume_restored option for %s",
+ volname);
+ goto out;
+ }
+
+ snprintf (key, sizeof (key), "volume%d.snap-max-hard-limit", count);
+ ret = dict_get_uint64 (vols, key, &new_volinfo->snap_max_hard_limit);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "%s missing in payload for %s",
+ key, volname);
+ goto out;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.rebalance", count);
+ ret = dict_get_uint32 (vols, key, &new_volinfo->rebal.defrag_cmd);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "%s missing in payload for %s",
+ key, volname);
+ goto out;
+ }
+
+ if (new_volinfo->rebal.defrag_cmd) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.rebalance-id", count);
+ ret = dict_get_str (vols, key, &rebalance_id_str);
+ if (ret) {
+ /* This is not present in older glusterfs versions,
+ * so don't error out
+ */
+ ret = 0;
+ } else {
+ uuid_parse (rebalance_id_str,
+ new_volinfo->rebal.rebalance_id);
+ }
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.rebalance-op", count);
+ ret = dict_get_uint32 (vols, key,(uint32_t *) &new_volinfo->rebal.op);
+ if (ret) {
+ /* This is not present in older glusterfs versions,
+ * so don't error out
+ */
+ ret = 0;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_STATUS, count);
+ ret = dict_get_int32 (vols, key, &rb_status);
+ if (ret)
+ goto out;
+ new_volinfo->rep_brick.rb_status = rb_status;
+
+ if (new_volinfo->rep_brick.rb_status > GF_RB_STATUS_NONE) {
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_SRC_BRICK,
+ count);
+ ret = dict_get_str (vols, key, &src_brick);
+ if (ret)
+ goto out;
+
+ ret = glusterd_brickinfo_new_from_brick (src_brick,
+ &new_volinfo->rep_brick.src_brick);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to create"
+ " src brickinfo");
+ goto out;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_DST_BRICK,
+ count);
+ ret = dict_get_str (vols, key, &dst_brick);
+ if (ret)
+ goto out;
+
+ ret = glusterd_brickinfo_new_from_brick (dst_brick,
+ &new_volinfo->rep_brick.dst_brick);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to create"
+ " dst brickinfo");
+ goto out;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.rb_id", count);
+ ret = dict_get_str (vols, key, &rb_id_str);
+ if (ret) {
+ /* This is not present in older glusterfs versions,
+ * so don't error out
+ */
+ ret = 0;
+ } else {
+ uuid_parse (rb_id_str, new_volinfo->rep_brick.rb_id);
+ }
+ }
+
+
+ ret = glusterd_import_friend_volume_opts (vols, count, new_volinfo);
+ if (ret)
+ goto out;
+
+ /* Import the volume's op-versions if available else set it to 1.
+ * Not having op-versions implies this informtation was obtained from a
+ * op-version 1 friend (gluster-3.3), ergo the cluster is at op-version
+ * 1 and all volumes are at op-versions 1.
+ *
+ * Either both the volume op-versions should be absent or both should be
+ * present. Only one being present is a failure
+ */
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.op-version", count);
+ ret = dict_get_int32 (vols, key, &op_version);
+ if (ret)
+ ret = 0;
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.client-op-version", count);
+ ret = dict_get_int32 (vols, key, &client_op_version);
+ if (ret)
+ ret = 0;
+
+ if (op_version && client_op_version) {
+ new_volinfo->op_version = op_version;
+ new_volinfo->client_op_version = client_op_version;
+ } else if (((op_version == 0) && (client_op_version != 0)) ||
+ ((op_version != 0) && (client_op_version == 0))) {
+ ret = -1;
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "Only one volume op-version found");
+ goto out;
+ } else {
+ new_volinfo->op_version = 1;
+ new_volinfo->client_op_version = 1;
+ }
+
+ ret = glusterd_import_bricks (vols, count, new_volinfo);
+ if (ret)
+ goto out;
+ *volinfo = new_volinfo;
+out:
+ if (msg[0])
+ gf_log ("glusterd", GF_LOG_ERROR, "%s", msg);
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_volume_disconnect_all_bricks (glusterd_volinfo_t *volinfo)
+{
+ int ret = 0;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ GF_ASSERT (volinfo);
+
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ if (glusterd_is_brick_started (brickinfo)) {
+ ret = glusterd_brick_disconnect (brickinfo);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR, "Failed to "
+ "disconnect %s:%s", brickinfo->hostname,
+ brickinfo->path);
+ break;
+ }
+ }
+ }
+
+ return ret;
+}
+
+int32_t
+glusterd_volinfo_copy_brick_portinfo (glusterd_volinfo_t *new_volinfo,
+ glusterd_volinfo_t *old_volinfo)
+{
+ glusterd_brickinfo_t *new_brickinfo = NULL;
+ glusterd_brickinfo_t *old_brickinfo = NULL;
+
+ int ret = 0;
+ GF_ASSERT (new_volinfo);
+ GF_ASSERT (old_volinfo);
+ if (_gf_false == glusterd_is_volume_started (new_volinfo))
+ goto out;
+ list_for_each_entry (new_brickinfo, &new_volinfo->bricks, brick_list) {
+ ret = glusterd_volume_brickinfo_get (new_brickinfo->uuid,
+ new_brickinfo->hostname,
+ new_brickinfo->path,
+ old_volinfo, &old_brickinfo);
+ if ((0 == ret) && glusterd_is_brick_started (old_brickinfo)) {
+ new_brickinfo->port = old_brickinfo->port;
+ }
+ }
+out:
+ ret = 0;
+ return ret;
+}
+
+int32_t
+glusterd_volinfo_stop_stale_bricks (glusterd_volinfo_t *new_volinfo,
+ glusterd_volinfo_t *old_volinfo)
+{
+ glusterd_brickinfo_t *new_brickinfo = NULL;
+ glusterd_brickinfo_t *old_brickinfo = NULL;
+
+ int ret = 0;
+ GF_ASSERT (new_volinfo);
+ GF_ASSERT (old_volinfo);
+ if (_gf_false == glusterd_is_volume_started (old_volinfo))
+ goto out;
+ list_for_each_entry (old_brickinfo, &old_volinfo->bricks, brick_list) {
+ ret = glusterd_volume_brickinfo_get (old_brickinfo->uuid,
+ old_brickinfo->hostname,
+ old_brickinfo->path,
+ new_volinfo, &new_brickinfo);
+ if (ret) {
+ /*TODO: may need to switch to 'atomic' flavour of
+ * brick_stop, once we make peer rpc program also
+ * synctask enabled*/
+ ret = glusterd_brick_stop (old_volinfo, old_brickinfo,
+ _gf_false);
+ if (ret)
+ gf_log ("glusterd", GF_LOG_ERROR, "Failed to "
+ "stop brick %s:%s", old_brickinfo->hostname,
+ old_brickinfo->path);
+ }
+ }
+ ret = 0;
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_delete_stale_volume (glusterd_volinfo_t *stale_volinfo,
+ glusterd_volinfo_t *valid_volinfo)
+{
+ GF_ASSERT (stale_volinfo);
+ GF_ASSERT (valid_volinfo);
+
+ /* If stale volume is in started state, copy the port numbers of the
+ * local bricks if they exist in the valid volume information.
+ * stop stale bricks. Stale volume information is going to be deleted.
+ * Which deletes the valid brick information inside stale volinfo.
+ * We dont want brick_rpc_notify to access already deleted brickinfo.
+ * Disconnect all bricks from stale_volinfo (unconditionally), since
+ * they are being deleted subsequently.
+ */
+ if (glusterd_is_volume_started (stale_volinfo)) {
+ if (glusterd_is_volume_started (valid_volinfo)) {
+ (void) glusterd_volinfo_stop_stale_bricks (valid_volinfo,
+ stale_volinfo);
+ //Only valid bricks will be running now.
+ (void) glusterd_volinfo_copy_brick_portinfo (valid_volinfo,
+ stale_volinfo);
+
+ } else {
+ (void) glusterd_stop_bricks (stale_volinfo);
+ }
+
+ (void) glusterd_volume_disconnect_all_bricks (stale_volinfo);
+ }
+ /* Delete all the bricks and stores and vol files. They will be created
+ * again by the valid_volinfo. Volume store delete should not be
+ * performed because some of the bricks could still be running,
+ * keeping pid files under run directory
+ */
+ (void) glusterd_delete_all_bricks (stale_volinfo);
+ if (stale_volinfo->shandle) {
+ unlink (stale_volinfo->shandle->path);
+ (void) gf_store_handle_destroy (stale_volinfo->shandle);
+ stale_volinfo->shandle = NULL;
+ }
+ (void) glusterd_volinfo_delete (stale_volinfo);
+ return 0;
+}
+
+int32_t
+glusterd_import_friend_volume (dict_t *vols, size_t count)
+{
+
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ glusterd_volinfo_t *old_volinfo = NULL;
+ glusterd_volinfo_t *new_volinfo = NULL;
+
+ GF_ASSERT (vols);
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+ ret = glusterd_import_volinfo (vols, count, &new_volinfo);
+ if (ret)
+ goto out;
+
+ if (!new_volinfo) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Not importing snap volume");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (new_volinfo->volname, &old_volinfo);
+ if (0 == ret) {
+ (void) glusterd_delete_stale_volume (old_volinfo, new_volinfo);
+ }
+
+ if (glusterd_is_volume_started (new_volinfo)) {
+ (void) glusterd_start_bricks (new_volinfo);
+ }
+
+ ret = glusterd_store_volinfo (new_volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);
+ ret = glusterd_create_volfiles_and_notify_services (new_volinfo);
+ if (ret)
+ goto out;
+
+ list_add_tail (&new_volinfo->vol_list, &priv->volumes);
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning with ret: %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_import_friend_volumes (dict_t *vols)
+{
+ int32_t ret = -1;
+ int32_t count = 0;
+ int i = 1;
+
+ GF_ASSERT (vols);
+
+ ret = dict_get_int32 (vols, "count", &count);
+ if (ret)
+ goto out;
+
+ while (i <= count) {
+ ret = glusterd_import_friend_volume (vols, i);
+ if (ret)
+ goto out;
+ i++;
+ }
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ return ret;
+}
+
+int
+glusterd_get_global_opt_version (dict_t *opts, uint32_t *version)
+{
+ int ret = -1;
+ char *version_str = NULL;
+
+ ret = dict_get_str (opts, GLUSTERD_GLOBAL_OPT_VERSION, &version_str);
+ if (ret)
+ goto out;
+
+ ret = gf_string2uint (version_str, version);
+ if (ret)
+ goto out;
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+glusterd_get_next_global_opt_version_str (dict_t *opts, char **version_str)
+{
+ int ret = -1;
+ char version_string[64] = {0};
+ uint32_t version = 0;
+
+ ret = glusterd_get_global_opt_version (opts, &version);
+ if (ret)
+ goto out;
+ version++;
+ snprintf (version_string, sizeof (version_string), "%"PRIu32, version);
+ *version_str = gf_strdup (version_string);
+ if (*version_str)
+ ret = 0;
+out:
+ return ret;
+}
+
+int32_t
+glusterd_import_global_opts (dict_t *friend_data)
+{
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ int ret = -1;
+ dict_t *import_options = NULL;
+ int count = 0;
+ uint32_t local_version = 0;
+ uint32_t remote_version = 0;
+
+ this = THIS;
+ conf = this->private;
+
+ ret = dict_get_int32 (friend_data, "global-opt-count", &count);
+ if (ret) {
+ //old version peer
+ ret = 0;
+ goto out;
+ }
+
+ import_options = dict_new ();
+ if (!import_options)
+ goto out;
+ ret = import_prdict_dict (friend_data, import_options, "key", "val",
+ count, "global");
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to import"
+ " global options");
+ goto out;
+ }
+
+ ret = glusterd_get_global_opt_version (conf->opts, &local_version);
+ if (ret)
+ goto out;
+ ret = glusterd_get_global_opt_version (import_options, &remote_version);
+ if (ret)
+ goto out;
+ if (remote_version > local_version) {
+ ret = glusterd_store_options (this, import_options);
+ if (ret)
+ goto out;
+ dict_unref (conf->opts);
+ conf->opts = dict_ref (import_options);
+ }
+ ret = 0;
+out:
+ if (import_options)
+ dict_unref (import_options);
+ return ret;
+}
+
+int32_t
+glusterd_compare_friend_data (dict_t *vols, int32_t *status, char *hostname)
+{
+ int32_t ret = -1;
+ int32_t count = 0;
+ int i = 1;
+ gf_boolean_t update = _gf_false;
+ gf_boolean_t stale_nfs = _gf_false;
+ gf_boolean_t stale_shd = _gf_false;
+
+ GF_ASSERT (vols);
+ GF_ASSERT (status);
+
+ ret = dict_get_int32 (vols, "count", &count);
+ if (ret)
+ goto out;
+
+ while (i <= count) {
+ ret = glusterd_compare_friend_volume (vols, i, status,
+ hostname);
+ if (ret)
+ goto out;
+
+ if (GLUSTERD_VOL_COMP_RJT == *status) {
+ ret = 0;
+ goto out;
+ }
+ if (GLUSTERD_VOL_COMP_UPDATE_REQ == *status)
+ update = _gf_true;
+
+ i++;
+ }
+
+ if (update) {
+ if (glusterd_is_nodesvc_running ("nfs"))
+ stale_nfs = _gf_true;
+ if (glusterd_is_nodesvc_running ("glustershd"))
+ stale_shd = _gf_true;
+ ret = glusterd_import_global_opts (vols);
+ if (ret)
+ goto out;
+ ret = glusterd_import_friend_volumes (vols);
+ if (ret)
+ goto out;
+ if (_gf_false == glusterd_are_all_volumes_stopped ()) {
+ ret = glusterd_nodesvcs_handle_graph_change (NULL);
+ } else {
+ if (stale_nfs)
+ glusterd_nfs_server_stop ();
+ if (stale_shd)
+ glusterd_shd_stop ();
+ }
+ }
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning with ret: %d, status: %d",
+ ret, *status);
+
+ return ret;
+}
+
+/* Valid only in if service is 'local' to glusterd.
+ * pid can be -1, if reading pidfile failed */
+gf_boolean_t
+glusterd_is_service_running (char *pidfile, int *pid)
+{
+ FILE *file = NULL;
+ gf_boolean_t running = _gf_false;
+ int ret = 0;
+ int fno = 0;
+
+ file = fopen (pidfile, "r+");
+ if (!file)
+ goto out;
+
+ fno = fileno (file);
+ ret = lockf (fno, F_TEST, 0);
+ if (ret == -1)
+ running = _gf_true;
+ if (!pid)
+ goto out;
+
+ ret = fscanf (file, "%d", pid);
+ if (ret <= 0) {
+ gf_log ("", GF_LOG_ERROR, "Unable to read pidfile: %s, %s",
+ pidfile, strerror (errno));
+ *pid = -1;
+ }
+
+out:
+ if (file)
+ fclose (file);
+ return running;
+}
+
+void
+glusterd_get_nodesvc_dir (char *server, char *workdir,
+ char *path, size_t len)
+{
+ GF_ASSERT (len == PATH_MAX);
+ snprintf (path, len, "%s/%s", workdir, server);
+}
+
+void
+glusterd_get_nodesvc_rundir (char *server, char *workdir,
+ char *path, size_t len)
+{
+ char dir[PATH_MAX] = {0};
+ GF_ASSERT (len == PATH_MAX);
+
+ glusterd_get_nodesvc_dir (server, workdir, dir, sizeof (dir));
+ snprintf (path, len, "%s/run", dir);
+}
+
+void
+glusterd_get_nodesvc_pidfile (char *server, char *workdir,
+ char *path, size_t len)
+{
+ char dir[PATH_MAX] = {0};
+ GF_ASSERT (len == PATH_MAX);
+
+ glusterd_get_nodesvc_rundir (server, workdir, dir, sizeof (dir));
+ snprintf (path, len, "%s/%s.pid", dir, server);
+}
+
+void
+glusterd_get_nodesvc_volfile (char *server, char *workdir,
+ char *volfile, size_t len)
+{
+ char dir[PATH_MAX] = {0,};
+ GF_ASSERT (len == PATH_MAX);
+
+ glusterd_get_nodesvc_dir (server, workdir, dir, sizeof (dir));
+ snprintf (volfile, len, "%s/%s-server.vol", dir, server);
+}
+
+void
+glusterd_nodesvc_set_online_status (char *server, gf_boolean_t status)
+{
+ glusterd_conf_t *priv = NULL;
+
+ GF_ASSERT (server);
+ priv = THIS->private;
+ GF_ASSERT (priv);
+ GF_ASSERT (priv->shd);
+ GF_ASSERT (priv->nfs);
+
+ if (!strcmp("glustershd", server))
+ priv->shd->online = status;
+ else if (!strcmp ("nfs", server))
+ priv->nfs->online = status;
+}
+
+gf_boolean_t
+glusterd_is_nodesvc_online (char *server)
+{
+ glusterd_conf_t *conf = NULL;
+ gf_boolean_t online = _gf_false;
+
+ GF_ASSERT (server);
+ conf = THIS->private;
+ GF_ASSERT (conf);
+ GF_ASSERT (conf->shd);
+ GF_ASSERT (conf->nfs);
+
+ if (!strcmp (server, "glustershd"))
+ online = conf->shd->online;
+ else if (!strcmp (server, "nfs"))
+ online = conf->nfs->online;
+
+ return online;
+}
+
+int32_t
+glusterd_nodesvc_set_socket_filepath (char *rundir, uuid_t uuid,
+ char *socketpath, int len)
+{
+ char sockfilepath[PATH_MAX] = {0,};
+
+ snprintf (sockfilepath, sizeof (sockfilepath), "%s/run-%s",
+ rundir, uuid_utoa (uuid));
+
+ glusterd_set_socket_filepath (sockfilepath, socketpath, len);
+ return 0;
+}
+
+struct rpc_clnt*
+glusterd_pending_node_get_rpc (glusterd_pending_node_t *pending_node)
+{
+ struct rpc_clnt *rpc = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ nodesrv_t *shd = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ nodesrv_t *nfs = NULL;
+
+ GF_VALIDATE_OR_GOTO (THIS->name, pending_node, out);
+ GF_VALIDATE_OR_GOTO (THIS->name, pending_node->node, out);
+
+ if (pending_node->type == GD_NODE_BRICK) {
+ brickinfo = pending_node->node;
+ rpc = brickinfo->rpc;
+
+ } else if (pending_node->type == GD_NODE_SHD) {
+ shd = pending_node->node;
+ rpc = shd->rpc;
+
+ } else if (pending_node->type == GD_NODE_REBALANCE) {
+ volinfo = pending_node->node;
+ if (volinfo->rebal.defrag)
+ rpc = volinfo->rebal.defrag->rpc;
+
+ } else if (pending_node->type == GD_NODE_NFS) {
+ nfs = pending_node->node;
+ rpc = nfs->rpc;
+
+ } else {
+ GF_ASSERT (0);
+ }
+
+out:
+ return rpc;
+}
+
+struct rpc_clnt*
+glusterd_nodesvc_get_rpc (char *server)
+{
+ glusterd_conf_t *priv = NULL;
+ struct rpc_clnt *rpc = NULL;
+
+ GF_ASSERT (server);
+ priv = THIS->private;
+ GF_ASSERT (priv);
+ GF_ASSERT (priv->shd);
+ GF_ASSERT (priv->nfs);
+
+ if (!strcmp (server, "glustershd"))
+ rpc = priv->shd->rpc;
+ else if (!strcmp (server, "nfs"))
+ rpc = priv->nfs->rpc;
+
+ return rpc;
+}
+
+int32_t
+glusterd_nodesvc_set_rpc (char *server, struct rpc_clnt *rpc)
+{
+ int ret = 0;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+ GF_ASSERT (priv->shd);
+ GF_ASSERT (priv->nfs);
+
+ if (!strcmp ("glustershd", server))
+ priv->shd->rpc = rpc;
+ else if (!strcmp ("nfs", server))
+ priv->nfs->rpc = rpc;
+
+ return ret;
+}
+
+int32_t
+glusterd_nodesvc_connect (char *server, char *socketpath) {
+ int ret = 0;
+ dict_t *options = NULL;
+ struct rpc_clnt *rpc = NULL;
+ glusterd_conf_t *priv = THIS->private;
+
+ rpc = glusterd_nodesvc_get_rpc (server);
+
+ if (rpc == NULL) {
+ /* Setting frame-timeout to 10mins (600seconds).
+ * Unix domain sockets ensures that the connection is reliable.
+ * The default timeout of 30mins used for unreliable network
+ * connections is too long for unix domain socket connections.
+ */
+ ret = rpc_transport_unix_options_build (&options, socketpath,
+ 600);
+ if (ret)
+ goto out;
+ synclock_unlock (&priv->big_lock);
+ ret = glusterd_rpc_create (&rpc, options,
+ glusterd_nodesvc_rpc_notify,
+ server);
+ synclock_lock (&priv->big_lock);
+ if (ret)
+ goto out;
+ (void) glusterd_nodesvc_set_rpc (server, rpc);
+ }
+out:
+ return ret;
+}
+
+int32_t
+glusterd_nodesvc_disconnect (char *server)
+{
+ struct rpc_clnt *rpc = NULL;
+
+ rpc = glusterd_nodesvc_get_rpc (server);
+ (void)glusterd_nodesvc_set_rpc (server, NULL);
+
+ if (rpc)
+ rpc_clnt_unref (rpc);
+
+ return 0;
+}
+
+int32_t
+glusterd_nodesvc_start (char *server)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ runner_t runner = {0,};
+ char pidfile[PATH_MAX] = {0,};
+ char logfile[PATH_MAX] = {0,};
+ char volfile[PATH_MAX] = {0,};
+ char rundir[PATH_MAX] = {0,};
+ char sockfpath[PATH_MAX] = {0,};
+ char volfileid[256] = {0};
+ char glusterd_uuid_option[1024] = {0};
+ char valgrind_logfile[PATH_MAX] = {0};
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+
+ glusterd_get_nodesvc_rundir (server, priv->workdir,
+ rundir, sizeof (rundir));
+ ret = mkdir (rundir, 0777);
+
+ if ((ret == -1) && (EEXIST != errno)) {
+ gf_log ("", GF_LOG_ERROR, "Unable to create rundir %s",
+ rundir);
+ goto out;
+ }
+
+ glusterd_get_nodesvc_pidfile (server, priv->workdir,
+ pidfile, sizeof (pidfile));
+ glusterd_get_nodesvc_volfile (server, priv->workdir,
+ volfile, sizeof (volfile));
+ ret = access (volfile, F_OK);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "%s Volfile %s is not present",
+ server, volfile);
+ goto out;
+ }
+
+ snprintf (logfile, PATH_MAX, "%s/%s.log", DEFAULT_LOG_FILE_DIRECTORY,
+ server);
+ snprintf (volfileid, sizeof (volfileid), "gluster/%s", server);
+
+ glusterd_nodesvc_set_socket_filepath (rundir, MY_UUID,
+ sockfpath, sizeof (sockfpath));
+
+ runinit (&runner);
+
+ if (priv->valgrind) {
+ snprintf (valgrind_logfile, PATH_MAX,
+ "%s/valgrind-%s.log",
+ DEFAULT_LOG_FILE_DIRECTORY,
+ server);
+
+ runner_add_args (&runner, "valgrind", "--leak-check=full",
+ "--trace-children=yes", "--track-origins=yes",
+ NULL);
+ runner_argprintf (&runner, "--log-file=%s", valgrind_logfile);
+ }
+
+ runner_add_args (&runner, SBIN_DIR"/glusterfs",
+ "-s", "localhost",
+ "--volfile-id", volfileid,
+ "-p", pidfile,
+ "-l", logfile,
+ "-S", sockfpath, NULL);
+
+ if (!strcmp (server, "glustershd")) {
+ snprintf (glusterd_uuid_option, sizeof (glusterd_uuid_option),
+ "*replicate*.node-uuid=%s", uuid_utoa (MY_UUID));
+ runner_add_args (&runner, "--xlator-option",
+ glusterd_uuid_option, NULL);
+ }
+ runner_log (&runner, "", GF_LOG_DEBUG,
+ "Starting the nfs/glustershd services");
+
+ ret = runner_run_nowait (&runner);
+ if (ret == 0) {
+ glusterd_nodesvc_connect (server, sockfpath);
+ }
+out:
+ return ret;
+}
+
+int
+glusterd_nfs_server_start ()
+{
+ return glusterd_nodesvc_start ("nfs");
+}
+
+int
+glusterd_shd_start ()
+{
+ return glusterd_nodesvc_start ("glustershd");
+}
+
+gf_boolean_t
+glusterd_is_nodesvc_running (char *server)
+{
+ char pidfile[PATH_MAX] = {0,};
+ glusterd_conf_t *priv = THIS->private;
+
+ glusterd_get_nodesvc_pidfile (server, priv->workdir,
+ pidfile, sizeof (pidfile));
+ return glusterd_is_service_running (pidfile, NULL);
+}
+
+int32_t
+glusterd_nodesvc_unlink_socket_file (char *server)
+{
+ int ret = 0;
+ char sockfpath[PATH_MAX] = {0,};
+ char rundir[PATH_MAX] = {0,};
+ glusterd_conf_t *priv = THIS->private;
+
+ glusterd_get_nodesvc_rundir (server, priv->workdir,
+ rundir, sizeof (rundir));
+
+ glusterd_nodesvc_set_socket_filepath (rundir, MY_UUID,
+ sockfpath, sizeof (sockfpath));
+
+ ret = unlink (sockfpath);
+ if (ret && (ENOENT == errno)) {
+ ret = 0;
+ } else {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to remove %s"
+ " error: %s", sockfpath, strerror (errno));
+ }
+
+ return ret;
+}
+
+int32_t
+glusterd_nodesvc_stop (char *server, int sig)
+{
+ char pidfile[PATH_MAX] = {0,};
+ glusterd_conf_t *priv = THIS->private;
+ int ret = 0;
+
+ if (!glusterd_is_nodesvc_running (server))
+ goto out;
+
+ (void)glusterd_nodesvc_disconnect (server);
+
+ glusterd_get_nodesvc_pidfile (server, priv->workdir,
+ pidfile, sizeof (pidfile));
+ ret = glusterd_service_stop (server, pidfile, sig, _gf_true);
+
+ if (ret == 0) {
+ glusterd_nodesvc_set_online_status (server, _gf_false);
+ (void)glusterd_nodesvc_unlink_socket_file (server);
+ }
+out:
+ return ret;
+}
+
+void
+glusterd_nfs_pmap_deregister ()
+{
+ if (pmap_unset (MOUNT_PROGRAM, MOUNTV3_VERSION))
+ gf_log ("", GF_LOG_INFO, "De-registered MOUNTV3 successfully");
+ else
+ gf_log ("", GF_LOG_ERROR, "De-register MOUNTV3 is unsuccessful");
+
+ if (pmap_unset (MOUNT_PROGRAM, MOUNTV1_VERSION))
+ gf_log ("", GF_LOG_INFO, "De-registered MOUNTV1 successfully");
+ else
+ gf_log ("", GF_LOG_ERROR, "De-register MOUNTV1 is unsuccessful");
+
+ if (pmap_unset (NFS_PROGRAM, NFSV3_VERSION))
+ gf_log ("", GF_LOG_INFO, "De-registered NFSV3 successfully");
+ else
+ gf_log ("", GF_LOG_ERROR, "De-register NFSV3 is unsuccessful");
+
+ if (pmap_unset (NLM_PROGRAM, NLMV4_VERSION))
+ gf_log ("", GF_LOG_INFO, "De-registered NLM v4 successfully");
+ else
+ gf_log ("", GF_LOG_ERROR, "De-registration of NLM v4 failed");
+
+ if (pmap_unset (NLM_PROGRAM, NLMV1_VERSION))
+ gf_log ("", GF_LOG_INFO, "De-registered NLM v1 successfully");
+ else
+ gf_log ("", GF_LOG_ERROR, "De-registration of NLM v1 failed");
+
+ if (pmap_unset (ACL_PROGRAM, ACLV3_VERSION))
+ gf_log ("", GF_LOG_INFO, "De-registered ACL v3 successfully");
+ else
+ gf_log ("", GF_LOG_ERROR, "De-registration of ACL v3 failed");
+}
+
+int
+glusterd_nfs_server_stop ()
+{
+ int ret = 0;
+ gf_boolean_t deregister = _gf_false;
+
+ if (glusterd_is_nodesvc_running ("nfs"))
+ deregister = _gf_true;
+ ret = glusterd_nodesvc_stop ("nfs", SIGKILL);
+ if (ret)
+ goto out;
+ if (deregister)
+ glusterd_nfs_pmap_deregister ();
+out:
+ return ret;
+}
+
+int
+glusterd_shd_stop ()
+{
+ return glusterd_nodesvc_stop ("glustershd", SIGTERM);
+}
+
+int
+glusterd_add_node_to_dict (char *server, dict_t *dict, int count,
+ dict_t *vol_opts)
+{
+ int ret = -1;
+ glusterd_conf_t *priv = THIS->private;
+ char pidfile[PATH_MAX] = {0,};
+ gf_boolean_t running = _gf_false;
+ int pid = -1;
+ int port = 0;
+ char key[1024] = {0,};
+
+ glusterd_get_nodesvc_pidfile (server, priv->workdir, pidfile,
+ sizeof (pidfile));
+ //Consider service to be running only when glusterd sees it Online
+ if (glusterd_is_nodesvc_online (server))
+ running = glusterd_is_service_running (pidfile, &pid);
+
+ /* For nfs-servers/self-heal-daemon setting
+ * brick<n>.hostname = "NFS Server" / "Self-heal Daemon"
+ * brick<n>.path = uuid
+ * brick<n>.port = 0
+ *
+ * This might be confusing, but cli displays the name of
+ * the brick as hostname+path, so this will make more sense
+ * when output.
+ */
+ snprintf (key, sizeof (key), "brick%d.hostname", count);
+ if (!strcmp (server, "nfs"))
+ ret = dict_set_str (dict, key, "NFS Server");
+ else if (!strcmp (server, "glustershd"))
+ ret = dict_set_str (dict, key, "Self-heal Daemon");
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.path", count);
+ ret = dict_set_dynstr (dict, key, gf_strdup (uuid_utoa (MY_UUID)));
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.port", count);
+ /* Port is available only for the NFS server.
+ * Self-heal daemon doesn't provide any port for access
+ * by entities other than gluster.
+ */
+ if (!strcmp (server, "nfs")) {
+ if (dict_get (vol_opts, "nfs.port")) {
+ ret = dict_get_int32 (vol_opts, "nfs.port", &port);
+ if (ret)
+ goto out;
+ } else
+ port = GF_NFS3_PORT;
+ }
+ ret = dict_set_int32 (dict, key, port);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.pid", count);
+ ret = dict_set_int32 (dict, key, pid);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.status", count);
+ ret = dict_set_int32 (dict, key, running);
+ if (ret)
+ goto out;
+
+
+out:
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_remote_hostname_get (rpcsvc_request_t *req, char *remote_host, int len)
+{
+ GF_ASSERT (req);
+ GF_ASSERT (remote_host);
+ GF_ASSERT (req->trans);
+
+ char *name = NULL;
+ char *hostname = NULL;
+ char *tmp_host = NULL;
+ int ret = 0;
+
+ name = req->trans->peerinfo.identifier;
+ tmp_host = gf_strdup (name);
+ if (tmp_host)
+ get_host_name (tmp_host, &hostname);
+
+ GF_ASSERT (hostname);
+ if (!hostname) {
+ memset (remote_host, 0, len);
+ ret = -1;
+ goto out;
+ }
+
+ strncpy (remote_host, hostname, strlen (hostname));
+
+
+out:
+ GF_FREE (tmp_host);
+ return ret;
+}
+
+int
+glusterd_check_generate_start_service (int (*create_volfile) (),
+ int (*stop) (), int (*start) ())
+{
+ int ret = -1;
+
+ ret = create_volfile ();
+ if (ret)
+ goto out;
+
+ ret = stop ();
+ if (ret)
+ goto out;
+
+ ret = start ();
+out:
+ return ret;
+}
+
+int
+glusterd_reconfigure_nodesvc (int (*create_volfile) ())
+{
+ int ret = -1;
+
+ ret = create_volfile ();
+ if (ret)
+ goto out;
+
+ ret = glusterd_fetchspec_notify (THIS);
+out:
+ return ret;
+}
+
+int
+glusterd_reconfigure_shd ()
+{
+ int (*create_volfile) () = glusterd_create_shd_volfile;
+ return glusterd_reconfigure_nodesvc (create_volfile);
+}
+
+int
+glusterd_reconfigure_nfs ()
+{
+ int ret = -1;
+ gf_boolean_t identical = _gf_false;
+
+ /*
+ * Check both OLD and NEW volfiles, if they are SAME by size
+ * and cksum i.e. "character-by-character". If YES, then
+ * NOTHING has been changed, just return.
+ */
+ ret = glusterd_check_nfs_volfile_identical (&identical);
+ if (ret)
+ goto out;
+
+ if (identical) {
+ ret = 0;
+ goto out;
+ }
+
+ /*
+ * They are not identical. Find out if the topology is changed
+ * OR just the volume options. If just the options which got
+ * changed, then inform the xlator to reconfigure the options.
+ */
+ identical = _gf_false; /* RESET the FLAG */
+ ret = glusterd_check_nfs_topology_identical (&identical);
+ if (ret)
+ goto out;
+
+ /* Topology is not changed, but just the options. But write the
+ * options to NFS volfile, so that NFS will be reconfigured.
+ */
+ if (identical) {
+ ret = glusterd_create_nfs_volfile();
+ if (ret == 0) {/* Only if above PASSES */
+ ret = glusterd_fetchspec_notify (THIS);
+ }
+ goto out;
+ }
+
+ /*
+ * NFS volfile's topology has been changed. NFS server needs
+ * to be RESTARTED to ACT on the changed volfile.
+ */
+ ret = glusterd_check_generate_start_nfs ();
+
+out:
+ return ret;
+}
+
+
+int
+glusterd_check_generate_start_nfs ()
+{
+ int ret = 0;
+
+ ret = glusterd_check_generate_start_service (glusterd_create_nfs_volfile,
+ glusterd_nfs_server_stop,
+ glusterd_nfs_server_start);
+ return ret;
+}
+
+int
+glusterd_check_generate_start_shd ()
+{
+ int ret = 0;
+
+ ret = glusterd_check_generate_start_service (glusterd_create_shd_volfile,
+ glusterd_shd_stop,
+ glusterd_shd_start);
+ if (ret == -EINVAL)
+ ret = 0;
+ return ret;
+}
+
+int
+glusterd_nodesvcs_batch_op (glusterd_volinfo_t *volinfo,
+ int (*nfs_op) (), int (*shd_op) ())
+{
+ int ret = 0;
+
+ ret = nfs_op ();
+ if (ret)
+ goto out;
+
+ if (volinfo && !glusterd_is_volume_replicate (volinfo))
+ goto out;
+
+ ret = shd_op ();
+ if (ret)
+ goto out;
+out:
+ return ret;
+}
+
+int
+glusterd_nodesvcs_start (glusterd_volinfo_t *volinfo)
+{
+ return glusterd_nodesvcs_batch_op (volinfo,
+ glusterd_nfs_server_start,
+ glusterd_shd_start);
+}
+
+int
+glusterd_nodesvcs_stop (glusterd_volinfo_t *volinfo)
+{
+ return glusterd_nodesvcs_batch_op (volinfo,
+ glusterd_nfs_server_stop,
+ glusterd_shd_stop);
+}
+
+gf_boolean_t
+glusterd_are_all_volumes_stopped ()
+{
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ glusterd_volinfo_t *voliter = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ list_for_each_entry (voliter, &priv->volumes, vol_list) {
+ if (voliter->status == GLUSTERD_STATUS_STARTED)
+ return _gf_false;
+ }
+
+ return _gf_true;
+
+}
+
+gf_boolean_t
+glusterd_all_replicate_volumes_stopped ()
+{
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ glusterd_volinfo_t *voliter = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ list_for_each_entry (voliter, &priv->volumes, vol_list) {
+ if (!glusterd_is_volume_replicate (voliter))
+ continue;
+ if (voliter->status == GLUSTERD_STATUS_STARTED)
+ return _gf_false;
+ }
+
+ return _gf_true;
+}
+
+int
+glusterd_nodesvcs_handle_graph_change (glusterd_volinfo_t *volinfo)
+{
+ int (*shd_op) () = NULL;
+ int (*nfs_op) () = NULL;
+
+ shd_op = glusterd_check_generate_start_shd;
+ nfs_op = glusterd_check_generate_start_nfs;
+ if (glusterd_are_all_volumes_stopped ()) {
+ shd_op = glusterd_shd_stop;
+ nfs_op = glusterd_nfs_server_stop;
+ } else if (glusterd_all_replicate_volumes_stopped()) {
+ shd_op = glusterd_shd_stop;
+ }
+ return glusterd_nodesvcs_batch_op (volinfo, nfs_op, shd_op);
+}
+
+int
+glusterd_nodesvcs_handle_reconfigure (glusterd_volinfo_t *volinfo)
+{
+ return glusterd_nodesvcs_batch_op (volinfo,
+ glusterd_reconfigure_nfs,
+ glusterd_reconfigure_shd);
+}
+
+int
+glusterd_volume_count_get (void)
+{
+ glusterd_volinfo_t *tmp_volinfo = NULL;
+ int32_t ret = 0;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ priv = this->private;
+
+ list_for_each_entry (tmp_volinfo, &priv->volumes, vol_list) {
+ ret++;
+ }
+
+
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+
+}
+
+int
+glusterd_brickinfo_get (uuid_t uuid, char *hostname, char *path,
+ glusterd_brickinfo_t **brickinfo)
+{
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ int ret = -1;
+
+ GF_ASSERT (path);
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ priv = this->private;
+
+ list_for_each_entry (volinfo, &priv->volumes, vol_list) {
+
+ ret = glusterd_volume_brickinfo_get (uuid, hostname, path,
+ volinfo, brickinfo);
+ if (ret == 0)
+ /*Found*/
+ goto out;
+ }
+out:
+ return ret;
+}
+
+int
+glusterd_brick_start (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ gf_boolean_t wait)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ if ((!brickinfo) || (!volinfo))
+ goto out;
+
+ this = THIS;
+ GF_ASSERT (this);
+ conf = this->private;
+ GF_ASSERT (conf);
+
+ if (uuid_is_null (brickinfo->uuid)) {
+ ret = glusterd_resolve_brick (brickinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, FMTSTR_RESOLVE_BRICK,
+ brickinfo->hostname, brickinfo->path);
+ goto out;
+ }
+ }
+
+ if (uuid_compare (brickinfo->uuid, MY_UUID)) {
+ ret = 0;
+ goto out;
+ }
+ ret = glusterd_volume_start_glusterfs (volinfo, brickinfo, wait);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to start brick %s:%s",
+ brickinfo->hostname, brickinfo->path);
+ goto out;
+ }
+
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "returning %d ", ret);
+ return ret;
+}
+
+int
+glusterd_restart_bricks (glusterd_conf_t *conf)
+{
+ int ret = 0;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_snap_t *snap = NULL;
+ gf_boolean_t start_nodesvcs = _gf_false;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ list_for_each_entry (volinfo, &conf->volumes, vol_list) {
+ if (volinfo->status != GLUSTERD_STATUS_STARTED)
+ continue;
+ start_nodesvcs = _gf_true;
+ gf_log (this->name, GF_LOG_DEBUG, "starting the volume %s",
+ volinfo->volname);
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ glusterd_brick_start (volinfo, brickinfo, _gf_false);
+ }
+ }
+
+ list_for_each_entry (snap, &conf->snapshots, snap_list) {
+ list_for_each_entry (volinfo, &snap->volumes, vol_list) {
+ if (volinfo->status != GLUSTERD_STATUS_STARTED)
+ continue;
+ start_nodesvcs = _gf_true;
+ gf_log (this->name, GF_LOG_DEBUG, "starting the snap "
+ "volume %s", volinfo->volname);
+ list_for_each_entry (brickinfo, &volinfo->bricks,
+ brick_list) {
+ glusterd_brick_start (volinfo, brickinfo,
+ _gf_false);
+ }
+ }
+ }
+
+ if (start_nodesvcs)
+ glusterd_nodesvcs_handle_graph_change (NULL);
+
+ return ret;
+}
+
+int
+_local_gsyncd_start (dict_t *this, char *key, data_t *value, void *data)
+{
+ char *path_list = NULL;
+ char *slave = NULL;
+ int uuid_len = 0;
+ int ret = 0;
+ char uuid_str[64] = {0};
+ glusterd_volinfo_t *volinfo = NULL;
+ char *conf_path = NULL;
+
+ volinfo = data;
+ GF_ASSERT (volinfo);
+ slave = strchr(value->data, ':');
+ if (slave)
+ slave ++;
+ else
+ return 0;
+ uuid_len = (slave - value->data - 1);
+
+ strncpy (uuid_str, (char*)value->data, uuid_len);
+
+ ret = glusterd_get_local_brickpaths (volinfo, &path_list);
+
+ ret = dict_get_str (this, "conf_path", &conf_path);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to fetch conf file path.");
+ goto out;
+ }
+
+ glusterd_start_gsync (volinfo, slave, path_list, conf_path,
+ uuid_str, NULL);
+
+ GF_FREE (path_list);
+ path_list = NULL;
+
+out:
+ return ret;
+}
+
+int
+glusterd_volume_restart_gsyncds (glusterd_volinfo_t *volinfo)
+{
+ GF_ASSERT (volinfo);
+
+ dict_foreach (volinfo->gsync_slaves, _local_gsyncd_start, volinfo);
+ return 0;
+}
+
+int
+glusterd_restart_gsyncds (glusterd_conf_t *conf)
+{
+ glusterd_volinfo_t *volinfo = NULL;
+ int ret = 0;
+
+ list_for_each_entry (volinfo, &conf->volumes, vol_list) {
+ glusterd_volume_restart_gsyncds (volinfo);
+ }
+ return ret;
+}
+
+inline int
+glusterd_get_dist_leaf_count (glusterd_volinfo_t *volinfo)
+{
+ int rcount = volinfo->replica_count;
+ int scount = volinfo->stripe_count;
+
+ return (rcount ? rcount : 1) * (scount ? scount : 1);
+}
+
+int
+glusterd_get_brickinfo (xlator_t *this, const char *brickname, int port,
+ gf_boolean_t localhost, glusterd_brickinfo_t **brickinfo)
+{
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *tmpbrkinfo = NULL;
+ int ret = -1;
+
+ GF_ASSERT (brickname);
+ GF_ASSERT (this);
+
+ priv = this->private;
+ list_for_each_entry (volinfo, &priv->volumes, vol_list) {
+ list_for_each_entry (tmpbrkinfo, &volinfo->bricks,
+ brick_list) {
+ if (localhost && !gf_is_local_addr (tmpbrkinfo->hostname))
+ continue;
+ if (!strcmp(tmpbrkinfo->path, brickname) &&
+ (tmpbrkinfo->port == port)) {
+ *brickinfo = tmpbrkinfo;
+ return 0;
+ }
+ }
+ }
+ return ret;
+}
+
+glusterd_brickinfo_t*
+glusterd_get_brickinfo_by_position (glusterd_volinfo_t *volinfo, uint32_t pos)
+{
+ glusterd_brickinfo_t *tmpbrkinfo = NULL;
+
+ list_for_each_entry (tmpbrkinfo, &volinfo->bricks,
+ brick_list) {
+ if (pos == 0)
+ return tmpbrkinfo;
+ pos--;
+ }
+ return NULL;
+}
+
+void
+glusterd_set_brick_status (glusterd_brickinfo_t *brickinfo,
+ gf_brick_status_t status)
+{
+ GF_ASSERT (brickinfo);
+ brickinfo->status = status;
+ if (GF_BRICK_STARTED == status) {
+ gf_log ("glusterd", GF_LOG_DEBUG, "Setting brick %s:%s status "
+ "to started", brickinfo->hostname, brickinfo->path);
+ } else {
+ gf_log ("glusterd", GF_LOG_DEBUG, "Setting brick %s:%s status "
+ "to stopped", brickinfo->hostname, brickinfo->path);
+ }
+}
+
+gf_boolean_t
+glusterd_is_brick_started (glusterd_brickinfo_t *brickinfo)
+{
+ GF_ASSERT (brickinfo);
+ return (brickinfo->status == GF_BRICK_STARTED);
+}
+
+int
+glusterd_friend_brick_belongs (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo, void* uuid)
+{
+ int ret = -1;
+
+ GF_ASSERT (volinfo);
+ GF_ASSERT (brickinfo);
+ GF_ASSERT (uuid);
+
+ if (uuid_is_null (brickinfo->uuid)) {
+ ret = glusterd_resolve_brick (brickinfo);
+ if (ret) {
+ GF_ASSERT (0);
+ goto out;
+ }
+ }
+ if (!uuid_compare (brickinfo->uuid, *((uuid_t *)uuid)))
+ return 0;
+out:
+ return -1;
+}
+
+#ifdef GF_LINUX_HOST_OS
+int
+glusterd_get_brick_root (char *path, char **mount_point)
+{
+ char *ptr = NULL;
+ char *mnt_pt = NULL;
+ struct stat brickstat = {0};
+ struct stat buf = {0};
+
+ if (!path)
+ goto err;
+ mnt_pt = gf_strdup (path);
+ if (!mnt_pt)
+ goto err;
+ if (stat (mnt_pt, &brickstat))
+ goto err;
+
+ while ((ptr = strrchr (mnt_pt, '/')) &&
+ ptr != mnt_pt) {
+
+ *ptr = '\0';
+ if (stat (mnt_pt, &buf)) {
+ gf_log (THIS->name, GF_LOG_ERROR, "error in "
+ "stat: %s", strerror (errno));
+ goto err;
+ }
+
+ if (brickstat.st_dev != buf.st_dev) {
+ *ptr = '/';
+ break;
+ }
+ }
+
+ if (ptr == mnt_pt) {
+ if (stat ("/", &buf)) {
+ gf_log (THIS->name, GF_LOG_ERROR, "error in "
+ "stat: %s", strerror (errno));
+ goto err;
+ }
+ if (brickstat.st_dev == buf.st_dev)
+ strcpy (mnt_pt, "/");
+ }
+
+ *mount_point = mnt_pt;
+ return 0;
+
+ err:
+ GF_FREE (mnt_pt);
+ return -1;
+}
+
+static char*
+glusterd_parse_inode_size (char *stream, char *pattern)
+{
+ char *needle = NULL;
+ char *trail = NULL;
+
+ needle = strstr (stream, pattern);
+ if (!needle)
+ goto out;
+
+ needle = nwstrtail (needle, pattern);
+
+ trail = needle;
+ while (trail && isdigit (*trail)) trail++;
+ if (trail)
+ *trail = '\0';
+
+out:
+ return needle;
+}
+
+static int
+glusterd_add_inode_size_to_dict (dict_t *dict, int count)
+{
+ int ret = -1;
+ char key[1024] = {0};
+ char buffer[4096] = {0};
+ char *inode_size = NULL;
+ char *device = NULL;
+ char *fs_name = NULL;
+ char *cur_word = NULL;
+ char *pattern = NULL;
+ char *trail = NULL;
+ runner_t runner = {0, };
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.device", count);
+ ret = dict_get_str (dict, key, &device);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.fs_name", count);
+ ret = dict_get_str (dict, key, &fs_name);
+ if (ret)
+ goto out;
+
+ runinit (&runner);
+ runner_redir (&runner, STDOUT_FILENO, RUN_PIPE);
+ /* get inode size for xfs or ext2/3/4 */
+ if (!strcmp (fs_name, "xfs")) {
+
+ runner_add_args (&runner, "xfs_info", device, NULL);
+ pattern = "isize=";
+
+ } else if (IS_EXT_FS(fs_name)) {
+
+ runner_add_args (&runner, "tune2fs", "-l", device, NULL);
+ pattern = "Inode size:";
+
+ } else {
+ ret = 0;
+ gf_log (THIS->name, GF_LOG_INFO, "Skipped fetching "
+ "inode size for %s: FS type not recommended",
+ fs_name);
+ goto out;
+ }
+
+ ret = runner_start (&runner);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "could not get inode "
+ "size for %s : %s package missing", fs_name,
+ ((strcmp (fs_name, "xfs")) ?
+ "e2fsprogs" : "xfsprogs"));
+ goto out;
+ }
+
+ for (;;) {
+ if (fgets (buffer, sizeof (buffer),
+ runner_chio (&runner, STDOUT_FILENO)) == NULL)
+ break;
+ trail = strrchr (buffer, '\n');
+ if (trail)
+ *trail = '\0';
+
+ cur_word = glusterd_parse_inode_size (buffer, pattern);
+ if (cur_word)
+ break;
+ }
+
+ ret = runner_end (&runner);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "%s exited with non-zero "
+ "exit status", ((!strcmp (fs_name, "xfs")) ?
+ "xfs_info" : "tune2fs"));
+ goto out;
+ }
+ if (!cur_word) {
+ ret = -1;
+ gf_log (THIS->name, GF_LOG_ERROR, "Unable to retrieve inode "
+ "size using %s",
+ (!strcmp (fs_name, "xfs")? "xfs_info": "tune2fs"));
+ goto out;
+ }
+
+ inode_size = gf_strdup (cur_word);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.inode_size", count);
+
+ ret = dict_set_dynstr (dict, key, inode_size);
+
+ out:
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR, "failed to get inode size");
+ return ret;
+}
+
+struct mntent *
+glusterd_get_mnt_entry_info (char *mnt_pt, FILE *mtab)
+{
+ struct mntent *entry = NULL;
+
+ mtab = setmntent (_PATH_MOUNTED, "r");
+ if (!mtab)
+ goto out;
+
+ entry = getmntent (mtab);
+
+ while (1) {
+ if (!entry)
+ goto out;
+
+ if (!strcmp (entry->mnt_dir, mnt_pt) &&
+ strcmp (entry->mnt_type, "rootfs"))
+ break;
+ entry = getmntent (mtab);
+ }
+
+out:
+ return entry;
+}
+
+static int
+glusterd_add_brick_mount_details (glusterd_brickinfo_t *brickinfo,
+ dict_t *dict, int count)
+{
+ int ret = -1;
+ char key[1024] = {0};
+ char base_key[1024] = {0};
+ char *mnt_pt = NULL;
+ char *fs_name = NULL;
+ char *mnt_options = NULL;
+ char *device = NULL;
+ struct mntent *entry = NULL;
+ FILE *mtab = NULL;
+
+ snprintf (base_key, sizeof (base_key), "brick%d", count);
+
+ ret = glusterd_get_brick_root (brickinfo->path, &mnt_pt);
+ if (ret)
+ goto out;
+
+ entry = glusterd_get_mnt_entry_info (mnt_pt, mtab);
+ if (!entry) {
+ ret = -1;
+ goto out;
+ }
+
+ /* get device file */
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.device", base_key);
+
+ device = gf_strdup (entry->mnt_fsname);
+ ret = dict_set_dynstr (dict, key, device);
+ if (ret)
+ goto out;
+
+ /* fs type */
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.fs_name", base_key);
+
+ fs_name = gf_strdup (entry->mnt_type);
+ ret = dict_set_dynstr (dict, key, fs_name);
+ if (ret)
+ goto out;
+
+ /* mount options */
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.mnt_options", base_key);
+
+ mnt_options = gf_strdup (entry->mnt_opts);
+ ret = dict_set_dynstr (dict, key, mnt_options);
+
+ out:
+ GF_FREE (mnt_pt);
+ if (mtab)
+ endmntent (mtab);
+
+ return ret;
+}
+
+char*
+glusterd_get_brick_mount_details (glusterd_brickinfo_t *brickinfo)
+{
+ int ret = -1;
+ char *mnt_pt = NULL;
+ char *device = NULL;
+ FILE *mtab = NULL;
+ struct mntent *entry = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (brickinfo);
+
+ ret = glusterd_get_brick_root (brickinfo->path, &mnt_pt);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get mount point "
+ "for %s brick", brickinfo->path);
+ goto out;
+ }
+
+ entry = glusterd_get_mnt_entry_info (mnt_pt, mtab);
+ if (NULL == entry) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get mnt entry "
+ "for %s mount path", mnt_pt);
+ goto out;
+ }
+
+ /* get the fs_name/device */
+ device = gf_strdup (entry->mnt_fsname);
+
+out:
+ if (NULL != mtab) {
+ endmntent (mtab);
+ }
+
+ return device;
+}
+#endif
+
+int
+glusterd_add_brick_detail_to_dict (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ dict_t *dict, int count)
+{
+ int ret = -1;
+ uint64_t memtotal = 0;
+ uint64_t memfree = 0;
+ uint64_t inodes_total = 0;
+ uint64_t inodes_free = 0;
+ uint64_t block_size = 0;
+ char key[1024] = {0};
+ char base_key[1024] = {0};
+ struct statvfs brickstat = {0};
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (volinfo);
+ GF_ASSERT (brickinfo);
+ GF_ASSERT (dict);
+
+ snprintf (base_key, sizeof (base_key), "brick%d", count);
+
+ ret = statvfs (brickinfo->path, &brickstat);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "statfs error: %s ",
+ strerror (errno));
+ goto out;
+ }
+
+ /* file system block size */
+ block_size = brickstat.f_bsize;
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.block_size", base_key);
+ ret = dict_set_uint64 (dict, key, block_size);
+ if (ret)
+ goto out;
+
+ /* free space in brick */
+ memfree = brickstat.f_bfree * brickstat.f_bsize;
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.free", base_key);
+ ret = dict_set_uint64 (dict, key, memfree);
+ if (ret)
+ goto out;
+
+ /* total space of brick */
+ memtotal = brickstat.f_blocks * brickstat.f_bsize;
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.total", base_key);
+ ret = dict_set_uint64 (dict, key, memtotal);
+ if (ret)
+ goto out;
+
+ /* inodes: total and free counts only for ext2/3/4 and xfs */
+ inodes_total = brickstat.f_files;
+ if (inodes_total) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.total_inodes", base_key);
+ ret = dict_set_uint64 (dict, key, inodes_total);
+ if (ret)
+ goto out;
+ }
+
+ inodes_free = brickstat.f_ffree;
+ if (inodes_free) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.free_inodes", base_key);
+ ret = dict_set_uint64 (dict, key, inodes_free);
+ if (ret)
+ goto out;
+ }
+#ifdef GF_LINUX_HOST_OS
+ ret = glusterd_add_brick_mount_details (brickinfo, dict, count);
+ if (ret)
+ goto out;
+
+ ret = glusterd_add_inode_size_to_dict (dict, count);
+#endif
+ out:
+ if (ret)
+ gf_log (this->name, GF_LOG_DEBUG, "Error adding brick"
+ " detail to dict: %s", strerror (errno));
+ return ret;
+}
+
+int32_t
+glusterd_add_brick_to_dict (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ dict_t *dict, int32_t count)
+{
+
+ int ret = -1;
+ int32_t pid = -1;
+ int32_t brick_online = -1;
+ char key[1024] = {0};
+ char base_key[1024] = {0};
+ char pidfile[PATH_MAX] = {0};
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+
+
+ GF_ASSERT (volinfo);
+ GF_ASSERT (brickinfo);
+ GF_ASSERT (dict);
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ priv = this->private;
+
+ snprintf (base_key, sizeof (base_key), "brick%d", count);
+ snprintf (key, sizeof (key), "%s.hostname", base_key);
+
+ ret = dict_set_str (dict, key, brickinfo->hostname);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.path", base_key);
+ ret = dict_set_str (dict, key, brickinfo->path);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.port", base_key);
+ ret = dict_set_int32 (dict, key, brickinfo->port);
+ if (ret)
+ goto out;
+
+ GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv);
+
+ brick_online = glusterd_is_service_running (pidfile, &pid);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.pid", base_key);
+ ret = dict_set_int32 (dict, key, pid);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.status", base_key);
+ ret = dict_set_int32 (dict, key, brick_online);
+
+out:
+ if (ret)
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+int32_t
+glusterd_get_all_volnames (dict_t *dict)
+{
+ int ret = -1;
+ int32_t vol_count = 0;
+ char key[256] = {0};
+ glusterd_volinfo_t *entry = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ priv = THIS->private;
+ GF_ASSERT (priv);
+
+ list_for_each_entry (entry, &priv->volumes, vol_list) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "vol%d", vol_count);
+ ret = dict_set_str (dict, key, entry->volname);
+ if (ret)
+ goto out;
+
+ vol_count++;
+ }
+
+ ret = dict_set_int32 (dict, "vol_count", vol_count);
+
+ out:
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR, "failed to get all "
+ "volume names for status");
+ return ret;
+}
+
+int
+glusterd_all_volume_cond_check (glusterd_condition_func func, int status,
+ void *ctx)
+{
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ priv = this->private;
+
+ list_for_each_entry (volinfo, &priv->volumes, vol_list) {
+ list_for_each_entry (brickinfo, &volinfo->bricks,
+ brick_list) {
+ ret = func (volinfo, brickinfo, ctx);
+ if (ret != status) {
+ ret = -1;
+ goto out;
+ }
+ }
+ }
+ ret = 0;
+out:
+ gf_log ("", GF_LOG_DEBUG, "returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_friend_find_by_uuid (uuid_t uuid,
+ glusterd_peerinfo_t **peerinfo)
+{
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+ glusterd_peerinfo_t *entry = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (peerinfo);
+
+ *peerinfo = NULL;
+ priv = this->private;
+
+ GF_ASSERT (priv);
+
+ if (uuid_is_null (uuid))
+ return -1;
+
+ list_for_each_entry (entry, &priv->peers, uuid_list) {
+ if (!uuid_compare (entry->uuid, uuid)) {
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Friend found... state: %s",
+ glusterd_friend_sm_state_name_get (entry->state.state));
+ *peerinfo = entry;
+ return 0;
+ }
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "Friend with uuid: %s, not found",
+ uuid_utoa (uuid));
+ return ret;
+}
+
+
+int
+glusterd_friend_find_by_hostname (const char *hoststr,
+ glusterd_peerinfo_t **peerinfo)
+{
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+ glusterd_peerinfo_t *entry = NULL;
+ struct addrinfo *addr = NULL;
+ struct addrinfo *p = NULL;
+ char *host = NULL;
+ struct sockaddr_in6 *s6 = NULL;
+ struct sockaddr_in *s4 = NULL;
+ struct in_addr *in_addr = NULL;
+ char hname[1024] = {0,};
+ xlator_t *this = NULL;
+
+
+ this = THIS;
+ GF_ASSERT (hoststr);
+ GF_ASSERT (peerinfo);
+
+ *peerinfo = NULL;
+ priv = this->private;
+
+ GF_ASSERT (priv);
+
+ list_for_each_entry (entry, &priv->peers, uuid_list) {
+ if (!strncasecmp (entry->hostname, hoststr,
+ 1024)) {
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Friend %s found.. state: %d", hoststr,
+ entry->state.state);
+ *peerinfo = entry;
+ return 0;
+ }
+ }
+
+ ret = getaddrinfo (hoststr, NULL, NULL, &addr);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "error in getaddrinfo: %s\n",
+ gai_strerror(ret));
+ goto out;
+ }
+
+ for (p = addr; p != NULL; p = p->ai_next) {
+ switch (p->ai_family) {
+ case AF_INET:
+ s4 = (struct sockaddr_in *) p->ai_addr;
+ in_addr = &s4->sin_addr;
+ break;
+ case AF_INET6:
+ s6 = (struct sockaddr_in6 *) p->ai_addr;
+ in_addr =(struct in_addr *) &s6->sin6_addr;
+ break;
+ default: ret = -1;
+ goto out;
+ }
+ host = inet_ntoa(*in_addr);
+
+ ret = getnameinfo (p->ai_addr, p->ai_addrlen, hname,
+ 1024, NULL, 0, 0);
+ if (ret)
+ goto out;
+
+ list_for_each_entry (entry, &priv->peers, uuid_list) {
+ if (!strncasecmp (entry->hostname, host,
+ 1024) || !strncasecmp (entry->hostname,hname,
+ 1024)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Friend %s found.. state: %d",
+ hoststr, entry->state.state);
+ *peerinfo = entry;
+ freeaddrinfo (addr);
+ return 0;
+ }
+ }
+ }
+
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "Unable to find friend: %s", hoststr);
+ if (addr)
+ freeaddrinfo (addr);
+ return -1;
+}
+
+int
+glusterd_hostname_to_uuid (char *hostname, uuid_t uuid)
+{
+ GF_ASSERT (hostname);
+ GF_ASSERT (uuid);
+
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = glusterd_friend_find_by_hostname (hostname, &peerinfo);
+ if (ret) {
+ if (gf_is_local_addr (hostname)) {
+ uuid_copy (uuid, MY_UUID);
+ ret = 0;
+ } else {
+ ret = 0;
+ goto out;
+ }
+ } else {
+ uuid_copy (uuid, peerinfo->uuid);
+ }
+
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_brick_stop (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ gf_boolean_t del_brick)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ if ((!brickinfo) || (!volinfo))
+ goto out;
+
+ this = THIS;
+ GF_ASSERT (this);
+ conf = this->private;
+ GF_ASSERT (conf);
+
+ if (uuid_is_null (brickinfo->uuid)) {
+ ret = glusterd_resolve_brick (brickinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, FMTSTR_RESOLVE_BRICK,
+ brickinfo->hostname, brickinfo->path);
+ goto out;
+ }
+ }
+
+ if (uuid_compare (brickinfo->uuid, MY_UUID)) {
+ ret = 0;
+ if (del_brick)
+ glusterd_delete_brick (volinfo, brickinfo);
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "About to stop glusterfs"
+ " for brick %s:%s", brickinfo->hostname,
+ brickinfo->path);
+ ret = glusterd_volume_stop_glusterfs (volinfo, brickinfo, del_brick);
+ if (ret) {
+ gf_log (this->name, GF_LOG_CRITICAL, "Unable to stop"
+ " brick: %s:%s", brickinfo->hostname,
+ brickinfo->path);
+ goto out;
+ }
+
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "returning %d ", ret);
+ return ret;
+}
+
+int
+glusterd_is_defrag_on (glusterd_volinfo_t *volinfo)
+{
+ return (volinfo->rebal.defrag != NULL);
+}
+
+gf_boolean_t
+glusterd_is_rb_ongoing (glusterd_volinfo_t *volinfo)
+{
+ gf_boolean_t ret = _gf_false;
+
+ GF_ASSERT (volinfo);
+
+ if (glusterd_is_rb_started (volinfo) ||
+ glusterd_is_rb_paused (volinfo))
+ ret = _gf_true;
+
+ return ret;
+}
+
+int
+glusterd_new_brick_validate (char *brick, glusterd_brickinfo_t *brickinfo,
+ char *op_errstr, size_t len)
+{
+ glusterd_brickinfo_t *newbrickinfo = NULL;
+ int ret = -1;
+ gf_boolean_t is_allocated = _gf_false;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+
+ GF_ASSERT (brick);
+ GF_ASSERT (op_errstr);
+
+ if (!brickinfo) {
+ ret = glusterd_brickinfo_new_from_brick (brick, &newbrickinfo);
+ if (ret)
+ goto out;
+ is_allocated = _gf_true;
+ } else {
+ newbrickinfo = brickinfo;
+ }
+
+ ret = glusterd_resolve_brick (newbrickinfo);
+ if (ret) {
+ snprintf(op_errstr, len, "Host %s is not in \'Peer "
+ "in Cluster\' state", newbrickinfo->hostname);
+ goto out;
+ }
+
+ if (!uuid_compare (MY_UUID, newbrickinfo->uuid)) {
+ /* brick is local */
+ if (!glusterd_is_brickpath_available (newbrickinfo->uuid,
+ newbrickinfo->path)) {
+ snprintf(op_errstr, len, "Brick: %s not available."
+ " Brick may be containing or be contained "
+ "by an existing brick", brick);
+ ret = -1;
+ goto out;
+ }
+
+ } else {
+ ret = glusterd_friend_find_by_uuid (newbrickinfo->uuid,
+ &peerinfo);
+ if (ret) {
+ snprintf (op_errstr, len, "Failed to find host %s",
+ newbrickinfo->hostname);
+ goto out;
+ }
+
+ if ((!peerinfo->connected)) {
+ snprintf(op_errstr, len, "Host %s not connected",
+ newbrickinfo->hostname);
+ ret = -1;
+ goto out;
+ }
+
+ if (peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) {
+ snprintf(op_errstr, len, "Host %s is not in \'Peer "
+ "in Cluster\' state",
+ newbrickinfo->hostname);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ if (is_allocated)
+ glusterd_brickinfo_delete (newbrickinfo);
+ if (op_errstr[0] != '\0')
+ gf_log (this->name, GF_LOG_ERROR, "%s", op_errstr);
+ gf_log (this->name, GF_LOG_DEBUG, "returning %d ", ret);
+ return ret;
+}
+
+int
+glusterd_is_rb_started(glusterd_volinfo_t *volinfo)
+{
+ gf_log ("", GF_LOG_DEBUG,
+ "is_rb_started:status=%d", volinfo->rep_brick.rb_status);
+ return (volinfo->rep_brick.rb_status == GF_RB_STATUS_STARTED);
+
+}
+
+int
+glusterd_is_rb_paused ( glusterd_volinfo_t *volinfo)
+{
+ gf_log ("", GF_LOG_DEBUG,
+ "is_rb_paused:status=%d", volinfo->rep_brick.rb_status);
+
+ return (volinfo->rep_brick.rb_status == GF_RB_STATUS_PAUSED);
+}
+
+inline int
+glusterd_set_rb_status (glusterd_volinfo_t *volinfo, gf_rb_status_t status)
+{
+ gf_log ("", GF_LOG_DEBUG,
+ "setting status from %d to %d",
+ volinfo->rep_brick.rb_status,
+ status);
+
+ volinfo->rep_brick.rb_status = status;
+ return 0;
+}
+
+inline int
+glusterd_rb_check_bricks (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *src, glusterd_brickinfo_t *dst)
+{
+ glusterd_replace_brick_t *rb = NULL;
+
+ GF_ASSERT (volinfo);
+
+ rb = &volinfo->rep_brick;
+
+ if (!rb->src_brick || !rb->dst_brick)
+ return -1;
+
+ if (strcmp (rb->src_brick->hostname, src->hostname) ||
+ strcmp (rb->src_brick->path, src->path)) {
+ gf_log("", GF_LOG_ERROR, "Replace brick src bricks differ");
+ return -1;
+ }
+
+ if (strcmp (rb->dst_brick->hostname, dst->hostname) ||
+ strcmp (rb->dst_brick->path, dst->path)) {
+ gf_log ("", GF_LOG_ERROR, "Replace brick dst bricks differ");
+ return -1;
+ }
+
+ return 0;
+}
+
+/*path needs to be absolute; works only on gfid, volume-id*/
+static int
+glusterd_is_uuid_present (char *path, char *xattr, gf_boolean_t *present)
+{
+ GF_ASSERT (path);
+ GF_ASSERT (xattr);
+ GF_ASSERT (present);
+
+ int ret = -1;
+ uuid_t uid = {0,};
+
+ if (!path || !xattr || !present)
+ goto out;
+
+ ret = sys_lgetxattr (path, xattr, &uid, 16);
+
+ if (ret >= 0) {
+ *present = _gf_true;
+ ret = 0;
+ goto out;
+ }
+
+ switch (errno) {
+#if defined(ENODATA)
+ case ENODATA: /* FALLTHROUGH */
+#endif
+#if defined(ENOATTR) && (ENOATTR != ENODATA)
+ case ENOATTR: /* FALLTHROUGH */
+#endif
+ case ENOTSUP:
+ *present = _gf_false;
+ ret = 0;
+ break;
+ default:
+ break;
+ }
+out:
+ return ret;
+}
+
+/*path needs to be absolute*/
+static int
+glusterd_is_path_in_use (char *path, gf_boolean_t *in_use, char **op_errstr)
+{
+ int i = 0;
+ int ret = -1;
+ gf_boolean_t used = _gf_false;
+ char dir[PATH_MAX] = {0,};
+ char *curdir = NULL;
+ char msg[2048] = {0};
+ char *keys[3] = {GFID_XATTR_KEY,
+ GF_XATTR_VOL_ID_KEY,
+ NULL};
+
+ GF_ASSERT (path);
+ if (!path)
+ goto out;
+
+ strcpy (dir, path);
+ curdir = dir;
+ do {
+ for (i = 0; !used && keys[i]; i++) {
+ ret = glusterd_is_uuid_present (curdir, keys[i], &used);
+ if (ret)
+ goto out;
+ }
+
+ if (used)
+ break;
+
+ curdir = dirname (curdir);
+ if (!strcmp (curdir, "."))
+ goto out;
+
+
+ } while (strcmp (curdir, "/"));
+
+ if (!strcmp (curdir, "/")) {
+ for (i = 0; !used && keys[i]; i++) {
+ ret = glusterd_is_uuid_present (curdir, keys[i], &used);
+ if (ret)
+ goto out;
+ }
+ }
+
+ ret = 0;
+ *in_use = used;
+out:
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Failed to get extended "
+ "attribute %s, reason: %s", keys[i],
+ strerror (errno));
+ }
+
+ if (*in_use) {
+ if (!strcmp (path, curdir)) {
+ snprintf (msg, sizeof (msg), "%s is already part of a "
+ "volume", path);
+ } else {
+ snprintf (msg, sizeof (msg), "parent directory %s is "
+ "already part of a volume", curdir);
+ }
+ }
+
+ if (strlen (msg)) {
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ }
+
+ return ret;
+}
+
+int
+glusterd_check_and_set_brick_xattr (char *host, char *path, uuid_t uuid,
+ char **op_errstr, gf_boolean_t is_force)
+{
+ int ret = -1;
+ char msg[2048] = {0,};
+ gf_boolean_t in_use = _gf_false;
+ int flags = 0;
+
+ /* Check for xattr support in backend fs */
+ ret = sys_lsetxattr (path, "trusted.glusterfs.test",
+ "working", 8, 0);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Glusterfs is not"
+ " supported on brick: %s:%s.\nSetting"
+ " extended attributes failed, reason:"
+ " %s.", host, path, strerror(errno));
+ goto out;
+
+ } else {
+ sys_lremovexattr (path, "trusted.glusterfs.test");
+ }
+
+ ret = glusterd_is_path_in_use (path, &in_use, op_errstr);
+ if (ret)
+ goto out;
+
+ if (in_use && !is_force) {
+ ret = -1;
+ goto out;
+ }
+
+
+ if (!is_force)
+ flags = XATTR_CREATE;
+
+ ret = sys_lsetxattr (path, GF_XATTR_VOL_ID_KEY, uuid, 16,
+ flags);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Failed to set extended "
+ "attributes %s, reason: %s",
+ GF_XATTR_VOL_ID_KEY, strerror (errno));
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (strlen (msg))
+ *op_errstr = gf_strdup (msg);
+
+ return ret;
+}
+
+int
+glusterd_sm_tr_log_transition_add_to_dict (dict_t *dict,
+ glusterd_sm_tr_log_t *log, int i,
+ int count)
+{
+ int ret = -1;
+ char key[512] = {0};
+ char timestr[64] = {0,};
+ char *str = NULL;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (log);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "log%d-old-state", count);
+ str = log->state_name_get (log->transitions[i].old_state);
+ ret = dict_set_str (dict, key, str);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "log%d-event", count);
+ str = log->event_name_get (log->transitions[i].event);
+ ret = dict_set_str (dict, key, str);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "log%d-new-state", count);
+ str = log->state_name_get (log->transitions[i].new_state);
+ ret = dict_set_str (dict, key, str);
+ if (ret)
+ goto out;
+
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "log%d-time", count);
+ gf_time_fmt (timestr, sizeof timestr, log->transitions[i].time,
+ gf_timefmt_FT);
+ str = gf_strdup (timestr);
+ ret = dict_set_dynstr (dict, key, str);
+ if (ret)
+ goto out;
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_sm_tr_log_add_to_dict (dict_t *dict,
+ glusterd_sm_tr_log_t *circular_log)
+{
+ int ret = -1;
+ int i = 0;
+ int start = 0;
+ int end = 0;
+ int index = 0;
+ char key[256] = {0};
+ glusterd_sm_tr_log_t *log = NULL;
+ int count = 0;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (circular_log);
+
+ log = circular_log;
+ if (!log->count)
+ return 0;
+
+ if (log->count == log->size)
+ start = log->current + 1;
+
+ end = start + log->count;
+ for (i = start; i < end; i++, count++) {
+ index = i % log->count;
+ ret = glusterd_sm_tr_log_transition_add_to_dict (dict, log, index,
+ count);
+ if (ret)
+ goto out;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "count");
+ ret = dict_set_int32 (dict, key, log->count);
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_sm_tr_log_init (glusterd_sm_tr_log_t *log,
+ char * (*state_name_get) (int),
+ char * (*event_name_get) (int),
+ size_t size)
+{
+ glusterd_sm_transition_t *transitions = NULL;
+ int ret = -1;
+
+ GF_ASSERT (size > 0);
+ GF_ASSERT (log && state_name_get && event_name_get);
+
+ if (!log || !state_name_get || !event_name_get || (size <= 0))
+ goto out;
+
+ transitions = GF_CALLOC (size, sizeof (*transitions),
+ gf_gld_mt_sm_tr_log_t);
+ if (!transitions)
+ goto out;
+
+ log->transitions = transitions;
+ log->size = size;
+ log->state_name_get = state_name_get;
+ log->event_name_get = event_name_get;
+ ret = 0;
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "returning %d", ret);
+ return ret;
+}
+
+void
+glusterd_sm_tr_log_delete (glusterd_sm_tr_log_t *log)
+{
+ if (!log)
+ return;
+ GF_FREE (log->transitions);
+ return;
+}
+
+int
+glusterd_sm_tr_log_transition_add (glusterd_sm_tr_log_t *log,
+ int old_state, int new_state,
+ int event)
+{
+ glusterd_sm_transition_t *transitions = NULL;
+ int ret = -1;
+ int next = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ GF_ASSERT (log);
+ if (!log)
+ goto out;
+
+ transitions = log->transitions;
+ if (!transitions)
+ goto out;
+
+ if (log->count)
+ next = (log->current + 1) % log->size;
+ else
+ next = 0;
+
+ transitions[next].old_state = old_state;
+ transitions[next].new_state = new_state;
+ transitions[next].event = event;
+ time (&transitions[next].time);
+ log->current = next;
+ if (log->count < log->size)
+ log->count++;
+ ret = 0;
+ gf_log (this->name, GF_LOG_DEBUG, "Transitioning from '%s' to '%s' "
+ "due to event '%s'", log->state_name_get (old_state),
+ log->state_name_get (new_state), log->event_name_get (event));
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_peerinfo_new (glusterd_peerinfo_t **peerinfo,
+ glusterd_friend_sm_state_t state, uuid_t *uuid,
+ const char *hostname, int port)
+{
+ glusterd_peerinfo_t *new_peer = NULL;
+ int ret = -1;
+
+ GF_ASSERT (peerinfo);
+ if (!peerinfo)
+ goto out;
+
+ new_peer = GF_CALLOC (1, sizeof (*new_peer), gf_gld_mt_peerinfo_t);
+ if (!new_peer)
+ goto out;
+
+ new_peer->state.state = state;
+ if (hostname)
+ new_peer->hostname = gf_strdup (hostname);
+
+ INIT_LIST_HEAD (&new_peer->uuid_list);
+
+ if (uuid) {
+ uuid_copy (new_peer->uuid, *uuid);
+ }
+
+ ret = glusterd_sm_tr_log_init (&new_peer->sm_log,
+ glusterd_friend_sm_state_name_get,
+ glusterd_friend_sm_event_name_get,
+ GLUSTERD_TR_LOG_SIZE);
+ if (ret)
+ goto out;
+
+ if (new_peer->state.state == GD_FRIEND_STATE_BEFRIENDED)
+ new_peer->quorum_contrib = QUORUM_WAITING;
+ new_peer->port = port;
+ *peerinfo = new_peer;
+out:
+ if (ret && new_peer)
+ glusterd_friend_cleanup (new_peer);
+ gf_log ("", GF_LOG_DEBUG, "returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_peer_destroy (glusterd_peerinfo_t *peerinfo)
+{
+ int32_t ret = -1;
+
+ if (!peerinfo)
+ goto out;
+
+ ret = glusterd_store_delete_peerinfo (peerinfo);
+
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Deleting peer info failed");
+ }
+
+ list_del_init (&peerinfo->uuid_list);
+ GF_FREE (peerinfo->hostname);
+ glusterd_sm_tr_log_delete (&peerinfo->sm_log);
+ GF_FREE (peerinfo);
+ peerinfo = NULL;
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int
+glusterd_remove_pending_entry (struct list_head *list, void *elem)
+{
+ glusterd_pending_node_t *pending_node = NULL;
+ glusterd_pending_node_t *tmp = NULL;
+ int ret = 0;
+
+ list_for_each_entry_safe (pending_node, tmp, list, list) {
+ if (elem == pending_node->node) {
+ list_del_init (&pending_node->list);
+ GF_FREE (pending_node);
+ ret = 0;
+ goto out;
+ }
+ }
+out:
+ gf_log (THIS->name, GF_LOG_DEBUG, "returning %d", ret);
+ return ret;
+
+}
+
+int
+glusterd_clear_pending_nodes (struct list_head *list)
+{
+ glusterd_pending_node_t *pending_node = NULL;
+ glusterd_pending_node_t *tmp = NULL;
+
+ list_for_each_entry_safe (pending_node, tmp, list, list) {
+ list_del_init (&pending_node->list);
+ GF_FREE (pending_node);
+ }
+
+ return 0;
+}
+
+gf_boolean_t
+glusterd_peerinfo_is_uuid_unknown (glusterd_peerinfo_t *peerinfo)
+{
+ GF_ASSERT (peerinfo);
+
+ if (uuid_is_null (peerinfo->uuid))
+ return _gf_true;
+ return _gf_false;
+}
+
+int32_t
+glusterd_delete_volume (glusterd_volinfo_t *volinfo)
+{
+ int ret = -1;
+ GF_ASSERT (volinfo);
+
+ ret = glusterd_store_delete_volume (volinfo);
+
+ if (ret)
+ goto out;
+
+ ret = glusterd_volinfo_delete (volinfo);
+out:
+ gf_log (THIS->name, GF_LOG_DEBUG, "returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_delete_brick (glusterd_volinfo_t* volinfo,
+ glusterd_brickinfo_t *brickinfo)
+{
+ int ret = 0;
+ char voldir[PATH_MAX] = {0,};
+ glusterd_conf_t *priv = THIS->private;
+ GF_ASSERT (volinfo);
+ GF_ASSERT (brickinfo);
+
+ GLUSTERD_GET_VOLUME_DIR(voldir, volinfo, priv);
+
+ glusterd_delete_volfile (volinfo, brickinfo);
+ glusterd_store_delete_brick (brickinfo, voldir);
+ glusterd_brickinfo_delete (brickinfo);
+ volinfo->brick_count--;
+ return ret;
+}
+
+int32_t
+glusterd_delete_all_bricks (glusterd_volinfo_t* volinfo)
+{
+ int ret = 0;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *tmp = NULL;
+
+ GF_ASSERT (volinfo);
+
+ list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks, brick_list) {
+ ret = glusterd_delete_brick (volinfo, brickinfo);
+ }
+ return ret;
+}
+
+int
+glusterd_get_local_brickpaths (glusterd_volinfo_t *volinfo, char **pathlist)
+{
+ char **path_tokens = NULL;
+ char *tmp_path_list = NULL;
+ char path[PATH_MAX] = "";
+ int32_t count = 0;
+ int32_t pathlen = 0;
+ int32_t total_len = 0;
+ int32_t ret = 0;
+ int i = 0;
+ glusterd_brickinfo_t *brickinfo = NULL;
+
+ if ((!volinfo) || (!pathlist))
+ goto out;
+
+ path_tokens = GF_CALLOC (sizeof(char*), volinfo->brick_count,
+ gf_gld_mt_charptr);
+ if (!path_tokens) {
+ gf_log ("", GF_LOG_DEBUG, "Could not allocate memory.");
+ ret = -1;
+ goto out;
+ }
+
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ if (uuid_compare (brickinfo->uuid, MY_UUID))
+ continue;
+
+ pathlen = snprintf (path, sizeof(path),
+ "--path=%s ", brickinfo->path);
+ if (pathlen < sizeof(path))
+ path[pathlen] = '\0';
+ else
+ path[sizeof(path)-1] = '\0';
+ path_tokens[count] = gf_strdup (path);
+ if (!path_tokens[count]) {
+ gf_log ("", GF_LOG_DEBUG,
+ "Could not allocate memory.");
+ ret = -1;
+ goto out;
+ }
+ count++;
+ total_len += pathlen;
+ }
+
+ tmp_path_list = GF_CALLOC (sizeof(char), total_len + 1,
+ gf_gld_mt_char);
+ if (!tmp_path_list) {
+ gf_log ("", GF_LOG_DEBUG, "Could not allocate memory.");
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; i < count; i++)
+ strcat (tmp_path_list, path_tokens[i]);
+
+ if (count)
+ *pathlist = tmp_path_list;
+
+ ret = count;
+out:
+ for (i = 0; i < count; i++) {
+ GF_FREE (path_tokens[i]);
+ path_tokens[i] = NULL;
+ }
+
+ GF_FREE (path_tokens);
+ path_tokens = NULL;
+
+ if (ret == 0) {
+ gf_log ("", GF_LOG_DEBUG, "No Local Bricks Present.");
+ GF_FREE (tmp_path_list);
+ tmp_path_list = NULL;
+ }
+
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_start_gsync (glusterd_volinfo_t *master_vol, char *slave,
+ char *path_list, char *conf_path,
+ char *glusterd_uuid_str,
+ char **op_errstr)
+{
+ int32_t ret = 0;
+ int32_t status = 0;
+ char uuid_str [64] = {0};
+ runner_t runner = {0,};
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ int errcode = 0;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ uuid_utoa_r (MY_UUID, uuid_str);
+
+ if (!path_list) {
+ ret = 0;
+ gf_log ("", GF_LOG_DEBUG, "No Bricks in this node."
+ " Not starting gsyncd.");
+ goto out;
+ }
+
+ ret = gsync_status (master_vol->volname, slave, conf_path, &status);
+ if (status == 0)
+ goto out;
+
+ uuid_utoa_r (master_vol->volume_id, uuid_str);
+ runinit (&runner);
+ runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd",
+ path_list, "-c", NULL);
+ runner_argprintf (&runner, "%s", conf_path);
+ runner_argprintf (&runner, ":%s", master_vol->volname);
+ runner_add_args (&runner, slave, "--config-set", "session-owner",
+ uuid_str, NULL);
+ synclock_unlock (&priv->big_lock);
+ ret = runner_run (&runner);
+ synclock_lock (&priv->big_lock);
+ if (ret == -1) {
+ errcode = -1;
+ goto out;
+ }
+
+ runinit (&runner);
+ runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd",
+ path_list, "--monitor", "-c", NULL);
+ runner_argprintf (&runner, "%s", conf_path);
+ runner_argprintf (&runner, ":%s", master_vol->volname);
+ runner_argprintf (&runner, "--glusterd-uuid=%s",
+ uuid_utoa (priv->uuid));
+ runner_add_arg (&runner, slave);
+ synclock_unlock (&priv->big_lock);
+ ret = runner_run (&runner);
+ synclock_lock (&priv->big_lock);
+ if (ret == -1) {
+ gf_asprintf (op_errstr, GEOREP" start failed for %s %s",
+ master_vol->volname, slave);
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if ((ret != 0) && errcode == -1) {
+ if (op_errstr)
+ *op_errstr = gf_strdup ("internal error, cannot start "
+ "the " GEOREP " session");
+ }
+
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_recreate_volfiles (glusterd_conf_t *conf)
+{
+
+ glusterd_volinfo_t *volinfo = NULL;
+ int ret = 0;
+ int op_ret = 0;
+
+ GF_ASSERT (conf);
+ list_for_each_entry (volinfo, &conf->volumes, vol_list) {
+ ret = generate_brick_volfiles (volinfo);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR, "Failed to "
+ "regenerate brick volfiles for %s",
+ volinfo->volname);
+ op_ret = ret;
+ }
+ ret = generate_client_volfiles (volinfo, GF_CLIENT_TRUSTED);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR, "Failed to "
+ "regenerate trusted client volfiles for %s",
+ volinfo->volname);
+ op_ret = ret;
+ }
+ ret = generate_client_volfiles (volinfo, GF_CLIENT_OTHER);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR, "Failed to "
+ "regenerate client volfiles for %s",
+ volinfo->volname);
+ op_ret = ret;
+ }
+ }
+ return op_ret;
+}
+
+int32_t
+glusterd_handle_upgrade_downgrade (dict_t *options, glusterd_conf_t *conf)
+{
+ int ret = 0;
+ char *type = NULL;
+ gf_boolean_t upgrade = _gf_false;
+ gf_boolean_t downgrade = _gf_false;
+ gf_boolean_t regenerate_volfiles = _gf_false;
+ gf_boolean_t terminate = _gf_false;
+
+ ret = dict_get_str (options, "upgrade", &type);
+ if (!ret) {
+ ret = gf_string2boolean (type, &upgrade);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR, "upgrade option "
+ "%s is not a valid boolean type", type);
+ ret = -1;
+ goto out;
+ }
+ if (_gf_true == upgrade)
+ regenerate_volfiles = _gf_true;
+ }
+
+ ret = dict_get_str (options, "downgrade", &type);
+ if (!ret) {
+ ret = gf_string2boolean (type, &downgrade);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR, "downgrade option "
+ "%s is not a valid boolean type", type);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (upgrade && downgrade) {
+ gf_log ("glusterd", GF_LOG_ERROR, "Both upgrade and downgrade"
+ " options are set. Only one should be on");
+ ret = -1;
+ goto out;
+ }
+
+ if (!upgrade && !downgrade)
+ ret = 0;
+ else
+ terminate = _gf_true;
+ if (regenerate_volfiles) {
+ ret = glusterd_recreate_volfiles (conf);
+ }
+out:
+ if (terminate && (ret == 0))
+ kill (getpid(), SIGTERM);
+ return ret;
+}
+
+gf_boolean_t
+glusterd_is_volume_replicate (glusterd_volinfo_t *volinfo)
+{
+ gf_boolean_t replicates = _gf_false;
+ if (volinfo && ((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) ||
+ (volinfo->type == GF_CLUSTER_TYPE_STRIPE_REPLICATE)))
+ replicates = _gf_true;
+ return replicates;
+}
+
+int
+glusterd_set_dump_options (char *dumpoptions_path, char *options,
+ int option_cnt)
+{
+ int ret = 0;
+ char *dup_options = NULL;
+ char *option = NULL;
+ char *tmpptr = NULL;
+ FILE *fp = NULL;
+ int nfs_cnt = 0;
+
+ if (0 == option_cnt ||
+ (option_cnt == 1 && (!strcmp (options, "nfs ")))) {
+ ret = 0;
+ goto out;
+ }
+
+ fp = fopen (dumpoptions_path, "w");
+ if (!fp) {
+ ret = -1;
+ goto out;
+ }
+ dup_options = gf_strdup (options);
+ gf_log ("", GF_LOG_INFO, "Received following statedump options: %s",
+ dup_options);
+ option = strtok_r (dup_options, " ", &tmpptr);
+ while (option) {
+ if (!strcmp (option, "nfs")) {
+ if (nfs_cnt > 0) {
+ unlink (dumpoptions_path);
+ ret = 0;
+ goto out;
+ }
+ nfs_cnt++;
+ option = strtok_r (NULL, " ", &tmpptr);
+ continue;
+ }
+ fprintf (fp, "%s=yes\n", option);
+ option = strtok_r (NULL, " ", &tmpptr);
+ }
+
+out:
+ if (fp)
+ fclose (fp);
+ GF_FREE (dup_options);
+ return ret;
+}
+
+int
+glusterd_brick_statedump (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ char *options, int option_cnt, char **op_errstr)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ char pidfile_path[PATH_MAX] = {0,};
+ char dumpoptions_path[PATH_MAX] = {0,};
+ FILE *pidfile = NULL;
+ pid_t pid = -1;
+
+ this = THIS;
+ GF_ASSERT (this);
+ conf = this->private;
+ GF_ASSERT (conf);
+
+ if (uuid_is_null (brickinfo->uuid)) {
+ ret = glusterd_resolve_brick (brickinfo);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "Cannot resolve brick %s:%s",
+ brickinfo->hostname, brickinfo->path);
+ goto out;
+ }
+ }
+
+ if (uuid_compare (brickinfo->uuid, MY_UUID)) {
+ ret = 0;
+ goto out;
+ }
+
+ GLUSTERD_GET_BRICK_PIDFILE (pidfile_path, volinfo, brickinfo, conf);
+
+ pidfile = fopen (pidfile_path, "r");
+ if (!pidfile) {
+ gf_log ("", GF_LOG_ERROR, "Unable to open pidfile: %s",
+ pidfile_path);
+ ret = -1;
+ goto out;
+ }
+
+ ret = fscanf (pidfile, "%d", &pid);
+ if (ret <= 0) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get pid of brick process");
+ ret = -1;
+ goto out;
+ }
+
+ snprintf (dumpoptions_path, sizeof (dumpoptions_path),
+ DEFAULT_VAR_RUN_DIRECTORY"/glusterdump.%d.options", pid);
+ ret = glusterd_set_dump_options (dumpoptions_path, options, option_cnt);
+ if (ret < 0) {
+ gf_log ("", GF_LOG_ERROR, "error while parsing the statedump "
+ "options");
+ ret = -1;
+ goto out;
+ }
+
+ gf_log ("", GF_LOG_INFO, "Performing statedump on brick with pid %d",
+ pid);
+
+ kill (pid, SIGUSR1);
+
+ sleep (1);
+ ret = 0;
+out:
+ unlink (dumpoptions_path);
+ if (pidfile)
+ fclose (pidfile);
+ return ret;
+}
+
+int
+glusterd_nfs_statedump (char *options, int option_cnt, char **op_errstr)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ char pidfile_path[PATH_MAX] = {0,};
+ char path[PATH_MAX] = {0,};
+ FILE *pidfile = NULL;
+ pid_t pid = -1;
+ char dumpoptions_path[PATH_MAX] = {0,};
+ char *option = NULL;
+ char *tmpptr = NULL;
+ char *dup_options = NULL;
+ char msg[256] = {0,};
+
+ this = THIS;
+ GF_ASSERT (this);
+ conf = this->private;
+ GF_ASSERT (conf);
+
+ dup_options = gf_strdup (options);
+ option = strtok_r (dup_options, " ", &tmpptr);
+ if (strcmp (option, "nfs")) {
+ snprintf (msg, sizeof (msg), "for nfs statedump, options should"
+ " be after the key nfs");
+ *op_errstr = gf_strdup (msg);
+ ret = -1;
+ goto out;
+ }
+
+ GLUSTERD_GET_NFS_DIR (path, conf);
+ GLUSTERD_GET_NFS_PIDFILE (pidfile_path, path);
+
+ pidfile = fopen (pidfile_path, "r");
+ if (!pidfile) {
+ gf_log ("", GF_LOG_ERROR, "Unable to open pidfile: %s",
+ pidfile_path);
+ ret = -1;
+ goto out;
+ }
+
+ ret = fscanf (pidfile, "%d", &pid);
+ if (ret <= 0) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get pid of brick process");
+ ret = -1;
+ goto out;
+ }
+
+ snprintf (dumpoptions_path, sizeof (dumpoptions_path),
+ DEFAULT_VAR_RUN_DIRECTORY"/glusterdump.%d.options", pid);
+ ret = glusterd_set_dump_options (dumpoptions_path, options, option_cnt);
+ if (ret < 0) {
+ gf_log ("", GF_LOG_ERROR, "error while parsing the statedump "
+ "options");
+ ret = -1;
+ goto out;
+ }
+
+ gf_log ("", GF_LOG_INFO, "Performing statedump on nfs server with "
+ "pid %d", pid);
+
+ kill (pid, SIGUSR1);
+
+ sleep (1);
+
+ ret = 0;
+out:
+ if (pidfile)
+ fclose (pidfile);
+ unlink (dumpoptions_path);
+ GF_FREE (dup_options);
+ return ret;
+}
+
+/* Checks if the given peer contains all the bricks belonging to the
+ * given volume. Returns true if it does else returns false
+ */
+gf_boolean_t
+glusterd_friend_contains_vol_bricks (glusterd_volinfo_t *volinfo,
+ uuid_t friend_uuid)
+{
+ gf_boolean_t ret = _gf_true;
+ glusterd_brickinfo_t *brickinfo = NULL;
+
+ GF_ASSERT (volinfo);
+
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ if (uuid_compare (friend_uuid, brickinfo->uuid)) {
+ ret = _gf_false;
+ break;
+ }
+ }
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+/* Remove all volumes which completely belong to given friend
+ */
+int
+glusterd_friend_remove_cleanup_vols (uuid_t uuid)
+{
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_volinfo_t *tmp_volinfo = NULL;
+
+ priv = THIS->private;
+ GF_ASSERT (priv);
+
+ list_for_each_entry_safe (volinfo, tmp_volinfo,
+ &priv->volumes, vol_list) {
+ if (glusterd_friend_contains_vol_bricks (volinfo, uuid)) {
+ gf_log (THIS->name, GF_LOG_INFO,
+ "Deleting stale volume %s", volinfo->volname);
+ ret = glusterd_delete_volume (volinfo);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Error deleting stale volume");
+ goto out;
+ }
+ }
+ }
+ ret = 0;
+out:
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+/* Check if the all peers are connected and befriended, except the peer
+ * specified (the peer being detached)
+ */
+gf_boolean_t
+glusterd_chk_peers_connected_befriended (uuid_t skip_uuid)
+{
+ gf_boolean_t ret = _gf_true;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ priv= THIS->private;
+ GF_ASSERT (priv);
+
+ list_for_each_entry (peerinfo, &priv->peers, uuid_list) {
+
+ if (!uuid_is_null (skip_uuid) && !uuid_compare (skip_uuid,
+ peerinfo->uuid))
+ continue;
+
+ if ((GD_FRIEND_STATE_BEFRIENDED != peerinfo->state.state)
+ || !(peerinfo->connected)) {
+ ret = _gf_false;
+ break;
+ }
+ }
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %s",
+ (ret?"TRUE":"FALSE"));
+ return ret;
+}
+
+void
+glusterd_get_client_filepath (char *filepath, glusterd_volinfo_t *volinfo,
+ gf_transport_type type)
+{
+ char path[PATH_MAX] = {0,};
+ glusterd_conf_t *priv = NULL;
+
+ priv = THIS->private;
+
+ GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv);
+
+ if ((volinfo->transport_type == GF_TRANSPORT_BOTH_TCP_RDMA) &&
+ (type == GF_TRANSPORT_RDMA))
+ snprintf (filepath, PATH_MAX, "%s/%s.rdma-fuse.vol",
+ path, volinfo->volname);
+ else
+ snprintf (filepath, PATH_MAX, "%s/%s-fuse.vol",
+ path, volinfo->volname);
+}
+
+void
+glusterd_get_trusted_client_filepath (char *filepath,
+ glusterd_volinfo_t *volinfo,
+ gf_transport_type type)
+{
+ char path[PATH_MAX] = {0,};
+ glusterd_conf_t *priv = NULL;
+
+ priv = THIS->private;
+
+ GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv);
+
+ if ((volinfo->transport_type == GF_TRANSPORT_BOTH_TCP_RDMA) &&
+ (type == GF_TRANSPORT_RDMA))
+ snprintf (filepath, PATH_MAX,
+ "%s/trusted-%s.rdma-fuse.vol",
+ path, volinfo->volname);
+ else
+ snprintf (filepath, PATH_MAX,
+ "%s/trusted-%s-fuse.vol",
+ path, volinfo->volname);
+}
+
+int
+glusterd_volume_defrag_restart (glusterd_volinfo_t *volinfo, char *op_errstr,
+ size_t len, int cmd, defrag_cbk_fn_t cbk)
+{
+ glusterd_conf_t *priv = NULL;
+ char pidfile[PATH_MAX];
+ int ret = -1;
+ pid_t pid;
+
+ priv = THIS->private;
+ if (!priv)
+ return ret;
+
+ GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv);
+
+ if (!glusterd_is_service_running (pidfile, &pid)) {
+ glusterd_handle_defrag_start (volinfo, op_errstr, len, cmd,
+ cbk, volinfo->rebal.op);
+ } else {
+ glusterd_rebalance_rpc_create (volinfo, priv, cmd);
+ }
+
+ return ret;
+}
+
+int
+glusterd_restart_rebalance (glusterd_conf_t *conf)
+{
+ glusterd_volinfo_t *volinfo = NULL;
+ int ret = 0;
+ char op_errstr[256];
+
+ list_for_each_entry (volinfo, &conf->volumes, vol_list) {
+ if (!volinfo->rebal.defrag_cmd)
+ continue;
+ glusterd_volume_defrag_restart (volinfo, op_errstr, 256,
+ volinfo->rebal.defrag_cmd, NULL);
+ }
+ return ret;
+}
+
+void
+glusterd_volinfo_reset_defrag_stats (glusterd_volinfo_t *volinfo)
+{
+ glusterd_rebalance_t *rebal = NULL;
+ GF_ASSERT (volinfo);
+
+ rebal = &volinfo->rebal;
+ rebal->rebalance_files = 0;
+ rebal->rebalance_data = 0;
+ rebal->lookedup_files = 0;
+ rebal->rebalance_failures = 0;
+ rebal->rebalance_time = 0;
+ rebal->skipped_files = 0;
+
+}
+
+/* Return hostname for given uuid if it exists
+ * else return NULL
+ */
+char *
+glusterd_uuid_to_hostname (uuid_t uuid)
+{
+ char *hostname = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_peerinfo_t *entry = NULL;
+
+ priv = THIS->private;
+ GF_ASSERT (priv);
+
+ if (!uuid_compare (MY_UUID, uuid)) {
+ hostname = gf_strdup ("localhost");
+ }
+ if (!list_empty (&priv->peers)) {
+ list_for_each_entry (entry, &priv->peers, uuid_list) {
+ if (!uuid_compare (entry->uuid, uuid)) {
+ hostname = gf_strdup (entry->hostname);
+ break;
+ }
+ }
+ }
+
+ return hostname;
+}
+
+gf_boolean_t
+glusterd_is_local_brick (xlator_t *this, glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo)
+{
+ gf_boolean_t local = _gf_false;
+ int ret = 0;
+ glusterd_conf_t *conf = NULL;
+
+ if (uuid_is_null (brickinfo->uuid)) {
+ ret = glusterd_resolve_brick (brickinfo);
+ if (ret)
+ goto out;
+ }
+ conf = this->private;
+ local = !uuid_compare (brickinfo->uuid, MY_UUID);
+out:
+ return local;
+}
+int
+glusterd_validate_volume_id (dict_t *op_dict, glusterd_volinfo_t *volinfo)
+{
+ int ret = -1;
+ char *volid_str = NULL;
+ uuid_t vol_uid = {0, };
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = dict_get_str (op_dict, "vol-id", &volid_str);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get volume id for "
+ "volume %s", volinfo->volname);
+ goto out;
+ }
+ ret = uuid_parse (volid_str, vol_uid);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to parse volume id "
+ "for volume %s", volinfo->volname);
+ goto out;
+ }
+
+ if (uuid_compare (vol_uid, volinfo->volume_id)) {
+ gf_log (this->name, GF_LOG_ERROR, "Volume ids of volume %s - %s"
+ " and %s - are different. Possibly a split brain among "
+ "peers.", volinfo->volname, volid_str,
+ uuid_utoa (volinfo->volume_id));
+ ret = -1;
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+int
+glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo,
+ dict_t *rsp_dict)
+{
+ int ret = 0;
+ uint64_t files = 0;
+ uint64_t size = 0;
+ uint64_t lookup = 0;
+ gf_defrag_status_t status = GF_DEFRAG_STATUS_NOT_STARTED;
+ uint64_t failures = 0;
+ uint64_t skipped = 0;
+ xlator_t *this = NULL;
+ double run_time = 0;
+
+ this = THIS;
+
+ ret = dict_get_uint64 (rsp_dict, "files", &files);
+ if (ret)
+ gf_log (this->name, GF_LOG_TRACE,
+ "failed to get file count");
+
+ ret = dict_get_uint64 (rsp_dict, "size", &size);
+ if (ret)
+ gf_log (this->name, GF_LOG_TRACE,
+ "failed to get size of xfer");
+
+ ret = dict_get_uint64 (rsp_dict, "lookups", &lookup);
+ if (ret)
+ gf_log (this->name, GF_LOG_TRACE,
+ "failed to get lookedup file count");
+
+ ret = dict_get_int32 (rsp_dict, "status", (int32_t *)&status);
+ if (ret)
+ gf_log (this->name, GF_LOG_TRACE,
+ "failed to get status");
+
+ ret = dict_get_uint64 (rsp_dict, "failures", &failures);
+ if (ret)
+ gf_log (this->name, GF_LOG_TRACE,
+ "failed to get failure count");
+
+ ret = dict_get_uint64 (rsp_dict, "skipped", &skipped);
+ if (ret)
+ gf_log (this->name, GF_LOG_TRACE,
+ "failed to get skipped count");
+
+ ret = dict_get_double (rsp_dict, "run-time", &run_time);
+ if (ret)
+ gf_log (this->name, GF_LOG_TRACE,
+ "failed to get run-time");
+
+ if (files)
+ volinfo->rebal.rebalance_files = files;
+ if (size)
+ volinfo->rebal.rebalance_data = size;
+ if (lookup)
+ volinfo->rebal.lookedup_files = lookup;
+ if (status)
+ volinfo->rebal.defrag_status = status;
+ if (failures)
+ volinfo->rebal.rebalance_failures = failures;
+ if (skipped)
+ volinfo->rebal.skipped_files = skipped;
+ if (run_time)
+ volinfo->rebal.rebalance_time = run_time;
+
+ return ret;
+}
+
+int
+glusterd_check_topology_identical (const char *filename1,
+ const char *filename2,
+ gf_boolean_t *identical)
+{
+ int ret = -1; /* FAILURE */
+ xlator_t *this = NULL;
+ FILE *fp1 = NULL;
+ FILE *fp2 = NULL;
+ glusterfs_graph_t *grph1 = NULL;
+ glusterfs_graph_t *grph2 = NULL;
+
+ if ((!filename1) || (!filename2) || (!identical))
+ goto out;
+
+ this = THIS;
+
+ errno = 0; /* RESET the errno */
+
+ /* fopen() the volfile1 to create the graph */
+ fp1 = fopen (filename1, "r");
+ if (fp1 == NULL) {
+ gf_log (this->name, GF_LOG_ERROR, "fopen() on file: %s failed "
+ "(%s)", filename1, strerror (errno));
+ goto out;
+ }
+
+ /* fopen() the volfile2 to create the graph */
+ fp2 = fopen (filename2, "r");
+ if (fp2 == NULL) {
+ gf_log (this->name, GF_LOG_ERROR, "fopen() on file: %s failed "
+ "(%s)", filename2, strerror (errno));
+ goto out;
+ }
+
+ /* create the graph for filename1 */
+ grph1 = glusterfs_graph_construct(fp1);
+ if (grph1 == NULL)
+ goto out;
+
+ /* create the graph for filename2 */
+ grph2 = glusterfs_graph_construct(fp2);
+ if (grph2 == NULL)
+ goto out;
+
+ /* compare the graph topology */
+ *identical = is_graph_topology_equal(grph1, grph2);
+ ret = 0; /* SUCCESS */
+out:
+ if (fp1)
+ fclose(fp1);
+ if (fp2)
+ fclose(fp2);
+ if (grph1)
+ glusterfs_graph_destroy(grph1);
+ if (grph2)
+ glusterfs_graph_destroy(grph2);
+
+ gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret);
+ return ret;
+}
+
+int
+glusterd_check_files_identical (char *filename1, char *filename2,
+ gf_boolean_t *identical)
+{
+ int ret = -1;
+ struct stat buf1 = {0,};
+ struct stat buf2 = {0,};
+ uint32_t cksum1 = 0;
+ uint32_t cksum2 = 0;
+ xlator_t *this = NULL;
+
+ GF_ASSERT (filename1);
+ GF_ASSERT (filename2);
+ GF_ASSERT (identical);
+
+ this = THIS;
+
+ ret = stat (filename1, &buf1);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "stat on file: %s failed "
+ "(%s)", filename1, strerror (errno));
+ goto out;
+ }
+
+ ret = stat (filename2, &buf2);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "stat on file: %s failed "
+ "(%s)", filename2, strerror (errno));
+ goto out;
+ }
+
+ if (buf1.st_size != buf2.st_size) {
+ *identical = _gf_false;
+ goto out;
+ }
+
+ ret = get_checksum_for_path (filename1, &cksum1);
+ if (ret)
+ goto out;
+
+
+ ret = get_checksum_for_path (filename2, &cksum2);
+ if (ret)
+ goto out;
+
+ if (cksum1 != cksum2)
+ *identical = _gf_false;
+ else
+ *identical = _gf_true;
+
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret);
+ return ret;
+}
+
+int
+glusterd_volset_help (dict_t *dict, char **op_errstr)
+{
+ int ret = -1;
+ gf_boolean_t xml_out = _gf_false;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ if (!dict) {
+ if (!(dict = glusterd_op_get_ctx ())) {
+ ret = 0;
+ goto out;
+ }
+ }
+
+ if (dict_get (dict, "help" )) {
+ xml_out = _gf_false;
+
+ } else if (dict_get (dict, "help-xml" )) {
+ xml_out = _gf_true;
+#if (HAVE_LIB_XML)
+ ret = 0;
+#else
+ gf_log (this->name, GF_LOG_ERROR,
+ "libxml not present in the system");
+ if (op_errstr)
+ *op_errstr = gf_strdup ("Error: xml libraries not "
+ "present to produce "
+ "xml-output");
+ goto out;
+#endif
+
+ } else {
+ goto out;
+ }
+
+ ret = glusterd_get_volopt_content (dict, xml_out);
+ if (ret && op_errstr)
+ *op_errstr = gf_strdup ("Failed to get volume options help");
+ out:
+
+ gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_to_cli (rpcsvc_request_t *req, gf_cli_rsp *arg, struct iovec *payload,
+ int payloadcount, struct iobref *iobref, xdrproc_t xdrproc,
+ dict_t *dict)
+{
+ int ret = -1;
+ char *cmd = NULL;
+ int op_ret = 0;
+ char *op_errstr = NULL;
+ int op_errno = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ op_ret = arg->op_ret;
+ op_errstr = arg->op_errstr;
+ op_errno = arg->op_errno;
+
+ ret = dict_get_str (dict, "cmd-str", &cmd);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get command "
+ "string");
+
+ if (cmd) {
+ if (op_ret)
+ gf_cmd_log ("", "%s : FAILED %s %s", cmd,
+ (op_errstr)? ":" : " ",
+ (op_errstr)? op_errstr : " ");
+ else
+ gf_cmd_log ("", "%s : SUCCESS", cmd);
+ }
+
+ glusterd_submit_reply (req, arg, payload, payloadcount, iobref,
+ (xdrproc_t) xdrproc);
+ if (dict)
+ dict_unref (dict);
+
+ return ret;
+}
+
+static int32_t
+glusterd_append_gsync_status (dict_t *dst, dict_t *src)
+{
+ int ret = 0;
+ char *stop_msg = NULL;
+
+ ret = dict_get_str (src, "gsync-status", &stop_msg);
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = dict_set_dynstr (dst, "gsync-status", gf_strdup (stop_msg));
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_WARNING, "Unable to set the stop"
+ "message in the ctx dictionary");
+ goto out;
+ }
+
+ ret = 0;
+ out:
+ gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+
+}
+
+static int32_t
+glusterd_append_status_dicts (dict_t *dst, dict_t *src)
+{
+ int dst_count = 0;
+ int src_count = 0;
+ int i = 0;
+ int ret = 0;
+ char mst[PATH_MAX] = {0,};
+ char slv[PATH_MAX] = {0, };
+ char sts[PATH_MAX] = {0, };
+ char nds[PATH_MAX] = {0, };
+ char *mst_val = NULL;
+ char *slv_val = NULL;
+ char *sts_val = NULL;
+ char *nds_val = NULL;
+
+ GF_ASSERT (dst);
+
+ if (src == NULL)
+ goto out;
+
+ ret = dict_get_int32 (dst, "gsync-count", &dst_count);
+ if (ret)
+ dst_count = 0;
+
+ ret = dict_get_int32 (src, "gsync-count", &src_count);
+ if (ret || !src_count) {
+ gf_log ("", GF_LOG_DEBUG, "Source brick empty");
+ ret = 0;
+ goto out;
+ }
+
+ for (i = 1; i <= src_count; i++) {
+ snprintf (nds, sizeof(nds), "node%d", i);
+ snprintf (mst, sizeof(mst), "master%d", i);
+ snprintf (slv, sizeof(slv), "slave%d", i);
+ snprintf (sts, sizeof(sts), "status%d", i);
+
+ ret = dict_get_str (src, nds, &nds_val);
+ if (ret)
+ goto out;
+
+ ret = dict_get_str (src, mst, &mst_val);
+ if (ret)
+ goto out;
+
+ ret = dict_get_str (src, slv, &slv_val);
+ if (ret)
+ goto out;
+
+ ret = dict_get_str (src, sts, &sts_val);
+ if (ret)
+ goto out;
+
+ snprintf (nds, sizeof(nds), "node%d", i+dst_count);
+ snprintf (mst, sizeof(mst), "master%d", i+dst_count);
+ snprintf (slv, sizeof(slv), "slave%d", i+dst_count);
+ snprintf (sts, sizeof(sts), "status%d", i+dst_count);
+
+ ret = dict_set_dynstr (dst, nds, gf_strdup (nds_val));
+ if (ret)
+ goto out;
+
+ ret = dict_set_dynstr (dst, mst, gf_strdup (mst_val));
+ if (ret)
+ goto out;
+
+ ret = dict_set_dynstr (dst, slv, gf_strdup (slv_val));
+ if (ret)
+ goto out;
+
+ ret = dict_set_dynstr (dst, sts, gf_strdup (sts_val));
+ if (ret)
+ goto out;
+
+ }
+
+ ret = dict_set_int32 (dst, "gsync-count", dst_count+src_count);
+
+ out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+
+}
+
+int32_t
+glusterd_gsync_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict, char *op_errstr)
+{
+ dict_t *ctx = NULL;
+ int ret = 0;
+ char *conf_path = NULL;
+
+ if (aggr) {
+ ctx = aggr;
+
+ } else {
+ ctx = glusterd_op_get_ctx ();
+ if (!ctx) {
+ gf_log ("", GF_LOG_ERROR,
+ "Operation Context is not present");
+ GF_ASSERT (0);
+ }
+ }
+
+ if (rsp_dict) {
+ ret = glusterd_append_status_dicts (ctx, rsp_dict);
+ if (ret)
+ goto out;
+
+ ret = glusterd_append_gsync_status (ctx, rsp_dict);
+ if (ret)
+ goto out;
+
+ ret = dict_get_str (rsp_dict, "conf_path", &conf_path);
+ if (!ret && conf_path) {
+ ret = dict_set_dynstr (ctx, "conf_path",
+ gf_strdup(conf_path));
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to store conf path.");
+ goto out;
+ }
+ }
+ }
+ if ((op_errstr) && (strcmp ("", op_errstr))) {
+ ret = dict_set_dynstr (ctx, "errstr", gf_strdup(op_errstr));
+ if (ret)
+ goto out;
+ }
+
+ ret = 0;
+ out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d ", ret);
+ return ret;
+}
+
+int32_t
+glusterd_rb_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict)
+{
+ int32_t src_port = 0;
+ int32_t dst_port = 0;
+ int ret = 0;
+ dict_t *ctx = NULL;
+
+
+ if (aggr) {
+ ctx = aggr;
+
+ } else {
+ ctx = glusterd_op_get_ctx ();
+ if (!ctx) {
+ gf_log ("", GF_LOG_ERROR,
+ "Operation Context is not present");
+ GF_ASSERT (0);
+ }
+ }
+
+ if (rsp_dict) {
+ ret = dict_get_int32 (rsp_dict, "src-brick-port", &src_port);
+ if (ret == 0) {
+ gf_log ("", GF_LOG_DEBUG,
+ "src-brick-port=%d found", src_port);
+ }
+
+ ret = dict_get_int32 (rsp_dict, "dst-brick-port", &dst_port);
+ if (ret == 0) {
+ gf_log ("", GF_LOG_DEBUG,
+ "dst-brick-port=%d found", dst_port);
+ }
+
+ }
+
+ if (src_port) {
+ ret = dict_set_int32 (ctx, "src-brick-port",
+ src_port);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG,
+ "Could not set src-brick");
+ goto out;
+ }
+ }
+
+ if (dst_port) {
+ ret = dict_set_int32 (ctx, "dst-brick-port",
+ dst_port);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG,
+ "Could not set dst-brick");
+ goto out;
+ }
+
+ }
+
+out:
+ return ret;
+
+}
+
+int32_t
+glusterd_sync_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict)
+{
+ int ret = 0;
+
+ GF_ASSERT (rsp_dict);
+
+ if (!rsp_dict) {
+ goto out;
+ }
+
+ ret = glusterd_import_friend_volumes (rsp_dict);
+out:
+ return ret;
+
+}
+
+static int
+_profile_volume_add_friend_rsp (dict_t *this, char *key, data_t *value,
+ void *data)
+{
+ char new_key[256] = {0};
+ glusterd_pr_brick_rsp_conv_t *rsp_ctx = NULL;
+ data_t *new_value = NULL;
+ int brick_count = 0;
+ char brick_key[256];
+
+ if (strcmp (key, "count") == 0)
+ return 0;
+ sscanf (key, "%d%s", &brick_count, brick_key);
+ rsp_ctx = data;
+ new_value = data_copy (value);
+ GF_ASSERT (new_value);
+ snprintf (new_key, sizeof (new_key), "%d%s",
+ rsp_ctx->count + brick_count, brick_key);
+ dict_set (rsp_ctx->dict, new_key, new_value);
+ return 0;
+}
+
+int
+glusterd_profile_volume_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict)
+{
+ int ret = 0;
+ glusterd_pr_brick_rsp_conv_t rsp_ctx = {0};
+ int32_t brick_count = 0;
+ int32_t count = 0;
+ dict_t *ctx_dict = NULL;
+ glusterd_op_t op = GD_OP_NONE;
+
+ GF_ASSERT (rsp_dict);
+
+ ret = dict_get_int32 (rsp_dict, "count", &brick_count);
+ if (ret) {
+ ret = 0; //no bricks in the rsp
+ goto out;
+ }
+
+ op = glusterd_op_get_op ();
+ GF_ASSERT (GD_OP_PROFILE_VOLUME == op);
+ if (aggr) {
+ ctx_dict = aggr;
+
+ } else {
+ ctx_dict = glusterd_op_get_ctx ();
+ }
+
+ ret = dict_get_int32 (ctx_dict, "count", &count);
+ rsp_ctx.count = count;
+ rsp_ctx.dict = ctx_dict;
+ dict_foreach (rsp_dict, _profile_volume_add_friend_rsp, &rsp_ctx);
+ dict_del (ctx_dict, "count");
+ ret = dict_set_int32 (ctx_dict, "count", count + brick_count);
+out:
+ return ret;
+}
+
+static int
+glusterd_volume_status_add_peer_rsp (dict_t *this, char *key, data_t *value,
+ void *data)
+{
+ glusterd_status_rsp_conv_t *rsp_ctx = NULL;
+ data_t *new_value = NULL;
+ char brick_key[1024] = {0,};
+ char new_key[1024] = {0,};
+ int32_t index = 0;
+ int32_t ret = 0;
+
+ /* Skip the following keys, they are already present in the ctx_dict */
+ if (!strcmp (key, "count") || !strcmp (key, "cmd") ||
+ !strcmp (key, "brick-index-max") || !strcmp (key, "other-count"))
+ return 0;
+
+ rsp_ctx = data;
+ new_value = data_copy (value);
+ GF_ASSERT (new_value);
+
+ sscanf (key, "brick%d.%s", &index, brick_key);
+
+ if (index > rsp_ctx->brick_index_max) {
+ snprintf (new_key, sizeof (new_key), "brick%d.%s",
+ index + rsp_ctx->other_count, brick_key);
+ } else {
+ strncpy (new_key, key, sizeof (new_key));
+ new_key[sizeof (new_key) - 1] = 0;
+ }
+
+ ret = dict_set (rsp_ctx->dict, new_key, new_value);
+ if (ret)
+ gf_log ("", GF_LOG_ERROR, "Unable to set key: %s in dict",
+ key);
+
+ return 0;
+}
+
+int
+glusterd_volume_status_copy_to_op_ctx_dict (dict_t *aggr, dict_t *rsp_dict)
+{
+ int ret = 0;
+ glusterd_status_rsp_conv_t rsp_ctx = {0};
+ int32_t cmd = GF_CLI_STATUS_NONE;
+ int32_t node_count = 0;
+ int32_t other_count = 0;
+ int32_t brick_index_max = -1;
+ int32_t rsp_node_count = 0;
+ int32_t rsp_other_count = 0;
+ int vol_count = -1;
+ int i = 0;
+ dict_t *ctx_dict = NULL;
+ char key[PATH_MAX] = {0,};
+ char *volname = NULL;
+
+ GF_ASSERT (rsp_dict);
+
+ if (aggr) {
+ ctx_dict = aggr;
+
+ } else {
+ ctx_dict = glusterd_op_get_ctx (GD_OP_STATUS_VOLUME);
+
+ }
+
+ ret = dict_get_int32 (ctx_dict, "cmd", &cmd);
+ if (ret)
+ goto out;
+
+ if (cmd & GF_CLI_STATUS_ALL && is_origin_glusterd (ctx_dict)) {
+ ret = dict_get_int32 (rsp_dict, "vol_count", &vol_count);
+ if (ret == 0) {
+ ret = dict_set_int32 (ctx_dict, "vol_count",
+ vol_count);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < vol_count; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "vol%d", i);
+ ret = dict_get_str (rsp_dict, key, &volname);
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (ctx_dict, key, volname);
+ if (ret)
+ goto out;
+ }
+ }
+ }
+
+ if ((cmd & GF_CLI_STATUS_TASKS) != 0) {
+ dict_copy (rsp_dict, aggr);
+ ret = 0;
+ goto out;
+ }
+
+ ret = dict_get_int32 (rsp_dict, "count", &rsp_node_count);
+ if (ret) {
+ ret = 0; //no bricks in the rsp
+ goto out;
+ }
+
+ ret = dict_get_int32 (rsp_dict, "other-count", &rsp_other_count);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Failed to get other count from rsp_dict");
+ goto out;
+ }
+
+ ret = dict_get_int32 (ctx_dict, "count", &node_count);
+ ret = dict_get_int32 (ctx_dict, "other-count", &other_count);
+ if (!dict_get (ctx_dict, "brick-index-max")) {
+ ret = dict_get_int32 (rsp_dict, "brick-index-max", &brick_index_max);
+ if (ret)
+ goto out;
+ ret = dict_set_int32 (ctx_dict, "brick-index-max", brick_index_max);
+ if (ret)
+ goto out;
+
+ } else {
+ ret = dict_get_int32 (ctx_dict, "brick-index-max", &brick_index_max);
+ }
+
+ rsp_ctx.count = node_count;
+ rsp_ctx.brick_index_max = brick_index_max;
+ rsp_ctx.other_count = other_count;
+ rsp_ctx.dict = ctx_dict;
+
+ dict_foreach (rsp_dict, glusterd_volume_status_add_peer_rsp, &rsp_ctx);
+
+ ret = dict_set_int32 (ctx_dict, "count", node_count + rsp_node_count);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Failed to update node count");
+ goto out;
+ }
+
+ ret = dict_set_int32 (ctx_dict, "other-count",
+ (other_count + rsp_other_count));
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Failed to update other-count");
+out:
+ return ret;
+}
+
+int
+glusterd_volume_rebalance_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict)
+{
+ char key[256] = {0,};
+ char *node_uuid = NULL;
+ char *node_uuid_str = NULL;
+ char *volname = NULL;
+ dict_t *ctx_dict = NULL;
+ double elapsed_time = 0;
+ glusterd_conf_t *conf = NULL;
+ glusterd_op_t op = GD_OP_NONE;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ int ret = 0;
+ int32_t index = 0;
+ int32_t count = 0;
+ int32_t current_index = 2;
+ int32_t value32 = 0;
+ uint64_t value = 0;
+ char *peer_uuid_str = NULL;
+
+ GF_ASSERT (rsp_dict);
+ conf = THIS->private;
+
+ op = glusterd_op_get_op ();
+ GF_ASSERT ((GD_OP_REBALANCE == op) ||
+ (GD_OP_DEFRAG_BRICK_VOLUME == op));
+
+ if (aggr) {
+ ctx_dict = aggr;
+
+ } else {
+ ctx_dict = glusterd_op_get_ctx (op);
+
+ }
+
+ if (!ctx_dict)
+ goto out;
+
+ ret = dict_get_str (ctx_dict, "volname", &volname);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32 (rsp_dict, "count", &index);
+ if (ret)
+ gf_log ("", GF_LOG_ERROR, "failed to get index");
+
+ memset (key, 0, 256);
+ snprintf (key, 256, "node-uuid-%d", index);
+ ret = dict_get_str (rsp_dict, key, &node_uuid);
+ if (!ret) {
+ node_uuid_str = gf_strdup (node_uuid);
+
+ /* Finding the index of the node-uuid in the peer-list */
+ list_for_each_entry (peerinfo, &conf->peers, uuid_list) {
+ peer_uuid_str = gd_peer_uuid_str (peerinfo);
+ if (strcmp (peer_uuid_str, node_uuid_str) == 0)
+ break;
+
+ current_index++;
+ }
+
+ /* Setting the largest index value as the total count. */
+ ret = dict_get_int32 (ctx_dict, "count", &count);
+ if (count < current_index) {
+ ret = dict_set_int32 (ctx_dict, "count", current_index);
+ if (ret)
+ gf_log ("", GF_LOG_ERROR, "Failed to set count");
+ }
+
+ /* Setting the same index for the node, as is in the peerlist.*/
+ memset (key, 0, 256);
+ snprintf (key, 256, "node-uuid-%d", current_index);
+ ret = dict_set_dynstr (ctx_dict, key, node_uuid_str);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "failed to set node-uuid");
+ }
+ }
+
+ snprintf (key, 256, "files-%d", index);
+ ret = dict_get_uint64 (rsp_dict, key, &value);
+ if (!ret) {
+ memset (key, 0, 256);
+ snprintf (key, 256, "files-%d", current_index);
+ ret = dict_set_uint64 (ctx_dict, key, value);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "failed to set the file count");
+ }
+ }
+
+ memset (key, 0, 256);
+ snprintf (key, 256, "size-%d", index);
+ ret = dict_get_uint64 (rsp_dict, key, &value);
+ if (!ret) {
+ memset (key, 0, 256);
+ snprintf (key, 256, "size-%d", current_index);
+ ret = dict_set_uint64 (ctx_dict, key, value);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "failed to set the size of migration");
+ }
+ }
+
+ memset (key, 0, 256);
+ snprintf (key, 256, "lookups-%d", index);
+ ret = dict_get_uint64 (rsp_dict, key, &value);
+ if (!ret) {
+ memset (key, 0, 256);
+ snprintf (key, 256, "lookups-%d", current_index);
+ ret = dict_set_uint64 (ctx_dict, key, value);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "failed to set lookuped file count");
+ }
+ }
+
+ memset (key, 0, 256);
+ snprintf (key, 256, "status-%d", index);
+ ret = dict_get_int32 (rsp_dict, key, &value32);
+ if (!ret) {
+ memset (key, 0, 256);
+ snprintf (key, 256, "status-%d", current_index);
+ ret = dict_set_int32 (ctx_dict, key, value32);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "failed to set status");
+ }
+ }
+
+ memset (key, 0, 256);
+ snprintf (key, 256, "failures-%d", index);
+ ret = dict_get_uint64 (rsp_dict, key, &value);
+ if (!ret) {
+ memset (key, 0, 256);
+ snprintf (key, 256, "failures-%d", current_index);
+ ret = dict_set_uint64 (ctx_dict, key, value);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "failed to set failure count");
+ }
+ }
+
+ memset (key, 0, 256);
+ snprintf (key, 256, "skipped-%d", index);
+ ret = dict_get_uint64 (rsp_dict, key, &value);
+ if (!ret) {
+ memset (key, 0, 256);
+ snprintf (key, 256, "skipped-%d", current_index);
+ ret = dict_set_uint64 (ctx_dict, key, value);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "failed to set skipped count");
+ }
+ }
+ memset (key, 0, 256);
+ snprintf (key, 256, "run-time-%d", index);
+ ret = dict_get_double (rsp_dict, key, &elapsed_time);
+ if (!ret) {
+ memset (key, 0, 256);
+ snprintf (key, 256, "run-time-%d", current_index);
+ ret = dict_set_double (ctx_dict, key, elapsed_time);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "failed to set run-time");
+ }
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int
+glusterd_snap_config_use_rsp_dict (dict_t *dst, dict_t *src)
+{
+ char buf[PATH_MAX] = "";
+ char *volname = NULL;
+ int ret = -1;
+ int config_command = 0;
+ uint64_t i = 0;
+ uint64_t value = 0;
+ uint64_t voldisplaycount = 0;
+
+ if (!dst || !src) {
+ gf_log ("", GF_LOG_ERROR, "Source or Destination "
+ "dict is empty.");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dst, "config-command", &config_command);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "failed to get config-command type");
+ goto out;
+ }
+
+ switch (config_command) {
+ case GF_SNAP_CONFIG_DISPLAY:
+ ret = dict_get_uint64 (src, "snap-max-hard-limit", &value);
+ if (!ret) {
+ ret = dict_set_uint64 (dst, "snap-max-hard-limit", value);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to set snap_max_hard_limit");
+ goto out;
+ }
+ } else {
+ /* Received dummy response from other nodes */
+ ret = 0;
+ goto out;
+ }
+
+ ret = dict_get_uint64 (src, "snap-max-soft-limit", &value);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to get snap_max_soft_limit");
+ goto out;
+ }
+
+ ret = dict_set_uint64 (dst, "snap-max-soft-limit", value);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to set snap_max_soft_limit");
+ goto out;
+ }
+
+ ret = dict_get_uint64 (src, "voldisplaycount",
+ &voldisplaycount);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to get voldisplaycount");
+ goto out;
+ }
+
+ ret = dict_set_uint64 (dst, "voldisplaycount",
+ voldisplaycount);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to set voldisplaycount");
+ goto out;
+ }
+
+ for (i = 0; i < voldisplaycount; i++) {
+ snprintf (buf, sizeof(buf), "volume%ld-volname", i);
+ ret = dict_get_str (src, buf, &volname);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to get %s", buf);
+ goto out;
+ }
+ ret = dict_set_str (dst, buf, volname);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to set %s", buf);
+ goto out;
+ }
+
+ snprintf (buf, sizeof(buf),
+ "volume%ld-snap-max-hard-limit", i);
+ ret = dict_get_uint64 (src, buf, &value);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to get %s", buf);
+ goto out;
+ }
+ ret = dict_set_uint64 (dst, buf, value);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to set %s", buf);
+ goto out;
+ }
+
+ snprintf (buf, sizeof(buf),
+ "volume%ld-active-hard-limit", i);
+ ret = dict_get_uint64 (src, buf, &value);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to get %s", buf);
+ goto out;
+ }
+ ret = dict_set_uint64 (dst, buf, value);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to set %s", buf);
+ goto out;
+ }
+
+ snprintf (buf, sizeof(buf),
+ "volume%ld-snap-max-soft-limit", i);
+ ret = dict_get_uint64 (src, buf, &value);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to get %s", buf);
+ goto out;
+ }
+ ret = dict_set_uint64 (dst, buf, value);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to set %s", buf);
+ goto out;
+ }
+ }
+
+ break;
+ default:
+ break;
+ }
+
+ ret = 0;
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+/* Aggregate missed_snap_counts from different nodes and save it *
+ * in the req_dict of the originator node */
+int
+glusterd_snap_create_use_rsp_dict (dict_t *dst, dict_t *src)
+{
+ char *buf = NULL;
+ char *tmp_str = NULL;
+ char name_buf[PATH_MAX] = "";
+ int32_t i = -1;
+ int32_t ret = -1;
+ int32_t src_missed_snap_count = -1;
+ int32_t dst_missed_snap_count = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ if (!dst || !src) {
+ gf_log (this->name, GF_LOG_ERROR, "Source or Destination "
+ "dict is empty.");
+ goto out;
+ }
+
+ ret = dict_get_int32 (src, "missed_snap_count",
+ &src_missed_snap_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "No missed snaps");
+ ret = 0;
+ goto out;
+ }
+
+ ret = dict_get_int32 (dst, "missed_snap_count",
+ &dst_missed_snap_count);
+ if (ret) {
+ /* Initialize dst_missed_count for the first time */
+ dst_missed_snap_count = 0;
+ }
+
+ for (i = 0; i < src_missed_snap_count; i++) {
+ snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d",
+ i);
+ ret = dict_get_str (src, name_buf, &buf);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to fetch %s", name_buf);
+ goto out;
+ }
+
+ snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d",
+ dst_missed_snap_count);
+
+ tmp_str = gf_strdup (buf);
+ if (!tmp_str) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr (dst, name_buf, tmp_str);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to set %s", name_buf);
+ goto out;
+ }
+
+ tmp_str = NULL;
+ dst_missed_snap_count++;
+ }
+
+ ret = dict_set_int32 (dst, "missed_snap_count", dst_missed_snap_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to set dst_missed_snap_count");
+ goto out;
+ }
+
+out:
+ if (ret && tmp_str)
+ GF_FREE(tmp_str);
+
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_snap_use_rsp_dict (dict_t *dst, dict_t *src)
+{
+ int ret = -1;
+ int32_t snap_command = 0;
+
+ if (!dst || !src) {
+ gf_log ("", GF_LOG_ERROR, "Source or Destination "
+ "dict is empty.");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dst, "type", &snap_command);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "unable to get the type of "
+ "the snapshot command");
+ goto out;
+ }
+
+ switch (snap_command) {
+ case GF_SNAP_OPTION_TYPE_CREATE:
+ case GF_SNAP_OPTION_TYPE_DELETE:
+ ret = glusterd_snap_create_use_rsp_dict (dst, src);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to use rsp dict");
+ goto out;
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_CONFIG:
+ ret = glusterd_snap_config_use_rsp_dict (dst, src);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to use rsp dict");
+ goto out;
+ }
+ break;
+ default:
+ // copy the response dictinary's contents to the dict to be
+ // sent back to the cli
+ dict_copy (src, dst);
+ break;
+ }
+
+ ret = 0;
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_sys_exec_output_rsp_dict (dict_t *dst, dict_t *src)
+{
+ char output_name[PATH_MAX] = "";
+ char *output = NULL;
+ int ret = 0;
+ int i = 0;
+ int len = 0;
+ int src_output_count = 0;
+ int dst_output_count = 0;
+
+ if (!dst || !src) {
+ gf_log ("", GF_LOG_ERROR, "Source or Destination "
+ "dict is empty.");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dst, "output_count", &dst_output_count);
+
+ ret = dict_get_int32 (src, "output_count", &src_output_count);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG, "No output from source");
+ ret = 0;
+ goto out;
+ }
+
+ for (i = 1; i <= src_output_count; i++) {
+ len = snprintf (output_name, sizeof(output_name) - 1,
+ "output_%d", i);
+ output_name[len] = '\0';
+ ret = dict_get_str (src, output_name, &output);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to fetch %s",
+ output_name);
+ goto out;
+ }
+
+ len = snprintf (output_name, sizeof(output_name) - 1,
+ "output_%d", i+dst_output_count);
+ output_name[len] = '\0';
+ ret = dict_set_dynstr (dst, output_name, gf_strdup (output));
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to set %s",
+ output_name);
+ goto out;
+ }
+ }
+
+ ret = dict_set_int32 (dst, "output_count",
+ dst_output_count+src_output_count);
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict)
+{
+ int ret = 0;
+ glusterd_op_t op = GD_OP_NONE;
+
+ op = glusterd_op_get_op ();
+ GF_ASSERT (aggr);
+ GF_ASSERT (rsp_dict);
+
+ if (!aggr)
+ goto out;
+ dict_copy (rsp_dict, aggr);
+out:
+ return ret;
+}
+
+int
+glusterd_volume_heal_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict)
+{
+ int ret = 0;
+ dict_t *ctx_dict = NULL;
+ glusterd_op_t op = GD_OP_NONE;
+
+ GF_ASSERT (rsp_dict);
+
+ op = glusterd_op_get_op ();
+ GF_ASSERT (GD_OP_HEAL_VOLUME == op);
+
+ if (aggr) {
+ ctx_dict = aggr;
+
+ } else {
+ ctx_dict = glusterd_op_get_ctx (op);
+ }
+
+ if (!ctx_dict)
+ goto out;
+ dict_copy (rsp_dict, ctx_dict);
+out:
+ return ret;
+}
+
+int
+_profile_volume_add_brick_rsp (dict_t *this, char *key, data_t *value,
+ void *data)
+{
+ char new_key[256] = {0};
+ glusterd_pr_brick_rsp_conv_t *rsp_ctx = NULL;
+ data_t *new_value = NULL;
+
+ rsp_ctx = data;
+ new_value = data_copy (value);
+ GF_ASSERT (new_value);
+ snprintf (new_key, sizeof (new_key), "%d-%s", rsp_ctx->count, key);
+ dict_set (rsp_ctx->dict, new_key, new_value);
+ return 0;
+}
+
+int
+glusterd_profile_volume_brick_rsp (void *pending_entry,
+ dict_t *rsp_dict, dict_t *op_ctx,
+ char **op_errstr, gd_node_type type)
+{
+ int ret = 0;
+ glusterd_pr_brick_rsp_conv_t rsp_ctx = {0};
+ int32_t count = 0;
+ char brick[PATH_MAX+1024] = {0};
+ char key[256] = {0};
+ char *full_brick = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ GF_ASSERT (rsp_dict);
+ GF_ASSERT (op_ctx);
+ GF_ASSERT (op_errstr);
+ GF_ASSERT (pending_entry);
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = dict_get_int32 (op_ctx, "count", &count);
+ if (ret) {
+ count = 1;
+ } else {
+ count++;
+ }
+ snprintf (key, sizeof (key), "%d-brick", count);
+ if (type == GD_NODE_BRICK) {
+ brickinfo = pending_entry;
+ snprintf (brick, sizeof (brick), "%s:%s", brickinfo->hostname,
+ brickinfo->path);
+ } else if (type == GD_NODE_NFS) {
+ snprintf (brick, sizeof (brick), "%s", uuid_utoa (MY_UUID));
+ }
+ full_brick = gf_strdup (brick);
+ GF_ASSERT (full_brick);
+ ret = dict_set_dynstr (op_ctx, key, full_brick);
+
+ rsp_ctx.count = count;
+ rsp_ctx.dict = op_ctx;
+ dict_foreach (rsp_dict, _profile_volume_add_brick_rsp, &rsp_ctx);
+ dict_del (op_ctx, "count");
+ ret = dict_set_int32 (op_ctx, "count", count);
+ return ret;
+}
+
+//input-key: <replica-id>:<child-id>-*
+//output-key: <brick-id>-*
+int
+_heal_volume_add_shd_rsp (dict_t *this, char *key, data_t *value, void *data)
+{
+ char new_key[256] = {0,};
+ char int_str[16] = {0};
+ data_t *new_value = NULL;
+ char *rxl_end = NULL;
+ char *rxl_child_end = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ int rxl_id = 0;
+ int rxl_child_id = 0;
+ int brick_id = 0;
+ int int_len = 0;
+ int ret = 0;
+ glusterd_heal_rsp_conv_t *rsp_ctx = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+
+ rsp_ctx = data;
+ rxl_end = strchr (key, '-');
+ if (!rxl_end)
+ goto out;
+
+ int_len = strlen (key) - strlen (rxl_end);
+ strncpy (int_str, key, int_len);
+ int_str[int_len] = '\0';
+ ret = gf_string2int (int_str, &rxl_id);
+ if (ret)
+ goto out;
+
+ rxl_child_end = strchr (rxl_end + 1, '-');
+ if (!rxl_child_end)
+ goto out;
+
+ int_len = strlen (rxl_end) - strlen (rxl_child_end) - 1;
+ strncpy (int_str, rxl_end + 1, int_len);
+ int_str[int_len] = '\0';
+ ret = gf_string2int (int_str, &rxl_child_id);
+ if (ret)
+ goto out;
+
+ volinfo = rsp_ctx->volinfo;
+ brick_id = rxl_id * volinfo->replica_count + rxl_child_id;
+
+ if (!strcmp (rxl_child_end, "-status")) {
+ brickinfo = glusterd_get_brickinfo_by_position (volinfo,
+ brick_id);
+ if (!brickinfo)
+ goto out;
+ if (!glusterd_is_local_brick (rsp_ctx->this, volinfo,
+ brickinfo))
+ goto out;
+ }
+ new_value = data_copy (value);
+ snprintf (new_key, sizeof (new_key), "%d%s", brick_id, rxl_child_end);
+ dict_set (rsp_ctx->dict, new_key, new_value);
+
+out:
+ return 0;
+}
+
+int
+_heal_volume_add_shd_rsp_of_statistics (dict_t *this, char *key, data_t
+ *value, void *data)
+{
+ char new_key[256] = {0,};
+ char int_str[16] = {0,};
+ char key_begin_string[128] = {0,};
+ data_t *new_value = NULL;
+ char *rxl_end = NULL;
+ char *rxl_child_end = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *key_begin_str = NULL;
+ int rxl_id = 0;
+ int rxl_child_id = 0;
+ int brick_id = 0;
+ int int_len = 0;
+ int ret = 0;
+ glusterd_heal_rsp_conv_t *rsp_ctx = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+
+ rsp_ctx = data;
+ key_begin_str = strchr (key, '-');
+ if (!key_begin_str)
+ goto out;
+
+ int_len = strlen (key) - strlen (key_begin_str);
+ strncpy (key_begin_string, key, int_len);
+ key_begin_string[int_len] = '\0';
+
+ rxl_end = strchr (key_begin_str + 1, '-');
+ if (!rxl_end)
+ goto out;
+
+ int_len = strlen (key_begin_str) - strlen (rxl_end) - 1;
+ strncpy (int_str, key_begin_str + 1, int_len);
+ int_str[int_len] = '\0';
+ ret = gf_string2int (int_str, &rxl_id);
+ if (ret)
+ goto out;
+
+
+ rxl_child_end = strchr (rxl_end + 1, '-');
+ if (!rxl_child_end)
+ goto out;
+
+ int_len = strlen (rxl_end) - strlen (rxl_child_end) - 1;
+ strncpy (int_str, rxl_end + 1, int_len);
+ int_str[int_len] = '\0';
+ ret = gf_string2int (int_str, &rxl_child_id);
+ if (ret)
+ goto out;
+
+ volinfo = rsp_ctx->volinfo;
+ brick_id = rxl_id * volinfo->replica_count + rxl_child_id;
+
+ brickinfo = glusterd_get_brickinfo_by_position (volinfo, brick_id);
+ if (!brickinfo)
+ goto out;
+ if (!glusterd_is_local_brick (rsp_ctx->this, volinfo, brickinfo))
+ goto out;
+
+ new_value = data_copy (value);
+ snprintf (new_key, sizeof (new_key), "%s-%d%s", key_begin_string,
+ brick_id, rxl_child_end);
+ dict_set (rsp_ctx->dict, new_key, new_value);
+
+out:
+ return 0;
+
+}
+
+int
+glusterd_heal_volume_brick_rsp (dict_t *req_dict, dict_t *rsp_dict,
+ dict_t *op_ctx, char **op_errstr)
+{
+ int ret = 0;
+ glusterd_heal_rsp_conv_t rsp_ctx = {0};
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ int heal_op = -1;
+
+ GF_ASSERT (rsp_dict);
+ GF_ASSERT (op_ctx);
+ GF_ASSERT (op_errstr);
+
+ ret = dict_get_str (req_dict, "volname", &volname);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
+ goto out;
+ }
+
+ ret = dict_get_int32 (req_dict, "heal-op", &heal_op);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get heal_op");
+ goto out;
+ }
+
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+
+ if (ret)
+ goto out;
+
+ rsp_ctx.dict = op_ctx;
+ rsp_ctx.volinfo = volinfo;
+ rsp_ctx.this = THIS;
+ if (heal_op == GF_AFR_OP_STATISTICS)
+ dict_foreach (rsp_dict, _heal_volume_add_shd_rsp_of_statistics,
+ &rsp_ctx);
+ else
+ dict_foreach (rsp_dict, _heal_volume_add_shd_rsp, &rsp_ctx);
+
+
+out:
+ return ret;
+}
+
+int
+_status_volume_add_brick_rsp (dict_t *this, char *key, data_t *value,
+ void *data)
+{
+ char new_key[256] = {0,};
+ data_t *new_value = 0;
+ glusterd_pr_brick_rsp_conv_t *rsp_ctx = NULL;
+
+ rsp_ctx = data;
+ new_value = data_copy (value);
+ snprintf (new_key, sizeof (new_key), "brick%d.%s", rsp_ctx->count, key);
+ dict_set (rsp_ctx->dict, new_key, new_value);
+
+ return 0;
+}
+
+int
+glusterd_status_volume_brick_rsp (dict_t *rsp_dict, dict_t *op_ctx,
+ char **op_errstr)
+{
+ int ret = 0;
+ glusterd_pr_brick_rsp_conv_t rsp_ctx = {0};
+ int32_t count = 0;
+ int index = 0;
+
+ GF_ASSERT (rsp_dict);
+ GF_ASSERT (op_ctx);
+ GF_ASSERT (op_errstr);
+
+ ret = dict_get_int32 (op_ctx, "count", &count);
+ if (ret) {
+ count = 0;
+ } else {
+ count++;
+ }
+ ret = dict_get_int32 (rsp_dict, "index", &index);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Couldn't get node index");
+ goto out;
+ }
+ dict_del (rsp_dict, "index");
+
+ rsp_ctx.count = index;
+ rsp_ctx.dict = op_ctx;
+ dict_foreach (rsp_dict, _status_volume_add_brick_rsp, &rsp_ctx);
+ ret = dict_set_int32 (op_ctx, "count", count);
+
+out:
+ return ret;
+}
+
+int
+glusterd_defrag_volume_node_rsp (dict_t *req_dict, dict_t *rsp_dict,
+ dict_t *op_ctx)
+{
+ int ret = 0;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char key[256] = {0,};
+ int32_t i = 0;
+ char buf[1024] = {0,};
+ char *node_str = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ priv = THIS->private;
+ GF_ASSERT (req_dict);
+
+ ret = dict_get_str (req_dict, "volname", &volname);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+
+ if (ret)
+ goto out;
+
+ if (rsp_dict) {
+ ret = glusterd_defrag_volume_status_update (volinfo,
+ rsp_dict);
+ }
+
+ if (!op_ctx) {
+ dict_copy (rsp_dict, op_ctx);
+ goto out;
+ }
+
+ ret = dict_get_int32 (op_ctx, "count", &i);
+ i++;
+
+ ret = dict_set_int32 (op_ctx, "count", i);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to set count");
+
+ snprintf (buf, 1024, "%s", uuid_utoa (MY_UUID));
+ node_str = gf_strdup (buf);
+
+ snprintf (key, 256, "node-uuid-%d",i);
+ ret = dict_set_dynstr (op_ctx, key, node_str);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "failed to set node-uuid");
+
+ memset (key, 0 , 256);
+ snprintf (key, 256, "files-%d", i);
+ ret = dict_set_uint64 (op_ctx, key, volinfo->rebal.rebalance_files);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "failed to set file count");
+
+ memset (key, 0 , 256);
+ snprintf (key, 256, "size-%d", i);
+ ret = dict_set_uint64 (op_ctx, key, volinfo->rebal.rebalance_data);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "failed to set size of xfer");
+
+ memset (key, 0 , 256);
+ snprintf (key, 256, "lookups-%d", i);
+ ret = dict_set_uint64 (op_ctx, key, volinfo->rebal.lookedup_files);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "failed to set lookedup file count");
+
+ memset (key, 0 , 256);
+ snprintf (key, 256, "status-%d", i);
+ ret = dict_set_int32 (op_ctx, key, volinfo->rebal.defrag_status);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "failed to set status");
+
+ memset (key, 0 , 256);
+ snprintf (key, 256, "failures-%d", i);
+ ret = dict_set_uint64 (op_ctx, key, volinfo->rebal.rebalance_failures);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "failed to set failure count");
+
+ memset (key, 0 , 256);
+ snprintf (key, 256, "skipped-%d", i);
+ ret = dict_set_uint64 (op_ctx, key, volinfo->rebal.skipped_files);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "failed to set skipped count");
+
+ memset (key, 0, 256);
+ snprintf (key, 256, "run-time-%d", i);
+ ret = dict_set_double (op_ctx, key, volinfo->rebal.rebalance_time);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "failed to set run-time");
+
+out:
+ return ret;
+}
+int32_t
+glusterd_handle_node_rsp (dict_t *req_dict, void *pending_entry,
+ glusterd_op_t op, dict_t *rsp_dict, dict_t *op_ctx,
+ char **op_errstr, gd_node_type type)
+{
+ int ret = 0;
+
+ GF_ASSERT (op_errstr);
+
+ switch (op) {
+ case GD_OP_PROFILE_VOLUME:
+ ret = glusterd_profile_volume_brick_rsp (pending_entry,
+ rsp_dict, op_ctx,
+ op_errstr, type);
+ break;
+ case GD_OP_STATUS_VOLUME:
+ ret = glusterd_status_volume_brick_rsp (rsp_dict, op_ctx,
+ op_errstr);
+ break;
+
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ glusterd_defrag_volume_node_rsp (req_dict,
+ rsp_dict, op_ctx);
+ break;
+
+ case GD_OP_HEAL_VOLUME:
+ ret = glusterd_heal_volume_brick_rsp (req_dict, rsp_dict,
+ op_ctx, op_errstr);
+ break;
+ default:
+ break;
+ }
+
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+/* Should be used only when an operation is in progress, as that is the only
+ * time a lock_owner is set
+ */
+gf_boolean_t
+is_origin_glusterd (dict_t *dict)
+{
+ gf_boolean_t ret = _gf_false;
+ uuid_t lock_owner = {0,};
+ uuid_t *originator_uuid = NULL;
+
+ GF_ASSERT (dict);
+
+ ret = dict_get_bin (dict, "originator_uuid",
+ (void **) &originator_uuid);
+ if (ret) {
+ /* If not originator_uuid has been set, then the command
+ * has been originated from a glusterd running on older version
+ * Hence fetching the lock owner */
+ ret = glusterd_get_lock_owner (&lock_owner);
+ if (ret) {
+ ret = _gf_false;
+ goto out;
+ }
+ ret = !uuid_compare (MY_UUID, lock_owner);
+ } else
+ ret = !uuid_compare (MY_UUID, *originator_uuid);
+
+out:
+ return ret;
+}
+
+int
+glusterd_generate_and_set_task_id (dict_t *dict, char *key)
+{
+ int ret = -1;
+ uuid_t task_id = {0,};
+ char *uuid_str = NULL;
+ xlator_t *this = NULL;
+
+ GF_ASSERT (dict);
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ uuid_generate (task_id);
+ uuid_str = gf_strdup (uuid_utoa (task_id));
+ if (!uuid_str) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr (dict, key, uuid_str);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set %s in dict",
+ key);
+ goto out;
+ }
+ gf_log (this->name, GF_LOG_INFO, "Generated task-id %s for key %s",
+ uuid_str, key);
+
+out:
+ if (ret)
+ GF_FREE (uuid_str);
+ return ret;
+}
+
+int
+glusterd_copy_uuid_to_dict (uuid_t uuid, dict_t *dict, char *key)
+{
+ int ret = -1;
+ char tmp_str[40] = {0,};
+ char *task_id_str = NULL;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (key);
+
+ uuid_unparse (uuid, tmp_str);
+ task_id_str = gf_strdup (tmp_str);
+ if (!task_id_str)
+ return -1;
+
+ ret = dict_set_dynstr (dict, key, task_id_str);
+ if (ret) {
+ GF_FREE (task_id_str);
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Error setting uuid in dict with key %s", key);
+ }
+
+ return 0;
+}
+
+int
+_update_volume_op_versions (dict_t *this, char *key, data_t *value, void *data)
+{
+ int op_version = 0;
+ glusterd_volinfo_t *ctx = NULL;
+ gf_boolean_t enabled = _gf_true;
+ int ret = -1;
+
+ GF_ASSERT (data);
+ ctx = data;
+
+ op_version = glusterd_get_op_version_for_key (key);
+
+ if (gd_is_xlator_option (key) || gd_is_boolean_option (key)) {
+ ret = gf_string2boolean (value->data, &enabled);
+ if (ret)
+ return 0;
+
+ if (!enabled)
+ return 0;
+ }
+
+ if (op_version > ctx->op_version)
+ ctx->op_version = op_version;
+
+ if (gd_is_client_option (key) &&
+ (op_version > ctx->client_op_version))
+ ctx->client_op_version = op_version;
+
+ return 0;
+}
+
+void
+gd_update_volume_op_versions (glusterd_volinfo_t *volinfo)
+{
+ glusterd_conf_t *conf = NULL;
+ gf_boolean_t ob_enabled = _gf_false;
+
+ GF_ASSERT (volinfo);
+
+ conf = THIS->private;
+ GF_ASSERT (conf);
+
+ /* Reset op-versions to minimum */
+ volinfo->op_version = 1;
+ volinfo->client_op_version = 1;
+
+ dict_foreach (volinfo->dict, _update_volume_op_versions, volinfo);
+
+ /* Special case for open-behind
+ * If cluster op-version >= 2 and open-behind hasn't been explicitly
+ * disabled, volume op-versions must be updated to account for it
+ */
+
+ /* TODO: Remove once we have a general way to update automatically
+ * enabled features
+ */
+ if (conf->op_version >= 2) {
+ ob_enabled = dict_get_str_boolean (volinfo->dict,
+ "performance.open-behind",
+ _gf_true);
+ if (ob_enabled) {
+
+ if (volinfo->op_version < 2)
+ volinfo->op_version = 2;
+ if (volinfo->client_op_version < 2)
+ volinfo->client_op_version = 2;
+ }
+ }
+
+ return;
+}
+
+/* A task is committed/completed once the task-id for it is cleared */
+gf_boolean_t
+gd_is_remove_brick_committed (glusterd_volinfo_t *volinfo)
+{
+ GF_ASSERT (volinfo);
+
+ if ((GD_OP_REMOVE_BRICK == volinfo->rebal.op) &&
+ !uuid_is_null (volinfo->rebal.rebalance_id))
+ return _gf_false;
+
+ return _gf_true;
+}
+
+gf_boolean_t
+glusterd_are_vol_all_peers_up (glusterd_volinfo_t *volinfo,
+ struct list_head *peers,
+ char **down_peerstr)
+{
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ gf_boolean_t ret = _gf_false;
+
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ if (!uuid_compare (brickinfo->uuid, MY_UUID))
+ continue;
+
+ list_for_each_entry (peerinfo, peers, uuid_list) {
+ if (uuid_compare (peerinfo->uuid, brickinfo->uuid))
+ continue;
+
+ /*Found peer who owns the brick, return false
+ * if peer is not connected or not friend */
+ if (!(peerinfo->connected) ||
+ (peerinfo->state.state !=
+ GD_FRIEND_STATE_BEFRIENDED)) {
+ *down_peerstr = gf_strdup (peerinfo->hostname);
+ gf_log ("", GF_LOG_DEBUG, "Peer %s is down. ",
+ peerinfo->hostname);
+ goto out;
+ }
+ }
+ }
+
+ ret = _gf_true;
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+gf_boolean_t
+glusterd_is_status_tasks_op (glusterd_op_t op, dict_t *dict)
+{
+ int ret = -1;
+ uint32_t cmd = GF_CLI_STATUS_NONE;
+ gf_boolean_t is_status_tasks = _gf_false;
+
+ if (op != GD_OP_STATUS_VOLUME)
+ goto out;
+
+ ret = dict_get_uint32 (dict, "cmd", &cmd);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to get opcode");
+ goto out;
+ }
+
+ if (cmd & GF_CLI_STATUS_TASKS)
+ is_status_tasks = _gf_true;
+
+out:
+ return is_status_tasks;
+}
+
+int
+glusterd_compare_snap_time(struct list_head *list1, struct list_head *list2)
+{
+ glusterd_snap_t *snap1 = NULL;
+ glusterd_snap_t *snap2 = NULL;
+ double diff_time = 0;
+
+ GF_ASSERT (list1);
+ GF_ASSERT (list2);
+
+ snap1 = list_entry(list1, glusterd_snap_t, snap_list);
+ snap2 = list_entry(list2, glusterd_snap_t, snap_list);
+ diff_time = difftime(snap1->time_stamp, snap2->time_stamp);
+
+ return ((int)diff_time);
+}
+
+int
+glusterd_compare_snap_vol_time(struct list_head *list1, struct list_head *list2)
+{
+ glusterd_volinfo_t *snapvol1 = NULL;
+ glusterd_volinfo_t *snapvol2 = NULL;
+ double diff_time = 0;
+
+ GF_ASSERT (list1);
+ GF_ASSERT (list2);
+
+ snapvol1 = list_entry(list1, glusterd_volinfo_t, snapvol_list);
+ snapvol2 = list_entry(list2, glusterd_volinfo_t, snapvol_list);
+ diff_time = difftime(snapvol1->snapshot->time_stamp,
+ snapvol2->snapshot->time_stamp);
+
+ return ((int)diff_time);
+}
+
+int32_t
+glusterd_missed_snapinfo_new (glusterd_missed_snap_info **missed_snapinfo)
+{
+ glusterd_missed_snap_info *new_missed_snapinfo = NULL;
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (missed_snapinfo);
+
+ new_missed_snapinfo = GF_CALLOC (1, sizeof(*new_missed_snapinfo),
+ gf_gld_mt_missed_snapinfo_t);
+
+ if (!new_missed_snapinfo)
+ goto out;
+
+ new_missed_snapinfo->node_snap_info = NULL;
+ INIT_LIST_HEAD (&new_missed_snapinfo->missed_snaps);
+ INIT_LIST_HEAD (&new_missed_snapinfo->snap_ops);
+
+ *missed_snapinfo = new_missed_snapinfo;
+
+ ret = 0;
+
+out:
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_missed_snap_op_new (glusterd_snap_op_t **snap_op)
+{
+ glusterd_snap_op_t *new_snap_op = NULL;
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (snap_op);
+
+ new_snap_op = GF_CALLOC (1, sizeof(*new_snap_op),
+ gf_gld_mt_missed_snapinfo_t);
+
+ if (!new_snap_op)
+ goto out;
+
+ new_snap_op->brick_path = NULL;
+ new_snap_op->brick_num = -1;
+ new_snap_op->op = -1;
+ new_snap_op->status = -1;
+ INIT_LIST_HEAD (&new_snap_op->snap_ops_list);
+
+ *snap_op = new_snap_op;
+
+ ret = 0;
+out:
+ gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
new file mode 100644
index 000000000..56bb799bf
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -0,0 +1,634 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_UTILS_H
+#define _GLUSTERD_UTILS_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <pthread.h>
+#include "uuid.h"
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "call-stub.h"
+#include "fd.h"
+#include "byte-order.h"
+#include "glusterd.h"
+#include "rpc-clnt.h"
+#include "protocol-common.h"
+
+#define GLUSTERD_SOCK_DIR "/var/run"
+
+struct glusterd_lock_ {
+ uuid_t owner;
+ time_t timestamp;
+};
+
+typedef struct glusterd_dict_ctx_ {
+ dict_t *dict;
+ int opt_count;
+ char *key_name;
+ char *val_name;
+ char *prefix;
+} glusterd_dict_ctx_t;
+
+int
+glusterd_compare_lines (const void *a, const void *b);
+
+typedef int (*glusterd_condition_func) (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ void *ctx);
+typedef struct glusterd_lock_ glusterd_lock_t;
+
+int32_t
+glusterd_get_lock_owner (uuid_t *cur_owner);
+
+int32_t
+glusterd_lock (uuid_t new_owner);
+
+int32_t
+glusterd_unlock (uuid_t owner);
+
+int32_t
+glusterd_get_uuid (uuid_t *uuid);
+
+int
+glusterd_submit_reply (rpcsvc_request_t *req, void *arg,
+ struct iovec *payload, int payloadcount,
+ struct iobref *iobref, xdrproc_t xdrproc);
+
+int
+glusterd_to_cli (rpcsvc_request_t *req, gf_cli_rsp *arg, struct iovec *payload,
+ int payloadcount, struct iobref *iobref, xdrproc_t xdrproc,
+ dict_t *dict);
+
+int
+glusterd_submit_request (struct rpc_clnt *rpc, void *req,
+ call_frame_t *frame, rpc_clnt_prog_t *prog,
+ int procnum, struct iobref *iobref,
+ xlator_t *this, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc);
+int32_t
+glusterd_volinfo_new (glusterd_volinfo_t **volinfo);
+
+int32_t
+glusterd_volinfo_dup (glusterd_volinfo_t *volinfo,
+ glusterd_volinfo_t **dup_volinfo,
+ gf_boolean_t set_userauth);
+
+char *
+glusterd_auth_get_username (glusterd_volinfo_t *volinfo);
+
+char *
+glusterd_auth_get_password (glusterd_volinfo_t *volinfo);
+
+int32_t
+glusterd_auth_set_username (glusterd_volinfo_t *volinfo, char *username);
+
+int32_t
+glusterd_auth_set_password (glusterd_volinfo_t *volinfo, char *password);
+
+void
+glusterd_auth_cleanup (glusterd_volinfo_t *volinfo);
+
+gf_boolean_t
+glusterd_check_volume_exists (char *volname);
+
+int32_t
+glusterd_brickinfo_new (glusterd_brickinfo_t **brickinfo);
+
+int32_t
+glusterd_brickinfo_new_from_brick (char *brick, glusterd_brickinfo_t **brickinfo);
+
+int32_t
+glusterd_friend_cleanup (glusterd_peerinfo_t *peerinfo);
+
+int32_t
+glusterd_peer_destroy (glusterd_peerinfo_t *peerinfo);
+
+int32_t
+glusterd_peer_hostname_new (char *hostname, glusterd_peer_hostname_t **name);
+
+int32_t
+glusterd_snap_volinfo_find (char *volname, glusterd_snap_t *snap,
+ glusterd_volinfo_t **volinfo);
+int32_t
+glusterd_snap_volinfo_find_from_parent_volname (char *origin_volname,
+ glusterd_snap_t *snap,
+ glusterd_volinfo_t **volinfo);
+
+int32_t
+glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo);
+
+int
+glusterd_volinfo_find_by_volume_id (uuid_t volume_id, glusterd_volinfo_t **volinfo);
+
+int
+glusterd_snap_volinfo_find_by_volume_id (uuid_t volume_id,
+ glusterd_volinfo_t **volinfo);
+
+int32_t
+glusterd_service_stop(const char *service, char *pidfile, int sig,
+ gf_boolean_t force_kill);
+
+int32_t
+glusterd_resolve_brick (glusterd_brickinfo_t *brickinfo);
+
+int32_t
+glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ gf_boolean_t wait);
+
+int32_t
+glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ gf_boolean_t del_brick);
+
+int32_t
+glusterd_volinfo_delete (glusterd_volinfo_t *volinfo);
+
+int32_t
+glusterd_brickinfo_delete (glusterd_brickinfo_t *brickinfo);
+
+gf_boolean_t
+glusterd_is_cli_op_req (int32_t op);
+
+int32_t
+glusterd_volume_brickinfo_get_by_brick (char *brick,
+ glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t **brickinfo);
+
+int32_t
+glusterd_build_volume_dict (dict_t **vols);
+
+int32_t
+glusterd_compare_friend_data (dict_t *vols, int32_t *status, char *hostname);
+
+int
+glusterd_volume_compute_cksum (glusterd_volinfo_t *volinfo);
+
+void
+glusterd_get_nodesvc_volfile (char *server, char *workdir,
+ char *volfile, size_t len);
+
+gf_boolean_t
+glusterd_is_service_running (char *pidfile, int *pid);
+
+gf_boolean_t
+glusterd_is_nodesvc_running ();
+
+gf_boolean_t
+glusterd_is_nodesvc_running ();
+
+void
+glusterd_get_nodesvc_dir (char *server, char *workdir,
+ char *path, size_t len);
+int32_t
+glusterd_nfs_server_start ();
+
+int32_t
+glusterd_nfs_server_stop ();
+
+int32_t
+glusterd_shd_start ();
+
+int32_t
+glusterd_shd_stop ();
+
+void
+glusterd_set_socket_filepath (char *sock_filepath, char *sockpath, size_t len);
+
+int32_t
+glusterd_nodesvc_set_socket_filepath (char *rundir, uuid_t uuid,
+ char *socketpath, int len);
+
+struct rpc_clnt*
+glusterd_pending_node_get_rpc (glusterd_pending_node_t *pending_node);
+
+struct rpc_clnt*
+glusterd_nodesvc_get_rpc (char *server);
+
+int32_t
+glusterd_nodesvc_set_rpc (char *server, struct rpc_clnt *rpc);
+
+int32_t
+glusterd_nodesvc_connect (char *server, char *socketpath);
+
+void
+glusterd_nodesvc_set_online_status (char *server, gf_boolean_t status);
+
+gf_boolean_t
+glusterd_is_nodesvc_online (char *server);
+
+int
+glusterd_remote_hostname_get (rpcsvc_request_t *req,
+ char *remote_host, int len);
+int32_t
+glusterd_import_friend_volumes (dict_t *vols);
+void
+glusterd_set_volume_status (glusterd_volinfo_t *volinfo,
+ glusterd_volume_status status);
+int
+glusterd_check_generate_start_nfs (void);
+
+int
+glusterd_check_generate_start_shd (void);
+
+int
+glusterd_nodesvcs_handle_graph_change (glusterd_volinfo_t *volinfo);
+
+int
+glusterd_nodesvcs_handle_reconfigure (glusterd_volinfo_t *volinfo);
+
+int
+glusterd_nodesvcs_start (glusterd_volinfo_t *volinfo);
+
+int
+glusterd_nodesvcs_stop (glusterd_volinfo_t *volinfo);
+
+int32_t
+glusterd_volume_count_get (void);
+int32_t
+glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo,
+ dict_t *dict, int32_t count);
+int
+glusterd_get_brickinfo (xlator_t *this, const char *brickname,
+ int port, gf_boolean_t localhost,
+ glusterd_brickinfo_t **brickinfo);
+
+void
+glusterd_set_brick_status (glusterd_brickinfo_t *brickinfo,
+ gf_brick_status_t status);
+
+gf_boolean_t
+glusterd_is_brick_started (glusterd_brickinfo_t *brickinfo);
+
+int
+glusterd_friend_find_by_hostname (const char *hoststr,
+ glusterd_peerinfo_t **peerinfo);
+int
+glusterd_hostname_to_uuid (char *hostname, uuid_t uuid);
+
+int
+glusterd_friend_brick_belongs (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo, void *uuid);
+int
+glusterd_all_volume_cond_check (glusterd_condition_func func, int status,
+ void *ctx);
+int
+glusterd_brick_start (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ gf_boolean_t wait);
+int
+glusterd_brick_stop (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ gf_boolean_t del_brick);
+
+int
+glusterd_is_defrag_on (glusterd_volinfo_t *volinfo);
+
+int32_t
+glusterd_volinfo_bricks_delete (glusterd_volinfo_t *volinfo);
+
+int
+glusterd_friend_find_by_uuid (uuid_t uuid,
+ glusterd_peerinfo_t **peerinfo);
+int
+glusterd_new_brick_validate (char *brick, glusterd_brickinfo_t *brickinfo,
+ char *op_errstr, size_t len);
+int32_t
+glusterd_volume_brickinfos_delete (glusterd_volinfo_t *volinfo);
+
+int32_t
+glusterd_volume_brickinfo_get (uuid_t uuid, char *hostname, char *path,
+ glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t **brickinfo);
+
+int
+glusterd_brickinfo_get (uuid_t uuid, char *hostname, char *path,
+ glusterd_brickinfo_t **brickinfo);
+int
+glusterd_is_rb_started (glusterd_volinfo_t *volinfo);
+
+int
+glusterd_is_rb_paused (glusterd_volinfo_t *volinfo);
+
+int
+glusterd_set_rb_status (glusterd_volinfo_t *volinfo, gf_rb_status_t status);
+
+gf_boolean_t
+glusterd_is_rb_ongoing (glusterd_volinfo_t *volinfo);
+
+int
+glusterd_rb_check_bricks (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *src_brick,
+ glusterd_brickinfo_t *dst_brick);
+
+int
+glusterd_check_and_set_brick_xattr (char *host, char *path, uuid_t uuid,
+ char **op_errstr, gf_boolean_t is_force);
+
+int
+glusterd_validate_and_create_brickpath (glusterd_brickinfo_t *brickinfo,
+ uuid_t volume_id, char **op_errstr,
+ gf_boolean_t is_force);
+int
+glusterd_sm_tr_log_transition_add (glusterd_sm_tr_log_t *log,
+ int old_state, int new_state,
+ int event);
+int
+glusterd_peerinfo_new (glusterd_peerinfo_t **peerinfo,
+ glusterd_friend_sm_state_t state, uuid_t *uuid,
+ const char *hostname, int port);
+int
+glusterd_sm_tr_log_init (glusterd_sm_tr_log_t *log,
+ char * (*state_name_get) (int),
+ char * (*event_name_get) (int),
+ size_t size);
+void
+glusterd_sm_tr_log_delete (glusterd_sm_tr_log_t *log);
+
+int
+glusterd_sm_tr_log_add_to_dict (dict_t *dict,
+ glusterd_sm_tr_log_t *circular_log);
+int
+glusterd_remove_pending_entry (struct list_head *list, void *elem);
+int
+glusterd_clear_pending_nodes (struct list_head *list);
+gf_boolean_t
+glusterd_peerinfo_is_uuid_unknown (glusterd_peerinfo_t *peerinfo);
+int32_t
+glusterd_brick_connect (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo, char *socketpath);
+int32_t
+glusterd_brick_disconnect (glusterd_brickinfo_t *brickinfo);
+int32_t
+glusterd_delete_volume (glusterd_volinfo_t *volinfo);
+int32_t
+glusterd_delete_brick (glusterd_volinfo_t* volinfo,
+ glusterd_brickinfo_t *brickinfo);
+
+int32_t
+glusterd_delete_all_bricks (glusterd_volinfo_t* volinfo);
+
+int
+glusterd_spawn_daemons (void *opaque);
+
+int
+glusterd_restart_gsyncds (glusterd_conf_t *conf);
+
+int
+glusterd_start_gsync (glusterd_volinfo_t *master_vol, char *slave,
+ char *path_list, char *conf_path,
+ char *glusterd_uuid_str,
+ char **op_errstr);
+int
+glusterd_get_local_brickpaths (glusterd_volinfo_t *volinfo,
+ char **pathlist);
+
+int32_t
+glusterd_recreate_bricks (glusterd_conf_t *conf);
+int32_t
+glusterd_handle_upgrade_downgrade (dict_t *options, glusterd_conf_t *conf);
+
+int
+glusterd_add_brick_detail_to_dict (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ dict_t *dict, int32_t count);
+
+int32_t
+glusterd_add_brick_to_dict (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ dict_t *dict, int32_t count);
+
+int32_t
+glusterd_get_all_volnames (dict_t *dict);
+
+gf_boolean_t
+glusterd_is_fuse_available ();
+
+int
+glusterd_brick_statedump (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ char *options, int option_cnt, char **op_errstr);
+int
+glusterd_nfs_statedump (char *options, int option_cnt, char **op_errstr);
+gf_boolean_t
+glusterd_is_volume_replicate (glusterd_volinfo_t *volinfo);
+gf_boolean_t
+glusterd_is_brick_decommissioned (glusterd_volinfo_t *volinfo, char *hostname,
+ char *path);
+gf_boolean_t
+glusterd_friend_contains_vol_bricks (glusterd_volinfo_t *volinfo,
+ uuid_t friend_uuid);
+int
+glusterd_friend_remove_cleanup_vols (uuid_t uuid);
+
+gf_boolean_t
+glusterd_chk_peers_connected_befriended (uuid_t skip_uuid);
+
+void
+glusterd_get_client_filepath (char *filepath,
+ glusterd_volinfo_t *volinfo,
+ gf_transport_type type);
+void
+glusterd_get_trusted_client_filepath (char *filepath,
+ glusterd_volinfo_t *volinfo,
+ gf_transport_type type);
+int
+glusterd_restart_rebalance (glusterd_conf_t *conf);
+
+int32_t
+glusterd_add_bricks_hname_path_to_dict (dict_t *dict,
+ glusterd_volinfo_t *volinfo);
+
+int
+glusterd_add_node_to_dict (char *server, dict_t *dict, int count,
+ dict_t *vol_opts);
+
+char *
+glusterd_uuid_to_hostname (uuid_t uuid);
+
+int
+glusterd_get_dist_leaf_count (glusterd_volinfo_t *volinfo);
+
+glusterd_brickinfo_t*
+glusterd_get_brickinfo_by_position (glusterd_volinfo_t *volinfo, uint32_t pos);
+
+gf_boolean_t
+glusterd_is_local_brick (xlator_t *this, glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo);
+int
+glusterd_validate_volume_id (dict_t *op_dict, glusterd_volinfo_t *volinfo);
+
+int
+glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo,
+ dict_t *rsp_dict);
+
+int
+glusterd_check_files_identical (char *filename1, char *filename2,
+ gf_boolean_t *identical);
+
+int
+glusterd_check_topology_identical (const char *filename1,
+ const char *filename2,
+ gf_boolean_t *identical);
+
+void
+glusterd_volinfo_reset_defrag_stats (glusterd_volinfo_t *volinfo);
+int
+glusterd_volset_help (dict_t *dict, char **op_errstr);
+
+int32_t
+glusterd_sync_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict);
+int32_t
+glusterd_gsync_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict, char *op_errstr);
+int32_t
+glusterd_rb_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict);
+int
+glusterd_profile_volume_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict);
+int
+glusterd_volume_status_copy_to_op_ctx_dict (dict_t *aggr, dict_t *rsp_dict);
+int
+glusterd_volume_rebalance_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict);
+int
+glusterd_volume_heal_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict);
+int
+glusterd_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict);
+int
+glusterd_sys_exec_output_rsp_dict (dict_t *aggr, dict_t *rsp_dict);
+int
+glusterd_snap_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict);
+int32_t
+glusterd_handle_node_rsp (dict_t *req_ctx, void *pending_entry,
+ glusterd_op_t op, dict_t *rsp_dict, dict_t *op_ctx,
+ char **op_errstr, gd_node_type type);
+int
+glusterd_volume_rebalance_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict);
+int
+glusterd_volume_heal_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict);
+int
+_profile_volume_add_brick_rsp (dict_t *this, char *key, data_t *value,
+ void *data);
+int
+glusterd_profile_volume_brick_rsp (void *pending_entry,
+ dict_t *rsp_dict, dict_t *op_ctx,
+ char **op_errstr, gd_node_type type);
+
+gf_boolean_t
+glusterd_are_vol_all_peers_up (glusterd_volinfo_t *volinfo,
+ struct list_head *peers,
+ char **down_peerstr);
+
+/* Should be used only when an operation is in progress, as that is the only
+ * time a lock_owner is set
+ */
+gf_boolean_t
+is_origin_glusterd (dict_t *dict);
+
+gf_boolean_t
+glusterd_is_quorum_changed (dict_t *options, char *option, char *value);
+
+int
+glusterd_do_quorum_action ();
+
+int
+glusterd_get_quorum_cluster_counts (xlator_t *this, int *active_count,
+ int *quorum_count);
+
+int
+glusterd_get_next_global_opt_version_str (dict_t *opts, char **version_str);
+gf_boolean_t
+glusterd_is_quorum_option (char *option);
+gf_boolean_t
+glusterd_is_volume_in_server_quorum (glusterd_volinfo_t *volinfo);
+gf_boolean_t
+glusterd_is_any_volume_in_server_quorum (xlator_t *this);
+gf_boolean_t
+does_gd_meet_server_quorum (xlator_t *this);
+
+int
+glusterd_generate_and_set_task_id (dict_t *dict, char *key);
+
+int
+glusterd_copy_uuid_to_dict (uuid_t uuid, dict_t *dict, char *key);
+
+gf_boolean_t
+glusterd_is_same_address (char *name1, char *name2);
+
+void
+gd_update_volume_op_versions (glusterd_volinfo_t *volinfo);
+
+char*
+gd_peer_uuid_str (glusterd_peerinfo_t *peerinfo);
+
+gf_boolean_t
+gd_is_remove_brick_committed (glusterd_volinfo_t *volinfo);
+
+gf_boolean_t
+glusterd_are_vol_all_peers_up (glusterd_volinfo_t *volinfo,
+ struct list_head *peers,
+ char **down_peerstr);
+
+int
+glusterd_get_slave_details_confpath (glusterd_volinfo_t *volinfo, dict_t *dict,
+ char **slave_ip, char **slave_vol,
+ char **conf_path, char **op_errstr);
+
+int
+glusterd_check_restart_gsync_session (glusterd_volinfo_t *volinfo, char *slave,
+ dict_t *resp_dict, char *path_list,
+ char *conf_path, gf_boolean_t is_force);
+
+int
+glusterd_check_gsync_running_local (char *master, char *slave,
+ char *conf_path,
+ gf_boolean_t *is_run);
+
+gf_boolean_t
+glusterd_is_status_tasks_op (glusterd_op_t op, dict_t *dict);
+
+#ifdef GF_LINUX_HOST_OS
+char*
+glusterd_get_brick_mount_details (glusterd_brickinfo_t *brickinfo);
+struct mntent *
+glusterd_get_mnt_entry_info (char *mnt_pt, FILE *mtab);
+int
+glusterd_get_brick_root (char *path, char **mount_point);
+#endif //LINUX_HOST
+
+int
+glusterd_compare_snap_time(struct list_head *, struct list_head *);
+
+int
+glusterd_compare_snap_vol_time(struct list_head *, struct list_head *);
+
+int32_t
+glusterd_snap_volinfo_restore (dict_t *rsp_dict,
+ glusterd_volinfo_t *new_volinfo,
+ glusterd_volinfo_t *snap_volinfo);
+int32_t
+glusterd_lvm_snapshot_remove (dict_t *rsp_dict, glusterd_volinfo_t *snap_vol);
+
+int32_t
+glusterd_missed_snapinfo_new (glusterd_missed_snap_info **missed_snapinfo);
+
+int32_t
+glusterd_missed_snap_op_new (glusterd_snap_op_t **snap_op);
+
+int32_t
+glusterd_add_missed_snaps_to_dict (dict_t *rsp_dict, char *snap_uuid,
+ glusterd_brickinfo_t *brickinfo,
+ int32_t brick_number, int32_t op);
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
new file mode 100644
index 000000000..6f3c69e7d
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -0,0 +1,4065 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <fnmatch.h>
+#include <sys/wait.h>
+#include <dlfcn.h>
+
+#if (HAVE_LIB_XML)
+#include <libxml/encoding.h>
+#include <libxml/xmlwriter.h>
+#endif
+
+#include "xlator.h"
+#include "glusterd.h"
+#include "defaults.h"
+#include "logging.h"
+#include "dict.h"
+#include "graph-utils.h"
+#include "glusterd-store.h"
+#include "glusterd-hooks.h"
+#include "trie.h"
+#include "glusterd-mem-types.h"
+#include "cli1-xdr.h"
+#include "glusterd-volgen.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-utils.h"
+#include "run.h"
+#include "options.h"
+
+extern struct volopt_map_entry glusterd_volopt_map[];
+
+/*********************************************
+ *
+ * xlator generation / graph manipulation API
+ *
+ *********************************************/
+
+
+struct volgen_graph {
+ char **errstr;
+ glusterfs_graph_t graph;
+};
+typedef struct volgen_graph volgen_graph_t;
+
+static void
+set_graph_errstr (volgen_graph_t *graph, const char *str)
+{
+ if (!graph->errstr)
+ return;
+
+ *graph->errstr = gf_strdup (str);
+}
+
+static xlator_t *
+xlator_instantiate_va (const char *type, const char *format, va_list arg)
+{
+ xlator_t *xl = NULL;
+ char *volname = NULL;
+ int ret = 0;
+
+ ret = gf_vasprintf (&volname, format, arg);
+ if (ret < 0) {
+ volname = NULL;
+
+ goto error;
+ }
+
+ xl = GF_CALLOC (1, sizeof (*xl), gf_common_mt_xlator_t);
+ if (!xl)
+ goto error;
+ ret = xlator_set_type_virtual (xl, type);
+ if (ret)
+ goto error;
+ xl->options = get_new_dict();
+ if (!xl->options)
+ goto error;
+ xl->name = volname;
+ INIT_LIST_HEAD (&xl->volume_options);
+
+ xl->ctx = THIS->ctx;
+
+ return xl;
+
+ error:
+ gf_log ("", GF_LOG_ERROR, "creating xlator of type %s failed",
+ type);
+ GF_FREE (volname);
+ if (xl)
+ xlator_destroy (xl);
+
+ return NULL;
+}
+
+#ifdef __not_used_as_of_now_
+static xlator_t *
+xlator_instantiate (const char *type, const char *format, ...)
+{
+ va_list arg;
+ xlator_t *xl;
+
+ va_start (arg, format);
+ xl = xlator_instantiate_va (type, format, arg);
+ va_end (arg);
+
+ return xl;
+}
+#endif
+
+static int
+volgen_xlator_link (xlator_t *pxl, xlator_t *cxl)
+{
+ int ret = 0;
+
+ ret = glusterfs_xlator_link (pxl, cxl);
+ if (ret == -1) {
+ gf_log ("", GF_LOG_ERROR,
+ "Out of memory, cannot link xlators %s <- %s",
+ pxl->name, cxl->name);
+ }
+
+ return ret;
+}
+
+static int
+volgen_graph_link (volgen_graph_t *graph, xlator_t *xl)
+{
+ int ret = 0;
+
+ /* no need to care about graph->top here */
+ if (graph->graph.first)
+ ret = volgen_xlator_link (xl, graph->graph.first);
+ if (ret == -1) {
+ gf_log ("", GF_LOG_ERROR, "failed to add graph entry %s",
+ xl->name);
+
+ return -1;
+ }
+
+ return 0;
+}
+
+static xlator_t *
+volgen_graph_add_as (volgen_graph_t *graph, const char *type,
+ const char *format, ...)
+{
+ va_list arg;
+ xlator_t *xl = NULL;
+
+ va_start (arg, format);
+ xl = xlator_instantiate_va (type, format, arg);
+ va_end (arg);
+
+ if (!xl)
+ return NULL;
+
+ if (volgen_graph_link (graph, xl)) {
+ xlator_destroy (xl);
+
+ return NULL;
+ } else
+ glusterfs_graph_set_first (&graph->graph, xl);
+
+ return xl;
+}
+
+static xlator_t *
+volgen_graph_add_nolink (volgen_graph_t *graph, const char *type,
+ const char *format, ...)
+{
+ va_list arg;
+ xlator_t *xl = NULL;
+
+ va_start (arg, format);
+ xl = xlator_instantiate_va (type, format, arg);
+ va_end (arg);
+
+ if (!xl)
+ return NULL;
+
+ glusterfs_graph_set_first (&graph->graph, xl);
+
+ return xl;
+}
+
+static xlator_t *
+volgen_graph_add (volgen_graph_t *graph, char *type, char *volname)
+{
+ char *shorttype = NULL;
+
+ shorttype = strrchr (type, '/');
+ GF_ASSERT (shorttype);
+ shorttype++;
+ GF_ASSERT (*shorttype);
+
+ return volgen_graph_add_as (graph, type, "%s-%s", volname, shorttype);
+}
+
+/* XXX Seems there is no such generic routine?
+ * Maybe should put to xlator.c ??
+ */
+static int
+xlator_set_option (xlator_t *xl, char *key, char *value)
+{
+ char *dval = NULL;
+
+ dval = gf_strdup (value);
+ if (!dval) {
+ gf_log ("", GF_LOG_ERROR,
+ "failed to set xlator opt: %s[%s] = %s",
+ xl->name, key, value);
+
+ return -1;
+ }
+
+ return dict_set_dynstr (xl->options, key, dval);
+}
+
+static int
+xlator_get_option (xlator_t *xl, char *key, char **value)
+{
+ GF_ASSERT (xl);
+ return dict_get_str (xl->options, key, value);
+}
+
+static inline xlator_t *
+first_of (volgen_graph_t *graph)
+{
+ return (xlator_t *)graph->graph.first;
+}
+
+
+
+
+/**************************
+ *
+ * Trie glue
+ *
+ *************************/
+
+
+static int
+volopt_selector (int lvl, char **patt, void *param,
+ int (*optcbk)(char *word, void *param))
+{
+ struct volopt_map_entry *vme = NULL;
+ char *w = NULL;
+ int i = 0;
+ int len = 0;
+ int ret = 0;
+ char *dot = NULL;
+
+ for (vme = glusterd_volopt_map; vme->key; vme++) {
+ w = vme->key;
+
+ for (i = 0; i < lvl; i++) {
+ if (patt[i]) {
+ w = strtail (w, patt[i]);
+ GF_ASSERT (!w || *w);
+ if (!w || *w != '.')
+ goto next;
+ } else {
+ w = strchr (w, '.');
+ GF_ASSERT (w);
+ }
+ w++;
+ }
+
+ dot = strchr (w, '.');
+ if (dot) {
+ len = dot - w;
+ w = gf_strdup (w);
+ if (!w)
+ return -1;
+ w[len] = '\0';
+ }
+ ret = optcbk (w, param);
+ if (dot)
+ GF_FREE (w);
+ if (ret)
+ return -1;
+ next:
+ continue;
+ }
+
+ return 0;
+}
+
+static int
+volopt_trie_cbk (char *word, void *param)
+{
+ return trie_add ((trie_t *)param, word);
+}
+
+static int
+process_nodevec (struct trienodevec *nodevec, char **hint)
+{
+ int ret = 0;
+ char *hint1 = NULL;
+ char *hint2 = NULL;
+ char *hintinfx = "";
+ trienode_t **nodes = nodevec->nodes;
+
+ if (!nodes[0]) {
+ *hint = NULL;
+ return 0;
+ }
+
+#if 0
+ /* Limit as in git */
+ if (trienode_get_dist (nodes[0]) >= 6) {
+ *hint = NULL;
+ return 0;
+ }
+#endif
+
+ if (trienode_get_word (nodes[0], &hint1))
+ return -1;
+
+ if (nodevec->cnt < 2 || !nodes[1]) {
+ *hint = hint1;
+ return 0;
+ }
+
+ if (trienode_get_word (nodes[1], &hint2))
+ return -1;
+
+ if (*hint)
+ hintinfx = *hint;
+ ret = gf_asprintf (hint, "%s or %s%s", hint1, hintinfx, hint2);
+ if (ret > 0)
+ ret = 0;
+ return ret;
+}
+
+static int
+volopt_trie_section (int lvl, char **patt, char *word, char **hint, int hints)
+{
+ trienode_t *nodes[] = { NULL, NULL };
+ struct trienodevec nodevec = { nodes, 2};
+ trie_t *trie = NULL;
+ int ret = 0;
+
+ trie = trie_new ();
+ if (!trie)
+ return -1;
+
+ if (volopt_selector (lvl, patt, trie, &volopt_trie_cbk)) {
+ trie_destroy (trie);
+
+ return -1;
+ }
+
+ GF_ASSERT (hints <= 2);
+ nodevec.cnt = hints;
+ ret = trie_measure_vec (trie, word, &nodevec);
+ if (ret || !nodevec.nodes[0])
+ trie_destroy (trie);
+
+ ret = process_nodevec (&nodevec, hint);
+ trie_destroy (trie);
+
+ return ret;
+}
+
+static int
+volopt_trie (char *key, char **hint)
+{
+ char *patt[] = { NULL };
+ char *fullhint = NULL;
+ char *dot = NULL;
+ char *dom = NULL;
+ int len = 0;
+ int ret = 0;
+
+ *hint = NULL;
+
+ dot = strchr (key, '.');
+ if (!dot)
+ return volopt_trie_section (1, patt, key, hint, 2);
+
+ len = dot - key;
+ dom = gf_strdup (key);
+ if (!dom)
+ return -1;
+ dom[len] = '\0';
+
+ ret = volopt_trie_section (0, NULL, dom, patt, 1);
+ GF_FREE (dom);
+ if (ret) {
+ patt[0] = NULL;
+ goto out;
+ }
+ if (!patt[0])
+ goto out;
+
+ *hint = "...";
+ ret = volopt_trie_section (1, patt, dot + 1, hint, 2);
+ if (ret)
+ goto out;
+ if (*hint) {
+ ret = gf_asprintf (&fullhint, "%s.%s", patt[0], *hint);
+ GF_FREE (*hint);
+ if (ret >= 0) {
+ ret = 0;
+ *hint = fullhint;
+ }
+ }
+
+ out:
+ GF_FREE (patt[0]);
+ if (ret)
+ *hint = NULL;
+
+ return ret;
+}
+
+
+
+
+/**************************
+ *
+ * Volume generation engine
+ *
+ **************************/
+
+
+typedef int (*volgen_opthandler_t) (volgen_graph_t *graph,
+ struct volopt_map_entry *vme,
+ void *param);
+
+struct opthandler_data {
+ volgen_graph_t *graph;
+ volgen_opthandler_t handler;
+ struct volopt_map_entry *vme;
+ gf_boolean_t found;
+ gf_boolean_t data_t_fake;
+ int rv;
+ char *volname;
+ void *param;
+};
+
+static int
+process_option (char *key, data_t *value, void *param)
+{
+ struct opthandler_data *odt = param;
+ struct volopt_map_entry vme = {0,};
+
+ if (odt->rv)
+ return 0;
+ odt->found = _gf_true;
+
+ vme.key = key;
+ vme.voltype = odt->vme->voltype;
+ vme.option = odt->vme->option;
+ vme.op_version = odt->vme->op_version;
+
+ if (!vme.option) {
+ vme.option = strrchr (key, '.');
+ if (vme.option)
+ vme.option++;
+ else
+ vme.option = key;
+ }
+ if (odt->data_t_fake)
+ vme.value = (char *)value;
+ else
+ vme.value = value->data;
+
+ odt->rv = odt->handler (odt->graph, &vme, odt->param);
+ return 0;
+}
+
+static int
+volgen_graph_set_options_generic (volgen_graph_t *graph, dict_t *dict,
+ void *param, volgen_opthandler_t handler)
+{
+ struct volopt_map_entry *vme = NULL;
+ struct opthandler_data odt = {0,};
+ data_t *data = NULL;
+
+ odt.graph = graph;
+ odt.handler = handler;
+ odt.param = param;
+ (void)data;
+
+ for (vme = glusterd_volopt_map; vme->key; vme++) {
+ odt.vme = vme;
+ odt.found = _gf_false;
+ odt.data_t_fake = _gf_false;
+
+ data = dict_get (dict, vme->key);
+
+ if (data)
+ process_option (vme->key, data, &odt);
+ if (odt.rv)
+ return odt.rv;
+
+ if (odt.found)
+ continue;
+
+ /* check for default value */
+
+ if (vme->value) {
+ /* stupid hack to be able to reuse dict iterator
+ * in this context
+ */
+ odt.data_t_fake = _gf_true;
+ process_option (vme->key, (data_t *)vme->value, &odt);
+ if (odt.rv)
+ return odt.rv;
+ }
+ }
+
+ return 0;
+}
+
+static int
+no_filter_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme,
+ void *param)
+{
+ xlator_t *trav;
+ int ret = 0;
+
+ for (trav = first_of (graph); trav; trav = trav->next) {
+ if (strcmp (trav->type, vme->voltype) != 0)
+ continue;
+
+ ret = xlator_set_option (trav, vme->option, vme->value);
+ if (ret)
+ break;
+ }
+ return ret;
+}
+
+static int
+basic_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme,
+ void *param)
+{
+ int ret = 0;
+
+ if (vme->option[0] == '!')
+ goto out;
+
+ ret = no_filter_option_handler (graph, vme, param);
+out:
+ return ret;
+}
+
+static int
+volgen_graph_set_options (volgen_graph_t *graph, dict_t *dict)
+{
+ return volgen_graph_set_options_generic (graph, dict, NULL,
+ &basic_option_handler);
+}
+
+static int
+optget_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme,
+ void *param)
+{
+ struct volopt_map_entry *vme2 = param;
+
+ if (strcmp (vme->key, vme2->key) == 0)
+ vme2->value = vme->value;
+
+ return 0;
+}
+
+static glusterd_server_xlator_t
+get_server_xlator (char *xlator)
+{
+ glusterd_server_xlator_t subvol = GF_XLATOR_NONE;
+
+ if (strcmp (xlator, "posix") == 0)
+ subvol = GF_XLATOR_POSIX;
+ if (strcmp (xlator, "acl") == 0)
+ subvol = GF_XLATOR_ACL;
+ if (strcmp (xlator, "locks") == 0)
+ subvol = GF_XLATOR_LOCKS;
+ if (strcmp (xlator, "io-threads") == 0)
+ subvol = GF_XLATOR_IOT;
+ if (strcmp (xlator, "index") == 0)
+ subvol = GF_XLATOR_INDEX;
+ if (strcmp (xlator, "marker") == 0)
+ subvol = GF_XLATOR_MARKER;
+ if (strcmp (xlator, "io-stats") == 0)
+ subvol = GF_XLATOR_IO_STATS;
+ if (strcmp (xlator, "bd") == 0)
+ subvol = GF_XLATOR_BD;
+
+ return subvol;
+}
+
+static glusterd_client_xlator_t
+get_client_xlator (char *xlator)
+{
+ glusterd_client_xlator_t subvol = GF_CLNT_XLATOR_NONE;
+
+ if (strcmp (xlator, "client") == 0)
+ subvol = GF_CLNT_XLATOR_FUSE;
+
+ return subvol;
+}
+
+static int
+debugxl_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme,
+ void *param)
+{
+ char *volname = NULL;
+ gf_boolean_t enabled = _gf_false;
+
+ volname = param;
+
+ if (strcmp (vme->option, "!debug") != 0)
+ return 0;
+
+ if (!strcmp (vme->key , "debug.trace") ||
+ !strcmp (vme->key, "debug.error-gen")) {
+ if (get_server_xlator (vme->value) == GF_XLATOR_NONE &&
+ get_client_xlator (vme->value) == GF_CLNT_XLATOR_NONE)
+ return 0;
+ else
+ goto add_graph;
+ }
+
+ if (gf_string2boolean (vme->value, &enabled) == -1)
+ return -1;
+ if (!enabled)
+ return 0;
+
+add_graph:
+ if (volgen_graph_add (graph, vme->voltype, volname))
+ return 0;
+ else
+ return -1;
+}
+
+int
+check_and_add_debug_xl (volgen_graph_t *graph, dict_t *set_dict, char *volname,
+ char *xlname)
+{
+ int ret = 0;
+ char *value_str = NULL;
+
+ ret = dict_get_str (set_dict, "debug.trace", &value_str);
+ if (!ret) {
+ if (strcmp (xlname, value_str) == 0) {
+ ret = volgen_graph_set_options_generic (graph, set_dict, volname,
+ &debugxl_option_handler);
+ if (ret)
+ goto out;
+ }
+ }
+
+ ret = dict_get_str (set_dict, "debug.error-gen", &value_str);
+ if (!ret) {
+ if (strcmp (xlname, value_str) == 0) {
+ ret = volgen_graph_set_options_generic (graph, set_dict, volname,
+ &debugxl_option_handler);
+ if (ret)
+ goto out;
+ }
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+/* This getter considers defaults also. */
+static int
+volgen_dict_get (dict_t *dict, char *key, char **value)
+{
+ struct volopt_map_entry vme = {0,};
+ int ret = 0;
+
+ vme.key = key;
+
+ ret = volgen_graph_set_options_generic (NULL, dict, &vme,
+ &optget_option_handler);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Out of memory");
+
+ return -1;
+ }
+
+ *value = vme.value;
+
+ return 0;
+}
+
+static int
+option_complete (char *key, char **completion)
+{
+ struct volopt_map_entry *vme = NULL;
+
+ *completion = NULL;
+ for (vme = glusterd_volopt_map; vme->key; vme++) {
+ if (strcmp (strchr (vme->key, '.') + 1, key) != 0)
+ continue;
+
+ if (*completion && strcmp (*completion, vme->key) != 0) {
+ /* cancel on non-unique match */
+ *completion = NULL;
+
+ return 0;
+ } else
+ *completion = vme->key;
+ }
+
+ if (*completion) {
+ /* For sake of unified API we want
+ * have the completion to be a to-be-freed
+ * string.
+ */
+ *completion = gf_strdup (*completion);
+ return -!*completion;
+ }
+
+ return 0;
+}
+
+int
+glusterd_volinfo_get (glusterd_volinfo_t *volinfo, char *key, char **value)
+{
+ return volgen_dict_get (volinfo->dict, key, value);
+}
+
+int
+glusterd_volinfo_get_boolean (glusterd_volinfo_t *volinfo, char *key)
+{
+ char *val = NULL;
+ gf_boolean_t boo = _gf_false;
+ int ret = 0;
+
+ ret = glusterd_volinfo_get (volinfo, key, &val);
+ if (ret)
+ return -1;
+
+ if (val)
+ ret = gf_string2boolean (val, &boo);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "value for %s option is not valid", key);
+
+ return -1;
+ }
+
+ return boo;
+}
+
+gf_boolean_t
+glusterd_check_voloption_flags (char *key, int32_t flags)
+{
+ char *completion = NULL;
+ struct volopt_map_entry *vmep = NULL;
+ int ret = 0;
+
+ COMPLETE_OPTION(key, completion, ret);
+ for (vmep = glusterd_volopt_map; vmep->key; vmep++) {
+ if (strcmp (vmep->key, key) == 0) {
+ if (vmep->flags & flags)
+ return _gf_true;
+ else
+ return _gf_false;
+ }
+ }
+
+ return _gf_false;
+}
+
+gf_boolean_t
+glusterd_check_globaloption (char *key)
+{
+ char *completion = NULL;
+ struct volopt_map_entry *vmep = NULL;
+ int ret = 0;
+
+ COMPLETE_OPTION(key, completion, ret);
+ for (vmep = glusterd_volopt_map; vmep->key; vmep++) {
+ if (strcmp (vmep->key, key) == 0) {
+ if ((vmep->type == GLOBAL_DOC) ||
+ (vmep->type == GLOBAL_NO_DOC))
+ return _gf_true;
+ else
+ return _gf_false;
+ }
+ }
+
+ return _gf_false;
+}
+
+gf_boolean_t
+glusterd_check_localoption (char *key)
+{
+ char *completion = NULL;
+ struct volopt_map_entry *vmep = NULL;
+ int ret = 0;
+
+ COMPLETE_OPTION(key, completion, ret);
+ for (vmep = glusterd_volopt_map; vmep->key; vmep++) {
+ if (strcmp (vmep->key, key) == 0) {
+ if ((vmep->type == DOC) ||
+ (vmep->type == NO_DOC))
+ return _gf_true;
+ else
+ return _gf_false;
+ }
+ }
+
+ return _gf_false;
+}
+
+int
+glusterd_check_voloption (char *key)
+{
+ char *completion = NULL;
+ struct volopt_map_entry *vmep = NULL;
+ int ret = 0;
+
+ COMPLETE_OPTION(key, completion, ret);
+ for (vmep = glusterd_volopt_map; vmep->key; vmep++) {
+ if (strcmp (vmep->key, key) == 0) {
+ if ((vmep->type == DOC) ||
+ (vmep->type == DOC))
+ return _gf_true;
+ else
+ return _gf_false;
+ }
+ }
+
+ return _gf_false;
+
+}
+
+int
+glusterd_check_option_exists (char *key, char **completion)
+{
+ struct volopt_map_entry vme = {0,};
+ struct volopt_map_entry *vmep = NULL;
+ int ret = 0;
+ xlator_t *this = THIS;
+
+ (void)vme;
+ (void)vmep;
+
+ if (!strchr (key, '.')) {
+ if (completion) {
+ ret = option_complete (key, completion);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Out of memory");
+ return -1;
+ }
+
+ ret = !!*completion;
+ if (ret)
+ return ret;
+ else
+ goto trie;
+ } else
+ return 0;
+ }
+
+ for (vmep = glusterd_volopt_map; vmep->key; vmep++) {
+ if (strcmp (vmep->key, key) == 0) {
+ ret = 1;
+ break;
+ }
+ }
+
+ if (ret || !completion)
+ return ret;
+
+ trie:
+ ret = volopt_trie (key, completion);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Some error occurred during keyword hinting");
+ }
+
+ return ret;
+}
+
+char*
+glusterd_get_trans_type_rb (gf_transport_type ttype)
+{
+ char *trans_type = NULL;
+
+ switch (ttype) {
+ case GF_TRANSPORT_RDMA:
+ gf_asprintf (&trans_type, "rdma");
+ break;
+ case GF_TRANSPORT_TCP:
+ case GF_TRANSPORT_BOTH_TCP_RDMA:
+ gf_asprintf (&trans_type, "tcp");
+ break;
+ default:
+ gf_log (THIS->name, GF_LOG_ERROR, "Unknown "
+ "transport type");
+ }
+
+ return trans_type;
+}
+
+static int
+_xl_link_children (xlator_t *parent, xlator_t *children, size_t child_count)
+{
+ xlator_t *trav = NULL;
+ size_t seek = 0;
+ int ret = -1;
+
+ if (child_count == 0)
+ goto out;
+ seek = child_count;
+ for (trav = children; --seek; trav = trav->next);
+ for (; child_count--; trav = trav->prev) {
+ ret = volgen_xlator_link (parent, trav);
+ if (ret)
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+static int
+volgen_graph_merge_sub (volgen_graph_t *dgraph, volgen_graph_t *sgraph,
+ size_t child_count)
+{
+ xlator_t *trav = NULL;
+ int ret = 0;
+
+ GF_ASSERT (dgraph->graph.first);
+
+ ret = _xl_link_children (first_of (dgraph), first_of (sgraph),
+ child_count);
+ if (ret)
+ goto out;
+
+ for (trav = first_of (dgraph); trav->next; trav = trav->next);
+
+ trav->next = first_of (sgraph);
+ trav->next->prev = trav;
+ dgraph->graph.xl_count += sgraph->graph.xl_count;
+
+out:
+ return ret;
+}
+
+static void
+volgen_apply_filters (char *orig_volfile)
+{
+ DIR *filterdir = NULL;
+ struct dirent entry = {0,};
+ struct dirent *next = NULL;
+ char *filterpath = NULL;
+ struct stat statbuf = {0,};
+
+ filterdir = opendir(FILTERDIR);
+ if (!filterdir) {
+ return;
+ }
+
+ while ((readdir_r(filterdir,&entry,&next) == 0) && next) {
+ if (!strncmp(entry.d_name,".",sizeof(entry.d_name))) {
+ continue;
+ }
+ if (!strncmp(entry.d_name,"..",sizeof(entry.d_name))) {
+ continue;
+ }
+ /*
+ * d_type isn't guaranteed to be present/valid on all systems,
+ * so do an explicit stat instead.
+ */
+ if (gf_asprintf(&filterpath,"%s/%.*s",FILTERDIR,
+ sizeof(entry.d_name), entry.d_name) == (-1)) {
+ continue;
+ }
+ /* Deliberately use stat instead of lstat to allow symlinks. */
+ if (stat(filterpath,&statbuf) == (-1)) {
+ goto free_fp;
+ }
+ if (!S_ISREG(statbuf.st_mode)) {
+ goto free_fp;
+ }
+ /*
+ * We could check the mode in statbuf directly, or just skip
+ * this entirely and check for EPERM after exec fails, but this
+ * is cleaner.
+ */
+ if (access(filterpath,X_OK) != 0) {
+ goto free_fp;
+ }
+ if (runcmd(filterpath,orig_volfile,NULL)) {
+ gf_log("",GF_LOG_ERROR,"failed to run filter %.*s",
+ (int)sizeof(entry.d_name), entry.d_name);
+ }
+free_fp:
+ GF_FREE(filterpath);
+ }
+}
+
+static int
+volgen_write_volfile (volgen_graph_t *graph, char *filename)
+{
+ char *ftmp = NULL;
+ FILE *f = NULL;
+ int fd = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ if (gf_asprintf (&ftmp, "%s.tmp", filename) == -1) {
+ ftmp = NULL;
+
+ goto error;
+ }
+
+ fd = creat (ftmp, S_IRUSR | S_IWUSR);
+ if (fd < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "%s",
+ strerror (errno));
+ goto error;
+ }
+
+ close (fd);
+
+ f = fopen (ftmp, "w");
+ if (!f)
+ goto error;
+
+ if (glusterfs_graph_print_file (f, &graph->graph) == -1)
+ goto error;
+
+ if (fclose (f) != 0) {
+ gf_log (THIS->name, GF_LOG_ERROR, "fclose on the file %s "
+ "failed (%s)", ftmp, strerror (errno));
+ /*
+ * Even though fclose has failed here, we have to set f to NULL.
+ * Otherwise when the code path goes to error, there again we
+ * try to close it which might cause undefined behavior such as
+ * process crash.
+ */
+ f = NULL;
+ goto error;
+ }
+
+ f = NULL;
+
+ if (rename (ftmp, filename) == -1)
+ goto error;
+
+ GF_FREE (ftmp);
+
+ volgen_apply_filters(filename);
+
+ return 0;
+
+ error:
+
+ GF_FREE (ftmp);
+ if (f)
+ fclose (f);
+
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create volfile %s", filename);
+
+ return -1;
+}
+
+static void
+volgen_graph_free (volgen_graph_t *graph)
+{
+ xlator_t *trav = NULL;
+ xlator_t *trav_old = NULL;
+
+ for (trav = first_of (graph) ;; trav = trav->next) {
+ if (trav_old)
+ xlator_destroy (trav_old);
+
+ trav_old = trav;
+
+ if (!trav)
+ break;
+ }
+}
+
+static int
+build_graph_generic (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+ dict_t *mod_dict, void *param,
+ int (*builder) (volgen_graph_t *graph,
+ glusterd_volinfo_t *volinfo,
+ dict_t *set_dict, void *param))
+{
+ dict_t *set_dict = NULL;
+ int ret = 0;
+
+ if (mod_dict) {
+ set_dict = dict_copy (volinfo->dict, NULL);
+ if (!set_dict)
+ return -1;
+ dict_copy (mod_dict, set_dict);
+ /* XXX dict_copy swallows errors */
+ } else {
+ set_dict = volinfo->dict;
+ }
+
+ ret = builder (graph, volinfo, set_dict, param);
+ if (!ret)
+ ret = volgen_graph_set_options (graph, set_dict);
+
+ if (mod_dict)
+ dict_destroy (set_dict);
+
+ return ret;
+}
+
+static gf_transport_type
+transport_str_to_type (char *tt)
+{
+ gf_transport_type type = GF_TRANSPORT_TCP;
+
+ if (!strcmp ("tcp", tt))
+ type = GF_TRANSPORT_TCP;
+ else if (!strcmp ("rdma", tt))
+ type = GF_TRANSPORT_RDMA;
+ else if (!strcmp ("tcp,rdma", tt))
+ type = GF_TRANSPORT_BOTH_TCP_RDMA;
+ return type;
+}
+
+static void
+transport_type_to_str (gf_transport_type type, char *tt)
+{
+ switch (type) {
+ case GF_TRANSPORT_RDMA:
+ strcpy (tt, "rdma");
+ break;
+ case GF_TRANSPORT_TCP:
+ strcpy (tt, "tcp");
+ break;
+ case GF_TRANSPORT_BOTH_TCP_RDMA:
+ strcpy (tt, "tcp,rdma");
+ break;
+ }
+}
+
+static void
+get_vol_transport_type (glusterd_volinfo_t *volinfo, char *tt)
+{
+ transport_type_to_str (volinfo->transport_type, tt);
+}
+
+static void
+get_vol_nfs_transport_type (glusterd_volinfo_t *volinfo, char *tt)
+{
+ if (volinfo->nfs_transport_type == GF_TRANSPORT_BOTH_TCP_RDMA) {
+ gf_log ("", GF_LOG_ERROR, "%s:nfs transport cannot be both"
+ " tcp and rdma", volinfo->volname);
+ GF_ASSERT (0);
+ }
+ transport_type_to_str (volinfo->nfs_transport_type, tt);
+}
+
+/* gets the volinfo, dict, a character array for filling in
+ * the transport type and a boolean option which says whether
+ * the transport type is required for nfs or not. If its not
+ * for nfs, then it is considered as the client transport
+ * and client transport type is filled in the character array
+ */
+static void
+get_transport_type (glusterd_volinfo_t *volinfo, dict_t *set_dict,
+ char *transt, gf_boolean_t is_nfs)
+{
+ int ret = -1;
+ char *tt = NULL;
+ char *key = NULL;
+ typedef void (*transport_type) (glusterd_volinfo_t *volinfo, char *tt);
+ transport_type get_transport;
+
+ if (is_nfs == _gf_false) {
+ key = "client-transport-type";
+ get_transport = get_vol_transport_type;
+ } else {
+ key = "nfs.transport-type";
+ get_transport = get_vol_nfs_transport_type;
+ }
+
+ ret = dict_get_str (set_dict, key, &tt);
+ if (ret)
+ get_transport (volinfo, transt);
+ if (!ret)
+ strcpy (transt, tt);
+}
+
+static int
+server_auth_option_handler (volgen_graph_t *graph,
+ struct volopt_map_entry *vme, void *param)
+{
+ xlator_t *xl = NULL;
+ xlator_list_t *trav = NULL;
+ char *aa = NULL;
+ int ret = 0;
+ char *key = NULL;
+
+ if (strcmp (vme->option, "!server-auth") != 0)
+ return 0;
+
+ xl = first_of (graph);
+
+ /* from 'auth.allow' -> 'allow', and 'auth.reject' -> 'reject' */
+ key = strchr (vme->key, '.') + 1;
+
+ for (trav = xl->children; trav; trav = trav->next) {
+ ret = gf_asprintf (&aa, "auth.addr.%s.%s", trav->xlator->name,
+ key);
+ if (ret != -1) {
+ ret = xlator_set_option (xl, aa, vme->value);
+ GF_FREE (aa);
+ }
+ if (ret)
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+loglevel_option_handler (volgen_graph_t *graph,
+ struct volopt_map_entry *vme, void *param)
+{
+ char *role = param;
+ struct volopt_map_entry vme2 = {0,};
+
+ if ( (strcmp (vme->option, "!client-log-level") != 0 &&
+ strcmp (vme->option, "!brick-log-level") != 0)
+ || !strstr (vme->key, role))
+ return 0;
+
+ memcpy (&vme2, vme, sizeof (vme2));
+ vme2.option = "log-level";
+
+ return basic_option_handler (graph, &vme2, NULL);
+}
+
+static int
+server_check_marker_off (volgen_graph_t *graph, struct volopt_map_entry *vme,
+ glusterd_volinfo_t *volinfo)
+{
+ gf_boolean_t bool = _gf_false;
+ int ret = 0;
+
+ GF_ASSERT (volinfo);
+ GF_ASSERT (vme);
+
+ if (strcmp (vme->option, "!xtime") != 0)
+ return 0;
+
+ ret = gf_string2boolean (vme->value, &bool);
+ if (ret || bool)
+ goto out;
+
+ ret = glusterd_volinfo_get_boolean (volinfo, VKEY_MARKER_XTIME);
+ if (ret < 0) {
+ gf_log ("", GF_LOG_WARNING, "failed to get the marker status");
+ ret = -1;
+ goto out;
+ }
+
+ if (ret) {
+ bool = _gf_false;
+ ret = glusterd_check_gsync_running (volinfo, &bool);
+
+ if (bool) {
+ gf_log ("", GF_LOG_WARNING, GEOREP" sessions active"
+ "for the volume %s, cannot disable marker "
+ ,volinfo->volname);
+ set_graph_errstr (graph,
+ VKEY_MARKER_XTIME" cannot be disabled "
+ "while "GEOREP" sessions exist");
+ ret = -1;
+ goto out;
+ }
+
+ if (ret) {
+ gf_log ("", GF_LOG_WARNING, "Unable to get the status"
+ " of active gsync session");
+ goto out;
+ }
+ }
+
+ ret = 0;
+ out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+
+}
+
+static int
+sys_loglevel_option_handler (volgen_graph_t *graph,
+ struct volopt_map_entry *vme,
+ void *param)
+{
+ char *role = NULL;
+ struct volopt_map_entry vme2 = {0,};
+
+ role = (char *) param;
+
+ if (strcmp (vme->option, "!sys-log-level") != 0 ||
+ !strstr (vme->key, role))
+ return 0;
+
+ memcpy (&vme2, vme, sizeof (vme2));
+ vme2.option = "sys-log-level";
+
+ return basic_option_handler (graph, &vme2, NULL);
+}
+
+static int
+volgen_graph_set_xl_options (volgen_graph_t *graph, dict_t *dict)
+{
+ int32_t ret = -1;
+ char *xlator = NULL;
+ char xlator_match[1024] = {0,}; /* for posix* -> *posix* */
+ char *loglevel = NULL;
+ xlator_t *trav = NULL;
+
+ ret = dict_get_str (dict, "xlator", &xlator);
+ if (ret)
+ goto out;
+
+ ret = dict_get_str (dict, "loglevel", &loglevel);
+ if (ret)
+ goto out;
+
+ snprintf (xlator_match, 1024, "*%s", xlator);
+
+ for (trav = first_of (graph); trav; trav = trav->next) {
+ if (fnmatch(xlator_match, trav->type, FNM_NOESCAPE) == 0) {
+ gf_log ("glusterd", GF_LOG_DEBUG, "Setting log level for xlator: %s",
+ trav->type);
+ ret = xlator_set_option (trav, "log-level", loglevel);
+ if (ret)
+ break;
+ }
+ }
+
+ out:
+ return ret;
+}
+
+static int
+server_spec_option_handler (volgen_graph_t *graph,
+ struct volopt_map_entry *vme, void *param)
+{
+ int ret = 0;
+ glusterd_volinfo_t *volinfo = NULL;
+
+ volinfo = param;
+
+ ret = server_auth_option_handler (graph, vme, NULL);
+ if (!ret)
+ ret = server_check_marker_off (graph, vme, volinfo);
+
+ if (!ret)
+ ret = loglevel_option_handler (graph, vme, "brick");
+
+ if (!ret)
+ ret = sys_loglevel_option_handler (graph, vme, "brick");
+
+ return ret;
+}
+
+static int
+server_spec_extended_option_handler (volgen_graph_t *graph,
+ struct volopt_map_entry *vme, void *param)
+{
+ int ret = 0;
+ dict_t *dict = NULL;
+
+ GF_ASSERT (param);
+ dict = (dict_t *)param;
+
+ ret = server_auth_option_handler (graph, vme, NULL);
+ if (!ret)
+ ret = volgen_graph_set_xl_options (graph, dict);
+
+ return ret;
+}
+
+static void get_vol_tstamp_file (char *filename, glusterd_volinfo_t *volinfo);
+
+static int
+server_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+ dict_t *set_dict, void *param)
+{
+ char *volname = NULL;
+ char *path = NULL;
+ int pump = 0;
+ xlator_t *xl = NULL;
+ xlator_t *txl = NULL;
+ xlator_t *rbxl = NULL;
+ char transt[16] = {0,};
+ char *ptranst = NULL;
+ char volume_id[64] = {0,};
+ char tstamp_file[PATH_MAX] = {0,};
+ int ret = 0;
+ char *xlator = NULL;
+ char *loglevel = NULL;
+ char *username = NULL;
+ char *password = NULL;
+ char index_basepath[PATH_MAX] = {0};
+ char key[1024] = {0};
+ glusterd_brickinfo_t *brickinfo = NULL;
+ char changelog_basepath[PATH_MAX] = {0,};
+
+ brickinfo = param;
+ path = brickinfo->path;
+ volname = volinfo->volname;
+ get_vol_transport_type (volinfo, transt);
+
+ ret = dict_get_str (set_dict, "xlator", &xlator);
+
+ /* got a cli log level request */
+ if (!ret) {
+ ret = dict_get_str (set_dict, "loglevel", &loglevel);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR, "could not get both"
+ " translator name and loglevel for log level request");
+ goto out;
+ }
+ }
+
+ xl = volgen_graph_add (graph, "storage/posix", volname);
+ if (!xl)
+ return -1;
+
+ ret = xlator_set_option (xl, "directory", path);
+ if (ret)
+ return -1;
+
+ ret = xlator_set_option (xl, "volume-id",
+ uuid_utoa (volinfo->volume_id));
+ if (ret)
+ return -1;
+
+ ret = check_and_add_debug_xl (graph, set_dict, volname,
+ "posix");
+ if (ret)
+ return -1;
+#ifdef HAVE_BD_XLATOR
+ if (*brickinfo->vg != '\0') {
+ /* Now add BD v2 xlator if volume is BD type */
+ xl = volgen_graph_add (graph, "storage/bd", volname);
+ if (!xl)
+ return -1;
+
+ ret = xlator_set_option (xl, "device", "vg");
+ if (ret)
+ return -1;
+ ret = xlator_set_option (xl, "export", brickinfo->vg);
+ if (ret)
+ return -1;
+
+ ret = check_and_add_debug_xl (graph, set_dict, volname, "bd");
+ if (ret)
+ return -1;
+
+ }
+#endif
+
+ xl = volgen_graph_add (graph, "features/changelog", volname);
+ if (!xl)
+ return -1;
+
+ ret = xlator_set_option (xl, "changelog-brick", path);
+ if (ret)
+ return -1;
+
+ snprintf (changelog_basepath, sizeof (changelog_basepath),
+ "%s/%s", path, ".glusterfs/changelogs");
+ ret = xlator_set_option (xl, "changelog-dir", changelog_basepath);
+ if (ret)
+ return -1;
+
+ ret = check_and_add_debug_xl (graph, set_dict, volname, "changelog");
+ if (ret)
+ return -1;
+
+ xl = volgen_graph_add (graph, "features/access-control", volname);
+ if (!xl)
+ return -1;
+
+ ret = check_and_add_debug_xl (graph, set_dict, volname, "acl");
+ if (ret)
+ return -1;
+
+ xl = volgen_graph_add (graph, "features/locks", volname);
+ if (!xl)
+ return -1;
+
+ ret = check_and_add_debug_xl (graph, set_dict, volname, "locks");
+ if (ret)
+ return -1;
+
+ xl = volgen_graph_add (graph, "performance/io-threads", volname);
+ if (!xl)
+ return -1;
+
+ ret = check_and_add_debug_xl (graph, set_dict, volname, "io-threads");
+ if (ret)
+ return -1;
+
+ ret = dict_get_int32 (volinfo->dict, "enable-pump", &pump);
+ if (ret == -ENOENT)
+ ret = pump = 0;
+ if (ret)
+ return -1;
+
+ username = glusterd_auth_get_username (volinfo);
+ password = glusterd_auth_get_password (volinfo);
+
+ if (pump) {
+ txl = first_of (graph);
+
+ rbxl = volgen_graph_add_nolink (graph, "protocol/client",
+ "%s-replace-brick", volname);
+ if (!rbxl)
+ return -1;
+
+ ptranst = glusterd_get_trans_type_rb (volinfo->transport_type);
+ if (NULL == ptranst)
+ return -1;
+
+ if (username) {
+ ret = xlator_set_option (rbxl, "username", username);
+ if (ret)
+ return -1;
+ }
+
+ if (password) {
+ ret = xlator_set_option (rbxl, "password", password);
+ if (ret)
+ return -1;
+ }
+
+ ret = xlator_set_option (rbxl, "transport-type", ptranst);
+ GF_FREE (ptranst);
+ if (ret)
+ return -1;
+
+ xl = volgen_graph_add_nolink (graph, "cluster/pump", "%s-pump",
+ volname);
+ if (!xl)
+ return -1;
+ ret = volgen_xlator_link (xl, txl);
+ if (ret)
+ return -1;
+ ret = volgen_xlator_link (xl, rbxl);
+ if (ret)
+ return -1;
+ }
+
+ xl = volgen_graph_add (graph, "features/index", volname);
+ if (!xl)
+ return -1;
+
+ snprintf (index_basepath, sizeof (index_basepath), "%s/%s",
+ path, ".glusterfs/indices");
+ ret = xlator_set_option (xl, "index-base", index_basepath);
+ if (ret)
+ return -1;
+
+ ret = check_and_add_debug_xl (graph, set_dict, volname,
+ "index");
+ if (ret)
+ return -1;
+
+ xl = volgen_graph_add (graph, "features/marker", volname);
+ if (!xl)
+ return -1;
+
+ uuid_unparse (volinfo->volume_id, volume_id);
+ ret = xlator_set_option (xl, "volume-uuid", volume_id);
+ if (ret)
+ return -1;
+ get_vol_tstamp_file (tstamp_file, volinfo);
+ ret = xlator_set_option (xl, "timestamp-file", tstamp_file);
+ if (ret)
+ return -1;
+
+ ret = check_and_add_debug_xl (graph, set_dict, volname, "marker");
+ if (ret)
+ return -1;
+
+ if (dict_get_str_boolean (set_dict, "features.read-only", 0) &&
+ dict_get_str_boolean (set_dict, "features.worm",0)) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "read-only and worm cannot be set together");
+ ret = -1;
+ goto out;
+ }
+
+ /* Check for read-only volume option, and add it to the graph */
+ if (dict_get_str_boolean (set_dict, "features.read-only", 0)
+ || volinfo -> is_snap_volume) {
+ xl = volgen_graph_add (graph, "features/read-only", volname);
+ if (!xl) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ /* Check for worm volume option, and add it to the graph */
+ if (dict_get_str_boolean (set_dict, "features.worm", 0)) {
+ xl = volgen_graph_add (graph, "features/worm", volname);
+ if (!xl) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ /* Check for compress volume option, and add it to the graph on server side */
+ if (dict_get_str_boolean (set_dict, "features.compress", 0)) {
+ xl = volgen_graph_add (graph, "features/cdc", volname);
+ if (!xl) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_str (set_dict, "compress.mode", "server");
+ if (ret)
+ goto out;
+ }
+
+ xl = volgen_graph_add_as (graph, "debug/io-stats", path);
+ if (!xl)
+ return -1;
+
+ xl = volgen_graph_add (graph, "protocol/server", volname);
+ if (!xl)
+ return -1;
+ ret = xlator_set_option (xl, "transport-type", transt);
+ if (ret)
+ return -1;
+
+ /*In the case of running multiple glusterds on a single machine,
+ * we should ensure that bricks don't listen on all IPs on that
+ * machine and break the IP based separation being brought about.*/
+ if (dict_get (THIS->options, "transport.socket.bind-address")) {
+ ret = xlator_set_option (xl, "transport.socket.bind-address",
+ brickinfo->hostname);
+ if (ret)
+ return -1;
+ }
+
+ if (username) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "auth.login.%s.allow", path);
+
+ ret = xlator_set_option (xl, key, username);
+ if (ret)
+ return -1;
+ }
+
+ if (password) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "auth.login.%s.password",
+ username);
+
+ ret = xlator_set_option (xl, key, password);
+ if (ret)
+ return -1;
+ }
+
+ ret = volgen_graph_set_options_generic (graph, set_dict,
+ (xlator && loglevel) ? (void *)set_dict : volinfo,
+ (xlator && loglevel) ? &server_spec_extended_option_handler :
+ &server_spec_option_handler);
+
+ out:
+ return ret;
+}
+
+
+/* builds a graph for server role , with option overrides in mod_dict */
+static int
+build_server_graph (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+ dict_t *mod_dict, glusterd_brickinfo_t *brickinfo)
+{
+ return build_graph_generic (graph, volinfo, mod_dict, brickinfo,
+ &server_graph_builder);
+}
+
+static int
+perfxl_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme,
+ void *param)
+{
+ gf_boolean_t enabled = _gf_false;
+ glusterd_volinfo_t *volinfo = NULL;
+
+ GF_ASSERT (param);
+ volinfo = param;
+
+ if (strcmp (vme->option, "!perf") != 0)
+ return 0;
+
+ if (gf_string2boolean (vme->value, &enabled) == -1)
+ return -1;
+ if (!enabled)
+ return 0;
+
+ /* Check op-version before adding the 'open-behind' xlator in the graph
+ */
+ if (!strcmp (vme->key, "performance.open-behind") &&
+ (vme->op_version > volinfo->client_op_version))
+ return 0;
+
+ if (volgen_graph_add (graph, vme->voltype, volinfo->volname))
+ return 0;
+ else
+ return -1;
+}
+
+static int
+nfsperfxl_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme,
+ void *param)
+{
+ char *volname = NULL;
+ gf_boolean_t enabled = _gf_false;
+
+ volname = param;
+
+ if (strcmp (vme->option, "!nfsperf") != 0)
+ return 0;
+
+ if (gf_string2boolean (vme->value, &enabled) == -1)
+ return -1;
+ if (!enabled)
+ return 0;
+
+ if (volgen_graph_add (graph, vme->voltype, volname))
+ return 0;
+ else
+ return -1;
+}
+
+#if (HAVE_LIB_XML)
+static int
+end_sethelp_xml_doc (xmlTextWriterPtr writer)
+{
+ int ret = -1;
+
+ ret = xmlTextWriterEndElement(writer);
+ if (ret < 0) {
+ gf_log ("glusterd", GF_LOG_ERROR, "Could not end an "
+ "xmlElemetnt");
+ ret = -1;
+ goto out;
+ }
+ ret = xmlTextWriterEndDocument (writer);
+ if (ret < 0) {
+ gf_log ("glusterd", GF_LOG_ERROR, "Could not end an "
+ "xmlDocument");
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+ out:
+ gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+
+}
+
+static int
+init_sethelp_xml_doc (xmlTextWriterPtr *writer, xmlBufferPtr *buf)
+{
+ int ret;
+
+ *buf = xmlBufferCreateSize (8192);
+ if (buf == NULL) {
+ gf_log ("glusterd", GF_LOG_ERROR, "Error creating the xml "
+ "buffer");
+ ret = -1;
+ goto out;
+ }
+
+ xmlBufferSetAllocationScheme (*buf,XML_BUFFER_ALLOC_DOUBLEIT);
+
+ *writer = xmlNewTextWriterMemory(*buf, 0);
+ if (writer == NULL) {
+ gf_log ("glusterd", GF_LOG_ERROR, " Error creating the xml "
+ "writer");
+ ret = -1;
+ goto out;
+ }
+
+ ret = xmlTextWriterStartDocument(*writer, "1.0", "UTF-8", "yes");
+ if (ret < 0) {
+ gf_log ("glusterd", GF_LOG_ERROR, "Error While starting the "
+ "xmlDoc");
+ goto out;
+ }
+
+ ret = xmlTextWriterStartElement(*writer, (xmlChar *)"options");
+ if (ret < 0) {
+ gf_log ("glusterd", GF_LOG_ERROR, "Could not create an "
+ "xmlElemetnt");
+ ret = -1;
+ goto out;
+ }
+
+
+ ret = 0;
+
+ out:
+ gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+
+}
+
+static int
+xml_add_volset_element (xmlTextWriterPtr writer, const char *name,
+ const char *def_val, const char *dscrpt)
+{
+
+ int ret = -1;
+
+ GF_ASSERT (name);
+
+ ret = xmlTextWriterStartElement(writer, (xmlChar *) "option");
+ if (ret < 0) {
+ gf_log ("glusterd", GF_LOG_ERROR, "Could not create an "
+ "xmlElemetnt");
+ ret = -1;
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar*)"defaultValue",
+ "%s", def_val);
+ if (ret < 0) {
+ gf_log ("glusterd", GF_LOG_ERROR, "Could not create an "
+ "xmlElemetnt");
+ ret = -1;
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"description",
+ "%s", dscrpt );
+ if (ret < 0) {
+ gf_log ("glusterd", GF_LOG_ERROR, "Could not create an "
+ "xmlElemetnt");
+ ret = -1;
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *) "name", "%s",
+ name);
+ if (ret < 0) {
+ gf_log ("glusterd", GF_LOG_ERROR, "Could not create an "
+ "xmlElemetnt");
+ ret = -1;
+ goto out;
+ }
+
+ ret = xmlTextWriterEndElement(writer);
+ if (ret < 0) {
+ gf_log ("glusterd", GF_LOG_ERROR, "Could not end an "
+ "xmlElemetnt");
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+ out:
+ gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+
+}
+
+#endif
+
+static int
+_get_xlator_opt_key_from_vme ( struct volopt_map_entry *vme, char **key)
+{
+ int ret = 0;
+
+ GF_ASSERT (vme);
+ GF_ASSERT (key);
+
+
+ if (!strcmp (vme->key, AUTH_ALLOW_MAP_KEY))
+ *key = gf_strdup (AUTH_ALLOW_OPT_KEY);
+ else if (!strcmp (vme->key, AUTH_REJECT_MAP_KEY))
+ *key = gf_strdup (AUTH_REJECT_OPT_KEY);
+ else if (!strcmp (vme->key, NFS_DISABLE_MAP_KEY))
+ *key = gf_strdup (NFS_DISABLE_OPT_KEY);
+ else {
+ if (vme->option) {
+ if (vme->option[0] == '!') {
+ *key = vme->option + 1;
+ if (!*key[0])
+ ret = -1;
+ } else {
+ *key = vme->option;
+ }
+ } else {
+ *key = strchr (vme->key, '.');
+ if (*key) {
+ (*key) ++;
+ if (!*key[0])
+ ret = -1;
+ } else {
+ ret = -1;
+ }
+ }
+ }
+ if (ret)
+ gf_log ("glusterd", GF_LOG_ERROR, "Wrong entry found in "
+ "glusterd_volopt_map entry %s", vme->key);
+ else
+ gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+static void
+_free_xlator_opt_key (char *key)
+{
+ GF_ASSERT (key);
+
+ if (!strcmp (key, AUTH_ALLOW_OPT_KEY) ||
+ !strcmp (key, AUTH_REJECT_OPT_KEY) ||
+ !strcmp (key, NFS_DISABLE_OPT_KEY))
+ GF_FREE (key);
+
+ return;
+}
+
+int
+glusterd_get_volopt_content (dict_t * ctx, gf_boolean_t xml_out)
+{
+ void *dl_handle = NULL;
+ volume_opt_list_t vol_opt_handle = {{0},};
+ char *key = NULL;
+ struct volopt_map_entry *vme = NULL;
+ int ret = -1;
+ char *def_val = NULL;
+ char *descr = NULL;
+ char output_string[25600] = {0, };
+ char *output = NULL;
+ char tmp_str[2048] = {0, };
+#if (HAVE_LIB_XML)
+ xmlTextWriterPtr writer = NULL;
+ xmlBufferPtr buf = NULL;
+
+ if (xml_out) {
+ ret = init_sethelp_xml_doc (&writer, &buf);
+ if (ret) /*logging done in init_xml_lib*/
+ goto out;
+ }
+#endif
+
+ INIT_LIST_HEAD (&vol_opt_handle.list);
+
+ for (vme = &glusterd_volopt_map[0]; vme->key; vme++) {
+
+ if ((vme->type == NO_DOC) || (vme->type == GLOBAL_NO_DOC))
+ continue;
+
+ if (vme->description) {
+ descr = vme->description;
+ def_val = vme->value;
+ } else {
+ if (_get_xlator_opt_key_from_vme (vme, &key)) {
+ gf_log ("glusterd", GF_LOG_DEBUG, "Failed to "
+ "get %s key from volume option entry",
+ vme->key);
+ goto out; /*Some error while geting key*/
+ }
+
+ ret = xlator_volopt_dynload (vme->voltype,
+ &dl_handle,
+ &vol_opt_handle);
+
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_DEBUG,
+ "xlator_volopt_dynload error(%d)", ret);
+ ret = 0;
+ goto cont;
+ }
+
+ ret = xlator_option_info_list (&vol_opt_handle, key,
+ &def_val, &descr);
+ if (ret) { /*Swallow Error i.e if option not found*/
+ gf_log ("glusterd", GF_LOG_DEBUG,
+ "Failed to get option for %s key", key);
+ ret = 0;
+ goto cont;
+ }
+ }
+
+ if (xml_out) {
+#if (HAVE_LIB_XML)
+ if (xml_add_volset_element (writer,vme->key,
+ def_val, descr)) {
+ ret = -1;
+ goto cont;
+ }
+#else
+ gf_log ("glusterd", GF_LOG_ERROR, "Libxml not present");
+#endif
+ } else {
+ snprintf (tmp_str, sizeof (tmp_str), "Option: %s\nDefault "
+ "Value: %s\nDescription: %s\n\n",
+ vme->key, def_val, descr);
+ strcat (output_string, tmp_str);
+ }
+cont:
+ if (dl_handle) {
+ dlclose (dl_handle);
+ dl_handle = NULL;
+ vol_opt_handle.given_opt = NULL;
+ }
+ if (key) {
+ _free_xlator_opt_key (key);
+ key = NULL;
+ }
+ if (ret)
+ goto out;
+ }
+
+#if (HAVE_LIB_XML)
+ if ((xml_out) &&
+ (ret = end_sethelp_xml_doc (writer)))
+ goto out;
+#else
+ if (xml_out)
+ gf_log ("glusterd", GF_LOG_ERROR, "Libxml not present");
+#endif
+
+ if (!xml_out)
+ output = gf_strdup (output_string);
+ else
+#if (HAVE_LIB_XML)
+ output = gf_strdup ((char *)buf->content);
+#else
+ gf_log ("glusterd", GF_LOG_ERROR, "Libxml not present");
+#endif
+
+ if (NULL == output) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr (ctx, "help-str", output);
+out:
+ gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+
+}
+
+static int
+volgen_graph_build_clients (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+ dict_t *set_dict, void *param)
+{
+ int i = 0;
+ int ret = -1;
+ uint32_t client_type = GF_CLIENT_OTHER;
+ char transt[16] = {0,};
+ char *volname = NULL;
+ char *str = NULL;
+ glusterd_brickinfo_t *brick = NULL;
+ xlator_t *xl = NULL;
+ char *ssl_str = NULL;
+ gf_boolean_t ssl_bool;
+
+ volname = volinfo->volname;
+
+ if (volinfo->brick_count == 0) {
+ gf_log ("", GF_LOG_ERROR,
+ "volume inconsistency: brick count is 0");
+ goto out;
+ }
+
+ if ((volinfo->dist_leaf_count < volinfo->brick_count) &&
+ ((volinfo->brick_count % volinfo->dist_leaf_count) != 0)) {
+ gf_log ("", GF_LOG_ERROR,
+ "volume inconsistency: "
+ "total number of bricks (%d) is not divisible with "
+ "number of bricks per cluster (%d) in a multi-cluster "
+ "setup",
+ volinfo->brick_count, volinfo->dist_leaf_count);
+ goto out;
+ }
+
+ get_transport_type (volinfo, set_dict, transt, _gf_false);
+
+ if (!strcmp (transt, "tcp,rdma"))
+ strcpy (transt, "tcp");
+
+ i = 0;
+ ret = -1;
+ list_for_each_entry (brick, &volinfo->bricks, brick_list) {
+ ret = -1;
+ xl = volgen_graph_add_nolink (graph, "protocol/client",
+ "%s-client-%d", volname, i);
+ if (!xl)
+ goto out;
+ ret = xlator_set_option (xl, "remote-host", brick->hostname);
+ if (ret)
+ goto out;
+ ret = xlator_set_option (xl, "remote-subvolume", brick->path);
+ if (ret)
+ goto out;
+ ret = xlator_set_option (xl, "transport-type", transt);
+ if (ret)
+ goto out;
+
+ ret = dict_get_uint32 (set_dict, "trusted-client",
+ &client_type);
+
+ if (!ret && client_type == GF_CLIENT_TRUSTED) {
+ str = NULL;
+ str = glusterd_auth_get_username (volinfo);
+ if (str) {
+ ret = xlator_set_option (xl, "username",
+ str);
+ if (ret)
+ goto out;
+ }
+
+ str = glusterd_auth_get_password (volinfo);
+ if (str) {
+ ret = xlator_set_option (xl, "password",
+ str);
+ if (ret)
+ goto out;
+ }
+ }
+
+ if (dict_get_str(set_dict,"client.ssl",&ssl_str) == 0) {
+ if (gf_string2boolean(ssl_str,&ssl_bool) == 0) {
+ if (ssl_bool) {
+ ret = xlator_set_option(xl,
+ "transport.socket.ssl-enabled",
+ "true");
+ if (ret) {
+ goto out;
+ }
+ }
+ }
+ }
+
+ i++;
+ }
+
+ if (i != volinfo->brick_count) {
+ gf_log ("", GF_LOG_ERROR,
+ "volume inconsistency: actual number of bricks (%d) "
+ "differs from brick count (%d)", i,
+ volinfo->brick_count);
+
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+static int
+volgen_graph_build_clusters (volgen_graph_t *graph,
+ glusterd_volinfo_t *volinfo, char *xl_type,
+ char *xl_namefmt, size_t child_count,
+ size_t sub_count)
+{
+ int i = 0;
+ int j = 0;
+ xlator_t *txl = NULL;
+ xlator_t *xl = NULL;
+ xlator_t *trav = NULL;
+ char *volname = NULL;
+ int ret = -1;
+
+ if (child_count == 0)
+ goto out;
+ volname = volinfo->volname;
+ txl = first_of (graph);
+ for (trav = txl; --child_count; trav = trav->next);
+ for (;; trav = trav->prev) {
+ if ((i % sub_count) == 0) {
+ xl = volgen_graph_add_nolink (graph, xl_type,
+ xl_namefmt, volname, j);
+ if (!xl) {
+ ret = -1;
+ goto out;
+ }
+ j++;
+ }
+
+ ret = volgen_xlator_link (xl, trav);
+ if (ret)
+ goto out;
+
+ if (trav == txl)
+ break;
+
+ i++;
+ }
+
+ ret = j;
+out:
+ return ret;
+}
+
+gf_boolean_t
+_xl_is_client_decommissioned (xlator_t *xl, glusterd_volinfo_t *volinfo)
+{
+ int ret = 0;
+ gf_boolean_t decommissioned = _gf_false;
+ char *hostname = NULL;
+ char *path = NULL;
+
+ GF_ASSERT (!strcmp (xl->type, "protocol/client"));
+ ret = xlator_get_option (xl, "remote-host", &hostname);
+ if (ret) {
+ GF_ASSERT (0);
+ gf_log ("glusterd", GF_LOG_ERROR, "Failed to get remote-host "
+ "from client %s", xl->name);
+ goto out;
+ }
+ ret = xlator_get_option (xl, "remote-subvolume", &path);
+ if (ret) {
+ GF_ASSERT (0);
+ gf_log ("glusterd", GF_LOG_ERROR, "Failed to get remote-host "
+ "from client %s", xl->name);
+ goto out;
+ }
+
+ decommissioned = glusterd_is_brick_decommissioned (volinfo, hostname,
+ path);
+out:
+ return decommissioned;
+}
+
+gf_boolean_t
+_xl_has_decommissioned_clients (xlator_t *xl, glusterd_volinfo_t *volinfo)
+{
+ xlator_list_t *xl_child = NULL;
+ gf_boolean_t decommissioned = _gf_false;
+ xlator_t *cxl = NULL;
+
+ if (!xl)
+ goto out;
+
+ if (!strcmp (xl->type, "protocol/client")) {
+ decommissioned = _xl_is_client_decommissioned (xl, volinfo);
+ goto out;
+ }
+
+ xl_child = xl->children;
+ while (xl_child) {
+ cxl = xl_child->xlator;
+ /* this can go into 2 depths if the volume type
+ is stripe-replicate */
+ decommissioned = _xl_has_decommissioned_clients (cxl, volinfo);
+ if (decommissioned)
+ break;
+
+ xl_child = xl_child->next;
+ }
+out:
+ return decommissioned;
+}
+
+static int
+_graph_get_decommissioned_children (xlator_t *dht, glusterd_volinfo_t *volinfo,
+ char **children)
+{
+ int ret = -1;
+ xlator_list_t *xl_child = NULL;
+ xlator_t *cxl = NULL;
+ gf_boolean_t comma = _gf_false;
+
+ *children = NULL;
+ xl_child = dht->children;
+ while (xl_child) {
+ cxl = xl_child->xlator;
+ if (_xl_has_decommissioned_clients (cxl, volinfo)) {
+ if (!*children) {
+ *children = GF_CALLOC (16 * GF_UNIT_KB, 1,
+ gf_common_mt_char);
+ if (!*children)
+ goto out;
+ }
+
+ if (comma)
+ strcat (*children, ",");
+ strcat (*children, cxl->name);
+ comma = _gf_true;
+ }
+
+ xl_child = xl_child->next;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+static int
+volgen_graph_build_dht_cluster (volgen_graph_t *graph,
+ glusterd_volinfo_t *volinfo, size_t child_count)
+{
+ int32_t clusters = 0;
+ int ret = -1;
+ char *decommissioned_children = NULL;
+ xlator_t *dht = NULL;
+ char *voltype = "cluster/distribute";
+
+ /* NUFA and Switch section */
+ if (dict_get_str_boolean (volinfo->dict, "cluster.nufa", 0) &&
+ dict_get_str_boolean (volinfo->dict, "cluster.switch", 0)) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "nufa and switch cannot be set together");
+ ret = -1;
+ goto out;
+ }
+
+ /* Check for NUFA volume option, and change the voltype */
+ if (dict_get_str_boolean (volinfo->dict, "cluster.nufa", 0))
+ voltype = "cluster/nufa";
+
+ /* Check for switch volume option, and change the voltype */
+ if (dict_get_str_boolean (volinfo->dict, "cluster.switch", 0))
+ voltype = "cluster/switch";
+
+ clusters = volgen_graph_build_clusters (graph, volinfo,
+ voltype,
+ "%s-dht",
+ child_count,
+ child_count);
+ if (clusters < 0)
+ goto out;
+
+ dht = first_of (graph);
+ ret = _graph_get_decommissioned_children (dht, volinfo,
+ &decommissioned_children);
+ if (ret)
+ goto out;
+ if (decommissioned_children) {
+ ret = xlator_set_option (dht, "decommissioned-bricks",
+ decommissioned_children);
+ if (ret)
+ goto out;
+ }
+ ret = 0;
+out:
+ GF_FREE (decommissioned_children);
+ return ret;
+}
+
+static int
+volume_volgen_graph_build_clusters (volgen_graph_t *graph,
+ glusterd_volinfo_t *volinfo)
+{
+ char *replicate_args[] = {"cluster/replicate",
+ "%s-replicate-%d"};
+ char *stripe_args[] = {"cluster/stripe",
+ "%s-stripe-%d"};
+ int rclusters = 0;
+ int clusters = 0;
+ int dist_count = 0;
+ int ret = -1;
+
+ if (!volinfo->dist_leaf_count)
+ goto out;
+
+ if (volinfo->dist_leaf_count == 1)
+ goto build_distribute;
+
+ /* All other cases, it will have one or the other cluster type */
+ switch (volinfo->type) {
+ case GF_CLUSTER_TYPE_REPLICATE:
+ clusters = volgen_graph_build_clusters (graph, volinfo,
+ replicate_args[0],
+ replicate_args[1],
+ volinfo->brick_count,
+ volinfo->replica_count);
+ if (clusters < 0)
+ goto out;
+ break;
+ case GF_CLUSTER_TYPE_STRIPE:
+ clusters = volgen_graph_build_clusters (graph, volinfo,
+ stripe_args[0],
+ stripe_args[1],
+ volinfo->brick_count,
+ volinfo->stripe_count);
+ if (clusters < 0)
+ goto out;
+ break;
+ case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
+ /* Replicate after the clients, then stripe */
+ if (volinfo->replica_count == 0)
+ goto out;
+ clusters = volgen_graph_build_clusters (graph, volinfo,
+ replicate_args[0],
+ replicate_args[1],
+ volinfo->brick_count,
+ volinfo->replica_count);
+ if (clusters < 0)
+ goto out;
+
+ rclusters = volinfo->brick_count / volinfo->replica_count;
+ GF_ASSERT (rclusters == clusters);
+ clusters = volgen_graph_build_clusters (graph, volinfo,
+ stripe_args[0],
+ stripe_args[1],
+ rclusters,
+ volinfo->stripe_count);
+ if (clusters < 0)
+ goto out;
+ break;
+ default:
+ gf_log ("", GF_LOG_ERROR, "volume inconsistency: "
+ "unrecognized clustering type");
+ goto out;
+ }
+
+build_distribute:
+ dist_count = volinfo->brick_count / volinfo->dist_leaf_count;
+ if (!dist_count) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = volgen_graph_build_dht_cluster (graph, volinfo,
+ dist_count);
+ if (ret == -1)
+ goto out;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static int client_graph_set_perf_options(volgen_graph_t *graph,
+ glusterd_volinfo_t *volinfo,
+ dict_t *set_dict)
+{
+ data_t *tmp_data = NULL;
+ char *volname = NULL;
+
+ /*
+ * Logic to make sure NFS doesn't have performance translators by
+ * default for a volume
+ */
+ volname = volinfo->volname;
+ tmp_data = dict_get (set_dict, "nfs-volume-file");
+ if (!tmp_data)
+ return volgen_graph_set_options_generic(graph, set_dict,
+ volname,
+ &perfxl_option_handler);
+ else
+ return volgen_graph_set_options_generic(graph, set_dict,
+ volname,
+ &nfsperfxl_option_handler);
+}
+
+static int
+client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+ dict_t *set_dict, void *param)
+{
+ int ret = 0;
+ xlator_t *xl = NULL;
+ char *volname = NULL;
+
+ volname = volinfo->volname;
+ ret = volgen_graph_build_clients (graph, volinfo, set_dict, param);
+ if (ret)
+ goto out;
+
+ ret = volume_volgen_graph_build_clusters (graph, volinfo);
+ if (ret == -1)
+ goto out;
+
+ /* Check for compress volume option, and add it to the graph on client side */
+ if (dict_get_str_boolean (set_dict, "features.compress", 0)) {
+ xl = volgen_graph_add (graph, "features/cdc", volname);
+ if (!xl) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_str (set_dict, "compress.mode", "client");
+ if (ret)
+ goto out;
+
+ }
+
+ ret = glusterd_volinfo_get_boolean (volinfo, "features.encryption");
+ if (ret == -1)
+ goto out;
+ if (ret) {
+ xl = volgen_graph_add (graph, "encryption/crypt", volname);
+
+ if (!xl) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = glusterd_volinfo_get_boolean (volinfo, VKEY_FEATURES_QUOTA);
+ if (ret == -1)
+ goto out;
+ if (ret) {
+ xl = volgen_graph_add (graph, "features/quota", volname);
+
+ if (!xl) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+
+ ret = glusterd_volinfo_get_boolean (volinfo, "features.file-snapshot");
+ if (ret == -1)
+ goto out;
+ if (ret) {
+ xl = volgen_graph_add (graph, "features/qemu-block", volname);
+
+ if (!xl) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = client_graph_set_perf_options(graph, volinfo, set_dict);
+ if (ret)
+ goto out;
+
+ /* add debug translators depending on the options */
+ ret = check_and_add_debug_xl (graph, set_dict, volname,
+ "client");
+ if (ret)
+ return -1;
+
+ ret = -1;
+ xl = volgen_graph_add_as (graph, "debug/io-stats", volname);
+ if (!xl)
+ goto out;
+
+ ret = volgen_graph_set_options_generic (graph, set_dict, "client",
+ &loglevel_option_handler);
+
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING, "changing client log level"
+ " failed");
+
+ ret = volgen_graph_set_options_generic (graph, set_dict, "client",
+ &sys_loglevel_option_handler);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING, "changing client syslog "
+ "level failed");
+out:
+ return ret;
+}
+
+
+/* builds a graph for client role , with option overrides in mod_dict */
+static int
+build_client_graph (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+ dict_t *mod_dict)
+{
+ return build_graph_generic (graph, volinfo, mod_dict, NULL,
+ &client_graph_builder);
+}
+
+char *gd_shd_options[] = {
+ "!self-heal-daemon",
+ "!heal-timeout",
+ NULL
+};
+
+char*
+gd_get_matching_option (char **options, char *option)
+{
+ while (*options && strcmp (*options, option))
+ options++;
+ return *options;
+}
+
+static int
+shd_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme,
+ void *param)
+{
+ int ret = 0;
+ struct volopt_map_entry new_vme = {0};
+ char *shd_option = NULL;
+
+ shd_option = gd_get_matching_option (gd_shd_options, vme->option);
+ if ((vme->option[0] == '!') && !shd_option)
+ goto out;
+ new_vme = *vme;
+ if (shd_option) {
+ new_vme.option = shd_option + 1;//option with out '!'
+ }
+
+ ret = no_filter_option_handler (graph, &new_vme, param);
+out:
+ return ret;
+}
+
+static int
+nfs_option_handler (volgen_graph_t *graph,
+ struct volopt_map_entry *vme, void *param)
+{
+ xlator_t *xl = NULL;
+ char *aa = NULL;
+ int ret = 0;
+ glusterd_volinfo_t *volinfo = NULL;
+
+ volinfo = param;
+
+ xl = first_of (graph);
+
+/* if (vme->type == GLOBAL_DOC || vme->type == GLOBAL_NO_DOC) {
+
+ ret = xlator_set_option (xl, vme->key, vme->value);
+ }*/
+ if (!volinfo || (volinfo->volname[0] == '\0'))
+ return 0;
+
+ if (! strcmp (vme->option, "!rpc-auth.addr.*.allow")) {
+ ret = gf_asprintf (&aa, "rpc-auth.addr.%s.allow",
+ volinfo->volname);
+
+ if (ret != -1) {
+ ret = xlator_set_option (xl, aa, vme->value);
+ GF_FREE (aa);
+ }
+
+ if (ret)
+ return -1;
+ }
+
+ if (! strcmp (vme->option, "!rpc-auth.addr.*.reject")) {
+ ret = gf_asprintf (&aa, "rpc-auth.addr.%s.reject",
+ volinfo->volname);
+
+ if (ret != -1) {
+ ret = xlator_set_option (xl, aa, vme->value);
+ GF_FREE (aa);
+ }
+
+ if (ret)
+ return -1;
+ }
+
+ if (! strcmp (vme->option, "!rpc-auth.auth-unix.*")) {
+ ret = gf_asprintf (&aa, "rpc-auth.auth-unix.%s",
+ volinfo->volname);
+
+ if (ret != -1) {
+ ret = xlator_set_option (xl, aa, vme->value);
+ GF_FREE (aa);
+ }
+
+ if (ret)
+ return -1;
+ }
+ if (! strcmp (vme->option, "!rpc-auth.auth-null.*")) {
+ ret = gf_asprintf (&aa, "rpc-auth.auth-null.%s",
+ volinfo->volname);
+
+ if (ret != -1) {
+ ret = xlator_set_option (xl, aa, vme->value);
+ GF_FREE (aa);
+ }
+
+ if (ret)
+ return -1;
+ }
+
+ if (! strcmp (vme->option, "!nfs3.*.trusted-sync")) {
+ ret = gf_asprintf (&aa, "nfs3.%s.trusted-sync",
+ volinfo->volname);
+
+ if (ret != -1) {
+ ret = xlator_set_option (xl, aa, vme->value);
+ GF_FREE (aa);
+ }
+
+ if (ret)
+ return -1;
+ }
+
+ if (! strcmp (vme->option, "!nfs3.*.trusted-write")) {
+ ret = gf_asprintf (&aa, "nfs3.%s.trusted-write",
+ volinfo->volname);
+
+ if (ret != -1) {
+ ret = xlator_set_option (xl, aa, vme->value);
+ GF_FREE (aa);
+ }
+
+ if (ret)
+ return -1;
+ }
+
+ if (! strcmp (vme->option, "!nfs3.*.volume-access")) {
+ ret = gf_asprintf (&aa, "nfs3.%s.volume-access",
+ volinfo->volname);
+
+ if (ret != -1) {
+ ret = xlator_set_option (xl, aa, vme->value);
+ GF_FREE (aa);
+ }
+
+ if (ret)
+ return -1;
+ }
+
+ if (! strcmp (vme->option, "!nfs3.*.export-dir")) {
+ ret = gf_asprintf (&aa, "nfs3.%s.export-dir",
+ volinfo->volname);
+
+ if (ret != -1) {
+ ret = gf_canonicalize_path (vme->value);
+ if (ret)
+ return -1;
+
+ ret = xlator_set_option (xl, aa, vme->value);
+ GF_FREE (aa);
+ }
+
+ if (ret)
+ return -1;
+ }
+
+
+
+ if (! strcmp (vme->option, "!rpc-auth.ports.*.insecure")) {
+ ret = gf_asprintf (&aa, "rpc-auth.ports.%s.insecure",
+ volinfo->volname);
+
+ if (ret != -1) {
+ ret = xlator_set_option (xl, aa, vme->value);
+ GF_FREE (aa);
+ }
+
+ if (ret)
+ return -1;
+ }
+
+
+ if (! strcmp (vme->option, "!nfs-disable")) {
+ ret = gf_asprintf (&aa, "nfs.%s.disable",
+ volinfo->volname);
+
+ if (ret != -1) {
+ ret = xlator_set_option (xl, aa, vme->value);
+ GF_FREE (aa);
+ }
+
+ if (ret)
+ return -1;
+ }
+
+ if ( (strcmp (vme->voltype, "nfs/server") == 0) &&
+ (vme->option && vme->option[0]!='!') ) {
+ ret = xlator_set_option (xl, vme->option, vme->value);
+ if (ret)
+ return -1;
+ }
+
+
+ /*key = strchr (vme->key, '.') + 1;
+
+ for (trav = xl->children; trav; trav = trav->next) {
+ ret = gf_asprintf (&aa, "auth.addr.%s.%s", trav->xlator->name,
+ key);
+ if (ret != -1) {
+ ret = xlator_set_option (xl, aa, vme->value);
+ GF_FREE (aa);
+ }
+ if (ret)
+ return -1;
+ }*/
+
+ return 0;
+}
+
+static int
+volgen_graph_set_iam_shd (volgen_graph_t *graph)
+{
+ xlator_t *trav;
+ int ret = 0;
+
+ for (trav = first_of (graph); trav; trav = trav->next) {
+ if (strcmp (trav->type, "cluster/replicate") != 0)
+ continue;
+
+ ret = xlator_set_option (trav, "iam-self-heal-daemon", "yes");
+ if (ret)
+ break;
+ }
+ return ret;
+}
+
+static int
+build_shd_graph (volgen_graph_t *graph, dict_t *mod_dict)
+{
+ volgen_graph_t cgraph = {0};
+ glusterd_volinfo_t *voliter = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ dict_t *set_dict = NULL;
+ int ret = 0;
+ gf_boolean_t valid_config = _gf_false;
+ xlator_t *iostxl = NULL;
+ int rclusters = 0;
+ int replica_count = 0;
+ gf_boolean_t graph_check = _gf_false;
+
+ this = THIS;
+ priv = this->private;
+
+ set_dict = dict_new ();
+ if (!set_dict) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ graph_check = dict_get_str_boolean (mod_dict, "graph-check", 0);
+ iostxl = volgen_graph_add_as (graph, "debug/io-stats", "glustershd");
+ if (!iostxl) {
+ ret = -1;
+ goto out;
+ }
+
+ list_for_each_entry (voliter, &priv->volumes, vol_list) {
+ if (!graph_check &&
+ (voliter->status != GLUSTERD_STATUS_STARTED))
+ continue;
+
+ if (!glusterd_is_volume_replicate (voliter))
+ continue;
+
+ replica_count = voliter->replica_count;
+
+ valid_config = _gf_true;
+
+ ret = dict_set_str (set_dict, "cluster.self-heal-daemon", "on");
+ if (ret)
+ goto out;
+
+ ret = dict_set_uint32 (set_dict, "trusted-client",
+ GF_CLIENT_TRUSTED);
+ if (ret)
+ goto out;
+
+ dict_copy (voliter->dict, set_dict);
+ if (mod_dict)
+ dict_copy (mod_dict, set_dict);
+
+ memset (&cgraph, 0, sizeof (cgraph));
+ ret = volgen_graph_build_clients (&cgraph, voliter, set_dict,
+ NULL);
+ if (ret)
+ goto out;
+
+ rclusters = volgen_graph_build_clusters (&cgraph, voliter,
+ "cluster/replicate",
+ "%s-replicate-%d",
+ voliter->brick_count,
+ replica_count);
+ if (rclusters < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = volgen_graph_set_options_generic (&cgraph, set_dict, voliter,
+ shd_option_handler);
+ if (ret)
+ goto out;
+
+ ret = volgen_graph_set_iam_shd (&cgraph);
+ if (ret)
+ goto out;
+
+ ret = volgen_graph_merge_sub (graph, &cgraph, rclusters);
+ if (ret)
+ goto out;
+
+ ret = volgen_graph_set_options_generic (graph, set_dict,
+ "client",
+ &loglevel_option_handler);
+
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING, "changing loglevel "
+ "of self-heal daemon failed");
+
+ ret = volgen_graph_set_options_generic (graph, set_dict,
+ "client",
+ &sys_loglevel_option_handler);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING, "changing syslog "
+ "level of self-heal daemon failed");
+
+ ret = dict_reset (set_dict);
+ if (ret)
+ goto out;
+ }
+out:
+ if (set_dict)
+ dict_unref (set_dict);
+ if (!valid_config)
+ ret = -EINVAL;
+ return ret;
+}
+
+/* builds a graph for nfs server role, with option overrides in mod_dict */
+static int
+build_nfs_graph (volgen_graph_t *graph, dict_t *mod_dict)
+{
+ volgen_graph_t cgraph = {0,};
+ glusterd_volinfo_t *voliter = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ dict_t *set_dict = NULL;
+ xlator_t *nfsxl = NULL;
+ char *skey = NULL;
+ int ret = 0;
+ char nfs_xprt[16] = {0,};
+ char *volname = NULL;
+ data_t *data = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ set_dict = dict_new ();
+ if (!set_dict) {
+ gf_log ("", GF_LOG_ERROR, "Out of memory");
+ return -1;
+ }
+
+ nfsxl = volgen_graph_add_as (graph, "nfs/server", "nfs-server");
+ if (!nfsxl) {
+ ret = -1;
+ goto out;
+ }
+ ret = xlator_set_option (nfsxl, "nfs.dynamic-volumes", "on");
+ if (ret)
+ goto out;
+
+ ret = xlator_set_option (nfsxl, "nfs.nlm", "on");
+ if (ret)
+ goto out;
+
+ ret = xlator_set_option (nfsxl, "nfs.drc", "on");
+ if (ret)
+ goto out;
+
+ list_for_each_entry (voliter, &priv->volumes, vol_list) {
+ if (voliter->status != GLUSTERD_STATUS_STARTED)
+ continue;
+
+ if (dict_get_str_boolean (voliter->dict, "nfs.disable", 0))
+ continue;
+
+ ret = gf_asprintf (&skey, "rpc-auth.addr.%s.allow",
+ voliter->volname);
+ if (ret == -1) {
+ gf_log ("", GF_LOG_ERROR, "Out of memory");
+ goto out;
+ }
+ ret = xlator_set_option (nfsxl, skey, "*");
+ GF_FREE (skey);
+ if (ret)
+ goto out;
+
+ ret = gf_asprintf (&skey, "nfs3.%s.volume-id",
+ voliter->volname);
+ if (ret == -1) {
+ gf_log ("", GF_LOG_ERROR, "Out of memory");
+ goto out;
+ }
+ ret = xlator_set_option (nfsxl, skey, uuid_utoa (voliter->volume_id));
+ GF_FREE (skey);
+ if (ret)
+ goto out;
+
+ /* If both RDMA and TCP are the transport_type, use RDMA
+ for NFS client protocols */
+ memset (&cgraph, 0, sizeof (cgraph));
+ if (mod_dict)
+ get_transport_type (voliter, mod_dict, nfs_xprt, _gf_true);
+ else
+ get_transport_type (voliter, voliter->dict, nfs_xprt, _gf_true);
+
+ ret = dict_set_str (set_dict, "performance.stat-prefetch", "off");
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (set_dict, "performance.client-io-threads",
+ "off");
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (set_dict, "client-transport-type",
+ nfs_xprt);
+ if (ret)
+ goto out;
+
+ ret = dict_set_uint32 (set_dict, "trusted-client",
+ GF_CLIENT_TRUSTED);
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (set_dict, "nfs-volume-file", "yes");
+ if (ret)
+ goto out;
+
+ if (mod_dict && (data = dict_get (mod_dict, "volume-name"))) {
+ volname = data->data;
+ if (strcmp (volname, voliter->volname) == 0)
+ dict_copy (mod_dict, set_dict);
+ }
+
+ ret = build_client_graph (&cgraph, voliter, set_dict);
+ if (ret)
+ goto out;
+
+ if (mod_dict) {
+ dict_copy (mod_dict, set_dict);
+ ret = volgen_graph_set_options_generic (&cgraph, set_dict, voliter,
+ basic_option_handler);
+ } else {
+ ret = volgen_graph_set_options_generic (&cgraph, voliter->dict, voliter,
+ basic_option_handler);
+ }
+
+ if (ret)
+ goto out;
+
+ ret = volgen_graph_merge_sub (graph, &cgraph, 1);
+ if (ret)
+ goto out;
+ ret = dict_reset (set_dict);
+ if (ret)
+ goto out;
+ }
+
+ list_for_each_entry (voliter, &priv->volumes, vol_list) {
+
+ if (mod_dict) {
+ ret = volgen_graph_set_options_generic (graph, mod_dict, voliter,
+ nfs_option_handler);
+ } else {
+ ret = volgen_graph_set_options_generic (graph, voliter->dict, voliter,
+ nfs_option_handler);
+ }
+
+ if (ret)
+ gf_log ("glusterd", GF_LOG_WARNING, "Could not set "
+ "vol-options for the volume %s", voliter->volname);
+ }
+
+ out:
+ gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
+ dict_destroy (set_dict);
+
+ return ret;
+}
+
+
+
+
+/****************************
+ *
+ * Volume generation interface
+ *
+ ****************************/
+
+
+static void
+get_brick_filepath (char *filename, glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo)
+{
+ char path[PATH_MAX] = {0,};
+ char brick[PATH_MAX] = {0,};
+ glusterd_conf_t *priv = NULL;
+
+ priv = THIS->private;
+
+ GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, brick);
+ GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv);
+
+ snprintf (filename, PATH_MAX, "%s/%s.%s.%s.vol",
+ path, volinfo->volname,
+ brickinfo->hostname,
+ brick);
+}
+
+gf_boolean_t
+glusterd_is_valid_volfpath (char *volname, char *brick)
+{
+ char volfpath[PATH_MAX] = {0,};
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ int32_t ret = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = glusterd_brickinfo_new_from_brick (brick, &brickinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "Failed to create brickinfo"
+ " for brick %s", brick );
+ ret = 0;
+ goto out;
+ }
+ ret = glusterd_volinfo_new (&volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "Failed to create volinfo");
+ ret = 0;
+ goto out;
+ }
+ strncpy (volinfo->volname, volname, sizeof (volinfo->volname));
+ get_brick_filepath (volfpath, volinfo, brickinfo);
+
+ ret = (strlen (volfpath) < _POSIX_PATH_MAX);
+
+out:
+ if (brickinfo)
+ glusterd_brickinfo_delete (brickinfo);
+ if (volinfo)
+ glusterd_volinfo_delete (volinfo);
+ return ret;
+}
+
+static int
+glusterd_generate_brick_volfile (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo)
+{
+ volgen_graph_t graph = {0,};
+ char filename[PATH_MAX] = {0,};
+ int ret = -1;
+
+ GF_ASSERT (volinfo);
+ GF_ASSERT (brickinfo);
+
+ get_brick_filepath (filename, volinfo, brickinfo);
+
+ ret = build_server_graph (&graph, volinfo, NULL, brickinfo);
+ if (!ret)
+ ret = volgen_write_volfile (&graph, filename);
+
+ volgen_graph_free (&graph);
+
+ return ret;
+}
+
+static void
+get_vol_tstamp_file (char *filename, glusterd_volinfo_t *volinfo)
+{
+ glusterd_conf_t *priv = NULL;
+
+ priv = THIS->private;
+
+ GLUSTERD_GET_VOLUME_DIR (filename, volinfo, priv);
+ strncat (filename, "/marker.tstamp",
+ PATH_MAX - strlen(filename) - 1);
+}
+
+int
+generate_brick_volfiles (glusterd_volinfo_t *volinfo)
+{
+ glusterd_brickinfo_t *brickinfo = NULL;
+ char tstamp_file[PATH_MAX] = {0,};
+ int ret = -1;
+
+ ret = glusterd_volinfo_get_boolean (volinfo, VKEY_MARKER_XTIME);
+ if (ret == -1)
+ return -1;
+
+ get_vol_tstamp_file (tstamp_file, volinfo);
+
+ if (ret) {
+ ret = open (tstamp_file, O_WRONLY|O_CREAT|O_EXCL, 0600);
+ if (ret == -1 && errno == EEXIST) {
+ gf_log ("", GF_LOG_DEBUG, "timestamp file exist");
+ ret = -2;
+ }
+ if (ret == -1) {
+ gf_log ("", GF_LOG_ERROR, "failed to create %s (%s)",
+ tstamp_file, strerror (errno));
+ return -1;
+ }
+ if (ret >= 0)
+ close (ret);
+ } else {
+ ret = unlink (tstamp_file);
+ if (ret == -1 && errno == ENOENT)
+ ret = 0;
+ if (ret == -1) {
+ gf_log ("", GF_LOG_ERROR, "failed to unlink %s (%s)",
+ tstamp_file, strerror (errno));
+ return -1;
+ }
+ }
+
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ gf_log ("", GF_LOG_DEBUG,
+ "Found a brick - %s:%s", brickinfo->hostname,
+ brickinfo->path);
+
+ ret = glusterd_generate_brick_volfile (volinfo, brickinfo);
+ if (ret)
+ goto out;
+
+ }
+
+ ret = 0;
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+static int
+generate_single_transport_client_volfile (glusterd_volinfo_t *volinfo,
+ char *filepath, dict_t *dict)
+{
+ volgen_graph_t graph = {0,};
+ int ret = -1;
+
+ ret = build_client_graph (&graph, volinfo, dict);
+ if (!ret)
+ ret = volgen_write_volfile (&graph, filepath);
+
+ volgen_graph_free (&graph);
+
+ return ret;
+}
+
+static void
+enumerate_transport_reqs (gf_transport_type type, char **types)
+{
+ switch (type) {
+ case GF_TRANSPORT_TCP:
+ types[0] = "tcp";
+ break;
+ case GF_TRANSPORT_RDMA:
+ types[0] = "rdma";
+ break;
+ case GF_TRANSPORT_BOTH_TCP_RDMA:
+ types[0] = "tcp";
+ types[1] = "rdma";
+ break;
+ }
+}
+
+int
+generate_client_volfiles (glusterd_volinfo_t *volinfo,
+ glusterd_client_type_t client_type)
+{
+ char filepath[PATH_MAX] = {0,};
+ int ret = -1;
+ char *types[] = {NULL, NULL, NULL};
+ int i = 0;
+ dict_t *dict = NULL;
+ gf_transport_type type = GF_TRANSPORT_TCP;
+
+ enumerate_transport_reqs (volinfo->transport_type, types);
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+ for (i = 0; types[i]; i++) {
+ memset (filepath, 0, sizeof (filepath));
+ ret = dict_set_str (dict, "client-transport-type", types[i]);
+ if (ret)
+ goto out;
+ type = transport_str_to_type (types[i]);
+
+ ret = dict_set_uint32 (dict, "trusted-client", client_type);
+ if (ret)
+ goto out;
+
+ if (client_type == GF_CLIENT_TRUSTED) {
+ glusterd_get_trusted_client_filepath (filepath,
+ volinfo,
+ type);
+ } else {
+ glusterd_get_client_filepath (filepath,
+ volinfo,
+ type);
+ }
+
+ ret = generate_single_transport_client_volfile (volinfo,
+ filepath,
+ dict);
+ if (ret)
+ goto out;
+ }
+out:
+ if (dict)
+ dict_unref (dict);
+ return ret;
+}
+
+int
+glusterd_create_rb_volfiles (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo)
+{
+ int ret = -1;
+
+ ret = glusterd_generate_brick_volfile (volinfo, brickinfo);
+ if (!ret)
+ ret = generate_client_volfiles (volinfo, GF_CLIENT_TRUSTED);
+ if (!ret)
+ ret = glusterd_fetchspec_notify (THIS);
+
+ return ret;
+}
+
+int
+glusterd_create_volfiles_and_notify_services (glusterd_volinfo_t *volinfo)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ ret = generate_brick_volfiles (volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not generate volfiles for bricks");
+ goto out;
+ }
+
+ ret = generate_client_volfiles (volinfo, GF_CLIENT_TRUSTED);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not generate trusted client volfiles");
+ goto out;
+ }
+
+ ret = generate_client_volfiles (volinfo, GF_CLIENT_OTHER);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not generate client volfiles");
+ goto out;
+ }
+
+ ret = glusterd_fetchspec_notify (this);
+
+out:
+ return ret;
+}
+
+int
+glusterd_create_global_volfile (int (*builder) (volgen_graph_t *graph,
+ dict_t *set_dict),
+ char *filepath, dict_t *mod_dict)
+{
+ volgen_graph_t graph = {0,};
+ int ret = -1;
+
+ ret = builder (&graph, mod_dict);
+ if (!ret)
+ ret = volgen_write_volfile (&graph, filepath);
+
+ volgen_graph_free (&graph);
+
+ return ret;
+}
+
+int
+glusterd_create_nfs_volfile ()
+{
+ char filepath[PATH_MAX] = {0,};
+ glusterd_conf_t *conf = THIS->private;
+
+ glusterd_get_nodesvc_volfile ("nfs", conf->workdir,
+ filepath, sizeof (filepath));
+ return glusterd_create_global_volfile (build_nfs_graph,
+ filepath, NULL);
+}
+
+int
+glusterd_create_shd_volfile ()
+{
+ char filepath[PATH_MAX] = {0,};
+ int ret = -1;
+ glusterd_conf_t *conf = THIS->private;
+ dict_t *mod_dict = NULL;
+
+ mod_dict = dict_new ();
+ if (!mod_dict)
+ goto out;
+
+ ret = dict_set_uint32 (mod_dict, "cluster.background-self-heal-count", 0);
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (mod_dict, "cluster.data-self-heal", "on");
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (mod_dict, "cluster.metadata-self-heal", "on");
+ if (ret)
+ goto out;
+
+ ret = dict_set_str (mod_dict, "cluster.entry-self-heal", "on");
+ if (ret)
+ goto out;
+
+ glusterd_get_nodesvc_volfile ("glustershd", conf->workdir,
+ filepath, sizeof (filepath));
+ ret = glusterd_create_global_volfile (build_shd_graph, filepath,
+ mod_dict);
+out:
+ if (mod_dict)
+ dict_unref (mod_dict);
+ return ret;
+}
+
+int
+glusterd_check_nfs_topology_identical (gf_boolean_t *identical)
+{
+ char nfsvol[PATH_MAX] = {0,};
+ char tmpnfsvol[PATH_MAX] = {0,};
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = THIS;
+ int ret = -1;
+ int tmpclean = 0;
+ int tmpfd = -1;
+
+ if ((!identical) || (!this) || (!this->private))
+ goto out;
+
+ conf = (glusterd_conf_t *) this->private;
+
+ /* Fetch the original NFS volfile */
+ glusterd_get_nodesvc_volfile ("nfs", conf->workdir,
+ nfsvol, sizeof (nfsvol));
+
+ /* Create the temporary NFS volfile */
+ snprintf (tmpnfsvol, sizeof (tmpnfsvol), "/tmp/gnfs-XXXXXX");
+ tmpfd = mkstemp (tmpnfsvol);
+ if (tmpfd < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unable to create temp file %s: (%s)",
+ tmpnfsvol, strerror (errno));
+ goto out;
+ }
+
+ tmpclean = 1; /* SET the flag to unlink() tmpfile */
+
+ ret = glusterd_create_global_volfile (build_nfs_graph,
+ tmpnfsvol, NULL);
+ if (ret)
+ goto out;
+
+ /* Compare the topology of volfiles */
+ ret = glusterd_check_topology_identical (nfsvol, tmpnfsvol,
+ identical);
+out:
+ if (tmpfd >= 0)
+ close (tmpfd);
+ if (tmpclean)
+ unlink (tmpnfsvol);
+ return ret;
+}
+
+int
+glusterd_check_nfs_volfile_identical (gf_boolean_t *identical)
+{
+ char nfsvol[PATH_MAX] = {0,};
+ char tmpnfsvol[PATH_MAX] = {0,};
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ int ret = -1;
+ int need_unlink = 0;
+ int tmp_fd = -1;
+
+ this = THIS;
+
+ GF_ASSERT (this);
+ GF_ASSERT (identical);
+
+ conf = this->private;
+
+ glusterd_get_nodesvc_volfile ("nfs", conf->workdir,
+ nfsvol, sizeof (nfsvol));
+
+ snprintf (tmpnfsvol, sizeof (tmpnfsvol), "/tmp/gnfs-XXXXXX");
+
+ tmp_fd = mkstemp (tmpnfsvol);
+ if (tmp_fd < 0) {
+ gf_log ("", GF_LOG_WARNING, "Unable to create temp file %s: "
+ "(%s)", tmpnfsvol, strerror (errno));
+ goto out;
+ }
+
+ need_unlink = 1;
+
+ ret = glusterd_create_global_volfile (build_nfs_graph,
+ tmpnfsvol, NULL);
+ if (ret)
+ goto out;
+
+ ret = glusterd_check_files_identical (nfsvol, tmpnfsvol,
+ identical);
+ if (ret)
+ goto out;
+
+out:
+ if (need_unlink)
+ unlink (tmpnfsvol);
+
+ if (tmp_fd >= 0)
+ close (tmp_fd);
+
+ return ret;
+}
+
+int
+glusterd_delete_volfile (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo)
+{
+ int ret = 0;
+ char filename[PATH_MAX] = {0,};
+
+ GF_ASSERT (volinfo);
+ GF_ASSERT (brickinfo);
+
+ get_brick_filepath (filename, volinfo, brickinfo);
+ ret = unlink (filename);
+ if (ret)
+ gf_log ("glusterd", GF_LOG_ERROR, "failed to delete file: %s, "
+ "reason: %s", filename, strerror (errno));
+ return ret;
+}
+
+int
+validate_shdopts (glusterd_volinfo_t *volinfo,
+ dict_t *val_dict,
+ char **op_errstr)
+{
+ volgen_graph_t graph = {0,};
+ int ret = -1;
+
+ graph.errstr = op_errstr;
+
+ if (!glusterd_is_volume_replicate (volinfo)) {
+ ret = 0;
+ goto out;
+ }
+ ret = dict_set_str (val_dict, "graph-check", "on");
+ if (ret)
+ goto out;
+ ret = build_shd_graph (&graph, val_dict);
+ if (!ret)
+ ret = graph_reconf_validateopt (&graph.graph, op_errstr);
+
+ volgen_graph_free (&graph);
+
+ gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
+out:
+ dict_del (val_dict, "graph-check");
+ return ret;
+}
+
+int
+validate_nfsopts (glusterd_volinfo_t *volinfo,
+ dict_t *val_dict,
+ char **op_errstr)
+{
+ volgen_graph_t graph = {0,};
+ int ret = -1;
+ char transport_type[16] = {0,};
+ char *tt = NULL;
+ char err_str[4096] = {0,};
+ xlator_t *this = THIS;
+
+ GF_ASSERT (this);
+
+ graph.errstr = op_errstr;
+
+ get_vol_transport_type (volinfo, transport_type);
+ ret = dict_get_str (val_dict, "nfs.transport-type", &tt);
+ if (!ret) {
+ if (volinfo->transport_type != GF_TRANSPORT_BOTH_TCP_RDMA) {
+ snprintf (err_str, sizeof (err_str), "Changing nfs "
+ "transport type is allowed only for volumes "
+ "of transport type tcp,rdma");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ *op_errstr = gf_strdup (err_str);
+ ret = -1;
+ goto out;
+ }
+ if (strcmp (tt,"tcp") && strcmp (tt,"rdma")) {
+ snprintf (err_str, sizeof (err_str), "wrong transport "
+ "type %s", tt);
+ *op_errstr = gf_strdup (err_str);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = dict_set_str (val_dict, "volume-name", volinfo->volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set volume name");
+ goto out;
+ }
+
+ ret = build_nfs_graph (&graph, val_dict);
+ if (!ret)
+ ret = graph_reconf_validateopt (&graph.graph, op_errstr);
+
+ volgen_graph_free (&graph);
+
+out:
+ if (dict_get (val_dict, "volume-name"))
+ dict_del (val_dict, "volume-name");
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+
+int
+validate_clientopts (glusterd_volinfo_t *volinfo,
+ dict_t *val_dict,
+ char **op_errstr)
+{
+ volgen_graph_t graph = {0,};
+ int ret = -1;
+
+ GF_ASSERT (volinfo);
+
+ graph.errstr = op_errstr;
+
+ ret = build_client_graph (&graph, volinfo, val_dict);
+ if (!ret)
+ ret = graph_reconf_validateopt (&graph.graph, op_errstr);
+
+ volgen_graph_free (&graph);
+
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+validate_brickopts (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ dict_t *val_dict,
+ char **op_errstr)
+{
+ volgen_graph_t graph = {0,};
+ int ret = -1;
+
+ GF_ASSERT (volinfo);
+
+ graph.errstr = op_errstr;
+
+ ret = build_server_graph (&graph, volinfo, val_dict, brickinfo);
+ if (!ret)
+ ret = graph_reconf_validateopt (&graph.graph, op_errstr);
+
+ volgen_graph_free (&graph);
+
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_validate_brickreconf (glusterd_volinfo_t *volinfo,
+ dict_t *val_dict,
+ char **op_errstr)
+{
+ glusterd_brickinfo_t *brickinfo = NULL;
+ int ret = -1;
+
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ gf_log ("", GF_LOG_DEBUG,
+ "Validating %s", brickinfo->hostname);
+
+ ret = validate_brickopts (volinfo, brickinfo, val_dict,
+ op_errstr);
+ if (ret)
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+static int
+_check_globalopt (dict_t *this, char *key, data_t *value, void *ret_val)
+{
+ int *ret = NULL;
+
+ ret = ret_val;
+ if (*ret)
+ return 0;
+ if (!glusterd_check_globaloption (key))
+ *ret = 1;
+
+ return 0;
+}
+
+int
+glusterd_validate_globalopts (glusterd_volinfo_t *volinfo,
+ dict_t *val_dict, char **op_errstr)
+{
+ int ret = 0;
+
+ dict_foreach (val_dict, _check_globalopt, &ret);
+ if (ret) {
+ *op_errstr = gf_strdup ( "option specified is not a global option");
+ return -1;
+ }
+ ret = glusterd_validate_brickreconf (volinfo, val_dict, op_errstr);
+
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG,
+ "Could not Validate bricks");
+ goto out;
+ }
+
+ ret = validate_clientopts (volinfo, val_dict, op_errstr);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG,
+ "Could not Validate client");
+ goto out;
+ }
+
+ ret = validate_nfsopts (volinfo, val_dict, op_errstr);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG, "Could not Validate nfs");
+ goto out;
+ }
+
+ ret = validate_shdopts (volinfo, val_dict, op_errstr);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG, "Could not Validate self-heald");
+ goto out;
+ }
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+static int
+_check_localopt (dict_t *this, char *key, data_t *value, void *ret_val)
+{
+ int *ret = NULL;
+
+ ret = ret_val;
+ if (*ret)
+ return 0;
+ if (!glusterd_check_localoption (key))
+ *ret = 1;
+
+ return 0;
+}
+
+int
+glusterd_validate_reconfopts (glusterd_volinfo_t *volinfo, dict_t *val_dict,
+ char **op_errstr)
+{
+ int ret = 0;
+
+ dict_foreach (val_dict, _check_localopt, &ret);
+ if (ret) {
+ *op_errstr = gf_strdup ( "option specified is not a local option");
+ return -1;
+ }
+ ret = glusterd_validate_brickreconf (volinfo, val_dict, op_errstr);
+
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG,
+ "Could not Validate bricks");
+ goto out;
+ }
+
+ ret = validate_clientopts (volinfo, val_dict, op_errstr);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG,
+ "Could not Validate client");
+ goto out;
+ }
+
+ ret = validate_nfsopts (volinfo, val_dict, op_errstr);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG, "Could not Validate nfs");
+ goto out;
+ }
+
+
+ ret = validate_shdopts (volinfo, val_dict, op_errstr);
+ if (ret) {
+ gf_log ("", GF_LOG_DEBUG, "Could not Validate self-heald");
+ goto out;
+ }
+
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+static struct volopt_map_entry *
+_gd_get_vmep (char *key) {
+ char *completion = NULL;
+ struct volopt_map_entry *vmep = NULL;
+ int ret = 0;
+
+ COMPLETE_OPTION ((char *)key, completion, ret);
+ for (vmep = glusterd_volopt_map; vmep->key; vmep++) {
+ if (strcmp (vmep->key, key) == 0)
+ return vmep;
+ }
+
+ return NULL;
+}
+
+uint32_t
+glusterd_get_op_version_for_key (char *key)
+{
+ struct volopt_map_entry *vmep = NULL;
+
+ GF_ASSERT (key);
+
+ vmep = _gd_get_vmep (key);
+ if (vmep)
+ return vmep->op_version;
+
+ return 0;
+}
+
+gf_boolean_t
+gd_is_client_option (char *key)
+{
+ struct volopt_map_entry *vmep = NULL;
+
+ GF_ASSERT (key);
+
+ vmep = _gd_get_vmep (key);
+ if (vmep && (vmep->flags & OPT_FLAG_CLIENT_OPT))
+ return _gf_true;
+
+ return _gf_false;
+}
+
+gf_boolean_t
+gd_is_xlator_option (char *key)
+{
+ struct volopt_map_entry *vmep = NULL;
+
+ GF_ASSERT (key);
+
+ vmep = _gd_get_vmep (key);
+ if (vmep && (vmep->flags & OPT_FLAG_XLATOR_OPT))
+ return _gf_true;
+
+ return _gf_false;
+}
+
+volume_option_type_t
+_gd_get_option_type (char *key)
+{
+ struct volopt_map_entry *vmep = NULL;
+ void *dl_handle = NULL;
+ volume_opt_list_t vol_opt_list = {{0},};
+ int ret = -1;
+ volume_option_t *opt = NULL;
+ char *xlopt_key = NULL;
+ volume_option_type_t opt_type = GF_OPTION_TYPE_MAX;
+
+ GF_ASSERT (key);
+
+ vmep = _gd_get_vmep (key);
+
+ if (vmep) {
+ INIT_LIST_HEAD (&vol_opt_list.list);
+ ret = xlator_volopt_dynload (vmep->voltype, &dl_handle,
+ &vol_opt_list);
+ if (ret)
+ goto out;
+
+ if (_get_xlator_opt_key_from_vme (vmep, &xlopt_key))
+ goto out;
+
+ opt = xlator_volume_option_get_list (&vol_opt_list, xlopt_key);
+ _free_xlator_opt_key (xlopt_key);
+
+ if (opt)
+ opt_type = opt->type;
+ }
+
+out:
+ if (dl_handle) {
+ dlclose (dl_handle);
+ dl_handle = NULL;
+ }
+
+ return opt_type;
+}
+
+gf_boolean_t
+gd_is_boolean_option (char *key)
+{
+ GF_ASSERT (key);
+
+ if (GF_OPTION_TYPE_BOOL == _gd_get_option_type (key))
+ return _gf_true;
+
+ return _gf_false;
+}
+
+/* This function will restore origin volume to it's snap.
+ * The restore operation will simply replace the Gluster origin
+ * volume with the snap volume.
+ * TODO: Multi-volume delete to be done.
+ * Cleanup in case of restore failure is pending.
+ *
+ * @param orig_vol volinfo of origin volume
+ * @param snap_vol volinfo of snapshot volume
+ *
+ * @return 0 on success and negative value on error
+ */
+int
+gd_restore_snap_volume (dict_t *rsp_dict,
+ glusterd_volinfo_t *orig_vol,
+ glusterd_volinfo_t *snap_vol)
+{
+ int ret = -1;
+ glusterd_volinfo_t *new_volinfo = NULL;
+ glusterd_snap_t *snap = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ glusterd_volinfo_t *temp_volinfo = NULL;
+ glusterd_volinfo_t *voliter = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (rsp_dict);
+ conf = this->private;
+ GF_ASSERT (conf);
+
+ GF_VALIDATE_OR_GOTO (this->name, orig_vol, out);
+ GF_VALIDATE_OR_GOTO (this->name, snap_vol, out);
+ snap = snap_vol->snapshot;
+ GF_VALIDATE_OR_GOTO (this->name, snap, out);
+
+ /* Snap volume must be stoped before performing the
+ * restore operation.
+ */
+ ret = glusterd_stop_volume (snap_vol);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to stop "
+ "snap volume");
+ goto out;
+ }
+
+ /* Create a new volinfo for the restored volume */
+ ret = glusterd_volinfo_dup (snap_vol, &new_volinfo, _gf_true);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to create volinfo");
+ goto out;
+ }
+
+ /* Following entries need to be derived from origin volume. */
+ strcpy (new_volinfo->volname, orig_vol->volname);
+ uuid_copy (new_volinfo->volume_id, orig_vol->volume_id);
+ new_volinfo->snap_count = orig_vol->snap_count;
+ new_volinfo->snap_max_hard_limit = orig_vol->snap_max_hard_limit;
+ new_volinfo->is_volume_restored = _gf_true;
+
+ /* Bump the version of the restored volume, so that nodes *
+ * which are done can sync during handshake */
+ new_volinfo->version = orig_vol->version;
+
+ list_for_each_entry_safe (voliter, temp_volinfo,
+ &orig_vol->snap_volumes, snapvol_list) {
+ list_add_tail (&voliter->snapvol_list,
+ &new_volinfo->snap_volumes);
+ }
+ /* Copy the snap vol info to the new_volinfo.*/
+ ret = glusterd_snap_volinfo_restore (rsp_dict, new_volinfo, snap_vol);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to restore snap");
+ (void)glusterd_volinfo_delete (new_volinfo);
+ goto out;
+ }
+
+ /* If the orig_vol is already restored then we should delete
+ * the backend LVMs */
+ if (orig_vol->is_volume_restored) {
+ ret = glusterd_lvm_snapshot_remove (rsp_dict, orig_vol);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to remove "
+ "LVM backend");
+ (void)glusterd_volinfo_delete (new_volinfo);
+ goto out;
+ }
+ }
+
+ /* Once the new_volinfo is completely constructed then delete
+ * the orinal volinfo
+ */
+ ret = glusterd_volinfo_delete (orig_vol);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to delete volinfo");
+ (void)glusterd_volinfo_delete (new_volinfo);
+ goto out;
+ }
+
+ /* New volinfo always shows the status as created. Therefore
+ * set the status to stop. */
+ glusterd_set_volume_status (new_volinfo, GLUSTERD_STATUS_STOPPED);
+
+ list_add_tail (&new_volinfo->vol_list, &conf->volumes);
+
+ /* Now delete the snap entry. As a first step delete the snap
+ * volume information stored in store. */
+ ret = glusterd_snap_remove (rsp_dict, snap, _gf_false, _gf_true);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "Failed to delete "
+ "snap %s", snap->snapname);
+ goto out;
+ }
+
+ ret = glusterd_store_volinfo (new_volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to store volinfo");
+ goto out;
+ }
+
+ ret = 0;
+out:
+
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.h b/xlators/mgmt/glusterd/src/glusterd-volgen.h
new file mode 100644
index 000000000..fcbaaf93e
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.h
@@ -0,0 +1,176 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_VOLGEN_H_
+#define _GLUSTERD_VOLGEN_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterd.h"
+
+/* volopt map key name definitions */
+
+#define VKEY_DIAG_CNT_FOP_HITS "diagnostics.count-fop-hits"
+#define VKEY_DIAG_LAT_MEASUREMENT "diagnostics.latency-measurement"
+#define VKEY_FEATURES_LIMIT_USAGE "features.limit-usage"
+#define VKEY_MARKER_XTIME GEOREP".indexing"
+#define VKEY_MARKER_XTIME_FORCE GEOREP".ignore-pid-check"
+#define VKEY_CHANGELOG "changelog.changelog"
+#define VKEY_FEATURES_QUOTA "features.quota"
+
+#define AUTH_ALLOW_MAP_KEY "auth.allow"
+#define AUTH_REJECT_MAP_KEY "auth.reject"
+#define NFS_DISABLE_MAP_KEY "nfs.disable"
+#define AUTH_ALLOW_OPT_KEY "auth.addr.*.allow"
+#define AUTH_REJECT_OPT_KEY "auth.addr.*.reject"
+#define NFS_DISABLE_OPT_KEY "nfs.*.disable"
+
+
+typedef enum {
+ GF_CLIENT_TRUSTED,
+ GF_CLIENT_OTHER
+} glusterd_client_type_t;
+
+#define COMPLETE_OPTION(key, completion, ret) \
+ do { \
+ if (!strchr (key, '.')) { \
+ ret = option_complete (key, &completion); \
+ if (ret) { \
+ gf_log ("", GF_LOG_ERROR, "Out of memory"); \
+ return _gf_false; \
+ } \
+ \
+ if (!completion) { \
+ gf_log ("", GF_LOG_ERROR, "option %s does not" \
+ "exist", key); \
+ return _gf_false; \
+ } \
+ } \
+ \
+ if (completion) \
+ GF_FREE (completion); \
+ } while (0);
+
+typedef enum gd_volopt_flags_ {
+ OPT_FLAG_NONE,
+ OPT_FLAG_FORCE = 0x01, // option needs force to be reset
+ OPT_FLAG_XLATOR_OPT = 0x02, // option enables/disables xlators
+ OPT_FLAG_CLIENT_OPT = 0x04, // option affects clients
+} gd_volopt_flags_t;
+
+typedef enum {
+ GF_XLATOR_POSIX = 0,
+ GF_XLATOR_ACL,
+ GF_XLATOR_LOCKS,
+ GF_XLATOR_IOT,
+ GF_XLATOR_INDEX,
+ GF_XLATOR_MARKER,
+ GF_XLATOR_IO_STATS,
+ GF_XLATOR_BD,
+ GF_XLATOR_NONE,
+} glusterd_server_xlator_t;
+
+/* As of now debug xlators can be loaded only below fuse in the client
+ * graph via cli. More xlators can be added below when the cli option
+ * for adding debug xlators anywhere in the client graph has to be made
+ * available.
+ */
+typedef enum {
+ GF_CLNT_XLATOR_FUSE = 0,
+ GF_CLNT_XLATOR_NONE,
+} glusterd_client_xlator_t;
+
+typedef enum { DOC, NO_DOC, GLOBAL_DOC, GLOBAL_NO_DOC } option_type_t;
+
+typedef int (*vme_option_validation) (dict_t *dict, char *key, char *value,
+ char **op_errstr);
+
+struct volopt_map_entry {
+ char *key;
+ char *voltype;
+ char *option;
+ char *value;
+ option_type_t type;
+ uint32_t flags;
+ uint32_t op_version;
+ char *description;
+ vme_option_validation validate_fn;
+ /* If client_option is true, the option affects clients.
+ * this is used to calculate client-op-version of volumes
+ */
+ //gf_boolean_t client_option;
+};
+
+int glusterd_create_rb_volfiles (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo);
+
+int glusterd_create_volfiles_and_notify_services (glusterd_volinfo_t *volinfo);
+
+void glusterd_get_nfs_filepath (char *filename);
+
+void glusterd_get_shd_filepath (char *filename);
+
+int glusterd_create_nfs_volfile ();
+int glusterd_create_shd_volfile ();
+
+int glusterd_delete_volfile (glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo);
+int
+glusterd_delete_snap_volfile (glusterd_volinfo_t *volinfo,
+ glusterd_volinfo_t *snap_volinfo,
+ glusterd_brickinfo_t *brickinfo);
+
+int glusterd_volinfo_get (glusterd_volinfo_t *volinfo, char *key, char **value);
+int glusterd_volinfo_get_boolean (glusterd_volinfo_t *volinfo, char *key);
+
+int glusterd_validate_globalopts (glusterd_volinfo_t *volinfo, dict_t *val_dict, char **op_errstr);
+
+int glusterd_validate_localopts (dict_t *val_dict, char **op_errstr);
+gf_boolean_t glusterd_check_globaloption (char *key);
+gf_boolean_t
+glusterd_check_voloption_flags (char *key, int32_t flags);
+gf_boolean_t
+glusterd_is_valid_volfpath (char *volname, char *brick);
+int generate_brick_volfiles (glusterd_volinfo_t *volinfo);
+int generate_snap_brick_volfiles (glusterd_volinfo_t *volinfo,
+ glusterd_volinfo_t *snap_volinfo);
+int generate_client_volfiles (glusterd_volinfo_t *volinfo,
+ glusterd_client_type_t client_type);
+int
+generate_snap_client_volfiles (glusterd_volinfo_t *actual_volinfo,
+ glusterd_volinfo_t *snap_volinfo,
+ glusterd_client_type_t client_type,
+ gf_boolean_t vol_restore);
+int glusterd_get_volopt_content (dict_t *dict, gf_boolean_t xml_out);
+char*
+glusterd_get_trans_type_rb (gf_transport_type ttype);
+int
+glusterd_check_nfs_volfile_identical (gf_boolean_t *identical);
+int
+glusterd_check_nfs_topology_identical (gf_boolean_t *identical);
+
+uint32_t
+glusterd_get_op_version_for_key (char *key);
+
+gf_boolean_t
+gd_is_client_option (char *key);
+
+gf_boolean_t
+gd_is_xlator_option (char *key);
+
+gf_boolean_t
+gd_is_boolean_option (char *key);
+
+int gd_restore_snap_volume (dict_t *rsp_dict,
+ glusterd_volinfo_t *orig_vol,
+ glusterd_volinfo_t *snap_vol);
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
new file mode 100644
index 000000000..0d322b9ad
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -0,0 +1,2225 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_BD_XLATOR
+#include <lvm2app.h>
+#endif
+
+#include "common-utils.h"
+#include "syscall.h"
+#include "cli1-xdr.h"
+#include "xdr-generic.h"
+#include "glusterd.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-store.h"
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+#include "run.h"
+
+#define glusterd_op_start_volume_args_get(dict, volname, flags) \
+ glusterd_op_stop_volume_args_get (dict, volname, flags)
+
+
+int
+__glusterd_handle_create_volume (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{0,}};
+ dict_t *dict = NULL;
+ char *bricks = NULL;
+ char *volname = NULL;
+ int brick_count = 0;
+ void *cli_rsp = NULL;
+ char err_str[2048] = {0,};
+ gf_cli_rsp rsp = {0,};
+ xlator_t *this = NULL;
+ char *free_ptr = NULL;
+ char *trans_type = NULL;
+ uuid_t volume_id = {0,};
+ uuid_t tmp_uuid = {0};
+ int32_t type = 0;
+ char *username = NULL;
+ char *password = NULL;
+
+ GF_ASSERT (req);
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = -1;
+ ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ snprintf (err_str, sizeof (err_str), "Failed to decode request "
+ "received from cli");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "Received create volume req");
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf (err_str, sizeof (err_str), "Unable to decode "
+ "the command");
+ goto out;
+ } else {
+ dict->extra_stdfree = cli_req.dict.dict_val;
+ }
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Unable to get volume "
+ "name");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ if ((ret = glusterd_check_volume_exists (volname))) {
+ snprintf (err_str, sizeof (err_str), "Volume %s already exists",
+ volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "count", &brick_count);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Unable to get brick count"
+ " for volume %s", volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "type", &type);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Unable to get type of "
+ "volume %s", volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "transport", &trans_type);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Unable to get "
+ "transport-type of volume %s", volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+ ret = dict_get_str (dict, "bricks", &bricks);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Unable to get bricks for "
+ "volume %s", volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ if (!dict_get (dict, "force")) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get 'force' flag");
+ goto out;
+ }
+
+ uuid_generate (volume_id);
+ free_ptr = gf_strdup (uuid_utoa (volume_id));
+ ret = dict_set_dynstr (dict, "volume-id", free_ptr);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Unable to set volume "
+ "id of volume %s", volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+ free_ptr = NULL;
+
+ /* generate internal username and password */
+
+ uuid_generate (tmp_uuid);
+ username = gf_strdup (uuid_utoa (tmp_uuid));
+ ret = dict_set_dynstr (dict, "internal-username", username);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set username for "
+ "volume %s", volname);
+ goto out;
+ }
+
+ uuid_generate (tmp_uuid);
+ password = gf_strdup (uuid_utoa (tmp_uuid));
+ ret = dict_set_dynstr (dict, "internal-password", password);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set password for "
+ "volume %s", volname);
+ goto out;
+ }
+
+ ret = glusterd_op_begin_synctask (req, GD_OP_CREATE_VOLUME, dict);
+
+out:
+ if (ret) {
+ rsp.op_ret = -1;
+ rsp.op_errno = 0;
+ if (err_str[0] == '\0')
+ snprintf (err_str, sizeof (err_str),
+ "Operation failed");
+ rsp.op_errstr = err_str;
+ cli_rsp = &rsp;
+ glusterd_to_cli (req, cli_rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_cli_rsp, dict);
+ ret = 0; //Client response sent, prevent second response
+ }
+
+ GF_FREE(free_ptr);
+
+ return ret;
+}
+
+int
+glusterd_handle_create_volume (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ __glusterd_handle_create_volume);
+}
+
+int
+__glusterd_handle_cli_start_volume (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{0,}};
+ char *volname = NULL;
+ dict_t *dict = NULL;
+ glusterd_op_t cli_op = GD_OP_START_VOLUME;
+ char errstr[2048] = {0,};
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+
+ ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ snprintf (errstr, sizeof (errstr), "Failed to decode message "
+ "received from cli");
+ req->rpc_err = GARBAGE_ARGS;
+ gf_log (this->name, sizeof (errstr), "%s", errstr);
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf (errstr, sizeof (errstr), "Unable to decode "
+ "the command");
+ goto out;
+ }
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ snprintf (errstr, sizeof (errstr), "Unable to get volume name");
+ gf_log (this->name, GF_LOG_ERROR, "%s", errstr);
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "Received start vol req"
+ " for volume %s", volname);
+
+ ret = glusterd_op_begin_synctask (req, GD_OP_START_VOLUME, dict);
+
+out:
+ free (cli_req.dict.dict_val); //its malloced by xdr
+
+ if (ret) {
+ if(errstr[0] == '\0')
+ snprintf (errstr, sizeof (errstr), "Operation failed");
+ ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+ dict, errstr);
+ }
+
+ return ret;
+}
+
+int
+glusterd_handle_cli_start_volume (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ __glusterd_handle_cli_start_volume);
+}
+
+int
+__glusterd_handle_cli_stop_volume (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{0,}};
+ char *dup_volname = NULL;
+ dict_t *dict = NULL;
+ glusterd_op_t cli_op = GD_OP_STOP_VOLUME;
+ xlator_t *this = NULL;
+ char err_str[2048] = {0,};
+
+ this = THIS;
+ GF_ASSERT (this);
+ GF_ASSERT (req);
+
+ ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ snprintf (err_str, sizeof (err_str), "Failed to decode message "
+ "received from cli");
+ req->rpc_err = GARBAGE_ARGS;
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf (err_str, sizeof (err_str), "Unable to decode "
+ "the command");
+ goto out;
+ }
+ }
+
+ ret = dict_get_str (dict, "volname", &dup_volname);
+
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Failed to get volume "
+ "name");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "Received stop vol req "
+ "for volume %s", dup_volname);
+
+ ret = glusterd_op_begin_synctask (req, GD_OP_STOP_VOLUME, dict);
+
+out:
+ free (cli_req.dict.dict_val); //its malloced by xdr
+
+ if (ret) {
+ if (err_str[0] == '\0')
+ snprintf (err_str, sizeof (err_str),
+ "Operation failed");
+ ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+ dict, err_str);
+ }
+
+ return ret;
+}
+
+int
+glusterd_handle_cli_stop_volume (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ __glusterd_handle_cli_stop_volume);
+}
+
+int
+__glusterd_handle_cli_delete_volume (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{0,},};
+ glusterd_op_t cli_op = GD_OP_DELETE_VOLUME;
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ char err_str[2048]= {0,};
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ GF_ASSERT (req);
+
+ ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ snprintf (err_str, sizeof (err_str), "Failed to decode request "
+ "received from cli");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf (err_str, sizeof (err_str), "Unable to decode "
+ "the command");
+ goto out;
+ }
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Failed to get volume "
+ "name");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "Received delete vol req"
+ "for volume %s", volname);
+
+ ret = glusterd_op_begin_synctask (req, GD_OP_DELETE_VOLUME, dict);
+
+out:
+ free (cli_req.dict.dict_val); //its malloced by xdr
+
+ if (ret) {
+ if (err_str[0] == '\0')
+ snprintf (err_str, sizeof (err_str),
+ "Operation failed");
+ ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+ dict, err_str);
+ }
+
+ return ret;
+}
+
+int
+glusterd_handle_cli_delete_volume (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ __glusterd_handle_cli_delete_volume);
+}
+
+int
+__glusterd_handle_cli_heal_volume (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{0,}};
+ dict_t *dict = NULL;
+ glusterd_op_t cli_op = GD_OP_HEAL_VOLUME;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+ char op_errstr[2048] = {0,};
+
+ GF_ASSERT (req);
+
+ ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf (op_errstr, sizeof (op_errstr),
+ "Unable to decode the command");
+ goto out;
+ } else {
+ dict->extra_stdfree = cli_req.dict.dict_val;
+ }
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ snprintf (op_errstr, sizeof (op_errstr), "Unable to find "
+ "volume name");
+ gf_log (this->name, GF_LOG_ERROR, "%s", op_errstr);
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_INFO, "Received heal vol req "
+ "for volume %s", volname);
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ snprintf (op_errstr, sizeof (op_errstr),
+ "Volume %s does not exist", volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", op_errstr);
+ goto out;
+ }
+
+ ret = glusterd_add_bricks_hname_path_to_dict (dict, volinfo);
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32 (dict, "count", volinfo->brick_count);
+ if (ret)
+ goto out;
+
+ ret = glusterd_op_begin_synctask (req, GD_OP_HEAL_VOLUME, dict);
+
+out:
+ if (ret) {
+ if (op_errstr[0] == '\0')
+ snprintf (op_errstr, sizeof (op_errstr),
+ "operation failed");
+ ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+ dict, op_errstr);
+ }
+
+ return ret;
+}
+
+int
+glusterd_handle_cli_heal_volume (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ __glusterd_handle_cli_heal_volume);
+}
+
+int
+__glusterd_handle_cli_statedump_volume (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{0,}};
+ char *volname = NULL;
+ char *options = NULL;
+ dict_t *dict = NULL;
+ int32_t option_cnt = 0;
+ glusterd_op_t cli_op = GD_OP_STATEDUMP_VOLUME;
+ char err_str[2048] = {0,};
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ GF_ASSERT (req);
+
+ ret = -1;
+ ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new ();
+
+ ret = dict_unserialize (cli_req.dict.dict_val,
+ cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf (err_str, sizeof (err_str), "Unable to "
+ "decode the command");
+ goto out;
+ }
+ }
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Unable to get the volume "
+ "name");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "options", &options);
+ if (ret) {
+ snprintf (err_str, sizeof (err_str), "Unable to get options");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "option_cnt", &option_cnt);
+ if (ret) {
+ snprintf (err_str , sizeof (err_str), "Unable to get option "
+ "count");
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
+ }
+
+
+ gf_log (this->name, GF_LOG_INFO, "Received statedump request for "
+ "volume %s with options %s", volname, options);
+
+ ret = glusterd_op_begin_synctask (req, GD_OP_STATEDUMP_VOLUME, dict);
+
+out:
+ if (ret) {
+ if (err_str[0] == '\0')
+ snprintf (err_str, sizeof (err_str),
+ "Operation failed");
+ ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
+ dict, err_str);
+ }
+ free (cli_req.dict.dict_val);
+
+ return ret;
+}
+
+int
+glusterd_handle_cli_statedump_volume (rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler (req,
+ __glusterd_handle_cli_statedump_volume);
+}
+
+#ifdef HAVE_BD_XLATOR
+/*
+ * Validates if given VG in the brick exists or not. Also checks if VG has
+ * GF_XATTR_VOL_ID_KEY tag set to avoid using same VG for multiple bricks.
+ * Tag is checked only during glusterd_op_stage_create_volume. Tag is set during
+ * glusterd_validate_and_create_brickpath().
+ * @brick - brick info, @check_tag - check for VG tag or not
+ * @msg - Error message to return to caller
+ */
+int
+glusterd_is_valid_vg (glusterd_brickinfo_t *brick, int check_tag, char *msg)
+{
+ lvm_t handle = NULL;
+ vg_t vg = NULL;
+ char *vg_name = NULL;
+ int retval = 0;
+ char *p = NULL;
+ char *ptr = NULL;
+ struct dm_list *dm_lvlist = NULL;
+ struct dm_list *dm_seglist = NULL;
+ struct lvm_lv_list *lv_list = NULL;
+ struct lvm_property_value prop = {0, };
+ struct lvm_lvseg_list *seglist = NULL;
+ struct dm_list *taglist = NULL;
+ struct lvm_str_list *strl = NULL;
+
+ handle = lvm_init (NULL);
+ if (!handle) {
+ sprintf (msg, "lvm_init failed, could not validate vg");
+ return -1;
+ }
+ if (*brick->vg == '\0') { /* BD xlator has vg in brick->path */
+ p = gf_strdup (brick->path);
+ vg_name = strtok_r (p, "/", &ptr);
+ } else
+ vg_name = brick->vg;
+
+ vg = lvm_vg_open (handle, vg_name, "r", 0);
+ if (!vg) {
+ sprintf (msg, "no such vg: %s", vg_name);
+ retval = -1;
+ goto out;
+ }
+ if (!check_tag)
+ goto next;
+
+ taglist = lvm_vg_get_tags (vg);
+ if (!taglist)
+ goto next;
+
+ dm_list_iterate_items (strl, taglist) {
+ if (!strncmp(strl->str, GF_XATTR_VOL_ID_KEY,
+ strlen (GF_XATTR_VOL_ID_KEY))) {
+ sprintf (msg, "VG %s is already part of"
+ " a brick", vg_name);
+ retval = -1;
+ goto out;
+ }
+ }
+next:
+
+ brick->caps = CAPS_BD | CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT;
+
+ dm_lvlist = lvm_vg_list_lvs (vg);
+ if (!dm_lvlist)
+ goto out;
+
+ dm_list_iterate_items (lv_list, dm_lvlist) {
+ dm_seglist = lvm_lv_list_lvsegs (lv_list->lv);
+ dm_list_iterate_items (seglist, dm_seglist) {
+ prop = lvm_lvseg_get_property (seglist->lvseg,
+ "segtype");
+ if (!prop.is_valid || !prop.value.string)
+ continue;
+ if (!strcmp (prop.value.string, "thin-pool")) {
+ brick->caps |= CAPS_THIN;
+ gf_log (THIS->name, GF_LOG_INFO, "Thin Pool "
+ "\"%s\" will be used for thin LVs",
+ lvm_lv_get_name (lv_list->lv));
+ break;
+ }
+ }
+ }
+
+ retval = 0;
+out:
+ if (vg)
+ lvm_vg_close (vg);
+ lvm_quit (handle);
+ if (p)
+ GF_FREE (p);
+ return retval;
+}
+#endif
+
+/* op-sm */
+int
+glusterd_op_stage_create_volume (dict_t *dict, char **op_errstr)
+{
+ int ret = 0;
+ char *volname = NULL;
+ gf_boolean_t exists = _gf_false;
+ char *bricks = NULL;
+ char *brick_list = NULL;
+ char *free_ptr = NULL;
+ glusterd_brickinfo_t *brick_info = NULL;
+ int32_t brick_count = 0;
+ int32_t i = 0;
+ char *brick = NULL;
+ char *tmpptr = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ char msg[2048] = {0};
+ uuid_t volume_uuid;
+ char *volume_uuid_str;
+ gf_boolean_t is_force = _gf_false;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name");
+ goto out;
+ }
+
+ exists = glusterd_check_volume_exists (volname);
+ if (exists) {
+ snprintf (msg, sizeof (msg), "Volume %s already exists",
+ volname);
+ ret = -1;
+ goto out;
+ } else {
+ ret = 0;
+ }
+
+ ret = dict_get_int32 (dict, "count", &brick_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get brick count "
+ "for volume %s", volname);
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "volume-id", &volume_uuid_str);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get volume id of "
+ "volume %s", volname);
+ goto out;
+ }
+
+ ret = uuid_parse (volume_uuid_str, volume_uuid);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to parse volume id of"
+ " volume %s", volname);
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "bricks", &bricks);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get bricks for "
+ "volume %s", volname);
+ goto out;
+ }
+
+ is_force = dict_get_str_boolean (dict, "force", _gf_false);
+
+ if (bricks) {
+ brick_list = gf_strdup (bricks);
+ if (!brick_list) {
+ ret = -1;
+ goto out;
+ } else {
+ free_ptr = brick_list;
+ }
+ }
+
+ while ( i < brick_count) {
+ i++;
+ brick= strtok_r (brick_list, " \n", &tmpptr);
+ brick_list = tmpptr;
+
+ if (!glusterd_store_is_valid_brickpath (volname, brick) ||
+ !glusterd_is_valid_volfpath (volname, brick)) {
+ snprintf (msg, sizeof (msg), "brick path %s is too "
+ "long.", brick);
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_brickinfo_new_from_brick (brick, &brick_info);
+ if (ret)
+ goto out;
+
+ ret = glusterd_new_brick_validate (brick, brick_info, msg,
+ sizeof (msg));
+ if (ret)
+ goto out;
+
+ ret = glusterd_resolve_brick (brick_info);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, FMTSTR_RESOLVE_BRICK,
+ brick_info->hostname, brick_info->path);
+ goto out;
+ }
+
+ if (!uuid_compare (brick_info->uuid, MY_UUID)) {
+
+#ifdef HAVE_BD_XLATOR
+ if (brick_info->vg[0]) {
+ ret = glusterd_is_valid_vg (brick_info, 1, msg);
+ if (ret)
+ goto out;
+ }
+#endif
+ ret = glusterd_validate_and_create_brickpath (brick_info,
+ volume_uuid, op_errstr,
+ is_force);
+ if (ret)
+ goto out;
+ brick_list = tmpptr;
+ }
+ glusterd_brickinfo_delete (brick_info);
+ brick_info = NULL;
+ }
+out:
+ GF_FREE (free_ptr);
+ if (brick_info)
+ glusterd_brickinfo_delete (brick_info);
+
+ if (msg[0] != '\0') {
+ gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ }
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+int
+glusterd_op_stop_volume_args_get (dict_t *dict, char** volname, int *flags)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ if (!dict || !volname || !flags)
+ goto out;
+
+ ret = dict_get_str (dict, "volname", volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "flags", flags);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get flags");
+ goto out;
+ }
+out:
+ return ret;
+}
+
+int
+glusterd_op_statedump_volume_args_get (dict_t *dict, char **volname,
+ char **options, int *option_cnt)
+{
+ int ret = -1;
+
+ if (!dict || !volname || !options || !option_cnt)
+ goto out;
+
+ ret = dict_get_str (dict, "volname", volname);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get volname");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "options", options);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get options");
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "option_cnt", option_cnt);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get option count");
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+int
+glusterd_op_stage_start_volume (dict_t *dict, char **op_errstr)
+{
+ int ret = 0;
+ char *volname = NULL;
+ int flags = 0;
+ gf_boolean_t exists = _gf_false;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ char msg[2048];
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ uuid_t volume_id = {0,};
+ char volid[50] = {0,};
+ char xattr_volid[50] = {0,};
+ int caps = 0;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = glusterd_op_start_volume_args_get (dict, &volname, &flags);
+ if (ret)
+ goto out;
+
+ exists = glusterd_check_volume_exists (volname);
+
+ if (!exists) {
+ snprintf (msg, sizeof (msg), FMTSTR_CHECK_VOL_EXISTS, volname);
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, FMTSTR_CHECK_VOL_EXISTS,
+ volname);
+ goto out;
+ }
+
+ ret = glusterd_validate_volume_id (dict, volinfo);
+ if (ret)
+ goto out;
+
+ if (!(flags & GF_CLI_FLAG_OP_FORCE)) {
+ if (glusterd_is_volume_started (volinfo)) {
+ snprintf (msg, sizeof (msg), "Volume %s already "
+ "started", volname);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ ret = glusterd_resolve_brick (brickinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, FMTSTR_RESOLVE_BRICK,
+ brickinfo->hostname, brickinfo->path);
+ goto out;
+ }
+
+ if (uuid_compare (brickinfo->uuid, MY_UUID))
+ continue;
+
+ ret = gf_lstat_dir (brickinfo->path, NULL);
+ if (ret && (flags & GF_CLI_FLAG_OP_FORCE)) {
+ continue;
+ } else if (ret) {
+ snprintf (msg, sizeof (msg), "Failed to find "
+ "brick directory %s for volume %s. "
+ "Reason : %s", brickinfo->path,
+ volname, strerror (errno));
+ goto out;
+ }
+ ret = sys_lgetxattr (brickinfo->path, GF_XATTR_VOL_ID_KEY,
+ volume_id, 16);
+ if (ret < 0 && (!(flags & GF_CLI_FLAG_OP_FORCE))) {
+ snprintf (msg, sizeof (msg), "Failed to get "
+ "extended attribute %s for brick dir %s. "
+ "Reason : %s", GF_XATTR_VOL_ID_KEY,
+ brickinfo->path, strerror (errno));
+ ret = -1;
+ goto out;
+ } else if (ret < 0) {
+ ret = sys_lsetxattr (brickinfo->path,
+ GF_XATTR_VOL_ID_KEY,
+ volinfo->volume_id, 16,
+ XATTR_CREATE);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Failed to set "
+ "extended attribute %s on %s. Reason: "
+ "%s", GF_XATTR_VOL_ID_KEY,
+ brickinfo->path, strerror (errno));
+ goto out;
+ } else {
+ continue;
+ }
+ }
+ if (uuid_compare (volinfo->volume_id, volume_id)) {
+ snprintf (msg, sizeof (msg), "Volume id mismatch for "
+ "brick %s:%s. Expected volume id %s, "
+ "volume id %s found", brickinfo->hostname,
+ brickinfo->path,
+ uuid_utoa_r (volinfo->volume_id, volid),
+ uuid_utoa_r (volume_id, xattr_volid));
+ ret = -1;
+ goto out;
+ }
+#ifdef HAVE_BD_XLATOR
+ if (brickinfo->vg[0])
+ caps = CAPS_BD | CAPS_THIN |
+ CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT;
+ /* Check for VG/thin pool if its BD volume */
+ if (brickinfo->vg[0]) {
+ ret = glusterd_is_valid_vg (brickinfo, 0, msg);
+ if (ret)
+ goto out;
+ /* if anyone of the brick does not have thin support,
+ disable it for entire volume */
+ caps &= brickinfo->caps;
+ } else
+ caps = 0;
+#endif
+ }
+
+ volinfo->caps = caps;
+ ret = 0;
+out:
+ if (ret && (msg[0] != '\0')) {
+ gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ }
+ return ret;
+}
+
+int
+glusterd_op_stage_stop_volume (dict_t *dict, char **op_errstr)
+{
+ int ret = -1;
+ char *volname = NULL;
+ int flags = 0;
+ gf_boolean_t exists = _gf_false;
+ gf_boolean_t is_run = _gf_false;
+ glusterd_volinfo_t *volinfo = NULL;
+ char msg[2048] = {0};
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = glusterd_op_stop_volume_args_get (dict, &volname, &flags);
+ if (ret)
+ goto out;
+
+ exists = glusterd_check_volume_exists (volname);
+
+ if (!exists) {
+ snprintf (msg, sizeof (msg), FMTSTR_CHECK_VOL_EXISTS, volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ snprintf (msg, sizeof (msg), FMTSTR_CHECK_VOL_EXISTS, volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+ goto out;
+ }
+
+ ret = glusterd_validate_volume_id (dict, volinfo);
+ if (ret)
+ goto out;
+
+ /* If 'force' flag is given, no check is required */
+ if (flags & GF_CLI_FLAG_OP_FORCE)
+ goto out;
+
+ if (_gf_false == glusterd_is_volume_started (volinfo)) {
+ snprintf (msg, sizeof(msg), "Volume %s "
+ "is not in the started state", volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+ ret = -1;
+ goto out;
+ }
+ ret = glusterd_check_gsync_running (volinfo, &is_run);
+ if (ret && (is_run == _gf_false))
+ gf_log (this->name, GF_LOG_WARNING, "Unable to get the status"
+ " of active "GEOREP" session");
+ if (is_run) {
+ gf_log (this->name, GF_LOG_WARNING, GEOREP" sessions active"
+ "for the volume %s ", volname);
+ snprintf (msg, sizeof(msg), GEOREP" sessions are active "
+ "for the volume '%s'.\nUse 'volume "GEOREP" "
+ "status' command for more info. Use 'force' "
+ "option to ignore and stop the volume.",
+ volname);
+ ret = -1;
+ goto out;
+ }
+
+ if (glusterd_is_rb_ongoing (volinfo)) {
+ snprintf (msg, sizeof (msg), "Replace brick is in progress on "
+ "volume %s. Please retry after replace-brick "
+ "operation is committed or aborted", volname);
+ gf_log (this->name, GF_LOG_WARNING, "replace-brick in progress "
+ "on volume %s", volname);
+ ret = -1;
+ goto out;
+ }
+
+ if (glusterd_is_defrag_on (volinfo)) {
+ snprintf (msg, sizeof(msg), "rebalance session is "
+ "in progress for the volume '%s'", volname);
+ gf_log (this->name, GF_LOG_WARNING, "%s", msg);
+ ret = -1;
+ goto out;
+ }
+ if (volinfo->rep_brick.rb_status != GF_RB_STATUS_NONE) {
+ snprintf (msg, sizeof(msg), "replace-brick session is "
+ "in progress for the volume '%s'", volname);
+ gf_log (this->name, GF_LOG_WARNING, "%s", msg);
+ ret = -1;
+ goto out;
+ }
+
+out:
+ if (msg[0] != 0)
+ *op_errstr = gf_strdup (msg);
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+int
+glusterd_op_stage_delete_volume (dict_t *dict, char **op_errstr)
+{
+ int ret = 0;
+ char *volname = NULL;
+ gf_boolean_t exists = _gf_false;
+ glusterd_volinfo_t *volinfo = NULL;
+ char msg[2048] = {0};
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name");
+ goto out;
+ }
+
+ exists = glusterd_check_volume_exists (volname);
+ if (!exists) {
+ snprintf (msg, sizeof (msg), FMTSTR_CHECK_VOL_EXISTS, volname);
+ ret = -1;
+ goto out;
+ } else {
+ ret = 0;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ snprintf (msg, sizeof (msg), FMTSTR_CHECK_VOL_EXISTS, volname);
+ goto out;
+ }
+
+ ret = glusterd_validate_volume_id (dict, volinfo);
+ if (ret)
+ goto out;
+
+ if (glusterd_is_volume_started (volinfo)) {
+ snprintf (msg, sizeof (msg), "Volume %s has been started."
+ "Volume needs to be stopped before deletion.",
+ volname);
+ ret = -1;
+ goto out;
+ }
+
+ if (volinfo->snap_count > 0 || !list_empty(&volinfo->snap_volumes)) {
+ snprintf (msg, sizeof (msg), "Cannot delete Volume %s ,"
+ "as it has %ld snapshots. "
+ "To delete the volume, "
+ "first delete all the snapshots under it.",
+ volname, volinfo->snap_count);
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (msg[0] != '\0') {
+ gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ }
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+int
+glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr)
+{
+ int ret = 0;
+ char *volname = NULL;
+ gf_boolean_t enabled = _gf_false;
+ glusterd_volinfo_t *volinfo = NULL;
+ char msg[2048];
+ glusterd_conf_t *priv = NULL;
+ dict_t *opt_dict = NULL;
+ gf_xl_afr_op_t heal_op = GF_AFR_OP_INVALID;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ priv = this->private;
+ if (!priv) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "priv is NULL");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ ret = -1;
+ snprintf (msg, sizeof (msg), "Volume %s does not exist", volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ goto out;
+ }
+
+ ret = glusterd_validate_volume_id (dict, volinfo);
+ if (ret)
+ goto out;
+
+ if (!glusterd_is_volume_replicate (volinfo)) {
+ ret = -1;
+ snprintf (msg, sizeof (msg), "Volume %s is not of type "
+ "replicate", volname);
+ *op_errstr = gf_strdup (msg);
+ gf_log (this->name, GF_LOG_WARNING, "%s", msg);
+ goto out;
+ }
+
+ if (!glusterd_is_volume_started (volinfo)) {
+ ret = -1;
+ snprintf (msg, sizeof (msg), "Volume %s is not started.",
+ volname);
+ gf_log (THIS->name, GF_LOG_WARNING, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ goto out;
+ }
+
+ opt_dict = volinfo->dict;
+ if (!opt_dict) {
+ ret = 0;
+ goto out;
+ }
+
+ enabled = dict_get_str_boolean (opt_dict, "cluster.self-heal-daemon",
+ 1);
+ if (!enabled) {
+ ret = -1;
+ snprintf (msg, sizeof (msg), "Self-heal-daemon is "
+ "disabled. Heal will not be triggered on volume %s",
+ volname);
+ gf_log (this->name, GF_LOG_WARNING, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "heal-op", (int32_t*)&heal_op);
+ if (ret || (heal_op == GF_AFR_OP_INVALID)) {
+ ret = -1;
+ *op_errstr = gf_strdup("Invalid heal-op");
+ gf_log (this->name, GF_LOG_WARNING, "%s", "Invalid heal-op");
+ goto out;
+ }
+
+ switch (heal_op) {
+ case GF_AFR_OP_INDEX_SUMMARY:
+ case GF_AFR_OP_STATISTICS_HEAL_COUNT:
+ case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+ break;
+ default:
+ if (!glusterd_is_nodesvc_online("glustershd")){
+ ret = -1;
+ *op_errstr = gf_strdup ("Self-heal daemon is "
+ "not running. Check self-heal "
+ "daemon log file.");
+ gf_log (this->name, GF_LOG_WARNING, "%s",
+ "Self-heal daemon is not running."
+ "Check self-heal daemon log file.");
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+int
+glusterd_op_stage_statedump_volume (dict_t *dict, char **op_errstr)
+{
+ int ret = -1;
+ char *volname = NULL;
+ char *options = NULL;
+ int option_cnt = 0;
+ gf_boolean_t is_running = _gf_false;
+ glusterd_volinfo_t *volinfo = NULL;
+ char msg[2408] = {0,};
+
+ ret = glusterd_op_statedump_volume_args_get (dict, &volname, &options,
+ &option_cnt);
+ if (ret)
+ goto out;
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ snprintf (msg, sizeof(msg), "Volume %s does not exist",
+ volname);
+ gf_log ("", GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ goto out;
+ }
+
+ ret = glusterd_validate_volume_id (dict, volinfo);
+ if (ret)
+ goto out;
+
+ is_running = glusterd_is_volume_started (volinfo);
+ if (!is_running) {
+ snprintf (msg, sizeof(msg), "Volume %s is not in a started"
+ " state", volname);
+ gf_log ("", GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ ret = -1;
+ goto out;
+ }
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_op_stage_clearlocks_volume (dict_t *dict, char **op_errstr)
+{
+ int ret = -1;
+ char *volname = NULL;
+ char *path = NULL;
+ char *type = NULL;
+ char *kind = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char msg[2048] = {0,};
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ snprintf (msg, sizeof(msg), "Failed to get volume name");
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "path", &path);
+ if (ret) {
+ snprintf (msg, sizeof(msg), "Failed to get path");
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "kind", &kind);
+ if (ret) {
+ snprintf (msg, sizeof(msg), "Failed to get kind");
+ gf_log ("", GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "type", &type);
+ if (ret) {
+ snprintf (msg, sizeof(msg), "Failed to get type");
+ gf_log ("", GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ snprintf (msg, sizeof(msg), "Volume %s does not exist",
+ volname);
+ gf_log ("", GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ goto out;
+ }
+
+ ret = glusterd_validate_volume_id (dict, volinfo);
+ if (ret)
+ goto out;
+
+ if (!glusterd_is_volume_started (volinfo)) {
+ snprintf (msg, sizeof(msg), "Volume %s is not started",
+ volname);
+ gf_log ("", GF_LOG_ERROR, "%s", msg);
+ *op_errstr = gf_strdup (msg);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_op_create_volume (dict_t *dict, char **op_errstr)
+{
+ int ret = 0;
+ char *volname = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ gf_boolean_t vol_added = _gf_false;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ xlator_t *this = NULL;
+ char *brick = NULL;
+ int32_t count = 0;
+ int32_t i = 1;
+ char *bricks = NULL;
+ char *brick_list = NULL;
+ char *free_ptr = NULL;
+ char *saveptr = NULL;
+ char *trans_type = NULL;
+ char *str = NULL;
+ char *username = NULL;
+ char *password = NULL;
+ int caps = 0;
+ char msg[1024] __attribute__((unused)) = {0, };
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = glusterd_volinfo_new (&volinfo);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to allocate memory for volinfo");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "volname", &volname);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name");
+ goto out;
+ }
+
+ strncpy (volinfo->volname, volname, GLUSTERD_MAX_VOLUME_NAME);
+ GF_ASSERT (volinfo->volname);
+
+ ret = dict_get_int32 (dict, "type", &volinfo->type);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get type of volume"
+ " %s", volname);
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "count", &volinfo->brick_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get brick count of"
+ " volume %s", volname);
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "port", &volinfo->port);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get port");
+ goto out;
+ }
+
+ count = volinfo->brick_count;
+
+ ret = dict_get_str (dict, "bricks", &bricks);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get bricks for "
+ "volume %s", volname);
+ goto out;
+ }
+
+ /* replica-count 1 means, no replication, file is in one brick only */
+ volinfo->replica_count = 1;
+ /* stripe-count 1 means, no striping, file is present as a whole */
+ volinfo->stripe_count = 1;
+
+ if (GF_CLUSTER_TYPE_REPLICATE == volinfo->type) {
+ ret = dict_get_int32 (dict, "replica-count",
+ &volinfo->replica_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get "
+ "replica count for volume %s", volname);
+ goto out;
+ }
+ } else if (GF_CLUSTER_TYPE_STRIPE == volinfo->type) {
+ ret = dict_get_int32 (dict, "stripe-count",
+ &volinfo->stripe_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get stripe"
+ " count for volume %s", volname);
+ goto out;
+ }
+ } else if (GF_CLUSTER_TYPE_STRIPE_REPLICATE == volinfo->type) {
+ ret = dict_get_int32 (dict, "stripe-count",
+ &volinfo->stripe_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get stripe"
+ " count for volume %s", volname);
+ goto out;
+ }
+ ret = dict_get_int32 (dict, "replica-count",
+ &volinfo->replica_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get "
+ "replica count for volume %s", volname);
+ goto out;
+ }
+ }
+
+ /* dist-leaf-count is the count of brick nodes for a given
+ subvolume of distribute */
+ volinfo->dist_leaf_count = glusterd_get_dist_leaf_count (volinfo);
+
+ /* subvol_count is the count of number of subvolumes present
+ for a given distribute volume */
+ volinfo->subvol_count = (volinfo->brick_count /
+ volinfo->dist_leaf_count);
+
+ /* Keep sub-count same as earlier, for the sake of backward
+ compatibility */
+ if (volinfo->dist_leaf_count > 1)
+ volinfo->sub_count = volinfo->dist_leaf_count;
+
+ ret = dict_get_str (dict, "transport", &trans_type);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to get transport type of volume %s", volname);
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "volume-id", &str);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to get volume-id of volume %s", volname);
+ goto out;
+ }
+ ret = uuid_parse (str, volinfo->volume_id);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "unable to parse uuid %s of volume %s", str, volname);
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "internal-username", &username);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "unable to get internal username of volume %s",
+ volname);
+ goto out;
+ }
+ glusterd_auth_set_username (volinfo, username);
+
+ ret = dict_get_str (dict, "internal-password", &password);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "unable to get internal password of volume %s",
+ volname);
+ goto out;
+ }
+ glusterd_auth_set_password (volinfo, password);
+
+ if (strcasecmp (trans_type, "rdma") == 0) {
+ volinfo->transport_type = GF_TRANSPORT_RDMA;
+ volinfo->nfs_transport_type = GF_TRANSPORT_RDMA;
+ } else if (strcasecmp (trans_type, "tcp") == 0) {
+ volinfo->transport_type = GF_TRANSPORT_TCP;
+ volinfo->nfs_transport_type = GF_TRANSPORT_TCP;
+ } else {
+ volinfo->transport_type = GF_TRANSPORT_BOTH_TCP_RDMA;
+ volinfo->nfs_transport_type = GF_DEFAULT_NFS_TRANSPORT;
+ }
+
+ if (bricks) {
+ brick_list = gf_strdup (bricks);
+ free_ptr = brick_list;
+ }
+
+ if (count)
+ brick = strtok_r (brick_list+1, " \n", &saveptr);
+ caps = CAPS_BD | CAPS_THIN | CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT;
+
+ while ( i <= count) {
+ ret = glusterd_brickinfo_new_from_brick (brick, &brickinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_resolve_brick (brickinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, FMTSTR_RESOLVE_BRICK,
+ brickinfo->hostname, brickinfo->path);
+ goto out;
+ }
+
+#ifdef HAVE_BD_XLATOR
+ if (!uuid_compare (brickinfo->uuid, MY_UUID)) {
+ if (brickinfo->vg[0]) {
+ ret = glusterd_is_valid_vg (brickinfo, 0, msg);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s",
+ msg);
+ goto out;
+ }
+
+ /* if anyone of the brick does not have thin
+ support, disable it for entire volume */
+ caps &= brickinfo->caps;
+
+
+ } else
+ caps = 0;
+ }
+#endif
+
+ list_add_tail (&brickinfo->brick_list, &volinfo->bricks);
+ brick = strtok_r (NULL, " \n", &saveptr);
+ i++;
+ }
+
+ gd_update_volume_op_versions (volinfo);
+
+ volinfo->caps = caps;
+
+ ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ glusterd_store_delete_volume (volinfo);
+ *op_errstr = gf_strdup ("Failed to store the Volume information");
+ goto out;
+ }
+
+ ret = glusterd_create_volfiles_and_notify_services (volinfo);
+ if (ret) {
+ *op_errstr = gf_strdup ("Failed to create volume files");
+ goto out;
+ }
+
+ volinfo->rebal.defrag_status = 0;
+ list_add_tail (&volinfo->vol_list, &priv->volumes);
+ vol_added = _gf_true;
+
+out:
+ GF_FREE(free_ptr);
+ if (!vol_added && volinfo)
+ glusterd_volinfo_delete (volinfo);
+ return ret;
+}
+
+int
+glusterd_op_start_volume (dict_t *dict, char **op_errstr)
+{
+ int ret = 0;
+ char *volname = NULL;
+ int flags = 0;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = glusterd_op_start_volume_args_get (dict, &volname, &flags);
+ if (ret)
+ goto out;
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, FMTSTR_CHECK_VOL_EXISTS,
+ volname);
+ goto out;
+ }
+
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ ret = glusterd_brick_start (volinfo, brickinfo, _gf_true);
+ /* If 'force' try to start all bricks regardless of success or
+ * failure
+ */
+ if (!(flags & GF_CLI_FLAG_OP_FORCE) && ret)
+ goto out;
+ }
+
+ glusterd_set_volume_status (volinfo, GLUSTERD_STATUS_STARTED);
+
+ ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret)
+ goto out;
+
+ ret = glusterd_nodesvcs_handle_graph_change (volinfo);
+
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "returning %d ", ret);
+ return ret;
+}
+
+int
+glusterd_stop_volume (glusterd_volinfo_t *volinfo)
+{
+ int ret = -1;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ GF_VALIDATE_OR_GOTO (this->name, volinfo, out);
+
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ ret = glusterd_brick_stop (volinfo, brickinfo, _gf_false);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to stop "
+ "brick (%s)", brickinfo->path);
+ goto out;
+ }
+ }
+
+ glusterd_set_volume_status (volinfo, GLUSTERD_STATUS_STOPPED);
+
+ ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to store volinfo of "
+ "%s volume", volinfo->volname);
+ goto out;
+ }
+
+ ret = glusterd_nodesvcs_handle_graph_change (volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to notify graph "
+ "change for %s volume", volinfo->volname);
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+
+int
+glusterd_op_stop_volume (dict_t *dict)
+{
+ int ret = 0;
+ int flags = 0;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ ret = glusterd_op_stop_volume_args_get (dict, &volname, &flags);
+ if (ret)
+ goto out;
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, FMTSTR_CHECK_VOL_EXISTS,
+ volname);
+ goto out;
+ }
+
+ ret = glusterd_stop_volume (volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to stop %s volume",
+ volname);
+ goto out;
+ }
+out:
+ return ret;
+}
+
+int
+glusterd_op_delete_volume (dict_t *dict)
+{
+ int ret = 0;
+ char *volname = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, FMTSTR_CHECK_VOL_EXISTS,
+ volname);
+ goto out;
+ }
+
+ ret = glusterd_delete_volume (volinfo);
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_op_heal_volume (dict_t *dict, char **op_errstr)
+{
+ int ret = 0;
+ /* Necessary subtasks of heal are completed in brick op */
+
+ return ret;
+}
+
+int
+glusterd_op_statedump_volume (dict_t *dict, char **op_errstr)
+{
+ int ret = 0;
+ char *volname = NULL;
+ char *options = NULL;
+ int option_cnt = 0;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+
+ ret = glusterd_op_statedump_volume_args_get (dict, &volname, &options,
+ &option_cnt);
+ if (ret)
+ goto out;
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret)
+ goto out;
+ gf_log ("", GF_LOG_DEBUG, "Performing statedump on volume %s", volname);
+ if (strstr (options, "nfs") != NULL) {
+ ret = glusterd_nfs_statedump (options, option_cnt, op_errstr);
+ if (ret)
+ goto out;
+ } else {
+ list_for_each_entry (brickinfo, &volinfo->bricks,
+ brick_list) {
+ ret = glusterd_brick_statedump (volinfo, brickinfo,
+ options, option_cnt,
+ op_errstr);
+ /* Let us take the statedump of other bricks instead of
+ * exiting, if statedump of this brick fails.
+ */
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING, "could not "
+ "take the statedump of the brick %s:%s."
+ " Proceeding to other bricks",
+ brickinfo->hostname, brickinfo->path);
+ }
+ }
+
+out:
+ return ret;
+}
+
+int
+glusterd_clearlocks_send_cmd (glusterd_volinfo_t *volinfo, char *cmd,
+ char *path, char *result, char *errstr,
+ int err_len, char *mntpt)
+{
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+ char abspath[PATH_MAX] = {0, };
+
+ priv = THIS->private;
+
+ snprintf (abspath, sizeof (abspath), "%s/%s", mntpt, path);
+ ret = sys_lgetxattr (abspath, cmd, result, PATH_MAX);
+ if (ret < 0) {
+ snprintf (errstr, err_len, "clear-locks getxattr command "
+ "failed. Reason: %s", strerror (errno));
+ gf_log (THIS->name, GF_LOG_DEBUG, "%s", errstr);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+glusterd_clearlocks_rmdir_mount (glusterd_volinfo_t *volinfo, char *mntpt)
+{
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+
+ priv = THIS->private;
+
+ ret = rmdir (mntpt);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_DEBUG, "rmdir failed");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+void
+glusterd_clearlocks_unmount (glusterd_volinfo_t *volinfo, char *mntpt)
+{
+ glusterd_conf_t *priv = NULL;
+ runner_t runner = {0,};
+ int ret = 0;
+
+ priv = THIS->private;
+
+ /*umount failures are ignored. Using stat we could have avoided
+ * attempting to unmount a non-existent filesystem. But a failure of
+ * stat() on mount can be due to network failures.*/
+
+ runinit (&runner);
+ runner_add_args (&runner, "/bin/umount", "-f", NULL);
+ runner_argprintf (&runner, "%s", mntpt);
+
+ synclock_unlock (&priv->big_lock);
+ ret = runner_run (&runner);
+ synclock_lock (&priv->big_lock);
+ if (ret) {
+ ret = 0;
+ gf_log ("", GF_LOG_DEBUG,
+ "umount failed on maintenance client");
+ }
+
+ return;
+}
+
+int
+glusterd_clearlocks_create_mount (glusterd_volinfo_t *volinfo, char **mntpt)
+{
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+ char template[PATH_MAX] = {0,};
+ char *tmpl = NULL;
+
+ priv = THIS->private;
+
+ snprintf (template, sizeof (template), "/tmp/%s.XXXXXX",
+ volinfo->volname);
+ tmpl = mkdtemp (template);
+ if (!tmpl) {
+ gf_log (THIS->name, GF_LOG_DEBUG, "Couldn't create temporary "
+ "mount directory. Reason %s", strerror (errno));
+ goto out;
+ }
+
+ *mntpt = gf_strdup (tmpl);
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+glusterd_clearlocks_mount (glusterd_volinfo_t *volinfo, char **xl_opts,
+ char *mntpt)
+{
+ int ret = -1;
+ int i = 0;
+ glusterd_conf_t *priv = NULL;
+ runner_t runner = {0,};
+ char client_volfpath[PATH_MAX] = {0,};
+ char self_heal_opts[3][1024] = {"*replicate*.data-self-heal=off",
+ "*replicate*.metadata-self-heal=off",
+ "*replicate*.entry-self-heal=off"};
+
+ priv = THIS->private;
+
+ runinit (&runner);
+ glusterd_get_trusted_client_filepath (client_volfpath, volinfo,
+ volinfo->transport_type);
+ runner_add_args (&runner, SBIN_DIR"/glusterfs", "-f", NULL);
+ runner_argprintf (&runner, "%s", client_volfpath);
+ runner_add_arg (&runner, "-l");
+ runner_argprintf (&runner, DEFAULT_LOG_FILE_DIRECTORY
+ "/%s-clearlocks-mnt.log", volinfo->volname);
+ if (volinfo->memory_accounting)
+ runner_add_arg (&runner, "--mem-accounting");
+
+ for (i = 0; i < volinfo->brick_count && xl_opts[i]; i++) {
+ runner_add_arg (&runner, "--xlator-option");
+ runner_argprintf (&runner, "%s", xl_opts[i]);
+ }
+
+ for (i = 0; i < 3; i++) {
+ runner_add_args (&runner, "--xlator-option",
+ self_heal_opts[i], NULL);
+ }
+
+ runner_argprintf (&runner, "%s", mntpt);
+ synclock_unlock (&priv->big_lock);
+ ret = runner_run (&runner);
+ synclock_lock (&priv->big_lock);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "Could not start glusterfs");
+ goto out;
+ }
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "Started glusterfs successfully");
+
+out:
+ return ret;
+}
+
+int
+glusterd_clearlocks_get_local_client_ports (glusterd_volinfo_t *volinfo,
+ char **xl_opts)
+{
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ int index = 0;
+ int ret = -1;
+ int i = 0;
+ int port = 0;
+
+ GF_ASSERT (xl_opts);
+ if (!xl_opts) {
+ gf_log (THIS->name, GF_LOG_DEBUG, "Should pass non-NULL "
+ "xl_opts");
+ goto out;
+ }
+
+ priv = THIS->private;
+
+ index = -1;
+ list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+ index++;
+ if (uuid_compare (brickinfo->uuid, MY_UUID))
+ continue;
+
+ port = pmap_registry_search (THIS, brickinfo->path,
+ GF_PMAP_PORT_BRICKSERVER);
+ if (!port) {
+ ret = -1;
+ gf_log (THIS->name, GF_LOG_DEBUG, "Couldn't get port "
+ " for brick %s:%s", brickinfo->hostname,
+ brickinfo->path);
+ goto out;
+ }
+
+ ret = gf_asprintf (&xl_opts[i], "%s-client-%d.remote-port=%d",
+ volinfo->volname, index, port);
+ if (ret == -1) {
+ xl_opts[i] = NULL;
+ goto out;
+ }
+ i++;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+glusterd_op_clearlocks_volume (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+ int32_t ret = -1;
+ int i = 0;
+ char *volname = NULL;
+ char *path = NULL;
+ char *kind = NULL;
+ char *type = NULL;
+ char *opts = NULL;
+ char *cmd_str = NULL;
+ char *free_ptr = NULL;
+ char msg[PATH_MAX] = {0,};
+ char result[PATH_MAX] = {0,};
+ char *mntpt = NULL;
+ char **xl_opts = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to get volume name");
+ goto out;
+ }
+ gf_log ("", GF_LOG_DEBUG, "Performing clearlocks on volume %s", volname);
+
+ ret = dict_get_str (dict, "path", &path);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to get path");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "kind", &kind);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to get kind");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "type", &type);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to get type");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "opts", &opts);
+ if (ret)
+ ret = 0;
+
+ gf_log (THIS->name, GF_LOG_INFO, "Received clear-locks request for "
+ "volume %s with kind %s type %s and options %s", volname,
+ kind, type, opts);
+
+ if (opts)
+ ret = gf_asprintf (&cmd_str, GF_XATTR_CLRLK_CMD".t%s.k%s.%s",
+ type, kind, opts);
+ else
+ ret = gf_asprintf (&cmd_str, GF_XATTR_CLRLK_CMD".t%s.k%s",
+ type, kind);
+ if (ret == -1)
+ goto out;
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Volume %s doesn't exist.",
+ volname);
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
+ goto out;
+ }
+
+ xl_opts = GF_CALLOC (volinfo->brick_count+1, sizeof (char*),
+ gf_gld_mt_charptr);
+ if (!xl_opts)
+ goto out;
+
+ ret = glusterd_clearlocks_get_local_client_ports (volinfo, xl_opts);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Couldn't get port numbers of "
+ "local bricks");
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
+ goto out;
+ }
+
+ ret = glusterd_clearlocks_create_mount (volinfo, &mntpt);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Creating mount directory "
+ "for clear-locks failed.");
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
+ goto out;
+ }
+
+ ret = glusterd_clearlocks_mount (volinfo, xl_opts, mntpt);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Failed to mount clear-locks "
+ "maintenance client.");
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
+ goto out;
+ }
+
+ ret = glusterd_clearlocks_send_cmd (volinfo, cmd_str, path, result,
+ msg, sizeof (msg), mntpt);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
+ goto umount;
+ }
+
+ free_ptr = gf_strdup(result);
+ if (dict_set_dynstr (rsp_dict, "lk-summary", free_ptr)) {
+ GF_FREE (free_ptr);
+ snprintf (msg, sizeof (msg), "Failed to set clear-locks "
+ "result");
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
+ }
+
+umount:
+ glusterd_clearlocks_unmount (volinfo, mntpt);
+
+ if (glusterd_clearlocks_rmdir_mount (volinfo, mntpt))
+ gf_log (THIS->name, GF_LOG_WARNING, "Couldn't unmount "
+ "clear-locks mount point");
+
+out:
+ if (ret)
+ *op_errstr = gf_strdup (msg);
+
+ if (xl_opts) {
+ for (i = 0; i < volinfo->brick_count && xl_opts[i]; i++)
+ GF_FREE (xl_opts[i]);
+ GF_FREE (xl_opts);
+ }
+
+ GF_FREE (cmd_str);
+
+ GF_FREE (mntpt);
+
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
new file mode 100644
index 000000000..665a8b298
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -0,0 +1,1452 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterd-volgen.h"
+#include "glusterd-utils.h"
+
+static int
+check_dict_key_value (dict_t *dict, char *key, char *value)
+{
+ glusterd_conf_t *priv = NULL;
+ int ret = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ if (!dict) {
+ gf_log (this->name, GF_LOG_ERROR, "Received Empty Dict.");
+ ret = -1;
+ goto out;
+ }
+
+ if (!key) {
+ gf_log (this->name, GF_LOG_ERROR, "Received Empty Key.");
+ ret = -1;
+ goto out;
+ }
+
+ if (!value) {
+ gf_log (this->name, GF_LOG_ERROR, "Received Empty Value.");
+ ret = -1;
+ goto out;
+ }
+
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+static int
+get_volname_volinfo (dict_t *dict, char **volname, glusterd_volinfo_t **volinfo)
+{
+ glusterd_conf_t *priv = NULL;
+ int ret = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = dict_get_str (dict, "volname", volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (*volname, volinfo);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to allocate memory");
+ goto out;
+ }
+
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+static int
+validate_cache_max_min_size (dict_t *dict, char *key, char *value,
+ char **op_errstr)
+{
+ char *current_max_value = NULL;
+ char *current_min_value = NULL;
+ char errstr[2048] = "";
+ char *volname = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ int ret = 0;
+ uint64_t max_value = 0;
+ uint64_t min_value = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = check_dict_key_value (dict, key, value);
+ if (ret)
+ goto out;
+
+ ret = get_volname_volinfo (dict, &volname, &volinfo);
+ if (ret)
+ goto out;
+
+ if ((!strcmp (key, "performance.cache-min-file-size")) ||
+ (!strcmp (key, "cache-min-file-size"))) {
+ glusterd_volinfo_get (volinfo,
+ "performance.cache-max-file-size",
+ &current_max_value);
+ if (current_max_value) {
+ gf_string2bytesize (current_max_value, &max_value);
+ gf_string2bytesize (value, &min_value);
+ current_min_value = value;
+ }
+ } else if ((!strcmp (key, "performance.cache-max-file-size")) ||
+ (!strcmp (key, "cache-max-file-size"))) {
+ glusterd_volinfo_get (volinfo,
+ "performance.cache-min-file-size",
+ &current_min_value);
+ if (current_min_value) {
+ gf_string2bytesize (current_min_value, &min_value);
+ gf_string2bytesize (value, &max_value);
+ current_max_value = value;
+ }
+ }
+
+ if (min_value > max_value) {
+ snprintf (errstr, sizeof (errstr),
+ "cache-min-file-size (%s) is greater than "
+ "cache-max-file-size (%s)",
+ current_min_value, current_max_value);
+ gf_log (this->name, GF_LOG_ERROR, "%s", errstr);
+ *op_errstr = gf_strdup (errstr);
+ ret = -1;
+ goto out;
+ }
+
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+static int
+validate_quota (dict_t *dict, char *key, char *value,
+ char **op_errstr)
+{
+ char errstr[2048] = "";
+ char *volname = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ int ret = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = check_dict_key_value (dict, key, value);
+ if (ret)
+ goto out;
+
+ ret = get_volname_volinfo (dict, &volname, &volinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_volinfo_get_boolean (volinfo, VKEY_FEATURES_QUOTA);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to get the quota status");
+ goto out;
+ }
+
+ if (ret == _gf_false) {
+ snprintf (errstr, sizeof (errstr),
+ "Cannot set %s. Enable quota first.", key);
+ gf_log (this->name, GF_LOG_ERROR, "%s", errstr);
+ *op_errstr = gf_strdup (errstr);
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+static int
+validate_stripe (dict_t *dict, char *key, char *value, char **op_errstr)
+{
+ char errstr[2048] = "";
+ char *volname = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ int ret = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = check_dict_key_value (dict, key, value);
+ if (ret)
+ goto out;
+
+ ret = get_volname_volinfo (dict, &volname, &volinfo);
+ if (ret)
+ goto out;
+
+ if (volinfo->stripe_count == 1) {
+ snprintf (errstr, sizeof (errstr),
+ "Cannot set %s for a non-stripe volume.", key);
+ gf_log (this->name, GF_LOG_ERROR, "%s", errstr);
+ *op_errstr = gf_strdup (errstr);
+ ret = -1;
+ goto out;
+ }
+
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+static int
+validate_subvols_per_directory (dict_t *dict, char *key, char *value,
+ char **op_errstr)
+{
+ char errstr[2048] = "";
+ char *volname = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ int ret = 0;
+ int subvols = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = check_dict_key_value (dict, key, value);
+ if (ret)
+ goto out;
+
+ ret = get_volname_volinfo (dict, &volname, &volinfo);
+ if (ret)
+ goto out;
+
+ subvols = atoi(value);
+
+ /* Checking if the subvols-per-directory exceed the total
+ number of subvolumes. */
+ if (subvols > volinfo->subvol_count) {
+ snprintf (errstr, sizeof(errstr),
+ "subvols-per-directory(%d) is greater "
+ "than the number of subvolumes(%d).",
+ subvols, volinfo->subvol_count);
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s.", errstr);
+ *op_errstr = gf_strdup (errstr);
+ ret = -1;
+ goto out;
+ }
+
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+
+ return ret;
+}
+
+
+/* dispatch table for VOLUME SET
+ * -----------------------------
+ *
+ * Format of entries:
+ *
+ * First field is the <key>, for the purpose of looking it up
+ * in volume dictionary. Each <key> is of the format "<domain>.<specifier>".
+ *
+ * Second field is <voltype>.
+ *
+ * Third field is <option>, if its unset, it's assumed to be
+ * the same as <specifier>.
+ *
+ * Fourth field is <value>. In this context they are used to specify
+ * a default. That is, even the volume dict doesn't have a value,
+ * we procced as if the default value were set for it.
+ *
+ * Fifth field is <doctype>, which decides if the option is public and available
+ * in "set help" or not. "NO_DOC" entries are not part of the public interface
+ * and are subject to change at any time. This also decides if an option is
+ * global (apllies to all volumes) or normal (applies to only specified volume).
+ *
+ * Sixth field is <flags>.
+ *
+ * Seventh field is <op-version>.
+ *
+ * Eight field is description of option: If NULL, tried to fetch from
+ * translator code's xlator_options table.
+ *
+ * Nineth field is validation function: If NULL, xlator's option specific
+ * validation will be tried, otherwise tried at glusterd code itself.
+ *
+ * There are two type of entries: basic and special.
+ *
+ * - Basic entries are the ones where the <option> does _not_ start with
+ * the bang! character ('!').
+ *
+ * In their case, <option> is understood as an option for an xlator of
+ * type <voltype>. Their effect is to copy over the volinfo->dict[<key>]
+ * value to all graph nodes of type <voltype> (if such a value is set).
+ *
+ * You are free to add entries of this type, they will become functional
+ * just by being present in the table.
+ *
+ * - Special entries where the <option> starts with the bang!.
+ *
+ * They are not applied to all graphs during generation, and you cannot
+ * extend them in a trivial way which could be just picked up. Better
+ * not touch them unless you know what you do.
+ *
+ *
+ * Another kind of grouping for options, according to visibility:
+ *
+ * - Exported: one which is used in the code. These are characterized by
+ * being used a macro as <key> (of the format VKEY_..., defined in
+ * glusterd-volgen.h
+ *
+ * - Non-exported: the rest; these have string literal <keys>.
+ *
+ * Adhering to this policy, option name changes shall be one-liners.
+ *
+ */
+
+struct volopt_map_entry glusterd_volopt_map[] = {
+ /* DHT xlator options */
+ { .key = "cluster.lookup-unhashed",
+ .voltype = "cluster/distribute",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.min-free-disk",
+ .voltype = "cluster/distribute",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.min-free-inodes",
+ .voltype = "cluster/distribute",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.rebalance-stats",
+ .voltype = "cluster/distribute",
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.subvols-per-directory",
+ .voltype = "cluster/distribute",
+ .option = "directory-layout-spread",
+ .op_version = 2,
+ .validate_fn = validate_subvols_per_directory,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.readdir-optimize",
+ .voltype = "cluster/distribute",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.rsync-hash-regex",
+ .voltype = "cluster/distribute",
+ .type = NO_DOC,
+ .op_version = 3,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.extra-hash-regex",
+ .voltype = "cluster/distribute",
+ .type = NO_DOC,
+ .op_version = 3,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.dht-xattr-name",
+ .voltype = "cluster/distribute",
+ .option = "xattr-name",
+ .type = NO_DOC,
+ .op_version = 3,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+
+ /* NUFA xlator options (Distribute special case) */
+ { .key = "cluster.nufa",
+ .voltype = "cluster/distribute",
+ .option = "!nufa",
+ .type = NO_DOC,
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.local-volume-name",
+ .voltype = "cluster/nufa",
+ .option = "local-volume-name",
+ .type = NO_DOC,
+ .op_version = 3,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+
+ /* Switch xlator options (Distribute special case) */
+ { .key = "cluster.switch",
+ .voltype = "cluster/distribute",
+ .option = "!switch",
+ .type = NO_DOC,
+ .op_version = 3,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.switch-pattern",
+ .voltype = "cluster/switch",
+ .option = "pattern.switch.case",
+ .type = NO_DOC,
+ .op_version = 3,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+
+ /* AFR xlator options */
+ { .key = "cluster.entry-change-log",
+ .voltype = "cluster/replicate",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.read-subvolume",
+ .voltype = "cluster/replicate",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.read-subvolume-index",
+ .voltype = "cluster/replicate",
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.read-hash-mode",
+ .voltype = "cluster/replicate",
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.background-self-heal-count",
+ .voltype = "cluster/replicate",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.metadata-self-heal",
+ .voltype = "cluster/replicate",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.data-self-heal",
+ .voltype = "cluster/replicate",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.entry-self-heal",
+ .voltype = "cluster/replicate",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.self-heal-daemon",
+ .voltype = "cluster/replicate",
+ .option = "!self-heal-daemon",
+ .op_version = 1
+ },
+ { .key = "cluster.heal-timeout",
+ .voltype = "cluster/replicate",
+ .option = "!heal-timeout",
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.strict-readdir",
+ .voltype = "cluster/replicate",
+ .type = NO_DOC,
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.self-heal-window-size",
+ .voltype = "cluster/replicate",
+ .option = "data-self-heal-window-size",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.data-change-log",
+ .voltype = "cluster/replicate",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.metadata-change-log",
+ .voltype = "cluster/replicate",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.data-self-heal-algorithm",
+ .voltype = "cluster/replicate",
+ .option = "data-self-heal-algorithm",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.eager-lock",
+ .voltype = "cluster/replicate",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.quorum-type",
+ .voltype = "cluster/replicate",
+ .option = "quorum-type",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.quorum-count",
+ .voltype = "cluster/replicate",
+ .option = "quorum-count",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.choose-local",
+ .voltype = "cluster/replicate",
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.self-heal-readdir-size",
+ .voltype = "cluster/replicate",
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.post-op-delay-secs",
+ .voltype = "cluster/replicate",
+ .type = NO_DOC,
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.readdir-failover",
+ .voltype = "cluster/replicate",
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.ensure-durability",
+ .voltype = "cluster/replicate",
+ .op_version = 3,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+
+ /* Stripe xlator options */
+ { .key = "cluster.stripe-block-size",
+ .voltype = "cluster/stripe",
+ .option = "block-size",
+ .op_version = 1,
+ .validate_fn = validate_stripe,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.stripe-coalesce",
+ .voltype = "cluster/stripe",
+ .option = "coalesce",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+
+ /* IO-stats xlator options */
+ { .key = VKEY_DIAG_LAT_MEASUREMENT,
+ .voltype = "debug/io-stats",
+ .option = "latency-measurement",
+ .value = "off",
+ .op_version = 1
+ },
+ { .key = "diagnostics.dump-fd-stats",
+ .voltype = "debug/io-stats",
+ .op_version = 1
+ },
+ { .key = VKEY_DIAG_CNT_FOP_HITS,
+ .voltype = "debug/io-stats",
+ .option = "count-fop-hits",
+ .value = "off",
+ .type = NO_DOC,
+ .op_version = 1
+ },
+ { .key = "diagnostics.brick-log-level",
+ .voltype = "debug/io-stats",
+ .option = "!brick-log-level",
+ .op_version = 1
+ },
+ { .key = "diagnostics.client-log-level",
+ .voltype = "debug/io-stats",
+ .option = "!client-log-level",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "diagnostics.brick-sys-log-level",
+ .voltype = "debug/io-stats",
+ .option = "!sys-log-level",
+ .op_version = 1
+ },
+ { .key = "diagnostics.client-sys-log-level",
+ .voltype = "debug/io-stats",
+ .option = "!sys-log-level",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+
+ /* IO-cache xlator options */
+ { .key = "performance.cache-max-file-size",
+ .voltype = "performance/io-cache",
+ .option = "max-file-size",
+ .op_version = 1,
+ .validate_fn = validate_cache_max_min_size,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.cache-min-file-size",
+ .voltype = "performance/io-cache",
+ .option = "min-file-size",
+ .op_version = 1,
+ .validate_fn = validate_cache_max_min_size,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.cache-refresh-timeout",
+ .voltype = "performance/io-cache",
+ .option = "cache-timeout",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.cache-priority",
+ .voltype = "performance/io-cache",
+ .option = "priority",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.cache-size",
+ .voltype = "performance/io-cache",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+
+ /* IO-threads xlator options */
+ { .key = "performance.io-thread-count",
+ .voltype = "performance/io-threads",
+ .option = "thread-count",
+ .op_version = 1
+ },
+ { .key = "performance.high-prio-threads",
+ .voltype = "performance/io-threads",
+ .op_version = 1
+ },
+ { .key = "performance.normal-prio-threads",
+ .voltype = "performance/io-threads",
+ .op_version = 1
+ },
+ { .key = "performance.low-prio-threads",
+ .voltype = "performance/io-threads",
+ .op_version = 1
+ },
+ { .key = "performance.least-prio-threads",
+ .voltype = "performance/io-threads",
+ .op_version = 1
+ },
+ { .key = "performance.enable-least-priority",
+ .voltype = "performance/io-threads",
+ .op_version = 1
+ },
+ { .key = "performance.least-rate-limit",
+ .voltype = "performance/io-threads",
+ .op_version = 2
+ },
+
+ /* Other perf xlators' options */
+ { .key = "performance.cache-size",
+ .voltype = "performance/quick-read",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.flush-behind",
+ .voltype = "performance/write-behind",
+ .option = "flush-behind",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.write-behind-window-size",
+ .voltype = "performance/write-behind",
+ .option = "cache-size",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.strict-o-direct",
+ .voltype = "performance/write-behind",
+ .option = "strict-O_DIRECT",
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.strict-write-ordering",
+ .voltype = "performance/write-behind",
+ .option = "strict-write-ordering",
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.lazy-open",
+ .voltype = "performance/open-behind",
+ .option = "lazy-open",
+ .op_version = 3,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.read-ahead-page-count",
+ .voltype = "performance/read-ahead",
+ .option = "page-count",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.md-cache-timeout",
+ .voltype = "performance/md-cache",
+ .option = "md-cache-timeout",
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+
+ /* Crypt xlator options */
+
+ { .key = "features.encryption",
+ .voltype = "encryption/crypt",
+ .option = "!feat",
+ .value = "off",
+ .op_version = 3,
+ .description = "enable/disable client-side encryption for "
+ "the volume.",
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT
+ },
+
+ { .key = "encryption.master-key",
+ .voltype = "encryption/crypt",
+ .op_version = 3,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "encryption.data-key-size",
+ .voltype = "encryption/crypt",
+ .op_version = 3,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "encryption.block-size",
+ .voltype = "encryption/crypt",
+ .op_version = 3,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+
+ /* Client xlator options */
+ { .key = "network.frame-timeout",
+ .voltype = "protocol/client",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "network.ping-timeout",
+ .voltype = "protocol/client",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "network.tcp-window-size",
+ .voltype = "protocol/client",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "features.lock-heal",
+ .voltype = "protocol/client",
+ .option = "lk-heal",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "features.grace-timeout",
+ .voltype = "protocol/client",
+ .option = "grace-timeout",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "client.ssl",
+ .voltype = "protocol/client",
+ .option = "transport.socket.ssl-enabled",
+ .type = NO_DOC,
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "network.remote-dio",
+ .voltype = "protocol/client",
+ .option = "filter-O_DIRECT",
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+
+ /* Server xlator options */
+ { .key = "network.tcp-window-size",
+ .voltype = "protocol/server",
+ .op_version = 1
+ },
+ { .key = "network.inode-lru-limit",
+ .voltype = "protocol/server",
+ .op_version = 1
+ },
+ { .key = AUTH_ALLOW_MAP_KEY,
+ .voltype = "protocol/server",
+ .option = "!server-auth",
+ .value = "*",
+ .op_version = 1
+ },
+ { .key = AUTH_REJECT_MAP_KEY,
+ .voltype = "protocol/server",
+ .option = "!server-auth",
+ .op_version = 1
+ },
+ { .key = "transport.keepalive",
+ .voltype = "protocol/server",
+ .option = "transport.socket.keepalive",
+ .type = NO_DOC,
+ .op_version = 1
+ },
+ { .key = "server.allow-insecure",
+ .voltype = "protocol/server",
+ .option = "rpc-auth-allow-insecure",
+ .type = NO_DOC,
+ .op_version = 1
+ },
+ { .key = "server.root-squash",
+ .voltype = "protocol/server",
+ .option = "root-squash",
+ .op_version = 2
+ },
+ { .key = "server.statedump-path",
+ .voltype = "protocol/server",
+ .option = "statedump-path",
+ .op_version = 1
+ },
+ { .key = "server.outstanding-rpc-limit",
+ .voltype = "protocol/server",
+ .option = "rpc.outstanding-rpc-limit",
+ .type = GLOBAL_DOC,
+ .op_version = 3
+ },
+ { .key = "features.lock-heal",
+ .voltype = "protocol/server",
+ .option = "lk-heal",
+ .type = NO_DOC,
+ .op_version = 1
+ },
+ { .key = "features.grace-timeout",
+ .voltype = "protocol/server",
+ .option = "grace-timeout",
+ .type = NO_DOC,
+ .op_version = 1
+ },
+ { .key = "server.ssl",
+ .voltype = "protocol/server",
+ .option = "transport.socket.ssl-enabled",
+ .type = NO_DOC,
+ .op_version = 2
+ },
+
+ /* Performance xlators enable/disbable options */
+ { .key = "performance.write-behind",
+ .voltype = "performance/write-behind",
+ .option = "!perf",
+ .value = "on",
+ .op_version = 1,
+ .description = "enable/disable write-behind translator in the "
+ "volume.",
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "performance.read-ahead",
+ .voltype = "performance/read-ahead",
+ .option = "!perf",
+ .value = "on",
+ .op_version = 1,
+ .description = "enable/disable read-ahead translator in the volume.",
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "performance.readdir-ahead",
+ .voltype = "performance/readdir-ahead",
+ .option = "!perf",
+ .value = "off",
+ .op_version = 3,
+ .description = "enable/disable readdir-ahead translator in the volume.",
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT
+ },
+
+ { .key = "performance.io-cache",
+ .voltype = "performance/io-cache",
+ .option = "!perf",
+ .value = "on",
+ .op_version = 1,
+ .description = "enable/disable io-cache translator in the volume.",
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "performance.quick-read",
+ .voltype = "performance/quick-read",
+ .option = "!perf",
+ .value = "on",
+ .op_version = 1,
+ .description = "enable/disable quick-read translator in the volume.",
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT
+
+ },
+ { .key = "performance.open-behind",
+ .voltype = "performance/open-behind",
+ .option = "!perf",
+ .value = "on",
+ .op_version = 2,
+ .description = "enable/disable open-behind translator in the volume.",
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT
+
+ },
+ { .key = "performance.stat-prefetch",
+ .voltype = "performance/md-cache",
+ .option = "!perf",
+ .value = "on",
+ .op_version = 1,
+ .description = "enable/disable meta-data caching translator in the "
+ "volume.",
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "performance.client-io-threads",
+ .voltype = "performance/io-threads",
+ .option = "!perf",
+ .value = "off",
+ .op_version = 1,
+ .description = "enable/disable io-threads translator in the client "
+ "graph of volume.",
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "performance.nfs.write-behind",
+ .voltype = "performance/write-behind",
+ .option = "!nfsperf",
+ .value = "on",
+ .type = NO_DOC,
+ .op_version = 1,
+ .flags = OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "performance.nfs.read-ahead",
+ .voltype = "performance/read-ahead",
+ .option = "!nfsperf",
+ .value = "off",
+ .type = NO_DOC,
+ .op_version = 1,
+ .flags = OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "performance.nfs.io-cache",
+ .voltype = "performance/io-cache",
+ .option = "!nfsperf",
+ .value = "off",
+ .type = NO_DOC,
+ .op_version = 1,
+ .flags = OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "performance.nfs.quick-read",
+ .voltype = "performance/quick-read",
+ .option = "!nfsperf",
+ .value = "off",
+ .type = NO_DOC,
+ .op_version = 1,
+ .flags = OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "performance.nfs.stat-prefetch",
+ .voltype = "performance/md-cache",
+ .option = "!nfsperf",
+ .value = "off",
+ .type = NO_DOC,
+ .op_version = 1,
+ .flags = OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "performance.nfs.io-threads",
+ .voltype = "performance/io-threads",
+ .option = "!nfsperf",
+ .value = "off",
+ .type = NO_DOC,
+ .op_version = 1,
+ .flags = OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "performance.force-readdirp",
+ .voltype = "performance/md-cache",
+ .option = "force-readdirp",
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+
+ /* Feature translators */
+ { .key = "features.file-snapshot",
+ .voltype = "features/qemu-block",
+ .option = "!feat",
+ .value = "off",
+ .op_version = 3,
+ .description = "enable/disable file-snapshot feature in the "
+ "volume.",
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT
+ },
+
+#ifdef HAVE_LIB_Z
+ /* Compressor-decompressor xlator options
+ * defaults used from xlator/feature/compress/src/cdc.h
+ */
+ { .key = "features.compress",
+ .voltype = "features/cdc",
+ .option = "!compress",
+ .value = "off",
+ .type = NO_DOC,
+ .op_version = 2,
+ .description = "enable/disable compression translator"
+ },
+ { .key = "compress.mode",
+ .voltype = "features/cdc",
+ .type = NO_DOC,
+ .op_version = 2
+ },
+ { .key = "compress.window-size",
+ .voltype = "features/cdc",
+ .type = NO_DOC,
+ .op_version = 2
+ },
+ { .key = "compress.mem-level",
+ .voltype = "features/cdc",
+ .type = NO_DOC,
+ .op_version = 2
+ },
+ { .key = "compress.min-size",
+ .voltype = "features/cdc",
+ .type = NO_DOC,
+ .op_version = 2
+ },
+ { .key = "compress.compression-level",
+ .voltype = "features/cdc",
+ .type = NO_DOC,
+ .op_version = 2
+ },
+ { .key = "compress.debug",
+ .voltype = "features/cdc",
+ .type = NO_DOC,
+ .op_version = 2
+ },
+ #endif
+
+ /* Quota xlator options */
+ { .key = VKEY_FEATURES_LIMIT_USAGE,
+ .voltype = "features/quota",
+ .option = "limit-set",
+ .type = NO_DOC,
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "features.quota-timeout",
+ .voltype = "features/quota",
+ .option = "timeout",
+ .value = "0",
+ .op_version = 1,
+ .validate_fn = validate_quota,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "features.quota-deem-statfs",
+ .voltype = "features/quota",
+ .option = "deem-statfs",
+ .value = "off",
+ .type = DOC,
+ .op_version = 3,
+ .validate_fn = validate_quota,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+
+ /* Marker xlator options */
+ { .key = VKEY_MARKER_XTIME,
+ .voltype = "features/marker",
+ .option = "xtime",
+ .value = "off",
+ .type = NO_DOC,
+ .flags = OPT_FLAG_FORCE,
+ .op_version = 1
+ },
+ { .key = VKEY_MARKER_XTIME,
+ .voltype = "features/marker",
+ .option = "!xtime",
+ .value = "off",
+ .type = NO_DOC,
+ .flags = OPT_FLAG_FORCE,
+ .op_version = 1
+ },
+ { .key = VKEY_MARKER_XTIME_FORCE,
+ .voltype = "features/marker",
+ .option = "gsync-force-xtime",
+ .value = "off",
+ .type = NO_DOC,
+ .flags = OPT_FLAG_FORCE,
+ .op_version = 2
+ },
+ { .key = VKEY_MARKER_XTIME_FORCE,
+ .voltype = "features/marker",
+ .option = "!gsync-force-xtime",
+ .value = "off",
+ .type = NO_DOC,
+ .flags = OPT_FLAG_FORCE,
+ .op_version = 2
+ },
+ { .key = VKEY_FEATURES_QUOTA,
+ .voltype = "features/marker",
+ .option = "quota",
+ .value = "off",
+ .type = NO_DOC,
+ .flags = OPT_FLAG_FORCE,
+ .op_version = 1
+ },
+
+ /* Debug xlators options */
+ { .key = "debug.trace",
+ .voltype = "debug/trace",
+ .option = "!debug",
+ .value = "off",
+ .type = NO_DOC,
+ .op_version = 1,
+ .flags = OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "debug.log-history",
+ .voltype = "debug/trace",
+ .option = "log-history",
+ .type = NO_DOC,
+ .op_version = 2
+ },
+ { .key = "debug.log-file",
+ .voltype = "debug/trace",
+ .option = "log-file",
+ .type = NO_DOC,
+ .op_version = 2
+ },
+ { .key = "debug.exclude-ops",
+ .voltype = "debug/trace",
+ .option = "exclude-ops",
+ .type = NO_DOC,
+ .op_version = 2
+ },
+ { .key = "debug.include-ops",
+ .voltype = "debug/trace",
+ .option = "include-ops",
+ .type = NO_DOC,
+ .op_version = 2
+ },
+ { .key = "debug.error-gen",
+ .voltype = "debug/error-gen",
+ .option = "!debug",
+ .value = "off",
+ .type = NO_DOC,
+ .op_version = 1,
+ .flags = OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "debug.error-failure",
+ .voltype = "debug/error-gen",
+ .option = "failure",
+ .type = NO_DOC,
+ .op_version = 3
+ },
+ { .key = "debug.error-number",
+ .voltype = "debug/error-gen",
+ .option = "error-no",
+ .type = NO_DOC,
+ .op_version = 3
+ },
+ { .key = "debug.random-failure",
+ .voltype = "debug/error-gen",
+ .option = "random-failure",
+ .type = NO_DOC,
+ .op_version = 3
+ },
+ { .key = "debug.error-fops",
+ .voltype = "debug/error-gen",
+ .option = "enable",
+ .type = NO_DOC,
+ .op_version = 3
+ },
+
+
+ /* NFS xlator options */
+ { .key = "nfs.enable-ino32",
+ .voltype = "nfs/server",
+ .option = "nfs.enable-ino32",
+ .type = GLOBAL_DOC,
+ .op_version = 1
+ },
+ { .key = "nfs.mem-factor",
+ .voltype = "nfs/server",
+ .option = "nfs.mem-factor",
+ .type = GLOBAL_DOC,
+ .op_version = 1
+ },
+ { .key = "nfs.export-dirs",
+ .voltype = "nfs/server",
+ .option = "nfs3.export-dirs",
+ .type = GLOBAL_DOC,
+ .op_version = 1
+ },
+ { .key = "nfs.export-volumes",
+ .voltype = "nfs/server",
+ .option = "nfs3.export-volumes",
+ .type = GLOBAL_DOC,
+ .op_version = 1
+ },
+ { .key = "nfs.addr-namelookup",
+ .voltype = "nfs/server",
+ .option = "rpc-auth.addr.namelookup",
+ .type = GLOBAL_DOC,
+ .op_version = 1
+ },
+ { .key = "nfs.dynamic-volumes",
+ .voltype = "nfs/server",
+ .option = "nfs.dynamic-volumes",
+ .type = GLOBAL_DOC,
+ .op_version = 1
+ },
+ { .key = "nfs.register-with-portmap",
+ .voltype = "nfs/server",
+ .option = "rpc.register-with-portmap",
+ .type = GLOBAL_DOC,
+ .op_version = 1
+ },
+ { .key = "nfs.outstanding-rpc-limit",
+ .voltype = "nfs/server",
+ .option = "rpc.outstanding-rpc-limit",
+ .type = GLOBAL_DOC,
+ .op_version = 3
+ },
+ { .key = "nfs.port",
+ .voltype = "nfs/server",
+ .option = "nfs.port",
+ .type = GLOBAL_DOC,
+ .op_version = 1
+ },
+ { .key = "nfs.rpc-auth-unix",
+ .voltype = "nfs/server",
+ .option = "!rpc-auth.auth-unix.*",
+ .op_version = 1
+ },
+ { .key = "nfs.rpc-auth-null",
+ .voltype = "nfs/server",
+ .option = "!rpc-auth.auth-null.*",
+ .op_version = 1
+ },
+ { .key = "nfs.rpc-auth-allow",
+ .voltype = "nfs/server",
+ .option = "!rpc-auth.addr.*.allow",
+ .op_version = 1
+ },
+ { .key = "nfs.rpc-auth-reject",
+ .voltype = "nfs/server",
+ .option = "!rpc-auth.addr.*.reject",
+ .op_version = 1
+ },
+ { .key = "nfs.ports-insecure",
+ .voltype = "nfs/server",
+ .option = "!rpc-auth.ports.*.insecure",
+ .op_version = 1
+ },
+ { .key = "nfs.transport-type",
+ .voltype = "nfs/server",
+ .option = "!nfs.transport-type",
+ .value = "tcp",
+ .op_version = 1,
+ .description = "Specifies the nfs transport type. Valid "
+ "transport types are 'tcp' and 'rdma'."
+ },
+ { .key = "nfs.trusted-sync",
+ .voltype = "nfs/server",
+ .option = "!nfs3.*.trusted-sync",
+ .op_version = 1
+ },
+ { .key = "nfs.trusted-write",
+ .voltype = "nfs/server",
+ .option = "!nfs3.*.trusted-write",
+ .op_version = 1
+ },
+ { .key = "nfs.volume-access",
+ .voltype = "nfs/server",
+ .option = "!nfs3.*.volume-access",
+ .op_version = 1
+ },
+ { .key = "nfs.export-dir",
+ .voltype = "nfs/server",
+ .option = "!nfs3.*.export-dir",
+ .op_version = 1
+ },
+ { .key = NFS_DISABLE_MAP_KEY,
+ .voltype = "nfs/server",
+ .option = "!nfs-disable",
+ .op_version = 1
+ },
+ { .key = "nfs.nlm",
+ .voltype = "nfs/server",
+ .option = "nfs.nlm",
+ .type = GLOBAL_DOC,
+ .op_version = 1
+ },
+ { .key = "nfs.acl",
+ .voltype = "nfs/server",
+ .option = "nfs.acl",
+ .type = GLOBAL_DOC,
+ .op_version = 3
+ },
+ { .key = "nfs.mount-udp",
+ .voltype = "nfs/server",
+ .option = "nfs.mount-udp",
+ .type = GLOBAL_DOC,
+ .op_version = 1
+ },
+ { .key = "nfs.mount-rmtab",
+ .voltype = "nfs/server",
+ .option = "nfs.mount-rmtab",
+ .type = GLOBAL_DOC,
+ .op_version = 1
+ },
+ { .key = "nfs.server-aux-gids",
+ .voltype = "nfs/server",
+ .option = "nfs.server-aux-gids",
+ .type = NO_DOC,
+ .op_version = 2
+ },
+ { .key = "nfs.drc",
+ .voltype = "nfs/server",
+ .option = "nfs.drc",
+ .type = GLOBAL_DOC,
+ .op_version = 3
+ },
+ { .key = "nfs.drc-size",
+ .voltype = "nfs/server",
+ .option = "nfs.drc-size",
+ .type = GLOBAL_DOC,
+ .op_version = 3
+ },
+ { .key = "nfs.read-size",
+ .voltype = "nfs/server",
+ .option = "nfs3.read-size",
+ .type = GLOBAL_DOC,
+ .op_version = 3
+ },
+ { .key = "nfs.write-size",
+ .voltype = "nfs/server",
+ .option = "nfs3.write-size",
+ .type = GLOBAL_DOC,
+ .op_version = 3
+ },
+ { .key = "nfs.readdir-size",
+ .voltype = "nfs/server",
+ .option = "nfs3.readdir-size",
+ .type = GLOBAL_DOC,
+ .op_version = 3
+ },
+
+ /* Other options which don't fit any place above */
+ { .key = "features.read-only",
+ .voltype = "features/read-only",
+ .option = "!read-only",
+ .value = "off",
+ .op_version = 1,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "features.worm",
+ .voltype = "features/worm",
+ .option = "!worm",
+ .value = "off",
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT
+ },
+ { .key = "storage.linux-aio",
+ .voltype = "storage/posix",
+ .op_version = 1
+ },
+ { .key = "storage.batch-fsync-mode",
+ .voltype = "storage/posix",
+ .op_version = 3
+ },
+ { .key = "storage.batch-fsync-delay-usec",
+ .voltype = "storage/posix",
+ .op_version = 3
+ },
+ { .key = "storage.owner-uid",
+ .voltype = "storage/posix",
+ .option = "brick-uid",
+ .op_version = 1
+ },
+ { .key = "storage.owner-gid",
+ .voltype = "storage/posix",
+ .option = "brick-gid",
+ .op_version = 1
+ },
+ { .key = "storage.node-uuid-pathinfo",
+ .voltype = "storage/posix",
+ .op_version = 3
+ },
+ { .key = "storage.health-check-interval",
+ .voltype = "storage/posix",
+ .op_version = 3
+ },
+ { .key = "storage.bd-aio",
+ .voltype = "storage/bd",
+ .op_version = 3
+ },
+ { .key = "config.memory-accounting",
+ .voltype = "configuration",
+ .option = "!config",
+ .op_version = 2,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "config.transport",
+ .voltype = "configuration",
+ .option = "!config",
+ .op_version = 2
+ },
+ { .key = GLUSTERD_QUORUM_TYPE_KEY,
+ .voltype = "mgmt/glusterd",
+ .value = "off",
+ .op_version = 2
+ },
+ { .key = GLUSTERD_QUORUM_RATIO_KEY,
+ .voltype = "mgmt/glusterd",
+ .value = "0",
+ .op_version = 2
+ },
+ /* changelog translator - global tunables */
+ { .key = "changelog.changelog",
+ .voltype = "features/changelog",
+ .type = NO_DOC,
+ .op_version = 2
+ },
+ { .key = "changelog.changelog-dir",
+ .voltype = "features/changelog",
+ .type = NO_DOC,
+ .op_version = 2
+ },
+ { .key = "changelog.encoding",
+ .voltype = "features/changelog",
+ .type = NO_DOC,
+ .op_version = 2
+ },
+ { .key = "changelog.rollover-time",
+ .voltype = "features/changelog",
+ .type = NO_DOC,
+ .op_version = 2
+ },
+ { .key = "changelog.fsync-interval",
+ .voltype = "features/changelog",
+ .type = NO_DOC,
+ .op_version = 2
+ },
+ { .key = NULL
+ }
+};
diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c
new file mode 100644
index 000000000..59288ada0
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd.c
@@ -0,0 +1,1597 @@
+/*
+ Copyright (c) 2006-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+#include <time.h>
+#include <grp.h>
+#include <sys/uio.h>
+#include <sys/resource.h>
+
+#include <libgen.h>
+#include "uuid.h"
+
+#include "glusterd.h"
+#include "rpcsvc.h"
+#include "fnmatch.h"
+#include "xlator.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "list.h"
+#include "dict.h"
+#include "compat.h"
+#include "compat-errno.h"
+#include "statedump.h"
+#include "glusterd-sm.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-store.h"
+#include "glusterd-hooks.h"
+#include "glusterd-utils.h"
+#include "glusterd-locks.h"
+#include "common-utils.h"
+#include "run.h"
+
+#include "syncop.h"
+
+#include "glusterd-mountbroker.h"
+
+extern struct rpcsvc_program gluster_handshake_prog;
+extern struct rpcsvc_program gluster_cli_getspec_prog;
+extern struct rpcsvc_program gluster_pmap_prog;
+extern glusterd_op_info_t opinfo;
+extern struct rpcsvc_program gd_svc_mgmt_prog;
+extern struct rpcsvc_program gd_svc_mgmt_v3_prog;
+extern struct rpcsvc_program gd_svc_peer_prog;
+extern struct rpcsvc_program gd_svc_cli_prog;
+extern struct rpcsvc_program gd_svc_cli_prog_ro;
+extern struct rpc_clnt_program gd_brick_prog;
+extern struct rpcsvc_program glusterd_mgmt_hndsk_prog;
+
+extern char snap_mount_folder[PATH_MAX];
+
+rpcsvc_cbk_program_t glusterd_cbk_prog = {
+ .progname = "Gluster Callback",
+ .prognum = GLUSTER_CBK_PROGRAM,
+ .progver = GLUSTER_CBK_VERSION,
+};
+
+struct rpcsvc_program *gd_inet_programs[] = {
+ &gd_svc_peer_prog,
+ &gd_svc_cli_prog_ro,
+ &gd_svc_mgmt_prog,
+ &gd_svc_mgmt_v3_prog,
+ &gluster_pmap_prog,
+ &gluster_handshake_prog,
+ &glusterd_mgmt_hndsk_prog,
+};
+int gd_inet_programs_count = (sizeof (gd_inet_programs) /
+ sizeof (gd_inet_programs[0]));
+
+struct rpcsvc_program *gd_uds_programs[] = {
+ &gd_svc_cli_prog,
+ &gluster_cli_getspec_prog,
+};
+int gd_uds_programs_count = (sizeof (gd_uds_programs) /
+ sizeof (gd_uds_programs[0]));
+
+const char *gd_op_list[GD_OP_MAX + 1] = {
+ [GD_OP_NONE] = "Invalid op",
+ [GD_OP_CREATE_VOLUME] = "Create",
+ [GD_OP_START_BRICK] = "Start Brick",
+ [GD_OP_STOP_BRICK] = "Stop Brick",
+ [GD_OP_DELETE_VOLUME] = "Delete",
+ [GD_OP_START_VOLUME] = "Start",
+ [GD_OP_STOP_VOLUME] = "Stop",
+ [GD_OP_DEFRAG_VOLUME] = "Rebalance",
+ [GD_OP_ADD_BRICK] = "Add brick",
+ [GD_OP_REMOVE_BRICK] = "Remove brick",
+ [GD_OP_REPLACE_BRICK] = "Replace brick",
+ [GD_OP_SET_VOLUME] = "Set",
+ [GD_OP_RESET_VOLUME] = "Reset",
+ [GD_OP_SYNC_VOLUME] = "Sync",
+ [GD_OP_LOG_ROTATE] = "Log rotate",
+ [GD_OP_GSYNC_SET] = "Geo-replication",
+ [GD_OP_PROFILE_VOLUME] = "Profile",
+ [GD_OP_QUOTA] = "Quota",
+ [GD_OP_STATUS_VOLUME] = "Status",
+ [GD_OP_REBALANCE] = "Rebalance",
+ [GD_OP_HEAL_VOLUME] = "Heal",
+ [GD_OP_STATEDUMP_VOLUME] = "Statedump",
+ [GD_OP_LIST_VOLUME] = "Lists",
+ [GD_OP_CLEARLOCKS_VOLUME] = "Clear locks",
+ [GD_OP_DEFRAG_BRICK_VOLUME] = "Rebalance",
+ [GD_OP_COPY_FILE] = "Copy File",
+ [GD_OP_SYS_EXEC] = "Execute system commands",
+ [GD_OP_GSYNC_CREATE] = "Geo-replication Create",
+ [GD_OP_SNAP] = "Snapshot",
+ [GD_OP_MAX] = "Invalid op"
+};
+
+static int
+glusterd_opinfo_init ()
+{
+ int32_t ret = -1;
+
+ opinfo.op = GD_OP_NONE;
+
+ return ret;
+}
+
+
+int
+glusterd_uuid_init ()
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+
+ ret = glusterd_retrieve_uuid ();
+ if (ret == 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "retrieved UUID: %s", uuid_utoa (priv->uuid));
+ return 0;
+ }
+
+ ret = glusterd_uuid_generate_save ();
+
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "Unable to generate and save new UUID");
+ return ret;
+ }
+
+ return 0;
+}
+
+int
+glusterd_uuid_generate_save ()
+{
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ uuid_generate (priv->uuid);
+
+ gf_log (this->name, GF_LOG_INFO, "generated UUID: %s",
+ uuid_utoa (priv->uuid));
+
+ ret = glusterd_store_global_info (this);
+
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to store the generated uuid %s",
+ uuid_utoa (priv->uuid));
+
+ return ret;
+}
+
+int
+glusterd_options_init (xlator_t *this)
+{
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+ char *initial_version = "0";
+
+ priv = this->private;
+
+ priv->opts = dict_new ();
+ if (!priv->opts)
+ goto out;
+
+ ret = glusterd_store_retrieve_options (this);
+ if (ret == 0)
+ goto out;
+
+ ret = dict_set_str (priv->opts, GLUSTERD_GLOBAL_OPT_VERSION,
+ initial_version);
+ if (ret)
+ goto out;
+ ret = glusterd_store_options (this, priv->opts);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Unable to store version");
+ return ret;
+ }
+out:
+
+ return 0;
+}
+int
+glusterd_fetchspec_notify (xlator_t *this)
+{
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+ rpc_transport_t *trans = NULL;
+
+ priv = this->private;
+
+ pthread_mutex_lock (&priv->xprt_lock);
+ {
+ list_for_each_entry (trans, &priv->xprt_list, list) {
+ rpcsvc_callback_submit (priv->rpc, trans,
+ &glusterd_cbk_prog,
+ GF_CBK_FETCHSPEC, NULL, 0);
+ }
+ }
+ pthread_mutex_unlock (&priv->xprt_lock);
+
+ ret = 0;
+
+ return ret;
+}
+
+int
+glusterd_priv (xlator_t *this)
+{
+ return 0;
+}
+
+
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_gld_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ " failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+int
+glusterd_rpcsvc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
+ void *data)
+{
+ xlator_t *this = NULL;
+ rpc_transport_t *xprt = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ if (!xl || !data) {
+ gf_log ("glusterd", GF_LOG_WARNING,
+ "Calling rpc_notify without initializing");
+ goto out;
+ }
+
+ this = xl;
+ xprt = data;
+
+ priv = this->private;
+
+ switch (event) {
+ case RPCSVC_EVENT_ACCEPT:
+ {
+ INIT_LIST_HEAD (&xprt->list);
+
+ pthread_mutex_lock (&priv->xprt_lock);
+ list_add_tail (&xprt->list, &priv->xprt_list);
+ pthread_mutex_unlock (&priv->xprt_lock);
+ break;
+ }
+ case RPCSVC_EVENT_DISCONNECT:
+ {
+ pthread_mutex_lock (&priv->xprt_lock);
+ list_del (&xprt->list);
+ pthread_mutex_unlock (&priv->xprt_lock);
+ pmap_registry_remove (this, 0, NULL, GF_PMAP_PORT_NONE, xprt);
+ break;
+ }
+
+ default:
+ break;
+ }
+
+out:
+ return 0;
+}
+
+
+inline int32_t
+glusterd_program_register (xlator_t *this, rpcsvc_t *svc,
+ rpcsvc_program_t *prog)
+{
+ int32_t ret = -1;
+
+ ret = rpcsvc_program_register (svc, prog);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "cannot register program (name: %s, prognum:%d, "
+ "progver:%d)", prog->progname, prog->prognum,
+ prog->progver);
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+int
+glusterd_rpcsvc_options_build (dict_t *options)
+{
+ int ret = 0;
+ uint32_t backlog = 0;
+
+ ret = dict_get_uint32 (options, "transport.socket.listen-backlog",
+ &backlog);
+
+ if (ret) {
+ backlog = GLUSTERD_SOCKET_LISTEN_BACKLOG;
+ ret = dict_set_uint32 (options,
+ "transport.socket.listen-backlog",
+ backlog);
+ if (ret)
+ goto out;
+ }
+
+ gf_log ("", GF_LOG_DEBUG, "listen-backlog value: %d", backlog);
+
+out:
+ return ret;
+}
+
+#if SYNCDAEMON_COMPILE
+static int
+glusterd_check_gsync_present (int *valid_state)
+{
+ char buff[PATH_MAX] = {0, };
+ runner_t runner = {0,};
+ char *ptr = NULL;
+ int ret = 0;
+
+ runinit (&runner);
+ runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "--version", NULL);
+ runner_redir (&runner, STDOUT_FILENO, RUN_PIPE);
+ ret = runner_start (&runner);
+ if (ret == -1) {
+ if (errno == ENOENT) {
+ gf_log ("glusterd", GF_LOG_INFO, GEOREP
+ " module not installed in the system");
+ *valid_state = 0;
+ }
+ else {
+ gf_log ("glusterd", GF_LOG_ERROR, GEOREP
+ " module not working as desired");
+ *valid_state = -1;
+ }
+ goto out;
+ }
+
+ ptr = fgets(buff, sizeof(buff), runner_chio (&runner, STDOUT_FILENO));
+ if (ptr) {
+ if (!strstr (buff, "gsyncd")) {
+ ret = -1;
+ gf_log ("glusterd", GF_LOG_ERROR, GEOREP" module not "
+ "working as desired");
+ *valid_state = -1;
+ goto out;
+ }
+ } else {
+ ret = -1;
+ gf_log ("glusterd", GF_LOG_ERROR, GEOREP" module not "
+ "working as desired");
+ *valid_state = -1;
+ goto out;
+ }
+
+ ret = 0;
+ out:
+
+ runner_end (&runner);
+
+ gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+
+}
+
+static int
+group_write_allow (char *path, gid_t gid)
+{
+ struct stat st = {0,};
+ int ret = 0;
+
+ ret = stat (path, &st);
+ if (ret == -1)
+ goto out;
+ GF_ASSERT (S_ISDIR (st.st_mode));
+
+ ret = chown (path, -1, gid);
+ if (ret == -1)
+ goto out;
+
+ ret = chmod (path, (st.st_mode & ~S_IFMT) | S_IWGRP|S_IXGRP|S_ISVTX);
+
+ out:
+ if (ret == -1)
+ gf_log ("", GF_LOG_CRITICAL,
+ "failed to set up write access to %s for group %d (%s)",
+ path, gid, strerror (errno));
+ return ret;
+}
+
+static int
+glusterd_crt_georep_folders (char *georepdir, glusterd_conf_t *conf)
+{
+ char *greplg_s = NULL;
+ struct group *gr = NULL;
+ int ret = 0;
+
+ GF_ASSERT (georepdir);
+ GF_ASSERT (conf);
+
+ if (strlen (conf->workdir)+2 > PATH_MAX-strlen(GEOREP)) {
+ ret = -1;
+ gf_log ("glusterd", GF_LOG_CRITICAL,
+ "directory path %s/"GEOREP" is longer than PATH_MAX",
+ conf->workdir);
+ goto out;
+ }
+
+ snprintf (georepdir, PATH_MAX, "%s/"GEOREP, conf->workdir);
+ ret = mkdir_p (georepdir, 0777, _gf_true);
+ if (-1 == ret) {
+ gf_log ("glusterd", GF_LOG_CRITICAL,
+ "Unable to create "GEOREP" directory %s",
+ georepdir);
+ goto out;
+ }
+
+ if (strlen (DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP) >= PATH_MAX) {
+ ret = -1;
+ gf_log ("glusterd", GF_LOG_CRITICAL,
+ "directory path "DEFAULT_LOG_FILE_DIRECTORY"/"
+ GEOREP" is longer than PATH_MAX");
+ goto out;
+ }
+ ret = mkdir_p (DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP, 0777, _gf_true);
+ if (-1 == ret) {
+ gf_log ("glusterd", GF_LOG_CRITICAL,
+ "Unable to create "GEOREP" log directory");
+ goto out;
+ }
+
+ /* Slave log file directory */
+ if (strlen(DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves") >= PATH_MAX) {
+ ret = -1;
+ gf_log ("glusterd", GF_LOG_CRITICAL,
+ "directory path "DEFAULT_LOG_FILE_DIRECTORY"/"
+ GEOREP"-slaves"" is longer than PATH_MAX");
+ goto out;
+ }
+ ret = mkdir_p (DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves", 0777,
+ _gf_true);
+ if (-1 == ret) {
+ gf_log ("glusterd", GF_LOG_CRITICAL,
+ "Unable to create "GEOREP" slave log directory");
+ goto out;
+ }
+
+ /* MountBroker log file directory */
+ if (strlen(DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves/mbr") >= PATH_MAX) {
+ ret = -1;
+ gf_log ("glusterd", GF_LOG_CRITICAL,
+ "directory path "DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP
+ "-slaves/mbr"" is longer than PATH_MAX");
+ goto out;
+ }
+ ret = mkdir_p (DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves/mbr", 0777,
+ _gf_true);
+ if (-1 == ret) {
+ gf_log ("glusterd", GF_LOG_CRITICAL,
+ "Unable to create "GEOREP" mountbroker slave log directory");
+ goto out;
+ }
+
+ ret = dict_get_str (THIS->options, GEOREP"-log-group", &greplg_s);
+ if (ret)
+ ret = 0;
+ else {
+ gr = getgrnam (greplg_s);
+ if (!gr) {
+ gf_log ("glusterd", GF_LOG_CRITICAL,
+ "group "GEOREP"-log-group %s does not exist", greplg_s);
+ ret = -1;
+ goto out;
+ }
+
+ ret = group_write_allow (DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP,
+ gr->gr_gid);
+ if (ret == 0)
+ ret = group_write_allow (DEFAULT_LOG_FILE_DIRECTORY"/"
+ GEOREP"-slaves", gr->gr_gid);
+ if (ret == 0)
+ ret = group_write_allow (DEFAULT_LOG_FILE_DIRECTORY"/"
+ GEOREP"-slaves/mbr", gr->gr_gid);
+ }
+
+ out:
+ gf_log("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+static void
+runinit_gsyncd_setrx (runner_t *runner, glusterd_conf_t *conf)
+{
+ runinit (runner);
+ runner_add_args (runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL);
+ runner_argprintf (runner, "%s/"GSYNC_CONF_TEMPLATE, conf->workdir);
+ runner_add_arg (runner, "--config-set-rx");
+}
+
+static int
+configure_syncdaemon (glusterd_conf_t *conf)
+#define RUN_GSYNCD_CMD do { \
+ ret = runner_run_reuse (&runner); \
+ if (ret == -1) { \
+ runner_log (&runner, "glusterd", GF_LOG_ERROR, "command failed"); \
+ runner_end (&runner); \
+ goto out; \
+ } \
+ runner_end (&runner); \
+} while (0)
+{
+ int ret = 0;
+ runner_t runner = {0,};
+ char georepdir[PATH_MAX] = {0,};
+ int valid_state = 0;
+
+ ret = setenv ("_GLUSTERD_CALLED_", "1", 1);
+ if (ret < 0) {
+ ret = 0;
+ goto out;
+ }
+ valid_state = -1;
+ ret = glusterd_check_gsync_present (&valid_state);
+ if (-1 == ret) {
+ ret = valid_state;
+ goto out;
+ }
+
+ glusterd_crt_georep_folders (georepdir, conf);
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
+
+ /************
+ * master pre-configuration
+ ************/
+
+ /* remote-gsyncd */
+ runinit_gsyncd_setrx (&runner, conf);
+ runner_add_args (&runner, "remote-gsyncd", GSYNCD_PREFIX"/gsyncd", ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ runinit_gsyncd_setrx (&runner, conf);
+ runner_add_args (&runner, "remote-gsyncd", "/nonexistent/gsyncd",
+ ".", "^ssh:", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* gluster-command-dir */
+ runinit_gsyncd_setrx (&runner, conf);
+ runner_add_args (&runner, "gluster-command-dir", SBIN_DIR"/",
+ ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* gluster-params */
+ runinit_gsyncd_setrx (&runner, conf);
+ runner_add_args (&runner, "gluster-params",
+ "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true",
+ ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* ssh-command */
+ runinit_gsyncd_setrx (&runner, conf);
+ runner_add_arg (&runner, "ssh-command");
+ runner_argprintf (&runner,
+ "ssh -oPasswordAuthentication=no "
+ "-oStrictHostKeyChecking=no "
+ "-i %s/secret.pem", georepdir);
+ runner_add_args (&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* pid-file */
+ runinit_gsyncd_setrx (&runner, conf);
+ runner_add_arg (&runner, "pid-file");
+ runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}.pid", georepdir);
+ runner_add_args (&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* state-file */
+ runinit_gsyncd_setrx (&runner, conf);
+ runner_add_arg (&runner, "state-file");
+ runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}.status", georepdir);
+ runner_add_args (&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* state-detail-file */
+ runinit_gsyncd_setrx (&runner, conf);
+ runner_add_arg (&runner, "state-detail-file");
+ runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}-detail.status",
+ georepdir);
+ runner_add_args (&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* state-detail-file */
+ runinit_gsyncd_setrx (&runner, conf);
+ runner_add_arg (&runner, "state-detail-file");
+ runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}-detail.status",
+ georepdir);
+ runner_add_args (&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* state-socket */
+ runinit_gsyncd_setrx (&runner, conf);
+ runner_add_arg (&runner, "state-socket-unencoded");
+ runner_argprintf (&runner, "%s/${mastervol}/${eSlave}.socket", georepdir);
+ runner_add_args (&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* socketdir */
+ runinit_gsyncd_setrx (&runner, conf);
+ runner_add_args (&runner, "socketdir", GLUSTERD_SOCK_DIR, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* log-file */
+ runinit_gsyncd_setrx (&runner, conf);
+ runner_add_args (&runner,
+ "log-file",
+ DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"/${mastervol}/${eSlave}.log",
+ ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* gluster-log-file */
+ runinit_gsyncd_setrx (&runner, conf);
+ runner_add_args (&runner,
+ "gluster-log-file",
+ DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"/${mastervol}/${eSlave}${local_id}.gluster.log",
+ ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* ignore-deletes */
+ runinit_gsyncd_setrx (&runner, conf);
+ runner_add_args (&runner, "ignore-deletes", "true", ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* special-sync-mode */
+ runinit_gsyncd_setrx (&runner, conf);
+ runner_add_args (&runner, "special-sync-mode", "partial", ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* change-detector == changelog */
+ runinit_gsyncd_setrx (&runner, conf);
+ runner_add_args(&runner, "change-detector", "changelog", ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ runinit_gsyncd_setrx (&runner, conf);
+ runner_add_arg(&runner, "working-dir");
+ runner_argprintf(&runner, "%s/${mastervol}/${eSlave}",
+ DEFAULT_VAR_RUN_DIRECTORY);
+ runner_add_args (&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /************
+ * slave pre-configuration
+ ************/
+
+ /* gluster-command-dir */
+ runinit_gsyncd_setrx (&runner, conf);
+ runner_add_args (&runner, "gluster-command-dir", SBIN_DIR"/",
+ ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* gluster-params */
+ runinit_gsyncd_setrx (&runner, conf);
+ runner_add_args (&runner, "gluster-params",
+ "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true",
+ ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* log-file */
+ runinit_gsyncd_setrx (&runner, conf);
+ runner_add_args (&runner,
+ "log-file",
+ DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves/${session_owner}:${eSlave}.log",
+ ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* MountBroker log-file */
+ runinit_gsyncd_setrx (&runner, conf);
+ runner_add_args (&runner,
+ "log-file-mbr",
+ DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves/mbr/${session_owner}:${eSlave}.log",
+ ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* gluster-log-file */
+ runinit_gsyncd_setrx (&runner, conf);
+ runner_add_args (&runner,
+ "gluster-log-file",
+ DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves/${session_owner}:${eSlave}.gluster.log",
+ ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ out:
+ return ret ? -1 : 0;
+}
+#undef RUN_GSYNCD_CMD
+#else /* SYNCDAEMON_COMPILE */
+static int
+configure_syncdaemon (glusterd_conf_t *conf)
+{
+ return 0;
+}
+#endif /* !SYNCDAEMON_COMPILE */
+
+
+static int
+check_prepare_mountbroker_root (char *mountbroker_root)
+{
+ int dfd0 = -1;
+ int dfd = -1;
+ int dfd2 = -1;
+ struct stat st = {0,};
+ struct stat st2 = {0,};
+ int ret = 0;
+
+ ret = open (mountbroker_root, O_RDONLY);
+ if (ret != -1) {
+ dfd = ret;
+ ret = fstat (dfd, &st);
+ }
+ if (ret == -1 || !S_ISDIR (st.st_mode)) {
+ gf_log ("", GF_LOG_ERROR,
+ "cannot access mountbroker-root directory %s",
+ mountbroker_root);
+ ret = -1;
+ goto out;
+ }
+ if (st.st_uid != 0 ||
+ (st.st_mode & (S_IWGRP|S_IWOTH))) {
+ gf_log ("", GF_LOG_ERROR,
+ "permissions on mountbroker-root directory %s are "
+ "too liberal", mountbroker_root);
+ ret = -1;
+ goto out;
+ }
+ if (!(st.st_mode & (S_IXGRP|S_IXOTH))) {
+ gf_log ("", GF_LOG_WARNING,
+ "permissions on mountbroker-root directory %s are "
+ "probably too strict", mountbroker_root);
+ }
+
+ dfd0 = dup (dfd);
+
+ for (;;) {
+ ret = openat (dfd, "..", O_RDONLY);
+ if (ret != -1) {
+ dfd2 = ret;
+ ret = fstat (dfd2, &st2);
+ }
+ if (ret == -1) {
+ gf_log ("", GF_LOG_ERROR,
+ "error while checking mountbroker-root ancestors "
+ "%d (%s)", errno, strerror (errno));
+ goto out;
+ }
+
+ if (st2.st_ino == st.st_ino)
+ break; /* arrived to root */
+
+ if (st2.st_uid != 0 ||
+ ((st2.st_mode & (S_IWGRP|S_IWOTH)) &&
+ !(st2.st_mode & S_ISVTX))) {
+ gf_log ("", GF_LOG_ERROR,
+ "permissions on ancestors of mountbroker-root "
+ "directory are too liberal");
+ ret = -1;
+ goto out;
+ }
+ if (!(st.st_mode & (S_IXGRP|S_IXOTH))) {
+ gf_log ("", GF_LOG_WARNING,
+ "permissions on ancestors of mountbroker-root "
+ "directory are probably too strict");
+ }
+
+ close (dfd);
+ dfd = dfd2;
+ st = st2;
+ }
+
+ ret = mkdirat (dfd0, MB_HIVE, 0711);
+ if (ret == -1 && errno == EEXIST)
+ ret = 0;
+ if (ret != -1)
+ ret = fstatat (dfd0, MB_HIVE, &st, AT_SYMLINK_NOFOLLOW);
+ if (ret == -1 || st.st_mode != (S_IFDIR|0711)) {
+ gf_log ("", GF_LOG_ERROR,
+ "failed to set up mountbroker-root directory %s",
+ mountbroker_root);
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+
+ out:
+ if (dfd0 != -1)
+ close (dfd0);
+ if (dfd != -1)
+ close (dfd);
+ if (dfd2 != -1)
+ close (dfd2);
+
+ return ret;
+}
+
+static int
+_install_mount_spec (dict_t *opts, char *key, data_t *value, void *data)
+{
+ glusterd_conf_t *priv = THIS->private;
+ char *label = NULL;
+ gf_boolean_t georep = _gf_false;
+ gf_boolean_t ghadoop = _gf_false;
+ char *pdesc = value->data;
+ char *volname = NULL;
+ int rv = 0;
+ gf_mount_spec_t *mspec = NULL;
+ char *user = NULL;
+ char *volfile_server = NULL;
+
+ label = strtail (key, "mountbroker.");
+
+ /* check for presence of geo-rep/hadoop label */
+ if (!label) {
+ label = strtail (key, "mountbroker-"GEOREP".");
+ if (label)
+ georep = _gf_true;
+ else {
+ label = strtail (key, "mountbroker-"GHADOOP".");
+ if (label)
+ ghadoop = _gf_true;
+ }
+ }
+
+ if (!label)
+ return 0;
+
+ mspec = GF_CALLOC (1, sizeof (*mspec), gf_gld_mt_mount_spec);
+ if (!mspec)
+ goto err;
+ mspec->label = label;
+
+ if (georep || ghadoop) {
+ volname = gf_strdup (pdesc);
+ if (!volname)
+ goto err;
+ user = strchr (volname, ':');
+ if (user) {
+ *user = '\0';
+ user++;
+ } else
+ user = label;
+
+ if (georep)
+ rv = make_georep_mountspec (mspec, volname, user);
+
+ if (ghadoop) {
+ volfile_server = strchr (user, ':');
+ if (volfile_server)
+ *volfile_server++ = '\0';
+ else
+ volfile_server = "localhost";
+
+ rv = make_ghadoop_mountspec (mspec, volname, user, volfile_server);
+ }
+
+ GF_FREE (volname);
+ if (rv != 0)
+ goto err;
+ } else if (parse_mount_pattern_desc (mspec, pdesc) != 0)
+ goto err;
+
+ list_add_tail (&mspec->speclist, &priv->mount_specs);
+
+ return 0;
+ err:
+
+ gf_log ("", GF_LOG_ERROR,
+ "adding %smount spec failed: label: %s desc: %s",
+ georep ? GEOREP" " : "", label, pdesc);
+
+ return -1;
+}
+
+
+static int
+gd_default_synctask_cbk (int ret, call_frame_t *frame, void *opaque)
+{
+ glusterd_conf_t *priv = THIS->private;
+ synclock_unlock (&priv->big_lock);
+ return ret;
+}
+
+static void
+glusterd_launch_synctask (synctask_fn_t fn, void *opaque)
+{
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ int ret = -1;
+
+ this = THIS;
+ priv = this->private;
+
+ synclock_lock (&priv->big_lock);
+ ret = synctask_new (this->ctx->env, fn, gd_default_synctask_cbk, NULL,
+ opaque);
+ if (ret)
+ gf_log (this->name, GF_LOG_CRITICAL, "Failed to spawn bricks"
+ " and other volume related services");
+}
+
+int
+glusterd_uds_rpcsvc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
+ void *data)
+{
+ /* glusterd_rpcsvc_notify() does stuff that calls coming in from the
+ * unix domain socket don't need. This is just an empty function to be
+ * used for the uds listener. This will be used later if required.
+ */
+ return 0;
+}
+
+/* The glusterd unix domain socket listener only listens for cli */
+rpcsvc_t *
+glusterd_init_uds_listener (xlator_t *this)
+{
+ int ret = -1;
+ dict_t *options = NULL;
+ rpcsvc_t *rpc = NULL;
+ data_t *sock_data = NULL;
+ char sockfile[PATH_MAX+1] = {0,};
+ int i = 0;
+
+
+ GF_ASSERT (this);
+
+ sock_data = dict_get (this->options, "glusterd-sockfile");
+ if (!sock_data) {
+ strncpy (sockfile, DEFAULT_GLUSTERD_SOCKFILE, PATH_MAX);
+ } else {
+ strncpy (sockfile, sock_data->data, PATH_MAX);
+ }
+
+ options = dict_new ();
+ if (!options)
+ goto out;
+
+ ret = rpcsvc_transport_unix_options_build (&options, sockfile);
+ if (ret)
+ goto out;
+
+ rpc = rpcsvc_init (this, this->ctx, options, 8);
+ if (rpc == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = rpcsvc_register_notify (rpc, glusterd_uds_rpcsvc_notify,
+ this);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Failed to register notify function");
+ goto out;
+ }
+
+ ret = rpcsvc_create_listeners (rpc, options, this->name);
+ if (ret != 1) {
+ gf_log (this->name, GF_LOG_DEBUG, "Failed to create listener");
+ goto out;
+ }
+ ret = 0;
+
+ for (i = 0; i < gd_uds_programs_count; i++) {
+ ret = glusterd_program_register (this, rpc, gd_uds_programs[i]);
+ if (ret) {
+ i--;
+ for (; i >= 0; i--)
+ rpcsvc_program_unregister (rpc,
+ gd_uds_programs[i]);
+
+ goto out;
+ }
+ }
+
+out:
+ if (options)
+ dict_unref (options);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to start glusterd "
+ "unix domain socket listener.");
+ if (rpc) {
+ GF_FREE (rpc);
+ rpc = NULL;
+ }
+ }
+ return rpc;
+}
+
+void
+glusterd_stop_uds_listener (xlator_t *this)
+{
+ glusterd_conf_t *conf = NULL;
+ rpcsvc_listener_t *listener = NULL;
+ rpcsvc_listener_t *next = NULL;
+
+ GF_ASSERT (this);
+ conf = this->private;
+
+ (void) rpcsvc_program_unregister (conf->uds_rpc, &gd_svc_cli_prog);
+ (void) rpcsvc_program_unregister (conf->uds_rpc, &gluster_handshake_prog);
+
+ list_for_each_entry_safe (listener, next, &conf->uds_rpc->listeners,
+ list) {
+ rpcsvc_listener_destroy (listener);
+ }
+
+ (void) rpcsvc_unregister_notify (conf->uds_rpc, glusterd_rpcsvc_notify,
+ this);
+
+ unlink (DEFAULT_GLUSTERD_SOCKFILE);
+
+ GF_FREE (conf->uds_rpc);
+ conf->uds_rpc = NULL;
+
+ return;
+}
+
+static int
+glusterd_init_snap_folder (xlator_t *this)
+{
+ int ret = -1;
+ struct stat buf = {0,};
+
+ GF_ASSERT (this);
+
+ /* Snapshot volumes are mounted under /var/run/gluster/snaps folder.
+ * But /var/run is normally a symbolic link to /run folder, which
+ * creates problems as the entry point in the mtab for the mount point
+ * and glusterd maintained entry point will be different. Therefore
+ * identify the correct run folder and use it for snap volume mounting.
+ */
+ ret = lstat (GLUSTERD_VAR_RUN_DIR, &buf);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "stat fails on %s, exiting. (errno = %d)",
+ GLUSTERD_VAR_RUN_DIR, errno);
+ goto out;
+ }
+
+ /* If /var/run is symlink then use /run folder */
+ if (S_ISLNK (buf.st_mode)) {
+ strcpy (snap_mount_folder, GLUSTERD_RUN_DIR);
+ } else {
+ strcpy (snap_mount_folder, GLUSTERD_VAR_RUN_DIR);
+ }
+
+ strcat (snap_mount_folder, GLUSTERD_DEFAULT_SNAPS_BRICK_DIR);
+
+ ret = stat (snap_mount_folder, &buf);
+ if ((ret != 0) && (ENOENT != errno)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "stat fails on %s, exiting. (errno = %d)",
+ snap_mount_folder, errno);
+ ret = -1;
+ goto out;
+ }
+
+ if ((!ret) && (!S_ISDIR(buf.st_mode))) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Provided snap path %s is not a directory,"
+ "exiting", snap_mount_folder);
+ ret = -1;
+ goto out;
+ }
+
+ if ((-1 == ret) && (ENOENT == errno)) {
+ /* Create missing folders */
+ ret = mkdir_p (snap_mount_folder, 0777, _gf_false);
+
+ if (-1 == ret) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Unable to create directory %s"
+ " ,errno = %d", snap_mount_folder, errno);
+ goto out;
+ }
+ }
+
+out:
+ return ret;
+}
+
+/*
+ * init - called during glusterd initialization
+ *
+ * @this:
+ *
+ */
+int
+init (xlator_t *this)
+{
+ int32_t ret = -1;
+ rpcsvc_t *rpc = NULL;
+ rpcsvc_t *uds_rpc = NULL;
+ glusterd_conf_t *conf = NULL;
+ data_t *dir_data = NULL;
+ struct stat buf = {0,};
+ char storedir [PATH_MAX] = {0,};
+ char workdir [PATH_MAX] = {0,};
+ char hooks_dir [PATH_MAX] = {0,};
+ char cmd_log_filename [PATH_MAX] = {0,};
+ int first_time = 0;
+ char *mountbroker_root = NULL;
+ int i = 0;
+ char *valgrind_str = NULL;
+
+ dir_data = dict_get (this->options, "working-directory");
+
+ if (!dir_data) {
+ //Use default working dir
+ strncpy (workdir, GLUSTERD_DEFAULT_WORKDIR, PATH_MAX);
+ } else {
+ strncpy (workdir, dir_data->data, PATH_MAX);
+ }
+
+ ret = stat (workdir, &buf);
+ if ((ret != 0) && (ENOENT != errno)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "stat fails on %s, exiting. (errno = %d)",
+ workdir, errno);
+ exit (1);
+ }
+
+ if ((!ret) && (!S_ISDIR(buf.st_mode))) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Provided working area %s is not a directory,"
+ "exiting", workdir);
+ exit (1);
+ }
+
+
+ if ((-1 == ret) && (ENOENT == errno)) {
+ ret = mkdir (workdir, 0777);
+
+ if (-1 == ret) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Unable to create directory %s"
+ " ,errno = %d", workdir, errno);
+ exit (1);
+ }
+
+ first_time = 1;
+ }
+
+ setenv ("GLUSTERD_WORKING_DIR", workdir, 1);
+ gf_log (this->name, GF_LOG_INFO, "Using %s as working directory",
+ workdir);
+
+ ret = glusterd_init_snap_folder (this);
+ if (ret) {
+ gf_log (this->name, GF_LOG_CRITICAL, "Unable to create "
+ "snap backend folder");
+ exit (1);
+ }
+
+ snprintf (cmd_log_filename, PATH_MAX,"%s/.cmd_log_history",
+ DEFAULT_LOG_FILE_DIRECTORY);
+ ret = gf_cmd_log_init (cmd_log_filename);
+
+ if (ret == -1) {
+ gf_log ("this->name", GF_LOG_CRITICAL,
+ "Unable to create cmd log file %s", cmd_log_filename);
+ exit (1);
+ }
+
+ snprintf (storedir, PATH_MAX, "%s/vols", workdir);
+
+ ret = mkdir (storedir, 0777);
+
+ if ((-1 == ret) && (errno != EEXIST)) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Unable to create volume directory %s"
+ " ,errno = %d", storedir, errno);
+ exit (1);
+ }
+
+ snprintf (storedir, PATH_MAX, "%s/peers", workdir);
+
+ ret = mkdir (storedir, 0777);
+
+ if ((-1 == ret) && (errno != EEXIST)) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Unable to create peers directory %s"
+ " ,errno = %d", storedir, errno);
+ exit (1);
+ }
+
+ snprintf (storedir, PATH_MAX, "%s/bricks", DEFAULT_LOG_FILE_DIRECTORY);
+ ret = mkdir (storedir, 0777);
+ if ((-1 == ret) && (errno != EEXIST)) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Unable to create logs directory %s"
+ " ,errno = %d", storedir, errno);
+ exit (1);
+ }
+
+ snprintf (storedir, PATH_MAX, "%s/nfs", workdir);
+ ret = mkdir (storedir, 0777);
+ if ((-1 == ret) && (errno != EEXIST)) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Unable to create nfs directory %s"
+ " ,errno = %d", storedir, errno);
+ exit (1);
+ }
+
+ snprintf (storedir, PATH_MAX, "%s/glustershd", workdir);
+ ret = mkdir (storedir, 0777);
+ if ((-1 == ret) && (errno != EEXIST)) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Unable to create glustershd directory %s"
+ " ,errno = %d", storedir, errno);
+ exit (1);
+ }
+
+ snprintf (storedir, PATH_MAX, "%s/groups", workdir);
+ ret = mkdir (storedir, 0777);
+ if ((-1 == ret) && (errno != EEXIST)) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Unable to create glustershd directory %s"
+ " ,errno = %d", storedir, errno);
+ exit (1);
+ }
+
+ ret = glusterd_rpcsvc_options_build (this->options);
+ if (ret)
+ goto out;
+ rpc = rpcsvc_init (this, this->ctx, this->options, 64);
+ if (rpc == NULL) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to init rpc");
+ goto out;
+ }
+
+ ret = rpcsvc_register_notify (rpc, glusterd_rpcsvc_notify, this);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpcsvc_register_notify returned %d", ret);
+ goto out;
+ }
+
+ /*
+ * only one (atmost a pair - rdma and socket) listener for
+ * glusterd1_mop_prog, gluster_pmap_prog and gluster_handshake_prog.
+ */
+ ret = rpcsvc_create_listeners (rpc, this->options, this->name);
+ if (ret < 1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "creation of listener failed");
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; i < gd_inet_programs_count; i++) {
+ ret = glusterd_program_register (this, rpc,
+ gd_inet_programs[i]);
+ if (ret) {
+ i--;
+ for (; i >= 0; i--)
+ rpcsvc_program_unregister (rpc,
+ gd_inet_programs[i]);
+
+ goto out;
+ }
+ }
+
+ /* Start a unix domain socket listener just for cli commands
+ * This should prevent ports from being wasted by being in TIMED_WAIT
+ * when cli commands are done continuously
+ */
+ uds_rpc = glusterd_init_uds_listener (this);
+ if (uds_rpc == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ conf = GF_CALLOC (1, sizeof (glusterd_conf_t),
+ gf_gld_mt_glusterd_conf_t);
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+ conf->shd = GF_CALLOC (1, sizeof (nodesrv_t),
+ gf_gld_mt_nodesrv_t);
+ GF_VALIDATE_OR_GOTO(this->name, conf->shd, out);
+ conf->nfs = GF_CALLOC (1, sizeof (nodesrv_t),
+ gf_gld_mt_nodesrv_t);
+ GF_VALIDATE_OR_GOTO(this->name, conf->nfs, out);
+
+ INIT_LIST_HEAD (&conf->peers);
+ INIT_LIST_HEAD (&conf->volumes);
+ INIT_LIST_HEAD (&conf->snapshots);
+ INIT_LIST_HEAD (&conf->missed_snaps_list);
+
+ pthread_mutex_init (&conf->mutex, NULL);
+ conf->rpc = rpc;
+ conf->uds_rpc = uds_rpc;
+ conf->gfs_mgmt = &gd_brick_prog;
+ strncpy (conf->workdir, workdir, PATH_MAX);
+
+ synclock_init (&conf->big_lock);
+ pthread_mutex_init (&conf->xprt_lock, NULL);
+ INIT_LIST_HEAD (&conf->xprt_list);
+
+ glusterd_friend_sm_init ();
+ glusterd_op_sm_init ();
+ glusterd_opinfo_init ();
+ glusterd_mgmt_v3_lock_init ();
+ glusterd_txn_opinfo_dict_init ();
+ ret = glusterd_sm_tr_log_init (&conf->op_sm_log,
+ glusterd_op_sm_state_name_get,
+ glusterd_op_sm_event_name_get,
+ GLUSTERD_TR_LOG_SIZE);
+ if (ret)
+ goto out;
+
+ conf->base_port = GF_IANA_PRIV_PORTS_START;
+ if (dict_get_uint32(this->options, "base-port", &conf->base_port) == 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "base-port override: %d", conf->base_port);
+ }
+
+ /* Set option to run bricks on valgrind if enabled in glusterd.vol */
+ conf->valgrind = _gf_false;
+ ret = dict_get_str (this->options, "run-with-valgrind", &valgrind_str);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "cannot get run-with-valgrind value");
+ }
+ if (valgrind_str) {
+ if (gf_string2boolean (valgrind_str, &(conf->valgrind))) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "run-with-valgrind value not a boolean string");
+ }
+ }
+
+ this->private = conf;
+ (void) glusterd_nodesvc_set_online_status ("glustershd", _gf_false);
+
+ GLUSTERD_GET_HOOKS_DIR (hooks_dir, GLUSTERD_HOOK_VER, conf);
+ if (stat (hooks_dir, &buf)) {
+ ret = glusterd_hooks_create_hooks_directory (conf->workdir);
+ if (-1 == ret) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Unable to create hooks directory ");
+ exit (1);
+ }
+ }
+
+ INIT_LIST_HEAD (&conf->mount_specs);
+
+ ret = dict_foreach (this->options, _install_mount_spec, NULL);
+ if (ret)
+ goto out;
+ ret = dict_get_str (this->options, "mountbroker-root",
+ &mountbroker_root);
+ if (ret)
+ ret = 0;
+ else
+ ret = check_prepare_mountbroker_root (mountbroker_root);
+ if (ret)
+ goto out;
+
+ ret = configure_syncdaemon (conf);
+ if (ret)
+ goto out;
+
+ ret = glusterd_restore ();
+ if (ret < 0)
+ goto out;
+
+ /* If there are no 'friends', this would be the best time to
+ * spawn process/bricks that may need (re)starting since last
+ * time (this) glusterd was up.*/
+
+ if (list_empty (&conf->peers)) {
+ glusterd_launch_synctask (glusterd_spawn_daemons, NULL);
+ }
+ ret = glusterd_options_init (this);
+ if (ret < 0)
+ goto out;
+
+ ret = glusterd_handle_upgrade_downgrade (this->options, conf);
+ if (ret)
+ goto out;
+
+ ret = glusterd_hooks_spawn_worker (this);
+ if (ret)
+ goto out;
+
+ ret = 0;
+out:
+ if (ret < 0) {
+ if (this->private != NULL) {
+ GF_FREE (this->private);
+ this->private = NULL;
+ }
+
+ }
+
+ return ret;
+}
+
+
+
+
+/*
+ * fini - finish function for glusterd, called before
+ * unloading gluster.
+ *
+ * @this:
+ *
+ */
+void
+fini (xlator_t *this)
+{
+ glusterd_conf_t *conf = NULL;
+ if (!this || !this->private)
+ goto out;
+
+ conf = this->private;
+
+ glusterd_stop_uds_listener (this);
+
+ FREE (conf->pmap);
+ if (conf->handle)
+ gf_store_handle_destroy (conf->handle);
+ glusterd_sm_tr_log_delete (&conf->op_sm_log);
+ glusterd_mgmt_v3_lock_fini ();
+ glusterd_txn_opinfo_dict_fini ();
+ GF_FREE (conf);
+
+ this->private = NULL;
+out:
+ return;
+}
+
+/*
+ * notify - notify function for glusterd
+ * @this:
+ * @trans:
+ * @event:
+ *
+ */
+int
+notify (xlator_t *this, int32_t event, void *data, ...)
+{
+ int ret = 0;
+
+ switch (event) {
+ case GF_EVENT_POLLIN:
+ break;
+
+ case GF_EVENT_POLLERR:
+ break;
+
+ case GF_EVENT_TRANSPORT_CLEANUP:
+ break;
+
+ default:
+ default_notify (this, event, data);
+ break;
+
+ }
+
+ return ret;
+}
+
+
+struct xlator_fops fops;
+
+struct xlator_cbks cbks;
+
+struct xlator_dumpops dumpops = {
+ .priv = glusterd_priv,
+};
+
+
+struct volume_options options[] = {
+ { .key = {"working-directory"},
+ .type = GF_OPTION_TYPE_PATH,
+ },
+ { .key = {"transport-type"},
+ .type = GF_OPTION_TYPE_ANY,
+ },
+ { .key = {"transport.*"},
+ .type = GF_OPTION_TYPE_ANY,
+ },
+ { .key = {"rpc-auth.*"},
+ .type = GF_OPTION_TYPE_ANY,
+ },
+ { .key = {"rpc-auth-allow-insecure"},
+ .type = GF_OPTION_TYPE_BOOL,
+ },
+ { .key = {"upgrade"},
+ .type = GF_OPTION_TYPE_BOOL,
+ },
+ { .key = {"downgrade"},
+ .type = GF_OPTION_TYPE_BOOL,
+ },
+ { .key = {"bind-insecure"},
+ .type = GF_OPTION_TYPE_BOOL,
+ },
+ { .key = {"mountbroker-root"},
+ .type = GF_OPTION_TYPE_PATH,
+ },
+ { .key = {"mountbroker.*"},
+ .type = GF_OPTION_TYPE_ANY,
+ },
+ { .key = {"mountbroker-"GEOREP".*"},
+ .type = GF_OPTION_TYPE_ANY,
+ },
+ { .key = {"mountbroker-"GHADOOP".*"},
+ .type = GF_OPTION_TYPE_ANY,
+ },
+ { .key = {GEOREP"-log-group"},
+ .type = GF_OPTION_TYPE_ANY,
+ },
+ { .key = {"run-with-valgrind"},
+ .type = GF_OPTION_TYPE_BOOL,
+ },
+ { .key = {"server-quorum-type"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = { "none", "server"},
+ .description = "If set to server, enables the specified "
+ "volume to participate in quorum."
+ },
+ { .key = {"server-quorum-ratio"},
+ .type = GF_OPTION_TYPE_PERCENT,
+ .description = "Sets the quorum percentage for the trusted "
+ "storage pool."
+ },
+ { .key = {"glusterd-sockfile"},
+ .type = GF_OPTION_TYPE_PATH,
+ .description = "The socket file on which glusterd should listen for "
+ "cli requests. Default is "DEFAULT_GLUSTERD_SOCKFILE "."
+ },
+ { .key = {"base-port"},
+ .type = GF_OPTION_TYPE_INT,
+ .description = "Sets the base port for portmap query"
+ },
+ { .key = {"snap-brick-path"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "directory where the bricks for the snapshots will be created"
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
new file mode 100644
index 000000000..3aa395ebc
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -0,0 +1,952 @@
+/*
+ Copyright (c) 2006-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_H_
+#define _GLUSTERD_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#include <dirent.h>
+#include <pthread.h>
+#include <libgen.h>
+
+#include "uuid.h"
+
+#include "rpc-clnt.h"
+#include "glusterfs.h"
+#include "xlator.h"
+#include "logging.h"
+#include "call-stub.h"
+#include "fd.h"
+#include "byte-order.h"
+#include "glusterd-mem-types.h"
+#include "rpcsvc.h"
+#include "glusterd-sm.h"
+#include "glusterd1-xdr.h"
+#include "protocol-common.h"
+#include "glusterd-pmap.h"
+#include "cli1-xdr.h"
+#include "syncop.h"
+#include "store.h"
+
+#define GLUSTERD_MAX_VOLUME_NAME 1000
+#define GLUSTERD_TR_LOG_SIZE 50
+#define GLUSTERD_NAME "glusterd"
+#define GLUSTERD_SOCKET_LISTEN_BACKLOG 128
+#define GLUSTERD_QUORUM_TYPE_KEY "cluster.server-quorum-type"
+#define GLUSTERD_QUORUM_RATIO_KEY "cluster.server-quorum-ratio"
+#define GLUSTERD_GLOBAL_OPT_VERSION "global-option-version"
+#define GLUSTERD_COMMON_PEM_PUB_FILE "/geo-replication/common_secret.pem.pub"
+#define GEO_CONF_MAX_OPT_VALS 5
+#define GLUSTERD_CREATE_HOOK_SCRIPT "/hooks/1/gsync-create/post/" \
+ "S56glusterd-geo-rep-create-post.sh"
+
+
+#define GLUSTERD_SNAPS_MAX_HARD_LIMIT 256
+#define GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT 90
+#define GLUSTERD_SNAPS_MAX_SOFT_LIMIT_PERCENT 100
+#define GLUSTERD_SERVER_QUORUM "server"
+
+#define FMTSTR_CHECK_VOL_EXISTS "Volume %s does not exist"
+#define FMTSTR_RESOLVE_BRICK "Could not find peer on which brick %s:%s resides"
+
+#define LOGSTR_FOUND_BRICK "Found brick %s:%s in volume %s"
+#define LOGSTR_BUILD_PAYLOAD "Failed to build payload for operation 'Volume %s'"
+#define LOGSTR_STAGE_FAIL "Staging of operation 'Volume %s' failed on %s %s %s"
+#define LOGSTR_COMMIT_FAIL "Commit of operation 'Volume %s' failed on %s %s %s"
+
+#define OPERRSTR_BUILD_PAYLOAD "Failed to build payload. Please check the log "\
+ "file for more details."
+#define OPERRSTR_STAGE_FAIL "Staging failed on %s. Please check the log file " \
+ "for more details."
+#define OPERRSTR_COMMIT_FAIL "Commit failed on %s. Please check the log file "\
+ "for more details."
+
+struct glusterd_volinfo_;
+typedef struct glusterd_volinfo_ glusterd_volinfo_t;
+
+struct glusterd_snap_;
+typedef struct glusterd_snap_ glusterd_snap_t;
+
+typedef enum glusterd_op_ {
+ GD_OP_NONE = 0,
+ GD_OP_CREATE_VOLUME,
+ GD_OP_START_BRICK,
+ GD_OP_STOP_BRICK,
+ GD_OP_DELETE_VOLUME,
+ GD_OP_START_VOLUME,
+ GD_OP_STOP_VOLUME,
+ GD_OP_DEFRAG_VOLUME,
+ GD_OP_ADD_BRICK,
+ GD_OP_REMOVE_BRICK,
+ GD_OP_REPLACE_BRICK,
+ GD_OP_SET_VOLUME,
+ GD_OP_RESET_VOLUME,
+ GD_OP_SYNC_VOLUME,
+ GD_OP_LOG_ROTATE,
+ GD_OP_GSYNC_SET,
+ GD_OP_PROFILE_VOLUME,
+ GD_OP_QUOTA,
+ GD_OP_STATUS_VOLUME,
+ GD_OP_REBALANCE,
+ GD_OP_HEAL_VOLUME,
+ GD_OP_STATEDUMP_VOLUME,
+ GD_OP_LIST_VOLUME,
+ GD_OP_CLEARLOCKS_VOLUME,
+ GD_OP_DEFRAG_BRICK_VOLUME,
+ GD_OP_COPY_FILE,
+ GD_OP_SYS_EXEC,
+ GD_OP_GSYNC_CREATE,
+ GD_OP_SNAP,
+ GD_OP_MAX,
+} glusterd_op_t;
+
+extern const char * gd_op_list[];
+
+struct glusterd_volgen {
+ dict_t *dict;
+};
+
+typedef struct {
+ struct rpc_clnt *rpc;
+ gf_boolean_t online;
+} nodesrv_t;
+
+typedef struct {
+ gf_boolean_t quorum;
+ double quorum_ratio;
+ uint64_t gl_opt_version;
+} gd_global_opts_t;
+
+typedef struct {
+ struct _volfile_ctx *volfile;
+ pthread_mutex_t mutex;
+ struct list_head peers;
+ struct list_head xaction_peers;
+ gf_boolean_t verify_volfile_checksum;
+ gf_boolean_t trace;
+ uuid_t uuid;
+ char workdir[PATH_MAX];
+ rpcsvc_t *rpc;
+ nodesrv_t *shd;
+ nodesrv_t *nfs;
+ struct pmap_registry *pmap;
+ struct list_head volumes;
+ struct list_head snapshots; /*List of snap volumes */
+ pthread_mutex_t xprt_lock;
+ struct list_head xprt_list;
+ gf_store_handle_t *handle;
+ gf_timer_t *timer;
+ glusterd_sm_tr_log_t op_sm_log;
+ struct rpc_clnt_program *gfs_mgmt;
+
+ struct list_head mount_specs;
+ gf_boolean_t valgrind;
+ pthread_t brick_thread;
+ void *hooks_priv;
+ /* need for proper handshake_t */
+ int op_version; /* Starts with 1 for 3.3.0 */
+ xlator_t *xl; /* Should be set to 'THIS' before creating thread */
+ gf_boolean_t pending_quorum_action;
+ dict_t *opts;
+ synclock_t big_lock;
+ gf_boolean_t restart_done;
+ rpcsvc_t *uds_rpc; /* RPCSVC for the unix domain socket */
+ uint32_t base_port;
+ uint64_t snap_max_hard_limit;
+ uint64_t snap_max_soft_limit;
+ char *snap_bricks_directory;
+ gf_store_handle_t *missed_snaps_list_shandle;
+ struct list_head missed_snaps_list;
+} glusterd_conf_t;
+
+
+typedef enum gf_brick_status {
+ GF_BRICK_STOPPED,
+ GF_BRICK_STARTED,
+} gf_brick_status_t;
+
+struct glusterd_brickinfo {
+ char hostname[1024];
+ char path[PATH_MAX];
+ char device_path[PATH_MAX];
+ struct list_head brick_list;
+ uuid_t uuid;
+ int port;
+ int rdma_port;
+ char *logfile;
+ gf_boolean_t signed_in;
+ gf_store_handle_t *shandle;
+ gf_brick_status_t status;
+ struct rpc_clnt *rpc;
+ int decommissioned;
+ char vg[PATH_MAX]; /* FIXME: Use max size for length of vg */
+ int caps; /* Capability */
+ int32_t snap_status;
+};
+
+typedef struct glusterd_brickinfo glusterd_brickinfo_t;
+
+struct gf_defrag_brickinfo_ {
+ char *name;
+ int files;
+ int size;
+};
+
+typedef int (*defrag_cbk_fn_t) (glusterd_volinfo_t *volinfo,
+ gf_defrag_status_t status);
+
+struct glusterd_defrag_info_ {
+ uint64_t total_files;
+ uint64_t total_data;
+ uint64_t num_files_lookedup;
+ uint64_t total_failures;
+ gf_lock_t lock;
+ int cmd;
+ pthread_t th;
+ gf_defrag_status_t defrag_status;
+ struct rpc_clnt *rpc;
+ uint32_t connected;
+ char mount[1024];
+ char databuf[131072];
+ struct gf_defrag_brickinfo_ *bricks; /* volinfo->brick_count */
+
+ defrag_cbk_fn_t cbk_fn;
+};
+
+
+typedef struct glusterd_defrag_info_ glusterd_defrag_info_t;
+
+typedef enum gf_transport_type_ {
+ GF_TRANSPORT_TCP, //DEFAULT
+ GF_TRANSPORT_RDMA,
+ GF_TRANSPORT_BOTH_TCP_RDMA,
+} gf_transport_type;
+
+#define GF_DEFAULT_NFS_TRANSPORT GF_TRANSPORT_RDMA
+
+typedef enum gf_rb_status_ {
+ GF_RB_STATUS_NONE,
+ GF_RB_STATUS_STARTED,
+ GF_RB_STATUS_PAUSED,
+} gf_rb_status_t;
+
+struct _auth {
+ char *username;
+ char *password;
+};
+
+typedef struct _auth auth_t;
+
+/* Capabilities of xlator */
+#define CAPS_BD 0x00000001
+#define CAPS_THIN 0x00000002
+#define CAPS_OFFLOAD_COPY 0x00000004
+#define CAPS_OFFLOAD_SNAPSHOT 0x00000008
+
+struct glusterd_rebalance_ {
+ gf_defrag_status_t defrag_status;
+ uint64_t rebalance_files;
+ uint64_t rebalance_data;
+ uint64_t lookedup_files;
+ uint64_t skipped_files;
+ glusterd_defrag_info_t *defrag;
+ gf_cli_defrag_type defrag_cmd;
+ uint64_t rebalance_failures;
+ uuid_t rebalance_id;
+ double rebalance_time;
+ glusterd_op_t op;
+ dict_t *dict; /* Dict to store misc information
+ * like list of bricks being removed */
+};
+
+typedef struct glusterd_rebalance_ glusterd_rebalance_t;
+
+struct glusterd_replace_brick_ {
+ gf_rb_status_t rb_status;
+ glusterd_brickinfo_t *src_brick;
+ glusterd_brickinfo_t *dst_brick;
+ uuid_t rb_id;
+};
+
+typedef struct glusterd_replace_brick_ glusterd_replace_brick_t;
+
+struct glusterd_volinfo_ {
+ gf_lock_t lock;
+ char volname[GLUSTERD_MAX_VOLUME_NAME];
+ gf_boolean_t is_snap_volume;
+ glusterd_snap_t *snapshot;
+ gf_boolean_t is_volume_restored;
+ char parent_volname[GLUSTERD_MAX_VOLUME_NAME];
+ /* In case of a snap volume
+ i.e (is_snap_volume == TRUE) this
+ field will contain the name of
+ the volume which is snapped. In
+ case of a non-snap volume, this
+ field will be initialized as N/A */
+ int type;
+ int brick_count;
+ uint64_t snap_count;
+ uint64_t snap_max_hard_limit;
+ struct list_head vol_list;
+ /* In case of a snap volume
+ i.e (is_snap_volume == TRUE) this
+ is linked to glusterd_snap_t->volumes.
+ In case of a non-snap volume, this is
+ linked to glusterd_conf_t->volumes */
+ struct list_head snapvol_list;
+ /* This is a current pointer for
+ glusterd_volinfo_t->snap_volumes */
+ struct list_head bricks;
+ struct list_head snap_volumes;
+ /* TODO : Need to remove this, as this
+ * is already part of snapshot object.
+ */
+ glusterd_volume_status status;
+ int sub_count; /* backward compatibility */
+ int stripe_count;
+ int replica_count;
+ int subvol_count; /* Number of subvolumes in a
+ distribute volume */
+ int dist_leaf_count; /* Number of bricks in one
+ distribute subvolume */
+ int port;
+ gf_store_handle_t *shandle;
+ gf_store_handle_t *rb_shandle;
+ gf_store_handle_t *node_state_shandle;
+
+ /* Defrag/rebalance related */
+ glusterd_rebalance_t rebal;
+
+ /* Replace brick status */
+ glusterd_replace_brick_t rep_brick;
+
+ int version;
+ uint32_t cksum;
+ gf_transport_type transport_type;
+ gf_transport_type nfs_transport_type;
+
+ dict_t *dict;
+
+ uuid_t volume_id;
+ auth_t auth;
+ char *logdir;
+
+ dict_t *gsync_slaves;
+
+ int decommission_in_progress;
+ xlator_t *xl;
+
+ gf_boolean_t memory_accounting;
+ int caps; /* Capability */
+
+ int op_version;
+ int client_op_version;
+};
+
+typedef enum gd_snap_status_ {
+ GD_SNAP_STATUS_NONE,
+ GD_SNAP_STATUS_INIT,
+ GD_SNAP_STATUS_IN_USE,
+ GD_SNAP_STATUS_DECOMMISSION,
+ GD_SNAP_STATUS_RESTORED,
+} gd_snap_status_t;
+
+struct glusterd_snap_ {
+ gf_lock_t lock;
+ struct list_head volumes;
+ struct list_head snap_list;
+ char snapname[GLUSTERD_MAX_SNAP_NAME];
+ uuid_t snap_id;
+ char *description;
+ time_t time_stamp;
+ gf_boolean_t snap_restored;
+ gd_snap_status_t snap_status;
+ gf_store_handle_t *shandle;
+};
+
+typedef struct glusterd_snap_op_ {
+ int32_t brick_num;
+ char *brick_path;
+ int32_t op;
+ int32_t status;
+ struct list_head snap_ops_list;
+} glusterd_snap_op_t;
+
+typedef struct glusterd_missed_snap_ {
+ char *node_snap_info;
+ struct list_head missed_snaps;
+ struct list_head snap_ops;
+} glusterd_missed_snap_info;
+
+typedef enum gd_node_type_ {
+ GD_NODE_NONE,
+ GD_NODE_BRICK,
+ GD_NODE_SHD,
+ GD_NODE_REBALANCE,
+ GD_NODE_NFS,
+} gd_node_type;
+
+typedef enum missed_snap_stat {
+ GD_MISSED_SNAP_NONE,
+ GD_MISSED_SNAP_PENDING,
+ GD_MISSED_SNAP_DONE,
+} missed_snap_stat;
+
+typedef struct glusterd_pending_node_ {
+ struct list_head list;
+ void *node;
+ gd_node_type type;
+ int32_t index;
+} glusterd_pending_node_t;
+
+struct gsync_config_opt_vals_ {
+ char *op_name;
+ int no_of_pos_vals;
+ gf_boolean_t case_sensitive;
+ char *values[GEO_CONF_MAX_OPT_VALS];
+};
+
+enum glusterd_op_ret {
+ GLUSTERD_CONNECTION_AWAITED = 100,
+};
+
+enum glusterd_vol_comp_status_ {
+ GLUSTERD_VOL_COMP_NONE = 0,
+ GLUSTERD_VOL_COMP_SCS = 1,
+ GLUSTERD_VOL_COMP_UPDATE_REQ,
+ GLUSTERD_VOL_COMP_RJT,
+};
+
+#define GLUSTERD_DEFAULT_WORKDIR "/var/lib/glusterd"
+#define GLUSTERD_DEFAULT_PORT GF_DEFAULT_BASE_PORT
+#define GLUSTERD_INFO_FILE "glusterd.info"
+#define GLUSTERD_VOLUME_DIR_PREFIX "vols"
+#define GLUSTERD_PEER_DIR_PREFIX "peers"
+#define GLUSTERD_VOLUME_INFO_FILE "info"
+#define GLUSTERD_SNAP_INFO_FILE "info"
+#define GLUSTERD_VOLUME_RBSTATE_FILE "rbstate"
+#define GLUSTERD_BRICK_INFO_DIR "bricks"
+#define GLUSTERD_CKSUM_FILE "cksum"
+#define GLUSTERD_TRASH "trash"
+#define GLUSTERD_NODE_STATE_FILE "node_state.info"
+#define GLUSTERD_MISSED_SNAPS_LIST_FILE "missed_snaps_list"
+#define GLUSTERD_VOL_SNAP_DIR_PREFIX "snaps"
+
+#define GLUSTERD_DEFAULT_SNAPS_BRICK_DIR "/gluster/snaps"
+#define GLUSTERD_VAR_RUN_DIR "/var/run"
+#define GLUSTERD_RUN_DIR "/run"
+
+/* definitions related to replace brick */
+#define RB_CLIENT_MOUNTPOINT "rb_mount"
+#define RB_CLIENTVOL_FILENAME "rb_client.vol"
+#define RB_DSTBRICK_PIDFILE "rb_dst_brick.pid"
+#define RB_DSTBRICKVOL_FILENAME "rb_dst_brick.vol"
+#define RB_PUMP_DEF_ARG "default"
+
+#define GLUSTERD_UUID_LEN 50
+
+typedef ssize_t (*gd_serialize_t) (struct iovec outmsg, void *args);
+
+#define GLUSTERD_GET_VOLUME_DIR(path, volinfo, priv) \
+ if (volinfo->is_snap_volume) { \
+ snprintf (path, PATH_MAX, "%s/snaps/%s/%s", priv->workdir, \
+ volinfo->snapshot->snapname, volinfo->volname); \
+ } else { \
+ snprintf (path, PATH_MAX, "%s/vols/%s", priv->workdir, \
+ volinfo->volname); \
+ }
+
+#define GLUSTERD_GET_SNAP_DIR(path, snap, priv) \
+ snprintf (path, PATH_MAX, "%s/snaps/%s", priv->workdir, \
+ snap->snapname);
+
+#define GLUSTERD_GET_BRICK_DIR(path, volinfo, priv) \
+ if (volinfo->is_snap_volume) { \
+ snprintf (path, PATH_MAX, "%s/snaps/%s/%s/%s", priv->workdir, \
+ volinfo->snapshot->snapname, volinfo->volname, \
+ GLUSTERD_BRICK_INFO_DIR); \
+ } else { \
+ snprintf (path, PATH_MAX, "%s/%s/%s/%s", priv->workdir, \
+ GLUSTERD_VOLUME_DIR_PREFIX, volinfo->volname, \
+ GLUSTERD_BRICK_INFO_DIR); \
+ }
+
+#define GLUSTERD_GET_NFS_DIR(path, priv) \
+ snprintf (path, PATH_MAX, "%s/nfs", priv->workdir);
+
+#define GLUSTERD_REMOVE_SLASH_FROM_PATH(path,string) do { \
+ int i = 0; \
+ for (i = 1; i < strlen (path); i++) { \
+ string[i-1] = path[i]; \
+ if (string[i-1] == '/') \
+ string[i-1] = '-'; \
+ } \
+ } while (0)
+
+#define GLUSTERD_GET_BRICK_PIDFILE(pidfile,volinfo,brickinfo, priv) do { \
+ char exp_path[PATH_MAX] = {0,}; \
+ char volpath[PATH_MAX] = {0,}; \
+ GLUSTERD_GET_VOLUME_DIR (volpath, volinfo, priv); \
+ GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, exp_path); \
+ snprintf (pidfile, PATH_MAX, "%s/run/%s-%s.pid", \
+ volpath, brickinfo->hostname, exp_path); \
+ } while (0)
+
+#define GLUSTERD_GET_NFS_PIDFILE(pidfile,nfspath) { \
+ snprintf (pidfile, PATH_MAX, "%s/run/nfs.pid", \
+ nfspath); \
+ }
+
+#define GLUSTERD_STACK_DESTROY(frame) do {\
+ frame->local = NULL; \
+ STACK_DESTROY (frame->root); \
+ } while (0)
+
+#define GLUSTERD_GET_DEFRAG_DIR(path, volinfo, priv) do { \
+ char vol_path[PATH_MAX]; \
+ GLUSTERD_GET_VOLUME_DIR(vol_path, volinfo, priv); \
+ snprintf (path, PATH_MAX, "%s/rebalance",vol_path); \
+ } while (0)
+
+#define GLUSTERD_GET_DEFRAG_SOCK_FILE(path, volinfo, priv) do { \
+ char defrag_path[PATH_MAX]; \
+ GLUSTERD_GET_DEFRAG_DIR(defrag_path, volinfo, priv); \
+ snprintf (path, PATH_MAX, "%s/%s.sock", defrag_path, \
+ uuid_utoa(MY_UUID)); \
+ } while (0)
+
+#define GLUSTERD_GET_DEFRAG_PID_FILE(path, volinfo, priv) do { \
+ char defrag_path[PATH_MAX]; \
+ GLUSTERD_GET_DEFRAG_DIR(defrag_path, volinfo, priv); \
+ snprintf (path, PATH_MAX, "%s/%s.pid", defrag_path, \
+ uuid_utoa(MY_UUID)); \
+ } while (0)
+
+#define GLUSTERD_GET_UUID_NOHYPHEN(ret_string, uuid) do { \
+ char *snap_volname_ptr = ret_string; \
+ char *snap_volid_ptr = uuid_utoa(uuid); \
+ while (*snap_volid_ptr) { \
+ if (*snap_volid_ptr == '-') { \
+ snap_volid_ptr++; \
+ } else { \
+ (*snap_volname_ptr++) = \
+ (*snap_volid_ptr++); \
+ } \
+ } \
+ *snap_volname_ptr = '\0'; \
+ } while (0)
+
+int glusterd_uuid_init();
+
+int glusterd_uuid_generate_save ();
+
+#define MY_UUID (__glusterd_uuid())
+
+static inline unsigned char *
+__glusterd_uuid()
+{
+ glusterd_conf_t *priv = THIS->private;
+
+ if (uuid_is_null (priv->uuid))
+ glusterd_uuid_init();
+ return &priv->uuid[0];
+}
+
+int glusterd_big_locked_notify (struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event,
+ void *data, rpc_clnt_notify_t notify_fn);
+
+int
+glusterd_big_locked_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe, fop_cbk_fn_t fn);
+
+int glusterd_big_locked_handler (rpcsvc_request_t *req, rpcsvc_actor actor_fn);
+
+int32_t
+glusterd_brick_from_brickinfo (glusterd_brickinfo_t *brickinfo,
+ char **new_brick);
+int
+glusterd_probe_begin (rpcsvc_request_t *req, const char *hoststr, int port,
+ dict_t *dict);
+
+int
+glusterd_xfer_friend_add_resp (rpcsvc_request_t *req, char *myhostname,
+ char *remote_hostname, int port, int32_t op_ret,
+ int32_t op_errno);
+
+int
+glusterd_friend_find (uuid_t uuid, char *hostname,
+ glusterd_peerinfo_t **peerinfo);
+
+int
+glusterd_friend_add (const char *hoststr, int port,
+ glusterd_friend_sm_state_t state,
+ uuid_t *uuid, glusterd_peerinfo_t **friend,
+ gf_boolean_t restore, glusterd_peerctx_args_t *args);
+
+int
+glusterd_friend_rpc_create (xlator_t *this, glusterd_peerinfo_t *peerinfo,
+ glusterd_peerctx_args_t *args);
+int
+glusterd_friend_remove (uuid_t uuid, char *hostname);
+
+int
+glusterd_op_lock_send_resp (rpcsvc_request_t *req, int32_t status);
+
+int
+glusterd_op_unlock_send_resp (rpcsvc_request_t *req, int32_t status);
+
+int
+glusterd_op_mgmt_v3_lock_send_resp (rpcsvc_request_t *req,
+ uuid_t *txn_id, int32_t status);
+
+int
+glusterd_op_mgmt_v3_unlock_send_resp (rpcsvc_request_t *req,
+ uuid_t *txn_id, int32_t status);
+
+int
+glusterd_op_stage_send_resp (rpcsvc_request_t *req,
+ int32_t op, int32_t status,
+ char *op_errstr, dict_t *rsp_dict);
+
+int
+glusterd_op_commmit_send_resp (rpcsvc_request_t *req,
+ int32_t op, int32_t status);
+
+int32_t
+glusterd_create_volume (rpcsvc_request_t *req, dict_t *dict);
+
+int
+glusterd_handle_incoming_friend_req (rpcsvc_request_t *req);
+
+int
+glusterd_handle_probe_query (rpcsvc_request_t *req);
+
+int
+glusterd_handle_cluster_lock (rpcsvc_request_t *req);
+
+int
+glusterd_handle_cluster_unlock (rpcsvc_request_t *req);
+
+int
+glusterd_handle_stage_op (rpcsvc_request_t *req);
+
+int
+glusterd_handle_commit_op (rpcsvc_request_t *req);
+
+int
+glusterd_handle_cli_probe (rpcsvc_request_t *req);
+
+int
+glusterd_handle_create_volume (rpcsvc_request_t *req);
+
+int
+glusterd_handle_defrag_volume (rpcsvc_request_t *req);
+
+int
+glusterd_handle_defrag_volume_v2 (rpcsvc_request_t *req);
+
+int
+glusterd_xfer_cli_probe_resp (rpcsvc_request_t *req, int32_t op_ret,
+ int32_t op_errno, char *op_errstr, char *hostname,
+ int port, dict_t *dict);
+
+int
+glusterd_op_commit_send_resp (rpcsvc_request_t *req,
+ int32_t op, int32_t status, char *op_errstr,
+ dict_t *rsp_dict);
+
+int
+glusterd_xfer_friend_remove_resp (rpcsvc_request_t *req, char *hostname, int port);
+
+int
+glusterd_deprobe_begin (rpcsvc_request_t *req, const char *hoststr, int port,
+ uuid_t uuid, dict_t *dict);
+
+int
+glusterd_handle_cli_deprobe (rpcsvc_request_t *req);
+
+int
+glusterd_handle_incoming_unfriend_req (rpcsvc_request_t *req);
+
+int32_t
+glusterd_list_friends (rpcsvc_request_t *req, dict_t *dict, int32_t flags);
+
+int
+glusterd_handle_cli_list_friends (rpcsvc_request_t *req);
+
+int
+glusterd_handle_cli_start_volume (rpcsvc_request_t *req);
+
+int
+glusterd_handle_friend_update (rpcsvc_request_t *req);
+
+int
+glusterd_handle_cli_stop_volume (rpcsvc_request_t *req);
+
+int
+glusterd_handle_cli_delete_volume (rpcsvc_request_t *req);
+
+int
+glusterd_handle_cli_get_volume (rpcsvc_request_t *req);
+
+int32_t
+glusterd_get_volumes (rpcsvc_request_t *req, dict_t *dict, int32_t flags);
+
+int
+glusterd_handle_add_brick (rpcsvc_request_t *req);
+
+int
+glusterd_handle_replace_brick (rpcsvc_request_t *req);
+
+int
+glusterd_handle_remove_brick (rpcsvc_request_t *req);
+
+int
+glusterd_handle_log_rotate (rpcsvc_request_t *req);
+
+int
+glusterd_handle_sync_volume (rpcsvc_request_t *req);
+
+int32_t
+glusterd_log_filename (rpcsvc_request_t *req, dict_t *dict);
+
+int32_t
+glusterd_log_rotate (rpcsvc_request_t *req, dict_t *dict);
+
+int32_t
+glusterd_remove_brick (rpcsvc_request_t *req, dict_t *dict);
+
+int32_t
+glusterd_set_volume (rpcsvc_request_t *req, dict_t *dict);
+
+int32_t
+glusterd_reset_volume (rpcsvc_request_t *req, dict_t *dict);
+
+int32_t
+glusterd_gsync_set (rpcsvc_request_t *req, dict_t *dict);
+
+int32_t
+glusterd_quota (rpcsvc_request_t *req, dict_t *dict);
+
+int
+glusterd_handle_set_volume (rpcsvc_request_t *req);
+
+int
+glusterd_handle_reset_volume (rpcsvc_request_t *req);
+
+int
+glusterd_handle_copy_file (rpcsvc_request_t *req);
+
+int
+glusterd_handle_sys_exec (rpcsvc_request_t *req);
+
+int
+glusterd_handle_gsync_set (rpcsvc_request_t *req);
+
+int
+glusterd_handle_quota (rpcsvc_request_t *req);
+
+int
+glusterd_handle_fsm_log (rpcsvc_request_t *req);
+
+int
+glusterd_xfer_cli_deprobe_resp (rpcsvc_request_t *req, int32_t op_ret,
+ int32_t op_errno, char *op_errstr,
+ char *hostname, dict_t *dict);
+
+int
+glusterd_fetchspec_notify (xlator_t *this);
+
+int
+glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,
+ dict_t *volumes, int count);
+
+int
+glusterd_restart_bricks (glusterd_conf_t *conf);
+
+int32_t
+glusterd_volume_txn (rpcsvc_request_t *req, char *volname, int flags,
+ glusterd_op_t op);
+
+int
+glusterd_peer_dump_version (xlator_t *this, struct rpc_clnt *rpc,
+ glusterd_peerctx_t *peerctx);
+
+int
+glusterd_validate_reconfopts (glusterd_volinfo_t *volinfo, dict_t *val_dict, char **op_errstr);
+int
+glusterd_handle_cli_profile_volume (rpcsvc_request_t *req);
+
+int
+glusterd_handle_getwd (rpcsvc_request_t *req);
+
+int32_t
+glusterd_set_volume (rpcsvc_request_t *req, dict_t *dict);
+int
+glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event,
+ void *data);
+int
+glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data);
+
+int
+glusterd_nodesvc_rpc_notify (struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data);
+
+int
+glusterd_rpc_create (struct rpc_clnt **rpc, dict_t *options,
+ rpc_clnt_notify_t notify_fn, void *notify_data);
+
+
+/* handler functions */
+int32_t glusterd_op_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx,
+ char *err_str, size_t size);
+
+/* removed other definitions as they have been defined elsewhere in this file*/
+
+int glusterd_handle_cli_statedump_volume (rpcsvc_request_t *req);
+int glusterd_handle_cli_clearlocks_volume (rpcsvc_request_t *req);
+
+int glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
+ size_t len, int cmd, defrag_cbk_fn_t cbk,
+ glusterd_op_t op);
+int
+glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo,
+ glusterd_conf_t *priv, int cmd);
+
+int glusterd_handle_cli_heal_volume (rpcsvc_request_t *req);
+
+int glusterd_handle_cli_list_volume (rpcsvc_request_t *req);
+
+int
+glusterd_handle_snapshot (rpcsvc_request_t *req);
+
+/* op-sm functions */
+int glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr);
+int glusterd_op_heal_volume (dict_t *dict, char **op_errstr);
+int glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr);
+int glusterd_op_gsync_set (dict_t *dict, char **op_errstr, dict_t *rsp_dict);
+int glusterd_op_stage_copy_file (dict_t *dict, char **op_errstr);
+int glusterd_op_copy_file (dict_t *dict, char **op_errstr);
+int glusterd_op_stage_sys_exec (dict_t *dict, char **op_errstr);
+int glusterd_op_sys_exec (dict_t *dict, char **op_errstr, dict_t *rsp_dict);
+int glusterd_op_stage_gsync_create (dict_t *dict, char **op_errstr);
+int glusterd_op_gsync_create (dict_t *dict, char **op_errstr, dict_t *rsp_dict);
+int glusterd_op_quota (dict_t *dict, char **op_errstr, dict_t *rsp_dict);
+int glusterd_op_stage_quota (dict_t *dict, char **op_errstr);
+int glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict);
+int glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict);
+int glusterd_op_log_rotate (dict_t *dict);
+int glusterd_op_stage_log_rotate (dict_t *dict, char **op_errstr);
+int glusterd_op_stage_create_volume (dict_t *dict, char **op_errstr);
+int glusterd_op_stage_start_volume (dict_t *dict, char **op_errstr);
+int glusterd_op_stage_stop_volume (dict_t *dict, char **op_errstr);
+int glusterd_op_stage_delete_volume (dict_t *dict, char **op_errstr);
+int glusterd_op_create_volume (dict_t *dict, char **op_errstr);
+int glusterd_op_start_volume (dict_t *dict, char **op_errstr);
+int glusterd_op_stop_volume (dict_t *dict);
+int glusterd_op_delete_volume (dict_t *dict);
+
+int glusterd_op_add_brick (dict_t *dict, char **op_errstr);
+int glusterd_op_remove_brick (dict_t *dict, char **op_errstr);
+int glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr);
+int glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr);
+
+int glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr);
+int glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict);
+
+int glusterd_op_stage_statedump_volume (dict_t *dict, char **op_errstr);
+int glusterd_op_statedump_volume (dict_t *dict, char **op_errstr);
+
+int glusterd_op_stage_clearlocks_volume (dict_t *dict, char **op_errstr);
+int glusterd_op_clearlocks_volume (dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict);
+
+/* misc */
+void glusterd_do_replace_brick (void *data);
+int glusterd_op_perform_remove_brick (glusterd_volinfo_t *volinfo, char *brick,
+ int force, int *need_migrate);
+int glusterd_op_stop_volume_args_get (dict_t *dict, char** volname, int *flags);
+int glusterd_op_statedump_volume_args_get (dict_t *dict, char **volname,
+ char **options, int *option_cnt);
+
+int glusterd_op_gsync_args_get (dict_t *dict, char **op_errstr,
+ char **master, char **slave, char **host_uuid);
+int glusterd_stop_volume (glusterd_volinfo_t *volinfo);
+
+/* Synctask part */
+int32_t glusterd_op_begin_synctask (rpcsvc_request_t *req, glusterd_op_t op,
+ void *dict);
+int32_t
+glusterd_defrag_event_notify_handle (dict_t *dict);
+
+int32_t
+glusterd_txn_opinfo_dict_init ();
+
+void
+glusterd_txn_opinfo_dict_fini ();
+
+void
+glusterd_txn_opinfo_init ();
+
+/* snapshot */
+glusterd_snap_t*
+glusterd_new_snap_object();
+
+int32_t
+glusterd_list_add_snapvol (glusterd_volinfo_t *origin_vol,
+ glusterd_volinfo_t *snap_vol);
+
+glusterd_snap_t*
+glusterd_remove_snap_by_id (uuid_t snap_id);
+
+glusterd_snap_t*
+glusterd_remove_snap_by_name (char *snap_name);
+
+glusterd_snap_t*
+glusterd_find_snap_by_name (char *snap_name);
+
+glusterd_snap_t*
+glusterd_find_snap_by_id (uuid_t snap_id);
+
+int
+glusterd_snapshot_prevalidate (dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict);
+int
+glusterd_snapshot_brickop (dict_t *dict, char **op_errstr, dict_t *rsp_dict);
+int
+glusterd_snapshot (dict_t *dict, char **op_errstr, dict_t *rsp_dict);
+int
+glusterd_snapshot_postvalidate (dict_t *dict, int32_t op_ret, char **op_errstr,
+ dict_t *rsp_dict);
+char *
+glusterd_build_snap_device_path (char *device, char *snapname);
+int32_t
+glusterd_snap_remove (dict_t *rsp_dict, glusterd_snap_t *snap,
+ gf_boolean_t remove_lvm, gf_boolean_t force);
+int32_t
+glusterd_snapshot_cleanup (dict_t *dict, char **op_errstr, dict_t *rsp_dict);
+
+int32_t
+glusterd_add_missed_snaps_to_list (dict_t *dict, int32_t missed_snap_count);
+
+int32_t
+glusterd_store_missed_snaps_list (char *missed_info, int32_t brick_num,
+ char *brick_path, int32_t snap_op,
+ int32_t snap_status);
+
+#endif
diff --git a/xlators/mount/fuse/src/Makefile.am b/xlators/mount/fuse/src/Makefile.am
index e85d63887..653121d18 100644
--- a/xlators/mount/fuse/src/Makefile.am
+++ b/xlators/mount/fuse/src/Makefile.am
@@ -1,17 +1,36 @@
-noinst_HEADERS = $(CONTRIBDIR)/fuse-include/fuse_kernel.h\
- $(CONTRIBDIR)/fuse-include/fuse-mount.h\
- $(CONTRIBDIR)/fuse-include/fuse-misc.h
+noinst_HEADERS_linux = $(CONTRIBDIR)/fuse-include/fuse_kernel.h\
+ $(CONTRIBDIR)/fuse-include/mount_util.h\
+ $(CONTRIBDIR)/fuse-lib/mount-gluster-compat.h
+noinst_HEADERS_darwin = $(CONTRIBDIR)/fuse-include/fuse_kernel_macfuse.h
+noinst_HEADERS_common = $(CONTRIBDIR)/fuse-include/fuse-mount.h\
+ $(CONTRIBDIR)/fuse-include/fuse-misc.h fuse-mem-types.h \
+ fuse-bridge.h
+
+if GF_DARWIN_HOST_OS
+ noinst_HEADERS = $(noinst_HEADERS_common) $(noinst_HEADERS_darwin)
+else
+ noinst_HEADERS = $(noinst_HEADERS_common) $(noinst_HEADERS_linux)
+endif
xlator_LTLIBRARIES = fuse.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/mount
-fuse_la_SOURCES = fuse-bridge.c $(CONTRIBDIR)/fuse-lib/misc.c \
- $(CONTRIBDIR)/fuse-lib/mount.c
-fuse_la_LDFLAGS = -module -avoidversion -shared -nostartfiles
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) -Wall \
- -I$(top_srcdir)/libglusterfs/src -I$(CONTRIBDIR)/fuse-include \
- $(GF_CFLAGS) -DFUSERMOUNT_DIR=\"$(bindir)\"
+if GF_DARWIN_HOST_OS
+ mount_source=$(CONTRIBDIR)/macfuse/mount_darwin.c
+else
+ mount_source=$(CONTRIBDIR)/fuse-lib/mount.c $(CONTRIBDIR)/fuse-lib/mount-common.c
+endif
+
+fuse_la_SOURCES = fuse-helpers.c fuse-resolve.c fuse-bridge.c \
+ $(CONTRIBDIR)/fuse-lib/misc.c $(mount_source)
+fuse_la_LDFLAGS = -module -avoid-version
+fuse_la_LIBADD = @GF_FUSE_LDADD@
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src -I$(CONTRIBDIR)/fuse-include \
+ -I$(CONTRIBDIR)/fuse-lib $(GF_FUSE_CFLAGS)
-CLEANFILES =
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+CLEANFILES =
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index dcb4e2950..6a5587c2d 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -1,245 +1,164 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
+#include <sys/wait.h>
+#include "fuse-bridge.h"
+#include "mount-gluster-compat.h"
+#include "glusterfs.h"
+#include "glusterfs-acl.h"
+
+#ifdef __NetBSD__
+#undef open /* in perfuse.h, pulled from mount-gluster-compat.h */
+#endif
+
+static int gf_fuse_conn_err_log;
+static int gf_fuse_xattr_enotsup_log;
+
+void fini (xlator_t *this_xl);
+
+static void fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino);
+
/*
- * TODO:
- * Need to free_state() when fuse_reply_err() + return.
- * Check loc->path for "" after fuse_loc_fill in all fops
- * (now being done in getattr, lookup) or better - make
- * fuse_loc_fill() and inode_path() return success/failure.
+ * Send an invalidate notification up to fuse to purge the file from local
+ * page cache.
*/
+static int32_t
+fuse_invalidate(xlator_t *this, inode_t *inode)
+{
+ fuse_private_t *priv = this->private;
+ uint64_t nodeid;
-#include <stdint.h>
-#include <signal.h>
-#include <pthread.h>
-#include <stddef.h>
-#include <dirent.h>
-#include <sys/mount.h>
+ /*
+ * NOTE: We only invalidate at the moment if fopen_keep_cache is
+ * enabled because otherwise this is a departure from default
+ * behavior. Specifically, the performance/write-behind xlator
+ * causes unconditional invalidations on write requests.
+ */
+ if (!priv->fopen_keep_cache)
+ return 0;
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif /* _CONFIG_H */
+ nodeid = inode_to_fuse_nodeid(inode);
+ gf_log(this->name, GF_LOG_DEBUG, "Invalidate inode id %lu.", nodeid);
+ fuse_log_eh (this, "Sending invalidate inode id: %lu gfid: %s", nodeid,
+ uuid_utoa (inode->gfid));
+ fuse_invalidate_inode(this, nodeid);
-#include "glusterfs.h"
-#include "logging.h"
-#include "xlator.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "statedump.h"
-
-#include "fuse_kernel.h"
-#include "fuse-misc.h"
-#include "fuse-mount.h"
-
-#include "list.h"
-#include "dict.h"
-
-#include "compat.h"
-#include "compat-errno.h"
-
-/* TODO: when supporting posix acl, remove this definition */
-#define DISABLE_POSIX_ACL
-
-#define ZR_MOUNTPOINT_OPT "mountpoint"
-#define ZR_DIRECT_IO_OPT "direct-io-mode"
-#define ZR_STRICT_VOLFILE_CHECK "strict-volfile-check"
-
-#define FUSE_712_OP_HIGH (FUSE_POLL + 1)
-#define GLUSTERFS_XATTR_LEN_MAX 65536
-
-typedef struct fuse_in_header fuse_in_header_t;
-typedef void (fuse_handler_t) (xlator_t *this, fuse_in_header_t *finh,
- void *msg);
-
-struct fuse_private {
- int fd;
- uint32_t proto_minor;
- char *volfile;
- size_t volfile_size;
- char *mount_point;
- struct iobuf *iobuf;
- pthread_t fuse_thread;
- char fuse_thread_started;
- uint32_t direct_io_mode;
- size_t *msg0_len_p;
- double entry_timeout;
- double attribute_timeout;
- pthread_cond_t first_call_cond;
- pthread_mutex_t first_call_mutex;
- char first_call;
- gf_boolean_t strict_volfile_check;
-};
-typedef struct fuse_private fuse_private_t;
-
-#define _FH_TO_FD(fh) ((fd_t *)(uintptr_t)(fh))
-
-#define FH_TO_FD(fh) ((_FH_TO_FD (fh))?(fd_ref (_FH_TO_FD (fh))):((fd_t *) 0))
-
-#define FUSE_FOP(state, ret, op_num, fop, args ...) \
- do { \
- call_frame_t *frame = NULL; \
- xlator_t *xl = NULL; \
- \
- frame = get_call_frame_for_req (state); \
- if (!frame) { \
- /* This is not completely clean, as some \
- * earlier allocations might remain unfreed \
- * if we return at this point, but still \
- * better than trying to go on with a NULL \
- * frame ... \
- */ \
- gf_log ("glusterfs-fuse", \
- GF_LOG_ERROR, \
- "FUSE message" \
- " unique %"PRIu64" opcode %d:" \
- " frame allocation failed", \
- state->finh->unique, \
- state->finh->opcode); \
- free_state (state); \
- return; \
- } \
- xl = frame->this->children ? \
- frame->this->children->xlator : NULL; \
- frame->root->state = state; \
- frame->root->op = op_num; \
- STACK_WIND (frame, ret, xl, xl->fops->fop, args); \
- } while (0)
-
-#define GF_SELECT_LOG_LEVEL(_errno) \
- (((_errno == ENOENT) || (_errno == ESTALE))? \
- GF_LOG_DEBUG)
-
-#define GET_STATE(this, finh, state) \
- do { \
- state = get_state (this, finh); \
- if (!state) { \
- gf_log ("glusterfs-fuse", \
- GF_LOG_ERROR, \
- "FUSE message unique %"PRIu64" opcode %d:" \
- " state allocation failed", \
- finh->unique, finh->opcode); \
- \
- send_fuse_err (this, finh, ENOMEM); \
- FREE (finh); \
- \
- return; \
- } \
- } while (0)
-
-
-typedef struct {
- void *pool;
- xlator_t *this;
- inode_table_t *itable;
- loc_t loc;
- loc_t loc2;
- fuse_in_header_t *finh;
- int32_t flags;
- off_t off;
- size_t size;
- unsigned long nlookup;
- fd_t *fd;
- dict_t *dict;
- char *name;
- char is_revalidate;
-} fuse_state_t;
+ return 0;
+}
+static int32_t
+fuse_forget_cbk (xlator_t *this, inode_t *inode)
+{
+ //Nothing to free in inode ctx, hence return.
+ return 0;
+}
-static void
-free_state (fuse_state_t *state)
+void
+fuse_inode_set_need_lookup (inode_t *inode, xlator_t *this)
{
- loc_wipe (&state->loc);
+ uint64_t need_lookup = 1;
- loc_wipe (&state->loc2);
+ if (!inode || !this)
+ return;
- if (state->dict) {
- dict_unref (state->dict);
- state->dict = (void *)0xaaaaeeee;
- }
- if (state->name) {
- FREE (state->name);
- state->name = NULL;
- }
- if (state->fd) {
- fd_unref (state->fd);
- state->fd = (void *)0xfdfdfdfd;
- }
- if (state->finh) {
- FREE (state->finh);
- state->finh = NULL;
- }
-#ifdef DEBUG
- memset (state, 0x90, sizeof (*state));
-#endif
- FREE (state);
- state = NULL;
+ inode_ctx_set (inode, this, &need_lookup);
+
+ return;
}
-fuse_state_t *
-get_state (xlator_t *this, fuse_in_header_t *finh)
+gf_boolean_t
+fuse_inode_needs_lookup (inode_t *inode, xlator_t *this)
{
- fuse_state_t *state = NULL;
+ uint64_t need_lookup = 0;
+ gf_boolean_t ret = _gf_false;
- state = (void *)calloc (1, sizeof (*state));
- if (!state)
- return NULL;
- state->pool = this->ctx->pool;
- state->itable = this->itable;
- state->finh = finh;
- state->this = this;
+ if (!inode || !this)
+ return ret;
- return state;
+ inode_ctx_get (inode, this, &need_lookup);
+ if (need_lookup)
+ ret = _gf_true;
+ need_lookup = 0;
+ inode_ctx_set (inode, this, &need_lookup);
+
+ return ret;
}
-static call_frame_t *
-get_call_frame_for_req (fuse_state_t *state)
+fuse_fd_ctx_t *
+__fuse_fd_ctx_check_n_create (xlator_t *this, fd_t *fd)
{
- call_pool_t *pool = NULL;
- fuse_in_header_t *finh = NULL;
- call_frame_t *frame = NULL;
- xlator_t *this = NULL;
- fuse_private_t *priv = NULL;
+ uint64_t val = 0;
+ int32_t ret = 0;
+ fuse_fd_ctx_t *fd_ctx = NULL;
- pool = state->pool;
- finh = state->finh;
- this = state->this;
- priv = this->private;
+ ret = __fd_ctx_get (fd, this, &val);
- frame = create_frame (this, pool);
- if (!frame)
- return NULL;
+ fd_ctx = (fuse_fd_ctx_t *)(unsigned long) val;
- if (finh) {
- frame->root->uid = finh->uid;
- frame->root->gid = finh->gid;
- frame->root->pid = finh->pid;
- frame->root->unique = finh->unique;
+ if (fd_ctx == NULL) {
+ fd_ctx = GF_CALLOC (1, sizeof (*fd_ctx),
+ gf_fuse_mt_fd_ctx_t);
+ if (!fd_ctx) {
+ goto out;
+ }
+ ret = __fd_ctx_set (fd, this,
+ (uint64_t)(unsigned long)fd_ctx);
+ if (ret < 0) {
+ gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+ "fd-ctx-set failed");
+ GF_FREE (fd_ctx);
+ fd_ctx = NULL;
+ }
}
+out:
+ return fd_ctx;
+}
- frame->root->type = GF_OP_TYPE_FOP_REQUEST;
+fuse_fd_ctx_t *
+fuse_fd_ctx_check_n_create (xlator_t *this, fd_t *fd)
+{
+ fuse_fd_ctx_t *fd_ctx = NULL;
+
+ if ((fd == NULL) || (this == NULL)) {
+ goto out;
+ }
+
+ LOCK (&fd->lock);
+ {
+ fd_ctx = __fuse_fd_ctx_check_n_create (this, fd);
+ }
+ UNLOCK (&fd->lock);
- return frame;
+out:
+ return fd_ctx;
}
+fuse_fd_ctx_t *
+fuse_fd_ctx_get (xlator_t *this, fd_t *fd)
+{
+ fuse_fd_ctx_t *fdctx = NULL;
+ uint64_t value = 0;
+ int ret = 0;
+
+ ret = fd_ctx_get (fd, this, &value);
+ if (ret < 0) {
+ goto out;
+ }
+
+ fdctx = (fuse_fd_ctx_t *) (unsigned long)value;
+
+out:
+ return fdctx;
+}
/*
* iov_out should contain a fuse_out_header at zeroth position.
@@ -253,6 +172,10 @@ send_fuse_iov (xlator_t *this, fuse_in_header_t *finh, struct iovec *iov_out,
struct fuse_out_header *fouh = NULL;
int res, i;
+ if (!this || !finh || !iov_out) {
+ gf_log ("send_fuse_iov", GF_LOG_ERROR,"Invalid arguments");
+ return EINVAL;
+ }
priv = this->private;
fouh = iov_out[0].iov_base;
@@ -268,6 +191,22 @@ send_fuse_iov (xlator_t *this, fuse_in_header_t *finh, struct iovec *iov_out,
return errno;
if (res != fouh->len)
return EINVAL;
+
+ if (priv->fuse_dump_fd != -1) {
+ char w = 'W';
+
+ pthread_mutex_lock (&priv->fuse_dump_mutex);
+ res = write (priv->fuse_dump_fd, &w, 1);
+ if (res != -1)
+ res = writev (priv->fuse_dump_fd, iov_out, count);
+ pthread_mutex_unlock (&priv->fuse_dump_mutex);
+
+ if (res == -1)
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "failed to dump fuse message (W): %s",
+ strerror (errno));
+ }
+
return 0;
}
@@ -288,340 +227,419 @@ send_fuse_data (xlator_t *this, fuse_in_header_t *finh, void *data, size_t size)
#define send_fuse_obj(this, finh, obj) \
send_fuse_data (this, finh, obj, sizeof (*(obj)))
-static int
-send_fuse_err (xlator_t *this, fuse_in_header_t *finh, int error)
-{
- struct fuse_out_header fouh = {0, };
- struct iovec iov_out;
- fouh.error = -error;
- iov_out.iov_base = &fouh;
+static void
+fuse_invalidate_entry (xlator_t *this, uint64_t fuse_ino)
+{
+ struct fuse_out_header *fouh = NULL;
+ struct fuse_notify_inval_entry_out *fnieo = NULL;
+ fuse_private_t *priv = NULL;
+ dentry_t *dentry = NULL;
+ inode_t *inode = NULL;
+ size_t nlen = 0;
+ int rv = 0;
+ char inval_buf[INVAL_BUF_SIZE] = {0,};
+
+ fouh = (struct fuse_out_header *)inval_buf;
+ fnieo = (struct fuse_notify_inval_entry_out *)(fouh + 1);
- return send_fuse_iov (this, finh, &iov_out, 1);
-}
+ priv = this->private;
+ if (priv->revchan_out == -1)
+ return;
+ fouh->unique = 0;
+ fouh->error = FUSE_NOTIFY_INVAL_ENTRY;
-GF_MUST_CHECK static int32_t
-fuse_loc_fill (loc_t *loc, fuse_state_t *state, ino_t ino,
- ino_t par, const char *name)
-{
- inode_t *inode = NULL;
- inode_t *parent = NULL;
- int32_t ret = -1;
- char *path = NULL;
+ inode = fuse_ino_to_inode (fuse_ino, this);
- /* resistance against multiple invocation of loc_fill not to get
- reference leaks via inode_search() */
+ list_for_each_entry (dentry, &inode->dentry_list, inode_list) {
+ nlen = strlen (dentry->name);
+ fouh->len = sizeof (*fouh) + sizeof (*fnieo) + nlen + 1;
+ fnieo->parent = inode_to_fuse_nodeid (dentry->parent);
- inode = loc->inode;
+ fnieo->namelen = nlen;
+ strcpy (inval_buf + sizeof (*fouh) + sizeof (*fnieo), dentry->name);
- if (!inode) {
- if (ino)
- inode = inode_search (state->itable, ino, NULL);
- if (par && name)
- inode = inode_search (state->itable, par, name);
+ rv = write (priv->revchan_out, inval_buf, fouh->len);
+ if (rv != fouh->len) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "kernel notification daemon defunct");
- loc->inode = inode;
- if (inode)
- loc->ino = inode->ino;
- }
+ close (priv->fd);
+ break;
+ }
- parent = loc->parent;
- if (!parent) {
- if (inode)
- parent = inode_parent (inode, par, name);
- else
- parent = inode_search (state->itable, par, NULL);
- loc->parent = parent;
- }
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE, "INVALIDATE entry: "
+ "%"PRIu64"/%s", fnieo->parent, dentry->name);
- if (name && parent) {
- ret = inode_path (parent, name, &path);
- if (ret <= 0) {
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "inode_path failed for %"PRId64"/%s",
- parent->ino, name);
- goto fail;
- } else {
- loc->path = path;
- }
- } else if (inode) {
- ret = inode_path (inode, NULL, &path);
- if (ret <= 0) {
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "inode_path failed for %"PRId64,
- inode->ino);
- goto fail;
+ if (dentry->parent) {
+ fuse_log_eh (this, "Invalidated entry %s (parent: %s)",
+ dentry->name,
+ uuid_utoa (dentry->parent->gfid));
} else {
- loc->path = path;
+ fuse_log_eh (this, "Invalidated entry %s(nodeid: %ld)",
+ dentry->name, fnieo->parent);
}
}
- if (loc->path) {
- loc->name = strrchr (loc->path, '/');
- if (loc->name)
- loc->name++;
- else loc->name = "";
- }
- if ((ino != 1) &&
- (parent == NULL)) {
- gf_log ("fuse-bridge", GF_LOG_DEBUG,
- "failed to search parent for %"PRId64"/%s (%"PRId64")",
- (ino_t)par, name, (ino_t)ino);
- ret = -1;
- goto fail;
- }
- ret = 0;
-fail:
- return ret;
+ if (inode)
+ inode_unref (inode);
}
-
-static int
-need_fresh_lookup (int32_t op_ret, int32_t op_errno,
- loc_t *loc, struct stat *buf)
+/*
+ * Send an inval inode notification to fuse. This causes an invalidation of the
+ * entire page cache mapping on the inode.
+ */
+static void
+fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
{
- if (op_ret == -1) {
- gf_log ("fuse-bridge", GF_LOG_DEBUG,
- "revalidate of %s failed (%s)",
- loc->path, strerror (op_errno));
- return 1;
- }
+ struct fuse_out_header *fouh = NULL;
+ struct fuse_notify_inval_inode_out *fniio = NULL;
+ fuse_private_t *priv = NULL;
+ int rv = 0;
+ char inval_buf[INVAL_BUF_SIZE] = {0};
+ inode_t *inode = NULL;
+
+ fouh = (struct fuse_out_header *) inval_buf;
+ fniio = (struct fuse_notify_inval_inode_out *) (fouh + 1);
+
+ priv = this->private;
+
+ if (priv->revchan_out < 0)
+ return;
+
+ fouh->unique = 0;
+ fouh->error = FUSE_NOTIFY_INVAL_INODE;
+ fouh->len = sizeof(struct fuse_out_header) +
+ sizeof(struct fuse_notify_inval_inode_out);
- if (loc->inode->ino != buf->st_ino) {
- gf_log ("fuse-bridge", GF_LOG_DEBUG,
- "inode num of %s changed %"PRId64" -> %"PRId64,
- loc->path, loc->inode->ino, buf->st_ino);
- return 1;
+ /* inval the entire mapping until we learn how to be more granular */
+ fniio->ino = fuse_ino;
+ fniio->off = 0;
+ fniio->len = -1;
+
+ inode = fuse_ino_to_inode (fuse_ino, this);
+
+ rv = write(priv->revchan_out, inval_buf, fouh->len);
+ if (rv != fouh->len) {
+ gf_log("glusterfs-fuse", GF_LOG_ERROR, "kernel notification "
+ "daemon defunct");
+ close(priv->fd);
}
- if ((loc->inode->st_mode & S_IFMT) ^ (buf->st_mode & S_IFMT)) {
- gf_log ("fuse-bridge", GF_LOG_DEBUG,
- "inode mode of %s changed 0%o -> 0%o",
- loc->path, loc->inode->st_mode, buf->st_mode);
- return 1;
+ gf_log("glusterfs-fuse", GF_LOG_TRACE, "INVALIDATE inode: %lu", fuse_ino);
+
+ if (inode) {
+ fuse_log_eh (this, "Invalidated inode %lu (gfid: %s)",
+ fuse_ino, uuid_utoa (inode->gfid));
+ } else {
+ fuse_log_eh (this, "Invalidated inode %lu ", fuse_ino);
}
- return 0;
+ if (inode)
+ inode_unref (inode);
}
-/* courtesy of folly */
-static void
-stat2attr (struct stat *st, struct fuse_attr *fa)
-{
- fa->ino = st->st_ino;
- fa->size = st->st_size;
- fa->blocks = st->st_blocks;
- fa->atime = st->st_atime;
- fa->mtime = st->st_mtime;
- fa->ctime = st->st_ctime;
- fa->atimensec = ST_ATIM_NSEC (st);
- fa->mtimensec = ST_MTIM_NSEC (st);
- fa->ctimensec = ST_CTIM_NSEC (st);
- fa->mode = st->st_mode;
- fa->nlink = st->st_nlink;
- fa->uid = st->st_uid;
- fa->gid = st->st_gid;
- fa->rdev = st->st_rdev;
- fa->blksize = st->st_blksize;
-}
+int
+send_fuse_err (xlator_t *this, fuse_in_header_t *finh, int error)
+{
+ struct fuse_out_header fouh = {0, };
+ struct iovec iov_out;
+ inode_t *inode = NULL;
-static int
-fuse_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *stat, dict_t *dict);
+ fouh.error = -error;
+ iov_out.iov_base = &fouh;
+
+ inode = fuse_ino_to_inode (finh->nodeid, this);
+
+ // filter out ENOENT
+ if (error != ENOENT) {
+ if (inode) {
+ fuse_log_eh (this,"Sending %s for operation %d on "
+ "inode %s", strerror (error), finh->opcode,
+ uuid_utoa (inode->gfid));
+ } else {
+ fuse_log_eh (this, "Sending %s for operation %d on "
+ "inode %ld", strerror (error),
+ finh->opcode, finh->nodeid);
+ }
+ }
+
+ if (inode)
+ inode_unref (inode);
+
+ return send_fuse_iov (this, finh, &iov_out, 1);
+}
static int
fuse_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf)
+ inode_t *inode, struct iatt *buf, dict_t *xdata)
{
- fuse_state_t *state = NULL;
- fuse_in_header_t *finh = NULL;
- struct fuse_entry_out feo = {0, };
- struct fuse_attr_out *fao = NULL;
- fuse_private_t *priv = NULL;
+ fuse_state_t *state = NULL;
+ fuse_in_header_t *finh = NULL;
+ struct fuse_entry_out feo = {0, };
+ fuse_private_t *priv = NULL;
+ inode_t *linked_inode = NULL;
priv = this->private;
state = frame->root->state;
finh = state->finh;
- if (!op_ret && state->loc.ino == 1) {
- buf->st_ino = 1;
+ if (op_ret == 0) {
+ if (__is_root_gfid (state->loc.inode->gfid))
+ buf->ia_ino = 1;
+ if (uuid_is_null (buf->ia_gfid)) {
+ /* With a NULL gfid inode linking is
+ not possible. Let's not pretend this
+ call was a "success".
+ */
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "Received NULL gfid for %s. Forcing EIO",
+ state->loc.path);
+ op_ret = -1;
+ op_errno = EIO;
+ }
}
- if (state->is_revalidate == 1
- && need_fresh_lookup (op_ret, op_errno, &state->loc, buf)) {
- inode_unref (state->loc.inode);
- state->loc.inode = inode_new (state->itable);
- state->is_revalidate = 2;
-
- STACK_WIND (frame, fuse_lookup_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->lookup,
- &state->loc, state->dict);
-
- return 0;
- }
+ /* log into the event-history after the null uuid check is done, since
+ * the op_ret and op_errno are being changed if the gfid is NULL.
+ */
+ fuse_log_eh (this, "op_ret: %d op_errno: %d "
+ "%"PRIu64": %s() %s => %s", op_ret, op_errno,
+ frame->root->unique, gf_fop_list[frame->root->op],
+ state->loc.path, (op_ret == 0)?
+ uuid_utoa(buf->ia_gfid):uuid_utoa(state->loc.gfid));
if (op_ret == 0) {
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": %s() %s => %"PRId64" (%"PRId64")",
+ "%"PRIu64": %s() %s => %"PRIu64,
frame->root->unique, gf_fop_list[frame->root->op],
- state->loc.path, buf->st_ino, state->loc.ino);
-
- inode_link (inode, state->loc.parent, state->loc.name, buf);
+ state->loc.path, buf->ia_ino);
- inode_lookup (inode);
+ buf->ia_blksize = this->ctx->page_size;
+ gf_fuse_stat2attr (buf, &feo.attr, priv->enable_ino32);
- buf->st_blksize = this->ctx->page_size;
- stat2attr (buf, &feo.attr);
-
- if (!inode->ino || !buf->st_ino) {
+ if (!buf->ia_ino) {
gf_log ("glusterfs-fuse", GF_LOG_WARNING,
"%"PRIu64": %s() %s returning inode 0",
frame->root->unique,
gf_fop_list[frame->root->op], state->loc.path);
}
- if (state->loc.parent) {
- /* TODO: make these timeouts configurable (via meta?) */
- feo.nodeid = inode->ino;
+ linked_inode = inode_link (inode, state->loc.parent,
+ state->loc.name, buf);
-#ifdef GF_DARWIN_HOST_OS
- feo.generation = 0;
-#else
- feo.generation = buf->st_ctime;
-#endif
+ if (linked_inode != inode) {
+ }
- feo.entry_valid =
- calc_timeout_sec (priv->entry_timeout);
- feo.entry_valid_nsec =
- calc_timeout_nsec (priv->entry_timeout);
- feo.attr_valid =
- calc_timeout_sec (priv->attribute_timeout);
- feo.attr_valid_nsec =
- calc_timeout_nsec (priv->attribute_timeout);
+ inode_lookup (linked_inode);
- priv->proto_minor >= 9 ?
- send_fuse_obj (this, finh, &feo) :
- send_fuse_data (this, finh, &feo,
- FUSE_COMPAT_ENTRY_OUT_SIZE);
- } else {
- /* Refurbish the entry_out as attr_out. Too hacky?... */
- fao = (struct fuse_attr_out *)
- ((char *)&feo.attr -
- offsetof (struct fuse_attr_out, attr));
+ feo.nodeid = inode_to_fuse_nodeid (linked_inode);
- fao->attr_valid =
- calc_timeout_sec (priv->attribute_timeout);
- fao->attr_valid_nsec =
- calc_timeout_nsec (priv->attribute_timeout);
+ inode_unref (linked_inode);
- priv->proto_minor >= 9 ?
- send_fuse_obj (this, finh, fao) :
- send_fuse_data (this, finh, fao,
- FUSE_COMPAT_ATTR_OUT_SIZE);
- }
+ feo.entry_valid =
+ calc_timeout_sec (priv->entry_timeout);
+ feo.entry_valid_nsec =
+ calc_timeout_nsec (priv->entry_timeout);
+ feo.attr_valid =
+ calc_timeout_sec (priv->attribute_timeout);
+ feo.attr_valid_nsec =
+ calc_timeout_nsec (priv->attribute_timeout);
+
+#if FUSE_KERNEL_MINOR_VERSION >= 9
+ priv->proto_minor >= 9 ?
+ send_fuse_obj (this, finh, &feo) :
+ send_fuse_data (this, finh, &feo,
+ FUSE_COMPAT_ENTRY_OUT_SIZE);
+#else
+ send_fuse_obj (this, finh, &feo);
+#endif
} else {
gf_log ("glusterfs-fuse",
(op_errno == ENOENT ? GF_LOG_TRACE : GF_LOG_WARNING),
"%"PRIu64": %s() %s => -1 (%s)", frame->root->unique,
gf_fop_list[frame->root->op], state->loc.path,
strerror (op_errno));
- send_fuse_err (this, state->finh, op_errno);
+
+ if ((op_errno == ENOENT) && (priv->negative_timeout != 0)) {
+ feo.entry_valid =
+ calc_timeout_sec (priv->negative_timeout);
+ feo.entry_valid_nsec =
+ calc_timeout_nsec (priv->negative_timeout);
+ send_fuse_obj (this, finh, &feo);
+ } else {
+ send_fuse_err (this, state->finh, op_errno);
+ }
}
- free_state (state);
+ free_fuse_state (state);
STACK_DESTROY (frame->root);
return 0;
}
-
static int
-fuse_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *stat, dict_t *dict)
+fuse_newentry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- fuse_entry_cbk (frame, cookie, this, op_ret, op_errno, inode, stat);
+ fuse_entry_cbk (frame, cookie, this, op_ret, op_errno, inode, buf,
+ xdata);
return 0;
}
-
-static void
-fuse_lookup (xlator_t *this, fuse_in_header_t *finh, void *msg)
+static int
+fuse_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stat, dict_t *dict,
+ struct iatt *postparent)
{
- char *name = msg;
+ fuse_state_t *state = NULL;
+ call_frame_t *prev = NULL;
+ inode_table_t *itable = NULL;
- fuse_state_t *state = NULL;
- int32_t ret = -1;
+ state = frame->root->state;
+ prev = cookie;
+
+ if (op_ret == -1 && state->is_revalidate == 1) {
+ itable = state->itable;
+ /*
+ * A stale mapping might exist for a dentry/inode that has been
+ * removed from another client.
+ */
+ if (op_errno == ENOENT)
+ inode_unlink(state->loc.inode, state->loc.parent,
+ state->loc.name);
+ inode_unref (state->loc.inode);
+ state->loc.inode = inode_new (itable);
+ state->is_revalidate = 2;
+ if (uuid_is_null (state->gfid))
+ uuid_generate (state->gfid);
+ fuse_gfid_set (state);
- GET_STATE (this, finh, state);
+ STACK_WIND (frame, fuse_lookup_cbk,
+ prev->this, prev->this->fops->lookup,
+ &state->loc, state->xdata);
+ return 0;
+ }
- ret = fuse_loc_fill (&state->loc, state, 0, finh->nodeid, name);
+ fuse_entry_cbk (frame, cookie, this, op_ret, op_errno, inode, stat,
+ dict);
+ return 0;
+}
- if (ret < 0) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": LOOKUP %"PRIu64"/%s (fuse_loc_fill() failed)",
- finh->unique, finh->nodeid, name);
- free_state (state);
- send_fuse_err (this, finh, ENOENT);
+void
+fuse_lookup_resume (fuse_state_t *state)
+{
+ if (!state->loc.parent && !state->loc.inode) {
+ gf_log ("fuse", GF_LOG_ERROR, "failed to resolve path %s",
+ state->loc.path);
+ send_fuse_err (state->this, state->finh, ENOENT);
+ free_fuse_state (state);
return;
}
- if (!state->loc.inode) {
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": LOOKUP %s", finh->unique,
- state->loc.path);
+ /* parent was resolved, entry could not, may be a missing gfid?
+ * Hence try to do a regular lookup
+ */
+ if ((state->resolve.op_ret == -1)
+ && (state->resolve.op_errno == ENODATA)) {
+ state->resolve.op_ret = 0;
+ }
- state->loc.inode = inode_new (state->itable);
- /* to differntiate in entry_cbk what kind of call it is */
- state->is_revalidate = -1;
- } else {
+ if (state->loc.inode) {
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": LOOKUP %s(%"PRId64")", finh->unique,
- state->loc.path, state->loc.inode->ino);
+ "%"PRIu64": LOOKUP %s(%s)", state->finh->unique,
+ state->loc.path, uuid_utoa (state->loc.inode->gfid));
state->is_revalidate = 1;
+ } else {
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": LOOKUP %s", state->finh->unique,
+ state->loc.path);
+ state->loc.inode = inode_new (state->loc.parent->table);
+ if (uuid_is_null (state->gfid))
+ uuid_generate (state->gfid);
+ fuse_gfid_set (state);
}
- state->dict = dict_new ();
-
FUSE_FOP (state, fuse_lookup_cbk, GF_FOP_LOOKUP,
- lookup, &state->loc, state->dict);
+ lookup, &state->loc, state->xdata);
}
+static void
+fuse_lookup (xlator_t *this, fuse_in_header_t *finh, void *msg)
+{
+ char *name = msg;
+ fuse_state_t *state = NULL;
+
+ GET_STATE (this, finh, state);
+
+ (void) fuse_resolve_entry_init (state, &state->resolve,
+ finh->nodeid, name);
+
+ fuse_resolve_and_resume (state, fuse_lookup_resume);
+
+ return;
+}
+
+static inline void
+do_forget(xlator_t *this, uint64_t unique, uint64_t nodeid, uint64_t nlookup)
+{
+ inode_t *fuse_inode = fuse_ino_to_inode(nodeid, this);
+
+ fuse_log_eh(this, "%"PRIu64": FORGET %"PRIu64"/%"PRIu64" gfid: (%s)",
+ unique, nodeid, nlookup, uuid_utoa(fuse_inode->gfid));
+
+ inode_forget(fuse_inode, nlookup);
+ inode_unref(fuse_inode);
+}
static void
fuse_forget (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
- struct fuse_forget_in *ffi = msg;
-
- inode_t *fuse_inode;
+ struct fuse_forget_in *ffi = msg;
if (finh->nodeid == 1) {
- FREE (finh);
+ GF_FREE (finh);
return;
}
- fuse_inode = inode_search (this->itable, finh->nodeid, NULL);
- if (fuse_inode) {
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "got forget on inode (%"PRIu64")", finh->nodeid);
- inode_forget (fuse_inode, ffi->nlookup);
- inode_unref (fuse_inode);
- } else {
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "got forget, but inode (%"PRIu64") not found", finh->nodeid);
- }
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": FORGET %"PRIu64"/%"PRIu64,
+ finh->unique, finh->nodeid, ffi->nlookup);
- FREE (finh);
+ do_forget(this, finh->unique, finh->nodeid, ffi->nlookup);
+
+ GF_FREE (finh);
}
+static void
+fuse_batch_forget(xlator_t *this, fuse_in_header_t *finh, void *msg)
+{
+ struct fuse_batch_forget_in *fbfi = msg;
+ struct fuse_forget_one *ffo = (struct fuse_forget_one *) (fbfi + 1);
+ int i;
+
+ gf_log("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": BATCH_FORGET %"PRIu64"/%"PRIu32,
+ finh->unique, finh->nodeid, fbfi->count);
+
+ for (i = 0; i < fbfi->count; i++) {
+ if (ffo[i].nodeid == 1)
+ continue;
+ do_forget(this, finh->unique, ffo[i].nodeid, ffo[i].nlookup);
+ }
+
+ GF_FREE(finh);
+}
static int
-fuse_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+fuse_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
fuse_state_t *state;
fuse_in_header_t *finh;
@@ -632,26 +650,30 @@ fuse_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
state = frame->root->state;
finh = state->finh;
+ fuse_log_eh_fop(this, state, frame, op_ret, op_errno);
+
if (op_ret == 0) {
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": %s() %s => %"PRId64, frame->root->unique,
+ "%"PRIu64": %s() %s => %"PRIu64, frame->root->unique,
gf_fop_list[frame->root->op],
state->loc.path ? state->loc.path : "ERR",
- buf->st_ino);
+ prebuf->ia_ino);
- /* TODO: make these timeouts configurable via meta */
- /* TODO: what if the inode number has changed by now */
- buf->st_blksize = this->ctx->page_size;
- stat2attr (buf, &fao.attr);
+ postbuf->ia_blksize = this->ctx->page_size;
+ gf_fuse_stat2attr (postbuf, &fao.attr, priv->enable_ino32);
fao.attr_valid = calc_timeout_sec (priv->attribute_timeout);
fao.attr_valid_nsec =
calc_timeout_nsec (priv->attribute_timeout);
+#if FUSE_KERNEL_MINOR_VERSION >= 9
priv->proto_minor >= 9 ?
send_fuse_obj (this, finh, &fao) :
send_fuse_data (this, finh, &fao,
FUSE_COMPAT_ATTR_OUT_SIZE);
+#else
+ send_fuse_obj (this, finh, &fao);
+#endif
} else {
gf_log ("glusterfs-fuse", GF_LOG_WARNING,
"%"PRIu64": %s() %s => -1 (%s)", frame->root->unique,
@@ -662,341 +684,512 @@ fuse_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
send_fuse_err (this, finh, op_errno);
}
- free_state (state);
+ free_fuse_state (state);
+ STACK_DESTROY (frame->root);
+
+ return 0;
+}
+
+static int
+fuse_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+ fuse_state_t *state;
+ fuse_in_header_t *finh;
+ fuse_private_t *priv = NULL;
+ struct fuse_attr_out fao;
+
+ priv = this->private;
+ state = frame->root->state;
+ finh = state->finh;
+
+ fuse_log_eh (this, "op_ret: %d, op_errno: %d, %"PRIu64": %s() %s => "
+ "gfid: %s", op_ret, op_errno, frame->root->unique,
+ gf_fop_list[frame->root->op], state->loc.path,
+ state->loc.inode ? uuid_utoa (state->loc.inode->gfid) : "");
+ if (op_ret == 0) {
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": %s() %s => %"PRIu64, frame->root->unique,
+ gf_fop_list[frame->root->op],
+ state->loc.path ? state->loc.path : "ERR",
+ buf->ia_ino);
+
+ buf->ia_blksize = this->ctx->page_size;
+ gf_fuse_stat2attr (buf, &fao.attr, priv->enable_ino32);
+
+ fao.attr_valid = calc_timeout_sec (priv->attribute_timeout);
+ fao.attr_valid_nsec =
+ calc_timeout_nsec (priv->attribute_timeout);
+
+#if FUSE_KERNEL_MINOR_VERSION >= 9
+ priv->proto_minor >= 9 ?
+ send_fuse_obj (this, finh, &fao) :
+ send_fuse_data (this, finh, &fao,
+ FUSE_COMPAT_ATTR_OUT_SIZE);
+#else
+ send_fuse_obj (this, finh, &fao);
+#endif
+ } else {
+ GF_LOG_OCCASIONALLY ( gf_fuse_conn_err_log, "glusterfs-fuse",
+ GF_LOG_WARNING,
+ "%"PRIu64": %s() %s => -1 (%s)",
+ frame->root->unique,
+ gf_fop_list[frame->root->op],
+ state->loc.path ? state->loc.path : "ERR",
+ strerror (op_errno));
+
+ send_fuse_err (this, finh, op_errno);
+ }
+
+ free_fuse_state (state);
STACK_DESTROY (frame->root);
+
+ return 0;
+}
+
+static int
+fuse_root_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stat, dict_t *dict,
+ struct iatt *postparent)
+{
+ fuse_attr_cbk (frame, cookie, this, op_ret, op_errno, stat, dict);
+
return 0;
}
+void
+fuse_getattr_resume (fuse_state_t *state)
+{
+ if (!state->loc.inode) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "%"PRIu64": GETATTR %"PRIu64" (%s) resolution failed",
+ state->finh->unique, state->finh->nodeid,
+ uuid_utoa (state->resolve.gfid));
+ send_fuse_err (state->this, state->finh, ENOENT);
+ free_fuse_state (state);
+ return;
+ }
+
+ if (!IA_ISDIR (state->loc.inode->ia_type)) {
+ state->fd = fd_lookup (state->loc.inode, state->finh->pid);
+ }
+
+ if (!state->fd) {
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": GETATTR %"PRIu64" (%s)",
+ state->finh->unique, state->finh->nodeid,
+ state->loc.path);
+
+ FUSE_FOP (state, fuse_attr_cbk, GF_FOP_STAT,
+ stat, &state->loc, state->xdata);
+ } else {
+
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": FGETATTR %"PRIu64" (%s/%p)",
+ state->finh->unique, state->finh->nodeid,
+ state->loc.path, state->fd);
+
+ FUSE_FOP (state, fuse_attr_cbk, GF_FOP_FSTAT,
+ fstat, state->fd, state->xdata);
+ }
+}
static void
fuse_getattr (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
fuse_state_t *state;
- fd_t *fd = NULL;
int32_t ret = -1;
GET_STATE (this, finh, state);
if (finh->nodeid == 1) {
+ state->gfid[15] = 1;
+
ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL);
if (ret < 0) {
gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": GETATTR %"PRIu64" (fuse_loc_fill() failed)",
- finh->unique, finh->nodeid);
+ "%"PRIu64": GETATTR on / (fuse_loc_fill() failed)",
+ finh->unique);
send_fuse_err (this, finh, ENOENT);
- free_state (state);
+ free_fuse_state (state);
return;
}
- if (state->loc.inode)
- state->is_revalidate = 1;
- else
- state->is_revalidate = -1;
-
- state->dict = dict_new ();
+ fuse_gfid_set (state);
- FUSE_FOP (state, fuse_lookup_cbk, GF_FOP_LOOKUP,
- lookup, &state->loc, state->dict);
+ FUSE_FOP (state, fuse_root_lookup_cbk, GF_FOP_LOOKUP,
+ lookup, &state->loc, state->xdata);
return;
}
- ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL);
+ fuse_resolve_inode_init (state, &state->resolve, state->finh->nodeid);
- if (!state->loc.inode) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": GETATTR %"PRIu64" (%s) (fuse_loc_fill() returned NULL inode)",
- finh->unique, finh->nodeid, state->loc.path);
- send_fuse_err (this, finh, ENOENT);
- return;
+ fuse_resolve_and_resume (state, fuse_getattr_resume);
+}
+
+static int32_t
+fuse_fd_inherit_directio (xlator_t *this, fd_t *fd, struct fuse_open_out *foo)
+{
+ int32_t ret = 0;
+ fuse_fd_ctx_t *fdctx = NULL, *tmp_fdctx = NULL;
+ fd_t *tmp_fd = NULL;
+
+ GF_VALIDATE_OR_GOTO_WITH_ERROR ("glusterfs-fuse", this, out, ret,
+ -EINVAL);
+ GF_VALIDATE_OR_GOTO_WITH_ERROR ("glusterfs-fuse", fd, out, ret,
+ -EINVAL);
+ GF_VALIDATE_OR_GOTO_WITH_ERROR ("glusterfs-fuse", foo, out, ret,
+ -EINVAL);
+
+ fdctx = fuse_fd_ctx_get (this, fd);
+ if (!fdctx) {
+ ret = -ENOMEM;
+ goto out;
}
- fd = fd_lookup (state->loc.inode, finh->pid);
- state->fd = fd;
- if (!fd || S_ISDIR (state->loc.inode->st_mode)) {
- /* this is the @ret of fuse_loc_fill, checked here
- to permit fstat() to happen even when fuse_loc_fill fails
- */
- if (ret < 0) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": GETATTR %"PRIu64" (fuse_loc_fill() failed)",
- finh->unique, finh->nodeid);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
- return;
+ tmp_fd = fd_lookup (fd->inode, 0);
+ if (tmp_fd) {
+ tmp_fdctx = fuse_fd_ctx_get (this, tmp_fd);
+ if (tmp_fdctx) {
+ foo->open_flags &= ~FOPEN_DIRECT_IO;
+ foo->open_flags |= (tmp_fdctx->open_flags
+ & FOPEN_DIRECT_IO);
}
+ }
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": GETATTR %"PRIu64" (%s)",
- finh->unique, finh->nodeid, state->loc.path);
-
-
- FUSE_FOP (state, fuse_attr_cbk, GF_FOP_STAT,
- stat, &state->loc);
- } else {
+ fdctx->open_flags |= (foo->open_flags & FOPEN_DIRECT_IO);
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": FGETATTR %"PRIu64" (%s/%p)",
- finh->unique, finh->nodeid, state->loc.path, fd);
-
- FUSE_FOP (state,fuse_attr_cbk, GF_FOP_FSTAT,
- fstat, fd);
+ if (tmp_fd != NULL) {
+ fd_unref (tmp_fd);
}
-}
+ ret = 0;
+out:
+ return ret;
+}
static int
fuse_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- fuse_state_t *state;
- fuse_in_header_t *finh;
- fuse_private_t *priv = NULL;
- struct fuse_open_out foo = {0, };
+ fuse_state_t *state = NULL;
+ fuse_in_header_t *finh = NULL;
+ fuse_private_t *priv = NULL;
+ int32_t ret = 0;
+ struct fuse_open_out foo = {0, };
priv = this->private;
state = frame->root->state;
finh = state->finh;
+ fuse_log_eh_fop(this, state, frame, op_ret, op_errno);
+
if (op_ret >= 0) {
foo.fh = (uintptr_t) fd;
foo.open_flags = 0;
- if (!S_ISDIR (fd->inode->st_mode)) {
- if (((state->flags & O_ACCMODE) != O_RDONLY) &&
- priv->direct_io_mode)
+ if (!IA_ISDIR (fd->inode->ia_type)) {
+ if (((priv->direct_io_mode == 2)
+ && ((state->flags & O_ACCMODE) != O_RDONLY))
+ || (priv->direct_io_mode == 1))
foo.open_flags |= FOPEN_DIRECT_IO;
+#ifdef GF_DARWIN_HOST_OS
+ /* In Linux: by default, buffer cache
+ * is purged upon open, setting
+ * FOPEN_KEEP_CACHE implies no-purge
+ *
+ * In MacFUSE: by default, buffer cache
+ * is left intact upon open, setting
+ * FOPEN_PURGE_UBC implies purge
+ *
+ * [[Interesting...]]
+ */
+ if (!priv->fopen_keep_cache)
+ foo.open_flags |= FOPEN_PURGE_UBC;
+#else
+ /*
+ * If fopen-keep-cache is enabled, we set the associated
+ * flag here such that files are not invalidated on open.
+ * File invalidations occur either in fuse or explicitly
+ * when the cache is set invalid on the inode.
+ */
+ if (priv->fopen_keep_cache)
+ foo.open_flags |= FOPEN_KEEP_CACHE;
+#endif
}
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
"%"PRIu64": %s() %s => %p", frame->root->unique,
gf_fop_list[frame->root->op], state->loc.path, fd);
- fd_ref (fd);
+ ret = fuse_fd_inherit_directio (this, fd, &foo);
+ if (ret < 0) {
+ op_errno = -ret;
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "cannot inherit direct-io values for fd "
+ "(ptr:%p inode-gfid:%s) from fds already "
+ "opened", fd, uuid_utoa (fd->inode->gfid));
+ goto err;
+ }
+
if (send_fuse_obj (this, finh, &foo) == ENOENT) {
gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
"open(%s) got EINTR", state->loc.path);
- fd_unref (fd);
- goto out;
+ gf_fd_put (priv->fdtable, state->fd_no);
+ goto out;
}
fd_bind (fd);
} else {
+ err:
gf_log ("glusterfs-fuse", GF_LOG_WARNING,
"%"PRIu64": %s() %s => -1 (%s)", frame->root->unique,
gf_fop_list[frame->root->op], state->loc.path,
strerror (op_errno));
send_fuse_err (this, finh, op_errno);
+ gf_fd_put (priv->fdtable, state->fd_no);
}
out:
- free_state (state);
+ free_fuse_state (state);
STACK_DESTROY (frame->root);
return 0;
}
-
static void
-do_chmod (xlator_t *this, fuse_in_header_t *finh, struct fuse_setattr_in *fsi)
+fuse_do_truncate (fuse_state_t *state, size_t size)
{
- fuse_state_t *state = NULL;
- fd_t *fd = NULL;
- int32_t ret = -1;
-
- GET_STATE (this, finh, state);
- if (fsi->valid & FATTR_FH) {
- fd = FH_TO_FD (fsi->fh);
- state->fd = fd;
+ if (state->fd) {
+ FUSE_FOP (state, fuse_truncate_cbk, GF_FOP_FTRUNCATE,
+ ftruncate, state->fd, size, state->xdata);
+ } else {
+ FUSE_FOP (state, fuse_truncate_cbk, GF_FOP_TRUNCATE,
+ truncate, &state->loc, size, state->xdata);
}
- if (fd) {
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": FCHMOD %p", finh->unique, fd);
+ return;
+}
- FUSE_FOP (state, fuse_attr_cbk, GF_FOP_FCHMOD,
- fchmod, fd, fsi->mode);
- } else {
- ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL);
+static int
+fuse_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost, dict_t *xdata)
+{
+ fuse_state_t *state;
+ fuse_in_header_t *finh;
+ fuse_private_t *priv = NULL;
+ struct fuse_attr_out fao;
- if ((state->loc.inode == NULL) ||
- (ret < 0)) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": CHMOD %"PRIu64" (%s) (fuse_loc_fill() failed)",
- finh->unique, finh->nodeid,
- state->loc.path);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
- return;
- }
+ int op_done = 0;
+ priv = this->private;
+ state = frame->root->state;
+ finh = state->finh;
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": CHMOD %s", finh->unique,
- state->loc.path);
+ fuse_log_eh(this, "op_ret: %d, op_errno: %d, %"PRIu64", %s() %s => "
+ "gfid: %s", op_ret, op_errno, frame->root->unique,
+ gf_fop_list[frame->root->op], state->loc.path,
+ state->loc.inode ? uuid_utoa (state->loc.inode->gfid) : "");
- FUSE_FOP (state, fuse_attr_cbk, GF_FOP_CHMOD,
- chmod, &state->loc, fsi->mode);
- }
-}
+ if (op_ret == 0) {
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": %s() %s => %"PRIu64, frame->root->unique,
+ gf_fop_list[frame->root->op],
+ state->loc.path ? state->loc.path : "ERR",
+ statpost->ia_ino);
+ statpost->ia_blksize = this->ctx->page_size;
+ gf_fuse_stat2attr (statpost, &fao.attr, priv->enable_ino32);
-static void
-do_chown (xlator_t *this, fuse_in_header_t *finh, struct fuse_setattr_in *fsi)
-{
- fuse_state_t *state = NULL;
- fd_t *fd = NULL;
- int32_t ret = -1;
- uid_t uid = 0;
- gid_t gid = 0;
+ fao.attr_valid = calc_timeout_sec (priv->attribute_timeout);
+ fao.attr_valid_nsec =
+ calc_timeout_nsec (priv->attribute_timeout);
- uid = (fsi->valid & FATTR_UID) ? fsi->uid : (uid_t) -1;
- gid = (fsi->valid & FATTR_GID) ? fsi->gid : (gid_t) -1;
- GET_STATE (this, finh, state);
+ if (state->truncate_needed) {
+ fuse_do_truncate (state, state->size);
+ } else {
+#if FUSE_KERNEL_MINOR_VERSION >= 9
+ priv->proto_minor >= 9 ?
+ send_fuse_obj (this, finh, &fao) :
+ send_fuse_data (this, finh, &fao,
+ FUSE_COMPAT_ATTR_OUT_SIZE);
+#else
+ send_fuse_obj (this, finh, &fao);
+#endif
+ op_done = 1;
+ }
+ } else {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "%"PRIu64": %s() %s => -1 (%s)", frame->root->unique,
+ gf_fop_list[frame->root->op],
+ state->loc.path ? state->loc.path : "ERR",
+ strerror (op_errno));
- if (fsi->valid & FATTR_FH) {
- fd = FH_TO_FD (fsi->fh);
- state->fd = fd;
+ send_fuse_err (this, finh, op_errno);
+ op_done = 1;
}
- if (fd) {
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": FCHOWN %p", finh->unique, fd);
-
- FUSE_FOP (state, fuse_attr_cbk, GF_FOP_FCHOWN,
- fchown, fd, uid, gid);
- } else {
- ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL);
- if ((state->loc.inode == NULL) ||
- (ret < 0)) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": CHOWN %"PRIu64" (%s) (fuse_loc_fill() failed)",
- finh->unique, finh->nodeid,
- state->loc.path);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
- return;
- }
+ if (op_done) {
+ free_fuse_state (state);
+ }
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": CHOWN %s", finh->unique,
- state->loc.path);
+ STACK_DESTROY (frame->root);
- FUSE_FOP (state, fuse_attr_cbk, GF_FOP_CHOWN,
- chown, &state->loc, uid, gid);
- }
+ return 0;
}
-
-static void
-do_truncate (xlator_t *this, fuse_in_header_t *finh, struct fuse_setattr_in *fsi)
+static int32_t
+fattr_to_gf_set_attr (int32_t valid)
{
- fuse_state_t *state = NULL;
- fd_t *fd = NULL;
- int32_t ret = -1;
+ int32_t gf_valid = 0;
- GET_STATE (this, finh, state);
+ if (valid & FATTR_MODE)
+ gf_valid |= GF_SET_ATTR_MODE;
- if (fsi->valid & FATTR_FH) {
- fd = FH_TO_FD (fsi->fh);
- state->fd = fd;
- }
+ if (valid & FATTR_UID)
+ gf_valid |= GF_SET_ATTR_UID;
- if (fd) {
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": FTRUNCATE %p/%"PRId64, finh->unique,
- fd, fsi->size);
+ if (valid & FATTR_GID)
+ gf_valid |= GF_SET_ATTR_GID;
- FUSE_FOP (state, fuse_attr_cbk, GF_FOP_FTRUNCATE,
- ftruncate, fd, fsi->size);
- } else {
- ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL);
- if ((state->loc.inode == NULL) ||
- (ret < 0)) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": TRUNCATE %s/%"PRId64" (fuse_loc_fill() failed)",
- finh->unique, state->loc.path,
- fsi->size);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
- return;
- }
+ if (valid & FATTR_ATIME)
+ gf_valid |= GF_SET_ATTR_ATIME;
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": TRUNCATE %s/%"PRId64"(%"PRIu64")",
- finh->unique,
- state->loc.path, fsi->size, finh->nodeid);
+ if (valid & FATTR_MTIME)
+ gf_valid |= GF_SET_ATTR_MTIME;
- FUSE_FOP (state, fuse_attr_cbk, GF_FOP_TRUNCATE,
- truncate, &state->loc, fsi->size);
- }
+ if (valid & FATTR_SIZE)
+ gf_valid |= GF_SET_ATTR_SIZE;
- return;
+ return gf_valid;
}
+#define FATTR_MASK (FATTR_SIZE \
+ | FATTR_UID | FATTR_GID \
+ | FATTR_ATIME | FATTR_MTIME \
+ | FATTR_MODE)
-static void
-do_utimes (xlator_t *this, fuse_in_header_t *finh, struct fuse_setattr_in *fsi)
+void
+fuse_setattr_resume (fuse_state_t *state)
{
- fuse_state_t *state = NULL;
- struct timespec tv[2];
- int32_t ret = -1;
-
- tv[0].tv_sec = fsi->atime;
- tv[0].tv_nsec = fsi->atimensec;
- tv[1].tv_sec = fsi->mtime;
- tv[1].tv_nsec = fsi->mtimensec;
-
- GET_STATE (this, finh, state);
- ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL);
- if ((state->loc.inode == NULL) ||
- (ret < 0)) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": UTIMENS %s (fuse_loc_fill() failed)",
- finh->unique, state->loc.path);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
+ if (!state->fd && !state->loc.inode) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "%"PRIu64": SETATTR %"PRIu64" (%s) resolution failed",
+ state->finh->unique, state->finh->nodeid,
+ uuid_utoa (state->resolve.gfid));
+ send_fuse_err (state->this, state->finh, ENOENT);
+ free_fuse_state (state);
return;
}
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": UTIMENS (%"PRIu64")%s", finh->unique,
- finh->nodeid, state->loc.path);
+ "%"PRIu64": SETATTR (%"PRIu64")%s", state->finh->unique,
+ state->finh->nodeid, state->loc.path);
+
+#ifdef GF_TEST_FFOP
+ /* this is for calls like 'fchmod()' */
+ if (!state->fd)
+ state->fd = fd_lookup (state->loc.inode, state->finh->pid);
+#endif /* GF_TEST_FFOP */
+
+ if ((state->valid & (FATTR_MASK)) != FATTR_SIZE) {
+ if (state->fd &&
+ !((state->valid & FATTR_ATIME) ||
+ (state->valid & FATTR_MTIME))) {
+ /*
+ there is no "futimes" call, so don't send
+ fsetattr if ATIME or MTIME is set
+ */
+
+ FUSE_FOP (state, fuse_setattr_cbk, GF_FOP_FSETATTR,
+ fsetattr, state->fd, &state->attr,
+ fattr_to_gf_set_attr (state->valid),
+ state->xdata);
+ } else {
+ FUSE_FOP (state, fuse_setattr_cbk, GF_FOP_SETATTR,
+ setattr, &state->loc, &state->attr,
+ fattr_to_gf_set_attr (state->valid),
+ state->xdata);
+ }
+ } else {
+ fuse_do_truncate (state, state->size);
+ }
- FUSE_FOP (state, fuse_attr_cbk, GF_FOP_UTIMENS,
- utimens, &state->loc, tv);
}
-
static void
fuse_setattr (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
struct fuse_setattr_in *fsi = msg;
- if (fsi->valid & FATTR_MODE)
- do_chmod (this, finh, fsi);
- else if (fsi->valid & (FATTR_UID | FATTR_GID))
- do_chown (this, finh, fsi);
- else if (fsi->valid & FATTR_SIZE)
- do_truncate (this, finh, fsi);
- else if (fsi->valid & (FATTR_ATIME | FATTR_MTIME))
- do_utimes (this, finh, fsi);
- else
- /* As of now, getattr uses only the header.
- * If it happens to change, we'll have to
- * do some refactoring...
- */
- fuse_getattr (this, finh, NULL);
-}
+ fuse_private_t *priv = NULL;
+ fuse_state_t *state = NULL;
+ GET_STATE (this, finh, state);
-static int gf_fuse_xattr_enotsup_log;
+ if (fsi->valid & FATTR_FH &&
+ !(fsi->valid & (FATTR_ATIME|FATTR_MTIME))) {
+ /* We need no loc if kernel sent us an fd and
+ * we are not fiddling with times */
+ state->fd = FH_TO_FD (fsi->fh);
+ fuse_resolve_fd_init (state, &state->resolve, state->fd);
+ } else {
+ fuse_resolve_inode_init (state, &state->resolve, finh->nodeid);
+ }
+
+ /*
+ * This is just stub code demonstrating how to retrieve
+ * lock_owner in setattr, according to the FUSE proto.
+ * We do not make use of ATM. Its purpose is supporting
+ * mandatory locking, but getting that right is further
+ * down the road. Cf.
+ *
+ * http://thread.gmane.org/gmane.comp.file-systems.fuse.devel/
+ * 4962/focus=4982
+ *
+ * http://git.kernel.org/?p=linux/kernel/git/torvalds/
+ * linux-2.6.git;a=commit;h=v2.6.23-5896-gf333211
+ */
+ priv = this->private;
+#if FUSE_KERNEL_MINOR_VERSION >= 9
+ if (priv->proto_minor >= 9 && fsi->valid & FATTR_LOCKOWNER)
+ state->lk_owner = fsi->lock_owner;
+#endif
+
+ state->valid = fsi->valid;
+
+ if ((fsi->valid & (FATTR_MASK)) != FATTR_SIZE) {
+ if (fsi->valid & FATTR_SIZE) {
+ state->size = fsi->size;
+ state->truncate_needed = _gf_true;
+ }
+
+ state->attr.ia_size = fsi->size;
+ state->attr.ia_atime = fsi->atime;
+ state->attr.ia_mtime = fsi->mtime;
+ state->attr.ia_atime_nsec = fsi->atimensec;
+ state->attr.ia_mtime_nsec = fsi->mtimensec;
+
+ state->attr.ia_prot = ia_prot_from_st_mode (fsi->mode);
+ state->attr.ia_uid = fsi->uid;
+ state->attr.ia_gid = fsi->gid;
+ } else {
+ state->size = fsi->size;
+ }
+
+ fuse_resolve_and_resume (state, fuse_setattr_resume);
+}
static int
fuse_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
fuse_state_t *state = frame->root->state;
fuse_in_header_t *finh = state->finh;
+ fuse_log_eh_fop(this, state, frame, op_ret, op_errno);
+
if (op_ret == 0) {
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
"%"PRIu64": %s() %s => 0", frame->root->unique,
@@ -1005,46 +1198,47 @@ fuse_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
send_fuse_err (this, finh, 0);
} else {
- if (frame->root->op == GF_FOP_SETXATTR) {
- op_ret = gf_compat_setxattr (state->dict);
- if (op_ret == 0)
- op_errno = 0;
- if (op_errno == ENOTSUP) {
- gf_fuse_xattr_enotsup_log++;
- if (!(gf_fuse_xattr_enotsup_log % GF_UNIVERSAL_ANSWER))
- gf_log ("glusterfs-fuse",
- GF_LOG_CRITICAL,
- "extended attribute not "
- "supported by the backend "
- "storage");
- }
- } else {
- if ((frame->root->op == GF_FOP_REMOVEXATTR)
- && (op_errno == ENOATTR)) {
- goto nolog;
- }
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": %s() %s => -1 (%s)",
- frame->root->unique,
- gf_fop_list[frame->root->op],
- state->loc.path ? state->loc.path : "ERR",
- strerror (op_errno));
- }
- nolog:
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "%"PRIu64": %s() %s => -1 (%s)",
+ frame->root->unique,
+ gf_fop_list[frame->root->op],
+ state->loc.path ? state->loc.path : "ERR",
+ strerror (op_errno));
send_fuse_err (this, finh, op_errno);
}
- free_state (state);
+ free_fuse_state (state);
STACK_DESTROY (frame->root);
return 0;
}
+static int
+fuse_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ return fuse_err_cbk (frame, cookie, this, op_ret, op_errno, xdata);
+}
+
+static int
+fuse_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ if (op_ret == -1 && op_errno == ENOTSUP)
+ GF_LOG_OCCASIONALLY (gf_fuse_xattr_enotsup_log,
+ "glusterfs-fuse", GF_LOG_CRITICAL,
+ "extended attribute not supported "
+ "by the backend storage");
+
+ return fuse_err_cbk (frame, cookie, this, op_ret, op_errno, xdata);
+}
static int
fuse_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
fuse_state_t *state = NULL;
fuse_in_header_t *finh = NULL;
@@ -1052,11 +1246,14 @@ fuse_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
state = frame->root->state;
finh = state->finh;
- if (op_ret == 0)
- inode_unlink (state->loc.inode, state->loc.parent,
- state->loc.name);
+ fuse_log_eh (this, "op_ret: %d, op_errno: %d, %"PRIu64": %s() %s => "
+ "gfid: %s", op_ret, op_errno, frame->root->unique,
+ gf_fop_list[frame->root->op], state->loc.path,
+ state->loc.inode ? uuid_utoa (state->loc.inode->gfid) : "");
if (op_ret == 0) {
+ inode_unlink (state->loc.inode, state->loc.parent,
+ state->loc.name);
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
"%"PRIu64": %s() %s => 0", frame->root->unique,
gf_fop_list[frame->root->op], state->loc.path);
@@ -1072,49 +1269,55 @@ fuse_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
send_fuse_err (this, finh, op_errno);
}
- free_state (state);
+ free_fuse_state (state);
STACK_DESTROY (frame->root);
return 0;
}
+void
+fuse_access_resume (fuse_state_t *state)
+{
+ if (!state->loc.inode) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "%"PRIu64": ACCESS %"PRIu64" (%s) resolution failed",
+ state->finh->unique, state->finh->nodeid,
+ uuid_utoa (state->resolve.gfid));
+ send_fuse_err (state->this, state->finh, ENOENT);
+ free_fuse_state (state);
+ return;
+ }
+
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64" ACCESS %s/%"PRIu64" mask=%d",
+ state->finh->unique, state->loc.path,
+ state->finh->nodeid, state->mask);
+
+ FUSE_FOP (state, fuse_err_cbk, GF_FOP_ACCESS, access,
+ &state->loc, state->mask, state->xdata);
+}
static void
fuse_access (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
struct fuse_access_in *fai = msg;
-
fuse_state_t *state = NULL;
- int32_t ret = -1;
GET_STATE (this, finh, state);
- ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL);
- if ((state->loc.inode == NULL) ||
- (ret < 0)) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": ACCESS %"PRIu64" (%s) (fuse_loc_fill() failed)",
- finh->unique, finh->nodeid, state->loc.path);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
- return;
- }
+ fuse_resolve_inode_init (state, &state->resolve, finh->nodeid);
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64" ACCESS %s/%"PRIu64" mask=%d", finh->unique,
- state->loc.path, finh->nodeid, fai->mask);
+ state->mask = fai->mask;
- FUSE_FOP (state, fuse_err_cbk,
- GF_FOP_ACCESS, access,
- &state->loc, fai->mask);
+ fuse_resolve_and_resume (state, fuse_access_resume);
return;
}
-
static int
fuse_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, const char *linkname)
+ int32_t op_ret, int32_t op_errno, const char *linkname,
+ struct iatt *buf, dict_t *xdata)
{
fuse_state_t *state = NULL;
fuse_in_header_t *finh = NULL;
@@ -1122,6 +1325,12 @@ fuse_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
state = frame->root->state;
finh = state->finh;
+ fuse_log_eh (this, "op_ret: %d, op_errno: %d %"PRIu64": %s() => %s"
+ " linkname: %s, gfid: %s", op_ret, op_errno,
+ frame->root->unique, gf_fop_list[frame->root->op],
+ state->loc.gfid, linkname,
+ uuid_utoa (state->loc.gfid));
+
if (op_ret > 0) {
((char *)linkname)[op_ret] = '\0';
@@ -1138,42 +1347,80 @@ fuse_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
send_fuse_err (this, finh, op_errno);
}
- free_state (state);
+ free_fuse_state (state);
STACK_DESTROY (frame->root);
return 0;
}
+void
+fuse_readlink_resume (fuse_state_t *state)
+{
+ if (!state->loc.inode) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "READLINK %"PRIu64" (%s) resolution failed",
+ state->finh->unique, uuid_utoa (state->resolve.gfid));
+ send_fuse_err (state->this, state->finh, ENOENT);
+ free_fuse_state (state);
+ return;
+ }
+
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64" READLINK %s/%s", state->finh->unique,
+ state->loc.path, uuid_utoa (state->loc.inode->gfid));
+
+ FUSE_FOP (state, fuse_readlink_cbk, GF_FOP_READLINK,
+ readlink, &state->loc, 4096, state->xdata);
+}
static void
fuse_readlink (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
fuse_state_t *state = NULL;
- int32_t ret = -1;
GET_STATE (this, finh, state);
- ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL);
- if ((state->loc.inode == NULL) ||
- (ret < 0)) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64" READLINK %s/%"PRId64" (fuse_loc_fill() returned NULL inode)",
- finh->unique, state->loc.path,
- state->loc.inode->ino);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
- return;
- }
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64" READLINK %s/%"PRId64, finh->unique,
- state->loc.path, state->loc.inode->ino);
+ fuse_resolve_inode_init (state, &state->resolve, finh->nodeid);
- FUSE_FOP (state, fuse_readlink_cbk, GF_FOP_READLINK,
- readlink, &state->loc, 4096);
+ fuse_resolve_and_resume (state, fuse_readlink_resume);
return;
}
+void
+fuse_mknod_resume (fuse_state_t *state)
+{
+ if (!state->loc.parent) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "MKNOD %"PRIu64"/%s (%s/%s) resolution failed",
+ state->finh->nodeid, state->resolve.bname,
+ uuid_utoa (state->resolve.gfid), state->resolve.bname);
+ send_fuse_err (state->this, state->finh, ENOENT);
+ free_fuse_state (state);
+ return;
+ }
+
+ if (state->resolve.op_errno == ENOENT) {
+ state->resolve.op_ret = 0;
+ state->resolve.op_errno = 0;
+ }
+
+ if (state->loc.inode) {
+ gf_log (state->this->name, GF_LOG_DEBUG, "inode already present");
+ inode_unref (state->loc.inode);
+ state->loc.inode = NULL;
+ }
+
+ state->loc.inode = inode_new (state->loc.parent->table);
+
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": MKNOD %s", state->finh->unique,
+ state->loc.path);
+
+ FUSE_FOP (state, fuse_newentry_cbk, GF_FOP_MKNOD,
+ mknod, &state->loc, state->mode, state->rdev, state->umask,
+ state->xdata);
+}
static void
fuse_mknod (xlator_t *this, fuse_in_header_t *finh, void *msg)
@@ -1186,166 +1433,224 @@ fuse_mknod (xlator_t *this, fuse_in_header_t *finh, void *msg)
int32_t ret = -1;
priv = this->private;
+#if FUSE_KERNEL_MINOR_VERSION >= 12
if (priv->proto_minor < 12)
name = (char *)msg + FUSE_COMPAT_MKNOD_IN_SIZE;
+#endif
GET_STATE (this, finh, state);
- ret = fuse_loc_fill (&state->loc, state, 0, finh->nodeid, name);
- if (ret < 0) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64" MKNOD %s (fuse_loc_fill() failed)",
- finh->unique, state->loc.path);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
+
+ uuid_generate (state->gfid);
+
+ fuse_resolve_entry_init (state, &state->resolve, finh->nodeid, name);
+
+ state->mode = fmi->mode;
+ state->rdev = fmi->rdev;
+
+ priv = this->private;
+#if FUSE_KERNEL_MINOR_VERSION >=12
+ FUSE_ENTRY_CREATE(this, priv, finh, state, fmi, "MKNOD");
+#endif
+
+ fuse_resolve_and_resume (state, fuse_mknod_resume);
+
+ return;
+}
+
+void
+fuse_mkdir_resume (fuse_state_t *state)
+{
+ if (!state->loc.parent) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "MKDIR %"PRIu64" (%s/%s) resolution failed",
+ state->finh->nodeid, uuid_utoa (state->resolve.gfid),
+ state->resolve.bname);
+ send_fuse_err (state->this, state->finh, ENOENT);
+ free_fuse_state (state);
return;
}
- state->loc.inode = inode_new (state->itable);
+ if (state->resolve.op_errno == ENOENT) {
+ state->resolve.op_ret = 0;
+ state->resolve.op_errno = 0;
+ }
+
+ if (state->loc.inode) {
+ gf_log (state->this->name, GF_LOG_DEBUG, "inode already present");
+ inode_unref (state->loc.inode);
+ state->loc.inode = NULL;
+ }
+
+ state->loc.inode = inode_new (state->loc.parent->table);
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": MKNOD %s", finh->unique,
+ "%"PRIu64": MKDIR %s", state->finh->unique,
state->loc.path);
- FUSE_FOP (state, fuse_entry_cbk, GF_FOP_MKNOD,
- mknod, &state->loc, fmi->mode, fmi->rdev);
-
- return;
+ FUSE_FOP (state, fuse_newentry_cbk, GF_FOP_MKDIR,
+ mkdir, &state->loc, state->mode, state->umask, state->xdata);
}
-
static void
fuse_mkdir (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
struct fuse_mkdir_in *fmi = msg;
char *name = (char *)(fmi + 1);
+ fuse_private_t *priv = NULL;
fuse_state_t *state;
int32_t ret = -1;
GET_STATE (this, finh, state);
- ret = fuse_loc_fill (&state->loc, state, 0, finh->nodeid, name);
- if (ret < 0) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64" MKDIR %s (fuse_loc_fill() failed)",
- finh->unique, state->loc.path);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
- return;
- }
- state->loc.inode = inode_new (state->itable);
+ uuid_generate (state->gfid);
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": MKDIR %s", finh->unique,
- state->loc.path);
+ fuse_resolve_entry_init (state, &state->resolve, finh->nodeid, name);
+
+ state->mode = fmi->mode;
- FUSE_FOP (state, fuse_entry_cbk, GF_FOP_MKDIR,
- mkdir, &state->loc, fmi->mode);
+ priv = this->private;
+#if FUSE_KERNEL_MINOR_VERSION >=12
+ FUSE_ENTRY_CREATE(this, priv, finh, state, fmi, "MKDIR");
+#endif
+
+ fuse_resolve_and_resume (state, fuse_mkdir_resume);
return;
}
+void
+fuse_unlink_resume (fuse_state_t *state)
+{
+ if (!state->loc.parent || !state->loc.inode) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "UNLINK %"PRIu64" (%s/%s) resolution failed",
+ state->finh->nodeid, uuid_utoa (state->resolve.gfid),
+ state->resolve.bname);
+ send_fuse_err (state->this, state->finh, ENOENT);
+ free_fuse_state (state);
+ return;
+ }
+
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": UNLINK %s", state->finh->unique,
+ state->loc.path);
+
+ FUSE_FOP (state, fuse_unlink_cbk, GF_FOP_UNLINK,
+ unlink, &state->loc, 0, state->xdata);
+}
static void
fuse_unlink (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
char *name = msg;
-
fuse_state_t *state = NULL;
- int32_t ret = -1;
GET_STATE (this, finh, state);
- ret = fuse_loc_fill (&state->loc, state, 0, finh->nodeid, name);
+ fuse_resolve_entry_init (state, &state->resolve, finh->nodeid, name);
- if ((state->loc.inode == NULL) ||
- (ret < 0)) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": UNLINK %s (fuse_loc_fill() returned NULL inode)",
- finh->unique, state->loc.path);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
+ fuse_resolve_and_resume (state, fuse_unlink_resume);
+
+ return;
+}
+
+void
+fuse_rmdir_resume (fuse_state_t *state)
+{
+ if (!state->loc.parent || !state->loc.inode) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "RMDIR %"PRIu64" (%s/%s) resolution failed",
+ state->finh->nodeid, uuid_utoa (state->resolve.gfid),
+ state->resolve.bname);
+ send_fuse_err (state->this, state->finh, ENOENT);
+ free_fuse_state (state);
return;
}
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": UNLINK %s", finh->unique,
+ "%"PRIu64": RMDIR %s", state->finh->unique,
state->loc.path);
- FUSE_FOP (state, fuse_unlink_cbk, GF_FOP_UNLINK,
- unlink, &state->loc);
-
- return;
+ FUSE_FOP (state, fuse_unlink_cbk, GF_FOP_RMDIR,
+ rmdir, &state->loc, 0, state->xdata);
}
-
static void
fuse_rmdir (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
char *name = msg;
-
fuse_state_t *state = NULL;
- int32_t ret = -1;
GET_STATE (this, finh, state);
- ret = fuse_loc_fill (&state->loc, state, 0, finh->nodeid, name);
- if ((state->loc.inode == NULL) ||
- (ret < 0)) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": RMDIR %s (fuse_loc_fill() failed)",
- finh->unique, state->loc.path);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
- return;
- }
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": RMDIR %s", finh->unique,
- state->loc.path);
+ fuse_resolve_entry_init (state, &state->resolve, finh->nodeid, name);
- FUSE_FOP (state, fuse_unlink_cbk, GF_FOP_RMDIR,
- rmdir, &state->loc);
+ fuse_resolve_and_resume (state, fuse_rmdir_resume);
return;
}
+void
+fuse_symlink_resume (fuse_state_t *state)
+{
+ if (!state->loc.parent) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "SYMLINK %"PRIu64" (%s/%s) -> %s resolution failed",
+ state->finh->nodeid, uuid_utoa (state->resolve.gfid),
+ state->resolve.bname, state->name);
+ send_fuse_err (state->this, state->finh, ENOENT);
+ free_fuse_state (state);
+ return;
+ }
+
+ if (state->resolve.op_errno == ENOENT) {
+ state->resolve.op_ret = 0;
+ state->resolve.op_errno = 0;
+ }
+
+ if (state->loc.inode) {
+ gf_log (state->this->name, GF_LOG_DEBUG, "inode already present");
+ inode_unref (state->loc.inode);
+ state->loc.inode = NULL;
+ }
+
+ state->loc.inode = inode_new (state->loc.parent->table);
+
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": SYMLINK %s -> %s", state->finh->unique,
+ state->loc.path, state->name);
+
+ FUSE_FOP (state, fuse_newentry_cbk, GF_FOP_SYMLINK,
+ symlink, state->name, &state->loc, state->umask, state->xdata);
+}
static void
fuse_symlink (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
- char *name = msg;
- char *linkname = name + strlen (name) + 1;
-
+ char *name = msg;
+ char *linkname = name + strlen (name) + 1;
fuse_state_t *state = NULL;
- int32_t ret = -1;
GET_STATE (this, finh, state);
- ret = fuse_loc_fill (&state->loc, state, 0, finh->nodeid, name);
- if (ret < 0) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64" SYMLINK %s -> %s (fuse_loc_fill() failed)",
- finh->unique, state->loc.path, linkname);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
- return;
- }
- state->loc.inode = inode_new (state->itable);
+ uuid_generate (state->gfid);
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": SYMLINK %s -> %s", finh->unique,
- state->loc.path, linkname);
+ fuse_resolve_entry_init (state, &state->resolve, finh->nodeid, name);
- FUSE_FOP (state, fuse_entry_cbk, GF_FOP_SYMLINK,
- symlink, linkname, &state->loc);
+ state->name = gf_strdup (linkname);
+
+ fuse_resolve_and_resume (state, fuse_symlink_resume);
return;
}
-
int
fuse_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
fuse_state_t *state = NULL;
fuse_in_header_t *finh = NULL;
@@ -1353,22 +1658,30 @@ fuse_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
state = frame->root->state;
finh = state->finh;
+ fuse_log_eh (this, "op_ret: %d, op_errno: %d, %"PRIu64": %s() "
+ "path: %s parent: %s ==> path: %s parent: %s"
+ "gfid: %s", op_ret, op_errno, frame->root->unique,
+ gf_fop_list[frame->root->op], state->loc.path,
+ state->loc.parent?uuid_utoa (state->loc.parent->gfid):"",
+ state->loc2.path,
+ state->loc2.parent?uuid_utoa (state->loc2.parent->gfid):"",
+ state->loc.inode?uuid_utoa (state->loc.inode->gfid):"");
+
if (op_ret == 0) {
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": %s -> %s => 0 (buf->st_ino=%"PRId64" , loc->ino=%"PRId64")",
+ "%"PRIu64": %s -> %s => 0 (buf->ia_ino=%"PRIu64")",
frame->root->unique, state->loc.path, state->loc2.path,
- buf->st_ino, state->loc.ino);
+ buf->ia_ino);
{
/* ugly ugly - to stay blind to situation where
rename happens on a new inode
*/
- buf->st_ino = state->loc.ino;
- buf->st_mode = state->loc.inode->st_mode;
+ buf->ia_type = state->loc.inode->ia_type;
}
- buf->st_blksize = this->ctx->page_size;
+ buf->ia_blksize = this->ctx->page_size;
- inode_rename (state->itable,
+ inode_rename (state->loc.parent->table,
state->loc.parent, state->loc.name,
state->loc2.parent, state->loc2.name,
state->loc.inode, buf);
@@ -1382,11 +1695,56 @@ fuse_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
send_fuse_err (this, finh, op_errno);
}
- free_state (state);
+ free_fuse_state (state);
STACK_DESTROY (frame->root);
return 0;
}
+void
+fuse_rename_resume (fuse_state_t *state)
+{
+ char loc_uuid[64] = {0,};
+ char loc2_uuid[64] = {0,};
+
+ if (!state->loc.parent || !state->loc.inode) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "RENAME %"PRIu64" %s/%s -> %s/%s src resolution failed",
+ state->finh->unique,
+ uuid_utoa_r (state->resolve.gfid, loc_uuid),
+ state->resolve.bname,
+ uuid_utoa_r (state->resolve2.gfid, loc2_uuid),
+ state->resolve2.bname);
+
+ send_fuse_err (state->this, state->finh, ENOENT);
+ free_fuse_state (state);
+ return;
+ }
+
+ if (!state->loc2.parent) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "RENAME %"PRIu64" %s/%s -> %s/%s dst resolution failed",
+ state->finh->unique,
+ uuid_utoa_r (state->resolve.gfid, loc_uuid),
+ state->resolve.bname,
+ uuid_utoa_r (state->resolve2.gfid, loc2_uuid),
+ state->resolve2.bname);
+
+ send_fuse_err (state->this, state->finh, ENOENT);
+ free_fuse_state (state);
+ return;
+ }
+
+ state->resolve.op_ret = 0;
+ state->resolve2.op_ret = 0;
+
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": RENAME `%s (%s)' -> `%s (%s)'",
+ state->finh->unique, state->loc.path, loc_uuid,
+ state->loc2.path, loc2_uuid);
+
+ FUSE_FOP (state, fuse_rename_cbk, GF_FOP_RENAME,
+ rename, &state->loc, &state->loc2, state->xdata);
+}
static void
fuse_rename (xlator_t *this, fuse_in_header_t *finh, void *msg)
@@ -1394,129 +1752,122 @@ fuse_rename (xlator_t *this, fuse_in_header_t *finh, void *msg)
struct fuse_rename_in *fri = msg;
char *oldname = (char *)(fri + 1);
char *newname = oldname + strlen (oldname) + 1;
-
fuse_state_t *state = NULL;
- int32_t ret = -1;
GET_STATE (this, finh, state);
- ret = fuse_loc_fill (&state->loc, state, 0, finh->nodeid, oldname);
- if ((state->loc.inode == NULL) ||
- (ret < 0)) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "for %s %"PRIu64": RENAME `%s' -> `%s' (fuse_loc_fill() failed)",
- state->loc.path, finh->unique, state->loc.path,
- state->loc2.path);
+ fuse_resolve_entry_init (state, &state->resolve, finh->nodeid, oldname);
+
+ fuse_resolve_entry_init (state, &state->resolve2, fri->newdir, newname);
+
+ fuse_resolve_and_resume (state, fuse_rename_resume);
+
+ return;
+}
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
+void
+fuse_link_resume (fuse_state_t *state)
+{
+ if (!state->loc2.inode || !state->loc.parent) {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "fuse_loc_fill() failed %"PRIu64": LINK %s %s",
+ state->finh->unique, state->loc2.path, state->loc.path);
+ send_fuse_err (state->this, state->finh, ENOENT);
+ free_fuse_state (state);
return;
}
- ret = fuse_loc_fill (&state->loc2, state, 0, fri->newdir, newname);
- if (ret < 0) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "for %s %"PRIu64": RENAME `%s' -> `%s' (fuse_loc_fill() failed)",
- state->loc.path, finh->unique, state->loc.path,
- state->loc2.path);
+ state->resolve.op_ret = 0;
+ state->resolve2.op_ret = 0;
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
- return;
- }
+ if (state->loc.inode) {
+ inode_unref (state->loc.inode);
+ state->loc.inode = NULL;
+ }
+ state->loc.inode = inode_ref (state->loc2.inode);
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": RENAME `%s (%"PRId64")' -> `%s (%"PRId64")'",
- finh->unique, state->loc.path, state->loc.ino,
- state->loc2.path, state->loc2.ino);
-
- FUSE_FOP (state, fuse_rename_cbk, GF_FOP_RENAME,
- rename, &state->loc, &state->loc2);
+ "%"PRIu64": LINK() %s -> %s",
+ state->finh->unique, state->loc2.path,
+ state->loc.path);
- return;
+ FUSE_FOP (state, fuse_newentry_cbk, GF_FOP_LINK,
+ link, &state->loc2, &state->loc, state->xdata);
}
-
static void
fuse_link (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
struct fuse_link_in *fli = msg;
char *name = (char *)(fli + 1);
-
fuse_state_t *state = NULL;
- int32_t ret = -1;
GET_STATE (this, finh, state);
- ret = fuse_loc_fill (&state->loc, state, 0, finh->nodeid, name);
- if (ret == 0)
- ret = fuse_loc_fill (&state->loc2, state, fli->oldnodeid, 0,
- NULL);
-
- if ((state->loc2.inode == NULL) ||
- (ret < 0)) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "fuse_loc_fill() failed for %s %"PRIu64": LINK %s %s",
- state->loc2.path, finh->unique,
- state->loc2.path, state->loc.path);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
- return;
- }
+ fuse_resolve_inode_init (state, &state->resolve2, fli->oldnodeid);
- state->loc.inode = inode_ref (state->loc2.inode);
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": LINK() %s (%"PRId64") -> %s (%"PRId64")",
- finh->unique, state->loc2.path, state->loc2.ino,
- state->loc.path, state->loc.ino);
+ fuse_resolve_entry_init (state, &state->resolve, finh->nodeid, name);
- FUSE_FOP (state, fuse_entry_cbk, GF_FOP_LINK,
- link, &state->loc2, &state->loc);
+ fuse_resolve_and_resume (state, fuse_link_resume);
return;
}
-
static int
fuse_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- fd_t *fd, inode_t *inode, struct stat *buf)
+ fd_t *fd, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- fuse_state_t *state = NULL;
- fuse_in_header_t *finh = NULL;
- fuse_private_t *priv = NULL;
- struct fuse_out_header fouh = {0, };
- struct fuse_entry_out feo = {0, };
- struct fuse_open_out foo = {0, };
- struct iovec iov_out[3];
+ fuse_state_t *state = NULL;
+ fuse_in_header_t *finh = NULL;
+ fuse_private_t *priv = NULL;
+ struct fuse_out_header fouh = {0, };
+ struct fuse_entry_out feo = {0, };
+ struct fuse_open_out foo = {0, };
+ struct iovec iov_out[3];
+ inode_t *linked_inode = NULL;
state = frame->root->state;
priv = this->private;
finh = state->finh;
foo.open_flags = 0;
+ fuse_log_eh_fop(this, state, frame, op_ret, op_errno);
+
if (op_ret >= 0) {
foo.fh = (uintptr_t) fd;
- if (((state->flags & O_ACCMODE) != O_RDONLY) &&
- priv->direct_io_mode)
+ if (((priv->direct_io_mode == 2)
+ && ((state->flags & O_ACCMODE) != O_RDONLY))
+ || (priv->direct_io_mode == 1))
foo.open_flags |= FOPEN_DIRECT_IO;
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": %s() %s => %p (ino=%"PRId64")",
+ "%"PRIu64": %s() %s => %p (ino=%"PRIu64")",
frame->root->unique, gf_fop_list[frame->root->op],
- state->loc.path, fd, buf->st_ino);
+ state->loc.path, fd, buf->ia_ino);
- buf->st_blksize = this->ctx->page_size;
- stat2attr (buf, &feo.attr);
+ buf->ia_blksize = this->ctx->page_size;
+ gf_fuse_stat2attr (buf, &feo.attr, priv->enable_ino32);
- feo.nodeid = buf->st_ino;
+ linked_inode = inode_link (inode, state->loc.parent,
+ state->loc.name, buf);
-#ifdef GF_DARWIN_HOST_OS
- feo.generation = 0;
-#else
- feo.generation = buf->st_ctime;
-#endif
+ if (linked_inode != inode) {
+ /*
+ VERY racy code (if used anywhere else)
+ -- don't do this without understanding
+ */
+ inode_unref (fd->inode);
+ fd->inode = inode_ref (linked_inode);
+ }
+
+ inode_lookup (linked_inode);
+
+ inode_unref (linked_inode);
+
+ feo.nodeid = inode_to_fuse_nodeid (linked_inode);
feo.entry_valid = calc_timeout_sec (priv->entry_timeout);
feo.entry_valid_nsec = calc_timeout_nsec (priv->entry_timeout);
@@ -1524,26 +1875,24 @@ fuse_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
feo.attr_valid_nsec =
calc_timeout_nsec (priv->attribute_timeout);
- inode_link (inode, state->loc.parent,
- state->loc.name, buf);
-
- inode_lookup (inode);
-
- fd_ref (fd);
-
fouh.error = 0;
iov_out[0].iov_base = &fouh;
iov_out[1].iov_base = &feo;
+#if FUSE_KERNEL_MINOR_VERSION >= 9
iov_out[1].iov_len = priv->proto_minor >= 9 ?
sizeof (feo) :
FUSE_COMPAT_ENTRY_OUT_SIZE;
+#else
+ iov_out[1].iov_len = sizeof (feo);
+#endif
iov_out[2].iov_base = &foo;
iov_out[2].iov_len = sizeof (foo);
+
if (send_fuse_iov (this, finh, iov_out, 3) == ENOENT) {
gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
"create(%s) got EINTR", state->loc.path);
inode_forget (inode, 1);
- fd_unref (fd);
+ gf_fd_put (priv->fdtable, state->fd_no);
goto out;
}
@@ -1553,104 +1902,194 @@ fuse_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
"%"PRIu64": %s => -1 (%s)", finh->unique,
state->loc.path, strerror (op_errno));
send_fuse_err (this, finh, op_errno);
+ gf_fd_put (priv->fdtable, state->fd_no);
}
out:
- free_state (state);
+ free_fuse_state (state);
STACK_DESTROY (frame->root);
return 0;
}
+void
+fuse_create_resume (fuse_state_t *state)
+{
+ fd_t *fd = NULL;
+ fuse_private_t *priv = NULL;
+ fuse_fd_ctx_t *fdctx = NULL;
+
+ if (!state->loc.parent) {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "%"PRIu64" CREATE %s/%s resolution failed",
+ state->finh->unique, uuid_utoa (state->resolve.gfid),
+ state->resolve.bname);
+ send_fuse_err (state->this, state->finh, ENOENT);
+ free_fuse_state (state);
+ return;
+ }
+
+ if (state->resolve.op_errno == ENOENT) {
+ state->resolve.op_ret = 0;
+ state->resolve.op_errno = 0;
+ }
+
+ if (state->loc.inode) {
+ gf_log (state->this->name, GF_LOG_DEBUG,
+ "inode already present");
+ inode_unref (state->loc.inode);
+ }
+
+ state->loc.inode = inode_new (state->loc.parent->table);
+
+ fd = fd_create (state->loc.inode, state->finh->pid);
+ if (fd == NULL) {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "%"PRIu64" CREATE cannot create a new fd",
+ state->finh->unique);
+ send_fuse_err (state->this, state->finh, ENOMEM);
+ free_fuse_state (state);
+ return;
+ }
+
+ fdctx = fuse_fd_ctx_check_n_create (state->this, fd);
+ if (fdctx == NULL) {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "%"PRIu64" CREATE creation of fdctx failed",
+ state->finh->unique);
+ fd_unref (fd);
+ send_fuse_err (state->this, state->finh, ENOMEM);
+ free_fuse_state (state);
+ return;
+ }
+
+ priv = state->this->private;
+
+ state->fd_no = gf_fd_unused_get (priv->fdtable, fd);
+
+ state->fd = fd_ref (fd);
+ fd->flags = state->flags;
+
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": CREATE %s", state->finh->unique,
+ state->loc.path);
+
+ FUSE_FOP (state, fuse_create_cbk, GF_FOP_CREATE,
+ create, &state->loc, state->flags, state->mode,
+ state->umask, fd, state->xdata);
+
+}
static void
fuse_create (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
+#if FUSE_KERNEL_MINOR_VERSION >= 12
struct fuse_create_in *fci = msg;
+#else
+ struct fuse_open_in *fci = msg;
+#endif
char *name = (char *)(fci + 1);
fuse_private_t *priv = NULL;
fuse_state_t *state = NULL;
- fd_t *fd = NULL;
int32_t ret = -1;
priv = this->private;
+#if FUSE_KERNEL_MINOR_VERSION >= 12
if (priv->proto_minor < 12)
name = (char *)((struct fuse_open_in *)msg + 1);
+#endif
GET_STATE (this, finh, state);
+
+ uuid_generate (state->gfid);
+
+ fuse_resolve_entry_init (state, &state->resolve, finh->nodeid, name);
+
+ state->mode = fci->mode;
state->flags = fci->flags;
- ret = fuse_loc_fill (&state->loc, state, 0, finh->nodeid, name);
- if (ret < 0) {
+ priv = this->private;
+#if FUSE_KERNEL_MINOR_VERSION >=12
+ FUSE_ENTRY_CREATE(this, priv, finh, state, fci, "CREATE");
+#endif
+ fuse_resolve_and_resume (state, fuse_create_resume);
+
+ return;
+}
+
+void
+fuse_open_resume (fuse_state_t *state)
+{
+ fd_t *fd = NULL;
+ fuse_private_t *priv = NULL;
+ fuse_fd_ctx_t *fdctx = NULL;
+
+ if (!state->loc.inode) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "%"PRIu64": OPEN %s resolution failed",
+ state->finh->unique, uuid_utoa (state->resolve.gfid));
+
+ send_fuse_err (state->this, state->finh, ENOENT);
+ free_fuse_state (state);
+ return;
+ }
+
+ fd = fd_create (state->loc.inode, state->finh->pid);
+ if (!fd) {
+ gf_log ("fuse", GF_LOG_ERROR,
+ "fd is NULL");
+ send_fuse_err (state->this, state->finh, ENOENT);
+ free_fuse_state (state);
+ return;
+ }
+
+ fdctx = fuse_fd_ctx_check_n_create (state->this, fd);
+ if (fdctx == NULL) {
gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64" CREATE %s (fuse_loc_fill() failed)",
- finh->unique, state->loc.path);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
+ "%"PRIu64": OPEN creation of fdctx failed",
+ state->finh->unique);
+ fd_unref (fd);
+ send_fuse_err (state->this, state->finh, ENOMEM);
+ free_fuse_state (state);
return;
}
- state->loc.inode = inode_new (state->itable);
+ priv = state->this->private;
- fd = fd_create (state->loc.inode, finh->pid);
- state->fd = fd;
+ state->fd_no = gf_fd_unused_get (priv->fdtable, fd);
+ state->fd = fd_ref (fd);
fd->flags = state->flags;
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": CREATE %s", finh->unique,
+ "%"PRIu64": OPEN %s", state->finh->unique,
state->loc.path);
- FUSE_FOP (state, fuse_create_cbk, GF_FOP_CREATE,
- create, &state->loc, state->flags, fci->mode, fd);
-
- return;
+ FUSE_FOP (state, fuse_fd_cbk, GF_FOP_OPEN,
+ open, &state->loc, state->flags, fd, state->xdata);
}
-
static void
fuse_open (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
struct fuse_open_in *foi = msg;
-
fuse_state_t *state = NULL;
- fd_t *fd = NULL;
- int32_t ret = -1;
GET_STATE (this, finh, state);
- state->flags = foi->flags;
-
- ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL);
- if ((state->loc.inode == NULL) ||
- (ret < 0)) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": OPEN %s (fuse_loc_fill() failed)",
- finh->unique, state->loc.path);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
- return;
- }
-
- fd = fd_create (state->loc.inode, finh->pid);
- state->fd = fd;
- fd->flags = foi->flags;
+ fuse_resolve_inode_init (state, &state->resolve, finh->nodeid);
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": OPEN %s", finh->unique,
- state->loc.path);
+ state->flags = foi->flags;
- FUSE_FOP (state, fuse_fd_cbk, GF_FOP_OPEN,
- open, &state->loc, foi->flags, fd);
+ fuse_resolve_and_resume (state, fuse_open_resume);
return;
}
-
static int
fuse_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
struct iovec *vector, int32_t count,
- struct stat *stbuf, struct iobref *iobref)
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
{
fuse_state_t *state = NULL;
fuse_in_header_t *finh = NULL;
@@ -1660,19 +2099,22 @@ fuse_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
state = frame->root->state;
finh = state->finh;
+ fuse_log_eh_fop(this, state, frame, op_ret, op_errno);
+
if (op_ret >= 0) {
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": READ => %d/%"GF_PRI_SIZET",%"PRId64"/%"PRId64,
+ "%"PRIu64": READ => %d/%"GF_PRI_SIZET",%"PRId64"/%"PRIu64,
frame->root->unique,
- op_ret, state->size, state->off, stbuf->st_size);
+ op_ret, state->size, state->off, stbuf->ia_size);
- iov_out = CALLOC (count + 1, sizeof (*iov_out));
+ iov_out = GF_CALLOC (count + 1, sizeof (*iov_out),
+ gf_fuse_mt_iovec);
if (iov_out) {
fouh.error = 0;
iov_out[0].iov_base = &fouh;
memcpy (iov_out + 1, vector, count * sizeof (*iov_out));
send_fuse_iov (this, finh, iov_out, count + 1);
- FREE (iov_out);
+ GF_FREE (iov_out);
} else
send_fuse_err (this, finh, ENOMEM);
} else {
@@ -1683,42 +2125,58 @@ fuse_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
send_fuse_err (this, finh, op_errno);
}
- free_state (state);
+ free_fuse_state (state);
STACK_DESTROY (frame->root);
return 0;
}
+void
+fuse_readv_resume (fuse_state_t *state)
+{
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": READ (%p, size=%zu, offset=%"PRIu64")",
+ state->finh->unique, state->fd, state->size, state->off);
+
+ FUSE_FOP (state, fuse_readv_cbk, GF_FOP_READ, readv, state->fd,
+ state->size, state->off, state->io_flags, state->xdata);
+}
static void
fuse_readv (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
struct fuse_read_in *fri = msg;
+ fuse_private_t *priv = NULL;
fuse_state_t *state = NULL;
fd_t *fd = NULL;
GET_STATE (this, finh, state);
- state->size = fri->size;
- state->off = fri->offset;
fd = FH_TO_FD (fri->fh);
state->fd = fd;
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": READ (%p, size=%"PRIu32", offset=%"PRIu64")",
- finh->unique, fd, fri->size, fri->offset);
+ fuse_resolve_fd_init (state, &state->resolve, fd);
+
+ /* See comment by similar code in fuse_settatr */
+ priv = this->private;
+#if FUSE_KERNEL_MINOR_VERSION >= 9
+ if (priv->proto_minor >= 9 && fri->read_flags & FUSE_READ_LOCKOWNER)
+ state->lk_owner = fri->lock_owner;
+#endif
- FUSE_FOP (state, fuse_readv_cbk, GF_FOP_READ,
- readv, fd, fri->size, fri->offset);
+ state->size = fri->size;
+ state->off = fri->offset;
+ /* lets ignore 'fri->read_flags', but just consider 'fri->flags' */
+ state->io_flags = fri->flags;
+ fuse_resolve_and_resume (state, fuse_readv_resume);
}
-
static int
fuse_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- struct stat *stbuf)
+ struct iatt *stbuf, struct iatt *postbuf, dict_t *xdata)
{
fuse_state_t *state = NULL;
fuse_in_header_t *finh = NULL;
@@ -1727,11 +2185,13 @@ fuse_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
state = frame->root->state;
finh = state->finh;
+ fuse_log_eh_fop(this, state, frame, op_ret, op_errno);
+
if (op_ret >= 0) {
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": WRITE => %d/%"GF_PRI_SIZET",%"PRId64"/%"PRId64,
+ "%"PRIu64": WRITE => %d/%"GF_PRI_SIZET",%"PRId64"/%"PRIu64,
frame->root->unique,
- op_ret, state->size, state->off, stbuf->st_size);
+ op_ret, state->size, state->off, stbuf->ia_size);
fwo.size = op_ret;
send_fuse_obj (this, finh, &fwo);
@@ -1743,12 +2203,43 @@ fuse_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
send_fuse_err (this, finh, op_errno);
}
- free_state (state);
+ free_fuse_state (state);
STACK_DESTROY (frame->root);
return 0;
}
+void
+fuse_write_resume (fuse_state_t *state)
+{
+ struct iobref *iobref = NULL;
+ struct iobuf *iobuf = NULL;
+
+
+ iobref = iobref_new ();
+ if (!iobref) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "%"PRIu64": WRITE iobref allocation failed",
+ state->finh->unique);
+ send_fuse_err (state->this, state->finh, ENOMEM);
+
+ free_fuse_state (state);
+ return;
+ }
+
+ iobuf = ((fuse_private_t *) (state->this->private))->iobuf;
+ iobref_add (iobref, iobuf);
+
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": WRITE (%p, size=%"GF_PRI_SIZET", offset=%"PRId64")",
+ state->finh->unique, state->fd, state->size, state->off);
+
+ FUSE_FOP (state, fuse_writev_cbk, GF_FOP_WRITE, writev, state->fd,
+ &state->vector, 1, state->off, state->io_flags, iobref,
+ state->xdata);
+
+ iobref_unref (iobref);
+}
static void
fuse_write (xlator_t *this, fuse_in_header_t *finh, void *msg)
@@ -1761,44 +2252,46 @@ fuse_write (xlator_t *this, fuse_in_header_t *finh, void *msg)
fuse_private_t *priv = NULL;
fuse_state_t *state = NULL;
- struct iovec vector;
fd_t *fd = NULL;
- struct iobref *iobref = NULL;
- struct iobuf *iobuf = NULL;
priv = this->private;
GET_STATE (this, finh, state);
- state->size = fwi->size;
- state->off = fwi->offset;
fd = FH_TO_FD (fwi->fh);
state->fd = fd;
- vector.iov_base = msg;
- vector.iov_len = fwi->size;
+ state->size = fwi->size;
+ state->off = fwi->offset;
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": WRITE (%p, size=%"PRIu32", offset=%"PRId64")",
- finh->unique, fd, fwi->size, fwi->offset);
+ /* lets ignore 'fwi->write_flags', but just consider 'fwi->flags' */
+ state->io_flags = fwi->flags;
+ /* TODO: may need to handle below flag
+ (fwi->write_flags & FUSE_WRITE_CACHE);
+ */
- iobref = iobref_new ();
- if (!iobref) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "%"PRIu64": WRITE iobref allocation failed",
- finh->unique);
- free_state (state);
- return;
- }
- iobuf = ((fuse_private_t *) (state->this->private))->iobuf;
- iobref_add (iobref, iobuf);
+ fuse_resolve_fd_init (state, &state->resolve, fd);
- FUSE_FOP (state, fuse_writev_cbk, GF_FOP_WRITE,
- writev, fd, &vector, 1, fwi->offset, iobref);
+ /* See comment by similar code in fuse_settatr */
+ priv = this->private;
+#if FUSE_KERNEL_MINOR_VERSION >= 9
+ if (priv->proto_minor >= 9 && fwi->write_flags & FUSE_WRITE_LOCKOWNER)
+ state->lk_owner = fwi->lock_owner;
+#endif
+
+ state->vector.iov_base = msg;
+ state->vector.iov_len = fwi->size;
+
+ fuse_resolve_and_resume (state, fuse_write_resume);
- iobref_unref (iobref);
return;
}
+void
+fuse_flush_resume (fuse_state_t *state)
+{
+ FUSE_FOP (state, fuse_err_cbk, GF_FOP_FLUSH,
+ flush, state->fd, state->xdata);
+}
static void
fuse_flush (xlator_t *this, fuse_in_header_t *finh, void *msg)
@@ -1812,37 +2305,77 @@ fuse_flush (xlator_t *this, fuse_in_header_t *finh, void *msg)
fd = FH_TO_FD (ffi->fh);
state->fd = fd;
+ fuse_resolve_fd_init (state, &state->resolve, fd);
+
+ state->lk_owner = ffi->lock_owner;
+
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
"%"PRIu64": FLUSH %p", finh->unique, fd);
- FUSE_FOP (state, fuse_err_cbk, GF_FOP_FLUSH,
- flush, fd);
+ fuse_resolve_and_resume (state, fuse_flush_resume);
return;
}
-
static void
fuse_release (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
- struct fuse_release_in *fri = msg;
-
- fuse_state_t *state = NULL;
+ struct fuse_release_in *fri = msg;
+ fd_t *activefd = NULL;
+ fd_t *fd = NULL;
+ uint64_t val = 0;
+ int ret = 0;
+ fuse_state_t *state = NULL;
+ fuse_fd_ctx_t *fdctx = NULL;
+ fuse_private_t *priv = NULL;
GET_STATE (this, finh, state);
- state->fd = FH_TO_FD (fri->fh);
+ fd = FH_TO_FD (fri->fh);
+ state->fd = fd;
+
+ priv = this->private;
+
+ fuse_log_eh (this, "RELEASE(): %"PRIu64":, fd: %p, gfid: %s",
+ finh->unique, fd, uuid_utoa (fd->inode->gfid));
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
"%"PRIu64": RELEASE %p", finh->unique, state->fd);
- fd_unref (state->fd);
+ ret = fd_ctx_del (fd, this, &val);
+ if (!ret) {
+ fdctx = (fuse_fd_ctx_t *)(unsigned long)val;
+ if (fdctx) {
+ activefd = fdctx->activefd;
+ if (activefd) {
+ fd_unref (activefd);
+ }
+
+ GF_FREE (fdctx);
+ }
+ }
+ fd_unref (fd);
+
+ state->fd = NULL;
+
+ gf_fdptr_put (priv->fdtable, fd);
send_fuse_err (this, finh, 0);
- free_state (state);
+ free_fuse_state (state);
return;
}
+void
+fuse_fsync_resume (fuse_state_t *state)
+{
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": FSYNC %p", state->finh->unique,
+ state->fd);
+
+ /* fsync_flags: 1 means "datasync" (no defines for this) */
+ FUSE_FOP (state, fuse_fsync_cbk, GF_FOP_FSYNC,
+ fsync, state->fd, (state->flags & 1), state->xdata);
+}
static void
fuse_fsync (xlator_t *this, fuse_in_header_t *finh, void *msg)
@@ -1856,76 +2389,104 @@ fuse_fsync (xlator_t *this, fuse_in_header_t *finh, void *msg)
fd = FH_TO_FD (fsi->fh);
state->fd = fd;
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": FSYNC %p", finh->unique, fd);
-
- /* fsync_flags: 1 means "datasync" (no defines for this) */
- FUSE_FOP (state, fuse_err_cbk, GF_FOP_FSYNC,
- fsync, fd, fsi->fsync_flags & 1);
+ fuse_resolve_fd_init (state, &state->resolve, fd);
+ state->flags = fsi->fsync_flags;
+ fuse_resolve_and_resume (state, fuse_fsync_resume);
return;
}
-
-static void
-fuse_opendir (xlator_t *this, fuse_in_header_t *finh, void *msg)
+void
+fuse_opendir_resume (fuse_state_t *state)
{
- /*
- struct fuse_open_in *foi = msg;
- */
+ fd_t *fd = NULL;
+ fuse_private_t *priv = NULL;
+ fuse_fd_ctx_t *fdctx = NULL;
- fuse_state_t *state = NULL;
- fd_t *fd = NULL;
- int32_t ret = -1;
+ priv = state->this->private;
- GET_STATE (this, finh, state);
- ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL);
- if ((state->loc.inode == NULL) ||
- (ret < 0)) {
+ if (!state->loc.inode) {
gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": OPENDIR %s (fuse_loc_fill() failed)",
- finh->unique, state->loc.path);
+ "%"PRIu64": OPENDIR (%s) resolution failed",
+ state->finh->unique, uuid_utoa (state->resolve.gfid));
+ send_fuse_err (state->this, state->finh, ENOENT);
+ free_fuse_state (state);
+ return;
+ }
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
+ fd = fd_create (state->loc.inode, state->finh->pid);
+ if (fd == NULL) {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "%"PRIu64": OPENDIR fd creation failed",
+ state->finh->unique);
+ send_fuse_err (state->this, state->finh, ENOMEM);
+ free_fuse_state (state);
+ }
+
+ fdctx = fuse_fd_ctx_check_n_create (state->this, fd);
+ if (fdctx == NULL) {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "%"PRIu64": OPENDIR creation of fdctx failed",
+ state->finh->unique);
+ fd_unref (fd);
+ send_fuse_err (state->this, state->finh, ENOMEM);
+ free_fuse_state (state);
return;
}
- fd = fd_create (state->loc.inode, finh->pid);
- state->fd = fd;
+ state->fd = fd_ref (fd);
+ state->fd_no = gf_fd_unused_get (priv->fdtable, fd);
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": OPENDIR %s", finh->unique,
+ "%"PRIu64": OPENDIR %s", state->finh->unique,
state->loc.path);
FUSE_FOP (state, fuse_fd_cbk, GF_FOP_OPENDIR,
- opendir, &state->loc, fd);
+ opendir, &state->loc, fd, state->xdata);
}
+static void
+fuse_opendir (xlator_t *this, fuse_in_header_t *finh, void *msg)
+{
+ /*
+ struct fuse_open_in *foi = msg;
+ */
+
+ fuse_state_t *state = NULL;
+
+ GET_STATE (this, finh, state);
+
+ fuse_resolve_inode_init (state, &state->resolve, finh->nodeid);
+
+ fuse_resolve_and_resume (state, fuse_opendir_resume);
+}
unsigned char
-d_type_from_stat (struct stat *buf)
+d_type_from_stat (struct iatt *buf)
{
unsigned char d_type;
- if (buf->st_mode & S_IFREG) {
- d_type = DT_REG;
+ if (IA_ISLNK (buf->ia_type)) {
+ d_type = DT_LNK;
- } else if (buf->st_mode & S_IFDIR) {
+ } else if (IA_ISDIR (buf->ia_type)) {
d_type = DT_DIR;
- } else if (buf->st_mode & S_IFIFO) {
+ } else if (IA_ISFIFO (buf->ia_type)) {
d_type = DT_FIFO;
- } else if (buf->st_mode & S_IFSOCK) {
+ } else if (IA_ISSOCK (buf->ia_type)) {
d_type = DT_SOCK;
- } else if (buf->st_mode & S_IFCHR) {
+ } else if (IA_ISCHR (buf->ia_type)) {
d_type = DT_CHR;
- } else if (buf->st_mode & S_IFBLK) {
+ } else if (IA_ISBLK (buf->ia_type)) {
d_type = DT_BLK;
+ } else if (IA_ISREG (buf->ia_type)) {
+ d_type = DT_REG;
+
} else {
d_type = DT_UNKNOWN;
}
@@ -1933,10 +2494,10 @@ d_type_from_stat (struct stat *buf)
return d_type;
}
-
static int
fuse_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
fuse_state_t *state = NULL;
fuse_in_header_t *finh = NULL;
@@ -1944,9 +2505,13 @@ fuse_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
char *buf = NULL;
gf_dirent_t *entry = NULL;
struct fuse_dirent *fde = NULL;
+ fuse_private_t *priv = NULL;
state = frame->root->state;
finh = state->finh;
+ priv = state->this->private;
+
+ fuse_log_eh_fop(this, state, frame, op_ret, op_errno);
if (op_ret < 0) {
gf_log ("glusterfs-fuse", GF_LOG_WARNING,
@@ -1966,7 +2531,12 @@ fuse_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
strlen (entry->d_name));
}
- buf = CALLOC (1, size);
+ if (size <= 0) {
+ send_fuse_data (this, finh, 0, 0);
+ goto out;
+ }
+
+ buf = GF_CALLOC (1, size, gf_fuse_mt_char);
if (!buf) {
gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
"%"PRIu64": READDIR => -1 (%s)", frame->root->unique,
@@ -1978,25 +2548,33 @@ fuse_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
size = 0;
list_for_each_entry (entry, &entries->list, list) {
fde = (struct fuse_dirent *)(buf + size);
- fde->ino = entry->d_ino;
- fde->off = entry->d_off;
- fde->type = d_type_from_stat (&entry->d_stat);
- fde->namelen = strlen (entry->d_name);
- strncpy (fde->name, entry->d_name, fde->namelen);
+ gf_fuse_fill_dirent (entry, fde, priv->enable_ino32);
size += FUSE_DIRENT_SIZE (fde);
}
send_fuse_data (this, finh, buf, size);
+ /* TODO: */
+ /* gf_link_inodes_from_dirent (this, state->fd->inode, entries); */
+
out:
- free_state (state);
+ free_fuse_state (state);
STACK_DESTROY (frame->root);
- if (buf)
- FREE (buf);
+ GF_FREE (buf);
return 0;
}
+void
+fuse_readdir_resume (fuse_state_t *state)
+{
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": READDIR (%p, size=%"GF_PRI_SIZET", offset=%"PRId64")",
+ state->finh->unique, state->fd, state->size, state->off);
+
+ FUSE_FOP (state, fuse_readdir_cbk, GF_FOP_READDIR,
+ readdir, state->fd, state->size, state->off, state->xdata);
+}
static void
fuse_readdir (xlator_t *this, fuse_in_header_t *finh, void *msg)
@@ -2012,37 +2590,244 @@ fuse_readdir (xlator_t *this, fuse_in_header_t *finh, void *msg)
fd = FH_TO_FD (fri->fh);
state->fd = fd;
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": READDIR (%p, size=%"PRIu32", offset=%"PRId64")",
- finh->unique, fd, fri->size, fri->offset);
+ fuse_resolve_fd_init (state, &state->resolve, fd);
- FUSE_FOP (state, fuse_readdir_cbk, GF_FOP_READDIR,
- readdir, fd, fri->size, fri->offset);
+ fuse_resolve_and_resume (state, fuse_readdir_resume);
+}
+
+
+static int
+fuse_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ fuse_state_t *state = NULL;
+ fuse_in_header_t *finh = NULL;
+ int size = 0;
+ char *buf = NULL;
+ gf_dirent_t *entry = NULL;
+ struct fuse_direntplus *fde = NULL;
+ struct fuse_entry_out *feo = NULL;
+ fuse_private_t *priv = NULL;
+
+ state = frame->root->state;
+ finh = state->finh;
+ priv = this->private;
+
+ if (op_ret < 0) {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "%"PRIu64": READDIRP => -1 (%s)", frame->root->unique,
+ strerror (op_errno));
+
+ send_fuse_err (this, finh, op_errno);
+ goto out;
+ }
+
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": READDIRP => %d/%"GF_PRI_SIZET",%"PRId64,
+ frame->root->unique, op_ret, state->size, state->off);
+
+ list_for_each_entry (entry, &entries->list, list) {
+ size += FUSE_DIRENT_ALIGN (FUSE_NAME_OFFSET_DIRENTPLUS +
+ strlen (entry->d_name));
+ }
+
+ if (size <= 0) {
+ send_fuse_data (this, finh, 0, 0);
+ goto out;
+ }
+
+ buf = GF_CALLOC (1, size, gf_fuse_mt_char);
+ if (!buf) {
+ gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+ "%"PRIu64": READDIRP => -1 (%s)", frame->root->unique,
+ strerror (ENOMEM));
+ send_fuse_err (this, finh, ENOMEM);
+ goto out;
+ }
+
+ size = 0;
+ list_for_each_entry (entry, &entries->list, list) {
+ inode_t *linked_inode;
+
+ fde = (struct fuse_direntplus *)(buf + size);
+ feo = &fde->entry_out;
+ fde->dirent.ino = entry->d_ino;
+ fde->dirent.off = entry->d_off;
+ fde->dirent.type = entry->d_type;
+ fde->dirent.namelen = strlen (entry->d_name);
+ strncpy (fde->dirent.name, entry->d_name, fde->dirent.namelen);
+ size += FUSE_DIRENTPLUS_SIZE (fde);
+
+ if (!entry->inode)
+ continue;
+
+ entry->d_stat.ia_blksize = this->ctx->page_size;
+ gf_fuse_stat2attr (&entry->d_stat, &feo->attr, priv->enable_ino32);
+
+ linked_inode = inode_link (entry->inode, state->fd->inode,
+ entry->d_name, &entry->d_stat);
+ if (!linked_inode)
+ continue;
+
+ inode_lookup (linked_inode);
+
+ feo->nodeid = inode_to_fuse_nodeid (linked_inode);
+
+ fuse_inode_set_need_lookup (linked_inode, this);
+
+ inode_unref (linked_inode);
+
+ feo->entry_valid =
+ calc_timeout_sec (priv->entry_timeout);
+ feo->entry_valid_nsec =
+ calc_timeout_nsec (priv->entry_timeout);
+ feo->attr_valid =
+ calc_timeout_sec (priv->attribute_timeout);
+ feo->attr_valid_nsec =
+ calc_timeout_nsec (priv->attribute_timeout);
+ }
+
+ send_fuse_data (this, finh, buf, size);
+out:
+ free_fuse_state (state);
+ STACK_DESTROY (frame->root);
+ GF_FREE (buf);
+ return 0;
+
+}
+
+
+void
+fuse_readdirp_resume (fuse_state_t *state)
+{
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": READDIRP (%p, size=%"GF_PRI_SIZET", offset=%"PRId64")",
+ state->finh->unique, state->fd, state->size, state->off);
+
+ FUSE_FOP (state, fuse_readdirp_cbk, GF_FOP_READDIRP,
+ readdirp, state->fd, state->size, state->off, state->xdata);
}
static void
-fuse_releasedir (xlator_t *this, fuse_in_header_t *finh, void *msg)
+fuse_readdirp (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
- struct fuse_release_in *fri = msg;
+ struct fuse_read_in *fri = msg;
- fuse_state_t *state = NULL;
+ fuse_state_t *state = NULL;
+ fd_t *fd = NULL;
+
+ GET_STATE (this, finh, state);
+ state->size = fri->size;
+ state->off = fri->offset;
+ fd = FH_TO_FD (fri->fh);
+ state->fd = fd;
+
+ fuse_resolve_fd_init (state, &state->resolve, fd);
+
+ fuse_resolve_and_resume (state, fuse_readdirp_resume);
+}
+
+static int
+fuse_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ return fuse_err_cbk(frame, cookie, this, op_ret, op_errno, xdata);
+}
+
+static void
+fuse_fallocate_resume(fuse_state_t *state)
+{
+ gf_log("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": FALLOCATE (%p, flags=%d, size=%zu, offset=%"PRId64")",
+ state->finh->unique, state->fd, state->flags, state->size,
+ state->off);
+
+ if (state->flags & FALLOC_FL_PUNCH_HOLE)
+ FUSE_FOP(state, fuse_fallocate_cbk, GF_FOP_DISCARD, discard,
+ state->fd, state->off, state->size, state->xdata);
+ else
+ FUSE_FOP(state, fuse_fallocate_cbk, GF_FOP_FALLOCATE, fallocate,
+ state->fd, (state->flags & FALLOC_FL_KEEP_SIZE),
+ state->off, state->size, state->xdata);
+}
+
+static void
+fuse_fallocate(xlator_t *this, fuse_in_header_t *finh, void *msg)
+{
+ struct fuse_fallocate_in *ffi = msg;
+ fuse_state_t *state = NULL;
+
+ GET_STATE(this, finh, state);
+ state->off = ffi->offset;
+ state->size = ffi->length;
+ state->flags = ffi->mode;
+ state->fd = FH_TO_FD(ffi->fh);
+
+ fuse_resolve_fd_init(state, &state->resolve, state->fd);
+ fuse_resolve_and_resume(state, fuse_fallocate_resume);
+}
+
+
+static void
+fuse_releasedir (xlator_t *this, fuse_in_header_t *finh, void *msg)
+{
+ struct fuse_release_in *fri = msg;
+ fd_t *activefd = NULL;
+ uint64_t val = 0;
+ int ret = 0;
+ fuse_state_t *state = NULL;
+ fuse_fd_ctx_t *fdctx = NULL;
+ fuse_private_t *priv = NULL;
GET_STATE (this, finh, state);
state->fd = FH_TO_FD (fri->fh);
+ priv = this->private;
+
+ fuse_log_eh (this, "RELEASEDIR (): %"PRIu64": fd: %p, gfid: %s",
+ finh->unique, state->fd,
+ uuid_utoa (state->fd->inode->gfid));
+
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
"%"PRIu64": RELEASEDIR %p", finh->unique, state->fd);
+ ret = fd_ctx_del (state->fd, this, &val);
+
+ if (!ret) {
+ fdctx = (fuse_fd_ctx_t *)(unsigned long)val;
+ if (fdctx) {
+ activefd = fdctx->activefd;
+ if (activefd) {
+ fd_unref (activefd);
+ }
+
+ GF_FREE (fdctx);
+ }
+ }
+
fd_unref (state->fd);
+ gf_fdptr_put (priv->fdtable, state->fd);
+
+ state->fd = NULL;
+
send_fuse_err (this, finh, 0);
- free_state (state);
+ free_fuse_state (state);
return;
}
+void
+fuse_fsyncdir_resume (fuse_state_t *state)
+{
+ FUSE_FOP (state, fuse_err_cbk, GF_FOP_FSYNCDIR,
+ fsyncdir, state->fd, (state->flags & 1), state->xdata);
+
+}
static void
fuse_fsyncdir (xlator_t *this, fuse_in_header_t *finh, void *msg)
@@ -2057,16 +2842,18 @@ fuse_fsyncdir (xlator_t *this, fuse_in_header_t *finh, void *msg)
GET_STATE (this, finh, state);
state->fd = fd;
- FUSE_FOP (state, fuse_err_cbk, GF_FOP_FSYNCDIR,
- fsyncdir, fd, fsi->fsync_flags & 1);
+ fuse_resolve_fd_init (state, &state->resolve, fd);
+
+ state->flags = fsi->fsync_flags;
+ fuse_resolve_and_resume (state, fuse_fsyncdir_resume);
return;
}
-
static int
fuse_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf)
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
{
fuse_state_t *state = NULL;
fuse_in_header_t *finh = NULL;
@@ -2076,19 +2863,10 @@ fuse_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
state = frame->root->state;
priv = this->private;
finh = state->finh;
- /*
- Filesystems (like ZFS on solaris) reports
- different ->f_frsize and ->f_bsize. Old coreutils
- df tools use statfs() and do not see ->f_frsize.
- the ->f_blocks, ->f_bavail and ->f_bfree are
- w.r.t ->f_frsize and not ->f_bsize which makes the
- df tools report wrong values.
-
- Scale the block counts to match ->f_bsize.
- */
- /* TODO: with old coreutils, f_bsize is taken from stat()'s st_blksize
- * so the df with old coreutils this wont work :(
- */
+
+ fuse_log_eh (this, "op_ret: %d, op_errno: %d, %"PRIu64": %s()",
+ op_ret, op_errno, frame->root->unique,
+ gf_fop_list[frame->root->op]);
if (op_ret == 0) {
#ifndef GF_DARWIN_HOST_OS
@@ -2123,37 +2901,81 @@ fuse_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
send_fuse_err (this, finh, op_errno);
}
- free_state (state);
+ free_fuse_state (state);
STACK_DESTROY (frame->root);
return 0;
}
+void
+fuse_statfs_resume (fuse_state_t *state)
+{
+ if (!state->loc.inode) {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "%"PRIu64": STATFS (%s) resolution fail",
+ state->finh->unique, uuid_utoa (state->resolve.gfid));
+
+ send_fuse_err (state->this, state->finh, ENOENT);
+ free_fuse_state (state);
+ return;
+ }
+
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": STATFS", state->finh->unique);
+
+ FUSE_FOP (state, fuse_statfs_cbk, GF_FOP_STATFS,
+ statfs, &state->loc, state->xdata);
+}
+
static void
fuse_statfs (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
fuse_state_t *state = NULL;
- int32_t ret = -1;
GET_STATE (this, finh, state);
- ret = fuse_loc_fill (&state->loc, state, 1, 0, NULL);
- if ((state->loc.inode == NULL) ||
- (ret < 0)) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": STATFS (fuse_loc_fill() fail)",
- finh->unique);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
+ fuse_resolve_inode_init (state, &state->resolve, finh->nodeid);
+
+ fuse_resolve_and_resume (state, fuse_statfs_resume);
+}
+
+
+void
+fuse_setxattr_resume (fuse_state_t *state)
+{
+ if (!state->loc.inode) {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "%"PRIu64": SETXATTR %s/%"PRIu64" (%s) "
+ "resolution failed",
+ state->finh->unique, uuid_utoa (state->resolve.gfid),
+ state->finh->nodeid, state->name);
+ send_fuse_err (state->this, state->finh, ENOENT);
+ free_fuse_state (state);
return;
}
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": STATFS", finh->unique);
+#ifdef GF_TEST_FFOP
+ state->fd = fd_lookup (state->loc.inode, state->finh->pid);
+#endif /* GF_TEST_FFOP */
- FUSE_FOP (state, fuse_statfs_cbk, GF_FOP_STATFS,
- statfs, &state->loc);
+ if (state->fd) {
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": SETXATTR %p/%"PRIu64" (%s)", state->finh->unique,
+ state->fd, state->finh->nodeid, state->name);
+
+ FUSE_FOP (state, fuse_setxattr_cbk, GF_FOP_FSETXATTR,
+ fsetxattr, state->fd, state->xattr, state->flags,
+ state->xdata);
+ } else {
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": SETXATTR %s/%"PRIu64" (%s)", state->finh->unique,
+ state->loc.path, state->finh->nodeid, state->name);
+
+ FUSE_FOP (state, fuse_setxattr_cbk, GF_FOP_SETXATTR,
+ setxattr, &state->loc, state->xattr, state->flags,
+ state->xdata);
+ }
}
@@ -2163,58 +2985,115 @@ fuse_setxattr (xlator_t *this, fuse_in_header_t *finh, void *msg)
struct fuse_setxattr_in *fsi = msg;
char *name = (char *)(fsi + 1);
char *value = name + strlen (name) + 1;
+ struct fuse_private *priv = NULL;
fuse_state_t *state = NULL;
char *dict_value = NULL;
int32_t ret = -1;
+ char *newkey = NULL;
+
+ priv = this->private;
-#ifdef DISABLE_POSIX_ACL
- if (!strncmp (name, "system.", 7)) {
- send_fuse_err (this, finh, EOPNOTSUPP);
+#ifdef GF_DARWIN_HOST_OS
+ if (fsi->position) {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "%"PRIu64": SETXATTR %s/%"PRIu64" (%s):"
+ "refusing positioned setxattr",
+ finh->unique, state->loc.path, finh->nodeid, name);
+ send_fuse_err (this, finh, EINVAL);
+ FREE (finh);
return;
}
#endif
- GET_STATE (this, finh, state);
- state->size = fsi->size;
- ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL);
- if ((state->loc.inode == NULL) ||
- (ret < 0)) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": SETXATTR %s/%"PRIu64" (%s) (fuse_loc_fill() failed)",
- finh->unique,
- state->loc.path, finh->nodeid, name);
+ if (fuse_ignore_xattr_set (priv, name)) {
+ (void) send_fuse_err (this, finh, 0);
+ return;
+ }
+
+ if (!priv->acl) {
+ if ((strcmp (name, POSIX_ACL_ACCESS_XATTR) == 0) ||
+ (strcmp (name, POSIX_ACL_DEFAULT_XATTR) == 0)) {
+ send_fuse_err (this, finh, EOPNOTSUPP);
+ GF_FREE (finh);
+ return;
+ }
+ }
+
+ if (!priv->selinux) {
+ if (strncmp (name, "security.", 9) == 0) {
+ send_fuse_err (this, finh, EOPNOTSUPP);
+ GF_FREE (finh);
+ return;
+ }
+ }
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
+ /* Check if the command is for changing the log
+ level of process or specific xlator */
+ ret = is_gf_log_command (this, name, value);
+ if (ret >= 0) {
+ send_fuse_err (this, finh, ret);
+ GF_FREE (finh);
+ return;
+ }
+
+ if (!strcmp ("inode-invalidate", name)) {
+ gf_log ("fuse", GF_LOG_TRACE,
+ "got request to invalidate %"PRIu64, finh->nodeid);
+ send_fuse_err (this, finh, 0);
+ fuse_invalidate_entry (this, finh->nodeid);
+ GF_FREE (finh);
+ return;
+ }
+
+ if (!strcmp (GFID_XATTR_KEY, name)) {
+ send_fuse_err (this, finh, EPERM);
+ GF_FREE (finh);
return;
}
- state->dict = get_new_dict ();
- if (!state->dict) {
+ GET_STATE (this, finh, state);
+ state->size = fsi->size;
+
+ fuse_resolve_inode_init (state, &state->resolve, finh->nodeid);
+
+ state->xattr = get_new_dict ();
+ if (!state->xattr) {
gf_log ("glusterfs-fuse", GF_LOG_ERROR,
"%"PRIu64": SETXATTR dict allocation failed",
finh->unique);
- free_state (state);
+ send_fuse_err (this, finh, ENOMEM);
+ free_fuse_state (state);
return;
}
- dict_value = memdup (value, fsi->size);
- dict_set (state->dict, (char *)name,
+ ret = fuse_flip_xattr_ns (priv, name, &newkey);
+ if (ret) {
+ send_fuse_err (this, finh, ENOMEM);
+ free_fuse_state (state);
+ return;
+ }
+
+ if (fsi->size > 0) {
+ dict_value = memdup (value, fsi->size);
+ } else {
+ gf_log (THIS->name, GF_LOG_ERROR, "value size zero");
+ dict_value = NULL;
+ }
+ dict_set (state->xattr, newkey,
data_from_dynptr ((void *)dict_value, fsi->size));
- dict_ref (state->dict);
+ dict_ref (state->xattr);
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": SETXATTR %s/%"PRIu64" (%s)", finh->unique,
- state->loc.path, finh->nodeid, name);
+ state->flags = fsi->flags;
+ state->name = newkey;
- FUSE_FOP (state, fuse_err_cbk, GF_FOP_SETXATTR,
- setxattr, &state->loc, state->dict, fsi->flags);
+ fuse_resolve_and_resume (state, fuse_setxattr_resume);
return;
}
+
static void
send_fuse_xattr (xlator_t *this, fuse_in_header_t *finh, const char *value,
size_t size, size_t expected)
@@ -2238,55 +3117,44 @@ send_fuse_xattr (xlator_t *this, fuse_in_header_t *finh, const char *value,
}
}
+/* filter out xattrs that need not be visible on the
+ * mount point. this is _specifically_ for geo-rep
+ * as of now, to prevent Rsync from crying out loud
+ * when it tries to setxattr() for selinux xattrs
+ */
+static int
+fuse_filter_xattr(char *key)
+{
+ int need_filter = 0;
+ struct fuse_private *priv = THIS->private;
+
+ if ((priv->client_pid == GF_CLIENT_PID_GSYNCD)
+ && fnmatch ("*.selinux*", key, FNM_PERIOD) == 0)
+ need_filter = 1;
+
+ return need_filter;
+}
+
+
static int
fuse_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
int need_to_free_dict = 0;
char *value = "";
fuse_state_t *state = NULL;
fuse_in_header_t *finh = NULL;
- int32_t dummy_ret = 0;
data_t *value_data = NULL;
- fuse_private_t *priv = NULL;
- struct stat st;
- char *file = NULL;
- int32_t fd = -1;
int ret = -1;
int32_t len = 0;
- data_pair_t *trav = NULL;
+ int32_t len_next = 0;
- priv = this->private;
- ret = op_ret;
state = frame->root->state;
finh = state->finh;
- dummy_ret = 0;
-
-#ifdef GF_DARWIN_HOST_OS
- /* This is needed in MacFuse, where MacOSX Finder needs some specific
- * keys to be supported from FS
- */
- if (state->name) {
- if (!dict) {
- dict = get_new_dict ();
- need_to_free_dict = 1;
- }
- dummy_ret = gf_compat_getxattr (state->name, dict);
- if (dummy_ret != -1)
- ret = dummy_ret;
- } else {
- if (!dict) {
- dict = get_new_dict ();
- need_to_free_dict = 1;
- }
- dummy_ret = gf_compat_listxattr (ret, dict, state->size);
- if (dummy_ret != -1)
- ret = dummy_ret;
- }
-#endif /* DARWIN */
+ fuse_log_eh_fop(this, state, frame, op_ret, op_errno);
- if (ret >= 0) {
+ if (op_ret >= 0) {
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
"%"PRIu64": %s() %s => %d", frame->root->unique,
gf_fop_list[frame->root->op], state->loc.path, op_ret);
@@ -2296,143 +3164,260 @@ fuse_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
/* if callback for getxattr */
value_data = dict_get (dict, state->name);
if (value_data) {
+
ret = value_data->len; /* Don't return the value for '\0' */
value = value_data->data;
send_fuse_xattr (this, finh, value, ret, state->size);
/* if(ret >...)...else if...else */
- } else if (!strcmp (state->name, "user.glusterfs-booster-volfile")) {
- if (!priv->volfile) {
- memset (&st, 0, sizeof (st));
- fd = fileno (this->ctx->specfp);
- ret = fstat (fd, &st);
- if (ret != 0) {
- gf_log (this->name,
- GF_LOG_ERROR,
- "fstat on fd (%d) failed (%s)", fd, strerror (errno));
- send_fuse_err (this, finh, ENODATA);
- }
-
- priv->volfile_size = st.st_size;
- file = priv->volfile = CALLOC (1, priv->volfile_size);
- ret = lseek (fd, 0, SEEK_SET);
- while ((ret = read (fd, file, GF_UNIT_KB)) > 0) {
- file += ret;
- }
- }
-
- send_fuse_xattr (this, finh, priv->volfile,
- priv->volfile_size, state->size);
- /* if(ret >...)...else if...else */
- } else if (!strcmp (state->name, "user.glusterfs-booster-path")) {
- send_fuse_xattr (this, finh, state->loc.path,
- strlen (state->loc.path) + 1, state->size);
- } else if (!strcmp (state->name, "user.glusterfs-booster-mount")) {
- send_fuse_xattr (this, finh, priv->mount_point,
- strlen (priv->mount_point) + 1, state->size);
} else {
send_fuse_err (this, finh, ENODATA);
} /* if(value_data)...else */
} else {
/* if callback for listxattr */
- trav = dict->members_list;
- while (trav) {
- len += strlen (trav->key) + 1;
- trav = trav->next;
- } /* while(trav) */
+ /* we need to invoke fuse_filter_xattr() twice. Once
+ * while counting size and then while filling buffer
+ */
+ len = dict_keys_join (NULL, 0, dict, fuse_filter_xattr);
+ if (len < 0)
+ goto out;
+
value = alloca (len + 1);
- ERR_ABORT (value);
- len = 0;
- trav = dict->members_list;
- while (trav) {
- strcpy (value + len, trav->key);
- value[len + strlen (trav->key)] = '\0';
- len += strlen (trav->key) + 1;
- trav = trav->next;
- } /* while(trav) */
+ if (!value)
+ goto out;
+
+ len_next = dict_keys_join (value, len, dict,
+ fuse_filter_xattr);
+ if (len_next != len)
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "sizes not equal %d != %d",
+ len, len_next);
+
send_fuse_xattr (this, finh, value, len, state->size);
} /* if(state->name)...else */
} else {
/* if failure - no need to check if listxattr or getxattr */
if (op_errno != ENODATA) {
- if (op_errno == ENOTSUP)
- {
- gf_fuse_xattr_enotsup_log++;
- if (!(gf_fuse_xattr_enotsup_log % GF_UNIVERSAL_ANSWER))
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "extended attribute not "
- "supported by the backend "
- "storage");
- }
- else
- {
+ if (op_errno == ENOTSUP) {
+ GF_LOG_OCCASIONALLY (gf_fuse_xattr_enotsup_log,
+ "glusterfs-fuse",
+ GF_LOG_ERROR,
+ "extended attribute not "
+ "supported by the backend "
+ "storage");
+ } else {
gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": %s() %s => -1 (%s)",
+ "%"PRIu64": %s(%s) %s => -1 (%s)",
frame->root->unique,
- gf_fop_list[frame->root->op],
+ gf_fop_list[frame->root->op], state->name,
state->loc.path, strerror (op_errno));
}
} else {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": %s() %s => -1 (%s)",
+ gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+ "%"PRIu64": %s(%s) %s => -1 (%s)",
frame->root->unique,
- gf_fop_list[frame->root->op], state->loc.path,
- strerror (op_errno));
+ gf_fop_list[frame->root->op], state->name,
+ state->loc.path, strerror (op_errno));
} /* if(op_errno!= ENODATA)...else */
send_fuse_err (this, finh, op_errno);
} /* if(op_ret>=0)...else */
+out:
if (need_to_free_dict)
dict_unref (dict);
- free_state (state);
+ free_fuse_state (state);
STACK_DESTROY (frame->root);
return 0;
}
+void
+fuse_getxattr_resume (fuse_state_t *state)
+{
+ char *value = NULL;
+
+ if (!state->loc.inode) {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "%"PRIu64": GETXATTR %s/%"PRIu64" (%s) "
+ "resolution failed",
+ state->finh->unique,
+ uuid_utoa (state->resolve.gfid),
+ state->finh->nodeid, state->name);
+
+ send_fuse_err (state->this, state->finh, ENOENT);
+ free_fuse_state (state);
+ return;
+ }
+
+#ifdef GF_TEST_FFOP
+ state->fd = fd_lookup (state->loc.inode, state->finh->pid);
+#endif /* GF_TEST_FFOP */
+
+ if (state->name &&
+ (strcmp (state->name, VIRTUAL_GFID_XATTR_KEY) == 0)) {
+ /* send glusterfs gfid in binary form */
+
+ value = GF_CALLOC (16 + 1, sizeof(char),
+ gf_common_mt_char);
+ if (!value) {
+ send_fuse_err (state->this, state->finh, ENOMEM);
+ goto internal_out;
+ }
+ memcpy (value, state->loc.inode->gfid, 16);
+
+ send_fuse_xattr (THIS, state->finh, value, 16, state->size);
+ GF_FREE (value);
+ internal_out:
+ free_fuse_state (state);
+ return;
+ }
+
+ if (state->name &&
+ (strcmp (state->name, VIRTUAL_GFID_XATTR_KEY_STR) == 0)) {
+ /* transform binary gfid to canonical form */
+
+ value = GF_CALLOC (UUID_CANONICAL_FORM_LEN + 1, sizeof(char),
+ gf_common_mt_char);
+ if (!value) {
+ send_fuse_err (state->this, state->finh, ENOMEM);
+ goto internal_out1;
+ }
+ uuid_utoa_r (state->loc.inode->gfid, value);
+
+ send_fuse_xattr (THIS, state->finh, value,
+ UUID_CANONICAL_FORM_LEN, state->size);
+ GF_FREE (value);
+ internal_out1:
+ free_fuse_state (state);
+ return;
+ }
+
+
+ if (state->fd) {
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": GETXATTR %p/%"PRIu64" (%s)", state->finh->unique,
+ state->fd, state->finh->nodeid, state->name);
+
+ FUSE_FOP (state, fuse_xattr_cbk, GF_FOP_FGETXATTR,
+ fgetxattr, state->fd, state->name, state->xdata);
+ } else {
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": GETXATTR %s/%"PRIu64" (%s)", state->finh->unique,
+ state->loc.path, state->finh->nodeid, state->name);
+
+ FUSE_FOP (state, fuse_xattr_cbk, GF_FOP_GETXATTR,
+ getxattr, &state->loc, state->name, state->xdata);
+ }
+}
+
+
static void
fuse_getxattr (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
- struct fuse_getxattr_in *fgxi = msg;
- char *name = (char *)(msg + 1);
+ struct fuse_getxattr_in *fgxi = msg;
+ char *name = (char *)(fgxi + 1);
+ fuse_state_t *state = NULL;
+ struct fuse_private *priv = NULL;
+ int rv = 0;
+ int op_errno = EINVAL;
+ char *newkey = NULL;
- fuse_state_t *state = NULL;
- int32_t ret = -1;
+ priv = this->private;
+ GET_STATE (this, finh, state);
-#ifdef DISABLE_POSIX_ACL
- if (!strncmp (name, "system.", 7)) {
- send_fuse_err (this, finh, ENODATA);
- return;
+#ifdef GF_DARWIN_HOST_OS
+ if (fgxi->position) {
+ /* position can be used only for
+ * resource fork queries which we
+ * don't support anyway... so handling
+ * it separately is just sort of a
+ * matter of aesthetics, not strictly
+ * necessary.
+ */
+
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "%"PRIu64": GETXATTR %s/%"PRIu64" (%s):"
+ "refusing positioned getxattr",
+ finh->unique, state->loc.path, finh->nodeid, name);
+ op_errno = EINVAL;
+ goto err;
}
#endif
+ if (!priv->acl) {
+ if ((strcmp (name, POSIX_ACL_ACCESS_XATTR) == 0) ||
+ (strcmp (name, POSIX_ACL_DEFAULT_XATTR) == 0)) {
+ op_errno = ENOTSUP;
+ goto err;
+ }
+ }
+
+ if (!priv->selinux) {
+ if (strncmp (name, "security.", 9) == 0) {
+ op_errno = ENODATA;
+ goto err;
+ }
+ }
+
GET_STATE (this, finh, state);
+
+ fuse_resolve_inode_init (state, &state->resolve, finh->nodeid);
+
+ rv = fuse_flip_xattr_ns (priv, name, &newkey);
+ if (rv) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
state->size = fgxi->size;
- state->name = strdup (name);
+ state->name = newkey;
+
+ fuse_resolve_and_resume (state, fuse_getxattr_resume);
+
+ return;
+ err:
+ send_fuse_err (this, finh, op_errno);
+ free_fuse_state (state);
+ return;
+}
+
- ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL);
- if ((state->loc.inode == NULL) ||
- (ret < 0)) {
+void
+fuse_listxattr_resume (fuse_state_t *state)
+{
+ if (!state->loc.inode) {
gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": GETXATTR %s/%"PRIu64" (%s) (fuse_loc_fill() failed)",
- finh->unique, state->loc.path, finh->nodeid, name);
+ "%"PRIu64": LISTXATTR %s/%"PRIu64
+ "resolution failed", state->finh->unique,
+ uuid_utoa (state->resolve.gfid), state->finh->nodeid);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
+ send_fuse_err (state->this, state->finh, ENOENT);
+ free_fuse_state (state);
return;
}
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": GETXATTR %s/%"PRIu64" (%s)", finh->unique,
- state->loc.path, finh->nodeid, name);
+#ifdef GF_TEST_FFOP
+ state->fd = fd_lookup (state->loc.inode, state->finh->pid);
+#endif /* GF_TEST_FFOP */
- FUSE_FOP (state, fuse_xattr_cbk, GF_FOP_GETXATTR,
- getxattr, &state->loc, name);
+ if (state->fd) {
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": LISTXATTR %p/%"PRIu64, state->finh->unique,
+ state->fd, state->finh->nodeid);
- return;
+ FUSE_FOP (state, fuse_xattr_cbk, GF_FOP_FGETXATTR,
+ fgetxattr, state->fd, NULL, state->xdata);
+ } else {
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": LISTXATTR %s/%"PRIu64, state->finh->unique,
+ state->loc.path, state->finh->nodeid);
+
+ FUSE_FOP (state, fuse_xattr_cbk, GF_FOP_GETXATTR,
+ getxattr, &state->loc, NULL, state->xdata);
+ }
}
@@ -2440,64 +3425,89 @@ static void
fuse_listxattr (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
struct fuse_getxattr_in *fgxi = msg;
-
fuse_state_t *state = NULL;
- int32_t ret = -1;
GET_STATE (this, finh, state);
+
+ fuse_resolve_inode_init (state, &state->resolve, finh->nodeid);
+
state->size = fgxi->size;
- ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL);
- if ((state->loc.inode == NULL) ||
- (ret < 0)) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING,
- "%"PRIu64": LISTXATTR %s/%"PRIu64" (fuse_loc_fill() failed)",
- finh->unique, state->loc.path, finh->nodeid);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
+ fuse_resolve_and_resume (state, fuse_listxattr_resume);
+
+ return;
+}
+
+
+void
+fuse_removexattr_resume (fuse_state_t *state)
+{
+ if (!state->loc.inode) {
+ gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+ "%"PRIu64": REMOVEXATTR %s/%"PRIu64" (%s) "
+ "resolution failed",
+ state->finh->unique, uuid_utoa (state->resolve.gfid),
+ state->finh->nodeid, state->name);
+
+ send_fuse_err (state->this, state->finh, ENOENT);
+ free_fuse_state (state);
return;
}
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": LISTXATTR %s/%"PRIu64, finh->unique,
- state->loc.path, finh->nodeid);
+#ifdef GF_TEST_FFOP
+ state->fd = fd_lookup (state->loc.inode, state->finh->pid);
+#endif /* GF_TEST_FFOP */
- FUSE_FOP (state, fuse_xattr_cbk, GF_FOP_GETXATTR,
- getxattr, &state->loc, NULL);
+ if (state->fd) {
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": REMOVEXATTR %p/%"PRIu64" (%s)", state->finh->unique,
+ state->fd, state->finh->nodeid, state->name);
- return;
+ FUSE_FOP (state, fuse_err_cbk, GF_FOP_FREMOVEXATTR,
+ fremovexattr, state->fd, state->name, state->xdata);
+ } else {
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": REMOVEXATTR %s/%"PRIu64" (%s)", state->finh->unique,
+ state->loc.path, state->finh->nodeid, state->name);
+
+ FUSE_FOP (state, fuse_err_cbk, GF_FOP_REMOVEXATTR,
+ removexattr, &state->loc, state->name, state->xdata);
+ }
}
static void
fuse_removexattr (xlator_t *this, fuse_in_header_t *finh, void *msg)
-
{
char *name = msg;
fuse_state_t *state = NULL;
+ fuse_private_t *priv = NULL;
int32_t ret = -1;
+ char *newkey = NULL;
+
+ if (!strcmp (GFID_XATTR_KEY, name)) {
+ send_fuse_err (this, finh, EPERM);
+ GF_FREE (finh);
+ return;
+ }
+
+ priv = this->private;
GET_STATE (this, finh, state);
- ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL);
- if ((state->loc.inode == NULL) ||
- (ret < 0)) {
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
- "%"PRIu64": REMOVEXATTR %s/%"PRIu64" (%s) (fuse_loc_fill() failed)",
- finh->unique, state->loc.path, finh->nodeid, name);
- send_fuse_err (this, finh, ENOENT);
- free_state (state);
+ fuse_resolve_inode_init (state, &state->resolve, finh->nodeid);
+
+ ret = fuse_flip_xattr_ns (priv, name, &newkey);
+ if (ret) {
+ send_fuse_err (this, finh, ENOMEM);
+ free_fuse_state (state);
return;
}
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": REMOVEXATTR %s/%"PRIu64" (%s)", finh->unique,
- state->loc.path, finh->nodeid, name);
-
- FUSE_FOP (state, fuse_err_cbk, GF_FOP_REMOVEXATTR,
- removexattr, &state->loc, name);
+ state->name = newkey;
+ fuse_resolve_and_resume (state, fuse_removexattr_resume);
return;
}
@@ -2506,13 +3516,16 @@ static int gf_fuse_lk_enosys_log;
static int
fuse_getlk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
{
fuse_state_t *state = NULL;
state = frame->root->state;
struct fuse_lk_out flo = {{0, }, };
+ fuse_log_eh_fop(this, state, frame, op_ret, op_errno);
+
if (op_ret == 0) {
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
"%"PRIu64": ERR => 0", frame->root->unique);
@@ -2544,13 +3557,24 @@ fuse_getlk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
send_fuse_err (this, state->finh, op_errno);
}
- free_state (state);
+ free_fuse_state (state);
STACK_DESTROY (frame->root);
return 0;
}
+void
+fuse_getlk_resume (fuse_state_t *state)
+{
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": GETLK %p", state->finh->unique, state->fd);
+
+ FUSE_FOP (state, fuse_getlk_cbk, GF_FOP_LK,
+ lk, state->fd, F_GETLK, &state->lk_lock, state->xdata);
+}
+
+
static void
fuse_getlk (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
@@ -2558,18 +3582,19 @@ fuse_getlk (xlator_t *this, fuse_in_header_t *finh, void *msg)
fuse_state_t *state = NULL;
fd_t *fd = NULL;
- struct flock lock = {0, };
fd = FH_TO_FD (fli->fh);
GET_STATE (this, finh, state);
state->fd = fd;
- convert_fuse_file_lock (&fli->lk, &lock);
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": GETLK %p", finh->unique, fd);
+ fuse_resolve_fd_init (state, &state->resolve, fd);
- FUSE_FOP (state, fuse_getlk_cbk, GF_FOP_LK,
- lk, fd, F_GETLK, &lock);
+ convert_fuse_file_lock (&fli->lk, &state->lk_lock,
+ fli->owner);
+
+ state->lk_owner = fli->owner;
+
+ fuse_resolve_and_resume (state, fuse_getlk_resume);
return;
}
@@ -2577,15 +3602,24 @@ fuse_getlk (xlator_t *this, fuse_in_header_t *finh, void *msg)
static int
fuse_setlk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct flock *lock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
{
+ uint32_t op = 0;
fuse_state_t *state = NULL;
state = frame->root->state;
+ op = state->finh->opcode;
+
+ fuse_log_eh_fop(this, state, frame, op_ret, op_errno);
if (op_ret == 0) {
gf_log ("glusterfs-fuse", GF_LOG_TRACE,
"%"PRIu64": ERR => 0", frame->root->unique);
+ fd_lk_insert_and_merge (state->fd,
+ (op == FUSE_SETLK) ? F_SETLK : F_SETLKW,
+ &state->lk_lock);
+
send_fuse_err (this, state->finh, 0);
} else {
if (op_errno == ENOSYS) {
@@ -2596,6 +3630,14 @@ fuse_setlk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
"'features/posix-locks' on server side "
"will add SETLK support.");
}
+ } else if (op_errno == EAGAIN) {
+ gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+ "Returning EAGAIN Flock: "
+ "start=%llu, len=%llu, pid=%llu, lk-owner=%s",
+ (unsigned long long) state->lk_lock.l_start,
+ (unsigned long long) state->lk_lock.l_len,
+ (unsigned long long) state->lk_lock.l_pid,
+ lkowner_utoa (&frame->root->lk_owner));
} else {
gf_log ("glusterfs-fuse", GF_LOG_WARNING,
"%"PRIu64": ERR => -1 (%s)",
@@ -2605,13 +3647,26 @@ fuse_setlk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
send_fuse_err (this, state->finh, op_errno);
}
- free_state (state);
+ free_fuse_state (state);
STACK_DESTROY (frame->root);
return 0;
}
+void
+fuse_setlk_resume (fuse_state_t *state)
+{
+ gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+ "%"PRIu64": SETLK%s %p", state->finh->unique,
+ state->finh->opcode == FUSE_SETLK ? "" : "W", state->fd);
+
+ FUSE_FOP (state, fuse_setlk_cbk, GF_FOP_LK, lk, state->fd,
+ state->finh->opcode == FUSE_SETLK ? F_SETLK : F_SETLKW,
+ &state->lk_lock, state->xdata);
+}
+
+
static void
fuse_setlk (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
@@ -2619,38 +3674,75 @@ fuse_setlk (xlator_t *this, fuse_in_header_t *finh, void *msg)
fuse_state_t *state = NULL;
fd_t *fd = NULL;
- struct flock lock = {0, };
fd = FH_TO_FD (fli->fh);
GET_STATE (this, finh, state);
state->finh = finh;
state->fd = fd;
- convert_fuse_file_lock (&fli->lk, &lock);
- gf_log ("glusterfs-fuse", GF_LOG_TRACE,
- "%"PRIu64": SETLK%s %p", finh->unique,
- finh->opcode == FUSE_SETLK ? "" : "W", fd);
+ fuse_resolve_fd_init (state, &state->resolve, fd);
+
+ convert_fuse_file_lock (&fli->lk, &state->lk_lock,
+ fli->owner);
- FUSE_FOP (state, fuse_setlk_cbk, GF_FOP_LK,
- lk, fd, finh->opcode == FUSE_SETLK ? F_SETLK : F_SETLKW,
- &lock);
+ state->lk_owner = fli->owner;
+
+ fuse_resolve_and_resume (state, fuse_setlk_resume);
return;
}
+static void *
+notify_kernel_loop (void *data)
+{
+ xlator_t *this = NULL;
+ fuse_private_t *priv = NULL;
+ struct fuse_out_header *fouh = NULL;
+ int rv = 0;
+
+ char inval_buf[INVAL_BUF_SIZE] = {0,};
+
+ this = data;
+ priv = this->private;
+
+ for (;;) {
+ rv = read (priv->revchan_in, inval_buf, sizeof (*fouh));
+ if (rv != sizeof (*fouh))
+ break;
+ fouh = (struct fuse_out_header *)inval_buf;
+ rv = read (priv->revchan_in, inval_buf + sizeof (*fouh),
+ fouh->len - sizeof (*fouh));
+ if (rv != fouh->len - sizeof (*fouh))
+ break;
+ rv = write (priv->fd, inval_buf, fouh->len);
+ if (rv != fouh->len && !(rv == -1 && errno == ENOENT))
+ break;
+ }
+
+ close (priv->revchan_in);
+ close (priv->revchan_out);
+
+ gf_log ("glusterfs-fuse", GF_LOG_INFO,
+ "kernel notifier loop terminated");
+
+ return NULL;
+}
+
+
static void
fuse_init (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
- struct fuse_init_in *fini = msg;
-
- struct fuse_init_out fino;
- fuse_private_t *priv = NULL;
- int ret;
+ struct fuse_init_in *fini = msg;
+ struct fuse_init_out fino = {0,};
+ fuse_private_t *priv = NULL;
+ int ret = 0;
+ int pfd[2] = {0,};
+ pthread_t messenger;
priv = this->private;
- if (!priv->first_call) {
+ if (priv->init_recvd) {
gf_log ("glusterfs-fuse", GF_LOG_ERROR,
"got INIT after first message");
@@ -2658,6 +3750,8 @@ fuse_init (xlator_t *this, fuse_in_header_t *finh, void *msg)
goto out;
}
+ priv->init_recvd = 1;
+
if (fini->major != FUSE_KERNEL_VERSION) {
gf_log ("glusterfs-fuse", GF_LOG_ERROR,
"unsupported FUSE protocol version %d.%d",
@@ -2673,14 +3767,112 @@ fuse_init (xlator_t *this, fuse_in_header_t *finh, void *msg)
fino.max_readahead = 1 << 17;
fino.max_write = 1 << 17;
fino.flags = FUSE_ASYNC_READ | FUSE_POSIX_LOCKS;
+#if FUSE_KERNEL_MINOR_VERSION >= 17
+ if (fini->minor >= 17)
+ fino.flags |= FUSE_FLOCK_LOCKS;
+#endif
+#if FUSE_KERNEL_MINOR_VERSION >= 12
+ if (fini->minor >= 12) {
+ /* let fuse leave the umask processing to us, so that it does not
+ * break extended POSIX ACL defaults on server */
+ fino.flags |= FUSE_DONT_MASK;
+ }
+#endif
+#if FUSE_KERNEL_MINOR_VERSION >= 9
if (fini->minor >= 6 /* fuse_init_in has flags */ &&
fini->flags & FUSE_BIG_WRITES) {
- /* no need for direct I/O mode if big writes are supported */
- priv->direct_io_mode = 0;
+ /* no need for direct I/O mode by default if big writes are supported */
+ if (priv->direct_io_mode == 2)
+ priv->direct_io_mode = 0;
fino.flags |= FUSE_BIG_WRITES;
}
+
+ /* Used for 'reverse invalidation of inode' */
+ if (fini->minor >= 12) {
+ if (pipe(pfd) == -1) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "cannot create pipe pair (%s)",
+ strerror(errno));
+
+ close (priv->fd);
+ goto out;
+ }
+ priv->revchan_in = pfd[0];
+ priv->revchan_out = pfd[1];
+ ret = gf_thread_create (&messenger, NULL, notify_kernel_loop,
+ this);
+ if (ret != 0) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "failed to start messenger daemon (%s)",
+ strerror(errno));
+
+ close (priv->fd);
+ goto out;
+ }
+ priv->reverse_fuse_thread_started = _gf_true;
+ } else {
+ /*
+ * FUSE minor < 12 does not implement invalidate notifications.
+ * This mechanism is required for fopen-keep-cache to operate
+ * correctly. Disable and warn the user.
+ */
+ if (priv->fopen_keep_cache) {
+ gf_log("glusterfs-fuse", GF_LOG_WARNING, "FUSE version "
+ "%d.%d does not support inval notifications. "
+ "fopen-keep-cache disabled.", fini->major,
+ fini->minor);
+ priv->fopen_keep_cache = 0;
+ }
+ }
+
+ if (fini->minor >= 13) {
+ fino.max_background = priv->background_qlen;
+ fino.congestion_threshold = priv->congestion_threshold;
+ }
if (fini->minor < 9)
*priv->msg0_len_p = sizeof(*finh) + FUSE_COMPAT_WRITE_IN_SIZE;
+#endif
+ if (priv->use_readdirp) {
+ if (fini->flags & FUSE_DO_READDIRPLUS)
+ fino.flags |= FUSE_DO_READDIRPLUS;
+ }
+
+ if (priv->fopen_keep_cache == 2) {
+ /* If user did not explicitly set --fopen-keep-cache[=off],
+ then check if kernel support FUSE_AUTO_INVAL_DATA and ...
+ */
+ if (fini->flags & FUSE_AUTO_INVAL_DATA) {
+ /* ... enable fopen_keep_cache mode if supported.
+ */
+ gf_log ("glusterfs-fuse", GF_LOG_DEBUG, "Detected "
+ "support for FUSE_AUTO_INVAL_DATA. Enabling "
+ "fopen_keep_cache automatically.");
+ fino.flags |= FUSE_AUTO_INVAL_DATA;
+ priv->fopen_keep_cache = 1;
+ } else {
+ gf_log ("glusterfs-fuse", GF_LOG_DEBUG, "No support "
+ "for FUSE_AUTO_INVAL_DATA. Disabling "
+ "fopen_keep_cache.");
+ /* ... else disable. */
+ priv->fopen_keep_cache = 0;
+ }
+ } else if (priv->fopen_keep_cache == 1) {
+ /* If user explicitly set --fopen-keep-cache[=on],
+ then enable FUSE_AUTO_INVAL_DATA if possible.
+ */
+ if (fini->flags & FUSE_AUTO_INVAL_DATA) {
+ gf_log ("glusterfs-fuse", GF_LOG_DEBUG, "fopen_keep_cache "
+ "is explicitly set. Enabling FUSE_AUTO_INVAL_DATA");
+ fino.flags |= FUSE_AUTO_INVAL_DATA;
+ } else {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING, "fopen_keep_cache "
+ "is explicitly set. Support for "
+ "FUSE_AUTO_INVAL_DATA is missing");
+ }
+ }
+
+ if (fini->flags & FUSE_ASYNC_DIO)
+ fino.flags |= FUSE_ASYNC_DIO;
ret = send_fuse_obj (this, finh, &fino);
if (ret == 0)
@@ -2697,7 +3889,7 @@ fuse_init (xlator_t *this, fuse_in_header_t *finh, void *msg)
}
out:
- FREE (finh);
+ GF_FREE (finh);
}
@@ -2706,129 +3898,775 @@ fuse_enosys (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
send_fuse_err (this, finh, ENOSYS);
- FREE (finh);
+ GF_FREE (finh);
}
static void
-fuse_discard (xlator_t *this, fuse_in_header_t *finh, void *msg)
+fuse_destroy (xlator_t *this, fuse_in_header_t *finh, void *msg)
{
- FREE (finh);
+ send_fuse_err (this, finh, 0);
+
+ GF_FREE (finh);
}
-static fuse_handler_t *fuse_ops[FUSE_712_OP_HIGH];
+
+
+struct fuse_first_lookup {
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ char fin;
+};
int
-fuse_root_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf, dict_t *xattr)
+fuse_first_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
{
- fuse_private_t *priv = NULL;
+ struct fuse_first_lookup *stub = NULL;
- priv = this->private;
+ stub = frame->local;
if (op_ret == 0) {
gf_log (this->name, GF_LOG_TRACE,
"first lookup on root succeeded.");
- inode_lookup (inode);
} else {
gf_log (this->name, GF_LOG_DEBUG,
"first lookup on root failed.");
}
- STACK_DESTROY (frame->root);
- pthread_mutex_lock (&priv->first_call_mutex);
+
+ pthread_mutex_lock (&stub->mutex);
{
- priv->first_call = 0;
- pthread_cond_broadcast (&priv->first_call_cond);
+ stub->fin = 1;
+ pthread_cond_broadcast (&stub->cond);
}
- pthread_mutex_unlock (&priv->first_call_mutex);
+ pthread_mutex_unlock (&stub->mutex);
+
return 0;
}
int
-fuse_root_lookup (xlator_t *this)
+fuse_first_lookup (xlator_t *this)
{
- fuse_private_t *priv = NULL;
- loc_t loc;
- call_frame_t *frame = NULL;
- xlator_t *xl = NULL;
- dict_t *dict = NULL;
+ fuse_private_t *priv = NULL;
+ loc_t loc = {0, };
+ call_frame_t *frame = NULL;
+ xlator_t *xl = NULL;
+ dict_t *dict = NULL;
+ struct fuse_first_lookup stub;
+ uuid_t gfid;
+ int ret;
priv = this->private;
- pthread_cond_init (&priv->first_call_cond, NULL);
- pthread_mutex_init (&priv->first_call_mutex, NULL);
-
loc.path = "/";
loc.name = "";
- loc.ino = 1;
- loc.inode = inode_search (this->itable, 1, NULL);
+ loc.inode = fuse_ino_to_inode (1, this);
+ uuid_copy (loc.gfid, loc.inode->gfid);
loc.parent = NULL;
dict = dict_new ();
frame = create_frame (this, this->ctx->pool);
- frame->root->type = GF_OP_TYPE_FOP_REQUEST;
- xl = this->children->xlator;
+ frame->root->type = GF_OP_TYPE_FOP;
+
+ xl = priv->active_subvol;
+
+ pthread_mutex_init (&stub.mutex, NULL);
+ pthread_cond_init (&stub.cond, NULL);
+ stub.fin = 0;
- STACK_WIND (frame, fuse_root_lookup_cbk, xl, xl->fops->lookup,
+ frame->local = &stub;
+
+ memset (gfid, 0, 16);
+ gfid[15] = 1;
+ ret = dict_set_static_bin (dict, "gfid-req", gfid, 16);
+ if (ret)
+ gf_log (xl->name, GF_LOG_ERROR, "failed to set 'gfid-req'");
+
+ STACK_WIND (frame, fuse_first_lookup_cbk, xl, xl->fops->lookup,
&loc, dict);
dict_unref (dict);
- pthread_mutex_lock (&priv->first_call_mutex);
+ pthread_mutex_lock (&stub.mutex);
+ {
+ while (!stub.fin) {
+ pthread_cond_wait (&stub.cond, &stub.mutex);
+ }
+ }
+ pthread_mutex_unlock (&stub.mutex);
+
+ pthread_mutex_destroy (&stub.mutex);
+ pthread_cond_destroy (&stub.cond);
+
+ frame->local = NULL;
+ STACK_DESTROY (frame->root);
+
+ return 0;
+}
+
+
+int
+fuse_nameless_lookup (xlator_t *xl, uuid_t gfid, loc_t *loc)
+{
+ int ret = -1;
+ dict_t *xattr_req = NULL;
+ struct iatt iatt = {0, };
+ inode_t *linked_inode = NULL;
+
+ if ((loc == NULL) || (xl == NULL)) {
+ goto out;
+ }
+
+ if (loc->inode == NULL) {
+ loc->inode = inode_new (xl->itable);
+ if (loc->inode == NULL) {
+ goto out;
+ }
+ }
+
+ uuid_copy (loc->gfid, gfid);
+
+ xattr_req = dict_new ();
+ if (xattr_req == NULL) {
+ goto out;
+ }
+
+ ret = syncop_lookup (xl, loc, xattr_req, &iatt, NULL, NULL);
+ if (ret < 0) {
+ goto out;
+ }
+
+ linked_inode = inode_link (loc->inode, NULL, NULL, &iatt);
+ inode_unref (loc->inode);
+ loc->inode = linked_inode;
+
+ ret = 0;
+out:
+ if (xattr_req != NULL) {
+ dict_unref (xattr_req);
+ }
+
+ return ret;
+}
+
+
+int
+fuse_migrate_fd_open (xlator_t *this, fd_t *basefd, fd_t *oldfd,
+ xlator_t *old_subvol, xlator_t *new_subvol)
+{
+ loc_t loc = {0, };
+ fd_t *newfd = NULL, *old_activefd = NULL;
+ fuse_fd_ctx_t *basefd_ctx = NULL;
+ fuse_fd_ctx_t *newfd_ctx = NULL;
+ int ret = 0, flags = 0;
+
+ ret = inode_path (basefd->inode, NULL, (char **)&loc.path);
+ if (ret < 0) {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "cannot construct path of gfid (%s) failed"
+ "(old-subvolume:%s-%d new-subvolume:%s-%d)",
+ uuid_utoa (basefd->inode->gfid),
+ old_subvol->name, old_subvol->graph->id,
+ new_subvol->name, new_subvol->graph->id);
+ goto out;
+ }
+
+ uuid_copy (loc.gfid, basefd->inode->gfid);
+
+ loc.inode = inode_find (new_subvol->itable, basefd->inode->gfid);
+
+ if (loc.inode == NULL) {
+ ret = fuse_nameless_lookup (new_subvol, basefd->inode->gfid,
+ &loc);
+ if (ret < 0) {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "name-less lookup of gfid (%s) failed (%s)"
+ "(old-subvolume:%s-%d new-subvolume:%s-%d)",
+ uuid_utoa (basefd->inode->gfid),
+ strerror (errno),
+ old_subvol->name, old_subvol->graph->id,
+ new_subvol->name, new_subvol->graph->id);
+ goto out;
+ }
+
+ }
+
+ basefd_ctx = fuse_fd_ctx_get (this, basefd);
+ GF_VALIDATE_OR_GOTO ("glusterfs-fuse", basefd_ctx, out);
+
+ newfd = fd_create (loc.inode, basefd->pid);
+ if (newfd == NULL) {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "cannot create new fd, hence not migrating basefd "
+ "(ptr:%p inode-gfid:%s) "
+ "(old-subvolume:%s-%d new-subvolume:%s-%d)", basefd,
+ uuid_utoa (loc.inode->gfid),
+ old_subvol->name, old_subvol->graph->id,
+ new_subvol->name, new_subvol->graph->id);
+ goto out;
+ }
+
+ newfd->flags = basefd->flags;
+ if (newfd->lk_ctx)
+ fd_lk_ctx_unref (newfd->lk_ctx);
+
+ newfd->lk_ctx = fd_lk_ctx_ref (oldfd->lk_ctx);
+
+ newfd_ctx = fuse_fd_ctx_check_n_create (this, newfd);
+ GF_VALIDATE_OR_GOTO ("glusterfs-fuse", newfd_ctx, out);
+
+ if (IA_ISDIR (basefd->inode->ia_type)) {
+ ret = syncop_opendir (new_subvol, &loc, newfd);
+ } else {
+ flags = basefd->flags & ~(O_CREAT | O_EXCL | O_TRUNC);
+ ret = syncop_open (new_subvol, &loc, flags, newfd);
+ }
+
+ if (ret < 0) {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "open on basefd (ptr:%p inode-gfid:%s) failed (%s)"
+ "(old-subvolume:%s-%d new-subvolume:%s-%d)", basefd,
+ uuid_utoa (basefd->inode->gfid), strerror (errno),
+ old_subvol->name, old_subvol->graph->id,
+ new_subvol->name, new_subvol->graph->id);
+ goto out;
+ }
+
+ fd_bind (newfd);
+
+ LOCK (&basefd->lock);
+ {
+ if (basefd_ctx->activefd != NULL) {
+ old_activefd = basefd_ctx->activefd;
+ }
+
+ basefd_ctx->activefd = newfd;
+ }
+ UNLOCK (&basefd->lock);
+
+ if (old_activefd != NULL) {
+ fd_unref (old_activefd);
+ }
+
+ gf_log ("glusterfs-fuse", GF_LOG_INFO,
+ "migrated basefd (%p) to newfd (%p) (inode-gfid:%s)"
+ "(old-subvolume:%s-%d new-subvolume:%s-%d)", basefd, newfd,
+ uuid_utoa (basefd->inode->gfid),
+ old_subvol->name, old_subvol->graph->id,
+ new_subvol->name, new_subvol->graph->id);
+
+ ret = 0;
+
+out:
+ loc_wipe (&loc);
+
+ return ret;
+}
+
+int
+fuse_migrate_locks (xlator_t *this, fd_t *basefd, fd_t *oldfd,
+ xlator_t *old_subvol, xlator_t *new_subvol)
+{
+ int ret = -1;
+ dict_t *lockinfo = NULL;
+ void *ptr = NULL;
+ fd_t *newfd = NULL;
+ fuse_fd_ctx_t *basefd_ctx = NULL;
+
+
+ if (!oldfd->lk_ctx || fd_lk_ctx_empty (oldfd->lk_ctx))
+ return 0;
+
+ basefd_ctx = fuse_fd_ctx_get (this, basefd);
+ GF_VALIDATE_OR_GOTO ("glusterfs-fuse", basefd_ctx, out);
+
+ LOCK (&basefd->lock);
+ {
+ newfd = fd_ref (basefd_ctx->activefd);
+ }
+ UNLOCK (&basefd->lock);
+
+ ret = syncop_fgetxattr (old_subvol, oldfd, &lockinfo,
+ GF_XATTR_LOCKINFO_KEY);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "getting lockinfo failed while migrating locks"
+ "(oldfd:%p newfd:%p inode-gfid:%s)"
+ "(old-subvol:%s-%d new-subvol:%s-%d)",
+ oldfd, newfd, uuid_utoa (newfd->inode->gfid),
+ old_subvol->name, old_subvol->graph->id,
+ new_subvol->name, new_subvol->graph->id);
+ goto out;
+ }
+
+ ret = dict_get_ptr (lockinfo, GF_XATTR_LOCKINFO_KEY, &ptr);
+ if (ptr == NULL) {
+ ret = 0;
+ gf_log (this->name, GF_LOG_INFO,
+ "No lockinfo present on any of the bricks "
+ "(oldfd: %p newfd:%p inode-gfid:%s) "
+ "(old-subvol:%s-%d new-subvol:%s-%d)",
+ oldfd, newfd, uuid_utoa (newfd->inode->gfid),
+ old_subvol->name, old_subvol->graph->id,
+ new_subvol->name, new_subvol->graph->id);
+
+ goto out;
+ }
+
+ ret = syncop_fsetxattr (new_subvol, newfd, lockinfo, 0);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "migrating locks failed (oldfd:%p newfd:%p "
+ "inode-gfid:%s) (old-subvol:%s-%d new-subvol:%s-%d)",
+ oldfd, newfd, uuid_utoa (newfd->inode->gfid),
+ old_subvol->name, old_subvol->graph->id,
+ new_subvol->name, new_subvol->graph->id);
+ goto out;
+ }
+
+out:
+ if (newfd)
+ fd_unref (newfd);
+
+ if (lockinfo != NULL) {
+ dict_unref (lockinfo);
+ }
+
+ return ret;
+}
+
+
+int
+fuse_migrate_fd (xlator_t *this, fd_t *basefd, xlator_t *old_subvol,
+ xlator_t *new_subvol)
+{
+ int ret = -1;
+ char create_in_progress = 0;
+ fuse_fd_ctx_t *basefd_ctx = NULL;
+ fd_t *oldfd = NULL;
+
+ basefd_ctx = fuse_fd_ctx_get (this, basefd);
+ GF_VALIDATE_OR_GOTO ("glusterfs-fuse", basefd_ctx, out);
+
+ LOCK (&basefd->lock);
+ {
+ oldfd = basefd_ctx->activefd ? basefd_ctx->activefd
+ : basefd;
+ fd_ref (oldfd);
+ }
+ UNLOCK (&basefd->lock);
+
+ LOCK (&oldfd->inode->lock);
+ {
+ if (uuid_is_null (oldfd->inode->gfid)) {
+ create_in_progress = 1;
+ } else {
+ create_in_progress = 0;
+ }
+ }
+ UNLOCK (&oldfd->inode->lock);
+
+ if (create_in_progress) {
+ gf_log ("glusterfs-fuse", GF_LOG_INFO,
+ "create call on fd (%p) is in progress "
+ "(basefd-ptr:%p basefd-inode.gfid:%s), "
+ "hence deferring migration till application does an "
+ "fd based operation on this fd"
+ "(old-subvolume:%s-%d, new-subvolume:%s-%d)",
+ oldfd, basefd, uuid_utoa (basefd->inode->gfid),
+ old_subvol->name, old_subvol->graph->id,
+ new_subvol->name, new_subvol->graph->id);
+
+ ret = 0;
+ goto out;
+ }
+
+ if (oldfd->inode->table->xl == old_subvol) {
+ ret = syncop_fsync (old_subvol, oldfd, 0);
+ if (ret < 0) {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "syncop_fsync failed (%s) on fd (%p)"
+ "(basefd:%p basefd-inode.gfid:%s) "
+ "(old-subvolume:%s-%d new-subvolume:%s-%d)",
+ strerror (errno), oldfd, basefd,
+ uuid_utoa (basefd->inode->gfid),
+ old_subvol->name, old_subvol->graph->id,
+ new_subvol->name, new_subvol->graph->id);
+ }
+ } else {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "basefd (ptr:%p inode-gfid:%s) was not "
+ "migrated during previous graph switch"
+ "(old-subvolume:%s-%d new-subvolume: %s-%d)", basefd,
+ basefd->inode->gfid,
+ old_subvol->name, old_subvol->graph->id,
+ new_subvol->name, new_subvol->graph->id);
+ }
+
+ ret = fuse_migrate_fd_open (this, basefd, oldfd, old_subvol,
+ new_subvol);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING, "open corresponding to "
+ "basefd (ptr:%p inode-gfid:%s) in new graph failed "
+ "(old-subvolume:%s-%d new-subvolume:%s-%d)", basefd,
+ uuid_utoa (basefd->inode->gfid), old_subvol->name,
+ old_subvol->graph->id, new_subvol->name,
+ new_subvol->graph->id);
+ goto out;
+ }
+
+ ret = fuse_migrate_locks (this, basefd, oldfd, old_subvol,
+ new_subvol);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "migrating locks from old-subvolume (%s-%d) to "
+ "new-subvolume (%s-%d) failed (inode-gfid:%s oldfd:%p "
+ "basefd:%p)", old_subvol->name, old_subvol->graph->id,
+ new_subvol->name, new_subvol->graph->id,
+ uuid_utoa (basefd->inode->gfid), oldfd, basefd);
+
+ }
+out:
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING, "migration of basefd "
+ "(ptr:%p inode-gfid:%s) failed"
+ "(old-subvolume:%s-%d new-subvolume:%s-%d)", basefd,
+ oldfd ? uuid_utoa (oldfd->inode->gfid) : NULL,
+ old_subvol->name, old_subvol->graph->id,
+ new_subvol->name, new_subvol->graph->id);
+ }
+
+ fd_unref (oldfd);
+
+ return ret;
+}
+
+
+int
+fuse_handle_opened_fds (xlator_t *this, xlator_t *old_subvol,
+ xlator_t *new_subvol)
+{
+ fuse_private_t *priv = NULL;
+ fdentry_t *fdentries = NULL;
+ uint32_t count = 0;
+ fdtable_t *fdtable = NULL;
+ int i = 0;
+ fd_t *fd = NULL;
+ int32_t ret = 0;
+ fuse_fd_ctx_t *fdctx = NULL;
+
+ priv = this->private;
+
+ fdtable = priv->fdtable;
+
+ fdentries = gf_fd_fdtable_copy_all_fds (fdtable, &count);
+ if (fdentries != NULL) {
+ for (i = 0; i < count; i++) {
+ fd = fdentries[i].fd;
+ if (fd == NULL)
+ continue;
+
+ ret = fuse_migrate_fd (this, fd, old_subvol,
+ new_subvol);
+
+ fdctx = fuse_fd_ctx_get (this, fd);
+ if (fdctx) {
+ LOCK (&fd->lock);
+ {
+ if (ret < 0) {
+ fdctx->migration_failed = 1;
+ } else {
+ fdctx->migration_failed = 0;
+ }
+ }
+ UNLOCK (&fd->lock);
+ }
+ }
+
+ for (i = 0; i < count ; i++) {
+ fd = fdentries[i].fd;
+ if (fd)
+ fd_unref (fd);
+ }
+
+ GF_FREE (fdentries);
+ }
+
+ return 0;
+}
+
+
+static int
+fuse_handle_blocked_locks (xlator_t *this, xlator_t *old_subvol,
+ xlator_t *new_subvol)
+{
+ return 0;
+}
+
+
+static int
+fuse_graph_switch_task (void *data)
+{
+ fuse_graph_switch_args_t *args = NULL;
+
+ args = data;
+ if (args == NULL) {
+ goto out;
+ }
+
+ /* don't change the order of handling open fds and blocked locks, since
+ * the act of opening files also reacquires granted locks in new graph.
+ */
+ fuse_handle_opened_fds (args->this, args->old_subvol, args->new_subvol);
+
+ fuse_handle_blocked_locks (args->this, args->old_subvol,
+ args->new_subvol);
+
+out:
+ return 0;
+}
+
+
+fuse_graph_switch_args_t *
+fuse_graph_switch_args_alloc (void)
+{
+ fuse_graph_switch_args_t *args = NULL;
+
+ args = GF_CALLOC (1, sizeof (*args), gf_fuse_mt_graph_switch_args_t);
+ if (args == NULL) {
+ goto out;
+ }
+
+out:
+ return args;
+}
+
+
+void
+fuse_graph_switch_args_destroy (fuse_graph_switch_args_t *args)
+{
+ if (args == NULL) {
+ goto out;
+ }
+
+ GF_FREE (args);
+out:
+ return;
+}
+
+
+int
+fuse_handle_graph_switch (xlator_t *this, xlator_t *old_subvol,
+ xlator_t *new_subvol)
+{
+ call_frame_t *frame = NULL;
+ int32_t ret = -1;
+ fuse_graph_switch_args_t *args = NULL;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (frame == NULL) {
+ goto out;
+ }
+
+ args = fuse_graph_switch_args_alloc ();
+ if (args == NULL) {
+ goto out;
+ }
+
+ args->this = this;
+ args->old_subvol = old_subvol;
+ args->new_subvol = new_subvol;
+
+ ret = synctask_new (this->ctx->env, fuse_graph_switch_task, NULL, frame,
+ args);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING, "starting sync-task to "
+ "handle graph switch failed");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (args != NULL) {
+ fuse_graph_switch_args_destroy (args);
+ }
+
+ if (frame != NULL) {
+ STACK_DESTROY (frame->root);
+ }
+
+ return ret;
+}
+
+
+int
+fuse_graph_sync (xlator_t *this)
+{
+ fuse_private_t *priv = NULL;
+ int need_first_lookup = 0;
+ int ret = 0;
+ xlator_t *old_subvol = NULL, *new_subvol = NULL;
+ uint64_t winds_on_old_subvol = 0;
+
+ priv = this->private;
+
+ pthread_mutex_lock (&priv->sync_mutex);
{
- while (priv->first_call) {
- pthread_cond_wait (&priv->first_call_cond,
- &priv->first_call_mutex);
+ if (!priv->next_graph)
+ goto unlock;
+
+ old_subvol = priv->active_subvol;
+ new_subvol = priv->active_subvol = priv->next_graph->top;
+ priv->next_graph = NULL;
+ need_first_lookup = 1;
+
+ while (!priv->event_recvd) {
+ ret = pthread_cond_wait (&priv->sync_cond,
+ &priv->sync_mutex);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "timedwait returned non zero value "
+ "ret: %d errno: %d", ret, errno);
+ break;
+ }
+ }
+ }
+unlock:
+ pthread_mutex_unlock (&priv->sync_mutex);
+
+ if (need_first_lookup) {
+ fuse_first_lookup (this);
+ }
+
+ if ((old_subvol != NULL) && (new_subvol != NULL)) {
+ fuse_handle_graph_switch (this, old_subvol, new_subvol);
+
+ pthread_mutex_lock (&priv->sync_mutex);
+ {
+ old_subvol->switched = 1;
+ winds_on_old_subvol = old_subvol->winds;
+ }
+ pthread_mutex_unlock (&priv->sync_mutex);
+
+ if (winds_on_old_subvol == 0) {
+ xlator_notify (old_subvol, GF_EVENT_PARENT_DOWN,
+ old_subvol, NULL);
}
}
- pthread_mutex_unlock (&priv->first_call_mutex);
return 0;
}
+int
+fuse_get_mount_status (xlator_t *this)
+{
+ int kid_status = -1;
+ fuse_private_t *priv = this->private;
+
+ if (read(priv->status_pipe[0],&kid_status, sizeof(kid_status)) < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "could not get mount status");
+ kid_status = -1;
+ }
+ gf_log (this->name, GF_LOG_DEBUG, "mount status is %d", kid_status);
+ close(priv->status_pipe[0]);
+ close(priv->status_pipe[1]);
+ return kid_status;
+}
static void *
fuse_thread_proc (void *data)
{
- char *mount_point = NULL;
- xlator_t *this = NULL;
- fuse_private_t *priv = NULL;
- ssize_t res = 0;
- struct iobuf *iobuf = NULL;
- fuse_in_header_t *finh;
- struct iovec iov_in[2];
- void *msg = NULL;
- const size_t msg0_size = sizeof (*finh) + 128;
+ char *mount_point = NULL;
+ xlator_t *this = NULL;
+ fuse_private_t *priv = NULL;
+ ssize_t res = 0;
+ struct iobuf *iobuf = NULL;
+ fuse_in_header_t *finh;
+ struct iovec iov_in[2];
+ void *msg = NULL;
+ const size_t msg0_size = sizeof (*finh) + 128;
+ fuse_handler_t **fuse_ops = NULL;
+ struct pollfd pfd[2] = {{0,}};
+ gf_boolean_t mount_finished = _gf_false;
this = data;
priv = this->private;
+ fuse_ops = priv->fuse_ops;
THIS = this;
iov_in[0].iov_len = sizeof (*finh) + sizeof (struct fuse_write_in);
iov_in[1].iov_len = ((struct iobuf_pool *)this->ctx->iobuf_pool)
- ->page_size;
+ ->default_page_size;
priv->msg0_len_p = &iov_in[0].iov_len;
for (;;) {
+ /* THIS has to be reset here */
+ THIS = this;
+
+ if (!mount_finished) {
+ memset(pfd,0,sizeof(pfd));
+ pfd[0].fd = priv->status_pipe[0];
+ pfd[0].events = POLLIN | POLLHUP | POLLERR;
+ pfd[1].fd = priv->fd;
+ pfd[1].events = POLLIN | POLLHUP | POLLERR;
+ if (poll(pfd,2,-1) < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "poll error %s", strerror(errno));
+ break;
+ }
+ if (pfd[0].revents & POLLIN) {
+ if (fuse_get_mount_status(this) != 0) {
+ break;
+ }
+ mount_finished = _gf_true;
+ }
+ else if (pfd[0].revents) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "mount pipe closed without status");
+ break;
+ }
+ if (!pfd[1].revents) {
+ continue;
+ }
+ }
+
+ /*
+ * We don't want to block on readv while we're still waiting
+ * for mount status. That means we only want to get here if
+ * mount_status is true (meaning that our wait completed
+ * already) or if we already called poll(2) on priv->fd to
+ * make sure it's ready.
+ */
+
+ if (priv->init_recvd)
+ fuse_graph_sync (this);
+
+ /* TODO: This place should always get maximum supported buffer
+ size from 'fuse', which is as of today 128KB. If we bring in
+ support for higher block sizes support, then we should be
+ changing this one too */
iobuf = iobuf_get (this->ctx->iobuf_pool);
+
/* Add extra 128 byte to the first iov so that it can
- * accomodate "ordinary" non-write requests. It's not
+ * accommodate "ordinary" non-write requests. It's not
* guaranteed to be big enough, as SETXATTR and namespace
* operations with very long names may grow behind it,
* but it's good enough in most cases (and we can handle
* rest via realloc).
*/
- iov_in[0].iov_base = CALLOC (1, msg0_size);
+ iov_in[0].iov_base = GF_CALLOC (1, msg0_size,
+ gf_fuse_mt_iov_base);
if (!iobuf || !iov_in[0].iov_base) {
gf_log (this->name, GF_LOG_ERROR,
"Out of memory");
if (iobuf)
iobuf_unref (iobuf);
- FREE (iov_in[0].iov_base);
+ GF_FREE (iov_in[0].iov_base);
sleep (10);
continue;
}
@@ -2837,39 +4675,55 @@ fuse_thread_proc (void *data)
res = readv (priv->fd, iov_in, 2);
- if (priv->first_call) {
- if (priv->first_call > 1) {
- priv->first_call--;
- } else {
- fuse_root_lookup (this);
- }
- }
-
if (res == -1) {
if (errno == ENODEV || errno == EBADF) {
- gf_log ("glusterfs-fuse", GF_LOG_NORMAL,
+ gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
"terminating upon getting %s when "
"reading /dev/fuse",
errno == ENODEV ? "ENODEV" : "EBADF");
-
+ fuse_log_eh (this, "glusterfs-fuse: terminating"
+ " upon getting %s when "
+ "reading /dev/fuse",
+ errno == ENODEV ? "ENODEV":
+ "EBADF");
break;
}
if (errno != EINTR) {
gf_log ("glusterfs-fuse", GF_LOG_WARNING,
"read from /dev/fuse returned -1 (%s)",
strerror (errno));
+ fuse_log_eh (this, "glusterfs-fuse: read from "
+ "/dev/fuse returned -1 (%s)",
+ strerror (errno));
}
goto cont_err;
}
if (res < sizeof (finh)) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING, "short read on /dev/fuse");
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "short read on /dev/fuse");
+ fuse_log_eh (this, "glusterfs-fuse: short read on "
+ "/dev/fuse");
break;
}
finh = (fuse_in_header_t *)iov_in[0].iov_base;
- if (res != finh->len) {
- gf_log ("glusterfs-fuse", GF_LOG_WARNING, "inconsistent read on /dev/fuse");
+
+ if (res != finh->len
+#ifdef GF_DARWIN_HOST_OS
+ /* work around fuse4bsd/MacFUSE msg size miscalculation bug,
+ * that is, payload size is not taken into account for
+ * buffered writes
+ */
+ && !(finh->opcode == FUSE_WRITE &&
+ finh->len == sizeof(*finh) + sizeof(struct fuse_write_in) &&
+ res == finh->len + ((struct fuse_write_in *)(finh + 1))->size)
+#endif
+ ) {
+ gf_log ("glusterfs-fuse", GF_LOG_WARNING,
+ "inconsistent read on /dev/fuse");
+ fuse_log_eh (this, "glusterfs-fuse: inconsistent read "
+ "on /dev/fuse");
break;
}
@@ -2879,11 +4733,12 @@ fuse_thread_proc (void *data)
msg = iov_in[1].iov_base;
else {
if (res > msg0_size) {
- iov_in[0].iov_base =
- realloc (iov_in[0].iov_base, res);
- if (iov_in[0].iov_base)
+ void *b = GF_REALLOC (iov_in[0].iov_base, res);
+ if (b) {
+ iov_in[0].iov_base = b;
finh = (fuse_in_header_t *)
iov_in[0].iov_base;
+ }
else {
gf_log ("glusterfs-fuse", GF_LOG_ERROR,
"Out of memory");
@@ -2900,33 +4755,44 @@ fuse_thread_proc (void *data)
msg = finh + 1;
}
- fuse_ops[finh->opcode] (this, finh, msg);
+ if (priv->uid_map_root &&
+ finh->uid == priv->uid_map_root)
+ finh->uid = 0;
+
+ if (finh->opcode >= FUSE_OP_HIGH)
+ /* turn down MacFUSE specific messages */
+ fuse_enosys (this, finh, msg);
+ else
+ fuse_ops[finh->opcode] (this, finh, msg);
iobuf_unref (iobuf);
continue;
cont_err:
iobuf_unref (iobuf);
- FREE (iov_in[0].iov_base);
+ GF_FREE (iov_in[0].iov_base);
}
- iobuf_unref (iobuf);
- FREE (iov_in[0].iov_base);
+ /*
+ * We could be in all sorts of states with respect to iobuf and iov_in
+ * by the time we get here, and it's just not worth untangling them if
+ * we're about to kill ourselves anyway.
+ */
if (dict_get (this->options, ZR_MOUNTPOINT_OPT))
mount_point = data_to_str (dict_get (this->options,
ZR_MOUNTPOINT_OPT));
if (mount_point) {
- gf_log (this->name, GF_LOG_NORMAL,
+ gf_log (this->name, GF_LOG_INFO,
"unmounting %s", mount_point);
- dict_del (this->options, ZR_MOUNTPOINT_OPT);
}
- raise (SIGTERM);
-
+ /* Kill the whole process, not just this thread. */
+ kill (getpid(), SIGTERM);
return NULL;
}
+
int32_t
fuse_itable_dump (xlator_t *this)
{
@@ -2936,7 +4802,7 @@ fuse_itable_dump (xlator_t *this)
gf_proc_dump_add_section("xlator.mount.fuse.itable");
inode_table_dump(this->itable, "xlator.mount.fuse.itable");
- return 0;
+ return 0;
}
int32_t
@@ -2954,90 +4820,283 @@ fuse_priv_dump (xlator_t *this)
gf_proc_dump_add_section("xlator.mount.fuse.priv");
- gf_proc_dump_write("xlator.mount.fuse.priv.fd", "%d", private->fd);
- gf_proc_dump_write("xlator.mount.fuse.priv.proto_minor", "%u",
+ gf_proc_dump_write("fd", "%d", private->fd);
+ gf_proc_dump_write("proto_minor", "%u",
private->proto_minor);
- gf_proc_dump_write("xlator.mount.fuse.priv.volfile", "%s",
+ gf_proc_dump_write("volfile", "%s",
private->volfile?private->volfile:"None");
- gf_proc_dump_write("xlator.mount.fuse.volfile_size", "%d",
+ gf_proc_dump_write("volfile_size", "%d",
private->volfile_size);
- gf_proc_dump_write("xlator.mount.fuse.mount_point", "%s",
+ gf_proc_dump_write("mount_point", "%s",
private->mount_point);
- gf_proc_dump_write("xlator.mount.fuse.iobuf", "%u",
+ gf_proc_dump_write("iobuf", "%u",
private->iobuf);
- gf_proc_dump_write("xlator.mount.fuse.fuse_thread_started", "%d",
+ gf_proc_dump_write("fuse_thread_started", "%d",
(int)private->fuse_thread_started);
- gf_proc_dump_write("xlator.mount.fuse.direct_io_mode", "%d",
+ gf_proc_dump_write("direct_io_mode", "%d",
private->direct_io_mode);
- gf_proc_dump_write("xlator.mount.fuse.entry_timeout", "%lf",
+ gf_proc_dump_write("entry_timeout", "%lf",
private->entry_timeout);
- gf_proc_dump_write("xlator.mount.fuse.entry_timeout", "%lf",
+ gf_proc_dump_write("attribute_timeout", "%lf",
private->attribute_timeout);
- gf_proc_dump_write("xlator.mount.fuse.first_call", "%d",
- (int)private->first_call);
- gf_proc_dump_write("xlator.mount.fuse.strict_volfile_check", "%d",
+ gf_proc_dump_write("init_recvd", "%d",
+ (int)private->init_recvd);
+ gf_proc_dump_write("strict_volfile_check", "%d",
(int)private->strict_volfile_check);
+ gf_proc_dump_write("reverse_thread_started", "%d",
+ (int)private->reverse_fuse_thread_started);
+ gf_proc_dump_write("use_readdirp", "%d", private->use_readdirp);
return 0;
}
+int
+fuse_history_dump (xlator_t *this)
+{
+ int ret = -1;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0,};
-int32_t
+ GF_VALIDATE_OR_GOTO ("fuse", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, this->history, out);
+
+ gf_proc_dump_build_key (key_prefix, "xlator.mount.fuse",
+ "history");
+ gf_proc_dump_add_section (key_prefix);
+ eh_dump (this->history, NULL, dump_history_fuse);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+dump_history_fuse (circular_buffer_t *cb, void *data)
+{
+ char *string = NULL;
+ struct tm *tm = NULL;
+ char timestr[256] = {0,};
+
+ string = (char *)cb->data;
+ tm = localtime (&cb->tv.tv_sec);
+
+ if (tm) {
+ strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm);
+ snprintf (timestr + strlen (timestr), 256 - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS, cb->tv.tv_usec);
+ gf_proc_dump_write ("TIME", "%s", timestr);
+ }
+
+ gf_proc_dump_write ("message", "%s\n", string);
+
+ return 0;
+}
+
+int
+fuse_graph_setup (xlator_t *this, glusterfs_graph_t *graph)
+{
+ inode_table_t *itable = NULL;
+ int ret = 0;
+ fuse_private_t *priv = NULL;
+
+ priv = this->private;
+
+ /* handle the case of more than one CHILD_UP on same graph */
+ if (priv->active_subvol == graph->top)
+ return 0; /* This is a valid case */
+
+ if (graph->used)
+ return 0;
+
+ graph->used = 1;
+
+ itable = inode_table_new (0, graph->top);
+ if (!itable)
+ return -1;
+
+ ((xlator_t *)graph->top)->itable = itable;
+
+ pthread_mutex_lock (&priv->sync_mutex);
+ {
+ priv->next_graph = graph;
+ priv->event_recvd = 0;
+
+ pthread_cond_signal (&priv->sync_cond);
+ }
+ pthread_mutex_unlock (&priv->sync_mutex);
+
+ gf_log ("fuse", GF_LOG_INFO, "switched to graph %d",
+ ((graph) ? graph->id : 0));
+
+ return ret;
+}
+
+
+int
notify (xlator_t *this, int32_t event, void *data, ...)
{
- int32_t ret = 0;
- fuse_private_t *private = NULL;
+ int32_t ret = 0;
+ fuse_private_t *private = NULL;
+ glusterfs_graph_t *graph = NULL;
private = this->private;
+ graph = data;
+
+ gf_log ("fuse", GF_LOG_DEBUG, "got event %d on graph %d",
+ event, ((graph) ? graph->id : 0));
+
switch (event)
{
+ case GF_EVENT_GRAPH_NEW:
+ break;
+
case GF_EVENT_CHILD_UP:
+ case GF_EVENT_CHILD_DOWN:
case GF_EVENT_CHILD_CONNECTING:
{
- if (!private->fuse_thread_started)
- {
- private->fuse_thread_started = 1;
+ if (graph) {
+ ret = fuse_graph_setup (this, graph);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to setup the graph");
+ }
+
+ if ((event == GF_EVENT_CHILD_UP)
+ || (event == GF_EVENT_CHILD_DOWN)) {
+ pthread_mutex_lock (&private->sync_mutex);
+ {
+ private->event_recvd = 1;
+ pthread_cond_broadcast (&private->sync_cond);
+ }
+ pthread_mutex_unlock (&private->sync_mutex);
+ }
- ret = pthread_create (&private->fuse_thread, NULL,
- fuse_thread_proc, this);
+ if (!private->fuse_thread_started) {
+ private->fuse_thread_started = 1;
+ ret = gf_thread_create (&private->fuse_thread, NULL,
+ fuse_thread_proc, this);
if (ret != 0) {
- gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_DEBUG,
"pthread_create() failed (%s)",
strerror (errno));
-
- /* If fuse thread is not started, that means,
- its hung, we can't use this process. */
- raise (SIGTERM);
+ break;
}
}
- break;
- }
- case GF_EVENT_PARENT_UP:
- {
- default_notify (this, GF_EVENT_PARENT_UP, data);
break;
}
- case GF_EVENT_VOLFILE_MODIFIED:
+
+ case GF_EVENT_AUTH_FAILED:
{
- gf_log ("fuse", GF_LOG_CRITICAL,
- "Remote volume file changed, try re-mounting.");
- if (private->strict_volfile_check) {
- //fuse_session_remove_chan (private->ch);
- //fuse_session_destroy (private->se);
- //fuse_unmount (private->mount_point, private->ch);
- /* TODO: Above code if works, will be a cleaner way,
- but for now, lets just achieve what we want */
- raise (SIGTERM);
- }
- break;
+ /* Authentication failure is an error and glusterfs should stop */
+ gf_log (this->name, GF_LOG_ERROR, "Server authenication failed. Shutting down.");
+ fini (this);
+ break;
}
+
default:
break;
}
- return 0;
+
+ return ret;
+}
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_fuse_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+
+static fuse_handler_t *fuse_std_ops[FUSE_OP_HIGH] = {
+ [FUSE_LOOKUP] = fuse_lookup,
+ [FUSE_FORGET] = fuse_forget,
+ [FUSE_GETATTR] = fuse_getattr,
+ [FUSE_SETATTR] = fuse_setattr,
+ [FUSE_READLINK] = fuse_readlink,
+ [FUSE_SYMLINK] = fuse_symlink,
+ [FUSE_MKNOD] = fuse_mknod,
+ [FUSE_MKDIR] = fuse_mkdir,
+ [FUSE_UNLINK] = fuse_unlink,
+ [FUSE_RMDIR] = fuse_rmdir,
+ [FUSE_RENAME] = fuse_rename,
+ [FUSE_LINK] = fuse_link,
+ [FUSE_OPEN] = fuse_open,
+ [FUSE_READ] = fuse_readv,
+ [FUSE_WRITE] = fuse_write,
+ [FUSE_STATFS] = fuse_statfs,
+ [FUSE_RELEASE] = fuse_release,
+ [FUSE_FSYNC] = fuse_fsync,
+ [FUSE_SETXATTR] = fuse_setxattr,
+ [FUSE_GETXATTR] = fuse_getxattr,
+ [FUSE_LISTXATTR] = fuse_listxattr,
+ [FUSE_REMOVEXATTR] = fuse_removexattr,
+ [FUSE_FLUSH] = fuse_flush,
+ [FUSE_INIT] = fuse_init,
+ [FUSE_OPENDIR] = fuse_opendir,
+ [FUSE_READDIR] = fuse_readdir,
+ [FUSE_RELEASEDIR] = fuse_releasedir,
+ [FUSE_FSYNCDIR] = fuse_fsyncdir,
+ [FUSE_GETLK] = fuse_getlk,
+ [FUSE_SETLK] = fuse_setlk,
+ [FUSE_SETLKW] = fuse_setlk,
+ [FUSE_ACCESS] = fuse_access,
+ [FUSE_CREATE] = fuse_create,
+ /* [FUSE_INTERRUPT] */
+ /* [FUSE_BMAP] */
+ [FUSE_DESTROY] = fuse_destroy,
+ /* [FUSE_IOCTL] */
+ /* [FUSE_POLL] */
+ /* [FUSE_NOTIFY_REPLY] */
+ [FUSE_BATCH_FORGET]= fuse_batch_forget,
+ [FUSE_FALLOCATE] = fuse_fallocate,
+ [FUSE_READDIRPLUS] = fuse_readdirp,
+};
+
+
+static fuse_handler_t *fuse_dump_ops[FUSE_OP_HIGH];
+
+
+static void
+fuse_dumper (xlator_t *this, fuse_in_header_t *finh, void *msg)
+{
+ fuse_private_t *priv = NULL;
+ struct iovec diov[3];
+ char r = 'R';
+ int ret = 0;
+
+ priv = this->private;
+
+ diov[0].iov_base = &r;
+ diov[0].iov_len = 1;
+ diov[1].iov_base = finh;
+ diov[1].iov_len = sizeof (*finh);
+ diov[2].iov_base = msg;
+ diov[2].iov_len = finh->len - sizeof (*finh);
+
+ pthread_mutex_lock (&priv->fuse_dump_mutex);
+ ret = writev (priv->fuse_dump_fd, diov, 3);
+ pthread_mutex_unlock (&priv->fuse_dump_mutex);
+ if (ret == -1)
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "failed to dump fuse message (R): %s",
+ strerror (errno));
+
+ priv->fuse_ops0[finh->opcode] (this, finh, msg);
}
@@ -3047,11 +5106,19 @@ init (xlator_t *this_xl)
int ret = 0;
dict_t *options = NULL;
char *value_string = NULL;
+ cmd_args_t *cmd_args = NULL;
char *fsname = NULL;
fuse_private_t *priv = NULL;
struct stat stbuf = {0,};
int i = 0;
int xl_name_allocated = 0;
+ int fsname_allocated = 0;
+ glusterfs_ctx_t *ctx = NULL;
+ gf_boolean_t sync_to_mount = _gf_false;
+ gf_boolean_t fopen_keep_cache = _gf_false;
+ unsigned long mntflags = 0;
+ char *mnt_args = NULL;
+ eh_t *event = NULL;
if (this_xl == NULL)
return -1;
@@ -3059,10 +5126,14 @@ init (xlator_t *this_xl)
if (this_xl->options == NULL)
return -1;
+ ctx = this_xl->ctx;
+ if (!ctx)
+ return -1;
+
options = this_xl->options;
if (this_xl->name == NULL) {
- this_xl->name = strdup ("fuse");
+ this_xl->name = gf_strdup ("fuse");
if (!this_xl->name) {
gf_log ("glusterfs-fuse", GF_LOG_ERROR,
"Out of memory");
@@ -3072,7 +5143,7 @@ init (xlator_t *this_xl)
xl_name_allocated = 1;
}
- priv = CALLOC (1, sizeof (*priv));
+ priv = GF_CALLOC (1, sizeof (*priv), gf_fuse_mt_fuse_private_t);
if (!priv) {
gf_log ("glusterfs-fuse", GF_LOG_ERROR,
"Out of memory");
@@ -3082,10 +5153,12 @@ init (xlator_t *this_xl)
this_xl->private = (void *) priv;
priv->mount_point = NULL;
priv->fd = -1;
+ priv->revchan_in = -1;
+ priv->revchan_out = -1;
/* get options from option dictionary */
ret = dict_get_str (options, ZR_MOUNTPOINT_OPT, &value_string);
- if (value_string == NULL) {
+ if (ret == -1 || value_string == NULL) {
gf_log ("fuse", GF_LOG_ERROR,
"Mandatory option 'mountpoint' is not specified.");
goto cleanup_exit;
@@ -3116,7 +5189,7 @@ init (xlator_t *this_xl)
ZR_MOUNTPOINT_OPT, value_string);
goto cleanup_exit;
}
- priv->mount_point = strdup (value_string);
+ priv->mount_point = gf_strdup (value_string);
if (!priv->mount_point) {
gf_log ("glusterfs-fuse", GF_LOG_ERROR,
"Out of memory");
@@ -3124,99 +5197,209 @@ init (xlator_t *this_xl)
goto cleanup_exit;
}
- ret = dict_get_double (options, "attribute-timeout",
- &priv->attribute_timeout);
- if (!priv->attribute_timeout)
- priv->attribute_timeout = 1.0; /* default */
+ GF_OPTION_INIT ("attribute-timeout", priv->attribute_timeout, double,
+ cleanup_exit);
- ret = dict_get_double (options, "entry-timeout",
- &priv->entry_timeout);
- if (!priv->entry_timeout)
- priv->entry_timeout = 1.0; /* default */
+ GF_OPTION_INIT ("entry-timeout", priv->entry_timeout, double,
+ cleanup_exit);
+ GF_OPTION_INIT ("negative-timeout", priv->negative_timeout, double,
+ cleanup_exit);
- priv->direct_io_mode = 1;
+ GF_OPTION_INIT ("client-pid", priv->client_pid, int32, cleanup_exit);
+ /* have to check & register the presence of client-pid manually */
+ priv->client_pid_set = !!dict_get (this_xl->options, "client-pid");
+
+ GF_OPTION_INIT ("uid-map-root", priv->uid_map_root, uint32,
+ cleanup_exit);
+
+ priv->direct_io_mode = 2;
ret = dict_get_str (options, ZR_DIRECT_IO_OPT, &value_string);
- if (value_string) {
+ if (ret == 0) {
ret = gf_string2boolean (value_string, &priv->direct_io_mode);
+ GF_ASSERT (ret == 0);
}
- priv->strict_volfile_check = 0;
- ret = dict_get_str (options, ZR_STRICT_VOLFILE_CHECK, &value_string);
- if (value_string) {
+ GF_OPTION_INIT (ZR_STRICT_VOLFILE_CHECK, priv->strict_volfile_check,
+ bool, cleanup_exit);
+
+ GF_OPTION_INIT ("acl", priv->acl, bool, cleanup_exit);
+
+ if (priv->uid_map_root)
+ priv->acl = 1;
+
+ GF_OPTION_INIT ("selinux", priv->selinux, bool, cleanup_exit);
+
+ GF_OPTION_INIT ("read-only", priv->read_only, bool, cleanup_exit);
+
+ GF_OPTION_INIT ("enable-ino32", priv->enable_ino32, bool, cleanup_exit);
+
+ GF_OPTION_INIT ("use-readdirp", priv->use_readdirp, bool, cleanup_exit);
+
+ priv->fuse_dump_fd = -1;
+ ret = dict_get_str (options, "dump-fuse", &value_string);
+ if (ret == 0) {
+ ret = unlink (value_string);
+ if (ret != -1 || errno == ENOENT)
+ ret = open (value_string, O_RDWR|O_CREAT|O_EXCL,
+ S_IRUSR|S_IWUSR);
+ if (ret == -1) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "cannot open fuse dump file %s",
+ value_string);
+
+ goto cleanup_exit;
+ }
+ priv->fuse_dump_fd = ret;
+ }
+
+ sync_to_mount = _gf_false;
+ ret = dict_get_str (options, "sync-to-mount", &value_string);
+ if (ret == 0) {
ret = gf_string2boolean (value_string,
- &priv->strict_volfile_check);
+ &sync_to_mount);
+ GF_ASSERT (ret == 0);
}
- fsname = this_xl->ctx->cmd_args.volume_file;
- fsname = (fsname ? fsname : this_xl->ctx->cmd_args.volfile_server);
- fsname = (fsname ? fsname : "glusterfs");
+ priv->fopen_keep_cache = 2;
+ if (dict_get (options, "fopen-keep-cache")) {
+ GF_OPTION_INIT("fopen-keep-cache", fopen_keep_cache, bool,
+ cleanup_exit);
+ priv->fopen_keep_cache = fopen_keep_cache;
+ }
- this_xl->itable = inode_table_new (0, this_xl);
- if (!this_xl->itable) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "Out of memory");
+ GF_OPTION_INIT("gid-timeout", priv->gid_cache_timeout, int32,
+ cleanup_exit);
+ GF_OPTION_INIT ("fuse-mountopts", priv->fuse_mountopts, str, cleanup_exit);
+
+ if (gid_cache_init(&priv->gid_cache, priv->gid_cache_timeout) < 0) {
+ gf_log("glusterfs-fuse", GF_LOG_ERROR, "Failed to initialize "
+ "group cache.");
goto cleanup_exit;
}
- priv->fd = gf_fuse_mount (priv->mount_point, fsname,
- "allow_other,default_permissions,"
- "max_read=131072");
+ /* default values seemed to work fine during testing */
+ GF_OPTION_INIT ("background-qlen", priv->background_qlen, int32,
+ cleanup_exit);
+ GF_OPTION_INIT ("congestion-threshold", priv->congestion_threshold,
+ int32, cleanup_exit);
+
+ /* user has set only background-qlen, not congestion-threshold,
+ use the fuse kernel driver formula to set congestion. ie, 75% */
+ if (dict_get (this_xl->options, "background-qlen") &&
+ !dict_get (this_xl->options, "congestion-threshold")) {
+ priv->congestion_threshold = (priv->background_qlen * 3) / 4;
+ gf_log (this_xl->name, GF_LOG_INFO,
+ "setting congestion control as 75%% of "
+ "background-queue length (ie, (.75 * %d) = %d",
+ priv->background_qlen, priv->congestion_threshold);
+ }
+
+ /* congestion should not be higher than background queue length */
+ if (priv->congestion_threshold > priv->background_qlen) {
+ gf_log (this_xl->name, GF_LOG_INFO,
+ "setting congestion control same as "
+ "background-queue length (%d)",
+ priv->background_qlen);
+ priv->congestion_threshold = priv->background_qlen;
+ }
+
+ cmd_args = &this_xl->ctx->cmd_args;
+ fsname = cmd_args->volfile;
+ if (!fsname && cmd_args->volfile_server) {
+ if (cmd_args->volfile_id) {
+ fsname = GF_MALLOC (
+ strlen (cmd_args->volfile_server) + 1 +
+ strlen (cmd_args->volfile_id) + 1,
+ gf_fuse_mt_fuse_private_t);
+ if (!fsname) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "Out of memory");
+ goto cleanup_exit;
+ }
+ fsname_allocated = 1;
+ strcpy (fsname, cmd_args->volfile_server);
+ strcat (fsname, ":");
+ strcat (fsname, cmd_args->volfile_id);
+ } else
+ fsname = cmd_args->volfile_server;
+ }
+ if (!fsname)
+ fsname = "glusterfs";
+
+ priv->fdtable = gf_fd_fdtable_alloc ();
+ if (priv->fdtable == NULL) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR, "Out of memory");
+ goto cleanup_exit;
+ }
+
+ if (priv->read_only)
+ mntflags |= MS_RDONLY;
+ gf_asprintf (&mnt_args, "%s%s%sallow_other,max_read=131072",
+ priv->acl ? "" : "default_permissions,",
+ priv->fuse_mountopts ? priv->fuse_mountopts : "",
+ priv->fuse_mountopts ? "," : "");
+ if (!mnt_args)
+ goto cleanup_exit;
+
+ if (pipe(priv->status_pipe) < 0) {
+ gf_log (this_xl->name, GF_LOG_ERROR,
+ "could not create pipe to separate mount process");
+ goto cleanup_exit;
+ }
+
+ priv->fd = gf_fuse_mount (priv->mount_point, fsname, mntflags, mnt_args,
+ sync_to_mount ? &ctx->mnt_pid : NULL,
+ priv->status_pipe[1]);
if (priv->fd == -1)
goto cleanup_exit;
- this_xl->ctx->top = this_xl;
-
- priv->first_call = 2;
-
- for (i = 0; i < FUSE_712_OP_HIGH; i++)
- fuse_ops[i] = fuse_enosys;
- fuse_ops[FUSE_INIT] = fuse_init;
- fuse_ops[FUSE_DESTROY] = fuse_discard;
- fuse_ops[FUSE_LOOKUP] = fuse_lookup;
- fuse_ops[FUSE_FORGET] = fuse_forget;
- fuse_ops[FUSE_GETATTR] = fuse_getattr;
- fuse_ops[FUSE_SETATTR] = fuse_setattr;
- fuse_ops[FUSE_OPENDIR] = fuse_opendir;
- fuse_ops[FUSE_READDIR] = fuse_readdir;
- fuse_ops[FUSE_RELEASEDIR] = fuse_releasedir;
- fuse_ops[FUSE_ACCESS] = fuse_access;
- fuse_ops[FUSE_READLINK] = fuse_readlink;
- fuse_ops[FUSE_MKNOD] = fuse_mknod;
- fuse_ops[FUSE_MKDIR] = fuse_mkdir;
- fuse_ops[FUSE_UNLINK] = fuse_unlink;
- fuse_ops[FUSE_RMDIR] = fuse_rmdir;
- fuse_ops[FUSE_SYMLINK] = fuse_symlink;
- fuse_ops[FUSE_RENAME] = fuse_rename;
- fuse_ops[FUSE_LINK] = fuse_link;
- fuse_ops[FUSE_CREATE] = fuse_create;
- fuse_ops[FUSE_OPEN] = fuse_open;
- fuse_ops[FUSE_READ] = fuse_readv;
- fuse_ops[FUSE_WRITE] = fuse_write;
- fuse_ops[FUSE_FLUSH] = fuse_flush;
- fuse_ops[FUSE_RELEASE] = fuse_release;
- fuse_ops[FUSE_FSYNC] = fuse_fsync;
- fuse_ops[FUSE_FSYNCDIR] = fuse_fsyncdir;
- fuse_ops[FUSE_STATFS] = fuse_statfs;
- fuse_ops[FUSE_SETXATTR] = fuse_setxattr;
- fuse_ops[FUSE_GETXATTR] = fuse_getxattr;
- fuse_ops[FUSE_LISTXATTR] = fuse_listxattr;
- fuse_ops[FUSE_REMOVEXATTR] = fuse_removexattr;
- fuse_ops[FUSE_GETLK] = fuse_getlk;
- fuse_ops[FUSE_SETLK] = fuse_setlk;
- fuse_ops[FUSE_SETLKW] = fuse_setlk;
+ event = eh_new (FUSE_EVENT_HISTORY_SIZE, _gf_false, NULL);
+ if (!event) {
+ gf_log (this_xl->name, GF_LOG_ERROR,
+ "could not create a new event history");
+ goto cleanup_exit;
+ }
+
+ this_xl->history = event;
+ pthread_mutex_init (&priv->fuse_dump_mutex, NULL);
+ pthread_cond_init (&priv->sync_cond, NULL);
+ pthread_mutex_init (&priv->sync_mutex, NULL);
+ priv->event_recvd = 0;
+
+ for (i = 0; i < FUSE_OP_HIGH; i++) {
+ if (!fuse_std_ops[i])
+ fuse_std_ops[i] = fuse_enosys;
+ if (!fuse_dump_ops[i])
+ fuse_dump_ops[i] = fuse_dumper;
+ }
+ priv->fuse_ops = fuse_std_ops;
+ if (priv->fuse_dump_fd != -1) {
+ priv->fuse_ops0 = priv->fuse_ops;
+ priv->fuse_ops = fuse_dump_ops;
+ }
+
+ if (fsname_allocated)
+ GF_FREE (fsname);
+ GF_FREE (mnt_args);
return 0;
cleanup_exit:
if (xl_name_allocated)
- FREE (this_xl->name);
+ GF_FREE (this_xl->name);
+ if (fsname_allocated)
+ GF_FREE (fsname);
if (priv) {
- FREE (priv->mount_point);
- close (priv->fd);
+ GF_FREE (priv->mount_point);
+ if (priv->fd != -1)
+ close (priv->fd);
+ if (priv->fuse_dump_fd != -1)
+ close (priv->fuse_dump_fd);
+ GF_FREE (priv);
}
- FREE (priv);
+ GF_FREE (mnt_args);
return -1;
}
@@ -3237,46 +5420,109 @@ fini (xlator_t *this_xl)
mount_point = data_to_str (dict_get (this_xl->options,
ZR_MOUNTPOINT_OPT));
if (mount_point != NULL) {
- gf_log (this_xl->name, GF_LOG_NORMAL,
+ gf_log (this_xl->name, GF_LOG_INFO,
"Unmounting '%s'.", mount_point);
- dict_del (this_xl->options, ZR_MOUNTPOINT_OPT);
gf_fuse_unmount (mount_point, priv->fd);
+ close (priv->fuse_dump_fd);
+ dict_del (this_xl->options, ZR_MOUNTPOINT_OPT);
}
+ /* Process should terminate once fuse xlator is finished.
+ * Required for AUTH_FAILED event.
+ */
+ kill (getpid (), SIGTERM);
}
-struct xlator_fops fops = {
-};
+struct xlator_fops fops;
struct xlator_cbks cbks = {
+ .invalidate = fuse_invalidate,
+ .forget = fuse_forget_cbk,
};
-struct xlator_mops mops = {
-};
struct xlator_dumpops dumpops = {
.priv = fuse_priv_dump,
.inode = fuse_itable_dump,
+ .history = fuse_history_dump,
};
struct volume_options options[] = {
{ .key = {"direct-io-mode"},
.type = GF_OPTION_TYPE_BOOL
},
- { .key = {"macfuse-local"},
- .type = GF_OPTION_TYPE_BOOL
+ { .key = {ZR_MOUNTPOINT_OPT, "mount-point"},
+ .type = GF_OPTION_TYPE_PATH
},
- { .key = {"mountpoint", "mount-point"},
+ { .key = {ZR_DUMP_FUSE, "fuse-dumpfile"},
.type = GF_OPTION_TYPE_PATH
},
- { .key = {"attribute-timeout"},
- .type = GF_OPTION_TYPE_DOUBLE
+ { .key = {ZR_ATTR_TIMEOUT_OPT},
+ .type = GF_OPTION_TYPE_DOUBLE,
+ .default_value = "1.0"
+ },
+ { .key = {ZR_ENTRY_TIMEOUT_OPT},
+ .type = GF_OPTION_TYPE_DOUBLE,
+ .default_value = "1.0"
+ },
+ { .key = {ZR_NEGATIVE_TIMEOUT_OPT},
+ .type = GF_OPTION_TYPE_DOUBLE,
+ .default_value = "0.0"
},
- { .key = {"entry-timeout"},
- .type = GF_OPTION_TYPE_DOUBLE
+ { .key = {ZR_STRICT_VOLFILE_CHECK},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false"
},
- { .key = {"strict-volfile-check"},
+ { .key = {"client-pid"},
+ .type = GF_OPTION_TYPE_INT
+ },
+ { .key = {"uid-map-root"},
+ .type = GF_OPTION_TYPE_INT
+ },
+ { .key = {"sync-to-mount"},
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = {"read-only"},
.type = GF_OPTION_TYPE_BOOL
},
+ { .key = {"fopen-keep-cache"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false"
+ },
+ { .key = {"gid-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "2"
+ },
+ { .key = {"acl"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false"
+ },
+ { .key = {"selinux"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false"
+ },
+ { .key = {"enable-ino32"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false"
+ },
+ { .key = {"background-qlen"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "64",
+ .min = 16,
+ .max = (64 * GF_UNIT_KB),
+ },
+ { .key = {"congestion-threshold"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "48",
+ .min = 12,
+ .max = (64 * GF_UNIT_KB),
+ },
+ { .key = {"fuse-mountopts"},
+ .type = GF_OPTION_TYPE_STR
+ },
+ { .key = {"use-readdirp"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "yes"
+ },
{ .key = {NULL} },
};
diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h
new file mode 100644
index 000000000..34794b6ea
--- /dev/null
+++ b/xlators/mount/fuse/src/fuse-bridge.h
@@ -0,0 +1,537 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GF_FUSE_BRIDGE_H_
+#define _GF_FUSE_BRIDGE_H_
+
+#include <stdint.h>
+#include <signal.h>
+#include <pthread.h>
+#include <stddef.h>
+#include <dirent.h>
+#include <sys/mount.h>
+#include <sys/time.h>
+#include <fnmatch.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif /* _CONFIG_H */
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "xlator.h"
+#include "defaults.h"
+#include "common-utils.h"
+#include "statedump.h"
+
+#ifdef GF_DARWIN_HOST_OS
+/* This is MacFUSE's marker for MacFUSE-specific code */
+#define __FreeBSD__ 10
+#include "fuse_kernel_macfuse.h"
+#else
+#include "fuse_kernel.h"
+#endif
+#include "fuse-misc.h"
+#include "fuse-mount.h"
+#include "fuse-mem-types.h"
+
+#include "list.h"
+#include "dict.h"
+#include "syncop.h"
+#include "gidcache.h"
+
+#if defined(GF_LINUX_HOST_OS) || defined(__NetBSD__)
+#define FUSE_OP_HIGH (FUSE_READDIRPLUS + 1)
+#endif
+#ifdef GF_DARWIN_HOST_OS
+#define FUSE_OP_HIGH (FUSE_DESTROY + 1)
+#endif
+#define GLUSTERFS_XATTR_LEN_MAX 65536
+
+#define MAX_FUSE_PROC_DELAY 1
+
+typedef struct fuse_in_header fuse_in_header_t;
+typedef void (fuse_handler_t) (xlator_t *this, fuse_in_header_t *finh,
+ void *msg);
+
+struct fuse_private {
+ int fd;
+ uint32_t proto_minor;
+ char *volfile;
+ size_t volfile_size;
+ char *mount_point;
+ struct iobuf *iobuf;
+
+ pthread_t fuse_thread;
+ char fuse_thread_started;
+
+ uint32_t direct_io_mode;
+ size_t *msg0_len_p;
+
+ double entry_timeout;
+ double negative_timeout;
+ double attribute_timeout;
+
+ pthread_cond_t sync_cond;
+ pthread_mutex_t sync_mutex;
+ char event_recvd;
+
+ char init_recvd;
+
+ gf_boolean_t strict_volfile_check;
+
+ fuse_handler_t **fuse_ops;
+ fuse_handler_t **fuse_ops0;
+ pthread_mutex_t fuse_dump_mutex;
+ int fuse_dump_fd;
+
+ glusterfs_graph_t *next_graph;
+ xlator_t *active_subvol;
+
+ pid_t client_pid;
+ gf_boolean_t client_pid_set;
+ unsigned uid_map_root;
+ gf_boolean_t acl;
+ gf_boolean_t selinux;
+ gf_boolean_t read_only;
+ int32_t fopen_keep_cache;
+ int32_t gid_cache_timeout;
+ gf_boolean_t enable_ino32;
+ fdtable_t *fdtable;
+ gid_cache_t gid_cache;
+ char *fuse_mountopts;
+
+ /* For fuse-reverse-validation */
+ int revchan_in;
+ int revchan_out;
+ gf_boolean_t reverse_fuse_thread_started;
+
+ /* For communicating with separate mount thread. */
+ int status_pipe[2];
+
+ /* for fuse queue length and congestion threshold */
+ int background_qlen;
+ int congestion_threshold;
+
+ /* for using fuse-kernel readdirp*/
+ gf_boolean_t use_readdirp;
+};
+typedef struct fuse_private fuse_private_t;
+
+struct fuse_graph_switch_args {
+ xlator_t *this;
+ xlator_t *old_subvol;
+ xlator_t *new_subvol;
+};
+typedef struct fuse_graph_switch_args fuse_graph_switch_args_t;
+
+#define INVAL_BUF_SIZE (sizeof (struct fuse_out_header) + \
+ max (sizeof (struct fuse_notify_inval_inode_out), \
+ sizeof (struct fuse_notify_inval_entry_out) + \
+ NAME_MAX + 1))
+
+#define FUSE_EVENT_HISTORY_SIZE 1024
+
+#define _FH_TO_FD(fh) ((fd_t *)(uintptr_t)(fh))
+
+#define FH_TO_FD(fh) ((_FH_TO_FD (fh))?(fd_ref (_FH_TO_FD (fh))):((fd_t *) 0))
+
+#define FUSE_FOP(state, ret, op_num, fop, args ...) \
+ do { \
+ call_frame_t *frame = NULL; \
+ xlator_t *xl = NULL; \
+ int32_t op_ret = 0, op_errno = 0; \
+ fuse_resolve_t *resolve = NULL; \
+ \
+ frame = get_call_frame_for_req (state); \
+ if (!frame) { \
+ /* This is not completely clean, as some \
+ * earlier allocations might remain unfreed \
+ * if we return at this point, but still \
+ * better than trying to go on with a NULL \
+ * frame ... \
+ */ \
+ gf_log_callingfn ("glusterfs-fuse", \
+ GF_LOG_ERROR, \
+ "FUSE message" \
+ " unique %"PRIu64" opcode %d:" \
+ " frame allocation failed", \
+ state->finh->unique, \
+ state->finh->opcode); \
+ free_fuse_state (state); \
+ /* ideally, need to 'return', but let the */ \
+ /* calling function take care of it */ \
+ break; \
+ } \
+ \
+ frame->root->state = state; \
+ frame->root->op = op_num; \
+ frame->op = op_num; \
+ \
+ if ( state->resolve_now ) { \
+ resolve = state->resolve_now; \
+ } else { \
+ resolve = &(state->resolve); \
+ } \
+ \
+ xl = state->active_subvol; \
+ if (!xl) { \
+ gf_log_callingfn ("glusterfs-fuse", GF_LOG_ERROR, \
+ "xl is NULL"); \
+ op_errno = ENOENT; \
+ op_ret = -1; \
+ } else if (resolve->op_ret < 0) { \
+ op_errno = resolve->op_errno; \
+ op_ret = -1; \
+ if (op_num == GF_FOP_LOOKUP) { \
+ gf_log ("glusterfs-fuse", \
+ (op_errno == ENOENT ? GF_LOG_TRACE \
+ : GF_LOG_WARNING), \
+ "%"PRIu64": %s() %s => -1 (%s)", \
+ frame->root->unique, \
+ gf_fop_list[frame->root->op], \
+ resolve->resolve_loc.path, \
+ strerror (op_errno)); \
+ } else { \
+ gf_log ("glusterfs-fuse", \
+ GF_LOG_WARNING, \
+ "%"PRIu64": %s() inode " \
+ "migration of %s failed (%s)", \
+ frame->root->unique, \
+ gf_fop_list[frame->root->op], \
+ resolve->resolve_loc.path, \
+ strerror (op_errno)); \
+ } \
+ } else if (state->resolve2.op_ret < 0) { \
+ op_errno = state->resolve2.op_errno; \
+ op_ret = -1; \
+ gf_log ("glusterfs-fuse", \
+ GF_LOG_WARNING, \
+ "%"PRIu64": %s() inode " \
+ "migration of %s failed (%s)", \
+ frame->root->unique, \
+ gf_fop_list[frame->root->op], \
+ state->resolve2.resolve_loc.path, \
+ strerror (op_errno)); \
+ } \
+ \
+ if (op_ret < 0) { \
+ send_fuse_err (state->this, state->finh, op_errno); \
+ free_fuse_state (state); \
+ STACK_DESTROY (frame->root); \
+ } else { \
+ if (state->this->history) \
+ gf_log_eh ("%"PRIu64", %s, path: (%s), gfid: " \
+ "(%s)", frame->root->unique, \
+ gf_fop_list[frame->root->op], \
+ state->loc.path, \
+ (state->fd == NULL)? \
+ uuid_utoa (state->loc.gfid): \
+ uuid_utoa (state->fd->inode->gfid));\
+ STACK_WIND (frame, ret, xl, xl->fops->fop, args); \
+ } \
+ \
+ } while (0)
+
+
+#define FUSE_FOP_COOKIE(state, xl, ret, cky, op_num, fop, args ...) \
+ do { \
+ call_frame_t *frame = NULL; \
+ xlator_t *xl = NULL; \
+ int32_t op_ret = 0, op_errno = 0; \
+ \
+ frame = get_call_frame_for_req (state); \
+ if (!frame) { \
+ gf_log ("glusterfs-fuse", \
+ GF_LOG_ERROR, \
+ "FUSE message" \
+ " unique %"PRIu64" opcode %d:" \
+ " frame allocation failed", \
+ state->finh->unique, \
+ state->finh->opcode); \
+ free_fuse_state (state); \
+ return 0; \
+ } \
+ \
+ frame->root->state = state; \
+ frame->root->op = op_num; \
+ frame->op = op_num; \
+ \
+ xl = state->active_subvol; \
+ if (!xl) { \
+ gf_log_callingfn ("glusterfs-fuse", GF_LOG_ERROR, \
+ "xl is NULL"); \
+ op_errno = ENOENT; \
+ op_ret = -1; \
+ } else if (state->resolve.op_ret < 0) { \
+ op_errno = state->resolve.op_errno; \
+ op_ret = -1; \
+ if (op_num == GF_FOP_LOOKUP) { \
+ gf_log ("glusterfs-fuse", \
+ (op_errno == ENOENT ? GF_LOG_TRACE \
+ : GF_LOG_WARNING), \
+ "%"PRIu64": %s() %s => -1 (%s)", \
+ frame->root->unique, \
+ gf_fop_list[frame->root->op], \
+ state->resolve.resolve_loc.path, \
+ strerror (op_errno)); \
+ } else { \
+ gf_log ("glusterfs-fuse", \
+ GF_LOG_WARNING, \
+ "%"PRIu64": %s() inode " \
+ "migration of %s failed (%s)", \
+ frame->root->unique, \
+ gf_fop_list[frame->root->op], \
+ state->resolve.resolve_loc.path, \
+ strerror (op_errno)); \
+ } \
+ } else if (state->resolve2.op_ret < 0) { \
+ op_errno = state->resolve2.op_errno; \
+ op_ret = -1; \
+ gf_log ("glusterfs-fuse", \
+ GF_LOG_WARNING, \
+ "%"PRIu64": %s() inode " \
+ "migration of %s failed (%s)", \
+ frame->root->unique, \
+ gf_fop_list[frame->root->op], \
+ state->resolve2.resolve_loc.path, \
+ strerror (op_errno)); \
+ } \
+ \
+ if (op_ret < 0) { \
+ send_fuse_err (state->this, state->finh, op_errno); \
+ free_fuse_state (state); \
+ STACK_DESTROY (frame->root); \
+ } else { \
+ if (xl->history) \
+ gf_log_eh ("%"PRIu64", %s, path: (%s), gfid: " \
+ "(%s)", frame->root->unique, \
+ gf_fop_list[frame->root->op], \
+ state->loc.path, \
+ uuid_utoa (state->loc.gfid)); \
+ STACK_WIND_COOKIE (frame, ret, cky, xl, xl->fops->fop, \
+ args); \
+ } \
+ } while (0)
+
+#define GF_SELECT_LOG_LEVEL(_errno) \
+ (((_errno == ENOENT) || (_errno == ESTALE))? \
+ GF_LOG_DEBUG)
+
+#define GET_STATE(this, finh, state) \
+ do { \
+ state = get_fuse_state (this, finh); \
+ if (!state) { \
+ gf_log ("glusterfs-fuse", \
+ GF_LOG_ERROR, \
+ "FUSE message unique %"PRIu64" opcode %d:" \
+ " state allocation failed", \
+ finh->unique, finh->opcode); \
+ \
+ send_fuse_err (this, finh, ENOMEM); \
+ GF_FREE (finh); \
+ \
+ return; \
+ } \
+ } while (0)
+
+#define FUSE_ENTRY_CREATE(this, priv, finh, state, fci, op) \
+ do { \
+ if (priv->proto_minor >= 12) \
+ state->mode &= ~fci->umask; \
+ if (priv->proto_minor >= 12 && priv->acl) { \
+ state->xdata = dict_new (); \
+ if (!state->xdata) { \
+ gf_log ("glusterfs-fuse", \
+ GF_LOG_WARNING, \
+ "%s failed to allocate " \
+ "a param dictionary", op); \
+ send_fuse_err (this, finh, ENOMEM); \
+ free_fuse_state (state); \
+ return; \
+ } \
+ state->umask = fci->umask; \
+ \
+/* TODO: remove this after 3.4.0 release. keeping it for the \
+ sake of backward compatibility with old (3.3.[01]) \
+ releases till then. */ \
+ ret = dict_set_int16 (state->xdata, "umask", \
+ fci->umask); \
+ if (ret < 0) { \
+ gf_log ("glusterfs-fuse", \
+ GF_LOG_WARNING, \
+ "%s Failed adding umask"\
+ " to request", op); \
+ dict_destroy (state->xdata); \
+ send_fuse_err (this, finh, ENOMEM); \
+ free_fuse_state (state); \
+ return; \
+ } \
+ ret = dict_set_int16 (state->xdata, "mode", \
+ fci->mode); \
+ if (ret < 0) { \
+ gf_log ("glusterfs-fuse", \
+ GF_LOG_WARNING, \
+ "%s Failed adding mode " \
+ "to request", op); \
+ dict_destroy (state->xdata); \
+ send_fuse_err (this, finh, ENOMEM); \
+ free_fuse_state (state); \
+ return; \
+ } \
+ } \
+ } while (0)
+
+#define fuse_log_eh_fop(this, state, frame, op_ret, op_errno) \
+ do { \
+ if (this->history) { \
+ if (state->fd) \
+ gf_log_eh ("op_ret: %d, op_errno: %d, " \
+ "%"PRIu64", %s () => %p, gfid: %s", \
+ op_ret, op_errno, \
+ frame->root->unique, \
+ gf_fop_list[frame->root->op], \
+ state->fd, \
+ uuid_utoa (state->fd->inode->gfid)); \
+ else \
+ gf_log_eh ("op_ret: %d, op_errno: %d, " \
+ "%"PRIu64", %s () => %s, gfid: %s", \
+ op_ret, op_errno, \
+ frame->root->unique, \
+ gf_fop_list[frame->root->op], \
+ state->loc.path, \
+ uuid_utoa (state->loc.gfid)); \
+ } \
+ } while(0)
+
+#define fuse_log_eh(this, args...) \
+ do { \
+ if (this->history) \
+ gf_log_eh(args); \
+ } while (0)
+
+static inline xlator_t *
+fuse_active_subvol (xlator_t *fuse)
+{
+ fuse_private_t *priv = NULL;
+
+ priv = fuse->private;
+
+ return priv->active_subvol;
+}
+
+
+typedef enum {
+ RESOLVE_MUST = 1,
+ RESOLVE_NOT,
+ RESOLVE_MAY,
+ RESOLVE_DONTCARE,
+ RESOLVE_EXACT
+} fuse_resolve_type_t;
+
+
+typedef struct {
+ fuse_resolve_type_t type;
+ fd_t *fd;
+ char *path;
+ char *bname;
+ u_char gfid[16];
+ inode_t *hint;
+ u_char pargfid[16];
+ inode_t *parhint;
+ char *resolved;
+ int op_ret;
+ int op_errno;
+ loc_t resolve_loc;
+} fuse_resolve_t;
+
+
+typedef struct {
+ void *pool;
+ xlator_t *this;
+ xlator_t *active_subvol;
+ inode_table_t *itable;
+ loc_t loc;
+ loc_t loc2;
+ fuse_in_header_t *finh;
+ int32_t flags;
+ off_t off;
+ size_t size;
+ unsigned long nlookup;
+ fd_t *fd;
+ dict_t *xattr;
+ dict_t *xdata;
+ char *name;
+ char is_revalidate;
+ gf_boolean_t truncate_needed;
+ gf_lock_t lock;
+ uint64_t lk_owner;
+
+ /* used within resolve_and_resume */
+ /* */
+ fuse_resolve_t resolve;
+ fuse_resolve_t resolve2;
+
+ loc_t *loc_now;
+ fuse_resolve_t *resolve_now;
+
+ void *resume_fn;
+
+ int valid;
+ int mask;
+ dev_t rdev;
+ mode_t mode;
+ mode_t umask;
+ struct iatt attr;
+ struct gf_flock lk_lock;
+ struct iovec vector;
+
+ uuid_t gfid;
+ uint32_t io_flags;
+ int32_t fd_no;
+} fuse_state_t;
+
+typedef struct {
+ uint32_t open_flags;
+ char migration_failed;
+ fd_t *activefd;
+} fuse_fd_ctx_t;
+
+typedef void (*fuse_resume_fn_t) (fuse_state_t *state);
+
+GF_MUST_CHECK int32_t
+fuse_loc_fill (loc_t *loc, fuse_state_t *state, ino_t ino,
+ ino_t par, const char *name);
+call_frame_t *get_call_frame_for_req (fuse_state_t *state);
+fuse_state_t *get_fuse_state (xlator_t *this, fuse_in_header_t *finh);
+void free_fuse_state (fuse_state_t *state);
+void gf_fuse_stat2attr (struct iatt *st, struct fuse_attr *fa,
+ gf_boolean_t enable_ino32);
+void gf_fuse_fill_dirent (gf_dirent_t *entry, struct fuse_dirent *fde,
+ gf_boolean_t enable_ino32);
+uint64_t inode_to_fuse_nodeid (inode_t *inode);
+xlator_t *fuse_active_subvol (xlator_t *fuse);
+inode_t *fuse_ino_to_inode (uint64_t ino, xlator_t *fuse);
+int send_fuse_err (xlator_t *this, fuse_in_header_t *finh, int error);
+int fuse_gfid_set (fuse_state_t *state);
+int fuse_flip_xattr_ns (struct fuse_private *priv, char *okey, char **nkey);
+fuse_fd_ctx_t * __fuse_fd_ctx_check_n_create (xlator_t *this, fd_t *fd);
+fuse_fd_ctx_t * fuse_fd_ctx_check_n_create (xlator_t *this, fd_t *fd);
+
+int fuse_resolve_and_resume (fuse_state_t *state, fuse_resume_fn_t fn);
+int fuse_resolve_inode_init (fuse_state_t *state, fuse_resolve_t *resolve,
+ ino_t ino);
+int fuse_resolve_entry_init (fuse_state_t *state, fuse_resolve_t *resolve,
+ ino_t par, char *name);
+int fuse_resolve_fd_init (fuse_state_t *state, fuse_resolve_t *resolve,
+ fd_t *fd);
+int fuse_ignore_xattr_set (fuse_private_t *priv, char *key);
+int dump_history_fuse (circular_buffer_t *cb, void *data);
+#endif /* _GF_FUSE_BRIDGE_H_ */
diff --git a/xlators/mount/fuse/src/fuse-helpers.c b/xlators/mount/fuse/src/fuse-helpers.c
new file mode 100644
index 000000000..4d478b919
--- /dev/null
+++ b/xlators/mount/fuse/src/fuse-helpers.c
@@ -0,0 +1,605 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifdef __NetBSD__
+#define _KMEMUSER
+#endif
+
+#include "fuse-bridge.h"
+#if defined(GF_SOLARIS_HOST_OS)
+#include <sys/procfs.h>
+#else
+#include <sys/sysctl.h>
+#endif
+
+
+static void
+fuse_resolve_wipe (fuse_resolve_t *resolve)
+{
+ GF_FREE ((void *)resolve->path);
+
+ GF_FREE ((void *)resolve->bname);
+
+ GF_FREE ((void *)resolve->resolved);
+
+ if (resolve->fd)
+ fd_unref (resolve->fd);
+
+ loc_wipe (&resolve->resolve_loc);
+
+ if (resolve->hint) {
+ inode_unref (resolve->hint);
+ resolve->hint = 0;
+ }
+
+ if (resolve->parhint) {
+ inode_unref (resolve->parhint);
+ resolve->parhint = 0;
+ }
+}
+
+
+void
+free_fuse_state (fuse_state_t *state)
+{
+ xlator_t *this = NULL;
+ fuse_private_t *priv = NULL;
+ uint64_t winds = 0;
+ char switched = 0;
+
+ this = state->this;
+
+ priv = this->private;
+
+ loc_wipe (&state->loc);
+
+ loc_wipe (&state->loc2);
+
+ if (state->xdata) {
+ dict_unref (state->xdata);
+ state->xdata = (void *)0xaaaaeeee;
+ }
+ if (state->xattr)
+ dict_unref (state->xattr);
+
+ if (state->name) {
+ GF_FREE (state->name);
+ state->name = NULL;
+ }
+ if (state->fd) {
+ fd_unref (state->fd);
+ state->fd = (void *)0xfdfdfdfd;
+ }
+ if (state->finh) {
+ GF_FREE (state->finh);
+ state->finh = NULL;
+ }
+
+ fuse_resolve_wipe (&state->resolve);
+ fuse_resolve_wipe (&state->resolve2);
+
+ pthread_mutex_lock (&priv->sync_mutex);
+ {
+ winds = --state->active_subvol->winds;
+ switched = state->active_subvol->switched;
+ }
+ pthread_mutex_unlock (&priv->sync_mutex);
+
+ if ((winds == 0) && (switched)) {
+ xlator_notify (state->active_subvol, GF_EVENT_PARENT_DOWN,
+ state->active_subvol, NULL);
+ }
+
+#ifdef DEBUG
+ memset (state, 0x90, sizeof (*state));
+#endif
+ GF_FREE (state);
+ state = NULL;
+}
+
+
+fuse_state_t *
+get_fuse_state (xlator_t *this, fuse_in_header_t *finh)
+{
+ fuse_state_t *state = NULL;
+ xlator_t *active_subvol = NULL;
+ fuse_private_t *priv = NULL;
+
+ state = (void *)GF_CALLOC (1, sizeof (*state),
+ gf_fuse_mt_fuse_state_t);
+ if (!state)
+ return NULL;
+
+ state->this = THIS;
+ priv = this->private;
+
+ pthread_mutex_lock (&priv->sync_mutex);
+ {
+ active_subvol = fuse_active_subvol (state->this);
+ active_subvol->winds++;
+ }
+ pthread_mutex_unlock (&priv->sync_mutex);
+
+ state->active_subvol = active_subvol;
+ state->itable = active_subvol->itable;
+
+ state->pool = this->ctx->pool;
+ state->finh = finh;
+ state->this = this;
+
+ LOCK_INIT (&state->lock);
+
+ return state;
+}
+
+
+#define FUSE_MAX_AUX_GROUPS 32 /* We can get only up to 32 aux groups from /proc */
+void
+frame_fill_groups (call_frame_t *frame)
+{
+#if defined(GF_LINUX_HOST_OS)
+ char filename[32];
+ char line[4096];
+ char *ptr = NULL;
+ FILE *fp = NULL;
+ int idx = 0;
+ long int id = 0;
+ char *saveptr = NULL;
+ char *endptr = NULL;
+ int ret = 0;
+
+ ret = snprintf (filename, sizeof filename, "/proc/%d/status", frame->root->pid);
+ if (ret >= sizeof filename)
+ goto out;
+
+ fp = fopen (filename, "r");
+ if (!fp)
+ goto out;
+
+ if (call_stack_alloc_groups (frame->root, FUSE_MAX_AUX_GROUPS) != 0)
+ goto out;
+
+ while ((ptr = fgets (line, sizeof line, fp))) {
+ if (strncmp (ptr, "Groups:", 7) != 0)
+ continue;
+
+ ptr = line + 8;
+
+ for (ptr = strtok_r (ptr, " \t\r\n", &saveptr);
+ ptr;
+ ptr = strtok_r (NULL, " \t\r\n", &saveptr)) {
+ errno = 0;
+ id = strtol (ptr, &endptr, 0);
+ if (errno == ERANGE)
+ break;
+ if (!endptr || *endptr)
+ break;
+ frame->root->groups[idx++] = id;
+ if (idx == FUSE_MAX_AUX_GROUPS)
+ break;
+ }
+
+ frame->root->ngrps = idx;
+ break;
+ }
+out:
+ if (fp)
+ fclose (fp);
+#elif defined(GF_SOLARIS_HOST_OS)
+ char filename[32];
+ char scratch[128];
+ prcred_t *prcred = (prcred_t *) scratch;
+ FILE *fp = NULL;
+ int ret = 0;
+ int ngrps;
+
+ ret = snprintf (filename, sizeof filename,
+ "/proc/%d/cred", frame->root->pid);
+
+ if (ret < sizeof filename) {
+ fp = fopen (filename, "r");
+ if (fp != NULL) {
+ if (fgets (scratch, sizeof scratch, fp) != NULL) {
+ ngrps = MIN(prcred->pr_ngroups,
+ GF_MAX_AUX_GROUPS);
+ if (call_stack_alloc_groups (frame->root,
+ ngrps) != 0)
+ return;
+ }
+ fclose (fp);
+ }
+ }
+#elif defined(CTL_KERN) /* DARWIN and *BSD */
+ /*
+ N.B. CTL_KERN is an enum on Linux. (Meaning, if it's not
+ obvious, that it's not subject to preprocessor directives
+ like '#if defined'.)
+ Unlike Linux, on Mac OS and the BSDs it is a #define. We
+ could test to see that KERN_PROC is defined, but, barring any
+ evidence to the contrary, I think that's overkill.
+ We might also test that GF_DARWIN_HOST_OS is defined, why
+ limit this to just Mac OS. It's equally valid for the BSDs
+ and we do have people building on NetBSD and FreeBSD.
+ */
+ int name[] = { CTL_KERN, KERN_PROC, KERN_PROC_PID, frame->root->pid };
+ size_t namelen = sizeof name / sizeof name[0];
+ struct kinfo_proc kp;
+ size_t kplen = sizeof(kp);
+ int i, ngroups;
+
+ if (sysctl(name, namelen, &kp, &kplen, NULL, 0) != 0)
+ return;
+ ngroups = MIN(kp.kp_eproc.e_ucred.cr_ngroups, GF_MAX_AUX_GROUPS);
+ if (call_stack_alloc_groups (frame->root, ngroups) != 0)
+ return;
+ for (i = 0; i < ngroups; i++)
+ frame->root->groups[i] = kp.kp_eproc.e_ucred.cr_groups[i];
+ frame->root->ngrps = ngroups;
+#else
+ frame->root->ngrps = 0;
+#endif /* GF_LINUX_HOST_OS */
+}
+
+/*
+ * Get the groups for the PID associated with this frame. If enabled,
+ * use the gid cache to reduce group list collection.
+ */
+static void get_groups(fuse_private_t *priv, call_frame_t *frame)
+{
+ int i;
+ const gid_list_t *gl;
+ gid_list_t agl;
+
+ if (-1 == priv->gid_cache_timeout) {
+ frame->root->ngrps = 0;
+ return;
+ }
+
+ if (!priv->gid_cache_timeout) {
+ frame_fill_groups(frame);
+ return;
+ }
+
+ gl = gid_cache_lookup(&priv->gid_cache, frame->root->pid);
+ if (gl) {
+ if (call_stack_alloc_groups (frame->root, gl->gl_count) != 0)
+ return;
+ frame->root->ngrps = gl->gl_count;
+ for (i = 0; i < gl->gl_count; i++)
+ frame->root->groups[i] = gl->gl_list[i];
+ gid_cache_release(&priv->gid_cache, gl);
+ return;
+ }
+
+ frame_fill_groups (frame);
+
+ agl.gl_id = frame->root->pid;
+ agl.gl_count = frame->root->ngrps;
+ agl.gl_list = GF_CALLOC(frame->root->ngrps, sizeof(gid_t),
+ gf_fuse_mt_gids_t);
+ if (!agl.gl_list)
+ return;
+
+ for (i = 0; i < frame->root->ngrps; i++)
+ agl.gl_list[i] = frame->root->groups[i];
+
+ if (gid_cache_add(&priv->gid_cache, &agl) != 1)
+ GF_FREE(agl.gl_list);
+}
+
+call_frame_t *
+get_call_frame_for_req (fuse_state_t *state)
+{
+ call_pool_t *pool = NULL;
+ fuse_in_header_t *finh = NULL;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ fuse_private_t *priv = NULL;
+
+ pool = state->pool;
+ finh = state->finh;
+ this = state->this;
+ priv = this->private;
+
+ frame = create_frame (this, pool);
+ if (!frame)
+ return NULL;
+
+ if (finh) {
+ frame->root->uid = finh->uid;
+ frame->root->gid = finh->gid;
+ frame->root->pid = finh->pid;
+ frame->root->unique = finh->unique;
+ set_lk_owner_from_uint64 (&frame->root->lk_owner,
+ state->lk_owner);
+ }
+
+ get_groups(priv, frame);
+
+ if (priv && priv->client_pid_set)
+ frame->root->pid = priv->client_pid;
+
+ frame->root->type = GF_OP_TYPE_FOP;
+
+ return frame;
+}
+
+
+inode_t *
+fuse_ino_to_inode (uint64_t ino, xlator_t *fuse)
+{
+ inode_t *inode = NULL;
+ xlator_t *active_subvol = NULL;
+
+ if (ino == 1) {
+ active_subvol = fuse_active_subvol (fuse);
+ if (active_subvol)
+ inode = active_subvol->itable->root;
+ } else {
+ inode = (inode_t *) (unsigned long) ino;
+ inode_ref (inode);
+ }
+
+ return inode;
+}
+
+uint64_t
+inode_to_fuse_nodeid (inode_t *inode)
+{
+ if (!inode)
+ return 0;
+ if (__is_root_gfid (inode->gfid))
+ return 1;
+
+ return (unsigned long) inode;
+}
+
+
+GF_MUST_CHECK int32_t
+fuse_loc_fill (loc_t *loc, fuse_state_t *state, ino_t ino,
+ ino_t par, const char *name)
+{
+ inode_t *inode = NULL;
+ inode_t *parent = NULL;
+ int32_t ret = -1;
+ char *path = NULL;
+ uuid_t null_gfid = {0,};
+
+ /* resistance against multiple invocation of loc_fill not to get
+ reference leaks via inode_search() */
+
+ if (name) {
+ parent = loc->parent;
+ if (!parent) {
+ parent = fuse_ino_to_inode (par, state->this);
+ loc->parent = parent;
+ if (parent)
+ uuid_copy (loc->pargfid, parent->gfid);
+ }
+
+ inode = loc->inode;
+ if (!inode) {
+ inode = inode_grep (parent->table, parent, name);
+ loc->inode = inode;
+ }
+
+ ret = inode_path (parent, name, &path);
+ if (ret <= 0) {
+ gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+ "inode_path failed for %s/%s",
+ (parent)?uuid_utoa (parent->gfid):"0", name);
+ goto fail;
+ }
+ loc->path = path;
+ } else {
+ inode = loc->inode;
+ if (!inode) {
+ inode = fuse_ino_to_inode (ino, state->this);
+ loc->inode = inode;
+ if (inode)
+ uuid_copy (loc->gfid, inode->gfid);
+ }
+
+ parent = loc->parent;
+ if (!parent) {
+ parent = inode_parent (inode, null_gfid, NULL);
+ loc->parent = parent;
+ if (parent)
+ uuid_copy (loc->pargfid, parent->gfid);
+
+ }
+
+ ret = inode_path (inode, NULL, &path);
+ if (ret <= 0) {
+ gf_log ("glusterfs-fuse", GF_LOG_DEBUG,
+ "inode_path failed for %s",
+ (inode) ? uuid_utoa (inode->gfid) : "0");
+ goto fail;
+ }
+ loc->path = path;
+ }
+
+ if (loc->path) {
+ loc->name = strrchr (loc->path, '/');
+ if (loc->name)
+ loc->name++;
+ else
+ loc->name = "";
+ }
+
+ if ((ino != 1) && (parent == NULL)) {
+ gf_log ("fuse-bridge", GF_LOG_DEBUG,
+ "failed to search parent for %"PRId64"/%s (%"PRId64")",
+ (ino_t)par, name, (ino_t)ino);
+ ret = -1;
+ goto fail;
+ }
+ ret = 0;
+fail:
+ /* this should not happen as inode_path returns -1 when buf is NULL
+ for sure */
+ if (path && !loc->path)
+ GF_FREE (path);
+ return ret;
+}
+
+/* Use the same logic as the Linux NFS-client */
+#define GF_FUSE_SQUASH_INO(ino) ((uint32_t) ino) ^ (ino >> 32)
+
+/* courtesy of folly */
+void
+gf_fuse_stat2attr (struct iatt *st, struct fuse_attr *fa, gf_boolean_t enable_ino32)
+{
+ if (enable_ino32)
+ fa->ino = GF_FUSE_SQUASH_INO(st->ia_ino);
+ else
+ fa->ino = st->ia_ino;
+
+ fa->size = st->ia_size;
+ fa->blocks = st->ia_blocks;
+ fa->atime = st->ia_atime;
+ fa->mtime = st->ia_mtime;
+ fa->ctime = st->ia_ctime;
+ fa->atimensec = st->ia_atime_nsec;
+ fa->mtimensec = st->ia_mtime_nsec;
+ fa->ctimensec = st->ia_ctime_nsec;
+ fa->mode = st_mode_from_ia (st->ia_prot, st->ia_type);
+ fa->nlink = st->ia_nlink;
+ fa->uid = st->ia_uid;
+ fa->gid = st->ia_gid;
+ fa->rdev = makedev (ia_major (st->ia_rdev),
+ ia_minor (st->ia_rdev));
+#if FUSE_KERNEL_MINOR_VERSION >= 9
+ fa->blksize = st->ia_blksize;
+#endif
+#ifdef GF_DARWIN_HOST_OS
+ fa->crtime = (uint64_t)-1;
+ fa->crtimensec = (uint32_t)-1;
+ fa->flags = 0;
+#endif
+}
+
+void
+gf_fuse_fill_dirent (gf_dirent_t *entry, struct fuse_dirent *fde, gf_boolean_t enable_ino32)
+{
+ if (enable_ino32)
+ fde->ino = GF_FUSE_SQUASH_INO(entry->d_ino);
+ else
+ fde->ino = entry->d_ino;
+
+ fde->off = entry->d_off;
+ fde->type = entry->d_type;
+ fde->namelen = strlen (entry->d_name);
+ strncpy (fde->name, entry->d_name, fde->namelen);
+}
+
+static int
+fuse_do_flip_xattr_ns (char *okey, const char *nns, char **nkey)
+{
+ int ret = 0;
+ char *key = NULL;
+
+ okey = strchr (okey, '.');
+ GF_ASSERT (okey);
+
+ key = GF_CALLOC (1, strlen (nns) + strlen(okey) + 1,
+ gf_common_mt_char);
+ if (!key) {
+ ret = -1;
+ goto out;
+ }
+
+ strcpy (key, nns);
+ strcat (key, okey);
+
+ *nkey = key;
+
+ out:
+ return ret;
+}
+
+static int
+fuse_xattr_alloc_default (char *okey, char **nkey)
+{
+ int ret = 0;
+
+ *nkey = gf_strdup (okey);
+ if (!*nkey)
+ ret = -1;
+ return ret;
+}
+
+#define PRIV_XA_NS "trusted"
+#define UNPRIV_XA_NS "system"
+
+int
+fuse_flip_xattr_ns (fuse_private_t *priv, char *okey, char **nkey)
+{
+ int ret = 0;
+ gf_boolean_t need_flip = _gf_false;
+
+ switch (priv->client_pid) {
+ case GF_CLIENT_PID_GSYNCD:
+ /* valid xattr(s): *xtime, volume-mark* */
+ gf_log("glusterfs-fuse", GF_LOG_DEBUG, "PID: %d, checking xattr(s): "
+ "volume-mark*, *xtime", priv->client_pid);
+ if ( (strcmp (okey, UNPRIV_XA_NS".glusterfs.volume-mark") == 0)
+ || (fnmatch (UNPRIV_XA_NS".glusterfs.volume-mark.*", okey, FNM_PERIOD) == 0)
+ || (fnmatch (UNPRIV_XA_NS".glusterfs.*.xtime", okey, FNM_PERIOD) == 0) )
+ need_flip = _gf_true;
+ break;
+
+ case GF_CLIENT_PID_HADOOP:
+ /* valid xattr(s): pathinfo */
+ gf_log("glusterfs-fuse", GF_LOG_DEBUG, "PID: %d, checking xattr(s): "
+ "pathinfo", priv->client_pid);
+ if (strcmp (okey, UNPRIV_XA_NS".glusterfs.pathinfo") == 0)
+ need_flip = _gf_true;
+ break;
+ }
+
+ if (need_flip) {
+ gf_log ("glusterfs-fuse", GF_LOG_DEBUG, "flipping %s to "PRIV_XA_NS" equivalent",
+ okey);
+ ret = fuse_do_flip_xattr_ns (okey, PRIV_XA_NS, nkey);
+ } else {
+ /* if we cannot match, continue with what we got */
+ ret = fuse_xattr_alloc_default (okey, nkey);
+ }
+
+ return ret;
+}
+
+int
+fuse_ignore_xattr_set (fuse_private_t *priv, char *key)
+{
+ int ret = 0;
+
+ /* don't mess with user namespace */
+ if (fnmatch ("user.*", key, FNM_PERIOD) == 0)
+ goto out;
+
+ if (priv->client_pid != GF_CLIENT_PID_GSYNCD)
+ goto out;
+
+ /* trusted NS check */
+ if (!((fnmatch ("*.glusterfs.*.xtime", key, FNM_PERIOD) == 0)
+ || (fnmatch ("*.glusterfs.volume-mark",
+ key, FNM_PERIOD) == 0)
+ || (fnmatch ("*.glusterfs.volume-mark.*",
+ key, FNM_PERIOD) == 0)))
+ ret = -1;
+
+ out:
+ gf_log ("glusterfs-fuse", GF_LOG_DEBUG, "%s setxattr: key [%s], "
+ " client pid [%d]", (ret ? "disallowing" : "allowing"), key,
+ priv->client_pid);
+
+ return ret;
+}
diff --git a/xlators/mount/fuse/src/fuse-mem-types.h b/xlators/mount/fuse/src/fuse-mem-types.h
new file mode 100644
index 000000000..28b4dfbdd
--- /dev/null
+++ b/xlators/mount/fuse/src/fuse-mem-types.h
@@ -0,0 +1,28 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __FUSE_MEM_TYPES_H__
+#define __FUSE_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_fuse_mem_types_ {
+ gf_fuse_mt_iovec = gf_common_mt_end + 1,
+ gf_fuse_mt_fuse_private_t,
+ gf_fuse_mt_char,
+ gf_fuse_mt_iov_base,
+ gf_fuse_mt_fuse_state_t,
+ gf_fuse_mt_fd_ctx_t,
+ gf_fuse_mt_graph_switch_args_t,
+ gf_fuse_mt_gids_t,
+ gf_fuse_mt_end
+};
+#endif
+
diff --git a/xlators/mount/fuse/src/fuse-resolve.c b/xlators/mount/fuse/src/fuse-resolve.c
new file mode 100644
index 000000000..8565ce0e4
--- /dev/null
+++ b/xlators/mount/fuse/src/fuse-resolve.c
@@ -0,0 +1,724 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "fuse-bridge.h"
+
+static int
+fuse_resolve_all (fuse_state_t *state);
+
+int fuse_resolve_continue (fuse_state_t *state);
+int fuse_resolve_entry_simple (fuse_state_t *state);
+int fuse_resolve_inode_simple (fuse_state_t *state);
+int fuse_migrate_fd (xlator_t *this, fd_t *fd, xlator_t *old_subvol,
+ xlator_t *new_subvol);
+
+fuse_fd_ctx_t *
+fuse_fd_ctx_get (xlator_t *this, fd_t *fd);
+
+gf_boolean_t fuse_inode_needs_lookup (inode_t *inode, xlator_t *this);
+
+static int
+fuse_resolve_loc_touchup (fuse_state_t *state)
+{
+ fuse_resolve_t *resolve = NULL;
+ loc_t *loc = NULL;
+ char *path = NULL;
+ int ret = 0;
+
+ resolve = state->resolve_now;
+ loc = state->loc_now;
+
+ if (!loc->path) {
+ if (loc->parent && resolve->bname) {
+ ret = inode_path (loc->parent, resolve->bname, &path);
+ uuid_copy (loc->pargfid, loc->parent->gfid);
+ loc->name = resolve->bname;
+ } else if (loc->inode) {
+ ret = inode_path (loc->inode, NULL, &path);
+ uuid_copy (loc->gfid, loc->inode->gfid);
+ }
+ if (ret)
+ gf_log (THIS->name, GF_LOG_TRACE,
+ "return value inode_path %d", ret);
+ loc->path = path;
+ }
+
+ return 0;
+}
+
+
+int
+fuse_resolve_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ fuse_state_t *state = NULL;
+ fuse_resolve_t *resolve = NULL;
+ inode_t *link_inode = NULL;
+ loc_t *resolve_loc = NULL;
+
+ state = frame->root->state;
+ resolve = state->resolve_now;
+ resolve_loc = &resolve->resolve_loc;
+
+ STACK_DESTROY (frame->root);
+
+ if (op_ret == -1) {
+ gf_log (this->name, (op_errno == ENOENT)
+ ? GF_LOG_DEBUG : GF_LOG_WARNING,
+ "%s/%s: failed to resolve (%s)",
+ uuid_utoa (resolve_loc->pargfid), resolve_loc->name,
+ strerror (op_errno));
+ resolve->op_ret = -1;
+ resolve->op_errno = op_errno;
+ goto out;
+ }
+
+ link_inode = inode_link (inode, resolve_loc->parent,
+ resolve_loc->name, buf);
+
+ state->loc_now->inode = link_inode;
+
+out:
+ loc_wipe (resolve_loc);
+
+ fuse_resolve_continue (state);
+ return 0;
+}
+
+
+int
+fuse_resolve_entry (fuse_state_t *state)
+{
+ fuse_resolve_t *resolve = NULL;
+ loc_t *resolve_loc = NULL;
+
+ resolve = state->resolve_now;
+ resolve_loc = &resolve->resolve_loc;
+
+ resolve_loc->parent = inode_ref (state->loc_now->parent);
+ uuid_copy (resolve_loc->pargfid, state->loc_now->pargfid);
+ resolve_loc->name = resolve->bname;
+ resolve_loc->inode = inode_new (state->itable);
+
+ inode_path (resolve_loc->parent, resolve_loc->name,
+ (char **) &resolve_loc->path);
+
+ FUSE_FOP (state, fuse_resolve_entry_cbk, GF_FOP_LOOKUP,
+ lookup, resolve_loc, NULL);
+
+ return 0;
+}
+
+
+int
+fuse_resolve_gfid_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr, struct iatt *postparent)
+{
+ fuse_state_t *state = NULL;
+ fuse_resolve_t *resolve = NULL;
+ inode_t *link_inode = NULL;
+ loc_t *loc_now = NULL;
+
+ state = frame->root->state;
+ resolve = state->resolve_now;
+ loc_now = state->loc_now;
+
+ STACK_DESTROY (frame->root);
+
+ if (op_ret == -1) {
+ gf_log (this->name, (op_errno == ENOENT)
+ ? GF_LOG_DEBUG : GF_LOG_WARNING,
+ "%s: failed to resolve (%s)",
+ uuid_utoa (resolve->resolve_loc.gfid),
+ strerror (op_errno));
+ loc_wipe (&resolve->resolve_loc);
+
+ /* resolve->op_ret can have 3 values: 0, -1, -2.
+ * 0 : resolution was successful.
+ * -1: parent inode could not be resolved.
+ * -2: entry (inode corresponding to path) could not be resolved
+ */
+
+ if (uuid_is_null (resolve->gfid)) {
+ resolve->op_ret = -1;
+ } else {
+ resolve->op_ret = -2;
+ }
+
+ resolve->op_errno = op_errno;
+ goto out;
+ }
+
+ loc_wipe (&resolve->resolve_loc);
+
+ link_inode = inode_link (inode, NULL, NULL, buf);
+
+ if (!link_inode)
+ goto out;
+
+ if (!uuid_is_null (resolve->gfid)) {
+ loc_now->inode = link_inode;
+ goto out;
+ }
+
+ loc_now->parent = link_inode;
+ uuid_copy (loc_now->pargfid, link_inode->gfid);
+
+ fuse_resolve_entry (state);
+
+ return 0;
+out:
+ fuse_resolve_continue (state);
+ return 0;
+}
+
+
+int
+fuse_resolve_gfid (fuse_state_t *state)
+{
+ fuse_resolve_t *resolve = NULL;
+ loc_t *resolve_loc = NULL;
+ int ret = 0;
+
+ resolve = state->resolve_now;
+ resolve_loc = &resolve->resolve_loc;
+
+ if (!uuid_is_null (resolve->pargfid)) {
+ uuid_copy (resolve_loc->gfid, resolve->pargfid);
+ } else if (!uuid_is_null (resolve->gfid)) {
+ uuid_copy (resolve_loc->gfid, resolve->gfid);
+ }
+
+ /* inode may already exist in case we are looking up an inode which was
+ linked through readdirplus */
+ resolve_loc->inode = inode_find (state->itable, resolve_loc->gfid);
+ if (!resolve_loc->inode)
+ resolve_loc->inode = inode_new (state->itable);
+ ret = loc_path (resolve_loc, NULL);
+
+ if (ret <= 0) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to get the path for inode %s",
+ uuid_utoa (resolve->gfid));
+ }
+
+ FUSE_FOP (state, fuse_resolve_gfid_cbk, GF_FOP_LOOKUP,
+ lookup, resolve_loc, NULL);
+
+ return 0;
+}
+
+
+/*
+ * Return value:
+ * 0 - resolved parent and entry (as necessary)
+ * -1 - resolved parent but not entry (though necessary)
+ * 1 - resolved neither parent nor entry
+ */
+
+int
+fuse_resolve_parent_simple (fuse_state_t *state)
+{
+ fuse_resolve_t *resolve = NULL;
+ loc_t *loc = NULL;
+ inode_t *parent = NULL;
+ inode_t *inode = NULL;
+
+ resolve = state->resolve_now;
+ loc = state->loc_now;
+
+ loc->name = resolve->bname;
+
+ parent = resolve->parhint;
+ if (parent->table == state->itable) {
+ if (fuse_inode_needs_lookup (parent, THIS))
+ return 1;
+
+ /* no graph switches since */
+ loc->parent = inode_ref (parent);
+ uuid_copy (loc->pargfid, parent->gfid);
+ loc->inode = inode_grep (state->itable, parent, loc->name);
+
+ /* nodeid for root is 1 and we blindly take the latest graph's
+ * table->root as the parhint and because of this there is
+ * ambiguity whether the entry should have existed or not, and
+ * we took the conservative approach of assuming entry should
+ * have been there even though it need not have (bug #804592).
+ */
+ if ((loc->inode == NULL)
+ && __is_root_gfid (parent->gfid)) {
+ /* non decisive result - entry missing */
+ return -1;
+ }
+
+ /* decisive result - resolution success */
+ return 0;
+ }
+
+ parent = inode_find (state->itable, resolve->pargfid);
+ if (!parent) {
+ /* non decisive result - parent missing */
+ return 1;
+ }
+ if (fuse_inode_needs_lookup (parent, THIS)) {
+ inode_unref (parent);
+ return 1;
+ }
+
+ loc->parent = parent;
+ uuid_copy (loc->pargfid, resolve->pargfid);
+
+ inode = inode_grep (state->itable, parent, loc->name);
+ if (inode) {
+ loc->inode = inode;
+ /* decisive result - resolution success */
+ return 0;
+ }
+
+ /* non decisive result - entry missing */
+ return -1;
+}
+
+
+int
+fuse_resolve_parent (fuse_state_t *state)
+{
+ int ret = 0;
+
+ ret = fuse_resolve_parent_simple (state);
+ if (ret > 0) {
+ fuse_resolve_gfid (state);
+ return 0;
+ }
+
+ if (ret < 0) {
+ fuse_resolve_entry (state);
+ return 0;
+ }
+
+ fuse_resolve_continue (state);
+
+ return 0;
+}
+
+
+int
+fuse_resolve_inode_simple (fuse_state_t *state)
+{
+ fuse_resolve_t *resolve = NULL;
+ loc_t *loc = NULL;
+ inode_t *inode = NULL;
+
+ resolve = state->resolve_now;
+ loc = state->loc_now;
+
+ inode = resolve->hint;
+ if (inode->table == state->itable)
+ inode_ref (inode);
+ else
+ inode = inode_find (state->itable, resolve->gfid);
+
+ if (inode) {
+ if (!fuse_inode_needs_lookup (inode, THIS))
+ goto found;
+ /* inode was linked through readdirplus */
+ inode_unref (inode);
+ }
+
+ return 1;
+found:
+ loc->inode = inode;
+ return 0;
+}
+
+
+int
+fuse_resolve_inode (fuse_state_t *state)
+{
+ int ret = 0;
+
+ ret = fuse_resolve_inode_simple (state);
+
+ if (ret > 0) {
+ fuse_resolve_gfid (state);
+ return 0;
+ }
+
+ fuse_resolve_continue (state);
+
+ return 0;
+}
+
+
+int
+fuse_migrate_fd_task (void *data)
+{
+ int ret = -1;
+ fuse_state_t *state = NULL;
+ fd_t *basefd = NULL, *oldfd = NULL;
+ fuse_fd_ctx_t *basefd_ctx = NULL;
+ xlator_t *old_subvol = NULL;
+
+ state = data;
+ if (state == NULL) {
+ goto out;
+ }
+
+ basefd = state->fd;
+
+ basefd_ctx = fuse_fd_ctx_get (state->this, basefd);
+
+ LOCK (&basefd->lock);
+ {
+ oldfd = basefd_ctx->activefd ? basefd_ctx->activefd : basefd;
+ fd_ref (oldfd);
+ }
+ UNLOCK (&basefd->lock);
+
+ old_subvol = oldfd->inode->table->xl;
+
+ ret = fuse_migrate_fd (state->this, basefd, old_subvol,
+ state->active_subvol);
+
+ LOCK (&basefd->lock);
+ {
+ if (ret < 0) {
+ basefd_ctx->migration_failed = 1;
+ } else {
+ basefd_ctx->migration_failed = 0;
+ }
+ }
+ UNLOCK (&basefd->lock);
+
+ ret = 0;
+
+out:
+ if (oldfd)
+ fd_unref (oldfd);
+
+ return ret;
+}
+
+
+static inline int
+fuse_migrate_fd_error (xlator_t *this, fd_t *fd)
+{
+ fuse_fd_ctx_t *fdctx = NULL;
+ char error = 0;
+
+ fdctx = fuse_fd_ctx_get (this, fd);
+ if (fdctx != NULL) {
+ if (fdctx->migration_failed) {
+ error = 1;
+ }
+ }
+
+ return error;
+}
+
+#define FUSE_FD_GET_ACTIVE_FD(activefd, basefd) \
+ do { \
+ LOCK (&basefd->lock); \
+ { \
+ activefd = basefd_ctx->activefd ? \
+ basefd_ctx->activefd : basefd; \
+ if (activefd != basefd) { \
+ fd_ref (activefd); \
+ } \
+ } \
+ UNLOCK (&basefd->lock); \
+ \
+ if (activefd == basefd) { \
+ fd_ref (activefd); \
+ } \
+ } while (0);
+
+
+static int
+fuse_resolve_fd (fuse_state_t *state)
+{
+ fuse_resolve_t *resolve = NULL;
+ fd_t *basefd = NULL, *activefd = NULL;
+ xlator_t *active_subvol = NULL, *this = NULL;
+ int ret = 0;
+ char fd_migration_error = 0;
+ fuse_fd_ctx_t *basefd_ctx = NULL;
+
+ resolve = state->resolve_now;
+
+ this = state->this;
+
+ basefd = resolve->fd;
+ basefd_ctx = fuse_fd_ctx_get (this, basefd);
+ if (basefd_ctx == NULL) {
+ gf_log (state->this->name, GF_LOG_WARNING,
+ "fdctx is NULL for basefd (ptr:%p inode-gfid:%s), "
+ "resolver erroring out with errno EINVAL",
+ basefd, uuid_utoa (basefd->inode->gfid));
+ resolve->op_ret = -1;
+ resolve->op_errno = EINVAL;
+ goto resolve_continue;
+ }
+
+ FUSE_FD_GET_ACTIVE_FD (activefd, basefd);
+
+ active_subvol = activefd->inode->table->xl;
+
+ fd_migration_error = fuse_migrate_fd_error (state->this, basefd);
+ if (fd_migration_error) {
+ resolve->op_ret = -1;
+ resolve->op_errno = EBADF;
+ } else if (state->active_subvol != active_subvol) {
+ ret = synctask_new (state->this->ctx->env, fuse_migrate_fd_task,
+ NULL, NULL, state);
+
+ fd_migration_error = fuse_migrate_fd_error (state->this,
+ basefd);
+ fd_unref (activefd);
+
+ FUSE_FD_GET_ACTIVE_FD (activefd, basefd);
+ active_subvol = activefd->inode->table->xl;
+
+ if ((ret == -1) || fd_migration_error
+ || (state->active_subvol != active_subvol)) {
+ if (ret == -1) {
+ gf_log (state->this->name, GF_LOG_WARNING,
+ "starting sync-task to migrate "
+ "basefd (ptr:%p inode-gfid:%s) failed "
+ "(old-subvolume:%s-%d "
+ "new-subvolume:%s-%d)",
+ basefd,
+ uuid_utoa (basefd->inode->gfid),
+ active_subvol->name,
+ active_subvol->graph->id,
+ state->active_subvol->name,
+ state->active_subvol->graph->id);
+ } else {
+ gf_log (state->this->name, GF_LOG_WARNING,
+ "fd migration of basefd "
+ "(ptr:%p inode-gfid:%s) failed "
+ "(old-subvolume:%s-%d "
+ "new-subvolume:%s-%d)",
+ basefd,
+ uuid_utoa (basefd->inode->gfid),
+ active_subvol->name,
+ active_subvol->graph->id,
+ state->active_subvol->name,
+ state->active_subvol->graph->id);
+ }
+
+ resolve->op_ret = -1;
+ resolve->op_errno = EBADF;
+ } else {
+ gf_log (state->this->name, GF_LOG_DEBUG,
+ "basefd (ptr:%p inode-gfid:%s) migrated "
+ "successfully in resolver "
+ "(old-subvolume:%s-%d new-subvolume:%s-%d)",
+ basefd, uuid_utoa (basefd->inode->gfid),
+ active_subvol->name, active_subvol->graph->id,
+ state->active_subvol->name,
+ state->active_subvol->graph->id);
+ }
+ }
+
+ if ((resolve->op_ret == -1) && (resolve->op_errno == EBADF)) {
+ gf_log ("fuse-resolve", GF_LOG_WARNING,
+ "migration of basefd (ptr:%p inode-gfid:%s) "
+ "did not complete, failing fop with EBADF "
+ "(old-subvolume:%s-%d new-subvolume:%s-%d)", basefd,
+ uuid_utoa (basefd->inode->gfid),
+ active_subvol->name, active_subvol->graph->id,
+ state->active_subvol->name,
+ state->active_subvol->graph->id);
+ }
+
+ if (activefd != basefd) {
+ state->fd = fd_ref (activefd);
+ fd_unref (basefd);
+ }
+
+ /* state->active_subvol = active_subvol; */
+
+resolve_continue:
+ if (activefd != NULL) {
+ fd_unref (activefd);
+ }
+
+ fuse_resolve_continue (state);
+
+ return 0;
+}
+
+
+int
+fuse_gfid_set (fuse_state_t *state)
+{
+ int ret = 0;
+
+ if (uuid_is_null (state->gfid))
+ goto out;
+
+ if (!state->xdata)
+ state->xdata = dict_new ();
+
+ if (!state->xdata) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_static_bin (state->xdata, "gfid-req",
+ state->gfid, sizeof (state->gfid));
+out:
+ return ret;
+}
+
+
+int
+fuse_resolve_entry_init (fuse_state_t *state, fuse_resolve_t *resolve,
+ ino_t par, char *name)
+{
+ inode_t *parent = NULL;
+
+ parent = fuse_ino_to_inode (par, state->this);
+ uuid_copy (resolve->pargfid, parent->gfid);
+ resolve->parhint = parent;
+ resolve->bname = gf_strdup (name);
+
+ return 0;
+}
+
+
+int
+fuse_resolve_inode_init (fuse_state_t *state, fuse_resolve_t *resolve,
+ ino_t ino)
+{
+ inode_t *inode = NULL;
+
+ inode = fuse_ino_to_inode (ino, state->this);
+ uuid_copy (resolve->gfid, inode->gfid);
+ resolve->hint = inode;
+
+ return 0;
+}
+
+
+int
+fuse_resolve_fd_init (fuse_state_t *state, fuse_resolve_t *resolve,
+ fd_t *fd)
+{
+ resolve->fd = fd_ref (fd);
+
+ return 0;
+}
+
+
+static int
+fuse_resolve (fuse_state_t *state)
+{
+ fuse_resolve_t *resolve = NULL;
+
+ resolve = state->resolve_now;
+
+ if (resolve->fd) {
+
+ fuse_resolve_fd (state);
+
+ } else if (!uuid_is_null (resolve->pargfid)) {
+
+ fuse_resolve_parent (state);
+
+ } else if (!uuid_is_null (resolve->gfid)) {
+
+ fuse_resolve_inode (state);
+
+ } else {
+ fuse_resolve_all (state);
+ }
+
+ return 0;
+}
+
+
+static int
+fuse_resolve_done (fuse_state_t *state)
+{
+ fuse_resume_fn_t fn = NULL;
+
+ fn = state->resume_fn;
+
+ fn (state);
+
+ return 0;
+}
+
+
+/*
+ * This function is called multiple times, once per resolving one location/fd.
+ * state->resolve_now is used to decide which location/fd is to be resolved now
+ */
+static int
+fuse_resolve_all (fuse_state_t *state)
+{
+ if (state->resolve_now == NULL) {
+
+ state->resolve_now = &state->resolve;
+ state->loc_now = &state->loc;
+
+ fuse_resolve (state);
+
+ } else if (state->resolve_now == &state->resolve) {
+
+ state->resolve_now = &state->resolve2;
+ state->loc_now = &state->loc2;
+
+ fuse_resolve (state);
+
+ } else if (state->resolve_now == &state->resolve2) {
+
+ fuse_resolve_done (state);
+
+ } else {
+ gf_log ("fuse-resolve", GF_LOG_ERROR,
+ "Invalid pointer for state->resolve_now");
+ }
+
+ return 0;
+}
+
+
+int
+fuse_resolve_continue (fuse_state_t *state)
+{
+ fuse_resolve_loc_touchup (state);
+
+ fuse_resolve_all (state);
+
+ return 0;
+}
+
+
+int
+fuse_resolve_and_resume (fuse_state_t *state, fuse_resume_fn_t fn)
+{
+ fuse_gfid_set (state);
+
+ state->resume_fn = fn;
+
+ fuse_resolve_all (state);
+
+ return 0;
+}
diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in
index 19418191a..a192d6059 100755
--- a/xlators/mount/fuse/utils/mount.glusterfs.in
+++ b/xlators/mount/fuse/utils/mount.glusterfs.in
@@ -1,16 +1,16 @@
-#!/bin/bash
-# (C) 2006, 2007, 2008 Z RESEARCH Inc. <http://www.zresearch.com>
-#
+#!/bin/sh
+# (C) 2006, 2007, 2008 Gluster Inc. <http://www.gluster.com>
+#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of
# the License, or (at your option) any later version.
-#
+#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
-#
+#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the Free
# Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
@@ -23,215 +23,445 @@ _init ()
LOG_CRITICAL=CRITICAL;
LOG_ERROR=ERROR;
LOG_WARNING=WARNING;
- LOG_NORMAL=NORMAL
+ LOG_INFO=INFO
LOG_DEBUG=DEBUG;
LOG_TRACE=TRACE;
- # set default log level to NORMAL
- log_level=$LOG_NORMAL;
+ HOST_NAME_MAX=64;
+
prefix="@prefix@";
exec_prefix=@exec_prefix@;
cmd_line=$(echo "@sbindir@/glusterfs");
+
+ case `uname -s` in
+ NetBSD)
+ getinode="stat -f %i"
+ getdev="stat -f %d"
+ lgetinode="${getinode} -L"
+ lgetdev="${getdev} -L"
+
+ mounttab=/proc/mounts
+ ;;
+ Linux)
+ getinode="stat -c %i $i"
+ getdev="stat -c %d $d"
+ lgetinode="${getinode} -L"
+ lgetdev="${getdev} -L"
+
+ mounttab=/etc/mtab
+ ;;
+ esac
+
+ UPDATEDBCONF=/etc/updatedb.conf
+}
+
+parse_backup_volfile_servers ()
+{
+ local server_list=$1
+ local servers=""
+ local new_servers=""
+
+ servers=$(echo ${server_list} | sed 's/\:/ /g')
+ for server in ${servers}; do
+ length=$(echo $server | wc -c)
+ if [ ${length} -gt ${HOST_NAME_MAX} ]; then
+ echo "Hostname:${server} provided is too long.. skipping"
+ continue
+ fi
+ new_servers=$(echo "$new_servers $server")
+ done
+ echo ${new_servers}
}
start_glusterfs ()
{
+ # lets the comparsion be case insensitive for all strings
+
if [ -n "$log_level_str" ]; then
- case "$log_level_str" in
- "ERROR")
- log_level=$LOG_ERROR;
- ;;
- "NORMAL")
- log_level=$LOG_NORMAL
+ case "$( echo $log_level_str | tr '[a-z]' '[A-Z]')" in
+ "ERROR")
+ log_level=$LOG_ERROR;
;;
- "DEBUG")
- log_level=$LOG_DEBUG;
- ;;
- "CRITICAL")
- log_level=$LOG_CRITICAL;
- ;;
- "WARNING")
- log_level=$LOG_WARNING;
- ;;
- "TRACE")
- log_level=$LOG_TRACE;
- ;;
- "NONE")
- log_level=$LOG_NONE;
- ;;
- *)
- echo "invalid log level $log_level_str, using NORMAL";
- log_level=$LOG_NORMAL;
- ;;
- esac
- fi
- cmd_line=$(echo "$cmd_line --log-level=$log_level");
-
- if [ -n "$log_file" ]; then
- cmd_line=$(echo "$cmd_line --log-file=$log_file");
+ "INFO")
+ log_level=$LOG_INFO
+ ;;
+ "DEBUG")
+ log_level=$LOG_DEBUG;
+ ;;
+ "CRITICAL")
+ log_level=$LOG_CRITICAL;
+ ;;
+ "WARNING")
+ log_level=$LOG_WARNING;
+ ;;
+ "TRACE")
+ log_level=$LOG_TRACE;
+ ;;
+ "NONE")
+ log_level=$LOG_NONE;
+ ;;
+ *)
+ echo "invalid log level $log_level_str, using INFO";
+ log_level=$LOG_INFO;
+ ;;
+ esac
+ fi
+
+#options without values start here
+ if [ -n "$read_only" ]; then
+ cmd_line=$(echo "$cmd_line --read-only");
+ fi
+
+ if [ -n "$acl" ]; then
+ cmd_line=$(echo "$cmd_line --acl");
+ fi
+
+ if [ -n "$selinux" ]; then
+ cmd_line=$(echo "$cmd_line --selinux");
+ fi
+
+ if [ -n "$enable_ino32" ]; then
+ cmd_line=$(echo "$cmd_line --enable-ino32");
+ fi
+
+ if [ -n "$worm" ]; then
+ cmd_line=$(echo "$cmd_line --worm");
+ fi
+
+ if [ -n "$fopen_keep_cache" ]; then
+ cmd_line=$(echo "$cmd_line --fopen-keep-cache");
fi
if [ -n "$volfile_check" ]; then
- cmd_line=$(echo "$cmd_line --volfile-check");
+ cmd_line=$(echo "$cmd_line --volfile-check");
+ fi
+
+ if [ -n "$mem_accounting" ]; then
+ cmd_line=$(echo "$cmd_line --mem-accounting");
+ fi
+
+ if [ -n "$aux_gfid_mount" ]; then
+ cmd_line=$(echo "$cmd_line --aux-gfid-mount");
+ fi
+
+#options with values start here
+ if [ -n "$log_level" ]; then
+ cmd_line=$(echo "$cmd_line --log-level=$log_level");
+ fi
+
+ if [ -n "$log_file" ]; then
+ cmd_line=$(echo "$cmd_line --log-file=$log_file");
fi
if [ -n "$direct_io_mode" ]; then
- cmd_line=$(echo "$cmd_line --disable-direct-io-mode");
+ cmd_line=$(echo "$cmd_line --direct-io-mode=$direct_io_mode");
+ fi
+
+ if [ -n "$use_readdirp" ]; then
+ cmd_line=$(echo "$cmd_line --use-readdirp=$use_readdirp");
fi
if [ -n "$volume_name" ]; then
cmd_line=$(echo "$cmd_line --volume-name=$volume_name");
fi
-
- if [ -n "$log_server" ]; then
- if [ -n "$log_server_port" ]; then
- cmd_line=$(echo "$cmd_line \
---log-server=$log_server \
---log-server-port=$log_server_port");
- fi
+
+ if [ -n "$attribute_timeout" ]; then
+ cmd_line=$(echo "$cmd_line --attribute-timeout=$attribute_timeout");
+ fi
+
+ if [ -n "$entry_timeout" ]; then
+ cmd_line=$(echo "$cmd_line --entry-timeout=$entry_timeout");
+ fi
+
+ if [ -n "$negative_timeout" ]; then
+ cmd_line=$(echo "$cmd_line --negative-timeout=$negative_timeout");
+ fi
+
+ if [ -n "$gid_timeout" ]; then
+ cmd_line=$(echo "$cmd_line --gid-timeout=$gid_timeout");
+ fi
+
+ if [ -n "$bg_qlen" ]; then
+ cmd_line=$(echo "$cmd_line --background-qlen=$bg_qlen");
+ fi
+
+ if [ -n "$cong_threshold" ]; then
+ cmd_line=$(echo "$cmd_line --congestion-threshold=$cong_threshold");
+ fi
+
+ if [ -n "$fuse_mountopts" ]; then
+ cmd_line=$(echo "$cmd_line --fuse-mountopts=$fuse_mountopts");
fi
+ if [ -n "$xlator_option" ]; then
+ xlator_option=$(echo $xlator_option | sed s/"xlator-option="/"--xlator-option "/g)
+ cmd_line=$(echo "$cmd_line $xlator_option");
+ fi
+
+ # for rdma volume, we have to fetch volfile with '.rdma' added
+ # to volume name, so that it fetches the right client vol file
+ volume_id_rdma="";
+
if [ -z "$volfile_loc" ]; then
if [ -n "$server_ip" ]; then
- cmd_line=$(echo "$cmd_line --volfile-server-port=$server_port");
- if [ -n "$transport" ]; then
- cmd_line=$(echo "$cmd_line --volfile-server-transport=$transport");
+
+ cmd_line=$(echo "$cmd_line --volfile-server=$server_ip");
+
+ if [ -n "$backup_volfile_servers" ]; then
+ servers=$(parse_backup_volfile_servers ${backup_volfile_servers})
+ for i in $(echo ${servers}); do
+ cmd_line=$(echo "$cmd_line --volfile-server=$i");
+ done
fi
- if [ -n "$volume_id" ]; then
- cmd_line=$(echo "$cmd_line --volfile-id=$volume_id");
+
+ if [ -n "$server_port" ]; then
+ cmd_line=$(echo "$cmd_line --volfile-server-port=$server_port");
fi
- if [ -n "$backupvolfile_server" ]; then
- cmd_line1=$(echo "$cmd_line --volfile-server=$backupvolfile_server");
+ if [ -n "$transport" ]; then
+ cmd_line=$(echo "$cmd_line --volfile-server-transport=$transport");
+ if [ "$transport" = "rdma" ]; then
+ volume_id_rdma=".rdma";
+ fi
fi
- cmd_line=$(echo "$cmd_line --volfile-server=$server_ip");
+ if [ -n "$volume_id" ]; then
+ if [ -n "$volume_id_rdma" ]; then
+ volume_id="$volume_id$volume_id_rdma";
+ fi
+ cmd_line=$(echo "$cmd_line --volfile-id=$volume_id");
+ fi
fi
else
cmd_line=$(echo "$cmd_line --volfile=$volfile_loc");
fi
-
+
+ if [ -n "$fuse_mountopts" ]; then
+ cmd_line=$(echo "$cmd_line --fuse-mountopts=$fuse_mountopts");
+ fi
+
cmd_line=$(echo "$cmd_line $mount_point");
+ err=0;
$cmd_line;
- # retry the failover
- if [ $? != "0" ]; then
- if [ -n "$cmd_line1" ]; then
- cmd_line1=$(echo "$cmd_line1 $mount_point");
- $cmd_line1
- fi
+
+ inode=$( ${getinode} $mount_point 2>/dev/null);
+
+ # this is required if the stat returns error
+ if [ -z "$inode" ]; then
+ inode="0";
fi
+ if [ $inode -ne 1 ]; then
+ err=1;
+ fi
+
+ if [ $err -eq "1" ]; then
+ echo "Mount failed. Please check the log file for more details."
+ umount $mount_point > /dev/null 2>&1;
+ exit 1;
+ fi
}
usage ()
{
-echo "Usage: mount.glusterfs <volumeserver>:<volumeid/volumeport> -o <options> <mountpoint>
-Options:
-man 8 mount.glusterfs
+echo "Usage: mount.glusterfs <volumeserver>:<volumeid/volumeport> -o <options> <mountpoint>
+Options:
+man 8 mount.glusterfs
-To display the version number of the mount helper:
+To display the version number of the mount helper:
mount.glusterfs --version"
}
-main ()
+# check for recursive mounts. i.e, mounting over an existing brick
+check_recursive_mount ()
{
- helper=$(echo "$@" | sed -n 's/.*\--[ ]*\([^ ]*\).*/\1/p');
-
- options=$(echo "$@" | sed -n 's/.*\-o[ ]*\([^ ]*\).*/\1/p');
-
- new_log_level=$(echo "$options" | sed -n 's/.*log-level=\([^,]*\).*/\1/p');
-
- [ -n "$new_log_level" ] && {
- log_level_str="$new_log_level";
- }
-
- log_file=$(echo "$options" | sed -n 's/.*log-file=\([^,]*\).*/\1/p');
-
- transport=$(echo "$options" | sed -n 's/.*transport=\([^,]*\).*/\1/p');
+ if [ $1 = "/" ]; then
+ echo Cannot mount over root;
+ exit 2;
+ fi
+ # GFID check first
+ # remove trailing / from mount point
+ mnt_dir=${1%/};
+
+ export PATH;
+ # check whether getfattr exists
+ which getfattr > /dev/null 2>&1;
+ if [ $? -ne 0 ]; then
+ return;
+ fi
- direct_io_mode=$(echo "$options" | sed -n 's/.*direct-io-mode=\([^,]*\).*/\1/p');
+ getfattr -n trusted.gfid $mnt_dir 2>/dev/null | grep -iq "trusted.gfid=";
+ if [ $? -eq 0 ]; then
+ echo "ERROR: $mnt_dir is in use as a brick of a gluster volume";
+ exit 2;
+ fi
- volume_name=$(echo "$options" | sed -n 's/.*volume-name=\([^,]*\).*/\1/p');
+ # check if the mount point is a brick's parent directory
+ GLUSTERD_WORKDIR="/var/lib/glusterd";
- volume_id=$(echo "$options" | sed -n 's/.*volume_id=\([^,]*\).*/\1/p');
+ ls -L "$GLUSTERD_WORKDIR"/vols/*/bricks/* > /dev/null 2>&1;
+ if [ $? -ne 0 ]; then
+ return;
+ fi
- volfile_check=$(echo "$options" | sed -n 's/.*volfile-check=\([^,]*\).*/\1/p');
+ brick_path=`grep ^path "$GLUSTERD_WORKDIR"/vols/*/bricks/* | cut -d "=" -f 2`;
+ root_inode=`${lgetinode} /`;
+ root_dev=`${lgetdev} /`;
+ mnt_inode=`${lgetinode} $mnt_dir`;
+ mnt_dev=`${lgetdev} $mnt_dir`;
+ for brick in "$brick_path";
+ do
+ # evaluate brick path to see if this is local, if non-local, skip iteration
+ ls $brick > /dev/null 2>&1;
+ if [ $? -ne 0 ]; then
+ continue;
+ fi
+ getfattr -n trusted.gfid "$brick" 2>/dev/null | grep -iq "trusted.gfid=";
+ if [ $? -ne 0 ]; then
+ continue;
+ else
+ # brick is local
+ while [ 1 ];
+ do
+ tmp_brick="$brick";
+ brick="$brick"/..;
+ brick_dev=`${lgetdev} $brick`;
+ brick_inode=`${lgetinode} $brick`;
+ if [ "$mnt_inode" -eq "$brick_inode" -a "$mnt_dev" -eq "$brick_dev" ]; then
+ echo ERROR: $mnt_dir is a parent of the brick $tmp_brick;
+ exit 2;
+ fi
+ [ "$root_inode" -ne "$brick_inode" -o "$root_dev" -ne "$brick_dev" ] || break;
+ done;
+ fi
+ done;
+}
- server_port=$(echo "$options" | sed -n 's/.*server-port=\([^,]*\).*/\1/p');
- backupvolfile_server=$(echo "$options" | sed -n 's/.*backupvolfile-server=\([^,]*\).*/\1/p');
+main ()
+{
+ helper=$(echo "$@" | sed -n 's/.*\--[ ]*\([^ ]*\).*/\1/p');
+ in_opt="no"
+ pos_args=0
+ for opt in "$@"; do
+ if [ "$in_opt" = "yes" ]; then
+ for pair in $(echo "$opt" | tr "," " "); do
+ # Handle options without values.
+ case "$pair" in
+ "ro") read_only=1 ;;
+ "acl") acl=1 ;;
+ "selinux") selinux=1 ;;
+ "worm") worm=1 ;;
+ "fopen-keep-cache") fopen_keep_cache=1 ;;
+ "enable-ino32") enable_ino32=1 ;;
+ "mem-accounting") mem_accounting=1;;
+ "aux-gfid-mount")
+ if [ `uname -s` = "Linux" ]; then
+ aux_gfid_mount=1
+ fi
+ ;;
+ # "mount -t glusterfs" sends this, but it's useless.
+ "rw") ;;
+ # these ones are interpreted during system initialization
+ "noauto") ;;
+ "_netdev") ;;
+ *)
+ key=$(echo "$pair" | cut -f1 -d'=');
+ value=$(echo "$pair" | cut -f2- -d'=');
+
+ # Handle options with values.
+ case "$key" in
+ "log-level") log_level_str=$value ;;
+ "log-file") log_file=$value ;;
+ "transport") transport=$value ;;
+ "direct-io-mode") direct_io_mode=$value ;;
+ "volume-name") volume_name=$value ;;
+ "volume-id") volume_id=$value ;;
+ "volfile-check") volfile_check=$value ;;
+ "server-port") server_port=$value ;;
+ "attribute-timeout")
+ attribute_timeout=$value ;;
+ "entry-timeout") entry_timeout=$value ;;
+ "negative-timeout") negative_timeout=$value ;;
+ "gid-timeout") gid_timeout=$value ;;
+ "background-qlen") bg_qlen=$value ;;
+ "backup-volfile-servers") backup_volfile_servers=$value ;;
+ "congestion-threshold") cong_threshold=$value ;;
+ "xlator-option") xlator_option=$xlator_option" "$pair ;;
+ "fuse-mountopts") fuse_mountopts=$value ;;
+ "use-readdirp") use_readdirp=$value ;;
+ *)
+ # Passthru
+ [ -z "$fuse_mountopts" ] || fuse_mountopts="$fuse_mountopts,"
+ fuse_mountopts="$fuse_mountopts$pair"
+ ;;
+ esac
+ esac
+ done
+ in_opt="no"
+ elif [ "$opt" = "-o" ]; then
+ in_opt="yes"
+ else
+ case $pos_args in
+ 0) volfile_loc=$opt ;;
+ 1) mount_point=$opt ;;
+ *) echo "extra arguments at end (ignored)" ;;
+ esac
+ pos_args=$((pos_args+1))
+ fi
+ done
+ if [ $in_opt = "yes" -o $pos_args -lt 2 ]; then
+ usage
+ exit 1
+ fi
- log_server=$(echo "$options" | sed -n 's/.*log-server=\([^,]*\).*/\1/p');
-
- log_server_port=$(echo "$options" | sed -n 's/.*log-server-port=\([^,]*\).*/\1/p');
-
- volfile_loc="$1";
-
[ -r "$volfile_loc" ] || {
- server_ip=$(echo "$volfile_loc" | sed -n 's/\([^\:]*\).*/\1/p');
+ server_ip=$(echo "$volfile_loc" | sed -n 's/\([a-zA-Z0-9:.\-]*\):.*/\1/p');
test_str=$(echo "$volfile_loc" | sed -n 's/.*:\([^ ]*\).*/\1/p');
[ -n "$test_str" ] && {
- # Backward compatibility
- test_str1=$(echo "$test_str" | sed -e 's/[0-9]//g');
- [ -n "$test_str1" ] && {
- volume_id="$test_str";
- } || {
- server_port=$test_str;
- }
- }
- volfile_loc="";
- }
-
- [ -n "$server_port" ] || {
- server_port="6996";
+ volume_id="$test_str";
+ }
+ volfile_loc="";
}
- new_fs_options=$(echo "$options" | sed -e 's/[,]*log-file=[^,]*//' \
- -e 's/[,]*log-level=[^,]*//' \
- -e 's/[,]*volume-name=[^,]*//' \
- -e 's/[,]*direct-io-mode=[^,]*//' \
- -e 's/[,]*volfile-check=[^,]*//' \
- -e 's/[,]*transport=[^,]*//' \
- -e 's/[,]*backupvolfile-server=[^,]*//' \
- -e 's/[,]*server-port=[^,]*//' \
- -e 's/[,]*volume-id=[^,]*//' \
- -e 's/[,]*log-server=[^,]*//' \
- -e 's/[,]*log-server-port=[^,]*//');
-
- #
+ #
[ -n "$helper" ] && {
cmd_line=$(echo "$cmd_line --$helper");
exec $cmd_line;
exit 0;
}
- mount_provided=$(echo "$@" | cut -f2 -d'/');
-
- [ -n "$mount_provided" ] && {
- mount_point="/$mount_provided";
- }
-
- [ -z "$mount_point" ] && {
+ # No need to do a ! -d test, it is taken care while initializing the
+ # variable mount_point
+ [ -z "$mount_point" -o ! -d "$mount_point" ] && {
+ echo "ERROR: Mount point does not exist."
usage;
exit 0;
}
# Simple check to avoid multiple identical mounts
- if grep -q " $mount_point fuse" /etc/mtab; then
+ if grep -q "[[:space:]+]${mount_point}[[:space:]+]fuse" $mounttab; then
echo -n "$0: according to mtab, GlusterFS is already mounted on "
echo "$mount_point"
- sleep 1;
exit 0;
fi
- fs_options=$(echo "$fs_options,$new_fs_options");
-
- start_glusterfs;
+ check_recursive_mount "$mount_point";
+
+ # Append fuse.glusterfs to PRUNEFS variable in updatedb.conf(5). updatedb(8)
+ # should not index files under GlusterFS, indexing will slow down GlusteFS
+ # if the filesystem is several TB in size.
+ test -f $UPDATEDBCONF && {
+ if ! grep -q 'glusterfs' $UPDATEDBCONF; then
+ sed 's/\(PRUNEFS.*\)"/\1 fuse.glusterfs"/' $UPDATEDBCONF \
+ > ${UPDATEDBCONF}.bak
+ mv -f ${UPDATEDBCONF}.bak $UPDATEDBCONF
+ fi
+ }
- sleep 3;
+ start_glusterfs;
}
_init "$@" && main "$@";
-
diff --git a/xlators/mount/fuse/utils/mount_glusterfs.in b/xlators/mount/fuse/utils/mount_glusterfs.in
index b6a3d1857..b12b4e04e 100755
--- a/xlators/mount/fuse/utils/mount_glusterfs.in
+++ b/xlators/mount/fuse/utils/mount_glusterfs.in
@@ -1,5 +1,5 @@
#!/bin/sh
-# (C) 2008 Z RESEARCH Inc. <http://www.zresearch.com>
+# (C) 2008 Gluster Inc. <http://www.gluster.com>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
@@ -25,11 +25,11 @@ _init ()
LOG_CRITICAL=CRITICAL;
LOG_ERROR=ERROR;
LOG_WARNING=WARNING;
- LOG_NORMAL=NORMAL;
+ LOG_INFO=INFO;
LOG_DEBUG=DEBUG;
# set default log level to ERROR
- log_level=$LOG_NORMAL;
+ log_level=$LOG_INFO;
}
start_glusterfs ()
@@ -43,8 +43,8 @@ start_glusterfs ()
"ERROR")
log_level=$LOG_ERROR;
;;
- "NORMAL")
- log_level=$LOG_NORMAL;
+ "INFO")
+ log_level=$LOG_INFO;
;;
"DEBUG")
log_level=$LOG_DEBUG;
@@ -59,8 +59,8 @@ start_glusterfs ()
log_level=$LOG_NONE;
;;
*)
- echo "invalid log level $log_level_str, using NORMAL";
- log_level=$LOG_NORMAL;
+ echo "invalid log level $log_level_str, using INFO";
+ log_level=$LOG_INFO;
;;
esac
fi
@@ -82,12 +82,10 @@ start_glusterfs ()
if [ -n "$transport" ]; then
cmd_line=$(echo "$cmd_line \
--volfile-server=$server_ip \
---volfile-server-port=$server_port \
--volfile-server-transport=$transport");
else
cmd_line=$(echo "$cmd_line \
---volfile-server=$server_ip \
---volfile-server-port=$server_port");
+--volfile-server=$server_ip");
fi
else
cmd_line=$(echo "$cmd_line --volfile=$volfile_loc");
@@ -167,19 +165,15 @@ main ()
# TODO: use getopt. This is very much darwin specific
volfile_loc="$1";
- while [ "$volfile_loc" == "-o" ] ; do
+ while [ "$volfile_loc" = "-o" ] ; do
shift ;
shift ;
volfile_loc="$1";
done
[ -r "$volfile_loc" ] || {
- server_ip=$(echo "$volfile_loc" | sed -n 's/\([^\:]*\).*/\1/p');
- server_port=$(echo "$volfile_loc" | sed -n 's/.*:\([^ ]*\).*/\1/p');
- [ -n "$server_port" ] || {
- server_port="6996";
- }
-
+ server_ip=$(echo "$volfile_loc" | sed -n 's/\([a-zA-Z0-9:.\-]*\):.*/\1/p');
+ volume_id=$(echo "$volfile_loc" | sed -n 's/[a-zA-Z0-9:.\-]*:\(.*\)/\1/p');
volfile_loc="";
}
# following line is product of love towards sed
diff --git a/xlators/nfs/Makefile.am b/xlators/nfs/Makefile.am
new file mode 100644
index 000000000..8771032f6
--- /dev/null
+++ b/xlators/nfs/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = server
+
+CLEANFILES =
diff --git a/xlators/nfs/server/Makefile.am b/xlators/nfs/server/Makefile.am
new file mode 100644
index 000000000..a985f42a8
--- /dev/null
+++ b/xlators/nfs/server/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/nfs/server/src/Makefile.am b/xlators/nfs/server/src/Makefile.am
new file mode 100644
index 000000000..62fbf6535
--- /dev/null
+++ b/xlators/nfs/server/src/Makefile.am
@@ -0,0 +1,24 @@
+xlator_LTLIBRARIES = server.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/nfs
+nfsrpclibdir = $(top_srcdir)/rpc/rpc-lib/src
+server_la_LDFLAGS = -module -avoid-version
+server_la_SOURCES = nfs.c nfs-common.c nfs-fops.c nfs-inodes.c \
+ nfs-generics.c mount3.c nfs3-fh.c nfs3.c nfs3-helpers.c nlm4.c \
+ nlmcbk_svc.c mount3udp_svc.c acl3.c
+server_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = nfs.h nfs-common.h nfs-fops.h nfs-inodes.h nfs-generics.h \
+ mount3.h nfs3-fh.h nfs3.h nfs3-helpers.h nfs-mem-types.h nlm4.h \
+ acl3.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -DLIBDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/auth\" \
+ -I$(top_srcdir)/libglusterfs/src \
+ -I$(nfsrpclibdir) -I$(CONTRIBDIR)/rbtree \
+ -I$(top_srcdir)/rpc/xdr/src/
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+AM_LDFLAGS = -L$(xlatordir)
+
+CLEANFILES =
diff --git a/xlators/nfs/server/src/acl3.c b/xlators/nfs/server/src/acl3.c
new file mode 100644
index 000000000..08b099b4e
--- /dev/null
+++ b/xlators/nfs/server/src/acl3.c
@@ -0,0 +1,708 @@
+/*
+ * Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "defaults.h"
+#include "rpcsvc.h"
+#include "dict.h"
+#include "xlator.h"
+#include "nfs.h"
+#include "mem-pool.h"
+#include "logging.h"
+#include "nfs-fops.h"
+#include "inode.h"
+#include "nfs3.h"
+#include "nfs-mem-types.h"
+#include "nfs3-helpers.h"
+#include "nfs3-fh.h"
+#include "nfs-generics.h"
+#include "acl3.h"
+
+
+typedef ssize_t (*acl3_serializer) (struct iovec outmsg, void *args);
+
+extern void nfs3_call_state_wipe (nfs3_call_state_t *cs);
+
+extern nfs3_call_state_t *
+nfs3_call_state_init (struct nfs3_state *s, rpcsvc_request_t *req, xlator_t *v);
+
+extern int
+nfs3_fh_validate (struct nfs3_fh *fh);
+
+extern fattr3
+nfs3_stat_to_fattr3 (struct iatt *buf);
+
+#define acl3_validate_nfs3_state(request, state, status, label, retval) \
+ do { \
+ state = rpcsvc_request_program_private (request); \
+ if (!state) { \
+ gf_log (GF_ACL, GF_LOG_ERROR, "NFSv3 state " \
+ "missing from RPC request"); \
+ rpcsvc_request_seterr (req, SYSTEM_ERR); \
+ status = NFS3ERR_SERVERFAULT; \
+ goto label; \
+ } \
+ } while (0); \
+
+#define acl3_validate_gluster_fh(handle, status, errlabel) \
+ do { \
+ if (!nfs3_fh_validate (handle)) { \
+ status = NFS3ERR_SERVERFAULT; \
+ goto errlabel; \
+ } \
+ } while (0) \
+
+
+extern xlator_t *
+nfs3_fh_to_xlator (struct nfs3_state *nfs3, struct nfs3_fh *fh);
+
+#define acl3_map_fh_to_volume(nfs3state, handle, req, volume, status, label) \
+ do { \
+ char exportid[256], gfid[256]; \
+ rpc_transport_t *trans = NULL; \
+ volume = nfs3_fh_to_xlator ((nfs3state), handle); \
+ if (!volume) { \
+ uuid_unparse (handle->exportid, exportid); \
+ uuid_unparse (handle->gfid, gfid); \
+ trans = rpcsvc_request_transport (req); \
+ gf_log (GF_ACL, GF_LOG_ERROR, "Failed to map " \
+ "FH to vol: client=%s, exportid=%s, gfid=%s",\
+ trans->peerinfo.identifier, exportid, \
+ gfid); \
+ gf_log (GF_ACL, GF_LOG_ERROR, \
+ "Stale nfs client %s must be trying to "\
+ "connect to a deleted volume, please " \
+ "unmount it.", trans->peerinfo.identifier);\
+ status = NFS3ERR_STALE; \
+ goto label; \
+ } else { \
+ gf_log (GF_ACL, GF_LOG_TRACE, "FH to Volume: %s"\
+ ,volume->name); \
+ rpcsvc_request_set_private (req, volume); \
+ } \
+ } while (0); \
+
+#define acl3_volume_started_check(nfs3state, vlm, rtval, erlbl) \
+ do { \
+ if ((!nfs_subvolume_started (nfs_state (nfs3state->nfsx), vlm))){\
+ gf_log (GF_ACL, GF_LOG_ERROR, "Volume is disabled: %s",\
+ vlm->name); \
+ rtval = RPCSVC_ACTOR_IGNORE; \
+ goto erlbl; \
+ } \
+ } while (0) \
+
+#define acl3_check_fh_resolve_status(cst, nfstat, erlabl) \
+ do { \
+ xlator_t *xlatorp = NULL; \
+ char buf[256], gfid[256]; \
+ rpc_transport_t *trans = NULL; \
+ if ((cst)->resolve_ret < 0) { \
+ trans = rpcsvc_request_transport (cst->req); \
+ xlatorp = nfs3_fh_to_xlator (cst->nfs3state, \
+ &cst->resolvefh); \
+ uuid_unparse (cst->resolvefh.gfid, gfid); \
+ snprintf (buf, sizeof (buf), "(%s) %s : %s", \
+ trans->peerinfo.identifier, \
+ xlatorp ? xlatorp->name : "ERR", \
+ gfid); \
+ gf_log (GF_ACL, GF_LOG_ERROR, "Unable to resolve FH"\
+ ": %s", buf); \
+ nfstat = nfs3_errno_to_nfsstat3 (cst->resolve_errno);\
+ goto erlabl; \
+ } \
+ } while (0) \
+
+#define acl3_handle_call_state_init(nfs3state, calls, rq, v, opstat, errlabel)\
+ do { \
+ calls = nfs3_call_state_init ((nfs3state), (rq), v); \
+ if (!calls) { \
+ gf_log (GF_ACL, GF_LOG_ERROR, "Failed to " \
+ "init call state"); \
+ opstat = NFS3ERR_SERVERFAULT; \
+ rpcsvc_request_seterr (req, SYSTEM_ERR); \
+ goto errlabel; \
+ } \
+ } while (0) \
+
+
+int
+acl3svc_submit_reply (rpcsvc_request_t *req, void *arg, acl3_serializer sfunc)
+{
+ struct iovec outmsg = {0, };
+ struct iobuf *iob = NULL;
+ struct nfs3_state *nfs3 = NULL;
+ int ret = -1;
+ ssize_t msglen = 0;
+ struct iobref *iobref = NULL;
+
+ if (!req)
+ return -1;
+
+ nfs3 = (struct nfs3_state *)rpcsvc_request_program_private (req);
+ if (!nfs3) {
+ gf_log (GF_ACL, GF_LOG_ERROR, "mount state not found");
+ goto ret;
+ }
+
+ /* First, get the io buffer into which the reply in arg will
+ * be serialized.
+ */
+ iob = iobuf_get (nfs3->iobpool);
+ if (!iob) {
+ gf_log (GF_ACL, GF_LOG_ERROR, "Failed to get iobuf");
+ goto ret;
+ }
+
+ iobuf_to_iovec (iob, &outmsg);
+ /* Use the given serializer to translate the give C structure in arg
+ * to XDR format which will be written into the buffer in outmsg.
+ */
+ msglen = sfunc (outmsg, arg);
+ if (msglen < 0) {
+ gf_log (GF_ACL, GF_LOG_ERROR, "Failed to encode message");
+ goto ret;
+ }
+ outmsg.iov_len = msglen;
+
+ iobref = iobref_new ();
+ if (iobref == NULL) {
+ gf_log (GF_ACL, GF_LOG_ERROR, "Failed to get iobref");
+ goto ret;
+ }
+
+ ret = iobref_add (iobref, iob);
+ if (ret) {
+ gf_log (GF_ACL, GF_LOG_ERROR, "Failed to add iob to iobref");
+ goto ret;
+ }
+
+ /* Then, submit the message for transmission. */
+ ret = rpcsvc_submit_message (req, &outmsg, 1, NULL, 0, iobref);
+ if (ret == -1) {
+ gf_log (GF_ACL, GF_LOG_ERROR, "Reply submission failed");
+ goto ret;
+ }
+
+ ret = 0;
+ret:
+ if (iob)
+ iobuf_unref (iob);
+ if (iobref)
+ iobref_unref (iobref);
+
+ return ret;
+}
+
+
+int
+acl3svc_null (rpcsvc_request_t *req)
+{
+ struct iovec dummyvec = {0, };
+
+ if (!req) {
+ gf_log (GF_ACL, GF_LOG_ERROR, "Got NULL request!");
+ return 0;
+ }
+ rpcsvc_submit_generic (req, &dummyvec, 1, NULL, 0, NULL);
+ return 0;
+}
+
+int
+acl3_getacl_reply (nfs3_call_state_t *cs, getaclreply *reply)
+{
+ acl3svc_submit_reply (cs->req, (void *)reply,
+ (acl3_serializer)xdr_serialize_getaclreply);
+ return 0;
+}
+
+
+int
+acl3_getacl_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ nfs3_call_state_t *cs = NULL;
+ data_t *data = NULL;
+ int *p = NULL;
+ int i = 0;
+ getaclreply *getaclreply = NULL;
+
+ if (!frame->local) {
+ gf_log (GF_ACL, GF_LOG_ERROR, "Invalid argument,"
+ " frame->local NULL");
+ return EINVAL;
+ }
+ cs = frame->local;
+ getaclreply = &cs->args.getaclreply;
+ if (op_ret == -1) {
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ goto err;
+ }
+
+ getaclreply->aclentry.aclentry_val = cs->aclentry;
+ getaclreply->daclentry.daclentry_val = cs->daclentry;
+
+ /* FIXME: use posix_acl_from_xattr() */
+ data = dict_get (dict, POSIX_ACL_ACCESS_XATTR);
+ if (data && (p = data_to_bin (data))) {
+ /* POSIX_ACL_VERSION */
+ p++;
+ while ((char *)p < (data->data + data->len)) {
+ getaclreply->aclentry.aclentry_val[i].type = *(*(short **)&p)++;
+ getaclreply->aclentry.aclentry_val[i].perm = *(*(short **)&p)++;
+ getaclreply->aclentry.aclentry_val[i].uid = *(*(int **)&p)++;
+ i++;
+ }
+ getaclreply->aclcount = getaclreply->aclentry.aclentry_len = i;
+ }
+ i = 0;
+
+ data = dict_get (dict, POSIX_ACL_DEFAULT_XATTR);
+ if (data && (p = data_to_bin (data))) {
+ /* POSIX_ACL_VERSION */
+ p++;
+ while ((char *)p < (data->data + data->len)) {
+ getaclreply->daclentry.daclentry_val[i].type = *(*(short **)&p)++;
+ getaclreply->daclentry.daclentry_val[i].perm = *(*(short **)&p)++;
+ getaclreply->daclentry.daclentry_val[i].uid = *(*(int **)&p)++;
+ i++;
+ }
+ getaclreply->daclcount = getaclreply->daclentry.daclentry_len = i;
+ }
+
+ acl3_getacl_reply (cs, getaclreply);
+ nfs3_call_state_wipe (cs);
+ return 0;
+
+err:
+ if (getaclreply)
+ getaclreply->status = stat;
+ acl3_getacl_reply (cs, getaclreply);
+ nfs3_call_state_wipe (cs);
+ return 0;
+}
+
+int
+acl3_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ nfs3_call_state_t *cs = NULL;
+ getaclreply *getaclreply = NULL;
+ int ret = -1;
+ nfs_user_t nfu = {0, };
+
+ if (!frame->local) {
+ gf_log (GF_ACL, GF_LOG_ERROR, "Invalid argument,"
+ " frame->local NULL");
+ return EINVAL;
+ }
+
+ cs = frame->local;
+ getaclreply = &cs->args.getaclreply;
+
+ if (op_ret == -1) {
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ goto err;
+ }
+
+ getaclreply->attr_follows = 1;
+ getaclreply->attr = nfs3_stat_to_fattr3 (buf);
+ getaclreply->mask = 0xf;
+ nfs_request_user_init (&nfu, cs->req);
+ ret = nfs_getxattr (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc, NULL, NULL,
+ acl3_getacl_cbk, cs);
+ if (ret == -1) {
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ goto err;
+ }
+ return 0;
+err:
+ getaclreply->status = stat;
+ acl3_getacl_reply (cs, getaclreply);
+ nfs3_call_state_wipe (cs);
+ return 0;
+}
+
+
+int
+acl3_getacl_resume (void *carg)
+{
+ int ret = -1;
+ nfs3_call_state_t *cs = NULL;
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ nfs_user_t nfu = {0, };
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ acl3_check_fh_resolve_status (cs, stat, acl3err);
+ nfs_request_user_init (&nfu, cs->req);
+
+ ret = nfs_stat (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ acl3_stat_cbk, cs);
+ stat = -ret;
+acl3err:
+ if (ret < 0) {
+ gf_log (GF_ACL, GF_LOG_ERROR, "unable to open_and_resume");
+ cs->args.getaclreply.status = nfs3_errno_to_nfsstat3 (stat);
+ acl3_getacl_reply (cs, &cs->args.getaclreply);
+ nfs3_call_state_wipe (cs);
+ }
+
+ return ret;
+}
+
+
+int
+acl3svc_getacl (rpcsvc_request_t *req)
+{
+ xlator_t *vol = NULL;
+ struct nfs_state *nfs = NULL;
+ nfs3_state_t *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+ int ret = RPCSVC_ACTOR_ERROR;
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ struct nfs3_fh fh, *fhp = NULL;
+ getaclargs getaclargs;
+
+ if (!req)
+ return ret;
+
+ acl3_validate_nfs3_state (req, nfs3, stat, rpcerr, ret);
+ nfs = nfs_state (nfs3->nfsx);
+ memset (&getaclargs, 0, sizeof (getaclargs));
+ getaclargs.fh.n_bytes = (char *)&fh;
+ if (xdr_to_getaclargs(req->msg[0], &getaclargs) <= 0) {
+ gf_log (GF_ACL, GF_LOG_ERROR, "Error decoding args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+ fhp = &fh;
+ acl3_validate_gluster_fh (&fh, stat, acl3err);
+ acl3_map_fh_to_volume (nfs->nfs3state, fhp, req,
+ vol, stat, acl3err);
+ acl3_handle_call_state_init (nfs->nfs3state, cs, req,
+ vol, stat, rpcerr);
+
+ cs->vol = vol;
+ acl3_volume_started_check (nfs3, vol, ret, acl3err);
+
+ ret = nfs3_fh_resolve_and_resume (cs, fhp,
+ NULL, acl3_getacl_resume);
+
+acl3err:
+ if (ret < 0) {
+ gf_log (GF_ACL, GF_LOG_ERROR, "unable to resolve and resume");
+ if (cs) {
+ cs->args.getaclreply.status = stat;
+ acl3_getacl_reply (cs, &cs->args.getaclreply);
+ nfs3_call_state_wipe (cs);
+ }
+ return 0;
+ }
+
+rpcerr:
+ return ret;
+}
+
+int
+acl3_setacl_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
+{
+ nfs3_call_state_t *cs = NULL;
+ cs = frame->local;
+ if (op_ret < 0) {
+ nfsstat3 status = nfs3_cbk_errno_status (op_ret, op_errno);
+ cs->args.setaclreply.status = status;
+ }
+
+ acl3svc_submit_reply (cs->req, (void *)&cs->args.setaclreply,
+ (acl3_serializer)xdr_serialize_setaclreply);
+ return 0;
+}
+
+int
+acl3_setacl_resume (void *carg)
+{
+ int ret = -1;
+ nfs3_call_state_t *cs = NULL;
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ nfs_user_t nfu = {0, };
+ dict_t *xattr = NULL;
+
+ if (!carg)
+ return ret;
+ cs = (nfs3_call_state_t *)carg;
+ acl3_check_fh_resolve_status (cs, stat, acl3err);
+ nfs_request_user_init (&nfu, cs->req);
+ xattr = dict_new();
+ if (cs->aclcount)
+ ret = dict_set_static_bin (xattr, POSIX_ACL_ACCESS_XATTR, cs->aclxattr,
+ cs->aclcount * 8 + 4);
+ if (cs->daclcount)
+ ret = dict_set_static_bin (xattr, POSIX_ACL_DEFAULT_XATTR,
+ cs->daclxattr, cs->daclcount * 8 + 4);
+
+ ret = nfs_setxattr (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc, xattr,
+ 0, NULL, acl3_setacl_cbk, cs);
+ dict_unref (xattr);
+
+acl3err:
+ if (ret < 0) {
+ stat = -ret;
+ gf_log (GF_ACL, GF_LOG_ERROR, "unable to open_and_resume");
+ cs->args.setaclreply.status = nfs3_errno_to_nfsstat3 (stat);
+ acl3svc_submit_reply (cs->req, (void *)&cs->args.setaclreply,
+ (acl3_serializer)xdr_serialize_setaclreply);
+ nfs3_call_state_wipe (cs);
+ }
+
+ return ret;
+}
+
+
+int
+acl3svc_setacl (rpcsvc_request_t *req)
+{
+ xlator_t *vol = NULL;
+ struct nfs_state *nfs = NULL;
+ nfs3_state_t *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+ int ret = RPCSVC_ACTOR_ERROR;
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ struct nfs3_fh fh;
+ struct nfs3_fh *fhp = NULL;
+ setaclargs setaclargs;
+ aclentry *aclentry = NULL;
+ struct aclentry *daclentry = NULL;
+ int i = 0;
+ struct posix_acl_xattr_header *bufheader = NULL;
+ struct posix_acl_xattr_entry *bufentry = NULL;
+
+ if (!req)
+ return ret;
+ aclentry = GF_CALLOC (NFS_ACL_MAX_ENTRIES, sizeof(*aclentry),
+ gf_nfs_mt_arr);
+ if (!aclentry) {
+ goto rpcerr;
+ }
+ daclentry = GF_CALLOC (NFS_ACL_MAX_ENTRIES, sizeof(*daclentry),
+ gf_nfs_mt_arr);
+ if (!daclentry) {
+ goto rpcerr;
+ }
+
+ acl3_validate_nfs3_state (req, nfs3, stat, rpcerr, ret);
+ nfs = nfs_state (nfs3->nfsx);
+ memset (&setaclargs, 0, sizeof (setaclargs));
+ memset (&fh, 0, sizeof (fh));
+ setaclargs.fh.n_bytes = (char *)&fh;
+ setaclargs.aclentry.aclentry_val = aclentry;
+ setaclargs.daclentry.daclentry_val = daclentry;
+ if (xdr_to_setaclargs(req->msg[0], &setaclargs) <= 0) {
+ gf_log (GF_ACL, GF_LOG_ERROR, "Error decoding args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+ fhp = &fh;
+ acl3_validate_gluster_fh (fhp, stat, acl3err);
+ acl3_map_fh_to_volume (nfs->nfs3state, fhp, req,
+ vol, stat, acl3err);
+ acl3_handle_call_state_init (nfs->nfs3state, cs, req,
+ vol, stat, rpcerr);
+
+ cs->vol = vol;
+ acl3_volume_started_check (nfs3, vol, ret, rpcerr);
+
+ cs->aclcount = setaclargs.aclcount;
+ cs->daclcount = setaclargs.daclcount;
+
+ if ((cs->aclcount > NFS_ACL_MAX_ENTRIES) ||
+ (cs->daclcount > NFS_ACL_MAX_ENTRIES))
+ goto acl3err;
+ /* FIXME: use posix_acl_to_xattr() */
+ /* Populate xattr buffer for user ACL */
+ bufheader = (struct posix_acl_xattr_header *)(cs->aclxattr);
+ bufheader->version = htole32(POSIX_ACL_VERSION);
+ bufentry = bufheader->entries;
+ for (i = 0; i < cs->aclcount; i++) {
+ int uaceuid;
+ const struct aclentry *uace = &aclentry[i];
+ switch (uace->type) {
+ case POSIX_ACL_USER:
+ case POSIX_ACL_GROUP:
+ uaceuid = uace->uid;
+ break;
+ default:
+ uaceuid = POSIX_ACL_UNDEFINED_ID;
+ break;
+ }
+ bufentry->tag = htole16(uace->type);
+ bufentry->perm = htole16(uace->perm);
+ bufentry->id = htole32(uaceuid);
+
+ bufentry++;
+ }
+
+ /* Populate xattr buffer for Default ACL */
+ bufheader = (struct posix_acl_xattr_header *)(cs->aclxattr);
+ bufheader->version = htole32(POSIX_ACL_VERSION);
+ bufentry = bufheader->entries;
+ for (i = 0; i < cs->daclcount; i++) {
+ int daceuid;
+ int dacetype;
+ const struct aclentry *dace = &daclentry[i];
+ /*
+ * For "default ACL", NFSv3 handles the 'type' differently
+ * i.e. by logical OR'ing 'type' with NFS_ACL_DEFAULT.
+ * Which the backend File system does not understand and
+ * that needs to be masked OFF.
+ */
+ dacetype = (dace->type & ~(NFS_ACL_DEFAULT));
+ switch (dacetype) {
+ case POSIX_ACL_USER:
+ case POSIX_ACL_GROUP:
+ daceuid = dace->uid;
+ break;
+ default:
+ daceuid = POSIX_ACL_UNDEFINED_ID;
+ break;
+ }
+ bufentry->tag = htole16(dacetype);
+ bufentry->perm = htole16(dace->perm);
+ bufentry->id = htole32(daceuid);
+
+ bufentry++;
+ }
+
+
+ ret = nfs3_fh_resolve_and_resume (cs, fhp,
+ NULL, acl3_setacl_resume);
+
+acl3err:
+ if (ret < 0) {
+ gf_log (GF_ACL, GF_LOG_ERROR, "unable to resolve and resume");
+ cs->args.setaclreply.status = stat;
+ acl3svc_submit_reply (cs->req, (void *)&cs->args.setaclreply,
+ (acl3_serializer)xdr_serialize_setaclreply);
+ nfs3_call_state_wipe (cs);
+ GF_FREE(aclentry);
+ GF_FREE(daclentry);
+ return 0;
+ }
+
+rpcerr:
+ if (ret < 0)
+ nfs3_call_state_wipe (cs);
+ if (aclentry)
+ GF_FREE (aclentry);
+ if (daclentry)
+ GF_FREE (daclentry);
+ return ret;
+}
+
+
+
+rpcsvc_actor_t acl3svc_actors[ACL3_PROC_COUNT] = {
+ {"NULL", ACL3_NULL, acl3svc_null, NULL, 0},
+ {"GETACL", ACL3_GETACL, acl3svc_getacl, NULL, 0},
+ {"SETACL", ACL3_SETACL, acl3svc_setacl, NULL, 0},
+};
+
+rpcsvc_program_t acl3prog = {
+ .progname = "ACL3",
+ .prognum = ACL_PROGRAM,
+ .progver = ACLV3_VERSION,
+ .progport = GF_NFS3_PORT,
+ .actors = acl3svc_actors,
+ .numactors = ACL3_PROC_COUNT,
+ .min_auth = AUTH_NULL,
+};
+
+rpcsvc_program_t *
+acl3svc_init(xlator_t *nfsx)
+{
+ struct nfs3_state *ns = NULL;
+ struct nfs_state *nfs = NULL;
+ dict_t *options = NULL;
+ int ret = -1;
+ char *portstr = NULL;
+ static gf_boolean_t acl3_inited = _gf_false;
+
+ /* Already inited */
+ if (acl3_inited)
+ return &acl3prog;
+
+ nfs = (struct nfs_state*)nfsx->private;
+
+ ns = nfs->nfs3state;
+ if (!ns) {
+ gf_log (GF_ACL, GF_LOG_ERROR, "ACL3 init failed");
+ goto err;
+ }
+ acl3prog.private = ns;
+
+ options = dict_new ();
+
+ ret = gf_asprintf (&portstr, "%d", GF_ACL3_PORT);
+ if (ret == -1)
+ goto err;
+
+ ret = dict_set_dynstr (options, "transport.socket.listen-port",
+ portstr);
+ if (ret == -1)
+ goto err;
+ ret = dict_set_str (options, "transport-type", "socket");
+ if (ret == -1) {
+ gf_log (GF_ACL, GF_LOG_ERROR, "dict_set_str error");
+ goto err;
+ }
+
+ if (nfs->allow_insecure) {
+ ret = dict_set_str (options, "rpc-auth-allow-insecure", "on");
+ if (ret == -1) {
+ gf_log (GF_ACL, GF_LOG_ERROR, "dict_set_str error");
+ goto err;
+ }
+ ret = dict_set_str (options, "rpc-auth.ports.insecure", "on");
+ if (ret == -1) {
+ gf_log (GF_ACL, GF_LOG_ERROR, "dict_set_str error");
+ goto err;
+ }
+ }
+
+ ret = dict_set_str (options, "transport.address-family", "inet");
+ if (ret == -1) {
+ gf_log (GF_ACL, GF_LOG_ERROR, "dict_set_str error");
+ goto err;
+ }
+
+ ret = rpcsvc_create_listeners (nfs->rpcsvc, options, "ACL");
+ if (ret == -1) {
+ gf_log (GF_ACL, GF_LOG_ERROR, "Unable to create listeners");
+ dict_unref (options);
+ goto err;
+ }
+
+ acl3_inited = _gf_true;
+ return &acl3prog;
+err:
+ return NULL;
+}
diff --git a/xlators/nfs/server/src/acl3.h b/xlators/nfs/server/src/acl3.h
new file mode 100644
index 000000000..e0e61281a
--- /dev/null
+++ b/xlators/nfs/server/src/acl3.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifndef _ACL3_H
+#define _ACL3_H
+
+#include "glusterfs-acl.h"
+
+#define GF_ACL3_PORT 38469
+#define GF_ACL GF_NFS"-ACL"
+
+/*
+ * NFSv3, identifies the default ACL by NFS_ACL_DEFAULT. Gluster
+ * NFS needs to mask it OFF before sending it upto POSIX layer
+ * or File system layer.
+ */
+#define NFS_ACL_DEFAULT 0x1000
+
+#define NFS_ACL_MAX_ENTRIES 1024
+
+rpcsvc_program_t *
+acl3svc_init(xlator_t *nfsx);
+
+#endif
diff --git a/xlators/nfs/server/src/mount3.c b/xlators/nfs/server/src/mount3.c
new file mode 100644
index 000000000..b0824bf10
--- /dev/null
+++ b/xlators/nfs/server/src/mount3.c
@@ -0,0 +1,2759 @@
+/*
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "rpcsvc.h"
+#include "dict.h"
+#include "xlator.h"
+#include "mount3.h"
+#include "xdr-nfs3.h"
+#include "msg-nfs3.h"
+#include "iobuf.h"
+#include "nfs-common.h"
+#include "nfs3-fh.h"
+#include "nfs-fops.h"
+#include "nfs-inodes.h"
+#include "nfs-generics.h"
+#include "locking.h"
+#include "iatt.h"
+#include "nfs-mem-types.h"
+#include "nfs.h"
+#include "common-utils.h"
+#include "store.h"
+
+#include <errno.h>
+#include <sys/socket.h>
+#include <sys/uio.h>
+
+
+#define IPv4_ADDR_SIZE 32
+
+/* Macro to typecast the parameter to struct sockaddr_in
+ */
+#define SA(addr) ((struct sockaddr_in*)(addr))
+
+/* Macro will mask the ip address with netmask.
+ */
+#define MASKED_IP(ipv4addr, netmask) \
+ (ntohl(SA(ipv4addr)->sin_addr.s_addr) & (netmask))
+
+/* Macro will compare two IP address after applying the mask
+ */
+#define COMPARE_IPv4_ADDRS(ip1, ip2, netmask) \
+ ((MASKED_IP(ip1, netmask)) == (MASKED_IP(ip2, netmask)))
+
+/* This macro will assist in freeing up entire link list
+ * of host_auth_spec structure.
+ */
+#define FREE_HOSTSPEC(exp) do { \
+ struct host_auth_spec *host= exp->hostspec; \
+ while (NULL != host){ \
+ struct host_auth_spec* temp = host; \
+ host = host->next; \
+ if (NULL != temp->host_addr) { \
+ GF_FREE (temp->host_addr); \
+ } \
+ GF_FREE (temp); \
+ } \
+ exp->hostspec = NULL; \
+ } while (0)
+
+typedef ssize_t (*mnt3_serializer) (struct iovec outmsg, void *args);
+
+extern void *
+mount3udp_thread (void *argv);
+
+static inline void
+mnt3_export_free (struct mnt3_export *exp)
+{
+ if (!exp)
+ return;
+
+ if (exp->exptype == MNT3_EXPTYPE_DIR)
+ FREE_HOSTSPEC (exp);
+ GF_FREE (exp->expname);
+ GF_FREE (exp);
+}
+
+/* Generic reply function for MOUNTv3 specific replies. */
+int
+mnt3svc_submit_reply (rpcsvc_request_t *req, void *arg, mnt3_serializer sfunc)
+{
+ struct iovec outmsg = {0, };
+ struct iobuf *iob = NULL;
+ struct mount3_state *ms = NULL;
+ int ret = -1;
+ ssize_t msglen = 0;
+ struct iobref *iobref = NULL;
+
+ if (!req)
+ return -1;
+
+ ms = (struct mount3_state *)rpcsvc_request_program_private (req);
+ if (!ms) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "mount state not found");
+ goto ret;
+ }
+
+ /* First, get the io buffer into which the reply in arg will
+ * be serialized.
+ */
+ /* TODO: use 'xdrproc_t' instead of 'sfunc' to get the xdr-size */
+ iob = iobuf_get (ms->iobpool);
+ if (!iob) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Failed to get iobuf");
+ goto ret;
+ }
+
+ iobuf_to_iovec (iob, &outmsg);
+ /* Use the given serializer to translate the give C structure in arg
+ * to XDR format which will be written into the buffer in outmsg.
+ */
+ msglen = sfunc (outmsg, arg);
+ if (msglen < 0) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Failed to encode message");
+ goto ret;
+ }
+ outmsg.iov_len = msglen;
+
+ iobref = iobref_new ();
+ if (iobref == NULL) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Failed to get iobref");
+ goto ret;
+ }
+
+ ret = iobref_add (iobref, iob);
+ if (ret) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Failed to add iob to iobref");
+ goto ret;
+ }
+
+ /* Then, submit the message for transmission. */
+ ret = rpcsvc_submit_message (req, &outmsg, 1, NULL, 0, iobref);
+ if (ret == -1) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Reply submission failed");
+ goto ret;
+ }
+
+ ret = 0;
+ret:
+ if (NULL != iob)
+ iobuf_unref (iob);
+ if (NULL != iobref)
+ iobref_unref (iobref);
+
+ return ret;
+}
+
+
+/* Generic error reply function, just pass the err status
+ * and it will do the rest, including transmission.
+ */
+int
+mnt3svc_mnt_error_reply (rpcsvc_request_t *req, int mntstat)
+{
+ mountres3 res;
+
+ if (!req)
+ return -1;
+
+ res.fhs_status = mntstat;
+ mnt3svc_submit_reply (req, (void *)&res,
+ (mnt3_serializer)xdr_serialize_mountres3);
+
+ return 0;
+}
+
+
+mountstat3
+mnt3svc_errno_to_mnterr (int32_t errnum)
+{
+ mountstat3 stat;
+
+ switch (errnum) {
+
+ case 0:
+ stat = MNT3_OK;
+ break;
+ case ENOENT:
+ stat = MNT3ERR_NOENT;
+ break;
+ case EPERM:
+ stat = MNT3ERR_PERM;
+ break;
+ case EIO:
+ stat = MNT3ERR_IO;
+ break;
+ case EACCES:
+ stat = MNT3ERR_ACCES;
+ break;
+ case ENOTDIR:
+ stat = MNT3ERR_NOTDIR;
+ break;
+ case EINVAL:
+ stat = MNT3ERR_INVAL;
+ break;
+ case ENOSYS:
+ stat = MNT3ERR_NOTSUPP;
+ break;
+ case ENOMEM:
+ stat = MNT3ERR_SERVERFAULT;
+ break;
+ default:
+ stat = MNT3ERR_SERVERFAULT;
+ break;
+ }
+
+ return stat;
+}
+
+
+mountres3
+mnt3svc_set_mountres3 (mountstat3 stat, struct nfs3_fh *fh, int *authflavor,
+ u_int aflen)
+{
+ mountres3 res = {0, };
+ uint32_t fhlen = 0;
+
+ res.fhs_status = stat;
+ fhlen = nfs3_fh_compute_size (fh);
+ res.mountres3_u.mountinfo.fhandle.fhandle3_len = fhlen;
+ res.mountres3_u.mountinfo.fhandle.fhandle3_val = (char *)fh;
+ res.mountres3_u.mountinfo.auth_flavors.auth_flavors_val = authflavor;
+ res.mountres3_u.mountinfo.auth_flavors.auth_flavors_len = aflen;
+
+ return res;
+}
+
+/* Read the rmtab from the store_handle and append (or not) the entries to the
+ * mountlist.
+ *
+ * Requires the store_handle to be locked.
+ */
+static int
+__mount_read_rmtab (gf_store_handle_t *sh, struct list_head *mountlist,
+ gf_boolean_t append)
+{
+ int ret = 0;
+ unsigned int idx = 0;
+ struct mountentry *me = NULL, *tmp = NULL;
+ /* me->hostname is a char[MNTPATHLEN] */
+ char key[MNTPATHLEN + 11];
+
+ GF_ASSERT (sh && mountlist);
+
+ if (!gf_store_locked_local (sh)) {
+ gf_log (GF_MNT, GF_LOG_WARNING, "Not reading unlocked %s",
+ sh->path);
+ return -1;
+ }
+
+ if (!append) {
+ list_for_each_entry_safe (me, tmp, mountlist, mlist) {
+ list_del (&me->mlist);
+ GF_FREE (me);
+ }
+ me = NULL;
+ }
+
+ for (;;) {
+ char *value = NULL;
+
+ if (me && append) {
+ /* do not add duplicates */
+ list_for_each_entry (tmp, mountlist, mlist) {
+ if (!strcmp(tmp->hostname, me->hostname) &&
+ !strcmp(tmp->exname, me->exname)) {
+ GF_FREE (me);
+ goto dont_add;
+ }
+ }
+ list_add_tail (&me->mlist, mountlist);
+ } else if (me) {
+ list_add_tail (&me->mlist, mountlist);
+ }
+
+dont_add:
+ me = GF_CALLOC (1, sizeof (*me), gf_nfs_mt_mountentry);
+ if (!me) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Out of memory");
+ ret = -1;
+ goto out;
+ }
+
+ INIT_LIST_HEAD (&me->mlist);
+
+ snprintf (key, 9 + MNTPATHLEN, "hostname-%d", idx);
+ ret = gf_store_retrieve_value (sh, key, &value);
+ if (ret)
+ break;
+ strncpy (me->hostname, value, MNTPATHLEN);
+ GF_FREE (value);
+
+ snprintf (key, 11 + MNTPATHLEN, "mountpoint-%d", idx);
+ ret = gf_store_retrieve_value (sh, key, &value);
+ if (ret)
+ break;
+ strncpy (me->exname, value, MNTPATHLEN);
+ GF_FREE (value);
+
+ idx++;
+ gf_log (GF_MNT, GF_LOG_TRACE, "Read entries %s:%s", me->hostname, me->exname);
+ }
+ gf_log (GF_MNT, GF_LOG_DEBUG, "Read %d entries from '%s'", idx, sh->path);
+ GF_FREE (me);
+out:
+ return ret;
+}
+
+/* Overwrite the contents of the rwtab with te in-memory client list.
+ * Fail gracefully if the stora_handle is not locked.
+ */
+static void
+__mount_rewrite_rmtab(struct mount3_state *ms, gf_store_handle_t *sh)
+{
+ struct mountentry *me = NULL;
+ char key[16];
+ int fd, ret;
+ unsigned int idx = 0;
+
+ if (!gf_store_locked_local (sh)) {
+ gf_log (GF_MNT, GF_LOG_WARNING, "Not modifying unlocked %s",
+ sh->path);
+ return;
+ }
+
+ fd = gf_store_mkstemp (sh);
+ if (fd == -1) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Failed to open %s", sh->path);
+ return;
+ }
+
+ list_for_each_entry (me, &ms->mountlist, mlist) {
+ snprintf (key, 16, "hostname-%d", idx);
+ ret = gf_store_save_value (fd, key, me->hostname);
+ if (ret)
+ goto fail;
+
+ snprintf (key, 16, "mountpoint-%d", idx);
+ ret = gf_store_save_value (fd, key, me->exname);
+ if (ret)
+ goto fail;
+
+ idx++;
+ }
+
+ gf_log (GF_MNT, GF_LOG_DEBUG, "Updated rmtab with %d entries", idx);
+
+ close (fd);
+ if (gf_store_rename_tmppath (sh))
+ gf_log (GF_MNT, GF_LOG_ERROR, "Failed to overwrite rwtab %s",
+ sh->path);
+
+ return;
+
+fail:
+ gf_log (GF_MNT, GF_LOG_ERROR, "Failed to update %s", sh->path);
+ close (fd);
+ gf_store_unlink_tmppath (sh);
+}
+
+/* Read the rmtab into a clean ms->mountlist.
+ */
+static void
+mount_read_rmtab (struct mount3_state *ms)
+{
+ gf_store_handle_t *sh = NULL;
+ struct nfs_state *nfs = NULL;
+ int ret;
+
+ nfs = (struct nfs_state *)ms->nfsx->private;
+
+ ret = gf_store_handle_new (nfs->rmtab, &sh);
+ if (ret) {
+ gf_log (GF_MNT, GF_LOG_WARNING, "Failed to open '%s'",
+ nfs->rmtab);
+ return;
+ }
+
+ if (gf_store_lock (sh)) {
+ gf_log (GF_MNT, GF_LOG_WARNING, "Failed to lock '%s'",
+ nfs->rmtab);
+ goto out;
+ }
+
+ __mount_read_rmtab (sh, &ms->mountlist, _gf_false);
+ gf_store_unlock (sh);
+
+out:
+ gf_store_handle_destroy (sh);
+}
+
+/* Write the ms->mountlist to the rmtab.
+ *
+ * The rmtab could be empty, or it can exists and have been updated by a
+ * different storage server without our knowing.
+ *
+ * 1. takes the store_handle lock on the current rmtab
+ * - blocks if an other storage server rewrites the rmtab at the same time
+ * 2. [if new_rmtab] takes the store_handle lock on the new rmtab
+ * 3. reads/merges the entries from the current rmtab
+ * 4. [if new_rmtab] reads/merges the entries from the new rmtab
+ * 5. [if new_rmtab] writes the new rmtab
+ * 6. [if not new_rmtab] writes the current rmtab
+ * 7 [if new_rmtab] replaces nfs->rmtab to point to the new location
+ * 8. [if new_rmtab] releases the store_handle lock of the new rmtab
+ * 9. releases the store_handle lock of the old rmtab
+ */
+void
+mount_rewrite_rmtab (struct mount3_state *ms, char *new_rmtab)
+{
+ gf_store_handle_t *sh = NULL, *nsh = NULL;
+ struct nfs_state *nfs = NULL;
+ int ret;
+ char *rmtab = NULL;
+
+ nfs = (struct nfs_state *)ms->nfsx->private;
+
+ ret = gf_store_handle_new (nfs->rmtab, &sh);
+ if (ret) {
+ gf_log (GF_MNT, GF_LOG_WARNING, "Failed to open '%s'",
+ nfs->rmtab);
+ return;
+ }
+
+ if (gf_store_lock (sh)) {
+ gf_log (GF_MNT, GF_LOG_WARNING, "Not rewriting '%s'",
+ nfs->rmtab);
+ goto free_sh;
+ }
+
+ if (new_rmtab) {
+ ret = gf_store_handle_new (new_rmtab, &nsh);
+ if (ret) {
+ gf_log (GF_MNT, GF_LOG_WARNING, "Failed to open '%s'",
+ new_rmtab);
+ goto unlock_sh;
+ }
+
+ if (gf_store_lock (nsh)) {
+ gf_log (GF_MNT, GF_LOG_WARNING, "Not rewriting '%s'",
+ new_rmtab);
+ goto free_nsh;
+ }
+ }
+
+ /* always read the currently used rmtab */
+ __mount_read_rmtab (sh, &ms->mountlist, _gf_true);
+
+ if (new_rmtab) {
+ /* read the new rmtab and write changes to the new location */
+ __mount_read_rmtab (nsh, &ms->mountlist, _gf_true);
+ __mount_rewrite_rmtab (ms, nsh);
+
+ /* replace the nfs->rmtab reference to the new rmtab */
+ rmtab = gf_strdup(new_rmtab);
+ if (rmtab == NULL) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Out of memory, keeping "
+ "%s as rmtab", nfs->rmtab);
+ } else {
+ GF_FREE (nfs->rmtab);
+ nfs->rmtab = new_rmtab;
+ }
+
+ gf_store_unlock (nsh);
+ } else {
+ /* rewrite the current (unchanged location) rmtab */
+ __mount_rewrite_rmtab (ms, sh);
+ }
+
+free_nsh:
+ if (new_rmtab)
+ gf_store_handle_destroy (nsh);
+unlock_sh:
+ gf_store_unlock (sh);
+free_sh:
+ gf_store_handle_destroy (sh);
+}
+
+/* Add a new NFS-client to the ms->mountlist and update the rmtab if we can.
+ *
+ * A NFS-client will only be removed from the ms->mountlist in case the
+ * NFS-client sends a unmount request. It is possible that a NFS-client
+ * crashed/rebooted had network loss or something else prevented the NFS-client
+ * to unmount cleanly. In this case, a duplicate entry would be added to the
+ * ms->mountlist, which is wrong and we should prevent.
+ *
+ * It is fully acceptible that the ms->mountlist is not 100% correct, this is a
+ * common issue for all(?) NFS-servers.
+ */
+int
+mnt3svc_update_mountlist (struct mount3_state *ms, rpcsvc_request_t *req,
+ char *expname)
+{
+ struct mountentry *me = NULL;
+ struct mountentry *cur = NULL;
+ int ret = -1;
+ char *colon = NULL;
+ struct nfs_state *nfs = NULL;
+ gf_store_handle_t *sh = NULL;
+
+ if ((!ms) || (!req) || (!expname))
+ return -1;
+
+ me = (struct mountentry *)GF_CALLOC (1, sizeof (*me),
+ gf_nfs_mt_mountentry);
+ if (!me)
+ return -1;
+
+ nfs = (struct nfs_state *)ms->nfsx->private;
+
+ ret = gf_store_handle_new (nfs->rmtab, &sh);
+ if (ret) {
+ gf_log (GF_MNT, GF_LOG_WARNING, "Failed to open '%s'",
+ nfs->rmtab);
+ goto free_err;
+ }
+
+ strncpy (me->exname, expname, MNTPATHLEN);
+
+ INIT_LIST_HEAD (&me->mlist);
+ /* Must get the IP or hostname of the client so we
+ * can map it into the mount entry.
+ */
+ ret = rpcsvc_transport_peername (req->trans, me->hostname, MNTPATHLEN);
+ if (ret == -1)
+ goto free_err2;
+
+ colon = strrchr (me->hostname, ':');
+ if (colon) {
+ *colon = '\0';
+ }
+ LOCK (&ms->mountlock);
+ {
+ /* in case locking fails, we just don't write the rmtab */
+ if (gf_store_lock (sh)) {
+ gf_log (GF_MNT, GF_LOG_WARNING, "Failed to lock '%s'"
+ ", changes will not be written", nfs->rmtab);
+ } else {
+ __mount_read_rmtab (sh, &ms->mountlist, _gf_false);
+ }
+
+ /* do not add duplicates */
+ list_for_each_entry (cur, &ms->mountlist, mlist) {
+ if (!strcmp(cur->hostname, me->hostname) &&
+ !strcmp(cur->exname, me->exname)) {
+ GF_FREE (me);
+ goto dont_add;
+ }
+ }
+ list_add_tail (&me->mlist, &ms->mountlist);
+
+ /* only write the rmtab in case it was locked */
+ if (gf_store_locked_local (sh))
+ __mount_rewrite_rmtab (ms, sh);
+ }
+dont_add:
+ if (gf_store_locked_local (sh))
+ gf_store_unlock (sh);
+
+ UNLOCK (&ms->mountlock);
+
+free_err2:
+ gf_store_handle_destroy (sh);
+
+free_err:
+ if (ret == -1)
+ GF_FREE (me);
+
+ return ret;
+}
+
+
+int
+__mnt3_get_volume_id (struct mount3_state *ms, xlator_t *mntxl,
+ uuid_t volumeid)
+{
+ int ret = -1;
+ struct mnt3_export *exp = NULL;
+
+ if ((!ms) || (!mntxl))
+ return ret;
+
+ LOCK (&ms->mountlock);
+ list_for_each_entry (exp, &ms->exportlist, explist) {
+ if (exp->vol == mntxl) {
+ uuid_copy (volumeid, exp->volumeid);
+ ret = 0;
+ goto out;
+ }
+ }
+
+out:
+ UNLOCK (&ms->mountlock);
+ return ret;
+}
+
+
+int32_t
+mnt3svc_lookup_mount_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ mountres3 res = {0, };
+ rpcsvc_request_t *req = NULL;
+ struct nfs3_fh fh = {{0}, };
+ struct mount3_state *ms = NULL;
+ mountstat3 status = 0;
+ int autharr[10];
+ int autharrlen = 0;
+ rpcsvc_t *svc = NULL;
+ xlator_t *mntxl = NULL;
+ uuid_t volumeid = {0, };
+ char fhstr[1024], *path = NULL;
+
+ req = (rpcsvc_request_t *)frame->local;
+
+ if (!req)
+ return -1;
+
+ mntxl = (xlator_t *)cookie;
+ ms = (struct mount3_state *)rpcsvc_request_program_private (req);
+ if (!ms) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "mount state not found");
+ op_ret = -1;
+ op_errno = EINVAL;
+ }
+
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "error=%s", strerror (op_errno));
+ status = mnt3svc_errno_to_mnterr (op_errno);
+ }
+ if (status != MNT3_OK)
+ goto xmit_res;
+
+ path = GF_CALLOC (PATH_MAX, sizeof (char), gf_nfs_mt_char);
+ if (!path) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Out of memory");
+ goto xmit_res;
+ }
+
+ snprintf (path, PATH_MAX, "/%s", mntxl->name);
+ mnt3svc_update_mountlist (ms, req, path);
+ GF_FREE (path);
+ if (gf_nfs_dvm_off (nfs_state (ms->nfsx))) {
+ fh = nfs3_fh_build_indexed_root_fh (ms->nfsx->children, mntxl);
+ goto xmit_res;
+ }
+
+ __mnt3_get_volume_id (ms, mntxl, volumeid);
+ fh = nfs3_fh_build_uuid_root_fh (volumeid);
+
+xmit_res:
+ nfs3_fh_to_str (&fh, fhstr, sizeof (fhstr));
+ gf_log (GF_MNT, GF_LOG_DEBUG, "MNT reply: fh %s, status: %d", fhstr,
+ status);
+ if (op_ret == 0) {
+ svc = rpcsvc_request_service (req);
+ autharrlen = rpcsvc_auth_array (svc, mntxl->name, autharr,
+ 10);
+ }
+
+ res = mnt3svc_set_mountres3 (status, &fh, autharr, autharrlen);
+ mnt3svc_submit_reply (req, (void *)&res,
+ (mnt3_serializer)xdr_serialize_mountres3);
+
+ return 0;
+}
+
+
+int
+mnt3_match_dirpath_export (char *expname, char *dirpath)
+{
+ int ret = 0;
+ size_t dlen;
+
+ if ((!expname) || (!dirpath))
+ return 0;
+
+ /* Some clients send a dirpath for mount that includes the slash at the
+ * end. String compare for searching the export will fail because our
+ * exports list does not include that slash. Remove the slash to
+ * compare.
+ */
+ dlen = strlen (dirpath);
+ if (dlen && dirpath [dlen - 1] == '/')
+ dirpath [dlen - 1] = '\0';
+
+ if (dirpath[0] != '/')
+ expname++;
+
+ if (strcmp (expname, dirpath) == 0)
+ ret = 1;
+
+ return ret;
+}
+
+
+int
+mnt3svc_mount_inode (rpcsvc_request_t *req, struct mount3_state *ms,
+ xlator_t * xl, inode_t *exportinode)
+{
+ int ret = -EFAULT;
+ nfs_user_t nfu = {0, };
+ loc_t exportloc = {0, };
+
+ if ((!req) || (!xl) || (!ms) || (!exportinode))
+ return ret;
+
+ ret = nfs_inode_loc_fill (exportinode, &exportloc, NFS_RESOLVE_EXIST);
+ if (ret < 0) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Loc fill failed for export inode"
+ ": gfid %s, volume: %s",
+ uuid_utoa (exportinode->gfid), xl->name);
+ goto err;
+ }
+
+ /* To service the mount request, all we need to do
+ * is to send a lookup fop that returns the stat
+ * for the root of the child volume. This is
+ * used to build the root fh sent to the client.
+ */
+ nfs_request_user_init (&nfu, req);
+ ret = nfs_lookup (ms->nfsx, xl, &nfu, &exportloc,
+ mnt3svc_lookup_mount_cbk, (void *)req);
+
+ nfs_loc_wipe (&exportloc);
+err:
+ return ret;
+}
+
+
+/* For a volume mount request, we just have to create loc on the root inode,
+ * and send a lookup. In the lookup callback the mount reply is send along with
+ * the file handle.
+ */
+int
+mnt3svc_volume_mount (rpcsvc_request_t *req, struct mount3_state *ms,
+ struct mnt3_export *exp)
+{
+ inode_t *exportinode = NULL;
+ int ret = -EFAULT;
+ uuid_t rootgfid = {0, };
+
+ if ((!req) || (!exp) || (!ms))
+ return ret;
+
+ rootgfid[15] = 1;
+ exportinode = inode_find (exp->vol->itable, rootgfid);
+ if (!exportinode) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Failed to get root inode");
+ ret = -ENOENT;
+ goto err;
+ }
+
+ ret = mnt3svc_mount_inode (req, ms, exp->vol, exportinode);
+ inode_unref (exportinode);
+
+err:
+ return ret;
+}
+
+
+/* The catch with directory exports is that the first component of the export
+ * name will be the name of the volume.
+ * Any lookup that needs to be performed to build the directory's file handle
+ * needs to start from the directory path from the root of the volume. For that
+ * we need to strip out the volume name first.
+ */
+char *
+__volume_subdir (char *dirpath, char **volname)
+{
+ char *subdir = NULL;
+ int volname_len = 0;
+
+ if (!dirpath)
+ return NULL;
+
+ if (dirpath[0] == '/')
+ dirpath++;
+
+ subdir = index (dirpath, (int)'/');
+ if (!subdir)
+ goto out;
+
+ if (!volname)
+ goto out;
+
+ if (!*volname)
+ goto out;
+
+ /* subdir points to the first / after the volume name while dirpath
+ * points to the first char of the volume name.
+ */
+ volname_len = subdir - dirpath;
+ strncpy (*volname, dirpath, volname_len);
+ *(*volname + volname_len) = '\0';
+out:
+ return subdir;
+}
+
+
+void
+mnt3_resolve_state_wipe (mnt3_resolve_t *mres)
+{
+ if (!mres)
+ return;
+
+ nfs_loc_wipe (&mres->resolveloc);
+ GF_FREE (mres);
+
+}
+
+
+/* Sets up the component argument to contain the next component in the path and
+ * sets up path as an absolute path starting from the next component.
+ */
+static char *
+setup_next_component (char *path, size_t plen, char *component, size_t clen)
+{
+ char *comp = NULL;
+ char *nextcomp = NULL;
+
+ if ((!path) || (!component))
+ return NULL;
+
+ strncpy (component, path, clen);
+ comp = index (component, (int)'/');
+ if (!comp)
+ goto err;
+
+ comp++;
+ nextcomp = index (comp, (int)'/');
+ if (nextcomp) {
+ strncpy (path, nextcomp, plen);
+ *nextcomp = '\0';
+ } else
+ path[0] = '\0';
+
+err:
+ return comp;
+}
+
+int32_t
+mnt3_resolve_subdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent);
+
+/* There are multiple components in the directory export path and each one
+ * needs to be looked up one after the other.
+ */
+int
+__mnt3_resolve_export_subdir_comp (mnt3_resolve_t *mres)
+{
+ char dupsubdir[MNTPATHLEN];
+ char *nextcomp = NULL;
+ int ret = -EFAULT;
+ nfs_user_t nfu = {0, };
+ uuid_t gfid = {0, };
+
+ if (!mres)
+ return ret;
+
+ nextcomp = setup_next_component (mres->remainingdir,
+ sizeof (mres->remainingdir),
+ dupsubdir, sizeof (dupsubdir));
+ if (!nextcomp)
+ goto err;
+
+ /* Wipe the contents of the previous component */
+ uuid_copy (gfid, mres->resolveloc.inode->gfid);
+ nfs_loc_wipe (&mres->resolveloc);
+ ret = nfs_entry_loc_fill (mres->exp->vol->itable, gfid, nextcomp,
+ &mres->resolveloc, NFS_RESOLVE_CREATE);
+ if ((ret < 0) && (ret != -2)) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Failed to resolve and create "
+ "inode: parent gfid %s, entry %s",
+ uuid_utoa (gfid), nextcomp);
+ ret = -EFAULT;
+ goto err;
+ }
+
+ nfs_request_user_init (&nfu, mres->req);
+ ret = nfs_lookup (mres->mstate->nfsx, mres->exp->vol, &nfu,
+ &mres->resolveloc, mnt3_resolve_subdir_cbk, mres);
+
+err:
+ return ret;
+}
+
+
+int32_t
+mnt3_resolve_subdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ mnt3_resolve_t *mres = NULL;
+ mountstat3 mntstat = MNT3ERR_SERVERFAULT;
+ struct nfs3_fh fh = {{0}, };
+ int autharr[10];
+ int autharrlen = 0;
+ rpcsvc_t *svc = NULL;
+ mountres3 res = {0, };
+ xlator_t *mntxl = NULL;
+ char *path = NULL;
+
+ mres = frame->local;
+ mntxl = (xlator_t *)cookie;
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "path=%s (%s)",
+ mres->resolveloc.path, strerror (op_errno));
+ mntstat = mnt3svc_errno_to_mnterr (op_errno);
+ goto err;
+ }
+
+ inode_link (mres->resolveloc.inode, mres->resolveloc.parent,
+ mres->resolveloc.name, buf);
+
+ nfs3_fh_build_child_fh (&mres->parentfh, buf, &fh);
+ if (strlen (mres->remainingdir) <= 0) {
+ op_ret = -1;
+ mntstat = MNT3_OK;
+ path = GF_CALLOC (PATH_MAX, sizeof (char), gf_nfs_mt_char);
+ if (!path) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Memory allocation "
+ "failed");
+ goto err;
+ }
+ snprintf (path, PATH_MAX, "/%s%s", mres->exp->vol->name,
+ mres->resolveloc.path);
+ mnt3svc_update_mountlist (mres->mstate, mres->req,
+ path);
+ GF_FREE (path);
+ } else {
+ mres->parentfh = fh;
+ op_ret = __mnt3_resolve_export_subdir_comp (mres);
+ if (op_ret < 0)
+ mntstat = mnt3svc_errno_to_mnterr (-op_ret);
+ }
+err:
+ if (op_ret == -1) {
+ gf_log (GF_MNT, GF_LOG_DEBUG, "Mount reply status: %d",
+ mntstat);
+ svc = rpcsvc_request_service (mres->req);
+ autharrlen = rpcsvc_auth_array (svc, mntxl->name, autharr,
+ 10);
+
+ res = mnt3svc_set_mountres3 (mntstat, &fh, autharr, autharrlen);
+ mnt3svc_submit_reply (mres->req, (void *)&res,
+ (mnt3_serializer)xdr_serialize_mountres3);
+ mnt3_resolve_state_wipe (mres);
+ }
+
+ return 0;
+}
+
+
+
+/* We will always have to perform a hard lookup on all the components of a
+ * directory export for a mount request because in the mount reply we need the
+ * file handle of the directory. Our file handle creation code is designed with
+ * the assumption that to build a child file/dir fh, we'll always have the
+ * parent dir's fh available so that we may copy the hash array of the previous
+ * dir levels.
+ *
+ * Since we do not store the file handles anywhere, for every mount request we
+ * must resolve the file handles of every component so that the parent dir file
+ * of the exported directory can be built.
+ */
+int
+__mnt3_resolve_subdir (mnt3_resolve_t *mres)
+{
+ char dupsubdir[MNTPATHLEN];
+ char *firstcomp = NULL;
+ int ret = -EFAULT;
+ nfs_user_t nfu = {0, };
+ uuid_t rootgfid = {0, };
+
+ if (!mres)
+ return ret;
+
+ firstcomp = setup_next_component (mres->remainingdir,
+ sizeof (mres->remainingdir),
+ dupsubdir, sizeof (dupsubdir));
+ if (!firstcomp)
+ goto err;
+
+ rootgfid[15] = 1;
+ ret = nfs_entry_loc_fill (mres->exp->vol->itable, rootgfid, firstcomp,
+ &mres->resolveloc, NFS_RESOLVE_CREATE);
+ if ((ret < 0) && (ret != -2)) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Failed to resolve and create "
+ "inode for volume root: %s", mres->exp->vol->name);
+ ret = -EFAULT;
+ goto err;
+ }
+
+ nfs_request_user_init (&nfu, mres->req);
+ ret = nfs_lookup (mres->mstate->nfsx, mres->exp->vol, &nfu,
+ &mres->resolveloc, mnt3_resolve_subdir_cbk, mres);
+
+err:
+ return ret;
+}
+
+
+/**
+ * This function will verify if the client is allowed to mount
+ * the directory or not. Client's IP address will be compared with
+ * allowed IP list or range present in mnt3_export structure.
+ *
+ * @param req - RPC request. This structure contains client's IP address.
+ * @param export - mnt3_export structure. Contains allowed IP list/range.
+ *
+ * @return 0 - on Success and -EACCES on failure.
+ */
+int
+mnt3_verify_auth (rpcsvc_request_t *req, struct mnt3_export *export)
+{
+ int retvalue = -EACCES;
+ int ret = 0;
+ int shiftbits = 0;
+ uint32_t ipv4netmask = 0;
+ uint32_t routingprefix = 0;
+ struct host_auth_spec *host = NULL;
+ struct sockaddr_in *client_addr = NULL;
+ struct sockaddr_in *allowed_addr = NULL;
+ struct addrinfo *allowed_addrinfo = NULL;
+
+ /* Sanity check */
+ if ((NULL == req) ||
+ (NULL == req->trans) ||
+ (NULL == export) ||
+ (NULL == export->hostspec)) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Invalid argument");
+ return retvalue;
+ }
+
+ host = export->hostspec;
+
+
+ /* Client's IP address. */
+ client_addr = (struct sockaddr_in *)(&(req->trans->peerinfo.sockaddr));
+
+ /* Try to see if the client IP matches the allowed IP list.*/
+ while (NULL != host){
+ GF_ASSERT (host->host_addr);
+
+ if (NULL != allowed_addrinfo) {
+ freeaddrinfo (allowed_addrinfo);
+ allowed_addrinfo = NULL;
+ }
+
+ /* Get the addrinfo for the allowed host (host_addr). */
+ ret = getaddrinfo (host->host_addr,
+ NULL,
+ NULL,
+ &allowed_addrinfo);
+ if (0 != ret){
+ gf_log (GF_MNT, GF_LOG_ERROR, "getaddrinfo: %s\n",
+ gai_strerror (ret));
+ host = host->next;
+
+ /* Failed to get IP addrinfo. Continue to check other
+ * allowed IPs in the list.
+ */
+ continue;
+ }
+
+ allowed_addr = (struct sockaddr_in *)(allowed_addrinfo->ai_addr);
+
+ if (NULL == allowed_addr) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Invalid structure");
+ break;
+ }
+
+ if (AF_INET == allowed_addr->sin_family){
+ if (IPv4_ADDR_SIZE < host->routeprefix) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "invalid IP "
+ "configured for export-dir AUTH");
+ host = host->next;
+ continue;
+ }
+
+ /* -1 means no route prefix is provided. In this case
+ * the IP should be an exact match. Which is same as
+ * providing a route prefix of IPv4_ADDR_SIZE.
+ */
+ if (-1 == host->routeprefix) {
+ routingprefix = IPv4_ADDR_SIZE;
+ } else {
+ routingprefix = host->routeprefix;
+ }
+
+ /* Create a mask from the routing prefix. User provided
+ * CIDR address is split into IP address (host_addr) and
+ * routing prefix (routeprefix). This CIDR address may
+ * denote a single, distinct interface address or the
+ * beginning address of an entire network.
+ *
+ * e.g. the IPv4 block 192.168.100.0/24 represents the
+ * 256 IPv4 addresses from 192.168.100.0 to
+ * 192.168.100.255.
+ * Therefore to check if an IP matches 192.168.100.0/24
+ * we should mask the IP with FFFFFF00 and compare it
+ * with host address part of CIDR.
+ */
+ shiftbits = IPv4_ADDR_SIZE - routingprefix;
+ ipv4netmask = 0xFFFFFFFFUL << shiftbits;
+
+ /* Mask both the IPs and then check if they match
+ * or not. */
+ if (COMPARE_IPv4_ADDRS (allowed_addr,
+ client_addr,
+ ipv4netmask)){
+ retvalue = 0;
+ break;
+ }
+ }
+
+ /* Client IP didn't match the allowed IP.
+ * Check with the next allowed IP.*/
+ host = host->next;
+ }
+
+ if (NULL != allowed_addrinfo) {
+ freeaddrinfo (allowed_addrinfo);
+ }
+
+ return retvalue;
+}
+
+int
+mnt3_resolve_subdir (rpcsvc_request_t *req, struct mount3_state *ms,
+ struct mnt3_export *exp, char *subdir)
+{
+ mnt3_resolve_t *mres = NULL;
+ int ret = -EFAULT;
+ struct nfs3_fh pfh = GF_NFS3FH_STATIC_INITIALIZER;
+
+ if ((!req) || (!ms) || (!exp) || (!subdir))
+ return ret;
+
+ /* Need to check AUTH */
+ if (NULL != exp->hostspec) {
+ ret = mnt3_verify_auth (req, exp);
+ if (0 != ret) {
+ gf_log (GF_MNT,GF_LOG_ERROR,
+ "AUTH verification failed");
+ return ret;
+ }
+ }
+
+ mres = GF_CALLOC (1, sizeof (mnt3_resolve_t), gf_nfs_mt_mnt3_resolve);
+ if (!mres) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Memory allocation failed");
+ goto err;
+ }
+
+ mres->exp = exp;
+ mres->mstate = ms;
+ mres->req = req;
+ strncpy (mres->remainingdir, subdir, MNTPATHLEN);
+ if (gf_nfs_dvm_off (nfs_state (ms->nfsx)))
+ pfh = nfs3_fh_build_indexed_root_fh (mres->mstate->nfsx->children, mres->exp->vol);
+ else
+ pfh = nfs3_fh_build_uuid_root_fh (exp->volumeid);
+
+ mres->parentfh = pfh;
+ ret = __mnt3_resolve_subdir (mres);
+ if (ret < 0) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Failed to resolve export dir: %s"
+ , mres->exp->expname);
+ GF_FREE (mres);
+ }
+
+err:
+ return ret;
+}
+
+
+int
+mnt3_resolve_export_subdir (rpcsvc_request_t *req, struct mount3_state *ms,
+ struct mnt3_export *exp)
+{
+ char *volume_subdir = NULL;
+ int ret = -EFAULT;
+
+ if ((!req) || (!ms) || (!exp))
+ return ret;
+
+ volume_subdir = __volume_subdir (exp->expname, NULL);
+ if (!volume_subdir)
+ goto err;
+
+ ret = mnt3_resolve_subdir (req, ms, exp, volume_subdir);
+ if (ret < 0) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Failed to resolve export dir: %s"
+ , exp->expname);
+ goto err;
+ }
+
+err:
+ return ret;
+}
+
+
+int
+mnt3svc_mount (rpcsvc_request_t *req, struct mount3_state *ms,
+ struct mnt3_export *exp)
+{
+ int ret = -EFAULT;
+
+ if ((!req) || (!ms) || (!exp))
+ return ret;
+
+ if (exp->exptype == MNT3_EXPTYPE_VOLUME)
+ ret = mnt3svc_volume_mount (req, ms, exp);
+ else if (exp->exptype == MNT3_EXPTYPE_DIR)
+ ret = mnt3_resolve_export_subdir (req, ms, exp);
+
+ return ret;
+}
+
+
+/* mnt3_mntpath_to_xlator sets this to 1 if the mount is for a full
+* volume or 2 for a subdir in the volume.
+*/
+struct mnt3_export *
+mnt3_mntpath_to_export (struct mount3_state *ms, char *dirpath)
+{
+ struct mnt3_export *exp = NULL;
+ struct mnt3_export *found = NULL;
+
+ if ((!ms) || (!dirpath))
+ return NULL;
+
+ LOCK (&ms->mountlock);
+ list_for_each_entry (exp, &ms->exportlist, explist) {
+
+ /* Search for the an exact match with the volume */
+ if (mnt3_match_dirpath_export (exp->expname, dirpath)) {
+ found = exp;
+ gf_log (GF_MNT, GF_LOG_DEBUG, "Found export volume: "
+ "%s", exp->vol->name);
+ goto foundexp;
+ }
+ }
+
+ gf_log (GF_MNT, GF_LOG_DEBUG, "Export not found");
+foundexp:
+ UNLOCK (&ms->mountlock);
+ return found;
+}
+
+
+int
+mnt3_check_client_net (struct mount3_state *ms, rpcsvc_request_t *req,
+ xlator_t *targetxl)
+{
+
+ rpcsvc_t *svc = NULL;
+ rpc_transport_t *trans = NULL;
+ struct sockaddr_storage sastorage = {0,};
+ char peer[RPCSVC_PEER_STRLEN] = {0,};
+ int ret = -1;
+
+ if ((!ms) || (!req) || (!targetxl))
+ return -1;
+
+ svc = rpcsvc_request_service (req);
+
+ trans = rpcsvc_request_transport (req);
+ ret = rpcsvc_transport_peeraddr (trans, peer, RPCSVC_PEER_STRLEN,
+ &sastorage, sizeof (sastorage));
+ if (ret != 0) {
+ gf_log (GF_MNT, GF_LOG_WARNING, "Failed to get peer addr: %s",
+ gai_strerror (ret));
+ }
+
+ ret = rpcsvc_auth_check (svc, targetxl->name, trans);
+ if (ret == RPCSVC_AUTH_REJECT) {
+ gf_log (GF_MNT, GF_LOG_INFO, "Peer %s not allowed", peer);
+ goto err;
+ }
+
+ ret = rpcsvc_transport_privport_check (svc, targetxl->name,
+ rpcsvc_request_transport (req));
+ if (ret == RPCSVC_AUTH_REJECT) {
+ gf_log (GF_MNT, GF_LOG_INFO, "Peer %s rejected. Unprivileged "
+ "port not allowed", peer);
+ goto err;
+ }
+
+ ret = 0;
+err:
+ return ret;
+}
+
+
+int
+mnt3_parse_dir_exports (rpcsvc_request_t *req, struct mount3_state *ms,
+ char *subdir)
+{
+ char volname[1024];
+ struct mnt3_export *exp = NULL;
+ char *volname_ptr = NULL;
+ int ret = -1;
+
+ if ((!ms) || (!subdir))
+ return -1;
+
+ volname_ptr = volname;
+ subdir = __volume_subdir (subdir, &volname_ptr);
+ if (!subdir)
+ goto err;
+
+ exp = mnt3_mntpath_to_export (ms, volname);
+ if (!exp)
+ goto err;
+
+ ret = mnt3_resolve_subdir (req, ms, exp, subdir);
+ if (ret < 0) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Failed to resolve export dir: %s"
+ , subdir);
+ goto err;
+ }
+
+err:
+ return ret;
+}
+
+
+int
+mnt3_find_export (rpcsvc_request_t *req, char *path, struct mnt3_export **e)
+{
+ int ret = -EFAULT;
+ struct mount3_state *ms = NULL;
+ struct mnt3_export *exp = NULL;
+
+ if ((!req) || (!path) || (!e))
+ return -1;
+
+ ms = (struct mount3_state *) rpcsvc_request_program_private (req);
+ if (!ms) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Mount state not present");
+ rpcsvc_request_seterr (req, SYSTEM_ERR);
+ goto err;
+ }
+
+ gf_log (GF_MNT, GF_LOG_DEBUG, "dirpath: %s", path);
+ exp = mnt3_mntpath_to_export (ms, path);
+ if (exp) {
+ ret = 0;
+ *e = exp;
+ goto err;
+ }
+
+ if (!gf_mnt3_export_dirs(ms)) {
+ ret = -1;
+ goto err;
+ }
+
+ ret = mnt3_parse_dir_exports (req, ms, path);
+ if (ret == 0) {
+ ret = -2;
+ goto err;
+ }
+
+err:
+ return ret;
+}
+
+
+int
+mnt3svc_mnt (rpcsvc_request_t *req)
+{
+ struct iovec pvec = {0, };
+ char path[MNTPATHLEN];
+ int ret = -1;
+ struct mount3_state *ms = NULL;
+ mountstat3 mntstat = MNT3ERR_SERVERFAULT;
+ struct mnt3_export *exp = NULL;
+ struct nfs_state *nfs = NULL;
+
+ if (!req)
+ return -1;
+
+ pvec.iov_base = path;
+ pvec.iov_len = MNTPATHLEN;
+ ret = xdr_to_mountpath (pvec, req->msg[0]);
+ if (ret == -1) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Failed to decode args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+
+ ms = (struct mount3_state *)rpcsvc_request_program_private (req);
+ if (!ms) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Mount state not present");
+ rpcsvc_request_seterr (req, SYSTEM_ERR);
+ ret = -1;
+ goto rpcerr;
+ }
+
+ ret = 0;
+ nfs = (struct nfs_state *)ms->nfsx->private;
+ gf_log (GF_MNT, GF_LOG_DEBUG, "dirpath: %s", path);
+ ret = mnt3_find_export (req, path, &exp);
+ if (ret == -2) {
+ ret = 0;
+ goto rpcerr;
+ } else if (ret < 0) {
+ ret = -1;
+ mntstat = MNT3ERR_NOENT;
+ goto mnterr;
+ }
+
+ if (!nfs_subvolume_started (nfs, exp->vol)) {
+ gf_log (GF_MNT, GF_LOG_DEBUG, "Volume %s not started",
+ exp->vol->name);
+ ret = -1;
+ mntstat = MNT3ERR_NOENT;
+ goto mnterr;
+ }
+
+ ret = mnt3_check_client_net (ms, req, exp->vol);
+ if (ret == RPCSVC_AUTH_REJECT) {
+ mntstat = MNT3ERR_ACCES;
+ gf_log (GF_MNT, GF_LOG_DEBUG, "Client mount not allowed");
+ ret = -1;
+ goto mnterr;
+ }
+
+ ret = mnt3svc_mount (req, ms, exp);
+ if (ret < 0)
+ mntstat = mnt3svc_errno_to_mnterr (-ret);
+mnterr:
+ if (ret < 0) {
+ mnt3svc_mnt_error_reply (req, mntstat);
+ ret = 0;
+ }
+
+rpcerr:
+ return ret;
+}
+
+
+int
+mnt3svc_null (rpcsvc_request_t *req)
+{
+ struct iovec dummyvec = {0, };
+
+ if (!req) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Got NULL request!");
+ return 0;
+ }
+ rpcsvc_submit_generic (req, &dummyvec, 1, NULL, 0, NULL);
+ return 0;
+}
+
+
+mountlist
+__build_mountlist (struct mount3_state *ms, int *count)
+{
+ struct mountbody *mlist = NULL;
+ struct mountbody *prev = NULL;
+ struct mountbody *first = NULL;
+ size_t namelen = 0;
+ int ret = -1;
+ struct mountentry *me = NULL;
+
+ if ((!ms) || (!count))
+ return NULL;
+
+ /* read rmtab, other peers might have updated it */
+ mount_read_rmtab(ms);
+
+ *count = 0;
+ gf_log (GF_MNT, GF_LOG_DEBUG, "Building mount list:");
+ list_for_each_entry (me, &ms->mountlist, mlist) {
+ namelen = strlen (me->exname);
+ mlist = GF_CALLOC (1, sizeof (*mlist), gf_nfs_mt_mountbody);
+ if (!mlist) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Memory allocation"
+ " failed");
+ goto free_list;
+ }
+ if (!first)
+ first = mlist;
+
+ mlist->ml_directory = GF_CALLOC (namelen + 2, sizeof (char),
+ gf_nfs_mt_char);
+ if (!mlist->ml_directory) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Memory allocation"
+ " failed");
+ goto free_list;
+ }
+
+ strcpy (mlist->ml_directory, me->exname);
+
+ namelen = strlen (me->hostname);
+ mlist->ml_hostname = GF_CALLOC (namelen + 2, sizeof (char),
+ gf_nfs_mt_char);
+ if (!mlist->ml_hostname) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Memory allocation"
+ " failed");
+ goto free_list;
+ }
+
+ strcat (mlist->ml_hostname, me->hostname);
+
+ gf_log (GF_MNT, GF_LOG_DEBUG, "mount entry: dir: %s, host: %s",
+ mlist->ml_directory, mlist->ml_hostname);
+ if (prev) {
+ prev->ml_next = mlist;
+ prev = mlist;
+ } else
+ prev = mlist;
+
+ (*count)++;
+ }
+
+ ret = 0;
+
+free_list:
+ if (ret == -1) {
+ xdr_free_mountlist (first);
+ first = NULL;
+ }
+
+ return first;
+}
+
+
+mountlist
+mnt3svc_build_mountlist (struct mount3_state *ms, int *count)
+{
+ struct mountbody *first = NULL;
+
+ LOCK (&ms->mountlock);
+ {
+ first = __build_mountlist (ms, count);
+ }
+ UNLOCK (&ms->mountlock);
+
+ return first;
+}
+
+
+int
+mnt3svc_dump (rpcsvc_request_t *req)
+{
+ int ret = -1;
+ struct mount3_state *ms = NULL;
+ mountlist mlist;
+ mountstat3 mstat = 0;
+ mnt3_serializer sfunc = NULL;
+ void *arg = NULL;
+
+
+ if (!req)
+ return -1;
+
+ ms = (struct mount3_state *)rpcsvc_request_program_private (req);
+ if (!ms) {
+ rpcsvc_request_seterr (req, SYSTEM_ERR);
+ goto rpcerr;
+ }
+
+ sfunc = (mnt3_serializer)xdr_serialize_mountlist;
+ mlist = mnt3svc_build_mountlist (ms, &ret);
+ arg = &mlist;
+
+ if (!mlist) {
+ if (ret != 0) {
+ rpcsvc_request_seterr (req, SYSTEM_ERR);
+ ret = -1;
+ goto rpcerr;
+ } else {
+ arg = &mstat;
+ sfunc = (mnt3_serializer)xdr_serialize_mountstat3;
+ }
+ }
+
+ mnt3svc_submit_reply (req, arg, sfunc);
+
+ xdr_free_mountlist (mlist);
+ ret = 0;
+
+rpcerr:
+ return ret;
+}
+
+
+int
+mnt3svc_umount (struct mount3_state *ms, char *dirpath, char *hostname)
+{
+ struct mountentry *me = NULL;
+ int ret = -1;
+ gf_store_handle_t *sh = NULL;
+ struct nfs_state *nfs = NULL;
+
+ if ((!ms) || (!dirpath) || (!hostname))
+ return -1;
+
+ nfs = (struct nfs_state *)ms->nfsx->private;
+
+ ret = gf_store_handle_new (nfs->rmtab, &sh);
+ if (ret) {
+ gf_log (GF_MNT, GF_LOG_WARNING, "Failed to open '%s'",
+ nfs->rmtab);
+ return 0;
+ }
+
+ ret = gf_store_lock (sh);
+ if (ret) {
+ goto out_free;
+ }
+
+ LOCK (&ms->mountlock);
+ {
+ __mount_read_rmtab (sh, &ms->mountlist, _gf_false);
+ if (list_empty (&ms->mountlist)) {
+ ret = 0;
+ goto out_unlock;
+ }
+
+ ret = -1;
+ list_for_each_entry (me, &ms->mountlist, mlist) {
+ if ((strcmp (me->exname, dirpath) == 0) &&
+ (strcmp (me->hostname, hostname) == 0)) {
+ ret = 0;
+ break;
+ }
+ }
+
+ /* Need this check here because at the end of the search me
+ * might still be pointing to the last entry, which may not be
+ * the one we're looking for.
+ */
+ if (ret == -1) {/* Not found in list. */
+ gf_log (GF_MNT, GF_LOG_TRACE, "Export not found");
+ goto out_unlock;
+ }
+
+ if (!me)
+ goto out_unlock;
+
+ gf_log (GF_MNT, GF_LOG_DEBUG, "Unmounting: dir %s, host: %s",
+ me->exname, me->hostname);
+
+ list_del (&me->mlist);
+ GF_FREE (me);
+ __mount_rewrite_rmtab (ms, sh);
+ }
+out_unlock:
+ UNLOCK (&ms->mountlock);
+ gf_store_unlock (sh);
+out_free:
+ gf_store_handle_destroy (sh);
+ return ret;
+}
+
+
+int
+mnt3svc_umnt (rpcsvc_request_t *req)
+{
+ char hostname[MNTPATHLEN];
+ char dirpath[MNTPATHLEN];
+ struct iovec pvec = {0, };
+ int ret = -1;
+ struct mount3_state *ms = NULL;
+ mountstat3 mstat = MNT3_OK;
+ char *colon = NULL;
+
+ if (!req)
+ return -1;
+
+ /* Remove the mount point from the exports list. */
+ pvec.iov_base = dirpath;
+ pvec.iov_len = MNTPATHLEN;
+ ret = xdr_to_mountpath (pvec, req->msg[0]);
+ if (ret == -1) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Failed decode args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+
+ ms = (struct mount3_state *)rpcsvc_request_program_private (req);
+ if (!ms) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Mount state not present");
+ rpcsvc_request_seterr (req, SYSTEM_ERR);
+ ret = -1;
+ goto rpcerr;
+ }
+
+ ret = rpcsvc_transport_peername (req->trans, hostname, MNTPATHLEN);
+ if (ret != 0) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Failed to get remote name: %s",
+ gai_strerror (ret));
+ goto rpcerr;
+ }
+
+ colon = strrchr (hostname, ':');
+ if (colon) {
+ *colon= '\0';
+ }
+ gf_log (GF_MNT, GF_LOG_DEBUG, "dirpath: %s, hostname: %s", dirpath,
+ hostname);
+ ret = mnt3svc_umount (ms, dirpath, hostname);
+
+ if (ret == -1) {
+ ret = 0;
+ mstat = MNT3ERR_NOENT;
+ }
+ /* FIXME: also take care of the corner case where the
+ * client was resolvable at mount but not at the umount - vice-versa.
+ */
+ mnt3svc_submit_reply (req, &mstat,
+ (mnt3_serializer)xdr_serialize_mountstat3);
+
+rpcerr:
+ return ret;
+}
+
+
+int
+__mnt3svc_umountall (struct mount3_state *ms)
+{
+ struct mountentry *me = NULL;
+ struct mountentry *tmp = NULL;
+
+ if (!ms)
+ return -1;
+
+ if (list_empty (&ms->mountlist))
+ return 0;
+
+ list_for_each_entry_safe (me, tmp, &ms->mountlist, mlist) {
+ list_del (&me->mlist);
+ GF_FREE (me);
+ }
+
+ return 0;
+}
+
+
+int
+mnt3svc_umountall (struct mount3_state *ms)
+{
+ int ret = -1;
+ if (!ms)
+ return -1;
+
+ LOCK (&ms->mountlock);
+ {
+ ret = __mnt3svc_umountall (ms);
+ }
+ UNLOCK (&ms->mountlock);
+
+ return ret;
+}
+
+
+int
+mnt3svc_umntall (rpcsvc_request_t *req)
+{
+ int ret = RPCSVC_ACTOR_ERROR;
+ struct mount3_state *ms = NULL;
+ mountstat3 mstat = MNT3_OK;
+
+ if (!req)
+ return ret;
+
+ ms = (struct mount3_state *)rpcsvc_request_program_private (req);
+ if (!ms) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Mount state not present");
+ rpcsvc_request_seterr (req, SYSTEM_ERR);
+ goto rpcerr;
+ }
+
+ mnt3svc_umountall (ms);
+ mnt3svc_submit_reply (req, &mstat,
+ (mnt3_serializer)xdr_serialize_mountstat3);
+
+ ret = RPCSVC_ACTOR_SUCCESS;
+rpcerr:
+ return ret;
+}
+
+
+exports
+mnt3_xlchildren_to_exports (rpcsvc_t *svc, struct mount3_state *ms)
+{
+ struct exportnode *elist = NULL;
+ struct exportnode *prev = NULL;
+ struct exportnode *first = NULL;
+ size_t namelen = 0;
+ int ret = -1;
+ char *addrstr = NULL;
+ struct mnt3_export *ent = NULL;
+ struct nfs_state *nfs = NULL;
+
+ if ((!ms) || (!svc))
+ return NULL;
+
+ nfs = (struct nfs_state *)ms->nfsx->private;
+ if (!nfs)
+ return NULL;
+
+ LOCK (&ms->mountlock);
+ list_for_each_entry(ent, &ms->exportlist, explist) {
+
+ /* If volume is not started yet, do not list it for tools like
+ * showmount.
+ */
+ if (!nfs_subvolume_started (nfs, ent->vol))
+ continue;
+
+ namelen = strlen (ent->expname) + 1;
+ elist = GF_CALLOC (1, sizeof (*elist), gf_nfs_mt_exportnode);
+ if (!elist) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Memory allocation"
+ " failed");
+ goto free_list;
+ }
+ if (!first)
+ first = elist;
+ elist->ex_dir = GF_CALLOC (namelen + 2, sizeof (char),
+ gf_nfs_mt_char);
+ if (!elist->ex_dir) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Memory allocation"
+ " failed");
+ goto free_list;
+ }
+ strcpy (elist->ex_dir, ent->expname);
+
+ addrstr = rpcsvc_volume_allowed (svc->options,
+ ent->vol->name);
+ elist->ex_groups = GF_CALLOC (1, sizeof (struct groupnode),
+ gf_nfs_mt_groupnode);
+ if (!elist->ex_groups) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Memory allocation"
+ " failed");
+ goto free_list;
+ }
+ /*This check has to be done after checking
+ * elist->ex_groups allocation check to avoid resource leak;
+ */
+ if (addrstr)
+ addrstr = gf_strdup (addrstr);
+ else
+ addrstr = gf_strdup ("No Access");
+
+ if (!addrstr) {
+ goto free_list;
+ }
+ elist->ex_groups->gr_name = addrstr;
+ if (prev) {
+ prev->ex_next = elist;
+ prev = elist;
+ } else
+ prev = elist;
+ }
+
+ ret = 0;
+
+free_list:
+ UNLOCK (&ms->mountlock);
+ if (ret == -1) {
+ xdr_free_exports_list (first);
+ first = NULL;
+ }
+
+ return first;
+}
+
+
+int
+mnt3svc_export (rpcsvc_request_t *req)
+{
+ struct mount3_state *ms = NULL;
+ exports elist = NULL;
+ int ret = -1;
+
+ if (!req)
+ return -1;
+
+ ms = (struct mount3_state *)rpcsvc_request_program_private (req);
+ if (!ms) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "mount state not found");
+ rpcsvc_request_seterr (req, SYSTEM_ERR);
+ goto err;
+ }
+
+ /* Using the children translator names, build the export list */
+ elist = mnt3_xlchildren_to_exports (rpcsvc_request_service (req),
+ ms);
+ /* Do not return error when exports list is empty. An exports list can
+ * be empty when no subvolumes have come up. No point returning error
+ * and confusing the user.
+ if (!elist) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Failed to build exports list");
+ nfs_rpcsvc_request_seterr (req, SYSTEM_ERR);
+ goto err;
+ }
+ */
+
+ /* Note how the serializer is passed to the generic reply function. */
+ mnt3svc_submit_reply (req, &elist,
+ (mnt3_serializer)xdr_serialize_exports);
+
+ xdr_free_exports_list (elist);
+ ret = 0;
+err:
+ return ret;
+}
+
+/* just declaring, definition is way down below */
+rpcsvc_program_t mnt3prog;
+
+/* nfs3_rootfh used by mount3udp thread needs to access mount3prog.private
+ * directly as we don't have nfs xlator pointer to dereference it. But thats OK
+ */
+
+struct nfs3_fh *
+nfs3_rootfh (char* path)
+{
+ struct mount3_state *ms = NULL;
+ struct nfs3_fh *fh = NULL;
+ struct mnt3_export *exp = NULL;
+ inode_t *inode = NULL;
+ char *tmp = NULL;
+
+ ms = mnt3prog.private;
+ exp = mnt3_mntpath_to_export (ms, path);
+ if (exp == NULL)
+ goto err;
+
+ tmp = (char *)path;
+ tmp = strchr (tmp, '/');
+ if (tmp == NULL)
+ tmp = "/";
+
+ inode = inode_from_path (exp->vol->itable, tmp);
+ if (inode == NULL)
+ goto err;
+
+ fh = GF_CALLOC (1, sizeof(*fh), gf_nfs_mt_nfs3_fh);
+ if (fh == NULL)
+ goto err;
+ nfs3_build_fh (inode, exp->volumeid, fh);
+
+err:
+ if (inode)
+ inode_unref (inode);
+ return fh;
+}
+
+int
+mount3udp_add_mountlist (char *host, dirpath *expname)
+{
+ struct mountentry *me = NULL;
+ struct mount3_state *ms = NULL;
+ char *export = NULL;
+
+ ms = mnt3prog.private;
+ me = GF_CALLOC (1, sizeof (*me), gf_nfs_mt_mountentry);
+ if (!me)
+ return -1;
+ export = (char *)expname;
+ while (*export == '/')
+ export++;
+
+ strncpy (me->exname, export, MNTPATHLEN);
+ strncpy (me->hostname, host, MNTPATHLEN);
+ INIT_LIST_HEAD (&me->mlist);
+ LOCK (&ms->mountlock);
+ {
+ list_add_tail (&me->mlist, &ms->mountlist);
+ mount_rewrite_rmtab(ms, NULL);
+ }
+ UNLOCK (&ms->mountlock);
+ return 0;
+}
+
+int
+mount3udp_delete_mountlist (char *hostname, dirpath *expname)
+{
+ struct mount3_state *ms = NULL;
+ char *export = NULL;
+
+ ms = mnt3prog.private;
+ export = (char *)expname;
+ while (*export == '/')
+ export++;
+ mnt3svc_umount (ms, export, hostname);
+ return 0;
+}
+
+/**
+ * This function will parse the hostip (IP addres, IP range, or hostname)
+ * and fill the host_auth_spec structure.
+ *
+ * @param hostspec - struct host_auth_spec
+ * @param hostip - IP address, IP range (CIDR format) or hostname
+ *
+ * @return 0 - on success and -1 on failure
+ */
+int
+mnt3_export_fill_hostspec (struct host_auth_spec* hostspec, const char* hostip)
+{
+ char *ipdupstr = NULL;
+ char *savptr = NULL;
+ char *ip = NULL;
+ char *token = NULL;
+ int ret = -1;
+
+ /* Create copy of the string so that the source won't change
+ */
+ ipdupstr = gf_strdup (hostip);
+ if (NULL == ipdupstr) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Memory allocation failed");
+ goto err;
+ }
+
+ ip = strtok_r (ipdupstr, "/", &savptr);
+ hostspec->host_addr = gf_strdup (ip);
+ if (NULL == hostspec->host_addr) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Memory allocation failed");
+ goto err;
+ }
+
+ /* Check if the IP is in <IP address> / <Range> format.
+ * If yes, then strip the range and store it separately.
+ */
+ token = strtok_r (NULL, "/", &savptr);
+
+ if (NULL == token) {
+ hostspec->routeprefix = -1;
+ } else {
+ hostspec->routeprefix = atoi (token);
+ }
+
+ // success
+ ret = 0;
+err:
+ if (NULL != ipdupstr) {
+ GF_FREE (ipdupstr);
+ }
+ return ret;
+}
+
+
+/**
+ * This function will parse the AUTH parameter passed along with
+ * "export-dir" option. If AUTH parameter is present then it will be
+ * stripped from exportpath and stored in mnt3_export (exp) structure.
+ *
+ * @param exp - mnt3_export structure. Holds information needed for mount.
+ * @param exportpath - Value of "export-dir" key. Holds both export path
+ * and AUTH parameter for the path.
+ * exportpath format: <abspath>[(hostdesc[|hostspec|...])]
+ *
+ * @return This function will return 0 on success and -1 on failure.
+ */
+int
+mnt3_export_parse_auth_param (struct mnt3_export* exp, char* exportpath)
+{
+ char *token = NULL;
+ char *savPtr = NULL;
+ char *hostip = NULL;
+ struct host_auth_spec *host = NULL;
+ int ret = 0;
+
+ /* Using exportpath directly in strtok_r because we want
+ * to strip off AUTH parameter from exportpath. */
+ token = strtok_r (exportpath, "(", &savPtr);
+
+ /* Get the next token, which will be the AUTH parameter. */
+ token = strtok_r (NULL, ")", &savPtr);
+
+ if (NULL == token) {
+ /* If AUTH is not present then we should return success. */
+ return 0;
+ }
+
+ /* Free any previously allocated hostspec structure. */
+ if (NULL != exp->hostspec) {
+ GF_FREE (exp->hostspec);
+ exp->hostspec = NULL;
+ }
+
+ exp->hostspec = GF_CALLOC (1,
+ sizeof (*(exp->hostspec)),
+ gf_nfs_mt_auth_spec);
+ if (NULL == exp->hostspec){
+ gf_log (GF_MNT, GF_LOG_ERROR, "Memory allocation failed");
+ return -1;
+ }
+
+ /* AUTH parameter can have multiple entries. For each entry
+ * a host_auth_spec structure is created. */
+ host = exp->hostspec;
+
+ hostip = strtok_r (token, "|", &savPtr);
+
+ /* Parse all AUTH parameters separated by '|' */
+ while (NULL != hostip){
+ ret = mnt3_export_fill_hostspec (host, hostip);
+ if (0 != ret) {
+ gf_log(GF_MNT, GF_LOG_WARNING,
+ "Failed to parse hostspec: %s", hostip);
+ goto err;
+ }
+
+ hostip = strtok_r (NULL, "|", &savPtr);
+ if (NULL == hostip) {
+ break;
+ }
+
+ host->next = GF_CALLOC (1, sizeof (*(host)),
+ gf_nfs_mt_auth_spec);
+ if (NULL == host->next){
+ gf_log (GF_MNT,GF_LOG_ERROR,
+ "Memory allocation failed");
+ goto err;
+ }
+ host = host->next;
+ }
+
+ /* In case of success return from here */
+ return 0;
+err:
+ /* In case of failure free up hostspec structure. */
+ FREE_HOSTSPEC (exp);
+
+ return -1;
+}
+
+/**
+ * exportpath will also have AUTH options (ip address, subnet address or
+ * hostname) mentioned.
+ * exportpath format: <abspath>[(hostdesc[|hostspec|...])]
+ */
+struct mnt3_export *
+mnt3_init_export_ent (struct mount3_state *ms, xlator_t *xl, char *exportpath,
+ uuid_t volumeid)
+{
+ struct mnt3_export *exp = NULL;
+ int alloclen = 0;
+ int ret = -1;
+
+ if ((!ms) || (!xl))
+ return NULL;
+
+ exp = GF_CALLOC (1, sizeof (*exp), gf_nfs_mt_mnt3_export);
+ if (!exp) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Memory allocation failed");
+ return NULL;
+ }
+
+ if (NULL != exportpath) {
+ /* If exportpath is not NULL then we should check if AUTH
+ * parameter is present or not. If AUTH parameter is present
+ * then it will be stripped and stored in mnt3_export (exp)
+ * structure.
+ */
+ if (0 != mnt3_export_parse_auth_param (exp, exportpath)){
+ gf_log (GF_MNT, GF_LOG_ERROR,
+ "Failed to parse auth param");
+ goto err;
+ }
+ }
+
+
+ INIT_LIST_HEAD (&exp->explist);
+ if (exportpath)
+ alloclen = strlen (xl->name) + 2 + strlen (exportpath);
+ else
+ alloclen = strlen (xl->name) + 2;
+
+ exp->expname = GF_CALLOC (alloclen, sizeof (char), gf_nfs_mt_char);
+ if (!exp->expname) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Memory allocation failed");
+ goto err;
+ }
+
+ if (exportpath) {
+ gf_log (GF_MNT, GF_LOG_TRACE, "Initing dir export: %s:%s",
+ xl->name, exportpath);
+ exp->exptype = MNT3_EXPTYPE_DIR;
+ ret = snprintf (exp->expname, alloclen, "/%s%s", xl->name,
+ exportpath);
+ } else {
+ gf_log (GF_MNT, GF_LOG_TRACE, "Initing volume export: %s",
+ xl->name);
+ exp->exptype = MNT3_EXPTYPE_VOLUME;
+ ret = snprintf (exp->expname, alloclen, "/%s", xl->name);
+ }
+ if (ret < 0) {
+ gf_log (xl->name, GF_LOG_ERROR,
+ "Failed to set the export name");
+ goto err;
+ }
+ /* Just copy without discrimination, we'll determine whether to
+ * actually use it when a mount request comes in and a file handle
+ * needs to be built.
+ */
+ uuid_copy (exp->volumeid, volumeid);
+ exp->vol = xl;
+
+ /* On success we should return from here*/
+ return exp;
+err:
+ /* On failure free exp and it's members.*/
+ if (NULL != exp) {
+ mnt3_export_free (exp);
+ exp = NULL;
+ }
+
+ return exp;
+}
+
+
+int
+__mnt3_init_volume_direxports (struct mount3_state *ms, xlator_t *xlator,
+ char *optstr, uuid_t volumeid)
+{
+ struct mnt3_export *newexp = NULL;
+ int ret = -1;
+ char *savptr = NULL;
+ char *dupopt = NULL;
+ char *token = NULL;
+
+ if ((!ms) || (!xlator) || (!optstr))
+ return -1;
+
+ dupopt = gf_strdup (optstr);
+ if (!dupopt) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "gf_strdup failed");
+ goto err;
+ }
+
+ token = strtok_r (dupopt, ",", &savptr);
+ while (token) {
+ newexp = mnt3_init_export_ent (ms, xlator, token, volumeid);
+ if (!newexp) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Failed to init dir "
+ "export: %s", token);
+ ret = -1;
+ goto err;
+ }
+
+ list_add_tail (&newexp->explist, &ms->exportlist);
+ token = strtok_r (NULL, ",", &savptr);
+ }
+
+ ret = 0;
+err:
+ GF_FREE (dupopt);
+
+ return ret;
+}
+
+
+int
+__mnt3_init_volume (struct mount3_state *ms, dict_t *opts, xlator_t *xlator)
+{
+ struct mnt3_export *newexp = NULL;
+ int ret = -1;
+ char searchstr[1024];
+ char *optstr = NULL;
+ uuid_t volumeid = {0, };
+
+ if ((!ms) || (!xlator) || (!opts))
+ return -1;
+
+ uuid_clear (volumeid);
+ if (gf_nfs_dvm_off (nfs_state (ms->nfsx)))
+ goto no_dvm;
+
+ ret = snprintf (searchstr, 1024, "nfs3.%s.volume-id", xlator->name);
+ if (ret < 0) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "snprintf failed");
+ ret = -1;
+ goto err;
+ }
+
+ if (dict_get (opts, searchstr)) {
+ ret = dict_get_str (opts, searchstr, &optstr);
+ if (ret < 0) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Failed to read option"
+ ": %s", searchstr);
+ ret = -1;
+ goto err;
+ }
+ } else {
+ gf_log (GF_MNT, GF_LOG_ERROR, "DVM is on but volume-id not "
+ "given for volume: %s", xlator->name);
+ ret = -1;
+ goto err;
+ }
+
+ if (optstr) {
+ ret = uuid_parse (optstr, volumeid);
+ if (ret < 0) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Failed to parse volume "
+ "UUID");
+ ret = -1;
+ goto err;
+ }
+ }
+
+no_dvm:
+ ret = snprintf (searchstr, 1024, "nfs3.%s.export-dir", xlator->name);
+ if (ret < 0) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "snprintf failed");
+ ret = -1;
+ goto err;
+ }
+
+ if (dict_get (opts, searchstr)) {
+ ret = dict_get_str (opts, searchstr, &optstr);
+ if (ret < 0) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Failed to read option: "
+ "%s", searchstr);
+ ret = -1;
+ goto err;
+ }
+
+ ret = __mnt3_init_volume_direxports (ms, xlator, optstr,
+ volumeid);
+ if (ret == -1) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Dir export setup failed"
+ " for volume: %s", xlator->name);
+ goto err;
+ }
+ }
+
+ if (ms->export_volumes) {
+ newexp = mnt3_init_export_ent (ms, xlator, NULL, volumeid);
+ if (!newexp) {
+ ret = -1;
+ goto err;
+ }
+
+ list_add_tail (&newexp->explist, &ms->exportlist);
+ }
+ ret = 0;
+
+
+err:
+ return ret;
+}
+
+
+int
+__mnt3_init_volume_export (struct mount3_state *ms, dict_t *opts)
+{
+ int ret = -1;
+ char *optstr = NULL;
+ /* On by default. */
+ gf_boolean_t boolt = _gf_true;
+
+ if ((!ms) || (!opts))
+ return -1;
+
+ if (!dict_get (opts, "nfs3.export-volumes")) {
+ ret = 0;
+ goto err;
+ }
+
+ ret = dict_get_str (opts, "nfs3.export-volumes", &optstr);
+ if (ret < 0) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Failed to read option: "
+ "nfs3.export-volumes");
+ ret = -1;
+ goto err;
+ }
+
+ ret = gf_string2boolean (optstr, &boolt);
+ if (ret < 0) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Failed to convert"
+ " string to boolean");
+ }
+
+err:
+ if (boolt == _gf_false) {
+ gf_log (GF_MNT, GF_LOG_TRACE, "Volume exports disabled");
+ ms->export_volumes = 0;
+ } else {
+ gf_log (GF_MNT, GF_LOG_TRACE, "Volume exports enabled");
+ ms->export_volumes = 1;
+ }
+
+ return ret;
+}
+
+
+int
+__mnt3_init_dir_export (struct mount3_state *ms, dict_t *opts)
+{
+ int ret = -1;
+ char *optstr = NULL;
+ /* On by default. */
+ gf_boolean_t boolt = _gf_true;
+
+ if ((!ms) || (!opts))
+ return -1;
+
+ if (!dict_get (opts, "nfs3.export-dirs")) {
+ ret = 0;
+ goto err;
+ }
+
+ ret = dict_get_str (opts, "nfs3.export-dirs", &optstr);
+ if (ret < 0) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Failed to read option: "
+ "nfs3.export-dirs");
+ ret = -1;
+ goto err;
+ }
+
+ ret = gf_string2boolean (optstr, &boolt);
+ if (ret < 0) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Failed to convert"
+ " string to boolean");
+ }
+
+err:
+ if (boolt == _gf_false) {
+ gf_log (GF_MNT, GF_LOG_TRACE, "Dir exports disabled");
+ ms->export_dirs = 0;
+ } else {
+ gf_log (GF_MNT, GF_LOG_TRACE, "Dir exports enabled");
+ ms->export_dirs = 1;
+ }
+
+ return ret;
+}
+
+
+int
+mnt3_init_options (struct mount3_state *ms, dict_t *options)
+{
+ xlator_list_t *volentry = NULL;
+ int ret = -1;
+
+ if ((!ms) || (!options))
+ return -1;
+
+ __mnt3_init_volume_export (ms, options);
+ __mnt3_init_dir_export (ms, options);
+ volentry = ms->nfsx->children;
+ while (volentry) {
+ gf_log (GF_MNT, GF_LOG_TRACE, "Initing options for: %s",
+ volentry->xlator->name);
+ ret = __mnt3_init_volume (ms, options, volentry->xlator);
+ if (ret < 0) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Volume init failed");
+ goto err;
+ }
+
+ volentry = volentry->next;
+ }
+
+ ret = 0;
+err:
+ return ret;
+}
+
+
+struct mount3_state *
+mnt3_init_state (xlator_t *nfsx)
+{
+ struct mount3_state *ms = NULL;
+ int ret = -1;
+
+ if (!nfsx)
+ return NULL;
+
+ ms = GF_CALLOC (1, sizeof (*ms), gf_nfs_mt_mount3_state);
+ if (!ms) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Memory allocation failed");
+ return NULL;
+ }
+
+ ms->iobpool = nfsx->ctx->iobuf_pool;
+ ms->nfsx = nfsx;
+ INIT_LIST_HEAD (&ms->exportlist);
+ ret = mnt3_init_options (ms, nfsx->options);
+ if (ret < 0) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Options init failed");
+ return NULL;
+ }
+
+ INIT_LIST_HEAD (&ms->mountlist);
+ LOCK_INIT (&ms->mountlock);
+
+ return ms;
+}
+
+int
+mount_init_state (xlator_t *nfsx)
+{
+ int ret = -1;
+ struct nfs_state *nfs = NULL;
+
+ if (!nfsx)
+ goto out;
+
+ nfs = (struct nfs_state *)nfs_state (nfsx);
+ /*Maintaining global state for MOUNT1 and MOUNT3*/
+ nfs->mstate = mnt3_init_state (nfsx);
+ if (!nfs->mstate) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to allocate"
+ "mount state");
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+rpcsvc_actor_t mnt3svc_actors[MOUNT3_PROC_COUNT] = {
+ {"NULL", MOUNT3_NULL, mnt3svc_null, NULL, 0, DRC_NA},
+ {"MNT", MOUNT3_MNT, mnt3svc_mnt, NULL, 0, DRC_NA},
+ {"DUMP", MOUNT3_DUMP, mnt3svc_dump, NULL, 0, DRC_NA},
+ {"UMNT", MOUNT3_UMNT, mnt3svc_umnt, NULL, 0, DRC_NA},
+ {"UMNTALL", MOUNT3_UMNTALL, mnt3svc_umntall, NULL, 0, DRC_NA},
+ {"EXPORT", MOUNT3_EXPORT, mnt3svc_export, NULL, 0, DRC_NA}
+};
+
+
+
+/* Static init parts are assigned here, dynamic ones are done in
+ * mnt3svc_init and mnt3_init_state.
+ * Making MOUNT3 a synctask so that the blocking DNS calls during rpc auth
+ * gets offloaded to syncenv, keeping the main/poll thread unblocked
+ */
+rpcsvc_program_t mnt3prog = {
+ .progname = "MOUNT3",
+ .prognum = MOUNT_PROGRAM,
+ .progver = MOUNT_V3,
+ .progport = GF_MOUNTV3_PORT,
+ .actors = mnt3svc_actors,
+ .numactors = MOUNT3_PROC_COUNT,
+ .min_auth = AUTH_NULL,
+ .synctask = _gf_true,
+};
+
+
+
+rpcsvc_program_t *
+mnt3svc_init (xlator_t *nfsx)
+{
+ struct mount3_state *mstate = NULL;
+ struct nfs_state *nfs = NULL;
+ dict_t *options = NULL;
+ char *portstr = NULL;
+ int ret = -1;
+ pthread_t udp_thread;
+
+ if (!nfsx || !nfsx->private)
+ return NULL;
+
+ nfs = (struct nfs_state *)nfsx->private;
+
+ gf_log (GF_MNT, GF_LOG_DEBUG, "Initing Mount v3 state");
+ mstate = (struct mount3_state *)nfs->mstate;
+ if (!mstate) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Mount v3 state init failed");
+ goto err;
+ }
+
+ mnt3prog.private = mstate;
+ options = dict_new ();
+
+ ret = gf_asprintf (&portstr, "%d", GF_MOUNTV3_PORT);
+ if (ret == -1)
+ goto err;
+
+ ret = dict_set_dynstr (options, "transport.socket.listen-port", portstr);
+ if (ret == -1)
+ goto err;
+ ret = dict_set_str (options, "transport-type", "socket");
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "dict_set_str error");
+ goto err;
+ }
+
+ if (nfs->allow_insecure) {
+ ret = dict_set_str (options, "rpc-auth-allow-insecure", "on");
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "dict_set_str error");
+ goto err;
+ }
+ ret = dict_set_str (options, "rpc-auth.ports.insecure", "on");
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "dict_set_str error");
+ goto err;
+ }
+ }
+
+ ret= rpcsvc_create_listeners (nfs->rpcsvc, options, nfsx->name);
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Unable to create listeners");
+ dict_unref (options);
+ goto err;
+ }
+
+ if (nfs->mount_udp) {
+ pthread_create (&udp_thread, NULL, mount3udp_thread, NULL);
+ }
+ return &mnt3prog;
+err:
+ return NULL;
+}
+
+
+rpcsvc_actor_t mnt1svc_actors[MOUNT1_PROC_COUNT] = {
+ {"NULL", MOUNT1_NULL, mnt3svc_null, NULL, 0, DRC_NA},
+ {"MNT", MOUNT1_MNT, NULL, NULL, 0, DRC_NA },
+ {"DUMP", MOUNT1_DUMP, mnt3svc_dump, NULL, 0, DRC_NA},
+ {"UMNT", MOUNT1_UMNT, mnt3svc_umnt, NULL, 0, DRC_NA},
+ {"UMNTALL", MOUNT1_UMNTALL, NULL, NULL, 0, DRC_NA},
+ {"EXPORT", MOUNT1_EXPORT, mnt3svc_export, NULL, 0, DRC_NA}
+};
+
+rpcsvc_program_t mnt1prog = {
+ .progname = "MOUNT1",
+ .prognum = MOUNT_PROGRAM,
+ .progver = MOUNT_V1,
+ .progport = GF_MOUNTV1_PORT,
+ .actors = mnt1svc_actors,
+ .numactors = MOUNT1_PROC_COUNT,
+ .min_auth = AUTH_NULL,
+ .synctask = _gf_true,
+};
+
+
+rpcsvc_program_t *
+mnt1svc_init (xlator_t *nfsx)
+{
+ struct mount3_state *mstate = NULL;
+ struct nfs_state *nfs = NULL;
+ dict_t *options = NULL;
+ char *portstr = NULL;
+ int ret = -1;
+
+ if (!nfsx || !nfsx->private)
+ return NULL;
+
+ nfs = (struct nfs_state *)nfsx->private;
+
+ gf_log (GF_MNT, GF_LOG_DEBUG, "Initing Mount v1 state");
+ mstate = (struct mount3_state *)nfs->mstate;
+ if (!mstate) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Mount v3 state init failed");
+ goto err;
+ }
+
+ mnt1prog.private = mstate;
+
+ options = dict_new ();
+
+ ret = gf_asprintf (&portstr, "%d", GF_MOUNTV1_PORT);
+ if (ret == -1)
+ goto err;
+
+ ret = dict_set_dynstr (options, "transport.socket.listen-port", portstr);
+ if (ret == -1)
+ goto err;
+ ret = dict_set_str (options, "transport-type", "socket");
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "dict_set_str error");
+ goto err;
+ }
+
+ if (nfs->allow_insecure) {
+ ret = dict_set_str (options, "rpc-auth-allow-insecure", "on");
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "dict_set_str error");
+ goto err;
+ }
+ ret = dict_set_str (options, "rpc-auth.ports.insecure", "on");
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "dict_set_str error");
+ goto err;
+ }
+ }
+
+ ret = rpcsvc_create_listeners (nfs->rpcsvc, options, nfsx->name);
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Unable to create listeners");
+ dict_unref (options);
+ goto err;
+ }
+
+ return &mnt1prog;
+err:
+ return NULL;
+}
+
+int
+mount_reconfigure_state (xlator_t *nfsx, dict_t *options)
+{
+ int ret = -1;
+ struct nfs_state *nfs = NULL;
+ struct mount3_state *ms = NULL;
+ struct mnt3_export *exp = NULL;
+ struct mnt3_export *texp = NULL;
+
+ if ((!nfsx) || (!options))
+ return (-1);
+
+ nfs = (struct nfs_state *)nfs_state (nfsx);
+ if (!nfs)
+ return (-1);
+
+ ms = nfs->mstate;
+ if (!ms)
+ return (-1);
+
+ /*
+ * Free() up the old export list. mnt3_init_options() will
+ * rebuild the export list from scratch. Do it with locking
+ * to avoid unnecessary race conditions.
+ */
+ LOCK (&ms->mountlock);
+ list_for_each_entry_safe (exp, texp, &ms->exportlist, explist) {
+ list_del (&exp->explist);
+ mnt3_export_free (exp);
+ }
+ ret = mnt3_init_options (ms, options);
+ UNLOCK (&ms->mountlock);
+
+ if (ret < 0) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Options reconfigure failed");
+ return (-1);
+ }
+
+ return (0);
+}
diff --git a/xlators/nfs/server/src/mount3.h b/xlators/nfs/server/src/mount3.h
new file mode 100644
index 000000000..7fc16ed57
--- /dev/null
+++ b/xlators/nfs/server/src/mount3.h
@@ -0,0 +1,131 @@
+/*
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _MOUNT3_H_
+#define _MOUNT3_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "rpcsvc.h"
+#include "dict.h"
+#include "xlator.h"
+#include "iobuf.h"
+#include "nfs.h"
+#include "list.h"
+#include "xdr-nfs3.h"
+#include "locking.h"
+#include "nfs3-fh.h"
+#include "uuid.h"
+
+/* Registered with portmap */
+#define GF_MOUNTV3_PORT 38465
+#define GF_MOUNTV3_IOB (2 * GF_UNIT_KB)
+#define GF_MOUNTV3_IOBPOOL (GF_MOUNTV3_IOB * 50)
+
+#define GF_MOUNTV1_PORT 38466
+#define GF_MNT GF_NFS"-mount"
+
+extern rpcsvc_program_t *
+mnt3svc_init (xlator_t *nfsx);
+
+extern rpcsvc_program_t *
+mnt1svc_init (xlator_t *nfsx);
+
+extern int
+mount_init_state (xlator_t *nfsx);
+
+extern int
+mount_reconfigure_state (xlator_t *nfsx, dict_t *options);
+
+void
+mount_rewrite_rmtab (struct mount3_state *ms, char *new_rmtab);
+
+/* Data structure used to store the list of mounts points currently
+ * in use by NFS clients.
+ */
+struct mountentry {
+ /* Links to mount3_state->mountlist. */
+ struct list_head mlist;
+
+ /* The export name */
+ char exname[MNTPATHLEN];
+ char hostname[MNTPATHLEN];
+};
+
+#define MNT3_EXPTYPE_VOLUME 1
+#define MNT3_EXPTYPE_DIR 2
+
+/* Structure to hold export-dir AUTH parameter */
+struct host_auth_spec {
+ char *host_addr; /* Allowed IP or host name */
+ int routeprefix; /* Routing prefix */
+ struct host_auth_spec *next; /* Pointer to next AUTH struct */
+};
+
+struct mnt3_export {
+ struct list_head explist;
+
+ /* The string that may contain either the volume name if the full volume
+ * is exported or the subdirectory in the volume.
+ */
+ char *expname;
+ /*
+ * IP address, hostname or subnets who are allowed to connect to expname
+ * subvolume or subdirectory
+ */
+ struct host_auth_spec* hostspec;
+ xlator_t *vol;
+ int exptype;
+
+ /* Extracted from nfs volume options if nfs.dynamicvolumes is on.
+ */
+ uuid_t volumeid;
+};
+
+struct mount3_state {
+ xlator_t *nfsx;
+
+ /* The buffers for all network IO are got from this pool. */
+ struct iobuf_pool *iobpool;
+
+ /* List of exports, can be volumes or directories in those volumes. */
+ struct list_head exportlist;
+
+ /* List of current mount points over all the exports from this
+ * server.
+ */
+ struct list_head mountlist;
+
+ /* Used to protect the mountlist. */
+ gf_lock_t mountlock;
+
+ /* Set to 0 if exporting full volumes is disabled. On by default. */
+ gf_boolean_t export_volumes;
+ gf_boolean_t export_dirs;
+};
+
+#define gf_mnt3_export_dirs(mst) ((mst)->export_dirs)
+
+struct mount3_resolve_state {
+ struct mnt3_export *exp;
+ struct mount3_state *mstate;
+ rpcsvc_request_t *req;
+
+ char remainingdir[MNTPATHLEN];
+ loc_t resolveloc;
+ struct nfs3_fh parentfh;
+};
+
+typedef struct mount3_resolve_state mnt3_resolve_t;
+
+#endif
diff --git a/xlators/nfs/server/src/mount3udp_svc.c b/xlators/nfs/server/src/mount3udp_svc.c
new file mode 100644
index 000000000..fb59e282c
--- /dev/null
+++ b/xlators/nfs/server/src/mount3udp_svc.c
@@ -0,0 +1,189 @@
+/*
+ Copyright (c) 2012 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#include "xdr-nfs3.h"
+#include "logging.h"
+#include "mem-pool.h"
+#include "nfs-mem-types.h"
+#include "mount3.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <rpc/pmap_clnt.h>
+#include <string.h>
+#include <memory.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+
+extern struct nfs3_fh* nfs3_rootfh (char *dp);
+extern mountres3 mnt3svc_set_mountres3 (mountstat3 stat, struct nfs3_fh *fh,
+ int *authflavor, u_int aflen);
+extern int
+mount3udp_add_mountlist (char *host, dirpath *expname);
+
+extern int
+mount3udp_delete_mountlist (char *host, dirpath *expname);
+
+
+/* only this thread will use this, no locking needed */
+char mnthost[INET_ADDRSTRLEN+1];
+
+mountres3 *
+mountudpproc3_mnt_3_svc(dirpath **dpp, struct svc_req *req)
+{
+ struct mountres3 *res = NULL;
+ int *autharr = NULL;
+ struct nfs3_fh *fh = NULL;
+ char *tmp = NULL;
+
+ tmp = (char *)*dpp;
+ while (*tmp == '/')
+ tmp++;
+ fh = nfs3_rootfh (tmp);
+ if (fh == NULL) {
+ gf_log (GF_MNT, GF_LOG_DEBUG, "unable to get fh for %s", tmp);
+ goto err;
+ }
+
+ res = GF_CALLOC (1, sizeof(*res), gf_nfs_mt_mountres3);
+ if (res == NULL) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "unable to allocate memory");
+ goto err;
+ }
+ autharr = GF_CALLOC (1, sizeof(*autharr), gf_nfs_mt_int);
+ if (autharr == NULL) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "unable to allocate memory");
+ goto err;
+ }
+ autharr[0] = AUTH_UNIX;
+ *res = mnt3svc_set_mountres3 (MNT3_OK, fh, autharr, 1);
+ mount3udp_add_mountlist (mnthost, *dpp);
+ return res;
+
+ err:
+ GF_FREE (fh);
+ GF_FREE (res);
+ GF_FREE (autharr);
+ return NULL;
+}
+
+mountstat3 *
+mountudpproc3_umnt_3_svc(dirpath **dp, struct svc_req *req)
+{
+ mountstat3 *stat = NULL;
+
+ stat = GF_CALLOC (1, sizeof(mountstat3), gf_nfs_mt_mountstat3);
+ if (stat == NULL) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "unable to allocate memory");
+ return NULL;
+ }
+ *stat = MNT3_OK;
+ mount3udp_delete_mountlist (mnthost, *dp);
+ return stat;
+}
+
+static void
+mountudp_program_3(struct svc_req *rqstp, register SVCXPRT *transp)
+{
+ union {
+ dirpath mountudpproc3_mnt_3_arg;
+ } argument;
+ char *result = NULL;
+ xdrproc_t _xdr_argument = NULL, _xdr_result = NULL;
+ char *(*local)(char *, struct svc_req *) = NULL;
+ mountres3 *res = NULL;
+ struct sockaddr_in *sin = NULL;
+
+ sin = svc_getcaller (transp);
+ inet_ntop (AF_INET, &sin->sin_addr, mnthost, INET_ADDRSTRLEN+1);
+
+ switch (rqstp->rq_proc) {
+ case NULLPROC:
+ (void) svc_sendreply (transp, (xdrproc_t) xdr_void,
+ (char *)NULL);
+ return;
+
+ case MOUNT3_MNT:
+ _xdr_argument = (xdrproc_t) xdr_dirpath;
+ _xdr_result = (xdrproc_t) xdr_mountres3;
+ local = (char *(*)(char *,
+ struct svc_req *)) mountudpproc3_mnt_3_svc;
+ break;
+
+ case MOUNT3_UMNT:
+ _xdr_argument = (xdrproc_t) xdr_dirpath;
+ _xdr_result = (xdrproc_t) xdr_mountstat3;
+ local = (char *(*)(char *,
+ struct svc_req *)) mountudpproc3_umnt_3_svc;
+ break;
+
+ default:
+ svcerr_noproc (transp);
+ return;
+ }
+ memset ((char *)&argument, 0, sizeof (argument));
+ if (!svc_getargs (transp, (xdrproc_t) _xdr_argument,
+ (caddr_t) &argument)) {
+ svcerr_decode (transp);
+ return;
+ }
+ result = (*local)((char *)&argument, rqstp);
+ if (result == NULL) {
+ gf_log (GF_MNT, GF_LOG_DEBUG, "PROC returned error");
+ svcerr_systemerr (transp);
+ }
+ if (result != NULL && !svc_sendreply(transp, (xdrproc_t) _xdr_result,
+ result)) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "svc_sendreply returned error");
+ svcerr_systemerr (transp);
+ }
+ if (!svc_freeargs (transp, (xdrproc_t) _xdr_argument,
+ (caddr_t) &argument)) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "unable to free arguments");
+ }
+ if (result == NULL)
+ return;
+ /* free the result */
+ switch (rqstp->rq_proc) {
+ case MOUNT3_MNT:
+ res = (mountres3 *) result;
+ GF_FREE (res->mountres3_u.mountinfo.fhandle.fhandle3_val);
+ GF_FREE (res->mountres3_u.mountinfo.auth_flavors.auth_flavors_val);
+ GF_FREE (res);
+ break;
+
+ case MOUNT3_UMNT:
+ GF_FREE (result);
+ break;
+ }
+ return;
+}
+
+void *
+mount3udp_thread (void *argv)
+{
+ register SVCXPRT *transp = NULL;
+
+ transp = svcudp_create(RPC_ANYSOCK);
+ if (transp == NULL) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "svcudp_create error");
+ return NULL;
+ }
+ if (!svc_register(transp, MOUNT_PROGRAM, MOUNT_V3,
+ mountudp_program_3, IPPROTO_UDP)) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "svc_register error");
+ return NULL;
+ }
+
+ svc_run ();
+ gf_log (GF_MNT, GF_LOG_ERROR, "svc_run returned");
+ return NULL;
+}
diff --git a/xlators/nfs/server/src/nfs-common.c b/xlators/nfs/server/src/nfs-common.c
new file mode 100644
index 000000000..f74396ee8
--- /dev/null
+++ b/xlators/nfs/server/src/nfs-common.c
@@ -0,0 +1,465 @@
+/*
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "rpcsvc.h"
+#include "dict.h"
+#include "xlator.h"
+#include "xdr-nfs3.h"
+#include "msg-nfs3.h"
+#include "iobuf.h"
+#include "nfs-common.h"
+#include "nfs-fops.h"
+#include "nfs-mem-types.h"
+#include "rpcsvc.h"
+#include "iatt.h"
+
+#include <libgen.h>
+
+xlator_t *
+nfs_xlid_to_xlator (xlator_list_t *cl, uint8_t xlid)
+{
+ xlator_t *xl = NULL;
+ uint8_t id = 0;
+
+ while (id <= xlid) {
+ if (!cl) {
+ xl = NULL;
+ break;
+ }
+
+ xl = cl->xlator;
+ cl = cl->next;
+ id++;
+ }
+
+ return xl;
+}
+
+
+xlator_t *
+nfs_path_to_xlator (xlator_list_t *cl, char *path)
+{
+ return NULL;
+}
+
+
+uint16_t
+nfs_xlator_to_xlid (xlator_list_t *cl, xlator_t *xl)
+{
+ uint16_t xlid = 0;
+
+ if ((!cl) || (!xl))
+ return 0;
+
+ while (cl) {
+ if (xl == cl->xlator)
+ break;
+ cl = cl->next;
+ ++xlid;
+ }
+
+ return xlid;
+}
+
+
+xlator_t *
+nfs_mntpath_to_xlator (xlator_list_t *cl, char *path)
+{
+ char volname[MNTPATHLEN];
+ char *volptr = NULL;
+ size_t pathlen;
+ xlator_t *targetxl = NULL;
+
+ if ((!cl) || (!path))
+ return NULL;
+
+ strncpy (volname, path, MNTPATHLEN);
+ pathlen = strlen (volname);
+ gf_log (GF_NFS, GF_LOG_TRACE, "Subvolume search: %s", path);
+ if (volname[0] == '/')
+ volptr = &volname[1];
+ else
+ volptr = &volname[0];
+
+ if (pathlen && volname[pathlen - 1] == '/')
+ volname[pathlen - 1] = '\0';
+
+ while (cl) {
+ if (strcmp (volptr, cl->xlator->name) == 0) {
+ targetxl = cl->xlator;
+ break;
+ }
+
+ cl = cl->next;
+ }
+
+ return targetxl;
+
+}
+
+
+/* Returns 1 if the stat seems to be filled with zeroes. */
+int
+nfs_zero_filled_stat (struct iatt *buf)
+{
+ if (!buf)
+ return 1;
+
+ /* Do not use st_dev because it is transformed to store the xlator id
+ * in place of the device number. Do not use st_ino because by this time
+ * we've already mapped the root ino to 1 so it is not guaranteed to be
+ * 0.
+ */
+ if ((buf->ia_nlink == 0) && (buf->ia_ctime == 0))
+ return 1;
+
+ return 0;
+}
+
+
+void
+nfs_loc_wipe (loc_t *loc)
+{
+ loc_wipe (loc);
+}
+
+
+int
+nfs_loc_copy (loc_t *dst, loc_t *src)
+{
+ int ret = -1;
+
+ ret = loc_copy (dst, src);
+
+ return ret;
+}
+
+
+int
+nfs_loc_fill (loc_t *loc, inode_t *inode, inode_t *parent, char *path)
+{
+ int ret = -EFAULT;
+
+ if (!loc)
+ return ret;
+
+ if (inode) {
+ loc->inode = inode_ref (inode);
+ if (!uuid_is_null (inode->gfid))
+ uuid_copy (loc->gfid, inode->gfid);
+ }
+
+ if (parent)
+ loc->parent = inode_ref (parent);
+
+ if (path) {
+ loc->path = gf_strdup (path);
+ if (!loc->path) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "strdup failed");
+ goto loc_wipe;
+ }
+ loc->name = strrchr (loc->path, '/');
+ if (loc->name)
+ loc->name++;
+ }
+
+ ret = 0;
+loc_wipe:
+ if (ret < 0)
+ nfs_loc_wipe (loc);
+
+ return ret;
+}
+
+
+int
+nfs_inode_loc_fill (inode_t *inode, loc_t *loc, int how)
+{
+ char *resolvedpath = NULL;
+ inode_t *parent = NULL;
+ int ret = -EFAULT;
+
+ if ((!inode) || (!loc))
+ return ret;
+
+ /* If gfid is not null, then the inode is already linked to
+ * the inode table, and not a newly created one. For newly
+ * created inode, inode_path returns null gfid as the path.
+ */
+ if (!uuid_is_null (inode->gfid)) {
+ ret = inode_path (inode, NULL, &resolvedpath);
+ if (ret < 0) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "path resolution failed "
+ "%s", resolvedpath);
+ goto err;
+ }
+ }
+
+ if (resolvedpath == NULL) {
+ char tmp_path[GFID_STR_PFX_LEN + 1] = {0,};
+ snprintf (tmp_path, sizeof (tmp_path), "<gfid:%s>",
+ uuid_utoa (loc->gfid));
+ resolvedpath = gf_strdup (tmp_path);
+ } else {
+ parent = inode_parent (inode, loc->pargfid, NULL);
+ }
+
+ ret = nfs_loc_fill (loc, inode, parent, resolvedpath);
+ if (ret < 0) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "loc fill resolution failed %s",
+ resolvedpath);
+ goto err;
+ }
+
+ ret = 0;
+err:
+ if (parent)
+ inode_unref (parent);
+
+ GF_FREE (resolvedpath);
+
+ return ret;
+}
+
+int
+nfs_gfid_loc_fill (inode_table_t *itable, uuid_t gfid, loc_t *loc, int how)
+{
+ int ret = -EFAULT;
+ inode_t *inode = NULL;
+
+ if (!loc)
+ return ret;
+
+ inode = inode_find (itable, gfid);
+ if (!inode) {
+ gf_log (GF_NFS, GF_LOG_TRACE, "Inode not found in itable, will try to create one.");
+ if (how == NFS_RESOLVE_CREATE) {
+ gf_log (GF_NFS, GF_LOG_TRACE, "Inode needs to be created.");
+ inode = inode_new (itable);
+ if (!inode) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to "
+ "allocate memory");
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ } else {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Inode not found in itable and no creation was requested.");
+ ret = -ENOENT;
+ goto err;
+ }
+ } else {
+ gf_log (GF_NFS, GF_LOG_TRACE, "Inode was found in the itable.");
+ }
+
+ uuid_copy (loc->gfid, gfid);
+
+ ret = nfs_inode_loc_fill (inode, loc, how);
+ if (ret < 0) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Inode loc filling failed.: %s", strerror (-ret));
+ goto err;
+ }
+
+err:
+ if (inode)
+ inode_unref (inode);
+ return ret;
+}
+
+
+int
+nfs_root_loc_fill (inode_table_t *itable, loc_t *loc)
+{
+ uuid_t rootgfid = {0, };
+
+ rootgfid[15] = 1;
+ return nfs_gfid_loc_fill (itable, rootgfid, loc, NFS_RESOLVE_EXIST);
+}
+
+
+
+int
+nfs_parent_inode_loc_fill (inode_t *parent, inode_t *entryinode, char *entry,
+ loc_t *loc)
+{
+ int ret = -EFAULT;
+ char *path = NULL;
+
+ if ((!parent) || (!entry) || (!loc) || (!entryinode))
+ return ret;
+
+ ret = inode_path (parent, entry, &path);
+ if (ret < 0) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "path resolution failed %s",
+ path);
+ goto err;
+ }
+
+ ret = nfs_loc_fill (loc, entryinode, parent, path);
+ GF_FREE (path);
+err:
+ return ret;
+}
+
+
+/* Returns -1 if parent is not available, return -2 if the entry is not
+ * available. In case the return is going to be -2, and how = NFS_RESOLVE_CREATE
+ * it does however fill in the loc so that it can be used to perform a lookup
+ * fop for the entry.
+ * On other errors, return -3. 0 on success.
+ */
+int
+nfs_entry_loc_fill (inode_table_t *itable, uuid_t pargfid, char *entry,
+ loc_t *loc, int how)
+{
+ inode_t *parent = NULL;
+ inode_t *entryinode = NULL;
+ int ret = -3;
+ char *resolvedpath = NULL;
+ int pret = -3;
+
+ if ((!itable) || (!entry) || (!loc))
+ return ret;
+
+ parent = inode_find (itable, pargfid);
+
+ ret = -1;
+ /* Will need hard resolution now */
+ if (!parent)
+ goto err;
+
+ uuid_copy (loc->pargfid, pargfid);
+
+ ret = -2;
+ entryinode = inode_grep (itable, parent, entry);
+ if (!entryinode) {
+ if (how == NFS_RESOLVE_CREATE) {
+ /* Even though we'll create the inode and the loc for
+ * a missing inode, we still need to return -2 so
+ * that the caller can use the filled loc to call
+ * lookup.
+ */
+ entryinode = inode_new (itable);
+ /* Cannot change ret because that must
+ * continue to have -2.
+ */
+ pret = nfs_parent_inode_loc_fill (parent, entryinode,
+ entry, loc);
+ /* Only if parent loc fill fails, should we notify error
+ * through ret, otherwise, we still need to force a
+ * lookup by returning -2.
+ */
+ if (pret < 0)
+ ret = -3;
+ }
+ goto err;
+ }
+
+ ret = inode_path (parent, entry, &resolvedpath);
+ if (ret < 0) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "path resolution failed %s",
+ resolvedpath);
+ ret = -3;
+ goto err;
+ }
+
+ ret = nfs_loc_fill (loc, entryinode, parent, resolvedpath);
+ if (ret < 0) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "loc_fill failed %s",
+ resolvedpath);
+ ret = -3;
+ }
+
+err:
+ if (parent)
+ inode_unref (parent);
+
+ if (entryinode)
+ inode_unref (entryinode);
+
+ GF_FREE (resolvedpath);
+
+ return ret;
+}
+
+
+uint32_t
+nfs_hash_gfid (uuid_t gfid)
+{
+ uint32_t hash = 0;
+ uint64_t msb64 = 0;
+ uint64_t lsb64 = 0;
+ uint32_t a1 = 0;
+ uint32_t a2 = 0;
+ uint32_t a3 = 0;
+ uint32_t a4 = 0;
+ uint32_t b1 = 0;
+ uint32_t b2 = 0;
+
+ if (__is_root_gfid (gfid))
+ return 0x1;
+
+ memcpy (&msb64, &gfid[8], 8);
+ memcpy (&lsb64, &gfid[0], 8);
+
+ a1 = (msb64 << 32);
+ a2 = (msb64 >> 32);
+ a3 = (lsb64 << 32);
+ a4 = (lsb64 >> 32);
+
+ b1 = a1 ^ a4;
+ b2 = a2 ^ a3;
+
+ hash = b1 ^ b2;
+
+ return hash;
+}
+
+
+void
+nfs_fix_generation (xlator_t *this, inode_t *inode)
+{
+ uint64_t raw_ctx = 0;
+ struct nfs_inode_ctx *ictx = NULL;
+ struct nfs_state *priv = NULL;
+ int ret = -1;
+
+ if (!inode) {
+ return;
+ }
+ priv = this->private;
+
+ if (inode_ctx_get(inode,this,&raw_ctx) == 0) {
+ ictx = (struct nfs_inode_ctx *)raw_ctx;
+ ictx->generation = priv->generation;
+ }
+ else {
+ ictx = GF_CALLOC (1, sizeof (struct nfs_inode_ctx),
+ gf_nfs_mt_inode_ctx);
+ if (!ictx) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not allocate nfs inode ctx");
+ return;
+ }
+ INIT_LIST_HEAD(&ictx->shares);
+ ictx->generation = priv->generation;
+ ret = inode_ctx_put (inode, this, (uint64_t)ictx);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not store nfs inode ctx");
+ return;
+ }
+ }
+}
diff --git a/xlators/nfs/server/src/nfs-common.h b/xlators/nfs/server/src/nfs-common.h
new file mode 100644
index 000000000..2e97f1563
--- /dev/null
+++ b/xlators/nfs/server/src/nfs-common.h
@@ -0,0 +1,81 @@
+/*
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _NFS_COMMON_H_
+#define _NFS_COMMON_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <unistd.h>
+
+#include "xlator.h"
+#include "rpcsvc.h"
+#include "iatt.h"
+#include "uuid.h"
+
+//NFS_PATH_MAX hard-coded to 4096 as a work around for bug 2476.
+//nfs server crashes when path received is longer than PATH_MAX
+#define NFS_PATH_MAX 4096
+#define NFS_NAME_MAX NAME_MAX
+
+#define NFS_DEFAULT_CREATE_MODE 0600
+
+extern xlator_t *
+nfs_xlid_to_xlator (xlator_list_t *cl, uint8_t xlid);
+
+extern uint16_t
+nfs_xlator_to_xlid (xlator_list_t *cl, xlator_t *xl);
+
+extern xlator_t *
+nfs_path_to_xlator (xlator_list_t *cl, char *path);
+
+extern xlator_t *
+nfs_mntpath_to_xlator (xlator_list_t *cl, char *path);
+
+extern int
+nfs_zero_filled_stat (struct iatt *buf);
+
+extern void
+nfs_loc_wipe (loc_t *loc);
+
+extern int
+nfs_loc_copy (loc_t *dst, loc_t *src);
+
+extern int
+nfs_loc_fill (loc_t *loc, inode_t *inode, inode_t *parent, char *path);
+
+#define NFS_RESOLVE_EXIST 1
+#define NFS_RESOLVE_CREATE 2
+
+extern int
+nfs_inode_loc_fill (inode_t *inode, loc_t *loc, int how);
+
+extern int
+nfs_ino_loc_fill (inode_table_t *itable, uuid_t gfid, loc_t *l);
+
+extern int
+nfs_entry_loc_fill (inode_table_t *itable, uuid_t pargfid, char *entry,
+ loc_t *loc, int how);
+
+extern int
+nfs_root_loc_fill (inode_table_t *itable, loc_t *loc);
+
+extern uint32_t
+nfs_hash_gfid (uuid_t gfid);
+
+extern int
+nfs_gfid_loc_fill (inode_table_t *itable, uuid_t gfid, loc_t *loc, int how);
+
+void
+nfs_fix_generation (xlator_t *this, inode_t *inode);
+#endif
diff --git a/xlators/nfs/server/src/nfs-fops.c b/xlators/nfs/server/src/nfs-fops.c
new file mode 100644
index 000000000..236b80c76
--- /dev/null
+++ b/xlators/nfs/server/src/nfs-fops.c
@@ -0,0 +1,1661 @@
+/*
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <grp.h>
+#include <pwd.h>
+
+#include "dict.h"
+#include "xlator.h"
+#include "iobuf.h"
+#include "call-stub.h"
+#include "stack.h"
+#include "nfs.h"
+#include "nfs-fops.h"
+#include "inode.h"
+#include "nfs-common.h"
+#include "nfs3-helpers.h"
+#include "nfs-mem-types.h"
+#include <libgen.h>
+#include <semaphore.h>
+
+void
+nfs_fix_groups (xlator_t *this, call_stack_t *root)
+{
+ struct passwd mypw;
+ char mystrs[1024];
+ struct passwd *result;
+ gid_t mygroups[GF_MAX_AUX_GROUPS];
+ int ngroups;
+ int i;
+ struct nfs_state *priv = this->private;
+ const gid_list_t *agl;
+ gid_list_t gl;
+
+ if (!priv->server_aux_gids) {
+ return;
+ }
+
+ agl = gid_cache_lookup(&priv->gid_cache, root->uid);
+ if (agl) {
+ for (ngroups = 0; ngroups < agl->gl_count; ngroups++)
+ root->groups[ngroups] = agl->gl_list[ngroups];
+ root->ngrps = ngroups;
+ gid_cache_release(&priv->gid_cache, agl);
+ return;
+ }
+
+ /* No cached list found. */
+ if (getpwuid_r(root->uid,&mypw,mystrs,sizeof(mystrs),&result) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "getpwuid_r(%u) failed", root->uid);
+ return;
+ }
+
+ if (!result) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "getpwuid_r(%u) found nothing", root->uid);
+ return;
+ }
+
+ gf_log (this->name, GF_LOG_TRACE, "mapped %u => %s",
+ root->uid, result->pw_name);
+
+ ngroups = GF_MAX_AUX_GROUPS;
+ if (getgrouplist(result->pw_name,root->gid,mygroups,&ngroups) == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not map %s to group list", result->pw_name);
+ return;
+ }
+
+ /* Add the group data to the cache. */
+ gl.gl_list = GF_CALLOC(ngroups, sizeof(gid_t), gf_nfs_mt_aux_gids);
+ if (gl.gl_list) {
+ /* It's not fatal if the alloc failed. */
+ gl.gl_id = root->uid;
+ gl.gl_count = ngroups;
+ memcpy(gl.gl_list, mygroups, sizeof(gid_t) * ngroups);
+ if (gid_cache_add(&priv->gid_cache, &gl) != 1)
+ GF_FREE(gl.gl_list);
+ }
+
+ /* Copy data to the frame. */
+ for (i = 0; i < ngroups; ++i) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s is in group %u", result->pw_name, mygroups[i]);
+ root->groups[i] = mygroups[i];
+ }
+ root->ngrps = ngroups;
+}
+
+struct nfs_fop_local *
+nfs_fop_local_init (xlator_t *nfsx)
+{
+ struct nfs_fop_local *l = NULL;
+
+ if (!nfsx)
+ return NULL;
+
+ l = mem_get (nfs_fop_mempool (nfsx));
+
+ memset (l, 0, sizeof (*l));
+ return l;
+}
+
+void
+nfs_fop_local_wipe (xlator_t *nfsx, struct nfs_fop_local *l)
+{
+ if ((!nfsx) || (!l))
+ return;
+
+ if (l->iobref)
+ iobref_unref (l->iobref);
+
+ if (l->parent)
+ inode_unref (l->parent);
+
+ if (l->inode)
+ inode_unref (l->inode);
+
+ if (l->newparent)
+ inode_unref (l->newparent);
+
+ if (l->dictgfid)
+ dict_unref (l->dictgfid);
+
+ mem_put (l);
+
+ return;
+}
+
+#define nfs_stack_destroy(nfl, fram) \
+ do { \
+ nfs_fop_local_wipe ((nfl)->nfsx, nfl); \
+ (fram)->local = NULL; \
+ STACK_DESTROY ((fram)->root); \
+ } while (0) \
+
+
+pthread_mutex_t ctr = PTHREAD_MUTEX_INITIALIZER;
+unsigned int cval = 1;
+
+
+int
+nfs_frame_getctr ()
+{
+ uint64_t val = 0;
+
+ pthread_mutex_lock (&ctr);
+ {
+ if (cval == 0)
+ cval = 1;
+ val = cval;
+ cval++;
+ }
+ pthread_mutex_unlock (&ctr);
+
+ return val;
+}
+
+
+call_frame_t *
+nfs_create_frame (xlator_t *xl, nfs_user_t *nfu)
+{
+ call_frame_t *frame = NULL;
+ int x = 0;
+ int y = 0;
+
+ if ((!xl) || (!nfu) || (nfu->ngrps > NFS_NGROUPS))
+ return NULL;
+
+ frame = create_frame (xl, (call_pool_t *)xl->ctx->pool);
+ if (!frame)
+ goto err;
+ if (call_stack_alloc_groups (frame->root, nfu->ngrps) != 0) {
+ STACK_DESTROY (frame->root);
+ frame = NULL;
+ goto err;
+ }
+
+ frame->root->pid = NFS_PID;
+ frame->root->uid = nfu->uid;
+ frame->root->gid = nfu->gids[NFS_PRIMGID_IDX];
+ frame->root->lk_owner = nfu->lk_owner;
+
+ if (nfu->ngrps != 1) {
+ frame->root->ngrps = nfu->ngrps - 1;
+
+ gf_log (GF_NFS, GF_LOG_TRACE,"uid: %d, gid %d, gids: %d",
+ frame->root->uid, frame->root->gid, frame->root->ngrps);
+ for(y = 0, x = 1; y < frame->root->ngrps; x++,y++) {
+ gf_log (GF_NFS, GF_LOG_TRACE, "gid: %d", nfu->gids[x]);
+ frame->root->groups[y] = nfu->gids[x];
+ }
+ }
+
+ /*
+ * It's tempting to do this *instead* of using nfu above, but we need
+ * to have those values in case nfs_fix_groups doesn't do anything.
+ */
+ nfs_fix_groups(xl,frame->root);
+
+err:
+ return frame;
+}
+
+#define nfs_fop_handle_frame_create(fram, xla, nfuser, retval, errlabel) \
+ do { \
+ fram = nfs_create_frame (xla, (nfuser)); \
+ if (!fram) { \
+ retval = (-ENOMEM); \
+ gf_log (GF_NFS, GF_LOG_ERROR,"Frame creation failed");\
+ goto errlabel; \
+ } \
+ } while (0) \
+
+/* Look into the inode and parent inode of a loc and save enough state
+ * for us to determine in the callback whether to funge the ino in the stat buf
+ * with 1 for the parent.
+ */
+#define nfs_fop_save_root_ino(locl, loc) \
+ do { \
+ if (((loc)->inode) && \
+ __is_root_gfid ((loc)->inode->gfid)) \
+ (locl)->rootinode = 1; \
+ else if (((loc)->parent) && \
+ __is_root_gfid ((loc)->parent->gfid)) \
+ (locl)->rootparentinode = 1; \
+ } while (0)
+
+/* Do the same for an fd */
+#define nfs_fop_save_root_fd_ino(locl, fdesc) \
+ do { \
+ if (__is_root_gfid ((fdesc)->inode->gfid)) \
+ (locl)->rootinode = 1; \
+ } while (0)
+
+
+/* Use the state saved by the previous macro to funge the ino in the appropriate
+ * structure.
+ */
+#define nfs_fop_restore_root_ino(locl, fopret, preattr, postattr, prepar, postpar) \
+ do { \
+ if (fopret == -1) \
+ break; \
+ if ((locl)->rootinode) { \
+ if ((preattr)) { \
+ ((struct iatt *)(preattr))->ia_ino = 1; \
+ ((struct iatt *)(preattr))->ia_dev = 0; \
+ } \
+ if ((postattr)) { \
+ ((struct iatt *)(postattr))->ia_ino = 1; \
+ ((struct iatt *)(postattr))->ia_dev = 0; \
+ } \
+ } else if ((locl)->rootparentinode) { \
+ if ((prepar)) { \
+ ((struct iatt *)(prepar))->ia_ino = 1; \
+ ((struct iatt *)(prepar))->ia_dev = 0; \
+ } \
+ if ((postpar)) { \
+ ((struct iatt *)(postpar))->ia_ino = 1; \
+ ((struct iatt *)(postpar))->ia_dev = 0; \
+ } \
+ } \
+ } while (0) \
+
+/* If the newly created, inode's parent is root, we'll need to funge the ino
+ * in the parent attr when we receive them in the callback.
+ */
+#define nfs_fop_newloc_save_root_ino(locl, newloc) \
+ do { \
+ if (((newloc)->inode) && \
+ __is_root_gfid ((newloc)->inode->gfid)) \
+ (locl)->newrootinode = 1; \
+ else if (((newloc)->parent) && \
+ __is_root_gfid ((newloc)->parent->gfid)) \
+ (locl)->newrootparentinode = 1; \
+ } while (0)
+
+#define nfs_fop_newloc_restore_root_ino(locl, fopret, preattr, postattr, prepar, postpar) \
+ do { \
+ if (fopret == -1) \
+ break; \
+ \
+ if ((locl)->newrootinode) { \
+ if ((preattr)) \
+ ((struct iatt *)(preattr))->ia_ino = 1; \
+ if ((postattr)) \
+ ((struct iatt *)(postattr))->ia_ino = 1; \
+ } else if ((locl)->newrootparentinode) { \
+ if ((prepar)) \
+ ((struct iatt *)(prepar))->ia_ino = 1; \
+ if ((postpar)) \
+ ((struct iatt *)(postpar))->ia_ino = 1; \
+ } \
+ } while (0) \
+
+dict_t *
+nfs_gfid_dict (inode_t *inode)
+{
+ uuid_t newgfid = {0, };
+ char *dyngfid = NULL;
+ dict_t *dictgfid = NULL;
+ int ret = -1;
+ uuid_t rootgfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+
+ dyngfid = GF_CALLOC (1, sizeof (uuid_t), gf_common_mt_char);
+ uuid_generate (newgfid);
+
+ if (uuid_compare (inode->gfid, rootgfid) == 0)
+ memcpy (dyngfid, rootgfid, sizeof (uuid_t));
+ else
+ memcpy (dyngfid, newgfid, sizeof (uuid_t));
+
+ dictgfid = dict_new ();
+ if (!dictgfid) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to create gfid dict");
+ goto out;
+ }
+
+ ret = dict_set_bin (dictgfid, "gfid-req", dyngfid, sizeof (uuid_t));
+ if (ret < 0) {
+ dict_unref (dictgfid);
+ dictgfid = NULL;
+ }
+
+out:
+ return dictgfid;
+}
+
+#define nfs_fop_gfid_setup(nflcl, inode, retval, erlbl) \
+ do { \
+ if (nflcl) { \
+ (nflcl)->dictgfid = nfs_gfid_dict (inode); \
+ \
+ if (!((nflcl)->dictgfid)) { \
+ retval = -EFAULT; \
+ goto erlbl; \
+ } \
+ } \
+ } while (0) \
+
+/* Fops Layer Explained
+ * The fops layer has three types of functions. They can all be identified by
+ * their names. Here are the three patterns:
+ *
+ * nfs_fop_<fopname>
+ * This is the lowest level function that knows nothing about states and
+ * callbacks. At most this is required to create a frame and call the
+ * fop. The idea here is to provide a convenient way to call fops than
+ * directly use STACK_WINDs. If this type of interface is used, the caller's
+ * callback is responsible for doing the relevant GlusterFS state
+ * maintenance operations on the data returned in the callbacks.
+ *
+ * nfs_<fopname>
+ * Unlike the nfs_fop_<fopname> variety, this is the stateful type of fop, in
+ * that it silently performs all the relevant GlusterFS state maintenance
+ * operations on the data returned to the callbacks, leaving the caller's
+ * callback to just use the data returned for whatever it needs to do with that
+ * data, for eg. the nfs_lookup, will take care of looking up the inodes,
+ * revalidating them if needed and linking new inodes into the table, while
+ * the caller's callback, for eg, the NFSv3 LOOKUP callback can just use
+ * the stat bufs returned to create file handle, map the file handle into the
+ * fh cache and finally encode the fh and the stat bufs into a NFS reply.
+ *
+ */
+
+int32_t
+nfs_fop_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr, struct iatt *postparent)
+{
+ struct nfs_fop_local *local = NULL;
+ fop_lookup_cbk_t progcbk;
+
+ if (op_ret == 0) {
+ nfs_fix_generation(this,inode);
+ }
+
+ nfl_to_prog_data (local, progcbk, frame);
+ nfs_fop_restore_root_ino (local, op_ret, buf, NULL, NULL, postparent);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, inode, buf,
+ xattr, postparent);
+
+ nfs_stack_destroy (local, frame);
+ return 0;
+}
+
+
+int
+nfs_fop_lookup (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
+ fop_lookup_cbk_t cbk, void *local)
+{
+ call_frame_t *frame = NULL;
+ int ret = -EFAULT;
+ struct nfs_fop_local *nfl = NULL;
+
+ if ((!xl) || (!loc) || (!nfu))
+ return ret;
+
+ gf_log (GF_NFS, GF_LOG_TRACE, "Lookup: %s", loc->path);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
+ nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
+ nfs_fop_save_root_ino (nfl, loc);
+ nfs_fop_gfid_setup (nfl, loc->inode, ret, err);
+
+ STACK_WIND_COOKIE (frame, nfs_fop_lookup_cbk, xl, xl,
+ xl->fops->lookup, loc, nfl->dictgfid);
+
+ ret = 0;
+err:
+ if (ret < 0) {
+ if (frame)
+ nfs_stack_destroy (nfl, frame);
+ }
+
+ return ret;
+}
+
+int32_t
+nfs_fop_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fop_access_cbk_t progcbk = NULL;
+
+ nfl_to_prog_data (nfl, progcbk, frame);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, xdata);
+
+ nfs_stack_destroy (nfl, frame);
+ return 0;
+}
+
+int
+nfs_fop_access (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
+ int32_t accesstest, fop_access_cbk_t cbk, void *local)
+{
+ call_frame_t *frame = NULL;
+ int ret = -EFAULT;
+ struct nfs_fop_local *nfl = NULL;
+ uint32_t accessbits = 0;
+
+ if ((!xl) || (!loc) || (!nfu))
+ return ret;
+
+ gf_log (GF_NFS, GF_LOG_TRACE, "Access: %s", loc->path);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
+ nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
+ nfs_fop_save_root_ino (nfl, loc);
+
+ accessbits = nfs3_request_to_accessbits (accesstest);
+ STACK_WIND_COOKIE (frame, nfs_fop_access_cbk, xl, xl, xl->fops->access,
+ loc, accessbits, NULL);
+ ret = 0;
+err:
+ if (ret < 0) {
+ if (frame)
+ nfs_stack_destroy (nfl, frame);
+ }
+
+ return ret;
+}
+
+int32_t
+nfs_fop_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fop_stat_cbk_t progcbk = NULL;
+
+ nfl_to_prog_data (nfl, progcbk, frame);
+ nfs_fop_restore_root_ino (nfl, op_ret, buf, NULL, NULL, NULL);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, buf, xdata);
+
+ nfs_stack_destroy (nfl, frame);
+ return 0;
+}
+
+
+int
+nfs_fop_stat (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
+ fop_stat_cbk_t cbk, void *local)
+{
+ call_frame_t *frame = NULL;
+ int ret = -EFAULT;
+ struct nfs_fop_local *nfl = NULL;
+
+ if ((!xl) || (!loc) || (!nfu))
+ return ret;
+
+ gf_log (GF_NFS, GF_LOG_TRACE, "Stat: %s", loc->path);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
+ nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
+ nfs_fop_save_root_ino (nfl, loc);
+
+ STACK_WIND_COOKIE (frame, nfs_fop_stat_cbk, xl, xl, xl->fops->stat,
+ loc, NULL);
+ ret = 0;
+err:
+ if (ret < 0) {
+ if (frame)
+ nfs_stack_destroy (nfl, frame);
+ }
+
+ return ret;
+}
+
+
+int32_t
+nfs_fop_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fop_fstat_cbk_t progcbk = NULL;
+
+ nfl_to_prog_data (nfl, progcbk, frame);
+ nfs_fop_restore_root_ino (nfl, op_ret, buf, NULL, NULL, NULL);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, buf, xdata);
+
+ nfs_stack_destroy (nfl, frame);
+ return 0;
+}
+
+
+int
+nfs_fop_fstat (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
+ fop_fstat_cbk_t cbk, void *local)
+{
+ call_frame_t *frame = NULL;
+ int ret = -EFAULT;
+ struct nfs_fop_local *nfl = NULL;
+
+ if ((!nfsx) || (!xl) || (!fd) || (!nfu))
+ return ret;
+
+ gf_log (GF_NFS, GF_LOG_TRACE, "FStat");
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
+ nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
+ nfs_fop_save_root_fd_ino (nfl, fd);
+
+ STACK_WIND_COOKIE (frame, nfs_fop_fstat_cbk, xl, xl, xl->fops->fstat,
+ fd, NULL);
+
+ ret = 0;
+err:
+ if (ret < 0) {
+ if (frame)
+ nfs_stack_destroy (nfl, frame);
+ }
+
+ return ret;
+}
+
+
+int32_t
+nfs_fop_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fop_opendir_cbk_t progcbk = NULL;
+
+ nfl_to_prog_data (nfl, progcbk, frame);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, fd, xdata);
+ nfs_stack_destroy (nfl, frame);
+ return 0;
+}
+
+
+int
+nfs_fop_opendir (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ fd_t *dirfd, fop_opendir_cbk_t cbk, void *local)
+{
+ call_frame_t *frame = NULL;
+ int ret = -EFAULT;
+ struct nfs_fop_local *nfl = NULL;
+
+ if ((!nfsx) || (!xl) || (!pathloc) || (!dirfd) || (!nfu))
+ return ret;
+
+ gf_log (GF_NFS, GF_LOG_TRACE, "Opendir: %s", pathloc->path);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
+ nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
+
+ STACK_WIND_COOKIE (frame, nfs_fop_opendir_cbk, xl, xl,
+ xl->fops->opendir, pathloc, dirfd, NULL);
+ ret = 0;
+
+err:
+ if (ret < 0) {
+ if (frame)
+ nfs_stack_destroy (nfl, frame);
+ }
+
+ return ret;
+}
+
+int
+nfs_fop_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fop_flush_cbk_t progcbk = NULL;
+
+ nfl_to_prog_data (nfl, progcbk, frame);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, xdata);
+
+ nfs_stack_destroy (nfl, frame);
+ return 0;
+}
+
+
+int
+nfs_fop_flush (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
+ fop_flush_cbk_t cbk, void *local)
+{
+ call_frame_t *frame = NULL;
+ int ret = -EFAULT;
+ struct nfs_fop_local *nfl = NULL;
+
+ if ((!nfsx) || (!xl) || (!fd) || (!nfu))
+ return ret;
+
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
+ nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
+
+ STACK_WIND_COOKIE (frame, nfs_fop_flush_cbk, xl, xl, xl->fops->flush,
+ fd, NULL);
+ ret = 0;
+err:
+ if (ret < 0) {
+ if (frame)
+ nfs_stack_destroy (nfl, frame);
+ }
+
+ return ret;
+}
+
+
+int32_t
+nfs_fop_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fop_readdirp_cbk_t progcbk = NULL;
+
+ nfl_to_prog_data (nfl, progcbk, frame);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, entries, xdata);
+
+ nfs_stack_destroy (nfl, frame);
+
+ return 0;
+}
+
+
+int
+nfs_fop_readdirp (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *dirfd,
+ size_t bufsize, off_t offset, fop_readdirp_cbk_t cbk,
+ void *local)
+{
+ call_frame_t *frame = NULL;
+ int ret = -EFAULT;
+ struct nfs_fop_local *nfl = NULL;
+
+ if ((!nfsx) || (!xl) || (!dirfd) || (!nfu))
+ return ret;
+
+ gf_log (GF_NFS, GF_LOG_TRACE, "readdir");
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
+ nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
+
+ STACK_WIND_COOKIE (frame, nfs_fop_readdirp_cbk, xl, xl,
+ xl->fops->readdirp, dirfd, bufsize, offset, 0);
+
+ ret = 0;
+err:
+ if (ret < 0) {
+ if (frame)
+ nfs_stack_destroy (nfl, frame);
+ }
+
+ return ret;
+}
+
+
+int32_t
+nfs_fop_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
+{
+
+ struct nfs_fop_local *nfl = NULL;
+ fop_statfs_cbk_t progcbk = NULL;
+
+ nfl_to_prog_data (nfl, progcbk, frame);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, buf, xdata);
+
+ nfs_stack_destroy (nfl, frame);
+ return 0;
+}
+
+
+int
+nfs_fop_statfs (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ fop_statfs_cbk_t cbk, void *local)
+{
+ call_frame_t *frame = NULL;
+ int ret = -EFAULT;
+ struct nfs_fop_local *nfl = NULL;
+
+ if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
+ return ret;
+
+ gf_log (GF_NFS, GF_LOG_TRACE, "Statfs: %s", pathloc->path);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
+ nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
+
+ STACK_WIND_COOKIE (frame, nfs_fop_statfs_cbk, xl, xl,
+ xl->fops->statfs, pathloc, NULL);
+ ret = 0;
+err:
+ if (ret < 0) {
+ if (frame)
+ nfs_stack_destroy (nfl, frame);
+ }
+
+ return ret;
+}
+
+
+int32_t
+nfs_fop_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fop_create_cbk_t progcbk = NULL;
+
+ if (op_ret == 0) {
+ nfs_fix_generation(this,inode);
+ }
+
+ nfl_to_prog_data (nfl, progcbk, frame);
+ nfs_fop_restore_root_ino (nfl, op_ret, buf, NULL, preparent,
+ postparent);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, NULL);
+
+ nfs_stack_destroy (nfl, frame);
+ return 0;
+}
+
+
+int
+nfs_fop_create (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ int flags, mode_t mode, fd_t *fd, fop_create_cbk_t cbk,
+ void *local)
+{
+ call_frame_t *frame = NULL;
+ int ret = -EFAULT;
+ struct nfs_fop_local *nfl = NULL;
+
+ if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
+ return ret;
+
+ gf_log (GF_NFS, GF_LOG_TRACE, "Create: %s", pathloc->path);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
+ nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
+ nfs_fop_save_root_ino (nfl, pathloc);
+ nfs_fop_gfid_setup (nfl, pathloc->inode, ret, err);
+
+ STACK_WIND_COOKIE (frame, nfs_fop_create_cbk, xl, xl, xl->fops->create,
+ pathloc, flags, mode, 0, fd, nfl->dictgfid);
+
+ ret = 0;
+err:
+ if (ret < 0) {
+ if (frame)
+ nfs_stack_destroy (nfl, frame);
+ }
+
+ return ret;
+}
+
+
+int32_t
+nfs_fop_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fop_setattr_cbk_t progcbk = NULL;
+
+ nfl_to_prog_data (nfl, progcbk, frame);
+ nfs_fop_restore_root_ino (nfl, op_ret, pre, post, NULL, NULL);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, pre, post,
+ xdata);
+ nfs_stack_destroy (nfl, frame);
+ return 0;
+}
+
+
+
+int
+nfs_fop_setattr (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ struct iatt *buf, int32_t valid, fop_setattr_cbk_t cbk,
+ void *local)
+{
+ call_frame_t *frame = NULL;
+ int ret = -EFAULT;
+ struct nfs_fop_local *nfl = NULL;
+
+ if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
+ return ret;
+
+ gf_log (GF_NFS, GF_LOG_TRACE, "Setattr: %s", pathloc->path);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
+ nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
+ nfs_fop_save_root_ino (nfl, pathloc);
+
+ STACK_WIND_COOKIE (frame, nfs_fop_setattr_cbk, xl, xl,
+ xl->fops->setattr, pathloc, buf, valid, NULL);
+ ret = 0;
+err:
+ if (ret < 0) {
+ if (frame)
+ nfs_stack_destroy (nfl, frame);
+ }
+
+ return ret;
+}
+
+
+int32_t
+nfs_fop_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fop_mkdir_cbk_t progcbk = NULL;
+
+ if (op_ret == 0) {
+ nfs_fix_generation(this,inode);
+ }
+
+ nfl_to_prog_data (nfl, progcbk, frame);
+ nfs_fop_restore_root_ino (nfl, op_ret, buf, NULL,preparent, postparent);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ nfs_stack_destroy (nfl, frame);
+ return 0;
+}
+
+
+int
+nfs_fop_mkdir (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ mode_t mode, fop_mkdir_cbk_t cbk, void *local)
+{
+ call_frame_t *frame = NULL;
+ int ret = -EFAULT;
+ struct nfs_fop_local *nfl = NULL;
+
+ if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
+ return ret;
+
+ gf_log (GF_NFS, GF_LOG_TRACE, "Mkdir: %s", pathloc->path);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
+ nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
+ nfs_fop_save_root_ino (nfl, pathloc);
+ nfs_fop_gfid_setup (nfl, pathloc->inode, ret, err);
+
+ STACK_WIND_COOKIE (frame, nfs_fop_mkdir_cbk, xl, xl, xl->fops->mkdir,
+ pathloc, mode, 0, nfl->dictgfid);
+ ret = 0;
+err:
+ if (ret < 0) {
+ if (frame)
+ nfs_stack_destroy (nfl, frame);
+ }
+
+ return ret;
+}
+
+
+int32_t
+nfs_fop_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fop_symlink_cbk_t progcbk = NULL;
+
+ if (op_ret == 0) {
+ nfs_fix_generation(this,inode);
+ }
+
+ nfl_to_prog_data (nfl, progcbk, frame);
+ nfs_fop_restore_root_ino (nfl, op_ret,buf, NULL, preparent, postparent);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ nfs_stack_destroy (nfl, frame);
+ return 0;
+}
+
+int
+nfs_fop_symlink (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, char *target,
+ loc_t *pathloc, fop_symlink_cbk_t cbk, void *local)
+{
+ call_frame_t *frame = NULL;
+ int ret = -EFAULT;
+ struct nfs_fop_local *nfl = NULL;
+
+ if ((!nfsx) || (!xl) || (!pathloc) || (!target) || (!nfu))
+ return ret;
+
+ gf_log (GF_NFS, GF_LOG_TRACE, "Symlink: %s", pathloc->path);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
+ nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
+ nfs_fop_save_root_ino (nfl, pathloc);
+ nfs_fop_gfid_setup (nfl, pathloc->inode, ret, err);
+
+ STACK_WIND_COOKIE (frame, nfs_fop_symlink_cbk, xl, xl,
+ xl->fops->symlink, target, pathloc,
+ 0, nfl->dictgfid);
+ ret = 0;
+err:
+ if (ret < 0) {
+ if (frame)
+ nfs_stack_destroy (nfl, frame);
+ }
+
+ return ret;
+}
+
+
+int32_t
+nfs_fop_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *path,
+ struct iatt *buf, dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fop_readlink_cbk_t progcbk = NULL;
+
+ nfl_to_prog_data (nfl, progcbk, frame);
+ nfs_fop_restore_root_ino (nfl, op_ret, buf, NULL, NULL, NULL);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, path, buf,
+ xdata);
+ nfs_stack_destroy (nfl, frame);
+ return 0;
+}
+
+
+int
+nfs_fop_readlink (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ size_t size, fop_readlink_cbk_t cbk, void *local)
+{
+ call_frame_t *frame = NULL;
+ int ret = -EFAULT;
+ struct nfs_fop_local *nfl = NULL;
+
+ if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
+ return ret;
+
+ gf_log (GF_NFS, GF_LOG_TRACE, "Readlink: %s", pathloc->path);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
+ nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
+ nfs_fop_save_root_ino (nfl, pathloc);
+
+ STACK_WIND_COOKIE (frame, nfs_fop_readlink_cbk, xl, xl,
+ xl->fops->readlink, pathloc, size, NULL);
+ ret = 0;
+err:
+ if (ret < 0) {
+ if (frame)
+ nfs_stack_destroy (nfl, frame);
+ }
+
+ return ret;
+}
+
+
+int32_t
+nfs_fop_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fop_mknod_cbk_t progcbk = NULL;
+
+ if (op_ret == 0) {
+ nfs_fix_generation(this,inode);
+ }
+
+ nfl_to_prog_data (nfl, progcbk, frame);
+ nfs_fop_restore_root_ino (nfl, op_ret,buf, NULL, preparent, postparent);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ nfs_stack_destroy (nfl, frame);
+ return 0;
+}
+
+
+int
+nfs_fop_mknod (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ mode_t mode, dev_t dev, fop_mknod_cbk_t cbk, void *local)
+{
+ call_frame_t *frame = NULL;
+ int ret = -EFAULT;
+ struct nfs_fop_local *nfl = NULL;
+
+ if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
+ return ret;
+
+ gf_log (GF_NFS, GF_LOG_TRACE, "Mknod: %s", pathloc->path);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
+ nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
+ nfs_fop_save_root_ino (nfl, pathloc);
+ nfs_fop_gfid_setup (nfl, pathloc->inode, ret, err);
+
+ STACK_WIND_COOKIE (frame, nfs_fop_mknod_cbk, xl, xl, xl->fops->mknod,
+ pathloc, mode, dev, 0, nfl->dictgfid);
+ ret = 0;
+err:
+ if (ret < 0) {
+ if (frame)
+ nfs_stack_destroy (nfl, frame);
+ }
+
+ return ret;
+}
+
+int32_t
+nfs_fop_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = frame->local;
+ fop_rmdir_cbk_t progcbk = NULL;
+
+ nfl_to_prog_data (nfl, progcbk, frame);
+ nfs_fop_restore_root_ino (nfl, op_ret, NULL, NULL, preparent,
+ postparent);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, preparent,
+ postparent, NULL);
+ nfs_stack_destroy (nfl, frame);
+ return 0;
+}
+
+
+
+int
+nfs_fop_rmdir (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ fop_rmdir_cbk_t cbk, void *local)
+{
+ call_frame_t *frame = NULL;
+ int ret = -EFAULT;
+ struct nfs_fop_local *nfl = NULL;
+
+ if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
+ return ret;
+
+ gf_log (GF_NFS, GF_LOG_TRACE, "Rmdir: %s", pathloc->path);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
+ nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
+ nfs_fop_save_root_ino (nfl, pathloc);
+
+ STACK_WIND_COOKIE (frame, nfs_fop_rmdir_cbk, xl, xl, xl->fops->rmdir,
+ pathloc, 0, NULL);
+ ret = 0;
+err:
+ if (ret < 0) {
+ if (frame)
+ nfs_stack_destroy (nfl, frame);
+ }
+
+ return ret;
+
+}
+
+
+int32_t
+nfs_fop_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = frame->local;
+ fop_unlink_cbk_t progcbk = NULL;
+
+ nfl_to_prog_data (nfl, progcbk, frame);
+ nfs_fop_restore_root_ino (nfl, op_ret, NULL, NULL, preparent,
+ postparent);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, preparent,
+ postparent, xdata);
+ nfs_stack_destroy (nfl, frame);
+ return 0;
+}
+
+
+int
+nfs_fop_unlink (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ fop_unlink_cbk_t cbk, void *local)
+{
+ call_frame_t *frame = NULL;
+ int ret = -EFAULT;
+ struct nfs_fop_local *nfl = NULL;
+
+ if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
+ return ret;
+
+ gf_log (GF_NFS, GF_LOG_TRACE, "Unlink: %s", pathloc->path);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
+ nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
+ nfs_fop_save_root_ino (nfl, pathloc);
+
+ STACK_WIND_COOKIE (frame, nfs_fop_unlink_cbk, xl, xl,
+ xl->fops->unlink, pathloc, 0, NULL);
+ ret = 0;
+err:
+ if (ret < 0) {
+ if (frame)
+ nfs_stack_destroy (nfl, frame);
+ }
+
+ return ret;
+
+}
+
+
+int32_t
+nfs_fop_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fop_link_cbk_t progcbk = NULL;
+
+ if (op_ret == 0) {
+ nfs_fix_generation(this,inode);
+ }
+
+ nfl_to_prog_data (nfl, progcbk, frame);
+ nfs_fop_restore_root_ino (nfl, op_ret, buf, NULL, preparent,
+ postparent);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+
+ nfs_stack_destroy (nfl, frame);
+ return 0;
+}
+
+
+int
+nfs_fop_link (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *oldloc,
+ loc_t *newloc, fop_link_cbk_t cbk, void *local)
+{
+ call_frame_t *frame = NULL;
+ int ret = -EFAULT;
+ struct nfs_fop_local *nfl = NULL;
+
+ if ((!nfsx) || (!xl) || (!oldloc) || (!newloc) || (!nfu))
+ return ret;
+
+ gf_log (GF_NFS, GF_LOG_TRACE, "Link: %s -> %s", newloc->path,
+ oldloc->path);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
+ nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
+ nfs_fop_save_root_ino (nfl, newloc);
+
+ STACK_WIND_COOKIE (frame, nfs_fop_link_cbk, xl, xl, xl->fops->link,
+ oldloc, newloc, NULL);
+ ret = 0;
+err:
+ if (ret < 0) {
+ if (frame)
+ nfs_stack_destroy (nfl, frame);
+ }
+
+ return ret;
+}
+
+
+int32_t
+nfs_fop_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+
+ struct nfs_fop_local *nfl = NULL;
+ fop_rename_cbk_t progcbk = NULL;
+
+ nfl_to_prog_data (nfl, progcbk, frame);
+ /* The preattr arg needs to be NULL instead of @buf because it is
+ * possible that the new parent is not root whereas the source dir
+ * could have been. That is handled in the next macro.
+ */
+ nfs_fop_restore_root_ino (nfl, op_ret, NULL, NULL, preoldparent,
+ postoldparent);
+ nfs_fop_newloc_restore_root_ino (nfl, op_ret, buf, NULL, prenewparent,
+ postnewparent);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, buf,
+ preoldparent, postoldparent, prenewparent,
+ postnewparent, xdata);
+ nfs_stack_destroy (nfl, frame);
+ return 0;
+}
+
+
+int
+nfs_fop_rename (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *oldloc,
+ loc_t *newloc, fop_rename_cbk_t cbk, void *local)
+{
+ call_frame_t *frame = NULL;
+ int ret = -EFAULT;
+ struct nfs_fop_local *nfl = NULL;
+
+ if ((!nfsx) || (!xl) || (!oldloc) || (!newloc) || (!nfu))
+ return ret;
+
+ gf_log (GF_NFS, GF_LOG_TRACE, "Rename: %s -> %s", oldloc->path,
+ newloc->path);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
+ nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
+ nfs_fop_save_root_ino (nfl, oldloc);
+ nfs_fop_newloc_save_root_ino (nfl, newloc);
+
+ STACK_WIND_COOKIE (frame, nfs_fop_rename_cbk, xl, xl,
+ xl->fops->rename, oldloc, newloc, NULL);
+ ret = 0;
+err:
+ if (ret < 0) {
+ if (frame)
+ nfs_stack_destroy (nfl, frame);
+ }
+
+ return ret;
+}
+
+
+int32_t
+nfs_fop_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fop_open_cbk_t progcbk = NULL;
+
+ nfl_to_prog_data (nfl, progcbk, frame);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, fd, xdata);
+ nfs_stack_destroy (nfl, frame);
+
+ return 0;
+}
+
+int
+nfs_fop_open (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
+ int32_t flags, fd_t *fd, fop_open_cbk_t cbk,
+ void *local)
+{
+ call_frame_t *frame = NULL;
+ int ret = -EFAULT;
+ struct nfs_fop_local *nfl = NULL;
+
+ if ((!nfsx) || (!xl) || (!loc) || (!fd) || (!nfu))
+ return ret;
+
+ gf_log (GF_NFS, GF_LOG_TRACE, "Open: %s", loc->path);
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
+ nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
+
+ STACK_WIND_COOKIE (frame, nfs_fop_open_cbk, xl, xl, xl->fops->open,
+ loc, flags, fd, NULL);
+ ret = 0;
+err:
+ if (ret < 0) {
+ if (frame)
+ nfs_stack_destroy (nfl, frame);
+ }
+
+ return ret;
+}
+
+
+int32_t
+nfs_fop_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fop_writev_cbk_t progcbk = NULL;
+
+ nfl_to_prog_data (nfl, progcbk, frame);
+ nfs_fop_restore_root_ino (nfl, op_ret, prebuf, postbuf, NULL, NULL);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+
+ nfs_stack_destroy (nfl, frame);
+
+ return 0;
+}
+
+
+int
+nfs_fop_write (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
+ struct iobref *srciobref, struct iovec *vector, int32_t count,
+ off_t offset, fop_writev_cbk_t cbk, void *local)
+{
+ call_frame_t *frame = NULL;
+ int ret = -EFAULT;
+ struct nfs_fop_local *nfl = NULL;
+
+ if ((!nfsx) || (!xl) || (!fd) || (!vector) || (!nfu) || (!srciobref))
+ return ret;
+
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
+ nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
+ nfs_fop_save_root_fd_ino (nfl, fd);
+/*
+ nfl->iobref = iobref_new ();
+ if (!nfl->iobref) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "iobref creation failed");
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ iobref_add (nfl->iobref, srciob);
+*/
+ STACK_WIND_COOKIE (frame, nfs_fop_writev_cbk, xl, xl,xl->fops->writev,
+ fd, vector, count, offset, fd->flags, srciobref, NULL);
+ ret = 0;
+err:
+ if (ret < 0) {
+ if (frame)
+ nfs_stack_destroy (nfl, frame);
+ }
+
+ return ret;
+}
+
+
+int32_t
+nfs_fop_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fop_fsync_cbk_t progcbk = NULL;
+
+ nfl_to_prog_data (nfl, progcbk, frame);
+ nfs_fop_restore_root_ino (nfl, op_ret, prebuf, postbuf, NULL, NULL);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+
+ nfs_stack_destroy (nfl, frame);
+ return 0;
+}
+
+
+
+int
+nfs_fop_fsync (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
+ int32_t datasync, fop_fsync_cbk_t cbk, void *local)
+{
+ call_frame_t *frame = NULL;
+ int ret = -EFAULT;
+ struct nfs_fop_local *nfl = NULL;
+
+ if ((!nfsx) || (!xl) || (!fd))
+ return ret;
+
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
+ nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
+ nfs_fop_save_root_fd_ino (nfl, fd);
+
+ STACK_WIND_COOKIE (frame, nfs_fop_fsync_cbk, xl, xl,
+ xl->fops->fsync, fd, datasync, NULL);
+ ret = 0;
+err:
+ if (ret < 0) {
+ if (frame)
+ nfs_stack_destroy (nfl, frame);
+ }
+
+ return ret;
+}
+
+
+int32_t
+nfs_fop_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fop_readv_cbk_t progcbk = NULL;
+
+ nfl_to_prog_data (nfl, progcbk, frame);
+ nfs_fop_restore_root_ino (nfl, op_ret, stbuf, NULL, NULL, NULL);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, vector, count,
+ stbuf, iobref, xdata);
+
+ nfs_stack_destroy (nfl, frame);
+ return 0;
+}
+
+
+int
+nfs_fop_read (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
+ size_t size, off_t offset, fop_readv_cbk_t cbk, void *local)
+{
+ call_frame_t *frame = NULL;
+ int ret = -EFAULT;
+ struct nfs_fop_local *nfl = NULL;
+
+ if ((!xl) || (!fd) || (!nfu))
+ return ret;
+
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
+ nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
+ nfs_fop_save_root_fd_ino (nfl, fd);
+
+ STACK_WIND_COOKIE (frame, nfs_fop_readv_cbk, xl, xl, xl->fops->readv,
+ fd, size, offset, 0, NULL);
+ ret = 0;
+err:
+ if (ret < 0) {
+ if (frame)
+ nfs_stack_destroy (nfl, frame);
+ }
+
+ return ret;
+}
+
+int32_t
+nfs_fop_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *flock,
+ dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fop_lk_cbk_t progcbk = NULL;
+
+ nfl_to_prog_data (nfl, progcbk, frame);
+
+ if (!op_ret)
+ fd_lk_insert_and_merge (nfl->fd, nfl->cmd, &nfl->flock);
+
+ fd_unref (nfl->fd);
+
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, flock, xdata);
+
+ nfs_stack_destroy (nfl, frame);
+ return 0;
+}
+
+
+int
+nfs_fop_lk (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
+ int cmd, struct gf_flock *flock, fop_lk_cbk_t cbk, void *local)
+{
+ call_frame_t *frame = NULL;
+ int ret = -EFAULT;
+ struct nfs_fop_local *nfl = NULL;
+
+ if ((!xl) || (!fd) || (!nfu))
+ return ret;
+
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
+ nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
+
+ nfl->cmd = cmd;
+ nfl->fd = fd_ref (fd);
+ nfl->flock = *flock;
+
+ STACK_WIND_COOKIE (frame, nfs_fop_lk_cbk, xl, xl, xl->fops->lk,
+ fd, cmd, flock, NULL);
+ ret = 0;
+err:
+ if (ret < 0) {
+ if (frame)
+ nfs_stack_destroy (nfl, frame);
+ }
+
+ return ret;
+}
+
+int32_t
+nfs_fop_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fop_getxattr_cbk_t progcbk = NULL;
+
+ nfl_to_prog_data (nfl, progcbk, frame);
+
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, dict, xdata);
+
+ nfs_stack_destroy (nfl, frame);
+ return 0;
+}
+
+
+int
+nfs_fop_getxattr (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
+ char *name, dict_t *xdata, fop_getxattr_cbk_t cbk, void *local)
+{
+ call_frame_t *frame = NULL;
+ int ret = -EFAULT;
+ struct nfs_fop_local *nfl = NULL;
+
+ if ((!xl) || (!loc) || (!nfu))
+ return ret;
+
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
+ nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
+
+ STACK_WIND_COOKIE (frame, nfs_fop_getxattr_cbk, xl, xl, xl->fops->getxattr,
+ loc, name, NULL);
+ ret = 0;
+err:
+ if (ret < 0) {
+ if (frame)
+ nfs_stack_destroy (nfl, frame);
+ }
+
+ return ret;
+}
+
+
+int32_t
+nfs_fop_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fop_setxattr_cbk_t progcbk = NULL;
+
+ nfl_to_prog_data (nfl, progcbk, frame);
+
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, xdata);
+
+ nfs_stack_destroy (nfl, frame);
+ return 0;
+}
+
+
+int
+nfs_fop_setxattr (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu,
+ loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata,
+ fop_setxattr_cbk_t cbk, void *local)
+{
+ call_frame_t *frame = NULL;
+ int ret = -EFAULT;
+ struct nfs_fop_local *nfl = NULL;
+
+ if ((!xl) || (!loc) || (!nfu))
+ return ret;
+
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
+ nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
+
+ STACK_WIND_COOKIE (frame, nfs_fop_setxattr_cbk, xl, xl, xl->fops->setxattr,
+ loc, dict, flags, xdata);
+ ret = 0;
+err:
+ if (ret < 0) {
+ if (frame)
+ nfs_stack_destroy (nfl, frame);
+ }
+
+ return ret;
+}
+
+
+int32_t
+nfs_fop_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fop_truncate_cbk_t progcbk = NULL;
+
+ nfl_to_prog_data (nfl, progcbk, frame);
+ nfs_fop_restore_root_ino (nfl, op_ret, prebuf, postbuf, NULL, NULL);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+
+ nfs_stack_destroy (nfl, frame);
+ return 0;
+}
+
+
+int
+nfs_fop_truncate (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
+ off_t offset, fop_truncate_cbk_t cbk, void *local)
+{
+ call_frame_t *frame = NULL;
+ int ret = -EFAULT;
+ struct nfs_fop_local *nfl = NULL;
+
+ if ((!nfsx) || (!xl) || (!loc) || (!nfu))
+ return ret;
+
+ nfs_fop_handle_frame_create (frame, nfsx, nfu, ret, err);
+ nfs_fop_handle_local_init (frame, nfsx, nfl, cbk, local, ret, err);
+ nfs_fop_save_root_ino (nfl, loc);
+
+ STACK_WIND_COOKIE (frame, nfs_fop_truncate_cbk, xl, xl,
+ xl->fops->truncate, loc, offset, NULL);
+
+ ret = 0;
+err:
+ if (ret < 0) {
+ if (frame)
+ nfs_stack_destroy (nfl, frame);
+ }
+
+ return ret;
+}
diff --git a/xlators/nfs/server/src/nfs-fops.h b/xlators/nfs/server/src/nfs-fops.h
new file mode 100644
index 000000000..44e99c66b
--- /dev/null
+++ b/xlators/nfs/server/src/nfs-fops.h
@@ -0,0 +1,248 @@
+/*
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _NFS_FOPS_H_
+#define _NFS_FOPS_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "dict.h"
+#include "xlator.h"
+#include "iobuf.h"
+#include "call-stub.h"
+#include "stack.h"
+#include "nfs.h"
+#include "nfs-common.h"
+#include <semaphore.h>
+
+/* This structure used to communicate state between a fop and its callback.
+ * The problem is, when we're calling a fop in the nfs op handler, the callback
+ * is the NFS protocol's callback and we have to perform all GlusterFS
+ * inode, inode table, fd_ts and fd table operations in the NFS callback. That
+ * approach soon gets extremely complicated and confusing because, then we have
+ * to try and separate in our heads which source lines in those callbacks are
+ * required for serving the NFS op and which ones are needed for satisfying
+ * GlusterFS requirements. This structure allows us avoid performing GlusterFS
+ * state maintenance operations inside the fops layer itself. Now, before
+ * we call the callback registered by the NFS operation, a hidden fops-layer
+ * specific callback is called which performs the state maintenance and then
+ * calls the NFS callback.
+ *
+ * These are allocated from a mem-pool stored in the nfs xlator's state.
+ * i.e. struct nfs_state.
+ * That is initiated in nfs_init_subvolumes in nfs.c.
+ */
+struct nfs_fop_local {
+ /* The local sent along by the user of the fop. */
+ void *proglocal;
+
+ /* The address of the callback supplied by the user. After our
+ * callback is executed this one is called.
+ * The exact cast destination of this pointer will depend on the
+ * fop that is being called.
+ */
+ void *progcbk;
+
+ /* Used only for write requests. */
+ struct iobref *iobref;
+
+ inode_t *parent;
+ inode_t *newparent;
+ inode_t *inode;
+
+ /* Set to 1 by nfs-inodes layer, which uses this to decide whether to
+ * link the newly allocated inode into the itable, in case the fop was
+ * successful.
+ */
+ int newinode;
+
+ /* Used by nfs-fops layer in order to determine whether to funge the
+ * ino in a dir's stbuf. This funging of root ino is needed to ensure
+ * that the root ino remains 1 even when the NFS server has been
+ * restarted. Note that in distribute, a fresh lookup and a revalidate
+ * on the root inode returns two different inode numbers and this we
+ * need to handle by ourself.
+ */
+ int rootinode;
+
+ /* This member is used to determine whether the new parent of a file
+ * being renamed is the root directory. If yes, the ino is funged.
+ */
+ int newrootinode;
+ int newrootparentinode;
+
+ /* Determines whether to funge the ino in the post and pre parent
+ * stbufs for a file/dir where the parent directory could be the root
+ * dir. Needed here because of the same reason as above.
+ */
+ int rootparentinode;
+
+ char path[NFS_NAME_MAX + 1];
+ char newpath[NFS_NAME_MAX + 1];
+ xlator_t *nfsx;
+ dict_t *dictgfid;
+
+ fd_t *fd;
+ int cmd;
+ struct gf_flock flock;
+};
+
+extern struct nfs_fop_local *
+nfs_fop_local_init (xlator_t *xl);
+
+extern void
+nfs_fop_local_wipe (xlator_t *xl, struct nfs_fop_local *l);
+
+#define nfs_state(nfsxl) (nfsxl)->private
+#define nfs_fop_mempool(nfxl) (((struct nfs_state *)nfs_state(nfxl))->foppool)
+
+#define prog_data_to_nfl(nf,nflocal, fram, pcbk, plocal) \
+ do { \
+ nflocal = nfs_fop_local_init (nf); \
+ if (nflocal) { \
+ nflocal->proglocal = plocal; \
+ nflocal->progcbk = *VOID(&pcbk); \
+ nflocal->nfsx = nf; \
+ if (fram) \
+ ((call_frame_t *)fram)->local = nflocal;\
+ } \
+ } while (0) \
+
+
+
+#define nfl_to_prog_data(nflocal, pcbk, fram) \
+ do { \
+ nflocal = fram->local; \
+ fram->local = nflocal->proglocal; \
+ pcbk = nflocal->progcbk; \
+ } while (0) \
+
+#define nfs_fop_handle_local_init(fram,nfx, nfloc, cbck,prgloc,retval,lab) \
+ do { \
+ prog_data_to_nfl (nfx, nfloc, fram, cbck, prgloc); \
+ if (!nfloc) { \
+ gf_log (GF_NFS,GF_LOG_ERROR,"Failed to init local");\
+ retval = -ENOMEM; \
+ goto lab; \
+ } \
+ } while (0) \
+
+extern int
+nfs_fop_fstat (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
+ fop_stat_cbk_t cbk, void *local);
+
+extern int
+nfs_fop_readdirp (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *dirfd,
+ size_t bufsize, off_t offset, fop_readdir_cbk_t cbk,
+ void *local);
+extern int
+nfs_fop_lookup (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
+ fop_lookup_cbk_t cbk, void *local);
+
+extern int
+nfs_fop_create (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ int flags, mode_t mode, fd_t *fd, fop_create_cbk_t cbk,
+ void *local);
+extern int
+nfs_fop_flush (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
+ fop_flush_cbk_t cbk, void *local);
+
+extern int
+nfs_fop_mkdir (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ mode_t mode, fop_mkdir_cbk_t cbk, void *local);
+
+extern int
+nfs_fop_truncate (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
+ off_t offset, fop_truncate_cbk_t cbk, void *local);
+
+extern int
+nfs_fop_read (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
+ size_t size, off_t offset, fop_readv_cbk_t cbk, void *local);
+
+extern int
+nfs_fop_fsync (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
+ int32_t datasync, fop_fsync_cbk_t cbk, void *local);
+
+extern int
+nfs_fop_write (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
+ struct iobref *srciobref, struct iovec *vector, int32_t count,
+ off_t offset, fop_writev_cbk_t cbk, void *local);
+
+extern int
+nfs_fop_open (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
+ int32_t flags, fd_t *fd, fop_open_cbk_t cbk,
+ void *local);
+
+extern int
+nfs_fop_rename (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *oldloc,
+ loc_t *newloc, fop_rename_cbk_t cbk, void *local);
+
+extern int
+nfs_fop_link (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *oldloc,
+ loc_t *newloc, fop_link_cbk_t cbk, void *local);
+
+extern int
+nfs_fop_unlink (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ fop_unlink_cbk_t cbk, void *local);
+
+extern int
+nfs_fop_rmdir (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ fop_rmdir_cbk_t cbk, void *local);
+
+extern int
+nfs_fop_mknod (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ mode_t mode, dev_t dev, fop_mknod_cbk_t cbk, void *local);
+
+extern int
+nfs_fop_readlink (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ size_t size, fop_readlink_cbk_t cbk, void *local);
+
+extern int
+nfs_fop_symlink (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, char *target,
+ loc_t *pathloc, fop_symlink_cbk_t cbk, void *local);
+
+extern int
+nfs_fop_setattr (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ struct iatt *buf, int32_t valid, fop_setattr_cbk_t cbk,
+ void *local);
+
+extern int
+nfs_fop_statfs (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ fop_statfs_cbk_t cbk, void *local);
+
+extern int
+nfs_fop_opendir (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ fd_t *dirfd, fop_opendir_cbk_t cbk, void *local);
+
+extern int
+nfs_fop_stat (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
+ fop_stat_cbk_t cbk, void *local);
+
+extern int
+nfs_fop_access (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
+ int32_t accesstest, fop_access_cbk_t cbk, void *local);
+
+extern int
+nfs_fop_lk (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
+ int cmd, struct gf_flock *flock, fop_lk_cbk_t cbk, void *local);
+
+extern int
+nfs_fop_getxattr (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
+ char *name, dict_t *xdata, fop_getxattr_cbk_t cbk, void *local);
+
+extern int
+nfs_fop_setxattr (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu,
+ loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata,
+ fop_setxattr_cbk_t cbk, void *local);
+
+#endif
diff --git a/xlators/nfs/server/src/nfs-generics.c b/xlators/nfs/server/src/nfs-generics.c
new file mode 100644
index 000000000..cb32b7f1b
--- /dev/null
+++ b/xlators/nfs/server/src/nfs-generics.c
@@ -0,0 +1,345 @@
+/*
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "string.h"
+
+#include "inode.h"
+#include "nfs.h"
+#include "nfs-fops.h"
+#include "nfs-inodes.h"
+#include "nfs-generics.h"
+#include "xlator.h"
+
+
+int
+nfs_fstat (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
+ fop_stat_cbk_t cbk, void *local)
+{
+ int ret = -EFAULT;
+
+ if ((!nfsx) || (!xl) || (!fd) || (!nfu))
+ return ret;
+
+ ret = nfs_fop_fstat (nfsx, xl, nfu, fd, cbk, local);
+ return ret;
+}
+
+int
+nfs_access (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ int32_t accesstest, fop_access_cbk_t cbk, void *local)
+{
+ int ret = -EFAULT;
+
+ if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
+ return ret;
+
+ ret = nfs_fop_access (nfsx, xl, nfu, pathloc, accesstest, cbk, local);
+
+ return ret;
+}
+
+int
+nfs_stat (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ fop_stat_cbk_t cbk, void *local)
+{
+ int ret = -EFAULT;
+
+ if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
+ return ret;
+
+ ret = nfs_fop_stat (nfsx, xl, nfu, pathloc, cbk, local);
+
+ return ret;
+}
+
+
+int
+nfs_readdirp (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *dirfd,
+ size_t bufsize, off_t offset, fop_readdir_cbk_t cbk, void *local)
+{
+ if ((!nfsx) || (!xl) || (!dirfd) || (!nfu))
+ return -EFAULT;
+
+ return nfs_fop_readdirp (nfsx, xl, nfu, dirfd, bufsize, offset, cbk,
+ local);
+}
+
+
+int
+nfs_lookup (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ fop_lookup_cbk_t cbk, void *local)
+{
+ int ret = -EFAULT;
+
+ if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
+ return ret;
+
+ ret = nfs_fop_lookup (nfsx, xl, nfu, pathloc, cbk, local);
+ return ret;
+}
+
+int
+nfs_create (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ int flags, mode_t mode, fop_create_cbk_t cbk, void *local)
+{
+ int ret = -EFAULT;
+
+ if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
+ return ret;
+
+ ret = nfs_inode_create (nfsx, xl, nfu, pathloc, flags, mode, cbk,local);
+ return ret;
+}
+
+
+int
+nfs_flush (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
+ fop_flush_cbk_t cbk, void *local)
+{
+ return nfs_fop_flush (nfsx, xl, nfu, fd, cbk, local);
+}
+
+
+
+int
+nfs_mkdir (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ mode_t mode, fop_mkdir_cbk_t cbk, void *local)
+{
+ int ret = -EFAULT;
+
+ if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
+ return ret;
+
+ ret = nfs_inode_mkdir (nfsx, xl, nfu, pathloc, mode, cbk, local);
+ return ret;
+}
+
+
+
+int
+nfs_truncate (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ off_t offset, fop_truncate_cbk_t cbk, void *local)
+{
+ int ret = -EFAULT;
+
+ if ((!xl) || (!pathloc) || (!nfu))
+ return ret;
+
+ ret = nfs_fop_truncate (nfsx, xl, nfu, pathloc, offset, cbk, local);
+ return ret;
+}
+
+int
+nfs_read (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd, size_t size,
+ off_t offset, fop_readv_cbk_t cbk, void *local)
+{
+ return nfs_fop_read (nfsx, xl, nfu, fd, size, offset, cbk, local);
+}
+
+int
+nfs_lk (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
+ int cmd, struct gf_flock *flock, fop_lk_cbk_t cbk, void *local)
+{
+ return nfs_fop_lk ( nfsx, xl, nfu, fd, cmd, flock, cbk, local);
+}
+
+int
+nfs_getxattr (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
+ char *name, dict_t *xdata, fop_getxattr_cbk_t cbk, void *local)
+{
+ return nfs_fop_getxattr (nfsx, xl, nfu, loc, name, xdata, cbk, local);
+}
+
+int
+nfs_setxattr (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu,
+ loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata,
+ fop_setxattr_cbk_t cbk, void *local)
+{
+ return nfs_fop_setxattr (nfsx, xl, nfu, loc, dict, flags, xdata, cbk,
+ local);
+}
+
+int
+nfs_fsync (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
+ int32_t datasync, fop_fsync_cbk_t cbk, void *local)
+{
+ return nfs_fop_fsync (nfsx, xl, nfu, fd, datasync, cbk, local);
+}
+
+
+int
+nfs_write (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
+ struct iobref *srciobref, struct iovec *vector, int32_t count,
+ off_t offset, fop_writev_cbk_t cbk, void *local)
+{
+ return nfs_fop_write (nfsx, xl, nfu, fd, srciobref, vector, count,
+ offset, cbk, local);
+}
+
+
+int
+nfs_open (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ int32_t flags, fop_open_cbk_t cbk, void *local)
+{
+ int ret = -EFAULT;
+
+ if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
+ return ret;
+
+ ret = nfs_inode_open (nfsx, xl, nfu, pathloc, flags, cbk,
+ local);
+ return ret;
+}
+
+
+int
+nfs_rename (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *oldloc,
+ loc_t *newloc, fop_rename_cbk_t cbk, void *local)
+{
+ int ret = -EFAULT;
+
+ if ((!nfsx) || (!xl) || (!oldloc) || (!newloc) || (!nfu))
+ return ret;
+
+ ret = nfs_inode_rename (nfsx, xl, nfu, oldloc, newloc, cbk, local);
+ return ret;
+}
+
+
+int
+nfs_link (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *oldloc,
+ loc_t *newloc, fop_link_cbk_t cbk, void *local)
+{
+ int ret = -EFAULT;
+
+ if ((!nfsx) || (!xl) || (!oldloc) || (!newloc) || (!nfu))
+ return ret;
+
+ ret = nfs_inode_link (nfsx, xl, nfu, oldloc, newloc, cbk, local);
+ return ret;
+}
+
+
+int
+nfs_unlink (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ fop_unlink_cbk_t cbk, void *local)
+{
+ int ret = -EFAULT;
+
+ if ((!xl) || (!pathloc) || (!nfu))
+ return ret;
+
+ ret = nfs_inode_unlink (nfsx, xl, nfu, pathloc, cbk, local);
+ return ret;
+}
+
+
+int
+nfs_rmdir (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *path,
+ fop_rmdir_cbk_t cbk, void *local)
+{
+ int ret = -EFAULT;
+
+ if ((!nfsx) || (!xl) || (!path) || (!nfu))
+ return ret;
+
+ ret = nfs_inode_rmdir (nfsx, xl, nfu, path, cbk, local);
+ return ret;
+}
+
+
+int
+nfs_mknod (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ mode_t mode, dev_t dev, fop_mknod_cbk_t cbk, void *local)
+{
+ int ret = -EFAULT;
+
+ if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
+ return ret;
+
+ ret = nfs_inode_mknod (nfsx, xl, nfu, pathloc, mode, dev, cbk, local);
+ return ret;
+}
+
+
+int
+nfs_readlink (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *linkloc,
+ fop_readlink_cbk_t cbk, void *local)
+{
+ int ret = -EFAULT;
+
+ if ((!nfsx) || (!xl) || (!linkloc) || (!nfu))
+ return ret;
+
+ ret = nfs_fop_readlink (nfsx, xl, nfu, linkloc, NFS_PATH_MAX, cbk,
+ local);
+ return ret;
+}
+
+
+int
+nfs_symlink (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, char *target,
+ loc_t *linkloc, fop_symlink_cbk_t cbk, void *local)
+{
+ int ret = -EFAULT;
+
+ if ((!nfsx) || (!xl) || (!linkloc) || (!target) || (!nfu))
+ return ret;
+
+ ret = nfs_inode_symlink (nfsx, xl, nfu, target, linkloc, cbk, local);
+ return ret;
+}
+
+
+
+int
+nfs_setattr (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ struct iatt *buf, int32_t valid, fop_setattr_cbk_t cbk,
+ void *local)
+{
+ int ret = -EFAULT;
+
+ if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
+ return ret;
+
+ ret = nfs_fop_setattr (nfsx, xl, nfu, pathloc, buf, valid, cbk, local);
+ return ret;
+}
+
+
+int
+nfs_statfs (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ fop_statfs_cbk_t cbk, void *local)
+{
+ int ret = -EFAULT;
+
+ if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
+ return ret;
+
+ ret = nfs_fop_statfs (nfsx, xl, nfu, pathloc, cbk, local);
+ return ret;
+}
+
+int
+nfs_opendir (xlator_t *nfsx, xlator_t *fopxl, nfs_user_t *nfu, loc_t *pathloc,
+ fop_opendir_cbk_t cbk, void *local)
+{
+ if ((!nfsx) || (!fopxl) || (!pathloc) || (!nfu))
+ return -EFAULT;
+
+ return nfs_inode_opendir (nfsx, fopxl, nfu, pathloc, cbk, local);
+}
+
diff --git a/xlators/nfs/server/src/nfs-generics.h b/xlators/nfs/server/src/nfs-generics.h
new file mode 100644
index 000000000..01876d68e
--- /dev/null
+++ b/xlators/nfs/server/src/nfs-generics.h
@@ -0,0 +1,168 @@
+/*
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _NFS_GENERICS_H_
+#define _NFS_GENERICS_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "nfs.h"
+#include "xlator.h"
+#include "nfs-fops.h"
+#include "nfs-inodes.h"
+
+struct nfs_direntcache {
+ gf_dirent_t entries; /* Head of list of cached dirents. */
+ gf_dirent_t *next; /* Pointer to the next entry that
+ * should be sent by readdir */
+ uint64_t prev_off; /* Offset where the next read will
+ * happen.
+ */
+};
+
+/* WE're trying to abstract the fops interface from the NFS xlator so that
+ * different NFS versions can simply call a standard interface and have fop
+ * interface dependent functions be handled internally.
+ * This structure is part of such an abstraction. The fops layer stores any
+ * state is requires in the fd. E.g. the dirent cache for a directory fd_t.
+ */
+typedef struct nfs_fop_fdcontext {
+ pthread_mutex_t lock;
+ size_t dirent_bufsize;
+ off_t offset;
+ struct nfs_direntcache *dcache;
+ xlator_t *dirvol;
+} nfs_fdctx_t;
+
+extern int
+nfs_fstat (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
+ fop_stat_cbk_t cbk, void *local);
+
+extern int
+nfs_readdirp (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *dirfd,
+ size_t bufsize, off_t offset, fop_readdir_cbk_t cbk, void *local);
+
+
+extern int
+nfs_lookup (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ fop_lookup_cbk_t cbk, void *local);
+
+extern int
+nfs_create (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ int flags, mode_t mode, fop_create_cbk_t cbk, void *local);
+
+extern int
+nfs_flush (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
+ fop_flush_cbk_t cbk, void *local);
+
+extern int
+nfs_mkdir (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ mode_t mode, fop_mkdir_cbk_t cbk, void *local);
+
+extern int
+nfs_truncate (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ off_t offset, fop_truncate_cbk_t cbk, void *local);
+
+extern int
+nfs_read (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd, size_t size,
+ off_t offset, fop_readv_cbk_t cbk, void *local);
+
+extern int
+nfs_fsync (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
+ int32_t datasync, fop_fsync_cbk_t cbk, void *local);
+
+extern int
+nfs_write (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
+ struct iobref *srciobref, struct iovec *vector, int32_t count,
+ off_t offset, fop_writev_cbk_t cbk, void *local);
+
+extern int
+nfs_open (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ int32_t flags, fop_open_cbk_t cbk, void *local);
+
+extern int
+nfs_rename (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *oldloc,
+ loc_t *newloc, fop_rename_cbk_t cbk, void *local);
+
+extern int
+nfs_link (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *oldloc,
+ loc_t *newloc, fop_link_cbk_t cbk, void *local);
+
+extern int
+nfs_unlink (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ fop_unlink_cbk_t cbk, void *local);
+
+extern int
+nfs_rmdir (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ fop_rmdir_cbk_t cbk, void *local);
+
+extern int
+nfs_mknod (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ mode_t mode, dev_t dev, fop_mknod_cbk_t cbk, void *local);
+
+extern int
+nfs_readlink (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *linkloc,
+ fop_readlink_cbk_t cbk, void *local);
+
+extern int
+nfs_setattr (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ struct iatt *buf, int32_t valid, fop_setattr_cbk_t cbk,
+ void *local);
+
+extern int
+nfs_statfs (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ fop_statfs_cbk_t cbk, void *local);
+
+extern int
+nfs_stat (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ fop_stat_cbk_t cbk, void *local);
+
+extern int
+nfs_symlink (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, char *target,
+ loc_t *linkloc, fop_symlink_cbk_t cbk, void *local);
+
+/* Synchronous equivalents */
+
+extern call_stub_t *
+nfs_open_sync (xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ int32_t flags);
+
+extern call_stub_t *
+nfs_write_sync (xlator_t *xl, nfs_user_t *nfu, fd_t *fd, struct iobuf *srciob,
+ struct iovec *vec, int count, off_t offset);
+
+extern call_stub_t *
+nfs_read_sync (xlator_t *xl, nfs_user_t *nfu, fd_t *fd, size_t size,
+ off_t offset);
+
+extern int
+nfs_opendir (xlator_t *nfsx, xlator_t *fopxl, nfs_user_t *nfu, loc_t *pathloc,
+ fop_opendir_cbk_t cbk, void *local);
+
+extern int
+nfs_access (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ int32_t accesstest, fop_access_cbk_t cbk, void *local);
+extern int
+nfs_lk (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
+ int cmd, struct gf_flock *flock, fop_lk_cbk_t cbk, void *local);
+
+extern int
+nfs_getxattr (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
+ char *name, dict_t *xdata, fop_getxattr_cbk_t cbk, void *local);
+
+extern int
+nfs_setxattr (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu,
+ loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata,
+ fop_setxattr_cbk_t cbk, void *local);
+
+#endif
diff --git a/xlators/nfs/server/src/nfs-inodes.c b/xlators/nfs/server/src/nfs-inodes.c
new file mode 100644
index 000000000..63d5e8a19
--- /dev/null
+++ b/xlators/nfs/server/src/nfs-inodes.c
@@ -0,0 +1,608 @@
+/*
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "string.h"
+
+#include "inode.h"
+#include "nfs.h"
+#include "nfs-inodes.h"
+#include "nfs-fops.h"
+#include "xlator.h"
+
+#include <libgen.h>
+
+#define inodes_nfl_to_prog_data(nflocal, pcbk, fram) \
+ do { \
+ nflocal = fram->local; \
+ fram->local = nflocal->proglocal; \
+ *VOID(&pcbk) = nflocal->progcbk; \
+ nfs_fop_local_wipe (nflocal->nfsx, nflocal); \
+ } while (0) \
+
+void
+nfl_inodes_init (struct nfs_fop_local *nfl, inode_t *inode, inode_t *parent,
+ inode_t *newparent, const char *name, const char *newname)
+{
+ if (!nfl)
+ return;
+
+ if (inode)
+ nfl->inode = inode_ref (inode);
+
+ if (parent)
+ nfl->parent = inode_ref (parent);
+
+ if (newparent)
+ nfl->newparent = inode_ref (newparent);
+
+ if (name)
+ strncpy (nfl->path, name, NFS_NAME_MAX);
+
+ if (newname)
+ strncpy (nfl->newpath, newname, NFS_NAME_MAX);
+
+ return;
+}
+
+
+int32_t
+nfs_inode_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode
+ , struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = frame->local;
+ fop_create_cbk_t progcbk = NULL;
+ inode_t *linked_inode = NULL;
+
+ if (op_ret == -1)
+ goto do_not_link;
+
+ linked_inode = inode_link (inode, nfl->parent, nfl->path, buf);
+
+do_not_link:
+ /* NFS does not need it, upper layers should not expect the pointer to
+ * be a valid fd.
+ */
+ fd_unref (fd);
+
+ inodes_nfl_to_prog_data (nfl, progcbk, frame);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+
+ if (linked_inode) {
+ inode_lookup (linked_inode);
+ inode_unref (linked_inode);
+ }
+
+ return 0;
+}
+
+
+int
+nfs_inode_create (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu,
+ loc_t *pathloc, int flags, int mode, fop_create_cbk_t cbk,
+ void *local)
+{
+ struct nfs_fop_local *nfl = NULL;
+ int ret = -EFAULT;
+ fd_t *newfd = NULL;
+
+ if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
+ return ret;
+
+ nfs_fop_handle_local_init (NULL, nfsx, nfl, cbk, local, ret, err);
+
+ newfd = fd_create (pathloc->inode, 0);
+ if (!newfd) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to create new fd");
+ ret = -ENOMEM;
+ goto wipe_nfl;
+ }
+
+ /* The parent and base name will be needed to link the new inode
+ * into the inode table.
+ */
+ nfl_inodes_init (nfl, pathloc->inode, pathloc->parent, NULL,
+ pathloc->name, NULL);
+ ret = nfs_fop_create (nfsx, xl, nfu, pathloc, flags, mode, newfd,
+ nfs_inode_create_cbk, nfl);
+wipe_nfl:
+ if (ret < 0)
+ nfs_fop_local_wipe (xl, nfl);
+
+err:
+ return ret;
+}
+
+
+int32_t
+nfs_inode_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = frame->local;
+ fop_mkdir_cbk_t progcbk = NULL;
+ inode_t *linked_inode = NULL;
+
+ if (op_ret == -1)
+ goto do_not_link;
+
+ linked_inode = inode_link (inode, nfl->parent, nfl->path, buf);
+
+do_not_link:
+ inodes_nfl_to_prog_data (nfl, progcbk, frame);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+
+ if (linked_inode) {
+ inode_lookup (linked_inode);
+ inode_unref (linked_inode);
+ }
+
+ return 0;
+}
+
+
+int
+nfs_inode_mkdir (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ int mode, fop_mkdir_cbk_t cbk, void *local)
+{
+ struct nfs_fop_local *nfl = NULL;
+ int ret = -EFAULT;
+
+ if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
+ return ret;
+
+ nfs_fop_handle_local_init (NULL, nfsx, nfl, cbk, local, ret, err);
+ nfl_inodes_init (nfl, pathloc->inode, pathloc->parent, NULL,
+ pathloc->name, NULL);
+ ret = nfs_fop_mkdir (nfsx, xl, nfu, pathloc, mode, nfs_inode_mkdir_cbk,
+ nfl);
+ if (ret < 0)
+ nfs_fop_local_wipe (nfsx, nfl);
+
+err:
+ return ret;
+}
+
+
+int32_t
+nfs_inode_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+
+ struct nfs_fop_local *nfl = NULL;
+ fop_open_cbk_t progcbk = NULL;
+
+ if ((op_ret == -1) && (fd))
+ fd_unref (fd);
+ /* Not needed here since the fd is cached in higher layers and the bind
+ * must happen atomically when the fd gets added to the fd LRU.
+ */
+/* else
+ fd_bind (fd);
+*/
+ inodes_nfl_to_prog_data (nfl, progcbk, frame);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+
+int
+nfs_inode_open (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
+ int32_t flags, fop_open_cbk_t cbk, void *local)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fd_t *newfd = NULL;
+ int ret = -EFAULT;
+
+ if ((!nfsx) || (!xl) || (!loc) || (!nfu))
+ return ret;
+
+ newfd = fd_create (loc->inode, 0);
+ if (!newfd) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to create fd");
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ nfs_fop_handle_local_init (NULL, nfsx, nfl, cbk, local, ret, fd_err);
+ ret = nfs_fop_open (nfsx, xl, nfu, loc, flags, newfd,
+ nfs_inode_open_cbk, nfl);
+
+ if (ret < 0)
+ nfs_fop_local_wipe (xl, nfl);
+
+fd_err:
+ if (ret < 0)
+ if (newfd)
+ fd_unref (newfd);
+
+err:
+
+ return ret;
+}
+
+
+
+int32_t
+nfs_inode_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fop_rename_cbk_t progcbk = NULL;
+
+ nfl = frame->local;
+ if (op_ret == -1)
+ goto do_not_link;
+
+ inode_rename (this->itable, nfl->parent, nfl->path, nfl->newparent,
+ nfl->newpath, nfl->inode, buf);
+
+do_not_link:
+ inodes_nfl_to_prog_data (nfl, progcbk, frame);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, buf,
+ preoldparent, postoldparent, prenewparent,
+ postnewparent, xdata);
+ return 0;
+}
+
+
+int
+nfs_inode_rename (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *oldloc,
+ loc_t *newloc, fop_rename_cbk_t cbk, void *local)
+{
+ struct nfs_fop_local *nfl = NULL;
+ int ret = -EFAULT;
+
+ if ((!nfsx) || (!xl) || (!oldloc) || (!newloc))
+ return ret;
+
+ nfs_fop_handle_local_init (NULL, nfsx, nfl, cbk, local, ret, err);
+ nfl_inodes_init (nfl, oldloc->inode, oldloc->parent, newloc->parent,
+ oldloc->name, newloc->name);
+ ret = nfs_fop_rename (nfsx,xl, nfu, oldloc, newloc, nfs_inode_rename_cbk
+ , nfl);
+
+err:
+ if (ret < 0)
+ nfs_fop_local_wipe (xl, nfl);
+
+ return ret;
+}
+
+
+int32_t
+nfs_inode_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fop_link_cbk_t progcbk = NULL;
+ inode_t *linked_inode = NULL;
+
+ if (op_ret == -1)
+ goto do_not_link;
+
+ nfl = frame->local;
+ linked_inode = inode_link (inode, nfl->newparent, nfl->path, buf);
+
+do_not_link:
+ inodes_nfl_to_prog_data (nfl, progcbk, frame);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+
+ if (linked_inode) {
+ inode_lookup (linked_inode);
+ inode_unref (linked_inode);
+ }
+
+ return 0;
+}
+
+
+int
+nfs_inode_link (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *oldloc,
+ loc_t *newloc, fop_link_cbk_t cbk, void *local)
+{
+ struct nfs_fop_local *nfl = NULL;
+ int ret = -EFAULT;
+
+ if ((!nfsx) || (!xl) || (!oldloc) || (!newloc) || (!nfu))
+ return -EFAULT;
+
+ nfs_fop_handle_local_init (NULL, nfsx, nfl, cbk, local, ret, err);
+ nfl_inodes_init (nfl, NULL, NULL, newloc->parent, newloc->name, NULL);
+ ret = nfs_fop_link (nfsx, xl, nfu, oldloc, newloc, nfs_inode_link_cbk,
+ nfl);
+
+err:
+ if (ret < 0)
+ nfs_fop_local_wipe (xl, nfl);
+
+ return ret;
+}
+
+
+int32_t
+nfs_inode_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fop_unlink_cbk_t progcbk = NULL;
+
+ nfl = frame->local;
+
+ if (op_ret == -1)
+ goto do_not_unlink;
+
+ inode_unlink (nfl->inode, nfl->parent, nfl->path);
+ inode_forget (nfl->inode, 0);
+
+do_not_unlink:
+ inodes_nfl_to_prog_data (nfl, progcbk, frame);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+
+int
+nfs_inode_unlink (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ fop_unlink_cbk_t cbk, void *local)
+{
+ struct nfs_fop_local *nfl = NULL;
+ int ret = -EFAULT;
+
+ if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
+ return -EFAULT;
+
+ nfs_fop_handle_local_init (NULL, nfsx, nfl, cbk, local, ret, err);
+ nfl_inodes_init (nfl, pathloc->inode, pathloc->parent, NULL,
+ pathloc->name, NULL);
+ ret = nfs_fop_unlink (nfsx, xl, nfu, pathloc, nfs_inode_unlink_cbk,nfl);
+
+err:
+ if (ret < 0)
+ nfs_fop_local_wipe (xl, nfl);
+
+ return ret;
+}
+
+
+int32_t
+nfs_inode_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fop_rmdir_cbk_t progcbk = NULL;
+
+ nfl = frame->local;
+
+ if (op_ret == -1)
+ goto do_not_unlink;
+
+ inode_unlink (nfl->inode, nfl->parent, nfl->path);
+ inode_forget (nfl->inode, 0);
+
+do_not_unlink:
+ inodes_nfl_to_prog_data (nfl, progcbk, frame);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, preparent,
+ postparent, xdata);
+
+ return 0;
+}
+
+
+int
+nfs_inode_rmdir (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ fop_rmdir_cbk_t cbk, void *local)
+{
+ struct nfs_fop_local *nfl = NULL;
+ int ret = -EFAULT;
+
+ if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
+ return ret;
+
+ nfs_fop_handle_local_init (NULL, nfsx, nfl, cbk, local, ret, err);
+ nfl_inodes_init (nfl, pathloc->inode, pathloc->parent, NULL,
+ pathloc->name, NULL);
+
+ ret = nfs_fop_rmdir (nfsx, xl, nfu, pathloc, nfs_inode_rmdir_cbk, nfl);
+
+err:
+ if (ret < 0)
+ nfs_fop_local_wipe (xl, nfl);
+ return ret;
+}
+
+
+int32_t
+nfs_inode_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fop_mknod_cbk_t progcbk = NULL;
+ inode_t *linked_inode = NULL;
+
+ nfl = frame->local;
+
+ if (op_ret == -1)
+ goto do_not_link;
+
+ linked_inode = inode_link (inode, nfl->parent, nfl->path, buf);
+
+do_not_link:
+ inodes_nfl_to_prog_data (nfl, progcbk, frame);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+
+ if (linked_inode) {
+ inode_lookup (linked_inode);
+ inode_unref (linked_inode);
+ }
+
+ return 0;
+}
+
+
+int
+nfs_inode_mknod (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ mode_t mode, dev_t dev, fop_mknod_cbk_t cbk, void *local)
+{
+ struct nfs_fop_local *nfl = NULL;
+ int ret = -EFAULT;
+
+ if ((!nfsx) || (!xl) || (!pathloc) || (!nfu))
+ return ret;
+
+ nfs_fop_handle_local_init (NULL, nfsx, nfl, cbk, local, ret, err);
+ nfl_inodes_init (nfl, pathloc->inode, pathloc->parent, NULL,
+ pathloc->name, NULL);
+
+ ret = nfs_fop_mknod (nfsx, xl, nfu, pathloc, mode, dev,
+ nfs_inode_mknod_cbk, nfl);
+
+err:
+ if (ret < 0)
+ nfs_fop_local_wipe (xl, nfl);
+
+ return ret;
+}
+
+
+int32_t
+nfs_inode_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fop_symlink_cbk_t progcbk = NULL;
+ inode_t *linked_inode = NULL;
+
+ nfl = frame->local;
+ if (op_ret == -1)
+ goto do_not_link;
+
+ linked_inode = inode_link (inode, nfl->parent, nfl->path, buf);
+
+do_not_link:
+ inodes_nfl_to_prog_data (nfl, progcbk, frame);
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+
+ if (linked_inode) {
+ inode_lookup (linked_inode);
+ inode_unref (linked_inode);
+ }
+
+ return 0;
+}
+
+
+int
+nfs_inode_symlink (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, char *target,
+ loc_t *pathloc, fop_symlink_cbk_t cbk, void *local)
+{
+ struct nfs_fop_local *nfl = NULL;
+ int ret = -EFAULT;
+
+ if ((!nfsx) || (!xl) || (!target) || (!pathloc) || (!nfu))
+ return ret;
+
+ nfs_fop_handle_local_init (NULL, nfsx, nfl, cbk, local, ret, err);
+ nfl_inodes_init (nfl, pathloc->inode, pathloc->parent, NULL,
+ pathloc->name, NULL);
+ ret = nfs_fop_symlink (nfsx, xl, nfu, target, pathloc,
+ nfs_inode_symlink_cbk, nfl);
+
+err:
+ if (ret < 0)
+ nfs_fop_local_wipe (xl, nfl);
+
+ return ret;
+}
+
+int32_t
+nfs_inode_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+
+ struct nfs_fop_local *nfl = NULL;
+ fop_open_cbk_t progcbk = NULL;
+
+ if (op_ret != -1)
+ fd_bind (fd);
+
+ inodes_nfl_to_prog_data (nfl, progcbk, frame);
+
+ if (progcbk)
+ progcbk (frame, cookie, this, op_ret, op_errno, fd, xdata);
+
+ return 0;
+}
+
+
+int
+nfs_inode_opendir (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
+ fop_opendir_cbk_t cbk, void *local)
+{
+ struct nfs_fop_local *nfl = NULL;
+ fd_t *newfd = NULL;
+ int ret = -EFAULT;
+
+ if ((!nfsx) || (!xl) || (!loc) || (!nfu))
+ return ret;
+
+ newfd = fd_create (loc->inode, 0);
+ if (!newfd) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to create fd");
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ nfs_fop_handle_local_init (NULL, nfsx, nfl, cbk, local, ret, err);
+ ret = nfs_fop_opendir (nfsx, xl, nfu, loc, newfd,
+ nfs_inode_opendir_cbk, nfl);
+
+err:
+ if (ret < 0) {
+ if (newfd)
+ fd_unref (newfd);
+ nfs_fop_local_wipe (xl, nfl);
+ }
+
+ return ret;
+}
diff --git a/xlators/nfs/server/src/nfs-inodes.h b/xlators/nfs/server/src/nfs-inodes.h
new file mode 100644
index 000000000..ba7a57124
--- /dev/null
+++ b/xlators/nfs/server/src/nfs-inodes.h
@@ -0,0 +1,76 @@
+/*
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _NFS_INODES_H_
+#define _NFS_INODES_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "dict.h"
+#include "xlator.h"
+#include "iobuf.h"
+#include "call-stub.h"
+#include "stack.h"
+#include "nfs-fops.h"
+
+
+extern int
+nfs_link_inode (inode_t *newi, inode_t *parent, char *name,
+ struct iatt *newstat);
+
+extern int
+nfs_inode_create (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu,
+ loc_t *pathloc, int flags, int mode, fop_create_cbk_t cbk,
+ void *local);
+
+extern int
+nfs_inode_mkdir (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ int mode, fop_mkdir_cbk_t cbk, void *local);
+
+extern int
+nfs_inode_open (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
+ int32_t flags, fop_open_cbk_t cbk,
+ void *local);
+
+extern int
+nfs_inode_rename (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *oldloc,
+ loc_t *newloc, fop_rename_cbk_t cbk, void *local);
+
+extern int
+nfs_inode_link (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *oldloc,
+ loc_t *newloc, fop_link_cbk_t cbk, void *local);
+
+extern int
+nfs_inode_unlink (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ fop_unlink_cbk_t cbk, void *local);
+
+extern int
+nfs_inode_rmdir (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ fop_rmdir_cbk_t cbk, void *local);
+
+extern int
+nfs_inode_symlink (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, char *target,
+ loc_t *pathloc, fop_symlink_cbk_t cbk, void *local);
+
+extern int
+nfs_inode_opendir (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *loc,
+ fop_opendir_cbk_t cbk, void *local);
+
+extern int
+nfs_inode_mknod (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ mode_t mode, dev_t dev, fop_mknod_cbk_t cbk, void *local);
+
+extern int
+nfs_inode_lookup (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, loc_t *pathloc,
+ fop_lookup_cbk_t cbk, void *local);
+#endif
diff --git a/xlators/nfs/server/src/nfs-mem-types.h b/xlators/nfs/server/src/nfs-mem-types.h
new file mode 100644
index 000000000..450b6f2fe
--- /dev/null
+++ b/xlators/nfs/server/src/nfs-mem-types.h
@@ -0,0 +1,53 @@
+/*
+ Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __NFS_MEM_TYPES_H__
+#define __NFS_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_nfs_mem_types_ {
+ gf_nfs_mt_mountentry = gf_common_mt_end + 1,
+ gf_nfs_mt_mountbody,
+ gf_nfs_mt_nfs_state,
+ gf_nfs_mt_char,
+ gf_nfs_mt_exportnode,
+ gf_nfs_mt_groupnode,
+ gf_nfs_mt_mount3_state,
+ gf_nfs_mt_write3args,
+ gf_nfs_mt_nfs3_export,
+ gf_nfs_mt_nfs3_state,
+ gf_nfs_mt_entry3,
+ gf_nfs_mt_entryp3,
+ gf_nfs_mt_nfs3_fd_entry,
+ gf_nfs_mt_nfs3_fh,
+ gf_nfs_mt_nfs_initer_list,
+ gf_nfs_mt_xlator_t,
+ gf_nfs_mt_list_head,
+ gf_nfs_mt_mnt3_resolve,
+ gf_nfs_mt_mnt3_export,
+ gf_nfs_mt_int,
+ gf_nfs_mt_mountres3,
+ gf_nfs_mt_mountstat3,
+ gf_nfs_mt_inode_q,
+ gf_nfs_mt_nlm4_state,
+ gf_nfs_mt_nlm4_cm,
+ gf_nfs_mt_nlm4_fde,
+ gf_nfs_mt_nlm4_nlmclnt,
+ gf_nfs_mt_nlm4_share,
+ gf_nfs_mt_aux_gids,
+ gf_nfs_mt_inode_ctx,
+ gf_nfs_mt_auth_spec,
+ gf_nfs_mt_arr,
+ gf_nfs_mt_end
+};
+#endif
+
diff --git a/xlators/nfs/server/src/nfs.c b/xlators/nfs/server/src/nfs.c
new file mode 100644
index 000000000..75c8fe44e
--- /dev/null
+++ b/xlators/nfs/server/src/nfs.c
@@ -0,0 +1,1825 @@
+/*
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/* This is the primary translator source for NFS.
+ * Every other protocol version gets initialized from here.
+ */
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "defaults.h"
+#include "rpcsvc.h"
+#include "dict.h"
+#include "xlator.h"
+#include "nfs.h"
+#include "mem-pool.h"
+#include "logging.h"
+#include "nfs-fops.h"
+#include "inode.h"
+#include "mount3.h"
+#include "nfs3.h"
+#include "nfs-mem-types.h"
+#include "nfs3-helpers.h"
+#include "nlm4.h"
+#include "options.h"
+#include "acl3.h"
+#include "rpc-drc.h"
+
+#define STRINGIFY(val) #val
+#define TOSTRING(val) STRINGIFY(val)
+
+#define OPT_SERVER_AUX_GIDS "nfs.server-aux-gids"
+#define OPT_SERVER_GID_CACHE_TIMEOUT "nfs.server.aux-gid-timeout"
+
+/* TODO: DATADIR should be based on configure's $(localstatedir) */
+#define DATADIR "/var/lib/glusterd"
+#define NFS_DATADIR DATADIR "/nfs"
+
+/* Forward declaration */
+int nfs_add_initer (struct list_head *list, nfs_version_initer_t init);
+
+static int
+nfs_init_version (xlator_t *this, nfs_version_initer_t init)
+{
+ int ret = -1;
+ struct nfs_initer_list *version = NULL;
+ struct nfs_initer_list *tmp = NULL;
+ rpcsvc_program_t *prog = NULL;
+ struct list_head *versions = NULL;
+ struct nfs_state *nfs = NULL;
+ gf_boolean_t found = _gf_false;
+
+ if ((!this) || (!this->private) || (!init))
+ return (-1);
+
+ nfs = (struct nfs_state *)this->private;
+
+ ret = nfs_add_initer (&nfs->versions, init);
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR,
+ "Failed to add protocol initializer");
+ goto err;
+ }
+
+ versions = &nfs->versions;
+ list_for_each_entry_safe (version, tmp, versions, list) {
+ prog = version->program;
+ if (version->init == init) {
+ prog = init(this);
+ if (!prog) {
+ ret = -1;
+ goto err;
+ }
+ version->program = prog;
+ found = _gf_true;
+ break;
+ }
+ }
+
+ /* program not added */
+ if (!found) {
+ gf_log (GF_NFS, GF_LOG_ERROR,
+ "Program: %s NOT found", prog->progname);
+ goto err;
+ }
+
+ /* Check if nfs.port is configured */
+ if (nfs->override_portnum)
+ prog->progport = nfs->override_portnum;
+
+ gf_log (GF_NFS, GF_LOG_DEBUG, "Starting program: %s", prog->progname);
+
+ ret = rpcsvc_program_register (nfs->rpcsvc, prog);
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Program: %s init failed",
+ prog->progname);
+ goto err;
+ }
+
+ /* Registration with portmapper is disabled, Nothing to do */
+ if (!nfs->register_portmap)
+ goto err;
+
+ ret = rpcsvc_program_register_portmap (prog, prog->progport);
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR,
+ "Program %s registration failed",
+ prog->progname);
+ goto err;
+ }
+ ret = 0; /* All well */
+err:
+ return ret;
+}
+
+static int
+nfs_deinit_version (struct nfs_state *nfs, nfs_version_initer_t init)
+{
+ int ret = -1;
+ struct nfs_initer_list *version = NULL;
+ struct nfs_initer_list *tmp = NULL;
+ rpcsvc_program_t *prog = NULL;
+ struct list_head *versions = NULL;
+
+ if ((!nfs) || (!init))
+ return (-1);
+
+ versions = &nfs->versions;
+ list_for_each_entry_safe (version, tmp, versions, list) {
+ prog = version->program;
+ if (version->init == init) {
+ prog = version->program;
+ ret = rpcsvc_program_unregister (nfs->rpcsvc, prog);
+ if (ret != 0)
+ return (-1);
+ list_del (&version->list);
+ GF_FREE (version);
+ return (0);
+ }
+ }
+
+ return (-1);
+}
+
+static int
+nfs_reconfigure_acl3 (xlator_t *this)
+{
+ struct nfs_state *nfs = NULL;
+
+ if ((!this) || (!this->private))
+ return (-1);
+
+ nfs = (struct nfs_state *)this->private;
+
+ /* ACL is enabled */
+ if (nfs->enable_acl)
+ return nfs_init_version (this, acl3svc_init);
+
+ /* ACL is disabled */
+ return nfs_deinit_version (nfs, acl3svc_init);
+}
+
+static int
+nfs_reconfigure_nlm4 (xlator_t *this)
+{
+ struct nfs_state *nfs = NULL;
+
+ if ((!this) || (!this->private))
+ return (-1);
+
+ nfs = (struct nfs_state *)this->private;
+
+ /* NLM is enabled */
+ if (nfs->enable_nlm)
+ return nfs_init_version (this, nlm4svc_init);
+
+ /* NLM is disabled */
+ return nfs_deinit_version (nfs, nlm4svc_init);
+}
+
+static int
+nfs_program_register_portmap_all (struct nfs_state *nfs)
+{
+ struct list_head *versions = NULL;
+ struct nfs_initer_list *version = NULL;
+ struct nfs_initer_list *tmp = NULL;
+ rpcsvc_program_t *prog = NULL;
+
+ if (nfs == NULL)
+ return (-1);
+
+ versions = &nfs->versions;
+ list_for_each_entry_safe (version, tmp, versions, list) {
+ prog = version->program;
+ if (prog == NULL)
+ continue;
+ if (nfs->override_portnum)
+ prog->progport = nfs->override_portnum;
+ (void) rpcsvc_program_register_portmap (prog, prog->progport);
+ }
+
+ return (0);
+}
+
+static int
+nfs_program_unregister_portmap_all (struct nfs_state *nfs)
+{
+ struct list_head *versions = NULL;
+ struct nfs_initer_list *version = NULL;
+ struct nfs_initer_list *tmp = NULL;
+ rpcsvc_program_t *prog = NULL;
+
+ if (nfs == NULL)
+ return (-1);
+
+ versions = &nfs->versions;
+ list_for_each_entry_safe (version, tmp, versions, list) {
+ prog = version->program;
+ if (prog == NULL)
+ continue;
+ (void) rpcsvc_program_unregister_portmap (prog);
+ }
+
+ return (0);
+}
+
+/* Every NFS version must call this function with the init function
+ * for its particular version.
+ */
+int
+nfs_add_initer (struct list_head *list, nfs_version_initer_t init)
+{
+ struct nfs_initer_list *new = NULL;
+ if ((!list) || (!init))
+ return -1;
+
+ new = GF_CALLOC (1, sizeof (*new), gf_nfs_mt_nfs_initer_list);
+ if (!new) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Memory allocation failed");
+ return -1;
+ }
+
+ new->init = init;
+ list_add_tail (&new->list, list);
+ return 0;
+}
+
+
+int
+nfs_deinit_versions (struct list_head *versions, xlator_t *this)
+{
+ struct nfs_initer_list *version = NULL;
+ struct nfs_initer_list *tmp = NULL;
+ struct nfs_state *nfs = NULL;
+
+ if ((!versions) || (!this))
+ return -1;
+
+ nfs = (struct nfs_state *)this->private;
+ list_for_each_entry_safe (version, tmp, versions, list) {
+ /* TODO: Add version specific destructor.
+ * if (!version->deinit)
+ goto err;
+
+ version->deinit (this);
+ */
+ if (version->program)
+ rpcsvc_program_unregister (nfs->rpcsvc,
+ (version->program));
+
+ list_del (&version->list);
+ GF_FREE (version);
+ }
+
+ return 0;
+}
+
+int
+nfs_init_versions (struct nfs_state *nfs, xlator_t *this)
+{
+ struct nfs_initer_list *version = NULL;
+ struct nfs_initer_list *tmp = NULL;
+ rpcsvc_program_t *prog = NULL;
+ int ret = -1;
+ struct list_head *versions = NULL;
+
+ if ((!nfs) || (!this))
+ return -1;
+
+ gf_log (GF_NFS, GF_LOG_DEBUG, "Initing protocol versions");
+ versions = &nfs->versions;
+ list_for_each_entry_safe (version, tmp, versions, list) {
+ if (!version->init) {
+ ret = -1;
+ goto err;
+ }
+
+ prog = version->init (this);
+ if (!prog) {
+ ret = -1;
+ goto err;
+ }
+
+ version->program = prog;
+ if (nfs->override_portnum)
+ prog->progport = nfs->override_portnum;
+ gf_log (GF_NFS, GF_LOG_DEBUG, "Starting program: %s",
+ prog->progname);
+
+ ret = rpcsvc_program_register (nfs->rpcsvc, prog);
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Program: %s init failed",
+ prog->progname);
+ goto err;
+ }
+ if (nfs->register_portmap) {
+ ret = rpcsvc_program_register_portmap (prog,
+ prog->progport);
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR,
+ "Program %s registration failed",
+ prog->progname);
+ goto err;
+ }
+ }
+
+ }
+
+ ret = 0;
+err:
+ return ret;
+}
+
+
+int
+nfs_add_all_initiators (struct nfs_state *nfs)
+{
+ int ret = 0;
+
+ /* Add the initializers for all versions. */
+ ret = nfs_add_initer (&nfs->versions, mnt3svc_init);
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to add "
+ "MOUNT3 protocol initializer");
+ goto ret;
+ }
+
+ ret = nfs_add_initer (&nfs->versions, mnt1svc_init);
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to add "
+ "MOUNT1 protocol initializer");
+ goto ret;
+ }
+
+ ret = nfs_add_initer (&nfs->versions, nfs3svc_init);
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to add "
+ "NFS3 protocol initializer");
+ goto ret;
+ }
+
+ if (nfs->enable_nlm == _gf_true) {
+ ret = nfs_add_initer (&nfs->versions, nlm4svc_init);
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to add protocol"
+ " initializer");
+ goto ret;
+ }
+ }
+
+ if (nfs->enable_acl == _gf_true) {
+ ret = nfs_add_initer (&nfs->versions, acl3svc_init);
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to add "
+ "ACL protocol initializer");
+ goto ret;
+ }
+ }
+
+ ret = 0;
+ret:
+ return ret;
+}
+
+
+int
+nfs_subvolume_started (struct nfs_state *nfs, xlator_t *xl)
+{
+ int x = 0;
+ int started = 0;
+
+ if ((!nfs) || (!xl))
+ return 1;
+
+ LOCK (&nfs->svinitlock);
+ {
+ for (;x < nfs->allsubvols; ++x) {
+ if (nfs->initedxl[x] == xl) {
+ started = 1;
+ goto unlock;
+ }
+ }
+ }
+unlock:
+ UNLOCK (&nfs->svinitlock);
+
+ return started;
+}
+
+
+int
+nfs_subvolume_set_started (struct nfs_state *nfs, xlator_t *xl)
+{
+ int x = 0;
+
+ if ((!nfs) || (!xl))
+ return 1;
+
+ LOCK (&nfs->svinitlock);
+ {
+ for (;x < nfs->allsubvols; ++x) {
+ if (nfs->initedxl[x] == xl) {
+ gf_log (GF_NFS, GF_LOG_DEBUG,
+ "Volume already started %s",
+ xl->name);
+ break;
+ }
+
+ if (nfs->initedxl[x] == NULL) {
+ nfs->initedxl[x] = xl;
+ ++nfs->upsubvols;
+ gf_log (GF_NFS, GF_LOG_DEBUG, "Starting up: %s "
+ ", vols started till now: %d", xl->name,
+ nfs->upsubvols);
+ goto unlock;
+ }
+ }
+ }
+unlock:
+ UNLOCK (&nfs->svinitlock);
+
+ return 0;
+}
+
+
+int32_t
+nfs_start_subvol_lookup_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_CRITICAL, "Failed to lookup root: %s",
+ strerror (op_errno));
+ goto err;
+ }
+
+ nfs_subvolume_set_started (this->private, ((xlator_t *)cookie));
+ gf_log (GF_NFS, GF_LOG_TRACE, "Started %s", ((xlator_t *)cookie)->name);
+err:
+ return 0;
+}
+
+
+int
+nfs_startup_subvolume (xlator_t *nfsx, xlator_t *xl)
+{
+ int ret = -1;
+ loc_t rootloc = {0, };
+ nfs_user_t nfu = {0, };
+
+ if ((!nfsx) || (!xl))
+ return -1;
+
+ if (nfs_subvolume_started (nfsx->private, xl)) {
+ gf_log (GF_NFS,GF_LOG_TRACE, "Subvolume already started: %s",
+ xl->name);
+ ret = 0;
+ goto err;
+ }
+
+ ret = nfs_root_loc_fill (xl->itable, &rootloc);
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_CRITICAL, "Failed to init root loc");
+ goto err;
+ }
+
+ nfs_user_root_create (&nfu);
+ ret = nfs_fop_lookup (nfsx, xl, &nfu, &rootloc,
+ nfs_start_subvol_lookup_cbk,
+ (void *)nfsx->private);
+ if (ret < 0) {
+ gf_log (GF_NFS, GF_LOG_CRITICAL, "Failed to lookup root: %s",
+ strerror (-ret));
+ goto err;
+ }
+
+ nfs_loc_wipe (&rootloc);
+
+err:
+ return ret;
+}
+
+int
+nfs_startup_subvolumes (xlator_t *nfsx)
+{
+ int ret = -1;
+ xlator_list_t *cl = NULL;
+ struct nfs_state *nfs = NULL;
+
+ if (!nfsx)
+ return -1;
+
+ nfs = nfsx->private;
+ cl = nfs->subvols;
+ while (cl) {
+ gf_log (GF_NFS, GF_LOG_DEBUG, "Starting subvolume: %s",
+ cl->xlator->name);
+ ret = nfs_startup_subvolume (nfsx, cl->xlator);
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_CRITICAL, "Failed to start-up "
+ "xlator: %s", cl->xlator->name);
+ goto err;
+ }
+ cl = cl->next;
+ }
+
+ ret = 0;
+err:
+ return ret;
+}
+
+
+int
+nfs_init_subvolume (struct nfs_state *nfs, xlator_t *xl)
+{
+ unsigned int lrusize = 0;
+ int ret = -1;
+
+ if ((!nfs) || (!xl))
+ return -1;
+
+ lrusize = nfs->memfactor * GF_NFS_INODE_LRU_MULT;
+ xl->itable = inode_table_new (lrusize, xl);
+ if (!xl->itable) {
+ gf_log (GF_NFS, GF_LOG_CRITICAL, "Failed to allocate "
+ "inode table");
+ goto err;
+ }
+ ret = 0;
+err:
+ return ret;
+}
+
+int
+nfs_init_subvolumes (struct nfs_state *nfs, xlator_list_t *cl)
+{
+ int ret = -1;
+ unsigned int lrusize = 0;
+ int svcount = 0;
+
+ if ((!nfs) || (!cl))
+ return -1;
+
+ lrusize = nfs->memfactor * GF_NFS_INODE_LRU_MULT;
+ nfs->subvols = cl;
+ gf_log (GF_NFS, GF_LOG_TRACE, "inode table lru: %d", lrusize);
+
+ while (cl) {
+ gf_log (GF_NFS, GF_LOG_DEBUG, "Initing subvolume: %s",
+ cl->xlator->name);
+ ret = nfs_init_subvolume (nfs, cl->xlator);
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_CRITICAL, "Failed to init "
+ "xlator: %s", cl->xlator->name);
+ goto err;
+ }
+ ++svcount;
+ cl = cl->next;
+ }
+
+ LOCK_INIT (&nfs->svinitlock);
+ nfs->initedxl = GF_CALLOC (svcount, sizeof (xlator_t *),
+ gf_nfs_mt_xlator_t );
+ if (!nfs->initedxl) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to allocated inited xls");
+ ret = -1;
+ goto err;
+ }
+
+ gf_log (GF_NFS, GF_LOG_TRACE, "Inited volumes: %d", svcount);
+ nfs->allsubvols = svcount;
+ ret = 0;
+err:
+ return ret;
+}
+
+
+int
+nfs_user_root_create (nfs_user_t *newnfu)
+{
+ if (!newnfu)
+ return -1;
+
+ newnfu->uid = 0;
+ newnfu->gids[0] = 0;
+ newnfu->ngrps = 1;
+
+ return 0;
+}
+
+
+int
+nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid, gid_t *auxgids,
+ int auxcount)
+{
+ int x = 1;
+ int y = 0;
+
+ /* We test for GF_REQUEST_MAXGROUPS instead of NFS_FOP_NGROUPS because
+ * the latter accounts for the @gid being in @auxgids, which is not the
+ * case here.
+ */
+ if ((!newnfu) || (auxcount > GF_REQUEST_MAXGROUPS))
+ return -1;
+
+ newnfu->uid = uid;
+ newnfu->gids[0] = gid;
+ newnfu->ngrps = 1;
+
+ gf_log (GF_NFS, GF_LOG_TRACE, "uid: %d, gid %d, gids: %d", uid, gid,
+ auxcount);
+
+ if (!auxgids)
+ return 0;
+
+ for (; y < auxcount; ++x,++y) {
+ newnfu->gids[x] = auxgids[y];
+ ++newnfu->ngrps;
+ gf_log (GF_NFS, GF_LOG_TRACE, "gid: %d", auxgids[y]);
+ }
+
+ return 0;
+}
+
+
+void
+nfs_request_user_init (nfs_user_t *nfu, rpcsvc_request_t *req)
+{
+ gid_t *gidarr = NULL;
+ int gids = 0;
+
+ if ((!req) || (!nfu))
+ return;
+
+ gidarr = rpcsvc_auth_unix_auxgids (req, &gids);
+ nfs_user_create (nfu, rpcsvc_request_uid (req),
+ rpcsvc_request_gid (req), gidarr, gids);
+
+ return;
+}
+
+void
+nfs_request_primary_user_init (nfs_user_t *nfu, rpcsvc_request_t *req,
+ uid_t uid, gid_t gid)
+{
+ gid_t *gidarr = NULL;
+ int gids = 0;
+
+ if ((!req) || (!nfu))
+ return;
+
+ gidarr = rpcsvc_auth_unix_auxgids (req, &gids);
+ nfs_user_create (nfu, uid, gid, gidarr, gids);
+
+ return;
+}
+
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_nfs_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+
+struct nfs_state *
+nfs_init_state (xlator_t *this)
+{
+ struct nfs_state *nfs = NULL;
+ int ret = -1;
+ unsigned int fopspoolsize = 0;
+ char *optstr = NULL;
+ gf_boolean_t boolt = _gf_false;
+ struct stat stbuf = {0,};
+
+ if (!this)
+ return NULL;
+
+ if (!this->children) {
+ gf_log (GF_NFS, GF_LOG_INFO,
+ "NFS is manually disabled: Exiting");
+ /* Nothing for nfs process to do, exit cleanly */
+ kill (getpid (), SIGTERM);
+ }
+
+ nfs = GF_CALLOC (1, sizeof (*nfs), gf_nfs_mt_nfs_state);
+ if (!nfs) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "memory allocation failed");
+ return NULL;
+ }
+
+ nfs->memfactor = GF_NFS_DEFAULT_MEMFACTOR;
+ if (dict_get (this->options, "nfs.mem-factor")) {
+ ret = dict_get_str (this->options, "nfs.mem-factor",
+ &optstr);
+ if (ret < 0) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to parse dict");
+ goto free_rpcsvc;
+ }
+
+ ret = gf_string2uint (optstr, &nfs->memfactor);
+ if (ret < 0) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to parse uint "
+ "string");
+ goto free_rpcsvc;
+ }
+ }
+
+ fopspoolsize = nfs->memfactor * GF_NFS_CONCURRENT_OPS_MULT;
+ /* FIXME: Really saddens me to see this as xlator wide. */
+ nfs->foppool = mem_pool_new (struct nfs_fop_local, fopspoolsize);
+ if (!nfs->foppool) {
+ gf_log (GF_NFS, GF_LOG_CRITICAL, "Failed to allocate fops "
+ "local pool");
+ goto free_rpcsvc;
+ }
+
+ nfs->dynamicvolumes = GF_NFS_DVM_OFF;
+ if (dict_get (this->options, "nfs.dynamic-volumes")) {
+ ret = dict_get_str (this->options, "nfs.dynamic-volumes",
+ &optstr);
+ if (ret < 0) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to parse dict");
+ goto free_foppool;
+ }
+
+ ret = gf_string2boolean (optstr, &boolt);
+ if (ret < 0) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to parse bool "
+ "string");
+ goto free_foppool;
+ }
+
+ if (boolt == _gf_true)
+ nfs->dynamicvolumes = GF_NFS_DVM_ON;
+ }
+
+ nfs->enable_nlm = _gf_true;
+ ret = dict_get_str_boolean (this->options, "nfs.nlm", _gf_true);
+ if (ret == _gf_false) {
+ gf_log (GF_NFS, GF_LOG_INFO, "NLM is manually disabled");
+ nfs->enable_nlm = _gf_false;
+ }
+
+ nfs->enable_acl = _gf_true;
+ ret = dict_get_str_boolean (this->options, "nfs.acl", _gf_true);
+ if (ret == _gf_false) {
+ gf_log (GF_NFS, GF_LOG_INFO, "ACL is manually disabled");
+ nfs->enable_acl = _gf_false;
+ }
+
+ nfs->enable_ino32 = 0;
+ if (dict_get (this->options, "nfs.enable-ino32")) {
+ ret = dict_get_str (this->options, "nfs.enable-ino32",
+ &optstr);
+ if (ret < 0) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to parse dict");
+ goto free_foppool;
+ }
+
+ ret = gf_string2boolean (optstr, &boolt);
+ if (ret < 0) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to parse bool "
+ "string");
+ goto free_foppool;
+ }
+
+ if (boolt == _gf_true)
+ nfs->enable_ino32 = 1;
+ }
+
+ if (dict_get (this->options, "nfs.port")) {
+ ret = dict_get_str (this->options, "nfs.port",
+ &optstr);
+ if (ret < 0) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to parse dict");
+ goto free_foppool;
+ }
+
+ ret = gf_string2uint (optstr, &nfs->override_portnum);
+ if (ret < 0) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to parse uint "
+ "string");
+ goto free_foppool;
+ }
+ }
+
+ if (dict_get(this->options, "transport.socket.listen-port") == NULL) {
+ if (nfs->override_portnum)
+ ret = gf_asprintf (&optstr, "%d",
+ nfs->override_portnum);
+ else
+ ret = gf_asprintf (&optstr, "%d", GF_NFS3_PORT);
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "failed mem-allocation");
+ goto free_foppool;
+ }
+ ret = dict_set_dynstr (this->options,
+ "transport.socket.listen-port", optstr);
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "dict_set_dynstr error");
+ goto free_foppool;
+ }
+ }
+
+ if (dict_get(this->options, "transport-type") == NULL) {
+ ret = dict_set_str (this->options, "transport-type", "socket");
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "dict_set_str error");
+ goto free_foppool;
+ }
+ }
+
+ nfs->mount_udp = 0;
+ if (dict_get(this->options, "nfs.mount-udp")) {
+ ret = dict_get_str (this->options, "nfs.mount-udp", &optstr);
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to parse dict");
+ goto free_foppool;
+ }
+
+ ret = gf_string2boolean (optstr, &boolt);
+ if (ret < 0) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to parse bool "
+ "string");
+ goto free_foppool;
+ }
+
+ if (boolt == _gf_true)
+ nfs->mount_udp = 1;
+ }
+
+ nfs->rmtab = gf_strdup (NFS_DATADIR "/rmtab");
+ if (dict_get(this->options, "nfs.mount-rmtab")) {
+ ret = dict_get_str (this->options, "nfs.mount-rmtab", &nfs->rmtab);
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to parse dict");
+ goto free_foppool;
+ }
+ }
+
+ /* support both options rpc-auth.ports.insecure and
+ * rpc-auth-allow-insecure for backward compatibility
+ */
+ nfs->allow_insecure = 1;
+ if (dict_get(this->options, "rpc-auth.ports.insecure")) {
+ ret = dict_get_str (this->options, "rpc-auth.ports.insecure",
+ &optstr);
+ if (ret < 0) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to parse dict");
+ goto free_foppool;
+ }
+
+ ret = gf_string2boolean (optstr, &boolt);
+ if (ret < 0) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to parse bool "
+ "string");
+ goto free_foppool;
+ }
+
+ if (boolt == _gf_false)
+ nfs->allow_insecure = 0;
+ }
+
+ if (dict_get(this->options, "rpc-auth-allow-insecure")) {
+ ret = dict_get_str (this->options, "rpc-auth-allow-insecure",
+ &optstr);
+ if (ret < 0) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to parse dict");
+ goto free_foppool;
+ }
+
+ ret = gf_string2boolean (optstr, &boolt);
+ if (ret < 0) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to parse bool "
+ "string");
+ goto free_foppool;
+ }
+
+ if (boolt == _gf_false)
+ nfs->allow_insecure = 0;
+ }
+
+ if (nfs->allow_insecure) {
+ /* blindly set both the options */
+ dict_del (this->options, "rpc-auth-allow-insecure");
+ ret = dict_set_str (this->options,
+ "rpc-auth-allow-insecure", "on");
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "dict_set_str error");
+ goto free_foppool;
+ }
+ dict_del (this->options, "rpc-auth.ports.insecure");
+ ret = dict_set_str (this->options,
+ "rpc-auth.ports.insecure", "on");
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "dict_set_str error");
+ goto free_foppool;
+ }
+ }
+
+ GF_OPTION_INIT (OPT_SERVER_AUX_GIDS, nfs->server_aux_gids,
+ bool, free_foppool);
+ GF_OPTION_INIT (OPT_SERVER_GID_CACHE_TIMEOUT, nfs->server_aux_gids_max_age,
+ uint32, free_foppool);
+
+ if (gid_cache_init(&nfs->gid_cache, nfs->server_aux_gids_max_age) < 0) {
+ gf_log(GF_NFS, GF_LOG_ERROR, "Failed to initialize group cache.");
+ goto free_foppool;
+ }
+
+ if (stat("/sbin/rpc.statd", &stbuf) == -1) {
+ gf_log (GF_NFS, GF_LOG_WARNING, "/sbin/rpc.statd not found. "
+ "Disabling NLM");
+ nfs->enable_nlm = _gf_false;
+ }
+
+ nfs->rpcsvc = rpcsvc_init (this, this->ctx, this->options, 0);
+ if (!nfs->rpcsvc) {
+ ret = -1;
+ gf_log (GF_NFS, GF_LOG_ERROR, "RPC service init failed");
+ goto free_foppool;
+ }
+
+ nfs->register_portmap = rpcsvc_register_portmap_enabled (nfs->rpcsvc);
+
+ this->private = (void *)nfs;
+ INIT_LIST_HEAD (&nfs->versions);
+ nfs->generation = 1965;
+
+ ret = 0;
+
+free_foppool:
+ if (ret < 0)
+ mem_pool_destroy (nfs->foppool);
+
+free_rpcsvc:
+ /*
+ * rpcsvc_deinit */
+ if (ret < 0) {
+ GF_FREE (nfs);
+ nfs = NULL;
+ }
+
+ return nfs;
+}
+
+int
+nfs_drc_init (xlator_t *this)
+{
+ int ret = -1;
+ rpcsvc_t *svc = NULL;
+
+ svc = ((struct nfs_state *)(this->private))->rpcsvc;
+ if (!svc)
+ goto out;
+
+ ret = rpcsvc_drc_init (svc, this->options);
+
+ out:
+ return ret;
+}
+
+int
+nfs_reconfigure_state (xlator_t *this, dict_t *options)
+{
+ int ret = 0;
+ int keyindx = 0;
+ char *optstr = NULL;
+ gf_boolean_t optbool;
+ uint32_t optuint32;
+ struct nfs_state *nfs = NULL;
+ char *blacklist_keys[] = {
+ "nfs.port",
+ "nfs.transport-type",
+ "nfs.mem-factor",
+ NULL};
+
+ GF_VALIDATE_OR_GOTO (GF_NFS, this, out);
+ GF_VALIDATE_OR_GOTO (GF_NFS, this->private, out);
+ GF_VALIDATE_OR_GOTO (GF_NFS, options, out);
+
+ nfs = (struct nfs_state *)this->private;
+
+ /* Black listed options can't be reconfigured, they need
+ * NFS to be restarted. There are two cases 1. SET 2. UNSET.
+ * 1. SET */
+ while (blacklist_keys[keyindx]) {
+ if (dict_get (options, blacklist_keys[keyindx])) {
+ gf_log (GF_NFS, GF_LOG_ERROR,
+ "Reconfiguring %s needs NFS restart",
+ blacklist_keys[keyindx]);
+ goto out;
+ }
+ keyindx ++;
+ }
+
+ /* UNSET for nfs.mem-factor */
+ if ((!dict_get (options, "nfs.mem-factor")) &&
+ (nfs->memfactor != GF_NFS_DEFAULT_MEMFACTOR)) {
+ gf_log (GF_NFS, GF_LOG_INFO,
+ "Reconfiguring nfs.mem-factor needs NFS restart");
+ goto out;
+ }
+
+ /* UNSET for nfs.port */
+ if ((!dict_get (options, "nfs.port")) &&
+ (nfs->override_portnum)) {
+ gf_log (GF_NFS, GF_LOG_ERROR,
+ "Reconfiguring nfs.port needs NFS restart");
+ goto out;
+ }
+
+ /* reconfig nfs.mount-rmtab */
+ optstr = NFS_DATADIR "/rmtab";
+ if (dict_get (options, "nfs.mount-rmtab")) {
+ ret = dict_get_str (options, "nfs.mount-rmtab", &optstr);
+ if (ret < 0) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to read "
+ "reconfigured option: nfs.mount-rmtab");
+ goto out;
+ }
+ gf_path_strip_trailing_slashes (optstr);
+ }
+ if (strcmp (nfs->rmtab, optstr) != 0) {
+ mount_rewrite_rmtab (nfs->mstate, optstr);
+ gf_log (GF_NFS, GF_LOG_INFO,
+ "Reconfigured nfs.mount-rmtab path: %s",
+ nfs->rmtab);
+ }
+
+ GF_OPTION_RECONF (OPT_SERVER_AUX_GIDS, optbool,
+ options, bool, out);
+ if (nfs->server_aux_gids != optbool) {
+ nfs->server_aux_gids = optbool;
+ gf_log(GF_NFS, GF_LOG_INFO, "Reconfigured %s with value %d",
+ OPT_SERVER_AUX_GIDS, optbool);
+ }
+
+ GF_OPTION_RECONF (OPT_SERVER_GID_CACHE_TIMEOUT, optuint32,
+ options, uint32, out);
+ if (nfs->server_aux_gids_max_age != optuint32) {
+ nfs->server_aux_gids_max_age = optuint32;
+ gid_cache_reconf (&nfs->gid_cache, optuint32);
+ gf_log(GF_NFS, GF_LOG_INFO, "Reconfigured %s with value %d",
+ OPT_SERVER_GID_CACHE_TIMEOUT, optuint32);
+ }
+
+ /* reconfig nfs.dynamic-volumes */
+ ret = dict_get_str_boolean (options, "nfs.dynamic-volumes",
+ GF_NFS_DVM_OFF);
+ switch (ret) {
+ case GF_NFS_DVM_ON:
+ case GF_NFS_DVM_OFF:
+ optbool = ret;
+ break;
+ default:
+ optbool = GF_NFS_DVM_OFF;
+ break;
+ }
+ if (nfs->dynamicvolumes != optbool) {
+ nfs->dynamicvolumes = optbool;
+ gf_log(GF_NFS, GF_LOG_INFO, "Reconfigured nfs.dynamic-volumes"
+ " with value %d", optbool);
+ }
+
+ optbool = _gf_false;
+ if (dict_get (options, "nfs.enable-ino32")) {
+ ret = dict_get_str_boolean (options, "nfs.enable-ino32",
+ _gf_false);
+ if (ret < 0) {
+ gf_log (GF_NFS, GF_LOG_ERROR,
+ "Failed to read reconfigured option: "
+ "nfs.enable-ino32");
+ goto out;
+ }
+ optbool = ret;
+ }
+ if (nfs->enable_ino32 != optbool) {
+ nfs->enable_ino32 = optbool;
+ gf_log(GF_NFS, GF_LOG_INFO, "Reconfigured nfs.enable-ino32"
+ " with value %d", optbool);
+ }
+
+ /* nfs.nlm is enabled by default */
+ ret = dict_get_str_boolean (options, "nfs.nlm", _gf_true);
+ if (ret < 0) {
+ optbool = _gf_true;
+ } else {
+ optbool = ret;
+ }
+ if (nfs->enable_nlm != optbool) {
+ gf_log (GF_NFS, GF_LOG_INFO, "NLM is manually %s",
+ (optbool ? "enabled":"disabled"));
+ nfs->enable_nlm = optbool;
+ nfs_reconfigure_nlm4 (this);
+ }
+
+ /* nfs.acl is enabled by default */
+ ret = dict_get_str_boolean (options, "nfs.acl", _gf_true);
+ if (ret < 0) {
+ optbool = _gf_true;
+ } else {
+ optbool = ret;
+ }
+ if (nfs->enable_acl != optbool) {
+ gf_log (GF_NFS, GF_LOG_INFO, "ACL is manually %s",
+ (optbool ? "enabled":"disabled"));
+ nfs->enable_acl = optbool;
+ nfs_reconfigure_acl3 (this);
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+/*
+ * reconfigure() for NFS server xlator.
+ */
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ int ret = 0;
+ struct nfs_state *nfs = NULL;
+ gf_boolean_t regpmap = _gf_true;
+
+ if ((!this) || (!this->private) || (!options))
+ return (-1);
+
+ nfs = (struct nfs_state *)this->private;
+
+ /* Reconfigure nfs options */
+ ret = nfs_reconfigure_state(this, options);
+ if (ret) {
+ gf_log (GF_NFS, GF_LOG_ERROR,
+ "nfs reconfigure state failed");
+ return (-1);
+ }
+
+ /* Reconfigure nfs3 options */
+ ret = nfs3_reconfigure_state(this, options);
+ if (ret) {
+ gf_log (GF_NFS, GF_LOG_ERROR,
+ "nfs3 reconfigure state failed");
+ return (-1);
+ }
+
+ /* Reconfigure mount options */
+ ret = mount_reconfigure_state(this, options);
+ if (ret) {
+ gf_log (GF_NFS, GF_LOG_ERROR,
+ "mount reconfigure state failed");
+ return (-1);
+ }
+
+ /* Reconfigure rpc layer */
+ ret = rpcsvc_reconfigure_options (nfs->rpcsvc, options);
+ if (ret) {
+ gf_log (GF_NFS, GF_LOG_ERROR,
+ "rpcsvc reconfigure options failed");
+ return (-1);
+ }
+ regpmap = rpcsvc_register_portmap_enabled(nfs->rpcsvc);
+ if (nfs->register_portmap != regpmap) {
+ nfs->register_portmap = regpmap;
+ if (regpmap) {
+ (void) nfs_program_register_portmap_all (nfs);
+ } else {
+ (void) nfs_program_unregister_portmap_all (nfs);
+ }
+ }
+
+ /* Reconfigure drc */
+ ret = rpcsvc_drc_reconfigure (nfs->rpcsvc, options);
+ if (ret) {
+ gf_log (GF_NFS, GF_LOG_ERROR,
+ "rpcsvc DRC reconfigure failed");
+ return (-1);
+ }
+
+ return (0);
+}
+
+int
+init (xlator_t *this) {
+
+ struct nfs_state *nfs = NULL;
+ int ret = -1;
+
+ if (!this)
+ return -1;
+
+ nfs = nfs_init_state (this);
+ if (!nfs) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to init nfs option");
+ return -1;
+ }
+
+ ret = nfs_add_all_initiators (nfs);
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to add initiators");
+ goto err;
+ }
+
+ ret = nfs_init_subvolumes (nfs, this->children);
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_CRITICAL, "Failed to init NFS "
+ "exports");
+ goto err;
+ }
+
+ ret = mount_init_state (this);
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_CRITICAL, "Failed to init Mount"
+ "state");
+ goto err;
+ }
+
+ ret = nlm4_init_state (this);
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_CRITICAL, "Failed to init NLM"
+ "state");
+ goto err;
+ }
+
+ ret = nfs_init_versions (nfs, this);
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Failed to initialize "
+ "protocols");
+ /* Do not return an error on this. If we dont return
+ * an error, the process keeps running and it helps
+ * to point out where the log is by doing ps ax|grep gluster.
+ */
+ ret = 0;
+ goto err;
+ }
+
+ ret = nfs_drc_init (this);
+ if (ret == 0)
+ gf_log (GF_NFS, GF_LOG_INFO, "NFS service started");
+err:
+
+ return ret;
+}
+
+
+int
+notify (xlator_t *this, int32_t event, void *data, ...)
+{
+ xlator_t *subvol = NULL;
+ struct nfs_state *priv = NULL;
+
+ subvol = (xlator_t *)data;
+
+ gf_log (GF_NFS, GF_LOG_TRACE, "Notification received: %d",
+ event);
+
+ switch (event) {
+ case GF_EVENT_CHILD_UP:
+ nfs_startup_subvolume (this, subvol);
+ break;
+
+ case GF_EVENT_CHILD_MODIFIED:
+ priv = this->private;
+ ++(priv->generation);
+ break;
+
+ case GF_EVENT_PARENT_UP:
+ default_notify (this, GF_EVENT_PARENT_UP, data);
+ break;
+ }
+
+ return 0;
+}
+
+
+int
+fini (xlator_t *this)
+{
+
+ struct nfs_state *nfs = NULL;
+
+ nfs = (struct nfs_state *)this->private;
+ gf_log (GF_NFS, GF_LOG_DEBUG, "NFS service going down");
+ nfs_deinit_versions (&nfs->versions, this);
+ return 0;
+}
+
+int32_t
+nfs_forget (xlator_t *this, inode_t *inode)
+{
+ uint64_t ctx = 0;
+ struct nfs_inode_ctx *ictx = NULL;
+
+ if (inode_ctx_del (inode, this, &ctx))
+ return -1;
+
+ ictx = (struct nfs_inode_ctx *)ctx;
+ GF_FREE (ictx);
+
+ return 0;
+}
+
+gf_boolean_t
+_nfs_export_is_for_vol (char *exname, char *volname)
+{
+ gf_boolean_t ret = _gf_false;
+ char *tmp = NULL;
+
+ tmp = exname;
+ if (tmp[0] == '/')
+ tmp++;
+
+ if (!strcmp (tmp, volname))
+ ret = _gf_true;
+
+ return ret;
+}
+
+int
+nfs_priv_to_dict (xlator_t *this, dict_t *dict)
+{
+ int ret = -1;
+ struct nfs_state *priv = NULL;
+ struct mountentry *mentry = NULL;
+ char *volname = NULL;
+ char key[1024] = {0,};
+ int count = 0;
+
+ GF_VALIDATE_OR_GOTO (THIS->name, this, out);
+ GF_VALIDATE_OR_GOTO (THIS->name, dict, out);
+
+ priv = this->private;
+ GF_ASSERT (priv);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Could not get volname");
+ goto out;
+ }
+
+ list_for_each_entry (mentry, &priv->mstate->mountlist, mlist) {
+ if (!_nfs_export_is_for_vol (mentry->exname, volname))
+ continue;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "client%d.hostname", count);
+ ret = dict_set_str (dict, key, mentry->hostname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error writing hostname to dict");
+ goto out;
+ }
+
+ /* No connection data available yet in nfs server.
+ * Hence, setting to 0 to prevent cli failing
+ */
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "client%d.bytesread", count);
+ ret = dict_set_uint64 (dict, key, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error writing bytes read to dict");
+ goto out;
+ }
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "client%d.byteswrite", count);
+ ret = dict_set_uint64 (dict, key, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error writing bytes write to dict");
+ goto out;
+ }
+
+ count++;
+ }
+
+ ret = dict_set_int32 (dict, "clientcount", count);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "Error writing client count to dict");
+
+out:
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+extern int32_t
+nlm_priv (xlator_t *this);
+
+int32_t
+nfs_priv (xlator_t *this)
+{
+ int32_t ret = -1;
+
+ /* DRC needs the global drc structure, xl is of no use to it. */
+ ret = rpcsvc_drc_priv (((struct nfs_state *)(this->private))->rpcsvc->drc);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "Statedump of DRC failed");
+ goto out;
+ }
+
+ ret = nlm_priv (this);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG, "Statedump of NLM failed");
+ goto out;
+ }
+ out:
+ return ret;
+}
+
+
+struct xlator_cbks cbks = {
+ .forget = nfs_forget,
+};
+
+struct xlator_fops fops;
+
+struct xlator_dumpops dumpops = {
+ .priv = nfs_priv,
+ .priv_to_dict = nfs_priv_to_dict,
+};
+
+/* TODO: If needed, per-volume options below can be extended to be export
++ * specific also because after export-dir is introduced, a volume is not
++ * neccessarily an export whereas different subdirectories within that volume
++ * can be and may need these options to be specified separately.
++ */
+struct volume_options options[] = {
+ { .key = {"nfs3.read-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = GF_NFS3_RTMIN,
+ .max = GF_NFS3_RTMAX,
+ .default_value = TOSTRING(GF_NFS3_RTPREF),
+ .description = "Size in which the client should issue read requests "
+ "to the Gluster NFSv3 server. Must be a multiple of "
+ "4KB (4096). Min and Max supported values are 4KB "
+ "(4096) and 1MB (1048576) respectively. If the "
+ "specified value is within the supported range but "
+ "not a multiple of 4096, it is rounded up to the "
+ "nearest multiple of 4096."
+ },
+ { .key = {"nfs3.write-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = GF_NFS3_WTMIN,
+ .max = GF_NFS3_WTMAX,
+ .default_value = TOSTRING(GF_NFS3_WTPREF),
+ .description = "Size in which the client should issue write requests "
+ "to the Gluster NFSv3 server. Must be a multiple of "
+ "1KB (1024). Min and Max supported values are "
+ "4KB (4096) and 1MB(1048576) respectively. If the "
+ "specified value is within the supported range but "
+ "not a multiple of 4096, it is rounded up to the "
+ "nearest multiple of 4096."
+ },
+ { .key = {"nfs3.readdir-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = GF_NFS3_DTMIN,
+ .max = GF_NFS3_DTMAX,
+ .default_value = TOSTRING(GF_NFS3_DTPREF),
+ .description = "Size in which the client should issue directory "
+ "reading requests to the Gluster NFSv3 server. Must "
+ "be a multiple of 1KB (1024). Min and Max supported "
+ "values are 4KB (4096) and 1MB (1048576) respectively."
+ "If the specified value is within the supported range "
+ "but not a multiple of 4096, it is rounded up to the "
+ "nearest multiple of 4096."
+ },
+ { .key = {"nfs3.*.volume-access"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {"read-only", "read-write"},
+ .default_value = "read-write",
+ .description = "Type of access desired for this subvolume: "
+ " read-only, read-write(default)"
+ },
+ { .key = {"nfs3.*.trusted-write"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "On an UNSTABLE write from client, return STABLE flag"
+ " to force client to not send a COMMIT request. In "
+ "some environments, combined with a replicated "
+ "GlusterFS setup, this option can improve write "
+ "performance. This flag allows user to trust Gluster"
+ " replication logic to sync data to the disks and "
+ "recover when required. COMMIT requests if received "
+ "will be handled in a default manner by fsyncing."
+ " STABLE writes are still handled in a sync manner. "
+ "Off by default."
+
+ },
+ { .key = {"nfs3.*.trusted-sync"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "All writes and COMMIT requests are treated as async."
+ " This implies that no write requests are guaranteed"
+ " to be on server disks when the write reply is "
+ "received at the NFS client. Trusted sync includes "
+ " trusted-write behaviour. Off by default."
+
+ },
+ { .key = {"nfs3.*.export-dir"},
+ .type = GF_OPTION_TYPE_PATH,
+ .default_value = "",
+ .description = "By default, all subvolumes of nfs are exported as "
+ "individual exports. There are cases where a "
+ "subdirectory or subdirectories in the volume need to "
+ "be exported separately. This option can also be used "
+ "in conjunction with nfs3.export-volumes option to "
+ "restrict exports only to the subdirectories specified"
+ " through this option. Must be an absolute path. Along"
+ " with path allowed list of IPs/hostname can be "
+ "associated with each subdirectory. If provided "
+ "connection will allowed only from these IPs. By "
+ "default connections from all IPs are allowed. "
+ "Format: <dir>[(hostspec[|hostspec|...])][,...]. Where"
+ " hostspec can be an IP address, hostname or an IP "
+ "range in CIDR notation. "
+ "e.g. /foo(192.168.1.0/24|host1|10.1.1.8),/host2."
+ " NOTE: Care must be taken while configuring this "
+ "option as invalid entries and/or unreachable DNS "
+ "servers can introduce unwanted delay in all the mount"
+ " calls."
+ },
+ { .key = {"nfs3.export-dirs"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "By default, all subvolumes of nfs are exported as "
+ "individual exports. There are cases where a "
+ "subdirectory or subdirectories in the volume need to "
+ "be exported separately. Enabling this option allows "
+ "any directory on a volumes to be exported separately."
+ "Directory exports are enabled by default."
+ },
+ { .key = {"nfs3.export-volumes"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Enable or disable exporting whole volumes, instead "
+ "if used in conjunction with nfs3.export-dir, can "
+ "allow setting up only subdirectories as exports. On "
+ "by default."
+ },
+ { .key = {"rpc-auth.auth-unix"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Disable or enable the AUTH_UNIX authentication type."
+ "Must always be enabled for better interoperability."
+ "However, can be disabled if needed. Enabled by"
+ "default"
+ },
+ { .key = {"rpc-auth.auth-null"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Disable or enable the AUTH_NULL authentication type."
+ "Must always be enabled. This option is here only to"
+ " avoid unrecognized option warnings"
+ },
+ { .key = {"rpc-auth.auth-unix.*"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Disable or enable the AUTH_UNIX authentication type "
+ "for a particular exported volume overriding defaults"
+ " and general setting for AUTH_UNIX scheme. Must "
+ "always be enabled for better interoperability."
+ "However, can be disabled if needed. Enabled by"
+ "default."
+ },
+ { .key = {"rpc-auth.auth-unix.*.allow"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "on",
+ .description = "Disable or enable the AUTH_UNIX authentication type "
+ "for a particular exported volume overriding defaults"
+ " and general setting for AUTH_UNIX scheme. Must "
+ "always be enabled for better interoperability."
+ "However, can be disabled if needed. Enabled by"
+ "default."
+ },
+ { .key = {"rpc-auth.auth-null.*"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Disable or enable the AUTH_NULL authentication type "
+ "for a particular exported volume overriding defaults"
+ " and general setting for AUTH_NULL. Must always be "
+ "enabled. This option is here only to avoid "
+ "unrecognized option warnings."
+ },
+ { .key = {"rpc-auth.addr.allow"},
+ .type = GF_OPTION_TYPE_INTERNET_ADDRESS_LIST,
+ .default_value = "all",
+ .description = "Allow a comma separated list of addresses and/or"
+ " hostnames to connect to the server. By default, all"
+ " connections are allowed. This allows users to "
+ "define a general rule for all exported volumes."
+ },
+ { .key = {"rpc-auth.addr.reject"},
+ .type = GF_OPTION_TYPE_INTERNET_ADDRESS_LIST,
+ .default_value = "none",
+ .description = "Reject a comma separated list of addresses and/or"
+ " hostnames from connecting to the server. By default,"
+ " all connections are allowed. This allows users to"
+ "define a general rule for all exported volumes."
+ },
+ { .key = {"rpc-auth.addr.*.allow"},
+ .type = GF_OPTION_TYPE_INTERNET_ADDRESS_LIST,
+ .default_value = "all",
+ .description = "Allow a comma separated list of addresses and/or"
+ " hostnames to connect to the server. By default, all"
+ " connections are allowed. This allows users to "
+ "define a rule for a specific exported volume."
+ },
+ { .key = {"rpc-auth.addr.*.reject"},
+ .type = GF_OPTION_TYPE_INTERNET_ADDRESS_LIST,
+ .default_value = "none",
+ .description = "Reject a comma separated list of addresses and/or"
+ " hostnames from connecting to the server. By default,"
+ " all connections are allowed. This allows users to"
+ "define a rule for a specific exported volume."
+ },
+ { .key = {"rpc-auth.ports.insecure"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Allow client connections from unprivileged ports. By "
+ "default only privileged ports are allowed. This is a"
+ "global setting in case insecure ports are to be "
+ "enabled for all exports using a single option."
+ },
+ { .key = {"rpc-auth.ports.*.insecure"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Allow client connections from unprivileged ports. By "
+ "default only privileged ports are allowed. Use this"
+ " option to enable or disable insecure ports for "
+ "a specific subvolume and to override the global "
+ "setting set by the previous option."
+ },
+ { .key = {"rpc-auth.addr.namelookup"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Users have the option of turning on name lookup for"
+ " incoming client connections using this option. Use this "
+ "option to turn on name lookups during address-based "
+ "authentication. Turning this on will enable you to"
+ " use hostnames in nfs.rpc-auth-* filters. In some "
+ "setups, the name server can take too long to reply to DNS "
+ "queries resulting in timeouts of mount requests. By "
+ "default, name lookup is off"
+ },
+ { .key = {"nfs.dynamic-volumes"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Internal option set to tell gnfs to use a different"
+ " scheme for encoding file handles when DVM is being"
+ " used."
+ },
+ { .key = {"nfs3.*.volume-id"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "",
+ .description = "When nfs.dynamic-volumes is set, gnfs expects every "
+ "subvolume to have this option set for it, so that "
+ "gnfs can use this option to identify the volume. "
+ "If all subvolumes do not have this option set, an "
+ "error is reported."
+ },
+ { .key = {"nfs.enable-ino32"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .description = "For nfs clients or apps that do not support 64-bit "
+ "inode numbers, use this option to make NFS return "
+ "32-bit inode numbers instead. Disabled by default, so"
+ " NFS returns 64-bit inode numbers."
+ },
+ { .key = {"rpc.register-with-portmap"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "For systems that need to run multiple nfs servers, "
+ "only one registration is possible with "
+ "portmap service. Use this option to turn off portmap "
+ "registration for Gluster NFS. On by default"
+ },
+ { .key = {"rpc.outstanding-rpc-limit"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = RPCSVC_MIN_OUTSTANDING_RPC_LIMIT,
+ .max = RPCSVC_MAX_OUTSTANDING_RPC_LIMIT,
+ .default_value = TOSTRING(RPCSVC_DEFAULT_OUTSTANDING_RPC_LIMIT),
+ .description = "Parameter to throttle the number of incoming RPC "
+ "requests from a client. 0 means no limit (can "
+ "potentially run out of memory)"
+ },
+ { .key = {"nfs.port"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 0xffff,
+ .default_value = TOSTRING(GF_NFS3_PORT),
+ .description = "Use this option on systems that need Gluster NFS to "
+ "be associated with a non-default port number."
+ },
+ { .key = {"nfs.mem-factor"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 1024,
+ .default_value = TOSTRING(GF_NFS_DEFAULT_MEMFACTOR),
+ .description = "Use this option to make NFS be faster on systems by "
+ "using more memory. This option specifies a multiple "
+ "that determines the total amount of memory used. "
+ "Default value is 15. Increase to use more memory in "
+ "order to improve performance for certain use cases."
+ "Please consult gluster-users list before using this "
+ "option."
+ },
+ { .key = {"nfs.*.disable"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .description = "This option is used to start or stop NFS server"
+ "for individual volume."
+ },
+
+ { .key = {"nfs.nlm"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "This option, if set to 'off', disables NLM server "
+ "by not registering the service with the portmapper."
+ " Set it to 'on' to re-enable it. Default value: 'on'"
+ },
+
+ { .key = {"nfs.mount-udp"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "set the option to 'on' to enable mountd on UDP. "
+ "Required for some Solaris and AIX NFS clients. "
+ "The need for enabling this option often depends "
+ "on the usage of NLM."
+ },
+ { .key = {"nfs.mount-rmtab"},
+ .type = GF_OPTION_TYPE_PATH,
+ .default_value = DATADIR "/rmtab",
+ .description = "Set the location of the cache file that is used to "
+ "list all the NFS-clients that have connected "
+ "through the MOUNT protocol. If this is on shared "
+ "storage, all GlusterFS servers will update and "
+ "output (with 'showmount') the same list."
+ },
+ { .key = {OPT_SERVER_AUX_GIDS},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Let the server look up which groups a user belongs "
+ "to, overwriting the list passed from the client. "
+ "This enables support for group lists longer than "
+ "can be passed through the NFS protocol, but is not "
+ "secure unless users and groups are well synchronized "
+ "between clients and servers."
+ },
+ { .key = {OPT_SERVER_GID_CACHE_TIMEOUT},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 3600,
+ .default_value = "5",
+ .description = "Number of seconds to cache auxiliary-GID data, when "
+ OPT_SERVER_AUX_GIDS " is set."
+ },
+ { .key = {"nfs.acl"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "This option is used to control ACL support for NFS."
+ },
+ { .key = {"nfs.drc"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "on",
+ .description = "Enable Duplicate Request Cache in gNFS server to "
+ "improve correctness of non-idempotent operations like "
+ "write, delete, link, et al"
+ },
+ { .key = {"nfs.drc-size"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "0x20000",
+ .description = "Sets the number of non-idempotent "
+ "requests to cache in drc"
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/nfs/server/src/nfs.h b/xlators/nfs/server/src/nfs.h
new file mode 100644
index 000000000..00c7f8046
--- /dev/null
+++ b/xlators/nfs/server/src/nfs.h
@@ -0,0 +1,135 @@
+/*
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __NFS_H__
+#define __NFS_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "rpcsvc.h"
+#include "dict.h"
+#include "xlator.h"
+#include "lkowner.h"
+#include "gidcache.h"
+
+#define GF_NFS "nfs"
+
+#define GF_NFS_CONCURRENT_OPS_MULT 15
+
+#define GF_NFS_INODE_LRU_MULT 6000
+
+#define GF_RPC_MIN_THREADS 1
+#define GF_RPC_MAX_THREADS 16
+
+#define GF_NFS_DEFAULT_MEMFACTOR 15
+#define GF_NFS_MIN_MEMFACTOR 1
+#define GF_NFS_MAX_MEMFACTOR 30
+
+#define GF_NFS_DVM_ON 1
+#define GF_NFS_DVM_OFF 0
+
+/* This corresponds to the max 16 number of group IDs that are sent through an
+ * RPC request. Since NFS is the only one going to set this, we can be safe
+ * in keeping this size hardcoded.
+ */
+#define GF_REQUEST_MAXGROUPS 16
+
+/* Callback into a version-specific NFS protocol.
+ * The return type is used by the nfs.c code to register the protocol.
+ * with the RPC service.
+ */
+typedef rpcsvc_program_t *(*nfs_version_initer_t) (xlator_t *nfsx);
+
+/* List of version-specific protocol initiators */
+struct nfs_initer_list {
+ struct list_head list;
+ nfs_version_initer_t init;
+ rpcsvc_program_t *program;
+};
+
+struct nfs_state {
+ rpcsvc_t *rpcsvc;
+ struct list_head versions;
+ struct mount3_state *mstate;
+ struct nfs3_state *nfs3state;
+ struct nlm4_state *nlm4state;
+ struct mem_pool *foppool;
+ unsigned int memfactor;
+ xlator_list_t *subvols;
+
+ gf_lock_t svinitlock;
+ int allsubvols;
+ int upsubvols;
+ xlator_t **initedxl;
+ int subvols_started;
+ int dynamicvolumes;
+ int enable_ino32;
+ unsigned int override_portnum;
+ int allow_insecure;
+ int enable_nlm;
+ int enable_acl;
+ int mount_udp;
+ char *rmtab;
+ struct rpc_clnt *rpc_clnt;
+ gf_boolean_t server_aux_gids;
+ uint32_t server_aux_gids_max_age;
+ gid_cache_t gid_cache;
+ uint32_t generation;
+ gf_boolean_t register_portmap;
+};
+
+struct nfs_inode_ctx {
+ struct list_head shares;
+ uint32_t generation;
+};
+
+#define gf_nfs_dvm_on(nfsstt) (((struct nfs_state *)nfsstt)->dynamicvolumes == GF_NFS_DVM_ON)
+#define gf_nfs_dvm_off(nfsstt) (((struct nfs_state *)nfsstt)->dynamicvolumes == GF_NFS_DVM_OFF)
+#define __gf_nfs_enable_ino32(nfsstt) (((struct nfs_state *)nfsstt)->enable_ino32)
+#define gf_nfs_this_private ((struct nfs_state *)((xlator_t *)THIS)->private)
+#define gf_nfs_enable_ino32() (__gf_nfs_enable_ino32(gf_nfs_this_private))
+
+/* We have one gid more than the glusterfs maximum since we pass the primary
+ * gid as the first element of the array.
+ */
+#define NFS_NGROUPS (GF_REQUEST_MAXGROUPS + 1)
+
+/* Index of the primary gid */
+#define NFS_PRIMGID_IDX 0
+
+typedef struct nfs_user_info {
+ uid_t uid;
+ gid_t gids[NFS_NGROUPS];
+ int ngrps;
+ gf_lkowner_t lk_owner;
+} nfs_user_t;
+
+extern int
+nfs_user_root_create (nfs_user_t *newnfu);
+
+extern int
+nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid, gid_t *auxgids,
+ int auxcount);
+
+extern void
+nfs_request_user_init (nfs_user_t *nfu, rpcsvc_request_t *req);
+
+extern void
+nfs_request_primary_user_init (nfs_user_t *nfu, rpcsvc_request_t *req,
+ uid_t uid, gid_t gid);
+extern int
+nfs_subvolume_started (struct nfs_state *nfs, xlator_t *xl);
+
+extern void
+nfs_fix_groups (xlator_t *this, call_stack_t *root);
+#endif
diff --git a/xlators/nfs/server/src/nfs3-fh.c b/xlators/nfs/server/src/nfs3-fh.c
new file mode 100644
index 000000000..e199c56dc
--- /dev/null
+++ b/xlators/nfs/server/src/nfs3-fh.c
@@ -0,0 +1,188 @@
+/*
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "rpcsvc.h"
+#include "dict.h"
+#include "xlator.h"
+#include "xdr-nfs3.h"
+#include "msg-nfs3.h"
+#include "iobuf.h"
+#include "nfs3-fh.h"
+#include "nfs-common.h"
+#include "iatt.h"
+#include "common-utils.h"
+
+
+int
+nfs3_fh_validate (struct nfs3_fh *fh)
+{
+ if (!fh)
+ return 0;
+
+ if (fh->ident[0] != GF_NFSFH_IDENT0)
+ return 0;
+
+ if (fh->ident[1] != GF_NFSFH_IDENT1)
+ return 0;
+
+ if (fh->ident[2] != GF_NFSFH_IDENT2)
+ return 0;
+
+ if (fh->ident[3] != GF_NFSFH_IDENT3)
+ return 0;
+
+ return 1;
+}
+
+
+void
+nfs3_fh_init (struct nfs3_fh *fh, struct iatt *buf)
+{
+ if ((!fh) || (!buf))
+ return;
+
+ fh->ident[0] = GF_NFSFH_IDENT0;
+ fh->ident[1] = GF_NFSFH_IDENT1;
+ fh->ident[2] = GF_NFSFH_IDENT2;
+ fh->ident[3] = GF_NFSFH_IDENT3;
+
+ uuid_copy (fh->gfid, buf->ia_gfid);
+}
+
+
+struct nfs3_fh
+nfs3_fh_build_indexed_root_fh (xlator_list_t *cl, xlator_t *xl)
+{
+ struct nfs3_fh fh = {{0}, };
+ struct iatt buf = {0, };
+ uuid_t root = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+
+ if ((!cl) || (!xl))
+ return fh;
+
+ uuid_copy (buf.ia_gfid, root);
+ nfs3_fh_init (&fh, &buf);
+ fh.exportid [15] = nfs_xlator_to_xlid (cl, xl);
+
+ return fh;
+}
+
+
+struct nfs3_fh
+nfs3_fh_build_uuid_root_fh (uuid_t volumeid)
+{
+ struct nfs3_fh fh = {{0}, };
+ struct iatt buf = {0, };
+ uuid_t root = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+
+ uuid_copy (buf.ia_gfid, root);
+ nfs3_fh_init (&fh, &buf);
+ uuid_copy (fh.exportid, volumeid);
+
+ return fh;
+}
+
+
+int
+nfs3_fh_is_root_fh (struct nfs3_fh *fh)
+{
+ uuid_t rootgfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+
+ if (!fh)
+ return 0;
+
+ if (uuid_compare (fh->gfid, rootgfid) == 0)
+ return 1;
+
+ return 0;
+}
+
+
+void
+nfs3_fh_to_str (struct nfs3_fh *fh, char *str, size_t len)
+{
+ char gfid[GF_UUID_BUF_SIZE];
+ char exportid[GF_UUID_BUF_SIZE];
+
+ if ((!fh) || (!str))
+ return;
+
+ snprintf (str, len, "FH: exportid %s, gfid %s",
+ uuid_utoa_r (fh->exportid, exportid),
+ uuid_utoa_r (fh->gfid, gfid));
+}
+
+void
+nfs3_log_fh (struct nfs3_fh *fh)
+{
+ char gfidstr[512];
+ char exportidstr[512];
+
+ if (!fh)
+ return;
+
+ gf_log ("nfs3-fh", GF_LOG_TRACE, "filehandle: exportid "
+ "0x%s, gfid 0x%s",
+ uuid_utoa_r (fh->exportid, exportidstr),
+ uuid_utoa_r (fh->gfid, gfidstr));
+}
+
+int
+nfs3_fh_build_parent_fh (struct nfs3_fh *child, struct iatt *newstat,
+ struct nfs3_fh *newfh)
+{
+ if ((!child) || (!newstat) || (!newfh))
+ return -1;
+
+ nfs3_fh_init (newfh, newstat);
+ uuid_copy (newfh->exportid, child->exportid);
+
+ return 0;
+}
+
+int
+nfs3_build_fh (inode_t *inode, uuid_t exportid, struct nfs3_fh *newfh)
+{
+ if (!newfh || !inode)
+ return -1;
+
+ newfh->ident[0] = GF_NFSFH_IDENT0;
+ newfh->ident[1] = GF_NFSFH_IDENT1;
+ newfh->ident[2] = GF_NFSFH_IDENT2;
+ newfh->ident[3] = GF_NFSFH_IDENT3;
+ uuid_copy (newfh->gfid, inode->gfid);
+ uuid_copy (newfh->exportid, exportid);
+ return 0;
+}
+
+int
+nfs3_fh_build_child_fh (struct nfs3_fh *parent, struct iatt *newstat,
+ struct nfs3_fh *newfh)
+{
+ if ((!parent) || (!newstat) || (!newfh))
+ return -1;
+
+ nfs3_fh_init (newfh, newstat);
+ uuid_copy (newfh->exportid, parent->exportid);
+
+ return 0;
+}
+
+
+uint32_t
+nfs3_fh_compute_size (struct nfs3_fh *fh)
+{
+ return GF_NFSFH_STATIC_SIZE;
+}
diff --git a/xlators/nfs/server/src/nfs3-fh.h b/xlators/nfs/server/src/nfs3-fh.h
new file mode 100644
index 000000000..1049cdc96
--- /dev/null
+++ b/xlators/nfs/server/src/nfs3-fh.h
@@ -0,0 +1,103 @@
+/*
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _NFS_FH_H_
+#define _NFS_FH_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "xdr-nfs3.h"
+#include "iatt.h"
+#include <sys/types.h>
+#include "uuid.h"
+
+/* BIG FAT WARNING: The file handle code is tightly coupled to NFSv3 file
+ * handles for now. This will change if and when we need v4. */
+#define GF_NFSFH_IDENT0 ':'
+#define GF_NFSFH_IDENT1 'O'
+#define GF_NFSFH_IDENT2 'G'
+#define GF_NFSFH_IDENT3 'L'
+#define GF_NFSFH_IDENT_SIZE (sizeof(char) * 4)
+#define GF_NFSFH_STATIC_SIZE (GF_NFSFH_IDENT_SIZE + (2*sizeof (uuid_t)))
+
+#define nfs3_fh_exportid_to_index(exprtid) ((uint16_t)exprtid[15])
+/* ATTENTION: Change in size of the structure below should be reflected in the
+ * GF_NFSFH_STATIC_SIZE.
+ */
+struct nfs3_fh {
+
+ /* Used to ensure that a bunch of bytes are actually a GlusterFS NFS
+ * file handle. Should contain ":OGL"
+ */
+ char ident[4];
+
+ /* UUID that identifies an export. The value stored in exportid
+ * depends on the usage of gluster nfs. If the DVM is enabled using
+ * the nfs.dynamic-volumes option then exportid will contain the UUID
+ * of the volume so that gnfs is able to identify volumes uniquely
+ * through volume additions,deletions,migrations, etc.
+ *
+ * When not using dvm, exportid contains the index of the volume
+ * based on the position of the volume in the list of subvolumes
+ * for gnfs.
+ */
+ uuid_t exportid;
+
+ /* File/dir gfid. */
+ uuid_t gfid;
+ /* This structure must be exactly NFS3_FHSIZE (64) bytes long.
+ Having the structure shorter results in buffer overflows
+ during XDR decoding.
+ */
+ unsigned char padding[NFS3_FHSIZE - GF_NFSFH_STATIC_SIZE];
+} __attribute__((__packed__));
+
+#define GF_NFS3FH_STATIC_INITIALIZER {{0},}
+
+extern uint32_t
+nfs3_fh_compute_size (struct nfs3_fh *fh);
+
+extern uint16_t
+nfs3_fh_hash_entry (uuid_t gfid);
+
+extern int
+nfs3_fh_validate (struct nfs3_fh *fh);
+
+extern struct nfs3_fh
+nfs3_fh_build_indexed_root_fh (xlator_list_t *cl, xlator_t *xl);
+
+extern int
+nfs3_fh_is_root_fh (struct nfs3_fh *fh);
+
+extern int
+nfs3_fh_build_child_fh (struct nfs3_fh *parent, struct iatt *newstat,
+ struct nfs3_fh *newfh);
+
+extern void
+nfs3_log_fh (struct nfs3_fh *fh);
+
+extern void
+nfs3_fh_to_str (struct nfs3_fh *fh, char *str, size_t len);
+
+extern int
+nfs3_fh_build_parent_fh (struct nfs3_fh *child, struct iatt *newstat,
+ struct nfs3_fh *newfh);
+
+extern struct nfs3_fh
+nfs3_fh_build_uuid_root_fh (uuid_t volumeid);
+
+extern int
+nfs3_build_fh (inode_t *inode, uuid_t exportid, struct nfs3_fh *newfh);
+
+#endif
diff --git a/xlators/nfs/server/src/nfs3-helpers.c b/xlators/nfs/server/src/nfs3-helpers.c
new file mode 100644
index 000000000..9059fc341
--- /dev/null
+++ b/xlators/nfs/server/src/nfs3-helpers.c
@@ -0,0 +1,3881 @@
+/*
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <inttypes.h>
+
+#include "xlator.h"
+#include "nfs3.h"
+#include "nfs3-fh.h"
+#include "msg-nfs3.h"
+#include "rbthash.h"
+#include "nfs-fops.h"
+#include "nfs-inodes.h"
+#include "nfs-generics.h"
+#include "nfs3-helpers.h"
+#include "nfs-mem-types.h"
+#include "iatt.h"
+#include "common-utils.h"
+#include <string.h>
+
+extern int
+nfs3_set_root_looked_up (struct nfs3_state *nfs3, struct nfs3_fh *rootfh);
+
+extern int
+nfs3_is_root_looked_up (struct nfs3_state *nfs3, struct nfs3_fh *rootfh);
+
+
+#define nfs3_call_resume(cst) \
+ do { \
+ if (((cst)) && (cst)->resume_fn) \
+ (cst)->resume_fn (cst); \
+ } while (0) \
+
+#define nfs3_call_resume_estale(csta) \
+ do { \
+ (csta)->resolve_ret = -1; \
+ (csta)->resolve_errno = ESTALE; \
+ nfs3_call_resume (csta); \
+ } while (0) \
+
+struct nfs3stat_strerror {
+ nfsstat3 stat;
+ char strerror[100];
+};
+
+
+struct nfs3stat_strerror nfs3stat_strerror_table[] = {
+ { NFS3_OK, "Call completed successfully." },
+ { NFS3ERR_PERM, "Not owner" },
+ { NFS3ERR_NOENT, "No such file or directory" },
+ { NFS3ERR_IO, "I/O error" },
+ { NFS3ERR_NXIO, "I/O error" },
+ { NFS3ERR_ACCES, "Permission denied" },
+ { NFS3ERR_EXIST, "File exists" },
+ { NFS3ERR_XDEV, "Attempt to do a cross-device hard link"},
+ { NFS3ERR_NODEV, "No such device" },
+ { NFS3ERR_NOTDIR, "Not a directory" },
+ { NFS3ERR_ISDIR, "Is a directory" },
+ { NFS3ERR_INVAL, "Invalid argument for operation" },
+ { NFS3ERR_FBIG, "File too large" },
+ { NFS3ERR_NOSPC, "No space left on device" },
+ { NFS3ERR_ROFS, "Read-only file system" },
+ { NFS3ERR_MLINK, "Too many hard links" },
+ { NFS3ERR_NAMETOOLONG, "Filename in operation was too long" },
+ { NFS3ERR_NOTEMPTY, "Directory not empty" },
+ { NFS3ERR_DQUOT, "Resource (quota) hard limit exceeded" },
+ { NFS3ERR_STALE, "Invalid file handle" },
+ { NFS3ERR_REMOTE, "Too many levels of remote in path" },
+ { NFS3ERR_BADHANDLE, "Illegal NFS file handle" },
+ { NFS3ERR_NOT_SYNC, "Update synchronization mismatch detected" },
+ { NFS3ERR_BAD_COOKIE, "READDIR or READDIRPLUS cookie is stale"},
+ { NFS3ERR_NOTSUPP, "Operation is not supported" },
+ { NFS3ERR_TOOSMALL, "Buffer or request is too small" },
+ { NFS3ERR_SERVERFAULT, "Error occurred on the server or IO Error" },
+ { NFS3ERR_BADTYPE, "Type not supported by the server" },
+ { NFS3ERR_JUKEBOX, "Cannot complete server initiated request" },
+ { -1, "IO Error" },
+
+};
+
+
+uint64_t
+nfs3_iatt_gfid_to_ino (struct iatt *buf)
+{
+ uint64_t ino = 0;
+
+ if (!buf)
+ return 0;
+
+ if (gf_nfs_enable_ino32()) {
+ ino = (uint32_t )nfs_hash_gfid (buf->ia_gfid);
+ goto hashout;
+ }
+
+ /* from posix its guaranteed to send unique ino */
+ ino = buf->ia_ino;
+
+hashout:
+ return ino;
+}
+
+
+void
+nfs3_map_deviceid_to_statdev (struct iatt *ia, uint64_t deviceid)
+{
+ if (!ia)
+ return;
+
+ ia->ia_dev = deviceid;
+}
+
+
+struct nfs3_fh
+nfs3_extract_nfs3_fh (nfs_fh3 fh)
+{
+ struct nfs3_fh gfh;
+
+ memcpy (&gfh, fh.data.data_val, fh.data.data_len);
+ return gfh;
+}
+
+
+struct nfs3_fh
+nfs3_extract_lookup_fh (lookup3args *args)
+{
+ return nfs3_extract_nfs3_fh(args->what.dir);
+}
+
+
+char *
+nfs3_extract_lookup_name (lookup3args *args)
+{
+ return args->what.name;
+}
+
+
+nfsstat3
+nfs3_errno_to_nfsstat3 (int errnum)
+{
+ nfsstat3 stat = NFS3_OK;
+
+ switch (errnum) {
+
+ case 0:
+ stat = NFS3_OK;
+ break;
+
+ case EPERM:
+ stat = NFS3ERR_PERM;
+ break;
+
+ case ENOENT:
+ stat = NFS3ERR_NOENT;
+ break;
+
+ case EACCES:
+ stat = NFS3ERR_ACCES;
+ break;
+
+ case EEXIST:
+ stat = NFS3ERR_EXIST;
+ break;
+
+ case EXDEV:
+ stat = NFS3ERR_XDEV;
+ break;
+
+ case ENODEV:
+ stat = NFS3ERR_NODEV;
+ break;
+
+ case EIO:
+ stat = NFS3ERR_IO;
+ break;
+
+ case ENXIO:
+ stat = NFS3ERR_NXIO;
+ break;
+
+ case ENOTDIR:
+ stat = NFS3ERR_NOTDIR;
+ break;
+
+ case EISDIR:
+ stat = NFS3ERR_ISDIR;
+ break;
+
+ case EINVAL:
+ stat = NFS3ERR_INVAL;
+ break;
+
+ case ENOSPC:
+ stat = NFS3ERR_NOSPC;
+ break;
+
+ case EROFS:
+ stat = NFS3ERR_ROFS;
+ break;
+
+ case EFBIG:
+ stat = NFS3ERR_FBIG;
+ break;
+
+ case EMLINK:
+ stat = NFS3ERR_MLINK;
+ break;
+
+ case ENAMETOOLONG:
+ stat = NFS3ERR_NAMETOOLONG;
+ break;
+
+ case ENOTEMPTY:
+ stat = NFS3ERR_NOTEMPTY;
+ break;
+
+ case EFAULT:
+ stat = NFS3ERR_SERVERFAULT;
+ break;
+
+ case ENOSYS:
+ stat = NFS3ERR_NOTSUPP;
+ break;
+
+ case EBADF:
+ stat = NFS3ERR_BADTYPE;
+ break;
+
+ case ESTALE:
+ stat = NFS3ERR_STALE;
+ break;
+
+ case ENOTCONN:
+ stat = NFS3ERR_IO;
+ break;
+
+ case EDQUOT:
+ stat = NFS3ERR_DQUOT;
+ break;
+
+ default:
+ stat = NFS3ERR_SERVERFAULT;
+ break;
+ }
+
+ return stat;
+}
+
+/*
+ * Special case: If op_ret is -1, it's very unusual op_errno being
+ * 0 which means something came wrong from upper layer(s). If it
+ * happens by any means, then set NFS3 status to NFS3ERR_SERVERFAULT.
+ */
+inline nfsstat3
+nfs3_cbk_errno_status (int32_t op_ret, int32_t op_errno)
+{
+ if ((op_ret == -1) && (op_errno == 0)) {
+ return NFS3ERR_SERVERFAULT;
+ }
+
+ return nfs3_errno_to_nfsstat3 (op_errno);
+}
+
+void
+nfs3_fill_lookup3res_error (lookup3res *res, nfsstat3 stat,
+ struct iatt *dirstat)
+{
+
+ memset (res, 0, sizeof (*res));
+ res->status = stat;
+ if (!dirstat) {
+ res->lookup3res_u.resfail.dir_attributes.attributes_follow = FALSE;
+ }
+
+}
+
+fattr3
+nfs3_stat_to_fattr3 (struct iatt *buf)
+{
+ fattr3 fa = {0, };
+
+ if (buf == NULL)
+ goto out;
+
+ if (IA_ISDIR (buf->ia_type))
+ fa.type = NF3DIR;
+ else if (IA_ISREG (buf->ia_type))
+ fa.type = NF3REG;
+ else if (IA_ISCHR (buf->ia_type))
+ fa.type = NF3CHR;
+ else if (IA_ISBLK (buf->ia_type))
+ fa.type = NF3BLK;
+ else if (IA_ISFIFO (buf->ia_type))
+ fa.type = NF3FIFO;
+ else if (IA_ISLNK (buf->ia_type))
+ fa.type = NF3LNK;
+ else if (IA_ISSOCK (buf->ia_type))
+ fa.type = NF3SOCK;
+
+ if (IA_PROT_RUSR (buf->ia_prot))
+ fa.mode |= NFS3MODE_ROWNER;
+ if (IA_PROT_WUSR (buf->ia_prot))
+ fa.mode |= NFS3MODE_WOWNER;
+ if (IA_PROT_XUSR (buf->ia_prot))
+ fa.mode |= NFS3MODE_XOWNER;
+
+ if (IA_PROT_RGRP (buf->ia_prot))
+ fa.mode |= NFS3MODE_RGROUP;
+ if (IA_PROT_WGRP (buf->ia_prot))
+ fa.mode |= NFS3MODE_WGROUP;
+ if (IA_PROT_XGRP (buf->ia_prot))
+ fa.mode |= NFS3MODE_XGROUP;
+
+ if (IA_PROT_ROTH (buf->ia_prot))
+ fa.mode |= NFS3MODE_ROTHER;
+ if (IA_PROT_WOTH (buf->ia_prot))
+ fa.mode |= NFS3MODE_WOTHER;
+ if (IA_PROT_XOTH (buf->ia_prot))
+ fa.mode |= NFS3MODE_XOTHER;
+
+ if (IA_PROT_SUID (buf->ia_prot))
+ fa.mode |= NFS3MODE_SETXUID;
+ if (IA_PROT_SGID (buf->ia_prot))
+ fa.mode |= NFS3MODE_SETXGID;
+ if (IA_PROT_STCKY (buf->ia_prot))
+ fa.mode |= NFS3MODE_SAVESWAPTXT;
+
+ fa.nlink = buf->ia_nlink;
+ fa.uid = buf->ia_uid;
+ fa.gid = buf->ia_gid;
+ fa.size = buf->ia_size;
+ fa.used = (buf->ia_blocks * 512);
+
+ if ((IA_ISCHR (buf->ia_type) || IA_ISBLK (buf->ia_type))) {
+ fa.rdev.specdata1 = ia_major (buf->ia_rdev);
+ fa.rdev.specdata2 = ia_minor (buf->ia_rdev);
+ } else {
+ fa.rdev.specdata1 = 0;
+ fa.rdev.specdata2 = 0;
+ }
+
+ fa.fsid = buf->ia_dev;
+ fa.fileid = nfs3_iatt_gfid_to_ino (buf);
+
+ fa.atime.seconds = buf->ia_atime;
+ fa.atime.nseconds = buf->ia_atime_nsec;
+
+ fa.ctime.seconds = buf->ia_ctime;
+ fa.ctime.nseconds = buf->ia_ctime_nsec;
+
+ fa.mtime.seconds = buf->ia_mtime;
+ fa.mtime.nseconds = buf->ia_mtime_nsec;
+
+out:
+ return fa;
+}
+
+
+post_op_attr
+nfs3_stat_to_post_op_attr (struct iatt *buf)
+{
+ post_op_attr attr = {0, };
+ if (!buf)
+ return attr;
+
+ /* Some performance translators return zero-filled stats when they
+ * do not have up-to-date attributes. Need to handle this by not
+ * returning these zeroed out attrs.
+ */
+ attr.attributes_follow = FALSE;
+ if (nfs_zero_filled_stat (buf))
+ goto out;
+
+ attr.post_op_attr_u.attributes = nfs3_stat_to_fattr3 (buf);
+ attr.attributes_follow = TRUE;
+
+out:
+ return attr;
+}
+
+
+pre_op_attr
+nfs3_stat_to_pre_op_attr (struct iatt *pre)
+{
+ pre_op_attr poa = {0, };
+
+ /* Some performance translators return zero-filled stats when they
+ * do not have up-to-date attributes. Need to handle this by not
+ * returning these zeroed out attrs.
+ */
+ poa.attributes_follow = FALSE;
+ if (nfs_zero_filled_stat (pre))
+ goto out;
+
+ poa.attributes_follow = TRUE;
+ poa.pre_op_attr_u.attributes.size = pre->ia_size;
+ poa.pre_op_attr_u.attributes.mtime.seconds = pre->ia_mtime;
+ poa.pre_op_attr_u.attributes.mtime.nseconds = pre->ia_mtime_nsec;
+ poa.pre_op_attr_u.attributes.ctime.seconds = pre->ia_ctime;
+ poa.pre_op_attr_u.attributes.ctime.nseconds = pre->ia_ctime_nsec;
+
+out:
+ return poa;
+}
+
+void
+nfs3_fill_lookup3res_success (lookup3res *res, nfsstat3 stat,
+ struct nfs3_fh *fh, struct iatt *buf,
+ struct iatt *postparent)
+{
+ post_op_attr obj, dir;
+ uint32_t fhlen = 0;
+
+ res->status = stat;
+ if (fh) {
+ res->lookup3res_u.resok.object.data.data_val = (void *)fh;
+ fhlen = nfs3_fh_compute_size (fh);
+ res->lookup3res_u.resok.object.data.data_len = fhlen;
+ }
+
+ obj.attributes_follow = FALSE;
+ dir.attributes_follow = FALSE;
+ obj = nfs3_stat_to_post_op_attr (buf);
+ dir = nfs3_stat_to_post_op_attr (postparent);
+
+ res->lookup3res_u.resok.obj_attributes = obj;
+ res->lookup3res_u.resok.dir_attributes = dir;
+}
+
+
+void
+nfs3_fill_lookup3res (lookup3res *res, nfsstat3 stat, struct nfs3_fh *newfh,
+ struct iatt *buf, struct iatt *postparent,
+ uint64_t deviceid)
+{
+
+ memset (res, 0, sizeof (*res));
+ nfs3_map_deviceid_to_statdev (buf, deviceid);
+ nfs3_map_deviceid_to_statdev (postparent, deviceid);
+ if (stat != NFS3_OK)
+ nfs3_fill_lookup3res_error (res, stat, postparent);
+ else
+ nfs3_fill_lookup3res_success (res, stat, newfh, buf,
+ postparent);
+}
+
+struct nfs3_fh
+nfs3_extract_getattr_fh (getattr3args *args)
+{
+ return nfs3_extract_nfs3_fh(args->object);
+}
+
+
+void
+nfs3_fill_getattr3res (getattr3res *res, nfsstat3 stat, struct iatt *buf,
+ uint64_t deviceid)
+{
+
+ memset (res, 0, sizeof (*res));
+ res->status = stat;
+ if (stat != NFS3_OK)
+ return;
+
+ nfs3_map_deviceid_to_statdev (buf, deviceid);
+ res->getattr3res_u.resok.obj_attributes = nfs3_stat_to_fattr3 (buf);
+
+}
+
+
+struct nfs3_fh
+nfs3_extract_fsinfo_fh (fsinfo3args *args)
+{
+ return nfs3_extract_nfs3_fh (args->fsroot);
+}
+
+
+void
+nfs3_fill_fsinfo3res (struct nfs3_state *nfs3, fsinfo3res *res,
+ nfsstat3 status, struct iatt *fsroot, uint64_t deviceid)
+{
+ fsinfo3resok resok = {{0}, };
+ nfstime3 tdelta = GF_NFS3_TIMEDELTA_SECS;
+
+ memset (res, 0, sizeof (*res));
+ res->status = status;
+ if (status != NFS3_OK)
+ return;
+
+ nfs3_map_deviceid_to_statdev (fsroot, deviceid);
+ resok.obj_attributes = nfs3_stat_to_post_op_attr (fsroot);
+ resok.rtmax = nfs3->readsize;
+ resok.rtpref = nfs3->readsize;
+ resok.rtmult = GF_NFS3_RTMULT;
+ resok.wtmax = nfs3->writesize;
+ resok.wtpref = nfs3->writesize;
+ resok.wtmult = GF_NFS3_WTMULT;
+ resok.dtpref = nfs3->readdirsize;
+ resok.maxfilesize = GF_NFS3_MAXFILESIZE;
+ resok.time_delta = tdelta;
+ resok.properties = GF_NFS3_FS_PROP;
+
+ res->fsinfo3res_u.resok = resok;
+
+}
+
+
+void
+nfs3_prep_lookup3args (lookup3args *args, struct nfs3_fh *fh, char *name)
+{
+ memset (args, 0, sizeof (*args));
+ args->what.dir.data.data_val = (void *)fh;
+ args->what.name = name;
+}
+
+
+void
+nfs3_prep_getattr3args (getattr3args *args, struct nfs3_fh *fh)
+{
+ memset (args, 0, sizeof (*args));
+ args->object.data.data_val = (void *)fh;
+}
+
+
+void
+nfs3_prep_fsinfo3args (fsinfo3args *args, struct nfs3_fh *root)
+{
+ memset (args, 0, sizeof (*args));
+ args->fsroot.data.data_val = (void *)root;
+}
+
+
+char *
+nfsstat3_strerror(int stat)
+{
+ int i;
+ for(i = 0; nfs3stat_strerror_table[i].stat != -1; i++) {
+ if (nfs3stat_strerror_table[i].stat == stat)
+ return nfs3stat_strerror_table[i].strerror;
+ }
+
+ return nfs3stat_strerror_table[i].strerror;
+}
+
+
+
+void
+nfs3_prep_access3args (access3args *args, struct nfs3_fh *fh)
+{
+ memset (args, 0, sizeof (*args));
+ args->object.data.data_val = (void *)fh;
+}
+
+#define POSIX_READ 4
+#define POSIX_WRITE 2
+#define POSIX_EXEC 1
+
+uint32_t
+nfs3_accessbits (int32_t accbits)
+{
+ uint32_t accresult = 0;
+
+ if (accbits & POSIX_READ)
+ accresult |= ACCESS3_READ;
+
+ if (accbits & POSIX_WRITE)
+ accresult |= (ACCESS3_MODIFY | ACCESS3_EXTEND | ACCESS3_DELETE);
+
+ /* lookup on directory allowed only in case of execute permission */
+ if (accbits & POSIX_EXEC)
+ accresult |= (ACCESS3_EXECUTE | ACCESS3_LOOKUP);
+
+ return accresult;
+}
+
+uint32_t
+nfs3_request_to_accessbits (int32_t accbits)
+{
+ uint32_t acc_request = 0;
+
+ if (accbits & ACCESS3_READ)
+ acc_request |= POSIX_READ;
+
+ if (accbits & (ACCESS3_MODIFY | ACCESS3_EXTEND | ACCESS3_DELETE))
+ acc_request |= POSIX_WRITE;
+
+ /* For lookup on directory check for execute permission */
+ if (accbits & (ACCESS3_EXECUTE | ACCESS3_LOOKUP))
+ acc_request |= POSIX_EXEC;
+
+ return acc_request;
+}
+void
+nfs3_fill_access3res (access3res *res, nfsstat3 status, int32_t accbits,
+ int32_t reqaccbits)
+{
+ uint32_t accres = 0;
+
+ memset (res, 0, sizeof (*res));
+ res->status = status;
+ if (status != NFS3_OK)
+ return;
+
+ accres = nfs3_accessbits (accbits);
+
+ /* do not answer what was not asked */
+ res->access3res_u.resok.access = accres & reqaccbits;
+}
+
+void
+nfs3_prep_readdir3args (readdir3args *ra, struct nfs3_fh *fh)
+{
+ memset (ra, 0, sizeof (*ra));
+ ra->dir.data.data_val = (void *)fh;
+}
+
+
+int
+nfs3_is_dot_entry (char *entry)
+{
+ int ret = 0;
+
+ if (!entry)
+ return 0;
+
+ if (strcmp (entry, ".") == 0)
+ ret = 1;
+
+ return ret;
+}
+
+
+int
+nfs3_is_parentdir_entry (char *entry)
+{
+ int ret = 0;
+
+ if (!entry)
+ return 0;
+
+ if (strcmp (entry, "..") == 0)
+ ret = 1;
+
+ return ret;
+}
+
+
+void
+nfs3_funge_root_dotdot_dirent (gf_dirent_t *ent, struct nfs3_fh *dfh)
+{
+ if ((!ent) || (!dfh))
+ return;
+
+ if (nfs3_fh_is_root_fh (dfh) &&
+ nfs3_is_parentdir_entry (ent->d_name)) {
+ ent->d_ino = 1;
+ ent->d_stat.ia_ino = 1;
+ }
+
+ if (nfs3_fh_is_root_fh (dfh) &&
+ nfs3_is_dot_entry (ent->d_name)) {
+ ent->d_ino = 1;
+ ent->d_stat.ia_ino = 1;
+ }
+
+}
+
+
+entry3 *
+nfs3_fill_entry3 (gf_dirent_t *entry, struct nfs3_fh *dfh)
+{
+ entry3 *ent = NULL;
+ if ((!entry) || (!dfh))
+ return NULL;
+
+ ent = GF_CALLOC (1, sizeof (*ent), gf_nfs_mt_entry3);
+ if (!ent)
+ return NULL;
+
+ gf_log (GF_NFS3, GF_LOG_TRACE, "Entry: %s", entry->d_name);
+
+ /* If the entry is . or .., we need to replace the physical ino and gen
+ * with 1 and 0 respectively if the directory is root. This funging is
+ * needed because there is no parent directory of the root. In that
+ * sense the behavior we provide is similar to the output of the
+ * command: "stat /.."
+ */
+ entry->d_ino = nfs3_iatt_gfid_to_ino (&entry->d_stat);
+ nfs3_funge_root_dotdot_dirent (entry, dfh);
+ ent->fileid = entry->d_ino;
+ ent->cookie = entry->d_off;
+ ent->name = GF_CALLOC ((strlen (entry->d_name) + 1), sizeof (char),
+ gf_nfs_mt_char);
+ if (!ent->name) {
+ GF_FREE (ent);
+ ent = NULL;
+ goto err;
+ }
+ strcpy (ent->name, entry->d_name);
+
+err:
+ return ent;
+}
+
+
+void
+nfs3_fill_post_op_fh3 (struct nfs3_fh *fh, post_op_fh3 *pfh)
+{
+ uint32_t fhlen = 0;
+
+ if ((!fh) || (!pfh))
+ return;
+
+ pfh->handle_follows = 1;
+ fhlen = nfs3_fh_compute_size (fh);
+ pfh->post_op_fh3_u.handle.data.data_val = (void *)fh;
+ pfh->post_op_fh3_u.handle.data.data_len = fhlen;
+}
+
+
+post_op_fh3
+nfs3_fh_to_post_op_fh3 (struct nfs3_fh *fh)
+{
+ post_op_fh3 pfh = {0, };
+ char *fhp = NULL;
+
+ if (!fh)
+ return pfh;
+
+ pfh.handle_follows = 1;
+
+ fhp = GF_CALLOC (1, sizeof (*fh), gf_nfs_mt_char);
+ if (!fhp)
+ return pfh;
+
+ memcpy (fhp, fh, sizeof (*fh));
+ nfs3_fill_post_op_fh3 ((struct nfs3_fh *)fhp, &pfh);
+ return pfh;
+}
+
+
+entryp3 *
+nfs3_fill_entryp3 (gf_dirent_t *entry, struct nfs3_fh *dirfh, uint64_t devid)
+{
+ entryp3 *ent = NULL;
+ struct nfs3_fh newfh = {{0}, };
+
+ if ((!entry) || (!dirfh))
+ return NULL;
+
+ /* If the entry is . or .., we need to replace the physical ino and gen
+ * with 1 and 0 respectively if the directory is root. This funging is
+ * needed because there is no parent directory of the root. In that
+ * sense the behavior we provide is similar to the output of the
+ * command: "stat /.."
+ */
+ entry->d_ino = nfs3_iatt_gfid_to_ino (&entry->d_stat);
+ nfs3_funge_root_dotdot_dirent (entry, dirfh);
+ gf_log (GF_NFS3, GF_LOG_TRACE, "Entry: %s, ino: %"PRIu64,
+ entry->d_name, entry->d_ino);
+ ent = GF_CALLOC (1, sizeof (*ent), gf_nfs_mt_entryp3);
+ if (!ent)
+ return NULL;
+
+ ent->fileid = entry->d_ino;
+ ent->cookie = entry->d_off;
+ ent->name = GF_CALLOC ((strlen (entry->d_name) + 1), sizeof (char),
+ gf_nfs_mt_char);
+ if (!ent->name) {
+ GF_FREE (ent);
+ ent = NULL;
+ goto err;
+ }
+ strcpy (ent->name, entry->d_name);
+
+ nfs3_fh_build_child_fh (dirfh, &entry->d_stat, &newfh);
+ nfs3_map_deviceid_to_statdev (&entry->d_stat, devid);
+ ent->name_attributes = nfs3_stat_to_post_op_attr (&entry->d_stat);
+ ent->name_handle = nfs3_fh_to_post_op_fh3 (&newfh);
+err:
+ return ent;
+}
+
+
+void
+nfs3_fill_readdir3res (readdir3res *res, nfsstat3 stat, struct nfs3_fh *dirfh,
+ uint64_t cverf, struct iatt *dirstat,
+ gf_dirent_t *entries, count3 count, int is_eof,
+ uint64_t deviceid)
+{
+ post_op_attr dirattr;
+ entry3 *ent = NULL;
+ entry3 *headentry = NULL;
+ entry3 *preventry = NULL;
+ count3 filled = 0;
+ gf_dirent_t *listhead = NULL;
+
+ memset (res, 0, sizeof (*res));
+ res->status = stat;
+ if (stat != NFS3_OK)
+ return;
+
+ nfs3_map_deviceid_to_statdev (dirstat, deviceid);
+ dirattr = nfs3_stat_to_post_op_attr (dirstat);
+ res->readdir3res_u.resok.dir_attributes = dirattr;
+ res->readdir3res_u.resok.reply.eof = (bool_t)is_eof;
+ memcpy (res->readdir3res_u.resok.cookieverf, &cverf, sizeof (cverf));
+
+ filled = NFS3_READDIR_RESOK_SIZE;
+ /* First entry is just the list head */
+ listhead = entries;
+ entries = entries->next;
+ while (((entries) && (entries != listhead)) && (filled < count)) {
+ /*
+ if ((strcmp (entries->d_name, ".") == 0) ||
+ (strcmp (entries->d_name, "..") == 0))
+ goto nextentry;
+ */
+ ent = nfs3_fill_entry3 (entries, dirfh);
+ if (!ent)
+ break;
+
+ if (!headentry)
+ headentry = ent;
+
+ if (preventry) {
+ preventry->nextentry = ent;
+ preventry = ent;
+ } else
+ preventry = ent;
+
+ filled += NFS3_ENTRY3_FIXED_SIZE + strlen (ent->name);
+//nextentry:
+ entries = entries->next;
+ }
+
+ res->readdir3res_u.resok.reply.entries = headentry;
+
+ return;
+}
+
+
+void
+nfs3_fill_readdirp3res (readdirp3res *res, nfsstat3 stat,
+ struct nfs3_fh *dirfh, uint64_t cverf,
+ struct iatt *dirstat, gf_dirent_t *entries,
+ count3 dircount, count3 maxcount, int is_eof,
+ uint64_t deviceid)
+{
+ post_op_attr dirattr;
+ entryp3 *ent = NULL;
+ entryp3 *headentry = NULL;
+ entryp3 *preventry = NULL;
+ count3 filled = 0;
+ gf_dirent_t *listhead = NULL;
+ int fhlen = 0;
+
+ memset (res, 0, sizeof (*res));
+ res->status = stat;
+ if (stat != NFS3_OK)
+ return;
+
+ nfs3_map_deviceid_to_statdev (dirstat, deviceid);
+ dirattr = nfs3_stat_to_post_op_attr (dirstat);
+ res->readdirp3res_u.resok.dir_attributes = dirattr;
+ res->readdirp3res_u.resok.reply.eof = (bool_t)is_eof;
+ memcpy (res->readdirp3res_u.resok.cookieverf, &cverf, sizeof (cverf));
+
+ filled = NFS3_READDIR_RESOK_SIZE;
+ /* First entry is just the list head */
+ listhead = entries;
+ entries = entries->next;
+ while (((entries) && (entries != listhead)) && (filled < maxcount)) {
+ /* Linux does not display . and .. entries unless we provide
+ * these entries here.
+ */
+/* if ((strcmp (entries->d_name, ".") == 0) ||
+ (strcmp (entries->d_name, "..") == 0))
+ goto nextentry;
+ */
+ ent = nfs3_fill_entryp3 (entries, dirfh, deviceid);
+ if (!ent)
+ break;
+
+ if (!headentry)
+ headentry = ent;
+
+ if (preventry) {
+ preventry->nextentry = ent;
+ preventry = ent;
+ } else
+ preventry = ent;
+
+ fhlen = ent->name_handle.post_op_fh3_u.handle.data.data_len;
+ filled += NFS3_ENTRYP3_FIXED_SIZE + fhlen + strlen (ent->name);
+//nextentry:
+ entries = entries->next;
+ }
+
+ res->readdirp3res_u.resok.reply.entries = headentry;
+
+ return;
+}
+
+
+void
+nfs3_prep_readdirp3args (readdirp3args *ra, struct nfs3_fh *fh)
+{
+ memset (ra, 0, sizeof (*ra));
+ ra->dir.data.data_val = (void *)fh;
+}
+
+void
+nfs3_free_readdirp3res (readdirp3res *res)
+{
+ entryp3 *ent = NULL;
+ entryp3 *next = NULL;
+
+ if (!res)
+ return;
+
+ ent = res->readdirp3res_u.resok.reply.entries;
+ while (ent) {
+
+ next = ent->nextentry;
+ GF_FREE (ent->name);
+ GF_FREE (ent->name_handle.post_op_fh3_u.handle.data.data_val);
+ GF_FREE (ent);
+ ent = next;
+ }
+
+ return;
+}
+
+
+void
+nfs3_free_readdir3res (readdir3res *res)
+{
+ entry3 *ent = NULL;
+ entry3 *next = NULL;
+
+ if (!res)
+ return;
+
+ ent = res->readdir3res_u.resok.reply.entries;
+ while (ent) {
+
+ next = ent->nextentry;
+ GF_FREE (ent->name);
+ GF_FREE (ent);
+ ent = next;
+ }
+
+ return;
+}
+
+void
+nfs3_prep_fsstat3args (fsstat3args *args, struct nfs3_fh *fh)
+{
+ memset (args, 0, sizeof (*args));
+ args->fsroot.data.data_val = (char *)fh;
+}
+
+
+void
+nfs3_fill_fsstat3res (fsstat3res *res, nfsstat3 stat, struct statvfs *fsbuf,
+ struct iatt *postbuf, uint64_t deviceid)
+{
+ post_op_attr poa;
+ fsstat3resok resok;
+
+ memset (res, 0, sizeof (*res));
+ res->status = stat;
+ if (stat != NFS3_OK)
+ return;
+
+ nfs3_map_deviceid_to_statdev (postbuf, deviceid);
+ poa = nfs3_stat_to_post_op_attr (postbuf);
+ resok.tbytes = (size3)(fsbuf->f_frsize * fsbuf->f_blocks);
+ resok.fbytes = (size3)(fsbuf->f_frsize * fsbuf->f_bfree);
+ resok.abytes = (size3)(fsbuf->f_frsize * fsbuf->f_bavail);
+ resok.tfiles = (size3)(fsbuf->f_files);
+ resok.ffiles = (size3)(fsbuf->f_ffree);
+ resok.afiles = (size3)(fsbuf->f_favail);
+ resok.invarsec = 0;
+
+ resok.obj_attributes = poa;
+ res->fsstat3res_u.resok = resok;
+}
+
+
+int32_t
+nfs3_sattr3_to_setattr_valid (sattr3 *sattr, struct iatt *buf, mode_t *omode)
+{
+ int32_t valid = 0;
+ ia_prot_t prot = {0, };
+ mode_t mode = 0;
+
+ if (!sattr)
+ return 0;
+
+ if (sattr->mode.set_it) {
+ valid |= GF_SET_ATTR_MODE;
+
+ if (sattr->mode.set_mode3_u.mode & NFS3MODE_ROWNER) {
+ mode |= S_IRUSR;
+ prot.owner.read = 1;
+ }
+ if (sattr->mode.set_mode3_u.mode & NFS3MODE_WOWNER) {
+ mode |= S_IWUSR;
+ prot.owner.write = 1;
+ }
+ if (sattr->mode.set_mode3_u.mode & NFS3MODE_XOWNER) {
+ mode |= S_IXUSR;
+ prot.owner.exec = 1;
+ }
+
+ if (sattr->mode.set_mode3_u.mode & NFS3MODE_RGROUP) {
+ mode |= S_IRGRP;
+ prot.group.read = 1;
+ }
+ if (sattr->mode.set_mode3_u.mode & NFS3MODE_WGROUP) {
+ mode |= S_IWGRP;
+ prot.group.write = 1;
+ }
+ if (sattr->mode.set_mode3_u.mode & NFS3MODE_XGROUP) {
+ mode |= S_IXGRP;
+ prot.group.exec = 1;
+ }
+
+ if (sattr->mode.set_mode3_u.mode & NFS3MODE_ROTHER) {
+ mode |= S_IROTH;
+ prot.other.read = 1;
+ }
+ if (sattr->mode.set_mode3_u.mode & NFS3MODE_WOTHER) {
+ mode |= S_IWOTH;
+ prot.other.write = 1;
+ }
+ if (sattr->mode.set_mode3_u.mode & NFS3MODE_XOTHER) {
+ mode |= S_IXOTH;
+ prot.other.exec = 1;
+ }
+
+ if (sattr->mode.set_mode3_u.mode & NFS3MODE_SETXUID) {
+ mode |= S_ISUID;
+ prot.suid = 1;
+ }
+ if (sattr->mode.set_mode3_u.mode & NFS3MODE_SETXGID) {
+ mode |= S_ISGID;
+ prot.sgid = 1;
+ }
+ if (sattr->mode.set_mode3_u.mode & NFS3MODE_SAVESWAPTXT) {
+ mode |= S_ISVTX;
+ prot.sticky = 1;
+ }
+
+ if (buf)
+ buf->ia_prot = prot;
+ /* Create fop still requires the old mode_t style argument. */
+ if (omode)
+ *omode = mode;
+ }
+
+ if (sattr->uid.set_it) {
+ valid |= GF_SET_ATTR_UID;
+ if (buf)
+ buf->ia_uid = sattr->uid.set_uid3_u.uid;
+ }
+
+ if (sattr->gid.set_it) {
+ valid |= GF_SET_ATTR_GID;
+ if (buf)
+ buf->ia_gid = sattr->gid.set_gid3_u.gid;
+ }
+
+ if (sattr->size.set_it) {
+ valid |= GF_SET_ATTR_SIZE;
+ if (buf)
+ buf->ia_size = sattr->size.set_size3_u.size;
+ }
+
+ if (sattr->atime.set_it == SET_TO_CLIENT_TIME) {
+ valid |= GF_SET_ATTR_ATIME;
+ if (buf)
+ buf->ia_atime = sattr->atime.set_atime_u.atime.seconds;
+ }
+
+ if (sattr->atime.set_it == SET_TO_SERVER_TIME) {
+ valid |= GF_SET_ATTR_ATIME;
+ if (buf)
+ buf->ia_atime = time (NULL);
+ }
+
+ if (sattr->mtime.set_it == SET_TO_CLIENT_TIME) {
+ valid |= GF_SET_ATTR_MTIME;
+ if (buf)
+ buf->ia_mtime = sattr->mtime.set_mtime_u.mtime.seconds;
+ }
+
+ if (sattr->mtime.set_it == SET_TO_SERVER_TIME) {
+ valid |= GF_SET_ATTR_MTIME;
+ if (buf)
+ buf->ia_mtime = time (NULL);
+ }
+
+ return valid;
+}
+
+
+wcc_data
+nfs3_stat_to_wcc_data (struct iatt *pre, struct iatt *post)
+{
+ wcc_data wd = {{0}, };
+
+ if (post)
+ wd.after = nfs3_stat_to_post_op_attr (post);
+ if (pre)
+ wd.before = nfs3_stat_to_pre_op_attr (pre);
+
+ return wd;
+}
+
+void
+nfs3_fill_create3res (create3res *res, nfsstat3 stat, struct nfs3_fh *newfh,
+ struct iatt *newbuf, struct iatt *preparent,
+ struct iatt *postparent, uint64_t deviceid)
+{
+ post_op_attr poa = {0, };
+ wcc_data dirwcc = {{0}, };
+
+ memset (res, 0, sizeof (*res));
+ res->status = stat;
+ if (stat != NFS3_OK)
+ return;
+
+ nfs3_fill_post_op_fh3 (newfh, &res->create3res_u.resok.obj);
+ nfs3_map_deviceid_to_statdev (newbuf, deviceid);
+ poa = nfs3_stat_to_post_op_attr (newbuf);
+ res->create3res_u.resok.obj_attributes = poa;
+ nfs3_map_deviceid_to_statdev (preparent, deviceid);
+ nfs3_map_deviceid_to_statdev (postparent, deviceid);
+ dirwcc = nfs3_stat_to_wcc_data (preparent, postparent);
+
+ res->create3res_u.resok.dir_wcc = dirwcc;
+}
+
+void
+nfs3_prep_create3args (create3args *args, struct nfs3_fh *fh, char *name)
+{
+
+ memset (args, 0, sizeof (*args));
+ args->where.dir.data.data_val = (void *)fh;
+ args->where.name = name;
+}
+
+void
+nfs3_prep_setattr3args (setattr3args *args, struct nfs3_fh *fh)
+{
+ memset (args, 0, sizeof (*args));
+ args->object.data.data_val = (void *)fh;
+}
+
+
+void
+nfs3_fill_setattr3res (setattr3res *res, nfsstat3 stat, struct iatt *preop,
+ struct iatt *postop, uint64_t deviceid)
+{
+ wcc_data wcc;
+ memset (res, 0, sizeof (*res));
+ res->status = stat;
+ if (stat != NFS3_OK)
+ return;
+
+ nfs3_map_deviceid_to_statdev (preop, deviceid);
+ nfs3_map_deviceid_to_statdev (postop, deviceid);
+ wcc = nfs3_stat_to_wcc_data (preop, postop);
+ res->setattr3res_u.resok.obj_wcc = wcc;
+}
+
+
+void
+nfs3_prep_mkdir3args (mkdir3args *args, struct nfs3_fh *dirfh, char *name)
+{
+
+ memset (args, 0, sizeof (*args));
+ args->where.dir.data.data_val = (void *)dirfh;
+ args->where.name = name;
+}
+
+
+void
+nfs3_fill_mkdir3res (mkdir3res *res, nfsstat3 stat, struct nfs3_fh *fh,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, uint64_t deviceid)
+{
+ wcc_data dirwcc;
+ post_op_attr poa;
+
+ memset (res, 0, sizeof (*res));
+ res->status = stat;
+ if (stat != NFS3_OK)
+ return;
+
+ nfs3_fill_post_op_fh3 (fh, &res->mkdir3res_u.resok.obj);
+ nfs3_map_deviceid_to_statdev (buf, deviceid);
+ poa = nfs3_stat_to_post_op_attr (buf);
+ nfs3_map_deviceid_to_statdev (preparent, deviceid);
+ nfs3_map_deviceid_to_statdev (postparent, deviceid);
+ dirwcc = nfs3_stat_to_wcc_data (preparent, postparent);
+ res->mkdir3res_u.resok.obj_attributes = poa;
+ res->mkdir3res_u.resok.dir_wcc = dirwcc;
+
+}
+
+
+void
+nfs3_prep_symlink3args (symlink3args *args, struct nfs3_fh *dirfh, char *name,
+ char *target)
+{
+ memset (args, 0, sizeof (*args));
+ args->where.dir.data.data_val = (void *)dirfh;
+ args->where.name = name;
+ args->symlink.symlink_data = target;
+}
+
+
+void
+nfs3_fill_symlink3res (symlink3res *res, nfsstat3 stat, struct nfs3_fh *fh,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, uint64_t deviceid)
+{
+ wcc_data dirwcc;
+ post_op_attr poa;
+
+ memset (res, 0, sizeof (*res));
+ res->status = stat;
+ if (stat != NFS3_OK)
+ return;
+
+ nfs3_fill_post_op_fh3 (fh, &res->symlink3res_u.resok.obj);
+ nfs3_map_deviceid_to_statdev (buf, deviceid);
+ poa = nfs3_stat_to_post_op_attr (buf);
+ nfs3_map_deviceid_to_statdev (postparent, deviceid);
+ nfs3_map_deviceid_to_statdev (preparent, deviceid);
+ dirwcc = nfs3_stat_to_wcc_data (preparent, postparent);
+ res->symlink3res_u.resok.obj_attributes = poa;
+ res->symlink3res_u.resok.dir_wcc = dirwcc;
+
+}
+
+
+void
+nfs3_prep_readlink3args (readlink3args *args, struct nfs3_fh *fh)
+{
+
+ memset (args, 0, sizeof (*args));
+ args->symlink.data.data_val = (void *)fh;
+}
+
+
+void
+nfs3_fill_readlink3res (readlink3res *res, nfsstat3 stat, char *path,
+ struct iatt *buf, uint64_t deviceid)
+{
+ post_op_attr poa;
+
+ memset (res, 0, sizeof (*res));
+ res->status = stat;
+
+ if (stat != NFS3_OK)
+ return;
+
+ nfs3_map_deviceid_to_statdev (buf, deviceid);
+ poa = nfs3_stat_to_post_op_attr (buf);
+ res->readlink3res_u.resok.data = (void *)path;
+ res->readlink3res_u.resok.symlink_attributes = poa;
+}
+
+
+void
+nfs3_prep_mknod3args (mknod3args *args, struct nfs3_fh *fh, char *name)
+{
+ memset (args, 0, sizeof (*args));
+ args->where.dir.data.data_val = (void *)fh;
+ args->where.name = name;
+
+}
+
+void
+nfs3_fill_mknod3res (mknod3res *res, nfsstat3 stat, struct nfs3_fh *fh,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, uint64_t deviceid)
+{
+ post_op_attr poa;
+ wcc_data wccdir;
+
+ memset (res, 0, sizeof (*res));
+ res->status = stat;
+ if (stat != NFS3_OK)
+ return;
+
+ nfs3_fill_post_op_fh3 (fh, &res->mknod3res_u.resok.obj);
+ nfs3_map_deviceid_to_statdev (buf, deviceid);
+ poa = nfs3_stat_to_post_op_attr (buf);
+ nfs3_map_deviceid_to_statdev (preparent, deviceid);
+ nfs3_map_deviceid_to_statdev (postparent, deviceid);
+ wccdir = nfs3_stat_to_wcc_data (preparent, postparent);
+ res->mknod3res_u.resok.obj_attributes = poa;
+ res->mknod3res_u.resok.dir_wcc = wccdir;
+
+}
+
+
+void
+nfs3_fill_remove3res (remove3res *res, nfsstat3 stat, struct iatt *preparent,
+ struct iatt *postparent, uint64_t deviceid)
+{
+ wcc_data dirwcc;
+
+ memset (res, 0, sizeof (*res));
+ res->status = stat;
+ if (stat != NFS3_OK)
+ return;
+
+ nfs3_map_deviceid_to_statdev (preparent, deviceid);
+ nfs3_map_deviceid_to_statdev (postparent, deviceid);
+ dirwcc = nfs3_stat_to_wcc_data (preparent, postparent);
+ res->remove3res_u.resok.dir_wcc = dirwcc;
+}
+
+
+void
+nfs3_prep_remove3args (remove3args *args, struct nfs3_fh *fh, char *name)
+{
+ memset (args, 0, sizeof (*args));
+ args->object.dir.data.data_val = (void *)fh;
+ args->object.name = name;
+}
+
+
+void
+nfs3_prep_rmdir3args (rmdir3args *args, struct nfs3_fh *fh, char *name)
+{
+ memset (args, 0, sizeof (*args));
+ args->object.dir.data.data_val = (void *)fh;
+ args->object.name = name;
+}
+
+
+void
+nfs3_fill_rmdir3res (rmdir3res *res, nfsstat3 stat, struct iatt *preparent,
+ struct iatt *postparent, uint64_t deviceid)
+{
+ wcc_data dirwcc;
+ memset (res, 0, sizeof (*res));
+ res->status = stat;
+
+ if (stat != NFS3_OK)
+ return;
+
+ nfs3_map_deviceid_to_statdev (postparent, deviceid);
+ nfs3_map_deviceid_to_statdev (preparent, deviceid);
+ dirwcc = nfs3_stat_to_wcc_data (preparent, postparent);
+ res->rmdir3res_u.resok.dir_wcc = dirwcc;
+}
+
+
+void
+nfs3_prep_link3args (link3args *args, struct nfs3_fh *target,
+ struct nfs3_fh * dirfh, char *name)
+{
+ memset (args, 0, sizeof (*args));
+ args->file.data.data_val = (void *)target;
+ args->link.dir.data.data_val = (void *)dirfh;
+ args->link.name = name;
+}
+
+
+void
+nfs3_fill_link3res (link3res *res, nfsstat3 stat, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ uint64_t deviceid)
+{
+ post_op_attr poa;
+ wcc_data dirwcc;
+
+ memset (res, 0, sizeof (*res));
+ res->status = stat;
+ if (stat != NFS3_OK)
+ return;
+
+ nfs3_map_deviceid_to_statdev (preparent, deviceid);
+ nfs3_map_deviceid_to_statdev (postparent, deviceid);
+ nfs3_map_deviceid_to_statdev (buf,deviceid);
+ poa = nfs3_stat_to_post_op_attr (buf);
+ dirwcc = nfs3_stat_to_wcc_data (preparent, postparent);
+ res->link3res_u.resok.file_attributes = poa;
+ res->link3res_u.resok.linkdir_wcc = dirwcc;
+}
+
+
+
+void
+nfs3_prep_rename3args (rename3args *args, struct nfs3_fh *olddirfh,
+ char *oldname, struct nfs3_fh *newdirfh, char *newname)
+{
+ memset (args, 0, sizeof (*args));
+
+ args->from.name = oldname;
+ args->from.dir.data.data_val = (void *)olddirfh;
+ args->to.name = newname;
+ args->to.dir.data.data_val = (void *)newdirfh;
+
+}
+
+
+void
+nfs3_fill_rename3res (rename3res *res, nfsstat3 stat, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ uint64_t deviceid)
+
+{
+ wcc_data dirwcc;
+
+ memset (res, 0, sizeof (*res));
+ res->status = stat;
+ if (stat != NFS3_OK)
+ return;
+
+ nfs3_map_deviceid_to_statdev (preoldparent, deviceid);
+ nfs3_map_deviceid_to_statdev (postoldparent, deviceid);
+ nfs3_map_deviceid_to_statdev (prenewparent, deviceid);
+ nfs3_map_deviceid_to_statdev (postnewparent, deviceid);
+ nfs3_map_deviceid_to_statdev (buf, deviceid);
+ dirwcc = nfs3_stat_to_wcc_data (preoldparent, postoldparent);
+ res->rename3res_u.resok.fromdir_wcc = dirwcc;
+ dirwcc = nfs3_stat_to_wcc_data (prenewparent, postnewparent);
+ res->rename3res_u.resok.todir_wcc = dirwcc;
+}
+
+
+void
+nfs3_prep_write3args (write3args *args, struct nfs3_fh *fh)
+{
+ memset (args, 0, sizeof (*args));
+ args->file.data.data_val = (void *)fh;
+}
+
+
+void
+nfs3_fill_write3res (write3res *res, nfsstat3 stat, count3 count,
+ stable_how stable, uint64_t wverf, struct iatt *prestat,
+ struct iatt *poststat, uint64_t deviceid)
+{
+ write3resok resok;
+ memset (res, 0, sizeof (*res));
+ res->status = stat;
+ if (stat != NFS3_OK)
+ return;
+
+ nfs3_map_deviceid_to_statdev (prestat, deviceid);
+ nfs3_map_deviceid_to_statdev (poststat, deviceid);
+ resok.file_wcc = nfs3_stat_to_wcc_data (prestat, poststat);
+ resok.count = count;
+ resok.committed = stable;
+ memcpy (resok.verf, &wverf, sizeof (wverf));
+
+ res->write3res_u.resok = resok;
+}
+
+
+void
+nfs3_prep_commit3args (commit3args *args, struct nfs3_fh *fh)
+{
+ memset (args, 0, sizeof (*args));
+ args->file.data.data_val = (void *)fh;
+}
+
+
+void
+nfs3_fill_commit3res (commit3res *res, nfsstat3 stat, uint64_t wverf,
+ struct iatt *prestat, struct iatt *poststat,
+ uint64_t deviceid)
+{
+ memset (res, 0, sizeof (*res));
+ res->status = stat;
+ if (stat != NFS3_OK)
+ return;
+
+ nfs3_map_deviceid_to_statdev (poststat, deviceid);
+ nfs3_map_deviceid_to_statdev (prestat, deviceid);
+ res->commit3res_u.resok.file_wcc = nfs3_stat_to_wcc_data (prestat,
+ poststat);
+ memcpy (res->commit3res_u.resok.verf, &wverf, sizeof (wverf));
+}
+
+void
+nfs3_fill_read3res (read3res *res, nfsstat3 stat, count3 count,
+ struct iatt *poststat, int is_eof, uint64_t deviceid)
+{
+ post_op_attr poa;
+
+ memset (res, 0, sizeof (*res));
+ res->status = stat;
+ if (stat != NFS3_OK)
+ return;
+
+ nfs3_map_deviceid_to_statdev (poststat, deviceid);
+ poa = nfs3_stat_to_post_op_attr (poststat);
+ res->read3res_u.resok.file_attributes = poa;
+ res->read3res_u.resok.count = count;
+ res->read3res_u.resok.eof = is_eof;
+ res->read3res_u.resok.data.data_len = count;
+}
+
+
+void
+nfs3_prep_read3args (read3args *args, struct nfs3_fh *fh)
+{
+ memset (args, 0, sizeof (*args));
+ args->file.data.data_val = (void *)fh;
+}
+
+
+void
+nfs3_fill_pathconf3res (pathconf3res *res, nfsstat3 stat, struct iatt *buf,
+ uint64_t deviceid)
+{
+ pathconf3resok resok;
+
+ memset (res, 0, sizeof (*res));
+ res->status = stat;
+ if (stat != NFS3_OK)
+ return;
+
+ nfs3_map_deviceid_to_statdev (buf, deviceid);
+ resok.obj_attributes = nfs3_stat_to_post_op_attr (buf);
+ resok.linkmax = 256;
+ resok.name_max = NFS_NAME_MAX;
+ resok.no_trunc = TRUE;
+ resok.chown_restricted = FALSE;
+ resok.case_insensitive = FALSE;
+ resok.case_preserving = TRUE;
+
+ res->pathconf3res_u.resok = resok;
+}
+
+
+void
+nfs3_prep_pathconf3args (pathconf3args *args, struct nfs3_fh *fh)
+{
+ memset (args, 0, sizeof (*args));
+ args->object.data.data_val = (void *)fh;
+}
+
+
+int
+nfs3_verify_dircookie (struct nfs3_state *nfs3, fd_t *dirfd, cookie3 cookie,
+ uint64_t cverf, nfsstat3 *stat)
+{
+ int ret = -1;
+
+ if ((!nfs3) || (!dirfd))
+ return -1;
+
+ /* Can assume that this is first read on the dir, so cookie check
+ * is successful by default.
+ */
+ if (cookie == 0)
+ return 0;
+
+ gf_log (GF_NFS3, GF_LOG_TRACE, "Verifying cookie: cverf: %"PRIu64
+ ", cookie: %"PRIu64, cverf, cookie);
+ /* The cookie bad, no way cverf will be zero with a non-zero cookie. */
+ if ((cverf == 0) && (cookie != 0)) {
+ gf_log (GF_NFS3, GF_LOG_TRACE, "Bad cookie requested");
+ if (stat)
+ *stat = NFS3ERR_BAD_COOKIE;
+ goto err;
+ }
+
+ /* Yes, its true, our cookie is simply the fd_t address.
+ * NOTE: We used have the check for cookieverf but VMWare client sends
+ * a readdirp requests even after we've told it that EOF has been
+ * reached on the directory. This causes a problem because we close a
+ * dir fd_t after reaching EOF. The next readdirp sent by VMWare
+ * contains the address of the closed fd_t as cookieverf. Since we
+ * closed that fd_t, this readdirp results in a new opendir which will
+ * give an fd_t that will fail this check below.
+ */
+/* if ((cverf != (uint64_t)dirfd)) {
+ gf_log (GF_NFS3, GF_LOG_TRACE, "Cookieverf does not match");
+ if (stat)
+ *stat = NFS3ERR_BAD_COOKIE;
+ goto err;
+ }
+*/
+ gf_log (GF_NFS3, GF_LOG_TRACE, "Cookie verified");
+ if (stat)
+ *stat = NFS3_OK;
+ ret = 0;
+err:
+ return ret;
+}
+
+
+void
+nfs3_stat_to_errstr (uint32_t xid, char *op, nfsstat3 stat, int pstat,
+ char *errstr, size_t len)
+{
+ if ((!op) || (!errstr))
+ return;
+
+ snprintf (errstr, len, "XID: %x, %s: NFS: %d(%s), POSIX: %d(%s)",
+ xid, op,stat, nfsstat3_strerror (stat), pstat,
+ strerror (pstat));
+}
+
+void
+nfs3_log_common_call (uint32_t xid, char *op, struct nfs3_fh *fh)
+{
+ char fhstr[1024];
+
+ if (THIS->ctx->log.loglevel < GF_LOG_DEBUG)
+ return;
+
+ nfs3_fh_to_str (fh, fhstr, sizeof (fhstr));
+ gf_log (GF_NFS3, GF_LOG_DEBUG, "XID: %x, %s: args: %s", xid, op,
+ fhstr);
+}
+
+
+void
+nfs3_log_fh_entry_call (uint32_t xid, char *op, struct nfs3_fh *fh,
+ char *name)
+{
+ char fhstr[1024];
+
+ if (THIS->ctx->log.loglevel < GF_LOG_DEBUG)
+ return;
+ nfs3_fh_to_str (fh, fhstr, sizeof (fhstr));
+ gf_log (GF_NFS3, GF_LOG_DEBUG, "XID: %x, %s: args: %s, name: %s", xid,
+ op, fhstr, name);
+}
+
+
+void
+nfs3_log_rename_call (uint32_t xid, struct nfs3_fh *src, char *sname,
+ struct nfs3_fh *dst, char *dname)
+{
+ char sfhstr[1024];
+ char dfhstr[1024];
+
+ if (THIS->ctx->log.loglevel < GF_LOG_DEBUG)
+ return;
+ nfs3_fh_to_str (src, sfhstr, sizeof (sfhstr));
+ nfs3_fh_to_str (dst, dfhstr, sizeof (dfhstr));
+ gf_log (GF_NFS3, GF_LOG_DEBUG, "XID: %x, RENAME: args: Src: %s, "
+ "name: %s, Dst: %s, name: %s", xid, sfhstr, sname, dfhstr,
+ dname);
+}
+
+
+
+void
+nfs3_log_create_call (uint32_t xid, struct nfs3_fh *fh, char *name,
+ createmode3 mode)
+{
+ char fhstr[1024];
+ char *modestr = NULL;
+ char exclmode[] = "EXCLUSIVE";
+ char unchkd[] = "UNCHECKED";
+ char guarded[] = "GUARDED";
+
+ if (THIS->ctx->log.loglevel < GF_LOG_DEBUG)
+ return;
+ nfs3_fh_to_str (fh, fhstr, sizeof (fhstr));
+ if (mode == EXCLUSIVE)
+ modestr = exclmode;
+ else if (mode == GUARDED)
+ modestr = guarded;
+ else
+ modestr = unchkd;
+
+ gf_log (GF_NFS3, GF_LOG_DEBUG, "XID: %x, CREATE: args: %s, name: %s,"
+ " mode: %s", xid, fhstr, name, modestr);
+}
+
+
+void
+nfs3_log_mknod_call (uint32_t xid, struct nfs3_fh *fh, char *name, int type)
+{
+ char fhstr[1024];
+ char *modestr = NULL;
+ char chr[] = "CHAR";
+ char blk[] = "BLK";
+ char sock[] = "SOCK";
+ char fifo[] = "FIFO";
+
+ if (THIS->ctx->log.loglevel < GF_LOG_DEBUG)
+ return;
+ nfs3_fh_to_str (fh, fhstr, sizeof (fhstr));
+ if (type == NF3CHR)
+ modestr = chr;
+ else if (type == NF3BLK)
+ modestr = blk;
+ else if (type == NF3SOCK)
+ modestr = sock;
+ else
+ modestr = fifo;
+
+ gf_log (GF_NFS3, GF_LOG_DEBUG, "XID: %x, MKNOD: args: %s, name: %s,"
+ " type: %s", xid, fhstr, name, modestr);
+}
+
+
+
+void
+nfs3_log_symlink_call (uint32_t xid, struct nfs3_fh *fh, char *name, char *tgt)
+{
+ char fhstr[1024];
+
+ if (THIS->ctx->log.loglevel < GF_LOG_DEBUG)
+ return;
+ nfs3_fh_to_str (fh, fhstr, sizeof (fhstr));
+ gf_log (GF_NFS3, GF_LOG_DEBUG, "XID: %x, SYMLINK: args: %s, name: %s,"
+ " target: %s", xid, fhstr, name, tgt);
+}
+
+
+void
+nfs3_log_link_call (uint32_t xid, struct nfs3_fh *fh, char *name,
+ struct nfs3_fh *tgt)
+{
+ char dfhstr[1024];
+ char tfhstr[1024];
+
+ if (THIS->ctx->log.loglevel < GF_LOG_DEBUG)
+ return;
+ nfs3_fh_to_str (fh, dfhstr, sizeof (dfhstr));
+ nfs3_fh_to_str (tgt, tfhstr, sizeof (tfhstr));
+ gf_log (GF_NFS3, GF_LOG_DEBUG, "XID: %x, LINK: args: %s, name: %s,"
+ " target: %s", xid, dfhstr, name, tfhstr);
+}
+
+
+void
+nfs3_log_rw_call (uint32_t xid, char *op, struct nfs3_fh *fh, offset3 offt,
+ count3 count, int stablewrite)
+{
+ char fhstr[1024];
+
+ if (THIS->ctx->log.loglevel < GF_LOG_DEBUG)
+ return;
+ nfs3_fh_to_str (fh, fhstr, sizeof (fhstr));
+ if (stablewrite == -1)
+ gf_log (GF_NFS3, GF_LOG_DEBUG, "XID: %x, %s: args: %s, offset:"
+ " %"PRIu64", count: %"PRIu32, xid, op, fhstr, offt,
+ count);
+ else
+ gf_log (GF_NFS3, GF_LOG_DEBUG, "XID: %x, %s: args: %s, offset:"
+ " %"PRIu64", count: %"PRIu32", %s", xid, op, fhstr,
+ offt, count,
+ (stablewrite == UNSTABLE)?"UNSTABLE":"STABLE");
+
+}
+
+
+int
+nfs3_getattr_loglevel (nfsstat3 stat) {
+
+ int ll = GF_LOG_DEBUG;
+
+ switch (stat) {
+
+ case NFS3ERR_PERM:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOENT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ACCES:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_EXIST:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ISDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOSPC:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ROFS:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NAMETOOLONG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTEMPTY:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_DQUOT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
+ }
+
+ return ll;
+}
+
+
+int
+nfs3_setattr_loglevel (nfsstat3 stat) {
+
+ int ll = GF_LOG_DEBUG;
+
+ switch (stat) {
+
+ case NFS3ERR_NOENT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_EXIST:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ISDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOSPC:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ROFS:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NAMETOOLONG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTEMPTY:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_DQUOT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
+ }
+
+ return ll;
+}
+
+
+int
+nfs3_lookup_loglevel (nfsstat3 stat) {
+
+ int ll = GF_LOG_DEBUG;
+
+ switch (stat) {
+
+ case NFS3ERR_PERM:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ACCES:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_EXIST:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ISDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOSPC:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ROFS:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NAMETOOLONG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTEMPTY:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_DQUOT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
+ }
+
+ return ll;
+}
+
+
+int
+nfs3_access_loglevel (nfsstat3 stat) {
+
+ int ll = GF_LOG_DEBUG;
+
+ switch (stat) {
+
+ case NFS3ERR_NOENT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_EXIST:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ISDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOSPC:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ROFS:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NAMETOOLONG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTEMPTY:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_DQUOT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
+ }
+
+ return ll;
+}
+
+
+int
+nfs3_readlink_loglevel (nfsstat3 stat) {
+
+ int ll = GF_LOG_DEBUG;
+
+ switch (stat) {
+
+ case NFS3ERR_EXIST:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ISDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOSPC:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ROFS:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTEMPTY:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_DQUOT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
+ }
+
+ return ll;
+}
+
+int
+nfs3_read_loglevel (nfsstat3 stat) {
+
+ int ll = GF_LOG_DEBUG;
+
+ switch (stat) {
+
+ case NFS3ERR_NOENT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_EXIST:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ISDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOSPC:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ROFS:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NAMETOOLONG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTEMPTY:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_DQUOT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
+ }
+
+ return ll;
+}
+
+
+int
+nfs3_write_loglevel (nfsstat3 stat) {
+
+ int ll = GF_LOG_DEBUG;
+
+ switch (stat) {
+
+ case NFS3ERR_NOENT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_EXIST:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ISDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOSPC:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ROFS:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NAMETOOLONG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTEMPTY:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_DQUOT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
+ }
+
+ return ll;
+}
+
+
+int
+nfs3_create_loglevel (nfsstat3 stat) {
+
+ int ll = GF_LOG_DEBUG;
+
+ switch (stat) {
+
+ case NFS3ERR_NOENT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_EXIST:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ISDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTEMPTY:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
+ }
+
+ return ll;
+}
+
+
+int
+nfs3_mkdir_loglevel (nfsstat3 stat) {
+
+ int ll = GF_LOG_DEBUG;
+
+ switch (stat) {
+
+ case NFS3ERR_NOENT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ISDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTEMPTY:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_DQUOT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
+ }
+
+ return ll;
+}
+
+
+int
+nfs3_symlink_loglevel (nfsstat3 stat) {
+
+ int ll = GF_LOG_DEBUG;
+
+ switch (stat) {
+
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ISDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTEMPTY:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_DQUOT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
+ }
+
+ return ll;
+}
+
+
+int
+nfs3_mknod_loglevel (nfsstat3 stat) {
+
+ int ll = GF_LOG_DEBUG;
+
+ switch (stat) {
+
+ case NFS3ERR_NOENT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ISDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTEMPTY:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
+ }
+
+ return ll;
+}
+
+int
+nfs3_remove_loglevel (nfsstat3 stat) {
+
+ int ll = GF_LOG_DEBUG;
+
+ switch (stat) {
+
+ case NFS3ERR_EXIST:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOSPC:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_DQUOT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
+ }
+
+ return ll;
+}
+
+
+int
+nfs3_rmdir_loglevel (nfsstat3 stat) {
+
+ int ll = GF_LOG_DEBUG;
+
+ switch (stat) {
+
+ case NFS3ERR_EXIST:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOSPC:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_DQUOT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
+ }
+
+ return ll;
+}
+
+
+int
+nfs3_rename_loglevel (nfsstat3 stat) {
+
+ int ll = GF_LOG_DEBUG;
+
+ switch (stat) {
+
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ISDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOSPC:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTEMPTY:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_DQUOT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
+ }
+
+ return ll;
+}
+
+
+int
+nfs3_link_loglevel (nfsstat3 stat) {
+
+ int ll = GF_LOG_DEBUG;
+
+ switch (stat) {
+
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTEMPTY:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_DQUOT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
+ }
+
+ return ll;
+}
+
+
+int
+nfs3_readdir_loglevel (nfsstat3 stat) {
+
+ int ll = GF_LOG_DEBUG;
+
+ switch (stat) {
+
+ case NFS3ERR_NOENT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_EXIST:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ISDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOSPC:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ROFS:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NAMETOOLONG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTEMPTY:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_DQUOT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
+ }
+
+ return ll;
+}
+
+
+int
+nfs3_fsstat_loglevel (nfsstat3 stat) {
+
+ int ll = GF_LOG_DEBUG;
+
+ switch (stat) {
+
+ case NFS3ERR_PERM:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOENT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ACCES:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_EXIST:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_XDEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NODEV:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_IO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NXIO:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ISDIR:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_INVAL:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOSPC:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_ROFS:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_FBIG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_MLINK:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NAMETOOLONG:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTEMPTY:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_SERVERFAULT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_NOTSUPP:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_BADHANDLE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_STALE:
+ ll = GF_LOG_WARNING;
+ break;
+
+ case NFS3ERR_DQUOT:
+ ll = GF_LOG_WARNING;
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
+ }
+
+ return ll;
+}
+
+struct nfs3op_str {
+ int op;
+ char str[100];
+};
+
+struct nfs3op_str nfs3op_strings[] = {
+ { NFS3_NULL, "NULL"},
+ { NFS3_GETATTR, "GETATTR"},
+ { NFS3_SETATTR, "SETATTR"},
+ { NFS3_LOOKUP, "LOOKUP"},
+ { NFS3_ACCESS, "ACCESS"},
+ { NFS3_READLINK, "READLINK"},
+ { NFS3_READ, "READ"},
+ { NFS3_WRITE, "WRITE"},
+ { NFS3_CREATE, "CREATE"},
+ { NFS3_MKDIR, "MKDIR"},
+ { NFS3_SYMLINK, "SYMLINK"},
+ { NFS3_MKNOD, "MKNOD"},
+ { NFS3_REMOVE, "REMOVE"},
+ { NFS3_RMDIR, "RMDIR"},
+ { NFS3_RENAME, "RENAME"},
+ { NFS3_LINK, "LINK"},
+ { NFS3_READDIR, "READDIR"},
+ { NFS3_READDIRP, "READDIRP"},
+ { NFS3_FSSTAT, "FSSTAT"},
+ { NFS3_FSINFO, "FSINFO"},
+ { NFS3_PATHCONF, "PATHCONF"},
+ { NFS3_COMMIT, "COMMIT"},
+};
+
+int
+nfs3_loglevel (int nfs_op, nfsstat3 stat) {
+
+ int ll = GF_LOG_DEBUG;
+
+ switch (nfs_op) {
+ case NFS3_GETATTR:
+ ll = nfs3_getattr_loglevel (stat);
+ break;
+
+ case NFS3_SETATTR:
+ ll = nfs3_setattr_loglevel (stat);
+ break;
+
+ case NFS3_LOOKUP:
+ ll = nfs3_lookup_loglevel (stat);
+ break;
+
+ case NFS3_ACCESS:
+ ll = nfs3_access_loglevel (stat);
+ break;
+
+ case NFS3_READLINK:
+ ll = nfs3_readlink_loglevel (stat);
+ break;
+
+ case NFS3_READ:
+ ll = nfs3_read_loglevel (stat);
+ break;
+
+ case NFS3_WRITE:
+ ll = nfs3_write_loglevel (stat);
+ break;
+
+ case NFS3_CREATE:
+ ll = nfs3_create_loglevel (stat);
+ break;
+
+ case NFS3_MKDIR:
+ ll = nfs3_mkdir_loglevel (stat);
+ break;
+
+ case NFS3_SYMLINK:
+ ll = nfs3_symlink_loglevel (stat);
+ break;
+
+ case NFS3_MKNOD:
+ ll = nfs3_mknod_loglevel (stat);
+ break;
+
+ case NFS3_REMOVE:
+ ll = nfs3_remove_loglevel (stat);
+ break;
+
+ case NFS3_RMDIR:
+ ll = nfs3_rmdir_loglevel (stat);
+ break;
+
+ case NFS3_RENAME:
+ ll = nfs3_rename_loglevel (stat);
+ break;
+
+ case NFS3_LINK:
+ ll = nfs3_link_loglevel (stat);
+ break;
+
+ case NFS3_READDIR:
+ ll = nfs3_readdir_loglevel (stat);
+ break;
+
+ case NFS3_READDIRP:
+ ll = nfs3_readdir_loglevel (stat);
+ break;
+
+ case NFS3_FSSTAT:
+ ll = nfs3_fsstat_loglevel (stat);
+ break;
+
+ case NFS3_FSINFO:
+ ll = nfs3_fsstat_loglevel (stat);
+ break;
+
+ case NFS3_PATHCONF:
+ ll = nfs3_fsstat_loglevel (stat);
+ break;
+
+ case NFS3_COMMIT:
+ ll = nfs3_write_loglevel (stat);
+ break;
+
+ default:
+ ll = GF_LOG_DEBUG;
+ break;
+ }
+
+ return ll;
+}
+
+void
+nfs3_log_common_res (uint32_t xid, int op, nfsstat3 stat, int pstat)
+{
+ char errstr[1024];
+ int ll = nfs3_loglevel (op, stat);
+
+ if (THIS->ctx->log.loglevel < ll)
+ return;
+ nfs3_stat_to_errstr (xid, nfs3op_strings[op].str, stat, pstat, errstr, sizeof (errstr));
+ gf_log (GF_NFS3, ll, "%s", errstr);
+}
+
+void
+nfs3_log_readlink_res (uint32_t xid, nfsstat3 stat, int pstat, char *linkpath)
+{
+ char errstr[1024];
+ int ll = nfs3_loglevel (NFS3_READLINK, stat);
+
+ if (THIS->ctx->log.loglevel < ll)
+ return;
+
+ nfs3_stat_to_errstr (xid, "READLINK", stat, pstat, errstr, sizeof (errstr));
+ gf_log (GF_NFS3, ll, "%s, target: %s",
+ errstr, linkpath);
+
+}
+
+void
+nfs3_log_read_res (uint32_t xid, nfsstat3 stat, int pstat, count3 count,
+ int is_eof, struct iovec *vec, int32_t veccount)
+{
+ char errstr[1024];
+ int ll = GF_LOG_DEBUG;
+
+ ll = nfs3_loglevel (NFS3_READ, stat);
+ if (THIS->ctx->log.loglevel < ll)
+ return;
+ nfs3_stat_to_errstr (xid, "READ", stat, pstat, errstr, sizeof (errstr));
+ if (vec)
+ gf_log (GF_NFS3, ll, "%s, count: %"PRIu32", is_eof:"
+ " %d, vector: count: %d, len: %zd", errstr, count,
+ is_eof, veccount, vec->iov_len);
+ else
+ gf_log (GF_NFS3, ll, "%s, count: %"PRIu32", is_eof:"
+ " %d", errstr, count, is_eof);
+}
+
+
+void
+nfs3_log_write_res (uint32_t xid, nfsstat3 stat, int pstat, count3 count,
+ int stable, uint64_t wverf)
+{
+ char errstr[1024];
+ int ll = nfs3_loglevel (NFS3_WRITE, stat);
+
+ if (THIS->ctx->log.loglevel < ll)
+ return;
+
+ nfs3_stat_to_errstr (xid, "WRITE", stat, pstat, errstr, sizeof (errstr));
+ gf_log (GF_NFS3, ll, "%s, count: %"PRIu32", %s,wverf: %"PRIu64
+ , errstr, count, (stable == UNSTABLE)?"UNSTABLE":"STABLE",
+ wverf);
+}
+
+
+void
+nfs3_log_newfh_res (uint32_t xid, int op, nfsstat3 stat, int pstat,
+ struct nfs3_fh *newfh)
+{
+ char errstr[1024];
+ char fhstr[1024];
+ int ll = nfs3_loglevel (op, stat);
+
+ if (THIS->ctx->log.loglevel < ll)
+ return;
+ nfs3_stat_to_errstr (xid, nfs3op_strings[op].str, stat, pstat, errstr, sizeof (errstr));
+ nfs3_fh_to_str (newfh, fhstr, sizeof (fhstr));
+
+ gf_log (GF_NFS3, nfs3_loglevel (op, stat), "%s, %s", errstr, fhstr);
+}
+
+
+void
+nfs3_log_readdir_res (uint32_t xid, nfsstat3 stat, int pstat, uint64_t cverf,
+ count3 count, int is_eof)
+{
+ char errstr[1024];
+ int ll = nfs3_loglevel (NFS3_READDIR, stat);
+
+ if (THIS->ctx->log.loglevel < ll)
+ return;
+ nfs3_stat_to_errstr (xid, "READDIR", stat, pstat, errstr, sizeof (errstr));
+ gf_log (GF_NFS3, ll, "%s, count: %"PRIu32", cverf: %"PRIu64
+ ", is_eof: %d", errstr, count, cverf, is_eof);
+}
+
+
+void
+nfs3_log_readdirp_res (uint32_t xid, nfsstat3 stat, int pstat, uint64_t cverf,
+ count3 dircount, count3 maxcount, int is_eof)
+{
+ char errstr[1024];
+ int ll = nfs3_loglevel (NFS3_READDIRP, stat);
+
+ if (THIS->ctx->log.loglevel < ll)
+ return;
+ nfs3_stat_to_errstr (xid, "READDIRPLUS", stat, pstat, errstr, sizeof (errstr));
+ gf_log (GF_NFS3, ll, "%s, dircount: %"PRIu32", maxcount: %"
+ PRIu32", cverf: %"PRIu64", is_eof: %d", errstr, dircount,
+ maxcount, cverf, is_eof);
+}
+
+
+void
+nfs3_log_commit_res (uint32_t xid, nfsstat3 stat, int pstat, uint64_t wverf)
+{
+ char errstr[1024];
+ int ll = nfs3_loglevel (NFS3_COMMIT, stat);
+
+ if (THIS->ctx->log.loglevel < ll)
+ return;
+ nfs3_stat_to_errstr (xid, "COMMIT", stat, pstat, errstr, sizeof (errstr));
+ gf_log (GF_NFS3, ll, "%s, wverf: %"PRIu64, errstr, wverf);
+}
+
+
+void
+nfs3_log_readdir_call (uint32_t xid, struct nfs3_fh *fh, count3 dircount,
+ count3 maxcount)
+{
+ char fhstr[1024];
+
+ if (THIS->ctx->log.loglevel < GF_LOG_DEBUG)
+ return;
+
+ nfs3_fh_to_str (fh, fhstr, sizeof (fhstr));
+
+ if (maxcount == 0)
+ gf_log (GF_NFS3, GF_LOG_DEBUG, "XID: %x, READDIR: args: %s,"
+ " count: %d", xid, fhstr, (uint32_t)dircount);
+ else
+ gf_log (GF_NFS3, GF_LOG_DEBUG, "XID: %x, READDIRPLUS: args: %s,"
+ " dircount: %d, maxcount: %d", xid, fhstr,
+ (uint32_t)dircount, (uint32_t)maxcount);
+}
+
+
+int
+nfs3_fh_resolve_inode_done (nfs3_call_state_t *cs, inode_t *inode)
+{
+ int ret = -EFAULT;
+
+ if ((!cs) || (!inode))
+ return ret;
+
+ gf_log (GF_NFS3, GF_LOG_TRACE, "FH inode resolved");
+ ret = nfs_inode_loc_fill (inode, &cs->resolvedloc, NFS_RESOLVE_EXIST);
+ if (ret < 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "inode loc fill failed");
+ goto err;
+ }
+
+ nfs3_call_resume (cs);
+
+err:
+ return ret;
+}
+
+
+int32_t
+nfs3_fh_resolve_entry_lookup_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ nfs3_call_state_t *cs = NULL;
+ inode_t *linked_inode = NULL;
+
+ cs = frame->local;
+ cs->resolve_ret = op_ret;
+ cs->resolve_errno = op_errno;
+
+ if (op_ret == -1) {
+ gf_log (GF_NFS3, (op_errno == ENOENT ? GF_LOG_TRACE : GF_LOG_ERROR),
+ "Lookup failed: %s: %s",
+ cs->resolvedloc.path, strerror (op_errno));
+ goto err;
+ } else
+ gf_log (GF_NFS3, GF_LOG_TRACE, "Entry looked up: %s",
+ cs->resolvedloc.path);
+
+ memcpy (&cs->stbuf, buf, sizeof (*buf));
+ memcpy (&cs->postparent, postparent, sizeof (*postparent));
+ linked_inode = inode_link (inode, cs->resolvedloc.parent,
+ cs->resolvedloc.name, buf);
+ if (linked_inode) {
+ inode_lookup (linked_inode);
+ inode_unref (cs->resolvedloc.inode);
+ cs->resolvedloc.inode = linked_inode;
+ }
+err:
+ nfs3_call_resume (cs);
+ return 0;
+}
+
+
+int32_t
+nfs3_fh_resolve_inode_lookup_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ nfs3_call_state_t *cs = NULL;
+ inode_t *linked_inode = NULL;
+
+ cs = frame->local;
+ cs->resolve_ret = op_ret;
+ cs->resolve_errno = op_errno;
+
+ if (op_ret == -1) {
+ gf_log (GF_NFS3, (op_errno == ENOENT ? GF_LOG_TRACE : GF_LOG_ERROR),
+ "Lookup failed: %s: %s",
+ cs->resolvedloc.path, strerror (op_errno));
+ nfs3_call_resume (cs);
+ goto err;
+ }
+
+ memcpy (&cs->stbuf, buf, sizeof(*buf));
+ memcpy (&cs->postparent, buf, sizeof(*postparent));
+ linked_inode = inode_link (inode, cs->resolvedloc.parent,
+ cs->resolvedloc.name, buf);
+ if (linked_inode) {
+ inode_lookup (linked_inode);
+ inode_unref (cs->resolvedloc.inode);
+ cs->resolvedloc.inode = linked_inode;
+ }
+
+ /* If it is an entry lookup and we landed in the callback for hard
+ * inode resolution, it means the parent inode was not available and
+ * had to be resolved first. Now that is done, lets head back into
+ * entry resolution.
+ */
+ if (cs->resolventry)
+ nfs3_fh_resolve_entry_hard (cs);
+ else
+ nfs3_call_resume (cs);
+err:
+ return 0;
+}
+
+
+
+/* Needs no extra argument since it knows that the fh to be resolved is in
+ * resolvefh and that it needs to start looking from the root.
+ */
+int
+nfs3_fh_resolve_inode_hard (nfs3_call_state_t *cs)
+{
+ int ret = -EFAULT;
+ nfs_user_t nfu = {0, };
+
+ if (!cs)
+ return ret;
+
+ gf_log (GF_NFS3, GF_LOG_TRACE, "FH hard resolution for: gfid 0x%s",
+ uuid_utoa (cs->resolvefh.gfid));
+ cs->hardresolved = 1;
+ nfs_loc_wipe (&cs->resolvedloc);
+ ret = nfs_gfid_loc_fill (cs->vol->itable, cs->resolvefh.gfid,
+ &cs->resolvedloc, NFS_RESOLVE_CREATE);
+ if (ret < 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to fill loc using gfid: "
+ "%s", strerror (-ret));
+ goto out;
+ }
+
+ nfs_user_root_create (&nfu);
+ ret = nfs_lookup (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ nfs3_fh_resolve_inode_lookup_cbk, cs);
+
+out:
+ return ret;
+}
+
+
+int
+nfs3_fh_resolve_entry_hard (nfs3_call_state_t *cs)
+{
+ int ret = -EFAULT;
+ nfs_user_t nfu = {0, };
+
+ if (!cs)
+ return ret;
+
+ nfs_loc_wipe (&cs->resolvedloc);
+ nfs_user_root_create (&nfu);
+ gf_log (GF_NFS3, GF_LOG_TRACE, "FH hard resolution: gfid: %s "
+ ", entry: %s", uuid_utoa (cs->resolvefh.gfid),
+ cs->resolventry);
+
+ ret = nfs_entry_loc_fill (cs->vol->itable, cs->resolvefh.gfid,
+ cs->resolventry, &cs->resolvedloc,
+ NFS_RESOLVE_CREATE);
+
+ if (ret == -2) {
+ gf_log (GF_NFS3, GF_LOG_TRACE, "Entry needs lookup: %s",
+ cs->resolvedloc.path);
+ /* If the NFS op is lookup, let the resume callback
+ * handle the sending of the lookup fop. Similarly,
+ * if the NFS op is create, let the create call
+ * go ahead in the resume callback so that an EEXIST gets
+ * handled at posix without an extra fop at this point.
+ */
+ if (nfs3_lookup_op (cs) ||
+ (nfs3_create_op (cs) && !nfs3_create_exclusive_op (cs))) {
+ cs->lookuptype = GF_NFS3_FRESH;
+ cs->resolve_ret = 0;
+ nfs3_call_resume (cs);
+ } else {
+ cs->hardresolved = 1;
+ nfs_lookup (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ nfs3_fh_resolve_entry_lookup_cbk, cs);
+ }
+ ret = 0;
+ } else if (ret == -1) {
+ gf_log (GF_NFS3, GF_LOG_TRACE, "Entry needs parent lookup: %s",
+ cs->resolvedloc.path);
+ ret = nfs3_fh_resolve_inode_hard (cs);
+ } else if (ret == 0) {
+ cs->resolve_ret = 0;
+ nfs3_call_resume (cs);
+ }
+
+ return ret;
+}
+
+int
+nfs3_fh_resolve_inode (nfs3_call_state_t *cs)
+{
+ inode_t *inode = NULL;
+ int ret = -EFAULT;
+
+ if (!cs)
+ return ret;
+
+ gf_log (GF_NFS3, GF_LOG_TRACE, "FH needs inode resolution");
+ uuid_copy (cs->resolvedloc.gfid, cs->resolvefh.gfid);
+ inode = inode_find (cs->vol->itable, cs->resolvefh.gfid);
+ if (!inode)
+ ret = nfs3_fh_resolve_inode_hard (cs);
+ else
+ ret = nfs3_fh_resolve_inode_done (cs, inode);
+
+ if (inode)
+ inode_unref (inode);
+
+ return ret;
+}
+
+int
+nfs3_fh_resolve_entry (nfs3_call_state_t *cs)
+{
+ int ret = -EFAULT;
+
+ if (!cs)
+ return ret;
+
+ return nfs3_fh_resolve_entry_hard (cs);
+}
+
+
+int
+nfs3_fh_resolve_resume (nfs3_call_state_t *cs)
+{
+ int ret = -EFAULT;
+
+ if (!cs)
+ return ret;
+
+ if (cs->resolve_ret < 0)
+ goto err_resume_call;
+
+ if (!cs->resolventry)
+ ret = nfs3_fh_resolve_inode (cs);
+ else
+ ret = nfs3_fh_resolve_entry (cs);
+
+err_resume_call:
+ if (ret < 0) {
+ cs->resolve_ret = -1;
+ cs->resolve_errno = EFAULT;
+ nfs3_call_resume (cs);
+ ret = 0;
+ }
+
+ return ret;
+}
+
+
+int32_t
+nfs3_fh_resolve_root_lookup_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ nfs3_call_state_t *cs = NULL;
+
+ cs = frame->local;
+ cs->resolve_ret = op_ret;
+ cs->resolve_errno = op_errno;
+
+ if (op_ret == -1) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Root lookup failed: %s",
+ strerror (op_errno));
+ goto err;
+ } else
+ gf_log (GF_NFS3, GF_LOG_TRACE, "Root looked up: %s",
+ cs->resolvedloc.path);
+
+ nfs3_set_root_looked_up (cs->nfs3state, &cs->resolvefh);
+err:
+ nfs3_fh_resolve_resume (cs);
+ return 0;
+}
+
+
+int
+nfs3_fh_resolve_root (nfs3_call_state_t *cs)
+{
+ int ret = -EFAULT;
+ nfs_user_t nfu = {0, };
+
+ if (!cs)
+ return ret;
+
+ if (nfs3_is_root_looked_up (cs->nfs3state, &cs->resolvefh)) {
+ ret = nfs3_fh_resolve_resume (cs);
+ goto out;
+ }
+
+ nfs_user_root_create (&nfu);
+ gf_log (GF_NFS3, GF_LOG_TRACE, "Root needs lookup");
+ ret = nfs_root_loc_fill (cs->vol->itable, &cs->resolvedloc);
+ if (ret < 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to lookup root from itable: %s",
+ strerror (-ret));
+ goto out;
+ }
+
+ ret = nfs_lookup (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ nfs3_fh_resolve_root_lookup_cbk, cs);
+
+out:
+ return ret;
+}
+
+
+int
+nfs3_fh_resolve_and_resume (nfs3_call_state_t *cs, struct nfs3_fh *fh,
+ char *entry, nfs3_resume_fn_t resum_fn)
+{
+ int ret = -EFAULT;
+
+ if ((!cs) || (!fh))
+ return ret;
+
+ cs->resume_fn = resum_fn;
+ cs->resolvefh = *fh;
+ cs->hashidx = 0;
+
+ /* Check if the resolution is:
+ * a. fh resolution
+ *
+ * or
+ *
+ * b. (fh, basename) resolution
+ */
+ if (entry) { /* b */
+ cs->resolventry = gf_strdup (entry);
+ if (!cs->resolventry)
+ goto err;
+ }
+
+ ret = nfs3_fh_resolve_root (cs);
+err:
+ return ret;
+}
diff --git a/xlators/nfs/server/src/nfs3-helpers.h b/xlators/nfs/server/src/nfs3-helpers.h
new file mode 100644
index 000000000..4de1d5623
--- /dev/null
+++ b/xlators/nfs/server/src/nfs3-helpers.h
@@ -0,0 +1,337 @@
+/*
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _NFS3_HELPER_H_
+#define _NFS3_HELPER_H_
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "xlator.h"
+#include "nfs3.h"
+#include "nfs3-fh.h"
+#include "msg-nfs3.h"
+#include "xdr-nfs3.h"
+
+#include <sys/statvfs.h>
+
+#define GF_NFS3_FD_CACHED 0xcaced
+
+extern struct nfs3_fh
+nfs3_extract_lookup_fh (lookup3args *args);
+
+extern char *
+nfs3_extract_lookup_name (lookup3args *args);
+
+extern nfsstat3
+nfs3_errno_to_nfsstat3 (int errnum);
+
+extern nfsstat3
+nfs3_cbk_errno_status (int32_t, int32_t);
+
+extern void
+nfs3_fill_lookup3res (lookup3res *res, nfsstat3 stat, struct nfs3_fh *newfh,
+ struct iatt *stbuf, struct iatt *postparent,
+ uint64_t deviceid);
+
+extern post_op_attr
+nfs3_stat_to_post_op_attr (struct iatt *buf);
+
+extern struct nfs3_fh
+nfs3_extract_getattr_fh (getattr3args *args);
+
+extern void
+nfs3_fill_getattr3res (getattr3res *res, nfsstat3 stat, struct iatt *buf,
+ uint64_t deviceid);
+
+extern struct nfs3_fh
+nfs3_extract_fsinfo_fh (fsinfo3args *args);
+
+extern void
+nfs3_fill_fsinfo3res (struct nfs3_state *nfs3, fsinfo3res *res,
+ nfsstat3 status, struct iatt *fsroot,uint64_t deviceid);
+
+/* Functions containing _prep_ are used specifically to work around
+ * the memory allocations that happen inside Sun RPC library.
+ * In that library, there are numerous places where every NFS request
+ * can result in really tiny malloc calls. I fear the memory fragmentation
+ * that will follow. After studying the points at and the way in which malloc
+ * is called in Sun RPC, I've come up with this work-around. It is based on
+ * the idea that if the user/caller of the xdr_to_XXXXargs functions can provide
+ * already allocated memory or provide references to memory areas on its stack
+ * just for the short-term purpose of decoding the message from XDR format, we
+ * can avoid the memory allocations in Sun RPC. This is based on the fact
+ * that Sun RPC first checks whether structure members which require memory
+ * are NULL or not and only then calls malloc. In this case, if the caller
+ * provided references are non-NULL, then the if-branches containing malloc
+ * in Sun RPC will be avoided.
+ * PS: You're not expected to understand this unless you've spent some time
+ * looking through the glibc/sunrpc sources.
+ */
+extern void
+nfs3_prep_lookup3args (lookup3args *args, struct nfs3_fh *fh, char *name);
+
+extern void
+nfs3_prep_getattr3args (getattr3args *args, struct nfs3_fh *fh);
+
+extern void
+nfs3_prep_fsinfo3args (fsinfo3args *args, struct nfs3_fh *root);
+
+extern char *
+nfsstat3_strerror(int stat);
+
+extern void
+nfs3_prep_access3args (access3args *args, struct nfs3_fh *fh);
+
+extern void
+nfs3_fill_access3res (access3res *res, nfsstat3 status, int32_t accbits,
+ int32_t reqaccbits);
+
+extern char *
+nfs3_fhcache_getpath (struct nfs3_state *nfs3, struct nfs3_fh *fh);
+
+extern int
+nfs3_fhcache_putpath (struct nfs3_state *nfs3, struct nfs3_fh *fh, char *path);
+
+extern void
+nfs3_prep_readdir3args (readdir3args *ra, struct nfs3_fh *fh);
+
+extern void
+nfs3_fill_readdir3res (readdir3res *res, nfsstat3 stat, struct nfs3_fh *dfh,
+ uint64_t cverf, struct iatt *dirstat,
+ gf_dirent_t *entries, count3 count, int is_eof,
+ uint64_t deviceid);
+
+extern void
+nfs3_prep_readdirp3args (readdirp3args *ra, struct nfs3_fh *fh);
+
+extern void
+nfs3_fill_readdirp3res (readdirp3res *res, nfsstat3 stat,
+ struct nfs3_fh *dirfh, uint64_t cverf,
+ struct iatt *dirstat, gf_dirent_t *entries,
+ count3 dircount, count3 maxcount, int is_eof,
+ uint64_t deviceid);
+
+extern void
+nfs3_free_readdirp3res (readdirp3res *res);
+
+extern void
+nfs3_free_readdir3res (readdir3res *res);
+
+extern void
+nfs3_prep_fsstat3args (fsstat3args *args, struct nfs3_fh *fh);
+
+extern void
+nfs3_fill_fsstat3res (fsstat3res *res, nfsstat3 stat, struct statvfs *fsbuf,
+ struct iatt *postbuf, uint64_t deviceid);
+
+extern int32_t
+nfs3_sattr3_to_setattr_valid (sattr3 *sattr, struct iatt *buf, mode_t *omode);
+extern void
+nfs3_fill_create3res (create3res *res, nfsstat3 stat, struct nfs3_fh *newfh,
+ struct iatt *newbuf, struct iatt *preparent,
+ struct iatt *postparent, uint64_t deviceid);
+
+extern void
+nfs3_prep_create3args (create3args *args, struct nfs3_fh *fh, char *name);
+
+extern void
+nfs3_prep_setattr3args (setattr3args *args, struct nfs3_fh *fh);
+
+extern void
+nfs3_fill_setattr3res (setattr3res *res, nfsstat3 stat, struct iatt *preop,
+ struct iatt *postop, uint64_t deviceid);
+
+extern void
+nfs3_prep_mkdir3args (mkdir3args *args, struct nfs3_fh *dirfh, char *name);
+
+extern void
+nfs3_fill_mkdir3res (mkdir3res *res, nfsstat3 stat, struct nfs3_fh *fh,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, uint64_t deviceid);
+
+extern void
+nfs3_prep_symlink3args (symlink3args *args, struct nfs3_fh *dirfh, char *name,
+ char *target);
+
+extern void
+nfs3_fill_symlink3res (symlink3res *res, nfsstat3 stat, struct nfs3_fh *fh,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, uint64_t deviceid);
+
+extern void
+nfs3_prep_readlink3args (readlink3args *args, struct nfs3_fh *fh);
+
+extern void
+nfs3_fill_readlink3res (readlink3res *res, nfsstat3 stat, char *path,
+ struct iatt *buf, uint64_t deviceid);
+
+extern void
+nfs3_prep_mknod3args (mknod3args *args, struct nfs3_fh *fh, char *name);
+
+extern void
+nfs3_fill_mknod3res (mknod3res *res, nfsstat3 stat, struct nfs3_fh *fh,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, uint64_t deviceid);
+
+extern void
+nfs3_fill_remove3res (remove3res *res, nfsstat3 stat, struct iatt *preparent,
+ struct iatt *postparent, uint64_t deviceid);
+extern void
+nfs3_prep_remove3args (remove3args *args, struct nfs3_fh *fh, char *name);
+
+extern void
+nfs3_fill_rmdir3res (rmdir3res *res, nfsstat3 stat, struct iatt *preparent,
+ struct iatt *postparent, uint64_t deviceid);
+
+extern void
+nfs3_prep_rmdir3args (rmdir3args *args, struct nfs3_fh *fh, char *name);
+
+extern void
+nfs3_fill_link3res (link3res *res, nfsstat3 stat, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ uint64_t deviceid);
+
+extern void
+nfs3_prep_link3args (link3args *args, struct nfs3_fh *target,
+ struct nfs3_fh * dirfh, char *name);
+
+extern void
+nfs3_prep_rename3args (rename3args *args, struct nfs3_fh *olddirfh,
+ char *oldname, struct nfs3_fh *newdirfh,
+ char *newname);
+
+extern void
+nfs3_fill_rename3res (rename3res *res, nfsstat3 stat, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ uint64_t deviceid);
+
+extern void
+nfs3_prep_write3args (write3args *args, struct nfs3_fh *fh);
+
+extern void
+nfs3_fill_write3res (write3res *res, nfsstat3 stat, count3 count,
+ stable_how stable, uint64_t wverf, struct iatt *prestat,
+ struct iatt *poststat, uint64_t deviceid);
+
+extern void
+nfs3_prep_commit3args (commit3args *args, struct nfs3_fh *fh);
+
+extern void
+nfs3_fill_commit3res (commit3res *res, nfsstat3 stat, uint64_t wverf,
+ struct iatt *prestat, struct iatt *poststat,
+ uint64_t deviceid);
+
+extern void
+nfs3_fill_read3res (read3res *res, nfsstat3 stat, count3 count,
+ struct iatt *poststat, int is_eof, uint64_t deviceid);
+
+extern void
+nfs3_prep_read3args (read3args *args, struct nfs3_fh *fh);
+
+extern void
+nfs3_prep_pathconf3args (pathconf3args *args, struct nfs3_fh *fh);
+
+extern void
+nfs3_fill_pathconf3res (pathconf3res *res, nfsstat3 stat, struct iatt *buf,
+ uint64_t deviceid);
+
+extern int
+nfs3_cached_inode_opened (xlator_t *nfsxl, inode_t *inode);
+
+extern void
+nfs3_log_common_res (uint32_t xid, int op, nfsstat3 stat, int pstat);
+
+extern void
+nfs3_log_readlink_res (uint32_t xid, nfsstat3 stat, int pstat, char *linkpath);
+
+extern void
+nfs3_log_read_res (uint32_t xid, nfsstat3 stat, int pstat, count3 count,
+ int is_eof, struct iovec *vec, int32_t vcount);
+
+extern void
+nfs3_log_write_res (uint32_t xid, nfsstat3 stat, int pstat, count3 count,
+ int stable, uint64_t wverf);
+
+extern void
+nfs3_log_newfh_res (uint32_t xid, int op, nfsstat3 stat, int pstat,
+ struct nfs3_fh *newfh);
+
+extern void
+nfs3_log_readdir_res (uint32_t xid, nfsstat3 stat, int pstat, uint64_t cverf,
+ count3 count, int is_eof);
+
+extern void
+nfs3_log_readdirp_res (uint32_t xid, nfsstat3 stat, int pstat, uint64_t cverf,
+ count3 dircount, count3 maxcount, int is_eof);
+
+extern void
+nfs3_log_commit_res (uint32_t xid, nfsstat3 stat, int pstat, uint64_t wverf);
+
+extern void
+nfs3_log_common_call (uint32_t xid, char *op, struct nfs3_fh *fh);
+
+extern void
+nfs3_log_fh_entry_call (uint32_t xid, char *op, struct nfs3_fh *fh,
+ char *name);
+
+extern void
+nfs3_log_rw_call (uint32_t xid, char *op, struct nfs3_fh *fh, offset3 offt,
+ count3 count, int stablewrite);
+
+extern void
+nfs3_log_create_call (uint32_t xid, struct nfs3_fh *fh, char *name,
+ createmode3 mode);
+
+extern void
+nfs3_log_symlink_call (uint32_t xid, struct nfs3_fh *fh, char *name, char *tgt);
+
+extern void
+nfs3_log_mknod_call (uint32_t xid, struct nfs3_fh *fh, char *name, int type);
+
+extern void
+nfs3_log_rename_call (uint32_t xid, struct nfs3_fh *src, char *sname,
+ struct nfs3_fh *dst, char *dname);
+
+extern void
+nfs3_log_link_call (uint32_t xid, struct nfs3_fh *fh, char *name,
+ struct nfs3_fh *tgt);
+
+extern void
+nfs3_log_readdir_call (uint32_t xid, struct nfs3_fh *fh, count3 dircount,
+ count3 maxcount);
+
+extern int
+nfs3_fh_resolve_entry_hard (nfs3_call_state_t *cs);
+
+extern int
+nfs3_fh_resolve_inode (nfs3_call_state_t *cs);
+
+extern int
+nfs3_fh_resolve_entry (nfs3_call_state_t *cs);
+
+extern int
+nfs3_fh_resolve_and_resume (nfs3_call_state_t *cs, struct nfs3_fh *fh,
+ char *entry, nfs3_resume_fn_t resum_fn);
+
+extern int
+nfs3_verify_dircookie (struct nfs3_state *nfs3, fd_t *dirfd, cookie3 cookie,
+ uint64_t cverf, nfsstat3 *stat);
+
+extern int
+nfs3_is_parentdir_entry (char *entry);
+
+uint32_t
+nfs3_request_to_accessbits (int32_t accbits);
+
+#endif
diff --git a/xlators/nfs/server/src/nfs3.c b/xlators/nfs/server/src/nfs3.c
new file mode 100644
index 000000000..f914c3193
--- /dev/null
+++ b/xlators/nfs/server/src/nfs3.c
@@ -0,0 +1,5630 @@
+/*
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "rpcsvc.h"
+#include "dict.h"
+#include "xlator.h"
+#include "mount3.h"
+#include "xdr-nfs3.h"
+#include "msg-nfs3.h"
+#include "iobuf.h"
+#include "nfs3.h"
+#include "mem-pool.h"
+#include "logging.h"
+#include "nfs-common.h"
+#include "nfs-fops.h"
+#include "nfs-inodes.h"
+#include "nfs-generics.h"
+#include "nfs3-helpers.h"
+#include "nfs-mem-types.h"
+#include "nfs.h"
+#include "xdr-rpc.h"
+#include "xdr-generic.h"
+
+#include <sys/socket.h>
+#include <sys/uio.h>
+#include <sys/statvfs.h>
+#include <time.h>
+
+#define nfs3_validate_strlen_or_goto(str, len, label, status, retval) \
+ do { \
+ if ((str)) { \
+ if (strlen ((str)) > (len)) { \
+ gf_log (GF_NFS3, GF_LOG_ERROR, "strlen "\
+ "too long"); \
+ status = NFS3ERR_NAMETOOLONG; \
+ retval = -ENAMETOOLONG; \
+ goto label; \
+ } \
+ } \
+ } while (0); \
+
+#define nfs3_validate_nfs3_state(request, state, status, label, retval) \
+ do { \
+ state = rpcsvc_request_program_private (request); \
+ if (!state) { \
+ gf_log (GF_NFS3, GF_LOG_ERROR, "NFSv3 state " \
+ "missing from RPC request"); \
+ status = NFS3ERR_SERVERFAULT; \
+ ret = -EFAULT; \
+ goto label; \
+ } \
+ } while (0); \
+
+
+struct nfs3_export *
+__nfs3_get_export_by_index (struct nfs3_state *nfs3, uuid_t exportid)
+{
+ struct nfs3_export *exp = NULL;
+ int index = 0;
+ int searchindex = 0;
+
+ searchindex = nfs3_fh_exportid_to_index (exportid);
+ list_for_each_entry (exp, &nfs3->exports, explist) {
+ if (searchindex == index)
+ goto found;
+
+ ++index;
+ }
+
+ exp = NULL;
+ gf_log (GF_NFS, GF_LOG_ERROR, "searchindex=%d not found", searchindex);
+found:
+ return exp;
+}
+
+
+struct nfs3_export *
+__nfs3_get_export_by_volumeid (struct nfs3_state *nfs3, uuid_t exportid)
+{
+ struct nfs3_export *exp = NULL;
+
+ list_for_each_entry (exp, &nfs3->exports, explist) {
+ if (!uuid_compare (exportid, exp->volumeid))
+ goto found;
+ }
+
+ exp = NULL;
+found:
+ return exp;
+}
+
+
+struct nfs3_export *
+__nfs3_get_export_by_exportid (struct nfs3_state *nfs3, uuid_t exportid)
+{
+ struct nfs3_export *exp = NULL;
+
+ if (!nfs3)
+ return exp;
+
+ if (gf_nfs_dvm_off (nfs_state(nfs3->nfsx)))
+ exp = __nfs3_get_export_by_index (nfs3, exportid);
+ else
+ exp = __nfs3_get_export_by_volumeid (nfs3, exportid);
+
+ return exp;
+}
+
+
+int
+nfs3_export_access (struct nfs3_state *nfs3, uuid_t exportid)
+{
+ int ret = GF_NFS3_VOLACCESS_RO;
+ struct nfs3_export *exp = NULL;
+
+ GF_VALIDATE_OR_GOTO (GF_NFS3, nfs3, err);
+
+ exp = __nfs3_get_export_by_exportid (nfs3, exportid);
+
+ if (!exp) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to get export by ID");
+ goto err;
+ }
+
+ ret = exp->access;
+
+err:
+ return ret;
+}
+
+#define nfs3_check_rw_volaccess(nfs3state, exid, status, label) \
+ do { \
+ if (nfs3_export_access (nfs3state,exid)!=GF_NFS3_VOLACCESS_RW){\
+ gf_log (GF_NFS3, GF_LOG_ERROR, "No read-write access");\
+ status = NFS3ERR_ROFS; \
+ goto label; \
+ } \
+ } while (0) \
+
+
+
+xlator_t *
+nfs3_fh_to_xlator (struct nfs3_state *nfs3, struct nfs3_fh *fh)
+{
+ xlator_t *vol = NULL;
+ struct nfs3_export *exp = NULL;
+
+ GF_VALIDATE_OR_GOTO (GF_NFS3, nfs3, out);
+ GF_VALIDATE_OR_GOTO (GF_NFS3, fh, out);
+
+ exp = __nfs3_get_export_by_exportid (nfs3, fh->exportid);
+ if (!exp)
+ goto out;
+
+ vol = exp->subvol;
+out:
+ return vol;
+}
+
+
+int
+nfs3_is_root_looked_up (struct nfs3_state *nfs3, struct nfs3_fh *rootfh)
+{
+ struct nfs3_export *exp = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO (GF_NFS3, nfs3, out);
+ GF_VALIDATE_OR_GOTO (GF_NFS3, rootfh, out);
+
+ exp = __nfs3_get_export_by_exportid (nfs3, rootfh->exportid);
+ if (!exp)
+ goto out;
+
+ ret = exp->rootlookedup;
+out:
+ return ret;
+}
+
+
+int
+nfs3_set_root_looked_up (struct nfs3_state *nfs3, struct nfs3_fh *rootfh)
+{
+ struct nfs3_export *exp = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO (GF_NFS3, nfs3, out);
+ GF_VALIDATE_OR_GOTO (GF_NFS3, rootfh, out);
+
+ exp = __nfs3_get_export_by_exportid (nfs3, rootfh->exportid);
+ if (!exp)
+ goto out;
+
+ exp->rootlookedup = 1;
+out:
+ return ret;
+}
+
+
+#define nfs3_map_fh_to_volume(nfs3state, handle, req, volume, status, label) \
+ do { \
+ char exportid[256], gfid[256]; \
+ rpc_transport_t *trans = NULL; \
+ volume = nfs3_fh_to_xlator ((nfs3state), handle); \
+ if (!volume) { \
+ uuid_unparse (handle->exportid, exportid); \
+ uuid_unparse (handle->gfid, gfid); \
+ trans = rpcsvc_request_transport (req); \
+ GF_LOG_OCCASIONALLY (nfs3state->occ_logger, \
+ GF_NFS3, GF_LOG_ERROR, "Failed to map " \
+ "FH to vol: client=%s, exportid=%s, " \
+ "gfid=%s", trans->peerinfo.identifier, \
+ exportid, gfid); \
+ GF_LOG_OCCASIONALLY (nfs3state->occ_logger, \
+ GF_NFS3, GF_LOG_ERROR, "Stale nfs " \
+ "client %s must be trying to connect to"\
+ " a deleted volume, please unmount it.",\
+ trans->peerinfo.identifier); \
+ status = NFS3ERR_STALE; \
+ goto label; \
+ } else { \
+ gf_log (GF_NFS3, GF_LOG_TRACE, "FH to Volume:" \
+ "%s", volume->name); \
+ rpcsvc_request_set_private (req, volume); \
+ } \
+ } while (0); \
+
+
+#define nfs3_validate_gluster_fh(handle, status, errlabel) \
+ do { \
+ if (!nfs3_fh_validate (handle)) { \
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Bad Handle"); \
+ status = NFS3ERR_BADHANDLE; \
+ goto errlabel; \
+ } \
+ } while (0) \
+
+#define nfs3_check_fh_resolve_status(cst, nfstat, erlabl) \
+ do { \
+ xlator_t *xlatorp = NULL; \
+ char buf[256], gfid[256]; \
+ rpc_transport_t *trans = NULL; \
+ if ((cst)->resolve_ret < 0) { \
+ trans = rpcsvc_request_transport (cst->req); \
+ xlatorp = nfs3_fh_to_xlator (cst->nfs3state, \
+ &cst->resolvefh); \
+ uuid_unparse (cst->resolvefh.gfid, gfid); \
+ snprintf (buf, sizeof (buf), "(%s) %s : %s", \
+ trans->peerinfo.identifier, \
+ xlatorp ? xlatorp->name : "ERR", \
+ gfid ); \
+ gf_log (GF_NFS3, GF_LOG_ERROR, "%s: %s", \
+ strerror(cst->resolve_errno), buf); \
+ nfstat = nfs3_errno_to_nfsstat3 (cst->resolve_errno);\
+ goto erlabl; \
+ } \
+ } while (0) \
+
+#define nfs3_check_new_fh_resolve_status(cst, nfstat, erlabl) \
+ do { \
+ xlator_t *xlatorp = NULL; \
+ char buf[256], gfid[256]; \
+ rpc_transport_t *trans = NULL; \
+ if (((cst)->resolve_ret < 0) && \
+ ((cst)->resolve_errno != ENOENT)) { \
+ trans = rpcsvc_request_transport (cst->req); \
+ xlatorp = nfs3_fh_to_xlator (cst->nfs3state, \
+ &cst->resolvefh); \
+ uuid_unparse (cst->resolvefh.gfid, gfid); \
+ snprintf (buf, sizeof (buf), "(%s) %s : %s", \
+ trans->peerinfo.identifier, \
+ xlatorp ? xlatorp->name : "ERR", \
+ gfid); \
+ gf_log (GF_NFS3, GF_LOG_ERROR, "%s: %s", \
+ strerror(cst->resolve_errno), buf); \
+ nfstat = nfs3_errno_to_nfsstat3 (cs->resolve_errno);\
+ goto erlabl; \
+ } \
+ } while (0) \
+
+
+int
+__nfs3_get_volume_id (struct nfs3_state *nfs3, xlator_t *xl,
+ uuid_t volumeid)
+{
+ int ret = -1;
+ struct nfs3_export *exp = NULL;
+
+ GF_VALIDATE_OR_GOTO (GF_NFS3, nfs3, out);
+ GF_VALIDATE_OR_GOTO (GF_NFS3, xl, out);
+
+ list_for_each_entry (exp, &nfs3->exports, explist) {
+ if (exp->subvol == xl) {
+ uuid_copy (volumeid, exp->volumeid);
+ ret = 0;
+ goto out;
+ }
+ }
+
+out:
+ return ret;
+}
+
+
+#define nfs3_funge_solaris_zerolen_fh(nfs3st, fhd, enam, nfsst, erl) \
+ do { \
+ xlator_t *fungexl = NULL; \
+ uuid_t zero = {0, }; \
+ fungexl =nfs_mntpath_to_xlator ((nfs3st)->exportslist,enam);\
+ if (!fungexl) { \
+ (nfsst) = NFS3ERR_NOENT; \
+ goto erl; \
+ } \
+ \
+ uuid_copy ((fhd)->gfid, zero); \
+ (fhd)->gfid[15] = 1; \
+ (enam) = NULL; \
+ if ((gf_nfs_dvm_off (nfs_state (nfs3st->nfsx)))) \
+ (fhd)->exportid[15] = nfs_xlator_to_xlid ((nfs3st)->exportslist, fungexl); \
+ else { \
+ if(__nfs3_get_volume_id ((nfs3st), fungexl, (fhd)->exportid) < 0) { \
+ (nfsst) = NFS3ERR_STALE; \
+ goto erl; \
+ } \
+ } \
+ } while (0) \
+
+
+#define nfs3_volume_started_check(nf3stt, vlm, rtval, erlbl) \
+ do { \
+ if ((!nfs_subvolume_started (nfs_state (nf3stt->nfsx), vlm))){\
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Volume is disabled: %s",\
+ vlm->name); \
+ rtval = RPCSVC_ACTOR_IGNORE; \
+ goto erlbl; \
+ } \
+ } while (0) \
+
+
+int
+nfs3_export_sync_trusted (struct nfs3_state *nfs3, uuid_t exportid)
+{
+ struct nfs3_export *exp = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO (GF_NFS3, nfs3, err);
+
+ exp = __nfs3_get_export_by_exportid (nfs3, exportid);
+ if (!exp)
+ goto err;
+
+ ret = exp->trusted_sync;
+err:
+ return ret;
+}
+
+
+int
+nfs3_export_write_trusted (struct nfs3_state *nfs3, uuid_t exportid)
+{
+ struct nfs3_export *exp = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO (GF_NFS3, nfs3, err);
+
+ exp = __nfs3_get_export_by_exportid (nfs3, exportid);
+ if (!exp)
+ goto err;
+
+ ret = exp->trusted_write;
+err:
+ return ret;
+}
+
+int
+nfs3_solaris_zerolen_fh (struct nfs3_fh *fh, int fhlen)
+{
+ if (!fh)
+ return 0;
+
+ if (nfs3_fh_validate (fh))
+ return 0;
+
+ if (fhlen == 0)
+ return 1;
+
+ return 0;
+}
+
+
+/* Function pointer that represents the generic prototypes of functions used
+ * to serialize NFS3 message structures into the XDR format.
+ * For usage, see the nfs3svc_XXX_cbk functions.
+ */
+typedef ssize_t (*nfs3_serializer) (struct iovec outmsg, void *args);
+
+nfs3_call_state_t *
+nfs3_call_state_init (struct nfs3_state *s, rpcsvc_request_t *req, xlator_t *v)
+{
+ nfs3_call_state_t *cs = NULL;
+
+ GF_VALIDATE_OR_GOTO (GF_NFS3, s, err);
+ GF_VALIDATE_OR_GOTO (GF_NFS3, req, err);
+ GF_VALIDATE_OR_GOTO (GF_NFS3, v, err);
+
+ cs = (nfs3_call_state_t *) mem_get (s->localpool);
+ if (!cs) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "out of memory");
+ return NULL;
+ }
+
+ memset (cs, 0, sizeof (*cs));
+ INIT_LIST_HEAD (&cs->entries.list);
+ INIT_LIST_HEAD (&cs->openwait_q);
+ cs->operrno = EINVAL;
+ cs->req = req;
+ cs->vol = v;
+ cs->nfsx = s->nfsx;
+ cs->nfs3state = s;
+err:
+ return cs;
+}
+
+void
+nfs3_call_state_wipe (nfs3_call_state_t *cs)
+{
+ if (!cs)
+ return;
+
+ if (cs->fd) {
+ gf_log (GF_NFS3, GF_LOG_TRACE, "fd 0x%lx ref: %d",
+ (long)cs->fd, cs->fd->refcount);
+ fd_unref (cs->fd);
+ }
+
+ GF_FREE (cs->resolventry);
+
+ GF_FREE (cs->pathname);
+
+ if (!list_empty (&cs->entries.list))
+ gf_dirent_free (&cs->entries);
+
+ nfs_loc_wipe (&cs->oploc);
+ nfs_loc_wipe (&cs->resolvedloc);
+ if (cs->iob)
+ iobuf_unref (cs->iob);
+ if (cs->iobref)
+ iobref_unref (cs->iobref);
+ if (cs->trans)
+ rpc_transport_unref (cs->trans);
+ memset (cs, 0, sizeof (*cs));
+ mem_put (cs);
+ /* Already refd by fd_lookup, so no need to ref again. */
+}
+
+
+#define nfs3_handle_call_state_init(nfs3state, calls, rq, vl ,opstat, errlabel)\
+ do { \
+ calls = nfs3_call_state_init ((nfs3state), (rq), (vl)); \
+ if (!calls) { \
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to " \
+ "init call state"); \
+ opstat = NFS3ERR_SERVERFAULT; \
+ goto errlabel; \
+ } \
+ } while (0) \
+
+
+
+struct iobuf *
+nfs3_serialize_reply (rpcsvc_request_t *req, void *arg, nfs3_serializer sfunc,
+ struct iovec *outmsg)
+{
+ struct nfs3_state *nfs3 = NULL;
+ struct iobuf *iob = NULL;
+ ssize_t retlen = -1;
+
+ nfs3 = (struct nfs3_state *)rpcsvc_request_program_private (req);
+ if (!nfs3) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "NFSv3 state not found in RPC"
+ " request");
+ goto ret;
+ }
+
+ /* First, get the io buffer into which the reply in arg will
+ * be serialized.
+ */
+ /* TODO: get rid of 'sfunc' and use 'xdrproc_t' so we
+ can have 'xdr_sizeof' */
+ iob = iobuf_get (nfs3->iobpool);
+ if (!iob) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to get iobuf");
+ goto ret;
+ }
+
+ iobuf_to_iovec (iob, outmsg);
+ /* Use the given serializer to translate the give C structure in arg
+ * to XDR format which will be written into the buffer in outmsg.
+ */
+ /* retlen is used to received the error since size_t is unsigned and we
+ * need -1 for error notification during encoding.
+ */
+ retlen = sfunc (*outmsg, arg);
+ if (retlen == -1) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to encode message");
+ goto ret;
+ }
+
+ outmsg->iov_len = retlen;
+ret:
+ if (retlen == -1) {
+ iobuf_unref (iob);
+ iob = NULL;
+ }
+
+ return iob;
+}
+
+
+
+/* Generic reply function for NFSv3 specific replies. */
+int
+nfs3svc_submit_reply (rpcsvc_request_t *req, void *arg, nfs3_serializer sfunc)
+{
+ struct iovec outmsg = {0, };
+ struct iobuf *iob = NULL;
+ int ret = -1;
+ struct iobref *iobref = NULL;
+
+ if (!req)
+ return -1;
+
+ iob = nfs3_serialize_reply (req, arg, sfunc, &outmsg);
+ if (!iob) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to serialize reply");
+ goto ret;
+ }
+
+ iobref = iobref_new ();
+ if (!iobref) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "failed on iobref_new()");
+ goto ret;
+ }
+
+ ret = iobref_add (iobref, iob);
+ if (ret) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to add iob to iobref");
+ goto ret;
+ }
+
+ /* Then, submit the message for transmission. */
+ ret = rpcsvc_submit_message (req, &outmsg, 1, NULL, 0, iobref);
+ if (ret == -1) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Reply submission failed");
+ goto ret;
+ }
+
+ ret = 0;
+ret:
+ /* Now that we've done our job of handing the message to the RPC layer
+ * we can safely unref the iob in the hope that RPC layer must have
+ * ref'ed the iob on receiving into the txlist.
+ */
+ if (NULL != iob)
+ iobuf_unref (iob);
+ if (NULL != iobref)
+ iobref_unref (iobref);
+ return ret;
+}
+
+
+int
+nfs3svc_submit_vector_reply (rpcsvc_request_t *req, void *arg,
+ nfs3_serializer sfunc, struct iovec *payload,
+ int vcount, struct iobref *iobref)
+{
+ struct iovec outmsg = {0, };
+ struct iobuf *iob = NULL;
+ int ret = -1;
+ int new_iobref = 0;
+
+ if (!req)
+ return -1;
+
+ iob = nfs3_serialize_reply (req, arg, sfunc, &outmsg);
+ if (!iob) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to serialize reply");
+ goto ret;
+ }
+ if (iobref == NULL) {
+ iobref = iobref_new ();
+ if (!iobref) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "failed on iobref_new");
+ goto ret;
+ }
+ new_iobref = 1;
+ }
+
+ ret = iobref_add (iobref, iob);
+ if (ret) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to add iob to iobref");
+ goto ret;
+ }
+
+ /* Then, submit the message for transmission. */
+ ret = rpcsvc_submit_message (req, &outmsg, 1, payload, vcount, iobref);
+ if (ret == -1) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Reply submission failed");
+ goto ret;
+ }
+
+ ret = 0;
+ret:
+ /* Now that we've done our job of handing the message to the RPC layer
+ * we can safely unref the iob in the hope that RPC layer must have
+ * ref'ed the iob on receiving into the txlist.
+ */
+ if (NULL != iob)
+ iobuf_unref (iob);
+ if (new_iobref)
+ iobref_unref (iobref);
+ return ret;
+}
+
+uint64_t
+nfs3_request_xlator_deviceid (rpcsvc_request_t *rq)
+{
+ struct nfs3_state *nfs3 = NULL;
+ xlator_t *xl = NULL;
+ uint64_t devid = 0;
+ uuid_t volumeid = {0, };
+
+ if (!rq)
+ return 0;
+
+ xl = rpcsvc_request_private (rq);
+ nfs3 = rpcsvc_request_program_private (rq);
+ if (gf_nfs_dvm_off (nfs_state (nfs3->nfsx)))
+ devid = (uint64_t)nfs_xlator_to_xlid (nfs3->exportslist, xl);
+ else {
+ __nfs3_get_volume_id (nfs3, xl, volumeid);
+ memcpy (&devid, &volumeid[8], sizeof (devid));
+ }
+
+ return devid;
+}
+
+
+int
+nfs3svc_null (rpcsvc_request_t *req)
+{
+ struct iovec dummyvec = {0, };
+ if (!req)
+ return RPCSVC_ACTOR_ERROR;
+ rpcsvc_submit_generic (req, &dummyvec, 1, NULL, 0, NULL);
+ return RPCSVC_ACTOR_SUCCESS;
+}
+
+
+int
+nfs3_getattr_reply (rpcsvc_request_t *req, nfsstat3 status, struct iatt *buf)
+{
+ getattr3res res;
+ uint64_t deviceid = 0;
+
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_getattr3res (&res, status, buf, deviceid);
+ nfs3svc_submit_reply (req, &res,
+ (nfs3_serializer)xdr_serialize_getattr3res);
+
+ return 0;
+}
+
+
+int32_t
+nfs3svc_getattr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ nfsstat3 status = NFS3_OK;
+ nfs3_call_state_t *cs = NULL;
+
+ cs = frame->local;
+
+ /*
+ * Somewhat counter-intuitively, we don't need to look for sh-failed
+ * here. Failing this getattr will generate a new lookup from the
+ * client, and nfs_fop_lookup_cbk will detect any self-heal failures.
+ */
+
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_WARNING,
+ "%x: %s => -1 (%s)", rpcsvc_request_xid (cs->req),
+ cs->resolvedloc.path, strerror (op_errno));
+ status = nfs3_cbk_errno_status (op_ret, op_errno);
+ }
+ else {
+ nfs_fix_generation(this,inode);
+ }
+
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_GETATTR,
+ status, op_errno);
+
+ nfs3_getattr_reply (cs->req, status, buf);
+ nfs3_call_state_wipe (cs);
+
+ return 0;
+}
+
+
+int32_t
+nfs3svc_getattr_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ nfsstat3 status = NFS3_OK;
+ nfs3_call_state_t *cs = NULL;
+
+ cs = frame->local;
+
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_WARNING,
+ "%x: %s => -1 (%s)", rpcsvc_request_xid (cs->req),
+ cs->resolvedloc.path, strerror (op_errno));
+ status = nfs3_cbk_errno_status (op_ret, op_errno);
+ }
+
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_GETATTR,
+ status, op_errno);
+
+ nfs3_getattr_reply (cs->req, status, buf);
+ nfs3_call_state_wipe (cs);
+
+ return 0;
+}
+
+
+int
+nfs3_getattr_resume (void *carg)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ nfs_user_t nfu = {0, };
+ nfs3_call_state_t *cs = NULL;
+ uint64_t raw_ctx = 0;
+ struct nfs_inode_ctx *ictx = NULL;
+ struct nfs_state *priv = NULL;
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ nfs3_check_fh_resolve_status (cs, stat, nfs3err);
+ nfs_request_user_init (&nfu, cs->req);
+ /* If inode which is to be getattr'd is the root, we need to do a
+ * lookup instead because after a server reboot, it is not necessary
+ * for the root to have been looked up when the getattr on the root is
+ * sent. AND, this causes a problem for stat-prefetch in that it
+ * expects even the root inode to have been looked up.
+
+ if (__is_root_gfid (cs->resolvedloc.inode->gfid))
+ ret = nfs_lookup (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ nfs3svc_getattr_lookup_cbk, cs);
+ else
+ ret = nfs_stat (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ */
+
+ if (cs->hardresolved) {
+ ret = -EFAULT;
+ stat = NFS3_OK;
+ goto nfs3err;
+ }
+
+ /*
+ * If brick state changed, we need to force a proper lookup cycle (as
+ * would happen in native protocol) to do self-heal checks. We detect
+ * this by comparing the generation number for the last successful
+ * creation/lookup on the inode to the current number, so inodes that
+ * haven't been validated since the state change are affected.
+ */
+ if (inode_ctx_get(cs->resolvedloc.inode,cs->nfsx,&raw_ctx) == 0) {
+ ictx = (struct nfs_inode_ctx *)raw_ctx;
+ priv = cs->nfsx->private;
+ if (ictx->generation != priv->generation) {
+ ret = nfs_lookup (cs->nfsx, cs->vol, &nfu,
+ &cs->resolvedloc,
+ nfs3svc_getattr_lookup_cbk, cs);
+ goto check_err;
+ }
+ }
+
+ ret = nfs_stat (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ nfs3svc_getattr_stat_cbk, cs);
+
+check_err:
+ if (ret < 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Stat fop failed: %s: %s",
+ cs->oploc.path, strerror (-ret));
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+ }
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_GETATTR, stat, -ret);
+ nfs3_getattr_reply (cs->req, stat, &cs->stbuf);
+ nfs3_call_state_wipe (cs);
+ ret = 0;
+ }
+
+ return ret;
+}
+
+
+int
+nfs3_getattr (rpcsvc_request_t *req, struct nfs3_fh *fh)
+{
+ xlator_t *vol = NULL;
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ struct nfs3_state *nfs3 = NULL;
+ nfs3_call_state_t *cstate = NULL;
+
+ GF_VALIDATE_OR_GOTO (GF_NFS3, req, out);
+ GF_VALIDATE_OR_GOTO (GF_NFS3, fh, out);
+
+ nfs3_log_common_call (rpcsvc_request_xid (req), "GETATTR", fh);
+ nfs3_validate_gluster_fh (fh, stat, nfs3err);
+ nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
+ nfs3_map_fh_to_volume (nfs3, fh, req, vol, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
+ nfs3_handle_call_state_init (nfs3, cstate, req, vol, stat, nfs3err);
+
+ ret = nfs3_fh_resolve_and_resume (cstate, fh, NULL,nfs3_getattr_resume);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (req), NFS3_GETATTR,
+ stat, -ret);
+ nfs3_getattr_reply (req, stat, NULL);
+ ret = 0;
+ nfs3_call_state_wipe (cstate);
+ }
+out:
+ return ret;
+}
+
+
+int
+nfs3svc_getattr (rpcsvc_request_t *req)
+{
+ struct nfs3_fh fh = {{0}, };
+ getattr3args args;
+ int ret = RPCSVC_ACTOR_ERROR;
+
+ if (!req)
+ return ret;
+
+ nfs3_prep_getattr3args (&args, &fh);
+ if (xdr_to_getattr3args (req->msg[0], &args) <= 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+
+ ret = nfs3_getattr (req, &fh);
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "GETATTR procedure failed");
+ rpcsvc_request_seterr (req, SYSTEM_ERR);
+ ret = RPCSVC_ACTOR_ERROR;
+ }
+
+rpcerr:
+ return ret;
+}
+
+
+int
+nfs3_setattr_reply (rpcsvc_request_t *req, nfsstat3 stat, struct iatt *preop,
+ struct iatt *postop)
+{
+ setattr3res res = {0, };
+ uint64_t deviceid = 0;
+
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_setattr3res (&res, stat, preop, postop, deviceid);
+ nfs3svc_submit_reply (req, (void *)&res,
+ (nfs3_serializer) xdr_serialize_setattr3res);
+ return 0;
+}
+
+
+int32_t
+nfs3svc_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ struct iatt *prestat = NULL;
+ nfs3_call_state_t *cs = NULL;
+
+ cs = frame->local;
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_WARNING,
+ "%x: %s => -1 (%s)", rpcsvc_request_xid (cs->req),
+ cs->resolvedloc.path, strerror (op_errno));
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ goto nfs3err;
+ }
+
+ /* If the first stat was got from the guarded setattr callback, or
+ * from an earlier setattr call then we'll need to use that stat
+ * instead of the preop returned here.
+ */
+ if (cs->preparent.ia_ino != 0)
+ prestat = &cs->preparent;
+ else
+ prestat = prebuf;
+
+ stat = NFS3_OK;
+nfs3err:
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_SETATTR, stat,
+ op_errno);
+ nfs3_setattr_reply (cs->req, stat, prestat, postbuf);
+ nfs3_call_state_wipe (cs);
+
+ return 0;
+}
+
+
+int32_t
+nfs3svc_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preop,
+ struct iatt *postop, dict_t *xdata)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -1;
+ struct iatt *prebuf = NULL;
+ nfs_user_t nfu = {0, };
+ nfs3_call_state_t *cs = NULL;
+
+ cs = frame->local;
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_WARNING,
+ "%x: %s => -1 (%s)", rpcsvc_request_xid (cs->req),
+ cs->resolvedloc.path, strerror (op_errno));
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ goto nfs3err;
+ }
+
+ prebuf = preop;
+ /* Store the current preop in case we need to send a truncate,
+ * in which case the preop to be returned will be this one.
+ */
+ cs->preparent = *preop;
+
+ /* Only truncate if the size is not already same as the requested
+ * truncation and also only if this is not a directory.
+ */
+ if ((gf_attr_size_set (cs->setattr_valid)) &&
+ (!IA_ISDIR (postop->ia_type)) &&
+ (preop->ia_size != cs->stbuf.ia_size)) {
+ nfs_request_user_init (&nfu, cs->req);
+ ret = nfs_truncate (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ cs->stbuf.ia_size, nfs3svc_truncate_cbk,cs);
+
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+ } else {
+ ret = -1; /* Force a reply in the branch below. */
+ stat = NFS3_OK;
+ }
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_SETATTR, stat, op_errno);
+ nfs3_setattr_reply (cs->req, stat, prebuf, postop);
+ nfs3_call_state_wipe (cs);
+ }
+
+ return 0;
+}
+
+
+
+int32_t
+nfs3svc_setattr_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+
+ int ret = -EFAULT;
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ nfs_user_t nfu = {0, };
+ nfs3_call_state_t *cs = NULL;
+
+ cs = frame->local;
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_WARNING,
+ "%x: %s => -1 (%s)", rpcsvc_request_xid (cs->req),
+ cs->resolvedloc.path, strerror (op_errno));
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ goto nfs3err;
+ }
+
+ if (buf->ia_ctime != cs->timestamp.seconds) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Timestamps not in sync");
+ stat = NFS3ERR_NOT_SYNC;
+ goto nfs3err;
+ }
+
+ /* Not a clean way but no motivation to add a new member to local. */
+ cs->preparent = *buf;
+ nfs_request_user_init (&nfu, cs->req);
+ ret = nfs_setattr (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,&cs->stbuf,
+ cs->setattr_valid, nfs3svc_setattr_cbk, cs);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_SETATTR, stat, op_errno);
+ nfs3_setattr_reply (cs->req, stat, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ }
+
+ return 0;
+}
+
+
+int
+nfs3_setattr_resume (void *carg)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ nfs_user_t nfu = {0, };
+ nfs3_call_state_t *cs = NULL;
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ nfs3_check_fh_resolve_status (cs, stat, nfs3err);
+ nfs_request_user_init (&nfu, cs->req);
+ ret = nfs_setattr (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ &cs->stbuf, cs->setattr_valid,
+ nfs3svc_setattr_cbk, cs);
+
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_SETATTR, stat, -ret);
+ nfs3_setattr_reply (cs->req, stat, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ }
+
+ return ret;
+}
+
+
+int
+nfs3_setattr (rpcsvc_request_t *req, struct nfs3_fh *fh, sattr3 *sattr,
+ sattrguard3 *guard)
+{
+ xlator_t *vol = NULL;
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ struct nfs3_state *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+
+ GF_VALIDATE_OR_GOTO (GF_NFS3, req, out);
+ GF_VALIDATE_OR_GOTO (GF_NFS3, fh, out);
+ GF_VALIDATE_OR_GOTO (GF_NFS3, sattr, out);
+ GF_VALIDATE_OR_GOTO (GF_NFS3, guard, out);
+
+ nfs3_log_common_call (rpcsvc_request_xid (req), "SETATTR", fh);
+ nfs3_validate_gluster_fh (fh, stat, nfs3err);
+ nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
+ nfs3_map_fh_to_volume (nfs3, fh, req, vol, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
+ nfs3_check_rw_volaccess (nfs3, fh->exportid, stat, nfs3err);
+ nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
+
+ cs->setattr_valid = nfs3_sattr3_to_setattr_valid (sattr, &cs->stbuf,
+ NULL);
+ if (guard->check) {
+ gf_log (GF_NFS3, GF_LOG_TRACE, "Guard check required");
+ cs->timestamp = guard->sattrguard3_u.obj_ctime;
+ cs->sattrguardcheck = 1;
+ } else {
+ gf_log (GF_NFS3, GF_LOG_TRACE, "Guard check not required");
+ cs->sattrguardcheck = 0;
+ }
+
+ if (!cs->setattr_valid) {
+ ret = -EINVAL; /* Force a reply */
+ stat = NFS3_OK;
+ gf_log (GF_NFS3, GF_LOG_ERROR, "cs->setattr_valid is invalid");
+ goto nfs3err;
+ }
+
+ ret = nfs3_fh_resolve_and_resume (cs, fh, NULL, nfs3_setattr_resume);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (req), NFS3_SETATTR,
+ stat, -ret);
+ nfs3_setattr_reply (req, stat, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ /* Ret must be 0 after this so that the caller does not
+ * also send an RPC reply.
+ */
+ ret = 0;
+ }
+out:
+ return ret;
+}
+
+
+
+int
+nfs3svc_setattr (rpcsvc_request_t *req)
+{
+ struct nfs3_fh fh = {{0}, };
+ setattr3args args;
+ int ret = RPCSVC_ACTOR_ERROR;
+
+ GF_VALIDATE_OR_GOTO (GF_NFS3, req, rpcerr);
+
+ nfs3_prep_setattr3args (&args, &fh);
+ if (xdr_to_setattr3args (req->msg[0], &args) <= 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+
+ ret = nfs3_setattr (req, &fh, &args.new_attributes, &args.guard);
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "SETATTR procedure failed");
+ rpcsvc_request_seterr (req, SYSTEM_ERR);
+ ret = RPCSVC_ACTOR_ERROR;
+ }
+
+rpcerr:
+ return ret;
+
+}
+
+
+int
+nfs3_lookup_reply (rpcsvc_request_t *req, nfsstat3 stat, struct nfs3_fh *newfh,
+ struct iatt *stbuf, struct iatt *postparent)
+{
+ lookup3res res = {0, };
+ uint64_t deviceid = 0;
+
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_lookup3res (&res, stat, newfh, stbuf, postparent, deviceid);
+ return nfs3svc_submit_reply (req, &res,
+ (nfs3_serializer)xdr_serialize_lookup3res);
+}
+
+int
+nfs3_lookup_resume (void *carg);
+
+
+int
+nfs3_fresh_lookup (nfs3_call_state_t *cs)
+{
+ int ret = -EFAULT;
+ char *oldresolventry = NULL;
+
+ GF_VALIDATE_OR_GOTO (GF_NFS3, cs, err);
+ gf_log (GF_NFS3, GF_LOG_DEBUG, "inode needs fresh lookup");
+ inode_unlink (cs->resolvedloc.inode, cs->resolvedloc.parent,
+ cs->resolventry);
+ nfs_loc_wipe (&cs->resolvedloc);
+
+ /* Store pointer to currently allocated resolventry because it gets over
+ * written in fh_resolve_and_resume.
+ */
+ oldresolventry = cs->resolventry;
+ cs->lookuptype = GF_NFS3_FRESH;
+ ret = nfs3_fh_resolve_and_resume (cs, &cs->resolvefh, cs->resolventry,
+ nfs3_lookup_resume);
+ /* Allocated in the previous call to fh_resolve_and_resume using the
+ * same call_state.
+ */
+ GF_FREE (oldresolventry);
+err:
+ return ret;
+}
+
+int
+nfs3svc_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr, struct iatt *postparent)
+{
+ struct nfs3_fh newfh = {{0}, };
+ nfsstat3 status = NFS3_OK;
+ nfs3_call_state_t *cs = NULL;
+ inode_t *oldinode = NULL;
+
+ cs = frame->local;
+ if (op_ret == -1) {
+ gf_log (GF_NFS,
+ (op_errno == ENOENT ? GF_LOG_TRACE : GF_LOG_WARNING),
+ "%x: %s => -1 (%s)", rpcsvc_request_xid (cs->req),
+ cs->resolvedloc.path, strerror (op_errno));
+ status = nfs3_cbk_errno_status (op_ret, op_errno);
+ goto xmit_res;
+ }
+
+ nfs3_fh_build_child_fh (&cs->parent, buf, &newfh);
+ oldinode = inode_link (inode, cs->resolvedloc.parent,
+ cs->resolvedloc.name, buf);
+xmit_res:
+ /* Only send fresh lookup if it was a revalidate that failed. */
+ if ((op_ret == -1) && (nfs3_is_revalidate_lookup (cs))) {
+ op_ret = nfs3_fresh_lookup (cs);
+ goto out;
+ }
+
+ nfs3_log_newfh_res (rpcsvc_request_xid (cs->req), NFS3_LOOKUP, status,
+ op_errno, &newfh);
+ nfs3_lookup_reply (cs->req, status, &newfh, buf, postparent);
+ nfs3_call_state_wipe (cs);
+out:
+ if (oldinode) {
+ inode_lookup (oldinode);
+ inode_unref (oldinode);
+ }
+ return 0;
+}
+
+
+int
+nfs3svc_lookup_parentdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ struct nfs3_fh newfh = {{0}, };
+ nfsstat3 status = NFS3_OK;
+ nfs3_call_state_t *cs = NULL;
+ uuid_t volumeid = {0, };
+ struct nfs3_state *nfs3 = NULL;
+
+ cs = frame->local;
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_WARNING,
+ "%x: %s => -1 (%s)", rpcsvc_request_xid (cs->req),
+ cs->resolvedloc.path, strerror (op_errno));
+ status = nfs3_cbk_errno_status (op_ret, op_errno);
+ goto xmit_res;
+ }
+
+ nfs3 = cs->nfs3state;
+ /* If the buf inode shows that this is a root dir's buf, then the file
+ * handle needs to be specially crafted, in all other cases, we'll just
+ * create the handle normally using the buffer of the parent dir.
+ */
+ if (buf->ia_ino != 1) {
+ nfs3_fh_build_parent_fh (&cs->fh, buf, &newfh);
+ goto xmit_res;
+ }
+
+ if (gf_nfs_dvm_off (nfs_state (nfs3->nfsx)))
+ newfh = nfs3_fh_build_indexed_root_fh (nfs3->exportslist,
+ cs->vol);
+ else {
+ __nfs3_get_volume_id (nfs3, cs->vol, volumeid);
+ newfh = nfs3_fh_build_uuid_root_fh (volumeid);
+ }
+
+xmit_res:
+ nfs3_log_newfh_res (rpcsvc_request_xid (cs->req), NFS3_LOOKUP, status,
+ op_errno, &newfh);
+ nfs3_lookup_reply (cs->req, status, &newfh, buf, postparent);
+ nfs3_call_state_wipe (cs);
+
+ return 0;
+}
+
+
+
+int
+nfs3_lookup_parentdir_resume (void *carg)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ nfs_user_t nfu = {0, };
+ nfs3_call_state_t *cs = NULL;
+ inode_t *parent = NULL;
+
+ if (!carg) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Invalid argument,"
+ " carg value NULL");
+ return EINVAL;
+ }
+
+ cs = (nfs3_call_state_t *)carg;
+ nfs3_check_fh_resolve_status (cs, stat, nfs3err);
+
+ /* At this point now, the loc in cs is for the directory file handle
+ * sent by the client. This loc needs to be transformed into a loc that
+ * represents the parent dir of cs->resolvedloc.inode.
+ *
+ * EXCEPT in the case where the .. is a parent of the root directory.
+ * In this case we'll be returning the file handle and attributes of the
+ * root itself.
+ */
+ nfs_request_user_init (&nfu, cs->req);
+
+ /* Save the file handle from the LOOKUP request. We'll use this to
+ * build the file handle of the parent directory in case the parent is
+ * not root dir.
+ */
+ cs->fh = cs->resolvefh;
+
+ /* If fh is that of the root, the resolvedloc will already contain
+ * the loc for root. After that, we'll send lookup for the root dir
+ * itself since we cannot send the lookup on the parent of root.
+ *
+ * For all other cases, we'll send the lookup on the parent of the
+ * given directory file handle.
+ */
+ if (!nfs3_fh_is_root_fh (&cs->fh)) {
+ parent = inode_ref (cs->resolvedloc.parent);
+ nfs_loc_wipe (&cs->resolvedloc);
+ ret = nfs_inode_loc_fill (parent, &cs->resolvedloc,
+ NFS_RESOLVE_CREATE);
+
+ if (ret < 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "nfs_inode_loc_fill"
+ " error");
+ goto errtostat;
+ }
+ }
+
+ ret = nfs_lookup (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ nfs3svc_lookup_parentdir_cbk, cs);
+errtostat:
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_LOOKUP,
+ stat, -ret);
+ nfs3_lookup_reply (cs->req, stat, NULL, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ }
+
+ if (parent)
+ inode_unref (parent);
+
+ return ret;
+}
+
+
+int
+nfs3_lookup_resume (void *carg)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ nfs_user_t nfu = {0, };
+ nfs3_call_state_t *cs = NULL;
+ struct nfs3_fh newfh = {{0},};
+
+ if (!carg) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Invalid argument,"
+ " carg value NULL");
+ return EINVAL;
+ }
+
+ cs = (nfs3_call_state_t *)carg;
+ nfs3_check_fh_resolve_status (cs, stat, nfs3err);
+ cs->parent = cs->resolvefh;
+
+ if (cs->hardresolved) {
+ stat = NFS3_OK;
+ nfs3_fh_build_child_fh (&cs->parent, &cs->stbuf, &newfh);
+ goto nfs3err;
+ }
+
+ nfs_request_user_init (&nfu, cs->req);
+ ret = nfs_lookup (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ nfs3svc_lookup_cbk, cs);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_LOOKUP,
+ stat, -ret);
+ nfs3_lookup_reply (cs->req, stat, &newfh, &cs->stbuf,
+ &cs->postparent);
+ nfs3_call_state_wipe (cs);
+ }
+
+ return ret;
+}
+
+
+int
+nfs3_lookup (rpcsvc_request_t *req, struct nfs3_fh *fh, int fhlen, char *name)
+{
+ xlator_t *vol = NULL;
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ struct nfs3_state *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+
+ GF_VALIDATE_OR_GOTO (GF_NFS3, req, out);
+ GF_VALIDATE_OR_GOTO (GF_NFS3, fh, out);
+ GF_VALIDATE_OR_GOTO (GF_NFS3, name, out);
+
+ nfs3_log_fh_entry_call (rpcsvc_request_xid (req), "LOOKUP", fh,
+ name);
+ nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
+ if (nfs3_solaris_zerolen_fh (fh, fhlen))
+ nfs3_funge_solaris_zerolen_fh (nfs3, fh, name, stat, nfs3err);
+ else
+ nfs3_validate_gluster_fh (fh, stat, nfs3err);
+ nfs3_validate_strlen_or_goto (name, NFS_NAME_MAX, nfs3err, stat, ret);
+ nfs3_map_fh_to_volume (nfs3, fh, req, vol, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
+ nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
+
+ cs->lookuptype = GF_NFS3_REVALIDATE;
+ ret = nfs3_fh_resolve_and_resume (cs, fh, name,
+ nfs3_lookup_resume);
+
+ if (ret < 0) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "failed to start hard reslove");
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+ }
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (req), NFS3_LOOKUP,
+ stat,
+ -ret);
+ nfs3_lookup_reply (req, stat, NULL, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ /* Ret must be 0 after this so that the caller does not
+ * also send an RPC reply.
+ */
+ ret = 0;
+ }
+out:
+ return ret;
+}
+
+
+int
+nfs3svc_lookup (rpcsvc_request_t *req)
+{
+ char name[NFS_PATH_MAX];
+ struct nfs3_fh fh = {{0}, };
+ lookup3args args;
+ int ret = RPCSVC_ACTOR_ERROR;
+
+ GF_VALIDATE_OR_GOTO (GF_NFS, req, rpcerr);
+
+ nfs3_prep_lookup3args (&args, &fh, name);
+ if (xdr_to_lookup3args (req->msg[0], &args) <= 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+
+ ret = nfs3_lookup (req, &fh, args.what.dir.data.data_len, name);
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "LOOKUP procedure failed");
+ rpcsvc_request_seterr (req, SYSTEM_ERR);
+ ret = RPCSVC_ACTOR_ERROR;
+ }
+
+rpcerr:
+ return ret;
+}
+
+
+int
+nfs3_access_reply (rpcsvc_request_t *req, nfsstat3 status, int32_t accbits,
+ int32_t reqaccbits)
+{
+ access3res res;
+
+ nfs3_fill_access3res (&res, status, accbits, reqaccbits);
+ nfs3svc_submit_reply (req, &res,
+ (nfs3_serializer)xdr_serialize_access3res);
+ return 0;
+}
+
+
+int32_t
+nfs3svc_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ nfsstat3 status = NFS3_OK;
+ nfs3_call_state_t *cs = NULL;
+
+ cs = frame->local;
+
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_WARNING,
+ "%x: %s => -1 (%s)", rpcsvc_request_xid (cs->req),
+ cs->resolvedloc.path, strerror (op_errno));
+ status = nfs3_cbk_errno_status (op_ret, op_errno);
+ }
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_ACCESS, status,
+ op_errno);
+ nfs3_access_reply (cs->req, status, op_errno, cs->accessbits);
+ nfs3_call_state_wipe (cs);
+
+ return 0;
+}
+
+int
+nfs3_access_resume (void *carg)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ nfs_user_t nfu = {0, };
+ nfs3_call_state_t *cs = NULL;
+
+ if (!carg) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Invalid argument,"
+ " carg value NULL");
+ return EINVAL;
+ }
+
+ cs = (nfs3_call_state_t *)carg;
+ nfs3_check_fh_resolve_status (cs, stat, nfs3err);
+ cs->fh = cs->resolvefh;
+ nfs_request_user_init (&nfu, cs->req);
+ ret = nfs_access (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ cs->accessbits, nfs3svc_access_cbk, cs);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_ACCESS,
+ stat, -ret);
+ nfs3_access_reply (cs->req, stat, 0, 0);
+ nfs3_call_state_wipe (cs);
+ ret = 0;
+ }
+
+ return ret;
+}
+
+
+int
+nfs3_access (rpcsvc_request_t *req, struct nfs3_fh *fh, uint32_t accbits)
+{
+ xlator_t *vol = NULL;
+ struct nfs3_state *nfs3 = NULL;
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ nfs3_call_state_t *cs = NULL;
+
+ GF_VALIDATE_OR_GOTO (GF_NFS, req, out);
+ GF_VALIDATE_OR_GOTO (GF_NFS, fh, out);
+ nfs3_log_common_call (rpcsvc_request_xid (req), "ACCESS", fh);
+ nfs3_validate_gluster_fh (fh, stat, nfs3err);
+ nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
+ nfs3_map_fh_to_volume (nfs3, fh, req, vol, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
+ nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
+ cs->accessbits = accbits;
+
+ ret = nfs3_fh_resolve_and_resume (cs, fh, NULL, nfs3_access_resume);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (req), NFS3_ACCESS,
+ stat, -ret);
+ nfs3_access_reply (req, stat, 0, 0);
+ nfs3_call_state_wipe (cs);
+ ret = 0;
+ }
+out:
+ return ret;
+}
+
+
+int
+nfs3svc_access (rpcsvc_request_t *req)
+{
+ struct nfs3_fh fh = {{0}, };
+ access3args args;
+ int ret = RPCSVC_ACTOR_ERROR;
+
+ if (!req)
+ return ret;
+
+ nfs3_prep_access3args (&args, &fh);
+ if (xdr_to_access3args (req->msg[0], &args) <= 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+
+ ret = nfs3_access (req, &fh, args.access);
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "ACCESS procedure failed");
+ rpcsvc_request_seterr (req, SYSTEM_ERR);
+ ret = RPCSVC_ACTOR_ERROR;
+ }
+
+rpcerr:
+ return ret;
+}
+
+
+int
+nfs3_readlink_reply (rpcsvc_request_t *req, nfsstat3 stat, char *path,
+ struct iatt *buf)
+{
+ readlink3res res = {0, };
+ uint64_t deviceid = 0;
+
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_readlink3res (&res, stat, path, buf, deviceid);
+ nfs3svc_submit_reply (req, (void *)&res,
+ (nfs3_serializer)xdr_serialize_readlink3res);
+
+ return 0;
+}
+
+
+int32_t
+nfs3svc_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *path,
+ struct iatt *buf, dict_t *xdata)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ nfs3_call_state_t *cs = NULL;
+
+ cs = frame->local;
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_WARNING,
+ "%x: %s => -1 (%s)", rpcsvc_request_xid (cs->req),
+ cs->resolvedloc.path, strerror (op_errno));
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ goto nfs3err;
+ }
+
+ stat = NFS3_OK;
+
+nfs3err:
+ nfs3_log_readlink_res (rpcsvc_request_xid (cs->req), stat, op_errno,
+ (char *)path);
+ nfs3_readlink_reply (cs->req, stat, (char *)path, buf);
+ nfs3_call_state_wipe (cs);
+
+ return 0;
+}
+
+
+int
+nfs3_readlink_resume (void *carg)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ nfs3_call_state_t *cs = NULL;
+ nfs_user_t nfu = {0, };
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ nfs3_check_fh_resolve_status (cs, stat, nfs3err);
+ nfs_request_user_init (&nfu, cs->req);
+ ret = nfs_readlink (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ nfs3svc_readlink_cbk, cs);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_READLINK, stat, -ret);
+ nfs3_readlink_reply (cs->req, stat, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ }
+
+ return ret;
+}
+
+
+int
+nfs3_readlink (rpcsvc_request_t *req, struct nfs3_fh *fh)
+{
+ xlator_t *vol = NULL;
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ struct nfs3_state *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+
+ if ((!req) || (!fh)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
+ return -1;
+ }
+
+ nfs3_log_common_call (rpcsvc_request_xid (req), "READLINK", fh);
+ nfs3_validate_gluster_fh (fh, stat, nfs3err);
+ nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
+ nfs3_map_fh_to_volume (nfs3, fh, req, vol, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
+ nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
+
+ ret = nfs3_fh_resolve_and_resume (cs, fh, NULL, nfs3_readlink_resume);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (req), NFS3_READLINK,
+ stat, -ret);
+ nfs3_readlink_reply (req, stat, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ /* Ret must be 0 after this so that the caller does not
+ * also send an RPC reply.
+ */
+ ret = 0;
+ }
+out:
+ return ret;
+}
+
+
+int
+nfs3svc_readlink (rpcsvc_request_t *req)
+{
+ struct nfs3_fh fh = {{0}, };
+ readlink3args args;
+ int ret = RPCSVC_ACTOR_ERROR;
+
+ if (!req)
+ return ret;
+
+ nfs3_prep_readlink3args (&args, &fh);
+ if (xdr_to_readlink3args (req->msg[0], &args) <= 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+
+ ret = nfs3_readlink (req, &fh);
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "READLINK procedure failed");
+ rpcsvc_request_seterr (req, SYSTEM_ERR);
+ ret = RPCSVC_ACTOR_ERROR;
+ }
+
+rpcerr:
+ return ret;
+}
+
+
+int
+nfs3_read_reply (rpcsvc_request_t *req, nfsstat3 stat, count3 count,
+ struct iovec *vec, int vcount, struct iobref *iobref,
+ struct iatt *poststat, int is_eof)
+{
+ read3res res = {0, };
+ uint64_t deviceid = 0;
+
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_read3res (&res, stat, count, poststat, is_eof, deviceid);
+ if (stat == NFS3_OK) {
+ xdr_vector_round_up (vec, vcount, count);
+ /* iob can be zero if the file size was zero. If so, op_ret
+ * would be 0 and count = 0.
+ */
+
+ if (count != 0) {
+ nfs3svc_submit_vector_reply (req, (void *)&res,
+ (nfs3_serializer)
+ xdr_serialize_read3res_nocopy,
+ vec, vcount, iobref);
+ } else
+
+ nfs3svc_submit_reply (req, (void *)&res,
+ (nfs3_serializer)
+ xdr_serialize_read3res_nocopy);
+ } else
+ nfs3svc_submit_reply (req, (void *)&res,
+ (nfs3_serializer)
+ xdr_serialize_read3res_nocopy);
+
+ return 0;
+}
+
+
+int32_t
+nfs3svc_read_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int is_eof = 0;
+ nfs3_call_state_t *cs = NULL;
+
+ cs = frame->local;
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_WARNING,
+ "%x: %s => -1 (%s)", rpcsvc_request_xid (cs->req),
+ cs->resolvedloc.path, strerror (op_errno));
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ goto err;
+ } else
+ stat = NFS3_OK;
+
+ if (op_errno == ENOENT)
+ is_eof = 1;
+
+err:
+ nfs3_log_read_res (rpcsvc_request_xid (cs->req), stat, op_errno,
+ op_ret, is_eof, vector, count);
+ nfs3_read_reply (cs->req, stat, op_ret, vector, count, iobref, stbuf,
+ is_eof);
+ nfs3_call_state_wipe (cs);
+
+ return 0;
+}
+
+
+int
+nfs3_read_fd_resume (void *carg)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ nfs_user_t nfu = {0, };
+ nfs3_call_state_t *cs = NULL;
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ nfs3_check_fh_resolve_status (cs, stat, nfs3err);
+ nfs_request_user_init (&nfu, cs->req);
+ ret = nfs_read (cs->nfsx, cs->vol, &nfu, cs->fd, cs->datacount,
+ cs->dataoffset, nfs3svc_read_cbk, cs);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_READ,
+ stat, -ret);
+ nfs3_read_reply (cs->req, stat, 0, NULL, 0, NULL, NULL, 0);
+ nfs3_call_state_wipe (cs);
+ }
+
+ return ret;
+}
+
+
+int
+nfs3_read_resume (void *carg)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ nfs3_call_state_t *cs = NULL;
+ fd_t *fd = NULL;
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ nfs3_check_fh_resolve_status (cs, stat, nfs3err);
+ fd = fd_anonymous (cs->resolvedloc.inode);
+ if (!fd) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to create anonymous fd");
+ goto nfs3err;
+ }
+
+ cs->fd = fd;
+ nfs3_read_fd_resume (cs);
+ ret = 0;
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_READ,
+ stat, -ret);
+ nfs3_read_reply (cs->req, stat, 0, NULL,0, NULL, NULL, 0);
+ nfs3_call_state_wipe (cs);
+ }
+
+ return ret;
+}
+
+int
+nfs3_read (rpcsvc_request_t *req, struct nfs3_fh *fh, offset3 offset,
+ count3 count)
+{
+ xlator_t *vol = NULL;
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ struct nfs3_state *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+
+ if ((!req) || (!fh)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
+ return -1;
+ }
+
+ nfs3_log_rw_call (rpcsvc_request_xid (req), "READ", fh, offset,
+ count, -1);
+ nfs3_validate_gluster_fh (fh, stat, nfs3err);
+ nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
+ nfs3_map_fh_to_volume (nfs3, fh, req, vol, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
+ nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
+
+ cs->datacount = count;
+ cs->dataoffset = offset;
+ ret = nfs3_fh_resolve_and_resume (cs, fh, NULL, nfs3_read_resume);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (req), NFS3_READ, stat,
+ -ret);
+ nfs3_read_reply (req, stat, 0, NULL,0, NULL, NULL, 0);
+ nfs3_call_state_wipe (cs);
+ ret = 0;
+ }
+out:
+ return ret;
+}
+
+
+int
+nfs3svc_read (rpcsvc_request_t *req)
+{
+ struct nfs3_fh fh = {{0}, };
+ read3args args;
+ int ret = RPCSVC_ACTOR_ERROR;
+
+ if (!req)
+ return ret;
+
+ nfs3_prep_read3args (&args, &fh);
+ if (xdr_to_read3args (req->msg[0], &args) <= 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+
+ ret = nfs3_read (req, &fh, args.offset, args.count);
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "READ procedure failed");
+ rpcsvc_request_seterr (req, SYSTEM_ERR);
+ ret = RPCSVC_ACTOR_ERROR;
+ }
+
+rpcerr:
+ return ret;
+}
+
+
+int
+nfs3_write_reply (rpcsvc_request_t *req, nfsstat3 stat, count3 count,
+ stable_how stable, uint64_t wverf, struct iatt *prestat,
+ struct iatt *poststat)
+{
+ write3res res = {0, };
+ uint64_t deviceid = 0;
+
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_write3res (&res, stat, count, stable, wverf, prestat,
+ poststat, deviceid);
+ nfs3svc_submit_reply (req, (void *)&res,
+ (nfs3_serializer)xdr_serialize_write3res);
+
+ return 0;
+}
+
+int32_t
+nfs3svc_write_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ struct nfs3_state *nfs3 = NULL;
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ nfs3_call_state_t *cs = NULL;
+
+ cs = frame->local;
+ nfs3 = rpcsvc_request_program_private (cs->req);
+
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_WARNING,
+ "%x: %s => -1 (%s)", rpcsvc_request_xid (cs->req),
+ cs->resolvedloc.path, strerror (op_errno));
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ } else
+ stat = NFS3_OK;
+
+ nfs3_log_write_res (rpcsvc_request_xid (cs->req), stat, op_errno,
+ cs->maxcount, cs->writetype, nfs3->serverstart);
+ nfs3_write_reply (cs->req, stat, cs->maxcount, cs->writetype,
+ nfs3->serverstart, &cs->stbuf, postbuf);
+ nfs3_call_state_wipe (cs);
+ return 0;
+}
+
+
+
+/*
+ * Before going into the write reply logic, here is a matrix that shows the
+ * requirements for a write reply as given by RFC1813.
+ *
+ * Requested Write Type || Possible Returns
+ * ==============================================
+ * FILE_SYNC || FILE_SYNC
+ * DATA_SYNC || DATA_SYNC or FILE_SYNC
+ * UNSTABLE || DATA_SYNC or FILE_SYNC or UNSTABLE
+ *
+ * Write types other than UNSTABLE are together called STABLE.
+ * RS - Return Stable
+ * RU - Return Unstable
+ * WS - Write Stable
+ * WU - Write Unstable
+ *
+ *+============================================+
+ *| Vol Opts -> || trusted-write| trusted-sync |
+ *| Write Type || | |
+ *|-------------||--------------|--------------|
+ *| STABLE || WS | WU |
+ *| || RS | RS |
+ *|-------------||--------------|--------------|
+ *| UNSTABLE || WU | WU |
+ *| || RS | RS |
+ *|-------------||--------------|--------------|
+ *| COMMIT || fsync | getattr |
+ *+============================================+
+ *
+ *
+ */
+int32_t
+nfs3svc_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ nfs3_call_state_t *cs = NULL;
+ struct nfs3_state *nfs3 = NULL;
+
+ cs = frame->local;
+ nfs3 = rpcsvc_request_program_private (cs->req);
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_WARNING,
+ "%x: %s => -1 (%s)", rpcsvc_request_xid (cs->req),
+ cs->resolvedloc.path, strerror (op_errno));
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ goto err;
+ }
+
+ stat = NFS3_OK;
+ cs->maxcount = op_ret;
+
+err:
+ nfs3_log_write_res (rpcsvc_request_xid (cs->req), stat,
+ op_errno, cs->maxcount, cs->writetype,
+ nfs3->serverstart);
+ nfs3_write_reply (cs->req, stat, cs->maxcount,
+ cs->writetype, nfs3->serverstart, prebuf,
+ postbuf);
+ nfs3_call_state_wipe (cs);
+
+ return 0;
+}
+
+
+int
+__nfs3_write_resume (nfs3_call_state_t *cs)
+{
+ int ret = -EFAULT;
+ nfs_user_t nfu = {0, };
+
+ if (!cs)
+ return ret;
+
+ nfs_request_user_init (&nfu, cs->req);
+ /* It is possible that the RPC record contains more bytes than
+ * than the size of write requested in this request. This means,
+ * that in the RPC message buffer, there could be more bytes
+ * beyind the @count bytes. Since @payload is referring to the write
+ * data directly inside the RPC request buffer(..since we performed a
+ * no-copy deXDRing..), we might end up writing more data than
+ * requested, because till now payload.iov_len accounts for all the
+ * bytes not just the write request bytes. These extra bytes are present
+ * as a requirement of the XDR encoding to round up the all string and
+ * opaque data buffers to multiples of 4 bytes.
+ */
+ cs->datavec.iov_len = cs->datacount;
+ ret = nfs_write (cs->nfsx, cs->vol, &nfu, cs->fd, cs->iobref,
+ &cs->datavec, 1, cs->dataoffset, nfs3svc_write_cbk,
+ cs);
+
+ return ret;
+}
+
+
+int
+nfs3_write_resume (void *carg)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ nfs3_call_state_t *cs = NULL;
+ fd_t *fd = NULL;
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ nfs3_check_fh_resolve_status (cs, stat, nfs3err);
+ fd = fd_anonymous (cs->resolvedloc.inode);
+ if (!fd) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to create anonymous fd");
+ goto nfs3err;
+ }
+
+ cs->fd = fd; /* Gets unrefd when the call state is wiped. */
+
+/*
+ enum stable_how {
+ UNSTABLE = 0,
+ DATA_SYNC = 1,
+ FILE_SYNC = 2,
+ };
+*/
+ switch (cs->writetype) {
+ case UNSTABLE:
+ break;
+ case DATA_SYNC:
+ fd->flags |= O_DSYNC;
+ break;
+ case FILE_SYNC:
+ fd->flags |= O_SYNC;
+ break;
+ }
+
+ ret = __nfs3_write_resume (cs);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_WRITE,
+ stat, -ret);
+ nfs3_write_reply (cs->req, stat, 0, cs->writetype, 0, NULL,
+ NULL);
+ nfs3_call_state_wipe (cs);
+ }
+ return ret;
+}
+
+
+int
+nfs3_write (rpcsvc_request_t *req, struct nfs3_fh *fh, offset3 offset,
+ count3 count, stable_how stable, struct iovec payload,
+ struct iobref *iobref)
+{
+ xlator_t *vol = NULL;
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ struct nfs3_state *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+
+ if ((!req) || (!fh) || (!payload.iov_base)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
+ return -1;
+ }
+
+ nfs3_log_rw_call (rpcsvc_request_xid (req), "WRITE", fh, offset,
+ count, stable);
+ nfs3_validate_gluster_fh (fh, stat, nfs3err);
+ nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
+ nfs3_map_fh_to_volume (nfs3, fh, req, vol, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
+ nfs3_check_rw_volaccess (nfs3, fh->exportid, stat, nfs3err);
+ nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
+ cs->datacount = count;
+ cs->dataoffset = offset;
+ cs->writetype = stable;
+ cs->iobref = iobref;
+ cs->datavec = payload;
+
+
+ ret = nfs3_fh_resolve_and_resume (cs, fh, NULL, nfs3_write_resume);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (req), NFS3_WRITE,
+ stat, -ret);
+ nfs3_write_reply (req, stat, 0, stable, 0, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ ret = 0;
+ }
+out:
+ return ret;
+}
+
+#define NFS3_VECWRITE_READFHLEN 1
+#define NFS3_VECWRITE_READFH 2
+#define NFS3_VECWRITE_READREST 3
+
+#define NFS3_WRITE_POSTFH_SIZE 20
+
+
+int
+nfs3svc_write_vecsizer (int state, ssize_t *readsize, char *base_addr,
+ char *curr_addr)
+{
+ int ret = 0;
+ uint32_t fhlen = 0;
+ uint32_t fhlen_n = 0;
+
+ if (state == 0) {
+ ret = NFS3_VECWRITE_READFHLEN;
+ *readsize = 4;
+ } else if (state == NFS3_VECWRITE_READFHLEN) {
+ fhlen_n = *(uint32_t *)(curr_addr - 4);
+ fhlen = ntohl (fhlen_n);
+ *readsize = xdr_length_round_up (fhlen, NFS3_FHSIZE);
+ ret = NFS3_VECWRITE_READFH;
+ } else if (state == NFS3_VECWRITE_READFH) {
+ *readsize = NFS3_WRITE_POSTFH_SIZE;
+ ret = NFS3_VECWRITE_READREST;
+ } else if (state == NFS3_VECWRITE_READREST) {
+ ret = 0;
+ *readsize = 0;
+ } else
+ gf_log ("nfs", GF_LOG_ERROR, "state wrong");
+
+ return ret;
+}
+
+
+int
+nfs3svc_write (rpcsvc_request_t *req)
+{
+ struct nfs3_fh fh = {{0}, };
+ write3args args;
+ int ret = RPCSVC_ACTOR_ERROR;
+
+ if (!req)
+ return ret;
+ nfs3_prep_write3args (&args, &fh);
+ if (xdr_to_write3args (req->msg[0], &args) <= 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+
+ /* To ensure that the iobuf for the current record does not
+ * get returned to the iobpool, we need to keep a reference for
+ * ourselves because the RPC call handler who called us will unref its
+ * own ref of the record's iobuf when it is done handling the request.
+ */
+
+ ret = nfs3_write (req, &fh, args.offset, args.count, args.stable,
+ req->msg[1], rpcsvc_request_iobref_ref (req));
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "WRITE procedure failed");
+ rpcsvc_request_seterr (req, SYSTEM_ERR);
+ ret = RPCSVC_ACTOR_ERROR;
+ }
+
+rpcerr:
+ return ret;
+}
+
+
+int
+nfs3_create_reply (rpcsvc_request_t *req, nfsstat3 stat, struct nfs3_fh *newfh,
+ struct iatt *newbuf, struct iatt *preparent,
+ struct iatt *postparent)
+{
+ create3res res = {0, };
+ uint64_t deviceid = 0;
+
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_create3res (&res, stat, newfh, newbuf, preparent, postparent,
+ deviceid);
+ nfs3svc_submit_reply (req, (void *)&res,
+ (nfs3_serializer)xdr_serialize_create3res);
+ return 0;
+}
+
+
+int32_t
+nfs3svc_create_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ nfs3_call_state_t *cs = NULL;
+
+ cs = frame->local;
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_WARNING,
+ "%x: %s => -1 (%s)", rpcsvc_request_xid (cs->req),
+ cs->resolvedloc.path, strerror (op_errno));
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ goto nfs3err;
+ }
+
+ stat = NFS3_OK;
+nfs3err:
+ nfs3_log_newfh_res (rpcsvc_request_xid (cs->req), NFS3_CREATE, stat,
+ op_errno, &cs->fh);
+ nfs3_create_reply (cs->req, stat, &cs->fh, postop, &cs->preparent,
+ &cs->postparent);
+ nfs3_call_state_wipe (cs);
+
+ return 0;
+}
+
+
+int32_t
+nfs3svc_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ nfs_user_t nfu = {0, };
+ nfs3_call_state_t *cs = NULL;
+ inode_t *oldinode = NULL;
+
+ cs = frame->local;
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_WARNING,
+ "%x: %s => -1 (%s)", rpcsvc_request_xid (cs->req),
+ cs->resolvedloc.path, strerror (op_errno));
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ goto nfs3err;
+ }
+
+ nfs3_fh_build_child_fh (&cs->parent, buf, &cs->fh);
+ oldinode = inode_link (inode, cs->resolvedloc.parent,
+ cs->resolvedloc.name, buf);
+
+ /* Means no attributes were required to be set. */
+ if (!cs->setattr_valid) {
+ stat = NFS3_OK;
+ ret = -1;
+ goto nfs3err;
+ }
+
+ cs->preparent = *preparent;
+ cs->postparent = *postparent;
+ nfs_request_user_init (&nfu, cs->req);
+ uuid_copy (cs->resolvedloc.gfid, inode->gfid);
+ ret = nfs_setattr (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,&cs->stbuf,
+ cs->setattr_valid, nfs3svc_create_setattr_cbk, cs);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (oldinode) {
+ inode_lookup (oldinode);
+ inode_unref (oldinode);
+ }
+
+ if (ret < 0) {
+ nfs3_log_newfh_res (rpcsvc_request_xid (cs->req), NFS3_CREATE,
+ stat, op_errno, &cs->fh);
+ nfs3_create_reply (cs->req, stat, &cs->fh, buf, preparent,
+ postparent);
+ nfs3_call_state_wipe (cs);
+ }
+
+ return 0;
+}
+
+int
+nfs3_create_common (nfs3_call_state_t *cs)
+{
+ int ret = -EFAULT;
+ int flags = 0;
+ nfs_user_t nfu = {0, };
+ uid_t uid = 0;
+ gid_t gid = 0;
+
+ if (!cs)
+ return ret;
+
+ if (cs->createmode == GUARDED)
+ flags = (O_RDWR | O_EXCL);
+ else
+ flags = O_RDWR;
+
+ if (gf_attr_uid_set (cs->setattr_valid)) {
+ uid = cs->stbuf.ia_uid;
+ cs->setattr_valid &= ~GF_SET_ATTR_UID;
+ } else
+ uid = rpcsvc_request_uid (cs->req);
+
+ if (gf_attr_gid_set (cs->setattr_valid)) {
+ gid = cs->stbuf.ia_gid;
+ cs->setattr_valid &= ~GF_SET_ATTR_GID;
+ } else
+ gid = rpcsvc_request_gid (cs->req);
+
+ nfs_request_primary_user_init (&nfu, cs->req, uid, gid);
+ /* We can avoid sending the setattr call later if only the mode is
+ * required to be set. This is possible because the create fop allows
+ * us to specify a mode arg.
+ */
+ if (cs->setattr_valid & GF_SET_ATTR_MODE) {
+ cs->setattr_valid &= ~GF_SET_ATTR_MODE;
+ ret = nfs_create (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ flags, cs->mode, nfs3svc_create_cbk, cs);
+ } else
+ ret = nfs_create (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ flags, NFS_DEFAULT_CREATE_MODE,
+ nfs3svc_create_cbk, cs);
+
+ return ret;
+}
+
+
+int32_t
+nfs3svc_create_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ int ret = -EFAULT;
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ nfs_user_t nfu = {0, };
+ nfs3_call_state_t *cs = NULL;
+
+ cs = frame->local;
+ nfs_request_user_init (&nfu, cs->req);
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_WARNING,
+ "%x: %s => -1 (%s)", rpcsvc_request_xid (cs->req),
+ cs->resolvedloc.path, strerror (op_errno));
+ ret = -op_errno;
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ goto nfs3err;
+ }
+
+ if ((cs->stbuf.ia_mtime == buf->ia_mtime) &&
+ (cs->stbuf.ia_atime == buf->ia_atime)) {
+ gf_log (GF_NFS3, GF_LOG_DEBUG,
+ "Create req retransmitted verf %x %x",
+ cs->stbuf.ia_mtime, cs->stbuf.ia_atime);
+ stat = NFS3_OK;
+ nfs3_fh_build_child_fh (&cs->parent, buf, &cs->fh);
+ } else {
+ gf_log (GF_NFS3, GF_LOG_DEBUG,
+ "File already exist new_verf %x %x"
+ "old_verf %x %x", cs->stbuf.ia_mtime,
+ cs->stbuf.ia_atime,
+ buf->ia_mtime, buf->ia_atime);
+ stat = NFS3ERR_EXIST;
+ }
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_CREATE,
+ stat, op_errno);
+ nfs3_create_reply (cs->req, stat, &cs->fh, buf, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ }
+
+ return 0;
+}
+
+
+int
+nfs3_create_exclusive (nfs3_call_state_t *cs)
+{
+ int ret = -EFAULT;
+ nfs_user_t nfu = {0, };
+
+ if (!cs)
+ return ret;
+
+ /* Storing verifier as a mtime and atime attribute, to store it
+ * in stable storage */
+ memcpy (&cs->stbuf.ia_atime, &cs->cookieverf,
+ sizeof (cs->stbuf.ia_atime));
+ memcpy (&cs->stbuf.ia_mtime,
+ ((char *) &cs->cookieverf) + sizeof (cs->stbuf.ia_atime),
+ sizeof (cs->stbuf.ia_mtime));
+ cs->setattr_valid |= GF_SET_ATTR_ATIME;
+ cs->setattr_valid |= GF_SET_ATTR_MTIME;
+ nfs_request_user_init (&nfu, cs->req);
+
+ /* If the file already existed we need to get that attributes so we can
+ * compare and check whether a previous create operation was
+ * interrupted due to server failure or dropped packets.
+ */
+ if ((cs->resolve_ret == 0) ||
+ ((cs->resolve_ret == -1) && (cs->resolve_errno != ENOENT))) {
+ ret = nfs_stat (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ nfs3svc_create_stat_cbk, cs);
+ goto nfs3err;
+ }
+
+ ret = nfs3_create_common (cs);
+nfs3err:
+ return ret;
+}
+
+
+int
+nfs3_create_resume (void *carg)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ nfs3_call_state_t *cs = NULL;
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ nfs3_check_new_fh_resolve_status (cs, stat, nfs3err);
+ if (cs->createmode == EXCLUSIVE)
+ ret = nfs3_create_exclusive (cs);
+ else
+ ret = nfs3_create_common (cs);
+
+ /* Handle a failure return from either of the create functions above. */
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_CREATE,
+ stat, -ret);
+ nfs3_create_reply (cs->req, stat, NULL, NULL, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ }
+
+ return ret;
+}
+
+int
+nfs3_create (rpcsvc_request_t *req, struct nfs3_fh *dirfh, char *name,
+ createmode3 mode, sattr3 *sattr, uint64_t cverf)
+{
+ xlator_t *vol = NULL;
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ struct nfs3_state *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+
+ if ((!req) || (!dirfh) || (!name) || (!sattr))
+ return -1;
+
+ nfs3_log_create_call (rpcsvc_request_xid (req), dirfh, name, mode);
+ nfs3_validate_gluster_fh (dirfh, stat, nfs3err);
+ nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
+ nfs3_validate_strlen_or_goto (name, NFS_NAME_MAX, nfs3err, stat, ret);
+ nfs3_map_fh_to_volume (nfs3, dirfh, req, vol, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
+ nfs3_check_rw_volaccess (nfs3, dirfh->exportid, stat, nfs3err);
+ nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
+
+ cs->cookieverf = cverf;
+ /*In Exclusive create client is supposed to send cverf instead of
+ * sattr*/
+ if (mode != EXCLUSIVE)
+ cs->setattr_valid = nfs3_sattr3_to_setattr_valid (sattr,
+ &cs->stbuf,
+ &cs->mode);
+ cs->createmode = mode;
+ cs->parent = *dirfh;
+
+ ret = nfs3_fh_resolve_and_resume (cs, dirfh, name, nfs3_create_resume);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (req), NFS3_CREATE,
+ stat, -ret);
+ nfs3_create_reply (req, stat, NULL, NULL, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ ret = 0;
+ }
+out:
+ return ret;
+}
+
+
+int
+nfs3svc_create (rpcsvc_request_t *req)
+{
+ char name[NFS_PATH_MAX];
+ struct nfs3_fh dirfh = {{0}, };
+ create3args args;
+ int ret = RPCSVC_ACTOR_ERROR;
+ uint64_t cverf = 0;
+ uint64_t *cval;
+
+ if (!req)
+ return ret;
+
+ nfs3_prep_create3args (&args, &dirfh, name);
+ if (xdr_to_create3args (req->msg[0], &args) <= 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+
+ cval = (uint64_t *)args.how.createhow3_u.verf;
+ cverf = *cval;
+
+ ret = nfs3_create (req, &dirfh, name, args.how.mode,
+ &args.how.createhow3_u.obj_attributes, cverf);
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "CREATE procedure failed");
+ rpcsvc_request_seterr (req, SYSTEM_ERR);
+ ret = RPCSVC_ACTOR_ERROR;
+ }
+
+rpcerr:
+ return ret;
+}
+
+
+int
+nfs3_mkdir_reply (rpcsvc_request_t *req, nfsstat3 stat, struct nfs3_fh *fh,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent)
+{
+ mkdir3res res = {0, };
+ uint64_t deviceid = 0;
+
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_mkdir3res (&res, stat, fh, buf, preparent, postparent,
+ deviceid);
+ nfs3svc_submit_reply (req, &res,
+ (nfs3_serializer)xdr_serialize_mkdir3res);
+ return 0;
+}
+
+int32_t
+nfs3svc_mkdir_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ nfs3_call_state_t *cs = NULL;
+
+ cs = frame->local;
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_WARNING,
+ "%x: %s => -1 (%s)", rpcsvc_request_xid (cs->req),
+ cs->resolvedloc.path, strerror (op_errno));
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ goto nfs3err;
+ }
+
+ stat = NFS3_OK;
+nfs3err:
+ nfs3_log_newfh_res (rpcsvc_request_xid (cs->req), NFS3_MKDIR, stat,
+ op_errno, &cs->fh);
+ nfs3_mkdir_reply (cs->req, stat, &cs->fh, postop, &cs->preparent,
+ &cs->postparent);
+ nfs3_call_state_wipe (cs);
+
+ return 0;
+}
+
+
+int32_t
+nfs3svc_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ nfs_user_t nfu = {0, };
+ nfs3_call_state_t *cs = NULL;
+
+ cs = frame->local;
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_WARNING,
+ "%x: %s => -1 (%s)", rpcsvc_request_xid (cs->req),
+ cs->resolvedloc.path, strerror (op_errno));
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ goto nfs3err;
+ }
+
+ nfs3_fh_build_child_fh (&cs->parent, buf, &cs->fh);
+
+ /* Means no attributes were required to be set. */
+ if (!cs->setattr_valid) {
+ stat = NFS3_OK;
+ goto nfs3err;
+ }
+
+ cs->preparent = *preparent;
+ cs->postparent = *postparent;
+ nfs_request_user_init (&nfu, cs->req);
+ ret = nfs_setattr (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,&cs->stbuf,
+ cs->setattr_valid, nfs3svc_mkdir_setattr_cbk, cs);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_newfh_res (rpcsvc_request_xid (cs->req), NFS3_MKDIR,
+ stat, op_errno, &cs->fh);
+ nfs3_mkdir_reply (cs->req, stat, &cs->fh, buf, preparent,
+ postparent);
+ nfs3_call_state_wipe (cs);
+ }
+
+ return 0;
+}
+
+
+int
+nfs3_mkdir_resume (void *carg)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ nfs_user_t nfu = {0, };
+ nfs3_call_state_t *cs = NULL;
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ nfs3_check_new_fh_resolve_status (cs, stat, nfs3err);
+ nfs_request_user_init (&nfu, cs->req);
+
+ if (gf_attr_mode_set (cs->setattr_valid)) {
+ cs->setattr_valid &= ~GF_SET_ATTR_MODE;
+ ret = nfs_mkdir (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ cs->mode, nfs3svc_mkdir_cbk, cs);
+ } else
+ ret = nfs_mkdir (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ cs->mode, nfs3svc_mkdir_cbk, cs);
+
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_MKDIR,
+ stat, -ret);
+ nfs3_mkdir_reply (cs->req, stat, NULL, NULL, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ }
+
+ return 0;
+}
+
+
+
+int
+nfs3_mkdir (rpcsvc_request_t *req, struct nfs3_fh *dirfh, char *name,
+ sattr3 *sattr)
+{
+ xlator_t *vol = NULL;
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ struct nfs3_state *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+
+ if ((!req) || (!dirfh) || (!name) || (!sattr)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
+ return -1;
+ }
+
+ nfs3_log_fh_entry_call (rpcsvc_request_xid (req), "MKDIR", dirfh,
+ name);
+ nfs3_validate_gluster_fh (dirfh, stat, nfs3err);
+ nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
+ nfs3_validate_strlen_or_goto (name, NFS_NAME_MAX, nfs3err, stat, ret);
+ nfs3_map_fh_to_volume (nfs3, dirfh, req, vol, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
+ nfs3_check_rw_volaccess (nfs3, dirfh->exportid, stat, nfs3err);
+ nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
+
+ cs->parent = *dirfh;
+ cs->setattr_valid = nfs3_sattr3_to_setattr_valid (sattr, &cs->stbuf,
+ &cs->mode);
+ ret = nfs3_fh_resolve_and_resume (cs, dirfh, name, nfs3_mkdir_resume);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (req), NFS3_MKDIR,
+ stat, -ret);
+ nfs3_mkdir_reply (req, stat, NULL, NULL, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ ret = 0;
+ }
+out:
+ return ret;
+}
+
+
+int
+nfs3svc_mkdir (rpcsvc_request_t *req)
+{
+ char name[NFS_PATH_MAX];
+ struct nfs3_fh dirfh = {{0}, };
+ mkdir3args args;
+ int ret = RPCSVC_ACTOR_ERROR;
+
+ if (!req)
+ return ret;
+ nfs3_prep_mkdir3args (&args, &dirfh, name);
+ if (xdr_to_mkdir3args (req->msg[0], &args) <= 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+
+ ret = nfs3_mkdir (req, &dirfh, name, &args.attributes);
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "MKDIR procedure failed");
+ rpcsvc_request_seterr (req, SYSTEM_ERR);
+ ret = RPCSVC_ACTOR_ERROR;
+ }
+
+rpcerr:
+ return ret;
+}
+
+
+int
+nfs3_symlink_reply (rpcsvc_request_t *req, nfsstat3 stat, struct nfs3_fh *fh,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent)
+{
+ symlink3res res = {0, };
+ uint64_t deviceid = 0;
+
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_symlink3res (&res, stat, fh, buf, preparent, postparent,
+ deviceid);
+ nfs3svc_submit_reply (req, (void *)&res,
+ (nfs3_serializer)xdr_serialize_symlink3res);
+
+ return 0;
+}
+
+
+int32_t
+nfs3svc_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ nfs3_call_state_t *cs = NULL;
+
+ cs = frame->local;
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_WARNING,
+ "%x: %s => -1 (%s)", rpcsvc_request_xid (cs->req),
+ cs->resolvedloc.path, strerror (op_errno));
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ goto nfs3err;
+ }
+
+ nfs3_fh_build_child_fh (&cs->parent, buf, &cs->fh);
+ stat = NFS3_OK;
+
+nfs3err:
+ nfs3_log_newfh_res (rpcsvc_request_xid (cs->req), NFS3_SYMLINK, stat,
+ op_errno, &cs->fh);
+ nfs3_symlink_reply (cs->req, stat, &cs->fh, buf, preparent,
+ postparent);
+ nfs3_call_state_wipe (cs);
+ return 0;
+}
+
+
+int
+nfs3_symlink_resume (void *carg)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ nfs3_call_state_t *cs = NULL;
+ nfs_user_t nfu = {0, };
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ nfs3_check_new_fh_resolve_status (cs, stat, nfs3err);
+ nfs_request_user_init (&nfu, cs->req);
+ ret = nfs_symlink (cs->nfsx, cs->vol, &nfu, cs->pathname,
+ &cs->resolvedloc, nfs3svc_symlink_cbk, cs);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_SYMLINK, stat, -ret);
+ nfs3_symlink_reply (cs->req, stat, NULL, NULL, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ }
+
+ return ret;
+}
+
+
+int
+nfs3_symlink (rpcsvc_request_t *req, struct nfs3_fh *dirfh, char *name,
+ char *target, sattr3 *sattr)
+{
+ xlator_t *vol = NULL;
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ struct nfs3_state *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+
+ if ((!req) || (!dirfh) || (!name) || (!target) || (!sattr)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
+ return -1;
+ }
+
+ nfs3_log_symlink_call (rpcsvc_request_xid (req), dirfh, name,
+ target);
+ nfs3_validate_gluster_fh (dirfh, stat, nfs3err);
+ nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
+ nfs3_validate_strlen_or_goto (name, NFS_NAME_MAX, nfs3err, stat, ret);
+ nfs3_map_fh_to_volume (nfs3, dirfh, req, vol, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
+ nfs3_check_rw_volaccess (nfs3, dirfh->exportid, stat, nfs3err);
+ nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
+
+ cs->parent = *dirfh;
+ cs->pathname = gf_strdup (target);
+ if (!cs->pathname) {
+ ret = -1;
+ stat = NFS3ERR_SERVERFAULT;
+ goto nfs3err;
+ }
+
+ ret = nfs3_fh_resolve_and_resume (cs, dirfh, name, nfs3_symlink_resume);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (req), NFS3_SYMLINK,
+ stat, -ret);
+ nfs3_symlink_reply (req, stat, NULL, NULL, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ /* Ret must be 0 after this so that the caller does not
+ * also send an RPC reply.
+ */
+ ret = 0;
+ }
+out:
+ return ret;
+}
+
+
+int
+nfs3svc_symlink (rpcsvc_request_t *req)
+{
+ char name[NFS_PATH_MAX];
+ struct nfs3_fh dirfh = {{0}, };
+ char target[NFS_PATH_MAX];
+ symlink3args args;
+ int ret = RPCSVC_ACTOR_ERROR;
+
+ if (!req)
+ return ret;
+ nfs3_prep_symlink3args (&args, &dirfh, name, target);
+ if (xdr_to_symlink3args (req->msg[0], &args) <= 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+
+ ret = nfs3_symlink (req, &dirfh, name, target,
+ &args.symlink.symlink_attributes);
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "SYMLINK procedure failed");
+ rpcsvc_request_seterr (req, SYSTEM_ERR);
+ ret = RPCSVC_ACTOR_ERROR;
+ }
+
+rpcerr:
+ return ret;
+}
+
+
+int
+nfs3_mknod_reply (rpcsvc_request_t *req, nfsstat3 stat, struct nfs3_fh *fh,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent)
+{
+ mknod3res res = {0, };
+ uint64_t deviceid = 0;
+
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_mknod3res (&res, stat, fh, buf, preparent, postparent,
+ deviceid);
+ nfs3svc_submit_reply (req, (void *)&res,
+ (nfs3_serializer)xdr_serialize_mknod3res);
+
+ return 0;
+}
+
+int32_t
+nfs3svc_mknod_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ nfs3_call_state_t *cs = NULL;
+
+ cs = frame->local;
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_WARNING,
+ "%x: %s => -1 (%s)", rpcsvc_request_xid (cs->req),
+ cs->resolvedloc.path, strerror (op_errno));
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ goto nfs3err;
+ }
+
+ stat = NFS3_OK;
+nfs3err:
+ nfs3_log_newfh_res (rpcsvc_request_xid (cs->req), NFS3_MKNOD, stat,
+ op_errno, &cs->fh);
+ nfs3_mknod_reply (cs->req, stat, &cs->fh, postop, &cs->preparent,
+ &cs->postparent);
+ nfs3_call_state_wipe (cs);
+ return 0;
+}
+
+
+
+int32_t
+nfs3svc_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -1;
+ nfs_user_t nfu = {0, };
+ nfs3_call_state_t *cs = NULL;
+
+ cs = frame->local;
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_WARNING,
+ "%x: %s => -1 (%s)", rpcsvc_request_xid (cs->req),
+ cs->resolvedloc.path, strerror (op_errno));
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ goto nfs3err;
+ }
+
+ nfs3_fh_build_child_fh (&cs->parent, buf, &cs->fh);
+
+ /* Means no attributes were required to be set. */
+ if (!cs->setattr_valid) {
+ stat = NFS3_OK;
+ ret = -1;
+ goto nfs3err;
+ }
+
+ cs->preparent = *preparent;
+ cs->postparent = *postparent;
+ nfs_request_user_init (&nfu, cs->req);
+ ret = nfs_setattr (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,&cs->stbuf,
+ cs->setattr_valid, nfs3svc_mknod_setattr_cbk, cs);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_newfh_res (rpcsvc_request_xid (cs->req), NFS3_MKNOD,
+ stat,
+ op_errno, &cs->fh);
+ nfs3_mknod_reply (cs->req, stat, &cs->fh, buf, preparent,
+ postparent);
+ nfs3_call_state_wipe (cs);
+ }
+
+ return 0;
+}
+
+
+int
+nfs3_mknod_device (nfs3_call_state_t *cs)
+{
+ int ret = -EFAULT;
+ dev_t devnum = 0;
+ mode_t mode = 0;
+ nfs_user_t nfu = {0, };
+
+ if (!cs)
+ return ret;
+
+ devnum = makedev (cs->devnums.specdata1, cs->devnums.specdata2);
+ if (cs->mknodtype == NF3CHR)
+ mode = S_IFCHR;
+ else
+ mode = S_IFBLK;
+
+ nfs_request_user_init (&nfu, cs->req);
+ if (gf_attr_mode_set (cs->setattr_valid)) {
+ cs->setattr_valid &= ~GF_SET_ATTR_MODE;
+ mode |= cs->mode;
+ ret = nfs_mknod (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ mode, devnum, nfs3svc_mknod_cbk, cs);
+ } else
+ ret = nfs_mknod (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ mode, devnum, nfs3svc_mknod_cbk, cs);
+
+ return ret;
+}
+
+
+int
+nfs3_mknod_fifo (nfs3_call_state_t *cs)
+{
+ int ret = -EFAULT;
+ nfs_user_t nfu = {0, };
+ mode_t mode = S_IFIFO;
+
+ if (!cs)
+ return ret;
+
+ nfs_request_user_init (&nfu, cs->req);
+ if (gf_attr_mode_set (cs->setattr_valid)) {
+ cs->setattr_valid &= ~GF_SET_ATTR_MODE;
+ mode |= cs->mode;
+ ret = nfs_mknod (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ mode, 0, nfs3svc_mknod_cbk, cs);
+ } else
+ ret = nfs_mknod (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ mode, 0, nfs3svc_mknod_cbk, cs);
+
+ return ret;
+}
+
+
+int
+nfs3_mknod_resume (void *carg)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ nfs3_call_state_t *cs = NULL;
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ nfs3_check_new_fh_resolve_status (cs, stat, nfs3err);
+ switch (cs->mknodtype) {
+
+ case NF3CHR:
+ case NF3BLK:
+ ret = nfs3_mknod_device (cs);
+ break;
+ case NF3SOCK:
+ case NF3FIFO:
+ ret = nfs3_mknod_fifo (cs);
+ break;
+ default:
+ ret = -EBADF;
+ break;
+ }
+
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_MKNOD,
+ stat, -ret);
+ nfs3_mknod_reply (cs->req, stat, NULL, NULL, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ }
+
+ return ret;
+}
+
+
+
+int
+nfs3_mknod (rpcsvc_request_t *req, struct nfs3_fh *fh, char *name,
+ mknoddata3 *nodedata)
+{
+ xlator_t *vol = NULL;
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ struct nfs3_state *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+ sattr3 *sattr = NULL;
+
+ if ((!req) || (!fh) || (!name) || (!nodedata)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
+ return -1;
+ }
+
+ nfs3_log_mknod_call (rpcsvc_request_xid (req), fh, name,
+ nodedata->type);
+ nfs3_validate_gluster_fh (fh, stat, nfs3err);
+ nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
+ nfs3_validate_strlen_or_goto (name, NFS_NAME_MAX, nfs3err, stat, ret);
+ nfs3_map_fh_to_volume (nfs3, fh, req, vol, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
+ nfs3_check_rw_volaccess (nfs3, fh->exportid, stat, nfs3err);
+ nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
+
+ cs->mknodtype = nodedata->type;
+ switch (nodedata->type) {
+ case NF3CHR:
+ case NF3BLK:
+ cs->devnums = nodedata->mknoddata3_u.device.spec;
+ sattr = &nodedata->mknoddata3_u.device.dev_attributes;
+ cs->setattr_valid = nfs3_sattr3_to_setattr_valid (sattr,
+ &cs->stbuf,
+ &cs->mode);
+ break;
+ case NF3SOCK:
+ case NF3FIFO:
+ sattr = &nodedata->mknoddata3_u.pipe_attributes;
+ cs->setattr_valid = nfs3_sattr3_to_setattr_valid (sattr,
+ &cs->stbuf,
+ &cs->mode);
+ break;
+ default:
+ ret = -EBADF;
+ break;
+ }
+
+ cs->parent = *fh;
+ ret = nfs3_fh_resolve_and_resume (cs, fh, name, nfs3_mknod_resume);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (req), NFS3_MKNOD,
+ stat, -ret);
+ nfs3_mknod_reply (req, stat, NULL, NULL, NULL, NULL);
+ /* Ret must be 0 after this so that the caller does not
+ * also send an RPC reply.
+ */
+ nfs3_call_state_wipe (cs);
+ ret = 0;
+ }
+out:
+ return ret;
+}
+
+
+int
+nfs3svc_mknod (rpcsvc_request_t *req)
+{
+ char name[NFS_PATH_MAX];
+ struct nfs3_fh fh = {{0}, };
+ mknod3args args;
+ int ret = RPCSVC_ACTOR_ERROR;
+
+ if (!req)
+ return ret;
+ nfs3_prep_mknod3args (&args, &fh, name);
+ if (xdr_to_mknod3args (req->msg[0], &args) <= 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+
+ ret = nfs3_mknod (req, &fh, name, &args.what);
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "MKNOD procedure failed");
+ rpcsvc_request_seterr (req, SYSTEM_ERR);
+ ret = RPCSVC_ACTOR_ERROR;
+ }
+
+rpcerr:
+ return ret;
+}
+
+int
+nfs3_remove_reply (rpcsvc_request_t *req, nfsstat3 stat, struct iatt *preparent
+ , struct iatt *postparent)
+{
+ remove3res res = {0, };
+ uint64_t deviceid = 0;
+
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_remove3res (&res, stat, preparent, postparent, deviceid);
+ nfs3svc_submit_reply (req, (void *)&res,
+ (nfs3_serializer)xdr_serialize_remove3res);
+ return 0;
+}
+
+
+
+int32_t
+nfs3svc_remove_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ nfs3_call_state_t *cs = NULL;
+
+ cs = frame->local;
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_WARNING,
+ "%x: %s => -1 (%s)", rpcsvc_request_xid (cs->req),
+ cs->resolvedloc.path, strerror (op_errno));
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ }
+
+ if (op_ret == 0)
+ stat = NFS3_OK;
+
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_REMOVE, stat,
+ op_errno);
+ nfs3_remove_reply (cs->req, stat, preparent, postparent);
+ nfs3_call_state_wipe (cs);
+
+ return 0;
+}
+
+
+int
+__nfs3_remove (nfs3_call_state_t *cs)
+{
+ int ret = -EFAULT;
+ nfs_user_t nfu = {0, };
+ ia_type_t type = 0;
+
+ if (!cs)
+ return ret;
+ type = cs->resolvedloc.inode->ia_type;
+ nfs_request_user_init (&nfu, cs->req);
+ if (IA_ISDIR (type))
+ ret = nfs_rmdir (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ nfs3svc_remove_cbk, cs);
+ else
+ ret = nfs_unlink (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ nfs3svc_remove_cbk, cs);
+
+ return ret;
+}
+
+
+int
+nfs3_remove_resume (void *carg)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ nfs3_call_state_t *cs = NULL;
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ nfs3_check_fh_resolve_status (cs, stat, nfs3err);
+ ret = __nfs3_remove (cs);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_REMOVE,
+ stat, -ret);
+ nfs3_remove_reply (cs->req, stat, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ }
+
+ return ret;
+}
+
+
+int
+nfs3_remove (rpcsvc_request_t *req, struct nfs3_fh *fh, char *name)
+{
+ xlator_t *vol = NULL;
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ struct nfs3_state *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+
+ if ((!req) || (!fh) || (!name)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
+ return -1;
+ }
+
+ nfs3_log_fh_entry_call (rpcsvc_request_xid (req), "REMOVE", fh,
+ name);
+ nfs3_validate_gluster_fh (fh, stat, nfs3err);
+ nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
+ nfs3_validate_strlen_or_goto (name, NFS_NAME_MAX, nfs3err, stat, ret);
+ nfs3_map_fh_to_volume (nfs3, fh, req, vol, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
+ nfs3_check_rw_volaccess (nfs3, fh->exportid, stat, nfs3err);
+ nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
+
+ ret = nfs3_fh_resolve_and_resume (cs, fh, name, nfs3_remove_resume);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (req), NFS3_REMOVE,
+ stat, -ret);
+ nfs3_remove_reply (req, stat, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ /* Ret must be 0 after this so that the caller does not
+ * also send an RPC reply.
+ */
+ ret = 0;
+ }
+out:
+ return ret;
+}
+
+
+int
+nfs3svc_remove (rpcsvc_request_t *req)
+{
+ char name[NFS_PATH_MAX];
+ struct nfs3_fh fh = {{0}, };
+ remove3args args;
+ int ret = RPCSVC_ACTOR_ERROR;
+
+ if (!req)
+ return ret;
+ nfs3_prep_remove3args (&args, &fh, name);
+ if (xdr_to_remove3args (req->msg[0], &args) <= 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+
+ ret = nfs3_remove (req, &fh, name);
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "REMOVE procedure failed");
+ rpcsvc_request_seterr (req, SYSTEM_ERR);
+ ret = RPCSVC_ACTOR_ERROR;
+ }
+
+rpcerr:
+ return ret;
+}
+
+
+int
+nfs3_rmdir_reply (rpcsvc_request_t *req, nfsstat3 stat, struct iatt *preparent,
+ struct iatt *postparent)
+{
+ rmdir3res res = {0, };
+ uint64_t deviceid = 0;
+
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_rmdir3res (&res, stat, preparent, postparent, deviceid);
+ nfs3svc_submit_reply (req, (void *)&res,
+ (nfs3_serializer)xdr_serialize_rmdir3res);
+ return 0;
+}
+
+
+int32_t
+nfs3svc_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ nfs3_call_state_t *cs = NULL;
+
+ cs = frame->local;
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_WARNING,
+ "%x: %s => -1 (%s)", rpcsvc_request_xid (cs->req),
+ cs->resolvedloc.path, strerror (op_errno));
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ } else {
+ stat = NFS3_OK;
+ }
+
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_RMDIR, stat,
+ op_errno);
+ nfs3_rmdir_reply (cs->req, stat, preparent, postparent);
+ nfs3_call_state_wipe (cs);
+
+ return 0;
+}
+
+int
+nfs3_rmdir_resume (void *carg)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ nfs3_call_state_t *cs = NULL;
+ nfs_user_t nfu = {0, };
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ nfs3_check_fh_resolve_status (cs, stat, nfs3err);
+ nfs_request_user_init (&nfu, cs->req);
+ ret = nfs_rmdir (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ nfs3svc_rmdir_cbk, cs);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_RMDIR,
+ stat, -ret);
+ nfs3_rmdir_reply (cs->req, stat, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ }
+
+ return ret;
+}
+
+
+
+int
+nfs3_rmdir (rpcsvc_request_t *req, struct nfs3_fh *fh, char *name)
+{
+ xlator_t *vol = NULL;
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ struct nfs3_state *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+
+ if ((!req) || (!fh) || (!name)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
+ return -1;
+ }
+
+ nfs3_log_fh_entry_call (rpcsvc_request_xid (req), "RMDIR", fh,
+ name);
+ nfs3_validate_gluster_fh (fh, stat, nfs3err);
+ nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
+ nfs3_validate_strlen_or_goto (name, NFS_NAME_MAX, nfs3err, stat, ret);
+ nfs3_map_fh_to_volume (nfs3, fh, req, vol, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
+ nfs3_check_rw_volaccess (nfs3, fh->exportid, stat, nfs3err);
+ nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
+
+ ret = nfs3_fh_resolve_and_resume (cs, fh, name, nfs3_rmdir_resume);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (req), NFS3_RMDIR,
+ stat, -ret);
+ nfs3_rmdir_reply (req, stat, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ /* Ret must be 0 after this so that the caller does not
+ * also send an RPC reply.
+ */
+ ret = 0;
+ }
+out:
+ return ret;
+}
+
+
+int
+nfs3svc_rmdir (rpcsvc_request_t *req)
+{
+ char name[NFS_PATH_MAX];
+ struct nfs3_fh fh = {{0}, };
+ rmdir3args args;
+ int ret = RPCSVC_ACTOR_ERROR;
+
+ if (!req)
+ return ret;
+ nfs3_prep_rmdir3args (&args, &fh, name);
+ if (xdr_to_rmdir3args (req->msg[0], &args) <= 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+
+ ret = nfs3_rmdir (req, &fh, name);
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "RMDIR procedure failed");
+ rpcsvc_request_seterr (req, SYSTEM_ERR);
+ ret = RPCSVC_ACTOR_ERROR;
+ }
+
+rpcerr:
+ return ret;
+}
+
+
+int
+nfs3_rename_reply (rpcsvc_request_t *req, nfsstat3 stat, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent)
+{
+ rename3res res = {0, };
+ uint64_t deviceid = 0;
+
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_rename3res (&res, stat, buf, preoldparent, postoldparent,
+ prenewparent, postnewparent, deviceid);
+
+ nfs3svc_submit_reply (req, (void *)&res,
+ (nfs3_serializer) xdr_serialize_rename3res);
+
+ return 0;
+}
+
+
+
+int32_t
+nfs3svc_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ int ret = -EFAULT;
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ nfs3_call_state_t *cs = NULL;
+
+ cs = frame->local;
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_WARNING,
+ "%x: rename %s -> %s => -1 (%s)",
+ rpcsvc_request_xid (cs->req), cs->oploc.path,
+ cs->resolvedloc.path, strerror (op_errno));
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ goto nfs3err;
+ }
+
+ stat = NFS3_OK;
+nfs3err:
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_RENAME, stat,
+ -ret);
+ nfs3_rename_reply (cs->req, stat, buf, preoldparent, postoldparent,
+ prenewparent, postnewparent);
+ nfs3_call_state_wipe (cs);
+ return 0;
+}
+
+
+int
+nfs3_rename_resume_dst (void *carg)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ nfs3_call_state_t *cs = NULL;
+ nfs_user_t nfu = {0, };
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ nfs3_check_new_fh_resolve_status (cs, stat, nfs3err);
+ cs->parent = cs->resolvefh;
+ nfs_request_user_init (&nfu, cs->req);
+ ret = nfs_rename (cs->nfsx, cs->vol, &nfu, &cs->oploc, &cs->resolvedloc,
+ nfs3svc_rename_cbk, cs);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_RENAME,
+ stat, -ret);
+ nfs3_rename_reply (cs->req, stat, NULL, NULL, NULL, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ }
+
+ return ret;
+}
+
+
+
+int
+nfs3_rename_resume_src (void *carg)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ nfs3_call_state_t *cs = NULL;
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ nfs3_check_fh_resolve_status (cs, stat, nfs3err);
+ /* Copy the resolved loc for the source file into another loc
+ * for safekeeping till we resolve the dest loc.
+ */
+ nfs_loc_copy (&cs->oploc, &cs->resolvedloc);
+ nfs_loc_wipe (&cs->resolvedloc);
+ GF_FREE (cs->resolventry);
+
+ ret = nfs3_fh_resolve_and_resume (cs, &cs->fh, cs->pathname,
+ nfs3_rename_resume_dst);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_RENAME,
+ stat, -ret);
+ nfs3_rename_reply (cs->req, stat, NULL, NULL, NULL, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ }
+
+ return ret;
+}
+
+
+int
+nfs3_rename (rpcsvc_request_t *req, struct nfs3_fh *olddirfh, char *oldname,
+ struct nfs3_fh *newdirfh, char *newname)
+{
+ xlator_t *vol = NULL;
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ struct nfs3_state *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+
+ if ((!req) || (!olddirfh) || (!oldname) || (!newdirfh) || (!newname)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
+ return -1;
+ }
+
+ nfs3_log_rename_call (rpcsvc_request_xid (req), olddirfh, oldname,
+ newdirfh, newname);
+ nfs3_validate_gluster_fh (olddirfh, stat, nfs3err);
+ nfs3_validate_gluster_fh (newdirfh, stat, nfs3err);
+ nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
+ nfs3_validate_strlen_or_goto(oldname, NFS_NAME_MAX, nfs3err, stat, ret);
+ nfs3_validate_strlen_or_goto(newname, NFS_NAME_MAX, nfs3err, stat, ret);
+ nfs3_map_fh_to_volume (nfs3, olddirfh, req, vol, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
+ nfs3_check_rw_volaccess (nfs3, olddirfh->exportid, stat, nfs3err);
+ nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
+
+ /* While we resolve the source (fh, name) pair, we need to keep a copy
+ * of the dest (fh,name) pair.
+ */
+ cs->fh = *newdirfh;
+ cs->pathname = gf_strdup (newname);
+ if (!cs->pathname) {
+ stat = NFS3ERR_SERVERFAULT;
+ ret = -1;
+ goto nfs3err;
+ }
+
+ ret = nfs3_fh_resolve_and_resume (cs, olddirfh, oldname,
+ nfs3_rename_resume_src);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (req), NFS3_RENAME,
+ stat, -ret);
+ nfs3_rename_reply (req, stat, NULL, NULL, NULL, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ /* Ret must be 0 after this so that the caller does not
+ * also send an RPC reply.
+ */
+ ret = 0;
+ }
+out:
+ return ret;
+}
+
+
+int
+nfs3svc_rename (rpcsvc_request_t *req)
+{
+ char newname[NFS_PATH_MAX];
+ char oldname[NFS_PATH_MAX];
+ struct nfs3_fh olddirfh = {{0}, };
+ struct nfs3_fh newdirfh = {{0}, };
+ rename3args args;
+ int ret = RPCSVC_ACTOR_ERROR;
+
+ if (!req)
+ return ret;
+ nfs3_prep_rename3args (&args, &olddirfh, oldname, &newdirfh, newname);
+ if (xdr_to_rename3args (req->msg[0], &args) <= 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+
+ ret = nfs3_rename (req, &olddirfh, oldname, &newdirfh, newname);
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "RENAME procedure failed");
+ rpcsvc_request_seterr (req, SYSTEM_ERR);
+ ret = RPCSVC_ACTOR_ERROR;
+ }
+
+rpcerr:
+ return ret;
+}
+
+
+int
+nfs3_link_reply (rpcsvc_request_t *req, nfsstat3 stat, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent)
+{
+ link3res res = {0, };
+ uint64_t deviceid = 0;
+
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_link3res (&res, stat, buf, preparent, postparent, deviceid);
+ nfs3svc_submit_reply (req, (void *)&res,
+ (nfs3_serializer)xdr_serialize_link3res);
+
+ return 0;
+}
+
+
+int32_t
+nfs3svc_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ nfs3_call_state_t *cs = NULL;
+
+ cs = frame->local;
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_WARNING,
+ "%x: link %s <- %s => -1 (%s)",
+ rpcsvc_request_xid (cs->req), cs->oploc.path,
+ cs->resolvedloc.path, strerror (op_errno));
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ } else
+ stat = NFS3_OK;
+
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_LINK, stat,
+ op_errno);
+ nfs3_link_reply (cs->req, stat, buf, preparent, postparent);
+ nfs3_call_state_wipe (cs);
+
+ return 0;
+}
+
+
+int
+nfs3_link_resume_lnk (void *carg)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ nfs3_call_state_t *cs = NULL;
+ nfs_user_t nfu = {0, };
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ nfs3_check_new_fh_resolve_status (cs, stat, nfs3err);
+
+ nfs_request_user_init (&nfu, cs->req);
+ ret = nfs_link (cs->nfsx, cs->vol, &nfu, &cs->oploc, &cs->resolvedloc,
+ nfs3svc_link_cbk, cs);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_LINK,
+ stat, -ret);
+ nfs3_link_reply (cs->req, stat, NULL, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ }
+ return ret;
+}
+
+
+int
+nfs3_link_resume_tgt (void *carg)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ nfs3_call_state_t *cs = NULL;
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ nfs3_check_fh_resolve_status (cs, stat, nfs3err);
+ nfs_loc_copy (&cs->oploc, &cs->resolvedloc);
+ nfs_loc_wipe (&cs->resolvedloc);
+
+ ret = nfs3_fh_resolve_and_resume (cs, &cs->fh, cs->pathname,
+ nfs3_link_resume_lnk);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_LINK,
+ stat, -ret);
+ nfs3_link_reply (cs->req, stat, NULL, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ }
+
+ return ret;
+}
+
+
+int
+nfs3_link (rpcsvc_request_t *req, struct nfs3_fh *targetfh,
+ struct nfs3_fh *dirfh, char *newname)
+{
+ xlator_t *vol = NULL;
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ struct nfs3_state *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+
+ if ((!req) || (!targetfh) || (!dirfh) || (!newname)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
+ return -1;
+ }
+
+ nfs3_validate_gluster_fh (dirfh, stat, nfs3err);
+ nfs3_validate_gluster_fh (targetfh, stat, nfs3err);
+ nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
+ nfs3_validate_strlen_or_goto(newname, NFS_NAME_MAX, nfs3err, stat, ret);
+ nfs3_map_fh_to_volume (nfs3, dirfh, req, vol, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
+ nfs3_check_rw_volaccess (nfs3, dirfh->exportid, stat, nfs3err);
+ nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
+
+ cs->fh = *dirfh;
+ cs->pathname = gf_strdup (newname);
+ if (!cs->pathname) {
+ stat = NFS3ERR_SERVERFAULT;
+ ret = -1;
+ goto nfs3err;
+ }
+
+ ret = nfs3_fh_resolve_and_resume (cs, targetfh, NULL,
+ nfs3_link_resume_tgt);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (req), NFS3_LINK, stat,
+ -ret);
+ nfs3_link_reply (req, stat, NULL, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ /* Ret must be 0 after this so that the caller does not
+ * also send an RPC reply.
+ */
+ ret = 0;
+ }
+out:
+ return ret;
+}
+
+int
+nfs3svc_link (rpcsvc_request_t *req)
+{
+ char newpath[NFS_PATH_MAX];
+ struct nfs3_fh dirfh = {{0}, };
+ struct nfs3_fh targetfh = {{0}, };
+ link3args args;
+ int ret = RPCSVC_ACTOR_ERROR;
+
+ if (!req)
+ return ret;
+ nfs3_prep_link3args (&args, &targetfh, &dirfh, newpath);
+ if (xdr_to_link3args (req->msg[0], &args) <= 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+
+ ret = nfs3_link (req, &targetfh, &dirfh, newpath);
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "LINK procedure failed");
+ rpcsvc_request_seterr (req, SYSTEM_ERR);
+ ret = RPCSVC_ACTOR_ERROR;
+ }
+
+rpcerr:
+ return ret;
+}
+
+
+int
+nfs3_readdirp_reply (rpcsvc_request_t *req, nfsstat3 stat,struct nfs3_fh *dirfh,
+ uint64_t cverf, struct iatt *dirstat, gf_dirent_t *entries,
+ count3 dircount, count3 maxcount, int is_eof)
+{
+ readdirp3res res = {0, };
+ uint64_t deviceid = 0;
+
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_readdirp3res (&res, stat, dirfh, cverf, dirstat, entries,
+ dircount, maxcount, is_eof, deviceid);
+ nfs3svc_submit_reply (req, (void *)&res,
+ (nfs3_serializer) xdr_serialize_readdirp3res);
+ nfs3_free_readdirp3res (&res);
+
+ return 0;
+}
+
+
+int
+nfs3_readdir_reply (rpcsvc_request_t *req, nfsstat3 stat, struct nfs3_fh *dirfh,
+ uint64_t cverf, struct iatt *dirstat, gf_dirent_t *entries,
+ count3 count, int is_eof)
+{
+ readdir3res res = {0, };
+ uint64_t deviceid = 0;
+
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_readdir3res (&res, stat, dirfh, cverf, dirstat, entries, count
+ , is_eof, deviceid);
+ nfs3svc_submit_reply (req, (void *)&res,
+ (nfs3_serializer) xdr_serialize_readdir3res);
+ nfs3_free_readdir3res (&res);
+
+ return 0;
+}
+
+
+int32_t
+nfs3svc_readdir_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int is_eof = 0;
+ nfs3_call_state_t *cs = NULL;
+
+ cs = frame->local;
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_WARNING,
+ "%x: %s => -1 (%s)", rpcsvc_request_xid (cs->req),
+ cs->resolvedloc.path, strerror (op_errno));
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ goto nfs3err;
+ }
+
+ /* Check whether we encountered a end of directory stream while
+ * readdir'ing.
+ */
+ if (cs->operrno == ENOENT) {
+ gf_log (GF_NFS3, GF_LOG_TRACE, "Reached end-of-directory");
+ is_eof = 1;
+ }
+
+ stat = NFS3_OK;
+nfs3err:
+ if (cs->maxcount == 0) {
+ nfs3_log_readdir_res (rpcsvc_request_xid (cs->req), stat,
+ op_errno, (uintptr_t)cs->fd,
+ cs->dircount, is_eof);
+ nfs3_readdir_reply (cs->req, stat, &cs->parent,
+ (uintptr_t)cs->fd, buf, &cs->entries,
+ cs->dircount, is_eof);
+ } else {
+ nfs3_log_readdirp_res (rpcsvc_request_xid (cs->req), stat,
+ op_errno, (uintptr_t)cs->fd,
+ cs->dircount, cs->maxcount, is_eof);
+ nfs3_readdirp_reply (cs->req, stat, &cs->parent,
+ (uintptr_t)cs->fd, buf,
+ &cs->entries, cs->dircount,
+ cs->maxcount, is_eof);
+ }
+
+ if (is_eof) {
+ /* do nothing */
+ }
+
+ nfs3_call_state_wipe (cs);
+ return 0;
+}
+
+
+int32_t
+nfs3svc_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ nfs_user_t nfu = {0, };
+ nfs3_call_state_t *cs = NULL;
+
+ cs = frame->local;
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_WARNING,
+ "%x: %s => -1 (%s)", rpcsvc_request_xid (cs->req),
+ cs->resolvedloc.path, strerror (op_errno));
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ goto err;
+ }
+
+ cs->operrno = op_errno;
+ list_splice_init (&entries->list, &cs->entries.list);
+ nfs_request_user_init (&nfu, cs->req);
+ ret = nfs_fstat (cs->nfsx, cs->vol, &nfu, cs->fd,
+ nfs3svc_readdir_fstat_cbk, cs);
+ if (ret < 0) {
+ op_ret = -1;
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+ op_errno = -ret;
+ }
+
+err:
+ if (op_ret >= 0)
+ goto ret;
+
+ if (cs->maxcount == 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_READDIR, stat, op_errno);
+ nfs3_readdir_reply (cs->req, stat, NULL, 0, NULL, NULL, 0, 0);
+ } else {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_READDIRP, stat, op_errno);
+ nfs3_readdirp_reply (cs->req, stat, NULL, 0, NULL, NULL,
+ 0, 0, 0);
+ }
+
+ /* For directories, we force a purge from the fd cache on close
+ * so that next time the dir is read, we'll get any changed directory
+ * entries.
+ */
+ nfs3_call_state_wipe (cs);
+ret:
+ return 0;
+}
+
+int
+nfs3_readdir_process (nfs3_call_state_t *cs)
+{
+ int ret = -EFAULT;
+ nfs_user_t nfu = {0, };
+
+ if (!cs)
+ return ret;
+
+ nfs_request_user_init (&nfu, cs->req);
+ ret = nfs_readdirp (cs->nfsx, cs->vol, &nfu, cs->fd, cs->dircount,
+ cs->cookie, nfs3svc_readdir_cbk, cs);
+ return ret;
+}
+
+
+int
+nfs3_readdir_read_resume (void *carg)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ nfs3_call_state_t *cs = NULL;
+ struct nfs3_state *nfs3 = NULL;
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ nfs3_check_fh_resolve_status (cs, stat, nfs3err);
+ nfs3 = rpcsvc_request_program_private (cs->req);
+ ret = nfs3_verify_dircookie (nfs3, cs->fd, cs->cookie, cs->cookieverf,
+ &stat);
+ if (ret < 0) /* Stat already set by verifier function above. */
+ goto nfs3err;
+
+ ret = nfs3_readdir_process (cs);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+nfs3err:
+ if (ret < 0) {
+ if (cs->maxcount == 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_READDIR, stat, -ret);
+ nfs3_readdir_reply (cs->req, stat, NULL, 0, NULL, NULL,
+ 0, 0);
+ } else {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_READDIRP, stat, -ret);
+ nfs3_readdirp_reply (cs->req, stat, NULL, 0, NULL, NULL,
+ 0, 0, 0);
+ }
+ nfs3_call_state_wipe (cs);
+ }
+
+ return 0;
+}
+
+
+int32_t
+nfs3svc_readdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata)
+{
+ /*
+ * We don't really need this, it's just an artifact of forcing the
+ * opendir to happen.
+ */
+ if (fd) {
+ fd_unref(fd);
+ }
+
+ return 0;
+}
+
+
+int
+nfs3_readdir_open_resume (void *carg)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ nfs3_call_state_t *cs = NULL;
+ nfs_user_t nfu = {0, };
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ nfs3_check_fh_resolve_status (cs, stat, nfs3err);
+ cs->fd = fd_anonymous (cs->resolvedloc.inode);
+ if (!cs->fd) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Faile to create anonymous fd");
+ goto nfs3err;
+ }
+
+ /*
+ * NFS client will usually send us a readdirp without an opendir,
+ * which would cause us to skip our usual self-heal checks which occur
+ * in opendir for native protocol. To make sure those checks do happen,
+ * our most reliable option is to do our own opendir for any readdirp
+ * at the beginning of the directory.
+ */
+ if (cs->cookie == 0) {
+ nfs_request_user_init (&nfu, cs->req);
+ ret = nfs_opendir (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ nfs3svc_readdir_opendir_cbk, cs);
+ if (ret < 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "auto-opendir failed");
+ }
+ }
+
+ ret = nfs3_readdir_read_resume (cs);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ if (cs->maxcount == 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_READDIR, stat, -ret);
+ nfs3_readdir_reply (cs->req, stat, NULL, 0, NULL, NULL,
+ 0, 0);
+ } else {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_READDIRP, stat, -ret);
+ nfs3_readdirp_reply (cs->req, stat, NULL, 0, NULL, NULL,
+ 0, 0, 0);
+ }
+ nfs3_call_state_wipe (cs);
+ }
+
+ return ret;
+}
+
+
+
+int
+nfs3_readdir (rpcsvc_request_t *req, struct nfs3_fh *fh, cookie3 cookie,
+ uint64_t cverf, count3 dircount, count3 maxcount)
+{
+ xlator_t *vol = NULL;
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ struct nfs3_state *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+
+ if ((!req) || (!fh)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
+ return -1;
+ }
+
+ nfs3_log_readdir_call (rpcsvc_request_xid (req), fh, dircount,
+ maxcount);
+ nfs3_validate_gluster_fh (fh, stat, nfs3err);
+ nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
+ nfs3_map_fh_to_volume (nfs3, fh, req, vol, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
+ nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
+
+ cs->cookieverf = cverf;
+ cs->dircount = dircount;
+ cs->maxcount = maxcount;
+ cs->cookie = cookie;
+ cs->parent = *fh;
+ ret = nfs3_fh_resolve_and_resume (cs, fh, NULL,
+ nfs3_readdir_open_resume);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ if (maxcount == 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (req),
+ NFS3_READDIR, stat, -ret);
+ nfs3_readdir_reply (req, stat, NULL, 0, NULL, NULL, 0,
+ 0);
+ } else {
+ nfs3_log_common_res (rpcsvc_request_xid (req),
+ NFS3_READDIRP, stat, -ret);
+ nfs3_readdirp_reply (req, stat, NULL, 0, NULL, NULL, 0,
+ 0, 0);
+ }
+ /* Ret must be NULL after this so that the caller does not
+ * also send an RPC reply.
+ */
+ ret = 0;
+ nfs3_call_state_wipe (cs);
+ }
+out:
+ return ret;
+}
+
+
+int
+nfs3svc_readdir (rpcsvc_request_t *req)
+{
+ readdir3args ra;
+ struct nfs3_fh fh = {{0},};
+ int ret = RPCSVC_ACTOR_ERROR;
+ uint64_t verf = 0;
+ uint64_t *cval;
+
+ if (!req)
+ return ret;
+ nfs3_prep_readdir3args (&ra, &fh);
+ if (xdr_to_readdir3args (req->msg[0], &ra) <= 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+ cval = (uint64_t *) ra.cookieverf;
+ verf = *cval;
+
+ ret = nfs3_readdir (req, &fh, ra.cookie, verf, ra.count, 0);
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "READDIR procedure failed");
+ rpcsvc_request_seterr (req, SYSTEM_ERR);
+ ret = RPCSVC_ACTOR_ERROR;
+ }
+
+rpcerr:
+ return ret;
+}
+
+
+int
+nfs3svc_readdirp (rpcsvc_request_t *req)
+{
+ readdirp3args ra;
+ struct nfs3_fh fh = {{0},};
+ int ret = RPCSVC_ACTOR_ERROR;
+ uint64_t cverf = 0;
+ uint64_t *cval;
+
+ if (!req)
+ return ret;
+ nfs3_prep_readdirp3args (&ra, &fh);
+ if (xdr_to_readdirp3args (req->msg[0], &ra) <= 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+ cval = (uint64_t *) ra.cookieverf;
+ cverf = *cval;
+
+ ret = nfs3_readdir (req, &fh, ra.cookie, cverf, ra.dircount,
+ ra.maxcount);
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "READDIRP procedure failed");
+ rpcsvc_request_seterr (req, SYSTEM_ERR);
+ ret = RPCSVC_ACTOR_ERROR;
+ }
+
+rpcerr:
+ return ret;
+}
+
+
+int
+nfs3_fsstat_reply (rpcsvc_request_t *req, nfsstat3 stat, struct statvfs *fsbuf,
+ struct iatt *postbuf)
+{
+ fsstat3res res = {0, };
+ uint64_t deviceid = 0;
+
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_fsstat3res (&res, stat, fsbuf, postbuf, deviceid);
+ return nfs3svc_submit_reply (req, &res,
+ (nfs3_serializer)xdr_serialize_fsstat3res);
+
+}
+
+
+int32_t
+nfs3_fsstat_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ nfs3_call_state_t *cs = NULL;
+
+ cs = frame->local;
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_WARNING,
+ "%x: %s => -1 (%s)", rpcsvc_request_xid (cs->req),
+ cs->resolvedloc.path, strerror (op_errno));
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ } else
+ stat = NFS3_OK;
+
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_FSSTAT, stat,
+ op_errno);
+ nfs3_fsstat_reply (cs->req, stat, &cs->fsstat, buf);
+ nfs3_call_state_wipe (cs);
+ return 0;
+}
+
+
+int32_t
+nfs3_fsstat_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
+{
+ nfs_user_t nfu = {0, };
+ int ret = -EFAULT;
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ nfs3_call_state_t *cs = NULL;
+
+ cs = frame->local;
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_WARNING,
+ "%x: %s => -1 (%s)", rpcsvc_request_xid (cs->req),
+ cs->resolvedloc.path, strerror (op_errno));
+ ret = -op_errno;
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ goto err;
+ }
+
+ /* Then get the stat for the fs root in order to fill in the
+ * post_op_attr.
+ */
+ cs->fsstat = *buf;
+ nfs_request_user_init (&nfu, cs->req);
+ ret = nfs_stat (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ nfs3_fsstat_stat_cbk, cs);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_FSSTAT,
+ stat, -ret);
+ nfs3_fsstat_reply (cs->req, stat, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ }
+
+ return 0;
+}
+
+
+int
+nfs3_fsstat_resume (void *carg)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ nfs3_call_state_t *cs = NULL;
+ nfs_user_t nfu = {0, };
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ nfs3_check_fh_resolve_status (cs, stat, nfs3err);
+ nfs_request_user_init (&nfu, cs->req);
+ /* First, we need to get the statvfs for the subvol */
+ ret = nfs_statfs (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ nfs3_fsstat_statfs_cbk, cs);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_FSSTAT,
+ stat, -ret);
+ nfs3_fsstat_reply (cs->req, stat, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ }
+
+ return ret;
+}
+
+
+
+int
+nfs3_fsstat (rpcsvc_request_t *req, struct nfs3_fh *fh)
+{
+ xlator_t *vol = NULL;
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ struct nfs3_state *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+
+ if ((!req) || (!fh)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
+ return -1;
+ }
+
+ nfs3_log_common_call (rpcsvc_request_xid (req), "FSSTAT", fh);
+ nfs3_validate_gluster_fh (fh, stat, nfs3err);
+ nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
+ nfs3_map_fh_to_volume (nfs3, fh, req, vol, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
+ nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
+
+ ret = nfs3_fh_resolve_and_resume (cs, fh, NULL, nfs3_fsstat_resume);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (req), NFS3_FSSTAT,
+ stat, -ret);
+ nfs3_fsstat_reply (req, stat, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ /* Ret must be 0 after this so that the caller does not
+ * also send an RPC reply.
+ */
+ ret = 0;
+ }
+out:
+ return ret;
+}
+
+
+int
+nfs3svc_fsstat (rpcsvc_request_t *req)
+{
+ struct nfs3_fh fh = {{0}, };
+ fsstat3args args;
+ int ret = RPCSVC_ACTOR_ERROR;
+
+ if (!req)
+ return ret;
+ nfs3_prep_fsstat3args (&args, &fh);
+ if (xdr_to_fsstat3args (req->msg[0], &args) <= 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+
+ ret = nfs3_fsstat (req, &fh);
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "FSTAT procedure failed");
+ rpcsvc_request_seterr (req, SYSTEM_ERR);
+ ret = RPCSVC_ACTOR_ERROR;
+ }
+
+rpcerr:
+ return ret;
+}
+
+
+int
+nfs3_fsinfo_reply (rpcsvc_request_t *req, nfsstat3 status, struct iatt *fsroot)
+{
+ fsinfo3res res;
+ struct nfs3_state *nfs3 = NULL;
+ uint64_t deviceid = 0;
+
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3 = rpcsvc_request_program_private (req);
+ nfs3_fill_fsinfo3res (nfs3, &res, status, fsroot, deviceid);
+
+ nfs3svc_submit_reply (req, &res,
+ (nfs3_serializer)xdr_serialize_fsinfo3res);
+ return 0;
+}
+
+
+int32_t
+nfs3svc_fsinfo_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ nfsstat3 status = NFS3ERR_SERVERFAULT;
+ nfs3_call_state_t *cs = NULL;
+
+ cs = frame->local;
+
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_WARNING,
+ "%x: %s => -1 (%s)", rpcsvc_request_xid (cs->req),
+ cs->resolvedloc.path, strerror (op_errno));
+ status = nfs3_cbk_errno_status (op_ret, op_errno);
+ }else
+ status = NFS3_OK;
+
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_FSINFO, status,
+ op_errno);
+
+ nfs3_fsinfo_reply (cs->req, status, buf);
+ nfs3_call_state_wipe (cs);
+
+ return 0;
+}
+
+
+int
+nfs3_fsinfo_resume (void *carg)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ nfs_user_t nfu = {0, };
+ nfs3_call_state_t *cs = NULL;
+
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ nfs3_check_fh_resolve_status (cs, stat, nfs3err);
+ nfs_request_user_init (&nfu, cs->req);
+
+ ret = nfs_stat (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ nfs3svc_fsinfo_cbk, cs);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_FSINFO,
+ stat, -ret);
+ nfs3_fsinfo_reply (cs->req, stat, NULL);
+ nfs3_call_state_wipe (cs);
+ }
+
+ return ret;
+}
+
+
+int
+nfs3_fsinfo (rpcsvc_request_t *req, struct nfs3_fh *fh)
+{
+ xlator_t *vol = NULL;
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ struct nfs3_state *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+
+ if ((!req) || (!fh)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
+ return -1;
+ }
+
+ nfs3_log_common_call (rpcsvc_request_xid (req), "FSINFO", fh);
+ nfs3_validate_gluster_fh (fh, stat, nfs3err);
+ nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
+ nfs3_map_fh_to_volume (nfs3, fh, req, vol, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
+ nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
+
+ ret = nfs3_fh_resolve_and_resume (cs, fh, NULL, nfs3_fsinfo_resume);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (req), NFS3_FSINFO,
+ stat, -ret);
+ nfs3_fsinfo_reply (req, stat, NULL);
+ nfs3_call_state_wipe (cs);
+ ret = 0;
+ }
+out:
+ return ret;
+}
+
+
+int
+nfs3svc_fsinfo (rpcsvc_request_t *req)
+{
+ int ret = RPCSVC_ACTOR_ERROR;
+ fsinfo3args args;
+ struct nfs3_fh root = {{0}, };
+
+ if (!req)
+ return ret;
+
+ nfs3_prep_fsinfo3args (&args, &root);
+ if (xdr_to_fsinfo3args (req->msg[0], &args) <= 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding arguments");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+
+ ret = nfs3_fsinfo (req, &root);
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "FSINFO procedure failed");
+ rpcsvc_request_seterr (req, SYSTEM_ERR);
+ ret = RPCSVC_ACTOR_ERROR;
+ }
+
+rpcerr:
+ return ret;
+}
+
+
+int
+nfs3_pathconf_reply (rpcsvc_request_t *req, nfsstat3 stat, struct iatt *buf)
+{
+ pathconf3res res = {0, };
+ uint64_t deviceid = 0;
+
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_pathconf3res (&res, stat, buf, deviceid);
+ nfs3svc_submit_reply (req, (void *)&res,
+ (nfs3_serializer)xdr_serialize_pathconf3res);
+ return 0;
+}
+
+
+int32_t
+nfs3svc_pathconf_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ struct iatt *sbuf = NULL;
+ nfs3_call_state_t *cs = NULL;
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+
+ cs = frame->local;
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_WARNING,
+ "%x: %s => -1 (%s)", rpcsvc_request_xid (cs->req),
+ cs->resolvedloc.path, strerror (op_errno));
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ } else {
+ /* If stat fop failed, we can still send the other components
+ * in a pathconf reply.
+ */
+ sbuf = buf;
+ stat = NFS3_OK;
+ }
+
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_PATHCONF, stat,
+ op_errno);
+ nfs3_pathconf_reply (cs->req, stat, sbuf);
+ nfs3_call_state_wipe (cs);
+
+ return 0;
+}
+
+
+int
+nfs3_pathconf_resume (void *carg)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ nfs_user_t nfu = {0, };
+ nfs3_call_state_t *cs = NULL;
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ nfs3_check_fh_resolve_status (cs, stat, nfs3err);
+ nfs_request_user_init (&nfu, cs->req);
+ ret = nfs_stat (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc,
+ nfs3svc_pathconf_cbk, cs);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req),
+ NFS3_PATHCONF, stat, -ret);
+ nfs3_pathconf_reply (cs->req, stat, NULL);
+ nfs3_call_state_wipe (cs);
+ }
+
+ return ret;
+}
+
+int
+nfs3_pathconf (rpcsvc_request_t *req, struct nfs3_fh *fh)
+{
+ xlator_t *vol = NULL;
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ struct nfs3_state *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+
+ if ((!req) || (!fh)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
+ return -1;
+ }
+
+ nfs3_log_common_call (rpcsvc_request_xid (req), "PATHCONF", fh);
+ nfs3_validate_gluster_fh (fh, stat, nfs3err);
+ nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
+ nfs3_map_fh_to_volume (nfs3, fh, req, vol, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
+ nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
+
+ ret = nfs3_fh_resolve_and_resume (cs, fh, NULL, nfs3_pathconf_resume);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (req), NFS3_PATHCONF,
+ stat, -ret);
+ nfs3_pathconf_reply (req, stat, NULL);
+ nfs3_call_state_wipe (cs);
+ /* Ret must be 0 after this so that the caller does not
+ * also send an RPC reply.
+ */
+ ret = 0;
+ }
+out:
+ return ret;
+}
+
+
+int
+nfs3svc_pathconf (rpcsvc_request_t *req)
+{
+ struct nfs3_fh fh = {{0}, };
+ pathconf3args args;
+ int ret = RPCSVC_ACTOR_ERROR;
+
+ if (!req)
+ return ret;
+ nfs3_prep_pathconf3args (&args, &fh);
+ if (xdr_to_pathconf3args (req->msg[0], &args) <= 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+
+ ret = nfs3_pathconf (req, &fh);
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "PATHCONF procedure failed");
+ rpcsvc_request_seterr (req, SYSTEM_ERR);
+ ret = RPCSVC_ACTOR_ERROR;
+ }
+
+rpcerr:
+ return ret;
+}
+
+int
+nfs3_commit_reply (rpcsvc_request_t *req, nfsstat3 stat, uint64_t wverf,
+ struct iatt *prestat, struct iatt *poststat)
+{
+ commit3res res = {0, };
+ uint64_t deviceid = 0;
+
+ deviceid = nfs3_request_xlator_deviceid (req);
+ nfs3_fill_commit3res (&res, stat, wverf, prestat, poststat, deviceid);
+ nfs3svc_submit_reply (req, (void *)&res,
+ (nfs3_serializer)xdr_serialize_commit3res);
+
+ return 0;
+}
+
+
+int32_t
+nfs3svc_commit_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ nfs3_call_state_t *cs = NULL;
+ struct nfs3_state *nfs3 = NULL;
+
+ cs = frame->local;
+ if (op_ret == -1) {
+ gf_log (GF_NFS, GF_LOG_WARNING,
+ "%x: %s => -1 (%s)", rpcsvc_request_xid (cs->req),
+ cs->resolvedloc.path, strerror (op_errno));
+ stat = nfs3_cbk_errno_status (op_ret, op_errno);
+ } else
+ stat = NFS3_OK;
+
+ nfs3 = rpcsvc_request_program_private (cs->req);
+ nfs3_log_commit_res (rpcsvc_request_xid (cs->req), stat, op_errno,
+ nfs3->serverstart);
+ nfs3_commit_reply (cs->req, stat, nfs3->serverstart, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+
+ return 0;
+}
+
+int
+nfs3_commit_resume (void *carg)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ nfs_user_t nfu = {0, };
+ nfs3_call_state_t *cs = NULL;
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ nfs3_check_fh_resolve_status (cs, stat, nfs3err);
+
+ if (nfs3_export_sync_trusted (cs->nfs3state, cs->resolvefh.exportid)) {
+ ret = -1;
+ stat = NFS3_OK;
+ goto nfs3err;
+ }
+
+ nfs_request_user_init (&nfu, cs->req);
+ ret = nfs_flush (cs->nfsx, cs->vol, &nfu, cs->fd,
+ nfs3svc_commit_cbk, cs);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_COMMIT,
+ stat, -ret);
+ nfs3_commit_reply (cs->req, stat, cs->nfs3state->serverstart,
+ NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ ret = 0;
+ }
+
+ return 0;
+}
+
+
+int
+nfs3_commit_open_resume (void *carg)
+{
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ nfs3_call_state_t *cs = NULL;
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ nfs3_check_fh_resolve_status (cs, stat, nfs3err);
+ cs->fd = fd_anonymous (cs->resolvedloc.inode);
+ if (!cs->fd) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to create anonymous fd.");
+ goto nfs3err;
+ }
+
+ ret = nfs3_commit_resume (cs);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (cs->req), NFS3_COMMIT,
+ stat, -ret);
+ nfs3_commit_reply (cs->req, stat, 0, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ }
+
+ return ret;
+}
+
+
+
+int
+nfs3_commit (rpcsvc_request_t *req, struct nfs3_fh *fh, offset3 offset,
+ count3 count)
+{
+ xlator_t *vol = NULL;
+ nfsstat3 stat = NFS3ERR_SERVERFAULT;
+ int ret = -EFAULT;
+ struct nfs3_state *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+
+ if ((!req) || (!fh)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Bad arguments");
+ return -1;
+ }
+
+ nfs3_log_rw_call (rpcsvc_request_xid (req), "COMMIT", fh, offset,
+ count, -1);
+ nfs3_validate_gluster_fh (fh, stat, nfs3err);
+ nfs3_validate_nfs3_state (req, nfs3, stat, nfs3err, ret);
+ nfs3_map_fh_to_volume (nfs3, fh, req, vol, stat, nfs3err);
+ nfs3_volume_started_check (nfs3, vol, ret, out);
+ nfs3_check_rw_volaccess (nfs3, fh->exportid, stat, nfs3err);
+ nfs3_handle_call_state_init (nfs3, cs, req, vol, stat, nfs3err);
+
+ cs->datacount = count;
+ cs->dataoffset = offset;
+ ret = nfs3_fh_resolve_and_resume (cs, fh, NULL,
+ nfs3_commit_open_resume);
+ if (ret < 0)
+ stat = nfs3_errno_to_nfsstat3 (-ret);
+
+nfs3err:
+ if (ret < 0) {
+ nfs3_log_common_res (rpcsvc_request_xid (req), NFS3_COMMIT,
+ stat, -ret);
+ nfs3_commit_reply (req, stat, 0, NULL, NULL);
+ nfs3_call_state_wipe (cs);
+ ret = 0;
+ }
+out:
+ return ret;
+}
+
+
+
+int
+nfs3svc_commit (rpcsvc_request_t *req)
+{
+ struct nfs3_fh fh = {{0}, };
+ commit3args args;
+ int ret = RPCSVC_ACTOR_ERROR;
+
+ if (!req)
+ return ret;
+ nfs3_prep_commit3args (&args, &fh);
+ if (xdr_to_commit3args (req->msg[0], &args) <= 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Error decoding args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+
+ ret = nfs3_commit (req, &fh, args.offset, args.count);
+ if ((ret < 0) && (ret != RPCSVC_ACTOR_IGNORE)) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "COMMIT procedure failed");
+ rpcsvc_request_seterr (req, SYSTEM_ERR);
+ ret = RPCSVC_ACTOR_ERROR;
+ }
+
+rpcerr:
+ return ret;
+}
+
+
+rpcsvc_actor_t nfs3svc_actors[NFS3_PROC_COUNT] = {
+ {"NULL", NFS3_NULL, nfs3svc_null, NULL, 0, DRC_IDEMPOTENT},
+ {"GETATTR", NFS3_GETATTR, nfs3svc_getattr, NULL, 0, DRC_IDEMPOTENT},
+ {"SETATTR", NFS3_SETATTR, nfs3svc_setattr, NULL, 0, DRC_NON_IDEMPOTENT},
+ {"LOOKUP", NFS3_LOOKUP, nfs3svc_lookup, NULL, 0, DRC_IDEMPOTENT},
+ {"ACCESS", NFS3_ACCESS, nfs3svc_access, NULL, 0, DRC_IDEMPOTENT},
+ {"READLINK", NFS3_READLINK, nfs3svc_readlink, NULL, 0, DRC_IDEMPOTENT},
+ {"READ", NFS3_READ, nfs3svc_read, NULL, 0, DRC_IDEMPOTENT},
+ {"WRITE", NFS3_WRITE, nfs3svc_write, nfs3svc_write_vecsizer, 0, DRC_NON_IDEMPOTENT},
+ {"CREATE", NFS3_CREATE, nfs3svc_create, NULL, 0, DRC_NON_IDEMPOTENT},
+ {"MKDIR", NFS3_MKDIR, nfs3svc_mkdir, NULL, 0, DRC_NON_IDEMPOTENT},
+ {"SYMLINK", NFS3_SYMLINK, nfs3svc_symlink, NULL, 0, DRC_NON_IDEMPOTENT},
+ {"MKNOD", NFS3_MKNOD, nfs3svc_mknod, NULL, 0, DRC_NON_IDEMPOTENT},
+ {"REMOVE", NFS3_REMOVE, nfs3svc_remove, NULL, 0, DRC_NON_IDEMPOTENT},
+ {"RMDIR", NFS3_RMDIR, nfs3svc_rmdir, NULL, 0, DRC_NON_IDEMPOTENT},
+ {"RENAME", NFS3_RENAME, nfs3svc_rename, NULL, 0, DRC_NON_IDEMPOTENT},
+ {"LINK", NFS3_LINK, nfs3svc_link, NULL, 0, DRC_NON_IDEMPOTENT},
+ {"READDIR", NFS3_READDIR, nfs3svc_readdir, NULL, 0, DRC_IDEMPOTENT},
+ {"READDIRPLUS", NFS3_READDIRP, nfs3svc_readdirp, NULL, 0, DRC_IDEMPOTENT},
+ {"FSSTAT", NFS3_FSSTAT, nfs3svc_fsstat, NULL, 0, DRC_IDEMPOTENT},
+ {"FSINFO", NFS3_FSINFO, nfs3svc_fsinfo, NULL, 0, DRC_IDEMPOTENT},
+ {"PATHCONF", NFS3_PATHCONF, nfs3svc_pathconf, NULL, 0, DRC_IDEMPOTENT},
+ {"COMMIT", NFS3_COMMIT, nfs3svc_commit, NULL, 0, DRC_IDEMPOTENT}
+};
+
+
+rpcsvc_program_t nfs3prog = {
+ .progname = "NFS3",
+ .prognum = NFS_PROGRAM,
+ .progver = NFS_V3,
+ .progport = GF_NFS3_PORT,
+ .actors = nfs3svc_actors,
+ .numactors = NFS3_PROC_COUNT,
+
+ /* Requests like FSINFO are sent before an auth scheme
+ * is inited by client. See RFC 2623, Section 2.3.2. */
+ .min_auth = AUTH_NULL,
+};
+
+/*
+ * This function rounds up the input value to multiple of 4096. Min and Max
+ * supported I/O size limits are 4KB (GF_NFS3_FILE_IO_SIZE_MIN) and
+ * 1MB (GF_NFS3_FILE_IO_SIZE_MAX).
+ */
+void
+nfs3_iosize_roundup_4KB (uint64_t *ioszptr)
+{
+ uint64_t iosize;
+ uint64_t iopages;
+
+ if (!ioszptr)
+ return;
+
+ iosize = *ioszptr;
+ iopages = (iosize + GF_NFS3_IO_SIZE -1) >> GF_NFS3_IO_SHIFT;
+ iosize = (iopages * GF_NFS3_IO_SIZE);
+
+ /* Double check - boundary conditions */
+ if (iosize < GF_NFS3_FILE_IO_SIZE_MIN) {
+ iosize = GF_NFS3_FILE_IO_SIZE_MIN;
+ } else if (iosize > GF_NFS3_FILE_IO_SIZE_MAX) {
+ iosize = GF_NFS3_FILE_IO_SIZE_MAX;
+ }
+
+ *ioszptr = iosize;
+}
+
+int
+nfs3_init_options (struct nfs3_state *nfs3, dict_t *options)
+{
+ int ret = -1;
+ char *optstr = NULL;
+ uint64_t size64 = 0;
+
+ if ((!nfs3) || (!options))
+ return -1;
+
+ /* nfs3.read-size */
+ nfs3->readsize = GF_NFS3_RTPREF;
+ if (dict_get (options, "nfs3.read-size")) {
+ ret = dict_get_str (options, "nfs3.read-size", &optstr);
+ if (ret < 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to read "
+ " option: nfs3.read-size");
+ ret = -1;
+ goto err;
+ }
+
+ ret = gf_string2bytesize (optstr, &size64);
+ if (ret == -1) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to format"
+ " option: nfs3.read-size");
+ ret = -1;
+ goto err;
+ }
+
+ nfs3_iosize_roundup_4KB (&size64);
+ nfs3->readsize = size64;
+ }
+
+ /* nfs3.write-size */
+ nfs3->writesize = GF_NFS3_WTPREF;
+ if (dict_get (options, "nfs3.write-size")) {
+ ret = dict_get_str (options, "nfs3.write-size", &optstr);
+ if (ret < 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to read "
+ " option: nfs3.write-size");
+ ret = -1;
+ goto err;
+ }
+
+ ret = gf_string2bytesize (optstr, &size64);
+ if (ret == -1) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to format"
+ " option: nfs3.write-size");
+ ret = -1;
+ goto err;
+ }
+
+ nfs3_iosize_roundup_4KB (&size64);
+ nfs3->writesize = size64;
+ }
+
+ /* nfs3.readdir.size */
+ nfs3->readdirsize = GF_NFS3_DTPREF;
+ if (dict_get (options, "nfs3.readdir-size")) {
+ ret = dict_get_str (options,"nfs3.readdir-size", &optstr);
+ if (ret < 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to read"
+ " option: nfs3.readdir-size");
+ ret = -1;
+ goto err;
+ }
+
+ ret = gf_string2bytesize (optstr, &size64);
+ if (ret == -1) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to format"
+ " option: nfs3.readdir-size");
+ ret = -1;
+ goto err;
+ }
+
+ nfs3_iosize_roundup_4KB (&size64);
+ nfs3->readdirsize = size64;
+ }
+
+ /* We want to use the size of the biggest param for the io buffer size.
+ */
+ nfs3->iobsize = nfs3->readsize;
+ if (nfs3->iobsize < nfs3->writesize)
+ nfs3->iobsize = nfs3->writesize;
+ if (nfs3->iobsize < nfs3->readdirsize)
+ nfs3->iobsize = nfs3->readdirsize;
+
+ /* But this is the true size of each iobuf. We need this size to
+ * accommodate the NFS headers also in the same buffer. */
+ nfs3->iobsize = nfs3->iobsize * 2;
+
+ /* mem-factor */
+ nfs3->memfactor = GF_NFS3_DEFAULT_MEMFACTOR;
+ ret = 0;
+err:
+ return ret;
+}
+
+int
+nfs3_init_subvolume_options (xlator_t *nfsx,
+ struct nfs3_export *exp,
+ dict_t *options)
+{
+ int ret = -1;
+ char *optstr = NULL;
+ char searchkey[1024];
+ char *name = NULL;
+ gf_boolean_t boolt = _gf_false;
+ uuid_t volumeid = {0, };
+
+ if ((!nfsx) || (!exp))
+ return -1;
+
+ /* For init, fetch options from xlator but for
+ * reconfigure, take the parameter */
+ if (!options)
+ options = nfsx->options;
+
+ if (!options)
+ return (-1);
+
+ uuid_clear (volumeid);
+ if (gf_nfs_dvm_off (nfs_state (nfsx)))
+ goto no_dvm;
+
+ ret = snprintf (searchkey, 1024, "nfs3.%s.volume-id",exp->subvol->name);
+ if (ret < 0) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "snprintf failed");
+ ret = -1;
+ goto err;
+ }
+
+ if (dict_get (options, searchkey)) {
+ ret = dict_get_str (options, searchkey, &optstr);
+ if (ret < 0) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Failed to read option"
+ ": %s", searchkey);
+ ret = -1;
+ goto err;
+ }
+ } else {
+ gf_log (GF_MNT, GF_LOG_ERROR, "DVM is on but volume-id not "
+ "given for volume: %s", exp->subvol->name);
+ ret = -1;
+ goto err;
+ }
+
+ if (optstr) {
+ ret = uuid_parse (optstr, volumeid);
+ if (ret < 0) {
+ gf_log (GF_MNT, GF_LOG_ERROR, "Failed to parse volume "
+ "UUID");
+ ret = -1;
+ goto err;
+ }
+ uuid_copy (exp->volumeid, volumeid);
+ }
+
+no_dvm:
+ /* Volume Access */
+ name = exp->subvol->name;
+ ret = snprintf (searchkey, 1024, "nfs3.%s.volume-access", name);
+ if (ret < 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "snprintf failed");
+ ret = -1;
+ goto err;
+ }
+
+ exp->access = GF_NFS3_DEFAULT_VOLACCESS;
+ if (dict_get (options, searchkey)) {
+ ret = dict_get_str (options, searchkey, &optstr);
+ if (ret < 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to read "
+ " option: %s", searchkey);
+ ret = -1;
+ goto err;
+ }
+
+ if (strcmp (optstr, "read-only") == 0)
+ exp->access = GF_NFS3_VOLACCESS_RO;
+ }
+
+ ret = snprintf (searchkey, 1024, "rpc-auth.%s.unix", name);
+ if (ret < 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "snprintf failed");
+ ret = -1;
+ goto err;
+ }
+
+ if (dict_get (options, searchkey)) {
+ ret = dict_get_str (options, searchkey, &optstr);
+ if (ret < 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to read "
+ " option: %s", searchkey);
+ ret = -1;
+ goto err;
+ }
+ }
+
+ exp->trusted_sync = 0;
+ ret = snprintf (searchkey, 1024, "nfs3.%s.trusted-sync", name);
+ if (ret < 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "snprintf failed");
+ ret = -1;
+ goto err;
+ }
+
+ if (dict_get (options, searchkey)) {
+ ret = dict_get_str (options, searchkey, &optstr);
+ if (ret < 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to read "
+ " option: %s", searchkey);
+ ret = -1;
+ goto err;
+ }
+
+ ret = gf_string2boolean (optstr, &boolt);
+ if (ret < 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to convert str "
+ "to gf_boolean_t");
+ ret = -1;
+ goto err;
+ }
+
+ if (boolt == _gf_true)
+ exp->trusted_sync = 1;
+ }
+
+ exp->trusted_write = 0;
+ ret = snprintf (searchkey, 1024, "nfs3.%s.trusted-write", name);
+ if (ret < 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "snprintf failed");
+ ret = -1;
+ goto err;
+ }
+
+ if (dict_get (options, searchkey)) {
+ ret = dict_get_str (options, searchkey, &optstr);
+ if (ret < 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to read "
+ " option: %s", searchkey);
+ ret = -1;
+ goto err;
+ }
+
+ ret = gf_string2boolean (optstr, &boolt);
+ if (ret < 0) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to convert str "
+ "to gf_boolean_t");
+ ret = -1;
+ goto err;
+ }
+
+ if (boolt == _gf_true)
+ exp->trusted_write = 1;
+ }
+
+ /* If trusted-sync is on, then we also switch on trusted-write because
+ * tw is included in ts. In write logic, we're then only checking for
+ * tw.
+ */
+ if (exp->trusted_sync)
+ exp->trusted_write = 1;
+
+ gf_log (GF_NFS3, GF_LOG_TRACE, "%s: %s, %s, %s", exp->subvol->name,
+ (exp->access == GF_NFS3_VOLACCESS_RO)?"read-only":"read-write",
+ (exp->trusted_sync == 0)?"no trusted_sync":"trusted_sync",
+ (exp->trusted_write == 0)?"no trusted_write":"trusted_write");
+ ret = 0;
+err:
+ return ret;
+}
+
+
+struct nfs3_export *
+nfs3_init_subvolume (struct nfs3_state *nfs3, xlator_t *subvol)
+{
+ int ret = -1;
+ struct nfs3_export *exp = NULL;
+
+ if ((!nfs3) || (!subvol))
+ return NULL;
+
+ exp = GF_CALLOC (1, sizeof (*exp), gf_nfs_mt_nfs3_export);
+ exp->subvol = subvol;
+ INIT_LIST_HEAD (&exp->explist);
+ gf_log (GF_NFS3, GF_LOG_TRACE, "Initing state: %s", exp->subvol->name);
+
+ ret = nfs3_init_subvolume_options (nfs3->nfsx, exp, NULL);
+ if (ret == -1) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to init subvol");
+ goto exp_free;
+ }
+
+ ret = 0;
+exp_free:
+ if (ret < 0) {
+ GF_FREE (exp);
+ exp = NULL;
+ }
+
+ return exp;
+}
+
+
+int
+nfs3_init_subvolumes (struct nfs3_state *nfs3)
+{
+ int ret = -1;
+ struct xlator_list *xl_list = NULL;
+ struct nfs3_export *exp = NULL;
+
+ if (!nfs3)
+ return -1;
+
+ xl_list = nfs3->nfsx->children;
+
+ while (xl_list) {
+ exp = nfs3_init_subvolume (nfs3, xl_list->xlator);
+ if (!exp) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to init subvol: "
+ "%s", xl_list->xlator->name);
+ goto err;
+ }
+ list_add_tail (&exp->explist, &nfs3->exports);
+ xl_list = xl_list->next;
+ }
+
+ ret = 0;
+err:
+ return ret;
+}
+
+
+struct nfs3_state *
+nfs3_init_state (xlator_t *nfsx)
+{
+ struct nfs3_state *nfs3 = NULL;
+ int ret = -1;
+ unsigned int localpool = 0;
+ struct nfs_state *nfs = NULL;
+
+ if (!nfsx)
+ return NULL;
+
+ nfs3 = (struct nfs3_state *)GF_CALLOC (1, sizeof (*nfs3),
+ gf_nfs_mt_nfs3_state);
+ if (!nfs3) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Memory allocation failed");
+ return NULL;
+ }
+
+ nfs = nfsx->private;
+ ret = nfs3_init_options (nfs3, nfsx->options);
+ if (ret == -1) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to init options");
+ goto ret;
+ }
+
+ nfs3->iobpool = nfsx->ctx->iobuf_pool;
+
+ localpool = nfs3->memfactor * GF_NFS_CONCURRENT_OPS_MULT;
+ gf_log (GF_NFS3, GF_LOG_TRACE, "local pool: %d", localpool);
+ nfs3->localpool = mem_pool_new (nfs3_call_state_t, localpool);
+ if (!nfs3->localpool) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "local mempool creation failed");
+ ret = -1;
+ goto ret;
+ }
+
+ nfs3->nfsx = nfsx;
+ nfs3->exportslist = nfsx->children;
+ INIT_LIST_HEAD (&nfs3->exports);
+ ret = nfs3_init_subvolumes (nfs3);
+ if (ret == -1) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to init per-subvolume "
+ "state");
+ goto free_localpool;
+ }
+
+ nfs3->serverstart = (uint64_t)time (NULL);
+ INIT_LIST_HEAD (&nfs3->fdlru);
+ LOCK_INIT (&nfs3->fdlrulock);
+ nfs3->fdcount = 0;
+
+ ret = rpcsvc_create_listeners (nfs->rpcsvc, nfsx->options, nfsx->name);
+ if (ret == -1) {
+ gf_log (GF_NFS, GF_LOG_ERROR, "Unable to create listeners");
+ goto free_localpool;
+ }
+
+ nfs->nfs3state = nfs3;
+ ret = 0;
+
+free_localpool:
+ if (ret == -1)
+ mem_pool_destroy (nfs3->localpool);
+
+ret:
+ if (ret == -1) {
+ GF_FREE (nfs3);
+ nfs3 = NULL;
+ }
+
+ return nfs3;
+}
+
+
+rpcsvc_program_t *
+nfs3svc_init (xlator_t *nfsx)
+{
+ struct nfs3_state *nfs3 = NULL;
+
+ if (!nfsx)
+ return NULL;
+
+ nfs3 = nfs3_init_state (nfsx);
+ if (!nfs3) {
+ gf_log (GF_NFS3, GF_LOG_ERROR, "NFSv3 state init failed");
+ return NULL;
+ }
+
+ nfs3prog.private = nfs3;
+
+ return &nfs3prog;
+}
+
+int
+nfs3_reconfigure_state (xlator_t *nfsx, dict_t *options)
+{
+ int ret = -1;
+ struct nfs3_export *exp = NULL;
+ struct nfs_state *nfs = NULL;
+ struct nfs3_state *nfs3 = NULL;
+
+ if ((!nfsx) || (!nfsx->private) || (!options))
+ goto out;
+
+ nfs = (struct nfs_state *)nfsx->private;
+ nfs3 = nfs->nfs3state;
+ if (!nfs3)
+ goto out;
+
+ ret = nfs3_init_options (nfs3, options);
+ if (ret) {
+ gf_log (GF_NFS3, GF_LOG_ERROR,
+ "Failed to reconfigure options");
+ goto out;
+ }
+
+ list_for_each_entry (exp, &nfs3->exports, explist) {
+ ret = nfs3_init_subvolume_options (nfsx, exp, options);
+ if (ret) {
+ gf_log (GF_NFS3, GF_LOG_ERROR,
+ "Failed to reconfigure subvol options");
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
diff --git a/xlators/nfs/server/src/nfs3.h b/xlators/nfs/server/src/nfs3.h
new file mode 100644
index 000000000..0c35445a4
--- /dev/null
+++ b/xlators/nfs/server/src/nfs3.h
@@ -0,0 +1,285 @@
+/*
+ Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _NFS3_H_
+#define _NFS3_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "rpcsvc.h"
+#include "dict.h"
+#include "xlator.h"
+#include "iobuf.h"
+#include "nfs.h"
+#include "nfs3-fh.h"
+#include "nfs-common.h"
+#include "xdr-nfs3.h"
+#include "mem-pool.h"
+#include "nlm4.h"
+#include "acl3-xdr.h"
+#include "acl3.h"
+#include <sys/statvfs.h>
+
+#define GF_NFS3 GF_NFS"-nfsv3"
+
+#define GF_NFS3_DEFAULT_MEMFACTOR 15
+#define GF_NFS3_IOBPOOL_MULT GF_NFS_CONCURRENT_OPS_MULT
+#define GF_NFS3_CLTABLE_BUCKETS_MULT 2
+#define GF_NFS3_FDTABLE_BUCKETS_MULT 2
+
+
+/* Static values used for FSINFO
+ * To change the maximum rsize and wsize supported by the NFS client, adjust
+ * GF_NFS3_FILE_IO_SIZE_MAX. The Gluster NFS server defaults to 1MB(1048576)
+ * (same as kernel NFS server). For slower network, rsize/wsize can be trimmed
+ * to 16/32/64-KB. rsize and wsize can be tuned through nfs.read-size and
+ * nfs.write-size respectively.
+ *
+ * NB: For Kernel-NFS, NFS_MAX_FILE_IO_SIZE is 1048576U (1MB).
+ */
+#define GF_NFS3_FILE_IO_SIZE_MAX (1 * GF_UNIT_MB) /* 1048576 */
+#define GF_NFS3_FILE_IO_SIZE_MIN (4 * GF_UNIT_KB) /* 4096 */
+
+#define GF_NFS3_FILE_IO_SIZE_DEF GF_NFS3_FILE_IO_SIZE_MAX
+
+#define GF_NFS3_RTMAX GF_NFS3_FILE_IO_SIZE_MAX
+#define GF_NFS3_RTMIN GF_NFS3_FILE_IO_SIZE_MIN
+#define GF_NFS3_RTPREF GF_NFS3_FILE_IO_SIZE_DEF
+#define GF_NFS3_RTMULT GF_NFS3_FILE_IO_SIZE_MIN
+
+#define GF_NFS3_WTMAX GF_NFS3_FILE_IO_SIZE_MAX
+#define GF_NFS3_WTMIN GF_NFS3_FILE_IO_SIZE_MIN
+#define GF_NFS3_WTPREF GF_NFS3_FILE_IO_SIZE_DEF
+#define GF_NFS3_WTMULT GF_NFS3_FILE_IO_SIZE_MIN
+
+/* This can be tuned through nfs.readdir-size */
+#define GF_NFS3_DTMAX GF_NFS3_FILE_IO_SIZE_MAX
+#define GF_NFS3_DTMIN GF_NFS3_FILE_IO_SIZE_MIN
+#define GF_NFS3_DTPREF GF_NFS3_FILE_IO_SIZE_DEF
+
+#define GF_NFS3_MAXFILESIZE (1 * GF_UNIT_PB)
+
+#define GF_NFS3_IO_SIZE 4096 /* 4-KB */
+#define GF_NFS3_IO_SHIFT 12 /* 2^12 = 4KB */
+
+/* FIXME: Handle time resolutions */
+#define GF_NFS3_TIMEDELTA_SECS {1,0}
+#define GF_NFS3_TIMEDELTA_NSECS {0,1}
+#define GF_NFS3_TIMEDELTA_MSECS {0,1000000}
+
+#define GF_NFS3_FS_PROP (FSF3_LINK | FSF3_SYMLINK | FSF3_HOMOGENEOUS | FSF3_CANSETTIME)
+
+#define GF_NFS3_DIRFD_VALID 1
+#define GF_NFS3_DIRFD_INVALID 0
+
+#define GF_NFS3_VOLACCESS_RW 1
+#define GF_NFS3_VOLACCESS_RO 2
+
+
+#define GF_NFS3_FDCACHE_SIZE 512
+/* This should probably be moved to a more generic layer so that if needed
+ * different versions of NFS protocol can use the same thing.
+ */
+struct nfs3_fd_entry {
+ fd_t *cachedfd;
+ struct list_head list;
+};
+
+/* Per subvolume nfs3 specific state */
+struct nfs3_export {
+ struct list_head explist;
+ xlator_t *subvol;
+ uuid_t volumeid;
+ int access;
+ int trusted_sync;
+ int trusted_write;
+ int rootlookedup;
+};
+
+#define GF_NFS3_DEFAULT_VOLACCESS (GF_NFS3_VOLACCESS_RW)
+
+/* The NFSv3 protocol state */
+typedef struct nfs3_state {
+
+ /* The NFS xlator pointer. The NFS xlator can be running
+ * multiple versions of the NFS protocol.
+ */
+ xlator_t *nfsx;
+
+ /* The iob pool from which memory allocations are made for receiving
+ * and sending network messages.
+ */
+ struct iobuf_pool *iobpool;
+
+ /* List of child subvolumes for the NFSv3 protocol.
+ * Right now, is simply referring to the list of children in nfsx above.
+ */
+ xlator_list_t *exportslist;
+
+ struct list_head exports;
+ /* Mempool for allocations of struct nfs3_local */
+ struct mem_pool *localpool;
+
+ /* Server start-up timestamp, currently used for write verifier. */
+ uint64_t serverstart;
+
+ /* NFSv3 Protocol configurables */
+ uint64_t readsize;
+ uint64_t writesize;
+ uint64_t readdirsize;
+
+ /* Size of the iobufs used, depends on the sizes of the three params
+ * above.
+ */
+ uint64_t iobsize;
+
+ unsigned int memfactor;
+
+ struct list_head fdlru;
+ gf_lock_t fdlrulock;
+ int fdcount;
+ uint32_t occ_logger;
+} nfs3_state_t;
+
+typedef enum nfs3_lookup_type {
+ GF_NFS3_REVALIDATE = 1,
+ GF_NFS3_FRESH,
+} nfs3_lookup_type_t;
+
+typedef union args_ {
+ nlm4_stat nlm4_stat;
+ nlm4_holder nlm4_holder;
+ nlm4_lock nlm4_lock;
+ nlm4_share nlm4_share;
+ nlm4_testrply nlm4_testrply;
+ nlm4_testres nlm4_testres;
+ nlm4_testargs nlm4_testargs;
+ nlm4_res nlm4_res;
+ nlm4_lockargs nlm4_lockargs;
+ nlm4_cancargs nlm4_cancargs;
+ nlm4_unlockargs nlm4_unlockargs;
+ nlm4_shareargs nlm4_shareargs;
+ nlm4_shareres nlm4_shareres;
+ nlm4_freeallargs nlm4_freeallargs;
+ getaclargs getaclargs;
+ setaclargs setaclargs;
+ getaclreply getaclreply;
+ setaclreply setaclreply;
+} args;
+
+
+typedef int (*nfs3_resume_fn_t) (void *cs);
+/* Structure used to communicate state between a fop and its callback.
+ * Not all members are used at all times. Usage is fop and NFS request
+ * dependent.
+ *
+ * I wish we could have a smaller structure for communicating state
+ * between callers and callbacks. It could be broken into smaller parts
+ * but I feel that will lead to a proliferation of types/structures and then
+ * we'll just be tracking down which structure is used by which fop, not
+ * to mention that having one type allows me to used a single mem-pool.
+ * Imagine the chaos if we need a mem-pool for each one of those sub-structures.
+ */
+struct nfs3_local {
+ rpcsvc_request_t *req;
+ xlator_t *vol;
+ nfs3_resume_fn_t resume_fn;
+ xlator_t *nfsx;
+ struct nfs3_state *nfs3state;
+
+ /* The list hook to attach this call state to the inode's queue till
+ * the opening of the fd on the inode completes.
+ */
+ struct list_head openwait_q;
+
+ /* Per-NFSv3 Op state */
+ struct nfs3_fh parent;
+ struct nfs3_fh fh;
+ fd_t *fd;
+ uint32_t accessbits;
+ int operrno;
+ count3 dircount;
+ count3 maxcount;
+ struct statvfs fsstat;
+ gf_dirent_t entries;
+ struct iatt stbuf;
+ struct iatt preparent;
+ struct iatt postparent;
+ int32_t setattr_valid;
+ nfstime3 timestamp;
+ loc_t oploc;
+ int writetype;
+ count3 datacount;
+ offset3 dataoffset;
+ struct iobuf *iob;
+ struct iobref *iobref;
+ createmode3 createmode;
+ uint64_t cookieverf;
+ int sattrguardcheck;
+ char *pathname;
+ ftype3 mknodtype;
+ specdata3 devnums;
+ cookie3 cookie;
+ struct iovec datavec;
+ mode_t mode;
+
+ /* NFSv3 FH resolver state */
+ int hardresolved;
+ struct nfs3_fh resolvefh;
+ loc_t resolvedloc;
+ int resolve_ret;
+ int resolve_errno;
+ int hashidx;
+ fd_t *resolve_dir_fd;
+ char *resolventry;
+ nfs3_lookup_type_t lookuptype;
+ gf_dirent_t *hashmatch;
+ gf_dirent_t *entrymatch;
+ off_t lastentryoffset;
+ struct flock flock;
+ args args;
+ nlm4_lkowner_t lkowner;
+ char cookiebytes[1024];
+ struct nfs3_fh lockfh;
+ int monitor;
+ rpc_transport_t *trans;
+ call_frame_t *frame;
+
+ /* ACL */
+ aclentry aclentry[NFS_ACL_MAX_ENTRIES];
+ aclentry daclentry[NFS_ACL_MAX_ENTRIES];
+ int aclcount;
+ char aclxattr[NFS_ACL_MAX_ENTRIES*8 + 4];
+ int daclcount;
+ char daclxattr[NFS_ACL_MAX_ENTRIES*8 + 4];
+};
+
+#define nfs3_is_revalidate_lookup(cst) ((cst)->lookuptype == GF_NFS3_REVALIDATE)
+#define nfs3_lookup_op(cst) (rpcsvc_request_procnum(cst->req) == NFS3_LOOKUP)
+#define nfs3_create_op(cst) (rpcsvc_request_procnum(cst->req) == NFS3_CREATE)
+#define nfs3_create_exclusive_op(cst) ((cst)->createmode == EXCLUSIVE)
+
+typedef struct nfs3_local nfs3_call_state_t;
+
+/* Queue of ops waiting for open fop to return. */
+struct inode_op_queue {
+ struct list_head opq;
+ pthread_mutex_t qlock;
+};
+
+extern rpcsvc_program_t *
+nfs3svc_init (xlator_t *nfsx);
+
+extern int
+nfs3_reconfigure_state (xlator_t *nfsx, dict_t *options);
+#endif
diff --git a/xlators/nfs/server/src/nlm4.c b/xlators/nfs/server/src/nlm4.c
new file mode 100644
index 000000000..5c5d87412
--- /dev/null
+++ b/xlators/nfs/server/src/nlm4.c
@@ -0,0 +1,2525 @@
+/*
+ Copyright (c) 2012 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "defaults.h"
+#include "rpcsvc.h"
+#include "dict.h"
+#include "xlator.h"
+#include "nfs.h"
+#include "mem-pool.h"
+#include "logging.h"
+#include "nfs-fops.h"
+#include "inode.h"
+#include "mount3.h"
+#include "nfs3.h"
+#include "nfs-mem-types.h"
+#include "nfs3-helpers.h"
+#include "nfs3-fh.h"
+#include "nlm4.h"
+#include "nlm4-xdr.h"
+#include "msg-nfs3.h"
+#include "nfs-generics.h"
+#include "rpc-clnt.h"
+#include "nsm-xdr.h"
+#include "nlmcbk-xdr.h"
+#include "run.h"
+#include <unistd.h>
+#include <rpc/pmap_clnt.h>
+#include <rpc/rpc.h>
+#include <rpc/xdr.h>
+#include <statedump.h>
+
+/* TODO:
+ * 1) 2 opens racing .. creating an fd leak.
+ * 2) use mempool for nlmclnt - destroy if no fd exists, create during 1st call
+ */
+
+typedef ssize_t (*nlm4_serializer) (struct iovec outmsg, void *args);
+
+extern void nfs3_call_state_wipe (nfs3_call_state_t *cs);
+
+struct list_head nlm_client_list;
+gf_lock_t nlm_client_list_lk;
+
+/* race on this is harmless */
+int nlm_grace_period = 50;
+
+#define nlm4_validate_nfs3_state(request, state, status, label, retval) \
+ do { \
+ state = rpcsvc_request_program_private (request); \
+ if (!state) { \
+ gf_log (GF_NLM, GF_LOG_ERROR, "NFSv3 state " \
+ "missing from RPC request"); \
+ rpcsvc_request_seterr (req, SYSTEM_ERR); \
+ status = nlm4_failed; \
+ goto label; \
+ } \
+ } while (0); \
+
+nfs3_call_state_t *
+nfs3_call_state_init (struct nfs3_state *s, rpcsvc_request_t *req, xlator_t *v);
+
+#define nlm4_handle_call_state_init(nfs3state, calls, rq, opstat, errlabel)\
+ do { \
+ calls = nlm4_call_state_init ((nfs3state), (rq)); \
+ if (!calls) { \
+ gf_log (GF_NLM, GF_LOG_ERROR, "Failed to " \
+ "init call state"); \
+ opstat = nlm4_failed; \
+ rpcsvc_request_seterr (req, SYSTEM_ERR); \
+ goto errlabel; \
+ } \
+ } while (0) \
+
+#define nlm4_validate_gluster_fh(handle, status, errlabel) \
+ do { \
+ if (!nfs3_fh_validate (handle)) { \
+ status = nlm4_stale_fh; \
+ goto errlabel; \
+ } \
+ } while (0) \
+
+xlator_t *
+nfs3_fh_to_xlator (struct nfs3_state *nfs3, struct nfs3_fh *fh);
+
+#define nlm4_map_fh_to_volume(nfs3state, handle, req, volume, status, label) \
+ do { \
+ char exportid[256], gfid[256]; \
+ rpc_transport_t *trans = NULL; \
+ volume = nfs3_fh_to_xlator ((nfs3state), &handle); \
+ if (!volume) { \
+ uuid_unparse (handle.exportid, exportid); \
+ uuid_unparse (handle.gfid, gfid); \
+ trans = rpcsvc_request_transport (req); \
+ gf_log (GF_NLM, GF_LOG_ERROR, "Failed to map " \
+ "FH to vol: client=%s, exportid=%s, gfid=%s",\
+ trans->peerinfo.identifier, exportid, \
+ gfid); \
+ gf_log (GF_NLM, GF_LOG_ERROR, \
+ "Stale nfs client %s must be trying to "\
+ "connect to a deleted volume, please " \
+ "unmount it.", trans->peerinfo.identifier);\
+ status = nlm4_stale_fh; \
+ goto label; \
+ } else { \
+ gf_log (GF_NLM, GF_LOG_TRACE, "FH to Volume: %s"\
+ ,volume->name); \
+ rpcsvc_request_set_private (req, volume); \
+ } \
+ } while (0); \
+
+#define nlm4_volume_started_check(nfs3state, vlm, rtval, erlbl) \
+ do { \
+ if ((!nfs_subvolume_started (nfs_state (nfs3state->nfsx), vlm))){\
+ gf_log (GF_NLM, GF_LOG_ERROR, "Volume is disabled: %s",\
+ vlm->name); \
+ rtval = RPCSVC_ACTOR_IGNORE; \
+ goto erlbl; \
+ } \
+ } while (0) \
+
+#define nlm4_check_fh_resolve_status(cst, nfstat, erlabl) \
+ do { \
+ xlator_t *xlatorp = NULL; \
+ char buf[256], gfid[256]; \
+ rpc_transport_t *trans = NULL; \
+ if ((cst)->resolve_ret < 0) { \
+ trans = rpcsvc_request_transport (cst->req); \
+ xlatorp = nfs3_fh_to_xlator (cst->nfs3state, \
+ &cst->resolvefh); \
+ uuid_unparse (cst->resolvefh.gfid, gfid); \
+ snprintf (buf, sizeof (buf), "(%s) %s : %s", \
+ trans->peerinfo.identifier, \
+ xlatorp ? xlatorp->name : "ERR", \
+ gfid); \
+ gf_log (GF_NLM, GF_LOG_ERROR, "Unable to resolve FH"\
+ ": %s", buf); \
+ nfstat = nlm4_errno_to_nlm4stat (cst->resolve_errno);\
+ goto erlabl; \
+ } \
+ } while (0) \
+
+
+void
+nlm4_prep_nlm4_testargs (nlm4_testargs *args, struct nfs3_fh *fh,
+ nlm4_lkowner_t *oh, char *cookiebytes)
+{
+ memset (args, 0, sizeof (*args));
+ args->alock.fh.n_bytes = (void *)fh;
+ args->alock.oh.n_bytes = (void *)oh;
+ args->cookie.n_bytes = (void *)cookiebytes;
+}
+
+void
+nlm4_prep_nlm4_lockargs (nlm4_lockargs *args, struct nfs3_fh *fh,
+ nlm4_lkowner_t *oh, char *cookiebytes)
+{
+ memset (args, 0, sizeof (*args));
+ args->alock.fh.n_bytes = (void *)fh;
+ args->alock.oh.n_bytes = (void *)oh;
+ args->cookie.n_bytes = (void *)cookiebytes;
+}
+
+void
+nlm4_prep_nlm4_cancargs (nlm4_cancargs *args, struct nfs3_fh *fh,
+ nlm4_lkowner_t *oh, char *cookiebytes)
+{
+ memset (args, 0, sizeof (*args));
+ args->alock.fh.n_bytes = (void *)fh;
+ args->alock.oh.n_bytes = (void *)oh;
+ args->cookie.n_bytes = (void *)cookiebytes;
+}
+
+void
+nlm4_prep_nlm4_unlockargs (nlm4_unlockargs *args, struct nfs3_fh *fh,
+ nlm4_lkowner_t *oh, char *cookiebytes)
+{
+ memset (args, 0, sizeof (*args));
+ args->alock.fh.n_bytes = (void *)fh;
+ args->alock.oh.n_bytes = (void *)oh;
+ args->cookie.n_bytes = (void *)cookiebytes;
+}
+
+void
+nlm4_prep_shareargs (nlm4_shareargs *args, struct nfs3_fh *fh,
+ nlm4_lkowner_t *oh, char *cookiebytes)
+{
+ memset (args, 0, sizeof (*args));
+ args->share.fh.n_bytes = (void *)fh;
+ args->share.oh.n_bytes = (void *)oh;
+ args->cookie.n_bytes = (void *)cookiebytes;
+}
+
+void
+nlm4_prep_freeallargs (nlm4_freeallargs *args, nlm4_lkowner_t *oh)
+{
+ memset (args, 0, sizeof (*args));
+ args->name = (void *)oh;
+}
+
+void
+nlm_copy_lkowner (gf_lkowner_t *dst, netobj *src)
+{
+ dst->len = src->n_len;
+ memcpy (dst->data, src->n_bytes, dst->len);
+}
+
+int
+nlm_is_oh_same_lkowner (gf_lkowner_t *a, netobj *b)
+{
+ if (!a || !b) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "invalid args");
+ return -1;
+ }
+
+ return (a->len == b->n_len &&
+ !memcmp (a->data, b->n_bytes, a->len));
+}
+
+nlm4_stats
+nlm4_errno_to_nlm4stat (int errnum)
+{
+ nlm4_stats stat = nlm4_denied;
+
+ switch (errnum) {
+ case 0:
+ stat = nlm4_granted;
+ break;
+ case EROFS:
+ stat = nlm4_rofs;
+ break;
+ case ESTALE:
+ stat = nlm4_stale_fh;
+ break;
+ case ENOLCK:
+ stat = nlm4_failed;
+ break;
+ default:
+ stat = nlm4_denied;
+ break;
+ }
+
+ return stat;
+}
+
+nfs3_call_state_t *
+nlm4_call_state_init (struct nfs3_state *s, rpcsvc_request_t *req)
+{
+ nfs3_call_state_t *cs = NULL;
+
+ if ((!s) || (!req))
+ return NULL;
+
+ cs = (nfs3_call_state_t *) mem_get (s->localpool);
+ if (!cs)
+ return NULL;
+
+ memset (cs, 0, sizeof (*cs));
+ INIT_LIST_HEAD (&cs->entries.list);
+ INIT_LIST_HEAD (&cs->openwait_q);
+ cs->operrno = EINVAL;
+ cs->req = req;
+ cs->nfsx = s->nfsx;
+ cs->nfs3state = s;
+ cs->monitor = 1;
+
+ return cs;
+}
+
+int
+nlm_monitor (char *caller_name)
+{
+ nlm_client_t *nlmclnt = NULL;
+ int monitor = -1;
+
+ LOCK (&nlm_client_list_lk);
+ list_for_each_entry (nlmclnt, &nlm_client_list, nlm_clients) {
+ if (!strcmp(caller_name, nlmclnt->caller_name)) {
+ monitor = nlmclnt->nsm_monitor;
+ nlmclnt->nsm_monitor = 1;
+ break;
+ }
+ }
+ UNLOCK (&nlm_client_list_lk);
+
+ if (monitor == -1)
+ gf_log (GF_NLM, GF_LOG_ERROR, "%s was not found in "
+ "the nlmclnt list", caller_name);
+
+ return monitor;
+}
+
+rpc_clnt_t *
+nlm_get_rpc_clnt (char *caller_name)
+{
+ nlm_client_t *nlmclnt = NULL;
+ int nlmclnt_found = 0;
+ rpc_clnt_t *rpc_clnt = NULL;
+
+ LOCK (&nlm_client_list_lk);
+ list_for_each_entry (nlmclnt, &nlm_client_list, nlm_clients) {
+ if (!strcmp(caller_name, nlmclnt->caller_name)) {
+ nlmclnt_found = 1;
+ break;
+ }
+ }
+ if (!nlmclnt_found)
+ goto ret;
+ if (nlmclnt->rpc_clnt)
+ rpc_clnt = rpc_clnt_ref (nlmclnt->rpc_clnt);
+ret:
+ UNLOCK (&nlm_client_list_lk);
+ return rpc_clnt;
+}
+
+int
+nlm_set_rpc_clnt (rpc_clnt_t *rpc_clnt, char *caller_name)
+{
+ nlm_client_t *nlmclnt = NULL;
+ int nlmclnt_found = 0;
+ int ret = -1;
+
+ LOCK (&nlm_client_list_lk);
+ list_for_each_entry (nlmclnt, &nlm_client_list, nlm_clients) {
+ if (!strcmp(caller_name, nlmclnt->caller_name)) {
+ nlmclnt_found = 1;
+ break;
+ }
+ }
+
+ if (!nlmclnt_found) {
+ nlmclnt = GF_CALLOC (1, sizeof(*nlmclnt),
+ gf_nfs_mt_nlm4_nlmclnt);
+ if (nlmclnt == NULL)
+ goto ret;
+
+ INIT_LIST_HEAD(&nlmclnt->fdes);
+ INIT_LIST_HEAD(&nlmclnt->nlm_clients);
+ INIT_LIST_HEAD(&nlmclnt->shares);
+
+ list_add (&nlmclnt->nlm_clients, &nlm_client_list);
+ nlmclnt->caller_name = gf_strdup (caller_name);
+ }
+
+ if (nlmclnt->rpc_clnt == NULL) {
+ nlmclnt->rpc_clnt = rpc_clnt_ref (rpc_clnt);
+ }
+ ret = 0;
+ret:
+ UNLOCK (&nlm_client_list_lk);
+ return ret;
+}
+
+int
+nlm_unset_rpc_clnt (rpc_clnt_t *rpc)
+{
+ nlm_client_t *nlmclnt = NULL;
+ rpc_clnt_t *rpc_clnt = NULL;
+
+ LOCK (&nlm_client_list_lk);
+ list_for_each_entry (nlmclnt, &nlm_client_list, nlm_clients) {
+ if (rpc == nlmclnt->rpc_clnt) {
+ rpc_clnt = nlmclnt->rpc_clnt;
+ nlmclnt->rpc_clnt = NULL;
+ break;
+ }
+ }
+ UNLOCK (&nlm_client_list_lk);
+ if (rpc_clnt == NULL) {
+ return -1;
+ }
+ if (rpc_clnt) {
+ /* cleanup the saved-frames before last unref */
+ rpc_clnt_connection_cleanup (&rpc_clnt->conn);
+
+ rpc_clnt_unref (rpc_clnt);
+ }
+ return 0;
+}
+
+int
+nlm_add_nlmclnt (char *caller_name)
+{
+ nlm_client_t *nlmclnt = NULL;
+ int nlmclnt_found = 0;
+ int ret = -1;
+
+ LOCK (&nlm_client_list_lk);
+ list_for_each_entry (nlmclnt, &nlm_client_list, nlm_clients) {
+ if (!strcmp(caller_name, nlmclnt->caller_name)) {
+ nlmclnt_found = 1;
+ break;
+ }
+ }
+ if (!nlmclnt_found) {
+ nlmclnt = GF_CALLOC (1, sizeof(*nlmclnt),
+ gf_nfs_mt_nlm4_nlmclnt);
+ if (nlmclnt == NULL) {
+ gf_log (GF_NLM, GF_LOG_DEBUG, "malloc error");
+ goto ret;
+ }
+
+ INIT_LIST_HEAD(&nlmclnt->fdes);
+ INIT_LIST_HEAD(&nlmclnt->nlm_clients);
+ INIT_LIST_HEAD(&nlmclnt->shares);
+
+ list_add (&nlmclnt->nlm_clients, &nlm_client_list);
+ nlmclnt->caller_name = gf_strdup (caller_name);
+ }
+ ret = 0;
+ret:
+ UNLOCK (&nlm_client_list_lk);
+ return ret;
+}
+
+int
+nlm4svc_submit_reply (rpcsvc_request_t *req, void *arg, nlm4_serializer sfunc)
+{
+ struct iovec outmsg = {0, };
+ struct iobuf *iob = NULL;
+ struct nfs3_state *nfs3 = NULL;
+ int ret = -1;
+ ssize_t msglen = 0;
+ struct iobref *iobref = NULL;
+
+ if (!req)
+ return -1;
+
+ nfs3 = (struct nfs3_state *)rpcsvc_request_program_private (req);
+ if (!nfs3) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "mount state not found");
+ goto ret;
+ }
+
+ /* First, get the io buffer into which the reply in arg will
+ * be serialized.
+ */
+ iob = iobuf_get (nfs3->iobpool);
+ if (!iob) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "Failed to get iobuf");
+ goto ret;
+ }
+
+ iobuf_to_iovec (iob, &outmsg);
+ /* Use the given serializer to translate the give C structure in arg
+ * to XDR format which will be written into the buffer in outmsg.
+ */
+ msglen = sfunc (outmsg, arg);
+ if (msglen < 0) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "Failed to encode message");
+ goto ret;
+ }
+ outmsg.iov_len = msglen;
+
+ iobref = iobref_new ();
+ if (iobref == NULL) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "Failed to get iobref");
+ goto ret;
+ }
+
+ ret = iobref_add (iobref, iob);
+ if (ret) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "Failed to add iob to iobref");
+ goto ret;
+ }
+
+ /* Then, submit the message for transmission. */
+ ret = rpcsvc_submit_message (req, &outmsg, 1, NULL, 0, iobref);
+ if (ret == -1) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "Reply submission failed");
+ goto ret;
+ }
+
+ ret = 0;
+ret:
+ if (iob)
+ iobuf_unref (iob);
+ if (iobref)
+ iobref_unref (iobref);
+
+ return ret;
+}
+
+typedef int (*nlm4_resume_fn_t) (void *cs);
+
+int32_t
+nlm4_file_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ nfs3_call_state_t *cs = frame->local;
+
+ if (op_ret == 0)
+ fd_bind (cs->fd);
+ cs->resolve_ret = op_ret;
+ cs->resume_fn (cs);
+ frame->local = NULL;
+ STACK_DESTROY (frame->root);
+ return 0;
+}
+
+void *
+nsm_monitor(void *arg)
+{
+ CLIENT *clnt = NULL;
+ enum clnt_stat ret;
+ struct mon nsm_mon;
+ struct sm_stat_res res;
+ struct timeval tout = { 5, 0 };
+ char *host = NULL;
+
+ host = arg;
+ nsm_mon.mon_id.mon_name = gf_strdup(host);
+ nsm_mon.mon_id.my_id.my_name = gf_strdup("localhost");
+ nsm_mon.mon_id.my_id.my_prog = NLMCBK_PROGRAM;
+ nsm_mon.mon_id.my_id.my_vers = NLMCBK_V1;
+ nsm_mon.mon_id.my_id.my_proc = NLMCBK_SM_NOTIFY;
+ /* nothing to put in the private data */
+#define SM_PROG 100024
+#define SM_VERS 1
+#define SM_MON 2
+
+ /* create a connection to nsm on the localhost */
+ clnt = clnt_create("localhost", SM_PROG, SM_VERS, "tcp");
+ if(!clnt)
+ {
+ gf_log (GF_NLM, GF_LOG_ERROR, "%s",
+ clnt_spcreateerror ("Clnt_create()"));
+ goto out;
+ }
+
+ ret = clnt_call(clnt, SM_MON,
+ (xdrproc_t) xdr_mon, (caddr_t) & nsm_mon,
+ (xdrproc_t) xdr_sm_stat_res, (caddr_t) & res, tout);
+ if(ret != RPC_SUCCESS)
+ {
+ gf_log (GF_NLM, GF_LOG_ERROR, "clnt_call(): %s",
+ clnt_sperrno(ret));
+ goto out;
+ }
+ if(res.res_stat != STAT_SUCC)
+ {
+ gf_log (GF_NLM, GF_LOG_ERROR, "clnt_call(): %s",
+ clnt_sperrno(ret));
+ goto out;
+ }
+
+out:
+ GF_FREE(nsm_mon.mon_id.mon_name);
+ GF_FREE(nsm_mon.mon_id.my_id.my_name);
+ if (clnt != NULL)
+ clnt_destroy(clnt);
+ return NULL;
+}
+
+nlm_client_t *
+__nlm_get_uniq (char *caller_name)
+{
+ nlm_client_t *nlmclnt = NULL;
+
+ if (!caller_name)
+ return NULL;
+
+ list_for_each_entry (nlmclnt, &nlm_client_list, nlm_clients) {
+ if (!strcmp(caller_name, nlmclnt->caller_name))
+ return nlmclnt;
+ }
+
+ return NULL;
+}
+
+nlm_client_t *
+nlm_get_uniq (char *caller_name)
+{
+ nlm_client_t *nlmclnt = NULL;
+
+ LOCK (&nlm_client_list_lk);
+ nlmclnt = __nlm_get_uniq (caller_name);
+ UNLOCK (&nlm_client_list_lk);
+
+ return nlmclnt;
+}
+
+
+int
+nlm4_file_open_and_resume(nfs3_call_state_t *cs, nlm4_resume_fn_t resume)
+{
+ fd_t *fd = NULL;
+ int ret = -1;
+ int flags = 0;
+ nlm_client_t *nlmclnt = NULL;
+ call_frame_t *frame = NULL;
+
+ if (cs->args.nlm4_lockargs.exclusive == _gf_false)
+ flags = O_RDONLY;
+ else
+ flags = O_WRONLY;
+
+ nlmclnt = nlm_get_uniq (cs->args.nlm4_lockargs.alock.caller_name);
+ if (nlmclnt == NULL) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "nlm_get_uniq() returned NULL");
+ ret = -ENOLCK;
+ goto err;
+ }
+ cs->resume_fn = resume;
+ fd = fd_lookup_uint64 (cs->resolvedloc.inode, (uint64_t)nlmclnt);
+ if (fd) {
+ cs->fd = fd;
+ cs->resolve_ret = 0;
+ cs->resume_fn(cs);
+ ret = 0;
+ goto err;
+ }
+
+ fd = fd_create_uint64 (cs->resolvedloc.inode, (uint64_t)nlmclnt);
+ if (fd == NULL) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "fd_create_uint64() returned NULL");
+ ret = -ENOLCK;
+ goto err;
+ }
+
+ cs->fd = fd;
+
+ frame = create_frame (cs->nfsx, cs->nfsx->ctx->pool);
+ if (!frame) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "unable to create frame");
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ frame->root->pid = NFS_PID;
+ frame->root->uid = rpcsvc_request_uid (cs->req);
+ frame->root->gid = rpcsvc_request_gid (cs->req);
+ frame->local = cs;
+ nfs_fix_groups (cs->nfsx, frame->root);
+
+ STACK_WIND_COOKIE (frame, nlm4_file_open_cbk, cs->vol, cs->vol,
+ cs->vol->fops->open, &cs->resolvedloc, flags,
+ cs->fd, NULL);
+ ret = 0;
+err:
+ return ret;
+}
+
+int
+nlm4_generic_reply (rpcsvc_request_t *req, netobj cookie, nlm4_stats stat)
+{
+ nlm4_res res;
+
+ memset (&res, 0, sizeof (res));
+ res.cookie = cookie;
+ res.stat.stat = stat;
+
+ nlm4svc_submit_reply (req, (void *)&res,
+ (nlm4_serializer)xdr_serialize_nlm4_res);
+ return 0;
+}
+
+int
+nlm4svc_null (rpcsvc_request_t *req)
+{
+ struct iovec dummyvec = {0, };
+
+ if (!req) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "Got NULL request!");
+ return 0;
+ }
+ rpcsvc_submit_generic (req, &dummyvec, 1, NULL, 0, NULL);
+ return 0;
+}
+
+int
+nlm4_gf_flock_to_holder (nlm4_holder *holder, struct gf_flock *flock)
+{
+ switch (flock->l_type) {
+ case GF_LK_F_WRLCK:
+ holder->exclusive = 1;
+ break;
+ }
+
+ holder->svid = flock->l_pid;
+ holder->l_offset = flock->l_start;
+ holder->l_len = flock->l_len;
+ return 0;
+}
+
+int
+nlm4_lock_to_gf_flock (struct gf_flock *flock, nlm4_lock *lock, int excl)
+{
+ flock->l_pid = lock->svid;
+ flock->l_start = lock->l_offset;
+ flock->l_len = lock->l_len;
+ if (excl)
+ flock->l_type = F_WRLCK;
+ else
+ flock->l_type = F_RDLCK;
+ flock->l_whence = SEEK_SET;
+ nlm_copy_lkowner (&flock->l_owner, &lock->oh);
+ return 0;
+}
+
+rpc_clnt_procedure_t nlm4_clnt_actors[NLM4_PROC_COUNT] = {
+ [NLM4_NULL] = {"NULL", NULL},
+ [NLM4_GRANTED] = {"GRANTED", NULL},
+};
+
+char *nlm4_clnt_names[NLM4_PROC_COUNT] = {
+ [NLM4_NULL] = "NULL",
+ [NLM4_GRANTED] = "GRANTED",
+};
+
+rpc_clnt_prog_t nlm4clntprog = {
+ .progname = "NLMv4",
+ .prognum = NLM_PROGRAM,
+ .progver = NLM_V4,
+ .numproc = NLM4_PROC_COUNT,
+ .proctable = nlm4_clnt_actors,
+ .procnames = nlm4_clnt_names,
+};
+
+int
+nlm4_test_reply (nfs3_call_state_t *cs, nlm4_stats stat, struct gf_flock *flock)
+{
+ nlm4_testres res;
+
+ memset (&res, 0, sizeof (res));
+ res.cookie = cs->args.nlm4_testargs.cookie;
+ res.stat.stat = stat;
+ if (stat == nlm4_denied)
+ nlm4_gf_flock_to_holder (&res.stat.nlm4_testrply_u.holder,
+ flock);
+
+ nlm4svc_submit_reply (cs->req, (void *)&res,
+ (nlm4_serializer)xdr_serialize_nlm4_testres);
+ return 0;
+}
+
+int
+nlm4svc_test_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *flock,
+ dict_t *xdata)
+{
+ nlm4_stats stat = nlm4_denied;
+ nfs3_call_state_t *cs = NULL;
+
+ cs = frame->local;
+ if (op_ret == -1) {
+ stat = nlm4_errno_to_nlm4stat (op_errno);
+ goto err;
+ } else if (flock->l_type == F_UNLCK)
+ stat = nlm4_granted;
+
+err:
+ nlm4_test_reply (cs, stat, flock);
+ nfs3_call_state_wipe (cs);
+ return 0;
+}
+
+int
+nlm4_test_fd_resume (void *carg)
+{
+ int ret = -EFAULT;
+ nfs_user_t nfu = {0, };
+ nfs3_call_state_t *cs = NULL;
+ struct gf_flock flock = {0, };
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ nfs_request_user_init (&nfu, cs->req);
+ nlm4_lock_to_gf_flock (&flock, &cs->args.nlm4_testargs.alock,
+ cs->args.nlm4_testargs.exclusive);
+ nlm_copy_lkowner (&nfu.lk_owner, &cs->args.nlm4_testargs.alock.oh);
+ ret = nfs_lk (cs->nfsx, cs->vol, &nfu, cs->fd, F_GETLK, &flock,
+ nlm4svc_test_cbk, cs);
+
+ return ret;
+}
+
+
+int
+nlm4_test_resume (void *carg)
+{
+ nlm4_stats stat = nlm4_failed;
+ int ret = -1;
+ nfs3_call_state_t *cs = NULL;
+ fd_t *fd = NULL;
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ nlm4_check_fh_resolve_status (cs, stat, nlm4err);
+ fd = fd_anonymous (cs->resolvedloc.inode);
+ if (!fd)
+ goto nlm4err;
+ cs->fd = fd;
+ ret = nlm4_test_fd_resume (cs);
+
+nlm4err:
+ if (ret < 0) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "unable to open_and_resume");
+ stat = nlm4_errno_to_nlm4stat (-ret);
+ nlm4_test_reply (cs, stat, NULL);
+ nfs3_call_state_wipe (cs);
+ }
+
+ return ret;
+}
+
+int
+nlm4svc_test (rpcsvc_request_t *req)
+{
+ xlator_t *vol = NULL;
+ nlm4_stats stat = nlm4_failed;
+ struct nfs_state *nfs = NULL;
+ nfs3_state_t *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+ int ret = RPCSVC_ACTOR_ERROR;
+ struct nfs3_fh fh = {{0}, };
+
+ if (!req)
+ return ret;
+
+ nlm4_validate_nfs3_state (req, nfs3, stat, rpcerr, ret);
+ nfs = nfs_state (nfs3->nfsx);
+ nlm4_handle_call_state_init (nfs->nfs3state, cs, req,
+ stat, rpcerr);
+
+ nlm4_prep_nlm4_testargs (&cs->args.nlm4_testargs, &fh, &cs->lkowner,
+ cs->cookiebytes);
+ if (xdr_to_nlm4_testargs(req->msg[0], &cs->args.nlm4_testargs) <= 0) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "Error decoding args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+
+ nlm4_validate_gluster_fh (&fh, stat, nlm4err);
+ nlm4_map_fh_to_volume (cs->nfs3state, fh, req, vol, stat, nlm4err);
+
+ if (nlm_grace_period) {
+ gf_log (GF_NLM, GF_LOG_WARNING, "NLM in grace period");
+ stat = nlm4_denied_grace_period;
+ nlm4_test_reply (cs, stat, NULL);
+ nfs3_call_state_wipe (cs);
+ return 0;
+ }
+
+ cs->vol = vol;
+ nlm4_volume_started_check (nfs3, vol, ret, rpcerr);
+
+ ret = nfs3_fh_resolve_and_resume (cs, &fh,
+ NULL, nlm4_test_resume);
+
+nlm4err:
+ if (ret < 0) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "unable to resolve and resume");
+ nlm4_test_reply (cs, stat, NULL);
+ nfs3_call_state_wipe (cs);
+ return 0;
+ }
+
+rpcerr:
+ if (ret < 0)
+ nfs3_call_state_wipe (cs);
+
+ return ret;
+}
+
+int
+nlm4svc_send_granted_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ STACK_DESTROY (((call_frame_t*)myframe)->root);
+ return 0;
+}
+
+void
+nlm4svc_send_granted (nfs3_call_state_t *cs);
+
+int
+nlm_rpcclnt_notify (struct rpc_clnt *rpc_clnt, void *mydata,
+ rpc_clnt_event_t fn, void *data)
+{
+ int ret = 0;
+ char *caller_name = NULL;
+ nfs3_call_state_t *cs = NULL;
+
+ cs = mydata;
+ caller_name = cs->args.nlm4_lockargs.alock.caller_name;
+
+ switch (fn) {
+ case RPC_CLNT_CONNECT:
+ ret = nlm_set_rpc_clnt (rpc_clnt, caller_name);
+ if (ret == -1) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "Failed to set rpc clnt");
+ goto err;
+ }
+ rpc_clnt_unref (rpc_clnt);
+ nlm4svc_send_granted (cs);
+
+ break;
+
+ case RPC_CLNT_MSG:
+ break;
+
+ case RPC_CLNT_DISCONNECT:
+ nlm_unset_rpc_clnt (rpc_clnt);
+ break;
+ }
+
+ err:
+ return 0;
+}
+
+void *
+nlm4_establish_callback (void *csarg)
+{
+ int ret = -1;
+ nfs3_call_state_t *cs = NULL;
+ union gf_sock_union sock_union;
+ dict_t *options = NULL;
+ char peerip[INET6_ADDRSTRLEN+1] = {0};
+ char *portstr = NULL;
+ char myip[INET6_ADDRSTRLEN+1] = {0};
+ rpc_clnt_t *rpc_clnt = NULL;
+ int port = -1;
+
+
+ cs = (nfs3_call_state_t *) csarg;
+ glusterfs_this_set (cs->nfsx);
+
+ rpc_transport_get_peeraddr (cs->trans, NULL, 0, &sock_union.storage,
+ sizeof (sock_union.storage));
+
+ switch (sock_union.sa.sa_family) {
+ case AF_INET6:
+ /* can not come here as NLM listens on IPv4 */
+ gf_log (GF_NLM, GF_LOG_ERROR, "NLM is not supported on IPv6 in"
+ " this release");
+ goto err;
+/*
+ inet_ntop (AF_INET6,
+ &((struct sockaddr_in6 *)sockaddr)->sin6_addr,
+ peerip, INET6_ADDRSTRLEN+1);
+ break;
+*/
+ case AF_INET:
+ inet_ntop (AF_INET, &sock_union.sin.sin_addr, peerip,
+ INET6_ADDRSTRLEN+1);
+ inet_ntop (AF_INET, &(((struct sockaddr_in *)&cs->trans->myinfo.sockaddr)->sin_addr),
+ myip, INET6_ADDRSTRLEN + 1);
+
+ break;
+ default:
+ break;
+ /* FIXME: handle the error */
+ }
+
+ /* looks like libc rpc supports only ipv4 */
+ port = pmap_getport (&sock_union.sin, NLM_PROGRAM,
+ NLM_V4, IPPROTO_TCP);
+
+ if (port == 0) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "Unable to get NLM port of the "
+ "client. Is the firewall running on client?");
+ goto err;
+ }
+
+ options = dict_new();
+ ret = dict_set_str (options, "transport-type", "socket");
+ if (ret == -1) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "dict_set_str error");
+ goto err;
+ }
+
+ ret = dict_set_dynstr (options, "remote-host", gf_strdup (peerip));
+ if (ret == -1) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "dict_set_str error");
+ goto err;
+ }
+
+ ret = gf_asprintf (&portstr, "%d", port);
+ if (ret == -1)
+ goto err;
+
+ ret = dict_set_dynstr (options, "remote-port",
+ portstr);
+ if (ret == -1) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "dict_set_dynstr error");
+ goto err;
+ }
+
+ /* needed in case virtual IP is used */
+ ret = dict_set_dynstr (options, "transport.socket.source-addr",
+ gf_strdup (myip));
+ if (ret == -1)
+ goto err;
+
+ ret = dict_set_str (options, "auth-null", "on");
+ if (ret == -1) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "dict_set_dynstr error");
+ goto err;
+ }
+
+ /* TODO: is 32 frames in transit enough ? */
+ rpc_clnt = rpc_clnt_new (options, cs->nfsx->ctx, "NLM-client", 32);
+ if (rpc_clnt == NULL) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "rpc_clnt NULL");
+ goto err;
+ }
+
+ ret = rpc_clnt_register_notify (rpc_clnt, nlm_rpcclnt_notify, cs);
+ if (ret == -1) {
+ gf_log (GF_NLM, GF_LOG_ERROR,"rpc_clnt_register_connect error");
+ goto err;
+ }
+
+ /* After this connect succeeds, granted msg is sent in notify */
+ ret = rpc_transport_connect (rpc_clnt->conn.trans, port);
+
+ if (ret == -1 && EINPROGRESS == errno)
+ ret = 0;
+
+err:
+ if (ret == -1 && rpc_clnt) {
+ rpc_clnt_unref (rpc_clnt);
+ }
+
+ return rpc_clnt;
+}
+
+void
+nlm4svc_send_granted (nfs3_call_state_t *cs)
+{
+ int ret = -1;
+ rpc_clnt_t *rpc_clnt = NULL;
+ struct iovec outmsg = {0, };
+ nlm4_testargs testargs;
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ char peerip[INET6_ADDRSTRLEN+1];
+ union gf_sock_union sock_union;
+
+ rpc_clnt = nlm_get_rpc_clnt (cs->args.nlm4_lockargs.alock.caller_name);
+ if (rpc_clnt == NULL) {
+ nlm4_establish_callback ((void*)cs);
+ return;
+ }
+
+ rpc_transport_get_peeraddr (cs->trans, NULL, 0, &sock_union.storage,
+ sizeof (sock_union.storage));
+
+ switch (sock_union.sa.sa_family) {
+ case AF_INET6:
+ inet_ntop (AF_INET6, &sock_union.sin6.sin6_addr, peerip,
+ INET6_ADDRSTRLEN+1);
+ break;
+ case AF_INET:
+ inet_ntop (AF_INET, &sock_union.sin.sin_addr, peerip,
+ INET6_ADDRSTRLEN+1);
+ break;
+ default:
+ break;
+ }
+
+ testargs.cookie = cs->args.nlm4_lockargs.cookie;
+ testargs.exclusive = cs->args.nlm4_lockargs.exclusive;
+ testargs.alock = cs->args.nlm4_lockargs.alock;
+
+ iobuf = iobuf_get (cs->nfs3state->iobpool);
+ if (!iobuf) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "Failed to get iobuf");
+ goto ret;
+ }
+
+ iobuf_to_iovec (iobuf, &outmsg);
+ /* Use the given serializer to translate the give C structure in arg
+ * to XDR format which will be written into the buffer in outmsg.
+ */
+ outmsg.iov_len = xdr_serialize_nlm4_testargs (outmsg, &testargs);
+
+ iobref = iobref_new ();
+ if (iobref == NULL) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "Failed to get iobref");
+ goto ret;
+ }
+
+ ret = iobref_add (iobref, iobuf);
+ if (ret) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "Failed to add iob to iobref");
+ goto ret;
+ }
+
+ ret = rpc_clnt_submit (rpc_clnt, &nlm4clntprog, NLM4_GRANTED,
+ nlm4svc_send_granted_cbk, &outmsg, 1,
+ NULL, 0, iobref, cs->frame, NULL, 0,
+ NULL, 0, NULL);
+
+ if (ret < 0) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "rpc_clnt_submit error");
+ goto ret;
+ }
+ret:
+ if (iobref)
+ iobref_unref (iobref);
+ if (iobuf)
+ iobuf_unref (iobuf);
+
+ rpc_clnt_unref (rpc_clnt);
+ nfs3_call_state_wipe (cs);
+ return;
+}
+
+int
+nlm_cleanup_fds (char *caller_name)
+{
+ int nlmclnt_found = 0;
+ nlm_fde_t *fde = NULL, *tmp = NULL;
+ nlm_client_t *nlmclnt = NULL;
+
+ LOCK (&nlm_client_list_lk);
+ list_for_each_entry (nlmclnt,
+ &nlm_client_list, nlm_clients) {
+ if (!strcmp(caller_name, nlmclnt->caller_name)) {
+ nlmclnt_found = 1;
+ break;
+ }
+ }
+
+ if (!nlmclnt_found)
+ goto ret;
+
+ if (list_empty (&nlmclnt->fdes))
+ goto ret;
+
+ list_for_each_entry_safe (fde, tmp, &nlmclnt->fdes, fde_list) {
+ fd_unref (fde->fd);
+ list_del (&fde->fde_list);
+ GF_FREE (fde);
+ }
+
+ret:
+ UNLOCK (&nlm_client_list_lk);
+ return 0;
+}
+
+void
+nlm_search_and_delete (fd_t *fd, char *caller_name)
+{
+ nlm_fde_t *fde = NULL;
+ nlm_client_t *nlmclnt = NULL;
+ int nlmclnt_found = 0;
+ int fde_found = 0;
+ int transit_cnt = 0;
+
+ LOCK (&nlm_client_list_lk);
+ list_for_each_entry (nlmclnt,
+ &nlm_client_list, nlm_clients) {
+ if (!strcmp(caller_name, nlmclnt->caller_name)) {
+ nlmclnt_found = 1;
+ break;
+ }
+ }
+
+ if (!nlmclnt_found)
+ goto ret;
+
+ list_for_each_entry (fde, &nlmclnt->fdes, fde_list) {
+ if (fde->fd == fd) {
+ fde_found = 1;
+ break;
+ }
+ }
+
+ if (!fde_found)
+ goto ret;
+ transit_cnt = fde->transit_cnt;
+ if (transit_cnt)
+ goto ret;
+ list_del (&fde->fde_list);
+
+ret:
+ UNLOCK (&nlm_client_list_lk);
+
+ if (fde_found && !transit_cnt) {
+ fd_unref (fde->fd);
+ GF_FREE (fde);
+ }
+ return;
+}
+
+int
+nlm_dec_transit_count (fd_t *fd, char *caller_name)
+{
+ nlm_fde_t *fde = NULL;
+ nlm_client_t *nlmclnt = NULL;
+ int nlmclnt_found = 0;
+ int fde_found = 0;
+ int transit_cnt = -1;
+
+ LOCK (&nlm_client_list_lk);
+ list_for_each_entry (nlmclnt,
+ &nlm_client_list, nlm_clients) {
+ if (!strcmp(caller_name, nlmclnt->caller_name)) {
+ nlmclnt_found = 1;
+ break;
+ }
+ }
+
+ if (!nlmclnt_found) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "nlmclnt not found");
+ nlmclnt = NULL;
+ goto ret;
+ }
+
+ list_for_each_entry (fde, &nlmclnt->fdes, fde_list) {
+ if (fde->fd == fd) {
+ fde_found = 1;
+ break;
+ }
+ }
+
+ if (fde_found) {
+ transit_cnt = --fde->transit_cnt;
+ goto ret;
+ }
+ret:
+
+ UNLOCK (&nlm_client_list_lk);
+ return transit_cnt;
+}
+
+
+nlm_client_t *
+nlm_search_and_add (fd_t *fd, char *caller_name)
+{
+ nlm_fde_t *fde = NULL;
+ nlm_client_t *nlmclnt = NULL;
+ int nlmclnt_found = 0;
+ int fde_found = 0;
+
+ LOCK (&nlm_client_list_lk);
+ list_for_each_entry (nlmclnt,
+ &nlm_client_list, nlm_clients) {
+ if (!strcmp(caller_name, nlmclnt->caller_name)) {
+ nlmclnt_found = 1;
+ break;
+ }
+ }
+
+ if (!nlmclnt_found) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "nlmclnt not found");
+ nlmclnt = NULL;
+ goto ret;
+ }
+
+ list_for_each_entry (fde, &nlmclnt->fdes, fde_list) {
+ if (fde->fd == fd) {
+ fde_found = 1;
+ break;
+ }
+ }
+
+ if (fde_found)
+ goto ret;
+
+ fde = GF_CALLOC (1, sizeof (*fde), gf_nfs_mt_nlm4_fde);
+
+ fde->fd = fd_ref (fd);
+ list_add (&fde->fde_list, &nlmclnt->fdes);
+ret:
+ if (nlmclnt_found && fde)
+ fde->transit_cnt++;
+ UNLOCK (&nlm_client_list_lk);
+ return nlmclnt;
+}
+
+int
+nlm4svc_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *flock,
+ dict_t *xdata)
+{
+ nlm4_stats stat = nlm4_denied;
+ int transit_cnt = -1;
+ char *caller_name = NULL;
+ nfs3_call_state_t *cs = NULL;
+ pthread_t thr;
+
+ cs = frame->local;
+ caller_name = cs->args.nlm4_lockargs.alock.caller_name;
+ transit_cnt = nlm_dec_transit_count (cs->fd, caller_name);
+
+ if (op_ret == -1) {
+ if (transit_cnt == 0)
+ nlm_search_and_delete (cs->fd, caller_name);
+ stat = nlm4_errno_to_nlm4stat (op_errno);
+ goto err;
+ } else {
+ stat = nlm4_granted;
+ if (cs->monitor && !nlm_monitor (caller_name)) {
+ /* FIXME: handle nsm_monitor failure */
+ pthread_create (&thr, NULL, nsm_monitor, (void*)caller_name);
+ }
+ }
+
+err:
+ if (cs->args.nlm4_lockargs.block) {
+ cs->frame = copy_frame (frame);
+ frame->local = NULL;
+ nlm4svc_send_granted (cs);
+ } else {
+ nlm4_generic_reply (cs->req, cs->args.nlm4_lockargs.cookie,
+ stat);
+ nfs3_call_state_wipe (cs);
+ }
+ return 0;
+}
+
+int
+nlm4_lock_fd_resume (void *carg)
+{
+ nlm4_stats stat = nlm4_denied;
+ int ret = -EFAULT;
+ nfs_user_t nfu = {0, };
+ nfs3_call_state_t *cs = NULL;
+ struct gf_flock flock = {0, };
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ nlm4_check_fh_resolve_status (cs, stat, nlm4err);
+ (void) nlm_search_and_add (cs->fd,
+ cs->args.nlm4_lockargs.alock.caller_name);
+ nfs_request_user_init (&nfu, cs->req);
+ nlm4_lock_to_gf_flock (&flock, &cs->args.nlm4_lockargs.alock,
+ cs->args.nlm4_lockargs.exclusive);
+ nlm_copy_lkowner (&nfu.lk_owner, &cs->args.nlm4_lockargs.alock.oh);
+ if (cs->args.nlm4_lockargs.block) {
+ nlm4_generic_reply (cs->req, cs->args.nlm4_lockargs.cookie,
+ nlm4_blocked);
+ ret = nfs_lk (cs->nfsx, cs->vol, &nfu, cs->fd, F_SETLKW,
+ &flock, nlm4svc_lock_cbk, cs);
+ /* FIXME: handle error from nfs_lk() specially by just
+ * cleaning up cs and unblock the client lock request.
+ */
+ ret = 0;
+ } else
+ ret = nfs_lk (cs->nfsx, cs->vol, &nfu, cs->fd, F_SETLK,
+ &flock, nlm4svc_lock_cbk, cs);
+
+nlm4err:
+ if (ret < 0) {
+ stat = nlm4_errno_to_nlm4stat (-ret);
+ gf_log (GF_NLM, GF_LOG_ERROR, "unable to call lk()");
+ nlm4_generic_reply (cs->req, cs->args.nlm4_lockargs.cookie,
+ stat);
+ nfs3_call_state_wipe (cs);
+ }
+
+ return ret;
+}
+
+
+int
+nlm4_lock_resume (void *carg)
+{
+ nlm4_stats stat = nlm4_failed;
+ int ret = -1;
+ nfs3_call_state_t *cs = NULL;
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ nlm4_check_fh_resolve_status (cs, stat, nlm4err);
+ ret = nlm4_file_open_and_resume (cs, nlm4_lock_fd_resume);
+
+nlm4err:
+ if (ret < 0) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "unable to open and resume");
+ stat = nlm4_errno_to_nlm4stat (-ret);
+ nlm4_generic_reply (cs->req, cs->args.nlm4_lockargs.cookie,
+ stat);
+ nfs3_call_state_wipe (cs);
+ }
+
+ return ret;
+}
+
+int
+nlm4svc_lock_common (rpcsvc_request_t *req, int mon)
+{
+ int ret = RPCSVC_ACTOR_ERROR;
+ nlm4_stats stat = nlm4_failed;
+ struct nfs3_fh fh = {{0}, };
+ xlator_t *vol = NULL;
+ nfs3_state_t *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+ struct nfs_state *nfs = NULL;
+
+ if (!req)
+ return ret;
+
+ nlm4_validate_nfs3_state (req, nfs3, stat, rpcerr, ret);
+ nfs = nfs_state (nfs3->nfsx);
+ nlm4_handle_call_state_init (nfs->nfs3state, cs, req,
+ stat, rpcerr);
+
+ nlm4_prep_nlm4_lockargs (&cs->args.nlm4_lockargs, &cs->lockfh,
+ &cs->lkowner, cs->cookiebytes);
+ if (xdr_to_nlm4_lockargs(req->msg[0], &cs->args.nlm4_lockargs) <= 0) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "Error decoding args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+
+ fh = cs->lockfh;
+ cs->monitor = mon;
+ nlm4_validate_gluster_fh (&fh, stat, nlm4err);
+ nlm4_map_fh_to_volume (cs->nfs3state, fh, req, vol, stat, nlm4err);
+
+ if (nlm_grace_period && !cs->args.nlm4_lockargs.reclaim) {
+ gf_log (GF_NLM, GF_LOG_WARNING, "NLM in grace period");
+ stat = nlm4_denied_grace_period;
+ nlm4_generic_reply (req, cs->args.nlm4_unlockargs.cookie, stat);
+ nfs3_call_state_wipe (cs);
+ return 0;
+ }
+
+ cs->vol = vol;
+ cs->trans = rpcsvc_request_transport_ref(req);
+ nlm4_volume_started_check (nfs3, vol, ret, rpcerr);
+
+ ret = nlm_add_nlmclnt (cs->args.nlm4_lockargs.alock.caller_name);
+
+ ret = nfs3_fh_resolve_and_resume (cs, &fh,
+ NULL, nlm4_lock_resume);
+
+nlm4err:
+ if (ret < 0) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "unable to resolve and resume");
+ nlm4_generic_reply (cs->req, cs->args.nlm4_lockargs.cookie,
+ stat);
+ nfs3_call_state_wipe (cs);
+ return 0;
+ }
+
+rpcerr:
+ if (ret < 0) {
+ nfs3_call_state_wipe (cs);
+ }
+
+ return ret;
+}
+
+int
+nlm4svc_lock (rpcsvc_request_t *req)
+{
+ return nlm4svc_lock_common (req, 1);
+}
+
+int
+nlm4svc_nm_lock (rpcsvc_request_t *req)
+{
+ return nlm4svc_lock_common (req, 0);
+}
+
+int
+nlm4svc_cancel_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *flock,
+ dict_t *xdata)
+{
+ nlm4_stats stat = nlm4_denied;
+ nfs3_call_state_t *cs = NULL;
+
+ cs = frame->local;
+ if (op_ret == -1) {
+ stat = nlm4_errno_to_nlm4stat (op_errno);
+ goto err;
+ } else
+ stat = nlm4_granted;
+
+err:
+ nlm4_generic_reply (cs->req, cs->args.nlm4_cancargs.cookie,
+ stat);
+ nfs3_call_state_wipe (cs);
+ return 0;
+}
+
+int
+nlm4_cancel_fd_resume (void *carg)
+{
+ int ret = -EFAULT;
+ nfs_user_t nfu = {0, };
+ nfs3_call_state_t *cs = NULL;
+ struct gf_flock flock = {0, };
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ nfs_request_user_init (&nfu, cs->req);
+ nlm4_lock_to_gf_flock (&flock, &cs->args.nlm4_cancargs.alock,
+ cs->args.nlm4_cancargs.exclusive);
+ nlm_copy_lkowner (&nfu.lk_owner, &cs->args.nlm4_cancargs.alock.oh);
+ flock.l_type = F_UNLCK;
+ ret = nfs_lk (cs->nfsx, cs->vol, &nfu, cs->fd, F_SETLK,
+ &flock, nlm4svc_cancel_cbk, cs);
+
+ return ret;
+}
+
+int
+nlm4_cancel_resume (void *carg)
+{
+ nlm4_stats stat = nlm4_failed;
+ int ret = -EFAULT;
+ nfs3_call_state_t *cs = NULL;
+ nlm_client_t *nlmclnt = NULL;
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ nlm4_check_fh_resolve_status (cs, stat, nlm4err);
+
+ nlmclnt = nlm_get_uniq (cs->args.nlm4_cancargs.alock.caller_name);
+ if (nlmclnt == NULL) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "nlm_get_uniq() returned NULL");
+ goto nlm4err;
+ }
+ cs->fd = fd_lookup_uint64 (cs->resolvedloc.inode, (uint64_t)nlmclnt);
+ if (cs->fd == NULL) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "fd_lookup_uint64 retrned NULL");
+ goto nlm4err;
+ }
+ ret = nlm4_cancel_fd_resume (cs);
+
+nlm4err:
+ if (ret < 0) {
+ gf_log (GF_NLM, GF_LOG_WARNING, "unable to unlock_fd_resume()");
+ stat = nlm4_errno_to_nlm4stat (-ret);
+ nlm4_generic_reply (cs->req, cs->args.nlm4_cancargs.cookie,
+ stat);
+
+ nfs3_call_state_wipe (cs);
+ }
+ /* clean up is taken care of */
+ return 0;
+}
+
+int
+nlm4svc_cancel (rpcsvc_request_t *req)
+{
+ xlator_t *vol = NULL;
+ nlm4_stats stat = nlm4_failed;
+ struct nfs_state *nfs = NULL;
+ nfs3_state_t *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+ int ret = RPCSVC_ACTOR_ERROR;
+ struct nfs3_fh fh = {{0}, };
+
+ if (!req)
+ return ret;
+
+ nlm4_validate_nfs3_state (req, nfs3, stat, rpcerr, ret);
+ nfs = nfs_state (nfs3->nfsx);
+ nlm4_handle_call_state_init (nfs->nfs3state, cs, req,
+ stat, rpcerr);
+
+ nlm4_prep_nlm4_cancargs (&cs->args.nlm4_cancargs, &fh, &cs->lkowner,
+ cs->cookiebytes);
+ if (xdr_to_nlm4_cancelargs(req->msg[0], &cs->args.nlm4_cancargs) <= 0) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "Error decoding args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+
+ nlm4_validate_gluster_fh (&fh, stat, nlm4err);
+ nlm4_map_fh_to_volume (cs->nfs3state, fh, req, vol, stat, nlm4err);
+
+ if (nlm_grace_period) {
+ gf_log (GF_NLM, GF_LOG_WARNING, "NLM in grace period");
+ stat = nlm4_denied_grace_period;
+ nlm4_generic_reply (req, cs->args.nlm4_unlockargs.cookie, stat);
+ nfs3_call_state_wipe (cs);
+ return 0;
+ }
+
+ cs->vol = vol;
+ nlm4_volume_started_check (nfs3, vol, ret, rpcerr);
+
+ ret = nfs3_fh_resolve_and_resume (cs, &fh,
+ NULL, nlm4_cancel_resume);
+
+nlm4err:
+ if (ret < 0) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "unable to resolve and resume");
+ nlm4_generic_reply (cs->req, cs->args.nlm4_cancargs.cookie,
+ stat);
+ nfs3_call_state_wipe (cs);
+ return 0;
+ }
+
+rpcerr:
+ if (ret < 0) {
+ nfs3_call_state_wipe (cs);
+ }
+ return ret;
+}
+
+int
+nlm4svc_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *flock,
+ dict_t *xdata)
+{
+ nlm4_stats stat = nlm4_denied;
+ nfs3_call_state_t *cs = NULL;
+
+ cs = frame->local;
+ if (op_ret == -1) {
+ stat = nlm4_errno_to_nlm4stat (op_errno);
+ goto err;
+ } else {
+ stat = nlm4_granted;
+ if (flock->l_type == F_UNLCK)
+ nlm_search_and_delete (cs->fd,
+ cs->args.nlm4_unlockargs.alock.caller_name);
+ }
+
+err:
+ nlm4_generic_reply (cs->req, cs->args.nlm4_unlockargs.cookie, stat);
+ nfs3_call_state_wipe (cs);
+ return 0;
+}
+
+int
+nlm4_unlock_fd_resume (void *carg)
+{
+ int ret = -EFAULT;
+ nfs_user_t nfu = {0, };
+ nfs3_call_state_t *cs = NULL;
+ struct gf_flock flock = {0, };
+
+ if (!carg)
+ return ret;
+ cs = (nfs3_call_state_t *)carg;
+ nfs_request_user_init (&nfu, cs->req);
+ nlm4_lock_to_gf_flock (&flock, &cs->args.nlm4_unlockargs.alock, 0);
+ nlm_copy_lkowner (&nfu.lk_owner, &cs->args.nlm4_unlockargs.alock.oh);
+ flock.l_type = F_UNLCK;
+ ret = nfs_lk (cs->nfsx, cs->vol, &nfu, cs->fd, F_SETLK,
+ &flock, nlm4svc_unlock_cbk, cs);
+
+ return ret;
+}
+
+int
+nlm4_unlock_resume (void *carg)
+{
+ nlm4_stats stat = nlm4_failed;
+ int ret = -1;
+ nfs3_call_state_t *cs = NULL;
+ nlm_client_t *nlmclnt = NULL;
+
+ if (!carg)
+ return ret;
+
+ cs = (nfs3_call_state_t *)carg;
+ nlm4_check_fh_resolve_status (cs, stat, nlm4err);
+
+ nlmclnt = nlm_get_uniq (cs->args.nlm4_unlockargs.alock.caller_name);
+ if (nlmclnt == NULL) {
+ stat = nlm4_granted;
+ gf_log (GF_NLM, GF_LOG_WARNING, "nlm_get_uniq() returned NULL");
+ goto nlm4err;
+ }
+ cs->fd = fd_lookup_uint64 (cs->resolvedloc.inode, (uint64_t)nlmclnt);
+ if (cs->fd == NULL) {
+ stat = nlm4_granted;
+ gf_log (GF_NLM, GF_LOG_WARNING, "fd_lookup_uint64() returned "
+ "NULL");
+ goto nlm4err;
+ }
+ ret = nlm4_unlock_fd_resume (cs);
+
+nlm4err:
+ if (ret < 0) {
+ gf_log (GF_NLM, GF_LOG_WARNING, "unable to unlock_fd_resume");
+ stat = nlm4_errno_to_nlm4stat (-ret);
+ nlm4_generic_reply (cs->req, cs->args.nlm4_unlockargs.cookie,
+ stat);
+
+ nfs3_call_state_wipe (cs);
+ }
+ /* we have already taken care of cleanup */
+ return 0;
+}
+
+int
+nlm4svc_unlock (rpcsvc_request_t *req)
+{
+ xlator_t *vol = NULL;
+ nlm4_stats stat = nlm4_failed;
+ struct nfs_state *nfs = NULL;
+ nfs3_state_t *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+ int ret = RPCSVC_ACTOR_ERROR;
+ struct nfs3_fh fh = {{0}, };
+
+ if (!req)
+ return ret;
+
+ nlm4_validate_nfs3_state (req, nfs3, stat, rpcerr, ret);
+ nfs = nfs_state (nfs3->nfsx);
+ nlm4_handle_call_state_init (nfs->nfs3state, cs, req,
+ stat, rpcerr);
+
+ nlm4_prep_nlm4_unlockargs (&cs->args.nlm4_unlockargs, &fh, &cs->lkowner,
+ cs->cookiebytes);
+ if (xdr_to_nlm4_unlockargs(req->msg[0], &cs->args.nlm4_unlockargs) <= 0)
+ {
+ gf_log (GF_NLM, GF_LOG_ERROR, "Error decoding args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+
+ nlm4_validate_gluster_fh (&fh, stat, nlm4err);
+ nlm4_map_fh_to_volume (cs->nfs3state, fh, req, vol, stat, nlm4err);
+
+ if (nlm_grace_period) {
+ gf_log (GF_NLM, GF_LOG_WARNING, "NLM in grace period");
+ stat = nlm4_denied_grace_period;
+ nlm4_generic_reply (req, cs->args.nlm4_unlockargs.cookie, stat);
+ nfs3_call_state_wipe (cs);
+ return 0;
+ }
+
+ cs->vol = vol;
+ /* FIXME: check if trans is being used at all for unlock */
+ cs->trans = rpcsvc_request_transport_ref(req);
+ nlm4_volume_started_check (nfs3, vol, ret, rpcerr);
+
+ ret = nfs3_fh_resolve_and_resume (cs, &fh,
+ NULL, nlm4_unlock_resume);
+
+nlm4err:
+ if (ret < 0) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "unable to resolve and resume");
+ nlm4_generic_reply (req, cs->args.nlm4_unlockargs.cookie, stat);
+ nfs3_call_state_wipe (cs);
+ return 0;
+ }
+
+rpcerr:
+ if (ret < 0) {
+ nfs3_call_state_wipe (cs);
+ }
+ return ret;
+}
+
+int
+nlm4_share_reply (nfs3_call_state_t *cs, nlm4_stats stat)
+{
+ nlm4_shareres res = {{0}, 0, 0};
+
+ if (!cs)
+ return -1;
+
+ res.cookie = cs->args.nlm4_shareargs.cookie;
+ res.stat = stat;
+ res.sequence = 0;
+
+ nlm4svc_submit_reply (cs->req, (void *)&res,
+ (nlm4_serializer)xdr_serialize_nlm4_shareres);
+ return 0;
+}
+
+nlm_share_t *
+nlm4_share_new ()
+{
+ nlm_share_t *share = NULL;
+
+ share = GF_CALLOC (1, sizeof (nlm_share_t),
+ gf_nfs_mt_nlm4_share);
+ if (!share)
+ goto out;
+
+ INIT_LIST_HEAD (&share->client_list);
+ INIT_LIST_HEAD (&share->inode_list);
+ out:
+ return share;
+}
+
+int
+nlm4_add_share_to_inode (nlm_share_t *share)
+{
+ int ret = -1;
+ uint64_t ctx = 0;
+ struct list_head *head = NULL;
+ xlator_t *this = NULL;
+ inode_t *inode = NULL;
+ struct nfs_inode_ctx *ictx = NULL;
+ struct nfs_state *priv = NULL;
+
+ this = THIS;
+ priv = this->private;
+ inode = share->inode;
+ ret = inode_ctx_get (inode, this, &ctx);
+
+ if (ret == -1) {
+ ictx = GF_CALLOC (1, sizeof (struct nfs_inode_ctx),
+ gf_nfs_mt_inode_ctx);
+ if (!ictx ) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not allocate nfs inode ctx");
+ ret = -1;
+ goto out;
+ }
+ ictx->generation = priv->generation;
+
+ head = &ictx->shares;
+ INIT_LIST_HEAD (head);
+
+ ret = inode_ctx_put (inode, this, (uint64_t)ictx);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not store share list");
+ goto out;
+ }
+ }
+ else {
+ ictx = (struct nfs_inode_ctx *)ctx;
+ head = &ictx->shares;
+ }
+
+ list_add (&share->inode_list, head);
+
+ out:
+ if (ret && head)
+ GF_FREE (head);
+
+ return ret;
+}
+
+int
+nlm4_approve_share_reservation (nfs3_call_state_t *cs)
+{
+ int ret = -1;
+ uint64_t ctx = 0;
+ fsh_mode req_mode = 0;
+ fsh_access req_access = 0;
+ inode_t *inode = NULL;
+ nlm_share_t *share = NULL;
+ struct list_head *head = NULL;
+ struct nfs_inode_ctx *ictx = NULL;
+
+ if (!cs)
+ goto out;
+
+ inode = cs->resolvedloc.inode;
+
+ ret = inode_ctx_get (inode, THIS, &ctx);
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
+ ictx = (struct nfs_inode_ctx *)ctx;
+
+ head = &ictx->shares;
+ if (!head || list_empty (head))
+ goto out;
+
+ req_mode = cs->args.nlm4_shareargs.share.mode;
+ req_access = cs->args.nlm4_shareargs.share.access;
+
+ list_for_each_entry (share, head, inode_list) {
+ ret = (((req_mode & share->access) == 0) &&
+ ((req_access & share->mode) == 0));
+ if (!ret) {
+ ret = -1;
+ goto out;
+ }
+ }
+ ret = 0;
+
+ out:
+ return ret;
+}
+
+int
+nlm4_create_share_reservation (nfs3_call_state_t *cs)
+{
+ int ret = -1;
+ nlm_share_t *share = NULL;
+ nlm_client_t *client = NULL;
+ inode_t *inode = NULL;
+
+ LOCK (&nlm_client_list_lk);
+
+ inode = inode_ref (cs->resolvedloc.inode);
+ if (!inode) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "inode not found");
+ goto out;
+ }
+
+ client = __nlm_get_uniq (cs->args.nlm4_shareargs.share.caller_name);
+ if (!client) {
+ /* DO NOT add client. the client is supposed
+ to be here, since nlm4svc_share adds it */
+ gf_log (GF_NLM, GF_LOG_ERROR, "client not found");
+ goto out;
+ }
+
+ ret = nlm4_approve_share_reservation (cs);
+ if (ret)
+ goto out;
+
+ share = nlm4_share_new ();
+ if (!share) {
+ ret = -1;
+ goto out;
+ }
+
+ share->inode = inode;
+ share->mode = cs->args.nlm4_shareargs.share.mode;
+ share->access = cs->args.nlm4_shareargs.share.access;
+ nlm_copy_lkowner (&share->lkowner,
+ &cs->args.nlm4_shareargs.share.oh);
+
+ ret = nlm4_add_share_to_inode (share);
+ if (ret)
+ goto out;
+
+ list_add (&share->client_list, &client->shares);
+
+ out:
+ if (ret && inode) {
+ inode_unref (inode);
+ GF_FREE (share);
+ }
+
+ UNLOCK (&nlm_client_list_lk);
+ return ret;
+}
+
+/*
+ SHARE and UNSHARE calls DO NOT perform STACK_WIND,
+ the (non-monitored) share reservations are maintained
+ at *nfs xlator level only*, in memory
+*/
+int
+nlm4_share_resume (void *call_state)
+{
+ int ret = -1;
+ nlm4_stats stat = nlm4_failed;
+ nfs3_call_state_t *cs = NULL;
+
+ if (!call_state)
+ return ret;
+
+ cs = (nfs3_call_state_t *)call_state;
+ nlm4_check_fh_resolve_status (cs, stat, out);
+
+ ret = nlm4_create_share_reservation (cs);
+ if (!ret)
+ stat = nlm4_granted;
+
+ out:
+ nlm4_share_reply (cs, stat);
+ nfs3_call_state_wipe (cs);
+ return 0;
+}
+
+int
+nlm4svc_share (rpcsvc_request_t *req)
+{
+ nlm4_stats stat = nlm4_failed;
+ xlator_t *vol = NULL;
+ nfs3_state_t *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+ struct nfs_state *nfs = NULL;
+ struct nfs3_fh fh = {{0}, };
+ int ret = RPCSVC_ACTOR_ERROR;
+
+ if (!req)
+ return ret;
+
+ nlm4_validate_nfs3_state (req, nfs3, stat, rpcerr, ret);
+ nfs = nfs_state (nfs3->nfsx);
+ nlm4_handle_call_state_init (nfs->nfs3state, cs, req,
+ stat, rpcerr);
+
+ nlm4_prep_shareargs (&cs->args.nlm4_shareargs, &cs->lockfh,
+ &cs->lkowner, cs->cookiebytes);
+
+ if (xdr_to_nlm4_shareargs (req->msg[0],
+ &cs->args.nlm4_shareargs) <= 0) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "Error decoding SHARE args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+
+ fh = cs->lockfh;
+ nlm4_validate_gluster_fh (&fh, stat, nlm4err);
+ nlm4_map_fh_to_volume (cs->nfs3state, fh, req,
+ vol, stat, nlm4err);
+
+ if (nlm_grace_period && !cs->args.nlm4_shareargs.reclaim) {
+ gf_log (GF_NLM, GF_LOG_DEBUG, "NLM in grace period");
+ stat = nlm4_denied_grace_period;
+ nlm4_share_reply (cs, stat);
+ nfs3_call_state_wipe (cs);
+ return 0;
+ }
+
+ cs->vol = vol;
+ cs->trans = rpcsvc_request_transport_ref(req);
+ nlm4_volume_started_check (nfs3, vol, ret, rpcerr);
+
+ ret = nlm_add_nlmclnt (cs->args.nlm4_shareargs.share.caller_name);
+
+ ret = nfs3_fh_resolve_and_resume (cs, &fh, NULL, nlm4_share_resume);
+
+ nlm4err:
+ if (ret < 0) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "SHARE call failed");
+ nlm4_share_reply (cs, stat);
+ nfs3_call_state_wipe (cs);
+ return 0;
+ }
+
+ rpcerr:
+ if (ret < 0)
+ nfs3_call_state_wipe (cs);
+
+ return ret;
+}
+
+int
+nlm4_remove_share_reservation (nfs3_call_state_t *cs)
+{
+ int ret = -1;
+ uint64_t ctx = 0;
+ fsh_mode req_mode = 0;
+ fsh_access req_access = 0;
+ nlm_share_t *share = NULL;
+ nlm_share_t *tmp = NULL;
+ nlm_client_t *client = NULL;
+ char *caller = NULL;
+ inode_t *inode = NULL;
+ xlator_t *this = NULL;
+ struct list_head *head = NULL;
+ nlm4_shareargs *args = NULL;
+ struct nfs_inode_ctx *ictx = NULL;
+
+ LOCK (&nlm_client_list_lk);
+
+ args = &cs->args.nlm4_shareargs;
+ caller = args->share.caller_name;
+
+ client = __nlm_get_uniq (caller);
+ if (!client) {
+ gf_log (GF_NLM, GF_LOG_ERROR,
+ "client not found: %s", caller);
+ goto out;
+ }
+
+ inode = cs->resolvedloc.inode;
+ if (!inode) {
+ gf_log (GF_NLM, GF_LOG_ERROR,
+ "inode not found: client: %s", caller);
+ goto out;
+ }
+
+ this = THIS;
+ ret = inode_ctx_get (inode, this, &ctx);
+ if (ret) {
+ gf_log (GF_NLM, GF_LOG_ERROR,
+ "no shares found for inode:"
+ "gfid: %s; client: %s",
+ inode->gfid, caller);
+ goto out;
+ }
+ ictx = (struct nfs_inode_ctx *)ctx;
+
+ head = &ictx->shares;
+ if (list_empty (head)) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+ req_mode = args->share.mode;
+ req_access = args->share.access;
+
+ list_for_each_entry_safe (share, tmp, head, inode_list) {
+ ret = ((req_mode == share->mode) &&
+ (req_access == share->access) &&
+ nlm_is_oh_same_lkowner (&share->lkowner, &args->share.oh));
+ if (ret) {
+ list_del (&share->client_list);
+ list_del (&share->inode_list);
+ inode_unref (share->inode);
+ GF_FREE (share);
+ break;
+ }
+ }
+
+ ret = 0;
+ out:
+ UNLOCK (&nlm_client_list_lk);
+ return ret;
+
+}
+
+int
+nlm4_unshare_resume (void *call_state)
+{
+ int ret = -1;
+ nlm4_stats stat = nlm4_failed;
+ nfs3_call_state_t *cs = NULL;
+
+ if (!call_state)
+ return ret;
+
+ cs = (nfs3_call_state_t *)call_state;
+
+ nlm4_check_fh_resolve_status (cs, stat, out);
+ ret = nlm4_remove_share_reservation (cs);
+ if (!ret)
+ stat = nlm4_granted;
+
+ out:
+ nlm4_share_reply (cs, stat);
+ nfs3_call_state_wipe (cs);
+ return 0;
+}
+
+int
+nlm4svc_unshare (rpcsvc_request_t *req)
+{
+ nlm4_stats stat = nlm4_failed;
+ xlator_t *vol = NULL;
+ nfs3_state_t *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+ struct nfs_state *nfs = NULL;
+ struct nfs3_fh fh = {{0}, };
+ int ret = RPCSVC_ACTOR_ERROR;
+
+ if (!req)
+ return ret;
+
+ nlm4_validate_nfs3_state (req, nfs3, stat, rpcerr, ret);
+ nfs = nfs_state (nfs3->nfsx);
+ nlm4_handle_call_state_init (nfs->nfs3state, cs, req,
+ stat, rpcerr);
+
+ nlm4_prep_shareargs (&cs->args.nlm4_shareargs, &cs->lockfh,
+ &cs->lkowner, cs->cookiebytes);
+
+ if (xdr_to_nlm4_shareargs (req->msg[0],
+ &cs->args.nlm4_shareargs) <= 0) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "Error decoding UNSHARE args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto rpcerr;
+ }
+
+ fh = cs->lockfh;
+ nlm4_validate_gluster_fh (&fh, stat, nlm4err);
+ nlm4_map_fh_to_volume (cs->nfs3state, fh, req,
+ vol, stat, nlm4err);
+
+ if (nlm_grace_period && !cs->args.nlm4_shareargs.reclaim) {
+ gf_log (GF_NLM, GF_LOG_DEBUG, "NLM in grace period");
+ stat = nlm4_denied_grace_period;
+ nlm4_share_reply (cs, stat);
+ nfs3_call_state_wipe (cs);
+ return 0;
+ }
+
+ cs->vol = vol;
+ cs->trans = rpcsvc_request_transport_ref(req);
+ nlm4_volume_started_check (nfs3, vol, ret, rpcerr);
+
+ ret = nfs3_fh_resolve_and_resume (cs, &fh, NULL,
+ nlm4_unshare_resume);
+
+ nlm4err:
+ if (ret < 0) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "UNSHARE call failed");
+ nlm4_share_reply (cs, stat);
+ ret = 0;
+ return 0;
+ }
+
+ rpcerr:
+ if (ret < 0)
+ nfs3_call_state_wipe (cs);
+
+ return ret;
+}
+
+int
+nlm4_free_all_shares (char *caller_name)
+{
+ nlm_share_t *share = NULL;
+ nlm_share_t *tmp = NULL;
+ nlm_client_t *client = NULL;
+
+ LOCK (&nlm_client_list_lk);
+
+ client = __nlm_get_uniq (caller_name);
+ if (!client) {
+ gf_log (GF_NLM, GF_LOG_DEBUG,
+ "client not found: %s", caller_name);
+ goto out;
+ }
+
+ list_for_each_entry_safe (share, tmp, &client->shares, client_list) {
+ list_del (&share->inode_list);
+ list_del (&share->client_list);
+ inode_unref (share->inode);
+ GF_FREE (share);
+ }
+ out:
+ UNLOCK (&nlm_client_list_lk);
+ return 0;
+}
+
+int
+nlm4svc_free_all (rpcsvc_request_t *req)
+{
+ int ret = RPCSVC_ACTOR_ERROR;
+ nlm4_stats stat = nlm4_failed;
+ nfs3_state_t *nfs3 = NULL;
+ nfs3_call_state_t *cs = NULL;
+ struct nfs_state *nfs = NULL;
+
+ nlm4_validate_nfs3_state (req, nfs3, stat, err, ret);
+ nfs = nfs_state (nfs3->nfsx);
+ nlm4_handle_call_state_init (nfs->nfs3state, cs,
+ req, stat, err);
+
+ nlm4_prep_freeallargs (&cs->args.nlm4_freeallargs,
+ &cs->lkowner);
+
+ if (xdr_to_nlm4_freeallargs (req->msg[0],
+ &cs->args.nlm4_freeallargs) <= 0) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "Error decoding FREE_ALL args");
+ rpcsvc_request_seterr (req, GARBAGE_ARGS);
+ goto err;
+ }
+
+ ret = nlm4_free_all_shares (cs->args.nlm4_freeallargs.name);
+ if (ret)
+ goto err;
+
+ ret = nlm_cleanup_fds (cs->args.nlm4_freeallargs.name);
+ if (ret)
+ goto err;
+
+ err:
+ nfs3_call_state_wipe (cs);
+ if (ret)
+ gf_log (GF_NLM, GF_LOG_DEBUG,
+ "error in free all; stat: %d", stat);
+ return ret;
+
+}
+
+void
+nlm4svc_sm_notify (struct nlm_sm_status *status)
+{
+ gf_log (GF_NLM, GF_LOG_INFO, "sm_notify: %s, state: %d",
+ status->mon_name,
+ status->state);
+ nlm_cleanup_fds (status->mon_name);
+}
+
+rpcsvc_actor_t nlm4svc_actors[NLM4_PROC_COUNT] = {
+ /* 0 */
+ {"NULL", NLM4_NULL, nlm4svc_null, NULL, 0, DRC_IDEMPOTENT},
+ {"TEST", NLM4_TEST, nlm4svc_test, NULL, 0, DRC_IDEMPOTENT},
+ {"LOCK", NLM4_LOCK, nlm4svc_lock, NULL, 0, DRC_NON_IDEMPOTENT},
+ {"CANCEL", NLM4_CANCEL, nlm4svc_cancel, NULL, 0, DRC_NON_IDEMPOTENT},
+ {"UNLOCK", NLM4_UNLOCK, nlm4svc_unlock, NULL, 0, DRC_NON_IDEMPOTENT},
+ /* 5 */
+ {"GRANTED", NLM4_GRANTED, NULL, NULL, 0, DRC_NA},
+ {"TEST", NLM4_TEST_MSG, NULL, NULL, 0, DRC_NA},
+ {"LOCK", NLM4_LOCK_MSG, NULL, NULL, 0, DRC_NA},
+ {"CANCEL", NLM4_CANCEL_MSG, NULL, NULL, 0, DRC_NA},
+ {"UNLOCK", NLM4_UNLOCK_MSG, NULL, NULL, 0, DRC_NA},
+ /* 10 */
+ {"GRANTED", NLM4_GRANTED_MSG, NULL, NULL, 0, DRC_NA},
+ {"TEST", NLM4_TEST_RES, NULL, NULL, 0, DRC_NA},
+ {"LOCK", NLM4_LOCK_RES, NULL, NULL, 0, DRC_NA},
+ {"CANCEL", NLM4_CANCEL_RES, NULL, NULL, 0, DRC_NA},
+ {"UNLOCK", NLM4_UNLOCK_RES, NULL, NULL, 0, DRC_NA},
+ /* 15 ; procedures 17,18,19 are not defined by nlm */
+ {"GRANTED", NLM4_GRANTED_RES, NULL, NULL, 0, DRC_NA},
+ {"SM_NOTIFY", NLM4_SM_NOTIFY, NULL, NULL, 0, DRC_NA},
+ {"SEVENTEEN", NLM4_SEVENTEEN, NULL, NULL, 0, DRC_NA},
+ {"EIGHTEEN", NLM4_EIGHTEEN, NULL, NULL, 0, DRC_NA},
+ {"NINETEEN", NLM4_NINETEEN, NULL, NULL, 0, DRC_NA},
+ /* 20 */
+ {"SHARE", NLM4_SHARE, nlm4svc_share, NULL, 0, DRC_NON_IDEMPOTENT},
+ {"UNSHARE", NLM4_UNSHARE, nlm4svc_unshare, NULL, 0, DRC_NON_IDEMPOTENT},
+ {"NM_LOCK", NLM4_NM_LOCK, nlm4svc_nm_lock, NULL, 0, DRC_NON_IDEMPOTENT},
+ {"FREE_ALL", NLM4_FREE_ALL, nlm4svc_free_all, NULL, 0, DRC_IDEMPOTENT},
+};
+
+rpcsvc_program_t nlm4prog = {
+ .progname = "NLM4",
+ .prognum = NLM_PROGRAM,
+ .progver = NLM_V4,
+ .progport = GF_NLM4_PORT,
+ .actors = nlm4svc_actors,
+ .numactors = NLM4_PROC_COUNT,
+ .min_auth = AUTH_NULL,
+};
+
+
+int
+nlm4_init_state (xlator_t *nfsx)
+{
+ return 0;
+}
+
+extern void *nsm_thread (void *argv);
+
+void nlm_grace_period_over(void *arg)
+{
+ nlm_grace_period = 0;
+}
+
+rpcsvc_program_t *
+nlm4svc_init(xlator_t *nfsx)
+{
+ struct nfs3_state *ns = NULL;
+ struct nfs_state *nfs = NULL;
+ dict_t *options = NULL;
+ int ret = -1;
+ char *portstr = NULL;
+ pthread_t thr;
+ struct timespec timeout = {0,};
+ FILE *pidfile = NULL;
+ pid_t pid = -1;
+ static gf_boolean_t nlm4_inited = _gf_false;
+
+ /* Already inited */
+ if (nlm4_inited)
+ return &nlm4prog;
+
+ nfs = (struct nfs_state*)nfsx->private;
+
+ ns = nfs->nfs3state;
+ if (!ns) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "NLM4 init failed");
+ goto err;
+ }
+ nlm4prog.private = ns;
+
+ options = dict_new ();
+
+ ret = gf_asprintf (&portstr, "%d", GF_NLM4_PORT);
+ if (ret == -1)
+ goto err;
+
+ ret = dict_set_dynstr (options, "transport.socket.listen-port",
+ portstr);
+ if (ret == -1)
+ goto err;
+ ret = dict_set_str (options, "transport-type", "socket");
+ if (ret == -1) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "dict_set_str error");
+ goto err;
+ }
+
+ if (nfs->allow_insecure) {
+ ret = dict_set_str (options, "rpc-auth-allow-insecure", "on");
+ if (ret == -1) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "dict_set_str error");
+ goto err;
+ }
+ ret = dict_set_str (options, "rpc-auth.ports.insecure", "on");
+ if (ret == -1) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "dict_set_str error");
+ goto err;
+ }
+ }
+
+ ret = dict_set_str (options, "transport.address-family", "inet");
+ if (ret == -1) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "dict_set_str error");
+ goto err;
+ }
+
+ ret = rpcsvc_create_listeners (nfs->rpcsvc, options, "NLM");
+ if (ret == -1) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "Unable to create listeners");
+ dict_unref (options);
+ goto err;
+ }
+ INIT_LIST_HEAD(&nlm_client_list);
+ LOCK_INIT (&nlm_client_list_lk);
+
+ /* unlink sm-notify.pid so that when we restart rpc.statd/sm-notify
+ * it thinks that the machine has restarted and sends NOTIFY to clients.
+ */
+ ret = unlink ("/var/run/sm-notify.pid");
+ if (ret == -1 && errno != ENOENT) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "unable to unlink sm-notify");
+ goto err;
+ }
+ /* temporary work around to restart statd, not distro/OS independant.
+ * Need to figure out a more graceful way
+ * killall will cause problems on solaris.
+ */
+
+ pidfile = fopen ("/var/run/rpc.statd.pid", "r");
+ if (pidfile) {
+ ret = fscanf (pidfile, "%d", &pid);
+ if (ret <= 0) {
+ gf_log (GF_NLM, GF_LOG_WARNING, "unable to get pid of "
+ "rpc.statd");
+ ret = runcmd ("killall", "-9", "rpc.statd", NULL);
+ } else
+ kill (pid, SIGKILL);
+
+ fclose (pidfile);
+ } else {
+ gf_log (GF_NLM, GF_LOG_WARNING, "opening the pid file of "
+ "rpc.statd failed (%s)", strerror (errno));
+ /* if ret == -1, do nothing - case either statd was not
+ * running or was running in valgrind mode
+ */
+ ret = runcmd ("killall", "-9", "rpc.statd", NULL);
+ }
+
+ ret = unlink ("/var/run/rpc.statd.pid");
+ if (ret == -1 && errno != ENOENT) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "unable to unlink rpc.statd");
+ goto err;
+ }
+
+ ret = runcmd ("/sbin/rpc.statd", NULL);
+ if (ret == -1) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "unable to start rpc.statd");
+ goto err;
+ }
+ pthread_create (&thr, NULL, nsm_thread, (void*)NULL);
+
+ timeout.tv_sec = nlm_grace_period;
+ timeout.tv_nsec = 0;
+
+ gf_timer_call_after (nfsx->ctx, timeout, nlm_grace_period_over, NULL);
+ nlm4_inited = _gf_true;
+ return &nlm4prog;
+err:
+ return NULL;
+}
+
+int32_t
+nlm_priv (xlator_t *this)
+{
+ int32_t ret = -1;
+ uint32_t client_count = 0;
+ uint64_t file_count = 0;
+ nlm_client_t *client = NULL;
+ nlm_fde_t *fde = NULL;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0};
+ char gfid_str[64] = {0};
+
+ gf_proc_dump_add_section("nfs.nlm");
+
+ if (TRY_LOCK (&nlm_client_list_lk))
+ goto out;
+
+ list_for_each_entry (client, &nlm_client_list, nlm_clients) {
+
+ gf_proc_dump_build_key (key, "client", "%d.hostname", client_count);
+ gf_proc_dump_write (key, "%s\n", client->caller_name);
+
+ file_count = 0;
+ list_for_each_entry (fde, &client->fdes, fde_list) {
+ gf_proc_dump_build_key (key, "file", "%ld.gfid", file_count);
+ memset (gfid_str, 0, 64);
+ uuid_utoa_r (fde->fd->inode->gfid, gfid_str);
+ gf_proc_dump_write (key, "%s", gfid_str);
+ file_count++;
+ }
+
+ gf_proc_dump_build_key (key, "client", "files-locked");
+ gf_proc_dump_write (key, "%ld\n", file_count);
+ client_count++;
+ }
+
+ gf_proc_dump_build_key (key, "nlm", "client-count");
+ gf_proc_dump_write (key, "%d", client_count);
+ ret = 0;
+ UNLOCK (&nlm_client_list_lk);
+
+ out:
+ if (ret) {
+ gf_proc_dump_build_key (key, "nlm", "statedump_error");
+ gf_proc_dump_write (key, "Unable to dump nlm state because "
+ "nlm_client_list_lk lock couldn't be acquired");
+ }
+
+ return ret;
+}
diff --git a/xlators/nfs/server/src/nlm4.h b/xlators/nfs/server/src/nlm4.h
new file mode 100644
index 000000000..9b5d54081
--- /dev/null
+++ b/xlators/nfs/server/src/nlm4.h
@@ -0,0 +1,77 @@
+/*
+ Copyright (c) 2012 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _NLM4_H_
+#define _NLM4_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#include <signal.h>
+#include "rpcsvc.h"
+#include "dict.h"
+#include "xlator.h"
+#include "iobuf.h"
+#include "nfs.h"
+#include "list.h"
+#include "xdr-nfs3.h"
+#include "locking.h"
+#include "nfs3-fh.h"
+#include "uuid.h"
+#include "nlm4-xdr.h"
+#include "lkowner.h"
+
+/* Registered with portmap */
+#define GF_NLM4_PORT 38468
+#define GF_NLM GF_NFS"-NLM"
+
+extern rpcsvc_program_t *
+nlm4svc_init (xlator_t *nfsx);
+
+extern int
+nlm4_init_state (xlator_t *nfsx);
+
+#define NLM_PROGRAM 100021
+#define NLM_V4 4
+
+typedef struct nlm4_lwowner {
+ char temp[1024];
+} nlm4_lkowner_t;
+
+typedef struct nlm_client {
+ struct sockaddr_storage sa;
+ pid_t uniq;
+ struct list_head nlm_clients;
+ struct list_head fdes;
+ struct list_head shares;
+ struct rpc_clnt *rpc_clnt;
+ char *caller_name;
+ int nsm_monitor;
+} nlm_client_t;
+
+typedef struct nlm_share {
+ struct list_head client_list;
+ struct list_head inode_list;
+ gf_lkowner_t lkowner;
+ inode_t *inode;
+ fsh_mode mode;
+ fsh_access access;
+} nlm_share_t;
+
+typedef struct nlm_fde {
+ struct list_head fde_list;
+ fd_t *fd;
+ int transit_cnt;
+} nlm_fde_t;
+
+#endif
diff --git a/xlators/nfs/server/src/nlmcbk_svc.c b/xlators/nfs/server/src/nlmcbk_svc.c
new file mode 100644
index 000000000..e1b588765
--- /dev/null
+++ b/xlators/nfs/server/src/nlmcbk_svc.c
@@ -0,0 +1,117 @@
+/*
+ Copyright (c) 2012 Gluster, Inc. <http://www.gluster.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/*
+ * Please do not edit this file.
+ * It was generated using rpcgen.
+ */
+
+#include "nlmcbk-xdr.h"
+#include "nlm4.h"
+#include "logging.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <rpc/pmap_clnt.h>
+#include <string.h>
+#include <memory.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#ifndef SIG_PF
+#define SIG_PF void(*)(int)
+#endif
+
+void
+nlm4svc_sm_notify (struct nlm_sm_status *status);
+
+void *nlmcbk_sm_notify_0_svc(struct nlm_sm_status *status, struct svc_req *req)
+{
+ nlm4svc_sm_notify (status);
+ return NULL;
+}
+
+static void
+nlmcbk_program_0(struct svc_req *rqstp, register SVCXPRT *transp)
+{
+ union {
+ struct nlm_sm_status nlmcbk_sm_notify_0_arg;
+ } argument;
+ char *result;
+ xdrproc_t _xdr_argument, _xdr_result;
+ char *(*local)(char *, struct svc_req *);
+
+ switch (rqstp->rq_proc) {
+ case NULLPROC:
+ (void) svc_sendreply (transp, (xdrproc_t) xdr_void, (char *)NULL);
+ return;
+
+ case NLMCBK_SM_NOTIFY:
+ _xdr_argument = (xdrproc_t) xdr_nlm_sm_status;
+ _xdr_result = (xdrproc_t) xdr_void;
+ local = (char *(*)(char *, struct svc_req *)) nlmcbk_sm_notify_0_svc;
+ break;
+
+ default:
+ svcerr_noproc (transp);
+ return;
+ }
+ memset ((char *)&argument, 0, sizeof (argument));
+ if (!svc_getargs (transp, (xdrproc_t) _xdr_argument, (caddr_t) &argument)) {
+ svcerr_decode (transp);
+ return;
+ }
+ result = (*local)((char *)&argument, rqstp);
+ if (!svc_sendreply(transp, (xdrproc_t) _xdr_result, result)) {
+ svcerr_systemerr (transp);
+ }
+
+ if (!svc_freeargs (transp, (xdrproc_t) _xdr_argument, (caddr_t) &argument)) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "unable to free arguments");
+ return;
+ }
+ return;
+}
+
+void *
+nsm_thread (void *argv)
+{
+ register SVCXPRT *transp;
+ int ret = 0;
+
+ ret = pmap_unset (NLMCBK_PROGRAM, NLMCBK_V1);
+ if (ret == 0) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "pmap_unset failed");
+ return NULL;
+ }
+ transp = svcudp_create(RPC_ANYSOCK);
+ if (transp == NULL) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "cannot create udp service.");
+ return NULL;
+ }
+ if (!svc_register(transp, NLMCBK_PROGRAM, NLMCBK_V1, nlmcbk_program_0, IPPROTO_UDP)) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "unable to register (NLMCBK_PROGRAM, NLMCBK_V0, udp).");
+ return NULL;
+ }
+
+ transp = svctcp_create(RPC_ANYSOCK, 0, 0);
+ if (transp == NULL) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "cannot create tcp service.");
+ return NULL;
+ }
+ if (!svc_register(transp, NLMCBK_PROGRAM, NLMCBK_V1, nlmcbk_program_0, IPPROTO_TCP)) {
+ gf_log (GF_NLM, GF_LOG_ERROR, "unable to register (NLMCBK_PROGRAM, NLMCBK_V0, tcp).");
+ return NULL;
+ }
+
+ svc_run ();
+ gf_log (GF_NLM, GF_LOG_ERROR, "svc_run returned");
+ return NULL;
+ /* NOTREACHED */
+}
diff --git a/xlators/performance/Makefile.am b/xlators/performance/Makefile.am
index e91d5f6ef..a494190ba 100644
--- a/xlators/performance/Makefile.am
+++ b/xlators/performance/Makefile.am
@@ -1,3 +1,3 @@
-SUBDIRS = write-behind read-ahead io-threads io-cache symlink-cache quick-read stat-prefetch
+SUBDIRS = write-behind read-ahead readdir-ahead io-threads io-cache symlink-cache quick-read md-cache open-behind
CLEANFILES =
diff --git a/xlators/performance/io-cache/src/Makefile.am b/xlators/performance/io-cache/src/Makefile.am
index b1bf5bfbf..155be9988 100644
--- a/xlators/performance/io-cache/src/Makefile.am
+++ b/xlators/performance/io-cache/src/Makefile.am
@@ -1,14 +1,16 @@
xlator_LTLIBRARIES = io-cache.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
-io_cache_la_LDFLAGS = -module -avoidversion
+io_cache_la_LDFLAGS = -module -avoid-version
io_cache_la_SOURCES = io-cache.c page.c ioc-inode.c
io_cache_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = io-cache.h
+noinst_HEADERS = io-cache.h ioc-mem-types.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(CONTRIBDIR)/rbtree
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c
index 49a1ce823..201777b38 100644
--- a/xlators/performance/io-cache/src/io-cache.c
+++ b/xlators/performance/io-cache/src/io-cache.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -27,428 +18,282 @@
#include "dict.h"
#include "xlator.h"
#include "io-cache.h"
+#include "ioc-mem-types.h"
+#include "statedump.h"
#include <assert.h>
#include <sys/time.h>
-uint32_t
-ioc_get_priority (ioc_table_t *table, const char *path);
+int ioc_log2_page_size;
uint32_t
ioc_get_priority (ioc_table_t *table, const char *path);
-inline ioc_inode_t *
+struct volume_options options[];
+
+
+static inline uint32_t
+ioc_hashfn (void *data, int len)
+{
+ off_t offset;
+
+ offset = *(off_t *) data;
+
+ return (offset >> ioc_log2_page_size);
+}
+
+static inline ioc_inode_t *
ioc_inode_reupdate (ioc_inode_t *ioc_inode)
{
- ioc_table_t *table = ioc_inode->table;
+ ioc_table_t *table = NULL;
+
+ table = ioc_inode->table;
+
+ list_add_tail (&ioc_inode->inode_lru,
+ &table->inode_lru[ioc_inode->weight]);
- list_add_tail (&ioc_inode->inode_lru,
- &table->inode_lru[ioc_inode->weight]);
-
- return ioc_inode;
+ return ioc_inode;
}
-inline ioc_inode_t *
+static inline ioc_inode_t *
ioc_get_inode (dict_t *dict, char *name)
{
- ioc_inode_t *ioc_inode = NULL;
- data_t *ioc_inode_data = dict_get (dict, name);
- ioc_table_t *table = NULL;
-
- if (ioc_inode_data) {
- ioc_inode = data_to_ptr (ioc_inode_data);
- table = ioc_inode->table;
-
- ioc_table_lock (table);
- {
- if (list_empty (&ioc_inode->inode_lru)) {
- ioc_inode = ioc_inode_reupdate (ioc_inode);
- }
- }
- ioc_table_unlock (table);
- }
-
- return ioc_inode;
+ ioc_inode_t *ioc_inode = NULL;
+ data_t *ioc_inode_data = NULL;
+ ioc_table_t *table = NULL;
+
+ ioc_inode_data = dict_get (dict, name);
+ if (ioc_inode_data) {
+ ioc_inode = data_to_ptr (ioc_inode_data);
+ table = ioc_inode->table;
+
+ ioc_table_lock (table);
+ {
+ if (list_empty (&ioc_inode->inode_lru)) {
+ ioc_inode = ioc_inode_reupdate (ioc_inode);
+ }
+ }
+ ioc_table_unlock (table);
+ }
+
+ return ioc_inode;
}
int32_t
ioc_inode_need_revalidate (ioc_inode_t *ioc_inode)
{
- int8_t need_revalidate = 0;
- struct timeval tv = {0,};
- int32_t ret = -1;
- ioc_table_t *table = ioc_inode->table;
+ int8_t need_revalidate = 0;
+ struct timeval tv = {0,};
+ ioc_table_t *table = NULL;
+
+ table = ioc_inode->table;
- ret = gettimeofday (&tv, NULL);
+ gettimeofday (&tv, NULL);
- if (time_elapsed (&tv, &ioc_inode->tv) >= table->cache_timeout)
- need_revalidate = 1;
+ if (time_elapsed (&tv, &ioc_inode->cache.tv) >= table->cache_timeout)
+ need_revalidate = 1;
- return need_revalidate;
+ return need_revalidate;
}
/*
* __ioc_inode_flush - flush all the cached pages of the given inode
*
- * @ioc_inode:
+ * @ioc_inode:
*
* assumes lock is held
*/
-int32_t
+int64_t
__ioc_inode_flush (ioc_inode_t *ioc_inode)
{
- ioc_page_t *curr = NULL, *next = NULL;
- int32_t destroy_size = 0;
- int32_t ret = 0;
-
- list_for_each_entry_safe (curr, next, &ioc_inode->pages, pages) {
- ret = ioc_page_destroy (curr);
-
- if (ret != -1)
- destroy_size += ret;
- }
-
- return destroy_size;
+ ioc_page_t *curr = NULL, *next = NULL;
+ int64_t destroy_size = 0;
+ int64_t ret = 0;
+
+ list_for_each_entry_safe (curr, next, &ioc_inode->cache.page_lru,
+ page_lru) {
+ ret = __ioc_page_destroy (curr);
+
+ if (ret != -1)
+ destroy_size += ret;
+ }
+
+ return destroy_size;
}
void
ioc_inode_flush (ioc_inode_t *ioc_inode)
{
- int32_t destroy_size = 0;
-
- ioc_inode_lock (ioc_inode);
- {
- destroy_size = __ioc_inode_flush (ioc_inode);
- }
- ioc_inode_unlock (ioc_inode);
-
- if (destroy_size) {
- ioc_table_lock (ioc_inode->table);
- {
- ioc_inode->table->cache_used -= destroy_size;
- }
- ioc_table_unlock (ioc_inode->table);
- }
-
- return;
+ int64_t destroy_size = 0;
+
+ ioc_inode_lock (ioc_inode);
+ {
+ destroy_size = __ioc_inode_flush (ioc_inode);
+ }
+ ioc_inode_unlock (ioc_inode);
+
+ if (destroy_size) {
+ ioc_table_lock (ioc_inode->table);
+ {
+ ioc_inode->table->cache_used -= destroy_size;
+ }
+ ioc_table_unlock (ioc_inode->table);
+ }
+
+ return;
}
-/*
- * ioc_utimens_cbk -
- *
- * @frame:
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @stbuf:
- *
- */
int32_t
-ioc_utimens_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+ioc_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, stbuf);
- return 0;
+ STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop,
+ xdata);
+ return 0;
}
-/*
- * ioc_utimens -
- *
- * @frame:
- * @this:
- * @loc:
- * @tv:
- *
- */
int32_t
-ioc_utimens (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct timespec *tv)
+ioc_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- uint64_t ioc_inode = 0;
- inode_ctx_get (loc->inode, this, &ioc_inode);
+ uint64_t ioc_inode = 0;
- if (ioc_inode)
- ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+ inode_ctx_get (loc->inode, this, &ioc_inode);
- STACK_WIND (frame, ioc_utimens_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->utimens, loc, tv);
+ if (ioc_inode
+ && ((valid & GF_SET_ATTR_ATIME)
+ || (valid & GF_SET_ATTR_MTIME)))
+ ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
- return 0;
+ STACK_WIND (frame, ioc_setattr_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid, xdata);
+
+ return 0;
}
int32_t
ioc_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *stbuf, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xdata, struct iatt *postparent)
{
- ioc_inode_t *ioc_inode = NULL;
- ioc_local_t *local = frame->local;
- ioc_table_t *table = this->private;
- ioc_page_t *page = NULL;
- data_t *content_data = NULL;
- char *src = NULL;
- char need_unref = 0;
- uint8_t cache_still_valid = 0;
- uint32_t weight = 0;
- uint64_t tmp_ioc_inode = 0;
- char *buf = NULL;
- char *tmp = NULL;
- int i;
- struct iobref *iobref = NULL;
- struct iobuf *iobuf = NULL;
-
- if (op_ret != 0)
- goto out;
-
- inode_ctx_get (inode, this, &tmp_ioc_inode);
- ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
- if (ioc_inode) {
- cache_still_valid = ioc_cache_still_valid (ioc_inode,
- stbuf);
-
- if (!cache_still_valid) {
- ioc_inode_flush (ioc_inode);
- }
- /* update the time-stamp of revalidation */
- ioc_inode_lock (ioc_inode);
- {
- gettimeofday (&ioc_inode->tv, NULL);
- }
- ioc_inode_unlock (ioc_inode);
-
- ioc_table_lock (ioc_inode->table);
- {
- list_move_tail (&ioc_inode->inode_lru,
- &table->inode_lru[ioc_inode->weight]);
- }
- ioc_table_unlock (ioc_inode->table);
- }
-
- if (local && stbuf->st_size &&
- local->need_xattr >= stbuf->st_size) {
- if (!ioc_inode) {
- weight = ioc_get_priority (table,
- local->file_loc.path);
- ioc_inode = ioc_inode_update (table,
- inode, weight);
- if (ioc_inode == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
+ ioc_inode_t *ioc_inode = NULL;
+ ioc_table_t *table = NULL;
+ uint8_t cache_still_valid = 0;
+ uint64_t tmp_ioc_inode = 0;
+ uint32_t weight = 0xffffffff;
+ const char *path = NULL;
+ ioc_local_t *local = NULL;
+
+ if (op_ret != 0)
+ goto out;
- inode_ctx_put (inode, this,
- (uint64_t)(long)ioc_inode);
- }
-
- ioc_inode_lock (ioc_inode);
- {
- content_data = dict_get (dict, "glusterfs.content");
- page = ioc_page_get (ioc_inode, 0);
-
- if (content_data) {
- if (page) {
- iobref_unref (page->iobref);
- free (page->vector);
- page->vector = NULL;
-
- ioc_table_lock (table);
- {
- table->cache_used -=
- iobref_size (page->iobref);
- }
- ioc_table_unlock (table);
- } else {
- page = ioc_page_create (ioc_inode, 0);
- if (page == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- gf_log (this->name,
- GF_LOG_ERROR,
- "out of memory");
- goto out;
- }
- }
+ local = frame->local;
+ if (local == NULL) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
- src = data_to_ptr (content_data);
+ if (!this || !this->private) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
- iobuf = iobuf_get (this->ctx->iobuf_pool);
- page->iobref = iobref_new ();
- if (page->iobref == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
-
- ioc_page_destroy (page);
- page = NULL;
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
+ table = this->private;
- iobref_add (page->iobref, iobuf);
-
- memcpy (iobuf->ptr, src, stbuf->st_size);
+ path = local->file_loc.path;
- page->vector = CALLOC (1,
- sizeof (*page->vector));
-
- if (page->vector == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
+ LOCK (&inode->lock);
+ {
+ __inode_ctx_get (inode, this, &tmp_ioc_inode);
+ ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
- op_ret = -1;
- op_errno = ENOMEM;
- ioc_page_destroy (page);
- page = NULL;
- goto out;
- }
+ if (!ioc_inode) {
+ weight = ioc_get_priority (table, path);
- page->vector->iov_base = iobuf->ptr;
- page->vector->iov_len = stbuf->st_size;
- page->count = 1;
-
- page->waitq = NULL;
- page->size = stbuf->st_size;
- page->ready = 1;
-
- ioc_table_lock (table);
- {
- table->cache_used +=
- iobref_size (page->iobref);
- }
- ioc_table_unlock (table);
-
- } else {
- if (!(page && page->ready)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "page not present");
-
- ioc_inode_unlock (ioc_inode);
- STACK_WIND (frame, ioc_lookup_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->lookup,
- &local->file_loc,
- local->xattr_req);
- return 0;
- }
- buf = CALLOC (1, stbuf->st_size);
- if (buf == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
+ ioc_inode = ioc_inode_update (table, inode,
+ weight);
- tmp = buf;
-
- for (i = 0; i < page->count; i++) {
- memcpy (tmp, page->vector[i].iov_base,
- page->vector[i].iov_len);
- tmp += page->vector[i].iov_len;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "serving file %s from cache",
- local->file_loc.path);
-
- if (!dict) {
- dict = get_new_dict ();
- if (dict == NULL) {
- /* logged in get_new_dict() */
- FREE (buf);
- buf = tmp = NULL;
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
+ __inode_ctx_put (inode, this,
+ (uint64_t)(long)ioc_inode);
+ }
+ }
+ UNLOCK (&inode->lock);
- need_unref = 1;
- dict_ref (dict);
- }
- dict_set (dict, "glusterfs.content",
- data_from_dynptr (buf,
- stbuf->st_size));
- }
-
- ioc_inode->mtime = stbuf->st_mtime;
- gettimeofday (&ioc_inode->tv, NULL);
- }
- ioc_inode_unlock (ioc_inode);
-
- if (content_data &&
- ioc_need_prune (ioc_inode->table)) {
- ioc_prune (ioc_inode->table);
- }
- }
+ ioc_inode_lock (ioc_inode);
+ {
+ if (ioc_inode->cache.mtime == 0) {
+ ioc_inode->cache.mtime = stbuf->ia_mtime;
+ ioc_inode->cache.mtime_nsec = stbuf->ia_mtime_nsec;
+ }
-out:
- STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf, dict);
+ ioc_inode->ia_size = stbuf->ia_size;
+ }
+ ioc_inode_unlock (ioc_inode);
- if (need_unref) {
- dict_unref (dict);
- }
+ cache_still_valid = ioc_cache_still_valid (ioc_inode,
+ stbuf);
- if (iobref)
- iobref_unref (iobref);
+ if (!cache_still_valid) {
+ ioc_inode_flush (ioc_inode);
+ }
- if (iobuf)
- iobuf_unref (iobuf);
+ ioc_table_lock (ioc_inode->table);
+ {
+ list_move_tail (&ioc_inode->inode_lru,
+ &table->inode_lru[ioc_inode->weight]);
+ }
+ ioc_table_unlock (ioc_inode->table);
- return 0;
+out:
+ if (frame->local != NULL) {
+ local = frame->local;
+ loc_wipe (&local->file_loc);
+ }
+
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, stbuf,
+ xdata, postparent);
+ return 0;
}
-int32_t
+int32_t
ioc_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xattr_req)
+ dict_t *xdata)
{
- uint64_t content_limit = 0;
- uint64_t tmp_ioc_inode = 0;
- ioc_inode_t *ioc_inode = NULL;
- ioc_page_t *page = NULL;
- ioc_local_t *local = NULL;
+ ioc_local_t *local = NULL;
+ int32_t op_errno = -1, ret = -1;
- if (GF_FILE_CONTENT_REQUESTED(xattr_req, &content_limit)) {
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
- STACK_UNWIND (frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
- }
+ local = mem_get0 (this->local_pool);
+ if (local == NULL) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto unwind;
+ }
- local->need_xattr = content_limit;
- local->file_loc.path = loc->path;
- local->file_loc.inode = loc->inode;
- frame->local = local;
-
- inode_ctx_get (loc->inode, this, &tmp_ioc_inode);
- ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
-
- if (ioc_inode) {
- ioc_inode_lock (ioc_inode);
- {
- page = ioc_page_get (ioc_inode, 0);
- if ((content_limit <=
- ioc_inode->table->page_size) &&
- page && page->ready) {
- local->need_xattr = -1;
- }
- }
- ioc_inode_unlock (ioc_inode);
- }
- }
-
- STACK_WIND (frame, ioc_lookup_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->lookup, loc, xattr_req);
+ ret = loc_copy (&local->file_loc, loc);
+ if (ret != 0) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto unwind;
+ }
- return 0;
+ frame->local = local;
+
+ STACK_WIND (frame, ioc_lookup_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->lookup, loc, xdata);
+
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT (lookup, frame, -1, op_errno, NULL, NULL,
+ NULL, NULL);
+
+ return 0;
}
/*
- * ioc_forget -
+ * ioc_forget -
*
* @frame:
* @this:
@@ -458,19 +303,33 @@ ioc_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
int32_t
ioc_forget (xlator_t *this, inode_t *inode)
{
- uint64_t ioc_inode = 0;
+ uint64_t ioc_inode = 0;
- inode_ctx_get (inode, this, &ioc_inode);
+ inode_ctx_get (inode, this, &ioc_inode);
+
+ if (ioc_inode)
+ ioc_inode_destroy ((ioc_inode_t *)(long)ioc_inode);
+
+ return 0;
+}
+
+static int32_t
+ioc_invalidate(xlator_t *this, inode_t *inode)
+{
+ uint64_t ioc_addr = 0;
+ ioc_inode_t *ioc_inode = NULL;
+
+ inode_ctx_get(inode, this, (uint64_t *) &ioc_addr);
+ ioc_inode = (void *) ioc_addr;
if (ioc_inode)
- ioc_inode_destroy ((ioc_inode_t *)(long)ioc_inode);
-
+ ioc_inode_flush(ioc_inode);
+
return 0;
}
-
-/*
- * ioc_cache_validate_cbk -
+/*
+ * ioc_cache_validate_cbk -
*
* @frame:
* @cookie:
@@ -482,84 +341,89 @@ ioc_forget (xlator_t *this, inode_t *inode)
*/
int32_t
ioc_cache_validate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ dict_t *xdata)
{
- ioc_local_t *local = NULL;
- ioc_inode_t *ioc_inode = NULL;
- size_t destroy_size = 0;
- struct stat *local_stbuf = NULL;
+ ioc_local_t *local = NULL;
+ ioc_inode_t *ioc_inode = NULL;
+ size_t destroy_size = 0;
+ struct iatt *local_stbuf = NULL;
local = frame->local;
- ioc_inode = local->inode;
+ ioc_inode = local->inode;
local_stbuf = stbuf;
- if ((op_ret == -1) ||
- ((op_ret >= 0) && !ioc_cache_still_valid(ioc_inode, stbuf))) {
- gf_log (ioc_inode->table->xl->name, GF_LOG_DEBUG,
- "cache for inode(%p) is invalid. flushing all pages",
- ioc_inode);
- /* NOTE: only pages with no waiting frames are flushed by
- * ioc_inode_flush. page_fault will be generated for all
- * the pages which have waiting frames by ioc_inode_wakeup()
- */
- ioc_inode_lock (ioc_inode);
- {
- destroy_size = __ioc_inode_flush (ioc_inode);
- if (op_ret >= 0)
- ioc_inode->mtime = stbuf->st_mtime;
- }
- ioc_inode_unlock (ioc_inode);
- local_stbuf = NULL;
- }
-
- if (destroy_size) {
- ioc_table_lock (ioc_inode->table);
- {
- ioc_inode->table->cache_used -= destroy_size;
- }
- ioc_table_unlock (ioc_inode->table);
- }
-
- if (op_ret < 0)
- local_stbuf = NULL;
-
- ioc_inode_lock (ioc_inode);
- {
- gettimeofday (&ioc_inode->tv, NULL);
- }
- ioc_inode_unlock (ioc_inode);
-
- ioc_inode_wakeup (frame, ioc_inode, local_stbuf);
-
- /* any page-fault initiated by ioc_inode_wakeup() will have its own
- * fd_ref on fd, safe to unref validate frame's private copy
- */
- fd_unref (local->fd);
-
- STACK_DESTROY (frame->root);
+ if ((op_ret == -1) ||
+ ((op_ret >= 0) && !ioc_cache_still_valid(ioc_inode, stbuf))) {
+ gf_log (ioc_inode->table->xl->name, GF_LOG_DEBUG,
+ "cache for inode(%p) is invalid. flushing all pages",
+ ioc_inode);
+ /* NOTE: only pages with no waiting frames are flushed by
+ * ioc_inode_flush. page_fault will be generated for all
+ * the pages which have waiting frames by ioc_inode_wakeup()
+ */
+ ioc_inode_lock (ioc_inode);
+ {
+ destroy_size = __ioc_inode_flush (ioc_inode);
+ if (op_ret >= 0) {
+ ioc_inode->cache.mtime = stbuf->ia_mtime;
+ ioc_inode->cache.mtime_nsec
+ = stbuf->ia_mtime_nsec;
+ }
+ }
+ ioc_inode_unlock (ioc_inode);
+ local_stbuf = NULL;
+ }
- return 0;
+ if (destroy_size) {
+ ioc_table_lock (ioc_inode->table);
+ {
+ ioc_inode->table->cache_used -= destroy_size;
+ }
+ ioc_table_unlock (ioc_inode->table);
+ }
+
+ if (op_ret < 0)
+ local_stbuf = NULL;
+
+ ioc_inode_lock (ioc_inode);
+ {
+ gettimeofday (&ioc_inode->cache.tv, NULL);
+ }
+ ioc_inode_unlock (ioc_inode);
+
+ ioc_inode_wakeup (frame, ioc_inode, local_stbuf);
+
+ /* any page-fault initiated by ioc_inode_wakeup() will have its own
+ * fd_ref on fd, safe to unref validate frame's private copy
+ */
+ fd_unref (local->fd);
+
+ STACK_DESTROY (frame->root);
+
+ return 0;
}
int32_t
ioc_wait_on_inode (ioc_inode_t *ioc_inode, ioc_page_t *page)
{
- ioc_waitq_t *waiter = NULL, *trav = NULL;
- uint32_t page_found = 0;
- int32_t ret = 0;
-
- trav = ioc_inode->waitq;
-
- while (trav) {
- if (trav->data == page) {
- page_found = 1;
- break;
- }
- trav = trav->next;
- }
-
- if (!page_found) {
- waiter = CALLOC (1, sizeof (ioc_waitq_t));
+ ioc_waitq_t *waiter = NULL, *trav = NULL;
+ uint32_t page_found = 0;
+ int32_t ret = 0;
+
+ trav = ioc_inode->waitq;
+
+ while (trav) {
+ if (trav->data == page) {
+ page_found = 1;
+ break;
+ }
+ trav = trav->next;
+ }
+
+ if (!page_found) {
+ waiter = GF_CALLOC (1, sizeof (ioc_waitq_t),
+ gf_ioc_mt_ioc_waitq_t);
if (waiter == NULL) {
gf_log (ioc_inode->table->xl->name, GF_LOG_ERROR,
"out of memory");
@@ -567,13 +431,13 @@ ioc_wait_on_inode (ioc_inode_t *ioc_inode, ioc_page_t *page)
goto out;
}
- waiter->data = page;
- waiter->next = ioc_inode->waitq;
- ioc_inode->waitq = waiter;
- }
+ waiter->data = page;
+ waiter->next = ioc_inode->waitq;
+ ioc_inode->waitq = waiter;
+ }
-out:
- return ret;
+out:
+ return ret;
}
/*
@@ -586,15 +450,15 @@ out:
*/
int32_t
ioc_cache_validate (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd,
- ioc_page_t *page)
+ ioc_page_t *page)
{
- call_frame_t *validate_frame = NULL;
- ioc_local_t *validate_local = NULL;
- ioc_local_t *local = NULL;
- int32_t ret = 0;
+ call_frame_t *validate_frame = NULL;
+ ioc_local_t *validate_local = NULL;
+ ioc_local_t *local = NULL;
+ int32_t ret = 0;
local = frame->local;
- validate_local = CALLOC (1, sizeof (ioc_local_t));
+ validate_local = mem_get0 (THIS->local_pool);
if (validate_local == NULL) {
ret = -1;
local->op_ret = -1;
@@ -604,59 +468,58 @@ ioc_cache_validate (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd,
goto out;
}
- validate_frame = copy_frame (frame);
+ validate_frame = copy_frame (frame);
if (validate_frame == NULL) {
ret = -1;
local->op_ret = -1;
local->op_errno = ENOMEM;
- FREE (validate_local);
+ mem_put (validate_local);
gf_log (ioc_inode->table->xl->name, GF_LOG_ERROR,
"out of memory");
goto out;
}
- validate_local->fd = fd_ref (fd);
- validate_local->inode = ioc_inode;
- validate_frame->local = validate_local;
-
- STACK_WIND (validate_frame, ioc_cache_validate_cbk,
+ validate_local->fd = fd_ref (fd);
+ validate_local->inode = ioc_inode;
+ validate_frame->local = validate_local;
+
+ STACK_WIND (validate_frame, ioc_cache_validate_cbk,
FIRST_CHILD (frame->this),
- FIRST_CHILD (frame->this)->fops->fstat, fd);
+ FIRST_CHILD (frame->this)->fops->fstat, fd, NULL);
out:
- return ret;
+ return ret;
}
-inline uint32_t
+static inline uint32_t
is_match (const char *path, const char *pattern)
{
- int32_t ret = 0;
+ int32_t ret = 0;
+
+ ret = fnmatch (pattern, path, FNM_NOESCAPE);
- ret = fnmatch (pattern, path, FNM_NOESCAPE);
-
- return (ret == 0);
+ return (ret == 0);
}
uint32_t
ioc_get_priority (ioc_table_t *table, const char *path)
{
- uint32_t priority = 0;
- struct ioc_priority *curr = NULL;
-
- if (list_empty(&table->priority_list)) {
- priority = 1;
- }
- else {
- list_for_each_entry (curr, &table->priority_list, list) {
- if (is_match (path, curr->pattern))
- priority = curr->priority;
- }
- }
-
- return priority;
+ uint32_t priority = 1;
+ struct ioc_priority *curr = NULL;
+
+ if (list_empty(&table->priority_list))
+ return priority;
+
+ priority = 0;
+ list_for_each_entry (curr, &table->priority_list, list) {
+ if (is_match (path, curr->pattern))
+ priority = curr->priority;
+ }
+
+ return priority;
}
-/*
+/*
* ioc_open_cbk - open callback for io cache
*
* @frame: call frame
@@ -669,80 +532,74 @@ ioc_get_priority (ioc_table_t *table, const char *path)
*/
int32_t
ioc_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd)
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- uint64_t tmp_ioc_inode = 0;
- ioc_local_t *local = NULL;
- ioc_table_t *table = NULL;
- ioc_inode_t *ioc_inode = NULL;
- inode_t *inode = NULL;
- uint32_t weight = 0xffffffff;
- const char *path = NULL;
+ uint64_t tmp_ioc_inode = 0;
+ ioc_local_t *local = NULL;
+ ioc_table_t *table = NULL;
+ ioc_inode_t *ioc_inode = NULL;
+ uint32_t weight = 0xffffffff;
local = frame->local;
+ if (!this || !this->private) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
table = this->private;
- inode = local->file_loc.inode;
- path = local->file_loc.path;
- if (op_ret != -1) {
- /* look for ioc_inode corresponding to this fd */
- LOCK (&fd->inode->lock);
- {
- __inode_ctx_get (fd->inode, this, &tmp_ioc_inode);
- ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
-
- if (!ioc_inode) {
- /*
- this is the first time someone is opening
- this file, assign weight
- */
- weight = ioc_get_priority (table, path);
-
- ioc_inode = ioc_inode_update (table, inode,
- weight);
- __inode_ctx_put (fd->inode, this,
- (uint64_t)(long)ioc_inode);
- } else {
- ioc_table_lock (ioc_inode->table);
- {
- list_move_tail (&ioc_inode->inode_lru,
- &table->inode_lru[ioc_inode->weight]);
- }
- ioc_table_unlock (ioc_inode->table);
+ if (op_ret != -1) {
+ inode_ctx_get (fd->inode, this, &tmp_ioc_inode);
+ ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
+
+ //TODO: see why inode context is NULL and handle it.
+ if (!ioc_inode) {
+ gf_log (this->name, GF_LOG_ERROR, "inode context is "
+ "NULL (%s)", uuid_utoa (fd->inode->gfid));
+ goto out;
+ }
+
+ ioc_table_lock (ioc_inode->table);
+ {
+ list_move_tail (&ioc_inode->inode_lru,
+ &table->inode_lru[ioc_inode->weight]);
+ }
+ ioc_table_unlock (ioc_inode->table);
+
+ ioc_inode_lock (ioc_inode);
+ {
+ if ((table->min_file_size > ioc_inode->ia_size)
+ || ((table->max_file_size > 0)
+ && (table->max_file_size < ioc_inode->ia_size))) {
+ fd_ctx_set (fd, this, 1);
}
+ }
+ ioc_inode_unlock (ioc_inode);
+
+ /* If O_DIRECT open, we disable caching on it */
+ if ((local->flags & O_DIRECT)){
+ /* O_DIRECT is only for one fd, not the inode
+ * as a whole
+ */
+ fd_ctx_set (fd, this, 1);
+ }
- }
- UNLOCK (&fd->inode->lock);
-
- /* If mandatory locking has been enabled on this file,
- we disable caching on it */
- if (((inode->st_mode & S_ISGID)
- && !(inode->st_mode & S_IXGRP))) {
- fd_ctx_set (fd, this, 1);
- }
-
- /* If O_DIRECT open, we disable caching on it */
- if ((local->flags & O_DIRECT)){
- /* O_DIRECT is only for one fd, not the inode
- * as a whole
- */
- fd_ctx_set (fd, this, 1);
- }
-
- /* weight = 0, we disable caching on it */
- if (weight == 0) {
- /* we allow a pattern-matched cache disable this way
- */
- fd_ctx_set (fd, this, 1);
- }
- }
-
- FREE (local);
- frame->local = NULL;
-
- STACK_UNWIND (frame, op_ret, op_errno, fd);
+ /* weight = 0, we disable caching on it */
+ if (weight == 0) {
+ /* we allow a pattern-matched cache disable this way
+ */
+ fd_ctx_set (fd, this, 1);
+ }
+ }
- return 0;
+out:
+ mem_put (local);
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+
+ return 0;
}
/*
@@ -761,62 +618,182 @@ ioc_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
int32_t
ioc_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd,
- inode_t *inode, struct stat *buf)
+ inode_t *inode, struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- ioc_local_t *local = NULL;
- ioc_table_t *table = NULL;
- ioc_inode_t *ioc_inode = NULL;
- uint32_t weight = 0xffffffff;
- const char *path = NULL;
+ ioc_local_t *local = NULL;
+ ioc_table_t *table = NULL;
+ ioc_inode_t *ioc_inode = NULL;
+ uint32_t weight = 0xffffffff;
+ const char *path = NULL;
+ int ret = -1;
local = frame->local;
+ if (!this || !this->private) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
table = this->private;
path = local->file_loc.path;
- if (op_ret != -1) {
- {
- /* assign weight */
- weight = ioc_get_priority (table, path);
+ if (op_ret != -1) {
+ /* assign weight */
+ weight = ioc_get_priority (table, path);
+
+ ioc_inode = ioc_inode_update (table, inode, weight);
+
+ ioc_inode_lock (ioc_inode);
+ {
+ ioc_inode->cache.mtime = buf->ia_mtime;
+ ioc_inode->cache.mtime_nsec = buf->ia_mtime_nsec;
+ ioc_inode->ia_size = buf->ia_size;
+
+ if ((table->min_file_size > ioc_inode->ia_size)
+ || ((table->max_file_size > 0)
+ && (table->max_file_size < ioc_inode->ia_size))) {
+ ret = fd_ctx_set (fd, this, 1);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set fd ctx",
+ local->file_loc.path);
+ }
+ }
+ ioc_inode_unlock (ioc_inode);
+
+ inode_ctx_put (fd->inode, this,
+ (uint64_t)(long)ioc_inode);
+
+ /* If O_DIRECT open, we disable caching on it */
+ if (local->flags & O_DIRECT) {
+ /*
+ * O_DIRECT is only for one fd, not the inode
+ * as a whole */
+ ret = fd_ctx_set (fd, this, 1);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set fd ctx",
+ local->file_loc.path);
+ }
- ioc_inode = ioc_inode_update (table, inode, weight);
+ /* if weight == 0, we disable caching on it */
+ if (!weight) {
+ /* we allow a pattern-matched cache disable this way */
+ ret = fd_ctx_set (fd, this, 1);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to set fd ctx",
+ local->file_loc.path);
+ }
- inode_ctx_put (fd->inode, this,
- (uint64_t)(long)ioc_inode);
- }
- /*
- * If mandatory locking has been enabled on this file,
- * we disable caching on it
- */
- if ((inode->st_mode & S_ISGID) &&
- !(inode->st_mode & S_IXGRP)) {
- fd_ctx_set (fd, this, 1);
- }
-
- /* If O_DIRECT open, we disable caching on it */
- if (local->flags & O_DIRECT){
- /*
- * O_DIRECT is only for one fd, not the inode
- * as a whole
- */
- fd_ctx_set (fd, this, 1);
- }
-
- /* weight = 0, we disable caching on it */
- if (weight == 0) {
- /* we allow a pattern-matched cache disable this way
- */
- fd_ctx_set (fd, this, 1);
- }
- }
-
- frame->local = NULL;
- FREE (local);
-
- STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
+ }
- return 0;
+out:
+ frame->local = NULL;
+ mem_put (local);
+
+ STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+
+ return 0;
+}
+
+
+int32_t
+ioc_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ ioc_local_t *local = NULL;
+ ioc_table_t *table = NULL;
+ ioc_inode_t *ioc_inode = NULL;
+ uint32_t weight = 0xffffffff;
+ const char *path = NULL;
+
+ local = frame->local;
+ if (!this || !this->private) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ table = this->private;
+ path = local->file_loc.path;
+
+ if (op_ret != -1) {
+ /* assign weight */
+ weight = ioc_get_priority (table, path);
+
+ ioc_inode = ioc_inode_update (table, inode, weight);
+
+ ioc_inode_lock (ioc_inode);
+ {
+ ioc_inode->cache.mtime = buf->ia_mtime;
+ ioc_inode->cache.mtime_nsec = buf->ia_mtime_nsec;
+ ioc_inode->ia_size = buf->ia_size;
+ }
+ ioc_inode_unlock (ioc_inode);
+
+ inode_ctx_put (inode, this,
+ (uint64_t)(long)ioc_inode);
+ }
+
+out:
+ frame->local = NULL;
+
+ loc_wipe (&local->file_loc);
+ mem_put (local);
+
+ STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
}
+
+int
+ioc_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ ioc_local_t *local = NULL;
+ int32_t op_errno = -1, ret = -1;
+
+ local = mem_get0 (this->local_pool);
+ if (local == NULL) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto unwind;
+ }
+
+ ret = loc_copy (&local->file_loc, loc);
+ if (ret != 0) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ goto unwind;
+ }
+
+ frame->local = local;
+
+ STACK_WIND (frame, ioc_mknod_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod,
+ loc, mode, rdev, umask, xdata);
+ return 0;
+
+unwind:
+ if (local != NULL) {
+ loc_wipe (&local->file_loc);
+ mem_put (local);
+ }
+
+ STACK_UNWIND_STRICT (mknod, frame, -1, op_errno, NULL, NULL,
+ NULL, NULL, NULL);
+
+ return 0;
+}
+
+
/*
* ioc_open - open fop for io cache
* @frame:
@@ -827,33 +804,34 @@ ioc_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
*/
int32_t
ioc_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
-
- ioc_local_t *local = NULL;
- local = CALLOC (1, sizeof (ioc_local_t));
+ ioc_local_t *local = NULL;
+
+ local = mem_get0 (this->local_pool);
if (local == NULL) {
gf_log (this->name, GF_LOG_ERROR, "out of memory");
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT (open, frame, -1, ENOMEM, NULL, NULL);
return 0;
}
- local->flags = flags;
- local->file_loc.path = loc->path;
- local->file_loc.inode = loc->inode;
-
- frame->local = local;
-
- STACK_WIND (frame, ioc_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd);
+ local->flags = flags;
+ local->file_loc.path = loc->path;
+ local->file_loc.inode = loc->inode;
- return 0;
+ frame->local = local;
+
+ STACK_WIND (frame, ioc_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd,
+ xdata);
+
+ return 0;
}
/*
* ioc_create - create fop for io cache
- *
+ *
* @frame:
* @this:
* @pathname:
@@ -863,25 +841,27 @@ ioc_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
*/
int32_t
ioc_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, fd_t *fd)
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- ioc_local_t *local = NULL;
-
- local = CALLOC (1, sizeof (ioc_local_t));
+ ioc_local_t *local = NULL;
+
+ local = mem_get0 (this->local_pool);
if (local == NULL) {
gf_log (this->name, GF_LOG_ERROR, "out of memory");
- STACK_UNWIND (frame, -1, ENOMEM, NULL, NULL, NULL);
+ STACK_UNWIND_STRICT (create, frame, -1, ENOMEM, NULL, NULL,
+ NULL, NULL, NULL, NULL);
return 0;
}
- local->flags = flags;
- local->file_loc.path = loc->path;
- frame->local = local;
-
- STACK_WIND (frame, ioc_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create, loc, flags, mode, fd);
+ local->flags = flags;
+ local->file_loc.path = loc->path;
+ frame->local = local;
- return 0;
+ STACK_WIND (frame, ioc_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode,
+ umask, fd, xdata);
+
+ return 0;
}
@@ -889,7 +869,7 @@ ioc_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
/*
* ioc_release - release fop for io cache
- *
+ *
* @frame:
* @this:
* @fd:
@@ -898,11 +878,11 @@ ioc_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
int32_t
ioc_release (xlator_t *this, fd_t *fd)
{
- return 0;
+ return 0;
}
-/*
- * ioc_readv_disabled_cbk
+/*
+ * ioc_readv_disabled_cbk
* @frame:
* @cookie:
* @this:
@@ -911,163 +891,174 @@ ioc_release (xlator_t *this, fd_t *fd)
* @vector:
* @count:
*
- */
+ */
int32_t
ioc_readv_disabled_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct stat *stbuf,
- struct iobref *iobref)
+ int32_t count, struct iatt *stbuf,
+ struct iobref *iobref, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf, iobref);
- return 0;
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
+ stbuf, iobref, xdata);
+ return 0;
}
int32_t
ioc_need_prune (ioc_table_t *table)
{
- int64_t cache_difference = 0;
-
- ioc_table_lock (table);
- {
- cache_difference = table->cache_used - table->cache_size;
- }
- ioc_table_unlock (table);
-
- if (cache_difference > 0)
- return 1;
- else
- return 0;
+ int64_t cache_difference = 0;
+
+ ioc_table_lock (table);
+ {
+ cache_difference = table->cache_used - table->cache_size;
+ }
+ ioc_table_unlock (table);
+
+ if (cache_difference > 0)
+ return 1;
+ else
+ return 0;
}
/*
* ioc_dispatch_requests -
- *
+ *
* @frame:
* @inode:
*
- *
+ *
*/
void
ioc_dispatch_requests (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd,
off_t offset, size_t size)
{
- ioc_local_t *local = NULL;
- ioc_table_t *table = NULL;
- ioc_page_t *trav = NULL;
- ioc_waitq_t *waitq = NULL;
- off_t rounded_offset = 0;
- off_t rounded_end = 0;
- off_t trav_offset = 0;
- int32_t fault = 0;
- size_t trav_size = 0;
- off_t local_offset = 0;
- int32_t ret = -1;
- int8_t need_validate = 0;
- int8_t might_need_validate = 0; /*
- * if a page exists, do we need
- * to validate it?
- */
+ ioc_local_t *local = NULL;
+ ioc_table_t *table = NULL;
+ ioc_page_t *trav = NULL;
+ ioc_waitq_t *waitq = NULL;
+ off_t rounded_offset = 0;
+ off_t rounded_end = 0;
+ off_t trav_offset = 0;
+ int32_t fault = 0;
+ size_t trav_size = 0;
+ off_t local_offset = 0;
+ int32_t ret = -1;
+ int8_t need_validate = 0;
+ int8_t might_need_validate = 0; /*
+ * if a page exists, do we need
+ * to validate it?
+ */
local = frame->local;
table = ioc_inode->table;
- rounded_offset = floor (offset, table->page_size);
- rounded_end = roof (offset + size, table->page_size);
- trav_offset = rounded_offset;
-
- /* once a frame does read, it should be waiting on something */
- local->wait_count++;
-
- /* Requested region can fall in three different pages,
- * 1. Ready - region is already in cache, we just have to serve it.
- * 2. In-transit - page fault has been generated on this page, we need
- * to wait till the page is ready
- * 3. Fault - page is not in cache, we have to generate a page fault
- */
-
- might_need_validate = ioc_inode_need_revalidate (ioc_inode);
-
- while (trav_offset < rounded_end) {
- ioc_inode_lock (ioc_inode);
- //{
-
- /* look for requested region in the cache */
- trav = ioc_page_get (ioc_inode, trav_offset);
-
- local_offset = max (trav_offset, offset);
- trav_size = min (((offset+size) - local_offset),
- table->page_size);
-
- if (!trav) {
- /* page not in cache, we need to generate page fault */
- trav = ioc_page_create (ioc_inode, trav_offset);
- fault = 1;
- if (!trav) {
- gf_log (frame->this->name, GF_LOG_CRITICAL,
- "out of memory");
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto out;
- }
- }
-
- ioc_wait_on_page (trav, frame, local_offset, trav_size);
-
- if (trav->ready) {
- /* page found in cache */
- if (!might_need_validate && !ioc_inode->waitq) {
- /* fresh enough */
- gf_log (frame->this->name, GF_LOG_TRACE,
- "cache hit for trav_offset=%"PRId64""
- "/local_offset=%"PRId64"",
- trav_offset, local_offset);
- waitq = ioc_page_wakeup (trav);
- } else {
- /* if waitq already exists, fstat revalidate is
- already on the way */
- if (!ioc_inode->waitq) {
- need_validate = 1;
- }
-
- ret = ioc_wait_on_inode (ioc_inode, trav);
- if (ret < 0) {
+ rounded_offset = floor (offset, table->page_size);
+ rounded_end = roof (offset + size, table->page_size);
+ trav_offset = rounded_offset;
+
+ /* once a frame does read, it should be waiting on something */
+ local->wait_count++;
+
+ /* Requested region can fall in three different pages,
+ * 1. Ready - region is already in cache, we just have to serve it.
+ * 2. In-transit - page fault has been generated on this page, we need
+ * to wait till the page is ready
+ * 3. Fault - page is not in cache, we have to generate a page fault
+ */
+
+ might_need_validate = ioc_inode_need_revalidate (ioc_inode);
+
+ while (trav_offset < rounded_end) {
+ ioc_inode_lock (ioc_inode);
+ {
+ /* look for requested region in the cache */
+ trav = __ioc_page_get (ioc_inode, trav_offset);
+
+ local_offset = max (trav_offset, offset);
+ trav_size = min (((offset+size) - local_offset),
+ table->page_size);
+
+ if (!trav) {
+ /* page not in cache, we need to generate page
+ * fault
+ */
+ trav = __ioc_page_create (ioc_inode,
+ trav_offset);
+ fault = 1;
+ if (!trav) {
+ gf_log (frame->this->name,
+ GF_LOG_CRITICAL,
+ "out of memory");
local->op_ret = -1;
- local->op_errno = -ret;
- need_validate = 0;
+ local->op_errno = ENOMEM;
+ goto out;
+ }
+ }
- waitq = ioc_page_wakeup (trav);
- ioc_inode_unlock (ioc_inode);
+ __ioc_wait_on_page (trav, frame, local_offset,
+ trav_size);
+
+ if (trav->ready) {
+ /* page found in cache */
+ if (!might_need_validate && !ioc_inode->waitq) {
+ /* fresh enough */
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "cache hit for trav_offset=%"
+ PRId64"/local_offset=%"PRId64"",
+ trav_offset, local_offset);
+ waitq = __ioc_page_wakeup (trav,
+ trav->op_errno);
+ } else {
+ /* if waitq already exists, fstat
+ * revalidate is
+ * already on the way
+ */
+ if (!ioc_inode->waitq) {
+ need_validate = 1;
+ }
- ioc_waitq_return (waitq);
- waitq = NULL;
- goto out;
+ ret = ioc_wait_on_inode (ioc_inode,
+ trav);
+ if (ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ need_validate = 0;
+
+ waitq = __ioc_page_wakeup (trav,
+ trav->op_errno);
+ ioc_inode_unlock (ioc_inode);
+
+ ioc_waitq_return (waitq);
+ waitq = NULL;
+ goto out;
+ }
}
- }
- }
-
- //}
- ioc_inode_unlock (ioc_inode);
-
- ioc_waitq_return (waitq);
- waitq = NULL;
-
- if (fault) {
- fault = 0;
- /* new page created, increase the table->cache_used */
- ioc_page_fault (ioc_inode, frame, fd, trav_offset);
- }
-
- if (need_validate) {
- need_validate = 0;
- gf_log (frame->this->name, GF_LOG_TRACE,
- "sending validate request for "
- "inode(%"PRId64") at offset=%"PRId64"",
- fd->inode->ino, trav_offset);
- ret = ioc_cache_validate (frame, ioc_inode, fd, trav);
+ }
+
+ }
+ ioc_inode_unlock (ioc_inode);
+
+ ioc_waitq_return (waitq);
+ waitq = NULL;
+
+ if (fault) {
+ fault = 0;
+ /* new page created, increase the table->cache_used */
+ ioc_page_fault (ioc_inode, frame, fd, trav_offset);
+ }
+
+ if (need_validate) {
+ need_validate = 0;
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "sending validate request for "
+ "inode(%s) at offset=%"PRId64"",
+ uuid_utoa (fd->inode->gfid), trav_offset);
+ ret = ioc_cache_validate (frame, ioc_inode, fd, trav);
if (ret == -1) {
ioc_inode_lock (ioc_inode);
{
- waitq = ioc_page_wakeup (trav);
+ waitq = __ioc_page_wakeup (trav,
+ trav->op_errno);
}
ioc_inode_unlock (ioc_inode);
@@ -1075,25 +1066,25 @@ ioc_dispatch_requests (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd,
waitq = NULL;
goto out;
}
- }
-
- trav_offset += table->page_size;
- }
+ }
+
+ trav_offset += table->page_size;
+ }
out:
- ioc_frame_return (frame);
+ ioc_frame_return (frame);
- if (ioc_need_prune (ioc_inode->table)) {
- ioc_prune (ioc_inode->table);
- }
+ if (ioc_need_prune (ioc_inode->table)) {
+ ioc_prune (ioc_inode->table);
+ }
- return;
+ return;
}
/*
* ioc_readv -
- *
+ *
* @frame:
* @this:
* @fd:
@@ -1103,70 +1094,107 @@ out:
*/
int32_t
ioc_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset)
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
- uint64_t tmp_ioc_inode = 0;
- ioc_inode_t *ioc_inode = NULL;
- ioc_local_t *local = NULL;
- uint32_t weight = 0;
-
- inode_ctx_get (fd->inode, this, &tmp_ioc_inode);
- ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
- if (!ioc_inode) {
- /* caching disabled, go ahead with normal readv */
- STACK_WIND (frame, ioc_readv_disabled_cbk,
- FIRST_CHILD (frame->this),
- FIRST_CHILD (frame->this)->fops->readv, fd, size,
- offset);
- return 0;
- }
-
- if (!fd_ctx_get (fd, this, NULL)) {
- /* disable caching for this fd, go ahead with normal readv */
- STACK_WIND (frame, ioc_readv_disabled_cbk,
- FIRST_CHILD (frame->this),
- FIRST_CHILD (frame->this)->fops->readv, fd, size,
- offset);
- return 0;
- }
-
- local = (ioc_local_t *) CALLOC (1, sizeof (ioc_local_t));
+ uint64_t tmp_ioc_inode = 0;
+ ioc_inode_t *ioc_inode = NULL;
+ ioc_local_t *local = NULL;
+ uint32_t weight = 0;
+ ioc_table_t *table = NULL;
+ int32_t op_errno = -1;
+
+ if (!this) {
+ goto out;
+ }
+
+ inode_ctx_get (fd->inode, this, &tmp_ioc_inode);
+ ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
+ if (!ioc_inode) {
+ /* caching disabled, go ahead with normal readv */
+ STACK_WIND (frame, ioc_readv_disabled_cbk,
+ FIRST_CHILD (frame->this),
+ FIRST_CHILD (frame->this)->fops->readv, fd, size,
+ offset, flags, xdata);
+ return 0;
+ }
+
+
+ table = this->private;
+
+ if (!table) {
+ gf_log (this->name, GF_LOG_ERROR, "table is null");
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ ioc_inode_lock (ioc_inode);
+ {
+ if (!ioc_inode->cache.page_table) {
+ ioc_inode->cache.page_table
+ = rbthash_table_init
+ (IOC_PAGE_TABLE_BUCKET_COUNT,
+ ioc_hashfn, NULL, 0,
+ table->mem_pool);
+
+ if (ioc_inode->cache.page_table == NULL) {
+ op_errno = ENOMEM;
+ ioc_inode_unlock (ioc_inode);
+ goto out;
+ }
+ }
+ }
+ ioc_inode_unlock (ioc_inode);
+
+ if (!fd_ctx_get (fd, this, NULL)) {
+ /* disable caching for this fd, go ahead with normal readv */
+ STACK_WIND (frame, ioc_readv_disabled_cbk,
+ FIRST_CHILD (frame->this),
+ FIRST_CHILD (frame->this)->fops->readv, fd, size,
+ offset, flags, xdata);
+ return 0;
+ }
+
+ local = mem_get0 (this->local_pool);
if (local == NULL) {
gf_log (this->name, GF_LOG_ERROR, "out of memory");
- STACK_UNWIND (frame, -1, ENOMEM, NULL, 0, NULL, NULL);
- return 0;
+ op_errno = ENOMEM;
+ goto out;
}
- INIT_LIST_HEAD (&local->fill_list);
+ INIT_LIST_HEAD (&local->fill_list);
- frame->local = local;
- local->pending_offset = offset;
- local->pending_size = size;
- local->offset = offset;
- local->size = size;
- local->inode = ioc_inode;
+ frame->local = local;
+ local->pending_offset = offset;
+ local->pending_size = size;
+ local->offset = offset;
+ local->size = size;
+ local->inode = ioc_inode;
- gf_log (this->name, GF_LOG_TRACE,
- "NEW REQ (%p) offset = %"PRId64" && size = %"GF_PRI_SIZET"",
- frame, offset, size);
+ gf_log (this->name, GF_LOG_TRACE,
+ "NEW REQ (%p) offset = %"PRId64" && size = %"GF_PRI_SIZET"",
+ frame, offset, size);
- weight = ioc_inode->weight;
+ weight = ioc_inode->weight;
- ioc_table_lock (ioc_inode->table);
- {
- list_move_tail (&ioc_inode->inode_lru,
- &ioc_inode->table->inode_lru[weight]);
- }
- ioc_table_unlock (ioc_inode->table);
+ ioc_table_lock (ioc_inode->table);
+ {
+ list_move_tail (&ioc_inode->inode_lru,
+ &ioc_inode->table->inode_lru[weight]);
+ }
+ ioc_table_unlock (ioc_inode->table);
- ioc_dispatch_requests (frame, ioc_inode, fd, offset, size);
-
- return 0;
+ ioc_dispatch_requests (frame, ioc_inode, fd, offset, size);
+ return 0;
+
+out:
+ STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0, NULL, NULL,
+ NULL);
+ return 0;
}
/*
* ioc_writev_cbk -
- *
+ *
* @frame:
* @cookie:
* @this:
@@ -1176,24 +1204,26 @@ ioc_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
*/
int32_t
ioc_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- ioc_local_t *local = NULL;
- uint64_t ioc_inode = 0;
+ ioc_local_t *local = NULL;
+ uint64_t ioc_inode = 0;
local = frame->local;
- inode_ctx_get (local->fd->inode, this, &ioc_inode);
-
- if (ioc_inode)
- ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+ inode_ctx_get (local->fd->inode, this, &ioc_inode);
- STACK_UNWIND (frame, op_ret, op_errno, stbuf);
- return 0;
+ if (ioc_inode)
+ ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
}
/*
* ioc_writev
- *
+ *
* @frame:
* @this:
* @fd:
@@ -1204,38 +1234,38 @@ ioc_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
*/
int32_t
ioc_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref)
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
- ioc_local_t *local = NULL;
- uint64_t ioc_inode = 0;
-
- local = CALLOC (1, sizeof (ioc_local_t));
+ ioc_local_t *local = NULL;
+ uint64_t ioc_inode = 0;
+
+ local = mem_get0 (this->local_pool);
if (local == NULL) {
gf_log (this->name, GF_LOG_ERROR, "out of memory");
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
+ STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
}
- /* TODO: why is it not fd_ref'ed */
- local->fd = fd;
- frame->local = local;
+ /* TODO: why is it not fd_ref'ed */
+ local->fd = fd;
+ frame->local = local;
- inode_ctx_get (fd->inode, this, &ioc_inode);
- if (ioc_inode)
- ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+ inode_ctx_get (fd->inode, this, &ioc_inode);
+ if (ioc_inode)
+ ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
- STACK_WIND (frame, ioc_writev_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
- iobref);
+ STACK_WIND (frame, ioc_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
+ flags, iobref, xdata);
- return 0;
+ return 0;
}
/*
* ioc_truncate_cbk -
- *
+ *
* @frame:
* @cookie:
* @this:
@@ -1244,41 +1274,69 @@ ioc_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
* @buf:
*
*/
-int32_t
+int32_t
ioc_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
+ STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
}
+
+/*
+ * ioc_ftruncate_cbk -
+ *
+ * @frame:
+ * @cookie:
+ * @this:
+ * @op_ret:
+ * @op_errno:
+ * @buf:
+ *
+ */
+int32_t
+ioc_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+
+ STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
+}
+
+
/*
* ioc_truncate -
- *
+ *
* @frame:
* @this:
* @loc:
* @offset:
*
*/
-int32_t
-ioc_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
+int32_t
+ioc_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- uint64_t ioc_inode = 0;
- inode_ctx_get (loc->inode, this, &ioc_inode);
+ uint64_t ioc_inode = 0;
- if (ioc_inode)
- ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+ inode_ctx_get (loc->inode, this, &ioc_inode);
- STACK_WIND (frame, ioc_truncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, loc, offset);
- return 0;
+ if (ioc_inode)
+ ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+
+ STACK_WIND (frame, ioc_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
}
/*
* ioc_ftruncate -
- *
+ *
* @frame:
* @this:
* @fd:
@@ -1286,309 +1344,815 @@ ioc_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
*
*/
int32_t
-ioc_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
+ioc_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- uint64_t ioc_inode = 0;
- inode_ctx_get (fd->inode, this, &ioc_inode);
+ uint64_t ioc_inode = 0;
- if (ioc_inode)
- ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+ inode_ctx_get (fd->inode, this, &ioc_inode);
- STACK_WIND (frame, ioc_truncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, fd, offset);
- return 0;
+ if (ioc_inode)
+ ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+
+ STACK_WIND (frame, ioc_ftruncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
}
int32_t
ioc_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct flock *lock)
+ int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, lock);
- return 0;
+ STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata);
+ return 0;
}
-int32_t
+int32_t
ioc_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct flock *lock)
+ struct gf_flock *lock, dict_t *xdata)
{
- ioc_inode_t *ioc_inode = NULL;
- uint64_t tmp_inode = 0;
-
- inode_ctx_get (fd->inode, this, &tmp_inode);
- ioc_inode = (ioc_inode_t *)(long)tmp_inode;
- if (!ioc_inode) {
- gf_log (this->name, GF_LOG_DEBUG,
- "inode context is NULL: returning EBADFD");
- STACK_UNWIND (frame, -1, EBADFD, NULL);
- return 0;
- }
-
- ioc_inode_lock (ioc_inode);
- {
- gettimeofday (&ioc_inode->tv, NULL);
- }
- ioc_inode_unlock (ioc_inode);
-
- STACK_WIND (frame, ioc_lk_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->lk, fd, cmd, lock);
+ ioc_inode_t *ioc_inode = NULL;
+ uint64_t tmp_inode = 0;
+
+ inode_ctx_get (fd->inode, this, &tmp_inode);
+ ioc_inode = (ioc_inode_t *)(long)tmp_inode;
+ if (!ioc_inode) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "inode context is NULL: returning EBADFD");
+ STACK_UNWIND_STRICT (lk, frame, -1, EBADFD, NULL, NULL);
+ return 0;
+ }
+
+ ioc_inode_lock (ioc_inode);
+ {
+ gettimeofday (&ioc_inode->cache.tv, NULL);
+ }
+ ioc_inode_unlock (ioc_inode);
+
+ STACK_WIND (frame, ioc_lk_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->lk, fd, cmd, lock, xdata);
+ return 0;
+}
+
+int
+ioc_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries, dict_t *xdata)
+{
+ gf_dirent_t *entry = NULL;
+
+ if (op_ret <= 0)
+ goto unwind;
+
+ list_for_each_entry (entry, &entries->list, list) {
+ /* TODO: fill things */
+ }
+
+unwind:
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
+
+ return 0;
+}
+int
+ioc_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict)
+{
+ STACK_WIND (frame, ioc_readdirp_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp,
+ fd, size, offset, dict);
+
+ return 0;
+}
+
+static int32_t
+ioc_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, pre, post, xdata);
return 0;
}
+static int32_t
+ioc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ uint64_t ioc_inode = 0;
+
+ inode_ctx_get (fd->inode, this, &ioc_inode);
+
+ if (ioc_inode)
+ ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+
+ STACK_WIND(frame, ioc_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
+ return 0;
+}
+
+static int32_t
+ioc_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(zerofill, frame, op_ret,
+ op_errno, pre, post, xdata);
+ return 0;
+}
+
+static int32_t
+ioc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ uint64_t ioc_inode = 0;
+
+ inode_ctx_get (fd->inode, this, &ioc_inode);
+
+ if (ioc_inode)
+ ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode);
+
+ STACK_WIND(frame, ioc_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
+ return 0;
+}
+
+
int32_t
ioc_get_priority_list (const char *opt_str, struct list_head *first)
{
- int32_t max_pri = 1;
- char *tmp_str = NULL;
- char *tmp_str1 = NULL;
- char *tmp_str2 = NULL;
- char *dup_str = NULL;
- char *stripe_str = NULL;
- char *pattern = NULL;
- char *priority = NULL;
- char *string = NULL;
- struct ioc_priority *curr = NULL, *tmp = NULL;
-
- string = strdup (opt_str);
+ int32_t max_pri = 1;
+ char *tmp_str = NULL;
+ char *tmp_str1 = NULL;
+ char *tmp_str2 = NULL;
+ char *dup_str = NULL;
+ char *stripe_str = NULL;
+ char *pattern = NULL;
+ char *priority = NULL;
+ char *string = NULL;
+ struct ioc_priority *curr = NULL, *tmp = NULL;
+
+ string = gf_strdup (opt_str);
if (string == NULL) {
max_pri = -1;
goto out;
}
-
- /* Get the pattern for cache priority.
- * "option priority *.jpg:1,abc*:2" etc
- */
- /* TODO: inode_lru in table is statically hard-coded to 5,
- * should be changed to run-time configuration
- */
- stripe_str = strtok_r (string, ",", &tmp_str);
- while (stripe_str) {
- curr = CALLOC (1, sizeof (struct ioc_priority));
+
+ /* Get the pattern for cache priority.
+ * "option priority *.jpg:1,abc*:2" etc
+ */
+ /* TODO: inode_lru in table is statically hard-coded to 5,
+ * should be changed to run-time configuration
+ */
+ stripe_str = strtok_r (string, ",", &tmp_str);
+ while (stripe_str) {
+ curr = GF_CALLOC (1, sizeof (struct ioc_priority),
+ gf_ioc_mt_ioc_priority);
if (curr == NULL) {
max_pri = -1;
goto out;
}
- list_add_tail (&curr->list, first);
+ list_add_tail (&curr->list, first);
- dup_str = strdup (stripe_str);
+ dup_str = gf_strdup (stripe_str);
if (dup_str == NULL) {
max_pri = -1;
goto out;
}
- pattern = strtok_r (dup_str, ":", &tmp_str1);
- if (!pattern) {
+ pattern = strtok_r (dup_str, ":", &tmp_str1);
+ if (!pattern) {
max_pri = -1;
goto out;
}
- priority = strtok_r (NULL, ":", &tmp_str1);
- if (!priority) {
+ priority = strtok_r (NULL, ":", &tmp_str1);
+ if (!priority) {
max_pri = -1;
goto out;
}
- gf_log ("io-cache", GF_LOG_TRACE,
- "ioc priority : pattern %s : priority %s",
- pattern,
- priority);
+ gf_log ("io-cache", GF_LOG_TRACE,
+ "ioc priority : pattern %s : priority %s",
+ pattern,
+ priority);
- curr->pattern = strdup (pattern);
+ curr->pattern = gf_strdup (pattern);
if (curr->pattern == NULL) {
max_pri = -1;
goto out;
}
- curr->priority = strtol (priority, &tmp_str2, 0);
- if (tmp_str2 && (*tmp_str2)) {
+ curr->priority = strtol (priority, &tmp_str2, 0);
+ if (tmp_str2 && (*tmp_str2)) {
max_pri = -1;
goto out;
} else {
- max_pri = max (max_pri, curr->priority);
+ max_pri = max (max_pri, curr->priority);
}
- free (dup_str);
+ GF_FREE (dup_str);
dup_str = NULL;
- stripe_str = strtok_r (NULL, ",", &tmp_str);
- }
-out:
- if (string != NULL) {
- free (string);
+ stripe_str = strtok_r (NULL, ",", &tmp_str);
}
+out:
+ GF_FREE (string);
- if (dup_str != NULL) {
- free (dup_str);
- }
+ GF_FREE (dup_str);
if (max_pri == -1) {
list_for_each_entry_safe (curr, tmp, first, list) {
list_del_init (&curr->list);
- free (curr->pattern);
- free (curr);
+ GF_FREE (curr->pattern);
+ GF_FREE (curr);
}
}
- return max_pri;
+ return max_pri;
+}
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_ioc_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+
+static gf_boolean_t
+check_cache_size_ok (xlator_t *this, uint64_t cache_size)
+{
+ gf_boolean_t ret = _gf_true;
+ uint64_t total_mem = 0;
+ uint64_t max_cache_size = 0;
+ volume_option_t *opt = NULL;
+
+ GF_ASSERT (this);
+ opt = xlator_volume_option_get (this, "cache-size");
+ if (!opt) {
+ ret = _gf_false;
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not get cache-size option");
+ goto out;
+ }
+
+ total_mem = get_mem_size ();
+ if (-1 == total_mem)
+ max_cache_size = opt->max;
+ else
+ max_cache_size = total_mem;
+
+ gf_log (this->name, GF_LOG_DEBUG, "Max cache size is %"PRIu64,
+ max_cache_size);
+
+ if (cache_size > max_cache_size) {
+ ret = _gf_false;
+ gf_log (this->name, GF_LOG_ERROR, "Cache size %"PRIu64
+ " is greater than the max size of %"PRIu64,
+ cache_size, max_cache_size);
+ goto out;
+ }
+out:
+ return ret;
+}
+
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ data_t *data = NULL;
+ ioc_table_t *table = NULL;
+ int ret = -1;
+ uint64_t cache_size_new = 0;
+ if (!this || !this->private)
+ goto out;
+
+ table = this->private;
+
+ ioc_table_lock (table);
+ {
+ GF_OPTION_RECONF ("cache-timeout", table->cache_timeout,
+ options, int32, unlock);
+
+ data = dict_get (options, "priority");
+ if (data) {
+ char *option_list = data_to_str (data);
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "option path %s", option_list);
+ /* parse the list of pattern:priority */
+ table->max_pri = ioc_get_priority_list (option_list,
+ &table->priority_list);
+
+ if (table->max_pri == -1) {
+ goto unlock;
+ }
+ table->max_pri ++;
+ }
+
+ GF_OPTION_RECONF ("max-file-size", table->max_file_size,
+ options, size, unlock);
+
+ GF_OPTION_RECONF ("min-file-size", table->min_file_size,
+ options, size, unlock);
+
+ if ((table->max_file_size >= 0) &&
+ (table->min_file_size > table->max_file_size)) {
+ gf_log (this->name, GF_LOG_ERROR, "minimum size (%"
+ PRIu64") of a file that can be cached is "
+ "greater than maximum size (%"PRIu64"). "
+ "Hence Defaulting to old value",
+ table->min_file_size, table->max_file_size);
+ goto unlock;
+ }
+
+ GF_OPTION_RECONF ("cache-size", cache_size_new,
+ options, size, unlock);
+ if (!check_cache_size_ok (this, cache_size_new)) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Not reconfiguring cache-size");
+ goto unlock;
+ }
+ table->cache_size = cache_size_new;
+
+ ret = 0;
+ }
+unlock:
+ ioc_table_unlock (table);
+out:
+ return ret;
}
+
/*
- * init -
+ * init -
* @this:
*
*/
-int32_t
+int32_t
init (xlator_t *this)
{
- ioc_table_t *table = NULL;
- dict_t *options = this->options;
- uint32_t index = 0;
- char *cache_size_string = NULL;
- int32_t ret = -1;
-
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: io-cache not configured with exactly "
- "one child");
+ ioc_table_t *table = NULL;
+ dict_t *xl_options = NULL;
+ uint32_t index = 0;
+ int32_t ret = -1;
+ glusterfs_ctx_t *ctx = NULL;
+ data_t *data = 0;
+ uint32_t num_pages = 0;
+
+ xl_options = this->options;
+
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "FATAL: io-cache not configured with exactly "
+ "one child");
goto out;
- }
+ }
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
- table = (void *) CALLOC (1, sizeof (*table));
+ table = (void *) GF_CALLOC (1, sizeof (*table), gf_ioc_mt_ioc_table_t);
if (table == NULL) {
gf_log (this->name, GF_LOG_ERROR, "out of memory");
goto out;
}
-
- table->xl = this;
- table->page_size = this->ctx->page_size;
- table->cache_size = IOC_CACHE_SIZE;
-
- if (dict_get (options, "cache-size"))
- cache_size_string = data_to_str (dict_get (options,
- "cache-size"));
- if (cache_size_string) {
- if (gf_string2bytesize (cache_size_string,
- &table->cache_size) != 0) {
- gf_log ("io-cache", GF_LOG_ERROR,
- "invalid number format \"%s\" of "
- "\"option cache-size\"",
- cache_size_string);
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "using cache-size %"PRIu64"", table->cache_size);
- }
-
- table->cache_timeout = 1;
-
- if (dict_get (options, "cache-timeout")) {
- table->cache_timeout =
- data_to_uint32 (dict_get (options,
- "cache-timeout"));
- gf_log (this->name, GF_LOG_TRACE,
- "Using %d seconds to revalidate cache",
- table->cache_timeout);
- }
-
- INIT_LIST_HEAD (&table->priority_list);
- table->max_pri = 1;
- if (dict_get (options, "priority")) {
- char *option_list = data_to_str (dict_get (options,
- "priority"));
- gf_log (this->name, GF_LOG_TRACE,
- "option path %s", option_list);
- /* parse the list of pattern:priority */
- table->max_pri = ioc_get_priority_list (option_list,
- &table->priority_list);
-
- if (table->max_pri == -1) {
+
+ table->xl = this;
+ table->page_size = this->ctx->page_size;
+
+ GF_OPTION_INIT ("cache-size", table->cache_size, size, out);
+
+ GF_OPTION_INIT ("cache-timeout", table->cache_timeout, int32, out);
+
+ GF_OPTION_INIT ("min-file-size", table->min_file_size, size, out);
+
+ GF_OPTION_INIT ("max-file-size", table->max_file_size, size, out);
+
+ if (!check_cache_size_ok (this, table->cache_size)) {
+ ret = -1;
+ goto out;
+ }
+
+ INIT_LIST_HEAD (&table->priority_list);
+ table->max_pri = 1;
+ data = dict_get (xl_options, "priority");
+ if (data) {
+ char *option_list = data_to_str (data);
+ gf_log (this->name, GF_LOG_TRACE,
+ "option path %s", option_list);
+ /* parse the list of pattern:priority */
+ table->max_pri = ioc_get_priority_list (option_list,
+ &table->priority_list);
+
+ if (table->max_pri == -1) {
goto out;
}
- }
- table->max_pri ++;
- INIT_LIST_HEAD (&table->inodes);
-
- table->inode_lru = CALLOC (table->max_pri, sizeof (struct list_head));
+ }
+ table->max_pri ++;
+
+ INIT_LIST_HEAD (&table->inodes);
+
+ if ((table->max_file_size >= 0)
+ && (table->min_file_size > table->max_file_size)) {
+ gf_log ("io-cache", GF_LOG_ERROR, "minimum size (%"
+ PRIu64") of a file that can be cached is "
+ "greater than maximum size (%"PRIu64")",
+ table->min_file_size, table->max_file_size);
+ goto out;
+ }
+
+ table->inode_lru = GF_CALLOC (table->max_pri,
+ sizeof (struct list_head),
+ gf_ioc_mt_list_head);
if (table->inode_lru == NULL) {
goto out;
}
- for (index = 0; index < (table->max_pri); index++)
- INIT_LIST_HEAD (&table->inode_lru[index]);
+ for (index = 0; index < (table->max_pri); index++)
+ INIT_LIST_HEAD (&table->inode_lru[index]);
+
+ this->local_pool = mem_pool_new (ioc_local_t, 64);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto out;
+ }
+
+ pthread_mutex_init (&table->table_lock, NULL);
+ this->private = table;
+
+ num_pages = (table->cache_size / table->page_size)
+ + ((table->cache_size % table->page_size)
+ ? 1 : 0);
+
+ table->mem_pool = mem_pool_new (rbthash_entry_t, num_pages);
+ if (!table->mem_pool) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to allocate mem_pool");
+ goto out;
+ }
- pthread_mutex_init (&table->table_lock, NULL);
- this->private = table;
ret = 0;
+ ctx = this->ctx;
+ ioc_log2_page_size = log_base2 (ctx->page_size);
+
out:
if (ret == -1) {
if (table != NULL) {
- free (table->inode_lru);
- free (table);
+ GF_FREE (table->inode_lru);
+ GF_FREE (table);
}
}
- return ret;
+ return ret;
+}
+
+void
+ioc_page_waitq_dump (ioc_page_t *page, char *prefix)
+{
+ ioc_waitq_t *trav = NULL;
+ call_frame_t *frame = NULL;
+ int32_t i = 0;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0, };
+
+ trav = page->waitq;
+
+ while (trav) {
+ frame = trav->data;
+ sprintf (key, "waitq.frame[%d]", i++);
+ gf_proc_dump_write (key, "%"PRId64, frame->root->unique);
+
+ trav = trav->next;
+ }
+}
+
+void
+__ioc_inode_waitq_dump (ioc_inode_t *ioc_inode, char *prefix)
+{
+ ioc_waitq_t *trav = NULL;
+ ioc_page_t *page = NULL;
+ int32_t i = 0;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0, };
+
+ trav = ioc_inode->waitq;
+
+ while (trav) {
+ page = trav->data;
+
+ sprintf (key, "cache-validation-waitq.page[%d].offset", i++);
+ gf_proc_dump_write (key, "%"PRId64, page->offset);
+
+ trav = trav->next;
+ }
+}
+
+void
+__ioc_page_dump (ioc_page_t *page, char *prefix)
+{
+
+ int ret = -1;
+
+ if (!page)
+ return;
+ /* ioc_page_lock can be used to hold the mutex. But in statedump
+ * its better to use trylock to avoid deadlocks.
+ */
+ ret = pthread_mutex_trylock (&page->page_lock);
+ if (ret)
+ goto out;
+ {
+ gf_proc_dump_write ("offset", "%"PRId64, page->offset);
+ gf_proc_dump_write ("size", "%"PRId64, page->size);
+ gf_proc_dump_write ("dirty", "%s", page->dirty ? "yes" : "no");
+ gf_proc_dump_write ("ready", "%s", page->ready ? "yes" : "no");
+ ioc_page_waitq_dump (page, prefix);
+ }
+ pthread_mutex_unlock (&page->page_lock);
+
+out:
+ if (ret && page)
+ gf_proc_dump_write ("Unable to dump the page information",
+ "(Lock acquisition failed) %p", page);
+
+ return;
+}
+
+void
+__ioc_cache_dump (ioc_inode_t *ioc_inode, char *prefix)
+{
+ off_t offset = 0;
+ ioc_table_t *table = NULL;
+ ioc_page_t *page = NULL;
+ int i = 0;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0, };
+ char timestr[256] = {0, };
+
+ if ((ioc_inode == NULL) || (prefix == NULL)) {
+ goto out;
+ }
+
+ table = ioc_inode->table;
+
+ if (ioc_inode->cache.tv.tv_sec) {
+ gf_time_fmt (timestr, sizeof timestr,
+ ioc_inode->cache.tv.tv_sec, gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS, ioc_inode->cache.tv.tv_usec);
+
+ gf_proc_dump_write ("last-cache-validation-time", "%s",
+ timestr);
+ }
+
+ for (offset = 0; offset < ioc_inode->ia_size;
+ offset += table->page_size) {
+ page = __ioc_page_get (ioc_inode, offset);
+ if (page == NULL) {
+ continue;
+ }
+
+ sprintf (key, "inode.cache.page[%d]", i++);
+ __ioc_page_dump (page, key);
+ }
+out:
+ return;
+}
+
+
+int
+ioc_inode_dump (xlator_t *this, inode_t *inode)
+{
+
+ char *path = NULL;
+ int ret = -1;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
+ uint64_t tmp_ioc_inode = 0;
+ ioc_inode_t *ioc_inode = NULL;
+ gf_boolean_t section_added = _gf_false;
+ char uuid_str[64] = {0,};
+
+ if (this == NULL || inode == NULL)
+ goto out;
+
+ gf_proc_dump_build_key (key_prefix, "io-cache", "inode");
+
+ inode_ctx_get (inode, this, &tmp_ioc_inode);
+ ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode;
+ if (ioc_inode == NULL)
+ goto out;
+
+ /* Similar to ioc_page_dump function its better to use
+ * pthread_mutex_trylock and not to use gf_log in statedump
+ * to avoid deadlocks.
+ */
+ ret = pthread_mutex_trylock (&ioc_inode->inode_lock);
+ if (ret)
+ goto out;
+
+ {
+ if (uuid_is_null (ioc_inode->inode->gfid))
+ goto unlock;
+
+ gf_proc_dump_add_section (key_prefix);
+ section_added = _gf_true;
+
+ __inode_path (ioc_inode->inode, NULL, &path);
+
+ gf_proc_dump_write ("inode.weight", "%d", ioc_inode->weight);
+
+ if (path) {
+ gf_proc_dump_write ("path", "%s", path);
+ GF_FREE (path);
+ }
+
+ gf_proc_dump_write ("uuid", "%s", uuid_utoa_r
+ (ioc_inode->inode->gfid, uuid_str));
+ __ioc_cache_dump (ioc_inode, key_prefix);
+ __ioc_inode_waitq_dump (ioc_inode, key_prefix);
+ }
+unlock:
+ pthread_mutex_unlock (&ioc_inode->inode_lock);
+
+out:
+ if (ret && ioc_inode) {
+ if (section_added == _gf_false)
+ gf_proc_dump_add_section (key_prefix);
+ gf_proc_dump_write ("Unable to print the status of ioc_inode",
+ "(Lock acquisition failed) %s",
+ uuid_utoa (inode->gfid));
+ }
+ return ret;
+}
+
+int
+ioc_priv_dump (xlator_t *this)
+{
+ ioc_table_t *priv = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
+ int ret = -1;
+ gf_boolean_t add_section = _gf_false;
+
+ if (!this || !this->private)
+ goto out;
+
+ priv = this->private;
+
+ gf_proc_dump_build_key (key_prefix, "io-cache", "priv");
+ gf_proc_dump_add_section (key_prefix);
+ add_section = _gf_true;
+
+ ret = pthread_mutex_trylock (&priv->table_lock);
+ if (ret)
+ goto out;
+ {
+ gf_proc_dump_write ("page_size", "%ld", priv->page_size);
+ gf_proc_dump_write ("cache_size", "%ld", priv->cache_size);
+ gf_proc_dump_write ("cache_used", "%ld", priv->cache_used);
+ gf_proc_dump_write ("inode_count", "%u", priv->inode_count);
+ gf_proc_dump_write ("cache_timeout", "%u", priv->cache_timeout);
+ gf_proc_dump_write ("min-file-size", "%u", priv->min_file_size);
+ gf_proc_dump_write ("max-file-size", "%u", priv->max_file_size);
+ }
+ pthread_mutex_unlock (&priv->table_lock);
+out:
+ if (ret && priv) {
+ if (!add_section) {
+ gf_proc_dump_build_key (key_prefix, "xlator."
+ "performance.io-cache", "priv");
+ gf_proc_dump_add_section (key_prefix);
+ }
+ gf_proc_dump_write ("Unable to dump the state of private "
+ "structure of io-cache xlator", "(Lock "
+ "acquisition failed) %s", this->name);
+ }
+
+ return 0;
}
/*
* fini -
- *
+ *
* @this:
*
*/
void
fini (xlator_t *this)
{
- ioc_table_t *table = this->private;
+ ioc_table_t *table = NULL;
+ struct ioc_priority *curr = NULL, *tmp = NULL;
+ int i = 0;
+
+ table = this->private;
+
+ if (table == NULL)
+ return;
- pthread_mutex_destroy (&table->table_lock);
- FREE (table);
+ this->private = NULL;
- this->private = NULL;
- return;
+ if (table->mem_pool != NULL) {
+ mem_pool_destroy (table->mem_pool);
+ table->mem_pool = NULL;
+ }
+
+ list_for_each_entry_safe (curr, tmp, &table->priority_list, list) {
+ list_del_init (&curr->list);
+ GF_FREE (curr->pattern);
+ GF_FREE (curr);
+ }
+
+ for (i = 0; i < table->max_pri; i++) {
+ GF_ASSERT (list_empty (&table->inode_lru[i]));
+ }
+
+ GF_ASSERT (list_empty (&table->inodes));
+ pthread_mutex_destroy (&table->table_lock);
+ GF_FREE (table);
+
+ this->private = NULL;
+ return;
}
struct xlator_fops fops = {
- .open = ioc_open,
- .create = ioc_create,
- .readv = ioc_readv,
- .writev = ioc_writev,
- .truncate = ioc_truncate,
- .ftruncate = ioc_ftruncate,
- .utimens = ioc_utimens,
- .lookup = ioc_lookup,
- .lk = ioc_lk
+ .open = ioc_open,
+ .create = ioc_create,
+ .readv = ioc_readv,
+ .writev = ioc_writev,
+ .truncate = ioc_truncate,
+ .ftruncate = ioc_ftruncate,
+ .lookup = ioc_lookup,
+ .lk = ioc_lk,
+ .setattr = ioc_setattr,
+ .mknod = ioc_mknod,
+
+ .readdirp = ioc_readdirp,
+ .discard = ioc_discard,
+ .zerofill = ioc_zerofill,
};
-struct xlator_mops mops = {
+
+struct xlator_dumpops dumpops = {
+ .priv = ioc_priv_dump,
+ .inodectx = ioc_inode_dump,
};
struct xlator_cbks cbks = {
- .forget = ioc_forget,
- .release = ioc_release
+ .forget = ioc_forget,
+ .release = ioc_release,
+ .invalidate = ioc_invalidate,
};
struct volume_options options[] = {
- { .key = {"priority"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"cache-timeout", "force-revalidate-timeout"},
- .type = GF_OPTION_TYPE_INT,
- .min = 0,
- .max = 60
- },
- { .key = {"cache-size"},
- .type = GF_OPTION_TYPE_SIZET,
- .min = 4 * GF_UNIT_MB,
- .max = 6 * GF_UNIT_GB
- },
- { .key = {NULL} },
+ { .key = {"priority"},
+ .type = GF_OPTION_TYPE_PRIORITY_LIST,
+ .default_value = "",
+ .description = "Assigns priority to filenames with specific "
+ "patterns so that when a page needs to be ejected "
+ "out of the cache, the page of a file whose "
+ "priority is the lowest will be ejected earlier"
+ },
+ { .key = {"cache-timeout", "force-revalidate-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 60,
+ .default_value = "1",
+ .description = "The cached data for a file will be retained till "
+ "'cache-refresh-timeout' seconds, after which data "
+ "re-validation is performed."
+ },
+ { .key = {"cache-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 4 * GF_UNIT_MB,
+ .max = 32 * GF_UNIT_GB,
+ .default_value = "32MB",
+ .description = "Size of the read cache."
+ },
+ { .key = {"min-file-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .default_value = "0",
+ .description = "Minimum file size which would be cached by the "
+ "io-cache translator."
+ },
+ { .key = {"max-file-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .default_value = "0",
+ .description = "Maximum file size which would be cached by the "
+ "io-cache translator."
+ },
+ { .key = {NULL} },
};
diff --git a/xlators/performance/io-cache/src/io-cache.h b/xlators/performance/io-cache/src/io-cache.h
index 82c01c127..46d758a66 100644
--- a/xlators/performance/io-cache/src/io-cache.h
+++ b/xlators/performance/io-cache/src/io-cache.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __IO_CACHE_H
@@ -34,11 +25,14 @@
#include "xlator.h"
#include "common-utils.h"
#include "call-stub.h"
+#include "rbthash.h"
+#include "hashfn.h"
#include <sys/time.h>
#include <fnmatch.h>
#define IOC_PAGE_SIZE (1024 * 128) /* 128KB */
#define IOC_CACHE_SIZE (32 * 1024 * 1024)
+#define IOC_PAGE_TABLE_BUCKET_COUNT 1
struct ioc_table;
struct ioc_local;
@@ -46,123 +40,135 @@ struct ioc_page;
struct ioc_inode;
struct ioc_priority {
- struct list_head list;
- char *pattern;
- uint32_t priority;
+ struct list_head list;
+ char *pattern;
+ uint32_t priority;
};
/*
- * ioc_waitq - this structure is used to represents the waiting
+ * ioc_waitq - this structure is used to represents the waiting
* frames on a page
*
* @next: pointer to next object in waitq
* @data: pointer to the frame which is waiting
*/
struct ioc_waitq {
- struct ioc_waitq *next;
- void *data;
- off_t pending_offset;
- size_t pending_size;
+ struct ioc_waitq *next;
+ void *data;
+ off_t pending_offset;
+ size_t pending_size;
};
/*
- * ioc_fill -
+ * ioc_fill -
*
*/
struct ioc_fill {
- struct list_head list; /* list of ioc_fill structures of a frame */
- off_t offset;
- size_t size;
- struct iovec *vector;
- int32_t count;
- struct iobref *iobref;
+ struct list_head list; /* list of ioc_fill structures of a frame */
+ off_t offset;
+ size_t size;
+ struct iovec *vector;
+ int32_t count;
+ struct iobref *iobref;
};
struct ioc_local {
- mode_t mode;
- int32_t flags;
- loc_t file_loc;
- off_t offset;
- size_t size;
- int32_t op_ret;
- int32_t op_errno;
- struct list_head fill_list; /* list of ioc_fill structures */
- off_t pending_offset; /*
+ mode_t mode;
+ int32_t flags;
+ loc_t file_loc;
+ off_t offset;
+ size_t size;
+ int32_t op_ret;
+ int32_t op_errno;
+ struct list_head fill_list; /* list of ioc_fill structures */
+ off_t pending_offset; /*
* offset from this frame should
* continue
*/
- size_t pending_size; /*
+ size_t pending_size; /*
* size of data this frame is waiting
* on
*/
- struct ioc_inode *inode;
- int32_t wait_count;
- pthread_mutex_t local_lock;
- struct ioc_waitq *waitq;
- void *stub;
- fd_t *fd;
- int32_t need_xattr;
- dict_t *xattr_req;
+ struct ioc_inode *inode;
+ int32_t wait_count;
+ pthread_mutex_t local_lock;
+ struct ioc_waitq *waitq;
+ void *stub;
+ fd_t *fd;
+ int32_t need_xattr;
+ dict_t *xattr_req;
};
/*
- * ioc_page - structure to store page of data from file
+ * ioc_page - structure to store page of data from file
*
*/
struct ioc_page {
- struct list_head pages;
- struct list_head page_lru;
- struct ioc_inode *inode; /* inode this page belongs to */
- struct ioc_priority *priority;
- char dirty;
- char ready;
- struct iovec *vector;
- int32_t count;
- off_t offset;
- size_t size;
- struct ioc_waitq *waitq;
- struct iobref *iobref;
- pthread_mutex_t page_lock;
+ struct list_head page_lru;
+ struct ioc_inode *inode; /* inode this page belongs to */
+ struct ioc_priority *priority;
+ char dirty;
+ char ready;
+ struct iovec *vector;
+ int32_t count;
+ off_t offset;
+ size_t size;
+ struct ioc_waitq *waitq;
+ struct iobref *iobref;
+ pthread_mutex_t page_lock;
+ int32_t op_errno;
+ char stale;
+};
+
+struct ioc_cache {
+ rbthash_table_t *page_table;
+ struct list_head page_lru;
+ time_t mtime; /*
+ * seconds component of file mtime
+ */
+ time_t mtime_nsec; /*
+ * nanosecond component of file mtime
+ */
+ struct timeval tv; /*
+ * time-stamp at last re-validate
+ */
};
struct ioc_inode {
- struct ioc_table *table;
- struct list_head pages; /* list of pages of this inode */
- struct list_head inode_list; /*
- * list of inodes, maintained by io-cache
- * translator
- */
- struct list_head inode_lru;
- struct list_head page_lru;
- struct ioc_waitq *waitq;
- pthread_mutex_t inode_lock;
- uint32_t weight; /*
- * weight of the inode, increases on each
- * read
- */
- time_t mtime; /*
- * mtime of the server file when last
- * cached
- */
- struct timeval tv; /*
- * time-stamp at last re-validate
- */
+ struct ioc_table *table;
+ off_t ia_size;
+ struct ioc_cache cache;
+ struct list_head inode_list; /*
+ * list of inodes, maintained by
+ * io-cache translator
+ */
+ struct list_head inode_lru;
+ struct ioc_waitq *waitq;
+ pthread_mutex_t inode_lock;
+ uint32_t weight; /*
+ * weight of the inode, increases
+ * on each read
+ */
+ inode_t *inode;
};
struct ioc_table {
- uint64_t page_size;
- uint64_t cache_size;
- uint64_t cache_used;
- struct list_head inodes; /* list of inodes cached */
- struct list_head active;
- struct list_head *inode_lru;
- struct list_head priority_list;
- int32_t readv_count;
- pthread_mutex_t table_lock;
- xlator_t *xl;
- uint32_t inode_count;
- int32_t cache_timeout;
- int32_t max_pri;
+ uint64_t page_size;
+ uint64_t cache_size;
+ uint64_t cache_used;
+ uint64_t min_file_size;
+ uint64_t max_file_size;
+ struct list_head inodes; /* list of inodes cached */
+ struct list_head active;
+ struct list_head *inode_lru;
+ struct list_head priority_list;
+ int32_t readv_count;
+ pthread_mutex_t table_lock;
+ xlator_t *xl;
+ uint32_t inode_count;
+ int32_t cache_timeout;
+ int32_t max_pri;
+ struct mem_pool *mem_pool;
};
typedef struct ioc_table ioc_table_t;
@@ -178,36 +184,33 @@ str_to_ptr (char *string);
char *
ptr_to_str (void *ptr);
-int32_t
+int32_t
ioc_readv_disabled_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct stat *stbuf,
- struct iobref *iobref);
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf,
+ struct iobref *iobref, dict_t *xdata);
ioc_page_t *
-ioc_page_get (ioc_inode_t *ioc_inode, off_t offset);
+__ioc_page_get (ioc_inode_t *ioc_inode, off_t offset);
ioc_page_t *
-ioc_page_create (ioc_inode_t *ioc_inode, off_t offset);
+__ioc_page_create (ioc_inode_t *ioc_inode, off_t offset);
void
-ioc_page_fault (ioc_inode_t *ioc_inode, call_frame_t *frame, fd_t *fd,
- off_t offset);
+ioc_page_fault (ioc_inode_t *ioc_inode, call_frame_t *frame, fd_t *fd,
+ off_t offset);
void
-ioc_wait_on_page (ioc_page_t *page, call_frame_t *frame, off_t offset,
- size_t size);
+__ioc_wait_on_page (ioc_page_t *page, call_frame_t *frame, off_t offset,
+ size_t size);
ioc_waitq_t *
-ioc_page_wakeup (ioc_page_t *page);
+__ioc_page_wakeup (ioc_page_t *page, int32_t op_errno);
void
ioc_page_flush (ioc_page_t *page);
ioc_waitq_t *
-ioc_page_error (ioc_page_t *page, int32_t op_ret, int32_t op_errno);
-
-void
-ioc_page_purge (ioc_page_t *page);
+__ioc_page_error (ioc_page_t *page, int32_t op_ret, int32_t op_errno);
void
ioc_frame_return (call_frame_t *frame);
@@ -217,97 +220,97 @@ ioc_waitq_return (ioc_waitq_t *waitq);
int32_t
ioc_frame_fill (ioc_page_t *page, call_frame_t *frame, off_t offset,
- size_t size);
+ size_t size, int32_t op_errno);
-#define ioc_inode_lock(ioc_inode) \
- do { \
- gf_log (ioc_inode->table->xl->name, GF_LOG_TRACE, \
- "locked inode(%p)", ioc_inode); \
- pthread_mutex_lock (&ioc_inode->inode_lock); \
- } while (0)
+#define ioc_inode_lock(ioc_inode) \
+ do { \
+ gf_log (ioc_inode->table->xl->name, GF_LOG_TRACE, \
+ "locked inode(%p)", ioc_inode); \
+ pthread_mutex_lock (&ioc_inode->inode_lock); \
+ } while (0)
-#define ioc_inode_unlock(ioc_inode) \
- do { \
- gf_log (ioc_inode->table->xl->name, GF_LOG_TRACE, \
- "unlocked inode(%p)", ioc_inode); \
- pthread_mutex_unlock (&ioc_inode->inode_lock); \
- } while (0)
+#define ioc_inode_unlock(ioc_inode) \
+ do { \
+ gf_log (ioc_inode->table->xl->name, GF_LOG_TRACE, \
+ "unlocked inode(%p)", ioc_inode); \
+ pthread_mutex_unlock (&ioc_inode->inode_lock); \
+ } while (0)
-#define ioc_table_lock(table) \
- do { \
- gf_log (table->xl->name, GF_LOG_TRACE, \
- "locked table(%p)", table); \
- pthread_mutex_lock (&table->table_lock); \
- } while (0)
+#define ioc_table_lock(table) \
+ do { \
+ gf_log (table->xl->name, GF_LOG_TRACE, \
+ "locked table(%p)", table); \
+ pthread_mutex_lock (&table->table_lock); \
+ } while (0)
-#define ioc_table_unlock(table) \
- do { \
- gf_log (table->xl->name, GF_LOG_TRACE, \
- "unlocked table(%p)", table); \
- pthread_mutex_unlock (&table->table_lock); \
- } while (0)
+#define ioc_table_unlock(table) \
+ do { \
+ gf_log (table->xl->name, GF_LOG_TRACE, \
+ "unlocked table(%p)", table); \
+ pthread_mutex_unlock (&table->table_lock); \
+ } while (0)
-#define ioc_local_lock(local) \
- do { \
- gf_log (local->inode->table->xl->name, GF_LOG_TRACE, \
- "locked local(%p)", local); \
- pthread_mutex_lock (&local->local_lock); \
- } while (0)
+#define ioc_local_lock(local) \
+ do { \
+ gf_log (local->inode->table->xl->name, GF_LOG_TRACE, \
+ "locked local(%p)", local); \
+ pthread_mutex_lock (&local->local_lock); \
+ } while (0)
-#define ioc_local_unlock(local) \
- do { \
- gf_log (local->inode->table->xl->name, GF_LOG_TRACE, \
- "unlocked local(%p)", local); \
- pthread_mutex_unlock (&local->local_lock); \
- } while (0)
+#define ioc_local_unlock(local) \
+ do { \
+ gf_log (local->inode->table->xl->name, GF_LOG_TRACE, \
+ "unlocked local(%p)", local); \
+ pthread_mutex_unlock (&local->local_lock); \
+ } while (0)
-#define ioc_page_lock(page) \
- do { \
- gf_log (page->inode->table->xl->name, GF_LOG_TRACE, \
- "locked page(%p)", page); \
- pthread_mutex_lock (&page->page_lock); \
- } while (0)
+#define ioc_page_lock(page) \
+ do { \
+ gf_log (page->inode->table->xl->name, GF_LOG_TRACE, \
+ "locked page(%p)", page); \
+ pthread_mutex_lock (&page->page_lock); \
+ } while (0)
-#define ioc_page_unlock(page) \
- do { \
- gf_log (page->inode->table->xl->name, GF_LOG_TRACE, \
- "unlocked page(%p)", page); \
- pthread_mutex_unlock (&page->page_lock); \
- } while (0)
+#define ioc_page_unlock(page) \
+ do { \
+ gf_log (page->inode->table->xl->name, GF_LOG_TRACE, \
+ "unlocked page(%p)", page); \
+ pthread_mutex_unlock (&page->page_lock); \
+ } while (0)
static inline uint64_t
time_elapsed (struct timeval *now,
- struct timeval *then)
+ struct timeval *then)
{
- uint64_t sec = now->tv_sec - then->tv_sec;
+ uint64_t sec = now->tv_sec - then->tv_sec;
+
+ if (sec)
+ return sec;
- if (sec)
- return sec;
-
- return 0;
+ return 0;
}
ioc_inode_t *
ioc_inode_search (ioc_table_t *table, inode_t *inode);
-void
+void
ioc_inode_destroy (ioc_inode_t *ioc_inode);
ioc_inode_t *
ioc_inode_update (ioc_table_t *table, inode_t *inode, uint32_t weight);
-int64_t
-ioc_page_destroy (ioc_page_t *page);
+int64_t
+__ioc_page_destroy (ioc_page_t *page);
-int32_t
+int64_t
__ioc_inode_flush (ioc_inode_t *ioc_inode);
void
@@ -315,10 +318,10 @@ ioc_inode_flush (ioc_inode_t *ioc_inode);
void
ioc_inode_wakeup (call_frame_t *frame, ioc_inode_t *ioc_inode,
- struct stat *stbuf);
+ struct iatt *stbuf);
int8_t
-ioc_cache_still_valid (ioc_inode_t *ioc_inode, struct stat *stbuf);
+ioc_cache_still_valid (ioc_inode_t *ioc_inode, struct iatt *stbuf);
int32_t
ioc_prune (ioc_table_t *table);
diff --git a/xlators/performance/io-cache/src/ioc-inode.c b/xlators/performance/io-cache/src/ioc-inode.c
index ab1d7b600..86a54bb14 100644
--- a/xlators/performance/io-cache/src/ioc-inode.c
+++ b/xlators/performance/io-cache/src/ioc-inode.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -23,7 +14,9 @@
#endif
#include "io-cache.h"
+#include "ioc-mem-types.h"
+extern int ioc_log2_page_size;
/*
* str_to_ptr - convert a string to pointer
@@ -33,10 +26,14 @@
void *
str_to_ptr (char *string)
{
- void *ptr = NULL;
+ void *ptr = NULL;
+
+ GF_VALIDATE_OR_GOTO ("io-cache", string, out);
ptr = (void *)strtoul (string, NULL, 16);
- return ptr;
+
+out:
+ return ptr;
}
@@ -49,102 +46,126 @@ char *
ptr_to_str (void *ptr)
{
int ret = 0;
- char *str = NULL;
- ret = asprintf (&str, "%p", ptr);
+ char *str = NULL;
+
+ GF_VALIDATE_OR_GOTO ("io-cache", ptr, out);
+
+ ret = gf_asprintf (&str, "%p", ptr);
if (-1 == ret) {
- gf_log ("ioc", GF_LOG_ERROR,
+ gf_log ("io-cache", GF_LOG_WARNING,
"asprintf failed while converting ptr to str");
- return NULL;
+ str = NULL;
+ goto out;
}
- return str;
+
+out:
+ return str;
}
+
void
-ioc_inode_wakeup (call_frame_t *frame, ioc_inode_t *ioc_inode,
- struct stat *stbuf)
+ioc_inode_wakeup (call_frame_t *frame, ioc_inode_t *ioc_inode,
+ struct iatt *stbuf)
{
- ioc_waitq_t *waiter = NULL, *waited = NULL;
- ioc_waitq_t *page_waitq = NULL;
- int8_t cache_still_valid = 1;
- ioc_local_t *local = NULL;
- int8_t need_fault = 0;
- ioc_page_t *waiter_page = NULL;
+ ioc_waitq_t *waiter = NULL, *waited = NULL;
+ ioc_waitq_t *page_waitq = NULL;
+ int8_t cache_still_valid = 1;
+ ioc_local_t *local = NULL;
+ int8_t need_fault = 0;
+ ioc_page_t *waiter_page = NULL;
+
+ GF_VALIDATE_OR_GOTO ("io-cache", frame, out);
local = frame->local;
- ioc_inode_lock (ioc_inode);
- {
- waiter = ioc_inode->waitq;
- ioc_inode->waitq = NULL;
- }
- ioc_inode_unlock (ioc_inode);
-
- if (stbuf)
- cache_still_valid = ioc_cache_still_valid (ioc_inode, stbuf);
- else
- cache_still_valid = 0;
-
- if (!waiter) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "cache validate called without any "
- "page waiting to be validated");
- }
-
- while (waiter) {
- waiter_page = waiter->data;
- page_waitq = NULL;
-
- if (waiter_page) {
- if (cache_still_valid) {
- /* cache valid, wake up page */
- ioc_inode_lock (ioc_inode);
- {
- page_waitq =
- ioc_page_wakeup (waiter_page);
- }
- ioc_inode_unlock (ioc_inode);
- if (page_waitq)
- ioc_waitq_return (page_waitq);
- } else {
- /* cache invalid, generate page fault and set
- * page->ready = 0, to avoid double faults
- */
- ioc_inode_lock (ioc_inode);
-
- if (waiter_page->ready) {
- waiter_page->ready = 0;
- need_fault = 1;
- } else {
- gf_log (frame->this->name,
- GF_LOG_TRACE,
- "validate frame(%p) is waiting"
- "for in-transit page = %p",
- frame, waiter_page);
- }
-
- ioc_inode_unlock (ioc_inode);
-
- if (need_fault) {
- need_fault = 0;
- ioc_page_fault (ioc_inode, frame,
- local->fd,
- waiter_page->offset);
- }
- }
- }
-
- waited = waiter;
- waiter = waiter->next;
-
- waited->data = NULL;
- free (waited);
- }
+ GF_VALIDATE_OR_GOTO (frame->this->name, local, out);
+
+ if (ioc_inode == NULL) {
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ gf_log (frame->this->name, GF_LOG_WARNING, "ioc_inode is NULL");
+ goto out;
+ }
+
+ ioc_inode_lock (ioc_inode);
+ {
+ waiter = ioc_inode->waitq;
+ ioc_inode->waitq = NULL;
+ }
+ ioc_inode_unlock (ioc_inode);
+
+ if (stbuf)
+ cache_still_valid = ioc_cache_still_valid (ioc_inode, stbuf);
+ else
+ cache_still_valid = 0;
+
+ if (!waiter) {
+ gf_log (frame->this->name, GF_LOG_WARNING,
+ "cache validate called without any "
+ "page waiting to be validated");
+ }
+
+ while (waiter) {
+ waiter_page = waiter->data;
+ page_waitq = NULL;
+
+ if (waiter_page) {
+ if (cache_still_valid) {
+ /* cache valid, wake up page */
+ ioc_inode_lock (ioc_inode);
+ {
+ page_waitq =
+ __ioc_page_wakeup (waiter_page,
+ waiter_page->op_errno);
+ }
+ ioc_inode_unlock (ioc_inode);
+ if (page_waitq)
+ ioc_waitq_return (page_waitq);
+ } else {
+ /* cache invalid, generate page fault and set
+ * page->ready = 0, to avoid double faults
+ */
+ ioc_inode_lock (ioc_inode);
+ {
+ if (waiter_page->ready) {
+ waiter_page->ready = 0;
+ need_fault = 1;
+ } else {
+ gf_log (frame->this->name,
+ GF_LOG_TRACE,
+ "validate frame(%p) is "
+ "waiting for in-transit"
+ " page = %p", frame,
+ waiter_page);
+ }
+ }
+ ioc_inode_unlock (ioc_inode);
+
+ if (need_fault) {
+ need_fault = 0;
+ ioc_page_fault (ioc_inode, frame,
+ local->fd,
+ waiter_page->offset);
+ }
+ }
+ }
+
+ waited = waiter;
+ waiter = waiter->next;
+
+ waited->data = NULL;
+ GF_FREE (waited);
+ }
+
+out:
+ return;
}
-/*
- * ioc_inode_update - create a new ioc_inode_t structure and add it to
- * the table table. fill in the fields which are derived
+
+/*
+ * ioc_inode_update - create a new ioc_inode_t structure and add it to
+ * the table table. fill in the fields which are derived
* from inode_t corresponding to the file
- *
+ *
* @table: io-table structure
* @inode: inode structure
*
@@ -153,61 +174,67 @@ ioc_inode_wakeup (call_frame_t *frame, ioc_inode_t *ioc_inode,
ioc_inode_t *
ioc_inode_update (ioc_table_t *table, inode_t *inode, uint32_t weight)
{
- ioc_inode_t *ioc_inode = NULL;
-
- ioc_inode = CALLOC (1, sizeof (ioc_inode_t));
+ ioc_inode_t *ioc_inode = NULL;
+
+ GF_VALIDATE_OR_GOTO ("io-cache", table, out);
+
+ ioc_inode = GF_CALLOC (1, sizeof (ioc_inode_t), gf_ioc_mt_ioc_inode_t);
if (ioc_inode == NULL) {
goto out;
}
-
- ioc_inode->table = table;
-
- /* initialize the list for pages */
- INIT_LIST_HEAD (&ioc_inode->pages);
- INIT_LIST_HEAD (&ioc_inode->page_lru);
-
- ioc_table_lock (table);
-
- table->inode_count++;
- list_add (&ioc_inode->inode_list, &table->inodes);
- list_add_tail (&ioc_inode->inode_lru, &table->inode_lru[weight]);
- gf_log (table->xl->name,
- GF_LOG_TRACE,
- "adding to inode_lru[%d]", weight);
+ ioc_inode->inode = inode;
+ ioc_inode->table = table;
+ INIT_LIST_HEAD (&ioc_inode->cache.page_lru);
+ pthread_mutex_init (&ioc_inode->inode_lock, NULL);
+ ioc_inode->weight = weight;
+
+ ioc_table_lock (table);
+ {
+ table->inode_count++;
+ list_add (&ioc_inode->inode_list, &table->inodes);
+ list_add_tail (&ioc_inode->inode_lru,
+ &table->inode_lru[weight]);
+ }
+ ioc_table_unlock (table);
- ioc_table_unlock (table);
+ gf_log (table->xl->name, GF_LOG_TRACE,
+ "adding to inode_lru[%d]", weight);
- pthread_mutex_init (&ioc_inode->inode_lock, NULL);
- ioc_inode->weight = weight;
-
out:
- return ioc_inode;
+ return ioc_inode;
}
-/*
+/*
* ioc_inode_destroy - destroy an ioc_inode_t object.
*
* @inode: inode to destroy
*
- * to be called only from ioc_forget.
+ * to be called only from ioc_forget.
*/
void
ioc_inode_destroy (ioc_inode_t *ioc_inode)
{
- ioc_table_t *table = NULL;
+ ioc_table_t *table = NULL;
+
+ GF_VALIDATE_OR_GOTO ("io-cache", ioc_inode, out);
table = ioc_inode->table;
- ioc_table_lock (table);
- table->inode_count--;
- list_del (&ioc_inode->inode_list);
- list_del (&ioc_inode->inode_lru);
- ioc_table_unlock (table);
-
- ioc_inode_flush (ioc_inode);
+ ioc_table_lock (table);
+ {
+ table->inode_count--;
+ list_del (&ioc_inode->inode_list);
+ list_del (&ioc_inode->inode_lru);
+ }
+ ioc_table_unlock (table);
+
+ ioc_inode_flush (ioc_inode);
+ rbthash_table_destroy (ioc_inode->cache.page_table);
- pthread_mutex_destroy (&ioc_inode->inode_lock);
- free (ioc_inode);
+ pthread_mutex_destroy (&ioc_inode->inode_lock);
+ GF_FREE (ioc_inode);
+out:
+ return;
}
diff --git a/xlators/performance/io-cache/src/ioc-mem-types.h b/xlators/performance/io-cache/src/ioc-mem-types.h
new file mode 100644
index 000000000..9b68f9fce
--- /dev/null
+++ b/xlators/performance/io-cache/src/ioc-mem-types.h
@@ -0,0 +1,29 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __IOC_MT_H__
+#define __IOC_MT_H__
+
+#include "mem-types.h"
+
+enum gf_ioc_mem_types_ {
+ gf_ioc_mt_iovec = gf_common_mt_end + 1,
+ gf_ioc_mt_ioc_table_t,
+ gf_ioc_mt_char,
+ gf_ioc_mt_ioc_waitq_t,
+ gf_ioc_mt_ioc_priority,
+ gf_ioc_mt_list_head,
+ gf_ioc_mt_call_pool_t,
+ gf_ioc_mt_ioc_inode_t,
+ gf_ioc_mt_ioc_fill_t,
+ gf_ioc_mt_ioc_newpage_t,
+ gf_ioc_mt_end
+};
+#endif
diff --git a/xlators/performance/io-cache/src/page.c b/xlators/performance/io-cache/src/page.c
index fc8c87618..b2e20ba65 100644
--- a/xlators/performance/io-cache/src/page.c
+++ b/xlators/performance/io-cache/src/page.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -27,88 +18,176 @@
#include "dict.h"
#include "xlator.h"
#include "io-cache.h"
+#include "ioc-mem-types.h"
#include <assert.h>
#include <sys/time.h>
+char
+ioc_empty (struct ioc_cache *cache)
+{
+ char is_empty = -1;
+
+ GF_VALIDATE_OR_GOTO ("io-cache", cache, out);
+
+ is_empty = list_empty (&cache->page_lru);
+
+out:
+ return is_empty;
+}
+
+
ioc_page_t *
-ioc_page_get (ioc_inode_t *ioc_inode, off_t offset)
+__ioc_page_get (ioc_inode_t *ioc_inode, off_t offset)
{
- int8_t found = 0;
- ioc_page_t *page = NULL;
- ioc_table_t *table = NULL;
- off_t rounded_offset = 0;
+ ioc_page_t *page = NULL;
+ ioc_table_t *table = NULL;
+ off_t rounded_offset = 0;
+
+ GF_VALIDATE_OR_GOTO ("io-cache", ioc_inode, out);
table = ioc_inode->table;
+ GF_VALIDATE_OR_GOTO ("io-cache", ioc_inode, out);
+
rounded_offset = floor (offset, table->page_size);
-
- if (list_empty (&ioc_inode->pages)) {
- return NULL;
- }
-
- list_for_each_entry (page, &ioc_inode->pages, pages) {
- if (page->offset == rounded_offset) {
- found = 1;
- break;
- }
- }
-
- /* was previously returning ioc_inode itself..,
- * 1st of its type and found one more downstairs :O */
- if (!found){
- page = NULL;
- } else {
- /* push the page to the end of the lru list */
- list_move_tail (&page->page_lru, &ioc_inode->page_lru);
- }
-
- return page;
+
+ page = rbthash_get (ioc_inode->cache.page_table, &rounded_offset,
+ sizeof (rounded_offset));
+
+ if (page != NULL) {
+ /* push the page to the end of the lru list */
+ list_move_tail (&page->page_lru, &ioc_inode->cache.page_lru);
+ }
+
+out:
+ return page;
+}
+
+
+ioc_page_t *
+ioc_page_get (ioc_inode_t *ioc_inode, off_t offset)
+{
+ ioc_page_t *page = NULL;
+
+ if (ioc_inode == NULL) {
+ goto out;
+ }
+
+ ioc_inode_lock (ioc_inode);
+ {
+ page = __ioc_page_get (ioc_inode, offset);
+ }
+ ioc_inode_unlock (ioc_inode);
+
+out:
+ return page;
}
/*
- * ioc_page_destroy -
+ * __ioc_page_destroy -
*
* @page:
*
*/
int64_t
+__ioc_page_destroy (ioc_page_t *page)
+{
+ int64_t page_size = 0;
+
+ GF_VALIDATE_OR_GOTO ("io-cache", page, out);
+
+ if (page->iobref)
+ page_size = iobref_size (page->iobref);
+
+ if (page->waitq) {
+ /* frames waiting on this page, do not destroy this page */
+ page_size = -1;
+ page->stale = 1;
+ } else {
+ rbthash_remove (page->inode->cache.page_table, &page->offset,
+ sizeof (page->offset));
+ list_del (&page->page_lru);
+
+ gf_log (page->inode->table->xl->name, GF_LOG_TRACE,
+ "destroying page = %p, offset = %"PRId64" "
+ "&& inode = %p",
+ page, page->offset, page->inode);
+
+ if (page->vector){
+ iobref_unref (page->iobref);
+ GF_FREE (page->vector);
+ page->vector = NULL;
+ }
+
+ page->inode = NULL;
+ }
+
+ if (page_size != -1) {
+ pthread_mutex_destroy (&page->page_lock);
+ GF_FREE (page);
+ }
+
+out:
+ return page_size;
+}
+
+
+int64_t
ioc_page_destroy (ioc_page_t *page)
{
- int64_t page_size = 0;
-
- page_size = iobref_size (page->iobref);
-
- if (page->waitq) {
- /* frames waiting on this page, do not destroy this page */
- page_size = -1;
- } else {
-
- list_del (&page->pages);
- list_del (&page->page_lru);
-
- gf_log (page->inode->table->xl->name, GF_LOG_TRACE,
- "destroying page = %p, offset = %"PRId64" "
- "&& inode = %p",
- page, page->offset, page->inode);
-
- if (page->vector){
- iobref_unref (page->iobref);
- free (page->vector);
- page->vector = NULL;
- }
-
- page->inode = NULL;
-
- }
-
- if (page_size != -1) {
- pthread_mutex_destroy (&page->page_lock);
- free (page);
- }
-
- return page_size;
+ int64_t ret = 0;
+
+ if (page == NULL) {
+ goto out;
+ }
+
+ ioc_inode_lock (page->inode);
+ {
+ ret = __ioc_page_destroy (page);
+ }
+ ioc_inode_unlock (page->inode);
+
+out:
+ return ret;
}
+int32_t
+__ioc_inode_prune (ioc_inode_t *curr, uint64_t *size_pruned,
+ uint64_t size_to_prune, uint32_t index)
+{
+ ioc_page_t *page = NULL, *next = NULL;
+ int32_t ret = 0;
+ ioc_table_t *table = NULL;
+
+ if (curr == NULL) {
+ goto out;
+ }
+
+ table = curr->table;
+
+ list_for_each_entry_safe (page, next, &curr->cache.page_lru, page_lru) {
+ *size_pruned += page->size;
+ ret = __ioc_page_destroy (page);
+
+ if (ret != -1)
+ table->cache_used -= ret;
+
+ gf_log (table->xl->name, GF_LOG_TRACE,
+ "index = %d && table->cache_used = %"PRIu64" && table->"
+ "cache_size = %"PRIu64, index, table->cache_used,
+ table->cache_size);
+
+ if ((*size_pruned) >= size_to_prune)
+ break;
+ }
+
+ if (ioc_empty (&curr->cache)) {
+ list_del_init (&curr->inode_lru);
+ }
+
+out:
+ return 0;
+}
/*
* ioc_prune - prune the cache. we have a limit to the number of pages we
* can have in-memory.
@@ -119,159 +198,149 @@ ioc_page_destroy (ioc_page_t *page)
int32_t
ioc_prune (ioc_table_t *table)
{
- ioc_inode_t *curr = NULL, *next_ioc_inode = NULL;
- ioc_page_t *page = NULL, *next = NULL;
- int32_t ret = -1;
- int32_t index = 0;
- uint64_t size_to_prune = 0;
- uint64_t size_pruned = 0;
-
- ioc_table_lock (table);
- {
- size_to_prune = table->cache_used - table->cache_size;
- /* take out the least recently used inode */
- for (index=0; index < table->max_pri; index++) {
- list_for_each_entry_safe (curr, next_ioc_inode,
- &table->inode_lru[index],
- inode_lru) {
- /* prune page-by-page for this inode, till
- * we reach the equilibrium */
- ioc_inode_lock (curr);
- /* { */
-
- list_for_each_entry_safe (page, next,
- &curr->page_lru,
- page_lru) {
- /* done with all pages, and not
- * reached equilibrium yet??
- * continue with next inode in
- * lru_list */
- size_pruned += page->size;
- ret = ioc_page_destroy (page);
-
- if (ret != -1)
- table->cache_used -= ret;
-
- gf_log (table->xl->name,
- GF_LOG_TRACE,
- "index = %d && table->cache_"
- "used = %"PRIu64" && table->"
- "cache_size = %"PRIu64,
- index, table->cache_used,
- table->cache_size);
-
- if (size_pruned >= size_to_prune)
- break;
- } /* list_for_each_entry_safe(page...) */
- if (list_empty (&curr->pages)) {
- list_del_init (&curr->inode_lru);
- }
-
- /* } */
- ioc_inode_unlock (curr);
-
- if (size_pruned >= size_to_prune)
- break;
- } /* list_for_each_entry_safe (curr...) */
-
- if (size_pruned >= size_to_prune)
- break;
- } /* for(index=0;...) */
-
- } /* ioc_inode_table locked region end */
- ioc_table_unlock (table);
-
- return 0;
+ ioc_inode_t *curr = NULL, *next_ioc_inode = NULL;
+ int32_t index = 0;
+ uint64_t size_to_prune = 0;
+ uint64_t size_pruned = 0;
+
+ GF_VALIDATE_OR_GOTO ("io-cache", table, out);
+
+ ioc_table_lock (table);
+ {
+ size_to_prune = table->cache_used - table->cache_size;
+ /* take out the least recently used inode */
+ for (index=0; index < table->max_pri; index++) {
+ list_for_each_entry_safe (curr, next_ioc_inode,
+ &table->inode_lru[index],
+ inode_lru) {
+ /* prune page-by-page for this inode, till
+ * we reach the equilibrium */
+ ioc_inode_lock (curr);
+ {
+ __ioc_inode_prune (curr, &size_pruned,
+ size_to_prune,
+ index);
+ }
+ ioc_inode_unlock (curr);
+
+ if (size_pruned >= size_to_prune)
+ break;
+ } /* list_for_each_entry_safe (curr...) */
+
+ if (size_pruned >= size_to_prune)
+ break;
+ } /* for(index=0;...) */
+
+ } /* ioc_inode_table locked region end */
+ ioc_table_unlock (table);
+
+out:
+ return 0;
}
/*
- * ioc_page_create - create a new page.
+ * __ioc_page_create - create a new page.
*
- * @ioc_inode:
+ * @ioc_inode:
* @offset:
*
*/
ioc_page_t *
-ioc_page_create (ioc_inode_t *ioc_inode, off_t offset)
+__ioc_page_create (ioc_inode_t *ioc_inode, off_t offset)
{
- ioc_table_t *table = NULL;
- ioc_page_t *page = NULL;
- off_t rounded_offset = 0;
- ioc_page_t *newpage = NULL;
-
+ ioc_table_t *table = NULL;
+ ioc_page_t *page = NULL;
+ off_t rounded_offset = 0;
+ ioc_page_t *newpage = NULL;
+
+ GF_VALIDATE_OR_GOTO ("io-cache", ioc_inode, out);
+
table = ioc_inode->table;
+ GF_VALIDATE_OR_GOTO ("io-cache", table, out);
+
rounded_offset = floor (offset, table->page_size);
- newpage = CALLOC (1, sizeof (*newpage));
+ newpage = GF_CALLOC (1, sizeof (*newpage), gf_ioc_mt_ioc_newpage_t);
if (newpage == NULL) {
goto out;
}
- if (ioc_inode) {
- table = ioc_inode->table;
- } else {
- free (newpage);
+ if (!ioc_inode) {
+ GF_FREE (newpage);
newpage = NULL;
goto out;
- }
-
- newpage->offset = rounded_offset;
- newpage->inode = ioc_inode;
- pthread_mutex_init (&newpage->page_lock, NULL);
+ }
- list_add_tail (&newpage->page_lru, &ioc_inode->page_lru);
- list_add_tail (&newpage->pages, &ioc_inode->pages);
+ newpage->offset = rounded_offset;
+ newpage->inode = ioc_inode;
+ pthread_mutex_init (&newpage->page_lock, NULL);
- page = newpage;
+ rbthash_insert (ioc_inode->cache.page_table, newpage, &rounded_offset,
+ sizeof (rounded_offset));
- gf_log ("io-cache", GF_LOG_TRACE,
- "returning new page %p", page);
+ list_add_tail (&newpage->page_lru, &ioc_inode->cache.page_lru);
+
+ page = newpage;
+
+ gf_log ("io-cache", GF_LOG_TRACE,
+ "returning new page %p", page);
out:
- return page;
+ return page;
}
-/*
- * ioc_wait_on_page - pause a frame to wait till the arrival of a page.
- * here we need to handle the case when the frame who calls wait_on_page
- * himself has caused page_fault
+/*
+ * ioc_wait_on_page - pause a frame to wait till the arrival of a page.
+ * here we need to handle the case when the frame who calls wait_on_page
+ * himself has caused page_fault
*
* @page: page to wait on
* @frame: call frame who is waiting on page
*
*/
void
-ioc_wait_on_page (ioc_page_t *page, call_frame_t *frame, off_t offset,
- size_t size)
+__ioc_wait_on_page (ioc_page_t *page, call_frame_t *frame, off_t offset,
+ size_t size)
{
- ioc_waitq_t *waitq = NULL;
- ioc_local_t *local = frame->local;
+ ioc_waitq_t *waitq = NULL;
+ ioc_local_t *local = NULL;
+
+ GF_VALIDATE_OR_GOTO ("io-cache", frame, out);
+ local = frame->local;
+
+ GF_VALIDATE_OR_GOTO (frame->this->name, local, out);
+
+ if (page == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ gf_log (frame->this->name, GF_LOG_WARNING,
+ "asked to wait on a NULL page");
+ }
- waitq = CALLOC (1, sizeof (*waitq));
+ waitq = GF_CALLOC (1, sizeof (*waitq), gf_ioc_mt_ioc_waitq_t);
if (waitq == NULL) {
local->op_ret = -1;
local->op_errno = ENOMEM;
- gf_log (frame->this->name, GF_LOG_ERROR, "out of memory");
goto out;
- }
-
- gf_log (frame->this->name, GF_LOG_TRACE,
- "frame(%p) waiting on page = %p, offset=%"PRId64", "
- "size=%"GF_PRI_SIZET"",
- frame, page, offset, size);
-
- waitq->data = frame;
- waitq->next = page->waitq;
- waitq->pending_offset = offset;
- waitq->pending_size = size;
- page->waitq = waitq;
- /* one frame can wait only once on a given page,
- * local->wait_count is number of pages a frame is waiting on */
- ioc_local_lock (local);
- {
- local->wait_count++;
- }
- ioc_local_unlock (local);
+ }
+
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "frame(%p) waiting on page = %p, offset=%"PRId64", "
+ "size=%"GF_PRI_SIZET"",
+ frame, page, offset, size);
+
+ waitq->data = frame;
+ waitq->next = page->waitq;
+ waitq->pending_offset = offset;
+ waitq->pending_size = size;
+ page->waitq = waitq;
+ /* one frame can wait only once on a given page,
+ * local->wait_count is number of pages a frame is waiting on */
+ ioc_local_lock (local);
+ {
+ local->wait_count++;
+ }
+ ioc_local_unlock (local);
out:
return;
@@ -279,7 +348,7 @@ out:
/*
- * ioc_cache_still_valid - see if cached pages ioc_inode are still valid
+ * ioc_cache_still_valid - see if cached pages ioc_inode are still valid
* against given stbuf
*
* @ioc_inode:
@@ -288,197 +357,212 @@ out:
* assumes ioc_inode is locked
*/
int8_t
-ioc_cache_still_valid (ioc_inode_t *ioc_inode, struct stat *stbuf)
+ioc_cache_still_valid (ioc_inode_t *ioc_inode, struct iatt *stbuf)
{
- int8_t cache_still_valid = 1;
-
+ int8_t cache_still_valid = 1;
+
+ GF_VALIDATE_OR_GOTO ("io-cache", ioc_inode, out);
+
#if 0
- if (!stbuf || (stbuf->st_mtime != ioc_inode->mtime) ||
- (stbuf->st_mtim.tv_nsec != ioc_inode->stbuf.st_mtim.tv_nsec))
- cache_still_valid = 0;
+ if (!stbuf || (stbuf->ia_mtime != ioc_inode->cache.mtime) ||
+ (stbuf->st_mtim.tv_nsec != ioc_inode->stbuf.st_mtim.tv_nsec))
+ cache_still_valid = 0;
#else
- if (!stbuf || (stbuf->st_mtime != ioc_inode->mtime))
- cache_still_valid = 0;
+ if (!stbuf || (stbuf->ia_mtime != ioc_inode->cache.mtime)
+ || (stbuf->ia_mtime_nsec != ioc_inode->cache.mtime_nsec))
+ cache_still_valid = 0;
#endif
#if 0
- /* talk with avati@zresearch.com to enable this section */
- if (!ioc_inode->mtime && stbuf) {
- cache_still_valid = 1;
- ioc_inode->mtime = stbuf->st_mtime;
- }
+ /* talk with avati@gluster.com to enable this section */
+ if (!ioc_inode->mtime && stbuf) {
+ cache_still_valid = 1;
+ ioc_inode->mtime = stbuf->ia_mtime;
+ }
#endif
- return cache_still_valid;
+out:
+ return cache_still_valid;
}
void
ioc_waitq_return (ioc_waitq_t *waitq)
{
- ioc_waitq_t *trav = NULL;
- ioc_waitq_t *next = NULL;
- call_frame_t *frame = NULL;
+ ioc_waitq_t *trav = NULL;
+ ioc_waitq_t *next = NULL;
+ call_frame_t *frame = NULL;
- for (trav = waitq; trav; trav = next) {
- next = trav->next;
+ for (trav = waitq; trav; trav = next) {
+ next = trav->next;
- frame = trav->data;
- ioc_frame_return (frame);
- free (trav);
- }
+ frame = trav->data;
+ ioc_frame_return (frame);
+ GF_FREE (trav);
+ }
}
int
ioc_fault_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct stat *stbuf, struct iobref *iobref)
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
{
- ioc_local_t *local = NULL;
- off_t offset = 0;
- ioc_inode_t *ioc_inode = NULL;
- ioc_table_t *table = NULL;
- ioc_page_t *page = NULL;
- off_t trav_offset = 0;
- size_t payload_size = 0;
- int32_t destroy_size = 0;
- size_t page_size = 0;
- ioc_waitq_t *waitq = NULL;
- size_t iobref_page_size = 0;
+ ioc_local_t *local = NULL;
+ off_t offset = 0;
+ ioc_inode_t *ioc_inode = NULL;
+ ioc_table_t *table = NULL;
+ ioc_page_t *page = NULL;
+ int32_t destroy_size = 0;
+ size_t page_size = 0;
+ ioc_waitq_t *waitq = NULL;
+ size_t iobref_page_size = 0;
+ char zero_filled = 0;
+
+ GF_ASSERT (frame);
local = frame->local;
+ GF_ASSERT (local);
+
offset = local->pending_offset;
ioc_inode = local->inode;
+ GF_ASSERT (ioc_inode);
+
table = ioc_inode->table;
+ GF_ASSERT (table);
+
+ zero_filled = ((op_ret >=0) && (stbuf->ia_mtime == 0));
+
+ ioc_inode_lock (ioc_inode);
+ {
+ if (op_ret == -1 || !(zero_filled ||
+ ioc_cache_still_valid(ioc_inode,
+ stbuf))) {
+ gf_log (ioc_inode->table->xl->name, GF_LOG_TRACE,
+ "cache for inode(%p) is invalid. flushing "
+ "all pages", ioc_inode);
+ destroy_size = __ioc_inode_flush (ioc_inode);
+ }
- trav_offset = offset;
- payload_size = op_ret;
-
- ioc_inode_lock (ioc_inode);
- {
- if (op_ret == -1 ||
- (op_ret >= 0 &&
- !ioc_cache_still_valid(ioc_inode, stbuf))) {
- gf_log (ioc_inode->table->xl->name, GF_LOG_TRACE,
- "cache for inode(%p) is invalid. flushing "
- "all pages", ioc_inode);
- destroy_size = __ioc_inode_flush (ioc_inode);
- }
-
- if (op_ret >= 0)
- ioc_inode->mtime = stbuf->st_mtime;
-
- gettimeofday (&ioc_inode->tv, NULL);
-
- if (op_ret < 0) {
- /* error, readv returned -1 */
- page = ioc_page_get (ioc_inode, offset);
- if (page)
- waitq = ioc_page_error (page, op_ret,
- op_errno);
- } else {
- gf_log (ioc_inode->table->xl->name, GF_LOG_TRACE,
- "op_ret = %d", op_ret);
- page = ioc_page_get (ioc_inode, offset);
- if (!page) {
- /* page was flushed */
- /* some serious bug ? */
- gf_log (this->name, GF_LOG_DEBUG,
- "wasted copy: %"PRId64"[+%"PRId64"] "
- "ioc_inode=%p", offset,
- table->page_size, ioc_inode);
- } else {
- if (page->vector) {
- iobref_unref (page->iobref);
- free (page->vector);
- page->vector = NULL;
- }
-
- /* keep a copy of the page for our cache */
- page->vector = iov_dup (vector, count);
+ if ((op_ret >= 0) && !zero_filled) {
+ ioc_inode->cache.mtime = stbuf->ia_mtime;
+ ioc_inode->cache.mtime_nsec = stbuf->ia_mtime_nsec;
+ }
+
+ gettimeofday (&ioc_inode->cache.tv, NULL);
+
+ if (op_ret < 0) {
+ /* error, readv returned -1 */
+ page = __ioc_page_get (ioc_inode, offset);
+ if (page)
+ waitq = __ioc_page_error (page, op_ret,
+ op_errno);
+ } else {
+ gf_log (ioc_inode->table->xl->name, GF_LOG_TRACE,
+ "op_ret = %d", op_ret);
+ page = __ioc_page_get (ioc_inode, offset);
+ if (!page) {
+ /* page was flushed */
+ /* some serious bug ? */
+ gf_log (frame->this->name, GF_LOG_WARNING,
+ "wasted copy: %"PRId64"[+%"PRId64"] "
+ "ioc_inode=%p", offset,
+ table->page_size, ioc_inode);
+ } else {
+ if (page->vector) {
+ iobref_unref (page->iobref);
+ GF_FREE (page->vector);
+ page->vector = NULL;
+ }
+
+ /* keep a copy of the page for our cache */
+ page->vector = iov_dup (vector, count);
if (page->vector == NULL) {
- page = ioc_page_get (ioc_inode, offset);
+ page = __ioc_page_get (ioc_inode,
+ offset);
if (page != NULL)
- waitq = ioc_page_error (page,
- -1,
- ENOMEM);
- op_ret = -1;
- op_errno = ENOMEM;
+ waitq = __ioc_page_error (page,
+ -1,
+ ENOMEM);
goto unlock;
}
- page->count = count;
- if (iobref) {
- page->iobref = iobref_ref (iobref);
- } else {
- /* TODO: we have got a response to
- * our request and no data */
- gf_log (this->name, GF_LOG_CRITICAL,
- "frame>root>rsp_refs is null");
- } /* if(frame->root->rsp_refs) */
-
- /* page->size should indicate exactly how
- * much the readv call to the child
- * translator returned. earlier op_ret
- * from child translator was used, which
- * gave rise to a bug where reads from
- * io-cached volume were resulting in 0
- * byte replies */
- page_size = iov_length(vector, count);
-
- page->size = page_size;
+ page->count = count;
+ if (iobref) {
+ page->iobref = iobref_ref (iobref);
+ } else {
+ /* TODO: we have got a response to
+ * our request and no data */
+ gf_log (frame->this->name,
+ GF_LOG_CRITICAL,
+ "frame>root>rsp_refs is null");
+ } /* if(frame->root->rsp_refs) */
+
+ /* page->size should indicate exactly how
+ * much the readv call to the child
+ * translator returned. earlier op_ret
+ * from child translator was used, which
+ * gave rise to a bug where reads from
+ * io-cached volume were resulting in 0
+ * byte replies */
+ page_size = iov_length(vector, count);
+ page->size = page_size;
+ page->op_errno = op_errno;
iobref_page_size = iobref_size (page->iobref);
- if (page->waitq) {
- /* wake up all the frames waiting on
- * this page, including
- * the frame which triggered fault */
- waitq = ioc_page_wakeup (page);
- } /* if(page->waitq) */
- } /* if(!page)...else */
- } /* if(op_ret < 0)...else */
- } /* ioc_inode locked region end */
+ if (page->waitq) {
+ /* wake up all the frames waiting on
+ * this page, including
+ * the frame which triggered fault */
+ waitq = __ioc_page_wakeup (page,
+ op_errno);
+ } /* if(page->waitq) */
+ } /* if(!page)...else */
+ } /* if(op_ret < 0)...else */
+ } /* ioc_inode locked region end */
unlock:
- ioc_inode_unlock (ioc_inode);
+ ioc_inode_unlock (ioc_inode);
- ioc_waitq_return (waitq);
+ ioc_waitq_return (waitq);
- if (iobref_page_size) {
- ioc_table_lock (table);
- {
- table->cache_used += iobref_page_size;
- }
- ioc_table_unlock (table);
- }
+ if (iobref_page_size) {
+ ioc_table_lock (table);
+ {
+ table->cache_used += iobref_page_size;
+ }
+ ioc_table_unlock (table);
+ }
- if (destroy_size) {
- ioc_table_lock (table);
- {
- table->cache_used -= destroy_size;
- }
- ioc_table_unlock (table);
- }
+ if (destroy_size) {
+ ioc_table_lock (table);
+ {
+ table->cache_used -= destroy_size;
+ }
+ ioc_table_unlock (table);
+ }
- if (ioc_need_prune (ioc_inode->table)) {
- ioc_prune (ioc_inode->table);
- }
+ if (ioc_need_prune (ioc_inode->table)) {
+ ioc_prune (ioc_inode->table);
+ }
- gf_log (this->name, GF_LOG_TRACE, "fault frame %p returned", frame);
- pthread_mutex_destroy (&local->local_lock);
+ gf_log (frame->this->name, GF_LOG_TRACE, "fault frame %p returned",
+ frame);
+ pthread_mutex_destroy (&local->local_lock);
- fd_unref (local->fd);
+ fd_unref (local->fd);
- STACK_DESTROY (frame->root);
- return 0;
+ STACK_DESTROY (frame->root);
+ return 0;
}
+
/*
* ioc_page_fault -
- *
+ *
* @ioc_inode:
* @frame:
* @fd:
@@ -486,201 +570,223 @@ unlock:
*
*/
void
-ioc_page_fault (ioc_inode_t *ioc_inode, call_frame_t *frame, fd_t *fd,
- off_t offset)
+ioc_page_fault (ioc_inode_t *ioc_inode, call_frame_t *frame, fd_t *fd,
+ off_t offset)
{
- ioc_table_t *table = NULL;
- call_frame_t *fault_frame = NULL;
- ioc_local_t *fault_local = NULL;
- int32_t op_ret = -1, op_errno = -1;
- ioc_waitq_t *waitq = NULL;
- ioc_page_t *page = NULL;
+ ioc_table_t *table = NULL;
+ call_frame_t *fault_frame = NULL;
+ ioc_local_t *fault_local = NULL;
+ int32_t op_ret = -1, op_errno = -1;
+ ioc_waitq_t *waitq = NULL;
+ ioc_page_t *page = NULL;
+
+ GF_ASSERT (ioc_inode);
+ if (frame == NULL) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ gf_log ("io-cache", GF_LOG_WARNING,
+ "page fault on a NULL frame");
+ goto err;
+ }
table = ioc_inode->table;
fault_frame = copy_frame (frame);
if (fault_frame == NULL) {
op_ret = -1;
op_errno = ENOMEM;
- gf_log (ioc_inode->table->xl->name, GF_LOG_ERROR,
- "out of memory");
goto err;
}
- fault_local = CALLOC (1, sizeof (ioc_local_t));
+ fault_local = mem_get0 (THIS->local_pool);
if (fault_local == NULL) {
op_ret = -1;
op_errno = ENOMEM;
STACK_DESTROY (fault_frame->root);
- gf_log (ioc_inode->table->xl->name, GF_LOG_ERROR,
- "out of memory");
goto err;
}
- /* NOTE: copy_frame() means, the frame the fop whose fd_ref we
- * are using till now won't be valid till we get reply from server.
- * we unref this fd, in fault_cbk */
- fault_local->fd = fd_ref (fd);
+ /* NOTE: copy_frame() means, the frame the fop whose fd_ref we
+ * are using till now won't be valid till we get reply from server.
+ * we unref this fd, in fault_cbk */
+ fault_local->fd = fd_ref (fd);
- fault_frame->local = fault_local;
- pthread_mutex_init (&fault_local->local_lock, NULL);
+ fault_frame->local = fault_local;
+ pthread_mutex_init (&fault_local->local_lock, NULL);
- INIT_LIST_HEAD (&fault_local->fill_list);
- fault_local->pending_offset = offset;
- fault_local->pending_size = table->page_size;
- fault_local->inode = ioc_inode;
+ INIT_LIST_HEAD (&fault_local->fill_list);
+ fault_local->pending_offset = offset;
+ fault_local->pending_size = table->page_size;
+ fault_local->inode = ioc_inode;
- gf_log (frame->this->name, GF_LOG_TRACE,
- "stack winding page fault for offset = %"PRId64" with "
- "frame %p", offset, fault_frame);
-
- STACK_WIND (fault_frame, ioc_fault_cbk, FIRST_CHILD(fault_frame->this),
- FIRST_CHILD(fault_frame->this)->fops->readv, fd,
- table->page_size, offset);
- return;
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "stack winding page fault for offset = %"PRId64" with "
+ "frame %p", offset, fault_frame);
+
+ STACK_WIND (fault_frame, ioc_fault_cbk, FIRST_CHILD(fault_frame->this),
+ FIRST_CHILD(fault_frame->this)->fops->readv, fd,
+ table->page_size, offset, 0, NULL);
+ return;
err:
- page = ioc_page_get (ioc_inode, offset);
- if (page != NULL) {
- waitq = ioc_page_error (page, op_ret, op_errno);
- if (waitq != NULL) {
- ioc_waitq_return (waitq);
+ ioc_inode_lock (ioc_inode);
+ {
+ page = __ioc_page_get (ioc_inode, offset);
+ if (page != NULL) {
+ waitq = __ioc_page_error (page, op_ret, op_errno);
}
}
+ ioc_inode_unlock (ioc_inode);
+
+ if (waitq != NULL) {
+ ioc_waitq_return (waitq);
+ }
}
+
int32_t
-ioc_frame_fill (ioc_page_t *page, call_frame_t *frame, off_t offset,
- size_t size)
+__ioc_frame_fill (ioc_page_t *page, call_frame_t *frame, off_t offset,
+ size_t size, int32_t op_errno)
{
- ioc_local_t *local = NULL;
- ioc_fill_t *fill = NULL;
- off_t src_offset = 0;
- off_t dst_offset = 0;
- ssize_t copy_size = 0;
- ioc_inode_t *ioc_inode = NULL;
- ioc_fill_t *new = NULL;
- int8_t found = 0;
- int32_t ret = 0;
-
+ ioc_local_t *local = NULL;
+ ioc_fill_t *fill = NULL;
+ off_t src_offset = 0;
+ off_t dst_offset = 0;
+ ssize_t copy_size = 0;
+ ioc_inode_t *ioc_inode = NULL;
+ ioc_fill_t *new = NULL;
+ int8_t found = 0;
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("io-cache", frame, out);
+
local = frame->local;
+ GF_VALIDATE_OR_GOTO (frame->this->name, local, out);
+
+ if (page == NULL) {
+ gf_log (frame->this->name, GF_LOG_WARNING,
+ "NULL page has been provided to serve read request");
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ goto out;
+ }
+
ioc_inode = page->inode;
- gf_log (frame->this->name, GF_LOG_TRACE,
- "frame (%p) offset = %"PRId64" && size = %"GF_PRI_SIZET" "
- "&& page->size = %"GF_PRI_SIZET" && wait_count = %d",
- frame, offset, size, page->size, local->wait_count);
-
- /* immediately move this page to the end of the page_lru list */
- list_move_tail (&page->page_lru, &ioc_inode->page_lru);
- /* fill local->pending_size bytes from local->pending_offset */
- if (local->op_ret != -1 && page->size) {
- if (offset > page->offset)
- /* offset is offset in file, convert it to offset in
- * page */
- src_offset = offset - page->offset;
- /*FIXME: since offset is the offset within page is the
- * else case valid? */
- else
- /* local->pending_offset is in previous page. do not
- * fill until we have filled all previous pages */
- dst_offset = page->offset - offset;
-
- /* we have to copy from offset to either end of this page
- * or till the requested size */
- copy_size = min (page->size - src_offset,
- size - dst_offset);
-
- if (copy_size < 0) {
- /* if page contains fewer bytes and the required offset
- is beyond the page size in the page */
- copy_size = src_offset = 0;
- }
-
- gf_log (page->inode->table->xl->name, GF_LOG_TRACE,
- "copy_size = %"GF_PRI_SIZET" && src_offset = "
- "%"PRId64" && dst_offset = %"PRId64"",
- copy_size, src_offset, dst_offset);
-
- {
- new = CALLOC (1, sizeof (*new));
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "frame (%p) offset = %"PRId64" && size = %"GF_PRI_SIZET" "
+ "&& page->size = %"GF_PRI_SIZET" && wait_count = %d",
+ frame, offset, size, page->size, local->wait_count);
+
+ /* immediately move this page to the end of the page_lru list */
+ list_move_tail (&page->page_lru, &ioc_inode->cache.page_lru);
+ /* fill local->pending_size bytes from local->pending_offset */
+ if (local->op_ret != -1) {
+ local->op_errno = op_errno;
+
+ if (page->size == 0) {
+ goto done;
+ }
+
+ if (offset > page->offset)
+ /* offset is offset in file, convert it to offset in
+ * page */
+ src_offset = offset - page->offset;
+ /*FIXME: since offset is the offset within page is the
+ * else case valid? */
+ else
+ /* local->pending_offset is in previous page. do not
+ * fill until we have filled all previous pages */
+ dst_offset = page->offset - offset;
+
+ /* we have to copy from offset to either end of this page
+ * or till the requested size */
+ copy_size = min (page->size - src_offset,
+ size - dst_offset);
+
+ if (copy_size < 0) {
+ /* if page contains fewer bytes and the required offset
+ is beyond the page size in the page */
+ copy_size = src_offset = 0;
+ }
+
+ gf_log (page->inode->table->xl->name, GF_LOG_TRACE,
+ "copy_size = %"GF_PRI_SIZET" && src_offset = "
+ "%"PRId64" && dst_offset = %"PRId64"",
+ copy_size, src_offset, dst_offset);
+
+ {
+ new = GF_CALLOC (1, sizeof (*new),
+ gf_ioc_mt_ioc_fill_t);
if (new == NULL) {
local->op_ret = -1;
local->op_errno = ENOMEM;
- ret = -1;
- gf_log (page->inode->table->xl->name,
- GF_LOG_ERROR, "out of memory");
goto out;
}
- new->offset = page->offset;
- new->size = copy_size;
- new->iobref = iobref_ref (page->iobref);
- new->count = iov_subset (page->vector,
- page->count,
- src_offset,
- src_offset + copy_size,
- NULL);
-
- new->vector = CALLOC (new->count,
- sizeof (struct iovec));
+ new->offset = page->offset;
+ new->size = copy_size;
+ new->iobref = iobref_ref (page->iobref);
+ new->count = iov_subset (page->vector, page->count,
+ src_offset,
+ src_offset + copy_size,
+ NULL);
+
+ new->vector = GF_CALLOC (new->count,
+ sizeof (struct iovec),
+ gf_ioc_mt_iovec);
if (new->vector == NULL) {
local->op_ret = -1;
local->op_errno = ENOMEM;
iobref_unref (new->iobref);
- FREE (new);
-
- ret = -1;
- gf_log (page->inode->table->xl->name,
- GF_LOG_ERROR, "out of memory");
+ GF_FREE (new);
goto out;
}
- new->count = iov_subset (page->vector,
- page->count,
- src_offset,
- src_offset + copy_size,
- new->vector);
-
+ new->count = iov_subset (page->vector, page->count,
+ src_offset,
+ src_offset + copy_size,
+ new->vector);
+
+ /* add the ioc_fill to fill_list for this frame */
+ if (list_empty (&local->fill_list)) {
+ /* if list is empty, then this is the first
+ * time we are filling frame, add the
+ * ioc_fill_t to the end of list */
+ list_add_tail (&new->list, &local->fill_list);
+ } else {
+ found = 0;
+ /* list is not empty, we need to look for
+ * where this offset fits in list */
+ list_for_each_entry (fill, &local->fill_list,
+ list) {
+ if (fill->offset > new->offset) {
+ found = 1;
+ break;
+ }
+ }
+ if (found) {
+ list_add_tail (&new->list,
+ &fill->list);
+ } else {
+ list_add_tail (&new->list,
+ &local->fill_list);
+ }
+ }
+ }
- /* add the ioc_fill to fill_list for this frame */
- if (list_empty (&local->fill_list)) {
- /* if list is empty, then this is the first
- * time we are filling frame, add the
- * ioc_fill_t to the end of list */
- list_add_tail (&new->list, &local->fill_list);
- } else {
- found = 0;
- /* list is not empty, we need to look for
- * where this offset fits in list */
- list_for_each_entry (fill, &local->fill_list,
- list) {
- if (fill->offset > new->offset) {
- found = 1;
- break;
- }
- }
-
- if (found) {
- found = 0;
- list_add_tail (&new->list,
- &fill->list);
- } else {
- list_add_tail (&new->list,
- &local->fill_list);
- }
- }
- }
- local->op_ret += copy_size;
- }
+ local->op_ret += copy_size;
+ }
+done:
+ ret = 0;
out:
return ret;
}
/*
- * ioc_frame_unwind - frame unwinds only from here
+ * ioc_frame_unwind - frame unwinds only from here
*
* @frame: call frame to unwind
*
@@ -691,84 +797,99 @@ out:
static void
ioc_frame_unwind (call_frame_t *frame)
{
- ioc_local_t *local = NULL;
- ioc_fill_t *fill = NULL, *next = NULL;
- int32_t count = 0;
- struct iovec *vector = NULL;
- int32_t copied = 0;
- struct iobref *iobref = NULL;
- struct stat stbuf = {0,};
- int32_t op_ret = 0, op_errno = 0;
+ ioc_local_t *local = NULL;
+ ioc_fill_t *fill = NULL, *next = NULL;
+ int32_t count = 0;
+ struct iovec *vector = NULL;
+ int32_t copied = 0;
+ struct iobref *iobref = NULL;
+ struct iatt stbuf = {0,};
+ int32_t op_ret = 0, op_errno = 0;
+
+ GF_ASSERT (frame);
local = frame->local;
- // ioc_local_lock (local);
- frame->local = NULL;
- iobref = iobref_new ();
+ if (local == NULL) {
+ gf_log (frame->this->name, GF_LOG_WARNING,
+ "local is NULL");
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ if (local->op_ret < 0) {
+ op_ret = local->op_ret;
+ op_errno = local->op_errno;
+ goto unwind;
+ }
+
+ // ioc_local_lock (local);
+ iobref = iobref_new ();
if (iobref == NULL) {
op_ret = -1;
op_errno = ENOMEM;
- gf_log (frame->this->name, GF_LOG_ERROR, "out of memory");
}
- if (list_empty (&local->fill_list)) {
- gf_log (frame->this->name, GF_LOG_TRACE,
- "frame(%p) has 0 entries in local->fill_list "
- "(offset = %"PRId64" && size = %"GF_PRI_SIZET")",
- frame, local->offset, local->size);
- }
+ if (list_empty (&local->fill_list)) {
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "frame(%p) has 0 entries in local->fill_list "
+ "(offset = %"PRId64" && size = %"GF_PRI_SIZET")",
+ frame, local->offset, local->size);
+ }
- list_for_each_entry (fill, &local->fill_list, list) {
- count += fill->count;
- }
+ list_for_each_entry (fill, &local->fill_list, list) {
+ count += fill->count;
+ }
- vector = CALLOC (count, sizeof (*vector));
+ vector = GF_CALLOC (count, sizeof (*vector), gf_ioc_mt_iovec);
if (vector == NULL) {
op_ret = -1;
op_errno = ENOMEM;
-
- gf_log (frame->this->name, GF_LOG_ERROR, "out of memory");
}
-
- list_for_each_entry_safe (fill, next, &local->fill_list, list) {
- if ((vector != NULL) && (iobref != NULL)) {
+
+ list_for_each_entry_safe (fill, next, &local->fill_list, list) {
+ if ((vector != NULL) && (iobref != NULL)) {
memcpy (((char *)vector) + copied,
fill->vector,
fill->count * sizeof (*vector));
-
+
copied += (fill->count * sizeof (*vector));
iobref_merge (iobref, fill->iobref);
}
- list_del (&fill->list);
- iobref_unref (fill->iobref);
- free (fill->vector);
- free (fill);
- }
-
+ list_del (&fill->list);
+ iobref_unref (fill->iobref);
+ GF_FREE (fill->vector);
+ GF_FREE (fill);
+ }
+
if (op_ret != -1) {
op_ret = iov_length (vector, count);
}
- gf_log (frame->this->name, GF_LOG_TRACE,
- "frame(%p) unwinding with op_ret=%d", frame, op_ret);
+unwind:
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "frame(%p) unwinding with op_ret=%d", frame, op_ret);
- // ioc_local_unlock (local);
+ // ioc_local_unlock (local);
- STACK_UNWIND (frame, op_ret, local->op_errno, vector, count,
- &stbuf, iobref);
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector,
+ count, &stbuf, iobref, NULL);
if (iobref != NULL) {
iobref_unref (iobref);
}
-
+
if (vector != NULL) {
- free (vector);
+ GF_FREE (vector);
vector = NULL;
}
-
- pthread_mutex_destroy (&local->local_lock);
- free (local);
+
+ pthread_mutex_destroy (&local->local_lock);
+ if (local)
+ mem_put (local);
return;
}
@@ -782,60 +903,119 @@ ioc_frame_unwind (call_frame_t *frame)
void
ioc_frame_return (call_frame_t *frame)
{
- ioc_local_t *local = NULL;
- int32_t wait_count = 0;
+ ioc_local_t *local = NULL;
+ int32_t wait_count = 0;
+
+ GF_ASSERT (frame);
local = frame->local;
- assert (local->wait_count > 0);
+ GF_ASSERT (local->wait_count > 0);
- ioc_local_lock (local);
- {
- wait_count = --local->wait_count;
- }
- ioc_local_unlock (local);
+ ioc_local_lock (local);
+ {
+ wait_count = --local->wait_count;
+ }
+ ioc_local_unlock (local);
- if (!wait_count) {
- ioc_frame_unwind (frame);
- }
+ if (!wait_count) {
+ ioc_frame_unwind (frame);
+ }
- return;
+ return;
}
-/*
+/*
* ioc_page_wakeup -
* @page:
*
* to be called only when a frame is waiting on an in-transit page
*/
ioc_waitq_t *
-ioc_page_wakeup (ioc_page_t *page)
+__ioc_page_wakeup (ioc_page_t *page, int32_t op_errno)
{
- ioc_waitq_t *waitq = NULL, *trav = NULL;
- call_frame_t *frame = NULL;
- int32_t ret = -1;
-
- waitq = page->waitq;
- page->waitq = NULL;
-
- trav = waitq;
- page->ready = 1;
-
- gf_log (page->inode->table->xl->name, GF_LOG_TRACE,
- "page is %p && waitq = %p", page, waitq);
-
- for (trav = waitq; trav; trav = trav->next) {
- frame = trav->data;
- ret = ioc_frame_fill (page, frame, trav->pending_offset,
- trav->pending_size);
+ ioc_waitq_t *waitq = NULL, *trav = NULL;
+ call_frame_t *frame = NULL;
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("io-cache", page, out);
+
+ waitq = page->waitq;
+ page->waitq = NULL;
+
+ page->ready = 1;
+
+ gf_log (page->inode->table->xl->name, GF_LOG_TRACE,
+ "page is %p && waitq = %p", page, waitq);
+
+ for (trav = waitq; trav; trav = trav->next) {
+ frame = trav->data;
+ ret = __ioc_frame_fill (page, frame, trav->pending_offset,
+ trav->pending_size, op_errno);
if (ret == -1) {
break;
}
- }
-
- return waitq;
+ }
+
+ if (page->stale) {
+ __ioc_page_destroy (page);
+ }
+
+out:
+ return waitq;
}
+
+/*
+ * ioc_page_error -
+ * @page:
+ * @op_ret:
+ * @op_errno:
+ *
+ */
+ioc_waitq_t *
+__ioc_page_error (ioc_page_t *page, int32_t op_ret, int32_t op_errno)
+{
+ ioc_waitq_t *waitq = NULL, *trav = NULL;
+ call_frame_t *frame = NULL;
+ int64_t ret = 0;
+ ioc_table_t *table = NULL;
+ ioc_local_t *local = NULL;
+
+ GF_VALIDATE_OR_GOTO ("io-cache", page, out);
+
+ waitq = page->waitq;
+ page->waitq = NULL;
+
+ gf_log (page->inode->table->xl->name, GF_LOG_WARNING,
+ "page error for page = %p & waitq = %p", page, waitq);
+
+ for (trav = waitq; trav; trav = trav->next) {
+
+ frame = trav->data;
+
+ local = frame->local;
+ ioc_local_lock (local);
+ {
+ if (local->op_ret != -1) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+ }
+ ioc_local_unlock (local);
+ }
+
+ table = page->inode->table;
+ ret = __ioc_page_destroy (page);
+
+ if (ret != -1) {
+ table->cache_used -= ret;
+ }
+
+out:
+ return waitq;
+}
+
/*
* ioc_page_error -
* @page:
@@ -846,39 +1026,18 @@ ioc_page_wakeup (ioc_page_t *page)
ioc_waitq_t *
ioc_page_error (ioc_page_t *page, int32_t op_ret, int32_t op_errno)
{
- ioc_waitq_t *waitq = NULL, *trav = NULL;
- call_frame_t *frame = NULL;
- int64_t ret = 0;
- ioc_table_t *table = NULL;
- ioc_local_t *local = NULL;
-
- waitq = page->waitq;
- page->waitq = NULL;
-
- gf_log (page->inode->table->xl->name, GF_LOG_DEBUG,
- "page error for page = %p & waitq = %p", page, waitq);
-
- for (trav = waitq; trav; trav = trav->next) {
-
- frame = trav->data;
-
- local = frame->local;
- ioc_local_lock (local);
- {
- if (local->op_ret != -1) {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- }
- }
- ioc_local_unlock (local);
- }
-
- table = page->inode->table;
- ret = ioc_page_destroy (page);
-
- if (ret != -1) {
- table->cache_used -= ret;
- }
-
- return waitq;
+ ioc_waitq_t *waitq = NULL;
+
+ if (page == NULL) {
+ goto out;
+ }
+
+ ioc_inode_lock (page->inode);
+ {
+ waitq = __ioc_page_error (page, op_ret, op_errno);
+ }
+ ioc_inode_unlock (page->inode);
+
+out:
+ return waitq;
}
diff --git a/xlators/performance/io-threads/src/Makefile.am b/xlators/performance/io-threads/src/Makefile.am
index 38dea3eb7..d63042e7c 100644
--- a/xlators/performance/io-threads/src/Makefile.am
+++ b/xlators/performance/io-threads/src/Makefile.am
@@ -1,14 +1,15 @@
xlator_LTLIBRARIES = io-threads.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
-io_threads_la_LDFLAGS = -module -avoidversion
+io_threads_la_LDFLAGS = -module -avoid-version
io_threads_la_SOURCES = io-threads.c
io_threads_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = io-threads.h
+noinst_HEADERS = io-threads.h iot-mem-types.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c
index 118ae4242..bbcf4ed26 100644
--- a/xlators/performance/io-threads/src/io-threads.c
+++ b/xlators/performance/io-threads/src/io-threads.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -33,315 +24,343 @@
#include <time.h>
#include "locking.h"
-typedef void *(*iot_worker_fn)(void*);
+void *iot_worker (void *arg);
+int iot_workers_scale (iot_conf_t *conf);
+int __iot_workers_scale (iot_conf_t *conf);
+struct volume_options options[];
-void
-iot_stop_worker (iot_worker_t *worker);
+call_stub_t *
+__iot_dequeue (iot_conf_t *conf, int *pri, struct timespec *sleep)
+{
+ call_stub_t *stub = NULL;
+ int i = 0;
+ struct timeval curtv = {0,}, difftv = {0,};
+
+ *pri = -1;
+ sleep->tv_sec = 0;
+ sleep->tv_nsec = 0;
+ for (i = 0; i < IOT_PRI_MAX; i++) {
+ if (list_empty (&conf->reqs[i]) ||
+ (conf->ac_iot_count[i] >= conf->ac_iot_limit[i]))
+ continue;
+
+ if (i == IOT_PRI_LEAST) {
+ pthread_mutex_lock(&conf->throttle.lock);
+ if (!conf->throttle.sample_time.tv_sec) {
+ /* initialize */
+ gettimeofday(&conf->throttle.sample_time, NULL);
+ } else {
+ /*
+ * Maintain a running count of least priority
+ * operations that are handled over a particular
+ * time interval. The count is provided via
+ * state dump and is used as a measure against
+ * least priority op throttling.
+ */
+ gettimeofday(&curtv, NULL);
+ timersub(&curtv, &conf->throttle.sample_time,
+ &difftv);
+ if (difftv.tv_sec >= IOT_LEAST_THROTTLE_DELAY) {
+ conf->throttle.cached_rate =
+ conf->throttle.sample_cnt;
+ conf->throttle.sample_cnt = 0;
+ conf->throttle.sample_time = curtv;
+ }
+
+ /*
+ * If we're over the configured rate limit,
+ * provide an absolute time to the caller that
+ * represents the soonest we're allowed to
+ * return another least priority request.
+ */
+ if (conf->throttle.rate_limit &&
+ conf->throttle.sample_cnt >=
+ conf->throttle.rate_limit) {
+ struct timeval delay;
+ delay.tv_sec = IOT_LEAST_THROTTLE_DELAY;
+ delay.tv_usec = 0;
+
+ timeradd(&conf->throttle.sample_time,
+ &delay, &curtv);
+ TIMEVAL_TO_TIMESPEC(&curtv, sleep);
+
+ pthread_mutex_unlock(
+ &conf->throttle.lock);
+ break;
+ }
+ }
+ conf->throttle.sample_cnt++;
+ pthread_mutex_unlock(&conf->throttle.lock);
+ }
+
+ stub = list_entry (conf->reqs[i].next, call_stub_t, list);
+ conf->ac_iot_count[i]++;
+ *pri = i;
+ break;
+ }
+
+ if (!stub)
+ return NULL;
+
+ conf->queue_size--;
+ conf->queue_sizes[*pri]--;
+ list_del_init (&stub->list);
+
+ return stub;
+}
-void
-iot_stop_workers (iot_worker_t **workers, int start_idx, int count);
void
-_iot_queue (iot_worker_t *worker, iot_request_t *req);
-
-iot_request_t *
-iot_init_request (iot_worker_t *conf, call_stub_t *stub);
-
-int
-iot_startup_workers (iot_worker_t **workers, int start_idx, int count,
- iot_worker_fn workerfunc);
-
-void *
-iot_worker_unordered (void *arg);
-
-void *
-iot_worker_ordered (void *arg);
-
-int
-iot_startup_worker (iot_worker_t *worker, iot_worker_fn workerfunc);
+__iot_enqueue (iot_conf_t *conf, call_stub_t *stub, int pri)
+{
+ if (pri < 0 || pri >= IOT_PRI_MAX)
+ pri = IOT_PRI_MAX-1;
-void
-iot_destroy_request (iot_worker_t *worker, iot_request_t * req);
+ list_add_tail (&stub->list, &conf->reqs[pri]);
-void
-iot_notify_worker (iot_worker_t *worker)
-{
-#ifndef HAVE_SPINLOCK
- pthread_cond_broadcast (&worker->notifier);
-#else
- sem_post (&worker->notifier);
-#endif
+ conf->queue_size++;
+ conf->queue_sizes[pri]++;
return;
}
-int
-iot_notify_wait (iot_worker_t *worker, int idletime)
-{
- struct timeval tv;
- struct timespec ts = {0, };
- int waitres = 0;
-
- gettimeofday (&tv, NULL);
- /* Slightly skew the idle time for threads so that, we dont
- * have all of them rushing to exit at the same time, if
- * they've been idle.
- */
- ts.tv_sec = skew_sec_idle_time (tv.tv_sec + idletime);
-#ifndef HAVE_SPINLOCK
- waitres = pthread_cond_timedwait (&worker->notifier, &worker->qlock,
- &ts);
-#else
- UNLOCK (&worker->qlock);
- errno = 0;
- waitres = sem_timedwait (&worker->notifier, &ts);
- LOCK (&worker->qlock);
- if (waitres < 0)
- waitres = errno;
-#endif
-
- return waitres;
-}
+void *
+iot_worker (void *data)
+{
+ iot_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ call_stub_t *stub = NULL;
+ struct timespec sleep_till = {0, };
+ int ret = 0;
+ int pri = -1;
+ char timeout = 0;
+ char bye = 0;
+ struct timespec sleep = {0,};
+
+ conf = data;
+ this = conf->this;
+ THIS = this;
+
+ for (;;) {
+ sleep_till.tv_sec = time (NULL) + conf->idle_time;
+
+ pthread_mutex_lock (&conf->mutex);
+ {
+ if (pri != -1) {
+ conf->ac_iot_count[pri]--;
+ pri = -1;
+ }
+ while (conf->queue_size == 0) {
+ conf->sleep_count++;
+
+ ret = pthread_cond_timedwait (&conf->cond,
+ &conf->mutex,
+ &sleep_till);
+ conf->sleep_count--;
+
+ if (ret == ETIMEDOUT) {
+ timeout = 1;
+ break;
+ }
+ }
-void
-iot_notify_init (iot_worker_t *worker)
-{
- if (worker == NULL)
- return;
+ if (timeout) {
+ if (conf->curr_count > IOT_MIN_THREADS) {
+ conf->curr_count--;
+ bye = 1;
+ gf_log (conf->this->name, GF_LOG_DEBUG,
+ "timeout, terminated. conf->curr_count=%d",
+ conf->curr_count);
+ } else {
+ timeout = 0;
+ }
+ }
- LOCK_INIT (&worker->qlock);
+ stub = __iot_dequeue (conf, &pri, &sleep);
+ if (!stub && (sleep.tv_sec || sleep.tv_nsec)) {
+ pthread_cond_timedwait(&conf->cond,
+ &conf->mutex, &sleep);
+ pthread_mutex_unlock(&conf->mutex);
+ continue;
+ }
+ }
+ pthread_mutex_unlock (&conf->mutex);
-#ifndef HAVE_SPINLOCK
- pthread_cond_init (&worker->notifier, NULL);
-#else
- sem_init (&worker->notifier, 0, 0);
-#endif
+ if (stub) /* guard against spurious wakeups */
+ call_resume (stub);
- return;
-}
+ if (bye)
+ break;
+ }
-/* I know this function modularizes things a bit too much,
- * but it is easier on the eyes to read this than see all that locking,
- * queueing, and thread firing in the same curly block, as was the
- * case before this function.
- */
-int
-iot_request_queue_and_thread_fire (iot_worker_t *worker,
- iot_worker_fn workerfunc, iot_request_t *req)
-{
- int ret = -1;
- LOCK (&worker->qlock);
- {
- if (iot_worker_active (worker)) {
- _iot_queue (worker, req);
- ret = 0;
- }else {
- ret = iot_startup_worker (worker, workerfunc);
- if (ret < 0) {
- goto unlock;
- }
- _iot_queue (worker, req);
+ if (pri != -1) {
+ pthread_mutex_lock (&conf->mutex);
+ {
+ conf->ac_iot_count[pri]--;
}
+ pthread_mutex_unlock (&conf->mutex);
}
-unlock:
- UNLOCK (&worker->qlock);
-
- return ret;
+ return NULL;
}
int
-iot_unordered_request_balancer (iot_conf_t *conf)
+do_iot_schedule (iot_conf_t *conf, call_stub_t *stub, int pri)
{
- long int rand = 0;
- int idx = 0;
-
- /* Decide which thread will service the request.
- * FIXME: This should change into some form of load-balancing.
- * */
- rand = random ();
-
- /* If scaling is on, we can choose from any thread
- * that has been allocated upto, max_o_threads, but
- * with scaling off, we'll never have threads more
- * than min_o_threads.
- */
- if (iot_unordered_scaling_on (conf))
- idx = (rand % conf->max_u_threads);
- else
- idx = (rand % conf->min_u_threads);
+ int ret = 0;
- return idx;
-}
-
-
-int
-iot_schedule_unordered (iot_conf_t *conf, inode_t *inode, call_stub_t *stub)
-{
- int32_t idx = 0;
- iot_worker_t *selected_worker = NULL;
- iot_request_t *req = NULL;
- int ret = -1;
+ pthread_mutex_lock (&conf->mutex);
+ {
+ __iot_enqueue (conf, stub, pri);
- idx = iot_unordered_request_balancer (conf);
- selected_worker = conf->uworkers[idx];
+ pthread_cond_signal (&conf->cond);
- req = iot_init_request (selected_worker, stub);
- if (req == NULL) {
- ret = -ENOMEM;
- goto out;
+ ret = __iot_workers_scale (conf);
}
+ pthread_mutex_unlock (&conf->mutex);
- ret = iot_request_queue_and_thread_fire (selected_worker,
- iot_worker_unordered, req);
- if (ret < 0) {
- iot_destroy_request (selected_worker, req);
- }
-out:
return ret;
}
-
-/* Only to be used with ordered requests.
- */
-uint64_t
-iot_create_inode_worker_assoc (iot_conf_t * conf, inode_t * inode)
+char*
+iot_get_pri_meaning (iot_pri_t pri)
{
- long int rand = 0;
- uint64_t idx = 0;
-
- rand = random ();
- /* If scaling is on, we can choose from any thread
- * that has been allocated upto, max_o_threads, but
- * with scaling off, we'll never have threads more
- * than min_o_threads.
- */
- if (iot_ordered_scaling_on (conf))
- idx = (rand % conf->max_o_threads);
- else
- idx = (rand % conf->min_o_threads);
-
- __inode_ctx_put (inode, conf->this, idx);
-
- return idx;
-}
-
-
-/* Assumes inode lock is held. */
-int32_t
-iot_ordered_request_balancer (iot_conf_t *conf, inode_t *inode, uint64_t *idx)
-{
- int ret = -1;
-
- if (__inode_ctx_get (inode, conf->this, idx) < 0)
- *idx = iot_create_inode_worker_assoc (conf, inode);
- else {
- /* Sanity check to ensure the idx received from the inode
- * context is within bounds. We're a bit optimistic in
- * assuming that if an index is within bounds, it is
- * not corrupted. idx is uint so we dont check for less
- * than 0.
- */
- if ((*idx >= (uint64_t)conf->max_o_threads)) {
- gf_log (conf->this->name, GF_LOG_DEBUG,
- "inode context returned insane thread index %"
- PRIu64, *idx);
- ret = -EINVAL;
- goto out;
- }
+ char *name = NULL;
+ switch (pri) {
+ case IOT_PRI_HI:
+ name = "fast";
+ break;
+ case IOT_PRI_NORMAL:
+ name = "normal";
+ break;
+ case IOT_PRI_LO:
+ name = "slow";
+ break;
+ case IOT_PRI_LEAST:
+ name = "least priority";
+ break;
+ case IOT_PRI_MAX:
+ name = "invalid";
+ break;
}
- ret = 0;
-out:
- return ret;
+ return name;
}
-
int
-iot_schedule_ordered (iot_conf_t *conf, inode_t *inode, call_stub_t *stub)
+iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub)
{
- uint64_t idx = 0;
- iot_worker_t *selected_worker = NULL;
- iot_request_t *req = NULL;
- int balstatus = 0, ret = -1;
+ int ret = -1;
+ iot_pri_t pri = IOT_PRI_MAX - 1;
+ iot_conf_t *conf = this->private;
- if (inode == NULL) {
- gf_log (conf->this->name, GF_LOG_DEBUG,
- "Got NULL inode for ordered request");
- ret = -EINVAL;
+ if ((frame->root->pid < GF_CLIENT_PID_MAX) && conf->least_priority) {
+ pri = IOT_PRI_LEAST;
goto out;
}
- LOCK (&inode->lock);
- {
- balstatus = iot_ordered_request_balancer (conf, inode, &idx);
- if (balstatus < 0) {
- gf_log (conf->this->name, GF_LOG_DEBUG,
- "Insane worker index. Unwinding stack");
- ret = -ECANCELED;
- goto unlock_out;
- }
- /* inode lock once acquired, cannot be left here
- * because other gluster main threads might be
- * contending on it to append a request for this file.
- * So we'll also leave the lock only after we've
- * added the request to the worker queue.
- */
- selected_worker = conf->oworkers[idx];
-
- req = iot_init_request (selected_worker, stub);
- if (req == NULL) {
- gf_log (conf->this->name, GF_LOG_ERROR,"out of memory");
- ret = -ENOMEM;
- goto unlock_out;
- }
-
- ret = iot_request_queue_and_thread_fire (selected_worker,
- iot_worker_ordered,
- req);
+ switch (stub->fop) {
+ case GF_FOP_OPEN:
+ case GF_FOP_STAT:
+ case GF_FOP_FSTAT:
+ case GF_FOP_LOOKUP:
+ case GF_FOP_ACCESS:
+ case GF_FOP_READLINK:
+ case GF_FOP_OPENDIR:
+ case GF_FOP_STATFS:
+ case GF_FOP_READDIR:
+ case GF_FOP_READDIRP:
+ pri = IOT_PRI_HI;
+ break;
+
+ case GF_FOP_CREATE:
+ case GF_FOP_FLUSH:
+ case GF_FOP_LK:
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ case GF_FOP_UNLINK:
+ case GF_FOP_SETATTR:
+ case GF_FOP_FSETATTR:
+ case GF_FOP_MKNOD:
+ case GF_FOP_MKDIR:
+ case GF_FOP_RMDIR:
+ case GF_FOP_SYMLINK:
+ case GF_FOP_RENAME:
+ case GF_FOP_LINK:
+ case GF_FOP_SETXATTR:
+ case GF_FOP_GETXATTR:
+ case GF_FOP_FGETXATTR:
+ case GF_FOP_FSETXATTR:
+ case GF_FOP_REMOVEXATTR:
+ case GF_FOP_FREMOVEXATTR:
+ pri = IOT_PRI_NORMAL;
+ break;
+
+ case GF_FOP_READ:
+ case GF_FOP_WRITE:
+ case GF_FOP_FSYNC:
+ case GF_FOP_TRUNCATE:
+ case GF_FOP_FTRUNCATE:
+ case GF_FOP_FSYNCDIR:
+ case GF_FOP_XATTROP:
+ case GF_FOP_FXATTROP:
+ case GF_FOP_RCHECKSUM:
+ case GF_FOP_FALLOCATE:
+ case GF_FOP_DISCARD:
+ case GF_FOP_ZEROFILL:
+ pri = IOT_PRI_LO;
+ break;
+
+ case GF_FOP_NULL:
+ case GF_FOP_FORGET:
+ case GF_FOP_RELEASE:
+ case GF_FOP_RELEASEDIR:
+ case GF_FOP_GETSPEC:
+ case GF_FOP_MAXVALUE:
+ //fail compilation on missing fop
+ //new fop must choose priority.
+ break;
}
-unlock_out:
- UNLOCK (&inode->lock);
-
out:
- if (ret < 0) {
- if (req != NULL) {
- iot_destroy_request (selected_worker, req);
- }
- }
+ gf_log (this->name, GF_LOG_DEBUG, "%s scheduled as %s fop",
+ gf_fop_list[stub->fop], iot_get_pri_meaning (pri));
+ ret = do_iot_schedule (this->private, stub, pri);
return ret;
}
-
int
iot_lookup_cbk (call_frame_t *frame, void * cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *buf, dict_t *xattr)
+ inode_t *inode, struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
{
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf, xattr);
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, xdata,
+ postparent);
return 0;
}
int
iot_lookup_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xattr_req)
+ dict_t *xdata)
{
STACK_WIND (frame, iot_lookup_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->lookup,
- loc, xattr_req);
+ loc, xdata);
return 0;
}
int
-iot_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
+iot_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
call_stub_t *stub = NULL;
int ret = -1;
- stub = fop_lookup_stub (frame, iot_lookup_wrapper, loc, xattr_req);
+ stub = fop_lookup_stub (frame, iot_lookup_wrapper, loc, xdata);
if (!stub) {
gf_log (this->name, GF_LOG_ERROR,
"cannot create lookup stub (out of memory)");
@@ -349,15 +368,15 @@ iot_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
goto out;
}
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
if (stub != NULL) {
call_stub_destroy (stub);
}
- STACK_UNWIND (frame, -1, -ret, NULL, NULL, NULL);
+ STACK_UNWIND_STRICT (lookup, frame, -1, -ret, NULL, NULL, NULL,
+ NULL);
}
return 0;
@@ -365,210 +384,103 @@ out:
int
-iot_chmod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+iot_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop,
+ xdata);
return 0;
}
int
-iot_chmod_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode)
+iot_setattr_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- STACK_WIND (frame, iot_chmod_cbk,
+ STACK_WIND (frame, iot_setattr_cbk,
FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->chmod,
- loc, mode);
+ FIRST_CHILD (this)->fops->setattr,
+ loc, stbuf, valid, xdata);
return 0;
}
int
-iot_chmod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
+iot_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
call_stub_t *stub = NULL;
- fd_t *fd = NULL;
- int ret = -1;
-
- stub = fop_chmod_stub (frame, iot_chmod_wrapper, loc, mode);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create chmod stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- fd = fd_lookup (loc->inode, frame->root->pid);
- if (fd == NULL)
- ret = iot_schedule_unordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- else {
- ret = iot_schedule_ordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- fd_unref (fd);
- }
-
-out:
- if (ret < 0) {
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
-
- STACK_UNWIND (frame, -1, -ret, NULL);
- }
-
- return 0;
-}
-
-
-int
-iot_fchmod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-
-int
-iot_fchmod_wrapper (call_frame_t *frame, xlator_t *this,
- fd_t *fd, mode_t mode)
-{
- STACK_WIND (frame, iot_fchmod_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fchmod, fd, mode);
- return 0;
-}
-
-
-int
-iot_fchmod (call_frame_t *frame, xlator_t *this, fd_t *fd, mode_t mode)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
+ int ret = -1;
- stub = fop_fchmod_stub (frame, iot_fchmod_wrapper, fd, mode);
+ stub = fop_setattr_stub (frame, iot_setattr_wrapper, loc, stbuf, valid,
+ xdata);
if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create fchmod stub"
- "(out of memory)");
+ gf_log (this->name, GF_LOG_ERROR, "Cannot create setattr stub"
+ "(Out of memory)");
ret = -ENOMEM;
goto out;
}
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret, NULL);
-
if (stub != NULL) {
call_stub_destroy (stub);
}
- }
- return 0;
-}
-
-
-int
-iot_chown_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-
-int
-iot_chown_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, uid_t uid,
- gid_t gid)
-{
- STACK_WIND (frame, iot_chown_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->chown,
- loc, uid, gid);
- return 0;
-}
-
-int
-iot_chown (call_frame_t *frame, xlator_t *this, loc_t *loc, uid_t uid,
- gid_t gid)
-{
- call_stub_t *stub = NULL;
- fd_t *fd = NULL;
- int ret = -1;
-
- stub = fop_chown_stub (frame, iot_chown_wrapper, loc, uid, gid);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create chown stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- fd = fd_lookup (loc->inode, frame->root->pid);
- if (fd == NULL)
- ret = iot_schedule_unordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- else {
- ret = iot_schedule_ordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- fd_unref (fd);
+ STACK_UNWIND_STRICT (setattr, frame, -1, -ret, NULL, NULL, NULL);
}
-out:
- if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
return 0;
}
int
-iot_fchown_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+iot_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno, preop, postop,
+ xdata);
return 0;
}
int
-iot_fchown_wrapper (call_frame_t *frame, xlator_t *this,
- fd_t *fd, uid_t uid, gid_t gid)
+iot_fsetattr_wrapper (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- STACK_WIND (frame, iot_fchown_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fchown, fd, uid, gid);
+ STACK_WIND (frame, iot_fsetattr_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, valid,
+ xdata);
return 0;
}
int
-iot_fchown (call_frame_t *frame, xlator_t *this, fd_t *fd, uid_t uid, gid_t gid)
+iot_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
call_stub_t *stub = NULL;
- int ret = -1;
+ int ret = -1;
- stub = fop_fchown_stub (frame, iot_fchown_wrapper, fd, uid, gid);
+ stub = fop_fsetattr_stub (frame, iot_fsetattr_wrapper, fd, stbuf,
+ valid, xdata);
if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create fchown stub"
+ gf_log (this->name, GF_LOG_ERROR, "cannot create fsetattr stub"
"(out of memory)");
ret = -ENOMEM;
goto out;
}
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret, NULL);
+ STACK_UNWIND_STRICT (fsetattr, frame, -1, -ret, NULL, NULL,
+ NULL);
if (stub != NULL) {
call_stub_destroy (stub);
}
@@ -579,30 +491,31 @@ out:
int
iot_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, xdata);
return 0;
}
int
iot_access_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t mask)
+ int32_t mask, dict_t *xdata)
{
STACK_WIND (frame, iot_access_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->access, loc, mask);
+ FIRST_CHILD (this)->fops->access, loc, mask, xdata);
return 0;
}
int
-iot_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
+iot_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
{
call_stub_t *stub = NULL;
int ret = -1;
- stub = fop_access_stub (frame, iot_access_wrapper, loc, mask);
+ stub = fop_access_stub (frame, iot_access_wrapper, loc, mask, xdata);
if (!stub) {
gf_log (this->name, GF_LOG_ERROR, "cannot create access stub"
"(out of memory)");
@@ -610,11 +523,10 @@ iot_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
goto out;
}
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret);
+ STACK_UNWIND_STRICT (access, frame, -1, -ret, NULL);
if (stub != NULL) {
call_stub_destroy (stub);
@@ -626,32 +538,34 @@ out:
int
iot_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, const char *path)
+ int32_t op_ret, int32_t op_errno, const char *path,
+ struct iatt *stbuf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, path);
+ STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, stbuf,
+ xdata);
return 0;
}
int
iot_readlink_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- size_t size)
+ size_t size, dict_t *xdata)
{
STACK_WIND (frame, iot_readlink_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->readlink,
- loc, size);
+ loc, size, xdata);
return 0;
}
int
-iot_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size)
+iot_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, dict_t *xdata)
{
call_stub_t *stub = NULL;
int ret = -1;
- stub = fop_readlink_stub (frame, iot_readlink_wrapper, loc, size);
+ stub = fop_readlink_stub (frame, iot_readlink_wrapper, loc, size, xdata);
if (!stub) {
gf_log (this->name, GF_LOG_ERROR, "cannot create readlink stub"
"(out of memory)");
@@ -659,12 +573,11 @@ iot_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size)
goto out;
}
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret, NULL);
+ STACK_UNWIND_STRICT (readlink, frame, -1, -ret, NULL, NULL, NULL);
if (stub != NULL) {
call_stub_destroy (stub);
@@ -678,31 +591,35 @@ out:
int
iot_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *buf)
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+ STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
int
iot_mknod_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev)
+ dev_t rdev, mode_t umask, dict_t *xdata)
{
STACK_WIND (frame, iot_mknod_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->mknod, loc, mode, rdev);
+ FIRST_CHILD (this)->fops->mknod, loc, mode, rdev, umask,
+ xdata);
return 0;
}
int
iot_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev)
+ dev_t rdev, mode_t umask, dict_t *xdata)
{
call_stub_t *stub = NULL;
int ret = -1;
- stub = fop_mknod_stub (frame, iot_mknod_wrapper, loc, mode, rdev);
+ stub = fop_mknod_stub (frame, iot_mknod_wrapper, loc, mode, rdev,
+ umask, xdata);
if (!stub) {
gf_log (this->name, GF_LOG_ERROR, "cannot create mknod stub"
"(out of memory)");
@@ -710,12 +627,12 @@ iot_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
goto out;
}
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret, NULL, NULL);
+ STACK_UNWIND_STRICT (mknod, frame, -1, -ret, NULL, NULL, NULL,
+ NULL, NULL);
if (stub != NULL) {
call_stub_destroy (stub);
@@ -728,29 +645,34 @@ out:
int
iot_mkdir_cbk (call_frame_t *frame, void * cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *buf)
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+ STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
int
-iot_mkdir_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
+iot_mkdir_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
{
STACK_WIND (frame, iot_mkdir_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->mkdir, loc, mode);
+ FIRST_CHILD (this)->fops->mkdir, loc, mode, umask, xdata);
return 0;
}
int
-iot_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
+iot_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
{
call_stub_t *stub = NULL;
int ret = -1;
- stub = fop_mkdir_stub (frame, iot_mkdir_wrapper, loc, mode);
+ stub = fop_mkdir_stub (frame, iot_mkdir_wrapper, loc, mode, umask,
+ xdata);
if (!stub) {
gf_log (this->name, GF_LOG_ERROR, "cannot create mkdir stub"
"(out of memory)");
@@ -758,12 +680,12 @@ iot_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
goto out;
}
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret, NULL, NULL);
+ STACK_UNWIND_STRICT (mkdir, frame, -1, -ret, NULL, NULL, NULL,
+ NULL, NULL);
if (stub != NULL) {
call_stub_destroy (stub);
@@ -775,29 +697,31 @@ out:
int
iot_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, preparent,
+ postparent, xdata);
return 0;
}
int
-iot_rmdir_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc)
+iot_rmdir_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, dict_t *xdata)
{
STACK_WIND (frame, iot_rmdir_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->rmdir, loc);
+ FIRST_CHILD (this)->fops->rmdir, loc, flags, xdata);
return 0;
}
int
-iot_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
+iot_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, dict_t *xdata)
{
call_stub_t *stub = NULL;
int ret = -1;
- stub = fop_rmdir_stub (frame, iot_rmdir_wrapper, loc);
+ stub = fop_rmdir_stub (frame, iot_rmdir_wrapper, loc, flags, xdata);
if (!stub) {
gf_log (this->name, GF_LOG_ERROR, "cannot create rmdir stub"
"(out of memory)");
@@ -805,12 +729,11 @@ iot_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
goto out;
}
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret);
-
+ STACK_UNWIND_STRICT (rmdir, frame, -1, -ret, NULL, NULL, NULL);
+
if (stub != NULL) {
call_stub_destroy (stub);
}
@@ -822,31 +745,35 @@ out:
int
iot_symlink_cbk (call_frame_t *frame, void * cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *buf)
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+ STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
int
iot_symlink_wrapper (call_frame_t *frame, xlator_t *this, const char *linkname,
- loc_t *loc)
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
STACK_WIND (frame, iot_symlink_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->symlink, linkname, loc);
+ FIRST_CHILD (this)->fops->symlink, linkname, loc, umask,
+ xdata);
return 0;
}
int
iot_symlink (call_frame_t *frame, xlator_t *this, const char *linkname,
- loc_t *loc)
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
call_stub_t *stub = NULL;
int ret = -1;
- stub = fop_symlink_stub (frame, iot_symlink_wrapper, linkname, loc);
+ stub = fop_symlink_stub (frame, iot_symlink_wrapper, linkname, loc,
+ umask, xdata);
if (!stub) {
gf_log (this->name, GF_LOG_ERROR, "cannot create symlink stub"
"(out of memory)");
@@ -854,13 +781,12 @@ iot_symlink (call_frame_t *frame, xlator_t *this, const char *linkname,
goto out;
}
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret, NULL, NULL);
-
+ STACK_UNWIND_STRICT (symlink, frame, -1, -ret, NULL, NULL, NULL,
+ NULL, NULL);
if (stub != NULL) {
call_stub_destroy (stub);
}
@@ -872,30 +798,35 @@ out:
int
iot_rename_cbk (call_frame_t *frame, void * cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf, preoldparent,
+ postoldparent, prenewparent, postnewparent, xdata);
return 0;
}
int
iot_rename_wrapper (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc)
+ loc_t *newloc, dict_t *xdata)
{
STACK_WIND (frame, iot_rename_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->rename, oldloc, newloc);
+ FIRST_CHILD (this)->fops->rename, oldloc, newloc, xdata);
return 0;
}
int
-iot_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
+iot_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
call_stub_t *stub = NULL;
int ret = -1;
- stub = fop_rename_stub (frame, iot_rename_wrapper, oldloc, newloc);
+ stub = fop_rename_stub (frame, iot_rename_wrapper, oldloc, newloc, xdata);
if (!stub) {
gf_log (this->name, GF_LOG_DEBUG, "cannot create rename stub"
"(out of memory)");
@@ -903,49 +834,50 @@ iot_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
goto out;
}
- ret = iot_schedule_unordered ((iot_conf_t *)this->private,
- oldloc->inode, stub);
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret, NULL);
-
+ STACK_UNWIND_STRICT (rename, frame, -1, -ret, NULL, NULL, NULL,
+ NULL, NULL, NULL);
if (stub != NULL) {
call_stub_destroy (stub);
}
}
-
+
return 0;
}
int
iot_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd)
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, fd);
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
return 0;
}
int
iot_open_wrapper (call_frame_t * frame, xlator_t * this, loc_t *loc,
- int32_t flags, fd_t * fd)
+ int32_t flags, fd_t * fd, dict_t *xdata)
{
STACK_WIND (frame, iot_open_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->open, loc, flags, fd);
+ FIRST_CHILD (this)->fops->open, loc, flags, fd,
+ xdata);
return 0;
}
int
iot_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
call_stub_t *stub = NULL;
int ret = -1;
- stub = fop_open_stub (frame, iot_open_wrapper, loc, flags, fd);
+ stub = fop_open_stub (frame, iot_open_wrapper, loc, flags, fd,
+ xdata);
if (!stub) {
gf_log (this->name, GF_LOG_ERROR,
"cannot create open call stub"
@@ -954,17 +886,16 @@ iot_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
goto out;
}
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret, NULL, 0);
+ STACK_UNWIND_STRICT (open, frame, -1, -ret, NULL, NULL);
if (stub != NULL) {
call_stub_destroy (stub);
}
- }
+ }
return 0;
}
@@ -973,34 +904,37 @@ out:
int
iot_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
- struct stat *stbuf)
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, fd, inode, stbuf);
+ STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, stbuf,
+ preparent, postparent, xdata);
return 0;
}
int
iot_create_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, mode_t mode, fd_t *fd)
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
{
STACK_WIND (frame, iot_create_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->create,
- loc, flags, mode, fd);
+ loc, flags, mode, umask, fd, xdata);
return 0;
}
int
iot_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, fd_t *fd)
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
call_stub_t *stub = NULL;
int ret = -1;
stub = fop_create_stub (frame, iot_create_wrapper, loc, flags, mode,
- fd);
+ umask, fd, xdata);
if (!stub) {
gf_log (this->name, GF_LOG_ERROR,
"cannot create \"create\" call stub"
@@ -1009,12 +943,12 @@ iot_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
goto out;
}
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret, NULL, 0);
+ STACK_UNWIND_STRICT (create, frame, -1, -ret, NULL, NULL, NULL,
+ NULL, NULL, NULL);
if (stub != NULL) {
call_stub_destroy (stub);
@@ -1028,9 +962,11 @@ out:
int
iot_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct stat *stbuf, struct iobref *iobref)
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf, iobref);
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
+ stbuf, iobref, xdata);
return 0;
}
@@ -1038,39 +974,39 @@ iot_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
iot_readv_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, uint32_t flags, dict_t *xdata)
{
STACK_WIND (frame, iot_readv_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readv,
- fd, size, offset);
+ fd, size, offset, flags, xdata);
return 0;
}
int
iot_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, uint32_t flags, dict_t *xdata)
{
call_stub_t *stub = NULL;
int ret = -1;
- stub = fop_readv_stub (frame, iot_readv_wrapper, fd, size, offset);
+ stub = fop_readv_stub (frame, iot_readv_wrapper, fd, size, offset,
+ flags, xdata);
if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"cannot create readv call stub"
"(out of memory)");
ret = -ENOMEM;
goto out;
}
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret, NULL, 0);
-
+ STACK_UNWIND_STRICT (readv, frame, -1, -ret, NULL, -1, NULL,
+ NULL, NULL);
if (stub != NULL) {
call_stub_destroy (stub);
}
@@ -1081,31 +1017,31 @@ out:
int
iot_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
return 0;
}
int
-iot_flush_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd)
+iot_flush_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
STACK_WIND (frame, iot_flush_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->flush,
- fd);
+ fd, xdata);
return 0;
}
int
-iot_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+iot_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
call_stub_t *stub = NULL;
int ret = -1;
- stub = fop_flush_stub (frame, iot_flush_wrapper, fd);
+ stub = fop_flush_stub (frame, iot_flush_wrapper, fd, xdata);
if (!stub) {
gf_log (this->name, GF_LOG_ERROR,
"cannot create flush_cbk call stub"
@@ -1114,11 +1050,10 @@ iot_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
goto out;
}
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret);
+ STACK_UNWIND_STRICT (flush, frame, -1, -ret, NULL);
if (stub != NULL) {
call_stub_destroy (stub);
@@ -1130,32 +1065,35 @@ out:
int
iot_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
int
iot_fsync_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync)
+ int32_t datasync, dict_t *xdata)
{
STACK_WIND (frame, iot_fsync_cbk,
FIRST_CHILD (this),
FIRST_CHILD (this)->fops->fsync,
- fd, datasync);
+ fd, datasync, xdata);
return 0;
}
int
-iot_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync)
+iot_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
{
call_stub_t *stub = NULL;
int ret = -1;
- stub = fop_fsync_stub (frame, iot_fsync_wrapper, fd, datasync);
+ stub = fop_fsync_stub (frame, iot_fsync_wrapper, fd, datasync, xdata);
if (!stub) {
gf_log (this->name, GF_LOG_ERROR,
"cannot create fsync_cbk call stub"
@@ -1164,13 +1102,12 @@ iot_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync)
goto out;
}
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret);
-
+ STACK_UNWIND_STRICT (fsync, frame, -1, -ret, NULL, NULL, NULL);
+
if (stub != NULL) {
call_stub_destroy (stub);
}
@@ -1181,9 +1118,11 @@ out:
int
iot_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, stbuf);
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
return 0;
}
@@ -1191,12 +1130,13 @@ iot_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
iot_writev_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count,
- off_t offset, struct iobref *iobref)
+ off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
{
STACK_WIND (frame, iot_writev_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->writev,
- fd, vector, count, offset, iobref);
+ fd, vector, count, offset, flags, iobref, xdata);
return 0;
}
@@ -1204,13 +1144,13 @@ iot_writev_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
int
iot_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref)
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
call_stub_t *stub = NULL;
int ret = -1;
- stub = fop_writev_stub (frame, iot_writev_wrapper,
- fd, vector, count, offset, iobref);
+ stub = fop_writev_stub (frame, iot_writev_wrapper, fd, vector,
+ count, offset, flags, iobref, xdata);
if (!stub) {
gf_log (this->name, GF_LOG_ERROR,
@@ -1220,12 +1160,11 @@ iot_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
goto out;
}
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret, NULL);
-
+ STACK_UNWIND_STRICT (writev, frame, -1, -ret, NULL, NULL, NULL);
+
if (stub != NULL) {
call_stub_destroy (stub);
}
@@ -1237,33 +1176,34 @@ out:
int32_t
iot_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct flock *flock)
+ int32_t op_ret, int32_t op_errno, struct gf_flock *flock,
+ dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, flock);
+ STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, flock, xdata);
return 0;
}
int
iot_lk_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t cmd, struct flock *flock)
+ int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
STACK_WIND (frame, iot_lk_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lk,
- fd, cmd, flock);
+ fd, cmd, flock, xdata);
return 0;
}
int
iot_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct flock *flock)
+ struct gf_flock *flock, dict_t *xdata)
{
call_stub_t *stub = NULL;
int ret = -1;
- stub = fop_lk_stub (frame, iot_lk_wrapper, fd, cmd, flock);
+ stub = fop_lk_stub (frame, iot_lk_wrapper, fd, cmd, flock, xdata);
if (!stub) {
gf_log (this->name, GF_LOG_ERROR,
@@ -1273,11 +1213,10 @@ iot_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
goto out;
}
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret, NULL);
+ STACK_UNWIND_STRICT (lk, frame, -1, -ret, NULL, NULL);
if (stub != NULL) {
call_stub_destroy (stub);
@@ -1289,32 +1228,31 @@ out:
int
iot_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
return 0;
}
int
-iot_stat_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc)
+iot_stat_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
STACK_WIND (frame, iot_stat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->stat,
- loc);
+ loc, xdata);
return 0;
}
int
-iot_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+iot_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
call_stub_t *stub = NULL;
- fd_t *fd = NULL;
int ret = -1;
- stub = fop_stat_stub (frame, iot_stat_wrapper, loc);
+ stub = fop_stat_stub (frame, iot_stat_wrapper, loc, xdata);
if (!stub) {
gf_log (this->name, GF_LOG_ERROR,
"cannot create fop_stat call stub"
@@ -1323,21 +1261,11 @@ iot_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
goto out;
}
- fd = fd_lookup (loc->inode, frame->root->pid);
- /* File is not open, so we can send it through unordered pool.
- */
- if (fd == NULL)
- ret = iot_schedule_unordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- else {
- ret = iot_schedule_ordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- fd_unref (fd);
- }
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret, NULL);
+ STACK_UNWIND_STRICT (stat, frame, -1, -ret, NULL, NULL);
if (stub != NULL) {
call_stub_destroy (stub);
@@ -1349,31 +1277,31 @@ out:
int
iot_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata);
return 0;
}
int
-iot_fstat_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd)
+iot_fstat_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
STACK_WIND (frame, iot_fstat_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->fstat,
- fd);
+ fd, xdata);
return 0;
}
int
-iot_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
+iot_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
call_stub_t *stub = NULL;
int ret = -1;
- stub = fop_fstat_stub (frame, iot_fstat_wrapper, fd);
+ stub = fop_fstat_stub (frame, iot_fstat_wrapper, fd, xdata);
if (!stub) {
gf_log (this->name, GF_LOG_ERROR,
"cannot create fop_fstat call stub"
@@ -1382,11 +1310,10 @@ iot_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
goto out;
}
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret, NULL);
+ STACK_UNWIND_STRICT (fstat, frame, -1, -ret, NULL, NULL);
if (stub != NULL) {
call_stub_destroy (stub);
@@ -1398,34 +1325,36 @@ out:
int
iot_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
return 0;
}
int
iot_truncate_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
STACK_WIND (frame, iot_truncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->truncate,
- loc, offset);
+ loc, offset, xdata);
return 0;
}
int
-iot_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
+iot_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
call_stub_t *stub;
- fd_t *fd = NULL;
int ret = -1;
- stub = fop_truncate_stub (frame, iot_truncate_wrapper, loc, offset);
-
+ stub = fop_truncate_stub (frame, iot_truncate_wrapper, loc, offset,
+ xdata);
if (!stub) {
gf_log (this->name, GF_LOG_ERROR,
"cannot create fop_stat call stub"
@@ -1434,19 +1363,12 @@ iot_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
goto out;
}
- fd = fd_lookup (loc->inode, frame->root->pid);
- if (fd == NULL)
- ret = iot_schedule_unordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- else {
- ret = iot_schedule_ordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- fd_unref (fd);
- }
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret, NULL);
+ STACK_UNWIND_STRICT (truncate, frame, -1, -ret, NULL, NULL,
+ NULL);
if (stub != NULL) {
call_stub_destroy (stub);
@@ -1459,32 +1381,36 @@ out:
int
iot_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
return 0;
}
int
iot_ftruncate_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- off_t offset)
+ off_t offset, dict_t *xdata)
{
STACK_WIND (frame, iot_ftruncate_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->ftruncate,
- fd, offset);
+ fd, offset, xdata);
return 0;
}
int
-iot_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
+iot_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
call_stub_t *stub = NULL;
int ret = -1;
- stub = fop_ftruncate_stub (frame, iot_ftruncate_wrapper, fd, offset);
+ stub = fop_ftruncate_stub (frame, iot_ftruncate_wrapper, fd, offset,
+ xdata);
if (!stub) {
gf_log (this->name, GF_LOG_ERROR,
"cannot create fop_ftruncate call stub"
@@ -1492,72 +1418,11 @@ iot_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
ret = -ENOMEM;
goto out;
}
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
-
-
-int
-iot_utimens_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-
-int
-iot_utimens_wrapper (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct timespec tv[2])
-{
- STACK_WIND (frame, iot_utimens_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->utimens,
- loc, tv);
- return 0;
-}
-
-
-int
-iot_utimens (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct timespec tv[2])
-{
- call_stub_t *stub;
- fd_t *fd = NULL;
- int ret = -1;
-
- stub = fop_utimens_stub (frame, iot_utimens_wrapper, loc, tv);
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create fop_utimens call stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
-
- fd = fd_lookup (loc->inode, frame->root->pid);
- if (fd == NULL)
- ret = iot_schedule_unordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- else {
- ret = iot_schedule_ordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- fd_unref (fd);
- }
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret, NULL);
+ STACK_UNWIND_STRICT (ftruncate, frame, -1, -ret, NULL, NULL, NULL);
if (stub != NULL) {
call_stub_destroy (stub);
@@ -1567,86 +1432,38 @@ out:
}
-int
-iot_checksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, uint8_t *file_checksum,
- uint8_t *dir_checksum)
-{
- STACK_UNWIND (frame, op_ret, op_errno, file_checksum, dir_checksum);
- return 0;
-}
-
-
-int
-iot_checksum_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags)
-{
- STACK_WIND (frame, iot_checksum_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->checksum,
- loc, flags);
-
- return 0;
-}
-
-
-int
-iot_checksum (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags)
-{
- call_stub_t *stub = NULL;
- int ret = -1;
-
- stub = fop_checksum_stub (frame, iot_checksum_wrapper, loc, flags);
-
- if (!stub) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot create fop_checksum call stub"
- "(out of memory)");
- ret = -ENOMEM;
- goto out;
- }
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
-out:
- if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret, NULL, NULL);
-
- if (stub != NULL) {
- call_stub_destroy (stub);
- }
- }
- return 0;
-}
-
int
iot_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, preparent,
+ postparent, xdata);
return 0;
}
int
-iot_unlink_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc)
+iot_unlink_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t xflag, dict_t *xdata)
{
STACK_WIND (frame, iot_unlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->unlink,
- loc);
-
+ loc, xflag, xdata);
return 0;
}
int
-iot_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+iot_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t xflag,
+ dict_t *xdata)
{
call_stub_t *stub = NULL;
int ret = -1;
-
- stub = fop_unlink_stub (frame, iot_unlink_wrapper, loc);
+
+ stub = fop_unlink_stub (frame, iot_unlink_wrapper, loc, xflag, xdata);
if (!stub) {
gf_log (this->name, GF_LOG_ERROR,
"cannot create fop_unlink call stub"
@@ -1655,17 +1472,16 @@ iot_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
goto out;
}
- ret = iot_schedule_unordered((iot_conf_t *)this->private, loc->inode,
- stub);
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret);
-
+ STACK_UNWIND_STRICT (unlink, frame, -1, -ret, NULL, NULL, NULL);
+
if (stub != NULL) {
call_stub_destroy (stub);
}
- }
+ }
return 0;
}
@@ -1674,30 +1490,34 @@ out:
int
iot_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *buf)
+ struct iatt *buf, struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+ STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
int
-iot_link_wrapper (call_frame_t *frame, xlator_t *this, loc_t *old, loc_t *new)
+iot_link_wrapper (call_frame_t *frame, xlator_t *this, loc_t *old, loc_t *new,
+ dict_t *xdata)
{
STACK_WIND (frame, iot_link_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->link, old, new);
+ FIRST_CHILD (this)->fops->link, old, new, xdata);
return 0;
}
int
-iot_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
+iot_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
call_stub_t *stub = NULL;
int ret = -1;
- stub = fop_link_stub (frame, iot_link_wrapper, oldloc, newloc);
+ stub = fop_link_stub (frame, iot_link_wrapper, oldloc, newloc, xdata);
if (!stub) {
gf_log (this->name, GF_LOG_ERROR, "cannot create link stub"
"(out of memory)");
@@ -1705,11 +1525,11 @@ iot_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
goto out;
}
- ret = iot_schedule_unordered ((iot_conf_t *)this->private,
- oldloc->inode, stub);
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret, NULL, NULL);
+ STACK_UNWIND_STRICT (link, frame, -1, -ret, NULL, NULL, NULL,
+ NULL, NULL);
if (stub != NULL) {
call_stub_destroy (stub);
@@ -1721,29 +1541,31 @@ out:
int
iot_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, fd);
+ STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata);
return 0;
}
int
-iot_opendir_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+iot_opendir_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
STACK_WIND (frame, iot_opendir_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->opendir, loc, fd);
+ FIRST_CHILD (this)->fops->opendir, loc, fd, xdata);
return 0;
}
int
-iot_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+iot_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
call_stub_t *stub = NULL;
int ret = -1;
- stub = fop_opendir_stub (frame, iot_opendir_wrapper, loc, fd);
+ stub = fop_opendir_stub (frame, iot_opendir_wrapper, loc, fd, xdata);
if (!stub) {
gf_log (this->name, GF_LOG_ERROR, "cannot create opendir stub"
"(out of memory)");
@@ -1751,11 +1573,10 @@ iot_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
goto out;
}
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret, NULL);
+ STACK_UNWIND_STRICT (opendir, frame, -1, -ret, NULL, NULL);
if (stub != NULL) {
call_stub_destroy (stub);
@@ -1767,30 +1588,32 @@ out:
int
iot_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, xdata);
return 0;
}
int
iot_fsyncdir_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int datasync)
+ int datasync, dict_t *xdata)
{
STACK_WIND (frame, iot_fsyncdir_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsyncdir, fd, datasync);
+ FIRST_CHILD (this)->fops->fsyncdir, fd, datasync, xdata);
return 0;
}
int
-iot_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync)
+iot_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
+ dict_t *xdata)
{
call_stub_t *stub = NULL;
int ret = -1;
- stub = fop_fsyncdir_stub (frame, iot_fsyncdir_wrapper, fd, datasync);
+ stub = fop_fsyncdir_stub (frame, iot_fsyncdir_wrapper, fd, datasync,
+ xdata);
if (!stub) {
gf_log (this->name, GF_LOG_ERROR, "cannot create fsyncdir stub"
"(out of memory)");
@@ -1798,11 +1621,10 @@ iot_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync)
goto out;
}
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret);
+ STACK_UNWIND_STRICT (fsyncdir, frame, -1, -ret, NULL);
if (stub != NULL) {
call_stub_destroy (stub);
@@ -1814,29 +1636,31 @@ out:
int
iot_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf)
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
return 0;
}
int
-iot_statfs_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc)
+iot_statfs_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
{
STACK_WIND (frame, iot_statfs_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->statfs, loc);
+ FIRST_CHILD (this)->fops->statfs, loc, xdata);
return 0;
}
int
-iot_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
+iot_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
call_stub_t *stub = NULL;
int ret = -1;
- stub = fop_statfs_stub (frame, iot_statfs_wrapper, loc);
+ stub = fop_statfs_stub (frame, iot_statfs_wrapper, loc, xdata);
if (!stub) {
gf_log (this->name, GF_LOG_ERROR, "cannot create statfs stub"
"(out of memory)");
@@ -1844,11 +1668,10 @@ iot_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
goto out;
}
- ret = iot_schedule_unordered ((iot_conf_t *)this->private, loc->inode,
- stub);
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret, NULL);
+ STACK_UNWIND_STRICT (statfs, frame, -1, -ret, NULL, NULL);
if (stub != NULL) {
call_stub_destroy (stub);
@@ -1860,33 +1683,32 @@ out:
int
iot_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
return 0;
}
int
iot_setxattr_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *dict, int32_t flags)
+ dict_t *dict, int32_t flags, dict_t *xdata)
{
STACK_WIND (frame, iot_setxattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setxattr, loc, dict, flags);
+ FIRST_CHILD (this)->fops->setxattr, loc, dict, flags, xdata);
return 0;
}
int
iot_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
call_stub_t *stub = NULL;
- fd_t *fd = NULL;
int ret = -1;
stub = fop_setxattr_stub (frame, iot_setxattr_wrapper, loc, dict,
- flags);
+ flags, xdata);
if (!stub) {
gf_log (this->name, GF_LOG_ERROR, "cannot create setxattr stub"
"(out of memory)");
@@ -1894,19 +1716,11 @@ iot_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
goto out;
}
- fd = fd_lookup (loc->inode, frame->root->pid);
- if (fd == NULL)
- ret = iot_schedule_unordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- else {
- ret = iot_schedule_ordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- fd_unref (fd);
- }
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret, NULL);
+ STACK_UNWIND_STRICT (setxattr, frame, -1, -ret, NULL);
if (stub != NULL) {
call_stub_destroy (stub);
@@ -1918,32 +1732,31 @@ out:
int
iot_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, dict);
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
return 0;
}
int
iot_getxattr_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
STACK_WIND (frame, iot_getxattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->getxattr, loc, name);
+ FIRST_CHILD (this)->fops->getxattr, loc, name, xdata);
return 0;
}
int
iot_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
call_stub_t *stub = NULL;
- fd_t *fd = NULL;
- int ret = -1;
+ int ret = -1;
- stub = fop_getxattr_stub (frame, iot_getxattr_wrapper, loc, name);
+ stub = fop_getxattr_stub (frame, iot_getxattr_wrapper, loc, name, xdata);
if (!stub) {
gf_log (this->name, GF_LOG_ERROR, "cannot create getxattr stub"
"(out of memory)");
@@ -1951,19 +1764,11 @@ iot_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
goto out;
}
- fd = fd_lookup (loc->inode, frame->root->pid);
- if (!fd)
- ret = iot_schedule_unordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- else {
- ret = iot_schedule_ordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- fd_unref (fd);
- }
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret, NULL);
+ STACK_UNWIND_STRICT (getxattr, frame, -1, -ret, NULL, NULL);
if (stub != NULL) {
call_stub_destroy (stub);
@@ -1975,31 +1780,32 @@ out:
int
iot_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, dict);
+ STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, xdata);
return 0;
}
int
iot_fgetxattr_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name)
+ const char *name, dict_t *xdata)
{
STACK_WIND (frame, iot_fgetxattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fgetxattr, fd, name);
+ FIRST_CHILD (this)->fops->fgetxattr, fd, name, xdata);
return 0;
}
int
iot_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name)
+ const char *name, dict_t *xdata)
{
call_stub_t *stub = NULL;
int ret = -1;
- stub = fop_fgetxattr_stub (frame, iot_fgetxattr_wrapper, fd, name);
+ stub = fop_fgetxattr_stub (frame, iot_fgetxattr_wrapper, fd, name, xdata);
if (!stub) {
gf_log (this->name, GF_LOG_ERROR, "cannot create fgetxattr stub"
"(out of memory)");
@@ -2007,11 +1813,10 @@ iot_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
goto out;
}
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret, NULL);
+ STACK_UNWIND_STRICT (fgetxattr, frame, -1, -ret, NULL, NULL);
if (stub != NULL) {
call_stub_destroy (stub);
@@ -2023,32 +1828,33 @@ out:
int
iot_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
return 0;
}
int
iot_fsetxattr_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- dict_t *dict, int32_t flags)
+ dict_t *dict, int32_t flags, dict_t *xdata)
{
STACK_WIND (frame, iot_fsetxattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsetxattr, fd, dict, flags);
+ FIRST_CHILD (this)->fops->fsetxattr, fd, dict, flags,
+ xdata);
return 0;
}
int
iot_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags)
+ int32_t flags, dict_t *xdata)
{
call_stub_t *stub = NULL;
int ret = -1;
stub = fop_fsetxattr_stub (frame, iot_fsetxattr_wrapper, fd, dict,
- flags);
+ flags, xdata);
if (!stub) {
gf_log (this->name, GF_LOG_ERROR, "cannot create fsetxattr stub"
"(out of memory)");
@@ -2056,11 +1862,10 @@ iot_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
goto out;
}
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret);
+ STACK_UNWIND_STRICT (fsetxattr, frame, -1, -ret, NULL);
if (stub != NULL) {
call_stub_destroy (stub);
@@ -2072,33 +1877,32 @@ out:
int
iot_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
return 0;
}
int
iot_removexattr_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
STACK_WIND (frame, iot_removexattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->removexattr, loc, name);
+ FIRST_CHILD (this)->fops->removexattr, loc, name, xdata);
return 0;
}
int
iot_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
+ const char *name, dict_t *xdata)
{
call_stub_t *stub = NULL;
- fd_t *fd = NULL;
int ret = -1;
stub = fop_removexattr_stub (frame, iot_removexattr_wrapper, loc,
- name);
+ name, xdata);
if (!stub) {
gf_log (this->name, GF_LOG_ERROR,"cannot get removexattr fop"
"(out of memory)");
@@ -2106,20 +1910,11 @@ iot_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
goto out;
}
- fd = fd_lookup (loc->inode, frame->root->pid);
- if (!fd)
- ret = iot_schedule_unordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- else {
- ret = iot_schedule_ordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- fd_unref (fd);
- }
-
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret);
-
+ STACK_UNWIND_STRICT (removexattr, frame, -1, -ret, NULL);
+
if (stub != NULL) {
call_stub_destroy (stub);
}
@@ -2128,44 +1923,44 @@ out:
}
int
-iot_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
+iot_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, entries);
+ STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata);
return 0;
}
int
-iot_readdir_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset)
+iot_fremovexattr_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
{
- STACK_WIND (frame, iot_readdir_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->readdir, fd, size, offset);
+ STACK_WIND (frame, iot_fremovexattr_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fremovexattr, fd, name, xdata);
return 0;
}
int
-iot_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+iot_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
{
call_stub_t *stub = NULL;
int ret = -1;
- stub = fop_readdir_stub (frame, iot_readdir_wrapper, fd, size, offset);
+ stub = fop_fremovexattr_stub (frame, iot_fremovexattr_wrapper, fd,
+ name, xdata);
if (!stub) {
- gf_log (this->private, GF_LOG_ERROR,"cannot get readdir stub"
+ gf_log (this->name, GF_LOG_ERROR,"cannot get fremovexattr fop"
"(out of memory)");
ret = -ENOMEM;
goto out;
}
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret, NULL);
+ STACK_UNWIND_STRICT (fremovexattr, frame, -1, -ret, NULL);
if (stub != NULL) {
call_stub_destroy (stub);
@@ -2176,54 +1971,45 @@ out:
int
-iot_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
+iot_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, xattr);
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
return 0;
}
int
-iot_xattrop_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t optype, dict_t *xattr)
+iot_readdirp_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata)
{
- STACK_WIND (frame, iot_xattrop_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->xattrop, loc, optype, xattr);
+ STACK_WIND (frame, iot_readdirp_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->readdirp, fd, size, offset, xdata);
return 0;
}
int
-iot_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t optype, dict_t *xattr)
+iot_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
{
call_stub_t *stub = NULL;
- fd_t *fd = NULL;
int ret = -1;
- stub = fop_xattrop_stub (frame, iot_xattrop_wrapper, loc, optype,
- xattr);
+ stub = fop_readdirp_stub (frame, iot_readdirp_wrapper, fd, size,
+ offset, xdata);
if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create xattrop stub"
+ gf_log (this->private, GF_LOG_ERROR,"cannot get readdir stub"
"(out of memory)");
ret = -ENOMEM;
goto out;
}
- fd = fd_lookup (loc->inode, frame->root->pid);
- if (!fd)
- ret = iot_schedule_unordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- else {
- ret = iot_schedule_ordered ((iot_conf_t *)this->private,
- loc->inode, stub);
- fd_unref (fd);
- }
-
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret, NULL);
+ STACK_UNWIND_STRICT (readdirp, frame, -1, -ret, NULL, NULL);
if (stub != NULL) {
call_stub_destroy (stub);
@@ -2234,43 +2020,46 @@ out:
int
-iot_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
+iot_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, xattr);
+ STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries, xdata);
return 0;
}
+
int
-iot_fxattrop_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t optype, dict_t *xattr)
+iot_readdir_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata)
{
- STACK_WIND (frame, iot_fxattrop_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fxattrop, fd, optype, xattr);
+ STACK_WIND (frame, iot_readdir_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->readdir, fd, size, offset, xdata);
return 0;
}
+
int
-iot_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t optype, dict_t *xattr)
+iot_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
{
call_stub_t *stub = NULL;
int ret = -1;
- stub = fop_fxattrop_stub (frame, iot_fxattrop_wrapper, fd, optype,
- xattr);
+ stub = fop_readdir_stub (frame, iot_readdir_wrapper, fd, size, offset,
+ xdata);
if (!stub) {
- gf_log (this->name, GF_LOG_ERROR, "cannot create fxattrop stub"
+ gf_log (this->private, GF_LOG_ERROR,"cannot get readdir stub"
"(out of memory)");
ret = -ENOMEM;
goto out;
}
- ret = iot_schedule_ordered ((iot_conf_t *)this->private, fd->inode,
- stub);
+ ret = iot_schedule (frame, this, stub);
out:
if (ret < 0) {
- STACK_UNWIND (frame, -1, -ret, NULL);
+ STACK_UNWIND_STRICT (readdir, frame, -1, -ret, NULL, NULL);
+
if (stub != NULL) {
call_stub_destroy (stub);
}
@@ -2278,426 +2067,554 @@ out:
return 0;
}
-
-/* Must be called with worker lock held */
-void
-_iot_queue (iot_worker_t *worker, iot_request_t *req)
+int
+iot_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- list_add_tail (&req->list, &worker->rqlist);
+ STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
- /* dq_cond */
- worker->queue_size++;
- iot_notify_worker(worker);
+int
+iot_inodelk_wrapper (call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ STACK_WIND (frame, iot_inodelk_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->inodelk, volume, loc, cmd, lock,
+ xdata);
+ return 0;
}
-iot_request_t *
-iot_init_request (iot_worker_t *worker, call_stub_t *stub)
+int
+iot_inodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *lock,
+ dict_t *xdata)
{
- iot_request_t *req = NULL;
+ call_stub_t *stub = NULL;
+ int ret = -1;
- req = mem_get (worker->req_pool);
- if (req == NULL) {
+ stub = fop_inodelk_stub (frame, iot_inodelk_wrapper,
+ volume, loc, cmd, lock, xdata);
+ if (!stub) {
+ ret = -ENOMEM;
goto out;
}
- req->stub = stub;
+ ret = iot_schedule (frame, this, stub);
out:
- return req;
-}
+ if (ret < 0) {
+ STACK_UNWIND_STRICT (inodelk, frame, -1, -ret, NULL);
+ if (stub != NULL) {
+ call_stub_destroy (stub);
+ }
+ }
+ return 0;
+}
-void
-iot_destroy_request (iot_worker_t *worker, iot_request_t * req)
+int
+iot_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- if ((req == NULL) || (worker == NULL))
- return;
-
- mem_put (worker->req_pool, req);
+ STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno, xdata);
+ return 0;
}
-/* Must be called with worker lock held. */
-gf_boolean_t
-iot_can_ordered_exit (iot_worker_t * worker)
+int
+iot_finodelk_wrapper (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
{
- gf_boolean_t allow_exit = _gf_false;
- iot_conf_t *conf = NULL;
-
- conf = worker->conf;
- /* We dont want this thread to exit if its index is
- * below the min thread count.
- */
- if (worker->thread_idx >= conf->min_o_threads)
- allow_exit = _gf_true;
-
- return allow_exit;
+ STACK_WIND (frame, iot_finodelk_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->finodelk, volume, fd, cmd, lock,
+ xdata);
+ return 0;
}
-/* Must be called with worker lock held. */
-gf_boolean_t
-iot_ordered_exit (int cond_waitres, iot_worker_t *worker)
+
+int
+iot_finodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock,
+ dict_t *xdata)
{
- gf_boolean_t allow_exit = _gf_false;
+ call_stub_t *stub = NULL;
+ int ret = -1;
- if (worker->state == IOT_STATE_EXIT_REQUEST) {
- allow_exit = _gf_true;
- } else if (cond_waitres == ETIMEDOUT) {
- allow_exit = iot_can_ordered_exit (worker);
+ stub = fop_finodelk_stub (frame, iot_finodelk_wrapper,
+ volume, fd, cmd, lock, xdata);
+ if (!stub) {
+ gf_log (this->private, GF_LOG_ERROR,"cannot get finodelk stub"
+ "(out of memory)");
+ ret = -ENOMEM;
+ goto out;
}
- if (allow_exit) {
- worker->state = IOT_STATE_DEAD;
- worker->thread = 0;
+ ret = iot_schedule (frame, this, stub);
+out:
+ if (ret < 0) {
+ STACK_UNWIND_STRICT (finodelk, frame, -1, -ret, NULL);
+
+ if (stub != NULL) {
+ call_stub_destroy (stub);
+ }
}
+ return 0;
+}
- return allow_exit;
+int
+iot_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
-iot_ordered_request_wait (iot_worker_t * worker)
+iot_entrylk_wrapper (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
- int waitres = 0;
- int retstat = 0;
+ STACK_WIND (frame, iot_entrylk_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->entrylk,
+ volume, loc, basename, cmd, type, xdata);
+ return 0;
+}
- if (worker->state == IOT_STATE_EXIT_REQUEST) {
- retstat = -1;
- goto out;
- }
- waitres = iot_notify_wait (worker, worker->conf->o_idle_time);
- if (iot_ordered_exit (waitres, worker)) {
- retstat = -1;
+int
+iot_entrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ int ret = -1;
+
+ stub = fop_entrylk_stub (frame, iot_entrylk_wrapper,
+ volume, loc, basename, cmd, type, xdata);
+ if (!stub) {
+ gf_log (this->private, GF_LOG_ERROR,"cannot get entrylk stub"
+ "(out of memory)");
+ ret = -ENOMEM;
+ goto out;
}
+ ret = iot_schedule (frame, this, stub);
out:
- return retstat;
-}
-
-
-call_stub_t *
-iot_dequeue_ordered (iot_worker_t *worker)
-{
- call_stub_t *stub = NULL;
- iot_request_t *req = NULL;
- int waitstat = 0;
+ if (ret < 0) {
+ STACK_UNWIND_STRICT (entrylk, frame, -1, -ret, NULL);
- LOCK (&worker->qlock);
- {
- while (!worker->queue_size) {
- waitstat = 0;
- waitstat = iot_ordered_request_wait (worker);
- /* We must've timed out and are now required to
- * exit.
- */
- if (waitstat == -1)
- goto out;
+ if (stub != NULL) {
+ call_stub_destroy (stub);
}
+ }
+ return 0;
+}
- list_for_each_entry (req, &worker->rqlist, list)
- break;
- list_del (&req->list);
- stub = req->stub;
+int
+iot_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fentrylk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
- worker->queue_size--;
- }
-out:
- UNLOCK (&worker->qlock);
- iot_destroy_request (worker, req);
- return stub;
+int
+iot_fentrylk_wrapper (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+{
+ STACK_WIND (frame, iot_fentrylk_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fentrylk,
+ volume, fd, basename, cmd, type, xdata);
+ return 0;
}
-void *
-iot_worker_ordered (void *arg)
+int
+iot_fentrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
- iot_worker_t *worker = arg;
call_stub_t *stub = NULL;
+ int ret = -1;
- while (1) {
+ stub = fop_fentrylk_stub (frame, iot_fentrylk_wrapper,
+ volume, fd, basename, cmd, type, xdata);
+ if (!stub) {
+ gf_log (this->private, GF_LOG_ERROR,"cannot get fentrylk stub"
+ "(out of memory)");
+ ret = -ENOMEM;
+ goto out;
+ }
- stub = iot_dequeue_ordered (worker);
- /* If stub is NULL, we must've timed out waiting for a
- * request and have now been allowed to exit.
- */
- if (!stub)
- break;
- call_resume (stub);
- }
+ ret = iot_schedule (frame, this, stub);
+out:
+ if (ret < 0) {
+ STACK_UNWIND_STRICT (fentrylk, frame, -1, -ret, NULL);
- return NULL;
+ if (stub != NULL) {
+ call_stub_destroy (stub);
+ }
+ }
+ return 0;
}
-/* Must be called with worker lock held. */
-gf_boolean_t
-iot_can_unordered_exit (iot_worker_t * worker)
+int
+iot_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr, dict_t *xdata)
{
- gf_boolean_t allow_exit = _gf_false;
- iot_conf_t *conf = NULL;
+ STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xattr, xdata);
+ return 0;
+}
- conf = worker->conf;
- /* We dont want this thread to exit if its index is
- * below the min thread count.
- */
- if (worker->thread_idx >= conf->min_u_threads)
- allow_exit = _gf_true;
- return allow_exit;
+int
+iot_xattrop_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ STACK_WIND (frame, iot_xattrop_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->xattrop, loc, optype, xattr, xdata);
+ return 0;
}
-/* Must be called with worker lock held. */
-gf_boolean_t
-iot_unordered_exit (int cond_waitres, iot_worker_t *worker)
+int
+iot_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
- gf_boolean_t allow_exit = _gf_false;
+ call_stub_t *stub = NULL;
+ int ret = -1;
- if (worker->state == IOT_STATE_EXIT_REQUEST) {
- allow_exit = _gf_true;
- } else if (cond_waitres == ETIMEDOUT) {
- allow_exit = iot_can_unordered_exit (worker);
+ stub = fop_xattrop_stub (frame, iot_xattrop_wrapper, loc, optype,
+ xattr, xdata);
+ if (!stub) {
+ gf_log (this->name, GF_LOG_ERROR, "cannot create xattrop stub"
+ "(out of memory)");
+ ret = -ENOMEM;
+ goto out;
}
- if (allow_exit) {
- worker->state = IOT_STATE_DEAD;
- worker->thread = 0;
+ ret = iot_schedule (frame, this, stub);
+out:
+ if (ret < 0) {
+ STACK_UNWIND_STRICT (xattrop, frame, -1, -ret, NULL, NULL);
+
+ if (stub != NULL) {
+ call_stub_destroy (stub);
+ }
}
+ return 0;
+}
- return allow_exit;
+
+int
+iot_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, xattr, xdata);
+ return 0;
+}
+
+int
+iot_fxattrop_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ STACK_WIND (frame, iot_fxattrop_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fxattrop, fd, optype, xattr, xdata);
+ return 0;
}
int
-iot_unordered_request_wait (iot_worker_t * worker)
+iot_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
- int waitres = 0;
- int retstat = 0;
+ call_stub_t *stub = NULL;
+ int ret = -1;
- if (worker->state == IOT_STATE_EXIT_REQUEST) {
- retstat = -1;
+ stub = fop_fxattrop_stub (frame, iot_fxattrop_wrapper, fd, optype,
+ xattr, xdata);
+ if (!stub) {
+ gf_log (this->name, GF_LOG_ERROR, "cannot create fxattrop stub"
+ "(out of memory)");
+ ret = -ENOMEM;
goto out;
}
- waitres = iot_notify_wait (worker, worker->conf->u_idle_time);
- if (iot_unordered_exit (waitres, worker)) {
- retstat = -1;
- }
-
+ ret = iot_schedule (frame, this, stub);
out:
- return retstat;
+ if (ret < 0) {
+ STACK_UNWIND_STRICT (fxattrop, frame, -1, -ret, NULL, NULL);
+ if (stub != NULL) {
+ call_stub_destroy (stub);
+ }
+ }
+ return 0;
}
-call_stub_t *
-iot_dequeue_unordered (iot_worker_t *worker)
+int32_t
+iot_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, uint32_t weak_checksum,
+ uint8_t *strong_checksum, dict_t *xdata)
{
- call_stub_t *stub= NULL;
- iot_request_t *req = NULL;
- int waitstat = 0;
-
- LOCK (&worker->qlock);
- {
- while (!worker->queue_size) {
- waitstat = 0;
- waitstat = iot_unordered_request_wait (worker);
- /* If -1, request wait must've timed
- * out.
- */
- if (waitstat == -1)
- goto out;
- }
-
- list_for_each_entry (req, &worker->rqlist, list)
- break;
- list_del (&req->list);
- stub = req->stub;
+ STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno, weak_checksum,
+ strong_checksum, xdata);
+ return 0;
+}
- worker->queue_size--;
- }
-out:
- UNLOCK (&worker->qlock);
- iot_destroy_request (worker, req);
- return stub;
+int32_t
+iot_rchecksum_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, int32_t len, dict_t *xdata)
+{
+ STACK_WIND (frame, iot_rchecksum_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rchecksum, fd, offset, len, xdata);
+ return 0;
}
-void *
-iot_worker_unordered (void *arg)
+int32_t
+iot_rchecksum (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ int32_t len, dict_t *xdata)
{
- iot_worker_t *worker = arg;
call_stub_t *stub = NULL;
+ int ret = -1;
- while (1) {
+ stub = fop_rchecksum_stub (frame, iot_rchecksum_wrapper, fd, offset,
+ len, xdata);
+ if (!stub) {
+ gf_log (this->name, GF_LOG_ERROR, "cannot create rchecksum stub"
+ "(out of memory)");
+ ret = -ENOMEM;
+ goto out;
+ }
- stub = iot_dequeue_unordered (worker);
- /* If no request was received, we must've timed out,
- * and can exit. */
- if (!stub)
- break;
+ ret = iot_schedule (frame, this, stub);
+out:
+ if (ret < 0) {
+ STACK_UNWIND_STRICT (rchecksum, frame, -1, -ret, -1, NULL, NULL);
+ if (stub != NULL) {
+ call_stub_destroy (stub);
+ }
+ }
- call_resume (stub);
- }
+ return 0;
+}
- return NULL;
+int
+iot_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (fallocate, frame, op_ret, op_errno, preop, postop,
+ xdata);
+ return 0;
}
-void
-deallocate_worker_array (iot_worker_t **workers)
+int
+iot_fallocate_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
{
- FREE (workers);
+ STACK_WIND (frame, iot_fallocate_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fallocate, fd, mode, offset, len,
+ xdata);
+ return 0;
}
-void
-deallocate_workers (iot_worker_t **workers,
- int start_alloc_idx, int count)
-{
- int i;
- int end_count;
-
- end_count = count + start_alloc_idx;
- for (i = start_alloc_idx; (i < end_count); i++) {
- if (workers[i] != NULL) {
- mem_pool_destroy (workers[i]->req_pool);
- FREE (workers[i]);
- workers[i] = NULL;
+
+int
+iot_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ int ret = -1;
+
+ stub = fop_fallocate_stub(frame, iot_fallocate_wrapper, fd, mode, offset,
+ len, xdata);
+ if (!stub) {
+ gf_log (this->name, GF_LOG_ERROR, "cannot create fallocate stub"
+ "(out of memory)");
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = iot_schedule (frame, this, stub);
+
+out:
+ if (ret < 0) {
+ STACK_UNWIND_STRICT (fallocate, frame, -1, -ret, NULL, NULL,
+ NULL);
+ if (stub != NULL) {
+ call_stub_destroy (stub);
}
}
-
+ return 0;
}
-
-iot_worker_t **
-allocate_worker_array (int count)
+int
+iot_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
{
- iot_worker_t **warr = NULL;
+ STACK_UNWIND_STRICT (discard, frame, op_ret, op_errno, preop, postop,
+ xdata);
+ return 0;
+}
- warr = CALLOC (count, sizeof (iot_worker_t *));
- return warr;
+int
+iot_discard_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ STACK_WIND (frame, iot_discard_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->discard, fd, offset, len, xdata);
+ return 0;
}
-iot_worker_t *
-allocate_worker (iot_conf_t * conf)
+int
+iot_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
{
- iot_worker_t *wrk = NULL;
+ call_stub_t *stub = NULL;
+ int ret = -1;
- wrk = CALLOC (1, sizeof (iot_worker_t));
- if (wrk == NULL) {
- gf_log (conf->this->name, GF_LOG_ERROR, "out of memory");
+ stub = fop_discard_stub(frame, iot_discard_wrapper, fd, offset, len,
+ xdata);
+ if (!stub) {
+ gf_log (this->name, GF_LOG_ERROR, "cannot create discard stub"
+ "(out of memory)");
+ ret = -ENOMEM;
goto out;
}
- wrk->req_pool = mem_pool_new (iot_request_t, IOT_REQUEST_MEMPOOL_SIZE);
- if (wrk->req_pool == NULL)
- goto free_wrk;
-
- INIT_LIST_HEAD (&wrk->rqlist);
- wrk->conf = conf;
- iot_notify_init (wrk);
- wrk->state = IOT_STATE_DEAD;
+ ret = iot_schedule (frame, this, stub);
out:
- return wrk;
+ if (ret < 0) {
+ STACK_UNWIND_STRICT (discard, frame, -1, -ret, NULL, NULL,
+ NULL);
+ if (stub != NULL) {
+ call_stub_destroy (stub);
+ }
+ }
+ return 0;
+}
-free_wrk:
- FREE (wrk);
- return NULL;
+int
+iot_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (zerofill, frame, op_ret, op_errno, preop, postop,
+ xdata);
+ return 0;
}
+int
+iot_zerofill_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ STACK_WIND (frame, iot_zerofill_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->zerofill, fd, offset, len, xdata);
+ return 0;
+}
int
-allocate_workers (iot_conf_t *conf, iot_worker_t **workers, int start_alloc_idx,
- int count)
+iot_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
{
- int i;
- int end_count, ret = -1;
+ call_stub_t *stub = NULL;
+ int ret = -1;
- end_count = count + start_alloc_idx;
- for (i = start_alloc_idx; i < end_count; i++) {
- workers[i] = allocate_worker (conf);
- if (workers[i] == NULL) {
- ret = -ENOMEM;
- goto out;
- }
- workers[i]->thread_idx = i;
+ stub = fop_zerofill_stub(frame, iot_zerofill_wrapper, fd,
+ offset, len, xdata);
+ if (!stub) {
+ gf_log (this->name, GF_LOG_ERROR, "cannot create zerofill stub"
+ "(out of memory)");
+ ret = -ENOMEM;
+ goto out;
}
- ret = 0;
+
+ ret = iot_schedule (frame, this, stub);
out:
- return ret;
+ if (ret < 0) {
+ STACK_UNWIND_STRICT (zerofill, frame, -1, -ret, NULL, NULL,
+ NULL);
+ if (stub != NULL) {
+ call_stub_destroy (stub);
+ }
+ }
+ return 0;
}
-void
-iot_stop_worker (iot_worker_t *worker)
+int
+__iot_workers_scale (iot_conf_t *conf)
{
- LOCK (&worker->qlock);
- {
- worker->state = IOT_STATE_EXIT_REQUEST;
- }
- UNLOCK (&worker->qlock);
+ int scale = 0;
+ int diff = 0;
+ pthread_t thread;
+ int ret = 0;
+ int i = 0;
- iot_notify_worker (worker);
- pthread_join (worker->thread, NULL);
-}
+ for (i = 0; i < IOT_PRI_MAX; i++)
+ scale += min (conf->queue_sizes[i], conf->ac_iot_limit[i]);
+ if (scale < IOT_MIN_THREADS)
+ scale = IOT_MIN_THREADS;
-void
-iot_stop_workers (iot_worker_t **workers, int start_idx, int count)
-{
- int i = 0;
- int end_idx = 0;
+ if (scale > conf->max_count)
+ scale = conf->max_count;
- end_idx = start_idx + count;
- for (i = start_idx; i < end_idx; i++) {
- if (workers[i] != NULL) {
- iot_stop_worker (workers[i]);
- }
+ if (conf->curr_count < scale) {
+ diff = scale - conf->curr_count;
}
-}
+ while (diff) {
+ diff --;
-int
-iot_startup_worker (iot_worker_t *worker, iot_worker_fn workerfunc)
-{
- int ret = -1;
- ret = pthread_create (&worker->thread, &worker->conf->w_attr,
- workerfunc, worker);
- if (ret != 0) {
- gf_log (worker->conf->this->name, GF_LOG_ERROR,
- "cannot start worker (%s)", strerror (errno));
- ret = -ret;
- } else {
- worker->state = IOT_STATE_ACTIVE;
+ ret = gf_thread_create (&thread, &conf->w_attr, iot_worker, conf);
+ if (ret == 0) {
+ conf->curr_count++;
+ gf_log (conf->this->name, GF_LOG_DEBUG,
+ "scaled threads to %d (queue_size=%d/%d)",
+ conf->curr_count, conf->queue_size, scale);
+ } else {
+ break;
+ }
}
- return ret;
+ return diff;
}
int
-iot_startup_workers (iot_worker_t **workers, int start_idx, int count,
- iot_worker_fn workerfunc)
+iot_workers_scale (iot_conf_t *conf)
{
- int i = 0;
- int end_idx = 0;
- int ret = -1;
+ int ret = -1;
- end_idx = start_idx + count;
- for (i = start_idx; i < end_idx; i++) {
- ret = iot_startup_worker (workers[i], workerfunc);
- if (ret < 0) {
- goto out;
- }
+ if (conf == NULL) {
+ ret = -EINVAL;
+ goto out;
}
- ret = 0;
+ pthread_mutex_lock (&conf->mutex);
+ {
+ ret = __iot_workers_scale (conf);
+ }
+ pthread_mutex_unlock (&conf->mutex);
+
out:
return ret;
}
@@ -2708,117 +2625,129 @@ set_stack_size (iot_conf_t *conf)
{
int err = 0;
size_t stacksize = IOT_THREAD_STACK_SIZE;
+ xlator_t *this = NULL;
+
+ this = THIS;
pthread_attr_init (&conf->w_attr);
err = pthread_attr_setstacksize (&conf->w_attr, stacksize);
if (err == EINVAL) {
- gf_log (conf->this->name, GF_LOG_WARNING,
+ err = pthread_attr_getstacksize (&conf->w_attr, &stacksize);
+ if (!err)
+ gf_log (this->name, GF_LOG_WARNING,
+ "Using default thread stack size %zd",
+ stacksize);
+ else
+ gf_log (this->name, GF_LOG_WARNING,
"Using default thread stack size");
}
+
+ conf->stack_size = stacksize;
}
-void
-iot_cleanup_workers (iot_conf_t *conf)
+int32_t
+mem_acct_init (xlator_t *this)
{
- if (conf->uworkers != NULL) {
- iot_stop_workers (conf->uworkers, 0,
- conf->max_u_threads);
-
- deallocate_workers (conf->uworkers, 0,
- conf->max_u_threads);
+ int ret = -1;
- deallocate_worker_array (conf->uworkers);
- }
-
- if (conf->oworkers != NULL) {
- iot_stop_workers (conf->oworkers, 0,
- conf->max_o_threads);
-
- deallocate_workers (conf->oworkers, 0,
- conf->max_o_threads);
-
- deallocate_worker_array (conf->oworkers);
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_iot_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ return ret;
}
-}
+ return ret;
+}
int
-workers_init (iot_conf_t *conf)
+iot_priv_dump (xlator_t *this)
{
- int ret = -1;
+ iot_conf_t *conf = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
- if (conf == NULL) {
- ret = -EINVAL;
- goto err;
- }
+ if (!this)
+ return 0;
- /* Initialize un-ordered workers */
- conf->uworkers = allocate_worker_array (conf->max_u_threads);
- if (conf->uworkers == NULL) {
- gf_log (conf->this->name, GF_LOG_ERROR, "out of memory");
- ret = -ENOMEM;
- goto err;
- }
+ conf = this->private;
+ if (!conf)
+ return 0;
- ret = allocate_workers (conf, conf->uworkers, 0,
- conf->max_u_threads);
- if (ret < 0) {
- gf_log (conf->this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
+ snprintf (key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type,
+ this->name);
- /* Initialize ordered workers */
- conf->oworkers = allocate_worker_array (conf->max_o_threads);
- if (conf->oworkers == NULL) {
- gf_log (conf->this->name, GF_LOG_ERROR, "out of memory");
- ret = -ENOMEM;
- goto err;
- }
-
- ret = allocate_workers (conf, conf->oworkers, 0,
- conf->max_o_threads);
- if (ret < 0) {
- gf_log (conf->this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
+ gf_proc_dump_add_section(key_prefix);
- set_stack_size (conf);
- ret = iot_startup_workers (conf->oworkers, 0, conf->min_o_threads,
- iot_worker_ordered);
- if (ret == -1) {
- /* logged inside iot_startup_workers */
- goto err;
- }
+ gf_proc_dump_write("maximum_threads_count", "%d", conf->max_count);
+ gf_proc_dump_write("current_threads_count", "%d", conf->curr_count);
+ gf_proc_dump_write("sleep_count", "%d", conf->sleep_count);
+ gf_proc_dump_write("idle_time", "%d", conf->idle_time);
+ gf_proc_dump_write("stack_size", "%zd", conf->stack_size);
+ gf_proc_dump_write("high_priority_threads", "%d",
+ conf->ac_iot_limit[IOT_PRI_HI]);
+ gf_proc_dump_write("normal_priority_threads", "%d",
+ conf->ac_iot_limit[IOT_PRI_NORMAL]);
+ gf_proc_dump_write("low_priority_threads", "%d",
+ conf->ac_iot_limit[IOT_PRI_LO]);
+ gf_proc_dump_write("least_priority_threads", "%d",
+ conf->ac_iot_limit[IOT_PRI_LEAST]);
- ret = iot_startup_workers (conf->uworkers, 0, conf->min_u_threads,
- iot_worker_unordered);
- if (ret == -1) {
- /* logged inside iot_startup_workers */
- goto err;
- }
+ gf_proc_dump_write("cached least rate", "%u",
+ conf->throttle.cached_rate);
+ gf_proc_dump_write("least rate limit", "%u", conf->throttle.rate_limit);
return 0;
+}
-err:
- if (conf != NULL) {
- iot_cleanup_workers (conf);
- }
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ iot_conf_t *conf = NULL;
+ int ret = -1;
- return ret;
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ GF_OPTION_RECONF ("thread-count", conf->max_count, options, int32, out);
+
+ GF_OPTION_RECONF ("high-prio-threads",
+ conf->ac_iot_limit[IOT_PRI_HI], options, int32, out);
+
+ GF_OPTION_RECONF ("normal-prio-threads",
+ conf->ac_iot_limit[IOT_PRI_NORMAL], options, int32,
+ out);
+
+ GF_OPTION_RECONF ("low-prio-threads",
+ conf->ac_iot_limit[IOT_PRI_LO], options, int32, out);
+
+ GF_OPTION_RECONF ("least-prio-threads",
+ conf->ac_iot_limit[IOT_PRI_LEAST], options, int32,
+ out);
+ GF_OPTION_RECONF ("enable-least-priority", conf->least_priority,
+ options, bool, out);
+
+ GF_OPTION_RECONF("least-rate-limit", conf->throttle.rate_limit, options,
+ int32, out);
+
+ ret = 0;
+out:
+ return ret;
}
int
init (xlator_t *this)
{
- iot_conf_t *conf = NULL;
- dict_t *options = this->options;
- int thread_count = IOT_DEFAULT_THREADS;
- gf_boolean_t autoscaling = IOT_SCALING_OFF;
- char *scalestr = NULL;
- int min_threads, max_threads, ret = -1;
-
+ iot_conf_t *conf = NULL;
+ int ret = -1;
+ int i = 0;
+
if (!this->children || this->children->next) {
gf_log ("io-threads", GF_LOG_ERROR,
"FATAL: iot not configured with exactly one child");
@@ -2830,117 +2759,74 @@ init (xlator_t *this)
"dangling volume. check volfile ");
}
- conf = (void *) CALLOC (1, sizeof (*conf));
+ conf = (void *) GF_CALLOC (1, sizeof (*conf),
+ gf_iot_mt_iot_conf_t);
if (conf == NULL) {
gf_log (this->name, GF_LOG_ERROR,
"out of memory");
goto out;
}
- if ((dict_get_str (options, "autoscaling", &scalestr)) == 0) {
- if ((gf_string2boolean (scalestr, &autoscaling)) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "'autoscaling' option must be"
- " boolean");
- goto out;
- }
+ if ((ret = pthread_cond_init(&conf->cond, NULL)) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "pthread_cond_init failed (%d)", ret);
+ goto out;
}
- if (dict_get (options, "thread-count")) {
- thread_count = data_to_int32 (dict_get (options,
- "thread-count"));
- if (scalestr != NULL)
- gf_log (this->name, GF_LOG_WARNING,
- "'thread-count' is specified with "
- "'autoscaling' on. Ignoring"
- "'thread-count' option.");
- if (thread_count < 2)
- thread_count = IOT_MIN_THREADS;
- }
-
- min_threads = IOT_DEFAULT_THREADS;
- max_threads = IOT_MAX_THREADS;
- if (dict_get (options, "min-threads"))
- min_threads = data_to_int32 (dict_get (options,
- "min-threads"));
-
- if (dict_get (options, "max-threads"))
- max_threads = data_to_int32 (dict_get (options,
- "max-threads"));
-
- if (min_threads > max_threads) {
- gf_log (this->name, GF_LOG_ERROR, " min-threads must be less "
- "than max-threads");
+ if ((ret = pthread_mutex_init(&conf->mutex, NULL)) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "pthread_mutex_init failed (%d)", ret);
goto out;
}
- /* If autoscaling is off, then adjust the min and max
- * threads according to thread-count.
- * This is based on the assumption that despite autoscaling
- * being off, we still want to have separate pools for data
- * and meta-data threads.
- */
- if (!autoscaling)
- max_threads = min_threads = thread_count;
-
- /* If user specifies an odd number of threads, increase it by
- * one. The reason for having an even number of threads is
- * explained later.
- */
- if (max_threads % 2)
- max_threads++;
-
- if(min_threads % 2)
- min_threads++;
-
- /* If the user wants to have only a single thread for
- * some strange reason, make sure we set this count to
- * 2. Explained later.
- */
- if (min_threads < IOT_MIN_THREADS)
- min_threads = IOT_MIN_THREADS;
-
- /* Again, have atleast two. Read on. */
- if (max_threads < IOT_MIN_THREADS)
- max_threads = IOT_MIN_THREADS;
-
- /* This is why we need atleast two threads.
- * We're dividing the specified thread pool into
- * 2 halves, equally between ordered and unordered
- * pools.
- */
-
- /* Init params for un-ordered workers. */
- pthread_mutex_init (&conf->utlock, NULL);
- conf->max_u_threads = max_threads / 2;
- conf->min_u_threads = min_threads / 2;
- conf->u_idle_time = IOT_DEFAULT_IDLE;
- conf->u_scaling = autoscaling;
-
- /* Init params for ordered workers. */
- pthread_mutex_init (&conf->otlock, NULL);
- conf->max_o_threads = max_threads / 2;
- conf->min_o_threads = min_threads / 2;
- conf->o_idle_time = IOT_DEFAULT_IDLE;
- conf->o_scaling = autoscaling;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "io-threads: Autoscaling: %s, "
- "min_threads: %d, max_threads: %d",
- (autoscaling) ? "on":"off", min_threads, max_threads);
+ set_stack_size (conf);
+
+ GF_OPTION_INIT ("thread-count", conf->max_count, int32, out);
+
+ GF_OPTION_INIT ("high-prio-threads",
+ conf->ac_iot_limit[IOT_PRI_HI], int32, out);
+
+ GF_OPTION_INIT ("normal-prio-threads",
+ conf->ac_iot_limit[IOT_PRI_NORMAL], int32, out);
+
+ GF_OPTION_INIT ("low-prio-threads",
+ conf->ac_iot_limit[IOT_PRI_LO], int32, out);
+
+ GF_OPTION_INIT ("least-prio-threads",
+ conf->ac_iot_limit[IOT_PRI_LEAST], int32, out);
+
+ GF_OPTION_INIT ("idle-time", conf->idle_time, int32, out);
+ GF_OPTION_INIT ("enable-least-priority", conf->least_priority,
+ bool, out);
+
+ GF_OPTION_INIT("least-rate-limit", conf->throttle.rate_limit, int32,
+ out);
+ if ((ret = pthread_mutex_init(&conf->throttle.lock, NULL)) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "pthread_mutex_init failed (%d)", ret);
+ goto out;
+ }
conf->this = this;
- ret = workers_init (conf);
+
+ for (i = 0; i < IOT_PRI_MAX; i++) {
+ INIT_LIST_HEAD (&conf->reqs[i]);
+ }
+
+ ret = iot_workers_scale (conf);
+
if (ret == -1) {
gf_log (this->name, GF_LOG_ERROR,
"cannot initialize worker threads, exiting init");
- FREE (conf);
goto out;
}
this->private = conf;
ret = 0;
out:
+ if (ret)
+ GF_FREE (conf);
+
return ret;
}
@@ -2950,90 +2836,129 @@ fini (xlator_t *this)
{
iot_conf_t *conf = this->private;
- FREE (conf);
+ GF_FREE (conf);
this->private = NULL;
return;
}
-/*
- * O - Goes to ordered threadpool.
- * U - Goes to un-ordered threadpool.
- * V - Variable, depends on whether the file is open.
- * If it is, then goes to ordered, otherwise to
- * un-ordered.
- */
-struct xlator_fops fops = {
- .open = iot_open, /* U */
- .create = iot_create, /* U */
- .readv = iot_readv, /* O */
- .writev = iot_writev, /* O */
- .flush = iot_flush, /* O */
- .fsync = iot_fsync, /* O */
- .lk = iot_lk, /* O */
- .stat = iot_stat, /* V */
- .fstat = iot_fstat, /* O */
- .truncate = iot_truncate, /* V */
- .ftruncate = iot_ftruncate, /* O */
- .utimens = iot_utimens, /* V */
- .checksum = iot_checksum, /* U */
- .unlink = iot_unlink, /* U */
- .lookup = iot_lookup, /* U */
- .chmod = iot_chmod, /* V */
- .fchmod = iot_fchmod, /* O */
- .chown = iot_chown, /* V */
- .fchown = iot_fchown, /* O */
- .access = iot_access, /* U */
- .readlink = iot_readlink, /* U */
- .mknod = iot_mknod, /* U */
- .mkdir = iot_mkdir, /* U */
- .rmdir = iot_rmdir, /* U */
- .symlink = iot_symlink, /* U */
- .rename = iot_rename, /* U */
- .link = iot_link, /* U */
- .opendir = iot_opendir, /* U */
- .fsyncdir = iot_fsyncdir, /* O */
- .statfs = iot_statfs, /* U */
- .setxattr = iot_setxattr, /* U */
- .getxattr = iot_getxattr, /* U */
- .fgetxattr = iot_fgetxattr, /* O */
- .fsetxattr = iot_fsetxattr, /* O */
- .removexattr = iot_removexattr, /* U */
- .readdir = iot_readdir, /* O */
- .xattrop = iot_xattrop, /* U */
- .fxattrop = iot_fxattrop, /* O */
+struct xlator_dumpops dumpops = {
+ .priv = iot_priv_dump,
};
-struct xlator_mops mops = {
+struct xlator_fops fops = {
+ .open = iot_open,
+ .create = iot_create,
+ .readv = iot_readv,
+ .writev = iot_writev,
+ .flush = iot_flush,
+ .fsync = iot_fsync,
+ .lk = iot_lk,
+ .stat = iot_stat,
+ .fstat = iot_fstat,
+ .truncate = iot_truncate,
+ .ftruncate = iot_ftruncate,
+ .unlink = iot_unlink,
+ .lookup = iot_lookup,
+ .setattr = iot_setattr,
+ .fsetattr = iot_fsetattr,
+ .access = iot_access,
+ .readlink = iot_readlink,
+ .mknod = iot_mknod,
+ .mkdir = iot_mkdir,
+ .rmdir = iot_rmdir,
+ .symlink = iot_symlink,
+ .rename = iot_rename,
+ .link = iot_link,
+ .opendir = iot_opendir,
+ .fsyncdir = iot_fsyncdir,
+ .statfs = iot_statfs,
+ .setxattr = iot_setxattr,
+ .getxattr = iot_getxattr,
+ .fgetxattr = iot_fgetxattr,
+ .fsetxattr = iot_fsetxattr,
+ .removexattr = iot_removexattr,
+ .fremovexattr = iot_fremovexattr,
+ .readdir = iot_readdir,
+ .readdirp = iot_readdirp,
+ .inodelk = iot_inodelk,
+ .finodelk = iot_finodelk,
+ .entrylk = iot_entrylk,
+ .fentrylk = iot_fentrylk,
+ .xattrop = iot_xattrop,
+ .fxattrop = iot_fxattrop,
+ .rchecksum = iot_rchecksum,
+ .fallocate = iot_fallocate,
+ .discard = iot_discard,
+ .zerofill = iot_zerofill,
};
-struct xlator_cbks cbks = {
-};
+struct xlator_cbks cbks;
struct volume_options options[] = {
- { .key = {"thread-count"},
- .type = GF_OPTION_TYPE_INT,
- .min = IOT_MIN_THREADS,
- .max = IOT_MAX_THREADS
+ { .key = {"thread-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = IOT_MIN_THREADS,
+ .max = IOT_MAX_THREADS,
+ .default_value = "16",
+ .description = "Number of threads in IO threads translator which "
+ "perform concurrent IO operations"
+
+ },
+ { .key = {"high-prio-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = IOT_MIN_THREADS,
+ .max = IOT_MAX_THREADS,
+ .default_value = "16",
+ .description = "Max number of threads in IO threads translator which "
+ "perform high priority IO operations at a given time"
+
+ },
+ { .key = {"normal-prio-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = IOT_MIN_THREADS,
+ .max = IOT_MAX_THREADS,
+ .default_value = "16",
+ .description = "Max number of threads in IO threads translator which "
+ "perform normal priority IO operations at a given time"
+
+ },
+ { .key = {"low-prio-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = IOT_MIN_THREADS,
+ .max = IOT_MAX_THREADS,
+ .default_value = "16",
+ .description = "Max number of threads in IO threads translator which "
+ "perform low priority IO operations at a given time"
+
},
- { .key = {"autoscaling"},
- .type = GF_OPTION_TYPE_BOOL
+ { .key = {"least-prio-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = IOT_MIN_THREADS,
+ .max = IOT_MAX_THREADS,
+ .default_value = "1",
+ .description = "Max number of threads in IO threads translator which "
+ "perform least priority IO operations at a given time"
+ },
+ { .key = {"enable-least-priority"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Enable/Disable least priority"
},
- { .key = {"min-threads"},
- .type = GF_OPTION_TYPE_INT,
- .min = IOT_MIN_THREADS,
- .max = IOT_MAX_THREADS,
- .description = "Minimum number of threads must be greater than or "
- "equal to 2. If the specified value is less than 2 "
- "it is adjusted upwards to 2. This is a requirement"
- " for the current model of threading in io-threads."
+ {.key = {"idle-time"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 0x7fffffff,
+ .default_value = "120",
},
- { .key = {"max-threads"},
- .type = GF_OPTION_TYPE_INT,
- .min = IOT_MIN_THREADS,
- .max = IOT_MAX_THREADS,
- .description = "Maximum number of threads is advisory only so the "
- "user specified value will be used."
+ {.key = {"least-rate-limit"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = INT_MAX,
+ .default_value = "0",
+ .description = "Max number of least priority operations to handle "
+ "per-second"
+ },
+ { .key = {NULL},
},
- { .key = {NULL} },
};
diff --git a/xlators/performance/io-threads/src/io-threads.h b/xlators/performance/io-threads/src/io-threads.h
index 3e806aa78..1a9dee9ae 100644
--- a/xlators/performance/io-threads/src/io-threads.h
+++ b/xlators/performance/io-threads/src/io-threads.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __IOT_H
@@ -35,146 +26,67 @@
#include "list.h"
#include <stdlib.h>
#include "locking.h"
+#include "iot-mem-types.h"
#include <semaphore.h>
+#include "statedump.h"
-#define min(a,b) ((a)<(b)?(a):(b))
-#define max(a,b) ((a)>(b)?(a):(b))
struct iot_conf;
-struct iot_worker;
-struct iot_request;
-
-struct iot_request {
- struct list_head list; /* Attaches this request to the list of
- requests.
- */
- call_stub_t *stub;
-};
-
-typedef enum {
- IOT_STATE_ACTIVE,
- IOT_STATE_EXIT_REQUEST,
- IOT_STATE_DEAD
-}iot_state_t;
-#define iot_worker_active(wrk) ((wrk)->state == IOT_STATE_ACTIVE)
#define MAX_IDLE_SKEW 4 /* In secs */
#define skew_sec_idle_time(sec) ((sec) + (random () % MAX_IDLE_SKEW))
-#define IOT_DEFAULT_IDLE 180 /* In secs. */
+#define IOT_DEFAULT_IDLE 120 /* In secs. */
-#define IOT_MIN_THREADS 2
+#define IOT_MIN_THREADS 1
#define IOT_DEFAULT_THREADS 16
#define IOT_MAX_THREADS 64
-#define IOT_SCALING_OFF _gf_false
-#define IOT_SCALING_ON _gf_true
-#define iot_ordered_scaling_on(conf) ((conf)->o_scaling == IOT_SCALING_ON)
-#define iot_unordered_scaling_on(conf) ((conf)->u_scaling == IOT_SCALING_ON)
#define IOT_THREAD_STACK_SIZE ((size_t)(1024*1024))
-/* This signifies the max number of outstanding request we're expecting
- * at a point for every worker thread.
- * For an idea of the memory foot-print, consider at most 16 Bytes per
- * iot_request_t on a 64-bit system with another 16 bytes per chunk in the
- * header. For 64 slots in the pool, we'll use up 2 KiB, with 64 threads this
- * goes up to 128 KiB.
- *
- * Note that this size defines the size of the per-worker mem pool. The
- * advantage is that, we're not only reducing the rate of small iot_request_t
- * allocations from the heap but also reducing the contention on the libc heap
- * by having a mem pool, though small, for each worker.
- */
-#define IOT_REQUEST_MEMPOOL_SIZE 64
-
-struct iot_worker {
- struct list_head rqlist; /* List of requests assigned to me. */
- struct iot_conf *conf;
-#ifndef HAVE_SPINLOCK
- pthread_cond_t notifier;
-#else
- sem_t notifier;
-#endif
- int64_t q,dq;
- gf_lock_t qlock;
- int32_t queue_size;
- pthread_t thread;
- iot_state_t state; /* What state is the thread in. */
- int thread_idx; /* Thread's index into the worker
- array. Since this will be thread
- local data, for ensuring that
- number of threads dont fall below
- a minimum, we just dont allow
- threads with specific indices to
- exit. Helps us in eliminating one
- place where otherwise a lock
- would have been required to update
- centralized state inside conf.
- */
- struct mem_pool *req_pool; /* iot_request_t's come from here. */
+
+typedef enum {
+ IOT_PRI_HI = 0, /* low latency */
+ IOT_PRI_NORMAL, /* normal */
+ IOT_PRI_LO, /* bulk */
+ IOT_PRI_LEAST, /* least */
+ IOT_PRI_MAX,
+} iot_pri_t;
+
+#define IOT_LEAST_THROTTLE_DELAY 1 /* sample interval in seconds */
+struct iot_least_throttle {
+ struct timeval sample_time; /* timestamp of current sample */
+ uint32_t sample_cnt; /* sample count for active interval */
+ uint32_t cached_rate; /* the most recently measured rate */
+ int32_t rate_limit; /* user-specified rate limit */
+ pthread_mutex_t lock;
};
struct iot_conf {
- int32_t thread_count;
- struct iot_worker **workers;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+
+ int32_t max_count; /* configured maximum */
+ int32_t curr_count; /* actual number of threads running */
+ int32_t sleep_count;
+
+ int32_t idle_time; /* in seconds */
+
+ struct list_head reqs[IOT_PRI_MAX];
+
+ int32_t ac_iot_limit[IOT_PRI_MAX];
+ int32_t ac_iot_count[IOT_PRI_MAX];
+ int queue_sizes[IOT_PRI_MAX];
+ int queue_size;
+ pthread_attr_t w_attr;
+ gf_boolean_t least_priority; /*Enable/Disable least-priority */
xlator_t *this;
- /* Config state for ordered threads. */
- pthread_mutex_t otlock; /* Used to sync any state that needs
- to be changed by the ordered
- threads.
- */
-
- int max_o_threads; /* Max. number of ordered threads */
- int min_o_threads; /* Min. number of ordered threads.
- Ordered thread count never falls
- below this threshold.
- */
-
- int o_idle_time; /* in Secs. The idle time after
- which an ordered thread exits.
- */
- gf_boolean_t o_scaling; /* Set to IOT_SCALING_OFF if user
- does not want thread scaling on
- ordered threads. If scaling is
- off, io-threads maintains at
- least min_o_threads number of
- threads and never lets any thread
- exit.
- */
- struct iot_worker **oworkers; /* Ordered thread pool. */
-
-
- /* Config state for unordered threads */
- pthread_mutex_t utlock; /* Used for scaling un-ordered
- threads. */
- struct iot_worker **uworkers; /* Un-ordered thread pool. */
- int max_u_threads; /* Number of unordered threads will
- not be higher than this. */
- int min_u_threads; /* Number of unordered threads
- should not fall below this value.
- */
- int u_idle_time; /* If an unordered thread does not
- get a request for this amount of
- secs, it should try to die.
- */
- gf_boolean_t u_scaling; /* Set to IOT_SCALING_OFF if user
- does not want thread scaling on
- unordered threads. If scaling is
- off, io-threads maintains at
- least min_u_threads number of
- threads and never lets any thread
- exit.
- */
-
- pthread_attr_t w_attr; /* Used to reduce the stack size of
- the pthread worker down from the
- default of 8MiB.
- */
+ size_t stack_size;
+
+ struct iot_least_throttle throttle;
};
typedef struct iot_conf iot_conf_t;
-typedef struct iot_worker iot_worker_t;
-typedef struct iot_request iot_request_t;
#endif /* __IOT_H */
diff --git a/xlators/performance/io-threads/src/iot-mem-types.h b/xlators/performance/io-threads/src/iot-mem-types.h
new file mode 100644
index 000000000..4fa8302d1
--- /dev/null
+++ b/xlators/performance/io-threads/src/iot-mem-types.h
@@ -0,0 +1,22 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __IOT_MEM_TYPES_H__
+#define __IOT_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_iot_mem_types_ {
+ gf_iot_mt_iot_conf_t = gf_common_mt_end + 1,
+ gf_iot_mt_end
+};
+#endif
+
diff --git a/xlators/performance/md-cache/Makefile.am b/xlators/performance/md-cache/Makefile.am
new file mode 100644
index 000000000..af437a64d
--- /dev/null
+++ b/xlators/performance/md-cache/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = src
diff --git a/xlators/performance/md-cache/src/Makefile.am b/xlators/performance/md-cache/src/Makefile.am
new file mode 100644
index 000000000..8c9f5a858
--- /dev/null
+++ b/xlators/performance/md-cache/src/Makefile.am
@@ -0,0 +1,25 @@
+xlator_LTLIBRARIES = md-cache.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
+
+md_cache_la_LDFLAGS = -module -avoid-version
+
+md_cache_la_SOURCES = md-cache.c
+md_cache_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = md-cache-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(CONTRIBDIR)/rbtree
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
+
+
+stat-prefetch-compat:
+ mkdir -p $(DESTDIR)$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
+ rm -rf $(DESTDIR)$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance/stat-prefetch.so
+ ln -s ./md-cache.so $(DESTDIR)$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance/stat-prefetch.so
+
+
+install-exec-local: stat-prefetch-compat
diff --git a/xlators/performance/md-cache/src/md-cache-mem-types.h b/xlators/performance/md-cache/src/md-cache-mem-types.h
new file mode 100644
index 000000000..6634cf962
--- /dev/null
+++ b/xlators/performance/md-cache/src/md-cache-mem-types.h
@@ -0,0 +1,24 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __MDC_MEM_TYPES_H__
+#define __MDC_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_mdc_mem_types_ {
+ gf_mdc_mt_mdc_local_t = gf_common_mt_end + 1,
+ gf_mdc_mt_md_cache_t,
+ gf_mdc_mt_mdc_conf_t,
+ gf_mdc_mt_end
+};
+#endif
+
diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c
new file mode 100644
index 000000000..84c363ad9
--- /dev/null
+++ b/xlators/performance/md-cache/src/md-cache.c
@@ -0,0 +1,2303 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "md-cache-mem-types.h"
+#include "glusterfs-acl.h"
+#include <assert.h>
+#include <sys/time.h>
+
+
+/* TODO:
+ - cache symlink() link names and nuke symlink-cache
+ - send proper postbuf in setattr_cbk even when op_ret = -1
+*/
+
+
+struct mdc_conf {
+ int timeout;
+ gf_boolean_t cache_posix_acl;
+ gf_boolean_t cache_selinux;
+ gf_boolean_t force_readdirp;
+};
+
+
+static struct mdc_key {
+ const char *name;
+ int load;
+ int check;
+} mdc_keys[] = {
+ {
+ .name = POSIX_ACL_ACCESS_XATTR,
+ .load = 0,
+ .check = 1,
+ },
+ {
+ .name = POSIX_ACL_DEFAULT_XATTR,
+ .load = 0,
+ .check = 1,
+ },
+ {
+ .name = GF_SELINUX_XATTR_KEY,
+ .load = 0,
+ .check = 1,
+ },
+ {
+ .name = "security.capability",
+ .load = 0,
+ .check = 1,
+ },
+ {
+ .name = "gfid-req",
+ .load = 0,
+ .check = 1,
+ },
+ {
+ .name = NULL,
+ .load = 0,
+ .check = 0,
+ }
+};
+
+
+static uint64_t
+gfid_to_ino (uuid_t gfid)
+{
+ uint64_t ino = 0;
+ int i = 0, j = 0;
+
+ for (i = 15; i > (15 - 8); i--) {
+ ino += (uint64_t)(gfid[i]) << j;
+ j += 8;
+ }
+
+ return ino;
+}
+
+
+struct mdc_local;
+typedef struct mdc_local mdc_local_t;
+
+#define MDC_STACK_UNWIND(fop, frame, params ...) do { \
+ mdc_local_t *__local = NULL; \
+ xlator_t *__xl = NULL; \
+ if (frame) { \
+ __xl = frame->this; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT (fop, frame, params); \
+ mdc_local_wipe (__xl, __local); \
+ } while (0)
+
+
+struct md_cache {
+ ia_prot_t md_prot;
+ uint32_t md_nlink;
+ uint32_t md_uid;
+ uint32_t md_gid;
+ uint32_t md_atime;
+ uint32_t md_atime_nsec;
+ uint32_t md_mtime;
+ uint32_t md_mtime_nsec;
+ uint32_t md_ctime;
+ uint32_t md_ctime_nsec;
+ uint64_t md_rdev;
+ uint64_t md_size;
+ uint64_t md_blocks;
+ dict_t *xattr;
+ char *linkname;
+ time_t ia_time;
+ time_t xa_time;
+ gf_lock_t lock;
+};
+
+
+struct mdc_local {
+ loc_t loc;
+ loc_t loc2;
+ fd_t *fd;
+ char *linkname;
+ char *key;
+ dict_t *xattr;
+};
+
+
+int
+__mdc_inode_ctx_get (xlator_t *this, inode_t *inode, struct md_cache **mdc_p)
+{
+ int ret = 0;
+ struct md_cache *mdc = NULL;
+ uint64_t mdc_int = 0;
+
+ ret = __inode_ctx_get (inode, this, &mdc_int);
+ mdc = (void *) (long) (mdc_int);
+ if (ret == 0 && mdc_p)
+ *mdc_p = mdc;
+
+ return ret;
+}
+
+
+int
+mdc_inode_ctx_get (xlator_t *this, inode_t *inode, struct md_cache **mdc_p)
+{
+ int ret;
+
+ LOCK(&inode->lock);
+ {
+ ret = __mdc_inode_ctx_get (this, inode, mdc_p);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
+}
+
+
+int
+__mdc_inode_ctx_set (xlator_t *this, inode_t *inode, struct md_cache *mdc)
+{
+ int ret = 0;
+ uint64_t mdc_int = 0;
+
+ mdc_int = (long) mdc;
+ ret = __inode_ctx_set (inode, this, &mdc_int);
+
+ return ret;
+}
+
+
+int
+mdc_inode_ctx_set (xlator_t *this, inode_t *inode, struct md_cache *mdc)
+{
+ int ret;
+
+ LOCK(&inode->lock);
+ {
+ ret = __mdc_inode_ctx_set (this, inode, mdc);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
+}
+
+
+mdc_local_t *
+mdc_local_get (call_frame_t *frame)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+ if (local)
+ goto out;
+
+ local = GF_CALLOC (sizeof (*local), 1, gf_mdc_mt_mdc_local_t);
+ if (!local)
+ goto out;
+
+ frame->local = local;
+out:
+ return local;
+}
+
+
+void
+mdc_local_wipe (xlator_t *this, mdc_local_t *local)
+{
+ if (!local)
+ return;
+
+ loc_wipe (&local->loc);
+
+ loc_wipe (&local->loc2);
+
+ if (local->fd)
+ fd_unref (local->fd);
+
+ GF_FREE (local->linkname);
+
+ GF_FREE (local->key);
+
+ if (local->xattr)
+ dict_unref (local->xattr);
+
+ GF_FREE (local);
+ return;
+}
+
+
+int
+mdc_inode_wipe (xlator_t *this, inode_t *inode)
+{
+ int ret = 0;
+ uint64_t mdc_int = 0;
+ struct md_cache *mdc = NULL;
+
+ ret = inode_ctx_del (inode, this, &mdc_int);
+ if (ret != 0)
+ goto out;
+
+ mdc = (void *) (long) mdc_int;
+
+ if (mdc->xattr)
+ dict_unref (mdc->xattr);
+
+ GF_FREE (mdc->linkname);
+
+ GF_FREE (mdc);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+struct md_cache *
+mdc_inode_prep (xlator_t *this, inode_t *inode)
+{
+ int ret = 0;
+ struct md_cache *mdc = NULL;
+
+ LOCK (&inode->lock);
+ {
+ ret = __mdc_inode_ctx_get (this, inode, &mdc);
+ if (ret == 0)
+ goto unlock;
+
+ mdc = GF_CALLOC (sizeof (*mdc), 1, gf_mdc_mt_md_cache_t);
+ if (!mdc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "out of memory :(");
+ goto unlock;
+ }
+
+ LOCK_INIT (&mdc->lock);
+
+ ret = __mdc_inode_ctx_set (this, inode, mdc);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "out of memory :(");
+ GF_FREE (mdc);
+ mdc = NULL;
+ }
+ }
+unlock:
+ UNLOCK (&inode->lock);
+
+ return mdc;
+}
+
+
+static gf_boolean_t
+is_md_cache_iatt_valid (xlator_t *this, struct md_cache *mdc)
+{
+ struct mdc_conf *conf = NULL;
+ time_t now = 0;
+ gf_boolean_t ret = _gf_true;
+ conf = this->private;
+
+ time (&now);
+
+ LOCK (&mdc->lock);
+ {
+ if (now >= (mdc->ia_time + conf->timeout))
+ ret = _gf_false;
+ }
+ UNLOCK (&mdc->lock);
+
+ return ret;
+}
+
+
+static gf_boolean_t
+is_md_cache_xatt_valid (xlator_t *this, struct md_cache *mdc)
+{
+ struct mdc_conf *conf = NULL;
+ time_t now = 0;
+ gf_boolean_t ret = _gf_true;
+
+ conf = this->private;
+
+ time (&now);
+
+ LOCK (&mdc->lock);
+ {
+ if (now >= (mdc->xa_time + conf->timeout))
+ ret = _gf_false;
+ }
+ UNLOCK (&mdc->lock);
+
+ return ret;
+}
+
+
+void
+mdc_from_iatt (struct md_cache *mdc, struct iatt *iatt)
+{
+ mdc->md_prot = iatt->ia_prot;
+ mdc->md_nlink = iatt->ia_nlink;
+ mdc->md_uid = iatt->ia_uid;
+ mdc->md_gid = iatt->ia_gid;
+ mdc->md_atime = iatt->ia_atime;
+ mdc->md_atime_nsec = iatt->ia_atime_nsec;
+ mdc->md_mtime = iatt->ia_mtime;
+ mdc->md_mtime_nsec = iatt->ia_mtime_nsec;
+ mdc->md_ctime = iatt->ia_ctime;
+ mdc->md_ctime_nsec = iatt->ia_ctime_nsec;
+ mdc->md_rdev = iatt->ia_rdev;
+ mdc->md_size = iatt->ia_size;
+ mdc->md_blocks = iatt->ia_blocks;
+}
+
+
+void
+mdc_to_iatt (struct md_cache *mdc, struct iatt *iatt)
+{
+ iatt->ia_prot = mdc->md_prot;
+ iatt->ia_nlink = mdc->md_nlink;
+ iatt->ia_uid = mdc->md_uid;
+ iatt->ia_gid = mdc->md_gid;
+ iatt->ia_atime = mdc->md_atime;
+ iatt->ia_atime_nsec = mdc->md_atime_nsec;
+ iatt->ia_mtime = mdc->md_mtime;
+ iatt->ia_mtime_nsec = mdc->md_mtime_nsec;
+ iatt->ia_ctime = mdc->md_ctime;
+ iatt->ia_ctime_nsec = mdc->md_ctime_nsec;
+ iatt->ia_rdev = mdc->md_rdev;
+ iatt->ia_size = mdc->md_size;
+ iatt->ia_blocks = mdc->md_blocks;
+}
+
+
+int
+mdc_inode_iatt_set_validate(xlator_t *this, inode_t *inode, struct iatt *prebuf,
+ struct iatt *iatt)
+{
+ int ret = -1;
+ struct md_cache *mdc = NULL;
+
+ mdc = mdc_inode_prep (this, inode);
+ if (!mdc)
+ goto out;
+
+ LOCK (&mdc->lock);
+ {
+ if (!iatt || !iatt->ia_ctime) {
+ mdc->ia_time = 0;
+ goto unlock;
+ }
+
+ /*
+ * Invalidate the inode if the mtime or ctime has changed
+ * and the prebuf doesn't match the value we have cached.
+ * TODO: writev returns with a NULL iatt due to
+ * performance/write-behind, causing invalidation on writes.
+ */
+ if (IA_ISREG(inode->ia_type) &&
+ ((iatt->ia_mtime != mdc->md_mtime) ||
+ (iatt->ia_ctime != mdc->md_ctime)))
+ if (!prebuf || (prebuf->ia_ctime != mdc->md_ctime) ||
+ (prebuf->ia_mtime != mdc->md_mtime))
+ inode_invalidate(inode);
+
+ mdc_from_iatt (mdc, iatt);
+
+ time (&mdc->ia_time);
+ }
+unlock:
+ UNLOCK (&mdc->lock);
+ ret = 0;
+out:
+ return ret;
+}
+
+int mdc_inode_iatt_set(xlator_t *this, inode_t *inode, struct iatt *iatt)
+{
+ return mdc_inode_iatt_set_validate(this, inode, NULL, iatt);
+}
+
+int
+mdc_inode_iatt_get (xlator_t *this, inode_t *inode, struct iatt *iatt)
+{
+ int ret = -1;
+ struct md_cache *mdc = NULL;
+
+ if (mdc_inode_ctx_get (this, inode, &mdc) != 0)
+ goto out;
+
+ if (!is_md_cache_iatt_valid (this, mdc))
+ goto out;
+
+ LOCK (&mdc->lock);
+ {
+ mdc_to_iatt (mdc, iatt);
+ }
+ UNLOCK (&mdc->lock);
+
+ uuid_copy (iatt->ia_gfid, inode->gfid);
+ iatt->ia_ino = gfid_to_ino (inode->gfid);
+ iatt->ia_dev = 42;
+ iatt->ia_type = inode->ia_type;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+struct updatedict {
+ dict_t *dict;
+ int ret;
+};
+
+static int
+updatefn(dict_t *dict, char *key, data_t *value, void *data)
+{
+ struct updatedict *u = data;
+ const char *mdc_key;
+ int i = 0;
+
+ for (mdc_key = mdc_keys[i].name; (mdc_key = mdc_keys[i].name); i++) {
+ if (!mdc_keys[i].check)
+ continue;
+ if (strcmp(mdc_key, key))
+ continue;
+
+ if (!u->dict) {
+ u->dict = dict_new();
+ if (!u->dict) {
+ u->ret = -1;
+ return -1;
+ }
+ }
+
+ if (dict_set(u->dict, key, value) < 0) {
+ u->ret = -1;
+ return -1;
+ }
+
+ break;
+ }
+ return 0;
+}
+
+static int
+mdc_dict_update(dict_t **tgt, dict_t *src)
+{
+ struct updatedict u = {
+ .dict = *tgt,
+ .ret = 0,
+ };
+
+ dict_foreach(src, updatefn, &u);
+
+ if (*tgt)
+ return u.ret;
+
+ if ((u.ret < 0) && u.dict) {
+ dict_unref(u.dict);
+ return u.ret;
+ }
+
+ *tgt = u.dict;
+
+ return u.ret;
+}
+
+int
+mdc_inode_xatt_set (xlator_t *this, inode_t *inode, dict_t *dict)
+{
+ int ret = -1;
+ struct md_cache *mdc = NULL;
+ dict_t *newdict = NULL;
+
+ mdc = mdc_inode_prep (this, inode);
+ if (!mdc)
+ goto out;
+
+ if (!dict)
+ goto out;
+
+ LOCK (&mdc->lock);
+ {
+ if (mdc->xattr) {
+ dict_unref (mdc->xattr);
+ mdc->xattr = NULL;
+ }
+
+ ret = mdc_dict_update(&newdict, dict);
+ if (ret < 0) {
+ UNLOCK(&mdc->lock);
+ goto out;
+ }
+
+ if (newdict)
+ mdc->xattr = newdict;
+
+ time (&mdc->xa_time);
+ }
+ UNLOCK (&mdc->lock);
+ ret = 0;
+out:
+ return ret;
+}
+
+
+int
+mdc_inode_xatt_update (xlator_t *this, inode_t *inode, dict_t *dict)
+{
+ int ret = -1;
+ struct md_cache *mdc = NULL;
+
+ mdc = mdc_inode_prep (this, inode);
+ if (!mdc)
+ goto out;
+
+ if (!dict)
+ goto out;
+
+ LOCK (&mdc->lock);
+ {
+ ret = mdc_dict_update(&mdc->xattr, dict);
+ if (ret < 0) {
+ UNLOCK(&mdc->lock);
+ goto out;
+ }
+
+ time (&mdc->xa_time);
+ }
+ UNLOCK (&mdc->lock);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+int
+mdc_inode_xatt_unset (xlator_t *this, inode_t *inode, char *name)
+{
+ int ret = -1;
+ struct md_cache *mdc = NULL;
+
+ mdc = mdc_inode_prep (this, inode);
+ if (!mdc)
+ goto out;
+
+ if (!name)
+ goto out;
+
+ LOCK (&mdc->lock);
+ {
+ dict_del (mdc->xattr, name);
+ }
+ UNLOCK (&mdc->lock);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+int
+mdc_inode_xatt_get (xlator_t *this, inode_t *inode, dict_t **dict)
+{
+ int ret = -1;
+ struct md_cache *mdc = NULL;
+
+ if (mdc_inode_ctx_get (this, inode, &mdc) != 0)
+ goto out;
+
+ if (!is_md_cache_xatt_valid (this, mdc))
+ goto out;
+
+ LOCK (&mdc->lock);
+ {
+ ret = 0;
+ /* Missing xattr only means no keys were there, i.e
+ a negative cache for the "loaded" keys
+ */
+ if (!mdc->xattr)
+ goto unlock;
+
+ if (dict)
+ *dict = dict_ref (mdc->xattr);
+ }
+unlock:
+ UNLOCK (&mdc->lock);
+
+out:
+ return ret;
+}
+
+
+int
+mdc_inode_iatt_invalidate (xlator_t *this, inode_t *inode)
+{
+ int ret = -1;
+ struct md_cache *mdc = NULL;
+
+ if (mdc_inode_ctx_get (this, inode, &mdc) != 0)
+ goto out;
+
+ LOCK (&mdc->lock);
+ {
+ mdc->ia_time = 0;
+ }
+ UNLOCK (&mdc->lock);
+
+out:
+ return ret;
+}
+
+
+int
+mdc_inode_xatt_invalidate (xlator_t *this, inode_t *inode)
+{
+ int ret = -1;
+ struct md_cache *mdc = NULL;
+
+ if (mdc_inode_ctx_get (this, inode, &mdc) != 0)
+ goto out;
+
+ LOCK (&mdc->lock);
+ {
+ mdc->xa_time = 0;
+ }
+ UNLOCK (&mdc->lock);
+
+out:
+ return ret;
+}
+
+
+void
+mdc_load_reqs (xlator_t *this, dict_t *dict)
+{
+ const char *mdc_key = NULL;
+ int i = 0;
+ int ret = 0;
+
+ for (mdc_key = mdc_keys[i].name; (mdc_key = mdc_keys[i].name); i++) {
+ if (!mdc_keys[i].load)
+ continue;
+ ret = dict_set_int8 (dict, (char *)mdc_key, 0);
+ if (ret)
+ return;
+ }
+}
+
+
+struct checkpair {
+ int ret;
+ dict_t *rsp;
+};
+
+
+static int
+is_mdc_key_satisfied (const char *key)
+{
+ const char *mdc_key = NULL;
+ int i = 0;
+
+ if (!key)
+ return 0;
+
+ for (mdc_key = mdc_keys[i].name; (mdc_key = mdc_keys[i].name); i++) {
+ if (!mdc_keys[i].load)
+ continue;
+ if (strcmp (mdc_key, key) == 0)
+ return 1;
+ }
+
+ return 0;
+}
+
+
+static int
+checkfn (dict_t *this, char *key, data_t *value, void *data)
+{
+ struct checkpair *pair = data;
+
+ if (!is_mdc_key_satisfied (key))
+ pair->ret = 0;
+
+ return 0;
+}
+
+
+int
+mdc_xattr_satisfied (xlator_t *this, dict_t *req, dict_t *rsp)
+{
+ struct checkpair pair = {
+ .ret = 1,
+ .rsp = rsp,
+ };
+
+ dict_foreach (req, checkfn, &pair);
+
+ return pair.ret;
+}
+
+
+int
+mdc_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *dict, struct iatt *postparent)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret != 0)
+ goto out;
+
+ if (!local)
+ goto out;
+
+ if (local->loc.parent) {
+ mdc_inode_iatt_set (this, local->loc.parent, postparent);
+ }
+
+ if (local->loc.inode) {
+ mdc_inode_iatt_set (this, local->loc.inode, stbuf);
+ mdc_inode_xatt_set (this, local->loc.inode, dict);
+ }
+out:
+ MDC_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, stbuf,
+ dict, postparent);
+ return 0;
+}
+
+
+int
+mdc_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ int ret = 0;
+ struct iatt stbuf = {0, };
+ struct iatt postparent = {0, };
+ dict_t *xattr_rsp = NULL;
+ dict_t *xattr_alloc = NULL;
+ mdc_local_t *local = NULL;
+
+
+ local = mdc_local_get (frame);
+ if (!local)
+ goto uncached;
+
+ if (!loc->name)
+ /* A nameless discovery is dangerous to cache. We
+ perform nameless lookup with the intention of
+ re-establishing an inode "properly"
+ */
+ goto uncached;
+
+ loc_copy (&local->loc, loc);
+
+ ret = mdc_inode_iatt_get (this, loc->inode, &stbuf);
+ if (ret != 0)
+ goto uncached;
+
+ if (xdata) {
+ ret = mdc_inode_xatt_get (this, loc->inode, &xattr_rsp);
+ if (ret != 0)
+ goto uncached;
+
+ if (!mdc_xattr_satisfied (this, xdata, xattr_rsp))
+ goto uncached;
+ }
+
+ MDC_STACK_UNWIND (lookup, frame, 0, 0, loc->inode, &stbuf,
+ xattr_rsp, &postparent);
+
+ if (xattr_rsp)
+ dict_unref (xattr_rsp);
+
+ return 0;
+
+uncached:
+ if (!xdata)
+ xdata = xattr_alloc = dict_new ();
+ if (xdata)
+ mdc_load_reqs (this, xdata);
+
+ STACK_WIND (frame, mdc_lookup_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->lookup, loc, xdata);
+
+ if (xattr_rsp)
+ dict_unref (xattr_rsp);
+ if (xattr_alloc)
+ dict_unref (xattr_alloc);
+ return 0;
+}
+
+
+int
+mdc_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ if (op_ret != 0)
+ goto out;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ mdc_inode_iatt_set (this, local->loc.inode, buf);
+
+out:
+ MDC_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata);
+
+ return 0;
+}
+
+
+int
+mdc_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ int ret;
+ struct iatt stbuf;
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get (frame);
+ if (!local)
+ goto uncached;
+
+ loc_copy (&local->loc, loc);
+
+ ret = mdc_inode_iatt_get (this, loc->inode, &stbuf);
+ if (ret != 0)
+ goto uncached;
+
+ MDC_STACK_UNWIND (stat, frame, 0, 0, &stbuf, xdata);
+
+ return 0;
+
+uncached:
+ STACK_WIND (frame, mdc_stat_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->stat,
+ loc, xdata);
+ return 0;
+}
+
+
+int
+mdc_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ if (op_ret != 0)
+ goto out;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ mdc_inode_iatt_set (this, local->fd->inode, buf);
+
+out:
+ MDC_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata);
+
+ return 0;
+}
+
+
+int
+mdc_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ int ret;
+ struct iatt stbuf;
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get (frame);
+ if (!local)
+ goto uncached;
+
+ local->fd = fd_ref (fd);
+
+ ret = mdc_inode_iatt_get (this, fd->inode, &stbuf);
+ if (ret != 0)
+ goto uncached;
+
+ MDC_STACK_UNWIND (fstat, frame, 0, 0, &stbuf, xdata);
+
+ return 0;
+
+uncached:
+ STACK_WIND (frame, mdc_fstat_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fstat,
+ fd, xdata);
+ return 0;
+}
+
+
+int
+mdc_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret != 0)
+ goto out;
+
+ if (!local)
+ goto out;
+
+ mdc_inode_iatt_set_validate(this, local->loc.inode, prebuf, postbuf);
+
+out:
+ MDC_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+
+ return 0;
+}
+
+
+int
+mdc_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ off_t offset, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get (frame);
+
+ local->loc.inode = inode_ref (loc->inode);
+
+ STACK_WIND (frame, mdc_truncate_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->truncate,
+ loc, offset, xdata);
+ return 0;
+}
+
+
+int
+mdc_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret != 0)
+ goto out;
+
+ if (!local)
+ goto out;
+
+ mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf);
+
+out:
+ MDC_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+
+ return 0;
+}
+
+
+int
+mdc_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get (frame);
+
+ local->fd = fd_ref (fd);
+
+ STACK_WIND (frame, mdc_ftruncate_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->ftruncate,
+ fd, offset, xdata);
+ return 0;
+}
+
+
+int
+mdc_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret != 0)
+ goto out;
+
+ if (!local)
+ goto out;
+
+ if (local->loc.parent) {
+ mdc_inode_iatt_set (this, local->loc.parent, postparent);
+ }
+
+ if (local->loc.inode) {
+ mdc_inode_iatt_set (this, local->loc.inode, buf);
+ mdc_inode_xatt_set (this, local->loc.inode, local->xattr);
+ }
+out:
+ MDC_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+
+int
+mdc_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get (frame);
+
+ loc_copy (&local->loc, loc);
+ local->xattr = dict_ref (xdata);
+
+ STACK_WIND (frame, mdc_mknod_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod,
+ loc, mode, rdev, umask, xdata);
+ return 0;
+}
+
+
+int
+mdc_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret != 0)
+ goto out;
+
+ if (!local)
+ goto out;
+
+ if (local->loc.parent) {
+ mdc_inode_iatt_set (this, local->loc.parent, postparent);
+ }
+
+ if (local->loc.inode) {
+ mdc_inode_iatt_set (this, local->loc.inode, buf);
+ mdc_inode_xatt_set (this, local->loc.inode, local->xattr);
+ }
+out:
+ MDC_STACK_UNWIND (mkdir, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+
+int
+mdc_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, mode_t umask, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get (frame);
+
+ loc_copy (&local->loc, loc);
+ local->xattr = dict_ref (xdata);
+
+ STACK_WIND (frame, mdc_mkdir_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir,
+ loc, mode, umask, xdata);
+ return 0;
+}
+
+
+int
+mdc_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret != 0)
+ goto out;
+
+ if (!local)
+ goto out;
+
+ if (local->loc.parent) {
+ mdc_inode_iatt_set (this, local->loc.parent, postparent);
+ }
+
+ if (local->loc.inode) {
+ mdc_inode_iatt_set (this, local->loc.inode, NULL);
+ }
+
+out:
+ MDC_STACK_UNWIND (unlink, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+
+int
+mdc_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t xflag,
+ dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get (frame);
+
+ loc_copy (&local->loc, loc);
+
+ STACK_WIND (frame, mdc_unlink_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink,
+ loc, xflag, xdata);
+ return 0;
+}
+
+
+int
+mdc_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret != 0)
+ goto out;
+
+ if (!local)
+ goto out;
+
+ if (local->loc.parent) {
+ mdc_inode_iatt_set (this, local->loc.parent, postparent);
+ }
+
+out:
+ MDC_STACK_UNWIND (rmdir, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+
+int
+mdc_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flag,
+ dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get (frame);
+
+ loc_copy (&local->loc, loc);
+
+ STACK_WIND (frame, mdc_rmdir_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->rmdir,
+ loc, flag, xdata);
+ return 0;
+}
+
+
+int
+mdc_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret != 0)
+ goto out;
+
+ if (!local)
+ goto out;
+
+ if (local->loc.parent) {
+ mdc_inode_iatt_set (this, local->loc.parent, postparent);
+ }
+
+ if (local->loc.inode) {
+ mdc_inode_iatt_set (this, local->loc.inode, buf);
+ }
+out:
+ MDC_STACK_UNWIND (symlink, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+
+int
+mdc_symlink (call_frame_t *frame, xlator_t *this, const char *linkname,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get (frame);
+
+ loc_copy (&local->loc, loc);
+
+ local->linkname = gf_strdup (linkname);
+
+ STACK_WIND (frame, mdc_symlink_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink,
+ linkname, loc, umask, xdata);
+ return 0;
+}
+
+
+int
+mdc_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret != 0)
+ goto out;
+
+ if (!local)
+ goto out;
+
+ if (local->loc.parent) {
+ mdc_inode_iatt_set (this, local->loc.parent, postoldparent);
+ }
+
+ if (local->loc.inode) {
+ /* TODO: fix dht_rename() not to return linkfile
+ attributes before setting attributes here
+ */
+
+ mdc_inode_iatt_set (this, local->loc.inode, NULL);
+ }
+
+ if (local->loc2.parent) {
+ mdc_inode_iatt_set (this, local->loc2.parent, postnewparent);
+ }
+out:
+ MDC_STACK_UNWIND (rename, frame, op_ret, op_errno, buf,
+ preoldparent, postoldparent, prenewparent,
+ postnewparent, xdata);
+ return 0;
+}
+
+
+int
+mdc_rename (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get (frame);
+
+ loc_copy (&local->loc, oldloc);
+ loc_copy (&local->loc2, newloc);
+
+ STACK_WIND (frame, mdc_rename_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->rename,
+ oldloc, newloc, xdata);
+ return 0;
+}
+
+
+int
+mdc_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret != 0)
+ goto out;
+
+ if (!local)
+ goto out;
+
+ if (local->loc.inode) {
+ mdc_inode_iatt_set (this, local->loc.inode, buf);
+ }
+
+ if (local->loc2.parent) {
+ mdc_inode_iatt_set (this, local->loc2.parent, postparent);
+ }
+out:
+ MDC_STACK_UNWIND (link, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+
+int
+mdc_link (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get (frame);
+
+ loc_copy (&local->loc, oldloc);
+ loc_copy (&local->loc2, newloc);
+
+ STACK_WIND (frame, mdc_link_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->link,
+ oldloc, newloc, xdata);
+ return 0;
+}
+
+
+int
+mdc_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret != 0)
+ goto out;
+
+ if (!local)
+ goto out;
+
+ if (local->loc.parent) {
+ mdc_inode_iatt_set (this, local->loc.parent, postparent);
+ }
+
+ if (local->loc.inode) {
+ mdc_inode_iatt_set (this, inode, buf);
+ mdc_inode_xatt_set (this, local->loc.inode, local->xattr);
+ }
+out:
+ MDC_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+
+int
+mdc_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get (frame);
+
+ loc_copy (&local->loc, loc);
+ local->xattr = dict_ref (xdata);
+
+ STACK_WIND (frame, mdc_create_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->create,
+ loc, flags, mode, umask, fd, xdata);
+ return 0;
+}
+
+
+int
+mdc_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iovec *vector, int32_t count,
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret != 0)
+ goto out;
+
+ if (!local)
+ goto out;
+
+ mdc_inode_iatt_set (this, local->fd->inode, stbuf);
+
+out:
+ MDC_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count,
+ stbuf, iobref, xdata);
+
+ return 0;
+}
+
+
+int
+mdc_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get (frame);
+
+ local->fd = fd_ref (fd);
+
+ STACK_WIND (frame, mdc_readv_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv,
+ fd, size, offset, flags, xdata);
+ return 0;
+}
+
+
+int
+mdc_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret == -1)
+ goto out;
+
+ if (!local)
+ goto out;
+
+ mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf);
+
+out:
+ MDC_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+
+ return 0;
+}
+
+
+int
+mdc_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get (frame);
+
+ local->fd = fd_ref (fd);
+
+ STACK_WIND (frame, mdc_writev_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev,
+ fd, vector, count, offset, flags, iobref, xdata);
+ return 0;
+}
+
+
+int
+mdc_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret != 0) {
+ mdc_inode_iatt_set (this, local->loc.inode, NULL);
+ goto out;
+ }
+
+ if (!local)
+ goto out;
+
+ mdc_inode_iatt_set_validate(this, local->loc.inode, prebuf, postbuf);
+
+out:
+ MDC_STACK_UNWIND (setattr, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+
+ return 0;
+}
+
+
+int
+mdc_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int valid, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get (frame);
+
+ loc_copy (&local->loc, loc);
+
+ STACK_WIND (frame, mdc_setattr_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->setattr,
+ loc, stbuf, valid, xdata);
+ return 0;
+}
+
+
+int
+mdc_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret != 0)
+ goto out;
+
+ if (!local)
+ goto out;
+
+ mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf);
+
+out:
+ MDC_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+
+ return 0;
+}
+
+
+int
+mdc_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int valid, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get (frame);
+
+ local->fd = fd_ref (fd);
+
+ STACK_WIND (frame, mdc_fsetattr_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetattr,
+ fd, stbuf, valid, xdata);
+ return 0;
+}
+
+
+int
+mdc_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret != 0)
+ goto out;
+
+ if (!local)
+ goto out;
+
+ mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf);
+
+out:
+ MDC_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+
+ return 0;
+}
+
+
+int
+mdc_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
+ dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get (frame);
+
+ local->fd = fd_ref (fd);
+
+ STACK_WIND (frame, mdc_fsync_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync,
+ fd, datasync, xdata);
+ return 0;
+}
+
+
+int
+mdc_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret != 0)
+ goto out;
+
+ if (!local)
+ goto out;
+
+ mdc_inode_xatt_update (this, local->loc.inode, local->xattr);
+
+ mdc_inode_iatt_invalidate (this, local->loc.inode);
+
+out:
+ MDC_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+
+int
+mdc_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xattr, int flags, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get (frame);
+
+ loc_copy (&local->loc, loc);
+ local->xattr = dict_ref (xattr);
+
+ STACK_WIND (frame, mdc_setxattr_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr,
+ loc, xattr, flags, xdata);
+ return 0;
+}
+
+
+int
+mdc_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret != 0)
+ goto out;
+
+ if (!local)
+ goto out;
+
+ mdc_inode_xatt_update (this, local->fd->inode, local->xattr);
+
+ mdc_inode_iatt_invalidate (this, local->fd->inode);
+out:
+ MDC_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+
+int
+mdc_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *xattr, int flags, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get (frame);
+
+ local->fd = fd_ref (fd);
+ local->xattr = dict_ref (xattr);
+
+ STACK_WIND (frame, mdc_fsetxattr_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetxattr,
+ fd, xattr, flags, xdata);
+ return 0;
+}
+
+int
+mdc_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ if (op_ret != 0)
+ goto out;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ mdc_inode_xatt_update (this, local->loc.inode, xattr);
+
+out:
+ MDC_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata);
+
+ return 0;
+}
+
+
+int
+mdc_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
+ dict_t *xdata)
+{
+ int ret;
+ int op_errno = ENODATA;
+ mdc_local_t *local = NULL;
+ dict_t *xattr = NULL;
+
+ local = mdc_local_get (frame);
+ if (!local)
+ goto uncached;
+
+ loc_copy (&local->loc, loc);
+
+ if (!is_mdc_key_satisfied (key))
+ goto uncached;
+
+ ret = mdc_inode_xatt_get (this, loc->inode, &xattr);
+ if (ret != 0)
+ goto uncached;
+
+ if (!xattr || !dict_get (xattr, (char *)key)) {
+ ret = -1;
+ op_errno = ENODATA;
+ }
+
+ MDC_STACK_UNWIND (getxattr, frame, ret, op_errno, xattr, xdata);
+
+ return 0;
+
+uncached:
+ STACK_WIND (frame, mdc_getxattr_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->getxattr,
+ loc, key, xdata);
+ return 0;
+}
+
+
+int
+mdc_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ if (op_ret != 0)
+ goto out;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ mdc_inode_xatt_update (this, local->fd->inode, xattr);
+
+out:
+ MDC_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, xattr, xdata);
+
+ return 0;
+}
+
+
+int
+mdc_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
+ dict_t *xdata)
+{
+ int ret;
+ mdc_local_t *local = NULL;
+ dict_t *xattr = NULL;
+ int op_errno = ENODATA;
+
+ local = mdc_local_get (frame);
+ if (!local)
+ goto uncached;
+
+ local->fd = fd_ref (fd);
+
+ if (!is_mdc_key_satisfied (key))
+ goto uncached;
+
+ ret = mdc_inode_xatt_get (this, fd->inode, &xattr);
+ if (ret != 0)
+ goto uncached;
+
+ if (!xattr || !dict_get (xattr, (char *)key)) {
+ ret = -1;
+ op_errno = ENODATA;
+ }
+
+ MDC_STACK_UNWIND (fgetxattr, frame, ret, op_errno, xattr, xdata);
+
+ return 0;
+
+uncached:
+ STACK_WIND (frame, mdc_fgetxattr_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fgetxattr,
+ fd, key, xdata);
+ return 0;
+}
+
+int
+mdc_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret != 0)
+ goto out;
+
+ if (!local)
+ goto out;
+
+ if (local->key)
+ mdc_inode_xatt_unset (this, local->loc.inode, local->key);
+ else
+ mdc_inode_xatt_invalidate (this, local->loc.inode);
+
+ mdc_inode_iatt_invalidate (this, local->loc.inode);
+out:
+ MDC_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+
+int
+mdc_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get (frame);
+
+ loc_copy (&local->loc, loc);
+
+ local->key = gf_strdup (name);
+
+ STACK_WIND (frame, mdc_removexattr_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->removexattr,
+ loc, name, xdata);
+ return 0;
+}
+
+
+int
+mdc_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret != 0)
+ goto out;
+
+ if (!local)
+ goto out;
+
+ if (local->key)
+ mdc_inode_xatt_unset (this, local->fd->inode, local->key);
+ else
+ mdc_inode_xatt_invalidate (this, local->fd->inode);
+
+ mdc_inode_iatt_invalidate (this, local->fd->inode);
+out:
+ MDC_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+
+int
+mdc_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = mdc_local_get (frame);
+
+ local->fd = fd_ref (fd);
+
+ local->key = gf_strdup (name);
+
+ STACK_WIND (frame, mdc_fremovexattr_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fremovexattr,
+ fd, name, xdata);
+ return 0;
+}
+
+
+int
+mdc_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries, dict_t *xdata)
+{
+ gf_dirent_t *entry = NULL;
+
+ if (op_ret <= 0)
+ goto unwind;
+
+ list_for_each_entry (entry, &entries->list, list) {
+ if (!entry->inode)
+ continue;
+ mdc_inode_iatt_set (this, entry->inode, &entry->d_stat);
+ mdc_inode_xatt_set (this, entry->inode, entry->dict);
+ }
+
+unwind:
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
+ return 0;
+}
+
+
+int
+mdc_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata)
+{
+ dict_t *xattr_alloc = NULL;
+
+ if (!xdata)
+ xdata = xattr_alloc = dict_new ();
+ if (xdata)
+ mdc_load_reqs (this, xdata);
+
+ STACK_WIND (frame, mdc_readdirp_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->readdirp,
+ fd, size, offset, xdata);
+ if (xattr_alloc)
+ dict_unref (xattr_alloc);
+ return 0;
+}
+
+int
+mdc_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, gf_dirent_t *entries, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries, xdata);
+ return 0;
+}
+
+int
+mdc_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata)
+{
+ int need_unref = 0;
+ struct mdc_conf *conf = this->private;
+
+ if (!conf->force_readdirp) {
+ STACK_WIND(frame, mdc_readdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdir, fd, size, offset,
+ xdata);
+ return 0;
+ }
+
+ if (!xdata) {
+ xdata = dict_new ();
+ need_unref = 1;
+ }
+
+ if (xdata)
+ mdc_load_reqs (this, xdata);
+
+ STACK_WIND(frame, mdc_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, offset,
+ xdata);
+
+ if (need_unref && xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+int
+mdc_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret != 0)
+ goto out;
+
+ if (!local)
+ goto out;
+
+ mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf);
+
+out:
+ MDC_STACK_UNWIND (fallocate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+
+ return 0;
+}
+
+int mdc_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ mdc_local_t *local;
+
+ local = mdc_local_get(frame);
+ local->fd = fd_ref(fd);
+
+ STACK_WIND(frame, mdc_fallocate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len,
+ xdata);
+
+ return 0;
+}
+
+int
+mdc_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret != 0)
+ goto out;
+
+ if (!local)
+ goto out;
+
+ mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf);
+
+out:
+ MDC_STACK_UNWIND(discard, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+
+ return 0;
+}
+
+int mdc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ mdc_local_t *local;
+
+ local = mdc_local_get(frame);
+ local->fd = fd_ref(fd);
+
+ STACK_WIND(frame, mdc_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len,
+ xdata);
+
+ return 0;
+}
+
+int
+mdc_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ mdc_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret != 0)
+ goto out;
+
+ if (!local)
+ goto out;
+
+ mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf);
+
+out:
+ MDC_STACK_UNWIND(zerofill, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+
+ return 0;
+}
+
+int mdc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ mdc_local_t *local;
+
+ local = mdc_local_get(frame);
+ local->fd = fd_ref(fd);
+
+ STACK_WIND(frame, mdc_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len,
+ xdata);
+
+ return 0;
+}
+
+
+int
+mdc_forget (xlator_t *this, inode_t *inode)
+{
+ mdc_inode_wipe (this, inode);
+
+ return 0;
+}
+
+
+int
+is_strpfx (const char *str1, const char *str2)
+{
+ /* is one of the string a prefix of the other? */
+ int i;
+
+ for (i = 0; str1[i] == str2[i]; i++) {
+ if (!str1[i] || !str2[i])
+ break;
+ }
+
+ return !(str1[i] && str2[i]);
+}
+
+
+int
+mdc_key_load_set (struct mdc_key *keys, char *pattern, gf_boolean_t val)
+{
+ struct mdc_key *key = NULL;
+
+ for (key = keys; key->name; key++) {
+ if (is_strpfx (key->name, pattern))
+ key->load = val;
+ }
+
+ return 0;
+}
+
+
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ struct mdc_conf *conf = NULL;
+
+ conf = this->private;
+
+ GF_OPTION_RECONF ("md-cache-timeout", conf->timeout, options, int32, out);
+
+ GF_OPTION_RECONF ("cache-selinux", conf->cache_selinux, options, bool, out);
+ mdc_key_load_set (mdc_keys, "security.", conf->cache_selinux);
+
+ GF_OPTION_RECONF ("cache-posix-acl", conf->cache_posix_acl, options, bool, out);
+ mdc_key_load_set (mdc_keys, "system.posix_acl_", conf->cache_posix_acl);
+
+ GF_OPTION_RECONF("force-readdirp", conf->force_readdirp, options, bool, out);
+
+out:
+ return 0;
+}
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ ret = xlator_mem_acct_init (this, gf_mdc_mt_end + 1);
+ return ret;
+}
+
+int
+init (xlator_t *this)
+{
+ struct mdc_conf *conf = NULL;
+
+ conf = GF_CALLOC (sizeof (*conf), 1, gf_mdc_mt_mdc_conf_t);
+ if (!conf) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "out of memory");
+ return -1;
+ }
+
+ GF_OPTION_INIT ("md-cache-timeout", conf->timeout, int32, out);
+
+ GF_OPTION_INIT ("cache-selinux", conf->cache_selinux, bool, out);
+ mdc_key_load_set (mdc_keys, "security.", conf->cache_selinux);
+
+ GF_OPTION_INIT ("cache-posix-acl", conf->cache_posix_acl, bool, out);
+ mdc_key_load_set (mdc_keys, "system.posix_acl_", conf->cache_posix_acl);
+
+ GF_OPTION_INIT("force-readdirp", conf->force_readdirp, bool, out);
+out:
+ this->private = conf;
+
+ return 0;
+}
+
+
+void
+fini (xlator_t *this)
+{
+ return;
+}
+
+
+struct xlator_fops fops = {
+ .lookup = mdc_lookup,
+ .stat = mdc_stat,
+ .fstat = mdc_fstat,
+ .truncate = mdc_truncate,
+ .ftruncate = mdc_ftruncate,
+ .mknod = mdc_mknod,
+ .mkdir = mdc_mkdir,
+ .unlink = mdc_unlink,
+ .rmdir = mdc_rmdir,
+ .symlink = mdc_symlink,
+ .rename = mdc_rename,
+ .link = mdc_link,
+ .create = mdc_create,
+ .readv = mdc_readv,
+ .writev = mdc_writev,
+ .setattr = mdc_setattr,
+ .fsetattr = mdc_fsetattr,
+ .fsync = mdc_fsync,
+ .setxattr = mdc_setxattr,
+ .fsetxattr = mdc_fsetxattr,
+ .getxattr = mdc_getxattr,
+ .fgetxattr = mdc_fgetxattr,
+ .removexattr = mdc_removexattr,
+ .fremovexattr= mdc_fremovexattr,
+ .readdirp = mdc_readdirp,
+ .readdir = mdc_readdir,
+ .fallocate = mdc_fallocate,
+ .discard = mdc_discard,
+ .zerofill = mdc_zerofill,
+};
+
+
+struct xlator_cbks cbks = {
+ .forget = mdc_forget,
+};
+
+struct volume_options options[] = {
+ { .key = {"cache-selinux"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ },
+ { .key = {"cache-posix-acl"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ },
+ { .key = {"md-cache-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 60,
+ .default_value = "1",
+ .description = "Time period after which cache has to be refreshed",
+ },
+ { .key = {"force-readdirp"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .description = "Convert all readdir requests to readdirplus to "
+ "collect stat info on each entry.",
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/performance/open-behind/Makefile.am b/xlators/performance/open-behind/Makefile.am
new file mode 100644
index 000000000..af437a64d
--- /dev/null
+++ b/xlators/performance/open-behind/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = src
diff --git a/xlators/performance/open-behind/src/Makefile.am b/xlators/performance/open-behind/src/Makefile.am
new file mode 100644
index 000000000..125285707
--- /dev/null
+++ b/xlators/performance/open-behind/src/Makefile.am
@@ -0,0 +1,15 @@
+xlator_LTLIBRARIES = open-behind.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
+
+open_behind_la_LDFLAGS = -module -avoid-version
+
+open_behind_la_SOURCES = open-behind.c
+open_behind_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = open-behind-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/performance/open-behind/src/open-behind-mem-types.h b/xlators/performance/open-behind/src/open-behind-mem-types.h
new file mode 100644
index 000000000..1e94296f4
--- /dev/null
+++ b/xlators/performance/open-behind/src/open-behind-mem-types.h
@@ -0,0 +1,21 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __OB_MEM_TYPES_H__
+#define __OB_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_ob_mem_types_ {
+ gf_ob_mt_fd_t = gf_common_mt_end + 1,
+ gf_ob_mt_conf_t,
+ gf_ob_mt_end
+};
+#endif
diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c
new file mode 100644
index 000000000..7e5b57278
--- /dev/null
+++ b/xlators/performance/open-behind/src/open-behind.c
@@ -0,0 +1,1001 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "open-behind-mem-types.h"
+#include "xlator.h"
+#include "statedump.h"
+#include "call-stub.h"
+#include "defaults.h"
+
+typedef struct ob_conf {
+ gf_boolean_t use_anonymous_fd; /* use anonymous FDs wherever safe
+ e.g - fstat() readv()
+
+ whereas for fops like writev(), lk(),
+ the fd is important for side effects
+ like mandatory locks
+ */
+ gf_boolean_t lazy_open; /* delay backend open as much as possible */
+} ob_conf_t;
+
+
+typedef struct ob_fd {
+ call_frame_t *open_frame;
+ loc_t loc;
+ dict_t *xdata;
+ int flags;
+ int op_errno;
+ struct list_head list;
+} ob_fd_t;
+
+
+ob_fd_t *
+__ob_fd_ctx_get (xlator_t *this, fd_t *fd)
+{
+ uint64_t value = 0;
+ int ret = -1;
+ ob_fd_t *ob_fd = NULL;
+
+ ret = __fd_ctx_get (fd, this, &value);
+ if (ret)
+ return NULL;
+
+ ob_fd = (void *) ((long) value);
+
+ return ob_fd;
+}
+
+
+ob_fd_t *
+ob_fd_ctx_get (xlator_t *this, fd_t *fd)
+{
+ ob_fd_t *ob_fd = NULL;
+
+ LOCK (&fd->lock);
+ {
+ ob_fd = __ob_fd_ctx_get (this, fd);
+ }
+ UNLOCK (&fd->lock);
+
+ return ob_fd;
+}
+
+
+int
+__ob_fd_ctx_set (xlator_t *this, fd_t *fd, ob_fd_t *ob_fd)
+{
+ uint64_t value = 0;
+ int ret = -1;
+
+ value = (long) ((void *) ob_fd);
+
+ ret = __fd_ctx_set (fd, this, value);
+
+ return ret;
+}
+
+
+int
+ob_fd_ctx_set (xlator_t *this, fd_t *fd, ob_fd_t *ob_fd)
+{
+ int ret = -1;
+
+ LOCK (&fd->lock);
+ {
+ ret = __ob_fd_ctx_set (this, fd, ob_fd);
+ }
+ UNLOCK (&fd->lock);
+
+ return ret;
+}
+
+
+ob_fd_t *
+ob_fd_new (void)
+{
+ ob_fd_t *ob_fd = NULL;
+
+ ob_fd = GF_CALLOC (1, sizeof (*ob_fd), gf_ob_mt_fd_t);
+
+ INIT_LIST_HEAD (&ob_fd->list);
+
+ return ob_fd;
+}
+
+
+void
+ob_fd_free (ob_fd_t *ob_fd)
+{
+ loc_wipe (&ob_fd->loc);
+
+ if (ob_fd->xdata)
+ dict_unref (ob_fd->xdata);
+
+ if (ob_fd->open_frame)
+ STACK_DESTROY (ob_fd->open_frame->root);
+
+ GF_FREE (ob_fd);
+}
+
+
+int
+ob_wake_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, fd_t *fd_ret, dict_t *xdata)
+{
+ fd_t *fd = NULL;
+ struct list_head list;
+ ob_fd_t *ob_fd = NULL;
+ call_stub_t *stub = NULL, *tmp = NULL;
+
+ fd = frame->local;
+ frame->local = NULL;
+
+ INIT_LIST_HEAD (&list);
+
+ LOCK (&fd->lock);
+ {
+ ob_fd = __ob_fd_ctx_get (this, fd);
+
+ list_splice_init (&ob_fd->list, &list);
+
+ if (op_ret < 0) {
+ /* mark fd BAD for ever */
+ ob_fd->op_errno = op_errno;
+ } else {
+ __fd_ctx_del (fd, this, NULL);
+ ob_fd_free (ob_fd);
+ }
+ }
+ UNLOCK (&fd->lock);
+
+ list_for_each_entry_safe (stub, tmp, &list, list) {
+ list_del_init (&stub->list);
+
+ if (op_ret < 0)
+ call_unwind_error (stub, -1, op_errno);
+ else
+ call_resume (stub);
+ }
+
+ fd_unref (fd);
+
+ STACK_DESTROY (frame->root);
+
+ return 0;
+}
+
+
+int
+ob_fd_wake (xlator_t *this, fd_t *fd)
+{
+ call_frame_t *frame = NULL;
+ ob_fd_t *ob_fd = NULL;
+
+ LOCK (&fd->lock);
+ {
+ ob_fd = __ob_fd_ctx_get (this, fd);
+ if (!ob_fd)
+ goto unlock;
+
+ frame = ob_fd->open_frame;
+ ob_fd->open_frame = NULL;
+ }
+unlock:
+ UNLOCK (&fd->lock);
+
+ if (frame) {
+ frame->local = fd_ref (fd);
+
+ STACK_WIND (frame, ob_wake_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->open,
+ &ob_fd->loc, ob_fd->flags, fd, ob_fd->xdata);
+ }
+
+ return 0;
+}
+
+
+int
+open_and_resume (xlator_t *this, fd_t *fd, call_stub_t *stub)
+{
+ ob_fd_t *ob_fd = NULL;
+ int op_errno = 0;
+
+ if (!fd)
+ goto nofd;
+
+ LOCK (&fd->lock);
+ {
+ ob_fd = __ob_fd_ctx_get (this, fd);
+ if (!ob_fd)
+ goto unlock;
+
+ if (ob_fd->op_errno) {
+ op_errno = ob_fd->op_errno;
+ goto unlock;
+ }
+
+ list_add_tail (&stub->list, &ob_fd->list);
+ }
+unlock:
+ UNLOCK (&fd->lock);
+
+nofd:
+ if (op_errno)
+ call_unwind_error (stub, -1, op_errno);
+ else if (ob_fd)
+ ob_fd_wake (this, fd);
+ else
+ call_resume (stub);
+
+ return 0;
+}
+
+
+int
+ob_open_behind (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ fd_t *fd, dict_t *xdata)
+{
+ ob_fd_t *ob_fd = NULL;
+ int ret = -1;
+ ob_conf_t *conf = NULL;
+
+
+ conf = this->private;
+
+ if (flags & O_TRUNC) {
+ STACK_WIND (frame, default_open_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->open,
+ loc, flags, fd, xdata);
+ return 0;
+ }
+
+ ob_fd = ob_fd_new ();
+ if (!ob_fd)
+ goto enomem;
+
+ ob_fd->open_frame = copy_frame (frame);
+ if (!ob_fd->open_frame)
+ goto enomem;
+ ret = loc_copy (&ob_fd->loc, loc);
+ if (ret)
+ goto enomem;
+
+ ob_fd->flags = flags;
+ if (xdata)
+ ob_fd->xdata = dict_ref (xdata);
+
+ ret = ob_fd_ctx_set (this, fd, ob_fd);
+ if (ret)
+ goto enomem;
+
+ fd_ref (fd);
+
+ STACK_UNWIND_STRICT (open, frame, 0, 0, fd, xdata);
+
+ if (!conf->lazy_open)
+ ob_fd_wake (this, fd);
+
+ fd_unref (fd);
+
+ return 0;
+enomem:
+ if (ob_fd) {
+ if (ob_fd->open_frame)
+ STACK_DESTROY (ob_fd->open_frame->root);
+ loc_wipe (&ob_fd->loc);
+ if (ob_fd->xdata)
+ dict_unref (ob_fd->xdata);
+ GF_FREE (ob_fd);
+ }
+
+ return -1;
+}
+
+
+int
+ob_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ fd_t *fd, dict_t *xdata)
+{
+ fd_t *old_fd = NULL;
+ int ret = -1;
+ int op_errno = 0;
+ call_stub_t *stub = NULL;
+
+ old_fd = fd_lookup (fd->inode, 0);
+ if (old_fd) {
+ /* open-behind only when this is the first FD */
+ stub = fop_open_stub (frame, default_open_resume,
+ loc, flags, fd, xdata);
+ if (!stub) {
+ op_errno = ENOMEM;
+ fd_unref (old_fd);
+ goto err;
+ }
+
+ open_and_resume (this, old_fd, stub);
+
+ fd_unref (old_fd);
+
+ return 0;
+ }
+
+ ret = ob_open_behind (frame, this, loc, flags, fd, xdata);
+ if (ret) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ return 0;
+err:
+ gf_log (this->name, GF_LOG_ERROR, "%s: %s", loc->path,
+ strerror (op_errno));
+
+ STACK_UNWIND_STRICT (open, frame, -1, op_errno, 0, 0);
+
+ return 0;
+}
+
+
+fd_t *
+ob_get_wind_fd (xlator_t *this, fd_t *fd)
+{
+ ob_conf_t *conf = NULL;
+ ob_fd_t *ob_fd = NULL;
+
+ conf = this->private;
+
+ ob_fd = ob_fd_ctx_get (this, fd);
+
+ if (ob_fd && conf->use_anonymous_fd)
+ return fd_anonymous (fd->inode);
+
+ return fd_ref (fd);
+}
+
+
+int
+ob_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ fd_t *wind_fd = NULL;
+
+ wind_fd = ob_get_wind_fd (this, fd);
+
+ stub = fop_readv_stub (frame, default_readv_resume, wind_fd,
+ size, offset, flags, xdata);
+ fd_unref (wind_fd);
+
+ if (!stub)
+ goto err;
+
+ open_and_resume (this, wind_fd, stub);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM, 0, 0, 0, 0, 0);
+
+ return 0;
+}
+
+
+int
+ob_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *iov,
+ int count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+
+ stub = fop_writev_stub (frame, default_writev_resume, fd, iov, count,
+ offset, flags, iobref, xdata);
+ if (!stub)
+ goto err;
+
+ open_and_resume (this, fd, stub);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, 0, 0, 0);
+
+ return 0;
+}
+
+
+int
+ob_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ fd_t *wind_fd = NULL;
+
+ wind_fd = ob_get_wind_fd (this, fd);
+
+ stub = fop_fstat_stub (frame, default_fstat_resume, wind_fd, xdata);
+
+ fd_unref (wind_fd);
+
+ if (!stub)
+ goto err;
+
+ open_and_resume (this, wind_fd, stub);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (fstat, frame, -1, ENOMEM, 0, 0);
+
+ return 0;
+}
+
+
+int
+ob_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ ob_fd_t *ob_fd = NULL;
+ gf_boolean_t unwind = _gf_false;
+
+ LOCK (&fd->lock);
+ {
+ ob_fd = __ob_fd_ctx_get (this, fd);
+ if (ob_fd && ob_fd->open_frame)
+ /* if open() was never wound to backend,
+ no need to wind flush() either.
+ */
+ unwind = _gf_true;
+ }
+ UNLOCK (&fd->lock);
+
+ if (unwind)
+ goto unwind;
+
+ stub = fop_flush_stub (frame, default_flush_resume, fd, xdata);
+ if (!stub)
+ goto err;
+
+ open_and_resume (this, fd, stub);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM, 0);
+
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT (flush, frame, 0, 0, 0);
+
+ return 0;
+}
+
+
+int
+ob_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int flag,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+
+ stub = fop_fsync_stub (frame, default_fsync_resume, fd, flag, xdata);
+ if (!stub)
+ goto err;
+
+ open_and_resume (this, fd, stub);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, 0, 0, 0);
+
+ return 0;
+}
+
+
+int
+ob_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd,
+ struct gf_flock *flock, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+
+ stub = fop_lk_stub (frame, default_lk_resume, fd, cmd, flock, xdata);
+ if (!stub)
+ goto err;
+
+ open_and_resume (this, fd, stub);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (lk, frame, -1, ENOMEM, 0, 0);
+
+ return 0;
+}
+
+int
+ob_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+
+ stub = fop_ftruncate_stub (frame, default_ftruncate_resume, fd, offset,
+ xdata);
+ if (!stub)
+ goto err;
+
+ open_and_resume (this, fd, stub);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, 0, 0, 0);
+
+ return 0;
+}
+
+
+int
+ob_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr,
+ int flags, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+
+ stub = fop_fsetxattr_stub (frame, default_fsetxattr_resume, fd, xattr,
+ flags, xdata);
+ if (!stub)
+ goto err;
+
+ open_and_resume (this, fd, stub);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (fsetxattr, frame, -1, ENOMEM, 0);
+
+ return 0;
+}
+
+
+int
+ob_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+
+ stub = fop_fgetxattr_stub (frame, default_fgetxattr_resume, fd, name,
+ xdata);
+ if (!stub)
+ goto err;
+
+ open_and_resume (this, fd, stub);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (fgetxattr, frame, -1, ENOMEM, 0, 0);
+
+ return 0;
+}
+
+
+int
+ob_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+
+ stub = fop_fremovexattr_stub (frame, default_fremovexattr_resume, fd,
+ name, xdata);
+ if (!stub)
+ goto err;
+
+ open_and_resume (this, fd, stub);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (fremovexattr, frame, -1, ENOMEM, 0);
+
+ return 0;
+}
+
+
+int
+ob_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ int cmd, struct gf_flock *flock, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+
+ stub = fop_finodelk_stub (frame, default_finodelk_resume, volume, fd,
+ cmd, flock, xdata);
+ if (!stub)
+ goto err;
+
+ open_and_resume (this, fd, stub);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (finodelk, frame, -1, ENOMEM, 0);
+
+ return 0;
+}
+
+
+int
+ob_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+
+ stub = fop_fentrylk_stub (frame, default_fentrylk_resume, volume, fd,
+ basename, cmd, type, xdata);
+ if (!stub)
+ goto err;
+
+ open_and_resume (this, fd, stub);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOMEM, 0);
+
+ return 0;
+}
+
+
+int
+ob_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+
+ stub = fop_fxattrop_stub (frame, default_fxattrop_resume, fd, optype,
+ xattr, xdata);
+ if (!stub)
+ goto err;
+
+ open_and_resume (this, fd, stub);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (fxattrop, frame, -1, ENOMEM, 0, 0);
+
+ return 0;
+}
+
+
+int
+ob_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *iatt, int valid, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+
+ stub = fop_fsetattr_stub (frame, default_fsetattr_resume, fd,
+ iatt, valid, xdata);
+ if (!stub)
+ goto err;
+
+ open_and_resume (this, fd, stub);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM, 0, 0, 0);
+
+ return 0;
+}
+
+int
+ob_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ call_stub_t *stub;
+
+ stub = fop_fallocate_stub(frame, default_fallocate_resume, fd, mode,
+ offset, len, xdata);
+ if (!stub)
+ goto err;
+
+ open_and_resume(this, fd, stub);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT(fallocate, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+}
+
+int
+ob_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ call_stub_t *stub;
+
+ stub = fop_discard_stub(frame, default_discard_resume, fd, offset, len,
+ xdata);
+ if (!stub)
+ goto err;
+
+ open_and_resume(this, fd, stub);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT(discard, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+}
+
+int
+ob_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ call_stub_t *stub;
+
+ stub = fop_zerofill_stub(frame, default_zerofill_resume, fd,
+ offset, len, xdata);
+ if (!stub)
+ goto err;
+
+ open_and_resume(this, fd, stub);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT(zerofill, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+}
+
+
+int
+ob_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
+ dict_t *xdata)
+{
+ fd_t *fd = NULL;
+ call_stub_t *stub = NULL;
+
+ stub = fop_unlink_stub (frame, default_unlink_resume, loc,
+ xflags, xdata);
+ if (!stub)
+ goto err;
+
+ fd = fd_lookup (loc->inode, 0);
+
+ open_and_resume (this, fd, stub);
+ if (fd)
+ fd_unref (fd);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM, 0, 0, 0);
+
+ return 0;
+}
+
+
+int
+ob_rename (call_frame_t *frame, xlator_t *this, loc_t *src, loc_t *dst,
+ dict_t *xdata)
+{
+ fd_t *fd = NULL;
+ call_stub_t *stub = NULL;
+
+ stub = fop_rename_stub (frame, default_rename_resume, src, dst, xdata);
+ if (!stub)
+ goto err;
+
+ if (dst->inode)
+ fd = fd_lookup (dst->inode, 0);
+
+ open_and_resume (this, fd, stub);
+ if (fd)
+ fd_unref (fd);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (rename, frame, -1, ENOMEM, 0, 0, 0, 0, 0, 0);
+
+ return 0;
+}
+
+
+int
+ob_release (xlator_t *this, fd_t *fd)
+{
+ ob_fd_t *ob_fd = NULL;
+
+ ob_fd = ob_fd_ctx_get (this, fd);
+
+ ob_fd_free (ob_fd);
+
+ return 0;
+}
+
+
+int
+ob_priv_dump (xlator_t *this)
+{
+ ob_conf_t *conf = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+
+ conf = this->private;
+
+ if (!conf)
+ return -1;
+
+ gf_proc_dump_build_key (key_prefix, "xlator.performance.open-behind",
+ "priv");
+
+ gf_proc_dump_add_section (key_prefix);
+
+ gf_proc_dump_write ("use_anonymous_fd", "%d", conf->use_anonymous_fd);
+
+ gf_proc_dump_write ("lazy_open", "%d", conf->lazy_open);
+
+ return 0;
+}
+
+
+int
+ob_fdctx_dump (xlator_t *this, fd_t *fd)
+{
+ ob_fd_t *ob_fd = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
+ int ret = 0;
+
+ ret = TRY_LOCK (&fd->lock);
+ if (ret)
+ return 0;
+
+ ob_fd = __ob_fd_ctx_get (this, fd);
+ if (!ob_fd) {
+ UNLOCK (&fd->lock);
+ return 0;
+ }
+
+ gf_proc_dump_build_key (key_prefix, "xlator.performance.open-behind",
+ "file");
+ gf_proc_dump_add_section (key_prefix);
+
+ gf_proc_dump_write ("fd", "%p", fd);
+
+ gf_proc_dump_write ("open_frame", "%p", ob_fd->open_frame);
+
+ gf_proc_dump_write ("open_frame.root.unique", "%p",
+ ob_fd->open_frame->root->unique);
+
+ gf_proc_dump_write ("loc.path", "%s", ob_fd->loc.path);
+
+ gf_proc_dump_write ("loc.ino", "%s", uuid_utoa (ob_fd->loc.gfid));
+
+ gf_proc_dump_write ("flags", "%p", ob_fd->open_frame);
+
+ UNLOCK (&fd->lock);
+
+ return 0;
+}
+
+
+int
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ ret = xlator_mem_acct_init (this, gf_ob_mt_end + 1);
+
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting failed");
+
+ return ret;
+}
+
+
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ ob_conf_t *conf = NULL;
+ int ret = -1;
+
+ conf = this->private;
+
+ GF_OPTION_RECONF ("use-anonymous-fd", conf->use_anonymous_fd, options,
+ bool, out);
+
+ GF_OPTION_RECONF ("lazy-open", conf->lazy_open, options, bool, out);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+int
+init (xlator_t *this)
+{
+ ob_conf_t *conf = NULL;
+
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "FATAL: volume (%s) not configured with exactly one "
+ "child", this->name);
+ return -1;
+ }
+
+ if (!this->parents)
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+
+ conf = GF_CALLOC (1, sizeof (*conf), gf_ob_mt_conf_t);
+ if (!conf)
+ goto err;
+
+ GF_OPTION_INIT ("use-anonymous-fd", conf->use_anonymous_fd, bool, err);
+
+ GF_OPTION_INIT ("lazy-open", conf->lazy_open, bool, err);
+
+ this->private = conf;
+
+ return 0;
+err:
+ if (conf)
+ GF_FREE (conf);
+
+ return -1;
+}
+
+
+void
+fini (xlator_t *this)
+{
+ ob_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ GF_FREE (conf);
+
+ return;
+}
+
+
+struct xlator_fops fops = {
+ .open = ob_open,
+ .readv = ob_readv,
+ .writev = ob_writev,
+ .flush = ob_flush,
+ .fsync = ob_fsync,
+ .fstat = ob_fstat,
+ .ftruncate = ob_ftruncate,
+ .fsetxattr = ob_fsetxattr,
+ .fgetxattr = ob_fgetxattr,
+ .fremovexattr = ob_fremovexattr,
+ .finodelk = ob_finodelk,
+ .fentrylk = ob_fentrylk,
+ .fxattrop = ob_fxattrop,
+ .fsetattr = ob_fsetattr,
+ .fallocate = ob_fallocate,
+ .discard = ob_discard,
+ .zerofill = ob_zerofill,
+ .unlink = ob_unlink,
+ .rename = ob_rename,
+ .lk = ob_lk,
+};
+
+struct xlator_cbks cbks = {
+ .release = ob_release,
+};
+
+struct xlator_dumpops dumpops = {
+ .priv = ob_priv_dump,
+ .fdctx = ob_fdctx_dump,
+};
+
+
+struct volume_options options[] = {
+ { .key = {"use-anonymous-fd"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "yes",
+ .description = "For read operations, use anonymous FD when "
+ "original FD is open-behind and not yet opened in the backend.",
+ },
+ { .key = {"lazy-open"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "yes",
+ .description = "Perform open in the backend only when a necessary "
+ "FOP arrives (e.g writev on the FD, unlink of the file). When option "
+ "is disabled, perform backend open right after unwinding open().",
+ },
+ { .key = {NULL} }
+
+};
diff --git a/xlators/performance/quick-read/src/Makefile.am b/xlators/performance/quick-read/src/Makefile.am
index 644f27e3f..4906f408a 100644
--- a/xlators/performance/quick-read/src/Makefile.am
+++ b/xlators/performance/quick-read/src/Makefile.am
@@ -1,14 +1,15 @@
xlator_LTLIBRARIES = quick-read.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
-quick_read_la_LDFLAGS = -module -avoidversion
+quick_read_la_LDFLAGS = -module -avoid-version
quick_read_la_SOURCES = quick-read.c
quick_read_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = quick-read.h
+noinst_HEADERS = quick-read.h quick-read-mem-types.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/performance/quick-read/src/quick-read-mem-types.h b/xlators/performance/quick-read/src/quick-read-mem-types.h
new file mode 100644
index 000000000..78547f641
--- /dev/null
+++ b/xlators/performance/quick-read/src/quick-read-mem-types.h
@@ -0,0 +1,27 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __QR_MEM_TYPES_H__
+#define __QR_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_qr_mem_types_ {
+ gf_qr_mt_qr_inode_t = gf_common_mt_end + 1,
+ gf_qr_mt_content_t,
+ gf_qr_mt_qr_fd_ctx_t,
+ gf_qr_mt_iovec,
+ gf_qr_mt_qr_conf_t,
+ gf_qr_mt_qr_priority_t,
+ gf_qr_mt_qr_private_t,
+ gf_qr_mt_qr_unlink_ctx_t,
+ gf_qr_mt_end
+};
+#endif
diff --git a/xlators/performance/quick-read/src/quick-read.c b/xlators/performance/quick-read/src/quick-read.c
index c7d282aff..445ea8658 100644
--- a/xlators/performance/quick-read/src/quick-read.c
+++ b/xlators/performance/quick-read/src/quick-read.c
@@ -1,2328 +1,1147 @@
/*
- Copyright (c) 2009-2010 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include "quick-read.h"
+#include "statedump.h"
-int32_t
-qr_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset);
+qr_inode_t *qr_inode_ctx_get (xlator_t *this, inode_t *inode);
+void __qr_inode_prune (qr_inode_table_t *table, qr_inode_t *qr_inode);
-static void
-qr_loc_wipe (loc_t *loc)
+int
+__qr_inode_ctx_set (xlator_t *this, inode_t *inode, qr_inode_t *qr_inode)
{
- if (loc == NULL) {
- goto out;
- }
+ uint64_t value = 0;
+ int ret = -1;
- if (loc->path) {
- FREE (loc->path);
- loc->path = NULL;
- }
-
- if (loc->inode) {
- inode_unref (loc->inode);
- loc->inode = NULL;
- }
+ value = (long) qr_inode;
- if (loc->parent) {
- inode_unref (loc->parent);
- loc->parent = NULL;
- }
+ ret = __inode_ctx_set (inode, this, &value);
-out:
- return;
+ return ret;
}
-static int32_t
-qr_loc_fill (loc_t *loc, inode_t *inode, char *path)
+qr_inode_t *
+__qr_inode_ctx_get (xlator_t *this, inode_t *inode)
{
- int32_t ret = -1;
- char *parent = NULL;
-
- if ((loc == NULL) || (inode == NULL) || (path == NULL)) {
- ret = -1;
- errno = EINVAL;
- goto out;
- }
+ qr_inode_t *qr_inode = NULL;
+ uint64_t value = 0;
+ int ret = -1;
- loc->inode = inode_ref (inode);
- loc->path = strdup (path);
- loc->ino = inode->ino;
+ ret = __inode_ctx_get (inode, this, &value);
+ if (ret)
+ return NULL;
- parent = strdup (path);
- if (parent == NULL) {
- ret = -1;
- goto out;
- }
+ qr_inode = (void *) ((long) value);
- parent = dirname (parent);
+ return qr_inode;
+}
- loc->parent = inode_from_path (inode->table, parent);
- if (loc->parent == NULL) {
- ret = -1;
- errno = EINVAL;
- goto out;
- }
- loc->name = strrchr (loc->path, '/');
- ret = 0;
-out:
- if (ret == -1) {
- qr_loc_wipe (loc);
+qr_inode_t *
+qr_inode_ctx_get (xlator_t *this, inode_t *inode)
+{
+ qr_inode_t *qr_inode = NULL;
- }
+ LOCK (&inode->lock);
+ {
+ qr_inode = __qr_inode_ctx_get (this, inode);
+ }
+ UNLOCK (&inode->lock);
- if (parent) {
- FREE (parent);
- }
-
- return ret;
+ return qr_inode;
}
-void
-qr_resume_pending_ops (qr_fd_ctx_t *qr_fd_ctx)
+qr_inode_t *
+qr_inode_new (xlator_t *this, inode_t *inode)
{
- struct list_head waiting_ops;
- call_stub_t *stub = NULL, *tmp = NULL;
-
- if (qr_fd_ctx == NULL) {
- goto out;
- }
+ qr_inode_t *qr_inode = NULL;
- INIT_LIST_HEAD (&waiting_ops);
+ qr_inode = GF_CALLOC (1, sizeof (*qr_inode), gf_qr_mt_qr_inode_t);
+ if (!qr_inode)
+ return NULL;
- LOCK (&qr_fd_ctx->lock);
- {
- list_splice_init (&qr_fd_ctx->waiting_ops,
- &waiting_ops);
- }
- UNLOCK (&qr_fd_ctx->lock);
+ INIT_LIST_HEAD (&qr_inode->lru);
- if (!list_empty (&waiting_ops)) {
- list_for_each_entry_safe (stub, tmp, &waiting_ops, list) {
- list_del_init (&stub->list);
- call_resume (stub);
- }
- }
+ qr_inode->priority = 0; /* initial priority */
-out:
- return;
+ return qr_inode;
}
-static void
-qr_fd_ctx_free (qr_fd_ctx_t *qr_fd_ctx)
+qr_inode_t *
+qr_inode_ctx_get_or_new (xlator_t *this, inode_t *inode)
{
- if (qr_fd_ctx == NULL) {
- goto out;
- }
-
- assert (list_empty (&qr_fd_ctx->waiting_ops));
-
- FREE (qr_fd_ctx->path);
- FREE (qr_fd_ctx);
+ qr_inode_t *qr_inode = NULL;
+ int ret = -1;
+ qr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ LOCK (&inode->lock);
+ {
+ qr_inode = __qr_inode_ctx_get (this, inode);
+ if (qr_inode)
+ goto unlock;
+
+ qr_inode = qr_inode_new (this, inode);
+ if (!qr_inode)
+ goto unlock;
+
+ ret = __qr_inode_ctx_set (this, inode, qr_inode);
+ if (ret) {
+ __qr_inode_prune (&priv->table, qr_inode);
+ GF_FREE (qr_inode);
+ }
+ }
+unlock:
+ UNLOCK (&inode->lock);
-out:
- return;
+ return qr_inode;
}
-
-int32_t
-qr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *buf, dict_t *dict)
+
+uint32_t
+qr_get_priority (qr_conf_t *conf, const char *path)
{
- data_t *content = NULL;
- qr_file_t *qr_file = NULL;
- uint64_t value = 0;
- int ret = -1;
- qr_conf_t *conf = NULL;
+ uint32_t priority = 0;
+ struct qr_priority *curr = NULL;
- if ((op_ret == -1) || (dict == NULL)) {
- goto out;
+ list_for_each_entry (curr, &conf->priority_list, list) {
+ if (fnmatch (curr->pattern, path, FNM_NOESCAPE) == 0)
+ priority = curr->priority;
}
- conf = this->private;
+ return priority;
+}
- if (buf->st_size > conf->max_file_size) {
- goto out;
- }
- if (S_ISDIR (buf->st_mode)) {
- goto out;
- }
+void
+__qr_inode_register (qr_inode_table_t *table, qr_inode_t *qr_inode)
+{
+ if (!qr_inode->data)
+ return;
- if (inode == NULL) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ if (list_empty (&qr_inode->lru))
+ /* first time addition of this qr_inode into table */
+ table->cache_used += qr_inode->size;
+ else
+ list_del_init (&qr_inode->lru);
- content = dict_get (dict, GLUSTERFS_CONTENT_KEY);
-
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &value);
- if (ret == -1) {
- qr_file = CALLOC (1, sizeof (*qr_file));
- if (qr_file == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto unlock;
- }
-
- LOCK_INIT (&qr_file->lock);
- ret = __inode_ctx_put (inode, this,
- (uint64_t)(long)qr_file);
- if (ret == -1) {
- FREE (qr_file);
- qr_file = NULL;
- op_ret = -1;
- op_errno = EINVAL;
- }
- } else {
- qr_file = (qr_file_t *)(long)value;
- if (qr_file == NULL) {
- op_ret = -1;
- op_errno = EINVAL;
- }
- }
- }
-unlock:
- UNLOCK (&inode->lock);
-
- if (qr_file != NULL) {
- LOCK (&qr_file->lock);
- {
- if (qr_file->xattr
- && (qr_file->stbuf.st_mtime != buf->st_mtime)) {
- dict_unref (qr_file->xattr);
- qr_file->xattr = NULL;
- }
+ list_add_tail (&qr_inode->lru, &table->lru[qr_inode->priority]);
+}
- if (content) {
- if (qr_file->xattr) {
- dict_unref (qr_file->xattr);
- qr_file->xattr = NULL;
- }
- qr_file->xattr = dict_ref (dict);
- qr_file->stbuf = *buf;
- }
+void
+qr_inode_set_priority (xlator_t *this, inode_t *inode, const char *path)
+{
+ uint32_t priority = 0;
+ qr_inode_table_t *table = NULL;
+ qr_inode_t *qr_inode = NULL;
+ qr_private_t *priv = NULL;
+ qr_conf_t *conf = NULL;
+
+ qr_inode = qr_inode_ctx_get (this, inode);
+ if (!qr_inode)
+ return;
+
+ priv = this->private;
+ table = &priv->table;
+ conf = &priv->conf;
+
+ if (path)
+ priority = qr_get_priority (conf, path);
+ else
+ /* retain existing priority, just bump LRU */
+ priority = qr_inode->priority;
+
+ LOCK (&table->lock);
+ {
+ qr_inode->priority = priority;
+
+ __qr_inode_register (table, qr_inode);
+ }
+ UNLOCK (&table->lock);
+}
- gettimeofday (&qr_file->tv, NULL);
- }
- UNLOCK (&qr_file->lock);
- }
-out:
- /*
- * FIXME: content size in dict can be greater than the size application
- * requested for. Applications need to be careful till this is fixed.
- */
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf, dict);
- return 0;
+/* To be called with priv->table.lock held */
+void
+__qr_inode_prune (qr_inode_table_t *table, qr_inode_t *qr_inode)
+{
+ GF_FREE (qr_inode->data);
+ qr_inode->data = NULL;
+
+ if (!list_empty (&qr_inode->lru)) {
+ table->cache_used -= qr_inode->size;
+ qr_inode->size = 0;
+
+ list_del_init (&qr_inode->lru);
+ }
+
+ memset (&qr_inode->buf, 0, sizeof (qr_inode->buf));
}
-int32_t
-qr_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
+void
+qr_inode_prune (xlator_t *this, inode_t *inode)
{
- qr_conf_t *conf = NULL;
- dict_t *new_req_dict = NULL;
- int32_t op_ret = -1, op_errno = -1;
- data_t *content = NULL;
- uint64_t requested_size = 0, size = 0, value = 0;
- char cached = 0;
- qr_file_t *qr_file = NULL;
-
- conf = this->private;
- if (conf == NULL) {
- op_ret = -1;
- op_errno = EINVAL;
- goto unwind;
- }
+ qr_private_t *priv = NULL;
+ qr_inode_table_t *table = NULL;
+ qr_inode_t *qr_inode = NULL;
- op_ret = inode_ctx_get (loc->inode, this, &value);
- if (op_ret == 0) {
- qr_file = (qr_file_t *)(long)value;
- }
-
- if (qr_file != NULL) {
- LOCK (&qr_file->lock);
- {
- if (qr_file->xattr) {
- cached = 1;
- }
- }
- UNLOCK (&qr_file->lock);
- }
+ qr_inode = qr_inode_ctx_get (this, inode);
+ if (!qr_inode)
+ return;
- if ((xattr_req == NULL) && (conf->max_file_size > 0)) {
- new_req_dict = xattr_req = dict_new ();
- if (xattr_req == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto unwind;
- }
- }
+ priv = this->private;
+ table = &priv->table;
- if (!cached) {
- if (xattr_req) {
- content = dict_get (xattr_req, GLUSTERFS_CONTENT_KEY);
- if (content) {
- requested_size = data_to_uint64 (content);
- }
- }
+ LOCK (&table->lock);
+ {
+ __qr_inode_prune (table, qr_inode);
+ }
+ UNLOCK (&table->lock);
+}
- if ((conf->max_file_size > 0)
- && (conf->max_file_size != requested_size)) {
- size = (conf->max_file_size > requested_size) ?
- conf->max_file_size : requested_size;
-
- op_ret = dict_set (xattr_req, GLUSTERFS_CONTENT_KEY,
- data_from_uint64 (size));
- if (op_ret < 0) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
- }
- }
- }
- STACK_WIND (frame, qr_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
+/* To be called with priv->table.lock held */
+void
+__qr_cache_prune (qr_inode_table_t *table, qr_conf_t *conf)
+{
+ qr_inode_t *curr = NULL;
+ qr_inode_t *next = NULL;
+ int index = 0;
+ size_t size_pruned = 0;
- if (new_req_dict) {
- dict_unref (new_req_dict);
- }
+ for (index = 0; index < conf->max_pri; index++) {
+ list_for_each_entry_safe (curr, next, &table->lru[index], lru) {
- return 0;
+ size_pruned += curr->size;
-unwind:
- STACK_UNWIND (frame, op_ret, op_errno, NULL, NULL, NULL);
+ __qr_inode_prune (table, curr);
- if (new_req_dict) {
- dict_unref (new_req_dict);
+ if (table->cache_used < conf->cache_size)
+ return;
+ }
}
- return 0;
+ return;
}
-int32_t
-qr_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd)
+void
+qr_cache_prune (xlator_t *this)
{
- uint64_t value = 0;
- int32_t ret = -1;
- struct list_head waiting_ops;
- qr_local_t *local = NULL;
- qr_file_t *qr_file = NULL;
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- call_stub_t *stub = NULL, *tmp = NULL;
- char is_open = 0;
-
- local = frame->local;
- if (local == NULL) {
- op_ret = -1;
- op_errno = EINVAL;
- } else {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- is_open = local->is_open;
- }
+ qr_private_t *priv = NULL;
+ qr_conf_t *conf = NULL;
+ qr_inode_table_t *table = NULL;
- INIT_LIST_HEAD (&waiting_ops);
+ priv = this->private;
+ table = &priv->table;
+ conf = &priv->conf;
- ret = fd_ctx_get (fd, this, &value);
- if ((ret == -1) && (op_ret != -1)) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ LOCK (&table->lock);
+ {
+ if (table->cache_used > conf->cache_size)
+ __qr_cache_prune (table, conf);
+ }
+ UNLOCK (&table->lock);
+}
- if (value) {
- qr_fd_ctx = (qr_fd_ctx_t *) (long)value;
- }
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- qr_fd_ctx->open_in_transit = 0;
+void *
+qr_content_extract (dict_t *xdata)
+{
+ data_t *data = NULL;
+ void *content = NULL;
- if (op_ret == 0) {
- qr_fd_ctx->opened = 1;
- }
- list_splice_init (&qr_fd_ctx->waiting_ops,
- &waiting_ops);
- }
- UNLOCK (&qr_fd_ctx->lock);
-
- if (local && local->is_open
- && ((local->open_flags & O_TRUNC) == O_TRUNC)) {
- ret = inode_ctx_get (fd->inode, this, &value);
- if (ret == 0) {
- qr_file = (qr_file_t *)(long) value;
-
- if (qr_file) {
- LOCK (&qr_file->lock);
- {
- dict_unref (qr_file->xattr);
- qr_file->xattr = NULL;
- }
- UNLOCK (&qr_file->lock);
- }
- }
- }
+ data = dict_get (xdata, GF_CONTENT_KEY);
+ if (!data)
+ return NULL;
- if (!list_empty (&waiting_ops)) {
- list_for_each_entry_safe (stub, tmp, &waiting_ops,
- list) {
- list_del_init (&stub->list);
- call_resume (stub);
- }
- }
- }
-out:
- if (is_open) {
- STACK_UNWIND (frame, op_ret, op_errno, fd);
- }
+ content = GF_CALLOC (1, data->len, gf_qr_mt_content_t);
+ if (!content)
+ return NULL;
- return 0;
+ memcpy (content, data->data, data->len);
+
+ return content;
}
-int32_t
-qr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd)
+void
+qr_content_update (xlator_t *this, qr_inode_t *qr_inode, void *data,
+ struct iatt *buf)
{
- qr_file_t *qr_file = NULL;
- int32_t ret = -1;
- uint64_t filep = 0;
- char content_cached = 0;
- qr_fd_ctx_t *qr_fd_ctx = NULL, *tmp_fd_ctx = NULL;
- int32_t op_ret = -1, op_errno = -1;
- qr_local_t *local = NULL;
- qr_conf_t *conf = NULL;
-
- conf = this->private;
-
- tmp_fd_ctx = qr_fd_ctx = CALLOC (1, sizeof (*qr_fd_ctx));
- if (qr_fd_ctx == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto unwind;
- }
+ qr_private_t *priv = NULL;
+ qr_inode_table_t *table = NULL;
- LOCK_INIT (&qr_fd_ctx->lock);
- INIT_LIST_HEAD (&qr_fd_ctx->waiting_ops);
+ priv = this->private;
+ table = &priv->table;
- qr_fd_ctx->path = strdup (loc->path);
- qr_fd_ctx->flags = flags;
+ LOCK (&table->lock);
+ {
+ __qr_inode_prune (table, qr_inode);
- ret = fd_ctx_set (fd, this, (uint64_t)(long)qr_fd_ctx);
- if (ret == -1) {
- op_ret = -1;
- op_errno = EINVAL;
- goto unwind;
- }
- tmp_fd_ctx = NULL;
-
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto unwind;
- }
+ qr_inode->data = data;
+ qr_inode->size = buf->ia_size;
- local->is_open = 1;
- local->open_flags = flags;
- frame->local = local;
- local = NULL;
-
- ret = inode_ctx_get (fd->inode, this, &filep);
- if (ret == 0) {
- qr_file = (qr_file_t *)(long) filep;
- if (qr_file) {
- LOCK (&qr_file->lock);
- {
- if (qr_file->xattr) {
- content_cached = 1;
- }
- }
- UNLOCK (&qr_file->lock);
- }
- }
+ qr_inode->ia_mtime = buf->ia_mtime;
+ qr_inode->ia_mtime_nsec = buf->ia_mtime_nsec;
- if (content_cached && ((flags & O_DIRECTORY) == O_DIRECTORY)) {
- op_ret = -1;
- op_errno = ENOTDIR;
- goto unwind;
- }
+ qr_inode->buf = *buf;
- if (!content_cached || ((flags & O_WRONLY) == O_WRONLY)
- || ((flags & O_TRUNC) == O_TRUNC)) {
- LOCK (&qr_fd_ctx->lock);
- {
- /*
- * we really need not set this flag, since open is
- * not yet unwounded.
- */
-
- qr_fd_ctx->open_in_transit = 1;
- }
- UNLOCK (&qr_fd_ctx->lock);
- goto wind;
- } else {
- op_ret = 0;
- op_errno = 0;
- goto unwind;
- }
+ gettimeofday (&qr_inode->last_refresh, NULL);
-unwind:
- if (tmp_fd_ctx != NULL) {
- qr_fd_ctx_free (tmp_fd_ctx);
- }
-
- if (local != NULL) {
- FREE (local);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, fd);
- return 0;
+ __qr_inode_register (table, qr_inode);
+ }
+ UNLOCK (&table->lock);
-wind:
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd);
- return 0;
+ qr_cache_prune (this);
}
-static inline char
-qr_time_elapsed (struct timeval *now, struct timeval *then)
+gf_boolean_t
+qr_size_fits (qr_conf_t *conf, struct iatt *buf)
{
- return now->tv_sec - then->tv_sec;
+ return (buf->ia_size <= conf->max_file_size);
}
-static inline char
-qr_need_validation (qr_conf_t *conf, qr_file_t *file)
+gf_boolean_t
+qr_mtime_equal (qr_inode_t *qr_inode, struct iatt *buf)
{
- struct timeval now = {0, };
- char need_validation = 0;
-
- gettimeofday (&now, NULL);
-
- if (qr_time_elapsed (&now, &file->tv) >= conf->cache_timeout)
- need_validation = 1;
-
- return need_validation;
+ return (qr_inode->ia_mtime == buf->ia_mtime &&
+ qr_inode->ia_mtime_nsec == buf->ia_mtime_nsec);
}
-static int32_t
-qr_validate_cache_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+void
+__qr_content_refresh (xlator_t *this, qr_inode_t *qr_inode, struct iatt *buf)
{
- qr_file_t *qr_file = NULL;
- qr_local_t *local = NULL;
- uint64_t value = 0;
- int32_t ret = 0;
-
- local = frame->local;
- if ((local == NULL) || ((local->fd) == NULL)) {
- op_ret = -1;
- op_errno = EINVAL;
- goto unwind;
- }
-
- if (op_ret == -1) {
- goto unwind;
- }
+ qr_private_t *priv = NULL;
+ qr_inode_table_t *table = NULL;
+ qr_conf_t *conf = NULL;
- ret = inode_ctx_get (local->fd->inode, this, &value);
- if (ret == -1) {
- op_ret = -1;
- op_errno = EINVAL;
- goto unwind;
- }
+ priv = this->private;
+ table = &priv->table;
+ conf = &priv->conf;
- qr_file = (qr_file_t *)(long) value;
- if (qr_file == NULL) {
- op_ret = -1;
- op_errno = EINVAL;
- goto unwind;
- }
+ if (qr_size_fits (conf, buf) && qr_mtime_equal (qr_inode, buf)) {
+ qr_inode->buf = *buf;
- LOCK (&qr_file->lock);
- {
- if (qr_file->stbuf.st_mtime != buf->st_mtime) {
- dict_unref (qr_file->xattr);
- qr_file->xattr = NULL;
- }
+ gettimeofday (&qr_inode->last_refresh, NULL);
- gettimeofday (&qr_file->tv, NULL);
- }
- UNLOCK (&qr_file->lock);
+ __qr_inode_register (table, qr_inode);
+ } else {
+ __qr_inode_prune (table, qr_inode);
+ }
- frame->local = NULL;
+ return;
+}
- call_resume (local->stub);
-
- FREE (local);
- return 0;
-unwind:
- if (local && local->stub) {
- call_stub_destroy (local->stub);
- local->stub = NULL;
- }
+void
+qr_content_refresh (xlator_t *this, qr_inode_t *qr_inode, struct iatt *buf)
+{
+ qr_private_t *priv = NULL;
+ qr_inode_table_t *table = NULL;
- /* this is actually unwind of readv */
- STACK_UNWIND (frame, op_ret, op_errno, NULL, -1, NULL, NULL);
- return 0;
+ priv = this->private;
+ table = &priv->table;
+
+ LOCK (&table->lock);
+ {
+ __qr_content_refresh (this, qr_inode, buf);
+ }
+ UNLOCK (&table->lock);
}
-int32_t
-qr_validate_cache_helper (call_frame_t *frame, xlator_t *this, fd_t *fd)
+gf_boolean_t
+__qr_cache_is_fresh (xlator_t *this, qr_inode_t *qr_inode)
{
- qr_local_t *local = NULL;
- int32_t op_ret = -1, op_errno = -1;
-
- local = frame->local;
- if (local == NULL) {
- op_ret = -1;
- op_errno = EINVAL;
- } else {
- op_ret = local->op_ret;
- op_errno = local->op_errno;
- }
+ qr_conf_t *conf = NULL;
+ qr_private_t *priv = NULL;
+ struct timeval now;
+ struct timeval diff;
- if (op_ret == -1) {
- qr_validate_cache_cbk (frame, NULL, this, op_ret, op_errno,
- NULL);
- } else {
- STACK_WIND (frame, qr_validate_cache_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fstat, fd);
- }
+ priv = this->private;
+ conf = &priv->conf;
- return 0;
+ gettimeofday (&now, NULL);
+
+ timersub (&now, &qr_inode->last_refresh, &diff);
+
+ if (diff.tv_sec >= conf->cache_timeout)
+ return _gf_false;
+
+ return _gf_true;
}
int
-qr_validate_cache (call_frame_t *frame, xlator_t *this, fd_t *fd,
- call_stub_t *stub)
+qr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode_ret,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
{
- int ret = -1;
- int flags = 0;
- uint64_t value = 0;
- loc_t loc = {0, };
- char *path = NULL;
- qr_local_t *local = NULL;
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- call_stub_t *validate_stub = NULL;
- char need_open = 0, can_wind = 0;
-
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- goto out;
- }
+ void *content = NULL;
+ qr_inode_t *qr_inode = NULL;
+ inode_t *inode = NULL;
- local->fd = fd;
- local->stub = stub;
- frame->local = local;
+ inode = frame->local;
+ frame->local = NULL;
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long) value;
- }
+ if (op_ret == -1) {
+ qr_inode_prune (this, inode);
+ goto out;
+ }
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- flags = qr_fd_ctx->flags;
-
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
-
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- validate_stub = fop_fstat_stub (frame,
- qr_validate_cache_helper,
- fd);
- if (validate_stub == NULL) {
- ret = -1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&validate_stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
+ if (dict_get (xdata, "sh-failed")) {
+ qr_inode_prune (this, inode);
+ goto out;
+ }
- if (ret == -1) {
- goto out;
- }
- } else {
- can_wind = 1;
- }
+ content = qr_content_extract (xdata);
- if (need_open) {
- ret = qr_loc_fill (&loc, fd->inode, path);
- if (ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
+ if (content) {
+ /* new content came along, always replace old content */
+ qr_inode = qr_inode_ctx_get_or_new (this, inode);
+ if (!qr_inode)
+ /* no harm done */
+ goto out;
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open,
- &loc, flags, fd);
-
- qr_loc_wipe (&loc);
- } else if (can_wind) {
- STACK_WIND (frame, qr_validate_cache_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fstat, fd);
- }
+ qr_content_update (this, qr_inode, content, buf);
+ } else {
+ /* purge old content if necessary */
+ qr_inode = qr_inode_ctx_get (this, inode);
+ if (!qr_inode)
+ /* usual path for large files */
+ goto out;
- ret = 0;
+ qr_content_refresh (this, qr_inode, buf);
+ }
out:
- return ret;
+ if (inode)
+ inode_unref (inode);
+
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode_ret,
+ buf, xdata, postparent);
+ return 0;
}
-int32_t
-qr_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iovec *vector, int32_t count,
- struct stat *stbuf, struct iobref *iobref)
+int
+qr_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf, iobref);
- return 0;
-}
+ qr_private_t *priv = NULL;
+ qr_conf_t *conf = NULL;
+ qr_inode_t *qr_inode = NULL;
+ int ret = -1;
+ dict_t *new_xdata = NULL;
+
+ priv = this->private;
+ conf = &priv->conf;
+
+ qr_inode = qr_inode_ctx_get (this, loc->inode);
+ if (qr_inode && qr_inode->data)
+ /* cached. only validate in qr_lookup_cbk */
+ goto wind;
+
+ if (!xdata)
+ xdata = new_xdata = dict_new ();
+
+ if (!xdata)
+ goto wind;
+
+ ret = 0;
+ if (conf->max_file_size)
+ ret = dict_set (xdata, GF_CONTENT_KEY,
+ data_from_uint64 (conf->max_file_size));
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "cannot set key in request dict (%s)",
+ loc->path);
+wind:
+ frame->local = inode_ref (loc->inode);
+ STACK_WIND (frame, qr_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+
+ if (new_xdata)
+ dict_unref (new_xdata);
-int32_t
-qr_readv_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
-{
- STACK_WIND (frame, qr_readv_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->readv, fd, size, offset);
return 0;
}
-int32_t
-qr_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+int
+qr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries, dict_t *xdata)
{
- qr_file_t *file = NULL;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- uint64_t value = 0;
- int count = -1, flags = 0, i = 0;
- char content_cached = 0, need_validation = 0;
- char need_open = 0, can_wind = 0, need_unwind = 0;
- struct iobuf *iobuf = NULL;
- struct iobref *iobref = NULL;
- struct stat stbuf = {0, };
- data_t *content = NULL;
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- call_stub_t *stub = NULL;
- loc_t loc = {0, };
- qr_conf_t *conf = NULL;
- struct iovec *vector = NULL;
- char *path = NULL;
- glusterfs_ctx_t *ctx = NULL;
- off_t start = 0, end = 0;
- size_t len = 0;
- struct iobuf_pool *iobuf_pool = NULL;
-
- op_ret = 0;
- conf = this->private;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long) value;
- }
-
- iobuf_pool = this->ctx->iobuf_pool;
-
- ret = inode_ctx_get (fd->inode, this, &value);
- if (ret == 0) {
- file = (qr_file_t *)(long)value;
- if (file) {
- LOCK (&file->lock);
- {
- if (file->xattr){
- if (qr_need_validation (conf,file)) {
- need_validation = 1;
- goto unlock;
- }
-
- content = dict_get (file->xattr,
- GLUSTERFS_CONTENT_KEY);
-
-
- stbuf = file->stbuf;
- content_cached = 1;
-
- if (offset > content->len) {
- op_ret = 0;
- end = content->len;
- } else {
- if ((offset + size)
- > content->len) {
- op_ret = content->len - offset;
- end = content->len;
- } else {
- op_ret = size;
- end = offset + size;
- }
- }
-
- ctx = this->ctx;
- count = (op_ret / iobuf_pool->page_size);
- if ((op_ret % iobuf_pool->page_size)
- != 0) {
- count++;
- }
-
- if (count == 0) {
- op_ret = 0;
- goto unlock;
- }
-
- vector = CALLOC (count,
- sizeof (*vector));
- if (vector == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- goto unlock;
- }
-
- iobref = iobref_new ();
- if (iobref == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- goto unlock;
- }
-
- for (i = 0; i < count; i++) {
- iobuf = iobuf_get (iobuf_pool);
- if (iobuf == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- goto unlock;
- }
-
- start = offset + (iobuf_pool->page_size * i);
- if (start > end) {
- len = 0;
- } else {
- len = (iobuf_pool->page_size
- > (end - start))
- ? (end - start)
- : iobuf_pool->page_size;
-
- memcpy (iobuf->ptr,
- content->data + start,
- len);
- }
-
- iobref_add (iobref, iobuf);
- iobuf_unref (iobuf);
-
- vector[i].iov_base = iobuf->ptr;
- vector[i].iov_len = len;
- }
- }
- }
- unlock:
- UNLOCK (&file->lock);
- }
- }
-
-out:
- if (content_cached || need_unwind) {
- STACK_UNWIND (frame, op_ret, op_errno, vector, count, &stbuf,
- iobref);
-
- } else if (need_validation) {
- stub = fop_readv_stub (frame, qr_readv, fd, size, offset);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
-
- op_ret = qr_validate_cache (frame, this, fd, stub);
- if (op_ret == -1) {
- need_unwind = 1;
- op_errno = errno;
- call_stub_destroy (stub);
- goto out;
- }
- } else {
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- flags = qr_fd_ctx->flags;
-
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
-
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_readv_stub (frame,
- qr_readv_helper,
- fd, size,
- offset);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto fdctx_unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- fdctx_unlock:
- UNLOCK (&qr_fd_ctx->lock);
-
- if (op_ret == -1) {
- need_unwind = 1;
- goto out;
- }
- } else {
- can_wind = 1;
- }
+ gf_dirent_t *entry = NULL;
+ qr_inode_t *qr_inode = NULL;
- if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
-
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open,
- &loc, flags, fd);
-
- qr_loc_wipe (&loc);
- } else if (can_wind) {
- STACK_WIND (frame, qr_readv_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->readv, fd, size,
- offset);
- }
+ if (op_ret <= 0)
+ goto unwind;
- }
+ list_for_each_entry (entry, &entries->list, list) {
+ if (!entry->inode)
+ continue;
- if (vector) {
- FREE (vector);
- }
+ qr_inode = qr_inode_ctx_get (this, entry->inode);
+ if (!qr_inode)
+ /* no harm */
+ continue;
- if (iobref) {
- iobref_unref (iobref);
+ qr_content_refresh (this, qr_inode, &entry->d_stat);
}
- return 0;
+unwind:
+ STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
+ return 0;
}
-int32_t
-qr_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct stat *stbuf)
+int
+qr_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, stbuf);
+ STACK_WIND (frame, qr_readdirp_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->readdirp,
+ fd, size, offset, xdata);
return 0;
}
-int32_t
-qr_writev_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t off,
- struct iobref *iobref)
+int
+qr_readv_cached (call_frame_t *frame, qr_inode_t *qr_inode, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- STACK_WIND (frame, qr_writev_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->writev, fd, vector, count, off,
- iobref);
- return 0;
-}
+ xlator_t *this = NULL;
+ qr_private_t *priv = NULL;
+ qr_inode_table_t *table = NULL;
+ int op_ret = -1;
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ struct iovec iov = {0, };
+ struct iatt buf = {0, };
+ this = frame->this;
+ priv = this->private;
+ table = &priv->table;
-int32_t
-qr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
- int32_t count, off_t off, struct iobref *iobref)
-{
- uint64_t value = 0;
- int flags = 0;
- call_stub_t *stub = NULL;
- char *path = NULL;
- loc_t loc = {0, };
- qr_file_t *qr_file = NULL;
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t op_ret = -1, op_errno = -1, ret = -1;
- char can_wind = 0, need_unwind = 0, need_open = 0;
-
- ret = fd_ctx_get (fd, this, &value);
-
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long) value;
- }
+ LOCK (&table->lock);
+ {
+ op_ret = -1;
- ret = inode_ctx_get (fd->inode, this, &value);
- if (ret == 0) {
- qr_file = (qr_file_t *)(long)value;
- }
+ if (!qr_inode->data)
+ goto unlock;
- if (qr_file) {
- LOCK (&qr_file->lock);
- {
- if (qr_file->xattr) {
- dict_unref (qr_file->xattr);
- qr_file->xattr = NULL;
- }
- }
- UNLOCK (&qr_file->lock);
- }
-
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- flags = qr_fd_ctx->flags;
-
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
+ if (offset >= qr_inode->size)
+ goto unlock;
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_writev_stub (frame, qr_writev_helper,
- fd, vector, count, off,
- iobref);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
- }
+ if (!__qr_cache_is_fresh (this, qr_inode))
+ goto unlock;
-out:
- if (need_unwind) {
- STACK_UNWIND (frame, op_ret, op_errno, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, qr_writev_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->writev, fd, vector, count,
- off, iobref);
- } else if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
-
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &loc, flags, fd);
+ op_ret = min (size, (qr_inode->size - offset));
- qr_loc_wipe (&loc);
- }
+ iobuf = iobuf_get2 (this->ctx->iobuf_pool, op_ret);
+ if (!iobuf) {
+ op_ret = -1;
+ goto unlock;
+ }
- return 0;
-}
+ iobref = iobref_new ();
+ if (!iobref) {
+ op_ret = -1;
+ iobuf_unref (iobuf);
+ goto unlock;
+ }
+ iobref_add (iobref, iobuf);
-int32_t
-qr_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct stat *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
+ memcpy (iobuf->ptr, qr_inode->data + offset, op_ret);
+ buf = qr_inode->buf;
-int32_t
-qr_fstat_helper (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- STACK_WIND (frame, qr_fstat_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fstat, fd);
- return 0;
-}
+ /* bump LRU */
+ __qr_inode_register (table, qr_inode);
+ }
+unlock:
+ UNLOCK (&table->lock);
+ if (op_ret > 0) {
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = op_ret;
-int32_t
-qr_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- char need_open = 0, can_wind = 0, need_unwind = 0;
- uint64_t value = 0;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- call_stub_t *stub = NULL;
- loc_t loc = {0, };
- char *path = NULL;
- int flags = 0;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long) value;
- }
+ STACK_UNWIND_STRICT (readv, frame, op_ret, 0, &iov, 1,
+ &buf, iobref, xdata);
+ }
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- flags = qr_fd_ctx->flags;
+ if (iobuf)
+ iobuf_unref (iobuf);
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
+ if (iobref)
+ iobref_unref (iobref);
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_fstat_stub (frame, qr_fstat_helper,
- fd);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
- }
+ return op_ret;
+}
-out:
- if (need_unwind) {
- STACK_UNWIND (frame, op_ret, op_errno, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, qr_fstat_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fstat, fd);
- } else if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &loc, flags, fd);
+int
+qr_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ qr_inode_t *qr_inode = NULL;
- qr_loc_wipe (&loc);
- }
-
- return 0;
-}
+ qr_inode = qr_inode_ctx_get (this, fd->inode);
+ if (!qr_inode)
+ goto wind;
+ if (qr_readv_cached (frame, qr_inode, size, offset, flags, xdata) <= 0)
+ goto wind;
-static int32_t
-qr_fchown_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ return 0;
+wind:
+ STACK_WIND (frame, default_readv_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv,
+ fd, size, offset, flags, xdata);
return 0;
}
-int32_t
-qr_fchown_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, uid_t uid,
- gid_t gid)
+int
+qr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *iov,
+ int count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
{
- STACK_WIND (frame, qr_fchown_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fchown, fd, uid, gid);
- return 0;
+ qr_inode_prune (this, fd->inode);
+
+ STACK_WIND (frame, default_writev_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev,
+ fd, iov, count, offset, flags, iobref, xdata);
+ return 0;
}
-int32_t
-qr_fchown (call_frame_t *frame, xlator_t *this, fd_t *fd, uid_t uid, gid_t gid)
+int
+qr_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- uint64_t value = 0;
- int flags = 0;
- call_stub_t *stub = NULL;
- char *path = NULL;
- loc_t loc = {0, };
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- char need_open = 0, can_wind = 0, need_unwind = 0;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long) value;
- }
-
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- flags = qr_fd_ctx->flags;
-
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
-
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_fchown_stub (frame, qr_fchown_helper,
- fd, uid, gid);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
- }
-
-out:
- if (need_unwind) {
- STACK_UNWIND (frame, op_ret, op_errno, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, qr_fchown_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fchown, fd, uid, gid);
- } else if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
+ qr_inode_prune (this, loc->inode);
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &loc, flags, fd);
-
- qr_loc_wipe (&loc);
- }
-
- return 0;
+ STACK_WIND (frame, default_truncate_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->truncate,
+ loc, offset, xdata);
+ return 0;
}
-int32_t
-qr_fchmod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+int
+qr_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ qr_inode_prune (this, fd->inode);
+
+ STACK_WIND (frame, default_ftruncate_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->ftruncate,
+ fd, offset, xdata);
return 0;
}
-int32_t
-qr_fchmod_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, mode_t mode)
+int
+qr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ fd_t *fd, dict_t *xdata)
{
- STACK_WIND(frame, qr_fchmod_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fchmod, fd, mode);
- return 0;
-}
+ qr_inode_set_priority (this, fd->inode, loc->path);
+ STACK_WIND (frame, default_open_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->open,
+ loc, flags, fd, xdata);
+ return 0;
+}
-int32_t
-qr_fchmod (call_frame_t *frame, xlator_t *this, fd_t *fd, mode_t mode)
+int
+qr_forget (xlator_t *this, inode_t *inode)
{
- uint64_t value = 0;
- int flags = 0;
- call_stub_t *stub = NULL;
- char *path = NULL;
- loc_t loc = {0, };
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- char need_open = 0, can_wind = 0, need_unwind = 0;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long) value;
- }
+ qr_inode_t *qr_inode = NULL;
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- flags = qr_fd_ctx->flags;
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
+ qr_inode = qr_inode_ctx_get (this, inode);
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_fchmod_stub (frame, qr_fchmod_helper,
- fd, mode);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
- }
-
-out:
- if (need_unwind) {
- STACK_UNWIND (frame, op_ret, op_errno, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, qr_fchmod_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fchmod, fd, mode);
- } else if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
-
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &loc, flags, fd);
-
- qr_loc_wipe (&loc);
- }
+ if (!qr_inode)
+ return 0;
- return 0;
-}
+ qr_inode_prune (this, inode);
+ GF_FREE (qr_inode);
-int32_t
-qr_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
return 0;
}
int32_t
-qr_fsetxattr_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- dict_t *dict, int32_t flags)
+qr_inodectx_dump (xlator_t *this, inode_t *inode)
{
- STACK_WIND (frame, qr_fsetxattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsetxattr, fd, dict, flags);
- return 0;
-}
+ qr_inode_t *qr_inode = NULL;
+ int32_t ret = -1;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
+ char buf[256] = {0, };
+ qr_inode = qr_inode_ctx_get (this, inode);
+ if (!qr_inode)
+ goto out;
-int32_t
-qr_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags)
-{
- uint64_t value = 0;
- call_stub_t *stub = NULL;
- char *path = NULL;
- loc_t loc = {0, };
- int open_flags = 0;
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- char need_open = 0, can_wind = 0, need_unwind = 0;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long) value;
- }
+ gf_proc_dump_build_key (key_prefix, "xlator.performance.quick-read",
+ "inodectx");
+ gf_proc_dump_add_section (key_prefix);
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- open_flags = qr_fd_ctx->flags;
+ gf_proc_dump_write ("entire-file-cached", "%s", qr_inode->data ? "yes" : "no");
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
+ if (qr_inode->last_refresh.tv_sec) {
+ gf_time_fmt (buf, sizeof buf, qr_inode->last_refresh.tv_sec,
+ gf_timefmt_FT);
+ snprintf (buf + strlen (buf), sizeof buf - strlen (buf),
+ ".%"GF_PRI_SUSECONDS, qr_inode->last_refresh.tv_usec);
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_fsetxattr_stub (frame,
- qr_fsetxattr_helper,
- fd, dict, flags);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
+ gf_proc_dump_write ("last-cache-validation-time", "%s", buf);
}
+ ret = 0;
out:
- if (need_unwind) {
- STACK_UNWIND (frame, op_ret, op_errno);
- } else if (can_wind) {
- STACK_WIND (frame, qr_fsetxattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsetxattr, fd, dict,
- flags);
- } else if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
-
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &loc, open_flags,
- fd);
-
- qr_loc_wipe (&loc);
- }
-
- return 0;
+ return ret;
}
-int32_t
-qr_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
+int
+qr_priv_dump (xlator_t *this)
{
- STACK_UNWIND (frame, op_ret, op_errno, dict);
- return 0;
-}
+ qr_conf_t *conf = NULL;
+ qr_private_t *priv = NULL;
+ qr_inode_table_t *table = NULL;
+ uint32_t file_count = 0;
+ uint32_t i = 0;
+ qr_inode_t *curr = NULL;
+ uint64_t total_size = 0;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+
+ if (!this) {
+ return -1;
+ }
+ priv = this->private;
+ conf = &priv->conf;
-int32_t
-qr_fgetxattr_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name)
-{
- STACK_WIND (frame, qr_fgetxattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fgetxattr, fd, name);
- return 0;
-}
+ if (!conf)
+ return -1;
+ table = &priv->table;
-int32_t
-qr_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name)
-{
- int flags = 0;
- uint64_t value = 0;
- call_stub_t *stub = NULL;
- char *path = NULL;
- loc_t loc = {0, };
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- char need_open = 0, can_wind = 0, need_unwind = 0;
-
- /*
- * FIXME: Can quick-read use the extended attributes stored in the
- * cache? this needs to be discussed.
- */
+ gf_proc_dump_build_key (key_prefix, "xlator.performance.quick-read",
+ "priv");
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long) value;
- }
-
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- flags = qr_fd_ctx->flags;
+ gf_proc_dump_add_section (key_prefix);
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
+ gf_proc_dump_write ("max_file_size", "%d", conf->max_file_size);
+ gf_proc_dump_write ("cache_timeout", "%d", conf->cache_timeout);
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_fgetxattr_stub (frame,
- qr_fgetxattr_helper,
- fd, name);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
+ if (!table) {
+ goto out;
+ } else {
+ for (i = 0; i < conf->max_pri; i++) {
+ list_for_each_entry (curr, &table->lru[i], lru) {
+ file_count++;
+ total_size += curr->size;
}
}
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
}
-out:
- if (need_unwind) {
- STACK_UNWIND (frame, op_ret, op_errno, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, qr_fgetxattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fgetxattr, fd, name);
- } else if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
-
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &loc, flags, fd);
+ gf_proc_dump_write ("total_files_cached", "%d", file_count);
+ gf_proc_dump_write ("total_cache_used", "%d", total_size);
- qr_loc_wipe (&loc);
- }
-
+out:
return 0;
}
int32_t
-qr_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+mem_acct_init (xlator_t *this)
{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
+ int ret = -1;
+ if (!this)
+ return ret;
-int32_t
-qr_flush_helper (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- STACK_WIND (frame, qr_flush_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->flush, fd);
- return 0;
+ ret = xlator_mem_acct_init (this, gf_qr_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ return ret;
+ }
+
+ return ret;
}
-int32_t
-qr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+static gf_boolean_t
+check_cache_size_ok (xlator_t *this, int64_t cache_size)
{
- uint64_t value = 0;
- call_stub_t *stub = NULL;
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- char can_wind = 0, need_unwind = 0;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long)value;
+ int ret = _gf_true;
+ uint64_t total_mem = 0;
+ uint64_t max_cache_size = 0;
+ volume_option_t *opt = NULL;
+
+ GF_ASSERT (this);
+ opt = xlator_volume_option_get (this, "cache-size");
+ if (!opt) {
+ ret = _gf_false;
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not get cache-size option");
+ goto out;
}
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else if (qr_fd_ctx->open_in_transit) {
- stub = fop_flush_stub (frame, qr_flush_helper,
- fd);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- } else {
- op_ret = 0;
- need_unwind = 1;
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- op_ret = 0;
- need_unwind = 1;
- }
+ total_mem = get_mem_size ();
+ if (-1 == total_mem)
+ max_cache_size = opt->max;
+ else
+ max_cache_size = total_mem;
- if (need_unwind) {
- STACK_UNWIND (frame, op_ret, op_errno);
- } else if (can_wind) {
- STACK_WIND (frame, qr_flush_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->flush, fd);
+ gf_log (this->name, GF_LOG_DEBUG, "Max cache size is %"PRIu64,
+ max_cache_size);
+ if (cache_size > max_cache_size) {
+ ret = _gf_false;
+ gf_log (this->name, GF_LOG_ERROR, "Cache size %"PRIu64
+ " is greater than the max size of %"PRIu64,
+ cache_size, max_cache_size);
+ goto out;
}
-
- return 0;
+out:
+ return ret;
}
-
-int32_t
-qr_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+int
+reconfigure (xlator_t *this, dict_t *options)
{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
+ int32_t ret = -1;
+ qr_private_t *priv = NULL;
+ qr_conf_t *conf = NULL;
+ uint64_t cache_size_new = 0;
-int32_t
-qr_fentrylk_helper (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, const char *basename, entrylk_cmd cmd,
- entrylk_type type)
-{
- STACK_WIND(frame, qr_fentrylk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fentrylk, volume, fd, basename,
- cmd, type);
- return 0;
-}
+ GF_VALIDATE_OR_GOTO ("quick-read", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO (this->name, options, out);
+ priv = this->private;
-int32_t
-qr_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
- const char *basename, entrylk_cmd cmd, entrylk_type type)
-{
- int flags = 0;
- uint64_t value = 0;
- call_stub_t *stub = NULL;
- char *path = NULL;
- loc_t loc = {0, };
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- char need_open = 0, can_wind = 0, need_unwind = 0;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long)value;
+ conf = &priv->conf;
+ if (!conf) {
+ goto out;
}
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- flags = qr_fd_ctx->flags;
-
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
+ GF_OPTION_RECONF ("cache-timeout", conf->cache_timeout, options, int32,
+ out);
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_fentrylk_stub (frame,
- qr_fentrylk_helper,
- volume, fd, basename,
- cmd, type);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
+ GF_OPTION_RECONF ("cache-size", cache_size_new, options, size, out);
+ if (!check_cache_size_ok (this, cache_size_new)) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Not reconfiguring cache-size");
+ goto out;
}
+ conf->cache_size = cache_size_new;
+ ret = 0;
out:
- if (need_unwind) {
- STACK_UNWIND (frame, op_ret, op_errno);
- } else if (can_wind) {
- STACK_WIND (frame, qr_fentrylk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fentrylk, volume, fd,
- basename, cmd, type);
- } else if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
-
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &loc, flags, fd);
-
- qr_loc_wipe (&loc);
- }
-
- return 0;
-}
-
-
-int32_t
-qr_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-int32_t
-qr_finodelk_helper (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, int32_t cmd, struct flock *lock)
-{
- STACK_WIND (frame, qr_finodelk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, volume, fd, cmd, lock);
- return 0;
+ return ret;
}
int32_t
-qr_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
- int32_t cmd, struct flock *lock)
+qr_get_priority_list (const char *opt_str, struct list_head *first)
{
- int flags = 0;
- uint64_t value = 0;
- call_stub_t *stub = NULL;
- char *path = NULL;
- loc_t loc = {0, };
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- char need_open = 0, can_wind = 0, need_unwind = 0;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long)value;
+ int32_t max_pri = 1;
+ char *tmp_str = NULL;
+ char *tmp_str1 = NULL;
+ char *tmp_str2 = NULL;
+ char *dup_str = NULL;
+ char *priority_str = NULL;
+ char *pattern = NULL;
+ char *priority = NULL;
+ char *string = NULL;
+ struct qr_priority *curr = NULL, *tmp = NULL;
+
+ GF_VALIDATE_OR_GOTO ("quick-read", opt_str, out);
+ GF_VALIDATE_OR_GOTO ("quick-read", first, out);
+
+ string = gf_strdup (opt_str);
+ if (string == NULL) {
+ max_pri = -1;
+ goto out;
}
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- flags = qr_fd_ctx->flags;
+ /* Get the pattern for cache priority.
+ * "option priority *.jpg:1,abc*:2" etc
+ */
+ /* TODO: inode_lru in table is statically hard-coded to 5,
+ * should be changed to run-time configuration
+ */
+ priority_str = strtok_r (string, ",", &tmp_str);
+ while (priority_str) {
+ curr = GF_CALLOC (1, sizeof (*curr), gf_qr_mt_qr_priority_t);
+ if (curr == NULL) {
+ max_pri = -1;
+ goto out;
+ }
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
+ list_add_tail (&curr->list, first);
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_finodelk_stub (frame,
- qr_finodelk_helper,
- volume, fd, cmd,
- lock);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
+ dup_str = gf_strdup (priority_str);
+ if (dup_str == NULL) {
+ max_pri = -1;
+ goto out;
}
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
- }
-out:
- if (need_unwind) {
- STACK_UNWIND (frame, op_ret, op_errno);
- } else if (can_wind) {
- STACK_WIND (frame, qr_finodelk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk, volume, fd,
- cmd, lock);
- } else if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
+ pattern = strtok_r (dup_str, ":", &tmp_str1);
+ if (!pattern) {
+ max_pri = -1;
goto out;
}
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &loc, flags, fd);
-
- qr_loc_wipe (&loc);
- }
-
- return 0;
-}
+ priority = strtok_r (NULL, ":", &tmp_str1);
+ if (!priority) {
+ max_pri = -1;
+ goto out;
+ }
+ gf_log ("quick-read", GF_LOG_TRACE,
+ "quick-read priority : pattern %s : priority %s",
+ pattern,
+ priority);
-int32_t
-qr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
+ curr->pattern = gf_strdup (pattern);
+ if (curr->pattern == NULL) {
+ max_pri = -1;
+ goto out;
+ }
+ curr->priority = strtol (priority, &tmp_str2, 0);
+ if (tmp_str2 && (*tmp_str2)) {
+ max_pri = -1;
+ goto out;
+ } else {
+ max_pri = max (max_pri, curr->priority);
+ }
-int32_t
-qr_fsync_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
-{
- STACK_WIND (frame, qr_fsync_cbk, FIRST_CHILD (this),
- FIRST_CHILD(this)->fops->fsync, fd, flags);
- return 0;
-}
+ GF_FREE (dup_str);
+ dup_str = NULL;
-int32_t
-qr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
-{
- uint64_t value = 0;
- call_stub_t *stub = NULL;
- char *path = NULL;
- loc_t loc = {0, };
- int open_flags = 0;
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- char need_open = 0, can_wind = 0, need_unwind = 0;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long)value;
+ priority_str = strtok_r (NULL, ",", &tmp_str);
}
+out:
+ GF_FREE (string);
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- open_flags = qr_fd_ctx->flags;
+ GF_FREE (dup_str);
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
-
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_fsync_stub (frame, qr_fsync_helper,
- fd, flags);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
+ if (max_pri == -1) {
+ list_for_each_entry_safe (curr, tmp, first, list) {
+ list_del_init (&curr->list);
+ GF_FREE (curr->pattern);
+ GF_FREE (curr);
}
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
}
-out:
- if (need_unwind) {
- STACK_UNWIND (frame, op_ret, op_errno);
- } else if (can_wind) {
- STACK_WIND (frame, qr_fsync_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsync, fd, flags);
- } else if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
-
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &loc, open_flags,
- fd);
-
- qr_loc_wipe (&loc);
- }
-
- return 0;
+ return max_pri;
}
int32_t
-qr_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+init (xlator_t *this)
{
- int32_t ret = 0;
- uint64_t value = 0;
- qr_file_t *qr_file = NULL;
- qr_local_t *local = NULL;
+ int32_t ret = -1, i = 0;
+ qr_private_t *priv = NULL;
+ qr_conf_t *conf = NULL;
- if (op_ret == -1) {
- goto out;
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "FATAL: volume (%s) not configured with exactly one "
+ "child", this->name);
+ return -1;
}
- local = frame->local;
- if ((local == NULL) || (local->fd == NULL)
- || (local->fd->inode == NULL)) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
-
- ret = inode_ctx_get (local->fd->inode, this, &value);
- if (ret == 0) {
- qr_file = (qr_file_t *)(long) value;
-
- if (qr_file) {
- LOCK (&qr_file->lock);
- {
- if (qr_file->stbuf.st_size != buf->st_size) {
- dict_unref (qr_file->xattr);
- qr_file->xattr = NULL;
- }
- }
- UNLOCK (&qr_file->lock);
- }
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
}
-out:
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
+ priv = GF_CALLOC (1, sizeof (*priv), gf_qr_mt_qr_private_t);
+ if (priv == NULL) {
+ ret = -1;
+ goto out;
+ }
+ LOCK_INIT (&priv->table.lock);
+ conf = &priv->conf;
-int32_t
-qr_ftruncate_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- off_t offset)
-{
- STACK_WIND (frame, qr_ftruncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, fd, offset);
- return 0;
-}
+ GF_OPTION_INIT ("max-file-size", conf->max_file_size, size, out);
+ GF_OPTION_INIT ("cache-timeout", conf->cache_timeout, int32, out);
-int32_t
-qr_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
-{
- int flags = 0;
- uint64_t value = 0;
- call_stub_t *stub = NULL;
- char *path = NULL;
- loc_t loc = {0, };
- qr_local_t *local = NULL;
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- char need_open = 0, can_wind = 0, need_unwind = 0;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long)value;
- }
-
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
+ GF_OPTION_INIT ("cache-size", conf->cache_size, size, out);
+ if (!check_cache_size_ok (this, conf->cache_size)) {
+ ret = -1;
goto out;
}
- local->fd = fd;
- frame->local = local;
-
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- flags = qr_fd_ctx->flags;
-
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
-
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_ftruncate_stub (frame,
- qr_ftruncate_helper,
- fd, offset);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
- }
+ INIT_LIST_HEAD (&conf->priority_list);
+ conf->max_pri = 1;
+ if (dict_get (this->options, "priority")) {
+ char *option_list = data_to_str (dict_get (this->options,
+ "priority"));
+ gf_log (this->name, GF_LOG_TRACE,
+ "option path %s", option_list);
+ /* parse the list of pattern:priority */
+ conf->max_pri = qr_get_priority_list (option_list,
+ &conf->priority_list);
-out:
- if (need_unwind) {
- STACK_UNWIND (frame, op_ret, op_errno, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, qr_ftruncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, fd, offset);
- } else if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
+ if (conf->max_pri == -1) {
goto out;
}
-
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &loc, flags, fd);
-
- qr_loc_wipe (&loc);
+ conf->max_pri ++;
}
-
- return 0;
-}
+ priv->table.lru = GF_CALLOC (conf->max_pri, sizeof (*priv->table.lru),
+ gf_common_mt_list_head);
+ if (priv->table.lru == NULL) {
+ ret = -1;
+ goto out;
+ }
-int32_t
-qr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct flock *lock)
-{
- STACK_UNWIND (frame, op_ret, op_errno, lock);
- return 0;
-}
+ for (i = 0; i < conf->max_pri; i++) {
+ INIT_LIST_HEAD (&priv->table.lru[i]);
+ }
+ ret = 0;
-int32_t
-qr_lk_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct flock *lock)
-{
- STACK_WIND (frame, qr_lk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk, fd, cmd, lock);
+ this->private = priv;
+out:
+ if ((ret == -1) && priv) {
+ GF_FREE (priv);
+ }
- return 0;
+ return ret;
}
-int32_t
-qr_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct flock *lock)
+void
+qr_inode_table_destroy (qr_private_t *priv)
{
- int flags = 0;
- uint64_t value = 0;
- call_stub_t *stub = NULL;
- char *path = NULL;
- loc_t loc = {0, };
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t ret = -1, op_ret = -1, op_errno = -1;
- char need_open = 0, can_wind = 0, need_unwind = 0;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long)value;
- }
-
- if (qr_fd_ctx) {
- LOCK (&qr_fd_ctx->lock);
- {
- path = qr_fd_ctx->path;
- flags = qr_fd_ctx->flags;
-
- if (!(qr_fd_ctx->opened
- || qr_fd_ctx->open_in_transit)) {
- need_open = 1;
- qr_fd_ctx->open_in_transit = 1;
- }
-
- if (qr_fd_ctx->opened) {
- can_wind = 1;
- } else {
- stub = fop_lk_stub (frame, qr_lk_helper, fd,
- cmd, lock);
- if (stub == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- need_unwind = 1;
- qr_fd_ctx->open_in_transit = 0;
- goto unlock;
- }
-
- list_add_tail (&stub->list,
- &qr_fd_ctx->waiting_ops);
- }
- }
- unlock:
- UNLOCK (&qr_fd_ctx->lock);
- } else {
- can_wind = 1;
- }
-
-out:
- if (need_unwind) {
- STACK_UNWIND (frame, op_ret, op_errno, NULL);
- } else if (can_wind) {
- STACK_WIND (frame, qr_lk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk, fd, cmd, lock);
- } else if (need_open) {
- op_ret = qr_loc_fill (&loc, fd->inode, path);
- if (op_ret == -1) {
- qr_resume_pending_ops (qr_fd_ctx);
- goto out;
- }
+ int i = 0;
+ qr_conf_t *conf = NULL;
- STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &loc, flags, fd);
+ conf = &priv->conf;
- qr_loc_wipe (&loc);
+ for (i = 0; i < conf->max_pri; i++) {
+ GF_ASSERT (list_empty (&priv->table.lru[i]));
}
-
- return 0;
-}
+ LOCK_DESTROY (&priv->table.lock);
-int32_t
-qr_release (xlator_t *this, fd_t *fd)
-{
- qr_fd_ctx_t *qr_fd_ctx = NULL;
- int32_t ret = 0;
- uint64_t value = 0;
-
- ret = fd_ctx_del (fd, this, &value);
- if (ret == 0) {
- qr_fd_ctx = (qr_fd_ctx_t *)(long) value;
- if (qr_fd_ctx) {
- qr_fd_ctx_free (qr_fd_ctx);
- }
- }
-
- return 0;
+ return;
}
-int32_t
-qr_forget (xlator_t *this, inode_t *inode)
+void
+qr_conf_destroy (qr_conf_t *conf)
{
- qr_file_t *qr_file = NULL;
- uint64_t value = 0;
- int32_t ret = -1;
-
- ret = inode_ctx_del (inode, this, &value);
- if (ret == 0) {
- qr_file = (qr_file_t *)(long) value;
- if (qr_file) {
- LOCK (&qr_file->lock);
- {
- if (qr_file->xattr) {
- dict_unref (qr_file->xattr);
- qr_file->xattr = NULL;
- }
- }
- UNLOCK (&qr_file->lock);
- }
-
- FREE (qr_file);
+ struct qr_priority *curr = NULL, *tmp = NULL;
+
+ list_for_each_entry_safe (curr, tmp, &conf->priority_list, list) {
+ list_del (&curr->list);
+ GF_FREE (curr->pattern);
+ GF_FREE (curr);
}
- return 0;
+ return;
}
-int32_t
-init (xlator_t *this)
+void
+fini (xlator_t *this)
{
- char *str = NULL;
- int32_t ret = -1;
- qr_conf_t *conf = NULL;
-
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: volume (%s) not configured with exactly one "
- "child", this->name);
- return -1;
- }
+ qr_private_t *priv = NULL;
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
- conf = CALLOC (1, sizeof (*conf));
- if (conf == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
- ret = -1;
+ if (this == NULL) {
goto out;
}
- ret = dict_get_str (this->options, "max-file-size",
- &str);
- if (ret == 0) {
- ret = gf_string2bytesize (str, &conf->max_file_size);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format \"%s\" of \"option "
- "max-file-size\"",
- str);
- ret = -1;
- goto out;
- }
- }
-
- conf->cache_timeout = -1;
- ret = dict_get_str (this->options, "cache-timeout", &str);
- if (ret == 0) {
- ret = gf_string2uint_base10 (str,
- (unsigned int *)&conf->cache_timeout);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid cache-timeout value %s", str);
- ret = -1;
- goto out;
- }
- }
-
- this->private = conf;
-out:
- if ((ret == -1) && conf) {
- FREE (conf);
+ priv = this->private;
+ if (priv == NULL) {
+ goto out;
}
- return ret;
-}
+ qr_inode_table_destroy (priv);
+ qr_conf_destroy (&priv->conf);
+ this->private = NULL;
-void
-fini (xlator_t *this)
-{
+ GF_FREE (priv);
+out:
return;
}
-
struct xlator_fops fops = {
- .lookup = qr_lookup,
+ .lookup = qr_lookup,
+ .readdirp = qr_readdirp,
.open = qr_open,
.readv = qr_readv,
- .writev = qr_writev,
- .fstat = qr_fstat,
- .fchown = qr_fchown,
- .fchmod = qr_fchmod,
- .fsetxattr = qr_fsetxattr,
- .fgetxattr = qr_fgetxattr,
- .flush = qr_flush,
- .fentrylk = qr_fentrylk,
- .finodelk = qr_finodelk,
- .fsync = qr_fsync,
- .ftruncate = qr_ftruncate,
- .lk = qr_lk,
+ .writev = qr_writev,
+ .truncate = qr_truncate,
+ .ftruncate = qr_ftruncate
};
-
-struct xlator_mops mops = {
-};
-
-
struct xlator_cbks cbks = {
.forget = qr_forget,
- .release = qr_release,
+};
+
+struct xlator_dumpops dumpops = {
+ .priv = qr_priv_dump,
+ .inodectx = qr_inodectx_dump,
};
struct volume_options options[] = {
- { .key = {"cache-timeout"},
+ { .key = {"priority"},
+ .type = GF_OPTION_TYPE_ANY
+ },
+ { .key = {"cache-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 0,
+ .max = 32 * GF_UNIT_GB,
+ .default_value = "128MB",
+ .description = "Size of the read cache."
+ },
+ { .key = {"cache-timeout"},
.type = GF_OPTION_TYPE_INT,
.min = 1,
- .max = 60
+ .max = 60,
+ .default_value = "1",
},
- { .key = {"max-file-size"},
- .type = GF_OPTION_TYPE_SIZET,
+ { .key = {"max-file-size"},
+ .type = GF_OPTION_TYPE_SIZET,
.min = 0,
- .max = 1 * GF_UNIT_MB
+ .max = 1 * GF_UNIT_KB * 1000,
+ .default_value = "64KB",
},
+ { .key = {NULL} }
};
diff --git a/xlators/performance/quick-read/src/quick-read.h b/xlators/performance/quick-read/src/quick-read.h
index c49ab167b..6f0a05417 100644
--- a/xlators/performance/quick-read/src/quick-read.h
+++ b/xlators/performance/quick-read/src/quick-read.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2009-2010 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __QUICK_READ_H
@@ -40,41 +31,51 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
+#include <fnmatch.h>
+#include "quick-read-mem-types.h"
-#define GLUSTERFS_CONTENT_KEY "glusterfs.content"
-struct qr_fd_ctx {
- char opened;
- char open_in_transit;
- char *path;
- int flags;
- struct list_head waiting_ops;
- gf_lock_t lock;
+struct qr_inode {
+ void *data;
+ size_t size;
+ int priority;
+ uint32_t ia_mtime;
+ uint32_t ia_mtime_nsec;
+ struct iatt buf;
+ struct timeval last_refresh;
+ struct list_head lru;
};
-typedef struct qr_fd_ctx qr_fd_ctx_t;
+typedef struct qr_inode qr_inode_t;
-struct qr_local {
- char is_open;
- fd_t *fd;
- int open_flags;
- int32_t op_ret;
- int32_t op_errno;
- call_stub_t *stub;
-};
-typedef struct qr_local qr_local_t;
-struct qr_file {
- dict_t *xattr;
- struct stat stbuf;
- struct timeval tv;
- gf_lock_t lock;
+struct qr_priority {
+ char *pattern;
+ int32_t priority;
+ struct list_head list;
};
-typedef struct qr_file qr_file_t;
+typedef struct qr_priority qr_priority_t;
struct qr_conf {
- uint64_t max_file_size;
- int32_t cache_timeout;
+ uint64_t max_file_size;
+ int32_t cache_timeout;
+ uint64_t cache_size;
+ int max_pri;
+ struct list_head priority_list;
};
typedef struct qr_conf qr_conf_t;
+struct qr_inode_table {
+ uint64_t cache_used;
+ struct list_head *lru;
+ gf_lock_t lock;
+};
+typedef struct qr_inode_table qr_inode_table_t;
+
+struct qr_private {
+ qr_conf_t conf;
+ qr_inode_table_t table;
+};
+typedef struct qr_private qr_private_t;
+
+
#endif /* #ifndef __QUICK_READ_H */
diff --git a/xlators/performance/read-ahead/src/Makefile.am b/xlators/performance/read-ahead/src/Makefile.am
index 7bb902282..be80ae7ac 100644
--- a/xlators/performance/read-ahead/src/Makefile.am
+++ b/xlators/performance/read-ahead/src/Makefile.am
@@ -1,14 +1,15 @@
xlator_LTLIBRARIES = read-ahead.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
-read_ahead_la_LDFLAGS = -module -avoidversion
+read_ahead_la_LDFLAGS = -module -avoid-version
read_ahead_la_SOURCES = read-ahead.c page.c
read_ahead_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = read-ahead.h
+noinst_HEADERS = read-ahead.h read-ahead-mem-types.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/performance/read-ahead/src/page.c b/xlators/performance/read-ahead/src/page.c
index 7a311fc3f..e79e7ae78 100644
--- a/xlators/performance/read-ahead/src/page.c
+++ b/xlators/performance/read-ahead/src/page.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -32,79 +23,88 @@
ra_page_t *
ra_page_get (ra_file_t *file, off_t offset)
{
- ra_page_t *page = NULL;
- off_t rounded_offset = 0;
+ ra_page_t *page = NULL;
+ off_t rounded_offset = 0;
- page = file->pages.next;
- rounded_offset = floor (offset, file->page_size);
+ GF_VALIDATE_OR_GOTO ("read-ahead", file, out);
- while (page != &file->pages && page->offset < rounded_offset)
- page = page->next;
+ page = file->pages.next;
+ rounded_offset = floor (offset, file->page_size);
- if (page == &file->pages || page->offset != rounded_offset)
- page = NULL;
+ while (page != &file->pages && page->offset < rounded_offset)
+ page = page->next;
- return page;
+ if (page == &file->pages || page->offset != rounded_offset)
+ page = NULL;
+
+out:
+ return page;
}
ra_page_t *
ra_page_create (ra_file_t *file, off_t offset)
{
- ra_page_t *page = NULL;
- off_t rounded_offset = 0;
- ra_page_t *newpage = NULL;
+ ra_page_t *page = NULL;
+ off_t rounded_offset = 0;
+ ra_page_t *newpage = NULL;
- page = file->pages.next;
- rounded_offset = floor (offset, file->page_size);
+ GF_VALIDATE_OR_GOTO ("read-ahead", file, out);
- while (page != &file->pages && page->offset < rounded_offset)
- page = page->next;
+ page = file->pages.next;
+ rounded_offset = floor (offset, file->page_size);
- if (page == &file->pages || page->offset != rounded_offset) {
- newpage = CALLOC (1, sizeof (*newpage));
- if (!newpage)
- return NULL;
+ while (page != &file->pages && page->offset < rounded_offset)
+ page = page->next;
- newpage->offset = rounded_offset;
- newpage->prev = page->prev;
- newpage->next = page;
- newpage->file = file;
- page->prev->next = newpage;
- page->prev = newpage;
+ if (page == &file->pages || page->offset != rounded_offset) {
+ newpage = GF_CALLOC (1, sizeof (*newpage), gf_ra_mt_ra_page_t);
+ if (!newpage) {
+ goto out;
+ }
- page = newpage;
- }
+ newpage->offset = rounded_offset;
+ newpage->prev = page->prev;
+ newpage->next = page;
+ newpage->file = file;
+ page->prev->next = newpage;
+ page->prev = newpage;
+
+ page = newpage;
+ }
- return page;
+out:
+ return page;
}
void
ra_wait_on_page (ra_page_t *page, call_frame_t *frame)
{
- ra_waitq_t *waitq = NULL;
- ra_local_t *local = NULL;
-
- local = frame->local;
- waitq = CALLOC (1, sizeof (*waitq));
- if (!waitq) {
- gf_log (frame->this->name, GF_LOG_ERROR,
- "out of memory");
+ ra_waitq_t *waitq = NULL;
+ ra_local_t *local = NULL;
+
+ GF_VALIDATE_OR_GOTO ("read-ahead", frame, out);
+ GF_VALIDATE_OR_GOTO (frame->this->name, page, out);
+
+ local = frame->local;
+
+ waitq = GF_CALLOC (1, sizeof (*waitq), gf_ra_mt_ra_waitq_t);
+ if (!waitq) {
local->op_ret = -1;
local->op_errno = ENOMEM;
goto out;
- }
+ }
- waitq->data = frame;
- waitq->next = page->waitq;
- page->waitq = waitq;
+ waitq->data = frame;
+ waitq->next = page->waitq;
+ page->waitq = waitq;
- ra_local_lock (local);
- {
- local->wait_count++;
- }
- ra_local_unlock (local);
+ ra_local_lock (local);
+ {
+ local->wait_count++;
+ }
+ ra_local_unlock (local);
out:
return;
@@ -114,118 +114,146 @@ out:
void
ra_waitq_return (ra_waitq_t *waitq)
{
- ra_waitq_t *trav = NULL;
- ra_waitq_t *next = NULL;
- call_frame_t *frame = NULL;
+ ra_waitq_t *trav = NULL;
+ ra_waitq_t *next = NULL;
+ call_frame_t *frame = NULL;
+
+ for (trav = waitq; trav; trav = next) {
+ next = trav->next;
- for (trav = waitq; trav; trav = next) {
- next = trav->next;
+ frame = trav->data;
+ ra_frame_return (frame);
+ GF_FREE (trav);
+ }
- frame = trav->data;
- ra_frame_return (frame);
- free (trav);
- }
+ return;
}
int
ra_fault_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct stat *stbuf, struct iobref *iobref)
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
{
- ra_local_t *local = NULL;
- off_t pending_offset = 0;
- ra_file_t *file = NULL;
- ra_page_t *page = NULL;
- off_t trav_offset = 0;
- size_t payload_size = 0;
- ra_waitq_t *waitq = NULL;
- fd_t *fd = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
-
- local = frame->local;
- fd = local->fd;
-
- ret = fd_ctx_get (fd, this, &tmp_file);
-
- file = (ra_file_t *)(long)tmp_file;
- pending_offset = local->pending_offset;
- trav_offset = pending_offset;
- payload_size = op_ret;
-
- ra_file_lock (file);
- {
- if (op_ret >= 0)
- file->stbuf = *stbuf;
-
- if (op_ret < 0) {
- page = ra_page_get (file, pending_offset);
- if (page)
- waitq = ra_page_error (page, op_ret, op_errno);
- goto unlock;
- }
-
- page = ra_page_get (file, pending_offset);
- if (!page) {
- gf_log (this->name, GF_LOG_DEBUG,
- "wasted copy: %"PRId64"[+%"PRId64"] file=%p",
- pending_offset, file->page_size, file);
- goto unlock;
- }
-
- if (page->vector) {
- iobref_unref (page->iobref);
- free (page->vector);
- }
-
- page->vector = iov_dup (vector, count);
+ ra_local_t *local = NULL;
+ off_t pending_offset = 0;
+ ra_file_t *file = NULL;
+ ra_page_t *page = NULL;
+ ra_waitq_t *waitq = NULL;
+ fd_t *fd = NULL;
+ uint64_t tmp_file = 0;
+
+ GF_ASSERT (frame);
+
+ local = frame->local;
+ fd = local->fd;
+
+ fd_ctx_get (fd, this, &tmp_file);
+
+ file = (ra_file_t *)(long)tmp_file;
+ pending_offset = local->pending_offset;
+
+ if (file == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "read-ahead context not set in fd (%p)", fd);
+ op_ret = -1;
+ op_errno = EBADF;
+ goto out;
+ }
+
+ ra_file_lock (file);
+ {
+ if (op_ret >= 0)
+ file->stbuf = *stbuf;
+
+ page = ra_page_get (file, pending_offset);
+
+ if (!page) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "wasted copy: %"PRId64"[+%"PRId64"] file=%p",
+ pending_offset, file->page_size, file);
+ goto unlock;
+ }
+
+ /*
+ * "Dirty" means that the request was a pure read-ahead; it's
+ * set for requests we issue ourselves, and cleared when user
+ * requests are issued or put on the waitq. "Poisoned" means
+ * that we got a write while a read was still in flight, and we
+ * couldn't stop it so we marked it instead. If it's both
+ * dirty and poisoned by the time we get here, we cancel its
+ * effect so that a subsequent user read doesn't get data that
+ * we know is stale (because we made it stale ourselves). We
+ * can't use ESTALE because that has special significance.
+ * ECANCELED has no such special meaning, and is close to what
+ * we're trying to indicate.
+ */
+ if (page->dirty && page->poisoned) {
+ op_ret = -1;
+ op_errno = ECANCELED;
+ }
+
+ if (op_ret < 0) {
+ waitq = ra_page_error (page, op_ret, op_errno);
+ goto unlock;
+ }
+
+ if (page->vector) {
+ iobref_unref (page->iobref);
+ GF_FREE (page->vector);
+ }
+
+ page->vector = iov_dup (vector, count);
if (page->vector == NULL) {
waitq = ra_page_error (page, -1, ENOMEM);
goto unlock;
}
- page->count = count;
- page->iobref = iobref_ref (iobref);
- page->ready = 1;
+ page->count = count;
+ page->iobref = iobref_ref (iobref);
+ page->ready = 1;
- page->size = iov_length (vector, count);
+ page->size = iov_length (vector, count);
- waitq = ra_page_wakeup (page);
- }
+ waitq = ra_page_wakeup (page);
+ }
unlock:
- ra_file_unlock (file);
+ ra_file_unlock (file);
- ra_waitq_return (waitq);
+ ra_waitq_return (waitq);
- fd_unref (local->fd);
+ fd_unref (local->fd);
- free (frame->local);
- frame->local = NULL;
+ mem_put (frame->local);
+ frame->local = NULL;
- STACK_DESTROY (frame->root);
- return 0;
+out:
+ STACK_DESTROY (frame->root);
+ return 0;
}
void
ra_page_fault (ra_file_t *file, call_frame_t *frame, off_t offset)
{
- call_frame_t *fault_frame = NULL;
- ra_local_t *fault_local = NULL, *local = NULL;
- ra_page_t *page = NULL;
- ra_waitq_t *waitq = NULL;
- int32_t op_ret = -1, op_errno = -1;
-
- local = frame->local;
- fault_frame = copy_frame (frame);
+ call_frame_t *fault_frame = NULL;
+ ra_local_t *fault_local = NULL;
+ ra_page_t *page = NULL;
+ ra_waitq_t *waitq = NULL;
+ int32_t op_ret = -1, op_errno = -1;
+
+ GF_VALIDATE_OR_GOTO ("read-ahead", frame, out);
+ GF_VALIDATE_OR_GOTO (frame->this->name, file, out);
+
+ fault_frame = copy_frame (frame);
if (fault_frame == NULL) {
op_ret = -1;
op_errno = ENOMEM;
goto err;
}
- fault_local = CALLOC (1, sizeof (ra_local_t));
+ fault_local = mem_get0 (THIS->local_pool);
if (fault_local == NULL) {
STACK_DESTROY (fault_frame->root);
op_ret = -1;
@@ -233,18 +261,18 @@ ra_page_fault (ra_file_t *file, call_frame_t *frame, off_t offset)
goto err;
}
- fault_frame->local = fault_local;
- fault_local->pending_offset = offset;
- fault_local->pending_size = file->page_size;
+ fault_frame->local = fault_local;
+ fault_local->pending_offset = offset;
+ fault_local->pending_size = file->page_size;
- fault_local->fd = fd_ref (file->fd);
+ fault_local->fd = fd_ref (file->fd);
- STACK_WIND (fault_frame, ra_fault_cbk,
- FIRST_CHILD (fault_frame->this),
- FIRST_CHILD (fault_frame->this)->fops->readv,
- file->fd, file->page_size, offset);
+ STACK_WIND (fault_frame, ra_fault_cbk,
+ FIRST_CHILD (fault_frame->this),
+ FIRST_CHILD (fault_frame->this)->fops->readv,
+ file->fd, file->page_size, offset, 0, NULL);
- return;
+ return;
err:
ra_file_lock (file);
@@ -255,80 +283,88 @@ err:
op_errno);
}
ra_file_unlock (file);
-
+
if (waitq != NULL) {
ra_waitq_return (waitq);
}
+
+out:
+ return;
}
+
void
ra_frame_fill (ra_page_t *page, call_frame_t *frame)
{
- ra_local_t *local = NULL;
- ra_fill_t *fill = NULL;
- off_t src_offset = 0;
- off_t dst_offset = 0;
- ssize_t copy_size = 0;
- ra_fill_t *new = NULL;
-
- local = frame->local;
- fill = &local->fill;
-
- if (local->op_ret != -1 && page->size) {
- if (local->offset > page->offset)
- src_offset = local->offset - page->offset;
- else
- dst_offset = page->offset - local->offset;
-
- copy_size = min (page->size - src_offset,
- local->size - dst_offset);
-
- if (copy_size < 0) {
- /* if page contains fewer bytes and the required offset
- is beyond the page size in the page */
- copy_size = src_offset = 0;
- }
-
- fill = fill->next;
- while (fill != &local->fill) {
- if (fill->offset > page->offset) {
- break;
- }
- fill = fill->next;
- }
-
- new = CALLOC (1, sizeof (*new));
+ ra_local_t *local = NULL;
+ ra_fill_t *fill = NULL;
+ off_t src_offset = 0;
+ off_t dst_offset = 0;
+ ssize_t copy_size = 0;
+ ra_fill_t *new = NULL;
+
+ GF_VALIDATE_OR_GOTO ("read-ahead", frame, out);
+ GF_VALIDATE_OR_GOTO (frame->this->name, page, out);
+
+ local = frame->local;
+ fill = &local->fill;
+
+ if (local->op_ret != -1 && page->size) {
+ if (local->offset > page->offset)
+ src_offset = local->offset - page->offset;
+ else
+ dst_offset = page->offset - local->offset;
+
+ copy_size = min (page->size - src_offset,
+ local->size - dst_offset);
+
+ if (copy_size < 0) {
+ /* if page contains fewer bytes and the required offset
+ is beyond the page size in the page */
+ copy_size = src_offset = 0;
+ }
+
+ fill = fill->next;
+ while (fill != &local->fill) {
+ if (fill->offset > page->offset) {
+ break;
+ }
+ fill = fill->next;
+ }
+
+ new = GF_CALLOC (1, sizeof (*new), gf_ra_mt_ra_fill_t);
if (new == NULL) {
local->op_ret = -1;
local->op_errno = ENOMEM;
goto out;
}
- new->offset = page->offset;
- new->size = copy_size;
- new->iobref = iobref_ref (page->iobref);
- new->count = iov_subset (page->vector, page->count,
- src_offset, src_offset+copy_size,
- NULL);
- new->vector = CALLOC (new->count, sizeof (struct iovec));
+ new->offset = page->offset;
+ new->size = copy_size;
+ new->iobref = iobref_ref (page->iobref);
+ new->count = iov_subset (page->vector, page->count,
+ src_offset, src_offset+copy_size,
+ NULL);
+ new->vector = GF_CALLOC (new->count, sizeof (struct iovec),
+ gf_ra_mt_iovec);
if (new->vector == NULL) {
local->op_ret = -1;
local->op_errno = ENOMEM;
- FREE (new);
+ GF_FREE (new);
goto out;
}
- new->count = iov_subset (page->vector, page->count,
- src_offset, src_offset+copy_size,
- new->vector);
+ new->count = iov_subset (page->vector, page->count,
+ src_offset, src_offset+copy_size,
+ new->vector);
- new->next = fill;
- new->prev = new->next->prev;
- new->next->prev = new;
- new->prev->next = new;
+ new->next = fill;
+ new->prev = new->next->prev;
+ new->next->prev = new;
+ new->prev->next = new;
- local->op_ret += copy_size;
- }
+ local->op_ret += copy_size;
+ }
out:
return;
@@ -338,35 +374,36 @@ out:
void
ra_frame_unwind (call_frame_t *frame)
{
- ra_local_t *local = NULL;
- ra_fill_t *fill = NULL;
- int32_t count = 0;
- struct iovec *vector;
- int32_t copied = 0;
- struct iobref *iobref = NULL;
- ra_fill_t *next = NULL;
- fd_t *fd = NULL;
- ra_file_t *file = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
-
- local = frame->local;
- fill = local->fill.next;
-
- iobref = iobref_new ();
+ ra_local_t *local = NULL;
+ ra_fill_t *fill = NULL;
+ int32_t count = 0;
+ struct iovec *vector = NULL;
+ int32_t copied = 0;
+ struct iobref *iobref = NULL;
+ ra_fill_t *next = NULL;
+ fd_t *fd = NULL;
+ ra_file_t *file = NULL;
+ uint64_t tmp_file = 0;
+
+ GF_VALIDATE_OR_GOTO ("read-ahead", frame, out);
+
+ local = frame->local;
+ fill = local->fill.next;
+
+ iobref = iobref_new ();
if (iobref == NULL) {
local->op_ret = -1;
local->op_errno = ENOMEM;
}
- frame->local = NULL;
+ frame->local = NULL;
- while (fill != &local->fill) {
- count += fill->count;
- fill = fill->next;
- }
+ while (fill != &local->fill) {
+ count += fill->count;
+ fill = fill->next;
+ }
- vector = CALLOC (count, sizeof (*vector));
+ vector = GF_CALLOC (count, sizeof (*vector), gf_ra_mt_iovec);
if (vector == NULL) {
local->op_ret = -1;
local->op_errno = ENOMEM;
@@ -374,42 +411,43 @@ ra_frame_unwind (call_frame_t *frame)
iobref = NULL;
}
- fill = local->fill.next;
+ fill = local->fill.next;
- while (fill != &local->fill) {
- next = fill->next;
+ while (fill != &local->fill) {
+ next = fill->next;
if ((vector != NULL) && (iobref != NULL)) {
memcpy (((char *)vector) + copied, fill->vector,
fill->count * sizeof (*vector));
-
+
copied += (fill->count * sizeof (*vector));
iobref_merge (iobref, fill->iobref);
}
- fill->next->prev = fill->prev;
- fill->prev->next = fill->prev;
-
- iobref_unref (fill->iobref);
- free (fill->vector);
- free (fill);
-
- fill = next;
- }
-
- fd = local->fd;
- ret = fd_ctx_get (fd, frame->this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- vector, count, &file->stbuf, iobref);
-
- iobref_unref (iobref);
- pthread_mutex_destroy (&local->local_lock);
- free (local);
- free (vector);
-
- return;
+ fill->next->prev = fill->prev;
+ fill->prev->next = fill->prev;
+
+ iobref_unref (fill->iobref);
+ GF_FREE (fill->vector);
+ GF_FREE (fill);
+
+ fill = next;
+ }
+
+ fd = local->fd;
+ fd_ctx_get (fd, frame->this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+
+ STACK_UNWIND_STRICT (readv, frame, local->op_ret, local->op_errno,
+ vector, count, &file->stbuf, iobref, NULL);
+
+ iobref_unref (iobref);
+ pthread_mutex_destroy (&local->local_lock);
+ mem_put (local);
+ GF_FREE (vector);
+
+out:
+ return;
}
/*
@@ -420,25 +458,28 @@ ra_frame_unwind (call_frame_t *frame)
void
ra_frame_return (call_frame_t *frame)
{
- ra_local_t *local = NULL;
- int32_t wait_count = 0;
+ ra_local_t *local = NULL;
+ int32_t wait_count = 0;
- local = frame->local;
- assert (local->wait_count > 0);
+ GF_VALIDATE_OR_GOTO ("read-ahead", frame, out);
- ra_local_lock (local);
- {
- wait_count = --local->wait_count;
- }
- ra_local_unlock (local);
+ local = frame->local;
+ GF_ASSERT (local->wait_count > 0);
- if (!wait_count)
- ra_frame_unwind (frame);
+ ra_local_lock (local);
+ {
+ wait_count = --local->wait_count;
+ }
+ ra_local_unlock (local);
- return;
+ if (!wait_count)
+ ra_frame_unwind (frame);
+
+out:
+ return;
}
-/*
+/*
* ra_page_wakeup -
* @page:
*
@@ -446,19 +487,24 @@ ra_frame_return (call_frame_t *frame)
ra_waitq_t *
ra_page_wakeup (ra_page_t *page)
{
- ra_waitq_t *waitq = NULL, *trav = NULL;
- call_frame_t *frame;
+ ra_waitq_t *waitq = NULL, *trav = NULL;
+ call_frame_t *frame = NULL;
+
+ GF_VALIDATE_OR_GOTO ("read-ahead", page, out);
- waitq = page->waitq;
- page->waitq = NULL;
+ waitq = page->waitq;
+ page->waitq = NULL;
- trav = waitq;
- for (trav = waitq; trav; trav = trav->next) {
- frame = trav->data;
- ra_frame_fill (page, frame);
- }
+ for (trav = waitq; trav; trav = trav->next) {
+ frame = trav->data;
+ ra_frame_fill (page, frame);
+ }
- return waitq;
+ if (page->stale) {
+ ra_page_purge (page);
+ }
+out:
+ return waitq;
}
/*
@@ -469,14 +515,20 @@ ra_page_wakeup (ra_page_t *page)
void
ra_page_purge (ra_page_t *page)
{
- page->prev->next = page->next;
- page->next->prev = page->prev;
-
- if (page->iobref) {
- iobref_unref (page->iobref);
- }
- free (page->vector);
- free (page);
+ GF_VALIDATE_OR_GOTO ("read-ahead", page, out);
+
+ page->prev->next = page->next;
+ page->next->prev = page->prev;
+
+ if (page->iobref) {
+ iobref_unref (page->iobref);
+ }
+
+ GF_FREE (page->vector);
+ GF_FREE (page);
+
+out:
+ return;
}
/*
@@ -489,32 +541,33 @@ ra_page_purge (ra_page_t *page)
ra_waitq_t *
ra_page_error (ra_page_t *page, int32_t op_ret, int32_t op_errno)
{
+ ra_waitq_t *waitq = NULL;
+ ra_waitq_t *trav = NULL;
+ call_frame_t *frame = NULL;
+ ra_local_t *local = NULL;
- ra_waitq_t *waitq = NULL;
- ra_waitq_t *trav = NULL;
- call_frame_t *frame = NULL;
- ra_local_t *local = NULL;
+ GF_VALIDATE_OR_GOTO ("read-ahead", page, out);
- waitq = page->waitq;
- page->waitq = NULL;
+ waitq = page->waitq;
+ page->waitq = NULL;
- trav = waitq;
- for (trav = waitq; trav; trav = trav->next) {
- frame = trav->data;
+ for (trav = waitq; trav; trav = trav->next) {
+ frame = trav->data;
- local = frame->local;
- if (local->op_ret != -1) {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- }
- }
+ local = frame->local;
+ if (local->op_ret != -1) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+ }
- ra_page_purge (page);
+ ra_page_purge (page);
- return waitq;
+out:
+ return waitq;
}
-/*
+/*
* ra_file_destroy -
* @file:
*
@@ -522,24 +575,29 @@ ra_page_error (ra_page_t *page, int32_t op_ret, int32_t op_errno)
void
ra_file_destroy (ra_file_t *file)
{
- ra_conf_t *conf = NULL;
- ra_page_t *trav = NULL;
-
- conf = file->conf;
-
- ra_conf_lock (conf);
- {
- file->prev->next = file->next;
- file->next->prev = file->prev;
- }
- ra_conf_unlock (conf);
-
- trav = file->pages.next;
- while (trav != &file->pages) {
- ra_page_error (trav, -1, EINVAL);
- trav = file->pages.next;
- }
-
- pthread_mutex_destroy (&file->file_lock);
- free (file);
+ ra_conf_t *conf = NULL;
+ ra_page_t *trav = NULL;
+
+ GF_VALIDATE_OR_GOTO ("read-ahead", file, out);
+
+ conf = file->conf;
+
+ ra_conf_lock (conf);
+ {
+ file->prev->next = file->next;
+ file->next->prev = file->prev;
+ }
+ ra_conf_unlock (conf);
+
+ trav = file->pages.next;
+ while (trav != &file->pages) {
+ ra_page_error (trav, -1, EINVAL);
+ trav = file->pages.next;
+ }
+
+ pthread_mutex_destroy (&file->file_lock);
+ GF_FREE (file);
+
+out:
+ return;
}
diff --git a/xlators/performance/read-ahead/src/read-ahead-mem-types.h b/xlators/performance/read-ahead/src/read-ahead-mem-types.h
new file mode 100644
index 000000000..219e29289
--- /dev/null
+++ b/xlators/performance/read-ahead/src/read-ahead-mem-types.h
@@ -0,0 +1,26 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __RA_MEM_TYPES_H__
+#define __RA_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_ra_mem_types_ {
+ gf_ra_mt_ra_file_t = gf_common_mt_end + 1,
+ gf_ra_mt_ra_conf_t,
+ gf_ra_mt_ra_page_t,
+ gf_ra_mt_ra_waitq_t,
+ gf_ra_mt_ra_fill_t,
+ gf_ra_mt_iovec,
+ gf_ra_mt_end
+};
+#endif
diff --git a/xlators/performance/read-ahead/src/read-ahead.c b/xlators/performance/read-ahead/src/read-ahead.c
index 49cf0b9bf..069ab1f1a 100644
--- a/xlators/performance/read-ahead/src/read-ahead.c
+++ b/xlators/performance/read-ahead/src/read-ahead.c
@@ -1,27 +1,18 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-/*
- TODO:
- - handle O_DIRECT
- - maintain offset, flush on lseek
- - ensure efficient memory managment in case of random seek
+/*
+ TODO:
+ - handle O_DIRECT
+ - maintain offset, flush on lseek
+ - ensure efficient memory management in case of random seek
*/
#ifndef _CONFIG_H
@@ -34,6 +25,7 @@
#include "dict.h"
#include "xlator.h"
#include "read-ahead.h"
+#include "statedump.h"
#include <assert.h>
#include <sys/time.h>
@@ -43,160 +35,177 @@ read_ahead (call_frame_t *frame, ra_file_t *file);
int
ra_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- ra_conf_t *conf = NULL;
- ra_file_t *file = NULL;
- int ret = 0;
+ ra_conf_t *conf = NULL;
+ ra_file_t *file = NULL;
+ int ret = 0;
- conf = this->private;
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
- if (op_ret == -1) {
- goto unwind;
- }
+ conf = this->private;
+
+ if (op_ret == -1) {
+ goto unwind;
+ }
- file = CALLOC (1, sizeof (*file));
- if (!file) {
+ file = GF_CALLOC (1, sizeof (*file), gf_ra_mt_ra_file_t);
+ if (!file) {
op_ret = -1;
op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
- goto unwind;
- }
-
- ret = fd_ctx_set (fd, this, (uint64_t)(long)file);
-
- /* If mandatory locking has been enabled on this file,
- we disable caching on it */
+ goto unwind;
+ }
- if ((fd->inode->st_mode & S_ISGID) && !(fd->inode->st_mode & S_IXGRP))
- file->disabled = 1;
+ /* If O_DIRECT open, we disable caching on it */
- /* If O_DIRECT open, we disable caching on it */
+ if ((fd->flags & O_DIRECT) || ((fd->flags & O_ACCMODE) == O_WRONLY))
+ file->disabled = 1;
- if ((fd->flags & O_DIRECT) || ((fd->flags & O_ACCMODE) == O_WRONLY))
- file->disabled = 1;
+ file->offset = (unsigned long long) 0;
+ file->conf = conf;
+ file->pages.next = &file->pages;
+ file->pages.prev = &file->pages;
+ file->pages.offset = (unsigned long long) 0;
+ file->pages.file = file;
- file->offset = (unsigned long long) 0;
- file->conf = conf;
- file->pages.next = &file->pages;
- file->pages.prev = &file->pages;
- file->pages.offset = (unsigned long long) 0;
- file->pages.file = file;
+ ra_conf_lock (conf);
+ {
+ file->next = conf->files.next;
+ conf->files.next = file;
+ file->next->prev = file;
+ file->prev = &conf->files;
+ }
+ ra_conf_unlock (conf);
- ra_conf_lock (conf);
- {
- file->next = conf->files.next;
- conf->files.next = file;
- file->next->prev = file;
- file->prev = &conf->files;
- }
- ra_conf_unlock (conf);
+ file->fd = fd;
+ file->page_count = conf->page_count;
+ file->page_size = conf->page_size;
+ pthread_mutex_init (&file->file_lock, NULL);
- file->fd = fd;
- file->page_count = conf->page_count;
- file->page_size = conf->page_size;
- pthread_mutex_init (&file->file_lock, NULL);
+ if (!file->disabled) {
+ file->page_count = 1;
+ }
- if (!file->disabled) {
- file->page_count = 1;
- }
+ ret = fd_ctx_set (fd, this, (uint64_t)(long)file);
+ if (ret == -1) {
+ gf_log (frame->this->name, GF_LOG_WARNING,
+ "cannot set read-ahead context information in fd (%p)",
+ fd);
+ ra_file_destroy (file);
+ op_ret = -1;
+ op_errno = ENOMEM;
+ }
unwind:
- STACK_UNWIND (frame, op_ret, op_errno, fd);
+ frame->local = NULL;
- return 0;
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+
+ return 0;
}
int
ra_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
- struct stat *buf)
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- ra_conf_t *conf = NULL;
- ra_file_t *file = NULL;
- int ret = 0;
+ ra_conf_t *conf = NULL;
+ ra_file_t *file = NULL;
+ int ret = 0;
- conf = this->private;
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
- if (op_ret == -1) {
- goto unwind;
- }
+ conf = this->private;
+
+ if (op_ret == -1) {
+ goto unwind;
+ }
- file = CALLOC (1, sizeof (*file));
- if (!file) {
+ file = GF_CALLOC (1, sizeof (*file), gf_ra_mt_ra_file_t);
+ if (!file) {
op_ret = -1;
op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
- goto unwind;
- }
-
- ret = fd_ctx_set (fd, this, (uint64_t)(long)file);
-
- /* If mandatory locking has been enabled on this file,
- we disable caching on it */
-
- if ((fd->inode->st_mode & S_ISGID) && !(fd->inode->st_mode & S_IXGRP))
- file->disabled = 1;
-
- /* If O_DIRECT open, we disable caching on it */
-
- if ((fd->flags & O_DIRECT) || ((fd->flags & O_ACCMODE) == O_WRONLY))
- file->disabled = 1;
+ goto unwind;
+ }
- file->offset = (unsigned long long) 0;
- //file->size = fd->inode->buf.st_size;
- file->conf = conf;
- file->pages.next = &file->pages;
- file->pages.prev = &file->pages;
- file->pages.offset = (unsigned long long) 0;
- file->pages.file = file;
+ /* If O_DIRECT open, we disable caching on it */
+
+ if ((fd->flags & O_DIRECT) || ((fd->flags & O_ACCMODE) == O_WRONLY))
+ file->disabled = 1;
+
+ file->offset = (unsigned long long) 0;
+ //file->size = fd->inode->buf.ia_size;
+ file->conf = conf;
+ file->pages.next = &file->pages;
+ file->pages.prev = &file->pages;
+ file->pages.offset = (unsigned long long) 0;
+ file->pages.file = file;
+
+ ra_conf_lock (conf);
+ {
+ file->next = conf->files.next;
+ conf->files.next = file;
+ file->next->prev = file;
+ file->prev = &conf->files;
+ }
+ ra_conf_unlock (conf);
- ra_conf_lock (conf);
- {
- file->next = conf->files.next;
- conf->files.next = file;
- file->next->prev = file;
- file->prev = &conf->files;
- }
- ra_conf_unlock (conf);
+ file->fd = fd;
+ file->page_count = conf->page_count;
+ file->page_size = conf->page_size;
+ pthread_mutex_init (&file->file_lock, NULL);
- file->fd = fd;
- file->page_count = conf->page_count;
- file->page_size = conf->page_size;
- pthread_mutex_init (&file->file_lock, NULL);
+ ret = fd_ctx_set (fd, this, (uint64_t)(long)file);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "cannot set read ahead context information in fd (%p)",
+ fd);
+ ra_file_destroy (file);
+ op_ret = -1;
+ op_errno = ENOMEM;
+ }
unwind:
- STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
+ STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
- return 0;
+ return 0;
}
int
ra_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
- STACK_WIND (frame, ra_open_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->open,
- loc, flags, fd);
+ GF_ASSERT (frame);
+ GF_ASSERT (this);
+
+ STACK_WIND (frame, ra_open_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->open,
+ loc, flags, fd, xdata);
- return 0;
+ return 0;
}
+
int
ra_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, fd_t *fd)
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- STACK_WIND (frame, ra_create_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create,
- loc, flags, mode, fd);
+ GF_ASSERT (frame);
+ GF_ASSERT (this);
+
+ STACK_WIND (frame, ra_create_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create,
+ loc, flags, mode, umask, fd, xdata);
- return 0;
+ return 0;
}
/* free cache pages between offset and offset+size,
@@ -204,712 +213,1047 @@ ra_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
*/
static void
-flush_region (call_frame_t *frame, ra_file_t *file, off_t offset, off_t size)
-{
- ra_page_t *trav = NULL;
- ra_page_t *next = NULL;
-
- ra_file_lock (file);
- {
- trav = file->pages.next;
- while (trav != &file->pages
- && trav->offset < (offset + size)) {
-
- next = trav->next;
- if (trav->offset >= offset && !trav->waitq) {
- ra_page_purge (trav);
- }
- trav = next;
- }
- }
- ra_file_unlock (file);
+flush_region (call_frame_t *frame, ra_file_t *file, off_t offset, off_t size,
+ int for_write)
+{
+ ra_page_t *trav = NULL;
+ ra_page_t *next = NULL;
+
+ ra_file_lock (file);
+ {
+ trav = file->pages.next;
+ while (trav != &file->pages
+ && trav->offset < (offset + size)) {
+
+ next = trav->next;
+ if (trav->offset >= offset) {
+ if (!trav->waitq) {
+ ra_page_purge (trav);
+ }
+ else {
+ trav->stale = 1;
+
+ if (for_write) {
+ trav->poisoned = 1;
+ }
+ }
+ }
+ trav = next;
+ }
+ }
+ ra_file_unlock (file);
}
int
ra_release (xlator_t *this, fd_t *fd)
{
- uint64_t tmp_file = 0;
- int ret = 0;
+ uint64_t tmp_file = 0;
+ int ret = 0;
- ret = fd_ctx_del (fd, this, &tmp_file);
-
- if (!ret) {
- ra_file_destroy ((ra_file_t *)(long)tmp_file);
- }
+ GF_VALIDATE_OR_GOTO ("read-ahead", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, fd, out);
+
+ ret = fd_ctx_del (fd, this, &tmp_file);
+
+ if (!ret) {
+ ra_file_destroy ((ra_file_t *)(long)tmp_file);
+ }
- return 0;
+out:
+ return 0;
}
void
read_ahead (call_frame_t *frame, ra_file_t *file)
{
- off_t ra_offset = 0;
- size_t ra_size = 0;
- off_t trav_offset = 0;
- ra_page_t *trav = NULL;
- off_t cap = 0;
- char fault = 0;
+ off_t ra_offset = 0;
+ size_t ra_size = 0;
+ off_t trav_offset = 0;
+ ra_page_t *trav = NULL;
+ off_t cap = 0;
+ char fault = 0;
+
+ GF_VALIDATE_OR_GOTO ("read-ahead", frame, out);
+ GF_VALIDATE_OR_GOTO (frame->this->name, file, out);
+
+ if (!file->page_count) {
+ goto out;
+ }
- if (!file->page_count)
- return;
+ ra_size = file->page_size * file->page_count;
+ ra_offset = floor (file->offset, file->page_size);
+ cap = file->size ? file->size : file->offset + ra_size;
- ra_size = file->page_size * file->page_count;
- ra_offset = floor (file->offset, file->page_size);
- cap = file->size ? file->size : file->offset + ra_size;
+ while (ra_offset < min (file->offset + ra_size, cap)) {
- while (ra_offset < min (file->offset + ra_size, cap)) {
+ ra_file_lock (file);
+ {
+ trav = ra_page_get (file, ra_offset);
+ }
+ ra_file_unlock (file);
- ra_file_lock (file);
- {
- trav = ra_page_get (file, ra_offset);
- }
- ra_file_unlock (file);
+ if (!trav)
+ break;
- if (!trav)
- break;
+ ra_offset += file->page_size;
+ }
- ra_offset += file->page_size;
- }
+ if (trav) {
+ /* comfortable enough */
+ goto out;
+ }
- if (trav)
- /* comfortable enough */
- return;
-
- trav_offset = ra_offset;
-
- trav = file->pages.next;
- cap = file->size ? file->size : ra_offset + ra_size;
-
- while (trav_offset < min(ra_offset + ra_size, cap)) {
- fault = 0;
- ra_file_lock (file);
- {
- trav = ra_page_get (file, trav_offset);
- if (!trav) {
- fault = 1;
- trav = ra_page_create (file, trav_offset);
- if (trav)
- trav->dirty = 1;
- }
- }
- ra_file_unlock (file);
-
- if (!trav) {
- /* OUT OF MEMORY */
- break;
- }
-
- if (fault) {
- gf_log (frame->this->name, GF_LOG_TRACE,
- "RA at offset=%"PRId64, trav_offset);
- ra_page_fault (file, frame, trav_offset);
- }
- trav_offset += file->page_size;
- }
+ trav_offset = ra_offset;
+
+ cap = file->size ? file->size : ra_offset + ra_size;
+
+ while (trav_offset < min(ra_offset + ra_size, cap)) {
+ fault = 0;
+ ra_file_lock (file);
+ {
+ trav = ra_page_get (file, trav_offset);
+ if (!trav) {
+ fault = 1;
+ trav = ra_page_create (file, trav_offset);
+ if (trav)
+ trav->dirty = 1;
+ }
+ }
+ ra_file_unlock (file);
+
+ if (!trav) {
+ /* OUT OF MEMORY */
+ break;
+ }
+
+ if (fault) {
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "RA at offset=%"PRId64, trav_offset);
+ ra_page_fault (file, frame, trav_offset);
+ }
+ trav_offset += file->page_size;
+ }
- return;
+out:
+ return;
}
int
ra_need_atime_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct stat *stbuf, struct iobref *iobref)
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
{
- STACK_DESTROY (frame->root);
- return 0;
+ GF_ASSERT (frame);
+ STACK_DESTROY (frame->root);
+ return 0;
}
static void
dispatch_requests (call_frame_t *frame, ra_file_t *file)
{
- ra_local_t *local = NULL;
- ra_conf_t *conf = NULL;
- off_t rounded_offset = 0;
- off_t rounded_end = 0;
- off_t trav_offset = 0;
- ra_page_t *trav = NULL;
- call_frame_t *ra_frame = NULL;
- char need_atime_update = 1;
- char fault = 0;
-
- local = frame->local;
- conf = file->conf;
-
- rounded_offset = floor (local->offset, file->page_size);
- rounded_end = roof (local->offset + local->size, file->page_size);
-
- trav_offset = rounded_offset;
- trav = file->pages.next;
-
- while (trav_offset < rounded_end) {
- fault = 0;
-
- ra_file_lock (file);
- {
- trav = ra_page_get (file, trav_offset);
- if (!trav) {
- trav = ra_page_create (file, trav_offset);
- fault = 1;
- need_atime_update = 0;
- }
-
- if (!trav) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto unlock;
+ ra_local_t *local = NULL;
+ ra_conf_t *conf = NULL;
+ off_t rounded_offset = 0;
+ off_t rounded_end = 0;
+ off_t trav_offset = 0;
+ ra_page_t *trav = NULL;
+ call_frame_t *ra_frame = NULL;
+ char need_atime_update = 1;
+ char fault = 0;
+
+ GF_VALIDATE_OR_GOTO ("read-ahead", frame, out);
+ GF_VALIDATE_OR_GOTO (frame->this->name, file, out);
+
+ local = frame->local;
+ conf = file->conf;
+
+ rounded_offset = floor (local->offset, file->page_size);
+ rounded_end = roof (local->offset + local->size, file->page_size);
+
+ trav_offset = rounded_offset;
+
+ while (trav_offset < rounded_end) {
+ fault = 0;
+
+ ra_file_lock (file);
+ {
+ trav = ra_page_get (file, trav_offset);
+ if (!trav) {
+ trav = ra_page_create (file, trav_offset);
+ if (!trav) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ fault = 1;
+ need_atime_update = 0;
}
+ trav->dirty = 0;
+
+ if (trav->ready) {
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "HIT at offset=%"PRId64".",
+ trav_offset);
+ ra_frame_fill (trav, frame);
+ } else {
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "IN-TRANSIT at offset=%"PRId64".",
+ trav_offset);
+ ra_wait_on_page (trav, frame);
+ need_atime_update = 0;
+ }
+ }
+ unlock:
+ ra_file_unlock (file);
- if (trav->ready) {
- gf_log (frame->this->name, GF_LOG_TRACE,
- "HIT at offset=%"PRId64".",
- trav_offset);
- ra_frame_fill (trav, frame);
- } else {
- gf_log (frame->this->name, GF_LOG_TRACE,
- "IN-TRANSIT at offset=%"PRId64".",
- trav_offset);
- ra_wait_on_page (trav, frame);
- need_atime_update = 0;
- }
- }
- unlock:
- ra_file_unlock (file);
-
- if (fault) {
- gf_log (frame->this->name, GF_LOG_TRACE,
- "MISS at offset=%"PRId64".",
- trav_offset);
- ra_page_fault (file, frame, trav_offset);
- }
-
- trav_offset += file->page_size;
- }
+ if (local->op_ret == -1) {
+ goto out;
+ }
+
+ if (fault) {
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "MISS at offset=%"PRId64".",
+ trav_offset);
+ ra_page_fault (file, frame, trav_offset);
+ }
- if (need_atime_update && conf->force_atime_update) {
- /* TODO: use untimens() since readv() can confuse underlying
- io-cache and others */
- ra_frame = copy_frame (frame);
+ trav_offset += file->page_size;
+ }
+
+ if (need_atime_update && conf->force_atime_update) {
+ /* TODO: use untimens() since readv() can confuse underlying
+ io-cache and others */
+ ra_frame = copy_frame (frame);
if (ra_frame == NULL) {
goto out;
}
- STACK_WIND (ra_frame, ra_need_atime_cbk,
- FIRST_CHILD (frame->this),
- FIRST_CHILD (frame->this)->fops->readv,
- file->fd, 1, 1);
- }
+ STACK_WIND (ra_frame, ra_need_atime_cbk,
+ FIRST_CHILD (frame->this),
+ FIRST_CHILD (frame->this)->fops->readv,
+ file->fd, 1, 1, 0, NULL);
+ }
out:
- return ;
+ return ;
}
int
ra_readv_disabled_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct stat *stbuf, struct iobref *iobref)
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf, iobref);
+ GF_ASSERT (frame);
+
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
+ stbuf, iobref, xdata);
- return 0;
+ return 0;
}
int
ra_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- ra_file_t *file = NULL;
- ra_local_t *local = NULL;
- ra_conf_t *conf = NULL;
- int op_errno = 0;
- int ret = 0;
- char expected_offset = 1;
- uint64_t tmp_file = 0;
-
- conf = this->private;
+ ra_file_t *file = NULL;
+ ra_local_t *local = NULL;
+ ra_conf_t *conf = NULL;
+ int op_errno = EINVAL;
+ char expected_offset = 1;
+ uint64_t tmp_file = 0;
- gf_log (this->name, GF_LOG_TRACE,
- "NEW REQ at offset=%"PRId64" for size=%"GF_PRI_SIZET"",
- offset, size);
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
- ret = fd_ctx_get (fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
-
- if (file == NULL) {
- op_errno = EBADF;
- gf_log (this->name, GF_LOG_DEBUG, "readv received on fd with no"
- " file set in its context");
- goto unwind;
- }
+ conf = this->private;
- if (file->offset != offset) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unexpected offset (%"PRId64" != %"PRId64") resetting",
- file->offset, offset);
+ gf_log (this->name, GF_LOG_TRACE,
+ "NEW REQ at offset=%"PRId64" for size=%"GF_PRI_SIZET"",
+ offset, size);
- expected_offset = file->expected = file->page_count = 0;
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "expected offset (%"PRId64") when page_count=%d",
- offset, file->page_count);
+ fd_ctx_get (fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
- if (file->expected < (conf->page_size * conf->page_count)) {
- file->expected += size;
- file->page_count = min ((file->expected / file->page_size),
- conf->page_count);
- }
- }
+ if (!file || file->disabled) {
+ goto disabled;
+ }
- if (!expected_offset) {
- flush_region (frame, file, 0, file->pages.prev->offset + 1);
- }
+ if (file->offset != offset) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "unexpected offset (%"PRId64" != %"PRId64") resetting",
+ file->offset, offset);
+
+ expected_offset = file->expected = file->page_count = 0;
+ } else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "expected offset (%"PRId64") when page_count=%d",
+ offset, file->page_count);
+
+ if (file->expected < (file->page_size * conf->page_count)) {
+ file->expected += size;
+ file->page_count = min ((file->expected
+ / file->page_size),
+ conf->page_count);
+ }
+ }
- if (file->disabled) {
- STACK_WIND (frame, ra_readv_disabled_cbk,
- FIRST_CHILD (frame->this),
- FIRST_CHILD (frame->this)->fops->readv,
- file->fd, size, offset);
- return 0;
- }
+ if (!expected_offset) {
+ flush_region (frame, file, 0, file->pages.prev->offset + 1, 0);
+ }
- local = (void *) CALLOC (1, sizeof (*local));
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
- op_errno = ENOMEM;
- goto unwind;
- }
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
- local->fd = fd;
- local->offset = offset;
- local->size = size;
- local->wait_count = 1;
+ local->fd = fd;
+ local->offset = offset;
+ local->size = size;
+ local->wait_count = 1;
- local->fill.next = &local->fill;
- local->fill.prev = &local->fill;
+ local->fill.next = &local->fill;
+ local->fill.prev = &local->fill;
- pthread_mutex_init (&local->local_lock, NULL);
+ pthread_mutex_init (&local->local_lock, NULL);
- frame->local = local;
+ frame->local = local;
- dispatch_requests (frame, file);
+ dispatch_requests (frame, file);
- flush_region (frame, file, 0, floor (offset, file->page_size));
+ flush_region (frame, file, 0, floor (offset, file->page_size), 0);
read_ahead (frame, file);
-
- ra_frame_return (frame);
- file->offset = offset + size;
+ ra_frame_return (frame);
- return 0;
+ file->offset = offset + size;
+
+ return 0;
unwind:
- STACK_UNWIND (frame, -1, op_errno, NULL, 0, NULL);
+ STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0, NULL, NULL,
+ NULL);
- return 0;
+ return 0;
+
+disabled:
+ STACK_WIND (frame, ra_readv_disabled_cbk,
+ FIRST_CHILD (frame->this),
+ FIRST_CHILD (frame->this)->fops->readv,
+ fd, size, offset, flags, xdata);
+ return 0;
}
int
ra_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
+ GF_ASSERT (frame);
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
+ return 0;
}
+
int
-ra_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+ra_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
{
- ra_file_t *file = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
- int32_t op_errno = 0;
+ GF_ASSERT (frame);
+ STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
- ret = fd_ctx_get (fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
- if (file == NULL) {
- op_errno = EBADF;
- gf_log (this->name, GF_LOG_DEBUG, "flush received on fd with no"
- " file set in its context");
- goto unwind;
- }
- flush_region (frame, file, 0, file->pages.prev->offset+1);
+int
+ra_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ ra_file_t *file = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
+
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
+
+ fd_ctx_get (fd, this, &tmp_file);
- STACK_WIND (frame, ra_flush_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->flush,
- fd);
- return 0;
+ file = (ra_file_t *)(long)tmp_file;
+ if (file) {
+ flush_region (frame, file, 0, file->pages.prev->offset+1, 0);
+ }
+
+ STACK_WIND (frame, ra_flush_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->flush, fd, xdata);
+ return 0;
unwind:
- STACK_UNWIND (frame, -1, op_errno);
+ STACK_UNWIND_STRICT (flush, frame, -1, op_errno, NULL);
return 0;
}
int
-ra_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync)
+ra_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
{
- ra_file_t *file = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
- int32_t op_errno = 0;
+ ra_file_t *file = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
- ret = fd_ctx_get (fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
- if (file == NULL) {
- op_errno = EBADF;
- gf_log (this->name, GF_LOG_DEBUG, "fsync received on fd with no"
- " file set in its context");
- goto unwind;
- }
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
- if (file) {
- flush_region (frame, file, 0, file->pages.prev->offset+1);
- }
+ fd_ctx_get (fd, this, &tmp_file);
+
+ file = (ra_file_t *)(long)tmp_file;
+ if (file) {
+ flush_region (frame, file, 0, file->pages.prev->offset+1, 0);
+ }
- STACK_WIND (frame, ra_flush_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsync,
- fd, datasync);
- return 0;
+ STACK_WIND (frame, ra_fsync_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fsync, fd, datasync, xdata);
+ return 0;
unwind:
- STACK_UNWIND (frame, -1, op_errno);
+ STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL, NULL);
return 0;
}
int
ra_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *stbuf)
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- fd_t *fd = NULL;
- ra_file_t *file = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
+ ra_file_t *file = NULL;
- fd = frame->local;
+ GF_ASSERT (frame);
- ret = fd_ctx_get (fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
+ file = frame->local;
- flush_region (frame, file, 0, file->pages.prev->offset+1);
+ if (file) {
+ flush_region (frame, file, 0, file->pages.prev->offset+1, 1);
+ }
- frame->local = NULL;
- STACK_UNWIND (frame, op_ret, op_errno, stbuf);
- return 0;
+ frame->local = NULL;
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
}
int
ra_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
- int32_t count, off_t offset, struct iobref *iobref)
+ int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
{
- ra_file_t *file = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
- int32_t op_errno = 0;
-
- ret = fd_ctx_get (fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
- if (file == NULL) {
- op_errno = EBADF;
- gf_log (this->name, GF_LOG_DEBUG, "writev received on fd with"
- "no file set in its context");
- goto unwind;
+ ra_file_t *file = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
+
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
+
+ fd_ctx_get (fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+ if (file) {
+ flush_region (frame, file, 0, file->pages.prev->offset+1, 1);
+ frame->local = file;
+ /* reset the read-ahead counters too */
+ file->expected = file->page_count = 0;
}
- flush_region (frame, file, 0, file->pages.prev->offset+1);
+ STACK_WIND (frame, ra_writev_cbk,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev,
+ fd, vector, count, offset, flags, iobref, xdata);
- /* reset the read-ahead counters too */
- file->expected = file->page_count = 0;
+ return 0;
- frame->local = fd;
+unwind:
+ STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
- STACK_WIND (frame, ra_writev_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev,
- fd, vector, count, offset, iobref);
- return 0;
+int
+ra_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ GF_ASSERT (frame);
-unwind:
- STACK_UNWIND (frame, -1, op_errno, NULL);
+ STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
return 0;
}
int
ra_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
+ GF_ASSERT (frame);
+
+ STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
}
int
-ra_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
-{
- ra_file_t *file = NULL;
- fd_t *iter_fd = NULL;
- inode_t *inode = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
-
- inode = loc->inode;
-
- LOCK (&inode->lock);
- {
- list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
- ret = fd_ctx_get (iter_fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
-
- if (!file)
- continue;
- flush_region (frame, file, 0,
- file->pages.prev->offset + 1);
- }
+ra_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ ra_file_t *file = NULL;
+ fd_t *iter_fd = NULL;
+ inode_t *inode = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
+
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO (frame->this->name, loc, unwind);
+
+ inode = loc->inode;
+
+ LOCK (&inode->lock);
+ {
+ list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
+ fd_ctx_get (iter_fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+
+ if (!file)
+ continue;
+ /*
+ * Truncation invalidates reads just like writing does.
+ * TBD: this seems to flush more than it should. The
+ * only time we should flush at all is when we're
+ * shortening (not lengthening) the file, and then only
+ * from new EOF to old EOF. The same problem exists in
+ * ra_ftruncate.
+ */
+ flush_region (frame, file, 0,
+ file->pages.prev->offset + 1, 1);
+ }
+ }
+ UNLOCK (&inode->lock);
+
+ STACK_WIND (frame, ra_truncate_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->truncate,
+ loc, offset, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT (truncate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+
+void
+ra_page_dump (struct ra_page *page)
+{
+ int i = 0;
+ call_frame_t *frame = NULL;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0, };
+ ra_waitq_t *trav = NULL;
+
+ if (page == NULL) {
+ goto out;
+ }
+
+ gf_proc_dump_write ("offset", "%"PRId64, page->offset);
+
+ gf_proc_dump_write ("size", "%"PRId64, page->size);
+
+ gf_proc_dump_write ("dirty", "%s", page->dirty ? "yes" : "no");
+
+ gf_proc_dump_write ("poisoned", "%s", page->poisoned ? "yes" : "no");
+
+ gf_proc_dump_write ("ready", "%s", page->ready ? "yes" : "no");
+
+ for (trav = page->waitq; trav; trav = trav->next) {
+ frame = trav->data;
+ sprintf (key, "waiting-frame[%d]", i++);
+ gf_proc_dump_write (key, "%"PRId64, frame->root->unique);
}
- UNLOCK (&inode->lock);
- STACK_WIND (frame, ra_attr_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->truncate,
- loc, offset);
- return 0;
+out:
+ return;
}
+int32_t
+ra_fdctx_dump (xlator_t *this, fd_t *fd)
+{
+ ra_file_t *file = NULL;
+ ra_page_t *page = NULL;
+ int32_t ret = 0, i = 0;
+ uint64_t tmp_file = 0;
+ char *path = NULL;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0, };
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
+
+ fd_ctx_get (fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+
+ if (file == NULL) {
+ ret = 0;
+ goto out;
+ }
+
+ gf_proc_dump_build_key (key_prefix,
+ "xlator.performance.read-ahead",
+ "file");
+
+ gf_proc_dump_add_section (key_prefix);
+
+ ret = __inode_path (fd->inode, NULL, &path);
+ if (path != NULL) {
+ gf_proc_dump_write ("path", "%s", path);
+ GF_FREE (path);
+ }
+
+ gf_proc_dump_write ("fd", "%p", fd);
+
+ gf_proc_dump_write ("disabled", "%s", file->disabled ? "yes" : "no");
+
+ if (file->disabled) {
+ ret = 0;
+ goto out;
+ }
+
+ gf_proc_dump_write ("page-size", "%"PRId64, file->page_size);
+
+ gf_proc_dump_write ("page-count", "%u", file->page_count);
+
+ gf_proc_dump_write ("next-expected-offset-for-sequential-reads",
+ "%"PRId64, file->offset);
+
+ for (page = file->pages.next; page != &file->pages;
+ page = page->next) {
+ sprintf (key, "page[%d]", i);
+ gf_proc_dump_write (key, "%p", page[i++]);
+ ra_page_dump (page);
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
int
-ra_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- ra_file_t *file = NULL;
- fd_t *iter_fd = NULL;
- inode_t *inode = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
-
- inode = fd->inode;
-
- LOCK (&inode->lock);
- {
- list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
- ret = fd_ctx_get (iter_fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
-
- if (!file)
- continue;
- flush_region (frame, file, 0,
- file->pages.prev->offset + 1);
- }
- }
- UNLOCK (&inode->lock);
+ra_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ ra_file_t *file = NULL;
+ fd_t *iter_fd = NULL;
+ inode_t *inode = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
+
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
+
+ inode = fd->inode;
+
+ LOCK (&inode->lock);
+ {
+ list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
+ fd_ctx_get (iter_fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+
+ if (!file)
+ continue;
+ flush_region (frame, file, 0,
+ file->pages.prev->offset + 1, 0);
+ }
+ }
+ UNLOCK (&inode->lock);
+
+ STACK_WIND (frame, ra_attr_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fstat, fd, xdata);
+ return 0;
- STACK_WIND (frame, ra_attr_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fstat,
- fd);
- return 0;
+unwind:
+ STACK_UNWIND_STRICT (stat, frame, -1, op_errno, NULL, NULL);
+ return 0;
}
int
-ra_fchown (call_frame_t *frame, xlator_t *this, fd_t *fd, uid_t uid, gid_t gid)
-{
- ra_file_t *file = NULL;
- fd_t *iter_fd = NULL;
- inode_t *inode = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
-
- inode = fd->inode;
-
- LOCK (&inode->lock);
- {
- list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
- ret = fd_ctx_get (iter_fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
-
- if (!file)
- continue;
- flush_region (frame, file, 0,
- file->pages.prev->offset + 1);
- }
- }
- UNLOCK (&inode->lock);
+ra_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ ra_file_t *file = NULL;
+ fd_t *iter_fd = NULL;
+ inode_t *inode = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
+
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
+
+ inode = fd->inode;
+
+ LOCK (&inode->lock);
+ {
+ list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
+ fd_ctx_get (iter_fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+ if (!file)
+ continue;
+ /*
+ * Truncation invalidates reads just like writing does.
+ * TBD: this seems to flush more than it should. The
+ * only time we should flush at all is when we're
+ * shortening (not lengthening) the file, and then only
+ * from new EOF to old EOF. The same problem exists in
+ * ra_truncate.
+ */
+ flush_region (frame, file, 0,
+ file->pages.prev->offset + 1, 1);
+ }
+ }
+ UNLOCK (&inode->lock);
+
+ STACK_WIND (frame, ra_truncate_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT (truncate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int
+ra_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ GF_ASSERT (frame);
- STACK_WIND (frame, ra_attr_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fchown,
- fd, uid, gid);
- return 0;
+ STACK_UNWIND_STRICT (discard, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
}
+static int
+ra_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ ra_file_t *file = NULL;
+ fd_t *iter_fd = NULL;
+ inode_t *inode = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
+
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
+
+ inode = fd->inode;
+
+ LOCK (&inode->lock);
+ {
+ list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
+ fd_ctx_get (iter_fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+ if (!file)
+ continue;
+
+ flush_region(frame, file, offset, len, 1);
+ }
+ }
+ UNLOCK (&inode->lock);
+
+ STACK_WIND (frame, ra_discard_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->discard, fd, offset, len, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT (discard, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
int
-ra_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
-{
- ra_file_t *file = NULL;
- fd_t *iter_fd = NULL;
- inode_t *inode = NULL;
- int ret = 0;
- uint64_t tmp_file = 0;
-
- inode = fd->inode;
-
- LOCK (&inode->lock);
- {
- list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
- ret = fd_ctx_get (iter_fd, this, &tmp_file);
- file = (ra_file_t *)(long)tmp_file;
- if (!file)
- continue;
- flush_region (frame, file, 0,
- file->pages.prev->offset + 1);
- }
- }
- UNLOCK (&inode->lock);
+ra_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ GF_ASSERT (frame);
+
+ STACK_UNWIND_STRICT (zerofill, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
+}
+
+static int
+ra_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ ra_file_t *file = NULL;
+ fd_t *iter_fd = NULL;
+ inode_t *inode = NULL;
+ uint64_t tmp_file = 0;
+ int32_t op_errno = EINVAL;
+
+ GF_ASSERT (frame);
+ GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
+ GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
+
+ inode = fd->inode;
+
+ LOCK (&inode->lock);
+ {
+ list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
+ fd_ctx_get (iter_fd, this, &tmp_file);
+ file = (ra_file_t *)(long)tmp_file;
+ if (!file)
+ continue;
+
+ flush_region(frame, file, offset, len, 1);
+ }
+ }
+ UNLOCK (&inode->lock);
- STACK_WIND (frame, ra_attr_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->ftruncate,
- fd, offset);
- return 0;
+ STACK_WIND (frame, ra_zerofill_cbk, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->zerofill, fd,
+ offset, len, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT (zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int
+ra_priv_dump (xlator_t *this)
+{
+ ra_conf_t *conf = NULL;
+ int ret = -1;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
+ gf_boolean_t add_section = _gf_false;
+
+ if (!this) {
+ goto out;
+ }
+
+ conf = this->private;
+ if (!conf) {
+ gf_log (this->name, GF_LOG_WARNING, "conf null in xlator");
+ goto out;
+ }
+
+ gf_proc_dump_build_key (key_prefix, "xlator.performance.read-ahead",
+ "priv");
+
+ gf_proc_dump_add_section (key_prefix);
+ add_section = _gf_true;
+
+ ret = pthread_mutex_trylock (&conf->conf_lock);
+ if (ret)
+ goto out;
+ {
+ gf_proc_dump_write ("page_size", "%d", conf->page_size);
+ gf_proc_dump_write ("page_count", "%d", conf->page_count);
+ gf_proc_dump_write ("force_atime_update", "%d",
+ conf->force_atime_update);
+ }
+ pthread_mutex_unlock (&conf->conf_lock);
+
+ ret = 0;
+out:
+ if (ret && conf) {
+ if (add_section == _gf_false)
+ gf_proc_dump_add_section (key_prefix);
+
+ gf_proc_dump_write ("Unable to dump priv",
+ "(Lock acquisition failed) %s", this->name);
+ }
+ return ret;
+}
+
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this) {
+ goto out;
+ }
+
+ ret = xlator_mem_acct_init (this, gf_ra_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ }
+
+out:
+ return ret;
}
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ ra_conf_t *conf = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("read-ahead", this, out);
+ GF_VALIDATE_OR_GOTO ("read-ahead", this->private, out);
+
+ conf = this->private;
+
+ GF_OPTION_RECONF ("page-count", conf->page_count, options, uint32, out);
+
+ GF_OPTION_RECONF ("page-size", conf->page_size, options, size, out);
+
+ ret = 0;
+ out:
+ return ret;
+}
int
init (xlator_t *this)
{
- ra_conf_t *conf = NULL;
- dict_t *options = this->options;
- char *page_count_string = NULL;
- int32_t ret = -1;
+ ra_conf_t *conf = NULL;
+ int32_t ret = -1;
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: read-ahead not configured with exactly one"
+ GF_VALIDATE_OR_GOTO ("read-ahead", this, out);
+
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "FATAL: read-ahead not configured with exactly one"
" child");
goto out;
- }
+ }
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
- conf = (void *) CALLOC (1, sizeof (*conf));
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+
+ conf = (void *) GF_CALLOC (1, sizeof (*conf), gf_ra_mt_ra_conf_t);
if (conf == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: Out of memory");
goto out;
}
- conf->page_size = this->ctx->page_size;
- conf->page_count = 4;
-
- if (dict_get (options, "page-count"))
- page_count_string = data_to_str (dict_get (options,
- "page-count"));
- if (page_count_string)
- {
- if (gf_string2uint_base10 (page_count_string, &conf->page_count)
- != 0)
- {
- gf_log ("read-ahead",
- GF_LOG_ERROR,
- "invalid number format \"%s\" of \"option "
- "page-count\"",
- page_count_string);
- goto out;
- }
- gf_log (this->name, GF_LOG_DEBUG, "Using conf->page_count = %u",
- conf->page_count);
- }
-
- if (dict_get (options, "force-atime-update")) {
- char *force_atime_update_str = data_to_str (dict_get (options,
- "force-atime-update"));
- if (gf_string2boolean (force_atime_update_str,
- &conf->force_atime_update) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "'force-atime-update' takes only boolean "
- "options");
- goto out;
- }
- if (conf->force_atime_update)
- gf_log (this->name, GF_LOG_DEBUG, "Forcing atime "
- "updates on cache hit");
- }
+ conf->page_size = this->ctx->page_size;
+
+ GF_OPTION_INIT ("page-size", conf->page_size, size, out);
- conf->files.next = &conf->files;
- conf->files.prev = &conf->files;
+ GF_OPTION_INIT ("page-count", conf->page_count, uint32, out);
- pthread_mutex_init (&conf->conf_lock, NULL);
- this->private = conf;
+ GF_OPTION_INIT ("force-atime-update", conf->force_atime_update, bool, out);
+
+ conf->files.next = &conf->files;
+ conf->files.prev = &conf->files;
+
+ pthread_mutex_init (&conf->conf_lock, NULL);
+
+ this->local_pool = mem_pool_new (ra_local_t, 64);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto out;
+ }
+
+ this->private = conf;
ret = 0;
out:
if (ret == -1) {
- if (conf != NULL) {
- FREE (conf);
- }
+ GF_FREE (conf);
}
return ret;
}
+
void
fini (xlator_t *this)
{
- ra_conf_t *conf = this->private;
+ ra_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO ("read-ahead", this, out);
- pthread_mutex_destroy (&conf->conf_lock);
- FREE (conf);
+ conf = this->private;
+ if (conf == NULL) {
+ goto out;
+ }
+
+ this->private = NULL;
+
+ GF_ASSERT ((conf->files.next == &conf->files)
+ && (conf->files.prev == &conf->files));
- this->private = NULL;
- return;
+ pthread_mutex_destroy (&conf->conf_lock);
+ GF_FREE (conf);
+
+out:
+ return;
}
struct xlator_fops fops = {
- .open = ra_open,
- .create = ra_create,
- .readv = ra_readv,
- .writev = ra_writev,
- .flush = ra_flush,
- .fsync = ra_fsync,
- .truncate = ra_truncate,
- .ftruncate = ra_ftruncate,
- .fstat = ra_fstat,
- .fchown = ra_fchown,
+ .open = ra_open,
+ .create = ra_create,
+ .readv = ra_readv,
+ .writev = ra_writev,
+ .flush = ra_flush,
+ .fsync = ra_fsync,
+ .truncate = ra_truncate,
+ .ftruncate = ra_ftruncate,
+ .fstat = ra_fstat,
+ .discard = ra_discard,
+ .zerofill = ra_zerofill,
};
-struct xlator_mops mops = {
+struct xlator_cbks cbks = {
+ .release = ra_release,
};
-struct xlator_cbks cbks = {
- .release = ra_release,
+struct xlator_dumpops dumpops = {
+ .priv = ra_priv_dump,
+ .fdctx = ra_fdctx_dump,
};
struct volume_options options[] = {
- { .key = {"force-atime-update"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"page-count"},
- .type = GF_OPTION_TYPE_INT,
- .min = 1,
- .max = 16
+ { .key = {"force-atime-update"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false"
+ },
+ { .key = {"page-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 16,
+ .default_value = "4",
+ .description = "Number of pages that will be pre-fetched"
+ },
+ { .key = {"page-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 4096,
+ .max = 1048576 * 64,
+ .default_value = "131072",
+ .description = "Page size with which read-ahead performs server I/O"
},
- { .key = {NULL} },
+ { .key = {NULL} },
};
diff --git a/xlators/performance/read-ahead/src/read-ahead.h b/xlators/performance/read-ahead/src/read-ahead.h
index 0f5add75b..d1d768c34 100644
--- a/xlators/performance/read-ahead/src/read-ahead.h
+++ b/xlators/performance/read-ahead/src/read-ahead.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef __READ_AHEAD_H
@@ -31,6 +22,7 @@
#include "dict.h"
#include "xlator.h"
#include "common-utils.h"
+#include "read-ahead-mem-types.h"
struct ra_conf;
struct ra_local;
@@ -40,77 +32,79 @@ struct ra_waitq;
struct ra_waitq {
- struct ra_waitq *next;
- void *data;
+ struct ra_waitq *next;
+ void *data;
};
struct ra_fill {
- struct ra_fill *next;
- struct ra_fill *prev;
- off_t offset;
- size_t size;
- struct iovec *vector;
- int32_t count;
+ struct ra_fill *next;
+ struct ra_fill *prev;
+ off_t offset;
+ size_t size;
+ struct iovec *vector;
+ int32_t count;
struct iobref *iobref;
};
struct ra_local {
- mode_t mode;
- struct ra_fill fill;
- off_t offset;
- size_t size;
- int32_t op_ret;
- int32_t op_errno;
- off_t pending_offset;
- size_t pending_size;
- fd_t *fd;
- int32_t wait_count;
- pthread_mutex_t local_lock;
+ mode_t mode;
+ struct ra_fill fill;
+ off_t offset;
+ size_t size;
+ int32_t op_ret;
+ int32_t op_errno;
+ off_t pending_offset;
+ size_t pending_size;
+ fd_t *fd;
+ int32_t wait_count;
+ pthread_mutex_t local_lock;
};
struct ra_page {
- struct ra_page *next;
- struct ra_page *prev;
- struct ra_file *file;
- char dirty;
- char ready;
- struct iovec *vector;
- int32_t count;
- off_t offset;
- size_t size;
- struct ra_waitq *waitq;
+ struct ra_page *next;
+ struct ra_page *prev;
+ struct ra_file *file;
+ char dirty; /* Internal request, not from user. */
+ char poisoned; /* Pending read invalidated by write. */
+ char ready;
+ struct iovec *vector;
+ int32_t count;
+ off_t offset;
+ size_t size;
+ struct ra_waitq *waitq;
struct iobref *iobref;
+ char stale;
};
struct ra_file {
- struct ra_file *next;
- struct ra_file *prev;
- struct ra_conf *conf;
- fd_t *fd;
- int disabled;
- size_t expected;
- struct ra_page pages;
- off_t offset;
- size_t size;
- int32_t refcount;
- pthread_mutex_t file_lock;
- struct stat stbuf;
- uint64_t page_size;
- uint32_t page_count;
+ struct ra_file *next;
+ struct ra_file *prev;
+ struct ra_conf *conf;
+ fd_t *fd;
+ int disabled;
+ size_t expected;
+ struct ra_page pages;
+ off_t offset;
+ size_t size;
+ int32_t refcount;
+ pthread_mutex_t file_lock;
+ struct iatt stbuf;
+ uint64_t page_size;
+ uint32_t page_count;
};
struct ra_conf {
- uint64_t page_size;
- uint32_t page_count;
- void *cache_block;
- struct ra_file files;
- gf_boolean_t force_atime_update;
- pthread_mutex_t conf_lock;
+ uint64_t page_size;
+ uint32_t page_count;
+ void *cache_block;
+ struct ra_file files;
+ gf_boolean_t force_atime_update;
+ pthread_mutex_t conf_lock;
};
@@ -123,19 +117,19 @@ typedef struct ra_fill ra_fill_t;
ra_page_t *
ra_page_get (ra_file_t *file,
- off_t offset);
+ off_t offset);
ra_page_t *
ra_page_create (ra_file_t *file,
- off_t offset);
+ off_t offset);
void
ra_page_fault (ra_file_t *file,
- call_frame_t *frame,
- off_t offset);
+ call_frame_t *frame,
+ off_t offset);
void
ra_wait_on_page (ra_page_t *page,
- call_frame_t *frame);
+ call_frame_t *frame);
ra_waitq_t *
ra_page_wakeup (ra_page_t *page);
@@ -145,8 +139,8 @@ ra_page_flush (ra_page_t *page);
ra_waitq_t *
ra_page_error (ra_page_t *page,
- int32_t op_ret,
- int32_t op_errno);
+ int32_t op_ret,
+ int32_t op_errno);
void
ra_page_purge (ra_page_t *page);
@@ -155,7 +149,7 @@ ra_frame_return (call_frame_t *frame);
void
ra_frame_fill (ra_page_t *page,
- call_frame_t *frame);
+ call_frame_t *frame);
void
ra_file_destroy (ra_file_t *file);
@@ -163,36 +157,36 @@ ra_file_destroy (ra_file_t *file);
static inline void
ra_file_lock (ra_file_t *file)
{
- pthread_mutex_lock (&file->file_lock);
+ pthread_mutex_lock (&file->file_lock);
}
static inline void
ra_file_unlock (ra_file_t *file)
{
- pthread_mutex_unlock (&file->file_lock);
+ pthread_mutex_unlock (&file->file_lock);
}
static inline void
ra_conf_lock (ra_conf_t *conf)
{
- pthread_mutex_lock (&conf->conf_lock);
+ pthread_mutex_lock (&conf->conf_lock);
}
static inline void
ra_conf_unlock (ra_conf_t *conf)
{
- pthread_mutex_unlock (&conf->conf_lock);
+ pthread_mutex_unlock (&conf->conf_lock);
}
static inline void
ra_local_lock (ra_local_t *local)
{
- pthread_mutex_lock (&local->local_lock);
+ pthread_mutex_lock (&local->local_lock);
}
static inline void
ra_local_unlock (ra_local_t *local)
{
- pthread_mutex_unlock (&local->local_lock);
+ pthread_mutex_unlock (&local->local_lock);
}
#endif /* __READ_AHEAD_H */
diff --git a/xlators/performance/readdir-ahead/Makefile.am b/xlators/performance/readdir-ahead/Makefile.am
new file mode 100644
index 000000000..a985f42a8
--- /dev/null
+++ b/xlators/performance/readdir-ahead/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/performance/readdir-ahead/src/Makefile.am b/xlators/performance/readdir-ahead/src/Makefile.am
new file mode 100644
index 000000000..cdabd1428
--- /dev/null
+++ b/xlators/performance/readdir-ahead/src/Makefile.am
@@ -0,0 +1,15 @@
+xlator_LTLIBRARIES = readdir-ahead.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
+
+readdir_ahead_la_LDFLAGS = -module -avoidversion
+
+readdir_ahead_la_SOURCES = readdir-ahead.c
+readdir_ahead_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = readdir-ahead.h readdir-ahead-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h b/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h
new file mode 100644
index 000000000..39e2c5369
--- /dev/null
+++ b/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h
@@ -0,0 +1,24 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __RDA_MEM_TYPES_H__
+#define __RDA_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_rda_mem_types_ {
+ gf_rda_mt_rda_local = gf_common_mt_end + 1,
+ gf_rda_mt_rda_fd_ctx,
+ gf_rda_mt_rda_priv,
+ gf_rda_mt_end
+};
+
+#endif
diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.c b/xlators/performance/readdir-ahead/src/readdir-ahead.c
new file mode 100644
index 000000000..53e6756f0
--- /dev/null
+++ b/xlators/performance/readdir-ahead/src/readdir-ahead.c
@@ -0,0 +1,560 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/*
+ * performance/readdir-ahead preloads a local buffer with directory entries
+ * on opendir. The optimization involves using maximum sized gluster rpc
+ * requests (128k) to minimize overhead of smaller client requests.
+ *
+ * For example, fuse currently supports a maximum readdir buffer of 4k
+ * (regardless of the filesystem client's buffer size). readdir-ahead should
+ * effectively convert these smaller requests into fewer, larger sized requests
+ * for simple, sequential workloads (i.e., ls).
+ *
+ * The translator is currently designed to handle the simple, sequential case
+ * only. If a non-sequential directory read occurs, readdir-ahead disables
+ * preloads on the directory.
+ */
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "call-stub.h"
+#include "readdir-ahead.h"
+#include "readdir-ahead-mem-types.h"
+#include "defaults.h"
+
+static int rda_fill_fd(call_frame_t *, xlator_t *, fd_t *);
+
+/*
+ * Get (or create) the fd context for storing prepopulated directory
+ * entries.
+ */
+static struct
+rda_fd_ctx *get_rda_fd_ctx(fd_t *fd, xlator_t *this)
+{
+ uint64_t val;
+ struct rda_fd_ctx *ctx;
+
+ LOCK(&fd->lock);
+
+ if (__fd_ctx_get(fd, this, &val) < 0) {
+ ctx = GF_CALLOC(1, sizeof(struct rda_fd_ctx),
+ gf_rda_mt_rda_fd_ctx);
+ if (!ctx)
+ goto out;
+
+ LOCK_INIT(&ctx->lock);
+ INIT_LIST_HEAD(&ctx->entries.list);
+ ctx->state = RDA_FD_NEW;
+ /* ctx offset values initialized to 0 */
+
+ if (__fd_ctx_set(fd, this, (uint64_t) ctx) < 0) {
+ GF_FREE(ctx);
+ ctx = NULL;
+ goto out;
+ }
+ } else {
+ ctx = (struct rda_fd_ctx *) val;
+ }
+out:
+ UNLOCK(&fd->lock);
+ return ctx;
+}
+
+/*
+ * Reset the tracking state of the context.
+ */
+static void
+rda_reset_ctx(struct rda_fd_ctx *ctx)
+{
+ ctx->state = RDA_FD_NEW;
+ ctx->cur_offset = 0;
+ ctx->cur_size = 0;
+ ctx->next_offset = 0;
+ gf_dirent_free(&ctx->entries);
+}
+
+/*
+ * Check whether we can handle a request. Offset verification is done by the
+ * caller, so we only check whether the preload buffer has completion status
+ * (including an error) or has some data to return.
+ */
+static gf_boolean_t
+rda_can_serve_readdirp(struct rda_fd_ctx *ctx, size_t request_size)
+{
+ if ((ctx->state & RDA_FD_EOD) ||
+ (ctx->state & RDA_FD_ERROR) ||
+ (!(ctx->state & RDA_FD_PLUGGED) && (ctx->cur_size > 0)))
+ return _gf_true;
+
+ return _gf_false;
+}
+
+/*
+ * Serve a request from the fd dentry list based on the size of the request
+ * buffer. ctx must be locked.
+ */
+static int32_t
+__rda_serve_readdirp(xlator_t *this, gf_dirent_t *entries, size_t request_size,
+ struct rda_fd_ctx *ctx)
+{
+ gf_dirent_t *dirent, *tmp;
+ size_t dirent_size, size = 0;
+ int32_t count = 0;
+ struct rda_priv *priv = this->private;
+
+ list_for_each_entry_safe(dirent, tmp, &ctx->entries.list, list) {
+ dirent_size = gf_dirent_size(dirent->d_name);
+ if (size + dirent_size > request_size)
+ break;
+
+ size += dirent_size;
+ list_del_init(&dirent->list);
+ ctx->cur_size -= dirent_size;
+
+ list_add_tail(&dirent->list, &entries->list);
+ ctx->cur_offset = dirent->d_off;
+ count++;
+ }
+
+ if (ctx->cur_size <= priv->rda_low_wmark)
+ ctx->state |= RDA_FD_PLUGGED;
+
+ return count;
+}
+
+static int32_t
+rda_readdirp_stub(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
+{
+ gf_dirent_t entries;
+ int32_t ret;
+ struct rda_fd_ctx *ctx;
+ int op_errno = 0;
+
+ ctx = get_rda_fd_ctx(fd, this);
+ INIT_LIST_HEAD(&entries.list);
+ ret = __rda_serve_readdirp(this, &entries, size, ctx);
+
+ if (!ret && (ctx->state & RDA_FD_ERROR)) {
+ ret = -1;
+ op_errno = ctx->op_errno;
+ ctx->state &= ~RDA_FD_ERROR;
+
+ /*
+ * the preload has stopped running in the event of an error, so
+ * pass all future requests along
+ */
+ ctx->state |= RDA_FD_BYPASS;
+ }
+
+ STACK_UNWIND_STRICT(readdirp, frame, ret, op_errno, &entries, xdata);
+ gf_dirent_free(&entries);
+
+ return 0;
+}
+
+static int32_t
+rda_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ struct rda_fd_ctx *ctx;
+ call_stub_t *stub;
+ int fill = 0;
+
+ ctx = get_rda_fd_ctx(fd, this);
+ if (!ctx)
+ goto err;
+
+ if (ctx->state & RDA_FD_BYPASS)
+ goto bypass;
+
+ LOCK(&ctx->lock);
+
+ /* recheck now that we have the lock */
+ if (ctx->state & RDA_FD_BYPASS) {
+ UNLOCK(&ctx->lock);
+ goto bypass;
+ }
+
+ /*
+ * If a new read comes in at offset 0 and the buffer has been
+ * completed, reset the context and kickstart the filler again.
+ */
+ if (!off && (ctx->state & RDA_FD_EOD) && (ctx->cur_size == 0)) {
+ rda_reset_ctx(ctx);
+ fill = 1;
+ }
+
+ /*
+ * If a readdir occurs at an unexpected offset or we already have a
+ * request pending, admit defeat and just get out of the way.
+ */
+ if (off != ctx->cur_offset || ctx->stub) {
+ ctx->state |= RDA_FD_BYPASS;
+ UNLOCK(&ctx->lock);
+ goto bypass;
+ }
+
+ stub = fop_readdirp_stub(frame, rda_readdirp_stub, fd, size, off, xdata);
+ if (!stub) {
+ UNLOCK(&ctx->lock);
+ goto err;
+ }
+
+ /*
+ * If we haven't bypassed the preload, this means we can either serve
+ * the request out of the preload or the request that enables us to do
+ * so is in flight...
+ */
+ if (rda_can_serve_readdirp(ctx, size))
+ call_resume(stub);
+ else
+ ctx->stub = stub;
+
+ UNLOCK(&ctx->lock);
+
+ if (fill)
+ rda_fill_fd(frame, this, fd);
+
+ return 0;
+
+bypass:
+ STACK_WIND(frame, default_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(readdirp, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+}
+
+static int32_t
+rda_fill_fd_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ gf_dirent_t *dirent, *tmp;
+ struct rda_local *local = frame->local;
+ struct rda_fd_ctx *ctx = local->ctx;
+ struct rda_priv *priv = this->private;
+ int fill = 1;
+
+ LOCK(&ctx->lock);
+
+ /* Verify that the preload buffer is still pending on this data. */
+ if (ctx->next_offset != local->offset) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Out of sequence directory preload.");
+ ctx->state |= (RDA_FD_BYPASS|RDA_FD_ERROR);
+ ctx->op_errno = EUCLEAN;
+
+ goto out;
+ }
+
+ if (entries) {
+ list_for_each_entry_safe(dirent, tmp, &entries->list, list) {
+ list_del_init(&dirent->list);
+ /* must preserve entry order */
+ list_add_tail(&dirent->list, &ctx->entries.list);
+
+ ctx->cur_size += gf_dirent_size(dirent->d_name);
+ ctx->next_offset = dirent->d_off;
+ }
+ }
+
+ if (ctx->cur_size >= priv->rda_high_wmark)
+ ctx->state &= ~RDA_FD_PLUGGED;
+
+ if (!op_ret) {
+ /* we've hit eod */
+ ctx->state &= ~RDA_FD_RUNNING;
+ ctx->state |= RDA_FD_EOD;
+ } else if (op_ret == -1) {
+ /* kill the preload and pend the error */
+ ctx->state &= ~RDA_FD_RUNNING;
+ ctx->state |= RDA_FD_ERROR;
+ ctx->op_errno = op_errno;
+ }
+
+ /*
+ * NOTE: The strict bypass logic in readdirp() means a pending request
+ * is always based on ctx->cur_offset.
+ */
+ if (ctx->stub &&
+ rda_can_serve_readdirp(ctx, ctx->stub->args.size)) {
+ call_resume(ctx->stub);
+ ctx->stub = NULL;
+ }
+
+out:
+ /*
+ * If we have been marked for bypass and have no pending stub, clear the
+ * run state so we stop preloading the context with entries.
+ */
+ if ((ctx->state & RDA_FD_BYPASS) && !ctx->stub)
+ ctx->state &= ~RDA_FD_RUNNING;
+
+ if (!(ctx->state & RDA_FD_RUNNING)) {
+ fill = 0;
+ STACK_DESTROY(ctx->fill_frame->root);
+ ctx->fill_frame = NULL;
+ }
+
+ UNLOCK(&ctx->lock);
+
+ if (fill)
+ rda_fill_fd(frame, this, local->fd);
+
+ return 0;
+}
+
+/*
+ * Start prepopulating the fd context with directory entries.
+ */
+static int
+rda_fill_fd(call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+ call_frame_t *nframe = NULL;
+ struct rda_local *local = NULL;
+ struct rda_fd_ctx *ctx;
+ off_t offset;
+ struct rda_priv *priv = this->private;
+
+ ctx = get_rda_fd_ctx(fd, this);
+ if (!ctx)
+ goto err;
+
+ LOCK(&ctx->lock);
+
+ if (ctx->state & RDA_FD_NEW) {
+ ctx->state &= ~RDA_FD_NEW;
+ ctx->state |= RDA_FD_RUNNING;
+ if (priv->rda_low_wmark)
+ ctx->state |= RDA_FD_PLUGGED;
+ }
+
+ offset = ctx->next_offset;
+
+ if (!ctx->fill_frame) {
+ nframe = copy_frame(frame);
+ if (!nframe) {
+ UNLOCK(&ctx->lock);
+ goto err;
+ }
+
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ UNLOCK(&ctx->lock);
+ goto err;
+ }
+
+ local->ctx = ctx;
+ local->fd = fd;
+ nframe->local = local;
+
+ ctx->fill_frame = nframe;
+ } else {
+ nframe = ctx->fill_frame;
+ local = nframe->local;
+ }
+
+ local->offset = offset;
+
+ UNLOCK(&ctx->lock);
+
+ STACK_WIND(nframe, rda_fill_fd_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, priv->rda_req_size,
+ offset, NULL);
+
+ return 0;
+
+err:
+ if (nframe)
+ FRAME_DESTROY(nframe);
+
+ return -1;
+}
+
+static int32_t
+rda_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ if (!op_ret)
+ rda_fill_fd(frame, this, fd);
+
+ STACK_UNWIND_STRICT(opendir, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+static int32_t
+rda_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
+{
+ STACK_WIND(frame, rda_opendir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+ return 0;
+}
+
+static int32_t
+rda_releasedir(xlator_t *this, fd_t *fd)
+{
+ uint64_t val;
+ struct rda_fd_ctx *ctx;
+
+ if (fd_ctx_del(fd, this, &val) < 0)
+ return -1;
+
+ ctx = (struct rda_fd_ctx *) val;
+ if (!ctx)
+ return 0;
+
+ rda_reset_ctx(ctx);
+
+ if (ctx->fill_frame)
+ STACK_DESTROY(ctx->fill_frame->root);
+
+ if (ctx->stub)
+ gf_log(this->name, GF_LOG_ERROR,
+ "released a directory with a pending stub");
+
+ GF_FREE(ctx);
+ return 0;
+}
+
+int32_t
+mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ goto out;
+
+ ret = xlator_mem_acct_init(this, gf_rda_mt_end + 1);
+
+ if (ret != 0)
+ gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+
+out:
+ return ret;
+}
+
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+ struct rda_priv *priv = this->private;
+
+ GF_OPTION_RECONF("rda-request-size", priv->rda_req_size, options,
+ uint32, err);
+ GF_OPTION_RECONF("rda-low-wmark", priv->rda_low_wmark, options, size,
+ err);
+ GF_OPTION_RECONF("rda-high-wmark", priv->rda_high_wmark, options, size,
+ err);
+
+ return 0;
+err:
+ return -1;
+}
+
+int
+init(xlator_t *this)
+{
+ struct rda_priv *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("readdir-ahead", this, err);
+
+ if (!this->children || this->children->next) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "FATAL: readdir-ahead not configured with exactly one"
+ " child");
+ goto err;
+ }
+
+ if (!this->parents) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+
+ priv = GF_CALLOC(1, sizeof(struct rda_priv), gf_rda_mt_rda_priv);
+ if (!priv)
+ goto err;
+ this->private = priv;
+
+ this->local_pool = mem_pool_new(struct rda_local, 32);
+ if (!this->local_pool)
+ goto err;
+
+ GF_OPTION_INIT("rda-request-size", priv->rda_req_size, uint32, err);
+ GF_OPTION_INIT("rda-low-wmark", priv->rda_low_wmark, size, err);
+ GF_OPTION_INIT("rda-high-wmark", priv->rda_high_wmark, size, err);
+
+ return 0;
+
+err:
+ if (this->local_pool)
+ mem_pool_destroy(this->local_pool);
+ if (priv)
+ GF_FREE(priv);
+
+ return -1;
+}
+
+
+void
+fini(xlator_t *this)
+{
+ GF_VALIDATE_OR_GOTO ("readdir-ahead", this, out);
+
+ GF_FREE(this->private);
+
+out:
+ return;
+}
+
+struct xlator_fops fops = {
+ .opendir = rda_opendir,
+ .readdirp = rda_readdirp,
+};
+
+struct xlator_cbks cbks = {
+ .releasedir = rda_releasedir,
+};
+
+struct volume_options options[] = {
+ { .key = {"rda-request-size"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 4096,
+ .max = 131072,
+ .default_value = "131072",
+ .description = "readdir-ahead request size",
+ },
+ { .key = {"rda-low-wmark"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 0,
+ .max = 10 * GF_UNIT_MB,
+ .default_value = "4096",
+ .description = "the value under which we plug",
+ },
+ { .key = {"rda-high-wmark"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 0,
+ .max = 100 * GF_UNIT_MB,
+ .default_value = "131072",
+ .description = "the value over which we unplug",
+ },
+ { .key = {NULL} },
+};
+
diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.h b/xlators/performance/readdir-ahead/src/readdir-ahead.h
new file mode 100644
index 000000000..e48786dae
--- /dev/null
+++ b/xlators/performance/readdir-ahead/src/readdir-ahead.h
@@ -0,0 +1,46 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __READDIR_AHEAD_H
+#define __READDIR_AHEAD_H
+
+/* state flags */
+#define RDA_FD_NEW (1 << 0)
+#define RDA_FD_RUNNING (1 << 1)
+#define RDA_FD_EOD (1 << 2)
+#define RDA_FD_ERROR (1 << 3)
+#define RDA_FD_BYPASS (1 << 4)
+#define RDA_FD_PLUGGED (1 << 5)
+
+struct rda_fd_ctx {
+ off_t cur_offset; /* current head of the ctx */
+ size_t cur_size; /* current size of the preload */
+ off_t next_offset; /* tail of the ctx */
+ uint32_t state;
+ gf_lock_t lock;
+ gf_dirent_t entries;
+ call_frame_t *fill_frame;
+ call_stub_t *stub;
+ int op_errno;
+};
+
+struct rda_local {
+ struct rda_fd_ctx *ctx;
+ fd_t *fd;
+ off_t offset;
+};
+
+struct rda_priv {
+ uint32_t rda_req_size;
+ uint64_t rda_low_wmark;
+ uint64_t rda_high_wmark;
+};
+
+#endif /* __READDIR_AHEAD_H */
diff --git a/xlators/performance/stat-prefetch/src/Makefile.am b/xlators/performance/stat-prefetch/src/Makefile.am
deleted file mode 100644
index b16c133a1..000000000
--- a/xlators/performance/stat-prefetch/src/Makefile.am
+++ /dev/null
@@ -1,14 +0,0 @@
-xlator_LTLIBRARIES = stat-prefetch.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
-
-stat_prefetch_la_LDFLAGS = -module -avoidversion
-stat_prefetch_la_SOURCES = stat-prefetch.c
-noinst_HEADERS = stat-prefetch.h
-
-stat_prefetch_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
-
diff --git a/xlators/performance/stat-prefetch/src/stat-prefetch.c b/xlators/performance/stat-prefetch/src/stat-prefetch.c
deleted file mode 100644
index 57bed9a52..000000000
--- a/xlators/performance/stat-prefetch/src/stat-prefetch.c
+++ /dev/null
@@ -1,1838 +0,0 @@
-/*
- Copyright (c) 2009-2010 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "stat-prefetch.h"
-#include "locking.h"
-#include "inode.h"
-#include <libgen.h>
-
-
-sp_cache_t *
-sp_cache_init (void)
-{
- sp_cache_t *cache = NULL;
-
- cache = CALLOC (1, sizeof (*cache));
- if (cache) {
- INIT_LIST_HEAD (&cache->entries.list);
- LOCK_INIT (&cache->lock);
- }
-
- return cache;
-}
-
-
-void
-sp_local_free (sp_local_t *local)
-{
- if (local) {
- loc_wipe (&local->loc);
- FREE (local);
- }
-}
-
-
-int32_t
-sp_cache_remove_entry (sp_cache_t *cache, char *name, char remove_all)
-{
- int32_t ret = -1;
- gf_dirent_t *entry = NULL, *tmp = NULL;
-
- if ((cache == NULL) || ((name == NULL) && !remove_all)) {
- goto out;
- }
-
- LOCK (&cache->lock);
- {
- list_for_each_entry_safe (entry, tmp, &cache->entries.list,
- list) {
- if (remove_all || (!strcmp (name, entry->d_name))) {
- list_del_init (&entry->list);
- FREE (entry);
- ret = 0;
-
- if (!remove_all) {
- break;
- }
- }
- }
- }
- UNLOCK (&cache->lock);
-
-out:
- return ret;
-}
-
-
-int32_t
-sp_cache_get_entry (sp_cache_t *cache, char *name, gf_dirent_t *entry)
-{
- int32_t ret = -1;
- gf_dirent_t *tmp = NULL;
-
- if ((cache == NULL) || (name == NULL) || (entry == NULL)) {
- goto out;
- }
-
- LOCK (&cache->lock);
- {
- list_for_each_entry (tmp, &cache->entries.list, list) {
- if (!strcmp (name, tmp->d_name)) {
- memcpy (entry, tmp, sizeof (*entry));
- ret = 0;
- break;
- }
- }
-
- }
- UNLOCK (&cache->lock);
-
-out:
- return ret;
-}
-
-
-void
-sp_cache_free (sp_cache_t *cache)
-{
- sp_cache_remove_entry (cache, NULL, 1);
- FREE (cache);
-}
-
-
-sp_cache_t *
-sp_get_cache_fd (xlator_t *this, fd_t *fd)
-{
- sp_cache_t *cache = NULL;
- uint64_t value = 0;
- int32_t ret = -1;
- sp_fd_ctx_t *fd_ctx = NULL;
-
- if (fd == NULL) {
- goto out;
- }
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == -1) {
- goto out;
- }
-
- fd_ctx = (void *)(long) value;
-
- LOCK (&fd_ctx->lock);
- {
- cache = fd_ctx->cache;
- }
- UNLOCK (&fd_ctx->lock);
-out:
- return cache;
-}
-
-
-void
-sp_fd_ctx_free (sp_fd_ctx_t *fd_ctx)
-{
- if (fd_ctx == NULL) {
- goto out;
- }
-
- if (fd_ctx->parent_inode) {
- inode_unref (fd_ctx->parent_inode);
- fd_ctx->parent_inode = NULL;
- }
-
- if (fd_ctx->name) {
- FREE (fd_ctx->name);
- fd_ctx->name = NULL;
- }
-
- if (fd_ctx->cache) {
- sp_cache_free (fd_ctx->cache);
- }
-
- FREE (fd_ctx);
-out:
- return;
-}
-
-
-inline sp_fd_ctx_t *
-sp_fd_ctx_init (void)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
-
- fd_ctx = CALLOC (1, sizeof (*fd_ctx));
- if (fd_ctx) {
- LOCK_INIT (&fd_ctx->lock);
- }
-
- return fd_ctx;
-}
-
-
-sp_fd_ctx_t *
-sp_fd_ctx_new (xlator_t *this, inode_t *parent, char *name, sp_cache_t *cache)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
-
- fd_ctx = sp_fd_ctx_init ();
- if (fd_ctx == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- if (parent) {
- fd_ctx->parent_inode = inode_ref (parent);
- }
-
- if (name) {
- fd_ctx->name = strdup (name);
- if (fd_ctx->name == NULL) {
- sp_fd_ctx_free (fd_ctx);
- fd_ctx = NULL;
- }
- }
-
- fd_ctx->cache = cache;
-
-out:
- return fd_ctx;
-}
-
-
-sp_cache_t *
-sp_del_cache_fd (xlator_t *this, fd_t *fd)
-{
- sp_cache_t *cache = NULL;
- uint64_t value = 0;
- int32_t ret = -1;
- sp_fd_ctx_t *fd_ctx = NULL;
-
- if (fd == NULL) {
- goto out;
- }
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == -1) {
- goto out;
- }
-
- fd_ctx = (void *)(long) value;
-
- LOCK (&fd_ctx->lock);
- {
- cache = fd_ctx->cache;
- fd_ctx->cache = NULL;
- }
- UNLOCK (&fd_ctx->lock);
-
-out:
- return cache;
-}
-
-
-sp_cache_t *
-sp_get_cache_inode (xlator_t *this, inode_t *inode, int32_t pid)
-{
- fd_t *fd = NULL;
- sp_cache_t *cache = NULL;
-
- if (inode == NULL) {
- goto out;
- }
-
- fd = fd_lookup (inode, pid);
- if (fd == NULL) {
- goto out;
- }
-
- cache = sp_get_cache_fd (this, fd);
-out:
- return cache;
-}
-
-
-inline int32_t
-sp_put_cache (xlator_t *this, fd_t *fd, sp_cache_t *cache)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
- int32_t ret = -1;
- uint64_t value = 0;
-
- ret = fd_ctx_get (fd, this, &value);
- if (!ret) {
- fd_ctx = (void *)(long)value;
- } else {
- fd_ctx = sp_fd_ctx_init ();
- if (fd_ctx == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- ret = -1;
- goto out;
- }
-
- ret = fd_ctx_set (fd, this, (long)(void *)fd_ctx);
- if (ret == -1) {
- sp_fd_ctx_free (fd_ctx);
- goto out;
- }
- }
-
- LOCK (&fd_ctx->lock);
- {
- if (fd_ctx->cache) {
- sp_cache_free (fd_ctx->cache);
- }
-
- fd_ctx->cache = cache;
- }
- UNLOCK (&fd_ctx->lock);
-
-out:
- return ret;
-}
-
-
-int32_t
-sp_cache_add_entries (sp_cache_t *cache, gf_dirent_t *entries)
-{
- gf_dirent_t copy;
- gf_dirent_t *entry = NULL, *new = NULL;
- int32_t ret = -1;
- uint64_t expected_offset = 0;
-
- memset (&copy, 0, sizeof (copy));
- INIT_LIST_HEAD (&copy.list);
-
- LOCK (&cache->lock);
- {
- list_for_each_entry (entry, &entries->list, list) {
- new = gf_dirent_for_name (entry->d_name);
- if (new == NULL) {
- gf_dirent_free (&copy);
- goto unlock;
- }
-
- new->d_ino = entry->d_ino;
- new->d_off = entry->d_off;
- new->d_len = entry->d_len;
- new->d_type = entry->d_type;
- new->d_stat = entry->d_stat;
-
- list_add_tail (&new->list, &copy.list);
-
- expected_offset = new->d_off;
- }
-
- /*
- * splice entries in cache to copy, so that we have a list in
- * ascending order of offsets
- */
- list_splice_init (&cache->entries.list, &copy.list);
-
- /* splice back the copy into cache */
- list_splice_init (&copy.list, &cache->entries.list);
-
- cache->expected_offset = expected_offset;
-
- ret = 0;
- }
-unlock:
- UNLOCK (&cache->lock);
-
- return ret;
-}
-
-
-int32_t
-sp_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *buf, dict_t *dict)
-{
- struct stat *stbuf = NULL;
- int32_t ret = -1;
-
- if (op_ret == -1) {
- goto out;
- }
-
- if (S_ISDIR (buf->st_mode)) {
- stbuf = CALLOC (1, sizeof (*stbuf));
- if (stbuf == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- memcpy (stbuf, buf, sizeof (*stbuf));
- ret = inode_ctx_put (inode, this, (long)stbuf);
- if (ret == -1) {
- op_ret = -1;
-
- /* FIXME: EINVAL is not correct */
- op_errno = EINVAL;
- FREE (stbuf);
- goto out;
- }
- }
-
-out:
- SP_STACK_UNWIND (frame, op_ret, op_errno, inode, buf, dict);
- return 0;
-}
-
-
-int32_t
-sp_lookup_behind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *buf, dict_t *dict)
-{
- sp_local_t *local = NULL;
- sp_cache_t *cache = NULL;
-
- local = frame->local;
- if (local == NULL) {
- goto out;
- }
-
- if ((op_ret == -1) && (op_errno = ENOENT)) {
- cache = sp_get_cache_inode (this, local->loc.parent,
- frame->root->pid);
-
- if (cache) {
- sp_cache_remove_entry (cache, (char *)local->loc.name,
- 0);
- }
- }
-
-out:
- SP_STACK_DESTROY (frame);
- return 0;
-}
-
-
-int32_t
-sp_get_ancestors (char *path, char **parent, char **grand_parent)
-{
- int32_t ret = -1, i = 0;
- char *cpy = NULL;
-
- if (!path || !parent || !grand_parent) {
- ret = 0;
- goto out;
- }
-
- for (i = 0; i < 2; i++) {
- if (!strcmp (path, "/")) {
- break;
- }
-
- cpy = strdup (path);
- if (cpy == NULL) {
- goto out;
- }
-
- path = dirname (cpy);
- switch (i)
- {
- case 0:
- *parent = path;
- break;
- case 1:
- *grand_parent = path;
- break;
- }
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-
-int32_t
-sp_cache_remove_parent_entry (call_frame_t *frame, xlator_t *this, char *path)
-{
- char *parent = NULL, *grand_parent = NULL, *cpy = NULL;
- inode_t *inode_gp = NULL;
- sp_cache_t *cache_gp = NULL;
- int32_t ret = -1;
-
- ret = sp_get_ancestors (path, &parent, &grand_parent);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- if (grand_parent && strcmp (grand_parent, "/")) {
- inode_gp = inode_from_path (frame->root->frames.this->itable,
- grand_parent);
- if (inode_gp) {
- cache_gp = sp_get_cache_inode (this, inode_gp,
- frame->root->pid);
- if (cache_gp) {
- cpy = strdup (parent);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name,
- cpy, out,
- ENOMEM);
- path = basename (cpy);
- sp_cache_remove_entry (cache_gp, path, 0);
- FREE (cpy);
- }
- inode_unref (inode_gp);
- }
- }
-
- ret = 0;
-out:
- if (parent) {
- FREE (parent);
- }
-
- if (grand_parent) {
- FREE (grand_parent);
- }
-
- return ret;
-}
-
-
-int32_t
-sp_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
-{
- sp_local_t *local = NULL;
- gf_dirent_t dirent;
- int32_t ret = -1, op_ret = -1, op_errno = EINVAL;
- sp_cache_t *cache = NULL;
- struct stat *postparent = NULL, *buf = NULL;
- uint64_t value = 0;
- call_frame_t *wind_frame = NULL;
- char lookup_behind = 0;
-
- if (loc == NULL) {
- goto unwind;
- }
-
- if (xattr_req || (loc->parent == NULL) || (loc->name == NULL)) {
- goto wind;
- }
-
- memset (&dirent, 0, sizeof (dirent));
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- ret = sp_cache_get_entry (cache, (char *)loc->name, &dirent);
- if (ret == 0) {
- ret = inode_ctx_get (loc->parent, this, &value);
- if (ret == 0) {
- postparent = (void *)(long)value;
- buf = &dirent.d_stat;
- op_ret = 0;
- op_errno = 0;
- lookup_behind = 1;
- }
- }
- }
-
-wind:
- if (lookup_behind) {
- wind_frame = copy_frame (frame);
- if (wind_frame == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto unwind;
- }
-
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- STACK_DESTROY (wind_frame->root);
- goto unwind;
- }
-
- loc_copy (&local->loc, loc);
- wind_frame->local = local;
- STACK_WIND (wind_frame, sp_lookup_behind_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
- } else {
- STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
-
- return 0;
- }
-
-unwind:
- SP_STACK_UNWIND (frame, op_ret, op_errno, loc->inode, buf, postparent,
- NULL);
- return 0;
-
-}
-
-
-int32_t
-sp_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
-{
- sp_local_t *local = NULL;
- sp_cache_t *cache = NULL;
- fd_t *fd = NULL;
- int32_t ret = 0;
-
- local = frame->local;
- if (local == NULL) {
- goto out;
- }
-
- fd = local->fd;
-
- cache = sp_get_cache_fd (this, fd);
- if (cache == NULL) {
- cache = sp_cache_init ();
- if (cache == NULL) {
- goto out;
- }
-
- ret = sp_put_cache (this, fd, cache);
- if (ret == -1) {
- sp_cache_free (cache);
- goto out;
- }
- }
-
- sp_cache_add_entries (cache, entries);
-
-out:
- SP_STACK_UNWIND (frame, op_ret, op_errno, entries);
- return 0;
-}
-
-
-int32_t
-sp_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t off)
-{
- sp_cache_t *cache = NULL;
- sp_local_t *local = NULL;
- char *path = NULL;
- int32_t ret = -1;
-
- cache = sp_get_cache_fd (this, fd);
- if (cache) {
- if (off != cache->expected_offset) {
- cache = sp_del_cache_fd (this, fd);
- if (cache) {
- sp_cache_free (cache);
- }
- }
- }
-
- ret = inode_path (fd->inode, NULL, &path);
- if (ret == -1) {
- goto unwind;
- }
-
- ret = sp_cache_remove_parent_entry (frame, this, path);
- if (ret < 0) {
- errno = -ret;
- goto unwind;
- }
-
- local = CALLOC (1, sizeof (*local));
- if (local) {
- local->fd = fd;
- frame->local = local;
- }
-
- STACK_WIND (frame, sp_readdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdir, fd, size, off);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (frame, -1, errno, NULL);
- return 0;
-}
-
-
-int32_t
-sp_stbuf_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct stat *buf)
-{
- SP_STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-
-int32_t
-sp_chmod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
-{
- sp_cache_t *cache = NULL;
-
- GF_VALIDATE_OR_GOTO (this->name, loc, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->parent, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->name, unwind);
-
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)loc->name, 0);
- }
-
- STACK_WIND (frame, sp_stbuf_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->chmod, loc, mode);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (frame, -1, errno, NULL);
- return 0;
-}
-
-
-int32_t
-sp_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd)
-{
- sp_local_t *local = NULL;
- sp_fd_ctx_t *fd_ctx = NULL;
-
- if (op_ret == -1) {
- goto out;
- }
-
- local = frame->local;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, out, EINVAL);
-
- fd_ctx = sp_fd_ctx_new (this, local->loc.parent,
- (char *)local->loc.name, NULL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, fd_ctx, out, ENOMEM);
-
- op_ret = fd_ctx_set (fd, this, (long)(void *)fd_ctx);
- if (op_ret == -1) {
- sp_fd_ctx_free (fd_ctx);
- op_errno = ENOMEM;
- }
-
-out:
- SP_STACK_UNWIND (frame, op_ret, op_errno, fd);
- return 0;
-}
-
-
-int32_t
-sp_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd)
-{
- sp_local_t *local = NULL;
- int32_t ret = -1;
-
- local = CALLOC (1, sizeof (*local));
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, unwind, ENOMEM);
-
- frame->local = local;
-
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_fd_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (frame, -1, errno, fd);
- return 0;
-}
-
-
-static int32_t
-sp_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
- struct stat *buf)
-{
- sp_local_t *local = NULL;
- sp_fd_ctx_t *fd_ctx = NULL;
-
- if (op_ret == -1) {
- goto out;
- }
-
- local = frame->local;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, out, EINVAL);
-
- fd_ctx = sp_fd_ctx_new (this, local->loc.parent,
- (char *)local->loc.name, NULL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, fd_ctx, out, ENOMEM);
-
- op_ret = fd_ctx_set (fd, this, (long)(void *)fd_ctx);
- if (op_ret == -1) {
- sp_fd_ctx_free (fd_ctx);
- op_errno = ENOMEM;
- }
-
-out:
- SP_STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
- return 0;
-}
-
-
-int32_t
-sp_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, fd_t *fd)
-{
- sp_local_t *local = NULL;
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO (this->name, loc, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->parent, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->path, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->name, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->inode, unwind);
-
- ret = sp_cache_remove_parent_entry (frame, this, (char *)loc->path);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto unwind;
- }
-
- local = CALLOC (1, sizeof (*local));
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, unwind, ENOMEM);
-
- frame->local = local;
-
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create, loc, flags, mode, fd);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (frame, -1, errno, fd);
- return 0;
-}
-
-
-int32_t
-sp_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
-{
- sp_local_t *local = NULL;
- int32_t ret = -1;
-
- local = CALLOC (1, sizeof (*local));
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, unwind, ENOMEM);
-
- frame->local = local;
-
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- goto unwind;
- }
-
- STACK_WIND (frame, sp_fd_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->opendir, loc, fd);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (frame, -1, errno, fd);
- return 0;
-}
-
-
-int32_t
-sp_new_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
- return 0;
-}
-
-
-int32_t
-sp_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
-{
- int32_t ret = 0;
-
- GF_VALIDATE_OR_GOTO (this->name, loc, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->parent, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->path, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->name, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->inode, unwind);
-
- ret = sp_cache_remove_parent_entry (frame, this, (char *)loc->path);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto unwind;
- }
-
- STACK_WIND (frame, sp_new_entry_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir, loc, mode);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (frame, -1, errno, loc->inode, NULL);
- return 0;
-}
-
-
-int32_t
-sp_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev)
-{
- int32_t ret = 0;
-
- GF_VALIDATE_OR_GOTO (this->name, loc, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->parent, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->path, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->name, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->inode, unwind);
-
- ret = sp_cache_remove_parent_entry (frame, this, (char *)loc->path);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto unwind;
- }
-
- STACK_WIND (frame, sp_new_entry_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod, loc, mode, rdev);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (frame, -1, errno, loc->inode, NULL);
- return 0;
-}
-
-
-int32_t
-sp_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc)
-{
- int32_t ret = 0;
-
- GF_VALIDATE_OR_GOTO (this->name, loc, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->parent, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->path, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->name, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->inode, unwind);
-
- ret = sp_cache_remove_parent_entry (frame, this, (char *)loc->path);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto unwind;
- }
-
- STACK_WIND (frame, sp_new_entry_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink, linkpath, loc);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (frame, -1, errno, loc->inode, NULL);
- return 0;
-}
-
-
-int32_t
-sp_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
-{
- int32_t ret = 0;
-
- GF_VALIDATE_OR_GOTO (this->name, newloc, unwind);
- GF_VALIDATE_OR_GOTO (this->name, newloc->parent, unwind);
- GF_VALIDATE_OR_GOTO (this->name, newloc->path, unwind);
- GF_VALIDATE_OR_GOTO (this->name, newloc->name, unwind);
- GF_VALIDATE_OR_GOTO (this->name, newloc->inode, unwind);
-
- ret = sp_cache_remove_parent_entry (frame, this, (char *)newloc->path);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto unwind;
- }
-
- STACK_WIND (frame, sp_new_entry_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link, oldloc, newloc);
-
- return 0;
-
-unwind:
- SP_STACK_UNWIND (frame, -1, errno, oldloc->inode, NULL);
- return 0;
-}
-
-
-int32_t
-sp_fchmod (call_frame_t *frame, xlator_t *this, fd_t *fd, mode_t mode)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
- sp_cache_t *cache = NULL;
- uint64_t value = 0;
- int32_t ret = 0;
- inode_t *parent = NULL;
- char *name = NULL;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == -1) {
- errno = EINVAL;
- goto unwind;
- }
-
- fd_ctx = (void *)(long)value;
- name = fd_ctx->name;
- parent = fd_ctx->parent_inode;
-
- cache = sp_get_cache_inode (this, parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, name, 0);
- }
-
- STACK_WIND (frame, sp_stbuf_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fchmod, fd, mode);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (frame, -1, errno, NULL);
- return 0;
-}
-
-
-int32_t
-sp_chown (call_frame_t *frame, xlator_t *this, loc_t *loc, uid_t uid, gid_t gid)
-{
- sp_cache_t *cache = NULL;
-
- GF_VALIDATE_OR_GOTO (this->name, loc, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->parent, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->name, unwind);
-
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)loc->name, 0);
- }
-
- STACK_WIND (frame, sp_stbuf_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->chown, loc, uid, gid);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (frame, -1, errno, NULL);
- return 0;
-}
-
-
-int32_t
-sp_fchown (call_frame_t *frame, xlator_t *this, fd_t *fd, uid_t uid, gid_t gid)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
- sp_cache_t *cache = NULL;
- uint64_t value = 0;
- int32_t ret = 0;
- inode_t *parent = NULL;
- char *name = NULL;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == -1) {
- errno = EINVAL;
- goto unwind;
- }
-
- fd_ctx = (void *)(long)value;
- name = fd_ctx->name;
- parent = fd_ctx->parent_inode;
-
- cache = sp_get_cache_inode (this, parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, name, 0);
- }
-
- STACK_WIND (frame, sp_stbuf_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fchown, fd, uid, gid);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (frame, -1, errno, NULL);
- return 0;
-}
-
-
-int32_t
-sp_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
-{
- sp_cache_t *cache = NULL;
-
- GF_VALIDATE_OR_GOTO (this->name, loc, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->parent, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->name, unwind);
-
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)loc->name, 0);
- }
-
- STACK_WIND (frame, sp_stbuf_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, loc, offset);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (frame, -1, errno, NULL);
- return 0;
-}
-
-
-int32_t
-sp_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
- sp_cache_t *cache = NULL;
- uint64_t value = 0;
- int32_t ret = 0;
- inode_t *parent = NULL;
- char *name = NULL;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == -1) {
- errno = EINVAL;
- goto unwind;
- }
-
- fd_ctx = (void *)(long)value;
- name = fd_ctx->name;
- parent = fd_ctx->parent_inode;
-
- cache = sp_get_cache_inode (this, parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, name, 0);
- }
-
- STACK_WIND (frame, sp_stbuf_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, fd, offset);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (frame, -1, errno, NULL);
- return 0;
-}
-
-
-int32_t
-sp_utimens (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct timespec tv[2])
-{
- sp_cache_t *cache = NULL;
-
- GF_VALIDATE_OR_GOTO (this->name, loc, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->parent, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->name, unwind);
-
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)loc->name, 0);
- }
-
- STACK_WIND (frame, sp_stbuf_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->utimens, loc, tv);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (frame, -1, errno, NULL);
- return 0;
-}
-
-
-int32_t
-sp_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, const char *path)
-{
- SP_STACK_UNWIND (frame, op_ret, op_errno, path);
- return 0;
-}
-
-
-int32_t
-sp_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size)
-{
- sp_cache_t *cache = NULL;
-
- GF_VALIDATE_OR_GOTO (this->name, loc, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->parent, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->name, unwind);
-
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)loc->name, 0);
- }
-
- STACK_WIND (frame, sp_readlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readlink, loc, size);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (frame, -1, errno, NULL);
- return 0;
-}
-
-
-int32_t
-sp_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- SP_STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-int32_t
-sp_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- sp_cache_t *cache = NULL;
- int32_t ret = 0;
-
- GF_VALIDATE_OR_GOTO (this->name, loc, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->parent, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->name, unwind);
-
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)loc->name, 0);
- }
-
- ret = sp_cache_remove_parent_entry (frame, this, (char *)loc->path);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto unwind;
- }
-
- STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, loc);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (frame, -1, errno);
- return 0;
-}
-
-
-void
-sp_remove_caches_from_all_fds_opened (xlator_t *this, inode_t *inode)
-{
- fd_t *fd = NULL;
- sp_cache_t *cache = NULL;
-
- LOCK (&inode->lock);
- {
- list_for_each_entry (fd, &inode->fd_list, inode_list) {
- cache = sp_get_cache_fd (this, fd);
- if (cache) {
- sp_cache_remove_entry (cache, NULL, 1);
- }
- }
- }
- UNLOCK (&inode->lock);
-}
-
-
-int32_t
-sp_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- sp_cache_t *cache = NULL;
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO (this->name, loc, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->name, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->path, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->inode, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->parent, unwind);
-
- sp_remove_caches_from_all_fds_opened (this, loc->inode);
-
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)loc->name, 0);
- }
-
- ret = sp_cache_remove_parent_entry (frame, this, (char *)loc->path);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto unwind;
- }
-
- STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir, loc);
- return 0;
-
-unwind:
- STACK_UNWIND (frame, -1, errno);
- return 0;
-}
-
-
-int32_t
-sp_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iovec *vector, int32_t count,
- struct stat *stbuf, struct iobref *iobref)
-{
- SP_STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf, iobref);
- return 0;
-}
-
-
-int32_t
-sp_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
- sp_cache_t *cache = NULL;
- uint64_t value = 0;
- int32_t ret = 0;
- inode_t *parent = NULL;
- char *name = NULL;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == -1) {
- errno = EINVAL;
- goto unwind;
- }
-
- fd_ctx = (void *)(long)value;
- name = fd_ctx->name;
- parent = fd_ctx->parent_inode;
-
- cache = sp_get_cache_inode (this, parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, name, 0);
- }
-
- STACK_WIND (frame, sp_readv_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv, fd, size, offset);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (frame, -1, errno, NULL, -1, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-sp_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
- int32_t count, off_t off, struct iobref *iobref)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
- sp_cache_t *cache = NULL;
- uint64_t value = 0;
- int32_t ret = 0;
- inode_t *parent = NULL;
- char *name = NULL;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == -1) {
- errno = EINVAL;
- goto unwind;
- }
-
- fd_ctx = (void *)(long)value;
- name = fd_ctx->name;
- parent = fd_ctx->parent_inode;
-
- cache = sp_get_cache_inode (this, parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, name, 0);
- }
-
- STACK_WIND (frame, sp_stbuf_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev, fd, vector, count, off,
- iobref);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (frame, -1, errno, NULL);
- return 0;
-}
-
-
-int32_t
-sp_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
- sp_cache_t *cache = NULL;
- uint64_t value = 0;
- int32_t ret = 0;
- inode_t *parent = NULL;
- char *name = NULL;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == -1) {
- errno = EINVAL;
- goto unwind;
- }
-
- fd_ctx = (void *)(long)value;
- name = fd_ctx->name;
- parent = fd_ctx->parent_inode;
-
- cache = sp_get_cache_inode (this, parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, name, 0);
- }
-
- STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync, fd, flags);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (frame, -1, errno);
- return 0;
-}
-
-
-int32_t
-sp_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,loc_t *newloc)
-{
- sp_cache_t *cache = NULL;
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO (this->name, oldloc, unwind);
- GF_VALIDATE_OR_GOTO (this->name, oldloc->path, unwind);
- GF_VALIDATE_OR_GOTO (this->name, oldloc->name, unwind);
- GF_VALIDATE_OR_GOTO (this->name, oldloc->parent, unwind);
- GF_VALIDATE_OR_GOTO (this->name, oldloc->inode, unwind);
-
- GF_VALIDATE_OR_GOTO (this->name, newloc, unwind);
- GF_VALIDATE_OR_GOTO (this->name, newloc->path, unwind);
-
- cache = sp_get_cache_inode (this, oldloc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)oldloc->name, 0);
- }
-
- cache = sp_get_cache_inode (this, newloc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)newloc->name, 0);
- }
-
- ret = sp_cache_remove_parent_entry (frame, this, (char *)oldloc->path);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto unwind;
- }
-
- ret = sp_cache_remove_parent_entry (frame, this, (char *)newloc->path);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto unwind;
- }
-
- if (S_ISDIR (oldloc->inode->st_mode)) {
- sp_remove_caches_from_all_fds_opened (this, oldloc->inode);
- }
-
- STACK_WIND (frame, sp_stbuf_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename, oldloc, newloc);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (frame, -1, errno, NULL);
- return 0;
-}
-
-
-int32_t
-sp_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags)
-{
- sp_cache_t *cache = NULL;
-
- GF_VALIDATE_OR_GOTO (this->name, loc, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->parent, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->name, unwind);
-
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)loc->name, 0);
- }
-
- STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr, loc, dict, flags);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (frame, -1, errno);
- return 0;
-}
-
-
-int32_t
-sp_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
-{
- sp_cache_t *cache = NULL;
-
- GF_VALIDATE_OR_GOTO (this->name, loc, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->parent, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->name, unwind);
-
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)loc->name, 0);
- }
-
- STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr, loc, name);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (frame, -1, errno);
- return 0;
-}
-
-
-int32_t
-sp_setdents (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
- dir_entry_t *entries, int32_t count)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
- sp_cache_t *cache = NULL;
- uint64_t value = 0;
- int32_t ret = 0;
- inode_t *parent = NULL;
- char *name = NULL;
- dir_entry_t *trav = NULL;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == -1) {
- errno = EINVAL;
- goto unwind;
- }
-
- fd_ctx = (void *)(long)value;
- name = fd_ctx->name;
- parent = fd_ctx->parent_inode;
-
- cache = sp_get_cache_inode (this, parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, name, 0);
- }
-
- cache = sp_get_cache_fd (this, fd);
- if (cache) {
- for (trav = entries->next; trav; trav = trav->next) {
- sp_cache_remove_entry (cache, trav->name, 0);
- }
- }
-
- STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setdents, fd, flags, entries,
- count);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (frame, -1, errno);
- return 0;
-}
-
-
-int32_t
-sp_getdents_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dir_entry_t *entries,
- int32_t count)
-{
- dir_entry_t *trav = NULL;
- sp_local_t *local = NULL;
- sp_cache_t *cache = NULL;
-
- if (op_ret == -1) {
- goto out;
- }
-
- local = frame->local;
- if ((local == NULL) || (local->fd == NULL)) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
-
- cache = sp_get_cache_fd (this, local->fd);
- if (cache) {
- for (trav = entries->next; trav; trav = trav->next) {
- if (S_ISLNK (trav->buf.st_mode)) {
- sp_cache_remove_entry (cache, trav->name, 0);
- }
- }
- }
-
-out:
- SP_STACK_UNWIND (frame, op_ret, op_errno, entries, count);
- return 0;
-}
-
-
-int32_t
-sp_getdents (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, int32_t flags)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
- sp_cache_t *cache = NULL;
- uint64_t value = 0;
- int32_t ret = 0;
- inode_t *parent = NULL;
- char *name = NULL;
- sp_local_t *local = NULL;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == -1) {
- errno = EINVAL;
- goto unwind;
- }
-
- fd_ctx = (void *)(long)value;
- name = fd_ctx->name;
- parent = fd_ctx->parent_inode;
-
- cache = sp_get_cache_inode (this, parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, name, 0);
- }
-
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto unwind;
- }
-
- local->fd = fd;
- frame->local = local;
-
- STACK_WIND (frame, sp_getdents_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getdents, fd, size, offset, flags);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (frame, -1, errno, NULL, -1);
- return 0;
-}
-
-
-int32_t
-sp_checksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, uint8_t *file_checksum,
- uint8_t *dir_checksum)
-{
- SP_STACK_UNWIND (frame, op_ret, op_errno, file_checksum, dir_checksum);
- return 0;
-}
-
-
-int32_t
-sp_checksum (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flag)
-{
- sp_cache_t *cache = NULL;
-
- GF_VALIDATE_OR_GOTO (this->name, loc, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->parent, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->name, unwind);
-
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)loc->name, 0);
- }
-
- STACK_WIND (frame, sp_checksum_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->checksum, loc, flag);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (frame, -1, errno, NULL);
- return 0;
-}
-
-
-int32_t
-sp_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
-{
- SP_STACK_UNWIND (frame, op_ret, op_errno, dict);
- return 0;
-}
-
-
-int32_t
-sp_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict)
-{
- sp_cache_t *cache = NULL;
-
- GF_VALIDATE_OR_GOTO (this->name, loc, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->parent, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->name, unwind);
-
- cache = sp_get_cache_inode (this, loc->parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, (char *)loc->name, 0);
- }
-
- STACK_WIND (frame, sp_xattrop_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->xattrop, loc, flags, dict);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (frame, -1, errno, NULL);
- return 0;
-}
-
-
-int32_t
-sp_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t flags, dict_t *dict)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
- sp_cache_t *cache = NULL;
- uint64_t value = 0;
- int32_t ret = 0;
- inode_t *parent = NULL;
- char *name = NULL;
-
- ret = fd_ctx_get (fd, this, &value);
- if (ret == -1) {
- errno = EINVAL;
- goto unwind;
- }
-
- fd_ctx = (void *)(long)value;
- name = fd_ctx->name;
- parent = fd_ctx->parent_inode;
-
- cache = sp_get_cache_inode (this, parent, frame->root->pid);
- if (cache) {
- sp_cache_remove_entry (cache, name, 0);
- }
-
- STACK_WIND (frame, sp_xattrop_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fxattrop, fd, flags, dict);
- return 0;
-
-unwind:
- SP_STACK_UNWIND (frame, -1, errno, NULL);
- return 0;
-}
-
-
-int32_t
-sp_forget (xlator_t *this, inode_t *inode)
-{
- struct stat *buf = NULL;
- uint64_t value = 0;
-
- inode_ctx_del (inode, this, &value);
-
- if (value) {
- buf = (void *)(long)value;
- FREE (buf);
- }
-
- return 0;
-}
-
-
-int32_t
-sp_release (xlator_t *this, fd_t *fd)
-{
- sp_fd_ctx_t *fd_ctx = NULL;
- uint64_t value = 0;
- int32_t ret = 0;
-
- ret = fd_ctx_del (fd, this, &value);
- if (!ret) {
- fd_ctx = (void *)(long) value;
- sp_fd_ctx_free (fd_ctx);
- }
-
- return 0;
-}
-
-
-
-int32_t
-init (xlator_t *this)
-{
- int32_t ret = -1;
- if (!this->children || this->children->next) {
- gf_log ("stat-prefetch",
- GF_LOG_ERROR,
- "FATAL: translator %s does not have exactly one child "
- "node", this->name);
- goto out;
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-void
-fini (xlator_t *this)
-{
- return;
-}
-
-
-struct xlator_fops fops = {
- .lookup = sp_lookup,
- .readdir = sp_readdir,
- .chmod = sp_chmod,
- .open = sp_open,
- .create = sp_create,
- .opendir = sp_opendir,
- .mkdir = sp_mkdir,
- .mknod = sp_mknod,
- .symlink = sp_symlink,
- .link = sp_link,
- .fchmod = sp_fchmod,
- .chown = sp_chown,
- .fchown = sp_fchown,
- .truncate = sp_truncate,
- .ftruncate = sp_ftruncate,
- .utimens = sp_utimens,
- .readlink = sp_readlink,
- .unlink = sp_unlink,
- .rmdir = sp_rmdir,
- .readv = sp_readv,
- .writev = sp_writev,
- .fsync = sp_fsync,
- .rename = sp_rename,
- .setxattr = sp_setxattr,
- .removexattr = sp_removexattr,
- .setdents = sp_setdents,
- .getdents = sp_getdents,
- .checksum = sp_checksum,
- .xattrop = sp_xattrop,
- .fxattrop = sp_fxattrop,
-};
-
-struct xlator_mops mops = {
-};
-
-struct xlator_cbks cbks = {
- .forget = sp_forget,
- .release = sp_release,
- .releasedir = sp_release
-};
diff --git a/xlators/performance/stat-prefetch/src/stat-prefetch.h b/xlators/performance/stat-prefetch/src/stat-prefetch.h
deleted file mode 100644
index 05c5d4361..000000000
--- a/xlators/performance/stat-prefetch/src/stat-prefetch.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- Copyright (c) 2009-2010 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _STAT_PREFETCH_H
-#define _STAT_PREFETCH_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "dict.h"
-#include "xlator.h"
-
-struct sp_cache {
- gf_dirent_t entries; /* Head of list of cached dirents */
- uint64_t expected_offset; /* Offset where the next read will
- * happen.
- */
- gf_lock_t lock;
-};
-typedef struct sp_cache sp_cache_t;
-
-struct sp_fd_ctx {
- sp_cache_t *cache;
- inode_t *parent_inode; /*
- * inode corresponding to dirname (path)
- */
- char *name; /*
- * basename of path on which this fd is
- * opened
- */
- gf_lock_t lock;
-};
-typedef struct sp_fd_ctx sp_fd_ctx_t;
-
-struct sp_local {
- loc_t loc;
- fd_t *fd;
-};
-typedef struct sp_local sp_local_t;
-
-
-void sp_local_free (sp_local_t *local);
-
-#define SP_STACK_UNWIND(frame, params ...) do { \
- sp_local_t *__local = frame->local; \
- frame->local = NULL; \
- STACK_UNWIND (frame, params); \
- sp_local_free (__local); \
-} while (0)
-
-#define SP_STACK_DESTROY(frame) do { \
- sp_local_t *__local = frame->local; \
- frame->local = NULL; \
- STACK_DESTROY (frame->root); \
- sp_local_free (__local); \
-} while (0)
-
-#endif /* #ifndef _STAT_PREFETCH_H */
diff --git a/xlators/performance/symlink-cache/src/Makefile.am b/xlators/performance/symlink-cache/src/Makefile.am
index 06e85fc92..4091c3293 100644
--- a/xlators/performance/symlink-cache/src/Makefile.am
+++ b/xlators/performance/symlink-cache/src/Makefile.am
@@ -1,12 +1,13 @@
xlator_LTLIBRARIES = symlink-cache.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/performance
-symlink_cache_la_LDFLAGS = -module -avoidversion
+symlink_cache_la_LDFLAGS = -module -avoid-version
symlink_cache_la_SOURCES = symlink-cache.c
symlink_cache_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/performance/symlink-cache/src/symlink-cache.c b/xlators/performance/symlink-cache/src/symlink-cache.c
index ad0836c5e..3b5fbc252 100644
--- a/xlators/performance/symlink-cache/src/symlink-cache.c
+++ b/xlators/performance/symlink-cache/src/symlink-cache.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
@@ -90,7 +81,7 @@ sc_cache_update (xlator_t *this, inode_t *inode, const char *link)
int
-sc_cache_set (xlator_t *this, inode_t *inode, struct stat *buf,
+sc_cache_set (xlator_t *this, inode_t *inode, struct iatt *buf,
const char *link)
{
struct symlink_cache *sc = NULL;
@@ -126,7 +117,7 @@ sc_cache_set (xlator_t *this, inode_t *inode, struct stat *buf,
}
}
- sc->ctime = buf->st_ctime;
+ sc->ctime = buf->ia_ctime;
gf_log (this->name, GF_LOG_DEBUG,
"setting symlink cache: %s", link);
@@ -146,8 +137,7 @@ sc_cache_set (xlator_t *this, inode_t *inode, struct stat *buf,
err:
if (sc) {
- if (sc->readlink)
- FREE (sc->readlink);
+ FREE (sc->readlink);
sc->readlink = NULL;
FREE (sc);
}
@@ -180,12 +170,12 @@ sc_cache_flush (xlator_t *this, inode_t *inode)
int
-sc_cache_validate (xlator_t *this, inode_t *inode, struct stat *buf)
+sc_cache_validate (xlator_t *this, inode_t *inode, struct iatt *buf)
{
struct symlink_cache *sc = NULL;
uint64_t tmp_sc = 0;
- if (!S_ISLNK (buf->st_mode)) {
+ if (!IA_ISLNK (buf->ia_type)) {
sc_cache_flush (this, inode);
return 0;
}
@@ -204,7 +194,7 @@ sc_cache_validate (xlator_t *this, inode_t *inode, struct stat *buf)
sc = (struct symlink_cache *)(long)tmp_sc;
}
- if (sc->ctime == buf->st_ctime)
+ if (sc->ctime == buf->ia_ctime)
return 0;
/* STALE */
@@ -216,7 +206,7 @@ sc_cache_validate (xlator_t *this, inode_t *inode, struct stat *buf)
sc->readlink = NULL;
}
- sc->ctime = buf->st_ctime;
+ sc->ctime = buf->ia_ctime;
return 0;
}
@@ -242,7 +232,7 @@ sc_cache_get (xlator_t *this, inode_t *inode, char **link)
int
sc_readlink_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
- const char *link)
+ const char *link, struct iatt *sbuf, dict_t *xdata)
{
if (op_ret > 0)
sc_cache_update (this, frame->local, link);
@@ -250,16 +240,18 @@ sc_readlink_cbk (call_frame_t *frame, void *cookie,
inode_unref (frame->local);
frame->local = NULL;
- STACK_UNWIND (frame, op_ret, op_errno, link);
+ STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, link, sbuf,
+ xdata);
return 0;
}
int
sc_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size)
+ loc_t *loc, size_t size, dict_t *xdata)
{
char *link = NULL;
+ struct iatt buf = {0, };
sc_cache_get (this, loc->inode, &link);
@@ -268,7 +260,14 @@ sc_readlink (call_frame_t *frame, xlator_t *this,
gf_log (this->name, GF_LOG_DEBUG,
"cache hit %s -> %s",
loc->path, link);
- STACK_UNWIND (frame, strlen (link), 0, link);
+
+ /*
+ libglusterfsclient, nfs or any other translators
+ using buf in readlink_cbk should be aware that @buf
+ is 0 filled
+ */
+ STACK_UNWIND_STRICT (readlink, frame, strlen (link), 0, link,
+ &buf, NULL);
FREE (link);
return 0;
}
@@ -278,7 +277,7 @@ sc_readlink (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, sc_readlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->readlink,
- loc, size);
+ loc, size, xdata);
return 0;
}
@@ -287,7 +286,8 @@ sc_readlink (call_frame_t *frame, xlator_t *this,
int
sc_symlink_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
- inode_t *inode, struct stat *buf)
+ inode_t *inode, struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
if (op_ret == 0) {
if (frame->local) {
@@ -295,21 +295,22 @@ sc_symlink_cbk (call_frame_t *frame, void *cookie,
}
}
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
+ STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
return 0;
}
int
sc_symlink (call_frame_t *frame, xlator_t *this,
- const char *dst, loc_t *src)
+ const char *dst, loc_t *src, mode_t umask, dict_t *xdata)
{
frame->local = strdup (dst);
STACK_WIND (frame, sc_symlink_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->symlink,
- dst, src);
+ dst, src, umask, xdata);
return 0;
}
@@ -318,26 +319,28 @@ sc_symlink (call_frame_t *frame, xlator_t *this,
int
sc_lookup_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int op_ret, int op_errno,
- inode_t *inode, struct stat *buf, dict_t *xattr)
+ inode_t *inode, struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
{
if (op_ret == 0)
sc_cache_validate (this, inode, buf);
else
sc_cache_flush (this, inode);
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf, xattr);
+ STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf,
+ xdata, postparent);
return 0;
}
int
sc_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
+ loc_t *loc, dict_t *xdata)
{
STACK_WIND (frame, sc_lookup_cbk,
FIRST_CHILD(this),
FIRST_CHILD(this)->fops->lookup,
- loc, xattr_req);
+ loc, xdata);
return 0;
}
@@ -353,10 +356,9 @@ sc_forget (xlator_t *this,
}
-int32_t
+int32_t
init (xlator_t *this)
{
-
if (!this->children || this->children->next)
{
gf_log (this->name, GF_LOG_ERROR,
@@ -387,8 +389,6 @@ struct xlator_fops fops = {
.readlink = sc_readlink,
};
-struct xlator_mops mops = {
-};
struct xlator_cbks cbks = {
.forget = sc_forget,
diff --git a/xlators/performance/write-behind/src/Makefile.am b/xlators/performance/write-behind/src/Makefile.am
index f800abad5..6c829d8ee 100644
--- a/xlators/performance/write-behind/src/Makefile.am
+++ b/xlators/performance/write-behind/src/Makefile.am
@@ -1,12 +1,15 @@
xlator_LTLIBRARIES = write-behind.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
-write_behind_la_LDFLAGS = -module -avoidversion
+write_behind_la_LDFLAGS = -module -avoid-version
write_behind_la_SOURCES = write-behind.c
write_behind_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+noinst_HEADERS = write-behind-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/performance/write-behind/src/write-behind-mem-types.h b/xlators/performance/write-behind/src/write-behind-mem-types.h
new file mode 100644
index 000000000..f64f429ce
--- /dev/null
+++ b/xlators/performance/write-behind/src/write-behind-mem-types.h
@@ -0,0 +1,26 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __WB_MEM_TYPES_H__
+#define __WB_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_wb_mem_types_ {
+ gf_wb_mt_wb_file_t = gf_common_mt_end + 1,
+ gf_wb_mt_wb_request_t,
+ gf_wb_mt_iovec,
+ gf_wb_mt_wb_conf_t,
+ gf_wb_mt_wb_inode_t,
+ gf_wb_mt_end
+};
+#endif
+
diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c
index b38f1665b..95c5921c6 100644
--- a/xlators/performance/write-behind/src/write-behind.c
+++ b/xlators/performance/write-behind/src/write-behind.c
@@ -1,24 +1,13 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-/*TODO: check for non null wb_file_data before getting wb_file */
-
#ifndef _CONFIG_H
#define _CONFIG_H
@@ -34,2328 +23,1989 @@
#include "compat-errno.h"
#include "common-utils.h"
#include "call-stub.h"
+#include "statedump.h"
+#include "defaults.h"
+#include "write-behind-mem-types.h"
+
+#define MAX_VECTOR_COUNT 8
+#define WB_AGGREGATE_SIZE 131072 /* 128 KB */
+#define WB_WINDOW_SIZE 1048576 /* 1MB */
-#define MAX_VECTOR_COUNT 8
-#define WB_AGGREGATE_SIZE 131072 /* 128 KB */
-#define WB_WINDOW_SIZE 1048576 /* 1MB */
-
typedef struct list_head list_head_t;
struct wb_conf;
-struct wb_page;
-struct wb_file;
-
-
-typedef struct wb_file {
- int disabled;
- uint64_t disable_till;
- size_t window_size;
- int32_t refcount;
- int32_t op_ret;
- int32_t op_errno;
- list_head_t request;
- list_head_t passive_requests;
- fd_t *fd;
+struct wb_inode;
+
+typedef struct wb_inode {
+ ssize_t window_conf;
+ ssize_t window_current;
+ ssize_t transit; /* size of data stack_wound, and yet
+ to be fulfilled (wb_fulfill_cbk).
+ used for trickling_writes
+ */
+
+ list_head_t all; /* All requests, from enqueue() till destroy().
+ Used only for resetting generation
+ number when empty.
+ */
+ list_head_t todo; /* Work to do (i.e, STACK_WIND to server).
+ Once we STACK_WIND, the entry is taken
+ off the list. If it is non-sync write,
+ then we continue to track it via @liability
+ or @temptation depending on the status
+ of its writeback.
+ */
+ list_head_t liability; /* Non-sync writes which are lied
+ (STACK_UNWIND'ed to caller) but ack
+ from server not yet complete. This
+ is the "liability" which we hold, and
+ must guarantee that dependent operations
+ which arrive later (which overlap, etc.)
+ are issued only after their dependencies
+ in this list are "fulfilled".
+
+ Server acks for entries in this list
+ shrinks the window.
+
+ The sum total of all req->write_size
+ of entries in this list must be kept less
+ than the permitted window size.
+ */
+ list_head_t temptation; /* Operations for which we are tempted
+ to 'lie' (write-behind), but temporarily
+ holding off (because of insufficient
+ window capacity, etc.)
+
+ This is the list to look at to grow
+ the window (in __wb_pick_unwinds()).
+
+ Entries typically get chosen from
+ write-behind from this list, and therefore
+ get "upgraded" to the "liability" list.
+ */
+ list_head_t wip; /* List of write calls in progress, SYNC or non-SYNC
+ which are currently STACK_WIND'ed towards the server.
+ This is for guaranteeing that no two overlapping
+ writes are in progress at the same time. Modules
+ like eager-lock in AFR depend on this behavior.
+ */
+ uint64_t gen; /* Liability generation number. Represents
+ the current 'state' of liability. Every
+ new addition to the liability list bumps
+ the generation number.
+
+ a newly arrived request is only required
+ to perform causal checks against the entries
+ in the liability list which were present
+ at the time of its addition. the generation
+ number at the time of its addition is stored
+ in the request and used during checks.
+
+ the liability list can grow while the request
+ waits in the todo list waiting for its
+ dependent operations to complete. however
+ it is not of the request's concern to depend
+ itself on those new entries which arrived
+ after it arrived (i.e, those that have a
+ liability generation higher than itself)
+ */
gf_lock_t lock;
xlator_t *this;
-}wb_file_t;
+} wb_inode_t;
typedef struct wb_request {
- list_head_t list;
- list_head_t winds;
- list_head_t unwinds;
- list_head_t other_requests;
- call_stub_t *stub;
- int32_t refcount;
- wb_file_t *file;
- union {
- struct {
- char write_behind;
- char stack_wound;
- char got_reply;
- }write_request;
-
- struct {
- char marked_for_resume;
- }other_requests;
- }flags;
+ list_head_t all;
+ list_head_t todo;
+ list_head_t lie; /* either in @liability or @temptation */
+ list_head_t winds;
+ list_head_t unwinds;
+ list_head_t wip;
+
+ call_stub_t *stub;
+
+ ssize_t write_size; /* currently held size
+ (after collapsing) */
+ size_t orig_size; /* size which arrived with the request.
+ This is the size by which we grow
+ the window when unwinding the frame.
+ */
+ size_t total_size; /* valid only in @head in wb_fulfill().
+ This is the size with which we perform
+ STACK_WIND to server and therefore the
+ amount by which we shrink the window.
+ */
+
+ int op_ret;
+ int op_errno;
+
+ int32_t refcount;
+ wb_inode_t *wb_inode;
+ glusterfs_fop_t fop;
+ gf_lkowner_t lk_owner;
+ struct iobref *iobref;
+ uint64_t gen; /* inode liability state at the time of
+ request arrival */
+
+ fd_t *fd;
+ struct {
+ size_t size; /* 0 size == till infinity */
+ off_t off;
+ int append:1; /* offset is invalid. only one
+ outstanding append at a time */
+ int tempted:1; /* true only for non-sync writes */
+ int lied:1; /* sin committed */
+ int fulfilled:1; /* got server acknowledgement */
+ int go:1; /* enough aggregating, good to go */
+ } ordering;
} wb_request_t;
-struct wb_conf {
- uint64_t aggregate_size;
- uint64_t window_size;
- uint64_t disable_till;
- gf_boolean_t enable_O_SYNC;
- gf_boolean_t flush_behind;
-};
+typedef struct wb_conf {
+ uint64_t aggregate_size;
+ uint64_t window_size;
+ gf_boolean_t flush_behind;
+ gf_boolean_t trickling_writes;
+ gf_boolean_t strict_write_ordering;
+ gf_boolean_t strict_O_DIRECT;
+} wb_conf_t;
-typedef struct wb_local {
- list_head_t winds;
- struct wb_file *file;
- wb_request_t *request;
- int op_ret;
- int op_errno;
- call_frame_t *frame;
- int32_t reply_count;
-} wb_local_t;
+void
+wb_process_queue (wb_inode_t *wb_inode);
-typedef struct wb_conf wb_conf_t;
-typedef struct wb_page wb_page_t;
+wb_inode_t *
+__wb_inode_ctx_get (xlator_t *this, inode_t *inode)
+{
+ uint64_t value = 0;
+ wb_inode_t *wb_inode = NULL;
+ __inode_ctx_get (inode, this, &value);
+ wb_inode = (wb_inode_t *)(unsigned long) value;
-int32_t
-wb_process_queue (call_frame_t *frame, wb_file_t *file, char flush_all);
+ return wb_inode;
+}
-ssize_t
-wb_sync (call_frame_t *frame, wb_file_t *file, list_head_t *winds);
-ssize_t
-__wb_mark_winds (list_head_t *list, list_head_t *winds, size_t aggregate_size,
- char wind_all);
+wb_inode_t *
+wb_inode_ctx_get (xlator_t *this, inode_t *inode)
+{
+ wb_inode_t *wb_inode = NULL;
+ GF_VALIDATE_OR_GOTO ("write-behind", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
-static void
-__wb_request_unref (wb_request_t *this)
-{
- if (this->refcount <= 0) {
- gf_log ("wb-request", GF_LOG_DEBUG,
- "refcount(%d) is <= 0", this->refcount);
- return;
+ LOCK (&inode->lock);
+ {
+ wb_inode = __wb_inode_ctx_get (this, inode);
}
+ UNLOCK (&inode->lock);
+out:
+ return wb_inode;
+}
- this->refcount--;
- if (this->refcount == 0) {
- list_del_init (&this->list);
- if (this->stub && this->stub->fop == GF_FOP_WRITE) {
- call_stub_destroy (this->stub);
+
+gf_boolean_t
+wb_fd_err (fd_t *fd, xlator_t *this, int32_t *op_errno)
+{
+ gf_boolean_t err = _gf_false;
+ uint64_t value = 0;
+ int32_t tmp = 0;
+
+ if (fd_ctx_get (fd, this, &value) == 0) {
+ if (op_errno) {
+ tmp = value;
+ *op_errno = tmp;
}
- FREE (this);
+ err = _gf_true;
}
+
+ return err;
}
-static void
-wb_request_unref (wb_request_t *this)
+/*
+ Below is a succinct explanation of the code deciding whether two regions
+ overlap, from Pavan <tcp@gluster.com>.
+
+ For any two ranges to be non-overlapping, either the end of the first
+ range is lesser than the start of the second, or vice versa. Example -
+
+ <---------> <-------------->
+ p q x y
+
+ ( q < x ) or (y < p) = > No overlap.
+
+ To check for *overlap*, we can negate this (using de morgan's laws), and
+ it becomes -
+
+ (q >= x ) and (y >= p)
+
+ Either that, or you write the negation using -
+
+ if (! ((q < x) or (y < p)) ) {
+ "Overlap"
+ }
+*/
+
+gf_boolean_t
+wb_requests_overlap (wb_request_t *req1, wb_request_t *req2)
{
- wb_file_t *file = NULL;
- if (this == NULL) {
- gf_log ("wb-request", GF_LOG_DEBUG,
- "request is NULL");
- return;
- }
-
- file = this->file;
- LOCK (&file->lock);
- {
- __wb_request_unref (this);
- }
- UNLOCK (&file->lock);
+ uint64_t r1_start = 0;
+ uint64_t r1_end = 0;
+ uint64_t r2_start = 0;
+ uint64_t r2_end = 0;
+ enum _gf_boolean do_overlap = 0;
+
+ r1_start = req1->ordering.off;
+ if (req1->ordering.size)
+ r1_end = r1_start + req1->ordering.size - 1;
+ else
+ r1_end = ULLONG_MAX;
+
+ r2_start = req2->ordering.off;
+ if (req2->ordering.size)
+ r2_end = r2_start + req2->ordering.size - 1;
+ else
+ r2_end = ULLONG_MAX;
+
+ do_overlap = ((r1_end >= r2_start) && (r2_end >= r1_start));
+
+ return do_overlap;
}
-static wb_request_t *
-__wb_request_ref (wb_request_t *this)
+gf_boolean_t
+wb_requests_conflict (wb_request_t *lie, wb_request_t *req)
{
- if (this->refcount < 0) {
- gf_log ("wb-request", GF_LOG_DEBUG,
- "refcount(%d) is < 0", this->refcount);
- return NULL;
- }
+ wb_conf_t *conf = NULL;
+
+ conf = req->wb_inode->this->private;
+
+ if (lie == req)
+ /* request cannot conflict with itself */
+ return _gf_false;
+
+ if (lie->gen >= req->gen)
+ /* this liability entry was behind
+ us in the todo list */
+ return _gf_false;
+
+ if (lie->ordering.append)
+ /* all modifications wait for the completion
+ of outstanding append */
+ return _gf_true;
+
+ if (conf->strict_write_ordering)
+ /* We are sure (lie->gen < req->gen) by now. So
+ skip overlap check if strict write ordering is
+ requested and always return "conflict" against a
+ lower generation lie. */
+ return _gf_true;
- this->refcount++;
- return this;
+ return wb_requests_overlap (lie, req);
}
-wb_request_t *
-wb_request_ref (wb_request_t *this)
+gf_boolean_t
+wb_liability_has_conflict (wb_inode_t *wb_inode, wb_request_t *req)
{
- wb_file_t *file = NULL;
- if (this == NULL) {
- gf_log ("wb-request", GF_LOG_DEBUG,
- "request is NULL");
- return NULL;
+ wb_request_t *each = NULL;
+
+ list_for_each_entry (each, &wb_inode->liability, lie) {
+ if (wb_requests_conflict (each, req))
+ return _gf_true;
}
- file = this->file;
- LOCK (&file->lock);
- {
- this = __wb_request_ref (this);
+ return _gf_false;
+}
+
+
+gf_boolean_t
+wb_wip_has_conflict (wb_inode_t *wb_inode, wb_request_t *req)
+{
+ wb_request_t *each = NULL;
+
+ if (req->stub->fop != GF_FOP_WRITE)
+ /* non-writes fundamentally never conflict with WIP requests */
+ return _gf_false;
+
+ list_for_each_entry (each, &wb_inode->wip, wip) {
+ if (each == req)
+ /* request never conflicts with itself,
+ though this condition should never occur.
+ */
+ continue;
+
+ if (wb_requests_overlap (each, req))
+ return _gf_true;
}
- UNLOCK (&file->lock);
- return this;
+ return _gf_false;
}
-wb_request_t *
-wb_enqueue (wb_file_t *file, call_stub_t *stub)
+static int
+__wb_request_unref (wb_request_t *req)
{
- wb_request_t *request = NULL;
- call_frame_t *frame = NULL;
- wb_local_t *local = NULL;
- struct iovec *vector = NULL;
- int32_t count = 0;
-
- request = CALLOC (1, sizeof (*request));
- if (request == NULL) {
+ int ret = -1;
+ wb_inode_t *wb_inode = NULL;
+
+ wb_inode = req->wb_inode;
+
+ if (req->refcount <= 0) {
+ gf_log ("wb-request", GF_LOG_WARNING,
+ "refcount(%d) is <= 0", req->refcount);
goto out;
}
- INIT_LIST_HEAD (&request->list);
- INIT_LIST_HEAD (&request->winds);
- INIT_LIST_HEAD (&request->unwinds);
- INIT_LIST_HEAD (&request->other_requests);
+ ret = --req->refcount;
+ if (req->refcount == 0) {
+ list_del_init (&req->todo);
+ list_del_init (&req->lie);
+ list_del_init (&req->wip);
- request->stub = stub;
- request->file = file;
+ list_del_init (&req->all);
+ if (list_empty (&wb_inode->all)) {
+ wb_inode->gen = 0;
+ /* in case of accounting errors? */
+ wb_inode->window_current = 0;
+ }
- frame = stub->frame;
- local = frame->local;
- if (local) {
- local->request = request;
- }
+ list_del_init (&req->winds);
+ list_del_init (&req->unwinds);
- if (stub->fop == GF_FOP_WRITE) {
- vector = stub->args.writev.vector;
- count = stub->args.writev.count;
+ if (req->stub && req->ordering.tempted) {
+ call_stub_destroy (req->stub);
+ req->stub = NULL;
+ } /* else we would have call_resume()'ed */
- frame = stub->frame;
- local = frame->local;
- local->op_ret = iov_length (vector, count);
- local->op_errno = 0;
+ if (req->iobref)
+ iobref_unref (req->iobref);
+
+ if (req->fd)
+ fd_unref (req->fd);
+
+ GF_FREE (req);
}
+out:
+ return ret;
+}
+
+
+static int
+wb_request_unref (wb_request_t *req)
+{
+ wb_inode_t *wb_inode = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("write-behind", req, out);
- LOCK (&file->lock);
+ wb_inode = req->wb_inode;
+
+ LOCK (&wb_inode->lock);
{
- list_add_tail (&request->list, &file->request);
- if (stub->fop == GF_FOP_WRITE) {
- /* reference for stack winding */
- __wb_request_ref (request);
-
- /* reference for stack unwinding */
- __wb_request_ref (request);
- } else {
- /*reference for resuming */
- __wb_request_ref (request);
- }
+ ret = __wb_request_unref (req);
}
- UNLOCK (&file->lock);
+ UNLOCK (&wb_inode->lock);
out:
- return request;
+ return ret;
}
-wb_file_t *
-wb_file_create (xlator_t *this, fd_t *fd)
+static wb_request_t *
+__wb_request_ref (wb_request_t *req)
{
- wb_file_t *file = NULL;
- wb_conf_t *conf = this->private;
+ GF_VALIDATE_OR_GOTO ("write-behind", req, out);
- file = CALLOC (1, sizeof (*file));
- if (file == NULL) {
+ if (req->refcount < 0) {
+ gf_log ("wb-request", GF_LOG_WARNING,
+ "refcount(%d) is < 0", req->refcount);
+ req = NULL;
goto out;
}
- INIT_LIST_HEAD (&file->request);
- INIT_LIST_HEAD (&file->passive_requests);
-
- /*
- fd_ref() not required, file should never decide the existance of
- an fd
- */
- file->fd= fd;
- file->disable_till = conf->disable_till;
- file->this = this;
- file->refcount = 1;
-
- fd_ctx_set (fd, this, (uint64_t)(long)file);
+ req->refcount++;
out:
- return file;
+ return req;
}
-void
-wb_file_destroy (wb_file_t *file)
+
+wb_request_t *
+wb_request_ref (wb_request_t *req)
{
- int32_t refcount = 0;
+ wb_inode_t *wb_inode = NULL;
- LOCK (&file->lock);
- {
- refcount = --file->refcount;
- }
- UNLOCK (&file->lock);
+ GF_VALIDATE_OR_GOTO ("write-behind", req, out);
- if (!refcount){
- LOCK_DESTROY (&file->lock);
- FREE (file);
+ wb_inode = req->wb_inode;
+ LOCK (&wb_inode->lock);
+ {
+ req = __wb_request_ref (req);
}
+ UNLOCK (&wb_inode->lock);
- return;
+out:
+ return req;
}
-int32_t
-wb_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct stat *stbuf)
+gf_boolean_t
+wb_enqueue_common (wb_inode_t *wb_inode, call_stub_t *stub, int tempted)
{
- wb_local_t *local = NULL;
- list_head_t *winds = NULL;
- wb_file_t *file = NULL;
- wb_request_t *request = NULL, *dummy = NULL;
- wb_local_t *per_request_local = NULL;
- int32_t ret = -1;
- fd_t *fd = NULL;
+ wb_request_t *req = NULL;
+ GF_VALIDATE_OR_GOTO ("write-behind", wb_inode, out);
+ GF_VALIDATE_OR_GOTO (wb_inode->this->name, stub, out);
- local = frame->local;
- winds = &local->winds;
- file = local->file;
+ req = GF_CALLOC (1, sizeof (*req), gf_wb_mt_wb_request_t);
+ if (!req)
+ goto out;
- LOCK (&file->lock);
- {
- list_for_each_entry_safe (request, dummy, winds, winds) {
- request->flags.write_request.got_reply = 1;
+ INIT_LIST_HEAD (&req->all);
+ INIT_LIST_HEAD (&req->todo);
+ INIT_LIST_HEAD (&req->lie);
+ INIT_LIST_HEAD (&req->winds);
+ INIT_LIST_HEAD (&req->unwinds);
+ INIT_LIST_HEAD (&req->wip);
- if (!request->flags.write_request.write_behind
- && (op_ret == -1)) {
- per_request_local = request->stub->frame->local;
- per_request_local->op_ret = op_ret;
- per_request_local->op_errno = op_errno;
- }
+ req->stub = stub;
+ req->wb_inode = wb_inode;
+ req->fop = stub->fop;
+ req->ordering.tempted = tempted;
- __wb_request_unref (request);
- }
-
- if (op_ret == -1) {
- file->op_ret = op_ret;
- file->op_errno = op_errno;
- }
- fd = file->fd;
- }
- UNLOCK (&file->lock);
+ if (stub->fop == GF_FOP_WRITE) {
+ req->write_size = iov_length (stub->args.vector,
+ stub->args.count);
- ret = wb_process_queue (frame, file, 0);
- if ((ret == -1) && (errno == ENOMEM)) {
- LOCK (&file->lock);
- {
- file->op_ret = -1;
- file->op_errno = ENOMEM;
- }
- UNLOCK (&file->lock);
+ /* req->write_size can change as we collapse
+ small writes. But the window needs to grow
+ only by how much we acknowledge the app. so
+ copy the original size in orig_size for the
+ purpose of accounting.
+ */
+ req->orig_size = req->write_size;
+
+ /* Let's be optimistic that we can
+ lie about it
+ */
+ req->op_ret = req->write_size;
+ req->op_errno = 0;
+
+ if (stub->args.fd->flags & O_APPEND)
+ req->ordering.append = 1;
}
- /* safe place to do fd_unref */
- fd_unref (fd);
+ req->lk_owner = stub->frame->root->lk_owner;
- STACK_DESTROY (frame->root);
+ switch (stub->fop) {
+ case GF_FOP_WRITE:
+ req->ordering.off = stub->args.offset;
+ req->ordering.size = req->write_size;
- return 0;
-}
+ req->fd = fd_ref (stub->args.fd);
+ break;
+ case GF_FOP_READ:
+ req->ordering.off = stub->args.offset;
+ req->ordering.size = stub->args.size;
-ssize_t
-wb_sync (call_frame_t *frame, wb_file_t *file, list_head_t *winds)
-{
- wb_request_t *dummy = NULL, *request = NULL;
- wb_request_t *first_request = NULL, *next = NULL;
- size_t total_count = 0, count = 0;
- size_t copied = 0;
- call_frame_t *sync_frame = NULL;
- struct iobref *iobref = NULL;
- wb_local_t *local = NULL;
- struct iovec *vector = NULL;
- ssize_t bytes = 0, current_size = 0;
- size_t bytecount = 0;
- wb_conf_t *conf = NULL;
- fd_t *fd = NULL;
-
- conf = file->this->private;
- list_for_each_entry (request, winds, winds) {
- total_count += request->stub->args.writev.count;
- bytes += iov_length (request->stub->args.writev.vector,
- request->stub->args.writev.count);
- }
+ req->fd = fd_ref (stub->args.fd);
- if (total_count == 0) {
- goto out;
- }
-
- list_for_each_entry_safe (request, dummy, winds, winds) {
- if (!vector) {
- vector = MALLOC (VECTORSIZE (MAX_VECTOR_COUNT));
- if (vector == NULL) {
- bytes = -1;
- goto out;
- }
-
- iobref = iobref_new ();
- if (iobref == NULL) {
- bytes = -1;
- goto out;
- }
-
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- bytes = -1;
- goto out;
- }
-
- INIT_LIST_HEAD (&local->winds);
-
- first_request = request;
- current_size = 0;
- }
+ break;
+ case GF_FOP_TRUNCATE:
+ req->ordering.off = stub->args.offset;
+ req->ordering.size = 0; /* till infinity */
+ break;
+ case GF_FOP_FTRUNCATE:
+ req->ordering.off = stub->args.offset;
+ req->ordering.size = 0; /* till infinity */
- count += request->stub->args.writev.count;
- bytecount = VECTORSIZE (request->stub->args.writev.count);
- memcpy (((char *)vector)+copied,
- request->stub->args.writev.vector,
- bytecount);
- copied += bytecount;
-
- current_size += iov_length (request->stub->args.writev.vector,
- request->stub->args.writev.count);
-
- if (request->stub->args.writev.iobref) {
- iobref_merge (iobref,
- request->stub->args.writev.iobref);
- }
+ req->fd = fd_ref (stub->args.fd);
- next = NULL;
- if (request->winds.next != winds) {
- next = list_entry (request->winds.next,
- wb_request_t, winds);
- }
+ break;
+ default:
+ break;
+ }
- list_del_init (&request->winds);
- list_add_tail (&request->winds, &local->winds);
+ LOCK (&wb_inode->lock);
+ {
+ list_add_tail (&req->all, &wb_inode->all);
- if ((!next)
- || ((count + next->stub->args.writev.count)
- > MAX_VECTOR_COUNT)
- || ((current_size + iov_length (next->stub->args.writev.vector,
- next->stub->args.writev.count))
- > conf->aggregate_size))
- {
- sync_frame = copy_frame (frame);
- if (sync_frame == NULL) {
- bytes = -1;
- goto out;
- }
-
- sync_frame->local = local;
- local->file = file;
-
- LOCK (&file->lock);
- {
- fd = file->fd;
- }
- UNLOCK (&file->lock);
-
- fd_ref (fd);
-
- STACK_WIND (sync_frame,
- wb_sync_cbk,
- FIRST_CHILD(sync_frame->this),
- FIRST_CHILD(sync_frame->this)->fops->writev,
- fd, vector,
- count,
- first_request->stub->args.writev.off,
- iobref);
-
- iobref_unref (iobref);
- FREE (vector);
- first_request = NULL;
- iobref = NULL;
- vector = NULL;
- sync_frame = NULL;
- local = NULL;
- copied = count = 0;
- }
+ req->gen = wb_inode->gen;
+
+ list_add_tail (&req->todo, &wb_inode->todo);
+ __wb_request_ref (req); /* for wind */
+
+ if (req->ordering.tempted) {
+ list_add_tail (&req->lie, &wb_inode->temptation);
+ __wb_request_ref (req); /* for unwind */
+ }
}
+ UNLOCK (&wb_inode->lock);
out:
- if (sync_frame != NULL) {
- sync_frame->local = NULL;
- STACK_DESTROY (sync_frame->root);
- }
+ if (!req)
+ return _gf_false;
- if (local != NULL) {
- FREE (local);
- }
+ return _gf_true;
+}
- if (iobref != NULL) {
- iobref_unref (iobref);
- }
- if (vector != NULL) {
- FREE (vector);
- }
+gf_boolean_t
+wb_enqueue (wb_inode_t *wb_inode, call_stub_t *stub)
+{
+ return wb_enqueue_common (wb_inode, stub, 0);
+}
- return bytes;
+
+gf_boolean_t
+wb_enqueue_tempted (wb_inode_t *wb_inode, call_stub_t *stub)
+{
+ return wb_enqueue_common (wb_inode, stub, 1);
}
-int32_t
-wb_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct stat *buf)
+wb_inode_t *
+__wb_inode_create (xlator_t *this, inode_t *inode)
{
- wb_local_t *local = NULL;
- wb_request_t *request = NULL;
- call_frame_t *process_frame = NULL;
- wb_file_t *file = NULL;
- int32_t ret = -1;
- fd_t *fd = NULL;
-
- local = frame->local;
- file = local->file;
-
- request = local->request;
- if (request) {
- process_frame = copy_frame (frame);
- if (process_frame == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
- }
+ wb_inode_t *wb_inode = NULL;
+ wb_conf_t *conf = NULL;
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
- if (request != NULL) {
- wb_request_unref (request);
- }
+ conf = this->private;
- if (process_frame != NULL) {
- ret = wb_process_queue (process_frame, file, 0);
- if ((ret == -1) && (errno == ENOMEM) && (file != NULL)) {
- LOCK (&file->lock);
- {
- file->op_ret = -1;
- file->op_errno = ENOMEM;
- }
- UNLOCK (&file->lock);
- }
+ wb_inode = GF_CALLOC (1, sizeof (*wb_inode), gf_wb_mt_wb_inode_t);
+ if (!wb_inode)
+ goto out;
- STACK_DESTROY (process_frame->root);
- }
+ INIT_LIST_HEAD (&wb_inode->all);
+ INIT_LIST_HEAD (&wb_inode->todo);
+ INIT_LIST_HEAD (&wb_inode->liability);
+ INIT_LIST_HEAD (&wb_inode->temptation);
+ INIT_LIST_HEAD (&wb_inode->wip);
- if (file) {
- LOCK (&file->lock);
- {
- fd = file->fd;
- }
- UNLOCK (&file->lock);
+ wb_inode->this = this;
- fd_unref (fd);
- }
+ wb_inode->window_conf = conf->window_size;
- return 0;
-}
+ LOCK_INIT (&wb_inode->lock);
+ __inode_ctx_put (inode, this, (uint64_t)(unsigned long)wb_inode);
-static int32_t
-wb_stat_helper (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- STACK_WIND (frame, wb_stat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat,
- loc);
- return 0;
+out:
+ return wb_inode;
}
-int32_t
-wb_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
+wb_inode_t *
+wb_inode_create (xlator_t *this, inode_t *inode)
{
- wb_file_t *file = NULL;
- fd_t *iter_fd = NULL;
- wb_local_t *local = NULL;
- uint64_t tmp_file = 0;
- call_stub_t *stub = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1;
-
- if (loc->inode) {
- /* FIXME: fd_lookup extends life of fd till stat returns */
- iter_fd = fd_lookup (loc->inode, frame->root->pid);
- if (iter_fd) {
- if (!fd_ctx_get (iter_fd, this, &tmp_file)) {
- file = (wb_file_t *)(long)tmp_file;
- } else {
- fd_unref (iter_fd);
- iter_fd = NULL;
- }
- }
- }
+ wb_inode_t *wb_inode = NULL;
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
-
- if (iter_fd != NULL) {
- fd_unref (iter_fd);
- }
- return 0;
+ GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+ LOCK (&inode->lock);
+ {
+ wb_inode = __wb_inode_ctx_get (this, inode);
+ if (!wb_inode)
+ wb_inode = __wb_inode_create (this, inode);
}
+ UNLOCK (&inode->lock);
- local->file = file;
+out:
+ return wb_inode;
+}
- frame->local = local;
- if (file) {
- stub = fop_stat_stub (frame, wb_stat_helper, loc);
- if (stub == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
+void
+wb_inode_destroy (wb_inode_t *wb_inode)
+{
+ GF_VALIDATE_OR_GOTO ("write-behind", wb_inode, out);
+
+ LOCK_DESTROY (&wb_inode->lock);
+ GF_FREE (wb_inode);
+out:
+ return;
+}
- if (iter_fd != NULL) {
- fd_unref (iter_fd);
- }
- return 0;
- }
-
- request = wb_enqueue (file, stub);
- if (request == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- call_stub_destroy (stub);
-
- if (iter_fd != NULL) {
- fd_unref (iter_fd);
- }
- return 0;
- }
+void
+__wb_fulfill_request (wb_request_t *req)
+{
+ wb_inode_t *wb_inode = NULL;
- ret = wb_process_queue (frame, file, 1);
- if ((ret == -1) && (errno == ENOMEM)) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- call_stub_destroy (stub);
+ wb_inode = req->wb_inode;
- if (iter_fd != NULL) {
- fd_unref (iter_fd);
- }
- return 0;
- }
+ req->ordering.fulfilled = 1;
+ wb_inode->window_current -= req->total_size;
+ wb_inode->transit -= req->total_size;
- } else {
- STACK_WIND (frame, wb_stat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat,
- loc);
- }
+ if (!req->ordering.lied) {
+ /* TODO: fail the req->frame with error if
+ necessary
+ */
+ }
- return 0;
+ __wb_request_unref (req);
}
-int32_t
-wb_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct stat *buf)
+void
+wb_head_done (wb_request_t *head)
{
- wb_local_t *local = NULL;
- wb_request_t *request = NULL;
- wb_file_t *file = NULL;
- int32_t ret = -1;
-
- local = frame->local;
- file = local->file;
-
- request = local->request;
- if (request) {
- wb_request_unref (request);
- ret = wb_process_queue (frame, file, 0);
- if ((ret == -1) && (errno == ENOMEM)) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
-
- return 0;
+ wb_request_t *req = NULL;
+ wb_request_t *tmp = NULL;
+ wb_inode_t *wb_inode = NULL;
+
+ wb_inode = head->wb_inode;
+
+ LOCK (&wb_inode->lock);
+ {
+ list_for_each_entry_safe (req, tmp, &head->winds, winds) {
+ __wb_fulfill_request (req);
+ }
+ __wb_fulfill_request (head);
+ }
+ UNLOCK (&wb_inode->lock);
}
-int32_t
-wb_fstat_helper (call_frame_t *frame, xlator_t *this, fd_t *fd)
+void
+wb_fulfill_err (wb_request_t *head, int op_errno)
{
- STACK_WIND (frame,
- wb_fstat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat,
- fd);
- return 0;
+ wb_inode_t *wb_inode;
+ wb_request_t *req;
+
+ wb_inode = head->wb_inode;
+
+ /* for all future requests yet to arrive */
+ fd_ctx_set (head->fd, THIS, op_errno);
+
+ LOCK (&wb_inode->lock);
+ {
+ /* for all requests already arrived */
+ list_for_each_entry (req, &wb_inode->all, all) {
+ if (req->fd != head->fd)
+ continue;
+ req->op_ret = -1;
+ req->op_errno = op_errno;
+ }
+ }
+ UNLOCK (&wb_inode->lock);
}
-int32_t
-wb_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
+int
+wb_fulfill_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- wb_file_t *file = NULL;
- wb_local_t *local = NULL;
- uint64_t tmp_file = 0;
- call_stub_t *stub = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1;
-
- if ((!S_ISDIR (fd->inode->st_mode))
- && fd_ctx_get (fd, this, &tmp_file)) {
- gf_log (this->name, GF_LOG_DEBUG, "write behind file pointer is"
- " not stored in context of fd(%p), returning EBADFD",
- fd);
-
- STACK_UNWIND (frame, -1, EBADFD, NULL);
- return 0;
- }
+ wb_inode_t *wb_inode = NULL;
+ wb_request_t *head = NULL;
- file = (wb_file_t *)(long)tmp_file;
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- return 0;
- }
+ head = frame->local;
+ frame->local = NULL;
- local->file = file;
+ wb_inode = head->wb_inode;
- frame->local = local;
+ if (op_ret == -1) {
+ wb_fulfill_err (head, op_errno);
+ } else if (op_ret < head->total_size) {
+ /*
+ * We've encountered a short write, for whatever reason.
+ * Set an EIO error for the next fop. This should be
+ * valid for writev or flush (close).
+ *
+ * TODO: Retry the write so we can potentially capture
+ * a real error condition (i.e., ENOSPC).
+ */
+ wb_fulfill_err (head, EIO);
+ }
- if (file) {
- stub = fop_fstat_stub (frame, wb_fstat_helper, fd);
- if (stub == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- return 0;
- }
-
- request = wb_enqueue (file, stub);
- if (request == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- call_stub_destroy (stub);
- return 0;
- }
+ wb_head_done (head);
- /*
- FIXME:should the request queue be emptied in case of error?
- */
- ret = wb_process_queue (frame, file, 1);
- if ((ret == -1) && (errno == ENOMEM)) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- call_stub_destroy (stub);
+ wb_process_queue (wb_inode);
- return 0;
- }
- } else {
- STACK_WIND (frame,
- wb_fstat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat,
- fd);
- }
+ STACK_DESTROY (frame->root);
return 0;
}
-int32_t
-wb_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+#define WB_IOV_LOAD(vec, cnt, req, head) do { \
+ memcpy (&vec[cnt], req->stub->args.vector, \
+ (req->stub->args.count * sizeof(vec[0]))); \
+ cnt += req->stub->args.count; \
+ head->total_size += req->write_size; \
+ } while (0)
+
+
+int
+wb_fulfill_head (wb_inode_t *wb_inode, wb_request_t *head)
{
- wb_local_t *local = NULL;
- wb_request_t *request = NULL;
- wb_file_t *file = NULL;
- call_frame_t *process_frame = NULL;
- int32_t ret = -1;
- fd_t *fd = NULL;
-
- local = frame->local;
- file = local->file;
- request = local->request;
-
- if (request) {
- process_frame = copy_frame (frame);
- if (process_frame == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
- }
+ struct iovec vector[MAX_VECTOR_COUNT];
+ int count = 0;
+ wb_request_t *req = NULL;
+ call_frame_t *frame = NULL;
+ gf_boolean_t fderr = _gf_false;
+ xlator_t *this = NULL;
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ this = THIS;
- if (request) {
- wb_request_unref (request);
- }
+ /* make sure head->total_size is updated before we run into any
+ * errors
+ */
- if (process_frame != NULL) {
- ret = wb_process_queue (process_frame, file, 0);
- if ((ret == -1) && (errno == ENOMEM) && (file != NULL)) {
- LOCK (&file->lock);
- {
- file->op_ret = -1;
- file->op_errno = ENOMEM;
- }
- UNLOCK (&file->lock);
- }
+ WB_IOV_LOAD (vector, count, head, head);
+
+ list_for_each_entry (req, &head->winds, winds) {
+ WB_IOV_LOAD (vector, count, req, head);
- STACK_DESTROY (process_frame->root);
+ iobref_merge (head->stub->args.iobref,
+ req->stub->args.iobref);
+ }
+
+ if (wb_fd_err (head->fd, this, NULL)) {
+ fderr = _gf_true;
+ goto err;
}
- if (file) {
- LOCK (&file->lock);
- {
- fd = file->fd;
- }
- UNLOCK (&file->lock);
+ frame = create_frame (wb_inode->this, wb_inode->this->ctx->pool);
+ if (!frame)
+ goto err;
+
+ frame->root->lk_owner = head->lk_owner;
+ frame->local = head;
+
+ LOCK (&wb_inode->lock);
+ {
+ wb_inode->transit += head->total_size;
+ }
+ UNLOCK (&wb_inode->lock);
+
+ STACK_WIND (frame, wb_fulfill_cbk, FIRST_CHILD (frame->this),
+ FIRST_CHILD (frame->this)->fops->writev,
+ head->fd, vector, count,
+ head->stub->args.offset,
+ head->stub->args.flags,
+ head->stub->args.iobref, NULL);
- fd_unref (fd);
+ return 0;
+err:
+ if (!fderr) {
+ /* frame creation failure */
+ fderr = ENOMEM;
+ wb_fulfill_err (head, fderr);
}
- return 0;
+ wb_head_done (head);
+
+ return fderr;
}
-static int32_t
-wb_truncate_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- off_t offset)
-{
- STACK_WIND (frame,
- wb_truncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate,
- loc,
- offset);
+#define NEXT_HEAD(head, req) do { \
+ if (head) \
+ ret |= wb_fulfill_head (wb_inode, head); \
+ head = req; \
+ expected_offset = req->stub->args.offset + \
+ req->write_size; \
+ curr_aggregate = 0; \
+ vector_count = 0; \
+ } while (0)
- return 0;
+
+int
+wb_fulfill (wb_inode_t *wb_inode, list_head_t *liabilities)
+{
+ wb_request_t *req = NULL;
+ wb_request_t *head = NULL;
+ wb_request_t *tmp = NULL;
+ wb_conf_t *conf = NULL;
+ off_t expected_offset = 0;
+ size_t curr_aggregate = 0;
+ size_t vector_count = 0;
+ int ret = 0;
+
+ conf = wb_inode->this->private;
+
+ list_for_each_entry_safe (req, tmp, liabilities, winds) {
+ list_del_init (&req->winds);
+
+ if (!head) {
+ NEXT_HEAD (head, req);
+ continue;
+ }
+
+ if (req->fd != head->fd) {
+ NEXT_HEAD (head, req);
+ continue;
+ }
+
+ if (!is_same_lkowner (&req->lk_owner, &head->lk_owner)) {
+ NEXT_HEAD (head, req);
+ continue;
+ }
+
+ if (expected_offset != req->stub->args.offset) {
+ NEXT_HEAD (head, req);
+ continue;
+ }
+
+ if ((curr_aggregate + req->write_size) > conf->aggregate_size) {
+ NEXT_HEAD (head, req);
+ continue;
+ }
+
+ if (vector_count + req->stub->args.count >
+ MAX_VECTOR_COUNT) {
+ NEXT_HEAD (head, req);
+ continue;
+ }
+
+ list_add_tail (&req->winds, &head->winds);
+ curr_aggregate += req->write_size;
+ vector_count += req->stub->args.count;
+ }
+
+ if (head)
+ ret |= wb_fulfill_head (wb_inode, head);
+
+ return ret;
}
-int32_t
-wb_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
+void
+wb_do_unwinds (wb_inode_t *wb_inode, list_head_t *lies)
{
- wb_file_t *file = NULL;
- fd_t *iter_fd = NULL;
- wb_local_t *local = NULL;
- uint64_t tmp_file = 0;
- call_stub_t *stub = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1;
-
- if (loc->inode)
- {
- /*
- FIXME: fd_lookup extends life of fd till the execution of
- truncate_cbk
- */
- iter_fd = fd_lookup (loc->inode, frame->root->pid);
- if (iter_fd) {
- if (!fd_ctx_get (iter_fd, this, &tmp_file)){
- file = (wb_file_t *)(long)tmp_file;
- } else {
- fd_unref (iter_fd);
- }
- }
- }
-
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- return 0;
- }
+ wb_request_t *req = NULL;
+ wb_request_t *tmp = NULL;
+ call_frame_t *frame = NULL;
+ struct iatt buf = {0, };
- local->file = file;
-
- frame->local = local;
- if (file) {
- stub = fop_truncate_stub (frame, wb_truncate_helper, loc,
- offset);
- if (stub == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- return 0;
- }
+ list_for_each_entry_safe (req, tmp, lies, unwinds) {
+ frame = req->stub->frame;
- request = wb_enqueue (file, stub);
- if (request == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- call_stub_destroy (stub);
- return 0;
- }
-
- ret = wb_process_queue (frame, file, 1);
- if ((ret == -1) && (errno == ENOMEM)) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- call_stub_destroy (stub);
- return 0;
- }
- } else {
- STACK_WIND (frame,
- wb_truncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate,
- loc,
- offset);
+ STACK_UNWIND_STRICT (writev, frame, req->op_ret, req->op_errno,
+ &buf, &buf, NULL); /* :O */
+ req->stub->frame = NULL;
+
+ list_del_init (&req->unwinds);
+ wb_request_unref (req);
}
- return 0;
+ return;
}
-int32_t
-wb_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+void
+__wb_pick_unwinds (wb_inode_t *wb_inode, list_head_t *lies)
{
- wb_local_t *local = NULL;
- wb_request_t *request = NULL;
- wb_file_t *file = NULL;
- int32_t ret = -1;
-
- local = frame->local;
- file = local->file;
- request = local->request;
-
- if (request) {
- wb_request_unref (request);
- ret = wb_process_queue (frame, file, 0);
- if ((ret == -1) && (errno == ENOMEM)) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
- }
+ wb_request_t *req = NULL;
+ wb_request_t *tmp = NULL;
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ list_for_each_entry_safe (req, tmp, &wb_inode->temptation, lie) {
+ if (!req->ordering.fulfilled &&
+ wb_inode->window_current > wb_inode->window_conf)
+ continue;
- return 0;
-}
+ list_del_init (&req->lie);
+ list_move_tail (&req->unwinds, lies);
+ wb_inode->window_current += req->orig_size;
-static int32_t
-wb_ftruncate_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- off_t offset)
-{
- STACK_WIND (frame,
- wb_ftruncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate,
- fd,
- offset);
- return 0;
+ if (!req->ordering.fulfilled) {
+ /* burden increased */
+ list_add_tail (&req->lie, &wb_inode->liability);
+
+ req->ordering.lied = 1;
+
+ wb_inode->gen++;
+ }
+ }
+
+ return;
}
-
-int32_t
-wb_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
+
+int
+__wb_collapse_small_writes (wb_request_t *holder, wb_request_t *req)
{
- wb_file_t *file = NULL;
- wb_local_t *local = NULL;
- uint64_t tmp_file = 0;
- call_stub_t *stub = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1;
-
- if ((!S_ISDIR (fd->inode->st_mode))
- && fd_ctx_get (fd, this, &tmp_file)) {
- gf_log (this->name, GF_LOG_DEBUG, "write behind file pointer is"
- " not stored in context of fd(%p), returning EBADFD",
- fd);
-
- STACK_UNWIND (frame, -1, EBADFD, NULL);
- return 0;
- }
+ char *ptr = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ int ret = -1;
+ ssize_t required_size = 0;
+ size_t holder_len = 0;
+ size_t req_len = 0;
+
+ if (!holder->iobref) {
+ holder_len = iov_length (holder->stub->args.vector,
+ holder->stub->args.count);
+ req_len = iov_length (req->stub->args.vector,
+ req->stub->args.count);
+
+ required_size = max ((THIS->ctx->page_size),
+ (holder_len + req_len));
+ iobuf = iobuf_get2 (req->wb_inode->this->ctx->iobuf_pool,
+ required_size);
+ if (iobuf == NULL) {
+ goto out;
+ }
+
+ iobref = iobref_new ();
+ if (iobref == NULL) {
+ iobuf_unref (iobuf);
+ goto out;
+ }
+
+ ret = iobref_add (iobref, iobuf);
+ if (ret != 0) {
+ iobuf_unref (iobuf);
+ iobref_unref (iobref);
+ gf_log (req->wb_inode->this->name, GF_LOG_WARNING,
+ "cannot add iobuf (%p) into iobref (%p)",
+ iobuf, iobref);
+ goto out;
+ }
- file = (wb_file_t *)(long)tmp_file;
+ iov_unload (iobuf->ptr, holder->stub->args.vector,
+ holder->stub->args.count);
+ holder->stub->args.vector[0].iov_base = iobuf->ptr;
+ holder->stub->args.count = 1;
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- return 0;
- }
+ iobref_unref (holder->stub->args.iobref);
+ holder->stub->args.iobref = iobref;
- local->file = file;
+ iobuf_unref (iobuf);
- frame->local = local;
+ holder->iobref = iobref_ref (iobref);
+ }
- if (file) {
- stub = fop_ftruncate_stub (frame, wb_ftruncate_helper, fd,
- offset);
- if (stub == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- return 0;
- }
+ ptr = holder->stub->args.vector[0].iov_base + holder->write_size;
- request = wb_enqueue (file, stub);
- if (request == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- call_stub_destroy (stub);
- return 0;
- }
+ iov_unload (ptr, req->stub->args.vector,
+ req->stub->args.count);
- ret = wb_process_queue (frame, file, 1);
- if ((ret == -1) && (errno == ENOMEM)) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- call_stub_destroy (stub);
- return 0;
- }
- } else {
- STACK_WIND (frame,
- wb_ftruncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate,
- fd,
- offset);
- }
+ holder->stub->args.vector[0].iov_len += req->write_size;
+ holder->write_size += req->write_size;
+ holder->ordering.size += req->write_size;
- return 0;
+ ret = 0;
+out:
+ return ret;
}
-int32_t
-wb_utimens_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *buf)
+void
+__wb_preprocess_winds (wb_inode_t *wb_inode)
{
- wb_local_t *local = NULL;
- wb_request_t *request = NULL;
- call_frame_t *process_frame = NULL;
- wb_file_t *file = NULL;
- int32_t ret = -1;
- fd_t *fd = NULL;
-
- local = frame->local;
- file = local->file;
- request = local->request;
-
- if (request) {
- process_frame = copy_frame (frame);
- if (process_frame == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
+ off_t offset_expected = 0;
+ ssize_t space_left = 0;
+ wb_request_t *req = NULL;
+ wb_request_t *tmp = NULL;
+ wb_request_t *holder = NULL;
+ wb_conf_t *conf = NULL;
+ int ret = 0;
+ ssize_t page_size = 0;
+
+ /* With asynchronous IO from a VM guest (as a file), there
+ can be two sequential writes happening in two regions
+ of the file. But individual (broken down) IO requests
+ can arrive interleaved.
+
+ TODO: cycle for each such sequence sifting
+ through the interleaved ops
+ */
+
+ page_size = wb_inode->this->ctx->page_size;
+ conf = wb_inode->this->private;
+
+ list_for_each_entry_safe (req, tmp, &wb_inode->todo, todo) {
+ if (!req->ordering.tempted) {
+ if (holder) {
+ if (wb_requests_conflict (holder, req))
+ /* do not hold on write if a
+ dependent write is in queue */
+ holder->ordering.go = 1;
+ }
+ /* collapse only non-sync writes */
+ continue;
+ } else if (!holder) {
+ /* holder is always a non-sync write */
+ holder = req;
+ continue;
+ }
+
+ offset_expected = holder->stub->args.offset
+ + holder->write_size;
+
+ if (req->stub->args.offset != offset_expected) {
+ holder->ordering.go = 1;
+ holder = req;
+ continue;
+ }
+
+ if (!is_same_lkowner (&req->lk_owner, &holder->lk_owner)) {
+ holder->ordering.go = 1;
+ holder = req;
+ continue;
+ }
+
+ if (req->fd != holder->fd) {
+ holder->ordering.go = 1;
+ holder = req;
+ continue;
}
- }
- STACK_UNWIND (frame, op_ret, op_errno, buf);
+ space_left = page_size - holder->write_size;
- if (request) {
- wb_request_unref (request);
- }
+ if (space_left < req->write_size) {
+ holder->ordering.go = 1;
+ holder = req;
+ continue;
+ }
- if (request && (process_frame != NULL)) {
- ret = wb_process_queue (process_frame, file, 0);
- if ((ret == -1) && (errno == ENOMEM) && (file != NULL)) {
- LOCK (&file->lock);
- {
- file->op_ret = -1;
- file->op_errno = ENOMEM;
- }
- UNLOCK (&file->lock);
- }
+ ret = __wb_collapse_small_writes (holder, req);
+ if (ret)
+ continue;
- STACK_DESTROY (process_frame->root);
- }
+ /* collapsed request is as good as wound
+ (from its p.o.v)
+ */
+ list_del_init (&req->todo);
+ __wb_fulfill_request (req);
- if (file) {
- LOCK (&file->lock);
- {
- fd = file->fd;
- }
- UNLOCK (&file->lock);
+ /* Only the last @holder in queue which
- fd_unref (fd);
- }
+ - does not have any non-buffered-writes following it
+ - has not yet filled its capacity
- return 0;
-}
+ does not get its 'go' set, in anticipation of the arrival
+ of consecutive smaller writes.
+ */
+ }
+ /* but if trickling writes are enabled, then do not hold back
+ writes if there are no outstanding requests
+ */
-static int32_t
-wb_utimens_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct timespec tv[2])
-{
- STACK_WIND (frame,
- wb_utimens_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->utimens,
- loc,
- tv);
+ if (conf->trickling_writes && !wb_inode->transit && holder)
+ holder->ordering.go = 1;
- return 0;
+ return;
}
-int32_t
-wb_utimens (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct timespec tv[2])
+void
+__wb_pick_winds (wb_inode_t *wb_inode, list_head_t *tasks,
+ list_head_t *liabilities)
{
- wb_file_t *file = NULL;
- fd_t *iter_fd = NULL;
- wb_local_t *local = NULL;
- uint64_t tmp_file = 0;
- call_stub_t *stub = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1;
-
- if (loc->inode) {
- /*
- FIXME: fd_lookup extends life of fd till the execution
- of wb_utimens_cbk
- */
- iter_fd = fd_lookup (loc->inode, frame->root->pid);
- if (iter_fd) {
- if (!fd_ctx_get (iter_fd, this, &tmp_file)) {
- file = (wb_file_t *)(long)tmp_file;
- } else {
- fd_unref (iter_fd);
- }
- }
+ wb_request_t *req = NULL;
+ wb_request_t *tmp = NULL;
- }
+ list_for_each_entry_safe (req, tmp, &wb_inode->todo, todo) {
+ if (wb_liability_has_conflict (wb_inode, req))
+ continue;
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- return 0;
- }
+ if (req->ordering.tempted && !req->ordering.go)
+ /* wait some more */
+ continue;
- local->file = file;
+ if (req->stub->fop == GF_FOP_WRITE) {
+ if (wb_wip_has_conflict (wb_inode, req))
+ continue;
- frame->local = local;
+ list_add_tail (&req->wip, &wb_inode->wip);
- if (file) {
- stub = fop_utimens_stub (frame, wb_utimens_helper, loc, tv);
- if (stub == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- return 0;
- }
+ if (!req->ordering.tempted)
+ /* unrefed in wb_writev_cbk */
+ req->stub->frame->local =
+ __wb_request_ref (req);
+ }
- request = wb_enqueue (file, stub);
- if (request == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- call_stub_destroy (stub);
- return 0;
- }
+ list_del_init (&req->todo);
- ret = wb_process_queue (frame, file, 1);
- if ((ret == -1) && (errno == ENOMEM)) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- call_stub_destroy (stub);
- return 0;
- }
- } else {
- STACK_WIND (frame,
- wb_utimens_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->utimens,
- loc,
- tv);
- }
-
- return 0;
+ if (req->ordering.tempted)
+ list_add_tail (&req->winds, liabilities);
+ else
+ list_add_tail (&req->winds, tasks);
+ }
}
-int32_t
-wb_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd)
+
+void
+wb_do_winds (wb_inode_t *wb_inode, list_head_t *tasks)
{
- long flags = 0;
- wb_file_t *file = NULL;
- wb_conf_t *conf = this->private;
-
- if (op_ret != -1) {
- file = wb_file_create (this, fd);
- if (file == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM, fd);
- return 0;
- }
+ wb_request_t *req = NULL;
+ wb_request_t *tmp = NULL;
- /*
- If mandatory locking has been enabled on this file,
- we disable caching on it
- */
-
- if ((fd->inode->st_mode & S_ISGID)
- && !(fd->inode->st_mode & S_IXGRP))
- file->disabled = 1;
-
- /* If O_DIRECT then, we disable chaching */
- if (frame->local) {
- flags = (long)frame->local;
- if (((flags & O_DIRECT) == O_DIRECT)
- || ((flags & O_ACCMODE) == O_RDONLY)
- || (((flags & O_SYNC) == O_SYNC)
- && conf->enable_O_SYNC == _gf_true)) {
- file->window_size = 0;
- }
- }
+ list_for_each_entry_safe (req, tmp, tasks, winds) {
+ list_del_init (&req->winds);
- LOCK_INIT (&file->lock);
- }
-
- frame->local = NULL;
+ call_resume (req->stub);
- STACK_UNWIND (frame, op_ret, op_errno, fd);
- return 0;
+ wb_request_unref (req);
+ }
}
-int32_t
-wb_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd)
+void
+wb_process_queue (wb_inode_t *wb_inode)
{
- frame->local = (void *)(long)flags;
+ list_head_t tasks = {0, };
+ list_head_t lies = {0, };
+ list_head_t liabilities = {0, };
+ int retry = 0;
- STACK_WIND (frame,
- wb_open_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open,
- loc, flags, fd);
- return 0;
-}
+ INIT_LIST_HEAD (&tasks);
+ INIT_LIST_HEAD (&lies);
+ INIT_LIST_HEAD (&liabilities);
+ do {
+ LOCK (&wb_inode->lock);
+ {
+ __wb_preprocess_winds (wb_inode);
+
+ __wb_pick_winds (wb_inode, &tasks, &liabilities);
+
+ __wb_pick_unwinds (wb_inode, &lies);
-int32_t
-wb_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
- struct stat *buf)
-{
- long flags = 0;
- wb_file_t *file = NULL;
- wb_conf_t *conf = this->private;
-
- if (op_ret != -1) {
- file = wb_file_create (this, fd);
- if (file == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM, fd, inode, buf);
- return 0;
}
- /*
- * If mandatory locking has been enabled on this file,
- * we disable caching on it
+ UNLOCK (&wb_inode->lock);
+
+ wb_do_unwinds (wb_inode, &lies);
+
+ wb_do_winds (wb_inode, &tasks);
+
+ /* fd might've been marked bad due to previous errors.
+ * Since, caller of wb_process_queue might be the last fop on
+ * inode, make sure we keep processing request queue, till there
+ * are no requests left.
*/
- if ((fd->inode->st_mode & S_ISGID)
- && !(fd->inode->st_mode & S_IXGRP))
- file->disabled = 1;
-
- /* If O_DIRECT then, we disable chaching */
- if (frame->local) {
- flags = (long)frame->local;
- if (((flags & O_DIRECT) == O_DIRECT)
- || ((flags & O_ACCMODE) == O_RDONLY)
- || (((flags & O_SYNC) == O_SYNC)
- && (conf->enable_O_SYNC == _gf_true))) {
- file->window_size = 0;
- }
- }
+ retry = wb_fulfill (wb_inode, &liabilities);
+ } while (retry);
- LOCK_INIT (&file->lock);
- }
-
- frame->local = NULL;
-
- STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
- return 0;
+ return;
}
-int32_t
-wb_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, fd_t *fd)
+int
+wb_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
- frame->local = (void *)(long)flags;
+ wb_request_t *req = NULL;
+ wb_inode_t *wb_inode;
- STACK_WIND (frame,
- wb_create_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create,
- loc, flags, mode, fd);
- return 0;
-}
+ req = frame->local;
+ frame->local = NULL;
+ wb_inode = req->wb_inode;
+ wb_request_unref (req);
-size_t
-__wb_mark_wind_all (list_head_t *list, list_head_t *winds)
-{
- wb_request_t *request = NULL;
- size_t size = 0;
- struct iovec *vector = NULL;
- int32_t count = 0;
- char first_request = 1;
- off_t offset_expected = 0;
- size_t length = 0;
+ /* requests could be pending while this was in progress */
+ wb_process_queue(wb_inode);
- list_for_each_entry (request, list, list)
- {
- if ((request->stub == NULL)
- || (request->stub->fop != GF_FOP_WRITE)) {
- break;
- }
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
- vector = request->stub->args.writev.vector;
- count = request->stub->args.writev.count;
- if (!request->flags.write_request.stack_wound) {
- if (first_request) {
- first_request = 0;
- offset_expected = request->stub->args.writev.off;
- }
-
- if (request->stub->args.writev.off != offset_expected) {
- break;
- }
-
- length = iov_length (vector, count);
- size += length;
- offset_expected += length;
-
- request->flags.write_request.stack_wound = 1;
- list_add_tail (&request->winds, winds);
- }
- }
-
- return size;
+
+int
+wb_writev_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
+{
+ STACK_WIND (frame, wb_writev_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev,
+ fd, vector, count, offset, flags, iobref, xdata);
+ return 0;
}
-size_t
-__wb_get_aggregate_size (list_head_t *list, char *other_fop_in_queue,
- char *non_contiguous_writes)
+int
+wb_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
{
- wb_request_t *request = NULL;
- size_t size = 0, length = 0;
- struct iovec *vector = NULL;
- int32_t count = 0;
- char first_request = 1;
- off_t offset_expected = 0;
+ wb_inode_t *wb_inode = NULL;
+ wb_conf_t *conf = NULL;
+ gf_boolean_t wb_disabled = 0;
+ call_stub_t *stub = NULL;
+ int ret = -1;
+ int32_t op_errno = EINVAL;
+ int o_direct = O_DIRECT;
- list_for_each_entry (request, list, list)
- {
- if ((request->stub == NULL)
- || (request->stub->fop != GF_FOP_WRITE)) {
- if (request->stub && other_fop_in_queue) {
- *other_fop_in_queue = 1;
- }
- break;
- }
+ conf = this->private;
- vector = request->stub->args.writev.vector;
- count = request->stub->args.writev.count;
- if (!request->flags.write_request.stack_wound) {
- if (first_request) {
- first_request = 0;
- offset_expected = request->stub->args.writev.off;
- }
-
- if (offset_expected != request->stub->args.writev.off) {
- if (non_contiguous_writes) {
- *non_contiguous_writes = 1;
- }
- break;
- }
-
- length = iov_length (vector, count);
- size += length;
- offset_expected += length;
- }
+ if (wb_fd_err (fd, this, &op_errno)) {
+ goto unwind;
}
- return size;
-}
+ wb_inode = wb_inode_create (this, fd->inode);
+ if (!wb_inode) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ if (!conf->strict_O_DIRECT)
+ o_direct = 0;
-char
-__wb_any_incomplete_writes (list_head_t *list)
-{
- wb_request_t *request = NULL;
- char incomplete_writes = 0;
+ if (fd->flags & (O_SYNC|O_DSYNC|o_direct))
+ wb_disabled = 1;
- list_for_each_entry (request, list, list)
- {
- if ((request->stub == NULL)
- || (request->stub->fop != GF_FOP_WRITE)) {
- break;
- }
+ if (flags & (O_SYNC|O_DSYNC|o_direct))
+ wb_disabled = 1;
- if (request->flags.write_request.stack_wound
- && !request->flags.write_request.got_reply) {
- incomplete_writes = 1;
- break;
- }
+ if (wb_disabled)
+ stub = fop_writev_stub (frame, wb_writev_helper, fd, vector,
+ count, offset, flags, iobref, xdata);
+ else
+ stub = fop_writev_stub (frame, NULL, fd, vector, count, offset,
+ flags, iobref, xdata);
+ if (!stub) {
+ op_errno = ENOMEM;
+ goto unwind;
}
- return incomplete_writes;
-}
+ if (wb_disabled)
+ ret = wb_enqueue (wb_inode, stub);
+ else
+ ret = wb_enqueue_tempted (wb_inode, stub);
+ if (!ret) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
-ssize_t
-__wb_mark_winds (list_head_t *list, list_head_t *winds, size_t aggregate_conf,
- char wind_all)
-{
- size_t aggregate_current = 0, size = 0;
- char incomplete_writes = 0;
- char other_fop_in_queue = 0;
- char non_contiguous_writes = 0;
+ wb_process_queue (wb_inode);
- incomplete_writes = __wb_any_incomplete_writes (list);
+ return 0;
- aggregate_current = __wb_get_aggregate_size (list, &other_fop_in_queue,
- &non_contiguous_writes);
+unwind:
+ STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL, NULL);
- if ((!incomplete_writes) || (wind_all) || (non_contiguous_writes)
- || (other_fop_in_queue) || (aggregate_current >= aggregate_conf)) {
- size = __wb_mark_wind_all (list, winds);
- }
+ if (stub)
+ call_stub_destroy (stub);
- return size;
+ return 0;
}
-size_t
-__wb_get_window_size (list_head_t *list)
+int
+wb_readv_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- wb_request_t *request = NULL;
- size_t size = 0;
- struct iovec *vector = NULL;
- int32_t count = 0;
+ STACK_WIND (frame, default_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
+ xdata);
+ return 0;
+}
- list_for_each_entry (request, list, list)
- {
- if ((request->stub == NULL)
- || (request->stub->fop != GF_FOP_WRITE)) {
- continue;
- }
- vector = request->stub->args.writev.vector;
- count = request->stub->args.writev.count;
+int
+wb_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- if (request->flags.write_request.write_behind
- && !request->flags.write_request.got_reply)
- {
- size += iov_length (vector, count);
- }
- }
+ wb_inode = wb_inode_ctx_get (this, fd->inode);
+ if (!wb_inode)
+ goto noqueue;
- return size;
-}
+ stub = fop_readv_stub (frame, wb_readv_helper, fd, size,
+ offset, flags, xdata);
+ if (!stub)
+ goto unwind;
+ if (!wb_enqueue (wb_inode, stub))
+ goto unwind;
-size_t
-__wb_mark_unwind_till (list_head_t *list, list_head_t *unwinds, size_t size)
-{
- size_t written_behind = 0;
- wb_request_t *request = NULL;
- struct iovec *vector = NULL;
- int32_t count = 0;
+ wb_process_queue (wb_inode);
- list_for_each_entry (request, list, list)
- {
- if ((request->stub == NULL)
- || (request->stub->fop != GF_FOP_WRITE)) {
- continue;
- }
+ return 0;
- vector = request->stub->args.writev.vector;
- count = request->stub->args.writev.count;
-
- if (written_behind <= size) {
- if (!request->flags.write_request.write_behind) {
- written_behind += iov_length (vector, count);
- request->flags.write_request.write_behind = 1;
- list_add_tail (&request->unwinds, unwinds);
- }
- } else {
- break;
- }
- }
+unwind:
+ STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM, NULL, 0, NULL, NULL,
+ NULL);
+ return 0;
- return written_behind;
+noqueue:
+ STACK_WIND (frame, default_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
+ xdata);
+ return 0;
}
-int32_t
-__wb_mark_unwinds (list_head_t *list, list_head_t *unwinds, size_t window_conf)
+int
+wb_flush_bg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- size_t window_current = 0;
-
- window_current = __wb_get_window_size (list);
- if (window_current <= window_conf)
- {
- window_current += __wb_mark_unwind_till (list, unwinds,
- window_conf - window_current);
- }
-
- return window_current;
+ STACK_DESTROY (frame->root);
+ return 0;
}
-uint32_t
-__wb_get_other_requests (list_head_t *list, list_head_t *other_requests)
+int
+wb_flush_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- wb_request_t *request = NULL;
- uint32_t count = 0;
- list_for_each_entry (request, list, list) {
- if ((request->stub == NULL)
- || (request->stub->fop == GF_FOP_WRITE)) {
- break;
- }
-
- if (!request->flags.other_requests.marked_for_resume) {
- request->flags.other_requests.marked_for_resume = 1;
- list_add_tail (&request->other_requests,
- other_requests);
- count++;
-
- /* lets handle one at a time */
- break;
- }
- }
+ wb_conf_t *conf = NULL;
+ wb_inode_t *wb_inode = NULL;
+ call_frame_t *bg_frame = NULL;
+ int32_t op_errno = 0;
+ int op_ret = 0;
- return count;
+ conf = this->private;
+
+ wb_inode = wb_inode_ctx_get (this, fd->inode);
+ if (!wb_inode) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto unwind;
+ }
+
+ if (wb_fd_err (fd, this, &op_errno)) {
+ op_ret = -1;
+ goto unwind;
+ }
+
+ if (conf->flush_behind)
+ goto flushbehind;
+
+ STACK_WIND (frame, default_flush_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
+ return 0;
+
+flushbehind:
+ bg_frame = copy_frame (frame);
+ if (!bg_frame) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ STACK_WIND (bg_frame, wb_flush_bg_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
+ /* fall through */
+unwind:
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, NULL);
+
+ return 0;
}
-int32_t
-wb_stack_unwind (list_head_t *unwinds)
+int
+wb_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- struct stat buf = {0,};
- wb_request_t *request = NULL, *dummy = NULL;
- call_frame_t *frame = NULL;
- wb_local_t *local = NULL;
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- list_for_each_entry_safe (request, dummy, unwinds, unwinds)
- {
- frame = request->stub->frame;
- local = frame->local;
+ wb_inode = wb_inode_ctx_get (this, fd->inode);
+ if (!wb_inode)
+ goto noqueue;
- STACK_UNWIND (frame, local->op_ret, local->op_errno, &buf);
+ stub = fop_flush_stub (frame, wb_flush_helper, fd, xdata);
+ if (!stub)
+ goto unwind;
- wb_request_unref (request);
- }
+ if (!wb_enqueue (wb_inode, stub))
+ goto unwind;
+
+ wb_process_queue (wb_inode);
return 0;
-}
+unwind:
+ STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM, NULL);
-int32_t
-wb_resume_other_requests (call_frame_t *frame, wb_file_t *file,
- list_head_t *other_requests)
-{
- int32_t ret = 0;
- wb_request_t *request = NULL, *dummy = NULL;
- int32_t fops_removed = 0;
- char wind = 0;
- call_stub_t *stub = NULL;
+ return 0;
- if (list_empty (other_requests)) {
- goto out;
- }
+noqueue:
+ STACK_WIND (frame, default_flush_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
+ return 0;
+}
- list_for_each_entry_safe (request, dummy, other_requests,
- other_requests) {
- wind = request->stub->wind;
- stub = request->stub;
-
- LOCK (&file->lock);
- {
- request->stub = NULL;
- }
- UNLOCK (&file->lock);
-
- if (!wind) {
- wb_request_unref (request);
- fops_removed++;
- }
-
- call_resume (stub);
- }
- if (fops_removed > 0) {
- ret = wb_process_queue (frame, file, 0);
- }
-
-out:
- return ret;
+
+int
+wb_fsync_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t datasync, dict_t *xdata)
+{
+ STACK_WIND (frame, default_fsync_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
+ return 0;
}
-int32_t
-wb_do_ops (call_frame_t *frame, wb_file_t *file, list_head_t *winds,
- list_head_t *unwinds, list_head_t *other_requests)
+int
+wb_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
{
- int32_t ret = -1;
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
+ int32_t op_errno = EINVAL;
- ret = wb_stack_unwind (unwinds);
- if (ret == -1) {
- goto out;
- }
+ if (wb_fd_err (fd, this, &op_errno))
+ goto unwind;
- ret = wb_sync (frame, file, winds);
- if (ret == -1) {
- goto out;
- }
+ wb_inode = wb_inode_ctx_get (this, fd->inode);
+ if (!wb_inode)
+ goto noqueue;
- ret = wb_resume_other_requests (frame, file, other_requests);
+ stub = fop_fsync_stub (frame, wb_fsync_helper, fd, datasync, xdata);
+ if (!stub)
+ goto unwind;
-out:
- return ret;
-}
+ if (!wb_enqueue (wb_inode, stub))
+ goto unwind;
+ wb_process_queue (wb_inode);
-/* this procedure assumes that write requests have only one vector to write */
-void
-__wb_collapse_write_bufs (list_head_t *requests, size_t page_size)
-{
+ return 0;
- off_t offset_expected = 0;
- size_t space_left = 0, length = 0, *iov_len = NULL;
- char *ptr = NULL, first_request = 1;
- wb_request_t *request = NULL, *tmp = NULL;
-
- list_for_each_entry_safe (request, tmp, requests, list) {
- if ((request->stub->fop != GF_FOP_WRITE)
- || (request->flags.write_request.stack_wound)) {
- space_left = 0;
- ptr = NULL;
- first_request = 1;
- continue;
- }
+unwind:
+ STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL, NULL);
- if (request->flags.write_request.write_behind) {
- length = iov_length (request->stub->args.writev.vector,
- request->stub->args.writev.count);
-
- if (first_request) {
- first_request = 0;
- offset_expected = request->stub->args.writev.off;
- }
-
- if (request->stub->args.writev.off != offset_expected) {
- offset_expected = request->stub->args.writev.off + length;
- space_left = page_size - length;
- ptr = request->stub->args.writev.vector[0].iov_base
- + length;
- iov_len = &request->stub->args.writev.vector[0].iov_len;
- continue;
- }
-
- if (space_left >= length) {
- iov_unload (ptr,
- request->stub->args.writev.vector,
- request->stub->args.writev.count);
- space_left -= length;
- ptr += length;
- *iov_len = *iov_len + length;
-
- list_move_tail (&request->list,
- &request->file->passive_requests);
-
- __wb_request_unref (request);
- } else {
- space_left = page_size - length;
- ptr = request->stub->args.writev.vector[0].iov_base
- + length;
- iov_len = &request->stub->args.writev.vector[0].iov_len;
- }
- } else {
- break;
- }
+ return 0;
- offset_expected += length;
- }
+noqueue:
+ STACK_WIND (frame, default_fsync_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
+ return 0;
}
-int32_t
-wb_process_queue (call_frame_t *frame, wb_file_t *file, char flush_all)
+int
+wb_stat_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- list_head_t winds, unwinds, other_requests;
- size_t size = 0;
- wb_conf_t *conf = file->this->private;
- uint32_t count = 0;
- int32_t ret = -1;
-
- INIT_LIST_HEAD (&winds);
- INIT_LIST_HEAD (&unwinds);
- INIT_LIST_HEAD (&other_requests);
-
- if (file == NULL) {
- errno = EINVAL;
- goto out;
- }
+ STACK_WIND (frame, default_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
+}
- size = conf->aggregate_size;
- LOCK (&file->lock);
- {
- /*
- * make sure requests are marked for unwinding and adjacent
- * continguous write buffers (each of size less than that of
- * an iobuf) are packed properly so that iobufs are filled to
- * their maximum capacity, before calling __wb_mark_winds.
- */
- __wb_mark_unwinds (&file->request, &unwinds, conf->window_size);
- __wb_collapse_write_bufs (&file->request,
- file->this->ctx->page_size);
+int
+wb_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- count = __wb_get_other_requests (&file->request,
- &other_requests);
- if (count == 0) {
- __wb_mark_winds (&file->request, &winds, size,
- flush_all);
- }
+ wb_inode = wb_inode_ctx_get (this, loc->inode);
+ if (!wb_inode)
+ goto noqueue;
- }
- UNLOCK (&file->lock);
+ stub = fop_stat_stub (frame, wb_stat_helper, loc, xdata);
+ if (!stub)
+ goto unwind;
- ret = wb_do_ops (frame, file, &winds, &unwinds, &other_requests);
+ if (!wb_enqueue (wb_inode, stub))
+ goto unwind;
-out:
- return ret;
-}
+ wb_process_queue (wb_inode);
+ return 0;
-int32_t
-wb_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *stbuf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, stbuf);
+unwind:
+ STACK_UNWIND_STRICT (stat, frame, -1, ENOMEM, NULL, NULL);
+
+ if (stub)
+ call_stub_destroy (stub);
return 0;
+
+noqueue:
+ STACK_WIND (frame, default_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
}
-int32_t
-wb_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
- int32_t count, off_t offset, struct iobref *iobref)
+int
+wb_fstat_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- wb_file_t *file = NULL;
- char wb_disabled = 0;
- call_frame_t *process_frame = NULL;
- size_t size = 0;
- uint64_t tmp_file = 0;
- call_stub_t *stub = NULL;
- wb_local_t *local = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1;
- int32_t op_ret = -1, op_errno = -1;
-
- if (vector != NULL)
- size = iov_length (vector, count);
-
- if ((!S_ISDIR (fd->inode->st_mode))
- && fd_ctx_get (fd, this, &tmp_file)) {
- gf_log (this->name, GF_LOG_DEBUG, "write behind file pointer is"
- " not stored in context of fd(%p), returning EBADFD",
- fd);
-
- STACK_UNWIND (frame, -1, EBADFD, NULL);
- return 0;
- }
-
- file = (wb_file_t *)(long)tmp_file;
- if (!file) {
- gf_log (this->name, GF_LOG_DEBUG,
- "wb_file not found for fd %p", fd);
- STACK_UNWIND (frame, -1, EBADFD, NULL);
- return 0;
- }
+ STACK_WIND (frame, default_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ return 0;
+}
- LOCK (&file->lock);
- {
- op_ret = file->op_ret;
- op_errno = file->op_errno;
-
- file->op_ret = 0;
-
- if ((op_ret == 0) && (file->disabled || file->disable_till)) {
- if (size > file->disable_till) {
- file->disable_till = 0;
- } else {
- file->disable_till -= size;
- }
- wb_disabled = 1;
- }
- }
- UNLOCK (&file->lock);
- if (op_ret == -1) {
- STACK_UNWIND (frame, op_ret, op_errno, NULL);
- return 0;
- }
+int
+wb_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- if (wb_disabled) {
- STACK_WIND (frame, wb_writev_cbk,
- FIRST_CHILD (frame->this),
- FIRST_CHILD (frame->this)->fops->writev,
- fd, vector, count, offset, iobref);
- return 0;
- }
- process_frame = copy_frame (frame);
- if (process_frame == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- return 0;
- }
+ wb_inode = wb_inode_ctx_get (this, fd->inode);
+ if (!wb_inode)
+ goto noqueue;
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- STACK_DESTROY (process_frame->root);
- return 0;
- }
+ stub = fop_fstat_stub (frame, wb_fstat_helper, fd, xdata);
+ if (!stub)
+ goto unwind;
- frame->local = local;
- local->file = file;
+ if (!wb_enqueue (wb_inode, stub))
+ goto unwind;
- stub = fop_writev_stub (frame, NULL, fd, vector, count, offset,
- iobref);
- if (stub == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
+ wb_process_queue (wb_inode);
- STACK_DESTROY (process_frame->root);
- return 0;
- }
+ return 0;
- request = wb_enqueue (file, stub);
- if (request == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
+unwind:
+ STACK_UNWIND_STRICT (fstat, frame, -1, ENOMEM, NULL, NULL);
- STACK_DESTROY (process_frame->root);
+ if (stub)
call_stub_destroy (stub);
- return 0;
- }
-
- ret = wb_process_queue (process_frame, file, 0);
- if ((ret == -1) && (errno == ENOMEM)) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
+ return 0;
- STACK_DESTROY (process_frame->root);
- call_stub_destroy (stub);
- }
+noqueue:
+ STACK_WIND (frame, default_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ return 0;
+}
- STACK_DESTROY (process_frame->root);
+int
+wb_truncate_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ off_t offset, dict_t *xdata)
+{
+ STACK_WIND (frame, default_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
return 0;
}
-int32_t
-wb_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iovec *vector, int32_t count,
- struct stat *stbuf, struct iobref *iobref)
+int
+wb_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- wb_local_t *local = NULL;
- wb_file_t *file = NULL;
- wb_request_t *request = NULL;
- int32_t ret = 0;
-
- local = frame->local;
- file = local->file;
- request = local->request;
-
- if (request) {
- wb_request_unref (request);
-
- ret = wb_process_queue (frame, file, 0);
- if ((ret == -1) && (errno == ENOMEM)) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL, -1, NULL, NULL);
- return 0;
- }
- }
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
+
+ wb_inode = wb_inode_create (this, loc->inode);
+ if (!wb_inode)
+ goto unwind;
+
+ stub = fop_truncate_stub (frame, wb_truncate_helper, loc,
+ offset, xdata);
+ if (!stub)
+ goto unwind;
+
+ if (!wb_enqueue (wb_inode, stub))
+ goto unwind;
- STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf, iobref);
+ wb_process_queue (wb_inode);
+
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+ if (stub)
+ call_stub_destroy (stub);
return 0;
}
-static int32_t
-wb_readv_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+int
+wb_ftruncate_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, dict_t *xdata)
{
- STACK_WIND (frame,
- wb_readv_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv,
- fd, size, offset);
-
+ STACK_WIND (frame, default_ftruncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
return 0;
}
-int32_t
-wb_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
+int
+wb_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- wb_file_t *file = NULL;
- wb_local_t *local = NULL;
- uint64_t tmp_file = 0;
- call_stub_t *stub = NULL;
- int32_t ret = -1;
- wb_request_t *request = NULL;
-
- if ((!S_ISDIR (fd->inode->st_mode))
- && fd_ctx_get (fd, this, &tmp_file)) {
- gf_log (this->name, GF_LOG_DEBUG, "write behind file pointer is"
- " not stored in context of fd(%p), returning EBADFD",
- fd);
-
- STACK_UNWIND (frame, -1, EBADFD, NULL);
- return 0;
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
+ int32_t op_errno = 0;
+
+ wb_inode = wb_inode_create (this, fd->inode);
+ if (!wb_inode) {
+ op_errno = ENOMEM;
+ goto unwind;
}
- file = (wb_file_t *)(long)tmp_file;
+ if (wb_fd_err (fd, this, &op_errno))
+ goto unwind;
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM);
- return 0;
+ stub = fop_ftruncate_stub (frame, wb_ftruncate_helper, fd,
+ offset, xdata);
+ if (!stub) {
+ op_errno = ENOMEM;
+ goto unwind;
}
- local->file = file;
-
- frame->local = local;
- if (file) {
- stub = fop_readv_stub (frame, wb_readv_helper, fd, size,
- offset);
- if (stub == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- return 0;
- }
+ if (!wb_enqueue (wb_inode, stub)) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
- request = wb_enqueue (file, stub);
- if (request == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- call_stub_destroy (stub);
- return 0;
- }
+ wb_process_queue (wb_inode);
- ret = wb_process_queue (frame, file, 1);
- if ((ret == -1) && (errno == ENOMEM)) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- call_stub_destroy (stub);
- return 0;
- }
+ return 0;
- } else {
- STACK_WIND (frame,
- wb_readv_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv,
- fd, size, offset);
- }
+unwind:
+ STACK_UNWIND_STRICT (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
+ if (stub)
+ call_stub_destroy (stub);
return 0;
}
-int32_t
-wb_ffr_bg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+int
+wb_setattr_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- STACK_DESTROY (frame->root);
+ STACK_WIND (frame, default_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
return 0;
}
-int32_t
-wb_ffr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+int
+wb_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- wb_local_t *local = NULL;
- wb_file_t *file = NULL;
- wb_conf_t *conf = NULL;
- char unwind = 0;
- int32_t ret = -1;
- int disabled = 0;
- int64_t disable_till = 0;
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- conf = this->private;
- local = frame->local;
- file = local->file;
+ wb_inode = wb_inode_ctx_get (this, loc->inode);
+ if (!wb_inode)
+ goto noqueue;
- LOCK (&file->lock);
- {
- disabled = file->disabled;
- disable_till = file->disable_till;
- }
- UNLOCK (&file->lock);
-
- if (conf->flush_behind
- && (!disabled) && (disable_till == 0)) {
- unwind = 1;
- } else {
- local->reply_count++;
- /*
- without flush-behind, unwind should wait for replies of
- writes queued before and the flush
- */
- if (local->reply_count == 2) {
- unwind = 1;
- }
- }
+ stub = fop_setattr_stub (frame, wb_setattr_helper, loc, stbuf,
+ valid, xdata);
+ if (!stub)
+ goto unwind;
- if (unwind) {
- LOCK (&file->lock);
- {
- if (file->op_ret == -1) {
- op_ret = file->op_ret;
- op_errno = file->op_errno;
+ if (!wb_enqueue (wb_inode, stub))
+ goto unwind;
- file->op_ret = 0;
- }
- }
- UNLOCK (&file->lock);
+ wb_process_queue (wb_inode);
- ret = wb_process_queue (frame, file, 0);
- if ((ret == -1) && (errno == ENOMEM)) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
-
- STACK_UNWIND (frame, op_ret, op_errno);
- }
+ return 0;
+unwind:
+ STACK_UNWIND_STRICT (setattr, frame, -1, ENOMEM, NULL, NULL, NULL);
+ if (stub)
+ call_stub_destroy (stub);
+ return 0;
+
+noqueue:
+ STACK_WIND (frame, default_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
return 0;
}
-int32_t
-wb_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
+int
+wb_fsetattr_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- wb_conf_t *conf = NULL;
- wb_file_t *file = NULL;
- wb_local_t *local = NULL;
- uint64_t tmp_file = 0;
- call_stub_t *stub = NULL;
- call_frame_t *process_frame = NULL;
- wb_local_t *tmp_local = NULL;
- wb_request_t *request = NULL;
- int32_t ret = 0;
- int disabled = 0;
- int64_t disable_till = 0;
+ STACK_WIND (frame, default_fsetattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+ return 0;
+}
- conf = this->private;
- if ((!S_ISDIR (fd->inode->st_mode))
- && fd_ctx_get (fd, this, &tmp_file)) {
- gf_log (this->name, GF_LOG_DEBUG, "write behind file pointer is"
- " not stored in context of fd(%p), returning EBADFD",
- fd);
+int
+wb_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- STACK_UNWIND (frame, -1, EBADFD);
- return 0;
- }
+ wb_inode = wb_inode_ctx_get (this, fd->inode);
+ if (!wb_inode)
+ goto noqueue;
- file = (wb_file_t *)(long)tmp_file;
+ stub = fop_fsetattr_stub (frame, wb_fsetattr_helper, fd, stbuf,
+ valid, xdata);
+ if (!stub)
+ goto unwind;
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- return 0;
- }
+ if (!wb_enqueue (wb_inode, stub))
+ goto unwind;
- local->file = file;
+ wb_process_queue (wb_inode);
- frame->local = local;
- stub = fop_flush_cbk_stub (frame, wb_ffr_cbk, 0, 0);
- if (stub == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM);
- return 0;
- }
+ return 0;
+unwind:
+ STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL);
- process_frame = copy_frame (frame);
- if (process_frame == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM);
+ if (stub)
call_stub_destroy (stub);
- return 0;
- }
+ return 0;
- LOCK (&file->lock);
- {
- disabled = file->disabled;
- disable_till = file->disable_till;
- }
- UNLOCK (&file->lock);
-
- if (conf->flush_behind
- && (!disabled) && (disable_till == 0)) {
- tmp_local = CALLOC (1, sizeof (*local));
- if (tmp_local == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM);
-
- STACK_DESTROY (process_frame->root);
- call_stub_destroy (stub);
- return 0;
- }
- tmp_local->file = file;
+noqueue:
+ STACK_WIND (frame, default_fsetattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+ return 0;
+}
- process_frame->local = tmp_local;
- }
- fd_ref (fd);
+int
+wb_forget (xlator_t *this, inode_t *inode)
+{
+ uint64_t tmp = 0;
+ wb_inode_t *wb_inode = NULL;
- request = wb_enqueue (file, stub);
- if (request == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM);
+ inode_ctx_del (inode, this, &tmp);
- fd_unref (fd);
- call_stub_destroy (stub);
- STACK_DESTROY (process_frame->root);
- return 0;
- }
+ wb_inode = (wb_inode_t *)(long)tmp;
- ret = wb_process_queue (process_frame, file, 1);
- if ((ret == -1) && (errno == ENOMEM)) {
- STACK_UNWIND (frame, -1, ENOMEM);
+ if (!wb_inode)
+ return 0;
- fd_unref (fd);
- call_stub_destroy (stub);
- STACK_DESTROY (process_frame->root);
- return 0;
- }
-
- if (conf->flush_behind
- && (!disabled) && (disable_till == 0)) {
- STACK_WIND (process_frame,
- wb_ffr_bg_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush,
- fd);
- } else {
- STACK_WIND (frame,
- wb_ffr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush,
- fd);
- STACK_DESTROY (process_frame->root);
- }
+ GF_ASSERT (list_empty (&wb_inode->todo));
+ GF_ASSERT (list_empty (&wb_inode->liability));
+ GF_ASSERT (list_empty (&wb_inode->temptation));
- fd_unref (fd);
+ GF_FREE (wb_inode);
return 0;
}
-static int32_t
-wb_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+int
+wb_release (xlator_t *this, fd_t *fd)
{
- wb_local_t *local = NULL;
- wb_file_t *file = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1;
+ uint64_t tmp = 0;
- local = frame->local;
- file = local->file;
- request = local->request;
+ fd_ctx_del (fd, this, &tmp);
- LOCK (&file->lock);
- {
- if (file->op_ret == -1) {
- op_ret = file->op_ret;
- op_errno = file->op_errno;
+ return 0;
+}
- file->op_ret = 0;
- }
- }
- UNLOCK (&file->lock);
-
- if (request) {
- wb_request_unref (request);
- ret = wb_process_queue (frame, file, 0);
- if ((ret == -1) && (errno == ENOMEM)) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
- }
- STACK_UNWIND (frame, op_ret, op_errno);
-
- return 0;
+int
+wb_priv_dump (xlator_t *this)
+{
+ wb_conf_t *conf = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("write-behind", this, out);
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO (this->name, conf, out);
+
+ gf_proc_dump_build_key (key_prefix, "xlator.performance.write-behind",
+ "priv");
+
+ gf_proc_dump_add_section (key_prefix);
+
+ gf_proc_dump_write ("aggregate_size", "%d", conf->aggregate_size);
+ gf_proc_dump_write ("window_size", "%d", conf->window_size);
+ gf_proc_dump_write ("flush_behind", "%d", conf->flush_behind);
+ gf_proc_dump_write ("trickling_writes", "%d", conf->trickling_writes);
+
+ ret = 0;
+out:
+ return ret;
}
-static int32_t
-wb_fsync_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync)
+void
+__wb_dump_requests (struct list_head *head, char *prefix)
{
- STACK_WIND (frame,
- wb_fsync_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync,
- fd, datasync);
- return 0;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0, };
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, }, flag = 0;
+ wb_request_t *req = NULL;
+
+ list_for_each_entry (req, head, all) {
+ gf_proc_dump_build_key (key_prefix, key,
+ (char *)gf_fop_list[req->fop]);
+
+ gf_proc_dump_add_section(key_prefix);
+
+ gf_proc_dump_write ("request-ptr", "%p", req);
+
+ gf_proc_dump_write ("refcount", "%d", req->refcount);
+
+ if (list_empty (&req->todo))
+ gf_proc_dump_write ("wound", "yes");
+ else
+ gf_proc_dump_write ("wound", "no");
+
+ if (req->fop == GF_FOP_WRITE) {
+ gf_proc_dump_write ("size", "%"GF_PRI_SIZET,
+ req->write_size);
+
+ gf_proc_dump_write ("offset", "%"PRId64,
+ req->stub->args.offset);
+
+ flag = req->ordering.lied;
+ gf_proc_dump_write ("lied", "%d", flag);
+
+ flag = req->ordering.append;
+ gf_proc_dump_write ("append", "%d", flag);
+
+ flag = req->ordering.fulfilled;
+ gf_proc_dump_write ("fulfilled", "%d", flag);
+
+ flag = req->ordering.go;
+ gf_proc_dump_write ("go", "%d", flag);
+ }
+ }
}
-int32_t
-wb_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync)
+int
+wb_inode_dump (xlator_t *this, inode_t *inode)
{
- wb_file_t *file = NULL;
- wb_local_t *local = NULL;
- uint64_t tmp_file = 0;
- call_stub_t *stub = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1;
-
- if ((!S_ISDIR (fd->inode->st_mode))
- && fd_ctx_get (fd, this, &tmp_file)) {
- gf_log (this->name, GF_LOG_DEBUG, "write behind file pointer is"
- " not stored in context of fd(%p), returning EBADFD",
- fd);
-
- STACK_UNWIND (frame, -1, EBADFD);
- return 0;
+ wb_inode_t *wb_inode = NULL;
+ int32_t ret = -1;
+ char *path = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
+ char uuid_str[64] = {0,};
+
+ if ((inode == NULL) || (this == NULL)) {
+ ret = 0;
+ goto out;
}
- file = (wb_file_t *)(long)tmp_file;
+ wb_inode = wb_inode_ctx_get (this, inode);
+ if (wb_inode == NULL) {
+ ret = 0;
+ goto out;
+ }
+
+ gf_proc_dump_build_key (key_prefix, "xlator.performance.write-behind",
+ "wb_inode");
+
+ gf_proc_dump_add_section (key_prefix);
- local = CALLOC (1, sizeof (*local));
- if (local == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM);
- return 0;
+ __inode_path (inode, NULL, &path);
+ if (path != NULL) {
+ gf_proc_dump_write ("path", "%s", path);
+ GF_FREE (path);
}
- local->file = file;
+ gf_proc_dump_write ("inode", "%p", inode);
- frame->local = local;
+ gf_proc_dump_write ("window_conf", "%"GF_PRI_SIZET,
+ wb_inode->window_conf);
+
+ gf_proc_dump_write ("window_current", "%"GF_PRI_SIZET,
+ wb_inode->window_current);
- if (file) {
- stub = fop_fsync_stub (frame, wb_fsync_helper, fd, datasync);
- if (stub == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM);
- return 0;
- }
-
- request = wb_enqueue (file, stub);
- if (request == NULL) {
- STACK_UNWIND (frame, -1, ENOMEM);
- call_stub_destroy (stub);
- return 0;
- }
- ret = wb_process_queue (frame, file, 1);
- if ((ret == -1) && (errno == ENOMEM)) {
- STACK_UNWIND (frame, -1, ENOMEM);
- call_stub_destroy (stub);
- return 0;
+ ret = TRY_LOCK (&wb_inode->lock);
+ if (!ret)
+ {
+ if (!list_empty (&wb_inode->all)) {
+ __wb_dump_requests (&wb_inode->all, key_prefix);
}
-
- } else {
- STACK_WIND (frame,
- wb_fsync_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync,
- fd, datasync);
+ UNLOCK (&wb_inode->lock);
}
- return 0;
+ if (ret && wb_inode)
+ gf_proc_dump_write ("Unable to dump the inode information",
+ "(Lock acquisition failed) %p (gfid: %s)",
+ wb_inode,
+ uuid_utoa_r (inode->gfid, uuid_str));
+ ret = 0;
+out:
+ return ret;
}
-int32_t
-wb_release (xlator_t *this, fd_t *fd)
+int
+mem_acct_init (xlator_t *this)
{
- uint64_t file_ptr = 0;
- wb_file_t *file = NULL;
+ int ret = -1;
- fd_ctx_get (fd, this, &file_ptr);
- file = (wb_file_t *) (long) file_ptr;
+ if (!this) {
+ goto out;
+ }
- LOCK (&file->lock);
- {
- assert (list_empty (&file->request));
+ ret = xlator_mem_acct_init (this, gf_wb_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
}
- UNLOCK (&file->lock);
- wb_file_destroy (file);
+out:
+ return ret;
+}
- return 0;
+
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ wb_conf_t *conf = NULL;
+ int ret = -1;
+
+ conf = this->private;
+
+ GF_OPTION_RECONF ("cache-size", conf->window_size, options, size, out);
+
+ GF_OPTION_RECONF ("flush-behind", conf->flush_behind, options, bool,
+ out);
+
+ GF_OPTION_RECONF ("trickling-writes", conf->trickling_writes, options,
+ bool, out);
+
+ GF_OPTION_RECONF ("strict-O_DIRECT", conf->strict_O_DIRECT, options,
+ bool, out);
+
+ GF_OPTION_RECONF ("strict-write-ordering", conf->strict_write_ordering,
+ options, bool, out);
+ ret = 0;
+out:
+ return ret;
}
-int32_t
+int32_t
init (xlator_t *this)
{
- dict_t *options = NULL;
- wb_conf_t *conf = NULL;
- char *window_size_string = NULL;
- char *flush_behind_string = NULL;
- char *disable_till_string = NULL;
- char *enable_O_SYNC_string = NULL;
- int32_t ret = -1;
+ wb_conf_t *conf = NULL;
+ int32_t ret = -1;
if ((this->children == NULL)
|| this->children->next) {
gf_log (this->name, GF_LOG_ERROR,
"FATAL: write-behind (%s) not configured with exactly "
- "one child",
- this->name);
- return -1;
+ "one child", this->name);
+ goto out;
}
if (this->parents == NULL) {
gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile");
+ "dangling volume. check volfilex");
}
-
- options = this->options;
- conf = CALLOC (1, sizeof (*conf));
+ conf = GF_CALLOC (1, sizeof (*conf), gf_wb_mt_wb_conf_t);
if (conf == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: Out of memory");
- return -1;
- }
-
- conf->enable_O_SYNC = _gf_false;
- ret = dict_get_str (options, "enable-O_SYNC",
- &enable_O_SYNC_string);
- if (ret == 0) {
- ret = gf_string2boolean (enable_O_SYNC_string,
- &conf->enable_O_SYNC);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "'enable-O_SYNC' takes only boolean arguments");
- return -1;
- }
+ goto out;
}
/* configure 'options aggregate-size <size>' */
conf->aggregate_size = WB_AGGREGATE_SIZE;
- conf->disable_till = 1;
- ret = dict_get_str (options, "disable-for-first-nbytes",
- &disable_till_string);
- if (ret == 0) {
- ret = gf_string2bytesize (disable_till_string,
- &conf->disable_till);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format \"%s\" of \"option "
- "disable-for-first-nbytes\"",
- disable_till_string);
- return -1;
- }
- }
- gf_log (this->name, GF_LOG_DEBUG,
- "disabling write-behind for first %"PRIu64" bytes",
- conf->disable_till);
-
/* configure 'option window-size <size>' */
- conf->window_size = WB_WINDOW_SIZE;
- ret = dict_get_str (options, "cache-size",
- &window_size_string);
- if (ret == 0) {
- ret = gf_string2bytesize (window_size_string,
- &conf->window_size);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format \"%s\" of \"option "
- "window-size\"",
- window_size_string);
- FREE (conf);
- return -1;
- }
- }
+ GF_OPTION_INIT ("cache-size", conf->window_size, size, out);
if (!conf->window_size && conf->aggregate_size) {
gf_log (this->name, GF_LOG_WARNING,
@@ -2368,49 +2018,54 @@ init (xlator_t *this)
if (conf->window_size < conf->aggregate_size) {
gf_log (this->name, GF_LOG_ERROR,
"aggregate-size(%"PRIu64") cannot be more than "
- "window-size"
- "(%"PRIu64")", conf->window_size, conf->aggregate_size);
- FREE (conf);
- return -1;
+ "window-size(%"PRIu64")", conf->aggregate_size,
+ conf->window_size);
+ goto out;
}
/* configure 'option flush-behind <on/off>' */
- conf->flush_behind = 0;
- ret = dict_get_str (options, "flush-behind",
- &flush_behind_string);
- if (ret == 0) {
- ret = gf_string2boolean (flush_behind_string,
- &conf->flush_behind);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "'flush-behind' takes only boolean arguments");
- return -1;
- }
+ GF_OPTION_INIT ("flush-behind", conf->flush_behind, bool, out);
+
+ GF_OPTION_INIT ("trickling-writes", conf->trickling_writes, bool, out);
+
+ GF_OPTION_INIT ("strict-O_DIRECT", conf->strict_O_DIRECT, bool, out);
+
+ GF_OPTION_INIT ("strict-write-ordering", conf->strict_write_ordering,
+ bool, out);
- if (conf->flush_behind) {
- gf_log (this->name, GF_LOG_DEBUG,
- "enabling flush-behind");
- }
- }
this->private = conf;
- return 0;
+ ret = 0;
+
+out:
+ if (ret) {
+ GF_FREE (conf);
+ }
+ return ret;
}
void
fini (xlator_t *this)
{
- wb_conf_t *conf = this->private;
+ wb_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO ("write-behind", this, out);
- FREE (conf);
+ conf = this->private;
+ if (!conf) {
+ goto out;
+ }
+
+ this->private = NULL;
+ GF_FREE (conf);
+
+out:
return;
}
struct xlator_fops fops = {
.writev = wb_writev,
- .open = wb_open,
- .create = wb_create,
.readv = wb_readv,
.flush = wb_flush,
.fsync = wb_fsync,
@@ -2418,32 +2073,56 @@ struct xlator_fops fops = {
.fstat = wb_fstat,
.truncate = wb_truncate,
.ftruncate = wb_ftruncate,
- .utimens = wb_utimens,
+ .setattr = wb_setattr,
+ .fsetattr = wb_fsetattr,
};
-struct xlator_mops mops = {
-};
struct xlator_cbks cbks = {
+ .forget = wb_forget,
.release = wb_release
};
+
+struct xlator_dumpops dumpops = {
+ .priv = wb_priv_dump,
+ .inodectx = wb_inode_dump,
+};
+
+
struct volume_options options[] = {
- { .key = {"flush-behind"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"cache-size", "window-size"},
- .type = GF_OPTION_TYPE_SIZET,
- .min = 512 * GF_UNIT_KB,
- .max = 1 * GF_UNIT_GB
+ { .key = {"flush-behind"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "If this option is set ON, instructs write-behind "
+ "translator to perform flush in background, by "
+ "returning success (or any errors, if any of "
+ "previous writes were failed) to application even "
+ "before flush FOP is sent to backend filesystem. "
},
- { .key = {"disable-for-first-nbytes"},
+ { .key = {"cache-size", "window-size"},
.type = GF_OPTION_TYPE_SIZET,
- .min = 1,
- .max = 1 * GF_UNIT_MB,
+ .min = 512 * GF_UNIT_KB,
+ .max = 1 * GF_UNIT_GB,
+ .default_value = "1MB",
+ .description = "Size of the write-behind buffer for a single file "
+ "(inode)."
},
- { .key = {"enable-O_SYNC"},
+ { .key = {"trickling-writes"},
.type = GF_OPTION_TYPE_BOOL,
- },
+ .default_value = "on",
+ },
+ { .key = {"strict-O_DIRECT"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option when set to off, ignores the "
+ "O_DIRECT flag."
+ },
+ { .key = {"strict-write-ordering"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Do not let later writes overtake earlier writes even "
+ "if they do not overlap",
+ },
{ .key = {NULL} },
};
diff --git a/xlators/playground/Makefile.am b/xlators/playground/Makefile.am
new file mode 100644
index 000000000..e7de6b31a
--- /dev/null
+++ b/xlators/playground/Makefile.am
@@ -0,0 +1,2 @@
+SUBDIRS = template
+CLEANFILES =
diff --git a/xlators/playground/template/Makefile.am b/xlators/playground/template/Makefile.am
new file mode 100644
index 000000000..f26892443
--- /dev/null
+++ b/xlators/playground/template/Makefile.am
@@ -0,0 +1,2 @@
+SUBDIRS = src
+
diff --git a/xlators/playground/template/src/Makefile.am b/xlators/playground/template/src/Makefile.am
new file mode 100644
index 000000000..21f1c5f6b
--- /dev/null
+++ b/xlators/playground/template/src/Makefile.am
@@ -0,0 +1,16 @@
+xlator_LTLIBRARIES = template.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/features
+
+template_la_LDFLAGS = -module -avoid-version
+
+template_la_SOURCES = template.c
+template_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = template.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
+
diff --git a/xlators/playground/template/src/template.c b/xlators/playground/template/src/template.c
new file mode 100644
index 000000000..37a7794a0
--- /dev/null
+++ b/xlators/playground/template/src/template.c
@@ -0,0 +1,49 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "template.h"
+
+int32_t
+init (xlator_t *this)
+{
+
+ if (!this->children || this->children->next) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "not configured with exactly one child. exiting");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+
+ return 0;
+}
+
+void
+fini (xlator_t *this)
+{
+ return;
+}
+
+struct xlator_fops fops = {
+};
+
+struct xlator_cbks cbks = {
+};
+
+struct volume_options options[] = {
+ { .key = {NULL} },
+};
diff --git a/xlators/playground/template/src/template.h b/xlators/playground/template/src/template.h
new file mode 100644
index 000000000..d6aced501
--- /dev/null
+++ b/xlators/playground/template/src/template.h
@@ -0,0 +1,24 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __TEMPLATE_H__
+#define __TEMPLATE_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "dict.h"
+#include "xlator.h"
+#include "defaults.h"
+
+#endif /* __TEMPLATE_H__ */
diff --git a/xlators/protocol/Makefile.am b/xlators/protocol/Makefile.am
index 745e277c2..91b03b141 100644
--- a/xlators/protocol/Makefile.am
+++ b/xlators/protocol/Makefile.am
@@ -1,3 +1 @@
-SUBDIRS = client server
-
-CLEANFILES =
+SUBDIRS = auth client server
diff --git a/auth/Makefile.am b/xlators/protocol/auth/Makefile.am
index 6bd54eee3..e9e0ba97e 100644
--- a/auth/Makefile.am
+++ b/xlators/protocol/auth/Makefile.am
@@ -1,3 +1 @@
SUBDIRS = addr login
-
-CLEANFILES =
diff --git a/xlators/protocol/auth/addr/Makefile.am b/xlators/protocol/auth/addr/Makefile.am
new file mode 100644
index 000000000..af437a64d
--- /dev/null
+++ b/xlators/protocol/auth/addr/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = src
diff --git a/xlators/protocol/auth/addr/src/Makefile.am b/xlators/protocol/auth/addr/src/Makefile.am
new file mode 100644
index 000000000..426e7c2fb
--- /dev/null
+++ b/xlators/protocol/auth/addr/src/Makefile.am
@@ -0,0 +1,14 @@
+auth_LTLIBRARIES = addr.la
+authdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/auth
+
+addr_la_LDFLAGS = -module -avoid-version
+
+addr_la_SOURCES = addr.c
+addr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/xlators/protocol/server/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src/
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
diff --git a/xlators/protocol/auth/addr/src/addr.c b/xlators/protocol/auth/addr/src/addr.c
new file mode 100644
index 000000000..64e8d0fc6
--- /dev/null
+++ b/xlators/protocol/auth/addr/src/addr.c
@@ -0,0 +1,229 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <fnmatch.h>
+#include <sys/socket.h>
+#include <netdb.h>
+#include "authenticate.h"
+#include "dict.h"
+#include "rpc-transport.h"
+
+#define ADDR_DELIMITER " ,"
+#define PRIVILEGED_PORT_CEILING 1024
+
+#ifndef AF_INET_SDP
+#define AF_INET_SDP 27
+#endif
+
+auth_result_t
+gf_auth (dict_t *input_params, dict_t *config_params)
+{
+ auth_result_t result = AUTH_DONT_CARE;
+ int ret = 0;
+ char *name = NULL;
+ char *searchstr = NULL;
+ peer_info_t *peer_info = NULL;
+ data_t *peer_info_data = NULL;
+ data_t *allow_addr = NULL;
+ data_t *reject_addr = NULL;
+ char *addr_str = NULL;
+ char *tmp = NULL;
+ char *addr_cpy = NULL;
+ char *service = NULL;
+ uint16_t peer_port = 0;
+ char is_inet_sdp = 0;
+ char negate = 0;
+ char match = 0;
+ char peer_addr[UNIX_PATH_MAX];
+ char *type = NULL;
+ gf_boolean_t allow_insecure = _gf_false;
+
+ name = data_to_str (dict_get (input_params, "remote-subvolume"));
+ if (!name) {
+ gf_log ("authenticate/addr", GF_LOG_DEBUG,
+ "remote-subvolume not specified");
+ goto out;
+ }
+
+ ret = gf_asprintf (&searchstr, "auth.addr.%s.allow", name);
+ if (-1 == ret) {
+ gf_log ("auth/addr", GF_LOG_DEBUG,
+ "asprintf failed while setting search string");
+ goto out;
+ }
+
+ allow_addr = dict_get (config_params, searchstr);
+ GF_FREE (searchstr);
+
+ ret = gf_asprintf (&searchstr, "auth.addr.%s.reject", name);
+ if (-1 == ret) {
+ gf_log ("auth/addr", GF_LOG_ERROR,
+ "asprintf failed while setting search string");
+ goto out;
+ }
+ reject_addr = dict_get (config_params, searchstr);
+ GF_FREE (searchstr);
+
+ if (!allow_addr) {
+ /* TODO: backword compatibility */
+ ret = gf_asprintf (&searchstr, "auth.ip.%s.allow", name);
+ if (-1 == ret) {
+ gf_log ("auth/addr", GF_LOG_ERROR,
+ "asprintf failed while setting search string");
+ goto out;
+ }
+ allow_addr = dict_get (config_params, searchstr);
+ GF_FREE (searchstr);
+ }
+
+ if (!(allow_addr || reject_addr)) {
+ gf_log ("auth/addr", GF_LOG_DEBUG,
+ "none of the options auth.addr.%s.allow or "
+ "auth.addr.%s.reject specified, returning auth_dont_care",
+ name, name);
+ goto out;
+ }
+
+ peer_info_data = dict_get (input_params, "peer-info");
+ if (!peer_info_data) {
+ gf_log ("auth/addr", GF_LOG_ERROR,
+ "peer-info not present");
+ goto out;
+ }
+
+ peer_info = data_to_ptr (peer_info_data);
+
+ switch (((struct sockaddr *) &peer_info->sockaddr)->sa_family)
+ {
+ case AF_INET_SDP:
+ is_inet_sdp = 1;
+ ((struct sockaddr *) &peer_info->sockaddr)->sa_family = AF_INET;
+
+ case AF_INET:
+ case AF_INET6:
+ {
+ strcpy (peer_addr, peer_info->identifier);
+ service = strrchr (peer_addr, ':');
+ *service = '\0';
+ service ++;
+
+ if (is_inet_sdp) {
+ ((struct sockaddr *) &peer_info->sockaddr)->sa_family = AF_INET_SDP;
+ }
+
+ ret = dict_get_str (config_params, "rpc-auth-allow-insecure",
+ &type);
+ if (ret == 0) {
+ ret = gf_string2boolean (type, &allow_insecure);
+ if (ret < 0) {
+ gf_log ("auth/addr", GF_LOG_WARNING,
+ "rpc-auth-allow-insecure option %s "
+ "is not a valid bool option", type);
+ goto out;
+ }
+ }
+
+ peer_port = atoi (service);
+ if (peer_port >= PRIVILEGED_PORT_CEILING && !allow_insecure) {
+ gf_log ("auth/addr", GF_LOG_ERROR,
+ "client is bound to port %d which is not privileged",
+ peer_port);
+ goto out;
+ }
+ break;
+
+ case AF_UNIX:
+ strcpy (peer_addr, peer_info->identifier);
+ break;
+
+ default:
+ gf_log ("authenticate/addr", GF_LOG_ERROR,
+ "unknown address family %d",
+ ((struct sockaddr *) &peer_info->sockaddr)->sa_family);
+ goto out;
+ }
+ }
+
+ if (reject_addr) {
+ addr_cpy = gf_strdup (reject_addr->data);
+ if (!addr_cpy)
+ goto out;
+
+ addr_str = strtok_r (addr_cpy, ADDR_DELIMITER, &tmp);
+
+ while (addr_str) {
+ gf_log (name, GF_LOG_DEBUG,
+ "rejected = \"%s\", received addr = \"%s\"",
+ addr_str, peer_addr);
+ if (addr_str[0] == '!') {
+ negate = 1;
+ addr_str++;
+ }
+
+ match = fnmatch (addr_str, peer_addr, 0);
+ if (negate ? match : !match) {
+ result = AUTH_REJECT;
+ goto out;
+ }
+ addr_str = strtok_r (NULL, ADDR_DELIMITER, &tmp);
+ }
+ GF_FREE (addr_cpy);
+ }
+
+ if (allow_addr) {
+ addr_cpy = gf_strdup (allow_addr->data);
+ if (!addr_cpy)
+ goto out;
+
+ addr_str = strtok_r (addr_cpy, ADDR_DELIMITER, &tmp);
+
+ while (addr_str) {
+ gf_log (name, GF_LOG_DEBUG,
+ "allowed = \"%s\", received addr = \"%s\"",
+ addr_str, peer_addr);
+ if (addr_str[0] == '!') {
+ negate = 1;
+ addr_str++;
+ }
+
+ match = fnmatch (addr_str, peer_addr, 0);
+ if (negate ? match : !match) {
+ result = AUTH_ACCEPT;
+ goto out;
+ }
+ addr_str = strtok_r (NULL, ADDR_DELIMITER, &tmp);
+ }
+ }
+
+out:
+ GF_FREE (addr_cpy);
+
+ return result;
+}
+
+struct volume_options options[] = {
+ { .key = {"auth.addr.*.allow"},
+ .type = GF_OPTION_TYPE_INTERNET_ADDRESS_LIST
+ },
+ { .key = {"auth.addr.*.reject"},
+ .type = GF_OPTION_TYPE_INTERNET_ADDRESS_LIST
+ },
+ /* Backword compatibility */
+ { .key = {"auth.ip.*.allow"},
+ .type = GF_OPTION_TYPE_INTERNET_ADDRESS_LIST
+ },
+ { .key = {NULL} }
+};
diff --git a/xlators/protocol/auth/login/Makefile.am b/xlators/protocol/auth/login/Makefile.am
new file mode 100644
index 000000000..af437a64d
--- /dev/null
+++ b/xlators/protocol/auth/login/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = src
diff --git a/xlators/protocol/auth/login/src/Makefile.am b/xlators/protocol/auth/login/src/Makefile.am
new file mode 100644
index 000000000..d84db91c4
--- /dev/null
+++ b/xlators/protocol/auth/login/src/Makefile.am
@@ -0,0 +1,12 @@
+auth_LTLIBRARIES = login.la
+authdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/auth
+
+login_la_LDFLAGS = -module -avoid-version
+
+login_la_SOURCES = login.c
+login_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/xlators/protocol/server/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
diff --git a/xlators/protocol/auth/login/src/login.c b/xlators/protocol/auth/login/src/login.c
new file mode 100644
index 000000000..c2f0bf0d0
--- /dev/null
+++ b/xlators/protocol/auth/login/src/login.c
@@ -0,0 +1,128 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <fnmatch.h>
+#include "authenticate.h"
+
+auth_result_t gf_auth (dict_t *input_params, dict_t *config_params)
+{
+ auth_result_t result = AUTH_DONT_CARE;
+ int ret = 0;
+ data_t *allow_user = NULL;
+ data_t *username_data = NULL;
+ data_t *passwd_data = NULL;
+ data_t *password_data = NULL;
+ char *username = NULL;
+ char *password = NULL;
+ char *brick_name = NULL;
+ char *searchstr = NULL;
+ char *username_str = NULL;
+ char *tmp = NULL;
+ char *username_cpy = NULL;
+
+ username_data = dict_get (input_params, "username");
+ if (!username_data) {
+ gf_log ("auth/login", GF_LOG_DEBUG,
+ "username not found, returning DONT-CARE");
+ goto out;
+ }
+
+ username = data_to_str (username_data);
+
+ password_data = dict_get (input_params, "password");
+ if (!password_data) {
+ gf_log ("auth/login", GF_LOG_WARNING,
+ "password not found, returning DONT-CARE");
+ goto out;
+ }
+
+ password = data_to_str (password_data);
+
+ brick_name = data_to_str (dict_get (input_params, "remote-subvolume"));
+ if (!brick_name) {
+ gf_log ("auth/login", GF_LOG_ERROR,
+ "remote-subvolume not specified");
+ result = AUTH_REJECT;
+ goto out;
+ }
+
+ ret = gf_asprintf (&searchstr, "auth.login.%s.allow", brick_name);
+ if (-1 == ret) {
+ gf_log ("auth/login", GF_LOG_WARNING,
+ "asprintf failed while setting search string, "
+ "returning DONT-CARE");
+ goto out;
+ }
+
+ allow_user = dict_get (config_params, searchstr);
+ GF_FREE (searchstr);
+
+ if (allow_user) {
+ username_cpy = gf_strdup (allow_user->data);
+ if (!username_cpy)
+ goto out;
+
+ username_str = strtok_r (username_cpy, " ,", &tmp);
+
+ while (username_str) {
+ if (!fnmatch (username_str, username, 0)) {
+ ret = gf_asprintf (&searchstr,
+ "auth.login.%s.password",
+ username);
+ if (-1 == ret) {
+ gf_log ("auth/login", GF_LOG_WARNING,
+ "asprintf failed while setting search string");
+ goto out;
+ }
+ passwd_data = dict_get (config_params, searchstr);
+ GF_FREE (searchstr);
+
+ if (!passwd_data) {
+ gf_log ("auth/login", GF_LOG_ERROR,
+ "wrong username/password combination");
+ result = AUTH_REJECT;
+ goto out;
+ }
+
+ result = !((strcmp (data_to_str (passwd_data),
+ password)) ?
+ AUTH_ACCEPT :
+ AUTH_REJECT);
+ if (result == AUTH_REJECT)
+ gf_log ("auth/login", GF_LOG_ERROR,
+ "wrong password for user %s",
+ username);
+
+ break;
+ }
+ username_str = strtok_r (NULL, " ,", &tmp);
+ }
+ }
+
+out:
+ GF_FREE (username_cpy);
+
+ return result;
+}
+
+struct volume_options options[] = {
+ { .key = {"auth.login.*.allow"},
+ .type = GF_OPTION_TYPE_ANY
+ },
+ { .key = {"auth.login.*.password"},
+ .type = GF_OPTION_TYPE_ANY
+ },
+ { .key = {NULL} }
+};
diff --git a/xlators/protocol/client/Makefile.am b/xlators/protocol/client/Makefile.am
index d471a3f92..af437a64d 100644
--- a/xlators/protocol/client/Makefile.am
+++ b/xlators/protocol/client/Makefile.am
@@ -1,3 +1 @@
SUBDIRS = src
-
-CLEANFILES =
diff --git a/xlators/protocol/client/src/Makefile.am b/xlators/protocol/client/src/Makefile.am
index fb720942c..cf89d42da 100644
--- a/xlators/protocol/client/src/Makefile.am
+++ b/xlators/protocol/client/src/Makefile.am
@@ -2,15 +2,19 @@
xlator_LTLIBRARIES = client.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/protocol
-client_la_LDFLAGS = -module -avoidversion
+client_la_LDFLAGS = -module -avoid-version
-client_la_SOURCES = client-protocol.c saved-frames.c
-client_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+client_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la
-noinst_HEADERS = client-protocol.h saved-frames.h
+client_la_SOURCES = client.c client-helpers.c client-rpc-fops.c \
+ client-handshake.c client-callback.c client-lk.c
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+noinst_HEADERS = client.h client-mem-types.h
-CLEANFILES =
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_srcdir)/rpc/rpc-lib/src/
+AM_CFLAGS = -Wall $(GF_CFLAGS)
diff --git a/xlators/protocol/client/src/client-callback.c b/xlators/protocol/client/src/client-callback.c
new file mode 100644
index 000000000..d886862f7
--- /dev/null
+++ b/xlators/protocol/client/src/client-callback.c
@@ -0,0 +1,56 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "client.h"
+#include "rpc-clnt.h"
+
+int
+client_cbk_null (struct rpc_clnt *rpc, void *mydata, void *data)
+{
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "this function should not be called");
+ return 0;
+}
+
+int
+client_cbk_fetchspec (struct rpc_clnt *rpc, void *mydata, void *data)
+{
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "this function should not be called");
+ return 0;
+}
+
+int
+client_cbk_ino_flush (struct rpc_clnt *rpc, void *mydata, void *data)
+{
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "this function should not be called");
+ return 0;
+}
+
+rpcclnt_cb_actor_t gluster_cbk_actors[] = {
+ [GF_CBK_NULL] = {"NULL", GF_CBK_NULL, client_cbk_null },
+ [GF_CBK_FETCHSPEC] = {"FETCHSPEC", GF_CBK_FETCHSPEC, client_cbk_fetchspec },
+ [GF_CBK_INO_FLUSH] = {"INO_FLUSH", GF_CBK_INO_FLUSH, client_cbk_ino_flush },
+};
+
+
+struct rpcclnt_cb_program gluster_cbk_prog = {
+ .progname = "GlusterFS Callback",
+ .prognum = GLUSTER_CBK_PROGRAM,
+ .progver = GLUSTER_CBK_VERSION,
+ .actors = gluster_cbk_actors,
+ .numactors = GF_CBK_MAXVALUE,
+};
diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c
new file mode 100644
index 000000000..5668fea53
--- /dev/null
+++ b/xlators/protocol/client/src/client-handshake.c
@@ -0,0 +1,1960 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "fd-lk.h"
+#include "client.h"
+#include "xlator.h"
+#include "defaults.h"
+#include "glusterfs.h"
+#include "statedump.h"
+#include "compat-errno.h"
+
+#include "glusterfs3.h"
+#include "portmap-xdr.h"
+#include "rpc-common-xdr.h"
+
+#define CLIENT_REOPEN_MAX_ATTEMPTS 1024
+extern rpc_clnt_prog_t clnt3_3_fop_prog;
+extern rpc_clnt_prog_t clnt_pmap_prog;
+
+int client_ping_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe);
+
+int client_set_lk_version_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe);
+
+int client_set_lk_version (xlator_t *this);
+
+typedef struct client_fd_lk_local {
+ int ref;
+ gf_boolean_t error;
+ gf_lock_t lock;
+ clnt_fd_ctx_t *fdctx;
+}clnt_fd_lk_local_t;
+
+/* Handshake */
+
+void
+rpc_client_ping_timer_expired (void *data)
+{
+ rpc_transport_t *trans = NULL;
+ rpc_clnt_connection_t *conn = NULL;
+ int disconnect = 0;
+ int transport_activity = 0;
+ struct timespec timeout = {0, };
+ struct timeval current = {0, };
+ struct rpc_clnt *clnt = NULL;
+ xlator_t *this = NULL;
+ clnt_conf_t *conf = NULL;
+
+ this = data;
+
+ if (!this || !this->private) {
+ gf_log (THIS->name, GF_LOG_WARNING, "xlator initialization not done");
+ goto out;
+ }
+
+ conf = this->private;
+
+ clnt = conf->rpc;
+ if (!clnt) {
+ gf_log (this->name, GF_LOG_WARNING, "rpc not initialized");
+ goto out;
+ }
+
+ conn = &clnt->conn;
+ trans = conn->trans;
+
+ if (!trans) {
+ gf_log (this->name, GF_LOG_WARNING, "transport not initialized");
+ goto out;
+ }
+
+ pthread_mutex_lock (&conn->lock);
+ {
+ if (conn->ping_timer)
+ gf_timer_call_cancel (this->ctx,
+ conn->ping_timer);
+ gettimeofday (&current, NULL);
+
+ if (((current.tv_sec - conn->last_received.tv_sec) <
+ conf->opt.ping_timeout)
+ || ((current.tv_sec - conn->last_sent.tv_sec) <
+ conf->opt.ping_timeout)) {
+ transport_activity = 1;
+ }
+
+ if (transport_activity) {
+ gf_log (trans->name, GF_LOG_TRACE,
+ "ping timer expired but transport activity "
+ "detected - not bailing transport");
+ timeout.tv_sec = conf->opt.ping_timeout;
+ timeout.tv_nsec = 0;
+
+ conn->ping_timer =
+ gf_timer_call_after (this->ctx, timeout,
+ rpc_client_ping_timer_expired,
+ (void *) this);
+ if (conn->ping_timer == NULL)
+ gf_log (trans->name, GF_LOG_WARNING,
+ "unable to setup ping timer");
+
+ } else {
+ conn->ping_started = 0;
+ conn->ping_timer = NULL;
+ disconnect = 1;
+ }
+ }
+ pthread_mutex_unlock (&conn->lock);
+
+ if (disconnect) {
+ gf_log (trans->name, GF_LOG_CRITICAL,
+ "server %s has not responded in the last %d "
+ "seconds, disconnecting.",
+ conn->trans->peerinfo.identifier,
+ conf->opt.ping_timeout);
+
+ rpc_transport_disconnect (conn->trans);
+ }
+
+out:
+ return;
+}
+
+void
+client_start_ping (void *data)
+{
+ xlator_t *this = NULL;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_connection_t *conn = NULL;
+ int32_t ret = -1;
+ struct timespec timeout = {0, };
+ call_frame_t *frame = NULL;
+ int frame_count = 0;
+
+ this = data;
+ if (!this || !this->private) {
+ gf_log (THIS->name, GF_LOG_WARNING, "xlator not initialized");
+ goto fail;
+ }
+
+ conf = this->private;
+ if (!conf->rpc) {
+ gf_log (this->name, GF_LOG_WARNING, "rpc not initialized");
+ goto fail;
+ }
+ conn = &conf->rpc->conn;
+
+ if (conf->opt.ping_timeout == 0) {
+ gf_log (this->name, GF_LOG_INFO, "ping timeout is 0, returning");
+ return;
+ }
+
+ pthread_mutex_lock (&conn->lock);
+ {
+ if (conn->ping_timer)
+ gf_timer_call_cancel (this->ctx, conn->ping_timer);
+
+ conn->ping_timer = NULL;
+ conn->ping_started = 0;
+
+ if (conn->saved_frames)
+ /* treat the case where conn->saved_frames is NULL
+ as no pending frames */
+ frame_count = conn->saved_frames->count;
+
+ if ((frame_count == 0) || !conn->connected) {
+ /* using goto looked ugly here,
+ * hence getting out this way */
+ /* unlock */
+ gf_log (this->name, GF_LOG_DEBUG,
+ "returning as transport is already disconnected"
+ " OR there are no frames (%d || %d)",
+ frame_count, !conn->connected);
+
+ pthread_mutex_unlock (&conn->lock);
+ return;
+ }
+
+ if (frame_count < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "saved_frames->count is %"PRId64,
+ conn->saved_frames->count);
+ conn->saved_frames->count = 0;
+ }
+
+ timeout.tv_sec = conf->opt.ping_timeout;
+ timeout.tv_nsec = 0;
+
+ conn->ping_timer =
+ gf_timer_call_after (this->ctx, timeout,
+ rpc_client_ping_timer_expired,
+ (void *) this);
+
+ if (conn->ping_timer == NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "unable to setup ping timer");
+ } else {
+ conn->ping_started = 1;
+ }
+ }
+ pthread_mutex_unlock (&conn->lock);
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ goto fail;
+
+ ret = client_submit_request (this, NULL, frame, conf->handshake,
+ GF_HNDSK_PING, client_ping_cbk, NULL,
+ NULL, 0, NULL, 0, NULL, (xdrproc_t)NULL);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "failed to start ping timer");
+ }
+
+ return;
+
+fail:
+ if (frame) {
+ STACK_DESTROY (frame->root);
+ }
+
+ return;
+}
+
+
+int
+client_ping_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ xlator_t *this = NULL;
+ rpc_clnt_connection_t *conn = NULL;
+ struct timespec timeout = {0, };
+ call_frame_t *frame = NULL;
+ clnt_conf_t *conf = NULL;
+
+ if (!myframe) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "frame with the request is NULL");
+ goto out;
+ }
+ frame = myframe;
+ this = frame->this;
+ if (!this || !this->private) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "xlator private is not set");
+ goto out;
+ }
+
+ conf = this->private;
+ conn = &conf->rpc->conn;
+
+ pthread_mutex_lock (&conn->lock);
+ {
+ if (req->rpc_status == -1) {
+ if (conn->ping_timer != NULL) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "socket or ib related error");
+ gf_timer_call_cancel (this->ctx,
+ conn->ping_timer);
+ conn->ping_timer = NULL;
+ } else {
+ /* timer expired and transport bailed out */
+ gf_log (this->name, GF_LOG_WARNING,
+ "timer must have expired");
+ }
+
+ goto unlock;
+ }
+
+
+ timeout.tv_sec = conf->opt.ping_timeout;
+ timeout.tv_nsec = 0;
+
+ gf_timer_call_cancel (this->ctx,
+ conn->ping_timer);
+
+ conn->ping_timer =
+ gf_timer_call_after (this->ctx, timeout,
+ client_start_ping, (void *)this);
+
+ if (conn->ping_timer == NULL)
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set the ping timer");
+ }
+unlock:
+ pthread_mutex_unlock (&conn->lock);
+out:
+ if (frame)
+ STACK_DESTROY (frame->root);
+ return 0;
+}
+
+
+int
+client3_getspec_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_getspec_rsp rsp = {0,};
+ call_frame_t *frame = NULL;
+ int ret = 0;
+
+ frame = myframe;
+
+ if (!frame || !frame->this) {
+ gf_log (THIS->name, GF_LOG_ERROR, "frame not found with the request, "
+ "returning EINVAL");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+ if (-1 == req->rpc_status) {
+ gf_log (frame->this->name, GF_LOG_WARNING,
+ "received RPC status error, returning ENOTCONN");
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "XDR decoding failed, returning EINVAL");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 == rsp.op_ret) {
+ gf_log (frame->this->name, GF_LOG_WARNING,
+ "failed to get the 'volume file' from server");
+ goto out;
+ }
+
+out:
+ CLIENT_STACK_UNWIND (getspec, frame, rsp.op_ret, rsp.op_errno,
+ rsp.spec);
+
+ /* Don't use 'GF_FREE', this is allocated by libc */
+ free (rsp.spec);
+
+ return 0;
+}
+
+int32_t client3_getspec (call_frame_t *frame, xlator_t *this, void *data)
+{
+ clnt_conf_t *conf = NULL;
+ clnt_args_t *args = NULL;
+ gf_getspec_req req = {0,};
+ int op_errno = ESTALE;
+ int ret = 0;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+ conf = this->private;
+ req.flags = args->flags;
+ req.key = (char *)args->name;
+
+ ret = client_submit_request (this, &req, frame, conf->handshake,
+ GF_HNDSK_GETSPEC, client3_getspec_cbk,
+ NULL, NULL, 0, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_getspec_req);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to send the request");
+ }
+
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND (getspec, frame, -1, op_errno, NULL);
+ return 0;
+
+}
+
+int
+client_notify_parents_child_up (xlator_t *this)
+{
+ clnt_conf_t *conf = NULL;
+ int ret = 0;
+
+ conf = this->private;
+ ret = default_notify (this, GF_EVENT_CHILD_UP, NULL);
+ if (ret)
+ gf_log (this->name, GF_LOG_INFO,
+ "notify of CHILD_UP failed");
+
+ conf->last_sent_event = GF_EVENT_CHILD_UP;
+ return 0;
+}
+
+int
+clnt_fd_lk_reacquire_failed (xlator_t *this, clnt_fd_ctx_t *fdctx,
+ clnt_conf_t *conf)
+{
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("client", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, conf, out);
+ GF_VALIDATE_OR_GOTO (this->name, fdctx, out);
+
+ pthread_mutex_lock (&conf->lock);
+ {
+ fdctx->remote_fd = -1;
+ fdctx->lk_heal_state = GF_LK_HEAL_DONE;
+ }
+ pthread_mutex_unlock (&conf->lock);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+client_set_lk_version_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int32_t ret = -1;
+ call_frame_t *fr = NULL;
+ gf_set_lk_ver_rsp rsp = {0,};
+
+ fr = (call_frame_t *) myframe;
+ GF_VALIDATE_OR_GOTO ("client", fr, out);
+
+ if (req->rpc_status == -1) {
+ gf_log (fr->this->name, GF_LOG_WARNING,
+ "received RPC status error");
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_set_lk_ver_rsp);
+ if (ret < 0)
+ gf_log (fr->this->name, GF_LOG_WARNING,
+ "xdr decoding failed");
+ else
+ gf_log (fr->this->name, GF_LOG_INFO,
+ "Server lk version = %d", rsp.lk_ver);
+
+ ret = 0;
+out:
+ if (fr)
+ STACK_DESTROY (fr->root);
+
+ return ret;
+}
+
+//TODO: Check for all released fdctx and destroy them
+int
+client_set_lk_version (xlator_t *this)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ call_frame_t *frame = NULL;
+ gf_set_lk_ver_req req = {0, };
+
+ GF_VALIDATE_OR_GOTO ("client", this, err);
+
+ conf = (clnt_conf_t *) this->private;
+
+ req.lk_ver = client_get_lk_ver (conf);
+ ret = gf_asprintf (&req.uid, "%s-%s-%d",
+ this->ctx->process_uuid, this->name,
+ this->graph->id);
+ if (ret == -1)
+ goto err;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "Sending SET_LK_VERSION");
+
+ ret = client_submit_request (this, &req, frame,
+ conf->handshake,
+ GF_HNDSK_SET_LK_VER,
+ client_set_lk_version_cbk,
+ NULL, NULL, 0, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_set_lk_ver_req);
+out:
+ GF_FREE (req.uid);
+ return ret;
+err:
+ gf_log (this->name, GF_LOG_WARNING,
+ "Failed to send SET_LK_VERSION to server");
+
+ return ret;
+}
+
+int
+client_fd_lk_count (fd_lk_ctx_t *lk_ctx)
+{
+ int count = 0;
+ fd_lk_ctx_node_t *fd_lk = NULL;
+
+ GF_VALIDATE_OR_GOTO ("client", lk_ctx, err);
+
+ LOCK (&lk_ctx->lock);
+ {
+ list_for_each_entry (fd_lk, &lk_ctx->lk_list, next)
+ count++;
+ }
+ UNLOCK (&lk_ctx->lock);
+
+ return count;
+err:
+ return -1;
+}
+
+clnt_fd_lk_local_t *
+clnt_fd_lk_local_ref (xlator_t *this, clnt_fd_lk_local_t *local)
+{
+ GF_VALIDATE_OR_GOTO (this->name, local, out);
+
+ LOCK (&local->lock);
+ {
+ local->ref++;
+ }
+ UNLOCK (&local->lock);
+out:
+ return local;
+}
+
+int
+clnt_fd_lk_local_unref (xlator_t *this, clnt_fd_lk_local_t *local)
+{
+ int ref = -1;
+
+ GF_VALIDATE_OR_GOTO (this->name, local, out);
+
+ LOCK (&local->lock);
+ {
+ ref = --local->ref;
+ }
+ UNLOCK (&local->lock);
+
+ if (ref == 0) {
+ LOCK_DESTROY (&local->lock);
+ GF_FREE (local);
+ }
+out:
+ return ref;
+}
+
+clnt_fd_lk_local_t *
+clnt_fd_lk_local_create (clnt_fd_ctx_t *fdctx)
+{
+ clnt_fd_lk_local_t *local = NULL;
+
+ local = GF_CALLOC (1, sizeof (clnt_fd_lk_local_t),
+ gf_client_mt_clnt_fd_lk_local_t);
+ if (!local)
+ goto out;
+
+ local->ref = 1;
+ local->error = _gf_false;
+ local->fdctx = fdctx;
+
+ LOCK_INIT (&local->lock);
+out:
+ return local;
+}
+
+void
+clnt_mark_fd_bad (clnt_conf_t *conf, clnt_fd_ctx_t *fdctx)
+{
+ pthread_mutex_lock (&conf->lock);
+ {
+ fdctx->remote_fd = -1;
+ }
+ pthread_mutex_unlock (&conf->lock);
+}
+
+int
+clnt_release_reopen_fd_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ xlator_t *this = NULL;
+ call_frame_t *frame = NULL;
+ clnt_conf_t *conf = NULL;
+ clnt_fd_ctx_t *fdctx = NULL;
+
+ frame = myframe;
+ this = frame->this;
+ fdctx = (clnt_fd_ctx_t *) frame->local;
+ conf = (clnt_conf_t *) this->private;
+
+ clnt_fd_lk_reacquire_failed (this, fdctx, conf);
+
+ fdctx->reopen_done (fdctx, this);
+
+ frame->local = NULL;
+ STACK_DESTROY (frame->root);
+
+ return 0;
+}
+
+int
+clnt_release_reopen_fd (xlator_t *this, clnt_fd_ctx_t *fdctx)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ call_frame_t *frame = NULL;
+ gfs3_release_req req = {{0,},};
+
+ conf = (clnt_conf_t *) this->private;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ goto out;
+
+ frame->local = (void *) fdctx;
+ req.fd = fdctx->remote_fd;
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_RELEASE,
+ clnt_release_reopen_fd_cbk, NULL,
+ NULL, 0, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_releasedir_req);
+ return 0;
+ out:
+ if (ret) {
+ clnt_fd_lk_reacquire_failed (this, fdctx, conf);
+ fdctx->reopen_done (fdctx, this);
+ if (frame) {
+ frame->local = NULL;
+ STACK_DESTROY (frame->root);
+ }
+ }
+ return 0;
+}
+
+int
+clnt_reacquire_lock_error (xlator_t *this, clnt_fd_ctx_t *fdctx,
+ clnt_conf_t *conf)
+{
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("client", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, fdctx, out);
+ GF_VALIDATE_OR_GOTO (this->name, conf, out);
+
+ clnt_release_reopen_fd (this, fdctx);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+gf_boolean_t
+clnt_fd_lk_local_error_status (xlator_t *this,
+ clnt_fd_lk_local_t *local)
+{
+ gf_boolean_t error = _gf_false;
+
+ LOCK (&local->lock);
+ {
+ error = local->error;
+ }
+ UNLOCK (&local->lock);
+
+ return error;
+}
+
+int
+clnt_fd_lk_local_mark_error (xlator_t *this,
+ clnt_fd_lk_local_t *local)
+{
+ int32_t ret = -1;
+ clnt_conf_t *conf = NULL;
+ gf_boolean_t error = _gf_false;
+
+ GF_VALIDATE_OR_GOTO ("client", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, local, out);
+
+ conf = (clnt_conf_t *) this->private;
+
+ LOCK (&local->lock);
+ {
+ error = local->error;
+ local->error = _gf_true;
+ }
+ UNLOCK (&local->lock);
+
+ if (!error)
+ clnt_reacquire_lock_error (this, local->fdctx, conf);
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+client_reacquire_lock_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ gfs3_lk_rsp rsp = {0,};
+ call_frame_t *frame = NULL;
+ clnt_conf_t *conf = NULL;
+ clnt_fd_ctx_t *fdctx = NULL;
+ clnt_fd_lk_local_t *local = NULL;
+ struct gf_flock lock = {0,};
+
+ frame = (call_frame_t *) myframe;
+ this = frame->this;
+ local = (clnt_fd_lk_local_t *) frame->local;
+ conf = (clnt_conf_t *) this->private;
+
+ if (req->rpc_status == -1) {
+ gf_log ("client", GF_LOG_WARNING,
+ "request failed at rpc");
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_lk_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ goto out;
+ }
+
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "lock request failed");
+ ret = -1;
+ goto out;
+ }
+
+ fdctx = local->fdctx;
+
+ gf_proto_flock_to_flock (&rsp.flock, &lock);
+
+ gf_log (this->name, GF_LOG_DEBUG, "%s type lock reacquired on file "
+ "with gfid %s from %"PRIu64 " to %"PRIu64,
+ get_lk_type (lock.l_type), uuid_utoa (fdctx->gfid),
+ lock.l_start, lock.l_start + lock.l_len);
+
+ if (!clnt_fd_lk_local_error_status (this, local) &&
+ clnt_fd_lk_local_unref (this, local) == 0) {
+ pthread_mutex_lock (&conf->lock);
+ {
+ fdctx->lk_heal_state = GF_LK_HEAL_DONE;
+ }
+ pthread_mutex_unlock (&conf->lock);
+
+ fdctx->reopen_done (fdctx, this);
+ }
+
+ ret = 0;
+out:
+ if (ret < 0) {
+ clnt_fd_lk_local_mark_error (this, local);
+
+ clnt_fd_lk_local_unref (this, local);
+ }
+
+ frame->local = NULL;
+ STACK_DESTROY (frame->root);
+
+ return ret;
+}
+
+int
+_client_reacquire_lock (xlator_t *this, clnt_fd_ctx_t *fdctx)
+{
+ int32_t ret = -1;
+ int32_t gf_cmd = 0;
+ int32_t gf_type = 0;
+ gfs3_lk_req req = {{0,},};
+ struct gf_flock flock = {0,};
+ fd_lk_ctx_t *lk_ctx = NULL;
+ clnt_fd_lk_local_t *local = NULL;
+ fd_lk_ctx_node_t *fd_lk = NULL;
+ call_frame_t *frame = NULL;
+ clnt_conf_t *conf = NULL;
+
+ conf = (clnt_conf_t *) this->private;
+ lk_ctx = fdctx->lk_ctx;
+
+ local = clnt_fd_lk_local_create (fdctx);
+ if (!local) {
+ gf_log (this->name, GF_LOG_WARNING, "clnt_fd_lk_local_create "
+ "failed, aborting reacquring of locks on %s.",
+ uuid_utoa (fdctx->gfid));
+ clnt_reacquire_lock_error (this, fdctx, conf);
+ goto out;
+ }
+
+ list_for_each_entry (fd_lk, &lk_ctx->lk_list, next) {
+ memcpy (&flock, &fd_lk->user_flock,
+ sizeof (struct gf_flock));
+
+ /* Always send F_SETLK even if the cmd was F_SETLKW */
+ /* to avoid frame being blocked if lock cannot be granted. */
+ ret = client_cmd_to_gf_cmd (F_SETLK, &gf_cmd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "client_cmd_to_gf_cmd failed, "
+ "aborting reacquiring of locks");
+ break;
+ }
+
+ gf_type = client_type_to_gf_type (flock.l_type);
+ req.fd = fdctx->remote_fd;
+ req.cmd = gf_cmd;
+ req.type = gf_type;
+ (void) gf_proto_flock_from_flock (&req.flock,
+ &flock);
+
+ memcpy (req.gfid, fdctx->gfid, 16);
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ break;
+ }
+
+ frame->local = clnt_fd_lk_local_ref (this, local);
+ frame->root->lk_owner = fd_lk->user_flock.l_owner;
+
+ ret = client_submit_request (this, &req, frame,
+ conf->fops, GFS3_OP_LK,
+ client_reacquire_lock_cbk,
+ NULL, NULL, 0, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_lk_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "reacquiring locks failed on file with gfid %s",
+ uuid_utoa (fdctx->gfid));
+ break;
+ }
+
+ ret = 0;
+ frame = NULL;
+ }
+
+ if (local)
+ (void) clnt_fd_lk_local_unref (this, local);
+out:
+ return ret;
+}
+
+int
+client_reacquire_lock (xlator_t *this, clnt_fd_ctx_t *fdctx)
+{
+ int32_t ret = -1;
+ fd_lk_ctx_t *lk_ctx = NULL;
+
+ GF_VALIDATE_OR_GOTO ("client", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, fdctx, out);
+
+ if (client_fd_lk_list_empty (fdctx->lk_ctx, _gf_false)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "fd lock list is empty");
+ fdctx->reopen_done (fdctx, this);
+ } else {
+ lk_ctx = fdctx->lk_ctx;
+
+ LOCK (&lk_ctx->lock);
+ {
+ (void) _client_reacquire_lock (this, fdctx);
+ }
+ UNLOCK (&lk_ctx->lock);
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+void
+client_default_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this)
+{
+ gf_log_callingfn (this->name, GF_LOG_WARNING,
+ "This function should never be called");
+}
+
+void
+client_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this)
+{
+ clnt_conf_t *conf = NULL;
+ gf_boolean_t destroy = _gf_false;
+
+ conf = this->private;
+
+ pthread_mutex_lock (&conf->lock);
+ {
+ fdctx->reopen_attempts = 0;
+ if (!fdctx->released)
+ list_add_tail (&fdctx->sfd_pos, &conf->saved_fds);
+ else
+ destroy = _gf_true;
+ fdctx->reopen_done = client_default_reopen_done;
+ }
+ pthread_mutex_unlock (&conf->lock);
+
+ if (destroy)
+ client_fdctx_destroy (this, fdctx);
+}
+
+void
+client_child_up_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this)
+{
+ clnt_conf_t *conf = NULL;
+ uint64_t fd_count = 0;
+
+ conf = this->private;
+
+ LOCK (&conf->rec_lock);
+ {
+ fd_count = --(conf->reopen_fd_count);
+ }
+ UNLOCK (&conf->rec_lock);
+
+ client_reopen_done (fdctx, this);
+ if (fd_count == 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "last fd open'd/lock-self-heal'd - notifying CHILD-UP");
+ client_set_lk_version (this);
+ client_notify_parents_child_up (this);
+ }
+}
+
+int
+client3_3_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ int32_t ret = -1;
+ gfs3_open_rsp rsp = {0,};
+ gf_boolean_t attempt_lock_recovery = _gf_false;
+ clnt_local_t *local = NULL;
+ clnt_conf_t *conf = NULL;
+ clnt_fd_ctx_t *fdctx = NULL;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+
+ frame = myframe;
+ this = frame->this;
+ conf = this->private;
+ local = frame->local;
+ fdctx = local->fdctx;
+
+ if (-1 == req->rpc_status) {
+ gf_log (frame->this->name, GF_LOG_WARNING,
+ "received RPC status error, returning ENOTCONN");
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_open_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (rsp.op_ret < 0) {
+ gf_log (frame->this->name, GF_LOG_WARNING,
+ "reopen on %s failed (%s)",
+ local->loc.path, strerror (rsp.op_errno));
+ } else {
+ gf_log (frame->this->name, GF_LOG_DEBUG,
+ "reopen on %s succeeded (remote-fd = %"PRId64")",
+ local->loc.path, rsp.fd);
+ }
+
+ if (rsp.op_ret == -1) {
+ ret = -1;
+ goto out;
+ }
+
+ pthread_mutex_lock (&conf->lock);
+ {
+ fdctx->remote_fd = rsp.fd;
+ if (!fdctx->released) {
+ if (conf->lk_heal &&
+ !client_fd_lk_list_empty (fdctx->lk_ctx,
+ _gf_false)) {
+ attempt_lock_recovery = _gf_true;
+ fdctx->lk_heal_state = GF_LK_HEAL_IN_PROGRESS;
+ }
+ }
+ }
+ pthread_mutex_unlock (&conf->lock);
+
+ ret = 0;
+
+ if (attempt_lock_recovery) {
+ /* Delay decrementing the reopen fd count untill all the
+ locks corresponding to this fd are acquired.*/
+ gf_log (this->name, GF_LOG_DEBUG, "acquiring locks "
+ "on %s", local->loc.path);
+ ret = client_reacquire_lock (frame->this, local->fdctx);
+ if (ret) {
+ clnt_reacquire_lock_error (this, local->fdctx, conf);
+ gf_log (this->name, GF_LOG_WARNING, "acquiring locks "
+ "failed on %s", local->loc.path);
+ }
+ }
+
+out:
+ if (!attempt_lock_recovery)
+ fdctx->reopen_done (fdctx, this);
+
+ frame->local = NULL;
+ STACK_DESTROY (frame->root);
+
+ client_local_wipe (local);
+
+ return 0;
+}
+
+int
+client3_3_reopendir_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ int32_t ret = -1;
+ gfs3_open_rsp rsp = {0,};
+ clnt_local_t *local = NULL;
+ clnt_conf_t *conf = NULL;
+ clnt_fd_ctx_t *fdctx = NULL;
+ call_frame_t *frame = NULL;
+
+ frame = myframe;
+ local = frame->local;
+ fdctx = local->fdctx;
+ conf = frame->this->private;
+
+
+ if (-1 == req->rpc_status) {
+ gf_log (frame->this->name, GF_LOG_WARNING,
+ "received RPC status error, returning ENOTCONN");
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_opendir_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (rsp.op_ret < 0) {
+ gf_log (frame->this->name, GF_LOG_WARNING,
+ "reopendir on %s failed (%s)",
+ local->loc.path, strerror (rsp.op_errno));
+ } else {
+ gf_log (frame->this->name, GF_LOG_INFO,
+ "reopendir on %s succeeded (fd = %"PRId64")",
+ local->loc.path, rsp.fd);
+ }
+
+ if (-1 == rsp.op_ret) {
+ ret = -1;
+ goto out;
+ }
+
+ pthread_mutex_lock (&conf->lock);
+ {
+ fdctx->remote_fd = rsp.fd;
+ }
+ pthread_mutex_unlock (&conf->lock);
+
+out:
+ fdctx->reopen_done (fdctx, frame->this);
+
+ frame->local = NULL;
+ STACK_DESTROY (frame->root);
+ client_local_wipe (local);
+
+ return 0;
+}
+
+static int
+protocol_client_reopendir (clnt_fd_ctx_t *fdctx, xlator_t *this)
+{
+ int ret = -1;
+ gfs3_opendir_req req = {{0,},};
+ clnt_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ clnt_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ ret = -1;
+ goto out;
+ }
+ local->fdctx = fdctx;
+
+ uuid_copy (local->loc.gfid, fdctx->gfid);
+ ret = loc_path (&local->loc, NULL);
+ if (ret < 0)
+ goto out;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ memcpy (req.gfid, fdctx->gfid, 16);
+
+ gf_log (frame->this->name, GF_LOG_DEBUG,
+ "attempting reopen on %s", local->loc.path);
+
+ frame->local = local;
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_OPENDIR,
+ client3_3_reopendir_cbk, NULL,
+ NULL, 0, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_opendir_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to send the re-opendir request");
+ }
+
+ return 0;
+
+out:
+ if (frame) {
+ frame->local = NULL;
+ STACK_DESTROY (frame->root);
+ }
+
+ if (local)
+ client_local_wipe (local);
+
+ fdctx->reopen_done (fdctx, this);
+
+ return 0;
+
+}
+
+static int
+protocol_client_reopenfile (clnt_fd_ctx_t *fdctx, xlator_t *this)
+{
+ int ret = -1;
+ gfs3_open_req req = {{0,},};
+ clnt_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ clnt_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ ret = -1;
+ goto out;
+ }
+
+ local->fdctx = fdctx;
+ uuid_copy (local->loc.gfid, fdctx->gfid);
+ ret = loc_path (&local->loc, NULL);
+ if (ret < 0)
+ goto out;
+
+ frame->local = local;
+
+ memcpy (req.gfid, fdctx->gfid, 16);
+ req.flags = gf_flags_from_flags (fdctx->flags);
+ req.flags = req.flags & (~(O_TRUNC|O_CREAT|O_EXCL));
+
+ gf_log (frame->this->name, GF_LOG_DEBUG,
+ "attempting reopen on %s", local->loc.path);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_OPEN, client3_3_reopen_cbk, NULL,
+ NULL, 0, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_open_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to send the re-open request");
+ }
+
+ return 0;
+
+out:
+ if (frame) {
+ frame->local = NULL;
+ STACK_DESTROY (frame->root);
+ }
+
+ if (local)
+ client_local_wipe (local);
+
+ fdctx->reopen_done (fdctx, this);
+
+ return 0;
+
+}
+
+static void
+protocol_client_reopen (clnt_fd_ctx_t *fdctx, xlator_t *this)
+{
+ if (fdctx->is_dir)
+ protocol_client_reopendir (fdctx, this);
+ else
+ protocol_client_reopenfile (fdctx, this);
+}
+
+gf_boolean_t
+__is_fd_reopen_in_progress (clnt_fd_ctx_t *fdctx)
+{
+ if (fdctx->reopen_done == client_default_reopen_done)
+ return _gf_false;
+ return _gf_true;
+}
+
+void
+client_attempt_reopen (fd_t *fd, xlator_t *this)
+{
+ clnt_conf_t *conf = NULL;
+ clnt_fd_ctx_t *fdctx = NULL;
+ gf_boolean_t reopen = _gf_false;
+
+ if (!fd || !this)
+ goto out;
+
+ conf = this->private;
+ pthread_mutex_lock (&conf->lock);
+ {
+ fdctx = this_fd_get_ctx (fd, this);
+ if (!fdctx)
+ goto unlock;
+ if (__is_fd_reopen_in_progress (fdctx))
+ goto unlock;
+ if (fdctx->remote_fd != -1)
+ goto unlock;
+
+ if (fdctx->reopen_attempts == CLIENT_REOPEN_MAX_ATTEMPTS) {
+ reopen = _gf_true;
+ fdctx->reopen_done = client_reopen_done;
+ list_del_init (&fdctx->sfd_pos);
+ } else {
+ fdctx->reopen_attempts++;
+ }
+ }
+unlock:
+ pthread_mutex_unlock (&conf->lock);
+ if (reopen)
+ protocol_client_reopen (fdctx, this);
+out:
+ return;
+}
+
+int
+client_post_handshake (call_frame_t *frame, xlator_t *this)
+{
+ clnt_conf_t *conf = NULL;
+ clnt_fd_ctx_t *tmp = NULL;
+ clnt_fd_ctx_t *fdctx = NULL;
+ struct list_head reopen_head;
+
+ int count = 0;
+
+ if (!this || !this->private)
+ goto out;
+
+ conf = this->private;
+ INIT_LIST_HEAD (&reopen_head);
+
+ pthread_mutex_lock (&conf->lock);
+ {
+ list_for_each_entry_safe (fdctx, tmp, &conf->saved_fds,
+ sfd_pos) {
+ if (fdctx->remote_fd != -1)
+ continue;
+
+ fdctx->reopen_done = client_child_up_reopen_done;
+ list_del_init (&fdctx->sfd_pos);
+ list_add_tail (&fdctx->sfd_pos, &reopen_head);
+ count++;
+ }
+ }
+ pthread_mutex_unlock (&conf->lock);
+
+ /* Delay notifying CHILD_UP to parents
+ until all locks are recovered */
+ if (count > 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%d fds open - Delaying child_up until they are re-opened",
+ count);
+ client_save_number_fds (conf, count);
+
+ list_for_each_entry_safe (fdctx, tmp, &reopen_head, sfd_pos) {
+ list_del_init (&fdctx->sfd_pos);
+
+ protocol_client_reopen (fdctx, this);
+ }
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "No fds to open - notifying all parents child up");
+ client_set_lk_version (this);
+ client_notify_parents_child_up (this);
+ }
+out:
+ return 0;
+}
+
+int
+client_setvolume_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe)
+{
+ call_frame_t *frame = NULL;
+ clnt_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ dict_t *reply = NULL;
+ char *process_uuid = NULL;
+ char *remote_error = NULL;
+ char *remote_subvol = NULL;
+ gf_setvolume_rsp rsp = {0,};
+ int ret = 0;
+ int32_t op_ret = 0;
+ int32_t op_errno = 0;
+ gf_boolean_t auth_fail = _gf_false;
+ uint32_t lk_ver = 0;
+
+ frame = myframe;
+ this = frame->this;
+ conf = this->private;
+
+ if (-1 == req->rpc_status) {
+ gf_log (frame->this->name, GF_LOG_WARNING,
+ "received RPC status error");
+ op_ret = -1;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_setvolume_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ op_ret = -1;
+ goto out;
+ }
+ op_ret = rsp.op_ret;
+ op_errno = gf_error_to_errno (rsp.op_errno);
+ if (-1 == rsp.op_ret) {
+ gf_log (frame->this->name, GF_LOG_WARNING,
+ "failed to set the volume (%s)",
+ (op_errno)? strerror (op_errno) : "--");
+ }
+
+ reply = dict_new ();
+ if (!reply)
+ goto out;
+
+ if (rsp.dict.dict_len) {
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len, &reply);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_WARNING,
+ "failed to unserialize buffer to dict");
+ goto out;
+ }
+ }
+
+ ret = dict_get_str (reply, "ERROR", &remote_error);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to get ERROR string from reply dict");
+ }
+
+ ret = dict_get_str (reply, "process-uuid", &process_uuid);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to get 'process-uuid' from reply dict");
+ }
+
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "SETVOLUME on remote-host failed: %s",
+ remote_error ? remote_error : strerror (op_errno));
+ errno = op_errno;
+ if (remote_error &&
+ (strcmp ("Authentication failed", remote_error) == 0)) {
+ auth_fail = _gf_true;
+ op_ret = 0;
+ }
+ if (op_errno == ESTALE) {
+ ret = default_notify (this, GF_EVENT_VOLFILE_MODIFIED, NULL);
+ if (ret)
+ gf_log (this->name, GF_LOG_INFO,
+ "notify of VOLFILE_MODIFIED failed");
+ conf->last_sent_event = GF_EVENT_VOLFILE_MODIFIED;
+ }
+ goto out;
+ }
+
+ ret = dict_get_str (this->options, "remote-subvolume",
+ &remote_subvol);
+ if (ret || !remote_subvol) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to find key 'remote-subvolume' in the options");
+ goto out;
+ }
+
+ ret = dict_get_uint32 (reply, "clnt-lk-version", &lk_ver);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to find key 'clnt-lk-version' in the options");
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "clnt-lk-version = %d, "
+ "server-lk-version = %d", client_get_lk_ver (conf), lk_ver);
+ /* TODO: currently setpeer path is broken */
+ /*
+ if (process_uuid && req->conn &&
+ !strcmp (this->ctx->process_uuid, process_uuid)) {
+ rpc_transport_t *peer_trans = NULL;
+ uint64_t peertrans_int = 0;
+
+ ret = dict_get_uint64 (reply, "transport-ptr",
+ &peertrans_int);
+ if (ret)
+ goto out;
+
+ gf_log (this->name, GF_LOG_WARNING,
+ "attaching to the local volume '%s'",
+ remote_subvol);
+
+ peer_trans = (void *) (long) (peertrans_int);
+
+ rpc_transport_setpeer (req->conn->trans, peer_trans);
+ }
+ */
+
+ gf_log (this->name, GF_LOG_INFO,
+ "Connected to %s, attached to remote volume '%s'.",
+ conf->rpc->conn.trans->peerinfo.identifier,
+ remote_subvol);
+
+ rpc_clnt_set_connected (&conf->rpc->conn);
+
+ op_ret = 0;
+ conf->connecting = 0;
+ conf->connected = 1;
+
+ conf->need_different_port = 0;
+
+ if (lk_ver != client_get_lk_ver (conf)) {
+ gf_log (this->name, GF_LOG_INFO, "Server and Client "
+ "lk-version numbers are not same, reopening the fds");
+ client_mark_fd_bad (this);
+ client_post_handshake (frame, frame->this);
+ } else {
+ /*TODO: Traverse the saved fd list, and send
+ release to the server on fd's that were closed
+ during grace period */
+ gf_log (this->name, GF_LOG_INFO, "Server and Client "
+ "lk-version numbers are same, no need to "
+ "reopen the fds");
+ }
+
+out:
+ if (auth_fail) {
+ gf_log (this->name, GF_LOG_INFO, "sending AUTH_FAILED event");
+ ret = default_notify (this, GF_EVENT_AUTH_FAILED, NULL);
+ if (ret)
+ gf_log (this->name, GF_LOG_INFO,
+ "notify of AUTH_FAILED failed");
+ conf->connecting = 0;
+ conf->connected = 0;
+ conf->last_sent_event = GF_EVENT_AUTH_FAILED;
+ ret = -1;
+ }
+ if (-1 == op_ret) {
+ /* Let the connection/re-connection happen in
+ * background, for now, don't hang here,
+ * tell the parents that i am all ok..
+ */
+ gf_log (this->name, GF_LOG_INFO, "sending CHILD_CONNECTING event");
+ ret = default_notify (this, GF_EVENT_CHILD_CONNECTING, NULL);
+ if (ret)
+ gf_log (this->name, GF_LOG_INFO,
+ "notify of CHILD_CONNECTING failed");
+ conf->last_sent_event = GF_EVENT_CHILD_CONNECTING;
+ conf->connecting= 1;
+ ret = 0;
+ }
+
+ free (rsp.dict.dict_val);
+
+ STACK_DESTROY (frame->root);
+
+ if (reply)
+ dict_unref (reply);
+
+ return ret;
+}
+
+int
+client_setvolume (xlator_t *this, struct rpc_clnt *rpc)
+{
+ int ret = 0;
+ gf_setvolume_req req = {{0,},};
+ call_frame_t *fr = NULL;
+ char *process_uuid_xl = NULL;
+ clnt_conf_t *conf = NULL;
+ dict_t *options = NULL;
+
+ options = this->options;
+ conf = this->private;
+
+ if (conf->fops) {
+ ret = dict_set_int32 (options, "fops-version",
+ conf->fops->prognum);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set version-fops(%d) in handshake msg",
+ conf->fops->prognum);
+ goto fail;
+ }
+ }
+
+ if (conf->mgmt) {
+ ret = dict_set_int32 (options, "mgmt-version", conf->mgmt->prognum);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set version-mgmt(%d) in handshake msg",
+ conf->mgmt->prognum);
+ goto fail;
+ }
+ }
+
+ /* With multiple graphs possible in the same process, we need a
+ field to bring the uniqueness. Graph-ID should be enough to get the
+ job done
+ */
+ ret = gf_asprintf (&process_uuid_xl, "%s-%s-%d",
+ this->ctx->process_uuid, this->name,
+ this->graph->id);
+ if (-1 == ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "asprintf failed while setting process_uuid");
+ goto fail;
+ }
+
+ ret = dict_set_dynstr (options, "process-uuid", process_uuid_xl);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set process-uuid(%s) in handshake msg",
+ process_uuid_xl);
+ goto fail;
+ }
+
+ ret = dict_set_str (options, "client-version", PACKAGE_VERSION);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set client-version(%s) in handshake msg",
+ PACKAGE_VERSION);
+ }
+
+ if (this->ctx->cmd_args.volfile_server) {
+ if (this->ctx->cmd_args.volfile_id) {
+ ret = dict_set_str (options, "volfile-key",
+ this->ctx->cmd_args.volfile_id);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set 'volfile-key'");
+ }
+ ret = dict_set_uint32 (options, "volfile-checksum",
+ this->graph->volfile_checksum);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set 'volfile-checksum'");
+ }
+
+ ret = dict_set_int16 (options, "clnt-lk-version",
+ client_get_lk_ver (conf));
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set clnt-lk-version(%"PRIu32") in handshake msg",
+ client_get_lk_ver (conf));
+ }
+
+ ret = dict_serialized_length (options);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to get serialized length of dict");
+ ret = -1;
+ goto fail;
+ }
+ req.dict.dict_len = ret;
+ req.dict.dict_val = GF_CALLOC (1, req.dict.dict_len,
+ gf_client_mt_clnt_req_buf_t);
+ ret = dict_serialize (options, req.dict.dict_val);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to serialize dictionary");
+ goto fail;
+ }
+
+ fr = create_frame (this, this->ctx->pool);
+ if (!fr)
+ goto fail;
+
+ ret = client_submit_request (this, &req, fr, conf->handshake,
+ GF_HNDSK_SETVOLUME, client_setvolume_cbk,
+ NULL, NULL, 0, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_setvolume_req);
+
+fail:
+ GF_FREE (req.dict.dict_val);
+
+ return ret;
+}
+
+int
+select_server_supported_programs (xlator_t *this, gf_prog_detail *prog)
+{
+ gf_prog_detail *trav = NULL;
+ clnt_conf_t *conf = NULL;
+ int ret = -1;
+
+ if (!this || !prog) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "xlator not found OR RPC program not found");
+ goto out;
+ }
+
+ conf = this->private;
+ trav = prog;
+
+ while (trav) {
+ /* Select 'programs' */
+ if ((clnt3_3_fop_prog.prognum == trav->prognum) &&
+ (clnt3_3_fop_prog.progver == trav->progver)) {
+ conf->fops = &clnt3_3_fop_prog;
+ gf_log (this->name, GF_LOG_INFO,
+ "Using Program %s, Num (%"PRId64"), "
+ "Version (%"PRId64")",
+ trav->progname, trav->prognum, trav->progver);
+ ret = 0;
+ }
+ if (ret) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "%s (%"PRId64") not supported", trav->progname,
+ trav->progver);
+ }
+ trav = trav->next;
+ }
+
+out:
+ return ret;
+}
+
+
+int
+server_has_portmap (xlator_t *this, gf_prog_detail *prog)
+{
+ gf_prog_detail *trav = NULL;
+ int ret = -1;
+
+ if (!this || !prog) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "xlator not found OR RPC program not found");
+ goto out;
+ }
+
+ trav = prog;
+
+ while (trav) {
+ if ((trav->prognum == GLUSTER_PMAP_PROGRAM) &&
+ (trav->progver == GLUSTER_PMAP_VERSION)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "detected portmapper on server");
+ ret = 0;
+ break;
+ }
+ trav = trav->next;
+ }
+
+out:
+ return ret;
+}
+
+
+int
+client_query_portmap_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe)
+{
+ struct pmap_port_by_brick_rsp rsp = {0,};
+ call_frame_t *frame = NULL;
+ clnt_conf_t *conf = NULL;
+ int ret = -1;
+ struct rpc_clnt_config config = {0, };
+ xlator_t *this = NULL;
+
+ frame = myframe;
+ if (!frame || !frame->this || !frame->this->private) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "frame not found with rpc request");
+ goto out;
+ }
+ this = frame->this;
+ conf = frame->this->private;
+
+ if (-1 == req->rpc_status) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "received RPC status error, try again later");
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_pmap_port_by_brick_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ goto out;
+ }
+
+ if (-1 == rsp.op_ret) {
+ ret = -1;
+ gf_log (this->name, ((!conf->portmap_err_logged) ?
+ GF_LOG_ERROR : GF_LOG_DEBUG),
+ "failed to get the port number for remote subvolume. "
+ "Please run 'gluster volume status' on server to see "
+ "if brick process is running.");
+ conf->portmap_err_logged = 1;
+ goto out;
+ }
+
+ conf->portmap_err_logged = 0;
+ conf->disconnect_err_logged = 0;
+
+ config.remote_port = rsp.port;
+ rpc_clnt_reconfig (conf->rpc, &config);
+
+ conf->skip_notify = 1;
+ conf->quick_reconnect = 1;
+
+out:
+ if (frame)
+ STACK_DESTROY (frame->root);
+
+ if (conf) {
+ /* Need this to connect the same transport on different port */
+ /* ie, glusterd to glusterfsd */
+ rpc_transport_disconnect (conf->rpc->conn.trans);
+ }
+
+ return ret;
+}
+
+
+int
+client_query_portmap (xlator_t *this, struct rpc_clnt *rpc)
+{
+ int ret = -1;
+ pmap_port_by_brick_req req = {0,};
+ call_frame_t *fr = NULL;
+ clnt_conf_t *conf = NULL;
+ dict_t *options = NULL;
+ char *remote_subvol = NULL;
+ char *xprt = NULL;
+ char brick_name[PATH_MAX] = {0,};
+
+ options = this->options;
+ conf = this->private;
+
+ ret = dict_get_str (options, "remote-subvolume", &remote_subvol);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "remote-subvolume not set in volfile");
+ goto fail;
+ }
+
+ req.brick = remote_subvol;
+
+ /* FIXME: Dirty work around */
+ if (!dict_get_str (options, "transport-type", &xprt)) {
+ /* This logic is required only in case of 'rdma' client
+ transport-type and the volume is of 'tcp,rdma'
+ transport type. */
+ if (!strcmp (xprt, "rdma")) {
+ if (!conf->need_different_port) {
+ snprintf (brick_name, PATH_MAX, "%s.rdma",
+ remote_subvol);
+ req.brick = brick_name;
+ conf->need_different_port = 1;
+ conf->skip_notify = 1;
+ } else {
+ conf->need_different_port = 0;
+ conf->skip_notify = 0;
+ }
+ }
+ }
+
+ fr = create_frame (this, this->ctx->pool);
+ if (!fr) {
+ ret = -1;
+ goto fail;
+ }
+
+ ret = client_submit_request (this, &req, fr, &clnt_pmap_prog,
+ GF_PMAP_PORTBYBRICK,
+ client_query_portmap_cbk,
+ NULL, NULL, 0, NULL, 0, NULL,
+ (xdrproc_t)xdr_pmap_port_by_brick_req);
+
+fail:
+ return ret;
+}
+
+
+int
+client_dump_version_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_dump_rsp rsp = {0,};
+ gf_prog_detail *trav = NULL;
+ gf_prog_detail *next = NULL;
+ call_frame_t *frame = NULL;
+ clnt_conf_t *conf = NULL;
+ int ret = 0;
+
+ frame = myframe;
+ conf = frame->this->private;
+
+ if (-1 == req->rpc_status) {
+ gf_log (frame->this->name, GF_LOG_WARNING,
+ "received RPC status error");
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_dump_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR, "XDR decoding failed");
+ goto out;
+ }
+ if (-1 == rsp.op_ret) {
+ gf_log (frame->this->name, GF_LOG_WARNING,
+ "failed to get the 'versions' from server");
+ goto out;
+ }
+
+ if (server_has_portmap (frame->this, rsp.prog) == 0) {
+ ret = client_query_portmap (frame->this, conf->rpc);
+ goto out;
+ }
+
+ /* Check for the proper version string */
+ /* Reply in "Name:Program-Number:Program-Version,..." format */
+ ret = select_server_supported_programs (frame->this, rsp.prog);
+ if (ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "server doesn't support the version");
+ goto out;
+ }
+
+ client_setvolume (frame->this, conf->rpc);
+
+out:
+ /* don't use GF_FREE, buffer was allocated by libc */
+ if (rsp.prog) {
+ trav = rsp.prog;
+ while (trav) {
+ next = trav->next;
+ free (trav->progname);
+ free (trav);
+ trav = next;
+ }
+ }
+
+ STACK_DESTROY (frame->root);
+
+ if (ret != 0)
+ rpc_transport_disconnect (conf->rpc->conn.trans);
+
+ return ret;
+}
+
+int
+client_handshake (xlator_t *this, struct rpc_clnt *rpc)
+{
+ call_frame_t *frame = NULL;
+ clnt_conf_t *conf = NULL;
+ gf_dump_req req = {0,};
+ int ret = 0;
+
+ conf = this->private;
+ if (!conf->handshake) {
+ gf_log (this->name, GF_LOG_WARNING, "handshake program not found");
+ goto out;
+ }
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ goto out;
+
+ req.gfs_id = 0xbabe;
+ ret = client_submit_request (this, &req, frame, conf->dump,
+ GF_DUMP_DUMP, client_dump_version_cbk,
+ NULL, NULL, 0, NULL, 0,
+ NULL, (xdrproc_t)xdr_gf_dump_req);
+
+out:
+ return ret;
+}
+
+char *clnt_handshake_procs[GF_HNDSK_MAXVALUE] = {
+ [GF_HNDSK_NULL] = "NULL",
+ [GF_HNDSK_SETVOLUME] = "SETVOLUME",
+ [GF_HNDSK_GETSPEC] = "GETSPEC",
+ [GF_HNDSK_PING] = "PING",
+ [GF_HNDSK_SET_LK_VER] = "SET_LK_VER"
+};
+
+rpc_clnt_prog_t clnt_handshake_prog = {
+ .progname = "GlusterFS Handshake",
+ .prognum = GLUSTER_HNDSK_PROGRAM,
+ .progver = GLUSTER_HNDSK_VERSION,
+ .procnames = clnt_handshake_procs,
+};
+
+char *clnt_dump_proc[GF_DUMP_MAXVALUE] = {
+ [GF_DUMP_NULL] = "NULL",
+ [GF_DUMP_DUMP] = "DUMP",
+};
+
+rpc_clnt_prog_t clnt_dump_prog = {
+ .progname = "GF-DUMP",
+ .prognum = GLUSTER_DUMP_PROGRAM,
+ .progver = GLUSTER_DUMP_VERSION,
+ .procnames = clnt_dump_proc,
+};
+
+char *clnt_pmap_procs[GF_PMAP_MAXVALUE] = {
+ [GF_PMAP_PORTBYBRICK] = "PORTBYBRICK",
+};
+
+rpc_clnt_prog_t clnt_pmap_prog = {
+ .progname = "PORTMAP",
+ .prognum = GLUSTER_PMAP_PROGRAM,
+ .progver = GLUSTER_PMAP_VERSION,
+ .procnames = clnt_pmap_procs,
+};
diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c
new file mode 100644
index 000000000..5d9f00fdc
--- /dev/null
+++ b/xlators/protocol/client/src/client-helpers.c
@@ -0,0 +1,349 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "client.h"
+#include "fd.h"
+
+int
+client_fd_lk_list_empty (fd_lk_ctx_t *lk_ctx, gf_boolean_t try_lock)
+{
+ int ret = 1;
+
+ if (!lk_ctx) {
+ ret = -1;
+ goto out;
+ }
+
+ if (try_lock) {
+ ret = TRY_LOCK (&lk_ctx->lock);
+ if (ret != 0) {
+ ret = -1;
+ goto out;
+ }
+ } else {
+ LOCK (&lk_ctx->lock);
+ }
+
+ ret = list_empty (&lk_ctx->lk_list);
+ UNLOCK (&lk_ctx->lock);
+out:
+ return ret;
+}
+
+clnt_fd_ctx_t *
+this_fd_del_ctx (fd_t *file, xlator_t *this)
+{
+ int dict_ret = -1;
+ uint64_t ctxaddr = 0;
+
+ GF_VALIDATE_OR_GOTO ("client", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, file, out);
+
+ dict_ret = fd_ctx_del (file, this, &ctxaddr);
+
+ if (dict_ret < 0) {
+ ctxaddr = 0;
+ }
+
+out:
+ return (clnt_fd_ctx_t *)(unsigned long)ctxaddr;
+}
+
+
+clnt_fd_ctx_t *
+this_fd_get_ctx (fd_t *file, xlator_t *this)
+{
+ int dict_ret = -1;
+ uint64_t ctxaddr = 0;
+
+ GF_VALIDATE_OR_GOTO ("client", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, file, out);
+
+ dict_ret = fd_ctx_get (file, this, &ctxaddr);
+
+ if (dict_ret < 0) {
+ ctxaddr = 0;
+ }
+
+out:
+ return (clnt_fd_ctx_t *)(unsigned long)ctxaddr;
+}
+
+
+void
+this_fd_set_ctx (fd_t *file, xlator_t *this, loc_t *loc, clnt_fd_ctx_t *ctx)
+{
+ uint64_t oldaddr = 0;
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("client", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, file, out);
+
+ ret = fd_ctx_get (file, this, &oldaddr);
+ if (ret >= 0) {
+ if (loc)
+ gf_log (this->name, GF_LOG_INFO,
+ "%s (%s): trying duplicate remote fd set. ",
+ loc->path, uuid_utoa (loc->inode->gfid));
+ else
+ gf_log (this->name, GF_LOG_INFO,
+ "%p: trying duplicate remote fd set. ", file);
+ }
+
+ ret = fd_ctx_set (file, this, (uint64_t)(unsigned long)ctx);
+ if (ret < 0) {
+ if (loc)
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s (%s): failed to set remote fd",
+ loc->path, uuid_utoa (loc->inode->gfid));
+ else
+ gf_log (this->name, GF_LOG_WARNING,
+ "%p: failed to set remote fd", file);
+ }
+out:
+ return;
+}
+
+
+int
+client_local_wipe (clnt_local_t *local)
+{
+ if (local) {
+ loc_wipe (&local->loc);
+ loc_wipe (&local->loc2);
+
+ if (local->fd) {
+ fd_unref (local->fd);
+ }
+
+ if (local->iobref) {
+ iobref_unref (local->iobref);
+ }
+
+ GF_FREE (local->name);
+
+ mem_put (local);
+ }
+
+ return 0;
+}
+
+int
+unserialize_rsp_dirent (struct gfs3_readdir_rsp *rsp, gf_dirent_t *entries)
+{
+ struct gfs3_dirlist *trav = NULL;
+ gf_dirent_t *entry = NULL;
+ int entry_len = 0;
+ int ret = -1;
+
+ trav = rsp->reply;
+ while (trav) {
+ entry_len = gf_dirent_size (trav->name);
+ entry = GF_CALLOC (1, entry_len, gf_common_mt_gf_dirent_t);
+ if (!entry)
+ goto out;
+
+ entry->d_ino = trav->d_ino;
+ entry->d_off = trav->d_off;
+ entry->d_len = trav->d_len;
+ entry->d_type = trav->d_type;
+
+ strcpy (entry->d_name, trav->name);
+
+ list_add_tail (&entry->list, &entries->list);
+
+ trav = trav->nextentry;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+unserialize_rsp_direntp (xlator_t *this, fd_t *fd,
+ struct gfs3_readdirp_rsp *rsp, gf_dirent_t *entries)
+{
+ struct gfs3_dirplist *trav = NULL;
+ char *buf = NULL;
+ gf_dirent_t *entry = NULL;
+ inode_table_t *itable = NULL;
+ int entry_len = 0;
+ int ret = -1;
+
+ trav = rsp->reply;
+
+ if (fd)
+ itable = fd->inode->table;
+
+ while (trav) {
+ entry_len = gf_dirent_size (trav->name);
+ entry = GF_CALLOC (1, entry_len, gf_common_mt_gf_dirent_t);
+ if (!entry)
+ goto out;
+
+ entry->d_ino = trav->d_ino;
+ entry->d_off = trav->d_off;
+ entry->d_len = trav->d_len;
+ entry->d_type = trav->d_type;
+
+ gf_stat_to_iatt (&trav->stat, &entry->d_stat);
+
+ strcpy (entry->d_name, trav->name);
+
+ if (trav->dict.dict_val) {
+ /* Dictionary is sent along with response */
+ buf = memdup (trav->dict.dict_val, trav->dict.dict_len);
+ if (!buf)
+ goto out;
+
+ entry->dict = dict_new ();
+
+ ret = dict_unserialize (buf, trav->dict.dict_len,
+ &entry->dict);
+ if (ret < 0) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to unserialize xattr dict");
+ errno = EINVAL;
+ goto out;
+ }
+ entry->dict->extra_free = buf;
+ buf = NULL;
+ }
+
+ entry->inode = inode_find (itable, entry->d_stat.ia_gfid);
+ if (!entry->inode)
+ entry->inode = inode_new (itable);
+
+ list_add_tail (&entry->list, &entries->list);
+
+ trav = trav->nextentry;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+clnt_readdirp_rsp_cleanup (gfs3_readdirp_rsp *rsp)
+{
+ gfs3_dirplist *prev = NULL;
+ gfs3_dirplist *trav = NULL;
+
+ trav = rsp->reply;
+ prev = trav;
+ while (trav) {
+ trav = trav->nextentry;
+ /* on client, the rpc lib allocates this */
+ free (prev->dict.dict_val);
+ free (prev->name);
+ free (prev);
+ prev = trav;
+ }
+
+ return 0;
+}
+
+int
+clnt_readdir_rsp_cleanup (gfs3_readdir_rsp *rsp)
+{
+ gfs3_dirlist *prev = NULL;
+ gfs3_dirlist *trav = NULL;
+
+ trav = rsp->reply;
+ prev = trav;
+ while (trav) {
+ trav = trav->nextentry;
+ /* on client, the rpc lib allocates this */
+ free (prev->name);
+ free (prev);
+ prev = trav;
+ }
+
+ return 0;
+}
+
+int
+client_get_remote_fd (xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd)
+{
+ clnt_fd_ctx_t *fdctx = NULL;
+ clnt_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO (this->name, fd, out);
+ GF_VALIDATE_OR_GOTO (this->name, remote_fd, out);
+
+ conf = this->private;
+ pthread_mutex_lock (&conf->lock);
+ {
+ fdctx = this_fd_get_ctx (fd, this);
+ if (!fdctx)
+ *remote_fd = GF_ANON_FD_NO;
+ else if (__is_fd_reopen_in_progress (fdctx))
+ *remote_fd = -1;
+ else
+ *remote_fd = fdctx->remote_fd;
+ }
+ pthread_mutex_unlock (&conf->lock);
+
+ if ((flags & FALLBACK_TO_ANON_FD) && (*remote_fd == -1))
+ *remote_fd = GF_ANON_FD_NO;
+
+ return 0;
+out:
+ return -1;
+}
+
+gf_boolean_t
+client_is_reopen_needed (fd_t *fd, xlator_t *this, int64_t remote_fd)
+{
+ clnt_fd_ctx_t *fdctx = NULL;
+
+ fdctx = this_fd_get_ctx (fd, this);
+ if (fdctx && (fdctx->remote_fd == -1) &&
+ (remote_fd == GF_ANON_FD_NO))
+ return _gf_true;
+ return _gf_false;
+}
+
+int
+client_fd_fop_prepare_local (call_frame_t *frame, fd_t *fd, int64_t remote_fd)
+{
+ xlator_t *this = NULL;
+ clnt_conf_t *conf = NULL;
+ clnt_local_t *local = NULL;
+ int ret = 0;
+
+ this = frame->this;
+ conf = this->private;
+
+ if (!frame || !fd) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ frame->local = mem_get0 (this->local_pool);
+ if (frame->local == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ local = frame->local;
+ local->fd = fd_ref (fd);
+ local->attempt_reopen = client_is_reopen_needed (fd, this, remote_fd);
+ return 0;
+out:
+ return ret;
+}
diff --git a/xlators/protocol/client/src/client-lk.c b/xlators/protocol/client/src/client-lk.c
new file mode 100644
index 000000000..1fd8f0d50
--- /dev/null
+++ b/xlators/protocol/client/src/client-lk.c
@@ -0,0 +1,573 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "common-utils.h"
+#include "xlator.h"
+#include "client.h"
+#include "lkowner.h"
+
+static void
+__insert_and_merge (clnt_fd_ctx_t *fdctx, client_posix_lock_t *lock);
+
+static void
+__dump_client_lock (client_posix_lock_t *lock)
+{
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ gf_log (this->name, GF_LOG_INFO,
+ "{fd=%p}"
+ "{%s lk-owner:%s %"PRId64" - %"PRId64"}"
+ "{start=%"PRId64" end=%"PRId64"}",
+ lock->fd,
+ lock->fl_type == F_WRLCK ? "Write-Lock" : "Read-Lock",
+ lkowner_utoa (&lock->owner),
+ lock->user_flock.l_start,
+ lock->user_flock.l_len,
+ lock->fl_start,
+ lock->fl_end);
+}
+
+static int
+dump_client_locks_fd (clnt_fd_ctx_t *fdctx)
+{
+ client_posix_lock_t *lock = NULL;
+ int count = 0;
+
+ pthread_mutex_lock (&fdctx->mutex);
+ {
+ list_for_each_entry (lock, &fdctx->lock_list, list) {
+ __dump_client_lock (lock);
+ count++;
+ }
+ }
+ pthread_mutex_unlock (&fdctx->mutex);
+
+ return count;
+
+}
+
+int
+dump_client_locks (inode_t *inode)
+{
+ fd_t *fd = NULL;
+ clnt_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ clnt_fd_ctx_t *fdctx = NULL;
+
+ int total_count = 0;
+ int locks_fd_count = 0;
+
+ this = THIS;
+ conf = this->private;
+
+ LOCK (&inode->lock);
+ {
+ list_for_each_entry (fd, &inode->fd_list, inode_list) {
+ locks_fd_count = 0;
+
+ pthread_mutex_lock (&conf->lock);
+ {
+ fdctx = this_fd_get_ctx (fd, this);
+ }
+ pthread_mutex_unlock (&conf->lock);
+
+ if (fdctx)
+ locks_fd_count = dump_client_locks_fd (fdctx);
+
+ total_count += locks_fd_count;
+ }
+
+ }
+ UNLOCK (&inode->lock);
+
+ return total_count;
+
+}
+
+static off_t
+__get_lock_length (off_t start, off_t end)
+{
+ if (end == LLONG_MAX)
+ return 0;
+ else
+ return (end - start + 1);
+}
+
+/* Add two locks */
+static client_posix_lock_t *
+add_locks (client_posix_lock_t *l1, client_posix_lock_t *l2)
+{
+ client_posix_lock_t *sum = NULL;
+
+ sum = GF_CALLOC (1, sizeof (*sum), gf_client_mt_clnt_lock_t);
+ if (!sum)
+ return NULL;
+
+ sum->fl_start = min (l1->fl_start, l2->fl_start);
+ sum->fl_end = max (l1->fl_end, l2->fl_end);
+
+ sum->user_flock.l_start = sum->fl_start;
+ sum->user_flock.l_len = __get_lock_length (sum->fl_start,
+ sum->fl_end);
+
+ return sum;
+}
+
+
+/* Return true if the locks overlap, false otherwise */
+static int
+locks_overlap (client_posix_lock_t *l1, client_posix_lock_t *l2)
+{
+ /*
+ Note:
+ FUSE always gives us absolute offsets, so no need to worry
+ about SEEK_CUR or SEEK_END
+ */
+
+ return ((l1->fl_end >= l2->fl_start) &&
+ (l2->fl_end >= l1->fl_start));
+}
+
+static void
+__delete_client_lock (client_posix_lock_t *lock)
+{
+ list_del_init (&lock->list);
+}
+
+/* Destroy a posix_lock */
+static void
+__destroy_client_lock (client_posix_lock_t *lock)
+{
+ GF_FREE (lock);
+}
+
+/* Subtract two locks */
+struct _values {
+ client_posix_lock_t *locks[3];
+};
+
+/* {big} must always be contained inside {small} */
+static struct _values
+subtract_locks (client_posix_lock_t *big, client_posix_lock_t *small)
+{
+ struct _values v = { .locks = {0, 0, 0} };
+
+ if ((big->fl_start == small->fl_start) &&
+ (big->fl_end == small->fl_end)) {
+ /* both edges coincide with big */
+ v.locks[0] = GF_CALLOC (1, sizeof (client_posix_lock_t),
+ gf_client_mt_clnt_lock_t );
+ GF_ASSERT (v.locks[0]);
+ memcpy (v.locks[0], big, sizeof (client_posix_lock_t));
+ v.locks[0]->fl_type = small->fl_type;
+ }
+ else if ((small->fl_start > big->fl_start) &&
+ (small->fl_end < big->fl_end)) {
+ /* both edges lie inside big */
+ v.locks[0] = GF_CALLOC (1, sizeof (client_posix_lock_t),
+ gf_client_mt_clnt_lock_t);
+ GF_ASSERT (v.locks[0]);
+ v.locks[1] = GF_CALLOC (1, sizeof (client_posix_lock_t),
+ gf_client_mt_clnt_lock_t);
+ GF_ASSERT (v.locks[1]);
+ v.locks[2] = GF_CALLOC (1, sizeof (client_posix_lock_t),
+ gf_client_mt_clnt_lock_t);
+ GF_ASSERT (v.locks[2]);
+
+ memcpy (v.locks[0], big, sizeof (client_posix_lock_t));
+ v.locks[0]->fl_end = small->fl_start - 1;
+ v.locks[0]->user_flock.l_len = __get_lock_length (v.locks[0]->fl_start,
+ v.locks[0]->fl_end);
+
+ memcpy (v.locks[1], small, sizeof (client_posix_lock_t));
+ memcpy (v.locks[2], big, sizeof (client_posix_lock_t));
+ v.locks[2]->fl_start = small->fl_end + 1;
+ v.locks[2]->user_flock.l_start = small->fl_end + 1;
+ }
+ /* one edge coincides with big */
+ else if (small->fl_start == big->fl_start) {
+ v.locks[0] = GF_CALLOC (1, sizeof (client_posix_lock_t),
+ gf_client_mt_clnt_lock_t);
+ GF_ASSERT (v.locks[0]);
+ v.locks[1] = GF_CALLOC (1, sizeof (client_posix_lock_t),
+ gf_client_mt_clnt_lock_t);
+ GF_ASSERT (v.locks[1]);
+
+ memcpy (v.locks[0], big, sizeof (client_posix_lock_t));
+ v.locks[0]->fl_start = small->fl_end + 1;
+ v.locks[0]->user_flock.l_start = small->fl_end + 1;
+
+ memcpy (v.locks[1], small, sizeof (client_posix_lock_t));
+ }
+ else if (small->fl_end == big->fl_end) {
+ v.locks[0] = GF_CALLOC (1, sizeof (client_posix_lock_t),
+ gf_client_mt_clnt_lock_t);
+ GF_ASSERT (v.locks[0]);
+ v.locks[1] = GF_CALLOC (1, sizeof (client_posix_lock_t),
+ gf_client_mt_clnt_lock_t);
+ GF_ASSERT (v.locks[1]);
+
+ memcpy (v.locks[0], big, sizeof (client_posix_lock_t));
+ v.locks[0]->fl_end = small->fl_start - 1;
+ v.locks[0]->user_flock.l_len = __get_lock_length (v.locks[0]->fl_start,
+ v.locks[0]->fl_end);
+
+ memcpy (v.locks[1], small, sizeof (client_posix_lock_t));
+ }
+ else {
+ /* LOG-TODO : decide what more info is required here*/
+ gf_log ("client-protocol", GF_LOG_CRITICAL,
+ "Unexpected case in subtract_locks. Please send "
+ "a bug report to gluster-devel@nongnu.org");
+ }
+
+ return v;
+}
+
+static void
+__delete_unlck_locks (clnt_fd_ctx_t *fdctx)
+{
+ client_posix_lock_t *l = NULL;
+ client_posix_lock_t *tmp = NULL;
+
+ list_for_each_entry_safe (l, tmp, &fdctx->lock_list, list) {
+ if (l->fl_type == F_UNLCK) {
+ __delete_client_lock (l);
+ __destroy_client_lock (l);
+ }
+ }
+}
+
+static void
+__insert_lock (clnt_fd_ctx_t *fdctx, client_posix_lock_t *lock)
+{
+ list_add_tail (&lock->list, &fdctx->lock_list);
+
+ return;
+}
+
+static void
+__insert_and_merge (clnt_fd_ctx_t *fdctx, client_posix_lock_t *lock)
+{
+ client_posix_lock_t *conf = NULL;
+ client_posix_lock_t *t = NULL;
+ client_posix_lock_t *sum = NULL;
+ int i = 0;
+ struct _values v = { .locks = {0, 0, 0} };
+
+ list_for_each_entry_safe (conf, t, &fdctx->lock_list, list) {
+ if (!locks_overlap (conf, lock))
+ continue;
+
+ if (is_same_lkowner (&conf->owner, &lock->owner)) {
+ if (conf->fl_type == lock->fl_type) {
+ sum = add_locks (lock, conf);
+
+ sum->fd = lock->fd;
+
+ __delete_client_lock (conf);
+ __destroy_client_lock (conf);
+
+ __destroy_client_lock (lock);
+ __insert_and_merge (fdctx, sum);
+
+ return;
+ } else {
+ sum = add_locks (lock, conf);
+
+ sum->fd = conf->fd;
+ sum->owner = conf->owner;
+
+ v = subtract_locks (sum, lock);
+
+ __delete_client_lock (conf);
+ __destroy_client_lock (conf);
+
+ __delete_client_lock (lock);
+ __destroy_client_lock (lock);
+
+ __destroy_client_lock (sum);
+
+ for (i = 0; i < 3; i++) {
+ if (!v.locks[i])
+ continue;
+
+ INIT_LIST_HEAD (&v.locks[i]->list);
+ __insert_and_merge (fdctx,
+ v.locks[i]);
+ }
+
+ __delete_unlck_locks (fdctx);
+ return;
+ }
+ }
+
+ if (lock->fl_type == F_UNLCK) {
+ continue;
+ }
+
+ if ((conf->fl_type == F_RDLCK) && (lock->fl_type == F_RDLCK)) {
+ __insert_lock (fdctx, lock);
+ return;
+ }
+ }
+
+ /* no conflicts, so just insert */
+ if (lock->fl_type != F_UNLCK) {
+ __insert_lock (fdctx, lock);
+ } else {
+ __destroy_client_lock (lock);
+ }
+}
+
+static void
+client_setlk (clnt_fd_ctx_t *fdctx, client_posix_lock_t *lock)
+{
+ pthread_mutex_lock (&fdctx->mutex);
+ {
+ __insert_and_merge (fdctx, lock);
+ }
+ pthread_mutex_unlock (&fdctx->mutex);
+
+ return;
+}
+
+static void
+destroy_client_lock (client_posix_lock_t *lock)
+{
+ GF_FREE (lock);
+}
+
+int32_t
+delete_granted_locks_owner (fd_t *fd, gf_lkowner_t *owner)
+{
+ clnt_fd_ctx_t *fdctx = NULL;
+ client_posix_lock_t *lock = NULL;
+ client_posix_lock_t *tmp = NULL;
+ xlator_t *this = NULL;
+
+ struct list_head delete_list;
+ int ret = 0;
+ int count = 0;
+
+ INIT_LIST_HEAD (&delete_list);
+ this = THIS;
+ fdctx = this_fd_get_ctx (fd, this);
+ if (!fdctx) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "fdctx not valid");
+ ret = -1;
+ goto out;
+ }
+
+ pthread_mutex_lock (&fdctx->mutex);
+ {
+ list_for_each_entry_safe (lock, tmp, &fdctx->lock_list, list) {
+ if (!is_same_lkowner (&lock->owner, owner)) {
+ list_del_init (&lock->list);
+ list_add_tail (&lock->list, &delete_list);
+ count++;
+ }
+ }
+ }
+ pthread_mutex_unlock (&fdctx->mutex);
+
+ list_for_each_entry_safe (lock, tmp, &delete_list, list) {
+ list_del_init (&lock->list);
+ destroy_client_lock (lock);
+ }
+
+ /* FIXME: Need to actually print the locks instead of count */
+ gf_log (this->name, GF_LOG_TRACE,
+ "Number of locks cleared=%d", count);
+
+out:
+ return ret;
+}
+
+int32_t
+delete_granted_locks_fd (clnt_fd_ctx_t *fdctx)
+{
+ client_posix_lock_t *lock = NULL;
+ client_posix_lock_t *tmp = NULL;
+ xlator_t *this = NULL;
+
+ struct list_head delete_list;
+ int ret = 0;
+ int count = 0;
+
+ INIT_LIST_HEAD (&delete_list);
+ this = THIS;
+
+ pthread_mutex_lock (&fdctx->mutex);
+ {
+ list_splice_init (&fdctx->lock_list, &delete_list);
+ }
+ pthread_mutex_unlock (&fdctx->mutex);
+
+ list_for_each_entry_safe (lock, tmp, &delete_list, list) {
+ list_del_init (&lock->list);
+ count++;
+ destroy_client_lock (lock);
+ }
+
+ /* FIXME: Need to actually print the locks instead of count */
+ gf_log (this->name, GF_LOG_TRACE,
+ "Number of locks cleared=%d", count);
+
+ return ret;
+}
+
+int32_t
+client_cmd_to_gf_cmd (int32_t cmd, int32_t *gf_cmd)
+{
+ int ret = 0;
+
+ if (cmd == F_GETLK || cmd == F_GETLK64)
+ *gf_cmd = GF_LK_GETLK;
+ else if (cmd == F_SETLK || cmd == F_SETLK64)
+ *gf_cmd = GF_LK_SETLK;
+ else if (cmd == F_SETLKW || cmd == F_SETLKW64)
+ *gf_cmd = GF_LK_SETLKW;
+ else if (cmd == F_RESLK_LCK)
+ *gf_cmd = GF_LK_RESLK_LCK;
+ else if (cmd == F_RESLK_LCKW)
+ *gf_cmd = GF_LK_RESLK_LCKW;
+ else if (cmd == F_RESLK_UNLCK)
+ *gf_cmd = GF_LK_RESLK_UNLCK;
+ else if (cmd == F_GETLK_FD)
+ *gf_cmd = GF_LK_GETLK_FD;
+ else
+ ret = -1;
+
+ return ret;
+
+}
+
+static client_posix_lock_t *
+new_client_lock (struct gf_flock *flock, gf_lkowner_t *owner,
+ int32_t cmd, fd_t *fd)
+{
+ client_posix_lock_t *new_lock = NULL;
+
+ new_lock = GF_CALLOC (1, sizeof (*new_lock),
+ gf_client_mt_clnt_lock_t);
+ if (!new_lock) {
+ goto out;
+ }
+
+ INIT_LIST_HEAD (&new_lock->list);
+ new_lock->fd = fd;
+ memcpy (&new_lock->user_flock, flock, sizeof (struct gf_flock));
+
+ new_lock->fl_type = flock->l_type;
+ new_lock->fl_start = flock->l_start;
+
+ if (flock->l_len == 0)
+ new_lock->fl_end = LLONG_MAX;
+ else
+ new_lock->fl_end = flock->l_start + flock->l_len - 1;
+
+ new_lock->owner = *owner;
+
+ new_lock->cmd = cmd; /* Not really useful */
+
+out:
+ return new_lock;
+}
+
+void
+client_save_number_fds (clnt_conf_t *conf, int count)
+{
+ LOCK (&conf->rec_lock);
+ {
+ conf->reopen_fd_count = count;
+ }
+ UNLOCK (&conf->rec_lock);
+}
+
+int
+client_add_lock_for_recovery (fd_t *fd, struct gf_flock *flock,
+ gf_lkowner_t *owner, int32_t cmd)
+{
+ clnt_fd_ctx_t *fdctx = NULL;
+ xlator_t *this = NULL;
+ client_posix_lock_t *lock = NULL;
+ clnt_conf_t *conf = NULL;
+
+ int ret = 0;
+
+ this = THIS;
+ conf = this->private;
+
+ pthread_mutex_lock (&conf->lock);
+ {
+ fdctx = this_fd_get_ctx (fd, this);
+ }
+ pthread_mutex_unlock (&conf->lock);
+
+ if (!fdctx) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to get fd context. sending EBADFD");
+ ret = -EBADFD;
+ goto out;
+ }
+
+ lock = new_client_lock (flock, owner, cmd, fd);
+ if (!lock) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ client_setlk (fdctx, lock);
+
+out:
+ return ret;
+
+}
+
+int32_t
+client_dump_locks (char *name, inode_t *inode,
+ dict_t *dict)
+{
+ int ret = 0;
+ dict_t *new_dict = NULL;
+ char dict_string[256];
+
+ GF_ASSERT (dict);
+ new_dict = dict;
+
+ ret = dump_client_locks (inode);
+ snprintf (dict_string, 256, "%d locks dumped in log file", ret);
+
+ ret = dict_set_dynstr(new_dict, CLIENT_DUMP_LOCKS, dict_string);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "could not set dict with %s", CLIENT_DUMP_LOCKS);
+ goto out;
+ }
+
+out:
+
+ return ret;
+}
+
+int32_t
+is_client_dump_locks_cmd (char *name)
+{
+ int ret = 0;
+
+ if (strcmp (name, CLIENT_DUMP_LOCKS) == 0)
+ ret = 1;
+
+ return ret;
+}
diff --git a/xlators/protocol/client/src/client-mem-types.h b/xlators/protocol/client/src/client-mem-types.h
new file mode 100644
index 000000000..f6573da2d
--- /dev/null
+++ b/xlators/protocol/client/src/client-mem-types.h
@@ -0,0 +1,25 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __CLIENT_MEM_TYPES_H__
+#define __CLIENT_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_client_mem_types_ {
+ gf_client_mt_clnt_conf_t = gf_common_mt_end + 1,
+ gf_client_mt_clnt_req_buf_t,
+ gf_client_mt_clnt_fdctx_t,
+ gf_client_mt_clnt_lock_t,
+ gf_client_mt_clnt_fd_lk_local_t,
+ gf_client_mt_end,
+};
+#endif /* __CLIENT_MEM_TYPES_H__ */
diff --git a/xlators/protocol/client/src/client-protocol.c b/xlators/protocol/client/src/client-protocol.c
deleted file mode 100644
index 6572dbf6f..000000000
--- a/xlators/protocol/client/src/client-protocol.c
+++ /dev/null
@@ -1,6589 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-#include <inttypes.h>
-
-
-#include "glusterfs.h"
-#include "client-protocol.h"
-#include "compat.h"
-#include "dict.h"
-#include "protocol.h"
-#include "transport.h"
-#include "xlator.h"
-#include "logging.h"
-#include "timer.h"
-#include "defaults.h"
-#include "compat.h"
-#include "compat-errno.h"
-#include "statedump.h"
-
-#include <sys/resource.h>
-#include <inttypes.h>
-
-/* for default_*_cbk functions */
-#include "defaults.c"
-#include "saved-frames.h"
-#include "common-utils.h"
-
-int protocol_client_cleanup (transport_t *trans);
-int protocol_client_interpret (xlator_t *this, transport_t *trans,
- char *hdr_p, size_t hdrlen,
- struct iobuf *iobuf);
-int
-protocol_client_xfer (call_frame_t *frame, xlator_t *this, transport_t *trans,
- int type, int op,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iovec *vector, int count,
- struct iobref *iobref);
-
-static gf_op_t gf_fops[];
-static gf_op_t gf_mops[];
-static gf_op_t gf_cbks[];
-
-
-transport_t *
-client_channel (xlator_t *this, int id)
-{
- transport_t *trans = NULL;
- client_conf_t *conf = NULL;
- int i = 0;
- struct client_connection *conn = NULL;
-
- conf = this->private;
-
- trans = conf->transport[id];
- conn = trans->xl_private;
-
- if (conn->connected == 1)
- goto ret;
-
- for (i = 0; i < CHANNEL_MAX; i++) {
- trans = conf->transport[i];
- conn = trans->xl_private;
- if (conn->connected == 1)
- break;
- }
-
-ret:
- return trans;
-}
-
-
-client_fd_ctx_t *
-this_fd_del_ctx (fd_t *file, xlator_t *this)
-{
- int dict_ret = -1;
- uint64_t ctxaddr = 0;
-
- GF_VALIDATE_OR_GOTO ("client", this, out);
- GF_VALIDATE_OR_GOTO (this->name, file, out);
-
- dict_ret = fd_ctx_del (file, this, &ctxaddr);
-
- if (dict_ret < 0) {
- ctxaddr = 0;
- }
-
-out:
- return (client_fd_ctx_t *)(unsigned long)ctxaddr;
-}
-
-
-client_fd_ctx_t *
-this_fd_get_ctx (fd_t *file, xlator_t *this)
-{
- int dict_ret = -1;
- uint64_t ctxaddr = 0;
-
- GF_VALIDATE_OR_GOTO ("client", this, out);
- GF_VALIDATE_OR_GOTO (this->name, file, out);
-
- dict_ret = fd_ctx_get (file, this, &ctxaddr);
-
- if (dict_ret < 0) {
- ctxaddr = 0;
- }
-
-out:
- return (client_fd_ctx_t *)(unsigned long)ctxaddr;
-}
-
-
-static void
-this_fd_set_ctx (fd_t *file, xlator_t *this, loc_t *loc, client_fd_ctx_t *ctx)
-{
- uint64_t oldaddr = 0;
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO ("client", this, out);
- GF_VALIDATE_OR_GOTO (this->name, file, out);
-
- ret = fd_ctx_get (file, this, &oldaddr);
- if (ret >= 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s (%"PRId64"): trying duplicate remote fd set. ",
- loc->path, loc->inode->ino);
- }
-
- ret = fd_ctx_set (file, this, (uint64_t)(unsigned long)ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s (%"PRId64"): failed to set remote fd",
- loc->path, loc->inode->ino);
- }
-out:
- return;
-}
-
-
-static int
-client_local_wipe (client_local_t *local)
-{
- if (local) {
- loc_wipe (&local->loc);
-
- if (local->fd)
- fd_unref (local->fd);
-
- free (local);
- }
-
- return 0;
-}
-
-/*
- * lookup_frame - lookup call frame corresponding to a given callid
- * @trans: transport object
- * @callid: call id of the frame
- *
- * not for external reference
- */
-
-static call_frame_t *
-lookup_frame (transport_t *trans, int32_t op, int8_t type, int64_t callid)
-{
- client_connection_t *conn = NULL;
- call_frame_t *frame = NULL;
-
- conn = trans->xl_private;
-
- pthread_mutex_lock (&conn->lock);
- {
- frame = saved_frames_get (conn->saved_frames,
- op, type, callid);
- }
- pthread_mutex_unlock (&conn->lock);
-
- return frame;
-}
-
-
-static void
-call_bail (void *data)
-{
- client_connection_t *conn = NULL;
- struct timeval current;
- transport_t *trans = NULL;
- struct list_head list;
- struct saved_frame *saved_frame = NULL;
- struct saved_frame *trav = NULL;
- struct saved_frame *tmp = NULL;
- call_frame_t *frame = NULL;
- gf_hdr_common_t hdr = {0, };
- char **gf_op_list = NULL;
- gf_op_t *gf_ops = NULL;
- struct tm frame_sent_tm;
- char frame_sent[32] = {0,};
- struct timeval timeout = {0,};
- gf_timer_cbk_t timer_cbk = NULL;
-
- GF_VALIDATE_OR_GOTO("client", data, out);
- trans = data;
-
- conn = trans->xl_private;
-
- gettimeofday (&current, NULL);
- INIT_LIST_HEAD (&list);
-
- pthread_mutex_lock (&conn->lock);
- {
- /* Chaining to get call-always functionality from
- call-once timer */
- if (conn->timer) {
- timer_cbk = conn->timer->cbk;
-
- timeout.tv_sec = 10;
- timeout.tv_usec = 0;
-
- gf_timer_call_cancel (trans->xl->ctx, conn->timer);
- conn->timer = gf_timer_call_after (trans->xl->ctx,
- timeout,
- timer_cbk,
- trans);
- if (conn->timer == NULL) {
- gf_log (trans->xl->name, GF_LOG_DEBUG,
- "Cannot create bailout timer");
- }
- }
-
- do {
- saved_frame =
- saved_frames_get_timedout (conn->saved_frames,
- GF_OP_TYPE_MOP_REQUEST,
- conn->frame_timeout,
- &current);
- if (saved_frame)
- list_add (&saved_frame->list, &list);
-
- } while (saved_frame);
-
- do {
- saved_frame =
- saved_frames_get_timedout (conn->saved_frames,
- GF_OP_TYPE_FOP_REQUEST,
- conn->frame_timeout,
- &current);
- if (saved_frame)
- list_add (&saved_frame->list, &list);
- } while (saved_frame);
-
- do {
- saved_frame =
- saved_frames_get_timedout (conn->saved_frames,
- GF_OP_TYPE_CBK_REQUEST,
- conn->frame_timeout,
- &current);
- if (saved_frame)
- list_add (&saved_frame->list, &list);
- } while (saved_frame);
- }
- pthread_mutex_unlock (&conn->lock);
-
- hdr.rsp.op_ret = hton32 (-1);
- hdr.rsp.op_errno = hton32 (ENOTCONN);
-
- list_for_each_entry_safe (trav, tmp, &list, list) {
- switch (trav->type)
- {
- case GF_OP_TYPE_FOP_REQUEST:
- gf_ops = gf_fops;
- gf_op_list = gf_fop_list;
- break;
- case GF_OP_TYPE_MOP_REQUEST:
- gf_ops = gf_mops;
- gf_op_list = gf_mop_list;
- break;
- case GF_OP_TYPE_CBK_REQUEST:
- gf_ops = gf_cbks;
- gf_op_list = gf_cbk_list;
- break;
- }
-
- localtime_r (&trav->saved_at.tv_sec, &frame_sent_tm);
- strftime (frame_sent, 32, "%Y-%m-%d %H:%M:%S", &frame_sent_tm);
-
- gf_log (trans->xl->name, GF_LOG_ERROR,
- "bailing out frame %s(%d) "
- "frame sent = %s. frame-timeout = %d",
- gf_op_list[trav->op], trav->op,
- frame_sent, conn->frame_timeout);
-
- hdr.type = hton32 (trav->type);
- hdr.op = hton32 (trav->op);
-
- frame = trav->frame;
-
- gf_ops[trav->op] (frame, &hdr, sizeof (hdr), NULL);
-
- list_del_init (&trav->list);
- FREE (trav);
- }
-out:
- return;
-}
-
-
-void
-save_frame (transport_t *trans, call_frame_t *frame,
- int32_t op, int8_t type, uint64_t callid)
-{
- client_connection_t *conn = NULL;
- struct timeval timeout = {0, };
-
-
- conn = trans->xl_private;
-
- saved_frames_put (conn->saved_frames, frame, op, type, callid);
-
- if (conn->timer == NULL) {
- timeout.tv_sec = 10;
- timeout.tv_usec = 0;
- conn->timer = gf_timer_call_after (trans->xl->ctx, timeout,
- call_bail, (void *) trans);
- }
-}
-
-
-
-void
-client_ping_timer_expired (void *data)
-{
- xlator_t *this = NULL;
- transport_t *trans = NULL;
- client_conf_t *conf = NULL;
- client_connection_t *conn = NULL;
- int disconnect = 0;
- int transport_activity = 0;
- struct timeval timeout = {0, };
- struct timeval current = {0, };
-
- trans = data;
- this = trans->xl;
- conf = this->private;
- conn = trans->xl_private;
-
- pthread_mutex_lock (&conn->lock);
- {
- if (conn->ping_timer)
- gf_timer_call_cancel (trans->xl->ctx,
- conn->ping_timer);
- gettimeofday (&current, NULL);
-
- pthread_mutex_lock (&conf->mutex);
- {
- if (((current.tv_sec - conf->last_received.tv_sec) <
- conn->ping_timeout)
- || ((current.tv_sec - conf->last_sent.tv_sec) <
- conn->ping_timeout)) {
- transport_activity = 1;
- }
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (transport_activity) {
- gf_log (this->name, GF_LOG_TRACE,
- "ping timer expired but transport activity "
- "detected - not bailing transport");
- conn->transport_activity = 0;
- timeout.tv_sec = conn->ping_timeout;
- timeout.tv_usec = 0;
-
- conn->ping_timer =
- gf_timer_call_after (trans->xl->ctx, timeout,
- client_ping_timer_expired,
- (void *) trans);
- if (conn->ping_timer == NULL)
- gf_log (this->name, GF_LOG_DEBUG,
- "unable to setup timer");
-
- } else {
- conn->ping_started = 0;
- conn->ping_timer = NULL;
- disconnect = 1;
- }
- }
- pthread_mutex_unlock (&conn->lock);
- if (disconnect) {
- gf_log (this->name, GF_LOG_ERROR,
- "Server %s has not responded in the last %d "
- "seconds, disconnecting.",
- conf->transport[0]->peerinfo.identifier,
- conn->ping_timeout);
-
- transport_disconnect (conf->transport[0]);
- transport_disconnect (conf->transport[1]);
- }
-}
-
-
-void
-client_start_ping (void *data)
-{
- xlator_t *this = NULL;
- transport_t *trans = NULL;
- client_conf_t *conf = NULL;
- client_connection_t *conn = NULL;
- int32_t ret = -1;
- gf_hdr_common_t *hdr = NULL;
- struct timeval timeout = {0, };
- call_frame_t *dummy_frame = NULL;
- size_t hdrlen = -1;
- gf_mop_ping_req_t *req = NULL;
-
-
- trans = data;
- this = trans->xl;
- conf = this->private;
- conn = trans->xl_private;
-
- pthread_mutex_lock (&conn->lock);
- {
- if ((conn->saved_frames->count == 0) ||
- !conn->connected) {
- /* using goto looked ugly here,
- * hence getting out this way */
- if (conn->ping_timer)
- gf_timer_call_cancel (trans->xl->ctx,
- conn->ping_timer);
- conn->ping_timer = NULL;
- conn->ping_started = 0;
- /* unlock */
- pthread_mutex_unlock (&conn->lock);
- return;
- }
-
- if (conn->saved_frames->count < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "saved_frames->count is %"PRId64,
- conn->saved_frames->count);
- conn->saved_frames->count = 0;
- }
- timeout.tv_sec = conn->ping_timeout;
- timeout.tv_usec = 0;
-
- if (conn->ping_timer)
- gf_timer_call_cancel (trans->xl->ctx,
- conn->ping_timer);
-
- conn->ping_timer =
- gf_timer_call_after (trans->xl->ctx, timeout,
- client_ping_timer_expired,
- (void *) trans);
-
- if (conn->ping_timer == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unable to setup timer");
- } else {
- conn->ping_started = 1;
- }
- }
- pthread_mutex_unlock (&conn->lock);
-
- hdrlen = gf_hdr_len (req, 0);
- hdr = gf_hdr_new (req, 0);
-
- dummy_frame = create_frame (this, this->ctx->pool);
- dummy_frame->local = trans;
-
- ret = protocol_client_xfer (dummy_frame, this, trans,
- GF_OP_TYPE_MOP_REQUEST, GF_MOP_PING,
- hdr, hdrlen, NULL, 0, NULL);
-}
-
-
-int
-client_ping_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- xlator_t *this = NULL;
- transport_t *trans = NULL;
- client_connection_t *conn = NULL;
- struct timeval timeout = {0, };
- int op_ret = 0;
-
- trans = frame->local; frame->local = NULL;
- this = trans->xl;
- conn = trans->xl_private;
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
-
- if (op_ret == -1) {
- /* timer expired and transport bailed out */
- gf_log (this->name, GF_LOG_DEBUG, "timer must have expired");
- goto out;
- }
-
- pthread_mutex_lock (&conn->lock);
- {
- timeout.tv_sec = conn->ping_timeout;
- timeout.tv_usec = 0;
-
- gf_timer_call_cancel (trans->xl->ctx,
- conn->ping_timer);
-
- conn->ping_timer =
- gf_timer_call_after (trans->xl->ctx, timeout,
- client_start_ping, (void *)trans);
- if (conn->ping_timer == NULL)
- gf_log (this->name, GF_LOG_DEBUG,
- "gf_timer_call_after() returned NULL");
- }
- pthread_mutex_unlock (&conn->lock);
-out:
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-
-int
-protocol_client_xfer (call_frame_t *frame, xlator_t *this, transport_t *trans,
- int type, int op,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iovec *vector, int count,
- struct iobref *iobref)
-{
- client_conf_t *conf = NULL;
- client_connection_t *conn = NULL;
- uint64_t callid = 0;
- int32_t ret = -1;
- int start_ping = 0;
- gf_hdr_common_t rsphdr = {0, };
-
- conf = this->private;
-
- if (!trans) {
- /* default to bulk op since it is 'safer' */
- trans = conf->transport[CHANNEL_BULK];
- }
- conn = trans->xl_private;
-
- pthread_mutex_lock (&conn->lock);
- {
- callid = ++conn->callid;
-
- hdr->callid = hton64 (callid);
- hdr->op = hton32 (op);
- hdr->type = hton32 (type);
-
- if (frame) {
- hdr->req.uid = hton32 (frame->root->uid);
- hdr->req.gid = hton32 (frame->root->gid);
- hdr->req.pid = hton32 (frame->root->pid);
- }
-
- if (conn->connected == 0)
- transport_connect (trans);
-
- ret = -1;
-
- if (conn->connected ||
- ((type == GF_OP_TYPE_MOP_REQUEST) &&
- (op == GF_MOP_SETVOLUME))) {
- ret = transport_submit (trans, (char *)hdr, hdrlen,
- vector, count, iobref);
- }
-
- if ((ret >= 0) && frame) {
- pthread_mutex_lock (&conf->mutex);
- {
- gettimeofday (&conf->last_sent, NULL);
- }
- pthread_mutex_unlock (&conf->mutex);
- save_frame (trans, frame, op, type, callid);
- }
-
- if (!conn->ping_started && (ret >= 0)) {
- start_ping = 1;
- }
- }
- pthread_mutex_unlock (&conn->lock);
-
- if (start_ping)
- client_start_ping ((void *) trans);
-
- if (frame && (ret < 0)) {
- rsphdr.op = op;
- rsphdr.rsp.op_ret = hton32 (-1);
- rsphdr.rsp.op_errno = hton32 (ENOTCONN);
-
- if (type == GF_OP_TYPE_FOP_REQUEST) {
- rsphdr.type = GF_OP_TYPE_FOP_REPLY;
- gf_fops[op] (frame, &rsphdr, sizeof (rsphdr), NULL);
- } else if (type == GF_OP_TYPE_MOP_REQUEST) {
- rsphdr.type = GF_OP_TYPE_MOP_REPLY;
- gf_mops[op] (frame, &rsphdr, sizeof (rsphdr), NULL);
- } else {
- rsphdr.type = GF_OP_TYPE_CBK_REPLY;
- gf_cbks[op] (frame, &rsphdr, sizeof (rsphdr), NULL);
- }
-
- FREE (hdr);
- }
-
- return ret;
-}
-
-
-
-/**
- * client_create - create function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @path: complete path to file
- * @flags: create flags
- * @mode: create mode
- *
- * external reference through client_protocol_xlator->fops->create
- */
-
-int
-client_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, fd_t *fd)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_create_req_t *req = NULL;
- size_t hdrlen = 0;
- size_t pathlen = 0;
- size_t baselen = 0;
- int32_t ret = -1;
- ino_t par = 0;
- client_local_t *local = NULL;
-
-
- local = calloc (1, sizeof (*local));
- GF_VALIDATE_OR_GOTO(this->name, local, unwind);
-
- local->fd = fd_ref (fd);
- loc_copy (&local->loc, loc);
-
- frame->local = local;
-
- pathlen = STRLEN_0(loc->path);
- baselen = STRLEN_0(loc->name);
-
- ret = inode_ctx_get (loc->parent, this, &par);
- if (loc->parent->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "CREATE %"PRId64"/%s (%s): failed to get remote inode "
- "number for parent inode",
- loc->parent->ino, loc->name, loc->path);
- }
-
- hdrlen = gf_hdr_len (req, pathlen + baselen);
- hdr = gf_hdr_new (req, pathlen + baselen);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->flags = hton32 (gf_flags_from_flags (flags));
- req->mode = hton32 (mode);
- req->par = hton64 (par);
- strcpy (req->path, loc->path);
- strcpy (req->bname + pathlen, loc->name);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_CREATE,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-unwind:
- if (hdr)
- free (hdr);
- STACK_UNWIND(frame, -1, EINVAL, fd, NULL, NULL);
- return 0;
-
-}
-
-/**
- * client_open - open function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @loc: location of file
- * @flags: open flags
- * @mode: open modes
- *
- * external reference through client_protocol_xlator->fops->open
- */
-
-int
-client_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd)
-{
- int ret = -1;
- gf_hdr_common_t *hdr = NULL;
- size_t hdrlen = 0;
- gf_fop_open_req_t *req = NULL;
- size_t pathlen = 0;
- ino_t ino = 0;
- client_local_t *local = NULL;
-
- local = calloc (1, sizeof (*local));
- GF_VALIDATE_OR_GOTO(this->name, local, unwind);
-
- local->fd = fd_ref (fd);
- loc_copy (&local->loc, loc);
-
- frame->local = local;
-
- pathlen = STRLEN_0(loc->path);
-
- ret = inode_ctx_get (loc->inode, this, &ino);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "OPEN %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- }
-
- hdrlen = gf_hdr_len (req, pathlen);
- hdr = gf_hdr_new (req, pathlen);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- req->flags = hton32 (gf_flags_from_flags (flags));
- strcpy (req->path, loc->path);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_OPEN,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- free (hdr);
- STACK_UNWIND(frame, -1, EINVAL, fd);
- return 0;
-
-}
-
-
-/**
- * client_stat - stat function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @loc: location
- *
- * external reference through client_protocol_xlator->fops->stat
- */
-
-int
-client_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_stat_req_t *req = NULL;
- size_t hdrlen = -1;
- int32_t ret = -1;
- size_t pathlen = 0;
- ino_t ino = 0;
-
- pathlen = STRLEN_0(loc->path);
-
- ret = inode_ctx_get (loc->inode, this, &ino);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "STAT %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- }
-
- hdrlen = gf_hdr_len (req, pathlen);
- hdr = gf_hdr_new (req, pathlen);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- strcpy (req->path, loc->path);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_STAT,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- free (hdr);
- STACK_UNWIND (frame, -1, EINVAL, NULL);
- return 0;
-
-}
-
-
-/**
- * client_readlink - readlink function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @loc: location
- * @size:
- *
- * external reference through client_protocol_xlator->fops->readlink
- */
-int
-client_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_readlink_req_t *req = NULL;
- size_t hdrlen = -1;
- int ret = -1;
- size_t pathlen = 0;
- ino_t ino = 0;
-
- pathlen = STRLEN_0(loc->path);
-
- ret = inode_ctx_get (loc->inode, this, &ino);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "READLINK %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- }
-
- hdrlen = gf_hdr_len (req, pathlen);
- hdr = gf_hdr_new (req, pathlen);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- req->size = hton32 (size);
- strcpy (req->path, loc->path);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_READLINK,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- free (hdr);
- STACK_UNWIND(frame, -1, EINVAL, NULL);
- return 0;
-
-}
-
-
-/**
- * client_mknod - mknod function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @path: pathname of node
- * @mode:
- * @dev:
- *
- * external reference through client_protocol_xlator->fops->mknod
- */
-int
-client_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t dev)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_mknod_req_t *req = NULL;
- size_t hdrlen = -1;
- int ret = -1;
- size_t pathlen = 0;
- size_t baselen = 0;
- ino_t par = 0;
- client_local_t *local = NULL;
-
- local = calloc (1, sizeof (*local));
- GF_VALIDATE_OR_GOTO(this->name, local, unwind);
-
- loc_copy (&local->loc, loc);
-
- frame->local = local;
-
- pathlen = STRLEN_0(loc->path);
- baselen = STRLEN_0(loc->name);
- ret = inode_ctx_get (loc->parent, this, &par);
- if (loc->parent->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "MKNOD %"PRId64"/%s (%s): failed to get remote inode "
- "number for parent",
- loc->parent->ino, loc->name, loc->path);
- }
-
- hdrlen = gf_hdr_len (req, pathlen + baselen);
- hdr = gf_hdr_new (req, pathlen + baselen);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->par = hton64 (par);
- req->mode = hton32 (mode);
- req->dev = hton64 (dev);
- strcpy (req->path, loc->path);
- strcpy (req->bname + pathlen, loc->name);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_MKNOD,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- free (hdr);
- STACK_UNWIND(frame, -1, EINVAL, loc->inode, NULL);
- return 0;
-
-}
-
-
-/**
- * client_mkdir - mkdir function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @path: pathname of directory
- * @mode:
- *
- * external reference through client_protocol_xlator->fops->mkdir
- */
-int
-client_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_mkdir_req_t *req = NULL;
- size_t hdrlen = -1;
- int ret = -1;
- size_t pathlen = 0;
- size_t baselen = 0;
- ino_t par = 0;
- client_local_t *local = NULL;
-
- local = calloc (1, sizeof (*local));
- GF_VALIDATE_OR_GOTO(this->name, local, unwind);
-
- loc_copy (&local->loc, loc);
-
- frame->local = local;
-
- pathlen = STRLEN_0(loc->path);
- baselen = STRLEN_0(loc->name);
- ret = inode_ctx_get (loc->parent, this, &par);
- if (loc->parent->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "MKDIR %"PRId64"/%s (%s): failed to get remote inode "
- "number for parent",
- loc->parent->ino, loc->name, loc->path);
- }
-
- hdrlen = gf_hdr_len (req, pathlen + baselen);
- hdr = gf_hdr_new (req, pathlen + baselen);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->par = hton64 (par);
- req->mode = hton32 (mode);
- strcpy (req->path, loc->path);
- strcpy (req->bname + pathlen, loc->name);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_MKDIR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- free (hdr);
- STACK_UNWIND(frame, -1, EINVAL, loc->inode, NULL);
- return 0;
-
-}
-
-/**
- * client_unlink - unlink function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @loc: location of file
- *
- * external reference through client_protocol_xlator->fops->unlink
- */
-
-int
-client_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_unlink_req_t *req = NULL;
- size_t hdrlen = -1;
- int ret = -1;
- size_t pathlen = 0;
- size_t baselen = 0;
- ino_t par = 0;
-
- pathlen = STRLEN_0(loc->path);
- baselen = STRLEN_0(loc->name);
- ret = inode_ctx_get (loc->parent, this, &par);
- if (loc->parent->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "UNLINK %"PRId64"/%s (%s): failed to get remote inode "
- "number for parent",
- loc->parent->ino, loc->name, loc->path);
- }
-
- hdrlen = gf_hdr_len (req, pathlen + baselen);
- hdr = gf_hdr_new (req, pathlen + baselen);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->par = hton64 (par);
- strcpy (req->path, loc->path);
- strcpy (req->bname + pathlen, loc->name);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_UNLINK,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- free (hdr);
- STACK_UNWIND(frame, -1, EINVAL);
- return 0;
-
-}
-
-/**
- * client_rmdir - rmdir function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @loc: location
- *
- * external reference through client_protocol_xlator->fops->rmdir
- */
-
-int
-client_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_rmdir_req_t *req = NULL;
- size_t hdrlen = -1;
- int ret = -1;
- size_t pathlen = 0;
- size_t baselen = 0;
- ino_t par = 0;
-
- pathlen = STRLEN_0(loc->path);
- baselen = STRLEN_0(loc->name);
- ret = inode_ctx_get (loc->parent, this, &par);
- if (loc->parent->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "RMDIR %"PRId64"/%s (%s): failed to get remote inode "
- "number for parent",
- loc->parent->ino, loc->name, loc->path);
- }
-
- hdrlen = gf_hdr_len (req, pathlen + baselen);
- hdr = gf_hdr_new (req, pathlen + baselen);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->par = hton64 (par);
- strcpy (req->path, loc->path);
- strcpy (req->bname + pathlen, loc->name);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_RMDIR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- free (hdr);
- STACK_UNWIND(frame, -1, EINVAL);
- return 0;
-
-}
-
-
-/**
- * client_symlink - symlink function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @oldpath: pathname of target
- * @newpath: pathname of symlink
- *
- * external reference through client_protocol_xlator->fops->symlink
- */
-
-int
-client_symlink (call_frame_t *frame, xlator_t *this, const char *linkname,
- loc_t *loc)
-{
- int ret = -1;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_symlink_req_t *req = NULL;
- size_t hdrlen = 0;
- size_t pathlen = 0;
- size_t newlen = 0;
- size_t baselen = 0;
- ino_t par = 0;
- client_local_t *local = NULL;
-
- local = calloc (1, sizeof (*local));
- GF_VALIDATE_OR_GOTO(this->name, local, unwind);
-
- loc_copy (&local->loc, loc);
-
- frame->local = local;
-
- pathlen = STRLEN_0 (loc->path);
- baselen = STRLEN_0 (loc->name);
- newlen = STRLEN_0 (linkname);
- ret = inode_ctx_get (loc->parent, this, &par);
- if (loc->parent->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "SYMLINK %"PRId64"/%s (%s): failed to get remote inode"
- " number parent",
- loc->parent->ino, loc->name, loc->path);
- }
-
- hdrlen = gf_hdr_len (req, pathlen + baselen + newlen);
- hdr = gf_hdr_new (req, pathlen + baselen + newlen);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->par = hton64 (par);
- strcpy (req->path, loc->path);
- strcpy (req->bname + pathlen, loc->name);
- strcpy (req->linkname + pathlen + baselen, linkname);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_SYMLINK,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-unwind:
- if (hdr)
- free (hdr);
- STACK_UNWIND(frame, -1, EINVAL, loc->inode, NULL);
- return 0;
-
-}
-
-/**
- * client_rename - rename function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @oldloc: location of old pathname
- * @newloc: location of new pathname
- *
- * external reference through client_protocol_xlator->fops->rename
- */
-
-int
-client_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc)
-{
- int ret = -1;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_rename_req_t *req = NULL;
- size_t hdrlen = 0;
- size_t oldpathlen = 0;
- size_t oldbaselen = 0;
- size_t newpathlen = 0;
- size_t newbaselen = 0;
- ino_t oldpar = 0;
- ino_t newpar = 0;
-
- oldpathlen = STRLEN_0(oldloc->path);
- oldbaselen = STRLEN_0(oldloc->name);
- newpathlen = STRLEN_0(newloc->path);
- newbaselen = STRLEN_0(newloc->name);
- ret = inode_ctx_get (oldloc->parent, this, &oldpar);
- if (oldloc->parent->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "RENAME %"PRId64"/%s (%s): failed to get remote inode "
- "number for source parent",
- oldloc->parent->ino, oldloc->name, oldloc->path);
- }
-
- ret = inode_ctx_get (newloc->parent, this, &newpar);
- if (newloc->parent->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "CREATE %"PRId64"/%s (%s): failed to get remote inode "
- "number for destination parent",
- newloc->parent->ino, newloc->name, newloc->path);
- }
-
- hdrlen = gf_hdr_len (req, (oldpathlen + oldbaselen +
- newpathlen + newbaselen));
- hdr = gf_hdr_new (req, (oldpathlen + oldbaselen +
- newpathlen + newbaselen));
-
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->oldpar = hton64 (oldpar);
- req->newpar = hton64 (newpar);
-
- strcpy (req->oldpath, oldloc->path);
- strcpy (req->oldbname + oldpathlen, oldloc->name);
- strcpy (req->newpath + oldpathlen + oldbaselen, newloc->path);
- strcpy (req->newbname + oldpathlen + oldbaselen + newpathlen,
- newloc->name);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_RENAME,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-unwind:
- if (hdr)
- free (hdr);
- STACK_UNWIND(frame, -1, EINVAL, NULL);
- return 0;
-
-}
-
-/**
- * client_link - link function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @oldloc: location of old pathname
- * @newpath: new pathname
- *
- * external reference through client_protocol_xlator->fops->link
- */
-
-int
-client_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc)
-{
- int ret = -1;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_link_req_t *req = NULL;
- size_t hdrlen = 0;
- size_t oldpathlen = 0;
- size_t newpathlen = 0;
- size_t newbaselen = 0;
- ino_t oldino = 0;
- ino_t newpar = 0;
- client_local_t *local = NULL;
-
- local = calloc (1, sizeof (*local));
- GF_VALIDATE_OR_GOTO(this->name, local, unwind);
-
- loc_copy (&local->loc, oldloc);
-
- frame->local = local;
-
- oldpathlen = STRLEN_0(oldloc->path);
- newpathlen = STRLEN_0(newloc->path);
- newbaselen = STRLEN_0(newloc->name);
-
- ret = inode_ctx_get (oldloc->inode, this, &oldino);
- if (oldloc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "LINK %"PRId64"/%s (%s) ==> %"PRId64" (%s): "
- "failed to get remote inode number for source inode",
- newloc->parent->ino, newloc->name, newloc->path,
- oldloc->ino, oldloc->path);
- }
-
- ret = inode_ctx_get (newloc->parent, this, &newpar);
- if (newloc->parent->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "LINK %"PRId64"/%s (%s) ==> %"PRId64" (%s): "
- "failed to get remote inode number destination parent",
- newloc->parent->ino, newloc->name, newloc->path,
- oldloc->ino, oldloc->path);
- }
-
- hdrlen = gf_hdr_len (req, oldpathlen + newpathlen + newbaselen);
- hdr = gf_hdr_new (req, oldpathlen + newpathlen + newbaselen);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- strcpy (req->oldpath, oldloc->path);
- strcpy (req->newpath + oldpathlen, newloc->path);
- strcpy (req->newbname + oldpathlen + newpathlen, newloc->name);
-
- req->oldino = hton64 (oldino);
- req->newpar = hton64 (newpar);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_LINK,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-unwind:
- if (hdr)
- free (hdr);
- STACK_UNWIND(frame, -1, EINVAL, oldloc->inode, NULL);
- return 0;
-}
-
-/**
- * client_chmod - chmod function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @loc: location
- * @mode:
- *
- * external reference through client_protocol_xlator->fops->chmod
- */
-
-int
-client_chmod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_chmod_req_t *req = NULL;
- size_t hdrlen = -1;
- int ret = -1;
- size_t pathlen = 0;
- ino_t ino = 0;
-
- pathlen = STRLEN_0(loc->path);
-
- ret = inode_ctx_get (loc->inode, this, &ino);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "CHMOD %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- }
-
- hdrlen = gf_hdr_len (req, pathlen);
- hdr = gf_hdr_new (req, pathlen);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- req->mode = hton32 (mode);
- strcpy (req->path, loc->path);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_CHMOD,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- free (hdr);
- STACK_UNWIND(frame, -1, EINVAL, NULL);
- return 0;
-
-}
-
-/**
- * client_chown - chown function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @loc: location
- * @uid: uid of new owner
- * @gid: gid of new owner group
- *
- * external reference through client_protocol_xlator->fops->chown
- */
-
-int
-client_chown (call_frame_t *frame, xlator_t *this, loc_t *loc, uid_t uid,
- gid_t gid)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_chown_req_t *req = NULL;
- size_t hdrlen = -1;
- int ret = -1;
- size_t pathlen = 0;
- ino_t ino = 0;
-
- pathlen = STRLEN_0(loc->path);
-
- ret = inode_ctx_get (loc->inode, this, &ino);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "CHOWN %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- }
-
- hdrlen = gf_hdr_len (req, pathlen);
- hdr = gf_hdr_new (req, pathlen);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- req->uid = hton32 (uid);
- req->gid = hton32 (gid);
- strcpy (req->path, loc->path);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_CHOWN,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- free (hdr);
- STACK_UNWIND(frame, -1, EINVAL, NULL);
- return 0;
-
-}
-
-/**
- * client_truncate - truncate function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @loc: location
- * @offset:
- *
- * external reference through client_protocol_xlator->fops->truncate
- */
-
-int
-client_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_truncate_req_t *req = NULL;
- size_t hdrlen = -1;
- int ret = -1;
- size_t pathlen = 0;
- ino_t ino = 0;
-
- pathlen = STRLEN_0(loc->path);
- ret = inode_ctx_get (loc->inode, this, &ino);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "TRUNCATE %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- }
-
- hdrlen = gf_hdr_len (req, pathlen);
- hdr = gf_hdr_new (req, pathlen);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- req->offset = hton64 (offset);
- strcpy (req->path, loc->path);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_TRUNCATE,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- free (hdr);
- STACK_UNWIND(frame, -1, EINVAL, NULL);
- return 0;
-
-}
-
-/**
- * client_utimes - utimes function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @loc: location
- * @tvp:
- *
- * external reference through client_protocol_xlator->fops->utimes
- */
-
-int
-client_utimens (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct timespec *tvp)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_utimens_req_t *req = NULL;
- size_t hdrlen = -1;
- int ret = -1;
- size_t pathlen = 0;
- ino_t ino = 0;
-
- pathlen = STRLEN_0(loc->path);
- ret = inode_ctx_get (loc->inode, this, &ino);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "UTIMENS %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- }
-
- hdrlen = gf_hdr_len (req, pathlen);
- hdr = gf_hdr_new (req, pathlen);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- gf_timespec_from_timespec (req->tv, tvp);
- strcpy (req->path, loc->path);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_UTIMENS,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- free (hdr);
- STACK_UNWIND(frame, -1, EINVAL, NULL);
- return 0;
-
-}
-
-/**
- * client_readv - readv function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @fd: file descriptor structure
- * @size:
- * @offset:
- *
- * external reference through client_protocol_xlator->fops->readv
- */
-
-int
-client_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_read_req_t *req = NULL;
- size_t hdrlen = 0;
- int64_t remote_fd = -1;
- int ret = -1;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx, EBADFD",
- fd->inode->ino);
- STACK_UNWIND (frame, -1, EBADFD, NULL, 0, NULL);
- return 0;
- }
- remote_fd = fdctx->remote_fd;
- hdrlen = gf_hdr_len (req, 0);
- hdr = gf_hdr_new (req, 0);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->fd = hton64 (remote_fd);
- req->size = hton32 (size);
- req->offset = hton64 (offset);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_READ,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-unwind:
- if (hdr)
- free (hdr);
- STACK_UNWIND(frame, -1, EINVAL, NULL, 0, NULL);
- return 0;
-
-}
-
-/**
- * client_writev - writev function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @fd: file descriptor structure
- * @vector:
- * @count:
- * @offset:
- *
- * external reference through client_protocol_xlator->fops->writev
- */
-
-int
-client_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_write_req_t *req = NULL;
- size_t hdrlen = 0;
- int64_t remote_fd = -1;
- int ret = -1;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- STACK_UNWIND (frame, -1, EBADFD, NULL);
- return 0;
- }
- remote_fd = fdctx->remote_fd;
- hdrlen = gf_hdr_len (req, 0);
- hdr = gf_hdr_new (req, 0);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->fd = hton64 (remote_fd);
- req->size = hton32 (iov_length (vector, count));
- req->offset = hton64 (offset);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_WRITE,
- hdr, hdrlen, vector, count, iobref);
- return ret;
-unwind:
- if (hdr)
- free (hdr);
- STACK_UNWIND(frame, -1, EINVAL, NULL);
- return 0;
-
-}
-
-
-/**
- * client_statfs - statfs function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @loc: location
- *
- * external reference through client_protocol_xlator->fops->statfs
- */
-
-int
-client_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_statfs_req_t *req = NULL;
- size_t hdrlen = -1;
- int ret = -1;
- size_t pathlen = 0;
- ino_t ino = 0;
-
- pathlen = STRLEN_0(loc->path);
-
- if (loc->inode) {
- ret = inode_ctx_get (loc->inode, this, &ino);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "STATFS %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- }
- }
-
- hdrlen = gf_hdr_len (req, pathlen);
- hdr = gf_hdr_new (req, pathlen);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- strcpy (req->path, loc->path);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_STATFS,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- free (hdr);
- STACK_UNWIND(frame, -1, EINVAL, NULL);
- return 0;
-
-}
-
-
-/**
- * client_flush - flush function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @fd: file descriptor structure
- *
- * external reference through client_protocol_xlator->fops->flush
- */
-
-int
-client_flush (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_flush_req_t *req = NULL;
- size_t hdrlen = 0;
- int64_t remote_fd = -1;
- int ret = -1;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- STACK_UNWIND (frame, -1, EBADFD);
- return 0;
- }
- remote_fd = fdctx->remote_fd;
- hdrlen = gf_hdr_len (req, 0);
- hdr = gf_hdr_new (req, 0);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->fd = hton64 (remote_fd);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_FLUSH,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-unwind:
- if (hdr)
- free (hdr);
- STACK_UNWIND(frame, -1, EINVAL);
- return 0;
-
-}
-
-/**
- * client_fsync - fsync function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @fd: file descriptor structure
- * @flags:
- *
- * external reference through client_protocol_xlator->fops->fsync
- */
-
-int
-client_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fsync_req_t *req = NULL;
- size_t hdrlen = 0;
- int64_t remote_fd = -1;
- int32_t ret = -1;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- STACK_UNWIND(frame, -1, EBADFD);
- return 0;
- }
- remote_fd = fdctx->remote_fd;
- hdrlen = gf_hdr_len (req, 0);
- hdr = gf_hdr_new (req, 0);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->fd = hton64 (remote_fd);
- req->data = hton32 (flags);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_FSYNC,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- free (hdr);
- STACK_UNWIND(frame, -1, EINVAL);
- return 0;
-
-}
-
-int
-client_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_xattrop_req_t *req = NULL;
- size_t hdrlen = 0;
- size_t dict_len = 0;
- int32_t ret = -1;
- size_t pathlen = 0;
- ino_t ino = 0;
- char *buf = NULL;
-
- GF_VALIDATE_OR_GOTO("client", this, unwind);
-
- GF_VALIDATE_OR_GOTO(this->name, loc, unwind);
-
- if (dict) {
- ret = dict_allocate_and_serialize (dict, &buf, &dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict(%p)",
- dict);
- goto unwind;
- }
- }
-
- pathlen = STRLEN_0(loc->path);
-
- ret = inode_ctx_get (loc->inode, this, &ino);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "XATTROP %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- }
-
- hdrlen = gf_hdr_len (req, dict_len + pathlen);
- hdr = gf_hdr_new (req, dict_len + pathlen);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->flags = hton32 (flags);
- req->dict_len = hton32 (dict_len);
- if (dict) {
- memcpy (req->dict, buf, dict_len);
- FREE (buf);
- }
-
- req->ino = hton64 (ino);
- strcpy (req->path + dict_len, loc->path);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_XATTROP,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-unwind:
- if (hdr)
- free (hdr);
-
- STACK_UNWIND(frame, -1, EINVAL, NULL);
- return 0;
-}
-
-
-int
-client_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t flags, dict_t *dict)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fxattrop_req_t *req = NULL;
- size_t hdrlen = 0;
- size_t dict_len = 0;
- int64_t remote_fd = -1;
- int32_t ret = -1;
- ino_t ino = 0;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- conf = this->private;
-
- if (dict) {
- dict_len = dict_serialized_length (dict);
- if (dict_len < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict(%p)",
- dict);
- goto unwind;
- }
- }
-
- if (fd) {
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. "
- "returning EBADFD",
- fd->inode->ino);
- goto unwind;
- }
- ino = fd->inode->ino;
- remote_fd = fdctx->remote_fd;
- }
-
- hdrlen = gf_hdr_len (req, dict_len);
- hdr = gf_hdr_new (req, dict_len);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->flags = hton32 (flags);
- req->dict_len = hton32 (dict_len);
- if (dict) {
- ret = dict_serialize (dict, req->dict);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to serialize dictionary(%p)",
- dict);
- goto unwind;
- }
- }
- req->fd = hton64 (remote_fd);
- req->ino = hton64 (ino);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_FXATTROP,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-unwind:
- if (hdr)
- free (hdr);
-
- STACK_UNWIND (frame, -1, EBADFD, NULL);
- return 0;
-
-}
-
-/**
- * client_setxattr - setxattr function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @loc: location
- * @dict: dictionary which contains key:value to be set.
- * @flags:
- *
- * external reference through client_protocol_xlator->fops->setxattr
- */
-
-int
-client_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *dict, int32_t flags)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_setxattr_req_t *req = NULL;
- size_t hdrlen = 0;
- size_t dict_len = 0;
- int ret = -1;
- size_t pathlen = 0;
- ino_t ino = 0;
-
- dict_len = dict_serialized_length (dict);
- if (dict_len < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict(%p)",
- dict);
- goto unwind;
- }
-
- pathlen = STRLEN_0(loc->path);
-
- ret = inode_ctx_get (loc->inode, this, &ino);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "SETXATTR %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- }
-
- hdrlen = gf_hdr_len (req, dict_len + pathlen);
- hdr = gf_hdr_new (req, dict_len + pathlen);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- req->flags = hton32 (flags);
- req->dict_len = hton32 (dict_len);
-
- ret = dict_serialize (dict, req->dict);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to serialize dictionary(%p)",
- dict);
- goto unwind;
- }
-
- strcpy (req->path + dict_len, loc->path);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_SETXATTR,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-unwind:
- if (hdr)
- free (hdr);
-
- STACK_UNWIND(frame, -1, EINVAL);
- return 0;
-}
-
-/**
- * client_fsetxattr - fsetxattr function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @fd: fd
- * @dict: dictionary which contains key:value to be set.
- * @flags:
- *
- * external reference through client_protocol_xlator->fops->fsetxattr
- */
-
-int
-client_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- dict_t *dict, int32_t flags)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fsetxattr_req_t *req = NULL;
- size_t hdrlen = 0;
- size_t dict_len = 0;
- ino_t ino;
- int ret = -1;
- int64_t remote_fd = -1;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- conf = this->private;
-
- dict_len = dict_serialized_length (dict);
- if (dict_len < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict(%p)",
- dict);
- goto unwind;
- }
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- goto unwind;
- }
- ino = fd->inode->ino;
- remote_fd = fdctx->remote_fd;
-
- hdrlen = gf_hdr_len (req, dict_len);
- hdr = gf_hdr_new (req, dict_len);
-
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- req->fd = hton64 (remote_fd);
- req->flags = hton32 (flags);
- req->dict_len = hton32 (dict_len);
-
- ret = dict_serialize (dict, req->dict);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to serialize dictionary(%p)",
- dict);
- goto unwind;
- }
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_FSETXATTR,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-unwind:
- if (hdr)
- free (hdr);
-
- STACK_UNWIND(frame, -1, EINVAL);
- return 0;
-}
-
-/**
- * client_getxattr - getxattr function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @loc: location structure
- *
- * external reference through client_protocol_xlator->fops->getxattr
- */
-
-int
-client_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
-{
- int ret = -1;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_getxattr_req_t *req = NULL;
- size_t hdrlen = 0;
- size_t pathlen = 0;
- size_t namelen = 0;
- ino_t ino = 0;
-
- pathlen = STRLEN_0(loc->path);
- if (name)
- namelen = STRLEN_0(name);
-
- ret = inode_ctx_get (loc->inode, this, &ino);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "GETXATTR %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- }
-
- hdrlen = gf_hdr_len (req, pathlen + namelen);
- hdr = gf_hdr_new (req, pathlen + namelen);
- GF_VALIDATE_OR_GOTO(frame->this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- req->namelen = hton32 (namelen);
- strcpy (req->path, loc->path);
- if (name)
- strcpy (req->name + pathlen, name);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_GETXATTR,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-unwind:
- if (hdr)
- free (hdr);
-
- STACK_UNWIND(frame, -1, EINVAL, NULL);
- return 0;
-}
-
-
-/**
- * client_fgetxattr - fgetxattr function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @fd: fd
- *
- * external reference through client_protocol_xlator->fops->fgetxattr
- */
-
-int
-client_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name)
-{
- int ret = -1;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fgetxattr_req_t *req = NULL;
- size_t hdrlen = 0;
- int64_t remote_fd = -1;
- size_t namelen = 0;
- ino_t ino = 0;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- if (name)
- namelen = STRLEN_0(name);
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get remote fd. EBADFD",
- fd->inode->ino);
- goto unwind;
- }
- ino = fd->inode->ino;
- remote_fd = fdctx->remote_fd;
-
- hdrlen = gf_hdr_len (req, namelen);
- hdr = gf_hdr_new (req, namelen);
-
- GF_VALIDATE_OR_GOTO(frame->this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- req->fd = hton64 (remote_fd);
- req->namelen = hton32 (namelen);
-
- if (name)
- strcpy (req->name, name);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_FGETXATTR,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-unwind:
- if (hdr)
- free (hdr);
-
- STACK_UNWIND(frame, -1, EINVAL, NULL);
- return 0;
-}
-
-
-/**
- * client_removexattr - removexattr function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @loc: location structure
- * @name:
- *
- * external reference through client_protocol_xlator->fops->removexattr
- */
-
-int
-client_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
-{
- int ret = -1;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_removexattr_req_t *req = NULL;
- size_t hdrlen = 0;
- size_t namelen = 0;
- size_t pathlen = 0;
- ino_t ino = 0;
-
- pathlen = STRLEN_0(loc->path);
- namelen = STRLEN_0(name);
-
- ret = inode_ctx_get (loc->inode, this, &ino);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "REMOVEXATTR %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- }
-
- hdrlen = gf_hdr_len (req, pathlen + namelen);
- hdr = gf_hdr_new (req, pathlen + namelen);
- GF_VALIDATE_OR_GOTO(frame->this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- strcpy (req->path, loc->path);
- strcpy (req->name + pathlen, name);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_REMOVEXATTR,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-unwind:
- if (hdr)
- free (hdr);
- STACK_UNWIND(frame, -1, EINVAL);
- return 0;
-}
-
-/**
- * client_opendir - opendir function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @loc: location structure
- *
- * external reference through client_protocol_xlator->fops->opendir
- */
-
-int
-client_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc,
- fd_t *fd)
-{
- gf_fop_opendir_req_t *req = NULL;
- gf_hdr_common_t *hdr = NULL;
- size_t hdrlen = 0;
- int ret = -1;
- ino_t ino = 0;
- size_t pathlen = 0;
- client_local_t *local = NULL;
-
- local = calloc (1, sizeof (*local));
- GF_VALIDATE_OR_GOTO(this->name, local, unwind);
-
- loc_copy (&local->loc, loc);
- local->fd = fd_ref (fd);
-
- frame->local = local;
-
- ret = inode_ctx_get (loc->inode, this, &ino);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "OPENDIR %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- }
-
- pathlen = STRLEN_0(loc->path);
-
- hdrlen = gf_hdr_len (req, pathlen);
- hdr = gf_hdr_new (req, pathlen);
- GF_VALIDATE_OR_GOTO(frame->this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- strcpy (req->path, loc->path);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_OPENDIR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- free (hdr);
- STACK_UNWIND(frame, -1, EINVAL, fd);
- return 0;
-
-}
-
-
-/**
- * client_readdir - readdir function for client protocol
- * @frame: call frame
- * @this: this translator structure
- *
- * external reference through client_protocol_xlator->fops->readdir
- */
-
-int
-client_getdents (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset, int32_t flag)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_getdents_req_t *req = NULL;
- size_t hdrlen = 0;
- int64_t remote_fd = -1;
- int ret = -1;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- STACK_UNWIND (frame, -1, EBADFD, NULL);
- return 0;
- }
- remote_fd = fdctx->remote_fd;
- hdrlen = gf_hdr_len (req, 0);
- hdr = gf_hdr_new (req, 0);
- GF_VALIDATE_OR_GOTO(frame->this->name, hdr, unwind);
-
- req = gf_param (hdr);
- GF_VALIDATE_OR_GOTO(frame->this->name, hdr, unwind);
-
- req->fd = hton64 (remote_fd);
- req->size = hton32 (size);
- req->offset = hton64 (offset);
- req->flags = hton32 (flag);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_GETDENTS,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-unwind:
- STACK_UNWIND(frame, -1, EINVAL, NULL, 0);
- return 0;
-}
-
-/**
- * client_readdir - readdir function for client protocol
- * @frame: call frame
- * @this: this translator structure
- *
- * external reference through client_protocol_xlator->fops->readdir
- */
-
-int
-client_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_readdir_req_t *req = NULL;
- size_t hdrlen = 0;
- int64_t remote_fd = -1;
- int ret = -1;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- goto unwind;
- }
- remote_fd = fdctx->remote_fd;
- hdrlen = gf_hdr_len (req, 0);
- hdr = gf_hdr_new (req, 0);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req->fd = hton64 (remote_fd);
- req->size = hton32 (size);
- req->offset = hton64 (offset);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_READDIR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-unwind:
- if (hdr)
- free (hdr);
- STACK_UNWIND (frame, -1, EBADFD, NULL);
- return 0;
-
-}
-
-/**
- * client_fsyncdir - fsyncdir function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @fd: file descriptor structure
- * @flags:
- *
- * external reference through client_protocol_xlator->fops->fsyncdir
- */
-
-int
-client_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fsyncdir_req_t *req = NULL;
- size_t hdrlen = 0;
- int64_t remote_fd = -1;
- int32_t ret = -1;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- goto unwind;
- }
- remote_fd = fdctx->remote_fd;
- hdrlen = gf_hdr_len (req, 0);
- hdr = gf_hdr_new (req, 0);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->data = hton32 (flags);
- req->fd = hton64 (remote_fd);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_FSYNCDIR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- STACK_UNWIND (frame, -1, EBADFD);
- return 0;
-}
-
-/**
- * client_access - access function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @loc: location structure
- * @mode:
- *
- * external reference through client_protocol_xlator->fops->access
- */
-
-int
-client_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_access_req_t *req = NULL;
- size_t hdrlen = -1;
- int ret = -1;
- ino_t ino = 0;
- size_t pathlen = 0;
-
- ret = inode_ctx_get (loc->inode, this, &ino);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "ACCESS %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- }
-
- pathlen = STRLEN_0(loc->path);
-
- hdrlen = gf_hdr_len (req, pathlen);
- hdr = gf_hdr_new (req, pathlen);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- req->mask = hton32 (mask);
- strcpy (req->path, loc->path);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_ACCESS,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- free (hdr);
-
- STACK_UNWIND(frame, -1, EINVAL);
- return 0;
-
-}
-
-/**
- * client_ftrucate - ftruncate function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @fd: file descriptor structure
- * @offset: offset to truncate to
- *
- * external reference through client_protocol_xlator->fops->ftruncate
- */
-
-int
-client_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,
- off_t offset)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_ftruncate_req_t *req = NULL;
- int64_t remote_fd = -1;
- size_t hdrlen = -1;
- int ret = -1;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- STACK_UNWIND (frame, -1, EBADFD, NULL);
- return 0;
- }
- remote_fd = fdctx->remote_fd;
- hdrlen = gf_hdr_len (req, 0);
- hdr = gf_hdr_new (req, 0);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->fd = hton64 (remote_fd);
- req->offset = hton64 (offset);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_FTRUNCATE,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- free (hdr);
-
- STACK_UNWIND(frame, -1, EINVAL, NULL);
- return 0;
-
-}
-
-/**
- * client_fstat - fstat function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @fd: file descriptor structure
- *
- * external reference through client_protocol_xlator->fops->fstat
- */
-
-int
-client_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fstat_req_t *req = NULL;
- int64_t remote_fd = -1;
- size_t hdrlen = -1;
- int ret = -1;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- STACK_UNWIND (frame, -1, EBADFD, NULL);
- return 0;
- }
- remote_fd = fdctx->remote_fd;
- hdrlen = gf_hdr_len (req, 0);
- hdr = gf_hdr_new (req, 0);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->fd = hton64 (remote_fd);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_FSTAT,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- free (hdr);
-
- STACK_UNWIND(frame, -1, EINVAL, NULL);
- return 0;
-
-}
-
-/**
- * client_lk - lk function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @fd: file descriptor structure
- * @cmd: lock command
- * @lock:
- *
- * external reference through client_protocol_xlator->fops->lk
- */
-
-int
-client_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct flock *flock)
-{
- int ret = -1;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_lk_req_t *req = NULL;
- size_t hdrlen = 0;
- int64_t remote_fd = -1;
- int32_t gf_cmd = 0;
- int32_t gf_type = 0;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- STACK_UNWIND(frame, -1, EBADFD, NULL);
- return 0;
- }
- remote_fd = fdctx->remote_fd;
- if (cmd == F_GETLK || cmd == F_GETLK64)
- gf_cmd = GF_LK_GETLK;
- else if (cmd == F_SETLK || cmd == F_SETLK64)
- gf_cmd = GF_LK_SETLK;
- else if (cmd == F_SETLKW || cmd == F_SETLKW64)
- gf_cmd = GF_LK_SETLKW;
- else {
- gf_log (this->name, GF_LOG_DEBUG,
- "Unknown cmd (%d)!", gf_cmd);
- goto unwind;
- }
-
- switch (flock->l_type) {
- case F_RDLCK:
- gf_type = GF_LK_F_RDLCK;
- break;
- case F_WRLCK:
- gf_type = GF_LK_F_WRLCK;
- break;
- case F_UNLCK:
- gf_type = GF_LK_F_UNLCK;
- break;
- }
-
- hdrlen = gf_hdr_len (req, 0);
- hdr = gf_hdr_new (req, 0);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->fd = hton64 (remote_fd);
- req->cmd = hton32 (gf_cmd);
- req->type = hton32 (gf_type);
- gf_flock_from_flock (&req->flock, flock);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_LK,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-unwind:
- if (hdr)
- free (hdr);
-
- STACK_UNWIND(frame, -1, EINVAL, NULL);
- return 0;
-}
-
-/**
- * client_inodelk - inodelk function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @inode: inode structure
- * @cmd: lock command
- * @lock: flock struct
- *
- * external reference through client_protocol_xlator->fops->inodelk
- */
-
-int
-client_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- loc_t *loc, int32_t cmd, struct flock *flock)
-{
- int ret = -1;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_inodelk_req_t *req = NULL;
- size_t hdrlen = 0;
- int32_t gf_cmd = 0;
- int32_t gf_type = 0;
- ino_t ino = 0;
- size_t pathlen = 0;
- size_t vollen = 0;
-
- pathlen = STRLEN_0(loc->path);
- vollen = STRLEN_0(volume);
-
- ret = inode_ctx_get (loc->inode, this, &ino);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "INODELK %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- }
-
- if (cmd == F_GETLK || cmd == F_GETLK64)
- gf_cmd = GF_LK_GETLK;
- else if (cmd == F_SETLK || cmd == F_SETLK64)
- gf_cmd = GF_LK_SETLK;
- else if (cmd == F_SETLKW || cmd == F_SETLKW64)
- gf_cmd = GF_LK_SETLKW;
- else {
- gf_log (this->name, GF_LOG_DEBUG,
- "Unknown cmd (%d)!", gf_cmd);
- goto unwind;
- }
-
- switch (flock->l_type) {
- case F_RDLCK:
- gf_type = GF_LK_F_RDLCK;
- break;
- case F_WRLCK:
- gf_type = GF_LK_F_WRLCK;
- break;
- case F_UNLCK:
- gf_type = GF_LK_F_UNLCK;
- break;
- }
-
- hdrlen = gf_hdr_len (req, pathlen + vollen);
- hdr = gf_hdr_new (req, pathlen + vollen);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- strcpy (req->path, loc->path);
- strcpy (req->path + pathlen, volume);
-
- req->ino = hton64 (ino);
-
- req->cmd = hton32 (gf_cmd);
- req->type = hton32 (gf_type);
- gf_flock_from_flock (&req->flock, flock);
-
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST,
- GF_FOP_INODELK,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-unwind:
- if (hdr)
- free (hdr);
-
- STACK_UNWIND(frame, -1, EINVAL);
- return 0;
-
-}
-
-
-/**
- * client_finodelk - finodelk function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @inode: inode structure
- * @cmd: lock command
- * @lock: flock struct
- *
- * external reference through client_protocol_xlator->fops->finodelk
- */
-
-int
-client_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, int32_t cmd, struct flock *flock)
-{
- int ret = -1;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_finodelk_req_t *req = NULL;
- size_t hdrlen = 0;
- size_t vollen = 0;
- int32_t gf_cmd = 0;
- int32_t gf_type = 0;
- int64_t remote_fd = -1;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- vollen = STRLEN_0(volume);
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- STACK_UNWIND(frame, -1, EBADFD);
- return 0;
- }
- remote_fd = fdctx->remote_fd;
- if (cmd == F_GETLK || cmd == F_GETLK64)
- gf_cmd = GF_LK_GETLK;
- else if (cmd == F_SETLK || cmd == F_SETLK64)
- gf_cmd = GF_LK_SETLK;
- else if (cmd == F_SETLKW || cmd == F_SETLKW64)
- gf_cmd = GF_LK_SETLKW;
- else {
- gf_log (this->name, GF_LOG_DEBUG,
- "Unknown cmd (%d)!", gf_cmd);
- goto unwind;
- }
-
- switch (flock->l_type) {
- case F_RDLCK:
- gf_type = GF_LK_F_RDLCK;
- break;
- case F_WRLCK:
- gf_type = GF_LK_F_WRLCK;
- break;
- case F_UNLCK:
- gf_type = GF_LK_F_UNLCK;
- break;
- }
-
- hdrlen = gf_hdr_len (req, vollen);
- hdr = gf_hdr_new (req, vollen);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- strcpy (req->volume, volume);
-
- req->fd = hton64 (remote_fd);
-
- req->cmd = hton32 (gf_cmd);
- req->type = hton32 (gf_type);
- gf_flock_from_flock (&req->flock, flock);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST,
- GF_FOP_FINODELK,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-unwind:
- if (hdr)
- free (hdr);
-
- STACK_UNWIND(frame, -1, EINVAL);
- return 0;
-}
-
-
-int
-client_entrylk (call_frame_t *frame, xlator_t *this, const char *volume,
- loc_t *loc, const char *name, entrylk_cmd cmd,
- entrylk_type type)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_entrylk_req_t *req = NULL;
- size_t pathlen = 0;
- size_t vollen = 0;
- size_t hdrlen = -1;
- int ret = -1;
- ino_t ino = 0;
- size_t namelen = 0;
-
- pathlen = STRLEN_0(loc->path);
- vollen = STRLEN_0(volume);
-
- if (name)
- namelen = STRLEN_0(name);
-
- ret = inode_ctx_get (loc->inode, this, &ino);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "ENTRYLK %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- }
-
- hdrlen = gf_hdr_len (req, pathlen + vollen + namelen);
- hdr = gf_hdr_new (req, pathlen + vollen + namelen);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- req->namelen = hton64 (namelen);
-
- strcpy (req->path, loc->path);
- if (name)
- strcpy (req->name + pathlen, name);
- strcpy (req->volume + pathlen + namelen, volume);
-
- req->cmd = hton32 (cmd);
- req->type = hton32 (type);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_ENTRYLK,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- free (hdr);
-
- STACK_UNWIND(frame, -1, EINVAL);
- return 0;
-
-}
-
-
-int
-client_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, const char *name, entrylk_cmd cmd,
- entrylk_type type)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fentrylk_req_t *req = NULL;
- int64_t remote_fd = -1;
- size_t vollen = 0;
- size_t namelen = 0;
- size_t hdrlen = -1;
- int ret = -1;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- if (name)
- namelen = STRLEN_0(name);
-
- conf = this->private;
-
- vollen = STRLEN_0(volume);
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- STACK_UNWIND(frame, -1, EBADFD);
- return 0;
- }
- remote_fd = fdctx->remote_fd;
- hdrlen = gf_hdr_len (req, namelen + vollen);
- hdr = gf_hdr_new (req, namelen + vollen);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->fd = hton64 (remote_fd);
- req->namelen = hton64 (namelen);
-
- if (name)
- strcpy (req->name, name);
-
- strcpy (req->volume + namelen, volume);
-
- req->cmd = hton32 (cmd);
- req->type = hton32 (type);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_FENTRYLK,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- free (hdr);
-
- STACK_UNWIND(frame, -1, EINVAL);
- return 0;
-}
-
-/*
- * client_lookup - lookup function for client protocol
- * @frame: call frame
- * @this:
- * @loc: location
- *
- * not for external reference
- */
-
-int
-client_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xattr_req)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_lookup_req_t *req = NULL;
- size_t hdrlen = -1;
- int ret = -1;
- ino_t ino = 0;
- ino_t par = 0;
- size_t dictlen = 0;
- size_t pathlen = 0;
- size_t baselen = 0;
- int32_t op_ret = -1;
- int32_t op_errno = EINVAL;
- client_local_t *local = NULL;
- char *buf = NULL;
-
- local = calloc (1, sizeof (*local));
- GF_VALIDATE_OR_GOTO(this->name, local, unwind);
-
- loc_copy (&local->loc, loc);
-
- frame->local = local;
-
- GF_VALIDATE_OR_GOTO (this->name, loc, unwind);
- GF_VALIDATE_OR_GOTO (this->name, loc->path, unwind);
-
- if (loc->ino != 1) {
- ret = inode_ctx_get (loc->parent, this, &par);
- if (loc->parent->ino && ret < 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "LOOKUP %"PRId64"/%s (%s): failed to get "
- "remote inode number for parent",
- loc->parent->ino, loc->name, loc->path);
- }
- GF_VALIDATE_OR_GOTO (this->name, loc->name, unwind);
- baselen = STRLEN_0(loc->name);
- } else {
- ino = 1;
- }
-
- pathlen = STRLEN_0(loc->path);
-
- if (xattr_req) {
- ret = dict_allocate_and_serialize (xattr_req, &buf, &dictlen);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict(%p)",
- xattr_req);
- goto unwind;
- }
- }
-
- hdrlen = gf_hdr_len (req, pathlen + baselen + dictlen);
- hdr = gf_hdr_new (req, pathlen + baselen + dictlen);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->ino = hton64 (ino);
- req->par = hton64 (par);
- strcpy (req->path, loc->path);
- if (baselen)
- strcpy (req->path + pathlen, loc->name);
-
- if (dictlen > 0) {
- memcpy (req->dict + pathlen + baselen, buf, dictlen);
- FREE (buf);
- }
-
- req->dictlen = hton32 (dictlen);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_LOOKUP,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-
-unwind:
- STACK_UNWIND (frame, op_ret, op_errno, loc->inode, NULL, NULL);
- return ret;
-}
-
-
-int
-client_fchmod (call_frame_t *frame, xlator_t *this, fd_t *fd, mode_t mode)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fchmod_req_t *req = NULL;
- int64_t remote_fd = -1;
- size_t hdrlen = -1;
- int ret = -1;
- int32_t op_errno = EINVAL;
- int32_t op_ret = -1;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- GF_VALIDATE_OR_GOTO (this->name, fd, unwind);
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- op_errno = EBADFD;
- gf_log (this->name, GF_LOG_DEBUG,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- goto unwind;
- }
- remote_fd = fdctx->remote_fd;
- hdrlen = gf_hdr_len (req, 0);
- hdr = gf_hdr_new (req, 0);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->fd = hton64 (remote_fd);
- req->mode = hton32 (mode);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_FCHMOD,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-
-unwind:
- STACK_UNWIND (frame, op_ret, op_errno, NULL);
- return 0;
-}
-
-
-int
-client_fchown (call_frame_t *frame, xlator_t *this, fd_t *fd, uid_t uid,
- gid_t gid)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fchown_req_t *req = NULL;
- int64_t remote_fd = 0;
- size_t hdrlen = -1;
- int32_t op_ret = -1;
- int32_t op_errno = EINVAL;
- int32_t ret = -1;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- GF_VALIDATE_OR_GOTO (this->name, fd, unwind);
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- op_errno = EBADFD;
- gf_log (this->name, GF_LOG_DEBUG,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- goto unwind;
- }
- remote_fd = fdctx->remote_fd;
- hdrlen = gf_hdr_len (req, 0);
- hdr = gf_hdr_new (req, 0);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->fd = hton64 (remote_fd);
- req->uid = hton32 (uid);
- req->gid = hton32 (gid);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_LOWLAT),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_FCHOWN,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-
-unwind:
- STACK_UNWIND (frame, op_ret, op_errno, NULL);
- return 0;
-
-}
-
-
-int
-client_setdents (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
- dir_entry_t *entries, int32_t count)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_setdents_req_t *req = NULL;
- int64_t remote_fd = 0;
- char *ptr = NULL;
- dir_entry_t *trav = NULL;
- uint32_t len = 0;
- int32_t buf_len = 0;
- int32_t ret = -1;
- int32_t op_ret = -1;
- int32_t op_errno = EINVAL;
- int32_t vec_count = 0;
- size_t hdrlen = -1;
- struct iovec vector[1];
- struct iobref *iobref = NULL;
- struct iobuf *iobuf = NULL;
- client_fd_ctx_t *fdctx = NULL;
- client_conf_t *conf = NULL;
-
- GF_VALIDATE_OR_GOTO (this->name, fd, unwind);
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_get_ctx (fd, this);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "(%"PRId64"): failed to get fd ctx. EBADFD",
- fd->inode->ino);
- op_errno = EBADFD;
- goto unwind;
- }
- remote_fd = fdctx->remote_fd;
- GF_VALIDATE_OR_GOTO (this->name, entries, unwind);
- GF_VALIDATE_OR_GOTO (this->name, count, unwind);
-
- trav = entries->next;
- while (trav) {
- len += strlen (trav->name);
- len += 1;
- len += strlen (trav->link);
- len += 1;
- len += 256; // max possible for statbuf;
- trav = trav->next;
- }
- iobuf = iobuf_get (this->ctx->iobuf_pool);
- GF_VALIDATE_OR_GOTO (this->name, iobuf, unwind);
-
- ptr = iobuf->ptr;
-
- trav = entries->next;
- while (trav) {
- int32_t this_len = 0;
- char *tmp_buf = NULL;
- struct stat *stbuf = &trav->buf;
- {
- /* Convert the stat buf to string */
- uint64_t dev = stbuf->st_dev;
- uint64_t ino = stbuf->st_ino;
- uint32_t mode = stbuf->st_mode;
- uint32_t nlink = stbuf->st_nlink;
- uint32_t uid = stbuf->st_uid;
- uint32_t gid = stbuf->st_gid;
- uint64_t rdev = stbuf->st_rdev;
- uint64_t size = stbuf->st_size;
- uint32_t blksize = stbuf->st_blksize;
- uint64_t blocks = stbuf->st_blocks;
-
- uint32_t atime = stbuf->st_atime;
- uint32_t mtime = stbuf->st_mtime;
- uint32_t ctime = stbuf->st_ctime;
-
- uint32_t atime_nsec = ST_ATIM_NSEC(stbuf);
- uint32_t mtime_nsec = ST_MTIM_NSEC(stbuf);
- uint32_t ctime_nsec = ST_CTIM_NSEC(stbuf);
-
- ret = asprintf (&tmp_buf, GF_STAT_PRINT_FMT_STR,
- dev, ino, mode, nlink, uid, gid,
- rdev, size, blksize, blocks,
- atime, atime_nsec, mtime, mtime_nsec,
- ctime, ctime_nsec);
- if (-1 == ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "asprintf failed while setting stat "
- "buf to string");
- STACK_UNWIND (frame, -1, ENOMEM);
- return 0;
- }
- }
- this_len = sprintf (ptr, "%s/%s%s\n",
- trav->name, tmp_buf, trav->link);
-
- FREE (tmp_buf);
- trav = trav->next;
- ptr += this_len;
- }
- buf_len = strlen (iobuf->ptr);
-
- hdrlen = gf_hdr_len (req, 0);
- hdr = gf_hdr_new (req, 0);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->fd = hton64 (remote_fd);
- req->flags = hton32 (flags);
- req->count = hton32 (count);
-
- iobref = iobref_new ();
- iobref_add (iobref, iobuf);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_SETDENTS,
- hdr, hdrlen, vector, vec_count, iobref);
-
- if (iobref)
- iobref_unref (iobref);
-
- if (iobuf)
- iobuf_unref (iobuf);
-
- return ret;
-unwind:
-
- if (iobref)
- iobref_unref (iobref);
-
- if (iobuf)
- iobuf_unref (iobuf);
-
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-/**
- * client_releasedir - releasedir function for client protocol
- * @this: this translator structure
- * @fd: file descriptor structure
- *
- * external reference through client_protocol_xlator->cbks->releasedir
- */
-
-int
-client_releasedir (xlator_t *this, fd_t *fd)
-{
- call_frame_t *fr = NULL;
- int32_t ret = -1;
- int64_t remote_fd = 0;
- gf_hdr_common_t *hdr = NULL;
- size_t hdrlen = 0;
- gf_cbk_releasedir_req_t *req = NULL;
- client_conf_t *conf = NULL;
- client_fd_ctx_t *fdctx = NULL;
-
- GF_VALIDATE_OR_GOTO ("client", this, out);
- GF_VALIDATE_OR_GOTO (this->name, fd, out);
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_del_ctx (fd, this);
- if (fdctx != NULL) {
- list_del_init (&fdctx->sfd_pos);
- }
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL){
- gf_log (this->name, GF_LOG_DEBUG,
- "(%"PRId64"): failed to get fd ctx.",
- fd->inode->ino);
- goto out;
- }
-
- remote_fd = fdctx->remote_fd;
- hdrlen = gf_hdr_len (req, 0);
- hdr = gf_hdr_new (req, 0);
- GF_VALIDATE_OR_GOTO (this->name, hdr, out);
-
- req = gf_param (hdr);
-
- req->fd = hton64 (remote_fd);
-
- FREE (fdctx);
-
- fr = create_frame (this, this->ctx->pool);
- GF_VALIDATE_OR_GOTO (this->name, fr, out);
-
- ret = protocol_client_xfer (fr, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_CBK_REQUEST, GF_CBK_RELEASEDIR,
- hdr, hdrlen, NULL, 0, NULL);
-out:
- return ret;
-}
-
-
-/**
- * client_release - release function for client protocol
- * @this: this translator structure
- * @fd: file descriptor structure
- *
- * external reference through client_protocol_xlator->cbks->release
- *
- */
-int
-client_release (xlator_t *this, fd_t *fd)
-{
- call_frame_t *fr = NULL;
- int32_t ret = -1;
- int64_t remote_fd = 0;
- gf_hdr_common_t *hdr = NULL;
- size_t hdrlen = 0;
- gf_cbk_release_req_t *req = NULL;
- client_conf_t *conf = NULL;
- client_fd_ctx_t *fdctx = NULL;
-
- GF_VALIDATE_OR_GOTO ("client", this, out);
- GF_VALIDATE_OR_GOTO (this->name, fd, out);
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- fdctx = this_fd_del_ctx (fd, this);
- if (fdctx != NULL) {
- list_del_init (&fdctx->sfd_pos);
- }
- }
- pthread_mutex_unlock (&conf->mutex);
-
- if (fdctx == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "(%"PRId64"): failed to get fd ctx.",
- fd->inode->ino);
- goto out;
- }
-
- remote_fd = fdctx->remote_fd;
- hdrlen = gf_hdr_len (req, 0);
- hdr = gf_hdr_new (req, 0);
- GF_VALIDATE_OR_GOTO (this->name, hdr, out);
- req = gf_param (hdr);
-
- req->fd = hton64 (remote_fd);
-
- FREE (fdctx);
-
- fr = create_frame (this, this->ctx->pool);
- GF_VALIDATE_OR_GOTO (this->name, fr, out);
-
- ret = protocol_client_xfer (fr, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_CBK_REQUEST, GF_CBK_RELEASE,
- hdr, hdrlen, NULL, 0, NULL);
-out:
- return ret;
-}
-
-/*
- * MGMT_OPS
- */
-
-/**
- * client_stats - stats function for client protocol
- * @frame: call frame
- * @this: this translator structure
- * @flags:
- *
- * external reference through client_protocol_xlator->mops->stats
- */
-
-int
-client_stats (call_frame_t *frame, xlator_t *this, int32_t flags)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_mop_stats_req_t *req = NULL;
- size_t hdrlen = -1;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("client", this, unwind);
-
- hdrlen = gf_hdr_len (req, 0);
- hdr = gf_hdr_new (req, 0);
- GF_VALIDATE_OR_GOTO (this->name, hdr, unwind);
-
- req = gf_param (hdr);
-
- req->flags = hton32 (flags);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_MOP_REQUEST, GF_MOP_STATS,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- STACK_UNWIND (frame, -1, EINVAL, NULL);
- return 0;
-}
-
-
-/* Callbacks */
-
-int
-client_fxattrop_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_xattrop_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t gf_errno = 0;
- int32_t op_errno = 0;
- int32_t dict_len = 0;
- dict_t *dict = NULL;
- int32_t ret = -1;
- char *dictbuf = NULL;
-
- rsp = gf_param (hdr);
- GF_VALIDATE_OR_GOTO(frame->this->name, rsp, fail);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
-
- if (op_ret >= 0) {
- op_ret = -1;
- dict_len = ntoh32 (rsp->dict_len);
-
- if (dict_len > 0) {
- dictbuf = memdup (rsp->dict, dict_len);
- GF_VALIDATE_OR_GOTO(frame->this->name, dictbuf, fail);
-
- dict = dict_new();
- GF_VALIDATE_OR_GOTO(frame->this->name, dict, fail);
-
- ret = dict_unserialize (dictbuf, dict_len, &dict);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "failed to serialize dictionary(%p)",
- dict);
- op_errno = -ret;
- goto fail;
- } else {
- dict->extra_free = dictbuf;
- dictbuf = NULL;
- }
- }
- op_ret = 0;
- }
- gf_errno = ntoh32 (hdr->rsp.op_errno);
- op_errno = gf_error_to_errno (gf_errno);
-
-fail:
- STACK_UNWIND (frame, op_ret, op_errno, dict);
-
- if (dictbuf)
- free (dictbuf);
-
- if (dict)
- dict_unref (dict);
-
- return 0;
-}
-
-
-int
-client_xattrop_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_xattrop_rsp_t *rsp = NULL;
- int32_t op_ret = -1;
- int32_t gf_errno = EINVAL;
- int32_t op_errno = 0;
- int32_t dict_len = 0;
- dict_t *dict = NULL;
- int32_t ret = -1;
- char *dictbuf = NULL;
-
- rsp = gf_param (hdr);
- GF_VALIDATE_OR_GOTO(frame->this->name, rsp, fail);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- if (op_ret >= 0) {
- op_ret = -1;
- dict_len = ntoh32 (rsp->dict_len);
-
- if (dict_len > 0) {
- dictbuf = memdup (rsp->dict, dict_len);
- GF_VALIDATE_OR_GOTO(frame->this->name, dictbuf, fail);
-
- dict = get_new_dict();
- GF_VALIDATE_OR_GOTO(frame->this->name, dict, fail);
- dict_ref (dict);
-
- ret = dict_unserialize (dictbuf, dict_len, &dict);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "failed to serialize dictionary(%p)",
- dict);
- goto fail;
- } else {
- dict->extra_free = dictbuf;
- dictbuf = NULL;
- }
- }
- op_ret = 0;
- }
- gf_errno = ntoh32 (hdr->rsp.op_errno);
- op_errno = gf_error_to_errno (gf_errno);
-
-
-fail:
- STACK_UNWIND (frame, op_ret, op_errno, dict);
-
- if (dictbuf)
- free (dictbuf);
- if (dict)
- dict_unref (dict);
-
- return 0;
-}
-
-/*
- * client_chown_cbk -
- *
- * @frame:
- * @args:
- *
- * not for external reference
- */
-
-int
-client_fchown_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- struct stat stbuf = {0, };
- gf_fop_fchown_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret == 0) {
- gf_stat_to_stat (&rsp->stat, &stbuf);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
-
- return 0;
-}
-
-
-int
-client_fchmod_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- struct stat stbuf = {0, };
- gf_fop_fchmod_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret == 0) {
- gf_stat_to_stat (&rsp->stat, &stbuf);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
-
- return 0;
-}
-
-
-/*
- * client_create_cbk - create callback function for client protocol
- * @frame: call frame
- * @args: arguments in dictionary
- *
- * not for external reference
- */
-
-int
-client_create_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_create_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- fd_t *fd = NULL;
- inode_t *inode = NULL;
- struct stat stbuf = {0, };
- int64_t remote_fd = 0;
- int32_t ret = -1;
- client_local_t *local = NULL;
- client_conf_t *conf = NULL;
- client_fd_ctx_t *fdctx = NULL;
-
- local = frame->local; frame->local = NULL;
- conf = frame->this->private;
- fd = local->fd;
- inode = local->loc.inode;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = ntoh32 (hdr->rsp.op_errno);
-
- if (op_ret >= 0) {
- remote_fd = ntoh64 (rsp->fd);
- gf_stat_to_stat (&rsp->stat, &stbuf);
- }
-
- if (op_ret >= 0) {
- ret = inode_ctx_put (local->loc.inode, frame->this,
- stbuf.st_ino);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "CREATE %"PRId64"/%s (%s): failed to set "
- "remote inode number to inode ctx",
- local->loc.parent->ino, local->loc.name,
- local->loc.path);
- }
-
- fdctx = CALLOC (1, sizeof (*fdctx));
- if (!fdctx) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind_out;
- }
-
- fdctx->remote_fd = remote_fd;
- INIT_LIST_HEAD (&fdctx->sfd_pos);
- fdctx->fd = fd;
- this_fd_set_ctx (fd, frame->this, &local->loc, fdctx);
-
- pthread_mutex_lock (&conf->mutex);
- {
- list_add_tail (&fdctx->sfd_pos, &conf->saved_fds);
- }
- pthread_mutex_unlock (&conf->mutex);
- }
-unwind_out:
- STACK_UNWIND (frame, op_ret, op_errno, fd, inode, &stbuf);
-
- client_local_wipe (local);
-
- return 0;
-}
-
-
-/*
- * client_open_cbk - open callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-int
-client_open_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- int32_t op_ret = -1;
- int32_t op_errno = ENOTCONN;
- fd_t *fd = NULL;
- int64_t remote_fd = 0;
- gf_fop_open_rsp_t *rsp = NULL;
- client_local_t *local = NULL;
- client_conf_t *conf = NULL;
- client_fd_ctx_t *fdctx = NULL;
-
-
- local = frame->local; frame->local = NULL;
- conf = frame->this->private;
- fd = local->fd;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = ntoh32 (hdr->rsp.op_errno);
-
- if (op_ret >= 0) {
- remote_fd = ntoh64 (rsp->fd);
- }
-
- if (op_ret >= 0) {
- fdctx = CALLOC (1, sizeof (*fdctx));
- if (!fdctx) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind_out;
- }
- fdctx->remote_fd = remote_fd;
- INIT_LIST_HEAD (&fdctx->sfd_pos);
- fdctx->fd = fd;
- this_fd_set_ctx (fd, frame->this, &local->loc, fdctx);
-
- pthread_mutex_lock (&conf->mutex);
- {
- list_add_tail (&fdctx->sfd_pos, &conf->saved_fds);
- }
- pthread_mutex_unlock (&conf->mutex);
- }
-unwind_out:
- STACK_UNWIND (frame, op_ret, op_errno, fd);
-
- client_local_wipe (local);
-
- return 0;
-}
-
-/*
- * client_stat_cbk - stat callback for client protocol
- * @frame: call frame
- * @args: arguments dictionary
- *
- * not for external reference
- */
-
-int
-client_stat_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- struct stat stbuf = {0, };
- gf_fop_stat_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret == 0) {
- gf_stat_to_stat (&rsp->stat, &stbuf);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
-
- return 0;
-}
-
-/*
- * client_utimens_cbk - utimens callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_utimens_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- struct stat stbuf = {0, };
- gf_fop_utimens_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret == 0) {
- gf_stat_to_stat (&rsp->stat, &stbuf);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
-
- return 0;
-}
-
-/*
- * client_chmod_cbk - chmod for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_chmod_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- struct stat stbuf = {0, };
- gf_fop_chmod_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret == 0) {
- gf_stat_to_stat (&rsp->stat, &stbuf);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
-
- return 0;
-}
-
-/*
- * client_chown_cbk - chown for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_chown_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- struct stat stbuf = {0, };
- gf_fop_chown_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret == 0) {
- gf_stat_to_stat (&rsp->stat, &stbuf);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
-
- return 0;
-}
-
-/*
- * client_mknod_cbk - mknod callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_mknod_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_mknod_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- struct stat stbuf = {0, };
- inode_t *inode = NULL;
- client_local_t *local = NULL;
- int ret = 0;
-
- local = frame->local;
- frame->local = NULL;
- inode = local->loc.inode;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret >= 0) {
- gf_stat_to_stat (&rsp->stat, &stbuf);
-
- ret = inode_ctx_put (local->loc.inode, frame->this,
- stbuf.st_ino);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "MKNOD %"PRId64"/%s (%s): failed to set remote"
- " inode number to inode ctx",
- local->loc.parent->ino, local->loc.name,
- local->loc.path);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, inode, &stbuf);
-
- client_local_wipe (local);
-
- return 0;
-}
-
-/*
- * client_symlink_cbk - symlink callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_symlink_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_symlink_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- struct stat stbuf = {0, };
- inode_t *inode = NULL;
- client_local_t *local = NULL;
- int ret = 0;
-
- local = frame->local;
- frame->local = NULL;
- inode = local->loc.inode;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret >= 0) {
- gf_stat_to_stat (&rsp->stat, &stbuf);
-
- ret = inode_ctx_put (inode, frame->this,
- stbuf.st_ino);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "SYMLINK %"PRId64"/%s (%s): failed to set "
- "remote inode number to inode ctx",
- local->loc.parent->ino, local->loc.name,
- local->loc.path);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, inode, &stbuf);
-
- client_local_wipe (local);
-
- return 0;
-}
-
-/*
- * client_link_cbk - link callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_link_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_link_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- struct stat stbuf = {0, };
- inode_t *inode = NULL;
- client_local_t *local = NULL;
-
- local = frame->local;
- frame->local = NULL;
- inode = local->loc.inode;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret >= 0) {
- gf_stat_to_stat (&rsp->stat, &stbuf);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, inode, &stbuf);
-
- client_local_wipe (local);
-
- return 0;
-}
-
-/*
- * client_truncate_cbk - truncate callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_truncate_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- struct stat stbuf = {0, };
- gf_fop_truncate_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret == 0) {
- gf_stat_to_stat (&rsp->stat, &stbuf);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
-
- return 0;
-}
-
-/* client_fstat_cbk - fstat callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_fstat_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- struct stat stbuf = {0, };
- gf_fop_fstat_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret == 0) {
- gf_stat_to_stat (&rsp->stat, &stbuf);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
-
- return 0;
-}
-
-/*
- * client_ftruncate_cbk - ftruncate callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-int
-client_ftruncate_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- struct stat stbuf = {0, };
- gf_fop_ftruncate_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret == 0) {
- gf_stat_to_stat (&rsp->stat, &stbuf);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
-
- return 0;
-}
-
-
-/* client_readv_cbk - readv callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external referece
- */
-
-int
-client_readv_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_read_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- struct iovec vector = {0, };
- struct stat stbuf = {0, };
- struct iobref *iobref = NULL;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret != -1) {
- iobref = iobref_new ();
- gf_stat_to_stat (&rsp->stat, &stbuf);
- vector.iov_len = op_ret;
-
- if (op_ret > 0) {
- vector.iov_base = iobuf->ptr;
- iobref_add (iobref, iobuf);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, &vector, 1, &stbuf, iobref);
-
- if (iobref)
- iobref_unref (iobref);
-
- if (iobuf)
- iobuf_unref (iobuf);
-
- return 0;
-}
-
-/*
- * client_write_cbk - write callback for client protocol
- * @frame: cal frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_write_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_write_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- struct stat stbuf = {0, };
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret >= 0)
- gf_stat_to_stat (&rsp->stat, &stbuf);
-
- STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
-
- return 0;
-}
-
-
-int
-client_readdir_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_readdir_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- uint32_t buf_size = 0;
- gf_dirent_t entries;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = ntoh32 (hdr->rsp.op_errno);
-
- INIT_LIST_HEAD (&entries.list);
- if (op_ret > 0) {
- buf_size = ntoh32 (rsp->size);
- gf_dirent_unserialize (&entries, rsp->buf, buf_size);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, &entries);
-
- gf_dirent_free (&entries);
-
- return 0;
-}
-
-/*
- * client_fsync_cbk - fsync callback for client protocol
- *
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_fsync_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- struct stat stbuf = {0, };
- gf_fop_fsync_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
-
- return 0;
-}
-
-/*
- * client_unlink_cbk - unlink callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_unlink_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_unlink_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- STACK_UNWIND (frame, op_ret, op_errno);
-
- return 0;
-}
-
-/*
- * client_rename_cbk - rename callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_rename_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- struct stat stbuf = {0, };
- gf_fop_rename_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret == 0) {
- gf_stat_to_stat (&rsp->stat, &stbuf);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
-
- return 0;
-}
-
-
-/*
- * client_readlink_cbk - readlink callback for client protocol
- *
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-int
-client_readlink_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_readlink_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- char *link = NULL;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret > 0) {
- link = rsp->path;
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, link);
- return 0;
-}
-
-/*
- * client_mkdir_cbk - mkdir callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_mkdir_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_mkdir_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- struct stat stbuf = {0, };
- inode_t *inode = NULL;
- client_local_t *local = NULL;
- int ret = 0;
-
- local = frame->local;
- inode = local->loc.inode;
- frame->local = NULL;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret >= 0) {
- gf_stat_to_stat (&rsp->stat, &stbuf);
-
- ret = inode_ctx_put (inode, frame->this, stbuf.st_ino);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "MKDIR %"PRId64"/%s (%s): failed to set "
- "remote inode number to inode ctx",
- local->loc.parent->ino, local->loc.name,
- local->loc.path);
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, inode, &stbuf);
-
- client_local_wipe (local);
-
- return 0;
-}
-
-/*
- * client_flush_cbk - flush callback for client protocol
- *
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_flush_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- STACK_UNWIND (frame, op_ret, op_errno);
-
- return 0;
-}
-
-/*
- * client_opendir_cbk - opendir callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_opendir_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- int32_t op_ret = -1;
- int32_t op_errno = ENOTCONN;
- fd_t *fd = NULL;
- int64_t remote_fd = 0;
- gf_fop_opendir_rsp_t *rsp = NULL;
- client_local_t *local = NULL;
- client_conf_t *conf = NULL;
- client_fd_ctx_t *fdctx = NULL;
-
- local = frame->local; frame->local = NULL;
- conf = frame->this->private;
- fd = local->fd;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = ntoh32 (hdr->rsp.op_errno);
-
- if (op_ret >= 0) {
- remote_fd = ntoh64 (rsp->fd);
- }
-
- if (op_ret >= 0) {
- fdctx = CALLOC (1, sizeof (*fdctx));
- if (!fdctx) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind_out;
- }
- fdctx->remote_fd = remote_fd;
- INIT_LIST_HEAD (&fdctx->sfd_pos);
- fdctx->fd = fd;
- this_fd_set_ctx (fd, frame->this, &local->loc, fdctx);
-
- pthread_mutex_lock (&conf->mutex);
- {
- list_add_tail (&fdctx->sfd_pos, &conf->saved_fds);
- }
- pthread_mutex_unlock (&conf->mutex);
- }
-unwind_out:
- STACK_UNWIND (frame, op_ret, op_errno, fd);
-
- client_local_wipe (local);
-
- return 0;
-}
-
-/*
- * client_rmdir_cbk - rmdir callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_rmdir_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_rmdir_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- STACK_UNWIND (frame, op_ret, op_errno);
-
- return 0;
-}
-
-/*
- * client_access_cbk - access callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_access_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_access_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- STACK_UNWIND (frame, op_ret, op_errno);
-
- return 0;
-}
-
-/*
- * client_lookup_cbk - lookup callback for client protocol
- *
- * @frame: call frame
- * @args: arguments dictionary
- *
- * not for external reference
- */
-
-int
-client_lookup_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- struct stat stbuf = {0, };
- inode_t *inode = NULL;
- dict_t *xattr = NULL;
- gf_fop_lookup_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- size_t dict_len = 0;
- char *dictbuf = NULL;
- int32_t ret = -1;
- int32_t gf_errno = 0;
- client_local_t *local = NULL;
- ino_t oldino = 0;
-
- local = frame->local;
- inode = local->loc.inode;
- frame->local = NULL;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
-
- if (op_ret == 0) {
- op_ret = -1;
- gf_stat_to_stat (&rsp->stat, &stbuf);
-
- ret = inode_ctx_get (inode, frame->this, &oldino);
- if (oldino != stbuf.st_ino) {
- if (oldino)
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "LOOKUP %"PRId64"/%s (%s): "
- "inode number changed from "
- "%"PRId64" to %"PRId64,
- local->loc.parent->ino,
- local->loc.name,
- local->loc.path,
- oldino, stbuf.st_ino);
-
- ret = inode_ctx_put (inode, frame->this,
- stbuf.st_ino);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "LOOKUP %"PRId64"/%s (%s) : "
- "failed to set remote inode "
- "number to inode ctx",
- local->loc.parent->ino,
- local->loc.name,
- local->loc.path);
- }
- }
-
- dict_len = ntoh32 (rsp->dict_len);
-
- if (dict_len > 0) {
- dictbuf = memdup (rsp->dict, dict_len);
- GF_VALIDATE_OR_GOTO(frame->this->name, dictbuf, fail);
-
- xattr = dict_new();
- GF_VALIDATE_OR_GOTO(frame->this->name, xattr, fail);
-
- ret = dict_unserialize (dictbuf, dict_len, &xattr);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "%s (%"PRId64"): failed to "
- "unserialize dictionary",
- local->loc.path, inode->ino);
- goto fail;
- } else {
- xattr->extra_free = dictbuf;
- dictbuf = NULL;
- }
- }
- op_ret = 0;
- }
- gf_errno = ntoh32 (hdr->rsp.op_errno);
- op_errno = gf_error_to_errno (gf_errno);
-
-fail:
- STACK_UNWIND (frame, op_ret, op_errno, inode, &stbuf, xattr);
-
- client_local_wipe (local);
-
- if (dictbuf)
- free (dictbuf);
-
- if (xattr)
- dict_unref (xattr);
-
- return 0;
-}
-
-static dir_entry_t *
-gf_bin_to_direntry (char *buf, size_t count)
-{
- int idx = 0;
- int bread = 0;
- size_t rcount = 0;
- char *ender = NULL;
- char *buffer = NULL;
- char tmp_buf[512] = {0,};
- dir_entry_t *trav = NULL;
- dir_entry_t *prev = NULL;
- dir_entry_t *thead = NULL;
- dir_entry_t *head = NULL;
-
- thead = CALLOC (1, sizeof (dir_entry_t));
- GF_VALIDATE_OR_GOTO("client-protocol", thead, fail);
-
- buffer = buf;
- prev = thead;
-
- for (idx = 0; idx < count ; idx++) {
- bread = 0;
- trav = CALLOC (1, sizeof (dir_entry_t));
- GF_VALIDATE_OR_GOTO("client-protocol", trav, fail);
-
- ender = strchr (buffer, '/');
- if (!ender)
- break;
- rcount = ender - buffer;
- trav->name = CALLOC (1, rcount + 2);
- GF_VALIDATE_OR_GOTO("client-protocol", trav->name, fail);
-
- strncpy (trav->name, buffer, rcount);
- bread = rcount + 1;
- buffer += bread;
-
- ender = strchr (buffer, '\n');
- if (!ender)
- break;
- rcount = ender - buffer;
- strncpy (tmp_buf, buffer, rcount);
- bread = rcount + 1;
- buffer += bread;
-
- gf_string_to_stat (tmp_buf, &trav->buf);
-
- ender = strchr (buffer, '\n');
- if (!ender)
- break;
- rcount = ender - buffer;
- *ender = '\0';
- if (S_ISLNK (trav->buf.st_mode))
- trav->link = strdup (buffer);
- else
- trav->link = "";
-
- bread = rcount + 1;
- buffer += bread;
-
- prev->next = trav;
- prev = trav;
- }
-
- head = thead;
-fail:
- return head;
-}
-
-
-int
-gf_free_direntry (dir_entry_t *head)
-{
- dir_entry_t *prev = NULL;
- dir_entry_t *trav = NULL;
-
- prev = head;
- GF_VALIDATE_OR_GOTO("client-protocol", prev, fail);
-
- trav = head->next;
- while (trav) {
- prev->next = trav->next;
- FREE (trav->name);
- if (S_ISLNK (trav->buf.st_mode))
- FREE (trav->link);
- FREE (trav);
- trav = prev->next;
- }
- FREE (head);
-fail:
- return 0;
-}
-
-/*
- * client_getdents_cbk - readdir callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_getdents_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_getdents_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- int32_t gf_errno = 0;
- int32_t nr_count = 0;
- dir_entry_t *entry = NULL;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- gf_errno = ntoh32 (hdr->rsp.op_errno);
- op_errno = gf_error_to_errno (gf_errno);
-
- if (op_ret >= 0) {
- nr_count = ntoh32 (rsp->count);
- entry = gf_bin_to_direntry(iobuf->ptr, nr_count);
- if (entry == NULL) {
- op_ret = -1;
- op_errno = EINVAL;
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, entry, nr_count);
-
- if (iobuf)
- iobuf_unref (iobuf);
- if (entry)
- gf_free_direntry(entry);
-
- return 0;
-}
-
-/*
- * client_statfs_cbk - statfs callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_statfs_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- struct statvfs stbuf = {0, };
- gf_fop_statfs_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret == 0) {
- gf_statfs_to_statfs (&rsp->statfs, &stbuf);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, &stbuf);
-
- return 0;
-}
-
-/*
- * client_fsyncdir_cbk - fsyncdir callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_fsyncdir_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- STACK_UNWIND (frame, op_ret, op_errno);
-
- return 0;
-}
-
-/*
- * client_setxattr_cbk - setxattr callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_setxattr_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_setxattr_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- STACK_UNWIND (frame, op_ret, op_errno);
-
- return 0;
-}
-
-/*
- * client_getxattr_cbk - getxattr callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_getxattr_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_getxattr_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t gf_errno = 0;
- int32_t op_errno = 0;
- int32_t dict_len = 0;
- dict_t *dict = NULL;
- int32_t ret = -1;
- char *dictbuf = NULL;
- client_local_t *local = NULL;
-
- local = frame->local;
- frame->local = NULL;
-
- rsp = gf_param (hdr);
- GF_VALIDATE_OR_GOTO(frame->this->name, rsp, fail);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
-
- if (op_ret >= 0) {
- op_ret = -1;
- dict_len = ntoh32 (rsp->dict_len);
-
- if (dict_len > 0) {
- dictbuf = memdup (rsp->dict, dict_len);
- GF_VALIDATE_OR_GOTO(frame->this->name, dictbuf, fail);
-
- dict = dict_new();
- GF_VALIDATE_OR_GOTO(frame->this->name, dict, fail);
-
- ret = dict_unserialize (dictbuf, dict_len, &dict);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "%s (%"PRId64"): failed to "
- "unserialize xattr dictionary",
- local->loc.path,
- local->loc.inode->ino);
- goto fail;
- } else {
- dict->extra_free = dictbuf;
- dictbuf = NULL;
- }
- }
- op_ret = 0;
- }
- gf_errno = ntoh32 (hdr->rsp.op_errno);
- op_errno = gf_error_to_errno (gf_errno);
-fail:
- STACK_UNWIND (frame, op_ret, op_errno, dict);
-
- client_local_wipe (local);
-
- if (dictbuf)
- free (dictbuf);
-
- if (dict)
- dict_unref (dict);
-
- return 0;
-}
-
-/*
- * client_removexattr_cbk - removexattr callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_removexattr_cbk (call_frame_t *frame, gf_hdr_common_t *hdr,
- size_t hdrlen, struct iobuf *iobuf)
-{
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- STACK_UNWIND (frame, op_ret, op_errno);
-
- return 0;
-}
-
-/*
- * client_lk_cbk - lk callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_lk_common_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- struct flock lock = {0,};
- gf_fop_lk_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret >= 0) {
- gf_flock_to_flock (&rsp->flock, &lock);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, &lock);
- return 0;
-}
-
-/*
- * client_gf_file_lk_cbk - gf_file_lk callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_inodelk_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_inodelk_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-int
-client_finodelk_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_finodelk_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/*
- * client_entrylk_cbk - entrylk callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_entrylk_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_entrylk_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-int
-client_fentrylk_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_fentrylk_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * client_writedir_cbk -
- *
- * @frame:
- * @args:
- *
- * not for external reference
- */
-
-int
-client_setdents_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- STACK_UNWIND (frame, op_ret, op_errno);
-
- return 0;
-}
-
-/*
- * client_stats_cbk - stats callback for client protocol
- *
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_stats_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- struct xlator_stats stats = {0,};
- gf_mop_stats_rsp_t *rsp = NULL;
- char *buffer = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if (op_ret >= 0)
- {
- buffer = rsp->buf;
-
- sscanf (buffer, "%"SCNx64",%"SCNx64",%"SCNx64",%"SCNx64
- ",%"SCNx64",%"SCNx64",%"SCNx64",%"SCNx64"\n",
- &stats.nr_files, &stats.disk_usage, &stats.free_disk,
- &stats.total_disk_size, &stats.read_usage,
- &stats.write_usage, &stats.disk_speed,
- &stats.nr_clients);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, &stats);
- return 0;
-}
-
-/*
- * client_getspec - getspec function for client protocol
- * @frame: call frame
- * @this: client protocol xlator structure
- * @flag:
- *
- * external reference through client_protocol_xlator->fops->getspec
- */
-
-int
-client_getspec (call_frame_t *frame, xlator_t *this, const char *key,
- int32_t flag)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_mop_getspec_req_t *req = NULL;
- size_t hdrlen = -1;
- int keylen = 0;
- int ret = -1;
-
- if (key)
- keylen = STRLEN_0(key);
-
- hdrlen = gf_hdr_len (req, keylen);
- hdr = gf_hdr_new (req, keylen);
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
- req->flags = hton32 (flag);
- req->keylen = hton32 (keylen);
- if (keylen)
- strcpy (req->key, key);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_MOP_REQUEST, GF_MOP_GETSPEC,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-unwind:
- if (hdr)
- free (hdr);
- STACK_UNWIND(frame, -1, EINVAL, NULL);
- return 0;
-}
-
-/*
- * client_getspec_cbk - getspec callback for client protocol
- *
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_getspec_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_mop_getspec_rsp_t *rsp = NULL;
- char *spec_data = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- int32_t gf_errno = 0;
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- gf_errno = ntoh32 (hdr->rsp.op_errno);
- op_errno = gf_error_to_errno (gf_errno);
- rsp = gf_param (hdr);
-
- if (op_ret >= 0) {
- spec_data = rsp->spec;
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, spec_data);
- return 0;
-}
-
-
-int
-client_log (call_frame_t *frame, xlator_t *this, const char *msg)
-{
- gf_hdr_common_t * hdr = NULL;
- gf_mop_log_req_t * req = NULL;
- size_t hdrlen = -1;
- int msglen = 0;
- int ret = -1;
-
- if (msg)
- msglen = STRLEN_0(msg);
-
- hdrlen = gf_hdr_len (req, msglen);
- hdr = gf_hdr_new (req, msglen);
-
- GF_VALIDATE_OR_GOTO(this->name, hdr, unwind);
-
- req = gf_param (hdr);
- req->msglen = hton32 (msglen);
-
- if (msglen)
- strcpy (req->msg, msg);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_MOP_REQUEST, GF_MOP_LOG,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-
-unwind:
- if (hdr)
- free (hdr);
-
- STACK_UNWIND(frame, -1, EINVAL, NULL);
- return 0;
-}
-
-
-int
-client_log_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen)
-{
- gf_mop_log_rsp_t * rsp = NULL;
-
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- int32_t gf_errno = 0;
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- gf_errno = ntoh32 (hdr->rsp.op_errno);
- op_errno = gf_error_to_errno (gf_errno);
-
- rsp = gf_param (hdr);
-
- STACK_UNWIND (frame, op_ret, op_errno);
-
- return 0;
-}
-
-
-int
-client_checksum (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flag)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_checksum_req_t *req = NULL;
- size_t hdrlen = -1;
- int ret = -1;
- ino_t ino = 0;
-
- hdrlen = gf_hdr_len (req, strlen (loc->path) + 1);
- hdr = gf_hdr_new (req, strlen (loc->path) + 1);
- req = gf_param (hdr);
-
- ret = inode_ctx_get (loc->inode, this, &ino);
- if (loc->inode->ino && ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "CHECKSUM %"PRId64" (%s): "
- "failed to get remote inode number",
- loc->inode->ino, loc->path);
- }
-
- req->ino = hton64 (ino);
- req->flag = hton32 (flag);
- strcpy (req->path, loc->path);
-
- ret = protocol_client_xfer (frame, this,
- CLIENT_CHANNEL (this, CHANNEL_BULK),
- GF_OP_TYPE_FOP_REQUEST, GF_FOP_CHECKSUM,
- hdr, hdrlen, NULL, 0, NULL);
-
- return ret;
-}
-
-
-int
-client_checksum_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_checksum_rsp_t *rsp = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- int32_t gf_errno = 0;
- unsigned char *fchecksum = NULL;
- unsigned char *dchecksum = NULL;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- gf_errno = ntoh32 (hdr->rsp.op_errno);
- op_errno = gf_error_to_errno (gf_errno);
-
- if (op_ret >= 0) {
- fchecksum = rsp->fchecksum;
- dchecksum = rsp->dchecksum + NAME_MAX;
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, fchecksum, dchecksum);
- return 0;
-}
-
-/*
- * client_setspec_cbk - setspec callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_setspec_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- STACK_UNWIND (frame, op_ret, op_errno);
-
- return 0;
-}
-
-/*
- * client_setvolume_cbk - setvolume callback for client protocol
- * @frame: call frame
- * @args: argument dictionary
- *
- * not for external reference
- */
-
-int
-client_setvolume_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- client_conf_t *conf = NULL;
- gf_mop_setvolume_rsp_t *rsp = NULL;
- client_connection_t *conn = NULL;
- glusterfs_ctx_t *ctx = NULL;
- xlator_t *this = NULL;
- xlator_list_t *parent = NULL;
- transport_t *trans = NULL;
- dict_t *reply = NULL;
- char *remote_subvol = NULL;
- char *remote_error = NULL;
- char *process_uuid = NULL;
- int32_t ret = -1;
- int32_t op_ret = -1;
- int32_t op_errno = EINVAL;
- int32_t dict_len = 0;
- transport_t *peer_trans = NULL;
- uint64_t peer_trans_int = 0;
-
- trans = frame->local; frame->local = NULL;
- this = frame->this;
- conn = trans->xl_private;
- conf = this->private;
-
- rsp = gf_param (hdr);
-
- op_ret = ntoh32 (hdr->rsp.op_ret);
- op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno));
-
- if ((op_ret < 0) && (op_errno == ENOTCONN)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "setvolume failed (%s)",
- strerror (op_errno));
- goto out;
- }
-
- reply = dict_new ();
- GF_VALIDATE_OR_GOTO(this->name, reply, out);
-
- dict_len = ntoh32 (rsp->dict_len);
- ret = dict_unserialize (rsp->buf, dict_len, &reply);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "failed to unserialize buffer(%p) to dictionary",
- rsp->buf);
- goto out;
- }
-
- ret = dict_get_str (reply, "ERROR", &remote_error);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get ERROR string from reply dictionary");
- }
-
- ret = dict_get_str (reply, "process-uuid", &process_uuid);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get 'process-uuid' from reply dictionary");
- }
-
- if (op_ret < 0) {
- gf_log (trans->xl->name, GF_LOG_DEBUG,
- "SETVOLUME on remote-host failed: %s",
- remote_error ? remote_error : strerror (op_errno));
- errno = op_errno;
- if (op_errno == ESTALE) {
- parent = trans->xl->parents;
- while (parent) {
- xlator_notify (parent->xlator,
- GF_EVENT_VOLFILE_MODIFIED,
- trans->xl);
- parent = parent->next;
- }
- }
-
- } else {
- ret = dict_get_str (this->options, "remote-subvolume",
- &remote_subvol);
- if (!remote_subvol)
- goto out;
-
- ctx = this->ctx;
-
- if (process_uuid && !strcmp (ctx->process_uuid,process_uuid)) {
- ret = dict_get_uint64 (reply, "transport-ptr",
- &peer_trans_int);
-
- peer_trans = (void *) (long) (peer_trans_int);
-
- gf_log (this->name, GF_LOG_WARNING,
- "attaching to the local volume '%s'",
- remote_subvol);
-
- transport_setpeer (trans, peer_trans);
-
- }
-
- gf_log (trans->xl->name, GF_LOG_NORMAL,
- "Connected to %s, attached "
- "to remote volume '%s'.",
- trans->peerinfo.identifier, remote_subvol);
-
- pthread_mutex_lock (&(conn->lock));
- {
- conn->connected = 1;
- }
- pthread_mutex_unlock (&(conn->lock));
-
- parent = trans->xl->parents;
- while (parent) {
- xlator_notify (parent->xlator, GF_EVENT_CHILD_UP,
- trans->xl);
- parent = parent->next;
- }
- }
-
- conf->connecting = 0;
-out:
-
- if (-1 == op_ret) {
- /* Let the connection/re-connection happen in
- * background, for now, don't hang here,
- * tell the parents that i am all ok..
- */
- parent = trans->xl->parents;
- while (parent) {
- xlator_notify (parent->xlator,
- GF_EVENT_CHILD_CONNECTING, trans->xl);
- parent = parent->next;
- }
- conf->connecting= 1;
- }
-
- STACK_DESTROY (frame->root);
-
- if (reply)
- dict_unref (reply);
-
- return op_ret;
-}
-
-/*
- * client_enosys_cbk -
- * @frame: call frame
- *
- * not for external reference
- */
-
-int
-client_enosys_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-
-void
-client_protocol_reconnect (void *trans_ptr)
-{
- transport_t *trans = NULL;
- client_connection_t *conn = NULL;
- struct timeval tv = {0, 0};
- int32_t ret = 0;
-
- trans = trans_ptr;
- conn = trans->xl_private;
- pthread_mutex_lock (&conn->lock);
- {
- if (conn->reconnect)
- gf_timer_call_cancel (trans->xl->ctx,
- conn->reconnect);
- conn->reconnect = 0;
-
- if (conn->connected == 0) {
- tv.tv_sec = 10;
-
- gf_log (trans->xl->name, GF_LOG_TRACE,
- "attempting reconnect");
- ret = transport_connect (trans);
-
- conn->reconnect =
- gf_timer_call_after (trans->xl->ctx, tv,
- client_protocol_reconnect,
- trans);
- } else {
- gf_log (trans->xl->name, GF_LOG_TRACE,
- "breaking reconnect chain");
- }
- }
- pthread_mutex_unlock (&conn->lock);
-
- if (ret == -1 && errno != EINPROGRESS) {
- default_notify (trans->xl, GF_EVENT_CHILD_DOWN, NULL);
- }
-}
-
-int
-protocol_client_mark_fd_bad (xlator_t *this)
-{
- client_conf_t *conf = NULL;
- client_fd_ctx_t *tmp = NULL;
- client_fd_ctx_t *fdctx = NULL;
-
- conf = this->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- list_for_each_entry_safe (fdctx, tmp, &conf->saved_fds,
- sfd_pos) {
- fd_ctx_del (fdctx->fd, this, NULL);
- list_del_init (&fdctx->sfd_pos);
- FREE (fdctx);
- }
-
- INIT_LIST_HEAD(&conf->saved_fds);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- return 0;
-}
-
-/*
- * client_protocol_cleanup - cleanup function
- * @trans: transport object
- *
- */
-
-int
-protocol_client_cleanup (transport_t *trans)
-{
- client_connection_t *conn = NULL;
- struct saved_frames *saved_frames = NULL;
-
- conn = trans->xl_private;
-
- gf_log (trans->xl->name, GF_LOG_TRACE,
- "cleaning up state in transport object %p", trans);
-
- pthread_mutex_lock (&conn->lock);
- {
- saved_frames = conn->saved_frames;
- conn->saved_frames = saved_frames_new ();
-
- /* bailout logic cleanup */
- if (conn->timer) {
- gf_timer_call_cancel (trans->xl->ctx, conn->timer);
- conn->timer = NULL;
- }
-
- if (conn->reconnect == NULL) {
- /* :O This part is empty.. any thing missing? */
- }
- }
- pthread_mutex_unlock (&conn->lock);
-
- saved_frames_destroy (trans->xl, saved_frames,
- gf_fops, gf_mops, gf_cbks);
-
- return 0;
-}
-
-
-/* cbk callbacks */
-int
-client_releasedir_cbk (call_frame_t *frame, gf_hdr_common_t *hdr,
- size_t hdrlen, struct iobuf *iobuf)
-{
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-
-int
-client_release_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-
-int
-client_forget_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_log ("", GF_LOG_CRITICAL, "fop not implemented");
- return 0;
-}
-
-
-static gf_op_t gf_fops[] = {
- [GF_FOP_STAT] = client_stat_cbk,
- [GF_FOP_READLINK] = client_readlink_cbk,
- [GF_FOP_MKNOD] = client_mknod_cbk,
- [GF_FOP_MKDIR] = client_mkdir_cbk,
- [GF_FOP_UNLINK] = client_unlink_cbk,
- [GF_FOP_RMDIR] = client_rmdir_cbk,
- [GF_FOP_SYMLINK] = client_symlink_cbk,
- [GF_FOP_RENAME] = client_rename_cbk,
- [GF_FOP_LINK] = client_link_cbk,
- [GF_FOP_CHMOD] = client_chmod_cbk,
- [GF_FOP_CHOWN] = client_chown_cbk,
- [GF_FOP_TRUNCATE] = client_truncate_cbk,
- [GF_FOP_OPEN] = client_open_cbk,
- [GF_FOP_READ] = client_readv_cbk,
- [GF_FOP_WRITE] = client_write_cbk,
- [GF_FOP_STATFS] = client_statfs_cbk,
- [GF_FOP_FLUSH] = client_flush_cbk,
- [GF_FOP_FSYNC] = client_fsync_cbk,
- [GF_FOP_SETXATTR] = client_setxattr_cbk,
- [GF_FOP_GETXATTR] = client_getxattr_cbk,
- [GF_FOP_REMOVEXATTR] = client_removexattr_cbk,
- [GF_FOP_OPENDIR] = client_opendir_cbk,
- [GF_FOP_GETDENTS] = client_getdents_cbk,
- [GF_FOP_FSYNCDIR] = client_fsyncdir_cbk,
- [GF_FOP_ACCESS] = client_access_cbk,
- [GF_FOP_CREATE] = client_create_cbk,
- [GF_FOP_FTRUNCATE] = client_ftruncate_cbk,
- [GF_FOP_FSTAT] = client_fstat_cbk,
- [GF_FOP_LK] = client_lk_common_cbk,
- [GF_FOP_UTIMENS] = client_utimens_cbk,
- [GF_FOP_FCHMOD] = client_fchmod_cbk,
- [GF_FOP_FCHOWN] = client_fchown_cbk,
- [GF_FOP_LOOKUP] = client_lookup_cbk,
- [GF_FOP_SETDENTS] = client_setdents_cbk,
- [GF_FOP_READDIR] = client_readdir_cbk,
- [GF_FOP_INODELK] = client_inodelk_cbk,
- [GF_FOP_FINODELK] = client_finodelk_cbk,
- [GF_FOP_ENTRYLK] = client_entrylk_cbk,
- [GF_FOP_FENTRYLK] = client_fentrylk_cbk,
- [GF_FOP_CHECKSUM] = client_checksum_cbk,
- [GF_FOP_XATTROP] = client_xattrop_cbk,
- [GF_FOP_FXATTROP] = client_fxattrop_cbk,
-};
-
-static gf_op_t gf_mops[] = {
- [GF_MOP_SETVOLUME] = client_setvolume_cbk,
- [GF_MOP_GETVOLUME] = client_enosys_cbk,
- [GF_MOP_STATS] = client_stats_cbk,
- [GF_MOP_SETSPEC] = client_setspec_cbk,
- [GF_MOP_GETSPEC] = client_getspec_cbk,
- [GF_MOP_PING] = client_ping_cbk,
-};
-
-static gf_op_t gf_cbks[] = {
- [GF_CBK_FORGET] = client_forget_cbk,
- [GF_CBK_RELEASE] = client_release_cbk,
- [GF_CBK_RELEASEDIR] = client_releasedir_cbk
-};
-
-/*
- * client_protocol_interpret - protocol interpreter
- * @trans: transport object
- * @blk: data block
- *
- */
-int
-protocol_client_interpret (xlator_t *this, transport_t *trans,
- char *hdr_p, size_t hdrlen, struct iobuf *iobuf)
-{
- int ret = -1;
- call_frame_t *frame = NULL;
- gf_hdr_common_t *hdr = NULL;
- uint64_t callid = 0;
- int type = -1;
- int op = -1;
- client_connection_t *conn = NULL;
-
- conn = trans->xl_private;
-
- hdr = (gf_hdr_common_t *)hdr_p;
-
- type = ntoh32 (hdr->type);
- op = ntoh32 (hdr->op);
- callid = ntoh64 (hdr->callid);
-
- frame = lookup_frame (trans, op, type, callid);
- if (frame == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "no frame for callid=%"PRId64" type=%d op=%d",
- callid, type, op);
- return 0;
- }
-
- switch (type) {
- case GF_OP_TYPE_FOP_REPLY:
- if ((op > GF_FOP_MAXVALUE) ||
- (op < 0)) {
- gf_log (trans->xl->name, GF_LOG_WARNING,
- "invalid fop '%d'", op);
- } else {
- ret = gf_fops[op] (frame, hdr, hdrlen, iobuf);
- }
- break;
- case GF_OP_TYPE_MOP_REPLY:
- if ((op > GF_MOP_MAXVALUE) ||
- (op < 0)) {
- gf_log (trans->xl->name, GF_LOG_WARNING,
- "invalid fop '%d'", op);
- } else {
- ret = gf_mops[op] (frame, hdr, hdrlen, iobuf);
- }
- break;
- case GF_OP_TYPE_CBK_REPLY:
- if ((op > GF_CBK_MAXVALUE) ||
- (op < 0)) {
- gf_log (trans->xl->name, GF_LOG_WARNING,
- "invalid cbk '%d'", op);
- } else {
- ret = gf_cbks[op] (frame, hdr, hdrlen, iobuf);
- }
- break;
- default:
- gf_log (trans->xl->name, GF_LOG_DEBUG,
- "invalid packet type: %d", type);
- break;
- }
-
- return ret;
-}
-
-/*
- * init - initiliazation function. called during loading of client protocol
- * @this:
- *
- */
-
-int
-init (xlator_t *this)
-{
- transport_t *trans = NULL;
- client_conf_t *conf = NULL;
- client_connection_t *conn = NULL;
- int32_t frame_timeout = 0;
- int32_t ping_timeout = 0;
- data_t *remote_subvolume = NULL;
- int32_t ret = -1;
- int i = 0;
-
- if (this->children) {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: client protocol translator cannot have any "
- "subvolumes");
- goto out;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "Volume is dangling. ");
- }
-
- remote_subvolume = dict_get (this->options, "remote-subvolume");
- if (remote_subvolume == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "Option 'remote-subvolume' is not specified.");
- goto out;
- }
-
- ret = dict_get_int32 (this->options, "frame-timeout",
- &frame_timeout);
- if (ret >= 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "setting frame-timeout to %d", frame_timeout);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "defaulting frame-timeout to 30mins");
- frame_timeout = 1800;
- }
-
- ret = dict_get_int32 (this->options, "ping-timeout",
- &ping_timeout);
- if (ret >= 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "setting ping-timeout to %d", ping_timeout);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "defaulting ping-timeout to 10");
- ping_timeout = 10;
- }
-
- conf = CALLOC (1, sizeof (client_conf_t));
-
- pthread_mutex_init (&conf->mutex, NULL);
- INIT_LIST_HEAD (&conf->saved_fds);
-
- this->private = conf;
-
- for (i = 0; i < CHANNEL_MAX; i++) {
- trans = transport_load (this->options, this);
- if (trans == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Failed to load transport");
- ret = -1;
- goto out;
- }
-
- conn = CALLOC (1, sizeof (*conn));
-
- conn->saved_frames = saved_frames_new ();
-
- conn->callid = 1;
-
- conn->frame_timeout = frame_timeout;
- conn->ping_timeout = ping_timeout;
-
- pthread_mutex_init (&conn->lock, NULL);
-
- trans->xl_private = conn;
- conf->transport[i] = transport_ref (trans);
- }
-
-#ifndef GF_DARWIN_HOST_OS
- {
- struct rlimit lim;
-
- lim.rlim_cur = 1048576;
- lim.rlim_max = 1048576;
-
- ret = setrlimit (RLIMIT_NOFILE, &lim);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "WARNING: Failed to set 'ulimit -n 1M': %s",
- strerror(errno));
- lim.rlim_cur = 65536;
- lim.rlim_max = 65536;
-
- ret = setrlimit (RLIMIT_NOFILE, &lim);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Failed to set max open fd to 64k: %s",
- strerror(errno));
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "max open fd set to 64k");
- }
-
- }
- }
-#endif
- ret = 0;
-out:
- return ret;
-}
-
-/*
- * fini - finish function called during unloading of client protocol
- * @this:
- *
- */
-void
-fini (xlator_t *this)
-{
- /* TODO: Check if its enough.. how to call transport's fini () */
- client_conf_t *conf = NULL;
-
- conf = this->private;
- this->private = NULL;
-
- if (conf) {
- FREE (conf);
- }
- return;
-}
-
-
-int
-protocol_client_handshake (xlator_t *this, transport_t *trans)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_mop_setvolume_req_t *req = NULL;
- dict_t *options = NULL;
- int32_t ret = -1;
- int hdrlen = 0;
- int dict_len = 0;
- call_frame_t *fr = NULL;
- char *process_uuid_xl;
-
- options = this->options;
- ret = dict_set_str (options, "protocol-version", GF_PROTOCOL_VERSION);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set protocol version(%s) in handshake msg",
- GF_PROTOCOL_VERSION);
- }
-
- ret = asprintf (&process_uuid_xl, "%s-%s", this->ctx->process_uuid,
- this->name);
- if (-1 == ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "asprintf failed while setting process_uuid");
- goto fail;
- }
- ret = dict_set_dynstr (options, "process-uuid",
- process_uuid_xl);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set process-uuid(%s) in handshake msg",
- process_uuid_xl);
- }
-
- if (this->ctx->cmd_args.volfile_server) {
- if (this->ctx->cmd_args.volfile_id)
- ret = dict_set_str (options, "volfile-key",
- this->ctx->cmd_args.volfile_id);
- ret = dict_set_uint32 (options, "volfile-checksum",
- this->ctx->volfile_checksum);
- }
-
- dict_len = dict_serialized_length (options);
- if (dict_len < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict(%p)",
- options);
- ret = dict_len;
- goto fail;
- }
-
- hdrlen = gf_hdr_len (req, dict_len);
- hdr = gf_hdr_new (req, dict_len);
- GF_VALIDATE_OR_GOTO(this->name, hdr, fail);
-
- req = gf_param (hdr);
-
- ret = dict_serialize (options, req->buf);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to serialize dictionary(%p)",
- options);
- goto fail;
- }
-
- req->dict_len = hton32 (dict_len);
- fr = create_frame (this, this->ctx->pool);
- GF_VALIDATE_OR_GOTO(this->name, fr, fail);
-
- fr->local = trans;
- ret = protocol_client_xfer (fr, this, trans,
- GF_OP_TYPE_MOP_REQUEST, GF_MOP_SETVOLUME,
- hdr, hdrlen, NULL, 0, NULL);
- return ret;
-fail:
- if (hdr)
- free (hdr);
- return ret;
-}
-
-
-int
-protocol_client_pollout (xlator_t *this, transport_t *trans)
-{
- client_conf_t *conf = NULL;
-
- conf = trans->xl->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- gettimeofday (&conf->last_sent, NULL);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- return 0;
-}
-
-
-int
-protocol_client_pollin (xlator_t *this, transport_t *trans)
-{
- client_conf_t *conf = NULL;
- int ret = -1;
- struct iobuf *iobuf = NULL;
- char *hdr = NULL;
- size_t hdrlen = 0;
-
- conf = trans->xl->private;
-
- pthread_mutex_lock (&conf->mutex);
- {
- gettimeofday (&conf->last_received, NULL);
- }
- pthread_mutex_unlock (&conf->mutex);
-
- ret = transport_receive (trans, &hdr, &hdrlen, &iobuf);
-
- if (ret == 0)
- {
- ret = protocol_client_interpret (this, trans, hdr, hdrlen,
- iobuf);
- }
-
- /* TODO: use mem-pool */
- FREE (hdr);
-
- return ret;
-}
-
-int
-client_priv_dump (xlator_t *this)
-{
- client_conf_t *conf = NULL;
- int ret = -1;
- client_fd_ctx_t *tmp = NULL;
- int i = 0;
- char key[GF_DUMP_MAX_BUF_LEN];
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
-
- if (!this)
- return -1;
-
- conf = this->private;
- if (!conf) {
- gf_log (this->name, GF_LOG_WARNING,
- "conf null in xlator");
- return -1;
- }
-
- ret = pthread_mutex_trylock(&conf->mutex);
- if (ret) {
- gf_log("", GF_LOG_WARNING, "Unable to lock client %s"
- " errno: %d", this->name, errno);
- return -1;
- }
-
- gf_proc_dump_build_key(key_prefix, "xlator.protocol.client",
- "%s.priv", this->name);
-
- gf_proc_dump_add_section(key_prefix);
-
- list_for_each_entry(tmp, &conf->saved_fds, sfd_pos) {
- gf_proc_dump_build_key(key, key_prefix,
- "fd.%d.remote_fd", ++i);
- gf_proc_dump_write(key, "%d", tmp->remote_fd);
- }
-
- gf_proc_dump_build_key(key, key_prefix, "connecting");
- gf_proc_dump_write(key, "%d", conf->connecting);
- gf_proc_dump_build_key(key, key_prefix, "last_sent");
- gf_proc_dump_write(key, "%s", ctime(&conf->last_sent.tv_sec));
- gf_proc_dump_build_key(key, key_prefix, "last_received");
- gf_proc_dump_write(key, "%s", ctime(&conf->last_received.tv_sec));
-
- pthread_mutex_unlock(&conf->mutex);
-
- return 0;
-
-}
-
-int32_t
-client_inodectx_dump (xlator_t *this, inode_t *inode)
-{
- ino_t par = 0;
- int ret = -1;
- char key[GF_DUMP_MAX_BUF_LEN];
-
- if (!inode)
- return -1;
-
- if (!this)
- return -1;
-
- ret = inode_ctx_get (inode, this, &par);
-
- if (ret != 0)
- return ret;
-
- gf_proc_dump_build_key(key, "xlator.protocol.client",
- "%s.inode.%ld.par",
- this->name,inode->ino);
- gf_proc_dump_write(key, "%ld", par);
-
- return 0;
-}
-
-/*
- * client_protocol_notify - notify function for client protocol
- * @this:
- * @trans: transport object
- * @event
- *
- */
-
-int
-notify (xlator_t *this, int32_t event, void *data, ...)
-{
- int i = 0;
- int ret = -1;
- int child_down = 1;
- int was_not_down = 0;
- transport_t *trans = NULL;
- client_connection_t *conn = NULL;
- client_conf_t *conf = NULL;
- xlator_list_t *parent = NULL;
-
- conf = this->private;
- trans = data;
-
- switch (event) {
- case GF_EVENT_POLLOUT:
- {
- ret = protocol_client_pollout (this, trans);
-
- break;
- }
- case GF_EVENT_POLLIN:
- {
- ret = protocol_client_pollin (this, trans);
-
- break;
- }
- /* no break for ret check to happen below */
- case GF_EVENT_POLLERR:
- {
- ret = -1;
- protocol_client_cleanup (trans);
-
- if (conf->connecting == 0) {
- /* Let the connection/re-connection happen in
- * background, for now, don't hang here,
- * tell the parents that i am all ok..
- */
- parent = trans->xl->parents;
- while (parent) {
- parent->xlator->notify (parent->xlator,
- GF_EVENT_CHILD_CONNECTING,
- trans->xl);
- parent = parent->next;
- }
- conf->connecting = 1;
- }
-
- was_not_down = 0;
- for (i = 0; i < CHANNEL_MAX; i++) {
- conn = conf->transport[i]->xl_private;
- if (conn->connected == 1)
- was_not_down = 1;
- }
-
- conn = trans->xl_private;
- if (conn->connected) {
- conn->connected = 0;
- if (conn->reconnect == 0)
- client_protocol_reconnect (trans);
- }
-
- child_down = 1;
- for (i = 0; i < CHANNEL_MAX; i++) {
- trans = conf->transport[i];
- conn = trans->xl_private;
- if (conn->connected == 1)
- child_down = 0;
- }
-
- if (child_down && was_not_down) {
- gf_log (this->name, GF_LOG_INFO, "disconnected");
-
- protocol_client_mark_fd_bad (this);
-
- parent = this->parents;
- while (parent) {
- xlator_notify (parent->xlator,
- GF_EVENT_CHILD_DOWN, this);
- parent = parent->next;
- }
- }
- }
- break;
-
- case GF_EVENT_PARENT_UP:
- {
- client_conf_t *conf = NULL;
- int i = 0;
- transport_t *trans = NULL;
-
- conf = this->private;
- for (i = 0; i < CHANNEL_MAX; i++) {
- trans = conf->transport[i];
- if (!trans) {
- gf_log (this->name, GF_LOG_DEBUG,
- "transport init failed");
- return -1;
- }
-
- conn = trans->xl_private;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "got GF_EVENT_PARENT_UP, attempting connect "
- "on transport");
-
- client_protocol_reconnect (trans);
- }
- }
- break;
-
- case GF_EVENT_CHILD_UP:
- {
- char *handshake = NULL;
-
- ret = dict_get_str (this->options, "disable-handshake",
- &handshake);
- gf_log (this->name, GF_LOG_DEBUG,
- "got GF_EVENT_CHILD_UP");
- if ((ret < 0) ||
- (strcasecmp (handshake, "on"))) {
- ret = protocol_client_handshake (this, trans);
- } else {
- conn = trans->xl_private;
- conn->connected = 1;
- ret = default_notify (this, event, trans);
- }
-
- if (ret)
- transport_disconnect (trans);
-
- }
- break;
-
- default:
- gf_log (this->name, GF_LOG_DEBUG,
- "got %d, calling default_notify ()", event);
-
- default_notify (this, event, data);
- break;
- }
-
- return ret;
-}
-
-
-struct xlator_fops fops = {
- .stat = client_stat,
- .readlink = client_readlink,
- .mknod = client_mknod,
- .mkdir = client_mkdir,
- .unlink = client_unlink,
- .rmdir = client_rmdir,
- .symlink = client_symlink,
- .rename = client_rename,
- .link = client_link,
- .chmod = client_chmod,
- .chown = client_chown,
- .truncate = client_truncate,
- .utimens = client_utimens,
- .open = client_open,
- .readv = client_readv,
- .writev = client_writev,
- .statfs = client_statfs,
- .flush = client_flush,
- .fsync = client_fsync,
- .setxattr = client_setxattr,
- .getxattr = client_getxattr,
- .fsetxattr = client_fsetxattr,
- .fgetxattr = client_fgetxattr,
- .removexattr = client_removexattr,
- .opendir = client_opendir,
- .readdir = client_readdir,
- .fsyncdir = client_fsyncdir,
- .access = client_access,
- .ftruncate = client_ftruncate,
- .fstat = client_fstat,
- .create = client_create,
- .lk = client_lk,
- .inodelk = client_inodelk,
- .finodelk = client_finodelk,
- .entrylk = client_entrylk,
- .fentrylk = client_fentrylk,
- .lookup = client_lookup,
- .fchmod = client_fchmod,
- .fchown = client_fchown,
- .setdents = client_setdents,
- .getdents = client_getdents,
- .checksum = client_checksum,
- .xattrop = client_xattrop,
- .fxattrop = client_fxattrop,
-};
-
-struct xlator_mops mops = {
- .stats = client_stats,
- .getspec = client_getspec,
- .log = client_log,
-};
-
-struct xlator_cbks cbks = {
- .release = client_release,
- .releasedir = client_releasedir
-};
-
-
-struct xlator_dumpops dumpops = {
- .priv = client_priv_dump,
- .inodectx = client_inodectx_dump,
-};
-
-struct volume_options options[] = {
- { .key = {"username"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"password"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"transport-type"},
- .value = {"tcp", "socket", "ib-verbs", "unix", "ib-sdp",
- "tcp/client", "ib-verbs/client"},
- .type = GF_OPTION_TYPE_STR
- },
- { .key = {"remote-host"},
- .type = GF_OPTION_TYPE_INTERNET_ADDRESS
- },
- { .key = {"remote-subvolume"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"frame-timeout"},
- .type = GF_OPTION_TYPE_TIME,
- .min = 5,
- .max = 1013,
- },
- { .key = {"ping-timeout"},
- .type = GF_OPTION_TYPE_TIME,
- .min = 5,
- .max = 1013,
- },
- { .key = {NULL} },
-};
diff --git a/xlators/protocol/client/src/client-protocol.h b/xlators/protocol/client/src/client-protocol.h
deleted file mode 100644
index 7249853a6..000000000
--- a/xlators/protocol/client/src/client-protocol.h
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CLIENT_PROTOCOL_H
-#define _CLIENT_PROTOCOL_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <stdio.h>
-#include <arpa/inet.h>
-#include "inode.h"
-#include "timer.h"
-#include "byte-order.h"
-
-#define CLIENT_PORT_CEILING 1023
-
-#define GF_CLIENT_INODE_SELF 0
-#define GF_CLIENT_INODE_PARENT 1
-
-#define CLIENT_CONF(this) ((client_conf_t *)(this->private))
-
-#define RECEIVE_TIMEOUT(_cprivate,_current) \
- ((_cprivate->last_received.tv_sec + \
- _cprivate->frame_timeout) < \
- _current.tv_sec)
-
-#define SEND_TIMEOUT(_cprivate,_current) \
- ((_cprivate->last_sent.tv_sec + \
- _cprivate->frame_timeout) < \
- _current.tv_sec)
-
-enum {
- CHANNEL_BULK = 0,
- CHANNEL_LOWLAT = 1,
- CHANNEL_MAX
-};
-
-#define CLIENT_CHANNEL client_channel
-
-struct client_connection;
-typedef struct client_connection client_connection_t;
-
-#include "stack.h"
-#include "xlator.h"
-#include "transport.h"
-#include "protocol.h"
-
-typedef struct _client_fd_ctx {
- int remote_fd;
- struct list_head sfd_pos; /* Stores the reference to this
- fd's position in the saved_fds list.
- */
- fd_t *fd; /* Reverse reference to the fd itself.
- This is needed to delete this fdctx
- from the fd's context in
- protocol_client_mark_fd_bad.
- */
-} client_fd_ctx_t;
-
-struct _client_conf {
- transport_t *transport[CHANNEL_MAX];
- struct list_head saved_fds;
- struct timeval last_sent;
- struct timeval last_received;
- pthread_mutex_t mutex;
- int connecting;
-};
-typedef struct _client_conf client_conf_t;
-
-/* This will be stored in transport_t->xl_private */
-struct client_connection {
- pthread_mutex_t lock;
- uint64_t callid;
- struct saved_frames *saved_frames;
- int32_t frame_timeout;
- int32_t ping_started;
- int32_t ping_timeout;
- int32_t transport_activity;
- gf_timer_t *reconnect;
- char connected;
- uint64_t max_block_size;
- gf_timer_t *timer;
- gf_timer_t *ping_timer;
-};
-
-typedef struct {
- loc_t loc;
- loc_t loc2;
- fd_t *fd;
-} client_local_t;
-
-
-static inline void
-gf_string_to_stat(char *string, struct stat *stbuf)
-{
- uint64_t dev = 0;
- uint64_t ino = 0;
- uint32_t mode = 0;
- uint32_t nlink = 0;
- uint32_t uid = 0;
- uint32_t gid = 0;
- uint64_t rdev = 0;
- uint64_t size = 0;
- uint32_t blksize = 0;
- uint64_t blocks = 0;
- uint32_t atime = 0;
- uint32_t atime_nsec = 0;
- uint32_t mtime = 0;
- uint32_t mtime_nsec = 0;
- uint32_t ctime = 0;
- uint32_t ctime_nsec = 0;
-
- sscanf (string, GF_STAT_PRINT_FMT_STR,
- &dev,
- &ino,
- &mode,
- &nlink,
- &uid,
- &gid,
- &rdev,
- &size,
- &blksize,
- &blocks,
- &atime,
- &atime_nsec,
- &mtime,
- &mtime_nsec,
- &ctime,
- &ctime_nsec);
-
- stbuf->st_dev = dev;
- stbuf->st_ino = ino;
- stbuf->st_mode = mode;
- stbuf->st_nlink = nlink;
- stbuf->st_uid = uid;
- stbuf->st_gid = gid;
- stbuf->st_rdev = rdev;
- stbuf->st_size = size;
- stbuf->st_blksize = blksize;
- stbuf->st_blocks = blocks;
-
- stbuf->st_atime = atime;
- stbuf->st_mtime = mtime;
- stbuf->st_ctime = ctime;
-
- ST_ATIM_NSEC_SET(stbuf, atime_nsec);
- ST_MTIM_NSEC_SET(stbuf, mtime_nsec);
- ST_CTIM_NSEC_SET(stbuf, ctime_nsec);
-
-}
-
-#endif
diff --git a/xlators/protocol/client/src/client-rpc-fops.c b/xlators/protocol/client/src/client-rpc-fops.c
new file mode 100644
index 000000000..6355450c3
--- /dev/null
+++ b/xlators/protocol/client/src/client-rpc-fops.c
@@ -0,0 +1,6203 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "client.h"
+#include "glusterfs3-xdr.h"
+#include "glusterfs3.h"
+#include "compat-errno.h"
+
+int32_t client3_getspec (call_frame_t *frame, xlator_t *this, void *data);
+void client_start_ping (void *data);
+rpc_clnt_prog_t clnt3_3_fop_prog;
+
+
+int
+client_submit_vec_request (xlator_t *this, void *req, call_frame_t *frame,
+ rpc_clnt_prog_t *prog, int procnum,
+ fop_cbk_fn_t cbkfn,
+ struct iovec *payload, int payloadcnt,
+ struct iobref *iobref, xdrproc_t xdrproc)
+{
+ int ret = 0;
+ clnt_conf_t *conf = NULL;
+ struct iovec iov = {0, };
+ struct iobuf *iobuf = NULL;
+ int count = 0;
+ int start_ping = 0;
+ struct iobref *new_iobref = NULL;
+ ssize_t xdr_size = 0;
+ struct rpc_req rpcreq = {0, };
+
+ start_ping = 0;
+
+ conf = this->private;
+
+ if (req && xdrproc) {
+ xdr_size = xdr_sizeof (xdrproc, req);
+ iobuf = iobuf_get2 (this->ctx->iobuf_pool, xdr_size);
+ if (!iobuf) {
+ goto unwind;
+ };
+
+ new_iobref = iobref_new ();
+ if (!new_iobref) {
+ goto unwind;
+ }
+
+ if (iobref != NULL) {
+ ret = iobref_merge (new_iobref, iobref);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "cannot merge iobref passed from caller "
+ "into new_iobref");
+ }
+ }
+
+ ret = iobref_add (new_iobref, iobuf);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "cannot add iobuf into iobref");
+ goto unwind;
+ }
+
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_size (iobuf);
+
+ /* Create the xdr payload */
+ ret = xdr_serialize_generic (iov, req, xdrproc);
+ if (ret == -1) {
+ gf_log_callingfn ("", GF_LOG_WARNING,
+ "XDR function failed");
+ goto unwind;
+ }
+
+ iov.iov_len = ret;
+ count = 1;
+ }
+
+ /* Send the msg */
+ ret = rpc_clnt_submit (conf->rpc, prog, procnum, cbkfn, &iov, count,
+ payload, payloadcnt, new_iobref, frame, NULL, 0,
+ NULL, 0, NULL);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG, "rpc_clnt_submit failed");
+ }
+
+ if (ret == 0) {
+ pthread_mutex_lock (&conf->rpc->conn.lock);
+ {
+ if (!conf->rpc->conn.ping_started) {
+ start_ping = 1;
+ }
+ }
+ pthread_mutex_unlock (&conf->rpc->conn.lock);
+ }
+
+ if (start_ping)
+ client_start_ping ((void *) this);
+
+ if (new_iobref)
+ iobref_unref (new_iobref);
+
+ if (iobuf)
+ iobuf_unref (iobuf);
+
+ return ret;
+
+unwind:
+ rpcreq.rpc_status = -1;
+ cbkfn (&rpcreq, NULL, 0, frame);
+
+ if (new_iobref)
+ iobref_unref (new_iobref);
+
+ if (iobuf)
+ iobuf_unref (iobuf);
+
+ return ret;
+}
+
+/* CBK */
+
+int
+client3_3_symlink_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ gfs3_symlink_rsp rsp = {0,};
+ struct iatt stbuf = {0,};
+ struct iatt preparent = {0,};
+ struct iatt postparent = {0,};
+ int ret = 0;
+ clnt_local_t *local = NULL;
+ inode_t *inode = NULL;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+ this = THIS;
+
+ frame = myframe;
+
+ local = frame->local;
+ inode = local->loc.inode;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_symlink_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 != rsp.op_ret) {
+ gf_stat_to_iatt (&rsp.stat, &stbuf);
+
+ gf_stat_to_iatt (&rsp.preparent, &preparent);
+ gf_stat_to_iatt (&rsp.postparent, &postparent);
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ /* no need to print the gfid, because it will be null, since
+ * symlink operation failed.
+ */
+ gf_log (this->name, GF_LOG_WARNING,
+ "remote operation failed: %s. Path: (%s to %s)",
+ strerror (gf_error_to_errno (rsp.op_errno)),
+ local->loc.path, local->loc2.path);
+ }
+
+ CLIENT_STACK_UNWIND (symlink, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), inode, &stbuf,
+ &preparent, &postparent, xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+
+int
+client3_3_mknod_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ gfs3_mknod_rsp rsp = {0,};
+ struct iatt stbuf = {0,};
+ struct iatt preparent = {0,};
+ struct iatt postparent = {0,};
+ int ret = 0;
+ clnt_local_t *local = NULL;
+ inode_t *inode = NULL;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+ this = THIS;
+
+ frame = myframe;
+
+ local = frame->local;
+
+ inode = local->loc.inode;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_mknod_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 != rsp.op_ret) {
+ gf_stat_to_iatt (&rsp.stat, &stbuf);
+
+ gf_stat_to_iatt (&rsp.preparent, &preparent);
+ gf_stat_to_iatt (&rsp.postparent, &postparent);
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "remote operation failed: %s. Path: %s",
+ strerror (gf_error_to_errno (rsp.op_errno)),
+ local->loc.path);
+ }
+
+ CLIENT_STACK_UNWIND (mknod, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), inode,
+ &stbuf, &preparent, &postparent, xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+int
+client3_3_mkdir_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ gfs3_mkdir_rsp rsp = {0,};
+ struct iatt stbuf = {0,};
+ struct iatt preparent = {0,};
+ struct iatt postparent = {0,};
+ int ret = 0;
+ clnt_local_t *local = NULL;
+ inode_t *inode = NULL;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+ this = THIS;
+
+ frame = myframe;
+
+ local = frame->local;
+ inode = local->loc.inode;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_mkdir_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 != rsp.op_ret) {
+ gf_stat_to_iatt (&rsp.stat, &stbuf);
+
+ gf_stat_to_iatt (&rsp.preparent, &preparent);
+ gf_stat_to_iatt (&rsp.postparent, &postparent);
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "remote operation failed: %s. Path: %s",
+ strerror (gf_error_to_errno (rsp.op_errno)),
+ local->loc.path);
+ }
+
+ CLIENT_STACK_UNWIND (mkdir, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), inode,
+ &stbuf, &preparent, &postparent, xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+int
+_copy_gfid_from_inode_holders (uuid_t gfid, loc_t *loc, fd_t *fd)
+{
+ int ret = 0;
+
+ if (fd && fd->inode && !uuid_is_null (fd->inode->gfid)) {
+ uuid_copy (gfid, fd->inode->gfid);
+ goto out;
+ }
+
+ if (!loc) {
+ GF_ASSERT (0);
+ ret = -1;
+ goto out;
+ }
+
+ if (loc->inode && !uuid_is_null (loc->inode->gfid)) {
+ uuid_copy (gfid, loc->inode->gfid);
+ } else if (!uuid_is_null (loc->gfid)) {
+ uuid_copy (gfid, loc->gfid);
+ } else {
+ GF_ASSERT (0);
+ ret = -1;
+ }
+out:
+ return ret;
+}
+
+int
+client_add_fd_to_saved_fds (xlator_t *this, fd_t *fd, loc_t *loc, int32_t flags,
+ int64_t remote_fd, int is_dir)
+{
+ int ret = 0;
+ uuid_t gfid = {0};
+ clnt_conf_t *conf = NULL;
+ clnt_fd_ctx_t *fdctx = NULL;
+
+ conf = this->private;
+ ret = _copy_gfid_from_inode_holders (gfid, loc, fd);
+ if (ret) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ fdctx = GF_CALLOC (1, sizeof (*fdctx),
+ gf_client_mt_clnt_fdctx_t);
+ if (!fdctx) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ uuid_copy (fdctx->gfid, gfid);
+ fdctx->is_dir = is_dir;
+ fdctx->remote_fd = remote_fd;
+ fdctx->flags = flags;
+ fdctx->lk_ctx = fd_lk_ctx_ref (fd->lk_ctx);
+ fdctx->lk_heal_state = GF_LK_HEAL_DONE;
+ fdctx->reopen_done = client_default_reopen_done;
+
+ INIT_LIST_HEAD (&fdctx->sfd_pos);
+ INIT_LIST_HEAD (&fdctx->lock_list);
+
+ this_fd_set_ctx (fd, this, loc, fdctx);
+
+ pthread_mutex_lock (&conf->lock);
+ {
+ list_add_tail (&fdctx->sfd_pos, &conf->saved_fds);
+ }
+ pthread_mutex_unlock (&conf->lock);
+out:
+ return ret;
+}
+
+int
+client3_3_open_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ clnt_local_t *local = NULL;
+ clnt_conf_t *conf = NULL;
+ call_frame_t *frame = NULL;
+ fd_t *fd = NULL;
+ int ret = 0;
+ gfs3_open_rsp rsp = {0,};
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+
+ this = THIS;
+
+ frame = myframe;
+ local = frame->local;
+
+ conf = frame->this->private;
+ fd = local->fd;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_open_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 != rsp.op_ret) {
+ ret = client_add_fd_to_saved_fds (frame->this, fd, &local->loc,
+ local->flags, rsp.fd, 0);
+ if (ret) {
+ rsp.op_ret = -1;
+ rsp.op_errno = -ret;
+ goto out;
+ }
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "remote operation failed: %s. Path: %s (%s)",
+ strerror (gf_error_to_errno (rsp.op_errno)),
+ local->loc.path, loc_gfid_utoa (&local->loc));
+ }
+
+ CLIENT_STACK_UNWIND (open, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), fd, xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+
+int
+client3_3_stat_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gfs3_stat_rsp rsp = {0,};
+ call_frame_t *frame = NULL;
+ struct iatt iatt = {0,};
+ int ret = 0;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+
+ this = THIS;
+
+ frame = myframe;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_stat_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 != rsp.op_ret) {
+ gf_stat_to_iatt (&rsp.stat, &iatt);
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING, "remote operation failed: %s",
+ strerror (gf_error_to_errno (rsp.op_errno)));
+ }
+
+ CLIENT_STACK_UNWIND (stat, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), &iatt, xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+int
+client3_3_readlink_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gfs3_readlink_rsp rsp = {0,};
+ call_frame_t *frame = NULL;
+ struct iatt iatt = {0,};
+ int ret = 0;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+
+ this = THIS;
+
+ frame = myframe;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_readlink_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 != rsp.op_ret) {
+ gf_stat_to_iatt (&rsp.buf, &iatt);
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, (gf_error_to_errno(rsp.op_errno) == ENOENT)?
+ GF_LOG_DEBUG:GF_LOG_WARNING, "remote operation failed:"
+ " %s", strerror (gf_error_to_errno (rsp.op_errno)));
+ }
+
+ CLIENT_STACK_UNWIND (readlink, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), rsp.path,
+ &iatt, xdata);
+
+ /* This is allocated by the libc while decoding RPC msg */
+ /* Hence no 'GF_FREE', but just 'free' */
+ free (rsp.path);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+int
+client3_3_unlink_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ gfs3_unlink_rsp rsp = {0,};
+ struct iatt preparent = {0,};
+ struct iatt postparent = {0,};
+ int ret = 0;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+
+ this = THIS;
+
+ frame = myframe;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_unlink_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 != rsp.op_ret) {
+ gf_stat_to_iatt (&rsp.preparent, &preparent);
+ gf_stat_to_iatt (&rsp.postparent, &postparent);
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name,
+ ((gf_error_to_errno (rsp.op_errno) == ENOENT)
+ ? GF_LOG_DEBUG : GF_LOG_WARNING),
+ "remote operation failed: %s",
+ strerror (gf_error_to_errno (rsp.op_errno)));
+ }
+
+ CLIENT_STACK_UNWIND (unlink, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), &preparent,
+ &postparent, xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+int
+client3_3_rmdir_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gfs3_rmdir_rsp rsp = {0,};
+ call_frame_t *frame = NULL;
+ struct iatt preparent = {0,};
+ struct iatt postparent = {0,};
+ int ret = 0;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+
+ this = THIS;
+
+ frame = myframe;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_rmdir_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 != rsp.op_ret) {
+ gf_stat_to_iatt (&rsp.preparent, &preparent);
+ gf_stat_to_iatt (&rsp.postparent, &postparent);
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING, "remote operation failed: %s",
+ strerror (gf_error_to_errno (rsp.op_errno)));
+ }
+ CLIENT_STACK_UNWIND (rmdir, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), &preparent,
+ &postparent, xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+
+int
+client3_3_truncate_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gfs3_truncate_rsp rsp = {0,};
+ call_frame_t *frame = NULL;
+ struct iatt prestat = {0,};
+ struct iatt poststat = {0,};
+ int ret = 0;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+
+ this = THIS;
+
+ frame = myframe;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_truncate_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 != rsp.op_ret) {
+ gf_stat_to_iatt (&rsp.prestat, &prestat);
+ gf_stat_to_iatt (&rsp.poststat, &poststat);
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING, "remote operation failed: %s",
+ strerror (gf_error_to_errno (rsp.op_errno)));
+ }
+ CLIENT_STACK_UNWIND (truncate, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), &prestat,
+ &poststat, xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+
+int
+client3_3_statfs_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gfs3_statfs_rsp rsp = {0,};
+ call_frame_t *frame = NULL;
+ struct statvfs statfs = {0,};
+ int ret = 0;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+
+ this = THIS;
+
+ frame = myframe;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_statfs_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 != rsp.op_ret) {
+ gf_statfs_to_statfs (&rsp.statfs, &statfs);
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING, "remote operation failed: %s",
+ strerror (gf_error_to_errno (rsp.op_errno)));
+ }
+ CLIENT_STACK_UNWIND (statfs, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), &statfs, xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+
+int
+client3_3_writev_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gfs3_write_rsp rsp = {0,};
+ call_frame_t *frame = NULL;
+ struct iatt prestat = {0,};
+ struct iatt poststat = {0,};
+ int ret = 0;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+ clnt_local_t *local = NULL;
+
+
+ this = THIS;
+
+ frame = myframe;
+ local = frame->local;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_truncate_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 != rsp.op_ret) {
+ gf_stat_to_iatt (&rsp.prestat, &prestat);
+ gf_stat_to_iatt (&rsp.poststat, &poststat);
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING, "remote operation failed: %s",
+ strerror (gf_error_to_errno (rsp.op_errno)));
+ } else if (rsp.op_ret >= 0) {
+ if (local->attempt_reopen)
+ client_attempt_reopen (local->fd, this);
+ }
+ CLIENT_STACK_UNWIND (writev, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), &prestat,
+ &poststat, xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+int
+client3_3_flush_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ clnt_local_t *local = NULL;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+ gf_common_rsp rsp = {0,};
+ int ret = 0;
+
+ frame = myframe;
+ this = THIS;
+ local = frame->local;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_common_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (rsp.op_ret >= 0 && !fd_is_anonymous (local->fd)) {
+ /* Delete all saved locks of the owner issuing flush */
+ ret = delete_granted_locks_owner (local->fd, &local->owner);
+ gf_log (this->name, GF_LOG_TRACE,
+ "deleting locks of owner (%s) returned %d",
+ lkowner_utoa (&local->owner), ret);
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "remote operation failed: %s",
+ strerror (gf_error_to_errno (rsp.op_errno)));
+ }
+ CLIENT_STACK_UNWIND (flush, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+int
+client3_3_fsync_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gfs3_fsync_rsp rsp = {0,};
+ call_frame_t *frame = NULL;
+ struct iatt prestat = {0,};
+ struct iatt poststat = {0,};
+ int ret = 0;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+
+ this = THIS;
+
+ frame = myframe;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_truncate_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 != rsp.op_ret) {
+ gf_stat_to_iatt (&rsp.prestat, &prestat);
+ gf_stat_to_iatt (&rsp.poststat, &poststat);
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING, "remote operation failed: %s",
+ strerror (gf_error_to_errno (rsp.op_errno)));
+ }
+ CLIENT_STACK_UNWIND (fsync, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), &prestat,
+ &poststat, xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+int
+client3_3_setxattr_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ gf_common_rsp rsp = {0,};
+ int ret = 0;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+ int op_errno = EINVAL;
+
+ this = THIS;
+
+ frame = myframe;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_common_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ op_errno = gf_error_to_errno (rsp.op_errno);
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, ((op_errno == ENOTSUP) ?
+ GF_LOG_DEBUG : GF_LOG_WARNING),
+ "remote operation failed: %s",
+ strerror (op_errno));
+ }
+ CLIENT_STACK_UNWIND (setxattr, frame, rsp.op_ret, op_errno, xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+int
+client3_3_getxattr_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ int op_errno = EINVAL;
+ gfs3_getxattr_rsp rsp = {0,};
+ int ret = 0;
+ clnt_local_t *local = NULL;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+
+ this = THIS;
+
+ frame = myframe;
+ local = frame->local;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_getxattr_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ op_errno = gf_error_to_errno (rsp.op_errno);
+ if (-1 != rsp.op_ret) {
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->this, dict,
+ (rsp.dict.dict_val),
+ (rsp.dict.dict_len), rsp.op_ret,
+ op_errno, out);
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, (((op_errno == ENOTSUP) ||
+ (op_errno == ENODATA) ||
+ (op_errno == ENOENT)) ?
+ GF_LOG_DEBUG : GF_LOG_WARNING),
+ "remote operation failed: %s. Path: %s (%s). Key: %s",
+ strerror (op_errno),
+ local->loc.path, loc_gfid_utoa (&local->loc),
+ (local->name) ? local->name : "(null)");
+ }
+
+ CLIENT_STACK_UNWIND (getxattr, frame, rsp.op_ret, op_errno, dict, xdata);
+
+ /* don't use GF_FREE, this memory was allocated by libc */
+ free (rsp.dict.dict_val);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ if (dict)
+ dict_unref (dict);
+
+ return 0;
+}
+
+int
+client3_3_fgetxattr_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ gfs3_fgetxattr_rsp rsp = {0,};
+ int ret = 0;
+ int op_errno = EINVAL;
+ clnt_local_t *local = NULL;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+
+ this = THIS;
+
+ frame = myframe;
+ local = frame->local;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ op_errno = ENOTCONN;
+ goto out;
+ }
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_fgetxattr_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ op_errno = gf_error_to_errno (rsp.op_errno);
+ if (-1 != rsp.op_ret) {
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->this, dict,
+ (rsp.dict.dict_val),
+ (rsp.dict.dict_len), rsp.op_ret,
+ op_errno, out);
+ }
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, ((op_errno == ENOTSUP) ?
+ GF_LOG_DEBUG : GF_LOG_WARNING),
+ "remote operation failed: %s",
+ strerror (op_errno));
+ }
+
+ CLIENT_STACK_UNWIND (fgetxattr, frame, rsp.op_ret, op_errno, dict, xdata);
+
+ free (rsp.dict.dict_val);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ if (dict)
+ dict_unref (dict);
+
+ return 0;
+}
+
+int
+client3_3_removexattr_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ gf_common_rsp rsp = {0,};
+ int ret = 0;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+ this = THIS;
+
+ frame = myframe;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_common_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING, "remote operation failed: %s",
+ strerror (gf_error_to_errno (rsp.op_errno)));
+ }
+
+ CLIENT_STACK_UNWIND (removexattr, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+int
+client3_3_fremovexattr_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ gf_common_rsp rsp = {0,};
+ int ret = 0;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+
+ this = THIS;
+
+ frame = myframe;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_common_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING, "remote operation failed: %s",
+ strerror (gf_error_to_errno (rsp.op_errno)));
+ }
+ CLIENT_STACK_UNWIND (fremovexattr, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+int
+client3_3_fsyncdir_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ gf_common_rsp rsp = {0,};
+ int ret = 0;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+
+ this = THIS;
+
+ frame = myframe;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_common_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING, "remote operation failed: %s",
+ strerror (gf_error_to_errno (rsp.op_errno)));
+ }
+ CLIENT_STACK_UNWIND (fsyncdir, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+int
+client3_3_access_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ gf_common_rsp rsp = {0,};
+ int ret = 0;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+
+ this = THIS;
+
+ frame = myframe;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_common_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING, "remote operation failed: %s",
+ strerror (gf_error_to_errno (rsp.op_errno)));
+ }
+ CLIENT_STACK_UNWIND (access, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+
+int
+client3_3_ftruncate_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gfs3_ftruncate_rsp rsp = {0,};
+ call_frame_t *frame = NULL;
+ struct iatt prestat = {0,};
+ struct iatt poststat = {0,};
+ int ret = 0;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+
+ this = THIS;
+
+ frame = myframe;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_ftruncate_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 != rsp.op_ret) {
+ gf_stat_to_iatt (&rsp.prestat, &prestat);
+ gf_stat_to_iatt (&rsp.poststat, &poststat);
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING, "remote operation failed: %s",
+ strerror (gf_error_to_errno (rsp.op_errno)));
+ }
+ CLIENT_STACK_UNWIND (ftruncate, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), &prestat,
+ &poststat, xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+int
+client3_3_fstat_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gfs3_fstat_rsp rsp = {0,};
+ call_frame_t *frame = NULL;
+ struct iatt stat = {0,};
+ int ret = 0;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+
+ this = THIS;
+
+ frame = myframe;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_fstat_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 != rsp.op_ret) {
+ gf_stat_to_iatt (&rsp.stat, &stat);
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING, "remote operation failed: %s",
+ strerror (gf_error_to_errno (rsp.op_errno)));
+ }
+ CLIENT_STACK_UNWIND (fstat, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), &stat, xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+
+int
+client3_3_inodelk_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ gf_common_rsp rsp = {0,};
+ int ret = 0;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+
+ this = THIS;
+
+ frame = myframe;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_common_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if ((rsp.op_ret == -1) &&
+ (EAGAIN != gf_error_to_errno (rsp.op_errno))) {
+ gf_log (this->name, GF_LOG_WARNING, "remote operation failed: %s",
+ strerror (gf_error_to_errno (rsp.op_errno)));
+ }
+ CLIENT_STACK_UNWIND (inodelk, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+int
+client3_3_finodelk_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ gf_common_rsp rsp = {0,};
+ int ret = 0;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+ clnt_local_t *local = NULL;
+
+
+ frame = myframe;
+ this = frame->this;
+ local = frame->local;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_common_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if ((rsp.op_ret == -1) &&
+ (EAGAIN != gf_error_to_errno (rsp.op_errno))) {
+ gf_log (this->name, GF_LOG_WARNING, "remote operation failed: %s",
+ strerror (gf_error_to_errno (rsp.op_errno)));
+ } else if (rsp.op_ret == 0) {
+ if (local->attempt_reopen)
+ client_attempt_reopen (local->fd, this);
+ }
+ CLIENT_STACK_UNWIND (finodelk, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+int
+client3_3_entrylk_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ gf_common_rsp rsp = {0,};
+ int ret = 0;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+
+ this = THIS;
+
+ frame = myframe;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_common_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if ((rsp.op_ret == -1) &&
+ (EAGAIN != gf_error_to_errno (rsp.op_errno))) {
+ gf_log (this->name, GF_LOG_WARNING, "remote operation failed: %s",
+ strerror (gf_error_to_errno (rsp.op_errno)));
+ }
+
+ CLIENT_STACK_UNWIND (entrylk, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+int
+client3_3_fentrylk_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ gf_common_rsp rsp = {0,};
+ int ret = 0;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+
+ this = THIS;
+
+ frame = myframe;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_common_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if ((rsp.op_ret == -1) &&
+ (EAGAIN != gf_error_to_errno (rsp.op_errno))) {
+ gf_log (this->name, GF_LOG_WARNING, "remote operation failed: %s",
+ strerror (gf_error_to_errno (rsp.op_errno)));
+ }
+
+ CLIENT_STACK_UNWIND (fentrylk, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+int
+client3_3_xattrop_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ gfs3_xattrop_rsp rsp = {0,};
+ int ret = 0;
+ int op_errno = EINVAL;
+ clnt_local_t *local = NULL;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+
+ this = THIS;
+
+ frame = myframe;
+ local = frame->local;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ op_errno = ENOTCONN;
+ goto out;
+ }
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_xattrop_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ op_errno = rsp.op_errno;
+ if (-1 != rsp.op_ret) {
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->this, dict,
+ (rsp.dict.dict_val),
+ (rsp.dict.dict_len), rsp.op_ret,
+ op_errno, out);
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "remote operation failed: %s. Path: %s (%s)",
+ strerror (gf_error_to_errno (rsp.op_errno)),
+ local->loc.path, loc_gfid_utoa (&local->loc));
+ }
+
+ CLIENT_STACK_UNWIND (xattrop, frame, rsp.op_ret,
+ gf_error_to_errno (op_errno), dict, xdata);
+
+ free (rsp.dict.dict_val);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ if (dict)
+ dict_unref (dict);
+
+ return 0;
+}
+
+int
+client3_3_fxattrop_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ dict_t *xdata = NULL;
+ gfs3_fxattrop_rsp rsp = {0,};
+ int ret = 0;
+ int op_errno = 0;
+ clnt_local_t *local = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ frame = myframe;
+ local = frame->local;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_fxattrop_rsp);
+ if (ret < 0) {
+ rsp.op_ret = -1;
+ op_errno = EINVAL;
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ goto out;
+ }
+ op_errno = rsp.op_errno;
+ if (-1 != rsp.op_ret) {
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->this, dict,
+ (rsp.dict.dict_val),
+ (rsp.dict.dict_len), rsp.op_ret,
+ op_errno, out);
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->this, xdata,
+ (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), rsp.op_ret,
+ op_errno, out);
+out:
+
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "remote operation failed: %s",
+ strerror (gf_error_to_errno (op_errno)));
+ } else if (rsp.op_ret == 0) {
+ if (local->attempt_reopen)
+ client_attempt_reopen (local->fd, this);
+ }
+ CLIENT_STACK_UNWIND (fxattrop, frame, rsp.op_ret,
+ gf_error_to_errno (op_errno), dict, xdata);
+
+ free (rsp.dict.dict_val);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ if (dict)
+ dict_unref (dict);
+
+ return 0;
+}
+
+int
+client3_3_fsetxattr_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ gf_common_rsp rsp = {0,};
+ int ret = 0;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+ int op_errno = EINVAL;
+
+ this = THIS;
+
+ frame = myframe;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_common_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ op_errno = gf_error_to_errno (rsp.op_errno);
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, ((op_errno == ENOTSUP) ?
+ GF_LOG_DEBUG : GF_LOG_WARNING),
+ "remote operation failed: %s",
+ strerror (op_errno));
+ }
+
+ CLIENT_STACK_UNWIND (fsetxattr, frame, rsp.op_ret, op_errno, xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+int
+client3_3_fsetattr_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ gfs3_fsetattr_rsp rsp = {0,};
+ struct iatt prestat = {0,};
+ struct iatt poststat = {0,};
+ int ret = 0;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+
+ this = THIS;
+
+ frame = myframe;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_fsetattr_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 != rsp.op_ret) {
+ gf_stat_to_iatt (&rsp.statpre, &prestat);
+ gf_stat_to_iatt (&rsp.statpost, &poststat);
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING, "remote operation failed: %s",
+ strerror (gf_error_to_errno (rsp.op_errno)));
+ }
+ CLIENT_STACK_UNWIND (fsetattr, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), &prestat,
+ &poststat, xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+int
+client3_3_fallocate_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ gfs3_fallocate_rsp rsp = {0,};
+ struct iatt prestat = {0,};
+ struct iatt poststat = {0,};
+ int ret = 0;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+
+ this = THIS;
+
+ frame = myframe;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_fallocate_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 != rsp.op_ret) {
+ gf_stat_to_iatt (&rsp.statpre, &prestat);
+ gf_stat_to_iatt (&rsp.statpost, &poststat);
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING, "remote operation failed: %s",
+ strerror (gf_error_to_errno (rsp.op_errno)));
+ }
+ CLIENT_STACK_UNWIND (fallocate, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), &prestat,
+ &poststat, xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+int
+client3_3_discard_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ gfs3_discard_rsp rsp = {0,};
+ struct iatt prestat = {0,};
+ struct iatt poststat = {0,};
+ int ret = 0;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+ this = THIS;
+
+ frame = myframe;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t) xdr_gfs3_discard_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 != rsp.op_ret) {
+ gf_stat_to_iatt (&rsp.statpre, &prestat);
+ gf_stat_to_iatt (&rsp.statpost, &poststat);
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING, "remote operation failed: %s",
+ strerror (gf_error_to_errno (rsp.op_errno)));
+ }
+ CLIENT_STACK_UNWIND (discard, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), &prestat,
+ &poststat, xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+int
+client3_3_zerofill_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ gfs3_zerofill_rsp rsp = {0,};
+ struct iatt prestat = {0,};
+ struct iatt poststat = {0,};
+ int ret = 0;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+ this = THIS;
+
+ frame = myframe;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t) xdr_gfs3_zerofill_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 != rsp.op_ret) {
+ gf_stat_to_iatt (&rsp.statpre, &prestat);
+ gf_stat_to_iatt (&rsp.statpost, &poststat);
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "remote operation failed: %s",
+ strerror (gf_error_to_errno (rsp.op_errno)));
+ }
+ CLIENT_STACK_UNWIND (zerofill, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), &prestat,
+ &poststat, xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+int
+client3_3_setattr_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ gfs3_setattr_rsp rsp = {0,};
+ struct iatt prestat = {0,};
+ struct iatt poststat = {0,};
+ int ret = 0;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+
+ this = THIS;
+
+ frame = myframe;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_setattr_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 != rsp.op_ret) {
+ gf_stat_to_iatt (&rsp.statpre, &prestat);
+ gf_stat_to_iatt (&rsp.statpost, &poststat);
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING, "remote operation failed: %s",
+ strerror (gf_error_to_errno (rsp.op_errno)));
+ }
+ CLIENT_STACK_UNWIND (setattr, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), &prestat,
+ &poststat, xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+int
+client3_3_create_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ fd_t *fd = NULL;
+ inode_t *inode = NULL;
+ struct iatt stbuf = {0, };
+ struct iatt preparent = {0, };
+ struct iatt postparent = {0, };
+ int32_t ret = -1;
+ clnt_local_t *local = NULL;
+ gfs3_create_rsp rsp = {0,};
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+ this = THIS;
+
+ frame = myframe;
+ local = frame->local;
+ fd = local->fd;
+ inode = local->loc.inode;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_create_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 != rsp.op_ret) {
+ gf_stat_to_iatt (&rsp.stat, &stbuf);
+
+ gf_stat_to_iatt (&rsp.preparent, &preparent);
+ gf_stat_to_iatt (&rsp.postparent, &postparent);
+ uuid_copy (local->loc.gfid, stbuf.ia_gfid);
+ ret = client_add_fd_to_saved_fds (frame->this, fd, &local->loc,
+ local->flags, rsp.fd, 0);
+ if (ret) {
+ rsp.op_ret = -1;
+ rsp.op_errno = -ret;
+ goto out;
+ }
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "remote operation failed: %s. Path: %s",
+ strerror (gf_error_to_errno (rsp.op_errno)),
+ local->loc.path);
+ }
+
+ CLIENT_STACK_UNWIND (create, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), fd, inode,
+ &stbuf, &preparent, &postparent, xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+
+int
+client3_3_rchecksum_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ gfs3_rchecksum_rsp rsp = {0,};
+ int ret = 0;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+
+ this = THIS;
+
+ frame = myframe;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_rchecksum_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING, "remote operation failed: %s",
+ strerror (gf_error_to_errno (rsp.op_errno)));
+ }
+ CLIENT_STACK_UNWIND (rchecksum, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno),
+ rsp.weak_checksum,
+ (uint8_t *)rsp.strong_checksum.strong_checksum_val,
+ xdata);
+
+ if (rsp.strong_checksum.strong_checksum_val) {
+ /* This is allocated by the libc while decoding RPC msg */
+ /* Hence no 'GF_FREE', but just 'free' */
+ free (rsp.strong_checksum.strong_checksum_val);
+ }
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+int
+client3_3_lk_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ clnt_local_t *local = NULL;
+ struct gf_flock lock = {0,};
+ gfs3_lk_rsp rsp = {0,};
+ int ret = 0;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+ this = THIS;
+
+ frame = myframe;
+ local = frame->local;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_lk_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (rsp.op_ret >= 0) {
+ gf_proto_flock_to_flock (&rsp.flock, &lock);
+ }
+
+ /* Save the lock to the client lock cache to be able
+ to recover in the case of server reboot.*/
+ /*
+ temporarily
+ if (local->cmd == F_SETLK || local->cmd == F_SETLKW) {
+ ret = client_add_lock_for_recovery (local->fd, &lock,
+ local->owner, local->cmd);
+ if (ret < 0) {
+ rsp.op_ret = -1;
+ rsp.op_errno = -ret;
+ }
+ }
+ */
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if ((rsp.op_ret == -1) &&
+ (EAGAIN != gf_error_to_errno (rsp.op_errno))) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "remote operation failed: %s",
+ strerror (gf_error_to_errno (rsp.op_errno)));
+ }
+
+ CLIENT_STACK_UNWIND (lk, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), &lock, xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ free (rsp.flock.lk_owner.lk_owner_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+int
+client3_3_readdir_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ gfs3_readdir_rsp rsp = {0,};
+ int32_t ret = 0;
+ clnt_local_t *local = NULL;
+ gf_dirent_t entries;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+ this = THIS;
+
+ frame = myframe;
+ local = frame->local;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_readdir_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ INIT_LIST_HEAD (&entries.list);
+ if (rsp.op_ret > 0) {
+ unserialize_rsp_dirent (&rsp, &entries);
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->this, xdata,
+ (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), rsp.op_ret,
+ rsp.op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "remote operation failed: %s remote_fd = %d",
+ strerror (gf_error_to_errno (rsp.op_errno)),
+ local->cmd);
+ }
+ CLIENT_STACK_UNWIND (readdir, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), &entries, xdata);
+
+ if (rsp.op_ret != -1) {
+ gf_dirent_free (&entries);
+ }
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ clnt_readdir_rsp_cleanup (&rsp);
+
+ return 0;
+}
+
+
+int
+client3_3_readdirp_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ gfs3_readdirp_rsp rsp = {0,};
+ int32_t ret = 0;
+ clnt_local_t *local = NULL;
+ gf_dirent_t entries;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+ this = THIS;
+
+ frame = myframe;
+ local = frame->local;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_readdirp_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ INIT_LIST_HEAD (&entries.list);
+ if (rsp.op_ret > 0) {
+ unserialize_rsp_direntp (this, local->fd, &rsp, &entries);
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "remote operation failed: %s",
+ strerror (gf_error_to_errno (rsp.op_errno)));
+ }
+ CLIENT_STACK_UNWIND (readdirp, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), &entries, xdata);
+
+ if (rsp.op_ret != -1) {
+ gf_dirent_free (&entries);
+ }
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ clnt_readdirp_rsp_cleanup (&rsp);
+
+ return 0;
+}
+
+
+int
+client3_3_rename_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ gfs3_rename_rsp rsp = {0,};
+ struct iatt stbuf = {0,};
+ struct iatt preoldparent = {0,};
+ struct iatt postoldparent = {0,};
+ struct iatt prenewparent = {0,};
+ struct iatt postnewparent = {0,};
+ int ret = 0;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+ this = THIS;
+
+ frame = myframe;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_rename_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 != rsp.op_ret) {
+ gf_stat_to_iatt (&rsp.stat, &stbuf);
+
+ gf_stat_to_iatt (&rsp.preoldparent, &preoldparent);
+ gf_stat_to_iatt (&rsp.postoldparent, &postoldparent);
+
+ gf_stat_to_iatt (&rsp.prenewparent, &prenewparent);
+ gf_stat_to_iatt (&rsp.postnewparent, &postnewparent);
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING, "remote operation failed: %s",
+ strerror (gf_error_to_errno (rsp.op_errno)));
+ }
+ CLIENT_STACK_UNWIND (rename, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno),
+ &stbuf, &preoldparent, &postoldparent,
+ &prenewparent, &postnewparent, xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+int
+client3_3_link_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ gfs3_link_rsp rsp = {0,};
+ struct iatt stbuf = {0,};
+ struct iatt preparent = {0,};
+ struct iatt postparent = {0,};
+ int ret = 0;
+ clnt_local_t *local = NULL;
+ inode_t *inode = NULL;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+
+ this = THIS;
+
+ frame = myframe;
+
+ local = frame->local;
+ inode = local->loc.inode;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_link_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 != rsp.op_ret) {
+ gf_stat_to_iatt (&rsp.stat, &stbuf);
+
+ gf_stat_to_iatt (&rsp.preparent, &preparent);
+ gf_stat_to_iatt (&rsp.postparent, &postparent);
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "remote operation failed: %s (%s -> %s)",
+ strerror (gf_error_to_errno (rsp.op_errno)),
+ local->loc.path, local->loc2.path);
+ }
+
+ CLIENT_STACK_UNWIND (link, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), inode,
+ &stbuf, &preparent, &postparent, xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+
+int
+client3_3_opendir_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ clnt_local_t *local = NULL;
+ clnt_conf_t *conf = NULL;
+ call_frame_t *frame = NULL;
+ fd_t *fd = NULL;
+ int ret = 0;
+ gfs3_opendir_rsp rsp = {0,};
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+ this = THIS;
+
+ frame = myframe;
+ local = frame->local;
+
+ conf = frame->this->private;
+ fd = local->fd;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_opendir_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 != rsp.op_ret) {
+ ret = client_add_fd_to_saved_fds (frame->this, fd, &local->loc,
+ 0, rsp.fd, 1);
+ if (ret) {
+ rsp.op_ret = -1;
+ rsp.op_errno = -ret;
+ goto out;
+ }
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "remote operation failed: %s. Path: %s (%s)",
+ strerror (gf_error_to_errno (rsp.op_errno)),
+ local->loc.path, loc_gfid_utoa (&local->loc));
+ }
+ CLIENT_STACK_UNWIND (opendir, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), fd, xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+
+int
+client3_3_lookup_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ clnt_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ int ret = 0;
+ gfs3_lookup_rsp rsp = {0,};
+ struct iatt stbuf = {0,};
+ struct iatt postparent = {0,};
+ int op_errno = EINVAL;
+ dict_t *xdata = NULL;
+ inode_t *inode = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ frame = myframe;
+ local = frame->local;
+ inode = local->loc.inode;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_lookup_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ op_errno = gf_error_to_errno (rsp.op_errno);
+ gf_stat_to_iatt (&rsp.postparent, &postparent);
+
+ if (rsp.op_ret == -1)
+ goto out;
+
+ rsp.op_ret = -1;
+ gf_stat_to_iatt (&rsp.stat, &stbuf);
+
+ GF_PROTOCOL_DICT_UNSERIALIZE (frame->this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), rsp.op_ret,
+ op_errno, out);
+
+ if ((!uuid_is_null (inode->gfid))
+ && (uuid_compare (stbuf.ia_gfid, inode->gfid) != 0)) {
+ gf_log (frame->this->name, GF_LOG_DEBUG,
+ "gfid changed for %s", local->loc.path);
+ rsp.op_ret = -1;
+ op_errno = ESTALE;
+ goto out;
+ }
+
+ rsp.op_ret = 0;
+
+out:
+ rsp.op_errno = op_errno;
+ if (rsp.op_ret == -1) {
+ /* any error other than ENOENT */
+ if (rsp.op_errno != ENOENT)
+ gf_log (this->name, GF_LOG_WARNING,
+ "remote operation failed: %s. Path: %s (%s)",
+ strerror (rsp.op_errno), local->loc.path,
+ loc_gfid_utoa (&local->loc));
+ else
+ gf_log (this->name, GF_LOG_TRACE, "not found on remote node");
+
+ }
+
+ CLIENT_STACK_UNWIND (lookup, frame, rsp.op_ret, rsp.op_errno, inode,
+ &stbuf, xdata, &postparent);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+int
+client3_3_readv_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+ struct iobref *iobref = NULL;
+ struct iovec vector[MAX_IOVEC] = {{0}, };
+ struct iatt stat = {0,};
+ gfs3_read_rsp rsp = {0,};
+ int ret = 0, rspcount = 0;
+ clnt_local_t *local = NULL;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+
+ this = THIS;
+
+ memset (vector, 0, sizeof (vector));
+
+ frame = myframe;
+ local = frame->local;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_read_rsp);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (rsp.op_ret != -1) {
+ iobref = req->rsp_iobref;
+ gf_stat_to_iatt (&rsp.stat, &stat);
+
+ vector[0].iov_len = rsp.op_ret;
+ if (rsp.op_ret > 0)
+ vector[0].iov_base = req->rsp[1].iov_base;
+ rspcount = 1;
+ }
+ GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), ret,
+ rsp.op_errno, out);
+
+#ifdef GF_TESTING_IO_XDATA
+ dict_dump (xdata);
+#endif
+
+out:
+ if (rsp.op_ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "remote operation failed: %s",
+ strerror (gf_error_to_errno (rsp.op_errno)));
+ } else if (rsp.op_ret >= 0) {
+ if (local->attempt_reopen)
+ client_attempt_reopen (local->fd, this);
+ }
+ CLIENT_STACK_UNWIND (readv, frame, rsp.op_ret,
+ gf_error_to_errno (rsp.op_errno), vector, rspcount,
+ &stat, iobref, xdata);
+
+ free (rsp.xdata.xdata_val);
+
+ if (xdata)
+ dict_unref (xdata);
+
+ return 0;
+}
+
+int
+client3_3_release_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+
+ frame = myframe;
+ STACK_DESTROY (frame->root);
+ return 0;
+}
+int
+client3_3_releasedir_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ call_frame_t *frame = NULL;
+
+ frame = myframe;
+ STACK_DESTROY (frame->root);
+ return 0;
+}
+
+int
+client_fdctx_destroy (xlator_t *this, clnt_fd_ctx_t *fdctx)
+{
+ clnt_conf_t *conf = NULL;
+ call_frame_t *fr = NULL;
+ int32_t ret = -1;
+ char parent_down = 0;
+ fd_lk_ctx_t *lk_ctx = NULL;
+
+ GF_VALIDATE_OR_GOTO ("client", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, fdctx, out);
+
+ conf = (clnt_conf_t *) this->private;
+
+ if (fdctx->remote_fd == -1) {
+ gf_log (this->name, GF_LOG_DEBUG, "not a valid fd");
+ goto out;
+ }
+
+ pthread_mutex_lock (&conf->lock);
+ {
+ parent_down = conf->parent_down;
+ lk_ctx = fdctx->lk_ctx;
+ fdctx->lk_ctx = NULL;
+ }
+ pthread_mutex_unlock (&conf->lock);
+
+ if (lk_ctx)
+ fd_lk_ctx_unref (lk_ctx);
+
+ if (!parent_down)
+ rpc_clnt_ref (conf->rpc);
+ else
+ goto out;
+
+ fr = create_frame (this, this->ctx->pool);
+ if (fr == NULL) {
+ goto out;
+ }
+
+ ret = 0;
+
+ if (fdctx->is_dir) {
+ gfs3_releasedir_req req = {{0,},};
+ req.fd = fdctx->remote_fd;
+ gf_log (this->name, GF_LOG_TRACE, "sending releasedir on fd");
+ client_submit_request (this, &req, fr, &clnt3_3_fop_prog,
+ GFS3_OP_RELEASEDIR,
+ client3_3_releasedir_cbk,
+ NULL, NULL, 0, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_releasedir_req);
+ } else {
+ gfs3_release_req req = {{0,},};
+ req.fd = fdctx->remote_fd;
+ gf_log (this->name, GF_LOG_TRACE, "sending release on fd");
+ client_submit_request (this, &req, fr, &clnt3_3_fop_prog,
+ GFS3_OP_RELEASE,
+ client3_3_release_cbk, NULL,
+ NULL, 0, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_release_req);
+ }
+
+ rpc_clnt_unref (conf->rpc);
+out:
+ if (fdctx) {
+ fdctx->remote_fd = -1;
+ GF_FREE (fdctx);
+ }
+
+ if (ret && fr)
+ STACK_DESTROY (fr->root);
+
+ return ret;
+}
+
+int32_t
+client3_3_releasedir (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_conf_t *conf = NULL;
+ clnt_fd_ctx_t *fdctx = NULL;
+ clnt_args_t *args = NULL;
+ int64_t remote_fd = -1;
+
+ if (!this || !data)
+ goto out;
+
+ args = data;
+ conf = this->private;
+
+ pthread_mutex_lock (&conf->lock);
+ {
+ fdctx = this_fd_del_ctx (args->fd, this);
+ if (fdctx != NULL) {
+ remote_fd = fdctx->remote_fd;
+
+ /* fdctx->remote_fd == -1 indicates a reopen attempt
+ in progress. Just mark ->released = 1 and let
+ reopen_cbk handle releasing
+ */
+
+ if (remote_fd != -1)
+ list_del_init (&fdctx->sfd_pos);
+
+ fdctx->released = 1;
+ }
+ }
+ pthread_mutex_unlock (&conf->lock);
+
+ if (remote_fd != -1)
+ client_fdctx_destroy (this, fdctx);
+out:
+
+ return 0;
+}
+
+int32_t
+client3_3_release (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ int64_t remote_fd = -1;
+ clnt_conf_t *conf = NULL;
+ clnt_fd_ctx_t *fdctx = NULL;
+ clnt_args_t *args = NULL;
+ lk_heal_state_t lk_heal_state = GF_LK_HEAL_DONE;
+
+ if (!this || !data)
+ goto out;
+
+ args = data;
+ conf = this->private;
+
+ pthread_mutex_lock (&conf->lock);
+ {
+ fdctx = this_fd_del_ctx (args->fd, this);
+ if (fdctx != NULL) {
+ remote_fd = fdctx->remote_fd;
+ lk_heal_state = fdctx->lk_heal_state;
+
+ /* fdctx->remote_fd == -1 indicates a reopen attempt
+ in progress. Just mark ->released = 1 and let
+ reopen_cbk handle releasing
+ */
+
+ if (remote_fd != -1 &&
+ lk_heal_state == GF_LK_HEAL_DONE)
+ list_del_init (&fdctx->sfd_pos);
+
+ fdctx->released = 1;
+ }
+ }
+ pthread_mutex_unlock (&conf->lock);
+
+ if (remote_fd != -1 && lk_heal_state == GF_LK_HEAL_DONE)
+ client_fdctx_destroy (this, fdctx);
+out:
+ return 0;
+}
+
+
+int32_t
+client3_3_lookup (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_conf_t *conf = NULL;
+ clnt_local_t *local = NULL;
+ clnt_args_t *args = NULL;
+ gfs3_lookup_req req = {{0,},};
+ int ret = 0;
+ int op_errno = ESTALE;
+ data_t *content = NULL;
+ struct iovec vector[MAX_IOVEC] = {{0}, };
+ int count = 0;
+ struct iobref *rsp_iobref = NULL;
+ struct iobuf *rsp_iobuf = NULL;
+ struct iovec *rsphdr = NULL;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ memset (vector, 0, sizeof (vector));
+
+ conf = this->private;
+ args = data;
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ if (!(args->loc && args->loc->inode))
+ goto unwind;
+
+ loc_copy (&local->loc, args->loc);
+ loc_path (&local->loc, NULL);
+ frame->local = local;
+
+ if (args->loc->parent) {
+ if (!uuid_is_null (args->loc->parent->gfid))
+ memcpy (req.pargfid, args->loc->parent->gfid, 16);
+ else
+ memcpy (req.pargfid, args->loc->pargfid, 16);
+ } else {
+ if (!uuid_is_null (args->loc->inode->gfid))
+ memcpy (req.gfid, args->loc->inode->gfid, 16);
+ else
+ memcpy (req.gfid, args->loc->gfid, 16);
+ }
+
+ if (args->xdata) {
+ content = dict_get (args->xdata, GF_CONTENT_KEY);
+ if (content != NULL) {
+ rsp_iobref = iobref_new ();
+ if (rsp_iobref == NULL) {
+ goto unwind;
+ }
+
+ /* TODO: what is the size we should send ? */
+ /* This change very much depends on quick-read
+ changes */
+ rsp_iobuf = iobuf_get (this->ctx->iobuf_pool);
+ if (rsp_iobuf == NULL) {
+ goto unwind;
+ }
+
+ iobref_add (rsp_iobref, rsp_iobuf);
+ iobuf_unref (rsp_iobuf);
+ rsphdr = &vector[0];
+ rsphdr->iov_base = iobuf_ptr (rsp_iobuf);
+ rsphdr->iov_len = iobuf_pagesize (rsp_iobuf);
+ count = 1;
+ local->iobref = rsp_iobref;
+ rsp_iobuf = NULL;
+ rsp_iobref = NULL;
+ }
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata,
+ (&req.xdata.xdata_val),
+ req.xdata.xdata_len,
+ op_errno, unwind);
+ }
+
+ if (args->loc->name)
+ req.bname = (char *)args->loc->name;
+ else
+ req.bname = "";
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_LOOKUP, client3_3_lookup_cbk,
+ NULL, rsphdr, count,
+ NULL, 0, local->iobref,
+ (xdrproc_t)xdr_gfs3_lookup_req);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ if (rsp_iobref)
+ iobref_unref (rsp_iobref);
+
+ if (rsp_iobuf)
+ iobuf_unref (rsp_iobuf);
+
+ return 0;
+
+unwind:
+ CLIENT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL);
+
+ GF_FREE (req.xdata.xdata_val);
+
+ if (rsp_iobref)
+ iobref_unref (rsp_iobref);
+
+ if (rsp_iobuf)
+ iobuf_unref (rsp_iobuf);
+
+ return 0;
+}
+
+int32_t
+client3_3_stat (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_conf_t *conf = NULL;
+ clnt_args_t *args = NULL;
+ gfs3_stat_req req = {{0,},};
+ int ret = 0;
+ int op_errno = ESTALE;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+ if (!(args->loc && args->loc->inode))
+ goto unwind;
+
+ if (!uuid_is_null (args->loc->inode->gfid))
+ memcpy (req.gfid, args->loc->inode->gfid, 16);
+ else
+ memcpy (req.gfid, args->loc->gfid, 16);
+
+ GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+ !uuid_is_null (*((uuid_t*)req.gfid)),
+ unwind, op_errno, EINVAL);
+ conf = this->private;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_STAT, client3_3_stat_cbk, NULL,
+ NULL, 0, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_stat_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+int32_t
+client3_3_truncate (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_conf_t *conf = NULL;
+ clnt_args_t *args = NULL;
+ gfs3_truncate_req req = {{0,},};
+ int ret = 0;
+ int op_errno = ESTALE;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+
+ if (!(args->loc && args->loc->inode))
+ goto unwind;
+
+ if (!uuid_is_null (args->loc->inode->gfid))
+ memcpy (req.gfid, args->loc->inode->gfid, 16);
+ else
+ memcpy (req.gfid, args->loc->gfid, 16);
+
+ GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+ !uuid_is_null (*((uuid_t*)req.gfid)),
+ unwind, op_errno, EINVAL);
+ req.offset = args->offset;
+
+ conf = this->private;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_TRUNCATE,
+ client3_3_truncate_cbk, NULL,
+ NULL, 0, NULL, 0,
+ NULL, (xdrproc_t)xdr_gfs3_truncate_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+int32_t
+client3_3_ftruncate (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_args_t *args = NULL;
+ int64_t remote_fd = -1;
+ clnt_conf_t *conf = NULL;
+ gfs3_ftruncate_req req = {{0,},};
+ int op_errno = EINVAL;
+ int ret = 0;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+
+ conf = this->private;
+
+ CLIENT_GET_REMOTE_FD (this, args->fd, DEFAULT_REMOTE_FD,
+ remote_fd, op_errno, unwind);
+
+ req.offset = args->offset;
+ req.fd = remote_fd;
+ memcpy (req.gfid, args->fd->inode->gfid, 16);
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_FTRUNCATE,
+ client3_3_ftruncate_cbk, NULL,
+ NULL, 0, NULL, 0,
+ NULL, (xdrproc_t)xdr_gfs3_ftruncate_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+
+int32_t
+client3_3_access (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_conf_t *conf = NULL;
+ clnt_args_t *args = NULL;
+ gfs3_access_req req = {{0,},};
+ int ret = 0;
+ int op_errno = ESTALE;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+
+ if (!(args->loc && args->loc->inode))
+ goto unwind;
+
+ if (!uuid_is_null (args->loc->inode->gfid))
+ memcpy (req.gfid, args->loc->inode->gfid, 16);
+ else
+ memcpy (req.gfid, args->loc->gfid, 16);
+
+ GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+ !uuid_is_null (*((uuid_t*)req.gfid)),
+ unwind, op_errno, EINVAL);
+ req.mask = args->mask;
+
+ conf = this->private;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_ACCESS,
+ client3_3_access_cbk, NULL,
+ NULL, 0, NULL, 0,
+ NULL, (xdrproc_t)xdr_gfs3_access_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND (access, frame, -1, op_errno, NULL);
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+int32_t
+client3_3_readlink (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_conf_t *conf = NULL;
+ clnt_args_t *args = NULL;
+ gfs3_readlink_req req = {{0,},};
+ int ret = 0;
+ int op_errno = ESTALE;
+ clnt_local_t *local = NULL;
+ struct iobuf *rsp_iobuf = NULL;
+ struct iobref *rsp_iobref = NULL;
+ struct iovec *rsphdr = NULL;
+ int count = 0;
+ struct iovec vector[MAX_IOVEC] = {{0}, };
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+
+ if (!(args->loc && args->loc->inode))
+ goto unwind;
+
+ if (!uuid_is_null (args->loc->inode->gfid))
+ memcpy (req.gfid, args->loc->inode->gfid, 16);
+ else
+ memcpy (req.gfid, args->loc->gfid, 16);
+
+ GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+ !uuid_is_null (*((uuid_t*)req.gfid)),
+ unwind, op_errno, EINVAL);
+ req.size = args->size;
+ conf = this->private;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ frame->local = local;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ rsp_iobref = iobref_new ();
+ if (rsp_iobref == NULL) {
+ goto unwind;
+ }
+
+ rsp_iobuf = iobuf_get (this->ctx->iobuf_pool);
+ if (rsp_iobuf == NULL) {
+ goto unwind;
+ }
+
+ iobref_add (rsp_iobref, rsp_iobuf);
+ iobuf_unref (rsp_iobuf);
+ rsphdr = &vector[0];
+ rsphdr->iov_base = iobuf_ptr (rsp_iobuf);
+ rsphdr->iov_len = iobuf_pagesize (rsp_iobuf);
+ count = 1;
+ local->iobref = rsp_iobref;
+ rsp_iobuf = NULL;
+ rsp_iobref = NULL;
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_READLINK,
+ client3_3_readlink_cbk, NULL,
+ rsphdr, count, NULL, 0,
+ local->iobref,
+ (xdrproc_t)xdr_gfs3_readlink_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+unwind:
+ if (rsp_iobref != NULL) {
+ iobref_unref (rsp_iobref);
+ }
+
+ CLIENT_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL, NULL);
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+
+
+int32_t
+client3_3_unlink (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_conf_t *conf = NULL;
+ clnt_args_t *args = NULL;
+ gfs3_unlink_req req = {{0,},};
+ int ret = 0;
+ int op_errno = 0;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+
+ if (!(args->loc && args->loc->parent))
+ goto unwind;
+
+ if (!uuid_is_null (args->loc->parent->gfid))
+ memcpy (req.pargfid, args->loc->parent->gfid, 16);
+ else
+ memcpy (req.pargfid, args->loc->pargfid, 16);
+
+ GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+ !uuid_is_null (*((uuid_t*)req.pargfid)),
+ unwind, op_errno, EINVAL);
+ req.bname = (char *)args->loc->name;
+ conf = this->private;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_UNLINK,
+ client3_3_unlink_cbk, NULL,
+ NULL, 0, NULL, 0,
+ NULL, (xdrproc_t)xdr_gfs3_unlink_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+
+int32_t
+client3_3_rmdir (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_conf_t *conf = NULL;
+ clnt_args_t *args = NULL;
+ gfs3_rmdir_req req = {{0,},};
+ int ret = 0;
+ int op_errno = ESTALE;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+
+ if (!(args->loc && args->loc->parent))
+ goto unwind;
+
+ if (!uuid_is_null (args->loc->parent->gfid))
+ memcpy (req.pargfid, args->loc->parent->gfid, 16);
+ else
+ memcpy (req.pargfid, args->loc->pargfid, 16);
+
+ GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+ !uuid_is_null (*((uuid_t*)req.pargfid)),
+ unwind, op_errno, EINVAL);
+ req.bname = (char *)args->loc->name;
+ req.xflags = args->flags;
+ conf = this->private;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_RMDIR, client3_3_rmdir_cbk, NULL,
+ NULL, 0, NULL, 0,
+ NULL, (xdrproc_t)xdr_gfs3_rmdir_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL);
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+
+int32_t
+client3_3_symlink (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_local_t *local = NULL;
+ clnt_conf_t *conf = NULL;
+ clnt_args_t *args = NULL;
+ gfs3_symlink_req req = {{0,},};
+ int ret = 0;
+ int op_errno = ESTALE;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ frame->local = local;
+
+ if (!(args->loc && args->loc->parent))
+ goto unwind;
+
+ loc_copy (&local->loc, args->loc);
+ loc_path (&local->loc, NULL);
+
+ if (!uuid_is_null (args->loc->parent->gfid))
+ memcpy (req.pargfid, args->loc->parent->gfid, 16);
+ else
+ memcpy (req.pargfid, args->loc->pargfid, 16);
+
+ GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+ !uuid_is_null (*((uuid_t*)req.pargfid)),
+ unwind, op_errno, EINVAL);
+ req.linkname = (char *)args->linkname;
+ req.bname = (char *)args->loc->name;
+ req.umask = args->umask;
+ local->loc2.path = gf_strdup (req.linkname);
+
+ conf = this->private;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_SYMLINK, client3_3_symlink_cbk,
+ NULL, NULL, 0, NULL,
+ 0, NULL, (xdrproc_t)xdr_gfs3_symlink_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+unwind:
+
+ CLIENT_STACK_UNWIND (symlink, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+
+int32_t
+client3_3_rename (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_conf_t *conf = NULL;
+ clnt_args_t *args = NULL;
+ gfs3_rename_req req = {{0,},};
+ int ret = 0;
+ int op_errno = ESTALE;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+
+ if (!(args->oldloc && args->newloc && args->oldloc->parent &&
+ args->newloc->parent))
+ goto unwind;
+
+ if (!uuid_is_null (args->oldloc->parent->gfid))
+ memcpy (req.oldgfid, args->oldloc->parent->gfid, 16);
+ else
+ memcpy (req.oldgfid, args->oldloc->pargfid, 16);
+
+ if (!uuid_is_null (args->newloc->parent->gfid))
+ memcpy (req.newgfid, args->newloc->parent->gfid, 16);
+ else
+ memcpy (req.newgfid, args->newloc->pargfid, 16);
+
+ GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+ !uuid_is_null (*((uuid_t*)req.oldgfid)),
+ unwind, op_errno, EINVAL);
+ GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+ !uuid_is_null (*((uuid_t*)req.newgfid)),
+ unwind, op_errno, EINVAL);
+ req.oldbname = (char *)args->oldloc->name;
+ req.newbname = (char *)args->newloc->name;
+ conf = this->private;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_RENAME, client3_3_rename_cbk, NULL,
+ NULL, 0, NULL, 0,
+ NULL, (xdrproc_t)xdr_gfs3_rename_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL, NULL);
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+
+int32_t
+client3_3_link (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_local_t *local = NULL;
+ clnt_conf_t *conf = NULL;
+ clnt_args_t *args = NULL;
+ gfs3_link_req req = {{0,},};
+ int ret = 0;
+ int op_errno = ESTALE;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+
+ if (!(args->oldloc && args->oldloc->inode && args->newloc &&
+ args->newloc->parent))
+ goto unwind;
+
+ if (!uuid_is_null (args->oldloc->inode->gfid))
+ memcpy (req.oldgfid, args->oldloc->inode->gfid, 16);
+ else
+ memcpy (req.oldgfid, args->oldloc->gfid, 16);
+
+ if (!uuid_is_null (args->newloc->parent->gfid))
+ memcpy (req.newgfid, args->newloc->parent->gfid, 16);
+ else
+ memcpy (req.newgfid, args->newloc->pargfid, 16);
+
+ GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+ !uuid_is_null (*((uuid_t*)req.oldgfid)),
+ unwind, op_errno, EINVAL);
+ GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+ !uuid_is_null (*((uuid_t*)req.newgfid)),
+ unwind, op_errno, EINVAL);
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ loc_copy (&local->loc, args->oldloc);
+ loc_path (&local->loc, NULL);
+ loc_copy (&local->loc2, args->newloc);
+ loc_path (&local->loc2, NULL);
+ frame->local = local;
+
+ req.newbname = (char *)args->newloc->name;
+ conf = this->private;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_LINK, client3_3_link_cbk, NULL,
+ NULL, 0, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_link_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+
+int32_t
+client3_3_mknod (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_local_t *local = NULL;
+ clnt_conf_t *conf = NULL;
+ clnt_args_t *args = NULL;
+ gfs3_mknod_req req = {{0,},};
+ int ret = 0;
+ int op_errno = ESTALE;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ if (!(args->loc && args->loc->parent))
+ goto unwind;
+
+ loc_copy (&local->loc, args->loc);
+ loc_path (&local->loc, NULL);
+ frame->local = local;
+
+ if (!uuid_is_null (args->loc->parent->gfid))
+ memcpy (req.pargfid, args->loc->parent->gfid, 16);
+ else
+ memcpy (req.pargfid, args->loc->pargfid, 16);
+
+ GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+ !uuid_is_null (*((uuid_t*)req.pargfid)),
+ unwind, op_errno, EINVAL);
+ req.bname = (char *)args->loc->name;
+ req.mode = args->mode;
+ req.dev = args->rdev;
+ req.umask = args->umask;
+
+ conf = this->private;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_MKNOD, client3_3_mknod_cbk, NULL,
+ NULL, 0, NULL, 0,
+ NULL, (xdrproc_t)xdr_gfs3_mknod_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+
+int32_t
+client3_3_mkdir (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_local_t *local = NULL;
+ clnt_conf_t *conf = NULL;
+ clnt_args_t *args = NULL;
+ gfs3_mkdir_req req = {{0,},};
+ int ret = 0;
+ int op_errno = ESTALE;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ if (!(args->loc && args->loc->parent))
+ goto unwind;
+
+ loc_copy (&local->loc, args->loc);
+ loc_path (&local->loc, NULL);
+ frame->local = local;
+
+ if (!uuid_is_null (args->loc->parent->gfid))
+ memcpy (req.pargfid, args->loc->parent->gfid, 16);
+ else
+ memcpy (req.pargfid, args->loc->pargfid, 16);
+
+ GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+ !uuid_is_null (*((uuid_t*)req.pargfid)),
+ unwind, op_errno, EINVAL);
+
+ req.bname = (char *)args->loc->name;
+ req.mode = args->mode;
+ req.umask = args->umask;
+
+ conf = this->private;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_MKDIR, client3_3_mkdir_cbk, NULL,
+ NULL, 0, NULL, 0,
+ NULL, (xdrproc_t)xdr_gfs3_mkdir_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+int32_t
+client3_3_create (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_local_t *local = NULL;
+ clnt_conf_t *conf = NULL;
+ clnt_args_t *args = NULL;
+ gfs3_create_req req = {{0,},};
+ int ret = 0;
+ int op_errno = ESTALE;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ if (!(args->loc && args->loc->parent))
+ goto unwind;
+
+ local->fd = fd_ref (args->fd);
+ local->flags = args->flags;
+
+ loc_copy (&local->loc, args->loc);
+ loc_path (&local->loc, NULL);
+ frame->local = local;
+
+ if (!uuid_is_null (args->loc->parent->gfid))
+ memcpy (req.pargfid, args->loc->parent->gfid, 16);
+ else
+ memcpy (req.pargfid, args->loc->pargfid, 16);
+
+ GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+ !uuid_is_null (*((uuid_t*)req.pargfid)),
+ unwind, op_errno, EINVAL);
+ req.bname = (char *)args->loc->name;
+ req.mode = args->mode;
+ req.flags = gf_flags_from_flags (args->flags);
+ req.umask = args->umask;
+
+ conf = this->private;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_CREATE, client3_3_create_cbk, NULL,
+ NULL, 0, NULL, 0,
+ NULL, (xdrproc_t)xdr_gfs3_create_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL, NULL);
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+
+int32_t
+client3_3_open (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_local_t *local = NULL;
+ clnt_conf_t *conf = NULL;
+ clnt_args_t *args = NULL;
+ gfs3_open_req req = {{0,},};
+ int ret = 0;
+ int op_errno = ESTALE;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ if (!(args->loc && args->loc->inode))
+ goto unwind;
+
+ local->fd = fd_ref (args->fd);
+ local->flags = args->flags;
+ loc_copy (&local->loc, args->loc);
+ loc_path (&local->loc, NULL);
+ frame->local = local;
+
+ if (!uuid_is_null (args->loc->inode->gfid))
+ memcpy (req.gfid, args->loc->inode->gfid, 16);
+ else
+ memcpy (req.gfid, args->loc->gfid, 16);
+
+ GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+ !uuid_is_null (*((uuid_t*)req.gfid)),
+ unwind, op_errno, EINVAL);
+ req.flags = gf_flags_from_flags (args->flags);
+
+ conf = this->private;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_OPEN, client3_3_open_cbk, NULL,
+ NULL, 0, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_open_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND (open, frame, -1, op_errno, NULL, NULL);
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+
+int32_t
+client3_3_readv (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_args_t *args = NULL;
+ int64_t remote_fd = -1;
+ clnt_conf_t *conf = NULL;
+ clnt_local_t *local = NULL;
+ int op_errno = ESTALE;
+ gfs3_read_req req = {{0,},};
+ int ret = 0;
+ struct iovec rsp_vec = {0, };
+ struct iobuf *rsp_iobuf = NULL;
+ struct iobref *rsp_iobref = NULL;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+ conf = this->private;
+
+ CLIENT_GET_REMOTE_FD (this, args->fd, FALLBACK_TO_ANON_FD,
+ remote_fd, op_errno, unwind);
+ ret = client_fd_fop_prepare_local (frame, args->fd, remote_fd);
+ if (ret) {
+ op_errno = -ret;
+ goto unwind;
+ }
+ local = frame->local;
+
+ req.size = args->size;
+ req.offset = args->offset;
+ req.fd = remote_fd;
+ req.flag = args->flags;
+
+ memcpy (req.gfid, args->fd->inode->gfid, 16);
+
+ rsp_iobuf = iobuf_get2 (this->ctx->iobuf_pool, args->size);
+ if (rsp_iobuf == NULL) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ rsp_iobref = iobref_new ();
+ if (rsp_iobref == NULL) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ iobref_add (rsp_iobref, rsp_iobuf);
+ iobuf_unref (rsp_iobuf);
+
+ rsp_vec.iov_base = iobuf_ptr (rsp_iobuf);
+ rsp_vec.iov_len = iobuf_pagesize (rsp_iobuf);
+
+ rsp_iobuf = NULL;
+
+ if (args->size > rsp_vec.iov_len) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "read-size (%lu) is bigger than iobuf size (%lu)",
+ (unsigned long)args->size,
+ (unsigned long)rsp_vec.iov_len);
+ op_errno = EINVAL;
+ goto unwind;
+ }
+
+ local->iobref = rsp_iobref;
+ rsp_iobref = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_READ, client3_3_readv_cbk, NULL,
+ NULL, 0, &rsp_vec, 1,
+ local->iobref,
+ (xdrproc_t)xdr_gfs3_read_req);
+ if (ret) {
+ //unwind is done in the cbk
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+unwind:
+ if (rsp_iobuf)
+ iobuf_unref (rsp_iobuf);
+
+ if (rsp_iobref)
+ iobref_unref (rsp_iobref);
+
+ CLIENT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+int32_t
+client3_3_writev (call_frame_t *frame, xlator_t *this, void *data)
+{
+ clnt_args_t *args = NULL;
+ int64_t remote_fd = -1;
+ clnt_conf_t *conf = NULL;
+ gfs3_write_req req = {{0,},};
+ int op_errno = ESTALE;
+ int ret = 0;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+ conf = this->private;
+
+ CLIENT_GET_REMOTE_FD (this, args->fd, FALLBACK_TO_ANON_FD,
+ remote_fd, op_errno, unwind);
+ ret = client_fd_fop_prepare_local (frame, args->fd, remote_fd);
+ if (ret) {
+ op_errno = -ret;
+ goto unwind;
+ }
+
+ req.size = args->size;
+ req.offset = args->offset;
+ req.fd = remote_fd;
+ req.flag = args->flags;
+
+ memcpy (req.gfid, args->fd->inode->gfid, 16);
+
+#ifdef GF_TESTING_IO_XDATA
+ if (!args->xdata)
+ args->xdata = dict_new ();
+
+ ret = dict_set_str (args->xdata, "testing-the-xdata-key",
+ "testing-the-xdata-value");
+#endif
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_vec_request (this, &req, frame, conf->fops,
+ GFS3_OP_WRITE, client3_3_writev_cbk,
+ args->vector, args->count,
+ args->iobref,
+ (xdrproc_t)xdr_gfs3_write_req);
+ if (ret) {
+ /*
+ * If the lower layers fail to submit a request, they'll also
+ * do the unwind for us (see rpc_clnt_submit), so don't unwind
+ * here in such cases.
+ */
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+
+unwind:
+ CLIENT_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+int32_t
+client3_3_flush (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_args_t *args = NULL;
+ gfs3_flush_req req = {{0,},};
+ int64_t remote_fd = -1;
+ clnt_conf_t *conf = NULL;
+ clnt_local_t *local = NULL;
+ int op_errno = ESTALE;
+ int ret = 0;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+ conf = this->private;
+
+ CLIENT_GET_REMOTE_FD (this, args->fd, DEFAULT_REMOTE_FD,
+ remote_fd, op_errno, unwind);
+
+ conf = this->private;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ local->fd = fd_ref (args->fd);
+ local->owner = frame->root->lk_owner;
+ frame->local = local;
+
+ req.fd = remote_fd;
+ memcpy (req.gfid, args->fd->inode->gfid, 16);
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_FLUSH, client3_3_flush_cbk, NULL,
+ NULL, 0, NULL, 0,
+ NULL, (xdrproc_t)xdr_gfs3_flush_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+
+ return 0;
+
+unwind:
+ CLIENT_STACK_UNWIND (flush, frame, -1, op_errno, NULL);
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+
+int32_t
+client3_3_fsync (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_args_t *args = NULL;
+ gfs3_fsync_req req = {{0,},};
+ int64_t remote_fd = -1;
+ clnt_conf_t *conf = NULL;
+ int op_errno = 0;
+ int ret = 0;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+ conf = this->private;
+
+ CLIENT_GET_REMOTE_FD (this, args->fd, DEFAULT_REMOTE_FD,
+ remote_fd, op_errno, unwind);
+
+ req.fd = remote_fd;
+ req.data = args->flags;
+ memcpy (req.gfid, args->fd->inode->gfid, 16);
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_FSYNC, client3_3_fsync_cbk, NULL,
+ NULL, 0, NULL, 0,
+ NULL, (xdrproc_t)xdr_gfs3_fsync_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+
+unwind:
+ CLIENT_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+
+int32_t
+client3_3_fstat (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_args_t *args = NULL;
+ gfs3_fstat_req req = {{0,},};
+ int64_t remote_fd = -1;
+ clnt_conf_t *conf = NULL;
+ int op_errno = ESTALE;
+ int ret = 0;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+ conf = this->private;
+
+ CLIENT_GET_REMOTE_FD (this, args->fd, DEFAULT_REMOTE_FD,
+ remote_fd, op_errno, unwind);
+
+ req.fd = remote_fd;
+ memcpy (req.gfid, args->fd->inode->gfid, 16);
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_FSTAT, client3_3_fstat_cbk, NULL,
+ NULL, 0, NULL, 0,
+ NULL, (xdrproc_t)xdr_gfs3_fstat_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+
+unwind:
+ CLIENT_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL);
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+
+int32_t
+client3_3_opendir (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_local_t *local = NULL;
+ clnt_conf_t *conf = NULL;
+ clnt_args_t *args = NULL;
+ gfs3_opendir_req req = {{0,},};
+ int ret = 0;
+ int op_errno = ESTALE;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ if (!(args->loc && args->loc->inode))
+ goto unwind;
+
+ local->fd = fd_ref (args->fd);
+ loc_copy (&local->loc, args->loc);
+ loc_path (&local->loc, NULL);
+ frame->local = local;
+
+ if (!uuid_is_null (args->loc->inode->gfid))
+ memcpy (req.gfid, args->loc->inode->gfid, 16);
+ else
+ memcpy (req.gfid, args->loc->gfid, 16);
+
+ GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+ !uuid_is_null (*((uuid_t*)req.gfid)),
+ unwind, op_errno, EINVAL);
+
+ conf = this->private;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_OPENDIR, client3_3_opendir_cbk,
+ NULL, NULL, 0, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_opendir_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+
+unwind:
+ CLIENT_STACK_UNWIND (opendir, frame, -1, op_errno, NULL, NULL);
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+
+int32_t
+client3_3_fsyncdir (call_frame_t *frame, xlator_t *this, void *data)
+{
+ clnt_args_t *args = NULL;
+ int64_t remote_fd = -1;
+ clnt_conf_t *conf = NULL;
+ gfs3_fsyncdir_req req = {{0,},};
+ int ret = 0;
+ int32_t op_errno = ESTALE;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+ conf = this->private;
+
+ CLIENT_GET_REMOTE_FD (this, args->fd, DEFAULT_REMOTE_FD,
+ remote_fd, op_errno, unwind);
+
+ req.fd = remote_fd;
+ req.data = args->flags;
+ memcpy (req.gfid, args->fd->inode->gfid, 16);
+
+ conf = this->private;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_FSYNCDIR, client3_3_fsyncdir_cbk,
+ NULL, NULL, 0,
+ NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_fsyncdir_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+
+unwind:
+ CLIENT_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+
+int32_t
+client3_3_statfs (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_conf_t *conf = NULL;
+ clnt_args_t *args = NULL;
+ gfs3_statfs_req req = {{0,},};
+ int ret = 0;
+ int op_errno = ESTALE;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+
+ if (!args->loc)
+ goto unwind;
+
+ if (args->loc->inode) {
+ if (!uuid_is_null (args->loc->inode->gfid))
+ memcpy (req.gfid, args->loc->inode->gfid, 16);
+ else
+ memcpy (req.gfid, args->loc->gfid, 16);
+ } else
+ req.gfid[15] = 1;
+
+ GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+ !uuid_is_null (*((uuid_t*)req.gfid)),
+ unwind, op_errno, EINVAL);
+
+ conf = this->private;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_STATFS, client3_3_statfs_cbk, NULL,
+ NULL, 0, NULL, 0,
+ NULL, (xdrproc_t)xdr_gfs3_statfs_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+
+unwind:
+ CLIENT_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+
+int32_t
+client3_3_setxattr (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_conf_t *conf = NULL;
+ clnt_args_t *args = NULL;
+ gfs3_setxattr_req req = {{0,},};
+ int ret = 0;
+ int op_errno = ESTALE;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+
+ if (!(args->loc && args->loc->inode))
+ goto unwind;
+
+ if (!uuid_is_null (args->loc->inode->gfid))
+ memcpy (req.gfid, args->loc->inode->gfid, 16);
+ else
+ memcpy (req.gfid, args->loc->gfid, 16);
+
+ GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+ !uuid_is_null (*((uuid_t*)req.gfid)),
+ unwind, op_errno, EINVAL);
+ if (args->xattr) {
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xattr,
+ (&req.dict.dict_val),
+ req.dict.dict_len,
+ op_errno, unwind);
+ }
+
+ req.flags = args->flags;
+
+ conf = this->private;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_SETXATTR, client3_3_setxattr_cbk,
+ NULL, NULL, 0, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_setxattr_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+ GF_FREE (req.dict.dict_val);
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+ GF_FREE (req.dict.dict_val);
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+
+int32_t
+client3_3_fsetxattr (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_args_t *args = NULL;
+ int64_t remote_fd = -1;
+ clnt_conf_t *conf = NULL;
+ gfs3_fsetxattr_req req = {{0,},};
+ int op_errno = ESTALE;
+ int ret = 0;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+ conf = this->private;
+
+ CLIENT_GET_REMOTE_FD (this, args->fd, DEFAULT_REMOTE_FD,
+ remote_fd, op_errno, unwind);
+
+ req.fd = remote_fd;
+ req.flags = args->flags;
+ memcpy (req.gfid, args->fd->inode->gfid, 16);
+
+ if (args->xattr) {
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xattr,
+ (&req.dict.dict_val),
+ req.dict.dict_len,
+ op_errno, unwind);
+ }
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_FSETXATTR, client3_3_fsetxattr_cbk,
+ NULL, NULL, 0, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_fsetxattr_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.dict.dict_val);
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
+ GF_FREE (req.dict.dict_val);
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+
+
+int32_t
+client3_3_fgetxattr (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_args_t *args = NULL;
+ int64_t remote_fd = -1;
+ clnt_conf_t *conf = NULL;
+ gfs3_fgetxattr_req req = {{0,},};
+ int op_errno = ESTALE;
+ int ret = 0;
+ int count = 0;
+ clnt_local_t *local = NULL;
+ struct iobref *rsp_iobref = NULL;
+ struct iobuf *rsp_iobuf = NULL;
+ struct iovec *rsphdr = NULL;
+ struct iovec vector[MAX_IOVEC] = {{0}, };
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+ conf = this->private;
+
+ CLIENT_GET_REMOTE_FD (this, args->fd, DEFAULT_REMOTE_FD,
+ remote_fd, op_errno, unwind);
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ frame->local = local;
+
+ rsp_iobref = iobref_new ();
+ if (rsp_iobref == NULL) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ /* TODO: what is the size we should send ? */
+ rsp_iobuf = iobuf_get2 (this->ctx->iobuf_pool, 8 * GF_UNIT_KB);
+ if (rsp_iobuf == NULL) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ iobref_add (rsp_iobref, rsp_iobuf);
+ iobuf_unref (rsp_iobuf);
+
+ rsphdr = &vector[0];
+ rsphdr->iov_base = iobuf_ptr (rsp_iobuf);
+ rsphdr->iov_len = iobuf_pagesize (rsp_iobuf);;
+ count = 1;
+ local->iobref = rsp_iobref;
+ rsp_iobuf = NULL;
+ rsp_iobref = NULL;
+
+ req.namelen = 1; /* Use it as a flag */
+ req.fd = remote_fd;
+ req.name = (char *)args->name;
+ if (!req.name) {
+ req.name = "";
+ req.namelen = 0;
+ }
+ memcpy (req.gfid, args->fd->inode->gfid, 16);
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_FGETXATTR,
+ client3_3_fgetxattr_cbk, NULL,
+ rsphdr, count,
+ NULL, 0, local->iobref,
+ (xdrproc_t)xdr_gfs3_fgetxattr_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ if (rsp_iobuf)
+ iobuf_unref (rsp_iobuf);
+
+ if (rsp_iobref)
+ iobref_unref (rsp_iobref);
+
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND (fgetxattr, frame, -1, op_errno, NULL, NULL);
+
+ if (rsp_iobuf)
+ iobuf_unref (rsp_iobuf);
+
+ if (rsp_iobref)
+ iobref_unref (rsp_iobref);
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+
+int32_t
+client3_3_getxattr (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_conf_t *conf = NULL;
+ clnt_args_t *args = NULL;
+ gfs3_getxattr_req req = {{0,},};
+ dict_t *dict = NULL;
+ int ret = 0;
+ int32_t op_ret = -1;
+ int op_errno = ESTALE;
+ int count = 0;
+ clnt_local_t *local = NULL;
+ struct iobref *rsp_iobref = NULL;
+ struct iobuf *rsp_iobuf = NULL;
+ struct iovec *rsphdr = NULL;
+ struct iovec vector[MAX_IOVEC] = {{0}, };
+
+ if (!frame || !this || !data) {
+ op_errno = 0;
+ goto unwind;
+ }
+ args = data;
+
+ if (!args->loc) {
+ op_errno = EINVAL;
+ goto unwind;
+ }
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ loc_copy (&local->loc, args->loc);
+ loc_path (&local->loc, NULL);
+ if (args->name)
+ local->name = gf_strdup (args->name);
+
+ frame->local = local;
+
+ rsp_iobref = iobref_new ();
+ if (rsp_iobref == NULL) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ /* TODO: what is the size we should send ? */
+ rsp_iobuf = iobuf_get2 (this->ctx->iobuf_pool, 8 * GF_UNIT_KB);
+ if (rsp_iobuf == NULL) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ iobref_add (rsp_iobref, rsp_iobuf);
+ iobuf_unref (rsp_iobuf);
+
+ rsphdr = &vector[0];
+ rsphdr->iov_base = iobuf_ptr (rsp_iobuf);
+ rsphdr->iov_len = iobuf_pagesize (rsp_iobuf);
+ count = 1;
+ local->iobref = rsp_iobref;
+ rsp_iobuf = NULL;
+ rsp_iobref = NULL;
+
+ if (args->loc->inode && !uuid_is_null (args->loc->inode->gfid))
+ memcpy (req.gfid, args->loc->inode->gfid, 16);
+ else
+ memcpy (req.gfid, args->loc->gfid, 16);
+
+ GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+ !uuid_is_null (*((uuid_t*)req.gfid)),
+ unwind, op_errno, EINVAL);
+ req.namelen = 1; /* Use it as a flag */
+
+ req.name = (char *)args->name;
+ if (!req.name) {
+ req.name = "";
+ req.namelen = 0;
+ }
+
+ conf = this->private;
+
+ if (args && args->name) {
+ if (is_client_dump_locks_cmd ((char *)args->name)) {
+ dict = dict_new ();
+ ret = client_dump_locks ((char *)args->name,
+ args->loc->inode,
+ dict);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Client dump locks failed");
+ op_errno = EINVAL;
+ }
+
+ GF_ASSERT (dict);
+ op_ret = 0;
+ op_errno = 0;
+ goto unwind;
+ }
+ }
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_GETXATTR,
+ client3_3_getxattr_cbk, NULL,
+ rsphdr, count,
+ NULL, 0, local->iobref,
+ (xdrproc_t)xdr_gfs3_getxattr_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ if (rsp_iobuf)
+ iobuf_unref (rsp_iobuf);
+
+ if (rsp_iobref)
+ iobref_unref (rsp_iobref);
+
+ return 0;
+unwind:
+ if (rsp_iobuf)
+ iobuf_unref (rsp_iobuf);
+
+ if (rsp_iobref)
+ iobref_unref (rsp_iobref);
+
+ CLIENT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, NULL);
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+
+int32_t
+client3_3_xattrop (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_conf_t *conf = NULL;
+ clnt_args_t *args = NULL;
+ gfs3_xattrop_req req = {{0,},};
+ int ret = 0;
+ int op_errno = ESTALE;
+ int count = 0;
+ clnt_local_t *local = NULL;
+ struct iobref *rsp_iobref = NULL;
+ struct iobuf *rsp_iobuf = NULL;
+ struct iovec *rsphdr = NULL;
+ struct iovec vector[MAX_IOVEC] = {{0}, };
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+
+ if (!(args->loc && args->loc->inode))
+ goto unwind;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ frame->local = local;
+
+ rsp_iobref = iobref_new ();
+ if (rsp_iobref == NULL) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ /* TODO: what is the size we should send ? */
+ rsp_iobuf = iobuf_get2 (this->ctx->iobuf_pool, 8 * GF_UNIT_KB);
+ if (rsp_iobuf == NULL) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ iobref_add (rsp_iobref, rsp_iobuf);
+ iobuf_unref (rsp_iobuf);
+
+ rsphdr = &vector[0];
+ rsphdr->iov_base = iobuf_ptr (rsp_iobuf);
+ rsphdr->iov_len = iobuf_pagesize (rsp_iobuf);
+ count = 1;
+ local->iobref = rsp_iobref;
+ rsp_iobuf = NULL;
+ rsp_iobref = NULL;
+
+ if (!uuid_is_null (args->loc->inode->gfid))
+ memcpy (req.gfid, args->loc->inode->gfid, 16);
+ else
+ memcpy (req.gfid, args->loc->gfid, 16);
+
+ loc_copy (&local->loc, args->loc);
+ loc_path (&local->loc, NULL);
+ GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+ !uuid_is_null (*((uuid_t*)req.gfid)),
+ unwind, op_errno, EINVAL);
+ if (args->xattr) {
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xattr,
+ (&req.dict.dict_val),
+ req.dict.dict_len,
+ op_errno, unwind);
+ }
+
+ req.flags = args->flags;
+
+ conf = this->private;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_XATTROP,
+ client3_3_xattrop_cbk, NULL,
+ rsphdr, count,
+ NULL, 0, local->iobref,
+ (xdrproc_t)xdr_gfs3_xattrop_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.dict.dict_val);
+
+ GF_FREE (req.xdata.xdata_val);
+
+ if (rsp_iobuf)
+ iobuf_unref (rsp_iobuf);
+
+ if (rsp_iobref)
+ iobref_unref (rsp_iobref);
+
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL);
+
+ GF_FREE (req.dict.dict_val);
+
+ if (rsp_iobuf)
+ iobuf_unref (rsp_iobuf);
+
+ if (rsp_iobref)
+ iobref_unref (rsp_iobref);
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+
+int32_t
+client3_3_fxattrop (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_args_t *args = NULL;
+ int64_t remote_fd = -1;
+ clnt_conf_t *conf = NULL;
+ clnt_local_t *local = NULL;
+ gfs3_fxattrop_req req = {{0,},};
+ int op_errno = ESTALE;
+ int ret = 0;
+ int count = 0;
+ struct iobref *rsp_iobref = NULL;
+ struct iobuf *rsp_iobuf = NULL;
+ struct iovec *rsphdr = NULL;
+ struct iovec vector[MAX_IOVEC] = {{0}, };
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+ conf = this->private;
+
+ CLIENT_GET_REMOTE_FD (this, args->fd, FALLBACK_TO_ANON_FD,
+ remote_fd, op_errno, unwind);
+ ret = client_fd_fop_prepare_local (frame, args->fd, remote_fd);
+ if (ret) {
+ op_errno = -ret;
+ goto unwind;
+ }
+
+ local = frame->local;
+
+ req.fd = remote_fd;
+ req.flags = args->flags;
+ memcpy (req.gfid, args->fd->inode->gfid, 16);
+
+ rsp_iobref = iobref_new ();
+ if (rsp_iobref == NULL) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ /* TODO: what is the size we should send ? */
+ rsp_iobuf = iobuf_get2 (this->ctx->iobuf_pool, 8 * GF_UNIT_KB);
+ if (rsp_iobuf == NULL) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ iobref_add (rsp_iobref, rsp_iobuf);
+ iobuf_unref (rsp_iobuf);
+ rsphdr = &vector[0];
+ rsphdr->iov_base = iobuf_ptr (rsp_iobuf);
+ rsphdr->iov_len = iobuf_pagesize (rsp_iobuf);
+ count = 1;
+ local->iobref = rsp_iobref;
+ rsp_iobuf = NULL;
+ rsp_iobref = NULL;
+
+ if (args->xattr) {
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xattr,
+ (&req.dict.dict_val),
+ req.dict.dict_len,
+ op_errno, unwind);
+ }
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_FXATTROP,
+ client3_3_fxattrop_cbk, NULL,
+ rsphdr, count,
+ NULL, 0, local->iobref,
+ (xdrproc_t)xdr_gfs3_fxattrop_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.dict.dict_val);
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL);
+
+ GF_FREE (req.dict.dict_val);
+
+ if (rsp_iobref)
+ iobref_unref (rsp_iobref);
+
+ if (rsp_iobuf)
+ iobuf_unref (rsp_iobuf);
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+int32_t
+client3_3_removexattr (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_conf_t *conf = NULL;
+ clnt_args_t *args = NULL;
+ gfs3_removexattr_req req = {{0,},};
+ int ret = 0;
+ int op_errno = ESTALE;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+
+ if (!(args->loc && args->loc->inode))
+ goto unwind;
+
+ if (!uuid_is_null (args->loc->inode->gfid))
+ memcpy (req.gfid, args->loc->inode->gfid, 16);
+ else
+ memcpy (req.gfid, args->loc->gfid, 16);
+
+ GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+ !uuid_is_null (*((uuid_t*)req.gfid)),
+ unwind, op_errno, EINVAL);
+ req.name = (char *)args->name;
+
+ conf = this->private;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_REMOVEXATTR,
+ client3_3_removexattr_cbk, NULL,
+ NULL, 0, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_removexattr_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+int32_t
+client3_3_fremovexattr (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_conf_t *conf = NULL;
+ clnt_args_t *args = NULL;
+ gfs3_fremovexattr_req req = {{0,},};
+ int ret = 0;
+ int64_t remote_fd = -1;
+ int op_errno = ESTALE;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+
+ if (!(args->fd && args->fd->inode))
+ goto unwind;
+
+ conf = this->private;
+
+ CLIENT_GET_REMOTE_FD (this, args->fd, DEFAULT_REMOTE_FD,
+ remote_fd, op_errno, unwind);
+
+ memcpy (req.gfid, args->fd->inode->gfid, 16);
+ req.name = (char *)args->name;
+ req.fd = remote_fd;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_FREMOVEXATTR,
+ client3_3_fremovexattr_cbk, NULL,
+ NULL, 0, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_fremovexattr_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND (fremovexattr, frame, -1, op_errno, NULL);
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+int32_t
+client3_3_lk (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_args_t *args = NULL;
+ gfs3_lk_req req = {{0,},};
+ int32_t gf_cmd = 0;
+ int32_t gf_type = 0;
+ int64_t remote_fd = -1;
+ clnt_local_t *local = NULL;
+ clnt_conf_t *conf = NULL;
+ int op_errno = ESTALE;
+ int ret = 0;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+ conf = this->private;
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ CLIENT_GET_REMOTE_FD (this, args->fd, DEFAULT_REMOTE_FD,
+ remote_fd, op_errno, unwind);
+
+ ret = client_cmd_to_gf_cmd (args->cmd, &gf_cmd);
+ if (ret) {
+ op_errno = EINVAL;
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unknown cmd (%d)!", gf_cmd);
+ goto unwind;
+ }
+
+ switch (args->flock->l_type) {
+ case F_RDLCK:
+ gf_type = GF_LK_F_RDLCK;
+ break;
+ case F_WRLCK:
+ gf_type = GF_LK_F_WRLCK;
+ break;
+ case F_UNLCK:
+ gf_type = GF_LK_F_UNLCK;
+ break;
+ }
+
+ local->owner = frame->root->lk_owner;
+ local->cmd = args->cmd;
+ local->fd = fd_ref (args->fd);
+ frame->local = local;
+
+ req.fd = remote_fd;
+ req.cmd = gf_cmd;
+ req.type = gf_type;
+ gf_proto_flock_from_flock (&req.flock, args->flock);
+
+ memcpy (req.gfid, args->fd->inode->gfid, 16);
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops, GFS3_OP_LK,
+ client3_3_lk_cbk, NULL,
+ NULL, 0, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_lk_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+int32_t
+client3_3_inodelk (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_conf_t *conf = NULL;
+ clnt_args_t *args = NULL;
+ gfs3_inodelk_req req = {{0,},};
+ int ret = 0;
+ int32_t gf_cmd = 0;
+ int32_t gf_type = 0;
+ int op_errno = ESTALE;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+ if (!(args->loc && args->loc->inode))
+ goto unwind;
+
+ if (!uuid_is_null (args->loc->inode->gfid))
+ memcpy (req.gfid, args->loc->inode->gfid, 16);
+ else
+ memcpy (req.gfid, args->loc->gfid, 16);
+
+ GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+ !uuid_is_null (*((uuid_t*)req.gfid)),
+ unwind, op_errno, EINVAL);
+ if (args->cmd == F_GETLK || args->cmd == F_GETLK64)
+ gf_cmd = GF_LK_GETLK;
+ else if (args->cmd == F_SETLK || args->cmd == F_SETLK64)
+ gf_cmd = GF_LK_SETLK;
+ else if (args->cmd == F_SETLKW || args->cmd == F_SETLKW64)
+ gf_cmd = GF_LK_SETLKW;
+ else {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unknown cmd (%d)!", gf_cmd);
+ op_errno = EINVAL;
+ goto unwind;
+ }
+
+ switch (args->flock->l_type) {
+ case F_RDLCK:
+ gf_type = GF_LK_F_RDLCK;
+ break;
+ case F_WRLCK:
+ gf_type = GF_LK_F_WRLCK;
+ break;
+ case F_UNLCK:
+ gf_type = GF_LK_F_UNLCK;
+ break;
+ }
+
+ req.volume = (char *)args->volume;
+ req.cmd = gf_cmd;
+ req.type = gf_type;
+ gf_proto_flock_from_flock (&req.flock, args->flock);
+
+ conf = this->private;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_INODELK,
+ client3_3_inodelk_cbk, NULL,
+ NULL, 0, NULL, 0,
+ NULL, (xdrproc_t)xdr_gfs3_inodelk_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND (inodelk, frame, -1, op_errno, NULL);
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+
+int32_t
+client3_3_finodelk (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_args_t *args = NULL;
+ gfs3_finodelk_req req = {{0,},};
+ int32_t gf_cmd = 0;
+ int32_t gf_type = 0;
+ int64_t remote_fd = -1;
+ clnt_conf_t *conf = NULL;
+ int op_errno = ESTALE;
+ int ret = 0;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+ conf = this->private;
+ CLIENT_GET_REMOTE_FD (this, args->fd, FALLBACK_TO_ANON_FD,
+ remote_fd, op_errno, unwind);
+ ret = client_fd_fop_prepare_local (frame, args->fd, remote_fd);
+ if (ret) {
+ op_errno = -ret;
+ goto unwind;
+ }
+
+ if (args->cmd == F_GETLK || args->cmd == F_GETLK64)
+ gf_cmd = GF_LK_GETLK;
+ else if (args->cmd == F_SETLK || args->cmd == F_SETLK64)
+ gf_cmd = GF_LK_SETLK;
+ else if (args->cmd == F_SETLKW || args->cmd == F_SETLKW64)
+ gf_cmd = GF_LK_SETLKW;
+ else {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unknown cmd (%d)!", gf_cmd);
+ goto unwind;
+ }
+
+ switch (args->flock->l_type) {
+ case F_RDLCK:
+ gf_type = GF_LK_F_RDLCK;
+ break;
+ case F_WRLCK:
+ gf_type = GF_LK_F_WRLCK;
+ break;
+ case F_UNLCK:
+ gf_type = GF_LK_F_UNLCK;
+ break;
+ }
+
+ req.volume = (char *)args->volume;
+ req.fd = remote_fd;
+ req.cmd = gf_cmd;
+ req.type = gf_type;
+ gf_proto_flock_from_flock (&req.flock, args->flock);
+ memcpy (req.gfid, args->fd->inode->gfid, 16);
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_FINODELK,
+ client3_3_finodelk_cbk, NULL,
+ NULL, 0, NULL, 0,
+ NULL, (xdrproc_t)xdr_gfs3_finodelk_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL);
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+int32_t
+client3_3_entrylk (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_conf_t *conf = NULL;
+ clnt_args_t *args = NULL;
+ gfs3_entrylk_req req = {{0,},};
+ int ret = 0;
+ int op_errno = ESTALE;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+
+ if (!(args->loc && args->loc->inode))
+ goto unwind;
+
+ if (!uuid_is_null (args->loc->inode->gfid))
+ memcpy (req.gfid, args->loc->inode->gfid, 16);
+ else
+ memcpy (req.gfid, args->loc->gfid, 16);
+
+ GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+ !uuid_is_null (*((uuid_t*)req.gfid)),
+ unwind, op_errno, EINVAL);
+ req.cmd = args->cmd_entrylk;
+ req.type = args->type;
+ req.volume = (char *)args->volume;
+ req.name = "";
+ if (args->basename) {
+ req.name = (char *)args->basename;
+ req.namelen = 1;
+ }
+
+ conf = this->private;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_ENTRYLK,
+ client3_3_entrylk_cbk, NULL,
+ NULL, 0, NULL, 0,
+ NULL, (xdrproc_t)xdr_gfs3_entrylk_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND (entrylk, frame, -1, op_errno, NULL);
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+
+int32_t
+client3_3_fentrylk (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_args_t *args = NULL;
+ gfs3_fentrylk_req req = {{0,},};
+ int64_t remote_fd = -1;
+ clnt_conf_t *conf = NULL;
+ int op_errno = ESTALE;
+ int ret = 0;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+ conf = this->private;
+
+ CLIENT_GET_REMOTE_FD (this, args->fd, DEFAULT_REMOTE_FD,
+ remote_fd, op_errno, unwind);
+
+ req.fd = remote_fd;
+ req.cmd = args->cmd_entrylk;
+ req.type = args->type;
+ req.volume = (char *)args->volume;
+ req.name = "";
+ if (args->basename) {
+ req.name = (char *)args->basename;
+ req.namelen = 1;
+ }
+ memcpy (req.gfid, args->fd->inode->gfid, 16);
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_FENTRYLK,
+ client3_3_fentrylk_cbk, NULL,
+ NULL, 0, NULL, 0,
+ NULL, (xdrproc_t)xdr_gfs3_fentrylk_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL);
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+int32_t
+client3_3_rchecksum (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_args_t *args = NULL;
+ int64_t remote_fd = -1;
+ clnt_conf_t *conf = NULL;
+ gfs3_rchecksum_req req = {0,};
+ int op_errno = ESTALE;
+ int ret = 0;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+ conf = this->private;
+
+ CLIENT_GET_REMOTE_FD (this, args->fd, DEFAULT_REMOTE_FD,
+ remote_fd, op_errno, unwind);
+
+ req.len = args->len;
+ req.offset = args->offset;
+ req.fd = remote_fd;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_RCHECKSUM,
+ client3_3_rchecksum_cbk, NULL,
+ NULL, 0, NULL,
+ 0, NULL,
+ (xdrproc_t)xdr_gfs3_rchecksum_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND (rchecksum, frame, -1, op_errno, 0, NULL, NULL);
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+
+int32_t
+client3_3_readdir (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_args_t *args = NULL;
+ int64_t remote_fd = -1;
+ clnt_conf_t *conf = NULL;
+ gfs3_readdir_req req = {{0,},};
+ gfs3_readdir_rsp rsp = {0, };
+ clnt_local_t *local = NULL;
+ int op_errno = ESTALE;
+ int ret = 0;
+ int count = 0;
+ struct iobref *rsp_iobref = NULL;
+ struct iobuf *rsp_iobuf = NULL;
+ struct iovec *rsphdr = NULL;
+ struct iovec vector[MAX_IOVEC] = {{0}, };
+ int readdir_rsp_size = 0;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+ conf = this->private;
+
+ CLIENT_GET_REMOTE_FD (this, args->fd, DEFAULT_REMOTE_FD,
+ remote_fd, op_errno, unwind);
+
+ readdir_rsp_size = xdr_sizeof ((xdrproc_t) xdr_gfs3_readdir_rsp, &rsp)
+ + args->size;
+
+ if ((readdir_rsp_size + GLUSTERFS_RPC_REPLY_SIZE + GLUSTERFS_RDMA_MAX_HEADER_SIZE)
+ > (GLUSTERFS_RDMA_INLINE_THRESHOLD)) {
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ frame->local = local;
+
+ rsp_iobref = iobref_new ();
+ if (rsp_iobref == NULL) {
+ goto unwind;
+ }
+
+ /* TODO: what is the size we should send ? */
+ /* This iobuf will live for only receiving the response,
+ so not harmful */
+ rsp_iobuf = iobuf_get (this->ctx->iobuf_pool);
+ if (rsp_iobuf == NULL) {
+ goto unwind;
+ }
+
+ iobref_add (rsp_iobref, rsp_iobuf);
+ iobuf_unref (rsp_iobuf);
+
+ rsphdr = &vector[0];
+ rsphdr->iov_base = iobuf_ptr (rsp_iobuf);
+ rsphdr->iov_len = iobuf_pagesize (rsp_iobuf);
+ count = 1;
+ local->iobref = rsp_iobref;
+ rsp_iobuf = NULL;
+ rsp_iobref = NULL;
+ }
+
+ req.size = args->size;
+ req.offset = args->offset;
+ req.fd = remote_fd;
+
+ local->cmd = remote_fd;
+
+ memcpy (req.gfid, args->fd->inode->gfid, 16);
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_READDIR,
+ client3_3_readdir_cbk, NULL,
+ rsphdr, count,
+ NULL, 0, rsp_iobref,
+ (xdrproc_t)xdr_gfs3_readdir_req);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ if (rsp_iobuf)
+ iobuf_unref (rsp_iobuf);
+
+ if (rsp_iobref)
+ iobref_unref (rsp_iobref);
+
+ return 0;
+
+unwind:
+ if (rsp_iobref)
+ iobref_unref (rsp_iobref);
+
+ if (rsp_iobuf)
+ iobuf_unref (rsp_iobuf);
+
+ CLIENT_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+
+int32_t
+client3_3_readdirp (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_args_t *args = NULL;
+ gfs3_readdirp_req req = {{0,},};
+ gfs3_readdirp_rsp rsp = {0,};
+ int64_t remote_fd = -1;
+ clnt_conf_t *conf = NULL;
+ int op_errno = ESTALE;
+ int ret = 0;
+ int count = 0;
+ int readdirp_rsp_size = 0;
+ struct iobref *rsp_iobref = NULL;
+ struct iobuf *rsp_iobuf = NULL;
+ struct iovec *rsphdr = NULL;
+ struct iovec vector[MAX_IOVEC] = {{0}, };
+ clnt_local_t *local = NULL;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+ conf = this->private;
+
+ CLIENT_GET_REMOTE_FD (this, args->fd, DEFAULT_REMOTE_FD,
+ remote_fd, op_errno, unwind);
+
+ readdirp_rsp_size = xdr_sizeof ((xdrproc_t) xdr_gfs3_readdirp_rsp, &rsp)
+ + args->size;
+
+ local = mem_get0 (this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ frame->local = local;
+
+ if ((readdirp_rsp_size + GLUSTERFS_RPC_REPLY_SIZE
+ + GLUSTERFS_RDMA_MAX_HEADER_SIZE)
+ > (GLUSTERFS_RDMA_INLINE_THRESHOLD)) {
+ rsp_iobref = iobref_new ();
+ if (rsp_iobref == NULL) {
+ goto unwind;
+ }
+
+ /* TODO: what is the size we should send ? */
+ /* This iobuf will live for only receiving the response,
+ so not harmful */
+ rsp_iobuf = iobuf_get (this->ctx->iobuf_pool);
+ if (rsp_iobuf == NULL) {
+ goto unwind;
+ }
+
+ iobref_add (rsp_iobref, rsp_iobuf);
+ iobuf_unref (rsp_iobuf);
+
+ rsphdr = &vector[0];
+ rsphdr->iov_base = iobuf_ptr (rsp_iobuf);
+ rsphdr->iov_len = iobuf_pagesize (rsp_iobuf);
+ count = 1;
+ local->iobref = rsp_iobref;
+ rsp_iobuf = NULL;
+ rsp_iobref = NULL;
+ }
+
+ local->fd = fd_ref (args->fd);
+
+ req.size = args->size;
+ req.offset = args->offset;
+ req.fd = remote_fd;
+ memcpy (req.gfid, args->fd->inode->gfid, 16);
+
+ /* dict itself is 'xdata' here */
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.dict.dict_val),
+ req.dict.dict_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_READDIRP,
+ client3_3_readdirp_cbk, NULL,
+ rsphdr, count, NULL,
+ 0, rsp_iobref,
+ (xdrproc_t)xdr_gfs3_readdirp_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.dict.dict_val);
+
+ if (rsp_iobuf)
+ iobuf_unref (rsp_iobuf);
+
+ if (rsp_iobref)
+ iobref_unref (rsp_iobref);
+
+ return 0;
+unwind:
+ if (rsp_iobref)
+ iobref_unref (rsp_iobref);
+
+ if (rsp_iobuf)
+ iobuf_unref (rsp_iobuf);
+
+ GF_FREE (req.dict.dict_val);
+
+ CLIENT_STACK_UNWIND (readdirp, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+
+int32_t
+client3_3_setattr (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ clnt_conf_t *conf = NULL;
+ clnt_args_t *args = NULL;
+ gfs3_setattr_req req = {{0,},};
+ int ret = 0;
+ int op_errno = ESTALE;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+
+ if (!(args->loc && args->loc->inode))
+ goto unwind;
+
+ if (!uuid_is_null (args->loc->inode->gfid))
+ memcpy (req.gfid, args->loc->inode->gfid, 16);
+ else
+ memcpy (req.gfid, args->loc->gfid, 16);
+
+ GF_ASSERT_AND_GOTO_WITH_ERROR (this->name,
+ !uuid_is_null (*((uuid_t*)req.gfid)),
+ unwind, op_errno, EINVAL);
+ req.valid = args->valid;
+ gf_stat_from_iatt (&req.stbuf, args->stbuf);
+
+ conf = this->private;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_SETATTR,
+ client3_3_setattr_cbk, NULL,
+ NULL, 0, NULL, 0,
+ NULL, (xdrproc_t)xdr_gfs3_setattr_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+int32_t
+client3_3_fsetattr (call_frame_t *frame, xlator_t *this, void *data)
+{
+ clnt_args_t *args = NULL;
+ int64_t remote_fd = -1;
+ clnt_conf_t *conf = NULL;
+ gfs3_fsetattr_req req = {0,};
+ int op_errno = ESTALE;
+ int ret = 0;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+ conf = this->private;
+
+ CLIENT_GET_REMOTE_FD (this, args->fd, DEFAULT_REMOTE_FD,
+ remote_fd, op_errno, unwind);
+
+ req.fd = remote_fd;
+ req.valid = args->valid;
+ gf_stat_from_iatt (&req.stbuf, args->stbuf);
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_FSETATTR,
+ client3_3_fsetattr_cbk, NULL,
+ NULL, 0, NULL, 0,
+ NULL, (xdrproc_t)xdr_gfs3_fsetattr_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+int32_t
+client3_3_fallocate(call_frame_t *frame, xlator_t *this, void *data)
+{
+ clnt_args_t *args = NULL;
+ int64_t remote_fd = -1;
+ clnt_conf_t *conf = NULL;
+ gfs3_fallocate_req req = {{0},};
+ int op_errno = ESTALE;
+ int ret = 0;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+ conf = this->private;
+
+ CLIENT_GET_REMOTE_FD (this, args->fd, DEFAULT_REMOTE_FD,
+ remote_fd, op_errno, unwind);
+
+ req.fd = remote_fd;
+ req.flags = args->flags;
+ req.offset = args->offset;
+ req.size = args->size;
+ memcpy(req.gfid, args->fd->inode->gfid, 16);
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request (this, &req, frame, conf->fops,
+ GFS3_OP_FALLOCATE,
+ client3_3_fallocate_cbk, NULL,
+ NULL, 0, NULL, 0,
+ NULL, (xdrproc_t)xdr_gfs3_fallocate_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+ }
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+int32_t
+client3_3_discard(call_frame_t *frame, xlator_t *this, void *data)
+{
+ clnt_args_t *args = NULL;
+ int64_t remote_fd = -1;
+ clnt_conf_t *conf = NULL;
+ gfs3_discard_req req = {{0},};
+ int op_errno = ESTALE;
+ int ret = 0;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+ conf = this->private;
+
+ CLIENT_GET_REMOTE_FD (this, args->fd, DEFAULT_REMOTE_FD,
+ remote_fd, op_errno, unwind);
+
+ req.fd = remote_fd;
+ req.offset = args->offset;
+ req.size = args->size;
+ memcpy(req.gfid, args->fd->inode->gfid, 16);
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request(this, &req, frame, conf->fops,
+ GFS3_OP_DISCARD, client3_3_discard_cbk,
+ NULL, NULL, 0, NULL, 0, NULL,
+ (xdrproc_t) xdr_gfs3_discard_req);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND(discard, frame, -1, op_errno, NULL, NULL, NULL);
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+int32_t
+client3_3_zerofill(call_frame_t *frame, xlator_t *this, void *data)
+{
+ clnt_args_t *args = NULL;
+ int64_t remote_fd = -1;
+ clnt_conf_t *conf = NULL;
+ gfs3_zerofill_req req = {{0},};
+ int op_errno = ESTALE;
+ int ret = 0;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+ conf = this->private;
+
+ CLIENT_GET_REMOTE_FD (this, args->fd, DEFAULT_REMOTE_FD,
+ remote_fd, op_errno, unwind);
+
+ req.fd = remote_fd;
+ req.offset = args->offset;
+ req.size = args->size;
+ memcpy(req.gfid, args->fd->inode->gfid, 16);
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val),
+ req.xdata.xdata_len, op_errno, unwind);
+
+ ret = client_submit_request(this, &req, frame, conf->fops,
+ GFS3_OP_ZEROFILL, client3_3_zerofill_cbk,
+ NULL, NULL, 0, NULL, 0, NULL,
+ (xdrproc_t) xdr_gfs3_zerofill_req);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING, "failed to send the fop");
+
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+unwind:
+ CLIENT_STACK_UNWIND(zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+ GF_FREE (req.xdata.xdata_val);
+
+ return 0;
+}
+
+/* Table Specific to FOPS */
+
+
+rpc_clnt_procedure_t clnt3_3_fop_actors[GF_FOP_MAXVALUE] = {
+ [GF_FOP_NULL] = { "NULL", NULL},
+ [GF_FOP_STAT] = { "STAT", client3_3_stat },
+ [GF_FOP_READLINK] = { "READLINK", client3_3_readlink },
+ [GF_FOP_MKNOD] = { "MKNOD", client3_3_mknod },
+ [GF_FOP_MKDIR] = { "MKDIR", client3_3_mkdir },
+ [GF_FOP_UNLINK] = { "UNLINK", client3_3_unlink },
+ [GF_FOP_RMDIR] = { "RMDIR", client3_3_rmdir },
+ [GF_FOP_SYMLINK] = { "SYMLINK", client3_3_symlink },
+ [GF_FOP_RENAME] = { "RENAME", client3_3_rename },
+ [GF_FOP_LINK] = { "LINK", client3_3_link },
+ [GF_FOP_TRUNCATE] = { "TRUNCATE", client3_3_truncate },
+ [GF_FOP_OPEN] = { "OPEN", client3_3_open },
+ [GF_FOP_READ] = { "READ", client3_3_readv },
+ [GF_FOP_WRITE] = { "WRITE", client3_3_writev },
+ [GF_FOP_STATFS] = { "STATFS", client3_3_statfs },
+ [GF_FOP_FLUSH] = { "FLUSH", client3_3_flush },
+ [GF_FOP_FSYNC] = { "FSYNC", client3_3_fsync },
+ [GF_FOP_SETXATTR] = { "SETXATTR", client3_3_setxattr },
+ [GF_FOP_GETXATTR] = { "GETXATTR", client3_3_getxattr },
+ [GF_FOP_REMOVEXATTR] = { "REMOVEXATTR", client3_3_removexattr },
+ [GF_FOP_OPENDIR] = { "OPENDIR", client3_3_opendir },
+ [GF_FOP_FSYNCDIR] = { "FSYNCDIR", client3_3_fsyncdir },
+ [GF_FOP_ACCESS] = { "ACCESS", client3_3_access },
+ [GF_FOP_CREATE] = { "CREATE", client3_3_create },
+ [GF_FOP_FTRUNCATE] = { "FTRUNCATE", client3_3_ftruncate },
+ [GF_FOP_FSTAT] = { "FSTAT", client3_3_fstat },
+ [GF_FOP_LK] = { "LK", client3_3_lk },
+ [GF_FOP_LOOKUP] = { "LOOKUP", client3_3_lookup },
+ [GF_FOP_READDIR] = { "READDIR", client3_3_readdir },
+ [GF_FOP_INODELK] = { "INODELK", client3_3_inodelk },
+ [GF_FOP_FINODELK] = { "FINODELK", client3_3_finodelk },
+ [GF_FOP_ENTRYLK] = { "ENTRYLK", client3_3_entrylk },
+ [GF_FOP_FENTRYLK] = { "FENTRYLK", client3_3_fentrylk },
+ [GF_FOP_XATTROP] = { "XATTROP", client3_3_xattrop },
+ [GF_FOP_FXATTROP] = { "FXATTROP", client3_3_fxattrop },
+ [GF_FOP_FGETXATTR] = { "FGETXATTR", client3_3_fgetxattr },
+ [GF_FOP_FSETXATTR] = { "FSETXATTR", client3_3_fsetxattr },
+ [GF_FOP_RCHECKSUM] = { "RCHECKSUM", client3_3_rchecksum },
+ [GF_FOP_SETATTR] = { "SETATTR", client3_3_setattr },
+ [GF_FOP_FSETATTR] = { "FSETATTR", client3_3_fsetattr },
+ [GF_FOP_READDIRP] = { "READDIRP", client3_3_readdirp },
+ [GF_FOP_FALLOCATE] = { "FALLOCATE", client3_3_fallocate },
+ [GF_FOP_DISCARD] = { "DISCARD", client3_3_discard },
+ [GF_FOP_ZEROFILL] = { "ZEROFILL", client3_3_zerofill},
+ [GF_FOP_RELEASE] = { "RELEASE", client3_3_release },
+ [GF_FOP_RELEASEDIR] = { "RELEASEDIR", client3_3_releasedir },
+ [GF_FOP_GETSPEC] = { "GETSPEC", client3_getspec },
+ [GF_FOP_FREMOVEXATTR] = { "FREMOVEXATTR", client3_3_fremovexattr },
+};
+
+/* Used From RPC-CLNT library to log proper name of procedure based on number */
+char *clnt3_3_fop_names[GFS3_OP_MAXVALUE] = {
+ [GFS3_OP_NULL] = "NULL",
+ [GFS3_OP_STAT] = "STAT",
+ [GFS3_OP_READLINK] = "READLINK",
+ [GFS3_OP_MKNOD] = "MKNOD",
+ [GFS3_OP_MKDIR] = "MKDIR",
+ [GFS3_OP_UNLINK] = "UNLINK",
+ [GFS3_OP_RMDIR] = "RMDIR",
+ [GFS3_OP_SYMLINK] = "SYMLINK",
+ [GFS3_OP_RENAME] = "RENAME",
+ [GFS3_OP_LINK] = "LINK",
+ [GFS3_OP_TRUNCATE] = "TRUNCATE",
+ [GFS3_OP_OPEN] = "OPEN",
+ [GFS3_OP_READ] = "READ",
+ [GFS3_OP_WRITE] = "WRITE",
+ [GFS3_OP_STATFS] = "STATFS",
+ [GFS3_OP_FLUSH] = "FLUSH",
+ [GFS3_OP_FSYNC] = "FSYNC",
+ [GFS3_OP_SETXATTR] = "SETXATTR",
+ [GFS3_OP_GETXATTR] = "GETXATTR",
+ [GFS3_OP_REMOVEXATTR] = "REMOVEXATTR",
+ [GFS3_OP_OPENDIR] = "OPENDIR",
+ [GFS3_OP_FSYNCDIR] = "FSYNCDIR",
+ [GFS3_OP_ACCESS] = "ACCESS",
+ [GFS3_OP_CREATE] = "CREATE",
+ [GFS3_OP_FTRUNCATE] = "FTRUNCATE",
+ [GFS3_OP_FSTAT] = "FSTAT",
+ [GFS3_OP_LK] = "LK",
+ [GFS3_OP_LOOKUP] = "LOOKUP",
+ [GFS3_OP_READDIR] = "READDIR",
+ [GFS3_OP_INODELK] = "INODELK",
+ [GFS3_OP_FINODELK] = "FINODELK",
+ [GFS3_OP_ENTRYLK] = "ENTRYLK",
+ [GFS3_OP_FENTRYLK] = "FENTRYLK",
+ [GFS3_OP_XATTROP] = "XATTROP",
+ [GFS3_OP_FXATTROP] = "FXATTROP",
+ [GFS3_OP_FGETXATTR] = "FGETXATTR",
+ [GFS3_OP_FSETXATTR] = "FSETXATTR",
+ [GFS3_OP_RCHECKSUM] = "RCHECKSUM",
+ [GFS3_OP_SETATTR] = "SETATTR",
+ [GFS3_OP_FSETATTR] = "FSETATTR",
+ [GFS3_OP_READDIRP] = "READDIRP",
+ [GFS3_OP_RELEASE] = "RELEASE",
+ [GFS3_OP_RELEASEDIR] = "RELEASEDIR",
+ [GFS3_OP_FREMOVEXATTR] = "FREMOVEXATTR",
+ [GFS3_OP_FALLOCATE] = "FALLOCATE",
+ [GFS3_OP_DISCARD] = "DISCARD",
+ [GFS3_OP_ZEROFILL] = "ZEROFILL",
+
+};
+
+rpc_clnt_prog_t clnt3_3_fop_prog = {
+ .progname = "GlusterFS 3.3",
+ .prognum = GLUSTER_FOP_PROGRAM,
+ .progver = GLUSTER_FOP_VERSION,
+ .numproc = GLUSTER_FOP_PROCCNT,
+ .proctable = clnt3_3_fop_actors,
+ .procnames = clnt3_3_fop_names,
+};
diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
new file mode 100644
index 000000000..1f7d13ea4
--- /dev/null
+++ b/xlators/protocol/client/src/client.c
@@ -0,0 +1,2867 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "client.h"
+#include "xlator.h"
+#include "defaults.h"
+#include "glusterfs.h"
+#include "statedump.h"
+#include "compat-errno.h"
+
+#include "glusterfs3.h"
+
+extern rpc_clnt_prog_t clnt_handshake_prog;
+extern rpc_clnt_prog_t clnt_dump_prog;
+extern struct rpcclnt_cb_program gluster_cbk_prog;
+
+int client_handshake (xlator_t *this, struct rpc_clnt *rpc);
+void client_start_ping (void *data);
+int client_init_rpc (xlator_t *this);
+int client_destroy_rpc (xlator_t *this);
+int client_mark_fd_bad (xlator_t *this);
+
+int32_t
+client_type_to_gf_type (short l_type)
+{
+ int32_t gf_type = GF_LK_EOL;
+
+ switch (l_type) {
+ case F_RDLCK:
+ gf_type = GF_LK_F_RDLCK;
+ break;
+ case F_WRLCK:
+ gf_type = GF_LK_F_WRLCK;
+ break;
+ case F_UNLCK:
+ gf_type = GF_LK_F_UNLCK;
+ break;
+ }
+
+ return gf_type;
+}
+
+uint32_t
+client_get_lk_ver (clnt_conf_t *conf)
+{
+ uint32_t lk_ver = 0;
+
+ GF_VALIDATE_OR_GOTO ("client", conf, out);
+
+ pthread_mutex_lock (&conf->lock);
+ {
+ lk_ver = conf->lk_version;
+ }
+ pthread_mutex_unlock (&conf->lock);
+out:
+ return lk_ver;
+}
+
+void
+client_grace_timeout (void *data)
+{
+ int ver = 0;
+ xlator_t *this = NULL;
+ struct clnt_conf *conf = NULL;
+ struct rpc_clnt *rpc = NULL;
+
+ GF_VALIDATE_OR_GOTO ("client", data, out);
+
+ this = THIS;
+
+ rpc = (struct rpc_clnt *) data;
+
+ conf = (struct clnt_conf *) this->private;
+
+ pthread_mutex_lock (&conf->lock);
+ {
+ ver = ++conf->lk_version;
+ /* ver == 0 is a special value used by server
+ to notify client that this is a fresh connect.*/
+ if (ver == 0)
+ ver = ++conf->lk_version;
+
+ gf_timer_call_cancel (this->ctx, conf->grace_timer);
+ conf->grace_timer = NULL;
+ }
+ pthread_mutex_unlock (&conf->lock);
+
+ gf_log (this->name, GF_LOG_WARNING,
+ "client grace timer expired, updating "
+ "the lk-version to %d", ver);
+
+ client_mark_fd_bad (this);
+out:
+ return;
+}
+
+int32_t
+client_register_grace_timer (xlator_t *this, clnt_conf_t *conf)
+{
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("client", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, conf, out);
+
+ pthread_mutex_lock (&conf->lock);
+ {
+ if (conf->grace_timer || !conf->grace_timer_needed) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "Client grace timer is already set "
+ "or a grace-timer has already time out, "
+ "not registering a new timer");
+ } else {
+ gf_log (this->name, GF_LOG_INFO,
+ "Registering a grace timer");
+
+ conf->grace_timer_needed = _gf_false;
+
+ conf->grace_timer =
+ gf_timer_call_after (this->ctx,
+ conf->grace_ts,
+ client_grace_timeout,
+ conf->rpc);
+ }
+ }
+ pthread_mutex_unlock (&conf->lock);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+client_submit_request (xlator_t *this, void *req, call_frame_t *frame,
+ rpc_clnt_prog_t *prog, int procnum, fop_cbk_fn_t cbkfn,
+ struct iobref *iobref, struct iovec *rsphdr,
+ int rsphdr_count, struct iovec *rsp_payload,
+ int rsp_payload_count, struct iobref *rsp_iobref,
+ xdrproc_t xdrproc)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ struct iovec iov = {0, };
+ struct iobuf *iobuf = NULL;
+ int count = 0;
+ char start_ping = 0;
+ struct iobref *new_iobref = NULL;
+ ssize_t xdr_size = 0;
+ struct rpc_req rpcreq = {0, };
+
+ GF_VALIDATE_OR_GOTO ("client", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, prog, out);
+ GF_VALIDATE_OR_GOTO (this->name, frame, out);
+
+ conf = this->private;
+
+ /* If 'setvolume' is not successful, we should not send frames to
+ server, mean time we should be able to send 'DUMP' and 'SETVOLUME'
+ call itself even if its not connected */
+ if (!(conf->connected ||
+ ((prog->prognum == GLUSTER_DUMP_PROGRAM) ||
+ (prog->prognum == GLUSTER_PMAP_PROGRAM) ||
+ ((prog->prognum == GLUSTER_HNDSK_PROGRAM) &&
+ (procnum == GF_HNDSK_SETVOLUME))))) {
+ /* This particular error captured/logged in
+ functions calling this */
+ gf_log (this->name, GF_LOG_DEBUG,
+ "connection in disconnected state");
+ goto out;
+ }
+
+ if (req && xdrproc) {
+ xdr_size = xdr_sizeof (xdrproc, req);
+ iobuf = iobuf_get2 (this->ctx->iobuf_pool, xdr_size);
+ if (!iobuf) {
+ goto out;
+ };
+
+ new_iobref = iobref_new ();
+ if (!new_iobref) {
+ goto out;
+ }
+
+ if (iobref != NULL) {
+ ret = iobref_merge (new_iobref, iobref);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "cannot merge iobref passed from caller "
+ "into new_iobref");
+ }
+ }
+
+ ret = iobref_add (new_iobref, iobuf);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "cannot add iobuf into iobref");
+ goto out;
+ }
+
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_size (iobuf);
+
+ /* Create the xdr payload */
+ ret = xdr_serialize_generic (iov, req, xdrproc);
+ if (ret == -1) {
+ /* callingfn so that, we can get to know which xdr
+ function was called */
+ gf_log_callingfn (this->name, GF_LOG_WARNING,
+ "XDR payload creation failed");
+ goto out;
+ }
+ iov.iov_len = ret;
+ count = 1;
+ }
+
+ /* Send the msg */
+ ret = rpc_clnt_submit (conf->rpc, prog, procnum, cbkfn, &iov, count,
+ NULL, 0, new_iobref, frame, rsphdr, rsphdr_count,
+ rsp_payload, rsp_payload_count, rsp_iobref);
+
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG, "rpc_clnt_submit failed");
+ }
+
+ if (ret == 0) {
+ pthread_mutex_lock (&conf->rpc->conn.lock);
+ {
+ if (!conf->rpc->conn.ping_started) {
+ start_ping = 1;
+ }
+ }
+ pthread_mutex_unlock (&conf->rpc->conn.lock);
+ }
+
+ if (start_ping)
+ client_start_ping ((void *) this);
+
+ ret = 0;
+
+ if (new_iobref)
+ iobref_unref (new_iobref);
+
+ if (iobuf)
+ iobuf_unref (iobuf);
+
+ return ret;
+
+out:
+ rpcreq.rpc_status = -1;
+
+ cbkfn (&rpcreq, NULL, 0, frame);
+
+ if (new_iobref)
+ iobref_unref (new_iobref);
+
+ if (iobuf)
+ iobuf_unref (iobuf);
+
+ return 0;
+}
+
+
+int32_t
+client_forget (xlator_t *this, inode_t *inode)
+{
+ /* Nothing here */
+ return 0;
+}
+
+int32_t
+client_releasedir (xlator_t *this, fd_t *fd)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.fd = fd;
+
+ proc = &conf->fops->proctable[GF_FOP_RELEASEDIR];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_RELEASEDIR]);
+ goto out;
+ }
+ if (proc->fn) {
+ ret = proc->fn (NULL, this, &args);
+ }
+out:
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "releasedir fop failed");
+ return 0;
+}
+
+int32_t
+client_release (xlator_t *this, fd_t *fd)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.fd = fd;
+ proc = &conf->fops->proctable[GF_FOP_RELEASE];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_RELEASE]);
+ goto out;
+ }
+ if (proc->fn) {
+ ret = proc->fn (NULL, this, &args);
+ }
+out:
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "release fop failed");
+ return 0;
+}
+
+
+int32_t
+client_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.loc = loc;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_LOOKUP];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_LOOKUP]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ /* think of avoiding a missing frame */
+ if (ret)
+ STACK_UNWIND_STRICT (lookup, frame, -1, ENOTCONN,
+ NULL, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+int32_t
+client_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.loc = loc;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_STAT];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_STAT]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (stat, frame, -1, ENOTCONN, NULL, NULL);
+
+ return 0;
+}
+
+
+int32_t
+client_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ off_t offset, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.loc = loc;
+ args.offset = offset;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_TRUNCATE];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_TRUNCATE]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (truncate, frame, -1, ENOTCONN, NULL, NULL, NULL);
+
+
+ return 0;
+}
+
+
+int32_t
+client_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.fd = fd;
+ args.offset = offset;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_FTRUNCATE];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_FTRUNCATE]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOTCONN, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+
+int32_t
+client_access (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t mask, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.loc = loc;
+ args.mask = mask;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_ACCESS];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_ACCESS]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (access, frame, -1, ENOTCONN, NULL);
+
+ return 0;
+}
+
+
+
+
+int32_t
+client_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ size_t size, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.loc = loc;
+ args.size = size;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_READLINK];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_READLINK]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (readlink, frame, -1, ENOTCONN, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+int
+client_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.loc = loc;
+ args.mode = mode;
+ args.rdev = rdev;
+ args.umask = umask;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_MKNOD];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_MKNOD]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (mknod, frame, -1, ENOTCONN,
+ NULL, NULL, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+int
+client_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, mode_t umask, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.loc = loc;
+ args.mode = mode;
+ args.umask = umask;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_MKDIR];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_MKDIR]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (mkdir, frame, -1, ENOTCONN,
+ NULL, NULL, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+
+int32_t
+client_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int xflag, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.loc = loc;
+ args.xdata = xdata;
+ args.flags = xflag;
+
+ proc = &conf->fops->proctable[GF_FOP_UNLINK];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_UNLINK]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (unlink, frame, -1, ENOTCONN,
+ NULL, NULL, NULL);
+
+ return 0;
+}
+
+int32_t
+client_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.loc = loc;
+ args.flags = flags;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_RMDIR];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_RMDIR]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ /* think of avoiding a missing frame */
+ if (ret)
+ STACK_UNWIND_STRICT (rmdir, frame, -1, ENOTCONN,
+ NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+int
+client_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.linkname = linkpath;
+ args.loc = loc;
+ args.umask = umask;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_SYMLINK];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_SYMLINK]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (symlink, frame, -1, ENOTCONN,
+ NULL, NULL, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+
+int32_t
+client_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.oldloc = oldloc;
+ args.newloc = newloc;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_RENAME];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_RENAME]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (rename, frame, -1, ENOTCONN,
+ NULL, NULL, NULL, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+
+int32_t
+client_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.oldloc = oldloc;
+ args.newloc = newloc;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_LINK];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_LINK]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (link, frame, -1, ENOTCONN,
+ NULL, NULL, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+
+int32_t
+client_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.loc = loc;
+ args.mode = mode;
+ args.fd = fd;
+ args.umask = umask;
+ args.xdata = xdata;
+
+ if (!conf->filter_o_direct)
+ args.flags = flags;
+ else
+ args.flags = (flags & ~O_DIRECT);
+
+ proc = &conf->fops->proctable[GF_FOP_CREATE];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_CREATE]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (create, frame, -1, ENOTCONN,
+ NULL, NULL, NULL, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+
+int32_t
+client_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.loc = loc;
+ args.fd = fd;
+ args.xdata = xdata;
+
+ if (!conf->filter_o_direct)
+ args.flags = flags;
+ else
+ args.flags = (flags & ~O_DIRECT);
+
+ proc = &conf->fops->proctable[GF_FOP_OPEN];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_OPEN]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (open, frame, -1, ENOTCONN, NULL, NULL);
+
+ return 0;
+}
+
+
+
+int32_t
+client_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.fd = fd;
+ args.size = size;
+ args.offset = offset;
+ args.flags = flags;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_READ];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_READ]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (readv, frame, -1, ENOTCONN,
+ NULL, 0, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+
+
+int32_t
+client_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t off,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.fd = fd;
+ args.vector = vector;
+ args.count = count;
+ args.offset = off;
+ args.flags = flags;
+ args.iobref = iobref;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_WRITE];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_WRITE]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (writev, frame, -1, ENOTCONN, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+int32_t
+client_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.fd = fd;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_FLUSH];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_FLUSH]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (flush, frame, -1, ENOTCONN, NULL);
+
+ return 0;
+}
+
+
+
+int32_t
+client_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t flags, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.fd = fd;
+ args.flags = flags;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_FSYNC];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_FSYNC]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (fsync, frame, -1, ENOTCONN, NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+
+int32_t
+client_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.fd = fd;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_FSTAT];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_FSTAT]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (fstat, frame, -1, ENOTCONN, NULL, NULL);
+
+ return 0;
+}
+
+
+
+int32_t
+client_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.loc = loc;
+ args.fd = fd;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_OPENDIR];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_OPENDIR]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (opendir, frame, -1, ENOTCONN, NULL, NULL);
+
+ return 0;
+}
+
+
+
+int32_t
+client_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.fd = fd;
+ args.flags = flags;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_FSYNCDIR];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_FSYNCDIR]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (fsyncdir, frame, -1, ENOTCONN, NULL);
+
+ return 0;
+}
+
+
+
+int32_t
+client_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.loc = loc;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_STATFS];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_STATFS]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (statfs, frame, -1, ENOTCONN, NULL, NULL);
+
+ return 0;
+}
+
+static gf_boolean_t
+is_client_rpc_init_command (dict_t *dict, xlator_t *this,
+ char **value)
+{
+ gf_boolean_t ret = _gf_false;
+ int dict_ret = -1;
+
+ if (!strstr (this->name, "replace-brick")) {
+ gf_log (this->name, GF_LOG_TRACE, "name is !replace-brick");
+ goto out;
+ }
+ dict_ret = dict_get_str (dict, CLIENT_CMD_CONNECT, value);
+ if (dict_ret) {
+ gf_log (this->name, GF_LOG_TRACE, "key %s not present",
+ CLIENT_CMD_CONNECT);
+ goto out;
+ }
+
+ ret = _gf_true;
+
+out:
+ return ret;
+
+}
+
+static gf_boolean_t
+is_client_rpc_destroy_command (dict_t *dict, xlator_t *this)
+{
+ gf_boolean_t ret = _gf_false;
+ int dict_ret = -1;
+ char *dummy = NULL;
+
+ if (strncmp (this->name, "replace-brick", 13)) {
+ gf_log (this->name, GF_LOG_TRACE, "name is !replace-brick");
+ goto out;
+ }
+
+ dict_ret = dict_get_str (dict, CLIENT_CMD_DISCONNECT, &dummy);
+ if (dict_ret) {
+ gf_log (this->name, GF_LOG_TRACE, "key %s not present",
+ CLIENT_CMD_DISCONNECT);
+ goto out;
+ }
+
+ ret = _gf_true;
+
+out:
+ return ret;
+
+}
+
+static gf_boolean_t
+client_set_remote_options (char *value, xlator_t *this)
+{
+ char *dup_value = NULL;
+ char *host = NULL;
+ char *subvol = NULL;
+ char *host_dup = NULL;
+ char *subvol_dup = NULL;
+ char *remote_port_str = NULL;
+ char *tmp = NULL;
+ int remote_port = 0;
+ gf_boolean_t ret = _gf_false;
+
+ dup_value = gf_strdup (value);
+ host = strtok_r (dup_value, ":", &tmp);
+ subvol = strtok_r (NULL, ":", &tmp);
+ remote_port_str = strtok_r (NULL, ":", &tmp);
+
+ if (!subvol) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "proper value not passed as subvolume");
+ goto out;
+ }
+
+ host_dup = gf_strdup (host);
+ if (!host_dup) {
+ goto out;
+ }
+
+ ret = dict_set_dynstr (this->options, "remote-host", host_dup);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set remote-host with %s", host);
+ goto out;
+ }
+
+ subvol_dup = gf_strdup (subvol);
+ if (!subvol_dup) {
+ goto out;
+ }
+
+ ret = dict_set_dynstr (this->options, "remote-subvolume", subvol_dup);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set remote-host with %s", host);
+ goto out;
+ }
+
+ remote_port = atoi (remote_port_str);
+ GF_ASSERT (remote_port);
+
+ ret = dict_set_int32 (this->options, "remote-port",
+ remote_port);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to set remote-port to %d", remote_port);
+ goto out;
+ }
+
+ ret = _gf_true;
+out:
+ GF_FREE (dup_value);
+
+ return ret;
+}
+
+
+int32_t
+client_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ int ret = -1;
+ int op_ret = -1;
+ int op_errno = ENOTCONN;
+ int need_unwind = 0;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+ char *value = NULL;
+
+
+ if (is_client_rpc_init_command (dict, this, &value) == _gf_true) {
+ GF_ASSERT (value);
+ gf_log (this->name, GF_LOG_INFO, "client rpc init command");
+ ret = client_set_remote_options (value, this);
+ if (ret) {
+ (void) client_destroy_rpc (this);
+ ret = client_init_rpc (this);
+ }
+
+ if (!ret) {
+ op_ret = 0;
+ op_errno = 0;
+ }
+ need_unwind = 1;
+ goto out;
+ }
+
+ if (is_client_rpc_destroy_command (dict, this) == _gf_true) {
+ gf_log (this->name, GF_LOG_INFO, "client rpc destroy command");
+ ret = client_destroy_rpc (this);
+ if (ret) {
+ op_ret = 0;
+ op_errno = 0;
+ }
+ need_unwind = 1;
+ goto out;
+ }
+
+ conf = this->private;
+ if (!conf || !conf->fops) {
+ op_errno = ENOTCONN;
+ need_unwind = 1;
+ goto out;
+ }
+
+ args.loc = loc;
+ args.xattr = dict;
+ args.flags = flags;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_SETXATTR];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_SETXATTR]);
+ goto out;
+ }
+ if (proc->fn) {
+ ret = proc->fn (frame, this, &args);
+ if (ret) {
+ need_unwind = 1;
+ }
+ }
+out:
+ if (need_unwind)
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, NULL);
+
+ return 0;
+}
+
+
+
+int32_t
+client_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.fd = fd;
+ args.xattr = dict;
+ args.flags = flags;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_FSETXATTR];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_FSETXATTR]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (fsetxattr, frame, -1, ENOTCONN, NULL);
+
+ return 0;
+}
+
+
+
+
+int32_t
+client_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.fd = fd;
+ args.name = name;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_FGETXATTR];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_FGETXATTR]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (fgetxattr, frame, -1, ENOTCONN, NULL, NULL);
+
+ return 0;
+}
+
+
+
+int32_t
+client_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.name = name;
+ args.loc = loc;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_GETXATTR];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_GETXATTR]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (getxattr, frame, -1, ENOTCONN, NULL, NULL);
+
+ return 0;
+}
+
+
+
+int32_t
+client_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.loc = loc;
+ args.flags = flags;
+ args.xattr = dict;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_XATTROP];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_XATTROP]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (xattrop, frame, -1, ENOTCONN, NULL, NULL);
+
+ return 0;
+}
+
+
+
+int32_t
+client_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.fd = fd;
+ args.flags = flags;
+ args.xattr = dict;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_FXATTROP];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_FXATTROP]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (fxattrop, frame, -1, ENOTCONN, NULL, NULL);
+
+ return 0;
+}
+
+
+
+int32_t
+client_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.name = name;
+ args.loc = loc;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_REMOVEXATTR];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_REMOVEXATTR]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (removexattr, frame, -1, ENOTCONN, NULL);
+
+ return 0;
+}
+
+int32_t
+client_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.name = name;
+ args.fd = fd;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_FREMOVEXATTR];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_FREMOVEXATTR]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (fremovexattr, frame, -1, ENOTCONN, NULL);
+
+ return 0;
+}
+
+int32_t
+client_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.fd = fd;
+ args.cmd = cmd;
+ args.flock = lock;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_LK];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_LK]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (lk, frame, -1, ENOTCONN, NULL, NULL);
+
+ return 0;
+}
+
+
+int32_t
+client_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.loc = loc;
+ args.cmd = cmd;
+ args.flock = lock;
+ args.volume = volume;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_INODELK];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_INODELK]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (inodelk, frame, -1, ENOTCONN, NULL);
+
+ return 0;
+}
+
+
+
+int32_t
+client_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.fd = fd;
+ args.cmd = cmd;
+ args.flock = lock;
+ args.volume = volume;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_FINODELK];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_FINODELK]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (finodelk, frame, -1, ENOTCONN, NULL);
+
+ return 0;
+}
+
+
+int32_t
+client_entrylk (call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.loc = loc;
+ args.basename = basename;
+ args.type = type;
+ args.volume = volume;
+ args.cmd_entrylk = cmd;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_ENTRYLK];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_ENTRYLK]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (entrylk, frame, -1, ENOTCONN, NULL);
+
+ return 0;
+}
+
+
+
+int32_t
+client_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.fd = fd;
+ args.basename = basename;
+ args.type = type;
+ args.volume = volume;
+ args.cmd_entrylk = cmd;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_FENTRYLK];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_FENTRYLK]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOTCONN, NULL);
+
+ return 0;
+}
+
+
+int32_t
+client_rchecksum (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ int32_t len, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.fd = fd;
+ args.offset = offset;
+ args.len = len;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_RCHECKSUM];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_RCHECKSUM]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (rchecksum, frame, -1, ENOTCONN, 0, NULL, NULL);
+
+ return 0;
+}
+
+int32_t
+client_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t off, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.fd = fd;
+ args.size = size;
+ args.offset = off;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_READDIR];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_READDIR]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (readdir, frame, -1, ENOTCONN, NULL, NULL);
+
+ return 0;
+}
+
+
+int32_t
+client_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t off, dict_t *dict)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.fd = fd;
+ args.size = size;
+ args.offset = off;
+ args.xdata = dict;
+
+ proc = &conf->fops->proctable[GF_FOP_READDIRP];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_READDIRP]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (readdirp, frame, -1, ENOTCONN, NULL, NULL);
+
+ return 0;
+}
+
+
+int32_t
+client_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.loc = loc;
+ args.stbuf = stbuf;
+ args.valid = valid;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_SETATTR];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_SETATTR]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (setattr, frame, -1, ENOTCONN, NULL, NULL, NULL);
+
+ return 0;
+}
+
+int32_t
+client_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.fd = fd;
+ args.stbuf = stbuf;
+ args.valid = valid;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_FSETATTR];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_FSETATTR]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOTCONN, NULL, NULL, NULL);
+
+ return 0;
+}
+
+int32_t
+client_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.fd = fd;
+ args.flags = mode;
+ args.offset = offset;
+ args.size = len;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_FALLOCATE];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_FALLOCATE]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (fallocate, frame, -1, ENOTCONN, NULL, NULL, NULL);
+
+ return 0;
+}
+
+int32_t
+client_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.fd = fd;
+ args.offset = offset;
+ args.size = len;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_DISCARD];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_DISCARD]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT(discard, frame, -1, ENOTCONN, NULL, NULL, NULL);
+
+ return 0;
+}
+
+int32_t
+client_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops)
+ goto out;
+
+ args.fd = fd;
+ args.offset = offset;
+ args.size = len;
+ args.xdata = xdata;
+
+ proc = &conf->fops->proctable[GF_FOP_ZEROFILL];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_ZEROFILL]);
+ goto out;
+ }
+ if (proc->fn)
+ ret = proc->fn (frame, this, &args);
+out:
+ if (ret)
+ STACK_UNWIND_STRICT(zerofill, frame, -1, ENOTCONN,
+ NULL, NULL, NULL);
+
+ return 0;
+}
+
+
+int32_t
+client_getspec (call_frame_t *frame, xlator_t *this, const char *key,
+ int32_t flags)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ clnt_args_t args = {0,};
+
+ conf = this->private;
+ if (!conf || !conf->fops || !conf->handshake)
+ goto out;
+
+ args.name = key;
+ args.flags = flags;
+
+ /* For all other xlators, getspec is an fop, hence its in fops table */
+ proc = &conf->fops->proctable[GF_FOP_GETSPEC];
+ if (!proc) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "rpc procedure not found for %s",
+ gf_fop_list[GF_FOP_GETSPEC]);
+ goto out;
+ }
+ if (proc->fn) {
+ /* But at protocol level, this is handshake */
+ ret = proc->fn (frame, this, &args);
+ }
+out:
+ if (ret)
+ STACK_UNWIND_STRICT (getspec, frame, -1, EINVAL, NULL);
+
+ return 0;
+}
+
+
+int
+client_mark_fd_bad (xlator_t *this)
+{
+ clnt_conf_t *conf = NULL;
+ clnt_fd_ctx_t *tmp = NULL, *fdctx = NULL;
+
+ conf = this->private;
+
+ pthread_mutex_lock (&conf->lock);
+ {
+ list_for_each_entry_safe (fdctx, tmp, &conf->saved_fds,
+ sfd_pos) {
+ fdctx->remote_fd = -1;
+ }
+ }
+ pthread_mutex_unlock (&conf->lock);
+
+ return 0;
+}
+
+
+int
+client_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
+ void *data)
+{
+ xlator_t *this = NULL;
+ char *handshake = NULL;
+ clnt_conf_t *conf = NULL;
+ int ret = 0;
+
+ this = mydata;
+ if (!this || !this->private) {
+ gf_log ("client", GF_LOG_ERROR,
+ (this != NULL) ?
+ "private structure of the xlator is NULL":
+ "xlator is NULL");
+ goto out;
+ }
+
+ conf = this->private;
+
+ switch (event) {
+ case RPC_CLNT_CONNECT:
+ {
+ conf->connected = 1;
+ // connect happened, send 'get_supported_versions' mop
+ ret = dict_get_str (this->options, "disable-handshake",
+ &handshake);
+
+ gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_CONNECT");
+
+ if ((ret < 0) || (strcasecmp (handshake, "on"))) {
+ ret = client_handshake (this, rpc);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "handshake msg returned %d", ret);
+ } else {
+ //conf->rpc->connected = 1;
+ if (conf->last_sent_event != GF_EVENT_CHILD_UP) {
+ ret = default_notify (this, GF_EVENT_CHILD_UP,
+ NULL);
+ if (ret)
+ gf_log (this->name, GF_LOG_INFO,
+ "CHILD_UP notify failed");
+ conf->last_sent_event = GF_EVENT_CHILD_UP;
+ }
+ }
+
+ /* Cancel grace timer if set */
+ pthread_mutex_lock (&conf->lock);
+ {
+ conf->grace_timer_needed = _gf_true;
+
+ if (conf->grace_timer) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Cancelling the grace timer");
+
+ gf_timer_call_cancel (this->ctx,
+ conf->grace_timer);
+
+ conf->grace_timer = NULL;
+ }
+ }
+ pthread_mutex_unlock (&conf->lock);
+
+ break;
+ }
+ case RPC_CLNT_DISCONNECT:
+ if (!conf->lk_heal)
+ client_mark_fd_bad (this);
+ else
+ client_register_grace_timer (this, conf);
+
+ if (!conf->skip_notify) {
+ if (conf->connected) {
+ gf_log (this->name,
+ ((!conf->disconnect_err_logged)
+ ? GF_LOG_INFO : GF_LOG_DEBUG),
+ "disconnected from %s. Client process "
+ "will keep trying to connect to "
+ "glusterd until brick's port is "
+ "available",
+ conf->rpc->conn.trans->peerinfo.identifier);
+
+ if (conf->portmap_err_logged)
+ conf->disconnect_err_logged = 1;
+ }
+
+ /* If the CHILD_DOWN event goes to parent xlator
+ multiple times, the logic of parent xlator notify
+ may get screwed up.. (eg. CHILD_MODIFIED event in
+ replicate), hence make sure events which are passed
+ to parent are genuine */
+ if (conf->last_sent_event != GF_EVENT_CHILD_DOWN) {
+ ret = default_notify (this, GF_EVENT_CHILD_DOWN,
+ NULL);
+ if (ret)
+ gf_log (this->name, GF_LOG_INFO,
+ "CHILD_DOWN notify failed");
+ conf->last_sent_event = GF_EVENT_CHILD_DOWN;
+ }
+ } else {
+ if (conf->connected)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "disconnected (skipped notify)");
+ }
+
+ conf->connected = 0;
+ conf->skip_notify = 0;
+
+ if (conf->quick_reconnect) {
+ conf->quick_reconnect = 0;
+ rpc_clnt_start (rpc);
+
+ } else {
+ rpc->conn.config.remote_port = 0;
+
+ }
+
+ break;
+
+ default:
+ gf_log (this->name, GF_LOG_TRACE,
+ "got some other RPC event %d", event);
+
+ break;
+ }
+
+out:
+ return 0;
+}
+
+
+int
+notify (xlator_t *this, int32_t event, void *data, ...)
+{
+ clnt_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf)
+ return 0;
+
+ switch (event) {
+ case GF_EVENT_PARENT_UP:
+ {
+ gf_log (this->name, GF_LOG_INFO,
+ "parent translators are ready, attempting connect "
+ "on transport");
+
+ rpc_clnt_start (conf->rpc);
+ break;
+ }
+
+ case GF_EVENT_PARENT_DOWN:
+ gf_log (this->name, GF_LOG_INFO,
+ "current graph is no longer active, destroying "
+ "rpc_client ");
+
+ pthread_mutex_lock (&conf->lock);
+ {
+ conf->parent_down = 1;
+ }
+ pthread_mutex_unlock (&conf->lock);
+
+ rpc_clnt_disable (conf->rpc);
+ break;
+
+ default:
+ gf_log (this->name, GF_LOG_DEBUG,
+ "got %d, calling default_notify ()", event);
+
+ default_notify (this, event, data);
+ conf->last_sent_event = event;
+ break;
+ }
+
+ return 0;
+}
+
+int
+build_client_config (xlator_t *this, clnt_conf_t *conf)
+{
+ int ret = -1;
+
+ if (!conf)
+ goto out;
+
+ GF_OPTION_INIT ("frame-timeout", conf->rpc_conf.rpc_timeout,
+ int32, out);
+
+ GF_OPTION_INIT ("remote-port", conf->rpc_conf.remote_port,
+ int32, out);
+
+ GF_OPTION_INIT ("ping-timeout", conf->opt.ping_timeout,
+ int32, out);
+
+ GF_OPTION_INIT ("remote-subvolume", conf->opt.remote_subvolume,
+ path, out);
+ if (!conf->opt.remote_subvolume)
+ gf_log (this->name, GF_LOG_WARNING,
+ "option 'remote-subvolume' not given");
+
+ GF_OPTION_INIT ("filter-O_DIRECT", conf->filter_o_direct,
+ bool, out);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_client_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
+ "failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+int
+client_destroy_rpc (xlator_t *this)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ if (conf->rpc) {
+ /* cleanup the saved-frames before last unref */
+ rpc_clnt_connection_cleanup (&conf->rpc->conn);
+
+ conf->rpc = rpc_clnt_unref (conf->rpc);
+ ret = 0;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Client rpc conn destroyed");
+ goto out;
+ }
+
+ gf_log (this->name, GF_LOG_WARNING,
+ "RPC destroy called on already destroyed "
+ "connection");
+
+out:
+ return ret;
+}
+
+int
+client_init_rpc (xlator_t *this)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (conf->rpc) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "client rpc already init'ed");
+ ret = -1;
+ goto out;
+ }
+
+ conf->rpc = rpc_clnt_new (this->options, this->ctx, this->name, 0);
+ if (!conf->rpc) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to initialize RPC");
+ goto out;
+ }
+
+ ret = rpc_clnt_register_notify (conf->rpc, client_rpc_notify, this);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "failed to register notify");
+ goto out;
+ }
+
+ conf->handshake = &clnt_handshake_prog;
+ conf->dump = &clnt_dump_prog;
+
+ ret = rpcclnt_cbk_program_register (conf->rpc, &gluster_cbk_prog,
+ this);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to register callback program");
+ goto out;
+ }
+
+ ret = 0;
+
+ gf_log (this->name, GF_LOG_DEBUG, "client init successful");
+out:
+ return ret;
+}
+
+
+int
+client_init_grace_timer (xlator_t *this, dict_t *options,
+ clnt_conf_t *conf)
+{
+ char *lk_heal = NULL;
+ int32_t ret = -1;
+ int32_t grace_timeout = -1;
+
+ GF_VALIDATE_OR_GOTO ("client", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, options, out);
+ GF_VALIDATE_OR_GOTO (this->name, conf, out);
+
+ conf->lk_heal = _gf_false;
+
+ ret = dict_get_str (options, "lk-heal", &lk_heal);
+ if (!ret)
+ gf_string2boolean (lk_heal, &conf->lk_heal);
+
+ gf_log (this->name, GF_LOG_DEBUG, "lk-heal = %s",
+ (conf->lk_heal) ? "on" : "off");
+
+ ret = dict_get_int32 (options, "grace-timeout", &grace_timeout);
+ if (!ret)
+ conf->grace_ts.tv_sec = grace_timeout;
+ else
+ conf->grace_ts.tv_sec = 10;
+
+ conf->grace_ts.tv_nsec = 0;
+
+ gf_log (this->name, GF_LOG_DEBUG, "Client grace timeout "
+ "value = %"PRIu64, conf->grace_ts.tv_sec);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ clnt_conf_t *conf = NULL;
+ int ret = -1;
+ int subvol_ret = 0;
+ char *old_remote_subvol = NULL;
+ char *new_remote_subvol = NULL;
+ char *old_remote_host = NULL;
+ char *new_remote_host = NULL;
+
+ conf = this->private;
+
+ GF_OPTION_RECONF ("frame-timeout", conf->rpc_conf.rpc_timeout,
+ options, int32, out);
+
+ GF_OPTION_RECONF ("ping-timeout", conf->opt.ping_timeout,
+ options, int32, out);
+
+ subvol_ret = dict_get_str (this->options, "remote-host",
+ &old_remote_host);
+
+ if (subvol_ret == 0) {
+ subvol_ret = dict_get_str (options, "remote-host",
+ &new_remote_host);
+ if (subvol_ret == 0) {
+ if (strcmp (old_remote_host, new_remote_host)) {
+ ret = 1;
+ goto out;
+ }
+ }
+ }
+
+ subvol_ret = dict_get_str (this->options, "remote-subvolume",
+ &old_remote_subvol);
+
+ if (subvol_ret == 0) {
+ subvol_ret = dict_get_str (options, "remote-subvolume",
+ &new_remote_subvol);
+ if (subvol_ret == 0) {
+ if (strcmp (old_remote_subvol, new_remote_subvol)) {
+ ret = 1;
+ goto out;
+ }
+ }
+ }
+
+ GF_OPTION_RECONF ("filter-O_DIRECT", conf->filter_o_direct,
+ options, bool, out);
+
+ ret = client_init_grace_timer (this, options, conf);
+ if (ret)
+ goto out;
+
+ ret = 0;
+out:
+ return ret;
+
+}
+
+
+int
+init (xlator_t *this)
+{
+ int ret = -1;
+ clnt_conf_t *conf = NULL;
+
+ if (this->children) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "FATAL: client protocol translator cannot have any "
+ "subvolumes");
+ goto out;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Volume is dangling. ");
+ }
+
+ conf = GF_CALLOC (1, sizeof (*conf), gf_client_mt_clnt_conf_t);
+ if (!conf)
+ goto out;
+
+ pthread_mutex_init (&conf->lock, NULL);
+ INIT_LIST_HEAD (&conf->saved_fds);
+
+ /* Initialize parameters for lock self healing*/
+ conf->lk_version = 1;
+ conf->grace_timer = NULL;
+ conf->grace_timer_needed = _gf_true;
+
+ ret = client_init_grace_timer (this, this->options, conf);
+ if (ret)
+ goto out;
+
+ LOCK_INIT (&conf->rec_lock);
+
+ conf->last_sent_event = -1; /* To start with we don't have any events */
+
+ this->private = conf;
+
+ /* If it returns -1, then its a failure, if it returns +1 we need
+ have to understand that 'this' is subvolume of a xlator which,
+ will set the remote host and remote subvolume in a setxattr
+ call.
+ */
+
+ ret = build_client_config (this, conf);
+ if (ret == -1)
+ goto out;
+
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
+
+ this->local_pool = mem_pool_new (clnt_local_t, 64);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto out;
+ }
+
+ ret = client_init_rpc (this);
+out:
+ if (ret)
+ this->fini (this);
+
+ return ret;
+}
+
+void
+fini (xlator_t *this)
+{
+ clnt_conf_t *conf = NULL;
+
+ conf = this->private;
+ this->private = NULL;
+
+ if (conf) {
+ if (conf->rpc) {
+ /* cleanup the saved-frames before last unref */
+ rpc_clnt_connection_cleanup (&conf->rpc->conn);
+
+ rpc_clnt_unref (conf->rpc);
+ }
+
+ /* Saved Fds */
+ /* TODO: */
+
+ pthread_mutex_destroy (&conf->lock);
+
+ GF_FREE (conf);
+ }
+ return;
+}
+
+static void
+client_fd_lk_ctx_dump (xlator_t *this, fd_lk_ctx_t *lk_ctx, int nth_fd)
+{
+ gf_boolean_t use_try_lock = _gf_true;
+ int ret = -1;
+ int lock_no = 0;
+ fd_lk_ctx_t *lk_ctx_ref = NULL;
+ fd_lk_ctx_node_t *plock = NULL;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0,};
+
+ lk_ctx_ref = fd_lk_ctx_try_ref (lk_ctx);
+ if (!lk_ctx_ref)
+ return;
+
+ ret = client_fd_lk_list_empty (lk_ctx_ref, (use_try_lock = _gf_true));
+ if (ret != 0)
+ return;
+
+ ret = TRY_LOCK (&lk_ctx_ref->lock);
+ if (ret)
+ return;
+
+ gf_proc_dump_write ("------","------");
+
+ lock_no = 0;
+ list_for_each_entry (plock, &lk_ctx_ref->lk_list, next) {
+ snprintf (key, sizeof (key), "granted-posix-lock[%d]",
+ lock_no++);
+ gf_proc_dump_write (key, "owner = %s, cmd = %s "
+ "fl_type = %s, fl_start = %"
+ PRId64", fl_end = %"PRId64
+ ", user_flock: l_type = %s, "
+ "l_start = %"PRId64", l_len = %"PRId64,
+ lkowner_utoa (&plock->user_flock.l_owner),
+ get_lk_cmd (plock->cmd),
+ get_lk_type (plock->fl_type),
+ plock->fl_start, plock->fl_end,
+ get_lk_type (plock->user_flock.l_type),
+ plock->user_flock.l_start,
+ plock->user_flock.l_len);
+ }
+ gf_proc_dump_write ("------","------");
+
+ UNLOCK (&lk_ctx_ref->lock);
+ fd_lk_ctx_unref (lk_ctx_ref);
+
+}
+
+int
+client_priv_dump (xlator_t *this)
+{
+ clnt_conf_t *conf = NULL;
+ int ret = -1;
+ clnt_fd_ctx_t *tmp = NULL;
+ int i = 0;
+ char key[GF_DUMP_MAX_BUF_LEN];
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+
+ if (!this)
+ return -1;
+
+ conf = this->private;
+ if (!conf)
+ return -1;
+
+ ret = pthread_mutex_trylock(&conf->lock);
+ if (ret)
+ return -1;
+
+ gf_proc_dump_build_key(key_prefix, "xlator.protocol.client",
+ "%s.priv", this->name);
+
+ gf_proc_dump_add_section(key_prefix);
+
+ list_for_each_entry(tmp, &conf->saved_fds, sfd_pos) {
+ sprintf (key, "fd.%d.remote_fd", i);
+ gf_proc_dump_write(key, "%d", tmp->remote_fd);
+ client_fd_lk_ctx_dump (this, tmp->lk_ctx, i);
+ i++;
+ }
+
+ gf_proc_dump_write("connecting", "%d", conf->connecting);
+
+ if (conf->rpc) {
+ gf_proc_dump_write("total_bytes_read", "%"PRIu64,
+ conf->rpc->conn.trans->total_bytes_read);
+
+ gf_proc_dump_write("total_bytes_written", "%"PRIu64,
+ conf->rpc->conn.trans->total_bytes_write);
+ }
+ pthread_mutex_unlock(&conf->lock);
+
+ return 0;
+
+}
+
+int32_t
+client_inodectx_dump (xlator_t *this, inode_t *inode)
+{
+ if (!inode)
+ return -1;
+
+ if (!this)
+ return -1;
+
+ /*TODO*/
+
+ return 0;
+}
+
+
+
+
+struct xlator_cbks cbks = {
+ .forget = client_forget,
+ .release = client_release,
+ .releasedir = client_releasedir
+};
+
+struct xlator_fops fops = {
+ .stat = client_stat,
+ .readlink = client_readlink,
+ .mknod = client_mknod,
+ .mkdir = client_mkdir,
+ .unlink = client_unlink,
+ .rmdir = client_rmdir,
+ .symlink = client_symlink,
+ .rename = client_rename,
+ .link = client_link,
+ .truncate = client_truncate,
+ .open = client_open,
+ .readv = client_readv,
+ .writev = client_writev,
+ .statfs = client_statfs,
+ .flush = client_flush,
+ .fsync = client_fsync,
+ .setxattr = client_setxattr,
+ .getxattr = client_getxattr,
+ .fsetxattr = client_fsetxattr,
+ .fgetxattr = client_fgetxattr,
+ .removexattr = client_removexattr,
+ .fremovexattr = client_fremovexattr,
+ .opendir = client_opendir,
+ .readdir = client_readdir,
+ .readdirp = client_readdirp,
+ .fsyncdir = client_fsyncdir,
+ .access = client_access,
+ .ftruncate = client_ftruncate,
+ .fstat = client_fstat,
+ .create = client_create,
+ .lk = client_lk,
+ .inodelk = client_inodelk,
+ .finodelk = client_finodelk,
+ .entrylk = client_entrylk,
+ .fentrylk = client_fentrylk,
+ .lookup = client_lookup,
+ .rchecksum = client_rchecksum,
+ .xattrop = client_xattrop,
+ .fxattrop = client_fxattrop,
+ .setattr = client_setattr,
+ .fsetattr = client_fsetattr,
+ .fallocate = client_fallocate,
+ .discard = client_discard,
+ .zerofill = client_zerofill,
+ .getspec = client_getspec,
+};
+
+
+struct xlator_dumpops dumpops = {
+ .priv = client_priv_dump,
+ .inodectx = client_inodectx_dump,
+};
+
+
+struct volume_options options[] = {
+ { .key = {"username"},
+ .type = GF_OPTION_TYPE_ANY
+ },
+ { .key = {"password"},
+ .type = GF_OPTION_TYPE_ANY
+ },
+ { .key = {"transport-type"},
+ .value = {"tcp", "socket", "ib-verbs", "unix", "ib-sdp",
+ "tcp/client", "ib-verbs/client", "rdma"},
+ .type = GF_OPTION_TYPE_STR
+ },
+ { .key = {"remote-host"},
+ .type = GF_OPTION_TYPE_INTERNET_ADDRESS
+ },
+ { .key = {"remote-port"},
+ .type = GF_OPTION_TYPE_INT,
+ },
+ { .key = {"remote-subvolume"},
+ .type = GF_OPTION_TYPE_ANY
+ },
+ { .key = {"frame-timeout",
+ "rpc-timeout" },
+ .type = GF_OPTION_TYPE_TIME,
+ .min = 0,
+ .max = 86400,
+ .default_value = "1800",
+ .description = "Time frame after which the (file) operation would be "
+ "declared as dead, if the server does not respond for "
+ "a particular (file) operation."
+ },
+ { .key = {"ping-timeout"},
+ .type = GF_OPTION_TYPE_TIME,
+ .min = 1,
+ .max = 1013,
+ .default_value = "42",
+ .description = "Time duration for which the client waits to "
+ "check if the server is responsive."
+ },
+ { .key = {"client-bind-insecure"},
+ .type = GF_OPTION_TYPE_BOOL
+ },
+ { .key = {"lk-heal"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Enables or disables the lock heal."
+ },
+ { .key = {"grace-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 10,
+ .max = 1800,
+ .description = "Sets the grace-timeout value. Valid range 10-1800."
+ },
+ {.key = {"tcp-window-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = GF_MIN_SOCKET_WINDOW_SIZE,
+ .max = GF_MAX_SOCKET_WINDOW_SIZE,
+ .description = "Specifies the window size for tcp socket."
+ },
+ { .key = {"filter-O_DIRECT"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "disable",
+ .description = "If enabled, in open() and creat() calls, O_DIRECT "
+ "flag will be filtered at the client protocol level so server will "
+ "still continue to cache the file. This works similar to NFS's "
+ "behavior of O_DIRECT",
+ },
+ { .key = {NULL} },
+};
diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h
new file mode 100644
index 000000000..afab2d74f
--- /dev/null
+++ b/xlators/protocol/client/src/client.h
@@ -0,0 +1,257 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CLIENT_H
+#define _CLIENT_H
+
+#include <pthread.h>
+#include <stdint.h>
+
+#include "rpc-clnt.h"
+#include "list.h"
+#include "inode.h"
+#include "client-mem-types.h"
+#include "protocol-common.h"
+#include "glusterfs3.h"
+#include "fd-lk.h"
+
+/* FIXME: Needs to be defined in a common file */
+#define CLIENT_CMD_CONNECT "trusted.glusterfs.client-connect"
+#define CLIENT_CMD_DISCONNECT "trusted.glusterfs.client-disconnect"
+#define CLIENT_DUMP_LOCKS "trusted.glusterfs.clientlk-dump"
+#define GF_MAX_SOCKET_WINDOW_SIZE (1 * GF_UNIT_MB)
+#define GF_MIN_SOCKET_WINDOW_SIZE (0)
+
+typedef enum {
+ GF_LK_HEAL_IN_PROGRESS,
+ GF_LK_HEAL_DONE,
+} lk_heal_state_t;
+
+typedef enum {
+ DEFAULT_REMOTE_FD = 0,
+ FALLBACK_TO_ANON_FD = 1
+} clnt_remote_fd_flags_t;
+
+#define CLIENT_GET_REMOTE_FD(xl, fd, flags, remote_fd, op_errno, label) \
+ do { \
+ int _ret = 0; \
+ _ret = client_get_remote_fd (xl, fd, flags, &remote_fd);\
+ if (_ret < 0) { \
+ op_errno = errno; \
+ goto label; \
+ } \
+ if (remote_fd == -1) { \
+ gf_log (xl->name, GF_LOG_WARNING, " (%s) " \
+ "remote_fd is -1. EBADFD", \
+ uuid_utoa (fd->inode->gfid)); \
+ op_errno = EBADFD; \
+ goto label; \
+ } \
+ } while (0)
+
+#define CLIENT_STACK_UNWIND(op, frame, params ...) do { \
+ clnt_local_t *__local = frame->local; \
+ frame->local = NULL; \
+ STACK_UNWIND_STRICT (op, frame, params); \
+ client_local_wipe (__local); \
+ } while (0)
+
+
+struct clnt_options {
+ char *remote_subvolume;
+ int ping_timeout;
+};
+
+typedef struct clnt_conf {
+ struct rpc_clnt *rpc;
+ struct clnt_options opt;
+ struct rpc_clnt_config rpc_conf;
+ struct list_head saved_fds;
+ pthread_mutex_t lock;
+ int connecting;
+ int connected;
+
+ rpc_clnt_prog_t *fops;
+ rpc_clnt_prog_t *mgmt;
+ rpc_clnt_prog_t *handshake;
+ rpc_clnt_prog_t *dump;
+
+ uint64_t reopen_fd_count; /* Count of fds reopened after a
+ connection is established */
+ gf_lock_t rec_lock;
+ int skip_notify;
+
+ int last_sent_event; /* Flag used to make sure we are
+ not repeating the same event
+ which was sent earlier */
+ char portmap_err_logged; /* flag used to prevent
+ excessive logging */
+ char disconnect_err_logged; /* flag used to prevent
+ excessive disconnect
+ logging */
+
+ char need_different_port; /* flag used to change the
+ portmap path in case of
+ 'tcp,rdma' on server */
+ gf_boolean_t lk_heal;
+ uint16_t lk_version; /* this variable is used to distinguish
+ client-server transaction while
+ performing lock healing */
+ struct timespec grace_ts;
+ gf_timer_t *grace_timer;
+ gf_boolean_t grace_timer_needed; /* The state of this flag will
+ be used to decide whether
+ a new grace-timer must be
+ registered or not. False
+ means dont register, true
+ means register */
+ char parent_down;
+ gf_boolean_t quick_reconnect; /* When reconnecting after
+ portmap query, do not let
+ the reconnection happen after
+ the usual 3-second wait
+ */
+ gf_boolean_t filter_o_direct; /* if set, filter O_DIRECT from
+ the flags list of open() */
+} clnt_conf_t;
+
+typedef struct _client_fd_ctx {
+ struct list_head sfd_pos; /* Stores the reference to this
+ fd's position in the saved_fds list.
+ */
+ int64_t remote_fd;
+ char is_dir;
+ char released;
+ int32_t flags;
+ fd_lk_ctx_t *lk_ctx;
+ pthread_mutex_t mutex;
+ lk_heal_state_t lk_heal_state;
+ uuid_t gfid;
+ void (*reopen_done) (struct _client_fd_ctx*, xlator_t *);
+ struct list_head lock_list; /* List of all granted locks on this fd */
+ int32_t reopen_attempts;
+} clnt_fd_ctx_t;
+
+typedef struct _client_posix_lock {
+ fd_t *fd; /* The fd on which the lk operation was made */
+
+ struct gf_flock user_flock; /* the flock supplied by the user */
+ off_t fl_start;
+ off_t fl_end;
+ short fl_type;
+ int32_t cmd; /* the cmd for the lock call */
+ gf_lkowner_t owner; /* lock owner from fuse */
+ struct list_head list; /* reference used to add to the fdctx list of locks */
+} client_posix_lock_t;
+
+typedef struct client_local {
+ loc_t loc;
+ loc_t loc2;
+ fd_t *fd;
+ clnt_fd_ctx_t *fdctx;
+ uint32_t flags;
+ struct iobref *iobref;
+
+ client_posix_lock_t *client_lock;
+ gf_lkowner_t owner;
+ int32_t cmd;
+ struct list_head lock_list;
+ pthread_mutex_t mutex;
+ char *name;
+ gf_boolean_t attempt_reopen;
+} clnt_local_t;
+
+typedef struct client_args {
+ loc_t *loc;
+ fd_t *fd;
+ const char *linkname;
+ struct iobref *iobref;
+ struct iovec *vector;
+ dict_t *xattr;
+ struct iatt *stbuf;
+ loc_t *oldloc;
+ loc_t *newloc;
+ const char *name;
+ struct gf_flock *flock;
+ const char *volume;
+ const char *basename;
+ off_t offset;
+ int32_t mask;
+ int32_t cmd;
+ size_t size;
+ mode_t mode;
+ dev_t rdev;
+ int32_t flags;
+ int32_t count;
+ int32_t datasync;
+ entrylk_cmd cmd_entrylk;
+ entrylk_type type;
+ gf_xattrop_flags_t optype;
+ int32_t valid;
+ int32_t len;
+
+ mode_t umask;
+ dict_t *xdata;
+} clnt_args_t;
+
+typedef ssize_t (*gfs_serialize_t) (struct iovec outmsg, void *args);
+
+clnt_fd_ctx_t *this_fd_get_ctx (fd_t *file, xlator_t *this);
+clnt_fd_ctx_t *this_fd_del_ctx (fd_t *file, xlator_t *this);
+void this_fd_set_ctx (fd_t *file, xlator_t *this, loc_t *loc,
+ clnt_fd_ctx_t *ctx);
+
+int client_local_wipe (clnt_local_t *local);
+int client_submit_request (xlator_t *this, void *req,
+ call_frame_t *frame, rpc_clnt_prog_t *prog,
+ int procnum, fop_cbk_fn_t cbk,
+ struct iobref *iobref,
+ struct iovec *rsphdr, int rsphdr_count,
+ struct iovec *rsp_payload, int rsp_count,
+ struct iobref *rsp_iobref, xdrproc_t xdrproc);
+
+int unserialize_rsp_dirent (struct gfs3_readdir_rsp *rsp, gf_dirent_t *entries);
+int unserialize_rsp_direntp (xlator_t *this, fd_t *fd,
+ struct gfs3_readdirp_rsp *rsp, gf_dirent_t *entries);
+
+int clnt_readdir_rsp_cleanup (gfs3_readdir_rsp *rsp);
+int clnt_readdirp_rsp_cleanup (gfs3_readdirp_rsp *rsp);
+int client_attempt_lock_recovery (xlator_t *this, clnt_fd_ctx_t *fdctx);
+int32_t delete_granted_locks_owner (fd_t *fd, gf_lkowner_t *owner);
+int client_add_lock_for_recovery (fd_t *fd, struct gf_flock *flock,
+ gf_lkowner_t *owner, int32_t cmd);
+int32_t delete_granted_locks_fd (clnt_fd_ctx_t *fdctx);
+int32_t client_cmd_to_gf_cmd (int32_t cmd, int32_t *gf_cmd);
+void client_save_number_fds (clnt_conf_t *conf, int count);
+int dump_client_locks (inode_t *inode);
+int client_notify_parents_child_up (xlator_t *this);
+int32_t is_client_dump_locks_cmd (char *name);
+int32_t client_dump_locks (char *name, inode_t *inode,
+ dict_t *dict);
+int client_fdctx_destroy (xlator_t *this, clnt_fd_ctx_t *fdctx);
+
+uint32_t client_get_lk_ver (clnt_conf_t *conf);
+
+int32_t client_type_to_gf_type (short l_type);
+
+int client_mark_fd_bad (xlator_t *this);
+
+int client_set_lk_version (xlator_t *this);
+
+int client_fd_lk_list_empty (fd_lk_ctx_t *lk_ctx, gf_boolean_t use_try_lock);
+void client_default_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this);
+void client_attempt_reopen (fd_t *fd, xlator_t *this);
+int client_get_remote_fd (xlator_t *this, fd_t *fd, int flags,
+ int64_t *remote_fd);
+int client_fd_fop_prepare_local (call_frame_t *frame, fd_t *fd,
+ int64_t remote_fd);
+gf_boolean_t
+__is_fd_reopen_in_progress (clnt_fd_ctx_t *fdctx);
+#endif /* !_CLIENT_H */
diff --git a/xlators/protocol/client/src/saved-frames.c b/xlators/protocol/client/src/saved-frames.c
deleted file mode 100644
index 501045fd1..000000000
--- a/xlators/protocol/client/src/saved-frames.c
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#include "saved-frames.h"
-#include "common-utils.h"
-#include "protocol.h"
-#include "xlator.h"
-
-
-
-struct saved_frames *
-saved_frames_new (void)
-{
- struct saved_frames *saved_frames = NULL;
-
- saved_frames = CALLOC (sizeof (*saved_frames), 1);
- if (!saved_frames) {
- return NULL;
- }
-
- INIT_LIST_HEAD (&saved_frames->fops.list);
- INIT_LIST_HEAD (&saved_frames->mops.list);
- INIT_LIST_HEAD (&saved_frames->cbks.list);
-
- return saved_frames;
-}
-
-
-struct saved_frame *
-get_head_frame_for_type (struct saved_frames *frames, int8_t type)
-{
- struct saved_frame *head_frame = NULL;
-
- switch (type) {
- case GF_OP_TYPE_FOP_REQUEST:
- case GF_OP_TYPE_FOP_REPLY:
- head_frame = &frames->fops;
- break;
- case GF_OP_TYPE_MOP_REQUEST:
- case GF_OP_TYPE_MOP_REPLY:
- head_frame = &frames->mops;
- break;
- case GF_OP_TYPE_CBK_REQUEST:
- case GF_OP_TYPE_CBK_REPLY:
- head_frame = &frames->cbks;
- break;
- }
-
- return head_frame;
-}
-
-
-int
-saved_frames_put (struct saved_frames *frames, call_frame_t *frame,
- int32_t op, int8_t type, int64_t callid)
-{
- struct saved_frame *saved_frame = NULL;
- struct saved_frame *head_frame = NULL;
-
- head_frame = get_head_frame_for_type (frames, type);
-
- saved_frame = CALLOC (sizeof (*saved_frame), 1);
- if (!saved_frame) {
- return -ENOMEM;
- }
-
- INIT_LIST_HEAD (&saved_frame->list);
- saved_frame->frame = frame;
- saved_frame->op = op;
- saved_frame->type = type;
- saved_frame->callid = callid;
-
- gettimeofday (&saved_frame->saved_at, NULL);
-
- list_add_tail (&saved_frame->list, &head_frame->list);
- frames->count++;
-
- return 0;
-}
-
-
-call_frame_t *
-saved_frames_get (struct saved_frames *frames, int32_t op,
- int8_t type, int64_t callid)
-{
- struct saved_frame *saved_frame = NULL;
- struct saved_frame *tmp = NULL;
- struct saved_frame *head_frame = NULL;
- call_frame_t *frame = NULL;
-
- head_frame = get_head_frame_for_type (frames, type);
-
- list_for_each_entry (tmp, &head_frame->list, list) {
- if (tmp->callid == callid) {
- list_del_init (&tmp->list);
- frames->count--;
- saved_frame = tmp;
- break;
- }
- }
-
- if (saved_frame)
- frame = saved_frame->frame;
-
- FREE (saved_frame);
-
- return frame;
-}
-
-struct saved_frame *
-saved_frames_get_timedout (struct saved_frames *frames, int8_t type,
- uint32_t timeout, struct timeval *current)
-{
- struct saved_frame *bailout_frame = NULL, *tmp = NULL;
- struct saved_frame *head_frame = NULL;
-
- head_frame = get_head_frame_for_type (frames, type);
-
- if (!list_empty(&head_frame->list)) {
- tmp = list_entry (head_frame->list.next, typeof (*tmp), list);
- if ((tmp->saved_at.tv_sec + timeout) < current->tv_sec) {
- bailout_frame = tmp;
- list_del_init (&bailout_frame->list);
- frames->count--;
- }
- }
-
- return bailout_frame;
-}
-
-void
-saved_frames_unwind (xlator_t *this, struct saved_frames *saved_frames,
- struct saved_frame *head,
- gf_op_t gf_ops[], char *gf_op_list[])
-{
- struct saved_frame *trav = NULL;
- struct saved_frame *tmp = NULL;
-
- gf_hdr_common_t hdr = {0, };
- call_frame_t *frame = NULL;
-
- hdr.rsp.op_ret = hton32 (-1);
- hdr.rsp.op_errno = hton32 (ENOTCONN);
-
- list_for_each_entry_safe (trav, tmp, &head->list, list) {
- gf_log (this->name, GF_LOG_ERROR,
- "forced unwinding frame type(%d) op(%s)",
- trav->type, gf_op_list[trav->op]);
-
- hdr.type = hton32 (trav->type);
- hdr.op = hton32 (trav->op);
-
- frame = trav->frame;
-
- saved_frames->count--;
-
- gf_ops[trav->op] (frame, &hdr, sizeof (hdr), NULL);
-
- list_del_init (&trav->list);
- FREE (trav);
- }
-}
-
-
-void
-saved_frames_destroy (xlator_t *this, struct saved_frames *frames,
- gf_op_t gf_fops[], gf_op_t gf_mops[], gf_op_t gf_cbks[])
-{
- saved_frames_unwind (this, frames, &frames->fops, gf_fops, gf_fop_list);
- saved_frames_unwind (this, frames, &frames->mops, gf_mops, gf_mop_list);
- saved_frames_unwind (this, frames, &frames->cbks, gf_cbks, gf_cbk_list);
-
- FREE (frames);
-}
diff --git a/xlators/protocol/client/src/saved-frames.h b/xlators/protocol/client/src/saved-frames.h
deleted file mode 100644
index cd628232b..000000000
--- a/xlators/protocol/client/src/saved-frames.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- Copyright (c) 2008-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _SAVED_FRAMES_H
-#define _SAVED_FRAMES_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <stdint.h>
-#include <sys/time.h>
-#include "stack.h"
-#include "list.h"
-#include "protocol.h"
-
-/* UGLY: have common typedef b/w saved-frames.c and protocol-client.c */
-typedef int32_t (*gf_op_t) (call_frame_t *frame,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf);
-
-
-struct saved_frame {
- union {
- struct list_head list;
- struct {
- struct saved_frame *frame_next;
- struct saved_frame *frame_prev;
- };
- };
-
- struct timeval saved_at;
- call_frame_t *frame;
- int32_t op;
- int8_t type;
- uint64_t callid;
-};
-
-
-struct saved_frames {
- int64_t count;
- struct saved_frame fops;
- struct saved_frame mops;
- struct saved_frame cbks;
-};
-
-
-struct saved_frames *saved_frames_new ();
-int saved_frames_put (struct saved_frames *frames, call_frame_t *frame,
- int32_t op, int8_t type, int64_t callid);
-call_frame_t *saved_frames_get (struct saved_frames *frames, int32_t op,
- int8_t type, int64_t callid);
-
-struct saved_frame *
-saved_frames_get_timedout (struct saved_frames *frames, int8_t type,
- uint32_t timeout, struct timeval *current);
-
-void saved_frames_destroy (xlator_t *this, struct saved_frames *frames,
- gf_op_t gf_fops[], gf_op_t gf_mops[],
- gf_op_t gf_cbks[]);
-
-#endif /* _SAVED_FRAMES_H */
diff --git a/xlators/protocol/server/Makefile.am b/xlators/protocol/server/Makefile.am
index d471a3f92..af437a64d 100644
--- a/xlators/protocol/server/Makefile.am
+++ b/xlators/protocol/server/Makefile.am
@@ -1,3 +1 @@
SUBDIRS = src
-
-CLEANFILES =
diff --git a/xlators/protocol/server/src/Makefile.am b/xlators/protocol/server/src/Makefile.am
index dcd92aeed..6a18bf025 100644
--- a/xlators/protocol/server/src/Makefile.am
+++ b/xlators/protocol/server/src/Makefile.am
@@ -1,18 +1,26 @@
-
xlator_LTLIBRARIES = server.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/protocol
-server_la_LDFLAGS = -module -avoidversion
+server_la_LDFLAGS = -module -avoid-version
+
+server_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la
-server_la_SOURCES = server-protocol.c server-dentry.c server-helpers.c
-server_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+server_la_SOURCES = server.c server-resolve.c server-helpers.c \
+ server-rpc-fops.c server-handshake.c authenticate.c
-noinst_HEADERS = server-protocol.h server-helpers.h
+noinst_HEADERS = server.h server-helpers.h server-mem-types.h authenticate.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles \
- -DDATADIR=\"$(localstatedir)\" -DCONFDIR=\"$(sysconfdir)/glusterfs\" \
- $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src \
+ -DCONFDIR=\"$(sysconfdir)/glusterfs\" \
+ -DLIBDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/auth\" \
+ -I$(top_srcdir)/xlators/protocol/lib/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(top_srcdir)/rpc/xdr/src
-CLEANFILES =
+AM_CFLAGS = -Wall $(GF_CFLAGS) \
+ -DDATADIR=\"$(localstatedir)\"
+CLEANFILES = *~
diff --git a/xlators/protocol/server/src/authenticate.c b/xlators/protocol/server/src/authenticate.c
new file mode 100644
index 000000000..d8d138a84
--- /dev/null
+++ b/xlators/protocol/server/src/authenticate.c
@@ -0,0 +1,253 @@
+/*
+ Copyright (c) 2007-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <stdio.h>
+#include <dlfcn.h>
+#include <errno.h>
+#include "authenticate.h"
+
+static int
+init (dict_t *this, char *key, data_t *value, void *data)
+{
+ void *handle = NULL;
+ char *auth_file = NULL;
+ auth_handle_t *auth_handle = NULL;
+ auth_fn_t authenticate = NULL;
+ int *error = NULL;
+ int ret = 0;
+
+ /* It gets over written */
+ error = data;
+
+ if (!strncasecmp (key, "ip", strlen ("ip"))) {
+ gf_log ("authenticate", GF_LOG_ERROR,
+ "AUTHENTICATION MODULE \"IP\" HAS BEEN REPLACED "
+ "BY \"ADDR\"");
+ dict_set (this, key, data_from_dynptr (NULL, 0));
+ /* TODO: 1.3.x backword compatibility */
+ // *error = -1;
+ // return;
+ key = "addr";
+ }
+
+ ret = gf_asprintf (&auth_file, "%s/%s.so", LIBDIR, key);
+ if (-1 == ret) {
+ dict_set (this, key, data_from_dynptr (NULL, 0));
+ *error = -1;
+ return -1;
+ }
+
+ handle = dlopen (auth_file, RTLD_LAZY);
+ if (!handle) {
+ gf_log ("authenticate", GF_LOG_ERROR, "dlopen(%s): %s\n",
+ auth_file, dlerror ());
+ dict_set (this, key, data_from_dynptr (NULL, 0));
+ GF_FREE (auth_file);
+ *error = -1;
+ return -1;
+ }
+ GF_FREE (auth_file);
+
+ authenticate = dlsym (handle, "gf_auth");
+ if (!authenticate) {
+ gf_log ("authenticate", GF_LOG_ERROR,
+ "dlsym(gf_auth) on %s\n", dlerror ());
+ dict_set (this, key, data_from_dynptr (NULL, 0));
+ dlclose (handle);
+ *error = -1;
+ return -1;
+ }
+
+ auth_handle = GF_CALLOC (1, sizeof (*auth_handle),
+ gf_common_mt_auth_handle_t);
+ if (!auth_handle) {
+ dict_set (this, key, data_from_dynptr (NULL, 0));
+ *error = -1;
+ dlclose (handle);
+ return -1;
+ }
+ auth_handle->vol_opt = GF_CALLOC (1, sizeof (volume_opt_list_t),
+ gf_common_mt_volume_opt_list_t);
+ if (!auth_handle->vol_opt) {
+ dict_set (this, key, data_from_dynptr (NULL, 0));
+ *error = -1;
+ GF_FREE (auth_handle);
+ dlclose (handle);
+ return -1;
+ }
+ auth_handle->vol_opt->given_opt = dlsym (handle, "options");
+ if (auth_handle->vol_opt->given_opt == NULL) {
+ gf_log ("authenticate", GF_LOG_DEBUG,
+ "volume option validation not specified");
+ }
+
+ auth_handle->authenticate = authenticate;
+ auth_handle->handle = handle;
+
+ dict_set (this, key,
+ data_from_dynptr (auth_handle, sizeof (*auth_handle)));
+ return 0;
+}
+
+static int
+fini (dict_t *this, char *key, data_t *value, void *data)
+{
+ auth_handle_t *handle = data_to_ptr (value);
+ if (handle) {
+ dlclose (handle->handle);
+ }
+ return 0;
+}
+
+static int
+_gf_auth_option_validate (dict_t *d, char *k, data_t *v, void *tmp)
+{
+ auth_handle_t *handle = NULL;
+ xlator_t *xl = NULL;
+ int ret = 0;
+
+ xl = tmp;
+
+ handle = data_to_ptr (v);
+ if (!handle)
+ return 0;
+
+ list_add_tail (&(handle->vol_opt->list), &(xl->volume_options));
+
+ ret = xlator_options_validate_list (xl, xl->options,
+ handle->vol_opt, NULL);
+ if (ret) {
+ gf_log ("authenticate", GF_LOG_ERROR,
+ "volume option validation failed");
+ return -1;
+ }
+ return 0;
+}
+
+int32_t
+gf_auth_init (xlator_t *xl, dict_t *auth_modules)
+{
+ int ret = 0;
+
+ dict_foreach (auth_modules, init, &ret);
+ if (ret)
+ goto out;
+
+ ret = dict_foreach (auth_modules, _gf_auth_option_validate, xl);
+
+out:
+ if (ret) {
+ gf_log (xl->name, GF_LOG_ERROR, "authentication init failed");
+ dict_foreach (auth_modules, fini, &ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static dict_t *__input_params;
+static dict_t *__config_params;
+
+int
+map (dict_t *this, char *key, data_t *value, void *data)
+{
+ dict_t *res = data;
+ auth_fn_t authenticate;
+ auth_handle_t *handle = NULL;
+
+ if (value && (handle = data_to_ptr (value)) &&
+ (authenticate = handle->authenticate)) {
+ dict_set (res, key,
+ int_to_data (authenticate (__input_params,
+ __config_params)));
+ } else {
+ dict_set (res, key, int_to_data (AUTH_DONT_CARE));
+ }
+ return 0;
+}
+
+int
+reduce (dict_t *this, char *key, data_t *value, void *data)
+{
+ int64_t val = 0;
+ int64_t *res = data;
+ if (!data)
+ return 0;
+
+ val = data_to_int64 (value);
+ switch (val)
+ {
+ case AUTH_ACCEPT:
+ if (AUTH_DONT_CARE == *res)
+ *res = AUTH_ACCEPT;
+ break;
+
+ case AUTH_REJECT:
+ *res = AUTH_REJECT;
+ break;
+
+ case AUTH_DONT_CARE:
+ break;
+ }
+ return 0;
+}
+
+
+auth_result_t
+gf_authenticate (dict_t *input_params,
+ dict_t *config_params,
+ dict_t *auth_modules)
+{
+ char *name = NULL;
+ dict_t *results = NULL;
+ int64_t result = AUTH_DONT_CARE;
+ data_t *peerinfo_data = NULL;
+
+ results = get_new_dict ();
+ __input_params = input_params;
+ __config_params = config_params;
+
+ dict_foreach (auth_modules, map, results);
+
+ dict_foreach (results, reduce, &result);
+ if (AUTH_DONT_CARE == result) {
+ peerinfo_data = dict_get (input_params, "peer-info-name");
+
+ if (peerinfo_data) {
+ name = peerinfo_data->data;
+ }
+
+ gf_log ("auth", GF_LOG_ERROR,
+ "no authentication module is interested in "
+ "accepting remote-client %s", name);
+ result = AUTH_REJECT;
+ }
+
+ dict_destroy (results);
+ return result;
+}
+
+void
+gf_auth_fini (dict_t *auth_modules)
+{
+ int32_t dummy;
+
+ dict_foreach (auth_modules, fini, &dummy);
+}
diff --git a/xlators/protocol/server/src/authenticate.h b/xlators/protocol/server/src/authenticate.h
new file mode 100644
index 000000000..d4d43e498
--- /dev/null
+++ b/xlators/protocol/server/src/authenticate.h
@@ -0,0 +1,51 @@
+/*
+ Copyright (c) 2007-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _AUTHENTICATE_H
+#define _AUTHENTICATE_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <stdio.h>
+#include <fnmatch.h>
+#include "dict.h"
+#include "compat.h"
+#include "list.h"
+#include "xlator.h"
+
+typedef enum {
+ AUTH_ACCEPT,
+ AUTH_REJECT,
+ AUTH_DONT_CARE
+} auth_result_t;
+
+typedef auth_result_t (*auth_fn_t) (dict_t *input_params,
+ dict_t *config_params);
+
+typedef struct {
+ void *handle;
+ auth_fn_t authenticate;
+ volume_opt_list_t *vol_opt;
+} auth_handle_t;
+
+auth_result_t gf_authenticate (dict_t *input_params,
+ dict_t *config_params,
+ dict_t *auth_modules);
+int32_t gf_auth_init (xlator_t *xl, dict_t *auth_modules);
+void gf_auth_fini (dict_t *auth_modules);
+
+#endif /* _AUTHENTICATE_H */
diff --git a/xlators/protocol/server/src/server-dentry.c b/xlators/protocol/server/src/server-dentry.c
deleted file mode 100644
index d3a69a393..000000000
--- a/xlators/protocol/server/src/server-dentry.c
+++ /dev/null
@@ -1,413 +0,0 @@
-#include "glusterfs.h"
-#include "xlator.h"
-#include "server-protocol.h"
-#include "server-helpers.h"
-#include <libgen.h>
-
-/* SERVER_DENTRY_STATE_PREPARE - prepare a fresh state for use
- *
- * @state - an empty state
- * @loc - loc_t which needs to resolved
- * @parent - most immediate parent of @loc available in dentry cache
- * @resolved - component of @loc->path which has been resolved
- * through dentry cache
- */
-#define SERVER_DENTRY_STATE_PREPARE(_state,_loc,_parent,_resolved) do { \
- size_t pathlen = 0; \
- size_t resolvedlen = 0; \
- char *path = NULL; \
- int pad = 0; \
- pathlen = strlen (_loc->path) + 1; \
- path = CALLOC (1, pathlen); \
- _state->loc.parent = inode_ref (_parent); \
- _state->loc.inode = inode_new (_state->itable); \
- if (_resolved) { \
- resolvedlen = strlen (_resolved); \
- strncpy (path, _resolved, resolvedlen); \
- _state->resolved = memdup (path, pathlen); \
- if (resolvedlen == 1) /* only root resolved */ \
- pad = 0; \
- else { \
- pad = 1; \
- path[resolvedlen] = '/'; \
- } \
- strcpy_till (path + resolvedlen + pad, loc->path + resolvedlen + pad, '/'); \
- } else { \
- strncpy (path, _loc->path, pathlen); \
- } \
- _state->loc.path = path; \
- _state->loc.name = strrchr (path, '/'); \
- if (_state->loc.name) \
- _state->loc.name++; \
- _state->path = strdup (_loc->path); \
- }while (0);
-
-/* SERVER_DENTRY_UPDATE_STATE - update a server_state_t, to prepare state
- * for new lookup
- *
- * @state - state to be updated.
- */
-#define SERVER_DENTRY_UPDATE_STATE(_state) do { \
- char *path = NULL; \
- size_t pathlen = 0; \
- strcpy (_state->resolved, _state->loc.path); \
- pathlen = strlen (_state->loc.path); \
- if (!strcmp (_state->resolved, _state->path)) { \
- free (_state->resolved); \
- _state->resolved = NULL; \
- goto resume; \
- } \
- \
- path = (char *)(_state->loc.path + pathlen); \
- path[0] = '/'; \
- strcpy_till (path + 1, \
- _state->path + pathlen + 1, '/'); \
- _state->loc.name = strrchr (_state->loc.path, '/'); \
- if (_state->loc.name) \
- _state->loc.name++; \
- inode_unref (_state->loc.parent); \
- _state->loc.parent = inode_ref (_state->loc.inode); \
- inode_unref (_state->loc.inode); \
- _state->loc.inode = inode_new (_state->itable); \
- }while (0);
-
-/* NOTE: should be used only for a state which was created by __do_path_resolve
- * using any other state will result in double free corruption.
- */
-#define SERVER_STATE_CLEANUP(_state) do { \
- if (_state->resolved) \
- free (_state->resolved); \
- if (_state->path) \
- free (_state->path); \
- server_loc_wipe (&_state->loc); \
- free_state (_state); \
- } while (0);
-
-/* strcpy_till - copy @dname to @dest, until 'delim' is encountered in @dest
- * @dest - destination string
- * @dname - source string
- * @delim - delimiter character
- *
- * return - NULL is returned if '0' is encountered in @dname, otherwise returns
- * a pointer to remaining string begining in @dest.
- */
-static char *
-strcpy_till (char *dest, const char *dname, char delim)
-{
- char *src = NULL;
- int idx = 0;
- char *ret = NULL;
-
- src = (char *)dname;
- while (src[idx] && (src[idx] != delim)) {
- dest[idx] = src[idx];
- idx++;
- }
-
- dest[idx] = 0;
-
- if (src[idx] == 0)
- ret = NULL;
- else
- ret = &(src[idx]);
-
- return ret;
-}
-
-/* __server_path_to_parenti - derive parent inode for @path. if immediate parent is
- * not available in the dentry cache, return nearest
- * available parent inode and set @reslv to the path of
- * the returned directory.
- *
- * @itable - inode table
- * @path - path whose parent has to be looked up.
- * @reslv - if immediate parent is not available, reslv will be set to path of the
- * resolved parent.
- *
- * return - should never return NULL. should at least return '/' inode.
- */
-static inode_t *
-__server_path_to_parenti (inode_table_t *itable,
- const char *path,
- char **reslv)
-{
- char *resolved_till = NULL;
- char *strtokptr = NULL;
- char *component = NULL;
- char *next_component = NULL;
- char *pathdup = NULL;
- inode_t *curr = NULL;
- inode_t *parent = NULL;
- size_t pathlen = 0;
-
-
- pathlen = STRLEN_0 (path);
- resolved_till = CALLOC (1, pathlen);
-
- GF_VALIDATE_OR_GOTO("server-dentry", resolved_till, out);
- pathdup = strdup (path);
- GF_VALIDATE_OR_GOTO("server-dentry", pathdup, out);
-
- parent = inode_ref (itable->root);
- curr = NULL;
-
- component = strtok_r (pathdup, "/", &strtokptr);
-
- while (component) {
- curr = inode_search (itable, parent->ino, component);
- if (!curr) {
- /* if current component was the last component
- set it to NULL
- */
- component = strtok_r (NULL, "/", &strtokptr);
- break;
- }
-
- /* It is OK to append the component even if it is the
- last component in the path, because, if 'next_component'
- returns NULL, @parent will remain the same and
- @resolved_till will not be sent back
- */
-
- strcat (resolved_till, "/");
- strcat (resolved_till, component);
-
- next_component = strtok_r (NULL, "/", &strtokptr);
-
- if (next_component) {
- inode_unref (parent);
- parent = curr;
- curr = NULL;
- } else {
- /* will break */
- inode_unref (curr);
- }
-
- component = next_component;
- }
-
- free (pathdup);
-
- if (component) {
- *reslv = resolved_till;
- } else {
- free (resolved_till);
- }
-out:
- return parent;
-}
-
-
-/* __do_path_resolve_cbk -
- *
- * @frame -
- * @cookie -
- * @this -
- * @op_ret -
- * @op_errno -
- * @inode -
- * @stbuf -
- * @dict -
- *
- */
-static int32_t
-__do_path_resolve_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *stbuf,
- dict_t *dict)
-{
- server_state_t *state = NULL;
- call_stub_t *stub = NULL;
- inode_t *parent = NULL;
-
- stub = frame->local;
- state = CALL_STATE(frame);
-
- parent = state->loc.parent;
-
- if (op_ret == -1) {
- if (strcmp (state->path, state->loc.path))
- parent = NULL;
-
- server_stub_resume (stub, op_ret, op_errno, NULL, parent);
- goto cleanup;
- } else {
- if (inode->ino == 0) {
- gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
- "looked up for %s (%"PRId64"/%s)",
- state->loc.path, state->loc.parent->ino, state->loc.name);
- inode_link (inode, state->loc.parent, state->loc.name, stbuf);
- inode_lookup (inode);
- }
-
- if (state->resolved) {
- SERVER_DENTRY_UPDATE_STATE(state);
-
- gf_log (BOUND_XL(frame)->name, GF_LOG_DEBUG,
- "looking up for %s (%"PRId64"/%s)",
- state->loc.path, state->loc.parent->ino, state->loc.name);
-
- STACK_WIND (frame,
- __do_path_resolve_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->lookup,
- &(state->loc),
- 0);
-
- goto out;
- }
- resume:
- /* we are done, call stub_resume() to do rest of the job */
- server_stub_resume (stub, op_ret, op_errno, inode, parent);
- cleanup:
- SERVER_STATE_CLEANUP(state);
- /* stub will be freed by stub_resume, leave no traces */
- frame->local = NULL;
- STACK_DESTROY (frame->root);
- }
-out:
- return 0;
-}
-
-/* __do_path_resolve - resolve @loc->path into @loc->inode and @loc->parent. also
- * update the dentry cache
- *
- * @stub - call stub to resume after resolving @loc->path
- * @loc - loc to resolve before resuming @stub.
- *
- * return - return value of __do_path_resolve doesn't matter to the caller, if @stub
- * is not NULL.
- */
-static int32_t
-__do_path_resolve (call_stub_t *stub,
- const loc_t *loc)
-{
- int32_t ret = -1;
- char *resolved = NULL;
- call_frame_t *new_frame = NULL;
- server_state_t *state = NULL, *new_state = NULL;
- inode_t *parent = NULL;
-
- state = CALL_STATE(stub->frame);
- parent = loc->parent;
- if (parent) {
- inode_ref (parent);
- gf_log (BOUND_XL(stub->frame)->name, GF_LOG_DEBUG,
- "loc->parent(%"PRId64") already present. sending lookup "
- "for %"PRId64"/%s", parent->ino, parent->ino, loc->name);
- resolved = strdup (loc->path);
- resolved = dirname (resolved);
- } else {
- parent = __server_path_to_parenti (state->itable, loc->path, &resolved);
- }
-
- if (parent == NULL) {
- /* fire in the bush.. run! run!! run!!! */
- gf_log ("server",
- GF_LOG_CRITICAL,
- "failed to get parent inode number");
- goto panic;
- }
-
- if (resolved) {
- gf_log (BOUND_XL(stub->frame)->name,
- GF_LOG_DEBUG,
- "resolved path(%s) till %"PRId64"(%s). "
- "sending lookup for remaining path",
- loc->path, parent->ino, resolved);
- }
-
- {
- new_frame = server_copy_frame (stub->frame);
- new_state = CALL_STATE(new_frame);
-
- SERVER_DENTRY_STATE_PREPARE(new_state, loc, parent, resolved);
-
- if (parent)
- inode_unref (parent); /* __server_path_to_parenti()'s inode_ref */
- free (resolved);
- /* now interpret state as:
- * state->path - compelete pathname to resolve
- * state->resolved - pathname resolved from dentry cache
- */
- new_frame->local = stub;
- STACK_WIND (new_frame,
- __do_path_resolve_cbk,
- BOUND_XL(new_frame),
- BOUND_XL(new_frame)->fops->lookup,
- &(new_state->loc),
- 0);
- goto out;
- }
-panic:
- server_stub_resume (stub, -1, ENOENT, NULL, NULL);
-out:
- return ret;
-}
-
-
-/*
- * do_path_lookup - transform a pathname into inode, with the compelete
- * dentry tree upto inode built.
- *
- * @stub - call stub to resume after completing pathname to inode transform
- * @loc - location. valid fields that do_path_lookup() uses in @loc are
- * @loc->path - pathname
- * @loc->ino - inode number
- *
- * return - do_path_lookup returns only after complete dentry tree is built
- * upto @loc->path.
- */
-int32_t
-do_path_lookup (call_stub_t *stub,
- const loc_t *loc)
-{
- char *pathname = NULL;
- char *directory = NULL;
- inode_t *inode = NULL;
- inode_t *parent = NULL;
- server_state_t *state = NULL;
-
- state = CALL_STATE(stub->frame);
-
- inode = inode_from_path (state->itable, loc->path);
- pathname = strdup (loc->path);
- directory = dirname (pathname);
- parent = inode_from_path (state->itable, directory);
-
- if (inode && parent) {
- gf_log (BOUND_XL(stub->frame)->name,
- GF_LOG_DEBUG,
- "resolved path(%s) to %"PRId64"/%"PRId64"(%s)",
- loc->path, parent->ino, inode->ino, loc->name);
- server_stub_resume (stub, 0, 0, inode, parent);
- inode_unref (inode);
- inode_unref (parent);
- } else {
- gf_log (BOUND_XL(stub->frame)->name,
- GF_LOG_DEBUG,
- "resolved path(%s) to %p(%"PRId64")/%p(%"PRId64")",
- loc->path, parent, (parent ? parent->ino : 0),
- inode, (inode ? inode->ino : 0));
- if (parent) {
- inode_unref (parent);
- } else if (inode) {
- inode_unref (inode);
- gf_log (BOUND_XL(stub->frame)->name,
- GF_LOG_ERROR,
- "undesired behaviour. inode(%"PRId64") for %s "
- "exists without parent (%s)",
- inode->ino, loc->path, directory);
- }
- __do_path_resolve (stub, loc);
- }
-
- if (pathname)
- free (pathname);
-
- return 0;
-}
diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c
new file mode 100644
index 000000000..d4941011d
--- /dev/null
+++ b/xlators/protocol/server/src/server-handshake.c
@@ -0,0 +1,781 @@
+/*
+ Copyright (c) 2010-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "server.h"
+#include "server-helpers.h"
+#include "glusterfs3-xdr.h"
+#include "compat-errno.h"
+#include "glusterfs3.h"
+#include "authenticate.h"
+
+struct __get_xl_struct {
+ const char *name;
+ xlator_t *reply;
+};
+int
+gf_compare_client_version (rpcsvc_request_t *req, int fop_prognum,
+ int mgmt_prognum)
+{
+ int ret = -1;
+ /* TODO: think.. */
+ if (glusterfs3_3_fop_prog.prognum == fop_prognum)
+ ret = 0;
+
+ return ret;
+}
+
+void __check_and_set (xlator_t *each, void *data)
+{
+ if (!strcmp (each->name,
+ ((struct __get_xl_struct *) data)->name))
+ ((struct __get_xl_struct *) data)->reply = each;
+}
+
+static xlator_t *
+get_xlator_by_name (xlator_t *some_xl, const char *name)
+{
+ struct __get_xl_struct get = {
+ .name = name,
+ .reply = NULL
+ };
+
+ xlator_foreach (some_xl, __check_and_set, &get);
+
+ return get.reply;
+}
+
+
+int
+_volfile_update_checksum (xlator_t *this, char *key, uint32_t checksum)
+{
+ server_conf_t *conf = NULL;
+ struct _volfile_ctx *temp_volfile = NULL;
+
+ conf = this->private;
+ temp_volfile = conf->volfile;
+
+ while (temp_volfile) {
+ if ((NULL == key) && (NULL == temp_volfile->key))
+ break;
+ if ((NULL == key) || (NULL == temp_volfile->key)) {
+ temp_volfile = temp_volfile->next;
+ continue;
+ }
+ if (strcmp (temp_volfile->key, key) == 0)
+ break;
+ temp_volfile = temp_volfile->next;
+ }
+
+ if (!temp_volfile) {
+ temp_volfile = GF_CALLOC (1, sizeof (struct _volfile_ctx),
+ gf_server_mt_volfile_ctx_t);
+ if (!temp_volfile)
+ goto out;
+ temp_volfile->next = conf->volfile;
+ temp_volfile->key = (key)? gf_strdup (key): NULL;
+ temp_volfile->checksum = checksum;
+
+ conf->volfile = temp_volfile;
+ goto out;
+ }
+
+ if (temp_volfile->checksum != checksum) {
+ gf_log (this->name, GF_LOG_INFO,
+ "the volume file was modified between a prior access "
+ "and now. This may lead to inconsistency between "
+ "clients, you are advised to remount client");
+ temp_volfile->checksum = checksum;
+ }
+
+out:
+ return 0;
+}
+
+
+static size_t
+getspec_build_volfile_path (xlator_t *this, const char *key, char *path,
+ size_t path_len)
+{
+ char *filename = NULL;
+ server_conf_t *conf = NULL;
+ int ret = -1;
+ int free_filename = 0;
+ char data_key[256] = {0,};
+
+ conf = this->private;
+
+ /* Inform users that this option is changed now */
+ ret = dict_get_str (this->options, "client-volume-filename",
+ &filename);
+ if (ret == 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "option 'client-volume-filename' is changed to "
+ "'volume-filename.<key>' which now takes 'key' as an "
+ "option to choose/fetch different files from server. "
+ "Refer documentation or contact developers for more "
+ "info. Currently defaulting to given file '%s'",
+ filename);
+ }
+
+ if (key && !filename) {
+ sprintf (data_key, "volume-filename.%s", key);
+ ret = dict_get_str (this->options, data_key, &filename);
+ if (ret < 0) {
+ /* Make sure that key doesn't contain "../" in path */
+ if ((gf_strstr (key, "/", "..")) == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: invalid key", key);
+ goto out;
+ }
+ }
+ }
+
+ if (!filename) {
+ ret = dict_get_str (this->options,
+ "volume-filename.default", &filename);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "no default volume filename given, "
+ "defaulting to %s", DEFAULT_VOLUME_FILE_PATH);
+ }
+ }
+
+ if (!filename && key) {
+ ret = gf_asprintf (&filename, "%s/%s.vol", conf->conf_dir, key);
+ if (-1 == ret)
+ goto out;
+
+ free_filename = 1;
+ }
+ if (!filename)
+ filename = DEFAULT_VOLUME_FILE_PATH;
+
+ ret = -1;
+
+ if ((filename) && (path_len > strlen (filename))) {
+ strcpy (path, filename);
+ ret = strlen (filename);
+ }
+
+out:
+ if (free_filename)
+ GF_FREE (filename);
+
+ return ret;
+}
+
+int
+_validate_volfile_checksum (xlator_t *this, char *key,
+ uint32_t checksum)
+{
+ char filename[PATH_MAX] = {0,};
+ server_conf_t *conf = NULL;
+ struct _volfile_ctx *temp_volfile = NULL;
+ int ret = 0;
+ int fd = 0;
+ uint32_t local_checksum = 0;
+
+ conf = this->private;
+ temp_volfile = conf->volfile;
+
+ if (!checksum)
+ goto out;
+
+ if (!temp_volfile) {
+ ret = getspec_build_volfile_path (this, key, filename,
+ sizeof (filename));
+ if (ret <= 0)
+ goto out;
+ fd = open (filename, O_RDONLY);
+ if (-1 == fd) {
+ ret = 0;
+ gf_log (this->name, GF_LOG_INFO,
+ "failed to open volume file (%s) : %s",
+ filename, strerror (errno));
+ goto out;
+ }
+ get_checksum_for_file (fd, &local_checksum);
+ _volfile_update_checksum (this, key, local_checksum);
+ close (fd);
+ }
+
+ temp_volfile = conf->volfile;
+ while (temp_volfile) {
+ if ((NULL == key) && (NULL == temp_volfile->key))
+ break;
+ if ((NULL == key) || (NULL == temp_volfile->key)) {
+ temp_volfile = temp_volfile->next;
+ continue;
+ }
+ if (strcmp (temp_volfile->key, key) == 0)
+ break;
+ temp_volfile = temp_volfile->next;
+ }
+
+ if (!temp_volfile)
+ goto out;
+
+ if ((temp_volfile->checksum) &&
+ (checksum != temp_volfile->checksum))
+ ret = -1;
+
+out:
+ return ret;
+}
+
+
+int
+server_getspec (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ int32_t op_errno = ENOENT;
+ int32_t spec_fd = -1;
+ size_t file_len = 0;
+ char filename[PATH_MAX] = {0,};
+ struct stat stbuf = {0,};
+ uint32_t checksum = 0;
+ char *key = NULL;
+ server_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ gf_getspec_req args = {0,};
+ gf_getspec_rsp rsp = {0,};
+
+ this = req->svc->mydata;
+ conf = this->private;
+ ret = xdr_to_generic (req->msg[0], &args,
+ (xdrproc_t)xdr_gf_getspec_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ op_errno = EINVAL;
+ goto fail;
+ }
+
+ ret = getspec_build_volfile_path (this, args.key,
+ filename, sizeof (filename));
+ if (ret > 0) {
+ /* to allocate the proper buffer to hold the file data */
+ ret = stat (filename, &stbuf);
+ if (ret < 0){
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to stat %s (%s)",
+ filename, strerror (errno));
+ op_errno = errno;
+ goto fail;
+ }
+
+ spec_fd = open (filename, O_RDONLY);
+ if (spec_fd < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unable to open %s (%s)",
+ filename, strerror (errno));
+ op_errno = errno;
+ goto fail;
+ }
+ ret = file_len = stbuf.st_size;
+
+ if (conf->verify_volfile) {
+ get_checksum_for_file (spec_fd, &checksum);
+ _volfile_update_checksum (this, key, checksum);
+ }
+ } else {
+ op_errno = ENOENT;
+ }
+
+ if (file_len) {
+ rsp.spec = GF_CALLOC (file_len, sizeof (char),
+ gf_server_mt_rsp_buf_t);
+ if (!rsp.spec) {
+ ret = -1;
+ op_errno = ENOMEM;
+ goto fail;
+ }
+ ret = read (spec_fd, rsp.spec, file_len);
+ }
+
+ /* convert to XDR */
+ op_errno = errno;
+fail:
+ if (!rsp.spec)
+ rsp.spec = "";
+ rsp.op_errno = gf_errno_to_error (op_errno);
+ rsp.op_ret = ret;
+
+ if (spec_fd != -1)
+ close (spec_fd);
+
+ server_submit_reply (NULL, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_getspec_rsp);
+
+ return 0;
+}
+
+
+int
+server_setvolume (rpcsvc_request_t *req)
+{
+ gf_setvolume_req args = {{0,},};
+ gf_setvolume_rsp rsp = {0,};
+ client_t *client = NULL;
+ server_ctx_t *serv_ctx = NULL;
+ server_conf_t *conf = NULL;
+ peer_info_t *peerinfo = NULL;
+ dict_t *reply = NULL;
+ dict_t *config_params = NULL;
+ dict_t *params = NULL;
+ char *name = NULL;
+ char *client_uid = NULL;
+ char *clnt_version = NULL;
+ xlator_t *xl = NULL;
+ char *msg = NULL;
+ char *volfile_key = NULL;
+ xlator_t *this = NULL;
+ uint32_t checksum = 0;
+ int32_t ret = -1;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ int32_t fop_version = 0;
+ int32_t mgmt_version = 0;
+ uint32_t lk_version = 0;
+ char *buf = NULL;
+ gf_boolean_t cancelled = _gf_false;
+
+ params = dict_new ();
+ reply = dict_new ();
+ ret = xdr_to_generic (req->msg[0], &args,
+ (xdrproc_t)xdr_gf_setvolume_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto fail;
+ }
+
+ this = req->svc->mydata;
+
+ config_params = dict_copy_with_ref (this->options, NULL);
+ conf = this->private;
+
+ buf = memdup (args.dict.dict_val, args.dict.dict_len);
+ if (buf == NULL) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto fail;
+ }
+
+ ret = dict_unserialize (buf, args.dict.dict_len, &params);
+ if (ret < 0) {
+ ret = dict_set_str (reply, "ERROR",
+ "Internal error: failed to unserialize "
+ "request dictionary");
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to set error msg \"%s\"",
+ "Internal error: failed to unserialize "
+ "request dictionary");
+
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto fail;
+ }
+
+ params->extra_free = buf;
+ buf = NULL;
+
+ ret = dict_get_str (params, "process-uuid", &client_uid);
+ if (ret < 0) {
+ ret = dict_set_str (reply, "ERROR",
+ "UUID not specified");
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to set error msg");
+
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto fail;
+ }
+
+ /*lk_verion :: [1..2^31-1]*/
+ ret = dict_get_uint32 (params, "clnt-lk-version", &lk_version);
+ if (ret < 0) {
+ ret = dict_set_str (reply, "ERROR",
+ "lock state version not supplied");
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to set error msg");
+
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto fail;
+ }
+
+ client = gf_client_get (this, &req->cred, client_uid);
+ if (client == NULL) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto fail;
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG, "Connected to %s", client->client_uid);
+ cancelled = server_cancel_grace_timer (this, client);
+ if (cancelled)//Do gf_client_put on behalf of grace-timer-handler.
+ gf_client_put (client, NULL);
+
+ serv_ctx = server_ctx_get (client, client->this);
+ if (serv_ctx == NULL) {
+ gf_log (this->name, GF_LOG_INFO, "server_ctx_get() failed");
+ goto fail;
+ }
+
+ if (serv_ctx->lk_version != 0 &&
+ serv_ctx->lk_version != lk_version) {
+ (void) server_connection_cleanup (this, client,
+ INTERNAL_LOCKS | POSIX_LOCKS);
+ }
+
+ if (req->trans->xl_private != client)
+ req->trans->xl_private = client;
+
+ ret = dict_get_int32 (params, "fops-version", &fop_version);
+ if (ret < 0) {
+ ret = dict_set_str (reply, "ERROR",
+ "No FOP version number specified");
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to set error msg");
+ }
+
+ ret = dict_get_int32 (params, "mgmt-version", &mgmt_version);
+ if (ret < 0) {
+ ret = dict_set_str (reply, "ERROR",
+ "No MGMT version number specified");
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to set error msg");
+ }
+
+ ret = gf_compare_client_version (req, fop_version, mgmt_version);
+ if (ret != 0) {
+ ret = gf_asprintf (&msg, "version mismatch: client(%d)"
+ " - client-mgmt(%d)",
+ fop_version, mgmt_version);
+ /* get_supported_version (req)); */
+ if (-1 == ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "asprintf failed while setting up error msg");
+ goto fail;
+ }
+ ret = dict_set_dynstr (reply, "ERROR", msg);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to set error msg");
+
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto fail;
+ }
+
+ ret = dict_get_str (params, "remote-subvolume", &name);
+ if (ret < 0) {
+ ret = dict_set_str (reply, "ERROR",
+ "No remote-subvolume option specified");
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to set error msg");
+
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto fail;
+ }
+
+ xl = get_xlator_by_name (this, name);
+ if (xl == NULL) {
+ ret = gf_asprintf (&msg, "remote-subvolume \"%s\" is not found",
+ name);
+ if (-1 == ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "asprintf failed while setting error msg");
+ goto fail;
+ }
+ ret = dict_set_dynstr (reply, "ERROR", msg);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to set error msg");
+
+ op_ret = -1;
+ op_errno = ENOENT;
+ goto fail;
+ }
+
+ if (conf->verify_volfile) {
+ ret = dict_get_uint32 (params, "volfile-checksum", &checksum);
+ if (ret == 0) {
+ ret = dict_get_str (params, "volfile-key",
+ &volfile_key);
+ if (ret)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to set 'volfile-key'");
+
+ ret = _validate_volfile_checksum (this, volfile_key,
+ checksum);
+ if (-1 == ret) {
+ ret = dict_set_str (reply, "ERROR",
+ "volume-file checksum "
+ "varies from earlier "
+ "access");
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to set error msg");
+
+ op_ret = -1;
+ op_errno = ESTALE;
+ goto fail;
+ }
+ }
+ }
+
+
+ peerinfo = &req->trans->peerinfo;
+ if (peerinfo) {
+ ret = dict_set_static_ptr (params, "peer-info", peerinfo);
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to set peer-info");
+ }
+ if (conf->auth_modules == NULL) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Authentication module not initialized");
+ }
+
+ ret = dict_get_str (params, "client-version", &clnt_version);
+ if (ret)
+ gf_log (this->name, GF_LOG_INFO, "client-version not set, "
+ "may be of older version");
+
+ ret = gf_authenticate (params, config_params,
+ conf->auth_modules);
+
+ if (ret == AUTH_ACCEPT) {
+
+ gf_log (this->name, GF_LOG_INFO,
+ "accepted client from %s (version: %s)",
+ client->client_uid,
+ (clnt_version) ? clnt_version : "old");
+ op_ret = 0;
+ client->bound_xl = xl;
+ ret = dict_set_str (reply, "ERROR", "Success");
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to set error msg");
+ } else {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Cannot authenticate client from %s %s",
+ client->client_uid,
+ (clnt_version) ? clnt_version : "old");
+
+ op_ret = -1;
+ op_errno = EACCES;
+ ret = dict_set_str (reply, "ERROR", "Authentication failed");
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to set error msg");
+ goto fail;
+ }
+
+ if (client->bound_xl == NULL) {
+ ret = dict_set_str (reply, "ERROR",
+ "Check volfile and handshake "
+ "options in protocol/client");
+ if (ret < 0)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to set error msg");
+
+ op_ret = -1;
+ op_errno = EACCES;
+ goto fail;
+ }
+
+ if ((client->bound_xl != NULL) &&
+ (ret >= 0) &&
+ (client->bound_xl->itable == NULL)) {
+ /* create inode table for this bound_xl, if one doesn't
+ already exist */
+
+ gf_log (this->name, GF_LOG_TRACE,
+ "creating inode table with lru_limit=%"PRId32", "
+ "xlator=%s", conf->inode_lru_limit,
+ client->bound_xl->name);
+
+ /* TODO: what is this ? */
+ client->bound_xl->itable =
+ inode_table_new (conf->inode_lru_limit,
+ client->bound_xl);
+ }
+
+ ret = dict_set_str (reply, "process-uuid",
+ this->ctx->process_uuid);
+ if (ret)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to set 'process-uuid'");
+
+ ret = dict_set_uint32 (reply, "clnt-lk-version", serv_ctx->lk_version);
+ if (ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set 'clnt-lk-version'");
+
+ ret = dict_set_uint64 (reply, "transport-ptr",
+ ((uint64_t) (long) req->trans));
+ if (ret)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to set 'transport-ptr'");
+
+fail:
+ rsp.dict.dict_len = dict_serialized_length (reply);
+ if (rsp.dict.dict_len < 0) {
+ gf_log ("server-handshake", GF_LOG_DEBUG,
+ "failed to get serialized length of reply dict");
+ op_ret = -1;
+ op_errno = EINVAL;
+ rsp.dict.dict_len = 0;
+ }
+
+ if (rsp.dict.dict_len) {
+ rsp.dict.dict_val = GF_CALLOC (1, rsp.dict.dict_len,
+ gf_server_mt_rsp_buf_t);
+ if (rsp.dict.dict_val) {
+ ret = dict_serialize (reply, rsp.dict.dict_val);
+ if (ret < 0) {
+ gf_log ("server-handshake", GF_LOG_DEBUG,
+ "failed to serialize reply dict");
+ op_ret = -1;
+ op_errno = -ret;
+ }
+ }
+ }
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ /* if bound_xl is NULL or something fails, then put the connection
+ * back. Otherwise the connection would have been added to the
+ * list of connections the server is maintaining and might segfault
+ * during statedump when bound_xl of the connection is accessed.
+ */
+ if (op_ret && !xl) {
+ /* We would have set the xl_private of the transport to the
+ * @conn. But if we have put the connection i.e shutting down
+ * the connection, then we should set xl_private to NULL as it
+ * would be pointing to a freed memory and would segfault when
+ * accessed upon getting DISCONNECT.
+ */
+ gf_client_put (client, NULL);
+ req->trans->xl_private = NULL;
+ }
+ server_submit_reply (NULL, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_setvolume_rsp);
+
+
+ free (args.dict.dict_val);
+
+ GF_FREE (rsp.dict.dict_val);
+
+ dict_unref (params);
+ dict_unref (reply);
+ dict_unref (config_params);
+
+ GF_FREE (buf);
+
+ return 0;
+}
+
+
+int
+server_ping (rpcsvc_request_t *req)
+{
+ gf_common_rsp rsp = {0,};
+
+ /* Accepted */
+ rsp.op_ret = 0;
+
+ server_submit_reply (NULL, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_common_rsp);
+
+ return 0;
+}
+
+int
+server_set_lk_version (rpcsvc_request_t *req)
+{
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ gf_set_lk_ver_req args = {0,};
+ gf_set_lk_ver_rsp rsp = {0,};
+ client_t *client = NULL;
+ server_ctx_t *serv_ctx = NULL;
+ xlator_t *this = NULL;
+
+ this = req->svc->mydata;
+ //TODO: Decide on an appropriate errno for the error-path
+ //below
+ if (!this)
+ goto fail;
+
+ op_ret = xdr_to_generic (req->msg[0], &args,
+ (xdrproc_t)xdr_gf_set_lk_ver_req);
+ if (op_ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto fail;
+ }
+
+ client = gf_client_get (this, &req->cred, args.uid);
+ serv_ctx = server_ctx_get (client, client->this);
+ if (serv_ctx == NULL) {
+ gf_log (this->name, GF_LOG_INFO, "server_ctx_get() failed");
+ goto fail;
+ }
+
+ serv_ctx->lk_version = args.lk_ver;
+ gf_client_put (client, NULL);
+
+ rsp.lk_ver = args.lk_ver;
+
+ op_ret = 0;
+fail:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = op_errno;
+ server_submit_reply (NULL, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_set_lk_ver_rsp);
+
+ free (args.uid);
+
+ return 0;
+}
+
+rpcsvc_actor_t gluster_handshake_actors[] = {
+ [GF_HNDSK_NULL] = {"NULL", GF_HNDSK_NULL, server_null, NULL, 0, DRC_NA},
+ [GF_HNDSK_SETVOLUME] = {"SETVOLUME", GF_HNDSK_SETVOLUME, server_setvolume, NULL, 0, DRC_NA},
+ [GF_HNDSK_GETSPEC] = {"GETSPEC", GF_HNDSK_GETSPEC, server_getspec, NULL, 0, DRC_NA},
+ [GF_HNDSK_PING] = {"PING", GF_HNDSK_PING, server_ping, NULL, 0, DRC_NA},
+ [GF_HNDSK_SET_LK_VER] = {"SET_LK_VER", GF_HNDSK_SET_LK_VER, server_set_lk_version, NULL, 0, DRC_NA},
+};
+
+
+struct rpcsvc_program gluster_handshake_prog = {
+ .progname = "GlusterFS Handshake",
+ .prognum = GLUSTER_HNDSK_PROGRAM,
+ .progver = GLUSTER_HNDSK_VERSION,
+ .actors = gluster_handshake_actors,
+ .numactors = GF_HNDSK_MAXVALUE,
+};
diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c
index 14797508d..f0b040c74 100644
--- a/xlators/protocol/server/src/server-helpers.c
+++ b/xlators/protocol/server/src/server-helpers.c
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2010-2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _CONFIG_H
@@ -22,841 +13,1249 @@
#include "config.h"
#endif
-#include "server-protocol.h"
+#include "server.h"
#include "server-helpers.h"
+#include <fnmatch.h>
-/* server_loc_fill - derive a loc_t for a given inode number
- *
- * NOTE: make sure that @loc is empty, because any pointers it holds with reference will
- * be leaked after returning from here.
- */
int
-server_loc_fill (loc_t *loc, server_state_t *state,
- ino_t ino, ino_t par,
- const char *name, const char *path)
-{
- inode_t *inode = NULL;
- inode_t *parent = NULL;
- int32_t ret = -1;
- char *dentry_path = NULL;
-
-
- GF_VALIDATE_OR_GOTO ("server", loc, out);
- GF_VALIDATE_OR_GOTO ("server", state, out);
- GF_VALIDATE_OR_GOTO ("server", path, out);
-
- /* anything beyond this point is success */
- ret = 0;
- loc->ino = ino;
- inode = loc->inode;
- if (inode == NULL) {
- if (ino)
- inode = inode_search (state->itable, ino, NULL);
-
- if ((inode == NULL) &&
- (par && name))
- inode = inode_search (state->itable, par, name);
-
- loc->inode = inode;
- if (inode)
- loc->ino = inode->ino;
- }
-
- parent = loc->parent;
- if (parent == NULL) {
- if (inode)
- parent = inode_parent (inode, par, name);
- else
- parent = inode_search (state->itable, par, NULL);
- loc->parent = parent;
- }
-
- if (name && parent) {
- ret = inode_path (parent, name, &dentry_path);
- if (ret < 0) {
- gf_log (state->bound_xl->name, GF_LOG_DEBUG,
- "failed to build path for %"PRId64"/%s: %s",
- parent->ino, name, strerror (-ret));
- }
- } else if (inode) {
- ret = inode_path (inode, NULL, &dentry_path);
- if (ret < 0) {
- gf_log (state->bound_xl->name, GF_LOG_DEBUG,
- "failed to build path for %"PRId64": %s",
- inode->ino, strerror (-ret));
- }
- }
-
- if (dentry_path) {
- if (strcmp (dentry_path, path)) {
- gf_log (state->bound_xl->name, GF_LOG_DEBUG,
- "paths differ for inode(%"PRId64"): "
- "client path = %s. dentry path = %s",
- ino, path, dentry_path);
- }
-
- loc->path = dentry_path;
- loc->name = strrchr (loc->path, '/');
- if (loc->name)
- loc->name++;
- } else {
- loc->path = strdup (path);
- loc->name = strrchr (loc->path, '/');
- if (loc->name)
- loc->name++;
- }
+server_decode_groups (call_frame_t *frame, rpcsvc_request_t *req)
+{
+ int i = 0;
-out:
- return ret;
-}
+ GF_VALIDATE_OR_GOTO ("server", frame, out);
+ GF_VALIDATE_OR_GOTO ("server", req, out);
-/*
- * stat_to_str - convert struct stat to a ASCII string
- * @stbuf: struct stat pointer
- *
- * not for external reference
- */
-char *
-stat_to_str (struct stat *stbuf)
-{
- int ret = 0;
- char *tmp_buf = NULL;
-
- uint64_t dev = stbuf->st_dev;
- uint64_t ino = stbuf->st_ino;
- uint32_t mode = stbuf->st_mode;
- uint32_t nlink = stbuf->st_nlink;
- uint32_t uid = stbuf->st_uid;
- uint32_t gid = stbuf->st_gid;
- uint64_t rdev = stbuf->st_rdev;
- uint64_t size = stbuf->st_size;
- uint32_t blksize = stbuf->st_blksize;
- uint64_t blocks = stbuf->st_blocks;
- uint32_t atime = stbuf->st_atime;
- uint32_t mtime = stbuf->st_mtime;
- uint32_t ctime = stbuf->st_ctime;
-
- uint32_t atime_nsec = ST_ATIM_NSEC(stbuf);
- uint32_t mtime_nsec = ST_MTIM_NSEC(stbuf);
- uint32_t ctime_nsec = ST_CTIM_NSEC(stbuf);
-
-
- ret = asprintf (&tmp_buf,
- GF_STAT_PRINT_FMT_STR,
- dev,
- ino,
- mode,
- nlink,
- uid,
- gid,
- rdev,
- size,
- blksize,
- blocks,
- atime,
- atime_nsec,
- mtime,
- mtime_nsec,
- ctime,
- ctime_nsec);
- if (-1 == ret) {
- gf_log ("protocol/server", GF_LOG_DEBUG,
- "asprintf failed while setting up stat buffer string");
- return NULL;
- }
- return tmp_buf;
+ if (call_stack_alloc_groups (frame->root, req->auxgidcount) != 0)
+ return -1;
+
+ frame->root->ngrps = req->auxgidcount;
+ if (frame->root->ngrps == 0)
+ return 0;
+
+ if (frame->root->ngrps > GF_MAX_AUX_GROUPS)
+ return -1;
+
+ for (; i < frame->root->ngrps; ++i)
+ frame->root->groups[i] = req->auxgids[i];
+out:
+ return 0;
}
void
server_loc_wipe (loc_t *loc)
{
- if (loc->parent)
- inode_unref (loc->parent);
- if (loc->inode)
- inode_unref (loc->inode);
- if (loc->path)
- free ((char *)loc->path);
-}
+ if (loc->parent) {
+ inode_unref (loc->parent);
+ loc->parent = NULL;
+ }
-void
-free_state (server_state_t *state)
-{
- transport_t *trans = NULL;
+ if (loc->inode) {
+ inode_unref (loc->inode);
+ loc->inode = NULL;
+ }
- trans = state->trans;
+ GF_FREE ((void *)loc->path);
+}
- if (state->fd)
- fd_unref (state->fd);
- transport_unref (trans);
-
- if (state->xattr_req)
- dict_unref (state->xattr_req);
+void
+server_resolve_wipe (server_resolve_t *resolve)
+{
+ GF_FREE ((void *)resolve->path);
- if (state->volume)
- FREE (state->volume);
+ GF_FREE ((void *)resolve->bname);
- FREE (state);
+ loc_wipe (&resolve->resolve_loc);
}
-call_frame_t *
-server_copy_frame (call_frame_t *frame)
+void
+free_state (server_state_t *state)
{
- call_frame_t *new_frame = NULL;
- server_state_t *state = NULL, *new_state = NULL;
+ if (state->xprt) {
+ rpc_transport_unref (state->xprt);
+ state->xprt = NULL;
+ }
+ if (state->fd) {
+ fd_unref (state->fd);
+ state->fd = NULL;
+ }
- state = frame->root->state;
+ if (state->params) {
+ dict_unref (state->params);
+ state->params = NULL;
+ }
- new_frame = copy_frame (frame);
+ if (state->iobref) {
+ iobref_unref (state->iobref);
+ state->iobref = NULL;
+ }
- new_state = CALLOC (1, sizeof (server_state_t));
+ if (state->iobuf) {
+ iobuf_unref (state->iobuf);
+ state->iobuf = NULL;
+ }
- new_frame->root->op = frame->root->op;
- new_frame->root->type = frame->root->type;
- new_frame->root->trans = state->trans;
- new_frame->root->state = new_state;
+ if (state->dict) {
+ dict_unref (state->dict);
+ state->dict = NULL;
+ }
- new_state->bound_xl = state->bound_xl;
- new_state->trans = transport_ref (state->trans);
- new_state->itable = state->itable;
+ if (state->xdata) {
+ dict_unref (state->xdata);
+ state->xdata = NULL;
+ }
- return new_frame;
-}
+ GF_FREE ((void *)state->volume);
-int32_t
-gf_add_locker (struct _lock_table *table,
- const char *volume,
- loc_t *loc,
- fd_t *fd,
- pid_t pid)
-{
- int32_t ret = -1;
- struct _locker *new = NULL;
- uint8_t dir = 0;
-
- new = CALLOC (1, sizeof (struct _locker));
- if (new == NULL) {
- gf_log ("server", GF_LOG_ERROR,
- "failed to allocate memory for \'struct _locker\'");
- goto out;
- }
- INIT_LIST_HEAD (&new->lockers);
-
- new->volume = strdup (volume);
-
- if (fd == NULL) {
- loc_copy (&new->loc, loc);
- dir = S_ISDIR (new->loc.inode->st_mode);
- } else {
- new->fd = fd_ref (fd);
- dir = S_ISDIR (fd->inode->st_mode);
- }
-
- new->pid = pid;
-
- LOCK (&table->lock);
- {
- if (dir)
- list_add_tail (&new->lockers, &table->dir_lockers);
- else
- list_add_tail (&new->lockers, &table->file_lockers);
- }
- UNLOCK (&table->lock);
-out:
- return ret;
-}
+ GF_FREE ((void *)state->name);
-int32_t
-gf_del_locker (struct _lock_table *table,
- const char *volume,
- loc_t *loc,
- fd_t *fd,
- pid_t pid)
-{
- struct _locker *locker = NULL, *tmp = NULL;
- int32_t ret = 0;
- uint8_t dir = 0;
- struct list_head *head = NULL;
- struct list_head del;
-
- INIT_LIST_HEAD (&del);
-
- if (fd) {
- dir = S_ISDIR (fd->inode->st_mode);
- } else {
- dir = S_ISDIR (loc->inode->st_mode);
- }
-
- LOCK (&table->lock);
- {
- if (dir) {
- head = &table->dir_lockers;
- } else {
- head = &table->file_lockers;
- }
-
- list_for_each_entry_safe (locker, tmp, head, lockers) {
- if (locker->fd &&
- fd &&
- (locker->fd == fd) && (locker->pid == pid)
- && !strcmp (locker->volume, volume)) {
- list_move_tail (&locker->lockers, &del);
- } else if (locker->loc.inode &&
- loc &&
- (locker->loc.inode == loc->inode) &&
- (locker->pid == pid)
- && !strcmp (locker->volume, volume)) {
- list_move_tail (&locker->lockers, &del);
- }
- }
- }
- UNLOCK (&table->lock);
-
- tmp = NULL;
- locker = NULL;
-
- list_for_each_entry_safe (locker, tmp, &del, lockers) {
- list_del_init (&locker->lockers);
- if (locker->fd)
- fd_unref (locker->fd);
- else
- loc_wipe (&locker->loc);
-
- free (locker->volume);
- free (locker);
- }
-
- return ret;
-}
-
-int32_t
-gf_direntry_to_bin (dir_entry_t *head,
- char *buffer)
-{
- dir_entry_t *trav = NULL;
- uint32_t len = 0;
- uint32_t this_len = 0;
- size_t buflen = -1;
- char *ptr = NULL;
- char *tmp_buf = NULL;
-
- trav = head->next;
- while (trav) {
- len += strlen (trav->name);
- len += 1;
- len += strlen (trav->link);
- len += 1; /* for '\n' */
- len += 256; // max possible for statbuf;
- trav = trav->next;
- }
-
- ptr = buffer;
- trav = head->next;
- while (trav) {
- tmp_buf = stat_to_str (&trav->buf);
- /* tmp_buf will have \n before \0 */
-
- this_len = sprintf (ptr, "%s/%s%s\n",
- trav->name, tmp_buf,
- trav->link);
-
- FREE (tmp_buf);
- trav = trav->next;
- ptr += this_len;
- }
-
- buflen = strlen (buffer);
-
- return buflen;
-}
-
-
-static struct _lock_table *
-gf_lock_table_new (void)
-{
- struct _lock_table *new = NULL;
-
- new = CALLOC (1, sizeof (struct _lock_table));
- if (new == NULL) {
- gf_log ("server-protocol", GF_LOG_CRITICAL,
- "failed to allocate memory for new lock table");
- goto out;
- }
- INIT_LIST_HEAD (&new->dir_lockers);
- INIT_LIST_HEAD (&new->file_lockers);
- LOCK_INIT (&new->lock);
-out:
- return new;
-}
-
-
-int
-do_lock_table_cleanup (xlator_t *this, server_connection_t *conn,
- call_frame_t *frame, struct _lock_table *ltable)
-{
- struct list_head file_lockers, dir_lockers;
- call_frame_t *tmp_frame = NULL;
- struct flock flock = {0, };
- xlator_t *bound_xl = NULL;
- struct _locker *locker = NULL, *tmp = NULL;
- int ret = -1;
-
- bound_xl = conn->bound_xl;
- INIT_LIST_HEAD (&file_lockers);
- INIT_LIST_HEAD (&dir_lockers);
-
- LOCK (&ltable->lock);
- {
- list_splice_init (&ltable->file_lockers,
- &file_lockers);
-
- list_splice_init (&ltable->dir_lockers, &dir_lockers);
- }
- UNLOCK (&ltable->lock);
-
- free (ltable);
-
- flock.l_type = F_UNLCK;
- flock.l_start = 0;
- flock.l_len = 0;
- list_for_each_entry_safe (locker,
- tmp, &file_lockers, lockers) {
- tmp_frame = copy_frame (frame);
- if (tmp_frame == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
- goto out;
- }
- /*
- pid = 0 is a special case that tells posix-locks
- to release all locks from this transport
- */
- tmp_frame->root->pid = 0;
- tmp_frame->root->trans = conn;
-
- if (locker->fd) {
- STACK_WIND (tmp_frame, server_nop_cbk,
- bound_xl,
- bound_xl->fops->finodelk,
- locker->volume,
- locker->fd, F_SETLK, &flock);
- fd_unref (locker->fd);
- } else {
- STACK_WIND (tmp_frame, server_nop_cbk,
- bound_xl,
- bound_xl->fops->inodelk,
- locker->volume,
- &(locker->loc), F_SETLK, &flock);
- loc_wipe (&locker->loc);
- }
-
- free (locker->volume);
-
- list_del_init (&locker->lockers);
- free (locker);
- }
-
- tmp = NULL;
- locker = NULL;
- list_for_each_entry_safe (locker, tmp, &dir_lockers, lockers) {
- tmp_frame = copy_frame (frame);
-
- tmp_frame->root->pid = 0;
- tmp_frame->root->trans = conn;
-
- if (locker->fd) {
- STACK_WIND (tmp_frame, server_nop_cbk,
- bound_xl,
- bound_xl->fops->fentrylk,
- locker->volume,
- locker->fd, NULL,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
- fd_unref (locker->fd);
- } else {
- STACK_WIND (tmp_frame, server_nop_cbk,
- bound_xl,
- bound_xl->fops->entrylk,
- locker->volume,
- &(locker->loc), NULL,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
- loc_wipe (&locker->loc);
- }
+ server_loc_wipe (&state->loc);
+ server_loc_wipe (&state->loc2);
- free (locker->volume);
-
- list_del_init (&locker->lockers);
- free (locker);
- }
- ret = 0;
+ server_resolve_wipe (&state->resolve);
+ server_resolve_wipe (&state->resolve2);
-out:
- return ret;
+ GF_FREE (state);
}
-static int32_t
-server_connection_cleanup_flush_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+static int
+server_connection_cleanup_flush_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
- fd_t *fd = NULL;
+ int32_t ret = -1;
+ fd_t *fd = NULL;
+ client_t *client = NULL;
+
+ GF_VALIDATE_OR_GOTO ("server", this, out);
+ GF_VALIDATE_OR_GOTO ("server", cookie, out);
+ GF_VALIDATE_OR_GOTO ("server", frame, out);
fd = frame->local;
+ client = frame->root->client;
fd_unref (fd);
frame->local = NULL;
+ gf_client_unref (client);
STACK_DESTROY (frame->root);
- return 0;
+
+ ret = 0;
+out:
+ return ret;
}
-int
-do_fd_cleanup (xlator_t *this, server_connection_t *conn, call_frame_t *frame,
- fdentry_t *fdentries, int fd_count)
+static int
+do_fd_cleanup (xlator_t *this, client_t* client, fdentry_t *fdentries, int fd_count)
{
fd_t *fd = NULL;
int i = 0, ret = -1;
call_frame_t *tmp_frame = NULL;
xlator_t *bound_xl = NULL;
-
- bound_xl = conn->bound_xl;
+ char *path = NULL;
+
+ GF_VALIDATE_OR_GOTO ("server", this, out);
+ GF_VALIDATE_OR_GOTO ("server", fdentries, out);
+
+ bound_xl = client->bound_xl;
for (i = 0;i < fd_count; i++) {
fd = fdentries[i].fd;
-
+
if (fd != NULL) {
- tmp_frame = copy_frame (frame);
+ tmp_frame = create_frame (this, this->ctx->pool);
if (tmp_frame == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory");
goto out;
}
+
+ GF_ASSERT (fd->inode);
+
+ ret = inode_path (fd->inode, NULL, &path);
+
+ if (ret > 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "fd cleanup on %s", path);
+ GF_FREE (path);
+ } else {
+
+ gf_log (this->name, GF_LOG_INFO,
+ "fd cleanup on inode with gfid %s",
+ uuid_utoa (fd->inode->gfid));
+ }
+
tmp_frame->local = fd;
-
+
tmp_frame->root->pid = 0;
- tmp_frame->root->trans = conn;
+ gf_client_ref (client);
+ memset (&tmp_frame->root->lk_owner, 0,
+ sizeof (gf_lkowner_t));
+
STACK_WIND (tmp_frame,
server_connection_cleanup_flush_cbk,
- bound_xl,
- bound_xl->fops->flush,
- fd);
+ bound_xl, bound_xl->fops->flush, fd, NULL);
}
}
- FREE (fdentries);
+
+ GF_FREE (fdentries);
ret = 0;
out:
return ret;
}
+
int
-do_connection_cleanup (xlator_t *this, server_connection_t *conn,
- struct _lock_table *ltable, fdentry_t *fdentries, int fd_count)
+server_connection_cleanup (xlator_t *this, client_t *client,
+ int32_t flags)
{
- int32_t ret = 0, saved_ret = 0;
- call_frame_t *frame = NULL;
- server_state_t *state = NULL;
+ server_ctx_t *serv_ctx = NULL;
+ fdentry_t *fdentries = NULL;
+ uint32_t fd_count = 0;
+ int cd_ret = 0;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO (this->name, this, out);
+ GF_VALIDATE_OR_GOTO (this->name, client, out);
+ GF_VALIDATE_OR_GOTO (this->name, flags, out);
+
+ serv_ctx = server_ctx_get (client, client->this);
- frame = create_frame (this, this->ctx->pool);
- if (frame == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
+ if (serv_ctx == NULL) {
+ gf_log (this->name, GF_LOG_INFO, "server_ctx_get() failed");
goto out;
}
- saved_ret = do_lock_table_cleanup (this, conn, frame, ltable);
-
- if (fdentries != NULL) {
- ret = do_fd_cleanup (this, conn, frame, fdentries, fd_count);
+ LOCK (&serv_ctx->fdtable_lock);
+ {
+ if (serv_ctx->fdtable && (flags & POSIX_LOCKS))
+ fdentries = gf_fd_fdtable_get_all_fds (serv_ctx->fdtable,
+ &fd_count);
}
+ UNLOCK (&serv_ctx->fdtable_lock);
- state = CALL_STATE (frame);
- if (state)
- free (state);
+ if (client->bound_xl == NULL)
+ goto out;
- STACK_DESTROY (frame->root);
+ if (flags & INTERNAL_LOCKS) {
+ cd_ret = gf_client_disconnect (client);
+ }
+
+ if (fdentries != NULL)
+ ret = do_fd_cleanup (this, client, fdentries, fd_count);
+ else
+ gf_log (this->name, GF_LOG_INFO, "no fdentries to clean");
- if (saved_ret || ret) {
+ if (cd_ret || ret)
ret = -1;
+
+out:
+ return ret;
+}
+
+
+static call_frame_t *
+server_alloc_frame (rpcsvc_request_t *req)
+{
+ call_frame_t *frame = NULL;
+ server_state_t *state = NULL;
+ client_t *client = NULL;
+
+ GF_VALIDATE_OR_GOTO ("server", req, out);
+ GF_VALIDATE_OR_GOTO ("server", req->trans, out);
+ GF_VALIDATE_OR_GOTO ("server", req->svc, out);
+ GF_VALIDATE_OR_GOTO ("server", req->svc->ctx, out);
+
+ client = req->trans->xl_private;
+ GF_VALIDATE_OR_GOTO ("server", client, out);
+
+ frame = create_frame (client->this, req->svc->ctx->pool);
+ if (!frame)
+ goto out;
+
+ state = GF_CALLOC (1, sizeof (*state), gf_server_mt_state_t);
+ if (!state)
+ goto out;
+
+ if (client->bound_xl)
+ state->itable = client->bound_xl->itable;
+
+ state->xprt = rpc_transport_ref (req->trans);
+ state->resolve.fd_no = -1;
+ state->resolve2.fd_no = -1;
+
+ frame->root->client = client;
+ frame->root->state = state; /* which socket */
+ frame->root->unique = 0; /* which call */
+
+ frame->this = client->this;
+out:
+ return frame;
+}
+
+
+call_frame_t *
+get_frame_from_request (rpcsvc_request_t *req)
+{
+ call_frame_t *frame = NULL;
+ client_t *client = NULL;
+
+ GF_VALIDATE_OR_GOTO ("server", req, out);
+
+ client = req->trans->xl_private;
+
+ frame = server_alloc_frame (req);
+ if (!frame)
+ goto out;
+
+ frame->root->op = req->procnum;
+
+ frame->root->unique = req->xid;
+
+ frame->root->uid = req->uid;
+ frame->root->gid = req->gid;
+ frame->root->pid = req->pid;
+ gf_client_ref (client);
+ frame->root->client = client;
+ frame->root->lk_owner = req->lk_owner;
+
+ server_decode_groups (frame, req);
+
+ frame->local = req;
+out:
+ return frame;
+}
+
+
+int
+server_build_config (xlator_t *this, server_conf_t *conf)
+{
+ data_t *data = NULL;
+ int ret = -1;
+ struct stat buf = {0,};
+
+ GF_VALIDATE_OR_GOTO ("server", this, out);
+ GF_VALIDATE_OR_GOTO ("server", conf, out);
+
+ ret = dict_get_int32 (this->options, "inode-lru-limit",
+ &conf->inode_lru_limit);
+ if (ret < 0) {
+ conf->inode_lru_limit = 16384;
+ }
+
+ conf->verify_volfile = 1;
+ data = dict_get (this->options, "verify-volfile-checksum");
+ if (data) {
+ ret = gf_string2boolean(data->data, &conf->verify_volfile);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "wrong value for 'verify-volfile-checksum', "
+ "Neglecting option");
+ }
}
-
+
+ data = dict_get (this->options, "trace");
+ if (data) {
+ ret = gf_string2boolean (data->data, &conf->trace);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "'trace' takes on only boolean values. "
+ "Neglecting option");
+ }
+ }
+
+ /* TODO: build_rpc_config (); */
+ ret = dict_get_int32 (this->options, "limits.transaction-size",
+ &conf->rpc_conf.max_block_size);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "defaulting limits.transaction-size to %d",
+ DEFAULT_BLOCK_SIZE);
+ conf->rpc_conf.max_block_size = DEFAULT_BLOCK_SIZE;
+ }
+
+ data = dict_get (this->options, "config-directory");
+ if (data) {
+ /* Check whether the specified directory exists,
+ or directory specified is non standard */
+ ret = stat (data->data, &buf);
+ if ((ret != 0) || !S_ISDIR (buf.st_mode)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Directory '%s' doesn't exist, exiting.",
+ data->data);
+ ret = -1;
+ goto out;
+ }
+ /* Make sure that conf-dir doesn't contain ".." in path */
+ if ((gf_strstr (data->data, "/", "..")) == -1) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: invalid conf_dir", data->data);
+ goto out;
+ }
+
+ conf->conf_dir = gf_strdup (data->data);
+ }
+ ret = 0;
out:
return ret;
}
+
+void
+print_caller (char *str, int size, call_frame_t *frame)
+{
+ server_state_t *state = NULL;
+
+ GF_VALIDATE_OR_GOTO ("server", str, out);
+ GF_VALIDATE_OR_GOTO ("server", frame, out);
+
+ state = CALL_STATE (frame);
+
+ snprintf (str, size,
+ " Callid=%"PRId64", Client=%s",
+ frame->root->unique,
+ state->xprt->peerinfo.identifier);
+
+out:
+ return;
+}
+
+
+void
+server_print_resolve (char *str, int size, server_resolve_t *resolve)
+{
+ int filled = 0;
+
+ GF_VALIDATE_OR_GOTO ("server", str, out);
+
+ if (!resolve) {
+ snprintf (str, size, "<nul>");
+ return;
+ }
+
+ filled += snprintf (str + filled, size - filled,
+ " Resolve={");
+ if (resolve->fd_no != -1)
+ filled += snprintf (str + filled, size - filled,
+ "fd=%"PRId64",", (uint64_t) resolve->fd_no);
+ if (resolve->bname)
+ filled += snprintf (str + filled, size - filled,
+ "bname=%s,", resolve->bname);
+ if (resolve->path)
+ filled += snprintf (str + filled, size - filled,
+ "path=%s", resolve->path);
+
+ snprintf (str + filled, size - filled, "}");
+out:
+ return;
+}
+
+
+void
+server_print_loc (char *str, int size, loc_t *loc)
+{
+ int filled = 0;
+
+ GF_VALIDATE_OR_GOTO ("server", str, out);
+
+ if (!loc) {
+ snprintf (str, size, "<nul>");
+ return;
+ }
+
+ filled += snprintf (str + filled, size - filled,
+ " Loc={");
+
+ if (loc->path)
+ filled += snprintf (str + filled, size - filled,
+ "path=%s,", loc->path);
+ if (loc->inode)
+ filled += snprintf (str + filled, size - filled,
+ "inode=%p,", loc->inode);
+ if (loc->parent)
+ filled += snprintf (str + filled, size - filled,
+ "parent=%p", loc->parent);
+
+ snprintf (str + filled, size - filled, "}");
+out:
+ return;
+}
+
+
+void
+server_print_params (char *str, int size, server_state_t *state)
+{
+ int filled = 0;
+
+ GF_VALIDATE_OR_GOTO ("server", str, out);
+
+ filled += snprintf (str + filled, size - filled,
+ " Params={");
+
+ if (state->fd)
+ filled += snprintf (str + filled, size - filled,
+ "fd=%p,", state->fd);
+ if (state->valid)
+ filled += snprintf (str + filled, size - filled,
+ "valid=%d,", state->valid);
+ if (state->flags)
+ filled += snprintf (str + filled, size - filled,
+ "flags=%d,", state->flags);
+ if (state->wbflags)
+ filled += snprintf (str + filled, size - filled,
+ "wbflags=%d,", state->wbflags);
+ if (state->size)
+ filled += snprintf (str + filled, size - filled,
+ "size=%zu,", state->size);
+ if (state->offset)
+ filled += snprintf (str + filled, size - filled,
+ "offset=%"PRId64",", state->offset);
+ if (state->cmd)
+ filled += snprintf (str + filled, size - filled,
+ "cmd=%d,", state->cmd);
+ if (state->type)
+ filled += snprintf (str + filled, size - filled,
+ "type=%d,", state->type);
+ if (state->name)
+ filled += snprintf (str + filled, size - filled,
+ "name=%s,", state->name);
+ if (state->mask)
+ filled += snprintf (str + filled, size - filled,
+ "mask=%d,", state->mask);
+ if (state->volume)
+ filled += snprintf (str + filled, size - filled,
+ "volume=%s,", state->volume);
+
+/* FIXME
+ snprintf (str + filled, size - filled,
+ "bound_xl=%s}", state->client->bound_xl->name);
+*/
+out:
+ return;
+}
+
+
int
-server_connection_cleanup (xlator_t *this, server_connection_t *conn)
+server_resolve_is_empty (server_resolve_t *resolve)
+{
+ if (resolve->fd_no != -1)
+ return 0;
+
+ if (resolve->path != 0)
+ return 0;
+
+ if (resolve->bname != 0)
+ return 0;
+
+ return 1;
+}
+
+
+void
+server_print_reply (call_frame_t *frame, int op_ret, int op_errno)
+{
+ server_conf_t *conf = NULL;
+ server_state_t *state = NULL;
+ xlator_t *this = NULL;
+ char caller[512];
+ char fdstr[32];
+ char *op = "UNKNOWN";
+
+ GF_VALIDATE_OR_GOTO ("server", frame, out);
+
+ this = frame->this;
+ conf = this->private;
+
+ GF_VALIDATE_OR_GOTO ("server", conf, out);
+ GF_VALIDATE_OR_GOTO ("server", conf->trace, out);
+
+ state = CALL_STATE (frame);
+
+ print_caller (caller, 256, frame);
+
+ switch (frame->root->type) {
+ case GF_OP_TYPE_FOP:
+ op = (char *)gf_fop_list[frame->root->op];
+ break;
+ default:
+ op = "";
+ }
+
+ fdstr[0] = '\0';
+ if (state->fd)
+ snprintf (fdstr, 32, " fd=%p", state->fd);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "%s%s => (%d, %d)%s",
+ op, caller, op_ret, op_errno, fdstr);
+out:
+ return;
+}
+
+
+void
+server_print_request (call_frame_t *frame)
{
- char do_cleanup = 0;
- struct _lock_table *ltable = NULL;
- fdentry_t *fdentries = NULL;
- uint32_t fd_count = 0;
- int ret = 0;
+ server_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ server_state_t *state = NULL;
+ char *op = "UNKNOWN";
+ char resolve_vars[256];
+ char resolve2_vars[256];
+ char loc_vars[256];
+ char loc2_vars[256];
+ char other_vars[512];
+ char caller[512];
+
+ GF_VALIDATE_OR_GOTO ("server", frame, out);
+
+ this = frame->this;
+ conf = this->private;
- if (conn == NULL) {
+ GF_VALIDATE_OR_GOTO ("server", conf, out);
+
+ if (!conf->trace)
goto out;
+
+ state = CALL_STATE (frame);
+
+ memset (resolve_vars, '\0', 256);
+ memset (resolve2_vars, '\0', 256);
+ memset (loc_vars, '\0', 256);
+ memset (loc2_vars, '\0', 256);
+ memset (other_vars, '\0', 256);
+
+ print_caller (caller, 256, frame);
+
+ if (!server_resolve_is_empty (&state->resolve)) {
+ server_print_resolve (resolve_vars, 256, &state->resolve);
+ server_print_loc (loc_vars, 256, &state->loc);
}
- pthread_mutex_lock (&conn->lock);
- {
- conn->active_transports--;
- if (conn->active_transports == 0) {
- if (conn->ltable) {
- ltable = conn->ltable;
- conn->ltable = gf_lock_table_new ();
- }
-
- if (conn->fdtable) {
- fdentries = gf_fd_fdtable_get_all_fds (conn->fdtable,
- &fd_count);
+ if (!server_resolve_is_empty (&state->resolve2)) {
+ server_print_resolve (resolve2_vars, 256, &state->resolve2);
+ server_print_loc (loc2_vars, 256, &state->loc2);
+ }
+
+ server_print_params (other_vars, 512, state);
+
+ switch (frame->root->type) {
+ case GF_OP_TYPE_FOP:
+ op = (char *)gf_fop_list[frame->root->op];
+ break;
+ default:
+ op = "";
+ break;
+ }
+
+ gf_log (this->name, GF_LOG_INFO,
+ "%s%s%s%s%s%s%s",
+ op, caller,
+ resolve_vars, loc_vars, resolve2_vars, loc2_vars, other_vars);
+out:
+ return;
+}
+
+
+int
+serialize_rsp_direntp (gf_dirent_t *entries, gfs3_readdirp_rsp *rsp)
+{
+ gf_dirent_t *entry = NULL;
+ gfs3_dirplist *trav = NULL;
+ gfs3_dirplist *prev = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("server", entries, out);
+ GF_VALIDATE_OR_GOTO ("server", rsp, out);
+
+ list_for_each_entry (entry, &entries->list, list) {
+ trav = GF_CALLOC (1, sizeof (*trav), gf_server_mt_dirent_rsp_t);
+ if (!trav)
+ goto out;
+
+ trav->d_ino = entry->d_ino;
+ trav->d_off = entry->d_off;
+ trav->d_len = entry->d_len;
+ trav->d_type = entry->d_type;
+ trav->name = entry->d_name;
+
+ gf_stat_from_iatt (&trav->stat, &entry->d_stat);
+
+ /* if 'dict' is present, pack it */
+ if (entry->dict) {
+ trav->dict.dict_len = dict_serialized_length (entry->dict);
+ if (trav->dict.dict_len < 0) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "failed to get serialized length "
+ "of reply dict");
+ errno = EINVAL;
+ trav->dict.dict_len = 0;
+ goto out;
+ }
+
+ trav->dict.dict_val = GF_CALLOC (1, trav->dict.dict_len,
+ gf_server_mt_rsp_buf_t);
+ if (!trav->dict.dict_val) {
+ errno = ENOMEM;
+ trav->dict.dict_len = 0;
+ goto out;
+ }
+
+ ret = dict_serialize (entry->dict, trav->dict.dict_val);
+ if (ret < 0) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "failed to serialize reply dict");
+ errno = -ret;
+ trav->dict.dict_len = 0;
+ goto out;
}
- do_cleanup = 1;
}
+
+ if (prev)
+ prev->nextentry = trav;
+ else
+ rsp->reply = trav;
+
+ prev = trav;
+ trav = NULL;
}
- pthread_mutex_unlock (&conn->lock);
- if (do_cleanup && conn->bound_xl)
- ret = do_connection_cleanup (this, conn, ltable, fdentries, fd_count);
+ ret = 0;
+out:
+ GF_FREE (trav);
+
+ return ret;
+}
+
+
+int
+serialize_rsp_dirent (gf_dirent_t *entries, gfs3_readdir_rsp *rsp)
+{
+ gf_dirent_t *entry = NULL;
+ gfs3_dirlist *trav = NULL;
+ gfs3_dirlist *prev = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO ("server", entries, out);
+ GF_VALIDATE_OR_GOTO ("server", rsp, out);
+
+ list_for_each_entry (entry, &entries->list, list) {
+ trav = GF_CALLOC (1, sizeof (*trav), gf_server_mt_dirent_rsp_t);
+ if (!trav)
+ goto out;
+ trav->d_ino = entry->d_ino;
+ trav->d_off = entry->d_off;
+ trav->d_len = entry->d_len;
+ trav->d_type = entry->d_type;
+ trav->name = entry->d_name;
+ if (prev)
+ prev->nextentry = trav;
+ else
+ rsp->reply = trav;
+
+ prev = trav;
+ }
+ ret = 0;
out:
return ret;
}
int
-server_connection_destroy (xlator_t *this, server_connection_t *conn)
-{
- call_frame_t *frame = NULL, *tmp_frame = NULL;
- xlator_t *bound_xl = NULL;
- int32_t ret = -1;
- server_state_t *state = NULL;
- struct list_head file_lockers;
- struct list_head dir_lockers;
- struct _lock_table *ltable = NULL;
- struct _locker *locker = NULL, *tmp = NULL;
- struct flock flock = {0,};
- fd_t *fd = NULL;
- int32_t i = 0;
- fdentry_t *fdentries = NULL;
- uint32_t fd_count = 0;
-
- if (conn == NULL) {
- ret = 0;
- goto out;
+readdir_rsp_cleanup (gfs3_readdir_rsp *rsp)
+{
+ gfs3_dirlist *prev = NULL;
+ gfs3_dirlist *trav = NULL;
+
+ trav = rsp->reply;
+ prev = trav;
+ while (trav) {
+ trav = trav->nextentry;
+ GF_FREE (prev);
+ prev = trav;
}
- bound_xl = (xlator_t *) (conn->bound_xl);
-
- if (bound_xl) {
- /* trans will have ref_count = 1 after this call, but its
- ok since this function is called in
- GF_EVENT_TRANSPORT_CLEANUP */
- frame = create_frame (this, this->ctx->pool);
-
- pthread_mutex_lock (&(conn->lock));
- {
- if (conn->ltable) {
- ltable = conn->ltable;
- conn->ltable = NULL;
- }
- }
- pthread_mutex_unlock (&conn->lock);
-
- INIT_LIST_HEAD (&file_lockers);
- INIT_LIST_HEAD (&dir_lockers);
-
- LOCK (&ltable->lock);
- {
- list_splice_init (&ltable->file_lockers,
- &file_lockers);
-
- list_splice_init (&ltable->dir_lockers, &dir_lockers);
- }
- UNLOCK (&ltable->lock);
- free (ltable);
-
- flock.l_type = F_UNLCK;
- flock.l_start = 0;
- flock.l_len = 0;
- list_for_each_entry_safe (locker,
- tmp, &file_lockers, lockers) {
- tmp_frame = copy_frame (frame);
- /*
- pid = 0 is a special case that tells posix-locks
- to release all locks from this transport
- */
- tmp_frame->root->pid = 0;
- tmp_frame->root->trans = conn;
-
- if (locker->fd) {
- STACK_WIND (tmp_frame, server_nop_cbk,
- bound_xl,
- bound_xl->fops->finodelk,
- locker->volume,
- locker->fd, F_SETLK, &flock);
- fd_unref (locker->fd);
- } else {
- STACK_WIND (tmp_frame, server_nop_cbk,
- bound_xl,
- bound_xl->fops->inodelk,
- locker->volume,
- &(locker->loc), F_SETLK, &flock);
- loc_wipe (&locker->loc);
- }
-
- free (locker->volume);
-
- list_del_init (&locker->lockers);
- free (locker);
- }
-
- tmp = NULL;
- locker = NULL;
- list_for_each_entry_safe (locker, tmp, &dir_lockers, lockers) {
- tmp_frame = copy_frame (frame);
-
- tmp_frame->root->pid = 0;
- tmp_frame->root->trans = conn;
-
- if (locker->fd) {
- STACK_WIND (tmp_frame, server_nop_cbk,
- bound_xl,
- bound_xl->fops->fentrylk,
- locker->volume,
- locker->fd, NULL,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
- fd_unref (locker->fd);
- } else {
- STACK_WIND (tmp_frame, server_nop_cbk,
- bound_xl,
- bound_xl->fops->entrylk,
- locker->volume,
- &(locker->loc), NULL,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK);
- loc_wipe (&locker->loc);
- }
-
- free (locker->volume);
-
- list_del_init (&locker->lockers);
- free (locker);
- }
-
- state = CALL_STATE (frame);
- if (state)
- free (state);
- STACK_DESTROY (frame->root);
-
- pthread_mutex_lock (&(conn->lock));
- {
- if (conn->fdtable) {
- fdentries = gf_fd_fdtable_get_all_fds (conn->fdtable,
- &fd_count);
- gf_fd_fdtable_destroy (conn->fdtable);
- conn->fdtable = NULL;
- }
- }
- pthread_mutex_unlock (&conn->lock);
-
- if (fdentries != NULL) {
- for (i = 0; i < fd_count; i++) {
- fd = fdentries[i].fd;
- if (fd != NULL) {
- tmp_frame = copy_frame (frame);
- tmp_frame->local = fd;
-
- STACK_WIND (tmp_frame,
- server_connection_cleanup_flush_cbk,
- bound_xl,
- bound_xl->fops->flush,
- fd);
- }
+ return 0;
+}
+
+
+int
+readdirp_rsp_cleanup (gfs3_readdirp_rsp *rsp)
+{
+ gfs3_dirplist *prev = NULL;
+ gfs3_dirplist *trav = NULL;
+
+ trav = rsp->reply;
+ prev = trav;
+ while (trav) {
+ trav = trav->nextentry;
+ GF_FREE (prev->dict.dict_val);
+ GF_FREE (prev);
+ prev = trav;
+ }
+
+ return 0;
+}
+
+
+int
+gf_server_check_getxattr_cmd (call_frame_t *frame, const char *key)
+{
+
+ server_conf_t *conf = NULL;
+ rpc_transport_t *xprt = NULL;
+
+ conf = frame->this->private;
+ if (!conf)
+ return 0;
+
+ if (fnmatch ("*list*mount*point*", key, 0) == 0) {
+ /* list all the client protocol connecting to this process */
+ pthread_mutex_lock (&conf->mutex);
+ {
+ list_for_each_entry (xprt, &conf->xprt_list, list) {
+ gf_log ("mount-point-list", GF_LOG_INFO,
+ "%s", xprt->peerinfo.identifier);
}
- FREE (fdentries);
}
- }
+ pthread_mutex_unlock (&conf->mutex);
+ }
+
+ /* Add more options/keys here */
- gf_log (this->name, GF_LOG_INFO, "destroyed connection of %s",
- conn->id);
+ return 0;
+}
- FREE (conn->id);
- FREE (conn);
-out:
- return ret;
+int
+gf_server_check_setxattr_cmd (call_frame_t *frame, dict_t *dict)
+{
+
+ server_conf_t *conf = NULL;
+ rpc_transport_t *xprt = NULL;
+ uint64_t total_read = 0;
+ uint64_t total_write = 0;
+
+ conf = frame->this->private;
+ if (!conf || !dict)
+ return 0;
+
+ if (dict_foreach_fnmatch (dict, "*io*stat*dump",
+ dict_null_foreach_fn, NULL ) > 0) {
+ list_for_each_entry (xprt, &conf->xprt_list, list) {
+ total_read += xprt->total_bytes_read;
+ total_write += xprt->total_bytes_write;
+ }
+ gf_log ("stats", GF_LOG_INFO,
+ "total-read %"PRIu64", total-write %"PRIu64,
+ total_read, total_write);
+ }
+
+ return 0;
}
-server_connection_t *
-server_connection_get (xlator_t *this, const char *id)
+gf_boolean_t
+server_cancel_grace_timer (xlator_t *this, client_t *client)
{
- server_connection_t *conn = NULL;
- server_connection_t *trav = NULL;
- server_conf_t *conf = NULL;
+ server_ctx_t *serv_ctx = NULL;
+ gf_timer_t *timer = NULL;
+ gf_boolean_t cancelled = _gf_false;
+
+ if (!this || !client) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Invalid arguments to cancel connection timer");
+ return cancelled;
+ }
+
+ serv_ctx = server_ctx_get (client, client->this);
+
+ if (serv_ctx == NULL) {
+ gf_log (this->name, GF_LOG_INFO, "server_ctx_get() failed");
+ goto out;
+ }
+
+ LOCK (&serv_ctx->fdtable_lock);
+ {
+ if (serv_ctx->grace_timer) {
+ timer = serv_ctx->grace_timer;
+ serv_ctx->grace_timer = NULL;
+ }
+ }
+ UNLOCK (&serv_ctx->fdtable_lock);
+
+ if (timer) {
+ gf_timer_call_cancel (this->ctx, timer);
+ cancelled = _gf_true;
+ }
+out:
+ return cancelled;
+}
+
+server_ctx_t*
+server_ctx_get (client_t *client, xlator_t *xlator)
+{
+ void *tmp = NULL;
+ server_ctx_t *ctx = NULL;
+
+ client_ctx_get (client, xlator, &tmp);
- conf = this->private;
+ ctx = tmp;
- pthread_mutex_lock (&conf->mutex);
- {
- list_for_each_entry (trav, &conf->conns, list) {
- if (!strcmp (id, trav->id)) {
- conn = trav;
- break;
- }
- }
+ if (ctx != NULL)
+ goto out;
+
+ ctx = GF_CALLOC (1, sizeof (server_ctx_t), gf_server_mt_server_conf_t);
+
+ if (ctx == NULL)
+ goto out;
- if (!conn) {
- conn = (void *) CALLOC (1, sizeof (*conn));
+ /* ctx->lk_version = 0; redundant */
+ ctx->fdtable = gf_fd_fdtable_alloc ();
+
+ if (ctx->fdtable == NULL) {
+ GF_FREE (ctx);
+ ctx = NULL;
+ goto out;
+ }
- conn->id = strdup (id);
- conn->fdtable = gf_fd_fdtable_alloc ();
- conn->ltable = gf_lock_table_new ();
+ LOCK_INIT (&ctx->fdtable_lock);
- pthread_mutex_init (&conn->lock, NULL);
+ if (client_ctx_set (client, xlator, ctx) != 0) {
+ LOCK_DESTROY (&ctx->fdtable_lock);
+ GF_FREE (ctx);
+ ctx = NULL;
+ }
+
+out:
+ return ctx;
+}
+
+int32_t
+gf_barrier_transmit (server_conf_t *conf, gf_barrier_payload_t *payload)
+{
+ gf_barrier_t *barrier = NULL;
+ int32_t ret = -1;
+ client_t *client = NULL;
+ gf_boolean_t lk_heal = _gf_false;
+ call_frame_t *frame = NULL;
+ server_state_t *state = NULL;
+
+ GF_VALIDATE_OR_GOTO ("barrier", conf, out);
+ GF_VALIDATE_OR_GOTO ("barrier", conf->barrier, out);
+ GF_VALIDATE_OR_GOTO ("barrier", payload, out);
+
+ barrier = conf->barrier;
+
+ frame = payload->frame;
+ if (frame) {
+ state = CALL_STATE (frame);
+ frame->local = NULL;
+ client = frame->root->client;
+ }
+ /* currently lk fops are not barrier'ed. This is reflecting code in
+ * server_submit_reply */
+ if (client)
+ lk_heal = ((server_conf_t *) client->this->private)->lk_heal;
+
+ ret = rpcsvc_submit_generic (payload->req, &payload->rsp, 1,
+ payload->payload, payload->payload_count,
+ payload->iobref);
+ iobuf_unref (payload->iob);
+ if (ret == -1) {
+ gf_log_callingfn ("", GF_LOG_ERROR, "Reply submission failed");
+ if (frame && client && !lk_heal) {
+ server_connection_cleanup (frame->this, client,
+ INTERNAL_LOCKS | POSIX_LOCKS);
+ } else {
+ /* TODO: Failure of open(dir), create, inodelk, entrylk
+ or lk fops send failure must be handled specially. */
+ }
+ goto ret;
+ }
- list_add (&conn->list, &conf->conns);
- }
+ ret = 0;
+ret:
+ if (state) {
+ free_state (state);
+ }
- conn->ref++;
- conn->active_transports++;
- }
- pthread_mutex_unlock (&conf->mutex);
+ if (frame) {
+ gf_client_unref (client);
+ STACK_DESTROY (frame->root);
+ }
- return conn;
+ if (payload->free_iobref) {
+ iobref_unref (payload->iobref);
+ }
+out:
+ return ret;
}
+gf_barrier_payload_t *
+gf_barrier_dequeue (gf_barrier_t *barrier)
+{
+ gf_barrier_payload_t *payload = NULL;
+
+ if (!barrier || list_empty (&barrier->queue))
+ return NULL;
+
+ payload = list_entry (barrier->queue.next,
+ gf_barrier_payload_t, list);
+ if (payload) {
+ list_del_init (&payload->list);
+ barrier->cur_size--;
+ }
+
+ return payload;
+}
+
+
+void*
+gf_barrier_dequeue_start (void *data)
+{
+ server_conf_t *conf = NULL;
+ gf_barrier_t *barrier = NULL;
+ gf_barrier_payload_t *payload = NULL;
+
+ conf = (server_conf_t *)data;
+ if (!conf || !conf->barrier)
+ return NULL;
+ barrier = conf->barrier;
+
+ LOCK (&barrier->lock);
+ {
+ while (barrier->cur_size) {
+ payload = gf_barrier_dequeue (barrier);
+ if (payload) {
+ if (gf_barrier_transmit (conf, payload)) {
+ gf_log ("server", GF_LOG_WARNING,
+ "Failed to transmit");
+ }
+ GF_FREE (payload);
+ }
+ }
+ }
+ UNLOCK (&barrier->lock);
+ return NULL;
+}
void
-server_connection_put (xlator_t *this, server_connection_t *conn)
+gf_barrier_timeout (void *data)
{
- server_conf_t *conf = NULL;
- server_connection_t *todel = NULL;
+ server_conf_t *conf = NULL;
+ gf_barrier_t *barrier = NULL;
+ gf_boolean_t need_dequeue = _gf_false;
- if (conn == NULL) {
+ conf = (server_conf_t *)data;
+ if (!conf || !conf->barrier)
goto out;
+ barrier = conf->barrier;
+
+ gf_log ("", GF_LOG_INFO, "barrier timed-out");
+ LOCK (&barrier->lock);
+ {
+ need_dequeue = barrier->on;
+ barrier->on = _gf_false;
+ }
+ UNLOCK (&barrier->lock);
+
+ if (need_dequeue == _gf_true)
+ gf_barrier_dequeue_start (data);
+out:
+ return;
+}
+
+
+int32_t
+gf_barrier_start (xlator_t *this)
+{
+ server_conf_t *conf = NULL;
+ gf_barrier_t *barrier = NULL;
+ int32_t ret = -1;
+ struct timespec time = {0,};
+
+ conf = this->private;
+
+ GF_VALIDATE_OR_GOTO ("server", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, conf, out);
+ GF_VALIDATE_OR_GOTO (this->name, conf->barrier, out);
+
+ barrier = conf->barrier;
+
+ gf_log (this->name, GF_LOG_INFO, "barrier start called");
+ LOCK (&barrier->lock);
+ {
+ /* if barrier is on, reset timer */
+ if (barrier->on == _gf_true) {
+ ret = gf_timer_call_cancel (this->ctx, barrier->timer);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "unset timer, failing barrier start");
+ goto unlock;
+ }
+ }
+
+ barrier->on = _gf_true;
+ time.tv_sec = barrier->time_out;
+ time.tv_nsec = 0;
+
+ barrier->timer = gf_timer_call_after (this->ctx, time,
+ gf_barrier_timeout,
+ (void *)conf);
+ if (!barrier->timer) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set "
+ "timer, failing barrier start");
+ barrier->on = _gf_false;
+ }
}
+unlock:
+ UNLOCK (&barrier->lock);
+
+ ret = 0;
+out:
+ return ret;
+}
- conf = this->private;
+int32_t
+gf_barrier_stop (xlator_t *this)
+{
+ server_conf_t *conf = NULL;
+ gf_barrier_t *barrier = NULL;
+ int32_t ret = -1;
+ gf_boolean_t need_dequeue = _gf_false;
- pthread_mutex_lock (&conf->mutex);
- {
- conn->ref--;
+ conf = this->private;
- if (!conn->ref) {
- list_del_init (&conn->list);
- todel = conn;
- }
- }
- pthread_mutex_unlock (&conf->mutex);
+ GF_VALIDATE_OR_GOTO ("server", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, conf, out);
+ GF_VALIDATE_OR_GOTO (this->name, conf->barrier, out);
- if (todel) {
- server_connection_destroy (this, todel);
- }
+ barrier = conf->barrier;
+ gf_log (this->name, GF_LOG_INFO, "barrier stop called");
+ LOCK (&barrier->lock);
+ {
+ need_dequeue = barrier->on;
+ barrier->on = _gf_false;
+ }
+ UNLOCK (&barrier->lock);
+
+ if (need_dequeue == _gf_true) {
+ gf_timer_call_cancel (this->ctx, barrier->timer);
+ ret = gf_thread_create (&conf->barrier_th, NULL,
+ gf_barrier_dequeue_start,
+ conf);
+ if (ret) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Failed to start un-barriering");
+ goto out;
+ }
+ }
+ ret = 0;
out:
- return;
+ return ret;
+}
+
+int32_t
+gf_barrier_fops_configure (xlator_t *this, gf_barrier_t *barrier, char *str)
+{
+ int32_t ret = -1;
+ char *dup_str = NULL;
+ char *str_tok = NULL;
+ char *save_ptr = NULL;
+ uint64_t fops = 0;
+
+ /* by defaul fsync & flush needs to be barriered */
+
+ fops |= 1 << GFS3_OP_FSYNC;
+ fops |= 1 << GFS3_OP_FLUSH;
+
+ if (!str)
+ goto done;
+
+ dup_str = gf_strdup (str);
+ if (!dup_str)
+ goto done;
+
+ str_tok = strtok_r (dup_str, ",", &save_ptr);
+ if (!str_tok)
+ goto done;
+
+ fops = 0;
+ while (str_tok) {
+ if (!strcmp(str_tok, "writev")) {
+ fops |= ((uint64_t)1 << GFS3_OP_WRITE);
+ } else if (!strcmp(str_tok, "fsync")) {
+ fops |= ((uint64_t)1 << GFS3_OP_FSYNC);
+ } else if (!strcmp(str_tok, "read")) {
+ fops |= ((uint64_t)1 << GFS3_OP_READ);
+ } else if (!strcmp(str_tok, "rename")) {
+ fops |= ((uint64_t)1 << GFS3_OP_RENAME);
+ } else if (!strcmp(str_tok, "flush")) {
+ fops |= ((uint64_t)1 << GFS3_OP_FLUSH);
+ } else if (!strcmp(str_tok, "ftruncate")) {
+ fops |= ((uint64_t)1 << GFS3_OP_FTRUNCATE);
+ } else if (!strcmp(str_tok, "fallocate")) {
+ fops |= ((uint64_t)1 << GFS3_OP_FALLOCATE);
+ } else if (!strcmp(str_tok, "rmdir")) {
+ fops |= ((uint64_t)1 << GFS3_OP_RMDIR);
+ } else {
+ gf_log ("barrier", GF_LOG_ERROR,
+ "Invalid barrier fop %s", str_tok);
+ }
+
+ str_tok = strtok_r (NULL, ",", &save_ptr);
+ }
+done:
+ LOCK (&barrier->lock);
+ {
+ barrier->fops = fops;
+ }
+ UNLOCK (&barrier->lock);
+ ret = 0;
+
+ GF_FREE (dup_str);
+ return ret;
+}
+
+void
+gf_barrier_enqueue (gf_barrier_t *barrier, gf_barrier_payload_t *payload)
+{
+ list_add_tail (&payload->list, &barrier->queue);
+ barrier->cur_size++;
+}
+
+gf_barrier_payload_t *
+gf_barrier_payload (rpcsvc_request_t *req, struct iovec *rsp,
+ call_frame_t *frame, struct iovec *payload_orig,
+ int payloadcount, struct iobref *iobref,
+ struct iobuf *iob, gf_boolean_t free_iobref)
+{
+ gf_barrier_payload_t *payload = NULL;
+
+ if (!rsp)
+ return NULL;
+
+ payload = GF_CALLOC (1, sizeof (*payload),1);
+ if (!payload)
+ return NULL;
+
+ INIT_LIST_HEAD (&payload->list);
+
+ payload->req = req;
+ memcpy (&payload->rsp, rsp, sizeof (struct iovec));
+ payload->frame = frame;
+ payload->payload = payload_orig;
+ payload->payload_count = payloadcount;
+ payload->iobref = iobref;
+ payload->iob = iob;
+ payload->free_iobref = free_iobref;
+
+ return payload;
}
diff --git a/xlators/protocol/server/src/server-helpers.h b/xlators/protocol/server/src/server-helpers.h
index 48f5f4a6d..b455aa6df 100644
--- a/xlators/protocol/server/src/server-helpers.h
+++ b/xlators/protocol/server/src/server-helpers.h
@@ -1,77 +1,74 @@
/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ Copyright (c) 2010-2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef __SERVER_HELPERS_H__
-#define __SERVER_HELPERS_H__
-
-#define CALL_STATE(frame) ((server_state_t *)frame->root->state)
+#ifndef _SERVER_HELPERS_H
+#define _SERVER_HELPERS_H
-#define BOUND_XL(frame) ((xlator_t *) CALL_STATE(frame)->bound_xl)
+#include "server.h"
-#define TRANSPORT_FROM_FRAME(frame) ((transport_t *) CALL_STATE(frame)->trans)
+#define CALL_STATE(frame) ((server_state_t *)frame->root->state)
-#define SERVER_CONNECTION(frame) \
- ((server_connection_t *) TRANSPORT_FROM_FRAME(frame)->xl_private)
+#define XPRT_FROM_FRAME(frame) ((rpc_transport_t *) CALL_STATE(frame)->xprt)
-#define SERVER_CONF(frame) \
- ((server_conf_t *)TRANSPORT_FROM_FRAME(frame)->xl->private)
+#define SERVER_CONF(frame) \
+ ((server_conf_t *)XPRT_FROM_FRAME(frame)->this->private)
-#define TRANSPORT_FROM_XLATOR(this) ((((server_conf_t *)this->private))->trans)
+#define XPRT_FROM_XLATOR(this) ((((server_conf_t *)this->private))->listen)
-#define INODE_LRU_LIMIT(this) \
- (((server_conf_t *)(this->private))->inode_lru_limit)
+#define INODE_LRU_LIMIT(this) \
+ (((server_conf_t *)(this->private))->config.inode_lru_limit)
#define IS_ROOT_INODE(inode) (inode == inode->table->root)
#define IS_NOT_ROOT(pathlen) ((pathlen > 2)? 1 : 0)
-int32_t
-server_loc_fill (loc_t *loc,
- server_state_t *state,
- ino_t ino,
- ino_t par,
- const char *name,
- const char *path);
-
-char *
-stat_to_str (struct stat *stbuf);
+#define is_fop_barriered(fops, procnum) (fops & ((uint64_t)1 << procnum))
-call_frame_t *
-server_copy_frame (call_frame_t *frame);
+#define barrier_add_to_queue(barrier) (barrier->on || barrier->cur_size)
void free_state (server_state_t *state);
void server_loc_wipe (loc_t *loc);
-int32_t
-gf_add_locker (struct _lock_table *table, const char *volume,
- loc_t *loc,
- fd_t *fd,
- pid_t pid);
+void
+server_print_request (call_frame_t *frame);
+
+call_frame_t *
+get_frame_from_request (rpcsvc_request_t *req);
+
+int
+server_connection_cleanup (xlator_t *this, struct _client_t *client,
+ int32_t flags);
+
+gf_boolean_t
+server_cancel_grace_timer (xlator_t *this, struct _client_t *client);
+
+int
+server_build_config (xlator_t *this, server_conf_t *conf);
+
+int serialize_rsp_dirent (gf_dirent_t *entries, gfs3_readdir_rsp *rsp);
+int serialize_rsp_direntp (gf_dirent_t *entries, gfs3_readdirp_rsp *rsp);
+int readdirp_rsp_cleanup (gfs3_readdirp_rsp *rsp);
+int readdir_rsp_cleanup (gfs3_readdir_rsp *rsp);
-int32_t
-gf_del_locker (struct _lock_table *table, const char *volume,
- loc_t *loc,
- fd_t *fd,
- pid_t pid);
+server_ctx_t *server_ctx_get (client_t *client, xlator_t *xlator);
-int32_t
-gf_direntry_to_bin (dir_entry_t *head, char *bufferp);
+int32_t gf_barrier_start (xlator_t *this);
+int32_t gf_barrier_stop (xlator_t *this);
+int32_t gf_barrier_fops_configure (xlator_t *this, gf_barrier_t *barrier,
+ char *str);
+void gf_barrier_enqueue (gf_barrier_t *barrier, gf_barrier_payload_t *stub);
+gf_barrier_payload_t *
+gf_barrier_payload (rpcsvc_request_t *req, struct iovec *rsp,
+ call_frame_t *frame, struct iovec *payload,
+ int payloadcount, struct iobref *iobref,
+ struct iobuf *iob, gf_boolean_t free_iobref);
-#endif /* __SERVER_HELPERS_H__ */
+#endif /* !_SERVER_HELPERS_H */
diff --git a/xlators/protocol/server/src/server-mem-types.h b/xlators/protocol/server/src/server-mem-types.h
new file mode 100644
index 000000000..19c3466d3
--- /dev/null
+++ b/xlators/protocol/server/src/server-mem-types.h
@@ -0,0 +1,30 @@
+/*
+ Copyright (c) 2010-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef __SERVER_MEM_TYPES_H__
+#define __SERVER_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_server_mem_types_ {
+ gf_server_mt_server_conf_t = gf_common_mt_end + 1,
+ gf_server_mt_resolv_comp_t,
+ gf_server_mt_state_t,
+ gf_server_mt_locker_t,
+ gf_server_mt_lock_table_t,
+ gf_server_mt_conn_t,
+ gf_server_mt_dirent_rsp_t,
+ gf_server_mt_rsp_buf_t,
+ gf_server_mt_volfile_ctx_t,
+ gf_server_mt_timer_data_t,
+ gf_server_mt_end,
+};
+#endif /* __SERVER_MEM_TYPES_H__ */
diff --git a/xlators/protocol/server/src/server-protocol.c b/xlators/protocol/server/src/server-protocol.c
deleted file mode 100644
index e7660d245..000000000
--- a/xlators/protocol/server/src/server-protocol.c
+++ /dev/null
@@ -1,7989 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-#include <time.h>
-#include <sys/uio.h>
-#include <sys/resource.h>
-
-#include <libgen.h>
-
-#include "transport.h"
-#include "fnmatch.h"
-#include "xlator.h"
-#include "protocol.h"
-#include "server-protocol.h"
-#include "server-helpers.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "list.h"
-#include "dict.h"
-#include "compat.h"
-#include "compat-errno.h"
-#include "statedump.h"
-
-
-static void
-protocol_server_reply (call_frame_t *frame, int type, int op,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iovec *vector, int count,
- struct iobref *iobref)
-{
- server_state_t *state = NULL;
- xlator_t *bound_xl = NULL;
- transport_t *trans = NULL;
- int ret = 0;
-
- bound_xl = BOUND_XL (frame);
- state = CALL_STATE (frame);
- trans = state->trans;
-
- hdr->callid = hton64 (frame->root->unique);
- hdr->type = hton32 (type);
- hdr->op = hton32 (op);
-
- ret = transport_submit (trans, (char *)hdr, hdrlen, vector,
- count, iobref);
- if (ret < 0) {
- gf_log ("protocol/server", GF_LOG_ERROR,
- "frame %"PRId64": failed to submit. op= %d, type= %d",
- frame->root->unique, op, type);
- }
-
- STACK_DESTROY (frame->root);
-
- if (state)
- free_state (state);
-
-}
-
-
-/*
- * server_fchmod_cbk
- */
-int
-server_fchmod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *stbuf)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fchmod_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
- server_state_t *state = NULL;
-
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret == 0) {
- gf_stat_from_stat (&rsp->stat, stbuf);
- } else {
- state = CALL_STATE (frame);
-
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": FCHMOD %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FCHMOD,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_fchmod
- *
- */
-int
-server_fchmod (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- server_connection_t *conn = NULL;
- gf_fop_fchmod_req_t *req = NULL;
- server_state_t *state = NULL;
-
- conn = SERVER_CONNECTION(frame);
-
- req = gf_param (hdr);
- state = CALL_STATE (frame);
- {
- state->fd_no = ntoh64 (req->fd);
- if (state->fd_no >= 0)
- state->fd = gf_fd_fdptr_get (conn->fdtable,
- state->fd_no);
-
- state->mode = ntoh32 (req->mode);
- }
-
- GF_VALIDATE_OR_GOTO(bound_xl->name, state->fd, fail);
-
- STACK_WIND (frame, server_fchmod_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->fchmod,
- state->fd, state->mode);
-
- return 0;
-fail:
- server_fchmod_cbk (frame, NULL, frame->this, -1, EINVAL, NULL);
- return 0;
-}
-
-
-/*
- * server_fchown_cbk
- */
-int
-server_fchown_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *stbuf)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fchown_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
- server_state_t *state = NULL;
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret == 0) {
- gf_stat_from_stat (&rsp->stat, stbuf);
- } else {
- state = CALL_STATE(frame);
-
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": FCHOWN %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FCHOWN,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_fchown
- *
- */
-int
-server_fchown (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- server_connection_t *conn = NULL;
- gf_fop_fchown_req_t *req = NULL;
- server_state_t *state = NULL;
-
- conn = SERVER_CONNECTION(frame);
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
- {
- state->fd_no = ntoh64 (req->fd);
- if (state->fd_no >= 0)
- state->fd = gf_fd_fdptr_get (conn->fdtable,
- state->fd_no);
-
- state->uid = ntoh32 (req->uid);
- state->gid = ntoh32 (req->gid);
- }
-
- GF_VALIDATE_OR_GOTO(bound_xl->name, state->fd, fail);
-
- STACK_WIND (frame, server_fchown_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->fchown,
- state->fd, state->uid, state->gid);
-
- return 0;
-fail:
- server_fchown_cbk (frame, NULL, frame->this, -1, EINVAL, NULL);
- return 0;
-}
-
-/*
- * server_setdents_cbk - writedir callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret: return value
- * @op_errno: errno
- *
- * not for external reference
- */
-int
-server_setdents_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_setdents_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_SETDENTS,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_lk_cbk - lk callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @lock:
- *
- * not for external reference
- */
-int
-server_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct flock *lock)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_lk_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
- server_state_t *state = NULL;
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret == 0) {
- gf_flock_from_flock (&rsp->flock, lock);
- } else if (op_errno != ENOSYS) {
- state = CALL_STATE(frame);
-
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": LK %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_LK,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-int
-server_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- server_connection_t *conn = NULL;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_inodelk_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- conn = SERVER_CONNECTION(frame);
-
- state = CALL_STATE(frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret >= 0) {
- if (state->flock.l_type == F_UNLCK)
- gf_del_locker (conn->ltable, state->volume,
- &state->loc, NULL, frame->root->pid);
- else
- gf_add_locker (conn->ltable, state->volume,
- &state->loc, NULL, frame->root->pid);
- } else if (op_errno != ENOSYS) {
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": INODELK %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- server_loc_wipe (&state->loc);
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_INODELK,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-int
-server_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- server_connection_t *conn = NULL;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_finodelk_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- conn = SERVER_CONNECTION(frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- state = CALL_STATE(frame);
-
- if (op_ret >= 0) {
- if (state->flock.l_type == F_UNLCK)
- gf_del_locker (conn->ltable, state->volume,
- NULL, state->fd,
- frame->root->pid);
- else
- gf_add_locker (conn->ltable, state->volume,
- NULL, state->fd,
- frame->root->pid);
- } else if (op_errno != ENOSYS) {
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": FINODELK %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FINODELK,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-/*
- * server_entrylk_cbk -
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @lock:
- *
- * not for external reference
- */
-int
-server_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- server_connection_t *conn = NULL;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_entrylk_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- conn = SERVER_CONNECTION(frame);
-
- state = CALL_STATE(frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret >= 0) {
- if (state->cmd == ENTRYLK_UNLOCK)
- gf_del_locker (conn->ltable, state->volume,
- &state->loc, NULL, frame->root->pid);
- else
- gf_add_locker (conn->ltable, state->volume,
- &state->loc, NULL, frame->root->pid);
- } else if (op_errno != ENOSYS) {
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": INODELK %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- server_loc_wipe (&state->loc);
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_ENTRYLK,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-int
-server_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- server_connection_t *conn = NULL;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fentrylk_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- conn = SERVER_CONNECTION(frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret >= 0) {
- state = CALL_STATE(frame);
- if (state->cmd == ENTRYLK_UNLOCK)
- gf_del_locker (conn->ltable, state->volume,
- NULL, state->fd, frame->root->pid);
- else
- gf_add_locker (conn->ltable, state->volume,
- NULL, state->fd, frame->root->pid);
- } else if (op_errno != ENOSYS) {
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": FENTRYLK %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FENTRYLK,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-/*
- * server_access_cbk - access callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- *
- * not for external reference
- */
-int
-server_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_access_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- state = CALL_STATE(frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- server_loc_wipe (&(state->loc));
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_ACCESS,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_utimens_cbk - utimens callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @stbuf:
- *
- * not for external reference
- */
-int
-server_utimens_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *stbuf)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_utimens_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
-
- state = CALL_STATE(frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret == 0)
- gf_stat_from_stat (&rsp->stat, stbuf);
-
- server_loc_wipe (&(state->loc));
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_UTIMENS,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_chmod_cbk - chmod callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @stbuf:
- *
- * not for external reference
- */
-int
-server_chmod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *stbuf)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_chmod_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- state = CALL_STATE(frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret == 0)
- gf_stat_from_stat (&rsp->stat, stbuf);
-
- server_loc_wipe (&(state->loc));
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_CHMOD,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_chown_cbk - chown callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @stbuf:
- *
- * not for external reference
- */
-int
-server_chown_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *stbuf)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_chown_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- int32_t gf_errno = 0;
- size_t hdrlen = 0;
-
- state = CALL_STATE(frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret == 0)
- gf_stat_from_stat (&rsp->stat, stbuf);
-
- server_loc_wipe (&(state->loc));
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_CHOWN,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_rmdir_cbk - rmdir callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- *
- * not for external reference
- */
-int
-server_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_rmdir_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- int32_t gf_errno = 0;
- size_t hdrlen = 0;
-
- state = CALL_STATE(frame);
-
- if (op_ret == 0) {
- inode_unlink (state->loc.inode, state->loc.parent,
- state->loc.name);
- inode_forget (state->loc.inode, 0);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": RMDIR %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- server_loc_wipe (&(state->loc));
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_RMDIR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_mkdir_cbk - mkdir callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @stbuf:
- *
- * not for external reference
- */
-int
-server_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *stbuf)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_mkdir_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- state = CALL_STATE(frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret >= 0) {
- gf_stat_from_stat (&rsp->stat, stbuf);
- inode_link (inode, state->loc.parent, state->loc.name, stbuf);
- inode_lookup (inode);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": MKDIR %s ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- op_ret, strerror (op_errno));
- }
-
- server_loc_wipe (&(state->loc));
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_MKDIR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_mknod_cbk - mknod callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @stbuf:
- *
- * not for external reference
- */
-int
-server_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *stbuf)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_mknod_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- int32_t gf_errno = 0;
- size_t hdrlen = 0;
-
- state = CALL_STATE(frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret >= 0) {
- gf_stat_from_stat (&rsp->stat, stbuf);
- inode_link (inode, state->loc.parent, state->loc.name, stbuf);
- inode_lookup (inode);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": MKNOD %s ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- op_ret, strerror (op_errno));
- }
-
- server_loc_wipe (&(state->loc));
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_MKNOD,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_fsyncdir_cbk - fsyncdir callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- *
- * not for external reference
- */
-int
-server_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fsyncdir_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
- server_state_t *state = NULL;
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- if (op_ret < 0) {
- state = CALL_STATE(frame);
-
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": FSYNCDIR %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FSYNCDIR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-/*
- * server_getdents_cbk - readdir callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret: return value
- * @op_errno: errno
- * @entries:
- * @count:
- *
- * not for external reference
- */
-int
-server_getdents_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dir_entry_t *entries,
- int32_t count)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_getdents_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- int32_t vec_count = 0;
- int32_t gf_errno = 0;
- struct iobref *iobref = NULL;
- struct iobuf *iobuf = NULL;
- size_t buflen = 0;
- struct iovec vector[1];
- server_state_t *state = NULL;
-
- state = CALL_STATE(frame);
-
- if (op_ret >= 0) {
- iobuf = iobuf_get (this->ctx->iobuf_pool);
- if (!iobuf) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
-
- buflen = gf_direntry_to_bin (entries, iobuf->ptr);
- if (buflen < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "fd - %"PRId64" (%"PRId64"): failed to convert "
- "entries list to string buffer",
- state->fd_no, state->fd->inode->ino);
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
-
- iobref = iobref_new ();
- if (iobref == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "fd - %"PRId64" (%"PRId64"): failed to get iobref",
- state->fd_no, state->fd->inode->ino);
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
-
- iobref_add (iobref, iobuf);
-
- vector[0].iov_base = iobuf->ptr;
- vector[0].iov_len = buflen;
- vec_count = 1;
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": GETDENTS %"PRId64" (%"PRId64"): %"PRId32" (%s)",
- frame->root->unique,
- state->fd_no,
- state->fd ? state->fd->inode->ino : 0,
- op_ret, strerror (op_errno));
- vector[0].iov_base = NULL;
- vector[0].iov_len = 0;
- }
-
-out:
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- rsp->count = hton32 (count);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_GETDENTS,
- hdr, hdrlen, vector, vec_count, iobref);
-
- if (iobref)
- iobref_unref (iobref);
- if (iobuf)
- iobuf_unref (iobuf);
-
- return 0;
-}
-
-
-/*
- * server_readdir_cbk - getdents callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- *
- * not for external reference
- */
-int
-server_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_readdir_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- size_t buf_size = 0;
- int32_t gf_errno = 0;
- server_state_t *state = NULL;
-
- if (op_ret > 0)
- buf_size = gf_dirent_serialize (entries, NULL, 0);
-
- hdrlen = gf_hdr_len (rsp, buf_size);
- hdr = gf_hdr_new (rsp, buf_size);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret > 0) {
- rsp->size = hton32 (buf_size);
- gf_dirent_serialize (entries, rsp->buf, buf_size);
- } else {
- state = CALL_STATE(frame);
-
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": READDIR %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_READDIR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-/*
- * server_releasedir_cbk - releasedir callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret: return value
- * @op_errno: errno
- *
- * not for external reference
- */
-int
-server_releasedir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_cbk_releasedir_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- protocol_server_reply (frame, GF_OP_TYPE_CBK_REPLY, GF_CBK_RELEASEDIR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-/*
- * server_opendir_cbk - opendir callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret: return value
- * @op_errno: errno
- * @fd: file descriptor structure of opened directory
- *
- * not for external reference
- */
-int
-server_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
-{
- server_connection_t *conn = NULL;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_opendir_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- conn = SERVER_CONNECTION (frame);
-
- state = CALL_STATE (frame);
-
- if (op_ret >= 0) {
- fd_bind (fd);
-
- state->fd_no = gf_fd_unused_get (conn->fdtable, fd);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "%"PRId64": OPENDIR %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
-
- /* NOTE: corresponding to fd_create()'s ref */
- if (state->fd)
- fd_unref (state->fd);
- }
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
- rsp->fd = hton64 (state->fd_no);
-
- server_loc_wipe (&(state->loc));
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_OPENDIR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_statfs_cbk - statfs callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret: return value
- * @op_errno: errno
- * @buf:
- *
- * not for external reference
- */
-int
-server_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_statfs_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- state = CALL_STATE (frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret >= 0) {
- gf_statfs_from_statfs (&rsp->statfs, buf);
- }
-
- server_loc_wipe (&(state->loc));
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_STATFS,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_removexattr_cbk - removexattr callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret: return value
- * @op_errno: errno
- *
- * not for external reference
- */
-int
-server_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_removexattr_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- state = CALL_STATE (frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- server_loc_wipe (&(state->loc));
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_REMOVEXATTR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_getxattr_cbk - getxattr callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret: return value
- * @op_errno: errno
- * @value:
- *
- * not for external reference
- */
-int
-server_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_getxattr_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t len = 0;
- int32_t gf_errno = 0;
- int32_t ret = -1;
-
- state = CALL_STATE (frame);
-
- if (op_ret >= 0) {
- len = dict_serialized_length (dict);
- if (len < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s (%"PRId64"): failed to get serialized length of "
- "reply dict",
- state->loc.path, state->ino);
- op_ret = -1;
- op_errno = EINVAL;
- len = 0;
- }
- }
-
- hdrlen = gf_hdr_len (rsp, len + 1);
- hdr = gf_hdr_new (rsp, len + 1);
- rsp = gf_param (hdr);
-
- if (op_ret >= 0) {
- ret = dict_serialize (dict, rsp->dict);
- if (len < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s (%"PRId64"): failed to serialize reply dict",
- state->loc.path, state->ino);
- op_ret = -1;
- op_errno = -ret;
- }
- }
- rsp->dict_len = hton32 (len);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
-
- server_loc_wipe (&(state->loc));
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_GETXATTR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-int
-server_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fgetxattr_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t len = 0;
- int32_t gf_errno = 0;
- int32_t ret = -1;
-
- state = CALL_STATE (frame);
-
- if (op_ret >= 0) {
- len = dict_serialized_length (dict);
- if (len < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s (%"PRId64"): failed to get serialized length of "
- "reply dict",
- state->loc.path, state->ino);
- op_ret = -1;
- op_errno = EINVAL;
- len = 0;
- }
- }
-
- hdrlen = gf_hdr_len (rsp, len + 1);
- hdr = gf_hdr_new (rsp, len + 1);
- rsp = gf_param (hdr);
-
- if (op_ret >= 0) {
- ret = dict_serialize (dict, rsp->dict);
- if (len < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s (%"PRId64"): failed to serialize reply dict",
- state->loc.path, state->ino);
- op_ret = -1;
- op_errno = -ret;
- }
- }
- rsp->dict_len = hton32 (len);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- fd_unref (state->fd);
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FGETXATTR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-/*
- * server_setxattr_cbk - setxattr callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret: return value
- * @op_errno: errno
- *
- * not for external reference
- */
-int
-server_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_setxattr_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- state = CALL_STATE (frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- server_loc_wipe (&(state->loc));
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_SETXATTR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-int
-server_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fsetxattr_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- state = CALL_STATE(frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FSETXATTR,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-/*
- * server_rename_cbk - rename callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret: return value
- * @op_errno: errno
- *
- * not for external reference
- */
-int
-server_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *stbuf)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_rename_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- state = CALL_STATE(frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret == 0) {
- stbuf->st_ino = state->loc.inode->ino;
- stbuf->st_mode = state->loc.inode->st_mode;
-
- gf_log (state->bound_xl->name, GF_LOG_TRACE,
- "%"PRId64": RENAME_CBK (%"PRId64") %"PRId64"/%s "
- "==> %"PRId64"/%s",
- frame->root->unique, state->loc.inode->ino,
- state->loc.parent->ino, state->loc.name,
- state->loc2.parent->ino, state->loc2.name);
-
- inode_rename (state->itable,
- state->loc.parent, state->loc.name,
- state->loc2.parent, state->loc2.name,
- state->loc.inode, stbuf);
- gf_stat_from_stat (&rsp->stat, stbuf);
- }
-
- server_loc_wipe (&(state->loc));
- server_loc_wipe (&(state->loc2));
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_RENAME,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-/*
- * server_unlink_cbk - unlink callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret: return value
- * @op_errno: errno
- *
- * not for external reference
- */
-int
-server_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_unlink_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- state = CALL_STATE(frame);
-
- if (op_ret == 0) {
- gf_log (state->bound_xl->name, GF_LOG_TRACE,
- "%"PRId64": UNLINK_CBK %"PRId64"/%s (%"PRId64")",
- frame->root->unique, state->loc.parent->ino,
- state->loc.name, state->loc.inode->ino);
-
- inode_unlink (state->loc.inode, state->loc.parent,
- state->loc.name);
-
- inode_forget (state->loc.inode, 0);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": UNLINK %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- server_loc_wipe (&(state->loc));
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_UNLINK,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_symlink_cbk - symlink callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret: return value
- * @op_errno: errno
- *
- * not for external reference
- */
-int
-server_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *stbuf)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_symlink_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- state = CALL_STATE(frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno_to_error (op_errno));
-
- if (op_ret >= 0) {
- gf_stat_from_stat (&rsp->stat, stbuf);
- inode_link (inode, state->loc.parent, state->loc.name, stbuf);
- inode_lookup (inode);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": SYMLINK %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- server_loc_wipe (&(state->loc));
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_SYMLINK,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-/*
- * server_link_cbk - link callback for server protocol
- * @frame: call frame
- * @this:
- * @op_ret:
- * @op_errno:
- * @stbuf:
- *
- * not for external reference
- */
-int
-server_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct stat *stbuf)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_link_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- int32_t gf_errno = 0;
- size_t hdrlen = 0;
-
- state = CALL_STATE(frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret == 0) {
- stbuf->st_ino = state->loc.inode->ino;
- gf_stat_from_stat (&rsp->stat, stbuf);
- gf_log (state->bound_xl->name, GF_LOG_TRACE,
- "%"PRId64": LINK (%"PRId64") %"PRId64"/%s ==> %"PRId64"/%s",
- frame->root->unique, inode->ino,
- state->loc2.parent->ino,
- state->loc2.name, state->loc.parent->ino,
- state->loc.name);
-
- inode_link (inode, state->loc2.parent,
- state->loc2.name, stbuf);
- } else {
- gf_log (state->bound_xl->name, GF_LOG_DEBUG,
- "%"PRId64": LINK (%"PRId64") %"PRId64"/%s ==> %"PRId64"/%s "
- " ==> %"PRId32" (%s)",
- frame->root->unique, inode->ino,
- state->loc2.parent->ino,
- state->loc2.name, state->loc.parent->ino,
- state->loc.name,
- op_ret, strerror (op_errno));
- }
-
- server_loc_wipe (&(state->loc));
- server_loc_wipe (&(state->loc2));
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_LINK,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-/*
- * server_truncate_cbk - truncate callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @stbuf:
- *
- * not for external reference
- */
-int
-server_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *stbuf)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_truncate_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- state = CALL_STATE (frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret == 0) {
- gf_stat_from_stat (&rsp->stat, stbuf);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": TRUNCATE %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- server_loc_wipe (&(state->loc));
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_TRUNCATE,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_fstat_cbk - fstat callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @stbuf:
- *
- * not for external reference
- */
-int
-server_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *stbuf)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fstat_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
- server_state_t *state = NULL;
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret == 0) {
- gf_stat_from_stat (&rsp->stat, stbuf);
- } else {
- state = CALL_STATE(frame);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": FSTAT %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FSTAT,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_ftruncate_cbk - ftruncate callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @stbuf:
- *
- * not for external reference
- */
-int
-server_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *stbuf)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_ftruncate_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
- server_state_t *state = NULL;
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret == 0) {
- gf_stat_from_stat (&rsp->stat, stbuf);
- } else {
- state = CALL_STATE (frame);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": FTRUNCATE %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FTRUNCATE,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-/*
- * server_flush_cbk - flush callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- *
- * not for external reference
- */
-int
-server_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_flush_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
- server_state_t *state = NULL;
-
- if (op_ret < 0) {
- state = CALL_STATE(frame);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": FLUSH %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FLUSH,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_fsync_cbk - fsync callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- *
- * not for external reference
- */
-int
-server_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_fsync_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
- server_state_t *state = NULL;
-
- if (op_ret < 0) {
- state = CALL_STATE(frame);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": FSYNC %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FSYNC,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_release_cbk - rleease callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- *
- * not for external reference
- */
-int
-server_release_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_cbk_release_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- protocol_server_reply (frame, GF_OP_TYPE_CBK_REPLY, GF_CBK_RELEASE,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-/*
- * server_writev_cbk - writev callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- *
- * not for external reference
- */
-
-int
-server_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *stbuf)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_write_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
- server_state_t *state = NULL;
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno_to_error (op_errno));
-
- if (op_ret >= 0) {
- gf_stat_from_stat (&rsp->stat, stbuf);
- } else {
- state = CALL_STATE(frame);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": WRITEV %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_WRITE,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-/*
- * server_readv_cbk - readv callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @vector:
- * @count:
- *
- * not for external reference
- */
-int
-server_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count,
- struct stat *stbuf, struct iobref *iobref)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_read_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
- server_state_t *state = NULL;
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret >= 0) {
- gf_stat_from_stat (&rsp->stat, stbuf);
- } else {
- state = CALL_STATE(frame);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": READV %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_READ,
- hdr, hdrlen, vector, count, iobref);
-
- return 0;
-}
-
-
-/*
- * server_open_cbk - open callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @fd:
- *
- * not for external reference
- */
-int
-server_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
-{
- server_connection_t *conn = NULL;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_open_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- conn = SERVER_CONNECTION (frame);
-
- state = CALL_STATE (frame);
-
- if (op_ret >= 0) {
- fd_bind (fd);
-
- state->fd_no = gf_fd_unused_get (conn->fdtable, fd);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": OPEN %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
-
- /* NOTE: corresponding to fd_create()'s ref */
- if (state->fd)
- fd_unref (state->fd);
- }
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
- rsp->fd = hton64 (state->fd_no);
-
- server_loc_wipe (&(state->loc));
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_OPEN,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-/*
- * server_create_cbk - create callback for server
- * @frame: call frame
- * @cookie:
- * @this: translator structure
- * @op_ret:
- * @op_errno:
- * @fd: file descriptor
- * @inode: inode structure
- * @stbuf: struct stat of created file
- *
- * not for external reference
- */
-int
-server_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- fd_t *fd, inode_t *inode, struct stat *stbuf)
-{
- server_connection_t *conn = NULL;
- gf_hdr_common_t *hdr = NULL;
- gf_fop_create_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- conn = SERVER_CONNECTION (frame);
-
- state = CALL_STATE (frame);
-
- if (op_ret >= 0) {
- gf_log (state->bound_xl->name, GF_LOG_TRACE,
- "%"PRId64": CREATE %"PRId64"/%s (%"PRId64")",
- frame->root->unique, state->loc.parent->ino,
- state->loc.name, stbuf->st_ino);
-
- inode_link (inode, state->loc.parent, state->loc.name, stbuf);
- inode_lookup (inode);
-
- fd_bind (fd);
-
- state->fd_no = gf_fd_unused_get (conn->fdtable, fd);
-
- if ((state->fd_no < 0) || (fd == 0)) {
- op_ret = state->fd_no;
- op_errno = errno;
- }
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": CREATE %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
-
- /* NOTE: corresponding to fd_create()'s ref */
- if (state->fd)
- fd_unref (state->fd);
-
- }
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
- rsp->fd = hton64 (state->fd_no);
-
- if (op_ret >= 0)
- gf_stat_from_stat (&rsp->stat, stbuf);
-
- server_loc_wipe (&(state->loc));
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_CREATE,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_readlink_cbk - readlink callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @buf:
- *
- * not for external reference
- */
-int
-server_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, const char *buf)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_readlink_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- size_t linklen = 0;
- int32_t gf_errno = 0;
-
- state = CALL_STATE(frame);
-
- if (op_ret >= 0) {
- linklen = strlen (buf) + 1;
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": READLINK %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- hdrlen = gf_hdr_len (rsp, linklen);
- hdr = gf_hdr_new (rsp, linklen);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno_to_error (op_errno));
-
- if (op_ret >= 0)
- strcpy (rsp->path, buf);
-
- server_loc_wipe (&(state->loc));
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_READLINK,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_stat_cbk - stat callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @stbuf:
- *
- * not for external reference
- */
-int
-server_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct stat *stbuf)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_stat_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- state = CALL_STATE (frame);
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno_to_error (op_errno));
-
- if (op_ret == 0) {
- gf_stat_from_stat (&rsp->stat, stbuf);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": STAT %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- server_loc_wipe (&(state->loc));
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_STAT,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-/*
- * server_lookup_cbk - lookup callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret:
- * @op_errno:
- * @inode:
- * @stbuf:
- *
- * not for external reference
- */
-int
-server_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct stat *stbuf, dict_t *dict)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_lookup_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- inode_t *root_inode = NULL;
- int32_t dict_len = 0;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
- int32_t ret = -1;
- loc_t loc = {0,};
-
- state = CALL_STATE(frame);
- if ((op_errno == ESTALE) && (op_ret == -1)) {
- /* Send lookup again with new ctx dictionary */
-
- root_inode = BOUND_XL(frame)->itable->root;
- if (state->loc.inode != root_inode) {
- if (state->loc.inode)
- inode_unref (state->loc.inode);
- state->loc.inode = inode_new (BOUND_XL(frame)->itable);
- }
- loc.inode = state->loc.inode;
- loc.path = state->path;
- state->is_revalidate = 2;
-
- STACK_WIND (frame, server_lookup_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->lookup,
- &loc,
- state->xattr_req);
- return 0;
- }
-
- if (dict) {
- dict_len = dict_serialized_length (dict);
- if (dict_len < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s (%"PRId64"): failed to get serialized "
- "length of reply dict",
- state->loc.path, state->loc.inode->ino);
- op_ret = -1;
- op_errno = EINVAL;
- dict_len = 0;
- }
- }
-
- hdrlen = gf_hdr_len (rsp, dict_len);
- hdr = gf_hdr_new (rsp, dict_len);
- rsp = gf_param (hdr);
-
- if ((op_ret >= 0) && dict) {
- ret = dict_serialize (dict, rsp->dict);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s (%"PRId64"): failed to serialize reply dict",
- state->loc.path, state->loc.inode->ino);
- op_ret = -1;
- op_errno = -ret;
- dict_len = 0;
- }
- }
- rsp->dict_len = hton32 (dict_len);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret == 0) {
- root_inode = BOUND_XL(frame)->itable->root;
- if (inode == root_inode) {
- /* we just looked up root ("/") */
- stbuf->st_ino = 1;
- if (inode->st_mode == 0)
- inode->st_mode = stbuf->st_mode;
- }
-
- gf_stat_from_stat (&rsp->stat, stbuf);
-
- if (inode->ino == 0) {
- inode_link (inode, state->loc.parent,
- state->loc.name, stbuf);
- inode_lookup (inode);
- }
- } else {
- gf_log (this->name,
- (op_errno == ENOENT ? GF_LOG_TRACE : GF_LOG_DEBUG),
- "%"PRId64": LOOKUP %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- server_loc_wipe (&state->loc);
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_LOOKUP,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-int
-server_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_xattrop_rsp_t *rsp = NULL;
- server_state_t *state = NULL;
- size_t hdrlen = 0;
- int32_t len = 0;
- int32_t gf_errno = 0;
- int32_t ret = -1;
-
- state = CALL_STATE (frame);
-
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": XATTROP %s (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->loc.path,
- state->loc.inode ? state->loc.inode->ino : 0,
- op_ret, strerror (op_errno));
- }
-
- if ((op_ret >= 0) && dict) {
- len = dict_serialized_length (dict);
- if (len < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s (%"PRId64"): failed to get serialized length"
- " for reply dict",
- state->loc.path, state->loc.inode->ino);
- op_ret = -1;
- op_errno = EINVAL;
- len = 0;
- }
- }
-
- hdrlen = gf_hdr_len (rsp, len + 1);
- hdr = gf_hdr_new (rsp, len + 1);
- rsp = gf_param (hdr);
-
- if ((op_ret >= 0) && dict) {
- ret = dict_serialize (dict, rsp->dict);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s (%"PRId64"): failed to serialize reply dict",
- state->loc.path, state->loc.inode->ino);
- op_ret = -1;
- op_errno = -ret;
- len = 0;
- }
- }
- rsp->dict_len = hton32 (len);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- server_loc_wipe (&(state->loc));
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_XATTROP,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-int
-server_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_xattrop_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- int32_t len = 0;
- int32_t gf_errno = 0;
- int32_t ret = -1;
- server_state_t *state = NULL;
-
- state = CALL_STATE(frame);
-
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%"PRId64": FXATTROP %"PRId64" (%"PRId64") ==> %"PRId32" (%s)",
- frame->root->unique, state->fd_no,
- state->fd ? state->fd->inode->ino : 0, op_ret,
- strerror (op_errno));
- }
-
- if ((op_ret >= 0) && dict) {
- len = dict_serialized_length (dict);
- if (len < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "fd - %"PRId64" (%"PRId64"): failed to get "
- "serialized length for reply dict",
- state->fd_no, state->fd->inode->ino);
- op_ret = -1;
- op_errno = EINVAL;
- len = 0;
- }
- }
-
- hdrlen = gf_hdr_len (rsp, len + 1);
- hdr = gf_hdr_new (rsp, len + 1);
- rsp = gf_param (hdr);
-
- if ((op_ret >= 0) && dict) {
- ret = dict_serialize (dict, rsp->dict);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "fd - %"PRId64" (%"PRId64"): failed to "
- "serialize reply dict",
- state->fd_no, state->fd->inode->ino);
- op_ret = -1;
- op_errno = -ret;
- len = 0;
- }
- }
- rsp->dict_len = hton32 (len);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_FXATTROP,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * server_stub_resume - this is callback function used whenever an fop does
- * STACK_WIND to fops->lookup in order to lookup the inode
- * for a pathname. this case of doing fops->lookup arises
- * when fop searches in inode table for pathname and search
- * fails.
- *
- * @stub: call stub
- * @op_ret:
- * @op_errno:
- * @inode:
- * @parent:
- *
- * not for external reference
- */
-int
-server_stub_resume (call_stub_t *stub, int32_t op_ret, int32_t op_errno,
- inode_t *inode, inode_t *parent)
-{
- inode_t *server_inode = NULL;
- loc_t *newloc = NULL;
- dict_t *dict = NULL;
-
- server_inode = inode;
-
- if (!stub) {
- return 0;
- }
-
- switch (stub->fop)
- {
- case GF_FOP_RENAME:
- if ((op_ret < 0 && stub->args.rename.old.inode == NULL) ||
- (op_ret < 0 && parent == NULL)) {
- /* Either oldloc lookup failed OR newloc lookup
- * failed and parent was not found
- */
- gf_log (stub->frame->this->name, GF_LOG_DEBUG,
- "%"PRId64": RENAME (%s -> %s) on %s "
- "returning error: "
- "%"PRId32" (%"PRId32")",
- stub->frame->root->unique,
- stub->args.rename.old.path,
- stub->args.rename.new.path,
- BOUND_XL(stub->frame)->name,
- op_ret, op_errno);
-
- /* lookup of oldpath failed, UNWIND to
- * server_rename_cbk with ret=-1 and
- * errno=ENOENT
- */
- server_rename_cbk (stub->frame, NULL,
- stub->frame->this,
- -1, ENOENT, NULL);
- server_loc_wipe (&stub->args.rename.old);
- server_loc_wipe (&stub->args.rename.new);
- FREE (stub);
- return 0;
- }
-
- if (stub->args.rename.old.inode == NULL) {
- /* now we are called by lookup of oldpath. */
- if (stub->args.rename.old.parent == NULL)
- stub->args.rename.old.parent =
- inode_ref (parent);
-
- /* store inode information of oldpath in our stub
- * and search for newpath in inode table.
- */
- if (server_inode) {
- stub->args.rename.old.inode =
- inode_ref (server_inode);
-
- stub->args.rename.old.ino =
- server_inode->ino;
- }
-
- /* now lookup for newpath */
- newloc = &stub->args.rename.new;
-
- if (newloc->parent == NULL) {
- /* lookup for newpath */
- do_path_lookup (stub, newloc);
- break;
- } else {
- /* found newpath in inode cache */
- call_resume (stub);
- break;
- }
- } else {
- /* we are called by the lookup of newpath */
- if (stub->args.rename.new.parent == NULL)
- stub->args.rename.new.parent =
- inode_ref (parent);
- }
-
- /* after looking up for oldpath as well as newpath,
- * we are ready to resume */
- {
- call_resume (stub);
- }
- break;
-
- case GF_FOP_OPEN:
- {
- if (op_ret < 0) {
- gf_log (stub->frame->this->name, GF_LOG_DEBUG,
- "%"PRId64": OPEN (%s) on %s returning error: "
- "%"PRId32" (%"PRId32")",
- stub->frame->root->unique,
- stub->args.open.loc.path,
- BOUND_XL(stub->frame)->name,
- op_ret, op_errno);
-
- server_open_cbk (stub->frame, NULL, stub->frame->this,
- -1, ENOENT, NULL);
- FREE (stub->args.open.loc.path);
- FREE (stub);
- return 0;
- }
- if (stub->args.open.loc.parent == NULL)
- stub->args.open.loc.parent = inode_ref (parent);
-
- if (server_inode && (stub->args.open.loc.inode == NULL)) {
- stub->args.open.loc.inode = inode_ref (server_inode);
- stub->args.open.loc.ino = server_inode->ino;
- }
- call_resume (stub);
- break;
- }
-
- case GF_FOP_LOOKUP:
- {
- if (op_ret < 0) {
- gf_log (stub->frame->this->name,
- GF_LOG_DEBUG,
- "%"PRId64": LOOKUP (%s) on %s returning error: "
- "%"PRId32" (%"PRId32")",
- stub->frame->root->unique,
- stub->args.lookup.loc.path,
- BOUND_XL(stub->frame)->name,
- op_ret, op_errno);
-
- server_lookup_cbk (stub->frame, NULL,
- stub->frame->this, -1, ENOENT,
- NULL, NULL, NULL);
- server_loc_wipe (&stub->args.lookup.loc);
- FREE (stub);
- return 0;
- }
-
- if (stub->args.lookup.loc.parent == NULL)
- stub->args.lookup.loc.parent = inode_ref (parent);
-
- if (server_inode && (stub->args.lookup.loc.inode == NULL)) {
- stub->args.lookup.loc.inode = inode_ref (server_inode);
- stub->args.lookup.loc.ino = server_inode->ino;
- }
-
- call_resume (stub);
-
- break;
- }
-
- case GF_FOP_STAT:
- {
- if (op_ret < 0) {
- gf_log (stub->frame->this->name, GF_LOG_DEBUG,
- "%"PRId64": STAT (%s) on %s returning error: "
- "%"PRId32" (%"PRId32")",
- stub->frame->root->unique,
- stub->args.stat.loc.path,
- BOUND_XL(stub->frame)->name,
- op_ret, op_errno);
- server_stat_cbk (stub->frame, NULL, stub->frame->this,
- -1, ENOENT, NULL);
- server_loc_wipe (&stub->args.stat.loc);
- FREE (stub);
- return 0;
- }
-
- /* TODO:reply from here only, we already have stat structure */
- if (stub->args.stat.loc.parent == NULL)
- stub->args.stat.loc.parent = inode_ref (parent);
-
- if (server_inode && (stub->args.stat.loc.inode == NULL)) {
- stub->args.stat.loc.inode = inode_ref (server_inode);
- stub->args.stat.loc.ino = server_inode->ino;
- }
- call_resume (stub);
- break;
- }
-
- case GF_FOP_XATTROP:
- {
- if (op_ret < 0) {
- gf_log (stub->frame->this->name, GF_LOG_DEBUG,
- "%"PRId64": XATTROP (%s) on %s returning error: "
- "%"PRId32" (%"PRId32")",
- stub->frame->root->unique,
- stub->args.xattrop.loc.path,
- BOUND_XL(stub->frame)->name,
- op_ret, op_errno);
- server_xattrop_cbk (stub->frame, NULL,
- stub->frame->this, -1, ENOENT,
- NULL);
- server_loc_wipe (&stub->args.xattrop.loc);
- FREE (stub);
- return 0;
- }
-
- if (stub->args.xattrop.loc.parent == NULL)
- stub->args.xattrop.loc.parent = inode_ref (parent);
-
- if (server_inode && (stub->args.xattrop.loc.inode == NULL)) {
- stub->args.xattrop.loc.inode =
- inode_ref (server_inode);
-
- stub->args.xattrop.loc.ino = server_inode->ino;
- }
- call_resume (stub);
- break;
- }
-
- case GF_FOP_UNLINK:
- {
- if (op_ret < 0) {
- gf_log (stub->frame->this->name, GF_LOG_DEBUG,
- "%"PRId64": UNLINK (%s) on %s returning error: "
- "%"PRId32" (%"PRId32")",
- stub->frame->root->unique,
- stub->args.unlink.loc.path,
- BOUND_XL(stub->frame)->name,
- op_ret, op_errno);
- server_unlink_cbk (stub->frame, NULL,
- stub->frame->this, -1, ENOENT);
- server_loc_wipe (&stub->args.unlink.loc);
- FREE (stub);
- return 0;
- }
-
- if (stub->args.unlink.loc.parent == NULL)
- stub->args.unlink.loc.parent = inode_ref (parent);
-
- if (server_inode && (stub->args.unlink.loc.inode == NULL)) {
- stub->args.unlink.loc.inode = inode_ref (server_inode);
- stub->args.unlink.loc.ino = server_inode->ino;
- }
- call_resume (stub);
- break;
- }
-
- case GF_FOP_SYMLINK:
- {
- if ((op_ret < 0) && (parent == NULL)) {
- gf_log (stub->frame->this->name, GF_LOG_DEBUG,
- "%"PRId64": SYMLINK (%s -> %s) on %s returning error: "
- "%"PRId32" (%"PRId32")",
- stub->frame->root->unique,
- stub->args.symlink.loc.path,
- stub->args.symlink.linkname,
- BOUND_XL(stub->frame)->name,
- op_ret, op_errno);
- server_symlink_cbk (stub->frame, NULL,
- stub->frame->this, -1, ENOENT,
- NULL, NULL);
- server_loc_wipe (&stub->args.symlink.loc);
- FREE (stub);
- return 0;
- }
-
- if (stub->args.symlink.loc.parent == NULL)
- stub->args.symlink.loc.parent = inode_ref (parent);
-
- if (server_inode && (stub->args.symlink.loc.inode == NULL)) {
- stub->args.symlink.loc.inode =
- inode_ref (server_inode);
- stub->args.symlink.loc.ino = server_inode->ino;
- }
- call_resume (stub);
- break;
- }
-
- case GF_FOP_RMDIR:
- {
- if (op_ret < 0) {
- gf_log (stub->frame->this->name, GF_LOG_DEBUG,
- "%"PRId64": RMDIR (%s) on %s returning error: "
- "%"PRId32" (%"PRId32")",
- stub->frame->root->unique,
- stub->args.rmdir.loc.path,
- BOUND_XL(stub->frame)->name,
- op_ret, op_errno);
- server_rmdir_cbk (stub->frame, NULL, stub->frame->this,
- -1, ENOENT);
- server_loc_wipe (&stub->args.rmdir.loc);
- FREE (stub);
- return 0;
- }
-
- if (stub->args.rmdir.loc.parent == NULL)
- stub->args.rmdir.loc.parent = inode_ref (parent);
-
- if (server_inode && (stub->args.rmdir.loc.inode == NULL)) {
- stub->args.rmdir.loc.inode = inode_ref (server_inode);
- stub->args.rmdir.loc.ino = server_inode->ino;
- }
- call_resume (stub);
- break;
- }
-
- case GF_FOP_CHMOD:
- {
- if (op_ret < 0) {
- gf_log (stub->frame->this->name, GF_LOG_DEBUG,
- "%"PRId64": CHMOD (%s) on %s returning error: "
- "%"PRId32" (%"PRId32")",
- stub->frame->root->unique,
- stub->args.chmod.loc.path,
- BOUND_XL(stub->frame)->name,
- op_ret, op_errno);
- server_chmod_cbk (stub->frame, NULL, stub->frame->this,
- -1, ENOENT, NULL);
- server_loc_wipe (&stub->args.chmod.loc);
- FREE (stub);
- return 0;
- }
-
- if (stub->args.chmod.loc.parent == NULL)
- stub->args.chmod.loc.parent = inode_ref (parent);
-
- if (server_inode && (stub->args.chmod.loc.inode == NULL)) {
- stub->args.chmod.loc.inode = inode_ref (server_inode);
- stub->args.chmod.loc.ino = server_inode->ino;
- }
- call_resume (stub);
- break;
- }
-
- case GF_FOP_CHOWN:
- {
- if (op_ret < 0) {
- gf_log (stub->frame->this->name, GF_LOG_DEBUG,
- "%"PRId64": CHOWN (%s) on %s returning ENOENT: "
- "%"PRId32" (%"PRId32")",
- stub->frame->root->unique,
- stub->args.chown.loc.path,
- BOUND_XL(stub->frame)->name,
- op_ret, op_errno);
- server_chown_cbk (stub->frame, NULL, stub->frame->this,
- -1, ENOENT, NULL);
- server_loc_wipe (&stub->args.chown.loc);
- FREE (stub);
- return 0;
- }
-
- if (stub->args.chown.loc.parent == NULL)
- stub->args.chown.loc.parent = inode_ref (parent);
-
- if (server_inode && (stub->args.chown.loc.inode == NULL)) {
- stub->args.chown.loc.inode = inode_ref (server_inode);
- stub->args.chown.loc.ino = server_inode->ino;
- }
- call_resume (stub);
- break;
- }
-
- case GF_FOP_LINK:
- {
- if ((stub->args.link.oldloc.inode == NULL)
- || (stub->args.link.oldloc.parent == NULL)) {
- if (op_ret < 0) {
- gf_log (stub->frame->this->name, GF_LOG_DEBUG,
- "%"PRId64": LINK (%s -> %s) on %s returning "
- "error for oldloc: "
- "%"PRId32" (%"PRId32")",
- stub->frame->root->unique,
- stub->args.link.oldloc.path,
- stub->args.link.newloc.path,
- BOUND_XL(stub->frame)->name,
- op_ret, op_errno);
-
- server_link_cbk (stub->frame, NULL,
- stub->frame->this, -1, ENOENT,
- NULL, NULL);
- server_loc_wipe (&stub->args.link.oldloc);
- server_loc_wipe (&stub->args.link.newloc);
- FREE (stub);
- return 0;
- }
-
- if (stub->args.link.oldloc.parent == NULL)
- stub->args.link.oldloc.parent =
- inode_ref (parent);
-
- if (server_inode &&
- (stub->args.link.oldloc.inode == NULL)) {
- stub->args.link.oldloc.inode =
- inode_ref (server_inode);
- stub->args.link.oldloc.ino = server_inode->ino;
- }
-
- if (stub->args.link.newloc.parent == NULL) {
- do_path_lookup (stub,
- &(stub->args.link.newloc));
- break;
- }
- } else {
- /* we are called by the lookup of newpath */
- if ((op_ret < 0) && (parent == NULL)) {
- gf_log (stub->frame->this->name, GF_LOG_DEBUG,
- "%"PRId64": LINK (%s -> %s) on %s returning "
- "error for newloc: "
- "%"PRId32" (%"PRId32")",
- stub->frame->root->unique,
- stub->args.link.oldloc.path,
- stub->args.link.newloc.path,
- BOUND_XL(stub->frame)->name,
- op_ret, op_errno);
-
- server_link_cbk (stub->frame, NULL,
- stub->frame->this, -1, ENOENT,
- NULL, NULL);
-
- server_loc_wipe (&stub->args.link.oldloc);
- server_loc_wipe (&stub->args.link.newloc);
- FREE (stub);
- break;
- }
-
- if (stub->args.link.newloc.parent == NULL) {
- stub->args.link.newloc.parent =
- inode_ref (parent);
- }
-
- if (server_inode &&
- (stub->args.link.newloc.inode == NULL)) {
- /* as new.inode doesn't get forget, it
- * needs to be unref'd here */
- stub->args.link.newloc.inode =
- inode_ref (server_inode);
- stub->args.link.newloc.ino = server_inode->ino;
- }
- }
- call_resume (stub);
- break;
- }
-
- case GF_FOP_TRUNCATE:
- {
- if (op_ret < 0) {
- gf_log (stub->frame->this->name, GF_LOG_DEBUG,
- "%"PRId64": TRUNCATE (%s) on %s returning error: "
- "%"PRId32" (%"PRId32")",
- stub->frame->root->unique,
- stub->args.truncate.loc.path,
- BOUND_XL(stub->frame)->name,
- op_ret, op_errno);
-
- server_truncate_cbk (stub->frame, NULL,
- stub->frame->this, -1, ENOENT,
- NULL);
- server_loc_wipe (&stub->args.truncate.loc);
- FREE (stub);
- return 0;
- }
-
- if (stub->args.truncate.loc.parent == NULL)
- stub->args.truncate.loc.parent = inode_ref (parent);
-
- if (server_inode && (stub->args.truncate.loc.inode == NULL)) {
- stub->args.truncate.loc.inode =
- inode_ref (server_inode);
- stub->args.truncate.loc.ino = server_inode->ino;
- }
- call_resume (stub);
- break;
- }
-
- case GF_FOP_STATFS:
- {
- if (op_ret < 0) {
- gf_log (stub->frame->this->name, GF_LOG_DEBUG,
- "%"PRId64": STATFS (%s) on %s returning ENOENT: "
- "%"PRId32" (%"PRId32")",
- stub->frame->root->unique,
- stub->args.statfs.loc.path,
- BOUND_XL(stub->frame)->name,
- op_ret, op_errno);
-
- server_statfs_cbk (stub->frame, NULL,
- stub->frame->this, -1, ENOENT,
- NULL);
- server_loc_wipe (&stub->args.statfs.loc);
- FREE (stub);
- return 0;
- }
-
- if (stub->args.statfs.loc.parent == NULL)
- stub->args.statfs.loc.parent = inode_ref (parent);
-
- if (server_inode && (stub->args.statfs.loc.inode == NULL)) {
- stub->args.statfs.loc.inode = inode_ref (server_inode);
- stub->args.statfs.loc.ino = server_inode->ino;
- }
- call_resume (stub);
- break;
- }
-
- case GF_FOP_SETXATTR:
- {
- dict = stub->args.setxattr.dict;
- if (op_ret < 0) {
- gf_log (stub->frame->this->name, GF_LOG_DEBUG,
- "%"PRId64": SETXATTR (%s) on %s returning error: "
- "%"PRId32" (%"PRId32")",
- stub->frame->root->unique,
- stub->args.setxattr.loc.path,
- BOUND_XL(stub->frame)->name,
- op_ret, op_errno);
-
- server_setxattr_cbk (stub->frame, NULL,
- stub->frame->this, -1, ENOENT);
-
- server_loc_wipe (&stub->args.setxattr.loc);
- dict_unref (dict);
- FREE (stub);
- return 0;
- }
-
- if (stub->args.setxattr.loc.parent == NULL)
- stub->args.setxattr.loc.parent = inode_ref (parent);
-
- if (server_inode && (stub->args.setxattr.loc.inode == NULL)) {
- stub->args.setxattr.loc.inode =
- inode_ref (server_inode);
- stub->args.setxattr.loc.ino = server_inode->ino;
- }
- call_resume (stub);
- break;
- }
-
- case GF_FOP_GETXATTR:
- {
- if (op_ret < 0) {
- gf_log (stub->frame->this->name, GF_LOG_DEBUG,
- "%"PRId64": GETXATTR (%s) on %s for key %s "
- "returning error: %"PRId32" (%"PRId32")",
- stub->frame->root->unique,
- stub->args.getxattr.loc.path,
- BOUND_XL(stub->frame)->name,
- stub->args.getxattr.name ?
- stub->args.getxattr.name : "<nul>",
- op_ret, op_errno);
-
- server_getxattr_cbk (stub->frame, NULL,
- stub->frame->this, -1, ENOENT,
- NULL);
- server_loc_wipe (&stub->args.getxattr.loc);
- FREE (stub);
- return 0;
- }
-
- if (stub->args.getxattr.loc.parent == NULL)
- stub->args.getxattr.loc.parent = inode_ref (parent);
-
- if (server_inode && (stub->args.getxattr.loc.inode == NULL)) {
- stub->args.getxattr.loc.inode =
- inode_ref (server_inode);
- stub->args.getxattr.loc.ino = server_inode->ino;
- }
- call_resume (stub);
- break;
- }
-
- case GF_FOP_REMOVEXATTR:
- {
- if (op_ret < 0) {
- gf_log (stub->frame->this->name, GF_LOG_DEBUG,
- "%"PRId64": REMOVEXATTR (%s) on %s for key %s "
- "returning error: %"PRId32" (%"PRId32")",
- stub->frame->root->unique,
- stub->args.removexattr.loc.path,
- BOUND_XL(stub->frame)->name,
- stub->args.removexattr.name,
- op_ret, op_errno);
-
- server_removexattr_cbk (stub->frame, NULL,
- stub->frame->this, -1,
- ENOENT);
- server_loc_wipe (&stub->args.removexattr.loc);
- FREE (stub);
- return 0;
- }
-
- if (stub->args.removexattr.loc.parent == NULL)
- stub->args.removexattr.loc.parent = inode_ref (parent);
-
- if (server_inode &&
- (stub->args.removexattr.loc.inode == NULL)) {
- stub->args.removexattr.loc.inode =
- inode_ref (server_inode);
- stub->args.removexattr.loc.ino = server_inode->ino;
- }
- call_resume (stub);
- break;
- }
-
- case GF_FOP_OPENDIR:
- {
- if (op_ret < 0) {
- gf_log (stub->frame->this->name, GF_LOG_DEBUG,
- "%"PRId64": OPENDIR (%s) on %s returning error: "
- "%"PRId32" (%"PRId32")",
- stub->frame->root->unique,
- stub->args.opendir.loc.path,
- BOUND_XL(stub->frame)->name,
- op_ret, op_errno);
-
- server_opendir_cbk (stub->frame, NULL,
- stub->frame->this, -1, ENOENT,
- NULL);
- server_loc_wipe (&stub->args.opendir.loc);
- FREE (stub);
- return 0;
- }
-
- if (stub->args.opendir.loc.parent == NULL)
- stub->args.opendir.loc.parent = inode_ref (parent);
-
- if (server_inode && (stub->args.opendir.loc.inode == NULL)) {
- stub->args.opendir.loc.inode =
- inode_ref (server_inode);
- stub->args.opendir.loc.ino = server_inode->ino;
- }
- call_resume (stub);
- break;
- }
-
- case GF_FOP_ACCESS:
- {
- if (op_ret < 0) {
- gf_log (stub->frame->this->name, GF_LOG_DEBUG,
- "%"PRId64": ACCESS (%s) on %s returning error: "
- "%"PRId32" (%"PRId32")",
- stub->frame->root->unique,
- stub->args.access.loc.path,
- BOUND_XL(stub->frame)->name,
- op_ret, op_errno);
-
- server_access_cbk (stub->frame, NULL,
- stub->frame->this, -1, ENOENT);
- server_loc_wipe (&stub->args.access.loc);
- FREE (stub);
- return 0;
- }
-
- if (stub->args.access.loc.parent == NULL)
- stub->args.access.loc.parent = inode_ref (parent);
-
- if (server_inode && (stub->args.access.loc.inode == NULL)) {
- stub->args.access.loc.inode = inode_ref (server_inode);
- stub->args.access.loc.ino = server_inode->ino;
- }
- call_resume (stub);
- break;
- }
-
-
- case GF_FOP_UTIMENS:
- {
- if (op_ret < 0) {
- gf_log (stub->frame->this->name, GF_LOG_DEBUG,
- "%"PRId64": UTIMENS (%s) on %s returning error: "
- "%"PRId32" (%"PRId32")",
- stub->frame->root->unique,
- stub->args.utimens.loc.path,
- BOUND_XL(stub->frame)->name,
- op_ret, op_errno);
-
- server_utimens_cbk (stub->frame, NULL,
- stub->frame->this, -1, ENOENT,
- NULL);
- server_loc_wipe (&stub->args.utimens.loc);
- FREE (stub);
- return 0;
- }
-
- if (stub->args.utimens.loc.parent == NULL)
- stub->args.utimens.loc.parent = inode_ref (parent);
-
- if (server_inode && (stub->args.utimens.loc.inode == NULL)) {
- stub->args.utimens.loc.inode =
- inode_ref (server_inode);
- stub->args.utimens.loc.ino = server_inode->ino;
- }
- call_resume (stub);
- break;
- }
-
- case GF_FOP_READLINK:
- {
- if (op_ret < 0) {
- gf_log (stub->frame->this->name, GF_LOG_DEBUG,
- "%"PRId64": READLINK (%s) on %s returning error: "
- "%"PRId32" (%"PRId32")",
- stub->frame->root->unique,
- stub->args.readlink.loc.path,
- BOUND_XL(stub->frame)->name,
- op_ret, op_errno);
-
- server_readlink_cbk (stub->frame, NULL,
- stub->frame->this, -1, ENOENT,
- NULL);
- server_loc_wipe (&stub->args.readlink.loc);
- FREE (stub);
- return 0;
- }
-
- if (stub->args.readlink.loc.parent == NULL)
- stub->args.readlink.loc.parent = inode_ref (parent);
-
- if (server_inode && (stub->args.readlink.loc.inode == NULL)) {
- stub->args.readlink.loc.inode =
- inode_ref (server_inode);
- stub->args.readlink.loc.ino = server_inode->ino;
- }
- call_resume (stub);
- break;
- }
- case GF_FOP_MKDIR:
- {
- if ((op_ret < 0) && (parent == NULL)) {
- gf_log (stub->frame->this->name, GF_LOG_DEBUG,
- "%"PRId64": MKDIR (%s) on %s returning error: "
- "%"PRId32" (%"PRId32")",
- stub->frame->root->unique,
- stub->args.mkdir.loc.path,
- BOUND_XL(stub->frame)->name,
- op_ret, op_errno);
-
- server_mkdir_cbk (stub->frame, NULL,
- stub->frame->this, -1, ENOENT,
- NULL, NULL);
- server_loc_wipe (&stub->args.mkdir.loc);
- FREE (stub);
- break;
- }
-
- if (stub->args.mkdir.loc.parent == NULL)
- stub->args.mkdir.loc.parent = inode_ref (parent);
-
- if (server_inode && (stub->args.mkdir.loc.inode == NULL)) {
- stub->args.mkdir.loc.inode = inode_ref (server_inode);
- stub->args.mkdir.loc.ino = server_inode->ino;
- }
-
- call_resume (stub);
- break;
- }
-
- case GF_FOP_CREATE:
- {
- if ((op_ret < 0) && (parent == NULL)) {
- gf_log (stub->frame->this->name, GF_LOG_DEBUG,
- "%"PRId64": CREATE (%s) on %s returning error: "
- "%"PRId32" (%"PRId32")",
- stub->frame->root->unique,
- stub->args.create.loc.path,
- BOUND_XL(stub->frame)->name,
- op_ret, op_errno);
-
- server_create_cbk (stub->frame, NULL,
- stub->frame->this, -1, ENOENT,
- NULL, NULL, NULL);
- if (stub->args.create.fd)
- fd_unref (stub->args.create.fd);
- server_loc_wipe (&stub->args.create.loc);
- FREE (stub);
- break;
- }
-
- if (stub->args.create.loc.parent == NULL)
- stub->args.create.loc.parent = inode_ref (parent);
-
- if (server_inode && (stub->args.create.loc.inode == NULL)) {
- stub->args.create.loc.inode = inode_ref (server_inode);
- stub->args.create.loc.ino = server_inode->ino;
- }
-
- call_resume (stub);
- break;
- }
-
- case GF_FOP_MKNOD:
- {
- if ((op_ret < 0) && (parent == NULL)) {
- gf_log (stub->frame->this->name, GF_LOG_DEBUG,
- "%"PRId64": MKNOD (%s) on %s returning error: "
- "%"PRId32" (%"PRId32")",
- stub->frame->root->unique,
- stub->args.mknod.loc.path,
- BOUND_XL(stub->frame)->name,
- op_ret, op_errno);
-
- server_mknod_cbk (stub->frame, NULL,
- stub->frame->this, -1, ENOENT, NULL,
- NULL);
- server_loc_wipe (&stub->args.mknod.loc);
- FREE (stub);
- break;
- }
-
- if (stub->args.mknod.loc.parent == NULL)
- stub->args.mknod.loc.parent = inode_ref (parent);
-
- if (server_inode && (stub->args.mknod.loc.inode == NULL)) {
- stub->args.mknod.loc.inode = inode_ref (server_inode);
- stub->args.mknod.loc.ino = server_inode->ino;
- }
- call_resume (stub);
- break;
- }
- case GF_FOP_ENTRYLK:
- {
- if (op_ret < 0) {
- gf_log (stub->frame->this->name, GF_LOG_DEBUG,
- "%"PRId64": ENTRYLK (%s) on %s for key %s returning "
- "error: %"PRId32" (%"PRId32")",
- stub->frame->root->unique,
- stub->args.entrylk.loc.path,
- BOUND_XL(stub->frame)->name,
- stub->args.entrylk.name ?
- stub->args.entrylk.name : "<nul>",
- op_ret, op_errno);
-
- server_entrylk_cbk (stub->frame, NULL,
- stub->frame->this, -1, ENOENT);
- server_loc_wipe (&stub->args.entrylk.loc);
- FREE (stub);
- break;
- }
-
- if (stub->args.entrylk.loc.parent == NULL)
- stub->args.entrylk.loc.parent = inode_ref (parent);
-
- if (server_inode && (stub->args.entrylk.loc.inode == NULL)) {
- stub->args.entrylk.loc.inode = inode_ref (server_inode);
- stub->args.entrylk.loc.ino = server_inode->ino;
- }
- call_resume (stub);
- break;
- }
- case GF_FOP_INODELK:
- {
- if (op_ret < 0) {
- gf_log (stub->frame->this->name, GF_LOG_DEBUG,
- "%"PRId64": INODELK (%s) on %s returning error: "
- "%"PRId32" (%"PRId32")",
- stub->frame->root->unique,
- stub->args.inodelk.loc.path,
- BOUND_XL(stub->frame)->name,
- op_ret, op_errno);
-
- server_inodelk_cbk (stub->frame, NULL,
- stub->frame->this, -1, ENOENT);
- server_loc_wipe (&stub->args.inodelk.loc);
- FREE (stub);
- break;
- }
-
- if (stub->args.inodelk.loc.parent == NULL)
- stub->args.inodelk.loc.parent = inode_ref (parent);
-
- if (server_inode && (stub->args.inodelk.loc.inode == NULL)) {
- stub->args.inodelk.loc.inode =
- inode_ref (server_inode);
- stub->args.inodelk.loc.ino = server_inode->ino;
- }
- call_resume (stub);
- break;
- }
- default:
- call_resume (stub);
- }
-
- return 0;
-}
-
-int
-server_lookup_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xattr_req)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE(frame);
-
- if ((state->loc.parent == NULL) &&
- (loc->parent))
- state->loc.parent = inode_ref (loc->parent);
-
- if (state->loc.inode == NULL) {
- if (loc->inode == NULL)
- state->loc.inode = inode_new (state->itable);
- else
- /* FIXME: why another lookup? */
- state->loc.inode = inode_ref (loc->inode);
- } else {
- if (loc->inode && (state->loc.inode != loc->inode)) {
- if (state->loc.inode)
- inode_unref (state->loc.inode);
- state->loc.inode = inode_ref (loc->inode);
- }
- }
-
- gf_log (BOUND_XL(frame)->name, GF_LOG_TRACE,
- "%"PRId64": LOOKUP \'%"PRId64"/%s\'",
- frame->root->unique, state->par, state->bname);
-
- STACK_WIND (frame, server_lookup_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->lookup,
- &(state->loc), xattr_req);
- return 0;
-}
-
-/*
- * server_lookup - lookup function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- * not for external reference
- */
-int
-server_lookup (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_lookup_req_t *req = NULL;
- server_state_t *state = NULL;
- call_stub_t *lookup_stub = NULL;
- int32_t ret = -1;
- size_t pathlen = 0;
- size_t baselen = 0;
- size_t dictlen = 0;
- dict_t *xattr_req = NULL;
- char *req_dictbuf = NULL;
-
- req = gf_param (hdr);
-
- state = CALL_STATE(frame);
- {
-
- pathlen = STRLEN_0 (req->path);
- dictlen = ntoh32 (req->dictlen);
-
- /* NOTE: lookup() uses req->ino only to identify if a lookup()
- * is requested for 'root' or not
- */
- state->ino = ntoh64 (req->ino);
- if (state->ino != 1)
- state->ino = 0;
-
- state->par = ntoh64 (req->par);
- state->path = req->path;
- if (IS_NOT_ROOT(pathlen)) {
- state->bname = req->bname + pathlen;
- baselen = STRLEN_0 (state->bname);
- }
-
- if (dictlen) {
- /* Unserialize the dictionary */
- req_dictbuf = memdup (req->dict + pathlen + baselen,
- dictlen);
- GF_VALIDATE_OR_GOTO(bound_xl->name, req_dictbuf, fail);
-
- xattr_req = dict_new ();
- GF_VALIDATE_OR_GOTO(bound_xl->name, xattr_req, fail);
-
- ret = dict_unserialize (req_dictbuf, dictlen,
- &xattr_req);
- if (ret < 0) {
- gf_log (bound_xl->name, GF_LOG_ERROR,
- "%"PRId64": %s (%"PRId64"): failed to "
- "unserialize req-buffer to dictionary",
- frame->root->unique, state->path,
- state->ino);
- free (req_dictbuf);
- goto fail;
- } else{
- xattr_req->extra_free = req_dictbuf;
- state->xattr_req = xattr_req;
- xattr_req = NULL;
- }
- }
- }
-
- ret = server_loc_fill (&state->loc, state, state->ino, state->par,
- state->bname, state->path);
-
- if (state->loc.inode) {
- /* revalidate */
- state->is_revalidate = 1;
- } else {
- /* fresh lookup or inode was previously pruned out */
- state->is_revalidate = -1;
- }
-
- lookup_stub = fop_lookup_stub (frame, server_lookup_resume,
- &(state->loc), state->xattr_req);
- GF_VALIDATE_OR_GOTO(bound_xl->name, lookup_stub, fail);
-
- if ((state->loc.parent == NULL) &&
- IS_NOT_ROOT(pathlen))
- do_path_lookup (lookup_stub, &(state->loc));
- else
- call_resume (lookup_stub);
-
- return 0;
-fail:
- server_lookup_cbk (frame, NULL, frame->this, -1,EINVAL, NULL, NULL,
- NULL);
- if (xattr_req)
- dict_unref (xattr_req);
-
- return 0;
-}
-
-
-/*
- * server_forget - forget function for server protocol
- *
- * not for external reference
- */
-int
-server_forget (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_log ("forget", GF_LOG_CRITICAL, "function not implemented");
- return 0;
-}
-
-
-int
-server_stat_resume (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE(frame);
-
- gf_log (BOUND_XL(frame)->name, GF_LOG_TRACE,
- "%"PRId64": STAT \'%s (%"PRId64")\'",
- frame->root->unique, state->loc.path, state->loc.ino);
-
- STACK_WIND (frame, server_stat_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->stat,
- loc);
- return 0;
-}
-
-/*
- * server_stat - stat function for server
- * @frame: call frame
- * @bound_xl: translator this server is bound to
- * @params: parameters dictionary
- *
- * not for external reference
- */
-int
-server_stat (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- call_stub_t *stat_stub = NULL;
- gf_fop_stat_req_t *req = NULL;
- server_state_t *state = NULL;
- int32_t ret = -1;
- size_t pathlen = 0;
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
-
- state->ino = ntoh64 (req->ino);
- state->path = req->path;
- pathlen = STRLEN_0(state->path);
-
- ret = server_loc_fill (&(state->loc), state, state->ino, state->par,
- state->bname, state->path);
-
- stat_stub = fop_stat_stub (frame, server_stat_resume,
- &(state->loc));
- GF_VALIDATE_OR_GOTO(bound_xl->name, stat_stub, fail);
-
- if (((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) ||
- (state->loc.inode == NULL)) {
- do_path_lookup (stat_stub, &(state->loc));
- } else {
- call_resume (stat_stub);
- }
- return 0;
-fail:
- server_stat_cbk (frame, NULL, frame->this, -1, EINVAL, NULL);
- return 0;
-}
-
-
-int
-server_readlink_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- size_t size)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE(frame);
-
- gf_log (BOUND_XL(frame)->name, GF_LOG_TRACE,
- "%"PRId64": READLINK \'%s (%"PRId64")\'",
- frame->root->unique, state->loc.path, state->loc.ino);
-
- STACK_WIND (frame, server_readlink_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->readlink,
- loc, size);
- return 0;
-}
-
-/*
- * server_readlink - readlink function for server
- * @frame: call frame
- * @bound_xl: translator this server is bound to
- * @params: parameters dictionary
- *
- * not for external reference
- */
-int
-server_readlink (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- call_stub_t *readlink_stub = NULL;
- gf_fop_readlink_req_t *req = NULL;
- server_state_t *state = NULL;
- int32_t ret = -1;
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
-
- state->size = ntoh32 (req->size);
-
- state->ino = ntoh64 (req->ino);
- state->path = req->path;
-
- ret = server_loc_fill (&(state->loc), state, state->ino, 0, NULL,
- state->path);
-
- readlink_stub = fop_readlink_stub (frame, server_readlink_resume,
- &(state->loc), state->size);
- GF_VALIDATE_OR_GOTO(bound_xl->name, readlink_stub, fail);
-
- if ((state->loc.parent == NULL) ||
- (state->loc.inode == NULL)) {
- do_path_lookup (readlink_stub, &(state->loc));
- } else {
- call_resume (readlink_stub);
- }
- return 0;
-fail:
- server_readlink_cbk (frame, NULL,frame->this, -1, EINVAL, NULL);
- return 0;
-}
-
-int
-server_create_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, mode_t mode, fd_t *fd)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE(frame);
-
- if (state->loc.parent == NULL)
- state->loc.parent = inode_ref (loc->parent);
-
- state->loc.inode = inode_new (state->itable);
- GF_VALIDATE_OR_GOTO(BOUND_XL(frame)->name, state->loc.inode, fail);
-
- state->fd = fd_create (state->loc.inode, frame->root->pid);
- GF_VALIDATE_OR_GOTO(BOUND_XL(frame)->name, state->fd, fail);
-
- state->fd->flags = flags;
- state->fd = fd_ref (state->fd);
-
- gf_log (BOUND_XL(frame)->name, GF_LOG_TRACE,
- "%"PRId64": CREATE \'%"PRId64"/%s\'",
- frame->root->unique, state->par, state->bname);
-
- STACK_WIND (frame, server_create_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->create,
- &(state->loc), flags, mode, state->fd);
-
- return 0;
-fail:
- server_create_cbk (frame, NULL, frame->this, -1, EINVAL, NULL, NULL,
- NULL);
- return 0;
-}
-
-
-/*
- * server_create - create function for server
- * @frame: call frame
- * @bound_xl: translator this server is bound to
- * @params: parameters dictionary
- *
- * not for external reference
- */
-int
-server_create (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_create_req_t *req = NULL;
- server_state_t *state = NULL;
- call_stub_t *create_stub = NULL;
- int32_t ret = -1;
- size_t pathlen = 0;
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
- {
- pathlen = STRLEN_0(req->path);
-
- state->par = ntoh64 (req->par);
- state->path = req->path;
- if (IS_NOT_ROOT(pathlen))
- state->bname = req->bname + pathlen;
-
- state->mode = ntoh32 (req->mode);
- state->flags = gf_flags_to_flags (ntoh32 (req->flags));
- }
-
- ret = server_loc_fill (&(state->loc), state,
- 0, state->par, state->bname,
- state->path);
-
- create_stub = fop_create_stub (frame, server_create_resume,
- &(state->loc), state->flags,
- state->mode, state->fd);
- GF_VALIDATE_OR_GOTO(bound_xl->name, create_stub, fail);
-
- if (state->loc.parent == NULL) {
- do_path_lookup (create_stub, &state->loc);
- } else {
- call_resume (create_stub);
- }
- return 0;
-fail:
- server_create_cbk (frame, NULL, frame->this, -1, EINVAL, NULL, NULL,
- NULL);
- return 0;
-}
-
-
-int
-server_open_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, fd_t *fd)
-{
- server_state_t *state = NULL;
- fd_t *new_fd = NULL;
-
- state = CALL_STATE(frame);
- new_fd = fd_create (loc->inode, frame->root->pid);
- GF_VALIDATE_OR_GOTO(BOUND_XL(frame)->name, new_fd, fail);
-
- new_fd->flags = flags;
-
- state->fd = fd_ref (new_fd);
-
- gf_log (BOUND_XL(frame)->name, GF_LOG_TRACE,
- "%"PRId64": OPEN \'%s (%"PRId64")\'",
- frame->root->unique, state->path, state->ino);
-
- STACK_WIND (frame, server_open_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->open,
- loc, flags, state->fd);
-
- return 0;
-fail:
- server_open_cbk (frame, NULL, frame->this, -1, EINVAL, NULL);
- return 0;
-}
-
-/*
- * server_open - open function for server protocol
- * @frame: call frame
- * @bound_xl: translator this server protocol is bound to
- * @params: parameters dictionary
- *
- * not for external reference
- */
-int
-server_open (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- call_stub_t *open_stub = NULL;
- gf_fop_open_req_t *req = NULL;
- server_state_t *state = NULL;
- int32_t ret = -1;
- size_t pathlen = 0;
-
- req = gf_param (hdr);
- state = CALL_STATE (frame);
- {
- state->ino = ntoh64 (req->ino);
- state->path = req->path;
- pathlen = STRLEN_0(state->path);
- state->flags = gf_flags_to_flags (ntoh32 (req->flags));
- }
- ret = server_loc_fill (&(state->loc), state,
- state->ino, 0, NULL, state->path);
-
- open_stub = fop_open_stub (frame,
- server_open_resume,
- &(state->loc), state->flags, NULL);
- GF_VALIDATE_OR_GOTO(bound_xl->name, open_stub, fail);
-
- if (((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) ||
- (state->loc.inode == NULL)) {
- do_path_lookup (open_stub, &state->loc);
- } else {
- call_resume (open_stub);
- }
- return 0;
-fail:
- server_open_cbk (frame, NULL, frame->this, -1, EINVAL, NULL);
- return 0;
-}
-
-
-/*
- * server_readv - readv function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- * not for external reference
- */
-int
-server_readv (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_read_req_t *req = NULL;
- server_state_t *state = NULL;
- server_connection_t *conn = NULL;
-
- conn = SERVER_CONNECTION (frame);
-
- req = gf_param (hdr);
-
- state = CALL_STATE(frame);
- {
- state->fd_no = ntoh64 (req->fd);
- if (state->fd_no >= 0)
- state->fd = gf_fd_fdptr_get (conn->fdtable,
- state->fd_no);
-
- state->size = ntoh32 (req->size);
- state->offset = ntoh64 (req->offset);
- }
-
- GF_VALIDATE_OR_GOTO(bound_xl->name, state->fd, fail);
-
- gf_log (bound_xl->name, GF_LOG_TRACE,
- "%"PRId64": READV \'fd=%"PRId64" (%"PRId64"); "
- "offset=%"PRId64"; size=%"PRId64,
- frame->root->unique, state->fd_no, state->fd->inode->ino,
- state->offset, (int64_t)state->size);
-
- STACK_WIND (frame, server_readv_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->readv,
- state->fd, state->size, state->offset);
- return 0;
-fail:
- server_readv_cbk (frame, NULL, frame->this, -1, EINVAL, NULL, 0,
- NULL, NULL);
- return 0;
-}
-
-
-/*
- * server_writev - writev function for server
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- * not for external reference
- */
-int
-server_writev (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- server_connection_t *conn = NULL;
- gf_fop_write_req_t *req = NULL;
- struct iovec iov = {0, };
- struct iobref *iobref = NULL;
- server_state_t *state = NULL;
-
- conn = SERVER_CONNECTION(frame);
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
- {
- state->fd_no = ntoh64 (req->fd);
- if (state->fd_no >= 0)
- state->fd = gf_fd_fdptr_get (conn->fdtable,
- state->fd_no);
-
- state->offset = ntoh64 (req->offset);
- state->size = ntoh32 (req->size);
- }
-
- GF_VALIDATE_OR_GOTO(bound_xl->name, state->fd, fail);
-
- if (iobuf)
- iov.iov_base = iobuf->ptr;
- iov.iov_len = state->size;
-
- iobref = iobref_new ();
- GF_VALIDATE_OR_GOTO(bound_xl->name, iobref, fail);
-
- iobref_add (iobref, iobuf);
-
- gf_log (bound_xl->name, GF_LOG_TRACE,
- "%"PRId64": WRITEV \'fd=%"PRId64" (%"PRId64"); "
- "offset=%"PRId64"; size=%"PRId32,
- frame->root->unique, state->fd_no, state->fd->inode->ino,
- state->offset, (int32_t)state->size);
-
- STACK_WIND (frame, server_writev_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->writev,
- state->fd, &iov, 1, state->offset, iobref);
-
- if (iobref)
- iobref_unref (iobref);
- if (iobuf)
- iobuf_unref (iobuf);
-
- return 0;
-fail:
- server_writev_cbk (frame, NULL, frame->this, -1, EINVAL, NULL);
-
- if (iobref)
- iobref_unref (iobref);
- if (iobuf)
- iobuf_unref (iobuf);
-
- return 0;
-}
-
-
-
-/*
- * server_release - release function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- * not for external reference
- */
-int
-server_release (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_cbk_release_req_t *req = NULL;
- server_state_t *state = NULL;
- server_connection_t *conn = NULL;
-
- conn = SERVER_CONNECTION(frame);
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
-
- state->fd_no = ntoh64 (req->fd);
-
- gf_fd_put (conn->fdtable, state->fd_no);
-
- gf_log (bound_xl->name, GF_LOG_TRACE,
- "%"PRId64": RELEASE \'fd=%"PRId64"\'",
- frame->root->unique, state->fd_no);
-
- server_release_cbk (frame, NULL, frame->this, 0, 0);
- return 0;
-}
-
-
-/*
- * server_fsync - fsync function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameters dictionary
- *
- * not for external reference
- */
-int
-server_fsync (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_fsync_req_t *req = NULL;
- server_state_t *state = NULL;
- server_connection_t *conn = NULL;
-
- conn = SERVER_CONNECTION(frame);
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
- {
- state->fd_no = ntoh64 (req->fd);
- if (state->fd_no >= 0)
- state->fd = gf_fd_fdptr_get (conn->fdtable,
- state->fd_no);
-
- state->flags = ntoh32 (req->data);
- }
-
- GF_VALIDATE_OR_GOTO(bound_xl->name, state->fd, fail);
-
- gf_log (bound_xl->name, GF_LOG_TRACE,
- "%"PRId64": FSYNC \'fd=%"PRId64" (%"PRId64")\'",
- frame->root->unique, state->fd_no, state->fd->inode->ino);
-
- STACK_WIND (frame, server_fsync_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->fsync,
- state->fd, state->flags);
- return 0;
-fail:
- server_fsync_cbk (frame, NULL, frame->this, -1, EINVAL);
-
- return 0;
-}
-
-
-/*
- * server_flush - flush function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- * not for external reference
- */
-int
-server_flush (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_flush_req_t *req = NULL;
- server_state_t *state = NULL;
- server_connection_t *conn = NULL;
-
- conn = SERVER_CONNECTION (frame);
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
- {
- state->fd_no = ntoh64 (req->fd);
- if (state->fd_no >= 0)
- state->fd = gf_fd_fdptr_get (conn->fdtable,
- state->fd_no);
- }
-
- GF_VALIDATE_OR_GOTO (bound_xl->name, state->fd, fail);
-
- gf_log (bound_xl->name, GF_LOG_TRACE,
- "%"PRId64": FLUSH \'fd=%"PRId64" (%"PRId64")\'",
- frame->root->unique, state->fd_no, state->fd->inode->ino);
-
- STACK_WIND (frame, server_flush_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->flush,
- state->fd);
- return 0;
-
-fail:
- server_flush_cbk (frame, NULL, frame->this, -1, EINVAL);
-
- return 0;
-}
-
-
-/*
- * server_ftruncate - ftruncate function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameters dictionary
- *
- * not for external reference
- */
-int
-server_ftruncate (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_ftruncate_req_t *req = NULL;
- server_state_t *state = NULL;
- server_connection_t *conn = NULL;
-
- conn = SERVER_CONNECTION(frame);
-
- req = gf_param (hdr);
-
- state = CALL_STATE(frame);
- {
- state->fd_no = ntoh64 (req->fd);
- if (state->fd_no >= 0)
- state->fd = gf_fd_fdptr_get (conn->fdtable,
- state->fd_no);
-
- state->offset = ntoh64 (req->offset);
- }
-
- GF_VALIDATE_OR_GOTO(bound_xl->name, state->fd, fail);
-
- gf_log (bound_xl->name, GF_LOG_TRACE,
- "%"PRId64": FTRUNCATE \'fd=%"PRId64" (%"PRId64"); "
- "offset=%"PRId64"\'",
- frame->root->unique, state->fd_no, state->fd->inode->ino,
- state->offset);
-
- STACK_WIND (frame, server_ftruncate_cbk,
- bound_xl,
- bound_xl->fops->ftruncate,
- state->fd, state->offset);
- return 0;
-fail:
- server_ftruncate_cbk (frame, NULL, frame->this, -1, EINVAL, NULL);
-
- return 0;
-}
-
-
-/*
- * server_fstat - fstat function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- * not for external reference
- */
-int
-server_fstat (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_fstat_req_t *req = NULL;
- server_state_t *state = NULL;
- server_connection_t *conn = NULL;
-
- conn = SERVER_CONNECTION (frame);
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
- {
- state->fd_no = ntoh64 (req->fd);
- if (state->fd_no >= 0)
- state->fd = gf_fd_fdptr_get (conn->fdtable,
- state->fd_no);
- }
-
-
- if (state->fd == NULL) {
- gf_log (frame->this->name, GF_LOG_WARNING,
- "fd - %"PRId64": unresolved fd",
- state->fd_no);
-
- server_fstat_cbk (frame, NULL, frame->this,
- -1, EBADF, NULL);
-
- goto out;
- }
-
- gf_log (bound_xl->name, GF_LOG_TRACE,
- "%"PRId64": FSTAT \'fd=%"PRId64" (%"PRId64")\'",
- frame->root->unique, state->fd_no, state->fd->inode->ino);
-
- STACK_WIND (frame, server_fstat_cbk,
- bound_xl,
- bound_xl->fops->fstat,
- state->fd);
-out:
- return 0;
-}
-
-
-int
-server_truncate_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- off_t offset)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE(frame);
-
- gf_log (BOUND_XL(frame)->name, GF_LOG_TRACE,
- "%"PRId64": TRUNCATE \'%s (%"PRId64")\'",
- frame->root->unique, state->path, state->ino);
-
- STACK_WIND (frame, server_truncate_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->truncate,
- loc, offset);
- return 0;
-}
-
-
-/*
- * server_truncate - truncate function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params:
- *
- * not for external reference
- */
-int
-server_truncate (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- call_stub_t *truncate_stub = NULL;
- gf_fop_truncate_req_t *req = NULL;
- server_state_t *state = NULL;
- int32_t ret = -1;
- size_t pathlen = 0;
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
- {
- pathlen = STRLEN_0(req->path);
- state->offset = ntoh64 (req->offset);
-
- state->path = req->path;
- state->ino = ntoh64 (req->ino);
- }
-
-
- ret = server_loc_fill (&(state->loc), state,
- state->ino, 0, NULL, state->path);
-
- truncate_stub = fop_truncate_stub (frame, server_truncate_resume,
- &(state->loc),
- state->offset);
- if ((state->loc.parent == NULL) ||
- (state->loc.inode == NULL)) {
- do_path_lookup (truncate_stub, &(state->loc));
- } else {
- call_resume (truncate_stub);
- }
-
- return 0;
-}
-
-
-int
-server_unlink_resume (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE(frame);
-
- if (state->loc.parent == NULL)
- state->loc.parent = inode_ref (loc->parent);
-
- if (state->loc.inode == NULL)
- state->loc.inode = inode_ref (loc->inode);
-
- gf_log (BOUND_XL(frame)->name, GF_LOG_TRACE,
- "%"PRId64": UNLINK \'%"PRId64"/%s (%"PRId64")\'",
- frame->root->unique, state->par, state->path,
- state->loc.inode->ino);
-
- STACK_WIND (frame, server_unlink_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->unlink,
- loc);
- return 0;
-}
-
-/*
- * server_unlink - unlink function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- * not for external reference
- */
-int
-server_unlink (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- call_stub_t *unlink_stub = NULL;
- gf_fop_unlink_req_t *req = NULL;
- server_state_t *state = NULL;
- int32_t ret = -1;
- size_t pathlen = 0;
-
- req = gf_param (hdr);
- state = CALL_STATE (frame);
-
- pathlen = STRLEN_0(req->path);
-
- state->par = ntoh64 (req->par);
- state->path = req->path;
- if (IS_NOT_ROOT(pathlen))
- state->bname = req->bname + pathlen;
-
- ret = server_loc_fill (&(state->loc), state, 0, state->par,
- state->bname, state->path);
-
- unlink_stub = fop_unlink_stub (frame, server_unlink_resume,
- &(state->loc));
-
- if ((state->loc.parent == NULL) ||
- (state->loc.inode == NULL)) {
- do_path_lookup (unlink_stub, &state->loc);
- } else {
- call_resume (unlink_stub);
- }
-
- return 0;
-}
-
-
-int
-server_setxattr_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *dict, int32_t flags)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE(frame);
-
- gf_log (BOUND_XL(frame)->name, GF_LOG_TRACE,
- "%"PRId64": SETXATTR \'%s (%"PRId64")\'",
- frame->root->unique, state->path, state->ino);
-
- STACK_WIND (frame, server_setxattr_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->setxattr,
- loc, dict, flags);
- return 0;
-}
-
-/*
- * server_setxattr - setxattr function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- * not for external reference
- */
-
-int
-server_setxattr (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- call_stub_t *setxattr_stub = NULL;
- gf_fop_setxattr_req_t *req = NULL;
- dict_t *dict = NULL;
- server_state_t *state = NULL;
- int32_t ret = -1;
- size_t pathlen = 0;
- size_t dict_len = 0;
- char *req_dictbuf = NULL;
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
- {
- dict_len = ntoh32 (req->dict_len);
-
- state->path = req->path + dict_len;
-
- pathlen = STRLEN_0(state->path);
- state->ino = ntoh64 (req->ino);
-
- state->flags = ntoh32 (req->flags);
- }
-
- ret = server_loc_fill (&(state->loc), state, state->ino, 0, NULL,
- state->path);
- {
- /* Unserialize the dictionary */
- req_dictbuf = memdup (req->dict, dict_len);
- GF_VALIDATE_OR_GOTO(bound_xl->name, req_dictbuf, fail);
-
- dict = dict_new ();
- GF_VALIDATE_OR_GOTO(bound_xl->name, dict, fail);
-
- ret = dict_unserialize (req_dictbuf, dict_len, &dict);
- if (ret < 0) {
- gf_log (bound_xl->name, GF_LOG_ERROR,
- "%"PRId64": %s (%"PRId64"): failed to "
- "unserialize request buffer to dictionary",
- frame->root->unique, state->loc.path,
- state->ino);
- free (req_dictbuf);
- goto fail;
- } else{
- dict->extra_free = req_dictbuf;
- }
- }
-
- setxattr_stub = fop_setxattr_stub (frame, server_setxattr_resume,
- &(state->loc), dict, state->flags);
- GF_VALIDATE_OR_GOTO(bound_xl->name, setxattr_stub, fail);
-
- if (((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) ||
- (state->loc.inode == NULL)) {
- do_path_lookup (setxattr_stub, &(state->loc));
- } else {
- call_resume (setxattr_stub);
- }
-
- if (dict)
- dict_unref (dict);
-
- return 0;
-fail:
- if (dict)
- dict_unref (dict);
-
- server_setxattr_cbk (frame, NULL, frame->this, -1, ENOENT);
- return 0;
-
-}
-
-
-int
-server_fsetxattr (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- server_connection_t *conn = NULL;
- gf_fop_fsetxattr_req_t *req = NULL;
- dict_t *dict = NULL;
- server_state_t *state = NULL;
- int32_t ret = -1;
- size_t pathlen = 0;
- size_t dict_len = 0;
- char *req_dictbuf = NULL;
-
- conn = SERVER_CONNECTION (frame);
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
- {
- state->fd_no = ntoh64 (req->fd);
- if (state->fd_no >= 0)
- state->fd = gf_fd_fdptr_get (conn->fdtable,
- state->fd_no);
- dict_len = ntoh32 (req->dict_len);
-
- pathlen = STRLEN_0(state->path);
- state->ino = ntoh64 (req->ino);
-
- state->flags = ntoh32 (req->flags);
- }
-
- /* Unserialize the dictionary */
- req_dictbuf = memdup (req->dict, dict_len);
- GF_VALIDATE_OR_GOTO(bound_xl->name, req_dictbuf, fail);
-
- dict = dict_new ();
- GF_VALIDATE_OR_GOTO(bound_xl->name, dict, fail);
-
- ret = dict_unserialize (req_dictbuf, dict_len, &dict);
- if (ret < 0) {
- gf_log (bound_xl->name, GF_LOG_ERROR,
- "%"PRId64": %s (%"PRId64"): failed to "
- "unserialize request buffer to dictionary",
- frame->root->unique, state->loc.path,
- state->ino);
- free (req_dictbuf);
- goto fail;
- } else{
- dict->extra_free = req_dictbuf;
- }
-
- STACK_WIND (frame, server_setxattr_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->fsetxattr,
- state->fd, dict, state->flags);
-
- if (dict)
- dict_unref (dict);
-
- return 0;
-fail:
- if (dict)
- dict_unref (dict);
-
- server_fsetxattr_cbk (frame, NULL, frame->this,
- -1, ENOENT);
- return 0;
-}
-
-
-int
-server_fxattrop (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- server_connection_t *conn = NULL;
- gf_fop_fxattrop_req_t *req = NULL;
- dict_t *dict = NULL;
- server_state_t *state = NULL;
- size_t dict_len = 0;
- char *req_dictbuf = NULL;
- int32_t ret = -1;
-
- conn = SERVER_CONNECTION(frame);
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
- {
- state->fd_no = ntoh64 (req->fd);
- if (state->fd_no >= 0)
- state->fd = gf_fd_fdptr_get (conn->fdtable,
- state->fd_no);
-
- dict_len = ntoh32 (req->dict_len);
- state->ino = ntoh64 (req->ino);
- state->flags = ntoh32 (req->flags);
- }
-
- if (dict_len) {
- /* Unserialize the dictionary */
- req_dictbuf = memdup (req->dict, dict_len);
- GF_VALIDATE_OR_GOTO(bound_xl->name, req_dictbuf, fail);
-
- dict = dict_new ();
- GF_VALIDATE_OR_GOTO(bound_xl->name, dict, fail);
-
- ret = dict_unserialize (req_dictbuf, dict_len, &dict);
- if (ret < 0) {
- gf_log (bound_xl->name, GF_LOG_ERROR,
- "fd - %"PRId64" (%"PRId64"): failed to unserialize "
- "request buffer to dictionary",
- state->fd_no, state->fd->inode->ino);
- free (req_dictbuf);
- goto fail;
- } else {
- dict->extra_free = req_dictbuf;
- }
- }
-
- gf_log (bound_xl->name, GF_LOG_TRACE,
- "%"PRId64": FXATTROP \'fd=%"PRId64" (%"PRId64")\'",
- frame->root->unique, state->fd_no, state->fd->inode->ino);
-
- STACK_WIND (frame, server_fxattrop_cbk,
- bound_xl,
- bound_xl->fops->fxattrop,
- state->fd, state->flags, dict);
- if (dict)
- dict_unref (dict);
- return 0;
-
-fail:
- if (dict)
- dict_unref (dict);
-
- server_fxattrop_cbk (frame, NULL, frame->this, -1, EINVAL, NULL);
- return 0;
-}
-
-
-int
-server_xattrop_resume (call_frame_t *frame, xlator_t *this,
- loc_t *loc, gf_xattrop_flags_t flags, dict_t *dict)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE(frame);
-
- gf_log (BOUND_XL(frame)->name, GF_LOG_TRACE,
- "%"PRId64": XATTROP \'%s (%"PRId64")\'",
- frame->root->unique, state->path, state->ino);
-
- STACK_WIND (frame, server_xattrop_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->xattrop,
- loc, flags, dict);
- return 0;
-}
-
-
-int
-server_xattrop (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_xattrop_req_t *req = NULL;
- dict_t *dict = NULL;
- server_state_t *state = NULL;
- call_stub_t *xattrop_stub = NULL;
- int32_t ret = -1;
- size_t pathlen = 0;
- size_t dict_len = 0;
- char *req_dictbuf = NULL;
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
- {
- dict_len = ntoh32 (req->dict_len);
- state->ino = ntoh64 (req->ino);
- state->path = req->path + dict_len;
- pathlen = STRLEN_0(state->path);
- state->flags = ntoh32 (req->flags);
- }
-
- ret = server_loc_fill (&(state->loc), state,
- state->ino, 0, NULL, state->path);
-
- if (dict_len) {
- /* Unserialize the dictionary */
- req_dictbuf = memdup (req->dict, dict_len);
- GF_VALIDATE_OR_GOTO(bound_xl->name, req_dictbuf, fail);
-
- dict = dict_new ();
- GF_VALIDATE_OR_GOTO(bound_xl->name, dict, fail);
-
- ret = dict_unserialize (req_dictbuf, dict_len, &dict);
- if (ret < 0) {
- gf_log (bound_xl->name, GF_LOG_ERROR,
- "%s (%"PRId64"): failed to unserialize "
- "request buffer to dictionary",
- state->loc.path, state->ino);
- goto fail;
- } else {
- dict->extra_free = req_dictbuf;
- }
- }
- xattrop_stub = fop_xattrop_stub (frame, server_xattrop_resume,
- &(state->loc), state->flags, dict);
- GF_VALIDATE_OR_GOTO(bound_xl->name, xattrop_stub, fail);
-
- if (((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) ||
- (state->loc.inode == NULL)) {
- do_path_lookup (xattrop_stub, &(state->loc));
- } else {
- call_resume (xattrop_stub);
- }
-
- if (dict)
- dict_unref (dict);
- return 0;
-fail:
- if (dict)
- dict_unref (dict);
-
- server_xattrop_cbk (frame, NULL, frame->this, -1, EINVAL, NULL);
- return 0;
-}
-
-
-int
-server_getxattr_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE(frame);
-
- gf_log (BOUND_XL(frame)->name, GF_LOG_TRACE,
- "%"PRId64": GETXATTR \'%s (%"PRId64")\'",
- frame->root->unique, state->path, state->ino);
-
- STACK_WIND (frame, server_getxattr_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->getxattr,
- loc, name);
- return 0;
-}
-
-/*
- * server_getxattr - getxattr function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- * not for external reference
- */
-int
-server_getxattr (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_getxattr_req_t *req = NULL;
- call_stub_t *getxattr_stub = NULL;
- server_state_t *state = NULL;
- int32_t ret = -1;
- size_t namelen = 0;
- size_t pathlen = 0;
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
- {
- pathlen = STRLEN_0(req->path);
-
- state->path = req->path;
- state->ino = ntoh64 (req->ino);
-
- namelen = ntoh32 (req->namelen);
- if (namelen)
- state->name = (req->name + pathlen);
- }
-
- ret = server_loc_fill (&(state->loc), state,
- state->ino, 0, NULL, state->path);
-
- getxattr_stub = fop_getxattr_stub (frame, server_getxattr_resume,
- &(state->loc), state->name);
-
- if (((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) ||
- (state->loc.inode == NULL)) {
- do_path_lookup (getxattr_stub, &(state->loc));
- } else {
- call_resume (getxattr_stub);
- }
-
- return 0;
-}
-
-
-int
-server_fgetxattr (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- server_connection_t *conn = NULL;
- gf_fop_fgetxattr_req_t *req = NULL;
- server_state_t *state = NULL;
- size_t namelen = 0;
-
- conn = SERVER_CONNECTION (frame);
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
- {
- state->fd_no = ntoh64 (req->fd);
- if (state->fd_no >= 0)
- state->fd = gf_fd_fdptr_get (conn->fdtable,
- state->fd_no);
-
- state->ino = ntoh64 (req->ino);
-
- namelen = ntoh32 (req->namelen);
- if (namelen)
- state->name = (req->name);
- }
-
- STACK_WIND (frame, server_fgetxattr_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->fgetxattr,
- state->fd, state->name);
- return 0;
-}
-
-
-int
-server_removexattr_resume (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- gf_log (BOUND_XL(frame)->name, GF_LOG_TRACE,
- "%"PRId64": REMOVEXATTR \'%s (%"PRId64")\'",
- frame->root->unique, state->path, state->ino);
-
- STACK_WIND (frame, server_removexattr_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->removexattr,
- loc, name);
- return 0;
-}
-
-/*
- * server_removexattr - removexattr function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- * not for external reference
- */
-int
-server_removexattr (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_removexattr_req_t *req = NULL;
- call_stub_t *removexattr_stub = NULL;
- server_state_t *state = NULL;
- int32_t ret = -1;
- size_t pathlen = 0;
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
- {
- pathlen = STRLEN_0(req->path);
-
- state->path = req->path;
- state->ino = ntoh64 (req->ino);
-
- state->name = (req->name + pathlen);
- }
-
- ret = server_loc_fill (&(state->loc), state,
- state->ino, 0, NULL, state->path);
-
- removexattr_stub = fop_removexattr_stub (frame,
- server_removexattr_resume,
- &(state->loc),
- state->name);
-
- if (((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) ||
- (state->loc.inode == NULL)) {
- do_path_lookup (removexattr_stub, &(state->loc));
- } else {
- call_resume (removexattr_stub);
- }
-
- return 0;
-}
-
-
-/*
- * server_statfs - statfs function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- * not for external reference
- */
-int
-server_statfs (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_statfs_req_t *req = NULL;
- server_state_t *state = NULL;
- int32_t ret = -1;
-
- req = gf_param (hdr);
-
- state = CALL_STATE(frame);
- state->ino = ntoh64 (req->ino);
- state->path = req->path;
-
- ret = server_loc_fill (&state->loc, state, state->ino, 0, NULL,
- state->path);
-
- gf_log (BOUND_XL(frame)->name, GF_LOG_TRACE,
- "%"PRId64": STATFS \'%s (%"PRId64")\'",
- frame->root->unique, state->path, state->ino);
-
- STACK_WIND (frame, server_statfs_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->statfs,
- &(state->loc));
-
- return 0;
-}
-
-
-int
-server_opendir_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- fd_t *fd)
-{
- server_state_t *state = NULL;
- fd_t *new_fd = NULL;
-
- state = CALL_STATE (frame);
- new_fd = fd_create (loc->inode, frame->root->pid);
- state->fd = fd_ref (new_fd);
-
- gf_log (BOUND_XL(frame)->name, GF_LOG_TRACE,
- "%"PRId64": OPENDIR \'%s (%"PRId64")\'",
- frame->root->unique, state->path, state->ino);
-
- STACK_WIND (frame, server_opendir_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->opendir,
- loc, state->fd);
-
- return 0;
-}
-
-
-/*
- * server_opendir - opendir function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- * not for external reference
- */
-int
-server_opendir (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- call_stub_t *opendir_stub = NULL;
- gf_fop_opendir_req_t *req = NULL;
- server_state_t *state = NULL;
- int32_t ret = -1;
- size_t pathlen = 0;
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
- {
- state->path = req->path;
- pathlen = STRLEN_0(state->path);
- state->ino = ntoh64 (req->ino);
- }
-
- ret = server_loc_fill (&state->loc, state, state->ino, 0, NULL,
- state->path);
-
- opendir_stub = fop_opendir_stub (frame, server_opendir_resume,
- &(state->loc), NULL);
-
- if (((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) ||
- (state->loc.inode == NULL)) {
- do_path_lookup (opendir_stub, &(state->loc));
- } else {
- call_resume (opendir_stub);
- }
-
- return 0;
-}
-
-
-/*
- * server_releasedir - releasedir function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- * not for external reference
- */
-int
-server_releasedir (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_cbk_releasedir_req_t *req = NULL;
- server_state_t *state = NULL;
- server_connection_t *conn = NULL;
-
- conn = SERVER_CONNECTION(frame);
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
-
- state->fd_no = ntoh64 (req->fd);
- state->fd = gf_fd_fdptr_get (conn->fdtable, state->fd_no);
-
- if (state->fd == NULL) {
- gf_log (frame->this->name, GF_LOG_ERROR,
- "fd - %"PRId64": unresolved fd",
- state->fd_no);
-
- server_releasedir_cbk (frame, NULL, frame->this, -1, EBADF);
- goto out;
- }
-
- gf_log (bound_xl->name, GF_LOG_TRACE,
- "%"PRId64": RELEASEDIR \'fd=%"PRId64" (%"PRId64")\'",
- frame->root->unique, state->fd_no, state->fd->inode->ino);
-
- gf_fd_put (conn->fdtable, state->fd_no);
-
- server_releasedir_cbk (frame, NULL, frame->this, 0, 0);
-out:
- return 0;
-}
-
-
-/*
- * server_readdir - readdir function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- * not for external reference
- */
-int
-server_getdents (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_getdents_req_t *req = NULL;
- server_state_t *state = NULL;
- server_connection_t *conn = NULL;
-
- conn = SERVER_CONNECTION (frame);
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
- {
- state->fd_no = ntoh64 (req->fd);
- if (state->fd_no >= 0)
- state->fd = gf_fd_fdptr_get (conn->fdtable,
- state->fd_no);
-
- state->size = ntoh32 (req->size);
- state->offset = ntoh64 (req->offset);
- state->flags = ntoh32 (req->flags);
- }
-
-
- if (state->fd == NULL) {
- gf_log (frame->this->name, GF_LOG_ERROR,
- "fd - %"PRId64": unresolved fd",
- state->fd_no);
-
- server_getdents_cbk (frame, NULL, frame->this, -1, EBADF,
- NULL, 0);
- goto out;
- }
-
- gf_log (bound_xl->name, GF_LOG_TRACE,
- "%"PRId64": GETDENTS \'fd=%"PRId64" (%"PRId64"); "
- "offset=%"PRId64"; size=%"PRId64,
- frame->root->unique, state->fd_no, state->fd->inode->ino,
- state->offset, (int64_t)state->size);
-
- STACK_WIND (frame, server_getdents_cbk,
- bound_xl,
- bound_xl->fops->getdents,
- state->fd, state->size, state->offset, state->flags);
-out:
- return 0;
-}
-
-
-/*
- * server_readdir - readdir function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- * not for external reference
- */
-int
-server_readdir (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_readdir_req_t *req = NULL;
- server_state_t *state = NULL;
- server_connection_t *conn = NULL;
-
- conn = SERVER_CONNECTION(frame);
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
- {
- state->fd_no = ntoh64 (req->fd);
- if (state->fd_no >= 0)
- state->fd = gf_fd_fdptr_get (conn->fdtable,
- state->fd_no);
-
- state->size = ntoh32 (req->size);
- state->offset = ntoh64 (req->offset);
- }
-
-
- if (state->fd == NULL) {
- gf_log (frame->this->name, GF_LOG_ERROR,
- "fd - %"PRId64": unresolved fd",
- state->fd_no);
-
- server_readdir_cbk (frame, NULL, frame->this, -1, EBADF, NULL);
- goto out;
- }
-
- gf_log (bound_xl->name, GF_LOG_TRACE,
- "%"PRId64": READDIR \'fd=%"PRId64" (%"PRId64"); "
- "offset=%"PRId64"; size=%"PRId64,
- frame->root->unique, state->fd_no, state->fd->inode->ino,
- state->offset, (int64_t)state->size);
-
- STACK_WIND (frame, server_readdir_cbk,
- bound_xl,
- bound_xl->fops->readdir,
- state->fd, state->size, state->offset);
-out:
- return 0;
-}
-
-
-
-/*
- * server_fsyncdir - fsyncdir function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- * not for external reference
- */
-int
-server_fsyncdir (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_fsyncdir_req_t *req = NULL;
- server_state_t *state = NULL;
- server_connection_t *conn = NULL;
-
- conn = SERVER_CONNECTION (frame);
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
- {
- state->fd_no = ntoh64 (req->fd);
- if (state->fd_no >= 0)
- state->fd = gf_fd_fdptr_get (conn->fdtable,
- state->fd_no);
-
- state->flags = ntoh32 (req->data);
- }
-
- if (state->fd == NULL) {
- gf_log (frame->this->name, GF_LOG_ERROR,
- "fd - %"PRId64": unresolved fd",
- state->fd_no);
-
- server_fsyncdir_cbk (frame, NULL, frame->this, -1, EBADF);
- goto out;
- }
-
- gf_log (bound_xl->name, GF_LOG_TRACE,
- "%"PRId64": FSYNCDIR \'fd=%"PRId64" (%"PRId64")\'",
- frame->root->unique, state->fd_no, state->fd->inode->ino);
-
- STACK_WIND (frame, server_fsyncdir_cbk,
- bound_xl,
- bound_xl->fops->fsyncdir,
- state->fd, state->flags);
-out:
- return 0;
-}
-
-
-int
-server_mknod_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, dev_t dev)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->loc.parent == NULL)
- state->loc.parent = inode_ref (loc->parent);
-
- state->loc.inode = inode_new (state->itable);
-
- gf_log (BOUND_XL(frame)->name, GF_LOG_TRACE,
- "%"PRId64": MKNOD \'%"PRId64"/%s\'",
- frame->root->unique, state->par, state->bname);
-
- STACK_WIND (frame, server_mknod_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->mknod,
- &(state->loc), mode, dev);
-
- return 0;
-}
-
-/*
- * server_mknod - mknod function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- * not for external reference
- */
-int
-server_mknod (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_mknod_req_t *req = NULL;
- server_state_t *state = NULL;
- call_stub_t *mknod_stub = NULL;
- int32_t ret = -1;
- size_t pathlen = 0;
-
- req = gf_param (hdr);
- state = CALL_STATE (frame);
- {
- pathlen = STRLEN_0(req->path);
-
- state->par = ntoh64 (req->par);
- state->path = req->path;
- if (IS_NOT_ROOT(pathlen))
- state->bname = req->bname + pathlen;
-
- state->mode = ntoh32 (req->mode);
- state->dev = ntoh64 (req->dev);
- }
- ret = server_loc_fill (&(state->loc), state, 0, state->par,
- state->bname, state->path);
-
- mknod_stub = fop_mknod_stub (frame, server_mknod_resume,
- &(state->loc), state->mode, state->dev);
-
- if (state->loc.parent == NULL) {
- do_path_lookup (mknod_stub, &(state->loc));
- } else {
- call_resume (mknod_stub);
- }
-
- return 0;
-}
-
-
-int
-server_mkdir_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode)
-
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->loc.parent == NULL)
- state->loc.parent = inode_ref (loc->parent);
-
- state->loc.inode = inode_new (state->itable);
-
- gf_log (BOUND_XL(frame)->name, GF_LOG_TRACE,
- "%"PRId64": MKDIR \'%"PRId64"/%s\'",
- frame->root->unique, state->par, state->bname);
-
- STACK_WIND (frame, server_mkdir_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->mkdir,
- &(state->loc), state->mode);
-
- return 0;
-}
-
-/*
- * server_mkdir - mkdir function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params:
- *
- * not for external reference
- */
-int
-server_mkdir (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_mkdir_req_t *req = NULL;
- server_state_t *state = NULL;
- call_stub_t *mkdir_stub = NULL;
- int32_t ret = -1;
- size_t pathlen = 0;
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
- {
- pathlen = STRLEN_0(req->path);
- state->mode = ntoh32 (req->mode);
-
- state->path = req->path;
- state->bname = req->bname + pathlen;
- state->par = ntoh64 (req->par);
- }
-
-
- ret = server_loc_fill (&(state->loc), state, 0, state->par,
- state->bname, state->path);
-
- mkdir_stub = fop_mkdir_stub (frame, server_mkdir_resume,
- &(state->loc), state->mode);
-
- if (state->loc.parent == NULL) {
- do_path_lookup (mkdir_stub, &(state->loc));
- } else {
- call_resume (mkdir_stub);
- }
-
- return 0;
-}
-
-
-int
-server_rmdir_resume (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE(frame);
-
- if (state->loc.parent == NULL)
- state->loc.parent = inode_ref (loc->parent);
-
- if (state->loc.inode == NULL)
- state->loc.inode = inode_ref (loc->inode);
-
- gf_log (BOUND_XL(frame)->name, GF_LOG_TRACE,
- "%"PRId64": RMDIR \'%"PRId64"/%s\'",
- frame->root->unique, state->par, state->bname);
-
- STACK_WIND (frame, server_rmdir_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->rmdir,
- loc);
- return 0;
-}
-
-/*
- * server_rmdir - rmdir function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params:
- *
- * not for external reference
- */
-int
-server_rmdir (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- call_stub_t *rmdir_stub = NULL;
- gf_fop_rmdir_req_t *req = NULL;
- server_state_t *state = NULL;
- int32_t ret = -1;
- size_t pathlen = 0;
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
- {
- pathlen = STRLEN_0(req->path);
- state->path = req->path;
- state->par = ntoh64 (req->par);
- state->bname = req->bname + pathlen;
- }
-
-
- ret = server_loc_fill (&(state->loc), state, state->ino, state->par,
- state->bname, state->path);
-
- rmdir_stub = fop_rmdir_stub (frame, server_rmdir_resume,
- &(state->loc));
-
- if ((state->loc.parent == NULL) ||
- (state->loc.inode == NULL)) {
- do_path_lookup (rmdir_stub, &(state->loc));
- } else {
- call_resume (rmdir_stub);
- }
-
- return 0;
-}
-
-
-int
-server_chown_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- uid_t uid, gid_t gid)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- gf_log (BOUND_XL(frame)->name, GF_LOG_TRACE,
- "%"PRId64": CHOWN \'%s (%"PRId64")\'",
- frame->root->unique, state->path, state->ino);
-
- STACK_WIND (frame, server_chown_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->chown,
- loc, uid, gid);
- return 0;
-}
-
-
-/*
- * server_chown - chown function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- * not for external reference
- */
-int
-server_chown (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- call_stub_t *chown_stub = NULL;
- gf_fop_chown_req_t *req = NULL;
- server_state_t *state = NULL;
- int32_t ret = -1;
- size_t pathlen = 0;
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
- {
- state->ino = ntoh64 (req->ino);
- state->path = req->path;
- pathlen = STRLEN_0(state->path);
- state->uid = ntoh32 (req->uid);
- state->gid = ntoh32 (req->gid);
- }
-
- ret = server_loc_fill (&(state->loc), state, state->ino, 0, NULL,
- state->path);
-
- chown_stub = fop_chown_stub (frame, server_chown_resume,
- &(state->loc), state->uid, state->gid);
-
- if (((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) ||
- (state->loc.inode == NULL)) {
- do_path_lookup (chown_stub, &(state->loc));
- } else {
- call_resume (chown_stub);
- }
-
- return 0;
-}
-
-
-int
-server_chmod_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE(frame);
-
- gf_log (BOUND_XL(frame)->name, GF_LOG_TRACE,
- "%"PRId64": CHMOD \'%s (%"PRId64")\'",
- frame->root->unique, state->path, state->ino);
-
- STACK_WIND (frame, server_chmod_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->chmod,
- loc, mode);
- return 0;
-
-}
-
-/*
- * server_chmod - chmod function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- * not for external reference
- */
-int
-server_chmod (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- call_stub_t *chmod_stub = NULL;
- gf_fop_chmod_req_t *req = NULL;
- server_state_t *state = NULL;
- int32_t ret = -1;
- size_t pathlen = 0;
-
- req = gf_param (hdr);
-
- state = CALL_STATE(frame);
- {
- state->ino = ntoh64 (req->ino);
- state->path = req->path;
- pathlen = STRLEN_0(state->path);
-
- state->mode = ntoh32 (req->mode);
- }
-
- ret = server_loc_fill (&(state->loc), state, state->ino, 0, NULL,
- state->path);
-
- chmod_stub = fop_chmod_stub (frame, server_chmod_resume,
- &(state->loc), state->mode);
-
- if (((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) ||
- (state->loc.inode == NULL)) {
- do_path_lookup (chmod_stub, &(state->loc));
- } else {
- call_resume (chmod_stub);
- }
-
- return 0;
-}
-
-
-int
-server_utimens_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct timespec *tv)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE(frame);
-
- gf_log (BOUND_XL(frame)->name, GF_LOG_TRACE,
- "%"PRId64": UTIMENS \'%s (%"PRId64")\'",
- frame->root->unique, state->path, state->ino);
-
- STACK_WIND (frame, server_utimens_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->utimens,
- loc, tv);
- return 0;
-}
-
-/*
- * server_utimens - utimens function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- * not for external reference
- */
-int
-server_utimens (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- call_stub_t *utimens_stub = NULL;
- gf_fop_utimens_req_t *req = NULL;
- server_state_t *state = NULL;
- int32_t ret = -1;
- size_t pathlen = 0;
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
- {
- state->ino = ntoh64 (req->ino);
- state->path = req->path;
- pathlen = STRLEN_0(state->path);
-
- gf_timespec_to_timespec (req->tv, state->tv);
- }
-
-
- ret = server_loc_fill (&(state->loc), state, state->ino, 0, NULL,
- state->path);
-
- utimens_stub = fop_utimens_stub (frame, server_utimens_resume,
- &(state->loc), state->tv);
-
- if (((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) ||
- (state->loc.inode == NULL)) {
- do_path_lookup (utimens_stub, &(state->loc));
- } else {
- call_resume (utimens_stub);
- }
-
- return 0;
-}
-
-
-int
-server_inodelk_resume (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd,
- struct flock *flock)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE(frame);
- if (state->loc.inode == NULL) {
- state->loc.inode = inode_ref (loc->inode);
- }
-
- if (state->loc.parent == NULL) {
- state->loc.parent = inode_ref (loc->parent);
- }
-
- gf_log (BOUND_XL(frame)->name, GF_LOG_TRACE,
- "%"PRId64": INODELK \'%s (%"PRId64")\'",
- frame->root->unique, state->path, state->ino);
-
- STACK_WIND (frame, server_inodelk_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->inodelk,
- volume, loc, cmd, flock);
- return 0;
-
-}
-
-
-int
-server_inodelk (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- call_stub_t *inodelk_stub = NULL;
- gf_fop_inodelk_req_t *req = NULL;
- server_state_t *state = NULL;
- size_t pathlen = 0;
- size_t vollen = 0;
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
- {
- state->cmd = ntoh32 (req->cmd);
- switch (state->cmd) {
- case GF_LK_GETLK:
- state->cmd = F_GETLK;
- break;
- case GF_LK_SETLK:
- state->cmd = F_SETLK;
- break;
- case GF_LK_SETLKW:
- state->cmd = F_SETLKW;
- break;
- }
-
- state->type = ntoh32 (req->type);
-
- pathlen = STRLEN_0(req->path);
- vollen = STRLEN_0(req->volume + vollen);
-
- state->path = req->path;
- state->volume = strdup (req->volume + vollen);
- state->ino = ntoh64 (req->ino);
-
- gf_flock_to_flock (&req->flock, &state->flock);
-
- switch (state->type) {
- case GF_LK_F_RDLCK:
- state->flock.l_type = F_RDLCK;
- break;
- case GF_LK_F_WRLCK:
- state->flock.l_type = F_WRLCK;
- break;
- case GF_LK_F_UNLCK:
- state->flock.l_type = F_UNLCK;
- break;
- }
-
- }
-
- server_loc_fill (&(state->loc), state, state->ino, 0, NULL,
- state->path);
-
- inodelk_stub = fop_inodelk_stub (frame, server_inodelk_resume,
- state->volume, &state->loc,
- state->cmd, &state->flock);
-
- if (state->loc.inode == NULL) {
- do_path_lookup (inodelk_stub, &(state->loc));
- } else {
- call_resume (inodelk_stub);
- }
-
- return 0;
-}
-
-
-int
-server_finodelk (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_finodelk_req_t *req = NULL;
- server_state_t *state = NULL;
- server_connection_t *conn = NULL;
-
- conn = SERVER_CONNECTION(frame);
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
- {
- state->volume = strdup (req->volume);
-
- state->fd_no = ntoh64 (req->fd);
- if (state->fd_no >= 0)
- state->fd = gf_fd_fdptr_get (conn->fdtable,
- state->fd_no);
-
- state->cmd = ntoh32 (req->cmd);
- switch (state->cmd) {
- case GF_LK_GETLK:
- state->cmd = F_GETLK;
- break;
- case GF_LK_SETLK:
- state->cmd = F_SETLK;
- break;
- case GF_LK_SETLKW:
- state->cmd = F_SETLKW;
- break;
- }
-
- state->type = ntoh32 (req->type);
-
- gf_flock_to_flock (&req->flock, &state->flock);
-
- switch (state->type) {
- case GF_LK_F_RDLCK:
- state->flock.l_type = F_RDLCK;
- break;
- case GF_LK_F_WRLCK:
- state->flock.l_type = F_WRLCK;
- break;
- case GF_LK_F_UNLCK:
- state->flock.l_type = F_UNLCK;
- break;
- }
-
- }
-
- if (state->fd == NULL) {
- gf_log (frame->this->name, GF_LOG_ERROR,
- "fd - %"PRId64": unresolved fd",
- state->fd_no);
-
- server_finodelk_cbk (frame, NULL, frame->this,
- -1, EBADF);
- return -1;
- }
-
- gf_log (BOUND_XL(frame)->name, GF_LOG_TRACE,
- "%"PRId64": FINODELK \'fd=%"PRId64" (%"PRId64")\'",
- frame->root->unique, state->fd_no, state->fd->inode->ino);
-
- STACK_WIND (frame, server_finodelk_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->finodelk,
- state->volume, state->fd, state->cmd, &state->flock);
- return 0;
-}
-
-
-int
-server_entrylk_resume (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *name,
- entrylk_cmd cmd, entrylk_type type)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE(frame);
-
- if (state->loc.inode == NULL)
- state->loc.inode = inode_ref (loc->inode);
-
- if ((state->loc.parent == NULL) &&
- (loc->parent))
- state->loc.parent = inode_ref (loc->parent);
-
- gf_log (BOUND_XL(frame)->name, GF_LOG_TRACE,
- "%"PRId64": ENTRYLK \'%s (%"PRId64") \'",
- frame->root->unique, state->path, state->ino);
-
- STACK_WIND (frame, server_entrylk_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->entrylk,
- volume, loc, name, cmd, type);
- return 0;
-
-}
-
-/*
- * server_entrylk - entrylk function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- * not for external reference
- */
-int
-server_entrylk (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_entrylk_req_t *req = NULL;
- server_state_t *state = NULL;
- call_stub_t *entrylk_stub = NULL;
- size_t pathlen = 0;
- size_t namelen = 0;
- size_t vollen = 0;
-
- req = gf_param (hdr);
- state = CALL_STATE (frame);
- {
- pathlen = STRLEN_0(req->path);
-
- state->path = req->path;
- state->ino = ntoh64 (req->ino);
- namelen = ntoh64 (req->namelen);
- if (namelen)
- state->name = req->name + pathlen;
-
- vollen = STRLEN_0(req->volume + pathlen + namelen);
- state->volume = strdup (req->volume + pathlen + namelen);
-
- state->cmd = ntoh32 (req->cmd);
- state->type = ntoh32 (req->type);
- }
-
-
- server_loc_fill (&(state->loc), state, state->ino, 0, NULL,
- state->path);
-
- entrylk_stub = fop_entrylk_stub (frame, server_entrylk_resume,
- state->volume, &state->loc,
- state->name, state->cmd, state->type);
-
- if (state->loc.inode == NULL) {
- do_path_lookup (entrylk_stub, &(state->loc));
- } else {
- call_resume (entrylk_stub);
- }
-
- return 0;
-}
-
-
-int
-server_fentrylk (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_fentrylk_req_t *req = NULL;
- server_state_t *state = NULL;
- size_t namelen = 0;
- size_t vollen = 0;
- server_connection_t *conn = NULL;
-
- conn = SERVER_CONNECTION (frame);
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
- {
- state->fd_no = ntoh64 (req->fd);
- if (state->fd_no >= 0)
- state->fd = gf_fd_fdptr_get (conn->fdtable,
- state->fd_no);
-
- state->cmd = ntoh32 (req->cmd);
- state->type = ntoh32 (req->type);
- namelen = ntoh64 (req->namelen);
-
- if (namelen)
- state->name = req->name;
-
- vollen = STRLEN_0(req->volume + namelen);
- state->volume = strdup (req->volume + namelen);
- }
-
- if (state->fd == NULL) {
- gf_log (frame->this->name, GF_LOG_ERROR,
- "fd - %"PRId64": unresolved fd",
- state->fd_no);
-
- server_fentrylk_cbk (frame, NULL, frame->this, -1, EBADF);
- return -1;
- }
-
- gf_log (BOUND_XL(frame)->name, GF_LOG_TRACE,
- "%"PRId64": FENTRYLK \'fd=%"PRId64" (%"PRId64")\'",
- frame->root->unique, state->fd_no, state->fd->inode->ino);
-
- STACK_WIND (frame, server_fentrylk_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->fentrylk,
- state->volume, state->fd, state->name,
- state->cmd, state->type);
- return 0;
-}
-
-
-int
-server_access_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t mask)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- gf_log (BOUND_XL(frame)->name, GF_LOG_TRACE,
- "%"PRId64": ACCESS \'%s (%"PRId64")\'",
- frame->root->unique, state->path, state->ino);
-
- STACK_WIND (frame, server_access_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->access,
- loc, mask);
- return 0;
-}
-
-/*
- * server_access - access function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- * not for external reference
- */
-int
-server_access (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- call_stub_t *access_stub = NULL;
- gf_fop_access_req_t *req = NULL;
- server_state_t *state = NULL;
- int32_t ret = -1;
- size_t pathlen = 0;
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
-
- state->mask = ntoh32 (req->mask);
-
- state->ino = ntoh64 (req->ino);
- state->path = req->path;
- pathlen = STRLEN_0(state->path);
-
- ret = server_loc_fill (&(state->loc), state, state->ino, 0, NULL,
- state->path);
-
- access_stub = fop_access_stub (frame, server_access_resume,
- &(state->loc), state->mask);
-
- if (((state->loc.parent == NULL) && IS_NOT_ROOT(pathlen)) ||
- (state->loc.inode == NULL)) {
- do_path_lookup (access_stub, &(state->loc));
- } else {
- call_resume (access_stub);
- }
-
- return 0;
-}
-
-
-int
-server_symlink_resume (call_frame_t *frame, xlator_t *this,
- const char *linkname, loc_t *loc)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
- if (state->loc.parent == NULL)
- state->loc.parent = inode_ref (loc->parent);
-
- state->loc.inode = inode_new (BOUND_XL(frame)->itable);
-
- gf_log (BOUND_XL(frame)->name, GF_LOG_TRACE,
- "%"PRId64": SYMLINK \'%"PRId64"/%s \'",
- frame->root->unique, state->par, state->bname);
-
- STACK_WIND (frame, server_symlink_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->symlink,
- linkname, &(state->loc));
-
- return 0;
-}
-
-/*
- * server_symlink- symlink function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- * not for external reference
- */
-
-int
-server_symlink (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- server_state_t *state = NULL;
- gf_fop_symlink_req_t *req = NULL;
- call_stub_t *symlink_stub = NULL;
- int32_t ret = -1;
- size_t pathlen = 0;
- size_t baselen = 0;
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
- {
- pathlen = STRLEN_0(req->path);
- baselen = STRLEN_0(req->bname + pathlen);
-
- state->par = ntoh64 (req->par);
- state->path = req->path;
- state->bname = req->bname + pathlen;
-
- state->name = (req->linkname + pathlen + baselen);
- }
-
- ret = server_loc_fill (&(state->loc), state, 0, state->par,
- state->bname, state->path);
-
- symlink_stub = fop_symlink_stub (frame, server_symlink_resume,
- state->name, &(state->loc));
-
- if (state->loc.parent == NULL) {
- do_path_lookup (symlink_stub, &(state->loc));
- } else {
- call_resume (symlink_stub);
- }
-
- return 0;
-}
-
-
-int
-server_link_resume (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->loc.parent == NULL)
- state->loc.parent = inode_ref (oldloc->parent);
-
- if (state->loc.inode == NULL) {
- state->loc.inode = inode_ref (oldloc->inode);
- } else if (state->loc.inode != oldloc->inode) {
- if (state->loc.inode)
- inode_unref (state->loc.inode);
- state->loc.inode = inode_ref (oldloc->inode);
- }
-
- if (state->loc2.parent == NULL)
- state->loc2.parent = inode_ref (newloc->parent);
-
- state->loc2.inode = inode_ref (state->loc.inode);
-
- gf_log (BOUND_XL(frame)->name, GF_LOG_TRACE,
- "%"PRId64": LINK \'%"PRId64"/%s ==> %s (%"PRId64")\'",
- frame->root->unique, state->par2, state->bname2,
- state->path, state->ino);
-
- STACK_WIND (frame, server_link_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->link,
- &(state->loc), &(state->loc2));
- return 0;
-}
-
-/*
- * server_link - link function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params:
- *
- * not for external reference
- */
-int
-server_link (call_frame_t *frame, xlator_t *this,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_link_req_t *req = NULL;
- server_state_t *state = NULL;
- call_stub_t *link_stub = NULL;
- int32_t ret = -1;
- size_t oldpathlen = 0;
- size_t newpathlen = 0;
- size_t newbaselen = 0;
-
- req = gf_param (hdr);
-
- state = CALL_STATE(frame);
- {
- oldpathlen = STRLEN_0(req->oldpath);
- newpathlen = STRLEN_0(req->newpath + oldpathlen);
- newbaselen = STRLEN_0(req->newbname + oldpathlen + newpathlen);
-
- state->path = req->oldpath;
- state->path2 = req->newpath + oldpathlen;
- state->bname2 = req->newbname + oldpathlen + newpathlen;
- state->ino = ntoh64 (req->oldino);
- state->par2 = ntoh64 (req->newpar);
- }
-
- ret = server_loc_fill (&(state->loc), state, state->ino, 0, NULL,
- state->path);
- ret = server_loc_fill (&(state->loc2), state, 0, state->par2,
- state->bname2, state->path2);
-
- link_stub = fop_link_stub (frame, server_link_resume,
- &(state->loc), &(state->loc2));
-
- if ((state->loc.parent == NULL) ||
- (state->loc.inode == NULL)) {
- do_path_lookup (link_stub, &(state->loc));
- } else if (state->loc2.parent == NULL) {
- do_path_lookup (link_stub, &(state->loc2));
- } else {
- call_resume (link_stub);
- }
-
- return 0;
-}
-
-
-int
-server_rename_resume (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE (frame);
-
- if (state->loc.parent == NULL)
- state->loc.parent = inode_ref (oldloc->parent);
-
- if (state->loc.inode == NULL) {
- state->loc.inode = inode_ref (oldloc->inode);
- }
-
- if (state->loc2.parent == NULL)
- state->loc2.parent = inode_ref (newloc->parent);
-
-
- gf_log (BOUND_XL(frame)->name, GF_LOG_TRACE,
- "%"PRId64": RENAME %s (%"PRId64"/%s) ==> %s (%"PRId64"/%s)",
- frame->root->unique, state->path, state->par, state->bname,
- state->path2, state->par2, state->bname2);
-
- STACK_WIND (frame, server_rename_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->rename,
- &(state->loc), &(state->loc2));
- return 0;
-}
-
-/*
- * server_rename - rename function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- * not for external reference
- */
-int
-server_rename (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_fop_rename_req_t *req = NULL;
- server_state_t *state = NULL;
- call_stub_t *rename_stub = NULL;
- int32_t ret = -1;
- size_t oldpathlen = 0;
- size_t oldbaselen = 0;
- size_t newpathlen = 0;
- size_t newbaselen = 0;
-
- req = gf_param (hdr);
-
- state = CALL_STATE (frame);
- {
- oldpathlen = STRLEN_0(req->oldpath);
- oldbaselen = STRLEN_0(req->oldbname + oldpathlen);
- newpathlen = STRLEN_0(req->newpath + oldpathlen + oldbaselen);
- newbaselen = STRLEN_0(req->newbname + oldpathlen +
- oldbaselen + newpathlen);
-
- state->path = req->oldpath;
- state->bname = req->oldbname + oldpathlen;
- state->path2 = req->newpath + oldpathlen + oldbaselen;
- state->bname2 = (req->newbname + oldpathlen + oldbaselen +
- newpathlen);
-
- state->par = ntoh64 (req->oldpar);
- state->par2 = ntoh64 (req->newpar);
- }
-
- ret = server_loc_fill (&(state->loc), state, 0, state->par,
- state->bname, state->path);
- ret = server_loc_fill (&(state->loc2), state, 0, state->par2,
- state->bname2, state->path2);
-
- rename_stub = fop_rename_stub (frame, server_rename_resume,
- &(state->loc), &(state->loc2));
-
- if ((state->loc.parent == NULL) ||
- (state->loc.inode == NULL)){
- do_path_lookup (rename_stub, &(state->loc));
- } else if ((state->loc2.parent == NULL)){
- do_path_lookup (rename_stub, &(state->loc2));
- } else {
- /* we have found inode for both oldpath and newpath in
- * inode cache. lets continue with fops->rename() */
- call_resume (rename_stub);
- }
-
- return 0;
-}
-
-
-/*
- * server_lk - lk function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- * not for external reference
- */
-
-int
-server_lk (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- struct flock lock = {0, };
- gf_fop_lk_req_t *req = NULL;
- server_state_t *state = NULL;
- server_connection_t *conn = NULL;
-
- conn = SERVER_CONNECTION (frame);
-
- req = gf_param (hdr);
- state = CALL_STATE (frame);
- {
- state->fd_no = ntoh64 (req->fd);
- if (state->fd_no >= 0)
- state->fd = gf_fd_fdptr_get (conn->fdtable,
- state->fd_no);
-
- state->cmd = ntoh32 (req->cmd);
- state->type = ntoh32 (req->type);
- }
-
-
- if (state->fd == NULL) {
- gf_log (frame->this->name, GF_LOG_ERROR,
- "fd - %"PRId64": unresolved fd",
- state->fd_no);
-
- server_lk_cbk (frame, NULL, frame->this, -1, EBADF, NULL);
- goto out;
- }
-
- switch (state->cmd) {
- case GF_LK_GETLK:
- state->cmd = F_GETLK;
- break;
- case GF_LK_SETLK:
- state->cmd = F_SETLK;
- break;
- case GF_LK_SETLKW:
- state->cmd = F_SETLKW;
- break;
- }
-
- switch (state->type) {
- case GF_LK_F_RDLCK:
- lock.l_type = F_RDLCK;
- break;
- case GF_LK_F_WRLCK:
- lock.l_type = F_WRLCK;
- break;
- case GF_LK_F_UNLCK:
- lock.l_type = F_UNLCK;
- break;
- default:
- gf_log (bound_xl->name, GF_LOG_ERROR,
- "fd - %"PRId64" (%"PRId64"): Unknown lock type: %"PRId32"!",
- state->fd_no, state->fd->inode->ino, state->type);
- break;
- }
-
- gf_flock_to_flock (&req->flock, &lock);
-
- gf_log (BOUND_XL(frame)->name, GF_LOG_TRACE,
- "%"PRId64": LK \'fd=%"PRId64" (%"PRId64")\'",
- frame->root->unique, state->fd_no, state->fd->inode->ino);
-
- STACK_WIND (frame, server_lk_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->lk,
- state->fd, state->cmd, &lock);
-out:
- return 0;
-}
-
-
-/*
- * server_writedir -
- *
- * @frame:
- * @bound_xl:
- * @params:
- *
- */
-int
-server_setdents (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- server_connection_t *conn = NULL;
- gf_fop_setdents_req_t *req = NULL;
- server_state_t *state = NULL;
- dir_entry_t *entry = NULL;
- dir_entry_t *trav = NULL;
- dir_entry_t *prev = NULL;
- int32_t count = 0;
- int32_t i = 0;
- int32_t bread = 0;
- char *ender = NULL;
- char *buffer_ptr = NULL;
- char tmp_buf[512] = {0,};
-
- conn = SERVER_CONNECTION(frame);
-
- req = gf_param (hdr);
- state = CALL_STATE(frame);
-
- state->fd_no = ntoh64 (req->fd);
- if (state->fd_no >= 0)
- state->fd = gf_fd_fdptr_get (conn->fdtable,
- state->fd_no);
-
- state->nr_count = ntoh32 (req->count);
-
- if (state->fd == NULL) {
- gf_log (frame->this->name, GF_LOG_ERROR,
- "fd - %"PRId64": unresolved fd",
- state->fd_no);
-
- server_setdents_cbk (frame, NULL, frame->this, -1, EBADF);
-
- goto out;
- }
-
- if (iobuf == NULL) {
- gf_log (frame->this->name, GF_LOG_ERROR,
- "fd - %"PRId64" (%"PRId64"): received a null buffer, "
- "returning EINVAL",
- state->fd_no, state->fd->inode->ino);
-
- server_setdents_cbk (frame, NULL, frame->this, -1, ENOMEM);
-
- goto out;
- }
-
- entry = CALLOC (1, sizeof (dir_entry_t));
- ERR_ABORT (entry);
- prev = entry;
- buffer_ptr = iobuf->ptr;
-
- for (i = 0; i < state->nr_count ; i++) {
- bread = 0;
- trav = CALLOC (1, sizeof (dir_entry_t));
- ERR_ABORT (trav);
-
- ender = strchr (buffer_ptr, '/');
- if (!ender)
- break;
- count = ender - buffer_ptr;
- trav->name = CALLOC (1, count + 2);
- ERR_ABORT (trav->name);
-
- strncpy (trav->name, buffer_ptr, count);
- bread = count + 1;
- buffer_ptr += bread;
-
- ender = strchr (buffer_ptr, '\n');
- if (!ender)
- break;
- count = ender - buffer_ptr;
- strncpy (tmp_buf, buffer_ptr, count);
- bread = count + 1;
- buffer_ptr += bread;
-
- /* TODO: use str_to_stat instead */
- {
- uint64_t dev;
- uint64_t ino;
- uint32_t mode;
- uint32_t nlink;
- uint32_t uid;
- uint32_t gid;
- uint64_t rdev;
- uint64_t size;
- uint32_t blksize;
- uint64_t blocks;
- uint32_t atime;
- uint32_t atime_nsec;
- uint32_t mtime;
- uint32_t mtime_nsec;
- uint32_t ctime;
- uint32_t ctime_nsec;
-
- sscanf (tmp_buf, GF_STAT_PRINT_FMT_STR,
- &dev, &ino, &mode, &nlink, &uid, &gid, &rdev,
- &size, &blksize, &blocks, &atime, &atime_nsec,
- &mtime, &mtime_nsec, &ctime, &ctime_nsec);
-
- trav->buf.st_dev = dev;
- trav->buf.st_ino = ino;
- trav->buf.st_mode = mode;
- trav->buf.st_nlink = nlink;
- trav->buf.st_uid = uid;
- trav->buf.st_gid = gid;
- trav->buf.st_rdev = rdev;
- trav->buf.st_size = size;
- trav->buf.st_blksize = blksize;
- trav->buf.st_blocks = blocks;
-
- trav->buf.st_atime = atime;
- trav->buf.st_mtime = mtime;
- trav->buf.st_ctime = ctime;
-
- ST_ATIM_NSEC_SET(&trav->buf, atime_nsec);
- ST_MTIM_NSEC_SET(&trav->buf, mtime_nsec);
- ST_CTIM_NSEC_SET(&trav->buf, ctime_nsec);
-
- }
-
- ender = strchr (buffer_ptr, '\n');
- if (!ender)
- break;
- count = ender - buffer_ptr;
- *ender = '\0';
- if (S_ISLNK (trav->buf.st_mode)) {
- trav->link = strdup (buffer_ptr);
- } else
- trav->link = "";
- bread = count + 1;
- buffer_ptr += bread;
-
- prev->next = trav;
- prev = trav;
- }
-
- gf_log (bound_xl->name, GF_LOG_TRACE,
- "%"PRId64": SETDENTS \'fd=%"PRId64" (%"PRId64"); count=%"PRId64,
- frame->root->unique, state->fd_no, state->fd->inode->ino,
- (int64_t)state->nr_count);
-
- STACK_WIND (frame, server_setdents_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->setdents,
- state->fd, state->flags, entry, state->nr_count);
-
-
- /* Free the variables allocated in this fop here */
- trav = entry->next;
- prev = entry;
- while (trav) {
- prev->next = trav->next;
- FREE (trav->name);
- if (S_ISLNK (trav->buf.st_mode))
- FREE (trav->link);
- FREE (trav);
- trav = prev->next;
- }
- FREE (entry);
-
-out:
- if (iobuf)
- iobuf_unref (iobuf);
- return 0;
-}
-
-
-
-/* xxx_MOPS */
-int
-_volfile_update_checksum (xlator_t *this, char *key, uint32_t checksum)
-{
- server_conf_t *conf = NULL;
- struct _volfile_ctx *temp_volfile = NULL;
-
- conf = this->private;
- temp_volfile = conf->volfile;
-
- while (temp_volfile) {
- if ((NULL == key) && (NULL == temp_volfile->key))
- break;
- if ((NULL == key) || (NULL == temp_volfile->key)) {
- temp_volfile = temp_volfile->next;
- continue;
- }
- if (strcmp (temp_volfile->key, key) == 0)
- break;
- temp_volfile = temp_volfile->next;
- }
-
- if (!temp_volfile) {
- temp_volfile = CALLOC (1, sizeof (struct _volfile_ctx));
-
- temp_volfile->next = conf->volfile;
- temp_volfile->key = (key)? strdup (key): NULL;
- temp_volfile->checksum = checksum;
-
- conf->volfile = temp_volfile;
- goto out;
- }
-
- if (temp_volfile->checksum != checksum) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "the volume file got modified between earlier access "
- "and now, this may lead to inconsistency between "
- "clients, advised to remount client");
- temp_volfile->checksum = checksum;
- }
-
- out:
- return 0;
-}
-
-
-size_t
-build_volfile_path (xlator_t *this, const char *key, char *path,
- size_t path_len)
-{
- int ret = -1;
- int free_filename = 0;
- char *filename = NULL;
- char data_key[256] = {0,};
-
- /* Inform users that this option is changed now */
- ret = dict_get_str (this->options, "client-volume-filename",
- &filename);
- if (ret == 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "option 'client-volume-filename' is changed to "
- "'volume-filename.<key>' which now takes 'key' as an "
- "option to choose/fetch different files from server. "
- "Refer documentation or contact developers for more "
- "info. Currently defaulting to given file '%s'",
- filename);
- }
-
- if (key && !filename) {
- sprintf (data_key, "volume-filename.%s", key);
- ret = dict_get_str (this->options, data_key, &filename);
- if (ret < 0) {
- /* Make sure that key doesn't contain
- * "../" in path
- */
- if (!strstr (key, "../")) {
- ret = asprintf (&filename, "%s/%s.vol",
- CONFDIR, key);
- if (-1 == ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "asprintf failed to get "
- "volume file path");
- } else {
- free_filename = 1;
- }
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: invalid key", key);
- }
- }
- }
-
- if (!filename) {
- ret = dict_get_str (this->options,
- "volume-filename.default", &filename);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no default volume filename given, "
- "defaulting to %s", DEFAULT_VOLUME_FILE_PATH);
-
- filename = DEFAULT_VOLUME_FILE_PATH;
- }
- }
-
- ret = -1;
- if ((filename) && (path_len > strlen (filename))) {
- strcpy (path, filename);
- ret = strlen (filename);
- }
-
- if (free_filename)
- free (filename);
-
- return ret;
-}
-
-int
-_validate_volfile_checksum (xlator_t *this, char *key,
- uint32_t checksum)
-{
- char filename[ZR_PATH_MAX] = {0,};
- server_conf_t *conf = NULL;
- struct _volfile_ctx *temp_volfile = NULL;
- int ret = 0;
- uint32_t local_checksum = 0;
-
- conf = this->private;
- temp_volfile = conf->volfile;
-
- if (!checksum)
- goto out;
-
- if (!temp_volfile) {
- ret = build_volfile_path (this, key, filename,
- sizeof (filename));
- if (ret <= 0)
- goto out;
- ret = open (filename, O_RDONLY);
- if (-1 == ret) {
- ret = 0;
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to open volume file (%s) : %s",
- filename, strerror (errno));
- goto out;
- }
- get_checksum_for_file (ret, &local_checksum);
- _volfile_update_checksum (this, key, local_checksum);
- close (ret);
- }
-
- temp_volfile = conf->volfile;
- while (temp_volfile) {
- if ((NULL == key) && (NULL == temp_volfile->key))
- break;
- if ((NULL == key) || (NULL == temp_volfile->key)) {
- temp_volfile = temp_volfile->next;
- continue;
- }
- if (strcmp (temp_volfile->key, key) == 0)
- break;
- temp_volfile = temp_volfile->next;
- }
-
- if (!temp_volfile)
- goto out;
-
- if ((temp_volfile->checksum) &&
- (checksum != temp_volfile->checksum))
- ret = -1;
-
-out:
- return ret;
-}
-
-/* Management Calls */
-/*
- * mop_getspec - getspec function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params:
- *
- */
-int
-mop_getspec (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_hdr_common_t *_hdr = NULL;
- gf_mop_getspec_rsp_t *rsp = NULL;
- int32_t ret = -1;
- int32_t op_errno = ENOENT;
- int32_t gf_errno = 0;
- int32_t spec_fd = -1;
- size_t file_len = 0;
- size_t _hdrlen = 0;
- char filename[ZR_PATH_MAX] = {0,};
- struct stat stbuf = {0,};
- gf_mop_getspec_req_t *req = NULL;
- uint32_t checksum = 0;
- uint32_t flags = 0;
- uint32_t keylen = 0;
- char *key = NULL;
- server_conf_t *conf = NULL;
-
- req = gf_param (hdr);
- flags = ntoh32 (req->flags);
- keylen = ntoh32 (req->keylen);
- if (keylen) {
- key = req->key;
- }
-
- conf = frame->this->private;
-
- ret = build_volfile_path (frame->this, key, filename,
- sizeof (filename));
- if (ret > 0) {
- /* to allocate the proper buffer to hold the file data */
- ret = stat (filename, &stbuf);
- if (ret < 0){
- gf_log (frame->this->name, GF_LOG_ERROR,
- "Unable to stat %s (%s)",
- filename, strerror (errno));
- goto fail;
- }
-
- spec_fd = open (filename, O_RDONLY);
- if (spec_fd < 0) {
- gf_log (frame->this->name, GF_LOG_ERROR,
- "Unable to open %s (%s)",
- filename, strerror (errno));
- goto fail;
- }
- ret = 0;
- file_len = stbuf.st_size;
- if (conf->verify_volfile_checksum) {
- get_checksum_for_file (spec_fd, &checksum);
- _volfile_update_checksum (frame->this, key, checksum);
- }
- } else {
- errno = ENOENT;
- }
-
-fail:
- op_errno = errno;
-
- _hdrlen = gf_hdr_len (rsp, file_len + 1);
- _hdr = gf_hdr_new (rsp, file_len + 1);
- rsp = gf_param (_hdr);
-
- _hdr->rsp.op_ret = hton32 (ret);
- gf_errno = gf_errno_to_error (op_errno);
- _hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (file_len) {
- ret = read (spec_fd, rsp->spec, file_len);
- close (spec_fd);
- }
- protocol_server_reply (frame, GF_OP_TYPE_MOP_REPLY, GF_MOP_GETSPEC,
- _hdr, _hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-int
-server_checksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- uint8_t *fchecksum, uint8_t *dchecksum)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_checksum_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- hdrlen = gf_hdr_len (rsp, NAME_MAX + 1 + NAME_MAX + 1);
- hdr = gf_hdr_new (rsp, NAME_MAX + 1 + NAME_MAX + 1);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- if (op_ret >= 0) {
- memcpy (rsp->fchecksum, fchecksum, NAME_MAX);
- rsp->fchecksum[NAME_MAX] = '\0';
- memcpy (rsp->dchecksum + NAME_MAX,
- dchecksum, NAME_MAX);
- rsp->dchecksum[NAME_MAX + NAME_MAX] = '\0';
- }
-
- protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_FOP_CHECKSUM,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-int
-server_checksum (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- loc_t loc = {0,};
- int32_t flag = 0;
- gf_fop_checksum_req_t *req = NULL;
-
- req = gf_param (hdr);
-
- loc.path = req->path;
- loc.ino = ntoh64 (req->ino);
- loc.inode = NULL;
- flag = ntoh32 (req->flag);
-
- gf_log (bound_xl->name, GF_LOG_TRACE,
- "%"PRId64": CHECKSUM \'%s (%"PRId64")\'",
- frame->root->unique, loc.path, loc.ino);
-
- STACK_WIND (frame, server_checksum_cbk,
- BOUND_XL(frame),
- BOUND_XL(frame)->fops->checksum,
- &loc, flag);
- return 0;
-}
-
-
-/*
- * mop_unlock - unlock management function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- */
-int
-mop_getvolume (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- return 0;
-}
-
-struct __get_xl_struct {
- const char *name;
- xlator_t *reply;
-};
-
-void __check_and_set (xlator_t *each, void *data)
-{
- if (!strcmp (each->name,
- ((struct __get_xl_struct *) data)->name))
- ((struct __get_xl_struct *) data)->reply = each;
-}
-
-static xlator_t *
-get_xlator_by_name (xlator_t *some_xl, const char *name)
-{
- struct __get_xl_struct get = {
- .name = name,
- .reply = NULL
- };
-
- xlator_foreach (some_xl, __check_and_set, &get);
-
- return get.reply;
-}
-
-
-/*
- * mop_setvolume - setvolume management function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- */
-int
-mop_setvolume (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *req_hdr, size_t req_hdrlen,
- struct iobuf *iobuf)
-{
- server_connection_t *conn = NULL;
- server_conf_t *conf = NULL;
- gf_hdr_common_t *rsp_hdr = NULL;
- gf_mop_setvolume_req_t *req = NULL;
- gf_mop_setvolume_rsp_t *rsp = NULL;
- peer_info_t *peerinfo = NULL;
- int32_t ret = -1;
- int32_t op_ret = -1;
- int32_t op_errno = EINVAL;
- int32_t gf_errno = 0;
- dict_t *reply = NULL;
- dict_t *config_params = NULL;
- dict_t *params = NULL;
- char *name = NULL;
- char *version = NULL;
- char *process_uuid = NULL;
- xlator_t *xl = NULL;
- transport_t *trans = NULL;
- size_t rsp_hdrlen = -1;
- size_t dict_len = -1;
- size_t req_dictlen = -1;
- char *msg = NULL;
- char *volfile_key = NULL;
- uint32_t checksum = 0;
- int32_t lru_limit = 1024;
-
- params = dict_new ();
- reply = dict_new ();
-
- req = gf_param (req_hdr);
- req_dictlen = ntoh32 (req->dict_len);
- ret = dict_unserialize (req->buf, req_dictlen, &params);
-
- config_params = dict_copy_with_ref (frame->this->options, NULL);
- trans = TRANSPORT_FROM_FRAME(frame);
- conf = SERVER_CONF(frame);
-
- if (ret < 0) {
- ret = dict_set_str (reply, "ERROR",
- "Internal error: failed to unserialize "
- "request dictionary");
- if (ret < 0)
- gf_log (bound_xl->name, GF_LOG_DEBUG,
- "failed to set error msg \"%s\"",
- "Internal error: failed to unserialize "
- "request dictionary");
-
- op_ret = -1;
- op_errno = EINVAL;
- goto fail;
- }
-
- ret = dict_get_str (params, "process-uuid", &process_uuid);
- if (ret < 0) {
- ret = dict_set_str (reply, "ERROR",
- "UUID not specified");
- if (ret < 0)
- gf_log (bound_xl->name, GF_LOG_DEBUG,
- "failed to set error msg");
-
- op_ret = -1;
- op_errno = EINVAL;
- goto fail;
- }
-
-
- conn = server_connection_get (frame->this, process_uuid);
- if (trans->xl_private != conn)
- trans->xl_private = conn;
-
- ret = dict_get_str (params, "protocol-version", &version);
- if (ret < 0) {
- ret = dict_set_str (reply, "ERROR",
- "No version number specified");
- if (ret < 0)
- gf_log (trans->xl->name, GF_LOG_DEBUG,
- "failed to set error msg");
-
- op_ret = -1;
- op_errno = EINVAL;
- goto fail;
- }
-
- ret = strcmp (version, GF_PROTOCOL_VERSION);
- if (ret != 0) {
- ret = asprintf (&msg, "protocol version mismatch: client(%s) "
- "- server(%s)", version, GF_PROTOCOL_VERSION);
- if (-1 == ret) {
- gf_log (trans->xl->name, GF_LOG_ERROR,
- "asprintf failed while setting up error msg");
- goto fail;
- }
- ret = dict_set_dynstr (reply, "ERROR", msg);
- if (ret < 0)
- gf_log (trans->xl->name, GF_LOG_DEBUG,
- "failed to set error msg");
-
- op_ret = -1;
- op_errno = EINVAL;
- goto fail;
- }
-
- ret = dict_get_str (params,
- "remote-subvolume", &name);
- if (ret < 0) {
- ret = dict_set_str (reply, "ERROR",
- "No remote-subvolume option specified");
- if (ret < 0)
- gf_log (trans->xl->name, GF_LOG_DEBUG,
- "failed to set error msg");
-
- op_ret = -1;
- op_errno = EINVAL;
- goto fail;
- }
-
- xl = get_xlator_by_name (frame->this, name);
- if (xl == NULL) {
- ret = asprintf (&msg, "remote-subvolume \"%s\" is not found",
- name);
- if (-1 == ret) {
- gf_log (trans->xl->name, GF_LOG_ERROR,
- "asprintf failed while setting error msg");
- goto fail;
- }
- ret = dict_set_dynstr (reply, "ERROR", msg);
- if (ret < 0)
- gf_log (trans->xl->name, GF_LOG_DEBUG,
- "failed to set error msg");
-
- op_ret = -1;
- op_errno = ENOENT;
- goto fail;
- }
-
- if (conf->verify_volfile_checksum) {
- ret = dict_get_uint32 (params, "volfile-checksum", &checksum);
- if (ret == 0) {
- ret = dict_get_str (params, "volfile-key",
- &volfile_key);
-
- ret = _validate_volfile_checksum (trans->xl,
- volfile_key,
- checksum);
- if (-1 == ret) {
- ret = dict_set_str (reply, "ERROR",
- "volume-file checksum "
- "varies from earlier "
- "access");
- if (ret < 0)
- gf_log (trans->xl->name, GF_LOG_DEBUG,
- "failed to set error msg");
-
- op_ret = -1;
- op_errno = ESTALE;
- goto fail;
- }
- }
- }
-
-
- peerinfo = &trans->peerinfo;
- ret = dict_set_static_ptr (params, "peer-info", peerinfo);
- if (ret < 0)
- gf_log (trans->xl->name, GF_LOG_DEBUG,
- "failed to set peer-info");
-
- if (conf->auth_modules == NULL) {
- gf_log (trans->xl->name, GF_LOG_ERROR,
- "Authentication module not initialized");
- }
-
- ret = gf_authenticate (params, config_params,
- conf->auth_modules);
- if (ret == AUTH_ACCEPT) {
- gf_log (trans->xl->name, GF_LOG_INFO,
- "accepted client from %s",
- peerinfo->identifier);
- op_ret = 0;
- conn->bound_xl = xl;
- ret = dict_set_str (reply, "ERROR", "Success");
- if (ret < 0)
- gf_log (trans->xl->name, GF_LOG_DEBUG,
- "failed to set error msg");
- } else {
- gf_log (trans->xl->name, GF_LOG_ERROR,
- "Cannot authenticate client from %s",
- peerinfo->identifier);
- op_ret = -1;
- op_errno = EACCES;
- ret = dict_set_str (reply, "ERROR", "Authentication failed");
- if (ret < 0)
- gf_log (bound_xl->name, GF_LOG_DEBUG,
- "failed to set error msg");
-
- goto fail;
- }
-
- if (conn->bound_xl == NULL) {
- ret = dict_set_str (reply, "ERROR",
- "Check volfile and handshake "
- "options in protocol/client");
- if (ret < 0)
- gf_log (trans->xl->name, GF_LOG_DEBUG,
- "failed to set error msg");
-
- op_ret = -1;
- op_errno = EACCES;
- goto fail;
- }
-
- if ((conn->bound_xl != NULL) &&
- (ret >= 0) &&
- (conn->bound_xl->itable == NULL)) {
- /* create inode table for this bound_xl, if one doesn't
- already exist */
- lru_limit = INODE_LRU_LIMIT (frame->this);
-
- gf_log (trans->xl->name, GF_LOG_TRACE,
- "creating inode table with lru_limit=%"PRId32", "
- "xlator=%s", lru_limit, conn->bound_xl->name);
-
- conn->bound_xl->itable =
- inode_table_new (lru_limit,
- conn->bound_xl);
- }
-
- ret = dict_set_str (reply, "process-uuid",
- xl->ctx->process_uuid);
-
- ret = dict_set_uint64 (reply, "transport-ptr",
- ((uint64_t) (long) trans));
-
-fail:
- dict_len = dict_serialized_length (reply);
- if (dict_len < 0) {
- gf_log (xl->name, GF_LOG_DEBUG,
- "failed to get serialized length of reply dict");
- op_ret = -1;
- op_errno = EINVAL;
- dict_len = 0;
- }
-
- rsp_hdr = gf_hdr_new (rsp, dict_len);
- rsp_hdrlen = gf_hdr_len (rsp, dict_len);
- rsp = gf_param (rsp_hdr);
-
- if (dict_len) {
- ret = dict_serialize (reply, rsp->buf);
- if (ret < 0) {
- gf_log (xl->name, GF_LOG_DEBUG,
- "failed to serialize reply dict");
- op_ret = -1;
- op_errno = -ret;
- }
- }
- rsp->dict_len = hton32 (dict_len);
-
- rsp_hdr->rsp.op_ret = hton32 (op_ret);
- gf_errno = gf_errno_to_error (op_errno);
- rsp_hdr->rsp.op_errno = hton32 (gf_errno);
-
- protocol_server_reply (frame, GF_OP_TYPE_MOP_REPLY, GF_MOP_SETVOLUME,
- rsp_hdr, rsp_hdrlen, NULL, 0, NULL);
-
- dict_unref (params);
- dict_unref (reply);
- dict_unref (config_params);
-
- return 0;
-}
-
-/*
- * server_mop_stats_cbk - stats callback for server management operation
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret: return value
- * @op_errno: errno
- * @stats:err
- *
- * not for external reference
- */
-
-int
-server_mop_stats_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t ret, int32_t op_errno,
- struct xlator_stats *stats)
-{
- /* TODO: get this information from somewhere else, not extern */
- gf_hdr_common_t *hdr = NULL;
- gf_mop_stats_rsp_t *rsp = NULL;
- char buffer[256] = {0,};
- int64_t glusterfsd_stats_nr_clients = 0;
- size_t hdrlen = 0;
- size_t buf_len = 0;
- int32_t gf_errno = 0;
-
- if (ret >= 0) {
- sprintf (buffer,
- "%"PRIx64",%"PRIx64",%"PRIx64
- ",%"PRIx64",%"PRIx64",%"PRIx64
- ",%"PRIx64",%"PRIx64"\n",
- stats->nr_files, stats->disk_usage, stats->free_disk,
- stats->total_disk_size, stats->read_usage,
- stats->write_usage, stats->disk_speed,
- glusterfsd_stats_nr_clients);
-
- buf_len = strlen (buffer);
- }
-
- hdrlen = gf_hdr_len (rsp, buf_len + 1);
- hdr = gf_hdr_new (rsp, buf_len + 1);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (ret);
- gf_errno = gf_errno_to_error (op_errno);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- strcpy (rsp->buf, buffer);
-
- protocol_server_reply (frame, GF_OP_TYPE_MOP_REPLY, GF_MOP_STATS,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-/*
- * mop_unlock - unlock management function for server protocol
- * @frame: call frame
- * @bound_xl:
- * @params: parameter dictionary
- *
- */
-int
-mop_stats (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- int32_t flag = 0;
- gf_mop_stats_req_t *req = NULL;
-
- req = gf_param (hdr);
-
- flag = ntoh32 (req->flags);
-
- STACK_WIND (frame, server_mop_stats_cbk,
- bound_xl,
- bound_xl->mops->stats,
- flag);
-
- return 0;
-}
-
-
-int
-mop_ping (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_hdr_common_t *rsp_hdr = NULL;
- gf_mop_ping_rsp_t *rsp = NULL;
- size_t rsp_hdrlen = 0;
-
- rsp_hdrlen = gf_hdr_len (rsp, 0);
- rsp_hdr = gf_hdr_new (rsp, 0);
-
- hdr->rsp.op_ret = 0;
-
- protocol_server_reply (frame, GF_OP_TYPE_MOP_REPLY, GF_MOP_PING,
- rsp_hdr, rsp_hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-
-int
-mop_log (call_frame_t *frame, xlator_t *bound_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf)
-{
- gf_mop_log_req_t * req = NULL;
- char * msg = NULL;
- uint32_t msglen = 0;
-
- transport_t * trans = NULL;
-
- trans = TRANSPORT_FROM_FRAME (frame);
-
- req = gf_param (hdr);
- msglen = ntoh32 (req->msglen);
-
- if (msglen)
- msg = req->msg;
-
- gf_log_from_client (msg, trans->peerinfo.identifier);
-
- return 0;
-}
-
-
-/*
- * unknown_op_cbk - This function is called when a opcode for unknown
- * type is called. Helps to keep the backward/forward
- * compatiblity
- * @frame: call frame
- * @type:
- * @opcode:
- *
- */
-
-int
-unknown_op_cbk (call_frame_t *frame, int32_t type, int32_t opcode)
-{
- gf_hdr_common_t *hdr = NULL;
- gf_fop_flush_rsp_t *rsp = NULL;
- size_t hdrlen = 0;
- int32_t gf_errno = 0;
-
- hdrlen = gf_hdr_len (rsp, 0);
- hdr = gf_hdr_new (rsp, 0);
- rsp = gf_param (hdr);
-
- hdr->rsp.op_ret = hton32 (-1);
- gf_errno = gf_errno_to_error (ENOSYS);
- hdr->rsp.op_errno = hton32 (gf_errno);
-
- protocol_server_reply (frame, type, opcode,
- hdr, hdrlen, NULL, 0, NULL);
-
- return 0;
-}
-
-/*
- * get_frame_for_transport - get call frame for specified transport object
- *
- * @trans: transport object
- *
- */
-static call_frame_t *
-get_frame_for_transport (transport_t *trans)
-{
- call_frame_t *frame = NULL;
- call_pool_t *pool = NULL;
- server_connection_t *conn = NULL;
- server_state_t *state = NULL;;
-
- GF_VALIDATE_OR_GOTO("server", trans, out);
-
- if (trans->xl && trans->xl->ctx)
- pool = trans->xl->ctx->pool;
- GF_VALIDATE_OR_GOTO("server", pool, out);
-
- frame = create_frame (trans->xl, pool);
- GF_VALIDATE_OR_GOTO("server", frame, out);
-
- state = CALLOC (1, sizeof (*state));
- GF_VALIDATE_OR_GOTO("server", state, out);
-
- conn = trans->xl_private;
- if (conn) {
- if (conn->bound_xl)
- state->itable = conn->bound_xl->itable;
- state->bound_xl = conn->bound_xl;
- }
-
- state->trans = transport_ref (trans);
-
- frame->root->trans = conn;
- frame->root->state = state; /* which socket */
- frame->root->unique = 0; /* which call */
-
-out:
- return frame;
-}
-
-/*
- * get_frame_for_call - create a frame into the capable of
- * generating and replying the reply packet by itself.
- * By making a call with this frame, the last UNWIND
- * function will have all needed state from its
- * frame_t->root to send reply.
- * @trans:
- * @blk:
- * @params:
- *
- * not for external reference
- */
-static call_frame_t *
-get_frame_for_call (transport_t *trans, gf_hdr_common_t *hdr)
-{
- call_frame_t *frame = NULL;
-
- frame = get_frame_for_transport (trans);
-
- frame->root->op = ntoh32 (hdr->op);
- frame->root->type = ntoh32 (hdr->type);
-
- frame->root->uid = ntoh32 (hdr->req.uid);
- frame->root->unique = ntoh64 (hdr->callid); /* which call */
- frame->root->gid = ntoh32 (hdr->req.gid);
- frame->root->pid = ntoh32 (hdr->req.pid);
-
- return frame;
-}
-
-/*
- * prototype of operations function for each of mop and
- * fop at server protocol level
- *
- * @frame: call frame pointer
- * @bound_xl: the xlator that this frame is bound to
- * @params: parameters dictionary
- *
- * to be used by protocol interpret, _not_ for exterenal reference
- */
-typedef int32_t (*gf_op_t) (call_frame_t *frame, xlator_t *bould_xl,
- gf_hdr_common_t *hdr, size_t hdrlen,
- struct iobuf *iobuf);
-
-
-static gf_op_t gf_fops[] = {
- [GF_FOP_STAT] = server_stat,
- [GF_FOP_READLINK] = server_readlink,
- [GF_FOP_MKNOD] = server_mknod,
- [GF_FOP_MKDIR] = server_mkdir,
- [GF_FOP_UNLINK] = server_unlink,
- [GF_FOP_RMDIR] = server_rmdir,
- [GF_FOP_SYMLINK] = server_symlink,
- [GF_FOP_RENAME] = server_rename,
- [GF_FOP_LINK] = server_link,
- [GF_FOP_CHMOD] = server_chmod,
- [GF_FOP_CHOWN] = server_chown,
- [GF_FOP_TRUNCATE] = server_truncate,
- [GF_FOP_OPEN] = server_open,
- [GF_FOP_READ] = server_readv,
- [GF_FOP_WRITE] = server_writev,
- [GF_FOP_STATFS] = server_statfs,
- [GF_FOP_FLUSH] = server_flush,
- [GF_FOP_FSYNC] = server_fsync,
- [GF_FOP_SETXATTR] = server_setxattr,
- [GF_FOP_GETXATTR] = server_getxattr,
- [GF_FOP_FGETXATTR] = server_fgetxattr,
- [GF_FOP_FSETXATTR] = server_fsetxattr,
- [GF_FOP_REMOVEXATTR] = server_removexattr,
- [GF_FOP_OPENDIR] = server_opendir,
- [GF_FOP_GETDENTS] = server_getdents,
- [GF_FOP_FSYNCDIR] = server_fsyncdir,
- [GF_FOP_ACCESS] = server_access,
- [GF_FOP_CREATE] = server_create,
- [GF_FOP_FTRUNCATE] = server_ftruncate,
- [GF_FOP_FSTAT] = server_fstat,
- [GF_FOP_LK] = server_lk,
- [GF_FOP_UTIMENS] = server_utimens,
- [GF_FOP_FCHMOD] = server_fchmod,
- [GF_FOP_FCHOWN] = server_fchown,
- [GF_FOP_LOOKUP] = server_lookup,
- [GF_FOP_SETDENTS] = server_setdents,
- [GF_FOP_READDIR] = server_readdir,
- [GF_FOP_INODELK] = server_inodelk,
- [GF_FOP_FINODELK] = server_finodelk,
- [GF_FOP_ENTRYLK] = server_entrylk,
- [GF_FOP_FENTRYLK] = server_fentrylk,
- [GF_FOP_CHECKSUM] = server_checksum,
- [GF_FOP_XATTROP] = server_xattrop,
- [GF_FOP_FXATTROP] = server_fxattrop,
-};
-
-
-
-static gf_op_t gf_mops[] = {
- [GF_MOP_SETVOLUME] = mop_setvolume,
- [GF_MOP_GETVOLUME] = mop_getvolume,
- [GF_MOP_STATS] = mop_stats,
- [GF_MOP_GETSPEC] = mop_getspec,
- [GF_MOP_PING] = mop_ping,
- [GF_MOP_LOG] = mop_log,
-};
-
-static gf_op_t gf_cbks[] = {
- [GF_CBK_FORGET] = server_forget,
- [GF_CBK_RELEASE] = server_release,
- [GF_CBK_RELEASEDIR] = server_releasedir
-};
-
-int
-protocol_server_interpret (xlator_t *this, transport_t *trans,
- char *hdr_p, size_t hdrlen, struct iobuf *iobuf)
-{
- server_connection_t *conn = NULL;
- gf_hdr_common_t *hdr = NULL;
- xlator_t *bound_xl = NULL;
- call_frame_t *frame = NULL;
- peer_info_t *peerinfo = NULL;
- int32_t type = -1;
- int32_t op = -1;
- int32_t ret = -1;
-
- hdr = (gf_hdr_common_t *)hdr_p;
- type = ntoh32 (hdr->type);
- op = ntoh32 (hdr->op);
-
- conn = trans->xl_private;
- if (conn)
- bound_xl = conn->bound_xl;
-
- peerinfo = &trans->peerinfo;
- switch (type) {
- case GF_OP_TYPE_FOP_REQUEST:
- if ((op < 0) || (op >= GF_FOP_MAXVALUE)) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid fop %"PRId32" from client %s",
- op, peerinfo->identifier);
- break;
- }
- if (bound_xl == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "Received fop %"PRId32" before "
- "authentication.", op);
- break;
- }
- frame = get_frame_for_call (trans, hdr);
- ret = gf_fops[op] (frame, bound_xl, hdr, hdrlen, iobuf);
- break;
-
- case GF_OP_TYPE_MOP_REQUEST:
- if ((op < 0) || (op >= GF_MOP_MAXVALUE)) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid mop %"PRId32" from client %s",
- op, peerinfo->identifier);
- break;
- }
- frame = get_frame_for_call (trans, hdr);
- ret = gf_mops[op] (frame, bound_xl, hdr, hdrlen, iobuf);
- break;
-
- case GF_OP_TYPE_CBK_REQUEST:
- if ((op < 0) || (op >= GF_CBK_MAXVALUE)) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid cbk %"PRId32" from client %s",
- op, peerinfo->identifier);
- break;
- }
- if (bound_xl == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "Received cbk %d before authentication.", op);
- break;
- }
-
- frame = get_frame_for_call (trans, hdr);
- ret = gf_cbks[op] (frame, bound_xl, hdr, hdrlen, iobuf);
- break;
-
- default:
- break;
- }
-
- return ret;
-}
-
-
-/*
- * server_nop_cbk - nop callback for server protocol
- * @frame: call frame
- * @cookie:
- * @this:
- * @op_ret: return value
- * @op_errno: errno
- *
- * not for external reference
- */
-int
-server_nop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- server_state_t *state = NULL;
-
- state = CALL_STATE(frame);
-
- if (state)
- free_state (state);
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-/*
- * server_fd - fdtable dump function for server protocol
- * @this:
- *
- */
-int
-server_fd (xlator_t *this)
-{
- server_conf_t *conf = NULL;
- server_connection_t *trav = NULL;
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 1;
- int ret = -1;
-
- if (!this)
- return -1;
-
- conf = this->private;
- if (!conf) {
- gf_log (this->name, GF_LOG_WARNING,
- "conf null in xlator");
- return -1;
- }
-
- gf_proc_dump_add_section("xlator.protocol.server.conn");
-
- ret = pthread_mutex_trylock (&conf->mutex);
- if (ret) {
- gf_log("", GF_LOG_WARNING, "Unable to dump fdtable"
- " errno: %d", errno);
- return -1;
- }
-
- list_for_each_entry (trav, &conf->conns, list) {
- if (trav->id) {
- gf_proc_dump_build_key(key,
- "xlator.protocol.server.conn",
- "%d.id", i);
- gf_proc_dump_write(key, "%s", trav->id);
- }
-
- gf_proc_dump_build_key(key,"xlator.protocol.server.conn",
- "%d.ref",i)
- gf_proc_dump_write(key, "%d", trav->ref);
- if (trav->bound_xl) {
- gf_proc_dump_build_key(key,
- "xlator.protocol.server.conn",
- "%d.bound_xl", i);
- gf_proc_dump_write(key, "%s", trav->bound_xl->name);
- }
-
- gf_proc_dump_build_key(key,
- "xlator.protocol.server.conn",
- "%d.id", i);
- fdtable_dump(trav->fdtable,key);
- i++;
- }
- pthread_mutex_unlock (&conf->mutex);
-
-
- return 0;
- }
-
-int
-server_priv (xlator_t *this)
-{
- return 0;
-}
-
-int
-server_inode (xlator_t *this)
-{
- server_conf_t *conf = NULL;
- server_connection_t *trav = NULL;
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 1;
- int ret = -1;
-
- if (!this)
- return -1;
-
- conf = this->private;
- if (!conf) {
- gf_log (this->name, GF_LOG_WARNING,
- "conf null in xlator");
- return -1;
- }
-
- ret = pthread_mutex_trylock (&conf->mutex);
- if (ret) {
- gf_log("", GF_LOG_WARNING, "Unable to dump itable"
- " errno: %d", errno);
- return -1;
- }
-
- list_for_each_entry (trav, &conf->conns, list) {
- if (trav->bound_xl && trav->bound_xl->itable) {
- gf_proc_dump_build_key(key,
- "xlator.protocol.server.conn",
- "%d.bound_xl.%s",
- i, trav->bound_xl->name);
- inode_table_dump(trav->bound_xl->itable,key);
- i++;
- }
- }
- pthread_mutex_unlock (&conf->mutex);
-
-
- return 0;
-}
-
-
-static void
-get_auth_types (dict_t *this, char *key, data_t *value, void *data)
-{
- dict_t *auth_dict = NULL;
- char *saveptr = NULL;
- char *tmp = NULL;
- char *key_cpy = NULL;
- int32_t ret = -1;
-
- auth_dict = data;
- key_cpy = strdup (key);
- GF_VALIDATE_OR_GOTO("server", key_cpy, out);
-
- tmp = strtok_r (key_cpy, ".", &saveptr);
- ret = strcmp (tmp, "auth");
- if (ret == 0) {
- tmp = strtok_r (NULL, ".", &saveptr);
- if (strcmp (tmp, "ip") == 0) {
- /* TODO: backward compatibility, remove when
- newer versions are available */
- tmp = "addr";
- gf_log ("server", GF_LOG_WARNING,
- "assuming 'auth.ip' to be 'auth.addr'");
- }
- ret = dict_set_dynptr (auth_dict, tmp, NULL, 0);
- if (ret < 0) {
- gf_log ("server", GF_LOG_DEBUG,
- "failed to dict_set_dynptr");
- }
- }
-
- FREE (key_cpy);
-out:
- return;
-}
-
-
-int
-validate_auth_options (xlator_t *this, dict_t *dict)
-{
- int ret = -1;
- int error = 0;
- xlator_list_t *trav = NULL;
- data_pair_t *pair = NULL;
- char *saveptr = NULL;
- char *tmp = NULL;
- char *key_cpy = NULL;
-
- trav = this->children;
- while (trav) {
- error = -1;
- for (pair = dict->members_list; pair; pair = pair->next) {
- key_cpy = strdup (pair->key);
- tmp = strtok_r (key_cpy, ".", &saveptr);
- ret = strcmp (tmp, "auth");
- if (ret == 0) {
- /* for module type */
- tmp = strtok_r (NULL, ".", &saveptr);
- /* for volume name */
- tmp = strtok_r (NULL, ".", &saveptr);
- }
-
- if (strcmp (tmp, trav->xlator->name) == 0) {
- error = 0;
- free (key_cpy);
- break;
- }
- free (key_cpy);
- }
- if (-1 == error) {
- gf_log (this->name, GF_LOG_ERROR,
- "volume '%s' defined as subvolume, but no "
- "authentication defined for the same",
- trav->xlator->name);
- break;
- }
- trav = trav->next;
- }
-
- return error;
-}
-
-
-/*
- * init - called during server protocol initialization
- *
- * @this:
- *
- */
-int
-init (xlator_t *this)
-{
- int32_t ret = -1;
- transport_t *trans = NULL;
- server_conf_t *conf = NULL;
- data_t *data = NULL;
-
- if (this->children == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "protocol/server should have subvolume");
- goto out;
- }
-
- trans = transport_load (this->options, this);
- if (trans == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to load transport");
- goto out;
- }
-
- ret = transport_listen (trans);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to bind/listen on socket");
- goto out;
- }
-
- conf = CALLOC (1, sizeof (server_conf_t));
- GF_VALIDATE_OR_GOTO(this->name, conf, out);
-
- INIT_LIST_HEAD (&conf->conns);
- pthread_mutex_init (&conf->mutex, NULL);
-
- conf->trans = trans;
-
- conf->auth_modules = dict_new ();
- GF_VALIDATE_OR_GOTO(this->name, conf->auth_modules, out);
-
- dict_foreach (this->options, get_auth_types,
- conf->auth_modules);
- ret = validate_auth_options (this, this->options);
- if (ret == -1) {
- /* logging already done in validate_auth_options function. */
- goto out;
- }
-
- ret = gf_auth_init (this, conf->auth_modules);
- if (ret) {
- dict_unref (conf->auth_modules);
- goto out;
- }
-
- this->private = conf;
-
- ret = dict_get_int32 (this->options, "inode-lru-limit",
- &conf->inode_lru_limit);
- if (ret < 0) {
- conf->inode_lru_limit = 1024;
- }
-
- ret = dict_get_int32 (this->options, "limits.transaction-size",
- &conf->max_block_size);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "defaulting limits.transaction-size to %d",
- DEFAULT_BLOCK_SIZE);
- conf->max_block_size = DEFAULT_BLOCK_SIZE;
- }
-
- conf->verify_volfile_checksum = 1;
- data = dict_get (this->options, "verify-volfile-checksum");
- if (data) {
- ret = gf_string2boolean(data->data,
- &conf->verify_volfile_checksum);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "wrong value for verify-volfile-checksum");
- conf->verify_volfile_checksum = 1;
- }
- }
-
-#ifndef GF_DARWIN_HOST_OS
- {
- struct rlimit lim;
-
- lim.rlim_cur = 1048576;
- lim.rlim_max = 1048576;
-
- if (setrlimit (RLIMIT_NOFILE, &lim) == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "WARNING: Failed to set 'ulimit -n 1M': %s",
- strerror(errno));
- lim.rlim_cur = 65536;
- lim.rlim_max = 65536;
-
- if (setrlimit (RLIMIT_NOFILE, &lim) == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "Failed to set max open fd to 64k: %s",
- strerror(errno));
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "max open fd set to 64k");
- }
- }
- }
-#endif
- this->ctx->top = this;
-
- ret = 0;
-out:
- return ret;
-}
-
-
-
-int
-protocol_server_pollin (xlator_t *this, transport_t *trans)
-{
- char *hdr = NULL;
- size_t hdrlen = 0;
- int ret = -1;
- struct iobuf *iobuf = NULL;
-
-
- ret = transport_receive (trans, &hdr, &hdrlen, &iobuf);
-
- if (ret == 0)
- ret = protocol_server_interpret (this, trans, hdr,
- hdrlen, iobuf);
-
- /* TODO: use mem-pool */
- FREE (hdr);
-
- return ret;
-}
-
-
-/*
- * fini - finish function for server protocol, called before
- * unloading server protocol.
- *
- * @this:
- *
- */
-void
-fini (xlator_t *this)
-{
- server_conf_t *conf = this->private;
-
- GF_VALIDATE_OR_GOTO(this->name, conf, out);
-
- if (conf->auth_modules) {
- dict_unref (conf->auth_modules);
- }
-
- FREE (conf);
- this->private = NULL;
-out:
- return;
-}
-
-/*
- * server_protocol_notify - notify function for server protocol
- * @this:
- * @trans:
- * @event:
- *
- */
-int
-notify (xlator_t *this, int32_t event, void *data, ...)
-{
- int ret = 0;
- transport_t *trans = data;
- peer_info_t *peerinfo = NULL;
- peer_info_t *myinfo = NULL;
-
- if (trans != NULL) {
- peerinfo = &(trans->peerinfo);
- myinfo = &(trans->myinfo);
- }
-
- switch (event) {
- case GF_EVENT_POLLIN:
- ret = protocol_server_pollin (this, trans);
- break;
- case GF_EVENT_POLLERR:
- {
- gf_log (trans->xl->name, GF_LOG_INFO, "%s disconnected",
- peerinfo->identifier);
-
- ret = -1;
- transport_disconnect (trans);
- if (trans->xl_private == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "POLLERR received on (%s) even before "
- "handshake with (%s) is successful",
- myinfo->identifier, peerinfo->identifier);
- } else {
- /*
- * FIXME: shouldn't we check for return value?
- * what should be done if cleanup fails?
- */
- server_connection_cleanup (this, trans->xl_private);
- }
- }
- break;
-
- case GF_EVENT_TRANSPORT_CLEANUP:
- {
- if (trans->xl_private) {
- server_connection_put (this, trans->xl_private);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "transport (%s) cleaned up even before "
- "handshake with (%s) is successful",
- myinfo->identifier, peerinfo->identifier);
- }
- }
- break;
-
- default:
- default_notify (this, event, data);
- break;
- }
-
- return ret;
-}
-
-
-struct xlator_mops mops = {
-};
-
-struct xlator_fops fops = {
-};
-
-struct xlator_cbks cbks = {
-};
-
-struct xlator_dumpops dumpops = {
- .inode = server_inode,
- .priv = server_priv,
- .fd = server_fd,
-};
-
-
-struct volume_options options[] = {
- { .key = {"transport-type"},
- .value = {"tcp", "socket", "ib-verbs", "unix", "ib-sdp",
- "tcp/server", "ib-verbs/server"},
- .type = GF_OPTION_TYPE_STR
- },
- { .key = {"volume-filename.*"},
- .type = GF_OPTION_TYPE_PATH,
- },
- { .key = {"inode-lru-limit"},
- .type = GF_OPTION_TYPE_INT,
- .min = 0,
- .max = (1 * GF_UNIT_MB)
- },
- { .key = {"client-volume-filename"},
- .type = GF_OPTION_TYPE_PATH
- },
- { .key = {"verify-volfile-checksum"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {NULL} },
-};
diff --git a/xlators/protocol/server/src/server-protocol.h b/xlators/protocol/server/src/server-protocol.h
deleted file mode 100644
index dabe6927b..000000000
--- a/xlators/protocol/server/src/server-protocol.h
+++ /dev/null
@@ -1,158 +0,0 @@
-/*
- Copyright (c) 2006-2009 Z RESEARCH, Inc. <http://www.zresearch.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _SERVER_PROTOCOL_H_
-#define _SERVER_PROTOCOL_H_
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <pthread.h>
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "call-stub.h"
-#include "authenticate.h"
-#include "fd.h"
-#include "byte-order.h"
-
-#define DEFAULT_BLOCK_SIZE 4194304 /* 4MB */
-#define DEFAULT_VOLUME_FILE_PATH CONFDIR "/glusterfs.vol"
-
-typedef struct _server_state server_state_t;
-
-struct _locker {
- struct list_head lockers;
- char *volume;
- loc_t loc;
- fd_t *fd;
- pid_t pid;
-};
-
-struct _lock_table {
- struct list_head file_lockers;
- struct list_head dir_lockers;
- gf_lock_t lock;
- size_t count;
-};
-
-
-/* private structure per connection (transport object)
- * used as transport_t->xl_private
- */
-struct _server_connection {
- struct list_head list;
- char *id;
- int ref;
- int active_transports;
- pthread_mutex_t lock;
- char disconnected;
- fdtable_t *fdtable;
- struct _lock_table *ltable;
- xlator_t *bound_xl;
-};
-
-typedef struct _server_connection server_connection_t;
-
-
-server_connection_t *
-server_connection_get (xlator_t *this, const char *id);
-
-void
-server_connection_put (xlator_t *this, server_connection_t *conn);
-
-int
-server_connection_destroy (xlator_t *this, server_connection_t *conn);
-
-int
-server_connection_cleanup (xlator_t *this, server_connection_t *conn);
-
-int
-server_nop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno);
-
-
-struct _volfile_ctx {
- struct _volfile_ctx *next;
- char *key;
- uint32_t checksum;
-};
-
-typedef struct {
- struct _volfile_ctx *volfile;
-
- dict_t *auth_modules;
- transport_t *trans;
- int32_t max_block_size;
- int32_t inode_lru_limit;
- pthread_mutex_t mutex;
- struct list_head conns;
- gf_boolean_t verify_volfile_checksum;
-} server_conf_t;
-
-
-struct _server_state {
- transport_t *trans;
- xlator_t *bound_xl;
- loc_t loc;
- loc_t loc2;
- int flags;
- fd_t *fd;
- size_t size;
- off_t offset;
- mode_t mode;
- dev_t dev;
- uid_t uid;
- gid_t gid;
- size_t nr_count;
- int cmd;
- int type;
- char *name;
- int name_len;
- inode_table_t *itable;
- int64_t fd_no;
- ino_t ino;
- ino_t par;
- ino_t ino2;
- ino_t par2;
- char *path;
- char *path2;
- char *bname;
- char *bname2;
- int mask;
- char is_revalidate;
- dict_t *xattr_req;
- struct flock flock;
- struct timespec tv[2];
- char *resolved;
- const char *volume;
-};
-
-
-int
-server_stub_resume (call_stub_t *stub, int32_t op_ret, int32_t op_errno,
- inode_t *inode, inode_t *parent);
-
-int
-do_path_lookup (call_stub_t *stub, const loc_t *loc);
-
-#endif
diff --git a/xlators/protocol/server/src/server-resolve.c b/xlators/protocol/server/src/server-resolve.c
new file mode 100644
index 000000000..cc4686a03
--- /dev/null
+++ b/xlators/protocol/server/src/server-resolve.c
@@ -0,0 +1,598 @@
+/*
+ Copyright (c) 2010-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "server.h"
+#include "server-helpers.h"
+
+
+int
+server_resolve_all (call_frame_t *frame);
+int
+resolve_entry_simple (call_frame_t *frame);
+int
+resolve_inode_simple (call_frame_t *frame);
+int
+resolve_continue (call_frame_t *frame);
+int
+resolve_anonfd_simple (call_frame_t *frame);
+
+int
+resolve_loc_touchup (call_frame_t *frame)
+{
+ server_state_t *state = NULL;
+ server_resolve_t *resolve = NULL;
+ loc_t *loc = NULL;
+ char *path = NULL;
+ int ret = 0;
+
+ state = CALL_STATE (frame);
+
+ resolve = state->resolve_now;
+ loc = state->loc_now;
+
+ if (!loc->path) {
+ if (loc->parent && resolve->bname) {
+ ret = inode_path (loc->parent, resolve->bname, &path);
+ } else if (loc->inode) {
+ ret = inode_path (loc->inode, NULL, &path);
+ }
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "return value inode_path %d", ret);
+ loc->path = path;
+ }
+
+ return 0;
+}
+
+
+int
+resolve_gfid_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ server_state_t *state = NULL;
+ server_resolve_t *resolve = NULL;
+ inode_t *link_inode = NULL;
+ loc_t *resolve_loc = NULL;
+
+ state = CALL_STATE (frame);
+ resolve = state->resolve_now;
+ resolve_loc = &resolve->resolve_loc;
+
+ if (op_ret == -1) {
+ gf_log (this->name, ((op_errno == ENOENT) ? GF_LOG_DEBUG :
+ GF_LOG_WARNING),
+ "%s/%s: failed to resolve (%s)",
+ uuid_utoa (resolve_loc->pargfid), resolve_loc->name,
+ strerror (op_errno));
+ goto out;
+ }
+
+ link_inode = inode_link (inode, resolve_loc->parent,
+ resolve_loc->name, buf);
+
+ if (!link_inode)
+ goto out;
+
+ inode_lookup (link_inode);
+
+ inode_unref (link_inode);
+
+out:
+ loc_wipe (resolve_loc);
+
+ resolve_continue (frame);
+ return 0;
+}
+
+
+int
+resolve_gfid_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode, struct iatt *buf,
+ dict_t *xdata, struct iatt *postparent)
+{
+ server_state_t *state = NULL;
+ server_resolve_t *resolve = NULL;
+ inode_t *link_inode = NULL;
+ loc_t *resolve_loc = NULL;
+
+ state = CALL_STATE (frame);
+ resolve = state->resolve_now;
+ resolve_loc = &resolve->resolve_loc;
+
+ if (op_ret == -1) {
+ gf_log (this->name, ((op_errno == ENOENT) ? GF_LOG_DEBUG :
+ GF_LOG_WARNING),
+ "%s: failed to resolve (%s)",
+ uuid_utoa (resolve_loc->gfid), strerror (op_errno));
+ loc_wipe (&resolve->resolve_loc);
+ goto out;
+ }
+
+ loc_wipe (resolve_loc);
+
+ link_inode = inode_link (inode, NULL, NULL, buf);
+
+ if (!link_inode)
+ goto out;
+
+ inode_lookup (link_inode);
+
+ if (uuid_is_null (resolve->pargfid)) {
+ inode_unref (link_inode);
+ goto out;
+ }
+
+ resolve_loc->parent = link_inode;
+ uuid_copy (resolve_loc->pargfid, resolve_loc->parent->gfid);
+
+ resolve_loc->name = resolve->bname;
+
+ resolve_loc->inode = inode_new (state->itable);
+ inode_path (resolve_loc->parent, resolve_loc->name,
+ (char **) &resolve_loc->path);
+
+ STACK_WIND (frame, resolve_gfid_entry_cbk,
+ frame->root->client->bound_xl,
+ frame->root->client->bound_xl->fops->lookup,
+ &resolve->resolve_loc, NULL);
+ return 0;
+out:
+ resolve_continue (frame);
+ return 0;
+}
+
+
+int
+resolve_gfid (call_frame_t *frame)
+{
+ server_state_t *state = NULL;
+ xlator_t *this = NULL;
+ server_resolve_t *resolve = NULL;
+ loc_t *resolve_loc = NULL;
+ int ret = 0;
+
+ state = CALL_STATE (frame);
+ this = frame->this;
+ resolve = state->resolve_now;
+ resolve_loc = &resolve->resolve_loc;
+
+ if (!uuid_is_null (resolve->pargfid))
+ uuid_copy (resolve_loc->gfid, resolve->pargfid);
+ else if (!uuid_is_null (resolve->gfid))
+ uuid_copy (resolve_loc->gfid, resolve->gfid);
+
+ resolve_loc->inode = inode_new (state->itable);
+ ret = loc_path (resolve_loc, NULL);
+
+ STACK_WIND (frame, resolve_gfid_cbk,
+ frame->root->client->bound_xl,
+ frame->root->client->bound_xl->fops->lookup,
+ &resolve->resolve_loc, NULL);
+ return 0;
+}
+
+int
+resolve_continue (call_frame_t *frame)
+{
+ server_state_t *state = NULL;
+ xlator_t *this = NULL;
+ server_resolve_t *resolve = NULL;
+ int ret = 0;
+
+ state = CALL_STATE (frame);
+ this = frame->this;
+ resolve = state->resolve_now;
+
+ resolve->op_ret = 0;
+ resolve->op_errno = 0;
+
+ if (resolve->fd_no != -1) {
+ ret = resolve_anonfd_simple (frame);
+ goto out;
+ } else if (!uuid_is_null (resolve->pargfid))
+ ret = resolve_entry_simple (frame);
+ else if (!uuid_is_null (resolve->gfid))
+ ret = resolve_inode_simple (frame);
+ if (ret)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "return value of resolve_*_simple %d", ret);
+
+ resolve_loc_touchup (frame);
+out:
+ server_resolve_all (frame);
+
+ return 0;
+}
+
+
+/*
+ Check if the requirements are fulfilled by entries in the inode cache itself
+ Return value:
+ <= 0 - simple resolution was decisive and complete (either success or failure)
+ > 0 - indecisive, need to perform deep resolution
+*/
+
+int
+resolve_entry_simple (call_frame_t *frame)
+{
+ server_state_t *state = NULL;
+ xlator_t *this = NULL;
+ server_resolve_t *resolve = NULL;
+ inode_t *parent = NULL;
+ inode_t *inode = NULL;
+ int ret = 0;
+
+ state = CALL_STATE (frame);
+ this = frame->this;
+ resolve = state->resolve_now;
+
+ parent = inode_find (state->itable, resolve->pargfid);
+ if (!parent) {
+ /* simple resolution is indecisive. need to perform
+ deep resolution */
+ resolve->op_ret = -1;
+ resolve->op_errno = ENOENT;
+ ret = 1;
+ goto out;
+ }
+
+ /* expected @parent was found from the inode cache */
+ uuid_copy (state->loc_now->pargfid, resolve->pargfid);
+ state->loc_now->parent = inode_ref (parent);
+ state->loc_now->name = resolve->bname;
+
+ inode = inode_grep (state->itable, parent, resolve->bname);
+ if (!inode) {
+ switch (resolve->type) {
+ case RESOLVE_DONTCARE:
+ case RESOLVE_NOT:
+ ret = 0;
+ break;
+ case RESOLVE_MAY:
+ ret = 1;
+ break;
+ default:
+ resolve->op_ret = -1;
+ resolve->op_errno = ENOENT;
+ ret = 1;
+ break;
+ }
+
+ goto out;
+ }
+
+ if (resolve->type == RESOLVE_NOT) {
+ gf_log (this->name, GF_LOG_DEBUG, "inode (pointer: %p gfid:%s"
+ " found for path (%s) while type is RESOLVE_NOT",
+ inode, uuid_utoa (inode->gfid), resolve->path);
+ resolve->op_ret = -1;
+ resolve->op_errno = EEXIST;
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+
+ state->loc_now->inode = inode_ref (inode);
+
+out:
+ if (parent)
+ inode_unref (parent);
+
+ if (inode)
+ inode_unref (inode);
+
+ return ret;
+}
+
+
+int
+server_resolve_entry (call_frame_t *frame)
+{
+ server_state_t *state = NULL;
+ int ret = 0;
+ loc_t *loc = NULL;
+
+ state = CALL_STATE (frame);
+ loc = state->loc_now;
+
+ ret = resolve_entry_simple (frame);
+
+ if (ret > 0) {
+ loc_wipe (loc);
+ resolve_gfid (frame);
+ return 0;
+ }
+
+ if (ret == 0)
+ resolve_loc_touchup (frame);
+
+ server_resolve_all (frame);
+
+ return 0;
+}
+
+
+int
+resolve_inode_simple (call_frame_t *frame)
+{
+ server_state_t *state = NULL;
+ server_resolve_t *resolve = NULL;
+ inode_t *inode = NULL;
+ int ret = 0;
+
+ state = CALL_STATE (frame);
+ resolve = state->resolve_now;
+
+ inode = inode_find (state->itable, resolve->gfid);
+
+ if (!inode) {
+ resolve->op_ret = -1;
+ resolve->op_errno = ENOENT;
+ ret = 1;
+ goto out;
+ }
+
+ ret = 0;
+
+ state->loc_now->inode = inode_ref (inode);
+ uuid_copy (state->loc_now->gfid, resolve->gfid);
+
+out:
+ if (inode)
+ inode_unref (inode);
+
+ return ret;
+}
+
+
+int
+server_resolve_inode (call_frame_t *frame)
+{
+ server_state_t *state = NULL;
+ int ret = 0;
+ loc_t *loc = NULL;
+
+ state = CALL_STATE (frame);
+ loc = state->loc_now;
+
+ ret = resolve_inode_simple (frame);
+
+ if (ret > 0) {
+ loc_wipe (loc);
+ resolve_gfid (frame);
+ return 0;
+ }
+
+ if (ret == 0)
+ resolve_loc_touchup (frame);
+
+ server_resolve_all (frame);
+
+ return 0;
+}
+
+
+int
+resolve_anonfd_simple (call_frame_t *frame)
+{
+ server_state_t *state = NULL;
+ server_resolve_t *resolve = NULL;
+ inode_t *inode = NULL;
+ int ret = 0;
+
+ state = CALL_STATE (frame);
+ resolve = state->resolve_now;
+
+ inode = inode_find (state->itable, resolve->gfid);
+
+ if (!inode) {
+ resolve->op_ret = -1;
+ resolve->op_errno = ENOENT;
+ ret = 1;
+ goto out;
+ }
+
+ ret = 0;
+
+ state->fd = fd_anonymous (inode);
+out:
+ if (inode)
+ inode_unref (inode);
+
+ if (ret != 0)
+ gf_log ("server", GF_LOG_WARNING, "inode for the gfid (%s) is "
+ "not found. anonymous fd creation failed",
+ uuid_utoa (resolve->gfid));
+ return ret;
+}
+
+
+int
+server_resolve_anonfd (call_frame_t *frame)
+{
+ server_state_t *state = NULL;
+ int ret = 0;
+ loc_t *loc = NULL;
+
+ state = CALL_STATE (frame);
+ loc = state->loc_now;
+
+ ret = resolve_anonfd_simple (frame);
+
+ if (ret > 0) {
+ loc_wipe (loc);
+ resolve_gfid (frame);
+ return 0;
+ }
+
+ server_resolve_all (frame);
+
+ return 0;
+
+}
+
+
+int
+server_resolve_fd (call_frame_t *frame)
+{
+ server_ctx_t *serv_ctx = NULL;
+ server_state_t *state = NULL;
+ client_t *client = NULL;
+ server_resolve_t *resolve = NULL;
+ uint64_t fd_no = -1;
+
+ state = CALL_STATE (frame);
+ resolve = state->resolve_now;
+
+ fd_no = resolve->fd_no;
+
+ if (fd_no == GF_ANON_FD_NO) {
+ server_resolve_anonfd (frame);
+ return 0;
+ }
+
+ client = frame->root->client;
+
+ serv_ctx = server_ctx_get (client, client->this);
+
+ if (serv_ctx == NULL) {
+ gf_log ("", GF_LOG_INFO, "server_ctx_get() failed");
+ resolve->op_ret = -1;
+ resolve->op_errno = ENOMEM;
+ return 0;
+ }
+
+ state->fd = gf_fd_fdptr_get (serv_ctx->fdtable, fd_no);
+
+ if (!state->fd) {
+ gf_log ("", GF_LOG_INFO, "fd not found in context");
+ resolve->op_ret = -1;
+ resolve->op_errno = EBADF;
+ }
+
+ server_resolve_all (frame);
+
+ return 0;
+}
+
+
+int
+server_resolve (call_frame_t *frame)
+{
+ server_state_t *state = NULL;
+ server_resolve_t *resolve = NULL;
+
+ state = CALL_STATE (frame);
+ resolve = state->resolve_now;
+
+ if (resolve->fd_no != -1) {
+
+ server_resolve_fd (frame);
+
+ } else if (!uuid_is_null (resolve->pargfid)) {
+
+ server_resolve_entry (frame);
+
+ } else if (!uuid_is_null (resolve->gfid)) {
+
+ server_resolve_inode (frame);
+
+ } else {
+ if (resolve == &state->resolve)
+ gf_log (frame->this->name, GF_LOG_WARNING,
+ "no resolution type for %s (%s)",
+ resolve->path, gf_fop_list[frame->root->op]);
+
+ resolve->op_ret = -1;
+ resolve->op_errno = EINVAL;
+
+ server_resolve_all (frame);
+ }
+
+ return 0;
+}
+
+
+int
+server_resolve_done (call_frame_t *frame)
+{
+ server_state_t *state = NULL;
+
+ state = CALL_STATE (frame);
+
+ server_print_request (frame);
+
+ state->resume_fn (frame, frame->root->client->bound_xl);
+
+ return 0;
+}
+
+
+/*
+ * This function is called multiple times, once per resolving one location/fd.
+ * state->resolve_now is used to decide which location/fd is to be resolved now
+ */
+int
+server_resolve_all (call_frame_t *frame)
+{
+ server_state_t *state = NULL;
+ xlator_t *this = NULL;
+
+ this = frame->this;
+ state = CALL_STATE (frame);
+
+ if (state->resolve_now == NULL) {
+
+ state->resolve_now = &state->resolve;
+ state->loc_now = &state->loc;
+
+ server_resolve (frame);
+
+ } else if (state->resolve_now == &state->resolve) {
+
+ state->resolve_now = &state->resolve2;
+ state->loc_now = &state->loc2;
+
+ server_resolve (frame);
+
+ } else if (state->resolve_now == &state->resolve2) {
+
+ server_resolve_done (frame);
+
+ } else {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Invalid pointer for state->resolve_now");
+ }
+
+ return 0;
+}
+
+
+int
+resolve_and_resume (call_frame_t *frame, server_resume_fn_t fn)
+{
+ server_state_t *state = NULL;
+
+ state = CALL_STATE (frame);
+ state->resume_fn = fn;
+
+ server_resolve_all (frame);
+
+ return 0;
+}
diff --git a/xlators/protocol/server/src/server-rpc-fops.c b/xlators/protocol/server/src/server-rpc-fops.c
new file mode 100644
index 000000000..138e601ce
--- /dev/null
+++ b/xlators/protocol/server/src/server-rpc-fops.c
@@ -0,0 +1,6179 @@
+/*
+ Copyright (c) 2010-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <openssl/md5.h>
+
+#include "server.h"
+#include "server-helpers.h"
+#include "glusterfs3-xdr.h"
+#include "glusterfs3.h"
+#include "compat-errno.h"
+
+#include "xdr-nfs3.h"
+
+#define SERVER_REQ_SET_ERROR(req, ret) \
+ do { \
+ rpcsvc_request_seterr (req, GARBAGE_ARGS); \
+ ret = RPCSVC_ACTOR_ERROR; \
+ } while (0)
+
+/* Callback function section */
+int
+server_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
+{
+ gfs3_statfs_rsp rsp = {0,};
+ rpcsvc_request_t *req = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING, "%"PRId64": STATFS (%s)",
+ frame->root->unique, strerror (op_errno));
+ goto out;
+ }
+
+ gf_statfs_from_statfs (&rsp.statfs, buf);
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_statfs_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+int
+server_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stbuf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ rpcsvc_request_t *req = NULL;
+ server_state_t *state = NULL;
+ inode_t *root_inode = NULL;
+ inode_t *link_inode = NULL;
+ loc_t fresh_loc = {0,};
+ gfs3_lookup_rsp rsp = {0,};
+ uuid_t rootgfid = {0,};
+
+ state = CALL_STATE (frame);
+
+ if (state->is_revalidate == 1 && op_ret == -1) {
+ state->is_revalidate = 2;
+ loc_copy (&fresh_loc, &state->loc);
+ inode_unref (fresh_loc.inode);
+ fresh_loc.inode = inode_new (state->itable);
+
+ STACK_WIND (frame, server_lookup_cbk,
+ frame->root->client->bound_xl,
+ frame->root->client->bound_xl->fops->lookup,
+ &fresh_loc, state->xdata);
+
+ loc_wipe (&fresh_loc);
+ return 0;
+ }
+
+ gf_stat_from_iatt (&rsp.postparent, postparent);
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret) {
+ if (state->is_revalidate && op_errno == ENOENT) {
+ if (!__is_root_gfid (state->resolve.gfid)) {
+ inode_unlink (state->loc.inode,
+ state->loc.parent,
+ state->loc.name);
+ }
+ }
+ goto out;
+ }
+
+ root_inode = frame->root->client->bound_xl->itable->root;
+ if (inode == root_inode) {
+ /* we just looked up root ("/") */
+ stbuf->ia_ino = 1;
+ rootgfid[15] = 1;
+ uuid_copy (stbuf->ia_gfid, rootgfid);
+ if (inode->ia_type == 0)
+ inode->ia_type = stbuf->ia_type;
+ }
+
+ gf_stat_from_iatt (&rsp.stat, stbuf);
+
+ if (!__is_root_gfid (inode->gfid)) {
+ link_inode = inode_link (inode, state->loc.parent,
+ state->loc.name, stbuf);
+ if (link_inode) {
+ inode_lookup (link_inode);
+ inode_unref (link_inode);
+ }
+ }
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ if (op_ret) {
+ if (state->resolve.bname) {
+ gf_log (this->name, ((op_errno == ENOENT) ?
+ GF_LOG_TRACE : GF_LOG_INFO),
+ "%"PRId64": LOOKUP %s (%s/%s) ==> "
+ "(%s)", frame->root->unique,
+ state->loc.path,
+ uuid_utoa (state->resolve.pargfid),
+ state->resolve.bname,
+ strerror (op_errno));
+ } else {
+ gf_log (this->name, ((op_errno == ENOENT) ?
+ GF_LOG_TRACE : GF_LOG_INFO),
+ "%"PRId64": LOOKUP %s (%s) ==> (%s)",
+ frame->root->unique, state->loc.path,
+ uuid_utoa (state->resolve.gfid),
+ strerror (op_errno));
+ }
+ }
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_lookup_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+
+int
+server_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ gfs3_lk_rsp rsp = {0,};
+ rpcsvc_request_t *req = NULL;
+ server_state_t *state = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret) {
+ if ((op_errno != ENOSYS) && (op_errno != EAGAIN)) {
+ state = CALL_STATE (frame);
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": LK %"PRId64" (%s) ==> "
+ "(%s)", frame->root->unique,
+ state->resolve.fd_no,
+ uuid_utoa (state->resolve.gfid),
+ strerror (op_errno));
+ }
+ goto out;
+ }
+
+ switch (lock->l_type) {
+ case F_RDLCK:
+ lock->l_type = GF_LK_F_RDLCK;
+ break;
+ case F_WRLCK:
+ lock->l_type = GF_LK_F_WRLCK;
+ break;
+ case F_UNLCK:
+ lock->l_type = GF_LK_F_UNLCK;
+ break;
+ default:
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unknown lock type: %"PRId32"!", lock->l_type);
+ break;
+ }
+
+ gf_proto_flock_from_flock (&rsp.flock, lock);
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_lk_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+
+int
+server_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ gf_common_rsp rsp = {0,};
+ server_state_t *state = NULL;
+ rpcsvc_request_t *req = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ state = CALL_STATE (frame);
+
+ if (op_ret < 0) {
+ if ((op_errno != ENOSYS) && (op_errno != EAGAIN)) {
+ gf_log (this->name, (op_errno == ENOENT)?
+ GF_LOG_DEBUG:GF_LOG_ERROR,
+ "%"PRId64": INODELK %s (%s) ==> (%s)",
+ frame->root->unique, state->loc.path,
+ uuid_utoa (state->resolve.gfid),
+ strerror (op_errno));
+ }
+ goto out;
+ }
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_common_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+
+int
+server_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ gf_common_rsp rsp = {0,};
+ server_state_t *state = NULL;
+ rpcsvc_request_t *req = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ state = CALL_STATE (frame);
+
+ if (op_ret < 0) {
+ if ((op_errno != ENOSYS) && (op_errno != EAGAIN)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": FINODELK %"PRId64" (%s) "
+ "==> (%s)", frame->root->unique,
+ state->resolve.fd_no,
+ uuid_utoa (state->resolve.gfid),
+ strerror (op_errno));
+ }
+ goto out;
+ }
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_common_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+int
+server_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ gf_common_rsp rsp = {0,};
+ server_state_t *state = NULL;
+ rpcsvc_request_t *req = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ state = CALL_STATE (frame);
+
+ if (op_ret < 0) {
+ if ((op_errno != ENOSYS) && (op_errno != EAGAIN)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": ENTRYLK %s (%s) ==> (%s)",
+ frame->root->unique, state->loc.path,
+ uuid_utoa (state->resolve.gfid),
+ strerror (op_errno));
+ }
+ goto out;
+ }
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_common_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+
+int
+server_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ gf_common_rsp rsp = {0,};
+ server_state_t *state = NULL;
+ rpcsvc_request_t *req = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ state = CALL_STATE (frame);
+
+ if (op_ret < 0) {
+ if ((op_errno != ENOSYS) && (op_errno != EAGAIN)) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": FENTRYLK %"PRId64" (%s) ==>(%s)",
+ frame->root->unique, state->resolve.fd_no,
+ uuid_utoa (state->resolve.gfid),
+ strerror (op_errno));
+ }
+ goto out;
+ }
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_common_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+
+int
+server_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ gf_common_rsp rsp = {0,};
+ rpcsvc_request_t *req = NULL;
+ server_state_t *state = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret) {
+ state = CALL_STATE (frame);
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": ACCESS %s (%s) ==> (%s)",
+ frame->root->unique, state->loc.path,
+ uuid_utoa (state->resolve.gfid),
+ strerror (op_errno));
+ goto out;
+ }
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_common_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+int
+server_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ gfs3_rmdir_rsp rsp = {0,};
+ server_state_t *state = NULL;
+ inode_t *parent = NULL;
+ rpcsvc_request_t *req = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ state = CALL_STATE (frame);
+
+ if (op_ret) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": RMDIR %s (%s/%s) ==> (%s)",
+ frame->root->unique, state->loc.path,
+ uuid_utoa (state->resolve.pargfid),
+ state->resolve.bname, strerror (op_errno));
+ goto out;
+ }
+
+ inode_unlink (state->loc.inode, state->loc.parent,
+ state->loc.name);
+ parent = inode_parent (state->loc.inode, 0, NULL);
+ if (parent)
+ /* parent should not be found for directories after
+ * inode_unlink, since directories cannot have
+ * hardlinks.
+ */
+ inode_unref (parent);
+ else
+ inode_forget (state->loc.inode, 0);
+
+ gf_stat_from_iatt (&rsp.preparent, preparent);
+ gf_stat_from_iatt (&rsp.postparent, postparent);
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_rmdir_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+int
+server_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ gfs3_mkdir_rsp rsp = {0,};
+ server_state_t *state = NULL;
+ inode_t *link_inode = NULL;
+ rpcsvc_request_t *req = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ state = CALL_STATE (frame);
+
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": MKDIR %s (%s/%s) ==> (%s)",
+ frame->root->unique, state->loc.path,
+ uuid_utoa (state->resolve.pargfid),
+ state->resolve.bname, strerror (op_errno));
+ goto out;
+ }
+
+ gf_stat_from_iatt (&rsp.stat, stbuf);
+ gf_stat_from_iatt (&rsp.preparent, preparent);
+ gf_stat_from_iatt (&rsp.postparent, postparent);
+
+ link_inode = inode_link (inode, state->loc.parent,
+ state->loc.name, stbuf);
+ inode_lookup (link_inode);
+ inode_unref (link_inode);
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_mkdir_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+int
+server_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ gfs3_mknod_rsp rsp = {0,};
+ server_state_t *state = NULL;
+ inode_t *link_inode = NULL;
+ rpcsvc_request_t *req = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ state = CALL_STATE (frame);
+
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": MKNOD %s (%s/%s) ==> (%s)",
+ frame->root->unique, state->loc.path,
+ uuid_utoa (state->resolve.pargfid),
+ state->resolve.bname, strerror (op_errno));
+ goto out;
+ }
+
+ gf_stat_from_iatt (&rsp.stat, stbuf);
+ gf_stat_from_iatt (&rsp.preparent, preparent);
+ gf_stat_from_iatt (&rsp.postparent, postparent);
+
+ link_inode = inode_link (inode, state->loc.parent,
+ state->loc.name, stbuf);
+ inode_lookup (link_inode);
+ inode_unref (link_inode);
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_mknod_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+int
+server_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ gf_common_rsp rsp = {0,};
+ server_state_t *state = NULL;
+ rpcsvc_request_t *req = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret < 0) {
+ state = CALL_STATE (frame);
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": FSYNCDIR %"PRId64" (%s) ==> (%s)",
+ frame->root->unique, state->resolve.fd_no,
+ uuid_utoa (state->resolve.gfid),
+ strerror (op_errno));
+ goto out;
+ }
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_common_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+int
+server_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ gfs3_readdir_rsp rsp = {0,};
+ server_state_t *state = NULL;
+ rpcsvc_request_t *req = NULL;
+ int ret = 0;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret < 0) {
+ state = CALL_STATE (frame);
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": READDIR %"PRId64" (%s) ==> (%s)",
+ frame->root->unique, state->resolve.fd_no,
+ uuid_utoa (state->resolve.gfid),
+ strerror (op_errno));
+ goto out;
+ }
+
+ /* (op_ret == 0) is valid, and means EOF */
+ if (op_ret) {
+ ret = serialize_rsp_dirent (entries, &rsp);
+ if (ret == -1) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+ }
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_readdir_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ readdir_rsp_cleanup (&rsp);
+
+ return 0;
+}
+
+int
+server_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ server_state_t *state = NULL;
+ server_ctx_t *serv_ctx = NULL;
+ rpcsvc_request_t *req = NULL;
+ gfs3_opendir_rsp rsp = {0,};
+ uint64_t fd_no = 0;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret < 0) {
+ state = CALL_STATE (frame);
+ gf_log (this->name, (op_errno == ENOENT)?
+ GF_LOG_DEBUG:GF_LOG_ERROR,
+ "%"PRId64": OPENDIR %s (%s) ==> (%s)",
+ frame->root->unique, state->loc.path,
+ uuid_utoa (state->resolve.gfid), strerror (op_errno));
+ goto out;
+ }
+
+ serv_ctx = server_ctx_get (frame->root->client, this);
+ if (serv_ctx == NULL) {
+ gf_log (this->name, GF_LOG_INFO, "server_ctx_get() failed");
+ goto out;
+ }
+
+ fd_bind (fd);
+ fd_no = gf_fd_unused_get (serv_ctx->fdtable, fd);
+ fd_ref (fd); // on behalf of the client
+
+out:
+ rsp.fd = fd_no;
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_opendir_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+int
+server_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ gf_common_rsp rsp = {0,};
+ rpcsvc_request_t *req = NULL;
+ server_state_t *state = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret == -1) {
+ state = CALL_STATE (frame);
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": REMOVEXATTR %s (%s) of key %s ==> (%s)",
+ frame->root->unique, state->loc.path,
+ uuid_utoa (state->resolve.gfid),
+ state->name, strerror (op_errno));
+ goto out;
+ }
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_common_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+int
+server_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ gf_common_rsp rsp = {0,};
+ rpcsvc_request_t *req = NULL;
+ server_state_t *state = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret == -1) {
+ state = CALL_STATE (frame);
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": FREMOVEXATTR %"PRId64" (%s) (%s) ==> (%s)",
+ frame->root->unique, state->resolve.fd_no,
+ uuid_utoa (state->resolve.gfid), state->name,
+ strerror (op_errno));
+ goto out;
+ }
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_common_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+int
+server_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ gfs3_getxattr_rsp rsp = {0,};
+ rpcsvc_request_t *req = NULL;
+ server_state_t *state = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret == -1) {
+ state = CALL_STATE (frame);
+ gf_log (this->name, (((op_errno == ENOTSUP) ||
+ (op_errno == ENODATA) ||
+ (op_errno == ENOENT)) ?
+ GF_LOG_DEBUG : GF_LOG_INFO),
+ "%"PRId64": GETXATTR %s (%s) (%s) ==> (%s)",
+ frame->root->unique, state->loc.path,
+ uuid_utoa (state->resolve.gfid),
+ state->name, strerror (op_errno));
+ goto out;
+ }
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, dict, &rsp.dict.dict_val,
+ rsp.dict.dict_len, op_errno, out);
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_getxattr_rsp);
+
+ GF_FREE (rsp.dict.dict_val);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+
+int
+server_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ gfs3_fgetxattr_rsp rsp = {0,};
+ server_state_t *state = NULL;
+ rpcsvc_request_t *req = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret == -1) {
+ state = CALL_STATE (frame);
+ gf_log (this->name, ((op_errno == ENOTSUP) ?
+ GF_LOG_DEBUG : GF_LOG_INFO),
+ "%"PRId64": FGETXATTR %"PRId64" (%s) (%s) ==> (%s)",
+ frame->root->unique, state->resolve.fd_no,
+ uuid_utoa (state->resolve.gfid),
+ state->name, strerror (op_errno));
+ goto out;
+ }
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, dict, &rsp.dict.dict_val,
+ rsp.dict.dict_len, op_errno, out);
+
+out:
+
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_fgetxattr_rsp);
+
+ GF_FREE (rsp.dict.dict_val);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+/* print every key */
+static int
+_gf_server_log_setxattr_failure (dict_t *d, char *k, data_t *v,
+ void *tmp)
+{
+ server_state_t *state = NULL;
+ call_frame_t *frame = NULL;
+
+ frame = tmp;
+ state = CALL_STATE (frame);
+
+ gf_log (THIS->name, GF_LOG_INFO,
+ "%"PRId64": SETXATTR %s (%s) ==> %s",
+ frame->root->unique, state->loc.path,
+ uuid_utoa (state->resolve.gfid), k);
+ return 0;
+}
+
+int
+server_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ gf_common_rsp rsp = {0,};
+ rpcsvc_request_t *req = NULL;
+ server_state_t *state = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret == -1) {
+ state = CALL_STATE (frame);
+ if (op_errno != ENOTSUP)
+ dict_foreach (state->dict,
+ _gf_server_log_setxattr_failure,
+ frame);
+
+ gf_log (THIS->name, ((op_errno == ENOTSUP) ?
+ GF_LOG_DEBUG : GF_LOG_INFO),
+ "%s", strerror (op_errno));
+ goto out;
+ }
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_common_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+/* print every key here */
+static int
+_gf_server_log_fsetxattr_failure (dict_t *d, char *k, data_t *v,
+ void *tmp)
+{
+ call_frame_t *frame = NULL;
+ server_state_t *state = NULL;
+
+ frame = tmp;
+ state = CALL_STATE (frame);
+
+ gf_log (THIS->name, GF_LOG_INFO,
+ "%"PRId64": FSETXATTR %"PRId64" (%s) ==> %s",
+ frame->root->unique, state->resolve.fd_no,
+ uuid_utoa (state->resolve.gfid), k);
+
+ return 0;
+}
+
+int
+server_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ gf_common_rsp rsp = {0,};
+ rpcsvc_request_t *req = NULL;
+ server_state_t *state = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret == -1) {
+ state = CALL_STATE (frame);
+ if (op_errno != ENOTSUP) {
+ dict_foreach (state->dict,
+ _gf_server_log_fsetxattr_failure,
+ frame);
+ }
+ gf_log (THIS->name, ((op_errno == ENOTSUP) ?
+ GF_LOG_DEBUG : GF_LOG_INFO),
+ "%s", strerror (op_errno));
+ goto out;
+ }
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_common_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+int
+server_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ gfs3_rename_rsp rsp = {0,};
+ server_state_t *state = NULL;
+ rpcsvc_request_t *req = NULL;
+ inode_t *tmp_inode = NULL;
+ inode_t *tmp_parent = NULL;
+ char oldpar_str[50] = {0,};
+ char newpar_str[50] = {0,};
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ state = CALL_STATE (frame);
+
+ if (op_ret == -1) {
+ uuid_utoa_r (state->resolve.gfid, oldpar_str);
+ uuid_utoa_r (state->resolve2.gfid, newpar_str);
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": RENAME %s (%s/%s) -> %s (%s/%s) ==> (%s)",
+ frame->root->unique, state->loc.path,
+ oldpar_str, state->resolve.bname, state->loc2.path,
+ newpar_str, state->resolve2.bname, strerror (op_errno));
+ goto out;
+ }
+
+ stbuf->ia_type = state->loc.inode->ia_type;
+
+ /* TODO: log gfid of the inodes */
+ gf_log (frame->root->client->bound_xl->name, GF_LOG_TRACE,
+ "%"PRId64": RENAME_CBK %s ==> %s",
+ frame->root->unique, state->loc.name, state->loc2.name);
+
+ /* Before renaming the inode, we have to get the inode for the
+ * destination entry (i.e. inode with state->loc2.parent as
+ * parent and state->loc2.name as name). If it exists, then
+ * unlink that inode, and send forget on that inode if the
+ * unlinked entry is the last entry. In case of fuse client
+ * the fuse kernel module itself sends the forget on the
+ * unlinked inode.
+ */
+ tmp_inode = inode_grep (state->loc.inode->table,
+ state->loc2.parent, state->loc2.name);
+ if (tmp_inode) {
+ inode_unlink (tmp_inode, state->loc2.parent,
+ state->loc2.name);
+ tmp_parent = inode_parent (tmp_inode, 0, NULL);
+ if (tmp_parent)
+ inode_unref (tmp_parent);
+ else
+ inode_forget (tmp_inode, 0);
+
+ inode_unref (tmp_inode);
+ }
+
+ inode_rename (state->itable,
+ state->loc.parent, state->loc.name,
+ state->loc2.parent, state->loc2.name,
+ state->loc.inode, stbuf);
+ gf_stat_from_iatt (&rsp.stat, stbuf);
+
+ gf_stat_from_iatt (&rsp.preoldparent, preoldparent);
+ gf_stat_from_iatt (&rsp.postoldparent, postoldparent);
+
+ gf_stat_from_iatt (&rsp.prenewparent, prenewparent);
+ gf_stat_from_iatt (&rsp.postnewparent, postnewparent);
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_rename_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+int
+server_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ gfs3_unlink_rsp rsp = {0,};
+ server_state_t *state = NULL;
+ inode_t *parent = NULL;
+ rpcsvc_request_t *req = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ state = CALL_STATE (frame);
+
+ if (op_ret) {
+ gf_log (this->name, (op_errno == ENOENT)?
+ GF_LOG_DEBUG:GF_LOG_ERROR,
+ "%"PRId64": UNLINK %s (%s/%s) ==> (%s)",
+ frame->root->unique, state->loc.path,
+ uuid_utoa (state->resolve.pargfid),
+ state->resolve.bname, strerror (op_errno));
+ goto out;
+ }
+
+ /* TODO: log gfid of the inodes */
+ gf_log (frame->root->client->bound_xl->name, GF_LOG_TRACE,
+ "%"PRId64": UNLINK_CBK %s",
+ frame->root->unique, state->loc.name);
+
+ inode_unlink (state->loc.inode, state->loc.parent,
+ state->loc.name);
+
+ parent = inode_parent (state->loc.inode, 0, NULL);
+ if (parent)
+ inode_unref (parent);
+ else
+ inode_forget (state->loc.inode, 0);
+
+ gf_stat_from_iatt (&rsp.preparent, preparent);
+ gf_stat_from_iatt (&rsp.postparent, postparent);
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_unlink_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+int
+server_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ gfs3_symlink_rsp rsp = {0,};
+ server_state_t *state = NULL;
+ inode_t *link_inode = NULL;
+ rpcsvc_request_t *req = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ state = CALL_STATE (frame);
+
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": SYMLINK %s (%s/%s) ==> (%s)",
+ frame->root->unique, state->loc.path,
+ uuid_utoa (state->resolve.pargfid),
+ state->resolve.bname, strerror (op_errno));
+ goto out;
+ }
+
+ gf_stat_from_iatt (&rsp.stat, stbuf);
+ gf_stat_from_iatt (&rsp.preparent, preparent);
+ gf_stat_from_iatt (&rsp.postparent, postparent);
+
+ link_inode = inode_link (inode, state->loc.parent,
+ state->loc.name, stbuf);
+ inode_lookup (link_inode);
+ inode_unref (link_inode);
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_symlink_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+
+int
+server_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ gfs3_link_rsp rsp = {0,};
+ server_state_t *state = NULL;
+ inode_t *link_inode = NULL;
+ rpcsvc_request_t *req = NULL;
+ char gfid_str[50] = {0,};
+ char newpar_str[50] = {0,};
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ state = CALL_STATE (frame);
+
+ if (op_ret) {
+ uuid_utoa_r (state->resolve.gfid, gfid_str);
+ uuid_utoa_r (state->resolve2.pargfid, newpar_str);
+
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": LINK %s (%s) -> %s/%s ==> (%s)",
+ frame->root->unique, state->loc.path,
+ gfid_str, newpar_str, state->resolve2.bname,
+ strerror (op_errno));
+ goto out;
+ }
+
+ gf_stat_from_iatt (&rsp.stat, stbuf);
+ gf_stat_from_iatt (&rsp.preparent, preparent);
+ gf_stat_from_iatt (&rsp.postparent, postparent);
+
+ link_inode = inode_link (inode, state->loc2.parent,
+ state->loc2.name, stbuf);
+ inode_unref (link_inode);
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_link_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+int
+server_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ gfs3_truncate_rsp rsp = {0,};
+ server_state_t *state = NULL;
+ rpcsvc_request_t *req = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret) {
+ state = CALL_STATE (frame);
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": TRUNCATE %s (%s) ==> (%s)",
+ frame->root->unique, state->loc.path,
+ uuid_utoa (state->resolve.gfid), strerror (op_errno));
+ goto out;
+ }
+
+ gf_stat_from_iatt (&rsp.prestat, prebuf);
+ gf_stat_from_iatt (&rsp.poststat, postbuf);
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_truncate_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+int
+server_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ dict_t *xdata)
+{
+ gfs3_fstat_rsp rsp = {0,};
+ server_state_t *state = NULL;
+ rpcsvc_request_t *req = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret) {
+ state = CALL_STATE (frame);
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": FSTAT %"PRId64" (%s) ==> (%s)",
+ frame->root->unique, state->resolve.fd_no,
+ uuid_utoa (state->resolve.gfid), strerror (op_errno));
+ goto out;
+ }
+
+ gf_stat_from_iatt (&rsp.stat, stbuf);
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_fstat_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+int
+server_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ gfs3_ftruncate_rsp rsp = {0};
+ server_state_t *state = NULL;
+ rpcsvc_request_t *req = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret) {
+ state = CALL_STATE (frame);
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": FTRUNCATE %"PRId64" (%s)==> (%s)",
+ frame->root->unique, state->resolve.fd_no,
+ uuid_utoa (state->resolve.gfid), strerror (op_errno));
+ goto out;
+ }
+
+ gf_stat_from_iatt (&rsp.prestat, prebuf);
+ gf_stat_from_iatt (&rsp.poststat, postbuf);
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_ftruncate_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+int
+server_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ gf_common_rsp rsp = {0,};
+ server_state_t *state = NULL;
+ rpcsvc_request_t *req = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret < 0) {
+ state = CALL_STATE (frame);
+ gf_log (this->name, (op_errno == ENOENT)?
+ GF_LOG_DEBUG:GF_LOG_ERROR,
+ "%"PRId64": FLUSH %"PRId64" (%s) ==> (%s)",
+ frame->root->unique, state->resolve.fd_no,
+ uuid_utoa (state->resolve.gfid), strerror (op_errno));
+ goto out;
+ }
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_common_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+int
+server_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ gfs3_fsync_rsp rsp = {0,};
+ server_state_t *state = NULL;
+ rpcsvc_request_t *req = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret < 0) {
+ state = CALL_STATE (frame);
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": FSYNC %"PRId64" (%s) ==> (%s)",
+ frame->root->unique, state->resolve.fd_no,
+ uuid_utoa (state->resolve.gfid), strerror (op_errno));
+ goto out;
+ }
+
+ gf_stat_from_iatt (&(rsp.prestat), prebuf);
+ gf_stat_from_iatt (&(rsp.poststat), postbuf);
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_fsync_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+int
+server_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ gfs3_write_rsp rsp = {0,};
+ server_state_t *state = NULL;
+ rpcsvc_request_t *req = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret < 0) {
+ state = CALL_STATE (frame);
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": WRITEV %"PRId64" (%s) ==> (%s)",
+ frame->root->unique, state->resolve.fd_no,
+ uuid_utoa (state->resolve.gfid), strerror (op_errno));
+ goto out;
+ }
+
+ gf_stat_from_iatt (&rsp.prestat, prebuf);
+ gf_stat_from_iatt (&rsp.poststat, postbuf);
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_write_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+
+int
+server_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iovec *vector, int32_t count,
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
+{
+ gfs3_read_rsp rsp = {0,};
+ server_state_t *state = NULL;
+ rpcsvc_request_t *req = NULL;
+
+#ifdef GF_TESTING_IO_XDATA
+ {
+ int ret = 0;
+ if (!xdata)
+ xdata = dict_new ();
+
+ ret = dict_set_str (xdata, "testing-the-xdata-key",
+ "testing-xdata-value");
+ }
+#endif
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret < 0) {
+ state = CALL_STATE (frame);
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": READV %"PRId64" (%s) ==> (%s)",
+ frame->root->unique, state->resolve.fd_no,
+ uuid_utoa (state->resolve.gfid), strerror (op_errno));
+ goto out;
+ }
+
+ gf_stat_from_iatt (&rsp.stat, stbuf);
+ rsp.size = op_ret;
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, vector, count, iobref,
+ (xdrproc_t)xdr_gfs3_read_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+int
+server_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ uint32_t weak_checksum, uint8_t *strong_checksum,
+ dict_t *xdata)
+{
+ gfs3_rchecksum_rsp rsp = {0,};
+ rpcsvc_request_t *req = NULL;
+ server_state_t *state = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret < 0) {
+ state = CALL_STATE (frame);
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": RCHECKSUM %"PRId64" (%s)==> (%s)",
+ frame->root->unique, state->resolve.fd_no,
+ uuid_utoa (state->resolve.gfid), strerror (op_errno));
+ goto out;
+ }
+
+ rsp.weak_checksum = weak_checksum;
+
+ rsp.strong_checksum.strong_checksum_val = (char *)strong_checksum;
+ rsp.strong_checksum.strong_checksum_len = MD5_DIGEST_LENGTH;
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_rchecksum_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+
+int
+server_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ server_state_t *state = NULL;
+ server_ctx_t *serv_ctx = NULL;
+ rpcsvc_request_t *req = NULL;
+ uint64_t fd_no = 0;
+ gfs3_open_rsp rsp = {0,};
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret < 0) {
+ state = CALL_STATE (frame);
+ gf_log (this->name, (op_errno == ENOENT)?
+ GF_LOG_DEBUG:GF_LOG_ERROR,
+ "%"PRId64": OPEN %s (%s) ==> (%s)",
+ frame->root->unique, state->loc.path,
+ uuid_utoa (state->resolve.gfid),
+ strerror (op_errno));
+ goto out;
+ }
+
+ serv_ctx = server_ctx_get (frame->root->client, this);
+ if (serv_ctx == NULL) {
+ gf_log (this->name, GF_LOG_INFO, "server_ctx_get() failed");
+ goto out;
+ }
+
+ fd_bind (fd);
+ fd_no = gf_fd_unused_get (serv_ctx->fdtable, fd);
+ fd_ref (fd);
+ rsp.fd = fd_no;
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_open_rsp);
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+
+int
+server_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ server_state_t *state = NULL;
+ server_ctx_t *serv_ctx = NULL;
+ inode_t *link_inode = NULL;
+ rpcsvc_request_t *req = NULL;
+ uint64_t fd_no = 0;
+ gfs3_create_rsp rsp = {0,};
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ state = CALL_STATE (frame);
+
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": CREATE %s (%s/%s) ==> (%s)",
+ frame->root->unique, state->loc.path,
+ uuid_utoa (state->resolve.pargfid),
+ state->resolve.bname, strerror (op_errno));
+ goto out;
+ }
+
+ /* TODO: log gfid too */
+ gf_log (frame->root->client->bound_xl->name, GF_LOG_TRACE,
+ "%"PRId64": CREATE %s (%s)",
+ frame->root->unique, state->loc.name,
+ uuid_utoa (stbuf->ia_gfid));
+
+ link_inode = inode_link (inode, state->loc.parent,
+ state->loc.name, stbuf);
+
+ if (!link_inode) {
+ op_ret = -1;
+ op_errno = ENOENT;
+ goto out;
+ }
+
+ if (link_inode != inode) {
+ /*
+ VERY racy code (if used anywhere else)
+ -- don't do this without understanding
+ */
+
+ inode_unref (fd->inode);
+ fd->inode = inode_ref (link_inode);
+ }
+
+ inode_lookup (link_inode);
+ inode_unref (link_inode);
+
+ serv_ctx = server_ctx_get (frame->root->client, this);
+ if (serv_ctx == NULL) {
+ gf_log (this->name, GF_LOG_INFO, "server_ctx_get() failed");
+ goto out;
+ }
+
+ fd_bind (fd);
+ fd_no = gf_fd_unused_get (serv_ctx->fdtable, fd);
+ fd_ref (fd);
+
+ if ((fd_no < 0) || (fd == 0)) {
+ op_ret = fd_no;
+ op_errno = errno;
+ }
+
+ gf_stat_from_iatt (&rsp.stat, stbuf);
+ gf_stat_from_iatt (&rsp.preparent, preparent);
+ gf_stat_from_iatt (&rsp.postparent, postparent);
+
+out:
+ rsp.fd = fd_no;
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_create_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+int
+server_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *buf,
+ struct iatt *stbuf, dict_t *xdata)
+{
+ gfs3_readlink_rsp rsp = {0,};
+ server_state_t *state = NULL;
+ rpcsvc_request_t *req = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret < 0) {
+ state = CALL_STATE (frame);
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": READLINK %s (%s) ==> (%s)",
+ frame->root->unique, state->loc.path,
+ uuid_utoa (state->resolve.gfid),
+ strerror (op_errno));
+ goto out;
+ }
+
+ gf_stat_from_iatt (&rsp.buf, stbuf);
+ rsp.path = (char *)buf;
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ if (!rsp.path)
+ rsp.path = "";
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_readlink_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+int
+server_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ dict_t *xdata)
+{
+ gfs3_stat_rsp rsp = {0,};
+ server_state_t *state = NULL;
+ rpcsvc_request_t *req = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret) {
+ state = CALL_STATE (frame);
+ gf_log (this->name, (op_errno == ENOENT)?
+ GF_LOG_DEBUG:GF_LOG_ERROR,
+ "%"PRId64": STAT %s (%s) ==> (%s)",
+ frame->root->unique, state->loc.path,
+ uuid_utoa (state->resolve.gfid),
+ strerror (op_errno));
+ goto out;
+ }
+
+ gf_stat_from_iatt (&rsp.stat, stbuf);
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_stat_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+
+int
+server_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost, dict_t *xdata)
+{
+ gfs3_setattr_rsp rsp = {0,};
+ server_state_t *state = NULL;
+ rpcsvc_request_t *req = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret) {
+ state = CALL_STATE (frame);
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": SETATTR %s (%s) ==> (%s)",
+ frame->root->unique, state->loc.path,
+ uuid_utoa (state->resolve.gfid),
+ strerror (op_errno));
+ goto out;
+ }
+
+ gf_stat_from_iatt (&rsp.statpre, statpre);
+ gf_stat_from_iatt (&rsp.statpost, statpost);
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_setattr_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+int
+server_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost, dict_t *xdata)
+{
+ gfs3_fsetattr_rsp rsp = {0,};
+ server_state_t *state = NULL;
+ rpcsvc_request_t *req = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret) {
+ state = CALL_STATE (frame);
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": FSETATTR %"PRId64" (%s) ==> (%s)",
+ frame->root->unique, state->resolve.fd_no,
+ uuid_utoa (state->resolve.gfid),
+ strerror (op_errno));
+ goto out;
+ }
+
+ gf_stat_from_iatt (&rsp.statpre, statpre);
+ gf_stat_from_iatt (&rsp.statpost, statpost);
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_fsetattr_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+
+int
+server_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ gfs3_xattrop_rsp rsp = {0,};
+ server_state_t *state = NULL;
+ rpcsvc_request_t *req = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret < 0) {
+ state = CALL_STATE (frame);
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": XATTROP %s (%s) ==> (%s)",
+ frame->root->unique, state->loc.path,
+ uuid_utoa (state->resolve.gfid),
+ strerror (op_errno));
+ goto out;
+ }
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, dict, &rsp.dict.dict_val,
+ rsp.dict.dict_len, op_errno, out);
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_xattrop_rsp);
+
+ GF_FREE (rsp.dict.dict_val);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+
+int
+server_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ gfs3_xattrop_rsp rsp = {0,};
+ server_state_t *state = NULL;
+ rpcsvc_request_t *req = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret < 0) {
+ state = CALL_STATE (frame);
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": FXATTROP %"PRId64" (%s) ==> (%s)",
+ frame->root->unique, state->resolve.fd_no,
+ uuid_utoa (state->resolve.gfid),
+ strerror (op_errno));
+ goto out;
+ }
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, dict, &rsp.dict.dict_val,
+ rsp.dict.dict_len, op_errno, out);
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_fxattrop_rsp);
+
+ GF_FREE (rsp.dict.dict_val);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+
+int
+server_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ gfs3_readdirp_rsp rsp = {0,};
+ server_state_t *state = NULL;
+ rpcsvc_request_t *req = NULL;
+ int ret = 0;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret < 0) {
+ state = CALL_STATE (frame);
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": READDIRP %"PRId64" (%s) ==> (%s)",
+ frame->root->unique, state->resolve.fd_no,
+ uuid_utoa (state->resolve.gfid),
+ strerror (op_errno));
+ goto out;
+ }
+
+ /* (op_ret == 0) is valid, and means EOF */
+ if (op_ret) {
+ ret = serialize_rsp_direntp (entries, &rsp);
+ if (ret == -1) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+ }
+
+ /* TODO: need more clear thoughts before calling this function. */
+ /* gf_link_inodes_from_dirent (this, state->fd->inode, entries); */
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply (frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_readdirp_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ readdirp_rsp_cleanup (&rsp);
+
+ return 0;
+}
+
+int
+server_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost, dict_t *xdata)
+{
+ gfs3_fallocate_rsp rsp = {0,};
+ server_state_t *state = NULL;
+ rpcsvc_request_t *req = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret) {
+ state = CALL_STATE (frame);
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": FALLOCATE %"PRId64" (%s) ==> (%s)",
+ frame->root->unique, state->resolve.fd_no,
+ uuid_utoa (state->resolve.gfid),
+ strerror (op_errno));
+ goto out;
+ }
+
+ gf_stat_from_iatt (&rsp.statpre, statpre);
+ gf_stat_from_iatt (&rsp.statpost, statpost);
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply(frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t) xdr_gfs3_fallocate_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+int
+server_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost, dict_t *xdata)
+{
+ gfs3_discard_rsp rsp = {0,};
+ server_state_t *state = NULL;
+ rpcsvc_request_t *req = NULL;
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, &rsp.xdata.xdata_val,
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret) {
+ state = CALL_STATE (frame);
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": DISCARD %"PRId64" (%s) ==> (%s)",
+ frame->root->unique, state->resolve.fd_no,
+ uuid_utoa (state->resolve.gfid),
+ strerror (op_errno));
+ goto out;
+ }
+
+ gf_stat_from_iatt (&rsp.statpre, statpre);
+ gf_stat_from_iatt (&rsp.statpost, statpost);
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ req = frame->local;
+ server_submit_reply(frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t) xdr_gfs3_discard_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+int
+server_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost, dict_t *xdata)
+{
+ gfs3_zerofill_rsp rsp = {0,};
+ server_state_t *state = NULL;
+ rpcsvc_request_t *req = NULL;
+
+ req = frame->local;
+ state = CALL_STATE (frame);
+
+ GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&rsp.xdata.xdata_val),
+ rsp.xdata.xdata_len, op_errno, out);
+
+ if (op_ret) {
+ gf_log (this->name, GF_LOG_INFO,
+ "%"PRId64": ZEROFILL%"PRId64" (%s) ==> (%s)",
+ frame->root->unique, state->resolve.fd_no,
+ uuid_utoa (state->resolve.gfid),
+ strerror (op_errno));
+ goto out;
+ }
+
+ gf_stat_from_iatt (&rsp.statpre, statpre);
+ gf_stat_from_iatt (&rsp.statpost, statpost);
+
+out:
+ rsp.op_ret = op_ret;
+ rsp.op_errno = gf_errno_to_error (op_errno);
+
+ server_submit_reply(frame, req, &rsp, NULL, 0, NULL,
+ (xdrproc_t) xdr_gfs3_zerofill_rsp);
+
+ GF_FREE (rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+
+/* Resume function section */
+
+int
+server_rchecksum_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+ server_state_t *state = NULL;
+ int op_ret = 0;
+ int op_errno = EINVAL;
+
+ state = CALL_STATE (frame);
+
+ if (state->resolve.op_ret != 0) {
+ op_ret = state->resolve.op_ret;
+ op_errno = state->resolve.op_errno;
+ goto err;
+ }
+
+ STACK_WIND (frame, server_rchecksum_cbk, bound_xl,
+ bound_xl->fops->rchecksum, state->fd,
+ state->offset, state->size, state->xdata);
+
+ return 0;
+err:
+ server_rchecksum_cbk (frame, NULL, frame->this, op_ret, op_errno, 0,
+ NULL, NULL);
+
+ return 0;
+
+}
+
+int
+server_lk_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+ server_state_t *state = NULL;
+
+ state = CALL_STATE (frame);
+
+ if (state->resolve.op_ret != 0)
+ goto err;
+
+ STACK_WIND (frame, server_lk_cbk, bound_xl, bound_xl->fops->lk,
+ state->fd, state->cmd, &state->flock, state->xdata);
+
+ return 0;
+
+err:
+ server_lk_cbk (frame, NULL, frame->this, state->resolve.op_ret,
+ state->resolve.op_errno, NULL, NULL);
+ return 0;
+}
+
+int
+server_rename_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+ server_state_t *state = NULL;
+ int op_ret = 0;
+ int op_errno = 0;
+
+ state = CALL_STATE (frame);
+
+ if (state->resolve.op_ret != 0) {
+ op_ret = state->resolve.op_ret;
+ op_errno = state->resolve.op_errno;
+ goto err;
+ }
+
+ if (state->resolve2.op_ret != 0) {
+ op_ret = state->resolve2.op_ret;
+ op_errno = state->resolve2.op_errno;
+ goto err;
+ }
+
+ STACK_WIND (frame, server_rename_cbk,
+ bound_xl, bound_xl->fops->rename,
+ &state->loc, &state->loc2, state->xdata);
+ return 0;
+err:
+ server_rename_cbk (frame, NULL, frame->this, op_ret, op_errno,
+ NULL, NULL, NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+
+int
+server_link_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+ server_state_t *state = NULL;
+ int op_ret = 0;
+ int op_errno = 0;
+
+ state = CALL_STATE (frame);
+
+ if (state->resolve.op_ret != 0) {
+ op_ret = state->resolve.op_ret;
+ op_errno = state->resolve.op_errno;
+ goto err;
+ }
+
+ if (state->resolve2.op_ret != 0) {
+ op_ret = state->resolve2.op_ret;
+ op_errno = state->resolve2.op_errno;
+ goto err;
+ }
+
+ state->loc2.inode = inode_ref (state->loc.inode);
+
+ STACK_WIND (frame, server_link_cbk, bound_xl, bound_xl->fops->link,
+ &state->loc, &state->loc2, state->xdata);
+
+ return 0;
+err:
+ server_link_cbk (frame, NULL, frame->this, op_ret, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+int
+server_symlink_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+ server_state_t *state = NULL;
+
+ state = CALL_STATE (frame);
+
+ if (state->resolve.op_ret != 0)
+ goto err;
+
+ state->loc.inode = inode_new (state->itable);
+
+ STACK_WIND (frame, server_symlink_cbk,
+ bound_xl, bound_xl->fops->symlink,
+ state->name, &state->loc, state->umask, state->xdata);
+
+ return 0;
+err:
+ server_symlink_cbk (frame, NULL, frame->this, state->resolve.op_ret,
+ state->resolve.op_errno, NULL, NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+
+int
+server_access_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+ server_state_t *state = NULL;
+
+ state = CALL_STATE (frame);
+
+ if (state->resolve.op_ret != 0)
+ goto err;
+
+ STACK_WIND (frame, server_access_cbk,
+ bound_xl, bound_xl->fops->access,
+ &state->loc, state->mask, state->xdata);
+ return 0;
+err:
+ server_access_cbk (frame, NULL, frame->this, state->resolve.op_ret,
+ state->resolve.op_errno, NULL);
+ return 0;
+}
+
+int
+server_fentrylk_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+ GF_UNUSED int ret = -1;
+ server_state_t *state = NULL;
+
+ state = CALL_STATE (frame);
+
+ if (state->resolve.op_ret != 0)
+ goto err;
+
+ if (!state->xdata)
+ state->xdata = dict_new ();
+
+ if (state->xdata)
+ ret = dict_set_str (state->xdata, "connection-id",
+ frame->root->client->client_uid);
+
+ STACK_WIND (frame, server_fentrylk_cbk, bound_xl,
+ bound_xl->fops->fentrylk,
+ state->volume, state->fd, state->name,
+ state->cmd, state->type, state->xdata);
+
+ return 0;
+err:
+ server_fentrylk_cbk (frame, NULL, frame->this, state->resolve.op_ret,
+ state->resolve.op_errno, NULL);
+ return 0;
+}
+
+
+int
+server_entrylk_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+ GF_UNUSED int ret = -1;
+ server_state_t *state = NULL;
+
+ state = CALL_STATE (frame);
+
+ if (state->resolve.op_ret != 0)
+ goto err;
+
+ if (!state->xdata)
+ state->xdata = dict_new ();
+
+ if (state->xdata)
+ ret = dict_set_str (state->xdata, "connection-id",
+ frame->root->client->client_uid);
+
+ STACK_WIND (frame, server_entrylk_cbk,
+ bound_xl, bound_xl->fops->entrylk,
+ state->volume, &state->loc, state->name,
+ state->cmd, state->type, state->xdata);
+ return 0;
+err:
+ server_entrylk_cbk (frame, NULL, frame->this, state->resolve.op_ret,
+ state->resolve.op_errno, NULL);
+ return 0;
+}
+
+
+int
+server_finodelk_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+ GF_UNUSED int ret = -1;
+ server_state_t *state = NULL;
+
+ gf_log (bound_xl->name, GF_LOG_WARNING, "frame %p, xlator %p",
+ frame, bound_xl);
+
+ state = CALL_STATE (frame);
+
+ if (state->resolve.op_ret != 0)
+ goto err;
+
+ if (!state->xdata)
+ state->xdata = dict_new ();
+
+ if (state->xdata)
+ ret = dict_set_str (state->xdata, "connection-id",
+ frame->root->client->client_uid);
+
+ STACK_WIND (frame, server_finodelk_cbk, bound_xl,
+ bound_xl->fops->finodelk, state->volume, state->fd,
+ state->cmd, &state->flock, state->xdata);
+
+ return 0;
+err:
+ server_finodelk_cbk (frame, NULL, frame->this, state->resolve.op_ret,
+ state->resolve.op_errno, NULL);
+
+ return 0;
+}
+
+int
+server_inodelk_resume (call_frame_t *frame, xlator_t *bound_xl)
+{
+ GF_UNUSED int ret = -1;
+ server_state_t *state = NULL;
+
+ gf_log (bound_xl->name, GF_LOG_WARNING, "frame %p, xlator %p",
+ frame, bound_xl);
+
+ state = CALL_STATE (frame);
+
+ if (state->resolve.op_ret != 0)
+ goto err;
+
+ if (!state->xdata)
+ state->xdata = dict_new ();
+
+ if (state->xdata)
+ ret = dict_set_str (state->xdata, "connection-id",
+ frame->root->client->client_uid);
+
+ STACK_WIND (frame, server_inodelk_cbk, bound_xl,
+ bound_xl->fops->inodelk, state->volume, &state->loc,
+ state->cmd, &state->flock, state->xdata);
+ return 0;
+err:
+ server_inodelk_cbk (frame, NULL, frame->this, state->resolve.op_ret,
+ state->resolve.op_errno, NULL);
+ return 0;
+}
+
+int